From cfd0b0ea83fca939fa267f0c4ac45b4ee3157d22 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 31 May 2022 14:30:27 +0800 Subject: [PATCH 0001/1149] server: fix race during raft data migration (#12701) close tikv/tikv#12698 Close engine before cleaning up its data during raft engine migration. Signed-off-by: tabokie --- components/raft_log_engine/src/engine.rs | 2 -- components/server/src/server.rs | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 145a122802d..9707bdb28b7 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -444,8 +444,6 @@ impl RaftEngine for RaftLogEngine { None } - fn stop(&self) {} - fn dump_stats(&self) -> Result { // Raft engine won't dump anything. Ok("".to_owned()) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index b9f3c7bd6f2..4344a706fde 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1414,6 +1414,8 @@ impl ConfiguredRaftEngine for RocksEngine { RaftLogEngine::new(config.raft_engine.config(), key_manager.clone(), None) .expect("failed to open raft engine for migration"); dump_raft_engine_to_raftdb(&raft_engine, &raftdb, 8 /*threads*/); + raft_engine.stop(); + drop(raft_engine); raft_data_state_machine.after_dump_data(); } raftdb @@ -1463,6 +1465,8 @@ impl ConfiguredRaftEngine for RaftLogEngine { .expect("failed to open raftdb for migration"); let raftdb = RocksEngine::from_db(Arc::new(raftdb)); dump_raftdb_to_raft_engine(&raftdb, &raft_engine, 8 /*threads*/); + raftdb.stop(); + drop(raftdb); raft_data_state_machine.after_dump_data(); } raft_engine From 6a67b08d7fc6fa0623009e5bcb756333c2a655af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 1 Jun 2022 00:42:27 +0800 Subject: [PATCH 0002/1149] log backup: disable test log by default 
(#12710) close tikv/tikv#12709 Signed-off-by: Yu Juncen --- components/backup-stream/src/router.rs | 5 ----- components/backup-stream/tests/mod.rs | 6 ------ 2 files changed, 11 deletions(-) diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 294ec2c0c98..8db9244d916 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -1328,7 +1328,6 @@ mod tests { #[tokio::test] async fn test_basic_file() -> Result<()> { - test_util::init_log_for_test(); let tmp = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); tokio::fs::create_dir_all(&tmp).await?; let (tx, rx) = dummy_scheduler(); @@ -1485,7 +1484,6 @@ mod tests { #[tokio::test] async fn test_flush_with_error() -> Result<()> { - test_util::init_log_for_test(); let (tx, _rx) = dummy_scheduler(); let tmp = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); let router = Arc::new(RouterInner::new( @@ -1517,7 +1515,6 @@ mod tests { #[tokio::test] async fn test_empty_resolved_ts() { - test_util::init_log_for_test(); let (tx, _rx) = dummy_scheduler(); let tmp = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); let router = RouterInner::new(tmp.clone(), tx, 32, Duration::from_secs(300)); @@ -1544,7 +1541,6 @@ mod tests { #[tokio::test] async fn test_flush_with_pausing_self() -> Result<()> { - test_util::init_log_for_test(); let (tx, rx) = dummy_scheduler(); let tmp = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); let router = Arc::new(RouterInner::new( @@ -1585,7 +1581,6 @@ mod tests { #[test] fn test_format_datetime() { - test_util::init_log_for_test(); let s = TempFileKey::format_date_time(431656320867237891); let s = s.to_string(); assert_eq!(s, "20220307"); diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index 064b954d7bf..85bb633955b 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -507,7 
+507,6 @@ mod test { #[test] fn basic() { - // test_util::init_log_for_test(); let mut suite = super::Suite::new("basic", 4); run_async_test(async { @@ -528,7 +527,6 @@ mod test { #[test] fn with_split() { - // test_util::init_log_for_test(); let mut suite = super::Suite::new("with_split", 4); run_async_test(async { let round1 = suite.write_records(0, 128, 1).await; @@ -548,7 +546,6 @@ mod test { #[test] /// This case tests whether the backup can continue when the leader failes. fn leader_down() { - // test_util::init_log_for_test(); let mut suite = super::Suite::new("leader_down", 4); suite.must_register_task(1, "test_leader_down"); suite.sync(); @@ -569,7 +566,6 @@ mod test { /// This case tests whehter the checkpoint ts (next backup ts) can be advanced correctly /// when async commit is enabled. fn async_commit() { - // test_util::init_log_for_test(); let mut suite = super::Suite::new("async_commit", 3); run_async_test(async { suite.must_register_task(1, "test_async_commit"); @@ -600,7 +596,6 @@ mod test { #[test] fn fatal_error() { - // test_util::init_log_for_test(); let mut suite = super::Suite::new("fatal_error", 3); suite.must_register_task(1, "test_fatal_error"); suite.sync(); @@ -657,7 +652,6 @@ mod test { #[test] fn inflight_messages() { - test_util::init_log_for_test(); // We should remove the failpoints when paniked or we may get stucked. defer! 
{{ fail::remove("delay_on_start_observe"); From 6d883b37a9a3299a62b649c97de27f3cd10e46c5 Mon Sep 17 00:00:00 2001 From: Xiaoguang Sun Date: Wed, 1 Jun 2022 13:26:27 +0800 Subject: [PATCH 0003/1149] Add link to website of TiKV's creator (#12703) close tikv/tikv#12702 Signed-off-by: Xiaoguang Sun --- README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/README.md b/README.md index b9a2d9d9519..65bad6835ee 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ [![Coverage Status](https://codecov.io/gh/tikv/tikv/branch/master/graph/badge.svg)](https://codecov.io/gh/tikv/tikv) [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/2574/badge)](https://bestpractices.coreinfrastructure.org/projects/2574) -TiKV is an open-source, distributed, and transactional key-value database. Unlike other traditional NoSQL systems, TiKV not only provides classical key-value APIs, but also transactional APIs with ACID compliance. Built in Rust and powered by Raft, TiKV was originally created to complement [TiDB](https://github.com/pingcap/tidb), a distributed HTAP database compatible with the MySQL protocol. +TiKV is an open-source, distributed, and transactional key-value database. Unlike other traditional NoSQL systems, TiKV not only provides classical key-value APIs, but also transactional APIs with ACID compliance. Built in Rust and powered by Raft, TiKV was originally created by [PingCAP](https://en.pingcap.com) to complement [TiDB](https://github.com/pingcap/tidb), a distributed HTAP database compatible with the MySQL protocol. The design of TiKV ('Ti' stands for titanium) is inspired by some great distributed systems from Google, such as BigTable, Spanner, and Percolator, and some of the latest achievements in academia in recent years, such as the Raft consensus algorithm. @@ -134,10 +134,6 @@ See [CONTRIBUTING.md](./CONTRIBUTING.md). 
## Client drivers -Currently, the interfaces to TiKV are the [TiDB Go client](https://github.com/pingcap/tidb/tree/master/store/tikv) and the [TiSpark Java client](https://github.com/pingcap/tispark/tree/master/tikv-client/src/main/java/com/pingcap/tikv). - -These are the clients for TiKV: - - [Go](https://github.com/tikv/client-go) (The most stable and widely used) - [Java](https://github.com/tikv/client-java) - [Rust](https://github.com/tikv/client-rust) From f7edbcf610cd6bb5c9040317edd3260188a6e87d Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 31 May 2022 23:10:27 -0700 Subject: [PATCH 0004/1149] raftstore: use approximate size to generate bucket unless split region check is needed and scan is used for it. (#12678) ref tikv/tikv#12597 When bucket is enabled, using CheckPolicy::Scan would lead to large amount of read IO after tikv restart. Before this PR, the Scan is used unless the region size reaches 1.5G, which is very rare for a 96 ~ 256MB's region-split-size. After this change, generating bucket won't introduce new scan unless the scan is necessary for splitting region. This can significantly reduce the read IO. Also refine the logic for the fix of 12597. 
Signed-off-by: qi.xu Co-authored-by: qi.xu Co-authored-by: Ti Chi Robot --- .../raftstore/src/coprocessor/config.rs | 3 + .../src/coprocessor/split_check/half.rs | 13 ++- .../src/coprocessor/split_check/keys.rs | 8 +- .../src/coprocessor/split_check/size.rs | 102 ++++++++++++++---- tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 6 files changed, 101 insertions(+), 27 deletions(-) diff --git a/components/raftstore/src/coprocessor/config.rs b/components/raftstore/src/coprocessor/config.rs index 0f553c879a2..1609cc3001a 100644 --- a/components/raftstore/src/coprocessor/config.rs +++ b/components/raftstore/src/coprocessor/config.rs @@ -50,6 +50,8 @@ pub struct Config { pub region_bucket_size: ReadableSize, // region size threshold for using approximate size instead of scan pub region_size_threshold_for_approximate: ReadableSize, + #[online_config(skip)] + pub prefer_approximate_bucket: bool, // ratio of region_bucket_size. (0, 0.5) // The region_bucket_merge_size_ratio * region_bucket_size is threshold to merge with its left neighbor bucket pub region_bucket_merge_size_ratio: f64, @@ -91,6 +93,7 @@ impl Default for Config { region_bucket_size: DEFAULT_BUCKET_SIZE, region_size_threshold_for_approximate: DEFAULT_BUCKET_SIZE * BATCH_SPLIT_LIMIT / 2 * 3, region_bucket_merge_size_ratio: DEFAULT_REGION_BUCKET_MERGE_SIZE_RATIO, + prefer_approximate_bucket: true, } } } diff --git a/components/raftstore/src/coprocessor/split_check/half.rs b/components/raftstore/src/coprocessor/split_check/half.rs index 87ee861c95c..a52b7a59d60 100644 --- a/components/raftstore/src/coprocessor/split_check/half.rs +++ b/components/raftstore/src/coprocessor/split_check/half.rs @@ -218,13 +218,13 @@ mod tests { let (tx, rx) = mpsc::sync_channel(100); let cfg = Config { - region_max_size: Some(ReadableSize(BUCKET_NUMBER_LIMIT as u64)), + region_split_size: ReadableSize(130_u64), enable_region_bucket: true, region_bucket_size: ReadableSize(20_u64), // so that 
each key below will form a bucket ..Default::default() }; - let mut runnable = - SplitCheckRunner::new(engine.clone(), tx.clone(), CoprocessorHost::new(tx, cfg)); + let cop_host = CoprocessorHost::new(tx.clone(), cfg); + let mut runnable = SplitCheckRunner::new(engine.clone(), tx, cop_host.clone()); let key_gen = |k: &[u8], i: u64, mvcc: bool| { if !mvcc { @@ -276,6 +276,9 @@ mod tests { Some(vec![bucket_range]), )); + let host = cop_host.new_split_checker_host(®ion, &engine, true, CheckPolicy::Scan); + assert_eq!(host.policy(), CheckPolicy::Scan); + must_generate_buckets(&rx, &exp_bucket_keys); // testing split bucket with end key "" @@ -299,6 +302,8 @@ mod tests { CheckPolicy::Scan, Some(vec![bucket_range]), )); + let host = cop_host.new_split_checker_host(®ion, &engine, true, CheckPolicy::Scan); + assert_eq!(host.policy(), CheckPolicy::Scan); must_generate_buckets(&rx, &exp_bucket_keys); @@ -345,7 +350,7 @@ mod tests { let (tx, rx) = mpsc::sync_channel(100); let cfg = Config { - region_max_size: Some(ReadableSize(BUCKET_NUMBER_LIMIT as u64)), + region_split_size: ReadableSize(130_u64), enable_region_bucket: true, region_bucket_size: ReadableSize(20_u64), // so that each key below will form a bucket ..Default::default() diff --git a/components/raftstore/src/coprocessor/split_check/keys.rs b/components/raftstore/src/coprocessor/split_check/keys.rs index bc9c847225a..22a81e54f31 100644 --- a/components/raftstore/src/coprocessor/split_check/keys.rs +++ b/components/raftstore/src/coprocessor/split_check/keys.rs @@ -182,12 +182,16 @@ where } REGION_KEYS_HISTOGRAM.observe(region_keys as f64); - if region_keys >= host.cfg.region_max_keys() { + // if bucket checker using scan is added, to utilize the scan, + // add keys checker as well for free + // It has the assumption that the size's checker is before the keys's check in the host + let need_split_region = region_keys >= host.cfg.region_max_keys(); + if need_split_region { info!( "approximate keys over threshold, need 
to do split check"; "region_id" => region.get_id(), "keys" => region_keys, - "threshold" => host.cfg.region_max_keys, + "threshold" => host.cfg.region_max_keys(), ); // Need to check keys. host.add_checker(Box::new(Checker::new( diff --git a/components/raftstore/src/coprocessor/split_check/size.rs b/components/raftstore/src/coprocessor/split_check/size.rs index 59603782f5c..30198cd2337 100644 --- a/components/raftstore/src/coprocessor/split_check/size.rs +++ b/components/raftstore/src/coprocessor/split_check/size.rs @@ -51,13 +51,6 @@ where E: KvEngine, { fn on_kv(&mut self, _: &mut ObserverContext<'_>, entry: &KeyEntry) -> bool { - // If there's no need to check region split, skip it. - // Otherwise, the region whose keys > max region keys will not be splitted when batch_split_limit is zero, - // because eventually "over_limit && self.current_size + self.split_size >= self.max_size" - // will return true. - if self.batch_split_limit == 0 { - return false; - } let size = entry.entry_size() as u64; self.current_size += size; @@ -189,18 +182,19 @@ where ); } + let need_bucket_checker = + host.cfg.enable_region_bucket && region_size >= 2 * host.cfg.region_bucket_size.0; REGION_SIZE_HISTOGRAM.observe(region_size as f64); - if region_size >= host.cfg.region_max_size().0 - || host.cfg.enable_region_bucket && region_size >= 2 * host.cfg.region_bucket_size.0 - { - let batch_split_limit = if region_size >= host.cfg.region_max_size().0 { - host.cfg.batch_split_limit - } else { - // no region split check needed - 0 - }; + + let need_split_region = region_size >= host.cfg.region_max_size().0; + if need_split_region || need_bucket_checker { // when it's a large region use approximate way to produce split keys - if region_size >= host.cfg.region_size_threshold_for_approximate.0 { + if need_split_region { + if region_size >= host.cfg.region_size_threshold_for_approximate.0 { + policy = CheckPolicy::Approximate; + } + } else if host.cfg.prefer_approximate_bucket { + // when the 
check is only for bucket, use approximate anyway policy = CheckPolicy::Approximate; } @@ -210,13 +204,12 @@ where "size" => region_size, "threshold" => host.cfg.region_max_size().0, "policy" => ?policy, - "split_check" => batch_split_limit > 0, ); // Need to check size. host.add_checker(Box::new(Checker::new( host.cfg.region_max_size().0, host.cfg.region_split_size.0, - batch_split_limit, + host.cfg.batch_split_limit, policy, ))); } else { @@ -619,8 +612,8 @@ pub mod tests { keys::data_key(Key::from_raw(bytes).append_ts(ts).as_encoded()) } }; - let mut runnable = - SplitCheckRunner::new(engine.clone(), tx.clone(), CoprocessorHost::new(tx, cfg)); + let cop_host = CoprocessorHost::new(tx.clone(), cfg); + let mut runnable = SplitCheckRunner::new(engine.clone(), tx, cop_host.clone()); for i in 0..2000 { // if not mvcc, kv size is (6+1)*2 = 14, given bucket size is 3000, expect each bucket has about 210 keys // if mvcc, kv size is about 18*2 = 36, expect each bucket has about 80 keys @@ -638,6 +631,9 @@ pub mod tests { None, )); + let host = cop_host.new_split_checker_host(®ion, &engine, true, CheckPolicy::Scan); + assert_eq!(host.policy(), CheckPolicy::Approximate); + if !mvcc { must_generate_buckets_approximate(&rx, None, 15000, 45000, mvcc); } else { @@ -664,6 +660,8 @@ pub mod tests { CheckPolicy::Approximate, Some(vec![BucketRange(start.clone(), end.clone())]), )); + let host = cop_host.new_split_checker_host(®ion, &engine, true, CheckPolicy::Scan); + assert_eq!(host.policy(), CheckPolicy::Approximate); if !mvcc { must_generate_buckets_approximate(&rx, Some(BucketRange(start, end)), 150, 450, mvcc); @@ -696,6 +694,68 @@ pub mod tests { } } + #[test] + fn test_check_policy_for_bucket_generation() { + let path = Builder::new() + .prefix("test_check_policy_for_bucket_generation") + .tempdir() + .unwrap(); + let path_str = path.path().to_str().unwrap(); + let db_opts = DBOptions::default(); + let cfs_with_range_prop: HashSet<_> = LARGE_CFS.iter().cloned().collect(); + 
let mut cf_opt = ColumnFamilyOptions::new(); + cf_opt.set_no_range_properties(true); + cf_opt.set_disable_auto_compactions(true); + + let cfs_opts = ALL_CFS + .iter() + .map(|cf| { + if cfs_with_range_prop.contains(cf) { + let mut opt = ColumnFamilyOptions::new(); + opt.set_disable_auto_compactions(true); + CFOptions::new(cf, opt) + } else { + CFOptions::new(cf, cf_opt.clone()) + } + }) + .collect(); + let engine = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); + let (tx, _rx) = mpsc::sync_channel(100); + let mut cfg = Config { + region_max_size: Some(ReadableSize(50000)), + region_split_size: ReadableSize(50000), + region_max_keys: Some(1000000), + region_split_keys: Some(1000000), + batch_split_limit: 5, + enable_region_bucket: true, + region_bucket_size: ReadableSize(1), // minimal bucket size + region_size_threshold_for_approximate: ReadableSize(500000000), + // follow split region's check policy, not force to use approximate + prefer_approximate_bucket: false, + ..Default::default() + }; + let mut region = Region::default(); + region.set_id(1); + region.set_start_key(vec![]); + region.set_end_key(vec![]); + region.mut_peers().push(Peer::default()); + region.mut_region_epoch().set_version(2); + region.mut_region_epoch().set_conf_ver(5); + for i in 0..20 { + let s = keys::data_key(format!("{:04}00", i).as_bytes()); + engine.put_cf(CF_WRITE, &s, &s).unwrap(); + } + + let cop_host = CoprocessorHost::new(tx.clone(), cfg.clone()); + let host = cop_host.new_split_checker_host(®ion, &engine, true, CheckPolicy::Scan); + assert_eq!(host.policy(), CheckPolicy::Scan); + + cfg.prefer_approximate_bucket = true; + let cop_host = CoprocessorHost::new(tx, cfg); + let host = cop_host.new_split_checker_host(®ion, &engine, true, CheckPolicy::Scan); + assert_eq!(host.policy(), CheckPolicy::Approximate); + } + #[test] fn test_cf_lock_without_range_prop() { let path = Builder::new().prefix("test-raftstore").tempdir().unwrap(); diff --git 
a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 21ca6747378..aa0559cbeb2 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -693,6 +693,7 @@ fn test_serde_custom_tikv_config() { enable_region_bucket: true, region_bucket_size: ReadableSize::mb(1), region_size_threshold_for_approximate: ReadableSize::mb(3), + prefer_approximate_bucket: false, region_bucket_merge_size_ratio: 0.4, }; let mut cert_allowed_cn = HashSet::default(); diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 61f0cb87e20..17c7635e846 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -232,6 +232,7 @@ enable-region-bucket = true region-bucket-size = "1MB" region-size-threshold-for-approximate = "3MB" region-bucket-merge-size-ratio = 0.4 +prefer-approximate-bucket = false [rocksdb] wal-recovery-mode = "absolute-consistency" From 8a2245455d4b6d117d203c599444f5455762cff6 Mon Sep 17 00:00:00 2001 From: qupeng Date: Wed, 1 Jun 2022 17:02:28 +0800 Subject: [PATCH 0005/1149] cdc: skip prewrite without value (#12612) ref tikv/tikv#12717 cdc: skip prewrite without value Signed-off-by: qupeng --- components/cdc/src/delegate.rs | 202 +++++++++--------- components/cdc/tests/integrations/test_cdc.rs | 38 +++- 2 files changed, 135 insertions(+), 105 deletions(-) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index dc9f36e92ec..2fb971a4024 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -10,7 +10,7 @@ use std::{ }; use api_version::{ApiV2, KeyMode, KvFormat}; -use collections::HashMap; +use collections::{HashMap, HashMapEntry}; use crossbeam::atomic::AtomicCell; use kvproto::{ cdcpb::{ @@ -481,83 +481,63 @@ impl Delegate { let mut rows = vec![Vec::with_capacity(entries_len)]; let mut current_rows_size: usize = 0; for entry in entries { + let (mut row, mut _has_value) = 
(EventRow::default(), false); + let row_size: usize; match entry { Some(KvEntry::RawKvEntry(kv_pair)) => { - let mut row = EventRow::default(); decode_rawkv(kv_pair.0, kv_pair.1, &mut row)?; - let row_size = row.key.len() + row.value.len(); - if current_rows_size + row_size >= CDC_EVENT_MAX_BYTES { - rows.push(Vec::with_capacity(entries_len)); - current_rows_size = 0; + row_size = row.key.len() + row.value.len(); + } + Some(KvEntry::TxnEntry(TxnEntry::Prewrite { + default, + lock, + old_value, + })) => { + let l = Lock::parse(&lock.1).unwrap(); + if decode_lock(lock.0, l, &mut row, &mut _has_value) { + continue; } - current_rows_size += row_size; - rows.last_mut().unwrap().push(row); + decode_default(default.1, &mut row, &mut _has_value); + row.old_value = old_value.finalized().unwrap_or_default(); + row_size = row.key.len() + row.value.len(); } - Some(KvEntry::TxnEntry(txn_entry)) => { - match txn_entry { - TxnEntry::Prewrite { - default, - lock, - old_value, - } => { - let mut row = EventRow::default(); - let skip = decode_lock(lock.0, Lock::parse(&lock.1).unwrap(), &mut row); - if skip { - continue; - } - decode_default(default.1, &mut row); - let row_size = row.key.len() + row.value.len(); - if current_rows_size + row_size >= CDC_EVENT_MAX_BYTES { - rows.push(Vec::with_capacity(entries_len)); - current_rows_size = 0; - } - current_rows_size += row_size; - row.old_value = old_value.finalized().unwrap_or_default(); - rows.last_mut().unwrap().push(row); - } - TxnEntry::Commit { - default, - write, - old_value, - } => { - let mut row = EventRow::default(); - let skip = decode_write(write.0, &write.1, &mut row, false); - if skip { - continue; - } - decode_default(default.1, &mut row); - - // This type means the row is self-contained, it has, - // 1. start_ts - // 2. commit_ts - // 3. key - // 4. 
value - if row.get_type() == EventLogType::Rollback { - // We dont need to send rollbacks to downstream, - // because downstream does not needs rollback to clean - // prewrite as it drops all previous stashed data. - continue; - } - set_event_row_type(&mut row, EventLogType::Committed); - row.old_value = old_value.finalized().unwrap_or_default(); - let row_size = row.key.len() + row.value.len(); - if current_rows_size + row_size >= CDC_EVENT_MAX_BYTES { - rows.push(Vec::with_capacity(entries_len)); - current_rows_size = 0; - } - current_rows_size += row_size; - rows.last_mut().unwrap().push(row); - } + Some(KvEntry::TxnEntry(TxnEntry::Commit { + default, + write, + old_value, + })) => { + if decode_write(write.0, &write.1, &mut row, &mut _has_value, false) { + continue; + } + decode_default(default.1, &mut row, &mut _has_value); + + // This type means the row is self-contained, it has, + // 1. start_ts + // 2. commit_ts + // 3. key + // 4. value + if row.get_type() == EventLogType::Rollback { + // We dont need to send rollbacks to downstream, + // because downstream does not needs rollback to clean + // prewrite as it drops all previous stashed data. + continue; } + set_event_row_type(&mut row, EventLogType::Committed); + row.old_value = old_value.finalized().unwrap_or_default(); + row_size = row.key.len() + row.value.len(); } None => { - let mut row = EventRow::default(); - // This type means scan has finished. set_event_row_type(&mut row, EventLogType::Initialized); - rows.last_mut().unwrap().push(row); + row_size = 0; } } + if current_rows_size + row_size >= CDC_EVENT_MAX_BYTES { + rows.push(Vec::with_capacity(entries_len)); + current_rows_size = 0; + } + current_rows_size += row_size; + rows.last_mut().unwrap().push(row); } let rows = rows @@ -596,7 +576,8 @@ impl Delegate { Ok(()) }; - let mut txn_rows: HashMap, EventRow> = HashMap::default(); + // map[key] -> (event, has_value). 
+ let mut txn_rows: HashMap, (EventRow, bool)> = HashMap::default(); let mut raw_rows: Vec = Vec::new(); for mut req in requests { match req.get_cmd_type() { @@ -620,17 +601,20 @@ impl Delegate { } } - if !txn_rows.is_empty() { - let mut rows = Vec::with_capacity(txn_rows.len()); - for (_, v) in txn_rows { - rows.push(v); + let mut rows = Vec::with_capacity(txn_rows.len()); + for (_, (v, has_value)) in txn_rows { + if v.r_type == EventLogType::Prewrite && v.op_type == EventRowOpType::Put && !has_value + { + // It's possible that a prewrite command only contains lock but without + // default. It's not documented by classic Percolator but introduced with + // Large-Transaction. Those prewrites are not complete, we must skip them. + continue; } - self.sink_downstream(rows, index, ChangeDataRequestKvApi::TiDb)?; + rows.push(v); } + self.sink_downstream(rows, index, ChangeDataRequestKvApi::TiDb)?; - if !raw_rows.is_empty() { - self.sink_downstream(raw_rows, index, ChangeDataRequestKvApi::RawKv)?; - } + self.sink_downstream(raw_rows, index, ChangeDataRequestKvApi::RawKv)?; Ok(()) } @@ -641,6 +625,9 @@ impl Delegate { index: u64, kv_api: ChangeDataRequestKvApi, ) -> Result<()> { + if entries.is_empty() { + return Ok(()); + } let event_entries = EventEntries { entries: entries.into(), ..Default::default() @@ -676,7 +663,7 @@ impl Delegate { &mut self, put: PutRequest, is_one_pc: bool, - txn_rows: &mut HashMap, EventRow>, + txn_rows: &mut HashMap, (EventRow, bool)>, raw_rows: &mut Vec, read_old_value: impl FnMut(&mut EventRow, TimeStamp) -> Result<()>, ) -> Result<()> { @@ -699,13 +686,13 @@ impl Delegate { &mut self, mut put: PutRequest, is_one_pc: bool, - rows: &mut HashMap, EventRow>, + rows: &mut HashMap, (EventRow, bool)>, mut read_old_value: impl FnMut(&mut EventRow, TimeStamp) -> Result<()>, ) -> Result<()> { match put.cf.as_str() { "write" => { - let mut row = EventRow::default(); - if decode_write(put.take_key(), put.get_value(), &mut row, true) { + let (mut row, 
mut has_value) = (EventRow::default(), false); + if decode_write(put.take_key(), &put.value, &mut row, &mut has_value, true) { return Ok(()); } @@ -734,36 +721,29 @@ impl Delegate { ); } - match rows.get_mut(&row.key) { - Some(row_with_value) => { - row.value = mem::take(&mut row_with_value.value); - *row_with_value = row; + match rows.entry(row.key.clone()) { + HashMapEntry::Occupied(o) => { + let o = o.into_mut(); + mem::swap(&mut o.0.value, &mut row.value); + o.0 = row; } - None => { - rows.insert(row.key.clone(), row); + HashMapEntry::Vacant(v) => { + v.insert((row, has_value)); } } } "lock" => { - let mut row = EventRow::default(); + let (mut row, mut has_value) = (EventRow::default(), false); let lock = Lock::parse(put.get_value()).unwrap(); let for_update_ts = lock.for_update_ts; - if decode_lock(put.take_key(), lock, &mut row) { + if decode_lock(put.take_key(), lock, &mut row, &mut has_value) { return Ok(()); } let read_old_ts = std::cmp::max(for_update_ts, row.start_ts.into()); read_old_value(&mut row, read_old_ts)?; - let occupied = rows.entry(row.key.clone()).or_default(); - if !occupied.value.is_empty() { - assert!(row.value.is_empty()); - let mut value = vec![]; - mem::swap(&mut occupied.value, &mut value); - row.value = value; - } - // In order to compute resolved ts, - // we must track inflight txns. + // In order to compute resolved ts, we must track inflight txns. 
match self.resolver { Some(ref mut resolver) => { resolver.track_lock(row.start_ts.into(), row.key.clone(), None) @@ -780,16 +760,20 @@ impl Delegate { } } - *occupied = row; + let occupied = rows.entry(row.key.clone()).or_default(); + if occupied.1 { + assert!(!has_value); + has_value = true; + mem::swap(&mut occupied.0.value, &mut row.value); + } + *occupied = (row, has_value); } "" | "default" => { let key = Key::from_encoded(put.take_key()).truncate_ts().unwrap(); let row = rows.entry(key.into_raw().unwrap()).or_default(); - decode_default(put.take_value(), row); - } - other => { - panic!("invalid cf {}", other); + decode_default(put.take_value(), &mut row.0, &mut row.1); } + other => panic!("invalid cf {}", other), } Ok(()) } @@ -909,7 +893,13 @@ fn make_overlapped_rollback(key: Key, row: &mut EventRow) { /// Decodes the write record and store its information in `row`. This may be called both when /// doing incremental scan of observing apply events. There's different behavior for the two /// case, distinguished by the `is_apply` parameter. 
-fn decode_write(key: Vec, value: &[u8], row: &mut EventRow, is_apply: bool) -> bool { +fn decode_write( + key: Vec, + value: &[u8], + row: &mut EventRow, + has_value: &mut bool, + is_apply: bool, +) -> bool { let key = Key::from_encoded(key); let write = WriteRef::parse(value).unwrap().to_owned(); @@ -946,12 +936,13 @@ fn decode_write(key: Vec, value: &[u8], row: &mut EventRow, is_apply: bool) set_event_row_type(row, r_type); if let Some(value) = write.short_value { row.value = value; + *has_value = true; } false } -fn decode_lock(key: Vec, lock: Lock, row: &mut EventRow) -> bool { +fn decode_lock(key: Vec, lock: Lock, row: &mut EventRow, has_value: &mut bool) -> bool { let op_type = match lock.lock_type { LockType::Put => EventRowOpType::Put, LockType::Delete => EventRowOpType::Delete, @@ -971,6 +962,7 @@ fn decode_lock(key: Vec, lock: Lock, row: &mut EventRow) -> bool { set_event_row_type(row, EventLogType::Prewrite); if let Some(value) = lock.short_value { row.value = value; + *has_value = true; } false @@ -998,10 +990,12 @@ fn decode_rawkv(key: Vec, value: Vec, row: &mut EventRow) -> Result<()> Ok(()) } -fn decode_default(value: Vec, row: &mut EventRow) { +fn decode_default(value: Vec, row: &mut EventRow, has_value: &mut bool) { if !value.is_empty() { row.value = value.to_vec(); } + // If default CF is given in a command it means the command always has a value. 
+ *has_value = true; } #[cfg(test)] diff --git a/components/cdc/tests/integrations/test_cdc.rs b/components/cdc/tests/integrations/test_cdc.rs index 06b16de1f20..5f9f9bf7209 100644 --- a/components/cdc/tests/integrations/test_cdc.rs +++ b/components/cdc/tests/integrations/test_cdc.rs @@ -935,7 +935,7 @@ fn test_cdc_batch_size_limit_impl() { assert_eq!(events.len(), 1, "{:?}", events); match events.pop().unwrap().event.unwrap() { Event_oneof_event::Entries(es) => { - assert!(es.entries.len() == 2); + assert_eq!(es.entries.len(), 2); let e = &es.entries[0]; assert_eq!(e.get_type(), EventLogType::Prewrite, "{:?}", e.get_type()); assert_eq!(e.key, b"xk3", "{:?}", e.key); @@ -2318,3 +2318,39 @@ fn test_resolved_ts_with_learners() { } panic!("resolved timestamp should be advanced correctly"); } + +#[test] +fn test_prewrite_without_value() { + let cluster = new_server_cluster(0, 2); + cluster.pd_client.disable_default_operator(); + let mut suite = TestSuiteBuilder::new().cluster(cluster).build(); + let rid = suite.cluster.get_region(&[]).id; + let ctx = suite.get_context(rid); + let client = suite.get_tikv_client(rid).clone(); + let large_value = vec![b'x'; 2 * txn_types::SHORT_VALUE_MAX_LEN]; + + // Perform a pessimistic prewrite with a large value. + let mut muts = vec![Mutation::default()]; + muts[0].set_op(Op::Put); + muts[0].key = b"key".to_vec(); + muts[0].value = large_value.clone(); + try_kv_prewrite_pessimistic(&client, ctx.clone(), muts, b"key".to_vec(), 10); + + let req = suite.new_changedata_request(rid); + let (mut req_tx, _, receive_event) = new_event_feed(suite.get_region_cdc_client(rid)); + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + + // The prewrite can be retrieved from incremental scan. + let event = receive_event(false); + assert_eq!( + event.get_events()[0].get_entries().entries[0].value, + large_value + ); + + // check_txn_status will put the lock again, but without value. 
+ must_check_txn_status(&client, ctx.clone(), b"key", 10, 12, 12); + must_kv_commit(&client, ctx, vec![b"key".to_vec()], 10, 14, 14); + // The lock without value shouldn't be retrieved. + let event = receive_event(false); + assert_eq!(event.get_events()[0].get_entries().entries[0].commit_ts, 14); +} From 4fca4e86f37f630a31b04f33ca21a4f8a42872f2 Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 1 Jun 2022 02:28:27 -0700 Subject: [PATCH 0006/1149] *: optimize debug build (#12708) close tikv/tikv#12707 This PR optimize debug build by disabling all debuginfo excepts tests itself. So that the generated artifacts will be smaller and also speed up compile time a little. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- CONTRIBUTING.md | 2 +- Cargo.toml | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 85fcea3193e..faccf2818c1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -92,7 +92,7 @@ Please follow this style to make TiKV easy to review, maintain, and develop. ### Build issues -To reduce compilation time, TiKV builds do not include full debugging information by default — `release` and `bench` builds include no debuginfo; `dev` and `test` builds include full debug. To decrease compilation time with another ~5% (around 10 seconds for a 4 min build time), change the `debug = true` to `debug = 1` in the Cargo.toml file to only include line numbers for `dev` and `test`. Another way to change debuginfo is to precede build commands with `RUSTFLAGS=-Cdebuginfo=1` (for line numbers), or `RUSTFLAGS=-Cdebuginfo=2` (for full debuginfo). For example, +To reduce compilation time and disk usage, TiKV builds do not include full debugging information by default — only tests package will have line debug info enabled. To change debuginfo, just precede build commands with `RUSTFLAGS=-Cdebuginfo=1` (for line numbers), or `RUSTFLAGS=-Cdebuginfo=2` (for full debuginfo). 
For example, ```bash RUSTFLAGS=-Cdebuginfo=1 make dev diff --git a/Cargo.toml b/Cargo.toml index 61759a4b68a..477716d8893 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -277,9 +277,25 @@ members = [ ] default-members = ["cmd/tikv-server", "cmd/tikv-ctl"] +[profile.dev.package.grpcio-sys] +debug = false +opt-level = 1 + +[profile.dev.package.librocksdb_sys] +debug = false +opt-level = 1 + +[profile.dev.package.libtitan_sys] +debug = false +opt-level = 1 + +[profile.dev.package.tests] +debug = 1 +opt-level = 1 + [profile.dev] opt-level = 0 -debug = true +debug = 0 codegen-units = 4 lto = false incremental = true @@ -305,7 +321,7 @@ codegen-units = 4 [profile.test] opt-level = 0 -debug = true +debug = 0 codegen-units = 16 lto = false incremental = true From 4a8a3c5d5ec3be7176b4d5708c6fbec7c60b6108 Mon Sep 17 00:00:00 2001 From: qupeng Date: Wed, 1 Jun 2022 17:56:29 +0800 Subject: [PATCH 0007/1149] cdc: make tso worker threads configuable (#12576) ref tikv/tikv#12592 cdc: make tso worker threads configuable Signed-off-by: qupeng --- components/cdc/src/endpoint.rs | 2 +- components/cdc/src/initializer.rs | 2 +- components/resolved_ts/src/advance.rs | 39 +++++++++++++--------- src/config.rs | 8 +++++ src/server/service/kv.rs | 9 ++++- tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 7 files changed, 43 insertions(+), 19 deletions(-) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 0a0a7d9fcd5..3adaa8aca65 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -377,7 +377,7 @@ impl, E: KvEngine> Endpoint { .unwrap(); let tso_worker = Builder::new_multi_thread() .thread_name("tso") - .worker_threads(1) + .worker_threads(config.tso_worker_threads) .enable_time() .build() .unwrap(); diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 9a06448afba..6b80a8c21a0 100644 --- a/components/cdc/src/initializer.rs +++ 
b/components/cdc/src/initializer.rs @@ -522,7 +522,7 @@ impl Initializer { }); let valid_count = total_count - filtered_count; - let use_ts_filter = valid_count as f64 / total_count as f64 <= self.ts_filter_ratio; + let use_ts_filter = valid_count as f64 <= total_count as f64 * self.ts_filter_ratio; info!("cdc incremental scan uses ts filter: {}", use_ts_filter; "region_id" => self.region_id, "hint_min_ts" => hint_min_ts, diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index ddc52443cec..c438c4c53fa 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -14,7 +14,7 @@ use concurrency_manager::ConcurrencyManager; use engine_traits::KvEngine; use fail::fail_point; use futures::{compat::Future01CompatExt, future::select_all, FutureExt, TryFutureExt}; -use grpcio::{ChannelBuilder, Environment}; +use grpcio::{ChannelBuilder, Environment, Error as GrpcError, RpcStatusCode}; use kvproto::{ kvrpcpb::{CheckLeaderRequest, LeaderInfo}, metapb::{Peer, PeerRole}, @@ -254,17 +254,26 @@ pub async fn region_resolved_ts_store( .observe(elapsed.as_secs_f64()); }); - let rpc = client - .check_leader_async(&req) - .map_err(|e| (to_store, true, format!("[rpc create failed]{}", e)))?; + let rpc = match client.check_leader_async(&req) { + Ok(rpc) => rpc, + Err(GrpcError::RpcFailure(status)) + if status.code() == RpcStatusCode::UNIMPLEMENTED => + { + // Some stores like TiFlash don't implement it. + return Ok((to_store, vec![])); + } + Err(e) => return Err((to_store, true, format!("[rpc create failed]{}", e))), + }; + PENDING_CHECK_LEADER_REQ_SENT_COUNT.inc(); defer!(PENDING_CHECK_LEADER_REQ_SENT_COUNT.dec()); let timeout = Duration::from_millis(DEFAULT_CHECK_LEADER_TIMEOUT_MILLISECONDS); - let resp = tokio::time::timeout(timeout, rpc) + let regions = tokio::time::timeout(timeout, rpc) .map_err(|e| (to_store, true, format!("[timeout] {}", e))) .await? 
- .map_err(|e| (to_store, true, format!("[rpc failed] {}", e)))?; - Ok((to_store, resp)) + .map_err(|e| (to_store, true, format!("[rpc failed] {}", e)))? + .take_regions(); + Ok((to_store, regions)) } .boxed() }) @@ -281,17 +290,15 @@ pub async fn region_resolved_ts_store( let (res, _, remains) = select_all(stores).await; stores = remains; match res { - Ok((to_store, resp)) => { - for region_id in resp.regions { - if let Some(r) = region_map.get(®ion_id) { - let resps = resp_map.entry(region_id).or_default(); - resps.push(to_store); - if region_has_quorum(r, resps) { - valid_regions.insert(region_id); - } + Ok((to_store, regions)) => regions.into_iter().for_each(|region_id| { + if let Some(r) = region_map.get(®ion_id) { + let resps = resp_map.entry(region_id).or_default(); + resps.push(to_store); + if region_has_quorum(r, resps) { + valid_regions.insert(region_id); } } - } + }), Err((to_store, reconnect, err)) => { info!("check leader failed"; "error" => ?err, "to_store" => to_store); if reconnect { diff --git a/src/config.rs b/src/config.rs index b36c14e5ee4..3908cdc9eac 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2394,6 +2394,13 @@ pub struct CdcConfig { /// `TsFilter` will be enabled if `incremental/total <= incremental_scan_ts_filter_ratio`. /// Set `incremental_scan_ts_filter_ratio` to 0 will disable it. pub incremental_scan_ts_filter_ratio: f64, + + /// Count of threads to confirm Region leadership in TiKV instances, 1 by default. + /// Please consider to increase it if count of regions on one TiKV instance is + /// greater than 20k. + #[online_config(skip)] + pub tso_worker_threads: usize, + pub sink_memory_quota: ReadableSize, pub old_value_cache_memory_quota: ReadableSize, // Deprecated! preserved for compatibility check. @@ -2416,6 +2423,7 @@ impl Default for CdcConfig { // is more than 500MB/s, so 128MB/s is enough. 
incremental_scan_speed_limit: ReadableSize::mb(128), incremental_scan_ts_filter_ratio: 0.2, + tso_worker_threads: 1, // 512MB memory for CDC sink. sink_memory_quota: ReadableSize::mb(512), // 512MB memory for old value cache. diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 6d578334dff..c4960b0629a 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1108,7 +1108,14 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor let mut resp = CheckLeaderResponse::default(); resp.set_ts(ts); resp.set_regions(regions); - sink.success(resp).await?; + if let Err(e) = sink.success(resp).await { + // CheckLeader has a built-in fast-success mechanism, so `RemoteStopped` + // can be treated as a general situation. + if let GrpcError::RemoteStopped = e { + return ServerResult::Ok(()); + } + return Err(Error::from(e)); + } ServerResult::Ok(()) } .map_err(move |e| { diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index aa0559cbeb2..cbd695191d8 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -760,6 +760,7 @@ fn test_serde_custom_tikv_config() { incremental_scan_concurrency: 4, incremental_scan_speed_limit: ReadableSize(7), incremental_scan_ts_filter_ratio: 0.7, + tso_worker_threads: 2, old_value_cache_memory_quota: ReadableSize::mb(14), sink_memory_quota: ReadableSize::mb(7), }; diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 17c7635e846..d02aebc4df3 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -639,6 +639,7 @@ incremental-scan-threads = 3 incremental-scan-concurrency = 4 incremental-scan-speed-limit = 7 incremental-scan-ts-filter-ratio = 0.7 +tso-worker-threads = 2 old-value-cache-memory-quota = "14MB" sink-memory-quota = "7MB" From 761d591826c75eddc05170780e0afe59718f6b39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= 
<36239017+YuJuncen@users.noreply.github.com> Date: Wed, 1 Jun 2022 18:36:28 +0800 Subject: [PATCH 0008/1149] security, server: fix running local test (#12712) close tikv/tikv#12711 Signed-off-by: Yu Juncen Co-authored-by: Ti Chi Robot --- components/backup-stream/tests/mod.rs | 22 ++++++---------------- components/security/Cargo.toml | 5 ++++- components/security/src/lib.rs | 2 ++ components/server/Cargo.toml | 2 +- 4 files changed, 13 insertions(+), 18 deletions(-) diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index 85bb633955b..339dd07f773 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -628,26 +628,16 @@ mod test { .global_progress_of_task("test_fatal_error"), ) .unwrap(); - assert_eq!(safepoints.len(), 4, "{:?}", safepoints); + assert!( - safepoints - .iter() - .take(3) - // They are choosing the lock safepoint, it must greater than the global checkpoint. - .all(|sp| { sp.safepoint.into_inner() >= checkpoint }), + safepoints.iter().any(|sp| { + sp.serivce.contains(&format!("{}", victim)) + && sp.ttl >= Duration::from_secs(60 * 60 * 24) + && sp.safepoint.into_inner() == checkpoint + }), "{:?}", safepoints ); - - let sp = &safepoints[3]; - assert!(sp.serivce.contains(&format!("{}", victim)), "{:?}", sp); - assert!(sp.ttl >= Duration::from_secs(60 * 60 * 24), "{:?}", sp); - assert!( - sp.safepoint.into_inner() == checkpoint, - "{:?} vs {}", - sp, - checkpoint - ); } #[test] diff --git a/components/security/Cargo.toml b/components/security/Cargo.toml index 2b498bc0965..8257d04f51f 100644 --- a/components/security/Cargo.toml +++ b/components/security/Cargo.toml @@ -4,6 +4,9 @@ version = "0.0.1" edition = "2018" publish = false +[features] +tonic = ["dep:tonic"] + [dependencies] collections = { path = "../collections" } encryption = { path = "../encryption", default-features = false } @@ -12,7 +15,7 @@ serde = "1.0" serde_derive = "1.0" serde_json = "1.0" tikv_util = { path 
= "../tikv_util", default-features = false } -tonic = "0.5" +tonic = { version = "0.5", features = ["tls"], optional = true } [dev-dependencies] tempfile = "3.0" diff --git a/components/security/src/lib.rs b/components/security/src/lib.rs index ec6cf0e6df2..ed5ff0d1fa4 100644 --- a/components/security/src/lib.rs +++ b/components/security/src/lib.rs @@ -18,6 +18,7 @@ use grpcio::{ RpcContext, RpcStatus, RpcStatusCode, ServerBuilder, ServerChecker, ServerCredentialsBuilder, ServerCredentialsFetcher, }; +#[cfg(feature = "tonic")] use tonic::transport::{channel::ClientTlsConfig, Certificate, Identity}; #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Default)] @@ -122,6 +123,7 @@ impl SecurityManager { }) } + #[cfg(feature = "tonic")] /// Make a tonic tls config via the config. pub fn tonic_tls_config(&self) -> Option { let (ca, cert, key) = self.cfg.load_certs().unwrap_or_default(); diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index f5a35c9bb2c..b53fde02cef 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -70,7 +70,7 @@ raftstore = { path = "../raftstore", default-features = false } rand = "0.8" resolved_ts = { path = "../../components/resolved_ts", default-features = false } resource_metering = { path = "../resource_metering" } -security = { path = "../security", default-features = false } +security = { path = "../security", default-features = false, features = ["tonic"] } serde_json = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } From a5987f34ade71caa34cc340c953bd67de5901ace Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Wed, 1 Jun 2022 19:56:27 +0800 Subject: [PATCH 0009/1149] util: record schedule wait duration of yatp pool (#12441) close tikv/tikv#12359 This commit makes use of the `schedule_time` in yatp to 
calculate the wait duration of each wake. The wait duration panel is added to the grafana for the unified read pool and the txn scheduler pool. Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- .../tikv_util/src/yatp_pool/future_pool.rs | 12 +- components/tikv_util/src/yatp_pool/metrics.rs | 8 +- components/tikv_util/src/yatp_pool/mod.rs | 59 ++- metrics/grafana/tikv_details.json | 342 +++++++++++++++++- 5 files changed, 401 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 96b637fdc43..080a1ccc35f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7047,7 +7047,7 @@ checksum = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5" [[package]] name = "yatp" version = "0.0.1" -source = "git+https://github.com/tikv/yatp.git?branch=master#5f3d58002b383bfd0014e271ae58261ecc072de3" +source = "git+https://github.com/tikv/yatp.git?branch=master#2f5f6e47ba6fce8d55e7a57b7ee39a93bc0e8194" dependencies = [ "crossbeam-deque", "dashmap", diff --git a/components/tikv_util/src/yatp_pool/future_pool.rs b/components/tikv_util/src/yatp_pool/future_pool.rs index e2ee39e2616..0beca9a5dee 100644 --- a/components/tikv_util/src/yatp_pool/future_pool.rs +++ b/components/tikv_util/src/yatp_pool/future_pool.rs @@ -13,19 +13,17 @@ use std::{ use fail::fail_point; use futures::channel::oneshot::{self, Canceled}; -use prometheus::{Histogram, IntCounter, IntGauge}; +use prometheus::{IntCounter, IntGauge}; use yatp::task::future; pub type ThreadPool = yatp::ThreadPool; use super::metrics; -use crate::time::Instant; #[derive(Clone)] struct Env { metrics_running_task_count: IntGauge, metrics_handled_task_count: IntCounter, - metrics_pool_schedule_duration: Histogram, } #[derive(Clone)] @@ -49,8 +47,6 @@ impl FuturePool { .with_label_values(&[name]), metrics_handled_task_count: metrics::FUTUREPOOL_HANDLED_TASK_VEC .with_label_values(&[name]), - metrics_pool_schedule_duration: metrics::FUTUREPOOL_SCHEDULE_DURATION_VEC - 
.with_label_values(&[name]), }; FuturePool { inner: Arc::new(PoolInner { @@ -149,8 +145,6 @@ impl PoolInner { where F: Future + Send + 'static, { - let timer = Instant::now_coarse(); - let h_schedule = self.env.metrics_pool_schedule_duration.clone(); let metrics_handled_task_count = self.env.metrics_handled_task_count.clone(); let metrics_running_task_count = self.env.metrics_running_task_count.clone(); @@ -159,7 +153,6 @@ impl PoolInner { metrics_running_task_count.inc(); self.pool.spawn(async move { - h_schedule.observe(timer.saturating_elapsed_secs()); let _ = future.await; metrics_handled_task_count.inc(); metrics_running_task_count.dec(); @@ -175,8 +168,6 @@ impl PoolInner { F: Future + Send + 'static, F::Output: Send, { - let timer = Instant::now_coarse(); - let h_schedule = self.env.metrics_pool_schedule_duration.clone(); let metrics_handled_task_count = self.env.metrics_handled_task_count.clone(); let metrics_running_task_count = self.env.metrics_running_task_count.clone(); @@ -185,7 +176,6 @@ impl PoolInner { let (tx, rx) = oneshot::channel(); metrics_running_task_count.inc(); self.pool.spawn(async move { - h_schedule.observe(timer.saturating_elapsed_secs()); let res = future.await; metrics_handled_task_count.inc(); metrics_running_task_count.dec(); diff --git a/components/tikv_util/src/yatp_pool/metrics.rs b/components/tikv_util/src/yatp_pool/metrics.rs index a472a6e000b..8ae1aa8910e 100644 --- a/components/tikv_util/src/yatp_pool/metrics.rs +++ b/components/tikv_util/src/yatp_pool/metrics.rs @@ -16,11 +16,11 @@ lazy_static! 
{ &["name"] ) .unwrap(); - pub static ref FUTUREPOOL_SCHEDULE_DURATION_VEC: HistogramVec = register_histogram_vec!( - "tikv_futurepool_schedule_duration", - "Histogram of future_pool handle duration.", + pub static ref YATP_POOL_SCHEDULE_WAIT_DURATION_VEC: HistogramVec = register_histogram_vec!( + "tikv_yatp_pool_schedule_wait_duration", + "Histogram of yatp pool schedule wait duration.", &["name"], - exponential_buckets(0.0005, 2.0, 15).unwrap() + exponential_buckets(1e-5, 4.0, 12).unwrap() // 10us ~ 41s ) .unwrap(); } diff --git a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index 023e39b1e67..93cd46cc6ac 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -2,13 +2,15 @@ mod future_pool; mod metrics; + use std::sync::Arc; use fail::fail_point; pub use future_pool::{Full, FuturePool}; +use prometheus::Histogram; use yatp::{ pool::{CloneRunnerBuilder, Local, Runner}, - queue::{multilevel, QueueType}, + queue::{multilevel, QueueType, TaskCell as _}, task::future::{Runner as FutureRunner, TaskCell}, ThreadPool, }; @@ -89,6 +91,9 @@ pub struct YatpPoolRunner { after_start: Option>, before_stop: Option>, before_pause: Option>, + + // Statistics about the schedule wait duration. 
+ schedule_wait_duration: Histogram, } impl Runner for YatpPoolRunner { @@ -105,7 +110,12 @@ impl Runner for YatpPoolRunner { tikv_alloc::add_thread_memory_accessor() } - fn handle(&mut self, local: &mut Local, task_cell: Self::TaskCell) -> bool { + fn handle(&mut self, local: &mut Local, mut task_cell: Self::TaskCell) -> bool { + let extras = task_cell.mut_extras(); + if let Some(schedule_time) = extras.schedule_time() { + self.schedule_wait_duration + .observe(schedule_time.elapsed().as_secs_f64()); + } let finished = self.inner.handle(local, task_cell); self.ticker.try_tick(); finished @@ -139,6 +149,7 @@ impl YatpPoolRunner { after_start: Option>, before_stop: Option>, before_pause: Option>, + schedule_wait_duration: Histogram, ) -> Self { YatpPoolRunner { inner, @@ -147,6 +158,7 @@ impl YatpPoolRunner { after_start, before_stop, before_pause, + schedule_wait_duration, } } } @@ -265,9 +277,8 @@ impl YatpPoolBuilder { } fn create_builder(&mut self) -> (yatp::Builder, YatpPoolRunner) { - let mut builder = yatp::Builder::new(thd_name!( - self.name_prefix.clone().unwrap_or_else(|| "".to_string()) - )); + let name = self.name_prefix.as_deref().unwrap_or("yatp_pool"); + let mut builder = yatp::Builder::new(thd_name!(name)); builder .stack_size(self.stack_size) .min_thread_count(self.min_thread_count) @@ -277,13 +288,51 @@ impl YatpPoolBuilder { let after_start = self.after_start.take(); let before_stop = self.before_stop.take(); let before_pause = self.before_pause.take(); + let schedule_wait_duration = + metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[name]); let read_pool_runner = YatpPoolRunner::new( Default::default(), self.ticker.clone(), after_start, before_stop, before_pause, + schedule_wait_duration, ); (builder, read_pool_runner) } } + +#[cfg(test)] +mod tests { + use std::sync::mpsc; + + use futures::compat::Future01CompatExt; + + use super::*; + use crate::timer::GLOBAL_TIMER_HANDLE; + + #[test] + fn test_record_schedule_wait_duration() { 
+ let name = "test_record_schedule_wait_duration"; + let pool = YatpPoolBuilder::new(DefaultTicker::default()) + .name_prefix(name) + .build_single_level_pool(); + let (tx, rx) = mpsc::channel(); + for _ in 0..3 { + let tx = tx.clone(); + pool.spawn(async move { + GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + Duration::from_millis(100)) + .compat() + .await + .unwrap(); + tx.send(()).unwrap(); + }); + } + for _ in 0..3 { + rx.recv().unwrap(); + } + let histogram = metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[name]); + assert_eq!(histogram.get_sample_count() as u32, 6, "{:?}", histogram); + } +} diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 97619803256..050a6727622 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -4454,6 +4454,206 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 60 + }, + "hiddenSeries": false, + "id": 23763572581, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_yatp_pool_schedule_wait_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, name))", + "hide": false, + "interval": "", + 
"legendFormat": "{{name}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "99% Thread Pool Schedule Wait Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:150", + "format": "s", + "label": null, + "logBase": 2, + "max": "30", + "min": null, + "show": true + }, + { + "$$hashKey": "object:151", + "format": "short", + "label": null, + "logBase": 2, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 60 + }, + "hiddenSeries": false, + "id": 23763572692, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_yatp_pool_schedule_wait_duration_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name) / sum(rate(tikv_yatp_pool_schedule_wait_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", + "hide": false, + "interval": "", + "legendFormat": 
"{{name}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Average Thread Pool Schedule Wait Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:150", + "format": "s", + "label": null, + "logBase": 2, + "max": "30", + "min": null, + "show": true + }, + { + "$$hashKey": "object:151", + "format": "short", + "label": null, + "logBase": 2, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "repeat": null, @@ -16537,6 +16737,76 @@ "align": false, "alignLevel": null } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 26 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763572469, + "legend": { + "show": false + }, + "pluginVersion": "7.5.11", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "rate(tikv_yatp_pool_schedule_wait_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"unified-read.*\"}[1m])", + "format": "heatmap", + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Unified Read Pool Wait Duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + 
"xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null } ], "title": "Unified Read Pool", @@ -19666,6 +19936,76 @@ "align": false, "alignLevel": null } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 45 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763572468, + "legend": { + "show": false + }, + "pluginVersion": "7.5.11", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "rate(tikv_yatp_pool_schedule_wait_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sched-worker.*\"}[1m])", + "format": "heatmap", + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Txn Scheduler Pool Wait Duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null } ], "repeat": null, @@ -42014,4 +42354,4 @@ "title": "Test-Cluster-TiKV-Details", "uid": "RDVQiEzZz", "version": 1 -} \ No newline at end of file +} From 2a508a583c52bdd40fd84630e094debd5b04e623 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 2 Jun 2022 10:32:27 +0800 
Subject: [PATCH 0010/1149] engine: only override write stall configurations if unspecified (#12127) ref tikv/tikv#11424, ref tikv/tikv#11840 Signed-off-by: tabokie --- etc/config-template.toml | 20 +- src/config.rs | 311 +++++++++++++++++++++++-------- tests/integrations/config/mod.rs | 40 ++-- 3 files changed, 268 insertions(+), 103 deletions(-) diff --git a/etc/config-template.toml b/etc/config-template.toml index f301c553167..1e673fbc3fa 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -693,7 +693,7 @@ ## Maximum number of level-0 files. ## When the number of SST files of level-0 reaches the limit of `level0-stop-writes-trigger`, ## RocksDB stalls the new write operation. -# level0-stop-writes-trigger = 36 +# level0-stop-writes-trigger = 20 ## Amount of data to build up in memory (backed by an unsorted log on disk) before converting to a ## sorted on-disk file. It is the RocksDB MemTable size. @@ -745,6 +745,12 @@ ## "min-overlapping-ratio" # compaction-pri = "min-overlapping-ratio" +## Refer to storage.flow-control.soft-pending-compaction-bytes-limit. +# soft-pending-compaction-bytes-limit = "192GB" + +## Refer to storage.flow-control.hard-pending-compaction-bytes-limit. +# hard-pending-compaction-bytes-limit = "1000GB" + ## Indicating if we'd put index/filter blocks to the block cache. ## If not specified, each "table reader" object will pre-load index/filter block during table ## initialization. 
@@ -859,10 +865,12 @@ # level0-file-num-compaction-trigger = 4 # level0-slowdown-writes-trigger = 20 -# level0-stop-writes-trigger = 36 +# level0-stop-writes-trigger = 20 # cache-index-and-filter-blocks = true # pin-l0-filter-and-index-blocks = true # compaction-pri = "min-overlapping-ratio" +# soft-pending-compaction-bytes-limit = "192GB" +# hard-pending-compaction-bytes-limit = "1000GB" # read-amp-bytes-per-bit = 0 # dynamic-level-bytes = true # optimize-filters-for-hits = false @@ -880,10 +888,12 @@ # target-file-size-base = "8MB" # level0-file-num-compaction-trigger = 1 # level0-slowdown-writes-trigger = 20 -# level0-stop-writes-trigger = 36 +# level0-stop-writes-trigger = 20 # cache-index-and-filter-blocks = true # pin-l0-filter-and-index-blocks = true # compaction-pri = "by-compensated-size" +# soft-pending-compaction-bytes-limit = "192GB" +# hard-pending-compaction-bytes-limit = "1000GB" # read-amp-bytes-per-bit = 0 # dynamic-level-bytes = true # optimize-filters-for-hits = false @@ -937,10 +947,12 @@ # level0-file-num-compaction-trigger = 4 # level0-slowdown-writes-trigger = 20 -# level0-stop-writes-trigger = 36 +# level0-stop-writes-trigger = 20 # cache-index-and-filter-blocks = true # pin-l0-filter-and-index-blocks = true # compaction-pri = "by-compensated-size" +# soft-pending-compaction-bytes-limit = "192GB" +# hard-pending-compaction-bytes-limit = "1000GB" # read-amp-bytes-per-bit = 0 # dynamic-level-bytes = true # optimize-filters-for-hits = true diff --git a/src/config.rs b/src/config.rs index 3908cdc9eac..627901481d1 100644 --- a/src/config.rs +++ b/src/config.rs @@ -278,8 +278,8 @@ macro_rules! 
cf_config { pub max_bytes_for_level_base: ReadableSize, pub target_file_size_base: ReadableSize, pub level0_file_num_compaction_trigger: i32, - pub level0_slowdown_writes_trigger: i32, - pub level0_stop_writes_trigger: i32, + pub level0_slowdown_writes_trigger: Option, + pub level0_stop_writes_trigger: Option, pub max_compaction_bytes: ReadableSize, #[serde(with = "rocks_config::compaction_pri_serde")] #[online_config(skip)] @@ -294,8 +294,8 @@ macro_rules! cf_config { pub compaction_style: DBCompactionStyle, pub disable_auto_compactions: bool, pub disable_write_stall: bool, - pub soft_pending_compaction_bytes_limit: ReadableSize, - pub hard_pending_compaction_bytes_limit: ReadableSize, + pub soft_pending_compaction_bytes_limit: Option, + pub hard_pending_compaction_bytes_limit: Option, #[online_config(skip)] pub force_consistency_checks: bool, #[online_config(skip)] @@ -396,10 +396,14 @@ macro_rules! write_into_metrics { .set($cf.level0_file_num_compaction_trigger.into()); $metrics .with_label_values(&[$tag, "level0_slowdown_writes_trigger"]) - .set($cf.level0_slowdown_writes_trigger.into()); + .set( + $cf.level0_slowdown_writes_trigger + .unwrap_or_default() + .into(), + ); $metrics .with_label_values(&[$tag, "level0_stop_writes_trigger"]) - .set($cf.level0_stop_writes_trigger.into()); + .set($cf.level0_stop_writes_trigger.unwrap_or_default().into()); $metrics .with_label_values(&[$tag, "max_compaction_bytes"]) .set($cf.max_compaction_bytes.0 as f64); @@ -421,10 +425,18 @@ macro_rules! 
write_into_metrics { .set(($cf.disable_write_stall as i32).into()); $metrics .with_label_values(&[$tag, "soft_pending_compaction_bytes_limit"]) - .set($cf.soft_pending_compaction_bytes_limit.0 as f64); + .set( + $cf.soft_pending_compaction_bytes_limit + .unwrap_or_default() + .0 as f64, + ); $metrics .with_label_values(&[$tag, "hard_pending_compaction_bytes_limit"]) - .set($cf.hard_pending_compaction_bytes_limit.0 as f64); + .set( + $cf.hard_pending_compaction_bytes_limit + .unwrap_or_default() + .0 as f64, + ); $metrics .with_label_values(&[$tag, "force_consistency_checks"]) .set(($cf.force_consistency_checks as i32).into()); @@ -500,8 +512,12 @@ macro_rules! build_cf_opt { cf_opts.set_max_bytes_for_level_base($opt.max_bytes_for_level_base.0); cf_opts.set_target_file_size_base($opt.target_file_size_base.0); cf_opts.set_level_zero_file_num_compaction_trigger($opt.level0_file_num_compaction_trigger); - cf_opts.set_level_zero_slowdown_writes_trigger($opt.level0_slowdown_writes_trigger); - cf_opts.set_level_zero_stop_writes_trigger($opt.level0_stop_writes_trigger); + cf_opts.set_level_zero_slowdown_writes_trigger( + $opt.level0_slowdown_writes_trigger.unwrap_or_default(), + ); + cf_opts.set_level_zero_stop_writes_trigger( + $opt.level0_stop_writes_trigger.unwrap_or_default(), + ); cf_opts.set_max_compaction_bytes($opt.max_compaction_bytes.0); cf_opts.compaction_priority($opt.compaction_pri); cf_opts.set_level_compaction_dynamic_level_bytes($opt.dynamic_level_bytes); @@ -509,8 +525,16 @@ macro_rules! 
build_cf_opt { cf_opts.set_compaction_style($opt.compaction_style); cf_opts.set_disable_auto_compactions($opt.disable_auto_compactions); cf_opts.set_disable_write_stall($opt.disable_write_stall); - cf_opts.set_soft_pending_compaction_bytes_limit($opt.soft_pending_compaction_bytes_limit.0); - cf_opts.set_hard_pending_compaction_bytes_limit($opt.hard_pending_compaction_bytes_limit.0); + cf_opts.set_soft_pending_compaction_bytes_limit( + $opt.soft_pending_compaction_bytes_limit + .unwrap_or_default() + .0, + ); + cf_opts.set_hard_pending_compaction_bytes_limit( + $opt.hard_pending_compaction_bytes_limit + .unwrap_or_default() + .0, + ); cf_opts.set_optimize_filters_for_hits($opt.optimize_filters_for_hits); cf_opts.set_force_consistency_checks($opt.force_consistency_checks); if $opt.enable_doubly_skiplist { @@ -567,8 +591,8 @@ impl Default for DefaultCfConfig { max_bytes_for_level_base: ReadableSize::mb(512), target_file_size_base: ReadableSize::mb(8), level0_file_num_compaction_trigger: 4, - level0_slowdown_writes_trigger: 20, - level0_stop_writes_trigger: 36, + level0_slowdown_writes_trigger: None, + level0_stop_writes_trigger: None, max_compaction_bytes: ReadableSize::gb(2), compaction_pri: CompactionPriority::MinOverlappingRatio, dynamic_level_bytes: true, @@ -577,8 +601,8 @@ impl Default for DefaultCfConfig { compaction_style: DBCompactionStyle::Level, disable_auto_compactions: false, disable_write_stall: false, - soft_pending_compaction_bytes_limit: ReadableSize::gb(192), - hard_pending_compaction_bytes_limit: ReadableSize::gb(256), + soft_pending_compaction_bytes_limit: None, + hard_pending_compaction_bytes_limit: None, force_consistency_checks: false, prop_size_index_distance: DEFAULT_PROP_SIZE_INDEX_DISTANCE, prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, @@ -676,8 +700,8 @@ impl Default for WriteCfConfig { max_bytes_for_level_base: ReadableSize::mb(512), target_file_size_base: ReadableSize::mb(8), level0_file_num_compaction_trigger: 4, - 
level0_slowdown_writes_trigger: 20, - level0_stop_writes_trigger: 36, + level0_slowdown_writes_trigger: None, + level0_stop_writes_trigger: None, max_compaction_bytes: ReadableSize::gb(2), compaction_pri: CompactionPriority::MinOverlappingRatio, dynamic_level_bytes: true, @@ -686,8 +710,8 @@ impl Default for WriteCfConfig { compaction_style: DBCompactionStyle::Level, disable_auto_compactions: false, disable_write_stall: false, - soft_pending_compaction_bytes_limit: ReadableSize::gb(192), - hard_pending_compaction_bytes_limit: ReadableSize::gb(256), + soft_pending_compaction_bytes_limit: None, + hard_pending_compaction_bytes_limit: None, force_consistency_checks: false, prop_size_index_distance: DEFAULT_PROP_SIZE_INDEX_DISTANCE, prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, @@ -771,8 +795,8 @@ impl Default for LockCfConfig { max_bytes_for_level_base: ReadableSize::mb(128), target_file_size_base: ReadableSize::mb(8), level0_file_num_compaction_trigger: 1, - level0_slowdown_writes_trigger: 20, - level0_stop_writes_trigger: 36, + level0_slowdown_writes_trigger: None, + level0_stop_writes_trigger: None, max_compaction_bytes: ReadableSize::gb(2), compaction_pri: CompactionPriority::ByCompensatedSize, dynamic_level_bytes: true, @@ -781,8 +805,8 @@ impl Default for LockCfConfig { compaction_style: DBCompactionStyle::Level, disable_auto_compactions: false, disable_write_stall: false, - soft_pending_compaction_bytes_limit: ReadableSize::gb(192), - hard_pending_compaction_bytes_limit: ReadableSize::gb(256), + soft_pending_compaction_bytes_limit: None, + hard_pending_compaction_bytes_limit: None, force_consistency_checks: false, prop_size_index_distance: DEFAULT_PROP_SIZE_INDEX_DISTANCE, prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, @@ -844,8 +868,8 @@ impl Default for RaftCfConfig { max_bytes_for_level_base: ReadableSize::mb(128), target_file_size_base: ReadableSize::mb(8), level0_file_num_compaction_trigger: 1, - level0_slowdown_writes_trigger: 
20, - level0_stop_writes_trigger: 36, + level0_slowdown_writes_trigger: None, + level0_stop_writes_trigger: None, max_compaction_bytes: ReadableSize::gb(2), compaction_pri: CompactionPriority::ByCompensatedSize, dynamic_level_bytes: true, @@ -854,8 +878,8 @@ impl Default for RaftCfConfig { compaction_style: DBCompactionStyle::Level, disable_auto_compactions: false, disable_write_stall: false, - soft_pending_compaction_bytes_limit: ReadableSize::gb(192), - hard_pending_compaction_bytes_limit: ReadableSize::gb(256), + soft_pending_compaction_bytes_limit: None, + hard_pending_compaction_bytes_limit: None, force_consistency_checks: false, prop_size_index_distance: DEFAULT_PROP_SIZE_INDEX_DISTANCE, prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, @@ -1207,8 +1231,8 @@ impl Default for RaftDefaultCfConfig { max_bytes_for_level_base: ReadableSize::mb(512), target_file_size_base: ReadableSize::mb(8), level0_file_num_compaction_trigger: 4, - level0_slowdown_writes_trigger: 20, - level0_stop_writes_trigger: 36, + level0_slowdown_writes_trigger: None, + level0_stop_writes_trigger: None, max_compaction_bytes: ReadableSize::gb(2), compaction_pri: CompactionPriority::ByCompensatedSize, dynamic_level_bytes: true, @@ -1217,8 +1241,8 @@ impl Default for RaftDefaultCfConfig { compaction_style: DBCompactionStyle::Level, disable_auto_compactions: false, disable_write_stall: false, - soft_pending_compaction_bytes_limit: ReadableSize::gb(192), - hard_pending_compaction_bytes_limit: ReadableSize::gb(256), + soft_pending_compaction_bytes_limit: None, + hard_pending_compaction_bytes_limit: None, force_consistency_checks: false, prop_size_index_distance: DEFAULT_PROP_SIZE_INDEX_DISTANCE, prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, @@ -2890,58 +2914,81 @@ impl TiKvConfig { self.causal_ts.validate()?; if self.storage.flow_control.enable { - // using raftdb write stall to control memtables as a safety net - self.raftdb.defaultcf.level0_slowdown_writes_trigger = 
10000; - self.raftdb.defaultcf.level0_stop_writes_trigger = 10000; - self.raftdb.defaultcf.soft_pending_compaction_bytes_limit = ReadableSize(0); - self.raftdb.defaultcf.hard_pending_compaction_bytes_limit = ReadableSize(0); - - // disable kvdb write stall, and override related configs self.rocksdb.defaultcf.disable_write_stall = true; - self.rocksdb.defaultcf.level0_slowdown_writes_trigger = - self.storage.flow_control.l0_files_threshold as i32; - self.rocksdb.defaultcf.soft_pending_compaction_bytes_limit = self - .storage - .flow_control - .soft_pending_compaction_bytes_limit; - self.rocksdb.defaultcf.hard_pending_compaction_bytes_limit = self - .storage - .flow_control - .hard_pending_compaction_bytes_limit; self.rocksdb.writecf.disable_write_stall = true; - self.rocksdb.writecf.level0_slowdown_writes_trigger = - self.storage.flow_control.l0_files_threshold as i32; - self.rocksdb.writecf.soft_pending_compaction_bytes_limit = self - .storage - .flow_control - .soft_pending_compaction_bytes_limit; - self.rocksdb.writecf.hard_pending_compaction_bytes_limit = self - .storage - .flow_control - .hard_pending_compaction_bytes_limit; self.rocksdb.lockcf.disable_write_stall = true; - self.rocksdb.lockcf.level0_slowdown_writes_trigger = - self.storage.flow_control.l0_files_threshold as i32; - self.rocksdb.lockcf.soft_pending_compaction_bytes_limit = self - .storage - .flow_control - .soft_pending_compaction_bytes_limit; - self.rocksdb.lockcf.hard_pending_compaction_bytes_limit = self - .storage - .flow_control - .hard_pending_compaction_bytes_limit; self.rocksdb.raftcf.disable_write_stall = true; - self.rocksdb.raftcf.level0_slowdown_writes_trigger = - self.storage.flow_control.l0_files_threshold as i32; - self.rocksdb.raftcf.soft_pending_compaction_bytes_limit = self - .storage - .flow_control - .soft_pending_compaction_bytes_limit; - self.rocksdb.raftcf.hard_pending_compaction_bytes_limit = self - .storage - .flow_control - .hard_pending_compaction_bytes_limit; } + // 
Fill in values for unspecified write stall configurations. + macro_rules! fill_cf_opts { + ($cf_opts:expr, $cfg:expr) => { + if let Some(v) = &mut $cf_opts.level0_slowdown_writes_trigger { + if $cfg.enable && *v > $cfg.l0_files_threshold as i32 { + warn!( + "{}.level0-slowdown-writes-trigger is too large. Setting it to \ + storage.flow-control.l0-files-threshold ({})", + stringify!($cf_opts), $cfg.l0_files_threshold + ); + *v = $cfg.l0_files_threshold as i32; + } + } else { + $cf_opts.level0_slowdown_writes_trigger = + Some($cfg.l0_files_threshold as i32); + } + if let Some(v) = &mut $cf_opts.level0_stop_writes_trigger { + if $cfg.enable && *v > $cfg.l0_files_threshold as i32 { + warn!( + "{}.level0-stop-writes-trigger is too large. Setting it to \ + storage.flow-control.l0-files-threshold ({})", + stringify!($cf_opts), $cfg.l0_files_threshold + ); + *v = $cfg.l0_files_threshold as i32; + } + } else { + $cf_opts.level0_stop_writes_trigger = + Some($cfg.l0_files_threshold as i32); + } + if let Some(v) = &mut $cf_opts.soft_pending_compaction_bytes_limit { + if $cfg.enable && v.0 > $cfg.soft_pending_compaction_bytes_limit.0 { + warn!( + "{}.soft-pending-compaction-bytes-limit is too large. Setting it to \ + storage.flow-control.soft-pending-compaction-bytes-limit ({})", + stringify!($cf_opts), $cfg.soft_pending_compaction_bytes_limit.0 + ); + *v = $cfg.soft_pending_compaction_bytes_limit; + } + } else { + $cf_opts.soft_pending_compaction_bytes_limit = + Some($cfg.soft_pending_compaction_bytes_limit); + } + if let Some(v) = &mut $cf_opts.hard_pending_compaction_bytes_limit { + if $cfg.enable && v.0 > $cfg.hard_pending_compaction_bytes_limit.0 { + warn!( + "{}.hard-pending-compaction-bytes-limit is too large. 
Setting it to \ + storage.flow-control.hard-pending-compaction-bytes-limit ({})", + stringify!($cf_opts), $cfg.hard_pending_compaction_bytes_limit.0 + ); + *v = $cfg.hard_pending_compaction_bytes_limit; + } + } else { + $cf_opts.hard_pending_compaction_bytes_limit = + Some($cfg.hard_pending_compaction_bytes_limit); + } + }; + } + let flow_control_cfg = if self.storage.flow_control.enable { + self.storage.flow_control.clone() + } else { + crate::storage::config::FlowControlConfig { + enable: false, + ..Default::default() + } + }; + fill_cf_opts!(self.raftdb.defaultcf, flow_control_cfg); + fill_cf_opts!(self.rocksdb.defaultcf, flow_control_cfg); + fill_cf_opts!(self.rocksdb.writecf, flow_control_cfg); + fill_cf_opts!(self.rocksdb.lockcf, flow_control_cfg); + fill_cf_opts!(self.rocksdb.raftcf, flow_control_cfg); if let Some(memory_usage_limit) = self.memory_usage_limit { let total = SysQuota::memory_limit_in_bytes(); @@ -5007,6 +5054,26 @@ mod tests { cfg.memory_usage_limit = None; cfg.raft_engine.mut_config().memory_limit = None; cfg.coprocessor_v2.coprocessor_plugin_directory = None; // Default is `None`, which is represented by not setting the key. 
+ cfg.rocksdb.defaultcf.level0_slowdown_writes_trigger = None; + cfg.rocksdb.defaultcf.level0_stop_writes_trigger = None; + cfg.rocksdb.defaultcf.soft_pending_compaction_bytes_limit = None; + cfg.rocksdb.defaultcf.hard_pending_compaction_bytes_limit = None; + cfg.rocksdb.writecf.level0_slowdown_writes_trigger = None; + cfg.rocksdb.writecf.level0_stop_writes_trigger = None; + cfg.rocksdb.writecf.soft_pending_compaction_bytes_limit = None; + cfg.rocksdb.writecf.hard_pending_compaction_bytes_limit = None; + cfg.rocksdb.lockcf.level0_slowdown_writes_trigger = None; + cfg.rocksdb.lockcf.level0_stop_writes_trigger = None; + cfg.rocksdb.lockcf.soft_pending_compaction_bytes_limit = None; + cfg.rocksdb.lockcf.hard_pending_compaction_bytes_limit = None; + cfg.rocksdb.raftcf.level0_slowdown_writes_trigger = None; + cfg.rocksdb.raftcf.level0_stop_writes_trigger = None; + cfg.rocksdb.raftcf.soft_pending_compaction_bytes_limit = None; + cfg.rocksdb.raftcf.hard_pending_compaction_bytes_limit = None; + cfg.raftdb.defaultcf.level0_slowdown_writes_trigger = None; + cfg.raftdb.defaultcf.level0_stop_writes_trigger = None; + cfg.raftdb.defaultcf.soft_pending_compaction_bytes_limit = None; + cfg.raftdb.defaultcf.hard_pending_compaction_bytes_limit = None; assert_eq!(cfg, default_cfg); } @@ -5224,4 +5291,90 @@ mod tests { assert_eq!(serde_to_online_config(name.into()).as_str(), res); } } + + #[test] + fn test_flow_control_override() { + let content = r#" + [storage.flow-control] + enable = true + l0-files-threshold = 77 + soft-pending-compaction-bytes-limit = "777GB" + "#; + let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert_eq!( + cfg.rocksdb.defaultcf.level0_slowdown_writes_trigger, + Some(77) + ); + assert_eq!( + cfg.rocksdb.defaultcf.soft_pending_compaction_bytes_limit, + Some(ReadableSize::gb(777)) + ); + + // Override with default values if flow control is disabled. 
+ let content = r#" + [storage.flow-control] + enable = false + l0-files-threshold = 77 + soft-pending-compaction-bytes-limit = "777GB" + [rocksdb.defaultcf] + level0-slowdown-writes-trigger = 888 + soft-pending-compaction-bytes-limit = "888GB" + [rocksdb.writecf] + "#; + let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert_eq!( + cfg.rocksdb.defaultcf.level0_slowdown_writes_trigger, + Some(888) + ); + assert_eq!( + cfg.rocksdb.defaultcf.soft_pending_compaction_bytes_limit, + Some(ReadableSize::gb(888)) + ); + matches!(cfg.rocksdb.writecf.level0_slowdown_writes_trigger, Some(v) if v != 77); + matches!(cfg.rocksdb.writecf.soft_pending_compaction_bytes_limit, Some(v) if v != ReadableSize::gb(777)); + + // Do not override when RocksDB configurations are specified. + let content = r#" + [storage.flow-control] + enable = true + l0-files-threshold = 77 + soft-pending-compaction-bytes-limit = "777GB" + [rocksdb.defaultcf] + level0-slowdown-writes-trigger = 66 + soft-pending-compaction-bytes-limit = "666GB" + "#; + let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert_eq!( + cfg.rocksdb.defaultcf.level0_slowdown_writes_trigger, + Some(66) + ); + assert_eq!( + cfg.rocksdb.defaultcf.soft_pending_compaction_bytes_limit, + Some(ReadableSize::gb(666)) + ); + + // Cannot specify larger configurations for RocksDB. 
+ let content = r#" + [storage.flow-control] + enable = true + l0-files-threshold = 1 + soft-pending-compaction-bytes-limit = "1GB" + [rocksdb.defaultcf] + level0-slowdown-writes-trigger = 88 + soft-pending-compaction-bytes-limit = "888GB" + "#; + let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert_eq!( + cfg.rocksdb.defaultcf.level0_slowdown_writes_trigger, + Some(1) + ); + assert_eq!( + cfg.rocksdb.defaultcf.soft_pending_compaction_bytes_limit, + Some(ReadableSize::gb(1)) + ); + } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index cbd695191d8..3bd932262e5 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -324,8 +324,8 @@ fn test_serde_custom_tikv_config() { max_bytes_for_level_base: ReadableSize::kb(12), target_file_size_base: ReadableSize::kb(123), level0_file_num_compaction_trigger: 123, - level0_slowdown_writes_trigger: 123, - level0_stop_writes_trigger: 123, + level0_slowdown_writes_trigger: Some(123), + level0_stop_writes_trigger: Some(123), max_compaction_bytes: ReadableSize::gb(1), compaction_pri: CompactionPriority::MinOverlappingRatio, dynamic_level_bytes: true, @@ -334,8 +334,8 @@ fn test_serde_custom_tikv_config() { compaction_style: DBCompactionStyle::Universal, disable_auto_compactions: true, disable_write_stall: true, - soft_pending_compaction_bytes_limit: ReadableSize::gb(12), - hard_pending_compaction_bytes_limit: ReadableSize::gb(12), + soft_pending_compaction_bytes_limit: Some(ReadableSize::gb(12)), + hard_pending_compaction_bytes_limit: Some(ReadableSize::gb(12)), force_consistency_checks: true, titan: titan_cf_config.clone(), prop_size_index_distance: 4000000, @@ -375,8 +375,8 @@ fn test_serde_custom_tikv_config() { max_bytes_for_level_base: ReadableSize::kb(12), target_file_size_base: ReadableSize::kb(123), level0_file_num_compaction_trigger: 123, - level0_slowdown_writes_trigger: 123, - level0_stop_writes_trigger: 123, + 
level0_slowdown_writes_trigger: Some(123), + level0_stop_writes_trigger: Some(123), max_compaction_bytes: ReadableSize::gb(1), compaction_pri: CompactionPriority::MinOverlappingRatio, dynamic_level_bytes: true, @@ -385,8 +385,8 @@ fn test_serde_custom_tikv_config() { compaction_style: DBCompactionStyle::Universal, disable_auto_compactions: true, disable_write_stall: true, - soft_pending_compaction_bytes_limit: ReadableSize::gb(12), - hard_pending_compaction_bytes_limit: ReadableSize::gb(12), + soft_pending_compaction_bytes_limit: Some(ReadableSize::gb(12)), + hard_pending_compaction_bytes_limit: Some(ReadableSize::gb(12)), force_consistency_checks: true, titan: TitanCfConfig { min_blob_size: ReadableSize(1024), // default value @@ -440,8 +440,8 @@ fn test_serde_custom_tikv_config() { max_bytes_for_level_base: ReadableSize::kb(12), target_file_size_base: ReadableSize::kb(123), level0_file_num_compaction_trigger: 123, - level0_slowdown_writes_trigger: 123, - level0_stop_writes_trigger: 123, + level0_slowdown_writes_trigger: Some(123), + level0_stop_writes_trigger: Some(123), max_compaction_bytes: ReadableSize::gb(1), compaction_pri: CompactionPriority::MinOverlappingRatio, dynamic_level_bytes: true, @@ -450,8 +450,8 @@ fn test_serde_custom_tikv_config() { compaction_style: DBCompactionStyle::Universal, disable_auto_compactions: true, disable_write_stall: true, - soft_pending_compaction_bytes_limit: ReadableSize::gb(12), - hard_pending_compaction_bytes_limit: ReadableSize::gb(12), + soft_pending_compaction_bytes_limit: Some(ReadableSize::gb(12)), + hard_pending_compaction_bytes_limit: Some(ReadableSize::gb(12)), force_consistency_checks: true, titan: TitanCfConfig { min_blob_size: ReadableSize(1024), // default value @@ -505,8 +505,8 @@ fn test_serde_custom_tikv_config() { max_bytes_for_level_base: ReadableSize::kb(12), target_file_size_base: ReadableSize::kb(123), level0_file_num_compaction_trigger: 123, - level0_slowdown_writes_trigger: 123, - 
level0_stop_writes_trigger: 123, + level0_slowdown_writes_trigger: Some(123), + level0_stop_writes_trigger: Some(123), max_compaction_bytes: ReadableSize::gb(1), compaction_pri: CompactionPriority::MinOverlappingRatio, dynamic_level_bytes: true, @@ -515,8 +515,8 @@ fn test_serde_custom_tikv_config() { compaction_style: DBCompactionStyle::Universal, disable_auto_compactions: true, disable_write_stall: true, - soft_pending_compaction_bytes_limit: ReadableSize::gb(12), - hard_pending_compaction_bytes_limit: ReadableSize::gb(12), + soft_pending_compaction_bytes_limit: Some(ReadableSize::gb(12)), + hard_pending_compaction_bytes_limit: Some(ReadableSize::gb(12)), force_consistency_checks: true, titan: TitanCfConfig { min_blob_size: ReadableSize(1024), // default value @@ -599,8 +599,8 @@ fn test_serde_custom_tikv_config() { max_bytes_for_level_base: ReadableSize::kb(12), target_file_size_base: ReadableSize::kb(123), level0_file_num_compaction_trigger: 123, - level0_slowdown_writes_trigger: 123, - level0_stop_writes_trigger: 123, + level0_slowdown_writes_trigger: Some(123), + level0_stop_writes_trigger: Some(123), max_compaction_bytes: ReadableSize::gb(1), compaction_pri: CompactionPriority::MinOverlappingRatio, dynamic_level_bytes: true, @@ -609,8 +609,8 @@ fn test_serde_custom_tikv_config() { compaction_style: DBCompactionStyle::Universal, disable_auto_compactions: true, disable_write_stall: true, - soft_pending_compaction_bytes_limit: ReadableSize::gb(12), - hard_pending_compaction_bytes_limit: ReadableSize::gb(12), + soft_pending_compaction_bytes_limit: Some(ReadableSize::gb(12)), + hard_pending_compaction_bytes_limit: Some(ReadableSize::gb(12)), force_consistency_checks: true, titan: titan_cf_config, prop_size_index_distance: 4000000, From da16e5eca3e44f8a987acdd6aad31acad3f5c05e Mon Sep 17 00:00:00 2001 From: Liqi Geng Date: Thu, 2 Jun 2022 14:18:28 +0800 Subject: [PATCH 0011/1149] copr: fix a wrong check in time parsing (#12740) ref tikv/tikv#12739, close 
tikv/tikv#12739 See #12739 Signed-off-by: gengliqi --- .../src/codec/mysql/time/mod.rs | 58 ++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs index 5d3222d0f3b..29b66725e2a 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs @@ -489,7 +489,7 @@ mod parser { } // the following statement checks fsp ((components.len() != 7 && components.len() != 2) - || input.as_bytes()[input.len() - components.last().unwrap().len() - 1] == b'.') + || (separators.len() >= components.len() - 1 /* should always true */ && separators[components.len() - 2] == b".")) .as_option()?; Some((components, if has_tz { Some(tz_offset) } else { None })) @@ -2237,6 +2237,25 @@ mod tests { ("2020-12-23 07:59:23", "2020-12-23 15:59:23+0800", 0, false), ("2020-12-23 23:59:23", "2020-12-23 15:59:23-08", 0, false), ("2020-12-23 07:59:23", "2020-12-23 15:59:23+08:00", 0, false), + ("2022-06-02 11:59:30", "2022-06-02 11:59:30.123Z", 0, false), + ( + "2022-06-02 03:59:30", + "2022-06-02 11:59:30.123+0800", + 0, + false, + ), + ( + "2022-06-02 19:59:30", + "2022-06-02 11:59:30.123-08", + 0, + false, + ), + ( + "2022-06-02 03:29:30", + "2022-06-02 11:59:30.123+08:30", + 0, + false, + ), ]; for (expected, actual, fsp, round) in cases { assert_eq!( @@ -2400,6 +2419,43 @@ mod tests { r: Some("2020-10-10 10:10:10.000000"), tp: TimeType::Timestamp, }, + Case { + tz: "+08:00", + t: "2022-06-02T10:10:10Z", + r: Some("2022-06-02 18:10:10.000000"), + tp: TimeType::DateTime, + }, + Case { + tz: "-08:00", + t: "2022-06-02T10:10:10Z", + r: Some("2022-06-02 02:10:10.000000"), + tp: TimeType::DateTime, + }, + Case { + tz: "+06:30", + t: "2022-06-02T10:10:10-05:00", + r: Some("2022-06-02 21:40:10.000000"), + tp: TimeType::DateTime, + }, + // Time with fraction + Case { + tz: "+08:00", + t: 
"2022-06-02T10:10:10.123Z", + r: Some("2022-06-02 18:10:10.123000"), + tp: TimeType::DateTime, + }, + Case { + tz: "-08:00", + t: "2022-06-02T10:10:10.123Z", + r: Some("2022-06-02 02:10:10.123000"), + tp: TimeType::DateTime, + }, + Case { + tz: "+06:30", + t: "2022-06-02T10:10:10.654321-05:00", + r: Some("2022-06-02 21:40:10.654321"), + tp: TimeType::DateTime, + }, ]; let mut result: Vec> = vec![]; for Case { tz, t, r: _, tp } in &cases { From 033d62d7f7b65d1edcd6da8cd70acee7041eefaa Mon Sep 17 00:00:00 2001 From: ekexium Date: Mon, 6 Jun 2022 13:16:29 +0800 Subject: [PATCH 0012/1149] log details for PessimisitcLockNotFound in check_for_newer_version (#12713) ref tikv/tikv#11612 Signed-off-by: ekexium Co-authored-by: Ti Chi Robot --- src/storage/txn/actions/prewrite.rs | 8 ++++++++ src/storage/txn/commands/prewrite.rs | 6 +++--- src/storage/txn/scheduler.rs | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index de5270a6b10..a96c5eabc8d 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -352,8 +352,16 @@ impl<'a> PrewriteMutation<'a> { self.write_conflict_error(&write, commit_ts)?; } } + // Note: PessimisticLockNotFound can happen on a non-pessimistically locked key, + // if it is a retrying prewrite request. 
TransactionKind::Pessimistic(for_update_ts) => { if commit_ts > for_update_ts { + warn!("conflicting write was found, pessimistic lock must be lost for the corresponding row key"; + "key" => %self.key, + "start_ts" => self.txn_props.start_ts, + "for_update_ts" => for_update_ts, + "conflicting start_ts" => write.start_ts, + "conflicting commit_ts" => commit_ts); return Err(ErrorInner::PessimisticLockNotFound { start_ts: self.txn_props.start_ts, key: self.key.clone().into_raw()?, diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index bb64c7641b8..4c2caec12b2 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -462,7 +462,7 @@ impl Prewriter { let mut final_min_commit_ts = TimeStamp::zero(); let mut locks = Vec::new(); - // Further check whether the prewrited transaction has been committed + // Further check whether the prewritten transaction has been committed // when encountering a WriteConflict or PessimisticLockNotFound error. // This extra check manages to make prewrite idempotent after the transaction // was committed. @@ -479,7 +479,7 @@ impl Prewriter { TxnCommitRecord::SingleRecord { commit_ts, write } if write.write_type != WriteType::Rollback => { - info!("prewrited transaction has been committed"; + info!("prewritten transaction has been committed"; "start_ts" => reader.start_ts, "commit_ts" => commit_ts, "key" => ?key, "write_type" => ?write.write_type); txn.clear(); @@ -943,7 +943,7 @@ mod tests { None, ) .unwrap(); - // All keys are prewrited successful with only one seek operations. + // All keys are prewritten successful with only one seek operations. 
assert_eq!(1, statistic.write.seek); let keys: Vec = mutations.iter().map(|m| m.key().clone()).collect(); commit(&engine, &mut statistic, keys.clone(), 104, 105).unwrap(); diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 60972dcfaec..3460a1de5fd 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -820,7 +820,7 @@ impl Scheduler { // error to the callback, and releases the latches. Err(err) => { SCHED_STAGE_COUNTER_VEC.get(tag).prepare_write_err.inc(); - debug!("write command failed at prewrite"; "cid" => cid, "err" => ?err); + debug!("write command failed"; "cid" => cid, "err" => ?err); scheduler.finish_with_err(cid, err); return; } From ba391ff506c8b7f4b0cd7c9ef0b9f04ce87c3d7e Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Mon, 6 Jun 2022 15:32:29 +0800 Subject: [PATCH 0013/1149] storage: measure the read duration for scheduler commands (#12716) ref tikv/tikv#12362 This commit measures the read duration spent on reading (e.g. write conflict checks). This fixes the missing part of scheduler commands and may help diagnosis when scheduler reading takes a long time. This commit also changes some now_coarse to now. now_coarse has a precision of 10ms on many systems, so it may not meet our precision requirement. Instant::now is fast enough to be called in these cases. 
Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- metrics/grafana/tikv_details.json | 143 +++++++++++++++++++++++++++++- src/storage/mod.rs | 56 ++++++------ src/storage/txn/sched_pool.rs | 16 ---- src/storage/txn/scheduler.rs | 21 +++-- 4 files changed, 182 insertions(+), 54 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 050a6727622..6192b4f3a5e 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -19400,6 +19400,145 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The time consumed on reading when executing commit command", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 63 + }, + "hiddenSeries": false, + "id": 23763572710, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_scheduler_processing_read_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "99%", + 
"metric": "", + "refId": "A", + "step": 10 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(tikv_scheduler_processing_read_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "95%", + "metric": "", + "refId": "B", + "step": 10 + }, + { + "exemplar": true, + "expr": "sum(rate(tikv_scheduler_processing_read_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) / sum(rate(tikv_scheduler_processing_read_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) ", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "avg", + "metric": "", + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Scheduler command read duration", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:95", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:96", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "cards": { "cardPadding": null, @@ -19422,8 +19561,8 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 60 + "x": 12, + "y": 63 }, "heatmap": {}, "hideZeroBuckets": true, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 802c35af020..9e778afe064 100644 --- a/src/storage/mod.rs +++ 
b/src/storage/mod.rs @@ -547,7 +547,7 @@ impl Storage { key: Key, start_ts: TimeStamp, ) -> impl Future, KvGetStatistics)>> { - let stage_begin_ts = Instant::now_coarse(); + let stage_begin_ts = Instant::now(); const CMD: CommandKind = CommandKind::get; let priority = ctx.get_priority(); let priority_tag = get_priority_tag(priority); @@ -563,7 +563,7 @@ impl Storage { let res = self.read_pool.spawn_handle( async move { - let stage_scheduled_ts = Instant::now_coarse(); + let stage_scheduled_ts = Instant::now(); tls_collect_query( ctx.get_region_id(), ctx.get_peer(), @@ -580,7 +580,7 @@ impl Storage { Self::check_api_version(api_version, ctx.api_version, CMD, [key.as_encoded()])?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); // The bypass_locks and access_locks set will be checked at most once. // `TsSet::vec` is more efficient here. @@ -598,7 +598,7 @@ impl Storage { let snapshot = Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)).await?; { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let stage_snap_recv_ts = begin_instant; let buckets = snapshot.ext().get_buckets(); let mut statistics = Statistics::default(); @@ -656,7 +656,7 @@ impl Storage { .inc_by(quota_delay.as_micros() as u64); } - let stage_finished_ts = Instant::now_coarse(); + let stage_finished_ts = Instant::now(); let schedule_wait_time = stage_scheduled_ts.saturating_duration_since(stage_begin_ts); let snapshot_wait_time = @@ -724,7 +724,7 @@ impl Storage { KV_COMMAND_KEYREAD_HISTOGRAM_STATIC .get(CMD) .observe(requests.len() as f64); - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let read_id = Some(ThreadReadId::new()); let mut statistics = Statistics::default(); let mut req_snaps = vec![]; @@ -871,7 +871,7 @@ impl Storage { keys: Vec, start_ts: TimeStamp, ) -> impl Future>, KvGetStatistics)>> { - 
let stage_begin_ts = Instant::now_coarse(); + let stage_begin_ts = Instant::now(); const CMD: CommandKind = CommandKind::batch_get; let priority = ctx.get_priority(); let priority_tag = get_priority_tag(priority); @@ -888,7 +888,7 @@ impl Storage { let mut sample = quota_limiter.new_sample(); let res = self.read_pool.spawn_handle( async move { - let stage_scheduled_ts = Instant::now_coarse(); + let stage_scheduled_ts = Instant::now(); let mut key_ranges = vec![]; for key in &keys { key_ranges.push(build_key_range(key.as_encoded(), key.as_encoded(), false)); @@ -912,7 +912,7 @@ impl Storage { keys.iter().map(Key::as_encoded), )?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let bypass_locks = TsSet::from_u64s(ctx.take_resolved_locks()); let access_locks = TsSet::from_u64s(ctx.take_committed_locks()); @@ -928,7 +928,7 @@ impl Storage { let snapshot = Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)).await?; { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let stage_snap_recv_ts = begin_instant; let mut statistics = Vec::with_capacity(keys.len()); @@ -999,7 +999,7 @@ impl Storage { .inc_by(quota_delay.as_micros() as u64); } - let stage_finished_ts = Instant::now_coarse(); + let stage_finished_ts = Instant::now(); let schedule_wait_time = stage_scheduled_ts.saturating_duration_since(stage_begin_ts); let snapshot_wait_time = @@ -1102,7 +1102,7 @@ impl Storage { if reverse_scan { std::mem::swap(&mut start_key, &mut end_key); } - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let bypass_locks = TsSet::from_u64s(ctx.take_resolved_locks()); let access_locks = TsSet::from_u64s(ctx.take_committed_locks()); @@ -1155,7 +1155,7 @@ impl Storage { let snapshot = Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)).await?; { - let begin_instant = Instant::now_coarse(); + 
let begin_instant = Instant::now(); let perf_statistics = ReadPerfInstant::new(); let buckets = snapshot.ext().get_buckets(); @@ -1266,7 +1266,7 @@ impl Storage { // Do not check_api_version in scan_lock, to be compatible with TiDB gc-worker, // which resolves locks on regions, and boundary of regions will be out of range of TiDB keys. - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); concurrency_manager.update_max_ts(max_ts); let begin_instant = Instant::now(); @@ -1305,7 +1305,7 @@ impl Storage { let snapshot = Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)).await?; { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let mut statistics = Statistics::default(); let perf_statistics = ReadPerfInstant::new(); let buckets = snapshot.ext().get_buckets(); @@ -1481,7 +1481,7 @@ impl Storage { Self::check_api_version(api_version, ctx.api_version, CMD, [&key])?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -1492,7 +1492,7 @@ impl Storage { let store = RawStore::new(snapshot, api_version); let cf = Self::rawkv_cf(&cf, api_version)?; { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let mut stats = Statistics::default(); let key = F::encode_raw_key_owned(key, None); // Keys pass to `tls_collect_query` should be encoded, to get correct keys for region split. 
@@ -1577,7 +1577,7 @@ impl Storage { .map_err(Error::from)?; } - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let read_id = Some(ThreadReadId::new()); let mut snaps = vec![]; for (mut req, id) in gets.into_iter().zip(ids) { @@ -1604,7 +1604,7 @@ impl Storage { snaps.push((id, key, ctx, req, snap)); } Self::with_tls_engine(|engine| engine.release_snapshot()); - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); for (id, key, ctx, mut req, snap) in snaps { let cf = req.take_cf(); match snap.await { @@ -1684,7 +1684,7 @@ impl Storage { Self::check_api_version(api_version, ctx.api_version, CMD, &keys)?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -1694,7 +1694,7 @@ impl Storage { let buckets = snapshot.ext().get_buckets(); let store = RawStore::new(snapshot, api_version); { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let cf = Self::rawkv_cf(&cf, api_version)?; // no scan_count for this kind of op. 
@@ -2020,7 +2020,7 @@ impl Storage { [(Some(&start_key), end_key.as_ref())], )?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -2031,7 +2031,7 @@ impl Storage { let cf = Self::rawkv_cf(&cf, api_version)?; { let store = RawStore::new(snapshot, api_version); - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let start_key = F::encode_raw_key_owned(start_key, None); let end_key = end_key.map(|k| F::encode_raw_key_owned(k, None)); @@ -2155,7 +2155,7 @@ impl Storage { .map(|range| (Some(range.get_start_key()), Some(range.get_end_key()))), )?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -2297,7 +2297,7 @@ impl Storage { Self::check_api_version(api_version, ctx.api_version, CMD, [&key])?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -2308,7 +2308,7 @@ impl Storage { let store = RawStore::new(snapshot, api_version); let cf = Self::rawkv_cf(&cf, api_version)?; { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let mut stats = Statistics::default(); let key = F::encode_raw_key_owned(key, None); // Keys pass to `tls_collect_query` should be encoded, to get correct keys for region split. 
@@ -2462,7 +2462,7 @@ impl Storage { .map(|range| (Some(range.get_start_key()), Some(range.get_end_key()))), )?; - let command_duration = tikv_util::time::Instant::now_coarse(); + let command_duration = tikv_util::time::Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -2473,7 +2473,7 @@ impl Storage { let store = RawStore::new(snapshot, api_version); let cf = Self::rawkv_cf("", api_version)?; - let begin_instant = tikv_util::time::Instant::now_coarse(); + let begin_instant = tikv_util::time::Instant::now(); let mut stats = Vec::with_capacity(ranges.len()); let ret = store .raw_checksum_ranges(cf, &ranges, &mut stats) diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index d83d8fe6f46..12ff44bbd61 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -13,7 +13,6 @@ use prometheus::local::*; use raftstore::store::WriteStats; use tikv_util::{ sys::SysQuota, - time::Duration, yatp_pool::{FuturePool, PoolTicker, YatpPoolBuilder}, }; @@ -24,8 +23,6 @@ use crate::storage::{ pub struct SchedLocalMetrics { local_scan_details: HashMap<&'static str, Statistics>, - processing_read_duration: LocalHistogramVec, - processing_write_duration: LocalHistogramVec, command_keyread_histogram_vec: LocalHistogramVec, local_write_stats: WriteStats, } @@ -34,8 +31,6 @@ thread_local! 
{ static TLS_SCHED_METRICS: RefCell = RefCell::new( SchedLocalMetrics { local_scan_details: HashMap::default(), - processing_read_duration: SCHED_PROCESSING_READ_HISTOGRAM_VEC.local(), - processing_write_duration: SCHED_PROCESSING_WRITE_HISTOGRAM_VEC.local(), command_keyread_histogram_vec: KV_COMMAND_KEYREAD_HISTOGRAM_VEC.local(), local_write_stats:WriteStats::default(), } @@ -112,8 +107,6 @@ pub fn tls_flush(reporter: &R) { } } } - m.processing_read_duration.flush(); - m.processing_write_duration.flush(); m.command_keyread_histogram_vec.flush(); // Report PD metrics @@ -132,15 +125,6 @@ pub fn tls_collect_query(region_id: u64, kind: QueryKind) { }); } -pub fn tls_collect_read_duration(cmd: &str, duration: Duration) { - TLS_SCHED_METRICS.with(|m| { - m.borrow_mut() - .processing_read_duration - .with_label_values(&[cmd]) - .observe(tikv_util::time::duration_to_sec(duration)) - }); -} - pub fn tls_collect_keyread_histogram_vec(cmd: &str, count: f64) { TLS_SCHED_METRICS.with(|m| { m.borrow_mut() diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 3460a1de5fd..283787e9ba1 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -64,9 +64,7 @@ use crate::{ commands::{Command, ResponsePolicy, WriteContext, WriteResult, WriteResultLockInfo}, flow_controller::FlowController, latch::{Latches, Lock}, - sched_pool::{ - tls_collect_query, tls_collect_read_duration, tls_collect_scan_details, SchedPool, - }, + sched_pool::{tls_collect_query, tls_collect_scan_details, SchedPool}, Error, ProcessResult, }, types::StorageCallback, @@ -733,8 +731,6 @@ impl Scheduler { tag, ts ); - - tls_collect_read_duration(tag.get_str(), elapsed); } .in_resource_metering_tag(resource_tag) .await; @@ -748,10 +744,14 @@ impl Scheduler { let tag = task.cmd.tag(); + let begin_instant = Instant::now(); let pr = task .cmd .process_read(snapshot, statistics) .unwrap_or_else(|e| ProcessResult::Failed { err: e.into() }); + 
SCHED_PROCESSING_READ_HISTOGRAM_STATIC + .get(tag) + .observe(begin_instant.saturating_elapsed_secs()); self.on_read_finished(task.cid, pr, tag); } @@ -782,10 +782,15 @@ impl Scheduler { statistics, async_apply_prewrite: self.inner.enable_async_apply_prewrite, }; - - task.cmd + let begin_instant = Instant::now(); + let res = task + .cmd .process_write(snapshot, context) - .map_err(StorageError::from) + .map_err(StorageError::from); + SCHED_PROCESSING_READ_HISTOGRAM_STATIC + .get(tag) + .observe(begin_instant.saturating_elapsed_secs()); + res }; if write_result.is_ok() { From 62545b0c5c854b4e42bf37d03dddfab2099ce20c Mon Sep 17 00:00:00 2001 From: haojinming Date: Tue, 7 Jun 2022 14:34:30 +0800 Subject: [PATCH 0014/1149] Reserve key space id encoding in backup convert (#12759) close tikv/tikv#12758 Signed-off-by: haojinming --- components/api_version/src/api_v1.rs | 24 ++++++----------- components/api_version/src/api_v1ttl.rs | 31 +++++++++------------ components/api_version/src/api_v2.rs | 27 ++++++++++++------- components/api_version/src/lib.rs | 36 ++++++++----------------- components/backup/src/endpoint.rs | 32 ++++++++++++++++------ components/backup/src/utils.rs | 14 +++++----- tests/integrations/backup/mod.rs | 16 ++++++++--- 7 files changed, 94 insertions(+), 86 deletions(-) diff --git a/components/api_version/src/api_v1.rs b/components/api_version/src/api_v1.rs index 9267d1397c7..5b980ea75f1 100644 --- a/components/api_version/src/api_v1.rs +++ b/components/api_version/src/api_v1.rs @@ -1,5 +1,7 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
+use tikv_util::box_err; + use super::*; impl KvFormat for ApiV1 { @@ -43,28 +45,18 @@ impl KvFormat for ApiV1 { ) -> Result { match src_api { ApiVersion::V1 | ApiVersion::V1ttl => Ok(Key::from_encoded_slice(key)), - ApiVersion::V2 => { - debug_assert_eq!(ApiV2::parse_key_mode(key), KeyMode::Raw); - let (mut user_key, _) = ApiV2::decode_raw_key(&Key::from_encoded_slice(key), true)?; - user_key.remove(0); // remove first byte `RAW_KEY_PREFIX` - Ok(Self::encode_raw_key_owned(user_key, None)) - } + ApiVersion::V2 => Err(box_err!("unsupported conversion from v2 to v1")), // reject apiv2 -> apiv1 conversion } } fn convert_raw_user_key_range_version_from( src_api: ApiVersion, - mut start_key: Vec, - mut end_key: Vec, - ) -> (Vec, Vec) { + start_key: Vec, + end_key: Vec, + ) -> Result<(Vec, Vec)> { match src_api { - ApiVersion::V1 | ApiVersion::V1ttl => (start_key, end_key), - ApiVersion::V2 => { - // TODO: check raw key range after check_api_version_range is refactored. - start_key.remove(0); - end_key.remove(0); - (start_key, end_key) - } + ApiVersion::V1 | ApiVersion::V1ttl => Ok((start_key, end_key)), + ApiVersion::V2 => Err(box_err!("unsupported conversion from v2 to v1")), // reject apiv2 -> apiv1 conversion } } } diff --git a/components/api_version/src/api_v1ttl.rs b/components/api_version/src/api_v1ttl.rs index ce42a023273..65c7f569aa6 100644 --- a/components/api_version/src/api_v1ttl.rs +++ b/components/api_version/src/api_v1ttl.rs @@ -1,9 +1,12 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
use engine_traits::Result; -use tikv_util::codec::{ - number::{self, NumberEncoder}, - Error, +use tikv_util::{ + box_err, + codec::{ + number::{self, NumberEncoder}, + Error, + }, }; use super::*; @@ -67,28 +70,18 @@ impl KvFormat for ApiV1Ttl { ) -> Result { match src_api { ApiVersion::V1 | ApiVersion::V1ttl => Ok(Key::from_encoded_slice(key)), - ApiVersion::V2 => { - debug_assert_eq!(ApiV2::parse_key_mode(key), KeyMode::Raw); - let (mut user_key, _) = ApiV2::decode_raw_key(&Key::from_encoded_slice(key), true)?; - user_key.remove(0); // remove first byte `RAW_KEY_PREFIX` - Ok(Self::encode_raw_key_owned(user_key, None)) - } + ApiVersion::V2 => Err(box_err!("unsupported conversion from v2 to v1ttl")), // reject apiv2 -> apiv1ttl conversion } } fn convert_raw_user_key_range_version_from( src_api: ApiVersion, - mut start_key: Vec, - mut end_key: Vec, - ) -> (Vec, Vec) { + start_key: Vec, + end_key: Vec, + ) -> Result<(Vec, Vec)> { match src_api { - ApiVersion::V1 | ApiVersion::V1ttl => (start_key, end_key), - ApiVersion::V2 => { - // TODO: check raw key range after check_api_version_range is refactored. - start_key.remove(0); - end_key.remove(0); - (start_key, end_key) - } + ApiVersion::V1 | ApiVersion::V1ttl => Ok((start_key, end_key)), + ApiVersion::V2 => Err(box_err!("unsupported conversion from v2 to v1ttl")), // reject apiv2 -> apiv1ttl conversion } } } diff --git a/components/api_version/src/api_v2.rs b/components/api_version/src/api_v2.rs index d12926cb39b..a8a177596ad 100644 --- a/components/api_version/src/api_v2.rs +++ b/components/api_version/src/api_v2.rs @@ -16,6 +16,8 @@ pub const RAW_KEY_PREFIX_END: u8 = RAW_KEY_PREFIX + 1; pub const TXN_KEY_PREFIX: u8 = b'x'; pub const TIDB_META_KEY_PREFIX: u8 = b'm'; pub const TIDB_TABLE_KEY_PREFIX: u8 = b't'; +pub const DEFAULT_KEY_SPACE_ID: [u8; 3] = [0, 0, 0]; // reserve 3 bytes for key space id. 
+pub const DEFAULT_KEY_SPACE_ID_END: [u8; 3] = [0, 0, 1]; pub const TIDB_RANGES: &[(&[u8], &[u8])] = &[ (&[TIDB_META_KEY_PREFIX], &[TIDB_META_KEY_PREFIX + 1]), @@ -182,9 +184,7 @@ impl KvFormat for ApiV2 { ) -> Result { match src_api { ApiVersion::V1 | ApiVersion::V1ttl => { - let mut apiv2_key = Vec::with_capacity(ApiV2::get_encode_len(key.len() + 1)); - apiv2_key.push(RAW_KEY_PREFIX); - apiv2_key.extend(key); + let apiv2_key = ApiV2::add_prefix(key, &DEFAULT_KEY_SPACE_ID); Ok(Self::encode_raw_key_owned(apiv2_key, ts)) } ApiVersion::V2 => Ok(Key::from_encoded_slice(key)), @@ -195,18 +195,18 @@ impl KvFormat for ApiV2 { src_api: ApiVersion, mut start_key: Vec, mut end_key: Vec, - ) -> (Vec, Vec) { + ) -> Result<(Vec, Vec)> { match src_api { ApiVersion::V1 | ApiVersion::V1ttl => { - start_key.insert(0, RAW_KEY_PREFIX); + start_key = ApiV2::add_prefix(&start_key, &DEFAULT_KEY_SPACE_ID); if end_key.is_empty() { - end_key.insert(0, RAW_KEY_PREFIX_END); + end_key = ApiV2::add_prefix(&end_key, &DEFAULT_KEY_SPACE_ID_END); } else { - end_key.insert(0, RAW_KEY_PREFIX); + end_key = ApiV2::add_prefix(&end_key, &DEFAULT_KEY_SPACE_ID); } - (start_key, end_key) + Ok((start_key, end_key)) } - ApiVersion::V2 => (start_key, end_key), + ApiVersion::V2 => Ok((start_key, end_key)), } } } @@ -235,6 +235,15 @@ impl ApiV2 { Ok(Key::split_on_ts_for(key)?) } + pub fn add_prefix(key: &[u8], key_space: &[u8]) -> Vec { + let mut apiv2_key = + Vec::with_capacity(ApiV2::get_encode_len(key.len() + key_space.len() + 1)); + apiv2_key.push(RAW_KEY_PREFIX); + apiv2_key.extend(key_space); // Reserved 3 bytes for key space id. 
+ apiv2_key.extend(key); + apiv2_key + } + pub const ENCODED_LOGICAL_DELETE: [u8; 1] = [ValueMeta::DELETE_FLAG.bits]; } diff --git a/components/api_version/src/lib.rs b/components/api_version/src/lib.rs index b57b1dfae45..0dbdc833b86 100644 --- a/components/api_version/src/lib.rs +++ b/components/api_version/src/lib.rs @@ -80,7 +80,7 @@ pub trait KvFormat: Clone + Copy + 'static + Send + Sync { src_api: ApiVersion, start_key: Vec, end_key: Vec, - ) -> (Vec, Vec); + ) -> Result<(Vec, Vec)>; /// Convert the encoded value from src_api version to Self::TAG version fn convert_raw_encoded_value_version_from( @@ -633,8 +633,8 @@ mod tests { .clone() .into_iter() .map(|key| { - let mut v2_key = key; - v2_key.insert(0, RAW_KEY_PREFIX); + let mut v2_key = vec![RAW_KEY_PREFIX, 0, 0, 0]; + v2_key.extend(key); ApiV2::encode_raw_key_owned(v2_key, Some(TimeStamp::from(timestamp))).into_encoded() }) .collect(); @@ -642,8 +642,6 @@ mod tests { let test_cases = vec![ (ApiVersion::V1, ApiVersion::V2, &apiv1_keys, &apiv2_keys), (ApiVersion::V1ttl, ApiVersion::V2, &apiv1_keys, &apiv2_keys), - (ApiVersion::V2, ApiVersion::V1, &apiv2_keys, &apiv1_keys), - (ApiVersion::V2, ApiVersion::V1ttl, &apiv2_keys, &apiv1_keys), ]; for i in 0..apiv1_keys.len() { for (src_api_ver, dst_api_ver, src_data, dst_data) in test_cases.clone() { @@ -731,14 +729,14 @@ mod tests { .clone() .into_iter() .map(|(start_key, end_key)| { - let mut v2_start_key = start_key; - let mut v2_end_key = end_key; - v2_start_key.insert(0, RAW_KEY_PREFIX); - if v2_end_key.is_empty() { - v2_end_key.insert(0, RAW_KEY_PREFIX_END); + let mut v2_start_key = vec![RAW_KEY_PREFIX, 0, 0, 0]; // key space takes 3 bytes. + let mut v2_end_key = if end_key.is_empty() { + vec![RAW_KEY_PREFIX, 0, 0, 1] } else { - v2_end_key.insert(0, RAW_KEY_PREFIX); - } + vec![RAW_KEY_PREFIX, 0, 0, 0] // key space takes 3 bytes. 
+ }; + v2_start_key.extend(start_key); + v2_end_key.extend(end_key); (v2_start_key, v2_end_key) }) .collect(); @@ -756,18 +754,6 @@ mod tests { &apiv1_key_ranges, &apiv2_key_ranges, ), - ( - ApiVersion::V2, - ApiVersion::V1, - &apiv2_key_ranges, - &apiv1_key_ranges, - ), - ( - ApiVersion::V2, - ApiVersion::V1ttl, - &apiv2_key_ranges, - &apiv1_key_ranges, - ), ]; for (src_api_ver, dst_api_ver, src_data, dst_data) in test_cases { for i in 0..apiv1_key_ranges.len() { @@ -775,7 +761,7 @@ mod tests { let (src_start, src_end) = src_data[i].clone(); API::convert_raw_user_key_range_version_from(src_api_ver, src_start, src_end) }); - assert_eq!(dst_key_range, dst_data[i]); + assert_eq!(dst_key_range.unwrap(), dst_data[i]); } } } diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 3a737ba52d2..37e6855302a 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -206,20 +206,30 @@ async fn save_backup_file_worker( let files = if msg.files.need_flush_keys() { match msg.files.save(&storage).await { Ok(mut split_files) => { + let mut has_err = false; for file in split_files.iter_mut() { // In the case that backup from v1 and restore to v2, // the file range need be encoded as v2 format. // And range in response keep in v1 format. 
- let (start, end) = codec.convert_key_range_to_dst_version( + let ret = codec.convert_key_range_to_dst_version( msg.start_key.clone(), msg.end_key.clone(), ); + if ret.is_err() { + has_err = true; + break; + } + let (start, end) = ret.unwrap(); file.set_start_key(start); file.set_end_key(end); file.set_start_version(msg.start_version.into_inner()); file.set_end_version(msg.end_version.into_inner()); } - Ok(split_files) + if has_err { + Err(box_err!("backup convert key range failed")) + } else { + Ok(split_files) + } } Err(e) => { error_unknown!(?e; "backup save file failed"); @@ -1524,7 +1534,10 @@ pub mod tests { format!("k{:0>10}", idx) }; if api_ver == ApiVersion::V2 { - key.insert(0, RAW_KEY_PREFIX as char); + // [0, 0, 0] is the default key space id. + let mut apiv2_key = [RAW_KEY_PREFIX, 0, 0, 0].to_vec(); + apiv2_key.extend(key.as_bytes()); + key = String::from_utf8(apiv2_key).unwrap(); } key } @@ -1561,7 +1574,10 @@ pub mod tests { ) -> Key { if (cur_ver == ApiVersion::V1 || cur_ver == ApiVersion::V1ttl) && dst_ver == ApiVersion::V2 { - raw_key.insert(0, RAW_KEY_PREFIX as char); + // [0, 0, 0] is the default key space id. + let mut apiv2_key = [RAW_KEY_PREFIX, 0, 0, 0].to_vec(); + apiv2_key.extend(raw_key.as_bytes()); + raw_key = String::from_utf8(apiv2_key).unwrap(); } Key::from_encoded(raw_key.into_bytes()) } @@ -1610,22 +1626,22 @@ pub mod tests { stats.reset(); let mut req = BackupRequest::default(); let backup_start = if cur_api_ver == ApiVersion::V2 { - vec![RAW_KEY_PREFIX] + vec![RAW_KEY_PREFIX, 0, 0, 0] // key space id takes 3 bytes. } else { vec![] }; let backup_end = if cur_api_ver == ApiVersion::V2 { - vec![RAW_KEY_PREFIX + 1] + vec![RAW_KEY_PREFIX, 0, 0, 1] // [0, 0, 1] is the end of the file } else { vec![] }; let file_start = if dst_api_ver == ApiVersion::V2 { - vec![RAW_KEY_PREFIX] + vec![RAW_KEY_PREFIX, 0, 0, 0] // key space id takes 3 bytes. 
} else { vec![] }; let file_end = if dst_api_ver == ApiVersion::V2 { - vec![RAW_KEY_PREFIX + 1] + vec![RAW_KEY_PREFIX, 0, 0, 1] // [0, 0, 1] is the end of the file } else { vec![] }; diff --git a/components/backup/src/utils.rs b/components/backup/src/utils.rs index 4d01631817c..1ced24f4abc 100644 --- a/components/backup/src/utils.rs +++ b/components/backup/src/utils.rs @@ -240,12 +240,14 @@ impl KeyValueCodec { &self, start_key: Vec, end_key: Vec, - ) -> (Vec, Vec) { + ) -> Result<(Vec, Vec)> { if !self.is_raw_kv { - return (start_key, end_key); + return Ok((start_key, end_key)); } dispatch_api_version!(self.dst_api_ver, { - API::convert_raw_user_key_range_version_from(self.cur_api_ver, start_key, end_key) + let (start, end) = + API::convert_raw_user_key_range_version_from(self.cur_api_ver, start_key, end_key)?; + Ok((start, end)) }) } } @@ -500,14 +502,14 @@ pub mod tests { ( ApiVersion::V1, ApiVersion::V2, - b"abc".to_vec(), - ApiV2::encode_raw_key_owned(b"rabc".to_vec(), ts), + [61, 62, 63].to_vec(), + ApiV2::encode_raw_key_owned([114, 0, 0, 0, 61, 62, 63].to_vec(), ts), ), ( ApiVersion::V1ttl, ApiVersion::V2, b"".to_vec(), - ApiV2::encode_raw_key_owned(b"r".to_vec(), ts), + ApiV2::encode_raw_key_owned([114, 0, 0, 0].to_vec(), ts), ), ]; diff --git a/tests/integrations/backup/mod.rs b/tests/integrations/backup/mod.rs index 1752c529cb0..6d171bcae28 100644 --- a/tests/integrations/backup/mod.rs +++ b/tests/integrations/backup/mod.rs @@ -354,7 +354,9 @@ fn test_backup_rawkv_cross_version_impl(cur_api_ver: ApiVersion, dst_api_ver: Ap let key = { let mut key = k.into_bytes(); if cur_api_ver != ApiVersion::V2 && dst_api_ver == ApiVersion::V2 { - key.insert(0, b'r') + let mut apiv2_key = [b'r', 0, 0, 0].to_vec(); + apiv2_key.extend(key); + key = apiv2_key; } key }; @@ -364,9 +366,17 @@ fn test_backup_rawkv_cross_version_impl(cur_api_ver: ApiVersion, dst_api_ver: Ap // Backup file should have same contents. // Set non-empty range to check if it's incorrectly encoded. 
+ let (backup_start, backup_end) = if cur_api_ver != dst_api_ver { + ( + vec![b'r', 0, 0, 0, b'r', b'a'], + vec![b'r', 0, 0, 0, b'r', b'z'], + ) + } else { + (vec![b'r', b'a'], vec![b'r', b'z']) + }; let rx = target_suite.backup_raw( - vec![b'r', b'a'], // start - vec![b'r', b'z'], // end + backup_start, // start + backup_end, // end cf, &make_unique_dir(tmp.path()), dst_api_ver, From dee0e1eaac70f9f003755bbef443a5d36b59d5ff Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Tue, 7 Jun 2022 18:16:30 +0800 Subject: [PATCH 0015/1149] log-backup: fix bug about restoring point at TiCloud with KMS (#12757) close tikv/tikv#12750, close tikv/tikv#12751 Signed-off-by: joccau --- components/external_storage/src/lib.rs | 11 -- components/sst_importer/src/sst_importer.rs | 152 +++++++++++++++++++- 2 files changed, 146 insertions(+), 17 deletions(-) diff --git a/components/external_storage/src/lib.rs b/components/external_storage/src/lib.rs index 477b0a39a64..0bad03cbcca 100644 --- a/components/external_storage/src/lib.rs +++ b/components/external_storage/src/lib.rs @@ -9,7 +9,6 @@ extern crate slog_global; extern crate tikv_alloc; use std::{ - fs, io::{self, Write}, marker::Unpin, sync::Arc, @@ -87,16 +86,6 @@ pub trait ExternalStorage: 'static + Send + Sync { file_crypter: Option, ) -> io::Result<()> { let reader = self.read(storage_name); - if let Some(p) = restore_name.parent() { - // try create all parent dirs from the path (optional). - fs::create_dir_all(p).or_else(|e| { - if e.kind() == io::ErrorKind::AlreadyExists { - Ok(()) - } else { - Err(e) - } - })?; - } let output: &mut dyn Write = &mut File::create(restore_name)?; // the minimum speed of reading data, in bytes/second. 
// if reading speed is slower than this rate, we will stop with diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index dc92c405480..d1ef399d6d0 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -4,7 +4,7 @@ use std::{ borrow::Cow, collections::HashMap, fs::File, - io::{prelude::*, BufReader}, + io::{self, prelude::*, BufReader}, ops::Bound, path::{Path, PathBuf}, sync::Arc, @@ -230,16 +230,26 @@ impl SstImporter { dst_file: std::path::PathBuf, backend: &StorageBackend, expect_sha256: Option>, + support_kms: bool, file_crypter: Option, speed_limiter: &Limiter, ) -> Result<()> { let start_read = Instant::now(); + if let Some(p) = dst_file.parent() { + file_system::create_dir_all(p).or_else(|e| { + if e.kind() == io::ErrorKind::AlreadyExists { + Ok(()) + } else { + Err(e) + } + })?; + } // prepare to download the file from the external_storage // TODO: pass a config to support hdfs let ext_storage = external_storage_export::create_storage(backend, Default::default())?; let url = ext_storage.url()?.to_string(); - let ext_storage: Box = + let ext_storage: Box = if support_kms { if let Some(key_manager) = &self.key_manager { Box::new(external_storage_export::EncryptedExternalStorage { key_manager: (*key_manager).clone(), @@ -247,7 +257,10 @@ impl SstImporter { }) as _ } else { ext_storage as _ - }; + } + } else { + ext_storage as _ + }; let result = ext_storage.restore( src_file_name, @@ -313,6 +326,10 @@ impl SstImporter { path.temp.clone(), backend, expected_sha256, + // kv-files needn't are decrypted with KMS when download currently because these files are not encrypted when log-backup. + // It is different from sst-files because sst-files is encrypted when saved with rocksdb env with KMS. + // to do: support KMS when log-backup and restore point. + false, // don't support encrypt for now. 
None, speed_limiter, @@ -321,7 +338,13 @@ impl SstImporter { if let Some(p) = path.save.parent() { // we have v1 prefix in file name. - file_system::create_dir_all(p)?; + file_system::create_dir_all(p).or_else(|e| { + if e.kind() == io::ErrorKind::AlreadyExists { + Ok(()) + } else { + Err(e) + } + })?; } file_system::rename(path.temp, path.save.clone())?; @@ -474,6 +497,7 @@ impl SstImporter { path.temp.clone(), backend, None, + true, file_crypter, speed_limiter, )?; @@ -761,7 +785,7 @@ fn is_after_end_bound>(value: &[u8], bound: &Bound) -> bool { #[cfg(test)] mod tests { - use std::io; + use std::io::{self, BufWriter}; use engine_traits::{ collect, EncryptionMethod, Error as TraitError, ExternalSstFileInfo, Iterable, Iterator, @@ -772,7 +796,7 @@ mod tests { use tempfile::Builder; use test_sst_importer::*; use test_util::new_test_key_manager; - use tikv_util::stream::block_on_external_io; + use tikv_util::{codec::stream_event::EventEncoder, stream::block_on_external_io}; use txn_types::{Value, WriteType}; use uuid::Uuid; @@ -926,6 +950,15 @@ mod tests { } } + fn check_file_is_same(path_a: &Path, path_b: &Path) -> bool { + assert!(path_a.exists()); + assert!(path_b.exists()); + + let content_a = file_system::read(path_a).unwrap(); + let content_b = file_system::read(path_b).unwrap(); + content_a == content_b + } + fn new_key_manager_for_test() -> (tempfile::TempDir, Arc) { // test with tde let tmp_dir = tempfile::TempDir::new().unwrap(); @@ -981,6 +1014,41 @@ mod tests { }) } + fn create_sample_external_kv_file() -> Result<(tempfile::TempDir, StorageBackend, KvMeta)> { + let ext_dir = tempfile::tempdir()?; + let file_name = "v1/t000001/abc.log"; + let file_path = ext_dir.path().join(file_name); + std::fs::create_dir_all(file_path.parent().unwrap())?; + let file = File::create(file_path).unwrap(); + let mut buff = BufWriter::new(file); + + let kvs = vec![ + (b"t1_r01".to_vec(), b"tidb".to_vec()), + (b"t1_r02".to_vec(), b"tikv".to_vec()), + (b"t1_r03".to_vec(), 
b"pingcap".to_vec()), + ]; + + let mut sha256 = Hasher::new(MessageDigest::sha256()).unwrap(); + let mut len = 0; + for kv in kvs { + let encoded = EventEncoder::encode_event(&kv.0, &kv.1); + for slice in encoded { + len += buff.write(slice.as_ref()).unwrap(); + sha256.update(slice.as_ref()).unwrap(); + } + } + + let mut kv_meta = KvMeta::default(); + kv_meta.set_name(file_name.to_string()); + kv_meta.set_cf(String::from("default")); + kv_meta.set_is_delete(false); + kv_meta.set_length(len as _); + kv_meta.set_sha256(sha256.finish().unwrap().to_vec()); + + let backend = external_storage_export::make_local_backend(ext_dir.path()); + Ok((ext_dir, backend, kv_meta)) + } + fn create_sample_external_rawkv_sst_file( start_key: &[u8], end_key: &[u8], @@ -1156,6 +1224,78 @@ mod tests { assert_eq!(err.kind(), io::ErrorKind::TimedOut); } + #[test] + fn test_download_file_from_external_storage_for_sst() { + // creates a sample SST file. + let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file().unwrap(); + + // create importer object. + let import_dir = tempfile::tempdir().unwrap(); + let (_, key_manager) = new_key_manager_for_test(); + let importer = SstImporter::new( + &Config::default(), + import_dir, + Some(key_manager.clone()), + ApiVersion::V1, + ) + .unwrap(); + + // perform download file into .temp dir. 
+ let file_name = "sample.sst"; + let path = importer.dir.get_import_path(file_name).unwrap(); + importer + .download_file_from_external_storage( + meta.get_length(), + file_name, + path.temp.clone(), + &backend, + None, + true, + None, + &Limiter::new(f64::INFINITY), + ) + .unwrap(); + check_file_exists(&path.temp, Some(&key_manager)); + assert!(!check_file_is_same( + &_ext_sst_dir.path().join(file_name), + &path.temp, + )); + } + + #[test] + fn test_download_file_from_external_storage_for_kv() { + let (_temp_dir, backend, kv_meta) = create_sample_external_kv_file().unwrap(); + let (_, key_manager) = new_key_manager_for_test(); + + let import_dir = tempfile::tempdir().unwrap(); + let importer = SstImporter::new( + &Config::default(), + import_dir, + Some(key_manager), + ApiVersion::V1, + ) + .unwrap(); + + let path = importer.dir.get_import_path(kv_meta.get_name()).unwrap(); + importer + .download_file_from_external_storage( + kv_meta.get_length(), + kv_meta.get_name(), + path.temp.clone(), + &backend, + Some(kv_meta.get_sha256().to_vec()), + false, + None, + &Limiter::new(f64::INFINITY), + ) + .unwrap(); + + assert!(check_file_is_same( + &_temp_dir.path().join(kv_meta.get_name()), + &path.temp, + )); + } + #[test] fn test_download_sst_no_key_rewrite() { // creates a sample SST file. 
From ae46f9b35d77409ad3ef946e842bce17d44571fe Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 7 Jun 2022 11:06:30 -0700 Subject: [PATCH 0016/1149] *: update jemalloc to 5.3.0 (#12661) close tikv/tikv#12660 Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 18 +++++++++--------- components/tikv_alloc/Cargo.toml | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 080a1ccc35f..75458e3d917 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2745,7 +2745,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#de8310c3983a30236ea03f802ed0c2401a4908ae" +source = "git+https://github.com/tikv/rust-rocksdb.git#c1f668d0c85612f5fe6ec8e4351df0fc0bef1286" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2764,7 +2764,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#de8310c3983a30236ea03f802ed0c2401a4908ae" +source = "git+https://github.com/tikv/rust-rocksdb.git#c1f668d0c85612f5fe6ec8e4351df0fc0bef1286" dependencies = [ "bzip2-sys", "cc", @@ -4573,7 +4573,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#de8310c3983a30236ea03f802ed0c2401a4908ae" +source = "git+https://github.com/tikv/rust-rocksdb.git#c1f668d0c85612f5fe6ec8e4351df0fc0bef1286" dependencies = [ "libc 0.2.125", "librocksdb_sys", @@ -6144,9 +6144,9 @@ dependencies = [ [[package]] name = "tikv-jemalloc-ctl" -version = "0.4.2" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb833c46ecbf8b6daeccb347cefcabf9c1beb5c9b0f853e1cec45632d9963e69" +checksum = "e37706572f4b151dff7a0146e040804e9c26fe3a3118591112f05cf12a4216c1" dependencies = [ "libc 0.2.125", "paste", @@ -6155,9 +6155,9 @@ dependencies = [ [[package]] name = "tikv-jemalloc-sys" -version = "0.4.3+5.2.1-patched.2" +version = 
"0.5.0+5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1792ccb507d955b46af42c123ea8863668fae24d03721e40cad6a41773dbb49" +checksum = "aeab4310214fe0226df8bfeb893a291a58b19682e8a07e1e1d4483ad4200d315" dependencies = [ "cc", "fs_extra", @@ -6166,9 +6166,9 @@ dependencies = [ [[package]] name = "tikv-jemallocator" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5b7bcecfafe4998587d636f9ae9d55eb9d0499877b88757767c346875067098" +checksum = "20612db8a13a6c06d57ec83953694185a367e16945f66565e8028d2c0bd76979" dependencies = [ "libc 0.2.125", "tikv-jemalloc-sys", diff --git a/components/tikv_alloc/Cargo.toml b/components/tikv_alloc/Cargo.toml index 2ebbd4da1bc..086744cab8f 100644 --- a/components/tikv_alloc/Cargo.toml +++ b/components/tikv_alloc/Cargo.toml @@ -35,15 +35,15 @@ optional = true features = ["bundled"] [dependencies.tikv-jemalloc-ctl] -version = "0.4.0" +version = "0.5.0" optional = true [dependencies.tikv-jemalloc-sys] -version = "0.4.0" +version = "0.5.0" optional = true features = ["stats"] [dependencies.tikv-jemallocator] -version = "0.4.0" +version = "0.5.0" optional = true features = ["unprefixed_malloc_on_supported_platforms", "stats"] From ffdff6b87606c0b1087d7507c64de9dac48a3d36 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 8 Jun 2022 16:46:31 +0800 Subject: [PATCH 0017/1149] store: add test for verifying bucket version change in try_batch (#12777) close tikv/tikv#12578 As the issue shows, try_batch may use out of date bucket meta for further operation which has not been detected by any test. This PR adds a test for it. 
Signed-off-by: SpadeA-Tang Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/fsm/apply.rs | 94 +++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index a7c534ff823..b74a49c4273 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -5394,6 +5394,100 @@ mod tests { } } + #[test] + fn test_bucket_version_change_in_try_batch() { + let (_path, engine) = create_tmp_engine("test-bucket"); + let (_, importer) = create_tmp_importer("test-bucket"); + let obs = ApplyObserver::default(); + let mut host = CoprocessorHost::::default(); + host.registry + .register_query_observer(1, BoxQueryObserver::new(obs)); + + let (tx, rx) = mpsc::channel(); + let (region_scheduler, _) = dummy_scheduler(); + let sender = Box::new(TestNotifier { tx }); + let cfg = { + let mut cfg = Config::default(); + cfg.apply_batch_system.pool_size = 1; + cfg.apply_batch_system.low_priority_pool_size = 0; + Arc::new(VersionTrack::new(cfg)) + }; + let (router, mut system) = create_apply_batch_system(&cfg.value()); + let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); + let builder = super::Builder:: { + tag: "test-store".to_owned(), + cfg, + sender, + region_scheduler, + coprocessor_host: host, + importer, + engine, + router: router.clone(), + store_id: 1, + pending_create_peers, + }; + system.spawn("test-bucket".to_owned(), builder); + + let mut reg = Registration { + id: 1, + ..Default::default() + }; + reg.region.set_id(1); + reg.region.mut_peers().push(new_peer(1, 1)); + reg.region.set_start_key(b"k1".to_vec()); + reg.region.set_end_key(b"k2".to_vec()); + reg.region.mut_region_epoch().set_conf_ver(1); + reg.region.mut_region_epoch().set_version(3); + router.schedule_task(1, Msg::Registration(reg)); + + let entry1 = { + let mut entry = EntryBuilder::new(1, 1); + entry = entry.put(b"key1", b"value1"); + 
entry.epoch(1, 3).build() + }; + + let entry2 = { + let mut entry = EntryBuilder::new(2, 1); + entry = entry.put(b"key2", b"value2"); + entry.epoch(1, 3).build() + }; + + let (capture_tx, _capture_rx) = mpsc::channel(); + let mut apply1 = apply(1, 1, 1, vec![entry1], vec![cb(1, 1, capture_tx.clone())]); + let bucket_meta = BucketMeta { + region_id: 1, + region_epoch: RegionEpoch::default(), + version: 1, + keys: vec![b"".to_vec(), b"".to_vec()], + sizes: vec![0, 0], + }; + apply1.bucket_meta = Some(Arc::new(bucket_meta)); + + let mut apply2 = apply(1, 1, 1, vec![entry2], vec![cb(2, 1, capture_tx)]); + let mut bucket_meta2 = BucketMeta { + region_id: 1, + region_epoch: RegionEpoch::default(), + version: 2, + keys: vec![b"".to_vec(), b"".to_vec()], + sizes: vec![0, 0], + }; + bucket_meta2.version = 2; + apply2.bucket_meta = Some(Arc::new(bucket_meta2)); + + router.schedule_task(1, Msg::apply(apply1)); + router.schedule_task(1, Msg::apply(apply2)); + + let res = fetch_apply_res(&rx); + let bucket_version = res.bucket_stat.unwrap().as_ref().meta.version; + + assert_eq!(bucket_version, 2); + + validate(&router, 1, |delegate| { + let bucket_version = delegate.buckets.as_ref().unwrap().meta.version; + assert_eq!(bucket_version, 2); + }); + } + #[test] fn test_cmd_observer() { let (_path, engine) = create_tmp_engine("test-delegate"); From fd7b4ad2e6662ea6b199eb2355b11fbe9c201204 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Thu, 9 Jun 2022 14:18:30 +0800 Subject: [PATCH 0018/1149] config: output warn log when some components config invalid (#12767) close tikv/tikv#12771 Signed-off-by: 3pointer Co-authored-by: zhangjinpeng1987 Co-authored-by: Ti Chi Robot --- components/cdc/src/endpoint.rs | 10 ++-- components/sst_importer/src/config.rs | 15 ++++-- src/config.rs | 70 +++++++++++++++++++-------- 3 files changed, 68 insertions(+), 27 deletions(-) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 3adaa8aca65..9b1b663b207 100644 --- 
a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -442,12 +442,12 @@ impl, E: KvEngine> Endpoint { fn on_change_cfg(&mut self, change: ConfigChange) { // Validate first. let mut validate_cfg = self.config.clone(); - validate_cfg.update(change.clone()); + validate_cfg.update(change); if let Err(e) = validate_cfg.validate() { warn!("cdc config update failed"; "error" => ?e); return; } - + let change = self.config.diff(&validate_cfg); info!( "cdc config updated"; "current config" => ?self.config, @@ -1542,13 +1542,13 @@ mod tests { let mut updated_cfg = cfg.clone(); { // Update it to be smaller than incremental_scan_threads, - // which will be an invalid change and will be lost. + // which will be an invalid change and will modified to incremental_scan_threads. updated_cfg.incremental_scan_concurrency = 2; } let diff = cfg.diff(&updated_cfg); ep.run(Task::ChangeConfig(diff)); - assert_eq!(ep.config.incremental_scan_concurrency, 6); - assert_eq!(ep.scan_concurrency_semaphore.available_permits(), 6); + assert_eq!(ep.config.incremental_scan_concurrency, 4); + assert_eq!(ep.scan_concurrency_semaphore.available_permits(), 4); { // Correct update. 
diff --git a/components/sst_importer/src/config.rs b/components/sst_importer/src/config.rs index a25d34ea24b..ef74a40fd01 100644 --- a/components/sst_importer/src/config.rs +++ b/components/sst_importer/src/config.rs @@ -27,12 +27,21 @@ impl Default for Config { } impl Config { - pub fn validate(&self) -> Result<(), Box> { + pub fn validate(&mut self) -> Result<(), Box> { + let default_cfg = Config::default(); if self.num_threads == 0 { - return Err("import.num_threads can not be 0".into()); + warn!( + "import.num_threads can not be 0, change it to {}", + default_cfg.num_threads + ); + self.num_threads = default_cfg.num_threads; } if self.stream_channel_window == 0 { - return Err("import.stream_channel_window can not be 0".into()); + warn!( + "import.stream_channel_window can not be 0, change it to {}", + default_cfg.stream_channel_window + ); + self.stream_channel_window = default_cfg.stream_channel_window; } Ok(()) } diff --git a/src/config.rs b/src/config.rs index 627901481d1..37278fd09e2 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2316,16 +2316,29 @@ pub struct BackupConfig { } impl BackupConfig { - pub fn validate(&self) -> Result<(), Box> { + pub fn validate(&mut self) -> Result<(), Box> { let limit = SysQuota::cpu_cores_quota() as usize; + let default_cfg = BackupConfig::default(); if self.num_threads == 0 || self.num_threads > limit { - return Err(format!("backup.num_threads cannot be 0 or larger than {}", limit).into()); + warn!( + "backup.num_threads cannot be 0 or larger than {}, change it to {}", + limit, default_cfg.num_threads + ); + self.num_threads = default_cfg.num_threads; } if self.batch_size == 0 { - return Err("backup.batch_size cannot be 0".into()); + warn!( + "backup.batch_size cannot be 0, change it to {}", + default_cfg.batch_size + ); + self.batch_size = default_cfg.batch_size; } if self.s3_multi_part_size.0 > ReadableSize::gb(5).0 { - return Err("backup.s3_multi_part_size cannot larger than 5GB".into()); + warn!( + 
"backup.s3_multi_part_size cannot larger than 5GB, change it to {:?}", + default_cfg.s3_multi_part_size + ); + self.s3_multi_part_size = default_cfg.s3_multi_part_size; } Ok(()) @@ -2373,9 +2386,15 @@ pub struct BackupStreamConfig { } impl BackupStreamConfig { - pub fn validate(&self) -> Result<(), Box> { - if self.num_threads == 0 { - return Err("backup.num_threads cannot be 0".into()); + pub fn validate(&mut self) -> Result<(), Box> { + let limit = SysQuota::cpu_cores_quota() as usize; + let default_cfg = BackupStreamConfig::default(); + if self.num_threads == 0 || self.num_threads > limit { + warn!( + "log_backup.num_threads cannot be 0 or larger than {}, change it to {}", + limit, default_cfg.num_threads + ); + self.num_threads = default_cfg.num_threads; } Ok(()) } @@ -2460,25 +2479,38 @@ impl Default for CdcConfig { impl CdcConfig { pub fn validate(&mut self) -> Result<(), Box> { + let default_cfg = CdcConfig::default(); if self.min_ts_interval.is_zero() { - return Err("cdc.min-ts-interval can't be 0".into()); + warn!( + "cdc.min-ts-interval can't be 0, change it to {}", + default_cfg.min_ts_interval + ); + self.min_ts_interval = default_cfg.min_ts_interval; } if self.incremental_scan_threads == 0 { - return Err("cdc.incremental-scan-threads can't be 0".into()); + warn!( + "cdc.incremental-scan-threads can't be 0, change it to {}", + default_cfg.incremental_scan_threads + ); + self.incremental_scan_threads = default_cfg.incremental_scan_threads; } if self.incremental_scan_concurrency < self.incremental_scan_threads { - return Err( - "cdc.incremental-scan-concurrency must be larger than cdc.incremental-scan-threads" - .into(), + warn!( + "cdc.incremental-scan-concurrency must be larger than cdc.incremental-scan-threads, + change it to {}", + self.incremental_scan_threads ); + self.incremental_scan_concurrency = self.incremental_scan_threads } if self.incremental_scan_ts_filter_ratio < 0.0 || self.incremental_scan_ts_filter_ratio > 1.0 { - return Err( - 
"cdc.incremental-scan-ts-filter-ratio should be larger than 0 and less than 1" - .into(), + warn!( + "cdc.incremental-scan-ts-filter-ratio should be larger than 0 and less than 1, + change it to {}", + default_cfg.incremental_scan_ts_filter_ratio ); + self.incremental_scan_ts_filter_ratio = default_cfg.incremental_scan_ts_filter_ratio; } Ok(()) } @@ -5125,21 +5157,21 @@ mod tests { min-ts-interval = "0s" "#; let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); - cfg.validate().unwrap_err(); + cfg.validate().unwrap(); let content = r#" [cdc] incremental-scan-threads = 0 "#; let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); - cfg.validate().unwrap_err(); + cfg.validate().unwrap(); let content = r#" [cdc] incremental-scan-concurrency = 0 "#; let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); - cfg.validate().unwrap_err(); + cfg.validate().unwrap(); let content = r#" [cdc] @@ -5147,7 +5179,7 @@ mod tests { incremental-scan-threads = 2 "#; let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); - cfg.validate().unwrap_err(); + cfg.validate().unwrap(); } #[test] From ab968ffb5e496ea1fce4ff40ee0e562247de98dd Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Fri, 10 Jun 2022 15:04:31 +0800 Subject: [PATCH 0019/1149] *: introduce general request tracker (#12460) ref tikv/tikv#12362 This commit is a large refactoring that moves collecting engine PerfContext from storage and coprocessor to engine_rocks and the tracker. Now, the storage and coprocessor are mostly decoupled with a specific engine (engine_rocks). And it introduces a general trakcer mechanism to collect the metrics of a request during its whole lifetime. It will help us collect more performance critical data of a single request more easily. 
Signed-off-by: Yilin Chen --- Cargo.lock | 20 ++ Cargo.toml | 2 + components/engine_panic/Cargo.toml | 1 + components/engine_panic/src/perf_context.rs | 3 +- components/engine_rocks/Cargo.toml | 1 + components/engine_rocks/src/perf_context.rs | 5 +- .../engine_rocks/src/perf_context_impl.rs | 228 +++++++++++++-- .../engine_rocks/src/perf_context_metrics.rs | 12 + components/engine_traits/Cargo.toml | 1 + components/engine_traits/src/perf_context.rs | 10 +- .../raftstore/src/store/async_io/write.rs | 2 +- components/raftstore/src/store/fsm/apply.rs | 2 +- components/raftstore/src/store/peer.rs | 2 +- components/test_storage/Cargo.toml | 1 + components/test_storage/src/sync_storage.rs | 4 +- components/tikv_kv/Cargo.toml | 1 + components/tikv_kv/src/lib.rs | 14 +- components/tikv_util/Cargo.toml | 1 + .../tikv_util/src/yatp_pool/future_pool.rs | 5 +- components/tracker/Cargo.toml | 14 + components/tracker/src/lib.rs | 86 ++++++ components/tracker/src/metrics.rs | 12 + components/tracker/src/slab.rs | 269 ++++++++++++++++++ components/tracker/src/tls.rs | 69 +++++ scripts/check-bins.py | 1 + src/coprocessor/endpoint.rs | 33 ++- src/coprocessor/interceptors/mod.rs | 4 +- src/coprocessor/interceptors/tracker.rs | 18 +- src/coprocessor/metrics.rs | 136 --------- src/coprocessor/tracker.rs | 138 +++++---- src/read_pool.rs | 5 +- src/server/service/batch.rs | 34 ++- src/server/service/kv.rs | 37 ++- src/storage/metrics.rs | 138 --------- src/storage/mod.rs | 134 ++++++--- 35 files changed, 996 insertions(+), 447 deletions(-) create mode 100644 components/tracker/Cargo.toml create mode 100644 components/tracker/src/lib.rs create mode 100644 components/tracker/src/metrics.rs create mode 100644 components/tracker/src/slab.rs create mode 100644 components/tracker/src/tls.rs diff --git a/Cargo.lock b/Cargo.lock index 75458e3d917..6691467f359 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1444,6 +1444,7 @@ dependencies = [ "raft", "tikv_alloc", "tikv_util", + "tracker", 
"txn_types", ] @@ -1482,6 +1483,7 @@ dependencies = [ "tikv_util", "time", "toml", + "tracker", "txn_types", ] @@ -1541,6 +1543,7 @@ dependencies = [ "tikv_alloc", "tikv_util", "toml", + "tracker", "txn_types", ] @@ -5666,6 +5669,7 @@ dependencies = [ "test_raftstore", "tikv", "tikv_util", + "tracker", "txn_types", ] @@ -6081,6 +6085,7 @@ dependencies = [ "tokio-openssl", "tokio-timer", "toml", + "tracker", "txn_types", "url", "uuid", @@ -6229,6 +6234,7 @@ dependencies = [ "tempfile", "thiserror", "tikv_util", + "tracker", "txn_types", ] @@ -6290,6 +6296,7 @@ dependencies = [ "tokio-executor", "tokio-timer", "toml", + "tracker", "url", "utime", "yatp", @@ -6598,6 +6605,19 @@ dependencies = [ "tracing", ] +[[package]] +name = "tracker" +version = "0.0.1" +dependencies = [ + "collections", + "kvproto", + "lazy_static", + "parking_lot 0.12.0", + "pin-project", + "prometheus", + "slab", +] + [[package]] name = "try-lock" version = "0.2.2" diff --git a/Cargo.toml b/Cargo.toml index 477716d8893..a1c1f315de3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -164,6 +164,7 @@ tokio = { version = "1.17", features = ["full"] } tokio-openssl = "0.6" tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } toml = "0.5" +tracker = { path = "components/tracker" } txn_types = { path = "components/txn_types", default-features = false } url = "2" uuid = { version = "0.8.1", features = ["serde", "v4"] } @@ -268,6 +269,7 @@ members = [ "components/tikv_alloc", "components/tikv_util", "components/tipb_helper", + "components/tracker", "components/txn_types", "fuzz", "fuzz/fuzzer-afl", diff --git a/components/engine_panic/Cargo.toml b/components/engine_panic/Cargo.toml index 36f9b92ec24..b00180c98d2 100644 --- a/components/engine_panic/Cargo.toml +++ b/components/engine_panic/Cargo.toml @@ -12,4 +12,5 @@ raft = { version = "0.7.0", default-features = false, features = ["protobuf-code tikv_alloc = { path = "../tikv_alloc" } # FIXME: Remove this dep from the 
engine_traits interface tikv_util = { path = "../tikv_util", default-features = false } +tracker = { path = "../tracker" } txn_types = { path = "../txn_types", default-features = false } diff --git a/components/engine_panic/src/perf_context.rs b/components/engine_panic/src/perf_context.rs index 654ac01a629..46d18c00e77 100644 --- a/components/engine_panic/src/perf_context.rs +++ b/components/engine_panic/src/perf_context.rs @@ -1,6 +1,7 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. use engine_traits::{PerfContext, PerfContextExt, PerfContextKind, PerfLevel}; +use tracker::TrackerToken; use crate::engine::PanicEngine; @@ -19,7 +20,7 @@ impl PerfContext for PanicPerfContext { panic!() } - fn report_metrics(&mut self) { + fn report_metrics(&mut self, _: &[TrackerToken]) { panic!() } } diff --git a/components/engine_rocks/Cargo.toml b/components/engine_rocks/Cargo.toml index 7d1a90d7afe..e35438c4fe1 100644 --- a/components/engine_rocks/Cargo.toml +++ b/components/engine_rocks/Cargo.toml @@ -51,6 +51,7 @@ tempfile = "3.0" tikv_alloc = { path = "../tikv_alloc" } tikv_util = { path = "../tikv_util", default-features = false } time = "0.1" +tracker = { path = "../tracker" } txn_types = { path = "../txn_types", default-features = false } [dependencies.rocksdb] diff --git a/components/engine_rocks/src/perf_context.rs b/components/engine_rocks/src/perf_context.rs index 83ff4bca6bd..a731a9461dc 100644 --- a/components/engine_rocks/src/perf_context.rs +++ b/components/engine_rocks/src/perf_context.rs @@ -1,6 +1,7 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
use engine_traits::{PerfContext, PerfContextExt, PerfContextKind, PerfLevel}; +use tracker::TrackerToken; use crate::{engine::RocksEngine, perf_context_impl::PerfContextStatistics}; @@ -30,7 +31,7 @@ impl PerfContext for RocksPerfContext { self.stats.start() } - fn report_metrics(&mut self) { - self.stats.report() + fn report_metrics(&mut self, trackers: &[TrackerToken]) { + self.stats.report(trackers) } } diff --git a/components/engine_rocks/src/perf_context_impl.rs b/components/engine_rocks/src/perf_context_impl.rs index 617abe506d8..c1c299def66 100644 --- a/components/engine_rocks/src/perf_context_impl.rs +++ b/components/engine_rocks/src/perf_context_impl.rs @@ -1,39 +1,39 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use std::{fmt::Debug, marker::PhantomData, ops::Sub}; +use std::{fmt::Debug, marker::PhantomData, mem, ops::Sub, time::Duration}; use derive_more::{Add, AddAssign, Sub, SubAssign}; use engine_traits::{PerfContextKind, PerfLevel}; use kvproto::kvrpcpb::ScanDetailV2; use lazy_static::lazy_static; use slog_derive::KV; +use tikv_util::time::Instant; +use tracker::{Tracker, TrackerToken, GLOBAL_TRACKERS}; use crate::{ - perf_context_metrics::{ - APPLY_PERF_CONTEXT_TIME_HISTOGRAM_STATIC, STORE_PERF_CONTEXT_TIME_HISTOGRAM_STATIC, - }, - raw_util, set_perf_flags, set_perf_level, PerfContext as RawPerfContext, PerfFlag, PerfFlags, + perf_context_metrics::*, raw_util, set_perf_flags, set_perf_level, + PerfContext as RawPerfContext, PerfFlag, PerfFlags, }; macro_rules! 
report_write_perf_context { ($ctx: expr, $metric: ident) => { if $ctx.perf_level != PerfLevel::Disable { $ctx.write = WritePerfContext::capture(); - observe_perf_context_type!($ctx, $metric, write_wal_time); - observe_perf_context_type!($ctx, $metric, write_memtable_time); - observe_perf_context_type!($ctx, $metric, db_mutex_lock_nanos); - observe_perf_context_type!($ctx, $metric, pre_and_post_process); - observe_perf_context_type!($ctx, $metric, write_thread_wait); - observe_perf_context_type!($ctx, $metric, write_scheduling_flushes_compactions_time); - observe_perf_context_type!($ctx, $metric, db_condition_wait_nanos); - observe_perf_context_type!($ctx, $metric, write_delay_time); + observe_write_time!($ctx, $metric, write_wal_time); + observe_write_time!($ctx, $metric, write_memtable_time); + observe_write_time!($ctx, $metric, db_mutex_lock_nanos); + observe_write_time!($ctx, $metric, pre_and_post_process); + observe_write_time!($ctx, $metric, write_thread_wait); + observe_write_time!($ctx, $metric, write_scheduling_flushes_compactions_time); + observe_write_time!($ctx, $metric, db_condition_wait_nanos); + observe_write_time!($ctx, $metric, write_delay_time); } }; } -macro_rules! observe_perf_context_type { - ($s:expr, $metric: expr, $v:ident) => { - $metric.$v.observe(($s.write.$v) as f64 / 1e9); +macro_rules! 
observe_write_time { + ($ctx:expr, $metric: expr, $v:ident) => { + $metric.$v.observe(($ctx.write.$v) as f64 / 1e9); }; } @@ -143,6 +143,14 @@ impl ReadPerfContext { detail_v2.set_rocksdb_block_read_count(self.block_read_count); detail_v2.set_rocksdb_block_read_byte(self.block_read_byte); } + + fn report_to_tracker(&self, tracker: &mut Tracker) { + tracker.metrics.block_cache_hit_count += self.block_cache_hit_count; + tracker.metrics.block_read_byte += self.block_read_byte; + tracker.metrics.block_read_count += self.block_read_count; + tracker.metrics.deleted_key_skipped_count += self.internal_delete_skipped_count; + tracker.metrics.internal_key_skipped_count += self.internal_key_skipped_count; + } } #[derive(Default, Debug, Clone, Copy, Add, AddAssign, Sub, SubAssign, KV)] @@ -159,12 +167,15 @@ pub struct WritePerfContext { #[derive(Debug)] pub struct PerfContextStatistics { - pub perf_level: PerfLevel, - pub kind: PerfContextKind, - pub read: ReadPerfContext, - pub write: WritePerfContext, + perf_level: PerfLevel, + kind: PerfContextKind, + read: ReadPerfContext, + write: WritePerfContext, + last_flush_time: Instant, } +const FLUSH_METRICS_INTERVAL: Duration = Duration::from_secs(2); + impl PerfContextStatistics { /// Create an instance which stores instant statistics values, retrieved at creation. 
pub fn new(perf_level: PerfLevel, kind: PerfContextKind) -> Self { @@ -173,13 +184,16 @@ impl PerfContextStatistics { kind, read: Default::default(), write: Default::default(), + last_flush_time: Instant::now_coarse(), } } fn apply_perf_settings(&self) { if self.perf_level == PerfLevel::Uninitialized { match self.kind { - PerfContextKind::GenericRead => set_perf_flags(&*DEFAULT_READ_PERF_FLAGS), + PerfContextKind::Storage(_) | PerfContextKind::Coprocessor(_) => { + set_perf_flags(&*DEFAULT_READ_PERF_FLAGS) + } PerfContextKind::RaftstoreStore | PerfContextKind::RaftstoreApply => { set_perf_flags(&*DEFAULT_WRITE_PERF_FLAGS) } @@ -198,7 +212,7 @@ impl PerfContextStatistics { self.apply_perf_settings(); } - pub fn report(&mut self) { + pub fn report(&mut self, trackers: &[TrackerToken]) { match self.kind { PerfContextKind::RaftstoreApply => { report_write_perf_context!(self, APPLY_PERF_CONTEXT_TIME_HISTOGRAM_STATIC); @@ -206,15 +220,173 @@ impl PerfContextStatistics { PerfContextKind::RaftstoreStore => { report_write_perf_context!(self, STORE_PERF_CONTEXT_TIME_HISTOGRAM_STATIC); } - PerfContextKind::GenericRead => { - // TODO: Currently, metrics about reading is reported in other ways. - // It is better to unify how to report the perf metrics. - // - // Here we only record the PerfContext data into the fields. 
- self.read = ReadPerfContext::capture(); + PerfContextKind::Storage(_) | PerfContextKind::Coprocessor(_) => { + let perf_context = ReadPerfContext::capture(); + for token in trackers { + GLOBAL_TRACKERS.with_tracker(*token, |t| perf_context.report_to_tracker(t)); + } + self.read += perf_context; + self.flush_read_metrics(); } } } + + fn flush_read_metrics(&mut self) { + if self.last_flush_time.saturating_elapsed() < FLUSH_METRICS_INTERVAL { + return; + } + self.last_flush_time = Instant::now_coarse(); + let ctx = mem::take(&mut self.read); + let (v, tag) = match self.kind { + PerfContextKind::Storage(tag) => (&*STORAGE_ROCKSDB_PERF_COUNTER, tag), + PerfContextKind::Coprocessor(tag) => (&*COPR_ROCKSDB_PERF_COUNTER, tag), + _ => unreachable!(), + }; + v.get_metric_with_label_values(&[tag, "user_key_comparison_count"]) + .unwrap() + .inc_by(ctx.user_key_comparison_count); + v.get_metric_with_label_values(&[tag, "block_cache_hit_count"]) + .unwrap() + .inc_by(ctx.block_cache_hit_count); + v.get_metric_with_label_values(&[tag, "block_read_count"]) + .unwrap() + .inc_by(ctx.block_read_count); + v.get_metric_with_label_values(&[tag, "block_read_byte"]) + .unwrap() + .inc_by(ctx.block_read_byte); + v.get_metric_with_label_values(&[tag, "block_read_time"]) + .unwrap() + .inc_by(ctx.block_read_time); + v.get_metric_with_label_values(&[tag, "block_cache_index_hit_count"]) + .unwrap() + .inc_by(ctx.block_cache_index_hit_count); + v.get_metric_with_label_values(&[tag, "index_block_read_count"]) + .unwrap() + .inc_by(ctx.index_block_read_count); + v.get_metric_with_label_values(&[tag, "block_cache_filter_hit_count"]) + .unwrap() + .inc_by(ctx.block_cache_filter_hit_count); + v.get_metric_with_label_values(&[tag, "filter_block_read_count"]) + .unwrap() + .inc_by(ctx.filter_block_read_count); + v.get_metric_with_label_values(&[tag, "block_checksum_time"]) + .unwrap() + .inc_by(ctx.block_checksum_time); + v.get_metric_with_label_values(&[tag, "block_decompress_time"]) + .unwrap() 
+ .inc_by(ctx.block_decompress_time); + v.get_metric_with_label_values(&[tag, "get_read_bytes"]) + .unwrap() + .inc_by(ctx.get_read_bytes); + v.get_metric_with_label_values(&[tag, "iter_read_bytes"]) + .unwrap() + .inc_by(ctx.iter_read_bytes); + v.get_metric_with_label_values(&[tag, "internal_key_skipped_count"]) + .unwrap() + .inc_by(ctx.internal_key_skipped_count); + v.get_metric_with_label_values(&[tag, "internal_delete_skipped_count"]) + .unwrap() + .inc_by(ctx.internal_delete_skipped_count); + v.get_metric_with_label_values(&[tag, "internal_recent_skipped_count"]) + .unwrap() + .inc_by(ctx.internal_recent_skipped_count); + v.get_metric_with_label_values(&[tag, "get_snapshot_time"]) + .unwrap() + .inc_by(ctx.get_snapshot_time); + v.get_metric_with_label_values(&[tag, "get_from_memtable_time"]) + .unwrap() + .inc_by(ctx.get_from_memtable_time); + v.get_metric_with_label_values(&[tag, "get_from_memtable_count"]) + .unwrap() + .inc_by(ctx.get_from_memtable_count); + v.get_metric_with_label_values(&[tag, "get_post_process_time"]) + .unwrap() + .inc_by(ctx.get_post_process_time); + v.get_metric_with_label_values(&[tag, "get_from_output_files_time"]) + .unwrap() + .inc_by(ctx.get_from_output_files_time); + v.get_metric_with_label_values(&[tag, "seek_on_memtable_time"]) + .unwrap() + .inc_by(ctx.seek_on_memtable_time); + v.get_metric_with_label_values(&[tag, "seek_on_memtable_count"]) + .unwrap() + .inc_by(ctx.seek_on_memtable_count); + v.get_metric_with_label_values(&[tag, "next_on_memtable_count"]) + .unwrap() + .inc_by(ctx.next_on_memtable_count); + v.get_metric_with_label_values(&[tag, "prev_on_memtable_count"]) + .unwrap() + .inc_by(ctx.prev_on_memtable_count); + v.get_metric_with_label_values(&[tag, "seek_child_seek_time"]) + .unwrap() + .inc_by(ctx.seek_child_seek_time); + v.get_metric_with_label_values(&[tag, "seek_child_seek_count"]) + .unwrap() + .inc_by(ctx.seek_child_seek_count); + v.get_metric_with_label_values(&[tag, "seek_min_heap_time"]) + .unwrap() + 
.inc_by(ctx.seek_min_heap_time); + v.get_metric_with_label_values(&[tag, "seek_max_heap_time"]) + .unwrap() + .inc_by(ctx.seek_max_heap_time); + v.get_metric_with_label_values(&[tag, "seek_internal_seek_time"]) + .unwrap() + .inc_by(ctx.seek_internal_seek_time); + v.get_metric_with_label_values(&[tag, "db_mutex_lock_nanos"]) + .unwrap() + .inc_by(ctx.db_mutex_lock_nanos); + v.get_metric_with_label_values(&[tag, "db_condition_wait_nanos"]) + .unwrap() + .inc_by(ctx.db_condition_wait_nanos); + v.get_metric_with_label_values(&[tag, "read_index_block_nanos"]) + .unwrap() + .inc_by(ctx.read_index_block_nanos); + v.get_metric_with_label_values(&[tag, "read_filter_block_nanos"]) + .unwrap() + .inc_by(ctx.read_filter_block_nanos); + v.get_metric_with_label_values(&[tag, "new_table_block_iter_nanos"]) + .unwrap() + .inc_by(ctx.new_table_block_iter_nanos); + v.get_metric_with_label_values(&[tag, "new_table_iterator_nanos"]) + .unwrap() + .inc_by(ctx.new_table_iterator_nanos); + v.get_metric_with_label_values(&[tag, "block_seek_nanos"]) + .unwrap() + .inc_by(ctx.block_seek_nanos); + v.get_metric_with_label_values(&[tag, "find_table_nanos"]) + .unwrap() + .inc_by(ctx.find_table_nanos); + v.get_metric_with_label_values(&[tag, "bloom_memtable_hit_count"]) + .unwrap() + .inc_by(ctx.bloom_memtable_hit_count); + v.get_metric_with_label_values(&[tag, "bloom_memtable_miss_count"]) + .unwrap() + .inc_by(ctx.bloom_memtable_miss_count); + v.get_metric_with_label_values(&[tag, "bloom_sst_hit_count"]) + .unwrap() + .inc_by(ctx.bloom_sst_hit_count); + v.get_metric_with_label_values(&[tag, "bloom_sst_miss_count"]) + .unwrap() + .inc_by(ctx.bloom_sst_miss_count); + v.get_metric_with_label_values(&[tag, "get_cpu_nanos"]) + .unwrap() + .inc_by(ctx.get_cpu_nanos); + v.get_metric_with_label_values(&[tag, "iter_next_cpu_nanos"]) + .unwrap() + .inc_by(ctx.iter_next_cpu_nanos); + v.get_metric_with_label_values(&[tag, "iter_prev_cpu_nanos"]) + .unwrap() + .inc_by(ctx.iter_prev_cpu_nanos); + 
v.get_metric_with_label_values(&[tag, "iter_seek_cpu_nanos"]) + .unwrap() + .inc_by(ctx.iter_seek_cpu_nanos); + v.get_metric_with_label_values(&[tag, "encrypt_data_nanos"]) + .unwrap() + .inc_by(ctx.encrypt_data_nanos); + v.get_metric_with_label_values(&[tag, "decrypt_data_nanos"]) + .unwrap() + .inc_by(ctx.decrypt_data_nanos); + } } pub trait PerfContextFields: Debug + Clone + Copy + Sub + slog::KV { diff --git a/components/engine_rocks/src/perf_context_metrics.rs b/components/engine_rocks/src/perf_context_metrics.rs index 5d58066500f..cca9f551bc1 100644 --- a/components/engine_rocks/src/perf_context_metrics.rs +++ b/components/engine_rocks/src/perf_context_metrics.rs @@ -36,6 +36,18 @@ lazy_static! { exponential_buckets(0.0005, 2.0, 20).unwrap() ) .unwrap(); + pub static ref STORAGE_ROCKSDB_PERF_COUNTER: IntCounterVec = register_int_counter_vec!( + "tikv_storage_rocksdb_perf", + "Total number of RocksDB internal operations from PerfContext", + &["req", "metric"] + ) + .unwrap(); + pub static ref COPR_ROCKSDB_PERF_COUNTER: IntCounterVec = register_int_counter_vec!( + "tikv_coprocessor_rocksdb_perf", + "Total number of RocksDB internal operations from PerfContext", + &["req", "metric"] + ) + .unwrap(); pub static ref APPLY_PERF_CONTEXT_TIME_HISTOGRAM_STATIC: PerfContextTimeDuration = auto_flush_from!(APPLY_PERF_CONTEXT_TIME_HISTOGRAM, PerfContextTimeDuration); pub static ref STORE_PERF_CONTEXT_TIME_HISTOGRAM_STATIC: PerfContextTimeDuration = diff --git a/components/engine_traits/Cargo.toml b/components/engine_traits/Cargo.toml index 3b8c3efa33b..fb4bb69e5bc 100644 --- a/components/engine_traits/Cargo.toml +++ b/components/engine_traits/Cargo.toml @@ -22,6 +22,7 @@ slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global thiserror = "1.0" tikv_alloc = { path = "../tikv_alloc" } tikv_util = { path = "../tikv_util", default-features = false } +tracker = { path = "../tracker" } txn_types = { path = "../txn_types", default-features = false } 
[dev-dependencies] diff --git a/components/engine_traits/src/perf_context.rs b/components/engine_traits/src/perf_context.rs index f213925ddbd..c46ec4a95c8 100644 --- a/components/engine_traits/src/perf_context.rs +++ b/components/engine_traits/src/perf_context.rs @@ -1,5 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. use tikv_util::numeric_enum_serializing_mod; +use tracker::TrackerToken; #[derive(Copy, Clone, Debug, PartialEq)] pub enum PerfLevel { @@ -47,7 +48,10 @@ pub trait PerfContextExt { pub enum PerfContextKind { RaftstoreApply, RaftstoreStore, - GenericRead, + /// Commands in tikv::storage, the inner str is the command tag. + Storage(&'static str), + /// Coprocessor requests in tikv::coprocessor, the inner str is the request type. + Coprocessor(&'static str), } /// Reports metrics to prometheus @@ -58,6 +62,6 @@ pub trait PerfContext: Send { /// Reinitializes statistics and the perf level fn start_observe(&mut self); - /// Reports the current collected metrics to prometheus - fn report_metrics(&mut self); + /// Reports the current collected metrics to prometheus and trackers + fn report_metrics(&mut self, trackers: &[TrackerToken]); } diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index c9490738da4..373b64134d3 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -533,7 +533,7 @@ where self.store_id, self.tag, e ); }); - self.perf_context.report_metrics(); + self.perf_context.report_metrics(&[]); // TODO: pass in request trackers write_raft_time = duration_to_sec(now.saturating_elapsed()); STORE_WRITE_RAFTDB_DURATION_HISTOGRAM.observe(write_raft_time); } diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index b74a49c4273..ca6cabb7a95 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ 
-525,7 +525,7 @@ where self.kv_wb().write_opt(&write_opts).unwrap_or_else(|e| { panic!("failed to write to engine: {:?}", e); }); - self.perf_context.report_metrics(); + self.perf_context.report_metrics(&[]); // TODO: pass in request trackers self.sync_log_hint = false; let data_size = self.kv_wb().data_size(); if data_size > APPLY_WB_SHRINK_SIZE { diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 9c480182943..374df821b9b 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -1306,7 +1306,7 @@ where perf_context.start_observe(); engines.raft.consume(&mut raft_wb, true)?; - perf_context.report_metrics(); + perf_context.report_metrics(&[]); if self.get_store().is_initialized() && !keep_data { // If we meet panic when deleting data and raft log, the dirty data diff --git a/components/test_storage/Cargo.toml b/components/test_storage/Cargo.toml index 9a2c26aad22..65aa08cd101 100644 --- a/components/test_storage/Cargo.toml +++ b/components/test_storage/Cargo.toml @@ -30,4 +30,5 @@ raftstore = { path = "../raftstore", default-features = false } test_raftstore = { path = "../test_raftstore", default-features = false } tikv = { path = "../../", default-features = false } tikv_util = { path = "../tikv_util", default-features = false } +tracker = { path = "../tracker", default-features = false } txn_types = { path = "../txn_types", default-features = false } diff --git a/components/test_storage/src/sync_storage.rs b/components/test_storage/src/sync_storage.rs index af8a079a4de..b32dbe08fd5 100644 --- a/components/test_storage/src/sync_storage.rs +++ b/components/test_storage/src/sync_storage.rs @@ -19,6 +19,7 @@ use tikv::{ }, }; use tikv_util::time::Instant; +use tracker::INVALID_TRACKER_TOKEN; use txn_types::{Key, KvPair, Mutation, TimeStamp, Value}; /// A builder to build a `SyncTestStorage`. 
@@ -179,10 +180,11 @@ impl SyncTestStorage { req }) .collect(); + let trackers = keys.iter().map(|_| INVALID_TRACKER_TOKEN).collect(); let p = GetConsumer::new(); block_on( self.store - .batch_get_command(requests, ids, p.clone(), Instant::now()), + .batch_get_command(requests, ids, trackers, p.clone(), Instant::now()), )?; let mut values = vec![]; for value in p.take_data().into_iter() { diff --git a/components/tikv_kv/Cargo.toml b/components/tikv_kv/Cargo.toml index 50a92878404..5b640d3b0b7 100644 --- a/components/tikv_kv/Cargo.toml +++ b/components/tikv_kv/Cargo.toml @@ -46,6 +46,7 @@ slog_derive = "0.2" tempfile = "3.0" thiserror = "1.0" tikv_util = { path = "../tikv_util", default-features = false } +tracker = { path = "../tracker" } txn_types = { path = "../txn_types", default-features = false } [dev-dependencies] diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 009f8fbc93e..adb04fc25cd 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -20,7 +20,14 @@ mod raftstore_impls; mod rocksdb_engine; mod stats; -use std::{cell::UnsafeCell, error, num::NonZeroU64, ptr, result, sync::Arc, time::Duration}; +use std::{ + cell::UnsafeCell, + error, + num::NonZeroU64, + ptr, result, + sync::Arc, + time::{Duration, Instant}, +}; use engine_traits::{ CfName, IterOptions, KvEngine as LocalEngine, Mutable, MvccProperties, ReadOptions, WriteBatch, @@ -38,6 +45,7 @@ use pd_client::BucketMeta; use raftstore::store::{PessimisticLockPair, TxnExt}; use thiserror::Error; use tikv_util::{deadline::Deadline, escape, time::ThreadReadId}; +use tracker::with_tls_tracker; use txn_types::{Key, PessimisticLock, TimeStamp, TxnExtra, Value}; pub use self::{ @@ -561,6 +569,7 @@ pub fn snapshot( engine: &E, ctx: SnapContext<'_>, ) -> impl std::future::Future> { + let begin = Instant::now(); let (callback, future) = tikv_util::future::paired_must_called_future_callback(drop_snapshot_callback::); let val = engine.async_snapshot(ctx, 
callback); @@ -570,6 +579,9 @@ pub fn snapshot( let result = future .map_err(|cancel| Error::from(ErrorInner::Other(box_err!(cancel)))) .await?; + with_tls_tracker(|tracker| { + tracker.metrics.get_snapshot_nanos += begin.elapsed().as_nanos() as u64; + }); fail_point!("after-snapshot"); result } diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 45425f83cec..9bbea72d8d5 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -55,6 +55,7 @@ time = "0.1" tokio = { version = "1.5", features = ["rt-multi-thread"] } tokio-executor = "0.1" tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } +tracker = { path = "../tracker" } url = "2" yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } diff --git a/components/tikv_util/src/yatp_pool/future_pool.rs b/components/tikv_util/src/yatp_pool/future_pool.rs index 0beca9a5dee..a40221e3b6d 100644 --- a/components/tikv_util/src/yatp_pool/future_pool.rs +++ b/components/tikv_util/src/yatp_pool/future_pool.rs @@ -14,6 +14,7 @@ use std::{ use fail::fail_point; use futures::channel::oneshot::{self, Canceled}; use prometheus::{IntCounter, IntGauge}; +use tracker::TrackedFuture; use yatp::task::future; pub type ThreadPool = yatp::ThreadPool; @@ -81,7 +82,7 @@ impl FuturePool { where F: Future + Send + 'static, { - self.inner.spawn(future) + self.inner.spawn(TrackedFuture::new(future)) } /// Spawns a future in the pool and returns a handle to the result of the future. 
@@ -95,7 +96,7 @@ impl FuturePool { F: Future + Send + 'static, F::Output: Send, { - self.inner.spawn_handle(future) + self.inner.spawn_handle(TrackedFuture::new(future)) } } diff --git a/components/tracker/Cargo.toml b/components/tracker/Cargo.toml new file mode 100644 index 00000000000..fcaf546cf5b --- /dev/null +++ b/components/tracker/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "tracker" +version = "0.0.1" +edition = "2018" +publish = false + +[dependencies] +collections = { path = "../../components/collections" } +kvproto = { git = "https://github.com/pingcap/kvproto.git" } +lazy_static = "1" +parking_lot = "0.12" +pin-project = "1" +prometheus = "0.13" +slab = "0.4" diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs new file mode 100644 index 00000000000..909e093ed3f --- /dev/null +++ b/components/tracker/src/lib.rs @@ -0,0 +1,86 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +#![feature(derive_default_enum)] +#![feature(array_from_fn)] + +mod metrics; +mod slab; +mod tls; + +use kvproto::kvrpcpb as pb; + +pub use self::{ + slab::{TrackerToken, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}, + tls::*, +}; + +#[derive(Debug)] +pub struct Tracker { + pub req_info: RequestInfo, + pub metrics: RequestMetrics, + // TODO: Add request stage info + // pub current_stage: RequestStage, +} + +impl Tracker { + pub fn new(req_info: RequestInfo) -> Self { + Self { + req_info, + metrics: Default::default(), + } + } + + pub fn write_scan_detail(&self, detail_v2: &mut pb::ScanDetailV2) { + detail_v2.set_rocksdb_block_read_byte(self.metrics.block_read_byte); + detail_v2.set_rocksdb_block_read_count(self.metrics.block_read_count); + detail_v2.set_rocksdb_block_cache_hit_count(self.metrics.block_cache_hit_count); + detail_v2.set_rocksdb_key_skipped_count(self.metrics.internal_key_skipped_count); + detail_v2.set_rocksdb_delete_skipped_count(self.metrics.deleted_key_skipped_count); + } +} + +#[derive(Debug, Default)] +pub struct 
RequestInfo { + pub region_id: u64, + pub start_ts: u64, + pub task_id: u64, + pub resource_group_tag: Vec, + pub request_type: RequestType, +} + +impl RequestInfo { + pub fn new(ctx: &pb::Context, request_type: RequestType, start_ts: u64) -> RequestInfo { + RequestInfo { + region_id: ctx.get_region_id(), + start_ts, + task_id: ctx.get_task_id(), + resource_group_tag: ctx.get_resource_group_tag().to_vec(), + request_type, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum RequestType { + #[default] + Unknown, + KvGet, + KvBatchGet, + KvBatchGetCommand, + KvScan, + KvScanLock, + CoprocessorDag, + CoprocessorAnalyze, + CoprocessorChecksum, +} + +#[derive(Debug, Default, Clone)] +pub struct RequestMetrics { + pub get_snapshot_nanos: u64, + pub block_cache_hit_count: u64, + pub block_read_count: u64, + pub block_read_byte: u64, + pub block_read_nanos: u64, + pub internal_key_skipped_count: u64, + pub deleted_key_skipped_count: u64, +} diff --git a/components/tracker/src/metrics.rs b/components/tracker/src/metrics.rs new file mode 100644 index 00000000000..90cce44cd52 --- /dev/null +++ b/components/tracker/src/metrics.rs @@ -0,0 +1,12 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use lazy_static::lazy_static; +use prometheus::*; + +lazy_static! { + pub static ref SLAB_FULL_COUNTER: IntCounter = register_int_counter!( + "tikv_tracker_slab_full_counter", + "Number of tracker slab insert failures because of fullness" + ) + .unwrap(); +} diff --git a/components/tracker/src/slab.rs b/components/tracker/src/slab.rs new file mode 100644 index 00000000000..9d2803e7585 --- /dev/null +++ b/components/tracker/src/slab.rs @@ -0,0 +1,269 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{array, cell::Cell, fmt}; + +use lazy_static::lazy_static; +use parking_lot::Mutex; +use slab::Slab; + +use crate::{metrics::*, Tracker}; + +const SLAB_SHARD_BITS: u32 = 6; +const SLAB_SHARD_COUNT: usize = 1 << SLAB_SHARD_BITS; // 64 +const SLAB_SHARD_INIT_CAPACITY: usize = 256; +const SLAB_SHARD_MAX_CAPACITY: usize = 4096; + +lazy_static! { + pub static ref GLOBAL_TRACKERS: ShardedSlab = ShardedSlab::new(SLAB_SHARD_INIT_CAPACITY); +} + +fn next_shard_id() -> usize { + thread_local! { + static CURRENT_SHARD_ID: Cell = Cell::new(0); + } + CURRENT_SHARD_ID.with(|c| { + let shard_id = c.get(); + c.set((shard_id + 1) % SLAB_SHARD_COUNT); + shard_id + }) +} + +pub struct ShardedSlab { + shards: [Mutex; SLAB_SHARD_COUNT], +} + +impl ShardedSlab { + pub fn new(capacity_per_shard: usize) -> ShardedSlab { + let shards = array::from_fn(|shard_id| { + Mutex::new(TrackerSlab::with_capacity( + shard_id as u32, + capacity_per_shard, + )) + }); + ShardedSlab { shards } + } + + pub fn insert(&self, tracker: Tracker) -> TrackerToken { + let shard_id = next_shard_id(); + self.shards[shard_id].lock().insert(tracker) + } + + pub fn remove(&self, token: TrackerToken) -> Option { + if token != INVALID_TRACKER_TOKEN { + let shard_id = token.shard_id(); + self.shards[shard_id as usize].lock().remove(token) + } else { + None + } + } + + pub fn with_tracker(&self, token: TrackerToken, f: F) -> Option + where + F: FnOnce(&mut Tracker) -> T, + { + if token != INVALID_TRACKER_TOKEN { + let shard_id = token.shard_id(); + self.shards[shard_id as usize].lock().get_mut(token).map(f) + } else { + None + } + } + + pub fn for_each(&self, mut f: F) + where + F: FnMut(&mut Tracker), + { + for shard in &self.shards { + for (_, tracker) in shard.lock().slab.iter_mut() { + f(&mut tracker.tracker) + } + } + } +} + +const SLAB_KEY_BITS: u32 = 32; +const SHARD_ID_BITS_SHIFT: u32 = 64 - SLAB_SHARD_BITS; +const SEQ_BITS_MASK: u32 = (1 << (SHARD_ID_BITS_SHIFT - SLAB_KEY_BITS)) - 1; + +struct 
TrackerSlab { + slab: Slab, + shard_id: u32, + seq: u32, +} + +impl TrackerSlab { + fn with_capacity(shard_id: u32, capacity: usize) -> Self { + assert!(capacity < SLAB_SHARD_MAX_CAPACITY); + TrackerSlab { + slab: Slab::with_capacity(capacity), + shard_id, + seq: 0, + } + } + + // Returns the seq and key of the inserted tracker. + // If the slab reaches the max capacity, the tracker will be dropped silently + // and INVALID_TRACKER_TOKEN will be returned. + fn insert(&mut self, tracker: Tracker) -> TrackerToken { + if self.slab.len() < SLAB_SHARD_MAX_CAPACITY { + self.seq = (self.seq + 1) & SEQ_BITS_MASK; + let key = self.slab.insert(SlabEntry { + tracker, + seq: self.seq, + }); + TrackerToken::new(self.shard_id, self.seq, key) + } else { + SLAB_FULL_COUNTER.inc(); + INVALID_TRACKER_TOKEN + } + } + + pub fn get_mut(&mut self, token: TrackerToken) -> Option<&mut Tracker> { + if let Some(entry) = self.slab.get_mut(token.key()) { + if entry.seq == token.seq() { + return Some(&mut entry.tracker); + } + } + None + } + + pub fn remove(&mut self, token: TrackerToken) -> Option { + if self.get_mut(token).is_some() { + Some(self.slab.remove(token.key()).tracker) + } else { + None + } + } +} + +struct SlabEntry { + tracker: Tracker, + seq: u32, +} + +pub const INVALID_TRACKER_TOKEN: TrackerToken = TrackerToken(u64::MAX); + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct TrackerToken(u64); + +impl TrackerToken { + fn new(shard_id: u32, seq: u32, key: usize) -> TrackerToken { + debug_assert!(shard_id < SLAB_SHARD_COUNT as u32); + debug_assert!(seq <= SEQ_BITS_MASK); + debug_assert!(key < (1 << SLAB_KEY_BITS)); + TrackerToken( + ((shard_id as u64) << SHARD_ID_BITS_SHIFT) + | ((seq as u64) << SLAB_KEY_BITS) + | (key as u64), + ) + } + + fn shard_id(&self) -> u32 { + (self.0 >> SHARD_ID_BITS_SHIFT) as u32 + } + + fn seq(&self) -> u32 { + (self.0 >> SLAB_KEY_BITS) as u32 & SEQ_BITS_MASK + } + + fn key(&self) -> usize { + (self.0 & ((1 << SLAB_KEY_BITS) - 1)) as usize + } +} + 
+impl fmt::Debug for TrackerToken { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("TrackerToken") + .field("shard_id", &self.shard_id()) + .field("seq", &self.seq()) + .field("key", &self.key()) + .finish() + } +} + +#[cfg(test)] +mod tests { + use std::{sync::Arc, thread}; + + use super::*; + use crate::RequestInfo; + + #[test] + fn test_tracker_token() { + let shard_id = 47; + let seq = SEQ_BITS_MASK - 3; + let key = 65535; + let token = TrackerToken::new(shard_id, seq, key); + assert_eq!(token.shard_id(), shard_id); + assert_eq!(token.seq(), seq); + assert_eq!(token.key(), key); + } + + #[test] + fn test_basic() { + let slab = ShardedSlab::new(2); + // Insert 192 trackers + let tokens: Vec = (0..192) + .map(|i| { + let tracker = Tracker::new(RequestInfo { + task_id: i, + ..Default::default() + }); + slab.insert(tracker) + }) + .collect(); + // Get the tracker with the token and check the content + for (i, token) in tokens.iter().enumerate() { + slab.with_tracker(*token, |tracker| { + assert_eq!(i as u64, tracker.req_info.task_id); + }); + } + // Remove 0 ~ 128 trackers + for (i, token) in tokens[..128].iter().enumerate() { + let tracker = slab.remove(*token).unwrap(); + assert_eq!(i as u64, tracker.req_info.task_id); + } + // Insert another 192 trackers + for i in 192..384 { + let tracker = Tracker::new(RequestInfo { + task_id: i, + ..Default::default() + }); + slab.insert(tracker); + } + // Iterate over all trackers in the slab + let mut tracker_ids = Vec::new(); + slab.for_each(|tracker| tracker_ids.push(tracker.req_info.task_id)); + tracker_ids.sort_unstable(); + assert_eq!(tracker_ids, (128..384).collect::>()); + } + + #[test] + fn test_shard() { + let slab = Arc::new(ShardedSlab::new(4)); + let threads = [1, 2].map(|i| { + let slab = slab.clone(); + thread::spawn(move || { + for _ in 0..SLAB_SHARD_COUNT { + slab.insert(Tracker::new(RequestInfo { + task_id: i, + ..Default::default() + })); + } + }) + }); + for th in threads { 
+ th.join().unwrap(); + } + for shard in &slab.shards { + let mut v: Vec<_> = shard + .lock() + .slab + .iter() + .map(|(_, entry)| entry.tracker.req_info.task_id) + .collect(); + v.sort_unstable(); + assert_eq!(v, [1, 2]); + } + } +} diff --git a/components/tracker/src/tls.rs b/components/tracker/src/tls.rs new file mode 100644 index 00000000000..982f483c8bc --- /dev/null +++ b/components/tracker/src/tls.rs @@ -0,0 +1,69 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + cell::Cell, + future::Future, + pin::Pin, + task::{Context, Poll}, +}; + +use pin_project::pin_project; + +use crate::{slab::TrackerToken, Tracker, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}; + +thread_local! { + static TLS_TRACKER_TOKEN: Cell = Cell::new(INVALID_TRACKER_TOKEN); +} + +pub fn set_tls_tracker_token(token: TrackerToken) { + TLS_TRACKER_TOKEN.with(|c| { + c.set(token); + }) +} + +pub fn clear_tls_tracker_token() { + set_tls_tracker_token(INVALID_TRACKER_TOKEN); +} + +pub fn get_tls_tracker_token() -> TrackerToken { + TLS_TRACKER_TOKEN.with(|c| c.get()) +} + +pub fn with_tls_tracker(mut f: F) +where + F: FnMut(&mut Tracker), +{ + TLS_TRACKER_TOKEN.with(|c| { + GLOBAL_TRACKERS.with_tracker(c.get(), &mut f); + }); +} + +#[pin_project] +pub struct TrackedFuture { + #[pin] + future: F, + tracker: TrackerToken, +} + +impl TrackedFuture { + pub fn new(future: F) -> TrackedFuture { + TrackedFuture { + future, + tracker: get_tls_tracker_token(), + } + } +} + +impl Future for TrackedFuture { + type Output = F::Output; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let this = self.project(); + TLS_TRACKER_TOKEN.with(|c| { + c.set(*this.tracker); + let res = this.future.poll(cx); + c.set(INVALID_TRACKER_TOKEN); + res + }) + } +} diff --git a/scripts/check-bins.py b/scripts/check-bins.py index 41d9d57c866..04a3b77c01d 100644 --- a/scripts/check-bins.py +++ b/scripts/check-bins.py @@ -14,6 +14,7 @@ "online_config", "online_config_derive", 
"match_template", "tidb_query_codegen", "panic_hook", "fuzz", "fuzzer_afl", "fuzzer_honggfuzz", "fuzzer_libfuzzer", "coprocessor_plugin_api", "example_plugin", "memory_trace_macros", "case_macros", + "tracker" } JEMALLOC_SYMBOL = ["je_arena_boot", " malloc"] diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 070fd25557b..fa1dce909a2 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -2,6 +2,9 @@ use std::{borrow::Cow, future::Future, marker::PhantomData, sync::Arc, time::Duration}; +use ::tracker::{ + set_tls_tracker_token, with_tls_tracker, RequestInfo, RequestType, GLOBAL_TRACKERS, +}; use async_stream::try_stream; use concurrency_manager::ConcurrencyManager; use engine_traits::PerfLevel; @@ -164,6 +167,7 @@ impl Endpoint { let mut input = CodedInputStream::from_bytes(&data); input.set_recursion_limit(self.recursion_limit); + let req_ctx: ReqContext; let builder: RequestHandlerBuilder; @@ -201,6 +205,10 @@ impl Endpoint { cache_match_version, self.perf_level, ); + with_tls_tracker(|tracker| { + tracker.req_info.request_type = RequestType::CoprocessorDag; + tracker.req_info.start_ts = start_ts; + }); self.check_memory_locks(&req_ctx)?; @@ -260,6 +268,10 @@ impl Endpoint { cache_match_version, self.perf_level, ); + with_tls_tracker(|tracker| { + tracker.req_info.request_type = RequestType::CoprocessorAnalyze; + tracker.req_info.start_ts = start_ts; + }); self.check_memory_locks(&req_ctx)?; let quota_limiter = self.quota_limiter.clone(); @@ -300,6 +312,10 @@ impl Endpoint { cache_match_version, self.perf_level, ); + with_tls_tracker(|tracker| { + tracker.req_info.request_type = RequestType::CoprocessorChecksum; + tracker.req_info.start_ts = start_ts; + }); self.check_memory_locks(&req_ctx)?; @@ -316,6 +332,7 @@ impl Endpoint { } tp => return Err(box_err!("unsupported tp {}", tp)), }; + Ok((builder, req_ctx)) } @@ -360,7 +377,7 @@ impl Endpoint { /// `RequestHandler` to process the request and produce a result. 
async fn handle_unary_request_impl( semaphore: Option>, - mut tracker: Box, + mut tracker: Box>, handler_builder: RequestHandlerBuilder, ) -> Result> { // When this function is being executed, it may be queued for a long time, so that @@ -468,17 +485,25 @@ impl Endpoint { req: coppb::Request, peer: Option, ) -> impl Future> { + let tracker = GLOBAL_TRACKERS.insert(::tracker::Tracker::new(RequestInfo::new( + req.get_context(), + RequestType::Unknown, + req.start_ts, + ))); + set_tls_tracker_token(tracker); let result_of_future = self .parse_request_and_check_memory_locks(req, peer, false) .map(|(handler_builder, req_ctx)| self.handle_unary_request(req_ctx, handler_builder)); async move { - match result_of_future { + let res = match result_of_future { Err(e) => make_error_response(e).into(), Ok(handle_fut) => handle_fut .await .unwrap_or_else(|e| make_error_response(e).into()), - } + }; + GLOBAL_TRACKERS.remove(tracker); + res } } @@ -489,7 +514,7 @@ impl Endpoint { /// `RequestHandler` multiple times to process the request and produce multiple results. fn handle_stream_request_impl( semaphore: Option>, - mut tracker: Box, + mut tracker: Box>, handler_builder: RequestHandlerBuilder, ) -> impl futures::stream::Stream> { try_stream! 
{ diff --git a/src/coprocessor/interceptors/mod.rs b/src/coprocessor/interceptors/mod.rs index f7e280fb137..95496b234df 100644 --- a/src/coprocessor/interceptors/mod.rs +++ b/src/coprocessor/interceptors/mod.rs @@ -4,6 +4,4 @@ mod concurrency_limiter; mod deadline; mod tracker; -pub use concurrency_limiter::limit_concurrency; -pub use deadline::check_deadline; -pub use tracker::track; +pub use self::{concurrency_limiter::limit_concurrency, deadline::check_deadline, tracker::track}; diff --git a/src/coprocessor/interceptors/tracker.rs b/src/coprocessor/interceptors/tracker.rs index 4224a27e2be..ec8654887ef 100644 --- a/src/coprocessor/interceptors/tracker.rs +++ b/src/coprocessor/interceptors/tracker.rs @@ -7,38 +7,42 @@ use std::{ }; use pin_project::pin_project; +use tikv_kv::Engine; use crate::coprocessor::tracker::Tracker as CopTracker; -pub fn track<'a, F: Future + 'a>( +pub fn track<'a, F: Future + 'a, E: Engine>( fut: F, - cop_tracker: &'a mut CopTracker, + cop_tracker: &'a mut CopTracker, ) -> impl Future + 'a { Tracker::new(fut, cop_tracker) } #[pin_project] -struct Tracker<'a, F> +struct Tracker<'a, F, E> where F: Future, + E: Engine, { #[pin] fut: F, - cop_tracker: &'a mut CopTracker, + cop_tracker: &'a mut CopTracker, } -impl<'a, F> Tracker<'a, F> +impl<'a, F, E> Tracker<'a, F, E> where F: Future, + E: Engine, { - fn new(fut: F, cop_tracker: &'a mut CopTracker) -> Self { + fn new(fut: F, cop_tracker: &'a mut CopTracker) -> Self { Tracker { fut, cop_tracker } } } -impl<'a, F: Future> Future for Tracker<'a, F> +impl<'a, F, E> Future for Tracker<'a, F, E> where F: Future, + E: Engine, { type Output = F::Output; diff --git a/src/coprocessor/metrics.rs b/src/coprocessor/metrics.rs index f54064dcca3..f95ff6ee4db 100644 --- a/src/coprocessor/metrics.rs +++ b/src/coprocessor/metrics.rs @@ -3,7 +3,6 @@ use std::{cell::RefCell, mem, sync::Arc}; use collections::HashMap; -use engine_rocks::ReadPerfContext; use kvproto::{metapb, pdpb::QueryKind}; use 
pd_client::BucketMeta; use prometheus::*; @@ -62,57 +61,6 @@ make_auto_flush_static_metric! { snapshot, } - pub label_enum PerfMetric { - user_key_comparison_count, - block_cache_hit_count, - block_read_count, - block_read_byte, - block_read_time, - block_cache_index_hit_count, - index_block_read_count, - block_cache_filter_hit_count, - filter_block_read_count, - block_checksum_time, - block_decompress_time, - get_read_bytes, - iter_read_bytes, - internal_key_skipped_count, - internal_delete_skipped_count, - internal_recent_skipped_count, - get_snapshot_time, - get_from_memtable_time, - get_from_memtable_count, - get_post_process_time, - get_from_output_files_time, - seek_on_memtable_time, - seek_on_memtable_count, - next_on_memtable_count, - prev_on_memtable_count, - seek_child_seek_time, - seek_child_seek_count, - seek_min_heap_time, - seek_max_heap_time, - seek_internal_seek_time, - db_mutex_lock_nanos, - db_condition_wait_nanos, - read_index_block_nanos, - read_filter_block_nanos, - new_table_block_iter_nanos, - new_table_iterator_nanos, - block_seek_nanos, - find_table_nanos, - bloom_memtable_hit_count, - bloom_memtable_miss_count, - bloom_sst_hit_count, - bloom_sst_miss_count, - get_cpu_nanos, - iter_next_cpu_nanos, - iter_prev_cpu_nanos, - iter_seek_cpu_nanos, - encrypt_data_nanos, - decrypt_data_nanos, - } - pub label_enum MemLockCheckResult { unlocked, locked, @@ -127,11 +75,6 @@ make_auto_flush_static_metric! { "type" => WaitType, } - pub struct PerfCounter: LocalIntCounter { - "req" => ReqTag, - "metric" => PerfMetric, - } - pub struct CoprScanKeysHistogram: LocalHistogram { "req" => ReqTag, "kind" => ScanKeysKind, @@ -208,14 +151,6 @@ lazy_static! 
{ .unwrap(); pub static ref COPR_SCAN_DETAILS_STATIC: CoprScanDetails = auto_flush_from!(COPR_SCAN_DETAILS, CoprScanDetails); - pub static ref COPR_ROCKSDB_PERF_COUNTER: IntCounterVec = register_int_counter_vec!( - "tikv_coprocessor_rocksdb_perf", - "Total number of RocksDB internal operations from PerfContext", - &["req", "metric"] - ) - .unwrap(); - pub static ref COPR_ROCKSDB_PERF_COUNTER_STATIC: PerfCounter = - auto_flush_from!(COPR_ROCKSDB_PERF_COUNTER, PerfCounter); pub static ref COPR_DAG_REQ_COUNT: IntCounterVec = register_int_counter_vec!( "tikv_coprocessor_dag_request_count", "Total number of DAG requests", @@ -266,7 +201,6 @@ make_static_metric! { pub struct CopLocalMetrics { local_scan_details: HashMap, local_read_stats: ReadStats, - local_perf_stats: HashMap, } thread_local! { @@ -274,20 +208,10 @@ thread_local! { CopLocalMetrics { local_scan_details: HashMap::default(), local_read_stats: ReadStats::default(), - local_perf_stats: HashMap::default(), } ); } -macro_rules! tls_flush_perf_stats { - ($tag:ident, $local_stats:ident, $stat:ident) => { - COPR_ROCKSDB_PERF_COUNTER_STATIC - .get($tag) - .$stat - .inc_by($local_stats.$stat as u64); - }; -} - impl From for CF { fn from(cf: GcKeysCF) -> CF { match cf { @@ -340,57 +264,6 @@ pub fn tls_flush(reporter: &R) { mem::swap(&mut read_stats, &mut m.local_read_stats); reporter.report_read_stats(read_stats); } - - for (req_tag, perf_stats) in m.local_perf_stats.drain() { - tls_flush_perf_stats!(req_tag, perf_stats, user_key_comparison_count); - tls_flush_perf_stats!(req_tag, perf_stats, block_cache_hit_count); - tls_flush_perf_stats!(req_tag, perf_stats, block_read_count); - tls_flush_perf_stats!(req_tag, perf_stats, block_read_byte); - tls_flush_perf_stats!(req_tag, perf_stats, block_read_time); - tls_flush_perf_stats!(req_tag, perf_stats, block_cache_index_hit_count); - tls_flush_perf_stats!(req_tag, perf_stats, index_block_read_count); - tls_flush_perf_stats!(req_tag, perf_stats, 
block_cache_filter_hit_count); - tls_flush_perf_stats!(req_tag, perf_stats, filter_block_read_count); - tls_flush_perf_stats!(req_tag, perf_stats, block_checksum_time); - tls_flush_perf_stats!(req_tag, perf_stats, block_decompress_time); - tls_flush_perf_stats!(req_tag, perf_stats, get_read_bytes); - tls_flush_perf_stats!(req_tag, perf_stats, iter_read_bytes); - tls_flush_perf_stats!(req_tag, perf_stats, internal_key_skipped_count); - tls_flush_perf_stats!(req_tag, perf_stats, internal_delete_skipped_count); - tls_flush_perf_stats!(req_tag, perf_stats, internal_recent_skipped_count); - tls_flush_perf_stats!(req_tag, perf_stats, get_snapshot_time); - tls_flush_perf_stats!(req_tag, perf_stats, get_from_memtable_time); - tls_flush_perf_stats!(req_tag, perf_stats, get_from_memtable_count); - tls_flush_perf_stats!(req_tag, perf_stats, get_post_process_time); - tls_flush_perf_stats!(req_tag, perf_stats, get_from_output_files_time); - tls_flush_perf_stats!(req_tag, perf_stats, seek_on_memtable_time); - tls_flush_perf_stats!(req_tag, perf_stats, seek_on_memtable_count); - tls_flush_perf_stats!(req_tag, perf_stats, next_on_memtable_count); - tls_flush_perf_stats!(req_tag, perf_stats, prev_on_memtable_count); - tls_flush_perf_stats!(req_tag, perf_stats, seek_child_seek_time); - tls_flush_perf_stats!(req_tag, perf_stats, seek_child_seek_count); - tls_flush_perf_stats!(req_tag, perf_stats, seek_min_heap_time); - tls_flush_perf_stats!(req_tag, perf_stats, seek_max_heap_time); - tls_flush_perf_stats!(req_tag, perf_stats, seek_internal_seek_time); - tls_flush_perf_stats!(req_tag, perf_stats, db_mutex_lock_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, db_condition_wait_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, read_index_block_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, read_filter_block_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, new_table_block_iter_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, new_table_iterator_nanos); - 
tls_flush_perf_stats!(req_tag, perf_stats, block_seek_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, find_table_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, bloom_memtable_hit_count); - tls_flush_perf_stats!(req_tag, perf_stats, bloom_memtable_miss_count); - tls_flush_perf_stats!(req_tag, perf_stats, bloom_sst_hit_count); - tls_flush_perf_stats!(req_tag, perf_stats, bloom_sst_miss_count); - tls_flush_perf_stats!(req_tag, perf_stats, get_cpu_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, iter_next_cpu_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, iter_prev_cpu_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, iter_seek_cpu_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, encrypt_data_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, decrypt_data_nanos); - } }); } @@ -438,12 +311,3 @@ pub fn tls_collect_query( .add_query_num(region_id, peer, key_range, QueryKind::Coprocessor); }); } - -pub fn tls_collect_perf_stats(cmd: ReqTag, perf_stats: &ReadPerfContext) { - TLS_COP_METRICS.with(|m| { - *(m.borrow_mut() - .local_perf_stats - .entry(cmd) - .or_insert_with(Default::default)) += *perf_stats; - }); -} diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index a12f4ee0c71..df43ad39a69 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -1,9 +1,12 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. -use engine_rocks::{ReadPerfContext, RocksPerfContext}; -use engine_traits::{PerfContext, PerfContextKind}; +use std::{cell::RefCell, marker::PhantomData}; + +use ::tracker::{get_tls_tracker_token, with_tls_tracker}; +use engine_traits::{PerfContext, PerfContextExt, PerfContextKind}; use kvproto::{kvrpcpb, kvrpcpb::ScanDetailV2}; use pd_client::BucketMeta; +use tikv_kv::{with_tls_engine, Engine}; use tikv_util::time::{self, Duration, Instant}; use txn_types::Key; @@ -39,7 +42,7 @@ enum TrackerState { /// Track coprocessor requests to update statistics and provide slow logs. 
#[derive(Debug)] -pub struct Tracker { +pub struct Tracker { request_begin_at: Instant, // Intermediate results @@ -60,10 +63,6 @@ pub struct Tracker { item_process_time: Duration, total_process_time: Duration, total_storage_stats: Statistics, - // TODO: This leaks the RocksDB engine from abstraction, try to use the PerfContext - // in engine_trait instead. - perf_context: RocksPerfContext, - total_perf_stats: ReadPerfContext, // Accumulated perf statistics slow_log_threshold: Duration, scan_process_time_ms: u64, @@ -71,13 +70,15 @@ pub struct Tracker { // Request info, used to print slow log. pub req_ctx: ReqContext, + + _phantom: PhantomData E>, } -impl Tracker { +impl Tracker { /// Initialize the tracker. Normally it is called outside future pool's factory context, /// because the future pool might be full and we need to wait it. This kind of wait time /// has to be recorded. - pub fn new(req_ctx: ReqContext, slow_log_threshold: Duration) -> Tracker { + pub fn new(req_ctx: ReqContext, slow_log_threshold: Duration) -> Self { let now = Instant::now_coarse(); Tracker { request_begin_at: now, @@ -92,12 +93,11 @@ impl Tracker { total_suspend_time: Duration::default(), total_process_time: Duration::default(), total_storage_stats: Statistics::default(), - perf_context: RocksPerfContext::new(req_ctx.perf_level, PerfContextKind::GenericRead), - total_perf_stats: ReadPerfContext::default(), scan_process_time_ms: 0, slow_log_threshold, req_ctx, buckets: None, + _phantom: PhantomData, } } @@ -140,7 +140,7 @@ impl Tracker { _ => unreachable!(), } - self.perf_context.start_observe(); + self.with_perf_context(|perf_context| perf_context.start_observe()); self.current_stage = TrackerState::ItemBegan(now); } @@ -152,10 +152,9 @@ impl Tracker { if let Some(storage_stats) = some_storage_stats { self.total_storage_stats.add(&storage_stats); } - // Record delta perf statistics - self.perf_context.report_metrics(); - let perf_statistics = self.perf_context.stats.read; - 
self.total_perf_stats += perf_statistics; + self.with_perf_context(|perf_context| { + perf_context.report_metrics(&[get_tls_tracker_token()]) + }); self.current_stage = TrackerState::ItemFinished(now); } else { unreachable!() @@ -212,15 +211,7 @@ impl Tracker { detail_v2.set_processed_versions(self.total_storage_stats.write.processed_keys as u64); detail_v2.set_processed_versions_size(self.total_storage_stats.processed_size as u64); detail_v2.set_total_versions(self.total_storage_stats.write.total_op_count() as u64); - detail_v2.set_rocksdb_delete_skipped_count( - self.total_perf_stats.internal_delete_skipped_count as u64, - ); - detail_v2 - .set_rocksdb_key_skipped_count(self.total_perf_stats.internal_key_skipped_count as u64); - detail_v2 - .set_rocksdb_block_cache_hit_count(self.total_perf_stats.block_cache_hit_count as u64); - detail_v2.set_rocksdb_block_read_count(self.total_perf_stats.block_read_count as u64); - detail_v2.set_rocksdb_block_read_byte(self.total_perf_stats.block_read_byte as u64); + with_tls_tracker(|tracker| tracker.write_scan_detail(&mut detail_v2)); exec_details_v2.set_scan_detail_v2(detail_v2); (exec_details, exec_details_v2) @@ -252,33 +243,35 @@ impl Tracker { .unwrap_or_default() }); - info!(#"slow_log", "slow-query"; - "region_id" => &self.req_ctx.context.get_region_id(), - "remote_host" => &self.req_ctx.peer, - "total_lifetime" => ?self.req_lifetime, - "wait_time" => ?self.wait_time, - "wait_time.schedule" => ?self.schedule_wait_time, - "wait_time.snapshot" => ?self.snapshot_wait_time, - "handler_build_time" => ?self.handler_build_time, - "total_process_time" => ?self.total_process_time, - "total_suspend_time" => ?self.total_suspend_time, - "txn_start_ts" => self.req_ctx.txn_start_ts, - "table_id" => some_table_id, - "tag" => self.req_ctx.tag.get_str(), - "scan.is_desc" => self.req_ctx.is_desc_scan, - "scan.processed" => total_storage_stats.write.processed_keys, - "scan.processed_size" => total_storage_stats.processed_size, - 
"scan.total" => total_storage_stats.write.total_op_count(), - "scan.ranges" => self.req_ctx.ranges.len(), - "scan.range.first" => ?first_range, - "perf_stats.block_cache_hit_count" => self.total_perf_stats.block_cache_hit_count, - "perf_stats.block_read_count" => self.total_perf_stats.block_read_count, - "perf_stats.block_read_byte" => self.total_perf_stats.block_read_byte, - "perf_stats.internal_key_skipped_count" - => self.total_perf_stats.internal_key_skipped_count, - "perf_stats.internal_delete_skipped_count" - => self.total_perf_stats.internal_delete_skipped_count, - ); + with_tls_tracker(|tracker| { + info!(#"slow_log", "slow-query"; + "region_id" => &self.req_ctx.context.get_region_id(), + "remote_host" => &self.req_ctx.peer, + "total_lifetime" => ?self.req_lifetime, + "wait_time" => ?self.wait_time, + "wait_time.schedule" => ?self.schedule_wait_time, + "wait_time.snapshot" => ?self.snapshot_wait_time, + "handler_build_time" => ?self.handler_build_time, + "total_process_time" => ?self.total_process_time, + "total_suspend_time" => ?self.total_suspend_time, + "txn_start_ts" => self.req_ctx.txn_start_ts, + "table_id" => some_table_id, + "tag" => self.req_ctx.tag.get_str(), + "scan.is_desc" => self.req_ctx.is_desc_scan, + "scan.processed" => total_storage_stats.write.processed_keys, + "scan.processed_size" => total_storage_stats.processed_size, + "scan.total" => total_storage_stats.write.total_op_count(), + "scan.ranges" => self.req_ctx.ranges.len(), + "scan.range.first" => ?first_range, + "perf_stats.block_cache_hit_count" => tracker.metrics.block_cache_hit_count, + "perf_stats.block_read_count" => tracker.metrics.block_read_count, + "perf_stats.block_read_byte" => tracker.metrics.block_read_byte, + "perf_stats.internal_key_skipped_count" + => tracker.metrics.internal_key_skipped_count, + "perf_stats.internal_delete_skipped_count" + => tracker.metrics.deleted_key_skipped_count, + ) + }); } // req time @@ -325,7 +318,6 @@ impl Tracker { 
.observe(total_storage_stats.write.processed_keys as f64); tls_collect_scan_details(self.req_ctx.tag, &total_storage_stats); - tls_collect_perf_stats(self.req_ctx.tag, &self.total_perf_stats); let peer = self.req_ctx.context.get_peer(); let region_id = self.req_ctx.context.get_region_id(); @@ -353,9 +345,47 @@ impl Tracker { ); self.current_stage = TrackerState::Tracked; } + + fn with_perf_context(&self, f: F) -> T + where + F: FnOnce(&mut Box) -> T, + { + thread_local! { + static SELECT: RefCell>> = RefCell::new(None); + static INDEX: RefCell>> = RefCell::new(None); + static ANALYZE_TABLE: RefCell>> = RefCell::new(None); + static ANALYZE_INDEX: RefCell>> = RefCell::new(None); + static ANALYZE_FULL_SAMPLING: RefCell>> = RefCell::new(None); + static CHECKSUM_TABLE: RefCell>> = RefCell::new(None); + static CHECKSUM_INDEX: RefCell>> = RefCell::new(None); + static TEST: RefCell>> = RefCell::new(None); + } + let tls_cell = match self.req_ctx.tag { + ReqTag::select => &SELECT, + ReqTag::index => &INDEX, + ReqTag::analyze_table => &ANALYZE_TABLE, + ReqTag::analyze_index => &ANALYZE_INDEX, + ReqTag::analyze_full_sampling => &ANALYZE_FULL_SAMPLING, + ReqTag::checksum_table => &CHECKSUM_TABLE, + ReqTag::checksum_index => &CHECKSUM_INDEX, + ReqTag::test => &TEST, + }; + tls_cell.with(|c| { + let mut c = c.borrow_mut(); + let perf_context = c.get_or_insert_with(|| unsafe { + with_tls_engine::(|engine| { + Box::new(engine.kv_engine().get_perf_context( + PerfLevel::Uninitialized, + PerfContextKind::Coprocessor(self.req_ctx.tag.get_str()), + )) + }) + }); + f(perf_context) + }) + } } -impl Drop for Tracker { +impl Drop for Tracker { /// `Tracker` may be dropped without even calling `on_begin_all_items`. For example, if /// get snapshot failed. So we fast-forward if some steps are missing. 
fn drop(&mut self) { diff --git a/src/read_pool.rs b/src/read_pool.rs index 239e0fc61e5..cebd1965153 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -15,6 +15,7 @@ use tikv_util::{ sys::SysQuota, yatp_pool::{self, FuturePool, PoolTicker, YatpPoolBuilder}, }; +use tracker::TrackedFuture; use yatp::{pool::Remote, queue::Extras, task::future::TaskCell}; use self::metrics::*; @@ -121,10 +122,10 @@ impl ReadPoolHandle { }; let extras = Extras::new_multilevel(task_id, fixed_level); let task_cell = TaskCell::new( - async move { + TrackedFuture::new(async move { f.await; running_tasks.dec(); - }, + }), extras, ); remote.spawn(task_cell); diff --git a/src/server/service/batch.rs b/src/server/service/batch.rs index e1f20439471..1a7fcb59c3a 100644 --- a/src/server/service/batch.rs +++ b/src/server/service/batch.rs @@ -2,9 +2,9 @@ // #[PerformanceCriticalPath] use api_version::KvFormat; -use engine_rocks::ReadPerfContext; use kvproto::kvrpcpb::*; use tikv_util::{future::poll_future_notify, mpsc::batch::Sender, time::Instant}; +use tracker::{with_tls_tracker, RequestInfo, RequestType, Tracker, TrackerToken, GLOBAL_TRACKERS}; use crate::{ server::{ @@ -27,6 +27,7 @@ pub struct ReqBatcher { gets: Vec, raw_gets: Vec, get_ids: Vec, + get_trackers: Vec, raw_get_ids: Vec, begin_instant: Instant, batch_size: usize, @@ -39,6 +40,7 @@ impl ReqBatcher { gets: vec![], raw_gets: vec![], get_ids: vec![], + get_trackers: vec![], raw_get_ids: vec![], begin_instant, batch_size: std::cmp::min(batch_size, MAX_BATCH_GET_REQUEST_COUNT), @@ -54,8 +56,14 @@ impl ReqBatcher { } pub fn add_get_request(&mut self, req: GetRequest, id: u64) { + let tracker = GLOBAL_TRACKERS.insert(Tracker::new(RequestInfo::new( + req.get_context(), + RequestType::KvBatchGetCommand, + req.get_version(), + ))); self.gets.push(req); self.get_ids.push(id); + self.get_trackers.push(tracker); } pub fn add_raw_get_request(&mut self, req: RawGetRequest, id: u64) { @@ -71,7 +79,8 @@ impl ReqBatcher { if self.gets.len() >= 
self.batch_size { let gets = std::mem::take(&mut self.gets); let ids = std::mem::take(&mut self.get_ids); - future_batch_get_command(storage, ids, gets, tx.clone(), self.begin_instant); + let trackers = std::mem::take(&mut self.get_trackers); + future_batch_get_command(storage, ids, gets, trackers, tx.clone(), self.begin_instant); } if self.raw_gets.len() >= self.batch_size { @@ -91,6 +100,7 @@ impl ReqBatcher { storage, self.get_ids, self.gets, + self.get_trackers, tx.clone(), self.begin_instant, ); @@ -141,24 +151,17 @@ pub struct GetCommandResponseConsumer { tx: Sender, } -impl ResponseBatchConsumer<(Option>, Statistics, ReadPerfContext)> - for GetCommandResponseConsumer -{ - fn consume( - &self, - id: u64, - res: Result<(Option>, Statistics, ReadPerfContext)>, - begin: Instant, - ) { +impl ResponseBatchConsumer<(Option>, Statistics)> for GetCommandResponseConsumer { + fn consume(&self, id: u64, res: Result<(Option>, Statistics)>, begin: Instant) { let mut resp = GetResponse::default(); if let Some(err) = extract_region_error(&res) { resp.set_region_error(err); } else { match res { - Ok((val, statistics, perf_statistics)) => { + Ok((val, statistics)) => { let scan_detail_v2 = resp.mut_exec_details_v2().mut_scan_detail_v2(); statistics.write_scan_detail(scan_detail_v2); - perf_statistics.write_scan_detail(scan_detail_v2); + with_tls_tracker(|tracker| tracker.write_scan_detail(scan_detail_v2)); match val { Some(val) => resp.set_value(val), None => resp.set_not_found(true), @@ -208,6 +211,7 @@ fn future_batch_get_command( storage: &Storage, requests: Vec, gets: Vec, + trackers: Vec, tx: Sender, begin_instant: tikv_util::time::Instant, ) { @@ -218,12 +222,16 @@ fn future_batch_get_command( let res = storage.batch_get_command( gets, requests, + trackers.clone(), GetCommandResponseConsumer { tx: tx.clone() }, begin_instant, ); let f = async move { // This error can only cause by readpool busy. 
let res = res.await; + for tracker in trackers { + GLOBAL_TRACKERS.remove(tracker); + } if let Some(e) = extract_region_error(&res) { let mut resp = GetResponse::default(); resp.set_region_error(e); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index c4960b0629a..73215f6922c 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -43,6 +43,7 @@ use tikv_util::{ time::{duration_to_ms, duration_to_sec, Instant}, worker::Scheduler, }; +use tracker::{set_tls_tracker_token, RequestInfo, RequestType, Tracker, GLOBAL_TRACKERS}; use txn_types::{self, Key}; use super::batch::{BatcherBuilder, ReqBatcher}; @@ -1327,6 +1328,12 @@ fn future_get( storage: &Storage, mut req: GetRequest, ) -> impl Future> { + let tracker = GLOBAL_TRACKERS.insert(Tracker::new(RequestInfo::new( + req.get_context(), + RequestType::KvGet, + req.get_version(), + ))); + set_tls_tracker_token(tracker); let start = Instant::now(); let v = storage.get( req.take_context(), @@ -1346,7 +1353,9 @@ fn future_get( let exec_detail_v2 = resp.mut_exec_details_v2(); let scan_detail_v2 = exec_detail_v2.mut_scan_detail_v2(); stats.stats.write_scan_detail(scan_detail_v2); - stats.perf_stats.write_scan_detail(scan_detail_v2); + GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { + tracker.write_scan_detail(scan_detail_v2); + }); let time_detail = exec_detail_v2.mut_time_detail(); time_detail.set_kv_read_wall_time_ms(duration_ms as i64); time_detail.set_wait_wall_time_ms(stats.latency_stats.wait_wall_time_ms as i64); @@ -1360,6 +1369,7 @@ fn future_get( Err(e) => resp.set_error(extract_key_error(&e)), } } + GLOBAL_TRACKERS.remove(tracker); Ok(resp) } } @@ -1368,6 +1378,12 @@ fn future_scan( storage: &Storage, mut req: ScanRequest, ) -> impl Future> { + let tracker = GLOBAL_TRACKERS.insert(Tracker::new(RequestInfo::new( + req.get_context(), + RequestType::KvScan, + req.get_version(), + ))); + set_tls_tracker_token(tracker); let end_key = Key::from_raw_maybe_unbounded(req.get_end_key()); 
let v = storage.scan( @@ -1401,6 +1417,7 @@ fn future_scan( } } } + GLOBAL_TRACKERS.remove(tracker); Ok(resp) } } @@ -1409,6 +1426,12 @@ fn future_batch_get( storage: &Storage, mut req: BatchGetRequest, ) -> impl Future> { + let tracker = GLOBAL_TRACKERS.insert(Tracker::new(RequestInfo::new( + req.get_context(), + RequestType::KvBatchGet, + req.get_version(), + ))); + set_tls_tracker_token(tracker); let start = Instant::now(); let keys = req.get_keys().iter().map(|x| Key::from_raw(x)).collect(); let v = storage.batch_get(req.take_context(), keys, req.get_version().into()); @@ -1426,7 +1449,9 @@ fn future_batch_get( let exec_detail_v2 = resp.mut_exec_details_v2(); let scan_detail_v2 = exec_detail_v2.mut_scan_detail_v2(); stats.stats.write_scan_detail(scan_detail_v2); - stats.perf_stats.write_scan_detail(scan_detail_v2); + GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { + tracker.write_scan_detail(scan_detail_v2); + }); let time_detail = exec_detail_v2.mut_time_detail(); time_detail.set_kv_read_wall_time_ms(duration_ms as i64); time_detail.set_wait_wall_time_ms(stats.latency_stats.wait_wall_time_ms as i64); @@ -1444,6 +1469,7 @@ fn future_batch_get( } } } + GLOBAL_TRACKERS.remove(tracker); Ok(resp) } } @@ -1452,6 +1478,12 @@ fn future_scan_lock( storage: &Storage, mut req: ScanLockRequest, ) -> impl Future> { + let tracker = GLOBAL_TRACKERS.insert(Tracker::new(RequestInfo::new( + req.get_context(), + RequestType::KvScanLock, + req.get_max_version(), + ))); + set_tls_tracker_token(tracker); let start_key = Key::from_raw_maybe_unbounded(req.get_start_key()); let end_key = Key::from_raw_maybe_unbounded(req.get_end_key()); @@ -1474,6 +1506,7 @@ fn future_scan_lock( Err(e) => resp.set_error(extract_key_error(&e)), } } + GLOBAL_TRACKERS.remove(tracker); Ok(resp) } } diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index 408ad13ac20..fd4df727e54 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -5,7 +5,6 @@ use std::{cell::RefCell, mem, 
sync::Arc}; use collections::HashMap; -use engine_rocks::ReadPerfContext; use kvproto::{kvrpcpb::KeyRange, metapb, pdpb::QueryKind}; use pd_client::BucketMeta; use prometheus::*; @@ -20,7 +19,6 @@ use crate::{ struct StorageLocalMetrics { local_scan_details: HashMap, local_read_stats: ReadStats, - local_perf_stats: HashMap, } thread_local! { @@ -28,20 +26,10 @@ thread_local! { StorageLocalMetrics { local_scan_details: HashMap::default(), local_read_stats:ReadStats::default(), - local_perf_stats: HashMap::default(), } ); } -macro_rules! tls_flush_perf_stats { - ($tag:ident, $local_stats:ident, $stat:ident) => { - STORAGE_ROCKSDB_PERF_COUNTER_STATIC - .get($tag) - .$stat - .inc_by($local_stats.$stat as u64); - }; -} - pub fn tls_flush(reporter: &R) { TLS_STORAGE_METRICS.with(|m| { let mut m = m.borrow_mut(); @@ -64,57 +52,6 @@ pub fn tls_flush(reporter: &R) { mem::swap(&mut read_stats, &mut m.local_read_stats); reporter.report_read_stats(read_stats); } - - for (req_tag, perf_stats) in m.local_perf_stats.drain() { - tls_flush_perf_stats!(req_tag, perf_stats, user_key_comparison_count); - tls_flush_perf_stats!(req_tag, perf_stats, block_cache_hit_count); - tls_flush_perf_stats!(req_tag, perf_stats, block_read_count); - tls_flush_perf_stats!(req_tag, perf_stats, block_read_byte); - tls_flush_perf_stats!(req_tag, perf_stats, block_read_time); - tls_flush_perf_stats!(req_tag, perf_stats, block_cache_index_hit_count); - tls_flush_perf_stats!(req_tag, perf_stats, index_block_read_count); - tls_flush_perf_stats!(req_tag, perf_stats, block_cache_filter_hit_count); - tls_flush_perf_stats!(req_tag, perf_stats, filter_block_read_count); - tls_flush_perf_stats!(req_tag, perf_stats, block_checksum_time); - tls_flush_perf_stats!(req_tag, perf_stats, block_decompress_time); - tls_flush_perf_stats!(req_tag, perf_stats, get_read_bytes); - tls_flush_perf_stats!(req_tag, perf_stats, iter_read_bytes); - tls_flush_perf_stats!(req_tag, perf_stats, internal_key_skipped_count); - 
tls_flush_perf_stats!(req_tag, perf_stats, internal_delete_skipped_count); - tls_flush_perf_stats!(req_tag, perf_stats, internal_recent_skipped_count); - tls_flush_perf_stats!(req_tag, perf_stats, get_snapshot_time); - tls_flush_perf_stats!(req_tag, perf_stats, get_from_memtable_time); - tls_flush_perf_stats!(req_tag, perf_stats, get_from_memtable_count); - tls_flush_perf_stats!(req_tag, perf_stats, get_post_process_time); - tls_flush_perf_stats!(req_tag, perf_stats, get_from_output_files_time); - tls_flush_perf_stats!(req_tag, perf_stats, seek_on_memtable_time); - tls_flush_perf_stats!(req_tag, perf_stats, seek_on_memtable_count); - tls_flush_perf_stats!(req_tag, perf_stats, next_on_memtable_count); - tls_flush_perf_stats!(req_tag, perf_stats, prev_on_memtable_count); - tls_flush_perf_stats!(req_tag, perf_stats, seek_child_seek_time); - tls_flush_perf_stats!(req_tag, perf_stats, seek_child_seek_count); - tls_flush_perf_stats!(req_tag, perf_stats, seek_min_heap_time); - tls_flush_perf_stats!(req_tag, perf_stats, seek_max_heap_time); - tls_flush_perf_stats!(req_tag, perf_stats, seek_internal_seek_time); - tls_flush_perf_stats!(req_tag, perf_stats, db_mutex_lock_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, db_condition_wait_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, read_index_block_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, read_filter_block_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, new_table_block_iter_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, new_table_iterator_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, block_seek_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, find_table_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, bloom_memtable_hit_count); - tls_flush_perf_stats!(req_tag, perf_stats, bloom_memtable_miss_count); - tls_flush_perf_stats!(req_tag, perf_stats, bloom_sst_hit_count); - tls_flush_perf_stats!(req_tag, perf_stats, bloom_sst_miss_count); - tls_flush_perf_stats!(req_tag, perf_stats, 
get_cpu_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, iter_next_cpu_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, iter_prev_cpu_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, iter_seek_cpu_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, encrypt_data_nanos); - tls_flush_perf_stats!(req_tag, perf_stats, decrypt_data_nanos); - } }); } @@ -177,15 +114,6 @@ pub fn tls_collect_query_batch( }); } -pub fn tls_collect_perf_stats(cmd: CommandKind, perf_stats: &ReadPerfContext) { - TLS_STORAGE_METRICS.with(|m| { - *(m.borrow_mut() - .local_perf_stats - .entry(cmd) - .or_insert_with(Default::default)) += *perf_stats; - }) -} - make_auto_flush_static_metric! { pub label_enum CommandKind { get, @@ -277,57 +205,6 @@ make_auto_flush_static_metric! { unlocked, } - pub label_enum PerfMetric { - user_key_comparison_count, - block_cache_hit_count, - block_read_count, - block_read_byte, - block_read_time, - block_cache_index_hit_count, - index_block_read_count, - block_cache_filter_hit_count, - filter_block_read_count, - block_checksum_time, - block_decompress_time, - get_read_bytes, - iter_read_bytes, - internal_key_skipped_count, - internal_delete_skipped_count, - internal_recent_skipped_count, - get_snapshot_time, - get_from_memtable_time, - get_from_memtable_count, - get_post_process_time, - get_from_output_files_time, - seek_on_memtable_time, - seek_on_memtable_count, - next_on_memtable_count, - prev_on_memtable_count, - seek_child_seek_time, - seek_child_seek_count, - seek_min_heap_time, - seek_max_heap_time, - seek_internal_seek_time, - db_mutex_lock_nanos, - db_condition_wait_nanos, - read_index_block_nanos, - read_filter_block_nanos, - new_table_block_iter_nanos, - new_table_iterator_nanos, - block_seek_nanos, - find_table_nanos, - bloom_memtable_hit_count, - bloom_memtable_miss_count, - bloom_sst_hit_count, - bloom_sst_miss_count, - get_cpu_nanos, - iter_next_cpu_nanos, - iter_prev_cpu_nanos, - iter_seek_cpu_nanos, - encrypt_data_nanos, - 
decrypt_data_nanos, - } - pub label_enum InMemoryPessimisticLockingResult { success, full, @@ -381,11 +258,6 @@ make_auto_flush_static_metric! { "result" => CheckMemLockResult, } - pub struct PerfCounter: LocalIntCounter { - "req" => CommandKind, - "metric" => PerfMetric, - } - pub struct TxnCommandThrottleTimeCounterVec: LocalIntCounter { "type" => CommandKind, } @@ -620,16 +492,6 @@ lazy_static! { pub static ref CHECK_MEM_LOCK_DURATION_HISTOGRAM_VEC: CheckMemLockHistogramVec = auto_flush_from!(CHECK_MEM_LOCK_DURATION_HISTOGRAM, CheckMemLockHistogramVec); - pub static ref STORAGE_ROCKSDB_PERF_COUNTER: IntCounterVec = register_int_counter_vec!( - "tikv_storage_rocksdb_perf", - "Total number of RocksDB internal operations from PerfContext", - &["req", "metric"] - ) - .unwrap(); - - pub static ref STORAGE_ROCKSDB_PERF_COUNTER_STATIC: PerfCounter = - auto_flush_from!(STORAGE_ROCKSDB_PERF_COUNTER, PerfCounter); - pub static ref TXN_COMMAND_THROTTLE_TIME_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( "tikv_txn_command_throttle_time_total", "Total throttle time (microsecond) of txn commands.", diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 9e778afe064..692adec1ad1 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -52,6 +52,7 @@ mod types; use std::{ borrow::Cow, + cell::RefCell, iter, marker::PhantomData, sync::{ @@ -62,8 +63,10 @@ use std::{ use api_version::{ApiV1, ApiV2, KeyMode, KvFormat, RawValue}; use concurrency_manager::ConcurrencyManager; -use engine_rocks::{ReadPerfContext, ReadPerfInstant}; -use engine_traits::{raw_ttl::ttl_to_expire_ts, CfName, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS}; +use engine_traits::{ + raw_ttl::ttl_to_expire_ts, CfName, PerfContext, PerfContextExt, PerfContextKind, PerfLevel, + CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, +}; use futures::prelude::*; use kvproto::{ kvrpcpb::{ @@ -81,6 +84,10 @@ use tikv_util::{ quota_limiter::QuotaLimiter, time::{duration_to_ms, Instant, ThreadReadId}, }; +use tracker::{ + 
clear_tls_tracker_token, get_tls_tracker_token, set_tls_tracker_token, TrackedFuture, + TrackerToken, +}; use txn_types::{Key, KvPair, Lock, OldValues, TimeStamp, TsSet, Value}; pub use self::{ @@ -272,6 +279,42 @@ impl Storage { }) } + fn with_perf_context(cmd: CommandKind, f: Fn) -> T + where + Fn: FnOnce() -> T, + { + thread_local! { + static GET: RefCell>> = RefCell::new(None); + static BATCH_GET: RefCell>> = RefCell::new(None); + static BATCH_GET_COMMAND: RefCell>> = RefCell::new(None); + static SCAN: RefCell>> = RefCell::new(None); + static SCAN_LOCK: RefCell>> = RefCell::new(None); + } + let tls_cell = match cmd { + CommandKind::get => &GET, + CommandKind::batch_get => &BATCH_GET, + CommandKind::batch_get_command => &BATCH_GET_COMMAND, + CommandKind::scan => &SCAN, + CommandKind::scan_lock => &SCAN_LOCK, + _ => return f(), + }; + tls_cell.with(|c| { + let mut c = c.borrow_mut(); + let perf_context = c.get_or_insert_with(|| { + Self::with_tls_engine(|engine| { + Box::new(engine.kv_engine().get_perf_context( + PerfLevel::Uninitialized, + PerfContextKind::Storage(cmd.get_str()), + )) + }) + }); + perf_context.start_observe(); + let res = f(); + perf_context.report_metrics(&[get_tls_tracker_token()]); + res + }) + } + /// Get the underlying `Engine` of the `Storage`. 
pub fn get_engine(&self) -> E { self.engine.clone() @@ -597,14 +640,14 @@ impl Storage { )?; let snapshot = Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)).await?; + { let begin_instant = Instant::now(); let stage_snap_recv_ts = begin_instant; let buckets = snapshot.ext().get_buckets(); let mut statistics = Statistics::default(); - let (result, delta) = { + let result = Self::with_perf_context(CMD, || { let _guard = sample.observe_cpu(); - let perf_statistics = ReadPerfInstant::new(); let snap_store = SnapshotStore::new( snapshot, start_ts, @@ -614,18 +657,15 @@ impl Storage { access_locks, false, ); - let result = snap_store + snap_store .get(&key, &mut statistics) // map storage::txn::Error -> storage::Error .map_err(Error::from) .map(|r| { KV_COMMAND_KEYREAD_HISTOGRAM_STATIC.get(CMD).observe(1_f64); r - }); - - let delta = perf_statistics.delta(); - (result, delta) - }; + }) + }); metrics::tls_collect_scan_details(CMD, &statistics); metrics::tls_collect_read_flow( ctx.get_region_id(), @@ -634,7 +674,6 @@ impl Storage { &statistics, buckets.as_ref(), ); - metrics::tls_collect_perf_stats(CMD, &delta); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) .observe(begin_instant.saturating_elapsed_secs()); @@ -675,7 +714,6 @@ impl Storage { result?, KvGetStatistics { stats: statistics, - perf_stats: delta, latency_stats, }, )) @@ -694,12 +732,11 @@ impl Storage { /// Get values of a set of keys with separate context from a snapshot, return a list of `Result`s. /// /// Only writes that are committed before their respective `start_ts` are visible. 
- pub fn batch_get_command< - P: 'static + ResponseBatchConsumer<(Option>, Statistics, ReadPerfContext)>, - >( + pub fn batch_get_command>, Statistics)>>( &self, requests: Vec, ids: Vec, + trackers: Vec, consumer: P, begin_instant: tikv_util::time::Instant, ) -> impl Future> { @@ -717,7 +754,8 @@ impl Storage { let resource_tag = self .resource_tag_factory .new_tag_with_key_ranges(rand_ctx, vec![(rand_key.clone(), rand_key)]); - + // Unset the TLS tracker because the future below does not belong to any specific request + clear_tls_tracker_token(); let res = self.read_pool.spawn_handle( async move { KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); @@ -729,7 +767,8 @@ impl Storage { let mut statistics = Statistics::default(); let mut req_snaps = vec![]; - for (mut req, id) in requests.into_iter().zip(ids) { + for ((mut req, id), tracker) in requests.into_iter().zip(ids).zip(trackers) { + set_tls_tracker_token(tracker); let mut ctx = req.take_context(); let region_id = ctx.get_region_id(); let peer = ctx.get_peer(); @@ -776,7 +815,7 @@ impl Storage { let snap = Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)); req_snaps.push(( - snap, + TrackedFuture::new(snap), key, start_ts, isolation_level, @@ -785,6 +824,7 @@ impl Storage { access_locks, region_id, id, + tracker, )); } Self::with_tls_engine(|engine| engine.release_snapshot()); @@ -799,9 +839,12 @@ impl Storage { access_locks, region_id, id, + tracker, ) = req_snap; - match snap.await { - Ok(snapshot) => { + let snap_res = snap.await; + set_tls_tracker_token(tracker); + match snap_res { + Ok(snapshot) => Self::with_perf_context(CMD, || { let buckets = snapshot.ext().get_buckets(); match PointGetterBuilder::new(snapshot, start_ts) .fill_cache(fill_cache) @@ -811,10 +854,8 @@ impl Storage { .build() { Ok(mut point_getter) => { - let perf_statistics = ReadPerfInstant::new(); let v = point_getter.get(&key); let stat = point_getter.take_statistics(); - let delta = perf_statistics.delta(); 
metrics::tls_collect_read_flow( region_id, Some(key.as_encoded()), @@ -822,12 +863,11 @@ impl Storage { &stat, buckets.as_ref(), ); - metrics::tls_collect_perf_stats(CMD, &delta); statistics.add(&stat); consumer.consume( id, v.map_err(|e| Error::from(txn::Error::from(e))) - .map(|v| (v, stat, delta)), + .map(|v| (v, stat)), begin_instant, ); } @@ -839,7 +879,7 @@ impl Storage { ); } } - } + }), Err(e) => { consumer.consume(id, Err(e), begin_instant); } @@ -933,9 +973,8 @@ impl Storage { let stage_snap_recv_ts = begin_instant; let mut statistics = Vec::with_capacity(keys.len()); let buckets = snapshot.ext().get_buckets(); - let (result, delta, stats) = { + let (result, stats) = Self::with_perf_context(CMD, || { let _guard = sample.observe_cpu(); - let perf_statistics = ReadPerfInstant::new(); let snap_store = SnapshotStore::new( snapshot, start_ts, @@ -976,11 +1015,9 @@ impl Storage { .observe(kv_pairs.len() as f64); kv_pairs }); - let delta = perf_statistics.delta(); - (result, delta, stats) - }; + (result, stats) + }); metrics::tls_collect_scan_details(CMD, &stats); - metrics::tls_collect_perf_stats(CMD, &delta); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) .observe(begin_instant.saturating_elapsed_secs()); @@ -1018,7 +1055,6 @@ impl Storage { result?, KvGetStatistics { stats, - perf_stats: delta, latency_stats, }, )) @@ -1154,9 +1190,8 @@ impl Storage { let snapshot = Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)).await?; - { + Self::with_perf_context(CMD, || { let begin_instant = Instant::now(); - let perf_statistics = ReadPerfInstant::new(); let buckets = snapshot.ext().get_buckets(); let snap_store = SnapshotStore::new( @@ -1174,7 +1209,6 @@ impl Storage { let res = scanner.scan(limit, sample_step); let statistics = scanner.take_statistics(); - let delta = perf_statistics.delta(); metrics::tls_collect_scan_details(CMD, &statistics); metrics::tls_collect_read_flow( ctx.get_region_id(), @@ -1183,7 +1217,6 @@ impl Storage { &statistics, 
buckets.as_ref(), ); - metrics::tls_collect_perf_stats(CMD, &delta); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) .observe(begin_instant.saturating_elapsed_secs()); @@ -1200,7 +1233,7 @@ impl Storage { .map(|x| x.map_err(Error::from)) .collect() }) - } + }) } .in_resource_metering_tag(resource_tag), priority, @@ -1304,10 +1337,9 @@ impl Storage { let snapshot = Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)).await?; - { + Self::with_perf_context(CMD, || { let begin_instant = Instant::now(); let mut statistics = Statistics::default(); - let perf_statistics = ReadPerfInstant::new(); let buckets = snapshot.ext().get_buckets(); let mut reader = MvccReader::new( snapshot, @@ -1331,7 +1363,6 @@ impl Storage { locks.push(lock_info); } - let delta = perf_statistics.delta(); metrics::tls_collect_scan_details(CMD, &statistics); metrics::tls_collect_read_flow( ctx.get_region_id(), @@ -1340,7 +1371,6 @@ impl Storage { &statistics, buckets.as_ref(), ); - metrics::tls_collect_perf_stats(CMD, &delta); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) .observe(begin_instant.saturating_elapsed_secs()); @@ -1349,7 +1379,7 @@ impl Storage { .observe(command_duration.saturating_elapsed_secs()); Ok(locks) - } + }) } .in_resource_metering_tag(resource_tag), priority, @@ -3029,11 +3059,11 @@ pub mod test_util { } } - impl ResponseBatchConsumer<(Option>, Statistics, ReadPerfContext)> for GetConsumer { + impl ResponseBatchConsumer<(Option>, Statistics)> for GetConsumer { fn consume( &self, id: u64, - res: Result<(Option>, Statistics, ReadPerfContext)>, + res: Result<(Option>, Statistics)>, _: tikv_util::time::Instant, ) { self.data.lock().unwrap().push(GetResult { @@ -3060,7 +3090,6 @@ pub mod test_util { #[derive(Debug, Default, Clone)] pub struct KvGetStatistics { pub stats: Statistics, - pub perf_stats: ReadPerfContext, pub latency_stats: StageLatencyStats, } @@ -3085,6 +3114,7 @@ mod tests { use futures::executor::block_on; use kvproto::kvrpcpb::{AssertionLevel, 
CommandPri, Op}; use tikv_util::config::ReadableSize; + use tracker::INVALID_TRACKER_TOKEN; use txn_types::{Mutation, PessimisticLock, WriteType}; use super::{ @@ -3277,6 +3307,7 @@ mod tests { block_on(storage.batch_get_command( vec![create_get_request(b"c", 1), create_get_request(b"d", 1)], vec![1, 2], + vec![INVALID_TRACKER_TOKEN; 2], consumer.clone(), Instant::now(), )) @@ -3964,6 +3995,7 @@ mod tests { block_on(storage.batch_get_command( vec![create_get_request(b"c", 2), create_get_request(b"d", 2)], vec![1, 2], + vec![INVALID_TRACKER_TOKEN; 2], consumer.clone(), Instant::now(), )) @@ -4004,6 +4036,7 @@ mod tests { create_get_request(b"b", 5), ], vec![1, 2, 3, 4], + vec![INVALID_TRACKER_TOKEN; 4], consumer.clone(), Instant::now(), )) @@ -7736,6 +7769,7 @@ mod tests { block_on(storage.batch_get_command( vec![req1.clone(), req2], vec![1, 2], + vec![INVALID_TRACKER_TOKEN; 2], consumer.clone(), Instant::now(), )) @@ -7809,8 +7843,14 @@ mod tests { req.set_key(k1.clone()); req.set_version(110); let consumer = GetConsumer::new(); - block_on(storage.batch_get_command(vec![req], vec![1], consumer.clone(), Instant::now())) - .unwrap(); + block_on(storage.batch_get_command( + vec![req], + vec![1], + vec![INVALID_TRACKER_TOKEN], + consumer.clone(), + Instant::now(), + )) + .unwrap(); let res = consumer.take_data(); assert_eq!(res.len(), 1); assert_eq!(res[0].as_ref().unwrap(), &Some(v1.clone())); From c17e29b82b790d7e8379480384c4a3625d722297 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Fri, 10 Jun 2022 18:14:30 +0800 Subject: [PATCH 0020/1149] engine: deprecate gc-merge-rewrite option (#12798) close tikv/tikv#12797 Deprecate gc-merge-rewrite option. 
Signed-off-by: tabokie --- etc/config-template.toml | 17 +++++++++-------- src/config.rs | 20 +++++++++++++++++++- tests/integrations/config/mod.rs | 2 +- tests/integrations/config/test-custom.toml | 2 -- 4 files changed, 29 insertions(+), 12 deletions(-) diff --git a/etc/config-template.toml b/etc/config-template.toml index 1e673fbc3fa..ab2ffa28acf 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -822,10 +822,15 @@ ## default: 0.5 # discardable-ratio = 0.5 -## The mode used to process blob files. In read-only mode Titan -## stops writing value into blob log. In fallback mode Titan -## converts blob index into real value on flush and compaction. -## This option is especially useful for downgrading Titan. +## The mode used to process blob files. In read-only mode Titan stops writing +## value into blob log. In fallback mode Titan converts blob index into real +## value on flush and compaction. +## +## This option can be used to disable Titan. More specifically, to disable +## Titan, set this option to fallback and perform a full compaction using +## tikv-ctl. Then, monitor the blob file size metrics. After the blob file size +## decreases to 0, you can set rocksdb.titan.enabled to false and restart TiKV. +## ## default: kNormal ## read-only: kReadOnly ## fallback: kFallback @@ -844,10 +849,6 @@ ## default: false # level-merge = false -## Use merge operator to rewrite GC blob index. -## default: false -# gc-merge-rewrite = false - ## Options for "Write" Column Family, which stores MVCC commit information [rocksdb.writecf] ## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`. diff --git a/src/config.rs b/src/config.rs index 37278fd09e2..d37e0892082 100644 --- a/src/config.rs +++ b/src/config.rs @@ -139,7 +139,10 @@ pub struct TitanCfConfig { pub range_merge: bool, #[online_config(skip)] pub max_sorted_runs: i32, + // deprecated. 
#[online_config(skip)] + #[doc(hidden)] + #[serde(skip_serializing)] pub gc_merge_rewrite: bool, } @@ -178,9 +181,23 @@ impl TitanCfConfig { opts.set_level_merge(self.level_merge); opts.set_range_merge(self.range_merge); opts.set_max_sorted_runs(self.max_sorted_runs); - opts.set_gc_merge_rewrite(self.gc_merge_rewrite); opts } + + fn validate(&self) -> Result<(), Box> { + if self.gc_merge_rewrite { + return Err( + "gc-merge-rewrite is deprecated. The data produced when this \ + option is enabled cannot be read by this version. Therefore, if \ + this option has been applied to an existing node, you must downgrade \ + it to the previous version and fully clean up the old data. See more \ + details of how to do that in the documentation for the blob-run-mode \ + confuguration." + .into(), + ); + } + Ok(()) + } } #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -332,6 +349,7 @@ macro_rules! cf_config { ) .into()); } + self.titan.validate()?; Ok(()) } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 3bd932262e5..589b0ff7a56 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -256,7 +256,7 @@ fn test_serde_custom_tikv_config() { level_merge: true, range_merge: true, max_sorted_runs: 100, - gc_merge_rewrite: true, + gc_merge_rewrite: false, }; let titan_db_config = TitanDBConfig { enabled: true, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index d02aebc4df3..36b82b056f1 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -334,7 +334,6 @@ blob-run-mode = "fallback" level-merge = true range-merge = true max-sorted-runs = 100 -gc-merge-rewrite = true [rocksdb.writecf] block-size = "12KB" @@ -566,7 +565,6 @@ blob-run-mode = "fallback" level-merge = true range-merge = true max-sorted-runs = 100 -gc-merge-rewrite = true [raft-engine] enable = false From 265dbd2b1a3ec9c2c16224e84f2584f6534065fe 
Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 10 Jun 2022 07:48:31 -0700 Subject: [PATCH 0021/1149] update several deps (#12775) close tikv/tikv#12773 replace signal with signal-hook and update other deps by depbot. Signed-off-by: Jay Lee Co-authored-by: zhangjinpeng1987 --- Cargo.lock | 170 ++++------- cmd/tikv-ctl/Cargo.toml | 3 +- components/external_storage/export/Cargo.toml | 7 +- .../export/src/bin/tikv-cloud-storage.rs | 17 +- components/file_system/Cargo.toml | 1 - components/keys/Cargo.toml | 1 - components/resource_metering/Cargo.toml | 3 - components/server/Cargo.toml | 3 +- components/server/src/signal_handler.rs | 14 +- components/tidb_query_datatype/Cargo.toml | 2 +- .../src/simple_aggr_executor.rs | 2 +- components/tidb_query_expr/src/impl_cast.rs | 11 +- components/tidb_query_expr/src/impl_math.rs | 287 ++++++++++-------- .../tidb_query_expr/src/impl_miscellaneous.rs | 17 +- components/tidb_query_expr/src/impl_op.rs | 19 +- components/tikv_util/Cargo.toml | 2 +- 16 files changed, 284 insertions(+), 275 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6691467f359..19ccbcc72c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -633,6 +633,12 @@ version = "3.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "12ae9db68ad7fac5fe51304d20f016c911539251075a214f8e663babefa35187" +[[package]] +name = "bytemuck" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdead85bdec19c194affaeeb670c0e41fe23de31459efd1c174d049269cf02cc" + [[package]] name = "byteorder" version = "1.3.4" @@ -1091,16 +1097,16 @@ dependencies = [ [[package]] name = "crossbeam" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd01a6eb3daaafa260f6fc94c3a6c36390abc2080e38e3e34ced87393fb77d80" +checksum = "4ae5588f6b3c3cb05239e90bd110f257254aecd01e4635400391aeae07497845" dependencies = [ "cfg-if 1.0.0", "crossbeam-channel", "crossbeam-deque", - 
"crossbeam-epoch 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-epoch 0.9.8", "crossbeam-queue", - "crossbeam-utils 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.8.8", ] [[package]] @@ -1110,7 +1116,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" dependencies = [ "cfg-if 1.0.0", - "crossbeam-utils 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.8.8", ] [[package]] @@ -1120,18 +1126,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" dependencies = [ "cfg-if 1.0.0", - "crossbeam-epoch 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-epoch 0.9.8", + "crossbeam-utils 0.8.8", ] [[package]] name = "crossbeam-epoch" version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2584f639eb95fea8c798496315b297cf81b9b58b6d30ab066a75455333cf4b12" +source = "git+https://github.com/tikv/crossbeam.git?branch=tikv-5.0#e0e083d062649484188b7337fe388fd12f2c8d94" dependencies = [ "cfg-if 1.0.0", - "crossbeam-utils 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.8.3", "lazy_static", "memoffset", "scopeguard", @@ -1139,11 +1144,13 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.3" -source = "git+https://github.com/tikv/crossbeam.git?branch=tikv-5.0#e0e083d062649484188b7337fe388fd12f2c8d94" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1145cf131a2c6ba0615079ab6a638f7e1973ac9c2634fcbeaaad6114246efe8c" dependencies = [ + "autocfg", "cfg-if 1.0.0", - "crossbeam-utils 0.8.3 (git+https://github.com/tikv/crossbeam.git?branch=tikv-5.0)", + 
"crossbeam-utils 0.8.8", "lazy_static", "memoffset", "scopeguard", @@ -1151,12 +1158,12 @@ dependencies = [ [[package]] name = "crossbeam-queue" -version = "0.3.1" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f6cb3c7f5b8e51bc3ebb73a2327ad4abdbd119dc13223f14f961d2f38486756" +checksum = "1f25d8400f4a7a5778f0e4e52384a48cbd9b5c495d110786187fc750075277a2" dependencies = [ "cfg-if 1.0.0", - "crossbeam-utils 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.8.8", ] [[package]] @@ -1165,8 +1172,8 @@ version = "0.0.0" source = "git+https://github.com/tikv/crossbeam.git?branch=tikv-5.0#e0e083d062649484188b7337fe388fd12f2c8d94" dependencies = [ "cfg-if 1.0.0", - "crossbeam-epoch 0.9.3 (git+https://github.com/tikv/crossbeam.git?branch=tikv-5.0)", - "crossbeam-utils 0.8.3 (git+https://github.com/tikv/crossbeam.git?branch=tikv-5.0)", + "crossbeam-epoch 0.9.3", + "crossbeam-utils 0.8.3", "scopeguard", ] @@ -1184,8 +1191,7 @@ dependencies = [ [[package]] name = "crossbeam-utils" version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7e9d99fa91428effe99c5c6d4634cdeba32b8cf784fc428a2a687f61a952c49" +source = "git+https://github.com/tikv/crossbeam.git?branch=tikv-5.0#e0e083d062649484188b7337fe388fd12f2c8d94" dependencies = [ "autocfg", "cfg-if 1.0.0", @@ -1194,10 +1200,10 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.3" -source = "git+https://github.com/tikv/crossbeam.git?branch=tikv-5.0#e0e083d062649484188b7337fe388fd12f2c8d94" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38" dependencies = [ - "autocfg", "cfg-if 1.0.0", "lazy_static", ] @@ -1693,11 +1699,11 @@ dependencies = [ "libc 0.2.125", "libloading", "matches", - "nix 0.23.0", + "nix", "once_cell", "protobuf", "rust-ini", - "signal", + "signal-hook", "slog", 
"slog-global", "slog-term", @@ -1743,12 +1749,11 @@ dependencies = [ "bcc", "collections", "crc32fast", - "crossbeam-utils 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.8.8", "fs2", "lazy_static", "libc 0.2.125", "maligned", - "nix 0.23.0", "online_config", "openssl", "parking_lot 0.12.0", @@ -2613,7 +2618,6 @@ dependencies = [ "panic_hook", "thiserror", "tikv_alloc", - "tikv_util", ] [[package]] @@ -3113,32 +3117,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "nix" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "becb657d662f1cd2ef38c7ad480ec6b8cf9e96b27adb543e594f9cf0f2e6065c" -dependencies = [ - "bitflags", - "cc", - "cfg-if 0.1.10", - "libc 0.2.125", - "void", -] - -[[package]] -name = "nix" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f305c2c2e4c39a82f7bf0bf65fb557f9070ce06781d4f2454295cc34b1c43188" -dependencies = [ - "bitflags", - "cc", - "cfg-if 1.0.0", - "libc 0.2.125", - "memoffset", -] - [[package]] name = "nix" version = "0.24.1" @@ -3196,9 +3174,9 @@ dependencies = [ [[package]] name = "notify" -version = "4.0.16" +version = "4.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2599080e87c9bd051ddb11b10074f4da7b1223298df65d4c2ec5bcf309af1533" +checksum = "ae03c8c853dba7bfd23e571ff0cff7bc9dceb40a4cd684cd1681824183f45257" dependencies = [ "bitflags", "filetime", @@ -3449,9 +3427,9 @@ checksum = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de" [[package]] name = "openssl-src" -version = "111.17.0+1.1.1m" +version = "111.20.0+1.1.1o" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d6a336abd10814198f66e2a91ccd7336611f30334119ca8ce300536666fcf4" +checksum = "92892c4f87d56e376e469ace79f1128fdaded07646ddf73aa0be4706ff712dec" dependencies = [ "cc", ] @@ -3472,18 +3450,9 @@ dependencies = [ [[package]] name = "ordered-float" 
-version = "1.1.1" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" -dependencies = [ - "num-traits", -] - -[[package]] -name = "ordered-float" -version = "2.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "039f02eb0f69271f26abe3202189275d7aa2258b903cb0281b5de710a2570ff3" +checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" dependencies = [ "num-traits", ] @@ -3800,7 +3769,7 @@ dependencies = [ "inferno", "libc 0.2.125", "log", - "nix 0.24.1", + "nix", "once_cell", "parking_lot 0.12.0", "protobuf", @@ -4069,7 +4038,7 @@ dependencies = [ "log", "lz4-sys", "memmap2", - "nix 0.24.1", + "nix", "num-derive", "num-traits", "parking_lot 0.12.0", @@ -4162,7 +4131,7 @@ dependencies = [ "memory_trace_macros", "online_config", "openssl", - "ordered-float 2.7.0", + "ordered-float", "panic_hook", "parking_lot 0.12.0", "pd_client", @@ -4349,7 +4318,7 @@ checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a" dependencies = [ "crossbeam-channel", "crossbeam-deque", - "crossbeam-utils 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.8.8", "lazy_static", "num_cpus", ] @@ -4390,9 +4359,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.5.4" +version = "1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" dependencies = [ "aho-corasick", "memchr", @@ -4410,9 +4379,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.25" +version = "0.6.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +checksum = 
"49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" [[package]] name = "remove_dir_all" @@ -4517,7 +4486,6 @@ dependencies = [ "serde_derive", "slog", "slog-global", - "thread-id", "tikv_util", ] @@ -4529,9 +4497,12 @@ checksum = "18eb52b6664d331053136fcac7e4883bdc6f5fc04a6aab3b0f75eafb80ab88b3" [[package]] name = "rgb" -version = "0.8.14" +version = "0.8.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089e4031214d129e201f8c3c8c2fe97cd7322478a0d1cdf78e7029b0042efdb" +checksum = "e74fdc210d8f24a7dbfedc13b04ba5764f5232754ccebfdf5fff1bad791ccbc6" +dependencies = [ + "bytemuck", +] [[package]] name = "rhai" @@ -5049,7 +5020,6 @@ dependencies = [ "libc 0.2.125", "log", "log_wrappers", - "nix 0.23.0", "pd_client", "prometheus", "protobuf", @@ -5061,7 +5031,7 @@ dependencies = [ "resource_metering", "security", "serde_json", - "signal", + "signal-hook", "slog", "slog-global", "tempfile", @@ -5100,20 +5070,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" [[package]] -name = "signal" -version = "0.6.0" +name = "signal-hook" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "106428d9d96840ecdec5208c13ab8a4e28c38da1e0ccf2909fb44e41b992f897" +checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" dependencies = [ "libc 0.2.125", - "nix 0.11.1", + "signal-hook-registry", ] [[package]] name = "signal-hook-registry" -version = "1.2.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce32ea0c6c56d5eacaeb814fbed9960547021d3edd010ded1425f180536b20ab" +checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" dependencies = [ "libc 0.2.125", ] @@ -5805,17 +5775,6 @@ dependencies = [ "syn", ] -[[package]] -name = "thread-id" -version = "4.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fdfe0627923f7411a43ec9ec9c39c3a9b4151be313e0922042581fb6c9b717f" -dependencies = [ - "libc 0.2.125", - "redox_syscall 0.2.11", - "winapi 0.3.9", -] - [[package]] name = "thread_local" version = "1.1.4" @@ -5893,7 +5852,7 @@ dependencies = [ "num 0.3.0", "num-derive", "num-traits", - "ordered-float 1.1.1", + "ordered-float", "protobuf", "regex", "serde", @@ -6120,7 +6079,6 @@ dependencies = [ "libc 0.2.125", "log", "log_wrappers", - "nix 0.23.0", "pd_client", "prometheus", "protobuf", @@ -6133,7 +6091,7 @@ dependencies = [ "security", "serde_json", "server", - "signal", + "signal-hook", "slog", "slog-global", "structopt", @@ -6265,7 +6223,7 @@ dependencies = [ "libc 0.2.125", "log", "log_wrappers", - "nix 0.23.0", + "nix", "num-traits", "num_cpus", "online_config", @@ -6794,12 +6752,6 @@ dependencies = [ "syn", ] -[[package]] -name = "void" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" - [[package]] name = "walkdir" version = "2.3.1" diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index 9292df06fca..13d8b351e21 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -66,7 +66,6 @@ kvproto = { git = "https://github.com/pingcap/kvproto.git" } libc = "0.2" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } log_wrappers = { path = "../../components/log_wrappers" } -nix = "0.23" pd_client = { path = "../../components/pd_client", default-features = false } prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } @@ -95,4 +94,4 @@ cc = "1.0" time = "0.1" [target.'cfg(unix)'.dependencies] -signal = "0.6" +signal-hook = "0.3" diff --git a/components/external_storage/export/Cargo.toml b/components/external_storage/export/Cargo.toml index d67e2b7a15f..1f75af2734a 100644 --- 
a/components/external_storage/export/Cargo.toml +++ b/components/external_storage/export/Cargo.toml @@ -40,13 +40,12 @@ cloud-storage-grpc = [ "futures", "futures-executor", "libc", - "signal", + "signal-hook", "slog", "slog-global", "slog-term", "tokio", "tokio-util", - "nix", ] [dependencies] @@ -89,8 +88,8 @@ name = "scli" path = "examples/scli.rs" [target.'cfg(unix)'.dependencies] -nix = { optional = true, version = "0.23" } -signal = { optional = true, version = "0.6" } +nix = { optional = true, version = "0.24" } +signal-hook = { optional = true, version = "0.3" } libc = { optional = true, version = "0.2" } slog = { optional = true, version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-term = { optional = true, version = "2.4" } diff --git a/components/external_storage/export/src/bin/tikv-cloud-storage.rs b/components/external_storage/export/src/bin/tikv-cloud-storage.rs index 3011a5079d1..07cd8507948 100644 --- a/components/external_storage/export/src/bin/tikv-cloud-storage.rs +++ b/components/external_storage/export/src/bin/tikv-cloud-storage.rs @@ -33,16 +33,19 @@ fn main() { #[cfg(unix)] mod wait { use libc::c_int; - use nix::sys::signal::{SIGHUP, SIGINT, SIGTERM, SIGUSR1, SIGUSR2}; - use signal::trap::Trap; + use signal_hook::{ + consts::{SIGHUP, SIGINT, SIGTERM, SIGUSR1, SIGUSR2}, + iterator::Signals, + Signals, + }; use slog_global::info; pub fn for_signal() { - let trap = Trap::trap(&[SIGTERM, SIGINT, SIGHUP, SIGUSR1, SIGUSR2]); - for sig in trap { - match sig { - SIGUSR1 | SIGTERM | SIGINT | SIGHUP => { - info!("receive signal {}, stopping server...", sig as c_int); + let mut signals = Signals::new(&[SIGTERM, SIGINT, SIGHUP]).unwrap(); + for signal in &mut signals { + match signal { + SIGTERM | SIGINT | SIGHUP => { + info!("receive signal {}, stopping server...", signal); break; } // TODO: handle more signals diff --git a/components/file_system/Cargo.toml b/components/file_system/Cargo.toml index aa1cb56a991..e3924c0fc25 
100644 --- a/components/file_system/Cargo.toml +++ b/components/file_system/Cargo.toml @@ -14,7 +14,6 @@ crossbeam-utils = "0.8.0" fs2 = "0.4" lazy_static = "1.3" libc = "0.2" -nix = "0.23" online_config = { path = "../online_config" } openssl = "0.10" parking_lot = "0.12" diff --git a/components/keys/Cargo.toml b/components/keys/Cargo.toml index de1a7089ce4..a9bd4ddbf18 100644 --- a/components/keys/Cargo.toml +++ b/components/keys/Cargo.toml @@ -10,7 +10,6 @@ kvproto = { git = "https://github.com/pingcap/kvproto.git" } log_wrappers = { path = "../log_wrappers" } thiserror = "1.0" tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } [dev-dependencies] panic_hook = { path = "../panic_hook" } diff --git a/components/resource_metering/Cargo.toml b/components/resource_metering/Cargo.toml index cecaa3c911b..72a0c0dc339 100644 --- a/components/resource_metering/Cargo.toml +++ b/components/resource_metering/Cargo.toml @@ -25,9 +25,6 @@ tikv_util = { path = "../tikv_util" } [target.'cfg(target_os = "linux")'.dependencies] procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229b2c1fcc44118bef7eff127128" } -[target.'cfg(not(target_os = "linux"))'.dependencies] -thread-id = "4" - [dev-dependencies] rand = "0.8" diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index b53fde02cef..650f9f6932b 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -60,7 +60,6 @@ kvproto = { git = "https://github.com/pingcap/kvproto.git" } libc = "0.2" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } log_wrappers = { path = "../log_wrappers" } -nix = "0.23" pd_client = { path = "../pd_client", default-features = false } prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } @@ -84,4 +83,4 @@ txn_types = { path = "../txn_types", default-features = false } yatp = { git = 
"https://github.com/tikv/yatp.git", branch = "master" } [target.'cfg(unix)'.dependencies] -signal = "0.6" +signal-hook = "0.3" diff --git a/components/server/src/signal_handler.rs b/components/server/src/signal_handler.rs index 5b73154241b..88c2ddac9f4 100644 --- a/components/server/src/signal_handler.rs +++ b/components/server/src/signal_handler.rs @@ -5,17 +5,19 @@ pub use self::imp::wait_for_signal; #[cfg(unix)] mod imp { use engine_traits::{Engines, KvEngine, MiscExt, RaftEngine}; - use libc::c_int; - use signal::{trap::Trap, Signal::*}; + use signal_hook::{ + consts::{SIGHUP, SIGINT, SIGTERM, SIGUSR1, SIGUSR2}, + iterator::Signals, + }; use tikv_util::metrics; #[allow(dead_code)] pub fn wait_for_signal(engines: Option>) { - let trap = Trap::trap(&[SIGTERM, SIGINT, SIGHUP, SIGUSR1, SIGUSR2]); - for sig in trap { - match sig { + let mut signals = Signals::new(&[SIGTERM, SIGINT, SIGHUP, SIGUSR1, SIGUSR2]).unwrap(); + for signal in &mut signals { + match signal { SIGTERM | SIGINT | SIGHUP => { - info!("receive signal {}, stopping server...", sig as c_int); + info!("receive signal {}, stopping server...", signal); break; } SIGUSR1 => { diff --git a/components/tidb_query_datatype/Cargo.toml b/components/tidb_query_datatype/Cargo.toml index 698ebc8049c..56acb353302 100644 --- a/components/tidb_query_datatype/Cargo.toml +++ b/components/tidb_query_datatype/Cargo.toml @@ -25,7 +25,7 @@ nom = { version = "5.1.0", default-features = false, features = ["std"] } num = { version = "0.3", default-features = false } num-derive = "0.3" num-traits = "0.2" -ordered-float = "1.0" +ordered-float = "2.0" protobuf = "2" regex = "1.1" serde = "1.0" diff --git a/components/tidb_query_executors/src/simple_aggr_executor.rs b/components/tidb_query_executors/src/simple_aggr_executor.rs index 325082f42d6..1e1dd48929b 100644 --- a/components/tidb_query_executors/src/simple_aggr_executor.rs +++ b/components/tidb_query_executors/src/simple_aggr_executor.rs @@ -309,7 +309,7 @@ mod tests { Self 
{ rows_with_null: 0, rows_without_null: 0, - sum: Real::from(0.0), + sum: Real::new(0.0).unwrap(), } } } diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index 718bb9b3fa5..f6d6af4eb02 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -1833,10 +1833,13 @@ mod tests { let cs = vec![ // (input, expect) - (EnumRef::new("enum".as_bytes(), &0), Real::from(0.)), - (EnumRef::new("int".as_bytes(), &1), Real::from(1.)), - (EnumRef::new("real".as_bytes(), &2), Real::from(2.)), - (EnumRef::new("string".as_bytes(), &3), Real::from(3.)), + (EnumRef::new("enum".as_bytes(), &0), Real::new(0.).unwrap()), + (EnumRef::new("int".as_bytes(), &1), Real::new(1.).unwrap()), + (EnumRef::new("real".as_bytes(), &2), Real::new(2.).unwrap()), + ( + EnumRef::new("string".as_bytes(), &3), + Real::new(3.).unwrap(), + ), ]; for (input, expect) in cs { diff --git a/components/tidb_query_expr/src/impl_math.rs b/components/tidb_query_expr/src/impl_math.rs index 5ed1973c5d6..798ca2b9c6a 100644 --- a/components/tidb_query_expr/src/impl_math.rs +++ b/components/tidb_query_expr/src/impl_math.rs @@ -22,7 +22,7 @@ const MAX_RAND_VALUE: u32 = 0x3FFFFFFF; #[rpn_fn] #[inline] pub fn pi() -> Result> { - Ok(Some(Real::from(std::f64::consts::PI))) + Ok(Some(Real::new(std::f64::consts::PI).unwrap())) } #[rpn_fn] @@ -65,7 +65,7 @@ pub fn log10(arg: &Real) -> Result> { // If the given f64 is finite, returns `Some(Real)`. Otherwise returns None. 
fn f64_to_real(n: f64) -> Option { if n.is_finite() { - Some(Real::from(n)) + Some(Real::new(n).unwrap()) } else { None } @@ -92,7 +92,7 @@ impl Ceil for CeilReal { #[inline] fn ceil(_ctx: &mut EvalContext, arg: &Self::Input) -> Result> { - Ok(Some(Real::from(arg.ceil()))) + Ok(Some(Real::new(arg.ceil()).unwrap())) } } @@ -167,7 +167,7 @@ impl Floor for FloorReal { #[inline] fn floor(_ctx: &mut EvalContext, arg: &Self::Input) -> Result> { - Ok(Some(Real::from(arg.floor()))) + Ok(Some(Real::new(arg.floor()).unwrap())) } } @@ -272,11 +272,7 @@ fn sqrt(arg: &Real) -> Result> { None } else { let res = arg.sqrt(); - if res.is_nan() { - None - } else { - Some(Real::from(res)) - } + Real::new(res).ok() } }) } @@ -485,11 +481,11 @@ fn truncate_real(x: Real, d: i32) -> Real { let shift = 10_f64.powi(d); let tmp = x * shift; if *tmp == 0_f64 { - Real::from(0_f64) + Real::new(0_f64).unwrap() } else if tmp.is_infinite() { x } else { - Real::from(tmp.trunc() / shift) + Real::new(tmp.trunc() / shift).unwrap() } } @@ -548,7 +544,7 @@ pub fn round_with_frac_real(arg0: &Real, arg1: &Int) -> Result> { let digits = arg1; let power = 10.0_f64.powi(-digits as i32); let frac = *number / power; - Ok(Some(Real::from(frac.round() * power))) + Ok(Some(Real::new(frac.round() * power).unwrap())) } thread_local! 
{ @@ -715,7 +711,7 @@ mod tests { let output = RpnFnScalarEvaluator::new() .evaluate(ScalarFuncSig::Pi) .unwrap(); - assert_eq!(output, Some(Real::from(std::f64::consts::PI))); + assert_eq!(output, Some(Real::new(std::f64::consts::PI).unwrap())); } #[test] @@ -743,8 +739,8 @@ mod tests { #[test] fn test_log_1_arg() { let test_cases = vec![ - (Some(std::f64::consts::E), Some(Real::from(1.0_f64))), - (Some(100.0), Some(Real::from(4.605170185988092_f64))), + (Some(std::f64::consts::E), Some(Real::new(1.0_f64).unwrap())), + (Some(100.0), Some(Real::new(4.605170185988092_f64).unwrap())), (Some(-1.0), None), (Some(0.0), None), (None, None), @@ -761,9 +757,21 @@ mod tests { #[test] fn test_log_2_arg() { let test_cases = vec![ - (Some(10.0_f64), Some(100.0_f64), Some(Real::from(2.0_f64))), - (Some(2.0_f64), Some(1.0_f64), Some(Real::from(0.0_f64))), - (Some(0.5_f64), Some(0.25_f64), Some(Real::from(2.0_f64))), + ( + Some(10.0_f64), + Some(100.0_f64), + Some(Real::new(2.0_f64).unwrap()), + ), + ( + Some(2.0_f64), + Some(1.0_f64), + Some(Real::new(0.0_f64).unwrap()), + ), + ( + Some(0.5_f64), + Some(0.25_f64), + Some(Real::new(2.0_f64).unwrap()), + ), (Some(-0.23323_f64), Some(2.0_f64), None), (Some(0_f64), Some(123_f64), None), (Some(1_f64), Some(123_f64), None), @@ -785,8 +793,8 @@ mod tests { #[test] fn test_log2() { let test_cases = vec![ - (Some(16_f64), Some(Real::from(4_f64))), - (Some(5_f64), Some(Real::from(2.321928094887362_f64))), + (Some(16_f64), Some(Real::new(4_f64).unwrap())), + (Some(5_f64), Some(Real::new(2.321928094887362_f64).unwrap())), (Some(-1.234_f64), None), (Some(0_f64), None), (None, None), @@ -803,8 +811,11 @@ mod tests { #[test] fn test_log10() { let test_cases = vec![ - (Some(100_f64), Some(Real::from(2_f64))), - (Some(101_f64), Some(Real::from(2.0043213737826426_f64))), + (Some(100_f64), Some(Real::new(2_f64).unwrap())), + ( + Some(101_f64), + Some(Real::new(2.0043213737826426_f64).unwrap()), + ), (Some(-1.234_f64), None), (Some(0_f64), None), 
(None, None), @@ -887,7 +898,7 @@ mod tests { (f64::MIN, f64::MIN), ]; for (expected, input) in cases { - let arg = Real::from(input); + let arg = Real::new(input).unwrap(); let expected = Real::new(expected).ok(); let output = RpnFnScalarEvaluator::new() .push_param(arg) @@ -1004,7 +1015,7 @@ mod tests { (f64::MIN, f64::MIN), ]; for (input, expected) in cases { - let arg = Real::from(input); + let arg = Real::new(input).unwrap(); let expected = Real::new(expected).ok(); let output = RpnFnScalarEvaluator::new() .push_param(arg) @@ -1122,8 +1133,11 @@ mod tests { fn test_sqrt() { let test_cases = vec![ (None, None), - (Some(64f64), Some(Real::from(8f64))), - (Some(2f64), Some(Real::from(std::f64::consts::SQRT_2))), + (Some(64f64), Some(Real::new(8f64).unwrap())), + ( + Some(2f64), + Some(Real::new(std::f64::consts::SQRT_2).unwrap()), + ), (Some(-16f64), None), (Some(f64::NAN), None), ]; @@ -1140,14 +1154,17 @@ mod tests { fn test_radians() { let test_cases = vec![ (None, None), - (Some(0_f64), Some(Real::from(0_f64))), - (Some(180_f64), Some(Real::from(std::f64::consts::PI))), + (Some(0_f64), Some(Real::new(0_f64).unwrap())), + ( + Some(180_f64), + Some(Real::new(std::f64::consts::PI).unwrap()), + ), ( Some(-360_f64), - Some(Real::from(-2_f64 * std::f64::consts::PI)), + Some(Real::new(-2_f64 * std::f64::consts::PI).unwrap()), ), (Some(f64::NAN), None), - (Some(f64::INFINITY), Some(Real::from(f64::INFINITY))), + (Some(f64::INFINITY), Some(Real::new(f64::INFINITY).unwrap())), ]; for (input, expect) in test_cases { let output = RpnFnScalarEvaluator::new() @@ -1168,17 +1185,17 @@ mod tests { ]; for (x, expected) in tests { let output = RpnFnScalarEvaluator::new() - .push_param(Some(Real::from(x))) + .push_param(Some(Real::new(x).unwrap())) .evaluate(ScalarFuncSig::Exp) .unwrap(); - assert_eq!(output, Some(Real::from(expected))); + assert_eq!(output, Some(Real::new(expected).unwrap())); } test_unary_func_ok_none::(ScalarFuncSig::Exp); let overflow_tests = 
vec![100000_f64]; for x in overflow_tests { let output: Result> = RpnFnScalarEvaluator::new() - .push_param(Some(Real::from(x))) + .push_param(Some(Real::new(x).unwrap())) .evaluate(ScalarFuncSig::Exp); assert!(output.is_err()); } @@ -1189,12 +1206,15 @@ mod tests { let tests_cases = vec![ (None, None), (Some(f64::NAN), None), - (Some(0f64), Some(Real::from(0f64))), - (Some(1f64), Some(Real::from(57.29577951308232_f64))), - (Some(std::f64::consts::PI), Some(Real::from(180.0_f64))), + (Some(0f64), Some(Real::new(0f64).unwrap())), + (Some(1f64), Some(Real::new(57.29577951308232_f64).unwrap())), + ( + Some(std::f64::consts::PI), + Some(Real::new(180.0_f64).unwrap()), + ), ( Some(-std::f64::consts::PI / 2.0_f64), - Some(Real::from(-90.0_f64)), + Some(Real::new(-90.0_f64).unwrap()), ), ]; for (input, expect) in tests_cases { @@ -1219,7 +1239,7 @@ mod tests { ]; for (input, expect) in valid_test_cases { let output: Option = RpnFnScalarEvaluator::new() - .push_param(Some(Real::from(input))) + .push_param(Some(Real::new(input).unwrap())) .evaluate(ScalarFuncSig::Sin) .unwrap(); assert!((output.unwrap().into_inner() - expect).abs() < f64::EPSILON); @@ -1236,7 +1256,7 @@ mod tests { ]; for (input, expect) in test_cases { let output: Option = RpnFnScalarEvaluator::new() - .push_param(Some(Real::from(input))) + .push_param(Some(Real::new(input).unwrap())) .evaluate(ScalarFuncSig::Cos) .unwrap(); assert!((output.unwrap().into_inner() - expect).abs() < f64::EPSILON); @@ -1257,7 +1277,7 @@ mod tests { ]; for (input, expect) in test_cases { let output: Option = RpnFnScalarEvaluator::new() - .push_param(Some(Real::from(input))) + .push_param(Some(Real::new(input).unwrap())) .evaluate(ScalarFuncSig::Tan) .unwrap(); assert!((output.unwrap().into_inner() - expect).abs() < f64::EPSILON); @@ -1284,14 +1304,14 @@ mod tests { ]; for (input, expect) in test_cases { let output: Option = RpnFnScalarEvaluator::new() - .push_param(Some(Real::from(input))) + 
.push_param(Some(Real::new(input).unwrap())) .evaluate(ScalarFuncSig::Cot) .unwrap(); assert!((output.unwrap().into_inner() - expect).abs() < f64::EPSILON); } assert!( RpnFnScalarEvaluator::new() - .push_param(Some(Real::from(0.0_f64))) + .push_param(Some(Real::new(0.0_f64).unwrap())) .evaluate::(ScalarFuncSig::Cot) .is_err() ); @@ -1301,27 +1321,27 @@ mod tests { fn test_pow() { let cases = vec![ ( - Some(Real::from(1.0f64)), - Some(Real::from(3.0f64)), - Some(Real::from(1.0f64)), + Some(Real::new(1.0f64).unwrap()), + Some(Real::new(3.0f64).unwrap()), + Some(Real::new(1.0f64).unwrap()), ), ( - Some(Real::from(3.0f64)), - Some(Real::from(0.0f64)), - Some(Real::from(1.0f64)), + Some(Real::new(3.0f64).unwrap()), + Some(Real::new(0.0f64).unwrap()), + Some(Real::new(1.0f64).unwrap()), ), ( - Some(Real::from(2.0f64)), - Some(Real::from(4.0f64)), - Some(Real::from(16.0f64)), + Some(Real::new(2.0f64).unwrap()), + Some(Real::new(4.0f64).unwrap()), + Some(Real::new(16.0f64).unwrap()), ), ( - Some(Real::from(f64::INFINITY)), - Some(Real::from(0.0f64)), - Some(Real::from(1.0f64)), + Some(Real::new(f64::INFINITY).unwrap()), + Some(Real::new(0.0f64).unwrap()), + Some(Real::new(1.0f64).unwrap()), ), - (Some(Real::from(4.0f64)), None, None), - (None, Some(Real::from(4.0f64)), None), + (Some(Real::new(4.0f64).unwrap()), None, None), + (None, Some(Real::new(4.0f64).unwrap()), None), (None, None, None), ]; @@ -1336,10 +1356,13 @@ mod tests { let invalid_cases = vec![ ( - Some(Real::from(f64::INFINITY)), - Some(Real::from(f64::INFINITY)), + Some(Real::new(f64::INFINITY).unwrap()), + Some(Real::new(f64::INFINITY).unwrap()), + ), + ( + Some(Real::new(0.0f64).unwrap()), + Some(Real::new(-9999999.0f64).unwrap()), ), - (Some(Real::from(0.0f64)), Some(Real::from(-9999999.0f64))), ]; for (lhs, rhs) in invalid_cases { @@ -1364,10 +1387,10 @@ mod tests { .unwrap() .unwrap(); - assert!(got1 < Real::from(1.0)); - assert!(got1 >= Real::from(0.0)); - assert!(got2 < Real::from(1.0)); - 
assert!(got2 >= Real::from(0.0)); + assert!(got1 < Real::new(1.0).unwrap()); + assert!(got1 >= Real::new(0.0).unwrap()); + assert!(got2 < Real::new(1.0).unwrap()); + assert!(got2 >= Real::new(0.0).unwrap()); assert_ne!(got1, got2); } @@ -1392,7 +1415,7 @@ mod tests { .evaluate::(ScalarFuncSig::RandWithSeedFirstGen) .unwrap() .unwrap(); - assert_eq!(got, Real::from(exp)); + assert_eq!(got, Real::new(exp).unwrap()); } let none_case_got = RpnFnScalarEvaluator::new() @@ -1400,24 +1423,27 @@ mod tests { .evaluate::(ScalarFuncSig::RandWithSeedFirstGen) .unwrap() .unwrap(); - assert_eq!(none_case_got, Real::from(0.15522042769493574)); + assert_eq!(none_case_got, Real::new(0.15522042769493574).unwrap()); } #[test] fn test_asin() { let test_cases = vec![ - (Some(Real::from(0.0_f64)), Some(Real::from(0.0_f64))), ( - Some(Real::from(1.0_f64)), - Some(Real::from(std::f64::consts::PI / 2.0_f64)), + Some(Real::new(0.0_f64).unwrap()), + Some(Real::new(0.0_f64).unwrap()), + ), + ( + Some(Real::new(1.0_f64).unwrap()), + Some(Real::new(std::f64::consts::PI / 2.0_f64).unwrap()), ), ( - Some(Real::from(-1.0_f64)), - Some(Real::from(-std::f64::consts::PI / 2.0_f64)), + Some(Real::new(-1.0_f64).unwrap()), + Some(Real::new(-std::f64::consts::PI / 2.0_f64).unwrap()), ), ( - Some(Real::from(std::f64::consts::SQRT_2 / 2.0_f64)), - Some(Real::from(std::f64::consts::PI / 4.0_f64)), + Some(Real::new(std::f64::consts::SQRT_2 / 2.0_f64).unwrap()), + Some(Real::new(std::f64::consts::PI / 4.0_f64).unwrap()), ), ]; for (input, expect) in test_cases { @@ -1428,9 +1454,9 @@ mod tests { assert!((output.unwrap() - expect.unwrap()).abs() < f64::EPSILON); } let invalid_test_cases = vec![ - (Some(Real::from(f64::INFINITY)), None), - (Some(Real::from(2.0_f64)), None), - (Some(Real::from(-2.0_f64)), None), + (Some(Real::new(f64::INFINITY).unwrap()), None), + (Some(Real::new(2.0_f64).unwrap()), None), + (Some(Real::new(-2.0_f64).unwrap()), None), ]; for (input, expect) in invalid_test_cases { let output: 
Option = RpnFnScalarEvaluator::new() @@ -1445,17 +1471,20 @@ mod tests { fn test_acos() { let test_cases = vec![ ( - Some(Real::from(0.0_f64)), - Some(Real::from(std::f64::consts::PI / 2.0_f64)), + Some(Real::new(0.0_f64).unwrap()), + Some(Real::new(std::f64::consts::PI / 2.0_f64).unwrap()), + ), + ( + Some(Real::new(1.0_f64).unwrap()), + Some(Real::new(0.0_f64).unwrap()), ), - (Some(Real::from(1.0_f64)), Some(Real::from(0.0_f64))), ( - Some(Real::from(-1.0_f64)), - Some(Real::from(std::f64::consts::PI)), + Some(Real::new(-1.0_f64).unwrap()), + Some(Real::new(std::f64::consts::PI).unwrap()), ), ( - Some(Real::from(std::f64::consts::SQRT_2 / 2.0_f64)), - Some(Real::from(std::f64::consts::PI / 4.0_f64)), + Some(Real::new(std::f64::consts::SQRT_2 / 2.0_f64).unwrap()), + Some(Real::new(std::f64::consts::PI / 4.0_f64).unwrap()), ), ]; for (input, expect) in test_cases { @@ -1466,9 +1495,9 @@ mod tests { assert!((output.unwrap() - expect.unwrap()).abs() < f64::EPSILON); } let invalid_test_cases = vec![ - (Some(Real::from(f64::INFINITY)), None), - (Some(Real::from(2.0_f64)), None), - (Some(Real::from(-2.0_f64)), None), + (Some(Real::new(f64::INFINITY).unwrap()), None), + (Some(Real::new(2.0_f64).unwrap()), None), + (Some(Real::new(-2.0_f64).unwrap()), None), ]; for (input, expect) in invalid_test_cases { let output: Option = RpnFnScalarEvaluator::new() @@ -1483,22 +1512,25 @@ mod tests { fn test_atan_1_arg() { let test_cases = vec![ ( - Some(Real::from(1.0_f64)), - Some(Real::from(std::f64::consts::PI / 4.0_f64)), + Some(Real::new(1.0_f64).unwrap()), + Some(Real::new(std::f64::consts::PI / 4.0_f64).unwrap()), ), ( - Some(Real::from(-1.0_f64)), - Some(Real::from(-std::f64::consts::PI / 4.0_f64)), + Some(Real::new(-1.0_f64).unwrap()), + Some(Real::new(-std::f64::consts::PI / 4.0_f64).unwrap()), ), ( - Some(Real::from(f64::MAX)), - Some(Real::from(std::f64::consts::PI / 2.0_f64)), + Some(Real::new(f64::MAX).unwrap()), + Some(Real::new(std::f64::consts::PI / 
2.0_f64).unwrap()), ), ( - Some(Real::from(f64::MIN)), - Some(Real::from(-std::f64::consts::PI / 2.0_f64)), + Some(Real::new(f64::MIN).unwrap()), + Some(Real::new(-std::f64::consts::PI / 2.0_f64).unwrap()), + ), + ( + Some(Real::new(0.0_f64).unwrap()), + Some(Real::new(0.0_f64).unwrap()), ), - (Some(Real::from(0.0_f64)), Some(Real::from(0.0_f64))), ]; for (input, expect) in test_cases { let output: Option = RpnFnScalarEvaluator::new() @@ -1513,29 +1545,29 @@ mod tests { fn test_atan_2_args() { let test_cases = vec![ ( - Some(Real::from(0.0_f64)), - Some(Real::from(0.0_f64)), - Some(Real::from(0.0_f64)), + Some(Real::new(0.0_f64).unwrap()), + Some(Real::new(0.0_f64).unwrap()), + Some(Real::new(0.0_f64).unwrap()), ), ( - Some(Real::from(0.0_f64)), - Some(Real::from(-1.0_f64)), - Some(Real::from(std::f64::consts::PI)), + Some(Real::new(0.0_f64).unwrap()), + Some(Real::new(-1.0_f64).unwrap()), + Some(Real::new(std::f64::consts::PI).unwrap()), ), ( - Some(Real::from(1.0_f64)), - Some(Real::from(-1.0_f64)), - Some(Real::from(3.0_f64 * std::f64::consts::PI / 4.0_f64)), + Some(Real::new(1.0_f64).unwrap()), + Some(Real::new(-1.0_f64).unwrap()), + Some(Real::new(3.0_f64 * std::f64::consts::PI / 4.0_f64).unwrap()), ), ( - Some(Real::from(-1.0_f64)), - Some(Real::from(1.0_f64)), - Some(Real::from(-std::f64::consts::PI / 4.0_f64)), + Some(Real::new(-1.0_f64).unwrap()), + Some(Real::new(1.0_f64).unwrap()), + Some(Real::new(-std::f64::consts::PI / 4.0_f64).unwrap()), ), ( - Some(Real::from(1.0_f64)), - Some(Real::from(0.0_f64)), - Some(Real::from(std::f64::consts::PI / 2.0_f64)), + Some(Real::new(1.0_f64).unwrap()), + Some(Real::new(0.0_f64).unwrap()), + Some(Real::new(std::f64::consts::PI / 2.0_f64).unwrap()), ), ]; for (arg0, arg1, expect) in test_cases { @@ -1602,9 +1634,18 @@ mod tests { #[test] fn test_round_real() { let test_cases = vec![ - (Some(Real::from(-3.12_f64)), Some(Real::from(-3f64))), - (Some(Real::from(f64::MAX)), Some(Real::from(f64::MAX))), - 
(Some(Real::from(f64::MIN)), Some(Real::from(f64::MIN))), + ( + Some(Real::new(-3.12_f64).unwrap()), + Some(Real::new(-3f64).unwrap()), + ), + ( + Some(Real::new(f64::MAX).unwrap()), + Some(Real::new(f64::MAX).unwrap()), + ), + ( + Some(Real::new(f64::MIN).unwrap()), + Some(Real::new(f64::MIN).unwrap()), + ), (None, None), ]; @@ -1758,12 +1799,12 @@ mod tests { .build(); let output = RpnFnScalarEvaluator::new() - .push_param(Some(Real::from(lhs))) + .push_param(Some(Real::new(lhs).unwrap())) .push_param_with_field_type(Some(rhs), rhs_field_type) .evaluate::(ScalarFuncSig::TruncateReal) .unwrap(); - assert_eq!(output, Some(Real::from(expected))); + assert_eq!(output, Some(Real::new(expected).unwrap())); } } @@ -1948,26 +1989,26 @@ mod tests { let real_cases = vec![ ( - Some(Real::from(-1.298_f64)), + Some(Real::new(-1.298_f64).unwrap()), Some(1), - Some(Real::from(-1.3_f64)), + Some(Real::new(-1.3_f64).unwrap()), ), ( - Some(Real::from(-1.298_f64)), + Some(Real::new(-1.298_f64).unwrap()), Some(0), - Some(Real::from(-1.0_f64)), + Some(Real::new(-1.0_f64).unwrap()), ), ( - Some(Real::from(23.298_f64)), + Some(Real::new(23.298_f64).unwrap()), Some(2), - Some(Real::from(23.30_f64)), + Some(Real::new(23.30_f64).unwrap()), ), ( - Some(Real::from(23.298_f64)), + Some(Real::new(23.298_f64).unwrap()), Some(-1), - Some(Real::from(20.0_f64)), + Some(Real::new(20.0_f64).unwrap()), ), - (Some(Real::from(23.298_f64)), None, None), + (Some(Real::new(23.298_f64).unwrap()), None, None), (None, Some(2), None), (None, None, None), ]; diff --git a/components/tidb_query_expr/src/impl_miscellaneous.rs b/components/tidb_query_expr/src/impl_miscellaneous.rs index 9a7492b6813..5d2daed7f9a 100644 --- a/components/tidb_query_expr/src/impl_miscellaneous.rs +++ b/components/tidb_query_expr/src/impl_miscellaneous.rs @@ -318,14 +318,21 @@ mod tests { fn test_real_any_value() { let test_cases = vec![ (vec![], None), - (vec![Real::from(1.2_f64)], Some(Real::from(1.2_f64))), ( - 
vec![Real::from(1.2_f64), Real::from(2.3_f64)], - Some(Real::from(1.2_f64)), + vec![Real::new(1.2_f64).unwrap()], + Some(Real::new(1.2_f64).unwrap()), ), ( - vec![Real::from(1.2_f64), Real::from(2.3_f64), Real::from(3_f64)], - Some(Real::from(1.2_f64)), + vec![Real::new(1.2_f64).unwrap(), Real::new(2.3_f64).unwrap()], + Some(Real::new(1.2_f64).unwrap()), + ), + ( + vec![ + Real::new(1.2_f64).unwrap(), + Real::new(2.3_f64).unwrap(), + Real::new(3_f64).unwrap(), + ], + Some(Real::new(1.2_f64).unwrap()), ), ]; diff --git a/components/tidb_query_expr/src/impl_op.rs b/components/tidb_query_expr/src/impl_op.rs index 1b0ee419ef5..dce8920a545 100644 --- a/components/tidb_query_expr/src/impl_op.rs +++ b/components/tidb_query_expr/src/impl_op.rs @@ -440,12 +440,21 @@ mod tests { fn test_unary_minus_real() { let test_cases = vec![ (None, None), - (Some(Real::from(0.123_f64)), Some(Real::from(-0.123_f64))), - (Some(Real::from(-0.123_f64)), Some(Real::from(0.123_f64))), - (Some(Real::from(0.0_f64)), Some(Real::from(0.0_f64))), ( - Some(Real::from(f64::INFINITY)), - Some(Real::from(f64::NEG_INFINITY)), + Some(Real::new(0.123_f64).unwrap()), + Some(Real::new(-0.123_f64).unwrap()), + ), + ( + Some(Real::new(-0.123_f64).unwrap()), + Some(Real::new(0.123_f64).unwrap()), + ), + ( + Some(Real::new(0.0_f64).unwrap()), + Some(Real::new(0.0_f64).unwrap()), + ), + ( + Some(Real::new(f64::INFINITY).unwrap()), + Some(Real::new(f64::NEG_INFINITY).unwrap()), ), ]; for (arg, expect_output) in test_cases { diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 9bbea72d8d5..52d73429f4c 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -31,7 +31,7 @@ lazy_static = "1.3" libc = "0.2" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } log_wrappers = { path = "../log_wrappers" } -nix = "0.23" +nix = "0.24" num-traits = "0.2" num_cpus = "1" online_config = { path = "../online_config" } From 
050b6c077d130ee46616b018c30e3cb42890aee5 Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 10 Jun 2022 23:08:31 +0800 Subject: [PATCH 0022/1149] server: check raft_client config change after flush (#12781) close tikv/tikv#12780 Signed-off-by: glorv Co-authored-by: zhangjinpeng1987 Co-authored-by: Ti Chi Robot --- src/server/raft_client.rs | 78 +++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 24 deletions(-) diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index e0b30061f0b..bc691bcc05f 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -216,11 +216,20 @@ impl BatchMessageBuffer { msg_size } + #[inline] + fn maybe_refresh_config(&mut self) { + if let Some(new_cfg) = self.cfg_tracker.any_new() { + self.cfg = new_cfg.clone(); + } + } + #[cfg(test)] fn clear(&mut self) { self.batch = BatchRaftMessage::default(); self.size = 0; self.overflowing = None; + // try refresh config + self.maybe_refresh_config(); } } @@ -235,10 +244,6 @@ impl Buffer for BatchMessageBuffer { #[inline] fn push(&mut self, msg: RaftMessage) { let msg_size = Self::message_size(&msg); - // try refresh config before check - if let Some(new_cfg) = self.cfg_tracker.any_new() { - self.cfg = new_cfg.clone(); - } // To avoid building too large batch, we limit each batch's size. Since `msg_size` // is estimated, `GRPC_SEND_MSG_BUF` is reserved for errors. if self.size > 0 @@ -270,6 +275,12 @@ impl Buffer for BatchMessageBuffer { if let Some(more) = self.overflowing.take() { self.push(more); } + + // try refresh config after flush. `max_grpc_send_msg_len` and `raft_msg_max_batch_size` + // can impact the buffer push logic, but since they are soft restriction, we check config change + // at here to avoid affact performance since `push` is a hot path. 
+ self.maybe_refresh_config(); + res } @@ -1190,6 +1201,21 @@ mod tests { assert!(msg_buf.full()); } + fn new_test_msg(size: usize) -> RaftMessage { + let mut msg = RaftMessage::default(); + msg.set_region_id(1); + let mut region_epoch = RegionEpoch::default(); + region_epoch.conf_ver = 1; + region_epoch.version = 0x123456; + msg.set_region_epoch(region_epoch); + msg.set_start_key(vec![0; size]); + msg.set_end_key(vec![]); + msg.mut_message().set_snapshot(Snapshot::default()); + msg.mut_message().set_commit(0); + assert_eq!(BatchMessageBuffer::message_size(&msg), size); + msg + } + #[test] fn test_push_raft_message_cfg_change() { let version_track = Arc::new(VersionTrack::new(Config::default())); @@ -1199,38 +1225,42 @@ mod tests { ); let default_grpc_msg_len = msg_buf.cfg.max_grpc_send_msg_len as usize; - let make_msg = |size: usize| { - let mut msg = RaftMessage::default(); - msg.set_region_id(1); - let mut region_epoch = RegionEpoch::default(); - region_epoch.conf_ver = 1; - region_epoch.version = 0x123456; - msg.set_region_epoch(region_epoch); - msg.set_start_key(vec![0; size]); - msg.set_end_key(vec![]); - msg.mut_message().set_snapshot(Snapshot::default()); - msg.mut_message().set_commit(0); - assert_eq!(BatchMessageBuffer::message_size(&msg), size); - msg - }; - let max_msg_len = default_grpc_msg_len - msg_buf.cfg.raft_client_grpc_send_msg_buffer; - msg_buf.push(make_msg(max_msg_len)); + msg_buf.push(new_test_msg(max_msg_len)); assert!(!msg_buf.full()); - msg_buf.push(make_msg(1)); + msg_buf.push(new_test_msg(1)); assert!(msg_buf.full()); - msg_buf.clear(); // update config version_track.update(|cfg| cfg.max_grpc_send_msg_len *= 2); + msg_buf.clear(); let new_max_msg_len = default_grpc_msg_len * 2 - msg_buf.cfg.raft_client_grpc_send_msg_buffer; for _i in 0..2 { - msg_buf.push(make_msg(new_max_msg_len / 2 - 1)); + msg_buf.push(new_test_msg(new_max_msg_len / 2 - 1)); assert!(!msg_buf.full()); } - msg_buf.push(make_msg(2)); + msg_buf.push(new_test_msg(2)); 
assert!(msg_buf.full()); } + + #[bench] + fn bench_client_buffer_push(b: &mut test::Bencher) { + let version_track = Arc::new(VersionTrack::new(Config::default())); + let mut msg_buf = BatchMessageBuffer::new( + &version_track, + Arc::new(ThreadLoadPool::with_threshold(100)), + ); + + b.iter(|| { + for _i in 0..10 { + msg_buf.push(test::black_box(new_test_msg(1024))); + } + // run clear to mock flush. + msg_buf.clear(); + + test::black_box(&mut msg_buf); + }); + } } From d6d6d6ee725639d05ce81577a71e4a76c14152ce Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Fri, 10 Jun 2022 14:48:31 -0700 Subject: [PATCH 0023/1149] engine_traits: refactor Engines to support both tablet and global kvdb (#12756) ref tikv/tikv#12772 add tablet related function into Engines API. The idea is that for region's KV data read/write, using tablet() instead of engines.kv regardless multirocks is enabled or not. Signed-off-by: tonyxuqqi Signed-off-by: qi.xu Co-authored-by: qi.xu --- Cargo.lock | 6 +- components/engine_traits/src/engine.rs | 144 ++++++++++++- components/engine_traits/src/engines.rs | 1 + components/raftstore/src/store/snap.rs | 2 +- components/server/src/server.rs | 2 +- components/test_raftstore/src/util.rs | 2 +- src/server/engine_factory.rs | 78 ++++++- src/server/engine_factory_v2.rs | 271 ++++++++++++++++++++++++ src/server/mod.rs | 1 + 9 files changed, 495 insertions(+), 12 deletions(-) create mode 100644 src/server/engine_factory_v2.rs diff --git a/Cargo.lock b/Cargo.lock index 19ccbcc72c8..cd8a55146af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2752,7 +2752,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#c1f668d0c85612f5fe6ec8e4351df0fc0bef1286" +source = "git+https://github.com/tikv/rust-rocksdb.git#773784178a0e8e5fdad81f4fd85448a3014a3700" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2771,7 +2771,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source 
= "git+https://github.com/tikv/rust-rocksdb.git#c1f668d0c85612f5fe6ec8e4351df0fc0bef1286" +source = "git+https://github.com/tikv/rust-rocksdb.git#773784178a0e8e5fdad81f4fd85448a3014a3700" dependencies = [ "bzip2-sys", "cc", @@ -4547,7 +4547,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#c1f668d0c85612f5fe6ec8e4351df0fc0bef1286" +source = "git+https://github.com/tikv/rust-rocksdb.git#773784178a0e8e5fdad81f4fd85448a3014a3700" dependencies = [ "libc 0.2.125", "librocksdb_sys", diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index c4dad67e3c5..e97a15c75ae 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -1,6 +1,9 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::fmt::Debug; +use std::{ + fmt::Debug, + path::{Path, PathBuf}, +}; use crate::*; @@ -63,5 +66,142 @@ pub trait KvEngine: // It should be named as `EngineFactory` for consistency, but we are about to rename // engine to tablet, so always use tablet for new traits/types. pub trait TabletFactory { - fn create_tablet(&self) -> Result; + /// Create an tablet by id and suffix. If the tablet exists, it will fail. + /// The id is likely the region Id, the suffix could be the current raft log index. + /// They together could specify a unique path for a region's tablet. + /// The reason to have suffix is that we can keep more than one tablet for a region. + fn create_tablet(&self, id: u64, suffix: u64) -> Result; + + /// Open a tablet by id and suffix. If the tablet exists, it will open it. + /// If the tablet does not exist, it will create it. + fn open_tablet(&self, id: u64, suffix: u64) -> Result { + self.open_tablet_raw(&self.tablet_path(id, suffix), false) + } + + /// Open a tablet by id and suffix from cache---that means it should already be opened. 
+ fn open_tablet_cache(&self, id: u64, suffix: u64) -> Option { + if let Ok(engine) = self.open_tablet_raw(&self.tablet_path(id, suffix), false) { + return Some(engine); + } + None + } + + /// Open a tablet by id and any suffix from cache + fn open_tablet_cache_any(&self, id: u64) -> Option { + self.open_tablet_cache(id, 0) + } + + /// Open tablet by path and readonly flag + fn open_tablet_raw(&self, path: &Path, readonly: bool) -> Result; + + /// Create the shared db for v1 + fn create_shared_db(&self) -> Result; + + /// Destroy the tablet and its data + fn destroy_tablet(&self, id: u64, suffix: u64) -> crate::Result<()>; + + /// Check if the tablet with specified id/suffix exists + #[inline] + fn exists(&self, id: u64, suffix: u64) -> bool { + self.exists_raw(&self.tablet_path(id, suffix)) + } + + /// Check if the tablet with specified path exists + fn exists_raw(&self, path: &Path) -> bool; + + /// Get the tablet path by id and suffix + fn tablet_path(&self, id: u64, suffix: u64) -> PathBuf; + + /// Tablets root path + fn tablets_path(&self) -> PathBuf; + + /// Clone the tablet factory instance + /// Here we don't use the Clone trait because it will break the trait's object safety + fn clone(&self) -> Box + Send>; + + /// Loop visit all opened tablets cached by the specified function. 
+ /// Once the tablet is opened/created, it will be cached in a hashmap + fn loop_tablet_cache(&self, _f: Box); + + /// Load the tablet from path for id and suffix--for scenarios such as applying snapshot + fn load_tablet(&self, _path: &Path, _id: u64, _suffix: u64) -> Result { + unimplemented!(); + } + + /// Mark the tablet with specified id and suffix tombstone + fn mark_tombstone(&self, _id: u64, _suffix: u64) { + unimplemented!(); + } + + /// Check if the tablet with specified id and suffix tombstone + fn is_tombstoned(&self, _region_id: u64, _suffix: u64) -> bool { + unimplemented!(); + } +} + +pub struct DummyFactory +where + EK: KvEngine, +{ + pub engine: Option, + pub root_path: String, +} + +impl TabletFactory for DummyFactory +where + EK: KvEngine, +{ + fn create_tablet(&self, _id: u64, _suffix: u64) -> Result { + Ok(self.engine.as_ref().unwrap().clone()) + } + fn open_tablet_raw(&self, _path: &Path, _readonly: bool) -> Result { + Ok(self.engine.as_ref().unwrap().clone()) + } + fn create_shared_db(&self) -> Result { + Ok(self.engine.as_ref().unwrap().clone()) + } + fn destroy_tablet(&self, _id: u64, _suffix: u64) -> crate::Result<()> { + Ok(()) + } + fn exists_raw(&self, _path: &Path) -> bool { + true + } + fn tablet_path(&self, _id: u64, _suffix: u64) -> PathBuf { + PathBuf::from(&self.root_path) + } + fn tablets_path(&self) -> PathBuf { + PathBuf::from(&self.root_path) + } + + fn clone(&self) -> Box + Send> { + if self.engine.is_none() { + return Box::>::new(DummyFactory { + engine: None, + root_path: self.root_path.clone(), + }); + } + Box::>::new(DummyFactory { + engine: Some(self.engine.as_ref().unwrap().clone()), + root_path: self.root_path.clone(), + }) + } + fn loop_tablet_cache(&self, _f: Box) {} +} + +impl DummyFactory +where + EK: KvEngine, +{ + pub fn new() -> DummyFactory { + DummyFactory { + engine: None, + root_path: "/dummy_root".to_string(), + } + } +} + +impl Default for DummyFactory { + fn default() -> Self { + Self::new() + } } diff 
--git a/components/engine_traits/src/engines.rs b/components/engine_traits/src/engines.rs index fd0fa961c06..4e4089d52dc 100644 --- a/components/engine_traits/src/engines.rs +++ b/components/engine_traits/src/engines.rs @@ -7,6 +7,7 @@ use crate::{ #[derive(Clone, Debug)] pub struct Engines { + // kv can be either global kv store, or the tablet in multirocks version. pub kv: K, pub raft: R, } diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index a39cda850fa..bb308efd054 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -2005,7 +2005,7 @@ pub mod tests { region_state.set_region(region); kv.put_msg_cf(CF_RAFT, &keys::region_state_key(region_id), ®ion_state)?; } - Ok(Engines { kv, raft }) + Ok(Engines::new(kv, raft)) } pub fn get_kv_count(snap: &impl EngineSnapshot) -> usize { diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 4344a706fde..f1fd2167f9d 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1503,7 +1503,7 @@ impl TiKvServer { } let factory = builder.build(); let kv_engine = factory - .create_tablet() + .create_shared_db() .unwrap_or_else(|s| fatal!("failed to create kv engine: {}", s)); let engines = Engines::new(kv_engine, raft_engine); diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 96082bc6fbb..288e99a3837 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -661,7 +661,7 @@ pub fn create_test_engine( builder = builder.compaction_filter_router(router); } let factory = builder.build(); - let engine = factory.create_tablet().unwrap(); + let engine = factory.create_shared_db().unwrap(); let engines = Engines::new(engine, raft_engine); (engines, key_manager, dir, sst_worker) } diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index e9c508a9985..0c02cde0aef 100644 --- 
a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -29,6 +29,7 @@ struct FactoryInner { api_version: ApiVersion, flow_listener: Option, sst_recovery_sender: Option>, + root_db: Mutex>, } pub struct KvEngineFactoryBuilder { @@ -48,6 +49,7 @@ impl KvEngineFactoryBuilder { api_version: config.storage.api_version(), flow_listener: None, sst_recovery_sender: None, + root_db: Mutex::default(), }, router: None, } @@ -93,7 +95,7 @@ pub struct KvEngineFactory { } impl KvEngineFactory { - fn create_raftstore_compaction_listener(&self) -> Option { + pub fn create_raftstore_compaction_listener(&self) -> Option { let ch = match &self.router { Some(r) => Mutex::new(r.clone()), None => return None, @@ -126,7 +128,7 @@ impl KvEngineFactory { )) } - fn create_tablet(&self, tablet_path: &Path) -> Result { + pub fn create_tablet(&self, tablet_path: &Path) -> Result { // Create kv engine. let mut kv_db_opts = self.inner.rocksdb_config.build_opt(); kv_db_opts.set_env(self.inner.env.clone()); @@ -163,6 +165,34 @@ impl KvEngineFactory { Ok(kv_engine) } + pub fn destroy_tablet(&self, tablet_path: &Path) -> engine_traits::Result<()> { + info!("destroy tablet"; "path" => %tablet_path.display()); + // Create kv engine. 
+ let mut kv_db_opts = self.inner.rocksdb_config.build_opt(); + kv_db_opts.set_env(self.inner.env.clone()); + if let Some(filter) = self.create_raftstore_compaction_listener() { + kv_db_opts.add_event_listener(filter); + } + let _kv_cfs_opts = self.inner.rocksdb_config.build_cf_opts( + &self.inner.block_cache, + self.inner.region_info_accessor.as_ref(), + self.inner.api_version, + ); + // TODOTODO: call rust-rocks or tirocks to destroy_engine; + /* + engine_rocks::raw_util::destroy_engine( + tablet_path.to_str().unwrap(), + kv_db_opts, + kv_cfs_opts, + )?;*/ + let _ = std::fs::remove_dir_all(tablet_path); + Ok(()) + } + + pub fn store_path(&self) -> PathBuf { + self.inner.store_path.clone() + } + #[inline] fn kv_engine_path(&self) -> PathBuf { self.inner.store_path.join(DEFAULT_ROCKSDB_SUB_DIR) @@ -171,8 +201,48 @@ impl KvEngineFactory { impl TabletFactory for KvEngineFactory { #[inline] - fn create_tablet(&self) -> Result { + fn create_shared_db(&self) -> Result { let root_path = self.kv_engine_path(); - self.create_tablet(&root_path) + let tablet = self.create_tablet(&root_path)?; + let mut root_db = self.inner.root_db.lock().unwrap(); + root_db.replace(tablet.clone()); + Ok(tablet) + } + + fn create_tablet(&self, _id: u64, _suffix: u64) -> Result { + if let Ok(db) = self.inner.root_db.lock() { + let cp = db.as_ref().unwrap().clone(); + return Ok(cp); + } + self.create_shared_db() + } + + fn open_tablet_raw(&self, _path: &Path, _readonly: bool) -> Result { + TabletFactory::create_tablet(self, 0, 0) + } + + fn exists_raw(&self, _path: &Path) -> bool { + false + } + fn tablet_path(&self, _id: u64, _suffix: u64) -> PathBuf { + self.kv_engine_path() + } + fn tablets_path(&self) -> PathBuf { + self.kv_engine_path() + } + + #[inline] + fn destroy_tablet(&self, _id: u64, _suffix: u64) -> engine_traits::Result<()> { + Ok(()) + } + fn clone(&self) -> Box + Send> { + Box::new(std::clone::Clone::clone(self)) + } + + fn loop_tablet_cache(&self, mut f: Box) { + if let Ok(db) 
= self.inner.root_db.lock() { + let db = db.as_ref().unwrap(); + f(0, 0, db); + } } } diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs new file mode 100644 index 00000000000..2dca2ff14f3 --- /dev/null +++ b/src/server/engine_factory_v2.rs @@ -0,0 +1,271 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + path::{Path, PathBuf}, + sync::{Arc, Mutex}, +}; + +use collections::HashMap; +use engine_rocks::RocksEngine; +use engine_traits::{RaftEngine, Result, TabletFactory}; + +use crate::server::engine_factory::KvEngineFactory; + +const TOMBSTONE_MARK: &str = "TOMBSTONE_TABLET"; + +#[derive(Clone)] +pub struct KvEngineFactoryV2 { + inner: KvEngineFactory, + registry: Arc>>, +} + +impl TabletFactory for KvEngineFactoryV2 { + fn create_tablet(&self, id: u64, suffix: u64) -> Result { + let mut reg = self.registry.lock().unwrap(); + if let Some(db) = reg.get(&(id, suffix)) { + return Err(box_err!( + "region {} {} already exists", + id, + db.as_inner().path() + )); + } + let tablet_path = self.tablet_path(id, suffix); + let kv_engine = self.inner.create_tablet(&tablet_path)?; + debug!("inserting tablet"; "key" => ?(id, suffix)); + reg.insert((id, suffix), kv_engine.clone()); + Ok(kv_engine) + } + + fn open_tablet(&self, id: u64, suffix: u64) -> Result { + let mut reg = self.registry.lock().unwrap(); + if let Some(db) = reg.get(&(id, suffix)) { + return Ok(db.clone()); + } + + let db_path = self.tablet_path(id, suffix); + let db = self.open_tablet_raw(db_path.as_path(), false)?; + debug!("open tablet"; "key" => ?(id, suffix)); + reg.insert((id, suffix), db.clone()); + Ok(db) + } + + fn open_tablet_cache(&self, id: u64, suffix: u64) -> Option { + let reg = self.registry.lock().unwrap(); + if let Some(db) = reg.get(&(id, suffix)) { + return Some(db.clone()); + } + None + } + + fn open_tablet_cache_any(&self, id: u64) -> Option { + let reg = self.registry.lock().unwrap(); + if let Some(k) = reg.keys().find(|k| k.0 
== id) { + debug!("choose a random tablet"; "key" => ?k); + return Some(reg.get(k).unwrap().clone()); + } + None + } + + fn open_tablet_raw(&self, path: &Path, _readonly: bool) -> Result { + if !RocksEngine::exists(path.to_str().unwrap_or_default()) { + return Err(box_err!( + "path {} does not have db", + path.to_str().unwrap_or_default() + )); + } + let (mut tablet_id, mut tablet_suffix) = (0, 1); + if let Some(s) = path.file_name().map(|s| s.to_string_lossy()) { + let mut split = s.split('_'); + tablet_id = split.next().and_then(|s| s.parse().ok()).unwrap_or(0); + tablet_suffix = split.next().and_then(|s| s.parse().ok()).unwrap_or(1); + } + self.create_tablet(tablet_id, tablet_suffix) + } + + #[inline] + fn create_shared_db(&self) -> Result { + self.create_tablet(0, 0) + } + + #[inline] + fn exists_raw(&self, path: &Path) -> bool { + RocksEngine::exists(path.to_str().unwrap_or_default()) + } + + #[inline] + fn tablets_path(&self) -> PathBuf { + self.inner.store_path().join("tablets") + } + + #[inline] + fn tablet_path(&self, id: u64, suffix: u64) -> PathBuf { + self.inner + .store_path() + .join(format!("tablets/{}_{}", id, suffix)) + } + + #[inline] + fn mark_tombstone(&self, region_id: u64, suffix: u64) { + let path = self.tablet_path(region_id, suffix).join(TOMBSTONE_MARK); + std::fs::File::create(&path).unwrap(); + debug!("tombstone tablet"; "region_id" => region_id, "suffix" => suffix); + self.registry.lock().unwrap().remove(&(region_id, suffix)); + } + + #[inline] + fn is_tombstoned(&self, region_id: u64, suffix: u64) -> bool { + self.tablet_path(region_id, suffix) + .join(TOMBSTONE_MARK) + .exists() + } + + #[inline] + fn destroy_tablet(&self, id: u64, suffix: u64) -> engine_traits::Result<()> { + let path = self.tablet_path(id, suffix); + self.registry.lock().unwrap().remove(&(id, suffix)); + self.inner.destroy_tablet(&path) + } + + #[inline] + fn loop_tablet_cache(&self, mut f: Box) { + let reg = self.registry.lock().unwrap(); + for ((id, suffix), 
tablet) in &*reg { + f(*id, *suffix, tablet) + } + } + + #[inline] + fn load_tablet(&self, path: &Path, id: u64, suffix: u64) -> Result { + { + let reg = self.registry.lock().unwrap(); + if let Some(db) = reg.get(&(id, suffix)) { + return Err(box_err!( + "region {} {} already exists", + id, + db.as_inner().path() + )); + } + } + + let db_path = self.tablet_path(id, suffix); + std::fs::rename(path, &db_path)?; + self.open_tablet_raw(db_path.as_path(), false) + } + + fn clone(&self) -> Box + Send> { + Box::new(std::clone::Clone::clone(self)) + } +} + +#[cfg(test)] +mod tests { + use engine_traits::TabletFactory; + + use super::*; + use crate::{config::TiKvConfig, server::KvEngineFactoryBuilder}; + + lazy_static! { + static ref TEST_CONFIG: TiKvConfig = { + let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); + let common_test_cfg = + manifest_dir.join("components/test_raftstore/src/common-test.toml"); + TiKvConfig::from_file(&common_test_cfg, None).unwrap_or_else(|e| { + panic!( + "invalid auto generated configuration file {}, err {}", + manifest_dir.display(), + e + ); + }) + }; + } + + impl KvEngineFactoryV2 { + pub fn new(inner: KvEngineFactory) -> Self { + KvEngineFactoryV2 { + inner, + registry: Arc::new(Mutex::new(HashMap::default())), + } + } + } + + #[test] + fn test_kvengine_factory() { + let cfg = TEST_CONFIG.clone(); + let dir = test_util::temp_dir("test_kvengine_factory", false); + let env = cfg.build_shared_rocks_env(None, None).unwrap(); + + let builder = KvEngineFactoryBuilder::::new(env, &cfg, dir.path()); + let factory = builder.build(); + let shared_db = factory.create_shared_db().unwrap(); + let tablet = TabletFactory::create_tablet(&factory, 1, 10); + assert!(tablet.is_ok()); + let tablet = tablet.unwrap(); + let tablet2 = factory.open_tablet(1, 10).unwrap(); + assert_eq!(tablet.as_inner().path(), shared_db.as_inner().path()); + assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); + let tablet2 = factory.open_tablet_cache(1, 
10).unwrap(); + assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); + let tablet2 = factory.open_tablet_cache_any(1).unwrap(); + assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); + let tablet_path = factory.tablet_path(1, 10); + let tablet2 = factory.open_tablet_raw(&tablet_path, false).unwrap(); + assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); + } + + #[test] + fn test_kvengine_factory_v2() { + let cfg = TEST_CONFIG.clone(); + let dir = test_util::temp_dir("test_kvengine_factory_v2", false); + let env = cfg.build_shared_rocks_env(None, None).unwrap(); + + let builder = KvEngineFactoryBuilder::::new(env, &cfg, dir.path()); + let inner_factory = builder.build(); + let factory = KvEngineFactoryV2::new(inner_factory); + let tablet = factory.create_tablet(1, 10); + assert!(tablet.is_ok()); + let tablet = tablet.unwrap(); + let tablet2 = factory.open_tablet(1, 10).unwrap(); + assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); + let tablet2 = factory.open_tablet_cache(1, 10).unwrap(); + assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); + let tablet2 = factory.open_tablet_cache_any(1).unwrap(); + assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); + let tablet_path = factory.tablet_path(1, 10); + let result = factory.open_tablet_raw(&tablet_path, false); + assert!(result.is_err()); + + assert!(factory.exists(1, 10)); + assert!(!factory.exists(1, 11)); + assert!(!factory.exists(2, 10)); + assert!(!factory.exists(2, 11)); + assert!(factory.exists_raw(&tablet_path)); + assert!(!factory.is_tombstoned(1, 10)); + assert!(factory.load_tablet(&tablet_path, 1, 10).is_err()); + assert!(factory.load_tablet(&tablet_path, 1, 20).is_ok()); + factory.mark_tombstone(1, 20); + assert!(factory.is_tombstoned(1, 20)); + factory.destroy_tablet(1, 20).unwrap(); + let result = factory.open_tablet(1, 20); + assert!(result.is_err()); + } + + #[test] + fn test_get_live_tablets() { + let cfg = 
TEST_CONFIG.clone(); + let dir = test_util::temp_dir("test_get_live_tablets", false); + let env = cfg.build_shared_rocks_env(None, None).unwrap(); + + let builder = KvEngineFactoryBuilder::::new(env, &cfg, dir.path()); + let inner_factory = builder.build(); + let factory = KvEngineFactoryV2::new(inner_factory); + factory.create_tablet(1, 10).unwrap(); + factory.create_tablet(2, 10).unwrap(); + let mut count = 0; + factory.loop_tablet_cache(Box::new(|id, suffix, _tablet| { + assert!(id == 1 || id == 2); + assert!(suffix == 10); + count += 1; + })); + assert_eq!(count, 2); + } +} diff --git a/src/server/mod.rs b/src/server/mod.rs index 69a8f87d58f..af1aa289de7 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -6,6 +6,7 @@ mod raft_client; pub mod config; pub mod debug; mod engine_factory; +mod engine_factory_v2; pub mod errors; pub mod gc_worker; pub mod load_statistics; From 17b8468e9411e7218befbb1372d7ced09a00f720 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Mon, 13 Jun 2022 13:54:32 +0800 Subject: [PATCH 0024/1149] txn: add more debug information for the txn commands (#12779) close tikv/tikv#12803 In the investigation process, the debug log information helps a lot, but there's still some important information missing such as retrying or 1pc flags. Changed: 1. Add more useful information displaying commands. 2. Redact necessary parts. 
Signed-off-by: cfzjywxk Co-authored-by: Ti Chi Robot --- components/txn_types/src/types.rs | 38 ++++++++++++- .../txn/commands/acquire_pessimistic_lock.rs | 3 +- .../txn/commands/check_secondary_locks.rs | 2 +- src/storage/txn/commands/check_txn_status.rs | 4 +- src/storage/txn/commands/commit.rs | 2 +- src/storage/txn/commands/macros.rs | 35 ++++++++++++ .../txn/commands/pessimistic_rollback.rs | 2 +- src/storage/txn/commands/prewrite.rs | 54 ++++++++++++++++++- src/storage/txn/commands/resolve_lock.rs | 2 +- src/storage/txn/commands/resolve_lock_lite.rs | 2 +- src/storage/txn/commands/rollback.rs | 2 +- 11 files changed, 135 insertions(+), 11 deletions(-) diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 53d6c9e3e00..432f1eafc34 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -266,7 +266,7 @@ pub enum MutationType { /// (the key already exist or not exist). The assertion should pass if the mutation (in a prewrite /// request) is going to be finished successfully, otherwise it indicates there should be some bug /// causing the attempt to write wrong data. -#[derive(Debug, Clone)] +#[derive(Clone)] pub enum Mutation { /// Put `Value` into `Key`, overwriting any existing value. 
Put((Key, Value), Assertion), @@ -284,6 +284,42 @@ pub enum Mutation { CheckNotExists(Key, Assertion), } +impl Debug for Mutation { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self) + } +} + +impl Display for Mutation { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Mutation::Put((key, value), assertion) => write!( + f, + "Put key:{:?} value:{:?} assertion:{:?}", + key, + &log_wrappers::Value::value(value), + assertion + ), + Mutation::Delete(key, assertion) => { + write!(f, "Delete key:{:?} assertion:{:?}", key, assertion) + } + Mutation::Lock(key, assertion) => { + write!(f, "Lock key:{:?} assertion:{:?}", key, assertion) + } + Mutation::Insert((key, value), assertion) => write!( + f, + "Put key:{:?} value:{:?} assertion:{:?}", + key, + &log_wrappers::Value::value(value), + assertion + ), + Mutation::CheckNotExists(key, assertion) => { + write!(f, "CheckNotExists key:{:?} assertion:{:?}", key, assertion) + } + } + } +} + impl Mutation { pub fn key(&self) -> &Key { match self { diff --git a/src/storage/txn/commands/acquire_pessimistic_lock.rs b/src/storage/txn/commands/acquire_pessimistic_lock.rs index e1785a7409d..ca94382491c 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock.rs @@ -26,7 +26,8 @@ command! { /// This can be rolled back with a [`PessimisticRollback`](Command::PessimisticRollback) command. AcquirePessimisticLock: cmd_ty => StorageResult, - display => "kv::command::acquirepessimisticlock keys({}) @ {} {} | {:?}", (keys.len, start_ts, for_update_ts, ctx), + display => "kv::command::acquirepessimisticlock keys({:?}) @ {} {} {} {:?} {} {} | {:?}", + (keys, start_ts, lock_ttl, for_update_ts, wait_timeout, min_commit_ts, check_existence, ctx), content => { /// The set of keys to lock. 
keys: Vec<(Key, bool)>, diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index 9a8f681311c..65abc2ffd1b 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -29,7 +29,7 @@ command! { /// status being changed, a rollback may be written. CheckSecondaryLocks: cmd_ty => SecondaryLocksStatus, - display => "kv::command::CheckSecondaryLocks {} keys@{} | {:?}", (keys.len, start_ts, ctx), + display => "kv::command::CheckSecondaryLocks {:?} keys@{} | {:?}", (keys, start_ts, ctx), content => { /// The keys of secondary locks. keys: Vec, diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index 844ba5792a7..7ce843594a9 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -29,7 +29,9 @@ command! { /// [`Prewrite`](Command::Prewrite). CheckTxnStatus: cmd_ty => TxnStatus, - display => "kv::command::check_txn_status {} @ {} curr({}, {}) | {:?}", (primary_key, lock_ts, caller_start_ts, current_ts, ctx), + display => "kv::command::check_txn_status {} @ {} curr({}, {}, {}, {}, {}) | {:?}", + (primary_key, lock_ts, caller_start_ts, current_ts, rollback_if_not_exist, + force_sync_commit, resolving_pessimistic_lock, ctx), content => { /// The primary key of the transaction. primary_key: Key, diff --git a/src/storage/txn/commands/commit.rs b/src/storage/txn/commands/commit.rs index d73dc23ee06..8241b1b9c9c 100644 --- a/src/storage/txn/commands/commit.rs +++ b/src/storage/txn/commands/commit.rs @@ -23,7 +23,7 @@ command! { /// This should be following a [`Prewrite`](Command::Prewrite). Commit: cmd_ty => TxnStatus, - display => "kv::command::commit {} {} -> {} | {:?}", (keys.len, lock_ts, commit_ts, ctx), + display => "kv::command::commit {:?} {} -> {} | {:?}", (keys, lock_ts, commit_ts, ctx), content => { /// The keys affected. 
keys: Vec, diff --git a/src/storage/txn/commands/macros.rs b/src/storage/txn/commands/macros.rs index 29ec846b864..ea19f599d6d 100644 --- a/src/storage/txn/commands/macros.rs +++ b/src/storage/txn/commands/macros.rs @@ -79,6 +79,41 @@ macro_rules! command { write!(f, "{}", self) } } + }; + ( + $(#[$outer_doc: meta])* + $cmd: ident: + cmd_ty => $cmd_ty: ty, + content => { + $($(#[$inner_doc:meta])* $arg: ident : $arg_ty: ty,)* + } + ) => { + $(#[$outer_doc])* + pub struct $cmd { + pub ctx: crate::storage::Context, + pub deadline: ::tikv_util::deadline::Deadline, + $($(#[$inner_doc])* pub $arg: $arg_ty,)* + } + + impl $cmd { + /// Return a `TypedCommand` that encapsulates the result of executing this command. + pub fn new( + $($arg: $arg_ty,)* + ctx: crate::storage::Context, + ) -> TypedCommand<$cmd_ty> { + let execution_duration_limit = if ctx.max_execution_duration_ms == 0 { + crate::storage::txn::scheduler::DEFAULT_EXECUTION_DURATION_LIMIT + } else { + ::std::time::Duration::from_millis(ctx.max_execution_duration_ms) + }; + let deadline = ::tikv_util::deadline::Deadline::from_now(execution_duration_limit); + Command::$cmd($cmd { + ctx, + deadline, + $($arg,)* + }).into() + } + } } } diff --git a/src/storage/txn/commands/pessimistic_rollback.rs b/src/storage/txn/commands/pessimistic_rollback.rs index e583a88d2f0..17a72610065 100644 --- a/src/storage/txn/commands/pessimistic_rollback.rs +++ b/src/storage/txn/commands/pessimistic_rollback.rs @@ -25,7 +25,7 @@ command! { /// This can roll back an [`AcquirePessimisticLock`](Command::AcquirePessimisticLock) command. PessimisticRollback: cmd_ty => Vec>, - display => "kv::command::pessimistic_rollback keys({}) @ {} {} | {:?}", (keys.len, start_ts, for_update_ts, ctx), + display => "kv::command::pessimistic_rollback keys({:?}) @ {} {} | {:?}", (keys, start_ts, for_update_ts, ctx), content => { /// The keys to be rolled back. 
keys: Vec, diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 4c2caec12b2..1c0cbabd193 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -42,7 +42,6 @@ command! { /// or a [`Rollback`](Command::Rollback) should follow. Prewrite: cmd_ty => PrewriteResult, - display => "kv::command::prewrite mutations({}) @ {} | {:?}", (mutations.len, start_ts, ctx), content => { /// The set of mutations to apply. mutations: Vec, @@ -71,6 +70,33 @@ command! { } } +impl std::fmt::Display for Prewrite { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "kv::command::prewrite mutations({:?}) primary({:?}) secondary_len({:?})@ {} {} {} {} {} {} {} {:?} | {:?}", + self.mutations, + log_wrappers::Value::key(self.primary.as_slice()), + self.secondary_keys.as_ref().map(|sk| sk.len()), + self.start_ts, + self.lock_ttl, + self.skip_constraint_check, + self.txn_size, + self.min_commit_ts, + self.max_commit_ts, + self.try_one_pc, + self.assertion_level, + self.ctx, + ) + } +} + +impl std::fmt::Debug for Prewrite { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self) + } +} + impl Prewrite { #[cfg(test)] pub fn with_defaults( @@ -225,7 +251,6 @@ command! { /// or a [`Rollback`](Command::Rollback) should follow. PrewritePessimistic: cmd_ty => PrewriteResult, - display => "kv::command::prewrite_pessimistic mutations({}) @ {} | {:?}", (mutations.len, start_ts, ctx), content => { /// The set of mutations to apply; the bool = is pessimistic lock. mutations: Vec<(Mutation, bool)>, @@ -254,6 +279,31 @@ command! 
{ } } +impl std::fmt::Display for PrewritePessimistic { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "kv::command::pessimistic_prewrite mutations({:?}) primary({:?}) secondary_len({:?})@ {} {} {} {} {} {} {:?}| {:?}", + self.mutations, + log_wrappers::Value::key(self.primary.as_slice()), + self.secondary_keys.as_ref().map(|sk| sk.len()), + self.start_ts, + self.lock_ttl, + self.txn_size, + self.min_commit_ts, + self.max_commit_ts, + self.try_one_pc, + self.assertion_level, + self.ctx, + ) + } +} +impl std::fmt::Debug for PrewritePessimistic { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self) + } +} + impl PrewritePessimistic { #[cfg(test)] pub fn with_defaults( diff --git a/src/storage/txn/commands/resolve_lock.rs b/src/storage/txn/commands/resolve_lock.rs index e369266fa6d..9db90f450d8 100644 --- a/src/storage/txn/commands/resolve_lock.rs +++ b/src/storage/txn/commands/resolve_lock.rs @@ -30,7 +30,7 @@ command! { /// This should follow after a `ResolveLockReadPhase`. ResolveLock: cmd_ty => (), - display => "kv::resolve_lock", (), + display => "kv::resolve_lock {:?} scan_key({:?}) key_locks({:?})", (txn_status, scan_key, key_locks), content => { /// Maps lock_ts to commit_ts. If a transaction was rolled back, it is mapped to 0. /// diff --git a/src/storage/txn/commands/resolve_lock_lite.rs b/src/storage/txn/commands/resolve_lock_lite.rs index 7879145369c..e797ea62bf9 100644 --- a/src/storage/txn/commands/resolve_lock_lite.rs +++ b/src/storage/txn/commands/resolve_lock_lite.rs @@ -22,7 +22,7 @@ command! { /// Resolve locks on `resolve_keys` according to `start_ts` and `commit_ts`. ResolveLockLite: cmd_ty => (), - display => "kv::resolve_lock_lite", (), + display => "kv::resolve_lock_lite resolve_keys({:?}) {} {} | {:?}", (resolve_keys, start_ts, commit_ts, ctx), content => { /// The transaction timestamp. 
start_ts: TimeStamp, diff --git a/src/storage/txn/commands/rollback.rs b/src/storage/txn/commands/rollback.rs index 6d686092f18..e6641147f04 100644 --- a/src/storage/txn/commands/rollback.rs +++ b/src/storage/txn/commands/rollback.rs @@ -24,7 +24,7 @@ command! { /// This should be following a [`Prewrite`](Command::Prewrite) on the given key. Rollback: cmd_ty => (), - display => "kv::command::rollback keys({}) @ {} | {:?}", (keys.len, start_ts, ctx), + display => "kv::command::rollback keys({:?}) @ {} | {:?}", (keys, start_ts, ctx), content => { keys: Vec, /// The transaction timestamp. From 7bfcf60730ad30ed83a2125dd13b576fe393c853 Mon Sep 17 00:00:00 2001 From: Connor Date: Mon, 13 Jun 2022 14:56:33 +0800 Subject: [PATCH 0025/1149] metrics: Add missing metrics for async io code path (#12788) close tikv/tikv#12787 Add missing metrics for async io code path Signed-off-by: Connor1996 Co-authored-by: Ti Chi Robot --- metrics/grafana/tikv_details.json | 149 +++++++++++++++++++++++++++++- 1 file changed, 148 insertions(+), 1 deletion(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 6192b4f3a5e..009868d3a5c 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -12092,7 +12092,7 @@ }, { "exemplar": true, - "expr": "sum(tikv_raftstore_io_reschedule_pending_task_total{instance=~\"$instance\"}) by (instance)", + "expr": "sum(tikv_raftstore_io_reschedule_pending_tasks_total{instance=~\"$instance\"}) by (instance)", "hide": false, "interval": "", "legendFormat": "pending-task-{{instance}}", @@ -13215,6 +13215,153 @@ "alignLevel": null } }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dashes": false, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The handle duration of each store write task msg", 
+ "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 46 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763572700, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(delta(tikv_raftstore_store_write_handle_msg_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Store write handle msg duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dashes": false, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The distribution of write trigger size", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 46 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763572701, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(delta(tikv_raftstore_store_write_trigger_wb_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\"}[1m])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Store write trigger size", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, From 0d9b7b33b7928be06016075329fe0b2c4fba25f5 Mon Sep 17 00:00:00 2001 From: Lucas Date: Tue, 14 Jun 2022 15:10:33 +0800 Subject: [PATCH 0026/1149] raftstore: reset perf context before each apply write (#12808) ref tikv/tikv#11044, close tikv/tikv#11044 Signed-off-by: Lucasliang --- components/raftstore/src/store/fsm/apply.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index ca6cabb7a95..e3c1172ef5b 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -520,6 +520,7 @@ where self.pending_ssts = vec![]; } if !self.kv_wb_mut().is_empty() { + self.perf_context.start_observe(); let mut write_opts = engine_traits::WriteOptions::new(); write_opts.set_sync(need_sync); self.kv_wb().write_opt(&write_opts).unwrap_or_else(|e| { @@ -3819,7 +3820,6 @@ where } update_cfg(&incoming.apply_batch_system); } - self.apply_ctx.perf_context.start_observe(); } fn handle_control(&mut self, control: &mut ControlFsm) -> Option { From 11b5d4c3d6ca740cc0bf272691fd15ec94345cd9 Mon Sep 17 00:00:00 2001 From: Connor Date: Wed, 15 Jun 2022 18:22:34 +0800 Subject: [PATCH 0027/1149] pd_client: Do not reconnect for pd unknown error 
(#12827) close tikv/tikv#12345 do not reconnect for pd unknown error Signed-off-by: Connor1996 --- components/pd_client/src/errors.rs | 5 +++-- components/test_pd/src/mocker/retry.rs | 12 ++++-------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/components/pd_client/src/errors.rs b/components/pd_client/src/errors.rs index b86edfc6e98..a9e4ffe6266 100644 --- a/components/pd_client/src/errors.rs +++ b/components/pd_client/src/errors.rs @@ -30,8 +30,9 @@ pub type Result = result::Result; impl Error { pub fn retryable(&self) -> bool { match self { - Error::Grpc(_) | Error::Other(_) | Error::ClusterNotBootstrapped(_) => true, - Error::RegionNotFound(_) + Error::Grpc(_) | Error::ClusterNotBootstrapped(_) => true, + Error::Other(_) + | Error::RegionNotFound(_) | Error::StoreTombstone(_) | Error::GlobalConfigNotFound(_) | Error::ClusterBootstrapped(_) diff --git a/components/test_pd/src/mocker/retry.rs b/components/test_pd/src/mocker/retry.rs index ef49aee3f66..be9c90633c0 100644 --- a/components/test_pd/src/mocker/retry.rs +++ b/components/test_pd/src/mocker/retry.rs @@ -87,11 +87,9 @@ impl Default for NotRetry { impl PdMocker for NotRetry { fn get_region_by_id(&self, _: &GetRegionByIdRequest) -> Option> { if !self.is_visited.swap(true, Ordering::Relaxed) { - info!( - "[NotRetry] get_region_by_id returns Ok(_) with header has IncompatibleVersion error" - ); + info!("[NotRetry] get_region_by_id returns Ok(_) with header has RegionNotFound error"); let mut err = Error::default(); - err.set_type(ErrorType::IncompatibleVersion); + err.set_type(ErrorType::RegionNotFound); let mut resp = GetRegionResponse::default(); resp.mut_header().set_error(err); Some(Ok(resp)) @@ -103,11 +101,9 @@ impl PdMocker for NotRetry { fn get_store(&self, _: &GetStoreRequest) -> Option> { if !self.is_visited.swap(true, Ordering::Relaxed) { - info!( - "[NotRetry] get_region_by_id returns Ok(_) with header has IncompatibleVersion error" - ); + info!("[NotRetry] get_region_by_id 
returns Ok(_) with header has Unknown error"); let mut err = Error::default(); - err.set_type(ErrorType::IncompatibleVersion); + err.set_type(ErrorType::Unknown); let mut resp = GetStoreResponse::default(); resp.mut_header().set_error(err); Some(Ok(resp)) From 2fbf7ee5a348df5f1839ff6fc47753b50ff7c76f Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 15 Jun 2022 20:20:33 +0800 Subject: [PATCH 0028/1149] tests: ignore env var dependent tests at runtime (#12805) ref rust-lang/cargo#10250, close tikv/tikv#12804, ref rust-lang/rust#68007, ref rust-lang/rust#96132 Signed-off-by: tabokie --- Cargo.lock | 41 ++++++++++--------- cmd/build.rs | 2 +- components/coprocessor_plugin_api/src/lib.rs | 1 - .../encryption/src/encrypted_file/mod.rs | 1 - components/raftstore/src/lib.rs | 1 - components/tidb_query_executors/src/lib.rs | 2 - components/tidb_query_expr/src/lib.rs | 2 - components/tikv_alloc/src/jemalloc.rs | 6 +-- components/tikv_alloc/src/lib.rs | 39 ++++++++++++++++++ components/tracker/src/lib.rs | 1 - rust-toolchain | 2 +- scripts/test-all | 12 ++---- src/import/sst_service.rs | 2 +- src/server/service/mod.rs | 2 +- src/storage/mod.rs | 5 +-- src/storage/raw/encoded.rs | 5 +-- src/storage/raw/raw_mvcc.rs | 5 +-- 17 files changed, 75 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cd8a55146af..3350e0ef252 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -49,7 +49,7 @@ checksum = "43bb833f0bf979d8475d38fbf09ed3b8a55e1885fe93ad3f93239fc6a4f17b98" dependencies = [ "getrandom 0.2.3", "once_cell", - "version_check 0.9.2", + "version_check 0.9.4", ] [[package]] @@ -2122,7 +2122,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" dependencies = [ "typenum", - "version_check 0.9.2", + "version_check 0.9.4", ] [[package]] @@ -3169,7 +3169,7 @@ checksum = "1b1d11e1ef389c76fe5b81bcaf2ea32cf88b62bc494e19f493d0b30e7a930109" dependencies = [ "memchr", 
"minimal-lexical", - "version_check 0.9.2", + "version_check 0.9.4", ] [[package]] @@ -3796,7 +3796,7 @@ dependencies = [ "proc-macro2", "quote", "syn", - "version_check 0.9.2", + "version_check 0.9.4", ] [[package]] @@ -3807,7 +3807,7 @@ checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ "proc-macro2", "quote", - "version_check 0.9.2", + "version_check 0.9.4", ] [[package]] @@ -3999,9 +3999,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.9" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" dependencies = [ "proc-macro2", ] @@ -4023,8 +4023,8 @@ dependencies = [ [[package]] name = "raft-engine" -version = "0.1.0" -source = "git+https://github.com/tikv/raft-engine.git#0e066f8626b43b2a8a0a6bc9c7f0502b6fdc3d05" +version = "0.2.0" +source = "git+https://github.com/tikv/raft-engine.git#4e89901a3eff850a47ea0e6b44bc74d9fed84769" dependencies = [ "byteorder", "crc32fast", @@ -4054,8 +4054,8 @@ dependencies = [ [[package]] name = "raft-engine-ctl" -version = "0.1.0" -source = "git+https://github.com/tikv/raft-engine.git#0e066f8626b43b2a8a0a6bc9c7f0502b6fdc3d05" +version = "0.2.0" +source = "git+https://github.com/tikv/raft-engine.git#4e89901a3eff850a47ea0e6b44bc74d9fed84769" dependencies = [ "clap 3.1.6", "env_logger", @@ -4506,11 +4506,12 @@ dependencies = [ [[package]] name = "rhai" -version = "1.4.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "898b114d6cfa18af4593393fdc6c7437118e7e624d97f635fba8c75fd5c06f56" +checksum = "9f06953bb8b9e4307cb7ccc0d9d018e2ddd25a30d32831f631ce4fe8f17671f7" dependencies = [ "ahash", + "bitflags", "instant", "num-traits", "rhai_codegen", @@ -4520,9 +4521,9 @@ dependencies = [ [[package]] name = "rhai_codegen" -version = "1.3.0" +version = 
"1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e02d33d76a7aa8ec72ac8298d5b52134fd2dff77445ada0c65f6f8c40d8f2931" +checksum = "75a39bc2aa9258b282ee5518dac493491a9c4c11a6d7361b9d2644c922fc6488" dependencies = [ "proc-macro2", "quote", @@ -5173,11 +5174,13 @@ checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" [[package]] name = "smartstring" -version = "0.2.10" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e714dff2b33f2321fdcd475b71cec79781a692d846f37f415fb395a1d2bcd48e" +checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" dependencies = [ + "autocfg", "static_assertions", + "version_check 0.9.4", ] [[package]] @@ -6738,9 +6741,9 @@ checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" [[package]] name = "version_check" -version = "0.9.2" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "visible" diff --git a/cmd/build.rs b/cmd/build.rs index ef751a71feb..6d11a38f705 100644 --- a/cmd/build.rs +++ b/cmd/build.rs @@ -32,7 +32,7 @@ fn link_sys_lib(lib: &str, tool: &cc::Tool) { } // remove lib prefix and .a postfix. let libname = &lib[3..lib.len() - 2]; - println!("cargo:rustc-link-lib=static={}", &libname); + println!("cargo:rustc-link-lib=static:+whole-archive={}", &libname); println!( "cargo:rustc-link-search=native={}", path.parent().unwrap().display() diff --git a/components/coprocessor_plugin_api/src/lib.rs b/components/coprocessor_plugin_api/src/lib.rs index 6e90ef83d2a..ca61b54c724 100644 --- a/components/coprocessor_plugin_api/src/lib.rs +++ b/components/coprocessor_plugin_api/src/lib.rs @@ -1,5 +1,4 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-#![feature(const_fn_fn_ptr_basics)] //! This crate contains some necessary types and traits for implementing a custom coprocessor plugin //! for TiKV. diff --git a/components/encryption/src/encrypted_file/mod.rs b/components/encryption/src/encrypted_file/mod.rs index e52cba85afc..7bf31225db8 100644 --- a/components/encryption/src/encrypted_file/mod.rs +++ b/components/encryption/src/encrypted_file/mod.rs @@ -127,7 +127,6 @@ mod tests { let content = b"test content"; file.write(content, &PlaintextBackend::default()).unwrap(); - drop(file); let file = EncryptedFile::new(tmp.path(), "encrypted"); assert_eq!(file.read(&PlaintextBackend::default()).unwrap(), content); diff --git a/components/raftstore/src/lib.rs b/components/raftstore/src/lib.rs index cd50b74dc48..b212001657a 100644 --- a/components/raftstore/src/lib.rs +++ b/components/raftstore/src/lib.rs @@ -6,7 +6,6 @@ #![feature(min_specialization)] #![feature(box_patterns)] #![feature(hash_drain_filter)] -#![feature(vec_retain_mut)] #![recursion_limit = "256"] #[cfg(test)] diff --git a/components/tidb_query_executors/src/lib.rs b/components/tidb_query_executors/src/lib.rs index 0aa69c3b8f5..b32518c600b 100644 --- a/components/tidb_query_executors/src/lib.rs +++ b/components/tidb_query_executors/src/lib.rs @@ -10,8 +10,6 @@ #![allow(incomplete_features)] #![feature(proc_macro_hygiene)] #![feature(specialization)] -#![feature(const_fn_fn_ptr_basics)] -#![feature(const_fn_trait_bound)] #![feature(const_mut_refs)] #[macro_use(box_try, warn)] diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index f11c0f89bbf..eec5bdad844 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -12,8 +12,6 @@ #![feature(proc_macro_hygiene)] #![feature(specialization)] #![feature(test)] -#![feature(const_fn_fn_ptr_basics)] -#![feature(const_fn_trait_bound)] #![feature(const_mut_refs)] #[macro_use(box_err, box_try, try_opt)] diff --git 
a/components/tikv_alloc/src/jemalloc.rs b/components/tikv_alloc/src/jemalloc.rs index 959f526bfaf..e8a21115142 100644 --- a/components/tikv_alloc/src/jemalloc.rs +++ b/components/tikv_alloc/src/jemalloc.rs @@ -192,10 +192,10 @@ mod profiling { // TODO: need a test for the dump_prof(None) case, but // the cleanup afterward is not simple. #[test] - #[ignore] - fn test_profiling_memory() { + #[ignore = "#ifdef MALLOC_CONF"] + fn test_profiling_memory_ifdef_malloc_conf() { // Make sure somebody has turned on profiling - assert!(is_profiling_on(), r#"Set MALLOC_CONF="prof:true""#); + assert!(is_profiling_on(), "set MALLOC_CONF=prof:true"); let dir = Builder::new() .prefix("test_profiling_memory") diff --git a/components/tikv_alloc/src/lib.rs b/components/tikv_alloc/src/lib.rs index df7efcd80bc..1435ca2bbd0 100644 --- a/components/tikv_alloc/src/lib.rs +++ b/components/tikv_alloc/src/lib.rs @@ -82,6 +82,10 @@ //! `--features=mem-profiling` to cargo for eather `tikv_alloc` or //! `tikv`. +#![cfg_attr(test, feature(test))] +#![cfg_attr(test, feature(custom_test_frameworks))] +#![cfg_attr(test, test_runner(runner::run_env_conditional_tests))] + #[cfg(feature = "jemalloc")] #[macro_use] extern crate lazy_static; @@ -124,3 +128,38 @@ pub use crate::{imp::*, trace::*}; #[global_allocator] static ALLOC: imp::Allocator = imp::allocator(); + +#[cfg(test)] +mod runner { + extern crate test; + use test::*; + + /// Check for ignored test cases with ignore message "#ifdef ". The test + /// case will be enabled if the specific environment variable is set. 
+ pub fn run_env_conditional_tests(cases: &[&TestDescAndFn]) { + let cases: Vec<_> = cases + .iter() + .map(|case| { + let mut desc = case.desc.clone(); + let testfn = match case.testfn { + TestFn::StaticTestFn(f) => TestFn::StaticTestFn(f), + TestFn::StaticBenchFn(f) => TestFn::StaticBenchFn(f), + ref f => panic!("unexpected testfn {:?}", f), + }; + if let Some(msg) = desc.ignore_message { + let keyword = "#ifdef"; + if let Some(s) = msg.strip_prefix(keyword) { + let var_name = s.trim(); + if var_name.is_empty() || std::env::var(var_name).is_ok() { + desc.ignore = false; + desc.ignore_message = None; + } + } + } + TestDescAndFn { desc, testfn } + }) + .collect(); + let args = std::env::args().collect::>(); + test_main(&args, cases, None) + } +} diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index 909e093ed3f..ec3b6d37017 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -1,6 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -#![feature(derive_default_enum)] #![feature(array_from_fn)] mod metrics; diff --git a/rust-toolchain b/rust-toolchain index f24eb00edaf..b91c1b17580 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly-2022-02-14 +nightly-2022-05-01 diff --git a/scripts/test-all b/scripts/test-all index daf7cf3f503..246a8f22176 100755 --- a/scripts/test-all +++ b/scripts/test-all @@ -13,17 +13,13 @@ if [[ -z $MAKEFILE_RUN ]] ; then fi ./scripts/test "$@" -- --nocapture -# The special Linux case below is testing the mem-profiling -# features in tikv_alloc, which are marked #[ignore] since -# they require special compile-time and run-time setup -# Fortunately rebuilding with the mem-profiling feature will only -# rebuild starting at jemalloc-sys. +# Re-run tests that requires specific environment variables. 
if [[ "$(uname)" == "Linux" ]]; then - export MALLOC_CONF=prof:true,prof_active:false - ./scripts/test -p tikv -p tikv_alloc --lib "$@" -- --nocapture --ignored + export MALLOC_CONF=prof:true + ./scripts/test ifdef_malloc_conf "$@" -- --nocapture fi if [[ "$(uname)" = "Linux" ]]; then EXTRA_CARGO_ARGS="" ./scripts/test --message-format=json-render-diagnostics -q --no-run -- --nocapture | - python scripts/check-bins.py --features "${TIKV_ENABLE_FEATURES}" --check-tests + python scripts/check-bins.py --features "${TIKV_ENABLE_FEATURES}" --check-tests fi \ No newline at end of file diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index fc41a504f42..ac892884e37 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -295,7 +295,7 @@ macro_rules! impl_write { Ok(resp) } .await; - crate::send_rpc_response!(res, sink, label, timer); + $crate::send_rpc_response!(res, sink, label, timer); }; self.threads.spawn_ok(buf_driver); diff --git a/src/server/service/mod.rs b/src/server/service/mod.rs index 36ea4c78a85..d80c2f6806c 100644 --- a/src/server/service/mod.rs +++ b/src/server/service/mod.rs @@ -18,7 +18,7 @@ pub use self::{ macro_rules! 
log_net_error { ($err:expr, $($args:tt)*) => {{ let e = $err; - if let crate::server::Error::Grpc(e) = e { + if let $crate::server::Error::Grpc(e) = e { info!($($args)*, "err" => %e); } else { debug!($($args)*, "err" => %e); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 692adec1ad1..63279780cfc 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -2721,10 +2721,7 @@ pub struct TxnTestSnapshot { impl Snapshot for TxnTestSnapshot { type Iter = S::Iter; - type Ext<'a> - where - S: 'a, - = TxnTestSnapshotExt<'a>; + type Ext<'a> = TxnTestSnapshotExt<'a> where S: 'a; fn get(&self, key: &Key) -> tikv_kv::Result> { self.snapshot.get(key) diff --git a/src/storage/raw/encoded.rs b/src/storage/raw/encoded.rs index 4c3629e14ef..b9b25015891 100644 --- a/src/storage/raw/encoded.rs +++ b/src/storage/raw/encoded.rs @@ -61,10 +61,7 @@ impl RawEncodeSnapshot { impl Snapshot for RawEncodeSnapshot { type Iter = RawEncodeIterator; - type Ext<'a> - where - S: 'a, - = S::Ext<'a>; + type Ext<'a> = S::Ext<'a> where S: 'a; fn get(&self, key: &Key) -> Result> { self.map_value(self.snap.get(key)) diff --git a/src/storage/raw/raw_mvcc.rs b/src/storage/raw/raw_mvcc.rs index 1f0bed9f945..4212b1c56ef 100644 --- a/src/storage/raw/raw_mvcc.rs +++ b/src/storage/raw/raw_mvcc.rs @@ -43,10 +43,7 @@ impl RawMvccSnapshot { impl Snapshot for RawMvccSnapshot { type Iter = RawMvccIterator; - type Ext<'a> - where - S: 'a, - = S::Ext<'a>; + type Ext<'a> = S::Ext<'a> where S: 'a; fn get(&self, key: &Key) -> Result> { self.seek_first_key_value_cf(None, None, key) From a80152ce04e7b18579f99d6407601599712aba2a Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Thu, 16 Jun 2022 11:56:34 +0800 Subject: [PATCH 0029/1149] storage: record perf statistics for scheduler commands (#12500) ref tikv/tikv#12362 This commit records perf contexts around executing txn scheduler commands. This helps us know the detail performance data of the underlying engine. 
Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/tracker/Cargo.toml | 1 + components/tracker/src/lib.rs | 10 + components/tracker/src/slab.rs | 7 +- metrics/grafana/tikv_details.json | 230 +++++++++++++++++- src/server/service/kv.rs | 9 + src/storage/metrics.rs | 62 +++++ src/storage/mod.rs | 57 ++--- .../txn/commands/acquire_pessimistic_lock.rs | 1 + .../txn/commands/check_secondary_locks.rs | 1 + src/storage/txn/commands/check_txn_status.rs | 1 + src/storage/txn/commands/cleanup.rs | 1 + src/storage/txn/commands/commit.rs | 1 + src/storage/txn/commands/macros.rs | 8 + src/storage/txn/commands/mod.rs | 9 + .../txn/commands/pessimistic_rollback.rs | 1 + src/storage/txn/commands/prewrite.rs | 2 + src/storage/txn/commands/resolve_lock.rs | 1 + src/storage/txn/commands/resolve_lock_lite.rs | 1 + .../txn/commands/resolve_lock_readphase.rs | 1 + src/storage/txn/commands/rollback.rs | 1 + src/storage/txn/commands/txn_heart_beat.rs | 1 + src/storage/txn/scheduler.rs | 58 +++-- 23 files changed, 394 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3350e0ef252..f94f088e563 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6571,6 +6571,7 @@ name = "tracker" version = "0.0.1" dependencies = [ "collections", + "crossbeam-utils 0.8.8", "kvproto", "lazy_static", "parking_lot 0.12.0", diff --git a/components/tracker/Cargo.toml b/components/tracker/Cargo.toml index fcaf546cf5b..f9b97010bd8 100644 --- a/components/tracker/Cargo.toml +++ b/components/tracker/Cargo.toml @@ -6,6 +6,7 @@ publish = false [dependencies] collections = { path = "../../components/collections" } +crossbeam-utils = "0.8" kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1" parking_lot = "0.12" diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index ec3b6d37017..0e932658aba 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -68,6 +68,16 @@ pub enum RequestType { 
KvBatchGetCommand, KvScan, KvScanLock, + KvPrewrite, + KvCommit, + KvPessimisticLock, + KvCheckTxnStatus, + KvCheckSecondaryLocks, + KvCleanup, + KvResolveLock, + KvTxnHeartBeat, + KvRollback, + KvPessimisticRollback, CoprocessorDag, CoprocessorAnalyze, CoprocessorChecksum, diff --git a/components/tracker/src/slab.rs b/components/tracker/src/slab.rs index 9d2803e7585..f737ee1ed1e 100644 --- a/components/tracker/src/slab.rs +++ b/components/tracker/src/slab.rs @@ -2,6 +2,7 @@ use std::{array, cell::Cell, fmt}; +use crossbeam_utils::CachePadded; use lazy_static::lazy_static; use parking_lot::Mutex; use slab::Slab; @@ -29,16 +30,16 @@ fn next_shard_id() -> usize { } pub struct ShardedSlab { - shards: [Mutex; SLAB_SHARD_COUNT], + shards: [CachePadded>; SLAB_SHARD_COUNT], } impl ShardedSlab { pub fn new(capacity_per_shard: usize) -> ShardedSlab { let shards = array::from_fn(|shard_id| { - Mutex::new(TrackerSlab::with_capacity( + CachePadded::new(Mutex::new(TrackerSlab::with_capacity( shard_id as u32, capacity_per_shard, - )) + ))) }); ShardedSlab { shards } } diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 009868d3a5c..15dfa8c684b 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -4654,6 +4654,232 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 68 + }, + "hiddenSeries": false, + "id": 23763572784, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + 
"renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:80", + "alias": "/.*/", + "stack": "A" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_storage_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", metric=\"block_read_time\"}[1m])) by (req)", + "hide": false, + "interval": "", + "legendFormat": "{{req}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", metric=\"block_read_time\"}[1m])) by (req)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "copr-{{req}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk IO time per second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:264", + "format": "ns", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:265", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 68 + }, + "hiddenSeries": false, + "id": 23763572785, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": 
false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:62", + "alias": "/.*/", + "stack": "A" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_storage_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", metric=\"block_read_byte\"}[1m])) by (req)", + "interval": "", + "legendFormat": "{{req}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", metric=\"block_read_byte\"}[1m])) by (req)", + "hide": false, + "interval": "", + "legendFormat": "copr-{{req}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk IO bytes per second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:264", + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:265", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "repeat": null, @@ -16921,7 +17147,7 @@ "targets": [ { "exemplar": true, - "expr": "rate(tikv_yatp_pool_schedule_wait_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"unified-read.*\"}[1m])", + "expr": 
"sum(rate(tikv_yatp_pool_schedule_wait_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"unified-read.*\"}[1m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{le}}", @@ -20259,7 +20485,7 @@ "targets": [ { "exemplar": true, - "expr": "rate(tikv_yatp_pool_schedule_wait_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sched-worker.*\"}[1m])", + "expr": "sum(rate(tikv_yatp_pool_schedule_wait_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sched-worker.*\"}[1m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{le}}", diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 73215f6922c..988e0624686 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1932,10 +1932,19 @@ macro_rules! txn_command_future { $req: $req_ty, ) -> impl Future> { $prelude + let tracker = GLOBAL_TRACKERS.insert(Tracker::new(RequestInfo::new( + $req.get_context(), + RequestType::Unknown, + 0, + ))); + set_tls_tracker_token(tracker); let (cb, f) = paired_future_callback(); let res = storage.sched_txn_command($req.into(), cb); async move { + defer!{{ + GLOBAL_TRACKERS.remove(tracker); + }}; let $v = match res { Err(e) => Err(e), Ok(_) => f.await?, diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index fd4df727e54..95f5809ec9e 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -5,11 +5,14 @@ use std::{cell::RefCell, mem, sync::Arc}; use collections::HashMap; +use engine_traits::{PerfContext, PerfContextExt, PerfContextKind, PerfLevel}; use kvproto::{kvrpcpb::KeyRange, metapb, pdpb::QueryKind}; use pd_client::BucketMeta; use prometheus::*; use prometheus_static_metric::*; use raftstore::store::{util::build_key_range, ReadStats}; +use tikv_kv::{with_tls_engine, Engine}; +use tracker::get_tls_tracker_token; use crate::{ 
server::metrics::{GcKeysCF as ServerGcKeysCF, GcKeysDetail as ServerGcKeysDetail}, @@ -296,6 +299,65 @@ impl From for GcKeysDetail { } } +// Safety: It should be only called when the thread-local engine exists. +pub unsafe fn with_perf_context(cmd: CommandKind, f: Fn) -> T +where + Fn: FnOnce() -> T, +{ + thread_local! { + static GET: RefCell>> = RefCell::new(None); + static BATCH_GET: RefCell>> = RefCell::new(None); + static BATCH_GET_COMMAND: RefCell>> = RefCell::new(None); + static SCAN: RefCell>> = RefCell::new(None); + static PREWRITE: RefCell>> = RefCell::new(None); + static ACQUIRE_PESSIMISTIC_LOCK: RefCell>> = RefCell::new(None); + static COMMIT: RefCell>> = RefCell::new(None); + static CLEANUP: RefCell>> = RefCell::new(None); + static ROLLBACK: RefCell>> = RefCell::new(None); + static PESSIMISTIC_ROLLBACK: RefCell>> = RefCell::new(None); + static TXN_HEART_BEAT: RefCell>> = RefCell::new(None); + static CHECK_TXN_STATUS: RefCell>> = RefCell::new(None); + static CHECK_SECONDARY_LOCKS: RefCell>> = RefCell::new(None); + static SCAN_LOCK: RefCell>> = RefCell::new(None); + static RESOLVE_LOCK: RefCell>> = RefCell::new(None); + static RESOLVE_LOCK_LITE: RefCell>> = RefCell::new(None); + } + let tls_cell = match cmd { + CommandKind::get => &GET, + CommandKind::batch_get => &BATCH_GET, + CommandKind::batch_get_command => &BATCH_GET_COMMAND, + CommandKind::scan => &SCAN, + CommandKind::prewrite => &PREWRITE, + CommandKind::acquire_pessimistic_lock => &ACQUIRE_PESSIMISTIC_LOCK, + CommandKind::commit => &COMMIT, + CommandKind::cleanup => &CLEANUP, + CommandKind::rollback => &ROLLBACK, + CommandKind::pessimistic_rollback => &PESSIMISTIC_ROLLBACK, + CommandKind::txn_heart_beat => &TXN_HEART_BEAT, + CommandKind::check_txn_status => &CHECK_TXN_STATUS, + CommandKind::check_secondary_locks => &CHECK_SECONDARY_LOCKS, + CommandKind::scan_lock => &SCAN_LOCK, + CommandKind::resolve_lock => &RESOLVE_LOCK, + CommandKind::resolve_lock_lite => &RESOLVE_LOCK_LITE, + _ => return f(), 
+ }; + tls_cell.with(|c| { + let mut c = c.borrow_mut(); + let perf_context = c.get_or_insert_with(|| { + with_tls_engine(|engine: &E| { + Box::new(engine.kv_engine().get_perf_context( + PerfLevel::Uninitialized, + PerfContextKind::Storage(cmd.get_str()), + )) + }) + }); + perf_context.start_observe(); + let res = f(); + perf_context.report_metrics(&[get_tls_tracker_token()]); + res + }) +} + lazy_static! { pub static ref KV_COMMAND_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( "tikv_storage_command_total", diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 63279780cfc..f12f918b8aa 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -52,7 +52,6 @@ mod types; use std::{ borrow::Cow, - cell::RefCell, iter, marker::PhantomData, sync::{ @@ -63,10 +62,7 @@ use std::{ use api_version::{ApiV1, ApiV2, KeyMode, KvFormat, RawValue}; use concurrency_manager::ConcurrencyManager; -use engine_traits::{ - raw_ttl::ttl_to_expire_ts, CfName, PerfContext, PerfContextExt, PerfContextKind, PerfLevel, - CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, -}; +use engine_traits::{raw_ttl::ttl_to_expire_ts, CfName, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS}; use futures::prelude::*; use kvproto::{ kvrpcpb::{ @@ -85,8 +81,7 @@ use tikv_util::{ time::{duration_to_ms, Instant, ThreadReadId}, }; use tracker::{ - clear_tls_tracker_token, get_tls_tracker_token, set_tls_tracker_token, TrackedFuture, - TrackerToken, + clear_tls_tracker_token, set_tls_tracker_token, with_tls_tracker, TrackedFuture, TrackerToken, }; use txn_types::{Key, KvPair, Lock, OldValues, TimeStamp, TsSet, Value}; @@ -279,42 +274,6 @@ impl Storage { }) } - fn with_perf_context(cmd: CommandKind, f: Fn) -> T - where - Fn: FnOnce() -> T, - { - thread_local! 
{ - static GET: RefCell>> = RefCell::new(None); - static BATCH_GET: RefCell>> = RefCell::new(None); - static BATCH_GET_COMMAND: RefCell>> = RefCell::new(None); - static SCAN: RefCell>> = RefCell::new(None); - static SCAN_LOCK: RefCell>> = RefCell::new(None); - } - let tls_cell = match cmd { - CommandKind::get => &GET, - CommandKind::batch_get => &BATCH_GET, - CommandKind::batch_get_command => &BATCH_GET_COMMAND, - CommandKind::scan => &SCAN, - CommandKind::scan_lock => &SCAN_LOCK, - _ => return f(), - }; - tls_cell.with(|c| { - let mut c = c.borrow_mut(); - let perf_context = c.get_or_insert_with(|| { - Self::with_tls_engine(|engine| { - Box::new(engine.kv_engine().get_perf_context( - PerfLevel::Uninitialized, - PerfContextKind::Storage(cmd.get_str()), - )) - }) - }); - perf_context.start_observe(); - let res = f(); - perf_context.report_metrics(&[get_tls_tracker_token()]); - res - }) - } - /// Get the underlying `Engine` of the `Storage`. pub fn get_engine(&self) -> E { self.engine.clone() @@ -359,6 +318,14 @@ impl Storage { self.read_pool.get_normal_pool_size() } + fn with_perf_context(cmd: CommandKind, f: Fn) -> T + where + Fn: FnOnce() -> T, + { + // Safety: the read pools ensure that a TLS engine exists. + unsafe { with_perf_context::(cmd, f) } + } + #[inline] fn with_tls_engine(f: impl FnOnce(&E) -> R) -> R { // Safety: the read pools ensure that a TLS engine exists. @@ -1436,6 +1403,10 @@ impl Storage { } _ => {} } + with_tls_tracker(|tracker| { + tracker.req_info.start_ts = cmd.ts().into_inner(); + tracker.req_info.request_type = cmd.request_type(); + }); fail_point!("storage_drop_message", |_| Ok(())); cmd.incr_cmd_metric(); diff --git a/src/storage/txn/commands/acquire_pessimistic_lock.rs b/src/storage/txn/commands/acquire_pessimistic_lock.rs index ca94382491c..d49d759f3a5 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock.rs @@ -53,6 +53,7 @@ command! 
{ impl CommandExt for AcquirePessimisticLock { ctx!(); tag!(acquire_pessimistic_lock); + request_type!(KvPessimisticLock); ts!(start_ts); property!(can_be_pipelined); diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index 65abc2ffd1b..c27e8dc1bc0 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -41,6 +41,7 @@ command! { impl CommandExt for CheckSecondaryLocks { ctx!(); tag!(check_secondary_locks); + request_type!(KvCheckSecondaryLocks); ts!(start_ts); write_bytes!(keys: multiple); gen_lock!(keys: multiple); diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index 7ce843594a9..5ec0ae5c503 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -57,6 +57,7 @@ command! { impl CommandExt for CheckTxnStatus { ctx!(); tag!(check_txn_status); + request_type!(KvCheckTxnStatus); ts!(lock_ts); write_bytes!(primary_key); gen_lock!(primary_key); diff --git a/src/storage/txn/commands/cleanup.rs b/src/storage/txn/commands/cleanup.rs index aefcf128740..62c0aaa98c1 100644 --- a/src/storage/txn/commands/cleanup.rs +++ b/src/storage/txn/commands/cleanup.rs @@ -38,6 +38,7 @@ command! { impl CommandExt for Cleanup { ctx!(); tag!(cleanup); + request_type!(KvCleanup); ts!(start_ts); write_bytes!(key); gen_lock!(key); diff --git a/src/storage/txn/commands/commit.rs b/src/storage/txn/commands/commit.rs index 8241b1b9c9c..f89d4fc09af 100644 --- a/src/storage/txn/commands/commit.rs +++ b/src/storage/txn/commands/commit.rs @@ -37,6 +37,7 @@ command! 
{ impl CommandExt for Commit { ctx!(); tag!(commit); + request_type!(KvCommit); ts!(commit_ts); write_bytes!(keys: multiple); gen_lock!(keys: multiple); diff --git a/src/storage/txn/commands/macros.rs b/src/storage/txn/commands/macros.rs index ea19f599d6d..c505714f2a4 100644 --- a/src/storage/txn/commands/macros.rs +++ b/src/storage/txn/commands/macros.rs @@ -139,6 +139,14 @@ macro_rules! tag { }; } +macro_rules! request_type { + ($req_type:ident) => { + fn request_type(&self) -> ::tracker::RequestType { + ::tracker::RequestType::$req_type + } + }; +} + macro_rules! write_bytes { ($field: ident) => { fn write_bytes(&self) -> usize { diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 1168dd15048..5cd94b172ff 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -48,6 +48,7 @@ pub use resolve_lock_lite::ResolveLockLite; pub use resolve_lock_readphase::ResolveLockReadPhase; pub use rollback::Rollback; use tikv_util::deadline::Deadline; +use tracker::RequestType; pub use txn_heart_beat::TxnHeartBeat; use txn_types::{Key, OldValues, TimeStamp, Value, Write}; @@ -467,6 +468,10 @@ fn find_mvcc_infos_by_key( pub trait CommandExt: Display { fn tag(&self) -> metrics::CommandKind; + fn request_type(&self) -> RequestType { + RequestType::Unknown + } + fn get_ctx(&self) -> &Context; fn get_ctx_mut(&mut self) -> &mut Context; @@ -645,6 +650,10 @@ impl Command { self.command_ext().tag() } + pub fn request_type(&self) -> RequestType { + self.command_ext().request_type() + } + pub fn ts(&self) -> TimeStamp { self.command_ext().ts() } diff --git a/src/storage/txn/commands/pessimistic_rollback.rs b/src/storage/txn/commands/pessimistic_rollback.rs index 17a72610065..bcafed8b0e6 100644 --- a/src/storage/txn/commands/pessimistic_rollback.rs +++ b/src/storage/txn/commands/pessimistic_rollback.rs @@ -38,6 +38,7 @@ command! 
{ impl CommandExt for PessimisticRollback { ctx!(); tag!(pessimistic_rollback); + request_type!(KvPessimisticRollback); ts!(start_ts); write_bytes!(keys: multiple); gen_lock!(keys: multiple); diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 1c0cbabd193..cfe8f68c512 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -215,6 +215,7 @@ impl Prewrite { impl CommandExt for Prewrite { ctx!(); tag!(prewrite); + request_type!(KvPrewrite); ts!(start_ts); fn write_bytes(&self) -> usize { @@ -379,6 +380,7 @@ impl PrewritePessimistic { impl CommandExt for PrewritePessimistic { ctx!(); tag!(prewrite); + request_type!(KvPrewrite); ts!(start_ts); fn write_bytes(&self) -> usize { diff --git a/src/storage/txn/commands/resolve_lock.rs b/src/storage/txn/commands/resolve_lock.rs index 9db90f450d8..6638fe5cffd 100644 --- a/src/storage/txn/commands/resolve_lock.rs +++ b/src/storage/txn/commands/resolve_lock.rs @@ -57,6 +57,7 @@ command! { impl CommandExt for ResolveLock { ctx!(); tag!(resolve_lock); + request_type!(KvResolveLock); property!(is_sys_cmd); fn write_bytes(&self) -> usize { diff --git a/src/storage/txn/commands/resolve_lock_lite.rs b/src/storage/txn/commands/resolve_lock_lite.rs index e797ea62bf9..f69d4a107fc 100644 --- a/src/storage/txn/commands/resolve_lock_lite.rs +++ b/src/storage/txn/commands/resolve_lock_lite.rs @@ -36,6 +36,7 @@ command! { impl CommandExt for ResolveLockLite { ctx!(); tag!(resolve_lock_lite); + request_type!(KvResolveLock); ts!(start_ts); property!(is_sys_cmd); write_bytes!(resolve_keys: multiple); diff --git a/src/storage/txn/commands/resolve_lock_readphase.rs b/src/storage/txn/commands/resolve_lock_readphase.rs index 7c34cc71f4f..588303e0a3d 100644 --- a/src/storage/txn/commands/resolve_lock_readphase.rs +++ b/src/storage/txn/commands/resolve_lock_readphase.rs @@ -33,6 +33,7 @@ command! 
{ impl CommandExt for ResolveLockReadPhase { ctx!(); tag!(resolve_lock); + request_type!(KvResolveLock); property!(readonly); fn write_bytes(&self) -> usize { diff --git a/src/storage/txn/commands/rollback.rs b/src/storage/txn/commands/rollback.rs index e6641147f04..70e7fc4a49d 100644 --- a/src/storage/txn/commands/rollback.rs +++ b/src/storage/txn/commands/rollback.rs @@ -35,6 +35,7 @@ command! { impl CommandExt for Rollback { ctx!(); tag!(rollback); + request_type!(KvRollback); ts!(start_ts); write_bytes!(keys: multiple); gen_lock!(keys: multiple); diff --git a/src/storage/txn/commands/txn_heart_beat.rs b/src/storage/txn/commands/txn_heart_beat.rs index d2af61d4506..e894cc6835e 100644 --- a/src/storage/txn/commands/txn_heart_beat.rs +++ b/src/storage/txn/commands/txn_heart_beat.rs @@ -40,6 +40,7 @@ command! { impl CommandExt for TxnHeartBeat { ctx!(); tag!(txn_heart_beat); + request_type!(KvTxnHeartBeat); ts!(start_ts); write_bytes!(primary_key); gen_lock!(primary_key); diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 283787e9ba1..f0e1529fab7 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -47,6 +47,7 @@ use raftstore::store::TxnExt; use resource_metering::{FutureExt, ResourceTagFactory}; use tikv_kv::{Modify, Snapshot, SnapshotExt, WriteData}; use tikv_util::{quota_limiter::QuotaLimiter, time::Instant, timer::GLOBAL_TIMER_HANDLE}; +use tracker::{get_tls_tracker_token, set_tls_tracker_token, TrackerToken}; use txn_types::TimeStamp; use crate::{ @@ -59,7 +60,7 @@ use crate::{ SnapContext, Statistics, }, lock_manager::{self, DiagnosticContext, LockManager, WaitTimeout}, - metrics::{self, *}, + metrics::*, txn::{ commands::{Command, ResponsePolicy, WriteContext, WriteResult, WriteResultLockInfo}, flow_controller::FlowController, @@ -83,15 +84,17 @@ const IN_MEMORY_PESSIMISTIC_LOCK: Feature = Feature::require(6, 0, 0); /// Task is a running command. 
pub(super) struct Task { pub(super) cid: u64, + pub(super) tracker: TrackerToken, pub(super) cmd: Command, pub(super) extra_op: ExtraOp, } impl Task { /// Creates a task for a running command. - pub(super) fn new(cid: u64, cmd: Command) -> Task { + pub(super) fn new(cid: u64, tracker: TrackerToken, cmd: Command) -> Task { Task { cid, + tracker, cmd, extra_op: ExtraOp::Noop, } @@ -99,7 +102,7 @@ impl Task { } struct CmdTimer { - tag: metrics::CommandKind, + tag: CommandKind, begin: Instant, } @@ -122,7 +125,7 @@ struct TaskContext { // `cb` and `pr` safely. owned: AtomicBool, write_bytes: usize, - tag: metrics::CommandKind, + tag: CommandKind, // How long it waits on latches. // latch_timer: Option, latch_timer: Instant, @@ -411,8 +414,8 @@ impl Scheduler { fn schedule_command(&self, cmd: Command, callback: StorageCallback) { let cid = self.inner.gen_id(); - debug!("received new command"; "cid" => cid, "cmd" => ?cmd); - + let tracker = get_tls_tracker_token(); + debug!("received new command"; "cid" => cid, "cmd" => ?cmd, "tracker" => ?tracker); let tag = cmd.tag(); let priority_tag = get_priority_tag(cmd.priority()); SCHED_STAGE_COUNTER_VEC.get(tag).new.inc(); @@ -421,9 +424,10 @@ impl Scheduler { .inc(); let mut task_slot = self.inner.get_task_slot(cid); - let tctx = task_slot - .entry(cid) - .or_insert_with(|| self.inner.new_task_context(Task::new(cid, cmd), callback)); + let tctx = task_slot.entry(cid).or_insert_with(|| { + self.inner + .new_task_context(Task::new(cid, tracker, cmd), callback) + }); let deadline = tctx.task.as_ref().unwrap().cmd.deadline(); if self.inner.latches.acquire(&mut tctx.lock, cid) { fail_point!("txn_scheduler_acquire_success"); @@ -494,6 +498,7 @@ impl Scheduler { /// Executes the task in the sched pool. 
fn execute(&self, mut task: Task) { + set_tls_tracker_token(task.tracker); let sched = self.clone(); self.get_sched_pool(task.cmd.priority()) .pool @@ -537,6 +542,7 @@ impl Scheduler { debug!( "process cmd with snapshot"; "cid" => task.cid, "term" => ?term, "extra_op" => ?extra_op, + "trakcer" => ?task.tracker ); sched.process(snapshot, task).await; } @@ -575,7 +581,7 @@ impl Scheduler { /// /// If a next command is present, continues to execute; otherwise, delivers the result to the /// callback. - fn on_read_finished(&self, cid: u64, pr: ProcessResult, tag: metrics::CommandKind) { + fn on_read_finished(&self, cid: u64, pr: ProcessResult, tag: CommandKind) { SCHED_STAGE_COUNTER_VEC.get(tag).read_finish.inc(); debug!("read command finished"; "cid" => cid); @@ -599,7 +605,7 @@ impl Scheduler { lock_guards: Vec, pipelined: bool, async_apply_prewrite: bool, - tag: metrics::CommandKind, + tag: CommandKind, ) { // TODO: Does async apply prewrite worth a special metric here? if pipelined { @@ -674,8 +680,8 @@ impl Scheduler { cid: u64, cb: StorageCallback, pr: ProcessResult, - tag: metrics::CommandKind, - stage: metrics::CommandStageKind, + tag: CommandKind, + stage: CommandStageKind, ) { debug!("early return response"; "cid" => cid); SCHED_STAGE_COUNTER_VEC.get(tag).get(stage).inc(); @@ -745,10 +751,13 @@ impl Scheduler { let tag = task.cmd.tag(); let begin_instant = Instant::now(); - let pr = task - .cmd - .process_read(snapshot, statistics) - .unwrap_or_else(|e| ProcessResult::Failed { err: e.into() }); + let cmd = task.cmd; + let pr = unsafe { + with_perf_context::(tag, || { + cmd.process_read(snapshot, statistics) + .unwrap_or_else(|e| ProcessResult::Failed { err: e.into() }) + }) + }; SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(tag) .observe(begin_instant.saturating_elapsed_secs()); @@ -783,10 +792,13 @@ impl Scheduler { async_apply_prewrite: self.inner.enable_async_apply_prewrite, }; let begin_instant = Instant::now(); - let res = task - .cmd - 
.process_write(snapshot, context) - .map_err(StorageError::from); + let res = unsafe { + with_perf_context::(tag, || { + task.cmd + .process_write(snapshot, context) + .map_err(StorageError::from) + }) + }; SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(tag) .observe(begin_instant.saturating_elapsed_secs()); @@ -904,7 +916,7 @@ impl Scheduler { cb.unwrap(), pr.unwrap(), tag, - metrics::CommandStageKind::async_apply_prewrite, + CommandStageKind::async_apply_prewrite, ); }); is_async_apply_prewrite = true; @@ -934,7 +946,7 @@ impl Scheduler { cb.unwrap(), pr.unwrap(), tag, - metrics::CommandStageKind::pipelined_write, + CommandStageKind::pipelined_write, ); }); (Some(proposed_cb), None) From c1a09b83f6da437f49758dc713150ebc0da5fcb9 Mon Sep 17 00:00:00 2001 From: Jay Date: Sat, 18 Jun 2022 01:28:35 -0700 Subject: [PATCH 0030/1149] raftstorev2: add basic layout (#12843) ref tikv/tikv#12842 This is an attempt to reimplement raftstore using the new assumptions that peer's range can be overlapped. Currently, compatability is not considered, though we may think about how to migrate from old version by the end of this year. No concrete implementations is added yet, we may choose reuse implementation from v1 or implementing new logic base on actual requirement. The principle is 1. do not introduce history debt while reusing code as much as possible. 2. do not change the current implementations. 
Signed-off-by: Jay Lee --- Cargo.lock | 18 ++ Cargo.toml | 1 + components/raftstore-v2/Cargo.toml | 39 +++ components/raftstore-v2/src/fsm/apply.rs | 1 + components/raftstore-v2/src/fsm/mod.rs | 5 + components/raftstore-v2/src/fsm/peer.rs | 22 ++ components/raftstore-v2/src/fsm/store.rs | 1 + components/raftstore-v2/src/lib.rs | 19 ++ components/raftstore-v2/src/operation/mod.rs | 1 + components/raftstore-v2/src/raft/mod.rs | 7 + components/raftstore-v2/src/raft/peer.rs | 70 +++++ components/raftstore-v2/src/raft/storage.rs | 56 ++++ components/raftstore-v2/src/router/message.rs | 293 ++++++++++++++++++ components/raftstore-v2/src/router/mod.rs | 5 + components/raftstore/src/store/peer.rs | 3 +- 15 files changed, 540 insertions(+), 1 deletion(-) create mode 100644 components/raftstore-v2/Cargo.toml create mode 100644 components/raftstore-v2/src/fsm/apply.rs create mode 100644 components/raftstore-v2/src/fsm/mod.rs create mode 100644 components/raftstore-v2/src/fsm/peer.rs create mode 100644 components/raftstore-v2/src/fsm/store.rs create mode 100644 components/raftstore-v2/src/lib.rs create mode 100644 components/raftstore-v2/src/operation/mod.rs create mode 100644 components/raftstore-v2/src/raft/mod.rs create mode 100644 components/raftstore-v2/src/raft/peer.rs create mode 100644 components/raftstore-v2/src/raft/storage.rs create mode 100644 components/raftstore-v2/src/router/message.rs create mode 100644 components/raftstore-v2/src/router/mod.rs diff --git a/Cargo.lock b/Cargo.lock index f94f088e563..82978c6cbf8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4162,6 +4162,24 @@ dependencies = [ "yatp", ] +[[package]] +name = "raftstore-v2" +version = "0.1.0" +dependencies = [ + "collections", + "crossbeam", + "engine_traits", + "error_code", + "kvproto", + "pd_client", + "raft", + "raft-proto", + "raftstore", + "slog", + "smallvec", + "tikv_util", +] + [[package]] name = "rand" version = "0.4.6" diff --git a/Cargo.toml b/Cargo.toml index a1c1f315de3..e58963c694d 
100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -247,6 +247,7 @@ members = [ "components/panic_hook", "components/pd_client", "components/raftstore", + "components/raftstore-v2", "components/resolved_ts", "components/resource_metering", "components/server", diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml new file mode 100644 index 00000000000..56d08c6a6b6 --- /dev/null +++ b/components/raftstore-v2/Cargo.toml @@ -0,0 +1,39 @@ +[package] +name = "raftstore-v2" +version = "0.1.0" +edition = "2021" + +[features] +default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] +failpoints = ["raftstore/failpoints"] +testexport = ["raftstore/testexport"] +test-engine-kv-rocksdb = [ + "raftstore/test-engine-kv-rocksdb" +] +test-engine-raft-raft-engine = [ + "raftstore/test-engine-raft-raft-engine" +] +test-engines-rocksdb = [ + "raftstore/test-engines-rocksdb", +] +test-engines-panic = [ + "raftstore/test-engines-panic", +] + +cloud-aws = ["raftstore/cloud-aws"] +cloud-gcp = ["raftstore/cloud-gcp"] +cloud-azure = ["raftstore/cloud-azure"] + +[dependencies] +collections = { path = "../collections" } +crossbeam = "0.8" +engine_traits = { path = "../engine_traits" } +error_code = { path = "../error_code" } +kvproto = { git = "https://github.com/pingcap/kvproto.git" } +pd_client = { path = "../pd_client" } +raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft-proto = { version = "0.7.0" } +raftstore = { path = "../raftstore" } +slog = "2.3" +smallvec = "1.4" +tikv_util = { path = "../tikv_util", default-features = false } diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs new file mode 100644 index 00000000000..bb3db8c75d3 --- /dev/null +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -0,0 +1 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
diff --git a/components/raftstore-v2/src/fsm/mod.rs b/components/raftstore-v2/src/fsm/mod.rs new file mode 100644 index 00000000000..275313cbfb3 --- /dev/null +++ b/components/raftstore-v2/src/fsm/mod.rs @@ -0,0 +1,5 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +mod apply; +mod peer; +mod store; diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs new file mode 100644 index 00000000000..5eaacf3e200 --- /dev/null +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -0,0 +1,22 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::RaftEngine; +use kvproto::metapb; +use raftstore::store::Config; +use slog::Logger; + +use crate::{raft::Peer, Result}; + +pub struct PeerFsm { + peer: Peer, +} + +impl PeerFsm { + pub fn new(peer: Peer) -> Result { + Ok(PeerFsm { peer }) + } + + pub fn logger(&self) -> &Logger { + self.peer.logger() + } +} diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs new file mode 100644 index 00000000000..bb3db8c75d3 --- /dev/null +++ b/components/raftstore-v2/src/fsm/store.rs @@ -0,0 +1 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs new file mode 100644 index 00000000000..98c72ca7632 --- /dev/null +++ b/components/raftstore-v2/src/lib.rs @@ -0,0 +1,19 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! Raftstore is the place where we implement multi-raft. +//! +//! The thread module of raftstore is batch-system, more check components/batch-system. +//! All state machines are defined in [`fsm`] module. Everything that wrapping raft is +//! implemented in [`raft`] module. And the commands are implemented in [`operation`] module. +//! All state machines are expected to communicate with messages. They are defined in +//! [`router`] module. 
+ +#![allow(unused)] + +mod fsm; +mod operation; +mod raft; +mod router; + +pub use raftstore::{Error, Result}; +pub use router::{PeerMsg, PeerTick, StoreMsg, StoreTick}; diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs new file mode 100644 index 00000000000..bb3db8c75d3 --- /dev/null +++ b/components/raftstore-v2/src/operation/mod.rs @@ -0,0 +1 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. diff --git a/components/raftstore-v2/src/raft/mod.rs b/components/raftstore-v2/src/raft/mod.rs new file mode 100644 index 00000000000..7fd128d6788 --- /dev/null +++ b/components/raftstore-v2/src/raft/mod.rs @@ -0,0 +1,7 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +mod peer; +mod storage; + +pub use peer::Peer; +pub use storage::Storage; diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs new file mode 100644 index 00000000000..4af2c1ccddb --- /dev/null +++ b/components/raftstore-v2/src/raft/peer.rs @@ -0,0 +1,70 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::RaftEngine; +use kvproto::metapb; +use raft::RawNode; +use raftstore::store::Config; +use slog::{o, Logger}; +use tikv_util::{box_err, config::ReadableSize}; + +use super::storage::Storage; +use crate::Result; + +/// A peer that delegates commands between state machine and raft. 
+pub struct Peer { + region_id: u64, + peer: metapb::Peer, + raft_group: RawNode>, + logger: Logger, +} + +impl Peer { + pub fn new( + cfg: &Config, + store_id: u64, + region: metapb::Region, + engine: ER, + logger: Logger, + ) -> Result { + let peer = region + .get_peers() + .iter() + .find(|p| p.get_store_id() == store_id && p.get_id() != raft::INVALID_ID); + let peer = match peer { + Some(p) => p, + None => return Err(box_err!("no valid peer found in {:?}", region.get_peers())), + }; + let l = logger.new(o!("peer_id" => peer.id)); + + let ps = Storage::new(engine, l.clone()); + + let applied_index = ps.applied_index(); + + let raft_cfg = raft::Config { + id: peer.get_id(), + election_tick: cfg.raft_election_timeout_ticks, + heartbeat_tick: cfg.raft_heartbeat_ticks, + min_election_tick: cfg.raft_min_election_timeout_ticks, + max_election_tick: cfg.raft_max_election_timeout_ticks, + max_size_per_msg: cfg.raft_max_size_per_msg.0, + max_inflight_msgs: cfg.raft_max_inflight_msgs, + applied: applied_index, + check_quorum: true, + skip_bcast_commit: true, + pre_vote: cfg.prevote, + max_committed_size_per_ready: ReadableSize::mb(16).0, + ..Default::default() + }; + + Ok(Peer { + region_id: region.get_id(), + peer: peer.clone(), + raft_group: RawNode::new(&raft_cfg, ps, &logger)?, + logger: l, + }) + } + + pub fn logger(&self) -> &Logger { + &self.logger + } +} diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs new file mode 100644 index 00000000000..f999c6890d8 --- /dev/null +++ b/components/raftstore-v2/src/raft/storage.rs @@ -0,0 +1,56 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::RaftEngine; +use raft::{ + eraftpb::{Entry, Snapshot}, + GetEntriesContext, RaftState, +}; +use slog::Logger; + +/// A storage for raft. 
+pub struct Storage { + engine: ER, + logger: Logger, +} + +impl Storage { + pub fn new(engine: ER, logger: Logger) -> Storage { + Storage { engine, logger } + } + + pub fn applied_index(&self) -> u64 { + unimplemented!() + } +} + +impl raft::Storage for Storage { + fn initial_state(&self) -> raft::Result { + unimplemented!() + } + + fn entries( + &self, + low: u64, + high: u64, + max_size: impl Into>, + context: GetEntriesContext, + ) -> raft::Result> { + unimplemented!() + } + + fn term(&self, idx: u64) -> raft::Result { + unimplemented!() + } + + fn first_index(&self) -> raft::Result { + unimplemented!() + } + + fn last_index(&self) -> raft::Result { + unimplemented!() + } + + fn snapshot(&self, request_index: u64, to: u64) -> raft::Result { + unimplemented!() + } +} diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs new file mode 100644 index 00000000000..1ab85608034 --- /dev/null +++ b/components/raftstore-v2/src/router/message.rs @@ -0,0 +1,293 @@ +// Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. + +// #[PerformanceCriticalPath] +use std::{fmt, marker::PhantomData}; + +use engine_traits::{KvEngine, Snapshot}; +use kvproto::{ + kvrpcpb::ExtraOp as TxnExtraOp, + metapb, + raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, +}; +use raftstore::store::{ + fsm::ApplyTaskRes, metrics::RaftEventDurationType, InspectedRaftMessage, RegionSnapshot, +}; +use tikv_util::{memory::HeapSize, time::Instant}; + +pub struct WriteResponseChannel; + +impl WriteResponseChannel { + /// Called after a request is proposed to the raft group successfully. It's + /// used to notify the caller to move on early because it's very likely the + /// request will be applied to the raftstore. + pub fn notify_proposed(&self) {} + + /// Called after a request is committed and before it's being applied, and + /// it's guaranteed that the request will be successfully applied soon. 
+ pub fn notify_committed(&self) {} + + pub fn notify_applied(&self, _res: Result<(), RaftCmdResponse>) {} +} + +pub struct ReadResponseChannel { + _snap: PhantomData, +} + +pub struct ReadResponse { + pub snapshot: RegionSnapshot, + // What is this? + pub txn_extra_op: TxnExtraOp, +} + +impl ReadResponseChannel { + pub fn notify_read(&self, _res: Result, RaftCmdResponse>) {} +} + +// This is only necessary because of seeming limitations in derive(Clone) w/r/t +// generics. If it can be deleted in the future in favor of derive, it should +// be. +impl Clone for ReadResponse +where + S: Snapshot, +{ + fn clone(&self) -> ReadResponse { + ReadResponse { + snapshot: self.snapshot.clone(), + txn_extra_op: self.txn_extra_op, + } + } +} + +/// Variants of channels for `Msg`. +/// - `Read`: a channel for read only requests including `StatusRequest`, +/// `GetRequest` and `SnapRequest` +/// - `Write`: a channel for write only requests including `AdminRequest` +/// `PutRequest`, `DeleteRequest` and `DeleteRangeRequest`. +/// Prefer channel rather than callback because: +/// 1. channel can be reused, hence reduce allocations. +/// 2. channel may not need dynamic dispatch. +/// 3. caller can use async fashion. +/// 4. there will be no callback leak. +pub enum ResponseChannel { + /// No callback. + None, + /// Read callback. + Read(ReadResponseChannel), + /// Write callback. 
+ Write(WriteResponseChannel), +} + +impl HeapSize for ResponseChannel {} + +impl ResponseChannel +where + S: Snapshot, +{ + pub fn notify_applied(self, resp: RaftCmdResponse) { + match self { + ResponseChannel::None => (), + ResponseChannel::Read(read) => { + read.notify_read(Err(resp)); + } + ResponseChannel::Write(write) => { + write.notify_applied(Err(resp)); + } + } + } + + pub fn notify_proposed(&mut self) { + if let ResponseChannel::Write(write) = self { + write.notify_proposed(); + } + } + + pub fn notify_committed(&mut self) { + if let ResponseChannel::Write(write) = self { + write.notify_committed(); + } + } + + pub fn invoke_read(self, args: ReadResponse) { + match self { + ResponseChannel::Read(read) => read.notify_read(Ok(args)), + other => panic!("expect Callback::Read(..), got {:?}", other), + } + } + + pub fn is_none(&self) -> bool { + matches!(self, ResponseChannel::None) + } +} + +impl fmt::Debug for ResponseChannel +where + S: Snapshot, +{ + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ResponseChannel::None => write!(fmt, "Callback::None"), + ResponseChannel::Read(_) => write!(fmt, "Callback::Read(..)"), + ResponseChannel::Write { .. 
} => write!(fmt, "Callback::Write(..)"), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum PeerTick { + Raft = 0, + RaftLogGc = 1, + SplitRegionCheck = 2, + PdHeartbeat = 3, + CheckMerge = 4, + CheckPeerStaleState = 5, + EntryCacheEvict = 6, + CheckLeaderLease = 7, + ReactivateMemoryLock = 8, + ReportBuckets = 9, +} + +impl PeerTick { + pub const VARIANT_COUNT: usize = Self::get_all_ticks().len(); + + #[inline] + pub fn tag(self) -> &'static str { + match self { + PeerTick::Raft => "raft", + PeerTick::RaftLogGc => "raft_log_gc", + PeerTick::SplitRegionCheck => "split_region_check", + PeerTick::PdHeartbeat => "pd_heartbeat", + PeerTick::CheckMerge => "check_merge", + PeerTick::CheckPeerStaleState => "check_peer_stale_state", + PeerTick::EntryCacheEvict => "entry_cache_evict", + PeerTick::CheckLeaderLease => "check_leader_lease", + PeerTick::ReactivateMemoryLock => "reactivate_memory_lock", + PeerTick::ReportBuckets => "report_buckets", + } + } + + pub const fn get_all_ticks() -> &'static [PeerTick] { + const TICKS: &[PeerTick] = &[ + PeerTick::Raft, + PeerTick::RaftLogGc, + PeerTick::SplitRegionCheck, + PeerTick::PdHeartbeat, + PeerTick::CheckMerge, + PeerTick::CheckPeerStaleState, + PeerTick::EntryCacheEvict, + PeerTick::CheckLeaderLease, + PeerTick::ReactivateMemoryLock, + PeerTick::ReportBuckets, + ]; + TICKS + } +} + +#[derive(Debug, Clone, Copy)] +pub enum StoreTick { + // No CompactLock and CompactCheck as they should be implemented by peer itself. 
+ PdStoreHeartbeat, + SnapGc, + ConsistencyCheck, + CleanupImportSst, +} + +impl StoreTick { + #[inline] + pub fn tag(self) -> RaftEventDurationType { + match self { + StoreTick::PdStoreHeartbeat => RaftEventDurationType::pd_store_heartbeat, + StoreTick::SnapGc => RaftEventDurationType::snap_gc, + StoreTick::ConsistencyCheck => RaftEventDurationType::consistency_check, + StoreTick::CleanupImportSst => RaftEventDurationType::cleanup_import_sst, + } + } +} + +/// Raft command is the command that is expected to be proposed by the +/// leader of the target raft group. +#[derive(Debug)] +pub struct RaftCommand { + pub send_time: Instant, + pub request: RaftCmdRequest, + pub ch: ResponseChannel, +} + +impl RaftCommand { + #[inline] + pub fn new(request: RaftCmdRequest, ch: ResponseChannel) -> RaftCommand { + RaftCommand { + request, + ch, + send_time: Instant::now(), + } + } +} + +/// Message that can be sent to a peer. +pub enum PeerMsg { + /// Raft message is the message sent between raft nodes in the same + /// raft group. Messages need to be redirected to raftstore if target + /// peer doesn't exist. + RaftMessage(InspectedRaftMessage), + /// Raft command is the command that is expected to be proposed by the + /// leader of the target raft group. If it's failed to be sent, callback + /// usually needs to be called before dropping in case of resource leak. + RaftCommand(RaftCommand), + /// Tick is periodical task. If target peer doesn't exist there is a potential + /// that the raft node will not work anymore. + Tick(PeerTick), + /// Result of applying committed entries. The message can't be lost. + ApplyRes { + res: ApplyTaskRes, + }, + /// Start the FSM. + Start, + /// A message only used to notify a peer. 
+ Noop, + Persisted { + peer_id: u64, + ready_number: u64, + }, +} + +impl fmt::Debug for PeerMsg { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PeerMsg::RaftMessage(_) => write!(fmt, "Raft Message"), + PeerMsg::RaftCommand(_) => write!(fmt, "Raft Command"), + PeerMsg::Tick(tick) => write! { + fmt, + "{:?}", + tick + }, + PeerMsg::ApplyRes { res } => write!(fmt, "ApplyRes {:?}", res), + PeerMsg::Start => write!(fmt, "Startup"), + PeerMsg::Noop => write!(fmt, "Noop"), + PeerMsg::Persisted { + peer_id, + ready_number, + } => write!( + fmt, + "Persisted peer_id {}, ready_number {}", + peer_id, ready_number + ), + } + } +} + +pub enum StoreMsg { + RaftMessage(InspectedRaftMessage), + Tick(StoreTick), + Start { store: metapb::Store }, +} + +impl fmt::Debug for StoreMsg { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + StoreMsg::RaftMessage(_) => write!(fmt, "Raft Message"), + StoreMsg::Tick(tick) => write!(fmt, "StoreTick {:?}", tick), + StoreMsg::Start { ref store } => write!(fmt, "Start store {:?}", store), + } + } +} diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs new file mode 100644 index 00000000000..fd27349ef43 --- /dev/null +++ b/components/raftstore-v2/src/router/mod.rs @@ -0,0 +1,5 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +mod message; + +pub use message::{PeerMsg, PeerTick, StoreMsg, StoreTick}; diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 374df821b9b..73e1a6ecb50 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -109,7 +109,8 @@ use crate::{ const SHRINK_CACHE_CAPACITY: usize = 64; const MIN_BCAST_WAKE_UP_INTERVAL: u64 = 1_000; // 1s const REGION_READ_PROGRESS_CAP: usize = 128; -const MAX_COMMITTED_SIZE_PER_READY: u64 = 16 * 1024 * 1024; +#[doc(hidden)] +pub const MAX_COMMITTED_SIZE_PER_READY: u64 = 16 * 1024 * 1024; /// The returned states of the peer after checking whether it is stale #[derive(Debug, PartialEq, Eq)] From cead3f5685f58d6dbf3db5a1a47493f5c8faa19e Mon Sep 17 00:00:00 2001 From: Ziheng Gan Date: Mon, 20 Jun 2022 12:48:36 +0800 Subject: [PATCH 0031/1149] *: fix thread name truncating issue (#12442) ref tikv/tikv#5593, close tikv/tikv#12451 Based on #5593, add a global hashmap recording relationship between thread id and thread name; add some wrappers to spawn threads and update the hashmap. It's necessary to use `after_start_wrapper` and `before_stop_wrapper` together. Otherwise it may cause reporting a wrong thread name if a thread inserts its name to hashmap and doesn't remove it, while another thread reuses the same tid and doesn't update the hashmap. 
Signed-off-by: GanZiheng Co-authored-by: Ti Chi Robot --- clippy.toml | 9 + cmd/tikv-ctl/src/main.rs | 4 +- components/backup-stream/src/endpoint.rs | 5 +- components/backup/src/utils.rs | 6 +- components/batch-system/src/batch.rs | 7 +- components/cdc/src/endpoint.rs | 5 + components/cdc/src/initializer.rs | 7 +- components/encryption/src/manager/mod.rs | 4 +- components/encryption/src/master_key/kms.rs | 3 + components/file_system/src/io_stats/mod.rs | 6 +- components/pd_client/src/tso.rs | 4 +- .../raftstore/src/store/async_io/write.rs | 12 +- components/raftstore/src/store/worker/pd.rs | 3 +- .../src/store/worker/refresh_config.rs | 6 +- components/resolved_ts/src/advance.rs | 6 +- components/resolved_ts/src/scanner.rs | 4 +- components/server/src/server.rs | 9 +- components/test_raftstore/src/server.rs | 3 + components/test_util/src/lib.rs | 3 +- components/tikv_util/src/lib.rs | 4 +- .../tikv_util/src/metrics/threads_linux.rs | 16 +- components/tikv_util/src/sys/thread.rs | 170 +++++++++++++++++- components/tikv_util/src/time.rs | 3 +- components/tikv_util/src/timer.rs | 9 +- components/tikv_util/src/worker/future.rs | 3 +- components/tikv_util/src/yatp_pool/mod.rs | 4 +- scripts/clippy | 1 + src/import/sst_service.rs | 5 +- src/server/debug.rs | 7 +- src/server/gc_worker/gc_manager.rs | 11 +- src/server/load_statistics/linux.rs | 4 +- src/server/reset_to_version.rs | 3 +- src/server/server.rs | 5 + src/server/service/kv.rs | 5 +- src/server/snap.rs | 5 +- src/server/status_server/mod.rs | 5 +- src/storage/txn/flow_controller.rs | 7 +- 37 files changed, 313 insertions(+), 60 deletions(-) create mode 100644 clippy.toml diff --git a/clippy.toml b/clippy.toml new file mode 100644 index 00000000000..2a4bb3e82b2 --- /dev/null +++ b/clippy.toml @@ -0,0 +1,9 @@ +disallowed-methods = [ + { path = "std::thread::Builder::spawn", reason = "Wrapper function `::spawn_wrapper` should be used instead, refer to https://github.com/tikv/tikv/pull/12442 for more details." 
}, + + { path = "tokio::runtime::builder::Builder::on_thread_start", reason = "Wrapper function `::after_start_wrapper` should be used instead, refer to https://github.com/tikv/tikv/pull/12442 for more details." }, + { path = "tokio::runtime::builder::Builder::on_thread_stop", reason = "Wrapper function `::before_stop_wrapper` should be used instead, refer to https://github.com/tikv/tikv/pull/12442 for more details." }, + + { path = "futures_executor::thread_pool::ThreadPoolBuilder::after_start", reason = "Wrapper function `::after_start_wrapper` should be used instead, refer to https://github.com/tikv/tikv/pull/12442 for more details." }, + { path = "futures_executor::thread_pool::ThreadPoolBuilder::before_stop", reason = "Wrapper function `::before_stop_wrapper` should be used instead, refer to https://github.com/tikv/tikv/pull/12442 for more details." }, +] \ No newline at end of file diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 8ada0c7a426..3ad066df491 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -45,7 +45,7 @@ use regex::Regex; use security::{SecurityConfig, SecurityManager}; use structopt::{clap::ErrorKind, StructOpt}; use tikv::{config::TiKvConfig, server::debug::BottommostLevelCompaction}; -use tikv_util::{escape, run_and_wait_child_process, unescape}; +use tikv_util::{escape, run_and_wait_child_process, sys::thread::StdThreadBuildWrapper, unescape}; use txn_types::Key; use crate::{cmd::*, executor::*, util::*}; @@ -604,7 +604,7 @@ fn compact_whole_cluster( let cfs: Vec = cfs.iter().map(|cf| cf.to_string()).collect(); let h = thread::Builder::new() .name(format!("compact-{}", addr)) - .spawn(move || { + .spawn_wrapper(move || { tikv_alloc::add_thread_memory_accessor(); let debug_executor = new_debug_executor(&cfg, None, false, Some(&addr), mgr); for cf in cfs { diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 470ee53bb87..1c1efdcb546 100644 --- 
a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -29,6 +29,7 @@ use tikv_util::{ box_err, config::ReadableDuration, debug, defer, info, + sys::thread::ThreadBuildWrapper, time::Instant, warn, worker::{Runnable, Scheduler}, @@ -1016,10 +1017,10 @@ fn create_tokio_runtime(thread_count: usize, thread_name: &str) -> TokioResult TokioResu .thread_name(thread_name) .enable_io() .enable_time() - .on_thread_start(|| { + .after_start_wrapper(|| { tikv_alloc::add_thread_memory_accessor(); file_system::set_io_type(IOType::Export); }) - .on_thread_stop(|| { + .before_stop_wrapper(|| { tikv_alloc::remove_thread_memory_accessor(); }) .worker_threads(thread_count) diff --git a/components/batch-system/src/batch.rs b/components/batch-system/src/batch.rs index 3f8d433aefd..108058ee5f2 100644 --- a/components/batch-system/src/batch.rs +++ b/components/batch-system/src/batch.rs @@ -17,7 +17,10 @@ use std::{ use crossbeam::channel::{self, SendError}; use fail::fail_point; use file_system::{set_io_type, IOType}; -use tikv_util::{debug, error, info, mpsc, safe_panic, thd_name, time::Instant, warn}; +use tikv_util::{ + debug, error, info, mpsc, safe_panic, sys::thread::StdThreadBuildWrapper, thd_name, + time::Instant, warn, +}; use crate::{ config::Config, @@ -581,7 +584,7 @@ where let props = tikv_util::thread_group::current_properties(); let t = thread::Builder::new() .name(name) - .spawn(move || { + .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); set_io_type(IOType::ForegroundWrite); poller.poll(); diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 9b1b663b207..7ca640ac8b3 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -41,6 +41,7 @@ use security::SecurityManager; use tikv::{config::CdcConfig, storage::Statistics}; use tikv_util::{ debug, error, impl_display_as_debug, info, + sys::thread::ThreadBuildWrapper, time::Limiter, timer::SteadyTimer, warn, 
@@ -373,12 +374,16 @@ impl, E: KvEngine> Endpoint { let workers = Builder::new_multi_thread() .thread_name("cdcwkr") .worker_threads(config.incremental_scan_threads) + .after_start_wrapper(|| {}) + .before_stop_wrapper(|| {}) .build() .unwrap(); let tso_worker = Builder::new_multi_thread() .thread_name("tso") .worker_threads(config.tso_worker_threads) .enable_time() + .after_start_wrapper(|| {}) + .before_stop_wrapper(|| {}) .build() .unwrap(); diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 6b80a8c21a0..a5dcf094acf 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -564,7 +564,10 @@ mod tests { }, TestEngineBuilder, }; - use tikv_util::worker::{LazyWorker, Runnable}; + use tikv_util::{ + sys::thread::ThreadBuildWrapper, + worker::{LazyWorker, Runnable}, + }; use tokio::runtime::{Builder, Runtime}; use super::*; @@ -608,6 +611,8 @@ mod tests { let pool = Builder::new_multi_thread() .thread_name("test-initializer-worker") .worker_threads(4) + .after_start_wrapper(|| {}) + .before_stop_wrapper(|| {}) .build() .unwrap(); let downstream_state = Arc::new(AtomicCell::new(DownstreamState::Initializing)); diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index 0535cae16f1..bc4b97de7a2 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -17,7 +17,7 @@ use fail::fail_point; use file_system::File; use kvproto::encryptionpb::{DataKey, EncryptionMethod, FileDictionary, FileInfo, KeyDictionary}; use protobuf::Message; -use tikv_util::{box_err, debug, error, info, thd_name, warn}; +use tikv_util::{box_err, debug, error, info, sys::thread::StdThreadBuildWrapper, thd_name, warn}; use crate::{ config::EncryptionConfig, @@ -557,7 +557,7 @@ impl DataKeyManager { let (rotate_terminal, rx) = channel::bounded(1); let background_worker = std::thread::Builder::new() .name(thd_name!("enc:key")) - .spawn(move || { + 
.spawn_wrapper(move || { run_background_rotate_work(dict_clone, method, &*master_key, rx); })?; diff --git a/components/encryption/src/master_key/kms.rs b/components/encryption/src/master_key/kms.rs index 601c982a961..da1b6d80e0a 100644 --- a/components/encryption/src/master_key/kms.rs +++ b/components/encryption/src/master_key/kms.rs @@ -8,6 +8,7 @@ use kvproto::encryptionpb::EncryptedContent; use tikv_util::{ box_err, error, stream::{retry, with_timeout}, + sys::thread::ThreadBuildWrapper, }; use tokio::runtime::{Builder, Runtime}; @@ -81,6 +82,8 @@ impl KmsBackend { Builder::new_current_thread() .thread_name("kms-runtime") .enable_all() + .after_start_wrapper(|| {}) + .before_stop_wrapper(|| {}) .build()?, ); diff --git a/components/file_system/src/io_stats/mod.rs b/components/file_system/src/io_stats/mod.rs index f0e644ad4a4..d9c7ae9d519 100644 --- a/components/file_system/src/io_stats/mod.rs +++ b/components/file_system/src/io_stats/mod.rs @@ -45,6 +45,8 @@ pub use proc::*; #[cfg(test)] mod tests { + use tikv_util::sys::thread::StdThreadBuildWrapper; + use super::*; use crate::IOType; @@ -54,7 +56,7 @@ mod tests { let _ths = (0..8) .map(|_| { let tx_clone = tx.clone(); - std::thread::Builder::new().spawn(move || { + std::thread::Builder::new().spawn_wrapper(move || { set_io_type(IOType::ForegroundWrite); tx_clone.send(()).unwrap(); }) @@ -72,7 +74,7 @@ mod tests { let _ths = (0..8) .map(|_| { let tx_clone = tx.clone(); - std::thread::Builder::new().spawn(move || { + std::thread::Builder::new().spawn_wrapper(move || { set_io_type(IOType::ForegroundWrite); tx_clone.send(()).unwrap(); }) diff --git a/components/pd_client/src/tso.rs b/components/pd_client/src/tso.rs index ff951a3c77c..6c99e87e4e7 100644 --- a/components/pd_client/src/tso.rs +++ b/components/pd_client/src/tso.rs @@ -21,7 +21,7 @@ use futures::{ }; use grpcio::{CallOption, WriteFlags}; use kvproto::pdpb::{PdClient, TsoRequest, TsoResponse}; -use tikv_util::{box_err, info}; +use tikv_util::{box_err, 
info, sys::thread::StdThreadBuildWrapper}; use tokio::sync::{mpsc, oneshot, watch}; use txn_types::TimeStamp; @@ -61,7 +61,7 @@ impl TimestampOracle { // Start a background thread to handle TSO requests and responses thread::Builder::new() .name("tso-worker".into()) - .spawn(move || { + .spawn_wrapper(move || { block_on(run_tso( cluster_id, rpc_sender.sink_err_into(), diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 373b64134d3..f81160d689d 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -27,7 +27,9 @@ use raft::eraftpb::Entry; use tikv_util::{ box_err, config::{Tracker, VersionTrack}, - debug, info, slow_log, thd_name, + debug, info, slow_log, + sys::thread::StdThreadBuildWrapper, + thd_name, time::{duration_to_sec, Instant}, warn, }; @@ -692,9 +694,11 @@ where cfg, ); info!("starting store writer {}", i); - let t = thread::Builder::new().name(thd_name!(tag)).spawn(move || { - worker.run(); - })?; + let t = thread::Builder::new() + .name(thd_name!(tag)) + .spawn_wrapper(move || { + worker.run(); + })?; self.writers.push(tx); self.handlers.push(t); } diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 44954ba5e01..648e8e9344e 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -39,6 +39,7 @@ use resource_metering::{Collector, CollectorGuard, CollectorRegHandle, RawRecord use tikv_util::{ box_err, debug, error, info, metrics::ThreadInfoStatistics, + sys::thread::StdThreadBuildWrapper, thd_name, time::{Instant as TiInstant, UnixSecs}, timer::GLOBAL_TIMER_HANDLE, @@ -537,7 +538,7 @@ where } let h = Builder::new() .name(thd_name!("stats-monitor")) - .spawn(move || { + .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); tikv_alloc::add_thread_memory_accessor(); let mut thread_stats = 
ThreadInfoStatistics::new(); diff --git a/components/raftstore/src/store/worker/refresh_config.rs b/components/raftstore/src/store/worker/refresh_config.rs index 4ad92d5db68..d3681654975 100644 --- a/components/raftstore/src/store/worker/refresh_config.rs +++ b/components/raftstore/src/store/worker/refresh_config.rs @@ -7,7 +7,9 @@ use std::{ use batch_system::{BatchRouter, Fsm, FsmTypes, HandlerBuilder, Poller, PoolState, Priority}; use file_system::{set_io_type, IOType}; -use tikv_util::{debug, error, info, safe_panic, thd_name, worker::Runnable}; +use tikv_util::{ + debug, error, info, safe_panic, sys::thread::StdThreadBuildWrapper, thd_name, worker::Runnable, +}; use crate::store::fsm::{ apply::{ApplyFsm, ControlFsm}, @@ -70,7 +72,7 @@ where name_prefix, i + self.state.id_base, ))) - .spawn(move || { + .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); set_io_type(IOType::ForegroundWrite); poller.poll(); diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index c438c4c53fa..ef683724429 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -24,7 +24,9 @@ use pd_client::PdClient; use protobuf::Message; use raftstore::store::{fsm::StoreMeta, util::RegionReadProgressRegistry}; use security::SecurityManager; -use tikv_util::{info, time::Instant, timer::SteadyTimer, worker::Scheduler}; +use tikv_util::{ + info, sys::thread::ThreadBuildWrapper, time::Instant, timer::SteadyTimer, worker::Scheduler, +}; use tokio::{ runtime::{Builder, Runtime}, sync::Mutex, @@ -65,6 +67,8 @@ impl AdvanceTsWorker { .thread_name("advance-ts") .worker_threads(1) .enable_time() + .after_start_wrapper(|| {}) + .before_stop_wrapper(|| {}) .build() .unwrap(); Self { diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index c52bf3bf166..835de79c161 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -19,7 
+19,7 @@ use tikv::storage::{ mvcc::{DeltaScanner, MvccReader, ScannerBuilder}, txn::{TxnEntry, TxnEntryScanner}, }; -use tikv_util::{time::Instant, timer::GLOBAL_TIMER_HANDLE}; +use tikv_util::{sys::thread::ThreadBuildWrapper, time::Instant, timer::GLOBAL_TIMER_HANDLE}; use tokio::runtime::{Builder, Runtime}; use txn_types::{Key, Lock, LockType, TimeStamp}; @@ -74,6 +74,8 @@ impl, E: KvEngine> ScannerPool { Builder::new_multi_thread() .thread_name("inc-scan") .worker_threads(count) + .after_start_wrapper(|| {}) + .before_stop_wrapper(|| {}) .build() .unwrap(), ); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index f1fd2167f9d..6bf1de8e7a7 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -113,7 +113,10 @@ use tikv_util::{ }; use tokio::runtime::Builder; -use crate::{memory::*, raft_engine_switch::*, setup::*, signal_handler}; +use crate::{ + memory::*, raft_engine_switch::*, setup::*, signal_handler, + tikv_util::sys::thread::ThreadBuildWrapper, +}; #[inline] fn run_impl(config: TiKvConfig) { @@ -622,11 +625,11 @@ impl TiKvServer { Builder::new_multi_thread() .thread_name(thd_name!("debugger")) .worker_threads(1) - .on_thread_start(move || { + .after_start_wrapper(move || { tikv_alloc::add_thread_memory_accessor(); tikv_util::thread_group::set_properties(props.clone()); }) - .on_thread_stop(tikv_alloc::remove_thread_memory_accessor) + .before_stop_wrapper(tikv_alloc::remove_thread_memory_accessor) .build() .unwrap(), ); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index d156ab77adb..981843ddfc6 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -66,6 +66,7 @@ use tikv::{ use tikv_util::{ config::VersionTrack, quota_limiter::QuotaLimiter, + sys::thread::ThreadBuildWrapper, time::ThreadReadId, worker::{Builder as WorkerBuilder, LazyWorker}, HandyRwLock, @@ -448,6 +449,8 @@ impl ServerCluster { 
TokioBuilder::new_multi_thread() .thread_name(thd_name!("debugger")) .worker_threads(1) + .after_start_wrapper(|| {}) + .before_stop_wrapper(|| {}) .build() .unwrap(), ); diff --git a/components/test_util/src/lib.rs b/components/test_util/src/lib.rs index 9dca2ee2111..dc053bd6d20 100644 --- a/components/test_util/src/lib.rs +++ b/components/test_util/src/lib.rs @@ -20,6 +20,7 @@ use std::{ }; use rand::Rng; +use tikv_util::sys::thread::StdThreadBuildWrapper; pub use crate::{ encryption::*, @@ -36,7 +37,7 @@ pub fn setup_for_ci() { // of time to avoid causing timeout. thread::Builder::new() .name(tikv_util::thd_name!("backtrace-loader")) - .spawn(::backtrace::Backtrace::new) + .spawn_wrapper(::backtrace::Backtrace::new) .unwrap(); if env::var("CI").is_ok() { diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index b3cc43c550a..8445a0a97aa 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -32,6 +32,8 @@ use nix::{ }; use rand::rngs::ThreadRng; +use crate::sys::thread::StdThreadBuildWrapper; + #[macro_use] pub mod log; pub mod buffer_vec; @@ -468,7 +470,7 @@ pub fn set_panic_hook(panic_abort: bool, data_dir: &str) { // Caching is slow, spawn it in another thread to speed up. thread::Builder::new() .name(thd_name!("backtrace-loader")) - .spawn(::backtrace::Backtrace::new) + .spawn_wrapper(::backtrace::Backtrace::new) .unwrap(); let data_dir = data_dir.to_string(); diff --git a/components/tikv_util/src/metrics/threads_linux.rs b/components/tikv_util/src/metrics/threads_linux.rs index 8ee9aed05f5..4eae41b0f06 100644 --- a/components/tikv_util/src/metrics/threads_linux.rs +++ b/components/tikv_util/src/metrics/threads_linux.rs @@ -15,7 +15,7 @@ use prometheus::{ }; use crate::{ - sys::thread::{self, Pid}, + sys::thread::{self, Pid, THREAD_NAME_HASHMAP}, time::Instant, }; @@ -150,7 +150,12 @@ impl Collector for ThreadsCollector { // Threads CPU time. 
let total = thread::linux::cpu_total(&stat); // sanitize thread name before push metrics. - let name = sanitize_thread_name(tid, &stat.command); + let name = if let Some(thread_name) = THREAD_NAME_HASHMAP.lock().unwrap().get(&tid) + { + sanitize_thread_name(tid, thread_name) + } else { + sanitize_thread_name(tid, &stat.command) + }; let cpu_total = metrics .cpu_totals .get_metric_with_label_values(&[&name, &format!("{}", tid)]) @@ -471,6 +476,7 @@ mod tests { use std::{env::temp_dir, fs, io::Write, sync, time::Duration}; use super::*; + use crate::sys::thread::StdThreadBuildWrapper; #[test] fn test_thread_stat_io() { @@ -479,7 +485,7 @@ mod tests { let (tx1, rx1) = sync::mpsc::channel(); let h = std::thread::Builder::new() .name(name.to_owned()) - .spawn(move || { + .spawn_wrapper(move || { // Make `io::write_bytes` > 0 let mut tmp = temp_dir(); tmp.push(name); @@ -528,7 +534,7 @@ mod tests { let (tx1, rx1) = sync::mpsc::channel(); std::thread::Builder::new() .name(str1.to_owned()) - .spawn(move || { + .spawn_wrapper(move || { tx1.send(()).unwrap(); // Make `io::write_bytes` > 0 @@ -614,7 +620,7 @@ mod tests { let (tx1, rx1) = sync::mpsc::channel(); std::thread::Builder::new() .name(name) - .spawn(move || { + .spawn_wrapper(move || { tx1.send(()).unwrap(); let start = Instant::now(); diff --git a/components/tikv_util/src/sys/thread.rs b/components/tikv_util/src/sys/thread.rs index cc38cc8228f..445fc93974e 100644 --- a/components/tikv_util/src/sys/thread.rs +++ b/components/tikv_util/src/sys/thread.rs @@ -4,7 +4,9 @@ //! Only Linux platform is implemented correctly, for other platform, it only guarantees //! successful compilation. -use std::io; +use std::{io, io::Result, sync::Mutex, thread}; + +use collections::HashMap; /// A cross-platform CPU statistics data structure. 
#[derive(Debug, Copy, Clone, Default, PartialEq)] @@ -361,14 +363,121 @@ pub fn current_thread_stat() -> io::Result { thread_stat(process_id(), thread_id()) } +pub trait StdThreadBuildWrapper { + fn spawn_wrapper(self, f: F) -> io::Result> + where + F: FnOnce() -> T, + F: Send + 'static, + T: Send + 'static; +} + +pub trait ThreadBuildWrapper { + fn after_start_wrapper(&mut self, f: F) -> &mut Self + where + F: Fn() + Send + Sync + 'static; + + fn before_stop_wrapper(&mut self, f: F) -> &mut Self + where + F: Fn() + Send + Sync + 'static; +} + +lazy_static::lazy_static! { + pub static ref THREAD_NAME_HASHMAP: Mutex> = Mutex::new(HashMap::default()); +} + +pub(crate) fn add_thread_name_to_map() { + if let Some(name) = std::thread::current().name() { + let tid = thread_id(); + THREAD_NAME_HASHMAP + .lock() + .unwrap() + .insert(tid, name.to_string()); + debug!("tid {} thread name is {}", tid, name); + } +} + +pub(crate) fn remove_thread_name_from_map() { + let tid = thread_id(); + THREAD_NAME_HASHMAP.lock().unwrap().remove(&tid); +} + +impl StdThreadBuildWrapper for std::thread::Builder { + fn spawn_wrapper(self, f: F) -> Result> + where + F: FnOnce() -> T, + F: Send + 'static, + T: Send + 'static, + { + #[allow(clippy::disallowed_methods)] + self.spawn(|| { + add_thread_name_to_map(); + let res = f(); + remove_thread_name_from_map(); + res + }) + } +} + +impl ThreadBuildWrapper for tokio::runtime::Builder { + fn after_start_wrapper(&mut self, f: F) -> &mut Self + where + F: Fn() + Send + Sync + 'static, + { + #[allow(clippy::disallowed_methods)] + self.on_thread_start(move || { + add_thread_name_to_map(); + f(); + }) + } + + fn before_stop_wrapper(&mut self, f: F) -> &mut Self + where + F: Fn() + Send + Sync + 'static, + { + #[allow(clippy::disallowed_methods)] + self.on_thread_stop(move || { + f(); + remove_thread_name_from_map(); + }) + } +} + +impl ThreadBuildWrapper for futures::executor::ThreadPoolBuilder { + fn after_start_wrapper(&mut self, f: F) -> &mut Self 
+ where + F: Fn() + Send + Sync + 'static, + { + #[allow(clippy::disallowed_methods)] + self.after_start(move |_| { + add_thread_name_to_map(); + f(); + }) + } + + fn before_stop_wrapper(&mut self, f: F) -> &mut Self + where + F: Fn() + Send + Sync + 'static, + { + #[allow(clippy::disallowed_methods)] + self.before_stop(move |_| { + f(); + remove_thread_name_from_map(); + }) + } +} + #[cfg(test)] mod tests { use std::{ collections::HashSet, + sync, sync::{Arc, Condvar, Mutex}, }; + use futures::executor::block_on; + use super::*; + use crate::yatp_pool::{DefaultTicker, YatpPoolBuilder}; #[test] fn test_thread_id() { @@ -427,4 +536,63 @@ mod tests { assert!(!ids.contains(tid)); } } + + #[test] + fn test_thread_name_wrapper() { + let thread_name = "thread_for_test"; + + let (tx, rx) = sync::mpsc::sync_channel(10); + + let get_name = move || { + let tid = thread_id(); + if let Some(name) = THREAD_NAME_HASHMAP.lock().unwrap().get(&tid) { + tx.clone().send(name.to_string()).unwrap(); + } else { + panic!("thread not found"); + } + }; + + // test std thread builder + std::thread::Builder::new() + .name(thread_name.to_string()) + .spawn_wrapper(get_name.clone()) + .unwrap() + .join() + .unwrap(); + + let name = rx.recv().unwrap(); + assert_eq!(name, thread_name); + + // test Yatp + let get_name_fn = get_name.clone(); + block_on( + YatpPoolBuilder::new(DefaultTicker {}) + .name_prefix(thread_name) + .after_start(|| {}) + .before_stop(|| {}) + .build_future_pool() + .spawn_handle(async move { get_name_fn() }) + .unwrap(), + ) + .unwrap(); + + let name = rx.recv().unwrap(); + assert!(name.contains(thread_name)); + + // test tokio thread builder + let get_name_fn = get_name; + block_on( + tokio::runtime::Builder::new_multi_thread() + .thread_name(thread_name) + .after_start_wrapper(|| {}) + .before_stop_wrapper(|| {}) + .build() + .unwrap() + .spawn(async move { get_name_fn() }), + ) + .unwrap(); + + let name = rx.recv().unwrap(); + assert_eq!(name, thread_name); + } } diff 
--git a/components/tikv_util/src/time.rs b/components/tikv_util/src/time.rs index c8f210db7a7..57e9e261444 100644 --- a/components/tikv_util/src/time.rs +++ b/components/tikv_util/src/time.rs @@ -148,7 +148,7 @@ impl Monitor { let (tx, rx) = mpsc::channel(); let h = Builder::new() .name(thd_name!("time-monitor")) - .spawn(move || { + .spawn_wrapper(move || { crate::thread_group::set_properties(props); tikv_alloc::add_thread_memory_accessor(); while rx.try_recv().is_err() { @@ -205,6 +205,7 @@ use self::inner::monotonic_coarse_now; pub use self::inner::monotonic_now; /// Returns the monotonic raw time since some unspecified starting point. pub use self::inner::monotonic_raw_now; +use crate::sys::thread::StdThreadBuildWrapper; const NANOSECONDS_PER_SECOND: u64 = 1_000_000_000; const MILLISECOND_PER_SECOND: i64 = 1_000; diff --git a/components/tikv_util/src/timer.rs b/components/tikv_util/src/timer.rs index dc74dbb3b43..50cfa48f9aa 100644 --- a/components/tikv_util/src/timer.rs +++ b/components/tikv_util/src/timer.rs @@ -18,7 +18,10 @@ use tokio_timer::{ Delay, }; -use crate::time::{monotonic_raw_now, Instant}; +use crate::{ + sys::thread::StdThreadBuildWrapper, + time::{monotonic_raw_now, Instant}, +}; pub struct Timer { pending: BinaryHeap>>, @@ -98,7 +101,7 @@ fn start_global_timer() -> Handle { let props = crate::thread_group::current_properties(); Builder::new() .name(thd_name!("timer")) - .spawn(move || { + .spawn_wrapper(move || { crate::thread_group::set_properties(props); tikv_alloc::add_thread_memory_accessor(); let mut timer = tokio_timer::Timer::default(); @@ -197,7 +200,7 @@ fn start_global_steady_timer() -> SteadyTimer { let clock_ = clock.clone(); Builder::new() .name(thd_name!("steady-timer")) - .spawn(move || { + .spawn_wrapper(move || { let c = Clock::new_with_now(clock_); let mut timer = tokio_timer::Timer::new_with_now(ParkThread::new(), c); tx.send(timer.handle()).unwrap(); diff --git a/components/tikv_util/src/worker/future.rs 
b/components/tikv_util/src/worker/future.rs index 83b4d95bc58..be7c05589cb 100644 --- a/components/tikv_util/src/worker/future.rs +++ b/components/tikv_util/src/worker/future.rs @@ -16,6 +16,7 @@ use prometheus::IntGauge; use tokio::task::LocalSet; use super::metrics::*; +use crate::sys::thread::StdThreadBuildWrapper; pub struct Stopped(pub T); @@ -156,7 +157,7 @@ impl Worker { let props = crate::thread_group::current_properties(); let h = Builder::new() .name(thd_name!(self.scheduler.name.as_ref())) - .spawn(move || { + .spawn_wrapper(move || { crate::thread_group::set_properties(props); poll(runner, rx) })?; diff --git a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index 93cd46cc6ac..e2e57c9fbce 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -100,6 +100,7 @@ impl Runner for YatpPoolRunner { type TaskCell = TaskCell; fn start(&mut self, local: &mut Local) { + crate::sys::thread::add_thread_name_to_map(); if let Some(props) = self.props.take() { crate::thread_group::set_properties(Some(props)); } @@ -138,7 +139,8 @@ impl Runner for YatpPoolRunner { } self.ticker.on_tick(); self.inner.end(local); - tikv_alloc::remove_thread_memory_accessor() + tikv_alloc::remove_thread_memory_accessor(); + crate::sys::thread::remove_thread_name_from_map() } } diff --git a/scripts/clippy b/scripts/clippy index f0f46fccfa6..58bdafb817b 100755 --- a/scripts/clippy +++ b/scripts/clippy @@ -33,6 +33,7 @@ CLIPPY_LINTS=(-A clippy::module_inception \ -A clippy::enum_variant_names \ -W clippy::dbg_macro \ -W clippy::todo \ + -D clippy::disallowed-methods \ -D rust-2018-idioms) cargo clippy --workspace \ diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index ac892884e37..24e52a8057e 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -36,6 +36,7 @@ use sst_importer::{error_inc, metrics::*, sst_meta_to_path, Config, Error, Resul use tikv_util::{ 
config::ReadableSize, future::{create_stream_with_buffer, paired_future_callback}, + sys::thread::ThreadBuildWrapper, time::{Instant, Limiter}, }; use txn_types::{Key, WriteRef, WriteType}; @@ -83,12 +84,12 @@ where let threads = ThreadPoolBuilder::new() .pool_size(cfg.num_threads) .name_prefix("sst-importer") - .after_start(move |_| { + .after_start_wrapper(move || { tikv_util::thread_group::set_properties(props.clone()); tikv_alloc::add_thread_memory_accessor(); set_io_type(IOType::Import); }) - .before_stop(move |_| tikv_alloc::remove_thread_memory_accessor()) + .before_stop_wrapper(move || tikv_alloc::remove_thread_memory_accessor()) .create() .unwrap(); importer.start_switch_mode_check(&threads, engine.clone()); diff --git a/src/server/debug.rs b/src/server/debug.rs index f53f11eeec5..e5d6eba617f 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -35,7 +35,10 @@ use raftstore::{ }, }; use thiserror::Error; -use tikv_util::{config::ReadableSize, keybuilder::KeyBuilder, worker::Worker}; +use tikv_util::{ + config::ReadableSize, keybuilder::KeyBuilder, sys::thread::StdThreadBuildWrapper, + worker::Worker, +}; use txn_types::Key; pub use crate::storage::mvcc::MvccInfoIterator; @@ -441,7 +444,7 @@ impl Debugger { let props = tikv_util::thread_group::current_properties(); let thread = ThreadBuilder::new() .name(format!("mvcc-recover-thread-{}", thread_index)) - .spawn(move || { + .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); tikv_alloc::add_thread_memory_accessor(); info!( diff --git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index 186a4694167..b009c80b728 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -22,7 +22,7 @@ use super::{ gc_worker::{sync_gc, GcSafePointProvider, GcTask}, Result, }; -use crate::server::metrics::*; +use crate::{server::metrics::*, tikv_util::sys::thread::StdThreadBuildWrapper}; const POLL_SAFE_POINT_INTERVAL_SECS: u64 = 10; @@ 
-279,7 +279,7 @@ impl GcMan let props = tikv_util::thread_group::current_properties(); let res: Result<_> = ThreadBuilder::new() .name(thd_name!("gc-manager")) - .spawn(move || { + .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); tikv_alloc::add_thread_memory_accessor(); self.run(); @@ -632,7 +632,10 @@ mod tests { coprocessor::{RegionInfo, Result as CopResult, SeekRegionCallback}, store::util::new_peer, }; - use tikv_util::worker::{Builder as WorkerBuilder, LazyWorker, Runnable}; + use tikv_util::{ + sys::thread::StdThreadBuildWrapper, + worker::{Builder as WorkerBuilder, LazyWorker, Runnable}, + }; use super::*; use crate::storage::Callback; @@ -821,7 +824,7 @@ mod tests { let (tx, rx) = channel(); ThreadBuilder::new() - .spawn(move || { + .spawn_wrapper(move || { let safe_point = gc_manager.wait_for_next_safe_point().unwrap(); tx.send(safe_point).unwrap(); }) diff --git a/src/server/load_statistics/linux.rs b/src/server/load_statistics/linux.rs index ff9d30a2997..f3a12593a51 100644 --- a/src/server/load_statistics/linux.rs +++ b/src/server/load_statistics/linux.rs @@ -115,6 +115,8 @@ fn calc_cpu_load(elapsed_millis: usize, start_usage: f64, end_usage: f64) -> usi mod tests { use std::{thread, time::Duration}; + use tikv_util::sys::thread::StdThreadBuildWrapper; + use super::*; #[test] @@ -124,7 +126,7 @@ mod tests { let l = loads.clone(); thread::Builder::new() .name(THREAD_NAME.to_string()) - .spawn(move || { + .spawn_wrapper(move || { let mut stats = ThreadLoadStatistics::new(2, THREAD_NAME, Arc::clone(&l)); let start = Instant::now(); loop { diff --git a/src/server/reset_to_version.rs b/src/server/reset_to_version.rs index dadb13f6692..7b99f48371d 100644 --- a/src/server/reset_to_version.rs +++ b/src/server/reset_to_version.rs @@ -11,6 +11,7 @@ use engine_traits::{ IterOptions, Iterable, Iterator, Mutable, SeekKey, WriteBatch, WriteBatchExt, CF_DEFAULT, CF_LOCK, CF_WRITE, }; +use tikv_util::sys::thread::StdThreadBuildWrapper; use 
txn_types::{Key, TimeStamp, Write, WriteRef}; use super::Result; @@ -218,7 +219,7 @@ impl ResetToVersionManager { } *self.worker_handle.borrow_mut() = Some(std::thread::Builder::new() .name("reset_to_version".to_string()) - .spawn(move || { + .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); tikv_alloc::add_thread_memory_accessor(); diff --git a/src/server/server.rs b/src/server/server.rs index 9a648c096c3..196a6584be7 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -44,6 +44,7 @@ use crate::{ read_pool::ReadPool, server::{gc_worker::GcWorker, Proxy}, storage::{lock_manager::LockManager, Engine, Storage}, + tikv_util::sys::thread::ThreadBuildWrapper, }; const LOAD_STATISTICS_SLOTS: usize = 4; @@ -109,6 +110,8 @@ impl + Unpin, S: StoreAddrResolver + 'static, E: En RuntimeBuilder::new_multi_thread() .thread_name(STATS_THREAD_PREFIX) .worker_threads(cfg.value().stats_concurrency) + .after_start_wrapper(|| {}) + .before_stop_wrapper(|| {}) .build() .unwrap(), ) @@ -526,6 +529,8 @@ mod tests { TokioBuilder::new_multi_thread() .thread_name(thd_name!("debugger")) .worker_threads(1) + .after_start_wrapper(|| {}) + .before_stop_wrapper(|| {}) .build() .unwrap(), ); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 988e0624686..64ce2abb0e6 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -2197,6 +2197,7 @@ mod tests { use std::thread; use futures::{channel::oneshot, executor::block_on}; + use tikv_util::sys::thread::StdThreadBuildWrapper; use super::*; @@ -2207,7 +2208,7 @@ mod tests { thread::Builder::new() .name("source".to_owned()) - .spawn(move || { + .spawn_wrapper(move || { block_on(signal_rx).unwrap(); tx.send(100).unwrap(); }) @@ -2230,7 +2231,7 @@ mod tests { let (signal_tx, signal_rx) = oneshot::channel(); thread::Builder::new() .name("source".to_owned()) - .spawn(move || { + .spawn_wrapper(move || { tx.send(100).unwrap(); signal_tx.send(()).unwrap(); }) diff --git 
a/src/server/snap.rs b/src/server/snap.rs index d367fa65047..9b86b4778b4 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -43,6 +43,7 @@ use tikv_util::{ use tokio::runtime::{Builder as RuntimeBuilder, Runtime}; use super::{metrics::*, Config, Error, Result}; +use crate::tikv_util::sys::thread::ThreadBuildWrapper; pub type Callback = Box) + Send>; @@ -354,8 +355,8 @@ where pool: RuntimeBuilder::new_multi_thread() .thread_name(thd_name!("snap-sender")) .worker_threads(DEFAULT_POOL_SIZE) - .on_thread_start(tikv_alloc::add_thread_memory_accessor) - .on_thread_stop(tikv_alloc::remove_thread_memory_accessor) + .after_start_wrapper(tikv_alloc::add_thread_memory_accessor) + .before_stop_wrapper(tikv_alloc::remove_thread_memory_accessor) .build() .unwrap(), raft_router: r, diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 792d83f13de..1bb066d1a2c 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -64,6 +64,7 @@ use self::profile::{ use crate::{ config::{log_level_serde, ConfigController}, server::Result, + tikv_util::sys::thread::ThreadBuildWrapper, }; static TIMER_CANCELED: &str = "tokio timer canceled"; @@ -110,8 +111,8 @@ where .enable_all() .worker_threads(status_thread_pool_size) .thread_name("status-server") - .on_thread_start(|| debug!("Status server started")) - .on_thread_stop(|| debug!("stopping status server")) + .after_start_wrapper(|| debug!("Status server started")) + .before_stop_wrapper(|| debug!("stopping status server")) .build()?; let (tx, rx) = oneshot::channel::<()>(); diff --git a/src/storage/txn/flow_controller.rs b/src/storage/txn/flow_controller.rs index 378b4fd2aad..e29472594c6 100644 --- a/src/storage/txn/flow_controller.rs +++ b/src/storage/txn/flow_controller.rs @@ -20,7 +20,10 @@ use engine_rocks::FlowInfo; use engine_traits::{CFNamesExt, FlowControlFactorsExt}; use num_traits::cast::{AsPrimitive, FromPrimitive}; use rand::Rng; -use tikv_util::time::{Instant, 
Limiter}; +use tikv_util::{ + sys::thread::StdThreadBuildWrapper, + time::{Instant, Limiter}, +}; use crate::storage::{config::FlowControlConfig, metrics::*}; @@ -494,7 +497,7 @@ impl FlowChecker { fn start(self, rx: Receiver, flow_info_receiver: Receiver) -> JoinHandle<()> { Builder::new() .name(thd_name!("flow-checker")) - .spawn(move || { + .spawn_wrapper(move || { tikv_alloc::add_thread_memory_accessor(); let mut checker = self; let mut deadline = std::time::Instant::now(); From 6bc24929670b0e53cdc365b37c0c69afe63f19e9 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Mon, 20 Jun 2022 16:30:37 +0800 Subject: [PATCH 0032/1149] raftstore: record metrics of proposal send wait duration (#12837) ref tikv/tikv#12362 Our raft implementation has its flow control mechanism to limit the inflight message number. But we're not able to know whether and when we are throttled. Then, it's hard for us to know whether we should adjust the max-inflight-msgs config. This commit tries to improve this case. It's complex to add hooks to raft-rs to know how long a message is throttled, but we can do it in the raftstore. We record the propose time, and consume it and record it in the histogram when the messages is send through the transport. If flow control takes effect, the ready will be smaller than all under-replicated logs. So, we can know if flow control takes effect. 
Signed-off-by: Yilin Chen --- .../raftstore/src/store/local_metrics.rs | 3 + components/raftstore/src/store/metrics.rs | 6 + components/raftstore/src/store/peer.rs | 29 +++ metrics/grafana/tikv_details.json | 184 +++++++++++++++++- 4 files changed, 220 insertions(+), 2 deletions(-) diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index d6e6dc265bc..aa23f22bc2c 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -415,6 +415,7 @@ pub struct RaftMetrics { pub wf_persist_log: LocalHistogram, pub wf_commit_log: LocalHistogram, pub wf_commit_not_persist_log: LocalHistogram, + pub proposal_send_wait: LocalHistogram, pub raft_log_gc_skipped: RaftLogGcSkippedMetrics, } @@ -439,6 +440,7 @@ impl RaftMetrics { wf_persist_log: STORE_WF_PERSIST_LOG_DURATION_HISTOGRAM.local(), wf_commit_log: STORE_WF_COMMIT_LOG_DURATION_HISTOGRAM.local(), wf_commit_not_persist_log: STORE_WF_COMMIT_NOT_PERSIST_LOG_DURATION_HISTOGRAM.local(), + proposal_send_wait: PROPOSAL_SEND_WAIT_DURATION_HISTOGRAM.local(), raft_log_gc_skipped: RaftLogGcSkippedMetrics::default(), } } @@ -461,6 +463,7 @@ impl RaftMetrics { self.wf_persist_log.flush(); self.wf_commit_log.flush(); self.wf_commit_not_persist_log.flush(); + self.proposal_send_wait.flush(); } let mut missing = self.leader_missing.lock().unwrap(); LEADER_MISSING.set(missing.len() as i64); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 3a4426fcbcb..e3d3a23e389 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -360,6 +360,12 @@ lazy_static! 
{ "Bucketed histogram of proposals' commit but not persist duration", exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); + pub static ref PROPOSAL_SEND_WAIT_DURATION_HISTOGRAM: Histogram = + register_histogram!( + "tikv_raftstore_proposal_send_wait_duration_seconds", + "Bucketed histogram of proposals' send wait duration", + exponential_buckets(1e-6, 2.0, 26).unwrap() + ).unwrap(); pub static ref PEER_PROPOSAL_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 73e1a6ecb50..ad63e3b1b34 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -725,6 +725,9 @@ where #[getset(get = "pub")] leader_lease: Lease, pending_reads: ReadIndexQueue, + /// Record the propose instants to calculate the wait duration before + /// the proposal is sent through the Raft client. + pending_propose_instants: VecDeque<(u64, Instant)>, /// If it fails to send messages to leader. 
pub leader_unreachable: bool, @@ -925,6 +928,7 @@ where raft_max_inflight_msgs: cfg.raft_max_inflight_msgs, proposals: ProposalQueue::new(tag.clone()), pending_reads: Default::default(), + pending_propose_instants: Default::default(), peer_cache: RefCell::new(HashMap::default()), peer_heartbeats: HashMap::default(), peers_start_pending_time: vec![], @@ -1571,6 +1575,7 @@ where ctx: &mut PollContext, msgs: Vec, ) { + let now = Instant::now(); for msg in msgs { let msg_type = msg.get_message().get_msg_type(); if msg_type == MessageType::MsgTimeoutNow && self.is_leader() { @@ -1596,6 +1601,26 @@ where "disk_usage" => ?msg.get_disk_usage(), ); + for index in msg + .get_message() + .get_entries() + .iter() + .map(|e| e.get_index()) + { + while let Some((propose_idx, instant)) = self.pending_propose_instants.front() { + if index == *propose_idx { + ctx.raft_metrics + .proposal_send_wait + .observe(now.saturating_duration_since(*instant).as_secs_f64()); + } + if index >= *propose_idx { + self.pending_propose_instants.pop_front(); + } else { + break; + } + } + } + if let Err(e) = ctx.trans.send(msg) { // We use metrics to observe failure on production. 
debug!( @@ -2048,6 +2073,7 @@ where self.mut_store().cancel_generating_snap(None); self.clear_disk_full_peers(ctx); self.clear_in_memory_pessimistic_locks(); + self.pending_propose_instants.clear(); } _ => {} } @@ -4270,6 +4296,9 @@ where } } + self.pending_propose_instants + .push_back((propose_index, Instant::now())); + Ok(Either::Left(propose_index)) } diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 15dfa8c684b..46d72775cb6 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -13588,6 +13588,186 @@ "yBucketNumber": null, "yBucketSize": null }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 47 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763572784, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(delta(tikv_raftstore_proposal_send_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Proposal send wait duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + 
}, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 47 + }, + "hiddenSeries": false, + "id": 23763572783, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_proposal_send_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "99% Proposal send wait duration per server", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:106", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:107", + "format": "short", + "label": null, + "logBase": 1, + 
"max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, @@ -13605,7 +13785,7 @@ "h": 7, "w": 12, "x": 0, - "y": 46 + "y": 54 }, "hiddenSeries": false, "id": 1975, @@ -13708,7 +13888,7 @@ "h": 7, "w": 12, "x": 12, - "y": 46 + "y": 54 }, "hiddenSeries": false, "id": 1976, From 90a1aa11e636b4a6735b155670f2e150f5dedfc4 Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 20 Jun 2022 10:08:36 -0700 Subject: [PATCH 0033/1149] raftstore: skip flushing raft logs for uninitialized peer (#12847) close tikv/tikv#12825 Uninitialized peer has not received any logs, so it doesn't need to clean up any logs. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/fsm/peer.rs | 8 +++ .../raftstore/src/store/worker/raftlog_gc.rs | 1 + tests/failpoints/cases/test_split_region.rs | 70 +++++++++++++++++++ 3 files changed, 79 insertions(+) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index c61e3c3ba55..6abfc24c486 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -3126,6 +3126,14 @@ where return Some(DelayReason::UnPersistedReady); } + let is_initialized = self.fsm.peer.is_initialized(); + if !is_initialized { + // If the peer is uninitialized, then it can't receive any logs from leader. So + // no need to gc. If there was a peer with same region id on the store, and it had + // logs written, then it must be initialized, hence its log should be gc either + // before it's destroyed or during node restarts. 
+ self.fsm.logs_gc_flushed = true; + } if !self.fsm.logs_gc_flushed { let start_index = self.fsm.peer.last_compacted_idx; let mut end_index = start_index; diff --git a/components/raftstore/src/store/worker/raftlog_gc.rs b/components/raftstore/src/store/worker/raftlog_gc.rs index bf892743300..71584a5e678 100644 --- a/components/raftstore/src/store/worker/raftlog_gc.rs +++ b/components/raftstore/src/store/worker/raftlog_gc.rs @@ -107,6 +107,7 @@ impl Runner { if self.tasks.is_empty() { return; } + fail::fail_point!("worker_gc_raft_log_flush"); // Sync wal of kv_db to make sure the data before apply_index has been persisted to disk. let start = Instant::now(); self.engines.kv.sync().unwrap_or_else(|e| { diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 68fed70ca25..8b42959fc01 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -984,3 +984,73 @@ fn test_split_pessimistic_locks_with_concurrent_prewrite() { let resp = resp.join().unwrap(); assert!(resp.get_region_error().has_epoch_not_match(), "{:?}", resp); } + +/// Logs are gced asynchronously. If an uninitialized peer is destroyed before being replaced by +/// split, then the asynchronous log gc response may arrive after the peer is replaced, hence +/// it will lead to incorrect memory state. Actually, there is nothing to be gc for uninitialized +/// peer. The case is to guarantee such incorrect state will not happen. +#[test] +fn test_split_replace_skip_log_gc() { + let mut cluster = new_node_cluster(0, 3); + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(15); + cluster.cfg.raft_store.raft_log_gc_threshold = 15; + cluster.cfg.raft_store.right_derive_when_split = true; + cluster.cfg.raft_store.store_batch_system.max_batch_size = Some(1); + cluster.cfg.raft_store.store_batch_system.pool_size = 2; + let pd_client = cluster.pd_client.clone(); + + // Disable default max peer number check. 
+ pd_client.disable_default_operator(); + let r = cluster.run_conf_change(); + pd_client.must_add_peer(r, new_peer(3, 3)); + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + + let before_check_snapshot_1_2_fp = "before_check_snapshot_1_2"; + fail::cfg(before_check_snapshot_1_2_fp, "pause").unwrap(); + + // So the split peer on store 2 always uninitialized. + let filter = RegionPacketFilter::new(1000, 2).msg_type(MessageType::MsgSnapshot); + cluster.add_send_filter(CloneFilterFactory(filter)); + + pd_client.must_add_peer(r, new_peer(2, 2)); + let region = pd_client.get_region(b"k1").unwrap(); + // [-∞, k2), [k2, +∞) + // b a + cluster.must_split(®ion, b"k2"); + + cluster.must_put(b"k3", b"v3"); + + // Because a is not initialized, so b must be created using heartbeat on store 3. + + // Simulate raft log gc stall. + let gc_fp = "worker_gc_raft_log_flush"; + let destroy_fp = "destroy_peer_after_pending_move"; + + fail::cfg(gc_fp, "pause").unwrap(); + let (tx, rx) = crossbeam::channel::bounded(0); + fail::cfg_callback(destroy_fp, move || { + let _ = tx.send(()); + let _ = tx.send(()); + }) + .unwrap(); + + let left = pd_client.get_region(b"k1").unwrap(); + let left_peer_on_store_2 = find_peer(&left, 2).unwrap(); + pd_client.must_remove_peer(left.get_id(), left_peer_on_store_2.clone()); + // Wait till destroy is triggered. + rx.recv_timeout(Duration::from_secs(3)).unwrap(); + // Make it split. + fail::remove(before_check_snapshot_1_2_fp); + // Wait till split is finished. + must_get_equal(&cluster.get_engine(2), b"k3", b"v3"); + // Wait a little bit so the uninitialized peer is replaced. + thread::sleep(Duration::from_millis(10)); + // Resume destroy. + rx.recv_timeout(Duration::from_secs(3)).unwrap(); + // Resume gc. + fail::remove(gc_fp); + // Check store 3 is still working correctly. 
+ cluster.must_put(b"k4", b"v4"); + must_get_equal(&cluster.get_engine(2), b"k4", b"v4"); +} From 53dc82927417ad5fdea10e0e5a24586a1bce61eb Mon Sep 17 00:00:00 2001 From: kevin-xianliu <105765349+kevin-xianliu@users.noreply.github.com> Date: Mon, 20 Jun 2022 15:16:36 -0700 Subject: [PATCH 0034/1149] grafana/dashboard: adjusted for better readability (#12792) close tikv/tikv#12007, ref tikv/tikv#12007 - add 99%, 95% and avg graph for aysnc snapshot & write in storage panel - adjust heatmap in coprocessor panel for better readability - change "Raft log speed" to "Raft propose speed" in Raft Propose panel Signed-off-by: kevin-xianliu Co-authored-by: Jay --- metrics/grafana/tikv_details.json | 255 ++++++++++++++++++++++++++++-- 1 file changed, 246 insertions(+), 9 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 46d72775cb6..b8204654185 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -13831,7 +13831,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Raft log speed", + "title": "Raft propose speed", "tooltip": { "shared": true, "sort": 0, @@ -17706,6 +17706,239 @@ "yBucketBound": "auto", "yBucketNumber": null, "yBucketSize": null + }, + { + "type": "graph", + "title": "Storage async snapshot duration", + "gridPos": { + "x": 0, + "y": 35, + "w": 12, + "h": 8 + }, + "id": 20000, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le))", + "legendFormat": "99%", + "interval": "", + "exemplar": true, + "refId": "A", + "queryType": "randomWalk", + "intervalFactor": 2 + }, + { + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le))", + "legendFormat": "95%", + "interval": 
"", + "exemplar": true, + "refId": "B", + "hide": false, + "intervalFactor": 2 + }, + { + "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m]))", + "legendFormat": "avg", + "interval": "", + "exemplar": true, + "refId": "C", + "hide": false, + "intervalFactor": 2 + } + ], + "options": { + "alertThreshold": true + }, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "pluginVersion": "7.5.10", + "renderer": "flot", + "yaxes": [ + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "s", + "$$hashKey": "object:295" + }, + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:296" + } + ], + "xaxis": { + "show": true, + "mode": "time", + "name": null, + "values": [], + "buckets": null + }, + "yaxis": { + "align": false, + "alignLevel": null + }, + "lines": true, + "fill": 1, + "linewidth": 1, + "dashLength": 10, + "spaceLength": 10, + "pointradius": 2, + "legend": { + "show": true, + "values": false, + "min": false, + "max": false, + "current": false, + "total": false, + "avg": false + }, + "nullPointMode": "null", + "tooltip": { + "value_type": "individual", + "shared": true, + "sort": 0 + }, + "aliasColors": {}, + "seriesOverrides": [], + "thresholds": [], + "timeRegions": [], + "description": "The storage async snapshot duration", + "datasource": "${DS_TEST-CLUSTER}", + "fillGradient": 0, + "dashes": false, + "hiddenSeries": false, + "points": false, + "bars": false, + "stack": false, + "percentage": false, + "steppedLine": false, + "timeFrom": null, + "timeShift": null + }, + { + "type": "graph", + "title": "Storage async write duration", + "gridPos": { + "x": 12, + "y": 35, + "w": 12, + "h": 8 + 
}, + "id": 20001, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le))", + "legendFormat": "99%", + "interval": "", + "exemplar": true, + "refId": "A", + "intervalFactor": 1 + }, + { + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le))", + "legendFormat": "95%", + "interval": "", + "exemplar": true, + "refId": "B", + "hide": false, + "intervalFactor": 1 + }, + { + "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m]))", + "legendFormat": "avg", + "interval": "", + "exemplar": true, + "refId": "C", + "hide": false, + "intervalFactor": 1 + } + ], + "options": { + "alertThreshold": true + }, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "pluginVersion": "7.5.10", + "renderer": "flot", + "yaxes": [ + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "s", + "$$hashKey": "object:494" + }, + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short", + "$$hashKey": "object:495" + } + ], + "xaxis": { + "show": true, + "mode": "time", + "name": null, + "values": [], + "buckets": null + }, + "yaxis": { + "align": false, + "alignLevel": null + }, + "lines": true, + "fill": 2, + "linewidth": 1, + "dashLength": 10, + "spaceLength": 10, + "pointradius": 2, + "legend": { + "show": true, + "values": false, + "min": false, + "max": false, + "current": false, + "total": false, + "avg": false + }, + "nullPointMode": 
"null", + "tooltip": { + "value_type": "individual", + "shared": true, + "sort": 0 + }, + "aliasColors": {}, + "seriesOverrides": [], + "thresholds": [], + "timeRegions": [], + "description": "The storage async write duration", + "fillGradient": 0, + "dashes": false, + "hiddenSeries": false, + "points": false, + "bars": false, + "stack": false, + "percentage": false, + "steppedLine": false, + "timeFrom": null, + "timeShift": null } ], "repeat": null, @@ -23092,9 +23325,9 @@ "color": { "cardColor": "#5195ce", "colorScale": "linear", - "colorScheme": "interpolateBlues", + "colorScheme": "interpolateSpectral", "exponent": 0.5, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", @@ -23107,7 +23340,7 @@ "y": 22 }, "heatmap": {}, - "hideZeroBuckets": false, + "hideZeroBuckets": true, "highlightCards": true, "id": 3062, "legend": { @@ -23117,14 +23350,13 @@ "max": true, "min": false, "rightSide": true, - "show": true, + "show": false, "sort": "current", "sortDesc": true, "total": false, "values": true }, "links": [], - "reverseYBuckets": false, "targets": [ { "expr": "sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", @@ -23138,15 +23370,13 @@ "title": "Request duration", "tooltip": { "show": true, - "showHistogram": true + "showHistogram": false }, "tooltipDecimals": 1, "type": "heatmap", "xAxis": { "show": true }, - "xBucketNumber": null, - "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", @@ -23157,6 +23387,13 @@ "splitFactor": null }, "yBucketBound": "upper", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "reverseYBuckets": false, + "xBucketNumber": null, + "xBucketSize": null, "yBucketNumber": null, "yBucketSize": null }, From 9f9333180fb7ab41095f9c579e3805d17bf431b4 Mon Sep 17 00:00:00 2001 From: Yujie Xia Date: Tue, 21 Jun 2022 15:34:37 +0800 Subject: [PATCH 0035/1149] bump master version to 6.2-alpha 
(#12858) close tikv/tikv#12859 Signed-off-by: Yujie Xia --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 82978c6cbf8..6a6cf62a6ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5955,7 +5955,7 @@ dependencies = [ [[package]] name = "tikv" -version = "6.1.0-alpha" +version = "6.2.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index e58963c694d..e0b8b195b0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tikv" -version = "6.1.0-alpha" +version = "6.2.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From 0ddac9965339edfcb71ad59373507b828f790b41 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 21 Jun 2022 00:58:37 -0700 Subject: [PATCH 0036/1149] *: check last sent snapshot for prepare merge (#12682) close tikv/tikv#12663 Guarantee min index of prepare merge larger than the index of last sent snapshot by recording an approximate last sent snapshot index. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/peer.rs | 16 +++- tests/failpoints/cases/test_merge.rs | 58 ------------- tests/integrations/raftstore/test_merge.rs | 96 ++++++++++++++++++++++ 3 files changed, 110 insertions(+), 60 deletions(-) diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index ad63e3b1b34..eb1fc93e1ee 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -785,6 +785,8 @@ where last_urgent_proposal_idx: u64, /// The index of the latest committed split command. last_committed_split_idx: u64, + /// The index of last sent snapshot + last_sent_snapshot_idx: u64, /// Approximate size of logs that is applied but not compacted yet. 
pub raft_log_size_hint: u64, @@ -955,6 +957,7 @@ where last_compacted_idx: 0, last_urgent_proposal_idx: u64::MAX, last_committed_split_idx: 0, + last_sent_snapshot_idx: 0, consistency_state: ConsistencyState { last_check_time: Instant::now(), index: INVALID_INDEX, @@ -1578,6 +1581,12 @@ where let now = Instant::now(); for msg in msgs { let msg_type = msg.get_message().get_msg_type(); + if msg_type == MessageType::MsgSnapshot { + let snap_index = msg.get_message().get_snapshot().get_metadata().get_index(); + if snap_index > self.last_sent_snapshot_idx { + self.last_sent_snapshot_idx = snap_index; + } + } if msg_type == MessageType::MsgTimeoutNow && self.is_leader() { // After a leader transfer procedure is triggered, the lease for // the old leader may be expired earlier than usual, since a new leader @@ -2052,6 +2061,7 @@ where // prewrites or commits will be just a waste. self.last_urgent_proposal_idx = self.raft_group.raft.raft_log.last_index(); self.raft_group.skip_bcast_commit(false); + self.last_sent_snapshot_idx = self.raft_group.raft.raft_log.last_index(); // A more recent read may happen on the old leader. So max ts should // be updated after a peer becomes leader. 
@@ -3995,12 +4005,14 @@ where || min_committed == 0 || last_index - min_matched > ctx.cfg.merge_max_log_gap || last_index - min_committed > ctx.cfg.merge_max_log_gap * 2 + || min_matched < self.last_sent_snapshot_idx { return Err(box_err!( - "log gap from matched: {} or committed: {} to last index: {} is too large, skip merge", + "log gap too large, skip merge: matched: {}, committed: {}, last index: {}, last_snapshot: {}", min_matched, min_committed, - last_index + last_index, + self.last_sent_snapshot_idx )); } let mut entry_size = 0; diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index af3f9cca499..c341d801c9b 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -1250,64 +1250,6 @@ fn test_prewrite_before_max_ts_is_synced() { assert!(!resp.get_region_error().has_max_timestamp_not_synced()); } -/// If term is changed in catching up logs, follower needs to update the term -/// correctly, otherwise will leave corrupted states. 
-#[test] -fn test_merge_election_and_restart() { - let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); - - let pd_client = Arc::clone(&cluster.pd_client); - pd_client.disable_default_operator(); - - let on_raft_gc_log_tick_fp = "on_raft_gc_log_tick"; - fail::cfg(on_raft_gc_log_tick_fp, "return()").unwrap(); - - cluster.run(); - - let region = pd_client.get_region(b"k1").unwrap(); - cluster.must_split(®ion, b"k2"); - - let r1 = pd_client.get_region(b"k1").unwrap(); - let r1_on_store1 = find_peer(&r1, 1).unwrap().to_owned(); - cluster.must_transfer_leader(r1.get_id(), r1_on_store1.clone()); - cluster.must_put(b"k11", b"v11"); - must_get_equal(&cluster.get_engine(2), b"k11", b"v11"); - - let r1_on_store2 = find_peer(&r1, 2).unwrap().to_owned(); - cluster.must_transfer_leader(r1.get_id(), r1_on_store2); - cluster.must_put(b"k12", b"v12"); - must_get_equal(&cluster.get_engine(1), b"k12", b"v12"); - - cluster.add_send_filter(CloneFilterFactory(RegionPacketFilter::new(r1.get_id(), 2))); - - // Wait new leader elected. - cluster.must_transfer_leader(r1.get_id(), r1_on_store1); - cluster.must_put(b"k13", b"v13"); - must_get_equal(&cluster.get_engine(1), b"k13", b"v13"); - must_get_none(&cluster.get_engine(2), b"k13"); - - // Don't actually execute commit merge - fail::cfg("after_handle_catch_up_logs_for_merge", "return()").unwrap(); - // Now region 1 can still be merged into region 2 because leader has committed index cache. - let r2 = pd_client.get_region(b"k3").unwrap(); - cluster.must_try_merge(r1.get_id(), r2.get_id()); - // r1 on store 2 should be able to apply all committed logs. - must_get_equal(&cluster.get_engine(2), b"k13", b"v13"); - - cluster.shutdown(); - cluster.clear_send_filters(); - fail::remove("after_handle_catch_up_logs_for_merge"); - cluster.start().unwrap(); - - // Wait for region elected to avoid timeout and backoff. - cluster.leader_of_region(r2.get_id()); - // If merge can be resumed correctly, the put should succeed. 
- cluster.must_put(b"k14", b"v14"); - // If logs from different term are process correctly, store 2 should have latest updates. - must_get_equal(&cluster.get_engine(2), b"k14", b"v14"); -} - /// Testing that the source peer's read delegate should not be removed by the target peer /// and only removed when the peer is destroyed #[test] diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 50a427b5ecd..df739d825bc 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -1547,3 +1547,99 @@ fn test_stale_message_after_merge() { cluster.must_put(b"k4", b"v4"); must_get_equal(&cluster.get_engine(3), b"k4", b"v4"); } + +/// Check whether merge should be prevented if follower may not have enough logs. +#[test] +fn test_prepare_merge_with_reset_matched() { + let mut cluster = new_server_cluster(0, 3); + configure_for_merge(&mut cluster); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + let r = cluster.run_conf_change(); + pd_client.must_add_peer(r, new_peer(2, 2)); + cluster.add_send_filter(IsolationFilterFactory::new(3)); + pd_client.add_peer(r, new_peer(3, 3)); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k3"); + thread::sleep(Duration::from_millis(10)); + // So leader will replicate next command but can't know whether follower (2, 2) + // also commits the command. Supposing the index is i0. + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(left.get_id(), 2) + .direction(Direction::Recv) + .msg_type(MessageType::MsgAppendResponse) + .allow(1), + )); + cluster.must_put(b"k11", b"v11"); + cluster.clear_send_filters(); + cluster.add_send_filter(IsolationFilterFactory::new(2)); + // So peer (3, 3) only have logs after i0. 
+ must_get_equal(&cluster.get_engine(3), b"k11", b"v11"); + // Clear match information. + let left_on_store3 = find_peer(&left, 3).unwrap().to_owned(); + cluster.must_transfer_leader(left.get_id(), left_on_store3); + let left_on_store1 = find_peer(&left, 1).unwrap().to_owned(); + cluster.must_transfer_leader(left.get_id(), left_on_store1); + let res = cluster.try_merge(left.get_id(), right.get_id()); + // Now leader still knows peer(2, 2) has committed i0 - 1, so the min_match will + // become i0 - 1. But i0 - 1 is not a safe index as peer(3, 3) starts from i0 + 1. + assert!(res.get_header().has_error(), "{:?}", res); + cluster.clear_send_filters(); + // Now leader should replicate more logs and figure out a safe index. + pd_client.must_merge(left.get_id(), right.get_id()); +} + +/// Check if prepare merge min index is chosen correctly even if all match indexes are +/// correct. +#[test] +fn test_prepare_merge_with_5_nodes_snapshot() { + let mut cluster = new_server_cluster(0, 5); + configure_for_merge(&mut cluster); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.run(); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k3"); + + let peer_on_store1 = find_peer(&left, 1).unwrap().clone(); + cluster.must_transfer_leader(left.get_id(), peer_on_store1); + must_get_equal(&cluster.get_engine(5), b"k1", b"v1"); + let peer_on_store5 = find_peer(&left, 5).unwrap().clone(); + pd_client.must_remove_peer(left.get_id(), peer_on_store5); + must_get_none(&cluster.get_engine(5), b"k1"); + cluster.add_send_filter(IsolationFilterFactory::new(5)); + pd_client.add_peer(left.get_id(), new_peer(5, 16)); + + // Make sure there will be no admin entries after min_matched. 
+ for (k, v) in &[(b"k11", b"v11"), (b"k12", b"v12")] { + cluster.must_put(*k, *v); + must_get_equal(&cluster.get_engine(4), *k, *v); + } + cluster.add_send_filter(IsolationFilterFactory::new(4)); + // So index of peer 4 becomes min_matched. + cluster.must_put(b"k13", b"v13"); + must_get_equal(&cluster.get_engine(1), b"k13", b"v13"); + + // Only remove send filter on store 5. + cluster.clear_send_filters(); + cluster.add_send_filter(IsolationFilterFactory::new(4)); + must_get_equal(&cluster.get_engine(5), b"k13", b"v13"); + let res = cluster.try_merge(left.get_id(), right.get_id()); + // min_matched from peer 4 is beyond the first index of peer 5, it should not be chosen + // for prepare merge. + assert!(res.get_header().has_error(), "{:?}", res); + cluster.clear_send_filters(); + // Now leader should replicate more logs and figure out a safe index. + pd_client.must_merge(left.get_id(), right.get_id()); +} From 4886024bc067437fec9eead7a56d3dbcbef59078 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 21 Jun 2022 16:38:37 +0800 Subject: [PATCH 0037/1149] tests: support running benches via custom test runner (#12821) close tikv/tikv#12820 Signed-off-by: tabokie Co-authored-by: zhangjinpeng1987 Co-authored-by: Ti Chi Robot --- components/test_util/src/runner.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/components/test_util/src/runner.rs b/components/test_util/src/runner.rs index e3d6cad5979..e7ef1ba0cb5 100644 --- a/components/test_util/src/runner.rs +++ b/components/test_util/src/runner.rs @@ -57,15 +57,15 @@ pub fn run_test_with_hook(cases: &[&TestDescAndFn], hook: impl TestHook + Send + .iter() .map(|case| { let name = case.desc.name.as_slice().to_owned(); - let h = hook.clone(); + let hook = hook.clone(); let f = match case.testfn { TestFn::StaticTestFn(f) => TestFn::DynTestFn(Box::new(move || { - let _watcher = CaseLifeWatcher::new(name, h); + let _watcher = CaseLifeWatcher::new(name.clone(), hook.clone()); f(); })), - 
TestFn::StaticBenchFn(f) => TestFn::DynTestFn(Box::new(move || { - let _watcher = CaseLifeWatcher::new(name, h); - bench::run_once(move |b| f(b)); + TestFn::StaticBenchFn(f) => TestFn::DynBenchFn(Box::new(move |b| { + let _watcher = CaseLifeWatcher::new(name.clone(), hook.clone()); + f(b); })), ref f => panic!("unexpected testfn {:?}", f), }; From 0a7d8601d81583a56149c7912e49016595968415 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 21 Jun 2022 15:36:36 -0700 Subject: [PATCH 0038/1149] Storage: Support regional flow controller (#12845) close tikv/tikv#12844 Make FlowController as trait and implement two versions for single rocksdb version and multi-rocksdb version Signed-off-by: qi.xu Co-authored-by: qi.xu --- Cargo.lock | 1 + Cargo.toml | 1 + components/engine_rocks/src/flow_listener.rs | 95 ++- components/engine_traits/src/engine.rs | 17 +- components/server/src/server.rs | 11 +- components/test_raftstore/src/server.rs | 9 +- src/config.rs | 11 +- src/server/engine_factory.rs | 25 +- src/server/engine_factory_v2.rs | 7 +- src/server/gc_worker/gc_worker.rs | 6 +- src/storage/mod.rs | 6 +- src/storage/txn/flow_controller/mod.rs | 76 ++ .../singleton_flow_controller.rs} | 681 ++++++++++++------ .../flow_controller/tablet_flow_controller.rs | 395 ++++++++++ src/storage/txn/scheduler.rs | 42 +- tests/failpoints/cases/test_storage.rs | 9 +- 16 files changed, 1095 insertions(+), 297 deletions(-) create mode 100644 src/storage/txn/flow_controller/mod.rs rename src/storage/txn/{flow_controller.rs => flow_controller/singleton_flow_controller.rs} (71%) create mode 100644 src/storage/txn/flow_controller/tablet_flow_controller.rs diff --git a/Cargo.lock b/Cargo.lock index 6a6cf62a6ec..cedc1229d0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5989,6 +5989,7 @@ dependencies = [ "futures-executor", "futures-timer", "futures-util", + "getset", "grpcio", "grpcio-health", "hex 0.4.2", diff --git a/Cargo.toml b/Cargo.toml index e0b8b195b0c..da68c7aa75c 100644 --- a/Cargo.toml 
+++ b/Cargo.toml @@ -94,6 +94,7 @@ futures = { version = "0.3", features = ["thread-pool", "compat"] } futures-executor = "0.3.1" futures-timer = "3.0" futures-util = { version = "0.3.1", default-features = false, features = ["io", "async-await"] } +getset = "0.1" grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } hex = "0.4" diff --git a/components/engine_rocks/src/flow_listener.rs b/components/engine_rocks/src/flow_listener.rs index 5d36c2b66e9..f36b5393f7a 100644 --- a/components/engine_rocks/src/flow_listener.rs +++ b/components/engine_rocks/src/flow_listener.rs @@ -5,26 +5,57 @@ use std::sync::{mpsc::Sender, Arc, Mutex}; use collections::hash_set_with_capacity; use rocksdb::{CompactionJobInfo, EventListener, FlushJobInfo, IngestionInfo}; +#[derive(Clone)] pub enum FlowInfo { - L0(String, u64), - L0Intra(String, u64), - Flush(String, u64), - Compaction(String), - BeforeUnsafeDestroyRange, - AfterUnsafeDestroyRange, + L0(String, u64, u64, u64), + L0Intra(String, u64, u64, u64), + Flush(String, u64, u64, u64), + Compaction(String, u64, u64), + BeforeUnsafeDestroyRange(u64), + AfterUnsafeDestroyRange(u64), + Created(u64, u64), + Destroyed(u64, u64), } #[derive(Clone)] pub struct FlowListener { flow_info_sender: Arc>>, + region_id: u64, + suffix_id: u64, } impl FlowListener { pub fn new(flow_info_sender: Sender) -> Self { Self { flow_info_sender: Arc::new(Mutex::new(flow_info_sender)), + region_id: 0, + suffix_id: 0, + } + } + + pub fn clone_with(&self, region_id: u64, suffix_id: u64) -> Self { + Self { + flow_info_sender: self.flow_info_sender.clone(), + region_id, + suffix_id, } } + + pub fn on_created(&self) { + let _ = self + .flow_info_sender + .lock() + .unwrap() + .send(FlowInfo::Created(self.region_id, self.suffix_id)); + } + + pub fn on_destroyed(&self) { + let _ = self + .flow_info_sender + .lock() + 
.unwrap() + .send(FlowInfo::Destroyed(self.region_id, self.suffix_id)); + } } impl EventListener for FlowListener { @@ -32,11 +63,12 @@ impl EventListener for FlowListener { let mut total = 0; let p = info.table_properties(); total += p.data_size() + p.index_size() + p.filter_size(); - let _ = self - .flow_info_sender - .lock() - .unwrap() - .send(FlowInfo::Flush(info.cf_name().to_owned(), total)); + let _ = self.flow_info_sender.lock().unwrap().send(FlowInfo::Flush( + info.cf_name().to_owned(), + total, + self.region_id, + self.suffix_id, + )); } fn on_external_file_ingested(&self, info: &IngestionInfo) { @@ -45,18 +77,23 @@ impl EventListener for FlowListener { let mut total = 0; let p = info.table_properties(); total += p.data_size() + p.index_size() + p.filter_size(); - let _ = self - .flow_info_sender - .lock() - .unwrap() - .send(FlowInfo::Flush(info.cf_name().to_owned(), total)); + let _ = self.flow_info_sender.lock().unwrap().send(FlowInfo::Flush( + info.cf_name().to_owned(), + total, + self.region_id, + self.suffix_id, + )); } else { // ingestion may change the pending bytes. 
let _ = self .flow_info_sender .lock() .unwrap() - .send(FlowInfo::Compaction(info.cf_name().to_owned())); + .send(FlowInfo::Compaction( + info.cf_name().to_owned(), + self.region_id, + self.suffix_id, + )); } } @@ -97,7 +134,12 @@ impl EventListener for FlowListener { .flow_info_sender .lock() .unwrap() - .send(FlowInfo::L0Intra(info.cf_name().to_owned(), diff)); + .send(FlowInfo::L0Intra( + info.cf_name().to_owned(), + diff, + self.region_id, + self.suffix_id, + )); } else { let l0_input_file_at_input_level = info.input_file_count() - info.num_input_files_at_output_level(); @@ -116,11 +158,12 @@ impl EventListener for FlowListener { } } - let _ = self - .flow_info_sender - .lock() - .unwrap() - .send(FlowInfo::L0(info.cf_name().to_owned(), read_bytes)); + let _ = self.flow_info_sender.lock().unwrap().send(FlowInfo::L0( + info.cf_name().to_owned(), + read_bytes, + self.region_id, + self.suffix_id, + )); } } @@ -128,6 +171,10 @@ impl EventListener for FlowListener { .flow_info_sender .lock() .unwrap() - .send(FlowInfo::Compaction(info.cf_name().to_owned())); + .send(FlowInfo::Compaction( + info.cf_name().to_owned(), + self.region_id, + self.suffix_id, + )); } } diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index e97a15c75ae..a2aa5e5d908 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -141,7 +141,7 @@ pub trait TabletFactory { pub struct DummyFactory where - EK: KvEngine, + EK: Clone + Send + 'static, { pub engine: Option, pub root_path: String, @@ -149,7 +149,7 @@ where impl TabletFactory for DummyFactory where - EK: KvEngine, + EK: Clone + Send + 'static, { fn create_tablet(&self, _id: u64, _suffix: u64) -> Result { Ok(self.engine.as_ref().unwrap().clone()) @@ -190,18 +190,15 @@ where impl DummyFactory where - EK: KvEngine, + EK: Clone + Send + 'static, { - pub fn new() -> DummyFactory { - DummyFactory { - engine: None, - root_path: "/dummy_root".to_string(), - } + 
pub fn new(engine: Option, root_path: String) -> DummyFactory { + DummyFactory { engine, root_path } } } -impl Default for DummyFactory { +impl Default for DummyFactory { fn default() -> Self { - Self::new() + Self::new(None, "/tmp".to_string()) } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 6bf1de8e7a7..11f6071dbc6 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -97,8 +97,11 @@ use tikv::{ GRPC_THREAD_PREFIX, }, storage::{ - self, config_manager::StorageConfigManger, mvcc::MvccConsistencyCheckObserver, - txn::flow_controller::FlowController, Engine, + self, + config_manager::StorageConfigManger, + mvcc::MvccConsistencyCheckObserver, + txn::flow_controller::{EngineFlowController, FlowController}, + Engine, }, }; use tikv_util::{ @@ -558,11 +561,11 @@ impl TiKvServer { } fn init_servers(&mut self) -> Arc> { - let flow_controller = Arc::new(FlowController::new( + let flow_controller = Arc::new(FlowController::Singleton(EngineFlowController::new( &self.config.storage.flow_control, self.engines.as_ref().unwrap().engine.kv_engine(), self.flow_info_receiver.take().unwrap(), - )); + ))); let gc_worker = self.init_gc_worker(); let mut ttl_checker = Box::new(LazyWorker::new("ttl-checker")); let ttl_scheduler = ttl_checker.scheduler(); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 981843ddfc6..88e0b079a4d 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -61,7 +61,12 @@ use tikv::{ ConnectionBuilder, Error, Node, PdStoreAddrResolver, RaftClient, RaftKv, Result as ServerResult, Server, ServerTransport, }, - storage::{self, kv::SnapContext, txn::flow_controller::FlowController, Engine}, + storage::{ + self, + kv::SnapContext, + txn::flow_controller::{EngineFlowController, FlowController}, + Engine, + }, }; use tikv_util::{ config::VersionTrack, @@ -385,7 +390,7 @@ impl ServerCluster { 
lock_mgr.clone(), concurrency_manager.clone(), lock_mgr.get_storage_dynamic_configs(), - Arc::new(FlowController::empty()), + Arc::new(FlowController::Singleton(EngineFlowController::empty())), pd_sender, res_tag_factory.clone(), quota_limiter.clone(), diff --git a/src/config.rs b/src/config.rs index d37e0892082..3ff087f129c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -3829,8 +3829,10 @@ mod tests { use crate::{ server::{config::ServerConfigManager, ttl::TtlCheckerTask}, storage::{ - config_manager::StorageConfigManger, lock_manager::DummyLockManager, - txn::flow_controller::FlowController, Storage, TestStorageBuilder, + config_manager::StorageConfigManger, + lock_manager::DummyLockManager, + txn::flow_controller::{EngineFlowController, FlowController}, + Storage, TestStorageBuilder, }, }; @@ -4186,6 +4188,7 @@ mod tests { assert_eq!(res.get("raftstore.store-pool-size"), Some(&"17".to_owned())); } + #[allow(clippy::type_complexity)] fn new_engines( cfg: TiKvConfig, ) -> ( @@ -4215,11 +4218,11 @@ mod tests { .unwrap(); let engine = storage.get_engine().get_rocksdb(); let (_tx, rx) = std::sync::mpsc::channel(); - let flow_controller = Arc::new(FlowController::new( + let flow_controller = Arc::new(FlowController::Singleton(EngineFlowController::new( &cfg.storage.flow_control, engine.clone(), rx, - )); + ))); let (shared, cfg_controller) = (cfg.storage.block_cache.shared, ConfigController::new(cfg)); cfg_controller.register( diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 0c02cde0aef..5212a211e69 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -128,7 +128,12 @@ impl KvEngineFactory { )) } - pub fn create_tablet(&self, tablet_path: &Path) -> Result { + pub fn create_tablet( + &self, + tablet_path: &Path, + region_id: u64, + suffix: u64, + ) -> Result { // Create kv engine. 
let mut kv_db_opts = self.inner.rocksdb_config.build_opt(); kv_db_opts.set_env(self.inner.env.clone()); @@ -140,7 +145,7 @@ impl KvEngineFactory { kv_db_opts.add_event_listener(filter); } if let Some(listener) = &self.inner.flow_listener { - kv_db_opts.add_event_listener(listener.clone()); + kv_db_opts.add_event_listener(listener.clone_with(region_id, suffix)); } let kv_cfs_opts = self.inner.rocksdb_config.build_cf_opts( &self.inner.block_cache, @@ -165,6 +170,13 @@ impl KvEngineFactory { Ok(kv_engine) } + pub fn on_tablet_created(&self, region_id: u64, suffix: u64) { + if let Some(listener) = &self.inner.flow_listener { + let listener = listener.clone_with(region_id, suffix); + listener.on_created(); + } + } + pub fn destroy_tablet(&self, tablet_path: &Path) -> engine_traits::Result<()> { info!("destroy tablet"; "path" => %tablet_path.display()); // Create kv engine. @@ -189,6 +201,13 @@ impl KvEngineFactory { Ok(()) } + pub fn on_tablet_destroy(&self, region_id: u64, suffix: u64) { + if let Some(listener) = &self.inner.flow_listener { + let listener = listener.clone_with(region_id, suffix); + listener.on_destroyed(); + } + } + pub fn store_path(&self) -> PathBuf { self.inner.store_path.clone() } @@ -203,7 +222,7 @@ impl TabletFactory for KvEngineFactory { #[inline] fn create_shared_db(&self) -> Result { let root_path = self.kv_engine_path(); - let tablet = self.create_tablet(&root_path)?; + let tablet = self.create_tablet(&root_path, 0, 0)?; let mut root_db = self.inner.root_db.lock().unwrap(); root_db.replace(tablet.clone()); Ok(tablet) diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index 2dca2ff14f3..676272334ac 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -30,9 +30,10 @@ impl TabletFactory for KvEngineFactoryV2 { )); } let tablet_path = self.tablet_path(id, suffix); - let kv_engine = self.inner.create_tablet(&tablet_path)?; + let kv_engine = self.inner.create_tablet(&tablet_path, id, 
suffix)?; debug!("inserting tablet"; "key" => ?(id, suffix)); reg.insert((id, suffix), kv_engine.clone()); + self.inner.on_tablet_created(id, suffix); Ok(kv_engine) } @@ -123,7 +124,9 @@ impl TabletFactory for KvEngineFactoryV2 { fn destroy_tablet(&self, id: u64, suffix: u64) -> engine_traits::Result<()> { let path = self.tablet_path(id, suffix); self.registry.lock().unwrap().remove(&(id, suffix)); - self.inner.destroy_tablet(&path) + self.inner.destroy_tablet(&path)?; + self.inner.on_tablet_destroy(id, suffix); + Ok(()) } #[inline] diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index ca59416d495..7242a984d0d 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -615,7 +615,7 @@ where Ok(()) } - fn unsafe_destroy_range(&self, _: &Context, start_key: &Key, end_key: &Key) -> Result<()> { + fn unsafe_destroy_range(&self, ctx: &Context, start_key: &Key, end_key: &Key) -> Result<()> { info!( "unsafe destroy range started"; "start_key" => %start_key, "end_key" => %end_key @@ -624,7 +624,7 @@ where fail_point!("unsafe_destroy_range"); self.flow_info_sender - .send(FlowInfo::BeforeUnsafeDestroyRange) + .send(FlowInfo::BeforeUnsafeDestroyRange(ctx.region_id)) .unwrap(); let local_storage = self.engine.kv_engine(); @@ -691,7 +691,7 @@ where "start_key" => %start_key, "end_key" => %end_key, "cost_time" => ?cleanup_all_start_time.saturating_elapsed(), ); self.flow_info_sender - .send(FlowInfo::AfterUnsafeDestroyRange) + .send(FlowInfo::AfterUnsafeDestroyRange(ctx.region_id)) .unwrap(); self.raft_store_router diff --git a/src/storage/mod.rs b/src/storage/mod.rs index f12f918b8aa..7026ebab77d 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -108,7 +108,7 @@ use crate::{ mvcc::{MvccReader, PointGetterBuilder}, txn::{ commands::{RawAtomicStore, RawCompareAndSwap, TypedCommand}, - flow_controller::FlowController, + flow_controller::{EngineFlowController, FlowController}, scheduler::Scheduler as 
TxnScheduler, Command, }, @@ -2811,7 +2811,7 @@ impl TestStorageBuilder { pipelined_pessimistic_lock: self.pipelined_pessimistic_lock, in_memory_pessimistic_lock: self.in_memory_pessimistic_lock, }, - Arc::new(FlowController::empty()), + Arc::new(FlowController::Singleton(EngineFlowController::empty())), DummyReporter, self.resource_tag_factory, Arc::new(QuotaLimiter::default()), @@ -2839,7 +2839,7 @@ impl TestStorageBuilder { pipelined_pessimistic_lock: self.pipelined_pessimistic_lock, in_memory_pessimistic_lock: self.in_memory_pessimistic_lock, }, - Arc::new(FlowController::empty()), + Arc::new(FlowController::Singleton(EngineFlowController::empty())), DummyReporter, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), diff --git a/src/storage/txn/flow_controller/mod.rs b/src/storage/txn/flow_controller/mod.rs new file mode 100644 index 00000000000..f109b9896a3 --- /dev/null +++ b/src/storage/txn/flow_controller/mod.rs @@ -0,0 +1,76 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +pub mod singleton_flow_controller; +pub mod tablet_flow_controller; + +use std::time::Duration; + +pub use singleton_flow_controller::EngineFlowController; +pub use tablet_flow_controller::TabletFlowController; + +pub enum FlowController { + Singleton(EngineFlowController), + Tablet(TabletFlowController), +} + +macro_rules! 
flow_controller_fn { + ($fn_name: ident, $region_id: ident, $type: ident) => { + pub fn $fn_name(&self, $region_id: u64) -> $type { + match self { + FlowController::Singleton(ref controller) => controller.$fn_name($region_id), + FlowController::Tablet(ref controller) => controller.$fn_name($region_id), + } + } + }; + ($fn_name: ident, $region_id: ident, $bytes: ident, $type: ident) => { + pub fn $fn_name(&self, $region_id: u64, $bytes: usize) -> $type { + match self { + FlowController::Singleton(ref controller) => { + controller.$fn_name($region_id, $bytes) + } + FlowController::Tablet(ref controller) => controller.$fn_name($region_id, $bytes), + } + } + }; +} + +impl FlowController { + flow_controller_fn!(should_drop, region_id, bool); + #[cfg(test)] + flow_controller_fn!(discard_ratio, region_id, f64); + flow_controller_fn!(consume, region_id, bytes, Duration); + #[cfg(test)] + flow_controller_fn!(total_bytes_consumed, region_id, usize); + flow_controller_fn!(is_unlimited, region_id, bool); + + pub fn unconsume(&self, region_id: u64, bytes: usize) { + match self { + FlowController::Singleton(ref controller) => controller.unconsume(region_id, bytes), + FlowController::Tablet(ref controller) => controller.unconsume(region_id, bytes), + } + } + pub fn enable(&self, enable: bool) { + match self { + FlowController::Singleton(ref controller) => controller.enable(enable), + FlowController::Tablet(ref controller) => controller.enable(enable), + } + } + + pub fn enabled(&self) -> bool { + match self { + FlowController::Singleton(ref controller) => controller.enabled(), + FlowController::Tablet(ref controller) => controller.enabled(), + } + } + + #[cfg(test)] + pub fn set_speed_limit(&self, region_id: u64, speed_limit: f64) { + match self { + FlowController::Singleton(ref controller) => { + controller.set_speed_limit(region_id, speed_limit) + } + FlowController::Tablet(ref controller) => { + controller.set_speed_limit(region_id, speed_limit) + } + } + } +} diff --git 
a/src/storage/txn/flow_controller.rs b/src/storage/txn/flow_controller/singleton_flow_controller.rs similarity index 71% rename from src/storage/txn/flow_controller.rs rename to src/storage/txn/flow_controller/singleton_flow_controller.rs index e29472594c6..76671412abc 100644 --- a/src/storage/txn/flow_controller.rs +++ b/src/storage/txn/flow_controller/singleton_flow_controller.rs @@ -18,6 +18,7 @@ use std::{ use collections::HashMap; use engine_rocks::FlowInfo; use engine_traits::{CFNamesExt, FlowControlFactorsExt}; +use getset::{CopyGetters, Setters}; use num_traits::cast::{AsPrimitive, FromPrimitive}; use rand::Rng; use tikv_util::{ @@ -27,9 +28,9 @@ use tikv_util::{ use crate::storage::{config::FlowControlConfig, metrics::*}; -const TICK_DURATION: Duration = Duration::from_millis(1000); +pub(super) const TICK_DURATION: Duration = Duration::from_millis(1000); -const RATIO_SCALE_FACTOR: u32 = 10_000_000; +pub(super) const RATIO_SCALE_FACTOR: u32 = 10_000_000; const K_INC_SLOWDOWN_RATIO: f64 = 0.8; const K_DEC_SLOWDOWN_RATIO: f64 = 1.0 / K_INC_SLOWDOWN_RATIO; const MIN_THROTTLE_SPEED: f64 = 16.0 * 1024.0; // 16KB @@ -69,7 +70,7 @@ enum Trend { /// /// Here is a brief flow showing where the mechanism works: /// grpc -> check should drop(discardable ratio) -> limiter -> async write to raftstore -pub struct FlowController { +pub struct EngineFlowController { discard_ratio: Arc, limiter: Arc, enabled: Arc, @@ -77,13 +78,13 @@ pub struct FlowController { handle: Option>, } -enum Msg { +pub(super) enum Msg { Close, Enable, Disable, } -impl Drop for FlowController { +impl Drop for EngineFlowController { fn drop(&mut self) { let h = self.handle.take(); if h.is_none() { @@ -101,7 +102,7 @@ impl Drop for FlowController { } } -impl FlowController { +impl EngineFlowController { // only for test pub fn empty() -> Self { Self { @@ -142,28 +143,30 @@ impl FlowController { handle: Some(checker.start(rx, flow_info_receiver)), } } +} - pub fn should_drop(&self) -> bool { +impl 
EngineFlowController { + pub fn should_drop(&self, _region_id: u64) -> bool { let ratio = self.discard_ratio.load(Ordering::Relaxed); let mut rng = rand::thread_rng(); rng.gen_ratio(ratio, RATIO_SCALE_FACTOR) } #[cfg(test)] - pub fn discard_ratio(&self) -> f64 { + pub fn discard_ratio(&self, _region_id: u64) -> f64 { self.discard_ratio.load(Ordering::Relaxed) as f64 / RATIO_SCALE_FACTOR as f64 } - pub fn consume(&self, bytes: usize) -> Duration { + pub fn consume(&self, _region_id: u64, bytes: usize) -> Duration { self.limiter.consume_duration(bytes) } - pub fn unconsume(&self, bytes: usize) { + pub fn unconsume(&self, _region_id: u64, bytes: usize) { self.limiter.unconsume(bytes); } #[cfg(test)] - pub fn total_bytes_consumed(&self) -> usize { + pub fn total_bytes_consumed(&self, _region_id: u64) -> usize { self.limiter.total_bytes_consumed() } @@ -183,11 +186,11 @@ impl FlowController { } #[cfg(test)] - pub fn set_speed_limit(&self, speed_limit: f64) { + pub fn set_speed_limit(&self, _region_id: u64, speed_limit: f64) { self.limiter.set_speed_limit(speed_limit); } - pub fn is_unlimited(&self) -> bool { + pub fn is_unlimited(&self, _region_id: u64) -> bool { self.limiter.speed_limit() == f64::INFINITY } } @@ -365,7 +368,7 @@ where } } -// CFFlowChecker records some statistics and states related to one CF. +// CfFlowChecker records some statistics and states related to one CF. // These statistics fall into five categories: // * memtable // * L0 files @@ -373,7 +376,7 @@ where // * L0 consumption flow (compaction read flow of L0) // * pending compaction bytes // And all of them are collected from the hook of RocksDB's event listener. 
-struct CFFlowChecker { +struct CfFlowChecker { // Memtable related last_num_memtables: Smoother, memtable_debt: f64, @@ -416,7 +419,7 @@ struct CFFlowChecker { on_start_pending_bytes: bool, } -impl Default for CFFlowChecker { +impl Default for CfFlowChecker { fn default() -> Self { Self { last_num_memtables: Smoother::default(), @@ -438,14 +441,15 @@ impl Default for CFFlowChecker { } } -struct FlowChecker { - soft_pending_compaction_bytes_limit: u64, +#[derive(CopyGetters, Setters)] +pub(super) struct FlowChecker { + pub soft_pending_compaction_bytes_limit: u64, hard_pending_compaction_bytes_limit: u64, memtables_threshold: u64, l0_files_threshold: u64, - // CFFlowChecker for each CF. - cf_checkers: HashMap, + // CfFlowChecker for each CF. + cf_checkers: HashMap, // Record which CF is taking control of throttling, the throttle speed is // decided based on the statistics of the throttle CF. If the multiple CFs // exceed the threshold, choose the larger one. @@ -454,6 +458,7 @@ struct FlowChecker { // drop write requests(return ServerIsBusy to TiDB) randomly. discard_ratio: Arc, + #[getset(set = "pub")] engine: E, limiter: Arc, // Records the foreground write flow at scheduler level of last few seconds. 
@@ -462,6 +467,9 @@ struct FlowChecker { last_record_time: Instant, last_speed: f64, wait_for_destroy_range_finish: bool, + + #[getset(get_copy = "pub", set = "pub")] + tablet_suffix: u64, } impl FlowChecker { @@ -470,11 +478,21 @@ impl FlowChecker { engine: E, discard_ratio: Arc, limiter: Arc, + ) -> Self { + Self::new_with_tablet_suffix(config, engine, discard_ratio, limiter, 0) + } + + pub fn new_with_tablet_suffix( + config: &FlowControlConfig, + engine: E, + discard_ratio: Arc, + limiter: Arc, + tablet_suffix: u64, ) -> Self { let cf_checkers = engine .cf_names() .into_iter() - .map(|cf| (cf.to_owned(), CFFlowChecker::default())) + .map(|cf| (cf.to_owned(), CfFlowChecker::default())) .collect(); Self { @@ -491,6 +509,88 @@ impl FlowChecker { last_record_time: Instant::now_coarse(), last_speed: 0.0, wait_for_destroy_range_finish: false, + tablet_suffix, + } + } + + pub fn on_flow_info_msg( + &mut self, + enabled: bool, + flow_info: Result, + ) { + match flow_info { + Ok(FlowInfo::L0(cf, l0_bytes, ..)) => { + self.collect_l0_consumption_stats(&cf, l0_bytes); + if enabled { + self.on_l0_change(cf) + } + } + Ok(FlowInfo::L0Intra(cf, diff_bytes, ..)) => { + if diff_bytes > 0 { + // Intra L0 merges some deletion records, so regard it as a L0 compaction. 
+ self.collect_l0_consumption_stats(&cf, diff_bytes); + if enabled { + self.on_l0_change(cf); + } + } + } + Ok(FlowInfo::Flush(cf, flush_bytes, ..)) => { + self.collect_l0_production_stats(&cf, flush_bytes); + if enabled { + self.on_memtable_change(&cf); + self.on_l0_change(cf) + } + } + Ok(FlowInfo::Compaction(cf, ..)) => { + if enabled { + self.on_pending_compaction_bytes_change(cf); + } + } + Ok(FlowInfo::BeforeUnsafeDestroyRange(..)) => { + if !enabled { + return; + } + self.wait_for_destroy_range_finish = true; + let soft = (self.soft_pending_compaction_bytes_limit as f64).log2(); + for cf_checker in self.cf_checkers.values_mut() { + let v = cf_checker.long_term_pending_bytes.get_avg(); + if v <= soft { + cf_checker.pending_bytes_before_unsafe_destroy_range = Some(v); + } + } + } + Ok(FlowInfo::AfterUnsafeDestroyRange(..)) => { + if !enabled { + return; + } + self.wait_for_destroy_range_finish = false; + for (cf, cf_checker) in &mut self.cf_checkers { + if let Some(before) = cf_checker.pending_bytes_before_unsafe_destroy_range { + let soft = (self.soft_pending_compaction_bytes_limit as f64).log2(); + let after = (self + .engine + .get_cf_pending_compaction_bytes(cf) + .unwrap_or(None) + .unwrap_or(0) as f64) + .log2(); + + assert!(before < soft); + if after >= soft { + // there is a pending bytes jump + SCHED_THROTTLE_ACTION_COUNTER + .with_label_values(&[cf, "pending_bytes_jump"]) + .inc(); + } else { + cf_checker.pending_bytes_before_unsafe_destroy_range = None; + } + } + } + } + Ok(FlowInfo::Created(..)) => {} + Ok(FlowInfo::Destroyed(..)) => {} + Err(e) => { + error!("failed to receive compaction info {:?}", e); + } } } @@ -515,85 +615,12 @@ impl FlowChecker { Err(_) => {} } - match flow_info_receiver.recv_deadline(deadline) { - Ok(FlowInfo::L0(cf, l0_bytes)) => { - checker.collect_l0_consumption_stats(&cf, l0_bytes); - if enabled { - checker.on_l0_change(cf) - } - } - Ok(FlowInfo::L0Intra(cf, diff_bytes)) => { - if diff_bytes > 0 { - // Intra L0 merges 
some deletion records, so regard it as a L0 compaction. - checker.collect_l0_consumption_stats(&cf, diff_bytes); - if enabled { - checker.on_l0_change(cf); - } - } - } - Ok(FlowInfo::Flush(cf, flush_bytes)) => { - checker.collect_l0_production_stats(&cf, flush_bytes); - if enabled { - checker.on_memtable_change(&cf); - checker.on_l0_change(cf) - } - } - Ok(FlowInfo::Compaction(cf)) => { - if enabled { - checker.on_pending_compaction_bytes_change(cf); - } - } - Ok(FlowInfo::BeforeUnsafeDestroyRange) => { - if !enabled { - continue; - } - checker.wait_for_destroy_range_finish = true; - let soft = (checker.soft_pending_compaction_bytes_limit as f64).log2(); - for cf_checker in checker.cf_checkers.values_mut() { - let v = cf_checker.long_term_pending_bytes.get_avg(); - if v <= soft { - cf_checker.pending_bytes_before_unsafe_destroy_range = Some(v); - } - } - } - Ok(FlowInfo::AfterUnsafeDestroyRange) => { - if !enabled { - continue; - } - checker.wait_for_destroy_range_finish = false; - for (cf, cf_checker) in &mut checker.cf_checkers { - if let Some(before) = - cf_checker.pending_bytes_before_unsafe_destroy_range - { - let soft = - (checker.soft_pending_compaction_bytes_limit as f64).log2(); - let after = (checker - .engine - .get_cf_pending_compaction_bytes(cf) - .unwrap_or(None) - .unwrap_or(0) - as f64) - .log2(); - - assert!(before < soft); - if after >= soft { - // there is a pending bytes jump - SCHED_THROTTLE_ACTION_COUNTER - .with_label_values(&[cf, "pending_bytes_jump"]) - .inc(); - } else { - cf_checker.pending_bytes_before_unsafe_destroy_range = None; - } - } - } - } - Err(RecvTimeoutError::Timeout) => { - checker.update_statistics(); - deadline = std::time::Instant::now() + TICK_DURATION; - } - Err(e) => { - error!("failed to receive compaction info {:?}", e); - } + let msg = flow_info_receiver.recv_deadline(deadline); + if let Err(RecvTimeoutError::Timeout) = msg { + checker.update_statistics(); + deadline = std::time::Instant::now() + TICK_DURATION; + } 
else { + checker.on_flow_info_msg(enabled, msg); } } tikv_alloc::remove_thread_memory_accessor(); @@ -601,7 +628,7 @@ impl FlowChecker { .unwrap() } - fn reset_statistics(&mut self) { + pub fn reset_statistics(&mut self) { SCHED_L0_TARGET_FLOW_GAUGE.set(0); for cf in self.cf_checkers.keys() { SCHED_THROTTLE_CF_GAUGE.with_label_values(&[cf]).set(0); @@ -621,7 +648,7 @@ impl FlowChecker { self.discard_ratio.store(0, Ordering::Relaxed); } - fn update_statistics(&mut self) { + pub fn update_statistics(&mut self) { if let Some(throttle_cf) = self.throttle_cf.as_ref() { SCHED_THROTTLE_CF_GAUGE .with_label_values(&[throttle_cf]) @@ -959,28 +986,28 @@ impl FlowChecker { } #[cfg(test)] -mod tests { +pub(super) mod tests { use std::sync::atomic::AtomicU64; use engine_traits::Result; - use super::*; + use super::{super::FlowController, *}; #[derive(Clone)] - struct EngineStub(Arc); + pub struct EngineStub(pub Arc); - struct EngineStubInner { + pub struct EngineStubInner { pub pending_compaction_bytes: AtomicU64, pub num_l0_files: AtomicU64, - pub num_memtable_files: AtomicU64, + pub num_memtables: AtomicU64, } impl EngineStub { - fn new() -> Self { + pub fn new() -> Self { Self(Arc::new(EngineStubInner { pending_compaction_bytes: AtomicU64::new(0), num_l0_files: AtomicU64::new(0), - num_memtable_files: AtomicU64::new(0), + num_memtables: AtomicU64::new(0), })) } } @@ -997,7 +1024,7 @@ mod tests { } fn get_cf_num_immutable_mem_table(&self, _cf: &str) -> Result> { - Ok(Some(self.0.num_memtable_files.load(Ordering::Relaxed))) + Ok(Some(self.0.num_memtables.load(Ordering::Relaxed))) } fn get_cf_pending_compaction_bytes(&self, _cf: &str) -> Result> { @@ -1007,18 +1034,13 @@ mod tests { } } - #[test] - fn test_flow_controller_basic() { - let stub = EngineStub::new(); - let (_tx, rx) = mpsc::channel(); - let flow_controller = FlowController::new(&FlowControlConfig::default(), stub, rx); - + pub fn test_flow_controller_basic_impl(flow_controller: &FlowController, region_id: u64) { // 
enable flow controller assert_eq!(flow_controller.enabled(), true); - assert_eq!(flow_controller.should_drop(), false); - assert_eq!(flow_controller.is_unlimited(), true); - assert_eq!(flow_controller.consume(0), Duration::ZERO); - assert_eq!(flow_controller.consume(1000), Duration::ZERO); + assert_eq!(flow_controller.should_drop(region_id), false); + assert_eq!(flow_controller.is_unlimited(region_id), true); + assert_eq!(flow_controller.consume(region_id, 0), Duration::ZERO); + assert_eq!(flow_controller.consume(region_id, 1000), Duration::ZERO); // disable flow controller flow_controller.enable(false); @@ -1026,73 +1048,156 @@ mod tests { // re-enable flow controller flow_controller.enable(true); assert_eq!(flow_controller.enabled(), true); - assert_eq!(flow_controller.should_drop(), false); - assert_eq!(flow_controller.is_unlimited(), true); - assert_eq!(flow_controller.consume(1), Duration::ZERO); + assert_eq!(flow_controller.should_drop(region_id), false); + assert_eq!(flow_controller.is_unlimited(region_id), true); + assert_eq!(flow_controller.consume(region_id, 1), Duration::ZERO); } #[test] - fn test_flow_controller_memtable() { + fn test_flow_controller_basic() { let stub = EngineStub::new(); - let (tx, rx) = mpsc::sync_channel(0); - let flow_controller = FlowController::new(&FlowControlConfig::default(), stub.clone(), rx); + let (_tx, rx) = mpsc::channel(); + let flow_controller = EngineFlowController::new(&FlowControlConfig::default(), stub, rx); + let flow_controller = FlowController::Singleton(flow_controller); + test_flow_controller_basic_impl(&flow_controller, 0); + } + + pub fn test_flow_controller_memtable_impl( + flow_controller: &FlowController, + stub: &EngineStub, + tx: &mpsc::SyncSender, + region_id: u64, + tablet_suffix: u64, + ) { + assert_eq!(flow_controller.consume(0, 2000), Duration::ZERO); + loop { + if flow_controller.total_bytes_consumed(0) == 0 { + break; + } + std::thread::sleep(TICK_DURATION); + } - 
assert_eq!(flow_controller.consume(2000), Duration::ZERO); + assert_eq!(flow_controller.consume(region_id, 2000), Duration::ZERO); loop { - if flow_controller.total_bytes_consumed() == 0 { + if flow_controller.total_bytes_consumed(region_id) == 0 { break; } std::thread::sleep(TICK_DURATION); } // exceeds the threshold on start - stub.0.num_memtable_files.store(8, Ordering::Relaxed); - tx.send(FlowInfo::Flush("default".to_string(), 0)).unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); - assert_eq!(flow_controller.should_drop(), false); + stub.0.num_memtables.store(8, Ordering::Relaxed); + tx.send(FlowInfo::Flush( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + assert_eq!(flow_controller.should_drop(region_id), false); // on start check forbids flow control - assert_eq!(flow_controller.is_unlimited(), true); + assert_eq!(flow_controller.is_unlimited(region_id), true); // once falls below the threshold, pass the on start check - stub.0.num_memtable_files.store(1, Ordering::Relaxed); - tx.send(FlowInfo::Flush("default".to_string(), 0)).unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); + stub.0.num_memtables.store(1, Ordering::Relaxed); + tx.send(FlowInfo::Flush( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); // not throttle when the average of the sliding window doesn't exceeds the threshold - stub.0.num_memtable_files.store(6, Ordering::Relaxed); - tx.send(FlowInfo::Flush("default".to_string(), 0)).unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); - assert_eq!(flow_controller.should_drop(), false); - assert_eq!(flow_controller.is_unlimited(), true); + stub.0.num_memtables.store(6, Ordering::Relaxed); + 
tx.send(FlowInfo::Flush( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + assert_eq!(flow_controller.should_drop(region_id), false); + assert_eq!(flow_controller.is_unlimited(region_id), true); // the average of sliding window exceeds the threshold - stub.0.num_memtable_files.store(6, Ordering::Relaxed); - tx.send(FlowInfo::Flush("default".to_string(), 0)).unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); - assert_eq!(flow_controller.should_drop(), false); - assert_eq!(flow_controller.is_unlimited(), false); - assert_ne!(flow_controller.consume(2000), Duration::ZERO); + stub.0.num_memtables.store(6, Ordering::Relaxed); + tx.send(FlowInfo::Flush( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + assert_eq!(flow_controller.should_drop(region_id), false); + assert_eq!(flow_controller.is_unlimited(region_id), false); + assert_ne!(flow_controller.consume(region_id, 2000), Duration::ZERO); // not throttle once the number of memtables falls below the threshold - stub.0.num_memtable_files.store(1, Ordering::Relaxed); - tx.send(FlowInfo::Flush("default".to_string(), 0)).unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); - assert_eq!(flow_controller.should_drop(), false); - assert_eq!(flow_controller.is_unlimited(), true); + stub.0.num_memtables.store(1, Ordering::Relaxed); + tx.send(FlowInfo::Flush( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + assert_eq!(flow_controller.should_drop(region_id), false); + assert_eq!(flow_controller.is_unlimited(region_id), true); } - #[test] - fn test_flow_controller_l0() { + fn 
test_flow_controller_memtable() { let stub = EngineStub::new(); let (tx, rx) = mpsc::sync_channel(0); - let flow_controller = FlowController::new(&FlowControlConfig::default(), stub.clone(), rx); + let flow_controller = + EngineFlowController::new(&FlowControlConfig::default(), stub.clone(), rx); + let flow_controller = FlowController::Singleton(flow_controller); + test_flow_controller_memtable_impl(&flow_controller, &stub, &tx, 0, 0); + } - assert_eq!(flow_controller.consume(2000), Duration::ZERO); + pub fn test_flow_controller_l0_impl( + flow_controller: &FlowController, + stub: &EngineStub, + tx: &mpsc::SyncSender, + region_id: u64, + tablet_suffix: u64, + ) { + assert_eq!(flow_controller.consume(region_id, 2000), Duration::ZERO); loop { - if flow_controller.total_bytes_consumed() == 0 { + if flow_controller.total_bytes_consumed(region_id) == 0 { break; } std::thread::sleep(TICK_DURATION); @@ -1100,115 +1205,251 @@ mod tests { // exceeds the threshold stub.0.num_l0_files.store(30, Ordering::Relaxed); - tx.send(FlowInfo::L0("default".to_string(), 0)).unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); - assert_eq!(flow_controller.should_drop(), false); + tx.send(FlowInfo::L0( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + assert_eq!(flow_controller.should_drop(region_id), false); // on start check forbids flow control - assert_eq!(flow_controller.is_unlimited(), true); + assert_eq!(flow_controller.is_unlimited(region_id), true); // once fall below the threshold, pass the on start check stub.0.num_l0_files.store(10, Ordering::Relaxed); - tx.send(FlowInfo::L0("default".to_string(), 0)).unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); + tx.send(FlowInfo::L0( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + 
"default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); // exceeds the threshold, throttle now stub.0.num_l0_files.store(30, Ordering::Relaxed); - tx.send(FlowInfo::L0("default".to_string(), 0)).unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); - assert_eq!(flow_controller.should_drop(), false); - assert_eq!(flow_controller.is_unlimited(), false); - assert_ne!(flow_controller.consume(2000), Duration::ZERO); + tx.send(FlowInfo::L0( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + assert_eq!(flow_controller.should_drop(region_id), false); + assert_eq!(flow_controller.is_unlimited(region_id), false); + assert_ne!(flow_controller.consume(region_id, 2000), Duration::ZERO); } #[test] - fn test_flow_controller_pending_compaction_bytes() { + fn test_flow_controller_l0() { let stub = EngineStub::new(); let (tx, rx) = mpsc::sync_channel(0); - let flow_controller = FlowController::new(&FlowControlConfig::default(), stub.clone(), rx); + let flow_controller = + EngineFlowController::new(&FlowControlConfig::default(), stub.clone(), rx); + let flow_controller = FlowController::Singleton(flow_controller); + test_flow_controller_l0_impl(&flow_controller, &stub, &tx, 0, 0); + } + pub fn test_flow_controller_pending_compaction_bytes_impl( + flow_controller: &FlowController, + stub: &EngineStub, + tx: &mpsc::SyncSender, + region_id: u64, + tablet_suffix: u64, + ) { // exceeds the threshold stub.0 .pending_compaction_bytes .store(1000 * 1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string())) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); + tx.send(FlowInfo::Compaction( + "default".to_string(), + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + 
)) + .unwrap(); // on start check forbids flow control - assert!(flow_controller.discard_ratio() < f64::EPSILON); + assert!( + flow_controller.discard_ratio(region_id) < f64::EPSILON, + "discard_ratio {}", + flow_controller.discard_ratio(region_id) + ); // once fall below the threshold, pass the on start check stub.0 .pending_compaction_bytes .store(100 * 1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string())) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); + tx.send(FlowInfo::Compaction( + "default".to_string(), + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); stub.0 .pending_compaction_bytes .store(1000 * 1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string())) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); - assert!(flow_controller.discard_ratio() > f64::EPSILON); + tx.send(FlowInfo::Compaction( + "default".to_string(), + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + assert!(flow_controller.discard_ratio(region_id) > f64::EPSILON); stub.0 .pending_compaction_bytes .store(1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string())) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); - assert!(flow_controller.discard_ratio() < f64::EPSILON); + tx.send(FlowInfo::Compaction( + "default".to_string(), + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); // pending compaction bytes jump after unsafe destroy range - tx.send(FlowInfo::BeforeUnsafeDestroyRange).unwrap(); - 
tx.send(FlowInfo::L0Intra("default".to_string(), 0)) + tx.send(FlowInfo::BeforeUnsafeDestroyRange(region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id, 0)) .unwrap(); - assert!(flow_controller.discard_ratio() < f64::EPSILON); + assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); // during unsafe destroy range, pending compaction bytes may change stub.0 .pending_compaction_bytes .store(1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string())) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); - assert!(flow_controller.discard_ratio() < f64::EPSILON); + tx.send(FlowInfo::Compaction( + "default".to_string(), + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); stub.0 .pending_compaction_bytes .store(10000000 * 1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string())) - .unwrap(); - tx.send(FlowInfo::AfterUnsafeDestroyRange).unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) + tx.send(FlowInfo::Compaction( + "default".to_string(), + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::AfterUnsafeDestroyRange(region_id)) .unwrap(); - assert!(flow_controller.discard_ratio() < f64::EPSILON); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + assert!( + flow_controller.discard_ratio(region_id) < f64::EPSILON, + "discard_ratio {}", + flow_controller.discard_ratio(region_id) + ); // unfreeze the control stub.0 .pending_compaction_bytes .store(1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string())) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); - assert!(flow_controller.discard_ratio() < f64::EPSILON); + 
tx.send(FlowInfo::Compaction( + "default".to_string(), + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); stub.0 .pending_compaction_bytes .store(1000000000 * 1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string())) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0)) - .unwrap(); - assert!(flow_controller.discard_ratio() > f64::EPSILON); + tx.send(FlowInfo::Compaction( + "default".to_string(), + region_id, + tablet_suffix, + )) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + assert!(flow_controller.discard_ratio(region_id) > f64::EPSILON); + } + + #[test] + fn test_flow_controller_pending_compaction_bytes() { + let stub = EngineStub::new(); + let (tx, rx) = mpsc::sync_channel(0); + let flow_controller = + EngineFlowController::new(&FlowControlConfig::default(), stub.clone(), rx); + let flow_controller = FlowController::Singleton(flow_controller); + test_flow_controller_pending_compaction_bytes_impl(&flow_controller, &stub, &tx, 0, 0); } #[test] diff --git a/src/storage/txn/flow_controller/tablet_flow_controller.rs b/src/storage/txn/flow_controller/tablet_flow_controller.rs new file mode 100644 index 00000000000..d177c203ba1 --- /dev/null +++ b/src/storage/txn/flow_controller/tablet_flow_controller.rs @@ -0,0 +1,395 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +// #[PerformanceCriticalPath] +use std::{ + sync::{ + atomic::{AtomicBool, AtomicU32, Ordering}, + mpsc::{self, Receiver, RecvTimeoutError, SyncSender}, + Arc, RwLock, + }, + thread::{Builder, JoinHandle}, + time::Duration, +}; + +use collections::HashMap; +use engine_rocks::FlowInfo; +use engine_traits::{CFNamesExt, FlowControlFactorsExt, TabletFactory}; +use rand::Rng; +use tikv_util::{sys::thread::StdThreadBuildWrapper, time::Limiter}; + +use super::singleton_flow_controller::{FlowChecker, Msg, RATIO_SCALE_FACTOR, TICK_DURATION}; +use crate::storage::config::FlowControlConfig; + +type Limiters = Arc, Arc)>>>; +pub struct TabletFlowController { + enabled: Arc, + tx: Option>, + handle: Option>, + limiters: Limiters, +} + +impl Drop for TabletFlowController { + fn drop(&mut self) { + let h = self.handle.take(); + if h.is_none() { + return; + } + + if let Some(Err(e)) = self.tx.as_ref().map(|tx| tx.send(Msg::Close)) { + error!("send quit message for flow controller failed"; "err" => ?e); + return; + } + + if let Err(e) = h.unwrap().join() { + error!("join flow controller failed"; "err" => ?e); + } + } +} + +impl TabletFlowController { + pub fn new( + config: &FlowControlConfig, + tablet_factory: Arc + Send + Sync>, + flow_info_receiver: Receiver, + ) -> Self { + let (tx, rx) = mpsc::sync_channel(5); + tx.send(if config.enable { + Msg::Enable + } else { + Msg::Disable + }) + .unwrap(); + let flow_checkers: Arc>>> = + Arc::new(RwLock::new(HashMap::default())); + let limiters: Limiters = Arc::new(RwLock::new(HashMap::default())); + Self { + enabled: Arc::new(AtomicBool::new(config.enable)), + tx: Some(tx), + limiters: limiters.clone(), + handle: Some(FlowInfoDispatcher::start( + rx, + flow_info_receiver, + tablet_factory, + flow_checkers, + limiters, + config.clone(), + )), + } + } + + pub fn tablet_exist(&self, region_id: u64) -> bool { + let limiters = self.limiters.as_ref().read().unwrap(); + limiters.get(®ion_id).is_some() + } +} + +struct FlowInfoDispatcher; + 
+impl FlowInfoDispatcher { + fn start( + rx: Receiver, + flow_info_receiver: Receiver, + tablet_factory: Arc + Send + Sync>, + flow_checkers: Arc>>>, + limiters: Limiters, + config: FlowControlConfig, + ) -> JoinHandle<()> { + Builder::new() + .name(thd_name!("flow-checker")) + .spawn_wrapper(move || { + tikv_alloc::add_thread_memory_accessor(); + let mut deadline = std::time::Instant::now(); + let mut enabled = true; + loop { + match rx.try_recv() { + Ok(Msg::Close) => break, + Ok(Msg::Disable) => { + enabled = false; + let mut checkers = flow_checkers.as_ref().write().unwrap(); + for checker in (*checkers).values_mut() { + checker.reset_statistics(); + } + } + Ok(Msg::Enable) => { + enabled = true; + } + Err(_) => {} + } + + let insert_limiter_and_checker = |region_id, suffix| -> FlowChecker { + let engine = tablet_factory.open_tablet_cache(region_id, suffix).unwrap(); + let mut v = limiters.as_ref().write().unwrap(); + let discard_ratio = Arc::new(AtomicU32::new(0)); + let limiter = v.entry(region_id).or_insert(( + Arc::new( + ::builder(f64::INFINITY) + .refill(Duration::from_millis(1)) + .build(), + ), + discard_ratio, + )); + FlowChecker::new_with_tablet_suffix( + &config, + engine, + limiter.1.clone(), + limiter.0.clone(), + suffix, + ) + }; + let msg = flow_info_receiver.recv_deadline(deadline); + match msg.clone() { + Ok(FlowInfo::L0(_cf, _, region_id, suffix)) + | Ok(FlowInfo::L0Intra(_cf, _, region_id, suffix)) + | Ok(FlowInfo::Flush(_cf, _, region_id, suffix)) + | Ok(FlowInfo::Compaction(_cf, region_id, suffix)) => { + let mut checkers = flow_checkers.as_ref().write().unwrap(); + if let Some(checker) = checkers.get_mut(®ion_id) { + if checker.tablet_suffix() != suffix { + continue; + } + checker.on_flow_info_msg(enabled, msg); + } + } + Ok(FlowInfo::BeforeUnsafeDestroyRange(region_id)) + | Ok(FlowInfo::AfterUnsafeDestroyRange(region_id)) => { + let mut checkers = flow_checkers.as_ref().write().unwrap(); + if let Some(checker) = checkers.get_mut(®ion_id) 
{ + checker.on_flow_info_msg(enabled, msg); + } + } + Ok(FlowInfo::Created(region_id, suffix)) => { + let mut checkers = flow_checkers.as_ref().write().unwrap(); + let checker = checkers + .entry(region_id) + .or_insert_with(|| insert_limiter_and_checker(region_id, suffix)); + // check if the checker's engine is exactly (region_id, suffix) + // if checker.suffix < suffix, it means its tablet is old and needs the refresh + if checker.tablet_suffix() < suffix { + let engine = + tablet_factory.open_tablet_cache(region_id, suffix).unwrap(); + checker.set_engine(engine); + checker.set_tablet_suffix(suffix); + } + } + Ok(FlowInfo::Destroyed(region_id, suffix)) => { + let mut remove_limiter = false; + { + let mut checkers = flow_checkers.as_ref().write().unwrap(); + if let Some(checker) = checkers.get_mut(®ion_id) { + if checker.tablet_suffix() == suffix { + checkers.remove(®ion_id); + remove_limiter = true; + } + } + } + if remove_limiter { + limiters.as_ref().write().unwrap().remove(®ion_id); + } + } + Err(RecvTimeoutError::Timeout) => { + let mut checkers = flow_checkers.as_ref().write().unwrap(); + for checker in (*checkers).values_mut() { + checker.update_statistics(); + } + deadline = std::time::Instant::now() + TICK_DURATION; + } + Err(e) => { + error!("failed to receive compaction info {:?}", e); + } + } + } + tikv_alloc::remove_thread_memory_accessor(); + }) + .unwrap() + } +} + +impl TabletFlowController { + pub fn should_drop(&self, region_id: u64) -> bool { + let limiters = self.limiters.as_ref().read().unwrap(); + if let Some(limiter) = limiters.get(®ion_id) { + let ratio = limiter.1.load(Ordering::Relaxed); + let mut rng = rand::thread_rng(); + return rng.gen_ratio(ratio, RATIO_SCALE_FACTOR); + } + false + } + + #[cfg(test)] + pub fn discard_ratio(&self, region_id: u64) -> f64 { + let limiters = self.limiters.as_ref().read().unwrap(); + if let Some(limiter) = limiters.get(®ion_id) { + let ratio = limiter.1.load(Ordering::Relaxed); + return ratio as f64 / 
RATIO_SCALE_FACTOR as f64; + } + 0.0 + } + + pub fn consume(&self, region_id: u64, bytes: usize) -> Duration { + let limiters = self.limiters.as_ref().read().unwrap(); + if let Some(limiter) = limiters.get(®ion_id) { + return limiter.0.consume_duration(bytes); + } + Duration::ZERO + } + + pub fn unconsume(&self, region_id: u64, bytes: usize) { + let limiters = self.limiters.as_ref().read().unwrap(); + if let Some(limiter) = limiters.get(®ion_id) { + limiter.0.unconsume(bytes); + } + } + + #[cfg(test)] + pub fn total_bytes_consumed(&self, region_id: u64) -> usize { + let limiters = self.limiters.as_ref().read().unwrap(); + if let Some(limiter) = limiters.get(®ion_id) { + return limiter.0.total_bytes_consumed(); + } + 0 + } + + pub fn enable(&self, enable: bool) { + self.enabled.store(enable, Ordering::Relaxed); + if let Some(tx) = &self.tx { + if enable { + tx.send(Msg::Enable).unwrap(); + } else { + tx.send(Msg::Disable).unwrap(); + } + } + } + + pub fn enabled(&self) -> bool { + self.enabled.load(Ordering::Relaxed) + } + + #[cfg(test)] + pub fn set_speed_limit(&self, region_id: u64, speed_limit: f64) { + let limiters = self.limiters.as_ref().read().unwrap(); + if let Some(limiter) = limiters.get(®ion_id) { + limiter.0.set_speed_limit(speed_limit); + } + } + + pub fn is_unlimited(&self, region_id: u64) -> bool { + let limiters = self.limiters.as_ref().read().unwrap(); + if let Some(limiter) = limiters.get(®ion_id) { + return limiter.0.speed_limit() == f64::INFINITY; + } + true + } +} + +#[cfg(test)] +mod tests { + use engine_rocks::FlowInfo; + use engine_traits::DummyFactory; + + use super::{ + super::{singleton_flow_controller::tests::*, FlowController}, + *, + }; + + fn create_tablet_flow_controller() -> (FlowController, mpsc::SyncSender, EngineStub) { + let (tx, rx) = mpsc::sync_channel(0); + let root_path = "/tmp"; + let stub = EngineStub::new(); + let factory = DummyFactory::::new(Some(stub.clone()), root_path.to_string()); + let tablet_factory = 
Arc::new(factory); + ( + FlowController::Tablet(TabletFlowController::new( + &FlowControlConfig::default(), + tablet_factory, + rx, + )), + tx, + stub, + ) + } + + #[test] + fn test_tablet_flow_controller_basic() { + let (flow_controller, tx, _) = create_tablet_flow_controller(); + let region_id = 5_u64; + let tablet_suffix = 5_u64; + tx.send(FlowInfo::Created(region_id, tablet_suffix)) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + test_flow_controller_basic_impl(&flow_controller, region_id); + tx.send(FlowInfo::Destroyed(region_id, tablet_suffix)) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + //assert!(!flow_controller.tablet_exist(region_id)); + } + + #[test] + fn test_tablet_flow_controller_memtable() { + let (flow_controller, tx, stub) = create_tablet_flow_controller(); + let region_id = 5_u64; + let tablet_suffix = 5_u64; + tx.send(FlowInfo::Created(region_id, tablet_suffix)) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + test_flow_controller_memtable_impl(&flow_controller, &stub, &tx, region_id, tablet_suffix); + } + + #[test] + fn test_tablet_flow_controller_l0() { + let (flow_controller, tx, stub) = create_tablet_flow_controller(); + let region_id = 5_u64; + let tablet_suffix = 5_u64; + tx.send(FlowInfo::Created(region_id, tablet_suffix)) + .unwrap(); + tx.send(FlowInfo::L0Intra( + "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + test_flow_controller_l0_impl(&flow_controller, &stub, &tx, region_id, tablet_suffix); + } + + #[test] + fn test_tablet_flow_controller_pending_compaction_bytes() { + let (flow_controller, tx, stub) = create_tablet_flow_controller(); + let region_id = 5_u64; + let tablet_suffix = 5_u64; + tx.send(FlowInfo::Created(region_id, tablet_suffix)) + .unwrap(); + tx.send(FlowInfo::L0Intra( 
+ "default".to_string(), + 0, + region_id, + tablet_suffix, + )) + .unwrap(); + + test_flow_controller_pending_compaction_bytes_impl( + &flow_controller, + &stub, + &tx, + region_id, + tablet_suffix, + ); + } +} diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index f0e1529fab7..ab866fe18bf 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -270,10 +270,10 @@ impl SchedulerInner { self.get_task_slot(cid).get_mut(&cid).unwrap().pr = Some(pr); } - fn too_busy(&self) -> bool { + fn too_busy(&self, region_id: u64) -> bool { fail_point!("txn_scheduler_busy", |_| true); self.running_write_bytes.load(Ordering::Acquire) >= self.sched_pending_write_threshold - || self.flow_controller.should_drop() + || self.flow_controller.should_drop(region_id) } /// Tries to acquire all the required latches for a command when waken up by @@ -394,7 +394,7 @@ impl Scheduler { pub(in crate::storage) fn run_cmd(&self, cmd: Command, callback: StorageCallback) { // write flow control - if cmd.need_flow_control() && self.inner.too_busy() { + if cmd.need_flow_control() && self.inner.too_busy(cmd.ctx().region_id) { SCHED_TOO_BUSY_COUNTER_VEC.get(cmd.tag()).inc(); callback.execute(ProcessResult::Failed { err: StorageError::from(StorageErrorInner::SchedTooBusy), @@ -845,6 +845,7 @@ impl Scheduler { // message when it finishes. 
Ok(res) => res, }; + let region_id = ctx.get_region_id(); SCHED_STAGE_COUNTER_VEC.get(tag).write.inc(); if let Some(lock_info) = lock_info { @@ -957,9 +958,9 @@ impl Scheduler { }; if self.inner.flow_controller.enabled() { - if self.inner.flow_controller.is_unlimited() { + if self.inner.flow_controller.is_unlimited(region_id) { // no need to delay if unthrottled, just call consume to record write flow - let _ = self.inner.flow_controller.consume(write_size); + let _ = self.inner.flow_controller.consume(region_id, write_size); } else { let start = Instant::now_coarse(); // Control mutex is used to ensure there is only one request consuming the quota. @@ -968,16 +969,16 @@ impl Scheduler { // without the mutex, the write flow can't throttled strictly. let control_mutex = self.inner.control_mutex.clone(); let _guard = control_mutex.lock().await; - let delay = self.inner.flow_controller.consume(write_size); + let delay = self.inner.flow_controller.consume(region_id, write_size); let delay_end = Instant::now_coarse() + delay; - while !self.inner.flow_controller.is_unlimited() { + while !self.inner.flow_controller.is_unlimited(region_id) { let now = Instant::now_coarse(); if now >= delay_end { break; } if now >= deadline.inner() { scheduler.finish_with_err(cid, StorageErrorInner::DeadlineExceeded); - self.inner.flow_controller.unconsume(write_size); + self.inner.flow_controller.unconsume(region_id, write_size); SCHED_THROTTLE_TIME.observe(start.saturating_elapsed_secs()); return; } @@ -1072,7 +1073,7 @@ impl Scheduler { // Only consume the quota when write succeeds, otherwise failed write requests may exhaust // the quota and other write requests would be in long delay. 
if sched.inner.flow_controller.enabled() { - sched.inner.flow_controller.unconsume(write_size); + sched.inner.flow_controller.unconsume(region_id, write_size); } } }) @@ -1189,7 +1190,12 @@ mod tests { lock_manager::DummyLockManager, mvcc::{self, Mutation}, test_util::latest_feature_gate, - txn::{commands, commands::TypedCommand, latch::*}, + txn::{ + commands, + commands::TypedCommand, + flow_controller::{EngineFlowController, FlowController}, + latch::*, + }, TestEngineBuilder, TxnStatus, }; @@ -1336,7 +1342,7 @@ mod tests { pipelined_pessimistic_lock: Arc::new(AtomicBool::new(true)), in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), }, - Arc::new(FlowController::empty()), + Arc::new(FlowController::Singleton(EngineFlowController::empty())), DummyReporter, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), @@ -1394,7 +1400,7 @@ mod tests { pipelined_pessimistic_lock: Arc::new(AtomicBool::new(true)), in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), }, - Arc::new(FlowController::empty()), + Arc::new(FlowController::Singleton(EngineFlowController::empty())), DummyReporter, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), @@ -1452,7 +1458,7 @@ mod tests { pipelined_pessimistic_lock: Arc::new(AtomicBool::new(true)), in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), }, - Arc::new(FlowController::empty()), + Arc::new(FlowController::Singleton(EngineFlowController::empty())), DummyReporter, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), @@ -1469,7 +1475,7 @@ mod tests { let (cb, f) = paired_future_callback(); scheduler.inner.flow_controller.enable(true); - scheduler.inner.flow_controller.set_speed_limit(1.0); + scheduler.inner.flow_controller.set_speed_limit(0, 1.0); scheduler.run_cmd(cmd.cmd, StorageCallback::TxnStatus(cb)); // The task waits for 200ms until it locks the control_mutex, but the execution // time limit is 100ms. 
Before the mutex is locked, it should return @@ -1480,13 +1486,13 @@ mod tests { Err(StorageError(box StorageErrorInner::DeadlineExceeded)) )); // should unconsume if the request fails - assert_eq!(scheduler.inner.flow_controller.total_bytes_consumed(), 0); + assert_eq!(scheduler.inner.flow_controller.total_bytes_consumed(0), 0); // A new request should not be blocked without flow control. scheduler .inner .flow_controller - .set_speed_limit(f64::INFINITY); + .set_speed_limit(0, f64::INFINITY); let mut req = CheckTxnStatusRequest::default(); req.mut_context().max_execution_duration_ms = 100; req.set_primary_key(b"a".to_vec()); @@ -1518,7 +1524,7 @@ mod tests { pipelined_pessimistic_lock: Arc::new(AtomicBool::new(true)), in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), }, - Arc::new(FlowController::empty()), + Arc::new(FlowController::Singleton(EngineFlowController::empty())), DummyReporter, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), @@ -1578,7 +1584,7 @@ mod tests { pipelined_pessimistic_lock: Arc::new(AtomicBool::new(false)), in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), }, - Arc::new(FlowController::empty()), + Arc::new(FlowController::Singleton(EngineFlowController::empty())), DummyReporter, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 0b43e11c468..7d0bb8c0b74 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -33,8 +33,9 @@ use tikv::{ mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, test_util::*, txn::{ - commands, flow_controller::FlowController, Error as TxnError, - ErrorInner as TxnErrorInner, + commands, + flow_controller::{EngineFlowController, FlowController}, + Error as TxnError, ErrorInner as TxnErrorInner, }, Error as StorageError, ErrorInner as StorageErrorInner, *, }, @@ -253,11 +254,11 @@ fn 
test_scale_scheduler_pool() { let cfg = new_tikv_config(1); let kv_engine = storage.get_engine().kv_engine(); let (_tx, rx) = std::sync::mpsc::channel(); - let flow_controller = Arc::new(FlowController::new( + let flow_controller = Arc::new(FlowController::Singleton(EngineFlowController::new( &cfg.storage.flow_control, kv_engine.clone(), rx, - )); + ))); let cfg_controller = ConfigController::new(cfg.clone()); let (scheduler, _receiver) = dummy_scheduler(); From abc4e2750e8f83a39ae4f829dccd68a192618669 Mon Sep 17 00:00:00 2001 From: Shenghui Wu <793703860@qq.com> Date: Wed, 22 Jun 2022 13:22:36 +0800 Subject: [PATCH 0039/1149] copr: Support paging for executors (#12841) ref tikv/tikv#12848 Support paging for Aggregate/Limit/TopN executors. Signed-off-by: wshwsh12 <793703860@qq.com> Co-authored-by: Yilin Chen Co-authored-by: Ti Chi Robot --- .../tidb_query_datatype/src/expr/ctx.rs | 3 + .../src/fast_hash_aggr_executor.rs | 11 + components/tidb_query_executors/src/runner.rs | 4 +- .../src/slow_hash_aggr_executor.rs | 11 + .../src/stream_aggr_executor.rs | 11 + .../src/top_n_executor.rs | 194 ++++++++++++++++- .../src/util/aggr_executor.rs | 203 +++++++++++++++++- 7 files changed, 434 insertions(+), 3 deletions(-) diff --git a/components/tidb_query_datatype/src/expr/ctx.rs b/components/tidb_query_datatype/src/expr/ctx.rs index 748b47e4fe7..f92c561b013 100644 --- a/components/tidb_query_datatype/src/expr/ctx.rs +++ b/components/tidb_query_datatype/src/expr/ctx.rs @@ -70,6 +70,8 @@ pub struct EvalConfig { // warning is a executor stuff instead of a evaluation stuff. 
pub max_warning_cnt: usize, pub sql_mode: SqlMode, + + pub paging_size: Option, } impl Default for EvalConfig { @@ -105,6 +107,7 @@ impl EvalConfig { flag: Flag::empty(), max_warning_cnt: DEFAULT_MAX_WARNING_CNT, sql_mode: SqlMode::empty(), + paging_size: None, } } diff --git a/components/tidb_query_executors/src/fast_hash_aggr_executor.rs b/components/tidb_query_executors/src/fast_hash_aggr_executor.rs index 038ce448eef..c5859e48338 100644 --- a/components/tidb_query_executors/src/fast_hash_aggr_executor.rs +++ b/components/tidb_query_executors/src/fast_hash_aggr_executor.rs @@ -126,6 +126,17 @@ impl BatchFastHashAggregationExecutor { .unwrap() } + #[cfg(test)] + pub fn new_for_test_with_config( + config: Arc, + src: Src, + group_by_exp: RpnExpression, + aggr_defs: Vec, + aggr_def_parser: impl AggrDefinitionParser, + ) -> Self { + Self::new_impl(config, src, group_by_exp, aggr_defs, aggr_def_parser).unwrap() + } + pub fn new( config: Arc, src: Src, diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 2b918186e3d..9e118f676b9 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -378,7 +378,9 @@ impl BatchExecutorsRunner { ) -> Result { let executors_len = req.get_executors().len(); let collect_exec_summary = req.get_collect_execution_summaries(); - let config = Arc::new(EvalConfig::from_request(&req)?); + let mut config = EvalConfig::from_request(&req)?; + config.paging_size = paging_size; + let config = Arc::new(config); let out_most_executor = build_executors( req.take_executors().into(), diff --git a/components/tidb_query_executors/src/slow_hash_aggr_executor.rs b/components/tidb_query_executors/src/slow_hash_aggr_executor.rs index 5960caa9478..bd1e5cf8a80 100644 --- a/components/tidb_query_executors/src/slow_hash_aggr_executor.rs +++ b/components/tidb_query_executors/src/slow_hash_aggr_executor.rs @@ -104,6 +104,17 @@ impl 
BatchSlowHashAggregationExecutor { .unwrap() } + #[cfg(test)] + pub fn new_for_test_with_config( + config: Arc, + src: Src, + group_by_exps: Vec, + aggr_defs: Vec, + aggr_def_parser: impl AggrDefinitionParser, + ) -> Self { + Self::new_impl(config, src, group_by_exps, aggr_defs, aggr_def_parser).unwrap() + } + pub fn new( config: Arc, src: Src, diff --git a/components/tidb_query_executors/src/stream_aggr_executor.rs b/components/tidb_query_executors/src/stream_aggr_executor.rs index 705a2d8972a..52f55751442 100644 --- a/components/tidb_query_executors/src/stream_aggr_executor.rs +++ b/components/tidb_query_executors/src/stream_aggr_executor.rs @@ -123,6 +123,17 @@ impl BatchStreamAggregationExecutor { .unwrap() } + #[cfg(test)] + pub fn new_for_test_with_config( + config: Arc, + src: Src, + group_by_exps: Vec, + aggr_defs: Vec, + aggr_def_parser: impl AggrDefinitionParser, + ) -> Self { + Self::new_impl(config, src, group_by_exps, aggr_defs, aggr_def_parser).unwrap() + } + pub fn new( config: Arc, src: Src, diff --git a/components/tidb_query_executors/src/top_n_executor.rs b/components/tidb_query_executors/src/top_n_executor.rs index 20adbbad12c..112a3f3c33b 100644 --- a/components/tidb_query_executors/src/top_n_executor.rs +++ b/components/tidb_query_executors/src/top_n_executor.rs @@ -106,6 +106,35 @@ impl BatchTopNExecutor { } } + #[cfg(test)] + pub fn new_for_test_with_config( + config: Arc, + src: Src, + order_exprs: Vec, + order_is_desc: Vec, + n: usize, + ) -> Self { + assert_eq!(order_exprs.len(), order_is_desc.len()); + + let order_exprs_field_type: Vec = order_exprs + .iter() + .map(|expr| expr.ret_field_type(src.schema()).clone()) + .collect(); + + Self { + heap: BinaryHeap::new(), + eval_columns_buffer_unsafe: Box::new(Vec::new()), + order_exprs: order_exprs.into_boxed_slice(), + order_exprs_field_type: order_exprs_field_type.into_boxed_slice(), + order_is_desc: order_is_desc.into_boxed_slice(), + n, + + context: EvalContext::new(config), + src, + 
is_ended: false, + } + } + pub fn new( config: std::sync::Arc, src: Src, @@ -296,7 +325,7 @@ impl BatchExecutor for BatchTopNExecutor { } #[inline] - fn next_batch(&mut self, _scan_rows: usize) -> BatchExecuteResult { + fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { assert!(!self.is_ended); if self.n == 0 { @@ -309,6 +338,12 @@ impl BatchExecutor for BatchTopNExecutor { }; } + if let Some(paging_size) = self.context.cfg.paging_size { + if self.n > paging_size as usize { + return self.src.next_batch(scan_rows); + } + } + let result = self.handle_next_batch(); match result { @@ -1307,4 +1342,161 @@ mod tests { ], ); } + + #[test] + fn test_top_paging() { + // Top N = 5 and PagingSize = 6, same with no-paging. + let test_top5_paging6 = |col_index: usize, is_desc: bool, expected: &[Option]| { + let mut config = EvalConfig::default(); + config.paging_size = Some(6); + let config = Arc::new(config); + let src_exec = make_src_executor_unsigned(); + let mut exec = BatchTopNExecutor::new_for_test_with_config( + config, + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(col_index) + .build_for_test(), + ], + vec![is_desc], + 5, + ); + + let r = exec.next_batch(1); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = exec.next_batch(1); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = exec.next_batch(1); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); + assert_eq!(r.physical_columns.rows_len(), 5); + assert_eq!(r.physical_columns.columns_len(), 3); + assert_eq!( + r.physical_columns[col_index].decoded().to_int_vec(), + expected + ); + assert!(r.is_drained.unwrap()); + }; + + test_top5_paging6( + 0, + false, + &[ + None, + Some(300_u64 as i64), + Some(2000_u64 as i64), + Some(9_223_372_036_854_775_807_u64 as i64), + Some(9_223_372_036_854_775_808_u64 as i64), 
+ ], + ); + + test_top5_paging6( + 0, + true, + &[ + Some(18_446_744_073_709_551_615_u64 as i64), + Some(18_446_744_073_709_551_613_u64 as i64), + Some(9_223_372_036_854_775_808_u64 as i64), + Some(9_223_372_036_854_775_807_u64 as i64), + Some(2000_u64 as i64), + ], + ); + + test_top5_paging6( + 1, + false, + &[ + None, + Some(-9_223_372_036_854_775_808), + Some(-3), + Some(-1), + Some(300), + ], + ); + + test_top5_paging6( + 1, + true, + &[ + Some(9_223_372_036_854_775_807), + Some(2000), + Some(300), + Some(-1), + Some(-3), + ], + ); + + test_top5_paging6( + 2, + false, + &[ + None, + Some(300_u32 as i64), + Some(2000_u32 as i64), + Some(2_147_483_647_u32 as i64), + Some(2_147_483_648_u32 as i64), + ], + ); + + test_top5_paging6( + 2, + true, + &[ + Some(4_294_967_295_u32 as i64), + Some(4_294_967_295_u32 as i64), + Some(2_147_483_648_u32 as i64), + Some(2_147_483_647_u32 as i64), + Some(2000_u32 as i64), + ], + ); + + // Top N = 5 and PagingSize = 4, return all data and do nothing. + let test_top5_paging4 = |build_src_executor: fn() -> MockExecutor| { + let mut config = EvalConfig::default(); + config.paging_size = Some(4); + let config = Arc::new(config); + let src_exec = build_src_executor(); + let mut exec = BatchTopNExecutor::new_for_test_with_config( + config, + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .build_for_test(), + ], + vec![false], + 5, + ); + let mut exec2 = build_src_executor(); + + loop { + let r1 = exec.next_batch(1); + let r2 = exec2.next_batch(1); + assert_eq!(r1.logical_rows, r2.logical_rows); + assert_eq!( + r1.physical_columns.rows_len(), + r2.physical_columns.rows_len() + ); + assert_eq!( + r1.physical_columns.columns_len(), + r2.physical_columns.columns_len() + ); + let r1_is_drained = r1.is_drained.unwrap(); + assert_eq!(r1_is_drained, r2.is_drained.unwrap()); + if r1_is_drained { + break; + } + } + }; + + test_top5_paging4(make_src_executor_unsigned); + 
test_top5_paging4(make_src_executor); + test_top5_paging4(make_bytes_src_executor); + } } diff --git a/components/tidb_query_executors/src/util/aggr_executor.rs b/components/tidb_query_executors/src/util/aggr_executor.rs index 96c67e1b4d8..74a9429b390 100644 --- a/components/tidb_query_executors/src/util/aggr_executor.rs +++ b/components/tidb_query_executors/src/util/aggr_executor.rs @@ -121,6 +121,7 @@ pub struct AggregationExecutor, + required_row: Option, } impl> AggregationExecutor { @@ -185,6 +186,7 @@ impl> AggregationExecutor> AggregationExecutor> AggregationExecutor= required_row as usize { + src_is_drained = true + } + // StreamAgg will return groups_len - 1 rows immediately + if !src_is_drained && self.imp.is_partial_results_ready() { + self.required_row = Some(required_row + 1 - self.imp.groups_len() as u64) + } + } + // aggregate result is always available when source is drained let result = if src_is_drained || self.imp.is_partial_results_ready() { Some(self.aggregate_partial_results(src_is_drained)?) 
@@ -468,4 +480,193 @@ pub mod tests { ], ) } + + /// Builds an executor that will return these logical data: + /// + /// == Schema == + /// Col0(Real) Col1(Real) + /// == Call #1 == + /// NULL 1.0 + /// 7.0 2.0 + /// NULL NULL + /// NULL 4.5 + /// == Call #2 == + /// == Call #3 == + /// 1.5 4.5 + /// 6.0 6.0 + /// == Call #4 == + /// 6.0 6.0 + /// 7.0 7.0 + /// (drained) + pub fn make_src_executor_2() -> MockExecutor { + MockExecutor::new( + vec![FieldTypeTp::Double.into(), FieldTypeTp::Double.into()], + vec![ + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Real( + vec![None, None, None, Real::new(-5.0).ok(), Real::new(7.0).ok()] + .into(), + ), + VectorValue::Real( + vec![ + None, + Real::new(4.5).ok(), + Real::new(1.0).ok(), + None, + Real::new(2.0).ok(), + ] + .into(), + ), + ]), + logical_rows: vec![2, 4, 0, 1], + warnings: EvalWarnings::default(), + is_drained: Ok(false), + }, + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Real(vec![None].into()), + VectorValue::Real(vec![Real::new(-10.0).ok()].into()), + ]), + logical_rows: Vec::new(), + warnings: EvalWarnings::default(), + is_drained: Ok(false), + }, + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Real( + vec![ + Real::new(5.5).ok(), + Real::new(1.5).ok(), + Real::new(6.0).ok(), + ] + .into(), + ), + VectorValue::Real( + vec![None, Real::new(4.5).ok(), Real::new(6.0).ok()].into(), + ), + ]), + logical_rows: vec![1, 2], + warnings: EvalWarnings::default(), + is_drained: Ok(false), + }, + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Real(vec![Real::new(7.0).ok(), Real::new(6.0).ok()].into()), + VectorValue::Real(vec![Real::new(7.0).ok(), Real::new(6.0).ok()].into()), + ]), + logical_rows: vec![1, 0], + warnings: EvalWarnings::default(), + is_drained: Ok(true), + }, + ], + ) + } + + #[test] + #[allow(clippy::type_complexity)] + fn test_agg_paging() 
{ + use std::sync::Arc; + + use tidb_query_datatype::expr::EvalConfig; + use tidb_query_expr::RpnExpressionBuilder; + use tipb::ExprType; + use tipb_helper::ExprDefBuilder; + + use crate::{ + BatchFastHashAggregationExecutor, BatchSlowHashAggregationExecutor, + BatchStreamAggregationExecutor, + }; + + let group_by_exp = || { + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(1) + .build_for_test() + }; + + let aggr_definitions = vec![ + ExprDefBuilder::aggr_func(ExprType::Count, FieldTypeTp::LongLong) + .push_child(ExprDefBuilder::constant_int(1)) + .build(), + ]; + + let exec_fast = |src_exec, paging_size| { + let mut config = EvalConfig::default(); + config.paging_size = paging_size; + let config = Arc::new(config); + Box::new(BatchFastHashAggregationExecutor::new_for_test_with_config( + config, + src_exec, + group_by_exp(), + aggr_definitions.clone(), + AllAggrDefinitionParser, + )) as Box> + }; + + let exec_slow = |src_exec, paging_size| { + let mut config = EvalConfig::default(); + config.paging_size = paging_size; + let config = Arc::new(config); + Box::new(BatchSlowHashAggregationExecutor::new_for_test_with_config( + config, + src_exec, + vec![group_by_exp()], + aggr_definitions.clone(), + AllAggrDefinitionParser, + )) as Box> + }; + + let test_paging_size = vec![2, 5, 7]; + let expect_call_num = vec![1, 3, 4]; + let expect_row_num = vec![vec![4], vec![0, 0, 5], vec![0, 0, 0, 6]]; + let executor_builders: Vec) -> _>> = + vec![Box::new(exec_fast), Box::new(exec_slow)]; + for test_case in 0..test_paging_size.len() { + let paging_size = test_paging_size[test_case]; + let call_num = expect_call_num[test_case]; + let row_num = &expect_row_num[test_case]; + for exec_builder in &executor_builders { + let src_exec = make_src_executor_2(); + let mut exec = exec_builder(src_exec, Some(paging_size)); + for nth_call in 0..call_num { + let r = exec.next_batch(1); + if nth_call == call_num - 1 { + assert!(r.is_drained.unwrap()); + } else { + 
assert!(!r.is_drained.unwrap()); + } + assert_eq!(r.physical_columns.rows_len(), row_num[nth_call]); + } + } + } + + let expect_row_num2 = vec![vec![4], vec![3, 0, 2], vec![3, 0, 1, 2]]; + let exec_stream = |src_exec, paging_size| { + let mut config = EvalConfig::default(); + config.paging_size = paging_size; + let config = Arc::new(config); + Box::new(BatchStreamAggregationExecutor::new_for_test_with_config( + config, + src_exec, + vec![group_by_exp()], + aggr_definitions.clone(), + AllAggrDefinitionParser, + )) as Box> + }; + for test_case in 0..test_paging_size.len() { + let paging_size = test_paging_size[test_case]; + let call_num = expect_call_num[test_case]; + let row_num = &expect_row_num2[test_case]; + let mut exec = exec_stream(make_src_executor_2(), Some(paging_size)); + for nth_call in 0..call_num { + let r = exec.next_batch(1); + if nth_call == call_num - 1 { + assert!(r.is_drained.unwrap()); + } else { + assert!(!r.is_drained.unwrap()); + } + assert_eq!(r.physical_columns.rows_len(), row_num[nth_call]); + } + } + } } From 931c9fb2f14c0624583421a56740179fc4f158ab Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Wed, 22 Jun 2022 14:22:37 +0800 Subject: [PATCH 0040/1149] raftstore: Implement coprocessor observer on_empty_cmd (#12851) ref tikv/tikv#12849 Support new observers on_empty_cmd. 
Signed-off-by: CalvinNeo Co-authored-by: Ti Chi Robot --- .../raftstore/src/coprocessor/dispatcher.rs | 20 +++++++++++++++++++ components/raftstore/src/coprocessor/mod.rs | 3 +++ components/raftstore/src/store/fsm/apply.rs | 5 ++++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index 8c8b857a47b..3f51dd918c6 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -342,6 +342,16 @@ impl CoprocessorHost { CoprocessorHost { registry, cfg } } + pub fn on_empty_cmd(&self, region: &Region, index: u64, term: u64) { + loop_ob!( + region, + &self.registry.query_observers, + on_empty_cmd, + index, + term, + ); + } + /// Call all propose hooks until bypass is set to true. pub fn pre_propose(&self, region: &Region, req: &mut RaftCmdRequest) -> Result<()> { if !req.has_admin_request() { @@ -623,6 +633,11 @@ mod tests { self.called.fetch_add(6, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); } + + fn on_empty_cmd(&self, ctx: &mut ObserverContext<'_>, _index: u64, _term: u64) { + self.called.fetch_add(14, Ordering::SeqCst); + ctx.bypass = self.bypass.load(Ordering::SeqCst); + } } impl RoleObserver for TestCoprocessor { @@ -748,6 +763,11 @@ mod tests { host.on_flush_applied_cmd_batch(cb.level, vec![cb], &PanicEngine); // `post_apply` + `on_flush_applied_cmd_batch` => 13 + 6 = 19 assert_all!([&ob.called], &[74]); + + let mut empty_req = RaftCmdRequest::default(); + empty_req.set_requests(vec![Request::default()].into()); + host.on_empty_cmd(®ion, 0, 0); + assert_all!([&ob.called], &[88]); } #[test] diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index a9772d948ed..39b412ce950 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -89,6 +89,9 @@ pub trait AdminObserver: 
Coprocessor { } pub trait QueryObserver: Coprocessor { + /// Hook when observe applying empty cmd, probably caused by leadership change. + fn on_empty_cmd(&self, _: &mut ObserverContext<'_>, _index: u64, _term: u64) {} + /// Hook to call before proposing write request. /// /// We don't propose read request, hence there is no hook for it yet. diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index e3c1172ef5b..e28c8cf2424 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -1077,7 +1077,10 @@ where return self.process_raft_cmd(apply_ctx, index, term, cmd); } - // TOOD(cdc): should we observe empty cmd, aka leader change? + + // we should observe empty cmd, aka leader change, + // read index during confchange, or other situations. + apply_ctx.host.on_empty_cmd(&self.region, index, term); self.apply_state.set_applied_index(index); self.applied_index_term = term; From 595ae3fcc27183ce78e38211a2bab172c0d21a88 Mon Sep 17 00:00:00 2001 From: BornChanger <97348524+BornChanger@users.noreply.github.com> Date: Thu, 23 Jun 2022 20:20:37 +0800 Subject: [PATCH 0041/1149] *: support tune quota limiter for auto analyze at execution time (#12679) close tikv/tikv#12503 Signed-off-by: BornChanger --- components/server/src/server.rs | 99 +++++- components/test_raftstore/src/server.rs | 4 + components/tidb_query_executors/src/runner.rs | 2 +- components/tikv_util/src/metrics/mod.rs | 3 + components/tikv_util/src/quota_limiter.rs | 321 ++++++++++++++---- components/tikv_util/src/sys/cpu_time.rs | 238 ++++++++++++- etc/config-template.toml | 11 +- src/config.rs | 62 +++- src/coprocessor/endpoint.rs | 1 + src/coprocessor/mod.rs | 2 + src/coprocessor/statistics/analyze.rs | 27 +- src/server/service/diagnostics/sys.rs | 8 +- src/storage/mod.rs | 4 +- src/storage/txn/scheduler.rs | 2 +- 14 files changed, 693 insertions(+), 91 deletions(-) diff --git a/components/server/src/server.rs 
b/components/server/src/server.rs index 11f6071dbc6..cded99edfe3 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -108,8 +108,9 @@ use tikv_util::{ check_environment_variables, config::{ensure_dir_exist, RaftDataStateMachine, VersionTrack}, math::MovingAvgU32, + metrics::INSTANCE_BACKEND_CPU_QUOTA, quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, - sys::{disk, register_memory_usage_high_water, SysQuota}, + sys::{cpu_time::ProcessStat, disk, register_memory_usage_high_water, SysQuota}, thread_group::GroupProperties, time::{Instant, Monitor}, worker::{Builder as WorkerBuilder, LazyWorker, Scheduler, Worker}, @@ -121,6 +122,19 @@ use crate::{ tikv_util::sys::thread::ThreadBuildWrapper, }; +// minimum number of core kept for background requests +const BACKGROUND_REQUEST_CORE_LOWER_BOUND: f64 = 1.0; +// max ratio of core quota for background requests +const BACKGROUND_REQUEST_CORE_MAX_RATIO: f64 = 0.95; +// default ratio of core quota for background requests = core_number * 0.5 +const BACKGROUND_REQUEST_CORE_DEFAULT_RATIO: f64 = 0.5; +// indication of TiKV instance is short of cpu +const SYSTEM_BUSY_THRESHOLD: f64 = 0.80; +// indication of TiKV instance in healthy state when cpu usage is in [0.5, 0.80) +const SYSTEM_HEALTHY_THRESHOLD: f64 = 0.50; +// pace of cpu quota adjustment +const CPU_QUOTA_ADJUSTMENT_PACE: f64 = 200.0; // 0.2 vcpu + #[inline] fn run_impl(config: TiKvConfig) { let mut tikv = TiKvServer::::init(config); @@ -144,6 +158,7 @@ fn run_impl(config: TiKvConfig) { tikv.init_storage_stats_task(engines); tikv.run_server(server_config); tikv.run_status_server(); + tikv.init_quota_tuning_task(tikv.quota_limiter.clone()); signal_handler::wait_for_signal(Some(tikv.engines.take().unwrap().engines)); tikv.stop(); @@ -185,6 +200,7 @@ const DEFAULT_METRICS_FLUSH_INTERVAL: Duration = Duration::from_millis(10_000); const DEFAULT_MEMTRACE_FLUSH_INTERVAL: Duration = Duration::from_millis(1_000); const 
DEFAULT_ENGINE_METRICS_RESET_INTERVAL: Duration = Duration::from_millis(60_000); const DEFAULT_STORAGE_STATS_INTERVAL: Duration = Duration::from_secs(1); +const DEFAULT_QUOTA_LIMITER_TUNE_INTERVAL: Duration = Duration::from_secs(5); /// A complete TiKV server. struct TiKvServer { @@ -279,11 +295,16 @@ impl TiKvServer { let latest_ts = block_on(pd_client.get_tso()).expect("failed to get timestamp from PD"); let concurrency_manager = ConcurrencyManager::new(latest_ts); + // use different quota for front-end and back-end requests let quota_limiter = Arc::new(QuotaLimiter::new( config.quota.foreground_cpu_time, config.quota.foreground_write_bandwidth, config.quota.foreground_read_bandwidth, + config.quota.background_cpu_time, + config.quota.background_write_bandwidth, + config.quota.background_read_bandwidth, config.quota.max_delay_duration, + config.quota.enable_auto_tune, )); TiKvServer { @@ -1222,6 +1243,82 @@ impl TiKvServer { }); } + // Only background cpu quota tuning is implemented at present. 
iops and frontend quota tuning is on the way + fn init_quota_tuning_task(&self, quota_limiter: Arc) { + // No need to do auto tune when capacity is really low + if SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_MAX_RATIO + < BACKGROUND_REQUEST_CORE_LOWER_BOUND + { + return; + }; + + // Determine the base cpu quota + let base_cpu_quota = { + // if cpu quota is not specified, start from optimistic case + if quota_limiter.cputime_limiter(false).is_infinite() { + let quota = 1000_f64 + * f64::max( + BACKGROUND_REQUEST_CORE_LOWER_BOUND, + SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_DEFAULT_RATIO, + ); + quota_limiter.set_cpu_time_limit(quota as usize, false); + quota + } else { + quota_limiter.cputime_limiter(false) / 1000_f64 + } + }; + + // Calculate the celling and floor quota + let celling_quota = f64::min( + base_cpu_quota * 2.0, + 1_000_f64 * SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_MAX_RATIO, + ); + let floor_quota = f64::max( + base_cpu_quota * 0.5, + 1_000_f64 * BACKGROUND_REQUEST_CORE_LOWER_BOUND, + ); + + let mut proc_stats: ProcessStat = ProcessStat::cur_proc_stat().unwrap(); + self.background_worker.spawn_interval_task( + DEFAULT_QUOTA_LIMITER_TUNE_INTERVAL, + move || { + if quota_limiter.auto_tune_enabled() { + let old_quota = quota_limiter.cputime_limiter(false) / 1000_f64; + let cpu_usage = match proc_stats.cpu_usage() { + Ok(r) => r, + Err(_e) => 0.0, + }; + // Try tuning quota when cpu_usage is correctly collected. + // rule based tuning: + // 1) if instance is busy, shrink cpu quota for analyze by one quota pace until lower bound is hit; + // 2) if instance cpu usage is healthy, no op; + // 3) if instance is idle, increase cpu quota by one quota pace until upper bound is hit. 
+ if cpu_usage > 0.0f64 { + let mut target_quota = old_quota; + + let cpu_util = cpu_usage / SysQuota::cpu_cores_quota(); + if cpu_util >= SYSTEM_BUSY_THRESHOLD { + target_quota = + f64::max(target_quota - CPU_QUOTA_ADJUSTMENT_PACE, floor_quota); + } else if cpu_util < SYSTEM_HEALTHY_THRESHOLD { + target_quota = + f64::min(target_quota + CPU_QUOTA_ADJUSTMENT_PACE, celling_quota); + } + + if old_quota != target_quota { + quota_limiter.set_cpu_time_limit(target_quota as usize, false); + debug!( + "cpu_time_limiter tuned for backend request"; + "cpu_util" => ?cpu_util, + "new quota" => ?target_quota); + INSTANCE_BACKEND_CPU_QUOTA.set(target_quota as i64); + } + } + } + }, + ); + } + fn init_storage_stats_task(&self, engines: Engines) { let config_disk_capacity: u64 = self.config.raft_store.capacity.0; let data_dir = self.config.storage.data_dir.clone(); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 88e0b079a4d..ac6a72e3a06 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -381,7 +381,11 @@ impl ServerCluster { cfg.quota.foreground_cpu_time, cfg.quota.foreground_write_bandwidth, cfg.quota.foreground_read_bandwidth, + cfg.quota.background_cpu_time, + cfg.quota.background_write_bandwidth, + cfg.quota.background_read_bandwidth, cfg.quota.max_delay_duration, + cfg.quota.enable_auto_tune, )); let store = create_raft_storage::<_, _, _, F>( engine, diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 9e118f676b9..dc88c1f6993 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -465,7 +465,7 @@ impl BatchExecutorsRunner { )? 
}; - let quota_delay = self.quota_limiter.async_consume(sample).await; + let quota_delay = self.quota_limiter.consume_sample(sample, true).await; if !quota_delay.is_zero() { NON_TXN_COMMAND_THROTTLE_TIME_COUNTER_VEC_STATIC .get(ThrottleType::dag) diff --git a/components/tikv_util/src/metrics/mod.rs b/components/tikv_util/src/metrics/mod.rs index 46cc9931048..4b5a9abc2f7 100644 --- a/components/tikv_util/src/metrics/mod.rs +++ b/components/tikv_util/src/metrics/mod.rs @@ -77,6 +77,7 @@ make_auto_flush_static_metric! { pub label_enum ThrottleType { dag, analyze_full_sampling, + quota_limiter_auto_tuned, } pub struct NonTxnCommandThrottleTimeCounterVec: LocalIntCounter { @@ -102,6 +103,8 @@ lazy_static! { NON_TXN_COMMAND_THROTTLE_TIME_COUNTER_VEC, NonTxnCommandThrottleTimeCounterVec ); + pub static ref INSTANCE_BACKEND_CPU_QUOTA: IntGauge = + register_int_gauge!("tikv_backend_cpu_quota", "cpu quota for backend request").unwrap(); } pub fn convert_record_pairs(m: HashMap) -> RecordPairVec { diff --git a/components/tikv_util/src/quota_limiter.rs b/components/tikv_util/src/quota_limiter.rs index 6179ab75da6..c9a761f49de 100644 --- a/components/tikv_util/src/quota_limiter.rs +++ b/components/tikv_util/src/quota_limiter.rs @@ -2,7 +2,7 @@ use std::{ sync::{ - atomic::{AtomicU64, Ordering}, + atomic::{AtomicBool, AtomicU64, Ordering}, Arc, }, time::Duration, @@ -22,15 +22,59 @@ use super::{ // It's better to use a universal approach. const CPU_LIMITER_REFILL_DURATION: Duration = Duration::from_millis(100); -// Quota limiter allows users to obtain stable performance by increasing the -// completion time of tasks through restrictions of different metrics. 
+// Limter can be issued to cpu, write and read bandwidth #[derive(Debug)] -pub struct QuotaLimiter { +pub struct LimiterItems { cputime_limiter: Limiter, write_bandwidth_limiter: Limiter, read_bandwidth_limiter: Limiter, +} + +impl LimiterItems { + pub fn new( + cpu_quota: usize, + write_bandwidth: ReadableSize, + read_bandwidth: ReadableSize, + ) -> Self { + let cputime_limiter = + Limiter::builder(QuotaLimiter::speed_limit(cpu_quota as f64 * 1000_f64)) + .refill(CPU_LIMITER_REFILL_DURATION) + .build(); + + let write_bandwidth_limiter = + Limiter::new(QuotaLimiter::speed_limit(write_bandwidth.0 as f64)); + + let read_bandwidth_limiter = + Limiter::new(QuotaLimiter::speed_limit(read_bandwidth.0 as f64)); + + Self { + cputime_limiter, + write_bandwidth_limiter, + read_bandwidth_limiter, + } + } +} + +impl Default for LimiterItems { + fn default() -> Self { + Self { + cputime_limiter: Limiter::new(f64::INFINITY), + write_bandwidth_limiter: Limiter::new(f64::INFINITY), + read_bandwidth_limiter: Limiter::new(f64::INFINITY), + } + } +} + +// Quota limiter allows users to obtain stable performance by increasing the +// completion time of tasks through restrictions of different metrics. +#[derive(Debug)] +pub struct QuotaLimiter { + foreground_limiters: LimiterItems, + background_limiters: LimiterItems, // max delay nano seconds max_delay_duration: AtomicU64, + // if auto tune is enabled + enable_auto_tune: AtomicBool, } // Throttle must be consumed in quota limiter. 
@@ -86,11 +130,13 @@ impl<'a> Drop for CpuObserveGuard<'a> { impl Default for QuotaLimiter { fn default() -> Self { + let foreground_limiters = LimiterItems::default(); + let background_limiters = LimiterItems::default(); Self { - cputime_limiter: Limiter::new(f64::INFINITY), - write_bandwidth_limiter: Limiter::new(f64::INFINITY), - read_bandwidth_limiter: Limiter::new(f64::INFINITY), + foreground_limiters, + background_limiters, max_delay_duration: AtomicU64::new(0), + enable_auto_tune: AtomicBool::new(false), } } } @@ -98,26 +144,33 @@ impl Default for QuotaLimiter { impl QuotaLimiter { // 1000 millicpu equals to 1vCPU, 0 means unlimited pub fn new( - cpu_quota: usize, - write_bandwidth: ReadableSize, - read_bandwidth: ReadableSize, + foreground_cpu_quota: usize, + foreground_write_bandwidth: ReadableSize, + foreground_read_bandwidth: ReadableSize, + background_cpu_quota: usize, + background_write_bandwidth: ReadableSize, + background_read_bandwidth: ReadableSize, max_delay_duration: ReadableDuration, + enable_auto_tune: bool, ) -> Self { - let cputime_limiter = Limiter::builder(Self::speed_limit(cpu_quota as f64 * 1000_f64)) - .refill(CPU_LIMITER_REFILL_DURATION) - .build(); - - let write_bandwidth_limiter = Limiter::new(Self::speed_limit(write_bandwidth.0 as f64)); - - let read_bandwidth_limiter = Limiter::new(Self::speed_limit(read_bandwidth.0 as f64)); - + let foreground_limiters = LimiterItems::new( + foreground_cpu_quota, + foreground_write_bandwidth, + foreground_read_bandwidth, + ); + let background_limiters = LimiterItems::new( + background_cpu_quota, + background_write_bandwidth, + background_read_bandwidth, + ); let max_delay_duration = AtomicU64::new(max_delay_duration.0.as_nanos() as u64); + let enable_auto_tune = AtomicBool::new(enable_auto_tune); Self { - cputime_limiter, - write_bandwidth_limiter, - read_bandwidth_limiter, + foreground_limiters, + background_limiters, max_delay_duration, + enable_auto_tune, } } @@ -129,18 +182,30 @@ impl 
QuotaLimiter { } } - pub fn set_cpu_time_limit(&self, quota_limit: usize) { - self.cputime_limiter + #[inline] + fn get_limiters(&self, is_foreground: bool) -> &LimiterItems { + if is_foreground { + &self.foreground_limiters + } else { + &self.background_limiters + } + } + + pub fn set_cpu_time_limit(&self, quota_limit: usize, is_foreground: bool) { + self.get_limiters(is_foreground) + .cputime_limiter .set_speed_limit(Self::speed_limit(quota_limit as f64 * 1000_f64)); } - pub fn set_write_bandwidth_limit(&self, write_bandwidth: ReadableSize) { - self.write_bandwidth_limiter + pub fn set_write_bandwidth_limit(&self, write_bandwidth: ReadableSize, is_foreground: bool) { + self.get_limiters(is_foreground) + .write_bandwidth_limiter .set_speed_limit(Self::speed_limit(write_bandwidth.0 as f64)); } - pub fn set_read_bandwidth_limit(&self, read_bandwidth: ReadableSize) { - self.read_bandwidth_limiter + pub fn set_read_bandwidth_limit(&self, read_bandwidth: ReadableSize, is_foreground: bool) { + self.get_limiters(is_foreground) + .read_bandwidth_limiter .set_speed_limit(Self::speed_limit(read_bandwidth.0 as f64)); } @@ -149,39 +214,68 @@ impl QuotaLimiter { .store(duration.0.as_nanos() as u64, Ordering::Relaxed); } + pub fn set_enable_auto_tune(&self, enable_auto_tune: bool) { + self.enable_auto_tune + .store(enable_auto_tune, Ordering::Relaxed); + } + + pub fn cputime_limiter(&self, is_foreground: bool) -> f64 { + self.get_limiters(is_foreground) + .cputime_limiter + .speed_limit() + } + fn max_delay_duration(&self) -> Duration { Duration::from_nanos(self.max_delay_duration.load(Ordering::Relaxed)) } + pub fn auto_tune_enabled(&self) -> bool { + self.enable_auto_tune.load(Ordering::Relaxed) + } + // To generate a sampler. 
pub fn new_sample(&self) -> Sample { Sample { read_bytes: 0, write_bytes: 0, cpu_time: Duration::ZERO, - enable_cpu_limit: !self.cputime_limiter.speed_limit().is_infinite(), + enable_cpu_limit: !self + .foreground_limiters + .cputime_limiter + .speed_limit() + .is_infinite() + || !self + .background_limiters + .cputime_limiter + .speed_limit() + .is_infinite(), } } // To consume a sampler and return delayed duration. // If the sampler is null, the speed limiter will just return ZERO. - pub async fn async_consume(&self, sample: Sample) -> Duration { + pub async fn consume_sample(&self, sample: Sample, is_foreground: bool) -> Duration { + let limiters = self.get_limiters(is_foreground); + let cpu_dur = if sample.cpu_time > Duration::ZERO { - self.cputime_limiter + limiters + .cputime_limiter .consume_duration(sample.cpu_time.as_micros() as usize) } else { Duration::ZERO }; let w_bw_dur = if sample.write_bytes > 0 { - self.write_bandwidth_limiter + limiters + .write_bandwidth_limiter .consume_duration(sample.write_bytes) } else { Duration::ZERO }; let r_bw_dur = if sample.read_bytes > 0 { - self.read_bandwidth_limiter + limiters + .read_bandwidth_limiter .consume_duration(sample.read_bytes) } else { Duration::ZERO @@ -206,12 +300,12 @@ impl QuotaLimiter { } pub struct QuotaLimitConfigManager { - limiter: Arc, + quota_limiter: Arc, } impl QuotaLimitConfigManager { - pub fn new(limiter: Arc) -> Self { - Self { limiter } + pub fn new(quota_limiter: Arc) -> Self { + Self { quota_limiter } } } @@ -221,22 +315,46 @@ impl ConfigManager for QuotaLimitConfigManager { change: ConfigChange, ) -> std::result::Result<(), Box> { if let Some(cpu_limit) = change.get("foreground_cpu_time") { - self.limiter.set_cpu_time_limit(cpu_limit.into()); + self.quota_limiter + .set_cpu_time_limit(cpu_limit.into(), true); } + if let Some(write_bandwidth) = change.get("foreground_write_bandwidth") { - self.limiter - .set_write_bandwidth_limit(write_bandwidth.clone().into()) + self.quota_limiter + 
.set_write_bandwidth_limit(write_bandwidth.clone().into(), true); } + if let Some(read_bandwidth) = change.get("foreground_read_bandwidth") { - self.limiter - .set_write_bandwidth_limit(read_bandwidth.clone().into()); + self.quota_limiter + .set_read_bandwidth_limit(read_bandwidth.clone().into(), true); + } + + if let Some(cpu_limit) = change.get("background_cpu_time") { + self.quota_limiter + .set_cpu_time_limit(cpu_limit.into(), false); + } + + if let Some(write_bandwidth) = change.get("background_write_bandwidth") { + self.quota_limiter + .set_write_bandwidth_limit(write_bandwidth.clone().into(), false); + } + + if let Some(read_bandwidth) = change.get("background_read_bandwidth") { + self.quota_limiter + .set_read_bandwidth_limit(read_bandwidth.clone().into(), false); } + if let Some(duration) = change.get("max_delay_duration") { let delay_dur: ReadableDuration = duration.clone().into(); - self.limiter + self.quota_limiter .max_delay_duration .store(delay_dur.0.as_nanos() as u64, Ordering::Relaxed); } + + if let Some(enable_auto_tune) = change.get("enable_auto_tune") { + self.quota_limiter + .set_enable_auto_tune(enable_auto_tune.clone().into()); + } Ok(()) } } @@ -252,10 +370,14 @@ mod tests { // refill duration = 100ms // bucket capacity = 100 let quota_limiter = QuotaLimiter::new( + 1000, + ReadableSize::kb(1), + ReadableSize::kb(1), 1000, ReadableSize::kb(1), ReadableSize::kb(1), ReadableDuration::millis(0), + false, ); let thread_start_time = ThreadTime::now(); @@ -269,81 +391,160 @@ mod tests { let mut sample = quota_limiter.new_sample(); sample.add_cpu_time(Duration::from_millis(60)); - let should_delay = block_on(quota_limiter.async_consume(sample)); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); + check_duration(should_delay, Duration::ZERO); + + let mut sample = quota_limiter.new_sample(); + sample.add_cpu_time(Duration::from_millis(50)); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); + 
check_duration(should_delay, Duration::from_millis(110)); + + std::thread::sleep(Duration::from_millis(10)); + + let mut sample = quota_limiter.new_sample(); + sample.add_cpu_time(Duration::from_millis(20)); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); + // should less 60+50+20 + assert!(should_delay < Duration::from_millis(130)); + + let mut sample = quota_limiter.new_sample(); + sample.add_cpu_time(Duration::from_millis(200)); + sample.add_write_bytes(256); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); + check_duration(should_delay, Duration::from_millis(250)); + + // ThreadTime elapsed time is not long. + assert!(thread_start_time.elapsed() < Duration::from_millis(50)); + + quota_limiter.set_cpu_time_limit(2000, true); + let mut sample = quota_limiter.new_sample(); + sample.add_cpu_time(Duration::from_millis(200)); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); + check_duration(should_delay, Duration::from_millis(100)); + + quota_limiter.set_read_bandwidth_limit(ReadableSize(512), true); + let mut sample = quota_limiter.new_sample(); + sample.add_read_bytes(128); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); + check_duration(should_delay, Duration::from_millis(250)); + + quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(2), true); + let mut sample = quota_limiter.new_sample(); + sample.add_write_bytes(256); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); + check_duration(should_delay, Duration::from_millis(125)); + + quota_limiter.set_max_delay_duration(ReadableDuration::millis(40)); + let mut sample = quota_limiter.new_sample(); + sample.add_read_bytes(256); + sample.add_write_bytes(512); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); + check_duration(should_delay, Duration::from_millis(40)); + + // test change limiter to 0 + quota_limiter.set_cpu_time_limit(0, true); + let mut sample = 
quota_limiter.new_sample(); + sample.add_cpu_time(Duration::from_millis(100)); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); + check_duration(should_delay, Duration::ZERO); + + quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(0), true); + let mut sample = quota_limiter.new_sample(); + sample.add_write_bytes(256); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); + check_duration(should_delay, Duration::ZERO); + + quota_limiter.set_read_bandwidth_limit(ReadableSize::kb(0), true); + let mut sample = quota_limiter.new_sample(); + sample.add_read_bytes(256); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); + check_duration(should_delay, Duration::ZERO); + + // set bandwidth back + quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(1), true); + quota_limiter.set_max_delay_duration(ReadableDuration::millis(0)); + let mut sample = quota_limiter.new_sample(); + sample.add_write_bytes(128); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); + check_duration(should_delay, Duration::from_millis(125)); + + let mut sample = quota_limiter.new_sample(); + sample.add_cpu_time(Duration::from_millis(60)); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::ZERO); let mut sample = quota_limiter.new_sample(); sample.add_cpu_time(Duration::from_millis(50)); - let should_delay = block_on(quota_limiter.async_consume(sample)); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::from_millis(110)); std::thread::sleep(Duration::from_millis(10)); let mut sample = quota_limiter.new_sample(); sample.add_cpu_time(Duration::from_millis(20)); - let should_delay = block_on(quota_limiter.async_consume(sample)); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); // should less 60+50+20 assert!(should_delay < Duration::from_millis(130)); 
let mut sample = quota_limiter.new_sample(); sample.add_cpu_time(Duration::from_millis(200)); sample.add_write_bytes(256); - let should_delay = block_on(quota_limiter.async_consume(sample)); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::from_millis(250)); // ThreadTime elapsed time is not long. assert!(thread_start_time.elapsed() < Duration::from_millis(50)); - quota_limiter.set_cpu_time_limit(2000); + quota_limiter.set_cpu_time_limit(2000, false); let mut sample = quota_limiter.new_sample(); sample.add_cpu_time(Duration::from_millis(200)); - let should_delay = block_on(quota_limiter.async_consume(sample)); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::from_millis(100)); - quota_limiter.set_read_bandwidth_limit(ReadableSize(512)); + quota_limiter.set_read_bandwidth_limit(ReadableSize(512), false); let mut sample = quota_limiter.new_sample(); sample.add_read_bytes(128); - let should_delay = block_on(quota_limiter.async_consume(sample)); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::from_millis(250)); - quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(2)); + quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(2), false); let mut sample = quota_limiter.new_sample(); sample.add_write_bytes(256); - let should_delay = block_on(quota_limiter.async_consume(sample)); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::from_millis(125)); quota_limiter.set_max_delay_duration(ReadableDuration::millis(40)); let mut sample = quota_limiter.new_sample(); sample.add_read_bytes(256); sample.add_write_bytes(512); - let should_delay = block_on(quota_limiter.async_consume(sample)); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, 
Duration::from_millis(40)); // test change limiter to 0 - quota_limiter.set_cpu_time_limit(0); + quota_limiter.set_cpu_time_limit(0, false); let mut sample = quota_limiter.new_sample(); sample.add_cpu_time(Duration::from_millis(100)); - let should_delay = block_on(quota_limiter.async_consume(sample)); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::ZERO); - quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(0)); + quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(0), false); let mut sample = quota_limiter.new_sample(); sample.add_write_bytes(256); - let should_delay = block_on(quota_limiter.async_consume(sample)); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::ZERO); - quota_limiter.set_read_bandwidth_limit(ReadableSize::kb(0)); + quota_limiter.set_read_bandwidth_limit(ReadableSize::kb(0), false); let mut sample = quota_limiter.new_sample(); sample.add_read_bytes(256); - let should_delay = block_on(quota_limiter.async_consume(sample)); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::ZERO); // set bandwidth back - quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(1)); + quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(1), false); quota_limiter.set_max_delay_duration(ReadableDuration::millis(0)); let mut sample = quota_limiter.new_sample(); sample.add_write_bytes(128); - let should_delay = block_on(quota_limiter.async_consume(sample)); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::from_millis(125)); } } diff --git a/components/tikv_util/src/sys/cpu_time.rs b/components/tikv_util/src/sys/cpu_time.rs index ff9515168c7..69fbb2fb251 100644 --- a/components/tikv_util/src/sys/cpu_time.rs +++ b/components/tikv_util/src/sys/cpu_time.rs @@ -2,12 +2,15 @@ // Modified from 
https://github.com/rust-lang/cargo/blob/426fae51f39ebf6c545a2c12f78bc09fbfdb7aa9/src/cargo/util/cpu.rs // TODO: Maybe use https://github.com/heim-rs/heim is better after https://github.com/heim-rs/heim/issues/233 is fixed. -use std::io; +use std::{ + io, mem, + time::{Duration, Instant}, +}; use derive_more::{Add, Sub}; -#[derive(Debug, Clone, Copy, Add, Sub)] -pub struct LiunxStyleCpuTime { +#[derive(Add, Sub)] +pub struct LinuxStyleCpuTime { pub user: u64, pub nice: u64, pub system: u64, @@ -20,7 +23,7 @@ pub struct LiunxStyleCpuTime { pub guest_nice: u64, } -impl LiunxStyleCpuTime { +impl LinuxStyleCpuTime { pub fn total(&self) -> u64 { // Note: guest(_nice) is not counted, since it is already in user. // See https://unix.stackexchange.com/questions/178045/proc-stat-is-guest-counted-into-user-time @@ -34,19 +37,57 @@ impl LiunxStyleCpuTime { + self.steal } - pub fn current() -> io::Result { + pub fn current() -> io::Result { imp::current() } } +pub use std::io::Result; + +pub use imp::cpu_time; + +/// A struct to monitor process cpu usage +#[derive(Clone, Copy)] +pub struct ProcessStat { + current_time: Instant, + cpu_time: Duration, +} + +impl ProcessStat { + pub fn cur_proc_stat() -> io::Result { + Ok(ProcessStat { + current_time: Instant::now(), + cpu_time: imp::cpu_time()?, + }) + } + + /// return the cpu usage from last invoke, + /// or when this struct created if it is the first invoke. 
+ pub fn cpu_usage(&mut self) -> io::Result { + let new_time = imp::cpu_time()?; + let old_time = mem::replace(&mut self.cpu_time, new_time); + + let old_now = mem::replace(&mut self.current_time, Instant::now()); + let real_time = self.current_time.duration_since(old_now).as_secs_f64(); + + if real_time > 0.0 { + let cpu_time = new_time + .checked_sub(old_time) + .map(|dur| dur.as_secs_f64()) + .unwrap_or(0.0); + + Ok(cpu_time / real_time) + } else { + Ok(0.0) + } + } +} + #[cfg(target_os = "linux")] mod imp { - use std::{ - fs::File, - io::{self, Read}, - }; + use std::{fs::File, io, io::Read, time::Duration}; - pub fn current() -> io::Result { + pub fn current() -> io::Result { let mut state = String::new(); File::open("/proc/stat")?.read_to_string(&mut state)?; @@ -55,7 +96,7 @@ mod imp { if parts.next()? != "cpu" { return None; } - Some(super::LiunxStyleCpuTime { + Some(super::LinuxStyleCpuTime { user: parts.next()?.parse::().ok()?, nice: parts.next()?.parse::().ok()?, system: parts.next()?.parse::().ok()?, @@ -70,6 +111,19 @@ mod imp { })() .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "first line of /proc/stat malformed")) } + + pub fn cpu_time() -> io::Result { + let mut time = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + + if unsafe { libc::clock_gettime(libc::CLOCK_PROCESS_CPUTIME_ID, &mut time) } == 0 { + Ok(Duration::new(time.tv_sec as u64, time.tv_nsec as u32)) + } else { + Err(io::Error::last_os_error()) + } + } } #[cfg(target_os = "macos")] @@ -78,7 +132,7 @@ mod imp { use libc::*; - pub fn current() -> io::Result { + pub fn current() -> io::Result { // There's scant little documentation on `host_processor_info` // throughout the internet, so this is just modeled after what everyone // else is doing. For now this is modeled largely after libuv. 
@@ -98,7 +152,7 @@ mod imp { return Err(io::Error::from_raw_os_error(ret)); } - let mut ret = super::LiunxStyleCpuTime { + let mut ret = super::LinuxStyleCpuTime { user: 0, system: 0, idle: 0, @@ -122,16 +176,172 @@ mod imp { Ok(ret) } } + + pub fn cpu_time() -> io::Result { + let mut time = unsafe { std::mem::zeroed() }; + + if unsafe { libc::getrusage(libc::RUSAGE_SELF, &mut time) } == 0 { + let sec = time.ru_utime.tv_sec as u64 + time.ru_stime.tv_sec as u64; + let nsec = (time.ru_utime.tv_usec as u32 + time.ru_stime.tv_usec as u32) * 1000; + + Ok(std::time::Duration::new(sec, nsec)) + } else { + Err(io::Error::last_os_error()) + } + } } #[cfg(not(any(target_os = "linux", target_os = "macos")))] mod imp { use std::io; - pub fn current() -> io::Result { + pub fn current() -> io::Result { Err(io::Error::new( io::ErrorKind::Other, "unsupported platform to learn CPU state", )) } + + use std::{io, mem, time::Duration}; + + use scopeguard::defer; + use winapi::{ + shared::{ + minwindef::FILETIME, + ntdef::{FALSE, NULL}, + }, + um::{ + handleapi::CloseHandle, + processthreadsapi::{ + GetCurrentProcess, GetCurrentThreadId, GetProcessTimes, GetSystemTimes, + GetThreadTimes, OpenThread, + }, + sysinfoapi::{GetSystemInfo, SYSTEM_INFO}, + winnt::THREAD_QUERY_INFORMATION, + }, + }; + + /// convert to u64, unit 100 ns + fn filetime_to_ns100(ft: FILETIME) -> u64 { + ((ft.dwHighDateTime as u64) << 32) + ft.dwLowDateTime as u64 + } + + fn get_sys_times() -> io::Result<(u64, u64, u64)> { + let mut idle = FILETIME::default(); + let mut kernel = FILETIME::default(); + let mut user = FILETIME::default(); + + let ret = unsafe { GetSystemTimes(&mut idle, &mut kernel, &mut user) }; + if ret == 0 { + return Err(io::Error::last_os_error()); + } + + let idle = filetime_to_ns100(idle); + let kernel = filetime_to_ns100(kernel); + let user = filetime_to_ns100(user); + Ok((idle, kernel, user)) + } + + fn get_thread_times(tid: u32) -> io::Result<(u64, u64)> { + let handler = unsafe { 
OpenThread(THREAD_QUERY_INFORMATION, FALSE as i32, tid) }; + if handler == NULL { + return Err(io::Error::last_os_error()); + } + defer! {{ + unsafe { CloseHandle(handler) }; + }} + + let mut create_time = FILETIME::default(); + let mut exit_time = FILETIME::default(); + let mut kernel_time = FILETIME::default(); + let mut user_time = FILETIME::default(); + + let ret = unsafe { + GetThreadTimes( + handler, + &mut create_time, + &mut exit_time, + &mut kernel_time, + &mut user_time, + ) + }; + if ret == 0 { + return Err(io::Error::last_os_error()); + } + + let kernel_time = filetime_to_ns100(kernel_time); + let user_time = filetime_to_ns100(user_time); + Ok((kernel_time, user_time)) + } + + #[inline] + pub fn cpu_time() -> io::Result { + let (kernel_time, user_time) = unsafe { + let process = GetCurrentProcess(); + let mut create_time = mem::zeroed(); + let mut exit_time = mem::zeroed(); + let mut kernel_time = mem::zeroed(); + let mut user_time = mem::zeroed(); + + let ret = GetProcessTimes( + process, + &mut create_time, + &mut exit_time, + &mut kernel_time, + &mut user_time, + ); + + if ret != 0 { + (kernel_time, user_time) + } else { + return Err(io::Error::last_os_error()); + } + }; + + let kt = filetime_to_ns100(kernel_time); + let ut = filetime_to_ns100(user_time); + + // convert ns + // + // Note: make it ns unit may overflow in some cases. + // For example, a machine with 128 cores runs for one year. + let cpu = (kt + ut) * 100; + + // make it un-normalized + let cpu = cpu * processor_numbers()? as u64; + + Ok(Duration::from_nanos(cpu)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // this test should be executed alone. 
+ #[test] + fn test_process_usage() { + let mut stat = ProcessStat::cur_proc_stat().unwrap(); + + std::thread::sleep(std::time::Duration::from_secs(1)); + + let usage = stat.cpu_usage().unwrap(); + + assert!(usage < 0.01); + + let num = 1; + for _ in 0..num * 10 { + std::thread::spawn(move || { + loop { + let _ = (0..10_000_000).into_iter().sum::(); + } + }); + } + + std::thread::sleep(std::time::Duration::from_secs(1)); + + let usage = stat.cpu_usage().unwrap(); + + assert!(usage > 0.9_f64) + } } diff --git a/etc/config-template.toml b/etc/config-template.toml index ab2ffa28acf..2195e681f62 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -52,8 +52,17 @@ # foreground-write-bandwidth = "0B" ## Read bandwidth limitation for this TiKV instance, default value is 0 which means unlimited. # foreground-read-bandwidth = "0B" -## Limitation of max delay duration for each request, default value is 0 which means unlimited. +## CPU quota for these background requests can use, default value is 0, it means unlimited. +## The unit is millicpu but for now this config is approximate and soft limit. +# background-cpu-time = 0 +## Write bandwidth limitation for backgroud request for this TiKV instance, default value is 0 which means unlimited. +# background-write-bandwidth = "0B" +## Read bandwidth limitation for background request for this TiKV instance, default value is 0 which means unlimited. +# background-read-bandwidth = "0B" +## Limitation of max delay duration, default value is 0 which means unlimited. # max-delay-duration = "500ms" +## Whether to enable quota auto tune +# enable-auto-tune = false [log] ## Log levels: debug, info, warn, error, fatal. 
diff --git a/src/config.rs b/src/config.rs index 3ff087f129c..d3ec96f6ba4 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2629,6 +2629,10 @@ pub struct QuotaConfig { pub foreground_write_bandwidth: ReadableSize, pub foreground_read_bandwidth: ReadableSize, pub max_delay_duration: ReadableDuration, + pub background_cpu_time: usize, + pub background_write_bandwidth: ReadableSize, + pub background_read_bandwidth: ReadableSize, + pub enable_auto_tune: bool, } impl Default for QuotaConfig { @@ -2638,6 +2642,10 @@ impl Default for QuotaConfig { foreground_write_bandwidth: ReadableSize(0), foreground_read_bandwidth: ReadableSize(0), max_delay_duration: ReadableDuration::millis(500), + background_cpu_time: 0, + background_write_bandwidth: ReadableSize(0), + background_read_bandwidth: ReadableSize(0), + enable_auto_tune: false, } } } @@ -4588,6 +4596,9 @@ mod tests { cfg.quota.foreground_cpu_time = 1000; cfg.quota.foreground_write_bandwidth = ReadableSize::mb(128); cfg.quota.foreground_read_bandwidth = ReadableSize::mb(256); + cfg.quota.background_cpu_time = 1000; + cfg.quota.background_write_bandwidth = ReadableSize::mb(128); + cfg.quota.background_read_bandwidth = ReadableSize::mb(256); cfg.quota.max_delay_duration = ReadableDuration::secs(1); cfg.validate().unwrap(); @@ -4595,7 +4606,11 @@ mod tests { cfg.quota.foreground_cpu_time, cfg.quota.foreground_write_bandwidth, cfg.quota.foreground_read_bandwidth, + cfg.quota.background_cpu_time, + cfg.quota.background_write_bandwidth, + cfg.quota.background_read_bandwidth, cfg.quota.max_delay_duration, + false, )); let cfg_controller = ConfigController::new(cfg.clone()); @@ -4627,7 +4642,7 @@ mod tests { let mut sample = quota_limiter.new_sample(); sample.add_read_bytes(ReadableSize::mb(32).0 as usize); - let should_delay = block_on(quota_limiter.async_consume(sample)); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); assert_eq!(should_delay, Duration::from_millis(125)); cfg_controller @@ -4637,8 
+4652,35 @@ mod tests { assert_eq!(cfg_controller.get_current(), cfg); let mut sample = quota_limiter.new_sample(); sample.add_write_bytes(ReadableSize::mb(128).0 as usize); - let should_delay = block_on(quota_limiter.async_consume(sample)); - assert_eq!(should_delay, Duration::from_millis(250)); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); + assert_eq!(should_delay, Duration::from_millis(500)); + + cfg_controller + .update_config("quota.background-cpu-time", "2000") + .unwrap(); + cfg.quota.background_cpu_time = 2000; + assert_eq!(cfg_controller.get_current(), cfg); + + cfg_controller + .update_config("quota.background-write-bandwidth", "256MB") + .unwrap(); + cfg.quota.background_write_bandwidth = ReadableSize::mb(256); + assert_eq!(cfg_controller.get_current(), cfg); + + let mut sample = quota_limiter.new_sample(); + sample.add_read_bytes(ReadableSize::mb(32).0 as usize); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); + assert_eq!(should_delay, Duration::from_millis(125)); + + cfg_controller + .update_config("quota.background-read-bandwidth", "512MB") + .unwrap(); + cfg.quota.background_read_bandwidth = ReadableSize::mb(512); + assert_eq!(cfg_controller.get_current(), cfg); + let mut sample = quota_limiter.new_sample(); + sample.add_write_bytes(ReadableSize::mb(128).0 as usize); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); + assert_eq!(should_delay, Duration::from_millis(500)); cfg_controller .update_config("quota.max-delay-duration", "50ms") @@ -4647,8 +4689,20 @@ mod tests { assert_eq!(cfg_controller.get_current(), cfg); let mut sample = quota_limiter.new_sample(); sample.add_write_bytes(ReadableSize::mb(128).0 as usize); - let should_delay = block_on(quota_limiter.async_consume(sample)); + let should_delay = block_on(quota_limiter.consume_sample(sample, true)); assert_eq!(should_delay, Duration::from_millis(50)); + + let mut sample = quota_limiter.new_sample(); + 
sample.add_write_bytes(ReadableSize::mb(128).0 as usize); + let should_delay = block_on(quota_limiter.consume_sample(sample, false)); + assert_eq!(should_delay, Duration::from_millis(50)); + + assert_eq!(cfg.quota.enable_auto_tune, false); + cfg_controller + .update_config("quota.enable-auto-tune", "true") + .unwrap(); + cfg.quota.enable_auto_tune = true; + assert_eq!(cfg_controller.get_current(), cfg); } #[test] diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index fa1dce909a2..9f2507562e6 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -274,6 +274,7 @@ impl Endpoint { }); self.check_memory_locks(&req_ctx)?; + let quota_limiter = self.quota_limiter.clone(); builder = Box::new(move |snap, req_ctx| { diff --git a/src/coprocessor/mod.rs b/src/coprocessor/mod.rs index 38d6dccc441..834033a60e1 100644 --- a/src/coprocessor/mod.rs +++ b/src/coprocessor/mod.rs @@ -54,6 +54,8 @@ pub const REQ_TYPE_DAG: i64 = 103; pub const REQ_TYPE_ANALYZE: i64 = 104; pub const REQ_TYPE_CHECKSUM: i64 = 105; +pub const REQ_FLAG_TIDB_SYSSESSION: u64 = 2048; + type HandlerStreamStepResult = Result<(Option, bool)>; /// An interface for all kind of Coprocessor request handlers. 
diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index 2a8fc6ee81c..7b826487cc1 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -51,6 +51,7 @@ pub struct AnalyzeContext { ranges: Vec, storage_stats: Statistics, quota_limiter: Arc, + is_auto_analyze: bool, } impl AnalyzeContext { @@ -71,12 +72,15 @@ impl AnalyzeContext { req_ctx.access_locks.clone(), false, ); + let is_auto_analyze = req.get_flags() & REQ_FLAG_TIDB_SYSSESSION > 0; + Ok(Self { req, storage: Some(TiKvStorage::new(store, false)), ranges, storage_stats: Statistics::default(), quota_limiter, + is_auto_analyze, }) } @@ -272,8 +276,15 @@ impl RequestHandler for AnalyzeContext { let col_req = self.req.take_col_req(); let storage = self.storage.take().unwrap(); let ranges = std::mem::take(&mut self.ranges); - let mut builder = - RowSampleBuilder::new(col_req, storage, ranges, self.quota_limiter.clone())?; + + let mut builder = RowSampleBuilder::new( + col_req, + storage, + ranges, + self.quota_limiter.clone(), + self.is_auto_analyze, + )?; + let res = AnalyzeContext::handle_full_sampling(&mut builder).await; builder.data.collect_storage_stats(&mut self.storage_stats); res @@ -314,6 +325,7 @@ struct RowSampleBuilder { columns_info: Vec, column_groups: Vec, quota_limiter: Arc, + is_quota_auto_tune: bool, } impl RowSampleBuilder { @@ -322,6 +334,7 @@ impl RowSampleBuilder { storage: TiKvStorage>, ranges: Vec, quota_limiter: Arc, + is_quota_auto_tune: bool, ) -> Result { let columns_info: Vec<_> = req.take_columns_info().into(); if columns_info.is_empty() { @@ -346,6 +359,7 @@ impl RowSampleBuilder { columns_info, column_groups: req.take_column_groups().into(), quota_limiter, + is_quota_auto_tune, }) } @@ -431,7 +445,14 @@ impl RowSampleBuilder { } // Don't let analyze bandwidth limit the quota limiter, this is already limited in rate limiter. 
- let quota_delay = self.quota_limiter.async_consume(sample).await; + let quota_delay = { + if !self.is_quota_auto_tune { + self.quota_limiter.consume_sample(sample, true).await + } else { + self.quota_limiter.consume_sample(sample, false).await + } + }; + if !quota_delay.is_zero() { NON_TXN_COMMAND_THROTTLE_TIME_COUNTER_VEC_STATIC .get(ThrottleType::analyze_full_sampling) diff --git a/src/server/service/diagnostics/sys.rs b/src/server/service/diagnostics/sys.rs index 742b8a8cb55..c0cc3eb1c6a 100644 --- a/src/server/service/diagnostics/sys.rs +++ b/src/server/service/diagnostics/sys.rs @@ -5,13 +5,13 @@ use std::{collections::HashMap, string::ToString}; use kvproto::diagnosticspb::{ServerInfoItem, ServerInfoPair}; use tikv_util::{ config::KIB, - sys::{cpu_time::LiunxStyleCpuTime, SysQuota, *}, + sys::{cpu_time::LinuxStyleCpuTime, SysQuota, *}, }; use walkdir::WalkDir; use crate::server::service::diagnostics::{ioload, SYS_INFO}; -type CpuTimeSnapshot = Option; +type CpuTimeSnapshot = Option; #[derive(Clone, Debug)] pub struct NicSnapshot { @@ -87,7 +87,7 @@ fn cpu_load_info(prev_cpu: CpuTimeSnapshot, collector: &mut Vec) return; } - let t2 = LiunxStyleCpuTime::current(); + let t2 = LinuxStyleCpuTime::current(); if t2.is_err() { return; } @@ -265,7 +265,7 @@ fn io_load_info(prev_io: HashMap, collector: &mut Vec CpuTimeSnapshot { - let t1 = LiunxStyleCpuTime::current(); + let t1 = LinuxStyleCpuTime::current(); if t1.is_err() { return None; } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 7026ebab77d..768579f0b15 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -655,7 +655,7 @@ impl Storage { .as_ref() .map_or(0, |v| v.len()); sample.add_read_bytes(read_bytes); - let quota_delay = quota_limiter.async_consume(sample).await; + let quota_delay = quota_limiter.consume_sample(sample, true).await; if !quota_delay.is_zero() { TXN_COMMAND_THROTTLE_TIME_COUNTER_VEC_STATIC .get(CMD) @@ -996,7 +996,7 @@ impl Storage { + 
stats.cf_statistics(CF_LOCK).flow_stats.read_bytes + stats.cf_statistics(CF_WRITE).flow_stats.read_bytes; sample.add_read_bytes(read_bytes); - let quota_delay = quota_limiter.async_consume(sample).await; + let quota_delay = quota_limiter.consume_sample(sample, true).await; if !quota_delay.is_zero() { TXN_COMMAND_THROTTLE_TIME_COUNTER_VEC_STATIC .get(CMD) diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index ab866fe18bf..a9b34b9b189 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -813,7 +813,7 @@ impl Scheduler { + statistics.cf_statistics(CF_LOCK).flow_stats.read_bytes + statistics.cf_statistics(CF_WRITE).flow_stats.read_bytes; sample.add_read_bytes(read_bytes); - let quota_delay = quota_limiter.async_consume(sample).await; + let quota_delay = quota_limiter.consume_sample(sample, true).await; if !quota_delay.is_zero() { TXN_COMMAND_THROTTLE_TIME_COUNTER_VEC_STATIC .get(tag) From 1d66eddeb904de6222ba9d3dd94a7bef04af0725 Mon Sep 17 00:00:00 2001 From: Jay Date: Thu, 23 Jun 2022 11:26:37 -0700 Subject: [PATCH 0042/1149] raftstorev2: add bootstrapping (#12877) ref tikv/tikv#12842 Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 6 + components/engine_panic/src/raft_engine.rs | 49 +++- components/engine_rocks/src/raft_engine.rs | 57 ++++- components/engine_traits/src/raft_engine.rs | 21 +- components/raft_log_engine/src/engine.rs | 84 +++++- components/raftstore-v2/Cargo.toml | 21 +- components/raftstore-v2/src/bootstrap.rs | 241 ++++++++++++++++++ components/raftstore-v2/src/fsm/mod.rs | 5 + components/raftstore-v2/src/fsm/store.rs | 2 + components/raftstore-v2/src/lib.rs | 8 +- components/raftstore-v2/src/raft/mod.rs | 2 +- components/raftstore-v2/src/raft/storage.rs | 82 +++++- components/raftstore-v2/src/router/message.rs | 2 +- .../raftstore-v2/tests/failpoints/mod.rs | 8 + .../tests/failpoints/test_bootstrap.rs | 61 +++++ components/test_pd/src/mocker/service.rs | 2 +- src/server/node.rs | 
10 +- 17 files changed, 641 insertions(+), 20 deletions(-) create mode 100644 components/raftstore-v2/src/bootstrap.rs create mode 100644 components/raftstore-v2/tests/failpoints/mod.rs create mode 100644 components/raftstore-v2/tests/failpoints/test_bootstrap.rs diff --git a/Cargo.lock b/Cargo.lock index cedc1229d0b..489ef39eaec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4168,15 +4168,21 @@ version = "0.1.0" dependencies = [ "collections", "crossbeam", + "engine_test", "engine_traits", "error_code", + "fail", "kvproto", "pd_client", "raft", "raft-proto", "raftstore", "slog", + "slog-global", "smallvec", + "tempfile", + "test_pd", + "test_util", "tikv_util", ] diff --git a/components/engine_panic/src/raft_engine.rs b/components/engine_panic/src/raft_engine.rs index 9842e1100ed..384bc60ffa6 100644 --- a/components/engine_panic/src/raft_engine.rs +++ b/components/engine_panic/src/raft_engine.rs @@ -1,7 +1,10 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. use engine_traits::{Error, RaftEngine, RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch, Result}; -use kvproto::raft_serverpb::RaftLocalState; +use kvproto::{ + metapb::Region, + raft_serverpb::{RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent}, +}; use raft::eraftpb::Entry; use crate::{engine::PanicEngine, write_batch::PanicWriteBatch}; @@ -29,6 +32,26 @@ impl RaftEngineReadOnly for PanicEngine { fn get_all_entries_to(&self, region_id: u64, buf: &mut Vec) -> Result<()> { panic!() } + + fn is_empty(&self) -> Result { + panic!() + } + + fn get_store_ident(&self) -> Result> { + panic!() + } + + fn get_prepare_bootstrap_region(&self) -> Result> { + panic!() + } + + fn get_region_state(&self, raft_group_id: u64) -> Result> { + panic!() + } + + fn get_apply_state(&self, raft_group_id: u64) -> Result> { + panic!() + } } impl RaftEngineDebug for PanicEngine { @@ -114,6 +137,10 @@ impl RaftEngine for PanicEngine { fn get_engine_size(&self) -> Result { panic!() } + + fn 
put_store_ident(&self, ident: &StoreIdent) -> Result<()> { + panic!() + } } impl RaftLogBatch for PanicWriteBatch { @@ -140,4 +167,24 @@ impl RaftLogBatch for PanicWriteBatch { fn merge(&mut self, _: Self) -> Result<()> { panic!() } + + fn put_store_ident(&mut self, ident: &StoreIdent) -> Result<()> { + panic!() + } + + fn put_prepare_bootstrap_region(&mut self, region: &Region) -> Result<()> { + panic!() + } + + fn remove_prepare_bootstrap_region(&mut self) -> Result<()> { + panic!() + } + + fn put_region_state(&mut self, raft_group_id: u64, state: &RegionLocalState) -> Result<()> { + panic!() + } + + fn put_apply_state(&mut self, raft_group_id: u64, state: &RaftApplyState) -> Result<()> { + panic!() + } } diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index e081d057191..57a65ba661f 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -6,7 +6,10 @@ use engine_traits::{ RaftEngineReadOnly, RaftLogBatch, RaftLogGCTask, Result, SyncMutable, WriteBatch, WriteBatchExt, WriteOptions, CF_DEFAULT, RAFT_LOG_MULTI_GET_CNT, }; -use kvproto::raft_serverpb::RaftLocalState; +use kvproto::{ + metapb::Region, + raft_serverpb::{RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent}, +}; use protobuf::Message; use raft::eraftpb::Entry; use tikv_util::{box_err, box_try}; @@ -117,6 +120,34 @@ impl RaftEngineReadOnly for RocksEngine { )?; Ok(()) } + + fn is_empty(&self) -> Result { + let mut is_empty = true; + self.scan_cf(CF_DEFAULT, b"", b"", false, |_, _| { + is_empty = false; + Ok(false) + })?; + + Ok(is_empty) + } + + fn get_store_ident(&self) -> Result> { + self.get_msg_cf(CF_DEFAULT, keys::STORE_IDENT_KEY) + } + + fn get_prepare_bootstrap_region(&self) -> Result> { + self.get_msg_cf(CF_DEFAULT, keys::PREPARE_BOOTSTRAP_KEY) + } + + fn get_region_state(&self, raft_group_id: u64) -> Result> { + let key = keys::region_state_key(raft_group_id); + 
self.get_msg_cf(CF_DEFAULT, &key) + } + + fn get_apply_state(&self, raft_group_id: u64) -> Result> { + let key = keys::apply_state_key(raft_group_id); + self.get_msg_cf(CF_DEFAULT, &key) + } } impl RaftEngineDebug for RocksEngine { @@ -303,6 +334,10 @@ impl RaftEngine for RocksEngine { Ok(used_size) } + + fn put_store_ident(&self, ident: &StoreIdent) -> Result<()> { + self.put_msg(keys::STORE_IDENT_KEY, ident) + } } impl RaftLogBatch for RocksWriteBatch { @@ -336,6 +371,26 @@ impl RaftLogBatch for RocksWriteBatch { fn merge(&mut self, src: Self) -> Result<()> { WriteBatch::merge(self, src) } + + fn put_store_ident(&mut self, ident: &StoreIdent) -> Result<()> { + self.put_msg(keys::STORE_IDENT_KEY, ident) + } + + fn put_prepare_bootstrap_region(&mut self, region: &Region) -> Result<()> { + self.put_msg(keys::PREPARE_BOOTSTRAP_KEY, region) + } + + fn remove_prepare_bootstrap_region(&mut self) -> Result<()> { + self.delete(keys::PREPARE_BOOTSTRAP_KEY) + } + + fn put_region_state(&mut self, raft_group_id: u64, state: &RegionLocalState) -> Result<()> { + self.put_msg(&keys::region_state_key(raft_group_id), state) + } + + fn put_apply_state(&mut self, raft_group_id: u64, state: &RaftApplyState) -> Result<()> { + self.put_msg(&keys::apply_state_key(raft_group_id), state) + } } impl RocksWriteBatch { diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index a0697218cf7..03cb2a41a41 100644 --- a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -1,6 +1,9 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use kvproto::raft_serverpb::RaftLocalState; +use kvproto::{ + metapb::Region, + raft_serverpb::{RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent}, +}; use raft::eraftpb::Entry; use crate::*; @@ -8,7 +11,14 @@ use crate::*; pub const RAFT_LOG_MULTI_GET_CNT: u64 = 8; pub trait RaftEngineReadOnly: Sync + Send + 'static { + fn is_empty(&self) -> Result; + + fn get_store_ident(&self) -> Result>; + fn get_prepare_bootstrap_region(&self) -> Result>; + fn get_raft_state(&self, raft_group_id: u64) -> Result>; + fn get_region_state(&self, raft_group_id: u64) -> Result>; + fn get_apply_state(&self, raft_group_id: u64) -> Result>; fn get_entry(&self, raft_group_id: u64, index: u64) -> Result>; @@ -89,6 +99,8 @@ pub trait RaftEngine: RaftEngineReadOnly + Clone + Sync + Send + 'static { /// Note: `RaftLocalState` won't be updated in this call. fn append(&self, raft_group_id: u64, entries: Vec) -> Result; + fn put_store_ident(&self, ident: &StoreIdent) -> Result<()>; + fn put_raft_state(&self, raft_group_id: u64, state: &RaftLocalState) -> Result<()>; /// Like `cut_logs` but the range could be very large. Return the deleted count. @@ -135,7 +147,14 @@ pub trait RaftLogBatch: Send { /// Remove Raft logs in [`from`, `to`) which will be overwritten later. fn cut_logs(&mut self, raft_group_id: u64, from: u64, to: u64); + fn put_store_ident(&mut self, ident: &StoreIdent) -> Result<()>; + + fn put_prepare_bootstrap_region(&mut self, region: &Region) -> Result<()>; + fn remove_prepare_bootstrap_region(&mut self) -> Result<()>; + fn put_raft_state(&mut self, raft_group_id: u64, state: &RaftLocalState) -> Result<()>; + fn put_region_state(&mut self, raft_group_id: u64, state: &RegionLocalState) -> Result<()>; + fn put_apply_state(&mut self, raft_group_id: u64, state: &RaftApplyState) -> Result<()>; /// The data size of this RaftLogBatch. 
fn persist_size(&self) -> usize; diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 9707bdb28b7..ae895f1ac36 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -13,7 +13,10 @@ use engine_traits::{ RaftLogBatch as RaftLogBatchTrait, RaftLogGCTask, Result, }; use file_system::{IOOp, IORateLimiter, IOType}; -use kvproto::raft_serverpb::RaftLocalState; +use kvproto::{ + metapb::Region, + raft_serverpb::{RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent}, +}; use raft::eraftpb::Entry; use raft_engine::{ env::{DefaultFileSystem, FileSystem, Handle, WriteExt}, @@ -22,6 +25,9 @@ use raft_engine::{ pub use raft_engine::{Config as RaftEngineConfig, ReadableSize, RecoveryMode}; use tikv_util::Either; +// A special region ID representing global state. +const STORE_REGION_ID: u64 = 0; + #[derive(Clone)] pub struct MessageExtTyped; @@ -259,6 +265,10 @@ impl RaftLogEngine { pub struct RaftLogBatch(LogBatch); const RAFT_LOG_STATE_KEY: &[u8] = b"R"; +const STORE_IDENT_KEY: &[u8] = &[0x01]; +const PREPARE_BOOTSTRAP_REGION_KEY: &[u8] = &[0x02]; +const REGION_STATE_KEY: &[u8] = &[0x03]; +const APPLY_STATE_KEY: &[u8] = &[0x04]; impl RaftLogBatchTrait for RaftLogBatch { fn append(&mut self, raft_group_id: u64, entries: Vec) -> Result<()> { @@ -288,6 +298,40 @@ impl RaftLogBatchTrait for RaftLogBatch { fn merge(&mut self, mut src: Self) -> Result<()> { self.0.merge(&mut src.0).map_err(transfer_error) } + + fn put_store_ident(&mut self, ident: &StoreIdent) -> Result<()> { + self.0 + .put_message(STORE_REGION_ID, STORE_IDENT_KEY.to_vec(), ident) + .map_err(transfer_error) + } + + fn put_prepare_bootstrap_region(&mut self, region: &Region) -> Result<()> { + self.0 + .put_message( + STORE_REGION_ID, + PREPARE_BOOTSTRAP_REGION_KEY.to_vec(), + region, + ) + .map_err(transfer_error) + } + + fn remove_prepare_bootstrap_region(&mut self) -> Result<()> { + self.0 + 
.delete(STORE_REGION_ID, PREPARE_BOOTSTRAP_REGION_KEY.to_vec()); + Ok(()) + } + + fn put_region_state(&mut self, raft_group_id: u64, state: &RegionLocalState) -> Result<()> { + self.0 + .put_message(raft_group_id, REGION_STATE_KEY.to_vec(), state) + .map_err(transfer_error) + } + + fn put_apply_state(&mut self, raft_group_id: u64, state: &RaftApplyState) -> Result<()> { + self.0 + .put_message(raft_group_id, APPLY_STATE_KEY.to_vec(), state) + .map_err(transfer_error) + } } impl RaftEngineReadOnly for RaftLogEngine { @@ -324,6 +368,34 @@ impl RaftEngineReadOnly for RaftLogEngine { } Ok(()) } + + fn is_empty(&self) -> Result { + self.get_store_ident().map(|i| i.is_none()) + } + + fn get_store_ident(&self) -> Result> { + self.0 + .get_message(STORE_REGION_ID, STORE_IDENT_KEY) + .map_err(transfer_error) + } + + fn get_prepare_bootstrap_region(&self) -> Result> { + self.0 + .get_message(STORE_REGION_ID, PREPARE_BOOTSTRAP_REGION_KEY) + .map_err(transfer_error) + } + + fn get_region_state(&self, raft_group_id: u64) -> Result> { + self.0 + .get_message(raft_group_id, REGION_STATE_KEY) + .map_err(transfer_error) + } + + fn get_apply_state(&self, raft_group_id: u64) -> Result> { + self.0 + .get_message(raft_group_id, APPLY_STATE_KEY) + .map_err(transfer_error) + } } impl RaftEngineDebug for RaftLogEngine { @@ -389,6 +461,16 @@ impl RaftEngine for RaftLogEngine { self.0.write(&mut batch.0, false).map_err(transfer_error) } + fn put_store_ident(&self, ident: &StoreIdent) -> Result<()> { + let mut batch = Self::LogBatch::default(); + batch + .0 + .put_message(STORE_REGION_ID, STORE_IDENT_KEY.to_vec(), ident) + .map_err(transfer_error)?; + self.0.write(&mut batch.0, true).map_err(transfer_error)?; + Ok(()) + } + fn put_raft_state(&self, raft_group_id: u64, state: &RaftLocalState) -> Result<()> { let mut batch = Self::LogBatch::default(); batch diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 56d08c6a6b6..100a2be409d 100644 --- 
a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -8,16 +8,20 @@ default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] failpoints = ["raftstore/failpoints"] testexport = ["raftstore/testexport"] test-engine-kv-rocksdb = [ - "raftstore/test-engine-kv-rocksdb" + "raftstore/test-engine-kv-rocksdb", + "engine_test/test-engine-kv-rocksdb", ] test-engine-raft-raft-engine = [ - "raftstore/test-engine-raft-raft-engine" + "raftstore/test-engine-raft-raft-engine", + "engine_test/test-engine-raft-raft-engine", ] test-engines-rocksdb = [ "raftstore/test-engines-rocksdb", + "engine_test/test-engines-rocksdb", ] test-engines-panic = [ "raftstore/test-engines-panic", + "engine_test/test-engines-panic", ] cloud-aws = ["raftstore/cloud-aws"] @@ -29,6 +33,7 @@ collections = { path = "../collections" } crossbeam = "0.8" engine_traits = { path = "../engine_traits" } error_code = { path = "../error_code" } +fail = "0.5" kvproto = { git = "https://github.com/pingcap/kvproto.git" } pd_client = { path = "../pd_client" } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } @@ -37,3 +42,15 @@ raftstore = { path = "../raftstore" } slog = "2.3" smallvec = "1.4" tikv_util = { path = "../tikv_util", default-features = false } + +[dev-dependencies] +engine_test = { path = "../engine_test", default-features = false } +slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +tempfile = "3.0" +test_pd = { path = "../test_pd" } +test_util = { path = "../test_util" } + +[[test]] +name = "raftstore-v2-failpoints" +path = "tests/failpoints/mod.rs" +required-features = ["failpoints"] diff --git a/components/raftstore-v2/src/bootstrap.rs b/components/raftstore-v2/src/bootstrap.rs new file mode 100644 index 00000000000..55e1f6814c5 --- /dev/null +++ b/components/raftstore-v2/src/bootstrap.rs @@ -0,0 +1,241 @@ +// Copyright 2022 TiKV Project 
Authors. Licensed under Apache-2.0. + +use std::{thread, time::Duration}; + +use engine_traits::{RaftEngine, RaftLogBatch}; +use error_code::ErrorCodeExt; +use fail::fail_point; +use kvproto::{ + metapb::{Region, Store}, + raft_serverpb::{RaftLocalState, RegionLocalState, StoreIdent}, +}; +use pd_client::PdClient; +use raft::INVALID_ID; +use raftstore::store::initial_region; +use slog::{debug, error, info, warn, Logger}; +use tikv_util::{box_err, box_try}; + +use crate::{raft::write_initial_states, Result}; + +const MAX_CHECK_CLUSTER_BOOTSTRAPPED_RETRY_COUNT: u64 = 60; +const CHECK_CLUSTER_BOOTSTRAPPED_RETRY_INTERVAL: Duration = Duration::from_secs(3); + +/// A struct for bootstrapping the store. +/// +/// A typical bootstrap process should follow following order: +/// 1. bootstrap the store to get a store ID. +/// 2. bootstrap the first region using the last store ID. +pub struct Bootstrap<'a, ER: RaftEngine> { + engine: &'a ER, + cluster_id: u64, + // It's not performance critical. + pd_client: &'a dyn PdClient, + logger: Logger, +} + +// Although all methods won't change internal state, but they still receive `&mut self` as it's +// not thread safe to bootstrap concurrently. +impl<'a, ER: RaftEngine> Bootstrap<'a, ER> { + pub fn new( + engine: &'a ER, + cluster_id: u64, + pd_client: &'a impl PdClient, + logger: Logger, + ) -> Self { + Self { + engine, + cluster_id, + pd_client, + logger, + } + } + + /// check store, return store id for the engine. + /// If the store is not bootstrapped, use None. + fn check_store(&mut self) -> Result> { + let ident = match self.engine.get_store_ident()? 
{ + Some(ident) => ident, + None => return Ok(None), + }; + if ident.get_cluster_id() != self.cluster_id { + return Err(box_err!( + "cluster ID mismatch, local {} != remote {}, \ + you are trying to connect to another cluster, please reconnect to the correct PD", + ident.get_cluster_id(), + self.cluster_id + )); + } + if ident.get_store_id() == INVALID_ID { + return Err(box_err!("invalid store ident {:?}", ident)); + } + Ok(Some(ident.get_store_id())) + } + + fn inner_bootstrap_store(&mut self) -> Result { + let id = self.pd_client.alloc_id()?; + debug!(self.logger, "alloc store id"; "store_id" => id); + let mut ident = StoreIdent::default(); + if !self.engine.is_empty()? { + return Err(box_err!("store is not empty and has already had data.")); + } + ident.set_cluster_id(self.cluster_id); + ident.set_store_id(id); + self.engine.put_store_ident(&ident)?; + self.engine.sync()?; + fail_point!("node_after_bootstrap_store", |_| Err(box_err!( + "injected error: node_after_bootstrap_store" + ))); + Ok(id) + } + + /// Bootstrap the store and return the store ID. + /// + /// If store is bootstrapped already, return the store ID directly. + pub fn bootstrap_store(&mut self) -> Result { + let store_id = match self.check_store()? 
{ + Some(id) => id, + None => self.inner_bootstrap_store()?, + }; + + Ok(store_id) + } + + fn prepare_bootstrap_first_region(&mut self, store_id: u64) -> Result { + let region_id = self.pd_client.alloc_id()?; + debug!( + self.logger, + "alloc first region id"; + "region_id" => region_id, + "cluster_id" => self.cluster_id, + "store_id" => store_id + ); + let peer_id = self.pd_client.alloc_id()?; + debug!( + self.logger, + "alloc first peer id for first region"; + "peer_id" => peer_id, + "region_id" => region_id, + ); + + let region = initial_region(store_id, region_id, peer_id); + + let mut wb = self.engine.log_batch(10); + wb.put_prepare_bootstrap_region(®ion)?; + write_initial_states(&mut wb, region.clone())?; + box_try!(self.engine.consume(&mut wb, true)); + + Ok(region) + } + + fn check_first_region_bootstrapped(&mut self) -> Result { + for _ in 0..MAX_CHECK_CLUSTER_BOOTSTRAPPED_RETRY_COUNT { + match self.pd_client.is_cluster_bootstrapped() { + Ok(b) => return Ok(b), + Err(e) => { + warn!(self.logger, "check cluster bootstrapped failed"; "err" => ?e); + } + } + thread::sleep(CHECK_CLUSTER_BOOTSTRAPPED_RETRY_INTERVAL); + } + Err(box_err!("check cluster bootstrapped failed")) + } + + fn check_or_prepare_bootstrap_first_region(&mut self, store_id: u64) -> Result> { + if let Some(first_region) = self.engine.get_prepare_bootstrap_region()? { + // Bootstrap is aborted last time, resume. It may succeed or fail last time, no matter + // what, at least we need a way to clean up. + Ok(Some(first_region)) + } else if self.check_first_region_bootstrapped()? { + // If other node has bootstrap the cluster, skip to avoid useless ID allocating and + // disk writes. + Ok(None) + } else { + // We are probably the first one triggering bootstrap. 
+ self.prepare_bootstrap_first_region(store_id).map(Some) + } + } + + fn clear_prepare_bootstrap(&mut self, first_region_id: Option) -> Result<()> { + let mut wb = self.engine.log_batch(10); + wb.remove_prepare_bootstrap_region()?; + if let Some(id) = first_region_id { + box_try!( + self.engine + .clean(id, 0, &RaftLocalState::default(), &mut wb) + ); + } + box_try!(self.engine.consume(&mut wb, true)); + Ok(()) + } + + fn inner_bootstrap_first_region( + &mut self, + store: &Store, + first_region: &Region, + ) -> Result { + let region_id = first_region.get_id(); + let mut retry = 0; + while retry < MAX_CHECK_CLUSTER_BOOTSTRAPPED_RETRY_COUNT { + match self + .pd_client + .bootstrap_cluster(store.clone(), first_region.clone()) + { + Ok(_) => { + info!(self.logger, "bootstrap cluster ok"; "cluster_id" => self.cluster_id); + fail_point!("node_after_bootstrap_cluster", |_| Err(box_err!( + "injected error: node_after_bootstrap_cluster" + ))); + self.clear_prepare_bootstrap(None)?; + return Ok(true); + } + Err(pd_client::Error::ClusterBootstrapped(_)) => { + match self.pd_client.get_region(b"") { + Ok(region) => { + if region == *first_region { + self.clear_prepare_bootstrap(None)?; + return Ok(true); + } else { + info!(self.logger, "cluster is already bootstrapped"; "cluster_id" => self.cluster_id); + self.clear_prepare_bootstrap(Some(region_id))?; + return Ok(false); + } + } + Err(e) => { + warn!(self.logger, "get the first region failed"; "err" => ?e); + } + } + } + Err(e) => { + error!(self.logger, "bootstrap cluster"; "cluster_id" => self.cluster_id, "err" => ?e, "err_code" => %e.error_code()) + } + } + retry += 1; + thread::sleep(CHECK_CLUSTER_BOOTSTRAPPED_RETRY_INTERVAL); + } + Err(box_err!("bootstrapped cluster failed")) + } + + /// Bootstrap the first region. + /// + /// If the cluster is already bootstrapped, `None` is returned. 
+ pub fn bootstrap_first_region( + &mut self, + store: &Store, + store_id: u64, + ) -> Result> { + let first_region = match self.check_or_prepare_bootstrap_first_region(store_id)? { + Some(r) => r, + None => return Ok(None), + }; + info!(self.logger, "trying to bootstrap first region"; "store_id" => store_id, "region" => ?first_region); + // cluster is not bootstrapped, and we choose first store to bootstrap + fail_point!("node_after_prepare_bootstrap_cluster", |_| Err(box_err!( + "injected error: node_after_prepare_bootstrap_cluster" + ))); + if self.inner_bootstrap_first_region(store, &first_region)? { + Ok(Some(first_region)) + } else { + Ok(None) + } + } +} diff --git a/components/raftstore-v2/src/fsm/mod.rs b/components/raftstore-v2/src/fsm/mod.rs index 275313cbfb3..60c84984793 100644 --- a/components/raftstore-v2/src/fsm/mod.rs +++ b/components/raftstore-v2/src/fsm/mod.rs @@ -1,5 +1,10 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +//! FSM is short for finite state machine. There are three types of FSMs, +//! - StoreFsm, used for handling control messages and global initialization. +//! - PeerFsm, used for handling messages specific for one raft peer. +//! - ApplyFsm, used for handling apply task for one raft peer. + mod apply; mod peer; mod store; diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index bb3db8c75d3..b568454e2c9 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -1 +1,3 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +pub struct StoreFsm {} diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 98c72ca7632..fac4511cfd4 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -4,16 +4,18 @@ //! //! The thread module of raftstore is batch-system, more check components/batch-system. //! 
All state machines are defined in [`fsm`] module. Everything that wrapping raft is -//! implemented in [`raft`] module. And the commands are implemented in [`operation`] module. -//! All state machines are expected to communicate with messages. They are defined in -//! [`router`] module. +//! implemented in [`raft`] module. And the commands, including split/merge/confchange/read/write, +//! are implemented in [`operation`] module. All state machines are expected to communicate with +//! messages. They are defined in [`router`] module. #![allow(unused)] +mod bootstrap; mod fsm; mod operation; mod raft; mod router; +pub use bootstrap::Bootstrap; pub use raftstore::{Error, Result}; pub use router::{PeerMsg, PeerTick, StoreMsg, StoreTick}; diff --git a/components/raftstore-v2/src/raft/mod.rs b/components/raftstore-v2/src/raft/mod.rs index 7fd128d6788..045e9ff89b3 100644 --- a/components/raftstore-v2/src/raft/mod.rs +++ b/components/raftstore-v2/src/raft/mod.rs @@ -4,4 +4,4 @@ mod peer; mod storage; pub use peer::Peer; -pub use storage::Storage; +pub use storage::{write_initial_states, Storage}; diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index f999c6890d8..f6dcad9578c 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -1,12 +1,45 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::RaftEngine; +use engine_traits::{RaftEngine, RaftLogBatch}; +use kvproto::{ + metapb::Region, + raft_serverpb::{RaftApplyState, RaftLocalState, RegionLocalState}, +}; use raft::{ eraftpb::{Entry, Snapshot}, GetEntriesContext, RaftState, }; +use raftstore::store::{RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM}; use slog::Logger; +use crate::Result; + +pub fn write_initial_states(wb: &mut impl RaftLogBatch, region: Region) -> Result<()> { + let region_id = region.get_id(); + + let mut state = RegionLocalState::default(); + state.set_region(region); + wb.put_region_state(region_id, &state)?; + + let mut apply_state = RaftApplyState::default(); + apply_state.set_applied_index(RAFT_INIT_LOG_INDEX); + apply_state + .mut_truncated_state() + .set_index(RAFT_INIT_LOG_INDEX); + apply_state + .mut_truncated_state() + .set_term(RAFT_INIT_LOG_TERM); + wb.put_apply_state(region_id, &apply_state)?; + + let mut raft_state = RaftLocalState::default(); + raft_state.set_last_index(RAFT_INIT_LOG_INDEX); + raft_state.mut_hard_state().set_term(RAFT_INIT_LOG_TERM); + raft_state.mut_hard_state().set_commit(RAFT_INIT_LOG_INDEX); + wb.put_raft_state(region_id, &raft_state)?; + + Ok(()) +} + /// A storage for raft. 
pub struct Storage { engine: ER, @@ -54,3 +87,50 @@ impl raft::Storage for Storage { unimplemented!() } } + +#[cfg(test)] +mod tests { + use engine_traits::{RaftEngine, RaftEngineReadOnly, RaftLogBatch}; + use kvproto::{ + metapb::{Peer, Region}, + raft_serverpb::PeerState, + }; + use raftstore::store::{RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM}; + use tempfile::TempDir; + + #[test] + fn test_write_initial_states() { + let mut region = Region::default(); + region.set_id(4); + let mut p = Peer::default(); + p.set_id(5); + p.set_store_id(6); + region.mut_peers().push(p); + region.mut_region_epoch().set_version(2); + region.mut_region_epoch().set_conf_ver(4); + + let path = TempDir::new().unwrap(); + let engine = engine_test::new_temp_engine(&path); + let raft_engine = &engine.raft; + let mut wb = raft_engine.log_batch(10); + super::write_initial_states(&mut wb, region.clone()).unwrap(); + assert!(!wb.is_empty()); + raft_engine.consume(&mut wb, true).unwrap(); + + let local_state = raft_engine.get_region_state(4).unwrap().unwrap(); + assert_eq!(local_state.get_state(), PeerState::Normal); + assert_eq!(*local_state.get_region(), region); + + let raft_state = raft_engine.get_raft_state(4).unwrap().unwrap(); + assert_eq!(raft_state.get_last_index(), RAFT_INIT_LOG_INDEX); + let hs = raft_state.get_hard_state(); + assert_eq!(hs.get_term(), RAFT_INIT_LOG_TERM); + assert_eq!(hs.get_commit(), RAFT_INIT_LOG_INDEX); + + let apply_state = raft_engine.get_apply_state(4).unwrap().unwrap(); + assert_eq!(apply_state.get_applied_index(), RAFT_INIT_LOG_INDEX); + let ts = apply_state.get_truncated_state(); + assert_eq!(ts.get_index(), RAFT_INIT_LOG_INDEX); + assert_eq!(ts.get_term(), RAFT_INIT_LOG_TERM); + } +} diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 1ab85608034..75011163e83 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -1,4 +1,4 @@ -// Copyright 
2016 TiKV Project Authors. Licensed under Apache-2.0. +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. // #[PerformanceCriticalPath] use std::{fmt, marker::PhantomData}; diff --git a/components/raftstore-v2/tests/failpoints/mod.rs b/components/raftstore-v2/tests/failpoints/mod.rs new file mode 100644 index 00000000000..88dfd0a81aa --- /dev/null +++ b/components/raftstore-v2/tests/failpoints/mod.rs @@ -0,0 +1,8 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +#![feature(test)] +#![feature(assert_matches)] +#![feature(custom_test_frameworks)] +#![test_runner(test_util::run_failpoint_tests)] + +mod test_bootstrap; diff --git a/components/raftstore-v2/tests/failpoints/test_bootstrap.rs b/components/raftstore-v2/tests/failpoints/test_bootstrap.rs new file mode 100644 index 00000000000..f56078a59f5 --- /dev/null +++ b/components/raftstore-v2/tests/failpoints/test_bootstrap.rs @@ -0,0 +1,61 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::assert_matches::assert_matches; + +use engine_traits::RaftEngineReadOnly; +use kvproto::metapb::Store; +use raftstore_v2::Bootstrap; +use slog::o; +use tempfile::TempDir; + +#[test] +fn test_bootstrap_half_way_failure() { + let server = test_pd::Server::new(1); + let eps = server.bind_addrs(); + let pd_client = test_pd::util::new_client(eps, None); + let path = TempDir::new().unwrap(); + let engines = engine_test::new_temp_engine(&path); + let bootstrap = || { + let logger = slog_global::borrow_global().new(o!()); + let mut bootstrap = Bootstrap::new(&engines.raft, 0, &pd_client, logger); + match bootstrap.bootstrap_store() { + Ok(store_id) => { + let mut store = Store::default(); + store.set_id(store_id); + bootstrap.bootstrap_first_region(&store, store_id) + } + Err(e) => Err(e), + } + }; + + // Try to start this node, return after persisted some keys. 
+ fail::cfg("node_after_bootstrap_store", "return").unwrap(); + let s = format!("{}", bootstrap().unwrap_err()); + assert!(s.contains("node_after_bootstrap_store"), "{}", s); + assert_matches!(engines.raft.get_prepare_bootstrap_region(), Ok(None)); + + let ident = engines.raft.get_store_ident().unwrap().unwrap(); + assert_ne!(ident.get_store_id(), 0); + + // Check whether it can bootstrap cluster successfully. + fail::remove("node_after_bootstrap_store"); + fail::cfg("node_after_prepare_bootstrap_cluster", "return").unwrap(); + let s = format!("{}", bootstrap().unwrap_err()); + assert!(s.contains("node_after_prepare_bootstrap_cluster"), "{}", s); + assert_matches!(engines.raft.get_prepare_bootstrap_region(), Ok(Some(_))); + + fail::remove("node_after_prepare_bootstrap_cluster"); + fail::cfg("node_after_bootstrap_cluster", "return").unwrap(); + let s = format!("{}", bootstrap().unwrap_err()); + assert!(s.contains("node_after_bootstrap_cluster"), "{}", s); + assert_matches!(engines.raft.get_prepare_bootstrap_region(), Ok(Some(_))); + + // Although aborted by error, rebootstrap should continue. + bootstrap().unwrap().unwrap(); + assert_matches!(engines.raft.get_prepare_bootstrap_region(), Ok(None)); + + // Second bootstrap should be noop. 
+ assert_eq!(bootstrap().unwrap(), None); + + assert_matches!(engines.raft.get_prepare_bootstrap_region(), Ok(None)); +} diff --git a/components/test_pd/src/mocker/service.rs b/components/test_pd/src/mocker/service.rs index 95ffde14b7c..572eb9534f9 100644 --- a/components/test_pd/src/mocker/service.rs +++ b/components/test_pd/src/mocker/service.rs @@ -96,7 +96,7 @@ impl PdMocker for Service { if self.is_bootstrapped.load(Ordering::SeqCst) { let mut err = Error::default(); - err.set_type(ErrorType::Unknown); + err.set_type(ErrorType::AlreadyBootstrapped); err.set_message("cluster is already bootstrapped".to_owned()); header.set_error(err); resp.set_header(header); diff --git a/src/server/node.rs b/src/server/node.rs index 559055cbbb9..dfed9459b1c 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -43,7 +43,7 @@ use crate::{ }; const MAX_CHECK_CLUSTER_BOOTSTRAPPED_RETRY_COUNT: u64 = 60; -const CHECK_CLUSTER_BOOTSTRAPPED_RETRY_SECONDS: u64 = 3; +const CHECK_CLUSTER_BOOTSTRAPPED_RETRY_INTERVAL: Duration = Duration::from_secs(3); /// Creates a new storage engine which is backed by the Raft consensus /// protocol. 
@@ -436,9 +436,7 @@ where Err(e) => error!(?e; "bootstrap cluster"; "cluster_id" => self.cluster_id,), } retry += 1; - thread::sleep(Duration::from_secs( - CHECK_CLUSTER_BOOTSTRAPPED_RETRY_SECONDS, - )); + thread::sleep(CHECK_CLUSTER_BOOTSTRAPPED_RETRY_INTERVAL); } Err(box_err!("bootstrapped cluster failed")) } @@ -451,9 +449,7 @@ where warn!("check cluster bootstrapped failed"; "err" => ?e); } } - thread::sleep(Duration::from_secs( - CHECK_CLUSTER_BOOTSTRAPPED_RETRY_SECONDS, - )); + thread::sleep(CHECK_CLUSTER_BOOTSTRAPPED_RETRY_INTERVAL); } Err(box_err!("check cluster bootstrapped failed")) } From 89694308cf98822a232c166cc54dc7b8a02ed3dc Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 24 Jun 2022 13:44:38 +0800 Subject: [PATCH 0043/1149] coprocessor: fix panic on `analyze` when `max_sample_size == 0` (#12696) close tikv/tikv#11192, ref tikv/tikv#11425 Signed-off-by: Lucasliang Co-authored-by: Ti Chi Robot --- src/coprocessor/statistics/analyze.rs | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index 7b826487cc1..bb0348be98f 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -748,6 +748,10 @@ impl RowSampleCollector for ReservoirRowSampleCollector { } fn sampling(&mut self, data: Vec>) { + // We should tolerate the abnormal case => `self.max_sample_size == 0`. + if self.max_sample_size == 0 { + return; + } let mut need_push = false; let cur_rng = self.base.rng.gen_range(0, i64::MAX); if self.samples.len() < self.max_sample_size { @@ -1330,4 +1334,33 @@ mod tests { ); } } + + #[test] + fn test_abnormal_sampling() { + let sample_num = 0; // abnormal. 
+ let row_num = 100; + let mut nums: Vec> = Vec::with_capacity(row_num); + for i in 0..row_num { + nums.push( + datum::encode_value(&mut EvalContext::default(), &[Datum::I64(i as i64)]).unwrap(), + ); + } + { + // Test for ReservoirRowSampleCollector + let mut collector = ReservoirRowSampleCollector::new(sample_num, 1000, 1); + for row in &nums { + collector.sampling([row.clone()].to_vec()); + } + assert_eq!(collector.samples.len(), 0); + } + { + // Test for BernoulliRowSampleCollector + let mut collector = + BernoulliRowSampleCollector::new(sample_num as f64 / row_num as f64, 1000, 1); + for row in &nums { + collector.sampling([row.clone()].to_vec()); + } + assert_eq!(collector.samples.len(), 0); + } + } } From 54b5cca4810c5687d718bebd6181bbd2948b4264 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BE=99=E6=96=B9=E6=B7=9E?= Date: Fri, 24 Jun 2022 14:20:38 +0800 Subject: [PATCH 0044/1149] add reset_to_version command back (#12823) close tikv/tikv#12824 Signed-off-by: longfangsong Co-authored-by: Ti Chi Robot --- cmd/tikv-ctl/src/cmd.rs | 6 ++++++ cmd/tikv-ctl/src/main.rs | 1 + src/server/reset_to_version.rs | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/cmd/tikv-ctl/src/cmd.rs b/cmd/tikv-ctl/src/cmd.rs index a1934c1acb8..4c49ccfa5ef 100644 --- a/cmd/tikv-ctl/src/cmd.rs +++ b/cmd/tikv-ctl/src/cmd.rs @@ -539,6 +539,12 @@ pub enum Cmd { /// PD endpoints pd: String, }, + /// Reset data in a TiKV to a certain version + ResetToVersion { + #[structopt(short = "v")] + /// The version to reset TiKV to + version: u64, + }, #[structopt(external_subcommand)] External(Vec), } diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 3ad066df491..e2ed740e779 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -479,6 +479,7 @@ fn main() { Cmd::Cluster {} => { debug_executor.dump_cluster_info(); } + Cmd::ResetToVersion { version } => debug_executor.reset_to_version(version), _ => { unreachable!() } diff --git 
a/src/server/reset_to_version.rs b/src/server/reset_to_version.rs index 7b99f48371d..263a8d2565a 100644 --- a/src/server/reset_to_version.rs +++ b/src/server/reset_to_version.rs @@ -233,7 +233,7 @@ impl ResetToVersionManager { *worker.state.lock() .expect("failed to lock `ResetToVersionWorker::state` in `ResetToVersionWorker::process_next_batch_lock`") = ResetToVersionState::Done; - + info!("Reset to version done!"); tikv_alloc::remove_thread_memory_accessor(); }) .expect("failed to spawn reset_to_version thread")); From a9c3e56552c803642f640e9b3fa8725aa3072400 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Fri, 24 Jun 2022 17:20:38 +0800 Subject: [PATCH 0045/1149] raftstore: record write durations into tracker (#12783) ref tikv/tikv#12362 This commit replaces the request_times in the raftstore callback with a tracker token. Then, the waterfall metrics of a raft command will be recorded into the tracker. Signed-off-by: Yilin Chen --- Cargo.lock | 1 + components/raftstore/Cargo.toml | 1 + components/raftstore/src/lib.rs | 1 + .../raftstore/src/store/async_io/write.rs | 45 ++++---- components/raftstore/src/store/fsm/apply.rs | 41 ++++--- components/raftstore/src/store/fsm/peer.rs | 25 ++-- .../raftstore/src/store/local_metrics.rs | 49 ++++++++ components/raftstore/src/store/msg.rs | 22 +++- components/raftstore/src/store/peer.rs | 107 +++++++++--------- components/tracker/src/lib.rs | 16 +++ 10 files changed, 197 insertions(+), 111 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 489ef39eaec..7c9902b7534 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4157,6 +4157,7 @@ dependencies = [ "tikv_util", "time", "tokio", + "tracker", "txn_types", "uuid", "yatp", diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 01519444b92..9d8c39d5746 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -85,6 +85,7 @@ tikv_alloc = { path = "../tikv_alloc" } tikv_util = { path = "../tikv_util", default-features = 
false } time = "0.1" tokio = { version = "1.5", features = ["sync", "rt-multi-thread"] } +tracker = { path = "../tracker" } txn_types = { path = "../txn_types", default-features = false } uuid = { version = "0.8.1", features = ["serde", "v4"] } yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } diff --git a/components/raftstore/src/lib.rs b/components/raftstore/src/lib.rs index b212001657a..ed70dacb37b 100644 --- a/components/raftstore/src/lib.rs +++ b/components/raftstore/src/lib.rs @@ -6,6 +6,7 @@ #![feature(min_specialization)] #![feature(box_patterns)] #![feature(hash_drain_filter)] +#![feature(let_chains)] #![recursion_limit = "256"] #[cfg(test)] diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index f81160d689d..99c4f56b7e4 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -38,7 +38,7 @@ use crate::{ store::{ config::Config, fsm::RaftRouter, - local_metrics::{RaftSendMessageMetrics, StoreWriteMetrics}, + local_metrics::{RaftSendMessageMetrics, StoreWriteMetrics, TimeTracker}, metrics::*, transport::Transport, util::LatencyInspector, @@ -97,7 +97,7 @@ where pub cut_logs: Option<(u64, u64)>, pub raft_state: Option, pub messages: Vec, - pub request_times: Vec, + pub trackers: Vec, } impl WriteTask @@ -117,7 +117,7 @@ where cut_logs: None, raft_state: None, messages: vec![], - request_times: vec![], + trackers: vec![], } } @@ -298,12 +298,12 @@ where } self.state_size = 0; if metrics.waterfall_metrics { - let now = Instant::now(); + let now = std::time::Instant::now(); for task in &self.tasks { - for t in &task.request_times { - metrics - .wf_before_write - .observe(duration_to_sec(now.saturating_duration_since(*t))); + for tracker in &task.trackers { + tracker.observe(now, &metrics.wf_before_write, |t| { + &mut t.metrics.wf_before_write_nanos + }); } } } @@ -311,12 +311,12 @@ where fn after_write_to_kv_db(&mut 
self, metrics: &StoreWriteMetrics) { if metrics.waterfall_metrics { - let now = Instant::now(); + let now = std::time::Instant::now(); for task in &self.tasks { - for t in &task.request_times { - metrics - .wf_kvdb_end - .observe(duration_to_sec(now.saturating_duration_since(*t))); + for tracker in &task.trackers { + tracker.observe(now, &metrics.wf_kvdb_end, |t| { + &mut t.metrics.wf_kvdb_end_nanos + }); } } } @@ -324,12 +324,12 @@ where fn after_write_to_raft_db(&mut self, metrics: &StoreWriteMetrics) { if metrics.waterfall_metrics { - let now = Instant::now(); + let now = std::time::Instant::now(); for task in &self.tasks { - for t in &task.request_times { - metrics - .wf_write_end - .observe(duration_to_sec(now.saturating_duration_since(*t))) + for tracker in &task.trackers { + tracker.observe(now, &metrics.wf_write_end, |t| { + &mut t.metrics.wf_write_end_nanos + }); } } } @@ -535,7 +535,14 @@ where self.store_id, self.tag, e ); }); - self.perf_context.report_metrics(&[]); // TODO: pass in request trackers + let trackers: Vec<_> = self + .batch + .tasks + .iter() + .flat_map(|task| task.trackers.iter().flat_map(|t| t.as_tracker_token())) + .collect(); + // TODO: Add a different perf context for raft engine. 
+ self.perf_context.report_metrics(&trackers); write_raft_time = duration_to_sec(now.saturating_elapsed()); STORE_WRITE_RAFTDB_DURATION_HISTOGRAM.observe(write_raft_time); } diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index e28c8cf2424..03034b76245 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -75,7 +75,7 @@ use crate::{ store::{ cmd_resp, fsm::RaftPollerBuilder, - local_metrics::RaftMetrics, + local_metrics::{RaftMetrics, TimeTracker}, memory::*, metrics::*, msg::{Callback, PeerMsg, ReadResponse, SignificantMsg}, @@ -526,7 +526,15 @@ where self.kv_wb().write_opt(&write_opts).unwrap_or_else(|e| { panic!("failed to write to engine: {:?}", e); }); - self.perf_context.report_metrics(&[]); // TODO: pass in request trackers + let trackers: Vec<_> = self + .applied_batch + .cb_batch + .iter() + .flat_map(|(cb, _)| cb.get_trackers()) + .flat_map(|trackers| trackers.iter().map(|t| t.as_tracker_token())) + .flatten() + .collect(); + self.perf_context.report_metrics(&trackers); self.sync_log_hint = false; let data_size = self.kv_wb().data_size(); if data_size > APPLY_WB_SHRINK_SIZE { @@ -557,13 +565,10 @@ where self.host .on_flush_applied_cmd_batch(batch_max_level, cmd_batch, &self.engine); // Invoke callbacks - let now = Instant::now(); + let now = std::time::Instant::now(); for (cb, resp) in cb_batch.drain(..) 
{ - if let Some(times) = cb.get_request_times() { - for t in times { - self.apply_time - .observe(duration_to_sec(now.saturating_duration_since(*t))); - } + for tracker in cb.get_trackers().iter().flat_map(|v| *v) { + tracker.observe(now, &self.apply_time, |t| &mut t.metrics.apply_time_nanos); } cb.invoke_with_response(resp); } @@ -2912,17 +2917,17 @@ impl Apply { } pub fn on_schedule(&mut self, metrics: &RaftMetrics) { - let mut now = None; + let now = std::time::Instant::now(); for cb in &mut self.cbs { - if let Callback::Write { request_times, .. } = &mut cb.cb { - if now.is_none() { - now = Some(Instant::now()); - } - for t in request_times { - metrics - .store_time - .observe(duration_to_sec(now.unwrap().saturating_duration_since(*t))); - *t = now.unwrap(); + if let Callback::Write { trackers, .. } = &mut cb.cb { + for tracker in trackers { + tracker.observe(now, &metrics.store_time, |t| { + t.metrics.write_instant = Some(now); + &mut t.metrics.store_time_nanos + }); + if let TimeTracker::Instant(t) = tracker { + *t = now; + } } } } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 6abfc24c486..e08c440d6a1 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -73,7 +73,7 @@ use crate::{ ExecResult, }, hibernate_state::{GroupState, HibernateState}, - local_metrics::RaftMetrics, + local_metrics::{RaftMetrics, TimeTracker}, memory::*, metrics::*, msg::{Callback, ExtCallback, InspectedRaftMessage}, @@ -523,11 +523,11 @@ where })) }; - let times: SmallVec<[TiInstant; 4]> = cbs + let tokens: SmallVec<[TimeTracker; 4]> = cbs .iter_mut() .filter_map(|cb| { - if let Callback::Write { request_times, .. } = cb { - Some(request_times[0]) + if let Callback::Write { trackers, .. } = cb { + Some(trackers[0]) } else { None } @@ -546,8 +546,8 @@ where committed_cb, ); - if let Callback::Write { request_times, .. 
} = &mut cb { - *request_times = times; + if let Callback::Write { trackers, .. } = &mut cb { + *trackers = tokens; } return Some((req, cb)); @@ -4774,14 +4774,11 @@ where } if self.ctx.raft_metrics.waterfall_metrics { - if let Some(request_times) = cb.get_request_times() { - let now = TiInstant::now(); - for t in request_times { - self.ctx - .raft_metrics - .wf_batch_wait - .observe(duration_to_sec(now.saturating_duration_since(*t))); - } + let now = Instant::now(); + for tracker in cb.get_trackers().iter().flat_map(|v| *v) { + tracker.observe(now, &self.ctx.raft_metrics.wf_batch_wait, |t| { + &mut t.metrics.wf_batch_wait_nanos + }); } } diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index aa23f22bc2c..304259c4571 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -6,6 +6,7 @@ use std::sync::{Arc, Mutex}; use collections::HashSet; use prometheus::local::LocalHistogram; use raft::eraftpb::MessageType; +use tracker::{Tracker, TrackerToken, GLOBAL_TRACKERS}; use super::metrics::*; @@ -499,3 +500,51 @@ impl StoreWriteMetrics { } } } + +/// Tracker for the durations of a raftstore request. +/// If a global tracker is not available, it will fallback to an Instant. 
+#[derive(Debug, Clone, Copy)] +pub enum TimeTracker { + Tracker(TrackerToken), + Instant(std::time::Instant), +} + +impl TimeTracker { + pub fn as_tracker_token(&self) -> Option { + match self { + TimeTracker::Tracker(tt) => Some(*tt), + TimeTracker::Instant(_) => None, + } + } + + pub fn observe( + &self, + now: std::time::Instant, + local_metric: &LocalHistogram, + tracker_metric: impl FnOnce(&mut Tracker) -> &mut u64, + ) { + match self { + TimeTracker::Tracker(t) => { + if let Some(dur) = GLOBAL_TRACKERS + .with_tracker(*t, |tracker| { + tracker.metrics.write_instant.map(|write_instant| { + let dur = now.saturating_duration_since(write_instant); + let metric = tracker_metric(tracker); + if *metric == 0 { + *metric = dur.as_nanos() as u64; + } + dur + }) + }) + .flatten() + { + local_metric.observe(dur.as_secs_f64()); + } + } + TimeTracker::Instant(t) => { + let dur = now.saturating_duration_since(*t); + local_metric.observe(dur.as_secs_f64()); + } + } + } +} diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 46903771344..46900878178 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -22,8 +22,9 @@ use pd_client::BucketMeta; use raft::{GetEntriesContext, SnapshotStatus}; use smallvec::{smallvec, SmallVec}; use tikv_util::{deadline::Deadline, escape, memory::HeapSize, time::Instant}; +use tracker::{get_tls_tracker_token, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}; -use super::{AbstractPeer, RegionSnapshot}; +use super::{local_metrics::TimeTracker, AbstractPeer, RegionSnapshot}; use crate::store::{ fsm::apply::{CatchUpLogs, ChangeObserver, TaskRes as ApplyTaskRes}, metrics::RaftEventDurationType, @@ -98,7 +99,7 @@ pub enum Callback { /// `committed_cb` is called after a request is committed and before it's being applied, and /// it's guaranteed that the request will be successfully applied soon. 
committed_cb: Option, - request_times: SmallVec<[Instant; 4]>, + trackers: SmallVec<[TimeTracker; 4]>, }, #[cfg(any(test, feature = "testexport"))] /// Test purpose callback @@ -120,17 +121,28 @@ where proposed_cb: Option, committed_cb: Option, ) -> Self { + let tracker_token = get_tls_tracker_token(); + let now = std::time::Instant::now(); + let tracker = if tracker_token == INVALID_TRACKER_TOKEN { + TimeTracker::Instant(now) + } else { + GLOBAL_TRACKERS.with_tracker(tracker_token, |tracker| { + tracker.metrics.write_instant = Some(now); + }); + TimeTracker::Tracker(tracker_token) + }; + Callback::Write { cb, proposed_cb, committed_cb, - request_times: smallvec![Instant::now()], + trackers: smallvec![tracker], } } - pub fn get_request_times(&self) -> Option<&SmallVec<[Instant; 4]>> { + pub fn get_trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>> { match self { - Callback::Write { request_times, .. } => Some(request_times), + Callback::Write { trackers, .. } => Some(trackers), _ => None, } } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index eb1fc93e1ee..2853fcd4169 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -65,12 +65,13 @@ use tikv_util::{ Either, }; use time::Timespec; +use tracker::GLOBAL_TRACKERS; use txn_types::WriteBatchFlags; use uuid::Uuid; use super::{ cmd_resp, - local_metrics::{RaftMetrics, RaftReadyMetrics}, + local_metrics::{RaftMetrics, RaftReadyMetrics, TimeTracker}, metrics::*, peer_storage::{write_peer_state, CheckApplyingSnapStatus, HandleReadyResult, PeerStorage}, read_queue::{ReadIndexQueue, ReadIndexRequest}, @@ -137,16 +138,16 @@ impl ProposalQueue { } } - /// Find the request times of given index. - /// Caller should check if term is matched before using request times. - fn find_request_times(&self, index: u64) -> Option<(u64, &SmallVec<[TiInstant; 4]>)> { + /// Find the trackers of given index. 
+ /// Caller should check if term is matched before using trackers. + fn find_trackers(&self, index: u64) -> Option<(u64, &SmallVec<[TimeTracker; 4]>)> { self.queue .binary_search_by_key(&index, |p: &Proposal<_>| p.index) .ok() .and_then(|i| { self.queue[i] .cb - .get_request_times() + .get_trackers() .map(|ts| (self.queue[i].term, ts)) }) } @@ -725,9 +726,6 @@ where #[getset(get = "pub")] leader_lease: Lease, pending_reads: ReadIndexQueue, - /// Record the propose instants to calculate the wait duration before - /// the proposal is sent through the Raft client. - pending_propose_instants: VecDeque<(u64, Instant)>, /// If it fails to send messages to leader. pub leader_unreachable: bool, @@ -930,7 +928,6 @@ where raft_max_inflight_msgs: cfg.raft_max_inflight_msgs, proposals: ProposalQueue::new(tag.clone()), pending_reads: Default::default(), - pending_propose_instants: Default::default(), peer_cache: RefCell::new(HashMap::default()), peer_heartbeats: HashMap::default(), peers_start_pending_time: vec![], @@ -1578,7 +1575,7 @@ where ctx: &mut PollContext, msgs: Vec, ) { - let now = Instant::now(); + let mut now = None; for msg in msgs { let msg_type = msg.get_message().get_msg_type(); if msg_type == MessageType::MsgSnapshot { @@ -1594,7 +1591,7 @@ where // network partition from the new leader. // For lease safety during leader transfer, transit `leader_lease` // to suspect. 
- self.leader_lease.suspect(monotonic_raw_now()); + self.leader_lease.suspect(*now.insert(monotonic_raw_now())); } let to_peer_id = msg.get_to_peer().get_id(); @@ -1610,22 +1607,31 @@ where "disk_usage" => ?msg.get_disk_usage(), ); - for index in msg + for (term, index) in msg .get_message() .get_entries() .iter() - .map(|e| e.get_index()) + .map(|e| (e.get_term(), e.get_index())) { - while let Some((propose_idx, instant)) = self.pending_propose_instants.front() { - if index == *propose_idx { + if let Ok(idx) = self + .proposals + .queue + .binary_search_by_key(&index, |p: &Proposal<_>| p.index) + { + let proposal = &self.proposals.queue[idx]; + if term == proposal.term + && let Some(propose_time) = proposal.propose_time + && let Ok(dur) = ((*now.get_or_insert(monotonic_raw_now())) - propose_time).to_std() { ctx.raft_metrics .proposal_send_wait - .observe(now.saturating_duration_since(*instant).as_secs_f64()); - } - if index >= *propose_idx { - self.pending_propose_instants.pop_front(); - } else { - break; + .observe(dur.as_secs_f64()); + for t in proposal.cb.get_trackers().iter().flat_map(|v| v.iter().flat_map(|t| t.as_tracker_token())) { + GLOBAL_TRACKERS.with_tracker(t, |trakcer| { + if trakcer.metrics.propose_send_wait_nanos == 0{ + trakcer.metrics.propose_send_wait_nanos = dur.as_nanos() as u64; + } + }); + } } } } @@ -1753,22 +1759,19 @@ where if !metrics.waterfall_metrics || self.proposals.is_empty() { return; } - let mut now = None; + let now = Instant::now(); for index in pre_persist_index + 1..=self.raft_group.raft.raft_log.persisted { - if let Some((term, times)) = self.proposals.find_request_times(index) { + if let Some((term, trackers)) = self.proposals.find_trackers(index) { if self .get_store() .term(index) .map(|t| t == term) .unwrap_or(false) { - if now.is_none() { - now = Some(TiInstant::now()); - } - for t in times { - metrics - .wf_persist_log - .observe(duration_to_sec(now.unwrap().saturating_duration_since(*t))); + for tracker in trackers { + 
tracker.observe(now, &metrics.wf_persist_log, |t| { + &mut t.metrics.wf_persist_log_nanos + }); } } } @@ -1779,25 +1782,26 @@ where if !metrics.waterfall_metrics || self.proposals.is_empty() { return; } - let mut now = None; + let now = Instant::now(); for index in pre_commit_index + 1..=self.raft_group.raft.raft_log.committed { - if let Some((term, times)) = self.proposals.find_request_times(index) { + if let Some((term, trackers)) = self.proposals.find_trackers(index) { if self .get_store() .term(index) .map(|t| t == term) .unwrap_or(false) { - if now.is_none() { - now = Some(TiInstant::now()); - } - let hist = if index <= self.raft_group.raft.raft_log.persisted { + let commit_persisted = index <= self.raft_group.raft.raft_log.persisted; + let hist = if commit_persisted { &metrics.wf_commit_log } else { &metrics.wf_commit_not_persist_log }; - for t in times { - hist.observe(duration_to_sec(now.unwrap().saturating_duration_since(*t))); + for tracker in trackers { + tracker.observe(now, hist, |t| { + t.metrics.commit_not_persisted = !commit_persisted; + &mut t.metrics.wf_commit_log_nanos + }); } } } @@ -2083,7 +2087,6 @@ where self.mut_store().cancel_generating_snap(None); self.clear_disk_full_peers(ctx); self.clear_in_memory_pessimistic_locks(); - self.pending_propose_instants.clear(); } _ => {} } @@ -2520,20 +2523,17 @@ where let state_role = ready.ss().map(|ss| ss.raft_state); let has_new_entries = !ready.entries().is_empty(); - let mut request_times = vec![]; + let mut trackers = vec![]; if ctx.raft_metrics.waterfall_metrics { - let mut now = None; + let now = Instant::now(); for entry in ready.entries() { - if let Some((term, times)) = self.proposals.find_request_times(entry.get_index()) { + if let Some((term, times)) = self.proposals.find_trackers(entry.get_index()) { if entry.term == term { - request_times.extend_from_slice(times); - if now.is_none() { - now = Some(TiInstant::now()); - } - for t in times { - 
ctx.raft_metrics.wf_send_to_queue.observe(duration_to_sec( - now.unwrap().saturating_duration_since(*t), - )); + trackers.extend_from_slice(times); + for tracker in times { + tracker.observe(now, &ctx.raft_metrics.wf_send_to_queue, |t| { + &mut t.metrics.wf_send_to_queue_nanos + }); } } } @@ -2560,8 +2560,8 @@ where task.messages = self.build_raft_messages(ctx, persisted_msgs); } - if !request_times.is_empty() { - task.request_times = request_times; + if !trackers.is_empty() { + task.trackers = trackers; } if let Some(write_worker) = &mut ctx.sync_write_worker { @@ -4308,9 +4308,6 @@ where } } - self.pending_propose_instants - .push_back((propose_index, Instant::now())); - Ok(Either::Left(propose_index)) } diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index 0e932658aba..25a5610d034 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -6,6 +6,8 @@ mod metrics; mod slab; mod tls; +use std::time::Instant; + use kvproto::kvrpcpb as pb; pub use self::{ @@ -92,4 +94,18 @@ pub struct RequestMetrics { pub block_read_nanos: u64, pub internal_key_skipped_count: u64, pub deleted_key_skipped_count: u64, + // temp instant used in raftstore metrics, first be the instant when creating the write callback, + // then reset when it is ready to apply + pub write_instant: Option, + pub wf_batch_wait_nanos: u64, + pub wf_send_to_queue_nanos: u64, + pub wf_persist_log_nanos: u64, + pub wf_before_write_nanos: u64, + pub wf_write_end_nanos: u64, + pub wf_kvdb_end_nanos: u64, + pub wf_commit_log_nanos: u64, + pub propose_send_wait_nanos: u64, + pub commit_not_persisted: bool, + pub store_time_nanos: u64, + pub apply_time_nanos: u64, } From 9fe0d5772399deffb4fcf199ccc9969792f1bcb5 Mon Sep 17 00:00:00 2001 From: Shenghui Wu <793703860@qq.com> Date: Mon, 27 Jun 2022 21:10:39 +0800 Subject: [PATCH 0046/1149] copr: update scanned range for each batch when paging is enable (#12886) ref tikv/tikv#12848 Update scanned range for each batch 
when paging is enable Signed-off-by: wshwsh12 <793703860@qq.com> Co-authored-by: Ti Chi Robot --- .../src/storage/ranges_iter.rs | 6 + .../tidb_query_common/src/storage/scanner.rs | 223 ++++++++++++++++-- .../src/util/scan_executor.rs | 4 +- 3 files changed, 211 insertions(+), 22 deletions(-) diff --git a/components/tidb_query_common/src/storage/ranges_iter.rs b/components/tidb_query_common/src/storage/ranges_iter.rs index 88d103a763f..061cd339129 100644 --- a/components/tidb_query_common/src/storage/ranges_iter.rs +++ b/components/tidb_query_common/src/storage/ranges_iter.rs @@ -64,6 +64,12 @@ impl RangesIterator { pub fn notify_drained(&mut self) { self.in_range = false; } + + /// Check drained. + #[inline] + pub fn is_drained(&mut self) -> bool { + self.iter.len() == 0 + } } #[cfg(test)] diff --git a/components/tidb_query_common/src/storage/scanner.rs b/components/tidb_query_common/src/storage/scanner.rs index 6e72ba13fca..1c1a1cea111 100644 --- a/components/tidb_query_common/src/storage/scanner.rs +++ b/components/tidb_query_common/src/storage/scanner.rs @@ -65,6 +65,15 @@ impl RangesScanner { // Note: This is not implemented over `Iterator` since it can fail. // TODO: Change to use reference to avoid allocation and copy. pub fn next(&mut self) -> Result, StorageError> { + self.next_opt(true) + } + + /// Fetches next row. + /// Note: `update_scanned_range` can control whether update the scanned range when `is_scanned_range_aware` is true. 
+ pub fn next_opt( + &mut self, + update_scanned_range: bool, + ) -> Result, StorageError> { loop { let range = self.ranges_iter.next(); let some_row = match range { @@ -93,7 +102,7 @@ impl RangesScanner { return Ok(None); // drained } }; - if self.is_scanned_range_aware { + if self.is_scanned_range_aware && update_scanned_range { self.update_scanned_range_from_scanned_row(&some_row); } if some_row.is_some() { @@ -159,31 +168,35 @@ impl RangesScanner { fn update_scanned_range_from_new_point(&mut self, point: &PointRange) { assert!(self.is_scanned_range_aware); - self.update_working_range_end_key(); - self.current_range.lower_inclusive.clear(); - self.current_range.upper_exclusive.clear(); - self.current_range - .lower_inclusive - .extend_from_slice(&point.0); - self.current_range - .upper_exclusive - .extend_from_slice(&point.0); - self.current_range.upper_exclusive.push(0); + // Only update current_range for the first and the last range. + if self.current_range.lower_inclusive.is_empty() || self.ranges_iter.is_drained() { + self.current_range.lower_inclusive.clear(); + self.current_range.upper_exclusive.clear(); + self.current_range + .lower_inclusive + .extend_from_slice(&point.0); + self.current_range + .upper_exclusive + .extend_from_slice(&point.0); + self.current_range.upper_exclusive.push(0); + } self.update_working_range_begin_key(); } fn update_scanned_range_from_new_range(&mut self, range: &IntervalRange) { assert!(self.is_scanned_range_aware); - self.update_working_range_end_key(); - self.current_range.lower_inclusive.clear(); - self.current_range.upper_exclusive.clear(); - self.current_range - .lower_inclusive - .extend_from_slice(&range.lower_inclusive); - self.current_range - .upper_exclusive - .extend_from_slice(&range.upper_exclusive); + // Only update current_range for the first and the last range. 
+ if self.current_range.lower_inclusive.is_empty() || self.ranges_iter.is_drained() { + self.current_range.lower_inclusive.clear(); + self.current_range.upper_exclusive.clear(); + self.current_range + .lower_inclusive + .extend_from_slice(&range.lower_inclusive); + self.current_range + .upper_exclusive + .extend_from_slice(&range.upper_exclusive); + } self.update_working_range_begin_key(); } @@ -666,4 +679,174 @@ mod tests { assert_eq!(&r.lower_inclusive, b"foo"); assert_eq!(&r.upper_exclusive, b"foo"); } + + #[test] + fn test_scanned_range_forward2() { + let storage = create_storage(); + // Filled interval range + let ranges = vec![IntervalRange::from(("foo", "foo_8")).into()]; + let mut scanner = RangesScanner::new(RangesScannerOptions { + storage: storage.clone(), + ranges, + scan_backward_in_range: false, + is_key_only: false, + is_scanned_range_aware: true, + }); + + // Only lower_inclusive is updated. + assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"foo"); + assert_eq!(&scanner.working_range_begin_key, b"foo"); + assert_eq!(&scanner.working_range_end_key, b""); + + // Upper_exclusive is updated. + assert_eq!(&scanner.next_opt(true).unwrap().unwrap().0, b"foo_2"); + assert_eq!(&scanner.working_range_begin_key, b"foo"); + assert_eq!(&scanner.working_range_end_key, b"foo_2\0"); + + // Upper_exclusive is not updated. + assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"foo_3"); + assert_eq!(&scanner.working_range_begin_key, b"foo"); + assert_eq!(&scanner.working_range_end_key, b"foo_2\0"); + + // Drained. + assert_eq!(scanner.next_opt(false).unwrap(), None); + assert_eq!(&scanner.working_range_begin_key, b"foo"); + assert_eq!(&scanner.working_range_end_key, b"foo_8"); + + let r = scanner.take_scanned_range(); + assert_eq!(&r.lower_inclusive, b"foo"); + assert_eq!(&r.upper_exclusive, b"foo_8"); + + // Multiple ranges + // TODO: caller should not pass in unordered ranges otherwise scanned ranges would be + // unsound. 
+ let ranges = vec![ + IntervalRange::from(("foo", "foo_3")).into(), + IntervalRange::from(("foo_5", "foo_50")).into(), + IntervalRange::from(("bar", "bar_")).into(), + PointRange::from("bar_2").into(), + PointRange::from("bar_3").into(), + IntervalRange::from(("bar_4", "box")).into(), + ]; + let mut scanner = RangesScanner::new(RangesScannerOptions { + storage, + ranges, + scan_backward_in_range: false, + is_key_only: false, + is_scanned_range_aware: true, + }); + + // Only lower_inclusive is updated. + assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"foo"); + assert_eq!(&scanner.working_range_begin_key, b"foo"); + assert_eq!(&scanner.working_range_end_key, b""); + + // Upper_exclusive is updated. Updated by scanned row. + assert_eq!(&scanner.next_opt(true).unwrap().unwrap().0, b"foo_2"); + assert_eq!(&scanner.working_range_begin_key, b"foo"); + assert_eq!(&scanner.working_range_end_key, b"foo_2\0"); + + // Upper_exclusive is not updated. + assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"bar"); + assert_eq!(&scanner.working_range_begin_key, b"foo"); + assert_eq!(&scanner.working_range_end_key, b"foo_2\0"); + + // Upper_exclusive is not updated. + assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"bar_2"); + assert_eq!(&scanner.working_range_begin_key, b"foo"); + assert_eq!(&scanner.working_range_end_key, b"foo_2\0"); + + // Drain. 
+ assert_eq!(scanner.next_opt(false).unwrap(), None); + assert_eq!(&scanner.working_range_begin_key, b"foo"); + assert_eq!(&scanner.working_range_end_key, b"box"); + + let r = scanner.take_scanned_range(); + assert_eq!(&r.lower_inclusive, b"foo"); + assert_eq!(&r.upper_exclusive, b"box"); + } + + #[test] + fn test_scanned_range_backward2() { + let storage = create_storage(); + // Filled interval range + let ranges = vec![IntervalRange::from(("foo", "foo_8")).into()]; + let mut scanner = RangesScanner::new(RangesScannerOptions { + storage: storage.clone(), + ranges, + scan_backward_in_range: true, + is_key_only: false, + is_scanned_range_aware: true, + }); + + // Only lower_inclusive is updated. + assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"foo_3"); + assert_eq!(&scanner.working_range_begin_key, b"foo_8"); + assert_eq!(&scanner.working_range_end_key, b""); + + // Upper_exclusive is updated. + assert_eq!(&scanner.next_opt(true).unwrap().unwrap().0, b"foo_2"); + assert_eq!(&scanner.working_range_begin_key, b"foo_8"); + assert_eq!(&scanner.working_range_end_key, b"foo_2"); + + // Upper_exclusive is not updated. + assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"foo"); + assert_eq!(&scanner.working_range_begin_key, b"foo_8"); + assert_eq!(&scanner.working_range_end_key, b"foo_2"); + + // Drained. 
+ assert_eq!(scanner.next_opt(false).unwrap(), None); + assert_eq!(&scanner.working_range_begin_key, b"foo_8"); + assert_eq!(&scanner.working_range_end_key, b"foo"); + + let r = scanner.take_scanned_range(); + assert_eq!(&r.lower_inclusive, b"foo"); + assert_eq!(&r.upper_exclusive, b"foo_8"); + + // Multiple ranges + let ranges = vec![ + IntervalRange::from(("bar_4", "box")).into(), + PointRange::from("bar_3").into(), + PointRange::from("bar_2").into(), + IntervalRange::from(("bar", "bar_")).into(), + IntervalRange::from(("foo_5", "foo_50")).into(), + IntervalRange::from(("foo", "foo_3")).into(), + ]; + let mut scanner = RangesScanner::new(RangesScannerOptions { + storage, + ranges, + scan_backward_in_range: true, + is_key_only: false, + is_scanned_range_aware: true, + }); + + // Lower_inclusive is updated. Upper_exclusive is not update. + assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"bar_2"); + assert_eq!(&scanner.working_range_begin_key, b"box"); + assert_eq!(&scanner.working_range_end_key, b""); + + // Upper_exclusive is updated. Updated by scanned row. + assert_eq!(&scanner.next_opt(true).unwrap().unwrap().0, b"bar"); + assert_eq!(&scanner.working_range_begin_key, b"box"); + assert_eq!(&scanner.working_range_end_key, b"bar"); + + // Upper_exclusive is not update. + assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"foo_2"); + assert_eq!(&scanner.working_range_begin_key, b"box"); + assert_eq!(&scanner.working_range_end_key, b"bar"); + + // Upper_exclusive is not update. + assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"foo"); + assert_eq!(&scanner.working_range_begin_key, b"box"); + assert_eq!(&scanner.working_range_end_key, b"bar"); + + // Drain. 
+ assert_eq!(scanner.next_opt(false).unwrap(), None); + assert_eq!(&scanner.working_range_begin_key, b"box"); + assert_eq!(&scanner.working_range_end_key, b"foo"); + + let r = scanner.take_scanned_range(); + assert_eq!(&r.lower_inclusive, b"foo"); + assert_eq!(&r.upper_exclusive, b"box"); + } } diff --git a/components/tidb_query_executors/src/util/scan_executor.rs b/components/tidb_query_executors/src/util/scan_executor.rs index 14cf9abb1b6..114bc77ee1a 100644 --- a/components/tidb_query_executors/src/util/scan_executor.rs +++ b/components/tidb_query_executors/src/util/scan_executor.rs @@ -102,8 +102,8 @@ impl ScanExecutor { ) -> Result { assert!(scan_rows > 0); - for _ in 0..scan_rows { - let some_row = self.scanner.next()?; + for i in 0..scan_rows { + let some_row = self.scanner.next_opt(i == scan_rows - 1)?; if let Some((key, value)) = some_row { // Retrieved one row from point range or non-point range. From 43ebcba1b7d89b7cd17cf2d5f20f3ba9689468ee Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Mon, 27 Jun 2022 16:10:39 -0700 Subject: [PATCH 0047/1149] TiKV: support tablet in DBConfigManager (#12884) close tikv/tikv#12883 DBConfigManager should work with tablets, meanwhile compatible with current single kv engine. 
Signed-off-by: qi.xu Co-authored-by: qi.xu --- components/engine_panic/src/engine.rs | 12 ++- components/engine_rocks/src/engine.rs | 11 ++ components/engine_traits/src/engine.rs | 135 +++++++++++++++++++++++-- components/server/src/server.rs | 8 +- src/config.rs | 108 ++++++++++++++------ src/server/engine_factory.rs | 29 ++++-- src/server/engine_factory_v2.rs | 39 +++++-- 7 files changed, 287 insertions(+), 55 deletions(-) diff --git a/components/engine_panic/src/engine.rs b/components/engine_panic/src/engine.rs index 33c7bc01541..5608b55ea00 100644 --- a/components/engine_panic/src/engine.rs +++ b/components/engine_panic/src/engine.rs @@ -2,7 +2,7 @@ use engine_traits::{ IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, Result, SeekKey, SyncMutable, - WriteOptions, + TabletAccessor, WriteOptions, }; use crate::{db_vector::PanicDBVector, snapshot::PanicSnapshot, write_batch::PanicWriteBatch}; @@ -24,6 +24,16 @@ impl KvEngine for PanicEngine { } } +impl TabletAccessor for PanicEngine { + fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &PanicEngine)) { + panic!() + } + + fn is_single_engine(&self) -> bool { + panic!() + } +} + impl Peekable for PanicEngine { type DBVector = PanicDBVector; diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 32bd259f160..60be2007367 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -4,6 +4,7 @@ use std::{any::Any, fs, path::Path, sync::Arc}; use engine_traits::{ Error, IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SyncMutable, + TabletAccessor, }; use rocksdb::{DBIterator, Writable, DB}; @@ -110,6 +111,16 @@ impl KvEngine for RocksEngine { } } +impl TabletAccessor for RocksEngine { + fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &RocksEngine)) { + f(0, 0, self); + } + + fn is_single_engine(&self) -> bool { + true + } +} + impl Iterable for RocksEngine { type Iterator = 
RocksEngineIterator; diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index a2aa5e5d908..de99f924038 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -2,9 +2,14 @@ use std::{ fmt::Debug, + io::Write, path::{Path, PathBuf}, + str, + vec::Vec, }; +use tikv_util::error; + use crate::*; // FIXME: Revisit the remaining types and methods on KvEngine. Some of these are @@ -61,11 +66,95 @@ pub trait KvEngine: fn bad_downcast(&self) -> &T; } +/// TabletAccessor is the trait to access all the tablets with provided accessor +/// +/// For single rocksdb instance, it essentially accesses the global kvdb with the accessor +/// For multi rocksdb instances, it accesses all the tablets with the accessor +pub trait TabletAccessor { + /// Loop visit all opened tablets by the specified function. + fn for_each_opened_tablet(&self, _f: &mut (dyn FnMut(u64, u64, &EK))); + + /// return true if it's single engine; + /// return false if it's a multi-tablet factory; + fn is_single_engine(&self) -> bool; +} + +/// max error count to log +const MAX_ERROR_COUNT: u32 = 5; + +/// TabletErrorCollector is the facility struct to handle errors when using TabletAccessor::for_each_opened_tablet +/// +/// It will choose the last failed result as the final result, meanwhile logging errors up to MAX_ERROR_COUNT. 
+pub struct TabletErrorCollector { + errors: Vec, + max_error_count: u32, + error_count: u32, + result: std::result::Result<(), Box>, +} + +impl TabletErrorCollector { + pub fn new() -> Self { + Self { + errors: vec![], + max_error_count: MAX_ERROR_COUNT, + error_count: 0, + result: Ok(()), + } + } + + pub fn add_result(&mut self, region_id: u64, suffix: u64, result: Result<()>) { + if result.is_ok() { + return; + } + self.result = Err(Box::from(result.err().unwrap())); + self.error_count += 1; + if self.error_count > self.max_error_count { + return; + } + writeln!( + &mut self.errors, + "Tablet {}_{} encountered error: {:?}.", + region_id, suffix, self.result + ) + .unwrap(); + } + + fn flush_error(&self) { + if self.error_count > 0 { + error!( + "Total count {}. Sample errors: {}", + self.error_count, + str::from_utf8(&self.errors).unwrap() + ); + } + } + + pub fn take_result(&mut self) -> std::result::Result<(), Box> { + std::mem::replace(&mut self.result, Ok(())) + } + + pub fn get_error_count(&self) -> u32 { + self.error_count + } +} + +impl Default for TabletErrorCollector { + fn default() -> Self { + Self::new() + } +} + +impl Drop for TabletErrorCollector { + fn drop(&mut self) { + self.flush_error() + } +} + /// A factory trait to create new engine. /// // It should be named as `EngineFactory` for consistency, but we are about to rename // engine to tablet, so always use tablet for new traits/types. -pub trait TabletFactory { +pub trait TabletFactory: TabletAccessor { /// Create an tablet by id and suffix. If the tablet exists, it will fail. /// The id is likely the region Id, the suffix could be the current raft log index. /// They together could specify a unique path for a region's tablet. @@ -119,10 +208,6 @@ pub trait TabletFactory { /// Here we don't use Clone traint because it will break the trait's object safty fn clone(&self) -> Box + Send>; - /// Loop visit all opened tablets cached by the specified function. 
- /// Once the tablet is opened/created, it will be cached in a hashmap - fn loop_tablet_cache(&self, _f: Box); - /// Load the tablet from path for id and suffix--for scenarios such as applying snapshot fn load_tablet(&self, _path: &Path, _id: u64, _suffix: u64) -> Result { unimplemented!(); @@ -185,7 +270,20 @@ where root_path: self.root_path.clone(), }) } - fn loop_tablet_cache(&self, _f: Box) {} +} +impl TabletAccessor for DummyFactory +where + EK: Clone + Send + 'static, +{ + fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &EK)) { + if let Some(engine) = &self.engine { + f(0, 0, engine); + } + } + + fn is_single_engine(&self) -> bool { + true + } } impl DummyFactory @@ -202,3 +300,28 @@ impl Default for DummyFactory { Self::new(None, "/tmp".to_string()) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_tablet_error_collector_ok() { + let mut err = TabletErrorCollector::new(); + err.add_result(1, 1, Ok(())); + assert!(err.take_result().is_ok()); + assert_eq!(err.get_error_count(), 0); + } + + #[test] + fn test_tablet_error_collector_err() { + let mut err = TabletErrorCollector::new(); + err.add_result(1, 1, Ok(())); + err.add_result(1, 1, Err("this is an error1".to_string().into())); + err.add_result(1, 1, Err("this is an error2".to_string().into())); + err.add_result(1, 1, Ok(())); + let r = err.take_result(); + assert!(r.is_err()); + assert_eq!(err.get_error_count(), 2); + } +} diff --git a/components/server/src/server.rs b/components/server/src/server.rs index cded99edfe3..e09eec7d5d8 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1531,7 +1531,11 @@ impl ConfiguredRaftEngine for RocksEngine { fn register_config(&self, cfg_controller: &mut ConfigController, share_cache: bool) { cfg_controller.register( tikv::config::Module::Raftdb, - Box::new(DBConfigManger::new(self.clone(), DBType::Raft, share_cache)), + Box::new(DBConfigManger::new( + Arc::new(self.clone()), + DBType::Raft, + 
share_cache, + )), ); } } @@ -1614,7 +1618,7 @@ impl TiKvServer { cfg_controller.register( tikv::config::Module::Rocksdb, Box::new(DBConfigManger::new( - engines.kv.clone(), + Arc::new(factory), DBType::Kv, self.config.storage.block_cache.shared, )), diff --git a/src/config.rs b/src/config.rs index d3ec96f6ba4..fd6ec16253b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -12,6 +12,7 @@ use std::{ fs, i32, io::{Error as IoError, ErrorKind, Write}, path::Path, + str, sync::{Arc, RwLock}, usize, }; @@ -35,8 +36,8 @@ use engine_rocks::{ DEFAULT_PROP_KEYS_INDEX_DISTANCE, DEFAULT_PROP_SIZE_INDEX_DISTANCE, }; use engine_traits::{ - CFOptionsExt, ColumnFamilyOptions as ColumnFamilyOptionsTrait, DBOptionsExt, CF_DEFAULT, - CF_LOCK, CF_RAFT, CF_WRITE, + CFOptionsExt, ColumnFamilyOptions as ColumnFamilyOptionsTrait, DBOptionsExt, TabletAccessor, + TabletErrorCollector, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use file_system::{IOPriority, IORateLimiter}; use keys::region_raft_prefix_len; @@ -1494,31 +1495,39 @@ pub enum DBType { Raft, } -pub struct DBConfigManger { - db: RocksEngine, +pub struct DBConfigManger> { + tablet_accessor: Arc, db_type: DBType, shared_block_cache: bool, } -impl DBConfigManger { - pub fn new(db: RocksEngine, db_type: DBType, shared_block_cache: bool) -> Self { +impl> DBConfigManger { + pub fn new(tablet_accessor: Arc, db_type: DBType, shared_block_cache: bool) -> Self { DBConfigManger { - db, + tablet_accessor, db_type, shared_block_cache, } } -} -impl DBConfigManger { fn set_db_config(&self, opts: &[(&str, &str)]) -> Result<(), Box> { - self.db.set_db_options(opts)?; - Ok(()) + let mut error_collector = TabletErrorCollector::new(); + self.tablet_accessor + .for_each_opened_tablet(&mut |region_id, suffix, db: &RocksEngine| { + error_collector.add_result(region_id, suffix, db.set_db_options(opts)); + }); + error_collector.take_result() } fn set_cf_config(&self, cf: &str, opts: &[(&str, &str)]) -> Result<(), Box> { + let mut error_collector = 
TabletErrorCollector::new(); self.validate_cf(cf)?; - self.db.set_options_cf(cf, opts)?; + self.tablet_accessor + .for_each_opened_tablet(&mut |region_id, suffix, db: &RocksEngine| { + error_collector.add_result(region_id, suffix, db.set_options_cf(cf, opts)); + }); + error_collector.take_result()?; + // Write config to metric for (cfg_name, cfg_value) in opts { let cfg_value = match cfg_value { @@ -1542,33 +1551,68 @@ impl DBConfigManger { block-cache.capacity in storage module instead" .into()); } - let opt = self.db.get_options_cf(cf)?; - opt.set_block_cache_capacity(size.0)?; + // for multi-rocks, shared block cache has to be enabled and thus should shortcut in the above if statement. + assert!(self.tablet_accessor.is_single_engine()); + let mut error_collector = TabletErrorCollector::new(); + self.tablet_accessor + .for_each_opened_tablet(&mut |region_id, suffix, db: &RocksEngine| { + let r = db.get_options_cf(cf); + if let Ok(opt) = r { + let r = opt.set_block_cache_capacity(size.0); + if let Err(r) = r { + error_collector.add_result(region_id, suffix, Err(r.into())); + } + } else if let Err(r) = r { + error_collector.add_result(region_id, suffix, Err(r)); + } + }); // Write config to metric CONFIG_ROCKSDB_GAUGE .with_label_values(&[cf, "block_cache_size"]) .set(size.0 as f64); - Ok(()) + error_collector.take_result() } fn set_rate_bytes_per_sec(&self, rate_bytes_per_sec: i64) -> Result<(), Box> { - let mut opt = self.db.as_inner().get_db_options(); - opt.set_rate_bytes_per_sec(rate_bytes_per_sec)?; - Ok(()) + let mut error_collector = TabletErrorCollector::new(); + self.tablet_accessor + .for_each_opened_tablet(&mut |region_id, suffix, db: &RocksEngine| { + let mut opt = db.as_inner().get_db_options(); + let r = opt.set_rate_bytes_per_sec(rate_bytes_per_sec); + if let Err(r) = r { + error_collector.add_result(region_id, suffix, Err(r.into())); + } + }); + error_collector.take_result() } fn set_rate_limiter_auto_tuned( &self, rate_limiter_auto_tuned: bool, ) 
-> Result<(), Box> { - let mut opt = self.db.as_inner().get_db_options(); - opt.set_auto_tuned(rate_limiter_auto_tuned)?; - // double check the new state - let new_auto_tuned = opt.get_auto_tuned(); - if new_auto_tuned.is_none() || new_auto_tuned.unwrap() != rate_limiter_auto_tuned { - return Err("fail to set rate_limiter_auto_tuned".into()); - } - Ok(()) + let mut error_collector = TabletErrorCollector::new(); + self.tablet_accessor + .for_each_opened_tablet(&mut |region_id, suffix, db: &RocksEngine| { + let mut opt = db.as_inner().get_db_options(); + let r = opt.set_auto_tuned(rate_limiter_auto_tuned); + if let Err(r) = r { + error_collector.add_result(region_id, suffix, Err(r.into())); + } else { + // double check the new state + let new_auto_tuned = opt.get_auto_tuned(); + if new_auto_tuned.is_none() + || new_auto_tuned.unwrap() != rate_limiter_auto_tuned + { + error_collector.add_result( + region_id, + suffix, + Err("fail to set rate_limiter_auto_tuned".to_string().into()), + ); + } + } + }); + + error_collector.take_result() } fn set_max_background_jobs(&self, max_background_jobs: i32) -> Result<(), Box> { @@ -1599,7 +1643,7 @@ impl DBConfigManger { } } -impl ConfigManager for DBConfigManger { +impl + Send + Sync> ConfigManager for DBConfigManger { fn dispatch(&mut self, change: ConfigChange) -> Result<(), Box> { let change_str = format!("{:?}", change); let mut change: Vec<(String, ConfigValue)> = change.into_iter().collect(); @@ -3817,7 +3861,9 @@ mod tests { use api_version::{ApiV1, KvFormat}; use case_macros::*; - use engine_traits::{DBOptions as DBOptionsTrait, ALL_CFS}; + use engine_traits::{ + ColumnFamilyOptions as ColumnFamilyOptionsTrait, DBOptions as DBOptionsTrait, ALL_CFS, + }; use futures::executor::block_on; use grpcio::ResourceQuota; use itertools::Itertools; @@ -4235,7 +4281,11 @@ mod tests { let (shared, cfg_controller) = (cfg.storage.block_cache.shared, ConfigController::new(cfg)); cfg_controller.register( Module::Rocksdb, - 
Box::new(DBConfigManger::new(engine.clone(), DBType::Kv, shared)), + Box::new(DBConfigManger::new( + Arc::new(engine.clone()), + DBType::Kv, + shared, + )), ); let (scheduler, receiver) = dummy_scheduler(); cfg_controller.register( diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 5212a211e69..fde3bc5a40f 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -10,7 +10,9 @@ use engine_rocks::{ CompactionListener, FlowListener, RocksCompactedEvent, RocksCompactionJobInfo, RocksEngine, RocksEventListener, }; -use engine_traits::{CompactionJobInfo, RaftEngine, Result, TabletFactory, CF_DEFAULT, CF_WRITE}; +use engine_traits::{ + CompactionJobInfo, RaftEngine, Result, TabletAccessor, TabletFactory, CF_DEFAULT, CF_WRITE, +}; use kvproto::kvrpcpb::ApiVersion; use raftstore::{ store::{RaftRouter, StoreMsg}, @@ -83,20 +85,29 @@ impl KvEngineFactoryBuilder { pub fn build(self) -> KvEngineFactory { KvEngineFactory { inner: Arc::new(self.inner), - router: self.router, + router: Mutex::new(self.router), } } } -#[derive(Clone)] pub struct KvEngineFactory { inner: Arc, - router: Option>, + router: Mutex>>, +} + +impl Clone for KvEngineFactory { + fn clone(&self) -> Self { + Self { + inner: self.inner.clone(), + router: Mutex::new(self.router.lock().unwrap().clone()), + } + } } impl KvEngineFactory { pub fn create_raftstore_compaction_listener(&self) -> Option { - let ch = match &self.router { + let router = self.router.lock().unwrap(); + let ch = match &*router { Some(r) => Mutex::new(r.clone()), None => return None, }; @@ -257,11 +268,17 @@ impl TabletFactory for KvEngineFactory { fn clone(&self) -> Box + Send> { Box::new(std::clone::Clone::clone(self)) } +} - fn loop_tablet_cache(&self, mut f: Box) { +impl TabletAccessor for KvEngineFactory { + fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &RocksEngine)) { if let Ok(db) = self.inner.root_db.lock() { let db = db.as_ref().unwrap(); f(0, 0, db); } } + + fn 
is_single_engine(&self) -> bool { + true + } } diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index 676272334ac..4027823f23c 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -7,7 +7,7 @@ use std::{ use collections::HashMap; use engine_rocks::RocksEngine; -use engine_traits::{RaftEngine, Result, TabletFactory}; +use engine_traits::{RaftEngine, Result, TabletAccessor, TabletFactory}; use crate::server::engine_factory::KvEngineFactory; @@ -129,14 +129,6 @@ impl TabletFactory for KvEngineFactoryV2 { Ok(()) } - #[inline] - fn loop_tablet_cache(&self, mut f: Box) { - let reg = self.registry.lock().unwrap(); - for ((id, suffix), tablet) in &*reg { - f(*id, *suffix, tablet) - } - } - #[inline] fn load_tablet(&self, path: &Path, id: u64, suffix: u64) -> Result { { @@ -160,6 +152,21 @@ impl TabletFactory for KvEngineFactoryV2 { } } +impl TabletAccessor for KvEngineFactoryV2 { + #[inline] + fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &RocksEngine)) { + let reg = self.registry.lock().unwrap(); + for ((id, suffix), tablet) in &*reg { + f(*id, *suffix, tablet) + } + } + + // it have multi tablets. 
+ fn is_single_engine(&self) -> bool { + false + } +} + #[cfg(test)] mod tests { use engine_traits::TabletFactory; @@ -213,6 +220,15 @@ mod tests { let tablet_path = factory.tablet_path(1, 10); let tablet2 = factory.open_tablet_raw(&tablet_path, false).unwrap(); assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); + let mut count = 0; + factory.for_each_opened_tablet(&mut |id, suffix, _tablet| { + assert!(id == 0); + assert!(suffix == 0); + count += 1; + }); + assert_eq!(count, 1); + assert!(factory.is_single_engine()); + assert!(shared_db.is_single_engine()); } #[test] @@ -250,6 +266,7 @@ mod tests { factory.destroy_tablet(1, 20).unwrap(); let result = factory.open_tablet(1, 20); assert!(result.is_err()); + assert!(!factory.is_single_engine()); } #[test] @@ -264,11 +281,11 @@ mod tests { factory.create_tablet(1, 10).unwrap(); factory.create_tablet(2, 10).unwrap(); let mut count = 0; - factory.loop_tablet_cache(Box::new(|id, suffix, _tablet| { + factory.for_each_opened_tablet(&mut |id, suffix, _tablet| { assert!(id == 1 || id == 2); assert!(suffix == 10); count += 1; - })); + }); assert_eq!(count, 2); } } From 30d2c3d89b551532d2c6aedc67fdaf8f5b2847cc Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Mon, 27 Jun 2022 23:22:39 -0700 Subject: [PATCH 0048/1149] Fix flaky test by extending raft gc wait timeout (#12810) close tikv/tikv#12809 Fix flaky test by extending raft gc wait timeout Signed-off-by: v01dstar Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/fsm/peer.rs | 15 +- .../failpoints/cases/test_unsafe_recovery.rs | 5 +- .../raftstore/test_unsafe_recovery.rs | 183 +++++++++--------- 3 files changed, 109 insertions(+), 94 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index e08c440d6a1..ba819bda155 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -12,7 +12,7 @@ use std::{ iter::{FromIterator, Iterator}, mem, 
sync::{Arc, Mutex}, - time::Instant, + time::{Duration, Instant}, u64, }; @@ -5573,6 +5573,19 @@ where return; } + if let Some(ForceLeaderState::ForceLeader { time, .. }) = self.fsm.peer.force_leader { + // Clean up the force leader state after a timeout, since the PD recovery process may + // have been aborted for some reasons. + if time.saturating_elapsed() + > cmp::max( + self.ctx.cfg.peer_stale_state_check_interval.0, + Duration::from_secs(60), + ) + { + self.on_exit_force_leader(); + } + } + if self.ctx.cfg.hibernate_regions { let group_state = self.fsm.hibernate_state.group_state(); if group_state == GroupState::Idle { diff --git a/tests/failpoints/cases/test_unsafe_recovery.rs b/tests/failpoints/cases/test_unsafe_recovery.rs index 292cba849df..f791b40c065 100644 --- a/tests/failpoints/cases/test_unsafe_recovery.rs +++ b/tests/failpoints/cases/test_unsafe_recovery.rs @@ -192,7 +192,7 @@ fn test_unsafe_recovery_execution_result_report() { } #[test] -fn test_unsafe_recover_wait_for_snapshot_apply() { +fn test_unsafe_recovery_wait_for_snapshot_apply() { let mut cluster = new_server_cluster(0, 3); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(8); cluster.cfg.raft_store.merge_max_log_gap = 3; @@ -219,13 +219,12 @@ fn test_unsafe_recover_wait_for_snapshot_apply() { let _ = raft_gc_finished_tx.send(()); }) .unwrap(); - // Add at least 4m data (0..10).for_each(|_| cluster.must_put(b"random_k", b"random_v")); // Unblock raft log GC. drop(raft_gc_triggered_tx); // Wait until logs are GCed. raft_gc_finished_rx - .recv_timeout(Duration::from_secs(1)) + .recv_timeout(Duration::from_secs(3)) .unwrap(); // Makes the group lose its quorum. 
cluster.stop_node(nodes[2]); diff --git a/tests/integrations/raftstore/test_unsafe_recovery.rs b/tests/integrations/raftstore/test_unsafe_recovery.rs index 7902c0a4c71..ebeb99ddfe7 100644 --- a/tests/integrations/raftstore/test_unsafe_recovery.rs +++ b/tests/integrations/raftstore/test_unsafe_recovery.rs @@ -725,96 +725,99 @@ fn test_force_leader_on_hibernated_follower() { // Test the case that three of five nodes fail and force leader on the rest node // with triggering snapshot. -// #[test] -// fn test_force_leader_trigger_snapshot() { -// let mut cluster = new_node_cluster(0, 5); -// cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(10); -// cluster.cfg.raft_store.raft_election_timeout_ticks = 10; -// cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(90); -// cluster.pd_client.disable_default_operator(); -// -// cluster.run(); -// cluster.must_put(b"k1", b"v1"); -// -// let region = cluster.get_region(b"k1"); -// cluster.must_split(®ion, b"k9"); -// let region = cluster.get_region(b"k2"); -// let peer_on_store1 = find_peer(®ion, 1).unwrap(); -// cluster.must_transfer_leader(region.get_id(), peer_on_store1.clone()); -// -// // Isolate node 2 -// cluster.add_send_filter(IsolationFilterFactory::new(2)); -// -// // Compact logs to force requesting snapshot after clearing send filters. -// let state = cluster.truncated_state(region.get_id(), 1); -// // Write some data to trigger snapshot. 
-// for i in 100..150 { -// let key = format!("k{}", i); -// let value = format!("v{}", i); -// cluster.must_put(key.as_bytes(), value.as_bytes()); -// } -// cluster.wait_log_truncated(region.get_id(), 1, state.get_index() + 40); -// -// cluster.stop_node(3); -// cluster.stop_node(4); -// cluster.stop_node(5); -// -// // Recover the isolation of 2, but still don't permit snapshot -// let recv_filter = Box::new( -// RegionPacketFilter::new(region.get_id(), 2) -// .direction(Direction::Recv) -// .msg_type(MessageType::MsgSnapshot), -// ); -// cluster.sim.wl().add_recv_filter(2, recv_filter); -// cluster.clear_send_filters(); -// -// // wait election timeout -// sleep_ms( -// cluster.cfg.raft_store.raft_election_timeout_ticks as u64 -// * cluster.cfg.raft_store.raft_base_tick_interval.as_millis() -// * 5, -// ); -// cluster.must_enter_force_leader(region.get_id(), 1, vec![3, 4, 5]); -// -// sleep_ms( -// cluster.cfg.raft_store.raft_election_timeout_ticks as u64 -// * cluster.cfg.raft_store.raft_base_tick_interval.as_millis() -// * 3, -// ); -// let cmd = new_change_peer_request( -// ConfChangeType::RemoveNode, -// find_peer(®ion, 3).unwrap().clone(), -// ); -// let req = new_admin_request(region.get_id(), region.get_region_epoch(), cmd); -// // Though it has a force leader now, but the command can't committed because the log is not replicated to all the alive peers. -// assert!( -// cluster -// .call_command_on_leader(req, Duration::from_millis(1000)) -// .unwrap() -// .get_header() -// .has_error() // error "there is a pending conf change" indicating no committed log after being the leader -// ); -// -// // Permit snapshot message, snapshot should be applied and advance commit index now. 
-// cluster.sim.wl().clear_recv_filters(2); -// cluster -// .pd_client -// .must_remove_peer(region.get_id(), find_peer(®ion, 3).unwrap().clone()); -// cluster -// .pd_client -// .must_remove_peer(region.get_id(), find_peer(®ion, 4).unwrap().clone()); -// cluster -// .pd_client -// .must_remove_peer(region.get_id(), find_peer(®ion, 5).unwrap().clone()); -// cluster.exit_force_leader(region.get_id(), 1); -// -// // quorum is formed, can propose command successfully now -// cluster.must_put(b"k4", b"v4"); -// assert_eq!(cluster.must_get(b"k2"), None); -// assert_eq!(cluster.must_get(b"k3"), None); -// assert_eq!(cluster.must_get(b"k4"), Some(b"v4".to_vec())); -// cluster.must_transfer_leader(region.get_id(), find_peer(®ion, 1).unwrap().clone()); -// } +#[test] +fn test_force_leader_trigger_snapshot() { + let mut cluster = new_node_cluster(0, 5); + cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(10); + cluster.cfg.raft_store.raft_election_timeout_ticks = 10; + cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(90); + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(8); + cluster.cfg.raft_store.merge_max_log_gap = 3; + cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(10); + cluster.pd_client.disable_default_operator(); + + cluster.run(); + cluster.must_put(b"k1", b"v1"); + + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k9"); + let region = cluster.get_region(b"k2"); + let peer_on_store1 = find_peer(®ion, 1).unwrap(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1.clone()); + + // Isolate node 2 + cluster.add_send_filter(IsolationFilterFactory::new(2)); + + // Compact logs to force requesting snapshot after clearing send filters. + let state = cluster.truncated_state(region.get_id(), 1); + // Write some data to trigger snapshot. 
+ for i in 100..150 { + let key = format!("k{}", i); + let value = format!("v{}", i); + cluster.must_put(key.as_bytes(), value.as_bytes()); + } + cluster.wait_log_truncated(region.get_id(), 1, state.get_index() + 40); + + cluster.stop_node(3); + cluster.stop_node(4); + cluster.stop_node(5); + + // Recover the isolation of 2, but still don't permit snapshot + let recv_filter = Box::new( + RegionPacketFilter::new(region.get_id(), 2) + .direction(Direction::Recv) + .msg_type(MessageType::MsgSnapshot), + ); + cluster.sim.wl().add_recv_filter(2, recv_filter); + cluster.clear_send_filters(); + + // wait election timeout + sleep_ms( + cluster.cfg.raft_store.raft_election_timeout_ticks as u64 + * cluster.cfg.raft_store.raft_base_tick_interval.as_millis() + * 5, + ); + cluster.enter_force_leader(region.get_id(), 1, vec![3, 4, 5]); + + sleep_ms( + cluster.cfg.raft_store.raft_election_timeout_ticks as u64 + * cluster.cfg.raft_store.raft_base_tick_interval.as_millis() + * 3, + ); + let cmd = new_change_peer_request( + ConfChangeType::RemoveNode, + find_peer(®ion, 3).unwrap().clone(), + ); + let req = new_admin_request(region.get_id(), region.get_region_epoch(), cmd); + // Though it has a force leader now, but the command can't committed because the log is not replicated to all the alive peers. + assert!( + cluster + .call_command_on_leader(req, Duration::from_millis(1000)) + .unwrap() + .get_header() + .has_error() // error "there is a pending conf change" indicating no committed log after being the leader + ); + + // Permit snapshot message, snapshot should be applied and advance commit index now. 
+ cluster.sim.wl().clear_recv_filters(2); + cluster + .pd_client + .must_remove_peer(region.get_id(), find_peer(®ion, 3).unwrap().clone()); + cluster + .pd_client + .must_remove_peer(region.get_id(), find_peer(®ion, 4).unwrap().clone()); + cluster + .pd_client + .must_remove_peer(region.get_id(), find_peer(®ion, 5).unwrap().clone()); + cluster.exit_force_leader(region.get_id(), 1); + + // quorum is formed, can propose command successfully now + cluster.must_put(b"k4", b"v4"); + assert_eq!(cluster.must_get(b"k2"), None); + assert_eq!(cluster.must_get(b"k3"), None); + assert_eq!(cluster.must_get(b"k4"), Some(b"v4".to_vec())); + cluster.must_transfer_leader(region.get_id(), find_peer(®ion, 1).unwrap().clone()); +} // Test the case that three of five nodes fail and force leader on the rest node // with uncommitted conf change. From 24ad73866fcc493b6d424ec495769e05c286438e Mon Sep 17 00:00:00 2001 From: glorv Date: Tue, 28 Jun 2022 15:44:39 +0800 Subject: [PATCH 0049/1149] online_config: allow return error when update config (#12910) close tikv/tikv#12909 Signed-off-by: glorv Co-authored-by: Ti Chi Robot --- components/backup/src/endpoint.rs | 3 +- components/cdc/src/endpoint.rs | 7 +- components/engine_rocks/src/config.rs | 25 ++--- components/file_system/src/lib.rs | 27 +++--- .../online_config_derive/src/lib.rs | 9 +- components/online_config/src/lib.rs | 91 +++++++++++++++++-- components/raftstore/src/store/config.rs | 2 +- .../raftstore/src/store/worker/split_check.rs | 5 +- .../src/store/worker/split_config.rs | 2 +- components/resolved_ts/src/endpoint.rs | 5 +- components/resource_metering/src/config.rs | 2 +- components/tikv_util/src/config.rs | 15 +-- src/config.rs | 22 ++--- src/server/config.rs | 2 +- src/server/gc_worker/config.rs | 3 +- src/server/raft_client.rs | 5 +- src/storage/config_manager.rs | 4 +- .../integrations/config/dynamic/gc_worker.rs | 15 ++- .../integrations/config/test_config_client.rs | 6 +- 19 files changed, 172 insertions(+), 78 
deletions(-) diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 37e6855302a..2a68cbb6bd8 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -575,8 +575,7 @@ pub struct ConfigManager(Arc>); impl online_config::ConfigManager for ConfigManager { fn dispatch(&mut self, change: online_config::ConfigChange) -> online_config::Result<()> { - self.0.write().unwrap().update(change); - Ok(()) + self.0.write().unwrap().update(change) } } diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 7ca640ac8b3..c78636b8e11 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -447,7 +447,10 @@ impl, E: KvEngine> Endpoint { fn on_change_cfg(&mut self, change: ConfigChange) { // Validate first. let mut validate_cfg = self.config.clone(); - validate_cfg.update(change); + if let Err(e) = validate_cfg.update(change) { + warn!("cdc config update failed"; "error" => ?e); + return; + } if let Err(e) = validate_cfg.validate() { warn!("cdc config update failed"; "error" => ?e); return; @@ -459,7 +462,7 @@ impl, E: KvEngine> Endpoint { "change" => ?change ); // Update the config here. The following adjustments will all use the new values. - self.config.update(change.clone()); + self.config.update(change.clone()).unwrap(); // Maybe the cache will be lost due to smaller capacity, // but it is acceptable. diff --git a/components/engine_rocks/src/config.rs b/components/engine_rocks/src/config.rs index 6442a5dab64..9c015b7e7d1 100644 --- a/components/engine_rocks/src/config.rs +++ b/components/engine_rocks/src/config.rs @@ -1,6 +1,6 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::str::FromStr; +use std::{convert::TryFrom, str::FromStr}; use online_config::ConfigValue; use rocksdb::{ @@ -225,21 +225,22 @@ pub enum BlobRunMode { impl From for ConfigValue { fn from(mode: BlobRunMode) -> ConfigValue { - ConfigValue::BlobRunMode(format!("k{:?}", mode)) + let str_value = match mode { + BlobRunMode::Normal => "normal", + BlobRunMode::ReadOnly => "read-only", + BlobRunMode::Fallback => "fallback", + }; + ConfigValue::String(str_value.into()) } } -impl From for BlobRunMode { - fn from(c: ConfigValue) -> BlobRunMode { - if let ConfigValue::BlobRunMode(s) = c { - match s.as_str() { - "kNormal" => BlobRunMode::Normal, - "kReadOnly" => BlobRunMode::ReadOnly, - "kFallback" => BlobRunMode::Fallback, - m => panic!("expect: kNormal, kReadOnly or kFallback, got: {:?}", m), - } +impl TryFrom for BlobRunMode { + type Error = String; + fn try_from(c: ConfigValue) -> Result { + if let ConfigValue::String(s) = c { + Self::from_str(&s) } else { - panic!("expect: ConfigValue::BlobRunMode, got: {:?}", c); + panic!("expect: ConfigValue::String, got: {:?}", c); } } } diff --git a/components/file_system/src/lib.rs b/components/file_system/src/lib.rs index dd99b810e28..d5f8345cae3 100644 --- a/components/file_system/src/lib.rs +++ b/components/file_system/src/lib.rs @@ -18,10 +18,13 @@ mod metrics; mod metrics_manager; mod rate_limiter; -pub use std::fs::{ - canonicalize, create_dir, create_dir_all, hard_link, metadata, read_dir, read_link, remove_dir, - remove_dir_all, remove_file, rename, set_permissions, symlink_metadata, DirBuilder, DirEntry, - FileType, Metadata, Permissions, ReadDir, +pub use std::{ + convert::TryFrom, + fs::{ + canonicalize, create_dir, create_dir_all, hard_link, metadata, read_dir, read_link, + remove_dir, remove_dir_all, remove_file, rename, set_permissions, symlink_metadata, + DirBuilder, DirEntry, FileType, Metadata, Permissions, ReadDir, + }, }; use std::{ io::{self, ErrorKind, Read, Write}, @@ -205,19 +208,17 @@ impl<'de> 
Deserialize<'de> for IOPriority { impl From for ConfigValue { fn from(mode: IOPriority) -> ConfigValue { - ConfigValue::IOPriority(mode.as_str().to_owned()) + ConfigValue::String(mode.as_str().to_owned()) } } -impl From for IOPriority { - fn from(c: ConfigValue) -> IOPriority { - if let ConfigValue::IOPriority(s) = c { - match IOPriority::from_str(s.as_str()) { - Ok(p) => p, - _ => panic!("expect: low, medium, high, got: {:?}", s), - } +impl TryFrom for IOPriority { + type Error = String; + fn try_from(c: ConfigValue) -> Result { + if let ConfigValue::String(s) = c { + Self::from_str(s.as_str()) } else { - panic!("expect: ConfigValue::IOPriority, got: {:?}", c); + panic!("expect: ConfigValue::String, got: {:?}", c); } } } diff --git a/components/online_config/online_config_derive/src/lib.rs b/components/online_config/online_config_derive/src/lib.rs index 0981668d817..ed37aeac40c 100644 --- a/components/online_config/online_config_derive/src/lib.rs +++ b/components/online_config/online_config_derive/src/lib.rs @@ -172,7 +172,7 @@ fn update(fields: &Punctuated, crate_name: &Ident) -> Result, crate_name: &Ident) -> Result std::result::Result<(), Box> { #(#update_fields)* + Ok(()) } }) } diff --git a/components/online_config/src/lib.rs b/components/online_config/src/lib.rs index 51f1580cafd..fae347fee40 100644 --- a/components/online_config/src/lib.rs +++ b/components/online_config/src/lib.rs @@ -20,8 +20,6 @@ pub enum ConfigValue { Usize(usize), Bool(bool), String(String), - BlobRunMode(String), - IOPriority(String), Module(ConfigChange), Skip, None, @@ -39,8 +37,6 @@ impl Display for ConfigValue { ConfigValue::Usize(v) => write!(f, "{}", v), ConfigValue::Bool(v) => write!(f, "{}", v), ConfigValue::String(v) => write!(f, "{}", v), - ConfigValue::BlobRunMode(v) => write!(f, "{}", v), - ConfigValue::IOPriority(v) => write!(f, "{}", v), ConfigValue::Module(v) => write!(f, "{:?}", v), ConfigValue::Skip => write!(f, "ConfigValue::Skip"), ConfigValue::None => write!(f, ""), 
@@ -115,13 +111,13 @@ impl_into!(ConfigChange, Module); /// 3. `#[online_config(submodule)]` field, these fields represent the /// submodule, and should also derive `OnlineConfig` /// 4. normal fields, the type of these fields should be implment -/// `Into` and `From` for `ConfigValue` +/// `Into` and `From`/`TryFrom` for `ConfigValue` pub trait OnlineConfig<'a> { type Encoder: serde::Serialize; /// Compare to other config, return the difference fn diff(&self, _: &Self) -> ConfigChange; /// Update config with difference returned by `diff` - fn update(&mut self, _: ConfigChange); + fn update(&mut self, _: ConfigChange) -> Result<()>; /// Get encoder that can be serialize with `serde::Serializer` /// with the disappear of `#[online_config(hidden)]` field fn get_encoder(&'a self) -> Self::Encoder; @@ -137,6 +133,10 @@ pub trait ConfigManager: Send + Sync { #[cfg(test)] mod tests { + use std::convert::TryFrom; + + use serde::Serialize; + use super::*; use crate as online_config; @@ -194,7 +194,7 @@ mod tests { assert_eq!(sub_diff.remove("field1").map(Into::into), Some(1000u64)); assert_eq!(sub_diff.remove("field2").map(Into::into), Some(true)); } - cfg.update(diff); + cfg.update(diff).unwrap(); assert_eq!(cfg, updated_cfg, "cfg should be updated"); } @@ -204,7 +204,7 @@ mod tests { let diff = cfg.diff(&cfg.clone()); assert!(diff.is_empty(), "diff should be empty"); - cfg.update(diff); + cfg.update(diff).unwrap(); assert_eq!(cfg, TestConfig::default(), "cfg should not be updated"); } @@ -218,7 +218,7 @@ mod tests { let mut diff = HashMap::new(); diff.insert("skip_field".to_owned(), ConfigValue::U64(123)); - cfg.update(diff); + cfg.update(diff).unwrap(); assert_eq!(cfg, TestConfig::default(), "cfg should not be updated"); } @@ -241,7 +241,7 @@ mod tests { assert_eq!(sub_diff.remove("field2").map(Into::into), Some(true)); } - cfg.update(diff); + cfg.update(diff).unwrap(); assert_eq!( cfg.submodule_field, updated_cfg.submodule_field, "submodule should be updated" @@ -295,4 
+295,75 @@ mod tests { "skip-field = \"\"\n\n[submodule-field]\nrename_field = false\n" ); } + + #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize)] + pub enum TestEnum { + First, + Second, + } + + impl std::fmt::Display for TestEnum { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::First => f.write_str("first"), + Self::Second => f.write_str("second"), + } + } + } + + impl From for ConfigValue { + fn from(v: TestEnum) -> ConfigValue { + ConfigValue::String(format!("{}", v)) + } + } + + impl TryFrom for TestEnum { + type Error = String; + fn try_from(v: ConfigValue) -> std::result::Result { + if let ConfigValue::String(s) = v { + match s.as_str() { + "first" => Ok(Self::First), + "second" => Ok(Self::Second), + s => Err(format!("invalid config value: {}", s)), + } + } else { + panic!("expect ConfigValue::String, got: {:?}", v); + } + } + } + + #[derive(Clone, OnlineConfig, Debug, PartialEq)] + pub struct TestEnumConfig { + f1: u64, + e: TestEnum, + } + + impl Default for TestEnumConfig { + fn default() -> Self { + Self { + f1: 0, + e: TestEnum::First, + } + } + } + + #[test] + fn test_update_enum_config() { + let mut config = TestEnumConfig::default(); + + let mut diff = HashMap::new(); + diff.insert("f1".to_owned(), ConfigValue::U64(1)); + diff.insert("e".to_owned(), ConfigValue::String("second".into())); + config.update(diff).unwrap(); + + let updated = TestEnumConfig { + f1: 1, + e: TestEnum::Second, + }; + assert_eq!(config, updated); + + let mut diff = HashMap::new(); + diff.insert("e".to_owned(), ConfigValue::String("invalid".into())); + assert!(config.update(diff).is_err()); + } } diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 87b299d4cbb..fdd47d6c2ae 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -947,7 +947,7 @@ impl ConfigManager for RaftstoreConfigManager { { let change = change.clone(); 
self.config - .update(move |cfg: &mut Config| cfg.update(change)); + .update(move |cfg: &mut Config| cfg.update(change))?; } if let Some(ConfigValue::Module(raft_batch_system_change)) = change.get("store_batch_system") diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index ecb2d43f566..922f927ddb3 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -543,11 +543,14 @@ where } fn change_cfg(&mut self, change: ConfigChange) { + if let Err(e) = self.coprocessor.cfg.update(change.clone()) { + error!("update split check config failed"; "err" => ?e); + return; + }; info!( "split check config updated"; "change" => ?change ); - self.coprocessor.cfg.update(change); } } diff --git a/components/raftstore/src/store/worker/split_config.rs b/components/raftstore/src/store/worker/split_config.rs index da7f137765a..4d2634514be 100644 --- a/components/raftstore/src/store/worker/split_config.rs +++ b/components/raftstore/src/store/worker/split_config.rs @@ -117,7 +117,7 @@ impl ConfigManager for SplitConfigManager { { let change = change.clone(); self.0 - .update(move |cfg: &mut SplitConfig| cfg.update(change)); + .update(move |cfg: &mut SplitConfig| cfg.update(change))?; } info!( "load base split config changed"; diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 06fcb8c6860..bf4f9ba881e 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -579,7 +579,10 @@ where fn handle_change_config(&mut self, change: ConfigChange) { let prev = format!("{:?}", self.cfg); let prev_advance_ts_interval = self.cfg.advance_ts_interval; - self.cfg.update(change); + if let Err(e) = self.cfg.update(change) { + error!("update resolved-ts config unexpectly failed"; "err" => ?e); + return; + } if self.cfg.advance_ts_interval != prev_advance_ts_interval { // Increase 
the `cfg_version` to reject advance event that registered before self.cfg_version += 1; diff --git a/components/resource_metering/src/config.rs b/components/resource_metering/src/config.rs index ae28536f10e..90b09588e3a 100644 --- a/components/resource_metering/src/config.rs +++ b/components/resource_metering/src/config.rs @@ -110,7 +110,7 @@ impl ConfigManager { impl online_config::ConfigManager for ConfigManager { fn dispatch(&mut self, change: ConfigChange) -> Result<(), Box> { let mut new_config = self.current_config.clone(); - new_config.update(change); + new_config.update(change)?; new_config.validate()?; if self.current_config.receiver_address != new_config.receiver_address { self.address_notifier diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index 5a8206e234e..aa981603d17 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -1108,13 +1108,15 @@ impl VersionTrack { } } - /// Update the value - pub fn update(&self, f: F) + pub fn update(&self, f: F) -> Result where - F: FnOnce(&mut T), + F: FnOnce(&mut T) -> Result, { - f(&mut self.value.write().unwrap()); - self.version.fetch_add(1, Ordering::Release); + let res = f(&mut self.value.write().unwrap()); + if res.is_ok() { + self.version.fetch_add(1, Ordering::Release); + } + res } pub fn value(&self) -> RwLockReadGuard<'_, T> { @@ -1966,9 +1968,10 @@ mod tests { assert!(trackers.iter_mut().all(|tr| tr.any_new().is_none())); - vc.update(|v| { + let _ = vc.update(|v| -> Result<(), ()> { v.v1 = 1000; v.v2 = true; + Ok(()) }); for tr in trackers.iter_mut() { let incoming = tr.any_new(); diff --git a/src/config.rs b/src/config.rs index fd6ec16253b..fc6cde09e1c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -39,7 +39,7 @@ use engine_traits::{ CFOptionsExt, ColumnFamilyOptions as ColumnFamilyOptionsTrait, DBOptionsExt, TabletAccessor, TabletErrorCollector, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; -use file_system::{IOPriority, 
IORateLimiter}; +use file_system::IORateLimiter; use keys::region_raft_prefix_len; use kvproto::kvrpcpb::ApiVersion; use online_config::{ConfigChange, ConfigManager, ConfigValue, OnlineConfig, Result as CfgResult}; @@ -3625,8 +3625,6 @@ fn to_change_value(v: &str, typed: &ConfigValue) -> CfgResult { ConfigValue::I32(_) => ConfigValue::from(v.parse::()?), ConfigValue::Usize(_) => ConfigValue::from(v.parse::()?), ConfigValue::Bool(_) => ConfigValue::from(v.parse::()?), - ConfigValue::BlobRunMode(_) => ConfigValue::from(v.parse::()?), - ConfigValue::IOPriority(_) => ConfigValue::from(v.parse::()?), ConfigValue::String(_) => ConfigValue::String(v.to_owned()), _ => unreachable!(), }; @@ -3652,9 +3650,7 @@ fn to_toml_encode(change: HashMap) -> CfgResult Ok(true), + | ConfigValue::String(_) => Ok(true), ConfigValue::None => Err(Box::new(IoError::new( ErrorKind::Other, format!("unexpect none field: {:?}", c), @@ -3792,7 +3788,7 @@ impl ConfigController { diff = { let incoming = self.get_current(); let mut updated = incoming.clone(); - updated.update(diff); + updated.update(diff)?; // Config might be adjusted in `validate`. updated.validate()?; incoming.diff(&updated) @@ -3806,7 +3802,8 @@ impl ConfigController { // dispatched to corresponding config manager, to avoid dispatch change twice if let Some(mgr) = inner.config_mgrs.get_mut(&Module::from(name.as_str())) { if let Err(e) = mgr.dispatch(change.clone()) { - inner.current.update(to_update); + // we already verified the correctness at the beginning of this function. + inner.current.update(to_update).unwrap(); return Err(e); } } @@ -3818,7 +3815,8 @@ impl ConfigController { } } debug!("all config change had been dispatched"; "change" => ?to_update); - inner.current.update(to_update); + // we already verified the correctness at the beginning of this function. 
+ inner.current.update(to_update).unwrap(); // Write change to the config file if let Some(change) = change { let content = { @@ -4395,7 +4393,7 @@ mod tests { cfg_controller .update_config("resolved-ts.advance-ts-interval", "100ms") .unwrap(); - resolved_ts_cfg.update(rx.recv().unwrap()); + resolved_ts_cfg.update(rx.recv().unwrap()).unwrap(); assert_eq!( resolved_ts_cfg.advance_ts_interval, ReadableDuration::millis(100) @@ -4416,7 +4414,7 @@ mod tests { cfg_controller .update_config("resolved-ts.advance-ts-interval", "3s") .unwrap(); - resolved_ts_cfg.update(rx.recv().unwrap()); + resolved_ts_cfg.update(rx.recv().unwrap()).unwrap(); assert_eq!( resolved_ts_cfg.advance_ts_interval, ReadableDuration::secs(3) @@ -4570,7 +4568,7 @@ mod tests { let diff = config_value_to_string(diff.into_iter().collect()); assert_eq!(diff.len(), 1); assert_eq!(diff[0].0.as_str(), "blob_run_mode"); - assert_eq!(diff[0].1.as_str(), "kFallback"); + assert_eq!(diff[0].1.as_str(), "fallback"); } #[test] diff --git a/src/server/config.rs b/src/server/config.rs index 648c0c0853d..e88ee55b8c9 100644 --- a/src/server/config.rs +++ b/src/server/config.rs @@ -421,7 +421,7 @@ impl ConfigManager for ServerConfigManager { fn dispatch(&mut self, c: ConfigChange) -> std::result::Result<(), Box> { { let change = c.clone(); - self.config.update(move |cfg| cfg.update(change)); + self.config.update(move |cfg| cfg.update(change))?; if let Some(value) = c.get("grpc_memory_pool_quota") { let mem_quota: ReadableSize = value.clone().into(); // the resize is done inplace indeed, but grpc-rs's api need self, so we just diff --git a/src/server/gc_worker/config.rs b/src/server/gc_worker/config.rs index 3b2699f5a11..9406e39d993 100644 --- a/src/server/gc_worker/config.rs +++ b/src/server/gc_worker/config.rs @@ -54,7 +54,8 @@ impl ConfigManager for GcWorkerConfigManager { ) -> std::result::Result<(), Box> { { let change = change.clone(); - self.0.update(move |cfg: &mut GcConfig| cfg.update(change)); + self.0 + 
.update(move |cfg: &mut GcConfig| cfg.update(change))?; } info!( "GC worker config changed"; diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index bc691bcc05f..214c5cb6b66 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -1232,7 +1232,10 @@ mod tests { assert!(msg_buf.full()); // update config - version_track.update(|cfg| cfg.max_grpc_send_msg_len *= 2); + let _ = version_track.update(|cfg| -> Result<(), ()> { + cfg.max_grpc_send_msg_len *= 2; + Ok(()) + }); msg_buf.clear(); let new_max_msg_len = diff --git a/src/storage/config_manager.rs b/src/storage/config_manager.rs index b72c0cbf16a..217ebbb25c8 100644 --- a/src/storage/config_manager.rs +++ b/src/storage/config_manager.rs @@ -2,7 +2,7 @@ //! Storage online config manager. -use std::sync::Arc; +use std::{convert::TryInto, sync::Arc}; use engine_traits::{CFNamesExt, CFOptionsExt, ColumnFamilyOptions, CF_DEFAULT}; use file_system::{get_io_rate_limiter, IOPriority, IOType}; @@ -110,7 +110,7 @@ impl ConfigManager for StorageConfigManger { for t in IOType::iter() { if let Some(priority) = io_rate_limit.remove(&(t.as_str().to_owned() + "_priority")) { - let priority: IOPriority = priority.into(); + let priority: IOPriority = priority.try_into()?; limiter.set_io_priority(t, priority); } } diff --git a/tests/integrations/config/dynamic/gc_worker.rs b/tests/integrations/config/dynamic/gc_worker.rs index fbc02b9266b..19e97058616 100644 --- a/tests/integrations/config/dynamic/gc_worker.rs +++ b/tests/integrations/config/dynamic/gc_worker.rs @@ -145,19 +145,28 @@ fn test_change_io_limit_by_debugger() { }); // Enable io iolimit - config_manager.update(|cfg: &mut GcConfig| cfg.max_write_bytes_per_sec = ReadableSize(1024)); + let _ = config_manager.update(|cfg: &mut GcConfig| -> Result<(), ()> { + cfg.max_write_bytes_per_sec = ReadableSize(1024); + Ok(()) + }); validate(&scheduler, move |_, limiter: &Limiter| { assert_eq!(limiter.speed_limit(), 1024.0); }); // Change io iolimit - 
config_manager.update(|cfg: &mut GcConfig| cfg.max_write_bytes_per_sec = ReadableSize(2048)); + let _ = config_manager.update(|cfg: &mut GcConfig| -> Result<(), ()> { + cfg.max_write_bytes_per_sec = ReadableSize(2048); + Ok(()) + }); validate(&scheduler, move |_, limiter: &Limiter| { assert_eq!(limiter.speed_limit(), 2048.0); }); // Disable io iolimit - config_manager.update(|cfg: &mut GcConfig| cfg.max_write_bytes_per_sec = ReadableSize(0)); + let _ = config_manager.update(|cfg: &mut GcConfig| -> Result<(), ()> { + cfg.max_write_bytes_per_sec = ReadableSize(0); + Ok(()) + }); validate(&scheduler, move |_, limiter: &Limiter| { assert_eq!(limiter.speed_limit(), f64::INFINITY); }); diff --git a/tests/integrations/config/test_config_client.rs b/tests/integrations/config/test_config_client.rs index b911dcb7b99..52cdc9cb012 100644 --- a/tests/integrations/config/test_config_client.rs +++ b/tests/integrations/config/test_config_client.rs @@ -64,8 +64,7 @@ fn test_dispatch_change() { impl ConfigManager for CfgManager { fn dispatch(&mut self, c: ConfigChange) -> Result<(), Box> { - self.0.lock().unwrap().update(c); - Ok(()) + self.0.lock().unwrap().update(c) } } @@ -198,8 +197,7 @@ fn test_update_from_toml_file() { impl ConfigManager for CfgManager { fn dispatch(&mut self, c: ConfigChange) -> Result<(), Box> { - self.0.lock().unwrap().update(c); - Ok(()) + self.0.lock().unwrap().update(c) } } From 6bd1d4510652279e4d260317b628aafa6c5fbd27 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Tue, 28 Jun 2022 15:58:39 +0800 Subject: [PATCH 0050/1149] raftstore: separate raft and kv perf contexts (#12915) ref tikv/raft-engine#227, ref tikv/tikv#12362 We used to record perf contexts for Raft RocksDB and KV RocksDB with the same PerfContext. But we also have raft-engine now. So, we will miss perf contexts if we still use RocksDB perf contexts. This commit adds PerfContext support to RaftEngine and distinguish it from the perf context used for applying. 
Then, we'll record correct perf statistics for both raft engine and KV DB. Updated raft-engine to include tikv/raft-engine#227 Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- Cargo.lock | 5 ++-- .../engine_rocks/src/perf_context_impl.rs | 16 ++++++++++ components/engine_traits/src/raft_engine.rs | 2 +- components/raft_log_engine/Cargo.toml | 1 + components/raft_log_engine/src/engine.rs | 18 ++++++++++-- components/raft_log_engine/src/lib.rs | 2 ++ .../raft_log_engine/src/perf_context.rs | 29 +++++++++++++++++++ .../raftstore/src/store/async_io/write.rs | 5 ++-- components/raftstore/src/store/fsm/peer.rs | 2 +- components/raftstore/src/store/fsm/store.rs | 9 ++++-- components/raftstore/src/store/peer.rs | 2 +- components/tracker/src/lib.rs | 9 ++++++ 12 files changed, 88 insertions(+), 12 deletions(-) create mode 100644 components/raft_log_engine/src/perf_context.rs diff --git a/Cargo.lock b/Cargo.lock index 7c9902b7534..dbc37bf0407 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4024,7 +4024,7 @@ dependencies = [ [[package]] name = "raft-engine" version = "0.2.0" -source = "git+https://github.com/tikv/raft-engine.git#4e89901a3eff850a47ea0e6b44bc74d9fed84769" +source = "git+https://github.com/tikv/raft-engine.git#07dcadbf51b43fed70346e33b5db07723e655828" dependencies = [ "byteorder", "crc32fast", @@ -4055,7 +4055,7 @@ dependencies = [ [[package]] name = "raft-engine-ctl" version = "0.2.0" -source = "git+https://github.com/tikv/raft-engine.git#4e89901a3eff850a47ea0e6b44bc74d9fed84769" +source = "git+https://github.com/tikv/raft-engine.git#07dcadbf51b43fed70346e33b5db07723e655828" dependencies = [ "clap 3.1.6", "env_logger", @@ -4092,6 +4092,7 @@ dependencies = [ "slog-global", "tikv_util", "time", + "tracker", ] [[package]] diff --git a/components/engine_rocks/src/perf_context_impl.rs b/components/engine_rocks/src/perf_context_impl.rs index c1c299def66..c6eb187b392 100644 --- a/components/engine_rocks/src/perf_context_impl.rs +++ 
b/components/engine_rocks/src/perf_context_impl.rs @@ -216,9 +216,25 @@ impl PerfContextStatistics { match self.kind { PerfContextKind::RaftstoreApply => { report_write_perf_context!(self, APPLY_PERF_CONTEXT_TIME_HISTOGRAM_STATIC); + for token in trackers { + GLOBAL_TRACKERS.with_tracker(*token, |t| { + t.metrics.apply_mutex_lock_nanos = self.write.db_mutex_lock_nanos; + t.metrics.apply_thread_wait_nanos = self.write.write_thread_wait; + t.metrics.apply_write_wal_nanos = self.write.write_wal_time; + t.metrics.apply_write_memtable_nanos = self.write.write_memtable_time; + }); + } } PerfContextKind::RaftstoreStore => { report_write_perf_context!(self, STORE_PERF_CONTEXT_TIME_HISTOGRAM_STATIC); + for token in trackers { + GLOBAL_TRACKERS.with_tracker(*token, |t| { + t.metrics.store_mutex_lock_nanos = self.write.db_mutex_lock_nanos; + t.metrics.store_thread_wait_nanos = self.write.write_thread_wait; + t.metrics.store_write_wal_nanos = self.write.write_wal_time; + t.metrics.store_write_memtable_nanos = self.write.write_memtable_time; + }); + } } PerfContextKind::Storage(_) | PerfContextKind::Coprocessor(_) => { let perf_context = ReadPerfContext::capture(); diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index 03cb2a41a41..e119184c556 100644 --- a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -65,7 +65,7 @@ pub struct RaftLogGCTask { pub to: u64, } -pub trait RaftEngine: RaftEngineReadOnly + Clone + Sync + Send + 'static { +pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send + 'static { type LogBatch: RaftLogBatch; fn log_batch(&self, capacity: usize) -> Self::LogBatch; diff --git a/components/raft_log_engine/Cargo.toml b/components/raft_log_engine/Cargo.toml index 5df8d5f3852..d13e9ea4a0b 100644 --- a/components/raft_log_engine/Cargo.toml +++ b/components/raft_log_engine/Cargo.toml @@ -21,3 +21,4 @@ slog = { version = "2.3", features = 
["max_level_trace", "release_max_level_debu slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } tikv_util = { path = "../tikv_util", default-features = false } time = "0.1" +tracker = { path = "../tracker" } diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index ae895f1ac36..8c9a7fd2b88 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -9,8 +9,8 @@ use std::{ use encryption::{DataKeyManager, DecrypterReader, EncrypterWriter}; use engine_traits::{ - CacheStats, EncryptionKeyManager, RaftEngine, RaftEngineDebug, RaftEngineReadOnly, - RaftLogBatch as RaftLogBatchTrait, RaftLogGCTask, Result, + CacheStats, EncryptionKeyManager, PerfContextExt, PerfContextKind, PerfLevel, RaftEngine, + RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch as RaftLogBatchTrait, RaftLogGCTask, Result, }; use file_system::{IOOp, IORateLimiter, IOType}; use kvproto::{ @@ -25,6 +25,8 @@ use raft_engine::{ pub use raft_engine::{Config as RaftEngineConfig, ReadableSize, RecoveryMode}; use tikv_util::Either; +use crate::perf_context::RaftEnginePerfContext; + // A special region ID representing global state. 
const STORE_REGION_ID: u64 = 0; @@ -222,6 +224,10 @@ impl FileSystem for ManagedFileSystem { }) } } + + fn delete>(&self, path: P) -> IoResult<()> { + self.base_level_file_system.delete(path) + } } #[derive(Clone)] @@ -261,6 +267,14 @@ impl RaftLogEngine { } } +impl PerfContextExt for RaftLogEngine { + type PerfContext = RaftEnginePerfContext; + + fn get_perf_context(&self, _level: PerfLevel, _kind: PerfContextKind) -> Self::PerfContext { + RaftEnginePerfContext + } +} + #[derive(Default)] pub struct RaftLogBatch(LogBatch); diff --git a/components/raft_log_engine/src/lib.rs b/components/raft_log_engine/src/lib.rs index 8b83acfe6be..7b8757d6531 100644 --- a/components/raft_log_engine/src/lib.rs +++ b/components/raft_log_engine/src/lib.rs @@ -21,4 +21,6 @@ extern crate tikv_util; mod engine; +mod perf_context; + pub use engine::{RaftEngineConfig, RaftLogBatch, RaftLogEngine, ReadableSize, RecoveryMode}; diff --git a/components/raft_log_engine/src/perf_context.rs b/components/raft_log_engine/src/perf_context.rs new file mode 100644 index 00000000000..87946e2f48e --- /dev/null +++ b/components/raft_log_engine/src/perf_context.rs @@ -0,0 +1,29 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use raft_engine::get_perf_context; +use tracker::{TrackerToken, GLOBAL_TRACKERS}; + +#[derive(Debug)] +pub struct RaftEnginePerfContext; + +impl engine_traits::PerfContext for RaftEnginePerfContext { + fn start_observe(&mut self) { + raft_engine::set_perf_context(Default::default()); + } + + fn report_metrics(&mut self, trackers: &[TrackerToken]) { + let perf_context = get_perf_context(); + for token in trackers { + GLOBAL_TRACKERS.with_tracker(*token, |t| { + t.metrics.store_thread_wait_nanos = + perf_context.write_wait_duration.as_nanos() as u64; + t.metrics.store_write_wal_nanos = (perf_context.log_write_duration + + perf_context.log_sync_duration + + perf_context.log_rotate_duration) + .as_nanos() as u64; + t.metrics.store_write_memtable_nanos = + perf_context.apply_duration.as_nanos() as u64; + }); + } + } +} diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 99c4f56b7e4..c788f7c2d1e 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -353,7 +353,7 @@ where raft_write_size_limit: usize, metrics: StoreWriteMetrics, message_metrics: RaftSendMessageMetrics, - perf_context: EK::PerfContext, + perf_context: ER::PerfContext, pending_latency_inspect: Vec<(Instant, Vec)>, } @@ -378,7 +378,7 @@ where engines.raft.log_batch(RAFT_WB_DEFAULT_SIZE), ); let perf_context = engines - .kv + .raft .get_perf_context(cfg.value().perf_level, PerfContextKind::RaftstoreStore); let cfg_tracker = cfg.clone().tracker(tag.clone()); Self { @@ -541,7 +541,6 @@ where .iter() .flat_map(|task| task.trackers.iter().flat_map(|t| t.as_tracker_token())) .collect(); - // TODO: Add a different perf context for raft engine. 
self.perf_context.report_metrics(&trackers); write_raft_time = duration_to_sec(now.saturating_elapsed()); STORE_WRITE_RAFTDB_DURATION_HISTOGRAM.observe(write_raft_time); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index ba819bda155..bed2b02a78f 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -3294,7 +3294,7 @@ where let is_initialized = self.fsm.peer.is_initialized(); if let Err(e) = self.fsm.peer.destroy( &self.ctx.engines, - &mut self.ctx.perf_context, + &mut self.ctx.raft_perf_context, merged_by_target, &self.ctx.pending_create_peers, ) { diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 54f4f45f9ab..63b0a583030 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -481,7 +481,8 @@ where pub ready_count: usize, pub has_ready: bool, pub current_time: Option, - pub perf_context: EK::PerfContext, + pub raft_perf_context: ER::PerfContext, + pub kv_perf_context: EK::PerfContext, pub tick_batch: Vec, pub node_start_time: Option, /// Disk usage for the store itself. 
@@ -1280,7 +1281,11 @@ where ready_count: 0, has_ready: false, current_time: None, - perf_context: self + raft_perf_context: self + .engines + .raft + .get_perf_context(self.cfg.value().perf_level, PerfContextKind::RaftstoreStore), + kv_perf_context: self .engines .kv .get_perf_context(self.cfg.value().perf_level, PerfContextKind::RaftstoreStore), diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 2853fcd4169..cf54d962075 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -1221,7 +1221,7 @@ where pub fn destroy( &mut self, engines: &Engines, - perf_context: &mut EK::PerfContext, + perf_context: &mut ER::PerfContext, keep_data: bool, pending_create_peers: &Mutex>, ) -> Result<()> { diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index 25a5610d034..3729fb1ec9d 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -106,6 +106,15 @@ pub struct RequestMetrics { pub wf_commit_log_nanos: u64, pub propose_send_wait_nanos: u64, pub commit_not_persisted: bool, + pub store_mutex_lock_nanos: u64, + pub store_thread_wait_nanos: u64, + pub store_write_wal_nanos: u64, + pub store_write_memtable_nanos: u64, pub store_time_nanos: u64, + pub apply_wait_nanos: u64, pub apply_time_nanos: u64, + pub apply_mutex_lock_nanos: u64, + pub apply_thread_wait_nanos: u64, + pub apply_write_wal_nanos: u64, + pub apply_write_memtable_nanos: u64, } From d356be1d051f38b9cd9c9239468073c05ccc6c03 Mon Sep 17 00:00:00 2001 From: zkkxu <76540804+zkkxu@users.noreply.github.com> Date: Wed, 29 Jun 2022 14:48:39 +0800 Subject: [PATCH 0051/1149] gRPC: use gzip level-2 compression by default (#12791) ref tikv/tikv#12929 add initial arguments for gzip compression: gzip_compression_level: represent gzip compression level, the origin gzip compression level is 6 and hardcoding; compression_lower_bound: this represent gzip will compress the data only larger than 
this Signed-off-by: zkkxu Signed-off-by: xufei --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- src/server/config.rs | 8 ++++++++ src/server/raft_client.rs | 2 ++ src/server/snap.rs | 4 +++- tests/integrations/config/mod.rs | 2 ++ 6 files changed, 20 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dbc37bf0407..893b5d909f5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2186,9 +2186,9 @@ dependencies = [ [[package]] name = "grpcio" -version = "0.10.2" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86ef249d9cb1b1843767501ae7463b500542e7f9e72d9c2d61ed320fbefa6c79" +checksum = "f9bcdd3694fa08158334501af37bdf5b4f00b1865b602d917e3cd74ecf80cd0a" dependencies = [ "futures-executor", "futures-util", @@ -2223,9 +2223,9 @@ dependencies = [ [[package]] name = "grpcio-sys" -version = "0.10.1+1.44.0" +version = "0.10.3+1.44.0-patched" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "925586932dbbea927e913783da0be160ee74e0b0519d7b20cec35547a0a84631" +checksum = "f23adc509a3c4dea990e0ab8d2add4a65389ee69c288b7851d75dd1df7a6d6c6" dependencies = [ "bindgen 0.59.2", "cc", diff --git a/Cargo.toml b/Cargo.toml index da68c7aa75c..622547b2294 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -95,7 +95,7 @@ futures-executor = "0.3.1" futures-timer = "3.0" futures-util = { version = "0.3.1", default-features = false, features = ["io", "async-await"] } getset = "0.1" -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { version = "0.10.3", default-features = false, features = ["openssl-vendored", "protobuf-codec", "nightly"] } grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } hex = "0.4" http = "0" diff --git a/src/server/config.rs b/src/server/config.rs index e88ee55b8c9..8a581d5eeba 100644 --- a/src/server/config.rs +++ b/src/server/config.rs @@ -26,6 +26,8 @@ const 
DEFAULT_GRPC_CONCURRENT_STREAM: i32 = 1024; const DEFAULT_GRPC_RAFT_CONN_NUM: usize = 1; const DEFAULT_GRPC_MEMORY_POOL_QUOTA: u64 = isize::MAX as u64; const DEFAULT_GRPC_STREAM_INITIAL_WINDOW_SIZE: u64 = 2 * 1024 * 1024; +const DEFAULT_GRPC_GZIP_COMPRESSION_LEVEL: usize = 2; +const DEFAULT_GRPC_MIN_MESSAGE_SIZE_TO_COMPRESS: usize = 4096; // Number of rows in each chunk. const DEFAULT_ENDPOINT_BATCH_ROW_LIMIT: usize = 64; @@ -98,6 +100,10 @@ pub struct Config { #[online_config(skip)] pub grpc_compression_type: GrpcCompressionType, #[online_config(skip)] + pub grpc_gzip_compression_level: usize, + #[online_config(skip)] + pub grpc_min_message_size_to_compress: usize, + #[online_config(skip)] pub grpc_concurrency: usize, #[online_config(skip)] pub grpc_concurrent_stream: i32, @@ -213,6 +219,8 @@ impl Default for Config { raft_client_queue_size: 8192, raft_msg_max_batch_size: 128, grpc_compression_type: GrpcCompressionType::None, + grpc_gzip_compression_level: DEFAULT_GRPC_GZIP_COMPRESSION_LEVEL, + grpc_min_message_size_to_compress: DEFAULT_GRPC_MIN_MESSAGE_SIZE_TO_COMPRESS, grpc_concurrency: DEFAULT_GRPC_CONCURRENCY, grpc_concurrent_stream: DEFAULT_GRPC_CONCURRENT_STREAM, grpc_raft_conn_num: DEFAULT_GRPC_RAFT_CONN_NUM, diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index 214c5cb6b66..4b2815f5d73 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -692,6 +692,8 @@ where .keepalive_time(cfg.grpc_keepalive_time.0) .keepalive_timeout(cfg.grpc_keepalive_timeout.0) .default_compression_algorithm(cfg.grpc_compression_algorithm()) + .default_gzip_compression_level(cfg.grpc_gzip_compression_level) + .default_grpc_min_message_size_to_compress(cfg.grpc_min_message_size_to_compress) // hack: so it's different args, grpc will always create a new connection. 
.raw_cfg_int( CString::new("random id").unwrap(), diff --git a/src/server/snap.rs b/src/server/snap.rs index 9b86b4778b4..15304c51cdd 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -166,7 +166,9 @@ pub fn send_snap( .stream_initial_window_size(cfg.grpc_stream_initial_window_size.0 as i32) .keepalive_time(cfg.grpc_keepalive_time.0) .keepalive_timeout(cfg.grpc_keepalive_timeout.0) - .default_compression_algorithm(cfg.grpc_compression_algorithm()); + .default_compression_algorithm(cfg.grpc_compression_algorithm()) + .default_gzip_compression_level(cfg.grpc_gzip_compression_level) + .default_grpc_min_message_size_to_compress(cfg.grpc_min_message_size_to_compress); let channel = security_mgr.connect(cb, addr); let client = TikvClient::new(channel); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 589b0ff7a56..2428d265391 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -80,6 +80,8 @@ fn test_serde_custom_tikv_config() { labels: HashMap::from_iter([("a".to_owned(), "b".to_owned())]), advertise_addr: "example.com:443".to_owned(), status_addr: "example.com:443".to_owned(), + grpc_gzip_compression_level: 2, + grpc_min_message_size_to_compress: 4096, advertise_status_addr: "example.com:443".to_owned(), status_thread_pool_size: 1, max_grpc_send_msg_len: 6 * (1 << 20), From ed1c6a0affacfbfac7124c54e3a7b6931566e0e4 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 30 Jun 2022 10:44:38 +0800 Subject: [PATCH 0052/1149] tests: natively support nextest (#12799) close tikv/tikv#12769 Signed-off-by: tabokie Signed-off-by: Xinye Tao --- .config/nextest.toml | 7 +++++++ CONTRIBUTING.md | 6 ++++++ Makefile | 8 ++++++++ scripts/test | 10 ++++++---- scripts/test-all | 12 ++++++------ tests/failpoints/cases/test_split_region.rs | 2 ++ tests/failpoints/cases/test_unsafe_recovery.rs | 3 ++- 7 files changed, 37 insertions(+), 11 deletions(-) create mode 100644 .config/nextest.toml diff --git 
a/.config/nextest.toml b/.config/nextest.toml new file mode 100644 index 00000000000..247389fcd17 --- /dev/null +++ b/.config/nextest.toml @@ -0,0 +1,7 @@ +[profile.ci] +retries = 1 # Run at most 2 times +fail-fast = false +slow-timeout = { period = "60s", terminate-after = 2 } # Timeout=120s + +[profile.ci.junit] +path = "junit.xml" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index faccf2818c1..711b2bdb192 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -77,6 +77,12 @@ make test env EXTRA_CARGO_ARGS=$TESTNAME make test ``` +Alternatively, you can use [nextest](https://github.com/nextest-rs/nextest) to run tests: + +```bash +env EXTRA_CARGO_ARGS=$TESTNAME make test_with_nextest +``` + TiKV follows the Rust community coding style. We use Rustfmt and [Clippy](https://github.com/Manishearth/rust-clippy) to automatically format and lint our code. Using these tools is checked in our CI. These are as part of `make dev`, you can also run them alone: ```bash diff --git a/Makefile b/Makefile index a41055f7430..22c575abb8f 100644 --- a/Makefile +++ b/Makefile @@ -311,6 +311,14 @@ run: # Run tests under a variety of conditions. This should pass before # submitting pull requests. test: + ./scripts/test-all -- --nocapture + +# Run tests with nextest. 
+ifndef CUSTOM_TEST_COMMAND +test_with_nextest: export CUSTOM_TEST_COMMAND=nextest run +endif +test_with_nextest: export RUSTDOCFLAGS="-Z unstable-options --persist-doctests" +test_with_nextest: ./scripts/test-all ## Static analysis diff --git a/scripts/test b/scripts/test index 547cd20d25d..e4c46c6a620 100755 --- a/scripts/test +++ b/scripts/test @@ -8,16 +8,17 @@ set -euo pipefail # Run from the Makefile environment MAKEFILE_RUN=${MAKEFILE_RUN:-""} if [[ -z $MAKEFILE_RUN ]] ; then - COMMAND="$0 $*" exec make run + COMMAND="$0 $*" exec make run fi SHELL_DEBUG=${SHELL_DEBUG:-""} if [[ -n "$SHELL_DEBUG" ]] ; then - set -x + set -x fi DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH:-""} LOCAL_DIR=${LOCAL_DIR:-""} TIKV_ENABLE_FEATURES=${TIKV_ENABLE_FEATURES:-""} +CUSTOM_TEST_COMMAND=${CUSTOM_TEST_COMMAND:-"test"} # EXTRA_CARGO_ARGS is unecessary now: this can just be given as arguments to ./scripts/test-all or ./scripts/test EXTRA_CARGO_ARGS=${EXTRA_CARGO_ARGS:-""} @@ -27,6 +28,7 @@ export DYLD_LIBRARY_PATH="${DYLD_LIBRARY_PATH}:${LOCAL_DIR}/lib" export LOG_LEVEL=DEBUG export RUST_BACKTRACE=full -cargo test --workspace \ - --exclude fuzzer-honggfuzz --exclude fuzzer-afl --exclude fuzzer-libfuzzer \ +cargo $CUSTOM_TEST_COMMAND --workspace \ + --exclude fuzz --exclude fuzzer-afl --exclude fuzzer-honggfuzz \ + --exclude fuzzer-libfuzzer --exclude fuzz-targets \ --features "${TIKV_ENABLE_FEATURES}" ${EXTRA_CARGO_ARGS} "$@" diff --git a/scripts/test-all b/scripts/test-all index 246a8f22176..2d37ccde992 100755 --- a/scripts/test-all +++ b/scripts/test-all @@ -9,17 +9,17 @@ set -euo pipefail # Run from the Makefile environment MAKEFILE_RUN=${MAKEFILE_RUN:-""} if [[ -z $MAKEFILE_RUN ]] ; then - COMMAND="$0 $*" exec make run + COMMAND="$0 $*" exec make run fi -./scripts/test "$@" -- --nocapture +./scripts/test "$@" && echo # Re-run tests that requires specific environment variables. 
if [[ "$(uname)" == "Linux" ]]; then export MALLOC_CONF=prof:true - ./scripts/test ifdef_malloc_conf "$@" -- --nocapture + ./scripts/test ifdef_malloc_conf "$@" && echo fi if [[ "$(uname)" = "Linux" ]]; then - EXTRA_CARGO_ARGS="" ./scripts/test --message-format=json-render-diagnostics -q --no-run -- --nocapture | - python scripts/check-bins.py --features "${TIKV_ENABLE_FEATURES}" --check-tests -fi \ No newline at end of file + CUSTOM_TEST_COMMAND="" EXTRA_CARGO_ARGS="" ./scripts/test --message-format=json-render-diagnostics -q --no-run | + python scripts/check-bins.py --features "${TIKV_ENABLE_FEATURES}" --check-tests +fi diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 8b42959fc01..09eb603ff8e 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -983,6 +983,8 @@ fn test_split_pessimistic_locks_with_concurrent_prewrite() { fail::remove("txn_before_process_write"); let resp = resp.join().unwrap(); assert!(resp.get_region_error().has_epoch_not_match(), "{:?}", resp); + + fail::remove("on_split_invalidate_locks"); } /// Logs are gced asynchronously. 
If an uninitialized peer is destroyed before being replaced by diff --git a/tests/failpoints/cases/test_unsafe_recovery.rs b/tests/failpoints/cases/test_unsafe_recovery.rs index f791b40c065..290a3561be9 100644 --- a/tests/failpoints/cases/test_unsafe_recovery.rs +++ b/tests/failpoints/cases/test_unsafe_recovery.rs @@ -268,9 +268,10 @@ fn test_unsafe_recovery_wait_for_snapshot_apply() { sleep_ms(100); } assert_ne!(store_report, None); + fail::remove("worker_gc_raft_log"); fail::remove("worker_gc_raft_log_finished"); - fail::remove("raft_before_apply_snap_callback"); + fail::remove("region_apply_snap"); } #[test] From 2e1513c83ffa62fc2edc3d6d28c14cb92e82ddbe Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 30 Jun 2022 14:48:39 +0800 Subject: [PATCH 0053/1149] encryption: fix issue with opening plaintext files (#12272) close tikv/tikv#12162 Signed-off-by: tabokie --- components/encryption/src/crypter.rs | 3 ++ components/encryption/src/manager/mod.rs | 53 ++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/components/encryption/src/crypter.rs b/components/encryption/src/crypter.rs index c17560d4a38..9c148e62247 100644 --- a/components/encryption/src/crypter.rs +++ b/components/encryption/src/crypter.rs @@ -53,6 +53,7 @@ const CTR_IV_16: usize = 16; pub enum Iv { Gcm([u8; GCM_IV_12]), Ctr([u8; CTR_IV_16]), + Empty, } impl Iv { @@ -91,6 +92,7 @@ impl Iv { match self { Iv::Ctr(iv) => iv, Iv::Gcm(iv) => iv, + Iv::Empty => &[], } } @@ -102,6 +104,7 @@ impl Iv { Ok(()) } Iv::Gcm(_) => Err(box_err!("offset addition is not supported for GCM mode")), + Iv::Empty => Err(box_err!("empty Iv")), } } } diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index bc4b97de7a2..cd9be1b554d 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -12,7 +12,9 @@ use std::{ }; use crossbeam::channel::{self, select, tick}; -use 
engine_traits::{EncryptionKeyManager, FileEncryptionInfo}; +use engine_traits::{ + EncryptionKeyManager, EncryptionMethod as DBEncryptionMethod, FileEncryptionInfo, +}; use fail::fail_point; use file_system::File; use kvproto::encryptionpb::{DataKey, EncryptionMethod, FileDictionary, FileInfo, KeyDictionary}; @@ -597,7 +599,12 @@ impl DataKeyManager { writer, crypter::encryption_method_from_db_encryption_method(file.method), &file.key, - Iv::from_slice(&file.iv)?, + if file.method == DBEncryptionMethod::Plaintext { + debug_assert!(file.iv.is_empty()); + Iv::Empty + } else { + Iv::from_slice(&file.iv)? + }, ) } @@ -622,7 +629,12 @@ impl DataKeyManager { reader, crypter::encryption_method_from_db_encryption_method(file.method), &file.key, - Iv::from_slice(&file.iv)?, + if file.method == DBEncryptionMethod::Plaintext { + debug_assert!(file.iv.is_empty()); + Iv::Empty + } else { + Iv::from_slice(&file.iv)? + }, ) } @@ -1271,4 +1283,39 @@ mod tests { let result = new_key_manager(&tmp_dir, None, right_key, previous); assert!(result.is_ok()); } + + #[test] + fn test_plaintext_encrypter_writer() { + use std::io::{Read, Write}; + + let _guard = LOCK_FOR_GAUGE.lock().unwrap(); + let (key_path, _tmp_key_dir) = create_key_file("key"); + let master_key_backend = + Box::new(FileBackend::new(key_path.as_path()).unwrap()) as Box; + let tmp_dir = tempfile::TempDir::new().unwrap(); + let previous = new_mock_backend() as Box; + let manager = new_key_manager(&tmp_dir, None, master_key_backend, previous).unwrap(); + let path = tmp_dir.path().join("nonencyrpted"); + let content = "I'm exposed.".to_string(); + { + let raw = File::create(&path).unwrap(); + let mut f = manager + .open_file_with_writer(&path, raw, false /*create*/) + .unwrap(); + f.write_all(content.as_bytes()).unwrap(); + f.sync_all().unwrap(); + } + { + let mut buffer = String::new(); + let mut f = File::open(&path).unwrap(); + assert_eq!(f.read_to_string(&mut buffer).unwrap(), content.len()); + assert_eq!(buffer, 
content); + } + { + let mut buffer = String::new(); + let mut f = manager.open_file_for_read(&path).unwrap(); + assert_eq!(f.read_to_string(&mut buffer).unwrap(), content.len()); + assert_eq!(buffer, content); + } + } } From 50e0cf4ee720a36ce62a2d80c341cf48533e2977 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Fri, 1 Jul 2022 15:30:39 +0800 Subject: [PATCH 0054/1149] raftstore: support to split the region on half with the given key range (#12944) close tikv/tikv#12943 Support to split the region on half with the given key range. Signed-off-by: JmPotato --- components/keys/src/lib.rs | 6 +- .../src/coprocessor/split_check/half.rs | 73 ++++++++++++ components/raftstore/src/store/fsm/peer.rs | 36 +++++- components/raftstore/src/store/msg.rs | 6 +- components/raftstore/src/store/worker/pd.rs | 2 + .../raftstore/src/store/worker/split_check.rs | 104 ++++++++++++++++-- components/test_raftstore/src/cluster.rs | 2 + 7 files changed, 211 insertions(+), 18 deletions(-) diff --git a/components/keys/src/lib.rs b/components/keys/src/lib.rs index a403b939727..fa855bbe353 100644 --- a/components/keys/src/lib.rs +++ b/components/keys/src/lib.rs @@ -241,11 +241,11 @@ pub fn enc_end_key(region: &Region) -> Vec { } #[inline] -pub fn data_end_key(region_end_key: &[u8]) -> Vec { - if region_end_key.is_empty() { +pub fn data_end_key(key: &[u8]) -> Vec { + if key.is_empty() { DATA_MAX_KEY.to_vec() } else { - data_key(region_end_key) + data_key(key) } } diff --git a/components/raftstore/src/coprocessor/split_check/half.rs b/components/raftstore/src/coprocessor/split_check/half.rs index a52b7a59d60..f6d207df875 100644 --- a/components/raftstore/src/coprocessor/split_check/half.rs +++ b/components/raftstore/src/coprocessor/split_check/half.rs @@ -197,6 +197,79 @@ mod tests { must_split_at(&rx, ®ion, vec![split_key.into_encoded()]); } + #[test] + fn test_split_check_with_key_range() { + let path = Builder::new().prefix("test-raftstore").tempdir().unwrap(); + let path_str = 
path.path().to_str().unwrap(); + let db_opts = DBOptions::default(); + let cfs_opts = ALL_CFS + .iter() + .map(|cf| { + let cf_opts = ColumnFamilyOptions::new(); + CFOptions::new(cf, cf_opts) + }) + .collect(); + let engine = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); + + let mut region = Region::default(); + region.set_id(1); + region.mut_peers().push(Peer::default()); + region.mut_region_epoch().set_version(2); + region.mut_region_epoch().set_conf_ver(5); + + let (tx, rx) = mpsc::sync_channel(100); + let cfg = Config { + region_max_size: Some(ReadableSize(BUCKET_NUMBER_LIMIT as u64)), + ..Default::default() + }; + let mut runnable = + SplitCheckRunner::new(engine.clone(), tx.clone(), CoprocessorHost::new(tx, cfg)); + + for i in 0..11 { + let k = format!("{:04}", i).into_bytes(); + let k = keys::data_key(Key::from_raw(&k).as_encoded()); + engine.put_cf(CF_DEFAULT, &k, &k).unwrap(); + // Flush for every key so that we can know the exact middle key. + engine.flush_cf(CF_DEFAULT, true).unwrap(); + } + let start_key = Key::from_raw(b"0000").into_encoded(); + let end_key = Key::from_raw(b"0005").into_encoded(); + runnable.run(SplitCheckTask::split_check_key_range( + region.clone(), + Some(start_key), + Some(end_key), + false, + CheckPolicy::Scan, + None, + )); + let split_key = Key::from_raw(b"0003"); + must_split_at(&rx, ®ion, vec![split_key.into_encoded()]); + let start_key = Key::from_raw(b"0005").into_encoded(); + let end_key = Key::from_raw(b"0010").into_encoded(); + runnable.run(SplitCheckTask::split_check_key_range( + region.clone(), + Some(start_key), + Some(end_key), + false, + CheckPolicy::Scan, + None, + )); + let split_key = Key::from_raw(b"0008"); + must_split_at(&rx, ®ion, vec![split_key.into_encoded()]); + let start_key = Key::from_raw(b"0003").into_encoded(); + let end_key = Key::from_raw(b"0008").into_encoded(); + runnable.run(SplitCheckTask::split_check_key_range( + region.clone(), + Some(start_key), + Some(end_key), + 
false, + CheckPolicy::Scan, + None, + )); + let split_key = Key::from_raw(b"0006"); + must_split_at(&rx, ®ion, vec![split_key.into_encoded()]); + } + fn test_generate_region_bucket_impl(mvcc: bool) { let path = Builder::new().prefix("test-raftstore").tempdir().unwrap(); let path_str = path.path().to_str().unwrap(); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index bed2b02a78f..02c8d4fe650 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -960,11 +960,20 @@ where } CasualMessage::HalfSplitRegion { region_epoch, + start_key, + end_key, policy, source, cb, } => { - self.on_schedule_half_split_region(®ion_epoch, policy, source, cb); + self.on_schedule_half_split_region( + ®ion_epoch, + start_key, + end_key, + policy, + source, + cb, + ); } CasualMessage::GcSnap { snaps } => { self.on_gc_snap(snaps); @@ -5489,14 +5498,18 @@ where fn on_schedule_half_split_region( &mut self, region_epoch: &metapb::RegionEpoch, + start_key: Option>, + end_key: Option>, policy: CheckPolicy, source: &str, _cb: Callback, ) { + let is_key_range = start_key.is_some() && end_key.is_some(); info!( "on half split"; "region_id" => self.fsm.region_id(), "peer_id" => self.fsm.peer_id(), + "is_key_range" => is_key_range, "policy" => ?policy, "source" => source, ); @@ -5506,6 +5519,7 @@ where "not leader, skip"; "region_id" => self.fsm.region_id(), "peer_id" => self.fsm.peer_id(), + "is_key_range" => is_key_range, ); return; } @@ -5516,11 +5530,18 @@ where "receive a stale halfsplit message"; "region_id" => self.fsm.region_id(), "peer_id" => self.fsm.peer_id(), + "is_key_range" => is_key_range, ); return; } - let split_check_bucket_ranges = self.gen_bucket_range_for_update(); + // Do not check the bucket ranges if we want to split the region with a given key range, + // this is to avoid compatibility issues. 
+ let split_check_bucket_ranges = if !is_key_range { + self.gen_bucket_range_for_update() + } else { + None + }; #[cfg(any(test, feature = "testexport"))] { if let Callback::Test { cb } = _cb { @@ -5531,13 +5552,20 @@ where cb(peer_stat); } } - let task = - SplitCheckTask::split_check(region.clone(), false, policy, split_check_bucket_ranges); + let task = SplitCheckTask::split_check_key_range( + region.clone(), + start_key, + end_key, + false, + policy, + split_check_bucket_ranges, + ); if let Err(e) = self.ctx.split_check_scheduler.schedule(task) { error!( "failed to schedule split check"; "region_id" => self.fsm.region_id(), "peer_id" => self.fsm.peer_id(), + "is_key_range" => is_key_range, "err" => %e, ); } diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 46900878178..4f1ea017764 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -401,9 +401,13 @@ pub enum CasualMessage { CompactionDeclinedBytes { bytes: u64, }, - /// Half split the target region. + /// Half split the target region with the given key range. + /// If the key range is not provided, the region's start key + /// and end key will be used by default. 
HalfSplitRegion { region_epoch: RegionEpoch, + start_key: Option>, + end_key: Option>, policy: CheckPolicy, source: &'static str, cb: Callback, diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 648e8e9344e..a16ec50a7a0 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1449,6 +1449,8 @@ where } else { CasualMessage::HalfSplitRegion { region_epoch: epoch, + start_key: None, + end_key: None, policy: split_region.get_policy(), source: "pd", cb: Callback::None, diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index 922f927ddb3..3822575fb8e 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -16,6 +16,7 @@ use kvproto::{ }; use online_config::{ConfigChange, OnlineConfig}; use tikv_util::{box_err, debug, error, info, keybuilder::KeyBuilder, warn, worker::Runnable}; +use txn_types::Key; use super::metrics::*; #[cfg(any(test, feature = "testexport"))] @@ -145,6 +146,8 @@ pub struct Bucket { pub enum Task { SplitCheckTask { region: Region, + start_key: Option>, + end_key: Option>, auto_split: bool, policy: CheckPolicy, bucket_ranges: Option>, @@ -164,6 +167,26 @@ impl Task { ) -> Task { Task::SplitCheckTask { region, + start_key: None, + end_key: None, + auto_split, + policy, + bucket_ranges, + } + } + + pub fn split_check_key_range( + region: Region, + start_key: Option>, + end_key: Option>, + auto_split: bool, + policy: CheckPolicy, + bucket_ranges: Option>, + ) -> Task { + Task::SplitCheckTask { + region, + start_key, + end_key, auto_split, policy, bucket_ranges, @@ -175,11 +198,17 @@ impl Display for Task { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { Task::SplitCheckTask { - region, auto_split, .. + region, + start_key, + end_key, + auto_split, + .. 
} => write!( f, - "[split check worker] Split Check Task for {}, auto_split: {:?}", + "[split check worker] Split Check Task for {}, start_key: {:?}, end_key: {:?}, auto_split: {:?}", region.get_id(), + start_key, + end_key, auto_split ), Task::ChangeConfig(_) => write!(f, "[split check worker] Change Config Task"), @@ -314,16 +343,29 @@ where fn check_split_and_bucket( &mut self, region: &Region, + start_key: Option>, + end_key: Option>, auto_split: bool, policy: CheckPolicy, bucket_ranges: Option>, ) { let region_id = region.get_id(); - let start_key = keys::enc_start_key(region); - let end_key = keys::enc_end_key(region); + let is_key_range = start_key.is_some() && end_key.is_some(); + let start_key = if is_key_range { + // This key is usually from a request, which should be encoded first. + keys::data_key(Key::from_raw(&start_key.unwrap()).as_encoded().as_slice()) + } else { + keys::enc_start_key(region) + }; + let end_key = if is_key_range { + keys::data_end_key(Key::from_raw(&end_key.unwrap()).as_encoded().as_slice()) + } else { + keys::enc_end_key(region) + }; debug!( "executing task"; "region_id" => region_id, + "is_key_range" => is_key_range, "start_key" => log_wrappers::Value::key(&start_key), "end_key" => log_wrappers::Value::key(&end_key), "policy" => ?policy, @@ -334,16 +376,33 @@ where .new_split_checker_host(region, &self.engine, auto_split, policy); if host.skip() { - debug!("skip split check"; "region_id" => region.get_id()); + debug!("skip split check"; + "region_id" => region.get_id(), + "is_key_range" => is_key_range, + "start_key" => log_wrappers::Value::key(&start_key), + "end_key" => log_wrappers::Value::key(&end_key), + ); return; } let split_keys = match host.policy() { CheckPolicy::Scan => { - match self.scan_split_keys(&mut host, region, &start_key, &end_key, bucket_ranges) { + match self.scan_split_keys( + &mut host, + region, + is_key_range, + &start_key, + &end_key, + bucket_ranges, + ) { Ok(keys) => keys, Err(e) => { - error!(%e; 
"failed to scan split key"; "region_id" => region_id,); + error!(%e; "failed to scan split key"; + "region_id" => region_id, + "is_key_range" => is_key_range, + "start_key" => log_wrappers::Value::key(&start_key), + "end_key" => log_wrappers::Value::key(&end_key), + ); return; } } @@ -357,6 +416,9 @@ where error!(%e; "approximate_check_bucket failed"; "region_id" => region_id, + "is_key_range" => is_key_range, + "start_key" => log_wrappers::Value::key(&start_key), + "end_key" => log_wrappers::Value::key(&end_key), ); } } @@ -368,17 +430,26 @@ where error!(%e; "failed to get approximate split key, try scan way"; "region_id" => region_id, + "is_key_range" => is_key_range, + "start_key" => log_wrappers::Value::key(&start_key), + "end_key" => log_wrappers::Value::key(&end_key), ); match self.scan_split_keys( &mut host, region, + is_key_range, &start_key, &end_key, bucket_ranges, ) { Ok(keys) => keys, Err(e) => { - error!(%e; "failed to scan split key"; "region_id" => region_id,); + error!(%e; "failed to scan split key"; + "region_id" => region_id, + "is_key_range" => is_key_range, + "start_key" => log_wrappers::Value::key(&start_key), + "end_key" => log_wrappers::Value::key(&end_key), + ); return; } } @@ -408,12 +479,13 @@ where /// Gets the split keys by scanning the range. /// bucket_ranges: specify the ranges to generate buckets. - /// If none, gengerate buckets for the whole region. + /// If none, generate buckets for the whole region. /// If it's Some(vec![]), skip generating buckets. fn scan_split_keys( &self, host: &mut SplitCheckerHost<'_, E>, region: &Region, + is_key_range: bool, start_key: &[u8], end_key: &[u8], bucket_ranges: Option>, @@ -509,6 +581,9 @@ where } // if we scan the whole range, we can update approximate size and keys with accurate value. 
+ if is_key_range { + return; + } info!( "update approximate size and keys with accurate value"; "region_id" => region.get_id(), @@ -565,10 +640,19 @@ where match task { Task::SplitCheckTask { region, + start_key, + end_key, auto_split, policy, bucket_ranges, - } => self.check_split_and_bucket(®ion, auto_split, policy, bucket_ranges), + } => self.check_split_and_bucket( + ®ion, + start_key, + end_key, + auto_split, + policy, + bucket_ranges, + ), Task::ChangeConfig(c) => self.change_cfg(c), Task::ApproximateBuckets(region) => { if self.coprocessor.cfg.enable_region_bucket { diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 63c7e3023c3..046d2396382 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1761,6 +1761,8 @@ impl Cluster { region.get_id(), CasualMessage::HalfSplitRegion { region_epoch: region.get_region_epoch().clone(), + start_key: None, + end_key: None, policy: CheckPolicy::Scan, source: "test", cb, From 05fd6298d1ee07e8dc66c0b76477da9d17cc22d3 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Mon, 4 Jul 2022 15:25:01 +0800 Subject: [PATCH 0055/1149] metrics, pd_worker: add time duration metrics for the Load Base Split (#12941) close tikv/tikv#12937 Add time duration metrics for the Load Base Split. Signed-off-by: JmPotato --- components/raftstore/src/store/metrics.rs | 5 + components/raftstore/src/store/worker/pd.rs | 2 + metrics/grafana/tikv_details.json | 126 ++++++++++++++++++++ 3 files changed, 133 insertions(+) diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index e3d3a23e389..c60152784a5 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -662,6 +662,11 @@ lazy_static! 
{ linear_buckets(0.0, 0.05, 20).unwrap() ).unwrap(); + pub static ref LOAD_BASE_SPLIT_DURATION_HISTOGRAM : Histogram = register_histogram!( + "tikv_load_base_split_duration_seconds", + "Histogram of the time load base split costs in seconds" + ).unwrap(); + pub static ref QUERY_REGION_VEC: HistogramVec = register_histogram_vec!( "tikv_query_region", "Histogram of query", diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index a16ec50a7a0..5e4cf6e8399 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -598,6 +598,7 @@ where receiver: &Receiver, scheduler: &Scheduler>, ) { + let start_time = TiInstant::now(); auto_split_controller.refresh_cfg(); let mut others = vec![]; while let Ok(other) = receiver.try_recv() { @@ -621,6 +622,7 @@ where READ_QPS_TOPN.with_label_values(&[&i.to_string()]).set(0.0); } } + LOAD_BASE_SPLIT_DURATION_HISTOGRAM.observe(start_time.saturating_elapsed_secs()); } pub fn report_min_resolved_ts( diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index b8204654185..686c3a39a97 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -15476,6 +15476,132 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "tidb-cluster", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 118 + }, + "hiddenSeries": false, + "id": 23763572060, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true 
+ }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.80, sum(rate(tikv_load_base_split_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}[1m])) by (le, instance))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "80%-{{instance}}", + "refId": "A", + "step": 4 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.90, sum(rate(tikv_load_base_split_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}[1m])) by (le, instance))", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "99%-{{instance}}", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(tikv_load_base_split_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}[1m])) by (instance)", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "avg-{{instance}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Load base split duration", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:270", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:271", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, From 
16589c02a8c603423f33be8178b983f7e9577c04 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Tue, 5 Jul 2022 12:33:02 +0800 Subject: [PATCH 0056/1149] pd_worker, split_controller: introduce the new config and CPU collector registration mechanism (#12942) ref tikv/tikv#12063, ref tikv/tikv#12593 Introduce the new split config and CPU collector registration mechanism. Signed-off-by: JmPotato --- components/raftstore/src/store/worker/mod.rs | 2 +- components/raftstore/src/store/worker/pd.rs | 65 +++++++++++-- .../src/store/worker/split_config.rs | 32 ++++++ .../src/store/worker/split_controller.rs | 97 ++++++++++++++++++- 4 files changed, 182 insertions(+), 14 deletions(-) diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index a2ac27eed38..583e9341f0d 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -42,5 +42,5 @@ pub use self::{ Bucket, BucketRange, KeyEntry, Runner as SplitCheckRunner, Task as SplitCheckTask, }, split_config::{SplitConfig, SplitConfigManager}, - split_controller::{AutoSplitController, ReadStats, WriteStats}, + split_controller::{AutoSplitController, ReadStats, SplitConfigChange, WriteStats}, }; diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 5e4cf6e8399..280c15b083f 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -58,7 +58,7 @@ use crate::store::{ worker::{ query_stats::QueryStats, split_controller::{SplitInfo, TOP_N}, - AutoSplitController, ReadStats, WriteStats, + AutoSplitController, ReadStats, SplitConfigChange, WriteStats, }, Callback, CasualMessage, Config, PeerMsg, RaftCmdExtraOpts, RaftCommand, RaftRouter, RegionReadProgressRegistry, SignificantMsg, SnapManager, StoreInfo, StoreMsg, TxnExt, @@ -183,6 +183,7 @@ where id: u64, duration: RaftstoreDuration, }, + UpdateRegionCPUCollector(bool), 
RegionCPURecords(Arc), ReportMinResolvedTS { store_id: u64, @@ -349,7 +350,7 @@ where log_wrappers::Value::key(split_key), ), Task::AutoSplit { ref split_infos } => { - write!(f, "auto split split regions, num is {}", split_infos.len(),) + write!(f, "auto split split regions, num is {}", split_infos.len()) } Task::AskBatchSplit { ref region, @@ -405,6 +406,12 @@ where Task::UpdateSlowScore { id, ref duration } => { write!(f, "compute slow score: id {}, duration {:?}", id, duration) } + Task::UpdateRegionCPUCollector(is_register) => { + if is_register { + return write!(f, "register region cpu collector"); + } + write!(f, "deregister region cpu collector") + } Task::RegionCPURecords(ref cpu_records) => { write!(f, "get region cpu records: {:?}", cpu_records) } @@ -599,7 +606,18 @@ where scheduler: &Scheduler>, ) { let start_time = TiInstant::now(); - auto_split_controller.refresh_cfg(); + match auto_split_controller.refresh_and_check_cfg() { + SplitConfigChange::UpdateRegionCPUCollector(is_register) => { + if let Err(e) = scheduler.schedule(Task::UpdateRegionCPUCollector(is_register)) { + error!( + "failed to register or deregister the region cpu collector"; + "is_register" => is_register, + "err" => ?e, + ); + } + } + SplitConfigChange::Noop => {} + } let mut others = vec![]; while let Ok(other) = receiver.try_recv() { others.push(other); @@ -842,7 +860,8 @@ where scheduler: Scheduler>, stats_monitor: StatsMonitor, - _region_cpu_records_collector: CollectorGuard, + collector_reg_handle: CollectorRegHandle, + region_cpu_records_collector: Option, // region_id -> total_cpu_time_ms (since last region heartbeat) region_cpu_records: HashMap, @@ -879,6 +898,18 @@ where region_read_progress: RegionReadProgressRegistry, health_service: Option, ) -> Runner { + // Register the region CPU records collector. 
+ let mut region_cpu_records_collector = None; + if auto_split_controller + .cfg + .region_cpu_overload_threshold_ratio + > 0.0 + { + region_cpu_records_collector = Some(collector_reg_handle.register( + Box::new(RegionCPUMeteringCollector::new(scheduler.clone())), + false, + )); + } let interval = store_heartbeat_interval / Self::INTERVAL_DIVISOR; let mut stats_monitor = StatsMonitor::new( interval, @@ -889,11 +920,6 @@ where error!("failed to start stats collector, error = {:?}", e); } - let _region_cpu_records_collector = collector_reg_handle.register( - Box::new(RegionCPUMeteringCollector::new(scheduler.clone())), - true, - ); - Runner { store_id, pd_client, @@ -905,7 +931,8 @@ where start_ts: UnixSecs::now(), scheduler, stats_monitor, - _region_cpu_records_collector, + collector_reg_handle, + region_cpu_records_collector, region_cpu_records: HashMap::default(), concurrency_manager, snap_mgr, @@ -968,6 +995,21 @@ where self.remote.spawn(f); } + fn handle_update_region_cpu_collector(&mut self, is_register: bool) { + // If it's a deregister task, just take and drop the original collector. + if !is_register { + self.region_cpu_records_collector.take(); + return; + } + if self.region_cpu_records_collector.is_some() { + return; + } + self.region_cpu_records_collector = Some(self.collector_reg_handle.register( + Box::new(RegionCPUMeteringCollector::new(self.scheduler.clone())), + false, + )); + } + // Note: The parameter doesn't contain `self` because this function may // be called in an asynchronous context. 
fn handle_ask_batch_split( @@ -1928,6 +1970,9 @@ where } => self.handle_update_max_timestamp(region_id, initial_status, txn_ext), Task::QueryRegionLeader { region_id } => self.handle_query_region_leader(region_id), Task::UpdateSlowScore { id, duration } => self.slow_score.record(id, duration.sum()), + Task::UpdateRegionCPUCollector(is_register) => { + self.handle_update_region_cpu_collector(is_register) + } Task::RegionCPURecords(records) => self.handle_region_cpu_records(records), Task::ReportMinResolvedTS { store_id, diff --git a/components/raftstore/src/store/worker/split_config.rs b/components/raftstore/src/store/worker/split_config.rs index 4d2634514be..58df082c3e6 100644 --- a/components/raftstore/src/store/worker/split_config.rs +++ b/components/raftstore/src/store/worker/split_config.rs @@ -19,6 +19,19 @@ const DEFAULT_SPLIT_BALANCE_SCORE: f64 = 0.25; // We get contained score by sample.contained/(sample.right+sample.left+sample.contained). It will be used to avoid to split regions requested by range. const DEFAULT_SPLIT_CONTAINED_SCORE: f64 = 0.5; +// If the `split_balance_score` and `split_contained_score` above could not be satisfied, we will try to split the region according to its CPU load, +// then these parameters below will start to work. +// When the gRPC poll thread CPU usage is higher than gRPC poll thread count * `DEFAULT_GRPC_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO`, +// the CPU-based split won't be triggered no matter if the `DEFAULT_UNIFIED_READ_POOL_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO` and `REGION_CPU_OVERLOAD_THRESHOLD_RATIO` are exceeded +// to prevent from increasing the gRPC poll CPU usage. +const DEFAULT_GRPC_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO: f64 = 0.5; +// When the Unified Read Poll thread CPU usage is higher than Unified Read Poll thread count * `DEFAULT_UNIFIED_READ_POOL_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO`, +// the CPU-based split will try to check and record the top hot CPU region. 
+const DEFAULT_UNIFIED_READ_POOL_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO: f64 = 0.8; +// When the Unified Read Poll is hot and the region's CPU usage reaches `REGION_CPU_OVERLOAD_THRESHOLD_RATIO` as a percentage of the Unified Read Poll, +// it will be added into the hot region list and may be split later as the top hot CPU region. +pub(crate) const REGION_CPU_OVERLOAD_THRESHOLD_RATIO: f64 = 0.25; + lazy_static! { static ref SPLIT_CONFIG: Mutex>>> = Mutex::new(None); } @@ -43,6 +56,11 @@ pub struct SplitConfig { pub sample_num: usize, pub sample_threshold: u64, pub byte_threshold: usize, + #[doc(hidden)] + pub grpc_thread_cpu_overload_threshold_ratio: f64, + #[doc(hidden)] + pub unified_read_pool_thread_cpu_overload_threshold_ratio: f64, + pub region_cpu_overload_threshold_ratio: f64, // deprecated. #[online_config(skip)] #[doc(hidden)] @@ -65,6 +83,11 @@ impl Default for SplitConfig { sample_num: DEFAULT_SAMPLE_NUM, sample_threshold: DEFAULT_SAMPLE_THRESHOLD, byte_threshold: DEFAULT_BYTE_THRESHOLD, + grpc_thread_cpu_overload_threshold_ratio: + DEFAULT_GRPC_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO, + unified_read_pool_thread_cpu_overload_threshold_ratio: + DEFAULT_UNIFIED_READ_POOL_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO, + region_cpu_overload_threshold_ratio: REGION_CPU_OVERLOAD_THRESHOLD_RATIO, size_threshold: None, // deprecated. key_threshold: None, // deprecated. 
} @@ -87,6 +110,15 @@ impl SplitConfig { ("sample_num should be less than qps_threshold for load-base-split.").into(), ); } + if self.grpc_thread_cpu_overload_threshold_ratio > 1.0 + || self.grpc_thread_cpu_overload_threshold_ratio < 0.0 + || self.unified_read_pool_thread_cpu_overload_threshold_ratio > 1.0 + || self.unified_read_pool_thread_cpu_overload_threshold_ratio < 0.0 + || self.region_cpu_overload_threshold_ratio > 1.0 + || self.region_cpu_overload_threshold_ratio < 0.0 + { + return Err(("threshold ratio should be between 0 and 1.").into()); + } Ok(()) } } diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index d21c97285d0..b644ac88d85 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -519,10 +519,16 @@ pub struct SplitInfo { pub peer: Peer, } +#[derive(PartialEq, Debug)] +pub enum SplitConfigChange { + Noop, + UpdateRegionCPUCollector(bool), +} + pub struct AutoSplitController { // RegionID -> Recorder pub recorders: HashMap, - cfg: SplitConfig, + pub cfg: SplitConfig, cfg_tracker: Tracker, } @@ -645,19 +651,36 @@ impl AutoSplitController { }); } - pub fn refresh_cfg(&mut self) { + pub fn refresh_and_check_cfg(&mut self) -> SplitConfigChange { + let mut cfg_change = SplitConfigChange::Noop; if let Some(incoming) = self.cfg_tracker.any_new() { + if self.cfg.region_cpu_overload_threshold_ratio <= 0.0 + && incoming.region_cpu_overload_threshold_ratio > 0.0 + { + cfg_change = SplitConfigChange::UpdateRegionCPUCollector(true); + } + if self.cfg.region_cpu_overload_threshold_ratio > 0.0 + && incoming.region_cpu_overload_threshold_ratio <= 0.0 + { + cfg_change = SplitConfigChange::UpdateRegionCPUCollector(false); + } self.cfg = incoming.clone(); } + cfg_change } } #[cfg(test)] mod tests { + use online_config::{ConfigChange, ConfigManager, ConfigValue}; + use tikv_util::config::VersionTrack; use 
txn_types::Key; use super::*; - use crate::store::{util::build_key_range, worker::split_config::DEFAULT_SAMPLE_NUM}; + use crate::store::{ + util::build_key_range, + worker::split_config::{DEFAULT_SAMPLE_NUM, REGION_CPU_OVERLOAD_THRESHOLD_RATIO}, + }; enum Position { Left, @@ -1201,6 +1224,74 @@ mod tests { qps_stats } + #[test] + fn test_refresh_and_check_cfg() { + let split_config = SplitConfig::default(); + let mut split_cfg_manager = + SplitConfigManager::new(Arc::new(VersionTrack::new(split_config))); + let mut auto_split_controller = AutoSplitController::new(split_cfg_manager.clone()); + assert_eq!( + auto_split_controller.refresh_and_check_cfg(), + SplitConfigChange::Noop, + ); + assert_eq!( + auto_split_controller + .cfg + .region_cpu_overload_threshold_ratio, + REGION_CPU_OVERLOAD_THRESHOLD_RATIO + ); + // Set to zero. + dispatch_split_cfg_change( + &mut split_cfg_manager, + "region_cpu_overload_threshold_ratio", + ConfigValue::F64(0.0), + ); + assert_eq!( + auto_split_controller.refresh_and_check_cfg(), + SplitConfigChange::UpdateRegionCPUCollector(false), + ); + assert_eq!( + auto_split_controller + .cfg + .region_cpu_overload_threshold_ratio, + 0.0 + ); + assert_eq!( + auto_split_controller.refresh_and_check_cfg(), + SplitConfigChange::Noop, + ); + // Set to non-zero. 
+ dispatch_split_cfg_change( + &mut split_cfg_manager, + "region_cpu_overload_threshold_ratio", + ConfigValue::F64(REGION_CPU_OVERLOAD_THRESHOLD_RATIO), + ); + assert_eq!( + auto_split_controller.refresh_and_check_cfg(), + SplitConfigChange::UpdateRegionCPUCollector(true), + ); + assert_eq!( + auto_split_controller + .cfg + .region_cpu_overload_threshold_ratio, + REGION_CPU_OVERLOAD_THRESHOLD_RATIO + ); + assert_eq!( + auto_split_controller.refresh_and_check_cfg(), + SplitConfigChange::Noop, + ); + } + + fn dispatch_split_cfg_change( + split_cfg_manager: &mut SplitConfigManager, + cfg_name: &str, + cfg_value: ConfigValue, + ) { + let mut config_change = ConfigChange::new(); + config_change.insert(String::from(cfg_name), cfg_value); + split_cfg_manager.dispatch(config_change).unwrap(); + } + #[bench] fn samples_evaluate(b: &mut test::Bencher) { let mut samples = Samples(vec![Sample::new(b"c")]); From ed8257cabceb6eb5eddd4753dccedb076fb6dcb9 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Tue, 5 Jul 2022 15:15:01 +0800 Subject: [PATCH 0057/1149] server: collect count and duration by request source (#12954) ref tikv/tikv#12362 TiKV client can pass request_source through Context. It is useful for us to know how many requests there are from each source. So, this commit collects the count and the total duration by request source. The source label is not added to the command type in order to avoid creating too many label combinations. 
Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- metrics/grafana/tikv_details.json | 216 ++++++++++++++++++++++++++++++ src/server/metrics.rs | 69 +++++++++- src/server/service/batch.rs | 52 +++++-- src/server/service/kv.rs | 92 ++++++++++--- src/storage/mod.rs | 39 +++++- 6 files changed, 427 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 893b5d909f5..1dfb74e3b13 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2623,7 +2623,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#0e2f26c0a46ae7d666d6ca4410046a39e0c96f36" +source = "git+https://github.com/pingcap/kvproto.git#acfe326c7cb2bdcdbfc991cada1973a68f34836f" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 686c3a39a97..6ef292f95e5 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -5567,6 +5567,222 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The QPS of different sources of gRPC request", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 29 + }, + "hiddenSeries": false, + "id": 23763572858, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + 
"steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_grpc_request_source_counter_vec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (source)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "metric": "tikv_grpc_msg_duration_seconds_bucket", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "gRPC request sources QPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:69", + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:70", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The duration of different sources of gRPC request", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 29 + }, + "hiddenSeries": false, + "id": 23763572859, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + 
"spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_grpc_request_source_duration_vec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (source)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "metric": "tikv_grpc_msg_duration_seconds_bucket", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "gRPC request sources duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:69", + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:70", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "repeat": null, diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 8eda17034e1..caf6e1e86c4 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -1,7 +1,14 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. -use prometheus::{exponential_buckets, *}; +use std::{ + cell::{Cell, RefCell}, + time::Duration, +}; + +use collections::HashMap; +use prometheus::{exponential_buckets, local::LocalIntCounter, *}; use prometheus_static_metric::*; +use tikv_util::time::Instant; pub use crate::storage::kv::metrics::{ GcKeysCF, GcKeysCounterVec, GcKeysCounterVecInner, GcKeysDetail, @@ -240,6 +247,18 @@ lazy_static! 
{ exponential_buckets(0.0001, 2.0, 20).unwrap() ) .unwrap(); + pub static ref GRPC_REQUEST_SOURCE_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( + "tikv_grpc_request_source_counter_vec", + "Counter of different sources of RPC requests", + &["source"] + ) + .unwrap(); + pub static ref GRPC_REQUEST_SOURCE_DURATION_VEC: IntCounterVec = register_int_counter_vec!( + "tikv_grpc_request_source_duration_vec", + "Total duration of different sources of RPC requests (in microseconds)", + &["source"] + ) + .unwrap(); } lazy_static! { @@ -484,3 +503,51 @@ lazy_static! { pub static ref ASYNC_REQUESTS_DURATIONS_VEC: AsyncRequestsDurationVec = auto_flush_from!(ASYNC_REQUESTS_DURATIONS, AsyncRequestsDurationVec); } + +struct LocalRequestSourceMetrics { + pub count: LocalIntCounter, + pub duration_us: LocalIntCounter, +} + +impl LocalRequestSourceMetrics { + fn new(source: &str) -> Self { + LocalRequestSourceMetrics { + count: GRPC_REQUEST_SOURCE_COUNTER_VEC + .with_label_values(&[source]) + .local(), + duration_us: GRPC_REQUEST_SOURCE_DURATION_VEC + .with_label_values(&[source]) + .local(), + } + } +} + +thread_local! 
{ + static REQUEST_SOURCE_METRICS_MAP: RefCell> = RefCell::new(HashMap::default()); + + static LAST_LOCAL_FLUSH_TIME: Cell = Cell::new(Instant::now_coarse()); +} + +pub fn record_request_source_metrics(source: String, duration: Duration) { + let need_flush = LAST_LOCAL_FLUSH_TIME.with(|last_local_flush_time| { + let now = Instant::now_coarse(); + if now - last_local_flush_time.get() > Duration::from_secs(1) { + last_local_flush_time.set(now); + true + } else { + false + } + }); + REQUEST_SOURCE_METRICS_MAP.with(|map| { + let mut map = map.borrow_mut(); + let metrics = map + .entry(source) + .or_insert_with_key(|k| LocalRequestSourceMetrics::new(k)); + metrics.count.inc(); + metrics.duration_us.inc_by(duration.as_micros() as u64); + if need_flush { + metrics.count.flush(); + metrics.duration_us.flush(); + } + }); +} diff --git a/src/server/service/batch.rs b/src/server/service/batch.rs index 1a7fcb59c3a..931017549c1 100644 --- a/src/server/service/batch.rs +++ b/src/server/service/batch.rs @@ -152,7 +152,13 @@ pub struct GetCommandResponseConsumer { } impl ResponseBatchConsumer<(Option>, Statistics)> for GetCommandResponseConsumer { - fn consume(&self, id: u64, res: Result<(Option>, Statistics)>, begin: Instant) { + fn consume( + &self, + id: u64, + res: Result<(Option>, Statistics)>, + begin: Instant, + request_source: String, + ) { let mut resp = GetResponse::default(); if let Some(err) = extract_region_error(&res) { resp.set_region_error(err); @@ -175,7 +181,8 @@ impl ResponseBatchConsumer<(Option>, Statistics)> for GetCommandResponse cmd: Some(batch_commands_response::response::Cmd::Get(resp)), ..Default::default() }; - let mesure = GrpcRequestDuration::new(begin, GrpcTypeKind::kv_batch_get_command); + let mesure = + GrpcRequestDuration::new(begin, GrpcTypeKind::kv_batch_get_command, request_source); let task = MeasuredSingleResponse::new(id, res, mesure); if self.tx.send_and_notify(task).is_err() { error!("KvService response batch commands fail"); @@ -184,7 
+191,13 @@ impl ResponseBatchConsumer<(Option>, Statistics)> for GetCommandResponse } impl ResponseBatchConsumer>> for GetCommandResponseConsumer { - fn consume(&self, id: u64, res: Result>>, begin: Instant) { + fn consume( + &self, + id: u64, + res: Result>>, + begin: Instant, + request_source: String, + ) { let mut resp = RawGetResponse::default(); if let Some(err) = extract_region_error(&res) { resp.set_region_error(err); @@ -199,7 +212,8 @@ impl ResponseBatchConsumer>> for GetCommandResponseConsumer { cmd: Some(batch_commands_response::response::Cmd::RawGet(resp)), ..Default::default() }; - let mesure = GrpcRequestDuration::new(begin, GrpcTypeKind::raw_batch_get_command); + let mesure = + GrpcRequestDuration::new(begin, GrpcTypeKind::raw_batch_get_command, request_source); let task = MeasuredSingleResponse::new(id, res, mesure); if self.tx.send_and_notify(task).is_err() { error!("KvService response batch commands fail"); @@ -218,7 +232,11 @@ fn future_batch_get_command( REQUEST_BATCH_SIZE_HISTOGRAM_VEC .kv_get .observe(gets.len() as f64); - let ids = requests.clone(); + let id_sources: Vec<_> = requests + .iter() + .zip(gets.iter()) + .map(|(id, req)| (*id, req.get_context().get_request_source().to_string())) + .collect(); let res = storage.batch_get_command( gets, requests, @@ -235,13 +253,16 @@ fn future_batch_get_command( if let Some(e) = extract_region_error(&res) { let mut resp = GetResponse::default(); resp.set_region_error(e); - for id in ids { + for (id, source) in id_sources { let res = batch_commands_response::Response { cmd: Some(batch_commands_response::response::Cmd::Get(resp.clone())), ..Default::default() }; - let measure = - GrpcRequestDuration::new(begin_instant, GrpcTypeKind::kv_batch_get_command); + let measure = GrpcRequestDuration::new( + begin_instant, + GrpcTypeKind::kv_batch_get_command, + source, + ); let task = MeasuredSingleResponse::new(id, res, measure); if tx.send_and_notify(task).is_err() { error!("KvService response batch 
commands fail"); @@ -262,7 +283,11 @@ fn future_batch_raw_get_command( REQUEST_BATCH_SIZE_HISTOGRAM_VEC .raw_get .observe(gets.len() as f64); - let ids = requests.clone(); + let id_sources: Vec<_> = requests + .iter() + .zip(gets.iter()) + .map(|(id, req)| (*id, req.get_context().get_request_source().to_string())) + .collect(); let res = storage.raw_batch_get_command( gets, requests, @@ -274,13 +299,16 @@ fn future_batch_raw_get_command( if let Some(e) = extract_region_error(&res) { let mut resp = RawGetResponse::default(); resp.set_region_error(e); - for id in ids { + for (id, source) in id_sources { let res = batch_commands_response::Response { cmd: Some(batch_commands_response::response::Cmd::RawGet(resp.clone())), ..Default::default() }; - let measure = - GrpcRequestDuration::new(begin_instant, GrpcTypeKind::raw_batch_get_command); + let measure = GrpcRequestDuration::new( + begin_instant, + GrpcTypeKind::raw_batch_get_command, + source, + ); let task = MeasuredSingleResponse::new(id, res, measure); if tx.send_and_notify(task).is_err() { error!("KvService response batch commands fail"); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 64ce2abb0e6..5b084826861 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -184,17 +184,20 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor macro_rules! 
handle_request { ($fn_name: ident, $future_name: ident, $req_ty: ident, $resp_ty: ident) => { - fn $fn_name(&mut self, ctx: RpcContext<'_>, req: $req_ty, sink: UnarySink<$resp_ty>) { + fn $fn_name(&mut self, ctx: RpcContext<'_>, mut req: $req_ty, sink: UnarySink<$resp_ty>) { forward_unary!(self.proxy, $fn_name, ctx, req, sink); let begin_instant = Instant::now_coarse(); + let source = req.mut_context().take_request_source(); let resp = $future_name(&self.storage, req); let task = async move { let resp = resp.await?; sink.success(resp).await?; + let elapsed = begin_instant.saturating_elapsed(); GRPC_MSG_HISTOGRAM_STATIC .$fn_name - .observe(duration_to_sec(begin_instant.saturating_elapsed())); + .observe(elapsed.as_secs_f64()); + record_request_source_metrics(source, elapsed); ServerResult::Ok(()) } .map_err(|e| { @@ -367,16 +370,19 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor ); } - fn coprocessor(&mut self, ctx: RpcContext<'_>, req: Request, sink: UnarySink) { + fn coprocessor(&mut self, ctx: RpcContext<'_>, mut req: Request, sink: UnarySink) { forward_unary!(self.proxy, coprocessor, ctx, req, sink); let begin_instant = Instant::now_coarse(); + let source = req.mut_context().take_request_source(); let future = future_copr(&self.copr, Some(ctx.peer()), req); let task = async move { let resp = future.await?.consume(); sink.success(resp).await?; + let elapsed = begin_instant.saturating_elapsed(); GRPC_MSG_HISTOGRAM_STATIC .coprocessor - .observe(duration_to_sec(begin_instant.saturating_elapsed())); + .observe(elapsed.as_secs_f64()); + record_request_source_metrics(source, elapsed); ServerResult::Ok(()) } .map_err(|e| { @@ -393,17 +399,20 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor fn raw_coprocessor( &mut self, ctx: RpcContext<'_>, - req: RawCoprocessorRequest, + mut req: RawCoprocessorRequest, sink: UnarySink, ) { let begin_instant = Instant::now_coarse(); + let source = req.mut_context().take_request_source(); let future = 
future_raw_coprocessor(&self.copr_v2, &self.storage, req); let task = async move { let resp = future.await?; sink.success(resp).await?; + let elapsed = begin_instant.saturating_elapsed(); GRPC_MSG_HISTOGRAM_STATIC .raw_coprocessor - .observe(duration_to_sec(begin_instant.saturating_elapsed())); + .observe(elapsed.as_secs_f64()); + record_request_source_metrics(source, elapsed); ServerResult::Ok(()) } .map_err(|e| { @@ -593,6 +602,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor assert!(!req.get_start_key().is_empty()); assert!(!req.get_end_key().is_empty()); + let source = req.mut_context().take_request_source(); let (cb, f) = paired_future_callback(); let res = self.gc_worker.unsafe_destroy_range( req.take_context(), @@ -612,9 +622,11 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor resp.set_error(format!("{}", e)); } sink.success(resp).await?; + let elapsed = begin_instant.saturating_elapsed(); GRPC_MSG_HISTOGRAM_STATIC .unsafe_destroy_range - .observe(duration_to_sec(begin_instant.saturating_elapsed())); + .observe(elapsed.as_secs_f64()); + record_request_source_metrics(source, elapsed); ServerResult::Ok(()) } .map_err(|e| { @@ -1022,10 +1034,16 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor let mut response_retriever = response_retriever.map(move |item| { for measure in item.measures { - let GrpcRequestDuration { label, begin } = measure; + let GrpcRequestDuration { + label, + begin, + source, + } = measure; + let elapsed = begin.saturating_elapsed(); GRPC_MSG_HISTOGRAM_STATIC .get(label) - .observe(begin.saturating_elapsed_secs()); + .observe(elapsed.as_secs_f64()); + record_request_source_metrics(source, elapsed); } let mut r = item.batch_resp; @@ -1185,13 +1203,18 @@ fn response_batch_commands_request( tx: Sender, begin: Instant, label: GrpcTypeKind, + source: String, ) where MemoryTraceGuard: From, F: Future> + Send + 'static, { let task = async move { if let Ok(resp) = resp.await { - let measure = GrpcRequestDuration { begin, label }; 
+ let measure = GrpcRequestDuration { + begin, + label, + source, + }; let task = MeasuredSingleResponse::new(id, resp, measure); if let Err(e) = tx.send_and_notify(task) { error!("KvService response batch commands fail"; "err" => ?e); @@ -1228,49 +1251,70 @@ fn handle_batch_commands_request( // For some invalid requests. let begin_instant = Instant::now(); let resp = future::ok(batch_commands_response::Response::default()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::invalid); + response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::invalid, String::default()); }, - Some(batch_commands_request::request::Cmd::Get(req)) => { + Some(batch_commands_request::request::Cmd::Get(mut req)) => { if batcher.as_mut().map_or(false, |req_batch| { req_batch.can_batch_get(&req) }) { batcher.as_mut().unwrap().add_get_request(req, id); } else { let begin_instant = Instant::now(); + let source = req.mut_context().take_request_source(); let resp = future_get(storage, req) .map_ok(oneof!(batch_commands_response::response::Cmd::Get)) .map_err(|_| GRPC_MSG_FAIL_COUNTER.kv_get.inc()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::kv_get); + response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::kv_get, source); } }, - Some(batch_commands_request::request::Cmd::RawGet(req)) => { + Some(batch_commands_request::request::Cmd::RawGet(mut req)) => { if batcher.as_mut().map_or(false, |req_batch| { req_batch.can_batch_raw_get(&req) }) { batcher.as_mut().unwrap().add_raw_get_request(req, id); } else { let begin_instant = Instant::now(); + let source = req.mut_context().take_request_source(); let resp = future_raw_get(storage, req) .map_ok(oneof!(batch_commands_response::response::Cmd::RawGet)) .map_err(|_| GRPC_MSG_FAIL_COUNTER.raw_get.inc()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::raw_get); + 
response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::raw_get, source); } }, - Some(batch_commands_request::request::Cmd::Coprocessor(req)) => { + Some(batch_commands_request::request::Cmd::Coprocessor(mut req)) => { let begin_instant = Instant::now(); + let source = req.mut_context().take_request_source(); let resp = future_copr(copr, Some(peer.to_string()), req) .map_ok(|resp| { resp.map(oneof!(batch_commands_response::response::Cmd::Coprocessor)) }) .map_err(|_| GRPC_MSG_FAIL_COUNTER.coprocessor.inc()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::coprocessor); + response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::coprocessor, source); }, - $(Some(batch_commands_request::request::Cmd::$cmd(req)) => { + Some(batch_commands_request::request::Cmd::Empty(req)) => { let begin_instant = Instant::now(); + let resp = future_handle_empty(req) + .map_ok(|resp| batch_commands_response::Response { + cmd: Some(batch_commands_response::response::Cmd::Empty(resp)), + ..Default::default() + }) + .map_err(|_| GRPC_MSG_FAIL_COUNTER.invalid.inc()); + response_batch_commands_request( + id, + resp, + tx.clone(), + begin_instant, + GrpcTypeKind::invalid, + String::default(), + ); + } + $(Some(batch_commands_request::request::Cmd::$cmd(mut req)) => { + let begin_instant = Instant::now(); + let source = req.mut_context().take_request_source(); let resp = $future_fn($($arg,)* req) .map_ok(oneof!(batch_commands_response::response::Cmd::$cmd)) .map_err(|_| GRPC_MSG_FAIL_COUNTER.$metric_name.inc()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::$metric_name); + response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::$metric_name, source); })* Some(batch_commands_request::request::Cmd::Import(_)) => unimplemented!(), } @@ -1302,7 +1346,6 @@ fn handle_batch_commands_request( RawCoprocessor, future_raw_coprocessor(copr_v2, 
storage), coprocessor; PessimisticLock, future_acquire_pessimistic_lock(storage), kv_pessimistic_lock; PessimisticRollback, future_pessimistic_rollback(storage), kv_pessimistic_rollback; - Empty, future_handle_empty(), invalid; } } @@ -2100,10 +2143,15 @@ pub mod batch_commands_request { pub struct GrpcRequestDuration { pub begin: Instant, pub label: GrpcTypeKind, + pub source: String, } impl GrpcRequestDuration { - pub fn new(begin: Instant, label: GrpcTypeKind) -> Self { - GrpcRequestDuration { begin, label } + pub fn new(begin: Instant, label: GrpcTypeKind, source: String) -> Self { + GrpcRequestDuration { + begin, + label, + source, + } } } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 768579f0b15..4e44bc0b37a 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -737,8 +737,10 @@ impl Storage { for ((mut req, id), tracker) in requests.into_iter().zip(ids).zip(trackers) { set_tls_tracker_token(tracker); let mut ctx = req.take_context(); + let source = ctx.take_request_source(); let region_id = ctx.get_region_id(); let peer = ctx.get_peer(); + let key = Key::from_raw(req.get_key()); tls_collect_query( region_id, @@ -775,7 +777,7 @@ impl Storage { snap_ctx } Err(e) => { - consumer.consume(id, Err(e), begin_instant); + consumer.consume(id, Err(e), begin_instant, source); continue; } }; @@ -791,6 +793,7 @@ impl Storage { access_locks, region_id, id, + source, tracker, )); } @@ -806,6 +809,7 @@ impl Storage { access_locks, region_id, id, + source, tracker, ) = req_snap; let snap_res = snap.await; @@ -836,6 +840,7 @@ impl Storage { v.map_err(|e| Error::from(txn::Error::from(e))) .map(|v| (v, stat)), begin_instant, + source, ); } Err(e) => { @@ -843,12 +848,13 @@ impl Storage { id, Err(Error::from(txn::Error::from(e))), begin_instant, + source, ); } } }), Err(e) => { - consumer.consume(id, Err(e), begin_instant); + consumer.consume(id, Err(e), begin_instant, source); } } } @@ -1606,7 +1612,7 @@ impl Storage { } Self::with_tls_engine(|engine| 
engine.release_snapshot()); let begin_instant = Instant::now(); - for (id, key, ctx, mut req, snap) in snaps { + for (id, key, mut ctx, mut req, snap) in snaps { let cf = req.take_cf(); match snap.await { Ok(snapshot) => { @@ -1621,6 +1627,7 @@ impl Storage { .raw_get_key_value(cf, &key, &mut stats) .map_err(Error::from), begin_instant, + ctx.take_request_source(), ); tls_collect_read_flow( ctx.get_region_id(), @@ -1631,12 +1638,17 @@ impl Storage { ); } Err(e) => { - consumer.consume(id, Err(e), begin_instant); + consumer.consume( + id, + Err(e), + begin_instant, + ctx.take_request_source(), + ); } } } Err(e) => { - consumer.consume(id, Err(e), begin_instant); + consumer.consume(id, Err(e), begin_instant, ctx.take_request_source()); } } } @@ -2849,7 +2861,13 @@ impl TestStorageBuilder { } pub trait ResponseBatchConsumer: Send { - fn consume(&self, id: u64, res: Result, begin: Instant); + fn consume( + &self, + id: u64, + res: Result, + begin: Instant, + request_source: String, + ); } pub mod test_util { @@ -3033,6 +3051,7 @@ pub mod test_util { id: u64, res: Result<(Option>, Statistics)>, _: tikv_util::time::Instant, + _source: String, ) { self.data.lock().unwrap().push(GetResult { id, @@ -3042,7 +3061,13 @@ pub mod test_util { } impl ResponseBatchConsumer>> for GetConsumer { - fn consume(&self, id: u64, res: Result>>, _: tikv_util::time::Instant) { + fn consume( + &self, + id: u64, + res: Result>>, + _: tikv_util::time::Instant, + _source: String, + ) { self.data.lock().unwrap().push(GetResult { id, res }); } } From f5993c19abcecdec92bf58868d0f757061196791 Mon Sep 17 00:00:00 2001 From: haojinming Date: Tue, 5 Jul 2022 18:07:02 +0800 Subject: [PATCH 0058/1149] [apiv2] encode key range in raw_checksum interface (#12951) close tikv/tikv#12950 Signed-off-by: haojinming --- src/storage/mod.rs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 4e44bc0b37a..cb792d7aec2 100644 --- 
a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -2440,9 +2440,8 @@ impl Storage { &self, ctx: Context, algorithm: ChecksumAlgorithm, - ranges: Vec, + mut ranges: Vec, ) -> impl Future> { - // TODO: Modify this method in another PR for backup & restore feature of Api V2. const CMD: CommandKind = CommandKind::raw_checksum; let priority = ctx.get_priority(); let priority_tag = get_priority_tag(priority); @@ -2474,6 +2473,12 @@ impl Storage { .iter() .map(|range| (Some(range.get_start_key()), Some(range.get_end_key()))), )?; + for range in ranges.iter_mut() { + let start_key = F::encode_raw_key_owned(range.take_start_key(), None); + let end_key = F::encode_raw_key_owned(range.take_end_key(), None); + range.set_start_key(start_key.into_encoded()); + range.set_end_key(end_key.into_encoded()); + } let command_duration = tikv_util::time::Instant::now(); let snap_ctx = SnapContext { @@ -4564,6 +4569,7 @@ mod tests { let mut checksum: u64 = 0; let mut total_kvs: u64 = 0; let mut total_bytes: u64 = 0; + let mut is_first = true; // Write key-value pairs one by one for &(ref key, ref value) in &test_data { storage @@ -4576,13 +4582,18 @@ mod tests { expect_ok_callback(tx.clone(), 0), ) .unwrap(); - total_kvs += 1; - total_bytes += (key.len() + value.len()) as u64; - checksum = checksum_crc64_xor(checksum, digest.clone(), key, value); + // start key is set to b"r\0a\0", if raw_checksum does not encode the key, + // first key will be included in checksum. This is for testing issue #12950. 
+ if !is_first { + total_kvs += 1; + total_bytes += (key.len() + value.len()) as u64; + checksum = checksum_crc64_xor(checksum, digest.clone(), key, value); + } + is_first = false; rx.recv().unwrap(); } let mut range = KeyRange::default(); - range.set_start_key(b"r\0a".to_vec()); + range.set_start_key(b"r\0a\0".to_vec()); range.set_end_key(b"r\0z".to_vec()); assert_eq!( (checksum, total_kvs, total_bytes), From c762224a57fddc5f3ec9c773416d6615e505f415 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 6 Jul 2022 15:57:02 +0800 Subject: [PATCH 0059/1149] cdc: add min_resolved_ts_lag metrics (#12968) close tikv/tikv#12967 cdc: add min_resolved_ts_lag metrics Signed-off-by: Neil Shen --- components/backup-stream/src/endpoint.rs | 2 +- components/cdc/src/endpoint.rs | 43 ++++++++++++++++++++---- components/cdc/src/metrics.rs | 4 +++ 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 1c1efdcb546..a89d5a66da4 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -454,7 +454,7 @@ where let range_init_result = init.initialize_range(start_key.clone(), end_key.clone()); match range_init_result { Ok(()) => { - info!("backup stream success to initialize"; + info!("backup stream success to initialize"; "start_key" => utils::redact(&start_key), "end_key" => utils::redact(&end_key), "take" => ?start.saturating_elapsed(),) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index c78636b8e11..54686424461 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -147,6 +147,7 @@ pub enum Task { MinTS { regions: Vec, min_ts: TimeStamp, + current_ts: TimeStamp, }, ResolverReady { observe_id: ObserveID, @@ -203,9 +204,15 @@ impl fmt::Debug for Task { .field("type", &"multi_batch") .field("multi_batch", &multi.len()) .finish(), - Task::MinTS { ref min_ts, .. 
} => { - de.field("type", &"mit_ts").field("min_ts", min_ts).finish() - } + Task::MinTS { + ref min_ts, + ref current_ts, + .. + } => de + .field("type", &"mit_ts") + .field("current_ts", current_ts) + .field("min_ts", min_ts) + .finish(), Task::ResolverReady { ref observe_id, ref region, @@ -348,6 +355,7 @@ pub struct Endpoint { region_read_progress: RegionReadProgressRegistry, // Metrics and logging. + current_ts: TimeStamp, min_resolved_ts: TimeStamp, min_ts_region_id: u64, resolved_region_count: usize, @@ -439,6 +447,7 @@ impl, E: KvEngine> Endpoint { region_read_progress, // Log the first resolved ts warning. warn_resolved_ts_repeat_count: WARN_RESOLVED_TS_COUNT_THRESHOLD, + current_ts: TimeStamp::zero(), }; ep.register_min_ts_event(); ep @@ -819,7 +828,7 @@ impl, E: KvEngine> Endpoint { } } - fn on_min_ts(&mut self, regions: Vec, min_ts: TimeStamp) { + fn on_min_ts(&mut self, regions: Vec, min_ts: TimeStamp, current_ts: TimeStamp) { // Reset resolved_regions to empty. let resolved_regions = &mut self.resolved_region_heap; resolved_regions.clear(); @@ -856,6 +865,7 @@ impl, E: KvEngine> Endpoint { } } } + self.current_ts = current_ts; let lag_millis = min_ts .physical() .saturating_sub(self.min_resolved_ts.physical()); @@ -1056,7 +1066,11 @@ impl, E: KvEngine> Endpoint { }; if !regions.is_empty() { - match scheduler.schedule(Task::MinTS { regions, min_ts }) { + match scheduler.schedule(Task::MinTS { + regions, + min_ts, + current_ts: min_ts_pd, + }) { Ok(_) | Err(ScheduleError::Stopped(_)) => (), // Must schedule `RegisterMinTsEvent` event otherwise resolved ts can not // advance normally. 
@@ -1134,7 +1148,11 @@ impl, E: KvEngine> Runnable for Endpoint { debug!("cdc run task"; "task" => %task); match task { - Task::MinTS { regions, min_ts } => self.on_min_ts(regions, min_ts), + Task::MinTS { + regions, + min_ts, + current_ts, + } => self.on_min_ts(regions, min_ts, current_ts), Task::Register { request, downstream, @@ -1214,8 +1232,14 @@ impl, E: KvEngine> RunnableWithTimer for Endpoin if self.min_resolved_ts != TimeStamp::max() { CDC_MIN_RESOLVED_TS_REGION.set(self.min_ts_region_id as i64); CDC_MIN_RESOLVED_TS.set(self.min_resolved_ts.physical() as i64); + CDC_MIN_RESOLVED_TS_LAG.set( + self.current_ts + .physical() + .saturating_sub(self.min_resolved_ts.physical()) as i64, + ); } self.min_resolved_ts = TimeStamp::max(); + self.current_ts = TimeStamp::max(); self.min_ts_region_id = 0; self.old_value_cache.flush_metrics(); @@ -1881,6 +1905,7 @@ mod tests { suite.run(Task::MinTS { regions: vec![1], min_ts: TimeStamp::from(1), + current_ts: TimeStamp::zero(), }); let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) .unwrap() @@ -1916,6 +1941,7 @@ mod tests { suite.run(Task::MinTS { regions: vec![1, 2], min_ts: TimeStamp::from(2), + current_ts: TimeStamp::zero(), }); let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) .unwrap() @@ -1960,6 +1986,7 @@ mod tests { suite.run(Task::MinTS { regions: vec![1, 2, 3], min_ts: TimeStamp::from(3), + current_ts: TimeStamp::zero(), }); let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) .unwrap() @@ -2193,6 +2220,7 @@ mod tests { suite.run(Task::MinTS { regions: vec![1], min_ts: TimeStamp::from(1), + current_ts: TimeStamp::zero(), }); // conn a must receive a resolved ts that only contains region 1. 
assert_batch_resolved_ts(conn_rxs.get_mut(0).unwrap(), vec![1], 1); @@ -2206,6 +2234,7 @@ mod tests { suite.run(Task::MinTS { regions: vec![1, 2], min_ts: TimeStamp::from(2), + current_ts: TimeStamp::zero(), }); // conn a must receive a resolved ts that contains region 1 and region 2. assert_batch_resolved_ts(conn_rxs.get_mut(0).unwrap(), vec![1, 2], 2); @@ -2219,6 +2248,7 @@ mod tests { suite.run(Task::MinTS { regions: vec![1, 2, 3], min_ts: TimeStamp::from(3), + current_ts: TimeStamp::zero(), }); // conn a must receive a resolved ts that contains region 1 and region 2. assert_batch_resolved_ts(conn_rxs.get_mut(0).unwrap(), vec![1, 2], 3); @@ -2228,6 +2258,7 @@ mod tests { suite.run(Task::MinTS { regions: vec![1, 3], min_ts: TimeStamp::from(4), + current_ts: TimeStamp::zero(), }); // conn a must receive a resolved ts that only contains region 1. assert_batch_resolved_ts(conn_rxs.get_mut(0).unwrap(), vec![1], 4); diff --git a/components/cdc/src/metrics.rs b/components/cdc/src/metrics.rs index 55a0124e567..0118b4d7916 100644 --- a/components/cdc/src/metrics.rs +++ b/components/cdc/src/metrics.rs @@ -108,6 +108,10 @@ lazy_static! { "The region which has minimal resolved ts" ) .unwrap(); + pub static ref CDC_MIN_RESOLVED_TS_LAG: IntGauge = register_int_gauge!( + "tikv_cdc_min_resolved_ts_lag", + "The lag between the minimal resolved ts and the current ts" + ).unwrap(); pub static ref CDC_MIN_RESOLVED_TS: IntGauge = register_int_gauge!( "tikv_cdc_min_resolved_ts", "The minimal resolved ts for current regions" From 07e7cd40dffcd5d7ce1c0f2693bb7ba59a3cd465 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Wed, 6 Jul 2022 16:27:03 +0800 Subject: [PATCH 0060/1149] raftstore: introduce the CPU-based Load Base Split strategy (#12955) ref tikv/tikv#12063, ref tikv/tikv#12593, ref tikv/tikv#12942 Introduce the CPU-based Load Base Split strategy. 
Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/worker/pd.rs | 106 ++++-- .../src/store/worker/split_controller.rs | 325 ++++++++++++++++-- components/server/src/server.rs | 11 +- components/test_raftstore/src/server.rs | 9 +- src/read_pool.rs | 5 +- 5 files changed, 405 insertions(+), 51 deletions(-) diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 280c15b083f..afd84ad16dd 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -472,6 +472,7 @@ where handle: Option>, timer: Option>, read_stats_sender: Option>, + cpu_stats_sender: Option>>, collect_store_infos_interval: Duration, load_base_split_check_interval: Duration, collect_tick_interval: Duration, @@ -493,6 +494,7 @@ where handle: None, timer: None, read_stats_sender: None, + cpu_stats_sender: None, collect_store_infos_interval: interval, load_base_split_check_interval: cmp::min( DEFAULT_LOAD_BASE_SPLIT_CHECK_INTERVAL, @@ -537,6 +539,9 @@ where let (read_stats_sender, read_stats_receiver) = mpsc::channel(); self.read_stats_sender = Some(read_stats_sender); + let (cpu_stats_sender, cpu_stats_receiver) = mpsc::channel(); + self.cpu_stats_sender = Some(cpu_stats_sender); + let scheduler = self.scheduler.clone(); let props = tikv_util::thread_group::current_properties(); @@ -548,17 +553,25 @@ where .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); tikv_alloc::add_thread_memory_accessor(); - let mut thread_stats = ThreadInfoStatistics::new(); + // Create different `ThreadInfoStatistics` for different purposes to + // make sure the record won't be disturbed. 
+ let mut collect_store_infos_thread_stats = ThreadInfoStatistics::new(); + let mut load_base_split_thread_stats = ThreadInfoStatistics::new(); while let Err(mpsc::RecvTimeoutError::Timeout) = timer_rx.recv_timeout(tick_interval) { if is_enable_tick(timer_cnt, collect_store_infos_interval) { - StatsMonitor::collect_store_infos(&mut thread_stats, &scheduler); + StatsMonitor::collect_store_infos( + &mut collect_store_infos_thread_stats, + &scheduler, + ); } if is_enable_tick(timer_cnt, load_base_split_check_interval) { StatsMonitor::load_base_split( &mut auto_split_controller, &read_stats_receiver, + &cpu_stats_receiver, + &mut load_base_split_thread_stats, &scheduler, ); } @@ -602,7 +615,9 @@ where pub fn load_base_split( auto_split_controller: &mut AutoSplitController, - receiver: &Receiver, + read_stats_receiver: &Receiver, + cpu_stats_receiver: &Receiver>, + thread_stats: &mut ThreadInfoStatistics, scheduler: &Scheduler>, ) { let start_time = TiInstant::now(); @@ -618,11 +633,17 @@ where } SplitConfigChange::Noop => {} } - let mut others = vec![]; - while let Ok(other) = receiver.try_recv() { - others.push(other); + let mut read_stats_vec = vec![]; + while let Ok(read_stats) = read_stats_receiver.try_recv() { + read_stats_vec.push(read_stats); } - let (top, split_infos) = auto_split_controller.flush(others); + let mut cpu_stats_vec = vec![]; + while let Ok(cpu_stats) = cpu_stats_receiver.try_recv() { + cpu_stats_vec.push(cpu_stats); + } + thread_stats.record(); + let (top_qps, split_infos) = + auto_split_controller.flush(read_stats_vec, cpu_stats_vec, thread_stats); auto_split_controller.clear(); let task = Task::AutoSplit { split_infos }; if let Err(e) = scheduler.schedule(task) { @@ -632,10 +653,10 @@ where ); } for i in 0..TOP_N { - if i < top.len() { + if i < top_qps.len() { READ_QPS_TOPN .with_label_values(&[&i.to_string()]) - .set(top[i] as f64); + .set(top_qps[i] as f64); } else { READ_QPS_TOPN.with_label_values(&[&i.to_string()]).set(0.0); } @@ -672,15 
+693,22 @@ where if let Some(h) = self.handle.take() { drop(self.timer.take()); drop(self.read_stats_sender.take()); + drop(self.cpu_stats_sender.take()); if let Err(e) = h.join() { error!("join stats collector failed"; "err" => ?e); } } } - pub fn get_read_stats_sender(&self) -> &Option> { + #[inline(always)] + fn get_read_stats_sender(&self) -> &Option> { &self.read_stats_sender } + + #[inline(always)] + fn get_cpu_stats_sender(&self) -> &Option>> { + &self.cpu_stats_sender + } } const HOTSPOT_KEY_RATE_THRESHOLD: u64 = 128; @@ -1684,6 +1712,12 @@ where // which is the read load portion of the write path. // TODO: more accurate CPU consumption of a specified region. fn handle_region_cpu_records(&mut self, records: Arc) { + // Send Region CPU info to AutoSplitController inside the stats_monitor. + if let Some(cpu_stats_sender) = self.stats_monitor.get_cpu_stats_sender() { + if cpu_stats_sender.send(records.clone()).is_err() { + warn!("send region cpu info failed, are we shutting down?") + } + } calculate_region_cpu_records(self.store_id, records, &mut self.region_cpu_records); } @@ -1831,18 +1865,46 @@ where if let Ok(Some(region)) = pd_client.get_region_by_id(split_info.region_id).await { - Self::handle_ask_batch_split( - router.clone(), - scheduler.clone(), - pd_client.clone(), - region, - vec![split_info.split_key], - split_info.peer, - true, - Callback::None, - String::from("auto_split"), - remote.clone(), - ); + // Try to split the region with the given split key. + if let Some(split_key) = split_info.split_key { + Self::handle_ask_batch_split( + router.clone(), + scheduler.clone(), + pd_client.clone(), + region, + vec![split_key], + split_info.peer, + true, + Callback::None, + String::from("auto_split"), + remote.clone(), + ); + return; + } + // Try to split the region on half within the given key range + // if there is no `split_key` been given. 
+ if split_info.start_key.is_some() && split_info.end_key.is_some() { + let start_key = split_info.start_key.unwrap(); + let end_key = split_info.end_key.unwrap(); + let region_id = region.get_id(); + let msg = CasualMessage::HalfSplitRegion { + region_epoch: region.get_region_epoch().clone(), + start_key: Some(start_key.clone()), + end_key: Some(end_key.clone()), + policy: pdpb::CheckPolicy::Scan, + source: "auto_split", + cb: Callback::None, + }; + if let Err(e) = router.send(region_id, PeerMsg::CasualMessage(msg)) + { + error!("send auto half split request failed"; + "region_id" => region_id, + "start_key" => log_wrappers::Value::key(&start_key), + "end_key" => log_wrappers::Value::key(&end_key), + "err" => ?e, + ); + } + } } } }; diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index b644ac88d85..dd3fcbf95be 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -4,7 +4,7 @@ use std::{ cmp::{min, Ordering}, collections::{BinaryHeap, HashMap, HashSet}, slice::{Iter, IterMut}, - sync::Arc, + sync::{mpsc::Receiver, Arc}, time::{Duration, SystemTime}, }; @@ -15,10 +15,12 @@ use kvproto::{ }; use pd_client::{merge_bucket_stats, new_bucket_stats, BucketMeta, BucketStat}; use rand::Rng; -use tikv_util::{config::Tracker, debug, info, warn}; +use resource_metering::RawRecords; +use tikv_util::{config::Tracker, debug, info, metrics::ThreadInfoStatistics, warn}; use crate::store::{ metrics::*, + util::build_key_range, worker::{ query_stats::{is_read_query, QueryStats}, split_config::get_sample_num, @@ -32,6 +34,8 @@ pub const TOP_N: usize = 10; // LOAD_BASE_SPLIT_EVENT metrics label definitions. // Workload fits the QPS threshold or byte threshold. const LOAD_FIT: &str = "load_fit"; +// Workload fits the CPU threshold. +const CPU_LOAD_FIT: &str = "cpu_load_fit"; // The statistical key is empty. 
const EMPTY_STATISTICAL_KEY: &str = "empty_statistical_key"; // Split info has been collected, ready to split. @@ -46,6 +50,10 @@ const NO_ENOUGH_LR_KEY: &str = "no_enough_lr_key"; const NO_BALANCE_KEY: &str = "no_balance_key"; // The number of contained keys does not meet the score. const NO_UNCROSS_KEY: &str = "no_uncross_key"; +// Split info for the top hot CPU region has been collected, ready to split. +const READY_TO_SPLIT_CPU_TOP: &str = "ready_to_split_cpu_top"; +// The top hot CPU region is not ready to split. +const UNABLE_TO_SPLIT_CPU_TOP: &str = "unable_to_split_cpu_top"; // It will return prefix sum of the given iter, // `read` is a function to process the item from the iter. @@ -277,6 +285,8 @@ pub struct Recorder { pub peer: Peer, pub key_ranges: Vec>, pub create_time: SystemTime, + pub cpu_usage: f64, + pub hottest_key_range: Option, } impl Recorder { @@ -286,6 +296,8 @@ impl Recorder { peer: Peer::default(), key_ranges: vec![], create_time: SystemTime::now(), + cpu_usage: 0.0, + hottest_key_range: None, } } @@ -299,6 +311,14 @@ impl Recorder { } } + fn update_cpu_usage(&mut self, cpu_usage: f64) { + self.cpu_usage = cpu_usage; + } + + fn update_hottest_key_range(&mut self, key_range: KeyRange) { + self.hottest_key_range = Some(key_range); + } + fn is_ready(&self) -> bool { self.key_ranges.len() >= self.detect_times } @@ -515,8 +535,41 @@ impl WriteStats { pub struct SplitInfo { pub region_id: u64, - pub split_key: Vec, pub peer: Peer, + pub split_key: Option>, + pub start_key: Option>, + pub end_key: Option>, +} + +impl SplitInfo { + // Create a SplitInfo with the given region_id, peer and split_key. + // This is used to split the region with this specified split key later. + fn with_split_key(region_id: u64, peer: Peer, split_key: Vec) -> Self { + SplitInfo { + region_id, + peer, + split_key: Some(split_key), + start_key: None, + end_key: None, + } + } + + // Create a SplitInfo with the given region_id, peer, start_key and end_key. 
+ // This is used to split the region on half within the specified start and end keys later. + fn with_start_end_key( + region_id: u64, + peer: Peer, + start_key: Vec, + end_key: Vec, + ) -> Self { + SplitInfo { + region_id, + peer, + split_key: None, + start_key: Some(start_key), + end_key: Some(end_key), + } + } } #[derive(PartialEq, Debug)] @@ -530,25 +583,71 @@ pub struct AutoSplitController { pub recorders: HashMap, pub cfg: SplitConfig, cfg_tracker: Tracker, + // Thread-related info + max_grpc_thread_count: usize, + max_unified_read_pool_thread_count: usize, + unified_read_pool_scale_receiver: Option>, } impl AutoSplitController { - pub fn new(config_manager: SplitConfigManager) -> AutoSplitController { + pub fn new( + config_manager: SplitConfigManager, + max_grpc_thread_count: usize, + max_unified_read_pool_thread_count: usize, + unified_read_pool_scale_receiver: Option>, + ) -> AutoSplitController { AutoSplitController { recorders: HashMap::default(), cfg: config_manager.value().clone(), cfg_tracker: config_manager.0.clone().tracker("split_hub".to_owned()), + max_grpc_thread_count, + max_unified_read_pool_thread_count, + unified_read_pool_scale_receiver, } } pub fn default() -> AutoSplitController { - AutoSplitController::new(SplitConfigManager::default()) + AutoSplitController::new(SplitConfigManager::default(), 0, 0, None) + } + + fn should_check_region_cpu(&self) -> bool { + self.cfg.region_cpu_overload_threshold_ratio > 0.0 + } + + fn is_grpc_poll_busy(&self, grpc_thread_usage: f64) -> bool { + if self.max_grpc_thread_count == 0 { + return false; + } + let grpc_thread_cpu_overload_threshold = + self.max_grpc_thread_count as f64 * self.cfg.grpc_thread_cpu_overload_threshold_ratio; + grpc_thread_usage > 0.0 && grpc_thread_usage >= grpc_thread_cpu_overload_threshold + } + + fn is_unified_read_pool_busy(&self, unified_read_pool_thread_usage: f64) -> bool { + if self.max_unified_read_pool_thread_count == 0 { + return false; + } + let 
unified_read_pool_cpu_overload_threshold = self.max_unified_read_pool_thread_count + as f64 + * self + .cfg + .unified_read_pool_thread_cpu_overload_threshold_ratio; + unified_read_pool_thread_usage > 0.0 + && unified_read_pool_thread_usage >= unified_read_pool_cpu_overload_threshold + } + + fn is_region_busy(&self, unified_read_pool_thread_usage: f64, region_cpu_usage: f64) -> bool { + if unified_read_pool_thread_usage <= 0.0 || !self.should_check_region_cpu() { + return false; + } + region_cpu_usage / unified_read_pool_thread_usage + >= self.cfg.region_cpu_overload_threshold_ratio } - // collect the read stats from read_stats_vec and dispatch them to a region hashmap. + // collect the read stats from read_stats_vec and dispatch them to a Region HashMap. fn collect_read_stats(read_stats_vec: Vec) -> HashMap> { - // collect from different thread - let mut region_infos_map = HashMap::default(); // regionID-regionInfos + // RegionID -> Vec, collect the RegionInfo from different threads. + let mut region_infos_map = HashMap::default(); let capacity = read_stats_vec.len(); for read_stats in read_stats_vec { for (region_id, region_info) in read_stats.region_infos { @@ -561,13 +660,109 @@ impl AutoSplitController { region_infos_map } + // collect the CPU stats from cpu_stats_vec and dispatch them to a Region HashMap. + fn collect_cpu_stats( + &self, + cpu_stats_vec: Vec>, + ) -> HashMap)> { + // RegionID -> (CPU usage, Hottest Key Range), calculate the CPU usage and its hottest key range. + let mut region_cpu_map = HashMap::default(); + if !self.should_check_region_cpu() { + return region_cpu_map; + } + // Calculate the Region CPU usage. + let mut collect_interval_ms = 0; + let mut region_key_range_cpu_time_map = HashMap::new(); + cpu_stats_vec.iter().for_each(|cpu_stats| { + cpu_stats.records.iter().for_each(|(tag, record)| { + // Calculate the Region ID -> CPU Time. 
+ region_cpu_map + .entry(tag.region_id) + .and_modify(|(cpu_time, _)| *cpu_time += record.cpu_time as f64) + .or_insert_with(|| (record.cpu_time as f64, None)); + // Calculate the (Region ID, Key Range) -> CPU Time. + tag.key_ranges.iter().for_each(|key_range| { + region_key_range_cpu_time_map + .entry((tag.region_id, key_range)) + .and_modify(|cpu_time| *cpu_time += record.cpu_time) + .or_insert_with(|| record.cpu_time); + }) + }); + collect_interval_ms += cpu_stats.duration.as_millis(); + }); + // Calculate the Region CPU usage. + region_cpu_map.iter_mut().for_each(|(_, (cpu_time, _))| { + if collect_interval_ms == 0 { + *cpu_time = 0.0; + } else { + *cpu_time /= collect_interval_ms as f64; + } + }); + // Choose the hottest key range for each Region. + let mut hottest_key_range_cpu_time_map = HashMap::with_capacity(region_cpu_map.len()); + region_key_range_cpu_time_map + .iter() + .for_each(|((region_id, key_range), cpu_time)| { + let hottest_key_range_cpu_time = hottest_key_range_cpu_time_map + .entry(*region_id) + .or_insert_with(|| 0); + if cpu_time > hottest_key_range_cpu_time { + region_cpu_map + .entry(*region_id) + .and_modify(|(_, old_key_range)| { + *old_key_range = + Some(build_key_range(&key_range.0, &key_range.1, false)); + }); + *hottest_key_range_cpu_time = *cpu_time; + } + }); + region_cpu_map + } + + fn collect_thread_usage(thread_stats: &ThreadInfoStatistics, name: &str) -> f64 { + thread_stats + .get_cpu_usages() + .iter() + .filter(|(thread_name, _)| thread_name.contains(name)) + .fold(0, |cpu_usage_sum, (_, cpu_usage)| { + // `cpu_usage` is in [0, 100]. + cpu_usage_sum + cpu_usage + }) as f64 + / 100.0 + } + // flush the read stats info into the recorder and check if the region needs to be split // according to all the stats info the recorder has collected before. 
- pub fn flush(&mut self, read_stats_vec: Vec) -> (Vec, Vec) { - let mut split_infos = vec![]; + pub fn flush( + &mut self, + read_stats_vec: Vec, + cpu_stats_vec: Vec>, + thread_stats: &ThreadInfoStatistics, + ) -> (Vec, Vec) { + let mut top_cpu_usage = vec![]; let mut top_qps = BinaryHeap::with_capacity(TOP_N); let region_infos_map = Self::collect_read_stats(read_stats_vec); + let region_cpu_map = self.collect_cpu_stats(cpu_stats_vec); + // Prepare some diagnostic info. + let (grpc_thread_usage, unified_read_pool_thread_usage) = ( + Self::collect_thread_usage(thread_stats, "grpc-server"), + Self::collect_thread_usage(thread_stats, "unified-read-po"), + ); + let (is_grpc_poll_busy, is_unified_read_pool_busy) = ( + self.is_grpc_poll_busy(grpc_thread_usage), + self.is_unified_read_pool_busy(unified_read_pool_thread_usage), + ); + debug!("flush to load base split"; + "max_grpc_thread_count" => self.max_grpc_thread_count, + "grpc_thread_usage" => grpc_thread_usage, + "max_unified_read_pool_thread_count" => self.max_unified_read_pool_thread_count, + "unified_read_pool_thread_usage" => unified_read_pool_thread_usage, + "is_grpc_poll_busy" => is_grpc_poll_busy, + "is_unified_read_pool_busy" => is_unified_read_pool_busy, + ); + // Start to record the read stats info. + let mut split_infos = vec![]; for (region_id, region_infos) in region_infos_map { let qps_prefix_sum = prefix_sum(region_infos.iter(), RegionInfo::get_read_qps); // region_infos is not empty, so it's safe to unwrap here. 
@@ -575,19 +770,32 @@ impl AutoSplitController { let byte = region_infos .iter() .fold(0, |flow, region_info| flow + region_info.flow.read_bytes); + let (cpu_usage, hottest_key_range) = region_cpu_map + .get(®ion_id) + .map(|(cpu_usage, key_range)| (*cpu_usage, key_range.clone())) + .unwrap_or((0.0, None)); + let is_region_busy = self.is_region_busy(unified_read_pool_thread_usage, cpu_usage); debug!("load base split params"; "region_id" => region_id, "qps" => qps, "qps_threshold" => self.cfg.qps_threshold, "byte" => byte, "byte_threshold" => self.cfg.byte_threshold, + "cpu_usage" => cpu_usage, + "is_region_busy" => is_region_busy, ); QUERY_REGION_VEC .with_label_values(&["read"]) .observe(qps as f64); - if qps < self.cfg.qps_threshold && byte < self.cfg.byte_threshold { + // 1. If the QPS and Byte do not meet the threshold, skip. + // 2. If the Unified Read Pool is not busy or + // the Region is not hot enough (takes up 50% of the Unified Read Pool CPU times), skip. + if qps < self.cfg.qps_threshold + && byte < self.cfg.byte_threshold + && (!is_unified_read_pool_busy || !is_region_busy) + { self.recorders.remove_entry(®ion_id); continue; } @@ -600,6 +808,10 @@ impl AutoSplitController { .entry(region_id) .or_insert_with(|| Recorder::new(detect_times)); recorder.update_peer(®ion_infos[0].peer); + recorder.update_cpu_usage(cpu_usage); + if let Some(hottest_key_range) = hottest_key_range { + recorder.update_hottest_key_range(hottest_key_range); + } let key_ranges = sample( self.cfg.sample_num, @@ -616,20 +828,27 @@ impl AutoSplitController { if recorder.is_ready() { let key = recorder.collect(&self.cfg); if !key.is_empty() { - split_infos.push(SplitInfo { + split_infos.push(SplitInfo::with_split_key( region_id, - split_key: key, - peer: recorder.peer.clone(), - }); + recorder.peer.clone(), + key, + )); LOAD_BASE_SPLIT_EVENT .with_label_values(&[READY_TO_SPLIT]) .inc(); info!("load base split region"; "region_id" => region_id, "qps" => qps, + "byte" => byte, + 
"cpu_usage" => cpu_usage, ); + self.recorders.remove(®ion_id); + } else if is_unified_read_pool_busy && is_region_busy { + LOAD_BASE_SPLIT_EVENT + .with_label_values(&[CPU_LOAD_FIT]) + .inc(); + top_cpu_usage.push(region_id); } - self.recorders.remove(®ion_id); } else { LOAD_BASE_SPLIT_EVENT .with_label_values(&[NOT_READY_TO_SPLIT]) @@ -639,6 +858,49 @@ impl AutoSplitController { top_qps.push(qps); } + // Check if the top CPU usage region could be split. + // TODO: avoid unnecessary split by introducing the feedback mechanism from PD. + if !top_cpu_usage.is_empty() && !is_grpc_poll_busy { + // Calculate by using the latest CPU usage. + top_cpu_usage.sort_unstable_by(|a, b| { + let cpu_usage_a = self.recorders.get(a).unwrap().cpu_usage; + let cpu_usage_b = self.recorders.get(b).unwrap().cpu_usage; + cpu_usage_b.partial_cmp(&cpu_usage_a).unwrap() + }); + let region_id = top_cpu_usage[0]; + let recorder = self.recorders.get_mut(®ion_id).unwrap(); + if recorder.hottest_key_range.is_some() { + split_infos.push(SplitInfo::with_start_end_key( + region_id, + recorder.peer.clone(), + recorder + .hottest_key_range + .as_ref() + .unwrap() + .start_key + .clone(), + recorder.hottest_key_range.as_ref().unwrap().end_key.clone(), + )); + LOAD_BASE_SPLIT_EVENT + .with_label_values(&[READY_TO_SPLIT_CPU_TOP]) + .inc(); + info!("load base split region"; + "region_id" => region_id, + "start_key" => log_wrappers::Value::key(&recorder.hottest_key_range.as_ref().unwrap().start_key), + "end_key" => log_wrappers::Value::key(&recorder.hottest_key_range.as_ref().unwrap().end_key), + "cpu_usage" => recorder.cpu_usage, + ); + } else { + LOAD_BASE_SPLIT_EVENT + .with_label_values(&[UNABLE_TO_SPLIT_CPU_TOP]) + .inc(); + } + } + // Clean up the rest top CPU usage recorders. 
+ for region_id in top_cpu_usage { + self.recorders.remove(®ion_id); + } + (top_qps.into_vec(), split_infos) } @@ -666,6 +928,12 @@ impl AutoSplitController { } self.cfg = incoming.clone(); } + // Adjust with the size change of the Unified Read Pool. + if let Some(rx) = &self.unified_read_pool_scale_receiver { + if let Ok(max_thread_count) = rx.try_recv() { + self.max_unified_read_pool_thread_count = max_thread_count; + } + } cfg_change } } @@ -677,9 +945,8 @@ mod tests { use txn_types::Key; use super::*; - use crate::store::{ - util::build_key_range, - worker::split_config::{DEFAULT_SAMPLE_NUM, REGION_CPU_OVERLOAD_THRESHOLD_RATIO}, + use crate::store::worker::split_config::{ + DEFAULT_SAMPLE_NUM, REGION_CPU_OVERLOAD_THRESHOLD_RATIO, }; enum Position { @@ -880,7 +1147,8 @@ mod tests { hub.cfg.sample_threshold = 0; for i in 0..10 { - let (_, split_infos) = hub.flush(qps_stats.clone()); + let (_, split_infos) = + hub.flush(qps_stats.clone(), vec![], &ThreadInfoStatistics::default()); if (i + 1) % hub.cfg.detect_times == 0 { assert_eq!( split_infos.len(), @@ -891,7 +1159,9 @@ mod tests { for obtain in &split_infos { let mut equal = false; for expect in &split_keys { - if obtain.split_key.cmp(&expect.to_vec()) == Ordering::Equal { + if obtain.split_key.as_ref().unwrap().cmp(&expect.to_vec()) + == Ordering::Equal + { equal = true; break; } @@ -936,7 +1206,7 @@ mod tests { ); } qps_stats_vec.push(qps_stats); - hub.flush(qps_stats_vec); + hub.flush(qps_stats_vec, vec![], &ThreadInfoStatistics::default()); } // Test the empty key ranges. 
@@ -949,7 +1219,7 @@ mod tests { qps_stats.add_query_num(1, &Peer::default(), KeyRange::default(), QueryKind::Get); } qps_stats_vec.push(qps_stats); - hub.flush(qps_stats_vec); + hub.flush(qps_stats_vec, vec![], &ThreadInfoStatistics::default()); } fn check_sample_length(key_ranges: Vec>) { @@ -1229,7 +1499,8 @@ mod tests { let split_config = SplitConfig::default(); let mut split_cfg_manager = SplitConfigManager::new(Arc::new(VersionTrack::new(split_config))); - let mut auto_split_controller = AutoSplitController::new(split_cfg_manager.clone()); + let mut auto_split_controller = + AutoSplitController::new(split_cfg_manager.clone(), 0, 0, None); assert_eq!( auto_split_controller.refresh_and_check_cfg(), SplitConfigChange::Noop, @@ -1309,7 +1580,11 @@ mod tests { } b.iter(|| { let mut hub = AutoSplitController::default(); - hub.flush(other_qps_stats.clone()); + hub.flush( + other_qps_stats.clone(), + vec![], + &ThreadInfoStatistics::default(), + ); }); } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index e09eec7d5d8..351015fdd9a 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -761,13 +761,17 @@ impl TiKvServer { cop_read_pools.handle() }; + let mut unified_read_pool_scale_receiver = None; if self.config.readpool.is_unified_pool_enabled() { + let (unified_read_pool_scale_notifier, rx) = mpsc::sync_channel(10); cfg_controller.register( tikv::config::Module::Readpool, Box::new(ReadPoolConfigManager( unified_read_pool.as_ref().unwrap().handle(), + unified_read_pool_scale_notifier, )), ); + unified_read_pool_scale_receiver = Some(rx); } // Register causal observer for RawKV API V2 @@ -959,7 +963,12 @@ impl TiKvServer { Box::new(split_config_manager.clone()), ); - let auto_split_controller = AutoSplitController::new(split_config_manager); + let auto_split_controller = AutoSplitController::new( + split_config_manager, + self.config.server.grpc_concurrency, + 
self.config.readpool.unified.max_thread_count, + unified_read_pool_scale_receiver, + ); // `ConsistencyCheckObserver` must be registered before `Node::start`. let safe_point = Arc::new(AtomicU64::new(0)); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index ac6a72e3a06..b87cc5257a5 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -541,11 +541,13 @@ impl ServerCluster { cfg.server.addr = format!("{}", addr); let trans = server.transport(); let simulate_trans = SimulateTransport::new(trans); + let max_grpc_thread_count = cfg.server.grpc_concurrency; let server_cfg = Arc::new(VersionTrack::new(cfg.server.clone())); // Register the role change observer of the lock manager. lock_mgr.register_detector_role_change_observer(&mut coprocessor_host); + let max_unified_read_pool_thread_count = cfg.readpool.unified.max_thread_count; let pessimistic_txn_cfg = cfg.tikv.pessimistic_txn; let split_check_runner = @@ -553,7 +555,12 @@ impl ServerCluster { let split_check_scheduler = bg_worker.start("split-check", split_check_runner); let split_config_manager = SplitConfigManager::new(Arc::new(VersionTrack::new(cfg.tikv.split))); - let auto_split_controller = AutoSplitController::new(split_config_manager); + let auto_split_controller = AutoSplitController::new( + split_config_manager, + max_grpc_thread_count, + max_unified_read_pool_thread_count, + None, + ); node.start( engines, simulate_trans.clone(), diff --git a/src/read_pool.rs b/src/read_pool.rs index cebd1965153..7409c9a4b6e 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -2,7 +2,7 @@ use std::{ future::Future, - sync::{Arc, Mutex}, + sync::{mpsc::SyncSender, Arc, Mutex}, }; use file_system::{set_io_type, IOType}; @@ -292,13 +292,14 @@ impl From> for ReadPool { } } -pub struct ReadPoolConfigManager(pub ReadPoolHandle); +pub struct ReadPoolConfigManager(pub ReadPoolHandle, pub SyncSender); impl ConfigManager for 
ReadPoolConfigManager { fn dispatch(&mut self, change: ConfigChange) -> CfgResult<()> { if let Some(ConfigValue::Module(unified)) = change.get("unified") { if let Some(ConfigValue::Usize(max_thread_count)) = unified.get("max_thread_count") { self.0.scale_pool_size(*max_thread_count); + self.1.send(*max_thread_count)?; } } info!( From 8c39b6014e42a863e66cec1d3a360bb69ee869c6 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 7 Jul 2022 14:07:02 +0800 Subject: [PATCH 0061/1149] test: update nextest profile (#12975) ref tikv/tikv#12769 Signed-off-by: tabokie --- .config/nextest.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.config/nextest.toml b/.config/nextest.toml index 247389fcd17..6f67aa5ecdb 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -1,7 +1,8 @@ [profile.ci] -retries = 1 # Run at most 2 times +retries = 2 # Run at most 3 times fail-fast = false -slow-timeout = { period = "60s", terminate-after = 2 } # Timeout=120s +slow-timeout = { period = "60s", terminate-after = 2 } # Timeout 2m +failure-output = "final" [profile.ci.junit] path = "junit.xml" From 2ca69a52f0d8d375e4c52f7f32504ffb0af129a5 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Thu, 7 Jul 2022 16:03:03 +0800 Subject: [PATCH 0062/1149] raftstore: add some test cases for the CPU-based Load Base Split strategy (#12969) ref tikv/tikv#12063 Add some test cases for the CPU-based Load Base Split strategy. 
Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- .../src/store/worker/split_controller.rs | 292 ++++++++++++++++-- 1 file changed, 262 insertions(+), 30 deletions(-) diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index dd3fcbf95be..1a3fb15af45 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -8,6 +8,7 @@ use std::{ time::{Duration, SystemTime}, }; +use fail::fail_point; use kvproto::{ kvrpcpb::KeyRange, metapb::{self, Peer}, @@ -615,6 +616,8 @@ impl AutoSplitController { } fn is_grpc_poll_busy(&self, grpc_thread_usage: f64) -> bool { + #[cfg(feature = "failpoints")] + fail_point!("mock_grpc_poll_is_not_busy", |_| { false }); if self.max_grpc_thread_count == 0 { return false; } @@ -624,6 +627,8 @@ impl AutoSplitController { } fn is_unified_read_pool_busy(&self, unified_read_pool_thread_usage: f64) -> bool { + #[cfg(feature = "failpoints")] + fail_point!("mock_unified_read_pool_is_busy", |_| { true }); if self.max_unified_read_pool_thread_count == 0 { return false; } @@ -637,6 +642,8 @@ impl AutoSplitController { } fn is_region_busy(&self, unified_read_pool_thread_usage: f64, region_cpu_usage: f64) -> bool { + #[cfg(feature = "failpoints")] + fail_point!("mock_region_is_busy", |_| { true }); if unified_read_pool_thread_usage <= 0.0 || !self.should_check_region_cpu() { return false; } @@ -941,6 +948,7 @@ impl AutoSplitController { #[cfg(test)] mod tests { use online_config::{ConfigChange, ConfigManager, ConfigValue}; + use resource_metering::{RawRecord, TagInfos}; use tikv_util::config::VersionTrack; use txn_types::Key; @@ -1059,7 +1067,7 @@ mod tests { build_key_range(b"a", b"b", false), build_key_range(b"b", b"c", false), ]; - check_split( + check_split_key( b"raw key", vec![gen_read_stats(1, raw_key_ranges.clone())], vec![b"b"], @@ -1073,14 +1081,14 @@ mod tests { 
build_key_range(key_a.as_encoded(), key_b.as_encoded(), false), build_key_range(key_b.as_encoded(), key_c.as_encoded(), false), ]; - check_split( + check_split_key( b"encoded key", vec![gen_read_stats(1, encoded_key_ranges.clone())], vec![key_b.as_encoded()], ); // mix mode - check_split( + check_split_key( b"mix key", vec![ gen_read_stats(1, raw_key_ranges), @@ -1090,7 +1098,7 @@ mod tests { ); // test distribution with contained key - for _i in 0..100 { + for _ in 0..100 { let key_ranges = vec![ build_key_range(b"a", b"k", false), build_key_range(b"b", b"j", false), @@ -1099,7 +1107,7 @@ mod tests { build_key_range(b"e", b"g", false), build_key_range(b"f", b"f", false), ]; - check_split( + check_split_key( b"isosceles triangle", vec![gen_read_stats(1, key_ranges)], vec![], @@ -1113,7 +1121,7 @@ mod tests { build_key_range(b"e", b"j", false), build_key_range(b"f", b"k", false), ]; - check_split( + check_split_key( b"parallelogram", vec![gen_read_stats(1, key_ranges)], vec![], @@ -1123,7 +1131,7 @@ mod tests { build_key_range(b"a", b"l", false), build_key_range(b"a", b"m", false), ]; - check_split( + check_split_key( b"right-angle trapezoid", vec![gen_read_stats(1, key_ranges)], vec![], @@ -1133,15 +1141,63 @@ mod tests { build_key_range(b"a", b"l", false), build_key_range(b"b", b"l", false), ]; - check_split( + check_split_key( b"right-angle trapezoid", vec![gen_read_stats(1, key_ranges)], vec![], ); } + + // test high CPU usage + fail::cfg("mock_grpc_poll_is_not_busy", "return(0)").unwrap(); + fail::cfg("mock_unified_read_pool_is_busy", "return(0)").unwrap(); + fail::cfg("mock_region_is_busy", "return(0)").unwrap(); + for _ in 0..100 { + let key_ranges = vec![ + build_key_range(b"a", b"l", false), + build_key_range(b"a", b"m", false), + ]; + check_split_key_range( + b"right-angle trapezoid with high CPU usage", + vec![gen_read_stats(1, key_ranges.clone())], + vec![gen_cpu_stats(1, key_ranges.clone(), vec![100, 200])], + b"a", + b"m", + ); + check_split_key_range( 
+ b"right-angle trapezoid with high CPU usage", + vec![gen_read_stats(1, key_ranges.clone())], + vec![gen_cpu_stats(1, key_ranges, vec![200, 100])], + b"a", + b"l", + ); + + let key_ranges = vec![ + build_key_range(b"a", b"l", false), + build_key_range(b"b", b"l", false), + ]; + check_split_key_range( + b"right-angle trapezoid with high CPU usage", + vec![gen_read_stats(1, key_ranges.clone())], + vec![gen_cpu_stats(1, key_ranges.clone(), vec![100, 200])], + b"b", + b"l", + ); + check_split_key_range( + b"right-angle trapezoid with high CPU usage", + vec![gen_read_stats(1, key_ranges.clone())], + vec![gen_cpu_stats(1, key_ranges, vec![200, 100])], + b"a", + b"l", + ); + } + fail::remove("mock_grpc_poll_is_not_busy"); + fail::remove("mock_unified_read_pool_is_busy"); + fail::remove("mock_region_is_busy"); } - fn check_split(mode: &[u8], qps_stats: Vec, split_keys: Vec<&[u8]>) { + fn check_split_key(mode: &[u8], qps_stats: Vec, split_keys: Vec<&[u8]>) { + let mode = String::from_utf8(Vec::from(mode)).unwrap(); let mut hub = AutoSplitController::default(); hub.cfg.qps_threshold = 1; hub.cfg.sample_threshold = 0; @@ -1149,33 +1205,95 @@ mod tests { for i in 0..10 { let (_, split_infos) = hub.flush(qps_stats.clone(), vec![], &ThreadInfoStatistics::default()); - if (i + 1) % hub.cfg.detect_times == 0 { - assert_eq!( - split_infos.len(), - split_keys.len(), - "mode: {:?}", - String::from_utf8(Vec::from(mode)).unwrap() - ); - for obtain in &split_infos { - let mut equal = false; - for expect in &split_keys { - if obtain.split_key.as_ref().unwrap().cmp(&expect.to_vec()) - == Ordering::Equal - { - equal = true; - break; - } + if (i + 1) % hub.cfg.detect_times != 0 { + continue; + } + // Check the split key. 
+ assert_eq!(split_infos.len(), split_keys.len(), "mode: {:?}", mode); + for obtain in &split_infos { + let mut equal = false; + for expect in &split_keys { + if obtain.split_key.as_ref().unwrap().cmp(&expect.to_vec()) == Ordering::Equal { + equal = true; + break; } - assert!( - equal, - "mode: {:?}", - String::from_utf8(Vec::from(mode)).unwrap() - ); } + assert!(equal, "mode: {:?}", mode); } } } + fn check_split_key_range( + mode: &[u8], + qps_stats: Vec, + cpu_stats: Vec>, + start_key: &[u8], + end_key: &[u8], + ) { + let mode = String::from_utf8(Vec::from(mode)).unwrap(); + let mut hub = AutoSplitController::default(); + hub.cfg.qps_threshold = 1; + hub.cfg.sample_threshold = 0; + + for i in 0..10 { + let (_, split_infos) = hub.flush( + qps_stats.clone(), + cpu_stats.clone(), + &ThreadInfoStatistics::default(), + ); + if (i + 1) % hub.cfg.detect_times != 0 { + continue; + } + assert_eq!(split_infos.len(), 1, "mode: {:?}", mode); + // Check the split key range. + let split_info = &split_infos[0]; + assert!(split_info.split_key.is_none(), "mode: {:?}", mode); + assert_eq!( + split_info + .start_key + .as_ref() + .unwrap() + .cmp(&start_key.to_vec()), + Ordering::Equal, + "mode: {:?}", + mode + ); + assert_eq!( + split_info.end_key.as_ref().unwrap().cmp(&end_key.to_vec()), + Ordering::Equal, + "mode: {:?}", + mode + ); + } + } + + fn gen_cpu_stats( + region_id: u64, + key_ranges: Vec, + cpu_times: Vec, + ) -> Arc { + let mut raw_records = RawRecords::default(); + raw_records.duration = Duration::from_millis(100); + for (idx, key_range) in key_ranges.iter().enumerate() { + let key_range_tag = Arc::new(TagInfos { + store_id: 0, + region_id, + peer_id: 0, + key_ranges: vec![(key_range.start_key.clone(), key_range.end_key.clone())], + extra_attachment: vec![], + }); + raw_records.records.insert( + key_range_tag.clone(), + RawRecord { + cpu_time: cpu_times[idx], + read_keys: 0, + write_keys: 0, + }, + ); + } + Arc::new(raw_records) + } + #[test] fn test_sample_key_num() 
{ let mut hub = AutoSplitController::default(); @@ -1563,6 +1681,120 @@ mod tests { split_cfg_manager.dispatch(config_change).unwrap(); } + #[test] + fn test_collect_cpu_stats() { + let auto_split_controller = AutoSplitController::default(); + let region_cpu_map = auto_split_controller.collect_cpu_stats(vec![]); + assert!(region_cpu_map.is_empty()); + + let ab_key_range_tag = Arc::new(TagInfos { + store_id: 0, + region_id: 1, + peer_id: 0, + key_ranges: vec![(b"a".to_vec(), b"b".to_vec())], + extra_attachment: vec![], + }); + let cd_key_range_tag = Arc::new(TagInfos { + store_id: 0, + region_id: 1, + peer_id: 0, + key_ranges: vec![(b"c".to_vec(), b"d".to_vec())], + extra_attachment: vec![], + }); + let multiple_key_ranges_tag = Arc::new(TagInfos { + store_id: 0, + region_id: 1, + peer_id: 0, + key_ranges: vec![ + (b"a".to_vec(), b"b".to_vec()), + (b"c".to_vec(), b"d".to_vec()), + ], + extra_attachment: vec![], + }); + let empty_key_range_tag = Arc::new(TagInfos { + store_id: 0, + region_id: 1, + peer_id: 0, + key_ranges: vec![], + extra_attachment: vec![], + }); + + let test_cases = vec![ + (300, 150, 50, 50, Some(build_key_range(b"a", b"b", false))), + (150, 300, 50, 50, Some(build_key_range(b"c", b"d", false))), + (150, 50, 300, 50, Some(build_key_range(b"a", b"b", false))), + (50, 150, 300, 50, Some(build_key_range(b"c", b"d", false))), + (150, 50, 50, 300, Some(build_key_range(b"a", b"b", false))), + (100, 0, 0, 0, Some(build_key_range(b"a", b"b", false))), + (50, 0, 0, 50, Some(build_key_range(b"a", b"b", false))), + (50, 0, 0, 100, Some(build_key_range(b"a", b"b", false))), + (50, 0, 50, 0, Some(build_key_range(b"a", b"b", false))), + (0, 50, 50, 0, Some(build_key_range(b"c", b"d", false))), + (0, 0, 0, 100, None), + (0, 0, 0, 0, None), + ]; + for (i, test_case) in test_cases.iter().enumerate() { + let mut raw_records = RawRecords::default(); + raw_records.duration = Duration::from_millis(100); + // ["a", "b"] with (test_case.0)ms CPU time. 
+ raw_records.records.insert( + ab_key_range_tag.clone(), + RawRecord { + cpu_time: test_case.0, + read_keys: 0, + write_keys: 0, + }, + ); + // ["c", "d"] with (test_case.1)ms CPU time. + raw_records.records.insert( + cd_key_range_tag.clone(), + RawRecord { + cpu_time: test_case.1, + read_keys: 0, + write_keys: 0, + }, + ); + // Multiple key ranges with (test_case.2)ms CPU time. + raw_records.records.insert( + multiple_key_ranges_tag.clone(), + RawRecord { + cpu_time: test_case.2, + read_keys: 0, + write_keys: 0, + }, + ); + // Empty key range with (test_case.3)ms CPU time. + raw_records.records.insert( + empty_key_range_tag.clone(), + RawRecord { + cpu_time: test_case.3, + read_keys: 0, + write_keys: 0, + }, + ); + let region_cpu_map = + auto_split_controller.collect_cpu_stats(vec![Arc::new(raw_records)]); + assert_eq!( + region_cpu_map.len(), + 1, + "test_collect_cpu_stats case: {}", + i + ); + assert_eq!( + region_cpu_map.get(&1).unwrap().0, + (test_case.0 + test_case.1 + test_case.2 + test_case.3) as f64 / 100.0, + "test_collect_cpu_stats case: {}", + i + ); + assert_eq!( + region_cpu_map.get(&1).unwrap().1, + test_case.4, + "test_collect_cpu_stats case: {}", + i + ); + } + } + #[bench] fn samples_evaluate(b: &mut test::Bencher) { let mut samples = Samples(vec![Sample::new(b"c")]); From fc49bdf8694c629184c2b512ced9390a56641b1a Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Fri, 8 Jul 2022 13:07:03 +0800 Subject: [PATCH 0063/1149] debug: Parameterize debug service (#12960) ref tikv/tikv#12849 Parameterize debug service Signed-off-by: CalvinNeo Co-authored-by: Ti Chi Robot --- src/server/service/debug.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index 160daa1178b..740e597e5e2 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -1,7 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
use engine_rocks::RocksEngine; -use engine_traits::{Engines, MiscExt, RaftEngine}; +use engine_traits::{Engines, KvEngine, MiscExt, RaftEngine}; use futures::{ channel::oneshot, future::{Future, FutureExt, TryFutureExt}, @@ -53,25 +53,27 @@ fn error_to_grpc_error(tag: &'static str, e: Error) -> GrpcError { /// Service handles the RPC messages for the `Debug` service. #[derive(Clone)] -pub struct Service> { +pub struct Service> { pool: Handle, debugger: Debugger, raft_router: T, + _phantom: std::marker::PhantomData, } -impl> Service { +impl> Service { /// Constructs a new `Service` with `Engines`, a `RaftStoreRouter` and a `GcWorker`. pub fn new( engines: Engines, pool: Handle, raft_router: T, cfg_controller: ConfigController, - ) -> Service { + ) -> Service { let debugger = Debugger::new(engines, cfg_controller); Service { pool, debugger, raft_router, + _phantom: Default::default(), } } @@ -96,7 +98,9 @@ impl> Service { } } -impl + 'static> debugpb::Debug for Service { +impl + 'static> debugpb::Debug + for Service +{ fn get(&mut self, ctx: RpcContext<'_>, mut req: GetRequest, sink: UnarySink) { const TAG: &str = "debug_get"; @@ -532,7 +536,7 @@ impl + 'static> debugpb::Debug f } } -fn region_detail>( +fn region_detail>( raft_router: T, region_id: u64, store_id: u64, @@ -573,7 +577,7 @@ fn region_detail>( } } -fn consistency_check>( +fn consistency_check>( raft_router: T, mut detail: RegionDetailResponse, ) -> impl Future> { From b4bccd7a58faa775d1d7ec7e6b60201bb5ebc6f7 Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 8 Jul 2022 05:45:04 -0700 Subject: [PATCH 0064/1149] raftstorev2: support building store batch system (#12921) ref tikv/tikv#12842 Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 4 +- components/engine_panic/src/raft_engine.rs | 8 + components/engine_rocks/src/raft_engine.rs | 28 ++ components/engine_traits/src/raft_engine.rs | 8 + components/raft_log_engine/src/engine.rs | 8 + components/raftstore-v2/Cargo.toml | 2 + 
components/raftstore-v2/src/batch/apply.rs | 4 + components/raftstore-v2/src/batch/mod.rs | 11 + components/raftstore-v2/src/batch/store.rs | 337 ++++++++++++++++++++ components/raftstore-v2/src/fsm/mod.rs | 3 + components/raftstore-v2/src/fsm/peer.rs | 105 +++++- components/raftstore-v2/src/fsm/store.rs | 64 +++- components/raftstore-v2/src/lib.rs | 3 + components/raftstore-v2/src/raft/peer.rs | 93 ++++-- components/raftstore-v2/src/raft/storage.rs | 108 ++++++- components/raftstore/src/store/fsm/peer.rs | 3 + components/raftstore/src/store/fsm/store.rs | 32 +- components/tikv_util/src/lib.rs | 10 + src/coprocessor/endpoint.rs | 24 +- src/coprocessor/tracker.rs | 6 +- src/server/service/kv.rs | 14 +- 21 files changed, 788 insertions(+), 87 deletions(-) create mode 100644 components/raftstore-v2/src/batch/apply.rs create mode 100644 components/raftstore-v2/src/batch/mod.rs create mode 100644 components/raftstore-v2/src/batch/store.rs diff --git a/Cargo.lock b/Cargo.lock index 1dfb74e3b13..bdb55d28de2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2623,7 +2623,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#acfe326c7cb2bdcdbfc991cada1973a68f34836f" +source = "git+https://github.com/pingcap/kvproto.git#a5d4ffd2ba337dad0bc99e9fb53bf665864a3f3b" dependencies = [ "futures 0.3.15", "grpcio", @@ -4168,12 +4168,14 @@ dependencies = [ name = "raftstore-v2" version = "0.1.0" dependencies = [ + "batch-system", "collections", "crossbeam", "engine_test", "engine_traits", "error_code", "fail", + "futures-util", "kvproto", "pd_client", "raft", diff --git a/components/engine_panic/src/raft_engine.rs b/components/engine_panic/src/raft_engine.rs index 384bc60ffa6..d6f82c7f646 100644 --- a/components/engine_panic/src/raft_engine.rs +++ b/components/engine_panic/src/raft_engine.rs @@ -141,6 +141,14 @@ impl RaftEngine for PanicEngine { fn put_store_ident(&self, ident: &StoreIdent) -> Result<()> { panic!() } + + fn 
for_each_raft_group(&self, f: &mut F) -> std::result::Result<(), E> + where + F: FnMut(u64) -> std::result::Result<(), E>, + E: From, + { + panic!() + } } impl RaftLogBatch for PanicWriteBatch { diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index 57a65ba661f..19ceea3062c 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -338,6 +338,34 @@ impl RaftEngine for RocksEngine { fn put_store_ident(&self, ident: &StoreIdent) -> Result<()> { self.put_msg(keys::STORE_IDENT_KEY, ident) } + + fn for_each_raft_group(&self, f: &mut F) -> std::result::Result<(), E> + where + F: FnMut(u64) -> std::result::Result<(), E>, + E: From, + { + let start_key = keys::REGION_META_MIN_KEY; + let end_key = keys::REGION_META_MAX_KEY; + let mut err = None; + self.scan(start_key, end_key, false, |key, _| { + let (region_id, suffix) = box_try!(keys::decode_region_meta_key(key)); + if suffix != keys::REGION_STATE_SUFFIX { + return Ok(true); + } + + match f(region_id) { + Ok(()) => Ok(true), + Err(e) => { + err = Some(e); + Ok(false) + } + } + })?; + match err { + None => Ok(()), + Some(e) => Err(e), + } + } } impl RaftLogBatch for RocksWriteBatch { diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index e119184c556..7773ee3245f 100644 --- a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -138,6 +138,14 @@ pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send fn dump_stats(&self) -> Result; fn get_engine_size(&self) -> Result; + + /// Visit all available raft groups. + /// + /// If any error is returned, the iteration will stop. 
+ fn for_each_raft_group(&self, f: &mut F) -> std::result::Result<(), E> + where + F: FnMut(u64) -> std::result::Result<(), E>, + E: From; } pub trait RaftLogBatch: Send { diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 8c9a7fd2b88..9236e7947db 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -548,6 +548,14 @@ impl RaftEngine for RaftLogEngine { fn get_engine_size(&self) -> Result { Ok(self.0.get_used_size() as u64) } + + fn for_each_raft_group(&self, _f: &mut F) -> std::result::Result<(), E> + where + F: FnMut(u64) -> std::result::Result<(), E>, + E: From, + { + unimplemented!() + } } fn transfer_error(e: RaftEngineError) -> engine_traits::Error { diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 100a2be409d..5cdd2ee747f 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -29,11 +29,13 @@ cloud-gcp = ["raftstore/cloud-gcp"] cloud-azure = ["raftstore/cloud-azure"] [dependencies] +batch-system = { path = "../batch-system", default-features = false } collections = { path = "../collections" } crossbeam = "0.8" engine_traits = { path = "../engine_traits" } error_code = { path = "../error_code" } fail = "0.5" +futures-util = { version = "0.3", features = ["compat"] } kvproto = { git = "https://github.com/pingcap/kvproto.git" } pd_client = { path = "../pd_client" } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } diff --git a/components/raftstore-v2/src/batch/apply.rs b/components/raftstore-v2/src/batch/apply.rs new file mode 100644 index 00000000000..a7e392127d5 --- /dev/null +++ b/components/raftstore-v2/src/batch/apply.rs @@ -0,0 +1,4 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +/// Batch system for applying logs pipeline. 
+pub struct ApplySystem; diff --git a/components/raftstore-v2/src/batch/mod.rs b/components/raftstore-v2/src/batch/mod.rs new file mode 100644 index 00000000000..e856147220d --- /dev/null +++ b/components/raftstore-v2/src/batch/mod.rs @@ -0,0 +1,11 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module contains the specialized implementation of batch systems. +//! +//! StoreSystem is used for polling raft state machines, ApplySystem is used for +//! applying logs. + +mod apply; +mod store; + +pub use store::{create_store_batch_system, StoreContext, StoreSystem}; diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs new file mode 100644 index 00000000000..6a8974259ff --- /dev/null +++ b/components/raftstore-v2/src/batch/store.rs @@ -0,0 +1,337 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{mem, ops::DerefMut, sync::Arc, time::Duration}; + +use batch_system::{ + BasicMailbox, BatchRouter, BatchSystem, HandleResult, HandlerBuilder, PollHandler, +}; +use collections::HashMap; +use engine_traits::{Engines, KvEngine, RaftEngine, TabletFactory}; +use futures_util::{compat::Future01CompatExt, FutureExt}; +use kvproto::{metapb::Store, raft_serverpb::PeerState}; +use raftstore::store::{fsm::store::PeerTickBatch, Config, Transport}; +use slog::Logger; +use tikv_util::{ + box_err, + config::{Tracker, VersionTrack}, + future::poll_future_notify, + time::Instant as TiInstant, + timer::SteadyTimer, +}; + +use crate::{ + fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate}, + raft::Peer, + Error, PeerMsg, PeerTick, Result, StoreMsg, +}; + +/// A per thread context used for handling raft messages. +pub struct StoreContext { + /// A logger without any KV. It's clean for creating new PeerFSM. + pub logger: Logger, + /// The transport for sending messages to peers on other stores. + pub trans: T, + /// The latest configuration. 
+ pub cfg: Config, + /// The tick batch for delay ticking. It will be flushed at the end of every round. + pub tick_batch: Vec, + /// The precise timer for scheduling tick. + pub timer: SteadyTimer, +} + +impl StoreContext { + fn new(cfg: Config, trans: T, logger: Logger) -> Self { + Self { + logger, + trans, + cfg, + tick_batch: vec![PeerTickBatch::default(); PeerTick::VARIANT_COUNT], + timer: SteadyTimer::default(), + } + } +} + +/// Poller for polling raft state machines. +struct StorePoller { + store_msg_buf: Vec, + peer_msg_buf: Vec>, + poll_ctx: StoreContext, + cfg_tracker: Tracker, + last_flush_time: TiInstant, + need_flush_events: bool, +} + +impl StorePoller { + pub fn new(poll_ctx: StoreContext, cfg_tracker: Tracker) -> Self { + let mut poller = Self { + store_msg_buf: Vec::new(), + peer_msg_buf: Vec::new(), + poll_ctx, + cfg_tracker, + last_flush_time: TiInstant::now(), + need_flush_events: false, + }; + poller.apply_buf_capacity(); + poller + } + + /// Updates the internal buffer to latest capacity. 
+ fn apply_buf_capacity(&mut self) { + let new_cap = self.messages_per_tick(); + tikv_util::set_vec_capacity(&mut self.store_msg_buf, new_cap); + tikv_util::set_vec_capacity(&mut self.peer_msg_buf, new_cap); + } + + #[inline] + fn messages_per_tick(&self) -> usize { + self.poll_ctx.cfg.messages_per_tick + } + + fn flush_events(&mut self) { + self.schedule_ticks(); + } + + fn schedule_ticks(&mut self) { + assert_eq!( + PeerTick::get_all_ticks().len(), + self.poll_ctx.tick_batch.len() + ); + for batch in &mut self.poll_ctx.tick_batch { + batch.schedule(&self.poll_ctx.timer); + } + } +} + +impl PollHandler, StoreFsm> + for StorePoller +{ + fn begin(&mut self, _batch_size: usize, update_cfg: F) + where + for<'a> F: FnOnce(&'a batch_system::Config), + { + let cfg = self.cfg_tracker.any_new().map(|c| c.clone()); + if let Some(cfg) = cfg { + if cfg.messages_per_tick != self.messages_per_tick() { + self.apply_buf_capacity(); + } + self.poll_ctx.cfg = cfg; + } + } + + fn handle_control(&mut self, store: &mut StoreFsm) -> Option { + let received_cnt = store.recv(&mut self.store_msg_buf); + let expected_msg_count = if received_cnt == self.messages_per_tick() { + None + } else { + Some(0) + }; + let mut delegate = StoreFsmDelegate::new(store, &mut self.poll_ctx); + delegate.handle_msgs(&mut self.store_msg_buf); + expected_msg_count + } + + fn handle_normal( + &mut self, + peer: &mut impl DerefMut>, + ) -> HandleResult { + let received_cnt = peer.recv(&mut self.peer_msg_buf); + let handle_result = if received_cnt == self.messages_per_tick() { + HandleResult::KeepProcessing + } else { + HandleResult::stop_at(0, false) + }; + let mut delegate = PeerFsmDelegate::new(peer, &mut self.poll_ctx); + delegate.handle_msgs(&mut self.peer_msg_buf); + handle_result + } + + fn light_end(&mut self, _batch: &mut [Option>>]) { + if self.poll_ctx.trans.need_flush() { + self.poll_ctx.trans.flush(); + } + + let now = TiInstant::now(); + if now.saturating_duration_since(self.last_flush_time) >= 
Duration::from_millis(1) { + self.last_flush_time = now; + self.need_flush_events = false; + self.flush_events(); + } else { + self.need_flush_events = true; + } + } + + fn end(&mut self, batch: &mut [Option>>]) {} + + fn pause(&mut self) { + if self.poll_ctx.trans.need_flush() { + self.poll_ctx.trans.flush(); + } + + if self.need_flush_events { + self.last_flush_time = TiInstant::now(); + self.need_flush_events = false; + self.flush_events(); + } + } +} + +struct StorePollerBuilder { + cfg: Arc>, + store_id: u64, + engine: ER, + tablet_factory: Arc>, + trans: T, + logger: Logger, +} + +impl StorePollerBuilder { + pub fn new( + cfg: Arc>, + store_id: u64, + engine: ER, + tablet_factory: Arc>, + trans: T, + logger: Logger, + ) -> Self { + StorePollerBuilder { + cfg, + store_id, + engine, + tablet_factory, + trans, + logger, + } + } + + /// Init all the existing raft machine and cleanup stale tablets. + fn init(&self) -> Result>> { + let mut regions = HashMap::default(); + let cfg = self.cfg.value(); + self.engine + .for_each_raft_group::(&mut |region_id| { + let peer = match Peer::new( + &cfg, + region_id, + self.store_id, + self.tablet_factory.as_ref(), + self.engine.clone(), + &self.logger, + )? { + Some(peer) => peer, + None => return Ok(()), + }; + let pair = PeerFsm::new(&cfg, peer)?; + let prev = regions.insert(region_id, pair); + if let Some((_, p)) = prev { + return Err(box_err!( + "duplicate region {:?} vs {:?}", + p.logger().list(), + regions[®ion_id].1.logger().list() + )); + } + Ok(()) + })?; + self.clean_up_tablets(®ions)?; + Ok(regions) + } + + fn clean_up_tablets(&self, peers: &HashMap>) -> Result<()> { + // TODO: list all available tablets and destroy those which are not in the peers. 
+ Ok(()) + } +} + +impl HandlerBuilder, StoreFsm> for StorePollerBuilder +where + ER: RaftEngine, + EK: KvEngine, + T: Transport + 'static, +{ + type Handler = StorePoller; + + fn build(&mut self, priority: batch_system::Priority) -> Self::Handler { + let poll_ctx = StoreContext::new( + self.cfg.value().clone(), + self.trans.clone(), + self.logger.clone(), + ); + let cfg_tracker = self.cfg.clone().tracker("raftstore".to_string()); + StorePoller::new(poll_ctx, cfg_tracker) + } +} + +/// The system used for poll raft activities. +pub struct StoreSystem { + system: BatchSystem, StoreFsm>, + logger: Logger, +} + +impl StoreSystem { + pub fn start( + &mut self, + store: Store, + cfg: Arc>, + raft_engine: ER, + tablet_factory: Arc>, + trans: T, + router: &StoreRouter, + ) -> Result<()> + where + T: Transport + 'static, + { + let mut builder = StorePollerBuilder::new( + cfg, + store.get_id(), + raft_engine, + tablet_factory, + trans, + self.logger.clone(), + ); + let peers = builder.init()?; + // Choose a different name so we know what version is actually used. rs stands + // for raft store. + let tag = format!("rs-{}", store.get_id()); + self.system.spawn(tag, builder); + + let mut mailboxes = Vec::with_capacity(peers.len()); + let mut address = Vec::with_capacity(peers.len()); + for (region_id, (tx, fsm)) in peers { + address.push(region_id); + mailboxes.push(( + region_id, + BasicMailbox::new(tx, fsm, router.state_cnt().clone()), + )); + } + router.register_all(mailboxes); + + // Make sure Msg::Start is the first message each FSM received. + for addr in address { + router.force_send(addr, PeerMsg::Start).unwrap(); + } + router.send_control(StoreMsg::Start { store }).unwrap(); + Ok(()) + } + + pub fn shutdown(&mut self) { + self.system.shutdown(); + } +} + +pub type StoreRouter = BatchRouter, StoreFsm>; + +/// Create the batch system for polling raft activities. 
+pub fn create_store_batch_system( + cfg: &Config, + store: Store, + logger: Logger, +) -> (StoreRouter, StoreSystem) +where + EK: KvEngine, + ER: RaftEngine, +{ + let (store_tx, store_fsm) = StoreFsm::new(cfg, store); + let (router, system) = + batch_system::create_system(&cfg.store_batch_system, store_tx, store_fsm); + let system = StoreSystem { system, logger }; + (router, system) +} diff --git a/components/raftstore-v2/src/fsm/mod.rs b/components/raftstore-v2/src/fsm/mod.rs index 60c84984793..9f3bcefac46 100644 --- a/components/raftstore-v2/src/fsm/mod.rs +++ b/components/raftstore-v2/src/fsm/mod.rs @@ -8,3 +8,6 @@ mod apply; mod peer; mod store; + +pub use peer::{PeerFsm, PeerFsmDelegate, SenderFsmPair}; +pub use store::{StoreFsm, StoreFsmDelegate}; diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 5eaacf3e200..8187575d658 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -1,22 +1,111 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::RaftEngine; +use std::borrow::Cow; + +use batch_system::{BasicMailbox, Fsm}; +use crossbeam::channel::TryRecvError; +use engine_traits::{KvEngine, RaftEngine}; use kvproto::metapb; use raftstore::store::Config; -use slog::Logger; +use slog::{info, Logger}; +use tikv_util::mpsc::{self, LooseBoundedSender, Receiver, Sender}; + +use crate::{batch::StoreContext, raft::Peer, PeerMsg, Result}; -use crate::{raft::Peer, Result}; +pub type SenderFsmPair = (LooseBoundedSender>, Box>); -pub struct PeerFsm { - peer: Peer, +pub struct PeerFsm { + peer: Peer, + logger: Logger, + mailbox: Option>>, + receiver: Receiver>, + is_stopped: bool, } -impl PeerFsm { - pub fn new(peer: Peer) -> Result { - Ok(PeerFsm { peer }) +impl PeerFsm { + pub fn new(cfg: &Config, peer: Peer) -> Result> { + let logger = peer.logger().clone(); + info!(logger, "create peer"); + let (tx, rx) = mpsc::loose_bounded(cfg.notify_capacity); + let fsm = Box::new(PeerFsm { + logger, + peer, + mailbox: None, + receiver: rx, + is_stopped: false, + }); + Ok((tx, fsm)) + } + + #[inline] + pub fn peer(&self) -> &Peer { + &self.peer } + #[inline] pub fn logger(&self) -> &Logger { self.peer.logger() } + + /// Fetches messages to `peer_msg_buf`. It will stop when the buffer is full. + /// + /// Returns how many messages are fetched. + pub fn recv(&mut self, peer_msg_buf: &mut Vec>) -> usize { + let l = peer_msg_buf.len(); + for i in l..peer_msg_buf.capacity() { + match self.receiver.try_recv() { + Ok(msg) => peer_msg_buf.push(msg), + Err(e) => { + if let TryRecvError::Disconnected = e { + self.is_stopped = true; + } + return i - l; + } + } + } + peer_msg_buf.capacity() - l + } +} + +impl Fsm for PeerFsm { + type Message = PeerMsg; + + #[inline] + fn is_stopped(&self) -> bool { + self.is_stopped + } + + /// Set a mailbox to Fsm, which should be used to send message to itself. 
+ fn set_mailbox(&mut self, mailbox: Cow<'_, BasicMailbox>) + where + Self: Sized, + { + self.mailbox = Some(mailbox.into_owned()); + } + + /// Take the mailbox from Fsm. Implementation should ensure there will be + /// no reference to mailbox after calling this method. + fn take_mailbox(&mut self) -> Option> + where + Self: Sized, + { + self.mailbox.take() + } +} + +pub struct PeerFsmDelegate<'a, EK: KvEngine, ER: RaftEngine, T> { + fsm: &'a mut PeerFsm, + store_ctx: &'a mut StoreContext, +} + +impl<'a, EK: KvEngine, ER: RaftEngine, T> PeerFsmDelegate<'a, EK, ER, T> { + pub fn new(fsm: &'a mut PeerFsm, store_ctx: &'a mut StoreContext) -> Self { + Self { fsm, store_ctx } + } + + pub fn handle_msgs(&self, peer_msgs_buf: &mut Vec>) { + for msg in peer_msgs_buf.drain(..) { + // TODO: handle the messages. + } + } } diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index b568454e2c9..091b3fe11e9 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -1,3 +1,65 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -pub struct StoreFsm {} +use batch_system::Fsm; +use crossbeam::channel::TryRecvError; +use kvproto::metapb::Store; +use raftstore::store::Config; +use tikv_util::mpsc::{self, LooseBoundedSender, Receiver}; + +use crate::{batch::StoreContext, StoreMsg}; + +pub struct StoreFsm { + store: Store, + receiver: Receiver, +} + +impl StoreFsm { + pub fn new(cfg: &Config, store: Store) -> (LooseBoundedSender, Box) { + let (tx, rx) = mpsc::loose_bounded(cfg.notify_capacity); + let fsm = Box::new(StoreFsm { + store, + receiver: rx, + }); + (tx, fsm) + } + + /// Fetches messages to `store_msg_buf`. It will stop when the buffer is full. + /// + /// Returns how many messages are fetched. 
+ pub fn recv(&self, store_msg_buf: &mut Vec) -> usize { + let l = store_msg_buf.len(); + for i in l..store_msg_buf.capacity() { + match self.receiver.try_recv() { + Ok(msg) => store_msg_buf.push(msg), + Err(_) => return i - l, + } + } + store_msg_buf.capacity() - l + } +} + +impl Fsm for StoreFsm { + type Message = StoreMsg; + + #[inline] + fn is_stopped(&self) -> bool { + false + } +} + +pub struct StoreFsmDelegate<'a, T> { + fsm: &'a mut StoreFsm, + store_ctx: &'a mut StoreContext, +} + +impl<'a, T> StoreFsmDelegate<'a, T> { + pub fn new(fsm: &'a mut StoreFsm, store_ctx: &'a mut StoreContext) -> Self { + Self { fsm, store_ctx } + } + + pub fn handle_msgs(&self, store_msg_buf: &mut Vec) { + for msg in store_msg_buf.drain(..) { + // TODO: handle the messages. + } + } +} diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index fac4511cfd4..220fa0b2d33 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -10,12 +10,15 @@ #![allow(unused)] +mod batch; mod bootstrap; mod fsm; mod operation; mod raft; mod router; +pub(crate) use batch::StoreContext; +pub use batch::{create_store_batch_system, StoreSystem}; pub use bootstrap::Bootstrap; pub use raftstore::{Error, Result}; pub use router::{PeerMsg, PeerTick, StoreMsg, StoreTick}; diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 4af2c1ccddb..e2ccb068cbc 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -1,9 +1,11 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::RaftEngine; -use kvproto::metapb; -use raft::RawNode; -use raftstore::store::Config; +use std::sync::Arc; + +use engine_traits::{KvEngine, RaftEngine, TabletFactory}; +use kvproto::{metapb, raft_serverpb::RegionLocalState}; +use raft::{RawNode, INVALID_ID}; +use raftstore::store::{util::find_peer, Config}; use slog::{o, Logger}; use tikv_util::{box_err, config::ReadableSize}; @@ -11,37 +13,35 @@ use super::storage::Storage; use crate::Result; /// A peer that delegates commands between state machine and raft. -pub struct Peer { - region_id: u64, - peer: metapb::Peer, +pub struct Peer { raft_group: RawNode>, + tablet: Option, logger: Logger, } -impl Peer { +impl Peer { + /// Creates a new peer. + /// + /// If peer is destroyed, None is returned. pub fn new( cfg: &Config, + region_id: u64, store_id: u64, - region: metapb::Region, + tablet_factory: &dyn TabletFactory, engine: ER, - logger: Logger, - ) -> Result { - let peer = region - .get_peers() - .iter() - .find(|p| p.get_store_id() == store_id && p.get_id() != raft::INVALID_ID); - let peer = match peer { - Some(p) => p, - None => return Err(box_err!("no valid peer found in {:?}", region.get_peers())), + logger: &Logger, + ) -> Result> { + let s = match Storage::new(region_id, store_id, engine, logger)? 
{ + Some(s) => s, + None => return Ok(None), }; - let l = logger.new(o!("peer_id" => peer.id)); - - let ps = Storage::new(engine, l.clone()); + let logger = s.logger().clone(); - let applied_index = ps.applied_index(); + let applied_index = s.apply_state().get_applied_index(); + let peer_id = s.peer().get_id(); let raft_cfg = raft::Config { - id: peer.get_id(), + id: peer_id, election_tick: cfg.raft_election_timeout_ticks, heartbeat_tick: cfg.raft_heartbeat_ticks, min_election_tick: cfg.raft_min_election_timeout_ticks, @@ -56,14 +56,49 @@ impl Peer { ..Default::default() }; - Ok(Peer { - region_id: region.get_id(), - peer: peer.clone(), - raft_group: RawNode::new(&raft_cfg, ps, &logger)?, - logger: l, - }) + let tablet_index = s.region_state().get_tablet_index(); + let tablet = if tablet_index != 0 { + if !tablet_factory.exists(region_id, tablet_index) { + return Err(box_err!( + "missing tablet {} for region {}", + tablet_index, + region_id + )); + } + // TODO: Perhaps we should stop create the tablet automatically. + Some(tablet_factory.open_tablet(region_id, tablet_index)?) 
+ } else { + None + }; + + Ok(Some(Peer { + raft_group: RawNode::new(&raft_cfg, s, &logger)?, + tablet, + logger, + })) + } + + #[inline] + pub fn region_id(&self) -> u64 { + self.raft_group.store().region_state().get_region().get_id() + } + + #[inline] + pub fn peer_id(&self) -> u64 { + self.raft_group.store().peer().get_id() + } + + #[inline] + pub fn storage(&self) -> &Storage { + self.raft_group.store() + } + + #[inline] + pub fn tablet(&self) -> &Option { + &self.tablet } + #[inline] pub fn logger(&self) -> &Logger { &self.logger } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index f6dcad9578c..fc25e12bad3 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -2,23 +2,25 @@ use engine_traits::{RaftEngine, RaftLogBatch}; use kvproto::{ - metapb::Region, - raft_serverpb::{RaftApplyState, RaftLocalState, RegionLocalState}, + metapb::{self, Region}, + raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState}, }; use raft::{ eraftpb::{Entry, Snapshot}, - GetEntriesContext, RaftState, + GetEntriesContext, RaftState, INVALID_ID, }; -use raftstore::store::{RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM}; -use slog::Logger; +use raftstore::store::{util::find_peer, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM}; +use slog::{o, Logger}; +use tikv_util::box_err; -use crate::Result; +use crate::{Error, Result}; pub fn write_initial_states(wb: &mut impl RaftLogBatch, region: Region) -> Result<()> { let region_id = region.get_id(); let mut state = RegionLocalState::default(); state.set_region(region); + state.set_tablet_index(RAFT_INIT_LOG_INDEX); wb.put_region_state(region_id, &state)?; let mut apply_state = RaftApplyState::default(); @@ -41,19 +43,104 @@ pub fn write_initial_states(wb: &mut impl RaftLogBatch, region: Region) -> Resul } /// A storage for raft. +/// +/// It's similar to `PeerStorage` in v1. 
+#[derive(Debug)] pub struct Storage { engine: ER, + peer: metapb::Peer, + region_state: RegionLocalState, + raft_state: RaftLocalState, + apply_state: RaftApplyState, logger: Logger, } -impl Storage { - pub fn new(engine: ER, logger: Logger) -> Storage { - Storage { engine, logger } +impl Storage { + /// Creates a new storage. + /// + /// All metadata should be initialized before calling this method. If the region is destroyed + /// `None` will be returned. + pub fn new( + region_id: u64, + store_id: u64, + engine: ER, + logger: &Logger, + ) -> Result>> { + let region_state = match engine.get_region_state(region_id) { + Ok(Some(s)) => s, + res => { + return Err(box_err!("failed to get region state: {:?}", res)); + } + }; + + if region_state.get_state() == PeerState::Tombstone { + return Ok(None); + } + + let peer = find_peer(region_state.get_region(), store_id); + let peer = match peer { + Some(p) if p.get_id() != INVALID_ID => p, + _ => { + return Err(box_err!("no valid peer found in {:?}", region_state)); + } + }; + + let logger = logger.new(o!("region_id" => region_id, "peer_id" => peer.get_id())); + + let raft_state = match engine.get_raft_state(region_id) { + Ok(Some(s)) => s, + res => { + return Err(box_err!("failed to get raft state: {:?}", res)); + } + }; + + let apply_state = match engine.get_apply_state(region_id) { + Ok(Some(s)) => s, + res => { + return Err(box_err!("failed to get apply state: {:?}", res)); + } + }; + + let mut s = Storage { + engine, + peer: peer.clone(), + region_state, + raft_state, + apply_state, + logger, + }; + s.validate_state()?; + Ok(Some(s)) } - pub fn applied_index(&self) -> u64 { + fn validate_state(&mut self) -> Result<()> { unimplemented!() } + + #[inline] + pub fn region_state(&self) -> &RegionLocalState { + &self.region_state + } + + #[inline] + pub fn raft_state(&self) -> &RaftLocalState { + &self.raft_state + } + + #[inline] + pub fn apply_state(&self) -> &RaftApplyState { + &self.apply_state + } + + #[inline] + pub 
fn peer(&self) -> &metapb::Peer { + &self.peer + } + + #[inline] + pub fn logger(&self) -> &Logger { + &self.logger + } } impl raft::Storage for Storage { @@ -120,6 +207,7 @@ mod tests { let local_state = raft_engine.get_region_state(4).unwrap().unwrap(); assert_eq!(local_state.get_state(), PeerState::Normal); assert_eq!(*local_state.get_region(), region); + assert_eq!(local_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); let raft_state = raft_engine.get_raft_state(4).unwrap().unwrap(); assert_eq!(raft_state.get_last_index(), RAFT_INIT_LOG_INDEX); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 02c8d4fe650..fad93ac54d8 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2488,6 +2488,9 @@ where ExtraMessageType::MsgHibernateResponse => { self.on_hibernate_response(msg.get_from_peer()); } + ExtraMessageType::MsgRejectRaftLogCausedByMemoryUsage => { + unimplemented!() + } } } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 63b0a583030..c46cafb7e48 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -424,6 +424,22 @@ pub struct PeerTickBatch { pub wait_duration: Duration, } +impl PeerTickBatch { + #[inline] + pub fn schedule(&mut self, timer: &SteadyTimer) { + if self.ticks.is_empty() { + return; + } + let peer_ticks = mem::take(&mut self.ticks); + let f = timer.delay(self.wait_duration).compat().map(move |_| { + for tick in peer_ticks { + tick(); + } + }); + poll_future_notify(f); + } +} + impl Clone for PeerTickBatch { fn clone(&self) -> PeerTickBatch { PeerTickBatch { @@ -760,21 +776,7 @@ impl RaftPoller { fn flush_ticks(&mut self) { for t in PeerTick::get_all_ticks() { let idx = *t as usize; - if self.poll_ctx.tick_batch[idx].ticks.is_empty() { - continue; - } - let peer_ticks = mem::take(&mut self.poll_ctx.tick_batch[idx].ticks); - 
let f = self - .poll_ctx - .timer - .delay(self.poll_ctx.tick_batch[idx].wait_duration) - .compat() - .map(move |_| { - for tick in peer_ticks { - tick(); - } - }); - poll_future_notify(f); + self.poll_ctx.tick_batch[idx].schedule(&self.poll_ctx.timer); } } } diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index 8445a0a97aa..9b3e38aa9cc 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -9,6 +9,7 @@ extern crate test; use std::{ + cmp, collections::{ hash_map::Entry, vec_deque::{Iter, VecDeque}, @@ -584,6 +585,15 @@ pub fn build_on_master_branch() -> bool { option_env!("TIKV_BUILD_GIT_BRANCH").map_or(false, |b| "master" == b) } +/// Set the capacity of a vector to the given capacity. +pub fn set_vec_capacity(v: &mut Vec, cap: usize) { + match cap.cmp(&v.capacity()) { + cmp::Ordering::Less => v.shrink_to(cap), + cmp::Ordering::Greater => v.reserve_exact(cap - v.len()), + cmp::Ordering::Equal => {} + } +} + #[cfg(test)] mod tests { use std::{ diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 9f2507562e6..918d348f898 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -1290,19 +1290,19 @@ mod tests { use tikv_util::config::ReadableDuration; /// Asserted that the snapshot can be retrieved in 500ms. - const SNAPSHOT_DURATION_MS: i64 = 500; + const SNAPSHOT_DURATION_MS: u64 = 500; /// Asserted that the delay caused by OS scheduling other tasks is smaller than 200ms. /// This is mostly for CI. - const HANDLE_ERROR_MS: i64 = 200; + const HANDLE_ERROR_MS: u64 = 200; /// The acceptable error range for a coarse timer. Note that we use CLOCK_MONOTONIC_COARSE /// which can be slewed by time adjustment code (e.g., NTP, PTP). - const COARSE_ERROR_MS: i64 = 50; + const COARSE_ERROR_MS: u64 = 50; /// The duration that payload executes. 
- const PAYLOAD_SMALL: i64 = 3000; - const PAYLOAD_LARGE: i64 = 6000; + const PAYLOAD_SMALL: u64 = 3000; + const PAYLOAD_LARGE: u64 = 6000; let engine = TestEngineBuilder::new().build().unwrap(); @@ -1339,7 +1339,7 @@ mod tests { req_with_exec_detail.context.set_record_time_stat(true); { - let mut wait_time: i64 = 0; + let mut wait_time: u64 = 0; // Request 1: Unary, success response. let handler_builder = Box::new(|_, _: &_| { @@ -1388,7 +1388,7 @@ mod tests { resp.get_exec_details() .get_time_detail() .get_wait_wall_time_ms(), - wait_time - HANDLE_ERROR_MS - COARSE_ERROR_MS + wait_time.saturating_sub(HANDLE_ERROR_MS + COARSE_ERROR_MS) ); assert_lt!( resp.get_exec_details() @@ -1417,7 +1417,7 @@ mod tests { resp.get_exec_details() .get_time_detail() .get_wait_wall_time_ms(), - wait_time - HANDLE_ERROR_MS - COARSE_ERROR_MS + wait_time.saturating_sub(HANDLE_ERROR_MS + COARSE_ERROR_MS) ); assert_lt!( resp.get_exec_details() @@ -1504,7 +1504,7 @@ mod tests { } { - let mut wait_time: i64 = 0; + let mut wait_time: u64 = 0; // Request 1: Unary, success response. 
let handler_builder = Box::new(|_, _: &_| { @@ -1569,7 +1569,7 @@ mod tests { resp.get_exec_details() .get_time_detail() .get_wait_wall_time_ms(), - wait_time - HANDLE_ERROR_MS - COARSE_ERROR_MS + wait_time.saturating_sub(HANDLE_ERROR_MS + COARSE_ERROR_MS) ); assert_lt!( resp.get_exec_details() @@ -1602,7 +1602,7 @@ mod tests { .get_exec_details() .get_time_detail() .get_wait_wall_time_ms(), - wait_time - HANDLE_ERROR_MS - COARSE_ERROR_MS + wait_time.saturating_sub(HANDLE_ERROR_MS + COARSE_ERROR_MS) ); assert_lt!( resp[0] @@ -1632,7 +1632,7 @@ mod tests { .get_exec_details() .get_time_detail() .get_wait_wall_time_ms(), - wait_time - HANDLE_ERROR_MS - COARSE_ERROR_MS + wait_time.saturating_sub(HANDLE_ERROR_MS + COARSE_ERROR_MS) ); assert_lt!( resp[1] diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index df43ad39a69..064073825f4 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -196,9 +196,9 @@ impl Tracker { let mut exec_details = kvrpcpb::ExecDetails::default(); let mut td = kvrpcpb::TimeDetail::default(); - td.set_process_wall_time_ms(time::duration_to_ms(measure) as i64); - td.set_wait_wall_time_ms(time::duration_to_ms(self.wait_time) as i64); - td.set_kv_read_wall_time_ms(self.scan_process_time_ms as i64); + td.set_process_wall_time_ms(time::duration_to_ms(measure)); + td.set_wait_wall_time_ms(time::duration_to_ms(self.wait_time)); + td.set_kv_read_wall_time_ms(self.scan_process_time_ms); exec_details.set_time_detail(td.clone()); let detail = self.total_storage_stats.scan_detail(); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 5b084826861..336580dda58 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1400,10 +1400,9 @@ fn future_get( tracker.write_scan_detail(scan_detail_v2); }); let time_detail = exec_detail_v2.mut_time_detail(); - time_detail.set_kv_read_wall_time_ms(duration_ms as i64); - time_detail.set_wait_wall_time_ms(stats.latency_stats.wait_wall_time_ms as 
i64); - time_detail - .set_process_wall_time_ms(stats.latency_stats.process_wall_time_ms as i64); + time_detail.set_kv_read_wall_time_ms(duration_ms); + time_detail.set_wait_wall_time_ms(stats.latency_stats.wait_wall_time_ms); + time_detail.set_process_wall_time_ms(stats.latency_stats.process_wall_time_ms); match val { Some(val) => resp.set_value(val), None => resp.set_not_found(true), @@ -1496,10 +1495,9 @@ fn future_batch_get( tracker.write_scan_detail(scan_detail_v2); }); let time_detail = exec_detail_v2.mut_time_detail(); - time_detail.set_kv_read_wall_time_ms(duration_ms as i64); - time_detail.set_wait_wall_time_ms(stats.latency_stats.wait_wall_time_ms as i64); - time_detail - .set_process_wall_time_ms(stats.latency_stats.process_wall_time_ms as i64); + time_detail.set_kv_read_wall_time_ms(duration_ms); + time_detail.set_wait_wall_time_ms(stats.latency_stats.wait_wall_time_ms); + time_detail.set_process_wall_time_ms(stats.latency_stats.process_wall_time_ms); resp.set_pairs(pairs.into()); } Err(e) => { From 110059e68ae87880b348571ad359a51686396dda Mon Sep 17 00:00:00 2001 From: haojinming Date: Mon, 11 Jul 2022 13:21:05 +0800 Subject: [PATCH 0065/1149] cdc: Resolved-ts for RawKV (#12866) ref tikv/tikv#11965 Signed-off-by: haojinming Co-authored-by: Ping Yu --- Cargo.lock | 1 + .../backup-stream/src/subscription_track.rs | 2 +- components/causal_ts/src/lib.rs | 15 +- components/causal_ts/src/observer.rs | 39 +- components/cdc/Cargo.toml | 1 + components/cdc/src/delegate.rs | 57 ++- components/cdc/src/endpoint.rs | 449 +++++++++++++++++- components/cdc/src/initializer.rs | 2 +- components/cdc/src/metrics.rs | 7 + components/cdc/src/observer.rs | 48 +- components/resolved_ts/src/endpoint.rs | 2 +- components/resolved_ts/src/resolver.rs | 122 ++++- components/server/src/server.rs | 36 +- components/test_raftstore/src/server.rs | 5 +- src/config.rs | 15 + src/storage/kv/test_engine_builder.rs | 4 +- tests/integrations/config/mod.rs | 1 + 17 files changed, 726 
insertions(+), 80 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bdb55d28de2..54b315afd36 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -758,6 +758,7 @@ version = "0.0.1" dependencies = [ "api_version", "bitflags", + "causal_ts", "collections", "concurrency_manager", "criterion", diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index f3852fe9782..9199f508d62 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -304,7 +304,7 @@ impl TwoPhaseResolver { return min_ts.min(stable_ts); } - self.resolver.resolve(min_ts) + self.resolver.resolve(min_ts).min() } pub fn resolved_ts(&self) -> TimeStamp { diff --git a/components/causal_ts/src/lib.rs b/components/causal_ts/src/lib.rs index 3507dc17926..ea5fe3bdcc3 100644 --- a/components/causal_ts/src/lib.rs +++ b/components/causal_ts/src/lib.rs @@ -15,7 +15,7 @@ mod observer; pub use observer::*; use txn_types::TimeStamp; -use crate::errors::Result; +pub use crate::errors::Result; /// Trait of causal timestamp provider. 
pub trait CausalTsProvider: Send + Sync { @@ -28,6 +28,10 @@ pub trait CausalTsProvider: Send + Sync { } } +pub trait RawTsTracker: Send + Sync + Clone { + fn track_ts(&self, region_id: u64, ts: TimeStamp) -> Result<()>; +} + pub mod tests { use std::sync::{ atomic::{AtomicU64, Ordering}, @@ -55,4 +59,13 @@ pub mod tests { Ok(self.ts.fetch_add(1, Ordering::Relaxed).into()) } } + + #[derive(Clone, Default)] + pub struct DummyRawTsTracker {} + + impl RawTsTracker for DummyRawTsTracker { + fn track_ts(&self, _region_id: u64, _ts: TimeStamp) -> Result<()> { + Ok(()) + } + } } diff --git a/components/causal_ts/src/observer.rs b/components/causal_ts/src/observer.rs index c89d480eddd..8d2c5abc95c 100644 --- a/components/causal_ts/src/observer.rs +++ b/components/causal_ts/src/observer.rs @@ -18,19 +18,21 @@ use raftstore::{ }, }; -use crate::CausalTsProvider; +use crate::{CausalTsProvider, RawTsTracker}; /// CausalObserver appends timestamp for RawKV V2 data, /// and invoke causal_ts_provider.flush() on specified event, e.g. leader transfer, snapshot apply. /// Should be used ONLY when API v2 is enabled. -pub struct CausalObserver { +pub struct CausalObserver { causal_ts_provider: Arc, + ts_tracker: Tk, } -impl Clone for CausalObserver { +impl Clone for CausalObserver { fn clone(&self) -> Self { Self { causal_ts_provider: self.causal_ts_provider.clone(), + ts_tracker: self.ts_tracker.clone(), } } } @@ -38,9 +40,12 @@ impl Clone for CausalObserver { // Causal observer's priority should be higher than all other observers, to avoid being bypassed. 
const CAUSAL_OBSERVER_PRIORITY: u32 = 0; -impl CausalObserver { - pub fn new(causal_ts_provider: Arc) -> Self { - Self { causal_ts_provider } +impl CausalObserver { + pub fn new(causal_ts_provider: Arc, ts_tracker: Tk) -> Self { + Self { + causal_ts_provider, + ts_tracker, + } } pub fn register_to(&self, coprocessor_host: &mut CoprocessorHost) { @@ -61,7 +66,7 @@ impl CausalObserver { const REASON_LEADER_TRANSFER: &str = "leader_transfer"; const REASON_REGION_MERGE: &str = "region_merge"; -impl CausalObserver { +impl CausalObserver { fn flush_timestamp(&self, region: &Region, reason: &'static str) { fail::fail_point!("causal_observer_flush_timestamp", |_| ()); @@ -73,9 +78,9 @@ impl CausalObserver { } } -impl Coprocessor for CausalObserver {} +impl Coprocessor for CausalObserver {} -impl QueryObserver for CausalObserver { +impl QueryObserver for CausalObserver { fn pre_propose_query( &self, ctx: &mut ObserverContext<'_>, @@ -92,6 +97,12 @@ impl QueryObserver for CausalObserver { ts = Some(self.causal_ts_provider.get_ts().map_err(|err| { coprocessor::Error::Other(box_err!("Get causal timestamp error: {:?}", err)) })?); + // use prev ts as `resolved_ts` means the data with smaller or equal ts has already sink to cdc. + self.ts_tracker + .track_ts(region_id, ts.unwrap().prev()) + .map_err(|err| { + coprocessor::Error::Other(box_err!("track ts err: {:?}", err)) + })?; } ApiV2::append_ts_on_encoded_bytes(req.mut_put().mut_key(), ts.unwrap()); @@ -102,7 +113,7 @@ impl QueryObserver for CausalObserver { } } -impl RoleObserver for CausalObserver { +impl RoleObserver for CausalObserver { /// Observe becoming leader, to flush CausalTsProvider. 
fn on_role_change(&self, ctx: &mut ObserverContext<'_>, role_change: &RoleChange) { // In scenario of frequent leader transfer, the observing of change from @@ -119,7 +130,7 @@ impl RoleObserver for CausalObserver { } } -impl RegionChangeObserver for CausalObserver { +impl RegionChangeObserver for CausalObserver { fn on_region_changed( &self, ctx: &mut ObserverContext<'_>, @@ -155,14 +166,14 @@ pub mod tests { use txn_types::{Key, TimeStamp}; use super::*; - use crate::BatchTsoProvider; + use crate::{tests::DummyRawTsTracker, BatchTsoProvider}; - fn init() -> CausalObserver> { + fn init() -> CausalObserver, DummyRawTsTracker> { let pd_cli = Arc::new(TestPdClient::new(0, true)); pd_cli.set_tso(100.into()); let causal_ts_provider = Arc::new(block_on(BatchTsoProvider::new_opt(pd_cli, Duration::ZERO, 100)).unwrap()); - CausalObserver::new(causal_ts_provider) + CausalObserver::new(causal_ts_provider, DummyRawTsTracker::default()) } #[test] diff --git a/components/cdc/Cargo.toml b/components/cdc/Cargo.toml index f2e2dfd57ce..255ef552c73 100644 --- a/components/cdc/Cargo.toml +++ b/components/cdc/Cargo.toml @@ -30,6 +30,7 @@ failpoints = ["tikv/failpoints"] [dependencies] api_version = { path = "../api_version" } bitflags = "1.0" +causal_ts = { path = "../causal_ts" } collections = { path = "../collections" } concurrency_manager = { path = "../concurrency_manager", default-features = false } crossbeam = "0.8" diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index 2fb971a4024..55a551490ac 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -28,7 +28,7 @@ use raftstore::{ store::util::compare_region_epoch, Error as RaftStoreError, }; -use resolved_ts::Resolver; +use resolved_ts::{ResolvedTs, Resolver}; use tikv::storage::{txn::TxnEntry, Statistics}; use tikv_util::{debug, info, warn}; use txn_types::{Key, Lock, LockType, TimeStamp, WriteBatchFlags, WriteRef, WriteType}; @@ -225,6 +225,8 @@ impl Drop for Pending { 
enum PendingLock { Track { key: Vec, start_ts: TimeStamp }, Untrack { key: Vec }, + RawTrack { ts: TimeStamp }, + RawUntrack { ts: TimeStamp }, } /// A CDC delegate of a raftstore region peer. @@ -244,7 +246,6 @@ pub struct Delegate { pending: Option, txn_extra_op: Arc>, failed: bool, - has_resolver: bool, } impl Delegate { @@ -259,14 +260,9 @@ impl Delegate { pending: Some(Pending::default()), txn_extra_op, failed: false, - has_resolver: false, } } - pub fn has_resolver(&self) -> bool { - self.has_resolver - } - /// Let downstream subscribe the delegate. /// Return error if subscribe fails and the `Delegate` won't be changed. pub fn subscribe(&mut self, downstream: Downstream) -> Result<()> { @@ -274,9 +270,6 @@ impl Delegate { // Check if the downstream is out dated. self.check_epoch_on_ready(&downstream)?; } - if downstream.kv_api == ChangeDataRequestKvApi::TiDb { - self.has_resolver = true; - } self.add_downstream(downstream); Ok(()) } @@ -401,6 +394,8 @@ impl Delegate { match lock { PendingLock::Track { key, start_ts } => resolver.track_lock(start_ts, key, None), PendingLock::Untrack { key } => resolver.untrack_lock(&key, None), + PendingLock::RawTrack { ts } => resolver.raw_track_lock(ts), + PendingLock::RawUntrack { ts } => resolver.raw_untrack_lock(ts), } } self.resolver = Some(resolver); @@ -416,7 +411,7 @@ impl Delegate { } /// Try advance and broadcast resolved ts. 
- pub fn on_min_ts(&mut self, min_ts: TimeStamp) -> Option { + pub fn on_min_ts(&mut self, min_ts: TimeStamp) -> Option { if self.resolver.is_none() { debug!("cdc region resolver not ready"; "region_id" => self.region_id, "min_ts" => min_ts); @@ -426,9 +421,9 @@ impl Delegate { let resolver = self.resolver.as_mut().unwrap(); let resolved_ts = resolver.resolve(min_ts); debug!("cdc resolved ts updated"; - "region_id" => self.region_id, "resolved_ts" => resolved_ts); + "region_id" => self.region_id, "resolved_ts" => ?resolved_ts); CDC_RESOLVED_TS_GAP_HISTOGRAM - .observe((min_ts.physical() - resolved_ts.physical()) as f64 / 1000f64); + .observe((min_ts.physical() - resolved_ts.min().physical()) as f64 / 1000f64); Some(resolved_ts) } @@ -613,10 +608,42 @@ impl Delegate { rows.push(v); } self.sink_downstream(rows, index, ChangeDataRequestKvApi::TiDb)?; + self.sink_raw_downstream(raw_rows, index) + } - self.sink_downstream(raw_rows, index, ChangeDataRequestKvApi::RawKv)?; + fn sink_raw_downstream(&mut self, entries: Vec, index: u64) -> Result<()> { + if entries.is_empty() { + return Ok(()); + } + // the entry's timestamp is non-decreasing, the last has the max ts. 
+ let max_raw_ts = TimeStamp::from(entries.last().unwrap().commit_ts); + match self.resolver { + Some(ref mut resolver) => { + // use prev ts, see reason at CausalObserver::pre_propose_query + resolver.raw_untrack_lock(max_raw_ts.prev()); + } + None => { + assert!(self.pending.is_some(), "region resolver not ready"); + let pending = self.pending.as_mut().unwrap(); + pending + .locks + .push(PendingLock::RawUntrack { ts: max_raw_ts }); + } + } + self.sink_downstream(entries, index, ChangeDataRequestKvApi::RawKv) + } - Ok(()) + pub fn raw_track_ts(&mut self, ts: TimeStamp) { + match self.resolver { + Some(ref mut resolver) => { + resolver.raw_track_lock(ts); + } + None => { + assert!(self.pending.is_some(), "region resolver not ready"); + let pending = self.pending.as_mut().unwrap(); + pending.locks.push(PendingLock::RawTrack { ts }); + } + } } fn sink_downstream( diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 54686424461..7a67c2f9d85 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -17,7 +17,7 @@ use futures::compat::Future01CompatExt; use grpcio::Environment; use kvproto::{ cdcpb::{ - ChangeDataRequest, ChangeDataRequestKvApi, ClusterIdMismatch as ErrorClusterIdMismatch, + ChangeDataRequest, ClusterIdMismatch as ErrorClusterIdMismatch, Compatibility as ErrorCompatibility, DuplicateRequest as ErrorDuplicateRequest, Error as EventError, Event, Event_oneof_event, ResolvedTs, }, @@ -40,7 +40,7 @@ use resolved_ts::Resolver; use security::SecurityManager; use tikv::{config::CdcConfig, storage::Statistics}; use tikv_util::{ - debug, error, impl_display_as_debug, info, + box_err, debug, error, impl_display_as_debug, info, sys::thread::ThreadBuildWrapper, time::Limiter, timer::SteadyTimer, @@ -70,6 +70,8 @@ const METRICS_FLUSH_INTERVAL: u64 = 10_000; // 10s const WARN_RESOLVED_TS_LAG_THRESHOLD: Duration = Duration::from_secs(600); // Suppress repeat resolved ts lag warning. 
const WARN_RESOLVED_TS_COUNT_THRESHOLD: usize = 10; +// if raw region's count is more than 10, begin detect outlier. +const RAW_RESOLVED_TS_OUTLIER_COUNT_THRESHOLD: usize = 10; pub enum Deregister { Downstream { @@ -170,6 +172,10 @@ pub enum Task { TxnExtra(TxnExtra), Validate(Validate), ChangeConfig(ConfigChange), + RawTrackTs { + region_id: u64, + ts: TimeStamp, + }, } impl_display_as_debug!(Task); @@ -241,11 +247,19 @@ impl fmt::Debug for Task { .field("type", &"change_config") .field("change", change) .finish(), + Task::RawTrackTs { + ref region_id, + ref ts, + } => de + .field("type", &"track_ts") + .field("region_id", ®ion_id) + .field("ts", &ts) + .finish(), } } } -#[derive(PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] struct ResolvedRegion { region_id: u64, resolved_ts: TimeStamp, @@ -315,6 +329,52 @@ impl ResolvedRegionHeap { } } +// need to sort all timestamps, vec.sort() is more efficient. +struct ResolvedRegionVec { + vec: Vec, +} + +impl ResolvedRegionVec { + fn push(&mut self, region_id: u64, resolved_ts: TimeStamp) { + self.vec.push(ResolvedRegion { + region_id, + resolved_ts, + }) + } + // extreme outier match the following two conditions: + // 1. https://en.wikipedia.org/wiki/Box_plot + // 2. the gap with min_ts is larger than raw_min_ts_outlier_threshold. + // return one region at maximum. + fn get_extreme_outlier( + &mut self, + min_ts: TimeStamp, + threshold: Duration, + ) -> Option { + // When the number is small, the confidence of outlier detection is low. 
+ if self.vec.len() > RAW_RESOLVED_TS_OUTLIER_COUNT_THRESHOLD { + self.vec.sort(); + let size = self.vec.len(); + let q1_ts = self.vec[(size + 1) / 4].resolved_ts; + let q3_ts = self.vec[3 * (size + 1) / 4].resolved_ts; + let delta = q3_ts.physical().saturating_sub(q1_ts.physical()); + let first_resolved_region = &self.vec[0]; + if q1_ts + .physical() + .saturating_sub(first_resolved_region.resolved_ts.physical()) + > 3 * delta + && Duration::from_millis( + min_ts + .physical() + .saturating_sub(first_resolved_region.resolved_ts.physical()), + ) > threshold + { + return Some(first_resolved_region.to_owned()); + } + } + None + } +} + pub struct Endpoint { cluster_id: u64, @@ -694,10 +754,6 @@ impl, E: KvEngine> Endpoint { let checkpoint_ts = request.checkpoint_ts; let sched = self.scheduler.clone(); - // Now resolver is only used by tidb downstream. - // Resolver is created when the first tidb cdc request arrive. - let is_build_resolver = kv_api == ChangeDataRequestKvApi::TiDb && !delegate.has_resolver(); - let downstream_ = downstream.clone(); if let Err(err) = delegate.subscribe(downstream) { let error_event = err.into_error_event(region_id); @@ -739,7 +795,7 @@ impl, E: KvEngine> Endpoint { max_scan_batch_size: self.max_scan_batch_size, observe_id, checkpoint_ts: checkpoint_ts.into(), - build_resolver: is_build_resolver, + build_resolver: is_new_delegate, ts_filter_ratio: self.config.incremental_scan_ts_filter_ratio, kv_api, }; @@ -828,10 +884,44 @@ impl, E: KvEngine> Endpoint { } } + // detect outlier raw regions, schedule deregister for outlier raw regions. 
+ fn handle_raw_outlier_regions( + &self, + raw_resolved_regions: &mut ResolvedRegionVec, + min_ts: TimeStamp, + ) { + if let Some(region) = raw_resolved_regions + .get_extreme_outlier(min_ts, self.config.raw_min_ts_outlier_threshold.into()) + { + if let Some(delegate) = self.capture_regions.get(®ion.region_id) { + let observe_id = delegate.handle.id; + let deregister = Deregister::Delegate { + region_id: region.region_id, + observe_id, + err: Error::Other(box_err!("raw region dead lock")), + }; + warn!( + "cdc deregister raw region as resolved_ts has much lag, dead lock may occurs."; + "region_id" => region.region_id, + "resolved_ts" => region.resolved_ts, + ); + if let Err(e) = self.scheduler.schedule(Task::Deregister(deregister)) { + error!("cdc schedule cdc task failed"; "error" => ?e); + } + CDC_RAW_OUTLIER_RESOLVED_TS_GAP.observe( + Duration::from_millis(min_ts.physical() - region.resolved_ts.physical()) + .as_secs_f64(), + ); + } + } + } + fn on_min_ts(&mut self, regions: Vec, min_ts: TimeStamp, current_ts: TimeStamp) { // Reset resolved_regions to empty. let resolved_regions = &mut self.resolved_region_heap; resolved_regions.clear(); + // rawkv only, if user does not use rawkv apiv2, raw_resolved_regions should be empty. + let mut raw_resolved_regions = ResolvedRegionVec { vec: vec![] }; let total_region_count = regions.len(); self.min_resolved_ts = TimeStamp::max(); @@ -849,13 +939,18 @@ impl, E: KvEngine> Endpoint { advance_failed_stale += 1; } if let Some(resolved_ts) = delegate.on_min_ts(min_ts) { - if resolved_ts < self.min_resolved_ts { - self.min_resolved_ts = resolved_ts; + if resolved_ts.min() < self.min_resolved_ts { + self.min_resolved_ts = resolved_ts.min(); self.min_ts_region_id = region_id; } - resolved_regions.push(region_id, resolved_ts); + resolved_regions.push(region_id, resolved_ts.min()); + // The judge of raw region is not accuracy here, and we may miss at most one + // "normal" raw region. 
But this will not break the correctness of outlier detection. + if resolved_ts.is_min_ts_from_raw() { + raw_resolved_regions.push(region_id, resolved_ts.raw_ts) + } - if resolved_ts == old_resolved_ts { + if resolved_ts.min() == old_resolved_ts { advance_failed_same += 1; } else { advance_ok += 1; @@ -897,6 +992,9 @@ impl, E: KvEngine> Endpoint { let (normal_min_resolved_ts, normal_regions) = resolved_regions.to_hash_set(); self.broadcast_resolved_ts(outlier_min_resolved_ts, outlier_regions); self.broadcast_resolved_ts(normal_min_resolved_ts, normal_regions); + + // rawkv only, if user does not use rawkv apiv2, raw_resolved_regions should be empty. + self.handle_raw_outlier_regions(&mut raw_resolved_regions, min_ts); } fn broadcast_resolved_ts(&self, min_resolved_ts: TimeStamp, regions: HashSet) { @@ -1013,6 +1111,7 @@ impl, E: KvEngine> Endpoint { let tikv_clients = self.tikv_clients.clone(); let hibernate_regions_compatible = self.config.hibernate_regions_compatible; let region_read_progress = self.region_read_progress.clone(); + let observer = self.observer.clone(); let fut = async move { let _ = timeout.compat().await; @@ -1040,6 +1139,13 @@ impl, E: KvEngine> Endpoint { Err(err) => panic!("failed to regiester min ts event, error: {:?}", err), } + // If flush_causal_timestamp fails, cannot schedule MinTS task + // as new coming raw data may use timestamp smaller than min_ts + if let Err(e) = observer.flush_causal_timestamp() { + error!("cdc flush causal timestamp failed"; "err" => ?e); + return; + } + let gate = pd_client.feature_gate(); let regions = @@ -1139,6 +1245,15 @@ impl, E: KvEngine> Endpoint { fn on_open_conn(&mut self, conn: Conn) { self.connections.insert(conn.get_id(), conn); } + + fn on_raw_track_ts(&mut self, region_id: u64, ts: TimeStamp) { + if let Some(ref mut delegate) = self.capture_regions.get_mut(®ion_id) { + delegate.raw_track_ts(ts); + } else { + // delegate should not be none, as region is checked in CdcObserver::track_ts. 
+ warn!("no delegate is found."; "region_id" => region_id); + } + } } impl, E: KvEngine> Runnable for Endpoint { @@ -1210,6 +1325,7 @@ impl, E: KvEngine> Runnable for Endpoint { } }, Task::ChangeConfig(change) => self.on_change_cfg(change), + Task::RawTrackTs { region_id, ts } => self.on_raw_track_ts(region_id, ts), } } } @@ -1272,7 +1388,10 @@ impl TxnExtraScheduler for CdcTxnExtraScheduler { #[cfg(test)] mod tests { - use std::ops::{Deref, DerefMut}; + use std::{ + assert_matches::assert_matches, + ops::{Deref, DerefMut}, + }; use engine_rocks::RocksEngine; use kvproto::{ @@ -1861,6 +1980,252 @@ mod tests { } } + #[test] + fn test_raw_track_ts() { + let cfg = CdcConfig { + min_ts_interval: ReadableDuration(Duration::from_secs(60)), + ..Default::default() + }; + let mut suite = mock_endpoint(&cfg, None, ApiVersion::V2); + suite.add_region(1, 100); + let quota = crate::channel::MemoryQuota::new(usize::MAX); + let (tx, _) = channel::channel(1, quota); + + let conn = Conn::new(tx, String::new()); + let conn_id = conn.get_id(); + suite.run(Task::OpenConn { conn }); + let mut req_header = Header::default(); + req_header.set_cluster_id(0); + let mut req = ChangeDataRequest::default(); + let region_id = 1; + req.set_region_id(region_id); + let region_epoch = req.get_region_epoch().clone(); + let downstream = Downstream::new( + "".to_string(), + region_epoch.clone(), + 1, + conn_id, + ChangeDataRequestKvApi::RawKv, + ); + // Enable batch resolved ts in the test. + let version = FeatureGate::batch_resolved_ts(); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + version, + }); + assert_eq!(suite.endpoint.capture_regions.len(), 1); + let observe_id = suite.endpoint.capture_regions[®ion_id].handle.id; + suite + .task_rx + .recv_timeout(Duration::from_millis(100)) + .unwrap_err(); + // Schedule resolver ready (resolver is built by conn a). 
+ let mut region = Region::default(); + region.id = region_id; + region.set_region_epoch(region_epoch); + let resolver = Resolver::new(region_id); + suite.run(Task::ResolverReady { + observe_id, + region, + resolver, + }); + suite + .task_rx + .recv_timeout(Duration::from_millis(100)) + .unwrap_err(); + + let ts = TimeStamp::compose(10, 0); + suite.run(Task::RawTrackTs { region_id, ts }); + let delegate = suite.endpoint.capture_regions.get_mut(®ion_id).unwrap(); + let resolver = delegate.resolver.as_mut().unwrap(); + let raw_resolved_ts = resolver.resolve(TimeStamp::compose(20, 0)).min(); + assert_eq!(raw_resolved_ts, ts); + } + + #[test] + fn test_raw_pending_lock() { + let cfg = CdcConfig { + min_ts_interval: ReadableDuration(Duration::from_secs(60)), + ..Default::default() + }; + let mut suite = mock_endpoint(&cfg, None, ApiVersion::V2); + suite.add_region(1, 100); + let quota = crate::channel::MemoryQuota::new(usize::MAX); + let (tx, _) = channel::channel(1, quota); + + let conn = Conn::new(tx, String::new()); + let conn_id = conn.get_id(); + suite.run(Task::OpenConn { conn }); + let mut req_header = Header::default(); + req_header.set_cluster_id(0); + let mut req = ChangeDataRequest::default(); + let region_id = 1; + req.set_region_id(region_id); + let region_epoch = req.get_region_epoch().clone(); + let downstream = Downstream::new( + "".to_string(), + region_epoch.clone(), + 1, + conn_id, + ChangeDataRequestKvApi::RawKv, + ); + // Enable batch resolved ts in the test. 
+ let version = FeatureGate::batch_resolved_ts(); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + version, + }); + assert_eq!(suite.endpoint.capture_regions.len(), 1); + let observe_id = suite.endpoint.capture_regions[®ion_id].handle.id; + suite + .task_rx + .recv_timeout(Duration::from_millis(100)) + .unwrap_err(); + for i in 100..150 { + let ts = TimeStamp::compose(i, 0); + suite.run(Task::RawTrackTs { region_id, ts }); + } + let delegate = suite.endpoint.capture_regions.get_mut(®ion_id).unwrap(); + // region is not ready, so raw lock in resolver, raw ts is added to delegate.pending. + assert_eq!(delegate.resolver.is_none(), true); + // Schedule resolver ready (resolver is built by conn a). + let mut region = Region::default(); + region.id = region_id; + region.set_region_epoch(region_epoch); + let resolver = Resolver::new(region_id); + suite.run(Task::ResolverReady { + observe_id, + region, + resolver, + }); + suite + .task_rx + .recv_timeout(Duration::from_millis(100)) + .unwrap_err(); + // after region ready, pending locks will be added back to resolver. 
+ let delegate = suite.endpoint.capture_regions.get_mut(®ion_id).unwrap(); + let resolver = delegate.resolver.as_mut().unwrap(); + let raw_resolved_ts = resolver.resolve(TimeStamp::compose(200, 0)).min(); + assert_eq!(raw_resolved_ts, TimeStamp::compose(100, 0)); + } + + #[test] + fn test_raw_dead_lock() { + let cfg = CdcConfig { + min_ts_interval: ReadableDuration(Duration::from_secs(60)), + ..Default::default() + }; + let mut suite = mock_endpoint(&cfg, None, ApiVersion::V2); + let quota = crate::channel::MemoryQuota::new(usize::MAX); + let (tx, _) = channel::channel(1, quota); + let mut region_cnt = 0; + let mut start_ts: u64 = 200; + let region_ids: Vec = (1..50).collect(); + let dead_lock_region = 1; + let dead_lock_ts = TimeStamp::compose(1, 0); + let cur_tso = TimeStamp::compose(1000000, 0); + for region_id in region_ids.clone() { + suite.add_region(region_id, 100); + let conn = Conn::new(tx.clone(), String::new()); + let conn_id = conn.get_id(); + suite.run(Task::OpenConn { conn }); + let mut req_header = Header::default(); + req_header.set_cluster_id(0); + let mut req = ChangeDataRequest::default(); + req.set_region_id(region_id); + let region_epoch = req.get_region_epoch().clone(); + let downstream = Downstream::new( + "".to_string(), + region_epoch.clone(), + region_id, + conn_id, + ChangeDataRequestKvApi::RawKv, + ); + // Enable batch resolved ts in the test. + let version = FeatureGate::batch_resolved_ts(); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + version, + }); + region_cnt += 1; + assert_eq!(suite.endpoint.capture_regions.len(), region_cnt); + let observe_id = suite.endpoint.capture_regions[®ion_id].handle.id; + suite + .task_rx + .recv_timeout(Duration::from_millis(100)) + .unwrap_err(); + // Schedule resolver ready (resolver is built by conn a). 
+ let mut region = Region::default(); + region.id = region_id; + region.set_region_epoch(region_epoch); + let resolver = Resolver::new(region_id); + suite.run(Task::ResolverReady { + observe_id, + region, + resolver, + }); + suite + .task_rx + .recv_timeout(Duration::from_millis(100)) + .unwrap_err(); + // let ts is same with region for testing convenience + // let first region has dead lock. + let ts = if region_id == dead_lock_region { + dead_lock_ts + } else { + TimeStamp::compose(start_ts, 0) + }; + start_ts += 1; + suite.run(Task::RawTrackTs { region_id, ts }); + let delegate = suite.endpoint.capture_regions.get_mut(®ion_id).unwrap(); + let resolver = delegate.resolver.as_mut().unwrap(); + let raw_resolved_ts = resolver.resolve(cur_tso).min(); + assert_eq!(raw_resolved_ts, ts); + } + let ob_id = suite + .endpoint + .capture_regions + .get(&dead_lock_region) + .unwrap() + .handle + .id; + suite.run(Task::MinTS { + regions: region_ids, + min_ts: cur_tso, + current_ts: cur_tso, + }); + let task_recv = suite + .task_rx + .recv_timeout(Duration::from_millis(500)) + .unwrap() + .unwrap(); + assert_matches!(task_recv, + Task::Deregister(Deregister::Delegate {region_id, observe_id, ..}) if + region_id == dead_lock_region && observe_id == ob_id); + let gap = Duration::from_millis(cur_tso.physical() - dead_lock_ts.physical()).as_secs_f64(); + assert_eq!(CDC_RAW_OUTLIER_RESOLVED_TS_GAP.get_sample_count(), 1); + assert_eq!(CDC_RAW_OUTLIER_RESOLVED_TS_GAP.get_sample_sum(), gap); + suite.run(task_recv); + suite + .task_rx + .recv_timeout(Duration::from_millis(100)) + .unwrap_err(); + assert_eq!( + suite + .endpoint + .capture_regions + .get(&dead_lock_region) + .is_none(), + true + ); + } + #[test] fn test_feature_gate() { let cfg = CdcConfig { @@ -2434,4 +2799,62 @@ mod tests { heap1.clear(); assert!(heap1.heap.is_empty()); } + + #[test] + fn test_resolved_region_vec() { + let mut region_vec = ResolvedRegionVec { + vec: Vec::with_capacity(9), + }; + let threshold = 
Duration::from_secs(60); + for i in 0..9 { + region_vec.push(i, TimeStamp::compose(i, 0)); + } + // count is not enough, no outlier. + assert_eq!( + region_vec + .get_extreme_outlier(1.into(), threshold) + .is_none(), + true + ); + let mut region_vec2 = ResolvedRegionVec { + vec: Vec::with_capacity(1002), + }; + for i in 2000..3000 { + region_vec2.push(i, TimeStamp::compose(i, 0)); + } + // count is enough, but no one satisfy the outlier algorithm + // outlier boundary is: 2250 - 3 * 500 = 750 + assert_eq!( + region_vec2 + .get_extreme_outlier(TimeStamp::compose(60_010, 0), threshold) + .is_none(), + true + ); + // count become 1001, boundary: 2249 - 3 * 501 = 746, no outlier + region_vec2.push(747, TimeStamp::compose(747, 0)); + assert_eq!( + region_vec2 + .get_extreme_outlier(TimeStamp::compose(61_000, 0), threshold) + .is_none(), + true + ); + // count become 1002, boundary: 2248 - 3 * 502 = 742, but ts gap is not larger than 60s. + region_vec2.push(741, TimeStamp::compose(741, 0)); + assert_eq!( + region_vec2 + .get_extreme_outlier(TimeStamp::compose(60_741, 0), threshold) + .is_none(), + true + ); + // all conditions are satisfied, return one outlier. 
+ assert_eq!( + region_vec2 + .get_extreme_outlier(TimeStamp::compose(60_742, 0), threshold) + .unwrap(), + ResolvedRegion { + region_id: 741, + resolved_ts: TimeStamp::compose(741, 0) + } + ); + } } diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index a5dcf094acf..e1feb0c9795 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -445,7 +445,7 @@ impl Initializer { fn finish_building_resolver(&self, mut resolver: Resolver, region: Region) { let observe_id = self.observe_id; - let rts = resolver.resolve(TimeStamp::zero()); + let rts = resolver.resolve(TimeStamp::zero()).min(); info!( "cdc resolver initialized and schedule resolver ready"; "region_id" => region.get_id(), diff --git a/components/cdc/src/metrics.rs b/components/cdc/src/metrics.rs index 0118b4d7916..969e3b371a4 100644 --- a/components/cdc/src/metrics.rs +++ b/components/cdc/src/metrics.rs @@ -205,6 +205,13 @@ lazy_static! { ) .unwrap(); + pub static ref CDC_RAW_OUTLIER_RESOLVED_TS_GAP: Histogram = register_histogram!( + "tikv_cdc_raw_outlier_resolved_ts_gap_seconds", + "Bucketed histogram of the gap between cdc raw outlier resolver_ts and current tso", + exponential_buckets(1.0, 2.0, 15).unwrap() // outlier threshold is 60s by default. 
+ ) + .unwrap(); + pub static ref CDC_ROCKSDB_PERF_COUNTER_STATIC: PerfCounter = auto_flush_from!(CDC_ROCKSDB_PERF_COUNTER, PerfCounter); } diff --git a/components/cdc/src/observer.rs b/components/cdc/src/observer.rs index cf8503450c5..5779d5f7e06 100644 --- a/components/cdc/src/observer.rs +++ b/components/cdc/src/observer.rs @@ -2,6 +2,7 @@ use std::sync::{Arc, RwLock}; +use causal_ts::{CausalTsProvider, Error as CausalTsError, RawTsTracker, Result as CausalTsResult}; use collections::HashMap; use engine_traits::KvEngine; use fail::fail_point; @@ -9,7 +10,8 @@ use kvproto::metapb::{Peer, Region}; use raft::StateRole; use raftstore::{coprocessor::*, store::RegionSnapshot, Error as RaftStoreError}; use tikv::storage::Statistics; -use tikv_util::{error, warn, worker::Scheduler}; +use tikv_util::{box_err, error, warn, worker::Scheduler}; +use txn_types::TimeStamp; use crate::{ endpoint::{Deregister, Task}, @@ -28,6 +30,8 @@ pub struct CdcObserver { // A shared registry for managing observed regions. // TODO: it may become a bottleneck, find a better way to manage the registry. observe_regions: Arc>>, + + pub causal_ts_provider: Option>, } impl CdcObserver { @@ -39,9 +43,14 @@ impl CdcObserver { CdcObserver { sched, observe_regions: Arc::default(), + causal_ts_provider: None, } } + pub fn set_causal_ts_provider(&mut self, provider: Arc) { + self.causal_ts_provider = Some(provider); + } + pub fn register_to(&self, coprocessor_host: &mut CoprocessorHost) { // use 0 as the priority of the cmd observer. 
CDC should have a higher priority than // the `resolved-ts`'s cmd observer @@ -89,6 +98,12 @@ impl CdcObserver { .get(®ion_id) .cloned() } + + pub fn flush_causal_timestamp(&self) -> CausalTsResult<()> { + self.causal_ts_provider + .as_ref() + .map_or(Ok(()), |provider| provider.flush()) + } } impl Coprocessor for CdcObserver {} @@ -192,6 +207,24 @@ impl RegionChangeObserver for CdcObserver { } } +impl RawTsTracker for CdcObserver { + fn track_ts(&self, region_id: u64, ts: TimeStamp) -> CausalTsResult<()> { + if self.is_subscribed(region_id).is_some() { + self.sched + .schedule(Task::RawTrackTs { region_id, ts }) + .map_err(|err| { + CausalTsError::Other(box_err!( + "sched raw track ts err: {:?}, region: {:?}, ts: {:?}", + err, + region_id, + ts + )) + })?; + } + Ok(()) + } +} + #[cfg(test)] mod tests { use std::time::Duration; @@ -318,6 +351,19 @@ mod tests { observer.on_role_change(&mut ctx, &RoleChange::new(StateRole::Leader)); rx.recv_timeout(Duration::from_millis(10)).unwrap_err(); + // track for unregistered region id. + observer.track_ts(2, 10.into()).unwrap(); + // no event for unregistered region id. + rx.recv_timeout(Duration::from_millis(10)).unwrap_err(); + observer.track_ts(1, 10.into()).unwrap(); + match rx.recv_timeout(Duration::from_millis(10)).unwrap().unwrap() { + Task::RawTrackTs { region_id, ts } => { + assert_eq!(region_id, 1); + assert_eq!(ts, 10.into()); + } + _ => panic!("unexpected task"), + }; + // unsubscribed fail if observer id is different. 
assert_eq!(observer.unsubscribe_region(1, ObserveID::new()), None); diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index bf4f9ba881e..90e3a3b7912 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -491,7 +491,7 @@ where for region_id in regions.iter() { if let Some(observe_region) = self.regions.get_mut(region_id) { if let ResolverStatus::Ready = observe_region.resolver_status { - let resolved_ts = observe_region.resolver.resolve(ts); + let resolved_ts = observe_region.resolver.resolve(ts).min(); if resolved_ts < min_ts { min_ts = resolved_ts; } diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 1669a0e8b65..12c7cbe0c56 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -1,6 +1,11 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use std::{cmp, collections::BTreeMap, sync::Arc}; +use std::{ + cmp, + cmp::Reverse, + collections::{BTreeMap, BinaryHeap}, + sync::Arc, +}; use collections::{HashMap, HashSet}; use raftstore::store::RegionReadProgress; @@ -8,6 +13,28 @@ use txn_types::TimeStamp; use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; +#[derive(Debug, Clone, Copy)] +pub struct ResolvedTs { + pub raw_ts: TimeStamp, + pub txn_ts: TimeStamp, +} + +impl ResolvedTs { + pub fn default() -> ResolvedTs { + ResolvedTs { + raw_ts: TimeStamp::zero(), + txn_ts: TimeStamp::zero(), + } + } + pub fn min(&self) -> TimeStamp { + cmp::min(self.raw_ts, self.txn_ts) + } + + pub fn is_min_ts_from_raw(&self) -> bool { + self.raw_ts < self.txn_ts + } +} + // Resolver resolves timestamps that guarantee no more commit will happen before // the timestamp. pub struct Resolver { @@ -16,8 +43,11 @@ pub struct Resolver { locks_by_key: HashMap, TimeStamp>, // start_ts -> locked keys. 
lock_ts_heap: BTreeMap>>, + // raw ts, depend on "non-decreasing" of entries' timestamp in the same region. + // BinaryHeap is max heap, so reverse order to get a min heap. Only used in rawkv. + raw_lock_ts_heap: BinaryHeap>, // The timestamps that guarantees no more commit will happen before. - resolved_ts: TimeStamp, + resolved_ts: ResolvedTs, // The highest index `Resolver` had been tracked tracked_index: u64, // The region read progress used to utilize `resolved_ts` to serve stale read request @@ -60,9 +90,10 @@ impl Resolver { ) -> Resolver { Resolver { region_id, - resolved_ts: TimeStamp::zero(), + resolved_ts: ResolvedTs::default(), locks_by_key: HashMap::default(), lock_ts_heap: BTreeMap::new(), + raw_lock_ts_heap: BinaryHeap::new(), read_progress, tracked_index: 0, min_ts: TimeStamp::zero(), @@ -71,7 +102,7 @@ impl Resolver { } pub fn resolved_ts(&self) -> TimeStamp { - self.resolved_ts + self.resolved_ts.min() } pub fn size(&self) -> usize { @@ -145,11 +176,27 @@ impl Resolver { } } + pub fn raw_track_lock(&mut self, ts: TimeStamp) { + debug!("raw track ts {}, region {}", ts, self.region_id); + self.raw_lock_ts_heap.push(Reverse(ts)); + } + + // untrack all timestamps smaller than input ts, depend on the raw ts in one region is non-decreasing + pub fn raw_untrack_lock(&mut self, ts: TimeStamp) { + debug!("raw untrack ts before {}, region {}", ts, self.region_id); + while let Some(&Reverse(min_ts)) = self.raw_lock_ts_heap.peek() { + if min_ts > ts { + break; + } + self.raw_lock_ts_heap.pop(); + } + } + /// Try to advance resolved ts. /// /// `min_ts` advances the resolver even if there is no write. /// Return None means the resolver is not initialized. 
- pub fn resolve(&mut self, min_ts: TimeStamp) -> TimeStamp { + pub fn resolve(&mut self, min_ts: TimeStamp) -> ResolvedTs { // The `Resolver` is stopped, not need to advance, just return the current `resolved_ts` if self.stopped { return self.resolved_ts; @@ -160,9 +207,8 @@ impl Resolver { let min_start_ts = min_lock.unwrap_or(min_ts); // No more commit happens before the ts. - let new_resolved_ts = cmp::min(min_start_ts, min_ts); - - if self.resolved_ts >= new_resolved_ts { + let new_txn_resolved_ts = cmp::min(min_start_ts, min_ts); + if self.resolved_ts.txn_ts >= new_txn_resolved_ts { let label = if has_lock { "has_lock" } else { "stale_ts" }; RTS_RESOLVED_FAIL_ADVANCE_VEC .with_label_values(&[label]) @@ -170,18 +216,25 @@ impl Resolver { } // Resolved ts never decrease. - self.resolved_ts = cmp::max(self.resolved_ts, new_resolved_ts); + self.resolved_ts.txn_ts = cmp::max(self.resolved_ts.txn_ts, new_txn_resolved_ts); // Publish an `(apply index, safe ts)` item into the region read progress if let Some(rrp) = &self.read_progress { - rrp.update_safe_ts(self.tracked_index, self.resolved_ts.into_inner()); + rrp.update_safe_ts(self.tracked_index, self.resolved_ts.txn_ts.into_inner()); } + let min_raw_ts = self + .raw_lock_ts_heap + .peek() + .map_or(min_ts, |ts| ts.to_owned().0); + // Resolved ts never decrease. + self.resolved_ts.raw_ts = cmp::max(self.resolved_ts.raw_ts, min_raw_ts); + let new_min_ts = if has_lock { // If there are some lock, the min_ts must be smaller than // the min start ts, so it guarantees to be smaller than // any late arriving commit ts. 
- new_resolved_ts // cmp::min(min_start_ts, min_ts) + new_txn_resolved_ts // cmp::min(min_start_ts, min_ts) } else { min_ts }; @@ -204,6 +257,10 @@ mod tests { Lock(u64, Key), // key Unlock(Key), + // raw ts + RawLock(u64), + // raw ts + RawUnlock(u64), // min_ts, expect Resolve(u64, u64), } @@ -257,6 +314,40 @@ mod tests { Event::Unlock(Key::from_raw(b"b")), Event::Unlock(Key::from_raw(b"a")), ], + // raw track lock + vec![Event::RawLock(1), Event::Resolve(2, 1)], + vec![Event::RawLock(1), Event::RawUnlock(1), Event::Resolve(2, 2)], + vec![Event::RawLock(1), Event::RawUnlock(2), Event::Resolve(5, 5)], + vec![ + Event::RawLock(1), + Event::RawUnlock(2), + Event::RawLock(3), + Event::Resolve(5, 3), + ], + vec![ + Event::RawLock(1), + Event::RawUnlock(2), + Event::RawLock(3), + Event::RawLock(4), + Event::Resolve(5, 3), + ], + // raw and txn mixed + vec![ + Event::Lock(1, Key::from_raw(b"a")), + Event::RawLock(2), + Event::RawUnlock(3), + Event::Resolve(5, 1), + Event::Unlock(Key::from_raw(b"a")), + Event::Resolve(6, 6), + ], + vec![ + Event::Lock(1, Key::from_raw(b"a")), + Event::RawLock(2), + Event::RawLock(3), + Event::Resolve(5, 1), + Event::Unlock(Key::from_raw(b"a")), + Event::Resolve(6, 2), + ], ]; for (i, case) in cases.into_iter().enumerate() { @@ -267,8 +358,15 @@ mod tests { resolver.track_lock(start_ts.into(), key.into_raw().unwrap(), None) } Event::Unlock(key) => resolver.untrack_lock(&key.into_raw().unwrap(), None), + Event::RawLock(ts) => resolver.raw_track_lock(ts.into()), + Event::RawUnlock(ts) => resolver.raw_untrack_lock(ts.into()), Event::Resolve(min_ts, expect) => { - assert_eq!(resolver.resolve(min_ts.into()), expect.into(), "case {}", i) + assert_eq!( + resolver.resolve(min_ts.into()).min(), + expect.into(), + "case {}", + i + ) } } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 351015fdd9a..51a21b91628 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -774,25 +774,8 @@ 
impl TiKvServer { unified_read_pool_scale_receiver = Some(rx); } - // Register causal observer for RawKV API V2 - if let ApiVersion::V2 = F::TAG { - let tso = block_on(causal_ts::BatchTsoProvider::new_opt( - self.pd_client.clone(), - self.config.causal_ts.renew_interval.0, - self.config.causal_ts.renew_batch_min_size, - )); - if let Err(e) = tso { - panic!("Causal timestamp provider initialize failed: {:?}", e); - } - let causal_ts_provider = Arc::new(tso.unwrap()); - info!("Causal timestamp provider startup."); - - let causal_ob = causal_ts::CausalObserver::new(causal_ts_provider); - causal_ob.register_to(self.coprocessor_host.as_mut().unwrap()); - } - // Register cdc. - let cdc_ob = cdc::CdcObserver::new(cdc_scheduler.clone()); + let mut cdc_ob = cdc::CdcObserver::new(cdc_scheduler.clone()); cdc_ob.register_to(self.coprocessor_host.as_mut().unwrap()); // Register cdc config manager. cfg_controller.register( @@ -818,6 +801,23 @@ impl TiKvServer { None }; + // Register causal observer for RawKV API V2 + if let ApiVersion::V2 = F::TAG { + let tso = block_on(causal_ts::BatchTsoProvider::new_opt( + self.pd_client.clone(), + self.config.causal_ts.renew_interval.0, + self.config.causal_ts.renew_batch_min_size, + )); + if let Err(e) = tso { + fatal!("Causal timestamp provider initialize failed: {:?}", e); + } + let causal_ts_provider = Arc::new(tso.unwrap()); + info!("Causal timestamp provider startup."); + cdc_ob.set_causal_ts_provider(causal_ts_provider.clone()); + let causal_ob = causal_ts::CausalObserver::new(causal_ts_provider, cdc_ob.clone()); + causal_ob.register_to(self.coprocessor_host.as_mut().unwrap()); + } + let check_leader_runner = CheckLeaderRunner::new(engines.store_meta.clone()); let check_leader_scheduler = self .background_worker diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index b87cc5257a5..5d85fff86bc 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs 
@@ -9,7 +9,7 @@ use std::{ }; use api_version::{dispatch_api_version, KvFormat}; -use causal_ts::CausalTsProvider; +use causal_ts::{tests::DummyRawTsTracker, CausalTsProvider}; use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; use encryption_export::DataKeyManager; @@ -365,7 +365,8 @@ impl ServerCluster { ); self.causal_ts_providers .insert(node_id, causal_ts_provider.clone()); - let causal_ob = causal_ts::CausalObserver::new(causal_ts_provider); + let causal_ob = + causal_ts::CausalObserver::new(causal_ts_provider, DummyRawTsTracker::default()); causal_ob.register_to(&mut coprocessor_host); } diff --git a/src/config.rs b/src/config.rs index fc6cde09e1c..7dfbe1b0933 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2508,6 +2508,11 @@ pub struct CdcConfig { pub sink_memory_quota: ReadableSize, pub old_value_cache_memory_quota: ReadableSize, + + /// Threshold of raw regions' resolved_ts outlier detection. 60s by default. + #[online_config(skip)] + #[doc(hidden)] + pub raw_min_ts_outlier_threshold: ReadableDuration, // Deprecated! preserved for compatibility check. #[online_config(skip)] #[doc(hidden)] @@ -2533,6 +2538,8 @@ impl Default for CdcConfig { sink_memory_quota: ReadableSize::mb(512), // 512MB memory for old value cache. old_value_cache_memory_quota: ReadableSize::mb(512), + // Trigger raw region outlier judgement if resolved_ts's lag is over 60s. + raw_min_ts_outlier_threshold: ReadableDuration::secs(60), // Deprecated! preserved for compatibility check. 
old_value_cache_size: 0, } @@ -2574,6 +2581,14 @@ impl CdcConfig { ); self.incremental_scan_ts_filter_ratio = default_cfg.incremental_scan_ts_filter_ratio; } + if self.raw_min_ts_outlier_threshold.is_zero() { + warn!( + "cdc.raw_min_ts_outlier_threshold should be larger than 0, + change it to {}", + default_cfg.raw_min_ts_outlier_threshold + ); + self.raw_min_ts_outlier_threshold = default_cfg.raw_min_ts_outlier_threshold; + } Ok(()) } } diff --git a/src/storage/kv/test_engine_builder.rs b/src/storage/kv/test_engine_builder.rs index bb6f38d9d6b..94d750a20f7 100644 --- a/src/storage/kv/test_engine_builder.rs +++ b/src/storage/kv/test_engine_builder.rs @@ -5,6 +5,7 @@ use std::{ sync::Arc, }; +use causal_ts::tests::DummyRawTsTracker; use engine_rocks::{raw::ColumnFamilyOptions, raw_util::CFOptions}; use engine_traits::{CfName, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE}; use file_system::IORateLimiter; @@ -71,7 +72,8 @@ impl TestEngineBuilder { // Consider decoupling them. fn register_causal_observer(engine: &mut RocksEngine) { let causal_ts_provider = Arc::new(causal_ts::tests::TestProvider::default()); - let causal_ob = causal_ts::CausalObserver::new(causal_ts_provider); + let causal_ob = + causal_ts::CausalObserver::new(causal_ts_provider, DummyRawTsTracker::default()); engine.register_observer(|host| { causal_ob.register_to(host); }); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 2428d265391..54a596a50a2 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -765,6 +765,7 @@ fn test_serde_custom_tikv_config() { tso_worker_threads: 2, old_value_cache_memory_quota: ReadableSize::mb(14), sink_memory_quota: ReadableSize::mb(7), + raw_min_ts_outlier_threshold: ReadableDuration::secs(60), }; value.resolved_ts = ResolvedTsConfig { enable: true, From 88b659775dcbc3dd2d3a12fd836af75b3b423e84 Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Mon, 11 Jul 
2022 14:21:05 +0800 Subject: [PATCH 0066/1149] log-backup: support the new feature PiTR- backup/restore log at the tikv endpoint (#12976) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#12895 Signed-off-by: Yu Juncen Signed-off-by: joccau Signed-off-by: 3pointer Co-authored-by: 山岚 <36239017+YuJuncen@users.noreply.github.com> Co-authored-by: 3pointer Co-authored-by: Ti Chi Robot Co-authored-by: kennytm --- Cargo.lock | 1 + components/backup-stream/Cargo.toml | 4 +- .../backup-stream/src/checkpoint_manager.rs | 439 ++++++++++ components/backup-stream/src/endpoint.rs | 796 ++++++++---------- components/backup-stream/src/errors.rs | 5 +- components/backup-stream/src/event_loader.rs | 84 +- components/backup-stream/src/lib.rs | 10 +- .../backup-stream/src/metadata/client.rs | 381 ++++++++- components/backup-stream/src/metadata/keys.rs | 38 +- .../backup-stream/src/metadata/metrics.rs | 6 + components/backup-stream/src/metadata/mod.rs | 2 +- .../backup-stream/src/metadata/store/etcd.rs | 161 +++- .../src/metadata/store/lazy_etcd.rs | 4 + .../backup-stream/src/metadata/store/mod.rs | 68 +- .../src/metadata/store/slash_etc.rs | 172 +++- components/backup-stream/src/metadata/test.rs | 114 ++- components/backup-stream/src/metrics.rs | 17 +- components/backup-stream/src/observer.rs | 4 +- components/backup-stream/src/router.rs | 250 +++++- components/backup-stream/src/service.rs | 92 ++ .../backup-stream/src/subscription_manager.rs | 650 ++++++++++++++ .../backup-stream/src/subscription_track.rs | 257 ++++-- components/backup-stream/src/utils.rs | 269 +++++- components/backup-stream/tests/mod.rs | 254 +++++- components/server/src/server.rs | 25 +- src/config.rs | 6 + src/import/sst_service.rs | 4 +- 27 files changed, 3383 insertions(+), 730 deletions(-) create mode 100644 components/backup-stream/src/checkpoint_manager.rs create mode 100644 components/backup-stream/src/service.rs create mode 100644 
components/backup-stream/src/subscription_manager.rs diff --git a/Cargo.lock b/Cargo.lock index 54b315afd36..2cab9eb4b2d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -475,6 +475,7 @@ dependencies = [ "slog", "slog-global", "tempdir", + "tempfile", "test_raftstore", "test_util", "thiserror", diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index f14c0aa3c39..9e8049e0ec0 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -33,9 +33,10 @@ etcd-client = { version = "0.7", features = ["pub-response-field", "tls"] } external_storage = { path = "../external_storage", default-features = false } external_storage_export = { path = "../external_storage/export", default-features = false } fail = { version = "0.5", optional = true } - file_system = { path = "../file_system" } futures = "0.3" + +grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } hex = "0.4" kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.4" @@ -71,6 +72,7 @@ grpcio = { version = "0.10", default-features = false, features = ["openssl-vend hex = "0.4" rand = "0.8.0" tempdir = "0.3" +tempfile = "3.0" test_raftstore = { path = "../test_raftstore", default-features = false } test_util = { path = "../test_util", default-features = false } url = "2" diff --git a/components/backup-stream/src/checkpoint_manager.rs b/components/backup-stream/src/checkpoint_manager.rs new file mode 100644 index 00000000000..96e330f956d --- /dev/null +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -0,0 +1,439 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{collections::HashMap, sync::Arc, time::Duration}; + +use kvproto::{ + errorpb::{Error as PbError, *}, + metapb::Region, +}; +use pd_client::PdClient; +use tikv_util::{info, worker::Scheduler}; +use txn_types::TimeStamp; + +use crate::{ + errors::{ContextualResultExt, Error, Result}, + metadata::{store::MetaStore, Checkpoint, CheckpointProvider, MetadataClient}, + metrics, + subscription_track::SubscriptionTracer, + try_send, RegionCheckpointOperation, Task, +}; + +/// A manager for maintaining the last flush ts. +/// This information is provided for the `advancer` in checkpoint V3, +/// which involved a central node (typically TiDB) for collecting all regions' checkpoint +/// then advancing the global checkpoint. +#[derive(Debug, Default)] +pub struct CheckpointManager { + items: HashMap, +} + +/// The result of getting a checkpoint. +/// The possibility of failed to getting checkpoint is pretty high: +/// because there is a gap between region leader change and flushing. +#[derive(Debug)] +pub enum GetCheckpointResult { + Ok { + region: Region, + checkpoint: TimeStamp, + }, + NotFound { + id: RegionIdWithVersion, + err: PbError, + }, + EpochNotMatch { + region: Region, + err: PbError, + }, +} + +impl GetCheckpointResult { + /// create an "ok" variant with region. + pub fn ok(region: Region, checkpoint: TimeStamp) -> Self { + Self::Ok { region, checkpoint } + } + + fn not_found(id: RegionIdWithVersion) -> Self { + Self::NotFound { + id, + err: not_leader(id.region_id), + } + } + + /// create a epoch not match variant with region + fn epoch_not_match(provided: RegionIdWithVersion, real: &Region) -> Self { + Self::EpochNotMatch { + region: real.clone(), + err: epoch_not_match( + provided.region_id, + provided.region_epoch_version, + real.get_region_epoch().get_version(), + ), + } + } +} + +impl CheckpointManager { + /// clear the manager. + pub fn clear(&mut self) { + self.items.clear(); + } + + /// update a region checkpoint in need. 
+ pub fn update_region_checkpoint(&mut self, region: &Region, checkpoint: TimeStamp) { + let e = self.items.entry(region.get_id()); + e.and_modify(|old_cp| { + if old_cp.checkpoint < checkpoint + && old_cp.region.get_region_epoch().get_version() + <= region.get_region_epoch().get_version() + { + *old_cp = LastFlushTsOfRegion { + checkpoint, + region: region.clone(), + }; + } + }) + .or_insert_with(|| LastFlushTsOfRegion { + checkpoint, + region: region.clone(), + }); + } + + /// get checkpoint from a region. + pub fn get_from_region(&self, region: RegionIdWithVersion) -> GetCheckpointResult { + let checkpoint = self.items.get(®ion.region_id); + if checkpoint.is_none() { + return GetCheckpointResult::not_found(region); + } + let checkpoint = checkpoint.unwrap(); + if checkpoint.region.get_region_epoch().get_version() != region.region_epoch_version { + return GetCheckpointResult::epoch_not_match(region, &checkpoint.region); + } + GetCheckpointResult::ok(checkpoint.region.clone(), checkpoint.checkpoint) + } + + /// get all checkpoints stored. + pub fn get_all(&self) -> Vec { + self.items.values().cloned().collect() + } +} + +fn not_leader(r: u64) -> PbError { + let mut err = PbError::new(); + let mut nl = NotLeader::new(); + nl.set_region_id(r); + err.set_not_leader(nl); + err.set_message( + format!("the region {} isn't in the region_manager of log backup, maybe not leader or not flushed yet.", r)); + err +} + +fn epoch_not_match(id: u64, sent: u64, real: u64) -> PbError { + let mut err = PbError::new(); + let en = EpochNotMatch::new(); + err.set_epoch_not_match(en); + err.set_message(format!( + "the region {} has recorded version {}, but you sent {}", + id, real, sent, + )); + err +} + +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] +/// A simple region id, but versioned. 
+pub struct RegionIdWithVersion { + pub region_id: u64, + pub region_epoch_version: u64, +} + +impl RegionIdWithVersion { + pub fn new(id: u64, version: u64) -> Self { + Self { + region_id: id, + region_epoch_version: version, + } + } +} + +#[derive(Debug, Clone)] +pub struct LastFlushTsOfRegion { + pub region: Region, + pub checkpoint: TimeStamp, +} + +// Allow some type to +#[async_trait::async_trait] +pub trait FlushObserver: Send + 'static { + /// The callback when the flush has advanced the resolver. + async fn before(&mut self, checkpoints: Vec<(Region, TimeStamp)>); + /// The callback when the flush is done. (Files are fully written to external storage.) + async fn after(&mut self, task: &str, rts: u64) -> Result<()>; + /// The optional callback to rewrite the resolved ts of this flush. + /// Because the default method (collect all leader resolved ts in the store, and use the minimal TS.) + /// may lead to resolved ts rolling back, if we desire a stronger consistency, we can rewrite a safer resolved ts here. + /// Note the new resolved ts cannot be greater than the old resolved ts. + async fn rewrite_resolved_ts( + &mut self, + #[allow(unused_variables)] task: &str, + ) -> Option { + None + } +} + +pub struct BasicFlushObserver { + pd_cli: Arc, + store_id: u64, +} + +impl BasicFlushObserver { + pub fn new(pd_cli: Arc, store_id: u64) -> Self { + Self { pd_cli, store_id } + } +} + +#[async_trait::async_trait] +impl FlushObserver for BasicFlushObserver { + async fn before(&mut self, _checkpoints: Vec<(Region, TimeStamp)>) {} + + async fn after(&mut self, task: &str, rts: u64) -> Result<()> { + if let Err(err) = self + .pd_cli + .update_service_safe_point( + format!("backup-stream-{}-{}", task, self.store_id), + TimeStamp::new(rts), + // Add a service safe point for 30 mins (6x the default flush interval). + // It would probably be safe. 
+ Duration::from_secs(1800), + ) + .await + { + Error::from(err).report("failed to update service safe point!"); + // don't give up? + } + + // Currently, we only support one task at the same time, + // so use the task as label would be ok. + metrics::STORE_CHECKPOINT_TS + .with_label_values(&[task]) + .set(rts as _); + Ok(()) + } +} + +pub struct CheckpointV2FlushObserver { + resolvers: SubscriptionTracer, + meta_cli: MetadataClient, + + fresh_regions: Vec, + checkpoints: Vec<(Region, TimeStamp)>, + can_advance: Option, + base: O, +} + +impl CheckpointV2FlushObserver { + pub fn new( + meta_cli: MetadataClient, + can_advance: F, + resolvers: SubscriptionTracer, + base: O, + ) -> Self { + Self { + resolvers, + meta_cli, + fresh_regions: vec![], + checkpoints: vec![], + can_advance: Some(can_advance), + base, + } + } +} + +#[async_trait::async_trait] +impl FlushObserver for CheckpointV2FlushObserver +where + S: MetaStore + 'static, + F: FnOnce() -> bool + Send + 'static, + O: FlushObserver, +{ + async fn before(&mut self, _checkpoints: Vec<(Region, TimeStamp)>) { + let fresh_regions = self.resolvers.collect_fresh_subs(); + let removal = self.resolvers.collect_removal_subs(); + let checkpoints = removal + .into_iter() + .map(|sub| (sub.meta, sub.resolver.resolved_ts())) + .collect::>(); + self.checkpoints = checkpoints; + self.fresh_regions = fresh_regions; + } + + async fn after(&mut self, task: &str, rts: u64) -> Result<()> { + if !self.can_advance.take().map(|f| f()).unwrap_or(true) { + let cp_now = self + .meta_cli + .get_local_task_checkpoint(task) + .await + .context(format_args!( + "during checking whether we should skip advancing ts to {}.", + rts + ))?; + // if we need to roll back checkpoint ts, don't prevent it. + if rts >= cp_now.into_inner() { + info!("skipping advance checkpoint."; "rts" => %rts, "old_rts" => %cp_now); + return Ok(()); + } + } + // Optionally upload the region checkpoint. 
+ // Unless in some extreme condition, skipping upload the region checkpoint won't lead to data loss. + if let Err(err) = self + .meta_cli + .upload_region_checkpoint(task, &self.checkpoints) + .await + { + err.report("failed to upload region checkpoint"); + } + // we can advance the progress at next time. + // return early so we won't be mislead by the metrics. + self.meta_cli + .set_local_task_checkpoint(task, rts) + .await + .context(format_args!("on flushing task {}", task))?; + self.base.after(task, rts).await?; + self.meta_cli + .clear_region_checkpoint(task, &self.fresh_regions) + .await + .context(format_args!("on clearing the checkpoint for task {}", task))?; + Ok(()) + } +} + +pub struct CheckpointV3FlushObserver { + /// We should modify the rts (the local rts isn't right.) + /// This should be a BasicFlushObserver or something likewise. + baseline: O, + sched: Scheduler, + meta_cli: MetadataClient, + subs: SubscriptionTracer, + + checkpoints: Vec<(Region, TimeStamp)>, + global_checkpoint_cache: HashMap, +} + +impl CheckpointV3FlushObserver { + pub fn new( + sched: Scheduler, + meta_cli: MetadataClient, + subs: SubscriptionTracer, + baseline: O, + ) -> Self { + Self { + sched, + meta_cli, + checkpoints: vec![], + // We almost always have only one entry. 
+ global_checkpoint_cache: HashMap::with_capacity(1), + subs, + baseline, + } + } +} + +impl CheckpointV3FlushObserver +where + S: MetaStore + 'static, + O: FlushObserver + Send, +{ + async fn get_checkpoint(&mut self, task: &str) -> Result { + let cp = match self.global_checkpoint_cache.get(task) { + Some(cp) => *cp, + None => { + let global_checkpoint = self.meta_cli.global_checkpoint_of_task(task).await?; + self.global_checkpoint_cache + .insert(task.to_owned(), global_checkpoint); + global_checkpoint + } + }; + Ok(cp) + } +} + +#[async_trait::async_trait] +impl FlushObserver for CheckpointV3FlushObserver +where + S: MetaStore + 'static, + O: FlushObserver + Send, +{ + async fn before(&mut self, checkpoints: Vec<(Region, TimeStamp)>) { + self.checkpoints = checkpoints; + } + + async fn after(&mut self, task: &str, _rts: u64) -> Result<()> { + self.subs.update_status_for_v3(); + let t = Task::RegionCheckpointsOp(RegionCheckpointOperation::Update(std::mem::take( + &mut self.checkpoints, + ))); + try_send!(self.sched, t); + let global_checkpoint = self.get_checkpoint(task).await?; + info!("getting global checkpoint from cache for updating."; "checkpoint" => ?global_checkpoint); + self.baseline + .after(task, global_checkpoint.ts.into_inner()) + .await?; + Ok(()) + } + + async fn rewrite_resolved_ts(&mut self, task: &str) -> Option { + let global_checkpoint = self + .get_checkpoint(task) + .await + .map_err(|err| err.report("failed to get resolved ts for rewriting")) + .ok()?; + info!("getting global checkpoint for updating."; "checkpoint" => ?global_checkpoint); + matches!(global_checkpoint.provider, CheckpointProvider::Global) + .then(|| global_checkpoint.ts) + } +} + +#[cfg(test)] +mod tests { + use std::assert_matches; + + use kvproto::metapb::*; + use txn_types::TimeStamp; + + use super::RegionIdWithVersion; + use crate::GetCheckpointResult; + + fn region(id: u64, version: u64, conf_version: u64) -> Region { + let mut r = Region::new(); + let mut e = 
RegionEpoch::new(); + e.set_version(version); + e.set_conf_ver(conf_version); + r.set_id(id); + r.set_region_epoch(e); + r + } + + #[test] + fn test_mgr() { + let mut mgr = super::CheckpointManager::default(); + mgr.update_region_checkpoint(®ion(1, 32, 8), TimeStamp::new(8)); + mgr.update_region_checkpoint(®ion(2, 34, 8), TimeStamp::new(15)); + let r = mgr.get_from_region(RegionIdWithVersion::new(1, 32)); + assert_matches::assert_matches!(r, GetCheckpointResult::Ok{checkpoint, ..} if checkpoint.into_inner() == 8); + let r = mgr.get_from_region(RegionIdWithVersion::new(2, 33)); + assert_matches::assert_matches!(r, GetCheckpointResult::EpochNotMatch { .. }); + let r = mgr.get_from_region(RegionIdWithVersion::new(3, 44)); + assert_matches::assert_matches!(r, GetCheckpointResult::NotFound { .. }); + mgr.update_region_checkpoint(®ion(1, 30, 8), TimeStamp::new(16)); + let r = mgr.get_from_region(RegionIdWithVersion::new(1, 32)); + assert_matches::assert_matches!(r, GetCheckpointResult::Ok{checkpoint, ..} if checkpoint.into_inner() == 8); + + mgr.update_region_checkpoint(®ion(1, 30, 8), TimeStamp::new(16)); + let r = mgr.get_from_region(RegionIdWithVersion::new(1, 32)); + assert_matches::assert_matches!(r, GetCheckpointResult::Ok{checkpoint, ..} if checkpoint.into_inner() == 8); + mgr.update_region_checkpoint(®ion(1, 32, 8), TimeStamp::new(16)); + let r = mgr.get_from_region(RegionIdWithVersion::new(1, 32)); + assert_matches::assert_matches!(r, GetCheckpointResult::Ok{checkpoint, ..} if checkpoint.into_inner() == 16); + mgr.update_region_checkpoint(®ion(1, 33, 8), TimeStamp::new(24)); + let r = mgr.get_from_region(RegionIdWithVersion::new(1, 33)); + assert_matches::assert_matches!(r, GetCheckpointResult::Ok{checkpoint, ..} if checkpoint.into_inner() == 24); + } +} diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index a89d5a66da4..490e0b48e8d 100644 --- a/components/backup-stream/src/endpoint.rs +++ 
b/components/backup-stream/src/endpoint.rs @@ -1,6 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. use std::{ + collections::HashSet, fmt, marker::PhantomData, path::PathBuf, @@ -18,11 +19,9 @@ use kvproto::{ }; use online_config::ConfigChange; use pd_client::PdClient; -use raft::StateRole; use raftstore::{ coprocessor::{CmdBatch, ObserveHandle, RegionInfoProvider}, router::RaftStoreRouter, - store::fsm::ChangeObserver, }; use tikv::config::BackupStreamConfig; use tikv_util::{ @@ -30,7 +29,7 @@ use tikv_util::{ config::ReadableDuration, debug, defer, info, sys::thread::ThreadBuildWrapper, - time::Instant, + time::{Instant, Limiter}, warn, worker::{Runnable, Scheduler}, HandyRwLock, @@ -38,42 +37,58 @@ use tikv_util::{ use tokio::{ io::Result as TokioResult, runtime::{Handle, Runtime}, + sync::oneshot, }; use tokio_stream::StreamExt; use txn_types::TimeStamp; -use yatp::task::callback::Handle as YatpHandle; use super::metrics::HANDLE_EVENT_DURATION_HISTOGRAM; use crate::{ annotate, + checkpoint_manager::{ + BasicFlushObserver, CheckpointManager, CheckpointV2FlushObserver, + CheckpointV3FlushObserver, FlushObserver, GetCheckpointResult, RegionIdWithVersion, + }, errors::{Error, Result}, event_loader::{InitialDataLoader, PendingMemoryQuota}, + future, metadata::{store::MetaStore, MetadataClient, MetadataEvent, StreamTask}, metrics::{self, TaskStatus}, observer::BackupStreamObserver, - router::{ApplyEvents, Router, FLUSH_STORAGE_INTERVAL}, + router::{ApplyEvents, Router}, + subscription_manager::{RegionSubscriptionManager, ResolvedRegions}, subscription_track::SubscriptionTracer, try_send, - utils::{self, StopWatch}, + utils::{self, CallbackWaitGroup, StopWatch, Work}, }; const SLOW_EVENT_THRESHOLD: f64 = 120.0; +/// CHECKPOINT_SAFEPOINT_TTL_IF_ERROR specifies the safe point TTL(24 hour) if task has fatal error. 
+const CHECKPOINT_SAFEPOINT_TTL_IF_ERROR: u64 = 24; pub struct Endpoint { - meta_client: MetadataClient, + // Note: those fields are more like a shared context between components. + // For now, we copied them everywhere, maybe we'd better extract them into a + // context type. + pub(crate) meta_client: MetadataClient, + pub(crate) scheduler: Scheduler, + pub(crate) store_id: u64, + pub(crate) regions: R, + pub(crate) engine: PhantomData, + pub(crate) router: RT, + pub(crate) pd_client: Arc, + pub(crate) subs: SubscriptionTracer, + pub(crate) concurrency_manager: ConcurrencyManager, + range_router: Router, - scheduler: Scheduler, observer: BackupStreamObserver, pool: Runtime, - store_id: u64, - regions: R, - engine: PhantomData, - router: RT, - pd_client: Arc, - subs: SubscriptionTracer, - concurrency_manager: ConcurrencyManager, initial_scan_memory_quota: PendingMemoryQuota, - scan_pool: ScanPool, + initial_scan_throughput_quota: Limiter, + region_operator: RegionSubscriptionManager, + failover_time: Option, + config: BackupStreamConfig, + checkpoint_mgr: CheckpointManager, } impl Endpoint @@ -98,7 +113,6 @@ where crate::metrics::STREAM_ENABLED.inc(); let pool = create_tokio_runtime(config.io_threads, "backup-stream") .expect("failed to create tokio runtime for backup stream worker."); - let scan_pool = create_scan_pool(config.num_threads); let meta_client = MetadataClient::new(store, store_id); let range_router = Router::new( @@ -123,7 +137,31 @@ where let initial_scan_memory_quota = PendingMemoryQuota::new(config.initial_scan_pending_memory_quota.0 as _); + let limit = if config.initial_scan_rate_limit.0 > 0 { + config.initial_scan_rate_limit.0 as f64 + } else { + f64::INFINITY + }; + let initial_scan_throughput_quota = Limiter::new(limit); info!("the endpoint of stream backup started"; "path" => %config.temp_path); + let subs = SubscriptionTracer::default(); + let (region_operator, op_loop) = RegionSubscriptionManager::start( + InitialDataLoader::new( + 
router.clone(), + accessor.clone(), + range_router.clone(), + subs.clone(), + scheduler.clone(), + initial_scan_memory_quota.clone(), + pool.handle().clone(), + initial_scan_throughput_quota.clone(), + ), + observer.clone(), + meta_client.clone(), + pd_client.clone(), + config.num_threads, + ); + pool.spawn(op_loop); Endpoint { meta_client, range_router, @@ -135,10 +173,14 @@ where engine: PhantomData, router, pd_client, - subs: Default::default(), + subs, concurrency_manager, initial_scan_memory_quota, - scan_pool, + initial_scan_throughput_quota, + region_operator, + failover_time: None, + config, + checkpoint_mgr: Default::default(), } } } @@ -172,7 +214,7 @@ where let safepoint = meta_cli.global_progress_of_task(&task).await?; pdc.update_service_safe_point( safepoint_name, - TimeStamp::new(safepoint), + TimeStamp::new(safepoint - 1), safepoint_ttl, ) .await?; @@ -198,9 +240,9 @@ where async fn starts_flush_ticks(router: Router) { loop { - // check every 15s. + // check every 5s. // TODO: maybe use global timer handle in the `tikv_utils::timer` (instead of enabling timing in the current runtime)? - tokio::time::sleep(Duration::from_secs(FLUSH_STORAGE_INTERVAL / 20)).await; + tokio::time::sleep(Duration::from_secs(5)).await; debug!("backup stream trigger flush tick"); router.tick().await; } @@ -217,6 +259,8 @@ where if task.is_paused { continue; } + // We have meet task upon store start, we must in a failover. 
+ scheduler.schedule(Task::MarkFailover(Instant::now()))?; // move task to schedule scheduler.schedule(Task::WatchTask(TaskOp::AddTask(task)))?; } @@ -338,6 +382,25 @@ where } } + fn flush_observer(&self) -> Box { + let basic = BasicFlushObserver::new(self.pd_client.clone(), self.store_id); + if self.config.use_checkpoint_v3 { + Box::new(CheckpointV3FlushObserver::new( + self.scheduler.clone(), + self.meta_client.clone(), + self.subs.clone(), + basic, + )) + } else { + Box::new(CheckpointV2FlushObserver::new( + self.meta_client.clone(), + self.make_flush_guard(), + self.subs.clone(), + basic, + )) + } + } + /// Convert a batch of events to the cmd batch, and update the resolver status. fn record_batch(subs: SubscriptionTracer, batch: CmdBatch) -> Option { let region_id = batch.region_id; @@ -366,7 +429,7 @@ where Some(kvs) } - fn backup_batch(&self, batch: CmdBatch) { + fn backup_batch(&self, batch: CmdBatch, work: Work) { let mut sw = StopWatch::new(); let router = self.range_router.clone(); @@ -396,7 +459,8 @@ where } HANDLE_EVENT_DURATION_HISTOGRAM .with_label_values(&["save_to_temp_file"]) - .observe(time_cost) + .observe(time_cost); + drop(work) }); } @@ -410,6 +474,7 @@ where self.scheduler.clone(), self.initial_scan_memory_quota.clone(), self.pool.handle().clone(), + self.initial_scan_throughput_quota.clone(), ) } @@ -450,20 +515,20 @@ where "end_key" => utils::redact(&end_key), ); } - self.spawn_at_scan_pool(move || { - let range_init_result = init.initialize_range(start_key.clone(), end_key.clone()); - match range_init_result { - Ok(()) => { - info!("backup stream success to initialize"; + // Assuming the `region info provider` would read region info form `StoreMeta` directly and this would be fast. + // If this gets slow, maybe make it async again. (Will that bring race conditions? say `Start` handled after `ResfreshResolver` of some region.) 
+ let range_init_result = init.initialize_range(start_key.clone(), end_key.clone()); + match range_init_result { + Ok(()) => { + info!("backup stream success to initialize"; "start_key" => utils::redact(&start_key), "end_key" => utils::redact(&end_key), "take" => ?start.saturating_elapsed(),) - } - Err(e) => { - e.report("backup stream initialize failed"); - } } - }); + Err(e) => { + e.report("backup stream initialize failed"); + } + } Ok(()) } @@ -483,6 +548,7 @@ where let cli = self.meta_client.clone(); let init = self.make_initial_loader(); let range_router = self.range_router.clone(); + let use_v3 = self.config.use_checkpoint_v3; info!( "register backup stream task"; @@ -503,50 +569,46 @@ where }), ); self.pool.block_on(async move { - let task_name = task.info.get_name(); - match cli.ranges_of_task(task_name).await { - Ok(ranges) => { - info!( - "register backup stream ranges"; - "task" => ?task, - "ranges-count" => ranges.inner.len(), - ); - let ranges = ranges - .inner - .into_iter() - .map(|(start_key, end_key)| { - (utils::wrap_key(start_key), utils::wrap_key(end_key)) - }) - .collect::>(); - if let Err(err) = range_router - .register_task(task.clone(), ranges.clone()) - .await - { - err.report(format!( - "failed to register backup stream task {}", - task.info.name - )); - return; - } - - for (start_key, end_key) in ranges { - let init = init.clone(); - - self.observe_and_scan_region(init, &task, start_key, end_key) - .await - .unwrap(); - } - info!( - "finish register backup stream ranges"; - "task" => ?task, - ); + let task_clone = task.clone(); + let run = async move { + let task_name = task.info.get_name(); + if !use_v3 { + cli.init_task(&task.info).await?; } - Err(e) => { - e.report(format!( - "failed to register backup stream task {} to router: ranges not found", - task.info.get_name() - )); + let ranges = cli.ranges_of_task(task_name).await?; + info!( + "register backup stream ranges"; + "task" => ?task, + "ranges-count" => ranges.inner.len(), + ); + 
let ranges = ranges + .inner + .into_iter() + .map(|(start_key, end_key)| { + (utils::wrap_key(start_key), utils::wrap_key(end_key)) + }) + .collect::>(); + range_router + .register_task(task.clone(), ranges.clone()) + .await?; + + for (start_key, end_key) in ranges { + let init = init.clone(); + + self.observe_and_scan_region(init, &task, start_key, end_key) + .await? } + info!( + "finish register backup stream ranges"; + "task" => ?task, + ); + Result::Ok(()) + }; + if let Err(e) = run.await { + e.report(format!( + "failed to register backup stream task {} to router: ranges not found", + task_clone.info.get_name() + )); } }); metrics::update_task_status(TaskStatus::Running, &task_name); @@ -557,7 +619,7 @@ where } fn pause_guard_duration(&self) -> Duration { - ReadableDuration::hours(24).0 + ReadableDuration::hours(CHECKPOINT_SAFEPOINT_TTL_IF_ERROR).0 } pub fn on_pause(&self, task: &str) { @@ -588,14 +650,22 @@ where pub fn on_unregister(&self, task: &str) -> Option { let info = self.unload_task(task); - - // reset the checkpoint ts of the task so it won't mislead the metrics. - metrics::STORE_CHECKPOINT_TS - .with_label_values(&[task]) - .set(0); + self.remove_metrics_after_unregister(task); info } + fn remove_metrics_after_unregister(&self, task: &str) { + // remove metrics of the task so it won't mislead the metrics. + let _ = metrics::STORE_CHECKPOINT_TS + .remove_label_values(&[task]) + .map_err( + |err| info!("failed to remove checkpoint ts metric"; "task" => task, "err" => %err), + ); + let _ = metrics::remove_task_status_metric(task).map_err( + |err| info!("failed to remove checkpoint ts metric"; "task" => task, "err" => %err), + ); + } + /// unload a task from memory: this would stop observe the changes required by the task temporarily. fn unload_task(&self, task: &str) -> Option { let router = self.range_router.clone(); @@ -607,357 +677,123 @@ where self.pool.block_on(router.unregister_task(task)) } - /// try advance the resolved ts by the pd tso. 
- async fn try_resolve( - cm: &ConcurrencyManager, - pd_client: Arc, - resolvers: SubscriptionTracer, - ) -> TimeStamp { - let pd_tso = pd_client - .get_tso() - .await - .map_err(|err| Error::from(err).report("failed to get tso from pd")) - .unwrap_or_default(); - cm.update_max_ts(pd_tso); - let min_ts = cm.global_min_lock_ts().unwrap_or(TimeStamp::max()); - let tso = Ord::min(pd_tso, min_ts); - let ts = resolvers.resolve_with(tso); - resolvers.warn_if_gap_too_huge(ts); - ts - } - - async fn flush_for_task( - task: String, - store_id: u64, - router: Router, - pd_cli: Arc, - resolvers: SubscriptionTracer, - meta_cli: MetadataClient, - concurrency_manager: ConcurrencyManager, - ) { - let start = Instant::now_coarse(); - // NOTE: Maybe push down the resolve step to the router? - // Or if there are too many duplicated `Flush` command, we may do some useless works. - let new_rts = Self::try_resolve(&concurrency_manager, pd_cli.clone(), resolvers).await; - #[cfg(feature = "failpoints")] - fail::fail_point!("delay_on_flush"); - metrics::FLUSH_DURATION - .with_label_values(&["resolve_by_now"]) - .observe(start.saturating_elapsed_secs()); - if let Some(rts) = router.do_flush(&task, store_id, new_rts).await { - info!("flushing and refreshing checkpoint ts."; - "checkpoint_ts" => %rts, - "task" => %task, - ); - if rts == 0 { - // We cannot advance the resolved ts for now. - return; + /// Make a guard for checking whether we can flush the checkpoint ts. 
+ fn make_flush_guard(&self) -> impl FnOnce() -> bool + Send { + let failover = self.failover_time; + let flush_duration = self.config.max_flush_interval; + move || { + if failover + .as_ref() + .map(|failover_t| failover_t.saturating_elapsed() < flush_duration.0 * 2) + .unwrap_or(false) + { + warn!("during failover, skipping advancing resolved ts"; + "failover_time_ago" => ?failover.map(|failover_t| failover_t.saturating_elapsed())); + return false; } let in_flight = crate::observer::IN_FLIGHT_START_OBSERVE_MESSAGE.load(Ordering::SeqCst); if in_flight > 0 { warn!("inflight leader detected, skipping advancing resolved ts"; "in_flight" => %in_flight); - return; - } - if let Err(err) = pd_cli - .update_service_safe_point( - format!("backup-stream-{}-{}", task, store_id), - TimeStamp::new(rts), - // Add a service safe point for 30 mins (6x the default flush interval). - // It would probably be safe. - Duration::from_secs(1800), - ) - .await - { - Error::from(err).report("failed to update service safe point!"); - // don't give up? + return false; } - if let Err(err) = meta_cli.step_task(&task, rts).await { - err.report(format!("on flushing task {}", task)); - // we can advance the progress at next time. - // return early so we won't be mislead by the metrics. - return; - } - metrics::STORE_CHECKPOINT_TS - // Currently, we only support one task at the same time, - // so use the task as label would be ok. - .with_label_values(&[task.as_str()]) - .set(rts as _) + true } } - pub fn on_force_flush(&self, task: String, store_id: u64) { - let router = self.range_router.clone(); - let cli = self.meta_client.clone(); + fn prepare_min_ts(&self) -> future![TimeStamp] { let pd_cli = self.pd_client.clone(); - let resolvers = self.subs.clone(); let cm = self.concurrency_manager.clone(); - self.pool.spawn(async move { - let info = router.get_task_info(&task).await; - // This should only happen in testing, it would be to unwrap... 
- let _ = info.unwrap().set_flushing_status_cas(false, true); - Self::flush_for_task(task, store_id, router, pd_cli, resolvers, cli, cm).await; - }); - } - - pub fn on_flush(&self, task: String, store_id: u64) { - let router = self.range_router.clone(); - let cli = self.meta_client.clone(); - let pd_cli = self.pd_client.clone(); - let resolvers = self.subs.clone(); - let cm = self.concurrency_manager.clone(); - self.pool.spawn(Self::flush_for_task( - task, store_id, router, pd_cli, resolvers, cli, cm, - )); + async move { + let pd_tso = pd_cli + .get_tso() + .await + .map_err(|err| Error::from(err).report("failed to get tso from pd")) + .unwrap_or_default(); + cm.update_max_ts(pd_tso); + let min_ts = cm.global_min_lock_ts().unwrap_or(TimeStamp::max()); + Ord::min(pd_tso, min_ts) + } } - /// Start observe over some region. - /// This would modify some internal state, and delegate the task to InitialLoader::observe_over. - fn observe_over(&self, region: &Region, handle: ObserveHandle) -> Result<()> { - let init = self.make_initial_loader(); - let region_id = region.get_id(); - self.subs.register_region(region, handle.clone(), None); - init.observe_over_with_retry(region, || { - ChangeObserver::from_pitr(region_id, handle.clone()) - })?; - Ok(()) + fn get_resolved_regions(&self, min_ts: TimeStamp) -> future![Result] { + let (tx, rx) = oneshot::channel(); + let op = self.region_operator.clone(); + async move { + let req = ObserveOp::ResolveRegions { + callback: Box::new(move |rs| { + let _ = tx.send(rs); + }), + min_ts, + }; + op.request(req).await; + rx.await + .map_err(|err| annotate!(err, "failed to send request for resolve regions")) + } } - fn observe_over_with_initial_data_from_checkpoint( - &self, - region: &Region, - task: String, - handle: ObserveHandle, - ) -> Result<()> { - let init = self.make_initial_loader(); - - let meta_cli = self.meta_client.clone(); - let last_checkpoint = TimeStamp::new( - self.pool - 
.block_on(meta_cli.global_progress_of_task(&task))?, - ); - self.subs - .register_region(region, handle.clone(), Some(last_checkpoint)); - - let region_id = region.get_id(); - let snap = init.observe_over_with_retry(region, move || { - ChangeObserver::from_pitr(region_id, handle.clone()) - })?; - let region = region.clone(); - - // we should not spawn initial scanning tasks to the tokio blocking pool - // beacuse it is also used for converting sync File I/O to async. (for now!) - // In that condition, if we blocking for some resouces(for example, the `MemoryQuota`) - // at the block threads, we may meet some ghosty deadlock. - self.spawn_at_scan_pool(move || { - let begin = Instant::now_coarse(); - match init.do_initial_scan(®ion, last_checkpoint, snap) { - Ok(stat) => { - info!("initial scanning of leader transforming finished!"; "takes" => ?begin.saturating_elapsed(), "region" => %region.get_id(), "from_ts" => %last_checkpoint); - utils::record_cf_stat("lock", &stat.lock); - utils::record_cf_stat("write", &stat.write); - utils::record_cf_stat("default", &stat.data); + fn do_flush(&self, task: String, min_ts: TimeStamp) -> future![Result<()>] { + let get_rts = self.get_resolved_regions(min_ts); + let router = self.range_router.clone(); + let store_id = self.store_id; + let mut flush_ob = self.flush_observer(); + async move { + let mut resolved = get_rts.await?; + let mut new_rts = resolved.global_checkpoint(); + #[cfg(feature = "failpoints")] + fail::fail_point!("delay_on_flush"); + flush_ob.before(resolved.take_region_checkpoints()).await; + if let Some(rewritten_rts) = flush_ob.rewrite_resolved_ts(&task).await { + info!("rewriting resolved ts"; "old" => %new_rts, "new" => %rewritten_rts); + new_rts = rewritten_rts.min(new_rts); + } + if let Some(rts) = router.do_flush(&task, store_id, new_rts).await { + info!("flushing and refreshing checkpoint ts."; + "checkpoint_ts" => %rts, + "task" => %task, + ); + if rts == 0 { + // We cannot advance the resolved ts for 
now. + return Ok(()); } - Err(err) => err.report(format!("during initial scanning of region {:?}", region)), + flush_ob.after(&task, rts).await? } + Ok(()) + } + } + + pub fn on_force_flush(&self, task: String) { + self.pool.block_on(async move { + let info = self.range_router.get_task_info(&task).await; + // This should only happen in testing, it would be to unwrap... + let _ = info.unwrap().set_flushing_status_cas(false, true); + let mts = self.prepare_min_ts().await; + try_send!(self.scheduler, Task::FlushWithMinTs(task, mts)); }); - Ok(()) } - // spawn a task at the scan pool. - fn spawn_at_scan_pool(&self, task: impl FnOnce() + Send + 'static) { - self.scan_pool.spawn(move |_: &mut YatpHandle<'_>| { - tikv_alloc::add_thread_memory_accessor(); - let _io_guard = file_system::WithIOType::new(file_system::IOType::Replication); - task(); - tikv_alloc::remove_thread_memory_accessor(); + pub fn on_flush(&self, task: String) { + self.pool.block_on(async move { + let mts = self.prepare_min_ts().await; + info!("min_ts prepared for flushing"; "min_ts" => %mts); + try_send!(self.scheduler, Task::FlushWithMinTs(task, mts)); }) } - fn find_task_by_region(&self, r: &Region) -> Option { - self.range_router - .find_task_by_range(&r.start_key, &r.end_key) + fn on_flush_with_min_ts(&self, task: String, min_ts: TimeStamp) { + self.pool.spawn(self.do_flush(task, min_ts).map(|r| { + if let Err(err) = r { + err.report("during updating flush status") + } + })); } /// Modify observe over some region. /// This would register the region to the RaftStore. 
pub fn on_modify_observe(&self, op: ObserveOp) { - info!("backup stream: on_modify_observe"; "op" => ?op); - match op { - ObserveOp::Start { - region, - needs_initial_scanning, - } => { - #[cfg(feature = "failpoints")] - fail::fail_point!("delay_on_start_observe"); - self.start_observe(region, needs_initial_scanning); - metrics::INITIAL_SCAN_REASON - .with_label_values(&["leader-changed"]) - .inc(); - crate::observer::IN_FLIGHT_START_OBSERVE_MESSAGE.fetch_sub(1, Ordering::SeqCst); - } - ObserveOp::Stop { ref region } => { - self.subs.deregister_region(region, |_, _| true); - } - ObserveOp::CheckEpochAndStop { ref region } => { - self.subs.deregister_region(region, |old, new| { - raftstore::store::util::compare_region_epoch( - old.meta.get_region_epoch(), - new, - true, - true, - false, - ) - .map_err(|err| warn!("check epoch and stop failed."; "err" => %err)) - .is_ok() - }); - } - ObserveOp::RefreshResolver { ref region } => { - let need_refresh_all = !self.subs.try_update_region(region); - - if need_refresh_all { - let canceled = self.subs.deregister_region(region, |_, _| true); - let handle = ObserveHandle::new(); - if canceled { - let for_task = self.find_task_by_region(region).unwrap_or_else(|| { - panic!( - "BUG: the region {:?} is register to no task but being observed", - region - ) - }); - metrics::INITIAL_SCAN_REASON - .with_label_values(&["region-changed"]) - .inc(); - if let Err(e) = self.observe_over_with_initial_data_from_checkpoint( - region, - for_task, - handle.clone(), - ) { - try_send!( - self.scheduler, - Task::ModifyObserve(ObserveOp::NotifyFailToStartObserve { - region: region.clone(), - handle, - err: Box::new(e) - }) - ); - } - } - } - } - ObserveOp::NotifyFailToStartObserve { - region, - handle, - err, - } => { - info!("retry observe region"; "region" => %region.get_id(), "err" => %err); - // No need for retrying observe canceled. 
- if err.error_code() == error_code::backup_stream::OBSERVE_CANCELED { - return; - } - match self.retry_observe(region, handle) { - Ok(()) => {} - Err(e) => { - try_send!( - self.scheduler, - Task::FatalError( - format!("While retring to observe region, origin error is {}", err), - Box::new(e) - ) - ); - } - } - } - } - } - - fn start_observe(&self, region: Region, needs_initial_scanning: bool) { - let handle = ObserveHandle::new(); - let result = if needs_initial_scanning { - match self.find_task_by_region(®ion) { - None => { - warn!( - "the region {:?} is register to no task but being observed (start_key = {}; end_key = {}; task_stat = {:?}): maybe stale, aborting", - region, - utils::redact(®ion.get_start_key()), - utils::redact(®ion.get_end_key()), - self.range_router - ); - return; - } - - Some(for_task) => self.observe_over_with_initial_data_from_checkpoint( - ®ion, - for_task, - handle.clone(), - ), - } - } else { - self.observe_over(®ion, handle.clone()) - }; - if let Err(err) = result { - try_send!( - self.scheduler, - Task::ModifyObserve(ObserveOp::NotifyFailToStartObserve { - region, - handle, - err: Box::new(err) - }) - ); - } + self.pool.block_on(self.region_operator.request(op)); } - fn retry_observe(&self, region: Region, handle: ObserveHandle) -> Result<()> { - let (tx, rx) = crossbeam::channel::bounded(1); - self.regions - .find_region_by_id( - region.get_id(), - Box::new(move |item| { - tx.send(item) - .expect("BUG: failed to send to newly created channel."); - }), - ) - .map_err(|err| { - annotate!( - err, - "failed to send request to region info accessor, server maybe too too too busy. 
(region id = {})", - region.get_id() - ) - })?; - let new_region_info = rx - .recv() - .map_err(|err| annotate!(err, "BUG?: unexpected channel message dropped."))?; - if new_region_info.is_none() { - metrics::SKIP_RETRY - .with_label_values(&["region-absent"]) - .inc(); - return Ok(()); - } - let new_region_info = new_region_info.unwrap(); - if new_region_info.role != StateRole::Leader { - metrics::SKIP_RETRY.with_label_values(&["not-leader"]).inc(); - return Ok(()); - } - let removed = self.subs.deregister_region(®ion, |old, _| { - let should_remove = old.handle().id == handle.id; - if !should_remove { - warn!("stale retry command"; "region" => ?region, "handle" => ?handle, "old_handle" => ?old.handle()); - } - should_remove - }); - if !removed { - metrics::SKIP_RETRY - .with_label_values(&["stale-command"]) - .inc(); - return Ok(()); - } - metrics::INITIAL_SCAN_REASON - .with_label_values(&["retry"]) - .inc(); - self.start_observe(region, true); - Ok(()) - } - - pub fn run_task(&self, task: Task) { + pub fn run_task(&mut self, task: Task) { debug!("run backup stream task"; "task" => ?task, "store_id" => %self.store_id); let now = Instant::now_coarse(); let label = task.label(); @@ -968,9 +804,9 @@ where match task { Task::WatchTask(op) => self.handle_watch_task(op), Task::BatchEvent(events) => self.do_backup(events), - Task::Flush(task) => self.on_flush(task, self.store_id), + Task::Flush(task) => self.on_flush(task), Task::ModifyObserve(op) => self.on_modify_observe(op), - Task::ForceFlush(task) => self.on_force_flush(task, self.store_id), + Task::ForceFlush(task) => self.on_force_flush(task), Task::FatalError(task, err) => self.on_fatal_error(task, err), Task::ChangeConfig(_) => { warn!("change config online isn't supported for now.") @@ -986,25 +822,54 @@ where }); } } + Task::MarkFailover(t) => self.failover_time = Some(t), + Task::FlushWithMinTs(task, min_ts) => self.on_flush_with_min_ts(task, min_ts), + Task::RegionCheckpointsOp(s) => 
self.handle_region_checkpoints_op(s), + } + } + + pub fn handle_region_checkpoints_op(&mut self, op: RegionCheckpointOperation) { + match op { + RegionCheckpointOperation::Update(u) => { + // Let's clear all stale checkpoints first. + // Or they may slow down the global checkpoint. + self.checkpoint_mgr.clear(); + for (region, checkpoint) in u { + debug!("setting region checkpoint"; "region" => %region.get_id(), "ts" => %checkpoint); + self.checkpoint_mgr + .update_region_checkpoint(®ion, checkpoint) + } + } + RegionCheckpointOperation::Get(g, cb) => { + let _guard = self.pool.handle().enter(); + match g { + RegionSet::Universal => cb(self + .checkpoint_mgr + .get_all() + .into_iter() + .map(|c| GetCheckpointResult::ok(c.region.clone(), c.checkpoint)) + .collect()), + RegionSet::Regions(rs) => cb(rs + .iter() + .map(|(id, version)| { + self.checkpoint_mgr + .get_from_region(RegionIdWithVersion::new(*id, *version)) + }) + .collect()), + } + } } } pub fn do_backup(&self, events: Vec) { + let wg = CallbackWaitGroup::new(); for batch in events { - self.backup_batch(batch) + self.backup_batch(batch, wg.clone().work()); } + self.pool.block_on(wg.wait()) } } -type ScanPool = yatp::ThreadPool; - -/// Create a yatp pool for doing initial scanning. 
-fn create_scan_pool(num_threads: usize) -> ScanPool { - yatp::Builder::new("log-backup-scan") - .max_thread_count(num_threads) - .build_callback_pool() -} - /// Create a standard tokio runtime /// (which allows io and time reactor, involve thread memory accessor), fn create_tokio_runtime(thread_count: usize, thread_name: &str) -> TokioResult { @@ -1026,12 +891,32 @@ fn create_tokio_runtime(thread_count: usize, thread_name: &str) -> TokioResult), +} + +pub enum RegionCheckpointOperation { + Update(Vec<(Region, TimeStamp)>), + Get(RegionSet, Box) + Send>), +} + +impl fmt::Debug for RegionCheckpointOperation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Update(arg0) => f.debug_tuple("Update").field(arg0).finish(), + Self::Get(arg0, _) => f.debug_tuple("Get").field(arg0).finish(), + } + } +} + pub enum Task { WatchTask(TaskOp), BatchEvent(Vec), ChangeConfig(ConfigChange), - /// Flush the task with name. - Flush(String), /// Change the observe status of some region. ModifyObserve(ObserveOp), /// Convert status of some task into `flushing` and do flush then. @@ -1047,6 +932,18 @@ pub enum Task { // This returns `true`. Box bool + Send>, ), + /// Mark the store as a failover store. + /// This would prevent store from updating its checkpoint ts for a while. + /// Because we are not sure whether the regions in the store have new leader -- + /// we keep a safe checkpoint so they can choose a safe `from_ts` for initial scanning. + MarkFailover(Instant), + /// Flush the task with name. + Flush(String), + /// Execute the flush with the calculated `min_ts`. + /// This is an internal command only issued by the `Flush` task. + FlushWithMinTs(String, TimeStamp), + /// The command for getting region checkpoints. + RegionCheckpointsOp(RegionCheckpointOperation), } #[derive(Debug)] @@ -1057,19 +954,21 @@ pub enum TaskOp { ResumeTask(String), } -#[derive(Debug)] +/// The callback for resolving region. 
+type ResolveRegionsCallback = Box; + pub enum ObserveOp { Start { region: Region, - // if `true`, would scan and sink change from the global checkpoint ts. - // Note: maybe we'd better make it Option to make it more generic, - // but that needs the `observer` know where the checkpoint is, which is a little dirty... - needs_initial_scanning: bool, }, Stop { region: Region, }, - CheckEpochAndStop { + /// Destroy the region subscription. + /// Unlike `Stop`, this will assume the region would never go back. + /// For now, the effect of "never go back" is that we won't try to hint other store + /// the checkpoint ts of this region. + Destroy { region: Region, }, RefreshResolver { @@ -1080,6 +979,39 @@ pub enum ObserveOp { handle: ObserveHandle, err: Box, }, + ResolveRegions { + callback: ResolveRegionsCallback, + min_ts: TimeStamp, + }, +} + +impl std::fmt::Debug for ObserveOp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Start { region } => f.debug_struct("Start").field("region", region).finish(), + Self::Stop { region } => f.debug_struct("Stop").field("region", region).finish(), + Self::Destroy { region } => f.debug_struct("Destroy").field("region", region).finish(), + Self::RefreshResolver { region } => f + .debug_struct("RefreshResolver") + .field("region", region) + .finish(), + Self::NotifyFailToStartObserve { + region, + handle, + err, + } => f + .debug_struct("NotifyFailToStartObserve") + .field("region", region) + .field("handle", handle) + .field("err", err) + .finish(), + Self::ResolveRegions { min_ts, .. } => f + .debug_struct("ResolveRegions") + .field("min_ts", min_ts) + .field("callback", &format_args!("fn {{ .. }}")) + .finish(), + } + } } impl fmt::Debug for Task { @@ -1098,6 +1030,16 @@ impl fmt::Debug for Task { f.debug_tuple("FatalError").field(task).field(err).finish() } Self::Sync(..) 
=> f.debug_tuple("Sync").finish(), + Self::MarkFailover(t) => f + .debug_tuple("MarkFailover") + .field(&format_args!("{:?} ago", t.saturating_elapsed())) + .finish(), + Self::FlushWithMinTs(arg0, arg1) => f + .debug_tuple("FlushWithMinTs") + .field(arg0) + .field(arg1) + .finish(), + Self::RegionCheckpointsOp(s) => f.debug_tuple("GetRegionCheckpoints").field(s).finish(), } } } @@ -1123,13 +1065,17 @@ impl Task { Task::ModifyObserve(o) => match o { ObserveOp::Start { .. } => "modify_observe.start", ObserveOp::Stop { .. } => "modify_observe.stop", - ObserveOp::CheckEpochAndStop { .. } => "modify_observe.check_epoch_and_stop", + ObserveOp::Destroy { .. } => "modify_observe.destroy", ObserveOp::RefreshResolver { .. } => "modify_observe.refresh_resolver", ObserveOp::NotifyFailToStartObserve { .. } => "modify_observe.retry", + ObserveOp::ResolveRegions { .. } => "modify_observe.resolve", }, Task::ForceFlush(_) => "force_flush", Task::FatalError(..) => "fatal_error", Task::Sync(..) => "sync", + Task::MarkFailover(_) => "mark_failover", + Task::FlushWithMinTs(..) => "flush_with_min_ts", + Task::RegionCheckpointsOp(..) => "get_checkpoints", } } } diff --git a/components/backup-stream/src/errors.rs b/components/backup-stream/src/errors.rs index a4d4515c213..b049b0a29be 100644 --- a/components/backup-stream/src/errors.rs +++ b/components/backup-stream/src/errors.rs @@ -120,7 +120,10 @@ where #[macro_export(crate)] macro_rules! 
annotate { ($inner: expr, $message: expr) => { - Error::Other(tikv_util::box_err!("{}: {}", $message, $inner)) + { + use tikv_util::box_err; + $crate::errors::Error::Other(box_err!("{}: {}", $message, $inner)) + } }; ($inner: expr, $format: literal, $($args: expr),+) => { annotate!($inner, format_args!($format, $($args),+)) diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index d791ce6a825..fdba0194000 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -20,7 +20,12 @@ use tikv::storage::{ txn::{EntryBatch, TxnEntry, TxnEntryScanner}, Snapshot, Statistics, }; -use tikv_util::{box_err, time::Instant, warn, worker::Scheduler}; +use tikv_util::{ + box_err, + time::{Instant, Limiter}, + warn, + worker::Scheduler, +}; use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use txn_types::{Key, Lock, TimeStamp}; @@ -69,8 +74,12 @@ impl PendingMemoryQuota { /// EventLoader transforms data from the snapshot into ApplyEvent. pub struct EventLoader { scanner: DeltaScanner, + // pooling the memory. + entry_batch: EntryBatch, } +const ENTRY_BATCH_SIZE: usize = 1024; + impl EventLoader { pub fn load_from( snapshot: S, @@ -93,20 +102,31 @@ impl EventLoader { from_ts, to_ts, region_id ))?; - Ok(Self { scanner }) + Ok(Self { + scanner, + entry_batch: EntryBatch::with_capacity(ENTRY_BATCH_SIZE), + }) + } + + /// Scan a batch of events from the snapshot, and save them into the internal buffer. + fn fill_entries(&mut self) -> Result { + assert!( + self.entry_batch.is_empty(), + "EventLoader: the entry batch isn't empty when filling entries, which is error-prone, please call `omit_entries` first. (len = {})", + self.entry_batch.len() + ); + self.scanner.scan_entries(&mut self.entry_batch)?; + Ok(self.scanner.take_statistics()) } - /// scan a batch of events from the snapshot. Tracking the locks at the same time. 
- /// note: maybe make something like [`EntryBatch`] for reducing allocation. - fn scan_batch( + /// Drain the internal buffer, converting them to the [`ApplyEvents`], + /// and tracking the locks at the same time. + fn omit_entries_to( &mut self, - batch_size: usize, result: &mut ApplyEvents, resolver: &mut TwoPhaseResolver, - ) -> Result { - let mut b = EntryBatch::with_capacity(batch_size); - self.scanner.scan_entries(&mut b)?; - for entry in b.drain() { + ) -> Result<()> { + for entry in self.entry_batch.drain() { match entry { TxnEntry::Prewrite { default: (key, value), @@ -149,7 +169,7 @@ impl EventLoader { } } } - Ok(self.scanner.take_statistics()) + Ok(()) } } @@ -158,15 +178,16 @@ impl EventLoader { /// Note: maybe we can merge those two structures? #[derive(Clone)] pub struct InitialDataLoader { - router: RT, - regions: R, + pub(crate) router: RT, + pub(crate) regions: R, // Note: maybe we can make it an abstract thing like `EventSink` with // method `async (KvEvent) -> Result<()>`? 
- sink: Router, - tracing: SubscriptionTracer, - scheduler: Scheduler, - quota: PendingMemoryQuota, - handle: tokio::runtime::Handle, + pub(crate) sink: Router, + pub(crate) tracing: SubscriptionTracer, + pub(crate) scheduler: Scheduler, + pub(crate) quota: PendingMemoryQuota, + pub(crate) handle: tokio::runtime::Handle, + pub(crate) limit: Limiter, _engine: PhantomData, } @@ -185,6 +206,7 @@ where sched: Scheduler, quota: PendingMemoryQuota, handle: tokio::runtime::Handle, + limiter: Limiter, ) -> Self { Self { router, @@ -195,6 +217,7 @@ where _engine: PhantomData, quota, handle, + limit: limiter, } } @@ -215,12 +238,17 @@ where Error::RaftRequest(pbe) => { !(pbe.has_epoch_not_match() || pbe.has_not_leader() - || pbe.get_message().contains("stale observe id")) + || pbe.get_message().contains("stale observe id") + || pbe.has_region_not_found()) } Error::RaftStore(raftstore::Error::RegionNotFound(_)) | Error::RaftStore(raftstore::Error::NotLeader(..)) => false, _ => true, }; + e.report(format_args!( + "during getting initial snapshot for region {:?}; can retry = {}", + region, can_retry + )); last_err = match last_err { None => Some(e), Some(err) => Some(Error::Contextual { @@ -347,8 +375,14 @@ where let start = Instant::now(); loop { let mut events = ApplyEvents::with_capacity(1024, region.id); - let stat = - self.with_resolver(region, |r| event_loader.scan_batch(1024, &mut events, r))?; + let stat = event_loader.fill_entries()?; + let disk_read = self.with_resolver(region, |r| { + let (result, byte_size) = utils::with_record_read_throughput(|| { + event_loader.omit_entries_to(&mut events, r) + }); + result?; + Result::Ok(byte_size) + })?; if events.is_empty() { metrics::INITIAL_SCAN_DURATION.observe(start.saturating_elapsed_secs()); return Ok(stats.stat); @@ -359,6 +393,7 @@ where let event_size = events.size(); let sched = self.scheduler.clone(); let permit = self.quota.pending(event_size); + self.limit.blocking_consume(disk_read as _); debug!("sending events to 
router"; "size" => %event_size, "region" => %region_id); metrics::INCREMENTAL_SCAN_SIZE.observe(event_size as f64); metrics::HEAP_MEMORY.add(event_size as _); @@ -376,6 +411,7 @@ where region: &Region, start_ts: TimeStamp, snap: impl Snapshot, + on_finish: impl FnOnce() + Send + 'static, ) -> Result { let _guard = self.handle.enter(); // It is ok to sink more data than needed. So scan to +inf TS for convenance. @@ -405,6 +441,7 @@ where region_id )); } + on_finish() }); stats } @@ -425,10 +462,7 @@ where // At that time, we have nowhere to record the lock status of this region. let success = try_send!( self.scheduler, - Task::ModifyObserve(ObserveOp::Start { - region: r.region, - needs_initial_scanning: true - }) + Task::ModifyObserve(ObserveOp::Start { region: r.region }) ); if success { crate::observer::IN_FLIGHT_START_OBSERVE_MESSAGE.fetch_add(1, Ordering::SeqCst); diff --git a/components/backup-stream/src/lib.rs b/components/backup-stream/src/lib.rs index a19b4b4fc2f..34dbfa33e4c 100644 --- a/components/backup-stream/src/lib.rs +++ b/components/backup-stream/src/lib.rs @@ -1,17 +1,23 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+#![feature(slice_group_by)] #![feature(result_flattening)] #![feature(assert_matches)] #![feature(test)] +mod checkpoint_manager; pub mod config; mod endpoint; pub mod errors; mod event_loader; pub mod metadata; -mod metrics; +pub(crate) mod metrics; pub mod observer; pub mod router; +mod service; +mod subscription_manager; mod subscription_track; mod utils; -pub use endpoint::{Endpoint, ObserveOp, Task}; +pub use checkpoint_manager::GetCheckpointResult; +pub use endpoint::{Endpoint, ObserveOp, RegionCheckpointOperation, RegionSet, Task}; +pub use service::Service; diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index 5f0e8b85bed..07d93162e00 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -1,18 +1,26 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{collections::HashMap, fmt::Debug}; +use std::{cmp::Ordering, collections::HashMap, fmt::Debug, path::Path, time::Duration}; -use kvproto::brpb::{StreamBackupError, StreamBackupTaskInfo}; +use kvproto::{ + brpb::{StreamBackupError, StreamBackupTaskInfo}, + metapb::Region, +}; use tikv_util::{defer, time::Instant, warn}; use tokio_stream::StreamExt; +use txn_types::TimeStamp; use super::{ keys::{self, KeyValue, MetaKey}, store::{ - GetExtra, Keys, KvEvent, KvEventType, MetaStore, Snapshot, Subscription, WithRevision, + CondTransaction, Condition, GetExtra, Keys, KvEvent, KvEventType, MetaStore, PutOption, + Snapshot, Subscription, Transaction, WithRevision, }, }; -use crate::errors::{Error, Result}; +use crate::{ + debug, + errors::{ContextualResultExt, Error, Result}, +}; /// Some operations over stream backup metadata key space. 
#[derive(Clone)] @@ -64,6 +72,115 @@ impl PartialEq for MetadataEvent { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CheckpointProvider { + Store(u64), + Region { id: u64, version: u64 }, + Task, + Global, +} + +/// The polymorphic checkpoint. +/// The global checkpoint should be the minimal checkpoint of all checkpoints. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Checkpoint { + pub provider: CheckpointProvider, + pub ts: TimeStamp, +} + +impl Checkpoint { + pub fn from_kv(kv: &KeyValue) -> Result { + match std::str::from_utf8(kv.0.0.as_slice()) { + Ok(key) => Checkpoint::parse_from(Path::new(key), kv.1.as_slice()), + Err(_) => { + Ok(Checkpoint { + // The V1 checkpoint, maybe fill the store id? + provider: CheckpointProvider::Store(0), + ts: TimeStamp::new(parse_ts_from_bytes(kv.1.as_slice())?), + }) + } + } + } + + pub fn parse_from(path: &Path, checkpoint_ts: &[u8]) -> Result { + let segs = path.iter().map(|os| os.to_str()).collect::>(); + match segs.as_slice() { + [ + // We always use '/' as the path. + // NOTE: Maybe just `split` and don't use `path`? + Some("/"), + Some("tidb"), + Some("br-stream"), + Some("checkpoint"), + Some(_task_name), + Some("region"), + Some(id), + Some(epoch), + .., + ] => Self::from_region_parse_result(id, epoch, checkpoint_ts) + .context(format_args!("during parsing key {}", path.display())), + [ + // We always use '/' as the path. + // NOTE: Maybe just `split` and don't use `path`? + Some("/"), + Some("tidb"), + Some("br-stream"), + Some("checkpoint"), + Some(_task_name), + Some("store"), + Some(id), + .., + ] => Self::from_store_parse_result(id, checkpoint_ts) + .context(format_args!("during parsing key {}", path.display())), + [ + // We always use '/' as the path. + // NOTE: Maybe just `split` and don't use `path`? 
+ Some("/"), + Some("tidb"), + Some("br-stream"), + Some("checkpoint"), + Some(_task_name), + Some("central_global"), + ] => Ok(Self { + provider: CheckpointProvider::Global, + ts: TimeStamp::new(parse_ts_from_bytes(checkpoint_ts)?), + }), + _ => Err(Error::MalformedMetadata(format!( + "cannot parse path {}(segs = {:?}) as checkpoint", + path.display(), + segs + ))), + } + } + + fn from_store_parse_result(id: &str, checkpoint_ts: &[u8]) -> Result { + let provider_id = id + .parse::() + .map_err(|err| Error::MalformedMetadata(err.to_string()))?; + let provider = CheckpointProvider::Store(provider_id); + let checkpoint = TimeStamp::new(parse_ts_from_bytes(checkpoint_ts)?); + Ok(Self { + provider, + ts: checkpoint, + }) + } + + fn from_region_parse_result(id: &str, version: &str, checkpoint_ts: &[u8]) -> Result { + let id = id + .parse::() + .map_err(|err| Error::MalformedMetadata(err.to_string()))?; + let version = version + .parse::() + .map_err(|err| Error::MalformedMetadata(err.to_string()))?; + let checkpoint = TimeStamp::new(parse_ts_from_bytes(checkpoint_ts)?); + let provider = CheckpointProvider::Region { id, version }; + Ok(Self { + provider, + ts: checkpoint, + }) + } +} + impl MetadataEvent { fn from_watch_event(event: &KvEvent) -> Option { // Maybe report an error when the kv isn't present? @@ -126,6 +243,27 @@ impl MetadataClient { } } + /// Initialize a task: execute some general operations over the keys. + /// For now, it sets the checkpoint ts if there isn't one for the current store. 
+ pub async fn init_task(&self, task: &StreamBackupTaskInfo) -> Result<()> { + let if_present = Condition::new( + MetaKey::next_backup_ts_of(&task.name, self.store_id), + Ordering::Greater, + vec![], + ); + let txn = CondTransaction::new( + if_present, + Transaction::default(), + Transaction::default().put(KeyValue( + MetaKey::next_backup_ts_of(&task.name, self.store_id), + task.get_start_ts().to_be_bytes().to_vec(), + )), + ); + self.meta_store.txn_cond(txn).await + } + + /// Upload the last error information to the etcd. + /// This won't pause the task. Even this method would usually be paired with `pause`. pub async fn report_last_error(&self, name: &str, last_error: StreamBackupError) -> Result<()> { use protobuf::Message; let now = Instant::now(); @@ -284,7 +422,7 @@ impl MetadataClient { } /// forward the progress of some task. - pub async fn step_task(&self, task_name: &str, ts: u64) -> Result<()> { + pub async fn set_local_task_checkpoint(&self, task_name: &str, ts: u64) -> Result<()> { let now = Instant::now(); defer! { super::metrics::METADATA_OPERATION_LATENCY.with_label_values(&["task_step"]).observe(now.saturating_elapsed().as_secs_f64()) @@ -298,6 +436,25 @@ impl MetadataClient { Ok(()) } + pub async fn get_local_task_checkpoint(&self, task_name: &str) -> Result { + let now = Instant::now(); + defer! { + super::metrics::METADATA_OPERATION_LATENCY.with_label_values(&["task_step"]).observe(now.saturating_elapsed().as_secs_f64()) + } + let snap = self.meta_store.snapshot().await?; + let ts = snap + .get(Keys::Key(MetaKey::next_backup_ts_of( + task_name, + self.store_id, + ))) + .await?; + + match ts.as_slice() { + [ts, ..] => Ok(TimeStamp::new(parse_ts_from_bytes(ts.value())?)), + [] => Ok(self.get_task_start_ts_checkpoint(task_name).await?.ts), + } + } + /// get all target ranges of some task. 
pub async fn ranges_of_task( &self, @@ -391,47 +548,58 @@ impl MetadataClient { Ok(task.unwrap().info.start_ts) } else { assert_eq!(items.len(), 1); - Self::parse_ts_from_bytes(items[0].1.as_slice()) + parse_ts_from_bytes(items[0].1.as_slice()) } } - /// get the global progress (the min next_backup_ts among all stores). - pub async fn global_progress_of_task(&self, task_name: &str) -> Result { + pub async fn checkpoints_of(&self, task_name: &str) -> Result> { let now = Instant::now(); defer! { - super::metrics::METADATA_OPERATION_LATENCY.with_label_values(&["task_progress_get_global"]).observe(now.saturating_elapsed().as_secs_f64()) - } - let task = self.get_task(task_name).await?; - if task.is_none() { - return Err(Error::NoSuchTask { - task_name: task_name.to_owned(), - }); + super::metrics::METADATA_OPERATION_LATENCY.with_label_values(&["checkpoints_of"]).observe(now.saturating_elapsed().as_secs_f64()) } - let snap = self.meta_store.snapshot().await?; - let global_ts = snap.get(Keys::Prefix(MetaKey::next_backup_ts(task_name))) + let checkpoints = snap + .get(Keys::Prefix(MetaKey::next_backup_ts(task_name))) .await? .iter() .filter_map(|kv| { - Self::parse_ts_from_bytes(kv.1.as_slice()) + Checkpoint::from_kv(kv) .map_err(|err| warn!("br-stream: failed to parse next_backup_ts."; "key" => ?kv.0, "err" => %err)) .ok() }) - .min() - .unwrap_or(task.unwrap().info.start_ts); - Ok(global_ts) + .collect(); + Ok(checkpoints) } - fn parse_ts_from_bytes(next_backup_ts: &[u8]) -> Result { - if next_backup_ts.len() != 8 { - return Err(Error::MalformedMetadata(format!( - "the length of next_backup_ts is {} bytes, require 8 bytes", - next_backup_ts.len() - ))); - } - let mut buf = [0u8; 8]; - buf.copy_from_slice(next_backup_ts); - Ok(u64::from_be_bytes(buf)) + async fn get_task_start_ts_checkpoint(&self, task_name: &str) -> Result { + let task = self + .get_task(task_name) + .await? 
+ .ok_or_else(|| Error::NoSuchTask { + task_name: task_name.to_owned(), + })?; + Ok(Checkpoint { + ts: TimeStamp::new(task.info.start_ts), + provider: CheckpointProvider::Task, + }) + } + + /// Get the global checkpoint of a task. + /// It is the smallest checkpoint of all types of checkpoint. + pub async fn global_checkpoint_of_task(&self, task_name: &str) -> Result { + let cp = match self.global_checkpoint_of(task_name).await? { + Some(cp) => cp, + None => self.get_task_start_ts_checkpoint(task_name).await?, + }; + Ok(cp) + } + + /// get the global progress (the min next_backup_ts among all stores). + pub async fn global_progress_of_task(&self, task_name: &str) -> Result { + let cp = self.global_checkpoint_of_task(task_name).await?; + debug!("getting global progress of task"; "checkpoint" => ?cp); + let ts = cp.ts.into_inner(); + Ok(ts) } /// insert a task with ranges into the metadata store. @@ -464,4 +632,155 @@ impl MetadataClient { .delete(Keys::Key(MetaKey::task_of(name))) .await } + + /// upload a region-level checkpoint. 
+ pub async fn upload_region_checkpoint( + &self, + task_name: &str, + checkpoints: &[(Region, TimeStamp)], + ) -> Result<()> { + let txn = checkpoints + .iter() + .fold(Transaction::default(), |txn, (region, cp)| { + txn.put_opt( + KeyValue( + MetaKey::next_bakcup_ts_of_region(task_name, region), + (*cp).into_inner().to_be_bytes().to_vec(), + ), + PutOption { + ttl: Duration::from_secs(600), + }, + ) + }); + self.meta_store.txn(txn).await + } + + pub async fn clear_region_checkpoint(&self, task_name: &str, regions: &[Region]) -> Result<()> { + let txn = regions.iter().fold(Transaction::default(), |txn, region| { + txn.delete(Keys::Key(MetaKey::next_bakcup_ts_of_region( + task_name, region, + ))) + }); + self.meta_store.txn(txn).await + } + + pub async fn global_checkpoint_of(&self, task: &str) -> Result> { + let cps = self.checkpoints_of(task).await?; + let mut min_checkpoint = None; + for cp in cps { + match cp.provider { + CheckpointProvider::Store(..) => { + if min_checkpoint + .as_ref() + .map(|c: &Checkpoint| c.ts > cp.ts) + .unwrap_or(true) + { + min_checkpoint = Some(cp); + } + } + // The global checkpoint has higher priority than store checkpoint. + CheckpointProvider::Task | CheckpointProvider::Global => return Ok(Some(cp)), + CheckpointProvider::Region { .. 
} => continue, + } + } + Ok(min_checkpoint) + } + + pub async fn get_region_checkpoint(&self, task: &str, region: &Region) -> Result { + let key = MetaKey::next_bakcup_ts_of_region(task, region); + let s = self.meta_store.snapshot().await?; + let r = s.get(Keys::Key(key.clone())).await?; + match r.len() { + 0 => { + let global_cp = self.global_checkpoint_of(task).await?; + let cp = match global_cp { + None => self.get_task_start_ts_checkpoint(task).await?, + Some(cp) => cp, + }; + Ok(cp) + } + _ => Ok(Checkpoint::from_kv(&r[0])?), + } + } +} + +fn parse_ts_from_bytes(next_backup_ts: &[u8]) -> Result { + if next_backup_ts.len() != 8 { + return Err(Error::MalformedMetadata(format!( + "the length of next_backup_ts is {} bytes, require 8 bytes", + next_backup_ts.len() + ))); + } + let mut buf = [0u8; 8]; + buf.copy_from_slice(next_backup_ts); + Ok(u64::from_be_bytes(buf)) +} + +#[cfg(test)] +mod test { + use kvproto::metapb::{Region as RegionInfo, RegionEpoch}; + use txn_types::TimeStamp; + + use super::Checkpoint; + use crate::metadata::{ + client::CheckpointProvider, + keys::{KeyValue, MetaKey}, + }; + + #[test] + fn test_parse() { + struct Case { + provider: CheckpointProvider, + checkpoint: u64, + } + + fn run_case(c: Case) { + let key = match c.provider { + CheckpointProvider::Region { id, version } => { + let mut r = RegionInfo::new(); + let mut v = RegionEpoch::new(); + v.set_version(version); + r.set_region_epoch(v); + r.set_id(id); + MetaKey::next_bakcup_ts_of_region("test", &r) + } + CheckpointProvider::Store(id) => MetaKey::next_backup_ts_of("test", id), + _ => unreachable!(), + }; + let checkpoint = c.checkpoint; + let cp_bytes = checkpoint.to_be_bytes(); + let kv = KeyValue(key, cp_bytes.to_vec()); + let parsed = Checkpoint::from_kv(&kv).unwrap(); + assert_eq!( + parsed, + Checkpoint { + provider: c.provider, + ts: TimeStamp::new(c.checkpoint), + } + ); + } + use CheckpointProvider::*; + + let cases = vec![ + Case { + checkpoint: 
TimeStamp::compose(TimeStamp::physical_now(), 10).into_inner(), + provider: Region { id: 42, version: 8 }, + }, + Case { + checkpoint: u64::from_be_bytes(*b"let i=0;"), + provider: Store(3), + }, + Case { + checkpoint: u64::from_be_bytes(*b"(callcc)"), + provider: Region { + id: 16961, + version: 16, + }, + }, + ]; + + for case in cases { + run_case(case) + } + } } diff --git a/components/backup-stream/src/metadata/keys.rs b/components/backup-stream/src/metadata/keys.rs index be92da123ae..6920ba14a33 100644 --- a/components/backup-stream/src/metadata/keys.rs +++ b/components/backup-stream/src/metadata/keys.rs @@ -1,6 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use bytes::BufMut; +use kvproto::metapb::Region; const PREFIX: &str = "/tidb/br-stream"; const PATH_INFO: &str = "/info"; @@ -99,19 +99,36 @@ impl MetaKey { ranges } - /// The key of next backup ts of some region in some store. - pub fn next_backup_ts_of(name: &str, store_id: u64) -> Self { - let base = Self::next_backup_ts(name); - let mut buf = bytes::BytesMut::from(base.0.as_slice()); - buf.put_u64(store_id); - Self(buf.to_vec()) - } - // The prefix for next backup ts. pub fn next_backup_ts(name: &str) -> Self { Self(format!("{}{}/{}/", PREFIX, PATH_NEXT_BACKUP_TS, name).into_bytes()) } + /// The key of next backup ts of some region in some store. 
+ pub fn next_backup_ts_of(name: &str, store_id: u64) -> Self { + Self( + format!( + "{}{}/{}/store/{}", + PREFIX, PATH_NEXT_BACKUP_TS, name, store_id + ) + .into_bytes(), + ) + } + + pub fn next_bakcup_ts_of_region(name: &str, region: &Region) -> Self { + Self( + format!( + "{}{}/{}/region/{}/{}", + PREFIX, + PATH_NEXT_BACKUP_TS, + name, + region.id, + region.get_region_epoch().get_version() + ) + .into_bytes(), + ) + } + pub fn pause_prefix_len() -> usize { Self::pause_prefix().0.len() } @@ -129,8 +146,7 @@ impl MetaKey { Self(format!("{}{}/{}/{}", PREFIX, PATH_LAST_ERROR, name, store).into_bytes()) } - /// return the key that keeps the range [self, self.next()) contains only - /// `self`. + /// return the key that keeps the range [self, self.next()) contains only `self`. pub fn next(&self) -> Self { let mut next = self.clone(); next.0.push(0); diff --git a/components/backup-stream/src/metadata/metrics.rs b/components/backup-stream/src/metadata/metrics.rs index f4ea1258ab7..1dea498834e 100644 --- a/components/backup-stream/src/metadata/metrics.rs +++ b/components/backup-stream/src/metadata/metrics.rs @@ -16,4 +16,10 @@ lazy_static! { "metadata event(task_add, task_removed, error) count.", &["type"], }.unwrap(); + + pub static ref METADATA_KEY_OPERATION: IntCounterVec = register_int_counter_vec! 
{ + "tikv_log_backup_metadata_key_operation", + "the operation over keys", + &["type"], + }.unwrap(); } diff --git a/components/backup-stream/src/metadata/mod.rs b/components/backup-stream/src/metadata/mod.rs index a49eb305fa1..4c387533e49 100644 --- a/components/backup-stream/src/metadata/mod.rs +++ b/components/backup-stream/src/metadata/mod.rs @@ -6,5 +6,5 @@ mod metrics; pub mod store; mod test; -pub use client::{MetadataClient, MetadataEvent, StreamTask}; +pub use client::{Checkpoint, CheckpointProvider, MetadataClient, MetadataEvent, StreamTask}; pub use store::lazy_etcd::{ConnectionConfig, LazyEtcdClient}; diff --git a/components/backup-stream/src/metadata/store/etcd.rs b/components/backup-stream/src/metadata/store/etcd.rs index 7da46ea5dbf..2b940c905cd 100644 --- a/components/backup-stream/src/metadata/store/etcd.rs +++ b/components/backup-stream/src/metadata/store/etcd.rs @@ -1,21 +1,32 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{pin::Pin, sync::Arc}; +use std::{ + cmp::Ordering, + collections::{HashMap, HashSet}, + pin::Pin, + sync::Arc, + time::Duration, +}; use async_trait::async_trait; use etcd_client::{ - DeleteOptions, EventType, GetOptions, SortOrder, SortTarget, Txn, TxnOp, WatchOptions, + Client, Compare, CompareOp, DeleteOptions, EventType, GetOptions, PutOptions, SortOrder, + SortTarget, Txn, TxnOp, WatchOptions, }; use futures::StreamExt; use tikv_util::warn; use tokio::sync::Mutex; use tokio_stream::Stream; -use super::{GetExtra, GetResponse, Keys, KvChangeSubscription, KvEventType, MetaStore, Snapshot}; +use super::{ + GetExtra, GetResponse, Keys, KvChangeSubscription, KvEventType, MetaStore, Snapshot, + TransactionOp, +}; use crate::{ errors::Result, metadata::{ keys::{KeyValue, MetaKey}, + metrics::METADATA_KEY_OPERATION, store::{KvEvent, Subscription}, }, }; @@ -91,11 +102,6 @@ impl MetaStore for EtcdStore { }) } - async fn set(&self, pair: KeyValue) -> Result<()> { - self.0.lock().await.put(pair.0, 
pair.1, None).await?; - Ok(()) - } - async fn watch(&self, keys: Keys, start_rev: i64) -> Result { let mut opt = WatchOptions::new(); let key = prepare_opt!(opt, keys); @@ -128,6 +134,20 @@ impl MetaStore for EtcdStore { }) } + async fn txn(&self, t: super::Transaction) -> Result<()> { + let mut cli = self.0.lock().await; + let txns = Self::make_txn(&mut cli, t).await?; + for txn in txns { + cli.txn(txn).await?; + } + Ok(()) + } + + async fn set(&self, pair: KeyValue) -> Result<()> { + self.0.lock().await.put(pair.0, pair.1, None).await?; + Ok(()) + } + async fn delete(&self, keys: Keys) -> Result<()> { let mut opt = DeleteOptions::new(); let key = prepare_opt!(opt, keys); @@ -136,31 +156,114 @@ impl MetaStore for EtcdStore { Ok(()) } - async fn txn(&self, t: super::Transaction) -> Result<()> { - self.0.lock().await.txn(t.into()).await?; + async fn txn_cond(&self, txn: super::CondTransaction) -> Result<()> { + let mut cli = self.0.lock().await; + let txn = Self::make_conditional_txn(&mut cli, txn).await?; + cli.txn(txn).await?; Ok(()) } } -impl From for Txn { - fn from(etcd_txn: super::Transaction) -> Txn { - let txn = Txn::default(); - txn.and_then( - etcd_txn - .into_ops() - .into_iter() - .map(|op| match op { - super::TransactionOp::Put(mut pair) => { - TxnOp::put(pair.take_key(), pair.take_value(), None) - } - super::TransactionOp::Delete(rng) => { - let mut opt = DeleteOptions::new(); - let key = prepare_opt!(opt, rng); - TxnOp::delete(key, Some(opt)) - } - }) - .collect::>(), - ) +impl EtcdStore { + fn collect_leases_needed(txn: &super::Transaction) -> HashSet { + txn.ops + .iter() + .filter_map(|op| match op { + TransactionOp::Put(_, opt) if opt.ttl.as_secs() > 0 => Some(opt.ttl), + _ => None, + }) + .collect() + } + + async fn make_leases( + cli: &mut Client, + needed: HashSet, + ) -> Result> { + let mut map = HashMap::with_capacity(needed.len()); + for lease_time in needed { + let lease_id = cli.lease_grant(lease_time.as_secs() as _, None).await?.id(); + 
map.insert(lease_time, lease_id); + } + Ok(map) + } + + fn partition_txns(mut txn: super::Transaction, leases: HashMap) -> Vec { + txn.ops + .chunks_mut(128) + .map(|txn| Txn::default().and_then(Self::to_txn(txn, &leases))) + .collect() + } + + fn to_compare(cond: super::Condition) -> Compare { + let op = match cond.result { + Ordering::Less => CompareOp::Less, + Ordering::Equal => CompareOp::Equal, + Ordering::Greater => CompareOp::Greater, + }; + Compare::value(cond.over_key, op, cond.arg) + } + + /// Convert the transcation operations to etcd transcation ops. + fn to_txn(ops: &mut [super::TransactionOp], leases: &HashMap) -> Vec { + ops.iter_mut().map(|op| match op { + TransactionOp::Put(key, opt) => { + let opts = if opt.ttl.as_secs() > 0 { + let lease = leases.get(&opt.ttl); + match lease { + None => { + warn!("lease not found, the request key may not have a ttl"; "dur" => ?opt.ttl); + None + } + Some(lease_id) => { + Some(PutOptions::new().with_lease(*lease_id)) + } + } + } else { + None + }; + TxnOp::put(key.take_key(), key.take_value(), opts) + }, + TransactionOp::Delete(rng) => { + let rng = std::mem::replace(rng, Keys::Key(MetaKey(vec![]))); + let mut opt = DeleteOptions::new(); + let key = prepare_opt!(opt, rng); + TxnOp::delete(key, Some(opt)) + }, + }).collect::>() + } + + /// Make a conditional txn. + /// For now, this wouldn't split huge transaction into smaller ones, + /// so when playing with etcd in PD, conditional transaction should be small. 
+ async fn make_conditional_txn( + cli: &mut Client, + mut txn: super::CondTransaction, + ) -> Result { + let cond = Self::to_compare(txn.cond); + + let mut leases_needed = Self::collect_leases_needed(&txn.success); + leases_needed.extend(Self::collect_leases_needed(&txn.failure).into_iter()); + let leases = Self::make_leases(cli, leases_needed).await?; + let success = Self::to_txn(&mut txn.success.ops, &leases); + let failure = Self::to_txn(&mut txn.failure.ops, &leases); + Ok(Txn::new().when([cond]).and_then(success).or_else(failure)) + } + + async fn make_txn(cli: &mut Client, etcd_txn: super::Transaction) -> Result> { + let (put_cnt, delete_cnt) = etcd_txn.ops.iter().fold((0, 0), |(p, d), item| match item { + TransactionOp::Put(..) => (p + 1, d), + TransactionOp::Delete(_) => (p, d + 1), + }); + METADATA_KEY_OPERATION + .with_label_values(&["put"]) + .inc_by(put_cnt); + METADATA_KEY_OPERATION + .with_label_values(&["del"]) + .inc_by(delete_cnt); + let needed_leases = Self::collect_leases_needed(&etcd_txn); + let leases = Self::make_leases(cli, needed_leases).await?; + let txns = Self::partition_txns(etcd_txn, leases); + Ok(txns) } } diff --git a/components/backup-stream/src/metadata/store/lazy_etcd.rs b/components/backup-stream/src/metadata/store/lazy_etcd.rs index 61145455419..7e1858b913e 100644 --- a/components/backup-stream/src/metadata/store/lazy_etcd.rs +++ b/components/backup-stream/src/metadata/store/lazy_etcd.rs @@ -140,4 +140,8 @@ impl MetaStore for LazyEtcdClient { async fn txn(&self, txn: super::Transaction) -> Result<()> { self.0.get_cli().await?.txn(txn).await } + + async fn txn_cond(&self, txn: super::CondTransaction) -> Result<()> { + self.0.get_cli().await?.txn_cond(txn).await + } } diff --git a/components/backup-stream/src/metadata/store/mod.rs b/components/backup-stream/src/metadata/store/mod.rs index 58441d7ba72..0855582da59 100644 --- a/components/backup-stream/src/metadata/store/mod.rs +++ 
b/components/backup-stream/src/metadata/store/mod.rs @@ -1,11 +1,16 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +pub mod lazy_etcd; + +// Note: these mods also used for integration tests, +// so we cannot compile them only when `#[cfg(test)]`. +// (See https://github.com/rust-lang/rust/issues/84629) +// Maybe we'd better make a feature like `integration-test`? pub mod slash_etc; pub use slash_etc::SlashEtcStore; pub mod etcd; -pub mod lazy_etcd; -use std::{future::Future, pin::Pin}; +use std::{cmp::Ordering, future::Future, pin::Pin, time::Duration}; use async_trait::async_trait; pub use etcd::EtcdStore; @@ -23,25 +28,73 @@ pub struct Transaction { ops: Vec, } +/// A condition for executing a transcation. +/// Compare value a key with arg. +#[derive(Debug)] +pub struct Condition { + over_key: Vec, + result: Ordering, + arg: Vec, +} + +impl Condition { + pub fn new(over_key: MetaKey, result: Ordering, arg: Vec) -> Self { + Self { + over_key: over_key.0, + result, + arg, + } + } +} + +/// A conditional transaction. +/// This would atomicly evalute the condition, and execute corresponding transaction. 
+#[derive(Debug)] +pub struct CondTransaction { + cond: Condition, + success: Transaction, + failure: Transaction, +} + +impl CondTransaction { + pub fn new(cond: Condition, success: Transaction, failure: Transaction) -> Self { + Self { + cond, + success, + failure, + } + } +} + impl Transaction { fn into_ops(self) -> Vec { self.ops } - fn put(mut self, kv: KeyValue) -> Self { - self.ops.push(TransactionOp::Put(kv)); + pub fn put(mut self, kv: KeyValue) -> Self { + self.ops.push(TransactionOp::Put(kv, PutOption::default())); self } - fn delete(mut self, keys: Keys) -> Self { + pub fn put_opt(mut self, kv: KeyValue, opt: PutOption) -> Self { + self.ops.push(TransactionOp::Put(kv, opt)); + self + } + + pub fn delete(mut self, keys: Keys) -> Self { self.ops.push(TransactionOp::Delete(keys)); self } } +#[derive(Default, Debug)] +pub struct PutOption { + pub ttl: Duration, +} + #[derive(Debug)] pub enum TransactionOp { - Put(KeyValue), + Put(KeyValue, PutOption), Delete(Keys), } @@ -140,8 +193,9 @@ pub trait MetaStore: Clone + Send + Sync { /// Can be canceled then by polling the `cancel` future in the Subscription. async fn watch(&self, keys: Keys, start_rev: i64) -> Result; /// Execute an atomic write (write batch) over the store. - /// Maybe support etcd-like compare operations? async fn txn(&self, txn: Transaction) -> Result<()>; + /// Execute an conditional transaction over the store. + async fn txn_cond(&self, txn: CondTransaction) -> Result<()>; /// Set a key in the store. /// Maybe rename it to `put` to keeping consistency with etcd? 
diff --git a/components/backup-stream/src/metadata/store/slash_etc.rs b/components/backup-stream/src/metadata/store/slash_etc.rs index 48df7dbaaca..1a2f127501c 100644 --- a/components/backup-stream/src/metadata/store/slash_etc.rs +++ b/components/backup-stream/src/metadata/store/slash_etc.rs @@ -8,14 +8,13 @@ use std::{ }; use async_trait::async_trait; -use slog_global::error; -use tikv_util::warn; use tokio::sync::{ mpsc::{self, Sender}, Mutex, }; use tokio_stream::StreamExt; +use super::{Condition, Keys}; use crate::{ errors::Result, metadata::{ @@ -33,11 +32,34 @@ struct Subscriber { tx: Sender, } +/// A key with revision. +#[derive(Default, Eq, PartialEq, Ord, PartialOrd, Clone)] +struct Key(Vec, i64); + +impl std::fmt::Debug for Key { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("Key") + .field(&format_args!( + "{}@{}", + log_wrappers::Value::key(&self.0), + self.1 + )) + .finish() + } +} + +/// A value (maybe tombstone.) +#[derive(Debug, Eq, PartialEq, Clone)] +enum Value { + Val(Vec), + Del, +} + /// An in-memory, single versioned storage. /// Emulating some interfaces of etcd for testing. #[derive(Default)] pub struct SlashEtc { - items: BTreeMap, Vec>, + items: BTreeMap, // Maybe a range tree here if the test gets too slow. 
subs: HashMap, revision: i64, @@ -54,26 +76,14 @@ impl Snapshot for WithRevision { extra: crate::metadata::store::GetExtra, ) -> Result { let data = self.inner.lock().await; - if data.revision != self.revision { - warn!( - "snapshot expired (multi version isn't supported yet, you may read steal data): {} vs {}", - data.revision, self.revision - ); - } - let (start_key, end_key) = keys.into_bound(); - let mut kvs = data - .items - .range::<[u8], _>(( - Bound::Included(start_key.as_slice()), - Bound::Excluded(end_key.as_slice()), - )) - .map(|(k, v)| KeyValue(MetaKey(k.clone()), v.clone())) - .collect::>(); - // use iterator operations (instead of collect all kv pairs in the range) - // if the test case get too slow. (How can we figure out whether there are more?) + let mut kvs = data.get_key(keys); + if extra.desc_order { kvs.reverse(); } + + // use iterator operations (instead of collect all kv pairs in the range) + // if the test case get too slow. (How can we figure out whether there are more?) 
let more = if extra.limit > 0 { let more = kvs.len() > extra.limit; kvs.truncate(extra.limit); @@ -90,9 +100,37 @@ impl Snapshot for WithRevision { } impl SlashEtc { + fn alloc_rev(&mut self) -> i64 { + self.revision += 1; + self.revision + } + + fn get_key(&self, keys: super::Keys) -> Vec { + let (start_key, end_key) = keys.into_bound(); + let mvccs = self + .items + .range(( + Bound::Included(&Key(start_key, 0)), + Bound::Excluded(&Key(end_key, 0)), + )) + .collect::>(); + let kvs = mvccs + .as_slice() + .group_by(|k1, k2| k1.0.0 == k2.0.0) + .filter_map(|k| { + let (k, v) = k.last()?; + match v { + Value::Val(val) => Some(KeyValue(MetaKey(k.0.clone()), val.clone())), + Value::Del => None, + } + }) + .collect::>(); + kvs + } + async fn set(&mut self, mut pair: crate::metadata::keys::KeyValue) -> Result<()> { let data = self; - data.revision += 1; + let rev = data.alloc_rev(); for sub in data.subs.values() { if pair.key() < sub.end_key.as_slice() && pair.key() >= sub.start_key.as_slice() { sub.tx @@ -104,33 +142,37 @@ impl SlashEtc { .unwrap(); } } - data.items.insert(pair.take_key(), pair.take_value()); + data.items + .insert(Key(pair.take_key(), rev), Value::Val(pair.take_value())); Ok(()) } async fn delete(&mut self, keys: crate::metadata::store::Keys) -> Result<()> { - let mut data = self; + let data = self; let (start_key, end_key) = keys.into_bound(); - data.revision += 1; - for mut victim in data + let rev = data.alloc_rev(); + let mut v = data .items - .range::<[u8], _>(( - Bound::Included(start_key.as_slice()), - Bound::Excluded(end_key.as_slice()), + .range(( + Bound::Included(Key(start_key, 0)), + Bound::Excluded(Key(end_key, data.revision)), )) - .map(|(k, _)| k.clone()) - .collect::>() - { - data.items.remove(&victim); + .map(|(k, _)| Key::clone(k)) + .collect::>(); + v.dedup_by(|k1, k2| k1.0 == k2.0); + + for mut victim in v { + let k = Key(victim.0.clone(), rev); + data.items.insert(k, Value::Del); for sub in data.subs.values() { - if 
victim.as_slice() < sub.end_key.as_slice() - && victim.as_slice() >= sub.start_key.as_slice() + if victim.0.as_slice() < sub.end_key.as_slice() + && victim.0.as_slice() >= sub.start_key.as_slice() { sub.tx .send(KvEvent { kind: KvEventType::Delete, - pair: KeyValue(MetaKey(std::mem::take(&mut victim)), vec![]), + pair: KeyValue(MetaKey(std::mem::take(&mut victim.0)), vec![]), }) .await .unwrap(); @@ -139,6 +181,16 @@ impl SlashEtc { } Ok(()) } + + /// A tool for dumpling the whole storage when test failed. + /// Add this to test code temporarily for debugging. + #[allow(dead_code)] + pub fn dump(&self) { + println!(">>>>>>> /etc (revision = {}) <<<<<<<", self.revision); + for (k, v) in self.items.iter() { + println!("{:?} => {:?}", k, v); + } + } } #[async_trait] @@ -158,17 +210,34 @@ impl MetaStore for SlashEtcStore { start_rev: i64, ) -> Result { let mut data = self.lock().await; - if start_rev != data.revision + 1 { - error!( - "start from arbitrary revision is not supported yet; only watch (current_rev + 1) supported. (self.revision = {}; start_rev = {})", - data.revision, start_rev - ); - } let id = data.sub_id_alloc.get(); data.sub_id_alloc.set(id + 1); let this = self.clone(); - let (tx, rx) = mpsc::channel(64); + let (tx, rx) = mpsc::channel(1024); let (start_key, end_key) = keys.into_bound(); + + // Sending events from [start_rev, now) to the client. + let mut pending = data + .items + .iter() + .filter(|(k, _)| k.1 >= start_rev) + .collect::>(); + pending.sort_by_key(|(k, _)| k.1); + for (k, v) in pending { + let event = match v { + Value::Val(val) => KvEvent { + kind: KvEventType::Put, + pair: KeyValue(MetaKey(k.0.clone()), val.clone()), + }, + Value::Del => KvEvent { + kind: KvEventType::Delete, + pair: KeyValue(MetaKey(k.0.clone()), vec![]), + }, + }; + // Note: may panic if too many pending here? 
+ tx.send(event).await.expect("too many pending events"); + } + data.subs.insert( id, Subscriber { @@ -190,10 +259,27 @@ impl MetaStore for SlashEtcStore { let mut data = self.lock().await; for op in txn.into_ops() { match op { - super::TransactionOp::Put(kv) => data.set(kv).await?, + super::TransactionOp::Put(kv, _) => data.set(kv).await?, super::TransactionOp::Delete(range) => data.delete(range).await?, } } Ok(()) } + + async fn txn_cond(&self, txn: super::CondTransaction) -> Result<()> { + let l = self.lock().await; + let Condition { + over_key, + result, + arg, + } = txn.cond; + let success = l + .get_key(Keys::Key(MetaKey(over_key))) + .last() + .map(|k| k.0.0.cmp(&arg) == result) + .unwrap_or(false); + drop(l); + let do_txn = if success { txn.success } else { txn.failure }; + self.txn(do_txn).await + } } diff --git a/components/backup-stream/src/metadata/test.rs b/components/backup-stream/src/metadata/test.rs index bb5addd24a8..e70ed78b32c 100644 --- a/components/backup-stream/src/metadata/test.rs +++ b/components/backup-stream/src/metadata/test.rs @@ -7,13 +7,21 @@ use std::{ iter::FromIterator, }; -use kvproto::brpb::{Noop, StorageBackend}; +use kvproto::{ + brpb::{Noop, StorageBackend}, + metapb::Region, +}; use tokio_stream::StreamExt; +use txn_types::TimeStamp; use super::{MetadataClient, StreamTask}; use crate::{ errors::Result, - metadata::{store::SlashEtcStore, MetadataEvent}, + metadata::{ + client::{Checkpoint, CheckpointProvider}, + store::SlashEtcStore, + MetadataEvent, + }, }; fn test_meta_cli() -> MetadataClient { @@ -91,6 +99,12 @@ fn task_matches(expected: &[StreamTask], real: &[StreamTask]) { ); } +fn fake_region(id: u64) -> Region { + let mut r = Region::new(); + r.set_id(id); + r +} + #[tokio::test] async fn test_watch() -> Result<()> { let cli = test_meta_cli(); @@ -98,7 +112,7 @@ async fn test_watch() -> Result<()> { cli.insert_task_with_range(&task, &[]).await?; let initial_task_set = cli.get_tasks().await?; 
task_matches(initial_task_set.inner.as_slice(), &[task]); - let watcher = cli.events_from(initial_task_set.revision).await?; + let watcher = cli.events_from(initial_task_set.revision + 1).await?; let task2 = simple_task("simple_2"); cli.insert_task_with_range(&task2, &[]).await?; cli.remove_task("simple_1").await?; @@ -121,17 +135,97 @@ async fn test_progress() -> Result<()> { let cli = test_meta_cli(); let task = simple_task("simple_1"); cli.insert_task_with_range(&task, &[]).await?; - let progress = cli.progress_of_task(&task.info.name).await?; + let progress = cli.global_progress_of_task(&task.info.name).await?; assert_eq!(progress, task.info.start_ts); - cli.step_task(&task.info.name, 42).await?; - let progress = cli.progress_of_task(&task.info.name).await?; + cli.set_local_task_checkpoint(&task.info.name, 42).await?; + let progress = cli.global_progress_of_task(&task.info.name).await?; assert_eq!(progress, 42); - cli.step_task(&task.info.name, 43).await?; - let progress = cli.progress_of_task(&task.info.name).await?; + cli.set_local_task_checkpoint(&task.info.name, 43).await?; + let progress = cli.global_progress_of_task(&task.info.name).await?; assert_eq!(progress, 43); let other_store = MetadataClient::new(cli.meta_store.clone(), 43); - let progress = other_store.progress_of_task(&task.info.name).await?; - assert_eq!(progress, task.info.start_ts); + let progress = other_store + .get_local_task_checkpoint(&task.info.name) + .await?; + assert_eq!(progress.into_inner(), task.info.start_ts); + + Ok(()) +} + +#[tokio::test] +async fn test_init() -> Result<()> { + let cli = test_meta_cli(); + let mut task = simple_task("simple_2"); + cli.insert_task_with_range(&task, &[]).await?; + task.info.set_start_ts(42); + // Init task should set the checkpoint. 
+ cli.init_task(&task.info).await?; + let progress = cli.global_progress_of_task(&task.info.name).await?; + assert_eq!(progress, 42); + cli.set_local_task_checkpoint(&task.info.name, 43).await?; + + // Init task again shouldn't roll back checkpoint. + cli.init_task(&task.info).await?; + let progress = cli.global_progress_of_task(&task.info.name).await?; + assert_eq!(progress, 43); + + Ok(()) +} + +#[tokio::test] +async fn test_region_checkpoint() -> Result<()> { + let cli = test_meta_cli(); + let task = simple_task("simple_2"); + cli.insert_task_with_range(&task, &[]).await?; + let cps = [ + (fake_region(1), TimeStamp::new(42)), + (fake_region(2), TimeStamp::new(64)), + ]; + cli.upload_region_checkpoint("simple_2", &cps).await?; + cli.set_local_task_checkpoint("simple_2", 50).await?; + + let rcp = cli + .get_region_checkpoint("simple_2", &fake_region(1)) + .await?; + assert_eq!( + rcp, + Checkpoint { + provider: CheckpointProvider::Region { id: 1, version: 0 }, + ts: TimeStamp::new(42) + } + ); + let gcp = cli + .get_region_checkpoint("simple_2", &fake_region(3)) + .await?; + assert_eq!( + gcp, + Checkpoint { + provider: CheckpointProvider::Store(42), + ts: TimeStamp::new(50) + } + ); + cli.clear_region_checkpoint("simple_2", &[fake_region(1)]) + .await?; + let rcp = cli + .get_region_checkpoint("simple_2", &fake_region(2)) + .await?; + assert_eq!( + rcp, + Checkpoint { + provider: CheckpointProvider::Region { id: 2, version: 0 }, + ts: TimeStamp::new(64) + } + ); + let gcp = cli + .get_region_checkpoint("simple_2", &fake_region(1)) + .await?; + assert_eq!( + gcp, + Checkpoint { + provider: CheckpointProvider::Store(42), + ts: TimeStamp::new(50) + } + ); Ok(()) } diff --git a/components/backup-stream/src/metrics.rs b/components/backup-stream/src/metrics.rs index 8ac5b30b000..a27dd1ea33b 100644 --- a/components/backup-stream/src/metrics.rs +++ b/components/backup-stream/src/metrics.rs @@ -25,6 +25,10 @@ pub fn update_task_status(status: TaskStatus, task: &str) { } } 
+pub fn remove_task_status_metric(task: &str) -> Result<()> { + TASK_STATUS.remove_label_values(&[task]) +} + lazy_static! { pub static ref INTERNAL_ACTOR_MESSAGE_HANDLE_DURATION: HistogramVec = register_histogram_vec!( "tikv_log_backup_interal_actor_acting_duration_sec", @@ -63,13 +67,13 @@ lazy_static! { "The total kv size skipped by the streaming", ) .unwrap(); - pub static ref STREAM_ERROR: CounterVec = register_counter_vec!( + pub static ref STREAM_ERROR: IntCounterVec = register_int_counter_vec!( "tikv_stream_errors", "The errors during stream backup.", &["type"] ) .unwrap(); - pub static ref STREAM_FATAL_ERROR: CounterVec = register_counter_vec!( + pub static ref STREAM_FATAL_ERROR: IntCounterVec = register_int_counter_vec!( "tikv_log_backup_fatal_errors", "The errors during stream backup.", &["type"] @@ -129,10 +133,9 @@ lazy_static! { "When gt 0, this node enabled streaming." ) .unwrap(); - pub static ref TRACK_REGION: IntCounterVec = register_int_counter_vec!( + pub static ref TRACK_REGION: IntGauge = register_int_gauge!( "tikv_stream_observed_region", "the region being observed by the current store.", - &["type"], ) .unwrap(); static ref TASK_STATUS: IntGaugeVec = register_int_gauge_vec!( @@ -141,4 +144,10 @@ lazy_static! 
{ &["task"] ) .unwrap(); + pub static ref PENDING_INITIAL_SCAN_LEN: IntGaugeVec = register_int_gauge_vec!( + "pending_initial_scan", + "The pending initial scan", + &["stage"] + ) + .unwrap(); } diff --git a/components/backup-stream/src/observer.rs b/components/backup-stream/src/observer.rs index 02c63f62a60..ad22b67e145 100644 --- a/components/backup-stream/src/observer.rs +++ b/components/backup-stream/src/observer.rs @@ -71,7 +71,6 @@ impl BackupStreamObserver { .scheduler .schedule(Task::ModifyObserve(ObserveOp::Start { region: region.clone(), - needs_initial_scanning: true, })) { use crate::errors::Error; @@ -137,7 +136,6 @@ impl CmdObserver for BackupStreamObserver { self.scheduler, Task::ModifyObserve(ObserveOp::Start { region: region.clone(), - needs_initial_scanning: true, }) ); if success { @@ -174,7 +172,7 @@ impl RegionChangeObserver for BackupStreamObserver { RegionChangeEvent::Destroy => { try_send!( self.scheduler, - Task::ModifyObserve(ObserveOp::CheckEpochAndStop { + Task::ModifyObserve(ObserveOp::Destroy { region: ctx.region().clone(), }) ); diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 8db9244d916..dec4baeae89 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -42,7 +42,7 @@ use tokio::{ sync::{Mutex, RwLock}, }; use tokio_util::compat::TokioAsyncReadCompatExt; -use txn_types::{Key, Lock, TimeStamp}; +use txn_types::{Key, Lock, TimeStamp, WriteRef}; use super::errors::Result; use crate::{ @@ -56,8 +56,12 @@ use crate::{ utils::{self, SegmentMap, Slot, SlotMap, StopWatch}, }; -pub const FLUSH_STORAGE_INTERVAL: u64 = 300; -pub const FLUSH_FAILURE_BECOME_FATAL_THRESHOLD: usize = 16; +const FLUSH_FAILURE_BECOME_FATAL_THRESHOLD: usize = 30; + +/// FLUSH_LOG_CONCURRENT_BATCH_COUNT specifies the concurrent count to write to storage. 
+/// 'Log backup' will produce a large mount of small files during flush interval, +/// and storage could take mistaken if writing all of these files to storage concurrently. +const FLUSH_LOG_CONCURRENT_BATCH_COUNT: usize = 128; #[derive(Debug)] pub struct ApplyEvent { @@ -476,7 +480,6 @@ impl RouterInner { let result = task_info.do_flush(store_id, resolve_to).await; // set false to flushing whether success or fail task_info.set_flushing_status(false); - task_info.update_flush_time(); if let Err(e) = result { e.report("failed to flush task."); @@ -490,6 +493,8 @@ impl RouterInner { } return None; } + // if succeed in flushing, update flush_time. Or retry do_flush immediately. + task_info.update_flush_time(); result.ok().flatten() } _ => None, @@ -601,20 +606,24 @@ impl TempFileKey { return dt.format("%Y%m%d"); } + /// path_to_log_file specifies the path of record log. + /// eg. "v1/20220625/t00000071/434098800931373064-f0251bd5-1441-499a-8f53-adc0d1057a73.log" fn path_to_log_file(&self, min_ts: u64, max_ts: u64) -> String { format!( - "v1/t{:08}/{}-{:012}-{}.log", - self.table_id, + "v1/{}/t{:08}/{:012}-{}.log", // We may delete a range of files, so using the max_ts for preventing remove some records wrong. Self::format_date_time(max_ts), + self.table_id, min_ts, uuid::Uuid::new_v4() ) } + /// path_to_schema_file specifies the path of schema log. + /// eg. "v1/20220625/schema-meta/434055683656384515-cc3cb7a3-e03b-4434-ab6c-907656fddf67.log" fn path_to_schema_file(min_ts: u64, max_ts: u64) -> String { format!( - "v1/schema-meta/{}-{:012}-{}.log", + "v1/{}/schema-meta/{:012}-{}.log", Self::format_date_time(max_ts), min_ts, uuid::Uuid::new_v4(), @@ -658,6 +667,20 @@ pub struct StreamTaskInfo { flush_fail_count: AtomicUsize, } +impl Drop for StreamTaskInfo { + fn drop(&mut self) { + let (success, failed): (Vec<_>, Vec<_>) = self + .flushing_files + .get_mut() + .drain(..) 
+ .chain(self.files.get_mut().drain()) + .map(|(_, f)| f.into_inner().local_path) + .map(std::fs::remove_file) + .partition(|r| r.is_ok()); + info!("stream task info dropped, removing temp files"; "success" => %success.len(), "failure" => %failed.len()) + } +} + impl std::fmt::Debug for StreamTaskInfo { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("StreamTaskInfo") @@ -803,6 +826,11 @@ impl StreamTaskInfo { /// move need-flushing files to flushing_files. pub async fn move_to_flushing_files(&self) -> &Self { + // if flushing_files is not empty, which represents this flush is a retry operation. + if !self.flushing_files.read().await.is_empty() { + return self; + } + let mut w = self.files.write().await; let mut fw = self.flushing_files.write().await; for (k, v) in w.drain() { @@ -863,10 +891,14 @@ impl StreamTaskInfo { // if failed to write storage, we should retry write flushing_files. let storage = self.storage.clone(); let files = self.flushing_files.write().await; - let futs = files - .iter() - .map(|(_, v)| Self::flush_log_file_to(storage.clone(), v)); - futures::future::try_join_all(futs).await?; + + for batch_files in files.chunks(FLUSH_LOG_CONCURRENT_BATCH_COUNT) { + let futs = batch_files + .iter() + .map(|(_, v)| Self::flush_log_file_to(storage.clone(), v)); + futures::future::try_join_all(futs).await?; + } + Ok(()) } @@ -967,6 +999,7 @@ struct DataFile { min_ts: TimeStamp, max_ts: TimeStamp, resolved_ts: TimeStamp, + min_begin_ts: Option, sha256: Hasher, inner: BufWriter, start_key: Vec, @@ -981,6 +1014,8 @@ struct DataFile { pub struct MetadataInfo { pub files: Vec, pub min_resolved_ts: Option, + pub min_ts: Option, + pub max_ts: Option, pub store_id: u64, } @@ -989,6 +1024,8 @@ impl MetadataInfo { Self { files: Vec::with_capacity(cap), min_resolved_ts: None, + min_ts: None, + max_ts: None, store_id: 0, } } @@ -1000,6 +1037,12 @@ impl MetadataInfo { fn push(&mut self, file: DataFileInfo) { let rts = file.resolved_ts; 
self.min_resolved_ts = self.min_resolved_ts.map_or(Some(rts), |r| Some(r.min(rts))); + self.min_ts = self + .min_ts + .map_or(Some(file.min_ts), |ts| Some(ts.min(file.min_ts))); + self.max_ts = self + .max_ts + .map_or(Some(file.max_ts), |ts| Some(ts.max(file.max_ts))); self.files.push(file); } @@ -1007,7 +1050,9 @@ impl MetadataInfo { let mut metadata = Metadata::new(); metadata.set_files(self.files.into()); metadata.set_store_id(self.store_id as _); - metadata.set_resolved_ts(self.min_resolved_ts.unwrap_or_default() as _); + metadata.set_resolved_ts(self.min_resolved_ts.unwrap_or_default()); + metadata.set_min_ts(self.min_ts.unwrap_or(0)); + metadata.set_max_ts(self.max_ts.unwrap_or(0)); metadata .write_to_bytes() @@ -1033,6 +1078,7 @@ impl DataFile { min_ts: TimeStamp::max(), max_ts: TimeStamp::zero(), resolved_ts: TimeStamp::zero(), + min_begin_ts: None, inner: BufWriter::with_capacity(128 * 1024, File::create(local_path.as_ref()).await?), sha256, number_of_entries: 0, @@ -1048,10 +1094,23 @@ impl DataFile { remove_file(&self.local_path).await } + fn decode_begin_ts(value: Vec) -> Result { + WriteRef::parse(&value).map_or_else( + |e| { + Err(Error::Other(box_err!( + "failed to parse write cf value: {}", + e + ))) + }, + |w| Ok(w.start_ts), + ) + } + /// Add a new KV pair to the file, returning its size. async fn on_events(&mut self, events: ApplyEvents) -> Result { let now = Instant::now_coarse(); let mut total_size = 0; + for mut event in events.events { let encoded = EventEncoder::encode_event(&event.key, &event.value); let mut size = 0; @@ -1069,6 +1128,13 @@ impl DataFile { self.min_ts = self.min_ts.min(ts); self.max_ts = self.max_ts.max(ts); self.resolved_ts = self.resolved_ts.max(events.region_resolved_ts.into()); + + // decode_begin_ts is used to maintain the txn when restore log. + // if value is empty, no need to decode begin_ts. 
+ if event.cf == CF_WRITE && !event.value.is_empty() { + let begin_ts = Self::decode_begin_ts(event.value)?; + self.min_begin_ts = Some(self.min_begin_ts.map_or(begin_ts, |ts| ts.min(begin_ts))); + } self.number_of_entries += 1; self.file_size += size; self.update_key_bound(key.into_encoded()); @@ -1117,6 +1183,10 @@ impl DataFile { meta.set_max_ts(self.max_ts.into_inner() as _); meta.set_min_ts(self.min_ts.into_inner() as _); meta.set_resolved_ts(self.resolved_ts.into_inner() as _); + meta.set_min_begin_ts_in_default_cf( + self.min_begin_ts + .map_or(self.min_ts.into_inner(), |ts| ts.into_inner()), + ); meta.set_start_key(std::mem::take(&mut self.start_key)); meta.set_end_key(std::mem::take(&mut self.end_key)); meta.set_length(self.file_size as _); @@ -1161,6 +1231,7 @@ mod tests { codec::number::NumberEncoder, worker::{dummy_scheduler, ReceiverWrapper}, }; + use txn_types::{Write, WriteType}; use super::*; use crate::utils; @@ -1181,6 +1252,12 @@ mod tests { table_key } + fn make_value(t: WriteType, value: &[u8], start_ts: u64) -> Vec { + let start_ts = TimeStamp::new(start_ts); + let w = Write::new(t, start_ts, Some(value.to_vec())); + w.as_ref().to_bytes() + } + impl KvEventsBuilder { fn new(region_id: u64, region_resolved_ts: u64) -> Self { Self { @@ -1219,9 +1296,14 @@ mod tests { }) } - fn put_table(&mut self, cf: &'static str, table: i64, key: &[u8], value: &[u8]) { + fn put_table(&mut self, cf: CfName, table: i64, key: &[u8], value: &[u8]) { let table_key = make_table_key(table, key); - self.put_event(cf, table_key, value.to_vec()); + let value = if cf == CF_WRITE { + make_value(WriteType::Put, value, 12345) + } else { + value.to_vec() + }; + self.put_event(cf, table_key, value); } fn delete_table(&mut self, cf: &'static str, table: i64, key: &[u8]) { @@ -1229,7 +1311,7 @@ mod tests { self.delete_event(cf, table_key); } - fn flush_events(&mut self) -> ApplyEvents { + fn finish(&mut self) -> ApplyEvents { let region_id = self.events.region_id; let 
region_resolved_ts = self.events.region_resolved_ts; std::mem::replace( @@ -1326,15 +1408,7 @@ mod tests { } } - #[tokio::test] - async fn test_basic_file() -> Result<()> { - let tmp = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); - tokio::fs::create_dir_all(&tmp).await?; - let (tx, rx) = dummy_scheduler(); - let router = RouterInner::new(tmp.clone(), tx, 32, Duration::from_secs(300)); - let (stream_task, storage_path) = task("dummy".to_owned()).await?; - must_register_table(&router, stream_task, 1).await; - + async fn write_simple_data(router: &RouterInner) -> u64 { let now = TimeStamp::physical_now(); let mut region1 = KvEventsBuilder::new(1, now); let start_ts = TimeStamp::physical_now(); @@ -1345,8 +1419,21 @@ mod tests { region1.put_table(CF_WRITE, 2, b"hello", b"this isn't a write record :3"); region1.put_table(CF_WRITE, 1, b"hello", b"still isn't a write record :3"); region1.delete_table(CF_DEFAULT, 1, b"hello"); - let events = region1.flush_events(); + let events = region1.finish(); check_on_events_result(&router.on_events(events).await); + start_ts + } + + #[tokio::test] + async fn test_basic_file() -> Result<()> { + let tmp = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); + tokio::fs::create_dir_all(&tmp).await?; + let (tx, rx) = dummy_scheduler(); + let router = RouterInner::new(tmp.clone(), tx, 32, Duration::from_secs(300)); + let (stream_task, storage_path) = task("dummy".to_owned()).await?; + must_register_table(&router, stream_task, 1).await; + + let start_ts = write_simple_data(&router).await; tokio::time::sleep(Duration::from_millis(200)).await; let end_ts = TimeStamp::physical_now(); @@ -1405,6 +1492,58 @@ mod tests { Ok(()) } + fn mock_build_kv_events(table_id: i64, region_id: u64, resolved_ts: u64) -> ApplyEvents { + let mut events_builder = KvEventsBuilder::new(region_id, resolved_ts); + events_builder.put_table("default", table_id, b"hello", b"world"); + events_builder.finish() + } + + #[tokio::test] + async 
fn test_do_flush() { + let tmp_dir = tempfile::tempdir().unwrap(); + let backend = external_storage_export::make_local_backend(tmp_dir.path()); + let mut task_info = StreamBackupTaskInfo::default(); + task_info.set_storage(backend); + let stream_task = StreamTask { + info: task_info, + is_paused: false, + }; + let task = StreamTaskInfo::new( + tmp_dir.path().to_path_buf(), + stream_task, + Duration::from_secs(300), + ) + .await + .unwrap(); + + // on_event + let region_count = FLUSH_LOG_CONCURRENT_BATCH_COUNT + 5; + for i in 1..=region_count { + let kv_events = mock_build_kv_events(i as _, i as _, i as _); + task.on_events(kv_events).await.unwrap(); + } + // do_flush + task.set_flushing_status(true); + task.do_flush(1, TimeStamp::new(1)).await.unwrap(); + assert_eq!(task.flush_failure_count(), 0); + assert_eq!(task.files.read().await.is_empty(), true); + assert_eq!(task.flushing_files.read().await.is_empty(), true); + + // assert backup log files + let mut meta_count = 0; + let mut log_count = 0; + for entry in walkdir::WalkDir::new(tmp_dir.path()) { + let entry = entry.unwrap(); + if entry.path().extension() == Some(OsStr::new("meta")) { + meta_count += 1; + } else if entry.path().extension() == Some(OsStr::new("log")) { + log_count += 1; + } + } + assert_eq!(meta_count, 1); + assert_eq!(log_count, region_count); + } + struct ErrorStorage { inner: Inner, error_on_write: Box io::Result<()> + Send + Sync>, @@ -1507,8 +1646,12 @@ mod tests { .is_none() ); check_on_events_result(&router.on_events(build_kv_event(10, 10)).await); - let _ = router.do_flush("error_prone", 42, TimeStamp::max()).await; let t = router.get_task_info("error_prone").await.unwrap(); + let _ = router.do_flush("error_prone", 42, TimeStamp::max()).await; + assert_eq!(t.total_size() > 0, true); + + t.set_flushing_status(true); + let _ = router.do_flush("error_prone", 42, TimeStamp::max()).await; assert_eq!(t.total_size(), 0); Ok(()) } @@ -1539,6 +1682,47 @@ mod tests { assert_eq!(ts.into_inner(), 
rts); } + #[tokio::test] + async fn test_cleanup_when_stop() -> Result<()> { + let (tx, _rx) = dummy_scheduler(); + let tmp = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); + let router = Arc::new(RouterInner::new( + tmp.clone(), + tx, + 1, + Duration::from_secs(300), + )); + let (task, _path) = task("cleanup_test".to_owned()).await?; + must_register_table(&router, task, 1).await; + write_simple_data(&router).await; + router + .get_task_info("cleanup_test") + .await? + .move_to_flushing_files() + .await; + write_simple_data(&router).await; + let mut w = walkdir::WalkDir::new(&tmp).into_iter(); + assert!(w.next().is_some(), "the temp files doesn't created"); + drop(router); + let w = walkdir::WalkDir::new(&tmp) + .into_iter() + .filter_map(|entry| { + let e = entry.unwrap(); + e.path() + .extension() + .filter(|x| x.to_string_lossy() == "log") + .map(|_| e.clone()) + }) + .collect::>(); + + assert!( + w.is_empty(), + "the temp files should be removed, but it is {:?}", + w + ); + Ok(()) + } + #[tokio::test] async fn test_flush_with_pausing_self() -> Result<()> { let (tx, rx) = dummy_scheduler(); @@ -1556,8 +1740,8 @@ mod tests { i.storage = Arc::new(ErrorStorage::with_always_error(i.storage.clone())) }) .await; - for i in 0..=16 { - check_on_events_result(&router.on_events(build_kv_event(i * 10, 10)).await); + for i in 0..=FLUSH_FAILURE_BECOME_FATAL_THRESHOLD { + check_on_events_result(&router.on_events(build_kv_event((i * 10) as _, 10)).await); assert_eq!( router .do_flush("flush_failure", 42, TimeStamp::zero()) @@ -1585,4 +1769,14 @@ mod tests { let s = s.to_string(); assert_eq!(s, "20220307"); } + + #[test] + fn test_decode_begin_ts() { + let start_ts = TimeStamp::new(12345678); + let w = Write::new(WriteType::Put, start_ts, Some(b"short_value".to_vec())); + let value = w.as_ref().to_bytes(); + + let begin_ts = DataFile::decode_begin_ts(value).unwrap(); + assert_eq!(begin_ts, start_ts); + } } diff --git a/components/backup-stream/src/service.rs 
b/components/backup-stream/src/service.rs new file mode 100644 index 00000000000..47a149973b2 --- /dev/null +++ b/components/backup-stream/src/service.rs @@ -0,0 +1,92 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::collections::HashSet; + +use grpcio::RpcContext; +use kvproto::{logbackuppb::*, metapb::Region}; +use tikv_util::{warn, worker::Scheduler}; + +use crate::{ + checkpoint_manager::{GetCheckpointResult, RegionIdWithVersion}, + endpoint::{RegionCheckpointOperation, RegionSet}, + try_send, Task, +}; + +#[derive(Clone)] +pub struct Service { + endpoint: Scheduler, +} + +impl Service { + pub fn new(endpoint: Scheduler) -> Self { + Self { endpoint } + } +} + +fn id_of(region: &Region) -> RegionIdentity { + let mut id = RegionIdentity::new(); + id.set_id(region.get_id()); + id.set_epoch_version(region.get_region_epoch().get_version()); + id +} + +impl From for RegionIdentity { + fn from(val: RegionIdWithVersion) -> Self { + let mut id = RegionIdentity::new(); + id.set_id(val.region_id); + id.set_epoch_version(val.region_epoch_version); + id + } +} + +impl LogBackup for Service { + fn get_last_flush_ts_of_region( + &mut self, + _ctx: RpcContext<'_>, + mut req: GetLastFlushTsOfRegionRequest, + sink: grpcio::UnarySink, + ) { + let regions = req + .take_regions() + .into_iter() + .map(|id| (id.id, id.epoch_version)) + .collect::>(); + let t = Task::RegionCheckpointsOp(RegionCheckpointOperation::Get( + RegionSet::Regions(regions), + Box::new(move |rs| { + let mut resp = GetLastFlushTsOfRegionResponse::new(); + resp.set_checkpoints( + rs.into_iter() + .map(|r| match r { + GetCheckpointResult::Ok { region, checkpoint } => { + let mut r = RegionCheckpoint::new(); + let id = id_of(®ion); + r.set_region(id); + r.set_checkpoint(checkpoint.into_inner()); + r + } + GetCheckpointResult::NotFound { id, err } => { + let mut r = RegionCheckpoint::new(); + r.set_region(id.into()); + r.set_err(err); + r + } + GetCheckpointResult::EpochNotMatch { 
region, err } => { + let mut r = RegionCheckpoint::new(); + r.set_region(id_of(®ion)); + r.set_err(err); + r + } + }) + .collect(), + ); + tokio::spawn(async { + if let Err(e) = sink.success(resp).await { + warn!("failed to reply grpc resonse."; "err" => %e) + } + }); + }), + )); + try_send!(self.endpoint, t); + } +} diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs new file mode 100644 index 00000000000..fc4f0e2d4a7 --- /dev/null +++ b/components/backup-stream/src/subscription_manager.rs @@ -0,0 +1,650 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + time::Duration, +}; + +use crossbeam::channel::{Receiver as SyncReceiver, Sender as SyncSender}; +use crossbeam_channel::SendError; +use engine_traits::KvEngine; +use error_code::{backup_stream::OBSERVE_CANCELED, ErrorCodeExt}; +use futures::FutureExt; +use kvproto::metapb::Region; +use pd_client::PdClient; +use raft::StateRole; +use raftstore::{ + coprocessor::{ObserveHandle, RegionInfoProvider}, + router::RaftStoreRouter, + store::fsm::ChangeObserver, +}; +use tikv::storage::Statistics; +use tikv_util::{box_err, debug, info, time::Instant, warn, worker::Scheduler}; +use tokio::sync::mpsc::{channel, Receiver, Sender}; +use txn_types::TimeStamp; +use yatp::task::callback::Handle as YatpHandle; + +use crate::{ + annotate, + endpoint::ObserveOp, + errors::{Error, Result}, + event_loader::InitialDataLoader, + future, + metadata::{store::MetaStore, CheckpointProvider, MetadataClient}, + metrics, + observer::BackupStreamObserver, + router::Router, + subscription_track::SubscriptionTracer, + try_send, + utils::{self, CallbackWaitGroup, Work}, + Task, +}; + +type ScanPool = yatp::ThreadPool; + +/// a request for doing initial scanning. 
+struct ScanCmd { + region: Region, + handle: ObserveHandle, + last_checkpoint: TimeStamp, + work: Work, +} + +/// The response of requesting resolve the new checkpoint of regions. +pub struct ResolvedRegions { + items: Vec<(Region, TimeStamp)>, + checkpoint: TimeStamp, +} + +impl ResolvedRegions { + /// compose the calculated global checkpoint and region checkpoints. + /// note: maybe we can compute the global checkpoint internal and getting the interface clear. + /// however we must take the `min_ts` or we cannot provide valid global checkpoint if there + /// isn't any region checkpoint. + pub fn new(checkpoint: TimeStamp, checkpoints: Vec<(Region, TimeStamp)>) -> Self { + Self { + items: checkpoints, + checkpoint, + } + } + + /// take the region checkpoints from the structure. + pub fn take_region_checkpoints(&mut self) -> Vec<(Region, TimeStamp)> { + std::mem::take(&mut self.items) + } + + /// get the global checkpoint. + pub fn global_checkpoint(&self) -> TimeStamp { + self.checkpoint + } +} + +/// the abstraction over a "DB" which provides the initial scanning. +trait InitialScan: Clone { + fn do_initial_scan( + &self, + region: &Region, + start_ts: TimeStamp, + handle: ObserveHandle, + on_finish: impl FnOnce() + Send + 'static, + ) -> Result; +} + +impl InitialScan for InitialDataLoader +where + E: KvEngine, + R: RegionInfoProvider + Clone + 'static, + RT: RaftStoreRouter, +{ + fn do_initial_scan( + &self, + region: &Region, + start_ts: TimeStamp, + handle: ObserveHandle, + on_finish: impl FnOnce() + Send + 'static, + ) -> Result { + let region_id = region.get_id(); + let snap = self.observe_over_with_retry(region, move || { + ChangeObserver::from_pitr(region_id, handle.clone()) + })?; + let stat = self.do_initial_scan(region, start_ts, snap, on_finish)?; + Ok(stat) + } +} + +impl ScanCmd { + /// execute the initial scanning via the specificated [`InitialDataLoader`]. 
+ fn exec_by(self, initial_scan: impl InitialScan) -> Result<()> { + let Self { + region, + handle, + last_checkpoint, + work, + } = self; + let begin = Instant::now_coarse(); + let stat = + initial_scan.do_initial_scan(®ion, last_checkpoint, handle, move || drop(work))?; + info!("initial scanning of leader transforming finished!"; "takes" => ?begin.saturating_elapsed(), "region" => %region.get_id(), "from_ts" => %last_checkpoint); + utils::record_cf_stat("lock", &stat.lock); + utils::record_cf_stat("write", &stat.write); + utils::record_cf_stat("default", &stat.data); + Ok(()) + } +} + +fn scan_executor_loop( + init: impl InitialScan, + cmds: SyncReceiver, + canceled: Arc, +) { + while let Ok(cmd) = cmds.recv() { + #[cfg(feature = "failpoints")] + fail::fail_point!("execute_scan_command"); + debug!("handling initial scan request"; "region_id" => %cmd.region.get_id()); + metrics::PENDING_INITIAL_SCAN_LEN + .with_label_values(&["queuing"]) + .dec(); + if canceled.load(Ordering::Acquire) { + return; + } + metrics::PENDING_INITIAL_SCAN_LEN + .with_label_values(&["executing"]) + .inc(); + let region_id = cmd.region.get_id(); + if let Err(err) = cmd.exec_by(init.clone()) { + if err.error_code() != OBSERVE_CANCELED { + err.report(format!("during initial scanning of region {}", region_id)); + } + } + metrics::PENDING_INITIAL_SCAN_LEN + .with_label_values(&["executing"]) + .dec(); + } +} + +/// spawn the executors in the scan pool. +/// we make workers thread instead of spawn scan task directly into the pool because the [`InitialDataLoader`] isn't `Sync` hence +/// we must use it very carefully or rustc (along with tokio) would complain that we made a `!Send` future. +/// so we have moved the data loader to the synchronous context so its reference won't be shared between threads any more. 
+fn spawn_executors(init: impl InitialScan + Send + 'static, number: usize) -> ScanPoolHandle { + let (tx, rx) = crossbeam::channel::bounded(MESSAGE_BUFFER_SIZE); + let pool = create_scan_pool(number); + let stopped = Arc::new(AtomicBool::new(false)); + for _ in 0..number { + let init = init.clone(); + let rx = rx.clone(); + let stopped = stopped.clone(); + pool.spawn(move |_: &mut YatpHandle<'_>| { + tikv_alloc::add_thread_memory_accessor(); + let _io_guard = file_system::WithIOType::new(file_system::IOType::Replication); + scan_executor_loop(init, rx, stopped); + tikv_alloc::remove_thread_memory_accessor(); + }) + } + ScanPoolHandle { + tx, + _pool: pool, + stopped, + } +} + +struct ScanPoolHandle { + tx: SyncSender, + stopped: Arc, + + // in fact, we won't use the pool any more. + // but we should hold the reference to the pool so it won't try to join the threads running. + _pool: ScanPool, +} + +impl Drop for ScanPoolHandle { + fn drop(&mut self) { + self.stopped.store(true, Ordering::Release); + } +} + +impl ScanPoolHandle { + fn request(&self, cmd: ScanCmd) -> std::result::Result<(), SendError> { + if self.stopped.load(Ordering::Acquire) { + warn!("scan pool is stopped, ignore the scan command"; "region" => %cmd.region.get_id()); + return Ok(()); + } + metrics::PENDING_INITIAL_SCAN_LEN + .with_label_values(&["queuing"]) + .inc(); + self.tx.send(cmd) + } +} + +/// The default channel size. +const MESSAGE_BUFFER_SIZE: usize = 4096; + +/// The operator for region subscription. +/// It make a queue for operations over the `SubscriptionTracer`, generally, +/// we should only modify the `SubscriptionTracer` itself (i.e. insert records, remove records) at here. +/// So the order subscription / desubscription won't be broken. +pub struct RegionSubscriptionManager { + // Note: these fields appear everywhere, maybe make them a `context` type? 
+ regions: R, + meta_cli: MetadataClient, + pd_client: Arc, + range_router: Router, + scheduler: Scheduler, + observer: BackupStreamObserver, + subs: SubscriptionTracer, + + messenger: Sender, + scan_pool_handle: Arc, + scans: Arc, +} + +impl Clone for RegionSubscriptionManager +where + S: MetaStore + 'static, + R: RegionInfoProvider + Clone + 'static, + PDC: PdClient + 'static, +{ + fn clone(&self) -> Self { + Self { + regions: self.regions.clone(), + meta_cli: self.meta_cli.clone(), + // We should manually call Arc::clone here or rustc complains that `PDC` isn't `Clone`. + pd_client: Arc::clone(&self.pd_client), + range_router: self.range_router.clone(), + scheduler: self.scheduler.clone(), + observer: self.observer.clone(), + subs: self.subs.clone(), + messenger: self.messenger.clone(), + scan_pool_handle: self.scan_pool_handle.clone(), + scans: CallbackWaitGroup::new(), + } + } +} + +/// Create a yatp pool for doing initial scanning. +fn create_scan_pool(num_threads: usize) -> ScanPool { + yatp::Builder::new("log-backup-scan") + .max_thread_count(num_threads) + .build_callback_pool() +} + +impl RegionSubscriptionManager +where + S: MetaStore + 'static, + R: RegionInfoProvider + Clone + 'static, + PDC: PdClient + 'static, +{ + /// create a [`RegionSubscriptionManager`]. + /// + /// # returns + /// + /// a two-tuple, the first is the handle to the manager, the second is the operator loop future. 
+ pub fn start( + initial_loader: InitialDataLoader, + observer: BackupStreamObserver, + meta_cli: MetadataClient, + pd_client: Arc, + scan_pool_size: usize, + ) -> (Self, future![()]) + where + E: KvEngine, + RT: RaftStoreRouter + 'static, + { + let (tx, rx) = channel(MESSAGE_BUFFER_SIZE); + let scan_pool_handle = spawn_executors(initial_loader.clone(), scan_pool_size); + let op = Self { + regions: initial_loader.regions.clone(), + meta_cli, + pd_client, + range_router: initial_loader.sink.clone(), + scheduler: initial_loader.scheduler.clone(), + observer, + subs: initial_loader.tracing, + messenger: tx, + scan_pool_handle: Arc::new(scan_pool_handle), + scans: CallbackWaitGroup::new(), + }; + let fut = op.clone().region_operator_loop(rx); + (op, fut) + } + + /// send an operation request to the manager. + /// the returned future would be resolved after send is success. + /// the opeartion would be executed asynchronously. + pub async fn request(&self, op: ObserveOp) { + if let Err(err) = self.messenger.send(op).await { + annotate!(err, "BUG: region operator channel closed.") + .report("when executing region op"); + } + } + + /// wait initial scanning get finished. + pub fn wait(&self, timeout: Duration) -> future![bool] { + tokio::time::timeout(timeout, self.scans.wait()).map(|result| result.is_err()) + } + + /// the handler loop. 
+ async fn region_operator_loop(self, mut message_box: Receiver) { + while let Some(op) = message_box.recv().await { + info!("backup stream: on_modify_observe"; "op" => ?op); + match op { + ObserveOp::Start { region } => { + #[cfg(feature = "failpoints")] + fail::fail_point!("delay_on_start_observe"); + self.start_observe(region).await; + metrics::INITIAL_SCAN_REASON + .with_label_values(&["leader-changed"]) + .inc(); + crate::observer::IN_FLIGHT_START_OBSERVE_MESSAGE.fetch_sub(1, Ordering::SeqCst); + } + ObserveOp::Stop { ref region } => { + self.subs.deregister_region_if(region, |_, _| true); + } + ObserveOp::Destroy { ref region } => { + let stopped = self.subs.deregister_region_if(region, |old, new| { + raftstore::store::util::compare_region_epoch( + old.meta.get_region_epoch(), + new, + true, + true, + false, + ) + .map_err(|err| warn!("check epoch and stop failed."; "err" => %err)) + .is_ok() + }); + if stopped { + self.subs.destroy_stopped_region(region.get_id()); + } + } + ObserveOp::RefreshResolver { ref region } => self.refresh_resolver(region).await, + ObserveOp::NotifyFailToStartObserve { + region, + handle, + err, + } => { + info!("retry observe region"; "region" => %region.get_id(), "err" => %err); + // No need for retrying observe canceled. 
+ if err.error_code() == error_code::backup_stream::OBSERVE_CANCELED { + return; + } + match self.retry_observe(region, handle).await { + Ok(()) => {} + Err(e) => { + self.fatal( + e, + format!("While retring to observe region, origin error is {}", err), + ); + } + } + } + ObserveOp::ResolveRegions { callback, min_ts } => { + let now = Instant::now(); + let timedout = self.wait(Duration::from_secs(30)).await; + if timedout { + warn!("waiting for initial scanning done timed out, forcing progress(with risk of data loss)!"; + "take" => ?now.saturating_elapsed(), "timedout" => %timedout); + } + let cps = self.subs.resolve_with(min_ts); + let min_region = cps.iter().min_by_key(|(_, rts)| rts); + // If there isn't any region observed, the `min_ts` can be used as resolved ts safely. + let rts = min_region.map(|(_, rts)| *rts).unwrap_or(min_ts); + info!("getting checkpoint"; "defined_by_region" => ?min_region.map(|r| r.0.get_id()), "checkpoint" => %rts); + self.subs.warn_if_gap_too_huge(rts); + callback(ResolvedRegions::new(rts, cps)); + } + } + } + } + + fn fatal(&self, err: Error, message: String) { + try_send!(self.scheduler, Task::FatalError(message, Box::new(err))); + } + + async fn refresh_resolver(&self, region: &Region) { + let need_refresh_all = !self.subs.try_update_region(region); + + if need_refresh_all { + let canceled = self.subs.deregister_region_if(region, |_, _| true); + let handle = ObserveHandle::new(); + if canceled { + let for_task = self.find_task_by_region(region).unwrap_or_else(|| { + panic!( + "BUG: the region {:?} is register to no task but being observed", + region + ) + }); + metrics::INITIAL_SCAN_REASON + .with_label_values(&["region-changed"]) + .inc(); + let r = async { + self.observe_over_with_initial_data_from_checkpoint( + region, + self.get_last_checkpoint_of(&for_task, region).await?, + handle.clone(), + ); + Result::Ok(()) + } + .await; + if let Err(e) = r { + try_send!( + self.scheduler, + 
Task::ModifyObserve(ObserveOp::NotifyFailToStartObserve { + region: region.clone(), + handle, + err: Box::new(e) + }) + ); + } + } + } + } + + async fn try_start_observe(&self, region: &Region, handle: ObserveHandle) -> Result<()> { + match self.find_task_by_region(region) { + None => { + warn!( + "the region {:?} is register to no task but being observed (start_key = {}; end_key = {}; task_stat = {:?}): maybe stale, aborting", + region, + utils::redact(®ion.get_start_key()), + utils::redact(®ion.get_end_key()), + self.range_router + ); + } + + Some(for_task) => { + #[cfg(feature = "failpoints")] + fail::fail_point!("try_start_observe", |_| { + Err(Error::Other(box_err!("Nature is boring"))) + }); + let tso = self.get_last_checkpoint_of(&for_task, region).await?; + self.observe_over_with_initial_data_from_checkpoint(region, tso, handle.clone()); + } + } + Ok(()) + } + + async fn start_observe(&self, region: Region) { + let handle = ObserveHandle::new(); + if let Err(err) = self.try_start_observe(®ion, handle.clone()).await { + warn!("failed to start observe, retrying"; "err" => %err); + try_send!( + self.scheduler, + Task::ModifyObserve(ObserveOp::NotifyFailToStartObserve { + region, + handle, + err: Box::new(err) + }) + ); + } + } + + async fn retry_observe(&self, region: Region, handle: ObserveHandle) -> Result<()> { + let (tx, rx) = crossbeam::channel::bounded(1); + self.regions + .find_region_by_id( + region.get_id(), + Box::new(move |item| { + tx.send(item) + .expect("BUG: failed to send to newly created channel."); + }), + ) + .map_err(|err| { + annotate!( + err, + "failed to send request to region info accessor, server maybe too too too busy. 
(region id = {})", + region.get_id() + ) + })?; + let new_region_info = rx + .recv() + .map_err(|err| annotate!(err, "BUG?: unexpected channel message dropped."))?; + if new_region_info.is_none() { + metrics::SKIP_RETRY + .with_label_values(&["region-absent"]) + .inc(); + return Ok(()); + } + let new_region_info = new_region_info.unwrap(); + if new_region_info.role != StateRole::Leader { + metrics::SKIP_RETRY.with_label_values(&["not-leader"]).inc(); + return Ok(()); + } + // Note: we may fail before we insert the region info to the subscription map. + // At that time, the command isn't steal and we should retry it. + let mut exists = false; + let removed = self.subs.deregister_region_if(®ion, |old, _| { + exists = true; + let should_remove = old.handle().id == handle.id; + if !should_remove { + warn!("stale retry command"; "region" => ?region, "handle" => ?handle, "old_handle" => ?old.handle()); + } + should_remove + }); + if !removed && exists { + metrics::SKIP_RETRY + .with_label_values(&["stale-command"]) + .inc(); + return Ok(()); + } + metrics::INITIAL_SCAN_REASON + .with_label_values(&["retry"]) + .inc(); + self.start_observe(region).await; + Ok(()) + } + + async fn get_last_checkpoint_of(&self, task: &str, region: &Region) -> Result { + let meta_cli = self.meta_cli.clone(); + let cp = meta_cli.get_region_checkpoint(task, region).await?; + info!("got region checkpoint"; "region_id" => %region.get_id(), "checkpoint" => ?cp); + if matches!(cp.provider, CheckpointProvider::Global) { + metrics::STORE_CHECKPOINT_TS + .with_label_values(&[task]) + .set(cp.ts.into_inner() as _); + } + Ok(cp.ts) + } + + fn spawn_scan(&self, cmd: ScanCmd) { + // we should not spawn initial scanning tasks to the tokio blocking pool + // because it is also used for converting sync File I/O to async. (for now!) + // In that condition, if we blocking for some resources(for example, the `MemoryQuota`) + // at the block threads, we may meet some ghosty deadlock. 
+ let s = self.scan_pool_handle.request(cmd); + if let Err(err) = s { + let region_id = err.0.region.get_id(); + annotate!(err, "BUG: scan_pool closed") + .report(format!("during initial scanning for region {}", region_id)); + } + } + + fn observe_over_with_initial_data_from_checkpoint( + &self, + region: &Region, + last_checkpoint: TimeStamp, + handle: ObserveHandle, + ) { + self.subs + .register_region(region, handle.clone(), Some(last_checkpoint)); + self.spawn_scan(ScanCmd { + region: region.clone(), + handle, + last_checkpoint, + work: self.scans.clone().work(), + }) + } + + fn find_task_by_region(&self, r: &Region) -> Option { + self.range_router + .find_task_by_range(&r.start_key, &r.end_key) + } +} + +#[cfg(test)] +mod test { + use kvproto::metapb::Region; + use tikv::storage::Statistics; + + use super::InitialScan; + #[cfg(feature = "failpoints")] + use crate::{subscription_manager::spawn_executors, utils::CallbackWaitGroup}; + + #[derive(Clone, Copy)] + struct NoopInitialScan; + + impl InitialScan for NoopInitialScan { + fn do_initial_scan( + &self, + _region: &Region, + _start_ts: txn_types::TimeStamp, + _handle: raftstore::coprocessor::ObserveHandle, + on_finish: impl FnOnce() + Send + 'static, + ) -> crate::errors::Result { + on_finish(); + Ok(Statistics::default()) + } + } + + #[cfg(feature = "failpoints")] + fn should_finish_in(f: impl FnOnce() + Send + 'static, d: std::time::Duration) { + let (tx, rx) = futures::channel::oneshot::channel(); + std::thread::spawn(move || { + f(); + tx.send(()).unwrap(); + }); + let pool = tokio::runtime::Builder::new_current_thread() + .enable_time() + .build() + .unwrap(); + let _e = pool.handle().enter(); + pool.block_on(tokio::time::timeout(d, rx)).unwrap().unwrap(); + } + + #[test] + #[cfg(feature = "failpoints")] + fn test_message_delay_and_exit() { + use std::time::Duration; + + use super::ScanCmd; + + let pool = spawn_executors(NoopInitialScan, 1); + let wg = CallbackWaitGroup::new(); + 
fail::cfg("execute_scan_command", "sleep(100)").unwrap(); + for _ in 0..100 { + let wg = wg.clone(); + pool.request(ScanCmd { + region: Default::default(), + handle: Default::default(), + last_checkpoint: Default::default(), + // Note: Maybe make here a Box or some other trait? + work: wg.work(), + }) + .unwrap() + } + + should_finish_in(move || drop(pool), Duration::from_secs(5)); + } +} diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 9199f508d62..e8a22f9840e 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -15,10 +15,21 @@ use crate::{debug, metrics::TRACK_REGION, utils}; #[derive(Clone, Default, Debug)] pub struct SubscriptionTracer(Arc>); +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub enum SubscriptionState { + /// When it is newly added (maybe after split or leader transfered from other store), without any flush. + Fresh, + /// It has been flushed, and running normally. + Normal, + /// It has been moved to other store. + Removal, +} + pub struct RegionSubscription { pub meta: Region, pub(crate) handle: ObserveHandle, - resolver: TwoPhaseResolver, + pub(crate) resolver: TwoPhaseResolver, + state: SubscriptionState, } impl std::fmt::Debug for RegionSubscription { @@ -31,17 +42,32 @@ impl std::fmt::Debug for RegionSubscription { } impl RegionSubscription { + /// move self out. 
+ fn take(&mut self) -> Self { + Self { + meta: self.meta.clone(), + handle: self.handle.clone(), + resolver: std::mem::replace(&mut self.resolver, TwoPhaseResolver::new(0, None)), + state: self.state, + } + } + pub fn new(region: Region, handle: ObserveHandle, start_ts: Option) -> Self { let resolver = TwoPhaseResolver::new(region.get_id(), start_ts); Self { handle, meta: region, resolver, + state: SubscriptionState::Fresh, } } - pub fn stop_observing(&self) { - self.handle.stop_observing() + pub fn stop(&mut self) { + if self.state == SubscriptionState::Removal { + return; + } + self.handle.stop_observing(); + self.state = SubscriptionState::Removal; } pub fn is_observing(&self) -> bool { @@ -58,22 +84,11 @@ impl RegionSubscription { } impl SubscriptionTracer { - /// get the current safe point: data before this ts have already be flushed and be able to be GCed. - pub fn safepoint(&self) -> TimeStamp { - // use the current resolved_ts is safe because it is only advanced when flushing. - self.0 - .iter() - .map(|r| r.resolver.resolved_ts()) - .min() - // NOTE: Maybe use the current timestamp? - .unwrap_or(TimeStamp::zero()) - } - /// clear the current `SubscriptionTracer`. 
pub fn clear(&self) { self.0.retain(|_, v| { - v.stop_observing(); - TRACK_REGION.with_label_values(&["dec"]).inc(); + v.stop(); + TRACK_REGION.dec(); false }); } @@ -89,25 +104,28 @@ impl SubscriptionTracer { start_ts: Option, ) { info!("start listen stream from store"; "observer" => ?handle, "region_id" => %region.get_id()); - TRACK_REGION.with_label_values(&["inc"]).inc(); - if let Some(o) = self.0.insert( + TRACK_REGION.inc(); + if let Some(mut o) = self.0.insert( region.get_id(), RegionSubscription::new(region.clone(), handle, start_ts), ) { - TRACK_REGION.with_label_values(&["dec"]).inc(); - warn!("register region which is already registered"; "region_id" => %region.get_id()); - o.stop_observing(); + if o.state != SubscriptionState::Removal { + TRACK_REGION.dec(); + warn!("register region which is already registered"; "region_id" => %region.get_id()); + } + o.stop(); } } /// try advance the resolved ts with the min ts of in-memory locks. - pub fn resolve_with(&self, min_ts: TimeStamp) -> TimeStamp { + /// returns the regions and theirs resolved ts. + pub fn resolve_with(&self, min_ts: TimeStamp) -> Vec<(Region, TimeStamp)> { self.0 .iter_mut() - .map(|mut s| s.resolver.resolve(min_ts)) - .min() - // If there isn't any region observed, the `min_ts` can be used as resolved ts safely. - .unwrap_or(min_ts) + // Don't advance the checkpoint ts of removed region. + .filter(|s| s.state != SubscriptionState::Removal) + .map(|mut s| (s.meta.clone(), s.resolver.resolve(min_ts))) + .collect() } #[inline(always)] @@ -130,24 +148,31 @@ impl SubscriptionTracer { } } + /// destroy subscription if the subscription is stopped. + pub fn destroy_stopped_region(&self, region_id: u64) { + self.0 + .remove_if(®ion_id, |_, sub| sub.state == SubscriptionState::Removal); + } + /// try to mark a region no longer be tracked by this observer. /// returns whether success (it failed if the region hasn't been observed when calling this.) 
- pub fn deregister_region( + pub fn deregister_region_if( &self, region: &Region, if_cond: impl FnOnce(&RegionSubscription, &Region) -> bool, ) -> bool { let region_id = region.get_id(); - let remove_result = self - .0 - .remove_if(®ion_id, |_, old_region| if_cond(old_region, region)); + let remove_result = self.0.get_mut(®ion_id); match remove_result { - Some(o) => { - TRACK_REGION.with_label_values(&["dec"]).inc(); - o.1.stop_observing(); - info!("stop listen stream from store"; "observer" => ?o.1, "region_id"=> %region_id); + Some(mut o) if if_cond(o.value(), region) => { + if o.state != SubscriptionState::Removal { + TRACK_REGION.dec(); + } + o.value_mut().stop(); + info!("stop listen stream from store"; "observer" => ?o.value(), "region_id"=> %region_id); true } + Some(_) => false, None => { warn!("trying to deregister region not registered"; "region_id" => %region_id); false @@ -181,22 +206,60 @@ impl SubscriptionTracer { false } + /// Remove and collect the subscriptions have been marked as removed. + pub fn collect_removal_subs(&self) -> Vec { + let mut result = vec![]; + self.0.retain(|_k, v| { + if v.state == SubscriptionState::Removal { + result.push(v.take()); + false + } else { + true + } + }); + result + } + + /// Collect the fresh subscriptions, and mark them as Normal. + pub fn collect_fresh_subs(&self) -> Vec { + self.0 + .iter_mut() + .filter_map(|mut s| { + let v = s.value_mut(); + if v.state == SubscriptionState::Fresh { + v.state = SubscriptionState::Normal; + Some(v.meta.clone()) + } else { + None + } + }) + .collect() + } + + /// Remove all "Removal" entries. + /// Set all "Fresh" entries to "Normal". + pub fn update_status_for_v3(&self) { + self.0.retain(|_k, v| match v.state { + SubscriptionState::Fresh => { + v.state = SubscriptionState::Normal; + true + } + SubscriptionState::Normal => true, + SubscriptionState::Removal => false, + }) + } + /// check whether the region_id should be observed by this observer. 
pub fn is_observing(&self, region_id: u64) -> bool { - let mut exists = false; - - // The region traced, check it whether is still be observing, - // if not, remove it. - let still_observing = self - .0 - // Assuming this closure would be called iff the key exists. - // So we can elide a `contains` check. - .remove_if(®ion_id, |_, o| { - exists = true; - !o.is_observing() - }) - .is_none(); - exists && still_observing + let sub = self.0.get_mut(®ion_id); + match sub { + Some(mut sub) if !sub.is_observing() || sub.state == SubscriptionState::Removal => { + sub.value_mut().stop(); + false + } + Some(_) => true, + None => false, + } } pub fn get_subscription_of( @@ -207,7 +270,7 @@ impl SubscriptionTracer { } } -/// This enhanced version of `Resolver` allow some unorder of lock events. +/// This enhanced version of `Resolver` allow some unordered lock events. /// The name "2-phase" means this is used for 2 *concurrency* phases of observing a region: /// 1. Doing the initial scanning. /// 2. Listening at the incremental data. @@ -216,24 +279,24 @@ impl SubscriptionTracer { /// +->(Start TS Of Task) +->(Task registered to KV) /// +--------------------------------+------------------------> /// ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^ ^~~~~~~~~~~~~~~~~~~~~~~~~ -/// | +-> Phase 2: Listening incremtnal data. +/// | +-> Phase 2: Listening incremental data. /// +-> Phase 1: Initial scanning scans writes between start ts and now. /// ``` /// -/// In backup-stream, we execute these two tasks parallelly. Which may make some race conditions: -/// - When doing initial scanning, there may be a flush triggered, but the defult resolver +/// In backup-stream, we execute these two tasks parallel. Which may make some race conditions: +/// - When doing initial scanning, there may be a flush triggered, but the default resolver /// would probably resolved to the tip of incremental events. 
/// - When doing initial scanning, we meet and track a lock already meet by the incremental events, /// then the default resolver cannot untrack this lock any more. /// -/// This version of resolver did some change for solve these problmes: +/// This version of resolver did some change for solve these problems: /// - The resolver won't advance the resolved ts to greater than `stable_ts` if there is some. This /// can help us prevent resolved ts from advancing when initial scanning hasn't finished yet. /// - When we `untrack` a lock haven't been tracked, this would record it, and skip this lock if we want to track it then. /// This would be safe because: /// - untracking a lock not be tracked is no-op for now. /// - tracking a lock have already being untracked (unordered call of `track` and `untrack`) wouldn't happen at phase 2 for same region. -/// but only when phase 1 and phase 2 happend concurrently, at that time, we wouldn't and cannot advance the resolved ts. +/// but only when phase 1 and phase 2 happened concurrently, at that time, we wouldn't and cannot advance the resolved ts. pub struct TwoPhaseResolver { resolver: Resolver, future_locks: Vec, @@ -328,7 +391,18 @@ impl TwoPhaseResolver { for lock in std::mem::take(&mut self.future_locks).into_iter() { self.handle_future_lock(lock); } - self.stable_ts = None + let ts = self.stable_ts.take(); + match ts { + Some(ts) => { + // advance the internal resolver. + // the start ts of initial scanning would be a safe ts for min ts + // -- because is used to be a resolved ts. 
+ self.resolver.resolve(ts); + } + None => { + warn!("BUG: a two-phase resolver is executing phase_one_done when not in phase one"; "resolver" => ?self) + } + } } } @@ -343,9 +417,11 @@ impl std::fmt::Debug for TwoPhaseResolver { #[cfg(test)] mod test { + use kvproto::metapb::{Region, RegionEpoch}; + use raftstore::coprocessor::ObserveHandle; use txn_types::TimeStamp; - use super::TwoPhaseResolver; + use super::{SubscriptionTracer, TwoPhaseResolver}; #[test] fn test_two_phase_resolver() { @@ -372,4 +448,73 @@ mod test { r.untrack_lock(&key[..]); assert_eq!(r.resolve(ts(57)), ts(57)); } + + fn region(id: u64, version: u64, conf_version: u64) -> Region { + let mut r = Region::new(); + let mut e = RegionEpoch::new(); + e.set_version(version); + e.set_conf_ver(conf_version); + r.set_id(id); + r.set_region_epoch(e); + r + } + + #[test] + fn test_delay_remove() { + let subs = SubscriptionTracer::default(); + let handle = ObserveHandle::new(); + subs.register_region(®ion(1, 1, 1), handle, Some(TimeStamp::new(42))); + assert!(subs.get_subscription_of(1).is_some()); + assert!(subs.is_observing(1)); + subs.deregister_region_if(®ion(1, 1, 1), |_, _| true); + assert!(!subs.is_observing(1)); + } + + #[test] + fn test_cal_checkpoint() { + let subs = SubscriptionTracer::default(); + subs.register_region( + ®ion(1, 1, 1), + ObserveHandle::new(), + Some(TimeStamp::new(42)), + ); + subs.register_region(®ion(2, 2, 1), ObserveHandle::new(), None); + subs.register_region( + ®ion(3, 4, 1), + ObserveHandle::new(), + Some(TimeStamp::new(88)), + ); + subs.get_subscription_of(3) + .unwrap() + .resolver + .phase_one_done(); + subs.register_region( + ®ion(4, 8, 1), + ObserveHandle::new(), + Some(TimeStamp::new(92)), + ); + let mut region4_sub = subs.get_subscription_of(4).unwrap(); + region4_sub.resolver.phase_one_done(); + region4_sub + .resolver + .track_lock(TimeStamp::new(128), b"Alpi".to_vec()); + subs.register_region(®ion(5, 8, 1), ObserveHandle::new(), None); + 
subs.deregister_region_if(®ion(5, 8, 1), |_, _| true); + drop(region4_sub); + + let mut rs = subs.resolve_with(TimeStamp::new(1000)); + rs.sort_by_key(|k| k.0.get_id()); + assert_eq!( + rs, + vec![ + (region(1, 1, 1), TimeStamp::new(42)), + (region(2, 2, 1), TimeStamp::new(1000)), + (region(3, 4, 1), TimeStamp::new(1000)), + (region(4, 8, 1), TimeStamp::new(128)), + ] + ); + let removal = subs.collect_removal_subs(); + assert_eq!(removal.len(), 1); + assert_eq!(removal[0].meta.get_id(), 5); + } } diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index c104a100b56..725a1c17f51 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -4,21 +4,36 @@ use std::{ borrow::Borrow, collections::{hash_map::RandomState, BTreeMap, HashMap}, ops::{Bound, RangeBounds}, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, time::Duration, }; +use engine_rocks::ReadPerfInstant; use engine_traits::{CfName, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE}; -use futures::{channel::mpsc, executor::block_on, StreamExt}; +use futures::{channel::mpsc, executor::block_on, FutureExt, StreamExt}; use kvproto::raft_cmdpb::{CmdType, Request}; use raft::StateRole; use raftstore::{coprocessor::RegionInfoProvider, RegionInfo}; use tikv::storage::CfStatistics; -use tikv_util::{box_err, time::Instant, warn, worker::Scheduler, Either}; -use tokio::sync::{Mutex, RwLock}; +use tikv_util::{ + box_err, + sys::inspector::{ + self_thread_inspector, IoStat, ThreadInspector, ThreadInspectorImpl as OsInspector, + }, + time::Instant, + warn, + worker::Scheduler, + Either, +}; +use tokio::sync::{oneshot, Mutex, RwLock}; use txn_types::{Key, Lock, LockType}; use crate::{ errors::{Error, Result}, + metadata::store::BoxFuture, Task, }; @@ -401,9 +416,139 @@ pub fn should_track_lock(l: &Lock) -> bool { } } +pub struct CallbackWaitGroup { + running: AtomicUsize, + on_finish_all: std::sync::Mutex>>, +} + +/// A shortcut for making an opaque 
future type for return type or argument type, +/// which is sendable and not borrowing any variables. +/// +/// `fut![T]` == `impl Future + Send + 'static` +#[macro_export(crate)] +macro_rules! future { + ($t:ty) => { impl core::future::Future + Send + 'static }; +} + +impl CallbackWaitGroup { + pub fn new() -> Arc { + Arc::new(Self { + running: AtomicUsize::new(0), + on_finish_all: std::sync::Mutex::default(), + }) + } + + fn work_done(&self) { + let last = self.running.fetch_sub(1, Ordering::SeqCst); + if last == 1 { + self.on_finish_all + .lock() + .unwrap() + .drain(..) + .for_each(|x| x()) + } + } + + /// wait until all running tasks done. + pub fn wait(&self) -> BoxFuture<()> { + // Fast path: no uploading. + if self.running.load(Ordering::SeqCst) == 0 { + return Box::pin(futures::future::ready(())); + } + + let (tx, rx) = oneshot::channel(); + self.on_finish_all.lock().unwrap().push(Box::new(move || { + // The waiter may timed out. + let _ = tx.send(()); + })); + // try to acquire the lock again. + if self.running.load(Ordering::SeqCst) == 0 { + return Box::pin(futures::future::ready(())); + } + Box::pin(rx.map(|_| ())) + } + + /// make a work, as long as the return value held, mark a work in the group is running. + pub fn work(self: Arc) -> Work { + self.running.fetch_add(1, Ordering::SeqCst); + Work(self) + } +} + +pub struct Work(Arc); + +impl Drop for Work { + fn drop(&mut self) { + self.0.work_done(); + } +} + +struct ReadThroughputRecorder { + // The system tool set. + ins: Option, + begin: Option, + // Once the system tool set get unavailable, + // we would use the "ejector" -- RocksDB perf context. + // NOTE: In fact I'm not sure whether we need the result of system level tool set -- + // but this is the current implement of cdc. We'd better keep consistent with them. 
+ ejector: ReadPerfInstant, +} + +impl ReadThroughputRecorder { + fn start() -> Self { + let r = self_thread_inspector().ok().and_then(|insp| { + let stat = insp.io_stat().ok()??; + Some((insp, stat)) + }); + match r { + Some((ins, begin)) => Self { + ins: Some(ins), + begin: Some(begin), + ejector: ReadPerfInstant::new(), + }, + _ => Self { + ins: None, + begin: None, + ejector: ReadPerfInstant::new(), + }, + } + } + + fn try_get_delta_from_unix(&self) -> Option { + let ins = self.ins.as_ref()?; + let begin = self.begin.as_ref()?; + let end = ins.io_stat().ok()??; + Some(end.read - begin.read) + } + + fn end(self) -> u64 { + self.try_get_delta_from_unix() + .unwrap_or_else(|| self.ejector.delta().block_read_byte) + } +} + +/// try to record read throughput. +/// this uses the `proc` fs in the linux for recording the throughput. +/// if that failed, we would use the RocksDB perf context. +pub fn with_record_read_throughput(f: impl FnOnce() -> T) -> (T, u64) { + let recorder = ReadThroughputRecorder::start(); + let r = f(); + (r, recorder.end()) +} + #[cfg(test)] mod test { - use crate::utils::SegmentMap; + use std::{ + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + time::Duration, + }; + + use futures::executor::block_on; + + use crate::utils::{CallbackWaitGroup, SegmentMap}; #[test] fn test_segment_tree() { @@ -427,4 +572,120 @@ mod test { assert!(tree.is_overlapping((&2, &10))); assert!(tree.is_overlapping((&0, &9999999))); } + + #[test] + fn test_wait_group() { + #[derive(Debug)] + struct Case { + bg_task: usize, + repeat: usize, + } + + fn run_case(c: Case) { + for i in 0..c.repeat { + let wg = CallbackWaitGroup::new(); + let cnt = Arc::new(AtomicUsize::new(c.bg_task)); + for _ in 0..c.bg_task { + let cnt = cnt.clone(); + let work = wg.clone().work(); + tokio::spawn(async move { + cnt.fetch_sub(1, Ordering::SeqCst); + drop(work); + }); + } + let _ = block_on(tokio::time::timeout(Duration::from_secs(20), wg.wait())).unwrap(); + 
assert_eq!(cnt.load(Ordering::SeqCst), 0, "{:?}@{}", c, i); + } + } + + let cases = [ + Case { + bg_task: 200000, + repeat: 1, + }, + Case { + bg_task: 65535, + repeat: 1, + }, + Case { + bg_task: 512, + repeat: 1, + }, + Case { + bg_task: 2, + repeat: 100000, + }, + Case { + bg_task: 1, + repeat: 100000, + }, + Case { + bg_task: 0, + repeat: 1, + }, + ]; + + let pool = tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .enable_time() + .build() + .unwrap(); + let _guard = pool.handle().enter(); + for case in cases { + run_case(case) + } + } + + /// skip it currently. Test it at local env successfully but failed at pod. + #[cfg(FALSE)] + #[test] + fn test_recorder() { + use engine_rocks::{raw::DB, RocksEngine}; + use engine_traits::{Iterable, KvEngine, Mutable, WriteBatch, WriteBatchExt, CF_DEFAULT}; + use tempdir::TempDir; + + let p = TempDir::new("test_db").unwrap(); + let mut opt = DBOptions::default(); + opt.create_if_missing(true); + let db = DB::open(opt.clone(), p.path().as_os_str().to_str().unwrap()).unwrap(); + let engine = RocksEngine::from_db(Arc::new(db)); + let mut wb = engine.write_batch(); + for i in 0..100 { + wb.put_cf(CF_DEFAULT, format!("hello{}", i).as_bytes(), b"world") + .unwrap(); + } + let mut wopt = WriteOptions::new(); + wopt.set_sync(true); + wb.write_opt(&wopt).unwrap(); + // force memtable to disk. + engine.get_sync_db().compact_range(None, None); + + let (items, size) = super::with_record_read_throughput(|| { + let mut items = vec![]; + let snap = engine.snapshot(); + snap.scan(b"", b"", false, |k, v| { + items.push((k.to_owned(), v.to_owned())); + Ok(true) + }) + .unwrap(); + items + }); + + let items_size = items.iter().map(|(k, v)| k.len() + v.len()).sum::() as u64; + + // considering the compression, we may get at least 1/2 of the real size. 
+ assert!( + size > items_size / 2, + "the size recorded is too small: {} vs {}", + size, + items_size + ); + // considering the read amplification, we may get at most 2x of the real size. + assert!( + size < items_size * 2, + "the size recorded is too big: {} vs {}", + size, + items_size + ); + } } diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index 339dd07f773..fccd8a0626a 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -10,7 +10,11 @@ use std::{ }; use backup_stream::{ - metadata::{store::SlashEtcStore, MetadataClient, StreamTask}, + errors::Result, + metadata::{ + store::{MetaStore, SlashEtcStore}, + MetadataClient, StreamTask, + }, observer::BackupStreamObserver, router::Router, Endpoint, Task, @@ -78,9 +82,129 @@ fn make_encoded_record_key(table_id: i64, handle: u64, ts: u64) -> Vec { key.append_ts(TimeStamp::new(ts)).into_encoded() } +#[derive(Clone)] +struct ErrorStore { + inner: S, + + error_provider: Arc Result<()> + Send + Sync>, +} + +pub struct SuiteBuilder { + name: String, + nodes: usize, + use_v3: bool, + metastore_error: Box Result<()> + Send + Sync>, +} + +impl SuiteBuilder { + pub fn new_named(s: &str) -> Self { + Self { + name: s.to_owned(), + nodes: 4, + use_v3: false, + metastore_error: Box::new(|_| Ok(())), + } + } + + pub fn use_v3(mut self) -> Self { + self.use_v3 = true; + self + } + + pub fn nodes(mut self, n: usize) -> Self { + self.nodes = n; + self + } + + pub fn inject_meta_store_error(mut self, f: F) -> Self + where + F: Fn(&str) -> Result<()> + Send + Sync + 'static, + { + self.metastore_error = Box::new(f); + self + } + + pub fn build(self) -> Suite { + let Self { + name: case, + nodes: n, + use_v3, + metastore_error, + } = self; + + info!("start test"; "case" => %case, "nodes" => %n); + let cluster = new_server_cluster(42, n); + let mut suite = Suite { + endpoints: Default::default(), + meta_store: ErrorStore { + inner: 
Default::default(), + + error_provider: Arc::from(metastore_error), + }, + obs: Default::default(), + tikv_cli: Default::default(), + env: Arc::new(grpcio::Environment::new(1)), + cluster, + + temp_files: TempDir::new("temp").unwrap(), + flushed_files: TempDir::new("flush").unwrap(), + case_name: case, + }; + for id in 1..=(n as u64) { + let worker = suite.start_br_stream_on(id); + suite.endpoints.insert(id, worker); + } + suite.cluster.run(); + for id in 1..=(n as u64) { + suite.start_endpoint(id, use_v3); + } + // TODO: The current mock metastore (slash_etc) doesn't supports multi-version. + // We must wait until the endpoints get ready to watching the metastore, or some modifies may be lost. + // Either make Endpoint::with_client wait until watch did start or make slash_etc support multi-version, + // then we can get rid of this sleep. + std::thread::sleep(Duration::from_secs(1)); + suite + } +} + +#[async_trait::async_trait] +impl MetaStore for ErrorStore { + type Snap = S::Snap; + + async fn snapshot(&self) -> backup_stream::errors::Result { + (self.error_provider)("snapshot")?; + self.inner.snapshot().await + } + + async fn watch( + &self, + keys: backup_stream::metadata::store::Keys, + start_rev: i64, + ) -> backup_stream::errors::Result { + (self.error_provider)("watch")?; + self.inner.watch(keys, start_rev).await + } + + async fn txn( + &self, + txn: backup_stream::metadata::store::Transaction, + ) -> backup_stream::errors::Result<()> { + (self.error_provider)("txn")?; + self.inner.txn(txn).await + } + + async fn txn_cond( + &self, + txn: backup_stream::metadata::store::CondTransaction, + ) -> backup_stream::errors::Result<()> { + (self.error_provider)("txn_cond")?; + self.inner.txn_cond(txn).await + } +} + pub struct Suite { endpoints: HashMap>, - meta_store: SlashEtcStore, + meta_store: ErrorStore, cluster: Cluster, tikv_cli: HashMap, obs: HashMap, @@ -123,7 +247,7 @@ impl Suite { worker } - fn start_endpoint(&mut self, id: u64) { + fn 
start_endpoint(&mut self, id: u64, use_v3: bool) { let cluster = &mut self.cluster; let worker = self.endpoints.get_mut(&id).unwrap(); let sim = cluster.sim.wl(); @@ -132,6 +256,7 @@ impl Suite { let regions = sim.region_info_accessors.get(&id).unwrap().clone(); let mut cfg = BackupStreamConfig::default(); cfg.enable = true; + cfg.use_checkpoint_v3 = use_v3; cfg.temp_path = format!("/{}/{}", self.temp_files.path().display(), id); let ob = self.obs.get(&id).unwrap().clone(); let endpoint = Endpoint::new( @@ -148,37 +273,7 @@ impl Suite { worker.start(endpoint); } - pub fn new(case: &str, n: usize) -> Self { - let cluster = new_server_cluster(42, n); - let mut suite = Self { - endpoints: Default::default(), - meta_store: Default::default(), - obs: Default::default(), - tikv_cli: Default::default(), - env: Arc::new(grpcio::Environment::new(1)), - cluster, - - temp_files: TempDir::new("temp").unwrap(), - flushed_files: TempDir::new("flush").unwrap(), - case_name: case.to_owned(), - }; - for id in 1..=(n as u64) { - let worker = suite.start_br_stream_on(id); - suite.endpoints.insert(id, worker); - } - suite.cluster.run(); - for id in 1..=(n as u64) { - suite.start_endpoint(id); - } - // TODO: The current mock metastore (slash_etc) doesn't supports multi-version. - // We must wait until the endpoints get ready to watching the metastore, or some modifies may be lost. - // Either make Endpoint::with_client wait until watch did start or make slash_etc support multi-version, - // then we can get rid of this sleep. 
- std::thread::sleep(Duration::from_secs(1)); - suite - } - - fn get_meta_cli(&self) -> MetadataClient { + fn get_meta_cli(&self) -> MetadataClient> { MetadataClient::new(self.meta_store.clone(), 0) } @@ -239,7 +334,8 @@ impl Suite { } fn force_flush_files(&self, task: &str) { - self.run(|| Task::ForceFlush(task.to_owned())) + self.run(|| Task::ForceFlush(task.to_owned())); + self.sync(); } fn run(&self, mut t: impl FnMut() -> Task) { @@ -452,6 +548,10 @@ impl Suite { pub fn wait_for_flush(&self) { use std::ffi::OsString; + std::fs::File::open(&self.temp_files) + .unwrap() + .sync_all() + .unwrap(); for _ in 0..100 { if !walkdir::WalkDir::new(&self.temp_files) .into_iter() @@ -499,15 +599,19 @@ fn run_async_test(test: impl Future) -> T { mod test { use std::time::Duration; - use backup_stream::{errors::Error, metadata::MetadataClient, Task}; + use backup_stream::{ + errors::Error, metadata::MetadataClient, GetCheckpointResult, RegionCheckpointOperation, + RegionSet, Task, + }; use tikv_util::{box_err, defer, info, HandyRwLock}; use txn_types::TimeStamp; - use crate::{make_record_key, make_split_key_at_record, run_async_test}; + use crate::{make_record_key, make_split_key_at_record, run_async_test, SuiteBuilder}; #[test] fn basic() { - let mut suite = super::Suite::new("basic", 4); + let mut suite = super::SuiteBuilder::new_named("basic").use_v3().build(); + fail::cfg("try_start_observe", "1*return").unwrap(); run_async_test(async { // write data before the task starting, for testing incremental scanning. @@ -527,7 +631,9 @@ mod test { #[test] fn with_split() { - let mut suite = super::Suite::new("with_split", 4); + let mut suite = super::SuiteBuilder::new_named("with_split") + .use_v3() + .build(); run_async_test(async { let round1 = suite.write_records(0, 128, 1).await; suite.must_split(&make_split_key_at_record(1, 42)); @@ -546,7 +652,9 @@ mod test { #[test] /// This case tests whether the backup can continue when the leader failes. 
fn leader_down() { - let mut suite = super::Suite::new("leader_down", 4); + let mut suite = super::SuiteBuilder::new_named("leader_down") + .use_v3() + .build(); suite.must_register_task(1, "test_leader_down"); suite.sync(); let round1 = run_async_test(suite.write_records(0, 128, 1)); @@ -566,7 +674,9 @@ mod test { /// This case tests whehter the checkpoint ts (next backup ts) can be advanced correctly /// when async commit is enabled. fn async_commit() { - let mut suite = super::Suite::new("async_commit", 3); + let mut suite = super::SuiteBuilder::new_named("async_commit") + .nodes(3) + .build(); run_async_test(async { suite.must_register_task(1, "test_async_commit"); suite.sync(); @@ -596,7 +706,9 @@ mod test { #[test] fn fatal_error() { - let mut suite = super::Suite::new("fatal_error", 3); + let mut suite = super::SuiteBuilder::new_named("fatal_error") + .nodes(3) + .build(); suite.must_register_task(1, "test_fatal_error"); suite.sync(); run_async_test(suite.write_records(0, 1, 1)); @@ -633,7 +745,7 @@ mod test { safepoints.iter().any(|sp| { sp.serivce.contains(&format!("{}", victim)) && sp.ttl >= Duration::from_secs(60 * 60 * 24) - && sp.safepoint.into_inner() == checkpoint + && sp.safepoint.into_inner() == checkpoint - 1 }), "{:?}", safepoints @@ -647,7 +759,9 @@ mod test { fail::remove("delay_on_start_observe"); fail::remove("delay_on_flush"); }} - let mut suite = super::Suite::new("inflight_message", 3); + let mut suite = super::SuiteBuilder::new_named("inflight_message") + .nodes(3) + .build(); suite.must_register_task(1, "inflight_message"); run_async_test(suite.write_records(0, 128, 1)); fail::cfg("delay_on_flush", "pause").unwrap(); @@ -679,4 +793,56 @@ mod test { // The checkpoint should be advanced as expection when the inflight message has been consumed. 
assert!(checkpoint > 512, "checkpoint = {}", checkpoint); } + + #[test] + fn region_checkpoint_info() { + let mut suite = super::SuiteBuilder::new_named("checkpoint_info") + .nodes(1) + .use_v3() + .build(); + suite.must_register_task(1, "checkpoint_info"); + suite.must_split(&make_split_key_at_record(1, 42)); + run_async_test(suite.write_records(0, 128, 1)); + suite.force_flush_files("checkpoint_info"); + suite.wait_for_flush(); + std::thread::sleep(Duration::from_secs(1)); + let (tx, rx) = std::sync::mpsc::channel(); + suite.run(|| { + let tx = tx.clone(); + Task::RegionCheckpointsOp(RegionCheckpointOperation::Get( + RegionSet::Universal, + Box::new(move |rs| { + tx.send(rs).unwrap(); + }), + )) + }); + let checkpoints = rx.recv().unwrap(); + assert!(!checkpoints.is_empty(), "{:?}", checkpoints); + assert!( + checkpoints + .iter() + .all(|cp| matches!(cp, GetCheckpointResult::Ok { checkpoint, .. } if checkpoint.into_inner() > 256)), + "{:?}", + checkpoints + ); + } + + #[test] + fn region_failure() { + defer! 
{{ + fail::remove("try_start_observe"); + }} + let mut suite = SuiteBuilder::new_named("region_failure").build(); + let keys = run_async_test(suite.write_records(0, 128, 1)); + fail::cfg("try_start_observe", "1*return").unwrap(); + suite.must_register_task(1, "region_failure"); + suite.must_shuffle_leader(1); + let keys2 = run_async_test(suite.write_records(256, 128, 1)); + suite.force_flush_files("region_failure"); + suite.wait_for_flush(); + suite.check_for_write_records( + suite.flushed_files.path(), + keys.union(&keys2).map(|s| s.as_slice()), + ); + } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 51a21b91628..37d031753ce 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -55,7 +55,8 @@ use grpcio_health::HealthService; use kvproto::{ brpb::create_backup, cdcpb::create_change_data, deadlock::create_deadlock, debugpb::create_debug, diagnosticspb::create_diagnostics, import_sstpb::create_import_sst, - kvrpcpb::ApiVersion, resource_usage_agent::create_resource_metering_pub_sub, + kvrpcpb::ApiVersion, logbackuppb::create_log_backup, + resource_usage_agent::create_resource_metering_pub_sub, }; use pd_client::{PdClient, RpcClient}; use raft_log_engine::RaftLogEngine; @@ -244,6 +245,7 @@ struct Servers { cdc_scheduler: tikv_util::worker::Scheduler, cdc_memory_quota: MemoryQuota, rsmeter_pubsub_service: resource_metering::PubSubService, + backup_stream_scheduler: Option>, } type LocalServer = @@ -884,7 +886,7 @@ impl TiKvServer { ); // Start backup stream - if self.config.backup_stream.enable { + let backup_stream_scheduler = if self.config.backup_stream.enable { // Create backup stream. 
let mut backup_stream_worker = Box::new(LazyWorker::new("backup-stream")); let backup_stream_scheduler = backup_stream_worker.scheduler(); @@ -910,7 +912,7 @@ impl TiKvServer { node.id(), etcd_cli, self.config.backup_stream.clone(), - backup_stream_scheduler, + backup_stream_scheduler.clone(), backup_stream_ob, self.region_info_accessor.clone(), self.router.clone(), @@ -919,7 +921,10 @@ impl TiKvServer { ); backup_stream_worker.start(backup_stream_endpoint); self.to_stop.push(backup_stream_worker); - } + Some(backup_stream_scheduler) + } else { + None + }; let import_path = self.store_path.join("import"); let mut importer = SstImporter::new( @@ -1076,6 +1081,7 @@ impl TiKvServer { cdc_scheduler, cdc_memory_quota, rsmeter_pubsub_service, + backup_stream_scheduler, }); server_config @@ -1197,6 +1203,17 @@ impl TiKvServer { { warn!("failed to register resource metering pubsub service"); } + + if let Some(sched) = servers.backup_stream_scheduler.take() { + let pitr_service = backup_stream::Service::new(sched); + if servers + .server + .register_service(create_log_backup(pitr_service)) + .is_some() + { + fatal!("failed to register log backup service"); + } + } } fn init_io_utility(&mut self) -> BytesFetcher { diff --git a/src/config.rs b/src/config.rs index 7dfbe1b0933..9e0abe37c94 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2445,6 +2445,10 @@ pub struct BackupStreamConfig { pub temp_file_size_limit_per_task: ReadableSize, #[online_config(skip)] pub initial_scan_pending_memory_quota: ReadableSize, + #[online_config(skip)] + pub initial_scan_rate_limit: ReadableSize, + #[online_config(skip)] + pub use_checkpoint_v3: bool, } impl BackupStreamConfig { @@ -2477,6 +2481,8 @@ impl Default for BackupStreamConfig { temp_path: String::new(), temp_file_size_limit_per_task: ReadableSize::mb(128), initial_scan_pending_memory_quota: ReadableSize(quota_size as _), + initial_scan_rate_limit: ReadableSize::mb(60), + use_checkpoint_v3: true, } } } diff --git 
a/src/import/sst_service.rs b/src/import/sst_service.rs index 24e52a8057e..a81a34b1e71 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -1017,7 +1017,9 @@ fn write_needs_restore(write: &[u8]) -> bool { Ok(w) if matches!( w.write_type, - WriteType::Put | WriteType::Delete | WriteType::Rollback + // We only keep the last put / delete write CF, + // other write type may shadow the real data and cause data loss. + WriteType::Put | WriteType::Delete ) => { true From a49945bb134bd70211a1e6733f82518d9e02fba0 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Mon, 11 Jul 2022 14:45:05 +0800 Subject: [PATCH 0067/1149] *: set write and time details in RPC responses (#12900) ref tikv/tikv#931, ref tikv/tikv#12362 kvproto#931 adds a few more details about the time used by an RPC request. We are filling these additional information to the responses in this commit. Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- .../engine_rocks/src/perf_context_impl.rs | 10 +- components/raftstore/src/store/fsm/apply.rs | 16 +- components/raftstore/src/store/peer.rs | 2 +- components/tikv_util/Cargo.toml | 2 +- components/tracker/src/lib.rs | 31 +++- src/coprocessor/endpoint.rs | 14 +- src/server/metrics.rs | 2 +- src/server/service/batch.rs | 2 +- src/server/service/kv.rs | 167 +++++++++++++----- tests/integrations/server/kv_service.rs | 128 ++++++++++++++ 10 files changed, 303 insertions(+), 71 deletions(-) diff --git a/components/engine_rocks/src/perf_context_impl.rs b/components/engine_rocks/src/perf_context_impl.rs index c6eb187b392..152a0a12785 100644 --- a/components/engine_rocks/src/perf_context_impl.rs +++ b/components/engine_rocks/src/perf_context_impl.rs @@ -4,7 +4,6 @@ use std::{fmt::Debug, marker::PhantomData, mem, ops::Sub, time::Duration}; use derive_more::{Add, AddAssign, Sub, SubAssign}; use engine_traits::{PerfContextKind, PerfLevel}; -use kvproto::kvrpcpb::ScanDetailV2; use lazy_static::lazy_static; use slog_derive::KV; use 
tikv_util::time::Instant; @@ -136,18 +135,11 @@ pub struct ReadPerfContext { } impl ReadPerfContext { - pub fn write_scan_detail(&self, detail_v2: &mut ScanDetailV2) { - detail_v2.set_rocksdb_delete_skipped_count(self.internal_delete_skipped_count); - detail_v2.set_rocksdb_key_skipped_count(self.internal_key_skipped_count); - detail_v2.set_rocksdb_block_cache_hit_count(self.block_cache_hit_count); - detail_v2.set_rocksdb_block_read_count(self.block_read_count); - detail_v2.set_rocksdb_block_read_byte(self.block_read_byte); - } - fn report_to_tracker(&self, tracker: &mut Tracker) { tracker.metrics.block_cache_hit_count += self.block_cache_hit_count; tracker.metrics.block_read_byte += self.block_read_byte; tracker.metrics.block_read_count += self.block_read_count; + tracker.metrics.block_read_nanos += self.block_read_time; tracker.metrics.deleted_key_skipped_count += self.internal_delete_skipped_count; tracker.metrics.internal_key_skipped_count += self.internal_key_skipped_count; } diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 03034b76245..dfafcac338f 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -65,6 +65,7 @@ use tikv_util::{ Either, MustConsumeVec, }; use time::Timespec; +use tracker::GLOBAL_TRACKERS; use uuid::Builder as UuidBuilder; use self::memtrace::*; @@ -3675,9 +3676,18 @@ where match msg { Msg::Apply { start, mut apply } => { - apply_ctx - .apply_wait - .observe(start.saturating_elapsed_secs()); + let apply_wait = start.saturating_elapsed(); + apply_ctx.apply_wait.observe(apply_wait.as_secs_f64()); + for tracker in apply + .cbs + .iter() + .flat_map(|p| p.cb.get_trackers()) + .flat_map(|ts| ts.iter().flat_map(|t| t.as_tracker_token())) + { + GLOBAL_TRACKERS.with_tracker(tracker, |t| { + t.metrics.apply_wait_nanos = apply_wait.as_nanos() as u64; + }); + } if let Some(batch) = batch_apply.as_mut() { if batch.try_batch(&mut apply) { 
diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index cf54d962075..5897309f0b2 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -1627,7 +1627,7 @@ where .observe(dur.as_secs_f64()); for t in proposal.cb.get_trackers().iter().flat_map(|v| v.iter().flat_map(|t| t.as_tracker_token())) { GLOBAL_TRACKERS.with_tracker(t, |trakcer| { - if trakcer.metrics.propose_send_wait_nanos == 0{ + if trakcer.metrics.propose_send_wait_nanos == 0 { trakcer.metrics.propose_send_wait_nanos = dur.as_nanos() as u64; } }); diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 52d73429f4c..befe6559e32 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -22,7 +22,7 @@ crossbeam = "0.8" derive_more = "0.99.3" error_code = { path = "../error_code", default-features = false } fail = "0.5" -futures = { version = "0.3", features = ["compat"] } +futures = { version = "0.3", features = ["compat", "thread-pool"] } futures-util = { version = "0.3", default-features = false, features = ["io"] } grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } http = "0.2.0" diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index 3729fb1ec9d..7e1aab80882 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -34,9 +34,38 @@ impl Tracker { pub fn write_scan_detail(&self, detail_v2: &mut pb::ScanDetailV2) { detail_v2.set_rocksdb_block_read_byte(self.metrics.block_read_byte); detail_v2.set_rocksdb_block_read_count(self.metrics.block_read_count); + detail_v2.set_rocksdb_block_read_nanos(self.metrics.block_read_nanos); detail_v2.set_rocksdb_block_cache_hit_count(self.metrics.block_cache_hit_count); detail_v2.set_rocksdb_key_skipped_count(self.metrics.internal_key_skipped_count); 
detail_v2.set_rocksdb_delete_skipped_count(self.metrics.deleted_key_skipped_count); + detail_v2.set_get_snapshot_nanos(self.metrics.get_snapshot_nanos); + } + + pub fn write_write_detail(&self, detail: &mut pb::WriteDetail) { + detail.set_store_batch_wait_nanos(self.metrics.wf_batch_wait_nanos); + detail.set_propose_send_wait_nanos(self.metrics.propose_send_wait_nanos); + detail.set_persist_log_nanos( + self.metrics.wf_persist_log_nanos - self.metrics.wf_send_to_queue_nanos, + ); + detail.set_raft_db_write_leader_wait_nanos( + self.metrics.store_mutex_lock_nanos + self.metrics.store_thread_wait_nanos, + ); + detail.set_raft_db_sync_log_nanos(self.metrics.store_write_wal_nanos); + detail.set_raft_db_write_memtable_nanos(self.metrics.store_write_memtable_nanos); + // It's an approximation considering generating proposal is fast CPU operation. + // And note that the time before flushing the raft message to the RPC channel is + // also counted in this value (to be improved in the future). + detail.set_commit_log_nanos( + self.metrics.wf_commit_log_nanos + - self.metrics.wf_batch_wait_nanos + - self.metrics.propose_send_wait_nanos, + ); + detail.set_apply_batch_wait_nanos(self.metrics.apply_wait_nanos); + detail.set_apply_log_nanos(self.metrics.apply_time_nanos - self.metrics.apply_wait_nanos); + detail.set_apply_mutex_lock_nanos(self.metrics.apply_mutex_lock_nanos); + detail.set_apply_write_leader_wait_nanos(self.metrics.apply_thread_wait_nanos); + detail.set_apply_write_wal_nanos(self.metrics.apply_wait_nanos); + detail.set_apply_write_memtable_nanos(self.metrics.apply_write_memtable_nanos); } } @@ -106,7 +135,7 @@ pub struct RequestMetrics { pub wf_commit_log_nanos: u64, pub propose_send_wait_nanos: u64, pub commit_not_persisted: bool, - pub store_mutex_lock_nanos: u64, + pub store_mutex_lock_nanos: u64, // should be 0 if using raft-engine pub store_thread_wait_nanos: u64, pub store_write_wal_nanos: u64, pub store_write_memtable_nanos: u64, diff --git 
a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 918d348f898..2b2ae03caa2 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -1376,7 +1376,7 @@ mod tests { resp.get_exec_details() .get_time_detail() .get_process_wall_time_ms(), - PAYLOAD_SMALL - COARSE_ERROR_MS + PAYLOAD_SMALL.saturating_sub(COARSE_ERROR_MS) ); assert_lt!( resp.get_exec_details() @@ -1405,7 +1405,7 @@ mod tests { resp.get_exec_details() .get_time_detail() .get_process_wall_time_ms(), - PAYLOAD_LARGE - COARSE_ERROR_MS + PAYLOAD_LARGE.saturating_sub(COARSE_ERROR_MS) ); assert_lt!( resp.get_exec_details() @@ -1471,7 +1471,7 @@ mod tests { resp.get_exec_details() .get_time_detail() .get_process_wall_time_ms(), - PAYLOAD_SMALL - COARSE_ERROR_MS + PAYLOAD_SMALL.saturating_sub(COARSE_ERROR_MS) ); assert_lt!( resp.get_exec_details() @@ -1493,7 +1493,7 @@ mod tests { resp.get_exec_details() .get_time_detail() .get_process_wall_time_ms(), - PAYLOAD_LARGE - COARSE_ERROR_MS + PAYLOAD_LARGE.saturating_sub(COARSE_ERROR_MS) ); assert_lt!( resp.get_exec_details() @@ -1557,7 +1557,7 @@ mod tests { resp.get_exec_details() .get_time_detail() .get_process_wall_time_ms(), - PAYLOAD_LARGE - COARSE_ERROR_MS + PAYLOAD_LARGE.saturating_sub(COARSE_ERROR_MS) ); assert_lt!( resp.get_exec_details() @@ -1588,7 +1588,7 @@ mod tests { .get_exec_details() .get_time_detail() .get_process_wall_time_ms(), - PAYLOAD_SMALL - COARSE_ERROR_MS + PAYLOAD_SMALL.saturating_sub(COARSE_ERROR_MS) ); assert_lt!( resp[0] @@ -1618,7 +1618,7 @@ mod tests { .get_exec_details() .get_time_detail() .get_process_wall_time_ms(), - PAYLOAD_LARGE - COARSE_ERROR_MS + PAYLOAD_LARGE.saturating_sub(COARSE_ERROR_MS) ); assert_lt!( resp[1] diff --git a/src/server/metrics.rs b/src/server/metrics.rs index caf6e1e86c4..9cd8631b275 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -223,7 +223,7 @@ lazy_static! 
{ "tikv_grpc_msg_duration_seconds", "Bucketed histogram of grpc server messages", &["type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(5e-5, 2.0, 22).unwrap() // 50us ~ 104s ) .unwrap(); pub static ref SERVER_INFO_GAUGE_VEC: IntGaugeVec = register_int_gauge_vec!( diff --git a/src/server/service/batch.rs b/src/server/service/batch.rs index 931017549c1..15a755c3468 100644 --- a/src/server/service/batch.rs +++ b/src/server/service/batch.rs @@ -35,7 +35,7 @@ pub struct ReqBatcher { impl ReqBatcher { pub fn new(batch_size: usize) -> ReqBatcher { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); ReqBatcher { gets: vec![], raw_gets: vec![], diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 336580dda58..878a138aafe 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1,7 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. // #[PerformanceCriticalPath]: Tikv gRPC APIs implementation -use std::sync::Arc; +use std::{mem, sync::Arc}; use api_version::KvFormat; use fail::fail_point; @@ -184,16 +184,20 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor macro_rules! 
handle_request { ($fn_name: ident, $future_name: ident, $req_ty: ident, $resp_ty: ident) => { + handle_request!($fn_name, $future_name, $req_ty, $resp_ty, no_time_detail); + }; + ($fn_name: ident, $future_name: ident, $req_ty: ident, $resp_ty: ident, $time_detail: tt) => { fn $fn_name(&mut self, ctx: RpcContext<'_>, mut req: $req_ty, sink: UnarySink<$resp_ty>) { forward_unary!(self.proxy, $fn_name, ctx, req, sink); - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let source = req.mut_context().take_request_source(); let resp = $future_name(&self.storage, req); let task = async move { let resp = resp.await?; - sink.success(resp).await?; let elapsed = begin_instant.saturating_elapsed(); + set_total_time!(resp, elapsed, $time_detail); + sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .$fn_name .observe(elapsed.as_secs_f64()); @@ -213,30 +217,50 @@ macro_rules! handle_request { } } +macro_rules! set_total_time { + ($resp: ident, $duration: expr, no_time_detail) => {}; + ($resp: ident, $duration: expr, has_time_detail) => { + let mut $resp = $resp; + $resp + .mut_exec_details_v2() + .mut_time_detail() + .set_total_rpc_wall_time_ns($duration.as_nanos() as u64); + }; +} + impl + 'static, E: Engine, L: LockManager, F: KvFormat> Tikv for Service { - handle_request!(kv_get, future_get, GetRequest, GetResponse); + handle_request!(kv_get, future_get, GetRequest, GetResponse, has_time_detail); handle_request!(kv_scan, future_scan, ScanRequest, ScanResponse); handle_request!( kv_prewrite, future_prewrite, PrewriteRequest, - PrewriteResponse + PrewriteResponse, + has_time_detail ); handle_request!( kv_pessimistic_lock, future_acquire_pessimistic_lock, PessimisticLockRequest, - PessimisticLockResponse + PessimisticLockResponse, + has_time_detail ); handle_request!( kv_pessimistic_rollback, future_pessimistic_rollback, PessimisticRollbackRequest, - PessimisticRollbackResponse + PessimisticRollbackResponse, + has_time_detail + ); + 
handle_request!( + kv_commit, + future_commit, + CommitRequest, + CommitResponse, + has_time_detail ); - handle_request!(kv_commit, future_commit, CommitRequest, CommitResponse); handle_request!(kv_cleanup, future_cleanup, CleanupRequest, CleanupResponse); handle_request!( kv_batch_get, @@ -248,37 +272,43 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor kv_batch_rollback, future_batch_rollback, BatchRollbackRequest, - BatchRollbackResponse + BatchRollbackResponse, + has_time_detail ); handle_request!( kv_txn_heart_beat, future_txn_heart_beat, TxnHeartBeatRequest, - TxnHeartBeatResponse + TxnHeartBeatResponse, + has_time_detail ); handle_request!( kv_check_txn_status, future_check_txn_status, CheckTxnStatusRequest, - CheckTxnStatusResponse + CheckTxnStatusResponse, + has_time_detail ); handle_request!( kv_check_secondary_locks, future_check_secondary_locks, CheckSecondaryLocksRequest, - CheckSecondaryLocksResponse + CheckSecondaryLocksResponse, + has_time_detail ); handle_request!( kv_scan_lock, future_scan_lock, ScanLockRequest, - ScanLockResponse + ScanLockResponse, + has_time_detail ); handle_request!( kv_resolve_lock, future_resolve_lock, ResolveLockRequest, - ResolveLockResponse + ResolveLockResponse, + has_time_detail ); handle_request!( kv_delete_range, @@ -372,8 +402,8 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor fn coprocessor(&mut self, ctx: RpcContext<'_>, mut req: Request, sink: UnarySink) { forward_unary!(self.proxy, coprocessor, ctx, req, sink); - let begin_instant = Instant::now_coarse(); let source = req.mut_context().take_request_source(); + let begin_instant = Instant::now(); let future = future_copr(&self.copr, Some(ctx.peer()), req); let task = async move { let resp = future.await?.consume(); @@ -402,8 +432,8 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor mut req: RawCoprocessorRequest, sink: UnarySink, ) { - let begin_instant = Instant::now_coarse(); let source = req.mut_context().take_request_source(); + let 
begin_instant = Instant::now(); let future = future_raw_coprocessor(&self.copr_v2, &self.storage, req); let task = async move { let resp = future.await?; @@ -432,7 +462,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor req: RegisterLockObserverRequest, sink: UnarySink, ) { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let (cb, f) = paired_future_callback(); let res = self.gc_worker.start_collecting(req.get_max_ts().into(), cb); @@ -471,7 +501,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor req: CheckLockObserverRequest, sink: UnarySink, ) { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let (cb, f) = paired_future_callback(); let res = self @@ -514,7 +544,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor req: RemoveLockObserverRequest, sink: UnarySink, ) { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let (cb, f) = paired_future_callback(); let res = self.gc_worker.stop_collecting(req.get_max_ts().into(), cb); @@ -551,7 +581,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor mut req: PhysicalScanLockRequest, sink: UnarySink, ) { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let (cb, f) = paired_future_callback(); let res = self.gc_worker.physical_scan_lock( @@ -595,7 +625,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor mut req: UnsafeDestroyRangeRequest, sink: UnarySink, ) { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); // DestroyRange is a very dangerous operation. We don't allow passing MIN_KEY as start, or // MAX_KEY as end here. 
@@ -646,7 +676,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor req: Request, mut sink: ServerStreamingSink, ) { - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let mut stream = self .copr @@ -794,7 +824,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor sink: UnarySink, ) { forward_unary!(self.proxy, split_region, ctx, req, sink); - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let region_id = req.get_context().get_region_id(); let (cb, f) = paired_future_callback(); @@ -890,7 +920,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor sink: UnarySink, ) { forward_unary!(self.proxy, read_index, ctx, req, sink); - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let region_id = req.get_context().get_region_id(); let mut cmd = RaftCmdRequest::default(); @@ -1032,20 +1062,8 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor BatchRespCollector, ); - let mut response_retriever = response_retriever.map(move |item| { - for measure in item.measures { - let GrpcRequestDuration { - label, - begin, - source, - } = measure; - let elapsed = begin.saturating_elapsed(); - GRPC_MSG_HISTOGRAM_STATIC - .get(label) - .observe(elapsed.as_secs_f64()); - record_request_source_metrics(source, elapsed); - } - + let mut response_retriever = response_retriever.map(move |mut item| { + handle_measures_for_batch_commands(&mut item); let mut r = item.batch_resp; GRPC_RESP_BATCH_COMMANDS_SIZE.observe(r.request_ids.len() as f64); // TODO: per thread load is more reasonable for batching. 
@@ -1349,6 +1367,46 @@ fn handle_batch_commands_request( } } +fn handle_measures_for_batch_commands(measures: &mut MeasuredBatchResponse) { + use BatchCommandsResponse_Response_oneof_cmd::*; + let now = Instant::now(); + for (resp, measure) in measures + .batch_resp + .mut_responses() + .iter_mut() + .zip(mem::take(&mut measures.measures)) + { + let GrpcRequestDuration { + label, + begin, + source, + } = measure; + let elapsed = now.saturating_duration_since(begin); + GRPC_MSG_HISTOGRAM_STATIC + .get(label) + .observe(elapsed.as_secs_f64()); + record_request_source_metrics(source, elapsed); + let exec_details = resp.cmd.as_mut().and_then(|cmd| match cmd { + Get(resp) => Some(resp.mut_exec_details_v2()), + Prewrite(resp) => Some(resp.mut_exec_details_v2()), + Commit(resp) => Some(resp.mut_exec_details_v2()), + BatchGet(resp) => Some(resp.mut_exec_details_v2()), + ResolveLock(resp) => Some(resp.mut_exec_details_v2()), + Coprocessor(resp) => Some(resp.mut_exec_details_v2()), + PessimisticLock(resp) => Some(resp.mut_exec_details_v2()), + CheckTxnStatus(resp) => Some(resp.mut_exec_details_v2()), + TxnHeartBeat(resp) => Some(resp.mut_exec_details_v2()), + CheckSecondaryLocks(resp) => Some(resp.mut_exec_details_v2()), + _ => None, + }); + if let Some(exec_details) = exec_details { + exec_details + .mut_time_detail() + .set_total_rpc_wall_time_ns(elapsed.as_nanos() as u64); + } + } +} + async fn future_handle_empty( req: BatchCommandsEmptyRequest, ) -> ServerResult { @@ -1967,24 +2025,24 @@ fn future_raw_coprocessor( } macro_rules! 
txn_command_future { - ($fn_name: ident, $req_ty: ident, $resp_ty: ident, ($req: ident) $prelude: stmt; ($v: ident, $resp: ident) { $else_branch: expr }) => { + ($fn_name: ident, $req_ty: ident, $resp_ty: ident, ($req: ident) $prelude: stmt; ($v: ident, $resp: ident, $tracker: ident) { $else_branch: expr }) => { fn $fn_name( storage: &Storage, $req: $req_ty, ) -> impl Future> { $prelude - let tracker = GLOBAL_TRACKERS.insert(Tracker::new(RequestInfo::new( + let $tracker = GLOBAL_TRACKERS.insert(Tracker::new(RequestInfo::new( $req.get_context(), RequestType::Unknown, 0, ))); - set_tls_tracker_token(tracker); + set_tls_tracker_token($tracker); let (cb, f) = paired_future_callback(); let res = storage.sched_txn_command($req.into(), cb); async move { defer!{{ - GLOBAL_TRACKERS.remove(tracker); + GLOBAL_TRACKERS.remove($tracker); }}; let $v = match res { Err(e) => Err(e), @@ -2000,24 +2058,35 @@ macro_rules! txn_command_future { } } }; + ($fn_name: ident, $req_ty: ident, $resp_ty: ident, ($v: ident, $resp: ident, $tracker: ident) { $else_branch: expr }) => { + txn_command_future!($fn_name, $req_ty, $resp_ty, (req) {}; ($v, $resp, $tracker) { $else_branch }); + }; ($fn_name: ident, $req_ty: ident, $resp_ty: ident, ($v: ident, $resp: ident) { $else_branch: expr }) => { - txn_command_future!($fn_name, $req_ty, $resp_ty, (req) {}; ($v, $resp) { $else_branch }); + txn_command_future!($fn_name, $req_ty, $resp_ty, (req) {}; ($v, $resp, tracker) { $else_branch }); }; } -txn_command_future!(future_prewrite, PrewriteRequest, PrewriteResponse, (v, resp) {{ +txn_command_future!(future_prewrite, PrewriteRequest, PrewriteResponse, (v, resp, tracker) {{ if let Ok(v) = &v { resp.set_min_commit_ts(v.min_commit_ts.into_inner()); resp.set_one_pc_commit_ts(v.one_pc_commit_ts.into_inner()); + GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { + tracker.write_scan_detail(resp.mut_exec_details_v2().mut_scan_detail_v2()); + 
tracker.write_write_detail(resp.mut_exec_details_v2().mut_write_detail()); + }); } resp.set_errors(extract_key_errors(v.map(|v| v.locks)).into()); }}); -txn_command_future!(future_acquire_pessimistic_lock, PessimisticLockRequest, PessimisticLockResponse, (v, resp) { +txn_command_future!(future_acquire_pessimistic_lock, PessimisticLockRequest, PessimisticLockResponse, (v, resp, tracker) { match v { Ok(Ok(res)) => { let (values, not_founds) = res.into_values_and_not_founds(); resp.set_values(values.into()); resp.set_not_founds(not_founds); + GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { + tracker.write_scan_detail(resp.mut_exec_details_v2().mut_scan_detail_v2()); + tracker.write_write_detail(resp.mut_exec_details_v2().mut_write_detail()); + }); }, Err(e) | Ok(Err(e)) => resp.set_errors(vec![extract_key_error(&e)].into()), } @@ -2035,10 +2104,14 @@ txn_command_future!(future_resolve_lock, ResolveLockRequest, ResolveLockResponse resp.set_error(extract_key_error(&e)); } }); -txn_command_future!(future_commit, CommitRequest, CommitResponse, (v, resp) { +txn_command_future!(future_commit, CommitRequest, CommitResponse, (v, resp, tracker) { match v { Ok(TxnStatus::Committed { commit_ts }) => { - resp.set_commit_version(commit_ts.into_inner()) + resp.set_commit_version(commit_ts.into_inner()); + GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { + tracker.write_scan_detail(resp.mut_exec_details_v2().mut_scan_detail_v2()); + tracker.write_write_detail(resp.mut_exec_details_v2().mut_write_detail()); + }); } Ok(_) => unreachable!(), Err(e) => resp.set_error(extract_key_error(&e)), diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 935b657fa3f..18f3f7278d5 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -2025,3 +2025,131 @@ fn test_storage_with_quota_limiter_disable() { assert!(begin.elapsed() < Duration::from_millis(500)); } + +#[test] +fn test_commands_write_detail() { + 
let (_cluster, client, ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { + cluster.cfg.pessimistic_txn.pipelined = false; + cluster.cfg.pessimistic_txn.in_memory = false; + }); + let (k, v) = (b"key".to_vec(), b"value".to_vec()); + + let check_scan_detail = |sc: &ScanDetailV2| { + assert!(sc.get_get_snapshot_nanos() > 0); + }; + let check_write_detail = |wd: &WriteDetail| { + assert!(wd.get_store_batch_wait_nanos() > 0); + assert!(wd.get_persist_log_nanos() > 0); + assert!(wd.get_raft_db_write_leader_wait_nanos() > 0); + assert!(wd.get_raft_db_sync_log_nanos() > 0); + assert!(wd.get_raft_db_write_memtable_nanos() > 0); + assert!(wd.get_commit_log_nanos() > 0); + assert!(wd.get_apply_batch_wait_nanos() > 0); + assert!(wd.get_apply_log_nanos() > 0); + assert!(wd.get_apply_mutex_lock_nanos() > 0); + assert!(wd.get_apply_write_wal_nanos() > 0); + assert!(wd.get_apply_write_memtable_nanos() > 0); + }; + + let mut mutation = Mutation::default(); + mutation.set_op(Op::PessimisticLock); + mutation.set_key(k.clone()); + + let mut pessimistic_lock_req = PessimisticLockRequest::default(); + pessimistic_lock_req.set_context(ctx.clone()); + pessimistic_lock_req.set_mutations(vec![mutation.clone()].into()); + pessimistic_lock_req.set_start_version(20); + pessimistic_lock_req.set_for_update_ts(20); + pessimistic_lock_req.set_primary_lock(k.clone()); + pessimistic_lock_req.set_lock_ttl(3000); + let pessimistic_lock_resp = client.kv_pessimistic_lock(&pessimistic_lock_req).unwrap(); + check_scan_detail( + pessimistic_lock_resp + .get_exec_details_v2() + .get_scan_detail_v2(), + ); + check_write_detail( + pessimistic_lock_resp + .get_exec_details_v2() + .get_write_detail(), + ); + + let mut prewrite_req = PrewriteRequest::default(); + mutation.set_op(Op::Put); + mutation.set_value(v); + prewrite_req.set_mutations(vec![mutation].into()); + prewrite_req.set_is_pessimistic_lock(vec![true]); + prewrite_req.set_context(ctx.clone()); + 
prewrite_req.set_primary_lock(k.clone()); + prewrite_req.set_start_version(20); + prewrite_req.set_for_update_ts(20); + prewrite_req.set_lock_ttl(3000); + let prewrite_resp = client.kv_prewrite(&prewrite_req).unwrap(); + check_scan_detail(prewrite_resp.get_exec_details_v2().get_scan_detail_v2()); + check_write_detail(prewrite_resp.get_exec_details_v2().get_write_detail()); + + let mut commit_req = CommitRequest::default(); + commit_req.set_context(ctx); + commit_req.set_keys(vec![k].into()); + commit_req.set_start_version(20); + commit_req.set_commit_version(30); + let commit_resp = client.kv_commit(&commit_req).unwrap(); + check_scan_detail(commit_resp.get_exec_details_v2().get_scan_detail_v2()); + check_write_detail(commit_resp.get_exec_details_v2().get_write_detail()); +} + +#[test] +fn test_rpc_wall_time() { + let mut cluster = new_server_cluster(0, 1); + cluster.run(); + + let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let k = b"key".to_vec(); + let mut get_req = GetRequest::default(); + get_req.set_context(ctx); + get_req.key = k; + get_req.version = 10; + let get_resp = client.kv_get(&get_req).unwrap(); + assert!( + get_resp + .get_exec_details_v2() + .get_time_detail() + .get_total_rpc_wall_time_ns() + > 0 + ); + + let (mut sender, receiver) = client.batch_commands().unwrap(); + let mut batch_req = BatchCommandsRequest::default(); + for i in 0..3 { + let mut req = batch_commands_request::Request::default(); + req.cmd = Some(batch_commands_request::request::Cmd::Get(get_req.clone())); + batch_req.mut_requests().push(req); + batch_req.mut_request_ids().push(i); + } + block_on(sender.send((batch_req, WriteFlags::default()))).unwrap(); + block_on(sender.close()).unwrap(); + + let (tx, rx) = mpsc::sync_channel(1); + thread::spawn(move || { + let mut responses = Vec::new(); + for r in block_on( + receiver + .map(move |b| b.unwrap().take_responses()) + .collect::>(), + ) { + responses.extend(r.into_vec()); + } + tx.send(responses).unwrap(); + 
}); + let responses = rx.recv_timeout(Duration::from_secs(1)).unwrap(); + assert_eq!(responses.len(), 3); + for resp in responses { + assert!( + resp.get_get() + .get_exec_details_v2() + .get_time_detail() + .get_total_rpc_wall_time_ns() + > 0 + ); + } +} From 5c941586f5163ca9dbb82a0d14e7f02b09732181 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Mon, 11 Jul 2022 18:11:05 +0800 Subject: [PATCH 0068/1149] raftstore: update the Load Base Split metrics event (#12992) ref tikv/tikv#12063 Update the Load Base Split metrics event to distinguish more cases. Signed-off-by: JmPotato --- .../raftstore/src/store/worker/split_controller.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 1a3fb15af45..338158c7505 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -53,7 +53,9 @@ const NO_BALANCE_KEY: &str = "no_balance_key"; const NO_UNCROSS_KEY: &str = "no_uncross_key"; // Split info for the top hot CPU region has been collected, ready to split. const READY_TO_SPLIT_CPU_TOP: &str = "ready_to_split_cpu_top"; -// The top hot CPU region is not ready to split. +// Hottest key range for the top hot CPU region could not be found. +const EMPTY_HOTTEST_KEY_RANGE: &str = "empty_hottest_key_range"; +// The top hot CPU region could not be split. const UNABLE_TO_SPLIT_CPU_TOP: &str = "unable_to_split_cpu_top"; // It will return prefix sum of the given iter, @@ -899,9 +901,13 @@ impl AutoSplitController { ); } else { LOAD_BASE_SPLIT_EVENT - .with_label_values(&[UNABLE_TO_SPLIT_CPU_TOP]) + .with_label_values(&[EMPTY_HOTTEST_KEY_RANGE]) .inc(); } + } else { + LOAD_BASE_SPLIT_EVENT + .with_label_values(&[UNABLE_TO_SPLIT_CPU_TOP]) + .inc(); } // Clean up the rest top CPU usage recorders. 
for region_id in top_cpu_usage { From 126da29086da78bad3171975d4269ba283d133bd Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 12 Jul 2022 14:25:05 +0800 Subject: [PATCH 0069/1149] *: mitigations for Raft Engine encryption key issue (#12892) close tikv/tikv#12890 Signed-off-by: tabokie --- Cargo.lock | 4 +- cmd/tikv-ctl/src/cmd.rs | 4 ++ cmd/tikv-ctl/src/main.rs | 29 +++++++++-- components/encryption/src/manager/mod.rs | 17 +++++++ components/raft_log_engine/src/engine.rs | 61 ++++++++++++++++-------- components/raft_log_engine/src/lib.rs | 4 +- 6 files changed, 90 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2cab9eb4b2d..9b7e72c2632 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4026,7 +4026,7 @@ dependencies = [ [[package]] name = "raft-engine" version = "0.2.0" -source = "git+https://github.com/tikv/raft-engine.git#07dcadbf51b43fed70346e33b5db07723e655828" +source = "git+https://github.com/tikv/raft-engine.git#7a436eae40a6b62371123c96941e058b7fe52b63" dependencies = [ "byteorder", "crc32fast", @@ -4057,7 +4057,7 @@ dependencies = [ [[package]] name = "raft-engine-ctl" version = "0.2.0" -source = "git+https://github.com/tikv/raft-engine.git#07dcadbf51b43fed70346e33b5db07723e655828" +source = "git+https://github.com/tikv/raft-engine.git#7a436eae40a6b62371123c96941e058b7fe52b63" dependencies = [ "clap 3.1.6", "env_logger", diff --git a/cmd/tikv-ctl/src/cmd.rs b/cmd/tikv-ctl/src/cmd.rs index 4c49ccfa5ef..74cc69034fc 100644 --- a/cmd/tikv-ctl/src/cmd.rs +++ b/cmd/tikv-ctl/src/cmd.rs @@ -529,6 +529,8 @@ pub enum Cmd { #[structopt(subcommand)] cmd: EncryptionMetaCmd, }, + /// Delete encryption keys that are no longer associated with physical files. 
+ CleanupEncryptionMeta {}, /// Print bad ssts related infos BadSsts { #[structopt(long)] @@ -545,6 +547,8 @@ pub enum Cmd { /// The version to reset TiKV to version: u64, }, + /// Control for Raft Engine + RaftEngineCtl { args: Vec }, #[structopt(external_subcommand)] External(Vec), } diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index e2ed740e779..9609fffb9a5 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -41,6 +41,7 @@ use kvproto::{ }; use pd_client::{Config as PdConfig, PdClient, RpcClient}; use protobuf::Message; +use raft_log_engine::ManagedFileSystem; use regex::Regex; use security::{SecurityConfig, SecurityManager}; use structopt::{clap::ErrorKind, StructOpt}; @@ -99,10 +100,19 @@ fn main() { match args[0].as_str() { "ldb" => run_ldb_command(args, &cfg), "sst_dump" => run_sst_dump_command(args, &cfg), - "raft-engine-ctl" => run_raft_engine_ctl_command(args), _ => Opt::clap().print_help().unwrap(), } } + Cmd::RaftEngineCtl { args } => { + let key_manager = + data_key_manager_from_config(&cfg.security.encryption, &cfg.storage.data_dir) + .expect("data_key_manager_from_config should success"); + let file_system = Arc::new(ManagedFileSystem::new( + key_manager.map(|m| Arc::new(m)), + None, + )); + raft_engine_ctl::run_command(args, file_system); + } Cmd::BadSsts { manifest, pd } => { let data_dir = opt.data_dir.as_deref(); assert!(data_dir.is_some(), "--data-dir must be specified"); @@ -184,6 +194,19 @@ fn main() { DataKeyManager::dump_file_dict(&cfg.storage.data_dir, path.as_deref()).unwrap(); } }, + Cmd::CleanupEncryptionMeta {} => { + let key_manager = + match data_key_manager_from_config(&cfg.security.encryption, &cfg.storage.data_dir) + .expect("data_key_manager_from_config should success") + { + Some(mgr) => mgr, + None => { + println!("Encryption is disabled"); + return; + } + }; + key_manager.retain_encrypted_files(|fname| Path::new(fname).exists()) + } Cmd::CompactCluster { db, cf, @@ -662,10 +685,6 @@ fn 
run_sst_dump_command(args: Vec, cfg: &TiKvConfig) { engine_rocks::raw::run_sst_dump_tool(&args, &opts); } -fn run_raft_engine_ctl_command(args: Vec) { - raft_engine_ctl::run_command(args); -} - fn print_bad_ssts(data_dir: &str, manifest: Option<&str>, pd_client: RpcClient, cfg: &TiKvConfig) { let db = &cfg.infer_kv_engine_path(Some(data_dir)).unwrap(); println!( diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index cd9be1b554d..2240e212b84 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -466,6 +466,23 @@ impl DataKeyManager { Ok(Some(Self::from_dicts(dicts, args.method, master_key)?)) } + /// Will block file operation for a considerable amount of time. Only used for debugging purpose. + pub fn retain_encrypted_files(&self, f: impl Fn(&str) -> bool) { + let mut dict = self.dicts.file_dict.lock().unwrap(); + let mut file_dict_file = self.dicts.file_dict_file.lock().unwrap(); + dict.files.retain(|fname, info| { + if info.method != EncryptionMethod::Plaintext { + let retain = f(fname); + if !retain { + file_dict_file.remove(fname).unwrap(); + } + retain + } else { + false + } + }); + } + fn load_dicts(master_key: &dyn Backend, args: &DataKeyManagerArgs) -> Result { if args.method != EncryptionMethod::Plaintext && !master_key.is_secure() { return Err(box_err!( diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 9236e7947db..d2f8b7cb4e1 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -9,8 +9,9 @@ use std::{ use encryption::{DataKeyManager, DecrypterReader, EncrypterWriter}; use engine_traits::{ - CacheStats, EncryptionKeyManager, PerfContextExt, PerfContextKind, PerfLevel, RaftEngine, - RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch as RaftLogBatchTrait, RaftLogGCTask, Result, + CacheStats, EncryptionKeyManager, EncryptionMethod, PerfContextExt, 
PerfContextKind, PerfLevel, + RaftEngine, RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch as RaftLogBatchTrait, + RaftLogGCTask, Result, }; use file_system::{IOOp, IORateLimiter, IOType}; use kvproto::{ @@ -41,7 +42,7 @@ impl MessageExt for MessageExtTyped { } } -struct ManagedReader { +pub struct ManagedReader { inner: Either< ::Reader, DecrypterReader<::Reader>, @@ -71,7 +72,7 @@ impl Read for ManagedReader { } } -struct ManagedWriter { +pub struct ManagedWriter { inner: Either< ::Writer, EncrypterWriter<::Writer>, @@ -129,26 +130,26 @@ impl WriteExt for ManagedWriter { } } -struct ManagedFileSystem { - base_level_file_system: DefaultFileSystem, +pub struct ManagedFileSystem { + base_file_system: DefaultFileSystem, key_manager: Option>, rate_limiter: Option>, } impl ManagedFileSystem { - fn new( + pub fn new( key_manager: Option>, rate_limiter: Option>, ) -> Self { Self { - base_level_file_system: DefaultFileSystem, + base_file_system: DefaultFileSystem, key_manager, rate_limiter, } } } -struct ManagedHandle { +pub struct ManagedHandle { path: PathBuf, base: Arc<::Handle>, } @@ -169,7 +170,7 @@ impl FileSystem for ManagedFileSystem { type Writer = ManagedWriter; fn create>(&self, path: P) -> IoResult { - let base = Arc::new(self.base_level_file_system.create(path.as_ref())?); + let base = Arc::new(self.base_file_system.create(path.as_ref())?); if let Some(ref manager) = self.key_manager { manager.new_file(path.as_ref().to_str().unwrap())?; } @@ -182,14 +183,38 @@ impl FileSystem for ManagedFileSystem { fn open>(&self, path: P) -> IoResult { Ok(ManagedHandle { path: path.as_ref().to_path_buf(), - base: Arc::new(self.base_level_file_system.open(path.as_ref())?), + base: Arc::new(self.base_file_system.open(path.as_ref())?), }) } + fn delete>(&self, path: P) -> IoResult<()> { + if let Some(ref manager) = self.key_manager { + manager.delete_file(path.as_ref().to_str().unwrap())?; + } + self.base_file_system.delete(path) + } + + fn exists_metadata>(&self, path: P) -> 
bool { + if let Some(ref manager) = self.key_manager { + if let Ok(info) = manager.get_file(path.as_ref().to_str().unwrap()) { + if info.method != EncryptionMethod::Plaintext { + return true; + } + } + } + self.base_file_system.exists_metadata(path) + } + + fn delete_metadata>(&self, path: P) -> IoResult<()> { + if let Some(ref manager) = self.key_manager { + // Note: no error if the file doesn't exist. + manager.delete_file(path.as_ref().to_str().unwrap())?; + } + self.base_file_system.delete_metadata(path) + } + fn new_reader(&self, handle: Arc) -> IoResult { - let base_reader = self - .base_level_file_system - .new_reader(handle.base.clone())?; + let base_reader = self.base_file_system.new_reader(handle.base.clone())?; if let Some(ref key_manager) = self.key_manager { Ok(ManagedReader { inner: Either::Right(key_manager.open_file_with_reader(&handle.path, base_reader)?), @@ -204,9 +229,7 @@ impl FileSystem for ManagedFileSystem { } fn new_writer(&self, handle: Arc) -> IoResult { - let base_writer = self - .base_level_file_system - .new_writer(handle.base.clone())?; + let base_writer = self.base_file_system.new_writer(handle.base.clone())?; if let Some(ref key_manager) = self.key_manager { Ok(ManagedWriter { @@ -224,10 +247,6 @@ impl FileSystem for ManagedFileSystem { }) } } - - fn delete>(&self, path: P) -> IoResult<()> { - self.base_level_file_system.delete(path) - } } #[derive(Clone)] diff --git a/components/raft_log_engine/src/lib.rs b/components/raft_log_engine/src/lib.rs index 7b8757d6531..41ba961c48a 100644 --- a/components/raft_log_engine/src/lib.rs +++ b/components/raft_log_engine/src/lib.rs @@ -23,4 +23,6 @@ extern crate tikv_util; mod engine; mod perf_context; -pub use engine::{RaftEngineConfig, RaftLogBatch, RaftLogEngine, ReadableSize, RecoveryMode}; +pub use engine::{ + ManagedFileSystem, RaftEngineConfig, RaftLogBatch, RaftLogEngine, ReadableSize, RecoveryMode, +}; From b558d0bffc554c4c0094a483c65d95a50a4141cf Mon Sep 17 00:00:00 2001 From: Xinye 
Tao Date: Tue, 12 Jul 2022 19:22:26 +0800 Subject: [PATCH 0070/1149] engine: use 6.29 rocksdb (#12533) Ref https://github.com/tikv/rocksdb/issues/277 Added some configurations, they will be hidden from doc for now. - rocksdb.xxcf.prepopulate-block-cache-mode = "disabled" - rocksdb.xxcf.format-version = 2 - rocksdb.xxcf.checksum = "crc32c" - WriteOptions::memtable_insert_hint_per_batch = false - ReadOptions::auto_prefix_mode = false - ReadOptions::adaptive_readahead = false A few notes: - `test_need_gc::test_without_properties` is removed, because in the new version of RocksDB, some portion of flushed data is replayed to memtable, and breaks the assumption of file layout. I haven't pinpointed the root cause, but I suppose this test case is not that important. - `test_compact_files_in_range` is partially removed, because it raises error: `Invalid argument: Cannot compact file to up level, input file: /000032.sst level 6 > output level 3`. Signed-off-by: tabokie Co-authored-by: 5kbpers --- Cargo.lock | 35 +++---- components/engine_rocks/src/compact.rs | 21 ---- components/engine_rocks/src/config.rs | 114 +++++++++++++++++++++ components/engine_rocks/src/file_system.rs | 19 +++- components/engine_rocks/src/options.rs | 10 +- components/engine_rocks/src/raw.rs | 8 +- components/engine_rocks/src/sst.rs | 12 ++- components/engine_rocks/src/write_batch.rs | 1 - components/raftstore/src/store/snap.rs | 5 +- etc/config-template.toml | 59 +++++++++++ src/config.rs | 74 ++++++++++--- src/server/gc_worker/mod.rs | 53 ++-------- tests/integrations/backup/mod.rs | 4 + tests/integrations/config/mod.rs | 26 ++++- tests/integrations/config/test-custom.toml | 17 ++- tests/integrations/storage/test_titan.rs | 1 - 16 files changed, 330 insertions(+), 129 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9b7e72c2632..7e562246adc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1052,7 +1052,7 @@ dependencies = [ "clap 2.33.0", "criterion-plot", "csv", - "itertools 0.10.0", + 
"itertools", "lazy_static", "num-traits", "oorandom", @@ -1094,7 +1094,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57" dependencies = [ "cast", - "itertools 0.10.0", + "itertools", ] [[package]] @@ -2550,15 +2550,6 @@ dependencies = [ "serde", ] -[[package]] -name = "itertools" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.10.0" @@ -2754,7 +2745,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#773784178a0e8e5fdad81f4fd85448a3014a3700" +source = "git+https://github.com/tikv/rust-rocksdb.git#d8b7ff8aee62aa9a406b64f7093049d62eeb9a1a" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2773,7 +2764,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#773784178a0e8e5fdad81f4fd85448a3014a3700" +source = "git+https://github.com/tikv/rust-rocksdb.git#d8b7ff8aee62aa9a406b64f7093049d62eeb9a1a" dependencies = [ "bzip2-sys", "cc", @@ -3915,7 +3906,7 @@ checksum = "355f634b43cdd80724ee7848f95770e7e70eefa6dcf14fea676216573b8fd603" dependencies = [ "bytes", "heck 0.3.1", - "itertools 0.10.0", + "itertools", "log", "multimap", "petgraph", @@ -3932,7 +3923,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "600d2f334aa05acb02a755e217ef1ab6dea4d51b58b7846588b747edec04efba" dependencies = [ "anyhow", - "itertools 0.10.0", + "itertools", "proc-macro2", "quote", "syn", @@ -4125,7 +4116,7 @@ dependencies = [ "getset", "grpcio-health", "into_other", - "itertools 0.10.0", + "itertools", "keys", "kvproto", "lazy_static", @@ -4578,7 +4569,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" 
-source = "git+https://github.com/tikv/rust-rocksdb.git#773784178a0e8e5fdad81f4fd85448a3014a3700" +source = "git+https://github.com/tikv/rust-rocksdb.git#d8b7ff8aee62aa9a406b64f7093049d62eeb9a1a" dependencies = [ "libc 0.2.125", "librocksdb_sys", @@ -5909,7 +5900,7 @@ dependencies = [ "collections", "fail", "futures 0.3.15", - "itertools 0.10.0", + "itertools", "kvproto", "log_wrappers", "match_template", @@ -6010,7 +6001,7 @@ dependencies = [ "hyper-openssl", "hyper-tls", "into_other", - "itertools 0.10.0", + "itertools", "keys", "kvproto", "lazy_static", @@ -7083,12 +7074,10 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "1.4.19+zstd.1.4.8" +version = "2.0.1+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec24a9273d24437afb8e71b16f3d9a5d569193cccdb7896213b59f552f387674" +checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" dependencies = [ "cc", - "glob", - "itertools 0.9.0", "libc 0.2.125", ] diff --git a/components/engine_rocks/src/compact.rs b/components/engine_rocks/src/compact.rs index 05369015a1e..fef3af46f5c 100644 --- a/components/engine_rocks/src/compact.rs +++ b/components/engine_rocks/src/compact.rs @@ -229,26 +229,5 @@ mod tests { assert_eq!(level_n[0].get_smallestkey(), &[0]); assert_eq!(level_n[0].get_largestkey(), &[4]); } - - for cf_name in db.cf_names() { - let mut files = vec![]; - let cf = db.cf_handle(cf_name).unwrap(); - let cf_meta = db.get_column_family_meta_data(cf); - let cf_levels = cf_meta.get_levels(); - - for level in cf_levels.into_iter().rev() { - files.extend(level.get_files().iter().map(|f| f.get_name())); - } - - assert_eq!(files.len(), 2); - db.c() - .compact_files_cf(cf_name, files.clone(), Some(3), 0, true) - .unwrap(); - - let cf_meta = db.get_column_family_meta_data(cf); - let cf_levels = cf_meta.get_levels(); - assert_eq!(cf_levels[0].get_files().len(), 1); - assert_eq!(cf_levels[3].get_files().len(), 1); - } } } diff --git 
a/components/engine_rocks/src/config.rs b/components/engine_rocks/src/config.rs index 9c015b7e7d1..e121a1cea18 100644 --- a/components/engine_rocks/src/config.rs +++ b/components/engine_rocks/src/config.rs @@ -215,6 +215,120 @@ pub mod compression_type_serde { } } +pub mod checksum_serde { + use std::fmt; + + use rocksdb::ChecksumType; + use serde::{ + de::{Error, Unexpected, Visitor}, + Deserializer, Serializer, + }; + + pub fn serialize(t: &ChecksumType, serializer: S) -> Result + where + S: Serializer, + { + let name = match *t { + ChecksumType::NoChecksum => "no", + ChecksumType::CRC32c => "crc32c", + ChecksumType::XxHash => "xxhash", + ChecksumType::XxHash64 => "xxhash64", + ChecksumType::XXH3 => "xxh3", + }; + serializer.serialize_str(name) + } + + pub fn deserialize<'de, D>(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct StrVistor; + impl<'de> Visitor<'de> for StrVistor { + type Value = ChecksumType; + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(formatter, "a checksum type") + } + + fn visit_str(self, value: &str) -> Result + where + E: Error, + { + let str = match &*value.trim().to_lowercase() { + "no" => ChecksumType::NoChecksum, + "crc32c" => ChecksumType::CRC32c, + "xxhash" => ChecksumType::XxHash, + "xxhash64" => ChecksumType::XxHash64, + "xxh3" => ChecksumType::XXH3, + _ => { + return Err(E::invalid_value( + Unexpected::Other("invalid checksum type"), + &self, + )); + } + }; + Ok(str) + } + } + + deserializer.deserialize_str(StrVistor) + } +} + +pub mod prepopulate_block_cache_serde { + use std::fmt; + + use rocksdb::PrepopulateBlockCache; + use serde::{ + de::{Error, Unexpected, Visitor}, + Deserializer, Serializer, + }; + + pub fn serialize(t: &PrepopulateBlockCache, serializer: S) -> Result + where + S: Serializer, + { + let name = match *t { + PrepopulateBlockCache::Disabled => "disabled", + PrepopulateBlockCache::FlushOnly => "flush-only", + }; + serializer.serialize_str(name) + } + + 
pub fn deserialize<'de, D>(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct StrVistor; + impl<'de> Visitor<'de> for StrVistor { + type Value = PrepopulateBlockCache; + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(formatter, "a prepopulate block cache mode") + } + + fn visit_str(self, value: &str) -> Result + where + E: Error, + { + let str = match &*value.trim().to_lowercase() { + "disabled" => PrepopulateBlockCache::Disabled, + "flush-only" => PrepopulateBlockCache::FlushOnly, + _ => { + return Err(E::invalid_value( + Unexpected::Other("invalid prepopulate block cache mode"), + &self, + )); + } + }; + Ok(str) + } + } + + deserializer.deserialize_str(StrVistor) + } +} + #[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] pub enum BlobRunMode { diff --git a/components/engine_rocks/src/file_system.rs b/components/engine_rocks/src/file_system.rs index a9eebc161af..397eaead488 100644 --- a/components/engine_rocks/src/file_system.rs +++ b/components/engine_rocks/src/file_system.rs @@ -70,7 +70,9 @@ mod tests { #[test] fn test_inspected_compact() { - let value_size = 1024; + // NOTICE: Specific to RocksDB version. 
+ let amplification_bytes = 2560; + let value_size = amplification_bytes * 2; let temp_dir = Builder::new() .prefix("test_inspected_compact") .tempdir() @@ -81,15 +83,16 @@ mod tests { db.put(&data_key(b"a1"), &value).unwrap(); db.put(&data_key(b"a2"), &value).unwrap(); + assert_eq!(stats.fetch(IOType::Flush, IOOp::Write), 0); db.flush(true /*sync*/).unwrap(); assert!(stats.fetch(IOType::Flush, IOOp::Write) > value_size * 2); - assert!(stats.fetch(IOType::Flush, IOOp::Write) < value_size * 3); + assert!(stats.fetch(IOType::Flush, IOOp::Write) < value_size * 2 + amplification_bytes); stats.reset(); db.put(&data_key(b"a2"), &value).unwrap(); db.put(&data_key(b"a3"), &value).unwrap(); db.flush(true /*sync*/).unwrap(); assert!(stats.fetch(IOType::Flush, IOOp::Write) > value_size * 2); - assert!(stats.fetch(IOType::Flush, IOOp::Write) < value_size * 3); + assert!(stats.fetch(IOType::Flush, IOOp::Write) < value_size * 2 + amplification_bytes); stats.reset(); db.c() .compact_range( @@ -100,8 +103,14 @@ mod tests { ) .unwrap(); assert!(stats.fetch(IOType::LevelZeroCompaction, IOOp::Read) > value_size * 4); - assert!(stats.fetch(IOType::LevelZeroCompaction, IOOp::Read) < value_size * 5); + assert!( + stats.fetch(IOType::LevelZeroCompaction, IOOp::Read) + < value_size * 4 + amplification_bytes + ); assert!(stats.fetch(IOType::LevelZeroCompaction, IOOp::Write) > value_size * 3); - assert!(stats.fetch(IOType::LevelZeroCompaction, IOOp::Write) < value_size * 4); + assert!( + stats.fetch(IOType::LevelZeroCompaction, IOOp::Write) + < value_size * 3 + amplification_bytes + ); } } diff --git a/components/engine_rocks/src/options.rs b/components/engine_rocks/src/options.rs index c1610f64224..c50c7734f79 100644 --- a/components/engine_rocks/src/options.rs +++ b/components/engine_rocks/src/options.rs @@ -16,7 +16,7 @@ impl RocksReadOptions { impl From for RocksReadOptions { fn from(opts: engine_traits::ReadOptions) -> Self { let mut r = RawReadOptions::default(); - 
r.fill_cache(opts.fill_cache()); + r.set_fill_cache(opts.fill_cache()); RocksReadOptions(r) } } @@ -40,6 +40,8 @@ impl From for RocksWriteOptions { let mut r = RawWriteOptions::default(); r.set_sync(opts.sync()); r.set_no_slowdown(opts.no_slowdown()); + // TODO: enable it. + r.set_memtable_insert_hint_per_batch(false); RocksWriteOptions(r) } } @@ -59,16 +61,20 @@ impl From for RocksReadOptions { fn build_read_opts(iter_opts: engine_traits::IterOptions) -> RawReadOptions { let mut opts = RawReadOptions::new(); - opts.fill_cache(iter_opts.fill_cache()); + opts.set_fill_cache(iter_opts.fill_cache()); opts.set_max_skippable_internal_keys(iter_opts.max_skippable_internal_keys()); if iter_opts.key_only() { opts.set_titan_key_only(true); } if iter_opts.total_order_seek_used() { opts.set_total_order_seek(true); + // TODO: enable it. + opts.set_auto_prefix_mode(false); } else if iter_opts.prefix_same_as_start() { opts.set_prefix_same_as_start(true); } + // TODO: enable it. + opts.set_adaptive_readahead(false); if iter_opts.hint_min_ts().is_some() || iter_opts.hint_max_ts().is_some() { opts.set_table_filter(TsFilter::new( diff --git a/components/engine_rocks/src/raw.rs b/components/engine_rocks/src/raw.rs index 145931743dd..c7d2e3a0d31 100644 --- a/components/engine_rocks/src/raw.rs +++ b/components/engine_rocks/src/raw.rs @@ -8,13 +8,13 @@ pub use rocksdb::{ new_compaction_filter_raw, run_ldb_tool, run_sst_dump_tool, BlockBasedOptions, CFHandle, Cache, - ColumnFamilyOptions, CompactOptions, CompactionFilter, CompactionFilterContext, + ChecksumType, ColumnFamilyOptions, CompactOptions, CompactionFilter, CompactionFilterContext, CompactionFilterDecision, CompactionFilterFactory, CompactionFilterValueType, CompactionJobInfo, CompactionOptions, CompactionPriority, DBBottommostLevelCompaction, DBCompactionFilter, DBCompactionStyle, DBCompressionType, DBEntryType, DBInfoLogLevel, DBIterator, DBOptions, DBRateLimiterMode, DBRecoveryMode, DBStatisticsTickerType, 
DBTitanDBBlobRunMode, Env, EventListener, IngestExternalFileOptions, LRUCacheOptions, - MemoryAllocator, PerfContext, Range, ReadOptions, SeekKey, SliceTransform, TableFilter, - TablePropertiesCollector, TablePropertiesCollectorFactory, TitanBlobIndex, TitanDBOptions, - Writable, WriteOptions, DB, + MemoryAllocator, PerfContext, PrepopulateBlockCache, Range, ReadOptions, SeekKey, + SliceTransform, TableFilter, TablePropertiesCollector, TablePropertiesCollectorFactory, + TitanBlobIndex, TitanDBOptions, Writable, WriteOptions, DB, }; diff --git a/components/engine_rocks/src/sst.rs b/components/engine_rocks/src/sst.rs index 58f300a8ec2..c7eb52e0527 100644 --- a/components/engine_rocks/src/sst.rs +++ b/components/engine_rocks/src/sst.rs @@ -222,9 +222,15 @@ impl SstWriterBuilder for RocksSstWriterBuilder { }; // TODO: 0 is a valid value for compression_level if self.compression_level != 0 { - // other three fields are default value. - // see: https://github.com/facebook/rocksdb/blob/8cb278d11a43773a3ac22e523f4d183b06d37d88/include/rocksdb/advanced_options.h#L146-L153 - io_options.set_compression_options(-14, self.compression_level, 0, 0, 0); + // other 4 fields are default value. 
+ io_options.set_compression_options( + -14, + self.compression_level, + 0, /*strategy*/ + 0, /*max_dict_bytes*/ + 0, /*zstd_max_train_bytes*/ + 1, /*parallel_threads*/ + ); } io_options.compression(compress_type); // in rocksdb 5.5.1, SstFileWriter will try to use bottommost_compression and diff --git a/components/engine_rocks/src/write_batch.rs b/components/engine_rocks/src/write_batch.rs index 824882cc1e9..e9428b2c291 100644 --- a/components/engine_rocks/src/write_batch.rs +++ b/components/engine_rocks/src/write_batch.rs @@ -152,7 +152,6 @@ mod tests { let opt = RawDBOptions::default(); opt.enable_unordered_write(false); opt.enable_pipelined_write(false); - opt.enable_pipelined_commit(true); let engine = new_engine_opt( path.path().join("db").to_str().unwrap(), RocksDBOptions::from_raw(opt), diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index bb308efd054..eaf99506f4b 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -2862,6 +2862,8 @@ pub mod tests { s.write_all(&recv_remain).unwrap(); s.save().unwrap(); + let snap_size = snap_mgr.get_total_snap_size().unwrap(); + let max_snap_count = (max_total_size + snap_size - 1) / snap_size; for (i, region_id) in regions.into_iter().enumerate() { let key = SnapKey::new(region_id, 1, 1); let region = gen_test_region(region_id, 1, 1); @@ -2878,9 +2880,6 @@ pub mod tests { ) .unwrap(); - // TODO: this size may change in different RocksDB version. - let snap_size = 1660; - let max_snap_count = (max_total_size + snap_size - 1) / snap_size; // The first snap_size is for region 100. // That snapshot won't be deleted because it's not for generating. assert_eq!( diff --git a/etc/config-template.toml b/etc/config-template.toml index 2195e681f62..b63fe2ce235 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -803,6 +803,56 @@ ## for the same CF. 
# compaction-guard-max-output-file-size = "128M" +## Available versions: +## +## 0 -- This version can be read by all TiKV releases. Doesn't support changing +## checksum type (default is CRC32). +## +## 1 -- Can be read by all TiKV releases. Supports non-default checksum, like +## xxHash. It is written by RocksDB when BlockBasedTableOptions::checksum is +## something other than kCRC32c. (version 0 is silently upconverted) +## +## 2 -- Can be read by all TiKV releases. Changes the way we encode compressed +## blocks with LZ4, BZip2 and Zlib compression. +## +## 3 -- Can be read by TiKV's versions since 2.1. Changes the way we encode the +## keys in index blocks. +## This option only affects newly written tables. When reading existing tables, +## the information about version is read from the footer. +## +## 4 -- Can be read by TiKV's versions since 3.0. Changes the way we encode the +## values in index blocks. +## This option only affects newly written tables. When reading existing tables, +## the information about version is read from the footer. +## +# format-version = 2 + +## If enabled, prepopulate warm/hot blocks (data, uncompressed dict, index and +## filter blocks) which are already in memory into block cache at the time of +## flush. On a flush, the block that is in memory (in memtables) get flushed +## to the device. If using Direct IO, additional IO is incurred to read this +## data back into memory again, which is avoided by enabling this option. This +## further helps if the workload exhibits high temporal locality, where most +## of the reads go to recently written data. This also helps in case of +## Distributed FileSystem. +## +## disabled: kDisabled +## flush-only: kFlushOnly +## +# prepopulate-block-cache = "disabled" + +## Use the specified checksum type. Newly created table files will be +## protected with this checksum type. Old table files will still be readable, +## even though they have different checksum type. 
+## +## no: kNoChecksum +## crc32c: kCRC32c +## xxhash: kxxHash +## xxhash64: kxxHash64 +## xxh3: kXXH3 (supported since TiKV 6.2) +## +# checksum = "crc32c" + ## Options for "Default" Column Family for `Titan`. [rocksdb.defaultcf.titan] ## The smallest value to store in blob files. Value smaller than @@ -887,6 +937,9 @@ # enable-compaction-guard = true # compaction-guard-min-output-file-size = "8M" # compaction-guard-max-output-file-size = "128M" +# format-version = 2 +# prepopulate-block-cache = "disabled" +# checksum = "crc32c" [rocksdb.lockcf] # compression-per-level = ["no", "no", "no", "no", "no", "no", "no"] @@ -908,6 +961,9 @@ # dynamic-level-bytes = true # optimize-filters-for-hits = false # enable-compaction-guard = false +# format-version = 2 +# prepopulate-block-cache = "disabled" +# checksum = "crc32c" [raftdb] # max-background-jobs = 4 @@ -967,6 +1023,9 @@ # dynamic-level-bytes = true # optimize-filters-for-hits = true # enable-compaction-guard = false +# format-version = 2 +# prepopulate-block-cache = "disabled" +# checksum = "crc32c" [raft-engine] ## Determines whether to use Raft Engine to store raft logs. 
When it is diff --git a/src/config.rs b/src/config.rs index 9e0abe37c94..239c80a62ab 100644 --- a/src/config.rs +++ b/src/config.rs @@ -25,9 +25,9 @@ use engine_rocks::{ get_env, properties::MvccPropertiesCollectorFactory, raw::{ - BlockBasedOptions, Cache, ColumnFamilyOptions, CompactionPriority, DBCompactionStyle, - DBCompressionType, DBOptions, DBRateLimiterMode, DBRecoveryMode, Env, LRUCacheOptions, - TitanDBOptions, + BlockBasedOptions, Cache, ChecksumType, ColumnFamilyOptions, CompactionPriority, + DBCompactionStyle, DBCompressionType, DBOptions, DBRateLimiterMode, DBRecoveryMode, Env, + LRUCacheOptions, PrepopulateBlockCache, TitanDBOptions, }, raw_util::CFOptions, util::{FixedPrefixSliceTransform, FixedSuffixSliceTransform, NoopSliceTransform}, @@ -129,8 +129,11 @@ pub struct TitanCfConfig { pub max_gc_batch_size: ReadableSize, #[online_config(skip)] pub discardable_ratio: f64, + // deprecated. #[online_config(skip)] - pub sample_ratio: f64, + #[doc(hidden)] + #[serde(skip_serializing)] + pub sample_ratio: Option, #[online_config(skip)] pub merge_small_file_threshold: ReadableSize, pub blob_run_mode: BlobRunMode, @@ -156,7 +159,7 @@ impl Default for TitanCfConfig { min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), discardable_ratio: 0.5, - sample_ratio: 0.1, + sample_ratio: None, merge_small_file_threshold: ReadableSize::mb(8), blob_run_mode: BlobRunMode::Normal, level_merge: false, @@ -176,7 +179,6 @@ impl TitanCfConfig { opts.set_min_gc_batch_size(self.min_gc_batch_size.0 as u64); opts.set_max_gc_batch_size(self.max_gc_batch_size.0 as u64); opts.set_discardable_ratio(self.discardable_ratio); - opts.set_sample_ratio(self.sample_ratio); opts.set_merge_small_file_threshold(self.merge_small_file_threshold.0 as u64); opts.set_blob_run_mode(self.blob_run_mode.into()); opts.set_level_merge(self.level_merge); @@ -197,6 +199,9 @@ impl TitanCfConfig { .into(), ); } + if self.sample_ratio.is_some() { + warn!("sample-ratio is 
deprecated. Ignoring the value."); + } Ok(()) } } @@ -335,6 +340,14 @@ macro_rules! cf_config { pub bottommost_zstd_compression_dict_size: i32, #[online_config(skip)] pub bottommost_zstd_compression_sample_size: i32, + #[serde(with = "rocks_config::prepopulate_block_cache_serde")] + #[online_config(skip)] + pub prepopulate_block_cache: PrepopulateBlockCache, + #[online_config(skip)] + pub format_version: u32, + #[serde(with = "rocks_config::checksum_serde")] + #[online_config(skip)] + pub checksum: ChecksumType, #[online_config(submodule)] pub titan: TitanCfConfig, } @@ -350,6 +363,10 @@ macro_rules! cf_config { ) .into()); } + if self.format_version > 5 { + // TODO: allow version 5 if we have another LTS capable of reading it? + return Err("format-version larger than 5 is unsupported".into()); + } self.titan.validate()?; Ok(()) } @@ -477,9 +494,6 @@ macro_rules! write_into_metrics { $metrics .with_label_values(&[$tag, "titan_discardable_ratio"]) .set($cf.titan.discardable_ratio); - $metrics - .with_label_values(&[$tag, "titan_sample_ratio"]) - .set($cf.titan.sample_ratio); $metrics .with_label_values(&[$tag, "titan_merge_small_file_threshold"]) .set($cf.titan.merge_small_file_threshold.0 as f64); @@ -503,12 +517,15 @@ macro_rules! 
build_cf_opt { .set_pin_l0_filter_and_index_blocks_in_cache($opt.pin_l0_filter_and_index_blocks); if $opt.use_bloom_filter { block_base_opts.set_bloom_filter( - $opt.bloom_filter_bits_per_key, + $opt.bloom_filter_bits_per_key as f64, $opt.block_based_bloom_filter, ); block_base_opts.set_whole_key_filtering($opt.whole_key_filtering); } block_base_opts.set_read_amp_bytes_per_bit($opt.read_amp_bytes_per_bit); + block_base_opts.set_prepopulate_block_cache($opt.prepopulate_block_cache); + block_base_opts.set_format_version($opt.format_version); + block_base_opts.set_checksum($opt.checksum); let mut cf_opts = ColumnFamilyOptions::new(); cf_opts.set_block_based_table_factory(&block_base_opts); cf_opts.set_num_levels($opt.num_levels); @@ -524,6 +541,7 @@ macro_rules! build_cf_opt { 0, /* strategy */ $opt.bottommost_zstd_compression_dict_size, $opt.bottommost_zstd_compression_sample_size, + 1, /* parallel_threads */ ); cf_opts.set_write_buffer_size($opt.write_buffer_size.0); cf_opts.set_max_write_buffer_number($opt.max_write_buffer_number); @@ -629,10 +647,13 @@ impl Default for DefaultCfConfig { enable_compaction_guard: true, compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: ReadableSize::mb(128), - titan: TitanCfConfig::default(), bottommost_level_compression: DBCompressionType::Zstd, bottommost_zstd_compression_dict_size: 0, bottommost_zstd_compression_sample_size: 0, + prepopulate_block_cache: PrepopulateBlockCache::Disabled, + format_version: 2, + checksum: ChecksumType::CRC32c, + titan: TitanCfConfig::default(), } } } @@ -738,10 +759,13 @@ impl Default for WriteCfConfig { enable_compaction_guard: true, compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: ReadableSize::mb(128), - titan, bottommost_level_compression: DBCompressionType::Zstd, bottommost_zstd_compression_dict_size: 0, bottommost_zstd_compression_sample_size: 0, + prepopulate_block_cache: 
PrepopulateBlockCache::Disabled, + format_version: 2, + checksum: ChecksumType::CRC32c, + titan, } } } @@ -833,10 +857,13 @@ impl Default for LockCfConfig { enable_compaction_guard: false, compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: ReadableSize::mb(128), - titan, bottommost_level_compression: DBCompressionType::Disable, bottommost_zstd_compression_dict_size: 0, bottommost_zstd_compression_sample_size: 0, + prepopulate_block_cache: PrepopulateBlockCache::Disabled, + format_version: 2, + checksum: ChecksumType::CRC32c, + titan, } } } @@ -906,10 +933,13 @@ impl Default for RaftCfConfig { enable_compaction_guard: false, compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: ReadableSize::mb(128), - titan, bottommost_level_compression: DBCompressionType::Disable, bottommost_zstd_compression_dict_size: 0, bottommost_zstd_compression_sample_size: 0, + prepopulate_block_cache: PrepopulateBlockCache::Disabled, + format_version: 2, + checksum: ChecksumType::CRC32c, + titan, } } } @@ -1141,8 +1171,6 @@ impl DbConfig { self.use_direct_io_for_flush_and_compaction, ); opts.enable_pipelined_write(self.enable_pipelined_write); - let enable_pipelined_commit = !self.enable_pipelined_write && !self.enable_unordered_write; - opts.enable_pipelined_commit(enable_pipelined_commit); opts.enable_unordered_write(self.enable_unordered_write); opts.set_info_log(RocksdbLogger::default()); opts.set_info_log_level(self.info_log_level.into()); @@ -1269,10 +1297,13 @@ impl Default for RaftDefaultCfConfig { enable_compaction_guard: false, compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: ReadableSize::mb(128), - titan: TitanCfConfig::default(), bottommost_level_compression: DBCompressionType::Disable, bottommost_zstd_compression_dict_size: 0, bottommost_zstd_compression_sample_size: 0, + prepopulate_block_cache: PrepopulateBlockCache::Disabled, + 
format_version: 2, + checksum: ChecksumType::CRC32c, + titan: TitanCfConfig::default(), } } } @@ -3398,6 +3429,15 @@ impl TiKvConfig { } } + if last_cfg.raftdb.defaultcf.format_version > 5 + || last_cfg.rocksdb.defaultcf.format_version > 5 + || last_cfg.rocksdb.writecf.format_version > 5 + || last_cfg.rocksdb.lockcf.format_version > 5 + || last_cfg.rocksdb.raftcf.format_version > 5 + { + return Err("format_version larger than 5 is unsupported".into()); + } + Ok(()) } diff --git a/src/server/gc_worker/mod.rs b/src/server/gc_worker/mod.rs index 0eea3b77131..4e2bc6e76de 100644 --- a/src/server/gc_worker/mod.rs +++ b/src/server/gc_worker/mod.rs @@ -64,17 +64,16 @@ mod tests { region: Region, safe_point: impl Into, need_gc: bool, - ) -> Option { + ) -> MvccProperties { let safe_point = safe_point.into(); let start = keys::data_key(region.get_start_key()); let end = keys::data_end_key(region.get_end_key()); let props = db .c() - .get_mvcc_properties_cf(CF_WRITE, safe_point, &start, &end); - if let Some(props) = props.as_ref() { - assert_eq!(check_need_gc(safe_point, 1.0, props), need_gc); - } + .get_mvcc_properties_cf(CF_WRITE, safe_point, &start, &end) + .unwrap(); + assert_eq!(check_need_gc(safe_point, 1.0, &props), need_gc); props } @@ -86,48 +85,22 @@ mod tests { .unwrap(); let path = path.path().to_str().unwrap(); let region = make_region(1, vec![0], vec![10]); - test_without_properties(path, ®ion); test_with_properties(path, ®ion); } - fn test_without_properties(path: &str, region: &Region) { - let db = open_db(path, false); + fn test_with_properties(path: &str, region: &Region) { + let db = open_db(path, true); let mut engine = RegionEngine::new(&db, region); // Put 2 keys. engine.put(&[1], 1, 1); engine.put(&[4], 2, 2); - assert!( - get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, true).is_none() - ); - engine.flush(); - // After this flush, we have a SST file without properties. - // Without properties, we always need GC. 
- assert!( - get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, true).is_none() - ); - } - - fn test_with_properties(path: &str, region: &Region) { - let db = open_db(path, true); - let mut engine = RegionEngine::new(&db, region); - // Put 2 keys. engine.put(&[2], 3, 3); engine.put(&[3], 4, 4); engine.flush(); - // After this flush, we have a SST file w/ properties, plus the SST - // file w/o properties from previous flush. We always need GC as - // long as we can't get properties from any SST files. - assert!( - get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, true).is_none() - ); engine.compact(); - // After this compact, the two SST files are compacted into a new - // SST file with properties. Now all SST files have properties and - // all keys have only one version, so we don't need gc. - let props = - get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, false).unwrap(); + let props = get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, false); assert_eq!(props.min_ts, 1.into()); assert_eq!(props.max_ts, 4.into()); assert_eq!(props.num_rows, 4); @@ -143,8 +116,7 @@ mod tests { engine.flush(); // After this flush, keys 5,6 in the new SST file have more than one // versions, so we need gc. - let props = - get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, true).unwrap(); + let props = get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, true); assert_eq!(props.min_ts, 1.into()); assert_eq!(props.max_ts, 8.into()); assert_eq!(props.num_rows, 6); @@ -152,8 +124,7 @@ mod tests { assert_eq!(props.num_versions, 8); assert_eq!(props.max_row_versions, 2); // But if the `safe_point` is older than all versions, we don't need gc too. 
- let props = - get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 0, false).unwrap(); + let props = get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 0, false); assert_eq!(props.min_ts, TimeStamp::max()); assert_eq!(props.max_ts, TimeStamp::zero()); assert_eq!(props.num_rows, 0); @@ -167,8 +138,7 @@ mod tests { engine.compact(); // After this compact, all versions of keys 5,6 are deleted, // no keys have more than one versions, so we don't need gc. - let props = - get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, false).unwrap(); + let props = get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, false); assert_eq!(props.min_ts, 1.into()); assert_eq!(props.max_ts, 4.into()); assert_eq!(props.num_rows, 4); @@ -179,8 +149,7 @@ mod tests { // A single lock version need gc. engine.lock(&[7], 9, 9); engine.flush(); - let props = - get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, true).unwrap(); + let props = get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, true); assert_eq!(props.min_ts, 1.into()); assert_eq!(props.max_ts, 9.into()); assert_eq!(props.num_rows, 5); diff --git a/tests/integrations/backup/mod.rs b/tests/integrations/backup/mod.rs index 6d171bcae28..ccadcca674f 100644 --- a/tests/integrations/backup/mod.rs +++ b/tests/integrations/backup/mod.rs @@ -52,6 +52,10 @@ fn assert_same_files(mut files1: Vec, mut files2: Vec Date: Wed, 13 Jul 2022 10:59:05 +0800 Subject: [PATCH 0071/1149] raftstore: fix the building warning caused by the feature cfg (#13004) ref tikv/tikv#12063 Fix the building warning caused by the feature cfg. 
Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- .../raftstore/src/store/worker/split_controller.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 338158c7505..2964796e4b2 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -8,7 +8,6 @@ use std::{ time::{Duration, SystemTime}, }; -use fail::fail_point; use kvproto::{ kvrpcpb::KeyRange, metapb::{self, Peer}, @@ -619,7 +618,7 @@ impl AutoSplitController { fn is_grpc_poll_busy(&self, grpc_thread_usage: f64) -> bool { #[cfg(feature = "failpoints")] - fail_point!("mock_grpc_poll_is_not_busy", |_| { false }); + fail::fail_point!("mock_grpc_poll_is_not_busy", |_| { false }); if self.max_grpc_thread_count == 0 { return false; } @@ -630,7 +629,7 @@ impl AutoSplitController { fn is_unified_read_pool_busy(&self, unified_read_pool_thread_usage: f64) -> bool { #[cfg(feature = "failpoints")] - fail_point!("mock_unified_read_pool_is_busy", |_| { true }); + fail::fail_point!("mock_unified_read_pool_is_busy", |_| { true }); if self.max_unified_read_pool_thread_count == 0 { return false; } @@ -645,7 +644,7 @@ impl AutoSplitController { fn is_region_busy(&self, unified_read_pool_thread_usage: f64, region_cpu_usage: f64) -> bool { #[cfg(feature = "failpoints")] - fail_point!("mock_region_is_busy", |_| { true }); + fail::fail_point!("mock_region_is_busy", |_| { true }); if unified_read_pool_thread_usage <= 0.0 || !self.should_check_region_cpu() { return false; } @@ -798,9 +797,8 @@ impl AutoSplitController { .with_label_values(&["read"]) .observe(qps as f64); - // 1. If the QPS and Byte do not meet the threshold, skip. - // 2. If the Unified Read Pool is not busy or - // the Region is not hot enough (takes up 50% of the Unified Read Pool CPU times), skip. + // 1. 
If the QPS or the byte does not meet the threshold, skip. + // 2. If the Unified Read Pool or the region is not hot enough, skip. if qps < self.cfg.qps_threshold && byte < self.cfg.byte_threshold && (!is_unified_read_pool_busy || !is_region_busy) From 46d999db06ccef233d8cc38c4a5931d9829b0dd5 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Wed, 13 Jul 2022 11:27:05 +0800 Subject: [PATCH 0072/1149] raftstore: Implement coprocessor observer pre_exec_admin(query) (#12868) ref tikv/tikv#12849 Support new observers pre_exec_admin(query). Signed-off-by: CalvinNeo --- .../raftstore/src/coprocessor/dispatcher.rs | 47 +++- components/raftstore/src/coprocessor/mod.rs | 10 + components/raftstore/src/store/fsm/apply.rs | 212 +++++++++++++++--- 3 files changed, 240 insertions(+), 29 deletions(-) diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index 3f51dd918c6..24b79bf4877 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -416,6 +416,29 @@ impl CoprocessorHost { } } + pub fn pre_exec(&self, region: &Region, cmd: &RaftCmdRequest) -> bool { + let mut ctx = ObserverContext::new(region); + if !cmd.has_admin_request() { + let query = cmd.get_requests(); + for observer in &self.registry.query_observers { + let observer = observer.observer.inner(); + if observer.pre_exec_query(&mut ctx, query) { + return true; + } + } + false + } else { + let admin = cmd.get_admin_request(); + for observer in &self.registry.admin_observers { + let observer = observer.observer.inner(); + if observer.pre_exec_admin(&mut ctx, admin) { + return true; + } + } + false + } + } + pub fn post_apply_plain_kvs_from_snapshot( &self, region: &Region, @@ -608,6 +631,12 @@ mod tests { self.called.fetch_add(3, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); } + + fn pre_exec_admin(&self, ctx: &mut ObserverContext<'_>, _: &AdminRequest) -> bool { + 
self.called.fetch_add(16, Ordering::SeqCst); + ctx.bypass = self.bypass.load(Ordering::SeqCst); + false + } } impl QueryObserver for TestCoprocessor { @@ -634,6 +663,12 @@ mod tests { ctx.bypass = self.bypass.load(Ordering::SeqCst); } + fn pre_exec_query(&self, ctx: &mut ObserverContext<'_>, _: &[Request]) -> bool { + self.called.fetch_add(15, Ordering::SeqCst); + ctx.bypass = self.bypass.load(Ordering::SeqCst); + false + } + fn on_empty_cmd(&self, ctx: &mut ObserverContext<'_>, _index: u64, _term: u64) { self.called.fetch_add(14, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); @@ -767,7 +802,17 @@ mod tests { let mut empty_req = RaftCmdRequest::default(); empty_req.set_requests(vec![Request::default()].into()); host.on_empty_cmd(®ion, 0, 0); - assert_all!([&ob.called], &[88]); + assert_all!([&ob.called], &[88]); // 14 + + let mut query_req = RaftCmdRequest::default(); + query_req.set_requests(vec![Request::default()].into()); + host.pre_exec(®ion, &query_req); + assert_all!([&ob.called], &[103]); // 15 + + let mut admin_req = RaftCmdRequest::default(); + admin_req.set_admin_request(AdminRequest::default()); + host.pre_exec(®ion, &admin_req); + assert_all!([&ob.called], &[119]); // 16 } #[test] diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 39b412ce950..2dc83c8d7af 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -86,6 +86,11 @@ pub trait AdminObserver: Coprocessor { /// Hook to call after applying admin request. /// For now, the `region` in `ObserverContext` is an empty region. fn post_apply_admin(&self, _: &mut ObserverContext<'_>, _: &AdminResponse) {} + + /// Hook before exec admin request, returns whether we should skip this admin. 
+ fn pre_exec_admin(&self, _: &mut ObserverContext<'_>, _: &AdminRequest) -> bool { + false + } } pub trait QueryObserver: Coprocessor { @@ -105,6 +110,11 @@ pub trait QueryObserver: Coprocessor { /// Hook to call after applying write request. /// For now, the `region` in `ObserverContext` is an empty region. fn post_apply_query(&self, _: &mut ObserverContext<'_>, _: &Cmd) {} + + /// Hook before exec write request, returns whether we should skip this write. + fn pre_exec_query(&self, _: &mut ObserverContext<'_>, _: &[Request]) -> bool { + false + } } pub trait ApplySnapshotObserver: Coprocessor { diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index dfafcac338f..9c2e548f10e 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -1242,38 +1242,50 @@ where // if pending remove, apply should be aborted already. assert!(!self.pending_remove); - ctx.exec_log_index = index; - ctx.exec_log_term = term; - ctx.kv_wb_mut().set_save_point(); - let mut origin_epoch = None; // Remember if the raft cmd fails to be applied, it must have no side effects. // E.g. `RaftApplyState` must not be changed. - let (resp, exec_result) = match self.exec_raft_cmd(ctx, req) { - Ok(a) => { - ctx.kv_wb_mut().pop_save_point().unwrap(); - if req.has_admin_request() { - origin_epoch = Some(self.region.get_region_epoch().clone()); - } - a + + let mut origin_epoch = None; + let (resp, exec_result) = if ctx.host.pre_exec(&self.region, req) { + // One of the observers want to filter execution of the command. + let mut resp = RaftCmdResponse::default(); + if !req.get_header().get_uuid().is_empty() { + let uuid = req.get_header().get_uuid().to_vec(); + resp.mut_header().set_uuid(uuid); } - Err(e) => { - // clear dirty values. - ctx.kv_wb_mut().rollback_to_save_point().unwrap(); - match e { - Error::EpochNotMatch(..) 
=> debug!( - "epoch not match"; - "region_id" => self.region_id(), - "peer_id" => self.id(), - "err" => ?e - ), - _ => error!(?e; - "execute raft command"; - "region_id" => self.region_id(), - "peer_id" => self.id(), - ), + (resp, ApplyResult::None) + } else { + ctx.exec_log_index = index; + ctx.exec_log_term = term; + ctx.kv_wb_mut().set_save_point(); + let (resp, exec_result) = match self.exec_raft_cmd(ctx, req) { + Ok(a) => { + ctx.kv_wb_mut().pop_save_point().unwrap(); + if req.has_admin_request() { + origin_epoch = Some(self.region.get_region_epoch().clone()); + } + a } - (cmd_resp::new_error(e), ApplyResult::None) - } + Err(e) => { + // clear dirty values. + ctx.kv_wb_mut().rollback_to_save_point().unwrap(); + match e { + Error::EpochNotMatch(..) => debug!( + "epoch not match"; + "region_id" => self.region_id(), + "peer_id" => self.id(), + "err" => ?e + ), + _ => error!(?e; + "execute raft command"; + "region_id" => self.region_id(), + "peer_id" => self.id(), + ), + } + (cmd_resp::new_error(e), ApplyResult::None) + } + }; + (resp, exec_result) }; if let ApplyResult::WaitMergeSource(_) = exec_result { return (resp, exec_result); @@ -4846,6 +4858,23 @@ mod tests { self } + fn compact_log(mut self, index: u64, term: u64) -> EntryBuilder { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::CompactLog); + req.mut_compact_log().set_compact_index(index); + req.mut_compact_log().set_compact_term(term); + self.req.set_admin_request(req); + self + } + + fn compute_hash(mut self, context: Vec) -> EntryBuilder { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::ComputeHash); + req.mut_compute_hash().set_context(context); + self.req.set_admin_request(req); + self + } + fn build(mut self) -> Entry { self.entry .set_data(self.req.write_to_bytes().unwrap().into()); @@ -4858,6 +4887,8 @@ mod tests { pre_query_count: Arc, post_query_count: Arc, cmd_sink: Option>>>, + filter_compact_log: Arc, + filter_consistency_check: Arc, } impl 
Coprocessor for ApplyObserver {} @@ -4872,6 +4903,23 @@ mod tests { } } + impl AdminObserver for ApplyObserver { + fn pre_exec_admin(&self, _: &mut ObserverContext<'_>, req: &AdminRequest) -> bool { + let cmd_type = req.get_cmd_type(); + if cmd_type == AdminCmdType::CompactLog + && self.filter_compact_log.deref().load(Ordering::SeqCst) + { + return true; + }; + if (cmd_type == AdminCmdType::ComputeHash || cmd_type == AdminCmdType::VerifyHash) + && self.filter_consistency_check.deref().load(Ordering::SeqCst) + { + return true; + }; + false + } + } + impl CmdObserver for ApplyObserver where E: KvEngine, @@ -5506,6 +5554,114 @@ mod tests { }); } + #[test] + fn test_exec_observer() { + let (_path, engine) = create_tmp_engine("test-exec-observer"); + let (_import_dir, importer) = create_tmp_importer("test-exec-observer"); + let mut host = CoprocessorHost::::default(); + let obs = ApplyObserver::default(); + host.registry + .register_admin_observer(1, BoxAdminObserver::new(obs.clone())); + + let (tx, rx) = mpsc::channel(); + let (region_scheduler, _) = dummy_scheduler(); + let sender = Box::new(TestNotifier { tx }); + let cfg = Config::default(); + let (router, mut system) = create_apply_batch_system(&cfg); + let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); + let builder = super::Builder:: { + tag: "test-exec-observer".to_owned(), + cfg: Arc::new(VersionTrack::new(cfg)), + sender, + region_scheduler, + coprocessor_host: host, + importer, + engine, + router: router.clone(), + store_id: 1, + pending_create_peers, + }; + system.spawn("test-exec-observer".to_owned(), builder); + + let peer_id = 3; + let mut reg = Registration { + id: peer_id, + ..Default::default() + }; + reg.region.set_id(1); + reg.region.mut_peers().push(new_peer(1, peer_id)); + reg.region.set_end_key(b"k5".to_vec()); + reg.region.mut_region_epoch().set_conf_ver(1); + reg.region.mut_region_epoch().set_version(3); + router.schedule_task(1, Msg::Registration(reg)); + + let mut index_id = 
1; + let put_entry = EntryBuilder::new(1, 1) + .put(b"k1", b"v1") + .epoch(1, 3) + .build(); + router.schedule_task(1, Msg::apply(apply(peer_id, 1, 1, vec![put_entry], vec![]))); + fetch_apply_res(&rx); + + index_id += 1; + let compact_entry = EntryBuilder::new(index_id, 1) + .compact_log(index_id - 1, 2) + .epoch(1, 3) + .build(); + // Filter CompactLog + obs.filter_compact_log.store(true, Ordering::SeqCst); + router.schedule_task( + 1, + Msg::apply(apply(peer_id, 1, 1, vec![compact_entry], vec![])), + ); + let apply_res = fetch_apply_res(&rx); + // applied_index can still be advanced. + assert_eq!(apply_res.apply_state.get_applied_index(), index_id); + assert_eq!(apply_res.applied_index_term, 1); + // Executing CompactLog is filtered and takes no effect. + assert_eq!(apply_res.exec_res.len(), 0); + assert_eq!(apply_res.apply_state.get_truncated_state().get_index(), 0); + + index_id += 1; + // Don't filter CompactLog + obs.filter_compact_log.store(false, Ordering::SeqCst); + let compact_entry = EntryBuilder::new(index_id, 1) + .compact_log(index_id - 1, 2) + .epoch(1, 3) + .build(); + router.schedule_task( + 1, + Msg::apply(apply(peer_id, 1, 1, vec![compact_entry], vec![])), + ); + let apply_res = fetch_apply_res(&rx); + // applied_index can still be advanced. + assert_eq!(apply_res.apply_state.get_applied_index(), index_id); + assert_eq!(apply_res.applied_index_term, 1); + // We can get exec result of CompactLog. + assert_eq!(apply_res.exec_res.len(), 1); + assert_eq!( + apply_res.apply_state.get_truncated_state().get_index(), + index_id - 1 + ); + + index_id += 1; + obs.filter_consistency_check.store(true, Ordering::SeqCst); + let compute_hash_entry = EntryBuilder::new(index_id, 1).compute_hash(vec![]).build(); + router.schedule_task( + 1, + Msg::apply(apply(peer_id, 1, 1, vec![compute_hash_entry], vec![])), + ); + let apply_res = fetch_apply_res(&rx); + // applied_index can still be advanced. 
+ assert_eq!(apply_res.apply_state.get_applied_index(), index_id); + assert_eq!(apply_res.applied_index_term, 1); + // We can't get exec result of ComputeHash. + assert_eq!(apply_res.exec_res.len(), 0); + obs.filter_consistency_check.store(false, Ordering::SeqCst); + + system.shutdown(); + } + #[test] fn test_cmd_observer() { let (_path, engine) = create_tmp_engine("test-delegate"); From 4f9a52872e57b210b389dda876d633e9f522aa47 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Wed, 13 Jul 2022 12:35:06 +0800 Subject: [PATCH 0073/1149] raftstore: make the gRPC poll busy check to consider the average value (#13005) ref tikv/tikv#12063 Make the gRPC poll busy check to consider the average value to make sure the check is accurate. Signed-off-by: JmPotato --- .../src/store/worker/split_config.rs | 2 +- .../src/store/worker/split_controller.rs | 101 +++++++++++++++++- 2 files changed, 97 insertions(+), 6 deletions(-) diff --git a/components/raftstore/src/store/worker/split_config.rs b/components/raftstore/src/store/worker/split_config.rs index 58df082c3e6..4fe00fff448 100644 --- a/components/raftstore/src/store/worker/split_config.rs +++ b/components/raftstore/src/store/worker/split_config.rs @@ -21,7 +21,7 @@ const DEFAULT_SPLIT_CONTAINED_SCORE: f64 = 0.5; // If the `split_balance_score` and `split_contained_score` above could not be satisfied, we will try to split the region according to its CPU load, // then these parameters below will start to work. 
-// When the gRPC poll thread CPU usage is higher than gRPC poll thread count * `DEFAULT_GRPC_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO`, +// When the gRPC poll thread CPU usage (over the past `detect_times` seconds by default) is higher than gRPC poll thread count * `DEFAULT_GRPC_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO`, // the CPU-based split won't be triggered no matter if the `DEFAULT_UNIFIED_READ_POOL_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO` and `REGION_CPU_OVERLOAD_THRESHOLD_RATIO` are exceeded // to prevent from increasing the gRPC poll CPU usage. const DEFAULT_GRPC_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO: f64 = 0.5; diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 2964796e4b2..3724e21c515 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -589,6 +589,7 @@ pub struct AutoSplitController { max_grpc_thread_count: usize, max_unified_read_pool_thread_count: usize, unified_read_pool_scale_receiver: Option>, + grpc_thread_usage_vec: Vec, } impl AutoSplitController { @@ -605,6 +606,7 @@ impl AutoSplitController { max_grpc_thread_count, max_unified_read_pool_thread_count, unified_read_pool_scale_receiver, + grpc_thread_usage_vec: vec![], } } @@ -612,19 +614,40 @@ impl AutoSplitController { AutoSplitController::new(SplitConfigManager::default(), 0, 0, None) } + fn update_grpc_thread_usage(&mut self, grpc_thread_usage: f64) { + self.grpc_thread_usage_vec.push(grpc_thread_usage); + let length = self.grpc_thread_usage_vec.len(); + let detect_times = self.cfg.detect_times as usize; + // Only keep the last `self.cfg.detect_times` elements. 
+ if length > detect_times { + self.grpc_thread_usage_vec.drain(..length - detect_times); + } + } + + fn get_avg_grpc_thread_usage(&self) -> f64 { + let length = self.grpc_thread_usage_vec.len(); + if length == 0 { + return 0.0; + } + let sum = self.grpc_thread_usage_vec.iter().sum::(); + sum / length as f64 + } + fn should_check_region_cpu(&self) -> bool { self.cfg.region_cpu_overload_threshold_ratio > 0.0 } - fn is_grpc_poll_busy(&self, grpc_thread_usage: f64) -> bool { + fn is_grpc_poll_busy(&self, avg_grpc_thread_usage: f64) -> bool { #[cfg(feature = "failpoints")] fail::fail_point!("mock_grpc_poll_is_not_busy", |_| { false }); if self.max_grpc_thread_count == 0 { return false; } - let grpc_thread_cpu_overload_threshold = - self.max_grpc_thread_count as f64 * self.cfg.grpc_thread_cpu_overload_threshold_ratio; - grpc_thread_usage > 0.0 && grpc_thread_usage >= grpc_thread_cpu_overload_threshold + if self.cfg.grpc_thread_cpu_overload_threshold_ratio <= 0.0 { + return true; + } + avg_grpc_thread_usage + >= self.max_grpc_thread_count as f64 * self.cfg.grpc_thread_cpu_overload_threshold_ratio } fn is_unified_read_pool_busy(&self, unified_read_pool_thread_usage: f64) -> bool { @@ -756,13 +779,17 @@ impl AutoSplitController { Self::collect_thread_usage(thread_stats, "grpc-server"), Self::collect_thread_usage(thread_stats, "unified-read-po"), ); + // Update first before calculating the latest average gRPC poll CPU usage. 
+ self.update_grpc_thread_usage(grpc_thread_usage); + let avg_grpc_thread_usage = self.get_avg_grpc_thread_usage(); let (is_grpc_poll_busy, is_unified_read_pool_busy) = ( - self.is_grpc_poll_busy(grpc_thread_usage), + self.is_grpc_poll_busy(avg_grpc_thread_usage), self.is_unified_read_pool_busy(unified_read_pool_thread_usage), ); debug!("flush to load base split"; "max_grpc_thread_count" => self.max_grpc_thread_count, "grpc_thread_usage" => grpc_thread_usage, + "avg_grpc_thread_usage" => avg_grpc_thread_usage, "max_unified_read_pool_thread_count" => self.max_unified_read_pool_thread_count, "unified_read_pool_thread_usage" => unified_read_pool_thread_usage, "is_grpc_poll_busy" => is_grpc_poll_busy, @@ -1799,6 +1826,70 @@ mod tests { } } + #[test] + fn test_avg_grpc_thread_cpu_usage_calculation() { + let mut auto_split_controller = AutoSplitController::default(); + let detect_times = auto_split_controller.cfg.detect_times as f64; + for grpc_thread_usage in 1..=5 { + auto_split_controller.update_grpc_thread_usage(grpc_thread_usage as f64); + } + assert_eq!( + auto_split_controller.get_avg_grpc_thread_usage(), + [1.0, 2.0, 3.0, 4.0, 5.0].iter().sum::() / 5.0, + ); + for grpc_thread_usage in 6..=10 { + auto_split_controller.update_grpc_thread_usage(grpc_thread_usage as f64); + } + assert_eq!( + auto_split_controller.get_avg_grpc_thread_usage(), + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] + .iter() + .sum::() + / detect_times, + ); + for grpc_thread_usage in 11..=15 { + auto_split_controller.update_grpc_thread_usage(grpc_thread_usage as f64); + } + assert_eq!( + auto_split_controller.get_avg_grpc_thread_usage(), + [6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0] + .iter() + .sum::() + / detect_times, + ); + for grpc_thread_usage in 1..=10 { + auto_split_controller.update_grpc_thread_usage(grpc_thread_usage as f64); + } + assert_eq!( + auto_split_controller.get_avg_grpc_thread_usage(), + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] + .iter() + 
.sum::() + / detect_times, + ); + // Change the `detect_times` to a smaller value. + auto_split_controller.cfg.detect_times = 5; + let detect_times = auto_split_controller.cfg.detect_times as f64; + auto_split_controller.update_grpc_thread_usage(11.0); + assert_eq!( + auto_split_controller.get_avg_grpc_thread_usage(), + [7.0, 8.0, 9.0, 10.0, 11.0].iter().sum::() / detect_times, + ); + // Change the `detect_times` to a bigger value. + auto_split_controller.cfg.detect_times = 6; + let detect_times = auto_split_controller.cfg.detect_times as f64; + auto_split_controller.update_grpc_thread_usage(12.0); + assert_eq!( + auto_split_controller.get_avg_grpc_thread_usage(), + [7.0, 8.0, 9.0, 10.0, 11.0, 12.0].iter().sum::() / detect_times, + ); + auto_split_controller.update_grpc_thread_usage(13.0); + assert_eq!( + auto_split_controller.get_avg_grpc_thread_usage(), + [8.0, 9.0, 10.0, 11.0, 12.0, 13.0].iter().sum::() / detect_times, + ); + } + #[bench] fn samples_evaluate(b: &mut test::Bencher) { let mut samples = Samples(vec![Sample::new(b"c")]); From 08d2407efd80278b06bbeca7442738383378b8db Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Wed, 13 Jul 2022 12:49:05 +0800 Subject: [PATCH 0074/1149] raftstore: change send proposal time to waterfall metrics (#12993) ref tikv/tikv#12362 Durations related to a single query are recorded as waterfall metrics, which means it records the duration from the very beginning to the instant when the event happens. Previously, proposal_send_wait_nanos was an exception. So, this commit makes this consistent with other metrics. This commit also adds a Grafana Raft waterfall panel for it. 
Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- .../raftstore/src/store/local_metrics.rs | 6 +- components/raftstore/src/store/metrics.rs | 12 +- components/raftstore/src/store/peer.rs | 17 +- components/tracker/src/lib.rs | 12 +- metrics/grafana/tikv_details.json | 436 +++++++++--------- 5 files changed, 231 insertions(+), 252 deletions(-) diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index 304259c4571..923fb8ffc26 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -413,10 +413,10 @@ pub struct RaftMetrics { pub waterfall_metrics: bool, pub wf_batch_wait: LocalHistogram, pub wf_send_to_queue: LocalHistogram, + pub wf_send_proposal: LocalHistogram, pub wf_persist_log: LocalHistogram, pub wf_commit_log: LocalHistogram, pub wf_commit_not_persist_log: LocalHistogram, - pub proposal_send_wait: LocalHistogram, pub raft_log_gc_skipped: RaftLogGcSkippedMetrics, } @@ -438,10 +438,10 @@ impl RaftMetrics { waterfall_metrics, wf_batch_wait: STORE_WF_BATCH_WAIT_DURATION_HISTOGRAM.local(), wf_send_to_queue: STORE_WF_SEND_TO_QUEUE_DURATION_HISTOGRAM.local(), + wf_send_proposal: STORE_WF_SEND_PROPOSAL_DURATION_HISTOGRAM.local(), wf_persist_log: STORE_WF_PERSIST_LOG_DURATION_HISTOGRAM.local(), wf_commit_log: STORE_WF_COMMIT_LOG_DURATION_HISTOGRAM.local(), wf_commit_not_persist_log: STORE_WF_COMMIT_NOT_PERSIST_LOG_DURATION_HISTOGRAM.local(), - proposal_send_wait: PROPOSAL_SEND_WAIT_DURATION_HISTOGRAM.local(), raft_log_gc_skipped: RaftLogGcSkippedMetrics::default(), } } @@ -461,10 +461,10 @@ impl RaftMetrics { if self.waterfall_metrics { self.wf_batch_wait.flush(); self.wf_send_to_queue.flush(); + self.wf_send_proposal.flush(); self.wf_persist_log.flush(); self.wf_commit_log.flush(); self.wf_commit_not_persist_log.flush(); - self.proposal_send_wait.flush(); } let mut missing = self.leader_missing.lock().unwrap(); 
LEADER_MISSING.set(missing.len() as i64); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index c60152784a5..c4a1c22d800 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -324,6 +324,12 @@ lazy_static! { "Bucketed histogram of proposals' send to write queue duration.", exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); + pub static ref STORE_WF_SEND_PROPOSAL_DURATION_HISTOGRAM: Histogram = + register_histogram!( + "tikv_raftstore_store_wf_send_proposal_duration_seconds", + "Bucketed histogram of proposals' waterfall send duration", + exponential_buckets(1e-6, 2.0, 26).unwrap() + ).unwrap(); pub static ref STORE_WF_BEFORE_WRITE_DURATION_HISTOGRAM: Histogram = register_histogram!( "tikv_raftstore_store_wf_before_write_duration_seconds", @@ -360,12 +366,6 @@ lazy_static! { "Bucketed histogram of proposals' commit but not persist duration", exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); - pub static ref PROPOSAL_SEND_WAIT_DURATION_HISTOGRAM: Histogram = - register_histogram!( - "tikv_raftstore_proposal_send_wait_duration_seconds", - "Bucketed histogram of proposals' send wait duration", - exponential_buckets(1e-6, 2.0, 26).unwrap() - ).unwrap(); pub static ref PEER_PROPOSAL_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 5897309f0b2..489db8b9600 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -65,7 +65,6 @@ use tikv_util::{ Either, }; use time::Timespec; -use tracker::GLOBAL_TRACKERS; use txn_types::WriteBatchFlags; use uuid::Uuid; @@ -1576,6 +1575,7 @@ where msgs: Vec, ) { let mut now = None; + let std_now = Instant::now(); for msg in msgs { let msg_type = msg.get_message().get_msg_type(); if msg_type == MessageType::MsgSnapshot { @@ -1619,17 +1619,10 @@ where 
.binary_search_by_key(&index, |p: &Proposal<_>| p.index) { let proposal = &self.proposals.queue[idx]; - if term == proposal.term - && let Some(propose_time) = proposal.propose_time - && let Ok(dur) = ((*now.get_or_insert(monotonic_raw_now())) - propose_time).to_std() { - ctx.raft_metrics - .proposal_send_wait - .observe(dur.as_secs_f64()); - for t in proposal.cb.get_trackers().iter().flat_map(|v| v.iter().flat_map(|t| t.as_tracker_token())) { - GLOBAL_TRACKERS.with_tracker(t, |trakcer| { - if trakcer.metrics.propose_send_wait_nanos == 0 { - trakcer.metrics.propose_send_wait_nanos = dur.as_nanos() as u64; - } + if term == proposal.term { + for tracker in proposal.cb.get_trackers().iter().flat_map(|v| v.iter()) { + tracker.observe(std_now, &ctx.raft_metrics.wf_send_proposal, |t| { + &mut t.metrics.wf_send_proposal_nanos }); } } diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index 7e1aab80882..dbefbbe770c 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -43,7 +43,11 @@ impl Tracker { pub fn write_write_detail(&self, detail: &mut pb::WriteDetail) { detail.set_store_batch_wait_nanos(self.metrics.wf_batch_wait_nanos); - detail.set_propose_send_wait_nanos(self.metrics.propose_send_wait_nanos); + detail.set_propose_send_wait_nanos( + self.metrics + .wf_send_proposal_nanos + .saturating_sub(self.metrics.wf_send_to_queue_nanos), + ); detail.set_persist_log_nanos( self.metrics.wf_persist_log_nanos - self.metrics.wf_send_to_queue_nanos, ); @@ -56,9 +60,7 @@ impl Tracker { // And note that the time before flushing the raft message to the RPC channel is // also counted in this value (to be improved in the future). 
detail.set_commit_log_nanos( - self.metrics.wf_commit_log_nanos - - self.metrics.wf_batch_wait_nanos - - self.metrics.propose_send_wait_nanos, + self.metrics.wf_commit_log_nanos - self.metrics.wf_batch_wait_nanos, ); detail.set_apply_batch_wait_nanos(self.metrics.apply_wait_nanos); detail.set_apply_log_nanos(self.metrics.apply_time_nanos - self.metrics.apply_wait_nanos); @@ -128,12 +130,12 @@ pub struct RequestMetrics { pub write_instant: Option, pub wf_batch_wait_nanos: u64, pub wf_send_to_queue_nanos: u64, + pub wf_send_proposal_nanos: u64, pub wf_persist_log_nanos: u64, pub wf_before_write_nanos: u64, pub wf_write_end_nanos: u64, pub wf_kvdb_end_nanos: u64, pub wf_commit_log_nanos: u64, - pub propose_send_wait_nanos: u64, pub commit_not_persisted: bool, pub store_mutex_lock_nanos: u64, // should be 0 if using raft-engine pub store_thread_wait_nanos: u64, diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 6ef292f95e5..b07aff345a7 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -9877,7 +9877,7 @@ "h": 9, "w": 24, "x": 0, - "y": 9 + "y": 10 }, "hiddenSeries": false, "id": 13132, @@ -9897,7 +9897,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -10017,7 +10017,7 @@ "h": 8, "w": 12, "x": 0, - "y": 18 + "y": 19 }, "hiddenSeries": false, "id": 13257, @@ -10037,7 +10037,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -10157,7 +10157,7 @@ "h": 8, "w": 12, "x": 12, - "y": 18 + "y": 19 }, "hiddenSeries": false, "id": 13259, @@ -10177,7 +10177,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -10297,7 +10297,7 @@ "h": 8, "w": 12, "x": 0, - "y": 26 
+ "y": 27 }, "hiddenSeries": false, "id": 13261, @@ -10317,12 +10317,13 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [ { + "$$hashKey": "object:113", "alias": "count", "dashLength": 1, "dashes": true, @@ -10333,6 +10334,7 @@ "zindex": -3 }, { + "$$hashKey": "object:114", "alias": "avg", "fill": 7 } @@ -10395,6 +10397,7 @@ }, "yaxes": [ { + "$$hashKey": "object:139", "format": "s", "label": null, "logBase": 1, @@ -10403,6 +10406,7 @@ "show": true }, { + "$$hashKey": "object:140", "format": "short", "label": null, "logBase": 1, @@ -10437,7 +10441,7 @@ "h": 8, "w": 12, "x": 12, - "y": 26 + "y": 27 }, "hiddenSeries": false, "id": 13263, @@ -10457,12 +10461,13 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [ { + "$$hashKey": "object:217", "alias": "count", "dashLength": 1, "dashes": true, @@ -10473,6 +10478,7 @@ "zindex": -3 }, { + "$$hashKey": "object:218", "alias": "avg", "fill": 7 } @@ -10535,6 +10541,7 @@ }, "yaxes": [ { + "$$hashKey": "object:243", "format": "s", "label": null, "logBase": 1, @@ -10543,6 +10550,7 @@ "show": true }, { + "$$hashKey": "object:244", "format": "short", "label": null, "logBase": 1, @@ -10577,7 +10585,7 @@ "h": 8, "w": 12, "x": 0, - "y": 34 + "y": 35 }, "hiddenSeries": false, "id": 13265, @@ -10597,12 +10605,13 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [ { + "$$hashKey": "object:529", "alias": "count", "dashLength": 1, "dashes": true, @@ -10613,6 +10622,7 @@ "zindex": -3 }, { + "$$hashKey": "object:530", "alias": "avg", "fill": 7 } @@ -10675,6 +10685,7 @@ }, "yaxes": [ { + "$$hashKey": "object:555", "format": "s", "label": null, 
"logBase": 1, @@ -10683,6 +10694,7 @@ "show": true }, { + "$$hashKey": "object:556", "format": "short", "label": null, "logBase": 1, @@ -10706,7 +10718,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The before write time duration of each request", + "description": "The send raft message of the proposal duration of each request", "fieldConfig": { "defaults": {}, "overrides": [] @@ -10717,10 +10729,10 @@ "h": 8, "w": 12, "x": 12, - "y": 34 + "y": 35 }, "hiddenSeries": false, - "id": 13267, + "id": 23763572857, "legend": { "avg": false, "current": false, @@ -10737,12 +10749,13 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [ { + "$$hashKey": "object:529", "alias": "count", "dashLength": 1, "dashes": true, @@ -10753,6 +10766,7 @@ "zindex": -3 }, { + "$$hashKey": "object:530", "alias": "avg", "fill": 7 } @@ -10763,7 +10777,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", "hide": false, "interval": "", "legendFormat": "999%", @@ -10771,7 +10785,7 @@ }, { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", "hide": false, "interval": "", "legendFormat": "99%", @@ -10779,7 +10793,7 @@ }, { "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_sum{instance=~\"$instance\"}[30s])) / 
sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "expr": "sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_count{instance=~\"$instance\"}[30s]))", "hide": false, "interval": "", "legendFormat": "avg", @@ -10787,7 +10801,7 @@ }, { "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "expr": "sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_count{instance=~\"$instance\"}[30s]))", "hide": true, "instant": false, "interval": "", @@ -10799,7 +10813,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Store before write duration", + "title": "Store send proposal duration", "tooltip": { "shared": true, "sort": 0, @@ -10815,6 +10829,7 @@ }, "yaxes": [ { + "$$hashKey": "object:555", "format": "s", "label": null, "logBase": 1, @@ -10823,6 +10838,7 @@ "show": true }, { + "$$hashKey": "object:556", "format": "short", "label": null, "logBase": 1, @@ -10857,7 +10873,7 @@ "h": 8, "w": 12, "x": 0, - "y": 42 + "y": 43 }, "hiddenSeries": false, "id": 13269, @@ -10877,7 +10893,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -10986,7 +11002,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The write end duration of each request", + "description": "The before write time duration of each request", "fieldConfig": { "defaults": {}, "overrides": [] @@ -10997,10 +11013,10 @@ "h": 8, "w": 12, "x": 12, - "y": 42 + "y": 43 }, "hiddenSeries": false, - "id": 13271, + "id": 13267, "legend": { "avg": false, "current": false, @@ -11017,12 +11033,13 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", 
"pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [ { + "$$hashKey": "object:733", "alias": "count", "dashLength": 1, "dashes": true, @@ -11033,6 +11050,7 @@ "zindex": -3 }, { + "$$hashKey": "object:734", "alias": "avg", "fill": 7 } @@ -11043,7 +11061,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", "hide": false, "interval": "", "legendFormat": "999%", @@ -11051,7 +11069,7 @@ }, { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", "hide": false, "interval": "", "legendFormat": "99%", @@ -11059,7 +11077,7 @@ }, { "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "expr": "sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_count{instance=~\"$instance\"}[30s]))", "hide": false, "interval": "", "legendFormat": "avg", @@ -11067,7 +11085,7 @@ }, { "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "expr": "sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_count{instance=~\"$instance\"}[30s]))", "hide": true, "instant": false, "interval": "", @@ -11079,7 +11097,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": 
"Store write end duration", + "title": "Store before write duration", "tooltip": { "shared": true, "sort": 0, @@ -11095,6 +11113,7 @@ }, "yaxes": [ { + "$$hashKey": "object:759", "format": "s", "label": null, "logBase": 1, @@ -11103,6 +11122,7 @@ "show": true }, { + "$$hashKey": "object:760", "format": "short", "label": null, "logBase": 1, @@ -11137,7 +11157,7 @@ "h": 8, "w": 12, "x": 0, - "y": 50 + "y": 51 }, "hiddenSeries": false, "id": 13273, @@ -11157,7 +11177,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -11266,7 +11286,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The commit and persist duration of each request", + "description": "The write end duration of each request", "fieldConfig": { "defaults": {}, "overrides": [] @@ -11277,10 +11297,10 @@ "h": 8, "w": 12, "x": 12, - "y": 50 + "y": 51 }, "hiddenSeries": false, - "id": 13275, + "id": 13271, "legend": { "avg": false, "current": false, @@ -11297,12 +11317,13 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [ { + "$$hashKey": "object:841", "alias": "count", "dashLength": 1, "dashes": true, @@ -11313,6 +11334,7 @@ "zindex": -3 }, { + "$$hashKey": "object:842", "alias": "avg", "fill": 7 } @@ -11323,7 +11345,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", "hide": false, "interval": "", "legendFormat": "999%", @@ -11331,7 +11353,7 @@ }, { "exemplar": true, - "expr": "histogram_quantile(0.99, 
sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", "hide": false, "interval": "", "legendFormat": "99%", @@ -11339,7 +11361,7 @@ }, { "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "expr": "sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_count{instance=~\"$instance\"}[30s]))", "hide": false, "interval": "", "legendFormat": "avg", @@ -11347,8 +11369,8 @@ }, { "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": false, + "expr": "sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "hide": true, "instant": false, "interval": "", "legendFormat": "count", @@ -11359,7 +11381,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Store commit and persist duration", + "title": "Store write end duration", "tooltip": { "shared": true, "sort": 0, @@ -11375,6 +11397,7 @@ }, "yaxes": [ { + "$$hashKey": "object:867", "format": "s", "label": null, "logBase": 1, @@ -11383,6 +11406,7 @@ "show": true }, { + "$$hashKey": "object:868", "format": "short", "label": null, "logBase": 1, @@ -11417,7 +11441,7 @@ "h": 8, "w": 12, "x": 0, - "y": 58 + "y": 59 }, "hiddenSeries": false, "id": 13277, @@ -11437,7 +11461,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -11535,6 +11559,146 @@ "align": false, "alignLevel": null } + }, + { + 
"aliasColors": { + "99%": "#eab839", + "999%": "dark-red", + "count": "rgb(33, 250, 2)" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The commit and persist duration of each request", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 59 + }, + "hiddenSeries": false, + "id": 13275, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "count", + "dashLength": 1, + "dashes": true, + "fill": 2, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "fill": 7 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "hide": false, + "interval": "", + "legendFormat": "999%", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "hide": false, + "interval": "", + "legendFormat": "99%", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "hide": false, + "interval": "", + "legendFormat": "avg", + "refId": "C" + }, + { + "exemplar": true, + "expr": 
"sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "count", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Store commit and persist duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "title": "Raft Waterfall", @@ -13804,186 +13968,6 @@ "yBucketNumber": null, "yBucketSize": null }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 47 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 23763572784, - "legend": { - "show": false - }, - "links": [], - "reverseYBuckets": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(delta(tikv_raftstore_proposal_send_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", - "format": "heatmap", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Proposal send wait duration", 
- "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 47 - }, - "hiddenSeries": false, - "id": 23763572783, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_proposal_send_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "99% Proposal send wait duration per server", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": 
"time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:106", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:107", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, { "aliasColors": {}, "bars": false, From 03b44b5e219fb795b20ebb9367b66b2adf3800ac Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 12 Jul 2022 23:19:06 -0700 Subject: [PATCH 0075/1149] add TestTabletFactory and refine TabletFactory trait (#12940) close tikv/tikv#12939 Add TestTabletFactory for testing raftstorev2 with real multi-rocksdb. Before this change there's no way to create a tablet factory in raftstore's test code. And also this PR refines TabletFactory trait so that its implementation code will not hard wire with raftstore v1's code. This will make Tabletfactory's implementation work both with raftstore v1 and raftstore v2. 
Signed-off-by: qi.xu Co-authored-by: qi.xu --- Cargo.lock | 1 + Cargo.toml | 2 +- .../engine_rocks/src/compact_listener.rs | 18 +- components/engine_test/Cargo.toml | 1 + components/engine_test/src/lib.rs | 221 ++++++++++++++++-- components/raftstore/Cargo.toml | 3 +- .../raftstore/src/compacted_event_sender.rs | 23 ++ components/raftstore/src/lib.rs | 4 + components/raftstore/src/store/snap.rs | 10 +- components/server/Cargo.toml | 2 +- components/server/src/server.rs | 5 +- components/test_raftstore/src/util.rs | 8 +- src/server/engine_factory.rs | 64 ++--- src/server/engine_factory_v2.rs | 20 +- 14 files changed, 297 insertions(+), 85 deletions(-) create mode 100644 components/raftstore/src/compacted_event_sender.rs diff --git a/Cargo.lock b/Cargo.lock index 7e562246adc..15da9f000b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1520,6 +1520,7 @@ dependencies = [ name = "engine_test" version = "0.0.1" dependencies = [ + "collections", "encryption", "engine_panic", "engine_rocks", diff --git a/Cargo.toml b/Cargo.toml index 622547b2294..dd071c9809e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -132,7 +132,7 @@ prometheus-static-metric = "0.5" protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raft_log_engine = { path = "components/raft_log_engine", default-features = false } -raftstore = { path = "components/raftstore", default-features = false } +raftstore = { path = "components/raftstore", default-features = false, features = ["engine_rocks"] } rand = "0.7.3" regex = "1.3" resource_metering = { path = "components/resource_metering" } diff --git a/components/engine_rocks/src/compact_listener.rs b/components/engine_rocks/src/compact_listener.rs index 0affe70dd4b..2cfdb253eb0 100644 --- a/components/engine_rocks/src/compact_listener.rs +++ b/components/engine_rocks/src/compact_listener.rs @@ -7,6 +7,7 @@ use std::{ Bound::{Excluded, Included, Unbounded}, }, path::Path, + sync::Arc, 
}; use collections::hash_set_with_capacity; @@ -205,17 +206,26 @@ impl CompactedEvent for RocksCompactedEvent { pub type Filter = fn(&RocksCompactionJobInfo<'_>) -> bool; +/// The trait for sending RocksCompactedEvent event +/// This is to workaround Box cannot be cloned +pub trait CompactedEventSender { + fn send(&self, event: RocksCompactedEvent); +} + pub struct CompactionListener { - ch: Box, + event_sender: Arc, filter: Option, } impl CompactionListener { pub fn new( - ch: Box, + event_sender: Arc, filter: Option, ) -> CompactionListener { - CompactionListener { ch, filter } + CompactionListener { + event_sender, + filter, + } } } @@ -288,7 +298,7 @@ impl EventListener for CompactionListener { return; } - (self.ch)(RocksCompactedEvent::new( + self.event_sender.send(RocksCompactedEvent::new( info, smallest_key.unwrap(), largest_key.unwrap(), diff --git a/components/engine_test/Cargo.toml b/components/engine_test/Cargo.toml index 61061957563..a9bfbfd41d3 100644 --- a/components/engine_test/Cargo.toml +++ b/components/engine_test/Cargo.toml @@ -24,6 +24,7 @@ test-engines-panic = [ ] [dependencies] +collections = { path = "../collections", default-features = false } encryption = { path = "../encryption", default-features = false } engine_panic = { path = "../engine_panic", default-features = false } engine_rocks = { path = "../engine_rocks", default-features = false } diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index 4d804a17a9f..e5dddfdcee2 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -74,6 +74,12 @@ pub mod raft { /// Types and constructors for the "kv" engine pub mod kv { + use std::{ + path::{Path, PathBuf}, + sync::{Arc, Mutex}, + }; + + use collections::HashMap; #[cfg(feature = "test-engine-kv-panic")] pub use engine_panic::{ PanicEngine as KvTestEngine, PanicEngineIterator as KvTestEngineIterator, @@ -84,7 +90,8 @@ pub mod kv { RocksEngine as KvTestEngine, 
RocksEngineIterator as KvTestEngineIterator, RocksSnapshot as KvTestSnapshot, RocksWriteBatch as KvTestWriteBatch, }; - use engine_traits::Result; + use engine_traits::{Result, TabletAccessor, TabletFactory}; + use tikv_util::box_err; use crate::ctor::{CFOptions, DBOptions, KvEngineConstructorExt}; @@ -92,7 +99,7 @@ pub mod kv { path: &str, db_opt: Option, cfs: &[&str], - opts: Option>>, + opts: Option>, ) -> Result { KvTestEngine::new_kv_engine(path, db_opt, cfs, opts) } @@ -100,10 +107,186 @@ pub mod kv { pub fn new_engine_opt( path: &str, db_opt: DBOptions, - cfs_opts: Vec>, + cfs_opts: Vec, ) -> Result { KvTestEngine::new_kv_engine_opt(path, db_opt, cfs_opts) } + + const TOMBSTONE_MARK: &str = "TOMBSTONE_TABLET"; + + #[derive(Clone)] + pub struct TestTabletFactory { + root_path: String, + db_opt: Option, + cfs: Vec, + opts: Option>, + registry: Arc>>, + } + + impl TestTabletFactory { + pub fn new( + root_path: &str, + db_opt: Option, + cfs: &[&str], + opts: Option>, + ) -> Self { + Self { + root_path: root_path.to_string(), + db_opt, + cfs: cfs.iter().map(|s| s.to_string()).collect(), + opts, + registry: Arc::new(Mutex::new(HashMap::default())), + } + } + } + + impl TabletFactory for TestTabletFactory { + fn create_tablet(&self, id: u64, suffix: u64) -> Result { + let mut reg = self.registry.lock().unwrap(); + if let Some(db) = reg.get(&(id, suffix)) { + return Err(box_err!( + "region {} {} already exists", + id, + db.as_inner().path() + )); + } + let tablet_path = self.tablet_path(id, suffix); + let tablet_path = tablet_path.to_str().unwrap(); + let mut cfs = vec![]; + self.cfs.iter().for_each(|s| cfs.push(s.as_str())); + let kv_engine = KvTestEngine::new_kv_engine( + tablet_path, + self.db_opt.clone(), + cfs.as_slice(), + self.opts.clone(), + )?; + reg.insert((id, suffix), kv_engine.clone()); + Ok(kv_engine) + } + + fn open_tablet(&self, id: u64, suffix: u64) -> Result { + let mut reg = self.registry.lock().unwrap(); + if let Some(db) = reg.get(&(id, suffix)) 
{ + return Ok(db.clone()); + } + + let db_path = self.tablet_path(id, suffix); + let db = self.open_tablet_raw(db_path.as_path(), false)?; + reg.insert((id, suffix), db.clone()); + Ok(db) + } + + fn open_tablet_cache(&self, id: u64, suffix: u64) -> Option { + let reg = self.registry.lock().unwrap(); + if let Some(db) = reg.get(&(id, suffix)) { + return Some(db.clone()); + } + None + } + + fn open_tablet_cache_any(&self, id: u64) -> Option { + let reg = self.registry.lock().unwrap(); + if let Some(k) = reg.keys().find(|k| k.0 == id) { + return Some(reg.get(k).unwrap().clone()); + } + None + } + + fn open_tablet_raw(&self, path: &Path, _readonly: bool) -> Result { + if !KvTestEngine::exists(path.to_str().unwrap_or_default()) { + return Err(box_err!( + "path {} does not have db", + path.to_str().unwrap_or_default() + )); + } + let (mut tablet_id, mut tablet_suffix) = (0, 1); + if let Some(s) = path.file_name().map(|s| s.to_string_lossy()) { + let mut split = s.split('_'); + tablet_id = split.next().and_then(|s| s.parse().ok()).unwrap_or(0); + tablet_suffix = split.next().and_then(|s| s.parse().ok()).unwrap_or(1); + } + self.create_tablet(tablet_id, tablet_suffix) + } + + #[inline] + fn create_shared_db(&self) -> Result { + self.create_tablet(0, 0) + } + + #[inline] + fn exists_raw(&self, path: &Path) -> bool { + KvTestEngine::exists(path.to_str().unwrap_or_default()) + } + + #[inline] + fn tablets_path(&self) -> PathBuf { + Path::new(&self.root_path).join("tablets") + } + + #[inline] + fn tablet_path(&self, id: u64, suffix: u64) -> PathBuf { + Path::new(&self.root_path).join(format!("tablets/{}_{}", id, suffix)) + } + + #[inline] + fn mark_tombstone(&self, region_id: u64, suffix: u64) { + let path = self.tablet_path(region_id, suffix).join(TOMBSTONE_MARK); + std::fs::File::create(&path).unwrap(); + self.registry.lock().unwrap().remove(&(region_id, suffix)); + } + + #[inline] + fn is_tombstoned(&self, region_id: u64, suffix: u64) -> bool { + self.tablet_path(region_id, 
suffix) + .join(TOMBSTONE_MARK) + .exists() + } + + #[inline] + fn destroy_tablet(&self, id: u64, suffix: u64) -> engine_traits::Result<()> { + let path = self.tablet_path(id, suffix); + self.registry.lock().unwrap().remove(&(id, suffix)); + let _ = std::fs::remove_dir_all(path); + Ok(()) + } + + #[inline] + fn load_tablet(&self, path: &Path, id: u64, suffix: u64) -> Result { + { + let reg = self.registry.lock().unwrap(); + if let Some(db) = reg.get(&(id, suffix)) { + return Err(box_err!( + "region {} {} already exists", + id, + db.as_inner().path() + )); + } + } + + let db_path = self.tablet_path(id, suffix); + std::fs::rename(path, &db_path)?; + self.open_tablet_raw(db_path.as_path(), false) + } + + fn clone(&self) -> Box + Send> { + Box::new(std::clone::Clone::clone(self)) + } + } + + impl TabletAccessor for TestTabletFactory { + #[inline] + fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &KvTestEngine)) { + let reg = self.registry.lock().unwrap(); + for ((id, suffix), tablet) in &*reg { + f(*id, *suffix, tablet) + } + } + + // it have multi tablets. + fn is_single_engine(&self) -> bool { + false + } + } } /// Create a storage engine with a concrete type. 
This should ultimately be the @@ -145,7 +328,7 @@ pub mod ctor { path: &str, db_opt: Option, cfs: &[&str], - opts: Option>>, + opts: Option>, ) -> Result; /// Create a new engine with specified column families and options @@ -155,7 +338,7 @@ pub mod ctor { fn new_kv_engine_opt( path: &str, db_opt: DBOptions, - cfs_opts: Vec>, + cfs_opts: Vec, ) -> Result; } @@ -183,14 +366,18 @@ pub mod ctor { pub type RaftDBOptions = DBOptions; - pub struct CFOptions<'a> { - pub cf: &'a str, + #[derive(Clone)] + pub struct CFOptions { + pub cf: String, pub options: ColumnFamilyOptions, } - impl<'a> CFOptions<'a> { - pub fn new(cf: &'a str, options: ColumnFamilyOptions) -> CFOptions<'a> { - CFOptions { cf, options } + impl CFOptions { + pub fn new(cf: &str, options: ColumnFamilyOptions) -> CFOptions { + CFOptions { + cf: cf.to_string(), + options, + } } } @@ -297,7 +484,7 @@ pub mod ctor { _path: &str, _db_opt: Option, _cfs: &[&str], - _opts: Option>>, + _opts: Option>, ) -> Result { Ok(PanicEngine) } @@ -305,7 +492,7 @@ pub mod ctor { fn new_kv_engine_opt( _path: &str, _db_opt: DBOptions, - _cfs_opts: Vec>, + _cfs_opts: Vec, ) -> Result { Ok(PanicEngine) } @@ -345,7 +532,7 @@ pub mod ctor { path: &str, db_opt: Option, cfs: &[&str], - opts: Option>>, + opts: Option>, ) -> Result { let rocks_db_opts = match db_opt { Some(db_opt) => Some(get_rocks_db_opts(db_opt)?), @@ -367,7 +554,7 @@ pub mod ctor { let mut rocks_cf_opts = RocksColumnFamilyOptions::new(); set_standard_cf_opts(rocks_cf_opts.as_raw_mut(), &cf_opts.options); set_cf_opts(&mut rocks_cf_opts, &cf_opts.options); - RocksCFOptions::new(cf_opts.cf, rocks_cf_opts) + RocksCFOptions::new(&cf_opts.cf, rocks_cf_opts) }) .collect(); rocks_new_engine(path, rocks_db_opts, &[], Some(rocks_cfs_opts)) @@ -376,7 +563,7 @@ pub mod ctor { fn new_kv_engine_opt( path: &str, db_opt: DBOptions, - cfs_opts: Vec>, + cfs_opts: Vec, ) -> Result { let rocks_db_opts = get_rocks_db_opts(db_opt)?; let rocks_cfs_opts = cfs_opts @@ -385,7 +572,7 @@ pub 
mod ctor { let mut rocks_cf_opts = RocksColumnFamilyOptions::new(); set_standard_cf_opts(rocks_cf_opts.as_raw_mut(), &cf_opts.options); set_cf_opts(&mut rocks_cf_opts, &cf_opts.options); - RocksCFOptions::new(cf_opts.cf, rocks_cf_opts) + RocksCFOptions::new(&cf_opts.cf, rocks_cf_opts) }) .collect(); rocks_new_engine_opt(path, rocks_db_opts, rocks_cfs_opts) @@ -402,7 +589,7 @@ pub mod ctor { let mut rocks_cf_opts = RocksColumnFamilyOptions::new(); set_standard_cf_opts(rocks_cf_opts.as_raw_mut(), &cf_opts.options); set_cf_opts(&mut rocks_cf_opts, &cf_opts.options); - let default_cfs_opts = vec![RocksCFOptions::new(cf_opts.cf, rocks_cf_opts)]; + let default_cfs_opts = vec![RocksCFOptions::new(&cf_opts.cf, rocks_cf_opts)]; rocks_new_engine(path, rocks_db_opts, &[], Some(default_cfs_opts)) } } diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 9d8c39d5746..3b47ca08ec5 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -7,7 +7,7 @@ edition = "2018" publish = false [features] -default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] +default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "engine_rocks"] failpoints = ["fail/failpoints"] testexport = [] test-engine-kv-rocksdb = [ @@ -38,6 +38,7 @@ crc32fast = "1.2" crossbeam = "0.8" derivative = "2" encryption = { path = "../encryption", default-features = false } +engine_rocks = { path = "../engine_rocks", default-features = false, optional = true } # Should be [dev-dependencies] but we need to control the features # https://github.com/rust-lang/cargo/issues/6915 diff --git a/components/raftstore/src/compacted_event_sender.rs b/components/raftstore/src/compacted_event_sender.rs new file mode 100644 index 00000000000..99ba70a0512 --- /dev/null +++ b/components/raftstore/src/compacted_event_sender.rs @@ -0,0 +1,23 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+use std::sync::Mutex; + +use engine_rocks::{CompactedEventSender, RocksCompactedEvent, RocksEngine}; +use engine_traits::RaftEngine; +use tikv_util::error_unknown; + +use crate::store::{fsm::store::RaftRouter, StoreMsg}; + +// raftstore v1's implementation +pub struct RaftRouterCompactedEventSender { + pub router: Mutex>, +} + +impl CompactedEventSender for RaftRouterCompactedEventSender { + fn send(&self, event: RocksCompactedEvent) { + let router = self.router.lock().unwrap(); + let event = StoreMsg::CompactedEvent(event); + if let Err(e) = router.send_control(event) { + error_unknown!(?e; "send compaction finished event to raftstore failed"); + } + } +} diff --git a/components/raftstore/src/lib.rs b/components/raftstore/src/lib.rs index ed70dacb37b..f26022efe64 100644 --- a/components/raftstore/src/lib.rs +++ b/components/raftstore/src/lib.rs @@ -13,11 +13,15 @@ extern crate test; #[macro_use] extern crate derivative; +#[cfg(feature = "engine_rocks")] +pub mod compacted_event_sender; pub mod coprocessor; pub mod errors; pub mod router; pub mod store; +#[cfg(feature = "engine_rocks")] +pub use self::compacted_event_sender::RaftRouterCompactedEventSender; pub use self::{ coprocessor::{RegionInfo, RegionInfoAccessor, SeekRegionCallback}, errors::{DiscardReason, Error, Result}, diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index eaf99506f4b..7bcaeb5529b 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1918,12 +1918,12 @@ pub mod tests { const BYTE_SIZE: usize = 1; type DBBuilder = - fn(p: &Path, db_opt: Option, cf_opts: Option>>) -> Result; + fn(p: &Path, db_opt: Option, cf_opts: Option>) -> Result; pub fn open_test_empty_db( path: &Path, db_opt: Option, - cf_opts: Option>>, + cf_opts: Option>, ) -> Result where E: KvEngine + KvEngineConstructorExt, @@ -1936,7 +1936,7 @@ pub mod tests { pub fn open_test_db( path: &Path, db_opt: Option, - cf_opts: Option>>, + cf_opts: 
Option>, ) -> Result where E: KvEngine + KvEngineConstructorExt, @@ -1957,7 +1957,7 @@ pub mod tests { pub fn open_test_db_with_100keys( path: &Path, db_opt: Option, - cf_opts: Option>>, + cf_opts: Option>, ) -> Result where E: KvEngine + KvEngineConstructorExt, @@ -1981,7 +1981,7 @@ pub mod tests { path: &TempDir, raft_db_opt: Option, kv_db_opt: Option, - kv_cf_opts: Option>>, + kv_cf_opts: Option>, regions: &[u64], ) -> Result> { let p = path.path(); diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index 650f9f6932b..c2617d4896c 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -65,7 +65,7 @@ prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raft_log_engine = { path = "../raft_log_engine", default-features = false } -raftstore = { path = "../raftstore", default-features = false } +raftstore = { path = "../raftstore", default-features = false, features = ["engine_rocks"] } rand = "0.8" resolved_ts = { path = "../../components/resolved_ts", default-features = false } resource_metering = { path = "../resource_metering" } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 37d031753ce..4bd95b1de60 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -76,6 +76,7 @@ use raftstore::{ AutoSplitController, CheckLeaderRunner, GlobalReplicationState, LocalReader, SnapManager, SnapManagerBuilder, SplitCheckRunner, SplitConfigManager, }, + RaftRouterCompactedEventSender, }; use security::SecurityManager; use tikv::{ @@ -1627,7 +1628,9 @@ impl TiKvServer { // Create kv engine. 
let mut builder = KvEngineFactoryBuilder::new(env, &self.config, &self.store_path) - .compaction_filter_router(self.router.clone()) + .compaction_event_sender(Arc::new(RaftRouterCompactedEventSender { + router: Mutex::new(self.router.clone()), + })) .region_info_accessor(self.region_info_accessor.clone()) .sst_recovery_sender(self.init_sst_recovery_sender()) .flow_listener(flow_listener); diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 288e99a3837..12ca8f9a867 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -4,7 +4,7 @@ use std::{ fmt::Write, path::Path, str::FromStr, - sync::{mpsc, Arc}, + sync::{mpsc, Arc, Mutex}, thread, time::Duration, }; @@ -44,7 +44,7 @@ use raft::eraftpb::ConfChangeType; pub use raftstore::store::util::{find_peer, new_learner_peer, new_peer}; use raftstore::{ store::{fsm::RaftRouter, *}, - Result, + RaftRouterCompactedEventSender, Result, }; use rand::RngCore; use server::server::ConfiguredRaftEngine; @@ -658,7 +658,9 @@ pub fn create_test_engine( builder = builder.block_cache(cache); } if let Some(router) = router { - builder = builder.compaction_filter_router(router); + builder = builder.compaction_event_sender(Arc::new(RaftRouterCompactedEventSender { + router: Mutex::new(router), + })); } let factory = builder.build(); let engine = factory.create_shared_db().unwrap(); diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index fde3bc5a40f..04e1f72f05a 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -7,17 +7,14 @@ use std::{ use engine_rocks::{ raw::{Cache, Env}, - CompactionListener, FlowListener, RocksCompactedEvent, RocksCompactionJobInfo, RocksEngine, + CompactedEventSender, CompactionListener, FlowListener, RocksCompactionJobInfo, RocksEngine, RocksEventListener, }; use engine_traits::{ - CompactionJobInfo, RaftEngine, Result, TabletAccessor, TabletFactory, CF_DEFAULT, CF_WRITE, + 
CompactionJobInfo, Result, TabletAccessor, TabletFactory, CF_DEFAULT, CF_WRITE, }; use kvproto::kvrpcpb::ApiVersion; -use raftstore::{ - store::{RaftRouter, StoreMsg}, - RegionInfoAccessor, -}; +use raftstore::RegionInfoAccessor; use tikv_util::worker::Scheduler; use crate::config::{DbConfig, TiKvConfig, DEFAULT_ROCKSDB_SUB_DIR}; @@ -34,12 +31,12 @@ struct FactoryInner { root_db: Mutex>, } -pub struct KvEngineFactoryBuilder { +pub struct KvEngineFactoryBuilder { inner: FactoryInner, - router: Option>, + compact_event_sender: Option>, } -impl KvEngineFactoryBuilder { +impl KvEngineFactoryBuilder { pub fn new(env: Arc, config: &TiKvConfig, store_path: impl Into) -> Self { Self { inner: FactoryInner { @@ -53,7 +50,7 @@ impl KvEngineFactoryBuilder { sst_recovery_sender: None, root_db: Mutex::default(), }, - router: None, + compact_event_sender: None, } } @@ -77,40 +74,31 @@ impl KvEngineFactoryBuilder { self } - pub fn compaction_filter_router(mut self, router: RaftRouter) -> Self { - self.router = Some(router); + pub fn compaction_event_sender( + mut self, + sender: Arc, + ) -> Self { + self.compact_event_sender = Some(sender); self } - pub fn build(self) -> KvEngineFactory { + pub fn build(self) -> KvEngineFactory { KvEngineFactory { inner: Arc::new(self.inner), - router: Mutex::new(self.router), + compact_event_sender: self.compact_event_sender.clone(), } } } -pub struct KvEngineFactory { +#[derive(Clone)] +pub struct KvEngineFactory { inner: Arc, - router: Mutex>>, + compact_event_sender: Option>, } -impl Clone for KvEngineFactory { - fn clone(&self) -> Self { - Self { - inner: self.inner.clone(), - router: Mutex::new(self.router.lock().unwrap().clone()), - } - } -} - -impl KvEngineFactory { +impl KvEngineFactory { pub fn create_raftstore_compaction_listener(&self) -> Option { - let router = self.router.lock().unwrap(); - let ch = match &*router { - Some(r) => Mutex::new(r.clone()), - None => return None, - }; + self.compact_event_sender.as_ref()?; fn 
size_change_filter(info: &RocksCompactionJobInfo<'_>) -> bool { // When calculating region size, we only consider write and default // column families. @@ -125,16 +113,8 @@ impl KvEngineFactory { true } - - let compacted_handler = Box::new(move |compacted_event: RocksCompactedEvent| { - let ch = ch.lock().unwrap(); - let event = StoreMsg::CompactedEvent(compacted_event); - if let Err(e) = ch.send_control(event) { - error_unknown!(?e; "send compaction finished event to raftstore failed"); - } - }); Some(CompactionListener::new( - compacted_handler, + self.compact_event_sender.as_ref().unwrap().clone(), Some(size_change_filter), )) } @@ -229,7 +209,7 @@ impl KvEngineFactory { } } -impl TabletFactory for KvEngineFactory { +impl TabletFactory for KvEngineFactory { #[inline] fn create_shared_db(&self) -> Result { let root_path = self.kv_engine_path(); @@ -270,7 +250,7 @@ impl TabletFactory for KvEngineFactory { } } -impl TabletAccessor for KvEngineFactory { +impl TabletAccessor for KvEngineFactory { fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &RocksEngine)) { if let Ok(db) = self.inner.root_db.lock() { let db = db.as_ref().unwrap(); diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index 4027823f23c..ccd2f1d7b02 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -7,19 +7,19 @@ use std::{ use collections::HashMap; use engine_rocks::RocksEngine; -use engine_traits::{RaftEngine, Result, TabletAccessor, TabletFactory}; +use engine_traits::{Result, TabletAccessor, TabletFactory}; use crate::server::engine_factory::KvEngineFactory; const TOMBSTONE_MARK: &str = "TOMBSTONE_TABLET"; #[derive(Clone)] -pub struct KvEngineFactoryV2 { - inner: KvEngineFactory, +pub struct KvEngineFactoryV2 { + inner: KvEngineFactory, registry: Arc>>, } -impl TabletFactory for KvEngineFactoryV2 { +impl TabletFactory for KvEngineFactoryV2 { fn create_tablet(&self, id: u64, suffix: u64) -> Result { let mut reg = 
self.registry.lock().unwrap(); if let Some(db) = reg.get(&(id, suffix)) { @@ -152,7 +152,7 @@ impl TabletFactory for KvEngineFactoryV2 { } } -impl TabletAccessor for KvEngineFactoryV2 { +impl TabletAccessor for KvEngineFactoryV2 { #[inline] fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &RocksEngine)) { let reg = self.registry.lock().unwrap(); @@ -189,8 +189,8 @@ mod tests { }; } - impl KvEngineFactoryV2 { - pub fn new(inner: KvEngineFactory) -> Self { + impl KvEngineFactoryV2 { + pub fn new(inner: KvEngineFactory) -> Self { KvEngineFactoryV2 { inner, registry: Arc::new(Mutex::new(HashMap::default())), @@ -204,7 +204,7 @@ mod tests { let dir = test_util::temp_dir("test_kvengine_factory", false); let env = cfg.build_shared_rocks_env(None, None).unwrap(); - let builder = KvEngineFactoryBuilder::::new(env, &cfg, dir.path()); + let builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); let factory = builder.build(); let shared_db = factory.create_shared_db().unwrap(); let tablet = TabletFactory::create_tablet(&factory, 1, 10); @@ -237,7 +237,7 @@ mod tests { let dir = test_util::temp_dir("test_kvengine_factory_v2", false); let env = cfg.build_shared_rocks_env(None, None).unwrap(); - let builder = KvEngineFactoryBuilder::::new(env, &cfg, dir.path()); + let builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); let inner_factory = builder.build(); let factory = KvEngineFactoryV2::new(inner_factory); let tablet = factory.create_tablet(1, 10); @@ -275,7 +275,7 @@ mod tests { let dir = test_util::temp_dir("test_get_live_tablets", false); let env = cfg.build_shared_rocks_env(None, None).unwrap(); - let builder = KvEngineFactoryBuilder::::new(env, &cfg, dir.path()); + let builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); let inner_factory = builder.build(); let factory = KvEngineFactoryV2::new(inner_factory); factory.create_tablet(1, 10).unwrap(); From b33d3df696c74271d3674b42c9e3446b8d79e8c1 Mon Sep 17 00:00:00 2001 From: Spade A 
<71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 13 Jul 2022 17:01:06 +0800 Subject: [PATCH 0076/1149] tablet: load_tablet should remove the old tablet in the cache. (#12984) close tikv/tikv#12985 Signed-off-by: SpadeA-Tang --- components/engine_test/src/lib.rs | 25 ++++++++++++++++++------- src/server/engine_factory_v2.rs | 30 +++++++++++++++++++++++------- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index e5dddfdcee2..f7fd904fd1c 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -140,6 +140,17 @@ pub mod kv { } } + // Extract tablet id and tablet suffix from the path. + fn get_id_and_suffix_from_path(path: &Path) -> (u64, u64) { + let (mut tablet_id, mut tablet_suffix) = (0, 1); + if let Some(s) = path.file_name().map(|s| s.to_string_lossy()) { + let mut split = s.split('_'); + tablet_id = split.next().and_then(|s| s.parse().ok()).unwrap_or(0); + tablet_suffix = split.next().and_then(|s| s.parse().ok()).unwrap_or(1); + } + (tablet_id, tablet_suffix) + } + impl TabletFactory for TestTabletFactory { fn create_tablet(&self, id: u64, suffix: u64) -> Result { let mut reg = self.registry.lock().unwrap(); @@ -199,12 +210,7 @@ pub mod kv { path.to_str().unwrap_or_default() )); } - let (mut tablet_id, mut tablet_suffix) = (0, 1); - if let Some(s) = path.file_name().map(|s| s.to_string_lossy()) { - let mut split = s.split('_'); - tablet_id = split.next().and_then(|s| s.parse().ok()).unwrap_or(0); - tablet_suffix = split.next().and_then(|s| s.parse().ok()).unwrap_or(1); - } + let (tablet_id, tablet_suffix) = get_id_and_suffix_from_path(path); self.create_tablet(tablet_id, tablet_suffix) } @@ -265,7 +271,12 @@ pub mod kv { let db_path = self.tablet_path(id, suffix); std::fs::rename(path, &db_path)?; - self.open_tablet_raw(db_path.as_path(), false) + let new_engine = self.open_tablet_raw(db_path.as_path(), false); + if new_engine.is_ok() 
{ + let (old_id, old_suffix) = get_id_and_suffix_from_path(path); + self.registry.lock().unwrap().remove(&(old_id, old_suffix)); + } + new_engine } fn clone(&self) -> Box + Send> { diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index ccd2f1d7b02..d1cc29bc88f 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -19,6 +19,17 @@ pub struct KvEngineFactoryV2 { registry: Arc>>, } +// Extract tablet id and tablet suffix from the path. +fn get_id_and_suffix_from_path(path: &Path) -> (u64, u64) { + let (mut tablet_id, mut tablet_suffix) = (0, 1); + if let Some(s) = path.file_name().map(|s| s.to_string_lossy()) { + let mut split = s.split('_'); + tablet_id = split.next().and_then(|s| s.parse().ok()).unwrap_or(0); + tablet_suffix = split.next().and_then(|s| s.parse().ok()).unwrap_or(1); + } + (tablet_id, tablet_suffix) +} + impl TabletFactory for KvEngineFactoryV2 { fn create_tablet(&self, id: u64, suffix: u64) -> Result { let mut reg = self.registry.lock().unwrap(); @@ -74,12 +85,7 @@ impl TabletFactory for KvEngineFactoryV2 { path.to_str().unwrap_or_default() )); } - let (mut tablet_id, mut tablet_suffix) = (0, 1); - if let Some(s) = path.file_name().map(|s| s.to_string_lossy()) { - let mut split = s.split('_'); - tablet_id = split.next().and_then(|s| s.parse().ok()).unwrap_or(0); - tablet_suffix = split.next().and_then(|s| s.parse().ok()).unwrap_or(1); - } + let (tablet_id, tablet_suffix) = get_id_and_suffix_from_path(path); self.create_tablet(tablet_id, tablet_suffix) } @@ -144,7 +150,12 @@ impl TabletFactory for KvEngineFactoryV2 { let db_path = self.tablet_path(id, suffix); std::fs::rename(path, &db_path)?; - self.open_tablet_raw(db_path.as_path(), false) + let new_engine = self.open_tablet_raw(db_path.as_path(), false); + if new_engine.is_ok() { + let (old_id, old_suffix) = get_id_and_suffix_from_path(path); + self.registry.lock().unwrap().remove(&(old_id, old_suffix)); + } + new_engine } fn clone(&self) 
-> Box + Send> { @@ -261,6 +272,11 @@ mod tests { assert!(!factory.is_tombstoned(1, 10)); assert!(factory.load_tablet(&tablet_path, 1, 10).is_err()); assert!(factory.load_tablet(&tablet_path, 1, 20).is_ok()); + // After we load it as with the new id or suffix, we should be unable to get it with + // the old id and suffix in the cache. + assert!(factory.open_tablet_cache(1, 10).is_none()); + assert!(factory.open_tablet_cache(1, 20).is_some()); + factory.mark_tombstone(1, 20); assert!(factory.is_tombstoned(1, 20)); factory.destroy_tablet(1, 20).unwrap(); From ab3d866ee3b163560ba35d5ba5e1863b9de7f47c Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Wed, 13 Jul 2022 19:01:05 +0800 Subject: [PATCH 0077/1149] log-backup: store log files by date and hour in sub directory (#13006) close tikv/tikv#12902 Signed-off-by: joccau Co-authored-by: Ti Chi Robot --- components/backup-stream/src/router.rs | 36 +++++++++++++++++++------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index dec4baeae89..debb4b417c8 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -530,6 +530,11 @@ struct TempFileKey { is_meta: bool, } +pub enum FormatType { + Date, + Hour, +} + impl TempFileKey { /// Create the key for an event. The key can be used to find which temporary file the event should be stored. 
fn of(kv: &ApplyEvent, region_id: u64) -> Self { @@ -588,7 +593,7 @@ impl TempFileKey { } } - fn format_date_time(ts: u64) -> impl Display { + fn format_date_time(ts: u64, t: FormatType) -> impl Display { use chrono::prelude::*; let millis = TimeStamp::physical(ts.into()); let dt = Utc.timestamp_millis(millis as _); @@ -600,19 +605,26 @@ impl TempFileKey { .format(&s.unwrap_or_else(|| "%Y%m".to_owned())) .to_string(); }); - return dt.format("%Y%m%d").to_string(); + match t { + FormatType::Date => dt.format("%Y%m%d").to_string(), + FormatType::Hour => dt.format("%H").to_string(), + } } #[cfg(not(feature = "failpoints"))] - return dt.format("%Y%m%d"); + match t { + FormatType::Date => dt.format("%Y%m%d"), + FormatType::Hour => dt.format("%H"), + } } /// path_to_log_file specifies the path of record log. - /// eg. "v1/20220625/t00000071/434098800931373064-f0251bd5-1441-499a-8f53-adc0d1057a73.log" + /// eg. "v1/20220625/03/t00000071/434098800931373064-f0251bd5-1441-499a-8f53-adc0d1057a73.log" fn path_to_log_file(&self, min_ts: u64, max_ts: u64) -> String { format!( - "v1/{}/t{:08}/{:012}-{}.log", + "v1/{}/{}/t{:08}/{:012}-{}.log", // We may delete a range of files, so using the max_ts for preventing remove some records wrong. - Self::format_date_time(max_ts), + Self::format_date_time(max_ts, FormatType::Date), + Self::format_date_time(max_ts, FormatType::Hour), self.table_id, min_ts, uuid::Uuid::new_v4() @@ -620,11 +632,12 @@ impl TempFileKey { } /// path_to_schema_file specifies the path of schema log. - /// eg. "v1/20220625/schema-meta/434055683656384515-cc3cb7a3-e03b-4434-ab6c-907656fddf67.log" + /// eg. 
"v1/20220625/03/schema-meta/434055683656384515-cc3cb7a3-e03b-4434-ab6c-907656fddf67.log" fn path_to_schema_file(min_ts: u64, max_ts: u64) -> String { format!( - "v1/{}/schema-meta/{:012}-{}.log", - Self::format_date_time(max_ts), + "v1/{}/{}/schema-meta/{:012}-{}.log", + Self::format_date_time(max_ts, FormatType::Date), + Self::format_date_time(max_ts, FormatType::Hour), min_ts, uuid::Uuid::new_v4(), ) @@ -1765,9 +1778,12 @@ mod tests { #[test] fn test_format_datetime() { - let s = TempFileKey::format_date_time(431656320867237891); + let s = TempFileKey::format_date_time(431656320867237891, FormatType::Date); let s = s.to_string(); assert_eq!(s, "20220307"); + + let s = TempFileKey::format_date_time(431656320867237891, FormatType::Hour); + assert_eq!(s.to_string(), "07"); } #[test] From 5b8deaaf81c350a3ad44f842b04e9e107fbab3c0 Mon Sep 17 00:00:00 2001 From: haojinming Date: Wed, 13 Jul 2022 19:21:05 +0800 Subject: [PATCH 0078/1149] BR: flush causal timestamp before backup start for rawkv apiv2 (#12991) ref tikv/migration#138, close tikv/tikv#12989 Signed-off-by: haojinming Co-authored-by: Ping Yu Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/backup/Cargo.toml | 1 + components/backup/src/endpoint.rs | 58 +++++++++++++++++++++++++++++-- components/causal_ts/src/lib.rs | 7 ++++ components/cdc/src/endpoint.rs | 39 +++++++++++++++++++-- components/cdc/src/observer.rs | 15 +------- components/cdc/tests/mod.rs | 1 + components/server/src/server.rs | 46 +++++++++++++++--------- components/test_backup/src/lib.rs | 1 + 9 files changed, 133 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 15da9f000b0..f1d08413c9c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -395,6 +395,7 @@ version = "0.0.1" dependencies = [ "api_version", "async-channel", + "causal_ts", "collections", "concurrency_manager", "crc64fast", diff --git a/components/backup/Cargo.toml b/components/backup/Cargo.toml index effe13c4e08..85131c8e68f 100644 --- 
a/components/backup/Cargo.toml +++ b/components/backup/Cargo.toml @@ -35,6 +35,7 @@ failpoints = ["tikv/failpoints"] [dependencies] api_version = { path = "../api_version", default-features = false } async-channel = "1.4" +causal_ts = { path = "../causal_ts" } collections = { path = "../collections" } concurrency_manager = { path = "../concurrency_manager", default-features = false } crc64fast = "0.1" diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 2a68cbb6bd8..9402879fb5c 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -9,6 +9,7 @@ use std::{ }; use async_channel::SendError; +use causal_ts::CausalTsProvider; use concurrency_manager::ConcurrencyManager; use engine_rocks::raw::DB; use engine_traits::{name_to_cf, raw_ttl::ttl_current_ts, CfName, SstCompressionType}; @@ -661,6 +662,7 @@ pub struct Endpoint { concurrency_manager: ConcurrencyManager, softlimit: SoftLimitKeeper, api_version: ApiVersion, + causal_ts_provider: Option>, // used in rawkv apiv2 only pub(crate) engine: E, pub(crate) region_info: R, @@ -782,6 +784,7 @@ impl Endpoint { config: BackupConfig, concurrency_manager: ConcurrencyManager, api_version: ApiVersion, + causal_ts_provider: Option>, ) -> Endpoint { let pool = ControlThreadPool::new(); let rt = utils::create_tokio_runtime(config.io_thread_size, "backup-io").unwrap(); @@ -799,6 +802,7 @@ impl Endpoint { config_manager, concurrency_manager, api_version, + causal_ts_provider, } } @@ -962,6 +966,26 @@ impl Endpoint { } return; } + // Flush causal timestamp to make sure that future writes will have larger timestamps. + // And help TiKV-BR acquire a backup-ts with intact data smaller than it. + // (Note that intactness is not fully ensured now, until the safe-ts of RawKV is implemented. + // TiKV-BR need a workaround by rewinding backup-ts to a small "safe interval"). 
+ if request.is_raw_kv { + if let Err(e) = self + .causal_ts_provider + .as_ref() + .map_or(Ok(()), |provider| provider.flush()) + { + error!("backup flush causal timestamp failed"; "err" => ?e); + let mut response = BackupResponse::default(); + let err_msg = format!("fail to flush causal ts, {:?}", e); + response.set_error(crate::Error::Other(box_err!(err_msg)).into()); + if let Err(err) = resp.unbounded_send(response) { + error_unknown!(?err; "backup failed to send response"); + } + return; + } + } let start_key = codec.encode_backup_key(request.start_key.clone()); let end_key = codec.encode_backup_key(request.end_key.clone()); @@ -1198,13 +1222,14 @@ pub mod tests { } pub fn new_endpoint() -> (TempDir, Endpoint) { - new_endpoint_with_limiter(None, ApiVersion::V1, false) + new_endpoint_with_limiter(None, ApiVersion::V1, false, None) } pub fn new_endpoint_with_limiter( limiter: Option>, api_version: ApiVersion, is_raw_kv: bool, + causal_ts_provider: Option>, ) -> (TempDir, Endpoint) { let temp = TempDir::new().unwrap(); let rocks = TestEngineBuilder::new() @@ -1236,6 +1261,7 @@ pub mod tests { }, concurrency_manager, api_version, + causal_ts_provider, ), ) } @@ -1445,7 +1471,7 @@ pub mod tests { fn test_handle_backup_task() { let limiter = Arc::new(IORateLimiter::new_for_test()); let stats = limiter.statistics().unwrap(); - let (tmp, endpoint) = new_endpoint_with_limiter(Some(limiter), ApiVersion::V1, false); + let (tmp, endpoint) = new_endpoint_with_limiter(Some(limiter), ApiVersion::V1, false, None); let engine = endpoint.engine.clone(); endpoint @@ -1584,7 +1610,7 @@ pub mod tests { fn test_handle_backup_raw_task_impl(cur_api_ver: ApiVersion, dst_api_ver: ApiVersion) -> bool { let limiter = Arc::new(IORateLimiter::new_for_test()); let stats = limiter.statistics().unwrap(); - let (tmp, endpoint) = new_endpoint_with_limiter(Some(limiter), cur_api_ver, true); + let (tmp, endpoint) = new_endpoint_with_limiter(Some(limiter), cur_api_ver, true, None); let engine = 
endpoint.engine.clone(); let start_key_idx: u64 = 100; @@ -1721,6 +1747,32 @@ pub mod tests { } } + #[test] + fn test_backup_raw_apiv2_causal_ts() { + let limiter = Arc::new(IORateLimiter::new_for_test()); + let ts_provider = Arc::new(causal_ts::tests::TestProvider::default()); + let start_ts = ts_provider.get_ts().unwrap(); + let (tmp, endpoint) = new_endpoint_with_limiter( + Some(limiter), + ApiVersion::V2, + true, + Some(ts_provider.clone()), + ); + + let mut req = BackupRequest::default(); + let (tx, _) = unbounded(); + let tmp1 = make_unique_dir(tmp.path()); + req.set_storage_backend(make_local_backend(&tmp1)); + req.set_start_key(b"r".to_vec()); + req.set_end_key(b"s".to_vec()); + req.set_is_raw_kv(true); + req.set_dst_api_version(ApiVersion::V2); + let (task, _) = Task::new(req, tx).unwrap(); + endpoint.handle_backup_task(task); + let end_ts = ts_provider.get_ts().unwrap(); + assert_eq!(end_ts.into_inner(), start_ts.next().into_inner() + 100); + } + #[test] fn test_scan_error() { let (tmp, endpoint) = new_endpoint(); diff --git a/components/causal_ts/src/lib.rs b/components/causal_ts/src/lib.rs index ea5fe3bdcc3..615f01365cd 100644 --- a/components/causal_ts/src/lib.rs +++ b/components/causal_ts/src/lib.rs @@ -58,6 +58,13 @@ pub mod tests { fn get_ts(&self) -> Result { Ok(self.ts.fetch_add(1, Ordering::Relaxed).into()) } + + // This is used for unit test. Add 100 from current. + // Do not modify this value as several test cases depend on it. 
+ fn flush(&self) -> Result<()> { + self.ts.fetch_add(100, Ordering::Relaxed); + Ok(()) + } } #[derive(Clone, Default)] diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 7a67c2f9d85..fa6dcb97651 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -8,6 +8,7 @@ use std::{ time::Duration, }; +use causal_ts::CausalTsProvider; use collections::{HashMap, HashMapEntry, HashSet}; use concurrency_manager::ConcurrencyManager; use crossbeam::atomic::AtomicCell; @@ -413,6 +414,7 @@ pub struct Endpoint { env: Arc, security_mgr: Arc, region_read_progress: RegionReadProgressRegistry, + causal_ts_provider: Option>, // Metrics and logging. current_ts: TimeStamp, @@ -438,6 +440,7 @@ impl, E: KvEngine> Endpoint { env: Arc, security_mgr: Arc, sink_memory_quota: MemoryQuota, + causal_ts_provider: Option>, ) -> Endpoint { let workers = Builder::new_multi_thread() .thread_name("cdcwkr") @@ -508,6 +511,7 @@ impl, E: KvEngine> Endpoint { // Log the first resolved ts warning. 
warn_resolved_ts_repeat_count: WARN_RESOLVED_TS_COUNT_THRESHOLD, current_ts: TimeStamp::zero(), + causal_ts_provider, }; ep.register_min_ts_event(); ep @@ -1111,7 +1115,7 @@ impl, E: KvEngine> Endpoint { let tikv_clients = self.tikv_clients.clone(); let hibernate_regions_compatible = self.config.hibernate_regions_compatible; let region_read_progress = self.region_read_progress.clone(); - let observer = self.observer.clone(); + let causal_ts_provider = self.causal_ts_provider.clone(); let fut = async move { let _ = timeout.compat().await; @@ -1141,7 +1145,7 @@ impl, E: KvEngine> Endpoint { // If flush_causal_timestamp fails, cannot schedule MinTS task // as new coming raw data may use timestamp smaller than min_ts - if let Err(e) = observer.flush_causal_timestamp() { + if let Err(e) = causal_ts_provider.map_or(Ok(()), |provider| provider.flush()) { error!("cdc flush causal timestamp failed"; "err" => ?e); return; } @@ -1473,6 +1477,15 @@ mod tests { cfg: &CdcConfig, engine: Option, api_version: ApiVersion, + ) -> TestEndpointSuite { + mock_endpoint_with_ts_provider(cfg, engine, api_version, None) + } + + fn mock_endpoint_with_ts_provider( + cfg: &CdcConfig, + engine: Option, + api_version: ApiVersion, + causal_ts_provider: Option>, ) -> TestEndpointSuite { let (task_sched, task_rx) = dummy_scheduler(); let raft_router = MockRaftStoreRouter::new(); @@ -1495,6 +1508,7 @@ mod tests { Arc::new(Environment::new(1)), Arc::new(SecurityManager::default()), MemoryQuota::new(usize::MAX), + causal_ts_provider, ); TestEndpointSuite { @@ -2226,6 +2240,27 @@ mod tests { ); } + #[test] + fn test_raw_causal_ts_flush() { + let sleep_interval = Duration::from_secs(1); + let cfg = CdcConfig { + min_ts_interval: ReadableDuration(sleep_interval), + ..Default::default() + }; + let ts_provider = Arc::new(causal_ts::tests::TestProvider::default()); + let start_ts = ts_provider.get_ts().unwrap(); + let mut suite = + mock_endpoint_with_ts_provider(&cfg, None, ApiVersion::V2, 
Some(ts_provider.clone())); + suite.run(Task::RegisterMinTsEvent); + suite + .task_rx + .recv_timeout(Duration::from_millis(1500)) + .unwrap() + .unwrap(); + let end_ts = ts_provider.get_ts().unwrap(); + assert!(end_ts.into_inner() >= start_ts.next().into_inner() + 100); // may trigger more than once. + } + #[test] fn test_feature_gate() { let cfg = CdcConfig { diff --git a/components/cdc/src/observer.rs b/components/cdc/src/observer.rs index 5779d5f7e06..6c0771cbc64 100644 --- a/components/cdc/src/observer.rs +++ b/components/cdc/src/observer.rs @@ -2,7 +2,7 @@ use std::sync::{Arc, RwLock}; -use causal_ts::{CausalTsProvider, Error as CausalTsError, RawTsTracker, Result as CausalTsResult}; +use causal_ts::{Error as CausalTsError, RawTsTracker, Result as CausalTsResult}; use collections::HashMap; use engine_traits::KvEngine; use fail::fail_point; @@ -30,8 +30,6 @@ pub struct CdcObserver { // A shared registry for managing observed regions. // TODO: it may become a bottleneck, find a better way to manage the registry. observe_regions: Arc>>, - - pub causal_ts_provider: Option>, } impl CdcObserver { @@ -43,14 +41,9 @@ impl CdcObserver { CdcObserver { sched, observe_regions: Arc::default(), - causal_ts_provider: None, } } - pub fn set_causal_ts_provider(&mut self, provider: Arc) { - self.causal_ts_provider = Some(provider); - } - pub fn register_to(&self, coprocessor_host: &mut CoprocessorHost) { // use 0 as the priority of the cmd observer. 
CDC should have a higher priority than // the `resolved-ts`'s cmd observer @@ -98,12 +91,6 @@ impl CdcObserver { .get(®ion_id) .cloned() } - - pub fn flush_causal_timestamp(&self) -> CausalTsResult<()> { - self.causal_ts_provider - .as_ref() - .map_or(Ok(()), |provider| provider.flush()) - } } impl Coprocessor for CdcObserver {} diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index 6443ffea158..25283951450 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -188,6 +188,7 @@ impl TestSuiteBuilder { env, sim.security_mgr.clone(), MemoryQuota::new(usize::MAX), + None, ); let mut updated_cfg = cfg.clone(); updated_cfg.min_ts_interval = ReadableDuration::millis(100); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 4bd95b1de60..c0ed12bf73c 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -31,6 +31,7 @@ use backup_stream::{ metadata::{ConnectionConfig, LazyEtcdClient}, observer::BackupStreamObserver, }; +use causal_ts::{BatchTsoProvider, CausalTsProvider}; use cdc::{CdcConfigManager, MemoryQuota}; use concurrency_manager::ConcurrencyManager; use encryption_export::{data_key_manager_from_config, DataKeyManager}; @@ -139,7 +140,7 @@ const CPU_QUOTA_ADJUSTMENT_PACE: f64 = 200.0; // 0.2 vcpu #[inline] fn run_impl(config: TiKvConfig) { - let mut tikv = TiKvServer::::init(config); + let mut tikv = TiKvServer::::init::(config); // Must be called after `TiKvServer::init`. 
let memory_limit = tikv.config.memory_usage_limit.unwrap().0; @@ -230,6 +231,7 @@ struct TiKvServer { background_worker: Worker, sst_worker: Option>>, quota_limiter: Arc, + causal_ts_provider: Option>>, // used for rawkv apiv2 } struct TiKvEngines { @@ -254,7 +256,7 @@ type LocalServer = type LocalRaftKv = RaftKv>; impl TiKvServer { - fn init(mut config: TiKvConfig) -> TiKvServer { + fn init(mut config: TiKvConfig) -> TiKvServer { tikv_util::thread_group::set_properties(Some(GroupProperties::default())); // It is okay use pd config and security config before `init_config`, // because these configs must be provided by command line, and only @@ -310,6 +312,20 @@ impl TiKvServer { config.quota.enable_auto_tune, )); + let mut causal_ts_provider = None; + if let ApiVersion::V2 = F::TAG { + let tso = block_on(causal_ts::BatchTsoProvider::new_opt( + pd_client.clone(), + config.causal_ts.renew_interval.0, + config.causal_ts.renew_batch_min_size, + )); + if let Err(e) = tso { + fatal!("Causal timestamp provider initialize failed: {:?}", e); + } + causal_ts_provider = Some(Arc::new(tso.unwrap())); + info!("Causal timestamp provider startup."); + } + TiKvServer { config, cfg_controller: Some(cfg_controller), @@ -335,6 +351,7 @@ impl TiKvServer { flow_info_receiver: None, sst_worker: None, quota_limiter, + causal_ts_provider, } } @@ -778,7 +795,7 @@ impl TiKvServer { } // Register cdc. - let mut cdc_ob = cdc::CdcObserver::new(cdc_scheduler.clone()); + let cdc_ob = cdc::CdcObserver::new(cdc_scheduler.clone()); cdc_ob.register_to(self.coprocessor_host.as_mut().unwrap()); // Register cdc config manager. 
cfg_controller.register( @@ -805,21 +822,10 @@ impl TiKvServer { }; // Register causal observer for RawKV API V2 - if let ApiVersion::V2 = F::TAG { - let tso = block_on(causal_ts::BatchTsoProvider::new_opt( - self.pd_client.clone(), - self.config.causal_ts.renew_interval.0, - self.config.causal_ts.renew_batch_min_size, - )); - if let Err(e) = tso { - fatal!("Causal timestamp provider initialize failed: {:?}", e); - } - let causal_ts_provider = Arc::new(tso.unwrap()); - info!("Causal timestamp provider startup."); - cdc_ob.set_causal_ts_provider(causal_ts_provider.clone()); - let causal_ob = causal_ts::CausalObserver::new(causal_ts_provider, cdc_ob.clone()); + if let Some(provider) = self.causal_ts_provider.clone() { + let causal_ob = causal_ts::CausalObserver::new(provider, cdc_ob.clone()); causal_ob.register_to(self.coprocessor_host.as_mut().unwrap()); - } + }; let check_leader_runner = CheckLeaderRunner::new(engines.store_meta.clone()); let check_leader_scheduler = self @@ -1044,6 +1050,9 @@ impl TiKvServer { server.env(), self.security_mgr.clone(), cdc_memory_quota.clone(), + self.causal_ts_provider + .clone() + .map(|provider| provider as Arc), ); cdc_worker.start_with_timer(cdc_endpoint); self.to_stop.push(cdc_worker); @@ -1177,6 +1186,9 @@ impl TiKvServer { self.config.backup.clone(), self.concurrency_manager.clone(), self.config.storage.api_version(), + self.causal_ts_provider + .clone() + .map(|provider| provider as Arc), ); self.cfg_controller.as_mut().unwrap().register( tikv::config::Module::Backup, diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index f8f96b34921..bf14b86dfc8 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -94,6 +94,7 @@ impl TestSuite { }, sim.get_concurrency_manager(*id), api_version, + None, ); let mut worker = bg_worker.lazy_build(format!("backup-{}", id)); worker.start(backup_endpoint); From 23588581613251bdec283764a604197dd00ca377 Mon Sep 17 00:00:00 2001 
From: haojinming Date: Thu, 14 Jul 2022 18:25:06 +0800 Subject: [PATCH 0079/1149] Backup: Do not fill cache when backup rawkv (#13022) close tikv/tikv#13020 Signed-off-by: haojinming --- components/backup/src/endpoint.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 9402879fb5c..0734af017d2 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -454,6 +454,7 @@ impl BackupRange { let mut cursor = CursorBuilder::new(snapshot, self.cf) .range(None, self.end_key.clone()) .scan_mode(ScanMode::Forward) + .fill_cache(false) .build()?; if let Some(begin) = self.start_key.clone() { if !cursor.seek(&begin, cfstatistics)? { From 1e98feecc6b4550cf0809c1583fb7a07479ecae9 Mon Sep 17 00:00:00 2001 From: Zwb Date: Thu, 14 Jul 2022 19:55:06 +0800 Subject: [PATCH 0080/1149] Optimize Commit pipeline performance (#12899) close tikv/tikv#12898 Optimize Commit pipeline performance Signed-off-by: Wenbo Zhang Co-authored-by: Xinye Tao Co-authored-by: Ti Chi Robot --- Cargo.lock | 6 +- components/backup-stream/src/utils.rs | 1 + components/engine_rocks/src/engine.rs | 11 +- components/engine_rocks/src/misc.rs | 1 + components/engine_rocks/src/raft_engine.rs | 12 +- components/engine_rocks/src/write_batch.rs | 226 +++- components/engine_test/src/lib.rs | 12 +- components/engine_traits_tests/src/lib.rs | 19 + .../engine_traits_tests/src/write_batch.rs | 1133 ++++++++++++++++- components/raftstore/src/store/fsm/apply.rs | 1 + src/config.rs | 2 + src/server/debug.rs | 53 +- src/server/gc_worker/compaction_filter.rs | 6 +- src/server/reset_to_version.rs | 6 +- .../misc/writebatch/bench_writebatch.rs | 37 +- tests/failpoints/cases/test_gc_worker.rs | 4 +- 16 files changed, 1430 insertions(+), 100 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f1d08413c9c..d08e8fc3b25 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2747,7 +2747,7 @@ dependencies = [ [[package]] name = 
"librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#d8b7ff8aee62aa9a406b64f7093049d62eeb9a1a" +source = "git+https://github.com/tikv/rust-rocksdb.git#c8878e2df0c7c23d553d345d337d9dda332e2d5a" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2766,7 +2766,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#d8b7ff8aee62aa9a406b64f7093049d62eeb9a1a" +source = "git+https://github.com/tikv/rust-rocksdb.git#c8878e2df0c7c23d553d345d337d9dda332e2d5a" dependencies = [ "bzip2-sys", "cc", @@ -4571,7 +4571,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#d8b7ff8aee62aa9a406b64f7093049d62eeb9a1a" +source = "git+https://github.com/tikv/rust-rocksdb.git#c8878e2df0c7c23d553d345d337d9dda332e2d5a" dependencies = [ "libc 0.2.125", "librocksdb_sys", diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 725a1c17f51..678b571f3b5 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -647,6 +647,7 @@ mod test { let p = TempDir::new("test_db").unwrap(); let mut opt = DBOptions::default(); opt.create_if_missing(true); + opt.enable_multi_write_batch(true); let db = DB::open(opt.clone(), p.path().as_os_str().to_str().unwrap()).unwrap(); let engine = RocksEngine::from_db(Arc::new(db)); let mut wb = engine.write_batch(); diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 60be2007367..33af3b78036 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -26,16 +26,21 @@ use crate::{ pub struct RocksEngine { db: Arc, shared_block_cache: bool, + support_multi_batch_write: bool, } impl RocksEngine { pub fn from_db(db: Arc) -> Self { RocksEngine { - db, + db: db.clone(), shared_block_cache: false, + support_multi_batch_write: 
db.get_db_options().is_enable_multi_batch_write(), } } + // Notice: After obtaining RocksEngine through this method, please make sure + // it has been initialized with db, otherwise do not call its member methods, + // as it'll contain garbage members. pub fn from_ref(db: &Arc) -> &Self { unsafe { &*(db as *const Arc as *const RocksEngine) } } @@ -63,6 +68,10 @@ impl RocksEngine { pub fn set_shared_block_cache(&mut self, enable: bool) { self.shared_block_cache = enable; } + + pub fn support_multi_batch_write(&self) -> bool { + self.support_multi_batch_write + } } impl KvEngine for RocksEngine { diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index 0ae93fe34df..d7741e98c26 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -567,6 +567,7 @@ mod tests { let mut opts = DBOptions::new(); opts.create_if_missing(true); + opts.enable_multi_batch_write(true); let mut cf_opts = ColumnFamilyOptions::new(); // Prefix extractor(trim the timestamp at tail) for write cf. diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index 19ceea3062c..2f67904486f 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -14,7 +14,7 @@ use protobuf::Message; use raft::eraftpb::Entry; use tikv_util::{box_err, box_try}; -use crate::{util, RocksEngine, RocksWriteBatch}; +use crate::{util, RocksEngine, RocksWriteBatchVec}; impl RaftEngineReadOnly for RocksEngine { fn get_raft_state(&self, raft_group_id: u64) -> Result> { @@ -176,7 +176,7 @@ impl RocksEngine { raft_group_id: u64, mut from: u64, to: u64, - raft_wb: &mut RocksWriteBatch, + raft_wb: &mut RocksWriteBatchVec, ) -> Result { if from == 0 { let start_key = keys::raft_log_key(raft_group_id, 0); @@ -207,10 +207,10 @@ impl RocksEngine { // for all KvEngines, but is currently implemented separately for // every engine. 
impl RaftEngine for RocksEngine { - type LogBatch = RocksWriteBatch; + type LogBatch = RocksWriteBatchVec; fn log_batch(&self, capacity: usize) -> Self::LogBatch { - RocksWriteBatch::with_capacity(self, capacity) + RocksWriteBatchVec::with_unit_capacity(self, capacity) } fn sync(&self) -> Result<()> { @@ -368,7 +368,7 @@ impl RaftEngine for RocksEngine { } } -impl RaftLogBatch for RocksWriteBatch { +impl RaftLogBatch for RocksWriteBatchVec { fn append(&mut self, raft_group_id: u64, entries: Vec) -> Result<()> { if let Some(max_size) = entries.iter().map(|e| e.compute_size()).max() { let ser_buf = Vec::with_capacity(max_size as usize); @@ -421,7 +421,7 @@ impl RaftLogBatch for RocksWriteBatch { } } -impl RocksWriteBatch { +impl RocksWriteBatchVec { fn append_impl( &mut self, raft_group_id: u64, diff --git a/components/engine_rocks/src/write_batch.rs b/components/engine_rocks/src/write_batch.rs index e9428b2c291..1aa5c424521 100644 --- a/components/engine_rocks/src/write_batch.rs +++ b/components/engine_rocks/src/write_batch.rs @@ -7,126 +7,215 @@ use rocksdb::{Writable, WriteBatch as RawWriteBatch, DB}; use crate::{engine::RocksEngine, options::RocksWriteOptions, util::get_cf_handle}; +const WRITE_BATCH_MAX_BATCH: usize = 16; +const WRITE_BATCH_LIMIT: usize = 16; + impl WriteBatchExt for RocksEngine { - type WriteBatch = RocksWriteBatch; + type WriteBatch = RocksWriteBatchVec; const WRITE_BATCH_MAX_KEYS: usize = 256; - fn write_batch(&self) -> RocksWriteBatch { - RocksWriteBatch::new(self.as_inner().clone()) + fn write_batch(&self) -> RocksWriteBatchVec { + RocksWriteBatchVec::new( + Arc::clone(self.as_inner()), + WRITE_BATCH_LIMIT, + 1, + self.support_multi_batch_write(), + ) } - fn write_batch_with_cap(&self, cap: usize) -> RocksWriteBatch { - RocksWriteBatch::with_capacity(self, cap) + fn write_batch_with_cap(&self, cap: usize) -> RocksWriteBatchVec { + RocksWriteBatchVec::with_unit_capacity(self, cap) } } -pub struct RocksWriteBatch { +/// `RocksWriteBatchVec` 
is for method `MultiBatchWrite` of RocksDB, which splits a large WriteBatch +/// into many smaller ones and then any thread could help to deal with these small WriteBatch when it +/// is calling `MultiBatchCommit` and wait the front writer to finish writing. `MultiBatchWrite` will +/// perform much better than traditional `pipelined_write` when TiKV writes very large data into RocksDB. +/// We will remove this feature when `unordered_write` of RocksDB becomes more stable and becomes compatible +/// with Titan. +pub struct RocksWriteBatchVec { db: Arc, - wb: RawWriteBatch, + wbs: Vec, + save_points: Vec, + index: usize, + batch_size_limit: usize, + support_write_batch_vec: bool, } -impl RocksWriteBatch { - pub fn new(db: Arc) -> RocksWriteBatch { - let wb = RawWriteBatch::new(); - RocksWriteBatch { db, wb } - } - - pub fn with_capacity(engine: &RocksEngine, cap: usize) -> RocksWriteBatch { +impl RocksWriteBatchVec { + pub fn new( + db: Arc, + batch_size_limit: usize, + cap: usize, + support_write_batch_vec: bool, + ) -> RocksWriteBatchVec { let wb = RawWriteBatch::with_capacity(cap); - RocksWriteBatch { - db: engine.as_inner().clone(), - wb, + RocksWriteBatchVec { + db, + wbs: vec![wb], + save_points: vec![], + index: 0, + batch_size_limit, + support_write_batch_vec, } } - pub fn as_inner(&self) -> &RawWriteBatch { - &self.wb + pub fn with_unit_capacity(engine: &RocksEngine, cap: usize) -> RocksWriteBatchVec { + Self::new( + engine.as_inner().clone(), + WRITE_BATCH_LIMIT, + cap, + engine.support_multi_batch_write(), + ) } - pub fn as_raw(&self) -> &RawWriteBatch { - &self.wb + pub fn as_inner(&self) -> &[RawWriteBatch] { + &self.wbs[0..=self.index] } pub fn get_db(&self) -> &DB { self.db.as_ref() } + + /// `check_switch_batch` will split a large WriteBatch into many smaller ones. This is to avoid + /// a large WriteBatch blocking write_thread too long. 
+ #[inline(always)] + fn check_switch_batch(&mut self) { + if self.support_write_batch_vec + && self.batch_size_limit > 0 + && self.wbs[self.index].count() >= self.batch_size_limit + { + self.index += 1; + if self.index >= self.wbs.len() { + self.wbs.push(RawWriteBatch::default()); + } + } + } } -impl engine_traits::WriteBatch for RocksWriteBatch { +impl engine_traits::WriteBatch for RocksWriteBatchVec { fn write_opt(&self, opts: &WriteOptions) -> Result<()> { let opt: RocksWriteOptions = opts.into(); - self.get_db() - .write_opt(&self.wb, &opt.into_raw()) - .map_err(Error::Engine) + if self.index > 0 { + self.get_db() + .multi_batch_write(self.as_inner(), &opt.into_raw()) + .map_err(Error::Engine) + } else { + self.get_db() + .write_opt(&self.wbs[0], &opt.into_raw()) + .map_err(Error::Engine) + } } fn data_size(&self) -> usize { - self.wb.data_size() + let mut size: usize = 0; + for i in 0..=self.index { + size += self.wbs[i].data_size(); + } + size } fn count(&self) -> usize { - self.wb.count() + self.wbs[self.index].count() + self.index * self.batch_size_limit } fn is_empty(&self) -> bool { - self.wb.is_empty() + self.wbs[0].is_empty() } fn should_write_to_engine(&self) -> bool { - self.count() > RocksEngine::WRITE_BATCH_MAX_KEYS + if self.support_write_batch_vec { + self.index >= WRITE_BATCH_MAX_BATCH + } else { + self.wbs[0].count() > RocksEngine::WRITE_BATCH_MAX_KEYS + } } fn clear(&mut self) { - self.wb.clear(); + for i in 0..=self.index { + self.wbs[i].clear(); + } + self.save_points.clear(); + // Avoid making the wbs too big at one time, then the memory will be kept + // after reusing + if self.index > WRITE_BATCH_MAX_BATCH + 1 { + self.wbs.shrink_to(WRITE_BATCH_MAX_BATCH + 1); + } + self.index = 0; } fn set_save_point(&mut self) { - self.wb.set_save_point(); + self.wbs[self.index].set_save_point(); + self.save_points.push(self.index); } fn pop_save_point(&mut self) -> Result<()> { - self.wb.pop_save_point().map_err(Error::Engine) + if let Some(x) = 
self.save_points.pop() { + return self.wbs[x].pop_save_point().map_err(Error::Engine); + } + Err(Error::Engine("no save point".into())) } fn rollback_to_save_point(&mut self) -> Result<()> { - self.wb.rollback_to_save_point().map_err(Error::Engine) + if let Some(x) = self.save_points.pop() { + for i in x + 1..=self.index { + self.wbs[i].clear(); + } + self.index = x; + return self.wbs[x].rollback_to_save_point().map_err(Error::Engine); + } + Err(Error::Engine("no save point".into())) } fn merge(&mut self, other: Self) -> Result<()> { - self.wb.append(other.wb.data()); + for wb in other.as_inner() { + self.check_switch_batch(); + self.wbs[self.index].append(wb.data()); + } Ok(()) } } -impl Mutable for RocksWriteBatch { +impl Mutable for RocksWriteBatchVec { fn put(&mut self, key: &[u8], value: &[u8]) -> Result<()> { - self.wb.put(key, value).map_err(Error::Engine) + self.check_switch_batch(); + self.wbs[self.index].put(key, value).map_err(Error::Engine) } fn put_cf(&mut self, cf: &str, key: &[u8], value: &[u8]) -> Result<()> { + self.check_switch_batch(); let handle = get_cf_handle(self.db.as_ref(), cf)?; - self.wb.put_cf(handle, key, value).map_err(Error::Engine) + self.wbs[self.index] + .put_cf(handle, key, value) + .map_err(Error::Engine) } fn delete(&mut self, key: &[u8]) -> Result<()> { - self.wb.delete(key).map_err(Error::Engine) + self.check_switch_batch(); + self.wbs[self.index].delete(key).map_err(Error::Engine) } fn delete_cf(&mut self, cf: &str, key: &[u8]) -> Result<()> { + self.check_switch_batch(); let handle = get_cf_handle(self.db.as_ref(), cf)?; - self.wb.delete_cf(handle, key).map_err(Error::Engine) + self.wbs[self.index] + .delete_cf(handle, key) + .map_err(Error::Engine) } fn delete_range(&mut self, begin_key: &[u8], end_key: &[u8]) -> Result<()> { - self.wb + self.check_switch_batch(); + self.wbs[self.index] .delete_range(begin_key, end_key) .map_err(Error::Engine) } fn delete_range_cf(&mut self, cf: &str, begin_key: &[u8], end_key: &[u8]) -> 
Result<()> { + self.check_switch_batch(); let handle = get_cf_handle(self.db.as_ref(), cf)?; - self.wb + self.wbs[self.index] .delete_range_cf(handle, begin_key, end_key) .map_err(Error::Engine) } @@ -144,20 +233,27 @@ mod tests { }; #[test] - fn test_should_write_to_engine() { + fn test_should_write_to_engine_with_pipeline_write_mode() { let path = Builder::new() .prefix("test-should-write-to-engine") .tempdir() .unwrap(); let opt = RawDBOptions::default(); opt.enable_unordered_write(false); - opt.enable_pipelined_write(false); + opt.enable_pipelined_write(true); + opt.enable_multi_batch_write(false); let engine = new_engine_opt( path.path().join("db").to_str().unwrap(), RocksDBOptions::from_raw(opt), vec![], ) .unwrap(); + assert!( + !engine + .as_inner() + .get_db_options() + .is_enable_multi_batch_write() + ); let mut wb = engine.write_batch(); for _i in 0..RocksEngine::WRITE_BATCH_MAX_KEYS { wb.put(b"aaa", b"bbb").unwrap(); @@ -166,16 +262,58 @@ mod tests { wb.put(b"aaa", b"bbb").unwrap(); assert!(wb.should_write_to_engine()); wb.write().unwrap(); + let v = engine.get_value(b"aaa").unwrap(); + assert!(v.is_some()); assert_eq!(v.unwrap(), b"bbb"); - let mut wb = RocksWriteBatch::with_capacity(&engine, 1024); + let mut wb = RocksWriteBatchVec::with_unit_capacity(&engine, 1024); + for _i in 0..RocksEngine::WRITE_BATCH_MAX_KEYS { + wb.put(b"aaa", b"bbb").unwrap(); + } + assert!(!wb.should_write_to_engine()); + wb.put(b"aaa", b"bbb").unwrap(); + assert!(wb.should_write_to_engine()); + wb.clear(); + assert!(!wb.should_write_to_engine()); + } + + #[test] + fn test_should_write_to_engine_with_multi_batch_write_mode() { + let path = Builder::new() + .prefix("test-should-write-to-engine") + .tempdir() + .unwrap(); + let opt = RawDBOptions::default(); + opt.enable_unordered_write(false); + opt.enable_pipelined_write(false); + opt.enable_multi_batch_write(true); + let engine = new_engine_opt( + path.path().join("db").to_str().unwrap(), + RocksDBOptions::from_raw(opt), + 
vec![], + ) + .unwrap(); + assert!( + engine + .as_inner() + .get_db_options() + .is_enable_multi_batch_write() + ); + let mut wb = engine.write_batch(); for _i in 0..RocksEngine::WRITE_BATCH_MAX_KEYS { wb.put(b"aaa", b"bbb").unwrap(); } assert!(!wb.should_write_to_engine()); wb.put(b"aaa", b"bbb").unwrap(); assert!(wb.should_write_to_engine()); + let mut wb = RocksWriteBatchVec::with_unit_capacity(&engine, 1024); + for _i in 0..WRITE_BATCH_MAX_BATCH * WRITE_BATCH_LIMIT { + wb.put(b"aaa", b"bbb").unwrap(); + } + assert!(!wb.should_write_to_engine()); + wb.put(b"aaa", b"bbb").unwrap(); + assert!(wb.should_write_to_engine()); wb.clear(); assert!(!wb.should_write_to_engine()); } diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index f7fd904fd1c..b670ef34500 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -88,7 +88,7 @@ pub mod kv { #[cfg(feature = "test-engine-kv-rocksdb")] pub use engine_rocks::{ RocksEngine as KvTestEngine, RocksEngineIterator as KvTestEngineIterator, - RocksSnapshot as KvTestSnapshot, RocksWriteBatch as KvTestWriteBatch, + RocksSnapshot as KvTestSnapshot, RocksWriteBatchVec as KvTestWriteBatch, }; use engine_traits::{Result, TabletAccessor, TabletFactory}; use tikv_util::box_err; @@ -363,6 +363,7 @@ pub mod ctor { pub struct DBOptions { key_manager: Option>, rate_limiter: Option>, + enable_multi_batch_write: bool, } impl DBOptions { @@ -373,6 +374,10 @@ pub mod ctor { pub fn set_rate_limiter(&mut self, rate_limiter: Option>) { self.rate_limiter = rate_limiter; } + + pub fn set_enable_multi_batch_write(&mut self, enable: bool) { + self.enable_multi_batch_write = enable; + } } pub type RaftDBOptions = DBOptions; @@ -644,6 +649,11 @@ pub mod ctor { let mut rocks_db_opts = RawRocksDBOptions::new(); let env = get_env(db_opts.key_manager.clone(), db_opts.rate_limiter)?; rocks_db_opts.set_env(env); + if db_opts.enable_multi_batch_write { + 
rocks_db_opts.enable_unordered_write(false); + rocks_db_opts.enable_pipelined_write(false); + rocks_db_opts.enable_multi_batch_write(true); + } let rocks_db_opts = RocksDBOptions::from_raw(rocks_db_opts); Ok(rocks_db_opts) } diff --git a/components/engine_traits_tests/src/lib.rs b/components/engine_traits_tests/src/lib.rs index 49fe26b4f4d..0ddb39c61ac 100644 --- a/components/engine_traits_tests/src/lib.rs +++ b/components/engine_traits_tests/src/lib.rs @@ -71,6 +71,25 @@ fn default_engine() -> TempDirEnginePair { } } +/// Create a multi batch write engine with only CF_DEFAULT +fn multi_batch_write_engine() -> TempDirEnginePair { + use engine_test::{ + ctor::{DBOptions as KvTestDBOptions, KvEngineConstructorExt}, + kv::KvTestEngine, + }; + use engine_traits::CF_DEFAULT; + + let dir = tempdir(); + let path = dir.path().to_str().unwrap(); + let mut opt = KvTestDBOptions::default(); + opt.set_enable_multi_batch_write(true); + let engine = KvTestEngine::new_kv_engine(path, Some(opt), &[CF_DEFAULT], None).unwrap(); + TempDirEnginePair { + engine, + tempdir: dir, + } +} + /// Create an engine with the specified column families fn engine_cfs(cfs: &[&str]) -> TempDirEnginePair { use engine_test::{ctor::KvEngineConstructorExt, kv::KvTestEngine}; diff --git a/components/engine_traits_tests/src/write_batch.rs b/components/engine_traits_tests/src/write_batch.rs index 0210dee3806..dc966cf03b6 100644 --- a/components/engine_traits_tests/src/write_batch.rs +++ b/components/engine_traits_tests/src/write_batch.rs @@ -4,13 +4,17 @@ use engine_test::kv::KvTestEngine; use engine_traits::{Mutable, Peekable, SyncMutable, WriteBatch, WriteBatchExt}; use panic_hook::recover_safe; -use super::{assert_engine_error, default_engine}; +use super::{assert_engine_error, default_engine, multi_batch_write_engine}; #[test] fn write_batch_none_no_commit() { let db = default_engine(); let wb = db.engine.write_batch(); drop(wb); + + let db = multi_batch_write_engine(); + let wb = 
db.engine.write_batch_with_cap(1024); + drop(wb); } #[test] @@ -18,6 +22,10 @@ fn write_batch_none() { let db = default_engine(); let wb = db.engine.write_batch(); wb.write().unwrap(); + + let db = multi_batch_write_engine(); + let wb = db.engine.write_batch_with_cap(1024); + wb.write().unwrap(); } #[test] @@ -31,6 +39,28 @@ fn write_batch_put() { wb.write().unwrap(); assert_eq!(db.engine.get_value(b"a").unwrap().unwrap(), b"aa"); + + let db = multi_batch_write_engine(); + + let mut wb = db.engine.write_batch_with_cap(1024); + + for i in 0..128_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + wb.put(b"a", b"aa").unwrap(); + for i in 128..256_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + + wb.write().unwrap(); + + assert_eq!(db.engine.get_value(b"a").unwrap().unwrap(), b"aa"); + for i in 0..256_usize { + let x = i.to_be_bytes(); + assert_eq!(db.engine.get_value(&x).unwrap().unwrap(), &x); + } } #[test] @@ -46,6 +76,33 @@ fn write_batch_delete() { wb.write().unwrap(); assert!(db.engine.get_value(b"a").unwrap().is_none()); + + let db = multi_batch_write_engine(); + + for i in 0..127_usize { + let x = i.to_be_bytes(); + db.engine.put(&x, &x).unwrap(); + } + db.engine.put(b"a", b"aa").unwrap(); + for i in 127..255_usize { + let x = i.to_be_bytes(); + db.engine.put(&x, &x).unwrap(); + } + + let mut wb = db.engine.write_batch_with_cap(1024); + + for i in 0..255_usize { + let k = i.to_be_bytes(); + wb.delete(&k).unwrap(); + } + wb.delete(b"a").unwrap(); + + wb.write().unwrap(); + + assert!(db.engine.get_value(b"a").unwrap().is_none()); + for i in 0..255_usize { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } } #[test] @@ -60,6 +117,25 @@ fn write_batch_write_twice_1() { wb.write().unwrap(); assert_eq!(db.engine.get_value(b"a").unwrap().unwrap(), b"aa"); + + let db = multi_batch_write_engine(); + + let mut wb = db.engine.write_batch_with_cap(1024); + + for i in 0..123_usize { + let x = i.to_be_bytes(); + 
wb.put(&x, &x).unwrap(); + } + wb.put(b"a", b"aa").unwrap(); + + wb.write().unwrap(); + wb.write().unwrap(); + + assert_eq!(db.engine.get_value(b"a").unwrap().unwrap(), b"aa"); + for i in 0..123_usize { + let x = i.to_be_bytes(); + assert_eq!(db.engine.get_value(&x).unwrap().unwrap(), &x); + } } #[test] @@ -78,6 +154,40 @@ fn write_batch_write_twice_2() { wb.write().unwrap(); assert_eq!(db.engine.get_value(b"a").unwrap().unwrap(), b"aa"); + + let db = multi_batch_write_engine(); + + let mut wb = db.engine.write_batch_with_cap(1024); + + for i in 0..128_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + wb.put(b"a", b"aa").unwrap(); + + wb.write().unwrap(); + + db.engine.put(b"a", b"b").unwrap(); + assert_eq!(db.engine.get_value(b"a").unwrap().unwrap(), b"b"); + + for i in 0..128_usize { + let k = i.to_be_bytes(); + let v = (2 * i + 1).to_be_bytes(); + db.engine.put(&k, &v).unwrap(); + } + for i in 0..128_usize { + let k = i.to_be_bytes(); + let v = (2 * i + 1).to_be_bytes(); + assert_eq!(db.engine.get_value(&k).unwrap().unwrap(), &v); + } + + wb.write().unwrap(); + + assert_eq!(db.engine.get_value(b"a").unwrap().unwrap(), b"aa"); + for i in 0..128_usize { + let x = i.to_be_bytes(); + assert_eq!(db.engine.get_value(&x).unwrap().unwrap(), &x); + } } #[test] @@ -95,6 +205,37 @@ fn write_batch_write_twice_3() { assert_eq!(db.engine.get_value(b"a").unwrap().unwrap(), b"aa"); assert_eq!(db.engine.get_value(b"b").unwrap().unwrap(), b"bb"); + + let db = multi_batch_write_engine(); + + let mut wb = db.engine.write_batch_with_cap(1024); + + for i in 0..128_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + wb.put(b"a", b"aa").unwrap(); + + wb.write().unwrap(); + for i in 0..128_usize { + let k = i.to_be_bytes(); + let v = (2 * i + 1).to_be_bytes(); + db.engine.put(&k, &v).unwrap(); + } + db.engine.put(b"a", b"b").unwrap(); + for i in 128..256_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + wb.put(b"b", b"bb").unwrap(); + 
wb.write().unwrap(); + + assert_eq!(db.engine.get_value(b"a").unwrap().unwrap(), b"aa"); + assert_eq!(db.engine.get_value(b"b").unwrap().unwrap(), b"bb"); + for i in 0..256_usize { + let x = i.to_be_bytes(); + assert_eq!(db.engine.get_value(&x).unwrap().unwrap(), &x); + } } #[test] @@ -117,6 +258,43 @@ fn write_batch_delete_range_basic() { assert!(db.engine.get_value(b"c").unwrap().is_none()); assert!(db.engine.get_value(b"d").unwrap().is_none()); assert!(db.engine.get_value(b"e").unwrap().is_some()); + + let db = multi_batch_write_engine(); + + db.engine.put(b"a", b"").unwrap(); + db.engine.put(b"b", b"").unwrap(); + db.engine.put(b"c", b"").unwrap(); + db.engine.put(b"d", b"").unwrap(); + db.engine.put(b"e", b"").unwrap(); + + let mut wb = db.engine.write_batch_with_cap(1024); + for i in 0..256_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + + wb.delete_range(b"b", b"e").unwrap(); + wb.delete_range(&32_usize.to_be_bytes(), &128_usize.to_be_bytes()) + .unwrap(); + wb.write().unwrap(); + + assert!(db.engine.get_value(b"a").unwrap().is_some()); + assert!(db.engine.get_value(b"b").unwrap().is_none()); + assert!(db.engine.get_value(b"c").unwrap().is_none()); + assert!(db.engine.get_value(b"d").unwrap().is_none()); + assert!(db.engine.get_value(b"e").unwrap().is_some()); + for i in 0..32_usize { + let x = i.to_be_bytes(); + assert!(db.engine.get_value(&x).unwrap().is_some()); + } + for i in 32..128_usize { + let x = i.to_be_bytes(); + assert!(db.engine.get_value(&x).unwrap().is_none()); + } + for i in 128..256_usize { + let x = i.to_be_bytes(); + assert!(db.engine.get_value(&x).unwrap().is_some()); + } } #[test] @@ -141,6 +319,54 @@ fn write_batch_delete_range_inexact() { assert!(db.engine.get_value(b"e").unwrap().is_none()); assert!(db.engine.get_value(b"f").unwrap().is_none()); assert!(db.engine.get_value(b"g").unwrap().is_some()); + + let db = multi_batch_write_engine(); + + db.engine.put(b"a", b"").unwrap(); + db.engine.put(b"c", b"").unwrap(); 
+ db.engine.put(b"d", b"").unwrap(); + db.engine.put(b"e", b"").unwrap(); + db.engine.put(b"g", b"").unwrap(); + + let mut wb = db.engine.write_batch_with_cap(1024); + for i in (0..256_usize).step_by(2) { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + + wb.delete_range(b"b", b"f").unwrap(); + wb.delete_range(&0_usize.to_be_bytes(), &252_usize.to_be_bytes()) + .unwrap(); + wb.write().unwrap(); + + assert!(db.engine.get_value(b"a").unwrap().is_some()); + assert!(db.engine.get_value(b"b").unwrap().is_none()); + assert!(db.engine.get_value(b"c").unwrap().is_none()); + assert!(db.engine.get_value(b"d").unwrap().is_none()); + assert!(db.engine.get_value(b"e").unwrap().is_none()); + assert!(db.engine.get_value(b"f").unwrap().is_none()); + assert!(db.engine.get_value(b"g").unwrap().is_some()); + for i in 0..252_usize { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } + assert!( + db.engine + .get_value(&252_usize.to_be_bytes()) + .unwrap() + .is_some() + ); + assert!( + db.engine + .get_value(&253_usize.to_be_bytes()) + .unwrap() + .is_none() + ); + assert!( + db.engine + .get_value(&254_usize.to_be_bytes()) + .unwrap() + .is_some() + ); } #[test] @@ -161,6 +387,43 @@ fn write_batch_delete_range_after_put() { assert!(db.engine.get_value(b"c").unwrap().is_none()); assert!(db.engine.get_value(b"d").unwrap().is_none()); assert!(db.engine.get_value(b"e").unwrap().is_some()); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + for i in 0..256_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + wb.put(b"a", b"").unwrap(); + wb.put(b"b", b"").unwrap(); + wb.put(b"c", b"").unwrap(); + wb.put(b"d", b"").unwrap(); + wb.put(b"e", b"").unwrap(); + wb.delete_range(&1_usize.to_be_bytes(), &255_usize.to_be_bytes()) + .unwrap(); + wb.delete_range(b"b", b"e").unwrap(); + wb.write().unwrap(); + + assert!( + db.engine + .get_value(&0_usize.to_be_bytes()) + .unwrap() + .is_some() + ); + for i in 
1..255_usize { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } + assert!( + db.engine + .get_value(&255_usize.to_be_bytes()) + .unwrap() + .is_some() + ); + assert!(db.engine.get_value(b"a").unwrap().is_some()); + assert!(db.engine.get_value(b"b").unwrap().is_none()); + assert!(db.engine.get_value(b"c").unwrap().is_none()); + assert!(db.engine.get_value(b"d").unwrap().is_none()); + assert!(db.engine.get_value(b"e").unwrap().is_some()); } #[test] @@ -180,6 +443,37 @@ fn write_batch_delete_range_none() { assert!(db.engine.get_value(b"c").unwrap().is_none()); assert!(db.engine.get_value(b"d").unwrap().is_none()); assert!(db.engine.get_value(b"e").unwrap().is_some()); + + let db = multi_batch_write_engine(); + + db.engine.put(b"a", b"").unwrap(); + db.engine.put(b"e", b"").unwrap(); + for i in 0..256_usize { + let x = i.to_be_bytes(); + db.engine.put(&x, &x).unwrap(); + } + + let mut wb = db.engine.write_batch_with_cap(1024); + + wb.delete_range(b"b", b"e").unwrap(); + wb.delete_range(&1_usize.to_be_bytes(), &256_usize.to_be_bytes()) + .unwrap(); + wb.write().unwrap(); + + assert!(db.engine.get_value(b"a").unwrap().is_some()); + assert!(db.engine.get_value(b"b").unwrap().is_none()); + assert!(db.engine.get_value(b"c").unwrap().is_none()); + assert!(db.engine.get_value(b"d").unwrap().is_none()); + assert!(db.engine.get_value(b"e").unwrap().is_some()); + assert!( + db.engine + .get_value(&0_usize.to_be_bytes()) + .unwrap() + .is_some() + ); + for i in 1..256_usize { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } } #[test] @@ -203,6 +497,43 @@ fn write_batch_delete_range_twice() { assert!(db.engine.get_value(b"c").unwrap().is_none()); assert!(db.engine.get_value(b"d").unwrap().is_none()); assert!(db.engine.get_value(b"e").unwrap().is_some()); + + let db = multi_batch_write_engine(); + + db.engine.put(b"a", b"").unwrap(); + db.engine.put(b"b", b"").unwrap(); + db.engine.put(b"c", b"").unwrap(); + db.engine.put(b"d", 
b"").unwrap(); + db.engine.put(b"e", b"").unwrap(); + + let mut wb = db.engine.write_batch_with_cap(1024); + for i in 0..256_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + + wb.delete_range(b"b", b"e").unwrap(); + wb.delete_range(b"b", b"e").unwrap(); + wb.delete_range(&1_usize.to_be_bytes(), &256_usize.to_be_bytes()) + .unwrap(); + wb.delete_range(&1_usize.to_be_bytes(), &256_usize.to_be_bytes()) + .unwrap(); + wb.write().unwrap(); + + assert!(db.engine.get_value(b"a").unwrap().is_some()); + assert!(db.engine.get_value(b"b").unwrap().is_none()); + assert!(db.engine.get_value(b"c").unwrap().is_none()); + assert!(db.engine.get_value(b"d").unwrap().is_none()); + assert!(db.engine.get_value(b"e").unwrap().is_some()); + assert!( + db.engine + .get_value(&0_usize.to_be_bytes()) + .unwrap() + .is_some() + ); + for i in 1..256_usize { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } } #[test] @@ -226,6 +557,43 @@ fn write_batch_delete_range_twice_1() { assert!(db.engine.get_value(b"c").unwrap().is_none()); assert!(db.engine.get_value(b"d").unwrap().is_none()); assert!(db.engine.get_value(b"e").unwrap().is_some()); + + let db = multi_batch_write_engine(); + + db.engine.put(b"a", b"").unwrap(); + db.engine.put(b"b", b"").unwrap(); + db.engine.put(b"c", b"").unwrap(); + db.engine.put(b"d", b"").unwrap(); + db.engine.put(b"e", b"").unwrap(); + for i in 0..256_usize { + let x = i.to_be_bytes(); + db.engine.put(&x, &x).unwrap(); + } + + let mut wb = db.engine.write_batch_with_cap(1024); + + wb.delete_range(b"b", b"e").unwrap(); + wb.delete_range(b"b", b"e").unwrap(); + wb.delete_range(&1_usize.to_be_bytes(), &256_usize.to_be_bytes()) + .unwrap(); + wb.delete_range(&1_usize.to_be_bytes(), &256_usize.to_be_bytes()) + .unwrap(); + wb.write().unwrap(); + + assert!(db.engine.get_value(b"a").unwrap().is_some()); + assert!(db.engine.get_value(b"b").unwrap().is_none()); + assert!(db.engine.get_value(b"c").unwrap().is_none()); + 
assert!(db.engine.get_value(b"d").unwrap().is_none()); + assert!(db.engine.get_value(b"e").unwrap().is_some()); + assert!( + db.engine + .get_value(&0_usize.to_be_bytes()) + .unwrap() + .is_some() + ); + for i in 1..256_usize { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } } #[test] @@ -251,6 +619,49 @@ fn write_batch_delete_range_twice_2() { assert!(db.engine.get_value(b"c").unwrap().is_none()); assert!(db.engine.get_value(b"d").unwrap().is_none()); assert!(db.engine.get_value(b"e").unwrap().is_some()); + + let db = multi_batch_write_engine(); + + db.engine.put(b"a", b"").unwrap(); + db.engine.put(b"b", b"").unwrap(); + db.engine.put(b"c", b"").unwrap(); + db.engine.put(b"d", b"").unwrap(); + db.engine.put(b"e", b"").unwrap(); + for i in 0..256_usize { + let x = i.to_be_bytes(); + db.engine.put(&x, &x).unwrap(); + } + + let mut wb = db.engine.write_batch_with_cap(1024); + + wb.delete_range(b"b", b"e").unwrap(); + wb.delete_range(&1_usize.to_be_bytes(), &256_usize.to_be_bytes()) + .unwrap(); + wb.write().unwrap(); + db.engine.put(b"c", b"").unwrap(); + for i in 64..128_usize { + let x = i.to_be_bytes(); + db.engine.put(&x, &x).unwrap(); + } + wb.delete_range(b"b", b"e").unwrap(); + wb.delete_range(&1_usize.to_be_bytes(), &256_usize.to_be_bytes()) + .unwrap(); + wb.write().unwrap(); + + assert!(db.engine.get_value(b"a").unwrap().is_some()); + assert!(db.engine.get_value(b"b").unwrap().is_none()); + assert!(db.engine.get_value(b"c").unwrap().is_none()); + assert!(db.engine.get_value(b"d").unwrap().is_none()); + assert!(db.engine.get_value(b"e").unwrap().is_some()); + assert!( + db.engine + .get_value(&0_usize.to_be_bytes()) + .unwrap() + .is_some() + ); + for i in 1..256_usize { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } } #[test] @@ -269,6 +680,30 @@ fn write_batch_delete_range_empty_range() { assert!(db.engine.get_value(b"a").unwrap().is_some()); assert!(db.engine.get_value(b"b").unwrap().is_some()); 
assert!(db.engine.get_value(b"c").unwrap().is_some()); + + let db = multi_batch_write_engine(); + + db.engine.put(b"a", b"").unwrap(); + db.engine.put(b"b", b"").unwrap(); + db.engine.put(b"c", b"").unwrap(); + for i in 0..256_usize { + let x = i.to_be_bytes(); + db.engine.put(&x, &x).unwrap(); + } + + let mut wb = db.engine.write_batch_with_cap(1024); + + wb.delete_range(b"b", b"b").unwrap(); + wb.delete_range(&1_usize.to_be_bytes(), &1_usize.to_be_bytes()) + .unwrap(); + wb.write().unwrap(); + + assert!(db.engine.get_value(b"a").unwrap().is_some()); + assert!(db.engine.get_value(b"b").unwrap().is_some()); + assert!(db.engine.get_value(b"c").unwrap().is_some()); + for i in 0..256_usize { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_some()); + } } #[test] @@ -292,6 +727,37 @@ fn write_batch_delete_range_backward_range() { assert!(db.engine.get_value(b"a").unwrap().is_some()); assert!(db.engine.get_value(b"b").unwrap().is_some()); assert!(db.engine.get_value(b"c").unwrap().is_some()); + + let db = multi_batch_write_engine(); + + db.engine.put(b"a", b"").unwrap(); + db.engine.put(b"b", b"").unwrap(); + db.engine.put(b"c", b"").unwrap(); + + for i in 0..256_usize { + let x = i.to_be_bytes(); + db.engine.put(&x, &x).unwrap(); + } + + let mut wb = db.engine.write_batch_with_cap(1024); + + wb.delete_range(b"c", b"a").unwrap(); + wb.delete_range(&256_usize.to_be_bytes(), &0_usize.to_be_bytes()) + .unwrap(); + + assert!( + recover_safe(|| { + wb.write().unwrap(); + }) + .is_err() + ); + + assert!(db.engine.get_value(b"a").unwrap().is_some()); + assert!(db.engine.get_value(b"b").unwrap().is_some()); + assert!(db.engine.get_value(b"c").unwrap().is_some()); + for i in 0..256_usize { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_some()); + } } #[test] @@ -334,6 +800,54 @@ fn write_batch_delete_range_backward_range_partial_commit() { assert!(db.engine.get_value(b"d").unwrap().is_none()); assert!(db.engine.get_value(b"e").unwrap().is_some()); 
assert!(db.engine.get_value(b"f").unwrap().is_none()); + + let db = multi_batch_write_engine(); + + db.engine.put(b"a", b"").unwrap(); + db.engine.put(b"b", b"").unwrap(); + db.engine.put(b"c", b"").unwrap(); + db.engine.put(b"d", b"").unwrap(); + for i in 0..256_usize { + let x = i.to_be_bytes(); + db.engine.put(&x, &x).unwrap(); + } + + let mut wb = db.engine.write_batch_with_cap(1024); + + // Everything in the write batch before the panic + // due to bad range is going to end up committed. + // + // NB: This behavior seems pretty questionable and + // should probably be re-evaluated before other engines + // try to emulate it. + // + // A more reasonable solution might be to have a bogus + // delete_range request immediately panic. + wb.put(b"e", b"").unwrap(); + wb.delete(b"d").unwrap(); + wb.delete_range(b"c", b"a").unwrap(); + wb.put(b"f", b"").unwrap(); + wb.delete(b"a").unwrap(); + wb.delete_range(&128_usize.to_be_bytes(), &64_usize.to_be_bytes()) + .unwrap(); + wb.put(&256_usize.to_be_bytes(), b"").unwrap(); + for i in 0..64_usize { + wb.delete(&i.to_be_bytes()).unwrap(); + } + + assert!( + recover_safe(|| { + wb.write().unwrap(); + }) + .is_err() + ); + + assert!(db.engine.get_value(b"a").unwrap().is_some()); + assert!(db.engine.get_value(b"b").unwrap().is_some()); + assert!(db.engine.get_value(b"c").unwrap().is_some()); + assert!(db.engine.get_value(b"d").unwrap().is_none()); + assert!(db.engine.get_value(b"e").unwrap().is_some()); + assert!(db.engine.get_value(b"f").unwrap().is_none()); } #[test] @@ -346,6 +860,18 @@ fn write_batch_is_empty() { assert!(!wb.is_empty()); wb.write().unwrap(); assert!(!wb.is_empty()); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + + assert!(wb.is_empty()); + for i in 0..256_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + assert!(!wb.is_empty()); + wb.write().unwrap(); + assert!(!wb.is_empty()); } #[test] @@ -358,6 +884,17 @@ fn write_batch_count() { 
assert_eq!(wb.count(), 1); wb.write().unwrap(); assert_eq!(wb.count(), 1); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + assert_eq!(wb.count(), 0); + for i in 0..256_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + assert_eq!(wb.count(), 256); + wb.write().unwrap(); + assert_eq!(wb.count(), 256); } #[test] @@ -374,6 +911,23 @@ fn write_batch_count_2() { assert_eq!(wb.count(), 3); wb.write().unwrap(); assert_eq!(wb.count(), 3); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + + assert_eq!(wb.count(), 0); + for i in 0..256_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + wb.put(b"a", b"").unwrap(); + assert_eq!(wb.count(), 257); + wb.delete(b"a").unwrap(); + assert_eq!(wb.count(), 258); + wb.delete_range(b"a", b"b").unwrap(); + assert_eq!(wb.count(), 259); + wb.write().unwrap(); + assert_eq!(wb.count(), 259); } #[test] @@ -388,6 +942,21 @@ fn write_batch_clear() { assert_eq!(wb.count(), 0); wb.write().unwrap(); assert!(db.engine.get_value(b"a").unwrap().is_none()); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + + for i in 0..256_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + wb.clear(); + assert!(wb.is_empty()); + assert_eq!(wb.count(), 0); + wb.write().unwrap(); + for i in 0..256_usize { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } } #[test] @@ -403,6 +972,40 @@ fn cap_zero() { wb.write().unwrap(); assert!(db.engine.get_value(b"a").unwrap().is_some()); assert!(db.engine.get_value(b"f").unwrap().is_some()); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(0); + for i in 0..256_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + wb.put(b"a", b"").unwrap(); + wb.put(b"b", b"").unwrap(); + wb.put(b"c", b"").unwrap(); + wb.put(b"d", b"").unwrap(); + wb.put(b"e", b"").unwrap(); + 
wb.put(b"f", b"").unwrap(); + wb.write().unwrap(); + assert!( + db.engine + .get_value(&0_usize.to_be_bytes()) + .unwrap() + .is_some() + ); + assert!( + db.engine + .get_value(&123_usize.to_be_bytes()) + .unwrap() + .is_some() + ); + assert!( + db.engine + .get_value(&255_usize.to_be_bytes()) + .unwrap() + .is_some() + ); + assert!(db.engine.get_value(b"a").unwrap().is_some()); + assert!(db.engine.get_value(b"f").unwrap().is_some()); } /// Write batch capacity seems to just be a suggestions @@ -419,6 +1022,41 @@ fn cap_two() { wb.write().unwrap(); assert!(db.engine.get_value(b"a").unwrap().is_some()); assert!(db.engine.get_value(b"f").unwrap().is_some()); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(2); + + for i in 0..256_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + wb.put(b"a", b"").unwrap(); + wb.put(b"b", b"").unwrap(); + wb.put(b"c", b"").unwrap(); + wb.put(b"d", b"").unwrap(); + wb.put(b"e", b"").unwrap(); + wb.put(b"f", b"").unwrap(); + wb.write().unwrap(); + assert!( + db.engine + .get_value(&0_usize.to_be_bytes()) + .unwrap() + .is_some() + ); + assert!( + db.engine + .get_value(&123_usize.to_be_bytes()) + .unwrap() + .is_some() + ); + assert!( + db.engine + .get_value(&255_usize.to_be_bytes()) + .unwrap() + .is_some() + ); + assert!(db.engine.get_value(b"a").unwrap().is_some()); + assert!(db.engine.get_value(b"f").unwrap().is_some()); } // We should write when count is greater than WRITE_BATCH_MAX_KEYS @@ -441,6 +1079,24 @@ fn should_write_to_engine() { break; } } + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + let max_keys = KvTestEngine::WRITE_BATCH_MAX_KEYS; + + let mut key = vec![]; + loop { + key.push(b'a'); + wb.put(&key, b"").unwrap(); + if key.len() <= max_keys { + assert!(!wb.should_write_to_engine()); + } + if key.len() == max_keys + 1 { + assert!(wb.should_write_to_engine()); + wb.write().unwrap(); + break; + } + } } // But there 
kind of aren't consequences for making huge write batches @@ -475,6 +1131,37 @@ fn should_write_to_engine_but_whatever() { break; } } + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + let max_keys = KvTestEngine::WRITE_BATCH_MAX_KEYS; + + let mut key = vec![]; + + loop { + key.push(b'a'); + wb.put(&key, b"").unwrap(); + if key.len() <= max_keys { + assert!(!wb.should_write_to_engine()); + } + if key.len() > max_keys { + assert!(wb.should_write_to_engine()); + } + if key.len() == max_keys * 2 { + assert!(wb.should_write_to_engine()); + wb.write().unwrap(); + break; + } + } + + let mut key = vec![]; + loop { + key.push(b'a'); + assert!(db.engine.get_value(&key).unwrap().is_some()); + if key.len() == max_keys * 2 { + break; + } + } } #[test] @@ -504,6 +1191,43 @@ fn data_size() { wb.clear(); let size8 = wb.data_size(); assert_eq!(size8, size1); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + let max_keys = 256_usize; + + let size1 = wb.data_size(); + for i in 0..max_keys { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + let size2 = wb.data_size(); + assert!(size1 < size2); + wb.write().unwrap(); + let size3 = wb.data_size(); + assert_eq!(size2, size3); + wb.clear(); + let size4 = wb.data_size(); + assert_eq!(size4, size1); + for i in 0..max_keys { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + let size5 = wb.data_size(); + assert!(size4 < size5); + for i in 0..max_keys { + let x = i.to_be_bytes(); + wb.delete(&x).unwrap(); + } + let size6 = wb.data_size(); + assert!(size5 < size6); + wb.delete_range(&0_usize.to_be_bytes(), &(max_keys * 2).to_be_bytes()) + .unwrap(); + let size7 = wb.data_size(); + assert!(size6 < size7); + wb.clear(); + let size8 = wb.data_size(); + assert_eq!(size8, size1); } #[test] @@ -513,6 +1237,12 @@ fn save_point_rollback_none() { let err = wb.rollback_to_save_point(); assert_engine_error(err); + + let db = 
multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + + let err = wb.rollback_to_save_point(); + assert_engine_error(err); } #[test] @@ -522,14 +1252,40 @@ fn save_point_pop_none() { let err = wb.rollback_to_save_point(); assert_engine_error(err); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + + let err = wb.rollback_to_save_point(); + assert_engine_error(err); } -#[test] -fn save_point_rollback_one() { - let db = default_engine(); - let mut wb = db.engine.write_batch(); +#[test] +fn save_point_rollback_one() { + let db = default_engine(); + let mut wb = db.engine.write_batch(); + + wb.set_save_point(); + wb.put(b"a", b"").unwrap(); + + wb.rollback_to_save_point().unwrap(); + + let err = wb.rollback_to_save_point(); + assert_engine_error(err); + let err = wb.pop_save_point(); + assert_engine_error(err); + wb.write().unwrap(); + let val = db.engine.get_value(b"a").unwrap(); + assert!(val.is_none()); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); wb.set_save_point(); + for i in 0..256_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } wb.put(b"a", b"").unwrap(); wb.rollback_to_save_point().unwrap(); @@ -539,6 +1295,9 @@ fn save_point_rollback_one() { let err = wb.pop_save_point(); assert_engine_error(err); wb.write().unwrap(); + for i in 0..256_usize { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } let val = db.engine.get_value(b"a").unwrap(); assert!(val.is_none()); } @@ -565,6 +1324,39 @@ fn save_point_rollback_two() { assert!(a.is_none()); let b = db.engine.get_value(b"b").unwrap(); assert!(b.is_none()); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + let max_keys = 256_usize; + + wb.set_save_point(); + for i in 0..max_keys { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + wb.put(b"a", b"").unwrap(); + wb.set_save_point(); + for i in 
max_keys..2 * max_keys { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + wb.put(b"b", b"").unwrap(); + + wb.rollback_to_save_point().unwrap(); + wb.rollback_to_save_point().unwrap(); + + let err = wb.rollback_to_save_point(); + assert_engine_error(err); + let err = wb.pop_save_point(); + assert_engine_error(err); + wb.write().unwrap(); + let a = db.engine.get_value(b"a").unwrap(); + assert!(a.is_none()); + let b = db.engine.get_value(b"b").unwrap(); + assert!(b.is_none()); + for i in 0..2 * max_keys { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } } #[test] @@ -582,6 +1374,35 @@ fn save_point_rollback_partial() { assert!(a.is_some()); let b = db.engine.get_value(b"b").unwrap(); assert!(b.is_none()); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + let max_keys = 256_usize; + + for i in 0..max_keys { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + wb.put(b"a", b"").unwrap(); + wb.set_save_point(); + wb.put(b"b", b"").unwrap(); + for i in max_keys..2 * max_keys { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + + wb.rollback_to_save_point().unwrap(); + wb.write().unwrap(); + let a = db.engine.get_value(b"a").unwrap(); + assert!(a.is_some()); + for i in 0..max_keys { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_some()); + } + let b = db.engine.get_value(b"b").unwrap(); + assert!(b.is_none()); + for i in max_keys..2 * max_keys { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } } #[test] @@ -606,6 +1427,38 @@ fn save_point_pop_rollback() { assert!(val.is_none()); let val = db.engine.get_value(b"b").unwrap(); assert!(val.is_none()); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + + wb.set_save_point(); + for i in 0..256_usize { + let x = i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + wb.put(b"a", b"").unwrap(); + wb.set_save_point(); + for i in 0..256_usize { + let x = 
i.to_be_bytes(); + wb.put(&x, &x).unwrap(); + } + wb.put(b"a", b"").unwrap(); + + wb.pop_save_point().unwrap(); + wb.rollback_to_save_point().unwrap(); + + let err = wb.rollback_to_save_point(); + assert_engine_error(err); + let err = wb.pop_save_point(); + assert_engine_error(err); + wb.write().unwrap(); + let val = db.engine.get_value(b"a").unwrap(); + assert!(val.is_none()); + let val = db.engine.get_value(b"b").unwrap(); + assert!(val.is_none()); + for i in 0..512_usize { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } } #[test] @@ -631,6 +1484,41 @@ fn save_point_rollback_after_write() { let val = db.engine.get_value(b"a").unwrap(); assert!(val.is_none()); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + let max_keys = 256_usize; + + wb.set_save_point(); + for i in 0..max_keys { + wb.put(&i.to_be_bytes(), b"").unwrap(); + } + wb.put(b"a", b"").unwrap(); + + wb.write().unwrap(); + + assert!(db.engine.get_value(b"a").unwrap().is_some()); + for i in 0..max_keys { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_some()); + } + + db.engine.delete(b"a").unwrap(); + for i in 0..max_keys { + db.engine.delete(&i.to_be_bytes()).unwrap(); + } + + assert!(db.engine.get_value(b"a").unwrap().is_none()); + for i in 0..max_keys { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } + + wb.rollback_to_save_point().unwrap(); + wb.write().unwrap(); + + assert!(db.engine.get_value(b"a").unwrap().is_none()); + for i in 0..max_keys { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } } #[test] @@ -655,6 +1543,38 @@ fn save_point_same_rollback_one() { assert!(a.is_some()); assert!(b.is_none()); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + let max_keys = 256_usize; + + for i in 0..max_keys { + wb.put(&i.to_be_bytes(), b"").unwrap(); + } + wb.put(b"a", b"").unwrap(); + + wb.set_save_point(); + 
wb.set_save_point(); + wb.set_save_point(); + + wb.put(b"b", b"").unwrap(); + for i in max_keys..2 * max_keys { + wb.put(&i.to_be_bytes(), b"").unwrap(); + } + + wb.rollback_to_save_point().unwrap(); + + wb.write().unwrap(); + + assert!(db.engine.get_value(b"a").unwrap().is_some()); + for i in 0..max_keys { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_some()); + } + + assert!(db.engine.get_value(b"b").unwrap().is_none()); + for i in max_keys..2 * max_keys { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } } #[test] @@ -684,6 +1604,43 @@ fn save_point_same_rollback_all() { assert!(a.is_some()); assert!(b.is_none()); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + let max_keys = 256_usize; + + for i in 0..max_keys { + wb.put(&i.to_be_bytes(), b"").unwrap(); + } + wb.put(b"a", b"").unwrap(); + + wb.set_save_point(); + wb.set_save_point(); + wb.set_save_point(); + + wb.put(b"b", b"").unwrap(); + for i in 0..max_keys { + wb.put(&i.to_be_bytes(), b"").unwrap(); + } + + wb.rollback_to_save_point().unwrap(); + wb.rollback_to_save_point().unwrap(); + wb.rollback_to_save_point().unwrap(); + + assert_engine_error(wb.pop_save_point()); + assert_engine_error(wb.rollback_to_save_point()); + + wb.write().unwrap(); + + assert!(db.engine.get_value(b"a").unwrap().is_some()); + for i in 0..max_keys { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_some()); + } + + assert!(db.engine.get_value(b"b").unwrap().is_none()); + for i in max_keys..2 * max_keys { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } } #[test] @@ -709,6 +1666,41 @@ fn save_point_pop_after_write() { let val = db.engine.get_value(b"a").unwrap(); assert!(val.is_some()); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + let max_keys = 256_usize; + + wb.set_save_point(); + wb.put(b"a", b"").unwrap(); + for i in 0..max_keys { + wb.put(&i.to_be_bytes(), 
b"").unwrap(); + } + + wb.write().unwrap(); + + assert!(db.engine.get_value(b"a").unwrap().is_some()); + for i in 0..max_keys { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_some()); + } + + db.engine.delete(b"a").unwrap(); + for i in 0..max_keys { + db.engine.delete(&i.to_be_bytes()).unwrap(); + } + + assert!(db.engine.get_value(b"a").unwrap().is_none()); + for i in 0..max_keys { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_none()); + } + + wb.pop_save_point().unwrap(); + wb.write().unwrap(); + + assert!(db.engine.get_value(b"a").unwrap().is_some()); + for i in 0..max_keys { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_some()); + } } #[test] @@ -733,6 +1725,42 @@ fn save_point_all_commands() { assert!(a.is_some()); assert!(b.is_none()); assert!(d.is_some()); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + let max_keys = 256_usize; + + for i in 0..max_keys / 2 { + db.engine.put(&i.to_be_bytes(), b"").unwrap(); + } + db.engine.put(b"a", b"").unwrap(); + for i in max_keys / 2..max_keys { + db.engine.put(&i.to_be_bytes(), b"").unwrap(); + } + db.engine.put(b"d", b"").unwrap(); + + wb.set_save_point(); + for i in 0..max_keys / 2 { + wb.delete(&i.to_be_bytes()).unwrap(); + } + wb.delete(b"a").unwrap(); + wb.put(b"b", b"").unwrap(); + wb.delete_range(b"c", b"e").unwrap(); + wb.delete_range(&(max_keys / 3).to_be_bytes(), &(2 * max_keys).to_be_bytes()) + .unwrap(); + + wb.rollback_to_save_point().unwrap(); + wb.write().unwrap(); + + let a = db.engine.get_value(b"a").unwrap(); + let b = db.engine.get_value(b"b").unwrap(); + let d = db.engine.get_value(b"d").unwrap(); + for i in 0..max_keys { + assert!(db.engine.get_value(&i.to_be_bytes()).unwrap().is_some()); + } + assert!(a.is_some()); + assert!(b.is_none()); + assert!(d.is_some()); } // What happens to the count() and is_empty() methods @@ -824,4 +1852,99 @@ fn save_points_and_counts() { assert_eq!(wb.is_empty(), true); 
assert_eq!(wb.count(), 0); + + let db = multi_batch_write_engine(); + let mut wb = db.engine.write_batch_with_cap(1024); + let max_keys = 256_usize; + + assert_eq!(wb.is_empty(), true); + assert_eq!(wb.count(), 0); + + wb.set_save_point(); + + assert_eq!(wb.is_empty(), true); + assert_eq!(wb.count(), 0); + + for i in 0..max_keys { + wb.put(&i.to_be_bytes(), b"").unwrap(); + } + + assert_eq!(wb.is_empty(), false); + assert_eq!(wb.count(), max_keys); + + wb.rollback_to_save_point().unwrap(); + + assert_eq!(wb.is_empty(), true); + assert_eq!(wb.count(), 0); + + wb.set_save_point(); + + assert_eq!(wb.is_empty(), true); + assert_eq!(wb.count(), 0); + + for i in 0..max_keys { + wb.put(&i.to_be_bytes(), b"").unwrap(); + } + + assert_eq!(wb.is_empty(), false); + assert_eq!(wb.count(), max_keys); + + wb.pop_save_point().unwrap(); + + assert_eq!(wb.is_empty(), false); + assert_eq!(wb.count(), max_keys); + + wb.clear(); + + assert_eq!(wb.is_empty(), true); + assert_eq!(wb.count(), 0); + + wb.set_save_point(); + + assert_eq!(wb.is_empty(), true); + assert_eq!(wb.count(), 0); + + for i in 0..max_keys { + wb.put(&i.to_be_bytes(), b"").unwrap(); + } + + assert_eq!(wb.is_empty(), false); + assert_eq!(wb.count(), max_keys); + + wb.write().unwrap(); + + assert_eq!(wb.is_empty(), false); + assert_eq!(wb.count(), max_keys); + + wb.rollback_to_save_point().unwrap(); + + assert_eq!(wb.is_empty(), true); + assert_eq!(wb.count(), 0); + + wb.set_save_point(); + + assert_eq!(wb.is_empty(), true); + assert_eq!(wb.count(), 0); + + for i in 0..max_keys { + wb.put(&i.to_be_bytes(), b"").unwrap(); + } + + assert_eq!(wb.is_empty(), false); + assert_eq!(wb.count(), max_keys); + + wb.write().unwrap(); + + assert_eq!(wb.is_empty(), false); + assert_eq!(wb.count(), max_keys); + + wb.pop_save_point().unwrap(); + + assert_eq!(wb.is_empty(), false); + assert_eq!(wb.count(), max_keys); + + wb.clear(); + + assert_eq!(wb.is_empty(), true); + assert_eq!(wb.count(), 0); } diff --git 
a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 9c2e548f10e..aa57676925c 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -435,6 +435,7 @@ where priority: Priority, ) -> ApplyContext { let kv_wb = engine.write_batch_with_cap(DEFAULT_APPLY_WB_SIZE); + ApplyContext { tag, timer: None, diff --git a/src/config.rs b/src/config.rs index 239c80a62ab..580e91712de 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1171,6 +1171,8 @@ impl DbConfig { self.use_direct_io_for_flush_and_compaction, ); opts.enable_pipelined_write(self.enable_pipelined_write); + let enable_multi_batch_write = !self.enable_pipelined_write && !self.enable_unordered_write; + opts.enable_multi_batch_write(enable_multi_batch_write); opts.enable_unordered_write(self.enable_unordered_write); opts.set_info_log(RocksdbLogger::default()); opts.set_info_log_level(self.info_log_level.into()); diff --git a/src/server/debug.rs b/src/server/debug.rs index e5d6eba617f..d10f58cc2ad 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -13,7 +13,7 @@ use collections::HashSet; use engine_rocks::{ raw::{CompactOptions, DBBottommostLevelCompaction, DB}, util::get_cf_handle, - Compat, RocksEngine, RocksEngineIterator, RocksMvccProperties, RocksWriteBatch, + Compat, RocksEngine, RocksEngineIterator, RocksMvccProperties, RocksWriteBatchVec, }; use engine_traits::{ Engines, IterOptions, Iterable, Iterator as EngineIterator, Mutable, MvccProperties, Peekable, @@ -577,11 +577,11 @@ impl Debugger { let msg = format!("Store {} in the failed list", store_id); return Err(Error::Other(msg.into())); } - let mut wb = RocksWriteBatch::new(self.engines.kv.as_inner().clone()); + let mut wb = self.engines.kv.write_batch(); let store_ids = HashSet::::from_iter(store_ids); { - let remove_stores = |key: &[u8], value: &[u8], kv_wb: &mut RocksWriteBatch| { + let remove_stores = |key: &[u8], value: &[u8], kv_wb: &mut 
RocksWriteBatchVec| { let (_, suffix_type) = box_try!(keys::decode_region_meta_key(key)); if suffix_type != keys::REGION_STATE_SUFFIX { return Ok(()); @@ -1010,7 +1010,7 @@ fn recover_mvcc_for_range( let wb_limit: usize = 10240; loop { - let mut wb = RocksWriteBatch::new(db.clone()); + let mut wb = db.c().write_batch(); mvcc_checker.check_mvcc(&mut wb, Some(wb_limit))?; let batch_size = wb.count(); @@ -1102,7 +1102,7 @@ impl MvccChecker { } } - pub fn check_mvcc(&mut self, wb: &mut RocksWriteBatch, limit: Option) -> Result<()> { + pub fn check_mvcc(&mut self, wb: &mut RocksWriteBatchVec, limit: Option) -> Result<()> { loop { // Find min key in the 3 CFs. let mut key = MvccChecker::min_key(None, &self.default_iter, |k| { @@ -1124,7 +1124,7 @@ impl MvccChecker { } } - fn check_mvcc_key(&mut self, wb: &mut RocksWriteBatch, key: &[u8]) -> Result<()> { + fn check_mvcc_key(&mut self, wb: &mut RocksWriteBatchVec, key: &[u8]) -> Result<()> { self.scan_count += 1; if self.scan_count % 1_000_000 == 0 { info!( @@ -1292,7 +1292,7 @@ impl MvccChecker { fn delete( &mut self, - wb: &mut RocksWriteBatch, + wb: &mut RocksWriteBatchVec, cf: &str, key: &[u8], ts: Option, @@ -1333,7 +1333,7 @@ fn set_region_tombstone( db: &Arc, store_id: u64, region: Region, - wb: &mut RocksWriteBatch, + wb: &mut RocksWriteBatchVec, ) -> Result<()> { let id = region.get_id(); let key = keys::region_state_key(id); @@ -1924,7 +1924,7 @@ mod tests { let cf2 = CF_RAFT; { - let mock_region_state = |wb: &mut RocksWriteBatch, region_id: u64, peers: &[u64]| { + let mock_region_state = |wb: &mut RocksWriteBatchVec, region_id: u64, peers: &[u64]| { let region_state_key = keys::region_state_key(region_id); let mut region_state = RegionLocalState::default(); region_state.set_state(PeerState::Normal); @@ -1945,20 +1945,23 @@ mod tests { wb.put_msg_cf(cf2, ®ion_state_key, ®ion_state) .unwrap(); }; - let mock_raft_state = - |wb: &mut RocksWriteBatch, region_id: u64, last_index: u64, commit_index: u64| { - let 
raft_state_key = keys::raft_state_key(region_id); - let mut raft_state = RaftLocalState::default(); - raft_state.set_last_index(last_index); - raft_state.mut_hard_state().set_commit(commit_index); - wb.put_msg_cf(cf1, &raft_state_key, &raft_state).unwrap(); - }; - let mock_apply_state = |wb: &mut RocksWriteBatch, region_id: u64, apply_index: u64| { - let raft_apply_key = keys::apply_state_key(region_id); - let mut apply_state = RaftApplyState::default(); - apply_state.set_applied_index(apply_index); - wb.put_msg_cf(cf2, &raft_apply_key, &apply_state).unwrap(); + let mock_raft_state = |wb: &mut RocksWriteBatchVec, + region_id: u64, + last_index: u64, + commit_index: u64| { + let raft_state_key = keys::raft_state_key(region_id); + let mut raft_state = RaftLocalState::default(); + raft_state.set_last_index(last_index); + raft_state.mut_hard_state().set_commit(commit_index); + wb.put_msg_cf(cf1, &raft_state_key, &raft_state).unwrap(); }; + let mock_apply_state = + |wb: &mut RocksWriteBatchVec, region_id: u64, apply_index: u64| { + let raft_apply_key = keys::apply_state_key(region_id); + let mut apply_state = RaftApplyState::default(); + apply_state.set_applied_index(apply_index); + wb.put_msg_cf(cf2, &raft_apply_key, &apply_state).unwrap(); + }; for ®ion_id in &[10, 11, 12] { mock_region_state(&mut wb2, region_id, &[store_id]); @@ -2176,9 +2179,11 @@ mod tests { .iter() .map(|cf| CFOptions::new(cf, ColumnFamilyOptions::new())) .collect(); - let db = Arc::new(new_engine_opt(path_str, DBOptions::new(), cfs_opts).unwrap()); + let db_opt = DBOptions::new(); + db_opt.enable_multi_batch_write(true); + let db = Arc::new(new_engine_opt(path_str, db_opt, cfs_opts).unwrap()); // Write initial KVs. 
- let mut wb = db.c().write_batch(); + let mut wb = RocksEngine::from_db(db.clone()).write_batch(); for &(cf, ref k, ref v, _) in &kv { wb.put_cf(cf, &keys::data_key(k.as_encoded()), v).unwrap(); } diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index 5dda55751e7..7d233430f70 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -18,7 +18,7 @@ use engine_rocks::{ CompactionFilterDecision, CompactionFilterFactory, CompactionFilterValueType, DBCompactionFilter, }, - RocksEngine, RocksMvccProperties, RocksWriteBatch, + RocksEngine, RocksMvccProperties, RocksWriteBatchVec, }; use engine_traits::{ KvEngine, MiscExt, Mutable, MvccProperties, WriteBatch, WriteBatchExt, WriteOptions, @@ -267,7 +267,7 @@ struct WriteCompactionFilter { is_bottommost_level: bool, encountered_errors: bool, - write_batch: RocksWriteBatch, + write_batch: RocksWriteBatchVec, gc_scheduler: Scheduler>, // A key batch which is going to be sent to the GC worker. 
mvcc_deletions: Vec, @@ -461,7 +461,7 @@ impl WriteCompactionFilter { } fn do_flush( - wb: &RocksWriteBatch, + wb: &RocksWriteBatchVec, wopts: &WriteOptions, ) -> Result<(), engine_traits::Error> { let _io_type_guard = WithIOType::new(IOType::Gc); diff --git a/src/server/reset_to_version.rs b/src/server/reset_to_version.rs index 263a8d2565a..1a7443f6d08 100644 --- a/src/server/reset_to_version.rs +++ b/src/server/reset_to_version.rs @@ -6,7 +6,7 @@ use std::{ thread::JoinHandle, }; -use engine_rocks::{RocksEngine, RocksEngineIterator, RocksWriteBatch}; +use engine_rocks::{RocksEngine, RocksEngineIterator, RocksWriteBatchVec}; use engine_traits::{ IterOptions, Iterable, Iterator, Mutable, SeekKey, WriteBatch, WriteBatchExt, CF_DEFAULT, CF_LOCK, CF_WRITE, @@ -121,7 +121,7 @@ impl ResetToVersionWorker { pub fn process_next_batch( &mut self, batch_size: usize, - wb: &mut RocksWriteBatch, + wb: &mut RocksWriteBatchVec, ) -> Result { let Batch { writes, has_more } = self.scan_next_batch(batch_size)?; for (key, write) in writes { @@ -140,7 +140,7 @@ impl ResetToVersionWorker { pub fn process_next_batch_lock( &mut self, batch_size: usize, - wb: &mut RocksWriteBatch, + wb: &mut RocksWriteBatchVec, ) -> Result { let mut has_more = true; for _ in 0..batch_size { diff --git a/tests/benches/misc/writebatch/bench_writebatch.rs b/tests/benches/misc/writebatch/bench_writebatch.rs index 3c96d79ee82..0c6e81a35ca 100644 --- a/tests/benches/misc/writebatch/bench_writebatch.rs +++ b/tests/benches/misc/writebatch/bench_writebatch.rs @@ -2,15 +2,19 @@ use std::sync::Arc; -use engine_rocks::{raw::DB, Compat, RocksWriteBatch}; +use engine_rocks::{ + raw::{DBOptions, DB}, + RocksEngine, RocksWriteBatchVec, +}; use engine_traits::{Mutable, WriteBatch, WriteBatchExt}; use tempfile::Builder; use test::Bencher; fn writebatch(db: &Arc, round: usize, batch_keys: usize) { let v = b"operators are syntactic sugar for calls to methods of built-in traits"; + let engine = 
RocksEngine::from_db(db.clone()); for r in 0..round { - let mut batch = db.c().write_batch(); + let mut batch = engine.write_batch(); for i in 0..batch_keys { let k = format!("key_round{}_key{}", r, i); batch.put(k.as_bytes(), v).unwrap(); @@ -24,7 +28,12 @@ fn bench_writebatch_impl(b: &mut Bencher, batch_keys: usize) { .prefix("/tmp/rocksdb_write_batch_bench") .tempdir() .unwrap(); - let db = Arc::new(DB::open_default(path.path().to_str().unwrap()).unwrap()); + let mut opts = DBOptions::new(); + opts.create_if_missing(true); + opts.enable_unordered_write(false); + opts.enable_pipelined_write(false); + opts.enable_multi_batch_write(true); + let db = Arc::new(DB::open(opts, path.path().to_str().unwrap()).unwrap()); let key_count = 1 << 13; let round = key_count / batch_keys; b.iter(|| { @@ -87,7 +96,7 @@ fn bench_writebatch_1024(b: &mut Bencher) { bench_writebatch_impl(b, 1024); } -fn fill_writebatch(wb: &mut RocksWriteBatch, target_size: usize) { +fn fill_writebatch(wb: &mut RocksWriteBatchVec, target_size: usize) { let (k, v) = (b"this is the key", b"this is the value"); loop { wb.put(k, v).unwrap(); @@ -103,9 +112,15 @@ fn bench_writebatch_without_capacity(b: &mut Bencher) { .prefix("/tmp/rocksdb_write_batch_bench") .tempdir() .unwrap(); - let db = Arc::new(DB::open_default(path.path().to_str().unwrap()).unwrap()); + let mut opts = DBOptions::new(); + opts.create_if_missing(true); + opts.enable_unordered_write(false); + opts.enable_pipelined_write(false); + opts.enable_multi_batch_write(true); + let db = Arc::new(DB::open(opts, path.path().to_str().unwrap()).unwrap()); + let engine = RocksEngine::from_db(db); b.iter(|| { - let mut wb = db.c().write_batch(); + let mut wb = engine.write_batch(); fill_writebatch(&mut wb, 4096); }); } @@ -116,9 +131,15 @@ fn bench_writebatch_with_capacity(b: &mut Bencher) { .prefix("/tmp/rocksdb_write_batch_bench") .tempdir() .unwrap(); - let db = Arc::new(DB::open_default(path.path().to_str().unwrap()).unwrap()); + let mut opts = 
DBOptions::new(); + opts.create_if_missing(true); + opts.enable_unordered_write(false); + opts.enable_pipelined_write(false); + opts.enable_multi_batch_write(true); + let db = Arc::new(DB::open(opts, path.path().to_str().unwrap()).unwrap()); + let engine = RocksEngine::from_db(db); b.iter(|| { - let mut wb = db.c().write_batch_with_cap(4096); + let mut wb = engine.write_batch_with_cap(4096); fill_writebatch(&mut wb, 4096); }); } diff --git a/tests/failpoints/cases/test_gc_worker.rs b/tests/failpoints/cases/test_gc_worker.rs index 9ceaa16e3c7..09308646421 100644 --- a/tests/failpoints/cases/test_gc_worker.rs +++ b/tests/failpoints/cases/test_gc_worker.rs @@ -7,7 +7,7 @@ use std::{ }; use collections::HashMap; -use engine_traits::Peekable; +use engine_traits::{Peekable, WriteBatch}; use grpcio::{ChannelBuilder, Environment}; use keys::data_key; use kvproto::{kvrpcpb::*, metapb::Region, tikvpb::TikvClient}; @@ -321,7 +321,7 @@ fn test_error_in_compaction_filter() { gc_runner.gc(&raw_engine); match gc_runner.gc_receiver.recv().unwrap() { - GcTask::OrphanVersions { wb, .. } => assert_eq!(wb.as_inner().count(), 2), + GcTask::OrphanVersions { wb, .. } => assert_eq!(wb.count(), 2), GcTask::GcKeys { .. 
} => {} _ => unreachable!(), } From 57c4a43cb81f1196a48325913d76fd1617cada4d Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Fri, 15 Jul 2022 09:11:05 +0800 Subject: [PATCH 0081/1149] log-backup: store log files by date/hour/store_id (#13018) ref tikv/tikv#12902 Signed-off-by: joccau Co-authored-by: Ti Chi Robot --- components/backup-stream/src/endpoint.rs | 4 +- components/backup-stream/src/router.rs | 30 ++++++----- .../backup-stream/src/subscription_manager.rs | 51 ++++++++++--------- 3 files changed, 44 insertions(+), 41 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 490e0b48e8d..c779afebe45 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -324,7 +324,7 @@ where tokio::time::sleep(Duration::from_secs(2)).await; break; } - _ => panic!("BUG: invalid event {:?}", event), + _ => warn!("BUG: invalid event"; "event" => ?event), } } else { tokio::time::sleep(Duration::from_secs(1)).await; @@ -372,7 +372,7 @@ where tokio::time::sleep(Duration::from_secs(2)).await; break; } - _ => panic!("BUG: invalid event {:?}", event), + _ => warn!("BUG: invalid event"; "event" => ?event), } } else { tokio::time::sleep(Duration::from_secs(1)).await; diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index debb4b417c8..8311c08c7de 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -618,13 +618,14 @@ impl TempFileKey { } /// path_to_log_file specifies the path of record log. - /// eg. "v1/20220625/03/t00000071/434098800931373064-f0251bd5-1441-499a-8f53-adc0d1057a73.log" - fn path_to_log_file(&self, min_ts: u64, max_ts: u64) -> String { + /// eg. 
"v1/${date}/${hour}/${store_id}/t00000071/434098800931373064-f0251bd5-1441-499a-8f53-adc0d1057a73.log" + fn path_to_log_file(&self, store_id: u64, min_ts: u64, max_ts: u64) -> String { format!( - "v1/{}/{}/t{:08}/{:012}-{}.log", + "v1/{}/{}/{}/t{:08}/{:012}-{}.log", // We may delete a range of files, so using the max_ts for preventing remove some records wrong. Self::format_date_time(max_ts, FormatType::Date), Self::format_date_time(max_ts, FormatType::Hour), + store_id, self.table_id, min_ts, uuid::Uuid::new_v4() @@ -632,22 +633,23 @@ impl TempFileKey { } /// path_to_schema_file specifies the path of schema log. - /// eg. "v1/20220625/03/schema-meta/434055683656384515-cc3cb7a3-e03b-4434-ab6c-907656fddf67.log" - fn path_to_schema_file(min_ts: u64, max_ts: u64) -> String { + /// eg. "v1/${date}/${hour}/${store_id}/schema-meta/434055683656384515-cc3cb7a3-e03b-4434-ab6c-907656fddf67.log" + fn path_to_schema_file(store_id: u64, min_ts: u64, max_ts: u64) -> String { format!( - "v1/{}/{}/schema-meta/{:012}-{}.log", + "v1/{}/{}/{}/schema-meta/{:012}-{}.log", Self::format_date_time(max_ts, FormatType::Date), Self::format_date_time(max_ts, FormatType::Hour), + store_id, min_ts, uuid::Uuid::new_v4(), ) } - fn file_name(&self, min_ts: TimeStamp, max_ts: TimeStamp) -> String { + fn file_name(&self, store_id: u64, min_ts: TimeStamp, max_ts: TimeStamp) -> String { if self.is_meta { - Self::path_to_schema_file(min_ts.into_inner(), max_ts.into_inner()) + Self::path_to_schema_file(store_id, min_ts.into_inner(), max_ts.into_inner()) } else { - self.path_to_log_file(min_ts.into_inner(), max_ts.into_inner()) + self.path_to_log_file(store_id, min_ts.into_inner(), max_ts.into_inner()) } } } @@ -803,7 +805,7 @@ impl StreamTaskInfo { metadata.set_store_id(store_id); for (file_key, data_file) in w.iter() { let mut data_file = data_file.lock().await; - let file_meta = data_file.generate_metadata(file_key)?; + let file_meta = data_file.generate_metadata(file_key, store_id)?; 
metadata.push(file_meta) } Ok(metadata) @@ -1181,8 +1183,8 @@ impl DataFile { } /// generate the metadata in protocol buffer of the file. - fn generate_metadata(&mut self, file_key: &TempFileKey) -> Result { - self.set_storage_path(file_key.file_name(self.min_ts, self.max_ts)); + fn generate_metadata(&mut self, file_key: &TempFileKey, store_id: u64) -> Result { + self.set_storage_path(file_key.file_name(store_id, self.min_ts, self.max_ts)); let mut meta = DataFileInfo::new(); meta.set_sha256( @@ -1416,7 +1418,7 @@ mod tests { fn check_on_events_result(item: &Vec<(String, Result<()>)>) { for (task, r) in item { if let Err(err) = r { - panic!("task {} failed: {}", task, err); + warn!("task {} failed: {}", task, err); } } } @@ -1477,7 +1479,7 @@ mod tests { assert_eq!(cmds.len(), 1, "test cmds len = {}", cmds.len()); match &cmds[0] { Task::Flush(task) => assert_eq!(task, "dummy", "task = {}", task), - _ => panic!("the cmd isn't flush!"), + _ => warn!("the cmd isn't flush!"), } let mut meta_count = 0; diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index fc4f0e2d4a7..68c025b16c2 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -410,32 +410,33 @@ where let canceled = self.subs.deregister_region_if(region, |_, _| true); let handle = ObserveHandle::new(); if canceled { - let for_task = self.find_task_by_region(region).unwrap_or_else(|| { - panic!( + if let Some(for_task) = self.find_task_by_region(region) { + metrics::INITIAL_SCAN_REASON + .with_label_values(&["region-changed"]) + .inc(); + let r = async { + self.observe_over_with_initial_data_from_checkpoint( + region, + self.get_last_checkpoint_of(&for_task, region).await?, + handle.clone(), + ); + Result::Ok(()) + } + .await; + if let Err(e) = r { + try_send!( + self.scheduler, + Task::ModifyObserve(ObserveOp::NotifyFailToStartObserve { + region: region.clone(), + 
handle, + err: Box::new(e) + }) + ); + } + } else { + warn!( "BUG: the region {:?} is register to no task but being observed", - region - ) - }); - metrics::INITIAL_SCAN_REASON - .with_label_values(&["region-changed"]) - .inc(); - let r = async { - self.observe_over_with_initial_data_from_checkpoint( - region, - self.get_last_checkpoint_of(&for_task, region).await?, - handle.clone(), - ); - Result::Ok(()) - } - .await; - if let Err(e) = r { - try_send!( - self.scheduler, - Task::ModifyObserve(ObserveOp::NotifyFailToStartObserve { - region: region.clone(), - handle, - err: Box::new(e) - }) + ®ion ); } } From 956c2192a020b2852a615ed1716ebdc0b1bd316d Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 15 Jul 2022 11:09:06 +0800 Subject: [PATCH 0082/1149] server: support command line output config info (#12956) ref tikv/tikv#12492, ref tikv/tikv#12517 Signed-off-by: glorv Co-authored-by: Xinye Tao --- Cargo.lock | 1 + Cargo.toml | 2 +- cmd/tikv-server/Cargo.toml | 1 + cmd/tikv-server/src/main.rs | 23 ++++++++- src/config.rs | 96 ++++++++++++++++++++++++++++++++++++- src/lib.rs | 7 ++- 6 files changed, 126 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d08e8fc3b25..dcf1eb84937 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6170,6 +6170,7 @@ version = "0.0.1" dependencies = [ "cc", "clap 2.33.0", + "serde_json", "server", "tikv", "time", diff --git a/Cargo.toml b/Cargo.toml index dd071c9809e..b094c857d5e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -143,7 +143,7 @@ semver = "0.11" serde = { version = "1.0", features = ["derive"] } serde_derive = "1.0" serde_ignored = "0.1" -serde_json = "1.0" +serde_json = { version = "1.0", features = ["preserve_order"] } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } sst_importer = { path = "components/sst_importer", default-features = false } 
diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index e2f594cd8ad..9b1aa869037 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -32,6 +32,7 @@ pprof-fp = ["tikv/pprof-fp"] [dependencies] clap = "2.32" +serde_json = { version = "1.0", features = ["preserve_order"] } server = { path = "../../components/server", default-features = false } tikv = { path = "../../", default-features = false } toml = "0.5" diff --git a/cmd/tikv-server/src/main.rs b/cmd/tikv-server/src/main.rs index 4cb68c6e020..0d6e472a602 100644 --- a/cmd/tikv-server/src/main.rs +++ b/cmd/tikv-server/src/main.rs @@ -5,8 +5,9 @@ use std::{path::Path, process}; use clap::{crate_authors, App, Arg}; +use serde_json::{Map, Value}; use server::setup::{ensure_no_unrecognized_config, validate_and_persist_config}; -use tikv::config::TiKvConfig; +use tikv::config::{to_flatten_config_info, TiKvConfig}; fn main() { let build_timestamp = option_env!("TIKV_BUILD_TIME"); @@ -32,6 +33,15 @@ fn main() { .takes_value(false) .help("Check config file validity and exit"), ) + .arg( + Arg::with_name("config-info") + .required(false) + .long("config-info") + .takes_value(true) + .value_name("FORMAT") + .possible_values(&["json"]) + .help("print configuration information with specified format") + ) .arg( Arg::with_name("log-level") .short("L") @@ -186,5 +196,16 @@ fn main() { process::exit(0) } + let is_config_info = matches.is_present("config-info"); + if is_config_info { + let config_infos = to_flatten_config_info(&config); + let mut result = Map::new(); + result.insert("Component".into(), "TiKV Server".into()); + result.insert("Version".into(), tikv::tikv_build_version().into()); + result.insert("Parameters".into(), Value::Array(config_infos)); + println!("{}", serde_json::to_string_pretty(&result).unwrap()); + process::exit(0); + } + server::server::run_tikv(config); } diff --git a/src/config.rs b/src/config.rs index 580e91712de..9b06da58926 100644 --- a/src/config.rs +++ 
b/src/config.rs @@ -7,7 +7,7 @@ use std::{ cmp, - collections::HashMap, + collections::{HashMap, HashSet}, error::Error, fs, i32, io::{Error as IoError, ErrorKind, Write}, @@ -53,6 +53,7 @@ use raftstore::{ }; use resource_metering::Config as ResourceMeteringConfig; use security::SecurityConfig; +use serde_json::{to_value, Map, Value}; use tikv_util::{ config::{ self, LogFormat, RaftDataStateMachine, ReadableDuration, ReadableSize, TomlWriter, GIB, MIB, @@ -1059,6 +1060,7 @@ pub struct DbConfig { // back to write mode in 3.0 when set `enable_pipelined_write` true. The code of multi-batch-write // in RocksDB has been removed. #[online_config(skip)] + #[serde(skip_serializing)] pub enable_multi_batch_write: bool, #[online_config(skip)] pub enable_unordered_write: bool, @@ -3607,6 +3609,71 @@ pub fn write_config>(path: P, content: &[u8]) -> CfgResult<()> { Ok(()) } +// convert tikv config to a flatten array. +pub fn to_flatten_config_info(cfg: &TiKvConfig) -> Vec { + fn to_cfg_value(default_value: &Value, cfg_value: Option<&Value>, key: &str) -> Value { + let mut res = Map::with_capacity(2); + res.insert("Name".into(), Value::String(key.into())); + res.insert("DefaultValue".into(), default_value.clone()); + if let Some(cfg_val) = cfg_value { + if default_value != cfg_val { + res.insert("ValueInFile".into(), cfg_val.clone()); + } + } + + Value::Object(res) + } + + // configs that should not be flatten because the config type is HashMap instead of submodule. + lazy_static! 
{ + static ref NO_FLATTEN_CFGS: HashSet<&'static str> = { + let mut set = HashSet::new(); + set.insert("server.labels"); + set + }; + } + + fn flatten_value( + default_obj: &Map, + value_obj: &Map, + key_buf: &mut String, + res: &mut Vec, + ) { + for (k, v) in default_obj.iter() { + let cfg_val = value_obj.get(k); + let prev_len = key_buf.len(); + if !key_buf.is_empty() { + key_buf.push('.'); + } + key_buf.push_str(k); + if v.is_object() && !NO_FLATTEN_CFGS.contains(key_buf.as_str()) { + flatten_value( + v.as_object().unwrap(), + cfg_val.unwrap().as_object().unwrap(), + key_buf, + res, + ); + } else { + res.push(to_cfg_value(v, cfg_val, key_buf)); + } + key_buf.truncate(prev_len); + } + } + + let cfg_value = to_value(cfg).unwrap(); + let default_value = to_value(TiKvConfig::default()).unwrap(); + + let mut key_buf = String::new(); + let mut res = Vec::new(); + flatten_value( + default_value.as_object().unwrap(), + cfg_value.as_object().unwrap(), + &mut key_buf, + &mut res, + ); + res +} + lazy_static! 
{ pub static ref TIKVCONFIG_TYPED: ConfigChange = TiKvConfig::default().typed(); } @@ -4114,6 +4181,33 @@ mod tests { assert_eq!(cfg_from_file.raftdb.wal_dir, s1); } + #[test] + fn test_flatten_cfg() { + let mut cfg = TiKvConfig::default(); + cfg.server.labels.insert("zone".into(), "test".into()); + cfg.raft_store.raft_log_gc_count_limit = Some(123); + + let flattened = to_flatten_config_info(&cfg); + + let mut expected = HashMap::new(); + let mut labels = Map::new(); + labels.insert("zone".into(), Value::String("test".into())); + expected.insert("server.labels", Value::Object(labels)); + expected.insert( + "raftstore.raft-log-gc-count-limit", + Value::Number(123.into()), + ); + + for v in &flattened { + let obj = v.as_object().unwrap(); + if let Some(v) = expected.get(&obj["Name"].as_str().unwrap()) { + assert_eq!(v, &obj["ValueInFile"]); + } else { + assert!(!obj.contains_key("ValueInFile")); + } + } + } + #[test] fn test_create_parent_dir_if_missing() { let root_path = Builder::new() diff --git a/src/lib.rs b/src/lib.rs index d51457b1603..5b7bf6e2ac1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -61,7 +61,7 @@ pub fn tikv_version_info(build_time: Option<&str>) -> String { \nRust Version: {}\ \nEnable Features: {}\ \nProfile: {}", - env!("CARGO_PKG_VERSION"), + tikv_build_version(), option_env!("TIKV_EDITION").unwrap_or("Community"), option_env!("TIKV_BUILD_GIT_HASH").unwrap_or(fallback), option_env!("TIKV_BUILD_GIT_BRANCH").unwrap_or(fallback), @@ -74,6 +74,11 @@ pub fn tikv_version_info(build_time: Option<&str>) -> String { ) } +/// return the build version of tikv-server +pub fn tikv_build_version() -> &'static str { + env!("CARGO_PKG_VERSION") +} + /// Prints the tikv version information to the standard output. 
pub fn log_tikv_info(build_time: Option<&str>) { info!("Welcome to TiKV"); From 5ae20e21af29e6f5c63abbd4db8a481dc04539b6 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Fri, 15 Jul 2022 13:57:05 +0800 Subject: [PATCH 0083/1149] raftstore: only record UNABLE_TO_SPLIT_CPU_TOP when the top_cpu_usage is not empty (#13016) ref tikv/tikv#12063 Only record `UNABLE_TO_SPLIT_CPU_TOP` when the `top_cpu_usage` is not empty. Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/metrics.rs | 36 ++++- .../src/store/worker/split_controller.rs | 137 ++++++------------ 2 files changed, 81 insertions(+), 92 deletions(-) diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index c4a1c22d800..69d84f45056 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -234,6 +234,37 @@ make_static_metric! { hibernated, }, } + + pub label_enum LoadBaseSplitEventType { + // Workload fits the QPS threshold or byte threshold. + load_fit, + // Workload fits the CPU threshold. + cpu_load_fit, + // The statistical key is empty. + empty_statistical_key, + // Split info has been collected, ready to split. + ready_to_split, + // Split info has not been collected yet, not ready to split. + not_ready_to_split, + // The number of sampled keys does not meet the threshold. + no_enough_sampled_key, + // The number of sampled keys located on left and right does not meet the threshold. + no_enough_lr_key, + // The number of balanced keys does not meet the score. + no_balance_key, + // The number of contained keys does not meet the score. + no_uncross_key, + // Split info for the top hot CPU region has been collected, ready to split. + ready_to_split_cpu_top, + // Hottest key range for the top hot CPU region could not be found. + empty_hottest_key_range, + // The top hot CPU region could not be split. 
+ unable_to_split_cpu_top, + } + + pub struct LoadBaseSplitEventCounterVec: IntCounter { + "type" => LoadBaseSplitEventType, + } } lazy_static! { @@ -648,8 +679,9 @@ lazy_static! { &["order"] ).unwrap(); - pub static ref LOAD_BASE_SPLIT_EVENT: IntCounterVec = - register_int_counter_vec!( + pub static ref LOAD_BASE_SPLIT_EVENT: LoadBaseSplitEventCounterVec = + register_static_int_counter_vec!( + LoadBaseSplitEventCounterVec, "tikv_load_base_split_event", "Load base split event.", &["type"] diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 3724e21c515..013ac705be9 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -31,32 +31,6 @@ use crate::store::{ const DEFAULT_MAX_SAMPLE_LOOP_COUNT: usize = 10000; pub const TOP_N: usize = 10; -// LOAD_BASE_SPLIT_EVENT metrics label definitions. -// Workload fits the QPS threshold or byte threshold. -const LOAD_FIT: &str = "load_fit"; -// Workload fits the CPU threshold. -const CPU_LOAD_FIT: &str = "cpu_load_fit"; -// The statistical key is empty. -const EMPTY_STATISTICAL_KEY: &str = "empty_statistical_key"; -// Split info has been collected, ready to split. -const READY_TO_SPLIT: &str = "ready_to_split"; -// Split info has not been collected yet, not ready to split. -const NOT_READY_TO_SPLIT: &str = "not_ready_to_split"; -// The number of sampled keys does not meet the threshold. -const NO_ENOUGH_SAMPLED_KEY: &str = "no_enough_sampled_key"; -// The number of sampled keys located on left and right does not meet the threshold. -const NO_ENOUGH_LR_KEY: &str = "no_enough_lr_key"; -// The number of balanced keys does not meet the score. -const NO_BALANCE_KEY: &str = "no_balance_key"; -// The number of contained keys does not meet the score. -const NO_UNCROSS_KEY: &str = "no_uncross_key"; -// Split info for the top hot CPU region has been collected, ready to split. 
-const READY_TO_SPLIT_CPU_TOP: &str = "ready_to_split_cpu_top"; -// Hottest key range for the top hot CPU region could not be found. -const EMPTY_HOTTEST_KEY_RANGE: &str = "empty_hottest_key_range"; -// The top hot CPU region could not be split. -const UNABLE_TO_SPLIT_CPU_TOP: &str = "unable_to_split_cpu_top"; - // It will return prefix sum of the given iter, // `read` is a function to process the item from the iter. #[inline(always)] @@ -231,9 +205,7 @@ impl Samples { } let evaluated_key_num_lr = sample.left + sample.right; if evaluated_key_num_lr == 0 { - LOAD_BASE_SPLIT_EVENT - .with_label_values(&[NO_ENOUGH_LR_KEY]) - .inc(); + LOAD_BASE_SPLIT_EVENT.no_enough_lr_key.inc(); continue; } let evaluated_key_num = (sample.contained + evaluated_key_num_lr) as f64; @@ -246,9 +218,7 @@ impl Samples { .with_label_values(&["balance_score"]) .observe(balance_score); if balance_score >= split_balance_score { - LOAD_BASE_SPLIT_EVENT - .with_label_values(&[NO_BALANCE_KEY]) - .inc(); + LOAD_BASE_SPLIT_EVENT.no_balance_key.inc(); continue; } @@ -259,9 +229,7 @@ impl Samples { .with_label_values(&["contained_score"]) .observe(contained_score); if contained_score >= split_contained_score { - LOAD_BASE_SPLIT_EVENT - .with_label_values(&[NO_UNCROSS_KEY]) - .inc(); + LOAD_BASE_SPLIT_EVENT.no_uncross_key.inc(); continue; } @@ -336,7 +304,7 @@ impl Recorder { // so we do this check after the samples are calculated. 
if (recorded_key_ranges.len() as u64) < config.sample_threshold { LOAD_BASE_SPLIT_EVENT - .with_label_values(&[NO_ENOUGH_SAMPLED_KEY]) + .no_enough_sampled_key .inc_by(samples.0.len() as u64); return vec![]; } @@ -834,7 +802,7 @@ impl AutoSplitController { continue; } - LOAD_BASE_SPLIT_EVENT.with_label_values(&[LOAD_FIT]).inc(); + LOAD_BASE_SPLIT_EVENT.load_fit.inc(); let detect_times = self.cfg.detect_times; let recorder = self @@ -853,9 +821,7 @@ impl AutoSplitController { RegionInfo::get_key_ranges_mut, ); if key_ranges.is_empty() { - LOAD_BASE_SPLIT_EVENT - .with_label_values(&[EMPTY_STATISTICAL_KEY]) - .inc(); + LOAD_BASE_SPLIT_EVENT.empty_statistical_key.inc(); continue; } recorder.record(key_ranges); @@ -867,9 +833,7 @@ impl AutoSplitController { recorder.peer.clone(), key, )); - LOAD_BASE_SPLIT_EVENT - .with_label_values(&[READY_TO_SPLIT]) - .inc(); + LOAD_BASE_SPLIT_EVENT.ready_to_split.inc(); info!("load base split region"; "region_id" => region_id, "qps" => qps, @@ -878,15 +842,11 @@ impl AutoSplitController { ); self.recorders.remove(®ion_id); } else if is_unified_read_pool_busy && is_region_busy { - LOAD_BASE_SPLIT_EVENT - .with_label_values(&[CPU_LOAD_FIT]) - .inc(); + LOAD_BASE_SPLIT_EVENT.cpu_load_fit.inc(); top_cpu_usage.push(region_id); } } else { - LOAD_BASE_SPLIT_EVENT - .with_label_values(&[NOT_READY_TO_SPLIT]) - .inc(); + LOAD_BASE_SPLIT_EVENT.not_ready_to_split.inc(); } top_qps.push(qps); @@ -894,49 +854,46 @@ impl AutoSplitController { // Check if the top CPU usage region could be split. // TODO: avoid unnecessary split by introducing the feedback mechanism from PD. - if !top_cpu_usage.is_empty() && !is_grpc_poll_busy { - // Calculate by using the latest CPU usage. 
- top_cpu_usage.sort_unstable_by(|a, b| { - let cpu_usage_a = self.recorders.get(a).unwrap().cpu_usage; - let cpu_usage_b = self.recorders.get(b).unwrap().cpu_usage; - cpu_usage_b.partial_cmp(&cpu_usage_a).unwrap() - }); - let region_id = top_cpu_usage[0]; - let recorder = self.recorders.get_mut(®ion_id).unwrap(); - if recorder.hottest_key_range.is_some() { - split_infos.push(SplitInfo::with_start_end_key( - region_id, - recorder.peer.clone(), - recorder - .hottest_key_range - .as_ref() - .unwrap() - .start_key - .clone(), - recorder.hottest_key_range.as_ref().unwrap().end_key.clone(), - )); - LOAD_BASE_SPLIT_EVENT - .with_label_values(&[READY_TO_SPLIT_CPU_TOP]) - .inc(); - info!("load base split region"; - "region_id" => region_id, - "start_key" => log_wrappers::Value::key(&recorder.hottest_key_range.as_ref().unwrap().start_key), - "end_key" => log_wrappers::Value::key(&recorder.hottest_key_range.as_ref().unwrap().end_key), - "cpu_usage" => recorder.cpu_usage, - ); + if !top_cpu_usage.is_empty() { + // Only split the top CPU region when the gRPC poll is not busy. + if !is_grpc_poll_busy { + // Calculate by using the latest CPU usage. 
+ top_cpu_usage.sort_unstable_by(|a, b| { + let cpu_usage_a = self.recorders.get(a).unwrap().cpu_usage; + let cpu_usage_b = self.recorders.get(b).unwrap().cpu_usage; + cpu_usage_b.partial_cmp(&cpu_usage_a).unwrap() + }); + let region_id = top_cpu_usage[0]; + let recorder = self.recorders.get_mut(®ion_id).unwrap(); + if recorder.hottest_key_range.is_some() { + split_infos.push(SplitInfo::with_start_end_key( + region_id, + recorder.peer.clone(), + recorder + .hottest_key_range + .as_ref() + .unwrap() + .start_key + .clone(), + recorder.hottest_key_range.as_ref().unwrap().end_key.clone(), + )); + LOAD_BASE_SPLIT_EVENT.ready_to_split_cpu_top.inc(); + info!("load base split region"; + "region_id" => region_id, + "start_key" => log_wrappers::Value::key(&recorder.hottest_key_range.as_ref().unwrap().start_key), + "end_key" => log_wrappers::Value::key(&recorder.hottest_key_range.as_ref().unwrap().end_key), + "cpu_usage" => recorder.cpu_usage, + ); + } else { + LOAD_BASE_SPLIT_EVENT.empty_hottest_key_range.inc(); + } } else { - LOAD_BASE_SPLIT_EVENT - .with_label_values(&[EMPTY_HOTTEST_KEY_RANGE]) - .inc(); + LOAD_BASE_SPLIT_EVENT.unable_to_split_cpu_top.inc(); + } + // Clean up the rest top CPU usage recorders. + for region_id in top_cpu_usage { + self.recorders.remove(®ion_id); } - } else { - LOAD_BASE_SPLIT_EVENT - .with_label_values(&[UNABLE_TO_SPLIT_CPU_TOP]) - .inc(); - } - // Clean up the rest top CPU usage recorders. - for region_id in top_cpu_usage { - self.recorders.remove(®ion_id); } (top_qps.into_vec(), split_infos) From 59c9676795d08a5ff2e35c899e3c3d30611bd2b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Fri, 15 Jul 2022 22:51:06 +0800 Subject: [PATCH 0084/1149] log-backup: fixed initial scanning data loss (#13024) ref tikv/tikv#12538 Now, initial scanning failure won't just report as a retryable error, but would retry internally and fire a fatal error if retry failed too many times. 
This PR also make the report of fatal error can provide a `TaskSelector`, which allows reporting errors in some contexts which cannot access the task name. Signed-off-by: Yu Juncen Co-authored-by: Ti Chi Robot --- components/backup-stream/src/endpoint.rs | 94 +++++++----- components/backup-stream/src/event_loader.rs | 66 ++++----- components/backup-stream/src/metrics.rs | 2 +- components/backup-stream/src/router.rs | 139 +++++++++++++++++- .../backup-stream/src/subscription_manager.rs | 120 +++++++++++---- components/backup-stream/src/utils.rs | 88 ++++++++++- components/backup-stream/tests/mod.rs | 31 +++- 7 files changed, 428 insertions(+), 112 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index c779afebe45..958df7286a7 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -55,7 +55,7 @@ use crate::{ metadata::{store::MetaStore, MetadataClient, MetadataEvent, StreamTask}, metrics::{self, TaskStatus}, observer::BackupStreamObserver, - router::{ApplyEvents, Router}, + router::{ApplyEvents, Router, TaskSelector}, subscription_manager::{RegionSubscriptionManager, ResolvedRegions}, subscription_track::SubscriptionTracer, try_send, @@ -197,45 +197,59 @@ where self.meta_client.clone() } - fn on_fatal_error(&self, task: String, err: Box) { - // Let's pause the task first. 
- self.unload_task(&task); + fn on_fatal_error(&self, select: TaskSelector, err: Box) { err.report_fatal(); - metrics::update_task_status(TaskStatus::Error, &task); - - let meta_cli = self.get_meta_client(); - let pdc = self.pd_client.clone(); - let store_id = self.store_id; - let sched = self.scheduler.clone(); - let safepoint_name = self.pause_guard_id_for_task(&task); - let safepoint_ttl = self.pause_guard_duration(); - self.pool.block_on(async move { - let err_fut = async { - let safepoint = meta_cli.global_progress_of_task(&task).await?; - pdc.update_service_safe_point( - safepoint_name, - TimeStamp::new(safepoint - 1), - safepoint_ttl, - ) - .await?; - meta_cli.pause(&task).await?; - let mut last_error = StreamBackupError::new(); - last_error.set_error_code(err.error_code().code.to_owned()); - last_error.set_error_message(err.to_string()); - last_error.set_store_id(store_id); - last_error.set_happen_at(TimeStamp::physical_now()); - meta_cli.report_last_error(&task, last_error).await?; - Result::Ok(()) - }; - if let Err(err_report) = err_fut.await { - err_report.report(format_args!("failed to upload error {}", err_report)); - // Let's retry reporting after 5s. - tokio::task::spawn(async move { - tokio::time::sleep(Duration::from_secs(5)).await; - try_send!(sched, Task::FatalError(task, err)); - }); - } - }) + let tasks = self + .pool + .block_on(self.range_router.select_task(select.reference())); + warn!("fatal error reporting"; "selector" => ?select, "selected" => ?tasks, "err" => %err); + for task in tasks { + // Let's pause the task first. 
+ self.unload_task(&task); + metrics::update_task_status(TaskStatus::Error, &task); + + let meta_cli = self.get_meta_client(); + let pdc = self.pd_client.clone(); + let store_id = self.store_id; + let sched = self.scheduler.clone(); + let safepoint_name = self.pause_guard_id_for_task(&task); + let safepoint_ttl = self.pause_guard_duration(); + let code = err.error_code().code.to_owned(); + let msg = err.to_string(); + self.pool.block_on(async move { + let err_fut = async { + let safepoint = meta_cli.global_progress_of_task(&task).await?; + pdc.update_service_safe_point( + safepoint_name, + TimeStamp::new(safepoint - 1), + safepoint_ttl, + ) + .await?; + meta_cli.pause(&task).await?; + let mut last_error = StreamBackupError::new(); + last_error.set_error_code(code); + last_error.set_error_message(msg.clone()); + last_error.set_store_id(store_id); + last_error.set_happen_at(TimeStamp::physical_now()); + meta_cli.report_last_error(&task, last_error).await?; + Result::Ok(()) + }; + if let Err(err_report) = err_fut.await { + err_report.report(format_args!("failed to upload error {}", err_report)); + // Let's retry reporting after 5s. + tokio::task::spawn(async move { + tokio::time::sleep(Duration::from_secs(5)).await; + try_send!( + sched, + Task::FatalError( + TaskSelector::ByName(task.to_owned()), + Box::new(annotate!(err_report, "origin error: {}", msg)) + ) + ); + }); + } + }); + } } async fn starts_flush_ticks(router: Router) { @@ -922,7 +936,7 @@ pub enum Task { /// Convert status of some task into `flushing` and do flush then. ForceFlush(String), /// FatalError pauses the task and set the error. - FatalError(String, Box), + FatalError(TaskSelector, Box), /// Run the callback when see this message. Only for test usage. /// NOTE: Those messages for testing are not guared by `#[cfg(test)]` for now, because /// the integration test would not enable test config when compiling (why?) 
diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index fdba0194000..841f6ac75b6 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -23,10 +23,12 @@ use tikv::storage::{ use tikv_util::{ box_err, time::{Instant, Limiter}, - warn, worker::Scheduler, }; -use tokio::sync::{OwnedSemaphorePermit, Semaphore}; +use tokio::{ + runtime::Handle, + sync::{OwnedSemaphorePermit, Semaphore}, +}; use txn_types::{Key, Lock, TimeStamp}; use crate::{ @@ -64,7 +66,7 @@ impl PendingMemoryQuota { pub fn pending(&self, size: usize) -> PendingMemory { PendingMemory( - tokio::runtime::Handle::current() + Handle::current() .block_on(self.0.clone().acquire_many_owned(size as _)) .expect("BUG: the semaphore is closed unexpectedly."), ) @@ -186,7 +188,7 @@ pub struct InitialDataLoader { pub(crate) tracing: SubscriptionTracer, pub(crate) scheduler: Scheduler, pub(crate) quota: PendingMemoryQuota, - pub(crate) handle: tokio::runtime::Handle, + pub(crate) handle: Handle, pub(crate) limit: Limiter, _engine: PhantomData, @@ -205,7 +207,7 @@ where tracing: SubscriptionTracer, sched: Scheduler, quota: PendingMemoryQuota, - handle: tokio::runtime::Handle, + handle: Handle, limiter: Limiter, ) -> Self { Self { @@ -252,8 +254,8 @@ where last_err = match last_err { None => Some(e), Some(err) => Some(Error::Contextual { - context: format!("and error {}", e), - inner_error: Box::new(err), + context: format!("and error {}", err), + inner_error: Box::new(e), }), }; @@ -374,6 +376,10 @@ where let mut stats = StatisticsSummary::default(); let start = Instant::now(); loop { + #[cfg(feature = "failpoints")] + fail::fail_point!("scan_and_async_send", |msg| Err(Error::Other(box_err!( + "{:?}", msg + )))); let mut events = ApplyEvents::with_capacity(1024, region.id); let stat = event_loader.fill_entries()?; let disk_read = self.with_resolver(region, |r| { @@ -411,39 +417,31 @@ where region: &Region, 
start_ts: TimeStamp, snap: impl Snapshot, - on_finish: impl FnOnce() + Send + 'static, ) -> Result { let _guard = self.handle.enter(); - // It is ok to sink more data than needed. So scan to +inf TS for convenance. - let event_loader = EventLoader::load_from(snap, start_ts, TimeStamp::max(), region)?; let tr = self.tracing.clone(); let region_id = region.get_id(); let mut join_handles = Vec::with_capacity(8); - let stats = self.scan_and_async_send(region, event_loader, &mut join_handles); - - // we should mark phase one as finished whether scan successed. - // TODO: use an `WaitGroup` with asynchronous support. - let r = region.clone(); - tokio::spawn(async move { - for h in join_handles { - if let Err(err) = h.await { - warn!("failed to join task."; "err" => %err); - } - } - let result = Self::with_resolver_by(&tr, &r, |r| { - r.phase_one_done(); - Ok(()) - }); - if let Err(err) = result { - err.report(format_args!( - "failed to finish phase 1 for region {:?}", - region_id - )); - } - on_finish() - }); - stats + + // It is ok to sink more data than needed. So scan to +inf TS for convenance. + let event_loader = EventLoader::load_from(snap, start_ts, TimeStamp::max(), region)?; + let stats = self.scan_and_async_send(region, event_loader, &mut join_handles)?; + + Handle::current() + .block_on(futures::future::try_join_all(join_handles)) + .map_err(|err| annotate!(err, "tokio runtime failed to join consuming threads"))?; + + Self::with_resolver_by(&tr, region, |r| { + r.phase_one_done(); + Ok(()) + }) + .context(format_args!( + "failed to finish phase 1 for region {:?}", + region_id + ))?; + + Ok(stats) } /// initialize a range: it simply scan the regions with leader role and send them to [`initialize_region`]. diff --git a/components/backup-stream/src/metrics.rs b/components/backup-stream/src/metrics.rs index a27dd1ea33b..24a044bb4fb 100644 --- a/components/backup-stream/src/metrics.rs +++ b/components/backup-stream/src/metrics.rs @@ -145,7 +145,7 @@ lazy_static! 
{ ) .unwrap(); pub static ref PENDING_INITIAL_SCAN_LEN: IntGaugeVec = register_int_gauge_vec!( - "pending_initial_scan", + "tikv_pending_initial_scan", "The pending initial scan", &["stage"] ) diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 8311c08c7de..7a2c895edb2 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -63,6 +63,50 @@ const FLUSH_FAILURE_BECOME_FATAL_THRESHOLD: usize = 30; /// and storage could take mistaken if writing all of these files to storage concurrently. const FLUSH_LOG_CONCURRENT_BATCH_COUNT: usize = 128; +#[derive(Clone, Debug)] +pub enum TaskSelector { + ByName(String), + ByKey(Vec), + ByRange(Vec, Vec), + All, +} + +impl TaskSelector { + pub fn reference(&self) -> TaskSelectorRef<'_> { + match self { + TaskSelector::ByName(s) => TaskSelectorRef::ByName(s), + TaskSelector::ByKey(k) => TaskSelectorRef::ByKey(&*k), + TaskSelector::ByRange(s, e) => TaskSelectorRef::ByRange(&*s, &*e), + TaskSelector::All => TaskSelectorRef::All, + } + } +} + +#[derive(Clone, Copy, Debug)] +pub enum TaskSelectorRef<'a> { + ByName(&'a str), + ByKey(&'a [u8]), + ByRange(&'a [u8], &'a [u8]), + All, +} + +impl<'a> TaskSelectorRef<'a> { + fn matches<'c, 'd>( + self, + task_name: &str, + mut task_range: impl Iterator, + ) -> bool { + match self { + TaskSelectorRef::ByName(name) => task_name == name, + TaskSelectorRef::ByKey(k) => task_range.any(|(s, e)| utils::is_in_range(k, (&*s, &*e))), + TaskSelectorRef::ByRange(x1, y1) => { + task_range.any(|(x2, y2)| utils::is_overlapping((x1, y1), (&*x2, &*y2))) + } + TaskSelectorRef::All => true, + } + } +} + #[derive(Debug)] pub struct ApplyEvent { pub key: Vec, @@ -376,7 +420,8 @@ impl RouterInner { // register task info let prefix_path = self.prefix.join(&task_name); - let stream_task = StreamTaskInfo::new(prefix_path, task, self.max_flush_interval).await?; + let stream_task = + StreamTaskInfo::new(prefix_path, task, 
self.max_flush_interval, ranges.clone()).await?; self.tasks .lock() .await @@ -405,6 +450,21 @@ impl RouterInner { r.get_value_by_point(key).cloned() } + pub async fn select_task(&self, selector: TaskSelectorRef<'_>) -> Vec { + let s = self.tasks.lock().await; + s.iter() + .filter(|(name, info)| { + selector.matches( + name.as_str(), + info.ranges + .iter() + .map(|(s, e)| (s.as_slice(), e.as_slice())), + ) + }) + .map(|(name, _)| name.to_owned()) + .collect() + } + #[cfg(test)] pub(crate) async fn must_mut_task_info(&self, task_name: &str, mutator: F) where @@ -488,7 +548,10 @@ impl RouterInner { // NOTE: Maybe we'd better record all errors and send them to the client? try_send!( self.scheduler, - Task::FatalError(task_name.to_owned(), Box::new(e)) + Task::FatalError( + TaskSelector::ByName(task_name.to_owned()), + Box::new(e) + ) ); } return None; @@ -658,6 +721,8 @@ pub struct StreamTaskInfo { pub(crate) task: StreamTask, /// support external storage. eg local/s3. pub(crate) storage: Arc, + /// The listening range of the task. + ranges: Vec<(Vec, Vec)>, /// The parent directory of temporary files. temp_dir: PathBuf, /// The temporary file index. Both meta (m prefixed keys) and data (t prefixed keys). 
@@ -714,6 +779,7 @@ impl StreamTaskInfo { temp_dir: PathBuf, task: StreamTask, flush_interval: Duration, + ranges: Vec<(Vec, Vec)>, ) -> Result { tokio::fs::create_dir_all(&temp_dir).await?; let storage = Arc::from(create_storage( @@ -724,6 +790,7 @@ impl StreamTaskInfo { task, storage, temp_dir, + ranges, min_resolved_ts: TimeStamp::max(), files: SlotMap::default(), flushing_files: RwLock::default(), @@ -1527,6 +1594,7 @@ mod tests { tmp_dir.path().to_path_buf(), stream_task, Duration::from_secs(300), + vec![(vec![], vec![])], ) .await .unwrap(); @@ -1768,7 +1836,7 @@ mod tests { assert!( messages.iter().any(|task| { if let Task::FatalError(name, _err) = task { - return name == "flush_failure"; + return matches!(name.reference(), TaskSelectorRef::ByName("flush_failure")); } false }), @@ -1797,4 +1865,69 @@ mod tests { let begin_ts = DataFile::decode_begin_ts(value).unwrap(); assert_eq!(begin_ts, start_ts); } + + #[test] + fn test_selector() { + type DummyTask<'a> = (&'a str, &'a [(&'a [u8], &'a [u8])]); + + #[derive(Debug, Clone, Copy)] + struct Case<'a /* 'static */> { + tasks: &'a [DummyTask<'a>], + selector: TaskSelectorRef<'a>, + selected: &'a [&'a str], + } + + let cases = [ + Case { + tasks: &[("Zhao", &[(b"", b"")]), ("Qian", &[(b"", b"")])], + selector: TaskSelectorRef::ByName("Zhao"), + selected: &["Zhao"], + }, + Case { + tasks: &[ + ("Zhao", &[(b"0001", b"1000"), (b"2000", b"")]), + ("Qian", &[(b"0002", b"1000")]), + ], + selector: TaskSelectorRef::ByKey(b"0001"), + selected: &["Zhao"], + }, + Case { + tasks: &[ + ("Zhao", &[(b"0001", b"1000"), (b"2000", b"")]), + ("Qian", &[(b"0002", b"1000")]), + ("Sun", &[(b"0004", b"1024")]), + ("Li", &[(b"1001", b"2048")]), + ], + selector: TaskSelectorRef::ByRange(b"1001", b"2000"), + selected: &["Sun", "Li"], + }, + Case { + tasks: &[ + ("Zhao", &[(b"0001", b"1000"), (b"2000", b"")]), + ("Qian", &[(b"0002", b"1000")]), + ("Sun", &[(b"0004", b"1024")]), + ("Li", &[(b"1001", b"2048")]), + ], + selector: 
TaskSelectorRef::All, + selected: &["Zhao", "Qian", "Sun", "Li"], + }, + ]; + + fn run(c: Case<'static>) { + assert!( + c.tasks + .iter() + .filter(|(name, range)| c.selector.matches(name, range.iter().copied())) + .map(|(name, _)| name) + .collect::>() + == c.selected.iter().collect::>(), + "case = {:?}", + c + ) + } + + for case in cases { + run(case) + } + } } diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index 68c025b16c2..28c1ed6dd78 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -11,7 +11,7 @@ use std::{ use crossbeam::channel::{Receiver as SyncReceiver, Sender as SyncSender}; use crossbeam_channel::SendError; use engine_traits::KvEngine; -use error_code::{backup_stream::OBSERVE_CANCELED, ErrorCodeExt}; +use error_code::ErrorCodeExt; use futures::FutureExt; use kvproto::metapb::Region; use pd_client::PdClient; @@ -36,7 +36,7 @@ use crate::{ metadata::{store::MetaStore, CheckpointProvider, MetadataClient}, metrics, observer::BackupStreamObserver, - router::Router, + router::{Router, TaskSelector}, subscription_track::SubscriptionTracer, try_send, utils::{self, CallbackWaitGroup, Work}, @@ -45,12 +45,14 @@ use crate::{ type ScanPool = yatp::ThreadPool; +const INITIAL_SCAN_FAILURE_MAX_RETRY_TIME: usize = 10; + /// a request for doing initial scanning. struct ScanCmd { region: Region, handle: ObserveHandle, last_checkpoint: TimeStamp, - work: Work, + _work: Work, } /// The response of requesting resolve the new checkpoint of regions. @@ -82,6 +84,25 @@ impl ResolvedRegions { } } +/// returns whether the error should be retried. +/// for some errors, like `epoch not match` or `not leader`, +/// implies that the region is drifting, and no more need to be observed by us. 
+fn should_retry(err: &Error) -> bool { + match err.without_context() { + Error::RaftRequest(pbe) => { + !(pbe.has_epoch_not_match() + || pbe.has_not_leader() + || pbe.get_message().contains("stale observe id") + || pbe.has_region_not_found()) + } + Error::RaftStore(raftstore::Error::RegionNotFound(_)) + | Error::RaftStore(raftstore::Error::NotLeader(..)) + | Error::ObserveCanceled(..) + | Error::RaftStore(raftstore::Error::EpochNotMatch(..)) => false, + _ => true, + } +} + /// the abstraction over a "DB" which provides the initial scanning. trait InitialScan: Clone { fn do_initial_scan( @@ -89,8 +110,9 @@ trait InitialScan: Clone { region: &Region, start_ts: TimeStamp, handle: ObserveHandle, - on_finish: impl FnOnce() + Send + 'static, ) -> Result; + + fn handle_fatal_error(&self, region: &Region, err: Error); } impl InitialScan for InitialDataLoader @@ -104,35 +126,73 @@ where region: &Region, start_ts: TimeStamp, handle: ObserveHandle, - on_finish: impl FnOnce() + Send + 'static, ) -> Result { let region_id = region.get_id(); + // Note: we have external retry at `ScanCmd::exec_by_with_retry`, should we keep retrying here? let snap = self.observe_over_with_retry(region, move || { ChangeObserver::from_pitr(region_id, handle.clone()) })?; - let stat = self.do_initial_scan(region, start_ts, snap, on_finish)?; + let stat = self.do_initial_scan(region, start_ts, snap)?; Ok(stat) } + + fn handle_fatal_error(&self, region: &Region, err: Error) { + try_send!( + self.scheduler, + Task::FatalError( + TaskSelector::ByRange( + region.get_start_key().to_owned(), + region.get_end_key().to_owned() + ), + Box::new(err), + ) + ); + } } impl ScanCmd { /// execute the initial scanning via the specificated [`InitialDataLoader`]. - fn exec_by(self, initial_scan: impl InitialScan) -> Result<()> { + fn exec_by(&self, initial_scan: impl InitialScan) -> Result<()> { let Self { region, handle, last_checkpoint, - work, + .. 
} = self; let begin = Instant::now_coarse(); - let stat = - initial_scan.do_initial_scan(®ion, last_checkpoint, handle, move || drop(work))?; + let stat = initial_scan.do_initial_scan(region, *last_checkpoint, handle.clone())?; info!("initial scanning of leader transforming finished!"; "takes" => ?begin.saturating_elapsed(), "region" => %region.get_id(), "from_ts" => %last_checkpoint); utils::record_cf_stat("lock", &stat.lock); utils::record_cf_stat("write", &stat.write); utils::record_cf_stat("default", &stat.data); Ok(()) } + + /// execute the command, when meeting error, retrying. + fn exec_by_with_retry(self, init: impl InitialScan, cancel: &AtomicBool) { + let mut retry_time = INITIAL_SCAN_FAILURE_MAX_RETRY_TIME; + loop { + if cancel.load(Ordering::SeqCst) { + return; + } + match self.exec_by(init.clone()) { + Err(err) if should_retry(&err) && retry_time > 0 => { + // NOTE: blocking this thread may stick the process. + // Maybe spawn a task to tokio and reschedule the task then? + std::thread::sleep(Duration::from_millis(500)); + warn!("meet retryable error"; "err" => %err, "retry_time" => retry_time); + retry_time -= 1; + continue; + } + Err(err) if retry_time == 0 => { + init.handle_fatal_error(&self.region, err.context("retry time exceeds")); + break; + } + // Errors which `should_retry` returns false means they can be ignored. 
+ Err(_) | Ok(_) => break, + } + } + } } fn scan_executor_loop( @@ -150,15 +210,11 @@ fn scan_executor_loop( if canceled.load(Ordering::Acquire) { return; } + metrics::PENDING_INITIAL_SCAN_LEN .with_label_values(&["executing"]) .inc(); - let region_id = cmd.region.get_id(); - if let Err(err) = cmd.exec_by(init.clone()) { - if err.error_code() != OBSERVE_CANCELED { - err.report(format!("during initial scanning of region {}", region_id)); - } - } + cmd.exec_by_with_retry(init.clone(), &canceled); metrics::PENDING_INITIAL_SCAN_LEN .with_label_values(&["executing"]) .dec(); @@ -370,13 +426,21 @@ where if err.error_code() == error_code::backup_stream::OBSERVE_CANCELED { return; } + let (start, end) = ( + region.get_start_key().to_owned(), + region.get_end_key().to_owned(), + ); match self.retry_observe(region, handle).await { Ok(()) => {} Err(e) => { - self.fatal( - e, - format!("While retring to observe region, origin error is {}", err), + let msg = Task::FatalError( + TaskSelector::ByRange(start, end), + Box::new(Error::Contextual { + context: format!("retry meet error, origin error is {}", err), + inner_error: Box::new(e), + }), ); + try_send!(self.scheduler, msg); } } } @@ -384,7 +448,7 @@ where let now = Instant::now(); let timedout = self.wait(Duration::from_secs(30)).await; if timedout { - warn!("waiting for initial scanning done timed out, forcing progress(with risk of data loss)!"; + warn!("waiting for initial scanning done timed out, forcing progress!"; "take" => ?now.saturating_elapsed(), "timedout" => %timedout); } let cps = self.subs.resolve_with(min_ts); @@ -399,10 +463,6 @@ where } } - fn fatal(&self, err: Error, message: String) { - try_send!(self.scheduler, Task::FatalError(message, Box::new(err))); - } - async fn refresh_resolver(&self, region: &Region) { let need_refresh_all = !self.subs.try_update_region(region); @@ -540,7 +600,7 @@ where async fn get_last_checkpoint_of(&self, task: &str, region: &Region) -> Result { let meta_cli = 
self.meta_cli.clone(); let cp = meta_cli.get_region_checkpoint(task, region).await?; - info!("got region checkpoint"; "region_id" => %region.get_id(), "checkpoint" => ?cp); + debug!("got region checkpoint"; "region_id" => %region.get_id(), "checkpoint" => ?cp); if matches!(cp.provider, CheckpointProvider::Global) { metrics::STORE_CHECKPOINT_TS .with_label_values(&[task]) @@ -574,7 +634,7 @@ where region: region.clone(), handle, last_checkpoint, - work: self.scans.clone().work(), + _work: self.scans.clone().work(), }) } @@ -602,11 +662,13 @@ mod test { _region: &Region, _start_ts: txn_types::TimeStamp, _handle: raftstore::coprocessor::ObserveHandle, - on_finish: impl FnOnce() + Send + 'static, ) -> crate::errors::Result { - on_finish(); Ok(Statistics::default()) } + + fn handle_fatal_error(&self, region: &Region, err: crate::errors::Error) { + panic!("fatal {:?} {}", region, err) + } } #[cfg(feature = "failpoints")] @@ -641,7 +703,7 @@ mod test { handle: Default::default(), last_checkpoint: Default::default(), // Note: Maybe make here a Box or some other trait? - work: wg.work(), + _work: wg.work(), }) .unwrap() } diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 678b571f3b5..486ce6ae0f8 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -34,6 +34,7 @@ use txn_types::{Key, Lock, LockType}; use crate::{ errors::{Error, Result}, metadata::store::BoxFuture, + router::TaskSelector, Task, }; @@ -397,7 +398,7 @@ pub fn handle_on_event_result(doom_messenger: &Scheduler, result: Vec<(Str try_send!( doom_messenger, Task::FatalError( - task, + TaskSelector::ByName(task), Box::new(err.context("failed to record event to local temporary files")) ) ); @@ -536,6 +537,38 @@ pub fn with_record_read_throughput(f: impl FnOnce() -> T) -> (T, u64) { (r, recorder.end()) } +/// test whether a key is in the range. +/// end key is exclusive. +/// empty end key means infinity. 
+pub fn is_in_range(key: &[u8], range: (&[u8], &[u8])) -> bool { + match range { + (start, b"") => key >= start, + (start, end) => key >= start && key < end, + } +} + +/// test whether two ranges overlapping. +/// end key is exclusive. +/// empty end key means infinity. +pub fn is_overlapping(range: (&[u8], &[u8]), range2: (&[u8], &[u8])) -> bool { + let (x1, y1) = range; + let (x2, y2) = range2; + match (x1, y1, x2, y2) { + // 1: |__________________| + // 2: |______________________| + (_, b"", _, b"") => true, + // 1: (x1)|__________________| + // 2: |_________________|(y2) + (x1, b"", _, y2) => x1 < y2, + // 1: |________________|(y1) + // 2: (x2)|_________________| + (_, y1, x2, b"") => x2 < y1, + // 1: (x1)|________|(y1) + // 2: (x2)|__________|(y2) + (x1, y1, x2, y2) => x2 < y1 && x1 < y2, + } +} + #[cfg(test)] mod test { use std::{ @@ -548,7 +581,58 @@ mod test { use futures::executor::block_on; - use crate::utils::{CallbackWaitGroup, SegmentMap}; + use crate::utils::{is_in_range, CallbackWaitGroup, SegmentMap}; + + #[test] + fn test_range_functions() { + #[derive(Debug)] + struct InRangeCase<'a> { + key: &'a [u8], + range: (&'a [u8], &'a [u8]), + expected: bool, + } + + let cases = [ + InRangeCase { + key: b"0001", + range: (b"0000", b"0002"), + expected: true, + }, + InRangeCase { + key: b"0003", + range: (b"0000", b"0002"), + expected: false, + }, + InRangeCase { + key: b"0002", + range: (b"0000", b"0002"), + expected: false, + }, + InRangeCase { + key: b"0000", + range: (b"0000", b"0002"), + expected: true, + }, + InRangeCase { + key: b"0018", + range: (b"0000", b""), + expected: true, + }, + InRangeCase { + key: b"0018", + range: (b"0019", b""), + expected: false, + }, + ]; + + for case in cases { + assert!( + is_in_range(case.key, case.range) == case.expected, + "case = {:?}", + case + ); + } + } #[test] fn test_segment_tree() { diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index fccd8a0626a..9ba59a181b2 
100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -600,8 +600,8 @@ mod test { use std::time::Duration; use backup_stream::{ - errors::Error, metadata::MetadataClient, GetCheckpointResult, RegionCheckpointOperation, - RegionSet, Task, + errors::Error, metadata::MetadataClient, router::TaskSelector, GetCheckpointResult, + RegionCheckpointOperation, RegionSet, Task, }; use tikv_util::{box_err, defer, info, HandyRwLock}; use txn_types::TimeStamp; @@ -718,7 +718,7 @@ mod test { endpoint .scheduler() .schedule(Task::FatalError( - "test_fatal_error".to_owned(), + TaskSelector::ByName("test_fatal_error".to_owned()), Box::new(Error::Other(box_err!("everything is alright"))), )) .unwrap(); @@ -845,4 +845,29 @@ mod test { keys.union(&keys2).map(|s| s.as_slice()), ); } + + #[test] + fn initial_scan_failure() { + defer! {{ + fail::remove("scan_and_async_send"); + }} + + let mut suite = SuiteBuilder::new_named("initial_scan_failure") + .nodes(1) + .build(); + let keys = run_async_test(suite.write_records(0, 128, 1)); + fail::cfg( + "scan_and_async_send", + "1*return(dive into the temporary dream, where the SLA never bothers)", + ) + .unwrap(); + suite.must_register_task(1, "initial_scan_failure"); + let keys2 = run_async_test(suite.write_records(256, 128, 1)); + suite.force_flush_files("initial_scan_failure"); + suite.wait_for_flush(); + suite.check_for_write_records( + suite.flushed_files.path(), + keys.union(&keys2).map(|s| s.as_slice()), + ); + } } From 21f00d29c0ae5b0eca8562ce50a9b3bc0d8b9583 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Mon, 18 Jul 2022 10:37:07 +0800 Subject: [PATCH 0085/1149] raftstore: Implement coprocessor observer post_exec_admin(query) (#12850) ref tikv/tikv#12849 Support new observers post_exec_admin(query). 
Signed-off-by: CalvinNeo Co-authored-by: Ti Chi Robot --- components/cdc/src/delegate.rs | 1 + .../raftstore/src/coprocessor/dispatcher.rs | 37 ++++- components/raftstore/src/coprocessor/mod.rs | 35 ++++- components/raftstore/src/store/fsm/apply.rs | 127 ++++++++++++++++-- components/resolved_ts/src/cmd.rs | 1 + components/resolved_ts/src/observer.rs | 2 +- .../gc_worker/applied_lock_collector.rs | 2 +- 7 files changed, 189 insertions(+), 16 deletions(-) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index 55a551490ac..752c068e72a 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -441,6 +441,7 @@ impl Delegate { for cmd in batch.into_iter(self.region_id) { let Cmd { index, + term: _, mut request, mut response, } = cmd; diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index 24b79bf4877..cd370e332e3 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -439,6 +439,37 @@ impl CoprocessorHost { } } + /// `post_exec` should be called immediately after we executed one raft command. + /// It notifies observers side effects of this command before execution of the next command, + /// including req/resp, apply state, modified region state, etc. + /// Return true observers think a persistence is necessary. 
+ pub fn post_exec( + &self, + region: &Region, + cmd: &Cmd, + apply_state: &RaftApplyState, + region_state: &RegionState, + ) -> bool { + let mut ctx = ObserverContext::new(region); + if !cmd.response.has_admin_response() { + for observer in &self.registry.query_observers { + let observer = observer.observer.inner(); + if observer.post_exec_query(&mut ctx, cmd, apply_state, region_state) { + return true; + } + } + false + } else { + for observer in &self.registry.admin_observers { + let observer = observer.observer.inner(); + if observer.post_exec_admin(&mut ctx, cmd, apply_state, region_state) { + return true; + } + } + false + } + } + pub fn post_apply_plain_kvs_from_snapshot( &self, region: &Region, @@ -764,7 +795,7 @@ mod tests { assert_all!([&ob.called], &[3]); let mut admin_resp = RaftCmdResponse::default(); admin_resp.set_admin_response(AdminResponse::default()); - host.post_apply(®ion, &Cmd::new(0, admin_req, admin_resp)); + host.post_apply(®ion, &Cmd::new(0, 0, admin_req, admin_resp)); assert_all!([&ob.called], &[6]); let mut query_req = RaftCmdRequest::default(); @@ -774,7 +805,7 @@ mod tests { host.pre_apply(®ion, &query_req); assert_all!([&ob.called], &[15]); let query_resp = RaftCmdResponse::default(); - host.post_apply(®ion, &Cmd::new(0, query_req, query_resp)); + host.post_apply(®ion, &Cmd::new(0, 0, query_req, query_resp)); assert_all!([&ob.called], &[21]); host.on_role_change(®ion, RoleChange::new(StateRole::Leader)); @@ -853,7 +884,7 @@ mod tests { host.pre_apply(®ion, &req); assert_all!([&ob1.called, &ob2.called], &[0, base_score * 2 + 3]); - host.post_apply(®ion, &Cmd::new(0, req.clone(), resp.clone())); + host.post_apply(®ion, &Cmd::new(0, 0, req.clone(), resp.clone())); assert_all!([&ob1.called, &ob2.called], &[0, base_score * 3 + 6]); set_all!(&[&ob2.bypass], false); diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 2dc83c8d7af..b798c7577af 100644 --- 
a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -14,6 +14,7 @@ use kvproto::{ metapb::Region, pdpb::CheckPolicy, raft_cmdpb::{AdminRequest, AdminResponse, RaftCmdRequest, RaftCmdResponse, Request}, + raft_serverpb::RaftApplyState, }; use raft::{eraftpb, StateRole}; @@ -74,6 +75,12 @@ impl<'a> ObserverContext<'a> { } } +pub struct RegionState { + pub peer_id: u64, + pub pending_remove: bool, + pub modified_region: Option, +} + pub trait AdminObserver: Coprocessor { /// Hook to call before proposing admin request. fn pre_propose_admin(&self, _: &mut ObserverContext<'_>, _: &mut AdminRequest) -> Result<()> { @@ -91,6 +98,18 @@ pub trait AdminObserver: Coprocessor { fn pre_exec_admin(&self, _: &mut ObserverContext<'_>, _: &AdminRequest) -> bool { false } + + /// Hook to call immediately after exec command + /// Will be a special persistence after this exec if a observer returns true. + fn post_exec_admin( + &self, + _: &mut ObserverContext<'_>, + _: &Cmd, + _: &RaftApplyState, + _: &RegionState, + ) -> bool { + false + } } pub trait QueryObserver: Coprocessor { @@ -115,6 +134,18 @@ pub trait QueryObserver: Coprocessor { fn pre_exec_query(&self, _: &mut ObserverContext<'_>, _: &[Request]) -> bool { false } + + /// Hook to call immediately after exec command. + /// Will be a special persistence after this exec if a observer returns true. 
+ fn post_exec_query( + &self, + _: &mut ObserverContext<'_>, + _: &Cmd, + _: &RaftApplyState, + _: &RegionState, + ) -> bool { + false + } } pub trait ApplySnapshotObserver: Coprocessor { @@ -215,14 +246,16 @@ pub trait RegionChangeObserver: Coprocessor { #[derive(Clone, Debug, Default)] pub struct Cmd { pub index: u64, + pub term: u64, pub request: RaftCmdRequest, pub response: RaftCmdResponse, } impl Cmd { - pub fn new(index: u64, request: RaftCmdRequest, response: RaftCmdResponse) -> Cmd { + pub fn new(index: u64, term: u64, request: RaftCmdRequest, response: RaftCmdResponse) -> Cmd { Cmd { index, + term, request, response, } diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index aa57676925c..7ce35f827c5 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -72,7 +72,9 @@ use self::memtrace::*; use super::metrics::*; use crate::{ bytes_capacity, - coprocessor::{Cmd, CmdBatch, CmdObserveInfo, CoprocessorHost, ObserveHandle, ObserveLevel}, + coprocessor::{ + Cmd, CmdBatch, CmdObserveInfo, CoprocessorHost, ObserveHandle, ObserveLevel, RegionState, + }, store::{ cmd_resp, fsm::RaftPollerBuilder, @@ -292,6 +294,7 @@ pub enum ExecResult { } /// The possible returned value when applying logs. 
+#[derive(Debug)] pub enum ApplyResult { None, Yield, @@ -974,10 +977,11 @@ where let expect_index = self.apply_state.get_applied_index() + 1; if expect_index != entry.get_index() { panic!( - "{} expect index {}, but got {}", + "{} expect index {}, but got {}, ctx {}", self.tag, expect_index, - entry.get_index() + entry.get_index(), + apply_ctx.tag, ); } @@ -1202,7 +1206,8 @@ where apply_ctx.sync_log_hint |= should_sync_log(&cmd); apply_ctx.host.pre_apply(&self.region, &cmd); - let (mut resp, exec_result) = self.apply_raft_cmd(apply_ctx, index, term, &cmd); + let (mut resp, exec_result, should_write) = + self.apply_raft_cmd(apply_ctx, index, term, &cmd); if let ApplyResult::WaitMergeSource(_) = exec_result { return exec_result; } @@ -1218,10 +1223,14 @@ where // store will call it after handing exec result. cmd_resp::bind_term(&mut resp, self.term); let cmd_cb = self.find_pending(index, term, is_conf_change_cmd(&cmd)); - let cmd = Cmd::new(index, cmd, resp); + let cmd = Cmd::new(index, term, cmd, resp); apply_ctx .applied_batch .push(cmd_cb, cmd, &self.observe_info, self.region_id()); + if should_write { + debug!("persist data and apply state"; "region_id" => self.region_id(), "peer_id" => self.id(), "state" => ?self.apply_state); + apply_ctx.commit(self); + } exec_result } @@ -1239,7 +1248,7 @@ where index: u64, term: u64, req: &RaftCmdRequest, - ) -> (RaftCmdResponse, ApplyResult) { + ) -> (RaftCmdResponse, ApplyResult, bool) { // if pending remove, apply should be aborted already. 
assert!(!self.pending_remove); @@ -1289,12 +1298,33 @@ where (resp, exec_result) }; if let ApplyResult::WaitMergeSource(_) = exec_result { - return (resp, exec_result); + return (resp, exec_result, false); } self.apply_state.set_applied_index(index); self.applied_index_term = term; + let cmd = Cmd::new(index, term, req.clone(), resp.clone()); + let should_write = ctx.host.post_exec( + &self.region, + &cmd, + &self.apply_state, + &RegionState { + peer_id: self.id(), + pending_remove: self.pending_remove, + modified_region: match exec_result { + ApplyResult::Res(ref e) => match e { + ExecResult::SplitRegion { ref derived, .. } => Some(derived.clone()), + ExecResult::PrepareMerge { ref region, .. } => Some(region.clone()), + ExecResult::CommitMerge { ref region, .. } => Some(region.clone()), + ExecResult::RollbackMerge { ref region, .. } => Some(region.clone()), + _ => None, + }, + _ => None, + }, + }, + ); + if let ApplyResult::Res(ref exec_result) = exec_result { match *exec_result { ExecResult::ChangePeer(ref cp) => { @@ -1345,7 +1375,7 @@ where } } - (resp, exec_result) + (resp, exec_result, should_write) } fn destroy(&mut self, apply_ctx: &mut ApplyContext) { @@ -4859,6 +4889,14 @@ mod tests { self } + fn prepare_merge(mut self, target: metapb::Region) -> EntryBuilder { + let mut request = AdminRequest::default(); + request.set_cmd_type(AdminCmdType::PrepareMerge); + request.mut_prepare_merge().set_target(target); + self.req.set_admin_request(request); + self + } + fn compact_log(mut self, index: u64, term: u64) -> EntryBuilder { let mut req = AdminRequest::default(); req.set_cmd_type(AdminCmdType::CompactLog); @@ -4905,6 +4943,27 @@ mod tests { } impl AdminObserver for ApplyObserver { + fn post_exec_admin( + &self, + _: &mut ObserverContext<'_>, + cmd: &Cmd, + _: &RaftApplyState, + region_state: &RegionState, + ) -> bool { + let request = cmd.request.get_admin_request(); + match request.get_cmd_type() { + AdminCmdType::CompactLog => true, + 
AdminCmdType::CommitMerge + | AdminCmdType::PrepareMerge + | AdminCmdType::RollbackMerge => { + assert!(region_state.modified_region.is_some()); + true + } + AdminCmdType::BatchSplit => true, + _ => false, + } + } + fn pre_exec_admin(&self, _: &mut ObserverContext<'_>, req: &AdminRequest) -> bool { let cmd_type = req.get_cmd_type(); if cmd_type == AdminCmdType::CompactLog @@ -5577,7 +5636,7 @@ mod tests { region_scheduler, coprocessor_host: host, importer, - engine, + engine: engine.clone(), router: router.clone(), store_id: 1, pending_create_peers, @@ -5597,13 +5656,16 @@ mod tests { router.schedule_task(1, Msg::Registration(reg)); let mut index_id = 1; - let put_entry = EntryBuilder::new(1, 1) + let put_entry = EntryBuilder::new(index_id, 1) .put(b"k1", b"v1") + .put(b"k2", b"v2") + .put(b"k3", b"v3") .epoch(1, 3) .build(); router.schedule_task(1, Msg::apply(apply(peer_id, 1, 1, vec![put_entry], vec![]))); fetch_apply_res(&rx); + // Phase 1: we test if pre_exec will filter execution of commands correctly. index_id += 1; let compact_entry = EntryBuilder::new(index_id, 1) .compact_log(index_id - 1, 2) @@ -5660,6 +5722,51 @@ mod tests { assert_eq!(apply_res.exec_res.len(), 0); obs.filter_consistency_check.store(false, Ordering::SeqCst); + // Phase 2: we test if post_exec will persist when need. + // We choose BatchSplit in order to make sure `modified_region` is filled. 
+ index_id += 1; + let mut splits = BatchSplitRequest::default(); + splits.set_right_derive(true); + splits.mut_requests().push(new_split_req(b"k2", 8, vec![7])); + let split = EntryBuilder::new(index_id, 1) + .split(splits) + .epoch(1, 3) + .build(); + router.schedule_task(1, Msg::apply(apply(peer_id, 1, 1, vec![split], vec![]))); + let apply_res = fetch_apply_res(&rx); + assert_eq!(apply_res.apply_state.get_applied_index(), index_id); + assert_eq!(apply_res.applied_index_term, 1); + let (_, r8) = if let ExecResult::SplitRegion { + regions, + derived: _, + new_split_regions: _, + } = apply_res.exec_res.front().unwrap() + { + let r8 = regions.get(0).unwrap(); + let r1 = regions.get(1).unwrap(); + assert_eq!(r8.get_id(), 8); + assert_eq!(r1.get_id(), 1); + (r1, r8) + } else { + panic!("error split exec_res"); + }; + + index_id += 1; + let merge = EntryBuilder::new(index_id, 1) + .prepare_merge(r8.clone()) + .epoch(1, 3) + .build(); + router.schedule_task(1, Msg::apply(apply(peer_id, 1, 1, vec![merge], vec![]))); + let apply_res = fetch_apply_res(&rx); + assert_eq!(apply_res.apply_state.get_applied_index(), index_id); + assert_eq!(apply_res.applied_index_term, 1); + // PrepareMerge will trigger commit. 
+ let state: RaftApplyState = engine + .get_msg_cf(CF_RAFT, &keys::apply_state_key(1)) + .unwrap() + .unwrap_or_default(); + assert_eq!(apply_res.apply_state, state); + system.shutdown(); } diff --git a/components/resolved_ts/src/cmd.rs b/components/resolved_ts/src/cmd.rs index 8d1cd6e2a90..f561aa07e28 100644 --- a/components/resolved_ts/src/cmd.rs +++ b/components/resolved_ts/src/cmd.rs @@ -49,6 +49,7 @@ impl ChangeLog { .map(|cmd| { let Cmd { index, + term: _, mut request, mut response, } = cmd; diff --git a/components/resolved_ts/src/observer.rs b/components/resolved_ts/src/observer.rs index 483649c36e7..c9decaadc77 100644 --- a/components/resolved_ts/src/observer.rs +++ b/components/resolved_ts/src/observer.rs @@ -185,7 +185,7 @@ mod test { put_cf(CF_WRITE, b"k7", b"v"), put_cf(CF_WRITE, b"k8", b"v"), ]; - let mut cmd = Cmd::new(0, RaftCmdRequest::default(), RaftCmdResponse::default()); + let mut cmd = Cmd::new(0, 0, RaftCmdRequest::default(), RaftCmdResponse::default()); cmd.request.mut_requests().clear(); for put in &data { cmd.request.mut_requests().push(put.clone()); diff --git a/src/server/gc_worker/applied_lock_collector.rs b/src/server/gc_worker/applied_lock_collector.rs index 009b7fbf76c..9c30afc350b 100644 --- a/src/server/gc_worker/applied_lock_collector.rs +++ b/src/server/gc_worker/applied_lock_collector.rs @@ -541,7 +541,7 @@ mod tests { fn make_raft_cmd(requests: Vec) -> Cmd { let mut req = RaftCmdRequest::default(); req.set_requests(requests.into()); - Cmd::new(0, req, RaftCmdResponse::default()) + Cmd::new(0, 0, req, RaftCmdResponse::default()) } fn new_test_collector() -> (AppliedLockCollector, CoprocessorHost) { From 48c7c8fa9e222295d79caa926f8d3e9eb89e9310 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Mon, 18 Jul 2022 10:51:06 +0800 Subject: [PATCH 0086/1149] raftstore: Avoid printing error log in case sending CaptureChange message failed (#12995) close tikv/tikv#12996 Signed-off-by: 
MyonKeminta Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/msg.rs | 11 +++++++++++ components/raftstore/src/store/transport.rs | 10 ++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 4f1ea017764..e3820a6d3ee 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -635,6 +635,17 @@ impl fmt::Debug for PeerMsg { } } +impl PeerMsg { + /// For some specific kind of messages, it's actually acceptable if failed to send it by + /// `significant_send`. This function determine if the current message is acceptable to fail. + pub fn is_send_failure_ignorable(&self) -> bool { + matches!( + self, + PeerMsg::SignificantMsg(SignificantMsg::CaptureChange { .. }) + ) + } +} + pub enum StoreMsg where EK: KvEngine, diff --git a/components/raftstore/src/store/transport.rs b/components/raftstore/src/store/transport.rs index 586b80ed6e5..f64fbae037e 100644 --- a/components/raftstore/src/store/transport.rs +++ b/components/raftstore/src/store/transport.rs @@ -6,7 +6,7 @@ use std::sync::mpsc; use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{KvEngine, RaftEngine, Snapshot}; use kvproto::raft_serverpb::RaftMessage; -use tikv_util::error; +use tikv_util::{error, warn}; use crate::{ store::{CasualMessage, PeerMsg, RaftCommand, RaftRouter, SignificantMsg, StoreMsg}, @@ -90,7 +90,13 @@ where .force_send(region_id, PeerMsg::SignificantMsg(msg)) { // TODO: panic here once we can detect system is shutting down reliably. - error!("failed to send significant msg"; "msg" => ?msg); + + // Avoid printing error log if it's not a severe problem failing to send it. 
+ if msg.is_send_failure_ignorable() { + warn!("failed to send significant msg"; "msg" => ?msg); + } else { + error!("failed to send significant msg"; "msg" => ?msg); + } return Err(Error::RegionNotFound(region_id)); } From 08f4674a798a23815798c3effe948bca006314ab Mon Sep 17 00:00:00 2001 From: Jarvis Date: Mon, 18 Jul 2022 14:27:07 +0800 Subject: [PATCH 0087/1149] Sm4 support (#12927) ref tikv/tikv#299, ref tikv/tikv#302, ref tikv/tikv#706, ref tikv/tikv#930, ref tikv/tikv#962, ref tikv/tikv#1656, close tikv/tikv#13041 Add SM4 encryption algorithm. Signed-off-by: Jarvis Zheng Co-authored-by: Xinye Tao --- Cargo.lock | 28 +++++++++++++++------- components/encryption/src/config.rs | 3 +++ components/encryption/src/crypter.rs | 3 +++ components/encryption/src/io.rs | 5 ++++ components/engine_rocks/src/encryption.rs | 1 + components/engine_traits/src/encryption.rs | 1 + etc/config-template.toml | 5 ++-- 7 files changed, 36 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dcf1eb84937..5ddd904e637 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2618,7 +2618,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#a5d4ffd2ba337dad0bc99e9fb53bf665864a3f3b" +source = "git+https://github.com/pingcap/kvproto.git#d88fa382391ec305e879be7635e39beae6a19890" dependencies = [ "futures 0.3.15", "grpcio", @@ -2747,7 +2747,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#c8878e2df0c7c23d553d345d337d9dda332e2d5a" +source = "git+https://github.com/tikv/rust-rocksdb.git#2e00e78b945194e8a672e8e078b6c73956e9ace0" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2766,7 +2766,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#c8878e2df0c7c23d553d345d337d9dda332e2d5a" +source = 
"git+https://github.com/tikv/rust-rocksdb.git#2e00e78b945194e8a672e8e078b6c73956e9ace0" dependencies = [ "bzip2-sys", "cc", @@ -3402,18 +3402,30 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" [[package]] name = "openssl" -version = "0.10.38" +version = "0.10.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c7ae222234c30df141154f159066c5093ff73b63204dcda7121eb082fc56a95" +checksum = "618febf65336490dfcf20b73f885f5651a0c89c64c2d4a8c3662585a70bf5bd0" dependencies = [ "bitflags", "cfg-if 1.0.0", "foreign-types", "libc 0.2.125", "once_cell", + "openssl-macros", "openssl-sys", ] +[[package]] +name = "openssl-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "openssl-probe" version = "0.1.2" @@ -3431,9 +3443,9 @@ dependencies = [ [[package]] name = "openssl-sys" -version = "0.9.72" +version = "0.9.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e46109c383602735fa0a2e48dd2b7c892b048e1bf69e5c3b1d804b7d9c203cb" +checksum = "e5f9bd0c2710541a3cda73d6f9ac4f1b240de4ae261065d309dbe73d9dceb42f" dependencies = [ "autocfg", "cc", @@ -4571,7 +4583,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#c8878e2df0c7c23d553d345d337d9dda332e2d5a" +source = "git+https://github.com/tikv/rust-rocksdb.git#2e00e78b945194e8a672e8e078b6c73956e9ace0" dependencies = [ "libc 0.2.125", "librocksdb_sys", diff --git a/components/encryption/src/config.rs b/components/encryption/src/config.rs index 8cb779f1cdc..4f83a72855f 100644 --- a/components/encryption/src/config.rs +++ b/components/encryption/src/config.rs @@ -111,6 +111,7 @@ mod encryption_method_serde { const AES128_CTR: &str = "aes128-ctr"; const AES192_CTR: &str = 
"aes192-ctr"; const AES256_CTR: &str = "aes256-ctr"; + const SM4_CTR: &str = "sm4-ctr"; #[allow(clippy::trivially_copy_pass_by_ref)] pub fn serialize(method: &EncryptionMethod, serializer: S) -> Result @@ -123,6 +124,7 @@ mod encryption_method_serde { EncryptionMethod::Aes128Ctr => serializer.serialize_str(AES128_CTR), EncryptionMethod::Aes192Ctr => serializer.serialize_str(AES192_CTR), EncryptionMethod::Aes256Ctr => serializer.serialize_str(AES256_CTR), + EncryptionMethod::Sm4Ctr => serializer.serialize_str(SM4_CTR), } } @@ -149,6 +151,7 @@ mod encryption_method_serde { AES128_CTR => Ok(EncryptionMethod::Aes128Ctr), AES192_CTR => Ok(EncryptionMethod::Aes192Ctr), AES256_CTR => Ok(EncryptionMethod::Aes256Ctr), + SM4_CTR => Ok(EncryptionMethod::Sm4Ctr), _ => Err(E::invalid_value(Unexpected::Str(value), &self)), } } diff --git a/components/encryption/src/crypter.rs b/components/encryption/src/crypter.rs index 9c148e62247..f869817de2b 100644 --- a/components/encryption/src/crypter.rs +++ b/components/encryption/src/crypter.rs @@ -16,6 +16,7 @@ pub fn encryption_method_to_db_encryption_method(method: EncryptionMethod) -> DB EncryptionMethod::Aes128Ctr => DBEncryptionMethod::Aes128Ctr, EncryptionMethod::Aes192Ctr => DBEncryptionMethod::Aes192Ctr, EncryptionMethod::Aes256Ctr => DBEncryptionMethod::Aes256Ctr, + EncryptionMethod::Sm4Ctr => DBEncryptionMethod::Sm4Ctr, EncryptionMethod::Unknown => DBEncryptionMethod::Unknown, } } @@ -26,6 +27,7 @@ pub fn encryption_method_from_db_encryption_method(method: DBEncryptionMethod) - DBEncryptionMethod::Aes128Ctr => EncryptionMethod::Aes128Ctr, DBEncryptionMethod::Aes192Ctr => EncryptionMethod::Aes192Ctr, DBEncryptionMethod::Aes256Ctr => EncryptionMethod::Aes256Ctr, + DBEncryptionMethod::Sm4Ctr => EncryptionMethod::Sm4Ctr, DBEncryptionMethod::Unknown => EncryptionMethod::Unknown, } } @@ -40,6 +42,7 @@ pub fn get_method_key_length(method: EncryptionMethod) -> usize { EncryptionMethod::Aes128Ctr => 16, EncryptionMethod::Aes192Ctr => 
24, EncryptionMethod::Aes256Ctr => 32, + EncryptionMethod::Sm4Ctr => 16, unknown => panic!("bad EncryptionMethod {:?}", unknown), } } diff --git a/components/encryption/src/io.rs b/components/encryption/src/io.rs index 6f7d28f61b8..d62542cb16a 100644 --- a/components/encryption/src/io.rs +++ b/components/encryption/src/io.rs @@ -377,6 +377,7 @@ pub fn create_aes_ctr_crypter( EncryptionMethod::Aes128Ctr => OCipher::aes_128_ctr(), EncryptionMethod::Aes192Ctr => OCipher::aes_192_ctr(), EncryptionMethod::Aes256Ctr => OCipher::aes_256_ctr(), + EncryptionMethod::Sm4Ctr => OCipher::sm4_ctr(), }; let crypter = OCrypter::new(cipher, mode, key, Some(iv.as_slice()))?; Ok((cipher, crypter)) @@ -525,6 +526,7 @@ mod tests { EncryptionMethod::Aes128Ctr, EncryptionMethod::Aes192Ctr, EncryptionMethod::Aes256Ctr, + EncryptionMethod::Sm4Ctr, ]; let ivs = [ Iv::new_ctr(), @@ -593,6 +595,7 @@ mod tests { EncryptionMethod::Aes128Ctr, EncryptionMethod::Aes192Ctr, EncryptionMethod::Aes256Ctr, + EncryptionMethod::Sm4Ctr, ]; let mut plaintext = vec![0; 10240]; OsRng.fill_bytes(&mut plaintext); @@ -628,6 +631,7 @@ mod tests { EncryptionMethod::Aes128Ctr, EncryptionMethod::Aes192Ctr, EncryptionMethod::Aes256Ctr, + EncryptionMethod::Sm4Ctr, ]; let mut plaintext = vec![0; 10240]; OsRng.fill_bytes(&mut plaintext); @@ -700,6 +704,7 @@ mod tests { EncryptionMethod::Aes128Ctr, EncryptionMethod::Aes192Ctr, EncryptionMethod::Aes256Ctr, + EncryptionMethod::Sm4Ctr, ]; let iv = Iv::new_ctr(); let mut plain_text = vec![0; 10240]; diff --git a/components/engine_rocks/src/encryption.rs b/components/engine_rocks/src/encryption.rs index a8ec54673b3..94c13e811a9 100644 --- a/components/engine_rocks/src/encryption.rs +++ b/components/engine_rocks/src/encryption.rs @@ -64,6 +64,7 @@ fn convert_encryption_method(input: EncryptionMethod) -> DBEncryptionMethod { EncryptionMethod::Aes128Ctr => DBEncryptionMethod::Aes128Ctr, EncryptionMethod::Aes192Ctr => DBEncryptionMethod::Aes192Ctr, EncryptionMethod::Aes256Ctr => 
DBEncryptionMethod::Aes256Ctr, + EncryptionMethod::Sm4Ctr => DBEncryptionMethod::Sm4Ctr, EncryptionMethod::Unknown => DBEncryptionMethod::Unknown, } } diff --git a/components/engine_traits/src/encryption.rs b/components/engine_traits/src/encryption.rs index 51b19c05907..41a0f97fb36 100644 --- a/components/engine_traits/src/encryption.rs +++ b/components/engine_traits/src/encryption.rs @@ -53,4 +53,5 @@ pub enum EncryptionMethod { Aes128Ctr = 2, Aes192Ctr = 3, Aes256Ctr = 4, + Sm4Ctr = 5, } diff --git a/etc/config-template.toml b/etc/config-template.toml index b63fe2ce235..a19533b7847 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -1101,8 +1101,9 @@ ## Configurations for encryption at rest. Experimental. [security.encryption] ## Encryption method to use for data files. -## Possible values are "plaintext", "aes128-ctr", "aes192-ctr" and "aes256-ctr". Value other than -## "plaintext" means encryption is enabled, in which case master key must be specified. +## Possible values are "plaintext", "aes128-ctr", "aes192-ctr", "aes256-ctr" and "sm4-ctr". +## Value other than "plaintext" means encryption is enabled, in which case +## master key must be specified. # data-encryption-method = "plaintext" ## Specifies how often TiKV rotates data encryption key. 
From 7dc2e017b407538b1a3ce19f0345c42d712c51dc Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Mon, 18 Jul 2022 15:43:07 +0800 Subject: [PATCH 0088/1149] log-backup: update global-checkpoint to storage periodically (#13035) ref tikv/tikv#1, ref tikv/tikv#12895 Signed-off-by: joccau Co-authored-by: Ti Chi Robot --- components/backup-stream/src/endpoint.rs | 33 +++++++ components/backup-stream/src/router.rs | 117 ++++++++++++++++++++++- 2 files changed, 149 insertions(+), 1 deletion(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 958df7286a7..51e04023d60 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -801,6 +801,32 @@ where })); } + fn on_update_global_checkpoint(&self, task: String) { + self.pool.block_on(async move { + let ts = self.meta_client.global_progress_of_task(&task).await; + match ts { + Ok(global_checkpoint) => { + if let Err(e) = self + .range_router + .update_global_checkpoint(&task, global_checkpoint, self.store_id) + .await + { + warn!("backup stream failed to update global checkpoint."; + "task" => ?task, + "err" => ?e + ); + } + } + Err(e) => { + warn!("backup stream failed to get global checkpoint."; + "task" => ?task, + "err" => ?e + ); + } + } + }); + } + /// Modify observe over some region. /// This would register the region to the RaftStore. pub fn on_modify_observe(&self, op: ObserveOp) { @@ -839,6 +865,7 @@ where Task::MarkFailover(t) => self.failover_time = Some(t), Task::FlushWithMinTs(task, min_ts) => self.on_flush_with_min_ts(task, min_ts), Task::RegionCheckpointsOp(s) => self.handle_region_checkpoints_op(s), + Task::UpdateGlobalCheckpoint(task) => self.on_update_global_checkpoint(task), } } @@ -958,6 +985,8 @@ pub enum Task { FlushWithMinTs(String, TimeStamp), /// The command for getting region checkpoints. 
RegionCheckpointsOp(RegionCheckpointOperation), + /// update global-checkpoint-ts to storage. + UpdateGlobalCheckpoint(String), } #[derive(Debug)] @@ -1054,6 +1083,9 @@ impl fmt::Debug for Task { .field(arg1) .finish(), Self::RegionCheckpointsOp(s) => f.debug_tuple("GetRegionCheckpoints").field(s).finish(), + Self::UpdateGlobalCheckpoint(task) => { + f.debug_tuple("UpdateGlobalCheckpoint").field(task).finish() + } } } } @@ -1090,6 +1122,7 @@ impl Task { Task::MarkFailover(_) => "mark_failover", Task::FlushWithMinTs(..) => "flush_with_min_ts", Task::RegionCheckpointsOp(..) => "get_checkpoints", + Task::UpdateGlobalCheckpoint(..) => "update_global_checkpoint", } } } diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 7a2c895edb2..9812d4ed95f 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -8,7 +8,7 @@ use std::{ path::{Path, PathBuf}, result, sync::{ - atomic::{AtomicBool, AtomicPtr, AtomicUsize, Ordering}, + atomic::{AtomicBool, AtomicPtr, AtomicU64, AtomicUsize, Ordering}, Arc, RwLock as SyncRwLock, }, time::Duration, @@ -564,9 +564,28 @@ impl RouterInner { } } + pub async fn update_global_checkpoint( + &self, + task_name: &str, + global_checkpoint: u64, + store_id: u64, + ) -> Result<()> { + let t = self.get_task_info(task_name).await?; + t.update_global_checkpoint(global_checkpoint, store_id) + .await?; + Ok(()) + } + /// tick aims to flush log/meta to extern storage periodically. pub async fn tick(&self) { for (name, task_info) in self.tasks.lock().await.iter() { + if let Err(e) = self + .scheduler + .schedule(Task::UpdateGlobalCheckpoint(name.to_string())) + { + error!("backup stream schedule task failed"; "error" => ?e); + } + // if stream task need flush this time, schedule Task::Flush, or update time justly. 
if task_info.should_flush() && task_info.set_flushing_status_cas(false, true).is_ok() { info!( @@ -745,6 +764,8 @@ pub struct StreamTaskInfo { flushing: AtomicBool, /// This counts how many times this task has failed to flush. flush_fail_count: AtomicUsize, + /// global checkpoint ts for this task. + global_checkpoint_ts: AtomicU64, } impl Drop for StreamTaskInfo { @@ -786,6 +807,7 @@ impl StreamTaskInfo { task.info.get_storage(), BackendConfig::default(), )?); + let start_ts = task.info.get_start_ts(); Ok(Self { task, storage, @@ -799,6 +821,7 @@ impl StreamTaskInfo { total_size: AtomicUsize::new(0), flushing: AtomicBool::new(false), flush_fail_count: AtomicUsize::new(0), + global_checkpoint_ts: AtomicU64::new(start_ts), }) } @@ -1074,6 +1097,42 @@ impl StreamTaskInfo { result } + + pub async fn flush_global_checkpoint(&self, store_id: u64) -> Result<()> { + let filename = format!("v1/global_checkpoint/{}.ts", store_id); + let buff = self + .global_checkpoint_ts + .load(Ordering::SeqCst) + .to_le_bytes(); + self.storage + .write( + &filename, + UnpinReader(Box::new(Cursor::new(buff))), + buff.len() as _, + ) + .await?; + Ok(()) + } + + pub async fn update_global_checkpoint( + &self, + global_checkpoint: u64, + store_id: u64, + ) -> Result<()> { + let last_global_checkpoint = self.global_checkpoint_ts.load(Ordering::SeqCst); + if last_global_checkpoint < global_checkpoint { + let r = self.global_checkpoint_ts.compare_exchange( + last_global_checkpoint, + global_checkpoint, + Ordering::SeqCst, + Ordering::SeqCst, + ); + if r.is_ok() { + self.flush_global_checkpoint(store_id).await?; + } + } + Ok(()) + } } /// A opened log file with some metadata. 
@@ -1930,4 +1989,60 @@ mod tests { run(case) } } + + #[tokio::test] + async fn test_update_global_checkpoint() { + // create local storage + let tmp_dir = tempfile::tempdir().unwrap(); + let backend = external_storage_export::make_local_backend(tmp_dir.path()); + + // build a StreamTaskInfo + let mut task_info = StreamBackupTaskInfo::default(); + task_info.set_storage(backend); + let stream_task = StreamTask { + info: task_info, + is_paused: false, + }; + let task = StreamTaskInfo::new( + tmp_dir.path().to_path_buf(), + stream_task, + Duration::from_secs(300), + vec![(vec![], vec![])], + ) + .await + .unwrap(); + task.global_checkpoint_ts.store(10001, Ordering::SeqCst); + + // test no need to update global checkpoint + let store_id = 3; + let mut global_checkpoint = 10000; + let r = task + .update_global_checkpoint(global_checkpoint, store_id) + .await; + assert_eq!(r.is_ok(), true); + assert_eq!(task.global_checkpoint_ts.load(Ordering::SeqCst), 10001); + + // test update global checkpoint + global_checkpoint = 10002; + let r = task + .update_global_checkpoint(global_checkpoint, store_id) + .await; + assert_eq!(r.is_ok(), true); + assert_eq!( + task.global_checkpoint_ts.load(Ordering::SeqCst), + global_checkpoint + ); + + let filename = format!("v1/global_checkpoint/{}.ts", store_id); + let filepath = tmp_dir.as_ref().join(filename); + let exist = file_system::file_exists(filepath.clone()); + assert_eq!(exist, true); + + let buff = file_system::read(filepath).unwrap(); + assert_eq!(buff.len(), 8); + let mut ts = [b'0'; 8]; + ts.copy_from_slice(&buff); + let ts = u64::from_le_bytes(ts); + assert_eq!(ts, global_checkpoint); + } } From 9b1f195af8a14740ab611d630da2ab66ce105089 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Mon, 18 Jul 2022 17:39:08 +0800 Subject: [PATCH 0089/1149] raftstore: pub `check_sst_for_ingestion` (#13040) ref tikv/tikv#12849 pub `check_sst_for_ingestion` Signed-off-by: CalvinNeo Co-authored-by: Ti Chi Robot --- 
components/raftstore/src/store/fsm/apply.rs | 5 ++++- components/raftstore/src/store/fsm/mod.rs | 8 ++++---- components/raftstore/src/store/mod.rs | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 7ce35f827c5..16e039dd640 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -2855,7 +2855,10 @@ pub fn is_conf_change_cmd(msg: &RaftCmdRequest) -> bool { req.has_change_peer() || req.has_change_peer_v2() } -fn check_sst_for_ingestion(sst: &SstMeta, region: &Region) -> Result<()> { +/// This function is used to check whether an sst is valid for ingestion. +/// +/// The `sst` must have epoch and range matched with `region`. +pub fn check_sst_for_ingestion(sst: &SstMeta, region: &Region) -> Result<()> { let uuid = sst.get_uuid(); if let Err(e) = UuidBuilder::from_slice(uuid) { return Err(box_err!("invalid uuid {:?}: {:?}", uuid, e)); diff --git a/components/raftstore/src/store/fsm/mod.rs b/components/raftstore/src/store/fsm/mod.rs index 731ad5209b4..7aa93867158 100644 --- a/components/raftstore/src/store/fsm/mod.rs +++ b/components/raftstore/src/store/fsm/mod.rs @@ -11,10 +11,10 @@ pub mod store; pub use self::{ apply::{ - create_apply_batch_system, Apply, ApplyBatchSystem, ApplyMetrics, ApplyRes, ApplyRouter, - Builder as ApplyPollerBuilder, CatchUpLogs, ChangeObserver, ChangePeer, ExecResult, - GenSnapTask, Msg as ApplyTask, Notifier as ApplyNotifier, Proposal, Registration, - TaskRes as ApplyTaskRes, + check_sst_for_ingestion, create_apply_batch_system, Apply, ApplyBatchSystem, ApplyMetrics, + ApplyRes, ApplyRouter, Builder as ApplyPollerBuilder, CatchUpLogs, ChangeObserver, + ChangePeer, ExecResult, GenSnapTask, Msg as ApplyTask, Notifier as ApplyNotifier, Proposal, + Registration, TaskRes as ApplyTaskRes, }, peer::{DestroyPeerJob, PeerFsm}, store::{ diff --git a/components/raftstore/src/store/mod.rs 
b/components/raftstore/src/store/mod.rs index b1b8da54e2b..64c70bbc2e7 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -33,7 +33,7 @@ pub use self::{ }, compaction_guard::CompactionGuardGeneratorFactory, config::Config, - fsm::{DestroyPeerJob, RaftRouter, StoreInfo}, + fsm::{check_sst_for_ingestion, DestroyPeerJob, RaftRouter, StoreInfo}, hibernate_state::{GroupState, HibernateState}, memory::*, metrics::RAFT_ENTRY_FETCHES_VEC, From 190f4634872ae4f78d7f8a51a7450176f20d41f8 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Mon, 18 Jul 2022 21:31:07 +0800 Subject: [PATCH 0090/1149] log-backup: fix the missing sha256 calculation in a flush retry (#13033) ref tikv/tikv#208, close tikv/tikv#13034 Signed-off-by: 3pointer --- components/backup-stream/src/router.rs | 59 ++++++++++++++++++-------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 9812d4ed95f..1ad4c4ad4ca 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -747,7 +747,7 @@ pub struct StreamTaskInfo { /// The temporary file index. Both meta (m prefixed keys) and data (t prefixed keys). files: SlotMap, /// flushing_files contains files pending flush. - flushing_files: RwLock)>>, + flushing_files: RwLock, DataFileInfo)>>, /// last_flush_ts represents last time this task flushed to storage. last_flush_time: AtomicPtr, /// flush_interval represents the tick interval of flush, setting by users. @@ -774,6 +774,7 @@ impl Drop for StreamTaskInfo { .flushing_files .get_mut() .drain(..) + .map(|(a, b, _)| (a, b)) .chain(self.files.get_mut().drain()) .map(|(_, f)| f.into_inner().local_path) .map(std::fs::remove_file) @@ -880,7 +881,7 @@ impl StreamTaskInfo { pub async fn generate_metadata(&self, store_id: u64) -> Result { let w = self.flushing_files.read().await; // Let's flush all files first... 
- futures::future::join_all(w.iter().map(|(_, f)| async move { + futures::future::join_all(w.iter().map(|(_, f, _)| async move { let file = &mut f.lock().await.inner; file.flush().await?; file.get_ref().sync_all().await?; @@ -893,10 +894,8 @@ impl StreamTaskInfo { let mut metadata = MetadataInfo::with_capacity(w.len()); metadata.set_store_id(store_id); - for (file_key, data_file) in w.iter() { - let mut data_file = data_file.lock().await; - let file_meta = data_file.generate_metadata(file_key, store_id)?; - metadata.push(file_meta) + for (_, _, file_meta) in w.iter() { + metadata.push(file_meta.to_owned()) } Ok(metadata) } @@ -930,22 +929,27 @@ impl StreamTaskInfo { } /// move need-flushing files to flushing_files. - pub async fn move_to_flushing_files(&self) -> &Self { + pub async fn move_to_flushing_files(&self, store_id: u64) -> Result<&Self> { // if flushing_files is not empty, which represents this flush is a retry operation. if !self.flushing_files.read().await.is_empty() { - return self; + return Ok(self); } let mut w = self.files.write().await; let mut fw = self.flushing_files.write().await; for (k, v) in w.drain() { - fw.push((k, v)); + // we should generate file metadata(calculate sha256) when moving file. + // because sha256 calculation is a unsafe move operation. + // we cannot re-calculate it in retry. + // TODO refactor move_to_flushing_files and generate_metadata + let file_meta = v.lock().await.generate_metadata(&k, store_id)?; + fw.push((k, v, file_meta)); } - self + Ok(self) } pub async fn clear_flushing_files(&self) { - for (_, v) in self.flushing_files.write().await.drain(..) { + for (_, v, _) in self.flushing_files.write().await.drain(..) 
{ let data_file = v.lock().await; debug!("removing data file"; "size" => %data_file.file_size, "name" => %data_file.local_path.display()); self.total_size @@ -1000,7 +1004,7 @@ impl StreamTaskInfo { for batch_files in files.chunks(FLUSH_LOG_CONCURRENT_BATCH_COUNT) { let futs = batch_files .iter() - .map(|(_, v)| Self::flush_log_file_to(storage.clone(), v)); + .map(|(_, v, _)| Self::flush_log_file_to(storage.clone(), v)); futures::future::try_join_all(futs).await?; } @@ -1046,8 +1050,8 @@ impl StreamTaskInfo { // generate meta data and prepare to flush to storage let mut metadata_info = self - .move_to_flushing_files() - .await + .move_to_flushing_files(store_id) + .await? .generate_metadata(store_id) .await?; metadata_info.min_resolved_ts = metadata_info @@ -1580,8 +1584,8 @@ mod tests { let end_ts = TimeStamp::physical_now(); let files = router.tasks.lock().await.get("dummy").unwrap().clone(); let meta = files - .move_to_flushing_files() - .await + .move_to_flushing_files(1) + .await? .generate_metadata(1) .await?; assert_eq!(meta.files.len(), 3, "test file len = {}", meta.files.len()); @@ -1596,6 +1600,25 @@ mod tests { start_ts, end_ts ); + + // in some case when flush failed to write files to storage. + // we may run `generate_metadata` again with same files. + let another_meta = files + .move_to_flushing_files(1) + .await? + .generate_metadata(1) + .await?; + + assert_eq!(meta.files.len(), another_meta.files.len()); + for i in 0..meta.files.len() { + let file1 = meta.files.get(i).unwrap(); + let file2 = another_meta.files.get(i).unwrap(); + // we have to make sure two times sha256 of file must be the same. + assert_eq!(file1.sha256, file2.sha256); + assert_eq!(file1.start_key, file2.start_key); + assert_eq!(file1.end_key, file2.end_key); + } + files.flush_log().await?; files.flush_meta(meta).await?; files.clear_flushing_files().await; @@ -1840,8 +1863,8 @@ mod tests { router .get_task_info("cleanup_test") .await? 
- .move_to_flushing_files() - .await; + .move_to_flushing_files(1) + .await?; write_simple_data(&router).await; let mut w = walkdir::WalkDir::new(&tmp).into_iter(); assert!(w.next().is_some(), "the temp files doesn't created"); From dfb8559444e85c6eafe22435e22c98f28bb40436 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 19 Jul 2022 10:33:07 +0800 Subject: [PATCH 0091/1149] log-backup: added some new metrics for log backup (#13048) ref tikv/tikv#12534 Added the advancer metrics. Signed-off-by: Yu Juncen Co-authored-by: zhangjinpeng1987 --- metrics/grafana/tikv_details.json | 1373 ++++++++++++++++++++++++++--- 1 file changed, 1249 insertions(+), 124 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index b07aff345a7..adb398824ca 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -40123,7 +40123,7 @@ "h": 1, "w": 24, "x": 0, - "y": 49 + "y": 54 }, "id": 13016, "panels": [ @@ -40177,7 +40177,7 @@ "h": 4, "w": 5, "x": 0, - "y": 50 + "y": 55 }, "id": 14361, "options": { @@ -40195,7 +40195,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -40239,7 +40239,7 @@ "h": 8, "w": 8, "x": 5, - "y": 50 + "y": 55 }, "id": 14507, "options": { @@ -40257,7 +40257,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -40300,7 +40300,7 @@ "h": 8, "w": 8, "x": 13, - "y": 50 + "y": 55 }, "id": 14363, "options": { @@ -40318,11 +40318,11 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, - "expr": "increase(tikv_stream_flush_file_size_count{instance=~\"$instance\"}[30m])", + "expr": "round(increase(tikv_stream_flush_file_size_count{instance=~\"$instance\"}[30m]))", "instant": true, "interval": "", 
"legendFormat": "{{ instance }}", @@ -40361,7 +40361,7 @@ "h": 2, "w": 3, "x": 21, - "y": 50 + "y": 55 }, "id": 14508, "options": { @@ -40379,7 +40379,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -40422,7 +40422,7 @@ "h": 3, "w": 3, "x": 21, - "y": 52 + "y": 57 }, "id": 14362, "options": { @@ -40440,7 +40440,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -40517,9 +40517,9 @@ }, "gridPos": { "h": 4, - "w": 5, + "w": 2, "x": 0, - "y": 54 + "y": 59 }, "id": 14907, "options": { @@ -40537,7 +40537,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -40551,10 +40551,70 @@ ], "timeFrom": null, "timeShift": null, - "title": "Log Backup Task Status", + "title": "Task Status", "transformations": [], "type": "stat" }, + { + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "min": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-blue", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 2, + "y": 59 + }, + "id": 15361, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "name" + }, + "pluginVersion": "7.5.11", + "targets": [ + { + "exemplar": true, + "expr": "tidb_log_backup_advancer_owner > 0", + "instant": true, + "interval": "", + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Advancer Owner", + "type": "stat" + }, { "datasource": "${DS_TEST-CLUSTER}", "description": "This is the summary of the file count has been 
flushed, summered by the data each TiKV has flushed since last boot. \n**NOTE: The size may get reduced if some of TiKVs reboot.**", @@ -40581,7 +40641,7 @@ "h": 3, "w": 3, "x": 21, - "y": 55 + "y": 60 }, "id": 14911, "options": { @@ -40599,11 +40659,11 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_stream_flush_file_size_count{instance=~\"$instance\"}[30m]))", + "expr": "round(sum(increase(tikv_stream_flush_file_size_count{instance=~\"$instance\"}[30m])))", "hide": false, "instant": true, "interval": "", @@ -40637,7 +40697,7 @@ "h": 10, "w": 6, "x": 0, - "y": 58 + "y": 63 }, "hiddenSeries": false, "id": 13262, @@ -40664,7 +40724,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -40675,7 +40735,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_stream|log_backup_scan(_[0-9]+)?\"}[2m])) by (instance)", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}[2m])) by (instance)", "format": "time_series", "hide": false, "interval": "", @@ -40747,7 +40807,7 @@ "h": 10, "w": 6, "x": 6, - "y": 58 + "y": 63 }, "hiddenSeries": false, "id": 12843, @@ -40769,7 +40829,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -40849,7 +40909,7 @@ "h": 10, "w": 6, "x": 12, - "y": 58 + "y": 63 }, "hiddenSeries": false, "id": 14135, @@ -40870,7 +40930,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -40933,65 +40993,93 @@ } }, { 
+ "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 600000 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Checkpoint Lag Too Huge", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "min": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "#EAB839", - "value": 300000 - }, - { - "color": "red", - "value": 900000 - } - ] - }, "unit": "ms" }, "overrides": [] }, + "fill": 1, + "fillGradient": 0, "gridPos": { "h": 10, "w": 6, "x": 18, - "y": 58 + "y": 63 }, + "hiddenSeries": false, "id": 14774, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "center", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "value_and_name" + "alertThreshold": true }, - "pluginVersion": "7.5.7", + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "time() * 1000 - tikv_stream_store_checkpoint_ts{instance=~\"$instance\"} / 262144 > 300000", - "instant": true, + "expr": "time() * 1000 - max(tidb_log_backup_last_checkpoint / 262144 > 0) 
by (task)", + "instant": false, "interval": "", - "legendFormat": "{{ task }}@{{ instance }}", + "legendFormat": "{{ task }}", "refId": "A" }, { @@ -41003,10 +41091,57 @@ "refId": "B" } ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 600000, + "visible": true + } + ], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Abnormal Checkpoint TS Lag", - "type": "stat" + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:228", + "format": "ms", + "label": null, + "logBase": 1, + "max": "3000000", + "min": "0", + "show": true + }, + { + "$$hashKey": "object:229", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -41025,7 +41160,7 @@ "h": 10, "w": 6, "x": 0, - "y": 68 + "y": 73 }, "hiddenSeries": false, "id": 13100, @@ -41047,7 +41182,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -41127,7 +41262,7 @@ "h": 10, "w": 6, "x": 6, - "y": 68 + "y": 73 }, "hiddenSeries": false, "id": 14630, @@ -41147,7 +41282,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -41163,7 +41298,7 @@ "targets": [ { "exemplar": true, - "expr": "(tikv_stream_observed_region{instance=~\"$instance\", type=\"inc\"} - on(instance) tikv_stream_observed_region{instance=~\"$instance\", type=\"dec\"}) > 0", + "expr": "tikv_stream_observed_region{instance=~\"$instance\"}", "interval": "", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -41172,7 +41307,7 @@ }, { "exemplar": 
true, - "expr": "sum(tikv_stream_observed_region{instance=~\"$instance\", type=\"inc\"} - on(instance) tikv_stream_observed_region{instance=~\"$instance\", type=\"dec\"}) > 0", + "expr": "sum(tikv_stream_observed_region{instance=~\"$instance\"})", "hide": false, "interval": "", "legendFormat": "total", @@ -41228,7 +41363,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The errors met when backing up.", + "description": "The errors met when backing up.\n**They are retryable, don't worry.**", "fieldConfig": { "defaults": {}, "overrides": [] @@ -41239,7 +41374,7 @@ "h": 5, "w": 6, "x": 12, - "y": 68 + "y": 73 }, "hiddenSeries": false, "id": 13101, @@ -41261,7 +41396,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -41350,7 +41485,7 @@ "h": 10, "w": 6, "x": 18, - "y": 68 + "y": 73 }, "hiddenSeries": false, "id": 14910, @@ -41370,7 +41505,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -41388,7 +41523,7 @@ "targets": [ { "exemplar": true, - "expr": "min(tikv_stream_store_checkpoint_ts{instance=~\"$instance\"} / 262144) by (task)", + "expr": "max(tidb_log_backup_last_checkpoint{instance=~\"$instance\"} / 262144 > 0) by (task)", "instant": false, "interval": "", "legendFormat": "{{ task }}", @@ -41463,7 +41598,7 @@ "h": 5, "w": 6, "x": 12, - "y": 73 + "y": 78 }, "hiddenSeries": false, "id": 14908, @@ -41485,7 +41620,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -41593,7 +41728,7 @@ "h": 7, "w": 6, "x": 0, - "y": 78 + "y": 83 }, "heatmap": {}, "hideZeroBuckets": true, @@ -41676,7 +41811,7 @@ "h": 7, "w": 6, "x": 6, - "y": 78 + "y": 83 }, "heatmap": {}, "hideZeroBuckets": true, 
@@ -41759,7 +41894,7 @@ "h": 7, "w": 6, "x": 12, - "y": 78 + "y": 83 }, "heatmap": {}, "hideZeroBuckets": true, @@ -41842,7 +41977,7 @@ "h": 7, "w": 6, "x": 18, - "y": 78 + "y": 83 }, "heatmap": {}, "hideZeroBuckets": true, @@ -41925,7 +42060,7 @@ "h": 7, "w": 6, "x": 0, - "y": 85 + "y": 90 }, "heatmap": {}, "hideZeroBuckets": true, @@ -42008,7 +42143,7 @@ "h": 7, "w": 6, "x": 6, - "y": 85 + "y": 90 }, "heatmap": {}, "hideZeroBuckets": true, @@ -42091,7 +42226,7 @@ "h": 7, "w": 6, "x": 12, - "y": 85 + "y": 90 }, "heatmap": {}, "hideZeroBuckets": true, @@ -42174,7 +42309,7 @@ "h": 7, "w": 6, "x": 18, - "y": 85 + "y": 90 }, "heatmap": {}, "hideZeroBuckets": true, @@ -42238,7 +42373,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Misc statistics of RocksDB during initial scanning.", + "description": "The internal message type count.", "fieldConfig": { "defaults": {}, "overrides": [] @@ -42249,17 +42384,16 @@ "h": 6, "w": 12, "x": 0, - "y": 92 + "y": 97 }, "hiddenSeries": false, - "id": 14270, + "id": 14914, "legend": { "avg": false, "current": false, "max": false, "min": false, - "rightSide": true, - "show": true, + "show": false, "total": false, "values": false }, @@ -42270,7 +42404,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -42281,9 +42415,9 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_stream_initial_scan_operations{instance=~\"$instance\", op!~\"read_bytes\"}[$__rate_interval])) BY (op, cf) > 0", + "expr": "sum(rate(tikv_log_backup_interal_actor_acting_duration_sec_count{instance=~\"$instance\"}[$__rate_interval])) by (message)", "interval": "", - "legendFormat": "{{ cf }}/{{ op }}", + "legendFormat": "{{ message }}", "queryType": "randomWalk", "refId": "A" } @@ -42292,7 +42426,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Initial Scan RocksDB Operation ", 
+ "title": "Internal Message Type", "tooltip": { "shared": true, "sort": 0, @@ -42352,7 +42486,7 @@ "h": 6, "w": 6, "x": 12, - "y": 92 + "y": 97 }, "hiddenSeries": false, "id": 14912, @@ -42374,7 +42508,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -42385,7 +42519,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(histogram_quantile(0.99, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket[10m]))) by (message)", + "expr": "sum(histogram_quantile(0.99, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket{instance=~\"$instance\"}[10m]))) by (message)", "interval": "", "legendFormat": "{{ message }}", "queryType": "randomWalk", @@ -42452,7 +42586,7 @@ "h": 6, "w": 6, "x": 18, - "y": 92 + "y": 97 }, "hiddenSeries": false, "id": 14913, @@ -42472,7 +42606,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -42483,7 +42617,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(histogram_quantile(0.9, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket[10m]))) by (message)", + "expr": "sum(histogram_quantile(0.9, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket{instance=~\"$instance\"}[10m]))) by (message)", "interval": "", "legendFormat": "{{ message }}", "queryType": "randomWalk", @@ -42548,9 +42682,9 @@ "fillGradient": 0, "gridPos": { "h": 6, - "w": 12, + "w": 6, "x": 0, - "y": 98 + "y": 103 }, "hiddenSeries": false, "id": 14271, @@ -42571,7 +42705,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -42638,7 +42772,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal message type count.", + "description": "Misc statistics of 
RocksDB during initial scanning.", "fieldConfig": { "defaults": {}, "overrides": [] @@ -42648,17 +42782,18 @@ "gridPos": { "h": 6, "w": 6, - "x": 12, - "y": 98 + "x": 6, + "y": 103 }, "hiddenSeries": false, - "id": 14914, + "id": 14270, "legend": { "avg": false, "current": false, "max": false, "min": false, - "show": false, + "rightSide": true, + "show": true, "total": false, "values": false }, @@ -42669,7 +42804,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -42680,9 +42815,9 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_log_backup_interal_actor_acting_duration_sec_count[$__rate_interval])) by (message)", + "expr": "sum(rate(tikv_stream_initial_scan_operations{instance=~\"$instance\", op!~\"read_bytes\"}[$__rate_interval])) BY (op, cf) > 0", "interval": "", - "legendFormat": "{{ message }}", + "legendFormat": "{{ cf }}/{{ op }}", "queryType": "randomWalk", "refId": "A" } @@ -42691,7 +42826,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Internal Message Type", + "title": "Initial Scan RocksDB Operation ", "tooltip": { "shared": true, "sort": 0, @@ -42731,8 +42866,11 @@ } }, { - "aliasColors": {}, - "bars": false, + "aliasColors": { + "leader-changed": "blue", + "region-changed": "purple" + }, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", @@ -42746,8 +42884,8 @@ "gridPos": { "h": 6, "w": 6, - "x": 18, - "y": 98 + "x": 12, + "y": 103 }, "hiddenSeries": false, "id": 14915, @@ -42760,6 +42898,107 @@ "total": false, "values": false }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": false + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": 
true, + "expr": "sum(increase(tikv_log_backup_initial_scan_reason{instance=~\"$instance\"}[$__rate_interval])) by (reason)", + "interval": "", + "legendFormat": "{{ message }}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Initial Scanning Trigger Reason", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2608", + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:2609", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "del": "dark-red", + "put": "green" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 103 + }, + "hiddenSeries": false, + "id": 15176, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, "lines": true, "linewidth": 1, "nullPointMode": "null", @@ -42767,7 +43006,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -42778,9 +43017,9 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_log_backup_initial_scan_reason[$__rate_interval])) by (reason)", + "expr": "sum(rate(tikv_log_backup_metadata_key_operation{instance=~\"$instance\"}[$__rate_interval])) by (type)", "interval": "", - "legendFormat": "{{ message }}", + "legendFormat": 
"{{ type }}", "queryType": "randomWalk", "refId": "A" } @@ -42789,7 +43028,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Initial Scanning Trigger Reason Rate", + "title": "Region Checkpoint Key Putting", "tooltip": { "shared": true, "sort": 0, @@ -42827,6 +43066,892 @@ "align": false, "alignLevel": null } + }, + { + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 109 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 15544, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tidb_log_backup_advancer_batch_size_bucket{type=\"checkpoint\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Request Checkpoint Batch Size", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "none", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + 
"colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 109 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 15716, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tidb_log_backup_advancer_tick_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", step=~\"tick\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Tick Duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": { + "epoch-not-match": "purple", + "not-leader": "blue", + "watch_task": "orange" + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The reason of advancer failed to be advanced.", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 109 + }, + "hiddenSeries": false, + "id": 23763572666, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": 
false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tidb_log_backup_region_request_failure{reason!=\"retryable-scan-region\"}[$__interval])) by (reason)", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ reason }}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Region Checkpoint Failure Reason", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:103", + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:104", + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "fail": "red", + "success": "green", + "watch_task": "orange" + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The result of getting region checkpoints.", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 109 + }, + "hiddenSeries": false, + "id": 23763572665, + "legend": { + 
"alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:834", + "alias": "fail", + "transform": "negative-Y", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tidb_log_backup_region_request[$__interval])) by (result)", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ result }}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request Result", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:103", + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:104", + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "watch_task": "orange" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The internal handling message duration.", + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 116 + 
}, + "hiddenSeries": false, + "id": 15359, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:1017", + "alias": "consistency-check", + "yaxis": 1 + }, + { + "$$hashKey": "object:1018", + "alias": "get-checkpoints-of-store", + "yaxis": 2 + }, + { + "$$hashKey": "object:1019", + "alias": "get-checkpoints-in-range", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(histogram_quantile(0.99, rate(tidb_log_backup_advancer_tick_duration_sec_bucket[10m]))) by (step)", + "interval": "", + "legendFormat": "{{ step }}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Tick Duration (P99)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:103", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:104", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "watch_task": "orange" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The internal handling message duration.", + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": 
[] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 116 + }, + "hiddenSeries": false, + "id": 15360, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:1091", + "alias": "get-checkpoints-of-store", + "yaxis": 2 + }, + { + "$$hashKey": "object:1092", + "alias": "get-checkpoints-in-range", + "yaxis": 2 + }, + { + "$$hashKey": "object:1093", + "alias": "consistency-check", + "yaxis": 1 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(histogram_quantile(0.9, rate(tidb_log_backup_advancer_tick_duration_sec_bucket[10m]))) by (step)", + "interval": "", + "legendFormat": "{{ step }}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Tick Duration (P90)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:103", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:104", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "watch_task": "orange" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The frequent 
of getting region level checkpoint.", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 116 + }, + "hiddenSeries": false, + "id": 23763572733, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:1091", + "alias": "get-checkpoints-of-store", + "yaxis": 2 + }, + { + "$$hashKey": "object:1092", + "alias": "get-checkpoints-in-range", + "yaxis": 2 + }, + { + "$$hashKey": "object:1093", + "alias": "consistency-check", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "rate(tidb_log_backup_advancer_tick_duration_sec_count{step=\"get-regions-in-range\"}[$__rate_interval])", + "interval": "", + "legendFormat": "{{ step }} {{ instance }}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Get Region Operation Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:103", + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:104", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + 
"watch_task": "orange" + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The variant of checkpoint group.", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 116 + }, + "hiddenSeries": false, + "id": 23763572734, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:1091", + "alias": "get-checkpoints-of-store", + "yaxis": 2 + }, + { + "$$hashKey": "object:1092", + "alias": "get-checkpoints-in-range", + "yaxis": 2 + }, + { + "$$hashKey": "object:1093", + "alias": "consistency-check", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "increase(tidb_log_backup_advancer_tick_duration_sec_count{step=\"try-advance\"}[$__interval])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ step }} {{ instance }}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Try Advance Trigger Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:103", + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:104", + "format": "s", + "label": null, + "logBase": 1, 
+ "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "title": "Backup Log", From 21e3bd64e8de517e09836cda4991b446914f3f75 Mon Sep 17 00:00:00 2001 From: ekexium Date: Tue, 19 Jul 2022 11:05:07 +0800 Subject: [PATCH 0092/1149] make the smallest bucket size 10 us for many metrics (#13037) close tikv/tikv#13036 Change the smallest bucket size from 500us to 10 us for many metrics Signed-off-by: ekexium Co-authored-by: Ti Chi Robot --- .../engine_rocks/src/perf_context_metrics.rs | 4 ++-- components/external_storage/src/metrics.rs | 2 +- components/raftstore/src/store/metrics.rs | 16 ++++++++-------- components/raftstore/src/store/worker/metrics.rs | 4 ++-- src/coprocessor/metrics.rs | 8 ++++---- src/server/lock_manager/metrics.rs | 2 +- src/server/metrics.rs | 6 +++--- src/storage/metrics.rs | 10 +++++----- src/storage/txn/scheduler.rs | 6 +++--- 9 files changed, 29 insertions(+), 29 deletions(-) diff --git a/components/engine_rocks/src/perf_context_metrics.rs b/components/engine_rocks/src/perf_context_metrics.rs index cca9f551bc1..d384fc96dc9 100644 --- a/components/engine_rocks/src/perf_context_metrics.rs +++ b/components/engine_rocks/src/perf_context_metrics.rs @@ -26,14 +26,14 @@ lazy_static! 
{ "tikv_raftstore_apply_perf_context_time_duration_secs", "Bucketed histogram of request wait time duration.", &["type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); pub static ref STORE_PERF_CONTEXT_TIME_HISTOGRAM: HistogramVec = register_histogram_vec!( "tikv_raftstore_store_perf_context_time_duration_secs", "Bucketed histogram of request wait time duration.", &["type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); pub static ref STORAGE_ROCKSDB_PERF_COUNTER: IntCounterVec = register_int_counter_vec!( diff --git a/components/external_storage/src/metrics.rs b/components/external_storage/src/metrics.rs index 1cb0c37cfa8..99dabca158e 100644 --- a/components/external_storage/src/metrics.rs +++ b/components/external_storage/src/metrics.rs @@ -8,7 +8,7 @@ lazy_static! { "tikv_external_storage_create_seconds", "Bucketed histogram of creating external storage duration", &["type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); } diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 69d84f45056..a983feb7909 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -429,21 +429,21 @@ lazy_static! 
{ register_histogram!( "tikv_raftstore_commit_log_duration_seconds", "Bucketed histogram of peer commits logs duration.", - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); pub static ref STORE_APPLY_LOG_HISTOGRAM: Histogram = register_histogram!( "tikv_raftstore_apply_log_duration_seconds", "Bucketed histogram of peer applying log duration.", - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); pub static ref APPLY_TASK_WAIT_TIME_HISTOGRAM: Histogram = register_histogram!( "tikv_raftstore_apply_wait_time_duration_secs", "Bucketed histogram of apply task wait time duration.", - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); pub static ref STORE_RAFT_READY_COUNTER_VEC: IntCounterVec = @@ -494,7 +494,7 @@ lazy_static! { "tikv_raftstore_raft_process_duration_secs", "Bucketed histogram of peer processing raft duration.", &["type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); pub static ref PEER_PROPOSE_LOG_SIZE_HISTOGRAM: Histogram = @@ -525,7 +525,7 @@ lazy_static! { register_histogram!( "tikv_raftstore_request_wait_time_duration_secs", "Bucketed histogram of request wait time duration.", - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); pub static ref PEER_GC_RAFT_LOG_COUNTER: IntCounter = @@ -655,7 +655,7 @@ lazy_static! { "tikv_raftstore_apply_perf_context_time_duration_secs", "Bucketed histogram of request wait time duration.", &["type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); pub static ref STORE_PERF_CONTEXT_TIME_HISTOGRAM: HistogramVec = @@ -663,7 +663,7 @@ lazy_static! 
{ "tikv_raftstore_store_perf_context_time_duration_secs", "Bucketed histogram of request wait time duration.", &["type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); pub static ref APPLY_PERF_CONTEXT_TIME_HISTOGRAM_STATIC: PerfContextTimeDuration= @@ -755,7 +755,7 @@ lazy_static! { "tikv_raftstore_inspect_duration_seconds", "Bucketed histogram of inspect duration.", &["type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); pub static ref STORE_SLOW_SCORE_GAUGE: Gauge = diff --git a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index 75ffc17c72b..e119fcdc3ab 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -72,7 +72,7 @@ lazy_static! { "tikv_raftstore_snapshot_duration_seconds", "Bucketed histogram of raftstore snapshot process duration", &["type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); pub static ref SNAP_HISTOGRAM: SnapHistogram = @@ -80,7 +80,7 @@ lazy_static! { pub static ref CHECK_SPILT_HISTOGRAM: Histogram = register_histogram!( "tikv_raftstore_check_split_duration_seconds", "Bucketed histogram of raftstore split check duration", - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); pub static ref COMPACT_RANGE_CF: HistogramVec = register_histogram_vec!( diff --git a/src/coprocessor/metrics.rs b/src/coprocessor/metrics.rs index f95ff6ee4db..d757ec49d62 100644 --- a/src/coprocessor/metrics.rs +++ b/src/coprocessor/metrics.rs @@ -96,7 +96,7 @@ lazy_static! 
{ "tikv_coprocessor_request_duration_seconds", "Bucketed histogram of coprocessor request duration", &["req"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); pub static ref COPR_REQ_HISTOGRAM_STATIC: CoprReqHistogram = @@ -105,7 +105,7 @@ lazy_static! { "tikv_coprocessor_request_handle_seconds", "Bucketed histogram of coprocessor handle request duration", &["req"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); pub static ref COPR_REQ_HANDLE_TIME_STATIC: CoprReqHistogram = @@ -114,7 +114,7 @@ lazy_static! { "tikv_coprocessor_request_wait_seconds", "Bucketed histogram of coprocessor request wait duration", &["req", "type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); pub static ref COPR_REQ_WAIT_TIME_STATIC: ReqWaitHistogram = @@ -123,7 +123,7 @@ lazy_static! { "tikv_coprocessor_request_handler_build_seconds", "Bucketed histogram of coprocessor request handler build duration", &["req"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); pub static ref COPR_REQ_HANDLER_BUILD_TIME_STATIC: CoprReqHistogram = diff --git a/src/server/lock_manager/metrics.rs b/src/server/lock_manager/metrics.rs index 10fac63b1b7..f400652966b 100644 --- a/src/server/lock_manager/metrics.rs +++ b/src/server/lock_manager/metrics.rs @@ -51,7 +51,7 @@ lazy_static! 
{ pub static ref WAITER_LIFETIME_HISTOGRAM: Histogram = register_histogram!( "tikv_lock_manager_waiter_lifetime_duration", "Duration of waiters' lifetime in seconds", - exponential_buckets(0.0005, 2.0, 20).unwrap() // 0.5ms ~ 524s + exponential_buckets(0.00001, 2.0, 26).unwrap() // 0.5ms ~ 524s ) .unwrap(); pub static ref DETECT_DURATION_HISTOGRAM: Histogram = register_histogram!( diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 9cd8631b275..0d24c9f798b 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -321,7 +321,7 @@ lazy_static! { "tikv_gcworker_gc_task_duration_vec", "Duration of gc tasks execution", &["task"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); pub static ref GC_TOO_BUSY_COUNTER: IntCounter = register_int_counter!( @@ -360,7 +360,7 @@ lazy_static! { pub static ref TTL_CHECKER_COMPACT_DURATION_HISTOGRAM: Histogram = register_histogram!( "tikv_ttl_checker_compact_duration", "Duration of ttl checker compact files execution", - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); pub static ref TTL_CHECKER_POLL_INTERVAL_GAUGE: IntGauge = register_int_gauge!( @@ -492,7 +492,7 @@ lazy_static! { "tikv_storage_engine_async_request_duration_seconds", "Bucketed histogram of processing successful asynchronous requests.", &["type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); } diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index 95f5809ec9e..07f1143bcb0 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -461,13 +461,13 @@ lazy_static! 
{ register_histogram!( "tikv_scheduler_throttle_duration_seconds", "Bucketed histogram of peer commits logs duration.", - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); pub static ref SCHED_HISTOGRAM_VEC: HistogramVec = register_histogram_vec!( "tikv_scheduler_command_duration_seconds", "Bucketed histogram of command execution", &["type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); pub static ref SCHED_HISTOGRAM_VEC_STATIC: SchedDurationVec = @@ -476,7 +476,7 @@ lazy_static! { "tikv_scheduler_latch_wait_duration_seconds", "Bucketed histogram of latch wait", &["type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); pub static ref SCHED_LATCH_HISTOGRAM_VEC: SchedLatchDurationVec = @@ -485,7 +485,7 @@ lazy_static! { "tikv_scheduler_processing_read_duration_seconds", "Bucketed histogram of processing read duration", &["type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); pub static ref SCHED_PROCESSING_READ_HISTOGRAM_STATIC: ProcessingReadVec = @@ -494,7 +494,7 @@ lazy_static! 
{ "tikv_scheduler_processing_write_duration_seconds", "Bucketed histogram of processing write duration", &["type"], - exponential_buckets(0.0005, 2.0, 20).unwrap() + exponential_buckets(0.00001, 2.0, 26).unwrap() ) .unwrap(); pub static ref SCHED_TOO_BUSY_COUNTER: IntCounterVec = register_int_counter_vec!( diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index a9b34b9b189..e78dbdaa49d 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -155,10 +155,10 @@ impl TaskContext { owned: AtomicBool::new(false), write_bytes, tag, - latch_timer: Instant::now_coarse(), + latch_timer: Instant::now(), _cmd_timer: CmdTimer { tag, - begin: Instant::now_coarse(), + begin: Instant::now(), }, } } @@ -701,7 +701,7 @@ impl Scheduler { fail_point!("scheduler_async_snapshot_finish"); SCHED_STAGE_COUNTER_VEC.get(tag).process.inc(); - let timer = Instant::now_coarse(); + let timer = Instant::now(); let region_id = task.cmd.ctx().get_region_id(); let ts = task.cmd.ts(); From 0dff1be50281c72c55c5464751cd733032115ce4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 19 Jul 2022 12:23:07 +0800 Subject: [PATCH 0093/1149] log-backup: don't run the cond function if the entry state is removal (#13043) close tikv/tikv#13044 `SubscriptionManager::deregister_region_if` won't call the `cond` argument if the target has been removed by setting the state to `Removal`. 
This would fix some patterns like: let mut exists = false; subs.deregister_region_if(42,|_, _| exists = true); do_with(exists); Signed-off-by: Yu Juncen Co-authored-by: Ti Chi Robot --- .../backup-stream/src/subscription_manager.rs | 9 ++++ .../backup-stream/src/subscription_track.rs | 23 ++++++--- components/backup-stream/tests/mod.rs | 48 +++++++++++++++++++ 3 files changed, 74 insertions(+), 6 deletions(-) diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index 28c1ed6dd78..0b415f95bf6 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -598,6 +598,15 @@ where } async fn get_last_checkpoint_of(&self, task: &str, region: &Region) -> Result { + #[cfg(feature = "failpoints")] + fail::fail_point!("get_last_checkpoint_of", |hint| Err(Error::Other( + box_err!( + "get_last_checkpoint_of({}, {:?}) failed because {:?}", + task, + region, + hint + ) + ))); let meta_cli = self.meta_cli.clone(); let cp = meta_cli.get_region_checkpoint(task, region).await?; debug!("got region checkpoint"; "region_id" => %region.get_id(), "checkpoint" => ?cp); diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index e8a22f9840e..30063089804 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -164,15 +164,26 @@ impl SubscriptionTracer { let region_id = region.get_id(); let remove_result = self.0.get_mut(®ion_id); match remove_result { - Some(mut o) if if_cond(o.value(), region) => { - if o.state != SubscriptionState::Removal { + Some(mut o) => { + // If the state is 'removal', we should act as if the region subscription + // has been removed: the callback should not be called because somebody may + // use this method to check whether a key exists: + // ``` + // let mut present = false; + // 
deregister_region_if(42, |..| { present = true; }); + // ``` + // At that time, if we call the callback with stale value, the called may get false positive. + if o.state == SubscriptionState::Removal { + return false; + } + if if_cond(o.value(), region) { TRACK_REGION.dec(); + o.value_mut().stop(); + info!("stop listen stream from store"; "observer" => ?o.value(), "region_id"=> %region_id); + return true; } - o.value_mut().stop(); - info!("stop listen stream from store"; "observer" => ?o.value(), "region_id"=> %region_id); - true + false } - Some(_) => false, None => { warn!("trying to deregister region not registered"; "region_id" => %region_id); false diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index 9ba59a181b2..b9559d86c1f 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -870,4 +870,52 @@ mod test { keys.union(&keys2).map(|s| s.as_slice()), ); } + + #[test] + fn failed_during_refresh_region() { + defer! 
{ + fail::remove("get_last_checkpoint_of") + } + + let mut suite = SuiteBuilder::new_named("fail_to_refresh_region") + .nodes(1) + .use_v3() + .build(); + + suite.must_register_task(1, "fail_to_refresh_region"); + let keys = run_async_test(suite.write_records(0, 128, 1)); + fail::cfg( + "get_last_checkpoint_of", + "1*return(the stream handler wants to become a batch processor, and the batch processor wants to be a stream handler.)", + ).unwrap(); + + suite.must_split(b"SOLE"); + let keys2 = run_async_test(suite.write_records(256, 128, 1)); + suite.force_flush_files("fail_to_refresh_region"); + suite.wait_for_flush(); + suite.check_for_write_records( + suite.flushed_files.path(), + keys.union(&keys2).map(|s| s.as_slice()), + ); + let leader = suite.cluster.leader_of_region(1).unwrap().store_id; + let (tx, rx) = std::sync::mpsc::channel(); + suite.endpoints[&leader] + .scheduler() + .schedule(Task::RegionCheckpointsOp(RegionCheckpointOperation::Get( + RegionSet::Universal, + Box::new(move |rs| { + let _ = tx.send(rs); + }), + ))) + .unwrap(); + + let regions = rx.recv_timeout(Duration::from_secs(10)).unwrap(); + assert!( + regions.iter().all(|item| { + matches!(item, GetCheckpointResult::Ok { checkpoint, .. } if checkpoint.into_inner() > 500) + }), + "{:?}", + regions + ); + } } From 82e8f865cbdaba5e08fc0fedcefec0b7ea877b70 Mon Sep 17 00:00:00 2001 From: MoCuishle28 <32541204+MoCuishle28@users.noreply.github.com> Date: Wed, 20 Jul 2022 13:03:08 +0800 Subject: [PATCH 0094/1149] br: Adjust the backup organization structure (#12958) close tikv/tikv#13063 Adjust the backup organization structure and add a store_id related prefix under the backup path. 
Signed-off-by: Gaoming Signed-off-by: MoCuishle28 <32541204+MoCuishle28@users.noreply.github.com> Signed-off-by: zhanggaoming Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/backup/Cargo.toml | 1 + components/backup/src/endpoint.rs | 104 ++++++++++++++++++----- components/backup/src/writer.rs | 4 +- components/cloud/aws/src/lib.rs | 2 +- components/cloud/aws/src/s3.rs | 2 +- components/external_storage/src/lib.rs | 2 +- components/external_storage/src/local.rs | 2 +- tests/integrations/backup/mod.rs | 6 +- 9 files changed, 96 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5ddd904e637..0dd646d56e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -395,6 +395,7 @@ version = "0.0.1" dependencies = [ "api_version", "async-channel", + "aws", "causal_ts", "collections", "concurrency_manager", diff --git a/components/backup/Cargo.toml b/components/backup/Cargo.toml index 85131c8e68f..a59f8949b77 100644 --- a/components/backup/Cargo.toml +++ b/components/backup/Cargo.toml @@ -35,6 +35,7 @@ failpoints = ["tikv/failpoints"] [dependencies] api_version = { path = "../api_version", default-features = false } async-channel = "1.4" +aws = { path = "../cloud/aws" } causal_ts = { path = "../causal_ts" } collections = { path = "../collections" } concurrency_manager = { path = "../concurrency_manager", default-features = false } diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 0734af017d2..bbcf33d7899 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -294,6 +294,7 @@ impl BackupRange { backup_ts: TimeStamp, begin_ts: TimeStamp, saver: async_channel::Sender, + storage_name: &str, ) -> Result { assert!(!self.codec.is_raw_kv); @@ -363,7 +364,7 @@ impl BackupRange { .start_key .clone() .map_or_else(Vec::new, |k| k.into_raw().unwrap()); - let mut writer = writer_builder.build(next_file_start_key.clone())?; + let mut writer = writer_builder.build(next_file_start_key.clone(), 
storage_name)?; loop { if let Err(e) = scanner.scan_entries(&mut batch) { error!(?e; "backup scan entries failed"); @@ -397,7 +398,7 @@ impl BackupRange { send_to_worker_with_metrics(&saver, msg).await?; next_file_start_key = this_end_key; writer = writer_builder - .build(next_file_start_key.clone()) + .build(next_file_start_key.clone(), storage_name) .map_err(|e| { error_unknown!(?e; "backup writer failed"); e @@ -892,7 +893,7 @@ impl Endpoint { let input = brange.codec.decode_backup_key(Some(k)).unwrap_or_default(); file_system::sha256(&input).ok().map(hex::encode) }); - let name = backup_file_name(store_id, &brange.region, key); + let name = backup_file_name(store_id, &brange.region, key, _backend.name()); let ct = to_sst_compression_type(request.compression_type); let stat = if is_raw_kv { @@ -928,6 +929,7 @@ impl Endpoint { backup_ts, start_ts, saver_tx.clone(), + _backend.name(), ) .await }; @@ -1090,26 +1092,58 @@ fn get_max_start_key(start_key: Option<&Key>, region: &Region) -> Option { /// A name consists with five parts: store id, region_id, a epoch version, the hash of range start key and timestamp. /// range start key is used to keep the unique file name for file, to handle different tables exists on the same region. /// local unix timestamp is used to keep the unique file name for file, to handle receive the same request after connection reset. 
-pub fn backup_file_name(store_id: u64, region: &Region, key: Option) -> String { +pub fn backup_file_name( + store_id: u64, + region: &Region, + key: Option, + storage_name: &str, +) -> String { let start = SystemTime::now(); let since_the_epoch = start .duration_since(UNIX_EPOCH) .expect("Time went backwards"); - match key { - Some(k) => format!( - "{}_{}_{}_{}_{}", - store_id, - region.get_id(), - region.get_region_epoch().get_version(), - k, - since_the_epoch.as_millis() - ), - None => format!( - "{}_{}_{}", - store_id, - region.get_id(), - region.get_region_epoch().get_version() - ), + + match (key, storage_name) { + // See https://github.com/pingcap/tidb/issues/30087 + // To avoid 503 Slow Down error, if the backup storage is s3, + // organize the backup files by store_id (use slash (/) as delimiter). + (Some(k), aws::STORAGE_NAME | external_storage::local::STORAGE_NAME) => { + format!( + "{}/{}_{}_{}_{}", + store_id, + region.get_id(), + region.get_region_epoch().get_version(), + k, + since_the_epoch.as_millis() + ) + } + (Some(k), _) => { + format!( + "{}_{}_{}_{}_{}", + store_id, + region.get_id(), + region.get_region_epoch().get_version(), + k, + since_the_epoch.as_millis() + ) + } + + (None, aws::STORAGE_NAME | external_storage::local::STORAGE_NAME) => { + format!( + "{}/{}_{}", + store_id, + region.get_id(), + region.get_region_epoch().get_version() + ) + } + (None, _) => { + format!( + "{}_{}_{}", + store_id, + region.get_id(), + region.get_region_epoch().get_version() + ) + } } } @@ -1974,4 +2008,36 @@ pub mod tests { drop(pool); std::thread::sleep(Duration::from_millis(150)); } + + #[test] + fn test_backup_file_name() { + let region = metapb::Region::default(); + let store_id = 1; + let test_cases = vec!["s3", "local", "gcs", "azure", "hdfs"]; + let test_target = vec![ + "1/0_0_000", + "1/0_0_000", + "1_0_0_000", + "1_0_0_000", + "1_0_0_000", + ]; + + let delimiter = "_"; + for (storage_name, target) in test_cases.iter().zip(test_target.iter()) { + 
let key = Some(String::from("000")); + let filename = backup_file_name(store_id, ®ion, key, storage_name); + + let mut prefix_arr: Vec<&str> = filename.split(delimiter).collect(); + prefix_arr.remove(prefix_arr.len() - 1); + + assert_eq!(target.to_string(), prefix_arr.join(delimiter)); + } + + let test_target = vec!["1/0_0", "1/0_0", "1_0_0", "1_0_0", "1_0_0"]; + for (storage_name, target) in test_cases.iter().zip(test_target.iter()) { + let key = None; + let filename = backup_file_name(store_id, ®ion, key, storage_name); + assert_eq!(target.to_string(), filename); + } + } } diff --git a/components/backup/src/writer.rs b/components/backup/src/writer.rs index 8408fb7c002..4c4c6dc5ec7 100644 --- a/components/backup/src/writer.rs +++ b/components/backup/src/writer.rs @@ -198,10 +198,10 @@ impl BackupWriterBuilder { } } - pub fn build(&self, start_key: Vec) -> Result { + pub fn build(&self, start_key: Vec, storage_name: &str) -> Result { let key = file_system::sha256(&start_key).ok().map(hex::encode); let store_id = self.store_id; - let name = backup_file_name(store_id, &self.region, key); + let name = backup_file_name(store_id, &self.region, key, storage_name); BackupWriter::new( self.db.clone(), &name, diff --git a/components/cloud/aws/src/lib.rs b/components/cloud/aws/src/lib.rs index 345302d0534..b6af7d64b48 100644 --- a/components/cloud/aws/src/lib.rs +++ b/components/cloud/aws/src/lib.rs @@ -5,6 +5,6 @@ mod kms; pub use kms::{AwsKms, ENCRYPTION_VENDOR_NAME_AWS_KMS}; mod s3; -pub use s3::{Config, S3Storage, STORAGE_VENDOR_NAME_AWS}; +pub use s3::{Config, S3Storage, STORAGE_NAME, STORAGE_VENDOR_NAME_AWS}; mod util; diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index b5cacb2266e..e2e9919860b 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -515,7 +515,7 @@ impl<'client> S3Uploader<'client> { } } -const STORAGE_NAME: &str = "s3"; +pub const STORAGE_NAME: &str = "s3"; #[async_trait] impl BlobStorage for 
S3Storage { diff --git a/components/external_storage/src/lib.rs b/components/external_storage/src/lib.rs index 0bad03cbcca..f1d1a617dc8 100644 --- a/components/external_storage/src/lib.rs +++ b/components/external_storage/src/lib.rs @@ -30,7 +30,7 @@ use tokio::time::timeout; mod hdfs; pub use hdfs::{HdfsConfig, HdfsStorage}; -mod local; +pub mod local; pub use local::LocalStorage; mod noop; pub use noop::NoopStorage; diff --git a/components/external_storage/src/local.rs b/components/external_storage/src/local.rs index 5fd899b17f9..3e307dca157 100644 --- a/components/external_storage/src/local.rs +++ b/components/external_storage/src/local.rs @@ -54,7 +54,7 @@ fn url_for(base: &Path) -> url::Url { u } -const STORAGE_NAME: &str = "local"; +pub const STORAGE_NAME: &str = "local"; #[async_trait] impl ExternalStorage for LocalStorage { diff --git a/tests/integrations/backup/mod.rs b/tests/integrations/backup/mod.rs index ccadcca674f..2990a983974 100644 --- a/tests/integrations/backup/mod.rs +++ b/tests/integrations/backup/mod.rs @@ -21,11 +21,11 @@ fn assert_same_file_name(s1: String, s2: String) { let tokens1: Vec<&str> = s1.split('_').collect(); let tokens2: Vec<&str> = s2.split('_').collect(); assert_eq!(tokens1.len(), tokens2.len()); - // 2_1_1_e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855_1609407693105_write.sst - // 2_1_1_e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855_1609407693199_write.sst + // 2/1_1_e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855_1609407693105_write.sst + // 2/1_1_e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855_1609407693199_write.sst // should be equal for i in 0..tokens1.len() { - if i != 4 { + if i != 3 { assert_eq!(tokens1[i], tokens2[i]); } } From 856caa1b30c152d82aa0d923fbc0a8253df1ffb4 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 19 Jul 2022 23:29:08 -0700 Subject: [PATCH 0095/1149] tikv: refactor StorageConfigManger to support multi-rocksdb (#12962) close 
tikv/tikv#12961 refactor StorageConfigManger to support multi-rocksdb Signed-off-by: qi.xu Co-authored-by: qi.xu --- components/engine_test/src/lib.rs | 17 ++++++-- components/engine_traits/src/engine.rs | 35 +++++++--------- components/server/src/server.rs | 9 ++-- src/config.rs | 5 ++- src/server/engine_factory.rs | 12 ++++-- src/server/engine_factory_v2.rs | 40 +++++++++++++++--- src/storage/config_manager.rs | 41 ++++++++----------- .../singleton_flow_controller.rs | 14 ++++++- tests/failpoints/cases/test_storage.rs | 3 +- 9 files changed, 112 insertions(+), 64 deletions(-) diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index b670ef34500..82373ac8568 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -90,7 +90,9 @@ pub mod kv { RocksEngine as KvTestEngine, RocksEngineIterator as KvTestEngineIterator, RocksSnapshot as KvTestSnapshot, RocksWriteBatchVec as KvTestWriteBatch, }; - use engine_traits::{Result, TabletAccessor, TabletFactory}; + use engine_traits::{ + CFOptionsExt, ColumnFamilyOptions, Result, TabletAccessor, TabletFactory, CF_DEFAULT, + }; use tikv_util::box_err; use crate::ctor::{CFOptions, DBOptions, KvEngineConstructorExt}; @@ -279,8 +281,17 @@ pub mod kv { new_engine } - fn clone(&self) -> Box + Send> { - Box::new(std::clone::Clone::clone(self)) + fn set_shared_block_cache_capacity( + &self, + capacity: u64, + ) -> std::result::Result<(), String> { + let reg = self.registry.lock().unwrap(); + // pick up any tablet and set the shared block cache capacity + if let Some(((_id, _suffix), tablet)) = (*reg).iter().next() { + let opt = tablet.get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap + opt.set_block_cache_capacity(capacity)?; + } + Ok(()) } } diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index de99f924038..9b560bcd65b 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -204,10 
+204,6 @@ pub trait TabletFactory: TabletAccessor { /// Tablets root path fn tablets_path(&self) -> PathBuf; - /// Clone the tablet factory instance - /// Here we don't use Clone traint because it will break the trait's object safty - fn clone(&self) -> Box + Send>; - /// Load the tablet from path for id and suffix--for scenarios such as applying snapshot fn load_tablet(&self, _path: &Path, _id: u64, _suffix: u64) -> Result { unimplemented!(); @@ -222,11 +218,13 @@ pub trait TabletFactory: TabletAccessor { fn is_tombstoned(&self, _region_id: u64, _suffix: u64) -> bool { unimplemented!(); } + + fn set_shared_block_cache_capacity(&self, capacity: u64) -> std::result::Result<(), String>; } pub struct DummyFactory where - EK: Clone + Send + 'static, + EK: CFOptionsExt + Clone + Send + 'static, { pub engine: Option, pub root_path: String, @@ -234,7 +232,7 @@ where impl TabletFactory for DummyFactory where - EK: Clone + Send + 'static, + EK: CFOptionsExt + Clone + Send + 'static, { fn create_tablet(&self, _id: u64, _suffix: u64) -> Result { Ok(self.engine.as_ref().unwrap().clone()) @@ -258,22 +256,19 @@ where PathBuf::from(&self.root_path) } - fn clone(&self) -> Box + Send> { - if self.engine.is_none() { - return Box::>::new(DummyFactory { - engine: None, - root_path: self.root_path.clone(), - }); - } - Box::>::new(DummyFactory { - engine: Some(self.engine.as_ref().unwrap().clone()), - root_path: self.root_path.clone(), - }) + fn set_shared_block_cache_capacity(&self, capacity: u64) -> std::result::Result<(), String> { + let opt = self + .engine + .as_ref() + .unwrap() + .get_options_cf(CF_DEFAULT) + .unwrap(); // FIXME unwrap + opt.set_block_cache_capacity(capacity) } } impl TabletAccessor for DummyFactory where - EK: Clone + Send + 'static, + EK: CFOptionsExt + Clone + Send + 'static, { fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &EK)) { if let Some(engine) = &self.engine { @@ -288,14 +283,14 @@ where impl DummyFactory where - EK: Clone + Send + 
'static, + EK: CFOptionsExt + Clone + Send + 'static, { pub fn new(engine: Option, root_path: String) -> DummyFactory { DummyFactory { engine, root_path } } } -impl Default for DummyFactory { +impl Default for DummyFactory { fn default() -> Self { Self::new(None, "/tmp".to_string()) } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index c0ed12bf73c..7911447368e 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -232,6 +232,7 @@ struct TiKvServer { sst_worker: Option>>, quota_limiter: Arc, causal_ts_provider: Option>>, // used for rawkv apiv2 + tablet_factory: Option + Send + Sync>>, } struct TiKvEngines { @@ -352,6 +353,7 @@ impl TiKvServer { sst_worker: None, quota_limiter, causal_ts_provider, + tablet_factory: None, } } @@ -735,7 +737,7 @@ impl TiKvServer { cfg_controller.register( tikv::config::Module::Storage, Box::new(StorageConfigManger::new( - self.engines.as_ref().unwrap().engine.kv_engine(), + self.tablet_factory.as_ref().unwrap().clone(), self.config.storage.block_cache.shared, ttl_scheduler, flow_controller, @@ -1649,7 +1651,7 @@ impl TiKvServer { if let Some(cache) = block_cache { builder = builder.block_cache(cache); } - let factory = builder.build(); + let factory = Arc::new(builder.build()); let kv_engine = factory .create_shared_db() .unwrap_or_else(|s| fatal!("failed to create kv engine: {}", s)); @@ -1659,11 +1661,12 @@ impl TiKvServer { cfg_controller.register( tikv::config::Module::Rocksdb, Box::new(DBConfigManger::new( - Arc::new(factory), + factory.clone(), DBType::Kv, self.config.storage.block_cache.shared, )), ); + self.tablet_factory = Some(factory); engines .raft .register_config(cfg_controller, self.config.storage.block_cache.shared); diff --git a/src/config.rs b/src/config.rs index 9b06da58926..98aabb20369 100644 --- a/src/config.rs +++ b/src/config.rs @@ -3990,7 +3990,8 @@ mod tests { use api_version::{ApiV1, KvFormat}; use case_macros::*; use engine_traits::{ - 
ColumnFamilyOptions as ColumnFamilyOptionsTrait, DBOptions as DBOptionsTrait, ALL_CFS, + ColumnFamilyOptions as ColumnFamilyOptionsTrait, DBOptions as DBOptionsTrait, DummyFactory, + ALL_CFS, }; use futures::executor::block_on; use grpcio::ResourceQuota; @@ -4446,7 +4447,7 @@ mod tests { cfg_controller.register( Module::Storage, Box::new(StorageConfigManger::new( - engine, + Arc::new(DummyFactory::new(Some(engine), "".to_string())), shared, scheduler, flow_controller.clone(), diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 04e1f72f05a..0de26bc43c4 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -11,7 +11,8 @@ use engine_rocks::{ RocksEventListener, }; use engine_traits::{ - CompactionJobInfo, Result, TabletAccessor, TabletFactory, CF_DEFAULT, CF_WRITE, + CFOptionsExt, ColumnFamilyOptions, CompactionJobInfo, Result, TabletAccessor, TabletFactory, + CF_DEFAULT, CF_WRITE, }; use kvproto::kvrpcpb::ApiVersion; use raftstore::RegionInfoAccessor; @@ -245,8 +246,13 @@ impl TabletFactory for KvEngineFactory { fn destroy_tablet(&self, _id: u64, _suffix: u64) -> engine_traits::Result<()> { Ok(()) } - fn clone(&self) -> Box + Send> { - Box::new(std::clone::Clone::clone(self)) + + fn set_shared_block_cache_capacity(&self, capacity: u64) -> std::result::Result<(), String> { + if let Ok(db) = self.inner.root_db.lock() { + let opt = db.as_ref().unwrap().get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap + opt.set_block_cache_capacity(capacity)?; + } + Ok(()) } } diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index d1cc29bc88f..2dfe297e5d8 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -7,7 +7,9 @@ use std::{ use collections::HashMap; use engine_rocks::RocksEngine; -use engine_traits::{Result, TabletAccessor, TabletFactory}; +use engine_traits::{ + CFOptionsExt, ColumnFamilyOptions, Result, TabletAccessor, TabletFactory, CF_DEFAULT, +}; use 
crate::server::engine_factory::KvEngineFactory; @@ -158,8 +160,14 @@ impl TabletFactory for KvEngineFactoryV2 { new_engine } - fn clone(&self) -> Box + Send> { - Box::new(std::clone::Clone::clone(self)) + fn set_shared_block_cache_capacity(&self, capacity: u64) -> std::result::Result<(), String> { + let reg = self.registry.lock().unwrap(); + // pick up any tablet and set the shared block cache capacity + if let Some(((_id, _suffix), tablet)) = (*reg).iter().next() { + let opt = tablet.get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap + opt.set_block_cache_capacity(capacity)?; + } + Ok(()) } } @@ -180,7 +188,7 @@ impl TabletAccessor for KvEngineFactoryV2 { #[cfg(test)] mod tests { - use engine_traits::TabletFactory; + use engine_traits::{TabletFactory, CF_WRITE}; use super::*; use crate::{config::TiKvConfig, server::KvEngineFactoryBuilder}; @@ -212,10 +220,15 @@ mod tests { #[test] fn test_kvengine_factory() { let cfg = TEST_CONFIG.clone(); + assert!(cfg.storage.block_cache.shared); + let cache = cfg.storage.block_cache.build_shared_cache(); let dir = test_util::temp_dir("test_kvengine_factory", false); let env = cfg.build_shared_rocks_env(None, None).unwrap(); - let builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); + let mut builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); + if let Some(cache) = cache { + builder = builder.block_cache(cache); + } let factory = builder.build(); let shared_db = factory.create_shared_db().unwrap(); let tablet = TabletFactory::create_tablet(&factory, 1, 10); @@ -240,15 +253,25 @@ mod tests { assert_eq!(count, 1); assert!(factory.is_single_engine()); assert!(shared_db.is_single_engine()); + factory + .set_shared_block_cache_capacity(1024 * 1024) + .unwrap(); + let opt = shared_db.get_options_cf(CF_DEFAULT).unwrap(); + assert_eq!(opt.get_block_cache_capacity(), 1024 * 1024); } #[test] fn test_kvengine_factory_v2() { let cfg = TEST_CONFIG.clone(); + assert!(cfg.storage.block_cache.shared); + let cache = 
cfg.storage.block_cache.build_shared_cache(); let dir = test_util::temp_dir("test_kvengine_factory_v2", false); let env = cfg.build_shared_rocks_env(None, None).unwrap(); - let builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); + let mut builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); + if let Some(cache) = cache { + builder = builder.block_cache(cache); + } let inner_factory = builder.build(); let factory = KvEngineFactoryV2::new(inner_factory); let tablet = factory.create_tablet(1, 10); @@ -263,6 +286,11 @@ mod tests { let tablet_path = factory.tablet_path(1, 10); let result = factory.open_tablet_raw(&tablet_path, false); assert!(result.is_err()); + factory + .set_shared_block_cache_capacity(1024 * 1024) + .unwrap(); + let opt = tablet.get_options_cf(CF_WRITE).unwrap(); + assert_eq!(opt.get_block_cache_capacity(), 1024 * 1024); assert!(factory.exists(1, 10)); assert!(!factory.exists(1, 11)); diff --git a/src/storage/config_manager.rs b/src/storage/config_manager.rs index 217ebbb25c8..d3d051ac5f9 100644 --- a/src/storage/config_manager.rs +++ b/src/storage/config_manager.rs @@ -4,7 +4,7 @@ use std::{convert::TryInto, sync::Arc}; -use engine_traits::{CFNamesExt, CFOptionsExt, ColumnFamilyOptions, CF_DEFAULT}; +use engine_traits::{CFNamesExt, CFOptionsExt, TabletFactory, CF_DEFAULT}; use file_system::{get_io_rate_limiter, IOPriority, IOType}; use online_config::{ConfigChange, ConfigManager, ConfigValue, Result as CfgResult}; use strum::IntoEnumIterator; @@ -20,7 +20,7 @@ use crate::{ }; pub struct StorageConfigManger { - kvdb: ::Local, + tablet_factory: Arc + Send + Sync>, shared_block_cache: bool, ttl_checker_scheduler: Scheduler, flow_controller: Arc, @@ -32,14 +32,14 @@ unsafe impl Sync for StorageConfigManger {} impl StorageConfigManger { pub fn new( - kvdb: ::Local, + tablet_factory: Arc + Send + Sync>, shared_block_cache: bool, ttl_checker_scheduler: Scheduler, flow_controller: Arc, scheduler: TxnScheduler, ) -> Self { 
StorageConfigManger { - kvdb, + tablet_factory, shared_block_cache, ttl_checker_scheduler, flow_controller, @@ -57,12 +57,7 @@ impl ConfigManager for StorageConfigManger { if let Some(size) = block_cache.remove("capacity") { if size != ConfigValue::None { let s: ReadableSize = size.into(); - // Hack: since all CFs in both kvdb and raftdb share a block cache, we can change - // the size through any of them. Here we change it through default CF in kvdb. - // A better way to do it is to hold the cache reference somewhere, and use it to - // change cache size. - let opt = self.kvdb.get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap - opt.set_block_cache_capacity(s.0)?; + self.tablet_factory.set_shared_block_cache_capacity(s.0)?; // Write config to metric CONFIG_ROCKSDB_GAUGE .with_label_values(&[CF_DEFAULT, "block_cache_size"]) @@ -77,21 +72,17 @@ impl ConfigManager for StorageConfigManger { } else if let Some(ConfigValue::Module(mut flow_control)) = change.remove("flow_control") { if let Some(v) = flow_control.remove("enable") { let enable: bool = v.into(); - if enable { - for cf in self.kvdb.cf_names() { - self.kvdb - .set_options_cf(cf, &[("disable_write_stall", "true")]) - .unwrap(); - } - self.flow_controller.enable(true); - } else { - for cf in self.kvdb.cf_names() { - self.kvdb - .set_options_cf(cf, &[("disable_write_stall", "false")]) - .unwrap(); - } - self.flow_controller.enable(false); - } + let enable_str = if enable { "true" } else { "false" }; + self.tablet_factory.for_each_opened_tablet( + &mut |_region_id, _suffix, tablet: &EK::Local| { + for cf in tablet.cf_names() { + tablet + .set_options_cf(cf, &[("disable_write_stall", enable_str)]) + .unwrap(); + } + }, + ); + self.flow_controller.enable(enable); } } else if let Some(v) = change.get("scheduler_worker_pool_size") { let pool_size: usize = v.into(); diff --git a/src/storage/txn/flow_controller/singleton_flow_controller.rs b/src/storage/txn/flow_controller/singleton_flow_controller.rs index 
76671412abc..edcac95aa00 100644 --- a/src/storage/txn/flow_controller/singleton_flow_controller.rs +++ b/src/storage/txn/flow_controller/singleton_flow_controller.rs @@ -989,7 +989,8 @@ impl FlowChecker { pub(super) mod tests { use std::sync::atomic::AtomicU64; - use engine_traits::Result; + use engine_rocks::RocksColumnFamilyOptions; + use engine_traits::{CFOptionsExt, Result}; use super::{super::FlowController, *}; @@ -1018,6 +1019,17 @@ pub(super) mod tests { } } + impl CFOptionsExt for EngineStub { + type ColumnFamilyOptions = RocksColumnFamilyOptions; + fn get_options_cf(&self, _cf: &str) -> Result { + unimplemented!(); + } + + fn set_options_cf(&self, _cf: &str, _options: &[(&str, &str)]) -> Result<()> { + unimplemented!(); + } + } + impl FlowControlFactorsExt for EngineStub { fn get_cf_num_files_at_level(&self, _cf: &str, _level: usize) -> Result> { Ok(Some(self.0.num_l0_files.load(Ordering::Relaxed))) diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 7d0bb8c0b74..c6872d22dab 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -12,6 +12,7 @@ use std::{ use api_version::KvFormat; use collections::HashMap; +use engine_traits::DummyFactory; use errors::{extract_key_error, extract_region_error}; use futures::executor::block_on; use grpcio::*; @@ -265,7 +266,7 @@ fn test_scale_scheduler_pool() { cfg_controller.register( Module::Storage, Box::new(StorageConfigManger::new( - kv_engine, + Arc::new(DummyFactory::new(Some(kv_engine), "".to_string())), cfg.storage.block_cache.shared, scheduler, flow_controller, From cfe62ba99d073893839d357c6d3770ceb3f60107 Mon Sep 17 00:00:00 2001 From: glorv Date: Wed, 20 Jul 2022 14:49:08 +0800 Subject: [PATCH 0096/1149] log: support dynamically change log level via sql (#13019) ref tikv/tikv#4935, ref tikv/tikv#12986 Signed-off-by: glorv Co-authored-by: Ti Chi Robot --- cmd/tikv-ctl/src/main.rs | 4 +- cmd/tikv-ctl/src/util.rs | 2 +- 
components/server/src/server.rs | 4 +- components/server/src/setup.rs | 12 +- components/tikv_util/src/logger/mod.rs | 8 +- src/config.rs | 177 ++++++++++++++++++------- src/server/status_server/mod.rs | 11 +- tests/integrations/config/mod.rs | 6 +- 8 files changed, 158 insertions(+), 66 deletions(-) diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 9609fffb9a5..67834db9c5d 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -62,7 +62,9 @@ fn main() { let cfg = cfg_path.map_or_else( || { let mut cfg = TiKvConfig::default(); - cfg.log.level = tikv_util::logger::get_level_by_string("warn").unwrap(); + cfg.log.level = tikv_util::logger::get_level_by_string("warn") + .unwrap() + .into(); cfg }, |path| { diff --git a/cmd/tikv-ctl/src/util.rs b/cmd/tikv-ctl/src/util.rs index c776f16f83d..36091b5a930 100644 --- a/cmd/tikv-ctl/src/util.rs +++ b/cmd/tikv-ctl/src/util.rs @@ -10,7 +10,7 @@ const LOG_DIR: &str = "./ctl-engine-info-log"; #[allow(clippy::field_reassign_with_default)] pub fn init_ctl_logger(level: &str) { let mut cfg = TiKvConfig::default(); - cfg.log.level = slog::Level::from_str(level).unwrap(); + cfg.log.level = slog::Level::from_str(level).unwrap().into(); cfg.rocksdb.info_log_dir = LOG_DIR.to_owned(); cfg.raftdb.info_log_dir = LOG_DIR.to_owned(); initial_logger(&cfg); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 7911447368e..8eca26404d9 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -81,7 +81,7 @@ use raftstore::{ }; use security::SecurityManager; use tikv::{ - config::{ConfigController, DBConfigManger, DBType, TiKvConfig}, + config::{ConfigController, DBConfigManger, DBType, LogConfigManager, TiKvConfig}, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, import::{ImportSstService, SstImporter}, @@ -622,6 +622,8 @@ impl TiKvServer { ))), ); + cfg_controller.register(tikv::config::Module::Log, 
Box::new(LogConfigManager)); + // Create cdc. let mut cdc_worker = Box::new(LazyWorker::new("cdc")); let cdc_scheduler = cdc_worker.scheduler(); diff --git a/components/server/src/setup.rs b/components/server/src/setup.rs index 0c657733f54..3e37d87242c 100644 --- a/components/server/src/setup.rs +++ b/components/server/src/setup.rs @@ -150,9 +150,11 @@ pub fn initial_logger(config: &TiKvConfig) { let drainer = logger::LogDispatcher::new(normal, rocksdb, raftdb, slow); let level = config.log.level; let slow_threshold = config.slow_log_threshold.as_millis(); - logger::init_log(drainer, level, true, true, vec![], slow_threshold).unwrap_or_else(|e| { - fatal!("failed to initialize log: {}", e); - }); + logger::init_log(drainer, level.into(), true, true, vec![], slow_threshold).unwrap_or_else( + |e| { + fatal!("failed to initialize log: {}", e); + }, + ); } macro_rules! do_build { @@ -235,8 +237,8 @@ pub fn initial_metric(cfg: &MetricConfig) { #[allow(dead_code)] pub fn overwrite_config_with_cmd_args(config: &mut TiKvConfig, matches: &ArgMatches<'_>) { if let Some(level) = matches.value_of("log-level") { - config.log.level = logger::get_level_by_string(level).unwrap(); - config.log_level = slog::Level::Info; + config.log.level = logger::get_level_by_string(level).unwrap().into(); + config.log_level = slog::Level::Info.into(); } if let Some(file) = matches.value_of("log-file") { diff --git a/components/tikv_util/src/logger/mod.rs b/components/tikv_util/src/logger/mod.rs index 36a5cf95baf..f4fd936cddc 100644 --- a/components/tikv_util/src/logger/mod.rs +++ b/components/tikv_util/src/logger/mod.rs @@ -78,7 +78,7 @@ where .overflow_strategy(SLOG_CHANNEL_OVERFLOW_STRATEGY) .thread_name(thd_name!("slogger")) .build_with_guard(); - let drain = async_log.filter_level(level).fuse(); + let drain = async_log.fuse(); let drain = SlowLogFilter { threshold: slow_threshold, inner: drain, @@ -87,7 +87,7 @@ where (slog::Logger::root(filtered, slog_o!()), Some(guard)) } else { - let 
drain = LogAndFuse(Mutex::new(drain).filter_level(level)); + let drain = LogAndFuse(Mutex::new(drain)); let drain = SlowLogFilter { threshold: slow_threshold, inner: drain, @@ -287,7 +287,9 @@ pub fn get_log_level() -> Option { } pub fn set_log_level(new_level: Level) { - LOG_LEVEL.store(new_level.as_usize(), Ordering::SeqCst) + LOG_LEVEL.store(new_level.as_usize(), Ordering::SeqCst); + // also change std log to new level. + let _ = slog_global::redirect_std_log(Some(new_level)); } pub struct TikvFormat diff --git a/src/config.rs b/src/config.rs index 98aabb20369..1f64f53dc58 100644 --- a/src/config.rs +++ b/src/config.rs @@ -8,6 +8,7 @@ use std::{ cmp, collections::{HashMap, HashSet}, + convert::TryFrom, error::Error, fs, i32, io::{Error as IoError, ErrorKind, Write}, @@ -21,7 +22,7 @@ use api_version::ApiV1Ttl; use causal_ts::Config as CausalTsConfig; use encryption_export::DataKeyManager; use engine_rocks::{ - config::{self as rocks_config, BlobRunMode, CompressionType, LogLevel}, + config::{self as rocks_config, BlobRunMode, CompressionType, LogLevel as RocksLogLevel}, get_env, properties::MvccPropertiesCollectorFactory, raw::{ @@ -53,11 +54,16 @@ use raftstore::{ }; use resource_metering::Config as ResourceMeteringConfig; use security::SecurityConfig; +use serde::{ + de::{Error as DError, Unexpected}, + Deserialize, Deserializer, Serialize, Serializer, +}; use serde_json::{to_value, Map, Value}; use tikv_util::{ config::{ self, LogFormat, RaftDataStateMachine, ReadableDuration, ReadableSize, TomlWriter, GIB, MIB, }, + logger::{get_level_by_string, get_string_by_level, set_log_level}, sys::SysQuota, time::duration_to_sec, yatp_pool, @@ -1004,7 +1010,7 @@ impl TitanDBConfig { #[serde(rename_all = "kebab-case")] pub struct DbConfig { #[online_config(skip)] - pub info_log_level: LogLevel, + pub info_log_level: RocksLogLevel, #[serde(with = "rocks_config::recovery_mode_serde")] #[online_config(skip)] pub wal_recovery_mode: DBRecoveryMode, @@ -1101,7 +1107,7 @@ impl 
Default for DbConfig { info_log_roll_time: ReadableDuration::secs(0), info_log_keep_log_file_num: 10, info_log_dir: "".to_owned(), - info_log_level: LogLevel::Info, + info_log_level: RocksLogLevel::Info, rate_bytes_per_sec: ReadableSize::gb(10), rate_limiter_refill_period: ReadableDuration::millis(100), rate_limiter_mode: DBRateLimiterMode::WriteOnly, @@ -1364,7 +1370,7 @@ pub struct RaftDbConfig { #[online_config(skip)] pub info_log_dir: String, #[online_config(skip)] - pub info_log_level: LogLevel, + pub info_log_level: RocksLogLevel, #[online_config(skip)] pub max_sub_compactions: u32, pub writable_file_max_buffer_size: ReadableSize, @@ -1409,7 +1415,7 @@ impl Default for RaftDbConfig { info_log_roll_time: ReadableDuration::secs(0), info_log_keep_log_file_num: 10, info_log_dir: "".to_owned(), - info_log_level: LogLevel::Info, + info_log_level: RocksLogLevel::Info, max_sub_compactions: bg_job_limits.max_sub_compactions as u32, writable_file_max_buffer_size: ReadableSize::mb(1), use_direct_io_for_flush_and_compaction: false, @@ -1804,33 +1810,6 @@ impl Default for MetricConfig { } } } - -pub mod log_level_serde { - use serde::{ - de::{Error, Unexpected}, - Deserialize, Deserializer, Serialize, Serializer, - }; - use slog::Level; - use tikv_util::logger::{get_level_by_string, get_string_by_level}; - - pub fn deserialize<'de, D>(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let string = String::deserialize(deserializer)?; - get_level_by_string(&string) - .ok_or_else(|| D::Error::invalid_value(Unexpected::Str(&string), &"a valid log level")) - } - - #[allow(clippy::trivially_copy_pass_by_ref)] - pub fn serialize(value: &Level, serializer: S) -> Result - where - S: Serializer, - { - get_string_by_level(*value).serialize(serializer) - } -} - #[derive(Clone, Copy, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] @@ -2690,21 +2669,86 @@ impl Default for File { } } -#[derive(Clone, Serialize, 
Deserialize, PartialEq, Debug)] +#[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] pub struct LogConfig { - #[serde(with = "log_level_serde")] - pub level: slog::Level, + pub level: LogLevel, + #[online_config(skip)] pub format: LogFormat, + #[online_config(skip)] pub enable_timestamp: bool, + #[online_config(skip)] pub file: File, } +/// LogLevel is a wrapper type of `slog::Level` +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct LogLevel(slog::Level); + +impl From for slog::Level { + fn from(l: LogLevel) -> Self { + l.0 + } +} + +impl From for LogLevel { + fn from(l: slog::Level) -> Self { + Self(l) + } +} + +impl Serialize for LogLevel { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + get_string_by_level(self.0).serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for LogLevel { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let string = String::deserialize(deserializer)?; + get_level_by_string(&string) + .map(LogLevel) + .ok_or_else(|| D::Error::invalid_value(Unexpected::Str(&string), &"a valid log level")) + } +} + +impl From for ConfigValue { + fn from(l: LogLevel) -> Self { + Self::String(get_string_by_level(l.0).into()) + } +} + +impl TryFrom for LogLevel { + type Error = String; + fn try_from(value: ConfigValue) -> Result { + if let ConfigValue::String(s) = value { + get_level_by_string(&s) + .map(LogLevel) + .ok_or_else(|| format!("invalid log level: '{}'", s)) + } else { + panic!("expect ConfigValue::String, found: {:?}", value) + } + } +} + +impl TryFrom<&ConfigValue> for LogLevel { + type Error = String; + fn try_from(value: &ConfigValue) -> Result { + Self::try_from(value.clone()) + } +} + impl Default for LogConfig { fn default() -> Self { Self { - level: slog::Level::Info, + level: LogLevel(slog::Level::Info), format: LogFormat::Text, enable_timestamp: true, file: File::default(), @@ -2721,6 
+2765,19 @@ impl LogConfig { } } +pub struct LogConfigManager; + +impl ConfigManager for LogConfigManager { + fn dispatch(&mut self, changes: ConfigChange) -> CfgResult<()> { + if let Some(v) = changes.get("level") { + let log_level = LogLevel::try_from(v)?; + set_log_level(log_level.0); + } + info!("update log config"; "config" => ?changes); + Ok(()) + } +} + #[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] @@ -2775,8 +2832,7 @@ pub struct TiKvConfig { // They are preserved for compatibility check. #[doc(hidden)] #[online_config(skip)] - #[serde(with = "log_level_serde")] - pub log_level: slog::Level, + pub log_level: LogLevel, #[doc(hidden)] #[online_config(skip)] pub log_file: String, @@ -2814,7 +2870,7 @@ pub struct TiKvConfig { #[online_config(skip)] pub memory_usage_high_water: f64, - #[online_config(skip)] + #[online_config(submodule)] pub log: LogConfig, #[online_config(submodule)] @@ -2895,7 +2951,7 @@ impl Default for TiKvConfig { fn default() -> TiKvConfig { TiKvConfig { cfg_path: "".to_owned(), - log_level: slog::Level::Info, + log_level: slog::Level::Info.into(), log_file: "".to_owned(), log_format: LogFormat::Text, log_rotation_timespan: ReadableDuration::hours(0), @@ -3838,6 +3894,7 @@ pub enum Module { ResourceMetering, BackupStream, Quota, + Log, Unknown(String), } @@ -3865,6 +3922,7 @@ impl From<&str> for Module { "resolved_ts" => Module::ResolvedTs, "resource_metering" => Module::ResourceMetering, "quota" => Module::Quota, + "log" => Module::Log, n => Module::Unknown(n.to_owned()), } } @@ -4003,6 +4061,7 @@ mod tests { use tikv_kv::RocksEngine as RocksDBEngine; use tikv_util::{ config::VersionTrack, + logger::get_log_level, quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, sys::SysQuota, worker::{dummy_scheduler, ReceiverWrapper}, @@ -4139,7 +4198,7 @@ mod tests { assert_eq!(last_cfg_metadata.modified().unwrap(), first_modified); // write to file when config is 
the inequivalent of last one. - cfg.log_level = slog::Level::Warning; + cfg.log_level = slog::Level::Warning.into(); assert!(persist_config(&cfg).is_ok()); last_cfg_metadata = last_cfg_path.metadata().unwrap(); assert_ne!(last_cfg_metadata.modified().unwrap(), first_modified); @@ -4255,8 +4314,7 @@ mod tests { fn test_parse_log_level() { #[derive(Serialize, Deserialize, Debug)] struct LevelHolder { - #[serde(with = "log_level_serde")] - v: Level, + v: LogLevel, } let legal_cases = vec![ @@ -4268,19 +4326,21 @@ mod tests { ("info", Level::Info), ]; for (serialized, deserialized) in legal_cases { - let holder = LevelHolder { v: deserialized }; + let holder = LevelHolder { + v: deserialized.into(), + }; let res_string = toml::to_string(&holder).unwrap(); let exp_string = format!("v = \"{}\"\n", serialized); assert_eq!(res_string, exp_string); let res_value: LevelHolder = toml::from_str(&exp_string).unwrap(); - assert_eq!(res_value.v, deserialized); + assert_eq!(res_value.v, deserialized.into()); } let compatibility_cases = vec![("warning", Level::Warning), ("critical", Level::Critical)]; for (serialized, deserialized) in compatibility_cases { let variant_string = format!("v = \"{}\"\n", serialized); let res_value: LevelHolder = toml::from_str(&variant_string).unwrap(); - assert_eq!(res_value.v, deserialized); + assert_eq!(res_value.v, deserialized.into()); } let illegal_cases = vec!["foobar", ""]; @@ -4709,6 +4769,31 @@ mod tests { ); } + #[test] + fn test_change_logconfig() { + let (cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let cfg_controller = ConfigController::new(cfg); + + cfg_controller.register(Module::Log, Box::new(LogConfigManager)); + + cfg_controller.update_config("log.level", "warn").unwrap(); + assert_eq!(get_log_level().unwrap(), Level::Warning); + assert_eq!( + cfg_controller.get_current().log.level, + LogLevel(Level::Warning) + ); + + assert!( + cfg_controller + .update_config("log.level", "invalid") + .is_err() + ); + assert_eq!( + 
cfg_controller.get_current().log.level, + LogLevel(Level::Warning) + ); + } + #[test] fn test_dispatch_titan_blob_run_mode_config() { let mut cfg = TiKvConfig::default(); diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 1bb066d1a2c..c4cb6a67fbb 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -62,7 +62,7 @@ use self::profile::{ read_file, start_one_cpu_profile, start_one_heap_profile, }; use crate::{ - config::{log_level_serde, ConfigController}, + config::{ConfigController, LogLevel}, server::Result, tikv_util::sys::thread::ThreadBuildWrapper, }; @@ -79,8 +79,7 @@ static FAIL_POINTS_REQUEST_PATH: &str = "/fail"; #[derive(Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] struct LogLevelRequest { - #[serde(with = "log_level_serde")] - pub log_level: slog::Level, + pub log_level: LogLevel, } pub struct StatusServer { @@ -403,7 +402,7 @@ where match log_level_request { Ok(req) => { - set_log_level(req.log_level); + set_log_level(req.log_level.into()); Ok(Response::new(Body::empty())) } Err(err) => Ok(make_response(StatusCode::BAD_REQUEST, err.to_string())), @@ -1464,7 +1463,7 @@ mod tests { .build() .unwrap(); - let new_log_level = slog::Level::Debug; + let new_log_level = slog::Level::Debug.into(); let mut log_level_request = Request::new(Body::from( serde_json::to_string(&LogLevelRequest { log_level: new_log_level, @@ -1484,7 +1483,7 @@ mod tests { .await .map(move |res| { assert_eq!(res.status(), StatusCode::OK); - assert_eq!(get_log_level(), Some(new_log_level)); + assert_eq!(get_log_level(), Some(new_log_level.into())); }) .unwrap() }); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 44a6ad8c989..d66ea96fb3b 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -64,7 +64,7 @@ fn read_file_in_project_dir(path: &str) -> String { fn test_serde_custom_tikv_config() { let mut value = TiKvConfig::default(); 
value.log_rotation_timespan = ReadableDuration::days(1); - value.log.level = Level::Critical; + value.log.level = Level::Critical.into(); value.log.file.filename = "foo".to_owned(); value.log.format = LogFormat::Json; value.log.file.max_size = 1; @@ -891,12 +891,12 @@ fn test_block_cache_backward_compatible() { fn test_log_backward_compatible() { let content = read_file_in_project_dir("integrations/config/test-log-compatible.toml"); let mut cfg: TiKvConfig = toml::from_str(&content).unwrap(); - assert_eq!(cfg.log.level, slog::Level::Info); + assert_eq!(cfg.log.level, slog::Level::Info.into()); assert_eq!(cfg.log.file.filename, ""); assert_eq!(cfg.log.format, LogFormat::Text); assert_eq!(cfg.log.file.max_size, 300); cfg.logger_compatible_adjust(); - assert_eq!(cfg.log.level, slog::Level::Critical); + assert_eq!(cfg.log.level, slog::Level::Critical.into()); assert_eq!(cfg.log.file.filename, "foo"); assert_eq!(cfg.log.format, LogFormat::Json); assert_eq!(cfg.log.file.max_size, 1024); From 0eec6009fb5d386437eee46063d7e461c25988d3 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Wed, 20 Jul 2022 16:41:09 +0800 Subject: [PATCH 0097/1149] copr: fix get_valid_int_prefix() to be compatible with TiDB(#13045) (#13046) close tikv/tikv#13045 Signed-off-by: guo-shaoge --- .../tidb_query_datatype/src/codec/convert.rs | 153 ++++++++++++------ components/tidb_query_expr/src/impl_cast.rs | 3 +- 2 files changed, 103 insertions(+), 53 deletions(-) diff --git a/components/tidb_query_datatype/src/codec/convert.rs b/components/tidb_query_datatype/src/codec/convert.rs index 61ce14a0390..bcfc7bb2bbe 100644 --- a/components/tidb_query_datatype/src/codec/convert.rs +++ b/components/tidb_query_datatype/src/codec/convert.rs @@ -828,7 +828,17 @@ impl ConvertTo for Bytes { } pub fn get_valid_int_prefix<'a>(ctx: &mut EvalContext, s: &'a str) -> Result> { - if !ctx.cfg.flag.contains(Flag::IN_SELECT_STMT) { + get_valid_int_prefix_helper(ctx, s, false) +} + +// As TiDB code(getValidIntPrefix()), cast 
expr needs to give error/warning when input string +// is like float. +pub fn get_valid_int_prefix_helper<'a>( + ctx: &mut EvalContext, + s: &'a str, + is_cast_func: bool, +) -> Result> { + if !is_cast_func { let vs = get_valid_float_prefix(ctx, s)?; Ok(float_str_to_int_string(ctx, vs)) } else { @@ -855,51 +865,65 @@ pub fn get_valid_int_prefix<'a>(ctx: &mut EvalContext, s: &'a str) -> Result(ctx: &mut EvalContext, s: &'a str) -> Result<&'a str> { - let mut saw_dot = false; - let mut saw_digit = false; - let mut valid_len = 0; - let mut e_idx = 0; - for (i, c) in s.chars().enumerate() { - if c == '+' || c == '-' { - if i != 0 && (e_idx == 0 || i != e_idx + 1) { - // "1e+1" is valid. - break; - } - } else if c == '.' { - if saw_dot || e_idx > 0 { - // "1.1." or "1e1.1" + get_valid_float_prefix_helper(ctx, s, false) +} + +// As TiDB code(getValidFloatPrefix()), cast expr should not give error/warning when input is +// empty. +pub fn get_valid_float_prefix_helper<'a>( + ctx: &mut EvalContext, + s: &'a str, + is_cast_func: bool, +) -> Result<&'a str> { + if is_cast_func && s.is_empty() { + Ok("0") + } else { + let mut saw_dot = false; + let mut saw_digit = false; + let mut valid_len = 0; + let mut e_idx = 0; + for (i, c) in s.chars().enumerate() { + if c == '+' || c == '-' { + if i != 0 && (e_idx == 0 || i != e_idx + 1) { + // "1e+1" is valid. + break; + } + } else if c == '.' { + if saw_dot || e_idx > 0 { + // "1.1." or "1e1.1" + break; + } + saw_dot = true; + if saw_digit { + // "123." is valid. + valid_len = i + 1; + } + } else if c == 'e' || c == 'E' { + if !saw_digit { + // "+.e" + break; + } + if e_idx != 0 { + // "1e5e" + break; + } + e_idx = i + } else if !('0'..='9').contains(&c) { break; - } - saw_dot = true; - if saw_digit { - // "123." is valid. 
+ } else { + saw_digit = true; valid_len = i + 1; } - } else if c == 'e' || c == 'E' { - if !saw_digit { - // "+.e" - break; - } - if e_idx != 0 { - // "1e5e" - break; - } - e_idx = i - } else if !('0'..='9').contains(&c) { - break; + } + if valid_len == 0 || valid_len < s.len() { + ctx.handle_truncate_err(Error::truncated_wrong_val("INTEGER", s))?; + } + if valid_len == 0 { + Ok("0") } else { - saw_digit = true; - valid_len = i + 1; + Ok(&s[..valid_len]) } } - if valid_len == 0 || valid_len < s.len() { - ctx.handle_truncate_err(Error::truncated_wrong_val("INTEGER", s))?; - } - if valid_len == 0 { - Ok("0") - } else { - Ok(&s[..valid_len]) - } } /// the `s` must be a valid int_str @@ -1984,28 +2008,48 @@ mod tests { fn test_get_valid_float_prefix() { let cases = vec![ ("-100", "-100"), + ("1.", "1."), + (".1", ".1"), + ("123.23E-10", "123.23E-10"), + ]; + + let mut ctx = EvalContext::new(Arc::new(EvalConfig::from_flag( + Flag::TRUNCATE_AS_WARNING | Flag::OVERFLOW_AS_WARNING, + ))); + for (i, o) in cases { + assert_eq!(super::get_valid_float_prefix(&mut ctx, i).unwrap(), o); + } + assert_eq!(ctx.take_warnings().warnings.len(), 0); + + let warning_cases = vec![ ("1abc", "1"), ("-1-1", "-1"), ("+1+1", "+1"), ("123..34", "123."), - ("123.23E-10", "123.23E-10"), ("1.1e1.3", "1.1e1"), ("11e1.3", "11e1"), ("1.1e-13a", "1.1e-13"), - ("1.", "1."), - (".1", ".1"), - ("", "0"), ("123e+", "123"), ("123.e", "123."), ("1-1-", "1"), ("11-1-", "11"), ("-1-1-", "-1"), + ("", "0"), ]; - - let mut ctx = EvalContext::new(Arc::new(EvalConfig::default_for_test())); - for (i, o) in cases { + let warning_cnt = warning_cases.len(); + for (i, o) in warning_cases.clone() { assert_eq!(super::get_valid_float_prefix(&mut ctx, i).unwrap(), o); } + assert_eq!(ctx.take_warnings().warnings.len(), warning_cnt); + + // Test is cast expr. 
+ for (i, o) in warning_cases.clone() { + assert_eq!( + super::get_valid_float_prefix_helper(&mut ctx, i, true).unwrap(), + o + ); + } + assert_eq!(ctx.take_warnings().warnings.len(), warning_cnt - 1); } #[test] @@ -2093,11 +2137,8 @@ mod tests { } assert_eq!(ctx.take_warnings().warnings.len(), 0); - let mut ctx = EvalContext::new(Arc::new(EvalConfig::from_flag( - Flag::IN_SELECT_STMT | Flag::IGNORE_TRUNCATE | Flag::OVERFLOW_AS_WARNING, - ))); + let mut ctx = EvalContext::new(Arc::new(EvalConfig::default_for_test())); let cases = vec![ - ("+0.0", "+0"), ("100", "100"), ("+100", "+100"), ("-100", "-100"), @@ -2108,10 +2149,18 @@ mod tests { ]; for (i, e) in cases { - let o = super::get_valid_int_prefix(&mut ctx, i); + let o = super::get_valid_int_prefix_helper(&mut ctx, i, true); assert_eq!(o.unwrap(), *e, "{}, {}", i, e); } assert_eq!(ctx.take_warnings().warnings.len(), 0); + + let mut ctx = EvalContext::new(Arc::new(EvalConfig::from_flag(Flag::TRUNCATE_AS_WARNING))); + let cases = vec![("+0.0", "+0"), ("0.5", "0"), ("+0.5", "+0")]; + for (i, e) in cases { + let o = super::get_valid_int_prefix_helper(&mut ctx, i, true); + assert_eq!(o.unwrap(), *e, "{}, {}", i, e); + } + assert_eq!(ctx.take_warnings().warnings.len(), 3); } #[test] diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index f6d6af4eb02..e283a78d245 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -344,7 +344,7 @@ fn cast_string_as_int( } else { // FIXME: if the err get_valid_int_prefix returned is overflow err, // it should be ERR_TRUNCATE_WRONG_VALUE but not others. 
- let valid_int_prefix = get_valid_int_prefix(ctx, val)?; + let valid_int_prefix = get_valid_int_prefix_helper(ctx, val, true)?; let parse_res = if !is_str_neg { valid_int_prefix.parse::().map(|x| x as i64) } else { @@ -2343,6 +2343,7 @@ mod tests { vec![ERR_TRUNCATE_WRONG_VALUE], Cond::Unsigned, ), + ("0.5", 0_i64, vec![ERR_TRUNCATE_WRONG_VALUE], Cond::None), ]; for (input, expected, mut err_code, cond) in cs { From 6c7f6ecf4a999b0c102d442061d0ce0bd8b7c969 Mon Sep 17 00:00:00 2001 From: Zwb Date: Wed, 20 Jul 2022 17:01:09 +0800 Subject: [PATCH 0098/1149] Fix panic when enable titan (#13051) close tikv/tikv#13038 Signed-off-by: Wenbo Zhang Co-authored-by: Xinye Tao --- Cargo.lock | 6 +++--- components/engine_rocks/src/write_batch.rs | 2 +- tests/integrations/raftstore/test_compact_after_delete.rs | 1 + tests/integrations/raftstore/test_merge.rs | 1 + tests/integrations/raftstore/test_snap.rs | 2 ++ 5 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0dd646d56e8..fb4e4d1e6a9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2748,7 +2748,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#2e00e78b945194e8a672e8e078b6c73956e9ace0" +source = "git+https://github.com/tikv/rust-rocksdb.git#827a5df22cd59dc708c4c6a87dd8735a2312773d" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2767,7 +2767,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#2e00e78b945194e8a672e8e078b6c73956e9ace0" +source = "git+https://github.com/tikv/rust-rocksdb.git#827a5df22cd59dc708c4c6a87dd8735a2312773d" dependencies = [ "bzip2-sys", "cc", @@ -4584,7 +4584,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#2e00e78b945194e8a672e8e078b6c73956e9ace0" +source = 
"git+https://github.com/tikv/rust-rocksdb.git#827a5df22cd59dc708c4c6a87dd8735a2312773d" dependencies = [ "libc 0.2.125", "librocksdb_sys", diff --git a/components/engine_rocks/src/write_batch.rs b/components/engine_rocks/src/write_batch.rs index 1aa5c424521..77b8e65d3eb 100644 --- a/components/engine_rocks/src/write_batch.rs +++ b/components/engine_rocks/src/write_batch.rs @@ -98,7 +98,7 @@ impl RocksWriteBatchVec { impl engine_traits::WriteBatch for RocksWriteBatchVec { fn write_opt(&self, opts: &WriteOptions) -> Result<()> { let opt: RocksWriteOptions = opts.into(); - if self.index > 0 { + if self.support_write_batch_vec { self.get_db() .multi_batch_write(self.as_inner(), &opt.into_raw()) .map_err(Error::Engine) diff --git a/tests/integrations/raftstore/test_compact_after_delete.rs b/tests/integrations/raftstore/test_compact_after_delete.rs index 5a9a1521355..b31b86b3bfb 100644 --- a/tests/integrations/raftstore/test_compact_after_delete.rs +++ b/tests/integrations/raftstore/test_compact_after_delete.rs @@ -36,6 +36,7 @@ fn test_compact_after_delete(cluster: &mut Cluster) { cluster.cfg.raft_store.region_compact_min_tombstones = 500; cluster.cfg.raft_store.region_compact_tombstones_percent = 50; cluster.cfg.raft_store.region_compact_check_step = 1; + cluster.cfg.rocksdb.titan.enabled = true; cluster.run(); for i in 0..1000 { diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index df739d825bc..4d7914429ab 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -24,6 +24,7 @@ use txn_types::{Key, PessimisticLock}; #[test] fn test_node_base_merge() { let mut cluster = new_node_cluster(0, 3); + cluster.cfg.rocksdb.titan.enabled = true; configure_for_merge(&mut cluster); cluster.run(); diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index c75e07e7f3a..180e5fb1334 100644 --- a/tests/integrations/raftstore/test_snap.rs 
+++ b/tests/integrations/raftstore/test_snap.rs @@ -25,6 +25,7 @@ use tikv::server::snap::send_snap; use tikv_util::{config::*, time::Instant, HandyRwLock}; fn test_huge_snapshot(cluster: &mut Cluster, max_snapshot_file_size: u64) { + cluster.cfg.rocksdb.titan.enabled = true; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(1000); cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(10); cluster.cfg.raft_store.snap_apply_batch_size = ReadableSize(500); @@ -211,6 +212,7 @@ fn test_server_snap_gc() { /// when there are multiple snapshots which have overlapped region ranges /// arrive at the same raftstore. fn test_concurrent_snap(cluster: &mut Cluster) { + cluster.cfg.rocksdb.titan.enabled = true; // Disable raft log gc in this test case. cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(60); From baf30d0ea62282c54212ecb3383de9eb2225e063 Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Wed, 20 Jul 2022 18:13:09 +0800 Subject: [PATCH 0099/1149] log-backup: upload global checkpoint ts to etcd. 
(#13053) ref tikv/tikv#1, close tikv/tikv#13062 Signed-off-by: joccau Co-authored-by: Ti Chi Robot --- components/backup-stream/src/endpoint.rs | 35 +++++++++++++++---- .../backup-stream/src/metadata/client.rs | 34 ++++++++++++++++++ components/backup-stream/src/metadata/keys.rs | 14 ++++++++ components/backup-stream/src/metadata/test.rs | 28 ++++++++++++++- components/backup-stream/src/router.rs | 30 ++++++++-------- components/external_storage/src/local.rs | 34 +++++++++++++++--- 6 files changed, 148 insertions(+), 27 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 51e04023d60..b4c49ea892a 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -806,15 +806,36 @@ where let ts = self.meta_client.global_progress_of_task(&task).await; match ts { Ok(global_checkpoint) => { - if let Err(e) = self + let r = self .range_router .update_global_checkpoint(&task, global_checkpoint, self.store_id) - .await - { - warn!("backup stream failed to update global checkpoint."; - "task" => ?task, - "err" => ?e - ); + .await; + match r { + Ok(true) => { + if let Err(err) = self + .meta_client + .set_storage_checkpoint(&task, global_checkpoint) + .await + { + warn!("backup stream failed to set global checkpoint."; + "task" => ?task, + "global-checkpoint" => global_checkpoint, + "err" => ?err, + ); + } + } + Ok(false) => { + debug!("backup stream no need update global checkpoint."; + "task" => ?task, + "global-checkpoint" => global_checkpoint, + ); + } + Err(e) => { + warn!("backup stream failed to update global checkpoint."; + "task" => ?task, + "err" => ?e + ); + } } } Err(e) => { diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index 07d93162e00..dc21f86b526 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -421,6 +421,40 @@ impl 
MetadataClient { }) } + /// Set the storage checkpoint to metadata. + pub async fn set_storage_checkpoint(&self, task_name: &str, ts: u64) -> Result<()> { + let now = Instant::now(); + defer! { + super::metrics::METADATA_OPERATION_LATENCY.with_label_values(&["storage_checkpoint"]).observe(now.saturating_elapsed().as_secs_f64()) + } + self.meta_store + .set(KeyValue( + MetaKey::storage_checkpoint_of(task_name, self.store_id), + ts.to_be_bytes().to_vec(), + )) + .await?; + Ok(()) + } + + /// Get the storage checkpoint from metadata. This function is justly used for test. + pub async fn get_storage_checkpoint(&self, task_name: &str) -> Result { + let now = Instant::now(); + defer! { + super::metrics::METADATA_OPERATION_LATENCY.with_label_values(&["task_step"]).observe(now.saturating_elapsed().as_secs_f64()) + } + let snap = self.meta_store.snapshot().await?; + let ts = snap + .get(Keys::Key(MetaKey::storage_checkpoint_of( + task_name, + self.store_id, + ))) + .await?; + + match ts.as_slice() { + [ts, ..] => Ok(TimeStamp::new(parse_ts_from_bytes(ts.value())?)), + [] => Ok(self.get_task_start_ts_checkpoint(task_name).await?.ts), + } + } /// forward the progress of some task. 
pub async fn set_local_task_checkpoint(&self, task_name: &str, ts: u64) -> Result<()> { let now = Instant::now(); diff --git a/components/backup-stream/src/metadata/keys.rs b/components/backup-stream/src/metadata/keys.rs index 6920ba14a33..4db978c2cb6 100644 --- a/components/backup-stream/src/metadata/keys.rs +++ b/components/backup-stream/src/metadata/keys.rs @@ -5,6 +5,7 @@ use kvproto::metapb::Region; const PREFIX: &str = "/tidb/br-stream"; const PATH_INFO: &str = "/info"; const PATH_NEXT_BACKUP_TS: &str = "/checkpoint"; +const PATH_STORAGE_CHECKPOINT: &str = "/storage-checkpoint"; const PATH_RANGES: &str = "/ranges"; const PATH_PAUSE: &str = "/pause"; const PATH_LAST_ERROR: &str = "/last-error"; @@ -23,6 +24,8 @@ const TASKS_PREFIX: &str = "/tidb/br-stream/info/"; /// /checkpoint/// -> /// For the status of tasks: /// /pause/ -> "" +/// For the storage checkpoint ts of tasks: +/// /storage-checkpoint// -> /// ``` #[derive(Clone)] pub struct MetaKey(pub Vec); @@ -129,6 +132,17 @@ impl MetaKey { ) } + /// defines the key of storage checkpoint-ts of task in a store. 
+ pub fn storage_checkpoint_of(name: &str, store_id: u64) -> Self { + Self( + format!( + "{}{}/{}/{}", + PREFIX, PATH_STORAGE_CHECKPOINT, name, store_id + ) + .into_bytes(), + ) + } + pub fn pause_prefix_len() -> usize { Self::pause_prefix().0.len() } diff --git a/components/backup-stream/src/metadata/test.rs b/components/backup-stream/src/metadata/test.rs index e70ed78b32c..b9fb965033a 100644 --- a/components/backup-stream/src/metadata/test.rs +++ b/components/backup-stream/src/metadata/test.rs @@ -14,7 +14,7 @@ use kvproto::{ use tokio_stream::StreamExt; use txn_types::TimeStamp; -use super::{MetadataClient, StreamTask}; +use super::{keys::MetaKey, MetadataClient, StreamTask}; use crate::{ errors::Result, metadata::{ @@ -152,6 +152,32 @@ async fn test_progress() -> Result<()> { Ok(()) } +#[test] +fn test_storage_checkpoint_of() { + let task_name = "simple_task"; + let store_id: u64 = 5; + let key = MetaKey::storage_checkpoint_of(task_name, store_id); + assert_eq!( + &key.0, + "/tidb/br-stream/storage-checkpoint/simple_task/5".as_bytes() + ); +} + +#[tokio::test] +async fn test_set_storage_checkpoint() -> Result<()> { + let cli = test_meta_cli(); + let task = simple_task("simple_3"); + let storage_checkpoint_ts: u64 = 12345; + + // set storage checkpoint to metadata + cli.set_storage_checkpoint(task.info.get_name(), storage_checkpoint_ts) + .await?; + // get storage checkpoint from metadata + let ts = cli.get_storage_checkpoint(task.info.get_name()).await?; + assert_eq!(ts.into_inner(), storage_checkpoint_ts); + Ok(()) +} + #[tokio::test] async fn test_init() -> Result<()> { let cli = test_meta_cli(); diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 1ad4c4ad4ca..3e29592a9f4 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -569,11 +569,11 @@ impl RouterInner { task_name: &str, global_checkpoint: u64, store_id: u64, - ) -> Result<()> { - let t = 
self.get_task_info(task_name).await?; - t.update_global_checkpoint(global_checkpoint, store_id) - .await?; - Ok(()) + ) -> Result { + self.get_task_info(task_name) + .await? + .update_global_checkpoint(global_checkpoint, store_id) + .await } /// tick aims to flush log/meta to extern storage periodically. @@ -1122,7 +1122,7 @@ impl StreamTaskInfo { &self, global_checkpoint: u64, store_id: u64, - ) -> Result<()> { + ) -> Result { let last_global_checkpoint = self.global_checkpoint_ts.load(Ordering::SeqCst); if last_global_checkpoint < global_checkpoint { let r = self.global_checkpoint_ts.compare_exchange( @@ -1133,9 +1133,10 @@ impl StreamTaskInfo { ); if r.is_ok() { self.flush_global_checkpoint(store_id).await?; + return Ok(true); } } - Ok(()) + Ok(false) } } @@ -2014,7 +2015,7 @@ mod tests { } #[tokio::test] - async fn test_update_global_checkpoint() { + async fn test_update_global_checkpoint() -> Result<()> { // create local storage let tmp_dir = tempfile::tempdir().unwrap(); let backend = external_storage_export::make_local_backend(tmp_dir.path()); @@ -2039,18 +2040,18 @@ mod tests { // test no need to update global checkpoint let store_id = 3; let mut global_checkpoint = 10000; - let r = task + let is_updated = task .update_global_checkpoint(global_checkpoint, store_id) - .await; - assert_eq!(r.is_ok(), true); + .await?; + assert_eq!(is_updated, false); assert_eq!(task.global_checkpoint_ts.load(Ordering::SeqCst), 10001); // test update global checkpoint global_checkpoint = 10002; - let r = task + let is_updated = task .update_global_checkpoint(global_checkpoint, store_id) - .await; - assert_eq!(r.is_ok(), true); + .await?; + assert_eq!(is_updated, true); assert_eq!( task.global_checkpoint_ts.load(Ordering::SeqCst), global_checkpoint @@ -2067,5 +2068,6 @@ mod tests { ts.copy_from_slice(&buff); let ts = u64::from_le_bytes(ts); assert_eq!(ts, global_checkpoint); + Ok(()) } } diff --git a/components/external_storage/src/local.rs 
b/components/external_storage/src/local.rs index 3e307dca157..00cb42cf1a6 100644 --- a/components/external_storage/src/local.rs +++ b/components/external_storage/src/local.rs @@ -100,12 +100,11 @@ impl ExternalStorage for LocalStorage { } })?; } - // Sanitize check, do not save file if it is already exist. + + // Because s3 could support writing(put_object) a existed object. + // For the interface consistent with s3, local storage need also support write a existed file. if fs::metadata(self.base.join(name)).await.is_ok() { - return Err(io::Error::new( - io::ErrorKind::AlreadyExists, - format!("[{}] is already exists in {}", name, self.base.display()), - )); + info!("[{}] is already exists in {}", name, self.base.display()); } let tmp_path = self.tmp_path(Path::new(name)); let mut tmp_f = File::create(&tmp_path).await?; @@ -215,4 +214,29 @@ mod tests { fn test_url_of_backend() { assert_eq!(url_for(Path::new("/tmp/a")).to_string(), "local:///tmp/a"); } + + #[tokio::test] + async fn test_write_existed_file() { + let temp_dir = Builder::new().tempdir().unwrap(); + let path = temp_dir.path(); + let ls = LocalStorage::new(path).unwrap(); + + let filename = "existed.file"; + let buf1: &[u8] = b"pingcap"; + let buf2: &[u8] = b"tikv"; + let r = ls + .write(filename, UnpinReader(Box::new(buf1)), buf1.len() as _) + .await; + assert!(r.is_ok()); + let r = ls + .write(filename, UnpinReader(Box::new(buf2)), buf2.len() as _) + .await; + assert!(r.is_ok()); + + let mut read_buff: Vec = Vec::new(); + let r = ls.read(filename).read_to_end(&mut read_buff).await; + assert!(r.is_ok()); + assert_eq!(read_buff.len(), 4); + assert_eq!(&read_buff, buf2); + } } From 9b3a669a97c39e7603851c0b5c85754c5d2d0cf7 Mon Sep 17 00:00:00 2001 From: Jiarui Li <34512395+Willendless@users.noreply.github.com> Date: Wed, 20 Jul 2022 23:09:08 -0400 Subject: [PATCH 0100/1149] duration: keep duration parser compatible with tidb (#13031) close tikv/tikv#12932, ref tikv/tikv#35455 Signed-off-by: Willendless 
<317500141@qq.com> Co-authored-by: Liqi Geng --- .../src/codec/mysql/duration.rs | 88 ++++++++++++++----- 1 file changed, 66 insertions(+), 22 deletions(-) diff --git a/components/tidb_query_datatype/src/codec/mysql/duration.rs b/components/tidb_query_datatype/src/codec/mysql/duration.rs index 997983c2e49..e151c8fd0c5 100644 --- a/components/tidb_query_datatype/src/codec/mysql/duration.rs +++ b/components/tidb_query_datatype/src/codec/mysql/duration.rs @@ -150,28 +150,35 @@ mod parser { Ok((rest, hhmmss)) } - fn hhmmss_datetime<'a>( - ctx: &mut EvalContext, - input: &'a str, - fsp: u8, - ) -> IResult<&'a str, Duration, ()> { + /// A string can match datetime format only if it starts with a series of digits + /// whose length matches the full format of DateTime literal (12, 14) + /// or the string starts with a date literal. + fn format_can_match_datetime(input: &str) -> IResult<(), (), ()> { let (rest, digits) = digit1(input)?; + if digits.len() == 12 || digits.len() == 14 { - let datetime = DateTime::parse_datetime(ctx, input, fsp as i8, true) - .map_err(|_| nom::Err::Error(()))?; - return Ok(("", datetime.convert(ctx).map_err(|_| nom::Err::Error(()))?)); + return Ok(((), ())); } + let (rest, _) = anysep(rest)?; let (rest, _) = digit1(rest)?; let (rest, _) = anysep(rest)?; let (rest, _) = digit1(rest)?; - let has_datetime_sep = matches!(rest.chars().next(), Some(c) if c == 'T' || c == ' '); - - if !has_datetime_sep { - return Err(nom::Err::Error(())); + if matches!(rest.chars().next(), Some(c) if c == 'T' || c == ' ') { + Ok(((), ())) + } else { + Err(nom::Err::Error(())) } + } + /// Caller should make sure the input string can match datetime format + /// according to `format_can_match_datetime`. 
+ fn hhmmss_datetime<'a>( + ctx: &mut EvalContext, + input: &'a str, + fsp: u8, + ) -> IResult<&'a str, Duration, ()> { let datetime = DateTime::parse_datetime(ctx, input, fsp as i8, true) .map_err(|_| nom::Err::Error(()))?; Ok(("", datetime.convert(ctx).map_err(|_| nom::Err::Error(()))?)) @@ -208,16 +215,21 @@ mod parser { ctx: &mut EvalContext, input: &str, fsp: u8, - fallback_to_daytime: bool, + fallback_to_datetime: bool, overflow_as_null: bool, ) -> Option { let input = input.trim(); if input.is_empty() { - return Some(Duration::zero()); + return None; } let (rest, neg) = negative(input).ok()?; let (rest, _) = space0::<_, ()>(rest).ok()?; + + let chars_len = rest.len(); + let mut truncated_parse = false; + let fallback_to_datetime = fallback_to_datetime && format_can_match_datetime(rest).is_ok(); + let duration = day_hhmmss(rest) .ok() .and_then(|(rest, (day, [hh, mm, ss]))| { @@ -230,7 +242,10 @@ mod parser { let (rest, frac) = fraction(rest, fsp).ok()?; if !rest.is_empty() { - return None; + if chars_len >= 12 { + return None; + } + truncated_parse = true; } Some(Duration::new_from_parts( @@ -238,8 +253,17 @@ mod parser { )) }); + // In order to keep compatible with TiDB, when input string can only be partially parsed by `hhmmss_compact` + // and it can match the datetime format, we fallback to parse it using datetime format. 
+ if truncated_parse && fallback_to_datetime { + return hhmmss_datetime(ctx, rest, fsp).map_or(None, |(_, duration)| Some(duration)); + } + match duration { - Some(Ok(duration)) => Some(duration), + Some(Ok(duration)) => { + let _ = ctx.handle_truncate(truncated_parse); + Some(duration) + } Some(Err(err)) if err.is_overflow() => { if overflow_as_null { return None; @@ -249,7 +273,7 @@ mod parser { Some(Duration { nanos, fsp }) }) } - None if fallback_to_daytime => { + None if fallback_to_datetime => { hhmmss_datetime(ctx, rest, fsp).map_or(None, |(_, duration)| Some(duration)) } _ => None, @@ -809,7 +833,8 @@ mod tests { ("2011-11-11 00:00:01", 0, Some("00:00:01")), ("20111111000001", 0, Some("00:00:01")), ("201112110102", 0, Some("11:01:02")), - ("2011-11-11", 0, None), + ("2011-11-11", 0, Some("00:20:11")), + ("2012-08-x", 0, Some("00:20:12")), ("--23", 0, None), ("232 10", 0, None), ("-232 10", 0, None), @@ -818,7 +843,24 @@ mod tests { ("00:00:00.777777", 2, Some("00:00:00.78")), ("00:00:00.777777", 6, Some("00:00:00.777777")), ("00:00:00.001", 3, Some("00:00:00.001")), + ("0x", 6, Some("00:00:00.000000")), + ("1x", 6, Some("00:00:01.000000")), + ("0000-00-00", 6, Some("00:00:00.000000")), // NOTE: The following case is easy to fail. 
+ ("0000-00-00", 0, Some("00:00:00")), + ("1234abc", 0, Some("00:12:34")), + ("1234x", 0, Some("00:12:34")), + ("1234xxxxxxx", 0, Some("00:12:34")), + ("1234xxxxxxxx", 0, None), + ("-1234xxxxxxx", 0, Some("-00:12:34")), + ("-1234xxxxxxxx", 0, None), + ("1-----", 0, Some("00:00:01")), + ("20100000-02-12", 0, None), + ("20100-02-12", 0, Some("02:01:00")), + ("99999-99-99", 0, None), + ("99990000", 0, None), + ("0000-00-00", 0, Some("00:00:00")), + ("00-00-00", 0, Some("00:00:00")), ("- 1 ", 0, Some("-00:00:01")), ("1:2:3", 0, Some("01:02:03")), ("1 1:2:3", 0, Some("25:02:03")), @@ -835,8 +877,9 @@ mod tests { (" - 1 : 2 : 3 .123 ", 3, Some("-01:02:03.123")), (" - 1 .123 ", 3, Some("-00:00:01.123")), ("-", 0, None), + ("a", 0, None), ("- .1", 0, None), - ("", 0, Some("00:00:00")), + ("", 0, None), ("", 7, None), ("1.1", 1, Some("00:00:01.1")), ("-1.1", 1, Some("-00:00:01.1")), @@ -846,13 +889,13 @@ mod tests { ("4294967295 0:59:59", 0, None), ("4294967295 232:59:59", 0, None), ("-4294967295 232:59:59", 0, None), - ("1::2:3", 0, None), - ("1.23 3", 0, None), + ("1::2:3", 0, Some("00:00:01")), + ("1.23 3", 0, Some("00:00:01")), ("1:62:3", 0, None), ("1:02:63", 0, None), ("-231342080", 0, None), + ("2010-02-12", 0, Some("00:20:10")), // test fallback to datetime - ("2010-02-12", 0, None), ("2010-02-12t12:23:34", 0, None), ("2010-02-12T12:23:34", 0, Some("12:23:34")), ("2010-02-12 12:23:34", 0, Some("12:23:34")), @@ -871,6 +914,7 @@ mod tests { let cases: Vec<(&str, i8, Option<&'static str>, bool)> = vec![ ("-790822912", 0, None, true), ("-790822912", 0, Some("-838:59:59"), false), + ("99990000", 0, Some("838:59:59"), false), ]; for (input, fsp, expect, return_null) in cases { From 5fa87491244fa0356ec06b4d9681fac14b83ac79 Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Thu, 21 Jul 2022 11:37:09 +0800 Subject: [PATCH 0101/1149] log-backup: modify the config (#13023) ref tikv/tikv#12895 Signed-off-by: joccau Co-authored-by: Ti Chi 
Robot --- components/backup-stream/src/endpoint.rs | 8 +++++--- etc/config-template.toml | 2 +- src/config.rs | 12 +++++------- tests/integrations/config/mod.rs | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index b4c49ea892a..2defb88b541 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -111,14 +111,14 @@ where concurrency_manager: ConcurrencyManager, ) -> Self { crate::metrics::STREAM_ENABLED.inc(); - let pool = create_tokio_runtime(config.io_threads, "backup-stream") + let pool = create_tokio_runtime((config.num_threads / 2).max(1), "backup-stream") .expect("failed to create tokio runtime for backup stream worker."); let meta_client = MetadataClient::new(store, store_id); let range_router = Router::new( PathBuf::from(config.temp_path.clone()), scheduler.clone(), - config.temp_file_size_limit_per_task.0, + config.file_size_limit.0, config.max_flush_interval.0, ); @@ -159,7 +159,7 @@ where observer.clone(), meta_client.clone(), pd_client.clone(), - config.num_threads, + ((config.num_threads + 1) / 2).max(1), ); pool.spawn(op_loop); Endpoint { @@ -935,6 +935,8 @@ where /// Create a standard tokio runtime /// (which allows io and time reactor, involve thread memory accessor), fn create_tokio_runtime(thread_count: usize, thread_name: &str) -> TokioResult { + info!("create tokio runtime for backup stream"; "thread_name" => thread_name, "thread-count" => thread_count); + tokio::runtime::Builder::new_multi_thread() .thread_name(thread_name) // Maybe make it more configurable? diff --git a/etc/config-template.toml b/etc/config-template.toml index a19533b7847..795a82f371c 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -1189,7 +1189,7 @@ [log-backup] ## Number of threads to perform backup stream tasks. -## The default value is set to min(CPU_NUM * 0.5, 8). 
+## The default value is CPU_NUM * 0.5, and limited to [2, 12]. # num-threads = 8 ## enable this feature. TiKV will starts watch related tasks in PD. and backup kv changes to storage accodring to task. diff --git a/src/config.rs b/src/config.rs index 1f64f53dc58..6ae622bd806 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2450,17 +2450,16 @@ pub struct BackupStreamConfig { #[online_config(skip)] pub num_threads: usize, #[online_config(skip)] - pub io_threads: usize, - #[online_config(skip)] pub enable: bool, #[online_config(skip)] pub temp_path: String, #[online_config(skip)] - pub temp_file_size_limit_per_task: ReadableSize, + pub file_size_limit: ReadableSize, #[online_config(skip)] pub initial_scan_pending_memory_quota: ReadableSize, #[online_config(skip)] pub initial_scan_rate_limit: ReadableSize, + #[serde(skip)] #[online_config(skip)] pub use_checkpoint_v3: bool, } @@ -2488,12 +2487,11 @@ impl Default for BackupStreamConfig { Self { max_flush_interval: ReadableDuration::minutes(5), // use at most 50% of vCPU by default - num_threads: (cpu_num * 0.5).clamp(1.0, 8.0) as usize, - io_threads: 2, + num_threads: (cpu_num * 0.5).clamp(2.0, 12.0) as usize, enable: false, // TODO: may be use raft store directory temp_path: String::new(), - temp_file_size_limit_per_task: ReadableSize::mb(128), + file_size_limit: ReadableSize::mb(256), initial_scan_pending_memory_quota: ReadableSize(quota_size as _), initial_scan_rate_limit: ReadableSize::mb(60), use_checkpoint_v3: true, @@ -3101,7 +3099,7 @@ impl TiKvConfig { if self.backup_stream.temp_path.is_empty() { self.backup_stream.temp_path = - config::canonicalize_sub_path(&self.storage.data_dir, "log-backup-tmp")?; + config::canonicalize_sub_path(&self.storage.data_dir, "log-backup-temp")?; } self.rocksdb.validate()?; diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index d66ea96fb3b..8c1be52be78 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -748,7 
+748,7 @@ fn test_serde_custom_tikv_config() { ..Default::default() }; value.backup_stream = BackupStreamConfig { - num_threads: 8, + num_threads: 12, ..Default::default() }; value.import = ImportConfig { From 92b223cfd4195d857219bac02d6a96f4ce4c03e3 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 21 Jul 2022 12:05:10 +0800 Subject: [PATCH 0102/1149] log: optimize log filter (#13080) ref tikv/tikv#12986 Signed-off-by: glorv Co-authored-by: Ti Chi Robot --- components/tikv_util/src/logger/mod.rs | 85 ++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 13 deletions(-) diff --git a/components/tikv_util/src/logger/mod.rs b/components/tikv_util/src/logger/mod.rs index f4fd936cddc..35bf5f4c8e0 100644 --- a/components/tikv_util/src/logger/mod.rs +++ b/components/tikv_util/src/logger/mod.rs @@ -83,7 +83,7 @@ where threshold: slow_threshold, inner: drain, }; - let filtered = drain.filter(filter).fuse(); + let filtered = GlobalLevelFilter::new(drain.filter(filter).fuse()); (slog::Logger::root(filtered, slog_o!()), Some(guard)) } else { @@ -92,7 +92,7 @@ where threshold: slow_threshold, inner: drain, }; - let filtered = drain.filter(filter).fuse(); + let filtered = GlobalLevelFilter::new(drain.filter(filter).fuse()); (slog::Logger::root(filtered, slog_o!()), None) }; @@ -407,17 +407,15 @@ where type Err = slog::Never; fn log(&self, record: &Record<'_>, values: &OwnedKVList) -> Result { - if record.level().as_usize() <= LOG_LEVEL.load(Ordering::Relaxed) { - if let Err(e) = self.0.log(record, values) { - let fatal_drainer = Mutex::new(text_format(term_writer(), true)).ignore_res(); - fatal_drainer.log(record, values).unwrap(); - let fatal_logger = slog::Logger::root(fatal_drainer, slog_o!()); - slog::slog_crit!( - fatal_logger, - "logger encountered error"; - "err" => %e, - ); - } + if let Err(e) = self.0.log(record, values) { + let fatal_drainer = Mutex::new(text_format(term_writer(), true)).ignore_res(); + fatal_drainer.log(record, values).unwrap(); + let 
fatal_logger = slog::Logger::root(fatal_drainer, slog_o!()); + slog::slog_crit!( + fatal_logger, + "logger encountered error"; + "err" => %e, + ); } Ok(()) } @@ -452,6 +450,36 @@ where } } +// GlobalLevelFilter is a filter that base on the global `LOG_LEVEL`'s value. +pub struct GlobalLevelFilter(pub D); + +impl GlobalLevelFilter { + /// Create `LevelFilter` + pub fn new(drain: D) -> Self { + Self(drain) + } +} + +impl Drain for GlobalLevelFilter +where + D: Drain, + D::Ok: Default, +{ + type Ok = D::Ok; + type Err = D::Err; + fn log(&self, record: &Record<'_>, logger_values: &OwnedKVList) -> Result { + if record.level().as_usize() <= LOG_LEVEL.load(Ordering::Relaxed) { + self.0.log(record, logger_values) + } else { + Ok(Default::default()) + } + } + #[inline] + fn is_enabled(&self, level: Level) -> bool { + level.as_usize() <= LOG_LEVEL.load(Ordering::Relaxed) && self.0.is_enabled(level) + } +} + struct SlowCostSerializer { // None means input record without key `takes` cost: Option, @@ -821,6 +849,37 @@ mod tests { }); } + #[test] + fn test_global_level_filter() { + let decorator = PlainSyncDecorator::new(TestWriter); + let drain = TikvFormat::new(decorator, true).fuse(); + let logger = + slog::Logger::root_typed(GlobalLevelFilter::new(drain), slog_o!()).into_erased(); + + let expected = "[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:871] [Welcome]\n"; + let check_log = |log: &str| { + BUFFER.with(|buffer| { + let mut buffer = buffer.borrow_mut(); + let output = from_utf8(&*buffer).unwrap(); + // only check the log len here as some field like timestamp, location may change. 
+ assert_eq!(output.len(), log.len()); + buffer.clear(); + }); + }; + + set_log_level(Level::Info); + slog_info!(logger, "Welcome"); + check_log(expected); + + set_log_level(Level::Warning); + slog_info!(logger, "Welcome"); + check_log(""); + + set_log_level(Level::Info); + slog_info!(logger, "Welcome"); + check_log(expected); + } + /// Removes the wrapping signs, peels `"[hello]"` to `"hello"`, or peels `"(hello)"` to `"hello"`, fn peel(output: &str) -> &str { assert!(output.len() >= 2); From 43f5f7ed2e5dff43d22e8be9e59fda2558c76597 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 21 Jul 2022 14:17:09 +0800 Subject: [PATCH 0103/1149] log-backup: fixed bug of initial scanning rate limit doesn't take effect (#13069) close tikv/tikv#13068 Signed-off-by: Yu Juncen Co-authored-by: Ti Chi Robot --- components/backup-stream/src/event_loader.rs | 74 ++++++++++++++++---- components/backup-stream/src/metrics.rs | 5 ++ components/backup-stream/src/utils.rs | 19 +++-- 3 files changed, 80 insertions(+), 18 deletions(-) diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 841f6ac75b6..40e0ab5c60b 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -92,8 +92,8 @@ impl EventLoader { let region_id = region.get_id(); let scanner = ScannerBuilder::new(snapshot, to_ts) .range( - Some(Key::from_encoded_slice(®ion.start_key)), - Some(Key::from_encoded_slice(®ion.end_key)), + (!region.start_key.is_empty()).then(|| Key::from_encoded_slice(®ion.start_key)), + (!region.end_key.is_empty()).then(|| Key::from_encoded_slice(®ion.end_key)), ) .hint_min_ts(Some(from_ts)) .fill_cache(false) @@ -123,7 +123,7 @@ impl EventLoader { /// Drain the internal buffer, converting them to the [`ApplyEvents`], /// and tracking the locks at the same time. 
- fn omit_entries_to( + fn emit_entries_to( &mut self, result: &mut ApplyEvents, resolver: &mut TwoPhaseResolver, @@ -178,19 +178,22 @@ impl EventLoader { /// The context for loading incremental data between range. /// Like [`cdc::Initializer`], but supports initialize over range. /// Note: maybe we can merge those two structures? +/// Note': maybe extract more fields to trait so it would be easier to test. #[derive(Clone)] pub struct InitialDataLoader { - pub(crate) router: RT, - pub(crate) regions: R, // Note: maybe we can make it an abstract thing like `EventSink` with // method `async (KvEvent) -> Result<()>`? pub(crate) sink: Router, pub(crate) tracing: SubscriptionTracer, pub(crate) scheduler: Scheduler, + // Note: this is only for `init_range`, maybe make it an argument? + pub(crate) regions: R, + // Note: Maybe move those fields about initial scanning into some trait? + pub(crate) router: RT, pub(crate) quota: PendingMemoryQuota, - pub(crate) handle: Handle, pub(crate) limit: Limiter, + pub(crate) handle: Handle, _engine: PhantomData, } @@ -381,14 +384,12 @@ where "{:?}", msg )))); let mut events = ApplyEvents::with_capacity(1024, region.id); - let stat = event_loader.fill_entries()?; - let disk_read = self.with_resolver(region, |r| { - let (result, byte_size) = utils::with_record_read_throughput(|| { - event_loader.omit_entries_to(&mut events, r) - }); - result?; - Result::Ok(byte_size) - })?; + // Note: the call of `fill_entries` is the only step which would read the disk. + // we only need to record the disk throughput of this. 
+ let (stat, disk_read) = + utils::with_record_read_throughput(|| event_loader.fill_entries()); + let stat = stat?; + self.with_resolver(region, |r| event_loader.emit_entries_to(&mut events, r))?; if events.is_empty() { metrics::INITIAL_SCAN_DURATION.observe(start.saturating_elapsed_secs()); return Ok(stats.stat); @@ -402,6 +403,7 @@ where self.limit.blocking_consume(disk_read as _); debug!("sending events to router"; "size" => %event_size, "region" => %region_id); metrics::INCREMENTAL_SCAN_SIZE.observe(event_size as f64); + metrics::INCREMENTAL_SCAN_DISK_READ.inc_by(disk_read as f64); metrics::HEAP_MEMORY.add(event_size as _); join_handles.push(tokio::spawn(async move { utils::handle_on_event_result(&sched, sink.on_events(events).await); @@ -470,3 +472,47 @@ where Ok(()) } } + +#[cfg(test)] +mod tests { + use kvproto::metapb::*; + use tikv::storage::{txn::tests::*, Engine, TestEngineBuilder}; + use txn_types::TimeStamp; + + use super::EventLoader; + use crate::{ + router::ApplyEvents, subscription_track::TwoPhaseResolver, + utils::with_record_read_throughput, + }; + + #[test] + fn test_disk_read() { + let engine = TestEngineBuilder::new().build_without_cache().unwrap(); + for i in 0..100 { + let owned_key = format!("{:06}", i); + let key = owned_key.as_bytes(); + let owned_value = [i as u8; 512]; + let value = owned_value.as_slice(); + must_prewrite_put(&engine, key, value, key, i * 2); + must_commit(&engine, key, i * 2, i * 2 + 1); + } + // let compact the memtable to disk so we can see the disk read. 
+ engine.get_rocksdb().as_inner().compact_range(None, None); + + let mut r = Region::new(); + r.set_id(42); + r.set_start_key(b"".to_vec()); + r.set_end_key(b"".to_vec()); + let snap = engine.snapshot_on_kv_engine(b"", b"").unwrap(); + let mut loader = + EventLoader::load_from(snap, TimeStamp::zero(), TimeStamp::max(), &r).unwrap(); + + let (r, data_load) = with_record_read_throughput(|| loader.fill_entries()); + r.unwrap(); + let mut events = ApplyEvents::with_capacity(1024, 42); + let mut res = TwoPhaseResolver::new(42, None); + loader.emit_entries_to(&mut events, &mut res).unwrap(); + assert_ne!(events.len(), 0); + assert_ne!(data_load, 0); + } +} diff --git a/components/backup-stream/src/metrics.rs b/components/backup-stream/src/metrics.rs index 24a044bb4fb..a94be6df7f6 100644 --- a/components/backup-stream/src/metrics.rs +++ b/components/backup-stream/src/metrics.rs @@ -56,6 +56,11 @@ lazy_static! { exponential_buckets(1.0, 2.0, 16).unwrap() ) .unwrap(); + pub static ref INCREMENTAL_SCAN_DISK_READ: Counter = register_counter!( + "tikv_log_backup_initial_scan_disk_read", + "The total count of disk read bytes." + ) + .unwrap(); pub static ref INCREMENTAL_SCAN_SIZE: Histogram = register_histogram!( "tikv_stream_incremental_scan_bytes", "The size of scanning.", diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 486ce6ae0f8..0f09e747b80 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -519,7 +519,19 @@ impl ReadThroughputRecorder { let ins = self.ins.as_ref()?; let begin = self.begin.as_ref()?; let end = ins.io_stat().ok()??; - Some(end.read - begin.read) + let bytes_read = end.read - begin.read; + // FIXME: In our test environment, there may be too many caches hence + // the `bytes_read` is always zero :( + // For now, we eject here and let rocksDB prove that we did read something + // When the proc think we don't touch the block device (even in fact we didn't). 
+ // NOTE: In the real-world, we would accept the zero `bytes_read` value since the cache did exists. + #[cfg(test)] + if bytes_read == 0 { + // use println here so we can get this message even log doesn't enabled. + println!("ejecting in test since no read recorded in procfs"); + return None; + } + Some(bytes_read) } fn end(self) -> u64 { @@ -579,6 +591,8 @@ mod test { time::Duration, }; + use engine_rocks::raw::DBOptions; + use engine_traits::WriteOptions; use futures::executor::block_on; use crate::utils::{is_in_range, CallbackWaitGroup, SegmentMap}; @@ -720,8 +734,6 @@ mod test { } } - /// skip it currently. Test it at local env successfully but failed at pod. - #[cfg(FALSE)] #[test] fn test_recorder() { use engine_rocks::{raw::DB, RocksEngine}; @@ -731,7 +743,6 @@ mod test { let p = TempDir::new("test_db").unwrap(); let mut opt = DBOptions::default(); opt.create_if_missing(true); - opt.enable_multi_write_batch(true); let db = DB::open(opt.clone(), p.path().as_os_str().to_str().unwrap()).unwrap(); let engine = RocksEngine::from_db(Arc::new(db)); let mut wb = engine.write_batch(); From 1cc64cf4a1555efb28555ebaa6fc32b4918bdcce Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Thu, 21 Jul 2022 16:03:10 +0800 Subject: [PATCH 0104/1149] server: do not update background cpu limit when auto-tune is off (#13056) ref tikv/tikv#12679, close tikv/tikv#13055 ThreadTime is collected when the cpu limit in the QuotaLimiter is not infinity. #12679 updated the background cpu limit even if auto-tune is off, which is unnecessary. So, that causes additional cost of collecting thread CPU time in some critical paths. This commit sets cpu_time_limit of the QuotaLimiter only if auto-tune is enabled, so we don't waste effort to collect CPU time when auto-tune is not enabled. 
Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- components/server/src/server.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 8eca26404d9..58a4dc61338 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1296,20 +1296,17 @@ impl TiKvServer { }; // Determine the base cpu quota - let base_cpu_quota = { + let base_cpu_quota = // if cpu quota is not specified, start from optimistic case if quota_limiter.cputime_limiter(false).is_infinite() { - let quota = 1000_f64 + 1000_f64 * f64::max( BACKGROUND_REQUEST_CORE_LOWER_BOUND, SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_DEFAULT_RATIO, - ); - quota_limiter.set_cpu_time_limit(quota as usize, false); - quota + ) } else { quota_limiter.cputime_limiter(false) / 1000_f64 - } - }; + }; // Calculate the celling and floor quota let celling_quota = f64::min( @@ -1326,7 +1323,12 @@ impl TiKvServer { DEFAULT_QUOTA_LIMITER_TUNE_INTERVAL, move || { if quota_limiter.auto_tune_enabled() { - let old_quota = quota_limiter.cputime_limiter(false) / 1000_f64; + let cputime_limit = quota_limiter.cputime_limiter(false); + let old_quota = if cputime_limit.is_infinite() { + base_cpu_quota + } else { + cputime_limit / 1000_f64 + }; let cpu_usage = match proc_stats.cpu_usage() { Ok(r) => r, Err(_e) => 0.0, From 1f0a1a3451302647f4de7fc06fe323e0cba67b98 Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 21 Jul 2022 16:43:10 +0800 Subject: [PATCH 0105/1149] raftstore: fix high commit log duration when adding new peer (#13078) close tikv/tikv#13077 When adding a new peer, `alive_cache_idx` would not consider the new peer still in applying snapshot. Then it may trigger compacting entry cache due to `alive_cache_idx` being equal to `applied_idx`. After the snapshot is applied, the log gap of new peer is not in entry cache, which triggers async fetch to read disk. 
Considering raft engine's read performance is not as good as rocksdb's, once there are a lot of Regions triggering async fetch, the process of replicating log to new peer would be slow. If there is a conf change promoting the learner and demoting another peer, the commit index can't be advanced in joint state because the to-be-learner peer doesn't catch up logs in time. Signed-off-by: Connor1996 Co-authored-by: Ti Chi Robot --- components/engine_panic/src/raft_engine.rs | 4 -- components/engine_rocks/src/raft_engine.rs | 4 -- components/engine_traits/src/raft_engine.rs | 8 --- components/raft_log_engine/src/engine.rs | 6 -- components/raftstore/src/store/fsm/peer.rs | 43 +++++++------ components/raftstore/src/store/peer.rs | 4 +- .../raftstore/src/store/peer_storage.rs | 62 ++++--------------- tests/failpoints/cases/test_async_fetch.rs | 36 +++++++++++ 8 files changed, 73 insertions(+), 94 deletions(-) diff --git a/components/engine_panic/src/raft_engine.rs b/components/engine_panic/src/raft_engine.rs index d6f82c7f646..2fffb544fe3 100644 --- a/components/engine_panic/src/raft_engine.rs +++ b/components/engine_panic/src/raft_engine.rs @@ -118,10 +118,6 @@ impl RaftEngine for PanicEngine { panic!() } - fn has_builtin_entry_cache(&self) -> bool { - panic!() - } - fn flush_metrics(&self, instance: &str) { panic!() } diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index 2f67904486f..607e0bfca17 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -312,10 +312,6 @@ impl RaftEngine for RocksEngine { Ok(vec![]) } - fn has_builtin_entry_cache(&self) -> bool { - false - } - fn flush_metrics(&self, instance: &str) { KvEngine::flush_metrics(self, instance) } diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index 7773ee3245f..58a78f605f9 100644 --- a/components/engine_traits/src/raft_engine.rs +++ 
b/components/engine_traits/src/raft_engine.rs @@ -119,14 +119,6 @@ pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send /// which needs to be compacted ASAP. fn purge_expired_files(&self) -> Result>; - /// The `RaftEngine` has a builtin entry cache or not. - fn has_builtin_entry_cache(&self) -> bool { - false - } - - /// GC the builtin entry cache. - fn gc_entry_cache(&self, _raft_group_id: u64, _to: u64) {} - fn flush_metrics(&self, _instance: &str) {} fn flush_stats(&self) -> Option { None diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index d2f8b7cb4e1..22d2d645165 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -548,12 +548,6 @@ impl RaftEngine for RaftLogEngine { self.0.purge_expired_files().map_err(transfer_error) } - fn has_builtin_entry_cache(&self) -> bool { - false - } - - fn gc_entry_cache(&self, _raft_group_id: u64, _to: u64) {} - /// Flush current cache stats. fn flush_stats(&self) -> Option { None diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index fad93ac54d8..f3bcd56eabf 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -1916,7 +1916,7 @@ where ); if self.fsm.peer.pending_remove { - self.fsm.peer.mut_store().flush_cache_metrics(); + self.fsm.peer.mut_store().flush_entry_cache_metrics(); return; } // When having pending snapshot, if election timeout is met, it can't pass @@ -1983,7 +1983,7 @@ where } self.fsm.peer.post_raft_group_tick(); - self.fsm.peer.mut_store().flush_cache_metrics(); + self.fsm.peer.mut_store().flush_entry_cache_metrics(); // Keep ticking if there are still pending read requests or this node is within hibernate timeout. 
if res.is_none() /* hibernate_region is false */ || @@ -3560,7 +3560,7 @@ where let compact_to = state.get_index() + 1; self.fsm.peer.schedule_raftlog_gc(self.ctx, compact_to); self.fsm.peer.last_compacted_idx = compact_to; - self.fsm.peer.mut_store().compact_to(compact_to); + self.fsm.peer.mut_store().on_compact_raftlog(compact_to); } fn on_ready_split_region( @@ -4900,7 +4900,7 @@ where // snapshot generating has already been cancelled when the role becomes follower. return; } - if !self.fsm.peer.get_store().is_cache_empty() || !self.ctx.cfg.hibernate_regions { + if !self.fsm.peer.get_store().is_entry_cache_empty() || !self.ctx.cfg.hibernate_regions { self.register_raft_gc_log_tick(); } fail_point!("on_raft_log_gc_tick_1", self.fsm.peer_id() == 1, |_| {}); @@ -4930,21 +4930,26 @@ where // `alive_cache_idx` is only used to gc cache. let applied_idx = self.fsm.peer.get_store().applied_index(); let truncated_idx = self.fsm.peer.get_store().truncated_index(); + let first_idx = self.fsm.peer.get_store().first_index(); let last_idx = self.fsm.peer.get_store().last_index(); + let (mut replicated_idx, mut alive_cache_idx) = (last_idx, last_idx); for (peer_id, p) in self.fsm.peer.raft_group.raft.prs().iter() { if replicated_idx > p.matched { replicated_idx = p.matched; } if let Some(last_heartbeat) = self.fsm.peer.peer_heartbeats.get(peer_id) { - if alive_cache_idx > p.matched - && p.matched >= truncated_idx - && *last_heartbeat > cache_alive_limit - { - alive_cache_idx = p.matched; + if *last_heartbeat > cache_alive_limit { + if alive_cache_idx > p.matched && p.matched >= truncated_idx { + alive_cache_idx = p.matched; + } else if p.matched == 0 { + // the new peer is still applying snapshot, do not compact cache now + alive_cache_idx = 0; + } } } } + // When an election happened or a new peer is added, replicated_idx can be 0. 
if replicated_idx > 0 { assert!( @@ -4955,21 +4960,20 @@ where ); REGION_MAX_LOG_LAG.observe((last_idx - replicated_idx) as f64); } + + // leader may call `get_term()` on the latest replicated index, so compact + // entries before `alive_cache_idx` instead of `alive_cache_idx + 1`. self.fsm .peer .mut_store() - .maybe_gc_cache(alive_cache_idx, applied_idx); + .compact_entry_cache(std::cmp::min(alive_cache_idx, applied_idx + 1)); if needs_evict_entry_cache(self.ctx.cfg.evict_cache_on_memory_ratio) { - self.fsm.peer.mut_store().evict_cache(true); - if !self.fsm.peer.get_store().cache_is_empty() { + self.fsm.peer.mut_store().evict_entry_cache(true); + if !self.fsm.peer.get_store().is_entry_cache_empty() { self.register_entry_cache_evict_tick(); } } - let mut total_gc_logs = 0; - - let first_idx = self.fsm.peer.get_store().first_index(); - let mut compact_idx = if force_compact && replicated_idx > first_idx { replicated_idx } else if (applied_idx > first_idx @@ -5007,7 +5011,6 @@ where .compact_idx_too_small += 1; return; } - total_gc_logs += compact_idx - first_idx; // Create a compact log request and notify directly. 
let region_id = self.fsm.peer.region().get_id(); @@ -5022,7 +5025,7 @@ where self.fsm.skip_gc_raft_log_ticks = 0; self.register_raft_gc_log_tick(); - PEER_GC_RAFT_LOG_COUNTER.inc_by(total_gc_logs); + PEER_GC_RAFT_LOG_COUNTER.inc_by(compact_idx - first_idx); } fn register_entry_cache_evict_tick(&mut self) { @@ -5032,11 +5035,11 @@ where fn on_entry_cache_evict_tick(&mut self) { fail_point!("on_entry_cache_evict_tick", |_| {}); if needs_evict_entry_cache(self.ctx.cfg.evict_cache_on_memory_ratio) { - self.fsm.peer.mut_store().evict_cache(true); + self.fsm.peer.mut_store().evict_entry_cache(true); } let mut _usage = 0; if memory_usage_reaches_high_water(&mut _usage) - && !self.fsm.peer.get_store().cache_is_empty() + && !self.fsm.peer.get_store().is_entry_cache_empty() { self.register_entry_cache_evict_tick(); } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 489db8b9600..1a7954ca037 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -2778,7 +2778,7 @@ where .trace_cached_entries(apply.entries[0].clone()); if needs_evict_entry_cache(ctx.cfg.evict_cache_on_memory_ratio) { // Compact all cached entries instead of half evict. 
- self.mut_store().evict_cache(false); + self.mut_store().evict_entry_cache(false); } ctx.apply_router .schedule_task(self.region_id, ApplyTask::apply(apply)); @@ -3156,7 +3156,7 @@ where if !self.is_leader() { self.mut_store() - .compact_cache_to(apply_state.applied_index + 1); + .compact_entry_cache(apply_state.applied_index + 1); } let progress_to_be_updated = self.mut_store().applied_index_term() != applied_index_term; diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index a6208b09f9e..58e35ff9084 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -1383,50 +1383,23 @@ where self.last_term = last_term; } - pub fn compact_to(&mut self, idx: u64) { - self.compact_cache_to(idx); - + pub fn on_compact_raftlog(&mut self, idx: u64) { + self.compact_entry_cache(idx); self.cancel_generating_snap(Some(idx)); } - pub fn compact_cache_to(&mut self, idx: u64) { + pub fn compact_entry_cache(&mut self, idx: u64) { self.cache.compact_to(idx); - let rid = self.get_region_id(); - if self.engines.raft.has_builtin_entry_cache() { - self.engines.raft.gc_entry_cache(rid, idx); - } } #[inline] - pub fn is_cache_empty(&self) -> bool { + pub fn is_entry_cache_empty(&self) -> bool { self.cache.is_empty() } - pub fn maybe_gc_cache(&mut self, replicated_idx: u64, apply_idx: u64) { - if self.engines.raft.has_builtin_entry_cache() { - let rid = self.get_region_id(); - self.engines.raft.gc_entry_cache(rid, apply_idx + 1); - } - if replicated_idx == apply_idx { - // The region is inactive, clear the cache immediately. - self.cache.compact_to(apply_idx + 1); - return; - } - let cache_first_idx = match self.cache.first_index() { - None => return, - Some(idx) => idx, - }; - if cache_first_idx > replicated_idx + 1 { - // Catching up log requires accessing fs already, let's optimize for - // the common case. - // Maybe gc to second least replicated_idx is better. 
- self.cache.compact_to(apply_idx + 1); - } - } - /// Evict entries from the cache. - pub fn evict_cache(&mut self, half: bool) { - if !self.cache.cache.is_empty() { + pub fn evict_entry_cache(&mut self, half: bool) { + if !self.is_entry_cache_empty() { let cache = &mut self.cache; let cache_len = cache.cache.len(); let drain_to = if half { cache_len / 2 } else { cache_len - 1 }; @@ -1436,22 +1409,11 @@ where } } - pub fn cache_is_empty(&self) -> bool { - self.cache.cache.is_empty() - } - #[inline] - pub fn flush_cache_metrics(&mut self) { + pub fn flush_entry_cache_metrics(&mut self) { // NOTE: memory usage of entry cache is flushed realtime. self.cache.flush_stats(); self.raftlog_fetch_stats.flush_stats(); - if self.engines.raft.has_builtin_entry_cache() { - if let Some(stats) = self.engines.raft.flush_stats() { - RAFT_ENTRIES_CACHES_GAUGE.set(stats.cache_size as i64); - RAFT_ENTRY_FETCHES.hit.inc_by(stats.hit as u64); - RAFT_ENTRY_FETCHES.miss.inc_by(stats.miss as u64); - } - } } // Apply the peer with given snapshot. @@ -2457,7 +2419,7 @@ mod tests { router, store.engines.raft.clone(), )); - store.compact_cache_to(5); + store.compact_entry_cache(5); let mut e = store.entries(lo, hi, maxsize, GetEntriesContext::empty(true)); if e == Err(raft::Error::Store( raft::StorageError::LogTemporarilyUnavailable, @@ -3147,20 +3109,20 @@ mod tests { // compact to min(5 + 1, 7) store.cache.persisted = 5; - store.compact_to(7); + store.compact_entry_cache(7); exp_res = vec![new_entry(6, 7), new_entry(7, 8)]; validate_cache(&store, &exp_res); // compact to min(7 + 1, 7) store.cache.persisted = 7; - store.compact_to(7); + store.compact_entry_cache(7); exp_res = vec![new_entry(7, 8)]; validate_cache(&store, &exp_res); // compact all - store.compact_to(8); + store.compact_entry_cache(8); validate_cache(&store, &[]); // invalid compaction should be ignored. 
- store.compact_to(6); + store.compact_entry_cache(6); } #[test] diff --git a/tests/failpoints/cases/test_async_fetch.rs b/tests/failpoints/cases/test_async_fetch.rs index 28df1dba891..c6b8a693085 100644 --- a/tests/failpoints/cases/test_async_fetch.rs +++ b/tests/failpoints/cases/test_async_fetch.rs @@ -234,3 +234,39 @@ fn test_node_async_fetch_leader_change() { must_get_equal(&cluster.get_engine(1), &k, &v); } } + +// Test the case whether entry cache is reserved for the newly added peer. +#[test] +fn test_node_compact_entry_cache() { + let count = 5; + let mut cluster = new_node_cluster(0, count); + cluster.pd_client.disable_default_operator(); + + cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(50); + cluster.cfg.raft_store.raft_log_reserve_max_ticks = 2; + cluster.run(); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + cluster.must_put(b"k0", b"v0"); + cluster.pd_client.must_remove_peer(1, new_peer(5, 5)); + + // pause snapshot applied + fail::cfg("before_region_gen_snap", "pause").unwrap(); + fail::cfg("worker_async_fetch_raft_log", "pause").unwrap(); + // change one peer to learner + cluster.pd_client.add_peer(1, new_learner_peer(5, 5)); + + // cause log lag and pause async fetch to check if entry cache is reserved for the learner + for i in 1..6 { + let k = i.to_string().into_bytes(); + let v = k.clone(); + cluster.must_put(&k, &v); + } + std::thread::sleep(Duration::from_millis(100)); + + fail::remove("before_region_gen_snap"); + cluster.pd_client.must_have_peer(1, new_learner_peer(5, 5)); + + // if entry cache is not reserved, the learner will not be able to catch up. 
+ must_get_equal(&cluster.get_engine(5), b"5", b"5"); +} From dc7c48d1731e079ae0949694f88858554982136e Mon Sep 17 00:00:00 2001 From: BornChanger <97348524+BornChanger@users.noreply.github.com> Date: Thu, 21 Jul 2022 19:55:10 +0800 Subject: [PATCH 0106/1149] components, src: avoid cpu quota limitation contamination (#13085) close tikv/tikv#13084 Signed-off-by: BornChanger Co-authored-by: Ti Chi Robot --- components/tidb_query_executors/src/runner.rs | 2 +- components/tikv_util/src/quota_limiter.rs | 67 ++++++++++--------- src/config.rs | 12 ++-- src/coprocessor/statistics/analyze.rs | 10 +-- src/storage/mod.rs | 4 +- src/storage/txn/scheduler.rs | 2 +- 6 files changed, 50 insertions(+), 47 deletions(-) diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index dc88c1f6993..4a8a3a02851 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -453,7 +453,7 @@ impl BatchExecutorsRunner { let mut chunk = Chunk::default(); - let mut sample = self.quota_limiter.new_sample(); + let mut sample = self.quota_limiter.new_sample(true); let (drained, record_len) = { let _guard = sample.observe_cpu(); self.internal_handle_request( diff --git a/components/tikv_util/src/quota_limiter.rs b/components/tikv_util/src/quota_limiter.rs index c9a761f49de..f382964c4d1 100644 --- a/components/tikv_util/src/quota_limiter.rs +++ b/components/tikv_util/src/quota_limiter.rs @@ -234,21 +234,24 @@ impl QuotaLimiter { } // To generate a sampler. 
- pub fn new_sample(&self) -> Sample { + pub fn new_sample(&self, is_foreground: bool) -> Sample { Sample { read_bytes: 0, write_bytes: 0, cpu_time: Duration::ZERO, - enable_cpu_limit: !self - .foreground_limiters - .cputime_limiter - .speed_limit() - .is_infinite() - || !self + enable_cpu_limit: if is_foreground { + !self + .foreground_limiters + .cputime_limiter + .speed_limit() + .is_infinite() + } else { + !self .background_limiters .cputime_limiter .speed_limit() - .is_infinite(), + .is_infinite() + }, } } @@ -389,25 +392,25 @@ mod tests { ); }; - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_cpu_time(Duration::from_millis(60)); let should_delay = block_on(quota_limiter.consume_sample(sample, true)); check_duration(should_delay, Duration::ZERO); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_cpu_time(Duration::from_millis(50)); let should_delay = block_on(quota_limiter.consume_sample(sample, true)); check_duration(should_delay, Duration::from_millis(110)); std::thread::sleep(Duration::from_millis(10)); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_cpu_time(Duration::from_millis(20)); let should_delay = block_on(quota_limiter.consume_sample(sample, true)); // should less 60+50+20 assert!(should_delay < Duration::from_millis(130)); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_cpu_time(Duration::from_millis(200)); sample.add_write_bytes(256); let should_delay = block_on(quota_limiter.consume_sample(sample, true)); @@ -417,25 +420,25 @@ mod tests { assert!(thread_start_time.elapsed() < Duration::from_millis(50)); quota_limiter.set_cpu_time_limit(2000, true); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_cpu_time(Duration::from_millis(200)); let should_delay 
= block_on(quota_limiter.consume_sample(sample, true)); check_duration(should_delay, Duration::from_millis(100)); quota_limiter.set_read_bandwidth_limit(ReadableSize(512), true); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_read_bytes(128); let should_delay = block_on(quota_limiter.consume_sample(sample, true)); check_duration(should_delay, Duration::from_millis(250)); quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(2), true); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_write_bytes(256); let should_delay = block_on(quota_limiter.consume_sample(sample, true)); check_duration(should_delay, Duration::from_millis(125)); quota_limiter.set_max_delay_duration(ReadableDuration::millis(40)); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_read_bytes(256); sample.add_write_bytes(512); let should_delay = block_on(quota_limiter.consume_sample(sample, true)); @@ -443,19 +446,19 @@ mod tests { // test change limiter to 0 quota_limiter.set_cpu_time_limit(0, true); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_cpu_time(Duration::from_millis(100)); let should_delay = block_on(quota_limiter.consume_sample(sample, true)); check_duration(should_delay, Duration::ZERO); quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(0), true); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_write_bytes(256); let should_delay = block_on(quota_limiter.consume_sample(sample, true)); check_duration(should_delay, Duration::ZERO); quota_limiter.set_read_bandwidth_limit(ReadableSize::kb(0), true); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_read_bytes(256); let should_delay = 
block_on(quota_limiter.consume_sample(sample, true)); check_duration(should_delay, Duration::ZERO); @@ -463,30 +466,30 @@ mod tests { // set bandwidth back quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(1), true); quota_limiter.set_max_delay_duration(ReadableDuration::millis(0)); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_write_bytes(128); let should_delay = block_on(quota_limiter.consume_sample(sample, true)); check_duration(should_delay, Duration::from_millis(125)); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_cpu_time(Duration::from_millis(60)); let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::ZERO); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_cpu_time(Duration::from_millis(50)); let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::from_millis(110)); std::thread::sleep(Duration::from_millis(10)); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_cpu_time(Duration::from_millis(20)); let should_delay = block_on(quota_limiter.consume_sample(sample, false)); // should less 60+50+20 assert!(should_delay < Duration::from_millis(130)); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_cpu_time(Duration::from_millis(200)); sample.add_write_bytes(256); let should_delay = block_on(quota_limiter.consume_sample(sample, false)); @@ -496,25 +499,25 @@ mod tests { assert!(thread_start_time.elapsed() < Duration::from_millis(50)); quota_limiter.set_cpu_time_limit(2000, false); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_cpu_time(Duration::from_millis(200)); let 
should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::from_millis(100)); quota_limiter.set_read_bandwidth_limit(ReadableSize(512), false); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_read_bytes(128); let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::from_millis(250)); quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(2), false); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_write_bytes(256); let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::from_millis(125)); quota_limiter.set_max_delay_duration(ReadableDuration::millis(40)); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_read_bytes(256); sample.add_write_bytes(512); let should_delay = block_on(quota_limiter.consume_sample(sample, false)); @@ -522,19 +525,19 @@ mod tests { // test change limiter to 0 quota_limiter.set_cpu_time_limit(0, false); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_cpu_time(Duration::from_millis(100)); let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::ZERO); quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(0), false); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_write_bytes(256); let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::ZERO); quota_limiter.set_read_bandwidth_limit(ReadableSize::kb(0), false); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_read_bytes(256); let should_delay = 
block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::ZERO); @@ -542,7 +545,7 @@ mod tests { // set bandwidth back quota_limiter.set_write_bandwidth_limit(ReadableSize::kb(1), false); quota_limiter.set_max_delay_duration(ReadableDuration::millis(0)); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_write_bytes(128); let should_delay = block_on(quota_limiter.consume_sample(sample, false)); check_duration(should_delay, Duration::from_millis(125)); diff --git a/src/config.rs b/src/config.rs index 6ae622bd806..ebf1e132777 100644 --- a/src/config.rs +++ b/src/config.rs @@ -4929,7 +4929,7 @@ mod tests { cfg.quota.foreground_write_bandwidth = ReadableSize::mb(256); assert_eq!(cfg_controller.get_current(), cfg); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_read_bytes(ReadableSize::mb(32).0 as usize); let should_delay = block_on(quota_limiter.consume_sample(sample, true)); assert_eq!(should_delay, Duration::from_millis(125)); @@ -4939,7 +4939,7 @@ mod tests { .unwrap(); cfg.quota.foreground_read_bandwidth = ReadableSize::mb(512); assert_eq!(cfg_controller.get_current(), cfg); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_write_bytes(ReadableSize::mb(128).0 as usize); let should_delay = block_on(quota_limiter.consume_sample(sample, true)); assert_eq!(should_delay, Duration::from_millis(500)); @@ -4956,7 +4956,7 @@ mod tests { cfg.quota.background_write_bandwidth = ReadableSize::mb(256); assert_eq!(cfg_controller.get_current(), cfg); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_read_bytes(ReadableSize::mb(32).0 as usize); let should_delay = block_on(quota_limiter.consume_sample(sample, false)); assert_eq!(should_delay, Duration::from_millis(125)); @@ -4966,7 +4966,7 @@ mod tests { 
.unwrap(); cfg.quota.background_read_bandwidth = ReadableSize::mb(512); assert_eq!(cfg_controller.get_current(), cfg); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_write_bytes(ReadableSize::mb(128).0 as usize); let should_delay = block_on(quota_limiter.consume_sample(sample, false)); assert_eq!(should_delay, Duration::from_millis(500)); @@ -4976,12 +4976,12 @@ mod tests { .unwrap(); cfg.quota.max_delay_duration = ReadableDuration::millis(50); assert_eq!(cfg_controller.get_current(), cfg); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); sample.add_write_bytes(ReadableSize::mb(128).0 as usize); let should_delay = block_on(quota_limiter.consume_sample(sample, true)); assert_eq!(should_delay, Duration::from_millis(50)); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(false); sample.add_write_bytes(ReadableSize::mb(128).0 as usize); let should_delay = block_on(quota_limiter.consume_sample(sample, false)); assert_eq!(should_delay, Duration::from_millis(50)); diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index bb0348be98f..05a30f64c4d 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -325,7 +325,7 @@ struct RowSampleBuilder { columns_info: Vec, column_groups: Vec, quota_limiter: Arc, - is_quota_auto_tune: bool, + is_auto_analyze: bool, } impl RowSampleBuilder { @@ -334,7 +334,7 @@ impl RowSampleBuilder { storage: TiKvStorage>, ranges: Vec, quota_limiter: Arc, - is_quota_auto_tune: bool, + is_auto_analyze: bool, ) -> Result { let columns_info: Vec<_> = req.take_columns_info().into(); if columns_info.is_empty() { @@ -359,7 +359,7 @@ impl RowSampleBuilder { columns_info, column_groups: req.take_column_groups().into(), quota_limiter, - is_quota_auto_tune, + is_auto_analyze, }) } @@ -391,7 +391,7 @@ impl RowSampleBuilder { 
time_slice_start = Instant::now(); } - let mut sample = self.quota_limiter.new_sample(); + let mut sample = self.quota_limiter.new_sample(!self.is_auto_analyze); { let _guard = sample.observe_cpu(); let result = self.data.next_batch(BATCH_MAX_SIZE); @@ -446,7 +446,7 @@ impl RowSampleBuilder { // Don't let analyze bandwidth limit the quota limiter, this is already limited in rate limiter. let quota_delay = { - if !self.is_quota_auto_tune { + if !self.is_auto_analyze { self.quota_limiter.consume_sample(sample, true).await } else { self.quota_limiter.consume_sample(sample, false).await diff --git a/src/storage/mod.rs b/src/storage/mod.rs index cb792d7aec2..0864c9edd2d 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -569,7 +569,7 @@ impl Storage { let api_version = self.api_version; let quota_limiter = self.quota_limiter.clone(); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); let res = self.read_pool.spawn_handle( async move { @@ -898,7 +898,7 @@ impl Storage { let concurrency_manager = self.concurrency_manager.clone(); let api_version = self.api_version; let quota_limiter = self.quota_limiter.clone(); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); let res = self.read_pool.spawn_handle( async move { let stage_scheduled_ts = Instant::now(); diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index e78dbdaa49d..2588e820d21 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -775,7 +775,7 @@ impl Scheduler { let ts = task.cmd.ts(); let scheduler = self.clone(); let quota_limiter = self.inner.quota_limiter.clone(); - let mut sample = quota_limiter.new_sample(); + let mut sample = quota_limiter.new_sample(true); let pessimistic_lock_mode = self.pessimistic_lock_mode(); let pipelined = task.cmd.can_be_pipelined() && pessimistic_lock_mode == PessimisticLockMode::Pipelined; From 
b1952dcaf8af9ab218916b26eb5dd3ce72a8d638 Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 22 Jul 2022 13:41:09 +0800 Subject: [PATCH 0107/1149] metrics: fix a expression error of unified read pool cpu (#13087) close tikv/tikv#13086 Signed-off-by: glorv Co-authored-by: Ti Chi Robot --- metrics/grafana/tikv_details.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index adb398824ca..eda4e88de66 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -6836,7 +6836,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"unified_read_po*\"}[1m])) by (instance)", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"unified_read_po.*\"}[1m])) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 2, From 6e66f09a6f463b6586c9ba89eaf2a96c2106328c Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 22 Jul 2022 16:53:09 +0800 Subject: [PATCH 0108/1149] engine_trait: introduce status error (#13059) ref tikv/tikv#13058 This PR is the first step to prepare for tirocks, the new rocksdb wrapper. The status error is introduced and iterator trait is refactored to keep consistent with tirocks. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot Co-authored-by: Xinye Tao --- cmd/tikv-ctl/src/executor.rs | 4 +- components/backup-stream/src/utils.rs | 2 +- components/backup/src/writer.rs | 2 +- components/cdc/src/initializer.rs | 4 +- components/engine_panic/src/cf_options.rs | 2 +- components/engine_panic/src/engine.rs | 19 +-- components/engine_panic/src/snapshot.rs | 21 ++-- components/engine_panic/src/sst.rs | 21 ++-- components/engine_rocks/src/cf_options.rs | 6 +- components/engine_rocks/src/compact.rs | 6 +- components/engine_rocks/src/encryption.rs | 12 +- components/engine_rocks/src/engine.rs | 61 ++++----- .../engine_rocks/src/engine_iterator.rs | 55 ++++---- components/engine_rocks/src/file_system.rs | 23 ++-- components/engine_rocks/src/import.rs | 11 +- components/engine_rocks/src/lib.rs | 4 +- components/engine_rocks/src/misc.rs | 44 +++---- components/engine_rocks/src/raft_engine.rs | 11 +- components/engine_rocks/src/raw.rs | 6 +- components/engine_rocks/src/raw_util.rs | 19 +-- components/engine_rocks/src/snapshot.rs | 21 +--- components/engine_rocks/src/sst.rs | 76 ++++++----- components/engine_rocks/src/status.rs | 19 +++ .../engine_rocks/src/table_properties.rs | 12 +- components/engine_rocks/src/util.rs | 8 +- components/engine_rocks/src/write_batch.rs | 32 +++-- components/engine_test/src/lib.rs | 5 +- components/engine_traits/src/cf_options.rs | 2 +- components/engine_traits/src/engine.rs | 12 +- components/engine_traits/src/errors.rs | 118 ++++++++++++++++-- components/engine_traits/src/file_system.rs | 10 +- components/engine_traits/src/iterable.rs | 93 ++++---------- .../engine_traits_tests/src/iterator.rs | 78 ++++++------ .../src/read_consistency.rs | 6 +- components/engine_traits_tests/src/sst.rs | 27 ++-- .../src/coprocessor/consistency_check.rs | 2 +- .../src/coprocessor/split_check/table.rs | 6 +- components/raftstore/src/store/bootstrap.rs | 2 +- .../raftstore/src/store/compaction_guard.rs | 6 +- 
components/raftstore/src/store/fsm/store.rs | 2 +- .../raftstore/src/store/peer_storage.rs | 4 +- .../raftstore/src/store/region_snapshot.rs | 80 ++++-------- components/raftstore/src/store/snap.rs | 2 +- components/raftstore/src/store/snap/io.rs | 6 +- .../raftstore/src/store/worker/split_check.rs | 4 +- components/server/src/raft_engine_switch.rs | 11 +- components/sst_importer/src/import_file.rs | 3 +- components/sst_importer/src/sst_importer.rs | 40 +++--- components/test_backup/src/lib.rs | 2 +- components/test_raftstore/src/cluster.rs | 8 +- components/test_raftstore/src/util.rs | 2 +- components/tikv_kv/src/btree_engine.rs | 17 ++- components/tikv_kv/src/cursor.rs | 8 +- components/tikv_kv/src/lib.rs | 30 +++-- components/tikv_kv/src/raftstore_impls.rs | 11 +- components/tikv_kv/src/rocksdb_engine.rs | 19 ++- components/tikv_util/src/config.rs | 14 +-- src/config.rs | 39 +++--- src/import/duplicate_detect.rs | 4 +- src/server/debug.rs | 24 ++-- src/server/engine_factory.rs | 2 +- src/server/engine_factory_v2.rs | 2 +- src/server/gc_worker/compaction_filter.rs | 5 +- src/server/node.rs | 2 +- src/server/reset_to_version.rs | 24 ++-- src/storage/kv/test_engine_builder.rs | 8 +- src/storage/mod.rs | 8 +- src/storage/mvcc/consistency_check.rs | 12 +- src/storage/mvcc/reader/reader.rs | 4 +- src/storage/raw/encoded.rs | 11 +- src/storage/raw/raw_mvcc.rs | 25 ++-- src/storage/raw/store.rs | 11 +- src/storage/txn/commands/prewrite.rs | 5 +- src/storage/txn/store.rs | 5 +- .../cases/test_replica_stale_read.rs | 13 +- tests/failpoints/cases/test_ttl.rs | 2 +- .../raftstore/test_split_region.rs | 18 ++- .../integrations/raftstore/test_tombstone.rs | 2 +- tests/integrations/storage/test_raftkv.rs | 32 ++--- 79 files changed, 700 insertions(+), 689 deletions(-) create mode 100644 components/engine_rocks/src/status.rs diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 96b322936bc..401d96e5d8e 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ 
b/cmd/tikv-ctl/src/executor.rs @@ -1090,8 +1090,8 @@ impl DebugExecutor for Debugger { fn handle_engine_error(err: EngineError) -> ! { error!("error while open kvdb: {}", err); - if let EngineError::Engine(msg) = err { - if msg.starts_with(LOCK_FILE_ERROR) { + if let EngineError::Engine(s) = err { + if s.state().contains(LOCK_FILE_ERROR) { error!( "LOCK file conflict indicates TiKV process is running. \ Do NOT delete the LOCK file and force the command to run. \ diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 0f09e747b80..6ad26cb045c 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -759,7 +759,7 @@ mod test { let (items, size) = super::with_record_read_throughput(|| { let mut items = vec![]; let snap = engine.snapshot(); - snap.scan(b"", b"", false, |k, v| { + snap.scan(CF_DEFAULT, b"", b"", false, |k, v| { items.push((k.to_owned(), v.to_owned())); Ok(true) }) diff --git a/components/backup/src/writer.rs b/components/backup/src/writer.rs index 4c4c6dc5ec7..99a907948ce 100644 --- a/components/backup/src/writer.rs +++ b/components/backup/src/writer.rs @@ -458,7 +458,7 @@ mod tests { } for (cf, kv) in kvs { let mut map = BTreeMap::new(); - db.scan_cf( + db.scan( cf, keys::DATA_MIN_KEY, keys::DATA_MAX_KEY, diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index e1feb0c9795..28b7e5f5d0a 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -240,7 +240,9 @@ impl Initializer { let (raw_key_prefix, raw_key_prefix_end) = ApiV2::get_rawkv_range(); iter_opt.set_lower_bound(&[raw_key_prefix], DATA_KEY_PREFIX_LEN); iter_opt.set_upper_bound(&[raw_key_prefix_end], DATA_KEY_PREFIX_LEN); - let mut iter = RawMvccSnapshot::from_snapshot(snap).iter(iter_opt).unwrap(); + let mut iter = RawMvccSnapshot::from_snapshot(snap) + .iter(CF_DEFAULT, iter_opt) + .unwrap(); iter.seek_to_first()?; Scanner::RawKvScanner(iter) diff 
--git a/components/engine_panic/src/cf_options.rs b/components/engine_panic/src/cf_options.rs index 918185b8183..f00db2eeb4f 100644 --- a/components/engine_panic/src/cf_options.rs +++ b/components/engine_panic/src/cf_options.rs @@ -44,7 +44,7 @@ impl ColumnFamilyOptions for PanicColumnFamilyOptions { fn get_block_cache_capacity(&self) -> u64 { panic!() } - fn set_block_cache_capacity(&self, capacity: u64) -> std::result::Result<(), String> { + fn set_block_cache_capacity(&self, capacity: u64) -> Result<()> { panic!() } fn set_titandb_options(&mut self, opts: &Self::TitanDBOptions) { diff --git a/components/engine_panic/src/engine.rs b/components/engine_panic/src/engine.rs index 5608b55ea00..128cb318ed6 100644 --- a/components/engine_panic/src/engine.rs +++ b/components/engine_panic/src/engine.rs @@ -1,7 +1,7 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. use engine_traits::{ - IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, Result, SeekKey, SyncMutable, + IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, Result, SyncMutable, TabletAccessor, WriteOptions, }; @@ -75,10 +75,7 @@ impl SyncMutable for PanicEngine { impl Iterable for PanicEngine { type Iterator = PanicEngineIterator; - fn iterator_opt(&self, opts: IterOptions) -> Result { - panic!() - } - fn iterator_cf_opt(&self, cf: &str, opts: IterOptions) -> Result { + fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { panic!() } } @@ -86,10 +83,18 @@ impl Iterable for PanicEngine { pub struct PanicEngineIterator; impl Iterator for PanicEngineIterator { - fn seek(&mut self, key: SeekKey<'_>) -> Result { + fn seek(&mut self, key: &[u8]) -> Result { + panic!() + } + fn seek_for_prev(&mut self, key: &[u8]) -> Result { panic!() } - fn seek_for_prev(&mut self, key: SeekKey<'_>) -> Result { + + fn seek_to_first(&mut self) -> Result { + panic!() + } + + fn seek_to_last(&mut self) -> Result { panic!() } diff --git a/components/engine_panic/src/snapshot.rs 
b/components/engine_panic/src/snapshot.rs index c65dc560326..e27ed42d093 100644 --- a/components/engine_panic/src/snapshot.rs +++ b/components/engine_panic/src/snapshot.rs @@ -2,9 +2,7 @@ use std::ops::Deref; -use engine_traits::{ - IterOptions, Iterable, Iterator, Peekable, ReadOptions, Result, SeekKey, Snapshot, -}; +use engine_traits::{IterOptions, Iterable, Iterator, Peekable, ReadOptions, Result, Snapshot}; use crate::{db_vector::PanicDBVector, engine::PanicEngine}; @@ -36,10 +34,7 @@ impl Peekable for PanicSnapshot { impl Iterable for PanicSnapshot { type Iterator = PanicSnapshotIterator; - fn iterator_opt(&self, opts: IterOptions) -> Result { - panic!() - } - fn iterator_cf_opt(&self, cf: &str, opts: IterOptions) -> Result { + fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { panic!() } } @@ -47,10 +42,18 @@ impl Iterable for PanicSnapshot { pub struct PanicSnapshotIterator; impl Iterator for PanicSnapshotIterator { - fn seek(&mut self, key: SeekKey<'_>) -> Result { + fn seek(&mut self, key: &[u8]) -> Result { + panic!() + } + fn seek_for_prev(&mut self, key: &[u8]) -> Result { panic!() } - fn seek_for_prev(&mut self, key: SeekKey<'_>) -> Result { + + fn seek_to_first(&mut self) -> Result { + panic!() + } + + fn seek_to_last(&mut self) -> Result { panic!() } diff --git a/components/engine_panic/src/sst.rs b/components/engine_panic/src/sst.rs index 64aa5666fe1..d1e5f4b331c 100644 --- a/components/engine_panic/src/sst.rs +++ b/components/engine_panic/src/sst.rs @@ -3,8 +3,8 @@ use std::path::PathBuf; use engine_traits::{ - CfName, ExternalSstFileInfo, IterOptions, Iterable, Iterator, Result, SeekKey, - SstCompressionType, SstExt, SstReader, SstWriter, SstWriterBuilder, + CfName, ExternalSstFileInfo, IterOptions, Iterable, Iterator, Result, SstCompressionType, + SstExt, SstReader, SstWriter, SstWriterBuilder, }; use crate::engine::PanicEngine; @@ -32,10 +32,7 @@ impl SstReader for PanicSstReader { impl Iterable for PanicSstReader { type Iterator = 
PanicSstReaderIterator; - fn iterator_opt(&self, opts: IterOptions) -> Result { - panic!() - } - fn iterator_cf_opt(&self, cf: &str, opts: IterOptions) -> Result { + fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { panic!() } } @@ -43,10 +40,18 @@ impl Iterable for PanicSstReader { pub struct PanicSstReaderIterator; impl Iterator for PanicSstReaderIterator { - fn seek(&mut self, key: SeekKey<'_>) -> Result { + fn seek(&mut self, key: &[u8]) -> Result { + panic!() + } + fn seek_for_prev(&mut self, key: &[u8]) -> Result { panic!() } - fn seek_for_prev(&mut self, key: SeekKey<'_>) -> Result { + + fn seek_to_first(&mut self) -> Result { + panic!() + } + + fn seek_to_last(&mut self) -> Result { panic!() } diff --git a/components/engine_rocks/src/cf_options.rs b/components/engine_rocks/src/cf_options.rs index 49ba840bc00..87d05510f58 100644 --- a/components/engine_rocks/src/cf_options.rs +++ b/components/engine_rocks/src/cf_options.rs @@ -5,7 +5,7 @@ use rocksdb::ColumnFamilyOptions as RawCFOptions; use tikv_util::box_err; use crate::{ - db_options::RocksTitanDBOptions, engine::RocksEngine, + db_options::RocksTitanDBOptions, engine::RocksEngine, r2e, sst_partitioner::RocksSstPartitionerFactory, util, }; @@ -79,8 +79,8 @@ impl ColumnFamilyOptions for RocksColumnFamilyOptions { self.0.get_block_cache_capacity() } - fn set_block_cache_capacity(&self, capacity: u64) -> std::result::Result<(), String> { - self.0.set_block_cache_capacity(capacity) + fn set_block_cache_capacity(&self, capacity: u64) -> Result<()> { + self.0.set_block_cache_capacity(capacity).map_err(r2e) } fn set_titandb_options(&mut self, opts: &Self::TitanDBOptions) { diff --git a/components/engine_rocks/src/compact.rs b/components/engine_rocks/src/compact.rs index fef3af46f5c..0b50e0757c2 100644 --- a/components/engine_rocks/src/compact.rs +++ b/components/engine_rocks/src/compact.rs @@ -5,7 +5,7 @@ use std::cmp; use engine_traits::{CFNamesExt, CompactExt, Result}; use 
rocksdb::{CompactOptions, CompactionOptions, DBCompressionType}; -use crate::{engine::RocksEngine, util}; +use crate::{engine::RocksEngine, r2e, util}; impl CompactExt for RocksEngine { type CompactedEvent = crate::compact_listener::RocksCompactedEvent; @@ -130,8 +130,8 @@ impl CompactExt for RocksEngine { opts.set_max_subcompactions(max_subcompactions as i32); opts.set_output_file_size_limit(output_file_size_limit); - db.compact_files_cf(handle, &opts, &files, output_level)?; - Ok(()) + db.compact_files_cf(handle, &opts, &files, output_level) + .map_err(r2e) } } diff --git a/components/engine_rocks/src/encryption.rs b/components/engine_rocks/src/encryption.rs index 94c13e811a9..3caf07a0276 100644 --- a/components/engine_rocks/src/encryption.rs +++ b/components/engine_rocks/src/encryption.rs @@ -9,19 +9,19 @@ use rocksdb::{ FileEncryptionInfo as DBFileEncryptionInfo, }; -use crate::raw::Env; +use crate::{r2e, raw::Env}; // Use engine::Env directly since Env is not abstracted. pub(crate) fn get_env( base_env: Option>, key_manager: Option>, -) -> std::result::Result, String> { +) -> engine_traits::Result> { let base_env = base_env.unwrap_or_else(|| Arc::new(Env::default())); if let Some(manager) = key_manager { - Ok(Arc::new(Env::new_key_managed_encrypted_env( - base_env, - WrappedEncryptionKeyManager { manager }, - )?)) + Ok(Arc::new( + Env::new_key_managed_encrypted_env(base_env, WrappedEncryptionKeyManager { manager }) + .map_err(r2e)?, + )) } else { Ok(base_env) } diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 33af3b78036..e6a1cf4a6a7 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -3,14 +3,14 @@ use std::{any::Any, fs, path::Path, sync::Arc}; use engine_traits::{ - Error, IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SyncMutable, - TabletAccessor, + IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SyncMutable, TabletAccessor, }; 
use rocksdb::{DBIterator, Writable, DB}; use crate::{ db_vector::RocksDBVector, options::RocksReadOptions, + r2e, rocks_metrics::{ flush_engine_histogram_metrics, flush_engine_iostall_properties, flush_engine_properties, flush_engine_ticker_metrics, @@ -82,7 +82,7 @@ impl KvEngine for RocksEngine { } fn sync(&self) -> Result<()> { - self.db.sync_wal().map_err(Error::Engine) + self.db.sync_wal().map_err(r2e) } fn flush_metrics(&self, instance: &str) { @@ -133,15 +133,7 @@ impl TabletAccessor for RocksEngine { impl Iterable for RocksEngine { type Iterator = RocksEngineIterator; - fn iterator_opt(&self, opts: IterOptions) -> Result { - let opt: RocksReadOptions = opts.into(); - Ok(RocksEngineIterator::from_raw(DBIterator::new( - self.db.clone(), - opt.into_raw(), - ))) - } - - fn iterator_cf_opt(&self, cf: &str, opts: IterOptions) -> Result { + fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { let handle = get_cf_handle(&self.db, cf)?; let opt: RocksReadOptions = opts.into(); Ok(RocksEngineIterator::from_raw(DBIterator::new_cf( @@ -157,7 +149,7 @@ impl Peekable for RocksEngine { fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { let opt: RocksReadOptions = opts.into(); - let v = self.db.get_opt(key, &opt.into_raw())?; + let v = self.db.get_opt(key, &opt.into_raw()).map_err(r2e)?; Ok(v.map(RocksDBVector::from_raw)) } @@ -169,41 +161,42 @@ impl Peekable for RocksEngine { ) -> Result> { let opt: RocksReadOptions = opts.into(); let handle = get_cf_handle(&self.db, cf)?; - let v = self.db.get_cf_opt(handle, key, &opt.into_raw())?; + let v = self + .db + .get_cf_opt(handle, key, &opt.into_raw()) + .map_err(r2e)?; Ok(v.map(RocksDBVector::from_raw)) } } impl SyncMutable for RocksEngine { fn put(&self, key: &[u8], value: &[u8]) -> Result<()> { - self.db.put(key, value).map_err(Error::Engine) + self.db.put(key, value).map_err(r2e) } fn put_cf(&self, cf: &str, key: &[u8], value: &[u8]) -> Result<()> { let handle = get_cf_handle(&self.db, cf)?; - 
self.db.put_cf(handle, key, value).map_err(Error::Engine) + self.db.put_cf(handle, key, value).map_err(r2e) } fn delete(&self, key: &[u8]) -> Result<()> { - self.db.delete(key).map_err(Error::Engine) + self.db.delete(key).map_err(r2e) } fn delete_cf(&self, cf: &str, key: &[u8]) -> Result<()> { let handle = get_cf_handle(&self.db, cf)?; - self.db.delete_cf(handle, key).map_err(Error::Engine) + self.db.delete_cf(handle, key).map_err(r2e) } fn delete_range(&self, begin_key: &[u8], end_key: &[u8]) -> Result<()> { - self.db - .delete_range(begin_key, end_key) - .map_err(Error::Engine) + self.db.delete_range(begin_key, end_key).map_err(r2e) } fn delete_range_cf(&self, cf: &str, begin_key: &[u8], end_key: &[u8]) -> Result<()> { let handle = get_cf_handle(&self.db, cf)?; self.db .delete_range_cf(handle, begin_key, end_key) - .map_err(Error::Engine) + .map_err(r2e) } } @@ -211,7 +204,7 @@ impl SyncMutable for RocksEngine { mod tests { use std::sync::Arc; - use engine_traits::{Iterable, KvEngine, Peekable, SyncMutable}; + use engine_traits::{Iterable, KvEngine, Peekable, SyncMutable, CF_DEFAULT}; use kvproto::metapb::Region; use tempfile::Builder; @@ -285,7 +278,7 @@ mod tests { let mut data = vec![]; engine - .scan(b"", &[0xFF, 0xFF], false, |key, value| { + .scan(CF_DEFAULT, b"", &[0xFF, 0xFF], false, |key, value| { data.push((key.to_vec(), value.to_vec())); Ok(true) }) @@ -300,7 +293,7 @@ mod tests { data.clear(); engine - .scan_cf(cf, b"", &[0xFF, 0xFF], false, |key, value| { + .scan(cf, b"", &[0xFF, 0xFF], false, |key, value| { data.push((key.to_vec(), value.to_vec())); Ok(true) }) @@ -314,16 +307,16 @@ mod tests { ); data.clear(); - let pair = engine.seek(b"a1").unwrap().unwrap(); + let pair = engine.seek(CF_DEFAULT, b"a1").unwrap().unwrap(); assert_eq!(pair, (b"a1".to_vec(), b"v1".to_vec())); - assert!(engine.seek(b"a3").unwrap().is_none()); - let pair_cf = engine.seek_cf(cf, b"a1").unwrap().unwrap(); + assert!(engine.seek(CF_DEFAULT, b"a3").unwrap().is_none()); + let 
pair_cf = engine.seek(cf, b"a1").unwrap().unwrap(); assert_eq!(pair_cf, (b"a1".to_vec(), b"v1".to_vec())); - assert!(engine.seek_cf(cf, b"a3").unwrap().is_none()); + assert!(engine.seek(cf, b"a3").unwrap().is_none()); let mut index = 0; engine - .scan(b"", &[0xFF, 0xFF], false, |key, value| { + .scan(CF_DEFAULT, b"", &[0xFF, 0xFF], false, |key, value| { data.push((key.to_vec(), value.to_vec())); index += 1; Ok(index != 1) @@ -335,15 +328,15 @@ mod tests { let snap = RocksSnapshot::new(engine.get_sync_db()); engine.put(b"a3", b"v3").unwrap(); - assert!(engine.seek(b"a3").unwrap().is_some()); + assert!(engine.seek(CF_DEFAULT, b"a3").unwrap().is_some()); - let pair = snap.seek(b"a1").unwrap().unwrap(); + let pair = snap.seek(CF_DEFAULT, b"a1").unwrap().unwrap(); assert_eq!(pair, (b"a1".to_vec(), b"v1".to_vec())); - assert!(snap.seek(b"a3").unwrap().is_none()); + assert!(snap.seek(CF_DEFAULT, b"a3").unwrap().is_none()); data.clear(); - snap.scan(b"", &[0xFF, 0xFF], false, |key, value| { + snap.scan(CF_DEFAULT, b"", &[0xFF, 0xFF], false, |key, value| { data.push((key.to_vec(), value.to_vec())); Ok(true) }) diff --git a/components/engine_rocks/src/engine_iterator.rs b/components/engine_rocks/src/engine_iterator.rs index fcc10237510..de51b32c8f4 100644 --- a/components/engine_rocks/src/engine_iterator.rs +++ b/components/engine_rocks/src/engine_iterator.rs @@ -2,8 +2,10 @@ use std::sync::Arc; -use engine_traits::{self, Error, Result}; -use rocksdb::{DBIterator, SeekKey as RawSeekKey, DB}; +use engine_traits::{self, Result}; +use rocksdb::{DBIterator, DB}; + +use crate::r2e; // FIXME: Would prefer using &DB instead of Arc. As elsewhere in // this crate, it would require generic associated types. 
@@ -20,30 +22,38 @@ impl RocksEngineIterator { } impl engine_traits::Iterator for RocksEngineIterator { - fn seek(&mut self, key: engine_traits::SeekKey<'_>) -> Result { - let k: RocksSeekKey<'_> = key.into(); - self.0.seek(k.into_raw()).map_err(Error::Engine) + fn seek(&mut self, key: &[u8]) -> Result { + self.0.seek(rocksdb::SeekKey::Key(key)).map_err(r2e) + } + + fn seek_for_prev(&mut self, key: &[u8]) -> Result { + self.0 + .seek_for_prev(rocksdb::SeekKey::Key(key)) + .map_err(r2e) } - fn seek_for_prev(&mut self, key: engine_traits::SeekKey<'_>) -> Result { - let k: RocksSeekKey<'_> = key.into(); - self.0.seek_for_prev(k.into_raw()).map_err(Error::Engine) + fn seek_to_first(&mut self) -> Result { + self.0.seek(rocksdb::SeekKey::Start).map_err(r2e) + } + + fn seek_to_last(&mut self) -> Result { + self.0.seek(rocksdb::SeekKey::End).map_err(r2e) } fn prev(&mut self) -> Result { #[cfg(not(feature = "nortcheck"))] if !self.valid()? { - return Err(Error::Engine("Iterator invalid".to_string())); + return Err(r2e("Iterator invalid")); } - self.0.prev().map_err(Error::Engine) + self.0.prev().map_err(r2e) } fn next(&mut self) -> Result { #[cfg(not(feature = "nortcheck"))] if !self.valid()? 
{ - return Err(Error::Engine("Iterator invalid".to_string())); + return Err(r2e("Iterator invalid")); } - self.0.next().map_err(Error::Engine) + self.0.next().map_err(r2e) } fn key(&self) -> &[u8] { @@ -59,25 +69,6 @@ impl engine_traits::Iterator for RocksEngineIterator { } fn valid(&self) -> Result { - self.0.valid().map_err(Error::Engine) - } -} - -pub struct RocksSeekKey<'a>(RawSeekKey<'a>); - -impl<'a> RocksSeekKey<'a> { - pub fn into_raw(self) -> RawSeekKey<'a> { - self.0 - } -} - -impl<'a> From> for RocksSeekKey<'a> { - fn from(key: engine_traits::SeekKey<'a>) -> Self { - let k = match key { - engine_traits::SeekKey::Start => RawSeekKey::Start, - engine_traits::SeekKey::End => RawSeekKey::End, - engine_traits::SeekKey::Key(k) => RawSeekKey::Key(k), - }; - RocksSeekKey(k) + self.0.valid().map_err(r2e) } } diff --git a/components/engine_rocks/src/file_system.rs b/components/engine_rocks/src/file_system.rs index 397eaead488..2fcbc405056 100644 --- a/components/engine_rocks/src/file_system.rs +++ b/components/engine_rocks/src/file_system.rs @@ -5,20 +5,23 @@ use std::sync::Arc; use engine_traits::{EngineFileSystemInspector, FileSystemInspector}; use rocksdb::FileSystemInspector as DBFileSystemInspector; -use crate::raw::Env; +use crate::{e2r, r2e, raw::Env}; // Use engine::Env directly since Env is not abstracted. 
pub(crate) fn get_env( base_env: Option>, limiter: Option>, -) -> Result, String> { +) -> engine_traits::Result> { let base_env = base_env.unwrap_or_else(|| Arc::new(Env::default())); - Ok(Arc::new(Env::new_file_system_inspected_env( - base_env, - WrappedFileSystemInspector { - inspector: EngineFileSystemInspector::from_limiter(limiter), - }, - )?)) + Ok(Arc::new( + Env::new_file_system_inspected_env( + base_env, + WrappedFileSystemInspector { + inspector: EngineFileSystemInspector::from_limiter(limiter), + }, + ) + .map_err(r2e)?, + )) } pub struct WrappedFileSystemInspector { @@ -27,11 +30,11 @@ pub struct WrappedFileSystemInspector { impl DBFileSystemInspector for WrappedFileSystemInspector { fn read(&self, len: usize) -> Result { - self.inspector.read(len) + self.inspector.read(len).map_err(e2r) } fn write(&self, len: usize) -> Result { - self.inspector.write(len) + self.inspector.write(len).map_err(e2r) } } diff --git a/components/engine_rocks/src/import.rs b/components/engine_rocks/src/import.rs index 1cfe24cb8e4..641e33f7bd8 100644 --- a/components/engine_rocks/src/import.rs +++ b/components/engine_rocks/src/import.rs @@ -7,7 +7,7 @@ use rocksdb::{ set_external_sst_file_global_seq_no, IngestExternalFileOptions as RawIngestExternalFileOptions, }; -use crate::{engine::RocksEngine, util}; +use crate::{engine::RocksEngine, r2e, util}; impl ImportExt for RocksEngine { type IngestExternalFileOptions = RocksIngestExternalFileOptions; @@ -22,10 +22,10 @@ impl ImportExt for RocksEngine { // Prior to v5.2.0, TiKV use `write_global_seqno=true` for ingestion. For backward // compatibility, in case TiKV is retrying an ingestion job generated by older // version, it needs to reset the global seqno to 0. 
- set_external_sst_file_global_seq_no(self.as_inner(), cf, file, 0)?; + set_external_sst_file_global_seq_no(self.as_inner(), cf, file, 0).map_err(r2e)?; f.sync_all() - .map_err(|e| format!("sync {}: {:?}", file, e))?; - Ok(()) + .map_err(|e| format!("sync {}: {:?}", file, e)) + .map_err(r2e) })?; // This is calling a specially optimized version of // ingest_external_file_cf. In cases where the memtable needs to be @@ -34,7 +34,8 @@ impl ImportExt for RocksEngine { // the manual memtable flush was taken. let _did_nonblocking_memtable_flush = self .as_inner() - .ingest_external_file_optimized(cf, &opts.0, files)?; + .ingest_external_file_optimized(cf, &opts.0, files) + .map_err(r2e)?; Ok(()) } } diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index 7cf4d948d0d..b93d8cc7f36 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -47,6 +47,8 @@ mod sst; pub use crate::sst::*; mod sst_partitioner; pub use crate::sst_partitioner::*; +mod status; +pub use crate::status::*; mod table_properties; pub use crate::table_properties::*; mod write_batch; @@ -113,7 +115,7 @@ pub mod raw; pub fn get_env( key_manager: Option>, limiter: Option>, -) -> std::result::Result, String> { +) -> engine_traits::Result> { let env = encryption::get_env(None /*base_env*/, key_manager)?; file_system::get_env(Some(env), limiter) } diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index d7741e98c26..ce608d353b7 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -8,7 +8,8 @@ use rocksdb::Range as RocksRange; use tikv_util::{box_try, keybuilder::KeyBuilder}; use crate::{ - engine::RocksEngine, rocks_metrics_defs::*, sst::RocksSstWriterBuilder, util, RocksSstWriter, + engine::RocksEngine, r2e, rocks_metrics_defs::*, sst::RocksSstWriterBuilder, util, + RocksSstWriter, }; pub const MAX_DELETE_COUNT_BY_KEY: usize = 2048; @@ -54,8 +55,8 @@ impl 
RocksEngine { } last_end_key = Some(r.end_key.to_owned()); - let mut it = self.iterator_cf_opt(cf, opts.clone())?; - let mut it_valid = it.seek(r.start_key.into())?; + let mut it = self.iterator_opt(cf, opts.clone())?; + let mut it_valid = it.seek(r.start_key)?; while it_valid { if it.key() >= r.end_key { break; @@ -106,8 +107,8 @@ impl RocksEngine { // to avoid referring to missing blob files. opts.set_key_only(true); } - let mut it = self.iterator_cf_opt(cf, opts)?; - let mut it_valid = it.seek(range.start_key.into())?; + let mut it = self.iterator_opt(cf, opts)?; + let mut it_valid = it.seek(range.start_key)?; let mut wb = self.write_batch(); while it_valid { wb.delete_cf(cf, it.key())?; @@ -127,12 +128,12 @@ impl RocksEngine { impl MiscExt for RocksEngine { fn flush(&self, sync: bool) -> Result<()> { - Ok(self.as_inner().flush(sync)?) + self.as_inner().flush(sync).map_err(r2e) } fn flush_cf(&self, cf: &str, sync: bool) -> Result<()> { let handle = util::get_cf_handle(self.as_inner(), cf)?; - Ok(self.as_inner().flush_cf(handle, sync)?) 
+ self.as_inner().flush_cf(handle, sync).map_err(r2e) } fn delete_ranges_cf( @@ -151,12 +152,9 @@ impl MiscExt for RocksEngine { if r.start_key >= r.end_key { continue; } - self.as_inner().delete_files_in_range_cf( - handle, - r.start_key, - r.end_key, - false, - )?; + self.as_inner() + .delete_files_in_range_cf(handle, r.start_key, r.end_key, false) + .map_err(r2e)?; } } DeleteStrategy::DeleteBlobs => { @@ -166,12 +164,9 @@ impl MiscExt for RocksEngine { if r.start_key >= r.end_key { continue; } - self.as_inner().delete_blob_files_in_range_cf( - handle, - r.start_key, - r.end_key, - false, - )?; + self.as_inner() + .delete_blob_files_in_range_cf(handle, r.start_key, r.end_key, false) + .map_err(r2e)?; } } } @@ -241,7 +236,8 @@ impl MiscExt for RocksEngine { for cf in db.cf_names() { let handle = util::get_cf_handle(db, cf)?; - db.delete_files_in_ranges_cf(handle, &delete_ranges, /* include_end */ false)?; + db.delete_files_in_ranges_cf(handle, &delete_ranges, /* include_end */ false) + .map_err(r2e)?; } Ok(()) @@ -252,7 +248,7 @@ impl MiscExt for RocksEngine { } fn sync_wal(&self) -> Result<()> { - Ok(self.as_inner().sync_wal()?) 
+ self.as_inner().sync_wal().map_err(r2e) } fn exists(path: &str) -> bool { @@ -340,7 +336,7 @@ mod tests { use std::sync::Arc; use engine_traits::{ - DeleteStrategy, Iterable, Iterator, Mutable, SeekKey, SyncMutable, WriteBatchExt, ALL_CFS, + DeleteStrategy, Iterable, Iterator, Mutable, SyncMutable, WriteBatchExt, ALL_CFS, }; use tempfile::Builder; @@ -353,8 +349,8 @@ mod tests { fn check_data(db: &RocksEngine, cfs: &[&str], expected: &[(&[u8], &[u8])]) { for cf in cfs { - let mut iter = db.iterator_cf(cf).unwrap(); - iter.seek(SeekKey::Start).unwrap(); + let mut iter = db.iterator(cf).unwrap(); + iter.seek_to_first().unwrap(); for &(k, v) in expected { assert_eq!(k, iter.key()); assert_eq!(v, iter.value()); diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index 607e0bfca17..b6a35f4a4e2 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -64,6 +64,7 @@ impl RaftEngineReadOnly for RocksEngine { let start_key = keys::raft_log_key(region_id, low); let end_key = keys::raft_log_key(region_id, high); self.scan( + CF_DEFAULT, &start_key, &end_key, true, // fill_cache @@ -108,6 +109,7 @@ impl RaftEngineReadOnly for RocksEngine { let start_key = keys::raft_log_key(region_id, 0); let end_key = keys::raft_log_key(region_id, u64::MAX); self.scan( + CF_DEFAULT, &start_key, &end_key, false, // fill_cache @@ -123,7 +125,7 @@ impl RaftEngineReadOnly for RocksEngine { fn is_empty(&self) -> Result { let mut is_empty = true; - self.scan_cf(CF_DEFAULT, b"", b"", false, |_, _| { + self.scan(CF_DEFAULT, b"", b"", false, |_, _| { is_empty = false; Ok(false) })?; @@ -158,6 +160,7 @@ impl RaftEngineDebug for RocksEngine { let start_key = keys::raft_log_key(raft_group_id, 0); let end_key = keys::raft_log_key(raft_group_id, u64::MAX); self.scan( + CF_DEFAULT, &start_key, &end_key, false, // fill_cache @@ -181,7 +184,7 @@ impl RocksEngine { if from == 0 { let start_key = 
keys::raft_log_key(raft_group_id, 0); let prefix = keys::raft_log_prefix(raft_group_id); - match self.seek(&start_key)? { + match self.seek(CF_DEFAULT, &start_key)? { Some((k, _)) if k.starts_with(&prefix) => from = box_try!(keys::raft_log_index(&k)), // No need to gc. _ => return Ok(0), @@ -252,7 +255,7 @@ impl RaftEngine for RocksEngine { let seek_key = keys::raft_log_key(raft_group_id, 0); let prefix = keys::raft_log_prefix(raft_group_id); fail::fail_point!("engine_rocks_raft_engine_clean_seek", |_| Ok(())); - if let Some((key, _)) = self.seek(&seek_key)? { + if let Some((key, _)) = self.seek(CF_DEFAULT, &seek_key)? { if !key.starts_with(&prefix) { // No raft logs for the raft group. return Ok(()); @@ -343,7 +346,7 @@ impl RaftEngine for RocksEngine { let start_key = keys::REGION_META_MIN_KEY; let end_key = keys::REGION_META_MAX_KEY; let mut err = None; - self.scan(start_key, end_key, false, |key, _| { + self.scan(CF_DEFAULT, start_key, end_key, false, |key, _| { let (region_id, suffix) = box_try!(keys::decode_region_meta_key(key)); if suffix != keys::REGION_STATE_SUFFIX { return Ok(true); diff --git a/components/engine_rocks/src/raw.rs b/components/engine_rocks/src/raw.rs index c7d2e3a0d31..c51c0187b2d 100644 --- a/components/engine_rocks/src/raw.rs +++ b/components/engine_rocks/src/raw.rs @@ -14,7 +14,7 @@ pub use rocksdb::{ DBCompactionFilter, DBCompactionStyle, DBCompressionType, DBEntryType, DBInfoLogLevel, DBIterator, DBOptions, DBRateLimiterMode, DBRecoveryMode, DBStatisticsTickerType, DBTitanDBBlobRunMode, Env, EventListener, IngestExternalFileOptions, LRUCacheOptions, - MemoryAllocator, PerfContext, PrepopulateBlockCache, Range, ReadOptions, SeekKey, - SliceTransform, TableFilter, TablePropertiesCollector, TablePropertiesCollectorFactory, - TitanBlobIndex, TitanDBOptions, Writable, WriteOptions, DB, + MemoryAllocator, PerfContext, PrepopulateBlockCache, Range, ReadOptions, SliceTransform, + TableFilter, TablePropertiesCollector, 
TablePropertiesCollectorFactory, TitanBlobIndex, + TitanDBOptions, Writable, WriteOptions, DB, }; diff --git a/components/engine_rocks/src/raw_util.rs b/components/engine_rocks/src/raw_util.rs index a9f1fcda781..e669f007276 100644 --- a/components/engine_rocks/src/raw_util.rs +++ b/components/engine_rocks/src/raw_util.rs @@ -13,6 +13,8 @@ use rocksdb::{ }; use tikv_util::warn; +use crate::r2e; + pub struct CFOptions<'a> { cf: &'a str, options: ColumnFamilyOptions, @@ -92,12 +94,13 @@ pub fn new_engine_opt( cfs_v.push(x.cf); cf_opts_v.push(x.options.clone()); } - let mut db = DB::open_cf(db_opt, path, cfs_v.into_iter().zip(cf_opts_v).collect())?; + let mut db = + DB::open_cf(db_opt, path, cfs_v.into_iter().zip(cf_opts_v).collect()).map_err(r2e)?; for x in cfs_opts { if x.cf == CF_DEFAULT { continue; } - db.create_cf((x.cf, x.options))?; + db.create_cf((x.cf, x.options)).map_err(r2e)?; } return Ok(db); @@ -106,7 +109,7 @@ pub fn new_engine_opt( db_opt.create_if_missing(false); // Lists all column families in current db. - let cfs_list = DB::list_column_families(&db_opt, path)?; + let cfs_list = DB::list_column_families(&db_opt, path).map_err(r2e)?; let existed: Vec<&str> = cfs_list.iter().map(|v| v.as_str()).collect(); let needed: Vec<&str> = cfs_opts.iter().map(|x| x.cf).collect(); @@ -134,7 +137,8 @@ pub fn new_engine_opt( cfs_opts_v.push(x.options); } - let db = DB::open_cf(db_opt, path, cfs_v.into_iter().zip(cfs_opts_v).collect())?; + let db = + DB::open_cf(db_opt, path, cfs_v.into_iter().zip(cfs_opts_v).collect()).map_err(r2e)?; return Ok(db); } @@ -155,14 +159,14 @@ pub fn new_engine_opt( } } let cfds = cfs_v.into_iter().zip(cfs_opts_v).collect(); - let mut db = DB::open_cf(db_opt, path, cfds)?; + let mut db = DB::open_cf(db_opt, path, cfds).map_err(r2e)?; // Drops discarded column families. // for cf in existed.iter().filter(|x| needed.iter().find(|y| y == x).is_none()) { for cf in cfs_diff(&existed, &needed) { // Never drop default column families. 
if cf != CF_DEFAULT { - db.drop_cf(cf)?; + db.drop_cf(cf).map_err(r2e)?; } } @@ -176,7 +180,8 @@ pub fn new_engine_opt( .unwrap() .options .clone(), - ))?; + )) + .map_err(r2e)?; } Ok(db) } diff --git a/components/engine_rocks/src/snapshot.rs b/components/engine_rocks/src/snapshot.rs index e1a0f635286..94724b220f7 100644 --- a/components/engine_rocks/src/snapshot.rs +++ b/components/engine_rocks/src/snapshot.rs @@ -9,7 +9,8 @@ use engine_traits::{self, IterOptions, Iterable, Peekable, ReadOptions, Result, use rocksdb::{rocksdb_options::UnsafeSnap, DBIterator, DB}; use crate::{ - db_vector::RocksDBVector, options::RocksReadOptions, util::get_cf_handle, RocksEngineIterator, + db_vector::RocksDBVector, options::RocksReadOptions, r2e, util::get_cf_handle, + RocksEngineIterator, }; pub struct RocksSnapshot { @@ -54,19 +55,7 @@ impl Drop for RocksSnapshot { impl Iterable for RocksSnapshot { type Iterator = RocksEngineIterator; - fn iterator_opt(&self, opts: IterOptions) -> Result { - let opt: RocksReadOptions = opts.into(); - let mut opt = opt.into_raw(); - unsafe { - opt.set_snapshot(&self.snap); - } - Ok(RocksEngineIterator::from_raw(DBIterator::new( - self.db.clone(), - opt, - ))) - } - - fn iterator_cf_opt(&self, cf: &str, opts: IterOptions) -> Result { + fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { let opt: RocksReadOptions = opts.into(); let mut opt = opt.into_raw(); unsafe { @@ -90,7 +79,7 @@ impl Peekable for RocksSnapshot { unsafe { opt.set_snapshot(&self.snap); } - let v = self.db.get_opt(key, &opt)?; + let v = self.db.get_opt(key, &opt).map_err(r2e)?; Ok(v.map(RocksDBVector::from_raw)) } @@ -106,7 +95,7 @@ impl Peekable for RocksSnapshot { opt.set_snapshot(&self.snap); } let handle = get_cf_handle(self.db.as_ref(), cf)?; - let v = self.db.get_cf_opt(handle, key, &opt)?; + let v = self.db.get_cf_opt(handle, key, &opt).map_err(r2e)?; Ok(v.map(RocksDBVector::from_raw)) } } diff --git a/components/engine_rocks/src/sst.rs 
b/components/engine_rocks/src/sst.rs index c7eb52e0527..68182238161 100644 --- a/components/engine_rocks/src/sst.rs +++ b/components/engine_rocks/src/sst.rs @@ -3,8 +3,8 @@ use std::{path::PathBuf, rc::Rc, sync::Arc}; use engine_traits::{ - Error, ExternalSstFileInfo, IterOptions, Iterable, Iterator, Result, SeekKey, - SstCompressionType, SstExt, SstMetaInfo, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, + Error, ExternalSstFileInfo, IterOptions, Iterable, Iterator, Result, SstCompressionType, + SstExt, SstMetaInfo, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, }; use fail::fail_point; use kvproto::import_sstpb::SstMeta; @@ -14,9 +14,7 @@ use rocksdb::{ SstFileWriter, DB, }; -// FIXME: Move RocksSeekKey into a common module since -// it's shared between multiple iterators -use crate::{engine::RocksEngine, engine_iterator::RocksSeekKey, options::RocksReadOptions}; +use crate::{engine::RocksEngine, options::RocksReadOptions, r2e}; impl SstExt for RocksEngine { type SstReader = RocksSstReader; @@ -51,7 +49,7 @@ impl RocksSstReader { cf_options.set_env(env); } let mut reader = SstFileReader::new(cf_options); - reader.open(path)?; + reader.open(path).map_err(r2e)?; let inner = Rc::new(reader); Ok(RocksSstReader { inner }) } @@ -70,7 +68,7 @@ impl SstReader for RocksSstReader { Self::open_with_env(path, None) } fn verify_checksum(&self) -> Result<()> { - self.inner.verify_checksum()?; + self.inner.verify_checksum().map_err(r2e)?; Ok(()) } fn iter(&self) -> Self::Iterator { @@ -81,7 +79,8 @@ impl SstReader for RocksSstReader { impl Iterable for RocksSstReader { type Iterator = RocksSstIterator; - fn iterator_opt(&self, opts: IterOptions) -> Result { + /// Cf is ignored as there is only one cf in sst. 
+ fn iterator_opt(&self, _cf: &str, opts: IterOptions) -> Result { let opt: RocksReadOptions = opts.into(); let opt = opt.into_raw(); Ok(RocksSstIterator(SstFileReader::iter_opt_rc( @@ -89,10 +88,6 @@ impl Iterable for RocksSstReader { opt, ))) } - - fn iterator_cf_opt(&self, _cf: &str, _opts: IterOptions) -> Result { - unimplemented!() // FIXME: What should happen here? - } } // FIXME: See comment on RocksSstReader for why this contains Rc @@ -103,30 +98,40 @@ pub struct RocksSstIterator(DBIterator>); unsafe impl Send for RocksSstIterator {} impl Iterator for RocksSstIterator { - fn seek(&mut self, key: SeekKey<'_>) -> Result { - let k: RocksSeekKey<'_> = key.into(); - self.0.seek(k.into_raw()).map_err(Error::Engine) + fn seek(&mut self, key: &[u8]) -> Result { + self.0.seek(rocksdb::SeekKey::Key(key)).map_err(r2e) + } + + fn seek_for_prev(&mut self, key: &[u8]) -> Result { + self.0 + .seek_for_prev(rocksdb::SeekKey::Key(key)) + .map_err(r2e) } - fn seek_for_prev(&mut self, key: SeekKey<'_>) -> Result { - let k: RocksSeekKey<'_> = key.into(); - self.0.seek_for_prev(k.into_raw()).map_err(Error::Engine) + /// Seek to the first key in the database. + fn seek_to_first(&mut self) -> Result { + self.0.seek(rocksdb::SeekKey::Start).map_err(r2e) + } + + /// Seek to the last key in the database. + fn seek_to_last(&mut self) -> Result { + self.0.seek(rocksdb::SeekKey::End).map_err(r2e) } fn prev(&mut self) -> Result { #[cfg(not(feature = "nortcheck"))] if !self.valid()? { - return Err(Error::Engine("Iterator invalid".to_string())); + return Err(r2e("Iterator invalid")); } - self.0.prev().map_err(Error::Engine) + self.0.prev().map_err(r2e) } fn next(&mut self) -> Result { #[cfg(not(feature = "nortcheck"))] if !self.valid()? 
{ - return Err(Error::Engine("Iterator invalid".to_string())); + return Err(r2e("Iterator invalid")); } - self.0.next().map_err(Error::Engine) + self.0.next().map_err(r2e) } fn key(&self) -> &[u8] { @@ -138,7 +143,7 @@ impl Iterator for RocksSstIterator { } fn valid(&self) -> Result { - self.0.valid().map_err(Error::Engine) + self.0.valid().map_err(r2e) } } @@ -192,7 +197,7 @@ impl SstWriterBuilder for RocksSstWriterBuilder { env = db.env(); let handle = db .cf_handle(self.cf.as_deref().unwrap_or(CF_DEFAULT)) - .ok_or_else(|| format!("CF {:?} is not found", self.cf))?; + .ok_or_else(|| r2e(format!("CF {:?} is not found", self.cf)))?; db.get_options_cf(handle) } else { ColumnFamilyOptions::new() @@ -240,7 +245,7 @@ impl SstWriterBuilder for RocksSstWriterBuilder { io_options.bottommost_compression(DBCompressionType::Disable); let mut writer = SstFileWriter::new(EnvOptions::new(), io_options); fail_point!("on_open_sst_writer"); - writer.open(path)?; + writer.open(path).map_err(r2e)?; Ok(RocksSstWriter { writer, env }) } } @@ -255,11 +260,11 @@ impl SstWriter for RocksSstWriter { type ExternalSstFileReader = SequentialFile; fn put(&mut self, key: &[u8], val: &[u8]) -> Result<()> { - Ok(self.writer.put(key, val)?) + self.writer.put(key, val).map_err(r2e) } fn delete(&mut self, key: &[u8]) -> Result<()> { - Ok(self.writer.delete(key)?) 
+ self.writer.delete(key).map_err(r2e) } fn file_size(&mut self) -> u64 { @@ -267,22 +272,25 @@ impl SstWriter for RocksSstWriter { } fn finish(mut self) -> Result { - Ok(RocksExternalSstFileInfo(self.writer.finish()?)) + Ok(RocksExternalSstFileInfo(self.writer.finish().map_err(r2e)?)) } fn finish_read(mut self) -> Result<(Self::ExternalSstFileInfo, Self::ExternalSstFileReader)> { - let env = self.env.take().ok_or_else(|| { - Error::Engine("failed to read sequential file no env provided".to_owned()) - })?; - let sst_info = self.writer.finish()?; + let env = self + .env + .take() + .ok_or_else(|| r2e("failed to read sequential file no env provided"))?; + let sst_info = self.writer.finish().map_err(r2e)?; let p = sst_info.file_path(); let path = p.as_os_str().to_str().ok_or_else(|| { - Error::Engine(format!( + r2e(format!( "failed to sequential file bad path {}", p.display() )) })?; - let seq_file = env.new_sequential_file(path, EnvOptions::new())?; + let seq_file = env + .new_sequential_file(path, EnvOptions::new()) + .map_err(r2e)?; Ok((RocksExternalSstFileInfo(sst_info), seq_file)) } } diff --git a/components/engine_rocks/src/status.rs b/components/engine_rocks/src/status.rs new file mode 100644 index 00000000000..1565e013834 --- /dev/null +++ b/components/engine_rocks/src/status.rs @@ -0,0 +1,19 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +/// A function that will transform a rocksdb error to engine trait error. +/// +/// r stands for rocksdb, e stands for engine_trait. +pub fn r2e(msg: impl Into) -> engine_traits::Error { + // TODO: use correct code. + engine_traits::Error::Engine(engine_traits::Status::with_error( + engine_traits::Code::IoError, + msg, + )) +} + +/// A function that will transform a engine trait error to rocksdb error. +/// +/// r stands for rocksdb, e stands for engine_trait. 
+pub fn e2r(s: engine_traits::Error) -> String { + format!("{:?}", s) +} diff --git a/components/engine_rocks/src/table_properties.rs b/components/engine_rocks/src/table_properties.rs index 3a3bbad6a04..19b2141483d 100644 --- a/components/engine_rocks/src/table_properties.rs +++ b/components/engine_rocks/src/table_properties.rs @@ -1,8 +1,8 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{Error, Range, Result}; +use engine_traits::{Range, Result}; -use crate::{util, RangeProperties, RocksEngine}; +use crate::{r2e, util, RangeProperties, RocksEngine}; #[repr(transparent)] pub struct UserCollectedProperties(rocksdb::UserCollectedProperties); @@ -57,11 +57,9 @@ impl RocksEngine { let cf = util::get_cf_handle(self.as_inner(), cf)?; // FIXME: extra allocation let ranges: Vec<_> = ranges.iter().map(util::range_to_rocks_range).collect(); - let raw = self - .as_inner() - .get_properties_of_tables_in_range(cf, &ranges); - let raw = raw.map_err(Error::Engine)?; - Ok(raw) + self.as_inner() + .get_properties_of_tables_in_range(cf, &ranges) + .map_err(r2e) } pub fn get_range_properties_cf( diff --git a/components/engine_rocks/src/util.rs b/components/engine_rocks/src/util.rs index 47e4016ebc6..81a2ccb497a 100644 --- a/components/engine_rocks/src/util.rs +++ b/components/engine_rocks/src/util.rs @@ -10,6 +10,7 @@ use crate::{ cf_options::RocksColumnFamilyOptions, db_options::RocksDBOptions, engine::RocksEngine, + r2e, raw_util::{new_engine as new_engine_raw, new_engine_opt as new_engine_opt_raw, CFOptions}, rocks_metrics_defs::*, }; @@ -86,10 +87,9 @@ pub fn new_engine_opt( } pub fn get_cf_handle<'a>(db: &'a DB, cf: &str) -> Result<&'a CFHandle> { - let handle = db - .cf_handle(cf) - .ok_or_else(|| Error::Engine(format!("cf {} not found", cf)))?; - Ok(handle) + db.cf_handle(cf) + .ok_or_else(|| format!("cf {} not found", cf)) + .map_err(r2e) } pub fn range_to_rocks_range<'a>(range: &Range<'a>) -> RocksRange<'a> { diff --git 
a/components/engine_rocks/src/write_batch.rs b/components/engine_rocks/src/write_batch.rs index 77b8e65d3eb..f09761802e6 100644 --- a/components/engine_rocks/src/write_batch.rs +++ b/components/engine_rocks/src/write_batch.rs @@ -2,10 +2,10 @@ use std::sync::Arc; -use engine_traits::{self, Error, Mutable, Result, WriteBatchExt, WriteOptions}; +use engine_traits::{self, Mutable, Result, WriteBatchExt, WriteOptions}; use rocksdb::{Writable, WriteBatch as RawWriteBatch, DB}; -use crate::{engine::RocksEngine, options::RocksWriteOptions, util::get_cf_handle}; +use crate::{engine::RocksEngine, options::RocksWriteOptions, r2e, util::get_cf_handle}; const WRITE_BATCH_MAX_BATCH: usize = 16; const WRITE_BATCH_LIMIT: usize = 16; @@ -101,11 +101,11 @@ impl engine_traits::WriteBatch for RocksWriteBatchVec { if self.support_write_batch_vec { self.get_db() .multi_batch_write(self.as_inner(), &opt.into_raw()) - .map_err(Error::Engine) + .map_err(r2e) } else { self.get_db() .write_opt(&self.wbs[0], &opt.into_raw()) - .map_err(Error::Engine) + .map_err(r2e) } } @@ -153,9 +153,9 @@ impl engine_traits::WriteBatch for RocksWriteBatchVec { fn pop_save_point(&mut self) -> Result<()> { if let Some(x) = self.save_points.pop() { - return self.wbs[x].pop_save_point().map_err(Error::Engine); + return self.wbs[x].pop_save_point().map_err(r2e); } - Err(Error::Engine("no save point".into())) + Err(r2e("no save point")) } fn rollback_to_save_point(&mut self) -> Result<()> { @@ -164,9 +164,9 @@ impl engine_traits::WriteBatch for RocksWriteBatchVec { self.wbs[i].clear(); } self.index = x; - return self.wbs[x].rollback_to_save_point().map_err(Error::Engine); + return self.wbs[x].rollback_to_save_point().map_err(r2e); } - Err(Error::Engine("no save point".into())) + Err(r2e("no save point")) } fn merge(&mut self, other: Self) -> Result<()> { @@ -181,35 +181,31 @@ impl engine_traits::WriteBatch for RocksWriteBatchVec { impl Mutable for RocksWriteBatchVec { fn put(&mut self, key: &[u8], value: &[u8]) 
-> Result<()> { self.check_switch_batch(); - self.wbs[self.index].put(key, value).map_err(Error::Engine) + self.wbs[self.index].put(key, value).map_err(r2e) } fn put_cf(&mut self, cf: &str, key: &[u8], value: &[u8]) -> Result<()> { self.check_switch_batch(); let handle = get_cf_handle(self.db.as_ref(), cf)?; - self.wbs[self.index] - .put_cf(handle, key, value) - .map_err(Error::Engine) + self.wbs[self.index].put_cf(handle, key, value).map_err(r2e) } fn delete(&mut self, key: &[u8]) -> Result<()> { self.check_switch_batch(); - self.wbs[self.index].delete(key).map_err(Error::Engine) + self.wbs[self.index].delete(key).map_err(r2e) } fn delete_cf(&mut self, cf: &str, key: &[u8]) -> Result<()> { self.check_switch_batch(); let handle = get_cf_handle(self.db.as_ref(), cf)?; - self.wbs[self.index] - .delete_cf(handle, key) - .map_err(Error::Engine) + self.wbs[self.index].delete_cf(handle, key).map_err(r2e) } fn delete_range(&mut self, begin_key: &[u8], end_key: &[u8]) -> Result<()> { self.check_switch_batch(); self.wbs[self.index] .delete_range(begin_key, end_key) - .map_err(Error::Engine) + .map_err(r2e) } fn delete_range_cf(&mut self, cf: &str, begin_key: &[u8], end_key: &[u8]) -> Result<()> { @@ -217,7 +213,7 @@ impl Mutable for RocksWriteBatchVec { let handle = get_cf_handle(self.db.as_ref(), cf)?; self.wbs[self.index] .delete_range_cf(handle, begin_key, end_key) - .map_err(Error::Engine) + .map_err(r2e) } } diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index 82373ac8568..d6633139122 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -281,10 +281,7 @@ pub mod kv { new_engine } - fn set_shared_block_cache_capacity( - &self, - capacity: u64, - ) -> std::result::Result<(), String> { + fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()> { let reg = self.registry.lock().unwrap(); // pick up any tablet and set the shared block cache capacity if let Some(((_id, _suffix), tablet)) = 
(*reg).iter().next() { diff --git a/components/engine_traits/src/cf_options.rs b/components/engine_traits/src/cf_options.rs index 2e130cbf73c..6498238280f 100644 --- a/components/engine_traits/src/cf_options.rs +++ b/components/engine_traits/src/cf_options.rs @@ -21,7 +21,7 @@ pub trait ColumnFamilyOptions { fn get_soft_pending_compaction_bytes_limit(&self) -> u64; fn get_hard_pending_compaction_bytes_limit(&self) -> u64; fn get_block_cache_capacity(&self) -> u64; - fn set_block_cache_capacity(&self, capacity: u64) -> std::result::Result<(), String>; + fn set_block_cache_capacity(&self, capacity: u64) -> Result<()>; fn set_titandb_options(&mut self, opts: &Self::TitanDBOptions); fn get_target_file_size_base(&self) -> u64; fn set_disable_auto_compactions(&mut self, v: bool); diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index 9b560bcd65b..c143cf7a194 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -187,7 +187,7 @@ pub trait TabletFactory: TabletAccessor { fn create_shared_db(&self) -> Result; /// Destroy the tablet and its data - fn destroy_tablet(&self, id: u64, suffix: u64) -> crate::Result<()>; + fn destroy_tablet(&self, id: u64, suffix: u64) -> Result<()>; /// Check if the tablet with specified id/suffix exists #[inline] @@ -219,7 +219,7 @@ pub trait TabletFactory: TabletAccessor { unimplemented!(); } - fn set_shared_block_cache_capacity(&self, capacity: u64) -> std::result::Result<(), String>; + fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()>; } pub struct DummyFactory @@ -243,7 +243,7 @@ where fn create_shared_db(&self) -> Result { Ok(self.engine.as_ref().unwrap().clone()) } - fn destroy_tablet(&self, _id: u64, _suffix: u64) -> crate::Result<()> { + fn destroy_tablet(&self, _id: u64, _suffix: u64) -> Result<()> { Ok(()) } fn exists_raw(&self, _path: &Path) -> bool { @@ -256,7 +256,7 @@ where PathBuf::from(&self.root_path) } - fn 
set_shared_block_cache_capacity(&self, capacity: u64) -> std::result::Result<(), String> { + fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()> { let opt = self .engine .as_ref() @@ -312,8 +312,8 @@ mod tests { fn test_tablet_error_collector_err() { let mut err = TabletErrorCollector::new(); err.add_result(1, 1, Ok(())); - err.add_result(1, 1, Err("this is an error1".to_string().into())); - err.add_result(1, 1, Err("this is an error2".to_string().into())); + err.add_result(1, 1, Err(Status::with_code(Code::Aborted).into())); + err.add_result(1, 1, Err(Status::with_code(Code::NotFound).into())); err.add_result(1, 1, Ok(())); let r = err.take_result(); assert!(r.is_err()); diff --git a/components/engine_traits/src/errors.rs b/components/engine_traits/src/errors.rs index 12104e14a5c..6348db22174 100644 --- a/components/engine_traits/src/errors.rs +++ b/components/engine_traits/src/errors.rs @@ -6,11 +6,119 @@ use error_code::{self, ErrorCode, ErrorCodeExt}; use raft::{Error as RaftError, StorageError}; use thiserror::Error; +#[repr(u8)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub enum Code { + Ok = 0, + NotFound = 1, + Corruption = 2, + NotSupported = 3, + InvalidArgument = 4, + IoError = 5, + MergeInProgress = 6, + Incomplete = 7, + ShutdownInProgress = 8, + TimedOut = 9, + Aborted = 10, + Busy = 11, + Expired = 12, + TryAgain = 13, + CompactionTooLarge = 14, + ColumnFamilyDropped = 15, +} + +#[repr(u8)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub enum SubCode { + None = 0, + MutexTimeout = 1, + LockTimeout = 2, + LockLimit = 3, + NoSpace = 4, + Deadlock = 5, + StaleFile = 6, + MemoryLimit = 7, + SpaceLimit = 8, + PathNotFound = 9, +} + +#[repr(u8)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub enum Severity { + NoError = 0, + SoftError = 1, + HardError = 2, + FatalError = 3, + UnrecoverableError = 4, +} + +#[repr(C)] +#[derive(Debug, Error)] +#[error("[{:?}] {:?}-{:?} {}", .code, .sub_code, .sev, .state)] +pub 
struct Status { + code: Code, + sub_code: SubCode, + sev: Severity, + state: String, +} + +impl Status { + pub fn with_code(code: Code) -> Status { + Self { + code, + sub_code: SubCode::None, + sev: Severity::NoError, + state: String::new(), + } + } + + pub fn with_error(code: Code, error: impl Into) -> Self { + Self { + code, + sub_code: SubCode::None, + sev: Severity::NoError, + state: error.into(), + } + } + + #[inline] + pub fn set_sub_code(&mut self, sub_code: SubCode) -> &mut Self { + self.sub_code = sub_code; + self + } + + #[inline] + pub fn set_severity(&mut self, sev: Severity) -> &mut Self { + self.sev = sev; + self + } + + #[inline] + pub fn code(&self) -> Code { + self.code + } + + #[inline] + pub fn sub_code(&self) -> SubCode { + self.sub_code + } + + #[inline] + pub fn severity(&self) -> Severity { + self.sev + } + + #[inline] + pub fn state(&self) -> &str { + &self.state + } +} + #[derive(Debug, Error)] pub enum Error { // Engine uses plain string as the error. - #[error("Storage Engine {0}")] - Engine(String), + #[error("Storage Engine {0:?}")] + Engine(#[from] Status), // FIXME: It should not know Region. 
#[error( "Key {} is out of [region {}] [{}, {})", @@ -38,12 +146,6 @@ pub enum Error { EntriesCompacted, } -impl From for Error { - fn from(err: String) -> Self { - Error::Engine(err) - } -} - pub type Result = result::Result; impl ErrorCodeExt for Error { diff --git a/components/engine_traits/src/file_system.rs b/components/engine_traits/src/file_system.rs index 9022aeb7dc2..1671c1f0aab 100644 --- a/components/engine_traits/src/file_system.rs +++ b/components/engine_traits/src/file_system.rs @@ -4,9 +4,11 @@ use std::sync::Arc; use file_system::{get_io_rate_limiter, get_io_type, IOOp, IORateLimiter}; +use crate::Result; + pub trait FileSystemInspector: Sync + Send { - fn read(&self, len: usize) -> Result; - fn write(&self, len: usize) -> Result; + fn read(&self, len: usize) -> Result; + fn write(&self, len: usize) -> Result; } pub struct EngineFileSystemInspector { @@ -33,7 +35,7 @@ impl Default for EngineFileSystemInspector { } impl FileSystemInspector for EngineFileSystemInspector { - fn read(&self, len: usize) -> Result { + fn read(&self, len: usize) -> Result { if let Some(limiter) = &self.limiter { let io_type = get_io_type(); Ok(limiter.request(io_type, IOOp::Read, len)) @@ -42,7 +44,7 @@ impl FileSystemInspector for EngineFileSystemInspector { } } - fn write(&self, len: usize) -> Result { + fn write(&self, len: usize) -> Result { if let Some(limiter) = &self.limiter { let io_type = get_io_type(); Ok(limiter.request(io_type, IOOp::Write, len)) diff --git a/components/engine_traits/src/iterable.rs b/components/engine_traits/src/iterable.rs index a6dbdd2d03f..9d45fc5b0ac 100644 --- a/components/engine_traits/src/iterable.rs +++ b/components/engine_traits/src/iterable.rs @@ -31,13 +31,6 @@ use tikv_util::keybuilder::KeyBuilder; use crate::*; -/// A token indicating where an iterator "seek" operation should stop. -pub enum SeekKey<'a> { - Start, - End, - Key(&'a [u8]), -} - /// An iterator over a consistent set of keys and values. 
/// /// Iterators are implemented for `KvEngine`s and for `Snapshot`s. They see a @@ -56,15 +49,8 @@ pub enum SeekKey<'a> { pub trait Iterator: Send { /// Move the iterator to a specific key. /// - /// When `key` is `SeekKey::Start` or `SeekKey::End`, - /// `seek` and `seek_for_prev` behave identically. - /// The difference between the two functions is how they - /// behave for `SeekKey::Key`, and only when an exactly - /// matching keys is not found: - /// - /// When seeking with `SeekKey::Key`, and an exact match is not found, - /// `seek` sets the iterator to the next key greater than that - /// specified as `key`, if such a key exists; + /// When an exact match is not found, `seek` sets the iterator to the next + /// key greater than that specified as `key`, if such a key exists; /// `seek_for_prev` sets the iterator to the previous key less than /// that specified as `key`, if such a key exists. /// @@ -72,7 +58,7 @@ pub trait Iterator: Send { /// /// `true` if seeking succeeded and the iterator is valid, /// `false` if seeking failed and the iterator is invalid. - fn seek(&mut self, key: SeekKey<'_>) -> Result; + fn seek(&mut self, key: &[u8]) -> Result; /// Move the iterator to a specific key. /// @@ -83,44 +69,40 @@ pub trait Iterator: Send { /// /// `true` if seeking succeeded and the iterator is valid, /// `false` if seeking failed and the iterator is invalid. - fn seek_for_prev(&mut self, key: SeekKey<'_>) -> Result; + fn seek_for_prev(&mut self, key: &[u8]) -> Result; - /// Short for `seek(SeekKey::Start)`. - fn seek_to_first(&mut self) -> Result { - self.seek(SeekKey::Start) - } + /// Seek to the first key in the engine. + fn seek_to_first(&mut self) -> Result; - /// Short for `seek(SeekKey::End)`. - fn seek_to_last(&mut self) -> Result { - self.seek(SeekKey::End) - } + /// Seek to the last key in the database. + fn seek_to_last(&mut self) -> Result; /// Move a valid iterator to the previous key. 
/// /// # Panics /// - /// If the iterator is invalid + /// If the iterator is invalid, iterator may panic or aborted. fn prev(&mut self) -> Result; /// Move a valid iterator to the next key. /// /// # Panics /// - /// If the iterator is invalid + /// If the iterator is invalid, iterator may panic or aborted. fn next(&mut self) -> Result; /// Retrieve the current key. /// /// # Panics /// - /// If the iterator is invalid + /// If the iterator is invalid, iterator may panic or aborted. fn key(&self) -> &[u8]; /// Retrieve the current value. /// /// # Panics /// - /// If the iterator is invalid + /// If the iterator is invalid, iterator may panic or aborted. fn value(&self) -> &[u8]; /// Returns `true` if the iterator points to a `key`/`value` pair. @@ -130,32 +112,15 @@ pub trait Iterator: Send { pub trait Iterable { type Iterator: Iterator; - fn iterator_opt(&self, opts: IterOptions) -> Result; - fn iterator_cf_opt(&self, cf: &str, opts: IterOptions) -> Result; - - fn iterator(&self) -> Result { - self.iterator_opt(IterOptions::default()) - } + fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result; - fn iterator_cf(&self, cf: &str) -> Result { - self.iterator_cf_opt(cf, IterOptions::default()) + fn iterator(&self, cf: &str) -> Result { + self.iterator_opt(cf, IterOptions::default()) } /// scan the key between start_key(inclusive) and end_key(exclusive), /// the upper bound is omitted if end_key is empty - fn scan(&self, start_key: &[u8], end_key: &[u8], fill_cache: bool, f: F) -> Result<()> - where - F: FnMut(&[u8], &[u8]) -> Result, - { - let start = KeyBuilder::from_slice(start_key, DATA_KEY_PREFIX_LEN, 0); - let end = - (!end_key.is_empty()).then(|| KeyBuilder::from_slice(end_key, DATA_KEY_PREFIX_LEN, 0)); - let iter_opt = IterOptions::new(Some(start), end, fill_cache); - scan_impl(self.iterator_opt(iter_opt)?, start_key, f) - } - - // like `scan`, only on a specific column family. 
- fn scan_cf( + fn scan( &self, cf: &str, start_key: &[u8], @@ -170,23 +135,13 @@ pub trait Iterable { let end = (!end_key.is_empty()).then(|| KeyBuilder::from_slice(end_key, DATA_KEY_PREFIX_LEN, 0)); let iter_opt = IterOptions::new(Some(start), end, fill_cache); - scan_impl(self.iterator_cf_opt(cf, iter_opt)?, start_key, f) - } - - // Seek the first key >= given key, if not found, return None. - fn seek(&self, key: &[u8]) -> Result, Vec)>> { - let mut iter = self.iterator()?; - if iter.seek(SeekKey::Key(key))? { - let (k, v) = (iter.key().to_vec(), iter.value().to_vec()); - return Ok(Some((k, v))); - } - Ok(None) + scan_impl(self.iterator_opt(cf, iter_opt)?, start_key, f) } // Seek the first key >= given key, if not found, return None. - fn seek_cf(&self, cf: &str, key: &[u8]) -> Result, Vec)>> { - let mut iter = self.iterator_cf(cf)?; - if iter.seek(SeekKey::Key(key))? { + fn seek(&self, cf: &str, key: &[u8]) -> Result, Vec)>> { + let mut iter = self.iterator(cf)?; + if iter.seek(key)? { return Ok(Some((iter.key().to_vec(), iter.value().to_vec()))); } Ok(None) @@ -198,19 +153,13 @@ where Iter: Iterator, F: FnMut(&[u8], &[u8]) -> Result, { - let mut remained = it.seek(SeekKey::Key(start_key))?; + let mut remained = it.seek(start_key)?; while remained { remained = f(it.key(), it.value())? && it.next()?; } Ok(()) } -impl<'a> From<&'a [u8]> for SeekKey<'a> { - fn from(bs: &'a [u8]) -> SeekKey<'a> { - SeekKey::Key(bs) - } -} - /// Collect all items of `it` into a vector, generally used for tests. /// /// # Panics diff --git a/components/engine_traits_tests/src/iterator.rs b/components/engine_traits_tests/src/iterator.rs index 00f7a974b52..96709c3fe29 100644 --- a/components/engine_traits_tests/src/iterator.rs +++ b/components/engine_traits_tests/src/iterator.rs @@ -1,6 +1,6 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{Iterable, Iterator, KvEngine, SeekKey}; +use engine_traits::{Iterable, Iterator, KvEngine, CF_DEFAULT}; use panic_hook::recover_safe; use super::default_engine; @@ -30,24 +30,22 @@ where .is_err() ); - assert_eq!(iter.seek(SeekKey::Start).unwrap(), false); - assert_eq!(iter.seek(SeekKey::End).unwrap(), false); - assert_eq!(iter.seek(SeekKey::Key(b"foo")).unwrap(), false); - assert_eq!(iter.seek_for_prev(SeekKey::Start).unwrap(), false); - assert_eq!(iter.seek_for_prev(SeekKey::End).unwrap(), false); - assert_eq!(iter.seek_for_prev(SeekKey::Key(b"foo")).unwrap(), false); + assert_eq!(iter.seek_to_first().unwrap(), false); + assert_eq!(iter.seek_to_last().unwrap(), false); + assert_eq!(iter.seek(b"foo").unwrap(), false); + assert_eq!(iter.seek_for_prev(b"foo").unwrap(), false); } #[test] fn iter_empty_engine() { let db = default_engine(); - iter_empty(&db.engine, |e| e.iterator().unwrap()); + iter_empty(&db.engine, |e| e.iterator(CF_DEFAULT).unwrap()); } #[test] fn iter_empty_snapshot() { let db = default_engine(); - iter_empty(&db.engine, |e| e.snapshot().iterator().unwrap()); + iter_empty(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); } fn iter_forward(e: &E, i: IF) @@ -64,7 +62,7 @@ where assert!(!iter.valid().unwrap()); - assert!(iter.seek(SeekKey::Start).unwrap()); + assert!(iter.seek_to_first().unwrap()); assert!(iter.valid().unwrap()); assert_eq!(iter.key(), b"a"); @@ -103,13 +101,13 @@ where #[test] fn iter_forward_engine() { let db = default_engine(); - iter_forward(&db.engine, |e| e.iterator().unwrap()); + iter_forward(&db.engine, |e| e.iterator(CF_DEFAULT).unwrap()); } #[test] fn iter_forward_snapshot() { let db = default_engine(); - iter_forward(&db.engine, |e| e.snapshot().iterator().unwrap()); + iter_forward(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); } fn iter_reverse(e: &E, i: IF) @@ -126,7 +124,7 @@ where assert!(!iter.valid().unwrap()); - assert!(iter.seek(SeekKey::End).unwrap()); + 
assert!(iter.seek_to_last().unwrap()); assert!(iter.valid().unwrap()); assert_eq!(iter.key(), b"c"); @@ -165,13 +163,13 @@ where #[test] fn iter_reverse_engine() { let db = default_engine(); - iter_reverse(&db.engine, |e| e.iterator().unwrap()); + iter_reverse(&db.engine, |e| e.iterator(CF_DEFAULT).unwrap()); } #[test] fn iter_reverse_snapshot() { let db = default_engine(); - iter_reverse(&db.engine, |e| e.snapshot().iterator().unwrap()); + iter_reverse(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); } fn seek_to_key_then_forward(e: &E, i: IF) @@ -186,7 +184,7 @@ where let mut iter = i(e); - assert!(iter.seek(SeekKey::Key(b"b")).unwrap()); + assert!(iter.seek(b"b").unwrap()); assert!(iter.valid().unwrap()); assert_eq!(iter.key(), b"b"); @@ -206,13 +204,13 @@ where #[test] fn seek_to_key_then_forward_engine() { let db = default_engine(); - seek_to_key_then_forward(&db.engine, |e| e.iterator().unwrap()); + seek_to_key_then_forward(&db.engine, |e| e.iterator(CF_DEFAULT).unwrap()); } #[test] fn seek_to_key_then_forward_snapshot() { let db = default_engine(); - seek_to_key_then_forward(&db.engine, |e| e.snapshot().iterator().unwrap()); + seek_to_key_then_forward(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); } fn seek_to_key_then_reverse(e: &E, i: IF) @@ -227,7 +225,7 @@ where let mut iter = i(e); - assert!(iter.seek(SeekKey::Key(b"b")).unwrap()); + assert!(iter.seek(b"b").unwrap()); assert!(iter.valid().unwrap()); assert_eq!(iter.key(), b"b"); @@ -247,13 +245,13 @@ where #[test] fn seek_to_key_then_reverse_engine() { let db = default_engine(); - seek_to_key_then_reverse(&db.engine, |e| e.iterator().unwrap()); + seek_to_key_then_reverse(&db.engine, |e| e.iterator(CF_DEFAULT).unwrap()); } #[test] fn seek_to_key_then_reverse_snapshot() { let db = default_engine(); - seek_to_key_then_reverse(&db.engine, |e| e.snapshot().iterator().unwrap()); + seek_to_key_then_reverse(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); } fn 
iter_forward_then_reverse(e: &E, i: IF) @@ -270,7 +268,7 @@ where assert!(!iter.valid().unwrap()); - assert!(iter.seek(SeekKey::Start).unwrap()); + assert!(iter.seek_to_first().unwrap()); assert!(iter.valid().unwrap()); assert_eq!(iter.key(), b"a"); @@ -308,13 +306,13 @@ where #[test] fn iter_forward_then_reverse_engine() { let db = default_engine(); - iter_forward_then_reverse(&db.engine, |e| e.iterator().unwrap()); + iter_forward_then_reverse(&db.engine, |e| e.iterator(CF_DEFAULT).unwrap()); } #[test] fn iter_forward_then_reverse_snapshot() { let db = default_engine(); - iter_forward_then_reverse(&db.engine, |e| e.snapshot().iterator().unwrap()); + iter_forward_then_reverse(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); } fn iter_reverse_then_forward(e: &E, i: IF) @@ -331,7 +329,7 @@ where assert!(!iter.valid().unwrap()); - assert!(iter.seek(SeekKey::End).unwrap()); + assert!(iter.seek_to_last().unwrap()); assert!(iter.valid().unwrap()); assert_eq!(iter.key(), b"c"); @@ -369,13 +367,13 @@ where #[test] fn iter_reverse_then_forward_engine() { let db = default_engine(); - iter_reverse_then_forward(&db.engine, |e| e.iterator().unwrap()); + iter_reverse_then_forward(&db.engine, |e| e.iterator(CF_DEFAULT).unwrap()); } #[test] fn iter_reverse_then_forward_snapshot() { let db = default_engine(); - iter_reverse_then_forward(&db.engine, |e| e.snapshot().iterator().unwrap()); + iter_reverse_then_forward(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); } // When seek finds an exact key then seek_for_prev behaves just like seek @@ -391,19 +389,19 @@ where let mut iter = i(e); - assert!(iter.seek_for_prev(SeekKey::Start).unwrap()); + assert!(iter.seek_to_first().unwrap()); assert!(iter.valid().unwrap()); assert_eq!(iter.key(), b"a"); assert_eq!(iter.value(), b"a"); - assert!(iter.seek_for_prev(SeekKey::End).unwrap()); + assert!(iter.seek_to_last().unwrap()); assert!(iter.valid().unwrap()); assert_eq!(iter.key(), b"c"); assert_eq!(iter.value(), b"c"); 
- assert!(iter.seek_for_prev(SeekKey::Key(b"c")).unwrap()); + assert!(iter.seek_for_prev(b"c").unwrap()); assert!(iter.valid().unwrap()); assert_eq!(iter.key(), b"c"); @@ -413,13 +411,13 @@ where #[test] fn seek_for_prev_engine() { let db = default_engine(); - seek_for_prev(&db.engine, |e| e.iterator().unwrap()); + seek_for_prev(&db.engine, |e| e.iterator(CF_DEFAULT).unwrap()); } #[test] fn seek_for_prev_snapshot() { let db = default_engine(); - seek_for_prev(&db.engine, |e| e.snapshot().iterator().unwrap()); + seek_for_prev(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); } // When Seek::Key doesn't find an exact match, @@ -437,24 +435,24 @@ where assert!(!iter.valid().unwrap()); - assert!(iter.seek(SeekKey::Key(b"b")).unwrap()); + assert!(iter.seek(b"b").unwrap()); assert!(iter.valid().unwrap()); assert_eq!(iter.key(), b"c"); - assert!(!iter.seek(SeekKey::Key(b"d")).unwrap()); + assert!(!iter.seek(b"d").unwrap()); assert!(!iter.valid().unwrap()); } #[test] fn seek_key_miss_engine() { let db = default_engine(); - seek_key_miss(&db.engine, |e| e.iterator().unwrap()); + seek_key_miss(&db.engine, |e| e.iterator(CF_DEFAULT).unwrap()); } #[test] fn seek_key_miss_snapshot() { let db = default_engine(); - seek_key_miss(&db.engine, |e| e.snapshot().iterator().unwrap()); + seek_key_miss(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); } fn seek_key_prev_miss(e: &E, i: IF) @@ -469,22 +467,22 @@ where assert!(!iter.valid().unwrap()); - assert!(iter.seek_for_prev(SeekKey::Key(b"d")).unwrap()); + assert!(iter.seek_for_prev(b"d").unwrap()); assert!(iter.valid().unwrap()); assert_eq!(iter.key(), b"c"); - assert!(!iter.seek_for_prev(SeekKey::Key(b"b")).unwrap()); + assert!(!iter.seek_for_prev(b"b").unwrap()); assert!(!iter.valid().unwrap()); } #[test] fn seek_key_prev_miss_engine() { let db = default_engine(); - seek_key_prev_miss(&db.engine, |e| e.iterator().unwrap()); + seek_key_prev_miss(&db.engine, |e| e.iterator(CF_DEFAULT).unwrap()); } #[test] fn 
seek_key_prev_miss_snapshot() { let db = default_engine(); - seek_key_prev_miss(&db.engine, |e| e.snapshot().iterator().unwrap()); + seek_key_prev_miss(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); } diff --git a/components/engine_traits_tests/src/read_consistency.rs b/components/engine_traits_tests/src/read_consistency.rs index d80b6b3db7c..8c7ab50657f 100644 --- a/components/engine_traits_tests/src/read_consistency.rs +++ b/components/engine_traits_tests/src/read_consistency.rs @@ -2,7 +2,7 @@ //! Testing iterator and snapshot behavior in the presence of intermixed writes -use engine_traits::{Iterable, Iterator, KvEngine, Peekable, SyncMutable}; +use engine_traits::{Iterable, Iterator, KvEngine, Peekable, SyncMutable, CF_DEFAULT}; use super::default_engine; @@ -71,11 +71,11 @@ where #[test] fn iterator_with_writes_engine() { let db = default_engine(); - iterator_with_writes(&db.engine, |e| e.iterator().unwrap()); + iterator_with_writes(&db.engine, |e| e.iterator(CF_DEFAULT).unwrap()); } #[test] fn iterator_with_writes_snapshot() { let db = default_engine(); - iterator_with_writes(&db.engine, |e| e.snapshot().iterator().unwrap()); + iterator_with_writes(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); } diff --git a/components/engine_traits_tests/src/sst.rs b/components/engine_traits_tests/src/sst.rs index 10104e752cc..231e12ea785 100644 --- a/components/engine_traits_tests/src/sst.rs +++ b/components/engine_traits_tests/src/sst.rs @@ -6,8 +6,7 @@ use std::fs; use engine_test::kv::KvTestEngine; use engine_traits::{ - Error, ExternalSstFileInfo, Iterator, Result, SeekKey, SstExt, SstReader, SstWriter, - SstWriterBuilder, + Error, ExternalSstFileInfo, Iterator, Result, SstExt, SstReader, SstWriter, SstWriterBuilder, }; use panic_hook::recover_safe; @@ -51,7 +50,7 @@ fn basic() -> Result<()> { let sst_reader = ::SstReader::open(&sst_path)?; let mut iter = sst_reader.iter(); - iter.seek(SeekKey::Start)?; + iter.seek_to_first()?; let key = 
iter.key(); let value = iter.value(); assert_eq!(b"k1", key); @@ -80,7 +79,7 @@ fn forward() -> Result<()> { let sst_reader = ::SstReader::open(&sst_path)?; let mut iter = sst_reader.iter(); - iter.seek(SeekKey::Start)?; + iter.seek_to_first()?; let key = iter.key(); let value = iter.value(); @@ -117,7 +116,7 @@ fn reverse() -> Result<()> { let sst_reader = ::SstReader::open(&sst_path)?; let mut iter = sst_reader.iter(); - iter.seek(SeekKey::End)?; + iter.seek_to_last()?; let key = iter.key(); let value = iter.value(); @@ -136,7 +135,7 @@ fn reverse() -> Result<()> { Ok(()) } -// todo test seek_for_prev(SeekKey::Key) +// todo test seek_for_prev(Key) #[test] fn delete() -> Result<()> { @@ -155,7 +154,7 @@ fn delete() -> Result<()> { let sst_reader = ::SstReader::open(&sst_path)?; let mut iter = sst_reader.iter(); - iter.seek(SeekKey::Start)?; + iter.seek_to_first()?; assert_eq!(iter.valid()?, false); @@ -174,12 +173,10 @@ fn delete() -> Result<()> { .is_err() ); - assert_eq!(iter.seek(SeekKey::Start)?, false); - assert_eq!(iter.seek(SeekKey::End)?, false); - assert_eq!(iter.seek(SeekKey::Key(b"foo"))?, false); - assert_eq!(iter.seek_for_prev(SeekKey::Start)?, false); - assert_eq!(iter.seek_for_prev(SeekKey::End)?, false); - assert_eq!(iter.seek_for_prev(SeekKey::Key(b"foo"))?, false); + assert_eq!(iter.seek_to_first()?, false); + assert_eq!(iter.seek_to_last()?, false); + assert_eq!(iter.seek(b"foo")?, false); + assert_eq!(iter.seek_for_prev(b"foo")?, false); Ok(()) } @@ -215,7 +212,7 @@ fn same_key() -> Result<()> { let sst_reader = ::SstReader::open(&sst_path)?; let mut iter = sst_reader.iter(); - iter.seek(SeekKey::Start)?; + iter.seek_to_first()?; let key = iter.key(); let value = iter.value(); assert_eq!(b"k1", key); @@ -257,7 +254,7 @@ fn reverse_key() -> Result<()> { let sst_reader = ::SstReader::open(&sst_path)?; let mut iter = sst_reader.iter(); - iter.seek(SeekKey::Start)?; + iter.seek_to_first()?; let key = iter.key(); let value = iter.value(); 
assert_eq!(b"k2", key); diff --git a/components/raftstore/src/coprocessor/consistency_check.rs b/components/raftstore/src/coprocessor/consistency_check.rs index 16770595405..70b55db41f4 100644 --- a/components/raftstore/src/coprocessor/consistency_check.rs +++ b/components/raftstore/src/coprocessor/consistency_check.rs @@ -66,7 +66,7 @@ fn compute_hash_on_raw(region: &Region, snap: &S) -> Result { let start_key = keys::enc_start_key(region); let end_key = keys::enc_end_key(region); for cf in cf_names { - snap.scan_cf(cf, &start_key, &end_key, false, |k, v| { + snap.scan(cf, &start_key, &end_key, false, |k, v| { digest.update(k); digest.update(v); Ok(true) diff --git a/components/raftstore/src/coprocessor/split_check/table.rs b/components/raftstore/src/coprocessor/split_check/table.rs index a8a1ded4144..e377d4b550a 100644 --- a/components/raftstore/src/coprocessor/split_check/table.rs +++ b/components/raftstore/src/coprocessor/split_check/table.rs @@ -2,7 +2,7 @@ use std::cmp::Ordering; -use engine_traits::{IterOptions, Iterator, KvEngine, SeekKey, CF_WRITE}; +use engine_traits::{IterOptions, Iterator, KvEngine, CF_WRITE}; use error_code::ErrorCodeExt; use kvproto::{metapb::Region, pdpb::CheckPolicy}; use tidb_query_datatype::codec::table as table_codec; @@ -183,10 +183,10 @@ fn last_key_of_region(db: &impl KvEngine, region: &Region) -> Result = iter.seek(SeekKey::End).map_err(|e| box_err!(e)); + let found: Result = iter.seek_to_last().map_err(|e| box_err!(e)); if found? 
{ let key = iter.key().to_vec(); last_key = Some(key); diff --git a/components/raftstore/src/store/bootstrap.rs b/components/raftstore/src/store/bootstrap.rs index 12fb238dce8..561425d9d00 100644 --- a/components/raftstore/src/store/bootstrap.rs +++ b/components/raftstore/src/store/bootstrap.rs @@ -34,7 +34,7 @@ fn is_range_empty( end_key: &[u8], ) -> Result { let mut count: u32 = 0; - engine.scan_cf(cf, start_key, end_key, false, |_, _| { + engine.scan(cf, start_key, end_key, false, |_, _| { count += 1; Ok(false) })?; diff --git a/components/raftstore/src/store/compaction_guard.rs b/components/raftstore/src/store/compaction_guard.rs index dc5690a2b34..e7a59631ca1 100644 --- a/components/raftstore/src/store/compaction_guard.rs +++ b/components/raftstore/src/store/compaction_guard.rs @@ -202,9 +202,7 @@ mod tests { raw_util::{new_engine_opt, CFOptions}, RocksEngine, RocksSstPartitionerFactory, RocksSstReader, }; - use engine_traits::{ - CompactExt, Iterator, MiscExt, SeekKey, SstReader, SyncMutable, CF_DEFAULT, - }; + use engine_traits::{CompactExt, Iterator, MiscExt, SstReader, SyncMutable, CF_DEFAULT}; use keys::DATA_PREFIX_KEY; use kvproto::metapb::Region; use tempfile::TempDir; @@ -404,7 +402,7 @@ mod tests { fn collect_keys(path: &str) -> Vec> { let mut sst_reader = RocksSstReader::open(path).unwrap().iter(); - let mut valid = sst_reader.seek(SeekKey::Start).unwrap(); + let mut valid = sst_reader.seek_to_first().unwrap(); let mut ret = vec![]; while valid { ret.push(sst_reader.key().to_owned()); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index c46cafb7e48..f92d08dd3a4 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -1084,7 +1084,7 @@ impl RaftPollerBuilder { let mut merging_count = 0; let mut meta = self.store_meta.lock().unwrap(); let mut replication_state = self.global_replication_state.lock().unwrap(); - kv_engine.scan_cf(CF_RAFT, 
start_key, end_key, false, |key, value| { + kv_engine.scan(CF_RAFT, start_key, end_key, false, |key, value| { let (region_id, suffix) = box_try!(keys::decode_region_meta_key(key)); if suffix != keys::REGION_STATE_SUFFIX { return Ok(true); diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 58e35ff9084..ec6cc3bcf11 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -2238,7 +2238,7 @@ mod tests { store .engines .kv - .scan_cf(CF_RAFT, &meta_start, &meta_end, false, |_, _| { + .scan(CF_RAFT, &meta_start, &meta_end, false, |_, _| { count += 1; Ok(true) }) @@ -2251,7 +2251,7 @@ mod tests { store .engines .kv - .scan_cf(CF_RAFT, &raft_start, &raft_end, false, |_, _| { + .scan(CF_RAFT, &raft_start, &raft_end, false, |_, _| { count += 1; Ok(true) }) diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index 390c0ee0f5c..cd2bc75d048 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -109,12 +109,8 @@ where } } - pub fn iter(&self, iter_opt: IterOptions) -> RegionIterator { - RegionIterator::new(&self.snap, Arc::clone(&self.region), iter_opt) - } - - pub fn iter_cf(&self, cf: &str, iter_opt: IterOptions) -> Result> { - Ok(RegionIterator::new_cf( + pub fn iter(&self, cf: &str, iter_opt: IterOptions) -> Result> { + Ok(RegionIterator::new( &self.snap, Arc::clone(&self.region), iter_opt, @@ -124,24 +120,13 @@ where // scan scans database using an iterator in range [start_key, end_key), calls function f for // each iteration, if f returns false, terminates this scan. 
- pub fn scan(&self, start_key: &[u8], end_key: &[u8], fill_cache: bool, f: F) -> Result<()> - where - F: FnMut(&[u8], &[u8]) -> Result, - { - let start = KeyBuilder::from_slice(start_key, DATA_PREFIX_KEY.len(), 0); - let end = KeyBuilder::from_slice(end_key, DATA_PREFIX_KEY.len(), 0); - let iter_opt = IterOptions::new(Some(start), Some(end), fill_cache); - self.scan_impl(self.iter(iter_opt), start_key, f) - } - - // like `scan`, only on a specific column family. - pub fn scan_cf( + pub fn scan( &self, cf: &str, start_key: &[u8], end_key: &[u8], fill_cache: bool, - f: F, + mut f: F, ) -> Result<()> where F: FnMut(&[u8], &[u8]) -> Result, @@ -149,13 +134,8 @@ where let start = KeyBuilder::from_slice(start_key, DATA_PREFIX_KEY.len(), 0); let end = KeyBuilder::from_slice(end_key, DATA_PREFIX_KEY.len(), 0); let iter_opt = IterOptions::new(Some(start), Some(end), fill_cache); - self.scan_impl(self.iter_cf(cf, iter_opt)?, start_key, f) - } - fn scan_impl(&self, mut it: RegionIterator, start_key: &[u8], mut f: F) -> Result<()> - where - F: FnMut(&[u8], &[u8]) -> Result, - { + let mut it = self.iter(cf, iter_opt)?; let mut it_valid = it.seek(start_key)?; while it_valid { it_valid = f(it.key(), it.value())? 
&& it.next()?; @@ -300,16 +280,7 @@ impl RegionIterator where S: Snapshot, { - pub fn new(snap: &S, region: Arc, mut iter_opt: IterOptions) -> RegionIterator { - update_lower_bound(&mut iter_opt, ®ion); - update_upper_bound(&mut iter_opt, ®ion); - let iter = snap - .iterator_opt(iter_opt) - .expect("creating snapshot iterator"); // FIXME error handling - RegionIterator { iter, region } - } - - pub fn new_cf( + pub fn new( snap: &S, region: Arc, mut iter_opt: IterOptions, @@ -318,7 +289,7 @@ where update_lower_bound(&mut iter_opt, ®ion); update_upper_bound(&mut iter_opt, ®ion); let iter = snap - .iterator_cf_opt(cf, iter_opt) + .iterator_opt(cf, iter_opt) .expect("creating snapshot iterator"); // FIXME error handling RegionIterator { iter, region } } @@ -337,15 +308,13 @@ where }); self.should_seekable(key)?; let key = keys::data_key(key); - self.iter.seek(key.as_slice().into()).map_err(Error::from) + self.iter.seek(&key).map_err(Error::from) } pub fn seek_for_prev(&mut self, key: &[u8]) -> Result { self.should_seekable(key)?; let key = keys::data_key(key); - self.iter - .seek_for_prev(key.as_slice().into()) - .map_err(Error::from) + self.iter.seek_for_prev(&key).map_err(Error::from) } pub fn prev(&mut self) -> Result { @@ -397,7 +366,7 @@ fn handle_check_key_in_region_error(e: crate::Error) -> Result<()> { #[cfg(test)] mod tests { use engine_test::{kv::KvTestSnapshot, new_temp_engine}; - use engine_traits::{Engines, KvEngine, Peekable, RaftEngine, SyncMutable}; + use engine_traits::{Engines, KvEngine, Peekable, RaftEngine, SyncMutable, CF_DEFAULT}; use keys::data_key; use kvproto::metapb::{Peer, Region}; use tempfile::Builder; @@ -548,7 +517,7 @@ mod tests { upper_bound.map(|v| KeyBuilder::from_slice(v, keys::DATA_PREFIX_KEY.len(), 0)), true, ); - let mut iter = snap.iter(iter_opt); + let mut iter = snap.iter(CF_DEFAULT, iter_opt).unwrap(); for (seek_key, in_range, seek_exp, prev_exp) in seek_table.clone() { let check_res = |iter: &RegionIterator, res: Result, @@ 
-650,7 +619,7 @@ mod tests { let snap = RegionSnapshot::::new(&store); let mut data = vec![]; - snap.scan(b"a2", &[0xFF, 0xFF], false, |key, value| { + snap.scan(CF_DEFAULT, b"a2", &[0xFF, 0xFF], false, |key, value| { data.push((key.to_vec(), value.to_vec())); Ok(true) }) @@ -660,7 +629,7 @@ mod tests { assert_eq!(data, &base_data[1..3]); data.clear(); - snap.scan(b"a2", &[0xFF, 0xFF], false, |key, value| { + snap.scan(CF_DEFAULT, b"a2", &[0xFF, 0xFF], false, |key, value| { data.push((key.to_vec(), value.to_vec())); Ok(false) }) @@ -668,7 +637,7 @@ mod tests { assert_eq!(data.len(), 1); - let mut iter = snap.iter(IterOptions::default()); + let mut iter = snap.iter(CF_DEFAULT, IterOptions::default()).unwrap(); assert!(iter.seek_to_first().unwrap()); let mut res = vec![]; loop { @@ -685,7 +654,7 @@ mod tests { let store = new_peer_storage(engines.clone(), ®ion); let snap = RegionSnapshot::::new(&store); data.clear(); - snap.scan(b"", &[0xFF, 0xFF], false, |key, value| { + snap.scan(CF_DEFAULT, b"", &[0xFF, 0xFF], false, |key, value| { data.push((key.to_vec(), value.to_vec())); Ok(true) }) @@ -694,7 +663,7 @@ mod tests { assert_eq!(data.len(), 5); assert_eq!(data, base_data); - let mut iter = snap.iter(IterOptions::default()); + let mut iter = snap.iter(CF_DEFAULT, IterOptions::default()).unwrap(); assert!(iter.seek(b"a1").unwrap()); assert!(iter.seek_to_first().unwrap()); @@ -710,11 +679,16 @@ mod tests { // test iterator with upper bound let store = new_peer_storage(engines, ®ion); let snap = RegionSnapshot::::new(&store); - let mut iter = snap.iter(IterOptions::new( - None, - Some(KeyBuilder::from_slice(b"a5", DATA_PREFIX_KEY.len(), 0)), - true, - )); + let mut iter = snap + .iter( + CF_DEFAULT, + IterOptions::new( + None, + Some(KeyBuilder::from_slice(b"a5", DATA_PREFIX_KEY.len(), 0)), + true, + ), + ) + .unwrap(); assert!(iter.seek_to_first().unwrap()); let mut res = vec![]; loop { @@ -735,7 +709,7 @@ mod tests { let snap = RegionSnapshot::::new(&store); let mut 
iter_opt = IterOptions::default(); iter_opt.set_lower_bound(b"a3", 1); - let mut iter = snap.iter(iter_opt); + let mut iter = snap.iter(CF_DEFAULT, iter_opt).unwrap(); assert!(iter.seek_to_last().unwrap()); let mut res = vec![]; loop { diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 7bcaeb5529b..cca1dfbda77 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -2011,7 +2011,7 @@ pub mod tests { pub fn get_kv_count(snap: &impl EngineSnapshot) -> usize { let mut kv_count = 0; for cf in SNAPSHOT_CFS { - snap.scan_cf( + snap.scan( cf, &keys::data_key(b"a"), &keys::data_key(b"z"), diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 2baf191d749..4fb34f15341 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -81,7 +81,7 @@ where }; let mut stats = BuildStatistics::default(); - box_try!(snap.scan_cf(cf, start_key, end_key, false, |key, value| { + box_try!(snap.scan(cf, start_key, end_key, false, |key, value| { stats.key_count += 1; stats.total_size += key.len() + value.len(); box_try!(BytesEncoder::encode_compact_bytes(&mut writer, key)); @@ -133,7 +133,7 @@ where .to_string(); let sst_writer = RefCell::new(create_sst_file_writer::(engine, cf, &path)?); let mut file_length: usize = 0; - box_try!(snap.scan_cf(cf, start_key, end_key, false, |key, value| { + box_try!(snap.scan(cf, start_key, end_key, false, |key, value| { let entry_len = key.len() + value.len(); if file_length + entry_len > raw_size_per_file as usize { cf_file.add_file(file_id); // add previous file @@ -375,7 +375,7 @@ mod tests { // Scan keys from db let mut keys_in_db: HashMap<_, Vec<_>> = HashMap::new(); for cf in SNAPSHOT_CFS { - snap.scan_cf( + snap.scan( cf, &keys::data_key(b"a"), &keys::data_end_key(b"z"), diff --git a/components/raftstore/src/store/worker/split_check.rs 
b/components/raftstore/src/store/worker/split_check.rs index 3822575fb8e..e5dde8a910c 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -98,8 +98,8 @@ where Some(KeyBuilder::from_slice(end_key, 0, 0)), fill_cache, ); - let mut iter = db.iterator_cf_opt(cf, iter_opt)?; - let found: Result = iter.seek(start_key.into()).map_err(|e| box_err!(e)); + let mut iter = db.iterator_opt(cf, iter_opt)?; + let found: Result = iter.seek(start_key).map_err(|e| box_err!(e)); if found? { heap.push(KeyEntry::new( iter.key().to_vec(), diff --git a/components/server/src/raft_engine_switch.rs b/components/server/src/raft_engine_switch.rs index 586a3999b82..d011f9be93f 100644 --- a/components/server/src/raft_engine_switch.rs +++ b/components/server/src/raft_engine_switch.rs @@ -7,7 +7,7 @@ use std::sync::{ use crossbeam::channel::{unbounded, Receiver}; use engine_rocks::{self, RocksEngine}; -use engine_traits::{Iterable, Iterator, RaftEngine, RaftEngineReadOnly, RaftLogBatch, SeekKey}; +use engine_traits::{Iterable, Iterator, RaftEngine, RaftEngineReadOnly, RaftLogBatch, CF_DEFAULT}; use kvproto::raft_serverpb::RaftLocalState; use protobuf::Message; use raft::eraftpb::Entry; @@ -36,8 +36,8 @@ pub fn dump_raftdb_to_raft_engine(source: &RocksEngine, target: &RaftLogEngine, info!("Start to scan raft log from RocksEngine and dump into RaftLogEngine"); let consumed_time = tikv_util::time::Instant::now(); // Seek all region id from raftdb and send them to workers. 
- let mut it = source.iterator().unwrap(); - let mut valid = it.seek(SeekKey::Key(keys::REGION_RAFT_MIN_KEY)).unwrap(); + let mut it = source.iterator(CF_DEFAULT).unwrap(); + let mut valid = it.seek(keys::REGION_RAFT_MIN_KEY).unwrap(); while valid { match keys::decode_raft_key(it.key()) { Err(e) => { @@ -47,7 +47,7 @@ pub fn dump_raftdb_to_raft_engine(source: &RocksEngine, target: &RaftLogEngine, tx.send(id).unwrap(); count_region += 1; let next_key = keys::raft_log_prefix(id + 1); - valid = it.seek(SeekKey::Key(&next_key)).unwrap(); + valid = it.seek(&next_key).unwrap(); } } } @@ -115,7 +115,7 @@ fn check_raft_engine_is_empty(engine: &RaftLogEngine) { fn check_raft_db_is_empty(engine: &RocksEngine) { let mut count = 0; engine - .scan(b"", &[0xFF, 0xFF], false, |_, _| { + .scan(CF_DEFAULT, b"", &[0xFF, 0xFF], false, |_, _| { count += 1; Ok(false) }) @@ -138,6 +138,7 @@ fn run_dump_raftdb_worker( let mut entries = vec![]; old_engine .scan( + CF_DEFAULT, &keys::raft_log_prefix(id), &keys::raft_log_prefix(id + 1), false, diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index 7c02b058d1e..be93ded1554 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -316,7 +316,8 @@ impl ImportDir { for &(start, end) in TIDB_RANGES_COMPLEMENT { let mut unexpected_data_key = None; - sst_reader.scan(start, end, false, |key, _| { + // No CF in sst. 
+ sst_reader.scan("", start, end, false, |key, _| { unexpected_data_key = Some(key.to_vec()); Ok(false) })?; diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index d1ef399d6d0..1d4e2e916dc 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -15,7 +15,7 @@ use encryption::{encryption_method_to_db_encryption_method, DataKeyManager}; use engine_rocks::{get_env, RocksSstReader}; use engine_traits::{ name_to_cf, util::check_key_in_range, CfName, EncryptionKeyManager, FileEncryptionInfo, - Iterator, KvEngine, SeekKey, SstCompressionType, SstExt, SstMetaInfo, SstReader, SstWriter, + Iterator, KvEngine, SstCompressionType, SstExt, SstMetaInfo, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, CF_WRITE, }; use file_system::{get_io_rate_limiter, OpenOptions}; @@ -554,7 +554,7 @@ impl SstImporter { // must iterate if we perform key rewrite return Ok(None); } - if !iter.seek(SeekKey::Start)? { + if !iter.seek_to_first()? { // the SST is empty, so no need to iterate at all (should be impossible?) return Ok(Some(meta.get_range().clone())); } @@ -566,7 +566,7 @@ impl SstImporter { let start_key = start_key.to_vec(); // seek to end and fetch the last (inclusive) key of the SST. 
- iter.seek(SeekKey::End)?; + iter.seek_to_last()?; let last_key = keys::origin_key(iter.key()); if is_after_end_bound(last_key, &range_end) { // SST's end is after the range to consume @@ -606,8 +606,8 @@ impl SstImporter { let mut first_key = None; match range_start { - Bound::Unbounded => iter.seek(SeekKey::Start)?, - Bound::Included(s) => iter.seek(SeekKey::Key(&keys::data_key(&s)))?, + Bound::Unbounded => iter.seek_to_first()?, + Bound::Included(s) => iter.seek(&keys::data_key(&s))?, Bound::Excluded(_) => unreachable!(), }; // SST writer must not be opened in gRPC threads, because it may be @@ -789,7 +789,7 @@ mod tests { use engine_traits::{ collect, EncryptionMethod, Error as TraitError, ExternalSstFileInfo, Iterable, Iterator, - SeekKey, SstReader, SstWriter, CF_DEFAULT, DATA_CFS, + SstReader, SstWriter, CF_DEFAULT, DATA_CFS, }; use file_system::File; use openssl::hash::{Hasher, MessageDigest}; @@ -1333,7 +1333,7 @@ mod tests { let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); let mut iter = sst_reader.iter(); - iter.seek(SeekKey::Start).unwrap(); + iter.seek_to_first().unwrap(); assert_eq!( collect(iter), vec![ @@ -1392,7 +1392,7 @@ mod tests { let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), Some(env)); sst_reader.verify_checksum().unwrap(); let mut iter = sst_reader.iter(); - iter.seek(SeekKey::Start).unwrap(); + iter.seek_to_first().unwrap(); assert_eq!( collect(iter), vec![ @@ -1440,7 +1440,7 @@ mod tests { let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); let mut iter = sst_reader.iter(); - iter.seek(SeekKey::Start).unwrap(); + iter.seek_to_first().unwrap(); assert_eq!( collect(iter), vec![ @@ -1485,7 +1485,7 @@ mod tests { let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); let mut iter = sst_reader.iter(); - iter.seek(SeekKey::Start).unwrap(); + 
iter.seek_to_first().unwrap(); assert_eq!( collect(iter), vec![ @@ -1529,7 +1529,7 @@ mod tests { let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); let mut iter = sst_reader.iter(); - iter.seek(SeekKey::Start).unwrap(); + iter.seek_to_first().unwrap(); assert_eq!( collect(iter), vec![ @@ -1605,8 +1605,8 @@ mod tests { assert_eq!(meta_info.total_kvs, 4); // verifies the DB content is correct. - let mut iter = db.iterator_cf(cf).unwrap(); - iter.seek(SeekKey::Start).unwrap(); + let mut iter = db.iterator(cf).unwrap(); + iter.seek_to_first().unwrap(); assert_eq!( collect(iter), vec![ @@ -1670,7 +1670,7 @@ mod tests { let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); let mut iter = sst_reader.iter(); - iter.seek(SeekKey::Start).unwrap(); + iter.seek_to_first().unwrap(); assert_eq!( collect(iter), vec![ @@ -1714,7 +1714,7 @@ mod tests { let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); let mut iter = sst_reader.iter(); - iter.seek(SeekKey::Start).unwrap(); + iter.seek_to_first().unwrap(); assert_eq!( collect(iter), vec![ @@ -1746,8 +1746,8 @@ mod tests { db, ); match &result { - Err(Error::EngineTraits(TraitError::Engine(msg))) if msg.starts_with("Corruption:") => { - } + Err(Error::EngineTraits(TraitError::Engine(s))) + if s.state().starts_with("Corruption:") => {} _ => panic!("unexpected download result: {:?}", result), } } @@ -1849,7 +1849,7 @@ mod tests { let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); let mut iter = sst_reader.iter(); - iter.seek(SeekKey::Start).unwrap(); + iter.seek_to_first().unwrap(); assert_eq!( collect(iter), vec![ @@ -1907,7 +1907,7 @@ mod tests { let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); let mut iter = sst_reader.iter(); - 
iter.seek(SeekKey::Start).unwrap(); + iter.seek_to_first().unwrap(); assert_eq!( collect(iter), vec![ @@ -1962,7 +1962,7 @@ mod tests { let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); let mut iter = sst_reader.iter(); - iter.seek(SeekKey::Start).unwrap(); + iter.seek_to_first().unwrap(); assert_eq!( collect(iter), vec![ diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index bf14b86dfc8..afdcd279e19 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -392,7 +392,7 @@ impl TestSuite { if !end.is_empty() { iter_opt.set_upper_bound(&end, DATA_KEY_PREFIX_LEN); } - let mut iter = snapshot.iter_cf(cf, iter_opt).unwrap(); + let mut iter = snapshot.iter(cf, iter_opt).unwrap(); if !iter.seek(&start).unwrap() { return (0, 0, 0); diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 046d2396382..28112304496 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1260,12 +1260,12 @@ impl Cluster { let mut kv_wb = self.engines[&store_id].kv.write_batch(); self.engines[&store_id] .kv - .scan_cf(CF_RAFT, &meta_start, &meta_end, false, |k, _| { + .scan(CF_RAFT, &meta_start, &meta_end, false, |k, _| { kv_wb.delete(k).unwrap(); Ok(true) }) .unwrap(); - snap.scan_cf(CF_RAFT, &meta_start, &meta_end, false, |k, v| { + snap.scan(CF_RAFT, &meta_start, &meta_end, false, |k, v| { kv_wb.put(k, v).unwrap(); Ok(true) }) @@ -1277,12 +1277,12 @@ impl Cluster { ); self.engines[&store_id] .kv - .scan_cf(CF_RAFT, &raft_start, &raft_end, false, |k, _| { + .scan(CF_RAFT, &raft_start, &raft_end, false, |k, _| { kv_wb.delete(k).unwrap(); Ok(true) }) .unwrap(); - snap.scan_cf(CF_RAFT, &raft_start, &raft_end, false, |k, v| { + snap.scan(CF_RAFT, &raft_start, &raft_end, false, |k, v| { kv_wb.put(k, v).unwrap(); Ok(true) }) diff --git a/components/test_raftstore/src/util.rs 
b/components/test_raftstore/src/util.rs index 12ca8f9a867..1769ecc4154 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -107,7 +107,7 @@ pub fn must_region_cleared(engine: &Engines, region for cf in ALL_CFS { engine .kv - .scan_cf(cf, &start_key, &end_key, false, |k, v| { + .scan(cf, &start_key, &end_key, false, |k, v| { panic!( "[region {}] unexpected ({:?}, {:?}) in cf {:?}", id, k, v, cf diff --git a/components/tikv_kv/src/btree_engine.rs b/components/tikv_kv/src/btree_engine.rs index 36a8aa58849..9557f945034 100644 --- a/components/tikv_kv/src/btree_engine.rs +++ b/components/tikv_kv/src/btree_engine.rs @@ -246,11 +246,8 @@ impl Snapshot for BTreeEngineSnapshot { fn get_cf_opt(&self, _: ReadOptions, cf: CfName, key: &Key) -> EngineResult> { self.get_cf(cf, key) } - fn iter(&self, iter_opt: IterOptions) -> EngineResult { - self.iter_cf(CF_DEFAULT, iter_opt) - } #[inline] - fn iter_cf(&self, cf: CfName, iter_opt: IterOptions) -> EngineResult { + fn iter(&self, cf: CfName, iter_opt: IterOptions) -> EngineResult { let tree = self.inner_engine.get_cf(cf); Ok(BTreeEngineIterator::new(tree, iter_opt)) } @@ -341,13 +338,21 @@ pub mod tests { let mut iter_op = IterOptions::default(); iter_op.set_lower_bound(b"a7", 0); iter_op.set_upper_bound(b"a3", 0); - let mut cursor = Cursor::new(snap.iter(iter_op).unwrap(), ScanMode::Forward, false); + let mut cursor = Cursor::new( + snap.iter(CF_DEFAULT, iter_op).unwrap(), + ScanMode::Forward, + false, + ); assert!(!cursor.seek(&Key::from_raw(b"a5"), &mut statistics).unwrap()); let mut iter_op = IterOptions::default(); iter_op.set_lower_bound(b"a3", 0); iter_op.set_upper_bound(b"a7", 0); - let mut cursor = Cursor::new(snap.iter(iter_op).unwrap(), ScanMode::Forward, false); + let mut cursor = Cursor::new( + snap.iter(CF_DEFAULT, iter_op).unwrap(), + ScanMode::Forward, + false, + ); assert!(cursor.seek(&Key::from_raw(b"a5"), &mut statistics).unwrap()); 
assert!(!cursor.seek(&Key::from_raw(b"a8"), &mut statistics).unwrap()); diff --git a/components/tikv_kv/src/cursor.rs b/components/tikv_kv/src/cursor.rs index de29583444c..923a1878a42 100644 --- a/components/tikv_kv/src/cursor.rs +++ b/components/tikv_kv/src/cursor.rs @@ -561,7 +561,7 @@ impl<'a, S: 'a + Snapshot> CursorBuilder<'a, S> { iter_opt.set_prefix_same_as_start(true); } Ok(Cursor::new( - self.snapshot.iter_cf(self.cf, iter_opt)?, + self.snapshot.iter(self.cf, iter_opt)?, self.scan_mode, self.prefix_seek, )) @@ -637,7 +637,7 @@ mod tests { let mut iter_opt = IterOptions::default(); iter_opt.use_prefix_seek(); iter_opt.set_prefix_same_as_start(true); - let it = snap.iter(iter_opt); + let it = snap.iter(CF_DEFAULT, iter_opt).unwrap(); let mut iter = Cursor::new(it, ScanMode::Mixed, true); assert!( @@ -677,7 +677,7 @@ mod tests { let snap = RegionSnapshot::::from_raw(engines.kv.clone(), region); let mut statistics = CfStatistics::default(); - let it = snap.iter(IterOptions::default()); + let it = snap.iter(CF_DEFAULT, IterOptions::default()).unwrap(); let mut iter = Cursor::new(it, ScanMode::Mixed, false); assert!( !iter @@ -735,7 +735,7 @@ mod tests { let mut region = Region::default(); region.mut_peers().push(Peer::default()); let snap = RegionSnapshot::::from_raw(engines.kv, region); - let it = snap.iter(IterOptions::default()); + let it = snap.iter(CF_DEFAULT, IterOptions::default()).unwrap(); let mut iter = Cursor::new(it, ScanMode::Mixed, false); assert!( !iter diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index adb04fc25cd..1d66f11ad74 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -359,8 +359,7 @@ pub trait Snapshot: Sync + Send + Clone { /// Get the value associated with `key` in `cf` column family, with Options in `opts` fn get_cf_opt(&self, opts: ReadOptions, cf: CfName, key: &Key) -> Result>; - fn iter(&self, iter_opt: IterOptions) -> Result; - fn iter_cf(&self, cf: CfName, iter_opt: 
IterOptions) -> Result; + fn iter(&self, cf: CfName, iter_opt: IterOptions) -> Result; // The minimum key this snapshot can retrieve. #[inline] fn lower_bound(&self) -> Option<&[u8]> { @@ -706,7 +705,7 @@ pub mod tests { fn assert_seek(engine: &E, key: &[u8], pair: (&[u8], &[u8])) { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut cursor = Cursor::new( - snapshot.iter(IterOptions::default()).unwrap(), + snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), ScanMode::Mixed, false, ); @@ -719,7 +718,7 @@ pub mod tests { fn assert_reverse_seek(engine: &E, key: &[u8], pair: (&[u8], &[u8])) { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut cursor = Cursor::new( - snapshot.iter(IterOptions::default()).unwrap(), + snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), ScanMode::Mixed, false, ); @@ -817,7 +816,7 @@ pub mod tests { assert_reverse_seek(engine, b"z", (b"x", b"1")); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut iter = Cursor::new( - snapshot.iter(IterOptions::default()).unwrap(), + snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), ScanMode::Mixed, false, ); @@ -841,7 +840,7 @@ pub mod tests { must_put(engine, b"z", b"2"); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut cursor = Cursor::new( - snapshot.iter(IterOptions::default()).unwrap(), + snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), ScanMode::Mixed, false, ); @@ -864,7 +863,7 @@ pub mod tests { } let snapshot = engine.snapshot(Default::default()).unwrap(); let mut cursor = Cursor::new( - snapshot.iter(IterOptions::default()).unwrap(), + snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), ScanMode::Mixed, false, ); @@ -882,7 +881,7 @@ pub mod tests { fn test_empty_seek(engine: &E) { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut cursor = Cursor::new( - snapshot.iter(IterOptions::default()).unwrap(), + snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), 
ScanMode::Mixed, false, ); @@ -954,9 +953,16 @@ pub mod tests { start_idx: usize, step: usize, ) { - let mut cursor = Cursor::new(snapshot.iter(IterOptions::default()).unwrap(), mode, false); - let mut near_cursor = - Cursor::new(snapshot.iter(IterOptions::default()).unwrap(), mode, false); + let mut cursor = Cursor::new( + snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), + mode, + false, + ); + let mut near_cursor = Cursor::new( + snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), + mode, + false, + ); let limit = (SEEK_BOUND as usize * 10 + 50 - 1) * 2; for (_, mut i) in (start_idx..(SEEK_BOUND as usize * 30)) @@ -1092,7 +1098,7 @@ pub mod tests { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut iter = Cursor::new( - snapshot.iter(IterOptions::default()).unwrap(), + snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), ScanMode::Forward, false, ); diff --git a/components/tikv_kv/src/raftstore_impls.rs b/components/tikv_kv/src/raftstore_impls.rs index d93ddaf236c..c1384bdcd45 100644 --- a/components/tikv_kv/src/raftstore_impls.rs +++ b/components/tikv_kv/src/raftstore_impls.rs @@ -85,18 +85,11 @@ impl EngineSnapshot for RegionSnapshot { Ok(v.map(|v| v.to_vec())) } - fn iter(&self, iter_opt: IterOptions) -> kv::Result { + fn iter(&self, cf: CfName, iter_opt: IterOptions) -> kv::Result { fail_point!("raftkv_snapshot_iter", |_| Err(box_err!( - "injected error for iter" - ))); - Ok(RegionSnapshot::iter(self, iter_opt)) - } - - fn iter_cf(&self, cf: CfName, iter_opt: IterOptions) -> kv::Result { - fail_point!("raftkv_snapshot_iter_cf", |_| Err(box_err!( "injected error for iter_cf" ))); - RegionSnapshot::iter_cf(self, cf, iter_opt).map_err(kv::Error::from) + RegionSnapshot::iter(self, cf, iter_opt).map_err(kv::Error::from) } #[inline] diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index ee220f7e31a..50059433553 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ 
b/components/tikv_kv/src/rocksdb_engine.rs @@ -15,7 +15,7 @@ use engine_rocks::{ RocksEngineIterator, }; use engine_traits::{ - CfName, Engines, IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, SeekKey, + CfName, Engines, IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, }; use file_system::IORateLimiter; use kvproto::{kvrpcpb::Context, metapb, raft_cmdpb}; @@ -287,14 +287,9 @@ impl Snapshot for Arc { Ok(v.map(|v| v.to_vec())) } - fn iter(&self, iter_opt: IterOptions) -> Result { - trace!("RocksSnapshot: create iterator"); - Ok(self.iterator_opt(iter_opt)?) - } - - fn iter_cf(&self, cf: CfName, iter_opt: IterOptions) -> Result { + fn iter(&self, cf: CfName, iter_opt: IterOptions) -> Result { trace!("RocksSnapshot: create cf iterator"); - Ok(self.iterator_cf_opt(cf, iter_opt)?) + Ok(self.iterator_opt(cf, iter_opt)?) } fn ext(&self) -> DummySnapshotExt { @@ -312,19 +307,19 @@ impl EngineIterator for RocksEngineIterator { } fn seek(&mut self, key: &Key) -> Result { - Iterator::seek(self, key.as_encoded().as_slice().into()).map_err(Error::from) + Iterator::seek(self, key.as_encoded()).map_err(Error::from) } fn seek_for_prev(&mut self, key: &Key) -> Result { - Iterator::seek_for_prev(self, key.as_encoded().as_slice().into()).map_err(Error::from) + Iterator::seek_for_prev(self, key.as_encoded()).map_err(Error::from) } fn seek_to_first(&mut self) -> Result { - Iterator::seek(self, SeekKey::Start).map_err(Error::from) + Iterator::seek_to_first(self).map_err(Error::from) } fn seek_to_last(&mut self) -> Result { - Iterator::seek(self, SeekKey::End).map_err(Error::from) + Iterator::seek_to_last(self).map_err(Error::from) } fn valid(&self) -> Result { diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index aa981603d17..6982c66b67a 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -947,8 +947,7 @@ securityfs /sys/kernel/security securityfs 
rw,nosuid,nodev,noexec,relatime 0 0 #[test] fn test_check_data_dir() { // test invalid data_path - let ret = check_data_dir("/sys/invalid", "/proc/mounts"); - assert!(ret.is_err()); + check_data_dir("/sys/invalid", "/proc/mounts").unwrap_err(); // get real path's fs_info let tmp_dir = Builder::new() .prefix("test-check-data-dir") @@ -959,13 +958,15 @@ securityfs /sys/kernel/security securityfs rw,nosuid,nodev,noexec,relatime 0 0 let fs_info = get_fs_info(&data_path, "/proc/mounts").unwrap(); // data_path may not mounted on a normal device on container - if !fs_info.fsname.starts_with("/dev") { + // /proc/mounts may contain host's device, which is not accessible in container. + if Path::new("/.dockerenv").exists() + && (!fs_info.fsname.starts_with("/dev") || !Path::new(&fs_info.fsname).exists()) + { return; } // test with real path - let ret = check_data_dir(&data_path, "/proc/mounts"); - assert!(ret.is_ok()); + check_data_dir(&data_path, "/proc/mounts").unwrap(); // test with device mapper // get real_path's rotational info @@ -985,8 +986,7 @@ securityfs /sys/kernel/security securityfs rw,nosuid,nodev,noexec,relatime 0 0 let mnt_file = format!("{}/mnt.txt", tmp_dir.path().display()); create_file(&mnt_file, mninfo.as_bytes()); // check info - let res = check_data_dir(&data_path, &mnt_file); - assert!(res.is_ok()); + check_data_dir(&data_path, &mnt_file).unwrap(); // check rotational info let get = get_rotational_info(&tmp_device).unwrap(); assert_eq!(expect, get); diff --git a/src/config.rs b/src/config.rs index ebf1e132777..0df2e2a2101 100644 --- a/src/config.rs +++ b/src/config.rs @@ -37,8 +37,8 @@ use engine_rocks::{ DEFAULT_PROP_KEYS_INDEX_DISTANCE, DEFAULT_PROP_SIZE_INDEX_DISTANCE, }; use engine_traits::{ - CFOptionsExt, ColumnFamilyOptions as ColumnFamilyOptionsTrait, DBOptionsExt, TabletAccessor, - TabletErrorCollector, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, + CFOptionsExt, ColumnFamilyOptions as ColumnFamilyOptionsTrait, DBOptions as _, DBOptionsExt, + 
TabletAccessor, TabletErrorCollector, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use file_system::IORateLimiter; use keys::region_raft_prefix_len; @@ -1597,14 +1597,11 @@ impl> DBConfigManger { let mut error_collector = TabletErrorCollector::new(); self.tablet_accessor .for_each_opened_tablet(&mut |region_id, suffix, db: &RocksEngine| { - let r = db.get_options_cf(cf); - if let Ok(opt) = r { - let r = opt.set_block_cache_capacity(size.0); - if let Err(r) = r { - error_collector.add_result(region_id, suffix, Err(r.into())); - } - } else if let Err(r) = r { - error_collector.add_result(region_id, suffix, Err(r)); + let r = db + .get_options_cf(cf) + .and_then(|opt| opt.set_block_cache_capacity(size.0)); + if r.is_err() { + error_collector.add_result(region_id, suffix, r); } }); // Write config to metric @@ -1618,10 +1615,10 @@ impl> DBConfigManger { let mut error_collector = TabletErrorCollector::new(); self.tablet_accessor .for_each_opened_tablet(&mut |region_id, suffix, db: &RocksEngine| { - let mut opt = db.as_inner().get_db_options(); + let mut opt = db.get_db_options(); let r = opt.set_rate_bytes_per_sec(rate_bytes_per_sec); - if let Err(r) = r { - error_collector.add_result(region_id, suffix, Err(r.into())); + if r.is_err() { + error_collector.add_result(region_id, suffix, r); } }); error_collector.take_result() @@ -1634,20 +1631,24 @@ impl> DBConfigManger { let mut error_collector = TabletErrorCollector::new(); self.tablet_accessor .for_each_opened_tablet(&mut |region_id, suffix, db: &RocksEngine| { - let mut opt = db.as_inner().get_db_options(); - let r = opt.set_auto_tuned(rate_limiter_auto_tuned); - if let Err(r) = r { - error_collector.add_result(region_id, suffix, Err(r.into())); + let mut opt = db.get_db_options(); + let r = opt.set_rate_limiter_auto_tuned(rate_limiter_auto_tuned); + if r.is_err() { + error_collector.add_result(region_id, suffix, r); } else { // double check the new state - let new_auto_tuned = opt.get_auto_tuned(); + let new_auto_tuned 
= opt.get_rate_limiter_auto_tuned(); if new_auto_tuned.is_none() || new_auto_tuned.unwrap() != rate_limiter_auto_tuned { error_collector.add_result( region_id, suffix, - Err("fail to set rate_limiter_auto_tuned".to_string().into()), + Err(engine_traits::Status::with_error( + engine_traits::Code::IoError, + "fail to set rate_limiter_auto_tuned", + ) + .into()), ); } } diff --git a/src/import/duplicate_detect.rs b/src/import/duplicate_detect.rs index f3277f3f3ef..3ae9360e727 100644 --- a/src/import/duplicate_detect.rs +++ b/src/import/duplicate_detect.rs @@ -40,9 +40,7 @@ impl DuplicateDetector { }); let mut iter_opt = IterOptions::new(Some(l_bound), u_bound, false); iter_opt.set_key_only(key_only); - let mut iter = snapshot - .iter_cf(CF_WRITE, iter_opt) - .map_err(from_kv_error)?; + let mut iter = snapshot.iter(CF_WRITE, iter_opt).map_err(from_kv_error)?; iter.seek(&start_key).map_err(from_kv_error)?; Ok(DuplicateDetector { snapshot, diff --git a/src/server/debug.rs b/src/server/debug.rs index d10f58cc2ad..93732c9c580 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -17,8 +17,8 @@ use engine_rocks::{ }; use engine_traits::{ Engines, IterOptions, Iterable, Iterator as EngineIterator, Mutable, MvccProperties, Peekable, - RaftEngine, Range, RangePropertiesExt, SeekKey, SyncMutable, WriteBatch, WriteBatchExt, - WriteOptions, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, + RaftEngine, Range, RangePropertiesExt, SyncMutable, WriteBatch, WriteBatchExt, WriteOptions, + CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use kvproto::{ debugpb::{self, Db as DBType}, @@ -155,7 +155,7 @@ impl Debugger { let start_key = keys::REGION_META_MIN_KEY; let end_key = keys::REGION_META_MAX_KEY; let mut regions = Vec::with_capacity(128); - box_try!(db.scan_cf(cf, start_key, end_key, false, |key, _| { + box_try!(db.scan(cf, start_key, end_key, false, |key, _| { let (id, suffix) = box_try!(keys::decode_region_meta_key(key)); if suffix != keys::REGION_STATE_SUFFIX { return Ok(true); @@ 
-241,7 +241,7 @@ impl Debugger { let mut sizes = vec![]; for cf in cfs { let mut size = 0; - box_try!(self.engines.kv.scan_cf( + box_try!(self.engines.kv.scan( cf.as_ref(), start_key, end_key, @@ -273,7 +273,7 @@ impl Debugger { MvccInfoIterator::new( |cf, opts| { let kv = &self.engines.kv; - kv.iterator_cf_opt(cf, opts).map_err(|e| box_err!(e)) + kv.iterator_opt(cf, opts).map_err(|e| box_err!(e)) }, if start.is_empty() { None } else { Some(start) }, if end.is_empty() { None } else { Some(end) }, @@ -298,7 +298,7 @@ impl Debugger { }; let iter_opt = IterOptions::new(Some(KeyBuilder::from_vec(start.to_vec(), 0, 0)), end, false); - let mut iter = box_try!(db.iterator_cf_opt(cf, iter_opt)); + let mut iter = box_try!(db.iterator_opt(cf, iter_opt)); if !iter.seek_to_first().unwrap() { return Ok(vec![]); } @@ -496,8 +496,8 @@ impl Debugger { Some(KeyBuilder::from_vec(to, 0, 0)), false, ); - let mut iter = box_try!(self.engines.kv.iterator_cf_opt(CF_RAFT, readopts)); - iter.seek(SeekKey::from(from.as_ref())).unwrap(); + let mut iter = box_try!(self.engines.kv.iterator_opt(CF_RAFT, readopts)); + iter.seek(&from).unwrap(); let fake_snap_worker = Worker::new("fake-snap-worker").lazy_build("fake-snap"); let fake_raftlog_fetch_worker = @@ -659,7 +659,7 @@ impl Debugger { } } } else { - box_try!(self.engines.kv.scan_cf( + box_try!(self.engines.kv.scan( CF_RAFT, keys::REGION_META_MIN_KEY, keys::REGION_META_MAX_KEY, @@ -759,7 +759,7 @@ impl Debugger { return Err(box_err!("Bad region: {:?}", region)); } - box_try!(self.engines.kv.scan_cf( + box_try!(self.engines.kv.scan( CF_RAFT, keys::REGION_META_MIN_KEY, keys::REGION_META_MAX_KEY, @@ -1061,8 +1061,8 @@ impl MvccChecker { Some(KeyBuilder::from_vec(to, 0, 0)), false, ); - let mut iter = box_try!(db.c().iterator_cf_opt(cf, readopts)); - iter.seek(SeekKey::Start).unwrap(); + let mut iter = box_try!(db.c().iterator_opt(cf, readopts)); + iter.seek_to_first().unwrap(); Ok(iter) }; diff --git a/src/server/engine_factory.rs 
b/src/server/engine_factory.rs index 0de26bc43c4..421c0c0f8ba 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -247,7 +247,7 @@ impl TabletFactory for KvEngineFactory { Ok(()) } - fn set_shared_block_cache_capacity(&self, capacity: u64) -> std::result::Result<(), String> { + fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()> { if let Ok(db) = self.inner.root_db.lock() { let opt = db.as_ref().unwrap().get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap opt.set_block_cache_capacity(capacity)?; diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index 2dfe297e5d8..e5237187886 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -160,7 +160,7 @@ impl TabletFactory for KvEngineFactoryV2 { new_engine } - fn set_shared_block_cache_capacity(&self, capacity: u64) -> std::result::Result<(), String> { + fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()> { let reg = self.registry.lock().unwrap(); // pick up any tablet and set the shared block cache capacity if let Some(((_id, _suffix), tablet)) = (*reg).iter().next() { diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index 7d233430f70..8d914080279 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -467,7 +467,10 @@ impl WriteCompactionFilter { let _io_type_guard = WithIOType::new(IOType::Gc); fail_point!("write_compaction_filter_flush_write_batch", true, |_| { Err(engine_traits::Error::Engine( - "Ingested fail point".to_string(), + engine_traits::Status::with_error( + engine_traits::Code::IoError, + "Ingested fail point", + ), )) }); wb.write_opt(wopts) diff --git a/src/server/node.rs b/src/server/node.rs index dfed9459b1c..eb2cc72e432 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -304,7 +304,7 @@ where for cf in DATA_CFS { for (start, end) in TIDB_RANGES_COMPLEMENT { let mut 
unexpected_data_key = None; - snapshot.scan_cf( + snapshot.scan( cf, &keys::data_key(start), &keys::data_key(end), diff --git a/src/server/reset_to_version.rs b/src/server/reset_to_version.rs index 1a7443f6d08..94e3e38900d 100644 --- a/src/server/reset_to_version.rs +++ b/src/server/reset_to_version.rs @@ -8,8 +8,8 @@ use std::{ use engine_rocks::{RocksEngine, RocksEngineIterator, RocksWriteBatchVec}; use engine_traits::{ - IterOptions, Iterable, Iterator, Mutable, SeekKey, WriteBatch, WriteBatchExt, CF_DEFAULT, - CF_LOCK, CF_WRITE, + IterOptions, Iterable, Iterator, Mutable, WriteBatch, WriteBatchExt, CF_DEFAULT, CF_LOCK, + CF_WRITE, }; use tikv_util::sys::thread::StdThreadBuildWrapper; use txn_types::{Key, TimeStamp, Write, WriteRef}; @@ -71,8 +71,8 @@ impl ResetToVersionWorker { .lock() .expect("failed to lock `state` in `ResetToVersionWorker::new`") = ResetToVersionState::RemovingWrite { scanned: 0 }; - write_iter.seek(SeekKey::Start).unwrap(); - lock_iter.seek(SeekKey::Start).unwrap(); + write_iter.seek_to_first().unwrap(); + lock_iter.seek_to_first().unwrap(); Self { write_iter, lock_iter, @@ -207,9 +207,9 @@ impl ResetToVersionManager { let readopts = IterOptions::new(None, None, false); let write_iter = self .engine - .iterator_cf_opt(CF_WRITE, readopts.clone()) + .iterator_opt(CF_WRITE, readopts.clone()) .unwrap(); - let lock_iter = self.engine.iterator_cf_opt(CF_LOCK, readopts).unwrap(); + let lock_iter = self.engine.iterator_opt(CF_LOCK, readopts).unwrap(); let mut worker = ResetToVersionWorker::new(write_iter, lock_iter, ts, self.state.clone()); let mut wb = self.engine.write_batch(); let props = tikv_util::thread_group::current_properties(); @@ -352,9 +352,9 @@ mod tests { let readopts = IterOptions::new(None, None, false); let mut write_iter = fake_engine .c() - .iterator_cf_opt(CF_WRITE, readopts.clone()) + .iterator_opt(CF_WRITE, readopts.clone()) .unwrap(); - write_iter.seek(SeekKey::Start).unwrap(); + write_iter.seek_to_first().unwrap(); let mut 
remaining_writes = vec![]; while write_iter.valid().unwrap() { let write = WriteRef::parse(write_iter.value()).unwrap().to_owned(); @@ -364,9 +364,9 @@ mod tests { } let mut default_iter = fake_engine .c() - .iterator_cf_opt(CF_DEFAULT, readopts.clone()) + .iterator_opt(CF_DEFAULT, readopts.clone()) .unwrap(); - default_iter.seek(SeekKey::Start).unwrap(); + default_iter.seek_to_first().unwrap(); let mut remaining_defaults = vec![]; while default_iter.valid().unwrap() { let key = default_iter.key().to_vec(); @@ -375,8 +375,8 @@ mod tests { remaining_defaults.push((key, value)); } - let mut lock_iter = fake_engine.c().iterator_cf_opt(CF_LOCK, readopts).unwrap(); - lock_iter.seek(SeekKey::Start).unwrap(); + let mut lock_iter = fake_engine.c().iterator_opt(CF_LOCK, readopts).unwrap(); + lock_iter.seek_to_first().unwrap(); let mut remaining_locks = vec![]; while lock_iter.valid().unwrap() { let lock = Lock::parse(lock_iter.value()).unwrap().to_owned(); diff --git a/src/storage/kv/test_engine_builder.rs b/src/storage/kv/test_engine_builder.rs index 94d750a20f7..d5c1180ddf0 100644 --- a/src/storage/kv/test_engine_builder.rs +++ b/src/storage/kv/test_engine_builder.rs @@ -232,7 +232,11 @@ mod tests { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut iter_opt = IterOptions::default(); iter_opt.set_max_skippable_internal_keys(1); - let mut iter = Cursor::new(snapshot.iter(iter_opt).unwrap(), ScanMode::Forward, false); + let mut iter = Cursor::new( + snapshot.iter(CF_DEFAULT, iter_opt).unwrap(), + ScanMode::Forward, + false, + ); let mut statistics = CfStatistics::default(); let res = iter.seek(&Key::from_raw(b"foo"), &mut statistics); @@ -258,7 +262,7 @@ mod tests { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut iter = Cursor::new( - snapshot.iter(IterOptions::default()).unwrap(), + snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), ScanMode::Forward, false, ); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 
0864c9edd2d..a43b5270875 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -2728,16 +2728,12 @@ impl Snapshot for TxnTestSnapshot { self.snapshot.get_cf_opt(opts, cf, key) } - fn iter(&self, iter_opt: engine_traits::IterOptions) -> tikv_kv::Result { - self.snapshot.iter(iter_opt) - } - - fn iter_cf( + fn iter( &self, cf: CfName, iter_opt: engine_traits::IterOptions, ) -> tikv_kv::Result { - self.snapshot.iter_cf(cf, iter_opt) + self.snapshot.iter(cf, iter_opt) } fn ext(&self) -> Self::Ext<'_> { diff --git a/src/storage/mvcc/consistency_check.rs b/src/storage/mvcc/consistency_check.rs index f60147d9991..7881eb45903 100644 --- a/src/storage/mvcc/consistency_check.rs +++ b/src/storage/mvcc/consistency_check.rs @@ -12,8 +12,8 @@ use std::{ }; use engine_traits::{ - IterOptions, Iterable, Iterator as EngineIterator, KvEngine, Peekable, SeekKey, CF_DEFAULT, - CF_LOCK, CF_RAFT, CF_WRITE, + IterOptions, Iterable, Iterator as EngineIterator, KvEngine, Peekable, CF_DEFAULT, CF_LOCK, + CF_RAFT, CF_WRITE, }; use kvproto::kvrpcpb::{MvccInfo, MvccLock, MvccValue, MvccWrite, Op}; use raftstore::{ @@ -105,7 +105,7 @@ impl ConsistencyCheckObserver for Mvcc { } let mut scanner = MvccInfoScanner::new( - |cf, opts| snap.iterator_cf_opt(cf, opts).map_err(|e| box_err!(e)), + |cf, opts| snap.iterator_opt(cf, opts).map_err(|e| box_err!(e)), Some(&keys::data_key(region.get_start_key())), Some(&keys::data_end_key(region.get_end_key())), MvccChecksum::new(safe_point), @@ -162,7 +162,7 @@ impl MvccInfoScanner { let iter_opts = IterOptions::new(key_builder(from)?, key_builder(to)?, false); let gen_iter = |cf: &str| -> Result { let mut iter = f(cf, iter_opts.clone())?; - box_try!(iter.seek(SeekKey::Key(from))); + box_try!(iter.seek(from)); Ok(iter) }; @@ -464,7 +464,7 @@ mod tests { for &safe_point in &[150, 160, 100] { let raw = engine.get_rocksdb(); let mut scanner = MvccInfoScanner::new( - |cf, opts| raw.iterator_cf_opt(cf, opts).map_err(|e| box_err!(e)), + |cf, opts| 
raw.iterator_opt(cf, opts).map_err(|e| box_err!(e)), Some(&keys::data_key(b"")), Some(&keys::data_end_key(b"")), MvccChecksum::new(safe_point), @@ -556,7 +556,7 @@ mod tests { let scan_mvcc = |start: &[u8], end: &[u8], limit: u64| { MvccInfoIterator::new( - |cf, opts| engine.iterator_cf_opt(cf, opts).map_err(|e| box_err!(e)), + |cf, opts| engine.iterator_opt(cf, opts).map_err(|e| box_err!(e)), if start.is_empty() { None } else { Some(start) }, if end.is_empty() { None } else { Some(end) }, limit as usize, diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 4f36599b2f6..17b02c28ec9 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -972,7 +972,7 @@ pub mod tests { iopt.set_hint_min_ts(min); iopt.set_hint_max_ts(max); - let mut iter = snap.iter_cf(CF_WRITE, iopt).unwrap(); + let mut iter = snap.iter(CF_WRITE, iopt).unwrap(); for (i, expect_ts) in res.iter().enumerate() { if i == 0 { @@ -1020,7 +1020,7 @@ pub mod tests { iopt.set_hint_max_ts(Bound::Included(6)); let snap = RegionSnapshot::::from_raw(db.c().clone(), region); - let mut iter = snap.iter_cf(CF_WRITE, iopt).unwrap(); + let mut iter = snap.iter(CF_WRITE, iopt).unwrap(); // Must not omit the latest deletion of key1 to prevent seeing outdated record. 
assert_eq!(iter.seek_to_first().unwrap(), true); diff --git a/src/storage/raw/encoded.rs b/src/storage/raw/encoded.rs index b9b25015891..788d9a7ed02 100644 --- a/src/storage/raw/encoded.rs +++ b/src/storage/raw/encoded.rs @@ -75,16 +75,9 @@ impl Snapshot for RawEncodeSnapshot { self.map_value(self.snap.get_cf_opt(opts, cf, key)) } - fn iter(&self, iter_opt: IterOptions) -> Result { + fn iter(&self, cf: CfName, iter_opt: IterOptions) -> Result { Ok(RawEncodeIterator::new( - self.snap.iter(iter_opt)?, - self.current_ts, - )) - } - - fn iter_cf(&self, cf: CfName, iter_opt: IterOptions) -> Result { - Ok(RawEncodeIterator::new( - self.snap.iter_cf(cf, iter_opt)?, + self.snap.iter(cf, iter_opt)?, self.current_ts, )) } diff --git a/src/storage/raw/raw_mvcc.rs b/src/storage/raw/raw_mvcc.rs index 4212b1c56ef..4ddfa68a757 100644 --- a/src/storage/raw/raw_mvcc.rs +++ b/src/storage/raw/raw_mvcc.rs @@ -1,6 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{CfName, IterOptions, ReadOptions, DATA_KEY_PREFIX_LEN}; +use engine_traits::{CfName, IterOptions, ReadOptions, CF_DEFAULT, DATA_KEY_PREFIX_LEN}; use txn_types::{Key, TimeStamp, Value}; use crate::storage::kv::{Error, ErrorInner, Iterator, Result, Snapshot}; @@ -19,7 +19,7 @@ impl RawMvccSnapshot { pub fn seek_first_key_value_cf( &self, - cf: Option, + cf: CfName, opts: Option, key: &Key, ) -> Result> { @@ -29,10 +29,7 @@ impl RawMvccSnapshot { iter_opt.set_prefix_same_as_start(true); let upper_bound = key.clone().append_ts(TimeStamp::zero()).into_encoded(); iter_opt.set_vec_upper_bound(upper_bound, DATA_KEY_PREFIX_LEN); - let mut iter = match cf { - Some(cf_name) => self.iter_cf(cf_name, iter_opt)?, - None => self.iter(iter_opt)?, - }; + let mut iter = self.iter(cf, iter_opt)?; if iter.seek(key)? 
{ Ok(Some(iter.value().to_owned())) } else { @@ -46,23 +43,19 @@ impl Snapshot for RawMvccSnapshot { type Ext<'a> = S::Ext<'a> where S: 'a; fn get(&self, key: &Key) -> Result> { - self.seek_first_key_value_cf(None, None, key) + self.seek_first_key_value_cf(CF_DEFAULT, None, key) } fn get_cf(&self, cf: CfName, key: &Key) -> Result> { - self.seek_first_key_value_cf(Some(cf), None, key) + self.seek_first_key_value_cf(cf, None, key) } fn get_cf_opt(&self, opts: ReadOptions, cf: CfName, key: &Key) -> Result> { - self.seek_first_key_value_cf(Some(cf), Some(opts), key) - } - - fn iter(&self, iter_opt: IterOptions) -> Result { - Ok(RawMvccIterator::new(self.snap.iter(iter_opt)?)) + self.seek_first_key_value_cf(cf, Some(opts), key) } - fn iter_cf(&self, cf: CfName, iter_opt: IterOptions) -> Result { - Ok(RawMvccIterator::new(self.snap.iter_cf(cf, iter_opt)?)) + fn iter(&self, cf: CfName, iter_opt: IterOptions) -> Result { + Ok(RawMvccIterator::new(self.snap.iter(cf, iter_opt)?)) } #[inline] @@ -315,7 +308,7 @@ mod tests { // seek let iter_opt = IterOptions::default(); - let mut iter = encode_snapshot.iter_cf(CF_DEFAULT, iter_opt).unwrap(); + let mut iter = encode_snapshot.iter(CF_DEFAULT, iter_opt).unwrap(); let mut pairs = vec![]; let raw_key = ApiV2::encode_raw_key_owned(b"r\0a".to_vec(), None); iter.seek(&raw_key).unwrap(); diff --git a/src/storage/raw/store.rs b/src/storage/raw/store.rs index b5b901d77a0..5caad0dfbb6 100644 --- a/src/storage/raw/store.rs +++ b/src/storage/raw/store.rs @@ -197,7 +197,7 @@ impl<'a, S: Snapshot, F: KvFormat> RawStoreInner { if limit == 0 { return Ok(vec![]); } - let mut cursor = Cursor::new(self.snapshot.iter_cf(cf, option)?, ScanMode::Forward, false); + let mut cursor = Cursor::new(self.snapshot.iter(cf, option)?, ScanMode::Forward, false); let statistics = statistics.mut_cf_statistics(cf); if !cursor.seek(start_key, statistics)? 
{ return Ok(vec![]); @@ -248,11 +248,7 @@ impl<'a, S: Snapshot, F: KvFormat> RawStoreInner { if limit == 0 { return Ok(vec![]); } - let mut cursor = Cursor::new( - self.snapshot.iter_cf(cf, option)?, - ScanMode::Backward, - false, - ); + let mut cursor = Cursor::new(self.snapshot.iter(cf, option)?, ScanMode::Backward, false); let statistics = statistics.mut_cf_statistics(cf); if !cursor.reverse_seek(start_key, statistics)? { return Ok(vec![]); @@ -303,8 +299,7 @@ impl<'a, S: Snapshot, F: KvFormat> RawStoreInner { let cf_stats = stats.mut_cf_statistics(cf); let mut opts = IterOptions::new(None, None, false); opts.set_upper_bound(r.get_end_key(), DATA_KEY_PREFIX_LEN); - let mut cursor = - Cursor::new(self.snapshot.iter_cf(cf, opts)?, ScanMode::Forward, false); + let mut cursor = Cursor::new(self.snapshot.iter(cf, opts)?, ScanMode::Forward, false); cursor.seek(&Key::from_encoded(r.get_start_key().to_vec()), cf_stats)?; while cursor.valid()? { row_count += 1; diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index cfe8f68c512..dd9e451e883 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -1452,10 +1452,7 @@ mod tests { fn get_cf_opt(&self, _: ReadOptions, _: CfName, _: &Key) -> Result> { unimplemented!() } - fn iter(&self, _: IterOptions) -> Result { - unimplemented!() - } - fn iter_cf(&self, _: CfName, _: IterOptions) -> Result { + fn iter(&self, _: CfName, _: IterOptions) -> Result { unimplemented!() } fn ext(&self) -> MockSnapshotExt { diff --git a/src/storage/txn/store.rs b/src/storage/txn/store.rs index 5ba658ff062..59f9f077aa2 100644 --- a/src/storage/txn/store.rs +++ b/src/storage/txn/store.rs @@ -812,10 +812,7 @@ mod tests { fn get_cf_opt(&self, _: ReadOptions, _: CfName, _: &Key) -> EngineResult> { Ok(None) } - fn iter(&self, _: IterOptions) -> EngineResult { - Ok(MockRangeSnapshotIter::default()) - } - fn iter_cf(&self, _: CfName, _: IterOptions) -> EngineResult { + fn 
iter(&self, _: CfName, _: IterOptions) -> EngineResult { Ok(MockRangeSnapshotIter::default()) } fn lower_bound(&self) -> Option<&[u8]> { diff --git a/tests/failpoints/cases/test_replica_stale_read.rs b/tests/failpoints/cases/test_replica_stale_read.rs index 83180d8156d..ab11b7039fd 100644 --- a/tests/failpoints/cases/test_replica_stale_read.rs +++ b/tests/failpoints/cases/test_replica_stale_read.rs @@ -1,6 +1,6 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -use std::sync::Arc; +use std::{sync::Arc, time::Duration}; use kvproto::{kvrpcpb::Op, metapb::Peer}; use pd_client::PdClient; @@ -314,15 +314,15 @@ fn test_stale_read_while_applying_snapshot() { // Compact logs to force requesting snapshot after clearing send filters. let gc_limit = cluster.cfg.raft_store.raft_log_gc_count_limit(); - let state = cluster.truncated_state(1, 1); - for i in 1..gc_limit * 10 { + for i in 1..gc_limit * 2 { let (k, v) = ( format!("k{}", i).into_bytes(), format!("v{}", i).into_bytes(), ); leader_client.must_kv_write(&pd_client, vec![new_mutation(Op::Put, &k, &v)], k); } - cluster.wait_log_truncated(1, 1, state.get_index() + 5 * gc_limit); + let last_index_on_store_2 = cluster.raft_local_state(1, 2).last_index; + cluster.wait_log_truncated(1, 1, last_index_on_store_2 + 1); // Pasuse before applying snapshot is finish let raft_before_applying_snap_finished = "raft_before_applying_snap_finished"; @@ -330,7 +330,7 @@ fn test_stale_read_while_applying_snapshot() { cluster.clear_send_filters(); // Wait follower 2 start applying snapshot - cluster.wait_log_truncated(1, 2, state.get_index() + 5 * gc_limit); + cluster.wait_log_truncated(1, 2, last_index_on_store_2 + 1); sleep_ms(100); // We can't read while applying snapshot and the `safe_ts` should reset to 0 @@ -346,6 +346,9 @@ fn test_stale_read_while_applying_snapshot() { // Resume applying snapshot fail::remove(raft_before_applying_snap_finished); + let last_index_on_store_1 = cluster.raft_local_state(1, 
1).last_index; + cluster.wait_last_index(1, 2, last_index_on_store_1, Duration::from_secs(3)); + // We can read `key1` after applied snapshot follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), k1_commit_ts); // There is still lock on the region, we can't read `key1` with the newest ts diff --git a/tests/failpoints/cases/test_ttl.rs b/tests/failpoints/cases/test_ttl.rs index 9e6a8a3bcde..4748b1d0bbf 100644 --- a/tests/failpoints/cases/test_ttl.rs +++ b/tests/failpoints/cases/test_ttl.rs @@ -349,7 +349,7 @@ fn test_ttl_iterator_impl() { let snapshot = engine.snapshot(SnapContext::default()).unwrap(); let ttl_snapshot = RawEncodeSnapshot::<_, F>::from_snapshot(snapshot); let mut iter = ttl_snapshot - .iter(IterOptions::new(None, None, false)) + .iter(CF_DEFAULT, IterOptions::new(None, None, false)) .unwrap(); iter.seek_to_first().unwrap(); assert_eq!(iter.key(), b"r\0key1"); diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index e7901bf9bf4..2d6657e5a90 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -8,7 +8,7 @@ use std::{ }; use engine_rocks::Compat; -use engine_traits::{Iterable, Peekable, CF_WRITE}; +use engine_traits::{Iterable, Peekable, CF_DEFAULT, CF_WRITE}; use keys::data_key; use kvproto::{metapb, pdpb, raft_cmdpb::*, raft_serverpb::RaftMessage}; use pd_client::PdClient; @@ -202,11 +202,17 @@ fn test_auto_split_region(cluster: &mut Cluster) { let mut size = 0; cluster.engines[&store_id] .kv - .scan(&data_key(b""), &data_key(middle_key), false, |k, v| { - size += k.len() as u64; - size += v.len() as u64; - Ok(true) - }) + .scan( + CF_DEFAULT, + &data_key(b""), + &data_key(middle_key), + false, + |k, v| { + size += k.len() as u64; + size += v.len() as u64; + Ok(true) + }, + ) .expect(""); assert!(size <= REGION_SPLIT_SIZE); // although size may be smaller than REGION_SPLIT_SIZE, but the diff should 
diff --git a/tests/integrations/raftstore/test_tombstone.rs b/tests/integrations/raftstore/test_tombstone.rs index 158223d9a2c..18a1e5a96ca 100644 --- a/tests/integrations/raftstore/test_tombstone.rs +++ b/tests/integrations/raftstore/test_tombstone.rs @@ -50,7 +50,7 @@ fn test_tombstone(cluster: &mut Cluster) { for cf in engine_2.cf_names() { engine_2 .c() - .scan_cf(cf, b"", &[0xFF], false, |k, v| { + .scan(cf, b"", &[0xFF], false, |k, v| { existing_kvs.push((k.to_vec(), v.to_vec())); Ok(true) }) diff --git a/tests/integrations/storage/test_raftkv.rs b/tests/integrations/storage/test_raftkv.rs index 5e41e3c2789..4f48cb72920 100644 --- a/tests/integrations/storage/test_raftkv.rs +++ b/tests/integrations/storage/test_raftkv.rs @@ -385,20 +385,7 @@ fn assert_none_cf(ctx: SnapContext<'_>, engine: &E, cf: CfName, key: assert_eq!(snapshot.get_cf(cf, &Key::from_raw(key)).unwrap(), None); } -fn assert_seek(ctx: SnapContext<'_>, engine: &E, key: &[u8], pair: (&[u8], &[u8])) { - let snapshot = engine.snapshot(ctx).unwrap(); - let mut cursor = Cursor::new( - snapshot.iter(IterOptions::default()).unwrap(), - ScanMode::Mixed, - false, - ); - let mut statistics = CfStatistics::default(); - cursor.seek(&Key::from_raw(key), &mut statistics).unwrap(); - assert_eq!(cursor.key(&mut statistics), &*bytes::encode_bytes(pair.0)); - assert_eq!(cursor.value(&mut statistics), pair.1); -} - -fn assert_seek_cf( +fn assert_seek( ctx: SnapContext<'_>, engine: &E, cf: CfName, @@ -407,7 +394,7 @@ fn assert_seek_cf( ) { let snapshot = engine.snapshot(ctx).unwrap(); let mut cursor = Cursor::new( - snapshot.iter_cf(cf, IterOptions::default()).unwrap(), + snapshot.iter(cf, IterOptions::default()).unwrap(), ScanMode::Mixed, false, ); @@ -479,14 +466,14 @@ fn batch(ctx: SnapContext<'_>, engine: &E) { fn seek(ctx: SnapContext<'_>, engine: &E) { must_put(ctx.pb_ctx, engine, b"x", b"1"); - assert_seek(ctx.clone(), engine, b"x", (b"x", b"1")); - assert_seek(ctx.clone(), engine, b"a", (b"x", b"1")); + 
assert_seek(ctx.clone(), engine, CF_DEFAULT, b"x", (b"x", b"1")); + assert_seek(ctx.clone(), engine, CF_DEFAULT, b"a", (b"x", b"1")); must_put(ctx.pb_ctx, engine, b"z", b"2"); - assert_seek(ctx.clone(), engine, b"y", (b"z", b"2")); - assert_seek(ctx.clone(), engine, b"x\x00", (b"z", b"2")); + assert_seek(ctx.clone(), engine, CF_DEFAULT, b"y", (b"z", b"2")); + assert_seek(ctx.clone(), engine, CF_DEFAULT, b"x\x00", (b"z", b"2")); let snapshot = engine.snapshot(ctx.clone()).unwrap(); let mut iter = Cursor::new( - snapshot.iter(IterOptions::default()).unwrap(), + snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), ScanMode::Mixed, false, ); @@ -505,7 +492,7 @@ fn near_seek(ctx: SnapContext<'_>, engine: &E) { must_put(ctx.pb_ctx, engine, b"z", b"2"); let snapshot = engine.snapshot(ctx.clone()).unwrap(); let mut cursor = Cursor::new( - snapshot.iter(IterOptions::default()).unwrap(), + snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), ScanMode::Mixed, false, ); @@ -525,11 +512,12 @@ fn near_seek(ctx: SnapContext<'_>, engine: &E) { must_delete(ctx.pb_ctx, engine, b"z"); } +// TODO: remove following as the code path of cf is the same. 
fn cf(ctx: SnapContext<'_>, engine: &E) { assert_none_cf(ctx.clone(), engine, "default", b"key"); must_put_cf(ctx.pb_ctx, engine, "default", b"key", b"value"); assert_has_cf(ctx.clone(), engine, "default", b"key", b"value"); - assert_seek_cf(ctx.clone(), engine, "default", b"k", (b"key", b"value")); + assert_seek(ctx.clone(), engine, "default", b"k", (b"key", b"value")); must_delete_cf(ctx.pb_ctx, engine, "default", b"key"); assert_none_cf(ctx, engine, "default", b"key"); } From 5dc99a9ff0dfbc0641a589eaa44df0f2ae1ec2e1 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 22 Jul 2022 17:07:09 +0800 Subject: [PATCH 0109/1149] *: solve the problem that test_stale_read_while_applying_snapshot is unstable (#13091) close tikv/tikv#13057 Signed-off-by: SpadeA-Tang Co-authored-by: Ti Chi Robot --- components/engine_rocks/src/misc.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index ce608d353b7..ad1f385654f 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -202,9 +202,10 @@ impl MiscExt for RocksEngine { if let Some(n) = util::get_cf_num_files_at_level(self.as_inner(), handle, 0) { let options = self.as_inner().get_options_cf(handle); let slowdown_trigger = options.get_level_zero_slowdown_writes_trigger(); + let compaction_trigger = options.get_level_zero_file_num_compaction_trigger() as u64; // Leave enough buffer to tolerate heavy write workload, // which may flush some memtables in a short time. 
- if n > u64::from(slowdown_trigger) / 2 { + if n > u64::from(slowdown_trigger) / 2 && n >= compaction_trigger { return Ok(true); } } From b4a0e3cfd566644ad5b10ba5ba309c029b42f5a2 Mon Sep 17 00:00:00 2001 From: tison Date: Sat, 23 Jul 2022 11:51:09 +0800 Subject: [PATCH 0110/1149] *: migrate match-template to standalone crate (#13112) close tikv/tikv#13113 Signed-off-by: tison Co-authored-by: Ti Chi Robot --- Cargo.toml | 3 +- components/api_version/Cargo.toml | 2 +- components/match_template/Cargo.toml | 13 - components/match_template/src/lib.rs | 261 --------------------- components/tidb_query_aggr/Cargo.toml | 2 +- components/tidb_query_datatype/Cargo.toml | 2 +- components/tidb_query_executors/Cargo.toml | 2 +- components/tidb_query_expr/Cargo.toml | 2 +- scripts/check-bins.py | 2 +- 9 files changed, 7 insertions(+), 282 deletions(-) delete mode 100644 components/match_template/Cargo.toml delete mode 100644 components/match_template/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index b094c857d5e..fd7af73bdf4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -110,7 +110,7 @@ libc = "0.2" libloading = "0.7" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } log_wrappers = { path = "components/log_wrappers" } -match_template = { path = "components/match_template" } +match-template = "0.0.1" memory_trace_macros = { path = "components/memory_trace_macros" } mime = "0.3.13" more-asserts = "0.2" @@ -243,7 +243,6 @@ members = [ "components/into_other", "components/keys", "components/log_wrappers", - "components/match_template", "components/online_config", "components/panic_hook", "components/pd_client", diff --git a/components/api_version/Cargo.toml b/components/api_version/Cargo.toml index b6ce4bf54d5..e2d4beaacbf 100644 --- a/components/api_version/Cargo.toml +++ b/components/api_version/Cargo.toml @@ -12,7 +12,7 @@ bitflags = "1.0.1" codec = { path = "../codec", default-features = false } engine_traits = { path = "../engine_traits", 
default-features = false } kvproto = { git = "https://github.com/pingcap/kvproto.git" } -match_template = { path = "../match_template" } +match-template = "0.0.1" thiserror = "1.0" tikv_alloc = { path = "../tikv_alloc" } tikv_util = { path = "../tikv_util", default-features = false } diff --git a/components/match_template/Cargo.toml b/components/match_template/Cargo.toml deleted file mode 100644 index 1f5f683ee92..00000000000 --- a/components/match_template/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[package] -name = "match_template" -version = "0.0.1" -edition = "2018" -publish = false - -[lib] -proc-macro = true - -[dependencies] -proc-macro2 = "1" -quote = "1" -syn = { version = "1", features = ["full", "extra-traits", "fold"] } diff --git a/components/match_template/src/lib.rs b/components/match_template/src/lib.rs deleted file mode 100644 index eb50d333379..00000000000 --- a/components/match_template/src/lib.rs +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. - -#[macro_use] -extern crate quote; - -use proc_macro2::{Group, TokenStream, TokenTree}; -use quote::ToTokens; -use syn::{ - parse::{Parse, ParseStream, Result}, - punctuated::Punctuated, - *, -}; - -/// This crate provides a macro that can be used to append a match expression with multiple -/// arms, where the tokens in the first arm, as a template, can be subsitituted and the template -/// arm will be expanded into multiple arms. -/// -/// For example, the following code -/// -/// ```ignore -/// match_template! 
{ -/// T = [Int, Real, Double], -/// match Foo { -/// EvalType::T => { panic!("{}", EvalType::T); }, -/// EvalType::Other => unreachable!(), -/// } -/// } -/// ``` -/// -/// generates -/// -/// ```ignore -/// match Foo { -/// EvalType::Int => { panic!("{}", EvalType::Int); }, -/// EvalType::Real => { panic!("{}", EvalType::Real); }, -/// EvalType::Double => { panic!("{}", EvalType::Double); }, -/// EvalType::Other => unreachable!(), -/// } -/// ``` -/// -/// In addition, substitution can vary on two sides of the arms. -/// -/// For example, -/// -/// ```ignore -/// match_template! { -/// T = [Foo, Bar => Baz], -/// match Foo { -/// EvalType::T => { panic!("{}", EvalType::T); }, -/// } -/// } -/// ``` -/// -/// generates -/// -/// ```ignore -/// match Foo { -/// EvalType::Foo => { panic!("{}", EvalType::Foo); }, -/// EvalType::Bar => { panic!("{}", EvalType::Baz); }, -/// } -/// ``` -/// -/// Wildcard match arm is also supported (but there will be no substitution). -#[proc_macro] -pub fn match_template(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - let mt = parse_macro_input!(input as MatchTemplate); - mt.expand().into() -} -struct MatchTemplate { - template_ident: Ident, - substitutes: Punctuated, - match_exp: Box, - template_arm: Arm, - remaining_arms: Vec, -} - -impl Parse for MatchTemplate { - fn parse(input: ParseStream<'_>) -> Result { - let template_ident = input.parse()?; - input.parse::()?; - let substitutes_tokens; - bracketed!(substitutes_tokens in input); - let substitutes = - Punctuated::::parse_terminated(&substitutes_tokens)?; - input.parse::()?; - let m: ExprMatch = input.parse()?; - let mut arms = m.arms; - arms.iter_mut().for_each(|arm| arm.comma = None); - assert!(!arms.is_empty(), "Expect at least 1 match arm"); - let template_arm = arms.remove(0); - assert!(template_arm.guard.is_none(), "Expect no match arm guard"); - - Ok(Self { - template_ident, - substitutes, - match_exp: m.expr, - template_arm, - remaining_arms: arms, - }) - 
} -} - -impl MatchTemplate { - fn expand(self) -> TokenStream { - let Self { - template_ident, - substitutes, - match_exp, - template_arm, - remaining_arms, - } = self; - let match_arms = substitutes.into_iter().map(|substitute| { - let mut arm = template_arm.clone(); - let (left_tokens, right_tokens) = match substitute { - Substitution::Identical(ident) => { - (ident.clone().into_token_stream(), ident.into_token_stream()) - } - Substitution::Map(left_ident, right_tokens) => { - (left_ident.into_token_stream(), right_tokens) - } - }; - arm.pat = replace_in_token_stream(arm.pat, &template_ident, &left_tokens); - arm.body = replace_in_token_stream(arm.body, &template_ident, &right_tokens); - arm - }); - quote! { - match #match_exp { - #(#match_arms,)* - #(#remaining_arms,)* - } - } - } -} - -#[derive(Debug)] -enum Substitution { - Identical(Ident), - Map(Ident, TokenStream), -} - -impl Parse for Substitution { - fn parse(input: ParseStream<'_>) -> Result { - let left_ident = input.parse()?; - let fat_arrow: Option]> = input.parse()?; - if fat_arrow.is_some() { - let mut right_tokens: Vec = vec![]; - while !input.peek(Token![,]) && !input.is_empty() { - right_tokens.push(input.parse()?); - } - Ok(Substitution::Map( - left_ident, - right_tokens.into_iter().collect(), - )) - } else { - Ok(Substitution::Identical(left_ident)) - } - } -} - -fn replace_in_token_stream( - input: T, - from_ident: &Ident, - to_tokens: &TokenStream, -) -> T { - let mut tokens = TokenStream::new(); - input.to_tokens(&mut tokens); - - let tokens: TokenStream = tokens - .into_iter() - .flat_map(|token| match token { - TokenTree::Ident(ident) if ident == *from_ident => to_tokens.clone(), - TokenTree::Group(group) => Group::new( - group.delimiter(), - replace_in_token_stream(group.stream(), from_ident, to_tokens), - ) - .into_token_stream(), - other => other.into(), - }) - .collect(); - - syn::parse2(tokens).unwrap() -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_basic() { - 
let input = r#" - T = [Int, Real, Double], - match foo() { - EvalType::T => { panic!("{}", EvalType::T); }, - EvalType::Other => unreachable!(), - } - "#; - - let expect_output = r#" - match foo() { - EvalType::Int => { panic!("{}", EvalType::Int); }, - EvalType::Real => { panic!("{}", EvalType::Real); }, - EvalType::Double => { panic!("{}", EvalType::Double); }, - EvalType::Other => unreachable!(), - } - "#; - let expect_output_stream: TokenStream = expect_output.parse().unwrap(); - - let mt: MatchTemplate = syn::parse_str(input).unwrap(); - let output = mt.expand(); - assert_eq!(output.to_string(), expect_output_stream.to_string()); - } - - #[test] - fn test_wildcard() { - let input = r#" - TT = [Foo, Bar], - match v { - VectorValue::TT => EvalType::TT, - _ => unreachable!(), - } - "#; - - let expect_output = r#" - match v { - VectorValue::Foo => EvalType::Foo, - VectorValue::Bar => EvalType::Bar, - _ => unreachable!(), - } - "#; - let expect_output_stream: TokenStream = expect_output.parse().unwrap(); - - let mt: MatchTemplate = syn::parse_str(input).unwrap(); - let output = mt.expand(); - assert_eq!(output.to_string(), expect_output_stream.to_string()); - } - - #[test] - fn test_map() { - let input = r#" - TT = [Foo, Bar => Baz, Bark => <&'static Whooh>()], - match v { - VectorValue::TT => EvalType::TT, - EvalType::Other => unreachable!(), - } - "#; - - let expect_output = r#" - match v { - VectorValue::Foo => EvalType::Foo, - VectorValue::Bar => EvalType::Baz, - VectorValue::Bark => EvalType:: < & 'static Whooh>(), - EvalType::Other => unreachable!(), - } - "#; - let expect_output_stream: TokenStream = expect_output.parse().unwrap(); - - let mt: MatchTemplate = syn::parse_str(input).unwrap(); - let output = mt.expand(); - assert_eq!(output.to_string(), expect_output_stream.to_string()); - } -} diff --git a/components/tidb_query_aggr/Cargo.toml b/components/tidb_query_aggr/Cargo.toml index 71025327e9a..e1642fb6f31 100644 --- 
a/components/tidb_query_aggr/Cargo.toml +++ b/components/tidb_query_aggr/Cargo.toml @@ -6,7 +6,7 @@ publish = false description = "Vector aggr functions of query engine to run TiDB pushed down executors" [dependencies] -match_template = { path = "../match_template" } +match-template = "0.0.1" tidb_query_codegen = { path = "../tidb_query_codegen" } tidb_query_common = { path = "../tidb_query_common", default-features = false } tidb_query_datatype = { path = "../tidb_query_datatype", default-features = false } diff --git a/components/tidb_query_datatype/Cargo.toml b/components/tidb_query_datatype/Cargo.toml index 56acb353302..2e748d26d8d 100644 --- a/components/tidb_query_datatype/Cargo.toml +++ b/components/tidb_query_datatype/Cargo.toml @@ -20,7 +20,7 @@ hex = "0.4" kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" log_wrappers = { path = "../log_wrappers" } -match_template = { path = "../match_template" } +match-template = "0.0.1" nom = { version = "5.1.0", default-features = false, features = ["std"] } num = { version = "0.3", default-features = false } num-derive = "0.3" diff --git a/components/tidb_query_executors/Cargo.toml b/components/tidb_query_executors/Cargo.toml index db4992b0306..923696606ed 100644 --- a/components/tidb_query_executors/Cargo.toml +++ b/components/tidb_query_executors/Cargo.toml @@ -13,7 +13,7 @@ futures = { version = "0.3", features = ["compat"] } itertools = "0.10" kvproto = { git = "https://github.com/pingcap/kvproto.git" } log_wrappers = { path = "../log_wrappers" } -match_template = { path = "../match_template" } +match-template = "0.0.1" protobuf = { version = "2.8", features = ["bytes"] } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } diff --git a/components/tidb_query_expr/Cargo.toml b/components/tidb_query_expr/Cargo.toml 
index 8458516390b..a04553b5b6d 100644 --- a/components/tidb_query_expr/Cargo.toml +++ b/components/tidb_query_expr/Cargo.toml @@ -14,7 +14,7 @@ file_system = { path = "../file_system", default-features = false } flate2 = { version = "=1.0.11", default-features = false, features = ["zlib"] } hex = "0.4" log_wrappers = { path = "../log_wrappers" } -match_template = { path = "../match_template" } +match-template = "0.0.1" num = { version = "0.3", default-features = false } num-traits = "0.2" openssl = { version = "0.10" } diff --git a/scripts/check-bins.py b/scripts/check-bins.py index 04a3b77c01d..e8c7bf03791 100644 --- a/scripts/check-bins.py +++ b/scripts/check-bins.py @@ -11,7 +11,7 @@ # NB: The fuzzer bins here are just placeholders due to the workspace # structure; they are not actual fuzzers. WHITE_LIST = { - "online_config", "online_config_derive", "match_template", "tidb_query_codegen", + "online_config", "online_config_derive", "tidb_query_codegen", "panic_hook", "fuzz", "fuzzer_afl", "fuzzer_honggfuzz", "fuzzer_libfuzzer", "coprocessor_plugin_api", "example_plugin", "memory_trace_macros", "case_macros", "tracker" From 24316a45e8416593ec0e101a4e85a695f651eeb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 25 Jul 2022 16:17:10 +0800 Subject: [PATCH 0111/1149] log-backup: unify namespace for log backup metrics (#13105) ref tikv/tikv#12534 Renamed metrics in `backup-stream`: tikv_stream_(.*) => tikv_log_backup_$1 tikv_pending_initial_scan => tikv_log_backup_initial_scan Signed-off-by: Yu Juncen Co-authored-by: Ti Chi Robot --- components/backup-stream/src/metrics.rs | 32 ++++++++--------- metrics/grafana/tikv_details.json | 46 ++++++++++++------------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/components/backup-stream/src/metrics.rs b/components/backup-stream/src/metrics.rs index a94be6df7f6..de150ef2395 100644 --- a/components/backup-stream/src/metrics.rs +++ 
b/components/backup-stream/src/metrics.rs @@ -44,14 +44,14 @@ lazy_static! { ) .unwrap(); pub static ref HANDLE_EVENT_DURATION_HISTOGRAM: HistogramVec = register_histogram_vec!( - "tikv_stream_event_handle_duration_sec", + "tikv_log_backup_event_handle_duration_sec", "The duration of handling an cmd batch.", &["stage"], exponential_buckets(0.001, 2.0, 16).unwrap() ) .unwrap(); pub static ref HANDLE_KV_HISTOGRAM: Histogram = register_histogram!( - "tikv_stream_handle_kv_batch", + "tikv_log_backup_handle_kv_batch", "The total kv pair change handle by the stream backup", exponential_buckets(1.0, 2.0, 16).unwrap() ) @@ -62,18 +62,18 @@ lazy_static! { ) .unwrap(); pub static ref INCREMENTAL_SCAN_SIZE: Histogram = register_histogram!( - "tikv_stream_incremental_scan_bytes", + "tikv_log_backup_incremental_scan_bytes", "The size of scanning.", exponential_buckets(64.0, 2.0, 16).unwrap() ) .unwrap(); pub static ref SKIP_KV_COUNTER: Counter = register_counter!( - "tikv_stream_skip_kv_count", + "tikv_log_backup_skip_kv_count", "The total kv size skipped by the streaming", ) .unwrap(); pub static ref STREAM_ERROR: IntCounterVec = register_int_counter_vec!( - "tikv_stream_errors", + "tikv_log_backup_errors", "The errors during stream backup.", &["type"] ) @@ -85,61 +85,61 @@ lazy_static! { ) .unwrap(); pub static ref HEAP_MEMORY: IntGauge = register_int_gauge!( - "tikv_stream_heap_memory", + "tikv_log_backup_heap_memory", "The heap memory allocating by stream backup." 
) .unwrap(); pub static ref ON_EVENT_COST_HISTOGRAM: HistogramVec = register_histogram_vec!( - "tikv_stream_on_event_duration_seconds", + "tikv_log_backup_on_event_duration_seconds", "The time cost of handling events.", &["stage"], exponential_buckets(0.001, 2.0, 16).unwrap() ) .unwrap(); pub static ref STORE_CHECKPOINT_TS: IntGaugeVec = register_int_gauge_vec!( - "tikv_stream_store_checkpoint_ts", + "tikv_log_backup_store_checkpoint_ts", "The checkpoint ts (next backup ts) of task", &["task"], ) .unwrap(); pub static ref FLUSH_DURATION: HistogramVec = register_histogram_vec!( - "tikv_stream_flush_duration_sec", + "tikv_log_backup_flush_duration_sec", "The time cost of flushing a task.", &["stage"], exponential_buckets(1.0, 2.0, 16).unwrap() ) .unwrap(); pub static ref FLUSH_FILE_SIZE: Histogram = register_histogram!( - "tikv_stream_flush_file_size", + "tikv_log_backup_flush_file_size", "Some statistics of flushing of this run.", exponential_buckets(1024.0, 2.0, 16).unwrap() ) .unwrap(); pub static ref INITIAL_SCAN_DURATION: Histogram = register_histogram!( - "tikv_stream_initial_scan_duration_sec", + "tikv_log_backup_initial_scan_duration_sec", "The duration of initial scanning.", exponential_buckets(0.001, 2.0, 16).unwrap() ) .unwrap(); pub static ref SKIP_RETRY: IntCounterVec = register_int_counter_vec!( - "tikv_stream_skip_retry_observe", + "tikv_log_backup_skip_retry_observe", "The reason of giving up observing region when meeting error.", &["reason"], ) .unwrap(); pub static ref INITIAL_SCAN_STAT: IntCounterVec = register_int_counter_vec!( - "tikv_stream_initial_scan_operations", + "tikv_log_backup_initial_scan_operations", "The operations over rocksdb during initial scanning.", &["cf", "op"], ) .unwrap(); pub static ref STREAM_ENABLED: IntCounter = register_int_counter!( - "tikv_stream_enabled", + "tikv_log_backup_enabled", "When gt 0, this node enabled streaming." 
) .unwrap(); pub static ref TRACK_REGION: IntGauge = register_int_gauge!( - "tikv_stream_observed_region", + "tikv_log_backup_observed_region", "the region being observed by the current store.", ) .unwrap(); @@ -150,7 +150,7 @@ lazy_static! { ) .unwrap(); pub static ref PENDING_INITIAL_SCAN_LEN: IntGaugeVec = register_int_gauge_vec!( - "tikv_pending_initial_scan", + "tikv_log_backup_pending_initial_scan", "The pending initial scan", &["stage"] ) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index eda4e88de66..5da0ca7c0d3 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -40199,7 +40199,7 @@ "targets": [ { "exemplar": true, - "expr": "tikv_stream_enabled{instance=~\"$instance\"}", + "expr": "tikv_log_backup_enabled{instance=~\"$instance\"}", "instant": true, "interval": "", "legendFormat": "{{ instance }}", @@ -40261,7 +40261,7 @@ "targets": [ { "exemplar": true, - "expr": "increase(tikv_stream_flush_file_size_sum{instance=~\"$instance\"}[30m]) / on(instance) increase(tikv_stream_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])", + "expr": "increase(tikv_log_backup_flush_file_size_sum{instance=~\"$instance\"}[30m]) / on(instance) increase(tikv_log_backup_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])", "hide": false, "instant": true, "interval": "", @@ -40322,7 +40322,7 @@ "targets": [ { "exemplar": true, - "expr": "round(increase(tikv_stream_flush_file_size_count{instance=~\"$instance\"}[30m]))", + "expr": "round(increase(tikv_log_backup_flush_file_size_count{instance=~\"$instance\"}[30m]))", "instant": true, "interval": "", "legendFormat": "{{ instance }}", @@ -40383,7 +40383,7 @@ "targets": [ { "exemplar": true, - "expr": "round(sum(increase(tikv_stream_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])))", + "expr": 
"round(sum(increase(tikv_log_backup_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])))", "hide": false, "instant": true, "interval": "", @@ -40444,7 +40444,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_stream_flush_file_size_sum{instance=~\"$instance\"}[30m]))", + "expr": "sum(increase(tikv_log_backup_flush_file_size_sum{instance=~\"$instance\"}[30m]))", "hide": false, "instant": true, "interval": "", @@ -40663,7 +40663,7 @@ "targets": [ { "exemplar": true, - "expr": "round(sum(increase(tikv_stream_flush_file_size_count{instance=~\"$instance\"}[30m])))", + "expr": "round(sum(increase(tikv_log_backup_flush_file_size_count{instance=~\"$instance\"}[30m])))", "hide": false, "instant": true, "interval": "", @@ -40840,7 +40840,7 @@ "targets": [ { "exemplar": true, - "expr": "rate(tikv_stream_handle_kv_batch_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "expr": "rate(tikv_log_backup_handle_kv_batch_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", "format": "time_series", "instant": false, "interval": "", @@ -40941,7 +40941,7 @@ "targets": [ { "exemplar": true, - "expr": "rate(tikv_stream_incremental_scan_bytes_sum{instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(tikv_log_backup_incremental_scan_bytes_sum{instance=~\"$instance\"}[$__rate_interval])", "interval": "", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -41193,7 +41193,7 @@ "targets": [ { "exemplar": true, - "expr": "tikv_stream_heap_memory{instance=~\"$instance\"}", + "expr": "tikv_log_backup_heap_memory{instance=~\"$instance\"}", "format": "time_series", "instant": false, "interval": "", @@ -41298,7 +41298,7 @@ "targets": [ { "exemplar": true, - "expr": "tikv_stream_observed_region{instance=~\"$instance\"}", + "expr": "tikv_log_backup_observed_region{instance=~\"$instance\"}", "interval": "", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -41307,7 +41307,7 @@ }, { "exemplar": true, - 
"expr": "sum(tikv_stream_observed_region{instance=~\"$instance\"})", + "expr": "sum(tikv_log_backup_observed_region{instance=~\"$instance\"})", "hide": false, "interval": "", "legendFormat": "total", @@ -41407,7 +41407,7 @@ "targets": [ { "exemplar": true, - "expr": "increase(tikv_stream_errors{instance=~\"$instance\"}[$__interval])", + "expr": "increase(tikv_log_backup_errors{instance=~\"$instance\"}[$__interval])", "format": "time_series", "hide": false, "instant": false, @@ -41418,7 +41418,7 @@ }, { "exemplar": true, - "expr": "tikv_stream_errors{instance=~\"$instance\"}", + "expr": "tikv_log_backup_errors{instance=~\"$instance\"}", "hide": true, "interval": "1m", "intervalFactor": 2, @@ -41752,7 +41752,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_stream_flush_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_files\"}[$__interval])) by (le)", + "expr": "sum(increase(tikv_log_backup_flush_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_files\"}[$__interval])) by (le)", "format": "heatmap", "instant": false, "interval": "", @@ -41835,7 +41835,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_stream_initial_scan_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", + "expr": "sum(increase(tikv_log_backup_initial_scan_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", "format": "heatmap", "instant": false, "interval": "", @@ -41918,7 +41918,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_stream_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"to_stream_event\"}[$__interval])) by (le)", + "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"to_stream_event\"}[$__interval])) by (le)", "format": 
"heatmap", "instant": false, "interval": "", @@ -42001,7 +42001,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_stream_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"get_router_lock\"}[$__interval])) by (le)", + "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"get_router_lock\"}[$__interval])) by (le)", "format": "heatmap", "instant": false, "interval": "", @@ -42084,7 +42084,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_stream_handle_kv_batch_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", + "expr": "sum(increase(tikv_log_backup_handle_kv_batch_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", "format": "heatmap", "instant": false, "interval": "", @@ -42167,7 +42167,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_stream_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_to_temp_file\"}[$__interval])) by (le)", + "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_to_temp_file\"}[$__interval])) by (le)", "format": "heatmap", "instant": false, "interval": "", @@ -42250,7 +42250,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_stream_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"write_to_tempfile\"}[$__interval])) by (le)", + "expr": "sum(increase(tikv_log_backup_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"write_to_tempfile\"}[$__interval])) by (le)", "format": "heatmap", "instant": false, "interval": "", @@ -42333,7 +42333,7 @@ "targets": [ { "exemplar": true, - "expr": 
"sum(increase(tikv_stream_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"syscall_write\"}[$__interval])) by (le)", + "expr": "sum(increase(tikv_log_backup_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"syscall_write\"}[$__interval])) by (le)", "format": "heatmap", "instant": false, "interval": "", @@ -42716,7 +42716,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_stream_initial_scan_operations{instance=~\"$instance\", op=~\"read_bytes\"}[$__rate_interval])) BY (op, cf)", + "expr": "sum(rate(tikv_log_backup_initial_scan_operations{instance=~\"$instance\", op=~\"read_bytes\"}[$__rate_interval])) BY (op, cf)", "interval": "", "legendFormat": "{{ cf }}", "queryType": "randomWalk", @@ -42815,7 +42815,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_stream_initial_scan_operations{instance=~\"$instance\", op!~\"read_bytes\"}[$__rate_interval])) BY (op, cf) > 0", + "expr": "sum(rate(tikv_log_backup_initial_scan_operations{instance=~\"$instance\", op!~\"read_bytes\"}[$__rate_interval])) BY (op, cf) > 0", "interval": "", "legendFormat": "{{ cf }}/{{ op }}", "queryType": "randomWalk", From e278777f97cc71aa60cf21550c5e8f55bfa95b84 Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 25 Jul 2022 16:43:11 +0800 Subject: [PATCH 0112/1149] raftstorev2: add apply batch system (#13013) ref tikv/tikv#12842 Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/batch/apply.rs | 181 +++++++++++++++++- components/raftstore-v2/src/batch/mod.rs | 1 + components/raftstore-v2/src/batch/store.rs | 31 ++- components/raftstore-v2/src/fsm/apply.rs | 72 +++++++ components/raftstore-v2/src/fsm/mod.rs | 1 + components/raftstore-v2/src/lib.rs | 1 + components/raftstore-v2/src/raft/apply.rs | 23 +++ components/raftstore-v2/src/raft/mod.rs | 2 + components/raftstore-v2/src/raft/peer.rs | 13 +- .../src/router/internal_message.rs | 3 + 
components/raftstore-v2/src/router/message.rs | 4 +- components/raftstore-v2/src/router/mod.rs | 2 + components/raftstore-v2/src/tablet.rs | 93 +++++++++ components/raftstore/src/store/fsm/apply.rs | 48 +++-- 14 files changed, 439 insertions(+), 36 deletions(-) create mode 100644 components/raftstore-v2/src/raft/apply.rs create mode 100644 components/raftstore-v2/src/router/internal_message.rs create mode 100644 components/raftstore-v2/src/tablet.rs diff --git a/components/raftstore-v2/src/batch/apply.rs b/components/raftstore-v2/src/batch/apply.rs index a7e392127d5..ab44d435e67 100644 --- a/components/raftstore-v2/src/batch/apply.rs +++ b/components/raftstore-v2/src/batch/apply.rs @@ -1,4 +1,183 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +//! This module contains all structs related to apply batch system. +//! +//! After being started, each thread will have its own `ApplyPoller` and poll +//! using `ApplyContext`. For more information, see the documentation of batch-system. 
+ +use std::{ + ops::{Deref, DerefMut}, + sync::Arc, +}; + +use batch_system::{ + BasicMailbox, BatchRouter, BatchSystem, HandleResult, HandlerBuilder, PollHandler, +}; +use engine_traits::{KvEngine, RaftEngine}; +use raftstore::store::{ + fsm::{ + apply::{ControlFsm, ControlMsg}, + ApplyNotifier, + }, + util::LatencyInspector, + Config, +}; +use slog::Logger; +use tikv_util::config::{Tracker, VersionTrack}; + +use crate::{ + fsm::{ApplyFsm, ApplyFsmDelegate}, + raft::{Apply, Peer}, + router::ApplyTask, +}; + +pub struct ApplyContext { + cfg: Config, +} + +impl ApplyContext { + pub fn new(cfg: Config) -> Self { + ApplyContext { cfg } + } +} + +pub struct ApplyPoller { + apply_task_buf: Vec, + pending_latency_inspect: Vec, + apply_ctx: ApplyContext, + cfg_tracker: Tracker, +} + +impl ApplyPoller { + pub fn new(apply_ctx: ApplyContext, cfg_tracker: Tracker) -> ApplyPoller { + ApplyPoller { + apply_task_buf: Vec::new(), + pending_latency_inspect: Vec::new(), + apply_ctx, + cfg_tracker, + } + } + + /// Updates the internal buffer to latest capacity. 
+ fn apply_buf_capacity(&mut self) { + let new_cap = self.messages_per_tick(); + tikv_util::set_vec_capacity(&mut self.apply_task_buf, new_cap); + } + + #[inline] + fn messages_per_tick(&self) -> usize { + self.apply_ctx.cfg.messages_per_tick + } +} + +impl PollHandler, ControlFsm> for ApplyPoller +where + EK: KvEngine, +{ + fn begin(&mut self, _batch_size: usize, update_cfg: F) + where + for<'a> F: FnOnce(&'a batch_system::Config), + { + let cfg = self.cfg_tracker.any_new().map(|c| c.clone()); + if let Some(cfg) = cfg { + let last_messages_per_tick = self.messages_per_tick(); + self.apply_ctx.cfg = cfg; + if self.apply_ctx.cfg.messages_per_tick != last_messages_per_tick { + self.apply_buf_capacity(); + } + update_cfg(&self.apply_ctx.cfg.apply_batch_system); + } + } + + fn handle_control(&mut self, control: &mut ControlFsm) -> Option { + control.handle_messages(&mut self.pending_latency_inspect); + for inspector in self.pending_latency_inspect.drain(..) { + // TODO: support apply duration. 
+ inspector.finish(); + } + Some(0) + } + + fn handle_normal( + &mut self, + normal: &mut impl DerefMut>, + ) -> batch_system::HandleResult { + let received_cnt = normal.recv(&mut self.apply_task_buf); + let handle_result = if received_cnt == self.messages_per_tick() { + HandleResult::KeepProcessing + } else { + HandleResult::stop_at(0, false) + }; + let mut delegate = ApplyFsmDelegate::new(normal, &mut self.apply_ctx); + delegate.handle_msgs(&mut self.apply_task_buf); + handle_result + } + + fn end(&mut self, batch: &mut [Option>>]) { + // TODO: support memory trace + } +} + +pub struct ApplyPollerBuilder { + cfg: Arc>, +} + +impl ApplyPollerBuilder { + pub fn new(cfg: Arc>) -> Self { + Self { cfg } + } +} + +impl HandlerBuilder, ControlFsm> for ApplyPollerBuilder { + type Handler = ApplyPoller; + + fn build(&mut self, priority: batch_system::Priority) -> Self::Handler { + let apply_ctx = ApplyContext::new(self.cfg.value().clone()); + let cfg_tracker = self.cfg.clone().tracker("apply".to_string()); + ApplyPoller::new(apply_ctx, cfg_tracker) + } +} + /// Batch system for applying logs pipeline. 
-pub struct ApplySystem; +pub struct ApplySystem { + system: BatchSystem, ControlFsm>, +} + +impl Deref for ApplySystem { + type Target = BatchSystem, ControlFsm>; + + fn deref(&self) -> &BatchSystem, ControlFsm> { + &self.system + } +} + +impl DerefMut for ApplySystem { + fn deref_mut(&mut self) -> &mut BatchSystem, ControlFsm> { + &mut self.system + } +} + +impl ApplySystem { + pub fn schedule_all<'a, ER: RaftEngine>(&self, peers: impl Iterator>) { + let mut mailboxes = Vec::with_capacity(peers.size_hint().0); + for peer in peers { + let apply = Apply::new(peer); + let (tx, fsm) = ApplyFsm::new(apply); + mailboxes.push(( + peer.region_id(), + BasicMailbox::new(tx, fsm, self.router().state_cnt().clone()), + )); + } + self.router().register_all(mailboxes); + } +} + +pub type ApplyRouter = BatchRouter, ControlFsm>; + +pub fn create_apply_batch_system(cfg: &Config) -> (ApplyRouter, ApplySystem) { + let (control_tx, control_fsm) = ControlFsm::new(); + let (router, system) = + batch_system::create_system(&cfg.apply_batch_system, control_tx, control_fsm); + let system = ApplySystem { system }; + (router, system) +} diff --git a/components/raftstore-v2/src/batch/mod.rs b/components/raftstore-v2/src/batch/mod.rs index e856147220d..0f4b9fba3d3 100644 --- a/components/raftstore-v2/src/batch/mod.rs +++ b/components/raftstore-v2/src/batch/mod.rs @@ -8,4 +8,5 @@ mod apply; mod store; +pub(crate) use apply::ApplyContext; pub use store::{create_store_batch_system, StoreContext, StoreSystem}; diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 6a8974259ff..2dce4b54c2a 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -19,6 +19,7 @@ use tikv_util::{ timer::SteadyTimer, }; +use super::apply::{create_apply_batch_system, ApplyPollerBuilder, ApplyRouter, ApplySystem}; use crate::{ fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate}, raft::Peer, @@ 
-92,10 +93,7 @@ impl StorePoller { } fn schedule_ticks(&mut self) { - assert_eq!( - PeerTick::get_all_ticks().len(), - self.poll_ctx.tick_batch.len() - ); + assert_eq!(PeerTick::all_ticks().len(), self.poll_ctx.tick_batch.len()); for batch in &mut self.poll_ctx.tick_batch { batch.schedule(&self.poll_ctx.timer); } @@ -111,10 +109,12 @@ impl PollHandler { system: BatchSystem, StoreFsm>, + apply_router: ApplyRouter, + apply_system: ApplySystem, logger: Logger, } @@ -280,7 +282,7 @@ impl StoreSystem { T: Transport + 'static, { let mut builder = StorePollerBuilder::new( - cfg, + cfg.clone(), store.get_id(), raft_engine, tablet_factory, @@ -288,6 +290,8 @@ impl StoreSystem { self.logger.clone(), ); let peers = builder.init()?; + self.apply_system + .schedule_all(peers.values().map(|pair| pair.1.peer())); // Choose a different name so we know what version is actually used. rs stands // for raft store. let tag = format!("rs-{}", store.get_id()); @@ -309,10 +313,15 @@ impl StoreSystem { router.force_send(addr, PeerMsg::Start).unwrap(); } router.send_control(StoreMsg::Start { store }).unwrap(); + + let apply_poller_builder = ApplyPollerBuilder::new(cfg); + self.apply_system + .spawn("apply".to_owned(), apply_poller_builder); Ok(()) } pub fn shutdown(&mut self) { + self.apply_system.shutdown(); self.system.shutdown(); } } @@ -332,6 +341,12 @@ where let (store_tx, store_fsm) = StoreFsm::new(cfg, store); let (router, system) = batch_system::create_system(&cfg.store_batch_system, store_tx, store_fsm); - let system = StoreSystem { system, logger }; + let (apply_router, apply_system) = create_apply_batch_system(cfg); + let system = StoreSystem { + system, + apply_router, + apply_system, + logger, + }; (router, system) } diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index bb3db8c75d3..43e3441528e 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -1 +1,73 @@ // Copyright 2022 
TiKV Project Authors. Licensed under Apache-2.0. + +use batch_system::Fsm; +use crossbeam::channel::TryRecvError; +use engine_traits::KvEngine; +use tikv_util::mpsc::{self, LooseBoundedSender, Receiver}; + +use crate::{batch::ApplyContext, raft::Apply, router::ApplyTask}; + +pub struct ApplyFsm { + apply: Apply, + receiver: Receiver, + is_stopped: bool, +} + +impl ApplyFsm { + pub fn new(apply: Apply) -> (LooseBoundedSender, Box) { + let (tx, rx) = mpsc::loose_bounded(usize::MAX); + ( + tx, + Box::new(Self { + apply, + receiver: rx, + is_stopped: false, + }), + ) + } + + /// Fetches tasks to `apply_task_buf`. It will stop when the buffer is full. + /// + /// Returns how many messages are fetched. + pub fn recv(&mut self, apply_task_buf: &mut Vec) -> usize { + let l = apply_task_buf.len(); + for i in l..apply_task_buf.capacity() { + match self.receiver.try_recv() { + Ok(msg) => apply_task_buf.push(msg), + Err(e) => { + if let TryRecvError::Disconnected = e { + self.is_stopped = true; + } + return i - l; + } + } + } + apply_task_buf.capacity() - l + } +} + +impl Fsm for ApplyFsm { + type Message = ApplyTask; + + #[inline] + fn is_stopped(&self) -> bool { + self.is_stopped + } +} + +pub struct ApplyFsmDelegate<'a, EK: KvEngine> { + fsm: &'a mut ApplyFsm, + apply_ctx: &'a mut ApplyContext, +} + +impl<'a, EK: KvEngine> ApplyFsmDelegate<'a, EK> { + pub fn new(fsm: &'a mut ApplyFsm, apply_ctx: &'a mut ApplyContext) -> Self { + Self { fsm, apply_ctx } + } + + pub fn handle_msgs(&self, apply_task_buf: &mut Vec) { + for task in apply_task_buf.drain(..) { + // TODO: handle the tasks. 
+ } + } +} diff --git a/components/raftstore-v2/src/fsm/mod.rs b/components/raftstore-v2/src/fsm/mod.rs index 9f3bcefac46..02f788d3be2 100644 --- a/components/raftstore-v2/src/fsm/mod.rs +++ b/components/raftstore-v2/src/fsm/mod.rs @@ -9,5 +9,6 @@ mod apply; mod peer; mod store; +pub use apply::{ApplyFsm, ApplyFsmDelegate}; pub use peer::{PeerFsm, PeerFsmDelegate, SenderFsmPair}; pub use store::{StoreFsm, StoreFsmDelegate}; diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 220fa0b2d33..71062161384 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -16,6 +16,7 @@ mod fsm; mod operation; mod raft; mod router; +mod tablet; pub(crate) use batch::StoreContext; pub use batch::{create_store_batch_system, StoreSystem}; diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs new file mode 100644 index 00000000000..0c7abf52b58 --- /dev/null +++ b/components/raftstore-v2/src/raft/apply.rs @@ -0,0 +1,23 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, RaftEngine}; +use slog::Logger; + +use super::Peer; +use crate::tablet::CachedTablet; + +/// Apply applies all the committed commands to kv db. +pub struct Apply { + tablet: CachedTablet, + logger: Logger, +} + +impl Apply { + #[inline] + pub fn new(peer: &Peer) -> Self { + Apply { + tablet: peer.tablet().clone(), + logger: peer.logger().clone(), + } + } +} diff --git a/components/raftstore-v2/src/raft/mod.rs b/components/raftstore-v2/src/raft/mod.rs index 045e9ff89b3..c1d6a522d79 100644 --- a/components/raftstore-v2/src/raft/mod.rs +++ b/components/raftstore-v2/src/raft/mod.rs @@ -1,7 +1,9 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+mod apply; mod peer; mod storage; +pub use apply::Apply; pub use peer::Peer; pub use storage::{write_initial_states, Storage}; diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index e2ccb068cbc..e52ec322445 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -10,12 +10,15 @@ use slog::{o, Logger}; use tikv_util::{box_err, config::ReadableSize}; use super::storage::Storage; -use crate::Result; +use crate::{ + tablet::{self, CachedTablet}, + Result, +}; /// A peer that delegates commands between state machine and raft. pub struct Peer { raft_group: RawNode>, - tablet: Option, + tablet: CachedTablet, logger: Logger, } @@ -57,6 +60,8 @@ impl Peer { }; let tablet_index = s.region_state().get_tablet_index(); + // Another option is always create tablet even if tablet index is 0. But this can + // introduce race when gc old tablet and create new peer. let tablet = if tablet_index != 0 { if !tablet_factory.exists(region_id, tablet_index) { return Err(box_err!( @@ -73,7 +78,7 @@ impl Peer { Ok(Some(Peer { raft_group: RawNode::new(&raft_cfg, s, &logger)?, - tablet, + tablet: CachedTablet::new(tablet), logger, })) } @@ -94,7 +99,7 @@ impl Peer { } #[inline] - pub fn tablet(&self) -> &Option { + pub fn tablet(&self) -> &CachedTablet { &self.tablet } diff --git a/components/raftstore-v2/src/router/internal_message.rs b/components/raftstore-v2/src/router/internal_message.rs new file mode 100644 index 00000000000..f5ef72d8e30 --- /dev/null +++ b/components/raftstore-v2/src/router/internal_message.rs @@ -0,0 +1,3 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +pub enum ApplyTask {} diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 75011163e83..37d9515d301 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -148,7 +148,7 @@ pub enum PeerTick { } impl PeerTick { - pub const VARIANT_COUNT: usize = Self::get_all_ticks().len(); + pub const VARIANT_COUNT: usize = Self::all_ticks().len(); #[inline] pub fn tag(self) -> &'static str { @@ -166,7 +166,7 @@ impl PeerTick { } } - pub const fn get_all_ticks() -> &'static [PeerTick] { + pub const fn all_ticks() -> &'static [PeerTick] { const TICKS: &[PeerTick] = &[ PeerTick::Raft, PeerTick::RaftLogGc, diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs index fd27349ef43..a7c7672b835 100644 --- a/components/raftstore-v2/src/router/mod.rs +++ b/components/raftstore-v2/src/router/mod.rs @@ -1,5 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +mod internal_message; mod message; +pub(crate) use internal_message::ApplyTask; pub use message::{PeerMsg, PeerTick, StoreMsg, StoreTick}; diff --git a/components/raftstore-v2/src/tablet.rs b/components/raftstore-v2/src/tablet.rs new file mode 100644 index 00000000000..2293eaed033 --- /dev/null +++ b/components/raftstore-v2/src/tablet.rs @@ -0,0 +1,93 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::sync::{ + atomic::{AtomicU64, Ordering}, + Arc, Mutex, +}; + +struct LatestTablet { + data: Mutex>, + version: AtomicU64, +} + +/// Tablet may change during split, merge and applying snapshot. So we need a shared value to +/// reflect the latest tablet. `CachedTablet` provide cache that can speed up common access. 
+#[derive(Clone)] +pub struct CachedTablet { + latest: Arc>, + cache: Option, + version: u64, +} + +impl CachedTablet { + #[inline] + pub fn new(data: Option) -> Self { + CachedTablet { + latest: Arc::new(LatestTablet { + data: Mutex::new(data.clone()), + version: AtomicU64::new(0), + }), + cache: data, + version: 0, + } + } + + pub fn set(&mut self, data: EK) { + let mut guard = self.latest.data.lock().unwrap(); + *guard = Some(data.clone()); + let v = self.latest.version.fetch_add(1, Ordering::Relaxed); + drop(guard); + self.cache = Some(data); + self.version = v; + } + + /// Get the tablet from cache without checking if it's up to date. + #[inline] + pub fn cache(&self) -> Option<&EK> { + self.cache.as_ref() + } + + /// Get the latest tablet. + #[inline] + pub fn latest(&mut self) -> Option<&EK> { + if self.latest.version.load(Ordering::Relaxed) > self.version { + let guard = self.latest.data.lock().unwrap(); + self.version = self.latest.version.load(Ordering::Relaxed); + self.cache = guard.clone(); + } + self.cache() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cached_tablet() { + let mut cached_tablet = CachedTablet::new(None); + assert_eq!(cached_tablet.cache(), None); + assert_eq!(cached_tablet.latest(), None); + + cached_tablet = CachedTablet::new(Some(1)); + assert_eq!(cached_tablet.cache().cloned(), Some(1)); + assert_eq!(cached_tablet.latest().cloned(), Some(1)); + + // Setting tablet will refresh cache immediately. + cached_tablet.set(2); + assert_eq!(cached_tablet.cache().cloned(), Some(2)); + assert_eq!(cached_tablet.latest().cloned(), Some(2)); + + let mut cloned = cached_tablet.clone(); + // Clone should reuse cache. + assert_eq!(cloned.cache().cloned(), Some(2)); + cloned.set(1); + assert_eq!(cloned.cache().cloned(), Some(1)); + assert_eq!(cloned.latest().cloned(), Some(1)); + + // Local cache won't be refreshed until querying latest. 
+ assert_eq!(cached_tablet.cache().cloned(), Some(2)); + assert_eq!(cached_tablet.latest().cloned(), Some(1)); + assert_eq!(cached_tablet.cache().cloned(), Some(1)); + } +} diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 16e039dd640..88bff373760 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -3831,7 +3831,7 @@ pub struct ControlFsm { } impl ControlFsm { - fn new() -> (LooseBoundedSender, Box) { + pub fn new() -> (LooseBoundedSender, Box) { let (tx, rx) = loose_bounded(std::usize::MAX); let fsm = Box::new(ControlFsm { stopped: false, @@ -3839,6 +3839,28 @@ impl ControlFsm { }); (tx, fsm) } + + pub fn handle_messages(&mut self, pending_latency_inspect: &mut Vec) { + // Usually there will be only 1 control message. + loop { + match self.receiver.try_recv() { + Ok(ControlMsg::LatencyInspect { + send_time, + mut inspector, + }) => { + inspector.record_apply_wait(send_time.saturating_elapsed()); + pending_latency_inspect.push(inspector); + } + Err(TryRecvError::Empty) => { + return; + } + Err(TryRecvError::Disconnected) => { + self.stopped = true; + return; + } + } + } + } } impl Fsm for ControlFsm { @@ -3887,27 +3909,11 @@ where } fn handle_control(&mut self, control: &mut ControlFsm) -> Option { - loop { - match control.receiver.try_recv() { - Ok(ControlMsg::LatencyInspect { - send_time, - mut inspector, - }) => { - if self.apply_ctx.timer.is_none() { - self.apply_ctx.timer = Some(Instant::now_coarse()); - } - inspector.record_apply_wait(send_time.saturating_elapsed()); - self.apply_ctx.pending_latency_inspect.push(inspector); - } - Err(TryRecvError::Empty) => { - return Some(0); - } - Err(TryRecvError::Disconnected) => { - control.stopped = true; - return Some(0); - } - } + control.handle_messages(&mut self.apply_ctx.pending_latency_inspect); + if !self.apply_ctx.pending_latency_inspect.is_empty() && self.apply_ctx.timer.is_none() { + 
self.apply_ctx.timer = Some(Instant::now_coarse()); } + Some(0) } fn handle_normal(&mut self, normal: &mut impl DerefMut>) -> HandleResult { From 50f6c6fc294c2aa425b4684b357a5713681895b7 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Wed, 27 Jul 2022 11:51:11 +0800 Subject: [PATCH 0113/1149] pd-client: pd client should update if the grpc stream sender failed. (#13094) close tikv/tikv#12934 Signed-off-by: bufferflies <1045931706@qq.com> --- components/error_code/src/pd.rs | 1 + components/pd_client/src/client.rs | 8 +++- components/pd_client/src/errors.rs | 6 ++- components/test_pd/src/mocker/service.rs | 1 + tests/integrations/pd/test_rpc_client.rs | 53 ++++++++++++++++++++++++ 5 files changed, 66 insertions(+), 3 deletions(-) diff --git a/components/error_code/src/pd.rs b/components/error_code/src/pd.rs index 60952e96922..018c86c3d39 100644 --- a/components/error_code/src/pd.rs +++ b/components/error_code/src/pd.rs @@ -8,6 +8,7 @@ define_error_codes!( CLUSTER_NOT_BOOTSTRAPPED => ("ClusterNotBootstraped", "", ""), INCOMPATIBLE => ("Imcompatible", "", ""), GRPC => ("gRPC", "", ""), + STREAM_DISCONNECT => ("StreamDisconnect","",""), REGION_NOT_FOUND => ("RegionNotFound", "", ""), STORE_TOMBSTONE => ("StoreTombstone", "", ""), GLOBAL_CONFIG_NOT_FOUND => ("GlobalConfigNotFound","",""), diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index facf2e24b76..173b25357c4 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -619,6 +619,9 @@ impl PdClient for RpcClient { if last > last_report { last_report = last - 1; } + fail::fail_point!("region_heartbeat_send_failed", |_| { + Err(Error::Grpc(grpcio::Error::RemoteStopped)) + }); Ok((r, WriteFlags::default())) })) .await; @@ -643,7 +646,8 @@ impl PdClient for RpcClient { .expect("expect region heartbeat sender"); let ret = sender .unbounded_send(req) - .map_err(|e| Error::Other(Box::new(e))); + .map_err(|e| 
Error::StreamDisconnect(e.into_send_error())); + Box::pin(future::ready(ret)) as PdFuture<_> }; @@ -1048,7 +1052,7 @@ impl PdClient for RpcClient { .expect("expect region buckets sender"); let ret = sender .unbounded_send(req) - .map_err(|e| Error::Other(Box::new(e))); + .map_err(|e| Error::StreamDisconnect(e.into_send_error())); Box::pin(future::ready(ret)) as PdFuture<_> }; diff --git a/components/pd_client/src/errors.rs b/components/pd_client/src/errors.rs index a9e4ffe6266..61adceec391 100644 --- a/components/pd_client/src/errors.rs +++ b/components/pd_client/src/errors.rs @@ -3,6 +3,7 @@ use std::{error, result}; use error_code::{self, ErrorCode, ErrorCodeExt}; +use futures::channel::mpsc::SendError; use thiserror::Error; #[derive(Debug, Error)] @@ -15,6 +16,8 @@ pub enum Error { Incompatible, #[error("{0}")] Grpc(#[from] grpcio::Error), + #[error("{0}")] + StreamDisconnect(#[from] SendError), #[error("unknown error {0:?}")] Other(#[from] Box), #[error("region is not found for key {}", log_wrappers::Value::key(.0))] @@ -30,7 +33,7 @@ pub type Result = result::Result; impl Error { pub fn retryable(&self) -> bool { match self { - Error::Grpc(_) | Error::ClusterNotBootstrapped(_) => true, + Error::Grpc(_) | Error::ClusterNotBootstrapped(_) | Error::StreamDisconnect(_) => true, Error::Other(_) | Error::RegionNotFound(_) | Error::StoreTombstone(_) @@ -48,6 +51,7 @@ impl ErrorCodeExt for Error { Error::ClusterNotBootstrapped(_) => error_code::pd::CLUSTER_NOT_BOOTSTRAPPED, Error::Incompatible => error_code::pd::INCOMPATIBLE, Error::Grpc(_) => error_code::pd::GRPC, + Error::StreamDisconnect(_) => error_code::pd::STREAM_DISCONNECT, Error::RegionNotFound(_) => error_code::pd::REGION_NOT_FOUND, Error::StoreTombstone(_) => error_code::pd::STORE_TOMBSTONE, Error::GlobalConfigNotFound(_) => error_code::pd::GLOBAL_CONFIG_NOT_FOUND, diff --git a/components/test_pd/src/mocker/service.rs b/components/test_pd/src/mocker/service.rs index 572eb9534f9..2ff5c178c67 100644 --- 
a/components/test_pd/src/mocker/service.rs +++ b/components/test_pd/src/mocker/service.rs @@ -238,6 +238,7 @@ impl PdMocker for Service { .insert(region_id, req.get_leader().clone()); let mut resp = RegionHeartbeatResponse::default(); + resp.set_region_id(req.get_region().get_id()); let header = Service::header(); resp.set_header(header); Some(Ok(resp)) diff --git a/tests/integrations/pd/test_rpc_client.rs b/tests/integrations/pd/test_rpc_client.rs index eb0337f8a22..20fc6b70908 100644 --- a/tests/integrations/pd/test_rpc_client.rs +++ b/tests/integrations/pd/test_rpc_client.rs @@ -475,6 +475,59 @@ fn test_change_leader_async() { panic!("failed, leader should changed"); } +#[test] +fn test_pd_client_heartbeat_send_failed() { + let pd_client_send_fail_fp = "region_heartbeat_send_failed"; + fail::cfg(pd_client_send_fail_fp, "return()").unwrap(); + let server = MockServer::with_case(1, Arc::new(AlreadyBootstrapped)); + let eps = server.bind_addrs(); + + let client = new_client(eps, None); + let poller = Builder::new_multi_thread() + .thread_name(thd_name!("poller")) + .worker_threads(1) + .build() + .unwrap(); + let (tx, rx) = mpsc::channel(); + let f = + client.handle_region_heartbeat_response(1, move |resp| tx.send(resp).unwrap_or_default()); + poller.spawn(f); + + let heartbeat_send_fail = |ok| { + let mut region = metapb::Region::default(); + region.set_id(1); + poller.spawn(client.region_heartbeat( + store::RAFT_INIT_LOG_TERM, + region, + metapb::Peer::default(), + RegionStat::default(), + None, + )); + let rsp = rx.recv_timeout(Duration::from_millis(100)); + if ok { + assert!(rsp.is_ok()); + assert_eq!(rsp.unwrap().get_region_id(), 1); + } else { + assert!(rsp.is_err()); + } + + let region = block_on(client.get_region_by_id(1)); + if ok { + assert!(region.is_ok()); + let r = region.unwrap(); + assert!(r.is_some()); + assert_eq!(1, r.unwrap().get_id()); + } else { + assert!(region.is_err()); + } + }; + // send fail if network is block. 
+ heartbeat_send_fail(false); + fail::remove(pd_client_send_fail_fp); + // send success after network recovered. + heartbeat_send_fail(true); +} + #[test] fn test_region_heartbeat_on_leader_change() { let eps_count = 3; From 6a9db360d9d49a03696473a4d6402606b68b2686 Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 27 Jul 2022 14:09:10 +0800 Subject: [PATCH 0114/1149] raftstore: extract EntryStorage (#13115) ref tikv/tikv#12842 This PR extract part of `PeerStorage` as `EntryStorage`, which only serves entry access. It will be reused by raftstorev2. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- .../src/store/async_io/write_tests.rs | 9 +- .../raftstore/src/store/entry_storage.rs | 1417 ++++++++++++++++ components/raftstore/src/store/fsm/apply.rs | 67 +- components/raftstore/src/store/fsm/peer.rs | 2 +- components/raftstore/src/store/mod.rs | 10 +- components/raftstore/src/store/peer.rs | 26 +- .../raftstore/src/store/peer_storage.rs | 1502 ++--------------- components/raftstore/src/store/worker/read.rs | 44 +- .../raftstore/src/store/worker/region.rs | 16 +- 9 files changed, 1603 insertions(+), 1490 deletions(-) create mode 100644 components/raftstore/src/store/entry_storage.rs diff --git a/components/raftstore/src/store/async_io/write_tests.rs b/components/raftstore/src/store/async_io/write_tests.rs index 97d41824a62..04ece802a45 100644 --- a/components/raftstore/src/store/async_io/write_tests.rs +++ b/components/raftstore/src/store/async_io/write_tests.rs @@ -11,7 +11,7 @@ use tempfile::Builder; use super::*; use crate::{ - store::{Config, Transport}, + store::{peer_storage::tests::new_entry, Config, Transport}, Result, }; @@ -42,13 +42,6 @@ fn must_have_entries_and_state( } } -fn new_entry(index: u64, term: u64) -> Entry { - let mut e = Entry::default(); - e.set_index(index); - e.set_term(term); - e -} - fn new_raft_state(term: u64, vote: u64, commit: u64, last_index: u64) -> RaftLocalState { let mut raft_state = RaftLocalState::new(); 
raft_state.mut_hard_state().set_term(term); diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs new file mode 100644 index 00000000000..4f751a35b17 --- /dev/null +++ b/components/raftstore/src/store/entry_storage.rs @@ -0,0 +1,1417 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module contains the implementation of the `EntryStorage`, which covers a subset of +//! raft storage. This module will be shared between raftstore v1 and v2. + +use std::{ + cell::{Cell, RefCell}, + cmp, + collections::VecDeque, + mem, + ops::Range, + sync::{Arc, Mutex}, +}; + +use collections::HashMap; +use engine_traits::{KvEngine, RaftEngine, RAFT_LOG_MULTI_GET_CNT}; +use fail::fail_point; +use kvproto::raft_serverpb::{RaftApplyState, RaftLocalState}; +use protobuf::Message; +use raft::{prelude::*, util::limit_size, GetEntriesContext, StorageError}; +use tikv_alloc::TraceEvent; +use tikv_util::{debug, info, worker::Scheduler}; + +use super::{metrics::*, peer_storage::storage_error, WriteTask, MEMTRACE_ENTRY_CACHE}; +use crate::{bytes_capacity, store::worker::RaftlogFetchTask}; + +const MAX_ASYNC_FETCH_TRY_CNT: usize = 3; +const SHRINK_CACHE_CAPACITY: usize = 64; +const ENTRY_MEM_SIZE: usize = mem::size_of::(); + +pub const MAX_INIT_ENTRY_COUNT: usize = 1024; + +#[inline] +pub fn first_index(state: &RaftApplyState) -> u64 { + state.get_truncated_state().get_index() + 1 +} + +#[inline] +pub fn last_index(state: &RaftLocalState) -> u64 { + state.get_last_index() +} + +/// Committed entries sent to apply threads. +#[derive(Clone)] +pub struct CachedEntries { + pub range: Range, + // Entries and dangle size for them. `dangle` means not in entry cache. 
+ entries: Arc, usize)>>, +} + +impl CachedEntries { + pub fn new(entries: Vec) -> Self { + assert!(!entries.is_empty()); + let start = entries.first().map(|x| x.index).unwrap(); + let end = entries.last().map(|x| x.index).unwrap() + 1; + let range = Range { start, end }; + CachedEntries { + entries: Arc::new(Mutex::new((entries, 0))), + range, + } + } + + /// Take cached entries and dangle size for them. `dangle` means not in entry cache. + pub fn take_entries(&self) -> (Vec, usize) { + mem::take(&mut *self.entries.lock().unwrap()) + } +} + +struct EntryCache { + // The last index of persisted entry. + // It should be equal to `RaftLog::persisted`. + persisted: u64, + cache: VecDeque, + trace: VecDeque, + hit: Cell, + miss: Cell, + #[cfg(test)] + size_change_cb: Option>, +} + +impl EntryCache { + fn first_index(&self) -> Option { + self.cache.front().map(|e| e.get_index()) + } + + fn fetch_entries_to( + &self, + begin: u64, + end: u64, + mut fetched_size: u64, + max_size: u64, + ents: &mut Vec, + ) { + if begin >= end { + return; + } + assert!(!self.cache.is_empty()); + let cache_low = self.cache.front().unwrap().get_index(); + let start_idx = begin.checked_sub(cache_low).unwrap() as usize; + let limit_idx = end.checked_sub(cache_low).unwrap() as usize; + + let mut end_idx = start_idx; + self.cache + .iter() + .skip(start_idx) + .take_while(|e| { + let cur_idx = end_idx as u64 + cache_low; + assert_eq!(e.get_index(), cur_idx); + let m = u64::from(e.compute_size()); + fetched_size += m; + if fetched_size == m { + end_idx += 1; + fetched_size <= max_size && end_idx < limit_idx + } else if fetched_size <= max_size { + end_idx += 1; + end_idx < limit_idx + } else { + false + } + }) + .count(); + // Cache either is empty or contains latest log. Hence we don't need to fetch log + // from rocksdb anymore. 
+ assert!(end_idx == limit_idx || fetched_size > max_size); + let (first, second) = tikv_util::slices_in_range(&self.cache, start_idx, end_idx); + ents.extend_from_slice(first); + ents.extend_from_slice(second); + } + + fn append(&mut self, region_id: u64, peer_id: u64, entries: &[Entry]) { + if !entries.is_empty() { + let mut mem_size_change = 0; + let old_capacity = self.cache.capacity(); + mem_size_change += self.append_impl(region_id, peer_id, entries); + let new_capacity = self.cache.capacity(); + mem_size_change += Self::cache_vec_mem_size_change(new_capacity, old_capacity); + mem_size_change += self.shrink_if_necessary(); + self.flush_mem_size_change(mem_size_change); + } + } + + fn append_impl(&mut self, region_id: u64, peer_id: u64, entries: &[Entry]) -> i64 { + let mut mem_size_change = 0; + + if let Some(cache_last_index) = self.cache.back().map(|e| e.get_index()) { + let first_index = entries[0].get_index(); + if cache_last_index >= first_index { + let cache_len = self.cache.len(); + let truncate_to = cache_len + .checked_sub((cache_last_index - first_index + 1) as usize) + .unwrap_or_default(); + let trunc_to_idx = self.cache[truncate_to].index; + for e in self.cache.drain(truncate_to..) { + mem_size_change -= + (bytes_capacity(&e.data) + bytes_capacity(&e.context)) as i64; + } + if let Some(cached) = self.trace.back() { + // Only committed entries can be traced, and only uncommitted entries + // can be truncated. So there won't be any overlaps. + let cached_last = cached.range.end - 1; + assert!(cached_last < trunc_to_idx); + } + } else if cache_last_index + 1 < first_index { + panic!( + "[region {}] {} unexpected hole: {} < {}", + region_id, peer_id, cache_last_index, first_index + ); + } + } + + for e in entries { + self.cache.push_back(e.to_owned()); + mem_size_change += (bytes_capacity(&e.data) + bytes_capacity(&e.context)) as i64; + } + // In the past, the entry cache will be truncated if its size exceeds a certain number. 
+ // However, after introducing async write io, the entry must stay in cache if it's not + // persisted to raft db because the raft-rs may need to read entries.(e.g. leader sends + // MsgAppend to followers) + + mem_size_change + } + + pub fn entry(&self, idx: u64) -> Option<&Entry> { + let cache_low = self.cache.front()?.get_index(); + if idx >= cache_low { + Some(&self.cache[(idx - cache_low) as usize]) + } else { + None + } + } + + /// Compact all entries whose indexes are less than `idx`. + pub fn compact_to(&mut self, mut idx: u64) -> u64 { + if idx > self.persisted + 1 { + // Only the persisted entries can be compacted + idx = self.persisted + 1; + } + + let mut mem_size_change = 0; + + // Clean cached entries which have been already sent to apply threads. For example, + // if entries [1, 10), [10, 20), [20, 30) are sent to apply threads and `compact_to(15)` + // is called, only [20, 30) will still be kept in cache. + let old_trace_cap = self.trace.capacity(); + while let Some(cached_entries) = self.trace.pop_front() { + if cached_entries.range.start >= idx { + self.trace.push_front(cached_entries); + let trace_len = self.trace.len(); + let trace_cap = self.trace.capacity(); + if trace_len < SHRINK_CACHE_CAPACITY && trace_cap > SHRINK_CACHE_CAPACITY { + self.trace.shrink_to(SHRINK_CACHE_CAPACITY); + } + break; + } + let (_, dangle_size) = cached_entries.take_entries(); + mem_size_change -= dangle_size as i64; + idx = cmp::max(cached_entries.range.end, idx); + } + let new_trace_cap = self.trace.capacity(); + mem_size_change += Self::trace_vec_mem_size_change(new_trace_cap, old_trace_cap); + + let cache_first_idx = self.first_index().unwrap_or(u64::MAX); + if cache_first_idx >= idx { + self.flush_mem_size_change(mem_size_change); + assert!(mem_size_change <= 0); + return -mem_size_change as u64; + } + + let cache_last_idx = self.cache.back().unwrap().get_index(); + // Use `cache_last_idx + 1` to make sure cache can be cleared completely if necessary. 
+ let compact_to = (cmp::min(cache_last_idx + 1, idx) - cache_first_idx) as usize; + for e in self.cache.drain(..compact_to) { + mem_size_change -= (bytes_capacity(&e.data) + bytes_capacity(&e.context)) as i64 + } + + mem_size_change += self.shrink_if_necessary(); + self.flush_mem_size_change(mem_size_change); + assert!(mem_size_change <= 0); + -mem_size_change as u64 + } + + fn total_mem_size(&self) -> i64 { + let data_size: i64 = self + .cache + .iter() + .map(|e| (bytes_capacity(&e.data) + bytes_capacity(&e.context)) as i64) + .sum(); + let cache_vec_size = Self::cache_vec_mem_size_change(self.cache.capacity(), 0); + let trace_vec_size = Self::trace_vec_mem_size_change(self.trace.capacity(), 0); + data_size + cache_vec_size + trace_vec_size + } + + fn cache_vec_mem_size_change(new_capacity: usize, old_capacity: usize) -> i64 { + ENTRY_MEM_SIZE as i64 * (new_capacity as i64 - old_capacity as i64) + } + + fn trace_vec_mem_size_change(new_capacity: usize, old_capacity: usize) -> i64 { + mem::size_of::() as i64 * (new_capacity as i64 - old_capacity as i64) + } + + fn flush_mem_size_change(&self, mem_size_change: i64) { + #[cfg(test)] + if let Some(size_change_cb) = self.size_change_cb.as_ref() { + size_change_cb(mem_size_change); + } + let event = if mem_size_change > 0 { + TraceEvent::Add(mem_size_change as usize) + } else { + TraceEvent::Sub(-mem_size_change as usize) + }; + MEMTRACE_ENTRY_CACHE.trace(event); + RAFT_ENTRIES_CACHES_GAUGE.add(mem_size_change); + } + + fn flush_stats(&self) { + let hit = self.hit.replace(0); + RAFT_ENTRY_FETCHES.hit.inc_by(hit); + let miss = self.miss.replace(0); + RAFT_ENTRY_FETCHES.miss.inc_by(miss); + } + + #[inline] + fn is_empty(&self) -> bool { + self.cache.is_empty() + } + + fn trace_cached_entries(&mut self, entries: CachedEntries) { + let dangle_size = { + let mut guard = entries.entries.lock().unwrap(); + + let last_idx = guard.0.last().map(|e| e.index).unwrap(); + let cache_front = match self.cache.front().map(|e| e.index) 
{ + Some(i) => i, + None => u64::MAX, + }; + + let dangle_range = if last_idx < cache_front { + // All entries are not in entry cache. + 0..guard.0.len() + } else if let Ok(i) = guard.0.binary_search_by(|e| e.index.cmp(&cache_front)) { + // Some entries are in entry cache. + 0..i + } else { + // All entries are in entry cache. + 0..0 + }; + + let mut size = 0; + for e in &guard.0[dangle_range] { + size += bytes_capacity(&e.data) + bytes_capacity(&e.context); + } + guard.1 = size; + size + }; + + let old_capacity = self.trace.capacity(); + self.trace.push_back(entries); + let new_capacity = self.trace.capacity(); + let diff = Self::trace_vec_mem_size_change(new_capacity, old_capacity); + + self.flush_mem_size_change(diff + dangle_size as i64); + } + + fn shrink_if_necessary(&mut self) -> i64 { + if self.cache.len() < SHRINK_CACHE_CAPACITY && self.cache.capacity() > SHRINK_CACHE_CAPACITY + { + let old_capacity = self.cache.capacity(); + self.cache.shrink_to_fit(); + let new_capacity = self.cache.capacity(); + return Self::cache_vec_mem_size_change(new_capacity, old_capacity); + } + 0 + } + + fn update_persisted(&mut self, persisted: u64) { + self.persisted = persisted; + } +} + +impl Default for EntryCache { + fn default() -> Self { + let entry_cache = EntryCache { + persisted: 0, + cache: Default::default(), + trace: Default::default(), + hit: Cell::new(0), + miss: Cell::new(0), + #[cfg(test)] + size_change_cb: None, + }; + entry_cache.flush_mem_size_change(entry_cache.total_mem_size()); + entry_cache + } +} + +impl Drop for EntryCache { + fn drop(&mut self) { + let mem_size_change = self.total_mem_size(); + self.flush_mem_size_change(-mem_size_change); + self.flush_stats(); + } +} + +#[derive(Debug, PartialEq)] +pub enum RaftlogFetchState { + Fetching, + Fetched(Box), +} + +#[derive(Debug, PartialEq)] +pub struct RaftlogFetchResult { + pub ents: raft::Result>, + // because entries may be empty, so store the original low index that the task issued + pub low: u64, + 
// the original max size that the task issued + pub max_size: u64, + // if the ents hit max_size + pub hit_size_limit: bool, + // the times that async fetch have already tried + pub tried_cnt: usize, + // the term when the task issued + pub term: u64, +} + +#[derive(Default)] +struct AsyncFetchStats { + async_fetch: Cell, + sync_fetch: Cell, + fallback_fetch: Cell, + fetch_invalid: Cell, + fetch_unused: Cell, +} + +impl AsyncFetchStats { + fn flush_stats(&mut self) { + RAFT_ENTRY_FETCHES + .async_fetch + .inc_by(self.async_fetch.replace(0)); + RAFT_ENTRY_FETCHES + .sync_fetch + .inc_by(self.sync_fetch.replace(0)); + RAFT_ENTRY_FETCHES + .fallback_fetch + .inc_by(self.fallback_fetch.replace(0)); + RAFT_ENTRY_FETCHES + .fetch_invalid + .inc_by(self.fetch_invalid.replace(0)); + RAFT_ENTRY_FETCHES + .fetch_unused + .inc_by(self.fetch_unused.replace(0)); + } +} + +/// A subset of `PeerStorage` that focus on accessing log entries. +pub struct EntryStorage { + region_id: u64, + peer_id: u64, + raft_engine: ER, + cache: EntryCache, + raft_state: RaftLocalState, + apply_state: RaftApplyState, + last_term: u64, + applied_term: u64, + raftlog_fetch_scheduler: Scheduler, + raftlog_fetch_stats: AsyncFetchStats, + async_fetch_results: RefCell>, +} + +impl EntryStorage { + pub fn new( + region_id: u64, + peer_id: u64, + raft_engine: ER, + raft_state: RaftLocalState, + apply_state: RaftApplyState, + last_term: u64, + applied_term: u64, + raftlog_fetch_scheduler: Scheduler, + ) -> Self { + EntryStorage { + region_id, + peer_id, + raft_engine, + cache: EntryCache::default(), + raft_state, + apply_state, + last_term, + applied_term, + raftlog_fetch_scheduler, + raftlog_fetch_stats: AsyncFetchStats::default(), + async_fetch_results: RefCell::new(HashMap::default()), + } + } + + fn check_range(&self, low: u64, high: u64) -> raft::Result<()> { + if low > high { + return Err(storage_error(format!( + "low: {} is greater that high: {}", + low, high + ))); + } else if low <= 
self.truncated_index() { + return Err(raft::Error::Store(StorageError::Compacted)); + } else if high > self.last_index() + 1 { + return Err(storage_error(format!( + "entries' high {} is out of bound lastindex {}", + high, + self.last_index() + ))); + } + Ok(()) + } + + pub fn clean_async_fetch_res(&mut self, low: u64) { + self.async_fetch_results.borrow_mut().remove(&low); + } + + // Update the async fetch result. + // None indicates cleanning the fetched result. + pub fn update_async_fetch_res(&mut self, low: u64, res: Option>) { + // If it's in fetching, don't clean the async fetch result. + if self.async_fetch_results.borrow().get(&low) == Some(&RaftlogFetchState::Fetching) + && res.is_none() + { + return; + } + + match res { + Some(res) => { + if let Some(RaftlogFetchState::Fetched(prev)) = self + .async_fetch_results + .borrow_mut() + .insert(low, RaftlogFetchState::Fetched(res)) + { + info!( + "unconsumed async fetch res"; + "region_id" => self.region_id, + "peer_id" => self.peer_id, + "res" => ?prev, + "low" => low, + ); + } + } + None => { + let prev = self.async_fetch_results.borrow_mut().remove(&low); + if prev.is_some() { + self.raftlog_fetch_stats.fetch_unused.update(|m| m + 1); + } + } + } + } + + fn async_fetch( + &self, + region_id: u64, + low: u64, + high: u64, + max_size: u64, + context: GetEntriesContext, + buf: &mut Vec, + ) -> raft::Result { + if let Some(RaftlogFetchState::Fetching) = self.async_fetch_results.borrow().get(&low) { + // already an async fetch in flight + return Err(raft::Error::Store( + raft::StorageError::LogTemporarilyUnavailable, + )); + } + + let tried_cnt = if let Some(RaftlogFetchState::Fetched(res)) = + self.async_fetch_results.borrow_mut().remove(&low) + { + assert_eq!(res.low, low); + let mut ents = res.ents?; + let first = ents.first().map(|e| e.index).unwrap(); + assert_eq!(first, res.low); + let last = ents.last().map(|e| e.index).unwrap(); + + if last + 1 >= high { + // async fetch res covers [low, high) + 
ents.truncate((high - first) as usize); + assert_eq!(ents.last().map(|e| e.index).unwrap(), high - 1); + if max_size < res.max_size { + limit_size(&mut ents, Some(max_size)); + } + let count = ents.len(); + buf.append(&mut ents); + fail_point!("on_async_fetch_return"); + return Ok(count); + } else if res.hit_size_limit && max_size <= res.max_size { + // async fetch res doesn't cover [low, high) due to hit size limit + if max_size < res.max_size { + limit_size(&mut ents, Some(max_size)); + }; + let count = ents.len(); + buf.append(&mut ents); + return Ok(count); + } else if last + RAFT_LOG_MULTI_GET_CNT > high - 1 + && res.tried_cnt + 1 == MAX_ASYNC_FETCH_TRY_CNT + { + let mut fetched_size = ents.iter().fold(0, |acc, e| acc + e.compute_size() as u64); + if max_size <= fetched_size { + limit_size(&mut ents, Some(max_size)); + let count = ents.len(); + buf.append(&mut ents); + return Ok(count); + } + + // the count of left entries isn't too large, fetch the remaining entries synchronously one by one + for idx in last + 1..high { + let ent = self.raft_engine.get_entry(region_id, idx)?; + match ent { + None => { + return Err(raft::Error::Store(raft::StorageError::Unavailable)); + } + Some(ent) => { + let size = ent.compute_size() as u64; + if fetched_size + size > max_size { + break; + } else { + fetched_size += size; + ents.push(ent); + } + } + } + } + let count = ents.len(); + buf.append(&mut ents); + return Ok(count); + } + info!( + "async fetch invalid"; + "region_id" => self.region_id, + "peer_id" => self.peer_id, + "first" => first, + "last" => last, + "low" => low, + "high" => high, + "max_size" => max_size, + "res_max_size" => res.max_size, + ); + // low index or max size is changed, the result is not fit for the current range, so refetch again. 
+ self.raftlog_fetch_stats.fetch_invalid.update(|m| m + 1); + res.tried_cnt + 1 + } else { + 1 + }; + + // the first/second try: get [low, high) asynchronously + // the third try: + // - if term and low are matched: use result of [low, persisted) and get [persisted, high) synchronously + // - else: get [low, high) synchronously + if tried_cnt >= MAX_ASYNC_FETCH_TRY_CNT { + // even the larger range is invalid again, fallback to fetch in sync way + self.raftlog_fetch_stats.fallback_fetch.update(|m| m + 1); + let count = self.raft_engine.fetch_entries_to( + region_id, + low, + high, + Some(max_size as usize), + buf, + )?; + return Ok(count); + } + + self.raftlog_fetch_stats.async_fetch.update(|m| m + 1); + self.async_fetch_results + .borrow_mut() + .insert(low, RaftlogFetchState::Fetching); + self.raftlog_fetch_scheduler + .schedule(RaftlogFetchTask::PeerStorage { + region_id, + context, + low, + high, + max_size: (max_size as usize), + tried_cnt, + term: self.hard_state().get_term(), + }) + .unwrap(); + Err(raft::Error::Store( + raft::StorageError::LogTemporarilyUnavailable, + )) + } + + pub fn entries( + &self, + low: u64, + high: u64, + max_size: u64, + context: GetEntriesContext, + ) -> raft::Result> { + self.check_range(low, high)?; + let mut ents = + Vec::with_capacity(std::cmp::min((high - low) as usize, MAX_INIT_ENTRY_COUNT)); + if low == high { + return Ok(ents); + } + let cache_low = self.cache.first_index().unwrap_or(u64::MAX); + if high <= cache_low { + self.cache.miss.update(|m| m + 1); + return if context.can_async() { + self.async_fetch(self.region_id, low, high, max_size, context, &mut ents)?; + Ok(ents) + } else { + self.raftlog_fetch_stats.sync_fetch.update(|m| m + 1); + self.raft_engine.fetch_entries_to( + self.region_id, + low, + high, + Some(max_size as usize), + &mut ents, + )?; + Ok(ents) + }; + } + let begin_idx = if low < cache_low { + self.cache.miss.update(|m| m + 1); + let fetched_count = if context.can_async() { + 
self.async_fetch(self.region_id, low, cache_low, max_size, context, &mut ents)? + } else { + self.raftlog_fetch_stats.sync_fetch.update(|m| m + 1); + self.raft_engine.fetch_entries_to( + self.region_id, + low, + cache_low, + Some(max_size as usize), + &mut ents, + )? + }; + if fetched_count < (cache_low - low) as usize { + // Less entries are fetched than expected. + return Ok(ents); + } + cache_low + } else { + low + }; + self.cache.hit.update(|h| h + 1); + let fetched_size = ents.iter().fold(0, |acc, e| acc + e.compute_size()); + self.cache + .fetch_entries_to(begin_idx, high, fetched_size as u64, max_size, &mut ents); + Ok(ents) + } + + pub fn term(&self, idx: u64) -> raft::Result { + if idx == self.truncated_index() { + return Ok(self.truncated_term()); + } + self.check_range(idx, idx + 1)?; + if self.truncated_term() == self.last_term || idx == self.last_index() { + return Ok(self.last_term); + } + if let Some(e) = self.cache.entry(idx) { + Ok(e.get_term()) + } else { + Ok(self + .raft_engine + .get_entry(self.region_id, idx) + .unwrap() + .unwrap() + .get_term()) + } + } + + #[inline] + pub fn first_index(&self) -> u64 { + first_index(&self.apply_state) + } + + #[inline] + pub fn last_index(&self) -> u64 { + last_index(&self.raft_state) + } + + #[inline] + pub fn last_term(&self) -> u64 { + self.last_term + } + + #[inline] + pub fn set_last_term(&mut self, term: u64) { + self.last_term = term; + } + + #[inline] + pub fn set_applied_term(&mut self, applied_term: u64) { + self.applied_term = applied_term; + } + + #[inline] + pub fn applied_term(&self) -> u64 { + self.applied_term + } + + #[inline] + pub fn raft_state(&self) -> &RaftLocalState { + &self.raft_state + } + + #[inline] + pub fn raft_state_mut(&mut self) -> &mut RaftLocalState { + &mut self.raft_state + } + + #[inline] + pub fn applied_index(&self) -> u64 { + self.apply_state.get_applied_index() + } + + #[inline] + pub fn set_applied_state(&mut self, apply_state: RaftApplyState) { + self.apply_state 
= apply_state; + } + + #[inline] + pub fn apply_state(&self) -> &RaftApplyState { + &self.apply_state + } + + #[inline] + pub fn apply_state_mut(&mut self) -> &mut RaftApplyState { + &mut self.apply_state + } + + #[inline] + pub fn commit_index(&self) -> u64 { + self.raft_state.get_hard_state().get_commit() + } + + #[inline] + pub fn set_commit_index(&mut self, commit: u64) { + assert!(commit >= self.commit_index()); + self.raft_state.mut_hard_state().set_commit(commit); + } + + #[inline] + pub fn hard_state(&self) -> &HardState { + self.raft_state.get_hard_state() + } + + #[inline] + pub fn truncated_index(&self) -> u64 { + self.apply_state.get_truncated_state().get_index() + } + + #[inline] + pub fn truncated_term(&self) -> u64 { + self.apply_state.get_truncated_state().get_term() + } + + // Append the given entries to the raft log using previous last index or self.last_index. + pub fn append(&mut self, entries: Vec, task: &mut WriteTask) { + if entries.is_empty() { + return; + } + debug!( + "append entries"; + "region_id" => self.region_id, + "peer_id" => self.peer_id, + "count" => entries.len(), + ); + let prev_last_index = self.raft_state.get_last_index(); + + let (last_index, last_term) = { + let e = entries.last().unwrap(); + (e.get_index(), e.get_term()) + }; + + self.cache.append(self.region_id, self.peer_id, &entries); + + task.entries = entries; + // Delete any previously appended log entries which never committed. + task.cut_logs = Some((last_index + 1, prev_last_index + 1)); + + self.raft_state.set_last_index(last_index); + self.last_term = last_term; + } + + pub fn compact_entry_cache(&mut self, idx: u64) { + self.cache.compact_to(idx); + } + + #[inline] + pub fn is_entry_cache_empty(&self) -> bool { + self.cache.is_empty() + } + + /// Evict entries from the cache. 
+ pub fn evict_entry_cache(&mut self, half: bool) { + if !self.is_entry_cache_empty() { + let cache = &mut self.cache; + let cache_len = cache.cache.len(); + let drain_to = if half { cache_len / 2 } else { cache_len - 1 }; + let idx = cache.cache[drain_to].index; + let mem_size_change = cache.compact_to(idx + 1); + RAFT_ENTRIES_EVICT_BYTES.inc_by(mem_size_change); + } + } + + #[inline] + pub fn flush_entry_cache_metrics(&mut self) { + // NOTE: memory usage of entry cache is flushed realtime. + self.cache.flush_stats(); + self.raftlog_fetch_stats.flush_stats(); + } + + pub fn raft_engine(&self) -> &ER { + &self.raft_engine + } + + pub fn update_cache_persisted(&mut self, persisted: u64) { + self.cache.update_persisted(persisted); + } + + pub fn trace_cached_entries(&mut self, entries: CachedEntries) { + self.cache.trace_cached_entries(entries); + } + + pub fn clear(&mut self) { + self.cache = EntryCache::default(); + } +} + +#[cfg(test)] +pub mod tests { + use std::sync::mpsc; + + use engine_test::raft::RaftTestEngine; + use engine_traits::RaftEngineReadOnly; + use protobuf::Message; + use raft::{GetEntriesContext, StorageError}; + use tempfile::Builder; + use tikv_util::worker::{dummy_scheduler, LazyWorker, Worker}; + + use super::*; + use crate::store::peer_storage::tests::{append_ents, new_entry, new_storage_from_ents}; + + impl EntryCache { + fn new_with_cb(cb: impl Fn(i64) + Send + 'static) -> Self { + let entry_cache = EntryCache { + persisted: 0, + cache: Default::default(), + trace: Default::default(), + hit: Cell::new(0), + miss: Cell::new(0), + size_change_cb: Some(Box::new(cb) as Box), + }; + entry_cache.flush_mem_size_change(entry_cache.total_mem_size()); + entry_cache + } + } + + pub fn validate_cache(store: &EntryStorage, exp_ents: &[Entry]) { + assert_eq!(store.cache.cache, exp_ents); + for e in exp_ents { + let entry = store + .raft_engine + .get_entry(store.region_id, e.get_index()) + .unwrap() + .unwrap(); + assert_eq!(entry, *e); + } + } + + 
#[test] + fn test_storage_cache_size_change() { + let new_padded_entry = |index: u64, term: u64, pad_len: usize| { + let mut e = new_entry(index, term); + e.data = vec![b'x'; pad_len].into(); + e + }; + + // Test the initial data structure size. + let (tx, rx) = mpsc::sync_channel(8); + let mut cache = EntryCache::new_with_cb(move |c: i64| tx.send(c).unwrap()); + assert_eq!(rx.try_recv().unwrap(), 896); + + cache.append( + 0, + 0, + &[new_padded_entry(101, 1, 1), new_padded_entry(102, 1, 2)], + ); + assert_eq!(rx.try_recv().unwrap(), 3); + + // Test size change for one overlapped entry. + cache.append(0, 0, &[new_padded_entry(102, 2, 3)]); + assert_eq!(rx.try_recv().unwrap(), 1); + + // Test size change for all overlapped entries. + cache.append( + 0, + 0, + &[new_padded_entry(101, 3, 4), new_padded_entry(102, 3, 5)], + ); + assert_eq!(rx.try_recv().unwrap(), 5); + + cache.append(0, 0, &[new_padded_entry(103, 3, 6)]); + assert_eq!(rx.try_recv().unwrap(), 6); + + // Test trace a dangle entry. + let cached_entries = CachedEntries::new(vec![new_padded_entry(100, 1, 1)]); + cache.trace_cached_entries(cached_entries); + assert_eq!(rx.try_recv().unwrap(), 1); + + // Test trace an entry which is still in cache. + let cached_entries = CachedEntries::new(vec![new_padded_entry(102, 3, 5)]); + cache.trace_cached_entries(cached_entries); + assert_eq!(rx.try_recv().unwrap(), 0); + + // Test compare `cached_last` with `trunc_to_idx` in `EntryCache::append_impl`. + cache.append(0, 0, &[new_padded_entry(103, 4, 7)]); + assert_eq!(rx.try_recv().unwrap(), 1); + + // Test compact one traced dangle entry and one entry in cache. + cache.persisted = 101; + cache.compact_to(102); + assert_eq!(rx.try_recv().unwrap(), -5); + + // Test compact the last traced dangle entry. + cache.persisted = 102; + cache.compact_to(103); + assert_eq!(rx.try_recv().unwrap(), -5); + + // Test compact all entries. 
+ cache.persisted = 103; + cache.compact_to(104); + assert_eq!(rx.try_recv().unwrap(), -7); + + drop(cache); + assert_eq!(rx.try_recv().unwrap(), -896); + } + + #[test] + fn test_storage_cache_entry() { + let mut cache = EntryCache::default(); + let ents = vec![ + new_entry(3, 3), + new_entry(4, 4), + new_entry(5, 4), + new_entry(6, 6), + ]; + cache.append(0, 0, &ents); + assert!(cache.entry(1).is_none()); + assert!(cache.entry(2).is_none()); + for e in &ents { + assert_eq!(e, cache.entry(e.get_index()).unwrap()); + } + let res = panic_hook::recover_safe(|| cache.entry(7)); + assert!(res.is_err()); + } + + #[test] + fn test_async_fetch() { + let ents = vec![ + new_entry(2, 2), + new_entry(3, 3), + new_entry(4, 4), + new_entry(5, 5), + new_entry(6, 6), + ]; + + let td = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); + let region_worker = Worker::new("snap-manager").lazy_build("snap-manager"); + let region_scheduler = region_worker.scheduler(); + let (dummy_scheduler, _rx) = dummy_scheduler(); + + let mut store = new_storage_from_ents(region_scheduler, dummy_scheduler, &td, &ents); + + let max_u64 = u64::max_value(); + let mut tests = vec![ + // already compacted + ( + 3, + 7, + max_u64, + 1, + RaftlogFetchResult { + ents: Err(raft::Error::Store(StorageError::Compacted)), + low: 3, + max_size: max_u64, + hit_size_limit: false, + tried_cnt: 1, + term: 1, + }, + Err(raft::Error::Store(StorageError::Compacted)), + vec![], + ), + // fetch partial entries due to max size limit + ( + 3, + 7, + 30, + 1, + RaftlogFetchResult { + ents: Ok(ents[1..4].to_vec()), + low: 3, + max_size: 30, + hit_size_limit: true, + tried_cnt: 1, + term: 1, + }, + Ok(3), + ents[1..4].to_vec(), + ), + // fetch all entries + ( + 2, + 7, + max_u64, + 1, + RaftlogFetchResult { + ents: Ok(ents.clone()), + low: 2, + max_size: max_u64, + hit_size_limit: false, + tried_cnt: 1, + term: 1, + }, + Ok(5), + ents.clone(), + ), + // high is smaller than before + ( + 3, + 5, + max_u64, + 1, + 
RaftlogFetchResult { + ents: Ok(ents[1..].to_vec()), + low: 3, + max_size: max_u64, + hit_size_limit: false, + tried_cnt: 1, + term: 1, + }, + Ok(2), + ents[1..3].to_vec(), + ), + // high is larger than before, second try + ( + 3, + 7, + max_u64, + 1, + RaftlogFetchResult { + ents: Ok(ents[1..4].to_vec()), + low: 3, + max_size: max_u64, + hit_size_limit: false, + tried_cnt: 1, + term: 1, + }, + Err(raft::Error::Store(StorageError::LogTemporarilyUnavailable)), + vec![], + ), + // high is larger than before, thrid try + ( + 3, + 7, + max_u64, + 1, + RaftlogFetchResult { + ents: Ok(ents[1..4].to_vec()), + low: 3, + max_size: max_u64, + hit_size_limit: false, + tried_cnt: 2, + term: 1, + }, + Ok(4), + ents[1..].to_vec(), + ), + // max size is smaller than before + ( + 2, + 7, + 10, + 1, + RaftlogFetchResult { + ents: Ok(ents.clone()), + low: 2, + max_size: max_u64, + hit_size_limit: false, + tried_cnt: 1, + term: 1, + }, + Ok(2), + ents[..2].to_vec(), + ), + // max size is larger than before but with lower high + ( + 2, + 5, + 40, + 1, + RaftlogFetchResult { + ents: Ok(ents.clone()), + low: 2, + max_size: 30, + hit_size_limit: false, + tried_cnt: 1, + term: 1, + }, + Ok(3), + ents[..3].to_vec(), + ), + // low index is smaller than before + ( + 2, + 7, + max_u64, + 1, + RaftlogFetchResult { + ents: Err(raft::Error::Store(StorageError::Compacted)), + low: 3, + max_size: max_u64, + hit_size_limit: false, + tried_cnt: 1, + term: 1, + }, + Err(raft::Error::Store(StorageError::LogTemporarilyUnavailable)), + vec![], + ), + // low index is larger than before + ( + 4, + 7, + max_u64, + 1, + RaftlogFetchResult { + ents: Ok(vec![]), + low: 3, + max_size: max_u64, + hit_size_limit: false, + tried_cnt: 1, + term: 1, + }, + Err(raft::Error::Store(StorageError::LogTemporarilyUnavailable)), + vec![], + ), + // hit tried several lmit + ( + 3, + 7, + max_u64, + 1, + RaftlogFetchResult { + ents: Ok(ents[1..4].to_vec()), + low: 3, + max_size: max_u64, + hit_size_limit: false, + tried_cnt: 
MAX_ASYNC_FETCH_TRY_CNT, + term: 1, + }, + Ok(4), + ents[1..5].to_vec(), + ), + // term is changed + ( + 3, + 7, + max_u64, + 2, + RaftlogFetchResult { + ents: Ok(ents[1..4].to_vec()), + low: 3, + max_size: max_u64, + hit_size_limit: false, + tried_cnt: MAX_ASYNC_FETCH_TRY_CNT, + term: 1, + }, + Ok(4), + ents[1..5].to_vec(), + ), + ]; + + for (i, (lo, hi, maxsize, term, async_res, expected_res, expected_ents)) in + tests.drain(..).enumerate() + { + if async_res.low != lo { + store.clean_async_fetch_res(lo); + } else { + store.update_async_fetch_res(lo, Some(Box::new(async_res))); + } + let mut ents = vec![]; + store.raft_state.mut_hard_state().set_term(term); + let res = store.async_fetch( + store.get_region_id(), + lo, + hi, + maxsize, + GetEntriesContext::empty(true), + &mut ents, + ); + if res != expected_res { + panic!("#{}: expect result {:?}, got {:?}", i, expected_res, res); + } + if ents != expected_ents { + panic!("#{}: expect ents {:?}, got {:?}", i, expected_ents, ents); + } + } + } + + #[test] + fn test_storage_append() { + let ents = vec![new_entry(3, 3), new_entry(4, 4), new_entry(5, 5)]; + let mut tests = vec![ + ( + vec![new_entry(4, 6), new_entry(5, 6)], + vec![new_entry(4, 6), new_entry(5, 6)], + ), + ( + vec![new_entry(4, 4), new_entry(5, 5), new_entry(6, 5)], + vec![new_entry(4, 4), new_entry(5, 5), new_entry(6, 5)], + ), + // truncate the existing entries and append + (vec![new_entry(4, 5)], vec![new_entry(4, 5)]), + // direct append + ( + vec![new_entry(6, 5)], + vec![new_entry(4, 4), new_entry(5, 5), new_entry(6, 5)], + ), + ]; + for (i, (entries, wentries)) in tests.drain(..).enumerate() { + let td = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); + let worker = LazyWorker::new("snap-manager"); + let sched = worker.scheduler(); + let (dummy_scheduler, _) = dummy_scheduler(); + let mut store = new_storage_from_ents(sched, dummy_scheduler, &td, &ents); + append_ents(&mut store, &entries); + let li = store.last_index().unwrap(); + 
let actual_entries = store + .entries(4, li + 1, u64::max_value(), GetEntriesContext::empty(false)) + .unwrap(); + if actual_entries != wentries { + panic!("#{}: want {:?}, got {:?}", i, wentries, actual_entries); + } + } + } + + #[test] + fn test_storage_cache_fetch() { + let ents = vec![new_entry(3, 3), new_entry(4, 4), new_entry(5, 5)]; + let td = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); + let worker = LazyWorker::new("snap-manager"); + let sched = worker.scheduler(); + let (dummy_scheduler, _) = dummy_scheduler(); + let mut store = new_storage_from_ents(sched, dummy_scheduler, &td, &ents); + store.cache.cache.clear(); + // empty cache should fetch data from rocksdb directly. + let mut res = store + .entries(4, 6, u64::max_value(), GetEntriesContext::empty(false)) + .unwrap(); + assert_eq!(*res, ents[1..]); + + let entries = vec![new_entry(6, 5), new_entry(7, 5)]; + append_ents(&mut store, &entries); + validate_cache(&store, &entries); + + // direct cache access + res = store + .entries(6, 8, u64::max_value(), GetEntriesContext::empty(false)) + .unwrap(); + assert_eq!(res, entries); + + // size limit should be supported correctly. + res = store + .entries(4, 8, 0, GetEntriesContext::empty(false)) + .unwrap(); + assert_eq!(res, vec![new_entry(4, 4)]); + let mut size: u64 = ents[1..].iter().map(|e| u64::from(e.compute_size())).sum(); + res = store + .entries(4, 8, size, GetEntriesContext::empty(false)) + .unwrap(); + let mut exp_res = ents[1..].to_vec(); + assert_eq!(res, exp_res); + for e in &entries { + size += u64::from(e.compute_size()); + exp_res.push(e.clone()); + res = store + .entries(4, 8, size, GetEntriesContext::empty(false)) + .unwrap(); + assert_eq!(res, exp_res); + } + + // range limit should be supported correctly. 
+ for low in 4..9 { + for high in low..9 { + let res = store + .entries(low, high, u64::max_value(), GetEntriesContext::empty(false)) + .unwrap(); + assert_eq!(*res, exp_res[low as usize - 4..high as usize - 4]); + } + } + } + + #[test] + fn test_storage_cache_update() { + let ents = vec![new_entry(3, 3), new_entry(4, 4), new_entry(5, 5)]; + let td = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); + let worker = LazyWorker::new("snap-manager"); + let sched = worker.scheduler(); + let (dummy_scheduler, _) = dummy_scheduler(); + let mut store = new_storage_from_ents(sched, dummy_scheduler, &td, &ents); + store.cache.cache.clear(); + + // initial cache + let mut entries = vec![new_entry(6, 5), new_entry(7, 5)]; + append_ents(&mut store, &entries); + validate_cache(&store, &entries); + + // rewrite + entries = vec![new_entry(6, 6), new_entry(7, 6)]; + append_ents(&mut store, &entries); + validate_cache(&store, &entries); + + // rewrite old entry + entries = vec![new_entry(5, 6), new_entry(6, 6)]; + append_ents(&mut store, &entries); + validate_cache(&store, &entries); + + // partial rewrite + entries = vec![new_entry(6, 7), new_entry(7, 7)]; + append_ents(&mut store, &entries); + let mut exp_res = vec![new_entry(5, 6), new_entry(6, 7), new_entry(7, 7)]; + validate_cache(&store, &exp_res); + + // direct append + entries = vec![new_entry(8, 7), new_entry(9, 7)]; + append_ents(&mut store, &entries); + exp_res.extend_from_slice(&entries); + validate_cache(&store, &exp_res); + + // rewrite middle + entries = vec![new_entry(7, 8)]; + append_ents(&mut store, &entries); + exp_res.truncate(2); + exp_res.push(new_entry(7, 8)); + validate_cache(&store, &exp_res); + + // compact to min(5 + 1, 7) + store.cache.persisted = 5; + store.compact_entry_cache(7); + exp_res = vec![new_entry(6, 7), new_entry(7, 8)]; + validate_cache(&store, &exp_res); + + // compact to min(7 + 1, 7) + store.cache.persisted = 7; + store.compact_entry_cache(7); + exp_res = vec![new_entry(7, 8)]; 
+ validate_cache(&store, &exp_res); + // compact all + store.compact_entry_cache(8); + validate_cache(&store, &[]); + // invalid compaction should be ignored. + store.compact_entry_cache(6); + } +} diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 88bff373760..98d12303b19 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -77,13 +77,14 @@ use crate::{ }, store::{ cmd_resp, + entry_storage::{self, CachedEntries}, fsm::RaftPollerBuilder, local_metrics::{RaftMetrics, TimeTracker}, memory::*, metrics::*, msg::{Callback, PeerMsg, ReadResponse, SignificantMsg}, peer::Peer, - peer_storage::{self, write_initial_apply_state, write_peer_state, CachedEntries}, + peer_storage::{write_initial_apply_state, write_peer_state}, util, util::{ admin_cmd_epoch_lookup, check_region_epoch, compare_region_epoch, is_learner, @@ -597,7 +598,7 @@ where apply_state: delegate.apply_state.clone(), exec_res: results, metrics: delegate.metrics.clone(), - applied_index_term: delegate.applied_index_term, + applied_term: delegate.applied_term, bucket_stat: delegate.buckets.clone().map(Box::new), }); } @@ -889,7 +890,7 @@ where /// to file, but KV data may not synced to file, so we will lose data. apply_state: RaftApplyState, /// The term of the raft log at applied index. - applied_index_term: u64, + applied_term: u64, /// The latest flushed applied index. last_flush_applied_index: u64, @@ -924,7 +925,7 @@ where pending_remove: false, last_flush_applied_index: reg.apply_state.get_applied_index(), apply_state: reg.apply_state, - applied_index_term: reg.applied_index_term, + applied_term: reg.applied_term, term: reg.term, stopped: false, handle_start: None, @@ -1094,7 +1095,7 @@ where apply_ctx.host.on_empty_cmd(&self.region, index, term); self.apply_state.set_applied_index(index); - self.applied_index_term = term; + self.applied_term = term; assert!(term > 0); // 1. 
When a peer become leader, it will send an empty entry. @@ -1302,7 +1303,7 @@ where } self.apply_state.set_applied_index(index); - self.applied_index_term = term; + self.applied_term = term; let cmd = Cmd::new(index, term, req.clone(), resp.clone()); let should_write = ctx.host.post_exec( @@ -2469,7 +2470,7 @@ where let prepare_merge = req.get_prepare_merge(); let index = prepare_merge.get_min_index(); - let first_index = peer_storage::first_index(&self.apply_state); + let first_index = entry_storage::first_index(&self.apply_state); if index < first_index { // We filter `CompactLog` command before. panic!( @@ -2713,7 +2714,7 @@ where let compact_index = req.get_compact_log().get_compact_index(); let resp = AdminResponse::default(); - let first_index = peer_storage::first_index(&self.apply_state); + let first_index = entry_storage::first_index(&self.apply_state); if compact_index <= first_index { debug!( "compact index <= first index, no need to compact"; @@ -3011,7 +3012,7 @@ pub struct Registration { pub id: u64, pub term: u64, pub apply_state: RaftApplyState, - pub applied_index_term: u64, + pub applied_term: u64, pub region: Region, pub pending_request_snapshot_count: Arc, pub is_merging: bool, @@ -3024,7 +3025,7 @@ impl Registration { id: peer.peer_id(), term: peer.term(), apply_state: peer.get_store().apply_state().clone(), - applied_index_term: peer.get_store().applied_index_term(), + applied_term: peer.get_store().applied_term(), region: peer.region().clone(), pending_request_snapshot_count: peer.pending_request_snapshot_count.clone(), is_merging: peer.pending_merge_state.is_some(), @@ -3110,7 +3111,7 @@ impl GenSnapTask { pub fn generate_and_schedule_snapshot( self, kv_snap: EK::Snapshot, - last_applied_index_term: u64, + last_applied_term: u64, last_applied_state: RaftApplyState, region_sched: &Scheduler>, ) -> Result<()> @@ -3123,7 +3124,7 @@ impl GenSnapTask { region_id: self.region_id, notifier: self.snap_notifier, for_balance: self.for_balance, - 
last_applied_index_term, + last_applied_term, last_applied_state, canceled: self.canceled, // This snapshot may be held for a long time, which may cause too many @@ -3281,7 +3282,7 @@ where { pub region_id: u64, pub apply_state: RaftApplyState, - pub applied_index_term: u64, + pub applied_term: u64, pub exec_res: VecDeque>, pub metrics: ApplyMetrics, pub bucket_stat: Option>, @@ -3591,7 +3592,7 @@ where if let Err(e) = snap_task.generate_and_schedule_snapshot::( apply_ctx.engine.snapshot(), - self.delegate.applied_index_term, + self.delegate.applied_term, self.delegate.apply_state.clone(), &apply_ctx.region_scheduler, ) { @@ -4429,7 +4430,7 @@ mod tests { id: Default::default(), term: Default::default(), apply_state: Default::default(), - applied_index_term: Default::default(), + applied_term: Default::default(), region: Default::default(), pending_request_snapshot_count: Default::default(), is_merging: Default::default(), @@ -4444,7 +4445,7 @@ mod tests { id: self.id, term: self.term, apply_state: self.apply_state.clone(), - applied_index_term: self.applied_index_term, + applied_term: self.applied_term, region: self.region.clone(), pending_request_snapshot_count: self.pending_request_snapshot_count.clone(), is_merging: self.is_merging, @@ -4646,7 +4647,7 @@ mod tests { let mut reg = Registration { id: 1, term: 4, - applied_index_term: 5, + applied_term: 5, ..Default::default() }; reg.region.set_id(2); @@ -4659,7 +4660,7 @@ mod tests { assert!(!delegate.pending_remove); assert_eq!(delegate.apply_state, reg.apply_state); assert_eq!(delegate.term, reg.term); - assert_eq!(delegate.applied_index_term, reg.applied_index_term); + assert_eq!(delegate.applied_term, reg.applied_term); }); let (resp_tx, resp_rx) = mpsc::channel(); @@ -4735,10 +4736,10 @@ mod tests { assert!(apply_res.exec_res.is_empty()); // empty entry will make applied_index step forward and should write apply state to engine. 
assert_eq!(apply_res.metrics.written_keys, 1); - assert_eq!(apply_res.applied_index_term, 5); + assert_eq!(apply_res.applied_term, 5); validate(&router, 2, |delegate| { assert_eq!(delegate.term, 11); - assert_eq!(delegate.applied_index_term, 5); + assert_eq!(delegate.applied_term, 5); assert_eq!(delegate.apply_state.get_applied_index(), 5); assert_eq!( delegate.apply_state.get_applied_index(), @@ -5079,7 +5080,7 @@ mod tests { assert_eq!(engine.get_value(&dk_k2).unwrap().unwrap(), b"v1"); assert_eq!(engine.get_value(&dk_k3).unwrap().unwrap(), b"v1"); validate(&router, 1, |delegate| { - assert_eq!(delegate.applied_index_term, 1); + assert_eq!(delegate.applied_term, 1); assert_eq!(delegate.apply_state.get_applied_index(), 1); }); fetch_apply_res(&rx); @@ -5092,7 +5093,7 @@ mod tests { let apply_res = fetch_apply_res(&rx); assert_eq!(apply_res.region_id, 1); assert_eq!(apply_res.apply_state.get_applied_index(), 2); - assert_eq!(apply_res.applied_index_term, 2); + assert_eq!(apply_res.applied_term, 2); assert!(apply_res.exec_res.is_empty()); assert!(apply_res.metrics.written_bytes >= 5); assert_eq!(apply_res.metrics.written_keys, 2); @@ -5120,7 +5121,7 @@ mod tests { let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); assert!(resp.get_header().get_error().has_epoch_not_match()); let apply_res = fetch_apply_res(&rx); - assert_eq!(apply_res.applied_index_term, 2); + assert_eq!(apply_res.applied_term, 2); assert_eq!(apply_res.apply_state.get_applied_index(), 3); let put_entry = EntryBuilder::new(4, 2) @@ -5141,7 +5142,7 @@ mod tests { let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); assert!(resp.get_header().get_error().has_key_not_in_region()); let apply_res = fetch_apply_res(&rx); - assert_eq!(apply_res.applied_index_term, 2); + assert_eq!(apply_res.applied_term, 2); assert_eq!(apply_res.apply_state.get_applied_index(), 4); // a writebatch should be atomic. 
assert_eq!(engine.get_value(&dk_k3).unwrap().unwrap(), b"v1"); @@ -5235,7 +5236,7 @@ mod tests { assert!(apply_res.exec_res.is_empty()); // The entry should be applied now. let apply_res = fetch_apply_res(&rx); - assert_eq!(apply_res.applied_index_term, 3); + assert_eq!(apply_res.applied_term, 3); assert_eq!(apply_res.apply_state.get_applied_index(), 8); // UploadSST @@ -5312,15 +5313,15 @@ mod tests { // The region was rescheduled low-priority becasuee of ingest command, // only put entry has been applied; let apply_res = fetch_apply_res(&rx); - assert_eq!(apply_res.applied_index_term, 3); + assert_eq!(apply_res.applied_term, 3); assert_eq!(apply_res.apply_state.get_applied_index(), 9); // The region will yield after timeout. let apply_res = fetch_apply_res(&rx); - assert_eq!(apply_res.applied_index_term, 3); + assert_eq!(apply_res.applied_term, 3); assert_eq!(apply_res.apply_state.get_applied_index(), 10); // The third entry should be applied now. let apply_res = fetch_apply_res(&rx); - assert_eq!(apply_res.applied_index_term, 3); + assert_eq!(apply_res.applied_term, 3); assert_eq!(apply_res.apply_state.get_applied_index(), 11); let write_batch_max_keys = ::WRITE_BATCH_MAX_KEYS; @@ -5689,7 +5690,7 @@ mod tests { let apply_res = fetch_apply_res(&rx); // applied_index can still be advanced. assert_eq!(apply_res.apply_state.get_applied_index(), index_id); - assert_eq!(apply_res.applied_index_term, 1); + assert_eq!(apply_res.applied_term, 1); // Executing CompactLog is filtered and takes no effect. assert_eq!(apply_res.exec_res.len(), 0); assert_eq!(apply_res.apply_state.get_truncated_state().get_index(), 0); @@ -5708,7 +5709,7 @@ mod tests { let apply_res = fetch_apply_res(&rx); // applied_index can still be advanced. assert_eq!(apply_res.apply_state.get_applied_index(), index_id); - assert_eq!(apply_res.applied_index_term, 1); + assert_eq!(apply_res.applied_term, 1); // We can get exec result of CompactLog. 
assert_eq!(apply_res.exec_res.len(), 1); assert_eq!( @@ -5726,7 +5727,7 @@ mod tests { let apply_res = fetch_apply_res(&rx); // applied_index can still be advanced. assert_eq!(apply_res.apply_state.get_applied_index(), index_id); - assert_eq!(apply_res.applied_index_term, 1); + assert_eq!(apply_res.applied_term, 1); // We can't get exec result of ComputeHash. assert_eq!(apply_res.exec_res.len(), 0); obs.filter_consistency_check.store(false, Ordering::SeqCst); @@ -5744,7 +5745,7 @@ mod tests { router.schedule_task(1, Msg::apply(apply(peer_id, 1, 1, vec![split], vec![]))); let apply_res = fetch_apply_res(&rx); assert_eq!(apply_res.apply_state.get_applied_index(), index_id); - assert_eq!(apply_res.applied_index_term, 1); + assert_eq!(apply_res.applied_term, 1); let (_, r8) = if let ExecResult::SplitRegion { regions, derived: _, @@ -5768,7 +5769,7 @@ mod tests { router.schedule_task(1, Msg::apply(apply(peer_id, 1, 1, vec![merge], vec![]))); let apply_res = fetch_apply_res(&rx); assert_eq!(apply_res.apply_state.get_applied_index(), index_id); - assert_eq!(apply_res.applied_index_term, 1); + assert_eq!(apply_res.applied_term, 1); // PrepareMerge will trigger commit. 
let state: RaftApplyState = engine .get_msg_cf(CF_RAFT, &keys::apply_state_key(1)) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index f3bcd56eabf..baccd071690 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2110,7 +2110,7 @@ where self.fsm.has_ready |= self.fsm.peer.post_apply( self.ctx, res.apply_state, - res.applied_index_term, + res.applied_term, &res.metrics, ); // After applying, several metrics are updated, report it to pd to diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 64c70bbc2e7..bd9564b1a63 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -2,6 +2,7 @@ pub mod cmd_resp; pub mod config; +pub mod entry_storage; pub mod fsm; pub mod memory; pub mod metrics; @@ -27,12 +28,17 @@ mod worker; #[cfg(any(test, feature = "testexport"))] pub use self::msg::PeerInternalStat; pub use self::{ + async_io::{ + write::{Worker as WriteWorker, WriteMsg, WriteTask}, + write_router::WriteRouter, + }, bootstrap::{ bootstrap_store, clear_prepare_bootstrap_cluster, clear_prepare_bootstrap_key, initial_region, prepare_bootstrap_cluster, }, compaction_guard::CompactionGuardGeneratorFactory, config::Config, + entry_storage::{EntryStorage, RaftlogFetchResult, MAX_INIT_ENTRY_COUNT}, fsm::{check_sst_for_ingestion, DestroyPeerJob, RaftRouter, StoreInfo}, hibernate_state::{GroupState, HibernateState}, memory::*, @@ -45,8 +51,8 @@ pub use self::{ peer::{AbstractPeer, Peer, PeerStat, ProposalContext, RequestInspector, RequestPolicy}, peer_storage::{ clear_meta, do_snapshot, write_initial_apply_state, write_initial_raft_state, - write_peer_state, PeerStorage, RaftlogFetchResult, SnapState, INIT_EPOCH_CONF_VER, - INIT_EPOCH_VER, MAX_INIT_ENTRY_COUNT, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, + write_peer_state, PeerStorage, SnapState, INIT_EPOCH_CONF_VER, INIT_EPOCH_VER, + 
RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }, read_queue::ReadIndexContext, region_snapshot::{RegionIterator, RegionSnapshot}, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 1a7954ca037..2bcaefff762 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -2194,8 +2194,8 @@ where // TODO: It may cause read index to wait a long time. // There may be some values that are not applied by this leader yet but the old leader, - // if applied_index_term isn't equal to current term. - self.get_store().applied_index_term() == self.term() + // if applied_term isn't equal to current term. + self.get_store().applied_term() == self.term() // There may be stale read if the old leader splits really slow, // the new region may already elected a new leader while // the old leader still think it owns the split range. @@ -3136,7 +3136,7 @@ where &mut self, ctx: &mut PollContext, apply_state: RaftApplyState, - applied_index_term: u64, + applied_term: u64, apply_metrics: &ApplyMetrics, ) -> bool { let mut has_ready = false; @@ -3159,9 +3159,9 @@ where .compact_entry_cache(apply_state.applied_index + 1); } - let progress_to_be_updated = self.mut_store().applied_index_term() != applied_index_term; + let progress_to_be_updated = self.mut_store().applied_term() != applied_term; self.mut_store().set_applied_state(apply_state); - self.mut_store().set_applied_term(applied_index_term); + self.mut_store().set_applied_term(applied_term); self.peer_stat.written_keys += apply_metrics.written_keys; self.peer_stat.written_bytes += apply_metrics.written_bytes; @@ -3183,13 +3183,13 @@ where self.read_progress.update_applied(applied_index); - // Only leaders need to update applied_index_term. + // Only leaders need to update applied_term. 
if progress_to_be_updated && self.is_leader() { - if applied_index_term == self.term() { + if applied_term == self.term() { ctx.coprocessor_host .on_applied_current_term(StateRole::Leader, self.region()); } - let progress = ReadProgress::applied_index_term(applied_index_term); + let progress = ReadProgress::applied_term(applied_term); let mut meta = ctx.store_meta.lock().unwrap(); let reader = meta.readers.get_mut(&self.region_id).unwrap(); self.maybe_update_read_progress(reader, progress); @@ -4223,7 +4223,7 @@ where return Err(box_err!( "{} peer has not applied to current term, applied_term {}, current_term {}", self.tag, - self.get_store().applied_index_term(), + self.get_store().applied_term(), self.term() )); } @@ -4437,11 +4437,11 @@ where // Actually, according to the implementation of conf change in raft-rs, this check must be // passed if the previous check that `pending_conf_index` should be less than or equal to // `self.get_store().applied_index()` is passed. - if self.get_store().applied_index_term() != self.term() { + if self.get_store().applied_term() != self.term() { return Err(box_err!( "{} peer has not applied to current term, applied_term {}, current_term {}", self.tag, - self.get_store().applied_index_term(), + self.get_store().applied_term(), self.term() )); } @@ -4908,7 +4908,7 @@ where let res = self.raft_group.raft.check_group_commit_consistent(); if Some(true) != res { let mut buffer: SmallVec<[(u64, u64, u64); 5]> = SmallVec::new(); - if self.get_store().applied_index_term() >= self.term() { + if self.get_store().applied_term() >= self.term() { let progress = self.raft_group.raft.prs(); for (id, p) in progress.iter() { if !progress.conf().voters().contains(*id) { @@ -5347,7 +5347,7 @@ where ER: RaftEngine, { fn has_applied_to_current_term(&mut self) -> bool { - self.get_store().applied_index_term() == self.term() + self.get_store().applied_term() == self.term() } fn inspect_lease(&mut self) -> LeaseState { diff --git 
a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index ec6cc3bcf11..8301c75e7c3 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -2,23 +2,18 @@ // #[PerformanceCriticalPath] use std::{ - cell::{Cell, RefCell}, - cmp, - collections::VecDeque, - error, mem, - ops::Range, + cell::RefCell, + error, + ops::{Deref, DerefMut}, sync::{ atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, mpsc::{self, Receiver, TryRecvError}, - Arc, Mutex, + Arc, }, u64, }; -use collections::HashMap; -use engine_traits::{ - Engines, KvEngine, Mutable, Peekable, RaftEngine, RaftLogBatch, CF_RAFT, RAFT_LOG_MULTI_GET_CNT, -}; +use engine_traits::{Engines, KvEngine, Mutable, Peekable, RaftEngine, RaftLogBatch, CF_RAFT}; use fail::fail_point; use into_other::into_other; use keys::{self, enc_end_key, enc_start_key}; @@ -32,20 +27,20 @@ use protobuf::Message; use raft::{ self, eraftpb::{self, ConfState, Entry, HardState, Snapshot}, - util::limit_size, Error as RaftError, GetEntriesContext, RaftState, Ready, Storage, StorageError, }; -use tikv_alloc::trace::TraceEvent; use tikv_util::{ box_err, box_try, debug, defer, error, info, time::Instant, warn, worker::Scheduler, }; -use super::{metrics::*, worker::RegionTask, SnapEntry, SnapKey, SnapManager, SnapshotStatistics}; +use super::{ + entry_storage::last_index, metrics::*, worker::RegionTask, SnapEntry, SnapKey, SnapManager, + SnapshotStatistics, +}; use crate::{ - bytes_capacity, store::{ - async_io::write::WriteTask, fsm::GenSnapTask, memory::*, peer::PersistSnapshotResult, util, - worker::RaftlogFetchTask, + async_io::write::WriteTask, entry_storage::EntryStorage, fsm::GenSnapTask, + peer::PersistSnapshotResult, util, worker::RaftlogFetchTask, }, Error, Result, }; @@ -55,17 +50,12 @@ use crate::{ pub const RAFT_INIT_LOG_TERM: u64 = 5; pub const RAFT_INIT_LOG_INDEX: u64 = 5; const MAX_SNAP_TRY_CNT: usize = 5; -const 
MAX_ASYNC_FETCH_TRY_CNT: usize = 3; - -pub const MAX_INIT_ENTRY_COUNT: usize = 1024; /// The initial region epoch version. pub const INIT_EPOCH_VER: u64 = 1; /// The initial region epoch conf_version. pub const INIT_EPOCH_CONF_VER: u64 = 1; -const SHRINK_CACHE_CAPACITY: usize = 64; - pub const JOB_STATUS_PENDING: usize = 0; pub const JOB_STATUS_RUNNING: usize = 1; pub const JOB_STATUS_CANCELLING: usize = 2; @@ -73,8 +63,6 @@ pub const JOB_STATUS_CANCELLED: usize = 3; pub const JOB_STATUS_FINISHED: usize = 4; pub const JOB_STATUS_FAILED: usize = 5; -const ENTRY_MEM_SIZE: usize = mem::size_of::(); - /// Possible status returned by `check_applying_snap`. #[derive(Debug, Clone, Copy, PartialEq)] pub enum CheckApplyingSnapStatus { @@ -112,312 +100,7 @@ impl PartialEq for SnapState { } } -#[inline] -pub fn first_index(state: &RaftApplyState) -> u64 { - state.get_truncated_state().get_index() + 1 -} - -#[inline] -pub fn last_index(state: &RaftLocalState) -> u64 { - state.get_last_index() -} - -struct EntryCache { - // The last index of persisted entry. - // It should be equal to `RaftLog::persisted`. 
- persisted: u64, - cache: VecDeque, - trace: VecDeque, - hit: Cell, - miss: Cell, - #[cfg(test)] - size_change_cb: Option>, -} - -impl EntryCache { - fn first_index(&self) -> Option { - self.cache.front().map(|e| e.get_index()) - } - - fn fetch_entries_to( - &self, - begin: u64, - end: u64, - mut fetched_size: u64, - max_size: u64, - ents: &mut Vec, - ) { - if begin >= end { - return; - } - assert!(!self.cache.is_empty()); - let cache_low = self.cache.front().unwrap().get_index(); - let start_idx = begin.checked_sub(cache_low).unwrap() as usize; - let limit_idx = end.checked_sub(cache_low).unwrap() as usize; - - let mut end_idx = start_idx; - self.cache - .iter() - .skip(start_idx) - .take_while(|e| { - let cur_idx = end_idx as u64 + cache_low; - assert_eq!(e.get_index(), cur_idx); - let m = u64::from(e.compute_size()); - fetched_size += m; - if fetched_size == m { - end_idx += 1; - fetched_size <= max_size && end_idx < limit_idx - } else if fetched_size <= max_size { - end_idx += 1; - end_idx < limit_idx - } else { - false - } - }) - .count(); - // Cache either is empty or contains latest log. Hence we don't need to fetch log - // from rocksdb anymore. 
- assert!(end_idx == limit_idx || fetched_size > max_size); - let (first, second) = tikv_util::slices_in_range(&self.cache, start_idx, end_idx); - ents.extend_from_slice(first); - ents.extend_from_slice(second); - } - - fn append(&mut self, tag: &str, entries: &[Entry]) { - if !entries.is_empty() { - let mut mem_size_change = 0; - let old_capacity = self.cache.capacity(); - mem_size_change += self.append_impl(tag, entries); - let new_capacity = self.cache.capacity(); - mem_size_change += Self::get_cache_vec_mem_size_change(new_capacity, old_capacity); - mem_size_change += self.shrink_if_necessary(); - self.flush_mem_size_change(mem_size_change); - } - } - - fn append_impl(&mut self, tag: &str, entries: &[Entry]) -> i64 { - let mut mem_size_change = 0; - - if let Some(cache_last_index) = self.cache.back().map(|e| e.get_index()) { - let first_index = entries[0].get_index(); - if cache_last_index >= first_index { - let cache_len = self.cache.len(); - let truncate_to = cache_len - .checked_sub((cache_last_index - first_index + 1) as usize) - .unwrap_or_default(); - let trunc_to_idx = self.cache[truncate_to].index; - for e in self.cache.drain(truncate_to..) { - mem_size_change -= - (bytes_capacity(&e.data) + bytes_capacity(&e.context)) as i64; - } - if let Some(cached) = self.trace.back() { - // Only committed entries can be traced, and only uncommitted entries - // can be truncated. So there won't be any overlaps. - let cached_last = cached.range.end - 1; - assert!(cached_last < trunc_to_idx); - } - } else if cache_last_index + 1 < first_index { - panic!( - "{} unexpected hole: {} < {}", - tag, cache_last_index, first_index - ); - } - } - - for e in entries { - self.cache.push_back(e.to_owned()); - mem_size_change += (bytes_capacity(&e.data) + bytes_capacity(&e.context)) as i64; - } - // In the past, the entry cache will be truncated if its size exceeds a certain number. 
- // However, after introducing async write io, the entry must stay in cache if it's not - // persisted to raft db because the raft-rs may need to read entries.(e.g. leader sends - // MsgAppend to followers) - - mem_size_change - } - - pub fn entry(&self, idx: u64) -> Option<&Entry> { - let cache_low = self.cache.front()?.get_index(); - if idx >= cache_low { - Some(&self.cache[(idx - cache_low) as usize]) - } else { - None - } - } - - /// Compact all entries whose indexes are less than `idx`. - pub fn compact_to(&mut self, mut idx: u64) -> u64 { - if idx > self.persisted + 1 { - // Only the persisted entries can be compacted - idx = self.persisted + 1; - } - - let mut mem_size_change = 0; - - // Clean cached entries which have been already sent to apply threads. For example, - // if entries [1, 10), [10, 20), [20, 30) are sent to apply threads and `compact_to(15)` - // is called, only [20, 30) will still be kept in cache. - let old_trace_cap = self.trace.capacity(); - while let Some(cached_entries) = self.trace.pop_front() { - if cached_entries.range.start >= idx { - self.trace.push_front(cached_entries); - let trace_len = self.trace.len(); - let trace_cap = self.trace.capacity(); - if trace_len < SHRINK_CACHE_CAPACITY && trace_cap > SHRINK_CACHE_CAPACITY { - self.trace.shrink_to(SHRINK_CACHE_CAPACITY); - } - break; - } - let (_, dangle_size) = cached_entries.take_entries(); - mem_size_change -= dangle_size as i64; - idx = cmp::max(cached_entries.range.end, idx); - } - let new_trace_cap = self.trace.capacity(); - mem_size_change += Self::get_trace_vec_mem_size_change(new_trace_cap, old_trace_cap); - - let cache_first_idx = self.first_index().unwrap_or(u64::MAX); - if cache_first_idx >= idx { - self.flush_mem_size_change(mem_size_change); - assert!(mem_size_change <= 0); - return -mem_size_change as u64; - } - - let cache_last_idx = self.cache.back().unwrap().get_index(); - // Use `cache_last_idx + 1` to make sure cache can be cleared completely if necessary. 
- let compact_to = (cmp::min(cache_last_idx + 1, idx) - cache_first_idx) as usize; - for e in self.cache.drain(..compact_to) { - mem_size_change -= (bytes_capacity(&e.data) + bytes_capacity(&e.context)) as i64 - } - - mem_size_change += self.shrink_if_necessary(); - self.flush_mem_size_change(mem_size_change); - assert!(mem_size_change <= 0); - -mem_size_change as u64 - } - - fn get_total_mem_size(&self) -> i64 { - let data_size: i64 = self - .cache - .iter() - .map(|e| (bytes_capacity(&e.data) + bytes_capacity(&e.context)) as i64) - .sum(); - let cache_vec_size = Self::get_cache_vec_mem_size_change(self.cache.capacity(), 0); - let trace_vec_size = Self::get_trace_vec_mem_size_change(self.trace.capacity(), 0); - data_size + cache_vec_size + trace_vec_size - } - - fn get_cache_vec_mem_size_change(new_capacity: usize, old_capacity: usize) -> i64 { - ENTRY_MEM_SIZE as i64 * (new_capacity as i64 - old_capacity as i64) - } - - fn get_trace_vec_mem_size_change(new_capacity: usize, old_capacity: usize) -> i64 { - mem::size_of::() as i64 * (new_capacity as i64 - old_capacity as i64) - } - - fn flush_mem_size_change(&self, mem_size_change: i64) { - #[cfg(test)] - if let Some(size_change_cb) = self.size_change_cb.as_ref() { - size_change_cb(mem_size_change); - } - let event = if mem_size_change > 0 { - TraceEvent::Add(mem_size_change as usize) - } else { - TraceEvent::Sub(-mem_size_change as usize) - }; - MEMTRACE_ENTRY_CACHE.trace(event); - RAFT_ENTRIES_CACHES_GAUGE.add(mem_size_change); - } - - fn flush_stats(&self) { - let hit = self.hit.replace(0); - RAFT_ENTRY_FETCHES.hit.inc_by(hit); - let miss = self.miss.replace(0); - RAFT_ENTRY_FETCHES.miss.inc_by(miss); - } - - #[inline] - fn is_empty(&self) -> bool { - self.cache.is_empty() - } - - fn trace_cached_entries(&mut self, entries: CachedEntries) { - let dangle_size = { - let mut guard = entries.entries.lock().unwrap(); - - let last_idx = guard.0.last().map(|e| e.index).unwrap(); - let cache_front = match 
self.cache.front().map(|e| e.index) { - Some(i) => i, - None => u64::MAX, - }; - - let dangle_range = if last_idx < cache_front { - // All entries are not in entry cache. - 0..guard.0.len() - } else if let Ok(i) = guard.0.binary_search_by(|e| e.index.cmp(&cache_front)) { - // Some entries are in entry cache. - 0..i - } else { - // All entries are in entry cache. - 0..0 - }; - - let mut size = 0; - for e in &guard.0[dangle_range] { - size += bytes_capacity(&e.data) + bytes_capacity(&e.context); - } - guard.1 = size; - size - }; - - let old_capacity = self.trace.capacity(); - self.trace.push_back(entries); - let new_capacity = self.trace.capacity(); - let diff = Self::get_trace_vec_mem_size_change(new_capacity, old_capacity); - - self.flush_mem_size_change(diff + dangle_size as i64); - } - - fn shrink_if_necessary(&mut self) -> i64 { - if self.cache.len() < SHRINK_CACHE_CAPACITY && self.cache.capacity() > SHRINK_CACHE_CAPACITY - { - let old_capacity = self.cache.capacity(); - self.cache.shrink_to_fit(); - let new_capacity = self.cache.capacity(); - return Self::get_cache_vec_mem_size_change(new_capacity, old_capacity); - } - 0 - } - - fn update_persisted(&mut self, persisted: u64) { - self.persisted = persisted; - } -} - -impl Default for EntryCache { - fn default() -> Self { - let entry_cache = EntryCache { - persisted: 0, - cache: Default::default(), - trace: Default::default(), - hit: Cell::new(0), - miss: Cell::new(0), - #[cfg(test)] - size_change_cb: None, - }; - entry_cache.flush_mem_size_change(entry_cache.get_total_mem_size()); - entry_cache - } -} - -impl Drop for EntryCache { - fn drop(&mut self) { - let mem_size_change = self.get_total_mem_size(); - self.flush_mem_size_change(-mem_size_change); - self.flush_stats(); - } -} - -fn storage_error(error: E) -> raft::Error +pub fn storage_error(error: E) -> raft::Error where E: Into>, { @@ -480,7 +163,7 @@ pub fn recover_from_applying_state( Ok(()) } -fn init_applied_index_term( +fn init_applied_term( engines: 
&Engines, region: &Region, apply_state: &RaftApplyState, @@ -642,72 +325,30 @@ where peer_id: u64, region: metapb::Region, - raft_state: RaftLocalState, - apply_state: RaftApplyState, - applied_index_term: u64, - last_term: u64, snap_state: RefCell, gen_snap_task: RefCell>, region_scheduler: Scheduler>, snap_tried_cnt: RefCell, - cache: EntryCache, - - raftlog_fetch_scheduler: Scheduler, - raftlog_fetch_stats: AsyncFetchStats, - async_fetch_results: RefCell>, + entry_storage: EntryStorage, pub tag: String, } -#[derive(Debug, PartialEq)] -pub enum RaftlogFetchState { - Fetching, - Fetched(Box), -} - -#[derive(Debug, PartialEq)] -pub struct RaftlogFetchResult { - pub ents: raft::Result>, - // because entries may be empty, so store the original low index that the task issued - pub low: u64, - // the original max size that the task issued - pub max_size: u64, - // if the ents hit max_size - pub hit_size_limit: bool, - // the times that async fetch have already tried - pub tried_cnt: usize, - // the term when the task issued - pub term: u64, -} +impl Deref for PeerStorage { + type Target = EntryStorage; -#[derive(Default)] -struct AsyncFetchStats { - async_fetch: Cell, - sync_fetch: Cell, - fallback_fetch: Cell, - fetch_invalid: Cell, - fetch_unused: Cell, + #[inline] + fn deref(&self) -> &Self::Target { + &self.entry_storage + } } -impl AsyncFetchStats { - fn flush_stats(&mut self) { - RAFT_ENTRY_FETCHES - .async_fetch - .inc_by(self.async_fetch.replace(0)); - RAFT_ENTRY_FETCHES - .sync_fetch - .inc_by(self.sync_fetch.replace(0)); - RAFT_ENTRY_FETCHES - .fallback_fetch - .inc_by(self.fallback_fetch.replace(0)); - RAFT_ENTRY_FETCHES - .fetch_invalid - .inc_by(self.fetch_invalid.replace(0)); - RAFT_ENTRY_FETCHES - .fetch_unused - .inc_by(self.fetch_unused.replace(0)); +impl DerefMut for PeerStorage { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.entry_storage } } @@ -728,19 +369,20 @@ where context: GetEntriesContext, ) -> raft::Result> { let 
max_size = max_size.into(); - self.entries(low, high, max_size.unwrap_or(u64::MAX), context) + self.entry_storage + .entries(low, high, max_size.unwrap_or(u64::MAX), context) } fn term(&self, idx: u64) -> raft::Result { - self.term(idx) + self.entry_storage.term(idx) } fn first_index(&self) -> raft::Result { - Ok(self.first_index()) + Ok(self.entry_storage.first_index()) } fn last_index(&self) -> raft::Result { - Ok(self.last_index()) + Ok(self.entry_storage.last_index()) } fn snapshot(&self, request_index: u64, to: u64) -> raft::Result { @@ -773,25 +415,28 @@ where return Err(box_err!("{} validate state fail: {:?}", tag, e)); } let last_term = init_last_term(&engines, region, &raft_state, &apply_state)?; - let applied_index_term = init_applied_index_term(&engines, region, &apply_state)?; + let applied_term = init_applied_term(&engines, region, &apply_state)?; + let entry_storage = EntryStorage::new( + region.id, + peer_id, + engines.raft.clone(), + raft_state, + apply_state, + last_term, + applied_term, + raftlog_fetch_scheduler, + ); Ok(PeerStorage { engines, peer_id, region: region.clone(), - raft_state, - apply_state, snap_state: RefCell::new(SnapState::Relax), gen_snap_task: RefCell::new(None), region_scheduler, - raftlog_fetch_scheduler, snap_tried_cnt: RefCell::new(0), tag, - applied_index_term, - last_term, - cache: EntryCache::default(), - async_fetch_results: RefCell::new(HashMap::default()), - raftlog_fetch_stats: AsyncFetchStats::default(), + entry_storage, }) } @@ -800,14 +445,14 @@ where } pub fn initial_state(&self) -> raft::Result { - let hard_state = self.raft_state.get_hard_state().clone(); + let hard_state = self.raft_state().get_hard_state().clone(); if hard_state == HardState::default() { assert!( !self.is_initialized(), "peer for region {:?} is initialized but local state {:?} has empty hard \ state", self.region, - self.raft_state + self.raft_state() ); return Ok(RaftState::new(hard_state, ConfState::default())); @@ -818,348 +463,6 @@ where 
)) } - fn check_range(&self, low: u64, high: u64) -> raft::Result<()> { - if low > high { - return Err(storage_error(format!( - "low: {} is greater that high: {}", - low, high - ))); - } else if low <= self.truncated_index() { - return Err(RaftError::Store(StorageError::Compacted)); - } else if high > self.last_index() + 1 { - return Err(storage_error(format!( - "entries' high {} is out of bound lastindex {}", - high, - self.last_index() - ))); - } - Ok(()) - } - - pub fn clean_async_fetch_res(&mut self, low: u64) { - self.async_fetch_results.borrow_mut().remove(&low); - } - - // Update the async fetch result. - // None indicates cleanning the fetched result. - pub fn update_async_fetch_res(&mut self, low: u64, res: Option>) { - // If it's in fetching, don't clean the async fetch result. - if self.async_fetch_results.borrow().get(&low) == Some(&RaftlogFetchState::Fetching) - && res.is_none() - { - return; - } - - match res { - Some(res) => { - if let Some(RaftlogFetchState::Fetched(prev)) = self - .async_fetch_results - .borrow_mut() - .insert(low, RaftlogFetchState::Fetched(res)) - { - info!( - "unconsumed async fetch res"; - "region_id" => self.region.get_id(), - "peer_id" => self.peer_id, - "res" => ?prev, - "low" => low, - ); - } - } - None => { - let prev = self.async_fetch_results.borrow_mut().remove(&low); - if prev.is_some() { - self.raftlog_fetch_stats.fetch_unused.update(|m| m + 1); - } - } - } - } - - fn async_fetch( - &self, - region_id: u64, - low: u64, - high: u64, - max_size: u64, - context: GetEntriesContext, - buf: &mut Vec, - ) -> raft::Result { - if let Some(RaftlogFetchState::Fetching) = self.async_fetch_results.borrow().get(&low) { - // already an async fetch in flight - return Err(raft::Error::Store( - raft::StorageError::LogTemporarilyUnavailable, - )); - } - - let tried_cnt = if let Some(RaftlogFetchState::Fetched(res)) = - self.async_fetch_results.borrow_mut().remove(&low) - { - assert_eq!(res.low, low); - let mut ents = res.ents?; - let 
first = ents.first().map(|e| e.index).unwrap(); - assert_eq!(first, res.low); - let last = ents.last().map(|e| e.index).unwrap(); - - if last + 1 >= high { - // async fetch res covers [low, high) - ents.truncate((high - first) as usize); - assert_eq!(ents.last().map(|e| e.index).unwrap(), high - 1); - if max_size < res.max_size { - limit_size(&mut ents, Some(max_size)); - } - let count = ents.len(); - buf.append(&mut ents); - fail_point!("on_async_fetch_return"); - return Ok(count); - } else if res.hit_size_limit && max_size <= res.max_size { - // async fetch res doesn't cover [low, high) due to hit size limit - if max_size < res.max_size { - limit_size(&mut ents, Some(max_size)); - }; - let count = ents.len(); - buf.append(&mut ents); - return Ok(count); - } else if last + RAFT_LOG_MULTI_GET_CNT > high - 1 - && res.tried_cnt + 1 == MAX_ASYNC_FETCH_TRY_CNT - { - let mut fetched_size = ents.iter().fold(0, |acc, e| acc + e.compute_size() as u64); - if max_size <= fetched_size { - limit_size(&mut ents, Some(max_size)); - let count = ents.len(); - buf.append(&mut ents); - return Ok(count); - } - - // the count of left entries isn't too large, fetch the remaining entries synchronously one by one - for idx in last + 1..high { - let ent = self.engines.raft.get_entry(region_id, idx)?; - match ent { - None => { - return Err(raft::Error::Store(raft::StorageError::Unavailable)); - } - Some(ent) => { - let size = ent.compute_size() as u64; - if fetched_size + size > max_size { - break; - } else { - fetched_size += size; - ents.push(ent); - } - } - } - } - let count = ents.len(); - buf.append(&mut ents); - return Ok(count); - } - info!( - "async fetch invalid"; - "region_id" => self.region.get_id(), - "peer_id" => self.peer_id, - "first" => first, - "last" => last, - "low" => low, - "high" => high, - "max_size" => max_size, - "res_max_size" => res.max_size, - ); - // low index or max size is changed, the result is not fit for the current range, so refetch again. 
- self.raftlog_fetch_stats.fetch_invalid.update(|m| m + 1); - res.tried_cnt + 1 - } else { - 1 - }; - - // the first/second try: get [low, high) asynchronously - // the third try: - // - if term and low are matched: use result of [low, persisted) and get [persisted, high) synchronously - // - else: get [low, high) synchronously - if tried_cnt >= MAX_ASYNC_FETCH_TRY_CNT { - // even the larger range is invalid again, fallback to fetch in sync way - self.raftlog_fetch_stats.fallback_fetch.update(|m| m + 1); - let count = self.engines.raft.fetch_entries_to( - region_id, - low, - high, - Some(max_size as usize), - buf, - )?; - return Ok(count); - } - - self.raftlog_fetch_stats.async_fetch.update(|m| m + 1); - self.async_fetch_results - .borrow_mut() - .insert(low, RaftlogFetchState::Fetching); - self.raftlog_fetch_scheduler - .schedule(RaftlogFetchTask::PeerStorage { - region_id, - context, - low, - high, - max_size: (max_size as usize), - tried_cnt, - term: self.hard_state().get_term(), - }) - .unwrap(); - Err(raft::Error::Store( - raft::StorageError::LogTemporarilyUnavailable, - )) - } - - pub fn entries( - &self, - low: u64, - high: u64, - max_size: u64, - context: GetEntriesContext, - ) -> raft::Result> { - self.check_range(low, high)?; - let mut ents = - Vec::with_capacity(std::cmp::min((high - low) as usize, MAX_INIT_ENTRY_COUNT)); - if low == high { - return Ok(ents); - } - let region_id = self.get_region_id(); - let cache_low = self.cache.first_index().unwrap_or(u64::MAX); - if high <= cache_low { - self.cache.miss.update(|m| m + 1); - return if context.can_async() { - self.async_fetch(region_id, low, high, max_size, context, &mut ents)?; - Ok(ents) - } else { - self.raftlog_fetch_stats.sync_fetch.update(|m| m + 1); - self.engines.raft.fetch_entries_to( - region_id, - low, - high, - Some(max_size as usize), - &mut ents, - )?; - Ok(ents) - }; - } - let begin_idx = if low < cache_low { - self.cache.miss.update(|m| m + 1); - let fetched_count = if 
context.can_async() { - self.async_fetch(region_id, low, cache_low, max_size, context, &mut ents)? - } else { - self.raftlog_fetch_stats.sync_fetch.update(|m| m + 1); - self.engines.raft.fetch_entries_to( - region_id, - low, - cache_low, - Some(max_size as usize), - &mut ents, - )? - }; - if fetched_count < (cache_low - low) as usize { - // Less entries are fetched than expected. - return Ok(ents); - } - cache_low - } else { - low - }; - self.cache.hit.update(|h| h + 1); - let fetched_size = ents.iter().fold(0, |acc, e| acc + e.compute_size()); - self.cache - .fetch_entries_to(begin_idx, high, fetched_size as u64, max_size, &mut ents); - Ok(ents) - } - - pub fn term(&self, idx: u64) -> raft::Result { - if idx == self.truncated_index() { - return Ok(self.truncated_term()); - } - self.check_range(idx, idx + 1)?; - if self.truncated_term() == self.last_term || idx == self.last_index() { - return Ok(self.last_term); - } - if let Some(e) = self.cache.entry(idx) { - Ok(e.get_term()) - } else { - Ok(self - .engines - .raft - .get_entry(self.get_region_id(), idx) - .unwrap() - .unwrap() - .get_term()) - } - } - - #[inline] - pub fn first_index(&self) -> u64 { - first_index(&self.apply_state) - } - - #[inline] - pub fn last_index(&self) -> u64 { - last_index(&self.raft_state) - } - - #[inline] - pub fn last_term(&self) -> u64 { - self.last_term - } - - #[inline] - pub fn raft_state(&self) -> &RaftLocalState { - &self.raft_state - } - - #[inline] - pub fn applied_index(&self) -> u64 { - self.apply_state.get_applied_index() - } - - #[inline] - pub fn set_applied_state(&mut self, apply_state: RaftApplyState) { - self.apply_state = apply_state; - } - - #[inline] - pub fn set_applied_term(&mut self, applied_index_term: u64) { - self.applied_index_term = applied_index_term; - } - - #[inline] - pub fn apply_state(&self) -> &RaftApplyState { - &self.apply_state - } - - #[inline] - pub fn applied_index_term(&self) -> u64 { - self.applied_index_term - } - - #[inline] - pub fn 
commit_index(&self) -> u64 { - self.raft_state.get_hard_state().get_commit() - } - - #[inline] - pub fn set_commit_index(&mut self, commit: u64) { - assert!(commit >= self.commit_index()); - self.raft_state.mut_hard_state().set_commit(commit); - } - - #[inline] - pub fn hard_state(&self) -> &HardState { - self.raft_state.get_hard_state() - } - - #[inline] - pub fn truncated_index(&self) -> u64 { - self.apply_state.get_truncated_state().get_index() - } - - #[inline] - pub fn truncated_term(&self) -> u64 { - self.apply_state.get_truncated_state().get_term() - } - #[inline] pub fn region(&self) -> &metapb::Region { &self.region @@ -1181,7 +484,7 @@ where snapshot_index: u64, kv_wb: &mut impl Mutable, ) -> Result<()> { - let mut snapshot_raft_state = self.raft_state.clone(); + let mut snapshot_raft_state = self.raft_state().clone(); snapshot_raft_state .mut_hard_state() .set_commit(snapshot_index); @@ -1200,7 +503,7 @@ where kv_wb.put_msg_cf( CF_RAFT, &keys::apply_state_key(self.region.get_id()), - &self.apply_state, + self.apply_state(), )?; Ok(()) } @@ -1354,68 +657,11 @@ where self.gen_snap_task.get_mut().take() } - // Append the given entries to the raft log using previous last index or self.last_index. - pub fn append(&mut self, entries: Vec, task: &mut WriteTask) { - if entries.is_empty() { - return; - } - let region_id = self.get_region_id(); - debug!( - "append entries"; - "region_id" => region_id, - "peer_id" => self.peer_id, - "count" => entries.len(), - ); - let prev_last_index = self.raft_state.get_last_index(); - - let (last_index, last_term) = { - let e = entries.last().unwrap(); - (e.get_index(), e.get_term()) - }; - - self.cache.append(&self.tag, &entries); - - task.entries = entries; - // Delete any previously appended log entries which never committed. 
- task.cut_logs = Some((last_index + 1, prev_last_index + 1)); - - self.raft_state.set_last_index(last_index); - self.last_term = last_term; - } - pub fn on_compact_raftlog(&mut self, idx: u64) { - self.compact_entry_cache(idx); + self.entry_storage.compact_entry_cache(idx); self.cancel_generating_snap(Some(idx)); } - pub fn compact_entry_cache(&mut self, idx: u64) { - self.cache.compact_to(idx); - } - - #[inline] - pub fn is_entry_cache_empty(&self) -> bool { - self.cache.is_empty() - } - - /// Evict entries from the cache. - pub fn evict_entry_cache(&mut self, half: bool) { - if !self.is_entry_cache_empty() { - let cache = &mut self.cache; - let cache_len = cache.cache.len(); - let drain_to = if half { cache_len / 2 } else { cache_len - 1 }; - let idx = cache.cache[drain_to].index; - let mem_size_change = cache.compact_to(idx + 1); - RAFT_ENTRIES_EVICT_BYTES.inc_by(mem_size_change); - } - } - - #[inline] - pub fn flush_entry_cache_metrics(&mut self) { - // NOTE: memory usage of entry cache is flushed realtime. - self.cache.flush_stats(); - self.raftlog_fetch_stats.flush_stats(); - } - // Apply the peer with given snapshot. pub fn apply_snapshot( &mut self, @@ -1454,7 +700,7 @@ where if self.is_initialized() { // we can only delete the old data when the peer is initialized. - let first_index = self.first_index(); + let first_index = self.entry_storage.first_index(); // It's possible that logs between `last_compacted_idx` and `first_index` are // being deleted in raftlog_gc worker. But it's OK as: // 1. 
If the peer accepts a new snapshot, it must start with an index larger than @@ -1475,15 +721,18 @@ where let last_index = snap.get_metadata().get_index(); - self.raft_state.set_last_index(last_index); - self.last_term = snap.get_metadata().get_term(); - self.apply_state.set_applied_index(last_index); - self.applied_index_term = self.last_term; + self.raft_state_mut().set_last_index(last_index); + self.set_last_term(snap.get_metadata().get_term()); + self.apply_state_mut().set_applied_index(last_index); + let last_term = self.last_term(); + self.set_applied_term(last_term); // The snapshot only contains log which index > applied index, so // here the truncate state's (index, term) is in snapshot metadata. - self.apply_state.mut_truncated_state().set_index(last_index); - self.apply_state + self.apply_state_mut() + .mut_truncated_state() + .set_index(last_index); + self.apply_state_mut() .mut_truncated_state() .set_term(snap.get_metadata().get_term()); @@ -1502,7 +751,7 @@ where "region_id" => self.region.get_id(), "peer_id" => self.peer_id, "region" => ?region, - "state" => ?self.apply_state, + "state" => ?self.apply_state(), ); Ok(region) @@ -1522,9 +771,9 @@ where raft_wb, region_id, first_index, - &self.raft_state, + self.raft_state(), )?; - self.cache = EntryCache::default(); + self.entry_storage.clear(); Ok(()) } @@ -1575,8 +824,8 @@ where Ok(()) } - pub fn get_raft_engine(&self) -> ER { - self.engines.raft.clone() + pub fn raft_engine(&self) -> &ER { + self.entry_storage.raft_engine() } /// Check whether the storage has finished applying snapshot. 
@@ -1721,14 +970,14 @@ where destroy_regions: Vec, ) -> Result<(HandleReadyResult, WriteTask)> { let region_id = self.get_region_id(); - let prev_raft_state = self.raft_state.clone(); + let prev_raft_state = self.raft_state().clone(); let mut write_task = WriteTask::new(region_id, self.peer_id, ready.number()); let mut res = HandleReadyResult::SendIOTask; if !ready.snapshot().is_empty() { fail_point!("raft_before_apply_snap"); - let last_first_index = self.first_index(); + let last_first_index = self.first_index().unwrap(); let snap_region = self.apply_snapshot(ready.snapshot(), &mut write_task, &destroy_regions)?; @@ -1747,15 +996,15 @@ where // Last index is 0 means the peer is created from raft message // and has not applied snapshot yet, so skip persistent hard state. - if self.raft_state.get_last_index() > 0 { + if self.raft_state().get_last_index() > 0 { if let Some(hs) = ready.hs() { - self.raft_state.set_hard_state(hs.clone()); + self.raft_state_mut().set_hard_state(hs.clone()); } } // Save raft state if it has changed or there is a snapshot. - if prev_raft_state != self.raft_state || !ready.snapshot().is_empty() { - write_task.raft_state = Some(self.raft_state.clone()); + if prev_raft_state != *self.raft_state() || !ready.snapshot().is_empty() { + write_task.raft_state = Some(self.raft_state().clone()); } if !ready.snapshot().is_empty() { @@ -1777,10 +1026,6 @@ where Ok((res, write_task)) } - pub fn update_cache_persisted(&mut self, persisted: u64) { - self.cache.update_persisted(persisted); - } - pub fn persist_snapshot(&mut self, res: &PersistSnapshotResult) { // cleanup data before scheduling apply task if self.is_initialized() { @@ -1821,10 +1066,6 @@ where // See comments in `apply_snapshot` for more details. self.set_region(res.region.clone()); } - - pub fn trace_cached_entries(&mut self, entries: CachedEntries) { - self.cache.trace_cached_entries(entries); - } } /// Delete all meta belong to the region. Results are stored in `wb`. 
@@ -1865,7 +1106,7 @@ pub fn do_snapshot( engine: &E, kv_snap: E::Snapshot, region_id: u64, - last_applied_index_term: u64, + last_applied_term: u64, last_applied_state: RaftApplyState, for_balance: bool, allow_multi_files_snapshot: bool, @@ -1894,7 +1135,7 @@ where let key = SnapKey::new( region_id, - last_applied_index_term, + last_applied_term, apply_state.get_applied_index(), ); @@ -1999,34 +1240,8 @@ pub fn write_peer_state( Ok(()) } -/// Committed entries sent to apply threads. -#[derive(Clone)] -pub struct CachedEntries { - pub range: Range, - // Entries and dangle size for them. `dangle` means not in entry cache. - entries: Arc, usize)>>, -} - -impl CachedEntries { - pub fn new(entries: Vec) -> Self { - assert!(!entries.is_empty()); - let start = entries.first().map(|x| x.index).unwrap(); - let end = entries.last().map(|x| x.index).unwrap() + 1; - let range = Range { start, end }; - CachedEntries { - entries: Arc::new(Mutex::new((entries, 0))), - range, - } - } - - /// Take cached entries and dangle size for them. `dangle` means not in entry cache. 
- pub fn take_entries(&self) -> (Vec, usize) { - mem::take(&mut *self.entries.lock().unwrap()) - } -} - #[cfg(test)] -mod tests { +pub mod tests { use std::{ cell::RefCell, path::Path, @@ -2058,27 +1273,13 @@ mod tests { store::{ async_io::write::write_to_db_for_test, bootstrap_store, + entry_storage::tests::validate_cache, fsm::apply::compact_raft_log, initial_region, prepare_bootstrap_cluster, worker::{RaftlogFetchRunner, RegionRunner, RegionTask}, }, }; - impl EntryCache { - fn new_with_cb(cb: impl Fn(i64) + Send + 'static) -> Self { - let entry_cache = EntryCache { - persisted: 0, - cache: Default::default(), - trace: Default::default(), - hit: Cell::new(0), - miss: Cell::new(0), - size_change_cb: Some(Box::new(cb) as Box), - }; - entry_cache.flush_mem_size_change(entry_cache.get_total_mem_size()); - entry_cache - } - } - fn new_storage( region_scheduler: Scheduler>, raftlog_fetch_scheduler: Scheduler, @@ -2113,7 +1314,7 @@ mod tests { .unwrap() } - fn new_storage_from_ents( + pub fn new_storage_from_ents( region_scheduler: Scheduler>, raftlog_fetch_scheduler: Scheduler, path: &TempDir, @@ -2124,15 +1325,15 @@ mod tests { store.append(ents[1..].to_vec(), &mut write_task); store.update_cache_persisted(ents.last().unwrap().get_index()); store - .apply_state + .apply_state_mut() .mut_truncated_state() .set_index(ents[0].get_index()); store - .apply_state + .apply_state_mut() .mut_truncated_state() .set_term(ents[0].get_term()); store - .apply_state + .apply_state_mut() .set_applied_index(ents.last().unwrap().get_index()); if write_task.kv_wb.is_none() { write_task.kv_wb = Some(store.engines.kv.write_batch()); @@ -2140,35 +1341,22 @@ mod tests { store .save_apply_state_to(write_task.kv_wb.as_mut().unwrap()) .unwrap(); - write_task.raft_state = Some(store.raft_state.clone()); + write_task.raft_state = Some(store.raft_state().clone()); write_to_db_for_test(&store.engines, write_task); store } - fn append_ents(store: &mut PeerStorage, ents: &[Entry]) { + pub fn 
append_ents(store: &mut PeerStorage, ents: &[Entry]) { if ents.is_empty() { return; } let mut write_task = WriteTask::new(store.get_region_id(), store.peer_id, 1); store.append(ents.to_vec(), &mut write_task); - write_task.raft_state = Some(store.raft_state.clone()); + write_task.raft_state = Some(store.raft_state().clone()); write_to_db_for_test(&store.engines, write_task); } - fn validate_cache(store: &PeerStorage, exp_ents: &[Entry]) { - assert_eq!(store.cache.cache, exp_ents); - for e in exp_ents { - let entry = store - .engines - .raft - .get_entry(store.get_region_id(), e.get_index()) - .unwrap() - .unwrap(); - assert_eq!(entry, *e); - } - } - - fn new_entry(index: u64, term: u64) -> Entry { + pub fn new_entry(index: u64, term: u64) -> Entry { let mut e = Entry::default(); e.set_index(index); e.set_term(term); @@ -2442,257 +1630,6 @@ mod tests { assert_ne!(count, 0); } - #[test] - fn test_async_fetch() { - let ents = vec![ - new_entry(2, 2), - new_entry(3, 3), - new_entry(4, 4), - new_entry(5, 5), - new_entry(6, 6), - ]; - - let td = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); - let region_worker = Worker::new("snap-manager").lazy_build("snap-manager"); - let region_scheduler = region_worker.scheduler(); - let (dummy_scheduler, _rx) = dummy_scheduler(); - let mut store = new_storage_from_ents(region_scheduler, dummy_scheduler, &td, &ents); - - let max_u64 = u64::max_value(); - let mut tests = vec![ - // already compacted - ( - 3, - 7, - max_u64, - 1, - RaftlogFetchResult { - ents: Err(RaftError::Store(StorageError::Compacted)), - low: 3, - max_size: max_u64, - hit_size_limit: false, - tried_cnt: 1, - term: 1, - }, - Err(RaftError::Store(StorageError::Compacted)), - vec![], - ), - // fetch partial entries due to max size limit - ( - 3, - 7, - 30, - 1, - RaftlogFetchResult { - ents: Ok(ents[1..4].to_vec()), - low: 3, - max_size: 30, - hit_size_limit: true, - tried_cnt: 1, - term: 1, - }, - Ok(3), - ents[1..4].to_vec(), - ), - // fetch all 
entries - ( - 2, - 7, - max_u64, - 1, - RaftlogFetchResult { - ents: Ok(ents.clone()), - low: 2, - max_size: max_u64, - hit_size_limit: false, - tried_cnt: 1, - term: 1, - }, - Ok(5), - ents.clone(), - ), - // high is smaller than before - ( - 3, - 5, - max_u64, - 1, - RaftlogFetchResult { - ents: Ok(ents[1..].to_vec()), - low: 3, - max_size: max_u64, - hit_size_limit: false, - tried_cnt: 1, - term: 1, - }, - Ok(2), - ents[1..3].to_vec(), - ), - // high is larger than before, second try - ( - 3, - 7, - max_u64, - 1, - RaftlogFetchResult { - ents: Ok(ents[1..4].to_vec()), - low: 3, - max_size: max_u64, - hit_size_limit: false, - tried_cnt: 1, - term: 1, - }, - Err(RaftError::Store(StorageError::LogTemporarilyUnavailable)), - vec![], - ), - // high is larger than before, thrid try - ( - 3, - 7, - max_u64, - 1, - RaftlogFetchResult { - ents: Ok(ents[1..4].to_vec()), - low: 3, - max_size: max_u64, - hit_size_limit: false, - tried_cnt: 2, - term: 1, - }, - Ok(4), - ents[1..].to_vec(), - ), - // max size is smaller than before - ( - 2, - 7, - 10, - 1, - RaftlogFetchResult { - ents: Ok(ents.clone()), - low: 2, - max_size: max_u64, - hit_size_limit: false, - tried_cnt: 1, - term: 1, - }, - Ok(2), - ents[..2].to_vec(), - ), - // max size is larger than before but with lower high - ( - 2, - 5, - 40, - 1, - RaftlogFetchResult { - ents: Ok(ents.clone()), - low: 2, - max_size: 30, - hit_size_limit: false, - tried_cnt: 1, - term: 1, - }, - Ok(3), - ents[..3].to_vec(), - ), - // low index is smaller than before - ( - 2, - 7, - max_u64, - 1, - RaftlogFetchResult { - ents: Err(RaftError::Store(StorageError::Compacted)), - low: 3, - max_size: max_u64, - hit_size_limit: false, - tried_cnt: 1, - term: 1, - }, - Err(RaftError::Store(StorageError::LogTemporarilyUnavailable)), - vec![], - ), - // low index is larger than before - ( - 4, - 7, - max_u64, - 1, - RaftlogFetchResult { - ents: Ok(vec![]), - low: 3, - max_size: max_u64, - hit_size_limit: false, - tried_cnt: 1, - term: 1, - }, - 
Err(RaftError::Store(StorageError::LogTemporarilyUnavailable)), - vec![], - ), - // hit tried several lmit - ( - 3, - 7, - max_u64, - 1, - RaftlogFetchResult { - ents: Ok(ents[1..4].to_vec()), - low: 3, - max_size: max_u64, - hit_size_limit: false, - tried_cnt: MAX_ASYNC_FETCH_TRY_CNT, - term: 1, - }, - Ok(4), - ents[1..5].to_vec(), - ), - // term is changed - ( - 3, - 7, - max_u64, - 2, - RaftlogFetchResult { - ents: Ok(ents[1..4].to_vec()), - low: 3, - max_size: max_u64, - hit_size_limit: false, - tried_cnt: MAX_ASYNC_FETCH_TRY_CNT, - term: 1, - }, - Ok(4), - ents[1..5].to_vec(), - ), - ]; - - for (i, (lo, hi, maxsize, term, async_res, expected_res, expected_ents)) in - tests.drain(..).enumerate() - { - if async_res.low != lo { - store.clean_async_fetch_res(lo); - } else { - store.update_async_fetch_res(lo, Some(Box::new(async_res))); - } - let mut ents = vec![]; - store.raft_state.mut_hard_state().set_term(term); - let res = store.async_fetch( - store.get_region_id(), - lo, - hi, - maxsize, - GetEntriesContext::empty(true), - &mut ents, - ); - if res != expected_res { - panic!("#{}: expect result {:?}, got {:?}", i, expected_res, res); - } - if ents != expected_ents { - panic!("#{}: expect ents {:?}, got {:?}", i, expected_ents, ents); - } - } - } - // last_index and first_index are not mutated by PeerStorage on its own, // so we don't test them here. @@ -2711,10 +1648,9 @@ mod tests { let sched = worker.scheduler(); let (dummy_scheduler, _) = dummy_scheduler(); let mut store = new_storage_from_ents(sched, dummy_scheduler, &td, &ents); - let res = store - .term(idx) - .map_err(From::from) - .and_then(|term| compact_raft_log(&store.tag, &mut store.apply_state, idx, term)); + let res = store.term(idx).map_err(From::from).and_then(|term| { + compact_raft_log(&store.tag, store.entry_storage.apply_state_mut(), idx, term) + }); // TODO check exact error type after refactoring error. 
if res.is_err() ^ werr.is_err() { panic!("#{}: want {:?}, got {:?}", i, werr, res); @@ -2834,10 +1770,10 @@ mod tests { let mut hs = HardState::default(); hs.set_commit(7); hs.set_term(5); - s.raft_state.set_hard_state(hs); - s.raft_state.set_last_index(7); - s.apply_state.set_applied_index(7); - write_task.raft_state = Some(s.raft_state.clone()); + s.raft_state_mut().set_hard_state(hs); + s.raft_state_mut().set_last_index(7); + s.apply_state_mut().set_applied_index(7); + write_task.raft_state = Some(s.raft_state().clone()); if write_task.kv_wb.is_none() { write_task.kv_wb = Some(s.engines.kv.write_batch()); } @@ -2845,7 +1781,7 @@ mod tests { .unwrap(); write_to_db_for_test(&s.engines, write_task); let term = s.term(7).unwrap(); - compact_raft_log(&s.tag, &mut s.apply_state, 7, term).unwrap(); + compact_raft_log(&s.tag, s.entry_storage.apply_state_mut(), 7, term).unwrap(); let mut kv_wb = s.engines.kv.write_batch(); s.save_apply_state_to(&mut kv_wb).unwrap(); kv_wb.write().unwrap(); @@ -2970,246 +1906,6 @@ mod tests { test_storage_create_snapshot_for_role("tikv", 5); } - #[test] - fn test_storage_append() { - let ents = vec![new_entry(3, 3), new_entry(4, 4), new_entry(5, 5)]; - let mut tests = vec![ - ( - vec![new_entry(4, 6), new_entry(5, 6)], - vec![new_entry(4, 6), new_entry(5, 6)], - ), - ( - vec![new_entry(4, 4), new_entry(5, 5), new_entry(6, 5)], - vec![new_entry(4, 4), new_entry(5, 5), new_entry(6, 5)], - ), - // truncate the existing entries and append - (vec![new_entry(4, 5)], vec![new_entry(4, 5)]), - // direct append - ( - vec![new_entry(6, 5)], - vec![new_entry(4, 4), new_entry(5, 5), new_entry(6, 5)], - ), - ]; - for (i, (entries, wentries)) in tests.drain(..).enumerate() { - let td = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); - let worker = LazyWorker::new("snap-manager"); - let sched = worker.scheduler(); - let (dummy_scheduler, _) = dummy_scheduler(); - let mut store = new_storage_from_ents(sched, dummy_scheduler, &td, &ents); - 
append_ents(&mut store, &entries); - let li = store.last_index(); - let actual_entries = store - .entries(4, li + 1, u64::max_value(), GetEntriesContext::empty(false)) - .unwrap(); - if actual_entries != wentries { - panic!("#{}: want {:?}, got {:?}", i, wentries, actual_entries); - } - } - } - - #[test] - fn test_storage_cache_fetch() { - let ents = vec![new_entry(3, 3), new_entry(4, 4), new_entry(5, 5)]; - let td = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); - let worker = LazyWorker::new("snap-manager"); - let sched = worker.scheduler(); - let (dummy_scheduler, _) = dummy_scheduler(); - let mut store = new_storage_from_ents(sched, dummy_scheduler, &td, &ents); - store.cache.cache.clear(); - // empty cache should fetch data from rocksdb directly. - let mut res = store - .entries(4, 6, u64::max_value(), GetEntriesContext::empty(false)) - .unwrap(); - assert_eq!(*res, ents[1..]); - - let entries = vec![new_entry(6, 5), new_entry(7, 5)]; - append_ents(&mut store, &entries); - validate_cache(&store, &entries); - - // direct cache access - res = store - .entries(6, 8, u64::max_value(), GetEntriesContext::empty(false)) - .unwrap(); - assert_eq!(res, entries); - - // size limit should be supported correctly. - res = store - .entries(4, 8, 0, GetEntriesContext::empty(false)) - .unwrap(); - assert_eq!(res, vec![new_entry(4, 4)]); - let mut size = ents[1..].iter().map(|e| u64::from(e.compute_size())).sum(); - res = store - .entries(4, 8, size, GetEntriesContext::empty(false)) - .unwrap(); - let mut exp_res = ents[1..].to_vec(); - assert_eq!(res, exp_res); - for e in &entries { - size += u64::from(e.compute_size()); - exp_res.push(e.clone()); - res = store - .entries(4, 8, size, GetEntriesContext::empty(false)) - .unwrap(); - assert_eq!(res, exp_res); - } - - // range limit should be supported correctly. 
- for low in 4..9 { - for high in low..9 { - let res = store - .entries(low, high, u64::max_value(), GetEntriesContext::empty(false)) - .unwrap(); - assert_eq!(*res, exp_res[low as usize - 4..high as usize - 4]); - } - } - } - - #[test] - fn test_storage_cache_update() { - let ents = vec![new_entry(3, 3), new_entry(4, 4), new_entry(5, 5)]; - let td = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); - let worker = LazyWorker::new("snap-manager"); - let sched = worker.scheduler(); - let (dummy_scheduler, _) = dummy_scheduler(); - let mut store = new_storage_from_ents(sched, dummy_scheduler, &td, &ents); - store.cache.cache.clear(); - - // initial cache - let mut entries = vec![new_entry(6, 5), new_entry(7, 5)]; - append_ents(&mut store, &entries); - validate_cache(&store, &entries); - - // rewrite - entries = vec![new_entry(6, 6), new_entry(7, 6)]; - append_ents(&mut store, &entries); - validate_cache(&store, &entries); - - // rewrite old entry - entries = vec![new_entry(5, 6), new_entry(6, 6)]; - append_ents(&mut store, &entries); - validate_cache(&store, &entries); - - // partial rewrite - entries = vec![new_entry(6, 7), new_entry(7, 7)]; - append_ents(&mut store, &entries); - let mut exp_res = vec![new_entry(5, 6), new_entry(6, 7), new_entry(7, 7)]; - validate_cache(&store, &exp_res); - - // direct append - entries = vec![new_entry(8, 7), new_entry(9, 7)]; - append_ents(&mut store, &entries); - exp_res.extend_from_slice(&entries); - validate_cache(&store, &exp_res); - - // rewrite middle - entries = vec![new_entry(7, 8)]; - append_ents(&mut store, &entries); - exp_res.truncate(2); - exp_res.push(new_entry(7, 8)); - validate_cache(&store, &exp_res); - - // compact to min(5 + 1, 7) - store.cache.persisted = 5; - store.compact_entry_cache(7); - exp_res = vec![new_entry(6, 7), new_entry(7, 8)]; - validate_cache(&store, &exp_res); - - // compact to min(7 + 1, 7) - store.cache.persisted = 7; - store.compact_entry_cache(7); - exp_res = vec![new_entry(7, 8)]; 
- validate_cache(&store, &exp_res); - // compact all - store.compact_entry_cache(8); - validate_cache(&store, &[]); - // invalid compaction should be ignored. - store.compact_entry_cache(6); - } - - #[test] - fn test_storage_cache_size_change() { - let new_padded_entry = |index: u64, term: u64, pad_len: usize| { - let mut e = new_entry(index, term); - e.data = vec![b'x'; pad_len].into(); - e - }; - - // Test the initial data structure size. - let (tx, rx) = mpsc::sync_channel(8); - let mut cache = EntryCache::new_with_cb(move |c: i64| tx.send(c).unwrap()); - assert_eq!(rx.try_recv().unwrap(), 896); - - cache.append( - "", - &[new_padded_entry(101, 1, 1), new_padded_entry(102, 1, 2)], - ); - assert_eq!(rx.try_recv().unwrap(), 3); - - // Test size change for one overlapped entry. - cache.append("", &[new_padded_entry(102, 2, 3)]); - assert_eq!(rx.try_recv().unwrap(), 1); - - // Test size change for all overlapped entries. - cache.append( - "", - &[new_padded_entry(101, 3, 4), new_padded_entry(102, 3, 5)], - ); - assert_eq!(rx.try_recv().unwrap(), 5); - - cache.append("", &[new_padded_entry(103, 3, 6)]); - assert_eq!(rx.try_recv().unwrap(), 6); - - // Test trace a dangle entry. - let cached_entries = CachedEntries::new(vec![new_padded_entry(100, 1, 1)]); - cache.trace_cached_entries(cached_entries); - assert_eq!(rx.try_recv().unwrap(), 1); - - // Test trace an entry which is still in cache. - let cached_entries = CachedEntries::new(vec![new_padded_entry(102, 3, 5)]); - cache.trace_cached_entries(cached_entries); - assert_eq!(rx.try_recv().unwrap(), 0); - - // Test compare `cached_last` with `trunc_to_idx` in `EntryCache::append_impl`. - cache.append("", &[new_padded_entry(103, 4, 7)]); - assert_eq!(rx.try_recv().unwrap(), 1); - - // Test compact one traced dangle entry and one entry in cache. - cache.persisted = 101; - cache.compact_to(102); - assert_eq!(rx.try_recv().unwrap(), -5); - - // Test compact the last traced dangle entry. 
- cache.persisted = 102; - cache.compact_to(103); - assert_eq!(rx.try_recv().unwrap(), -5); - - // Test compact all entries. - cache.persisted = 103; - cache.compact_to(104); - assert_eq!(rx.try_recv().unwrap(), -7); - - drop(cache); - assert_eq!(rx.try_recv().unwrap(), -896); - } - - #[test] - fn test_storage_cache_entry() { - let mut cache = EntryCache::default(); - let ents = vec![ - new_entry(3, 3), - new_entry(4, 4), - new_entry(5, 4), - new_entry(6, 6), - ]; - cache.append("", &ents); - assert!(cache.entry(1).is_none()); - assert!(cache.entry(2).is_none()); - for e in &ents { - assert_eq!(e, cache.entry(e.get_index()).unwrap()); - } - let res = panic_hook::recover_safe(|| cache.entry(7)); - assert!(res.is_err()); - } - #[test] fn test_storage_apply_snapshot() { let ents = vec![ @@ -3256,18 +1952,18 @@ mod tests { let td2 = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); let mut s2 = new_storage(sched.clone(), dummy_scheduler.clone(), &td2); - assert_eq!(s2.first_index(), s2.applied_index() + 1); + assert_eq!(s2.first_index(), Ok(s2.applied_index() + 1)); let mut write_task = WriteTask::new(s2.get_region_id(), s2.peer_id, 1); let snap_region = s2.apply_snapshot(&snap1, &mut write_task, &[]).unwrap(); let mut snap_data = RaftSnapshotData::default(); snap_data.merge_from_bytes(snap1.get_data()).unwrap(); assert_eq!(snap_region, snap_data.take_region(),); - assert_eq!(s2.last_term, snap1.get_metadata().get_term()); - assert_eq!(s2.apply_state.get_applied_index(), 6); - assert_eq!(s2.raft_state.get_last_index(), 6); - assert_eq!(s2.apply_state.get_truncated_state().get_index(), 6); - assert_eq!(s2.apply_state.get_truncated_state().get_term(), 6); - assert_eq!(s2.first_index(), s2.applied_index() + 1); + assert_eq!(s2.last_term(), snap1.get_metadata().get_term()); + assert_eq!(s2.apply_state().get_applied_index(), 6); + assert_eq!(s2.raft_state().get_last_index(), 6); + assert_eq!(s2.apply_state().get_truncated_state().get_index(), 6); + 
assert_eq!(s2.apply_state().get_truncated_state().get_term(), 6); + assert_eq!(s2.first_index(), Ok(s2.applied_index() + 1)); validate_cache(&s2, &[]); let td3 = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); @@ -3279,11 +1975,11 @@ mod tests { let mut snap_data = RaftSnapshotData::default(); snap_data.merge_from_bytes(snap1.get_data()).unwrap(); assert_eq!(snap_region, snap_data.take_region(),); - assert_eq!(s3.last_term, snap1.get_metadata().get_term()); - assert_eq!(s3.apply_state.get_applied_index(), 6); - assert_eq!(s3.raft_state.get_last_index(), 6); - assert_eq!(s3.apply_state.get_truncated_state().get_index(), 6); - assert_eq!(s3.apply_state.get_truncated_state().get_term(), 6); + assert_eq!(s3.last_term(), snap1.get_metadata().get_term()); + assert_eq!(s3.apply_state().get_applied_index(), 6); + assert_eq!(s3.raft_state().get_last_index(), 6); + assert_eq!(s3.apply_state().get_truncated_state().get_index(), 6); + assert_eq!(s3.apply_state().get_truncated_state().get_term(), 6); validate_cache(&s3, &[]); } diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index a506ab80f17..81358c989e0 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -149,7 +149,7 @@ pub struct ReadDelegate { pub region: Arc, pub peer_id: u64, pub term: u64, - pub applied_index_term: u64, + pub applied_term: u64, pub leader_lease: Option, pub last_valid_ts: Timespec, @@ -230,7 +230,7 @@ impl ReadDelegate { region: Arc::new(region), peer_id, term: peer.term(), - applied_index_term: peer.get_store().applied_index_term(), + applied_term: peer.get_store().applied_term(), leader_lease: None, last_valid_ts: Timespec::new(0, 0), tag: format!("[region {}] {}", region_id, peer_id), @@ -262,8 +262,8 @@ impl ReadDelegate { Progress::Term(term) => { self.term = term; } - Progress::AppliedIndexTerm(applied_index_term) => { - self.applied_index_term = applied_index_term; 
+ Progress::AppliedTerm(applied_term) => { + self.applied_term = applied_term; } Progress::LeaderLease(leader_lease) => { self.leader_lease = Some(leader_lease); @@ -358,7 +358,7 @@ impl ReadDelegate { region: Arc::new(region), peer_id: 1, term: 1, - applied_index_term: 1, + applied_term: 1, leader_lease: None, last_valid_ts: Timespec::new(0, 0), tag: format!("[region {}] {}", region_id, 1), @@ -377,11 +377,11 @@ impl Display for ReadDelegate { write!( f, "ReadDelegate for region {}, \ - leader {} at term {}, applied_index_term {}, has lease {}", + leader {} at term {}, applied_term {}, has lease {}", self.region.get_id(), self.peer_id, self.term, - self.applied_index_term, + self.applied_term, self.leader_lease.is_some(), ) } @@ -391,7 +391,7 @@ impl Display for ReadDelegate { pub enum Progress { Region(metapb::Region), Term(u64), - AppliedIndexTerm(u64), + AppliedTerm(u64), LeaderLease(RemoteLease), RegionBuckets(Arc), } @@ -405,8 +405,8 @@ impl Progress { Progress::Term(term) } - pub fn applied_index_term(applied_index_term: u64) -> Progress { - Progress::AppliedIndexTerm(applied_index_term) + pub fn applied_term(applied_term: u64) -> Progress { + Progress::AppliedTerm(applied_term) } pub fn leader_lease(lease: RemoteLease) -> Progress { @@ -752,13 +752,13 @@ struct Inspector<'r, 'm> { impl<'r, 'm> RequestInspector for Inspector<'r, 'm> { fn has_applied_to_current_term(&mut self) -> bool { - if self.delegate.applied_index_term == self.delegate.term { + if self.delegate.applied_term == self.delegate.term { true } else { debug!( "rejected by term check"; "tag" => &self.delegate.tag, - "applied_index_term" => self.delegate.applied_index_term, + "applied_term" => self.delegate.applied_term, "delegate_term" => ?self.delegate.term, ); @@ -1078,7 +1078,7 @@ mod tests { // Register region 1 lease.renew(monotonic_raw_now()); let remote = lease.maybe_new_remote_lease(term6).unwrap(); - // But the applied_index_term is stale. + // But the applied_term is stale. 
{ let mut meta = store_meta.lock().unwrap(); let read_delegate = ReadDelegate { @@ -1086,7 +1086,7 @@ mod tests { region: Arc::new(region1.clone()), peer_id: leader2.get_id(), term: term6, - applied_index_term: term6 - 1, + applied_term: term6 - 1, leader_lease: Some(remote), last_valid_ts: Timespec::new(0, 0), txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::default())), @@ -1099,13 +1099,13 @@ mod tests { meta.readers.insert(1, read_delegate); } - // The applied_index_term is stale + // The applied_term is stale must_redirect(&mut reader, &rx, cmd.clone()); assert_eq!(reader.metrics.rejected_by_cache_miss, 2); assert_eq!(reader.metrics.rejected_by_applied_term, 1); - // Make the applied_index_term matches current term. - let pg = Progress::applied_index_term(term6); + // Make the applied_term matches current term. + let pg = Progress::applied_term(term6); { let mut meta = store_meta.lock().unwrap(); meta.readers.get_mut(&1).unwrap().update(pg); @@ -1236,7 +1236,7 @@ mod tests { meta.readers .get_mut(&1) .unwrap() - .update(Progress::applied_index_term(term6 + 3)); + .update(Progress::applied_term(term6 + 3)); } reader.propose_raft_command( None, @@ -1329,7 +1329,7 @@ mod tests { region: Arc::new(region.clone()), peer_id: 1, term: 1, - applied_index_term: 1, + applied_term: 1, leader_lease: None, last_valid_ts: Timespec::new(0, 0), txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::default())), @@ -1345,7 +1345,7 @@ mod tests { let d = reader.get_delegate(1).unwrap(); assert_eq!(&*d.region, ®ion); assert_eq!(d.term, 1); - assert_eq!(d.applied_index_term, 1); + assert_eq!(d.applied_term, 1); assert!(d.leader_lease.is_none()); drop(d); @@ -1370,9 +1370,9 @@ mod tests { meta.readers .get_mut(&1) .unwrap() - .update(Progress::applied_index_term(2)); + .update(Progress::applied_term(2)); } - assert_eq!(reader.get_delegate(1).unwrap().applied_index_term, 2); + assert_eq!(reader.get_delegate(1).unwrap().applied_term, 2); { let mut lease = 
Lease::new(Duration::seconds(1), Duration::milliseconds(250)); // 1s is long enough. diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 0ac92103129..4bc5cc032a3 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -72,7 +72,7 @@ const ENGINE: &str = "engine"; pub enum Task { Gen { region_id: u64, - last_applied_index_term: u64, + last_applied_term: u64, last_applied_state: RaftApplyState, kv_snap: S, canceled: Arc, @@ -262,7 +262,7 @@ where fn generate_snap( &self, region_id: u64, - last_applied_index_term: u64, + last_applied_term: u64, last_applied_state: RaftApplyState, kv_snap: EK::Snapshot, notifier: SyncSender, @@ -275,7 +275,7 @@ where &self.engine, kv_snap, region_id, - last_applied_index_term, + last_applied_term, last_applied_state, for_balance, allow_multi_files_snapshot, @@ -301,7 +301,7 @@ where fn handle_gen( &self, region_id: u64, - last_applied_index_term: u64, + last_applied_term: u64, last_applied_state: RaftApplyState, kv_snap: EK::Snapshot, canceled: Arc, @@ -325,7 +325,7 @@ where if let Err(e) = self.generate_snap( region_id, - last_applied_index_term, + last_applied_term, last_applied_state, kv_snap, notifier, @@ -703,7 +703,7 @@ where match task { Task::Gen { region_id, - last_applied_index_term, + last_applied_term, last_applied_state, kv_snap, canceled, @@ -742,7 +742,7 @@ where tikv_alloc::add_thread_memory_accessor(); ctx.handle_gen( region_id, - last_applied_index_term, + last_applied_term, last_applied_state, kv_snap, canceled, @@ -1055,7 +1055,7 @@ mod tests { .schedule(Task::Gen { region_id: id, kv_snap: engine.kv.snapshot(), - last_applied_index_term: entry.get_term(), + last_applied_term: entry.get_term(), last_applied_state: apply_state, canceled: Arc::new(AtomicBool::new(false)), notifier: tx, From 0dc72407e87bf9861991b05d72391bc8fd149871 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 27 Jul 2022 
14:43:11 +0800 Subject: [PATCH 0115/1149] raftstorev2: fix cached tablet bug (#13127) ref tikv/tikv#12842 None Signed-off-by: tabokie Co-authored-by: Ti Chi Robot --- Cargo.lock | 16 +- components/raftstore-v2/src/batch/apply.rs | 4 +- components/raftstore-v2/src/batch/store.rs | 16 +- components/raftstore-v2/src/bootstrap.rs | 169 +++++++++++--------- components/raftstore-v2/src/fsm/apply.rs | 3 +- components/raftstore-v2/src/fsm/peer.rs | 3 +- components/raftstore-v2/src/fsm/store.rs | 3 +- components/raftstore-v2/src/raft/peer.rs | 2 +- components/raftstore-v2/src/raft/storage.rs | 4 +- components/raftstore-v2/src/tablet.rs | 23 ++- components/tikv_util/src/lib.rs | 1 + 11 files changed, 137 insertions(+), 107 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fb4e4d1e6a9..87dc15eb69a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -84,7 +84,7 @@ dependencies = [ "codec", "engine_traits", "kvproto", - "match_template", + "match-template", "panic_hook", "thiserror", "tikv_alloc", @@ -2852,8 +2852,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e88c3cbe8288f77f293e48a28b3232e3defd203a6d839fa7f68ea4329e83464" [[package]] -name = "match_template" +name = "match-template" version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c334ac67725febd94c067736ac46ef1c7cacf1c743ca14b9f917c2df2c20acd8" dependencies = [ "proc-macro2", "quote", @@ -5827,7 +5829,7 @@ dependencies = [ name = "tidb_query_aggr" version = "0.0.1" dependencies = [ - "match_template", + "match-template", "panic_hook", "tidb_query_codegen", "tidb_query_common", @@ -5886,7 +5888,7 @@ dependencies = [ "kvproto", "lazy_static", "log_wrappers", - "match_template", + "match-template", "nom 5.1.0", "num 0.3.0", "num-derive", @@ -5918,7 +5920,7 @@ dependencies = [ "itertools", "kvproto", "log_wrappers", - "match_template", + "match-template", "protobuf", "slog", "slog-global", @@ -5947,7 +5949,7 @@ dependencies = [ "flate2", "hex 
0.4.2", "log_wrappers", - "match_template", + "match-template", "num 0.3.0", "num-traits", "openssl", @@ -6024,7 +6026,7 @@ dependencies = [ "libloading", "log", "log_wrappers", - "match_template", + "match-template", "memory_trace_macros", "mime", "more-asserts", diff --git a/components/raftstore-v2/src/batch/apply.rs b/components/raftstore-v2/src/batch/apply.rs index ab44d435e67..f71c98e5c86 100644 --- a/components/raftstore-v2/src/batch/apply.rs +++ b/components/raftstore-v2/src/batch/apply.rs @@ -50,7 +50,7 @@ pub struct ApplyPoller { impl ApplyPoller { pub fn new(apply_ctx: ApplyContext, cfg_tracker: Tracker) -> ApplyPoller { - ApplyPoller { + Self { apply_task_buf: Vec::new(), pending_latency_inspect: Vec::new(), apply_ctx, @@ -58,7 +58,7 @@ impl ApplyPoller { } } - /// Updates the internal buffer to latest capacity. + /// Updates the internal buffer to match the latest configuration. fn apply_buf_capacity(&mut self) { let new_cap = self.messages_per_tick(); tikv_util::set_vec_capacity(&mut self.apply_task_buf, new_cap); diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 2dce4b54c2a..1d84ba47302 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -26,7 +26,7 @@ use crate::{ Error, PeerMsg, PeerTick, Result, StoreMsg, }; -/// A per thread context used for handling raft messages. +/// A per-thread context used for handling raft messages. pub struct StoreContext { /// A logger without any KV. It's clean for creating new PeerFSM. pub logger: Logger, @@ -64,19 +64,17 @@ struct StorePoller { impl StorePoller { pub fn new(poll_ctx: StoreContext, cfg_tracker: Tracker) -> Self { - let mut poller = Self { + Self { store_msg_buf: Vec::new(), peer_msg_buf: Vec::new(), poll_ctx, cfg_tracker, last_flush_time: TiInstant::now(), need_flush_events: false, - }; - poller.apply_buf_capacity(); - poller + } } - /// Updates the internal buffer to latest capacity. 
+ /// Updates the internal buffer to match the latest configuration. fn apply_buf_capacity(&mut self) { let new_cap = self.messages_per_tick(); tikv_util::set_vec_capacity(&mut self.store_msg_buf, new_cap); @@ -119,6 +117,7 @@ impl PollHandler Option { + debug_assert!(self.store_msg_buf.is_empty()); let received_cnt = store.recv(&mut self.store_msg_buf); let expected_msg_count = if received_cnt == self.messages_per_tick() { None @@ -134,6 +133,7 @@ impl PollHandler>, ) -> HandleResult { + debug_assert!(self.peer_msg_buf.is_empty()); let received_cnt = peer.recv(&mut self.peer_msg_buf); let handle_result = if received_cnt == self.messages_per_tick() { HandleResult::KeepProcessing @@ -203,7 +203,7 @@ impl StorePollerBuilder { } } - /// Init all the existing raft machine and cleanup stale tablets. + /// Initializes all the existing raft machines and cleanup stale tablets. fn init(&self) -> Result>> { let mut regions = HashMap::default(); let cfg = self.cfg.value(); @@ -328,7 +328,7 @@ impl StoreSystem { pub type StoreRouter = BatchRouter, StoreFsm>; -/// Create the batch system for polling raft activities. +/// Creates the batch system for polling raft activities. pub fn create_store_batch_system( cfg: &Config, store: Store, diff --git a/components/raftstore-v2/src/bootstrap.rs b/components/raftstore-v2/src/bootstrap.rs index 55e1f6814c5..c3e2d2de6f7 100644 --- a/components/raftstore-v2/src/bootstrap.rs +++ b/components/raftstore-v2/src/bootstrap.rs @@ -22,9 +22,16 @@ const CHECK_CLUSTER_BOOTSTRAPPED_RETRY_INTERVAL: Duration = Duration::from_secs( /// A struct for bootstrapping the store. /// -/// A typical bootstrap process should follow following order: -/// 1. bootstrap the store to get a store ID. -/// 2. bootstrap the first region using the last store ID. +/// A typical bootstrap process should take the following steps: +/// +/// 1. Calls `bootstrap_store` to bootstrap the store. +/// 2. 
Calls `bootstrap_first_region` to bootstrap the first region using store +/// ID returned from last step. +/// +/// # Safety +/// +/// These steps are re-entrant, i.e. the caller can redo any steps whether or +/// not they fail or succeed. pub struct Bootstrap<'a, ER: RaftEngine> { engine: &'a ER, cluster_id: u64, @@ -33,8 +40,8 @@ pub struct Bootstrap<'a, ER: RaftEngine> { logger: Logger, } -// Although all methods won't change internal state, but they still receive `&mut self` as it's -// not thread safe to bootstrap concurrently. +// Although all methods won't change internal state, but they still receive +// `&mut self` as it's not thread safe to bootstrap concurrently. impl<'a, ER: RaftEngine> Bootstrap<'a, ER> { pub fn new( engine: &'a ER, @@ -50,9 +57,9 @@ impl<'a, ER: RaftEngine> Bootstrap<'a, ER> { } } - /// check store, return store id for the engine. - /// If the store is not bootstrapped, use None. - fn check_store(&mut self) -> Result> { + /// Gets and validates the store ID from engine if it's already + /// bootstrapped. + fn check_store_id_in_engine(&mut self) -> Result> { let ident = match self.engine.get_store_ident()? { Some(ident) => ident, None => return Ok(None), @@ -60,7 +67,8 @@ impl<'a, ER: RaftEngine> Bootstrap<'a, ER> { if ident.get_cluster_id() != self.cluster_id { return Err(box_err!( "cluster ID mismatch, local {} != remote {}, \ - you are trying to connect to another cluster, please reconnect to the correct PD", + you are trying to connect to another cluster, \ + please reconnect to the correct PD", ident.get_cluster_id(), self.cluster_id )); @@ -71,13 +79,22 @@ impl<'a, ER: RaftEngine> Bootstrap<'a, ER> { Ok(Some(ident.get_store_id())) } - fn inner_bootstrap_store(&mut self) -> Result { + /// Bootstraps the store and returns the store ID. + /// + /// The bootstrapping basically allocates a new store ID from PD and writes + /// it to engine with sync=true. + /// + /// If the store is already bootstrapped, return the store ID directly. 
+ pub fn bootstrap_store(&mut self) -> Result { + if let Some(id) = self.check_store_id_in_engine()? { + return Ok(id); + } + if !self.engine.is_empty()? { + return Err(box_err!("store is not empty and has already had data")); + } let id = self.pd_client.alloc_id()?; debug!(self.logger, "alloc store id"; "store_id" => id); let mut ident = StoreIdent::default(); - if !self.engine.is_empty()? { - return Err(box_err!("store is not empty and has already had data.")); - } ident.set_cluster_id(self.cluster_id); ident.set_store_id(id); self.engine.put_store_ident(&ident)?; @@ -88,18 +105,6 @@ impl<'a, ER: RaftEngine> Bootstrap<'a, ER> { Ok(id) } - /// Bootstrap the store and return the store ID. - /// - /// If store is bootstrapped already, return the store ID directly. - pub fn bootstrap_store(&mut self) -> Result { - let store_id = match self.check_store()? { - Some(id) => id, - None => self.inner_bootstrap_store()?, - }; - - Ok(store_id) - } - fn prepare_bootstrap_first_region(&mut self, store_id: u64) -> Result { let region_id = self.pd_client.alloc_id()?; debug!( @@ -127,7 +132,7 @@ impl<'a, ER: RaftEngine> Bootstrap<'a, ER> { Ok(region) } - fn check_first_region_bootstrapped(&mut self) -> Result { + fn check_pd_first_region_bootstrapped(&mut self) -> Result { for _ in 0..MAX_CHECK_CLUSTER_BOOTSTRAPPED_RETRY_COUNT { match self.pd_client.is_cluster_bootstrapped() { Ok(b) => return Ok(b), @@ -140,21 +145,6 @@ impl<'a, ER: RaftEngine> Bootstrap<'a, ER> { Err(box_err!("check cluster bootstrapped failed")) } - fn check_or_prepare_bootstrap_first_region(&mut self, store_id: u64) -> Result> { - if let Some(first_region) = self.engine.get_prepare_bootstrap_region()? { - // Bootstrap is aborted last time, resume. It may succeed or fail last time, no matter - // what, at least we need a way to clean up. - Ok(Some(first_region)) - } else if self.check_first_region_bootstrapped()? 
{ - // If other node has bootstrap the cluster, skip to avoid useless ID allocating and - // disk writes. - Ok(None) - } else { - // We are probably the first one triggering bootstrap. - self.prepare_bootstrap_first_region(store_id).map(Some) - } - } - fn clear_prepare_bootstrap(&mut self, first_region_id: Option) -> Result<()> { let mut wb = self.engine.log_batch(10); wb.remove_prepare_bootstrap_region()?; @@ -168,11 +158,44 @@ impl<'a, ER: RaftEngine> Bootstrap<'a, ER> { Ok(()) } - fn inner_bootstrap_first_region( + /// Bootstraps the first region of this cluster. + /// + /// The bootstrapping starts by allocating a region ID from PD. Then it + /// initializes the region's state and writes a preparing marker to the + /// engine. After attempting to register itself as the first region to PD, + /// the preparing marker is deleted from the engine. + /// + /// On the occasion that the someone else bootstraps the first region + /// before us, the region state is cleared and `None` is returned. + pub fn bootstrap_first_region( &mut self, store: &Store, - first_region: &Region, - ) -> Result { + store_id: u64, + ) -> Result> { + let first_region = match self.engine.get_prepare_bootstrap_region()? { + // The last bootstrap aborts. We need to resume or clean it up. + Some(r) => r, + None => { + if self.check_pd_first_region_bootstrapped()? { + // If other node has bootstrap the cluster, skip to avoid + // useless ID allocating and disk writes. + return Ok(None); + } + self.prepare_bootstrap_first_region(store_id)? 
+ } + }; + + info!( + self.logger, + "trying to bootstrap first region"; + "store_id" => store_id, + "region" => ?first_region + ); + // cluster is not bootstrapped, and we choose first store to bootstrap + fail_point!("node_after_prepare_bootstrap_cluster", |_| Err(box_err!( + "injected error: node_after_prepare_bootstrap_cluster" + ))); + let region_id = first_region.get_id(); let mut retry = 0; while retry < MAX_CHECK_CLUSTER_BOOTSTRAPPED_RETRY_COUNT { @@ -181,23 +204,32 @@ impl<'a, ER: RaftEngine> Bootstrap<'a, ER> { .bootstrap_cluster(store.clone(), first_region.clone()) { Ok(_) => { - info!(self.logger, "bootstrap cluster ok"; "cluster_id" => self.cluster_id); + info!( + self.logger, + "bootstrap cluster ok"; + "cluster_id" => self.cluster_id + ); fail_point!("node_after_bootstrap_cluster", |_| Err(box_err!( "injected error: node_after_bootstrap_cluster" ))); self.clear_prepare_bootstrap(None)?; - return Ok(true); + return Ok(Some(first_region)); } Err(pd_client::Error::ClusterBootstrapped(_)) => { match self.pd_client.get_region(b"") { Ok(region) => { - if region == *first_region { + if region == first_region { + // It is bootstrapped by us before. 
self.clear_prepare_bootstrap(None)?; - return Ok(true); + return Ok(Some(first_region)); } else { - info!(self.logger, "cluster is already bootstrapped"; "cluster_id" => self.cluster_id); + info!( + self.logger, + "cluster is already bootstrapped"; + "cluster_id" => self.cluster_id + ); self.clear_prepare_bootstrap(Some(region_id))?; - return Ok(false); + return Ok(None); } } Err(e) => { @@ -206,36 +238,21 @@ impl<'a, ER: RaftEngine> Bootstrap<'a, ER> { } } Err(e) => { - error!(self.logger, "bootstrap cluster"; "cluster_id" => self.cluster_id, "err" => ?e, "err_code" => %e.error_code()) + error!( + self.logger, + "bootstrap cluster failed once"; + "cluster_id" => self.cluster_id, + "err" => ?e, + "err_code" => %e.error_code() + ); } } retry += 1; thread::sleep(CHECK_CLUSTER_BOOTSTRAPPED_RETRY_INTERVAL); } - Err(box_err!("bootstrapped cluster failed")) - } - - /// Bootstrap the first region. - /// - /// If the cluster is already bootstrapped, `None` is returned. - pub fn bootstrap_first_region( - &mut self, - store: &Store, - store_id: u64, - ) -> Result> { - let first_region = match self.check_or_prepare_bootstrap_first_region(store_id)? { - Some(r) => r, - None => return Ok(None), - }; - info!(self.logger, "trying to bootstrap first region"; "store_id" => store_id, "region" => ?first_region); - // cluster is not bootstrapped, and we choose first store to bootstrap - fail_point!("node_after_prepare_bootstrap_cluster", |_| Err(box_err!( - "injected error: node_after_prepare_bootstrap_cluster" - ))); - if self.inner_bootstrap_first_region(store, &first_region)? 
{ - Ok(Some(first_region)) - } else { - Ok(None) - } + Err(box_err!( + "bootstrapped cluster failed after {} attempts", + retry + )) } } diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 43e3441528e..21646be4738 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -26,7 +26,8 @@ impl ApplyFsm { ) } - /// Fetches tasks to `apply_task_buf`. It will stop when the buffer is full. + /// Fetches messages to `apply_task_buf`. It will stop when the buffer + /// capacity is reached or there is no more pending messages. /// /// Returns how many messages are fetched. pub fn recv(&mut self, apply_task_buf: &mut Vec) -> usize { diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 8187575d658..88d7b479e49 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -47,7 +47,8 @@ impl PeerFsm { self.peer.logger() } - /// Fetches messages to `peer_msg_buf`. It will stop when the buffer is full. + /// Fetches messages to `peer_msg_buf`. It will stop when the buffer + /// capacity is reached or there is no more pending messages. /// /// Returns how many messages are fetched. pub fn recv(&mut self, peer_msg_buf: &mut Vec>) -> usize { diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 091b3fe11e9..257028f1630 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -23,7 +23,8 @@ impl StoreFsm { (tx, fsm) } - /// Fetches messages to `store_msg_buf`. It will stop when the buffer is full. + /// Fetches messages to `store_msg_buf`. It will stop when the buffer + /// capacity is reached or there is no more pending messages. /// /// Returns how many messages are fetched. 
pub fn recv(&self, store_msg_buf: &mut Vec) -> usize { diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index e52ec322445..c3cede21ebc 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -25,7 +25,7 @@ pub struct Peer { impl Peer { /// Creates a new peer. /// - /// If peer is destroyed, None is returned. + /// If peer is destroyed, `None` is returned. pub fn new( cfg: &Config, region_id: u64, diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index fc25e12bad3..ff0bd64cd01 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -58,8 +58,8 @@ pub struct Storage { impl Storage { /// Creates a new storage. /// - /// All metadata should be initialized before calling this method. If the region is destroyed - /// `None` will be returned. + /// All metadata should be initialized before calling this method. If the + /// region is destroyed, `None` will be returned. pub fn new( region_id: u64, store_id: u64, diff --git a/components/raftstore-v2/src/tablet.rs b/components/raftstore-v2/src/tablet.rs index 2293eaed033..f4f5bdcbc6f 100644 --- a/components/raftstore-v2/src/tablet.rs +++ b/components/raftstore-v2/src/tablet.rs @@ -33,12 +33,12 @@ impl CachedTablet { } pub fn set(&mut self, data: EK) { - let mut guard = self.latest.data.lock().unwrap(); - *guard = Some(data.clone()); - let v = self.latest.version.fetch_add(1, Ordering::Relaxed); - drop(guard); + self.version = { + let mut latest_data = self.latest.data.lock().unwrap(); + *latest_data = Some(data.clone()); + self.latest.version.fetch_add(1, Ordering::Relaxed) + 1 + }; self.cache = Some(data); - self.version = v; } /// Get the tablet from cache without checking if it's up to date. 
@@ -51,9 +51,9 @@ impl CachedTablet { #[inline] pub fn latest(&mut self) -> Option<&EK> { if self.latest.version.load(Ordering::Relaxed) > self.version { - let guard = self.latest.data.lock().unwrap(); + let latest_data = self.latest.data.lock().unwrap(); self.version = self.latest.version.load(Ordering::Relaxed); - self.cache = guard.clone(); + self.cache = latest_data.clone(); } self.cache() } @@ -76,7 +76,14 @@ mod tests { // Setting tablet will refresh cache immediately. cached_tablet.set(2); assert_eq!(cached_tablet.cache().cloned(), Some(2)); - assert_eq!(cached_tablet.latest().cloned(), Some(2)); + + // Test `latest()` will use cache. + // Unsafe modify the data. + let old_data = *cached_tablet.latest.data.lock().unwrap(); + *cached_tablet.latest.data.lock().unwrap() = Some(0); + assert_eq!(cached_tablet.latest().cloned(), old_data); + // Restore the data. + *cached_tablet.latest.data.lock().unwrap() = old_data; let mut cloned = cached_tablet.clone(); // Clone should reuse cache. diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index 9b3e38aa9cc..1fec3722a64 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -586,6 +586,7 @@ pub fn build_on_master_branch() -> bool { } /// Set the capacity of a vector to the given capacity. 
+#[inline] pub fn set_vec_capacity(v: &mut Vec, cap: usize) { match cap.cmp(&v.capacity()) { cmp::Ordering::Less => v.shrink_to(cap), From f5adcb1cec9e8322be13b2313b6784a0aa0339dd Mon Sep 17 00:00:00 2001 From: WangLe1321 Date: Wed, 27 Jul 2022 16:17:11 +0800 Subject: [PATCH 0116/1149] log-backup: fix uploading to gcs error (#13107) close tikv/tikv#13106 Signed-off-by: WangLe1321 Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/backup-stream/Cargo.toml | 1 + components/backup-stream/src/router.rs | 62 ++++++++++++++++++++++++-- 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 87dc15eb69a..3917b836317 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -459,6 +459,7 @@ dependencies = [ "fail", "file_system", "futures 0.3.15", + "futures-io", "grpcio", "hex 0.4.2", "kvproto", diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 9e8049e0ec0..e2b23ccf5db 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -35,6 +35,7 @@ external_storage_export = { path = "../external_storage/export", default-feature fail = { version = "0.5", optional = true } file_system = { path = "../file_system" } futures = "0.3" +futures-io = "0.3" grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } hex = "0.4" diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 3e29592a9f4..b236cefde77 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -972,8 +972,7 @@ impl StreamTaskInfo { let stat = reader.metadata().await?; let reader = UnpinReader(Box::new(limiter.limit(reader.compat()))); let filepath = &data_file.storage_path; - // Once we cannot get the stat of the file, use 4K I/O. 
- let est_len = stat.len().max(4096); + let est_len = stat.len(); let ret = storage.write(filepath, reader, est_len).await; match ret { @@ -1370,13 +1369,17 @@ struct TaskRange { #[cfg(test)] mod tests { - use std::{ffi::OsStr, time::Duration}; + use std::{ffi::OsStr, marker::Unpin, time::Duration}; + use external_storage::NoopStorage; + use futures::AsyncReadExt; + use futures_io::AsyncRead; use kvproto::brpb::{Local, Noop, StorageBackend, StreamBackupTaskInfo}; use tikv_util::{ codec::number::NumberEncoder, worker::{dummy_scheduler, ReceiverWrapper}, }; + use tokio::{fs::File, sync::Mutex}; use txn_types::{Write, WriteType}; use super::*; @@ -2070,4 +2073,57 @@ mod tests { assert_eq!(ts, global_checkpoint); Ok(()) } + + struct MockCheckContentStorage { + s: NoopStorage, + } + + #[async_trait::async_trait] + impl ExternalStorage for MockCheckContentStorage { + fn name(&self) -> &'static str { + self.s.name() + } + + fn url(&self) -> io::Result { + self.s.url() + } + + async fn write( + &self, + _name: &str, + mut reader: UnpinReader, + content_length: u64, + ) -> io::Result<()> { + let mut data = Vec::new(); + reader.0.read_to_end(&mut data).await?; + let data_len: u64 = data.len() as _; + + if data_len == content_length { + Ok(()) + } else { + Err(io::Error::new( + io::ErrorKind::Other, + "the length of content in reader is not equal with content_length", + )) + } + } + + fn read(&self, name: &str) -> Box { + self.s.read(name) + } + } + + #[tokio::test] + async fn test_est_len_in_flush() -> Result<()> { + let noop_s = NoopStorage::default(); + let ms = MockCheckContentStorage { s: noop_s }; + let file_path = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); + let mut f = File::create(file_path.clone()).await?; + f.write_all("test-data".as_bytes()).await?; + + let data_file = DataFile::new(file_path).await.unwrap(); + let result = StreamTaskInfo::flush_log_file_to(Arc::new(ms), &Mutex::new(data_file)).await; + assert_eq!(result.is_ok(), true); + 
Ok(()) + } } From 4152dbe02dfd2df11848e70908902c82f032018c Mon Sep 17 00:00:00 2001 From: Jay Date: Thu, 28 Jul 2022 11:17:11 +0800 Subject: [PATCH 0117/1149] *: remove engine_rocks raw_util (#13132) ref tikv/tikv#13058 raw_util is duplicated with util. By using util, this PR also makes `DB` type only in use in engine_rocks, which is helpful for adapting tirocks later. In addition, this PR fixes an unsound transforms between `Arc` and `RocksEngine`. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + Makefile | 2 +- cmd/tikv-ctl/src/executor.rs | 11 +- components/backup-stream/src/utils.rs | 8 +- components/backup/src/endpoint.rs | 10 +- components/backup/src/writer.rs | 24 +- components/engine_rocks/src/cf_options.rs | 38 +- components/engine_rocks/src/compact.rs | 51 +-- .../engine_rocks/src/compact_listener.rs | 7 +- components/engine_rocks/src/compat.rs | 25 -- components/engine_rocks/src/db_options.rs | 35 ++ components/engine_rocks/src/engine.rs | 27 +- components/engine_rocks/src/file_system.rs | 34 +- components/engine_rocks/src/import.rs | 17 +- components/engine_rocks/src/lib.rs | 4 - components/engine_rocks/src/misc.rs | 32 +- .../engine_rocks/src/perf_context_impl.rs | 6 +- components/engine_rocks/src/properties.rs | 31 +- components/engine_rocks/src/raw.rs | 15 +- components/engine_rocks/src/raw_util.rs | 336 ------------------ components/engine_rocks/src/rocks_metrics.rs | 3 +- components/engine_rocks/src/util.rs | 314 ++++++++++++---- components/engine_rocks/src/write_batch.rs | 7 +- components/engine_rocks_helper/Cargo.toml | 1 + .../engine_rocks_helper/src/sst_recovery.rs | 22 +- components/engine_test/src/lib.rs | 169 +++------ components/engine_traits/src/lib.rs | 4 - .../src/basic_read_write.rs | 15 +- .../engine_traits_tests/src/cf_names.rs | 19 +- components/engine_traits_tests/src/ctor.rs | 14 +- components/engine_traits_tests/src/lib.rs | 13 +- .../src/coprocessor/split_check/half.rs | 47 +-- 
.../src/coprocessor/split_check/keys.rs | 36 +- .../src/coprocessor/split_check/size.rs | 45 +-- .../src/coprocessor/split_check/table.rs | 4 +- components/raftstore/src/store/bootstrap.rs | 10 +- .../raftstore/src/store/compaction_guard.rs | 24 +- components/raftstore/src/store/fsm/apply.rs | 8 +- .../raftstore/src/store/peer_storage.rs | 6 +- components/raftstore/src/store/snap.rs | 36 +- .../raftstore/src/store/worker/compact.rs | 12 +- .../src/store/worker/consistency_check.rs | 8 +- .../raftstore/src/store/worker/raftlog_gc.rs | 3 +- components/raftstore/src/store/worker/read.rs | 3 +- .../raftstore/src/store/worker/region.rs | 12 +- components/server/src/raft_engine_switch.rs | 15 +- components/server/src/server.rs | 12 +- components/sst_importer/src/import_mode.rs | 6 +- components/sst_importer/src/util.rs | 26 +- components/test_backup/src/lib.rs | 2 +- components/test_raftstore/src/cluster.rs | 13 +- components/test_raftstore/src/util.rs | 16 +- components/test_sst_importer/src/lib.rs | 24 +- components/tikv_kv/src/cursor.rs | 19 +- components/tikv_kv/src/lib.rs | 2 +- components/tikv_kv/src/rocksdb_engine.rs | 21 +- src/config.rs | 66 ++-- src/server/debug.rs | 199 +++++------ src/server/engine_factory.rs | 7 +- src/server/gc_worker/mod.rs | 17 +- src/server/reset_to_version.rs | 29 +- src/storage/kv/test_engine_builder.rs | 22 +- src/storage/mod.rs | 22 +- src/storage/mvcc/consistency_check.rs | 10 +- src/storage/mvcc/reader/reader.rs | 79 ++-- .../singleton_flow_controller.rs | 4 +- tests/benches/misc/raftkv/mod.rs | 28 +- .../misc/writebatch/bench_writebatch.rs | 41 ++- tests/benches/raftstore/mod.rs | 10 +- tests/failpoints/cases/test_async_fetch.rs | 4 +- tests/failpoints/cases/test_merge.rs | 9 +- tests/failpoints/cases/test_replica_read.rs | 2 - tests/failpoints/cases/test_sst_recovery.rs | 69 ++-- .../integrations/config/dynamic/raftstore.rs | 27 +- .../config/dynamic/split_check.rs | 26 +- .../integrations/raftstore/test_bootstrap.rs | 26 +- 
.../raftstore/test_clear_stale_data.rs | 26 +- .../raftstore/test_compact_after_delete.rs | 3 +- .../raftstore/test_compact_log.rs | 8 +- .../raftstore/test_conf_change.rs | 4 - tests/integrations/raftstore/test_merge.rs | 3 - tests/integrations/raftstore/test_multi.rs | 25 +- .../raftstore/test_split_region.rs | 3 +- .../integrations/raftstore/test_stale_peer.rs | 25 +- tests/integrations/raftstore/test_stats.rs | 1 + .../integrations/raftstore/test_tombstone.rs | 10 +- tests/integrations/server/kv_service.rs | 14 +- tests/integrations/storage/test_titan.rs | 118 +++--- 88 files changed, 1024 insertions(+), 1618 deletions(-) delete mode 100644 components/engine_rocks/src/compat.rs delete mode 100644 components/engine_rocks/src/raw_util.rs diff --git a/Cargo.lock b/Cargo.lock index 3917b836317..9e0303726fb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1504,6 +1504,7 @@ version = "0.1.0" dependencies = [ "engine_rocks", "engine_test", + "engine_traits", "fail", "futures 0.3.15", "keys", diff --git a/Makefile b/Makefile index 22c575abb8f..fb7bbf6052e 100644 --- a/Makefile +++ b/Makefile @@ -330,7 +330,7 @@ unset-override: pre-format: unset-override @rustup component add rustfmt - @cargo install -q cargo-sort + @which cargo-sort &> /dev/null || cargo install -q cargo-sort format: pre-format @cargo fmt diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 401d96e5d8e..19977924e69 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -6,10 +6,7 @@ use std::{ }; use encryption_export::data_key_manager_from_config; -use engine_rocks::{ - raw_util::{db_exist, new_engine_opt}, - RocksEngine, -}; +use engine_rocks::util::{db_exist, new_engine_opt}; use engine_traits::{ Engines, Error as EngineError, RaftEngine, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, }; @@ -78,11 +75,10 @@ pub fn new_debug_executor( .build_cf_opts(&cache, None, cfg.storage.api_version()); let kv_path = PathBuf::from(kv_path).canonicalize().unwrap(); 
let kv_path = kv_path.to_str().unwrap(); - let kv_db = match new_engine_opt(kv_path, kv_db_opts, kv_cfs_opts) { + let mut kv_db = match new_engine_opt(kv_path, kv_db_opts, kv_cfs_opts) { Ok(db) => db, Err(e) => handle_engine_error(e), }; - let mut kv_db = RocksEngine::from_db(Arc::new(kv_db)); kv_db.set_shared_block_cache(shared_block_cache); let cfg_controller = ConfigController::default(); @@ -95,11 +91,10 @@ pub fn new_debug_executor( error!("raft db not exists: {}", raft_path); tikv_util::logger::exit_process_gracefully(-1); } - let raft_db = match new_engine_opt(&raft_path, raft_db_opts, raft_db_cf_opts) { + let mut raft_db = match new_engine_opt(&raft_path, raft_db_opts, raft_db_cf_opts) { Ok(db) => db, Err(e) => handle_engine_error(e), }; - let mut raft_db = RocksEngine::from_db(Arc::new(raft_db)); raft_db.set_shared_block_cache(shared_block_cache); let debugger = Debugger::new(Engines::new(kv_db, raft_db), cfg_controller); Box::new(debugger) as Box diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 6ad26cb045c..5aed8f55f7f 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -591,7 +591,6 @@ mod test { time::Duration, }; - use engine_rocks::raw::DBOptions; use engine_traits::WriteOptions; use futures::executor::block_on; @@ -736,15 +735,12 @@ mod test { #[test] fn test_recorder() { - use engine_rocks::{raw::DB, RocksEngine}; use engine_traits::{Iterable, KvEngine, Mutable, WriteBatch, WriteBatchExt, CF_DEFAULT}; use tempdir::TempDir; let p = TempDir::new("test_db").unwrap(); - let mut opt = DBOptions::default(); - opt.create_if_missing(true); - let db = DB::open(opt.clone(), p.path().as_os_str().to_str().unwrap()).unwrap(); - let engine = RocksEngine::from_db(Arc::new(db)); + let engine = + engine_rocks::util::new_engine(p.path().to_str().unwrap(), &[CF_DEFAULT]).unwrap(); let mut wb = engine.write_batch(); for i in 0..100 { wb.put_cf(CF_DEFAULT, format!("hello{}", 
i).as_bytes(), b"world") diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index bbcf33d7899..8865aa4f94c 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -11,7 +11,7 @@ use std::{ use async_channel::SendError; use causal_ts::CausalTsProvider; use concurrency_manager::ConcurrencyManager; -use engine_rocks::raw::DB; +use engine_rocks::RocksEngine; use engine_traits::{name_to_cf, raw_ttl::ttl_current_ts, CfName, SstCompressionType}; use external_storage::{BackendConfig, HdfsConfig}; use external_storage_export::{create_storage, ExternalStorage}; @@ -505,7 +505,7 @@ impl BackupRange { async fn backup_raw_kv_to_file( &self, engine: E, - db: Arc, + db: RocksEngine, limiter: &Limiter, file_name: String, cf: CfNameWrap, @@ -659,7 +659,7 @@ pub struct Endpoint { store_id: u64, pool: RefCell, io_pool: Runtime, - db: Arc, + db: RocksEngine, config_manager: ConfigManager, concurrency_manager: ConcurrencyManager, softlimit: SoftLimitKeeper, @@ -782,7 +782,7 @@ impl Endpoint { store_id: u64, engine: E, region_info: R, - db: Arc, + db: RocksEngine, config: BackupConfig, concurrency_manager: ConcurrencyManager, api_version: ApiVersion, @@ -1280,7 +1280,7 @@ pub mod tests { .unwrap(); let concurrency_manager = ConcurrencyManager::new(1.into()); let need_encode_key = !is_raw_kv || api_version == ApiVersion::V2; - let db = rocks.get_rocksdb().get_sync_db(); + let db = rocks.get_rocksdb(); ( temp, Endpoint::new( diff --git a/components/backup/src/writer.rs b/components/backup/src/writer.rs index 99a907948ce..7127d896314 100644 --- a/components/backup/src/writer.rs +++ b/components/backup/src/writer.rs @@ -1,9 +1,9 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{fmt::Display, io::Read, sync::Arc}; +use std::{fmt::Display, io::Read}; use encryption::{EncrypterReader, Iv}; -use engine_rocks::{raw::DB, RocksEngine, RocksSstWriter, RocksSstWriterBuilder}; +use engine_rocks::{RocksEngine, RocksSstWriter, RocksSstWriterBuilder}; use engine_traits::{ CfName, ExternalSstFileInfo, SstCompressionType, SstWriter, SstWriterBuilder, CF_DEFAULT, CF_WRITE, @@ -168,7 +168,7 @@ pub struct BackupWriterBuilder { store_id: u64, limiter: Limiter, region: Region, - db: Arc, + db: RocksEngine, compression_type: Option, compression_level: i32, sst_max_size: u64, @@ -180,7 +180,7 @@ impl BackupWriterBuilder { store_id: u64, limiter: Limiter, region: Region, - db: Arc, + db: RocksEngine, compression_type: Option, compression_level: i32, sst_max_size: u64, @@ -227,7 +227,7 @@ pub struct BackupWriter { impl BackupWriter { /// Create a new BackupWriter. pub fn new( - db: Arc, + db: RocksEngine, name: &str, compression_type: Option, compression_level: i32, @@ -238,14 +238,14 @@ impl BackupWriter { let default = RocksSstWriterBuilder::new() .set_in_memory(true) .set_cf(CF_DEFAULT) - .set_db(RocksEngine::from_ref(&db)) + .set_db(&db) .set_compression_type(compression_type) .set_compression_level(compression_level) .build(name)?; let write = RocksSstWriterBuilder::new() .set_in_memory(true) .set_cf(CF_WRITE) - .set_db(RocksEngine::from_ref(&db)) + .set_db(&db) .set_compression_type(compression_type) .set_compression_level(compression_level) .build(name)?; @@ -351,7 +351,7 @@ pub struct BackupRawKvWriter { impl BackupRawKvWriter { /// Create a new BackupRawKvWriter. 
pub fn new( - db: Arc, + db: RocksEngine, name: &str, cf: CfNameWrap, limiter: Limiter, @@ -363,7 +363,7 @@ impl BackupRawKvWriter { let writer = RocksSstWriterBuilder::new() .set_in_memory(true) .set_cf(cf.into()) - .set_db(RocksEngine::from_ref(&db)) + .set_db(&db) .set_compression_type(compression_type) .set_compression_level(compression_level) .build(name)?; @@ -498,7 +498,7 @@ mod tests { r.set_id(1); r.mut_peers().push(new_peer(1, 1)); let mut writer = BackupWriter::new( - db.get_sync_db(), + db.clone(), "foo", None, 0, @@ -516,7 +516,7 @@ mod tests { // Test write only txn. let mut writer = BackupWriter::new( - db.get_sync_db(), + db.clone(), "foo1", None, 0, @@ -555,7 +555,7 @@ mod tests { // Test write and default. let mut writer = BackupWriter::new( - db.get_sync_db(), + db, "foo2", None, 0, diff --git a/components/engine_rocks/src/cf_options.rs b/components/engine_rocks/src/cf_options.rs index 87d05510f58..c6a5390a063 100644 --- a/components/engine_rocks/src/cf_options.rs +++ b/components/engine_rocks/src/cf_options.rs @@ -1,7 +1,9 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
+use std::ops::{Deref, DerefMut}; + use engine_traits::{CFOptionsExt, ColumnFamilyOptions, Result, SstPartitionerFactory}; -use rocksdb::ColumnFamilyOptions as RawCFOptions; +use rocksdb::ColumnFamilyOptions as RawCfOptions; use tikv_util::box_err; use crate::{ @@ -10,11 +12,11 @@ use crate::{ }; impl CFOptionsExt for RocksEngine { - type ColumnFamilyOptions = RocksColumnFamilyOptions; + type ColumnFamilyOptions = RocksCfOptions; fn get_options_cf(&self, cf: &str) -> Result { let handle = util::get_cf_handle(self.as_inner(), cf)?; - Ok(RocksColumnFamilyOptions::from_raw( + Ok(RocksCfOptions::from_raw( self.as_inner().get_options_cf(handle), )) } @@ -27,28 +29,40 @@ impl CFOptionsExt for RocksEngine { } } -#[derive(Clone)] -pub struct RocksColumnFamilyOptions(RawCFOptions); +#[derive(Default, Clone)] +pub struct RocksCfOptions(RawCfOptions); -impl RocksColumnFamilyOptions { - pub fn from_raw(raw: RawCFOptions) -> RocksColumnFamilyOptions { - RocksColumnFamilyOptions(raw) +impl RocksCfOptions { + pub fn from_raw(raw: RawCfOptions) -> RocksCfOptions { + RocksCfOptions(raw) } - pub fn into_raw(self) -> RawCFOptions { + pub fn into_raw(self) -> RawCfOptions { self.0 } +} + +impl Deref for RocksCfOptions { + type Target = RawCfOptions; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } +} - pub fn as_raw_mut(&mut self) -> &mut RawCFOptions { +impl DerefMut for RocksCfOptions { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } } -impl ColumnFamilyOptions for RocksColumnFamilyOptions { +impl ColumnFamilyOptions for RocksCfOptions { type TitanDBOptions = RocksTitanDBOptions; fn new() -> Self { - RocksColumnFamilyOptions::from_raw(RawCFOptions::new()) + RocksCfOptions::from_raw(RawCfOptions::default()) } fn get_max_write_buffer_number(&self) -> u32 { diff --git a/components/engine_rocks/src/compact.rs b/components/engine_rocks/src/compact.rs index 0b50e0757c2..393377149ff 100644 --- a/components/engine_rocks/src/compact.rs +++ 
b/components/engine_rocks/src/compact.rs @@ -137,16 +137,10 @@ impl CompactExt for RocksEngine { #[cfg(test)] mod tests { - use std::sync::Arc; - - use engine_traits::CompactExt; - use rocksdb::{ColumnFamilyOptions, Writable}; + use engine_traits::{CFNamesExt, CFOptionsExt, CompactExt, MiscExt, SyncMutable}; use tempfile::Builder; - use crate::{ - raw_util::{new_engine, CFOptions}, - Compat, - }; + use crate::{util, RocksCfOptions, RocksDBOptions}; #[test] fn test_compact_files_in_range() { @@ -155,29 +149,24 @@ mod tests { .tempdir() .unwrap(); - let mut cf_opts = ColumnFamilyOptions::new(); + let mut cf_opts = RocksCfOptions::default(); cf_opts.set_disable_auto_compactions(true); - let cfs_opts = vec![ - CFOptions::new("default", cf_opts.clone()), - CFOptions::new("test", cf_opts), - ]; - let db = new_engine( + let cfs_opts = vec![("default", cf_opts.clone()), ("test", cf_opts)]; + let db = util::new_engine_opt( temp_dir.path().to_str().unwrap(), - None, - &["default", "test"], - Some(cfs_opts), + RocksDBOptions::default(), + cfs_opts, ) .unwrap(); - let db = Arc::new(db); for cf_name in db.cf_names() { - let cf = db.cf_handle(cf_name).unwrap(); for i in 0..5 { - db.put_cf(cf, &[i], &[i]).unwrap(); - db.put_cf(cf, &[i + 1], &[i + 1]).unwrap(); - db.flush_cf(cf, true).unwrap(); + db.put_cf(cf_name, &[i], &[i]).unwrap(); + db.put_cf(cf_name, &[i + 1], &[i + 1]).unwrap(); + db.flush_cf(cf_name, true).unwrap(); } - let cf_meta = db.get_column_family_meta_data(cf); + let cf = util::get_cf_handle(db.as_inner(), cf_name).unwrap(); + let cf_meta = db.as_inner().get_column_family_meta_data(cf); let cf_levels = cf_meta.get_levels(); assert_eq!(cf_levels.first().unwrap().get_files().len(), 5); } @@ -187,13 +176,12 @@ mod tests { // # After // Level-0: [4-5] // Level-1: [0-4] - db.c() - .compact_files_in_range(None, Some(&[4]), Some(1)) + db.compact_files_in_range(None, Some(&[4]), Some(1)) .unwrap(); for cf_name in db.cf_names() { - let cf = db.cf_handle(cf_name).unwrap(); 
- let cf_meta = db.get_column_family_meta_data(cf); + let cf = util::get_cf_handle(db.as_inner(), cf_name).unwrap(); + let cf_meta = db.as_inner().get_column_family_meta_data(cf); let cf_levels = cf_meta.get_levels(); let level_0 = cf_levels[0].get_files(); assert_eq!(level_0.len(), 1); @@ -211,14 +199,13 @@ mod tests { // # After // Level-0: [4-5] // Level-N: [0-4] - db.c() - .compact_files_in_range(Some(&[2]), Some(&[4]), None) + db.compact_files_in_range(Some(&[2]), Some(&[4]), None) .unwrap(); for cf_name in db.cf_names() { - let cf = db.cf_handle(cf_name).unwrap(); - let cf_opts = db.get_options_cf(cf); - let cf_meta = db.get_column_family_meta_data(cf); + let cf = util::get_cf_handle(db.as_inner(), cf_name).unwrap(); + let cf_opts = db.get_options_cf(cf_name).unwrap(); + let cf_meta = db.as_inner().get_column_family_meta_data(cf); let cf_levels = cf_meta.get_levels(); let level_0 = cf_levels[0].get_files(); assert_eq!(level_0.len(), 1); diff --git a/components/engine_rocks/src/compact_listener.rs b/components/engine_rocks/src/compact_listener.rs index 2cfdb253eb0..5fc7a4e92f2 100644 --- a/components/engine_rocks/src/compact_listener.rs +++ b/components/engine_rocks/src/compact_listener.rs @@ -17,10 +17,7 @@ use rocksdb::{ }; use tikv_util::warn; -use crate::{ - properties::{RangeProperties, UserCollectedPropertiesDecoder}, - raw::EventListener, -}; +use crate::properties::{RangeProperties, UserCollectedPropertiesDecoder}; pub struct RocksCompactionJobInfo<'a>(&'a RawCompactionJobInfo); @@ -229,7 +226,7 @@ impl CompactionListener { } } -impl EventListener for CompactionListener { +impl rocksdb::EventListener for CompactionListener { fn on_compaction_completed(&self, info: &RawCompactionJobInfo) { let info = &RocksCompactionJobInfo::from_raw(info); if info.status().is_err() { diff --git a/components/engine_rocks/src/compat.rs b/components/engine_rocks/src/compat.rs deleted file mode 100644 index 96371fcf62b..00000000000 --- 
a/components/engine_rocks/src/compat.rs +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. - -use std::sync::Arc; - -use crate::{engine::RocksEngine, raw::DB}; - -/// A trait to enter the world of engine traits from a raw `Arc` -/// with as little syntax as possible. -/// -/// This will be used during the transition from RocksDB to the -/// `KvEngine` abstraction and then discarded. -pub trait Compat { - type Other; - - fn c(&self) -> &Self::Other; -} - -impl Compat for Arc { - type Other = RocksEngine; - - #[inline] - fn c(&self) -> &RocksEngine { - RocksEngine::from_ref(self) - } -} diff --git a/components/engine_rocks/src/db_options.rs b/components/engine_rocks/src/db_options.rs index 948ed469352..6aaccfee76b 100644 --- a/components/engine_rocks/src/db_options.rs +++ b/components/engine_rocks/src/db_options.rs @@ -1,5 +1,7 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. +use std::ops::{Deref, DerefMut}; + use engine_traits::{DBOptions, DBOptionsExt, Result, TitanDBOptions}; use rocksdb::{DBOptions as RawDBOptions, TitanDBOptions as RawTitanDBOptions}; use tikv_util::box_err; @@ -19,6 +21,7 @@ impl DBOptionsExt for RocksEngine { } } +#[derive(Default)] pub struct RocksDBOptions(RawDBOptions); impl RocksDBOptions { @@ -35,6 +38,22 @@ impl RocksDBOptions { } } +impl Deref for RocksDBOptions { + type Target = RawDBOptions; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for RocksDBOptions { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + impl DBOptions for RocksDBOptions { type TitanDBOptions = RocksTitanDBOptions; @@ -83,6 +102,22 @@ impl RocksTitanDBOptions { } } +impl Deref for RocksTitanDBOptions { + type Target = RawTitanDBOptions; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for RocksTitanDBOptions { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} 
+ impl TitanDBOptions for RocksTitanDBOptions { fn new() -> Self { RocksTitanDBOptions::from_raw(RawTitanDBOptions::new()) diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index e6a1cf4a6a7..6071f06a646 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -30,6 +30,10 @@ pub struct RocksEngine { } impl RocksEngine { + pub(crate) fn new(db: DB) -> RocksEngine { + RocksEngine::from_db(Arc::new(db)) + } + pub fn from_db(db: Arc) -> Self { RocksEngine { db: db.clone(), @@ -38,13 +42,6 @@ impl RocksEngine { } } - // Notice: After obtaining RocksEngine through this method, please make sure - // it has been initialized with db, otherwise do not call its member methods, - // as it'll contain garbage members. - pub fn from_ref(db: &Arc) -> &Self { - unsafe { &*(db as *const Arc as *const RocksEngine) } - } - pub fn as_inner(&self) -> &Arc { &self.db } @@ -202,21 +199,17 @@ impl SyncMutable for RocksEngine { #[cfg(test)] mod tests { - use std::sync::Arc; - use engine_traits::{Iterable, KvEngine, Peekable, SyncMutable, CF_DEFAULT}; use kvproto::metapb::Region; use tempfile::Builder; - use crate::{raw_util, RocksEngine, RocksSnapshot}; + use crate::{util, RocksSnapshot}; #[test] fn test_base() { let path = Builder::new().prefix("var").tempdir().unwrap(); let cf = "cf"; - let engine = RocksEngine::from_db(Arc::new( - raw_util::new_engine(path.path().to_str().unwrap(), None, &[cf], None).unwrap(), - )); + let engine = util::new_engine(path.path().to_str().unwrap(), &[CF_DEFAULT, cf]).unwrap(); let mut r = Region::default(); r.set_id(10); @@ -251,9 +244,7 @@ mod tests { fn test_peekable() { let path = Builder::new().prefix("var").tempdir().unwrap(); let cf = "cf"; - let engine = RocksEngine::from_db(Arc::new( - raw_util::new_engine(path.path().to_str().unwrap(), None, &[cf], None).unwrap(), - )); + let engine = util::new_engine(path.path().to_str().unwrap(), &[CF_DEFAULT, cf]).unwrap(); 
engine.put(b"k1", b"v1").unwrap(); engine.put_cf(cf, b"k1", b"v2").unwrap(); @@ -267,9 +258,7 @@ mod tests { fn test_scan() { let path = Builder::new().prefix("var").tempdir().unwrap(); let cf = "cf"; - let engine = RocksEngine::from_db(Arc::new( - raw_util::new_engine(path.path().to_str().unwrap(), None, &[cf], None).unwrap(), - )); + let engine = util::new_engine(path.path().to_str().unwrap(), &[CF_DEFAULT, cf]).unwrap(); engine.put(b"a1", b"v1").unwrap(); engine.put(b"a2", b"v2").unwrap(); diff --git a/components/engine_rocks/src/file_system.rs b/components/engine_rocks/src/file_system.rs index 2fcbc405056..c63edb8a117 100644 --- a/components/engine_rocks/src/file_system.rs +++ b/components/engine_rocks/src/file_system.rs @@ -42,32 +42,27 @@ impl DBFileSystemInspector for WrappedFileSystemInspecto mod tests { use std::sync::Arc; - use engine_traits::{CompactExt, CF_DEFAULT}; + use engine_traits::{CompactExt, MiscExt, SyncMutable, CF_DEFAULT}; use file_system::{IOOp, IORateLimiter, IORateLimiterStatistics, IOType}; use keys::data_key; - use rocksdb::{DBOptions, Writable, DB}; use tempfile::Builder; use super::*; use crate::{ - compat::Compat, - event_listener::RocksEventListener, - raw::{ColumnFamilyOptions, DBCompressionType}, - raw_util::{new_engine_opt, CFOptions}, + event_listener::RocksEventListener, raw::DBCompressionType, util::new_engine_opt, + RocksCfOptions, RocksDBOptions, RocksEngine, }; - fn new_test_db(dir: &str) -> (Arc, Arc) { + fn new_test_db(dir: &str) -> (RocksEngine, Arc) { let limiter = Arc::new(IORateLimiter::new_for_test()); - let mut db_opts = DBOptions::new(); + let mut db_opts = RocksDBOptions::default(); db_opts.add_event_listener(RocksEventListener::new("test_db", None)); let env = get_env(None, Some(limiter.clone())).unwrap(); db_opts.set_env(env); - let mut cf_opts = ColumnFamilyOptions::new(); + let mut cf_opts = RocksCfOptions::default(); cf_opts.set_disable_auto_compactions(true); 
cf_opts.compression_per_level(&[DBCompressionType::No; 7]); - let db = Arc::new( - new_engine_opt(dir, db_opts, vec![CFOptions::new(CF_DEFAULT, cf_opts)]).unwrap(), - ); + let db = new_engine_opt(dir, db_opts, vec![(CF_DEFAULT, cf_opts)]).unwrap(); (db, limiter.statistics().unwrap()) } @@ -97,14 +92,13 @@ mod tests { assert!(stats.fetch(IOType::Flush, IOOp::Write) > value_size * 2); assert!(stats.fetch(IOType::Flush, IOOp::Write) < value_size * 2 + amplification_bytes); stats.reset(); - db.c() - .compact_range( - CF_DEFAULT, None, /*start_key*/ - None, /*end_key*/ - false, /*exclusive_manual*/ - 1, /*max_subcompactions*/ - ) - .unwrap(); + db.compact_range( + CF_DEFAULT, None, /*start_key*/ + None, /*end_key*/ + false, /*exclusive_manual*/ + 1, /*max_subcompactions*/ + ) + .unwrap(); assert!(stats.fetch(IOType::LevelZeroCompaction, IOOp::Read) > value_size * 4); assert!( stats.fetch(IOType::LevelZeroCompaction, IOOp::Read) diff --git a/components/engine_rocks/src/import.rs b/components/engine_rocks/src/import.rs index 641e33f7bd8..79e6d6c0f49 100644 --- a/components/engine_rocks/src/import.rs +++ b/components/engine_rocks/src/import.rs @@ -62,8 +62,6 @@ impl IngestExternalFileOptions for RocksIngestExternalFileOptions { #[cfg(test)] mod tests { - use std::sync::Arc; - use engine_traits::{ FlowControlFactorsExt, MiscExt, Mutable, SstWriter, SstWriterBuilder, WriteBatch, WriteBatchExt, ALL_CFS, CF_DEFAULT, @@ -71,12 +69,7 @@ mod tests { use tempfile::Builder; use super::*; - use crate::{ - engine::RocksEngine, - raw::{ColumnFamilyOptions, DBOptions}, - raw_util::{new_engine_opt, CFOptions}, - RocksSstWriterBuilder, - }; + use crate::{util::new_engine_opt, RocksCfOptions, RocksDBOptions, RocksSstWriterBuilder}; #[test] fn test_ingest_multiple_file() { @@ -91,14 +84,12 @@ mod tests { let cfs_opts = ALL_CFS .iter() .map(|cf| { - let mut opt = ColumnFamilyOptions::new(); + let mut opt = RocksCfOptions::default(); opt.set_force_consistency_checks(true); - 
CFOptions::new(cf, opt) + (*cf, opt) }) .collect(); - let db = new_engine_opt(path_str, DBOptions::new(), cfs_opts).unwrap(); - let db = Arc::new(db); - let db = RocksEngine::from_db(db); + let db = new_engine_opt(path_str, RocksDBOptions::default(), cfs_opts).unwrap(); let mut wb = db.write_batch(); for i in 1000..5000 { let v = i.to_string(); diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index b93d8cc7f36..8ec581c6e86 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -67,12 +67,8 @@ mod engine_iterator; pub use crate::engine_iterator::*; mod options; -pub mod raw_util; pub mod util; -mod compat; -pub use compat::*; - mod compact_listener; pub use compact_listener::*; diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index ad1f385654f..ff465d85dd1 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -253,7 +253,7 @@ impl MiscExt for RocksEngine { } fn exists(path: &str) -> bool { - crate::raw_util::db_exist(path) + crate::util::db_exist(path) } fn dump_stats(&self) -> Result { @@ -334,8 +334,6 @@ impl MiscExt for RocksEngine { #[cfg(test)] mod tests { - use std::sync::Arc; - use engine_traits::{ DeleteStrategy, Iterable, Iterator, Mutable, SyncMutable, WriteBatchExt, ALL_CFS, }; @@ -344,8 +342,8 @@ mod tests { use super::*; use crate::{ engine::RocksEngine, - raw::{ColumnFamilyOptions, DBOptions, DB}, - raw_util::{new_engine_opt, CFOptions}, + util::{new_engine, new_engine_opt}, + RocksCfOptions, RocksDBOptions, }; fn check_data(db: &RocksEngine, cfs: &[&str], expected: &[(&[u8], &[u8])]) { @@ -372,13 +370,7 @@ mod tests { .unwrap(); let path_str = path.path().to_str().unwrap(); - let cfs_opts = ALL_CFS - .iter() - .map(|cf| CFOptions::new(cf, ColumnFamilyOptions::new())) - .collect(); - let db = new_engine_opt(path_str, DBOptions::new(), cfs_opts).unwrap(); - let db = Arc::new(db); - let db = 
RocksEngine::from_db(db); + let db = new_engine(path_str, ALL_CFS).unwrap(); let mut wb = db.write_batch(); let ts: u8 = 12; @@ -523,14 +515,12 @@ mod tests { let cfs_opts = ALL_CFS .iter() .map(|cf| { - let mut cf_opts = ColumnFamilyOptions::new(); + let mut cf_opts = RocksCfOptions::default(); cf_opts.set_level_zero_file_num_compaction_trigger(1); - CFOptions::new(cf, cf_opts) + (*cf, cf_opts) }) .collect(); - let db = new_engine_opt(path_str, DBOptions::new(), cfs_opts).unwrap(); - let db = Arc::new(db); - let db = RocksEngine::from_db(db); + let db = new_engine_opt(path_str, RocksDBOptions::default(), cfs_opts).unwrap(); let keys = vec![b"k1", b"k2", b"k3", b"k4"]; @@ -562,11 +552,11 @@ mod tests { .unwrap(); let path_str = path.path().to_str().unwrap(); - let mut opts = DBOptions::new(); + let mut opts = RocksDBOptions::default(); opts.create_if_missing(true); opts.enable_multi_batch_write(true); - let mut cf_opts = ColumnFamilyOptions::new(); + let mut cf_opts = RocksCfOptions::default(); // Prefix extractor(trim the timestamp at tail) for write cf. cf_opts .set_prefix_extractor( @@ -577,9 +567,7 @@ mod tests { // Create prefix bloom filter for memtable. 
cf_opts.set_memtable_prefix_bloom_size_ratio(0.1_f64); let cf = "default"; - let db = DB::open_cf(opts, path_str, vec![(cf, cf_opts)]).unwrap(); - let db = Arc::new(db); - let db = RocksEngine::from_db(db); + let db = new_engine_opt(path_str, opts, vec![(cf, cf_opts)]).unwrap(); let mut wb = db.write_batch(); let kvs: Vec<(&[u8], &[u8])> = vec![ (b"kabcdefg1", b"v1"), diff --git a/components/engine_rocks/src/perf_context_impl.rs b/components/engine_rocks/src/perf_context_impl.rs index 152a0a12785..fe747b21a49 100644 --- a/components/engine_rocks/src/perf_context_impl.rs +++ b/components/engine_rocks/src/perf_context_impl.rs @@ -10,8 +10,8 @@ use tikv_util::time::Instant; use tracker::{Tracker, TrackerToken, GLOBAL_TRACKERS}; use crate::{ - perf_context_metrics::*, raw_util, set_perf_flags, set_perf_level, - PerfContext as RawPerfContext, PerfFlag, PerfFlags, + perf_context_metrics::*, set_perf_flags, set_perf_level, util, PerfContext as RawPerfContext, + PerfFlag, PerfFlags, }; macro_rules! 
report_write_perf_context { @@ -191,7 +191,7 @@ impl PerfContextStatistics { } } } else { - set_perf_level(raw_util::to_raw_perf_level(self.perf_level)); + set_perf_level(util::to_raw_perf_level(self.perf_level)); } } diff --git a/components/engine_rocks/src/properties.rs b/components/engine_rocks/src/properties.rs index 47b48d2fc5c..1168182c58e 100644 --- a/components/engine_rocks/src/properties.rs +++ b/components/engine_rocks/src/properties.rs @@ -536,9 +536,7 @@ pub fn get_range_entries_and_versions( #[cfg(test)] mod tests { - use std::sync::Arc; - - use engine_traits::{CF_WRITE, LARGE_CFS}; + use engine_traits::{MiscExt, SyncMutable, CF_WRITE, LARGE_CFS}; use rand::Rng; use tempfile::Builder; use test::Bencher; @@ -546,9 +544,8 @@ mod tests { use super::*; use crate::{ - compat::Compat, - raw::{ColumnFamilyOptions, DBEntryType, DBOptions, TablePropertiesCollector, Writable}, - raw_util::CFOptions, + raw::{DBEntryType, TablePropertiesCollector}, + RocksCfOptions, RocksDBOptions, }; #[allow(clippy::many_single_char_names)] @@ -714,18 +711,15 @@ mod tests { .tempdir() .unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::new(); - let mut cf_opts = ColumnFamilyOptions::new(); + let db_opts = RocksDBOptions::default(); + let mut cf_opts = RocksCfOptions::default(); cf_opts.set_level_zero_file_num_compaction_trigger(10); cf_opts.add_table_properties_collector_factory( "tikv.mvcc-properties-collector", MvccPropertiesCollectorFactory::default(), ); - let cfs_opts = LARGE_CFS - .iter() - .map(|cf| CFOptions::new(cf, cf_opts.clone())) - .collect(); - let db = Arc::new(crate::raw_util::new_engine_opt(path_str, db_opts, cfs_opts).unwrap()); + let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); + let db = crate::util::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); let cases = ["a", "b", "c"]; for &key in &cases { @@ -734,22 +728,21 @@ mod tests { .append_ts(2.into()) .as_encoded(), ); - let write_cf = 
db.cf_handle(CF_WRITE).unwrap(); - db.put_cf(write_cf, &k1, b"v1").unwrap(); - db.delete_cf(write_cf, &k1).unwrap(); + db.put_cf(CF_WRITE, &k1, b"v1").unwrap(); + db.delete_cf(CF_WRITE, &k1).unwrap(); let key = keys::data_key( Key::from_raw(key.as_bytes()) .append_ts(3.into()) .as_encoded(), ); - db.put_cf(write_cf, &key, b"v2").unwrap(); - db.flush_cf(write_cf, true).unwrap(); + db.put_cf(CF_WRITE, &key, b"v2").unwrap(); + db.flush_cf(CF_WRITE, true).unwrap(); } let start_keys = keys::data_key(&[]); let end_keys = keys::data_end_key(&[]); let (entries, versions) = - get_range_entries_and_versions(db.c(), CF_WRITE, &start_keys, &end_keys).unwrap(); + get_range_entries_and_versions(&db, CF_WRITE, &start_keys, &end_keys).unwrap(); assert_eq!(entries, (cases.len() * 2) as u64); assert_eq!(versions, cases.len() as u64); } diff --git a/components/engine_rocks/src/raw.rs b/components/engine_rocks/src/raw.rs index c51c0187b2d..1a8718588b2 100644 --- a/components/engine_rocks/src/raw.rs +++ b/components/engine_rocks/src/raw.rs @@ -7,14 +7,13 @@ //! crate, but only until the engine interface is completely abstracted. 
pub use rocksdb::{ - new_compaction_filter_raw, run_ldb_tool, run_sst_dump_tool, BlockBasedOptions, CFHandle, Cache, - ChecksumType, ColumnFamilyOptions, CompactOptions, CompactionFilter, CompactionFilterContext, + new_compaction_filter_raw, run_ldb_tool, run_sst_dump_tool, BlockBasedOptions, Cache, + ChecksumType, CompactOptions, CompactionFilter, CompactionFilterContext, CompactionFilterDecision, CompactionFilterFactory, CompactionFilterValueType, CompactionJobInfo, CompactionOptions, CompactionPriority, DBBottommostLevelCompaction, - DBCompactionFilter, DBCompactionStyle, DBCompressionType, DBEntryType, DBInfoLogLevel, - DBIterator, DBOptions, DBRateLimiterMode, DBRecoveryMode, DBStatisticsTickerType, - DBTitanDBBlobRunMode, Env, EventListener, IngestExternalFileOptions, LRUCacheOptions, - MemoryAllocator, PerfContext, PrepopulateBlockCache, Range, ReadOptions, SliceTransform, - TableFilter, TablePropertiesCollector, TablePropertiesCollectorFactory, TitanBlobIndex, - TitanDBOptions, Writable, WriteOptions, DB, + DBCompactionFilter, DBCompactionStyle, DBCompressionType, DBEntryType, DBRateLimiterMode, + DBRecoveryMode, DBStatisticsTickerType, DBTitanDBBlobRunMode, Env, EventListener, + IngestExternalFileOptions, LRUCacheOptions, MemoryAllocator, PerfContext, + PrepopulateBlockCache, Range, SliceTransform, TablePropertiesCollector, + TablePropertiesCollectorFactory, }; diff --git a/components/engine_rocks/src/raw_util.rs b/components/engine_rocks/src/raw_util.rs deleted file mode 100644 index e669f007276..00000000000 --- a/components/engine_rocks/src/raw_util.rs +++ /dev/null @@ -1,336 +0,0 @@ -// Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. - -//! Functions for constructing the rocksdb crate's `DB` type -//! -//! These are an artifact of refactoring the engine traits and will go away -//! eventually. Prefer to use the versions in the `util` module. 
- -use std::{fs, path::Path, sync::Arc}; - -use engine_traits::{Result, CF_DEFAULT}; -use rocksdb::{ - load_latest_options, CColumnFamilyDescriptor, ColumnFamilyOptions, DBOptions, Env, DB, -}; -use tikv_util::warn; - -use crate::r2e; - -pub struct CFOptions<'a> { - cf: &'a str, - options: ColumnFamilyOptions, -} - -impl<'a> CFOptions<'a> { - pub fn new(cf: &'a str, options: ColumnFamilyOptions) -> CFOptions<'a> { - CFOptions { cf, options } - } -} - -pub fn new_engine( - path: &str, - db_opts: Option, - cfs: &[&str], - opts: Option>>, -) -> Result { - let mut db_opts = match db_opts { - Some(opt) => opt, - None => DBOptions::new(), - }; - db_opts.enable_statistics(true); - let cf_opts = match opts { - Some(opts_vec) => opts_vec, - None => { - let mut default_cfs_opts = Vec::with_capacity(cfs.len()); - for cf in cfs { - default_cfs_opts.push(CFOptions::new(*cf, ColumnFamilyOptions::new())); - } - default_cfs_opts - } - }; - new_engine_opt(path, db_opts, cf_opts) -} - -/// Turns "dynamic level size" off for the existing column family which was off before. -/// Column families are small, HashMap isn't necessary. 
-fn adjust_dynamic_level_bytes( - cf_descs: &[CColumnFamilyDescriptor], - cf_options: &mut CFOptions<'_>, -) { - if let Some(cf_desc) = cf_descs - .iter() - .find(|cf_desc| cf_desc.name() == cf_options.cf) - { - let existed_dynamic_level_bytes = - cf_desc.options().get_level_compaction_dynamic_level_bytes(); - if existed_dynamic_level_bytes - != cf_options - .options - .get_level_compaction_dynamic_level_bytes() - { - warn!( - "change dynamic_level_bytes for existing column family is danger"; - "old_value" => existed_dynamic_level_bytes, - "new_value" => cf_options.options.get_level_compaction_dynamic_level_bytes(), - ); - } - cf_options - .options - .set_level_compaction_dynamic_level_bytes(existed_dynamic_level_bytes); - } -} - -pub fn new_engine_opt( - path: &str, - mut db_opt: DBOptions, - cfs_opts: Vec>, -) -> Result { - // Creates a new db if it doesn't exist. - if !db_exist(path) { - db_opt.create_if_missing(true); - - let mut cfs_v = vec![]; - let mut cf_opts_v = vec![]; - if let Some(x) = cfs_opts.iter().find(|x| x.cf == CF_DEFAULT) { - cfs_v.push(x.cf); - cf_opts_v.push(x.options.clone()); - } - let mut db = - DB::open_cf(db_opt, path, cfs_v.into_iter().zip(cf_opts_v).collect()).map_err(r2e)?; - for x in cfs_opts { - if x.cf == CF_DEFAULT { - continue; - } - db.create_cf((x.cf, x.options)).map_err(r2e)?; - } - - return Ok(db); - } - - db_opt.create_if_missing(false); - - // Lists all column families in current db. - let cfs_list = DB::list_column_families(&db_opt, path).map_err(r2e)?; - let existed: Vec<&str> = cfs_list.iter().map(|v| v.as_str()).collect(); - let needed: Vec<&str> = cfs_opts.iter().map(|x| x.cf).collect(); - - let cf_descs = if !existed.is_empty() { - let env = match db_opt.env() { - Some(env) => env, - None => Arc::new(Env::default()), - }; - // panic if OPTIONS not found for existing instance? 
- let (_, tmp) = load_latest_options(path, &env, true) - .unwrap_or_else(|e| panic!("failed to load_latest_options {:?}", e)) - .unwrap_or_else(|| panic!("couldn't find the OPTIONS file")); - tmp - } else { - vec![] - }; - - // If all column families exist, just open db. - if existed == needed { - let mut cfs_v = vec![]; - let mut cfs_opts_v = vec![]; - for mut x in cfs_opts { - adjust_dynamic_level_bytes(&cf_descs, &mut x); - cfs_v.push(x.cf); - cfs_opts_v.push(x.options); - } - - let db = - DB::open_cf(db_opt, path, cfs_v.into_iter().zip(cfs_opts_v).collect()).map_err(r2e)?; - return Ok(db); - } - - // Opens db. - let mut cfs_v: Vec<&str> = Vec::new(); - let mut cfs_opts_v: Vec = Vec::new(); - for cf in &existed { - cfs_v.push(cf); - match cfs_opts.iter().find(|x| x.cf == *cf) { - Some(x) => { - let mut tmp = CFOptions::new(x.cf, x.options.clone()); - adjust_dynamic_level_bytes(&cf_descs, &mut tmp); - cfs_opts_v.push(tmp.options); - } - None => { - cfs_opts_v.push(ColumnFamilyOptions::new()); - } - } - } - let cfds = cfs_v.into_iter().zip(cfs_opts_v).collect(); - let mut db = DB::open_cf(db_opt, path, cfds).map_err(r2e)?; - - // Drops discarded column families. - // for cf in existed.iter().filter(|x| needed.iter().find(|y| y == x).is_none()) { - for cf in cfs_diff(&existed, &needed) { - // Never drop default column families. - if cf != CF_DEFAULT { - db.drop_cf(cf).map_err(r2e)?; - } - } - - // Creates needed column families if they don't exist. 
- for cf in cfs_diff(&needed, &existed) { - db.create_cf(( - cf, - cfs_opts - .iter() - .find(|x| x.cf == cf) - .unwrap() - .options - .clone(), - )) - .map_err(r2e)?; - } - Ok(db) -} - -pub fn db_exist(path: &str) -> bool { - let path = Path::new(path); - if !path.exists() || !path.is_dir() { - return false; - } - let current_file_path = path.join("CURRENT"); - if !current_file_path.exists() || !current_file_path.is_file() { - return false; - } - - // If path is not an empty directory, and current file exists, we say db exists. If path is not an empty directory - // but db has not been created, `DB::list_column_families` fails and we can clean up - // the directory by this indication. - fs::read_dir(&path).unwrap().next().is_some() -} - -/// Returns a Vec of cf which is in `a' but not in `b'. -fn cfs_diff<'a>(a: &[&'a str], b: &[&str]) -> Vec<&'a str> { - a.iter() - .filter(|x| !b.iter().any(|y| *x == y)) - .cloned() - .collect() -} - -pub fn to_raw_perf_level(level: engine_traits::PerfLevel) -> rocksdb::PerfLevel { - match level { - engine_traits::PerfLevel::Uninitialized => rocksdb::PerfLevel::Uninitialized, - engine_traits::PerfLevel::Disable => rocksdb::PerfLevel::Disable, - engine_traits::PerfLevel::EnableCount => rocksdb::PerfLevel::EnableCount, - engine_traits::PerfLevel::EnableTimeExceptForMutex => { - rocksdb::PerfLevel::EnableTimeExceptForMutex - } - engine_traits::PerfLevel::EnableTimeAndCPUTimeExceptForMutex => { - rocksdb::PerfLevel::EnableTimeAndCPUTimeExceptForMutex - } - engine_traits::PerfLevel::EnableTime => rocksdb::PerfLevel::EnableTime, - engine_traits::PerfLevel::OutOfBounds => rocksdb::PerfLevel::OutOfBounds, - } -} - -pub fn from_raw_perf_level(level: rocksdb::PerfLevel) -> engine_traits::PerfLevel { - match level { - rocksdb::PerfLevel::Uninitialized => engine_traits::PerfLevel::Uninitialized, - rocksdb::PerfLevel::Disable => engine_traits::PerfLevel::Disable, - rocksdb::PerfLevel::EnableCount => engine_traits::PerfLevel::EnableCount, - 
rocksdb::PerfLevel::EnableTimeExceptForMutex => { - engine_traits::PerfLevel::EnableTimeExceptForMutex - } - rocksdb::PerfLevel::EnableTimeAndCPUTimeExceptForMutex => { - engine_traits::PerfLevel::EnableTimeAndCPUTimeExceptForMutex - } - rocksdb::PerfLevel::EnableTime => engine_traits::PerfLevel::EnableTime, - rocksdb::PerfLevel::OutOfBounds => engine_traits::PerfLevel::OutOfBounds, - } -} - -#[cfg(test)] -mod tests { - use engine_traits::CF_DEFAULT; - use rocksdb::{ColumnFamilyOptions, DBOptions, DB}; - use tempfile::Builder; - - use super::*; - - #[test] - fn test_cfs_diff() { - let a = vec!["1", "2", "3"]; - let a_diff_a = cfs_diff(&a, &a); - assert!(a_diff_a.is_empty()); - let b = vec!["4"]; - assert_eq!(a, cfs_diff(&a, &b)); - let c = vec!["4", "5", "3", "6"]; - assert_eq!(vec!["1", "2"], cfs_diff(&a, &c)); - assert_eq!(vec!["4", "5", "6"], cfs_diff(&c, &a)); - let d = vec!["1", "2", "3", "4"]; - let a_diff_d = cfs_diff(&a, &d); - assert!(a_diff_d.is_empty()); - assert_eq!(vec!["4"], cfs_diff(&d, &a)); - } - - #[test] - fn test_new_engine_opt() { - let path = Builder::new() - .prefix("_util_rocksdb_test_check_column_families") - .tempdir() - .unwrap(); - let path_str = path.path().to_str().unwrap(); - - // create db when db not exist - let mut cfs_opts = vec![CFOptions::new(CF_DEFAULT, ColumnFamilyOptions::new())]; - let mut opts = ColumnFamilyOptions::new(); - opts.set_level_compaction_dynamic_level_bytes(true); - cfs_opts.push(CFOptions::new("cf_dynamic_level_bytes", opts.clone())); - { - let mut db = new_engine_opt(path_str, DBOptions::new(), cfs_opts).unwrap(); - column_families_must_eq(path_str, vec![CF_DEFAULT, "cf_dynamic_level_bytes"]); - check_dynamic_level_bytes(&mut db); - } - - // add cf1. 
- let cfs_opts = vec![ - CFOptions::new(CF_DEFAULT, opts.clone()), - CFOptions::new("cf_dynamic_level_bytes", opts.clone()), - CFOptions::new("cf1", opts), - ]; - { - let mut db = new_engine_opt(path_str, DBOptions::new(), cfs_opts).unwrap(); - column_families_must_eq(path_str, vec![CF_DEFAULT, "cf_dynamic_level_bytes", "cf1"]); - check_dynamic_level_bytes(&mut db); - } - - // drop cf1. - let cfs_opts = vec![ - CFOptions::new(CF_DEFAULT, ColumnFamilyOptions::new()), - CFOptions::new("cf_dynamic_level_bytes", ColumnFamilyOptions::new()), - ]; - { - let mut db = new_engine_opt(path_str, DBOptions::new(), cfs_opts).unwrap(); - column_families_must_eq(path_str, vec![CF_DEFAULT, "cf_dynamic_level_bytes"]); - check_dynamic_level_bytes(&mut db); - } - - // never drop default cf - let cfs_opts = vec![]; - new_engine_opt(path_str, DBOptions::new(), cfs_opts).unwrap(); - column_families_must_eq(path_str, vec![CF_DEFAULT]); - } - - fn column_families_must_eq(path: &str, excepted: Vec<&str>) { - let opts = DBOptions::new(); - let cfs_list = DB::list_column_families(&opts, path).unwrap(); - - let mut cfs_existed: Vec<&str> = cfs_list.iter().map(|v| v.as_str()).collect(); - let mut cfs_excepted: Vec<&str> = excepted.clone(); - cfs_existed.sort_unstable(); - cfs_excepted.sort_unstable(); - assert_eq!(cfs_existed, cfs_excepted); - } - - fn check_dynamic_level_bytes(db: &mut DB) { - let cf_default = db.cf_handle(CF_DEFAULT).unwrap(); - let tmp_cf_opts = db.get_options_cf(cf_default); - assert!(!tmp_cf_opts.get_level_compaction_dynamic_level_bytes()); - let cf_test = db.cf_handle("cf_dynamic_level_bytes").unwrap(); - let tmp_cf_opts = db.get_options_cf(cf_test); - assert!(tmp_cf_opts.get_level_compaction_dynamic_level_bytes()); - } -} diff --git a/components/engine_rocks/src/rocks_metrics.rs b/components/engine_rocks/src/rocks_metrics.rs index 1ce4063298e..4529b6e9d27 100644 --- a/components/engine_rocks/src/rocks_metrics.rs +++ b/components/engine_rocks/src/rocks_metrics.rs @@ 
-1618,8 +1618,7 @@ mod tests { #[test] fn test_flush() { let dir = Builder::new().prefix("test-flush").tempdir().unwrap(); - let engine = - crate::util::new_engine(dir.path().to_str().unwrap(), None, ALL_CFS, None).unwrap(); + let engine = crate::util::new_engine(dir.path().to_str().unwrap(), ALL_CFS).unwrap(); for tp in ENGINE_TICKER_TYPES { flush_engine_ticker_metrics(*tp, 2, "kv"); } diff --git a/components/engine_rocks/src/util.rs b/components/engine_rocks/src/util.rs index 81a2ccb497a..a3b6a2bf4cf 100644 --- a/components/engine_rocks/src/util.rs +++ b/components/engine_rocks/src/util.rs @@ -1,89 +1,163 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::{str::FromStr, sync::Arc}; +use std::{fs, path::Path, str::FromStr, sync::Arc}; -use engine_traits::{Engines, Error, Range, Result, CF_DEFAULT}; -use rocksdb::{CFHandle, Range as RocksRange, SliceTransform, DB}; -use tikv_util::box_err; +use engine_traits::{Engines, Range, Result, CF_DEFAULT}; +use rocksdb::{ + load_latest_options, CColumnFamilyDescriptor, CFHandle, ColumnFamilyOptions, Env, + Range as RocksRange, SliceTransform, DB, +}; +use slog_global::warn; use crate::{ - cf_options::RocksColumnFamilyOptions, - db_options::RocksDBOptions, - engine::RocksEngine, - r2e, - raw_util::{new_engine as new_engine_raw, new_engine_opt as new_engine_opt_raw, CFOptions}, + cf_options::RocksCfOptions, db_options::RocksDBOptions, engine::RocksEngine, r2e, rocks_metrics_defs::*, }; pub fn new_temp_engine(path: &tempfile::TempDir) -> Engines { let raft_path = path.path().join(std::path::Path::new("raft")); Engines::new( - new_engine( - path.path().to_str().unwrap(), - None, - engine_traits::ALL_CFS, - None, - ) - .unwrap(), - new_engine( - raft_path.to_str().unwrap(), - None, - &[engine_traits::CF_DEFAULT], - None, - ) - .unwrap(), + new_engine(path.path().to_str().unwrap(), engine_traits::ALL_CFS).unwrap(), + new_engine(raft_path.to_str().unwrap(), &[engine_traits::CF_DEFAULT]).unwrap(), ) } 
pub fn new_default_engine(path: &str) -> Result { - let engine = - new_engine_raw(path, None, &[CF_DEFAULT], None).map_err(|e| Error::Other(box_err!(e)))?; - let engine = Arc::new(engine); - let engine = RocksEngine::from_db(engine); - Ok(engine) + new_engine(path, &[CF_DEFAULT]) } -pub struct RocksCFOptions<'a> { - cf: &'a str, - options: RocksColumnFamilyOptions, +pub fn new_engine(path: &str, cfs: &[&str]) -> Result { + let mut db_opts = RocksDBOptions::default(); + db_opts.enable_statistics(true); + let cf_opts = cfs.iter().map(|name| (*name, Default::default())).collect(); + new_engine_opt(path, db_opts, cf_opts) } -impl<'a> RocksCFOptions<'a> { - pub fn new(cf: &'a str, options: RocksColumnFamilyOptions) -> RocksCFOptions<'a> { - RocksCFOptions { cf, options } +pub fn new_engine_opt( + path: &str, + db_opt: RocksDBOptions, + cf_opts: Vec<(&str, RocksCfOptions)>, +) -> Result { + let mut db_opt = db_opt.into_raw(); + if cf_opts.iter().all(|(name, _)| *name != CF_DEFAULT) { + return Err(engine_traits::Error::Engine( + engine_traits::Status::with_error( + engine_traits::Code::InvalidArgument, + "default cf must be specified", + ), + )); + } + let mut cf_opts: Vec<_> = cf_opts + .into_iter() + .map(|(name, opt)| (name, opt.into_raw())) + .collect(); + + // Creates a new db if it doesn't exist. + if !db_exist(path) { + db_opt.create_if_missing(true); + db_opt.create_missing_column_families(true); + + let db = DB::open_cf(db_opt, path, cf_opts.into_iter().collect()).map_err(r2e)?; + + return Ok(RocksEngine::new(db)); + } + + db_opt.create_if_missing(false); + + // Lists all column families in current db. 
+ let cfs_list = DB::list_column_families(&db_opt, path).map_err(r2e)?; + let existed: Vec<&str> = cfs_list.iter().map(|v| v.as_str()).collect(); + let needed: Vec<&str> = cf_opts.iter().map(|(name, _)| *name).collect(); + + let cf_descs = if !existed.is_empty() { + let env = match db_opt.env() { + Some(env) => env, + None => Arc::new(Env::default()), + }; + // panic if OPTIONS not found for existing instance? + let (_, tmp) = load_latest_options(path, &env, true) + .unwrap_or_else(|e| panic!("failed to load_latest_options {:?}", e)) + .unwrap_or_else(|| panic!("couldn't find the OPTIONS file")); + tmp + } else { + vec![] + }; + + for cf in &existed { + if cf_opts.iter().all(|(name, _)| name != cf) { + cf_opts.push((cf, ColumnFamilyOptions::default())); + } + } + for (name, opt) in &mut cf_opts { + adjust_dynamic_level_bytes(&cf_descs, name, opt); } - pub fn into_raw(self) -> CFOptions<'a> { - CFOptions::new(self.cf, self.options.into_raw()) + // If all column families exist, just open db. + if existed == needed { + let db = DB::open_cf(db_opt, path, cf_opts.into_iter().collect()).map_err(r2e)?; + return Ok(RocksEngine::new(db)); } + + // Opens db. + let cfds = cf_opts.into_iter().collect(); + db_opt.create_missing_column_families(true); + let mut db = DB::open_cf(db_opt, path, cfds).map_err(r2e)?; + + // Drops discarded column families. + // for cf in existed.iter().filter(|x| needed.iter().find(|y| y == x).is_none()) { + for cf in cfs_diff(&existed, &needed) { + // Never drop default column families. 
+ if cf != CF_DEFAULT { + db.drop_cf(cf).map_err(r2e)?; + } + } + + Ok(RocksEngine::new(db)) } -pub fn new_engine( - path: &str, - db_opts: Option, - cfs: &[&str], - opts: Option>>, -) -> Result { - let db_opts = db_opts.map(RocksDBOptions::into_raw); - let opts = opts.map(|o| o.into_iter().map(RocksCFOptions::into_raw).collect()); - let engine = new_engine_raw(path, db_opts, cfs, opts).map_err(|e| Error::Other(box_err!(e)))?; - let engine = Arc::new(engine); - let engine = RocksEngine::from_db(engine); - Ok(engine) +/// Turns "dynamic level size" off for the existing column family which was off before. +/// Column families are small, HashMap isn't necessary. +fn adjust_dynamic_level_bytes( + cf_descs: &[CColumnFamilyDescriptor], + name: &str, + opt: &mut ColumnFamilyOptions, +) { + if let Some(cf_desc) = cf_descs.iter().find(|cf_desc| cf_desc.name() == name) { + let existed_dynamic_level_bytes = + cf_desc.options().get_level_compaction_dynamic_level_bytes(); + if existed_dynamic_level_bytes != opt.get_level_compaction_dynamic_level_bytes() { + warn!( + "change dynamic_level_bytes for existing column family is danger"; + "old_value" => existed_dynamic_level_bytes, + "new_value" => opt.get_level_compaction_dynamic_level_bytes(), + ); + } + opt.set_level_compaction_dynamic_level_bytes(existed_dynamic_level_bytes); + } } -pub fn new_engine_opt( - path: &str, - db_opt: RocksDBOptions, - cfs_opts: Vec>, -) -> Result { - let db_opt = db_opt.into_raw(); - let cfs_opts = cfs_opts.into_iter().map(RocksCFOptions::into_raw).collect(); - let engine = - new_engine_opt_raw(path, db_opt, cfs_opts).map_err(|e| Error::Other(box_err!(e)))?; - let engine = Arc::new(engine); - let engine = RocksEngine::from_db(engine); - Ok(engine) +pub fn db_exist(path: &str) -> bool { + let path = Path::new(path); + if !path.exists() || !path.is_dir() { + return false; + } + let current_file_path = path.join("CURRENT"); + if !current_file_path.exists() || !current_file_path.is_file() { + return 
false; + } + + // If path is not an empty directory, and current file exists, we say db exists. If path is not an empty directory + // but db has not been created, `DB::list_column_families` fails and we can clean up + // the directory by this indication. + fs::read_dir(&path).unwrap().next().is_some() +} + +/// Returns a Vec of cf which is in `a' but not in `b'. +fn cfs_diff<'a>(a: &[&'a str], b: &[&str]) -> Vec<&'a str> { + a.iter() + .filter(|x| !b.iter().any(|y| *x == y)) + .cloned() + .collect() } pub fn get_cf_handle<'a>(db: &'a DB, cf: &str) -> Result<&'a CFHandle> { @@ -223,3 +297,123 @@ impl SliceTransform for NoopSliceTransform { true } } + +pub fn to_raw_perf_level(level: engine_traits::PerfLevel) -> rocksdb::PerfLevel { + match level { + engine_traits::PerfLevel::Uninitialized => rocksdb::PerfLevel::Uninitialized, + engine_traits::PerfLevel::Disable => rocksdb::PerfLevel::Disable, + engine_traits::PerfLevel::EnableCount => rocksdb::PerfLevel::EnableCount, + engine_traits::PerfLevel::EnableTimeExceptForMutex => { + rocksdb::PerfLevel::EnableTimeExceptForMutex + } + engine_traits::PerfLevel::EnableTimeAndCPUTimeExceptForMutex => { + rocksdb::PerfLevel::EnableTimeAndCPUTimeExceptForMutex + } + engine_traits::PerfLevel::EnableTime => rocksdb::PerfLevel::EnableTime, + engine_traits::PerfLevel::OutOfBounds => rocksdb::PerfLevel::OutOfBounds, + } +} + +pub fn from_raw_perf_level(level: rocksdb::PerfLevel) -> engine_traits::PerfLevel { + match level { + rocksdb::PerfLevel::Uninitialized => engine_traits::PerfLevel::Uninitialized, + rocksdb::PerfLevel::Disable => engine_traits::PerfLevel::Disable, + rocksdb::PerfLevel::EnableCount => engine_traits::PerfLevel::EnableCount, + rocksdb::PerfLevel::EnableTimeExceptForMutex => { + engine_traits::PerfLevel::EnableTimeExceptForMutex + } + rocksdb::PerfLevel::EnableTimeAndCPUTimeExceptForMutex => { + engine_traits::PerfLevel::EnableTimeAndCPUTimeExceptForMutex + } + rocksdb::PerfLevel::EnableTime => 
engine_traits::PerfLevel::EnableTime, + rocksdb::PerfLevel::OutOfBounds => engine_traits::PerfLevel::OutOfBounds, + } +} + +#[cfg(test)] +mod tests { + use engine_traits::{CFOptionsExt, CF_DEFAULT}; + use rocksdb::DB; + use tempfile::Builder; + + use super::*; + + #[test] + fn test_cfs_diff() { + let a = vec!["1", "2", "3"]; + let a_diff_a = cfs_diff(&a, &a); + assert!(a_diff_a.is_empty()); + let b = vec!["4"]; + assert_eq!(a, cfs_diff(&a, &b)); + let c = vec!["4", "5", "3", "6"]; + assert_eq!(vec!["1", "2"], cfs_diff(&a, &c)); + assert_eq!(vec!["4", "5", "6"], cfs_diff(&c, &a)); + let d = vec!["1", "2", "3", "4"]; + let a_diff_d = cfs_diff(&a, &d); + assert!(a_diff_d.is_empty()); + assert_eq!(vec!["4"], cfs_diff(&d, &a)); + } + + #[test] + fn test_new_engine_opt() { + let path = Builder::new() + .prefix("_util_rocksdb_test_check_column_families") + .tempdir() + .unwrap(); + let path_str = path.path().to_str().unwrap(); + + // create db when db not exist + let mut cfs_opts = vec![(CF_DEFAULT, RocksCfOptions::default())]; + let mut opts = RocksCfOptions::default(); + opts.set_level_compaction_dynamic_level_bytes(true); + cfs_opts.push(("cf_dynamic_level_bytes", opts.clone())); + let db = new_engine_opt(path_str, RocksDBOptions::default(), cfs_opts).unwrap(); + column_families_must_eq(path_str, vec![CF_DEFAULT, "cf_dynamic_level_bytes"]); + check_dynamic_level_bytes(&db); + drop(db); + + // add cf1. + let cfs_opts = vec![ + (CF_DEFAULT, opts.clone()), + ("cf_dynamic_level_bytes", opts.clone()), + ("cf1", opts), + ]; + let db = new_engine_opt(path_str, RocksDBOptions::default(), cfs_opts).unwrap(); + column_families_must_eq(path_str, vec![CF_DEFAULT, "cf_dynamic_level_bytes", "cf1"]); + check_dynamic_level_bytes(&db); + drop(db); + + // drop cf1. + let cfs = vec![CF_DEFAULT, "cf_dynamic_level_bytes"]; + let db = new_engine(path_str, &cfs).unwrap(); + column_families_must_eq(path_str, cfs); + check_dynamic_level_bytes(&db); + drop(db); + + // drop all cfs. 
+ new_engine(path_str, &[CF_DEFAULT]).unwrap(); + column_families_must_eq(path_str, vec![CF_DEFAULT]); + + // not specifying default cf should error. + new_engine(path_str, &[]).unwrap_err(); + column_families_must_eq(path_str, vec![CF_DEFAULT]); + } + + fn column_families_must_eq(path: &str, excepted: Vec<&str>) { + let opts = RocksDBOptions::default(); + let cfs_list = DB::list_column_families(&opts, path).unwrap(); + + let mut cfs_existed: Vec<&str> = cfs_list.iter().map(|v| v.as_str()).collect(); + let mut cfs_excepted: Vec<&str> = excepted.clone(); + cfs_existed.sort_unstable(); + cfs_excepted.sort_unstable(); + assert_eq!(cfs_existed, cfs_excepted); + } + + fn check_dynamic_level_bytes(db: &RocksEngine) { + let tmp_cf_opts = db.get_options_cf(CF_DEFAULT).unwrap(); + assert!(!tmp_cf_opts.get_level_compaction_dynamic_level_bytes()); + let tmp_cf_opts = db.get_options_cf("cf_dynamic_level_bytes").unwrap(); + assert!(tmp_cf_opts.get_level_compaction_dynamic_level_bytes()); + } +} diff --git a/components/engine_rocks/src/write_batch.rs b/components/engine_rocks/src/write_batch.rs index f09761802e6..892dd83321c 100644 --- a/components/engine_rocks/src/write_batch.rs +++ b/components/engine_rocks/src/write_batch.rs @@ -219,7 +219,7 @@ impl Mutable for RocksWriteBatchVec { #[cfg(test)] mod tests { - use engine_traits::{Peekable, WriteBatch}; + use engine_traits::{Peekable, WriteBatch, CF_DEFAULT}; use rocksdb::DBOptions as RawDBOptions; use tempfile::Builder; @@ -227,6 +227,7 @@ mod tests { super::{util::new_engine_opt, RocksDBOptions}, *, }; + use crate::RocksCfOptions; #[test] fn test_should_write_to_engine_with_pipeline_write_mode() { @@ -241,7 +242,7 @@ mod tests { let engine = new_engine_opt( path.path().join("db").to_str().unwrap(), RocksDBOptions::from_raw(opt), - vec![], + vec![(CF_DEFAULT, RocksCfOptions::default())], ) .unwrap(); assert!( @@ -287,7 +288,7 @@ mod tests { let engine = new_engine_opt( path.path().join("db").to_str().unwrap(), 
RocksDBOptions::from_raw(opt), - vec![], + vec![(CF_DEFAULT, RocksCfOptions::default())], ) .unwrap(); assert!( diff --git a/components/engine_rocks_helper/Cargo.toml b/components/engine_rocks_helper/Cargo.toml index 74a0e8de47c..77133f09cbd 100644 --- a/components/engine_rocks_helper/Cargo.toml +++ b/components/engine_rocks_helper/Cargo.toml @@ -9,6 +9,7 @@ failpoints = ["fail/failpoints"] [dependencies] engine_rocks = { path = "../engine_rocks", default-features = false } +engine_traits = { path = "../engine_traits" } fail = "0.5" futures = "0.3" keys = { path = "../keys", default-features = false } diff --git a/components/engine_rocks_helper/src/sst_recovery.rs b/components/engine_rocks_helper/src/sst_recovery.rs index e7c1bae3a1c..bfd39e951b2 100644 --- a/components/engine_rocks_helper/src/sst_recovery.rs +++ b/components/engine_rocks_helper/src/sst_recovery.rs @@ -6,7 +6,7 @@ use std::{ time::{Duration, Instant}, }; -use engine_rocks::raw::*; +use engine_rocks::RocksEngine; use fail::fail_point; use raftstore::store::fsm::StoreMeta; use tikv_util::{self, set_panic_mark, warn, worker::*}; @@ -17,7 +17,7 @@ pub const DEFAULT_CHECK_INTERVAL: Duration = Duration::from_secs(10); const MAX_DAMAGED_FILES_NUM: usize = 2; pub struct RecoveryRunner { - db: Arc, + db: RocksEngine, store_meta: Arc>, // Considering that files will not be too much, it is enough to use `Vec`. damaged_files: Vec, @@ -68,7 +68,7 @@ impl RunnableWithTimer for RecoveryRunner { impl RecoveryRunner { pub fn new( - db: Arc, + db: RocksEngine, store_meta: Arc>, max_hang_duration: Duration, check_duration: Duration, @@ -87,7 +87,7 @@ impl RecoveryRunner { return; } - let live_files = self.db.get_live_files(); + let live_files = self.db.as_inner().get_live_files(); for i in 0..live_files.get_files_count() { if path == live_files.get_name(i as i32) { let f = FileInfo { @@ -167,6 +167,7 @@ impl RecoveryRunner { // file with the same largest key will be skipped. 
// Here store meta lock should be held to prevent peers from being added back. self.db + .as_inner() .delete_files_in_range(&file.smallest_key, &file.largest_key, true) .unwrap(); self.must_file_not_exist(&file.name); @@ -192,7 +193,7 @@ impl RecoveryRunner { } fn must_file_not_exist(&self, fname: &str) { - let live_files = self.db.get_live_files(); + let live_files = self.db.as_inner().get_live_files(); for i in 0..live_files.get_files_count() { if live_files.get_name(i as i32) == fname { // `delete_files_in_range` can't delete L0 files. @@ -206,7 +207,8 @@ impl RecoveryRunner { mod tests { use std::{collections::BTreeMap, sync::Arc}; - use engine_rocks::raw_util; + use engine_rocks::util; + use engine_traits::{CompactExt, SyncMutable, CF_DEFAULT}; use kvproto::metapb::{Peer, Region}; use tempfile::Builder; @@ -218,16 +220,14 @@ mod tests { .prefix("test_sst_recovery_runner") .tempdir() .unwrap(); - let db = Arc::new( - raw_util::new_engine(path.path().to_str().unwrap(), None, &["cf"], None).unwrap(), - ); + let db = util::new_engine(path.path().to_str().unwrap(), &[CF_DEFAULT, "cf"]).unwrap(); db.put(b"z2", b"val").unwrap(); db.put(b"z7", b"val").unwrap(); // generate SST file. 
- db.compact_range(None, None); + db.compact_range(CF_DEFAULT, None, None, false, 1).unwrap(); - let files = db.get_live_files(); + let files = db.as_inner().get_live_files(); assert_eq!(files.get_smallestkey(0), b"z2"); assert_eq!(files.get_largestkey(0), b"z7"); diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index d6633139122..20645823fd8 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -95,21 +95,16 @@ pub mod kv { }; use tikv_util::box_err; - use crate::ctor::{CFOptions, DBOptions, KvEngineConstructorExt}; + use crate::ctor::{ColumnFamilyOptions as KvTestCFOptions, DBOptions, KvEngineConstructorExt}; - pub fn new_engine( - path: &str, - db_opt: Option, - cfs: &[&str], - opts: Option>, - ) -> Result { - KvTestEngine::new_kv_engine(path, db_opt, cfs, opts) + pub fn new_engine(path: &str, cfs: &[&str]) -> Result { + KvTestEngine::new_kv_engine(path, cfs) } pub fn new_engine_opt( path: &str, db_opt: DBOptions, - cfs_opts: Vec, + cfs_opts: Vec<(&str, KvTestCFOptions)>, ) -> Result { KvTestEngine::new_kv_engine_opt(path, db_opt, cfs_opts) } @@ -119,24 +114,21 @@ pub mod kv { #[derive(Clone)] pub struct TestTabletFactory { root_path: String, - db_opt: Option, - cfs: Vec, - opts: Option>, + db_opt: DBOptions, + cf_opts: Vec<(&'static str, KvTestCFOptions)>, registry: Arc>>, } impl TestTabletFactory { pub fn new( root_path: &str, - db_opt: Option, - cfs: &[&str], - opts: Option>, + db_opt: DBOptions, + cf_opts: Vec<(&'static str, KvTestCFOptions)>, ) -> Self { Self { root_path: root_path.to_string(), db_opt, - cfs: cfs.iter().map(|s| s.to_string()).collect(), - opts, + cf_opts, registry: Arc::new(Mutex::new(HashMap::default())), } } @@ -165,13 +157,10 @@ pub mod kv { } let tablet_path = self.tablet_path(id, suffix); let tablet_path = tablet_path.to_str().unwrap(); - let mut cfs = vec![]; - self.cfs.iter().for_each(|s| cfs.push(s.as_str())); - let kv_engine = KvTestEngine::new_kv_engine( + let 
kv_engine = KvTestEngine::new_kv_engine_opt( tablet_path, self.db_opt.clone(), - cfs.as_slice(), - self.opts.clone(), + self.cf_opts.clone(), )?; reg.insert((id, suffix), kv_engine.clone()); Ok(kv_engine) @@ -343,12 +332,7 @@ pub mod ctor { /// /// The engine stores its data in the `path` directory. /// If that directory does not exist, then it is created. - fn new_kv_engine( - path: &str, - db_opt: Option, - cfs: &[&str], - opts: Option>, - ) -> Result; + fn new_kv_engine(path: &str, cfs: &[&str]) -> Result; /// Create a new engine with specified column families and options /// @@ -357,7 +341,7 @@ pub mod ctor { fn new_kv_engine_opt( path: &str, db_opt: DBOptions, - cfs_opts: Vec, + cf_opts: Vec<(&str, ColumnFamilyOptions)>, ) -> Result; } @@ -390,21 +374,6 @@ pub mod ctor { pub type RaftDBOptions = DBOptions; - #[derive(Clone)] - pub struct CFOptions { - pub cf: String, - pub options: ColumnFamilyOptions, - } - - impl CFOptions { - pub fn new(cf: &str, options: ColumnFamilyOptions) -> CFOptions { - CFOptions { - cf: cf.to_string(), - options, - } - } - } - /// Properties for a single column family /// /// All engines must emulate column families, but at present it is not clear @@ -501,22 +470,19 @@ pub mod ctor { use engine_panic::PanicEngine; use engine_traits::Result; - use super::{CFOptions, DBOptions, KvEngineConstructorExt, RaftEngineConstructorExt}; + use super::{ + ColumnFamilyOptions, DBOptions, KvEngineConstructorExt, RaftEngineConstructorExt, + }; impl KvEngineConstructorExt for engine_panic::PanicEngine { - fn new_kv_engine( - _path: &str, - _db_opt: Option, - _cfs: &[&str], - _opts: Option>, - ) -> Result { + fn new_kv_engine(_path: &str, _cfs: &[&str]) -> Result { Ok(PanicEngine) } fn new_kv_engine_opt( _path: &str, _db_opt: DBOptions, - _cfs_opts: Vec, + _cfs_opts: Vec<(&str, ColumnFamilyOptions)>, ) -> Result { Ok(PanicEngine) } @@ -533,71 +499,38 @@ pub mod ctor { use engine_rocks::{ get_env, properties::{MvccPropertiesCollectorFactory, 
RangePropertiesCollectorFactory}, - raw::{ - ColumnFamilyOptions as RawRocksColumnFamilyOptions, DBOptions as RawRocksDBOptions, - }, - util::{ - new_engine as rocks_new_engine, new_engine_opt as rocks_new_engine_opt, - RocksCFOptions, - }, - RocksColumnFamilyOptions, RocksDBOptions, + util::new_engine_opt as rocks_new_engine_opt, + RocksCfOptions, RocksDBOptions, }; - use engine_traits::{ColumnFamilyOptions as ColumnFamilyOptionsTrait, Result}; + use engine_traits::{ColumnFamilyOptions as ColumnFamilyOptionsTrait, Result, CF_DEFAULT}; use super::{ - CFOptions, ColumnFamilyOptions, DBOptions, KvEngineConstructorExt, RaftDBOptions, + ColumnFamilyOptions, DBOptions, KvEngineConstructorExt, RaftDBOptions, RaftEngineConstructorExt, }; impl KvEngineConstructorExt for engine_rocks::RocksEngine { - // FIXME this is duplicating behavior from engine_rocks::raw_util in order to + // FIXME this is duplicating behavior from engine_rocks::util in order to // call set_standard_cf_opts. - fn new_kv_engine( - path: &str, - db_opt: Option, - cfs: &[&str], - opts: Option>, - ) -> Result { - let rocks_db_opts = match db_opt { - Some(db_opt) => Some(get_rocks_db_opts(db_opt)?), - None => None, - }; - let cfs_opts = match opts { - Some(opts) => opts, - None => { - let mut default_cfs_opts = Vec::with_capacity(cfs.len()); - for cf in cfs { - default_cfs_opts.push(CFOptions::new(*cf, ColumnFamilyOptions::new())); - } - default_cfs_opts - } - }; - let rocks_cfs_opts = cfs_opts + fn new_kv_engine(path: &str, cfs: &[&str]) -> Result { + let rocks_db_opt = RocksDBOptions::default(); + let default_cf_opt = ColumnFamilyOptions::new(); + let rocks_cfs_opts = cfs .iter() - .map(|cf_opts| { - let mut rocks_cf_opts = RocksColumnFamilyOptions::new(); - set_standard_cf_opts(rocks_cf_opts.as_raw_mut(), &cf_opts.options); - set_cf_opts(&mut rocks_cf_opts, &cf_opts.options); - RocksCFOptions::new(&cf_opts.cf, rocks_cf_opts) - }) + .map(|cf_name| (*cf_name, get_rocks_cf_opts(&default_cf_opt))) 
.collect(); - rocks_new_engine(path, rocks_db_opts, &[], Some(rocks_cfs_opts)) + rocks_new_engine_opt(path, rocks_db_opt, rocks_cfs_opts) } fn new_kv_engine_opt( path: &str, db_opt: DBOptions, - cfs_opts: Vec, + cfs_opts: Vec<(&str, ColumnFamilyOptions)>, ) -> Result { let rocks_db_opts = get_rocks_db_opts(db_opt)?; let rocks_cfs_opts = cfs_opts .iter() - .map(|cf_opts| { - let mut rocks_cf_opts = RocksColumnFamilyOptions::new(); - set_standard_cf_opts(rocks_cf_opts.as_raw_mut(), &cf_opts.options); - set_cf_opts(&mut rocks_cf_opts, &cf_opts.options); - RocksCFOptions::new(&cf_opts.cf, rocks_cf_opts) - }) + .map(|(name, opt)| (*name, get_rocks_cf_opts(opt))) .collect(); rocks_new_engine_opt(path, rocks_db_opts, rocks_cfs_opts) } @@ -606,22 +539,17 @@ pub mod ctor { impl RaftEngineConstructorExt for engine_rocks::RocksEngine { fn new_raft_engine(path: &str, db_opt: Option) -> Result { let rocks_db_opts = match db_opt { - Some(db_opt) => Some(get_rocks_db_opts(db_opt)?), - None => None, + Some(db_opt) => get_rocks_db_opts(db_opt)?, + None => RocksDBOptions::default(), }; - let cf_opts = CFOptions::new(engine_traits::CF_DEFAULT, ColumnFamilyOptions::new()); - let mut rocks_cf_opts = RocksColumnFamilyOptions::new(); - set_standard_cf_opts(rocks_cf_opts.as_raw_mut(), &cf_opts.options); - set_cf_opts(&mut rocks_cf_opts, &cf_opts.options); - let default_cfs_opts = vec![RocksCFOptions::new(&cf_opts.cf, rocks_cf_opts)]; - rocks_new_engine(path, rocks_db_opts, &[], Some(default_cfs_opts)) + let rocks_cf_opts = get_rocks_cf_opts(&ColumnFamilyOptions::new()); + let default_cfs_opts = vec![(CF_DEFAULT, rocks_cf_opts)]; + rocks_new_engine_opt(path, rocks_db_opts, default_cfs_opts) } } - fn set_standard_cf_opts( - rocks_cf_opts: &mut RawRocksColumnFamilyOptions, - cf_opts: &ColumnFamilyOptions, - ) { + fn get_rocks_cf_opts(cf_opts: &ColumnFamilyOptions) -> RocksCfOptions { + let mut rocks_cf_opts = RocksCfOptions::new(); if !cf_opts.get_no_range_properties() { 
rocks_cf_opts.add_table_properties_collector_factory( "tikv.range-properties-collector", @@ -634,27 +562,21 @@ pub mod ctor { MvccPropertiesCollectorFactory::default(), ); } - } - fn set_cf_opts( - rocks_cf_opts: &mut RocksColumnFamilyOptions, - cf_opts: &ColumnFamilyOptions, - ) { if let Some(trigger) = cf_opts.get_level_zero_file_num_compaction_trigger() { rocks_cf_opts.set_level_zero_file_num_compaction_trigger(trigger); } if let Some(trigger) = cf_opts.get_level_zero_slowdown_writes_trigger() { - rocks_cf_opts - .as_raw_mut() - .set_level_zero_slowdown_writes_trigger(trigger); + rocks_cf_opts.set_level_zero_slowdown_writes_trigger(trigger); } if cf_opts.get_disable_auto_compactions() { rocks_cf_opts.set_disable_auto_compactions(true); } + rocks_cf_opts } fn get_rocks_db_opts(db_opts: DBOptions) -> Result { - let mut rocks_db_opts = RawRocksDBOptions::new(); + let mut rocks_db_opts = RocksDBOptions::default(); let env = get_env(db_opts.key_manager.clone(), db_opts.rate_limiter)?; rocks_db_opts.set_env(env); if db_opts.enable_multi_batch_write { @@ -662,7 +584,6 @@ pub mod ctor { rocks_db_opts.enable_pipelined_write(false); rocks_db_opts.enable_multi_batch_write(true); } - let rocks_db_opts = RocksDBOptions::from_raw(rocks_db_opts); Ok(rocks_db_opts) } } @@ -695,13 +616,7 @@ pub fn new_temp_engine( ) -> engine_traits::Engines { let raft_path = path.path().join(std::path::Path::new("raft")); engine_traits::Engines::new( - crate::kv::new_engine( - path.path().to_str().unwrap(), - None, - engine_traits::ALL_CFS, - None, - ) - .unwrap(), + crate::kv::new_engine(path.path().to_str().unwrap(), engine_traits::ALL_CFS).unwrap(), crate::raft::new_engine(raft_path.to_str().unwrap(), None).unwrap(), ) } diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index c5b09fe59e1..6ba3da2b3d9 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -239,10 +239,6 @@ //! 
it in engine_traits and engine_rocks, replacing all the callers with calls //! into the traits, then delete the versions in the `engine` crate. //! -//! - Use the .c() method from engine_rocks::compat::Compat to get a -//! KvEngine reference from Arc in the fewest characters. It also -//! works on Snapshot, and can be adapted to other types. -//! //! - Use `IntoOther` to adapt between error types of dependencies that are not //! themselves interdependent. E.g. raft::Error can be created from //! engine_traits::Error even though neither `raft` tor `engine_traits` know diff --git a/components/engine_traits_tests/src/basic_read_write.rs b/components/engine_traits_tests/src/basic_read_write.rs index d5104ba57e3..38a1921dd85 100644 --- a/components/engine_traits_tests/src/basic_read_write.rs +++ b/components/engine_traits_tests/src/basic_read_write.rs @@ -2,7 +2,7 @@ //! Reading and writing -use engine_traits::{Peekable, SyncMutable, ALL_CFS, CF_DEFAULT, CF_WRITE}; +use engine_traits::{Peekable, SyncMutable, ALL_CFS, CF_DEFAULT}; use super::engine_cfs; @@ -17,16 +17,3 @@ fn non_cf_methods_are_default_cf() { let value = value.expect("value"); assert_eq!(b"bar", &*value); } - -// CF_DEFAULT always exists -#[test] -fn non_cf_methods_implicit_default_cf() { - let db = engine_cfs(&[CF_WRITE]); - db.engine.put(b"foo", b"bar").unwrap(); - let value = db.engine.get_value(b"foo").unwrap(); - let value = value.expect("value"); - assert_eq!(b"bar", &*value); - let value = db.engine.get_value_cf(CF_DEFAULT, b"foo").unwrap(); - let value = value.expect("value"); - assert_eq!(b"bar", &*value); -} diff --git a/components/engine_traits_tests/src/cf_names.rs b/components/engine_traits_tests/src/cf_names.rs index 187df39a081..48031275b14 100644 --- a/components/engine_traits_tests/src/cf_names.rs +++ b/components/engine_traits_tests/src/cf_names.rs @@ -1,6 +1,6 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{CFNamesExt, KvEngine, Snapshot, ALL_CFS, CF_DEFAULT, CF_WRITE}; +use engine_traits::{CFNamesExt, KvEngine, Snapshot, ALL_CFS, CF_DEFAULT}; use super::{default_engine, engine_cfs}; @@ -22,14 +22,6 @@ fn cf_names() { } } -#[test] -fn implicit_default_cf() { - let db = engine_cfs(&[CF_WRITE]); - let names = db.engine.cf_names(); - assert_eq!(names.len(), 2); - assert!(names.contains(&CF_DEFAULT)); -} - #[test] fn default_names_snapshot() { let db = default_engine(); @@ -49,12 +41,3 @@ fn cf_names_snapshot() { assert!(names.contains(cf)); } } - -#[test] -fn implicit_default_cf_snapshot() { - let db = engine_cfs(&[CF_WRITE]); - let snapshot = db.engine.snapshot(); - let names = snapshot.cf_names(); - assert_eq!(names.len(), 2); - assert!(names.contains(&CF_DEFAULT)); -} diff --git a/components/engine_traits_tests/src/ctor.rs b/components/engine_traits_tests/src/ctor.rs index b3338a46367..5f39ad4f3a7 100644 --- a/components/engine_traits_tests/src/ctor.rs +++ b/components/engine_traits_tests/src/ctor.rs @@ -5,7 +5,7 @@ use std::fs; use engine_test::{ - ctor::{CFOptions, ColumnFamilyOptions, DBOptions, KvEngineConstructorExt}, + ctor::{ColumnFamilyOptions, DBOptions, KvEngineConstructorExt}, kv::KvTestEngine, }; use engine_traits::{KvEngine, SyncMutable, ALL_CFS}; @@ -16,7 +16,7 @@ use super::tempdir; fn new_engine_basic() { let dir = tempdir(); let path = dir.path().to_str().unwrap(); - let _db = KvTestEngine::new_kv_engine(path, None, ALL_CFS, None).unwrap(); + let _db = KvTestEngine::new_kv_engine(path, ALL_CFS).unwrap(); } #[test] @@ -26,7 +26,7 @@ fn new_engine_opt_basic() { let db_opts = DBOptions::default(); let cf_opts = ALL_CFS .iter() - .map(|cf| CFOptions::new(cf, ColumnFamilyOptions::new())) + .map(|cf| (*cf, ColumnFamilyOptions::new())) .collect(); let _db = KvTestEngine::new_kv_engine_opt(path, db_opts, cf_opts).unwrap(); } @@ -37,7 +37,7 @@ fn new_engine_missing_dir() { let dir = tempdir(); let path = dir.path(); let path = 
path.join("missing").to_str().unwrap().to_owned(); - let db = KvTestEngine::new_kv_engine(&path, None, ALL_CFS, None).unwrap(); + let db = KvTestEngine::new_kv_engine(&path, ALL_CFS).unwrap(); db.put(b"foo", b"bar").unwrap(); db.sync().unwrap(); } @@ -50,7 +50,7 @@ fn new_engine_opt_missing_dir() { let db_opts = DBOptions::default(); let cf_opts = ALL_CFS .iter() - .map(|cf| CFOptions::new(cf, ColumnFamilyOptions::new())) + .map(|cf| (*cf, ColumnFamilyOptions::new())) .collect(); let db = KvTestEngine::new_kv_engine_opt(&path, db_opts, cf_opts).unwrap(); db.put(b"foo", b"bar").unwrap(); @@ -71,7 +71,7 @@ fn new_engine_readonly_dir() { fs::set_permissions(&path, perms).unwrap(); let path = path.to_str().unwrap(); - let err = KvTestEngine::new_kv_engine(path, None, ALL_CFS, None); + let err = KvTestEngine::new_kv_engine(path, ALL_CFS); assert!(err.is_err()); } @@ -93,7 +93,7 @@ fn new_engine_opt_readonly_dir() { let db_opts = DBOptions::default(); let cf_opts = ALL_CFS .iter() - .map(|cf| CFOptions::new(cf, ColumnFamilyOptions::new())) + .map(|cf| (*cf, ColumnFamilyOptions::new())) .collect(); let err = KvTestEngine::new_kv_engine_opt(path, db_opts, cf_opts); diff --git a/components/engine_traits_tests/src/lib.rs b/components/engine_traits_tests/src/lib.rs index 0ddb39c61ac..73c741ff925 100644 --- a/components/engine_traits_tests/src/lib.rs +++ b/components/engine_traits_tests/src/lib.rs @@ -64,7 +64,7 @@ fn default_engine() -> TempDirEnginePair { let dir = tempdir(); let path = dir.path().to_str().unwrap(); - let engine = KvTestEngine::new_kv_engine(path, None, &[CF_DEFAULT], None).unwrap(); + let engine = KvTestEngine::new_kv_engine(path, &[CF_DEFAULT]).unwrap(); TempDirEnginePair { engine, tempdir: dir, @@ -74,7 +74,10 @@ fn default_engine() -> TempDirEnginePair { /// Create a multi batch write engine with only CF_DEFAULT fn multi_batch_write_engine() -> TempDirEnginePair { use engine_test::{ - ctor::{DBOptions as KvTestDBOptions, KvEngineConstructorExt}, + 
ctor::{ + ColumnFamilyOptions as KvTestCFOptions, DBOptions as KvTestDBOptions, + KvEngineConstructorExt, + }, kv::KvTestEngine, }; use engine_traits::CF_DEFAULT; @@ -83,7 +86,9 @@ fn multi_batch_write_engine() -> TempDirEnginePair { let path = dir.path().to_str().unwrap(); let mut opt = KvTestDBOptions::default(); opt.set_enable_multi_batch_write(true); - let engine = KvTestEngine::new_kv_engine(path, Some(opt), &[CF_DEFAULT], None).unwrap(); + let engine = + KvTestEngine::new_kv_engine_opt(path, opt, vec![(CF_DEFAULT, KvTestCFOptions::new())]) + .unwrap(); TempDirEnginePair { engine, tempdir: dir, @@ -96,7 +101,7 @@ fn engine_cfs(cfs: &[&str]) -> TempDirEnginePair { let dir = tempdir(); let path = dir.path().to_str().unwrap(); - let engine = KvTestEngine::new_kv_engine(path, None, cfs, None).unwrap(); + let engine = KvTestEngine::new_kv_engine(path, cfs).unwrap(); TempDirEnginePair { engine, tempdir: dir, diff --git a/components/raftstore/src/coprocessor/split_check/half.rs b/components/raftstore/src/coprocessor/split_check/half.rs index f6d207df875..57472b5cecf 100644 --- a/components/raftstore/src/coprocessor/split_check/half.rs +++ b/components/raftstore/src/coprocessor/split_check/half.rs @@ -125,7 +125,7 @@ pub fn get_region_approximate_middle( mod tests { use std::{iter, sync::mpsc}; - use engine_test::ctor::{CFOptions, ColumnFamilyOptions, DBOptions}; + use engine_test::ctor::{ColumnFamilyOptions, DBOptions}; use engine_traits::{MiscExt, SyncMutable, ALL_CFS, CF_DEFAULT, LARGE_CFS}; use kvproto::{ metapb::{Peer, Region}, @@ -148,15 +148,7 @@ mod tests { fn test_split_check() { let path = Builder::new().prefix("test-raftstore").tempdir().unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let cfs_opts = ALL_CFS - .iter() - .map(|cf| { - let cf_opts = ColumnFamilyOptions::new(); - CFOptions::new(cf, cf_opts) - }) - .collect(); - let engine = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); + 
let engine = engine_test::kv::new_engine(path_str, ALL_CFS).unwrap(); let mut region = Region::default(); region.set_id(1); @@ -201,15 +193,7 @@ mod tests { fn test_split_check_with_key_range() { let path = Builder::new().prefix("test-raftstore").tempdir().unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let cfs_opts = ALL_CFS - .iter() - .map(|cf| { - let cf_opts = ColumnFamilyOptions::new(); - CFOptions::new(cf, cf_opts) - }) - .collect(); - let engine = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); + let engine = engine_test::kv::new_engine(path_str, ALL_CFS).unwrap(); let mut region = Region::default(); region.set_id(1); @@ -273,15 +257,7 @@ mod tests { fn test_generate_region_bucket_impl(mvcc: bool) { let path = Builder::new().prefix("test-raftstore").tempdir().unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let cfs_opts = ALL_CFS - .iter() - .map(|cf| { - let cf_opts = ColumnFamilyOptions::new(); - CFOptions::new(cf, cf_opts) - }) - .collect(); - let engine = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); + let engine = engine_test::kv::new_engine(path_str, ALL_CFS).unwrap(); let mut region = Region::default(); region.set_id(1); @@ -405,15 +381,7 @@ mod tests { fn test_generate_region_bucket_with_deleting_data() { let path = Builder::new().prefix("test-raftstore").tempdir().unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let cfs_opts = ALL_CFS - .iter() - .map(|cf| { - let cf_opts = ColumnFamilyOptions::new(); - CFOptions::new(cf, cf_opts) - }) - .collect(); - let engine = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); + let engine = engine_test::kv::new_engine(path_str, ALL_CFS).unwrap(); let mut region = Region::default(); region.set_id(1); @@ -520,10 +488,7 @@ mod tests { let db_opts = DBOptions::default(); let mut cf_opts = 
ColumnFamilyOptions::new(); cf_opts.set_level_zero_file_num_compaction_trigger(10); - let cfs_opts = LARGE_CFS - .iter() - .map(|cf| CFOptions::new(cf, cf_opts.clone())) - .collect(); + let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let engine = engine_test::kv::new_engine_opt(path, db_opts, cfs_opts).unwrap(); let mut big_value = Vec::with_capacity(256); diff --git a/components/raftstore/src/coprocessor/split_check/keys.rs b/components/raftstore/src/coprocessor/split_check/keys.rs index 22a81e54f31..8c0d7aad86c 100644 --- a/components/raftstore/src/coprocessor/split_check/keys.rs +++ b/components/raftstore/src/coprocessor/split_check/keys.rs @@ -230,7 +230,7 @@ pub fn get_region_approximate_keys( mod tests { use std::{cmp, sync::mpsc, u64}; - use engine_test::ctor::{CFOptions, ColumnFamilyOptions, DBOptions}; + use engine_test::ctor::{ColumnFamilyOptions, DBOptions}; use engine_traits::{KvEngine, MiscExt, SyncMutable, ALL_CFS, CF_DEFAULT, CF_WRITE, LARGE_CFS}; use kvproto::{ metapb::{Peer, Region}, @@ -290,13 +290,7 @@ mod tests { fn test_split_check() { let path = Builder::new().prefix("test-raftstore").tempdir().unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let cf_opts = ColumnFamilyOptions::new(); - let cfs_opts = ALL_CFS - .iter() - .map(|cf| CFOptions::new(cf, cf_opts.clone())) - .collect(); - let engine = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); + let engine = engine_test::kv::new_engine(path_str, ALL_CFS).unwrap(); let mut region = Region::default(); region.set_id(1); @@ -400,13 +394,7 @@ mod tests { .tempdir() .unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let cf_opts = ColumnFamilyOptions::new(); - let cfs_opts = ALL_CFS - .iter() - .map(|cf| CFOptions::new(cf, cf_opts.clone())) - .collect(); - let engine = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); + let engine = 
engine_test::kv::new_engine(path_str, ALL_CFS).unwrap(); let mut region = Region::default(); region.set_id(1); @@ -466,10 +454,7 @@ mod tests { let db_opts = DBOptions::default(); let mut cf_opts = ColumnFamilyOptions::new(); cf_opts.set_level_zero_file_num_compaction_trigger(10); - let cfs_opts = LARGE_CFS - .iter() - .map(|cf| CFOptions::new(cf, cf_opts.clone())) - .collect(); + let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let db = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); let cases = [("a", 1024), ("b", 2048), ("c", 4096)]; @@ -575,13 +560,7 @@ mod tests { .tempdir() .unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let cf_opts = ColumnFamilyOptions::new(); - let cfs_opts = ALL_CFS - .iter() - .map(|cf| CFOptions::new(cf, cf_opts.clone())) - .collect(); - let engine = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); + let engine = engine_test::kv::new_engine(path_str, ALL_CFS).unwrap(); let mut region = Region::default(); region.set_id(1); @@ -655,10 +634,7 @@ mod tests { let db_opts = DBOptions::default(); let mut cf_opts = ColumnFamilyOptions::new(); cf_opts.set_level_zero_file_num_compaction_trigger(10); - let cfs_opts = LARGE_CFS - .iter() - .map(|cf| CFOptions::new(cf, cf_opts.clone())) - .collect(); + let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let db = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); // size >= 4194304 will insert a new point in range properties diff --git a/components/raftstore/src/coprocessor/split_check/size.rs b/components/raftstore/src/coprocessor/split_check/size.rs index 30198cd2337..352e956d43e 100644 --- a/components/raftstore/src/coprocessor/split_check/size.rs +++ b/components/raftstore/src/coprocessor/split_check/size.rs @@ -259,7 +259,7 @@ pub mod tests { use collections::HashSet; use engine_test::{ - ctor::{CFOptions, ColumnFamilyOptions, 
DBOptions}, + ctor::{ColumnFamilyOptions, DBOptions}, kv::KvTestEngine, }; use engine_traits::{ @@ -446,9 +446,9 @@ pub mod tests { .iter() .map(|cf| { if cfs_with_range_prop.contains(cf) { - CFOptions::new(cf, ColumnFamilyOptions::new()) + (*cf, ColumnFamilyOptions::new()) } else { - CFOptions::new(cf, cf_opt.clone()) + (*cf, cf_opt.clone()) } }) .collect(); @@ -576,9 +576,9 @@ pub mod tests { if cfs_with_range_prop.contains(cf) { let mut opt = ColumnFamilyOptions::new(); opt.set_disable_auto_compactions(true); - CFOptions::new(cf, opt) + (*cf, opt) } else { - CFOptions::new(cf, cf_opt.clone()) + (*cf, cf_opt.clone()) } }) .collect(); @@ -713,9 +713,9 @@ pub mod tests { if cfs_with_range_prop.contains(cf) { let mut opt = ColumnFamilyOptions::new(); opt.set_disable_auto_compactions(true); - CFOptions::new(cf, opt) + (*cf, opt) } else { - CFOptions::new(cf, cf_opt.clone()) + (*cf, cf_opt.clone()) } }) .collect(); @@ -768,9 +768,9 @@ pub mod tests { .iter() .map(|cf| { if cf != &CF_LOCK { - CFOptions::new(cf, ColumnFamilyOptions::new()) + (*cf, ColumnFamilyOptions::new()) } else { - CFOptions::new(cf, cf_opt.clone()) + (*cf, cf_opt.clone()) } }) .collect(); @@ -829,7 +829,7 @@ pub mod tests { .map(|cf| { let mut cf_opts = ColumnFamilyOptions::new(); cf_opts.set_no_range_properties(true); - CFOptions::new(cf, cf_opts) + (*cf, cf_opts) }) .collect(); let engine = @@ -911,10 +911,7 @@ pub mod tests { cf_opts.set_level_zero_file_num_compaction_trigger(10); cf_opts.set_no_range_properties(true); - let cfs_opts = LARGE_CFS - .iter() - .map(|cf| CFOptions::new(cf, cf_opts.clone())) - .collect(); + let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let engine = engine_test::kv::new_engine_opt(path, db_opts, cfs_opts).unwrap(); let region = make_region(1, vec![], vec![]); @@ -947,10 +944,7 @@ pub mod tests { let db_opts = DBOptions::default(); let mut cf_opts = ColumnFamilyOptions::new(); cf_opts.set_level_zero_file_num_compaction_trigger(10); - let 
cfs_opts = LARGE_CFS - .iter() - .map(|cf| CFOptions::new(cf, cf_opts.clone())) - .collect(); + let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let engine = engine_test::kv::new_engine_opt(path, db_opts, cfs_opts).unwrap(); let mut big_value = Vec::with_capacity(256); @@ -1062,10 +1056,7 @@ pub mod tests { let db_opts = DBOptions::default(); let mut cf_opts = ColumnFamilyOptions::new(); cf_opts.set_level_zero_file_num_compaction_trigger(10); - let cfs_opts = LARGE_CFS - .iter() - .map(|cf| CFOptions::new(cf, cf_opts.clone())) - .collect(); + let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let db = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); let cases = [("a", 1024), ("b", 2048), ("c", 4096)]; @@ -1095,10 +1086,7 @@ pub mod tests { let db_opts = DBOptions::default(); let mut cf_opts = ColumnFamilyOptions::new(); cf_opts.set_disable_auto_compactions(true); - let cfs_opts = LARGE_CFS - .iter() - .map(|cf| CFOptions::new(cf, cf_opts.clone())) - .collect(); + let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let db = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); let mut cf_size = 0; @@ -1133,10 +1121,7 @@ pub mod tests { let db_opts = DBOptions::default(); let mut cf_opts = ColumnFamilyOptions::new(); cf_opts.set_disable_auto_compactions(true); - let cfs_opts = LARGE_CFS - .iter() - .map(|cf| CFOptions::new(cf, cf_opts.clone())) - .collect(); + let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let db = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); let mut cf_size = 0; diff --git a/components/raftstore/src/coprocessor/split_check/table.rs b/components/raftstore/src/coprocessor/split_check/table.rs index e377d4b550a..df2fa0fb7c6 100644 --- a/components/raftstore/src/coprocessor/split_check/table.rs +++ b/components/raftstore/src/coprocessor/split_check/table.rs @@ -256,7 +256,7 @@ mod tests 
{ .prefix("test_last_key_of_region") .tempdir() .unwrap(); - let engine = new_engine(path.path().to_str().unwrap(), None, ALL_CFS, None).unwrap(); + let engine = new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap(); let mut region = Region::default(); region.set_id(1); @@ -309,7 +309,7 @@ mod tests { .prefix("test_table_check_observer") .tempdir() .unwrap(); - let engine = new_engine(path.path().to_str().unwrap(), None, ALL_CFS, None).unwrap(); + let engine = new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap(); let mut region = Region::default(); region.set_id(1); diff --git a/components/raftstore/src/store/bootstrap.rs b/components/raftstore/src/store/bootstrap.rs index 561425d9d00..e1c90a177c7 100644 --- a/components/raftstore/src/store/bootstrap.rs +++ b/components/raftstore/src/store/bootstrap.rs @@ -136,13 +136,9 @@ mod tests { fn test_bootstrap() { let path = Builder::new().prefix("var").tempdir().unwrap(); let raft_path = path.path().join("raft"); - let kv_engine = engine_test::kv::new_engine( - path.path().to_str().unwrap(), - None, - &[CF_DEFAULT, CF_RAFT], - None, - ) - .unwrap(); + let kv_engine = + engine_test::kv::new_engine(path.path().to_str().unwrap(), &[CF_DEFAULT, CF_RAFT]) + .unwrap(); let raft_engine = engine_test::raft::new_engine(raft_path.to_str().unwrap(), None).unwrap(); let engines = Engines::new(kv_engine.clone(), raft_engine.clone()); let region = initial_region(1, 1, 1); diff --git a/components/raftstore/src/store/compaction_guard.rs b/components/raftstore/src/store/compaction_guard.rs index e7a59631ca1..4fb4c7feb7a 100644 --- a/components/raftstore/src/store/compaction_guard.rs +++ b/components/raftstore/src/store/compaction_guard.rs @@ -195,12 +195,12 @@ impl SstPartitioner for CompactionGuardGenerator

{ #[cfg(test)] mod tests { - use std::{str, sync::Arc}; + use std::str; use engine_rocks::{ - raw::{BlockBasedOptions, ColumnFamilyOptions, DBCompressionType, DBOptions}, - raw_util::{new_engine_opt, CFOptions}, - RocksEngine, RocksSstPartitionerFactory, RocksSstReader, + raw::{BlockBasedOptions, DBCompressionType}, + util::new_engine_opt, + RocksCfOptions, RocksDBOptions, RocksEngine, RocksSstPartitionerFactory, RocksSstReader, }; use engine_traits::{CompactExt, Iterator, MiscExt, SstReader, SyncMutable, CF_DEFAULT}; use keys::DATA_PREFIX_KEY; @@ -367,7 +367,7 @@ mod tests { fn new_test_db(provider: MockRegionInfoProvider) -> (RocksEngine, TempDir) { let temp_dir = TempDir::new().unwrap(); - let mut cf_opts = ColumnFamilyOptions::new(); + let mut cf_opts = RocksCfOptions::default(); cf_opts.set_target_file_size_base(MAX_OUTPUT_FILE_SIZE); cf_opts.set_sst_partitioner_factory(RocksSstPartitionerFactory( CompactionGuardGeneratorFactory::new(CF_DEFAULT, provider, MIN_OUTPUT_FILE_SIZE) @@ -389,14 +389,12 @@ mod tests { block_based_opts.set_block_size(100); cf_opts.set_block_based_table_factory(&block_based_opts); - let db = RocksEngine::from_db(Arc::new( - new_engine_opt( - temp_dir.path().to_str().unwrap(), - DBOptions::new(), - vec![CFOptions::new(CF_DEFAULT, cf_opts)], - ) - .unwrap(), - )); + let db = new_engine_opt( + temp_dir.path().to_str().unwrap(), + RocksDBOptions::default(), + vec![(CF_DEFAULT, cf_opts)], + ) + .unwrap(); (db, temp_dir) } diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 98d12303b19..ab73c0bc8c6 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -4370,13 +4370,7 @@ mod tests { pub fn create_tmp_engine(path: &str) -> (TempDir, KvTestEngine) { let path = Builder::new().prefix(path).tempdir().unwrap(); - let engine = new_engine( - path.path().join("db").to_str().unwrap(), - None, - ALL_CFS, - None, - ) - .unwrap(); + let engine 
= new_engine(path.path().join("db").to_str().unwrap(), ALL_CFS).unwrap(); (path, engine) } diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 8301c75e7c3..76bb95b0d39 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -1285,8 +1285,7 @@ pub mod tests { raftlog_fetch_scheduler: Scheduler, path: &TempDir, ) -> PeerStorage { - let kv_db = engine_test::kv::new_engine(path.path().to_str().unwrap(), None, ALL_CFS, None) - .unwrap(); + let kv_db = engine_test::kv::new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap(); let raft_path = path.path().join(Path::new("raft")); let raft_db = engine_test::raft::new_engine(raft_path.to_str().unwrap(), None).unwrap(); let engines = Engines::new(kv_db, raft_db); @@ -2087,8 +2086,7 @@ pub mod tests { let region_sched = region_worker.scheduler(); let raftlog_fetch_worker = LazyWorker::new("raftlog-fetch-worker"); let raftlog_fetch_sched = raftlog_fetch_worker.scheduler(); - let kv_db = - engine_test::kv::new_engine(td.path().to_str().unwrap(), None, ALL_CFS, None).unwrap(); + let kv_db = engine_test::kv::new_engine(td.path().to_str().unwrap(), ALL_CFS).unwrap(); let raft_path = td.path().join(Path::new("raft")); let raft_db = engine_test::raft::new_engine(raft_path.to_str().unwrap(), None).unwrap(); let engines = Engines::new(kv_db, raft_db); diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index cca1dfbda77..6a8aa5ca3bf 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1883,7 +1883,7 @@ pub mod tests { use encryption::{DataKeyManager, EncryptionConfig, FileConfig, MasterKeyConfig}; use encryption_export::data_key_manager_from_config; use engine_test::{ - ctor::{CFOptions, ColumnFamilyOptions, DBOptions, KvEngineConstructorExt, RaftDBOptions}, + ctor::{ColumnFamilyOptions, DBOptions, KvEngineConstructorExt, 
RaftDBOptions}, kv::KvTestEngine, raft::RaftTestEngine, }; @@ -1917,32 +1917,41 @@ pub mod tests { const TEST_META_FILE_BUFFER_SIZE: usize = 1000; const BYTE_SIZE: usize = 1; - type DBBuilder = - fn(p: &Path, db_opt: Option, cf_opts: Option>) -> Result; + type DBBuilder = fn( + p: &Path, + db_opt: Option, + cf_opts: Option>, + ) -> Result; pub fn open_test_empty_db( path: &Path, db_opt: Option, - cf_opts: Option>, + cf_opts: Option>, ) -> Result where E: KvEngine + KvEngineConstructorExt, { let p = path.to_str().unwrap(); - let db = E::new_kv_engine(p, db_opt, ALL_CFS, cf_opts).unwrap(); + let db_opt = db_opt.unwrap_or_default(); + let cf_opts = cf_opts.unwrap_or_else(|| { + ALL_CFS + .iter() + .map(|cf| (*cf, ColumnFamilyOptions::default())) + .collect() + }); + let db = E::new_kv_engine_opt(p, db_opt, cf_opts).unwrap(); Ok(db) } pub fn open_test_db( path: &Path, db_opt: Option, - cf_opts: Option>, + cf_opts: Option>, ) -> Result where E: KvEngine + KvEngineConstructorExt, { - let p = path.to_str().unwrap(); - let db = E::new_kv_engine(p, db_opt, ALL_CFS, cf_opts).unwrap(); + let db = open_test_empty_db::(path, db_opt, cf_opts).unwrap(); let key = keys::data_key(TEST_KEY); // write some data into each cf for (i, cf) in db.cf_names().into_iter().enumerate() { @@ -1957,13 +1966,12 @@ pub mod tests { pub fn open_test_db_with_100keys( path: &Path, db_opt: Option, - cf_opts: Option>, + cf_opts: Option>, ) -> Result where E: KvEngine + KvEngineConstructorExt, { - let p = path.to_str().unwrap(); - let db = E::new_kv_engine(p, db_opt, ALL_CFS, cf_opts).unwrap(); + let db = open_test_empty_db::(path, db_opt, cf_opts).unwrap(); // write some data into each cf for (i, cf) in db.cf_names().into_iter().enumerate() { let mut p = Peer::default(); @@ -1981,7 +1989,7 @@ pub mod tests { path: &TempDir, raft_db_opt: Option, kv_db_opt: Option, - kv_cf_opts: Option>, + kv_cf_opts: Option>, regions: &[u64], ) -> Result> { let p = path.path(); @@ -2267,7 +2275,7 @@ pub mod tests { let 
dst_db_path = dst_db_dir.path().to_str().unwrap(); // Change arbitrarily the cf order of ALL_CFS at destination db. let dst_cfs = [CF_WRITE, CF_DEFAULT, CF_LOCK, CF_RAFT]; - let dst_db = engine_test::kv::new_engine(dst_db_path, None, &dst_cfs, None).unwrap(); + let dst_db = engine_test::kv::new_engine(dst_db_path, &dst_cfs).unwrap(); let options = ApplyOptions { db: dst_db.clone(), region, @@ -2816,7 +2824,7 @@ pub mod tests { let mut cf_opts = ColumnFamilyOptions::new(); cf_opts.set_no_range_properties(true); cf_opts.set_no_table_properties(true); - CFOptions::new(cf, cf_opts) + (*cf, cf_opts) }) .collect(); let engine = diff --git a/components/raftstore/src/store/worker/compact.rs b/components/raftstore/src/store/worker/compact.rs index afa4d609da1..88222623084 100644 --- a/components/raftstore/src/store/worker/compact.rs +++ b/components/raftstore/src/store/worker/compact.rs @@ -247,7 +247,7 @@ mod tests { use std::{thread::sleep, time::Duration}; use engine_test::{ - ctor::{CFOptions, ColumnFamilyOptions, DBOptions}, + ctor::{ColumnFamilyOptions, DBOptions}, kv::{new_engine, new_engine_opt, KvTestEngine}, }; use engine_traits::{ @@ -266,7 +266,7 @@ mod tests { .prefix("compact-range-test") .tempdir() .unwrap(); - let db = new_engine(path.path().to_str().unwrap(), None, &[CF_DEFAULT], None).unwrap(); + let db = new_engine(path.path().to_str().unwrap(), &[CF_DEFAULT]).unwrap(); let mut runner = Runner::new(db.clone()); @@ -323,10 +323,10 @@ mod tests { let mut cf_opts = ColumnFamilyOptions::new(); cf_opts.set_level_zero_file_num_compaction_trigger(8); let cfs_opts = vec![ - CFOptions::new(CF_DEFAULT, ColumnFamilyOptions::new()), - CFOptions::new(CF_RAFT, ColumnFamilyOptions::new()), - CFOptions::new(CF_LOCK, ColumnFamilyOptions::new()), - CFOptions::new(CF_WRITE, cf_opts), + (CF_DEFAULT, ColumnFamilyOptions::new()), + (CF_RAFT, ColumnFamilyOptions::new()), + (CF_LOCK, ColumnFamilyOptions::new()), + (CF_WRITE, cf_opts), ]; new_engine_opt(path, db_opts, 
cfs_opts).unwrap() } diff --git a/components/raftstore/src/store/worker/consistency_check.rs b/components/raftstore/src/store/worker/consistency_check.rs index dfd2b527168..154f1816dbf 100644 --- a/components/raftstore/src/store/worker/consistency_check.rs +++ b/components/raftstore/src/store/worker/consistency_check.rs @@ -141,13 +141,7 @@ mod tests { #[test] fn test_consistency_check() { let path = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); - let db = new_engine( - path.path().to_str().unwrap(), - None, - &[CF_DEFAULT, CF_RAFT], - None, - ) - .unwrap(); + let db = new_engine(path.path().to_str().unwrap(), &[CF_DEFAULT, CF_RAFT]).unwrap(); let mut region = Region::default(); region.mut_peers().push(Peer::default()); diff --git a/components/raftstore/src/store/worker/raftlog_gc.rs b/components/raftstore/src/store/worker/raftlog_gc.rs index 71584a5e678..bf7debfb1d9 100644 --- a/components/raftstore/src/store/worker/raftlog_gc.rs +++ b/components/raftstore/src/store/worker/raftlog_gc.rs @@ -214,8 +214,7 @@ mod tests { let path_raft = dir.path().join("raft"); let path_kv = dir.path().join("kv"); let raft_db = engine_test::raft::new_engine(path_kv.to_str().unwrap(), None).unwrap(); - let kv_db = - engine_test::kv::new_engine(path_raft.to_str().unwrap(), None, ALL_CFS, None).unwrap(); + let kv_db = engine_test::kv::new_engine(path_raft.to_str().unwrap(), ALL_CFS).unwrap(); let engines = Engines::new(kv_db, raft_db.clone()); let (tx, rx) = mpsc::channel(); diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 81358c989e0..1be9cf8b4e9 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -981,8 +981,7 @@ mod tests { Receiver>, ) { let path = Builder::new().prefix(path).tempdir().unwrap(); - let db = engine_test::kv::new_engine(path.path().to_str().unwrap(), None, ALL_CFS, None) - .unwrap(); + let db = 
engine_test::kv::new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap(); let (ch, rx, _) = MockRouter::new(); let mut reader = LocalReader::new(db, store_meta, ch); reader.store_id = Cell::new(Some(store_id)); diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 4bc5cc032a3..cdd0ee5556b 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -813,12 +813,12 @@ mod tests { }; use engine_test::{ - ctor::{CFOptions, ColumnFamilyOptions}, + ctor::ColumnFamilyOptions, kv::{KvTestEngine, KvTestSnapshot}, }; use engine_traits::{ CompactExt, FlowControlFactorsExt, KvEngine, MiscExt, Mutable, Peekable, - RaftEngineReadOnly, SyncMutable, WriteBatch, WriteBatchExt, CF_DEFAULT, + RaftEngineReadOnly, SyncMutable, WriteBatch, WriteBatchExt, CF_DEFAULT, CF_WRITE, }; use keys::data_key; use kvproto::raft_serverpb::{PeerState, RaftApplyState, RegionLocalState}; @@ -986,10 +986,10 @@ mod tests { cf_opts.set_level_zero_slowdown_writes_trigger(5); cf_opts.set_disable_auto_compactions(true); let kv_cfs_opts = vec![ - CFOptions::new("default", cf_opts.clone()), - CFOptions::new("write", cf_opts.clone()), - CFOptions::new("lock", cf_opts.clone()), - CFOptions::new("raft", cf_opts.clone()), + (CF_DEFAULT, cf_opts.clone()), + (CF_WRITE, cf_opts.clone()), + (CF_LOCK, cf_opts.clone()), + (CF_RAFT, cf_opts.clone()), ]; let engine = get_test_db_for_regions( &temp_dir, diff --git a/components/server/src/raft_engine_switch.rs b/components/server/src/raft_engine_switch.rs index d011f9be93f..bf06ecefcea 100644 --- a/components/server/src/raft_engine_switch.rs +++ b/components/server/src/raft_engine_switch.rs @@ -214,7 +214,6 @@ fn run_dump_raft_engine_worker( #[cfg(test)] mod tests { - use engine_rocks::raw::DBOptions; use tikv::config::TiKvConfig; use super::*; @@ -245,13 +244,12 @@ mod tests { { // Prepare some data for the RocksEngine. 
- let raftdb = engine_rocks::raw_util::new_engine_opt( + let raftdb = engine_rocks::util::new_engine_opt( &cfg.raft_store.raftdb_path, cfg.raftdb.build_opt(), cfg.raftdb.build_cf_opts(&None), ) .unwrap(); - let raftdb = RocksEngine::from_db(Arc::new(raftdb)); let mut batch = raftdb.log_batch(0); set_write_batch(1, &mut batch); raftdb.consume(&mut batch, false).unwrap(); @@ -271,15 +269,8 @@ mod tests { std::fs::remove_dir_all(&cfg.raft_store.raftdb_path).unwrap(); // Dump logs from RaftLogEngine to RocksEngine. - let raftdb = { - let db = engine_rocks::raw_util::new_engine_opt( - &cfg.raft_store.raftdb_path, - DBOptions::new(), - vec![], - ) - .unwrap(); - RocksEngine::from_db(Arc::new(db)) - }; + let raftdb = + engine_rocks::util::new_engine(&cfg.raft_store.raftdb_path, &[CF_DEFAULT]).unwrap(); dump_raft_engine_to_raftdb(&raft_engine, &raftdb, 4); assert(1, &raftdb); assert(5, &raftdb); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 58a4dc61338..ad788f2ecec 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -649,7 +649,7 @@ impl TiKvServer { if let Some(sst_worker) = &mut self.sst_worker { let sst_runner = RecoveryRunner::new( - engines.engines.kv.get_sync_db(), + engines.engines.kv.clone(), engines.store_meta.clone(), self.config.storage.background_error_recovery_window.into(), DEFAULT_CHECK_INTERVAL, @@ -1186,7 +1186,7 @@ impl TiKvServer { servers.node.id(), engines.engine.clone(), self.region_info_accessor.clone(), - engines.engines.kv.as_inner().clone(), + engines.engines.kv.clone(), self.config.backup.clone(), self.concurrency_manager.clone(), self.config.storage.api_version(), @@ -1551,10 +1551,9 @@ impl ConfiguredRaftEngine for RocksEngine { let mut raft_db_opts = config_raftdb.build_opt(); raft_db_opts.set_env(env.clone()); let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); - let raftdb = - engine_rocks::raw_util::new_engine_opt(raft_db_path, raft_db_opts, raft_cf_opts) + 
let mut raftdb = + engine_rocks::util::new_engine_opt(raft_db_path, raft_db_opts, raft_cf_opts) .expect("failed to open raftdb"); - let mut raftdb = RocksEngine::from_db(Arc::new(raftdb)); raftdb.set_shared_block_cache(block_cache.is_some()); if should_dump { @@ -1609,13 +1608,12 @@ impl ConfiguredRaftEngine for RaftLogEngine { let mut raft_db_opts = config_raftdb.build_opt(); raft_db_opts.set_env(env.clone()); let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); - let raftdb = engine_rocks::raw_util::new_engine_opt( + let raftdb = engine_rocks::util::new_engine_opt( &config.raft_store.raftdb_path, raft_db_opts, raft_cf_opts, ) .expect("failed to open raftdb for migration"); - let raftdb = RocksEngine::from_db(Arc::new(raftdb)); dump_raftdb_to_raft_engine(&raftdb, &raft_engine, 8 /*threads*/); raftdb.stop(); drop(raftdb); diff --git a/components/sst_importer/src/import_mode.rs b/components/sst_importer/src/import_mode.rs index 3123ed66da5..39dca3bea02 100644 --- a/components/sst_importer/src/import_mode.rs +++ b/components/sst_importer/src/import_mode.rs @@ -242,7 +242,7 @@ impl ImportModeCFOptions { mod tests { use std::thread; - use engine_traits::KvEngine; + use engine_traits::{KvEngine, CF_DEFAULT}; use futures::executor::ThreadPoolBuilder; use tempfile::Builder; use test_sst_importer::{new_test_engine, new_test_engine_with_options}; @@ -290,7 +290,7 @@ mod tests { .prefix("test_import_mode_switcher") .tempdir() .unwrap(); - let db = new_test_engine(temp_dir.path().to_str().unwrap(), &["a", "b"]); + let db = new_test_engine(temp_dir.path().to_str().unwrap(), &[CF_DEFAULT, "a", "b"]); let normal_db_options = ImportModeDBOptions::new_options(&db); let import_db_options = normal_db_options.optimized_for_import_mode(); @@ -331,7 +331,7 @@ mod tests { .prefix("test_import_mode_timeout") .tempdir() .unwrap(); - let db = new_test_engine(temp_dir.path().to_str().unwrap(), &["a", "b"]); + let db = new_test_engine(temp_dir.path().to_str().unwrap(), &[CF_DEFAULT, 
"a", "b"]); let normal_db_options = ImportModeDBOptions::new_options(&db); let import_db_options = normal_db_options.optimized_for_import_mode(); diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index a3a71ba8144..042b430b811 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs @@ -69,13 +69,12 @@ mod tests { use encryption::DataKeyManager; use engine_rocks::{ - util::{new_engine, RocksCFOptions}, - RocksColumnFamilyOptions, RocksDBOptions, RocksEngine, RocksSstWriterBuilder, + util::new_engine_opt, RocksCfOptions, RocksDBOptions, RocksEngine, RocksSstWriterBuilder, RocksTitanDBOptions, }; use engine_traits::{ CfName, ColumnFamilyOptions, DBOptions, EncryptionKeyManager, ImportExt, Peekable, - SstWriter, SstWriterBuilder, TitanDBOptions, + SstWriter, SstWriterBuilder, TitanDBOptions, CF_DEFAULT, }; use tempfile::Builder; use test_util::encryption::new_test_key_manager; @@ -116,7 +115,7 @@ mod tests { fn check_prepare_sst_for_ingestion( db_opts: Option, - cf_opts: Option>>, + cf_opts: Option>, key_manager: Option<&DataKeyManager>, was_encrypted: bool, ) { @@ -135,10 +134,11 @@ mod tests { let kvs = [("k1", "v1"), ("k2", "v2"), ("k3", "v3")]; - let cf_name = "default"; - let db = new_engine(path_str, db_opts, &[cf_name], cf_opts).unwrap(); + let db_opts = db_opts.unwrap_or_default(); + let cf_opts = cf_opts.unwrap_or_else(|| vec![(CF_DEFAULT, RocksCfOptions::default())]); + let db = new_engine_opt(path_str, db_opts, cf_opts).unwrap(); - gen_sst_with_kvs(&db, cf_name, sst_path.to_str().unwrap(), &kvs); + gen_sst_with_kvs(&db, CF_DEFAULT, sst_path.to_str().unwrap(), &kvs); if was_encrypted { // Add the file to key_manager to simulate an encrypted file. 
@@ -156,9 +156,9 @@ mod tests { prepare_sst_for_ingestion(&sst_path, &sst_clone, key_manager).unwrap(); check_hard_link(&sst_path, 2); check_hard_link(&sst_clone, 2); - db.ingest_external_file_cf(cf_name, &[sst_clone.to_str().unwrap()]) + db.ingest_external_file_cf(CF_DEFAULT, &[sst_clone.to_str().unwrap()]) .unwrap(); - check_db_with_kvs(&db, cf_name, &kvs); + check_db_with_kvs(&db, CF_DEFAULT, &kvs); assert!(!sst_clone.exists()); // Since we are not using key_manager in db, simulate the db deleting the file from // key_manager. @@ -171,9 +171,9 @@ mod tests { prepare_sst_for_ingestion(&sst_path, &sst_clone, key_manager).unwrap(); check_hard_link(&sst_path, 2); check_hard_link(&sst_clone, 1); - db.ingest_external_file_cf(cf_name, &[sst_clone.to_str().unwrap()]) + db.ingest_external_file_cf(CF_DEFAULT, &[sst_clone.to_str().unwrap()]) .unwrap(); - check_db_with_kvs(&db, cf_name, &kvs); + check_db_with_kvs(&db, CF_DEFAULT, &kvs); assert!(!sst_clone.exists()); } @@ -192,11 +192,11 @@ mod tests { // Force all values write out to blob files. 
titan_opts.set_min_blob_size(0); db_opts.set_titandb_options(&titan_opts); - let mut cf_opts = RocksColumnFamilyOptions::new(); + let mut cf_opts = RocksCfOptions::new(); cf_opts.set_titandb_options(&titan_opts); check_prepare_sst_for_ingestion( Some(db_opts), - Some(vec![RocksCFOptions::new("default", cf_opts)]), + Some(vec![(CF_DEFAULT, cf_opts)]), None, /*key_manager*/ false, /*was_encrypted*/ ); diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index afdcd279e19..dfdffd97105 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -85,7 +85,7 @@ impl TestSuite { *id, sim.storages[id].clone(), sim.region_info_accessors[id].clone(), - engines.kv.as_inner().clone(), + engines.kv.clone(), BackupConfig { num_threads: 4, batch_size: 8, diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 28112304496..0359952d237 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -12,7 +12,7 @@ use std::{ use collections::{HashMap, HashSet}; use crossbeam::channel::TrySendError; use encryption_export::DataKeyManager; -use engine_rocks::{raw::DB, Compat, RocksEngine, RocksSnapshot}; +use engine_rocks::{RocksEngine, RocksSnapshot}; use engine_test::raft::RaftTestEngine; use engine_traits::{ CompactExt, Engines, Iterable, MiscExt, Mutable, Peekable, RaftEngineReadOnly, WriteBatch, @@ -371,8 +371,8 @@ impl Cluster { debug!("node {} stopped", node_id); } - pub fn get_engine(&self, node_id: u64) -> Arc { - Arc::clone(self.engines[&node_id].kv.as_inner()) + pub fn get_engine(&self, node_id: u64) -> RocksEngine { + self.engines[&node_id].kv.clone() } pub fn get_raft_engine(&self, node_id: u64) -> RaftTestEngine { @@ -736,14 +736,14 @@ impl Cluster { self.leaders.remove(®ion_id); } - pub fn assert_quorum) -> bool>(&self, mut condition: F) { + pub fn assert_quorum bool>(&self, mut condition: F) { if self.engines.is_empty() { 
return; } let half = self.engines.len() / 2; let mut qualified_cnt = 0; for (id, engines) in &self.engines { - if !condition(engines.kv.as_inner()) { + if !condition(&engines.kv) { debug!("store {} is not qualified yet.", id); continue; } @@ -1178,7 +1178,6 @@ impl Cluster { pub fn apply_state(&self, region_id: u64, store_id: u64) -> RaftApplyState { let key = keys::apply_state_key(region_id); self.get_engine(store_id) - .c() .get_msg_cf::(engine_traits::CF_RAFT, &key) .unwrap() .unwrap() @@ -1197,7 +1196,6 @@ impl Cluster { pub fn region_local_state(&self, region_id: u64, store_id: u64) -> RegionLocalState { self.get_engine(store_id) - .c() .get_msg_cf::( engine_traits::CF_RAFT, &keys::region_state_key(region_id), @@ -1210,7 +1208,6 @@ impl Cluster { for _ in 0..100 { let state = self .get_engine(store_id) - .c() .get_msg_cf::( engine_traits::CF_RAFT, &keys::region_state_key(region_id), diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 1769ecc4154..bdd7c08b7e8 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -13,7 +13,7 @@ use collections::HashMap; use encryption_export::{ data_key_manager_from_config, DataKeyManager, FileConfig, MasterKeyConfig, }; -use engine_rocks::{config::BlobRunMode, raw::DB, Compat, RocksEngine, RocksSnapshot}; +use engine_rocks::{config::BlobRunMode, RocksEngine, RocksSnapshot}; use engine_test::raft::RaftTestEngine; use engine_traits::{ Engines, Iterable, Peekable, RaftEngineDebug, RaftEngineReadOnly, TabletFactory, ALL_CFS, @@ -55,9 +55,9 @@ use txn_types::Key; use crate::{Cluster, Config, ServerCluster, Simulator, TestPdClient}; -pub fn must_get(engine: &Arc, cf: &str, key: &[u8], value: Option<&[u8]>) { +pub fn must_get(engine: &RocksEngine, cf: &str, key: &[u8], value: Option<&[u8]>) { for _ in 1..300 { - let res = engine.c().get_value_cf(cf, &keys::data_key(key)).unwrap(); + let res = engine.get_value_cf(cf, &keys::data_key(key)).unwrap(); 
if let (Some(value), Some(res)) = (value, res.as_ref()) { assert_eq!(value, &res[..]); return; @@ -68,7 +68,7 @@ pub fn must_get(engine: &Arc, cf: &str, key: &[u8], value: Option<&[u8]>) { thread::sleep(Duration::from_millis(20)); } debug!("last try to get {}", log_wrappers::hex_encode_upper(key)); - let res = engine.c().get_value_cf(cf, &keys::data_key(key)).unwrap(); + let res = engine.get_value_cf(cf, &keys::data_key(key)).unwrap(); if value.is_none() && res.is_none() || value.is_some() && res.is_some() && value.unwrap() == &*res.unwrap() { @@ -81,19 +81,19 @@ pub fn must_get(engine: &Arc, cf: &str, key: &[u8], value: Option<&[u8]>) { ) } -pub fn must_get_equal(engine: &Arc, key: &[u8], value: &[u8]) { +pub fn must_get_equal(engine: &RocksEngine, key: &[u8], value: &[u8]) { must_get(engine, "default", key, Some(value)); } -pub fn must_get_none(engine: &Arc, key: &[u8]) { +pub fn must_get_none(engine: &RocksEngine, key: &[u8]) { must_get(engine, "default", key, None); } -pub fn must_get_cf_equal(engine: &Arc, cf: &str, key: &[u8], value: &[u8]) { +pub fn must_get_cf_equal(engine: &RocksEngine, cf: &str, key: &[u8], value: &[u8]) { must_get(engine, cf, key, Some(value)); } -pub fn must_get_cf_none(engine: &Arc, cf: &str, key: &[u8]) { +pub fn must_get_cf_none(engine: &RocksEngine, cf: &str, key: &[u8]) { must_get(engine, cf, key, None); } diff --git a/components/test_sst_importer/src/lib.rs b/components/test_sst_importer/src/lib.rs index 9c9ef0496e9..65d2a3dc70a 100644 --- a/components/test_sst_importer/src/lib.rs +++ b/components/test_sst_importer/src/lib.rs @@ -3,12 +3,9 @@ use std::{collections::HashMap, fs, path::Path, sync::Arc}; use engine_rocks::{ - raw::{ - ColumnFamilyOptions, DBEntryType, DBOptions, Env, TablePropertiesCollector, - TablePropertiesCollectorFactory, - }, - raw_util::{new_engine, CFOptions}, - RocksEngine, RocksSstReader, RocksSstWriterBuilder, + raw::{DBEntryType, Env, TablePropertiesCollector, TablePropertiesCollectorFactory}, + 
util::new_engine_opt, + RocksCfOptions, RocksDBOptions, RocksEngine, RocksSstReader, RocksSstWriterBuilder, }; pub use engine_rocks::{RocksEngine as TestEngine, RocksSstWriter}; use engine_traits::{KvEngine, SstWriter, SstWriterBuilder}; @@ -32,12 +29,12 @@ pub fn new_test_engine_with_options_and_env( env: Option>, ) -> RocksEngine where - F: FnMut(&str, &mut ColumnFamilyOptions), + F: FnMut(&str, &mut RocksCfOptions), { let cf_opts = cfs .iter() .map(|cf| { - let mut opt = ColumnFamilyOptions::new(); + let mut opt = RocksCfOptions::default(); if let Some(ref env) = env { opt.set_env(env.clone()); } @@ -46,22 +43,21 @@ where "tikv.test_properties", TestPropertiesCollectorFactory::new(*cf), ); - CFOptions::new(*cf, opt) + (*cf, opt) }) .collect(); - let db_opts = env.map(|e| { - let mut opts = DBOptions::default(); + let db_opts = env.map_or_else(RocksDBOptions::default, |e| { + let mut opts = RocksDBOptions::default(); opts.set_env(e); opts }); - let db = new_engine(path, db_opts, cfs, Some(cf_opts)).expect("rocks test engine"); - RocksEngine::from_db(Arc::new(db)) + new_engine_opt(path, db_opts, cf_opts).expect("rocks test engine") } pub fn new_test_engine_with_options(path: &str, cfs: &[&str], apply: F) -> RocksEngine where - F: FnMut(&str, &mut ColumnFamilyOptions), + F: FnMut(&str, &mut RocksCfOptions), { new_test_engine_with_options_and_env(path, cfs, apply, None) } diff --git a/components/tikv_kv/src/cursor.rs b/components/tikv_kv/src/cursor.rs index 923a1878a42..44437e60f4c 100644 --- a/components/tikv_kv/src/cursor.rs +++ b/components/tikv_kv/src/cursor.rs @@ -570,13 +570,9 @@ impl<'a, S: 'a + Snapshot> CursorBuilder<'a, S> { #[cfg(test)] mod tests { - use std::sync::Arc; - use engine_rocks::{ - raw::ColumnFamilyOptions, - raw_util::{new_engine, CFOptions}, - util::{new_temp_engine, FixedPrefixSliceTransform}, - RocksEngine, RocksSnapshot, + util::{new_engine_opt, new_temp_engine, FixedPrefixSliceTransform}, + RocksCfOptions, RocksDBOptions, RocksEngine, 
RocksSnapshot, }; use engine_traits::{IterOptions, SyncMutable, CF_DEFAULT}; use keys::data_key; @@ -613,22 +609,19 @@ mod tests { #[test] fn test_seek_and_prev_with_prefix_seek() { let path = Builder::new().prefix("test-cursor").tempdir().unwrap(); - let mut cf_opts = ColumnFamilyOptions::new(); + let mut cf_opts = RocksCfOptions::default(); cf_opts .set_prefix_extractor( "FixedPrefixSliceTransform", FixedPrefixSliceTransform::new(3), ) .unwrap(); - let engine = new_engine( + let engine = new_engine_opt( path.path().to_str().unwrap(), - None, - &[CF_DEFAULT], - Some(vec![CFOptions::new(CF_DEFAULT, cf_opts)]), + RocksDBOptions::default(), + vec![(CF_DEFAULT, cf_opts)], ) .unwrap(); - let engine = Arc::new(engine); - let engine = RocksEngine::from_db(engine); let (region, _) = load_default_dataset(engine.clone()); diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 1d66f11ad74..c96d996dc5c 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -648,7 +648,7 @@ pub fn write_modifies(kv_engine: &impl LocalEngine, modifies: Vec) -> Re Ok(()) } -pub const TEST_ENGINE_CFS: &[CfName] = &["cf"]; +pub const TEST_ENGINE_CFS: &[CfName] = &[CF_DEFAULT, "cf"]; pub mod tests { use tikv_util::codec::bytes; diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 50059433553..f0331403725 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -11,8 +11,7 @@ use std::{ pub use engine_rocks::RocksSnapshot; use engine_rocks::{ - get_env, raw::DBOptions, raw_util::CFOptions, RocksEngine as BaseRocksEngine, - RocksEngineIterator, + get_env, RocksCfOptions, RocksDBOptions, RocksEngine as BaseRocksEngine, RocksEngineIterator, }; use engine_traits::{ CfName, Engines, IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, @@ -89,11 +88,10 @@ pub struct RocksEngine { impl RocksEngine { pub fn new( path: &str, - cfs: &[CfName], - 
cfs_opts: Option>>, + db_opts: Option, + cfs_opts: Vec<(CfName, RocksCfOptions)>, shared_block_cache: bool, io_rate_limiter: Option>, - db_opts: Option, ) -> Result { info!("RocksEngine: creating for path"; "path" => path); let (path, temp_dir) = match path { @@ -104,21 +102,16 @@ impl RocksEngine { _ => (path.to_owned(), None), }; let worker = Worker::new("engine-rocksdb"); - let mut db_opts = db_opts.unwrap_or_else(|| DBOptions::new()); + let mut db_opts = db_opts.unwrap_or_default(); if io_rate_limiter.is_some() { db_opts.set_env(get_env(None /*key_manager*/, io_rate_limiter).unwrap()); } - let db = Arc::new(engine_rocks::raw_util::new_engine( - &path, - Some(db_opts), - cfs, - cfs_opts, - )?); + let db = engine_rocks::util::new_engine_opt(&path, db_opts, cfs_opts)?; // It does not use the raft_engine, so it is ok to fill with the same // rocksdb. - let mut kv_engine = BaseRocksEngine::from_db(db.clone()); - let mut raft_engine = BaseRocksEngine::from_db(db); + let mut kv_engine = db.clone(); + let mut raft_engine = db; kv_engine.set_shared_block_cache(shared_block_cache); raft_engine.set_shared_block_cache(shared_block_cache); let engines = Engines::new(kv_engine, raft_engine); diff --git a/src/config.rs b/src/config.rs index 0df2e2a2101..489609d1196 100644 --- a/src/config.rs +++ b/src/config.rs @@ -26,19 +26,19 @@ use engine_rocks::{ get_env, properties::MvccPropertiesCollectorFactory, raw::{ - BlockBasedOptions, Cache, ChecksumType, ColumnFamilyOptions, CompactionPriority, - DBCompactionStyle, DBCompressionType, DBOptions, DBRateLimiterMode, DBRecoveryMode, Env, - LRUCacheOptions, PrepopulateBlockCache, TitanDBOptions, + BlockBasedOptions, Cache, ChecksumType, CompactionPriority, DBCompactionStyle, + DBCompressionType, DBRateLimiterMode, DBRecoveryMode, Env, LRUCacheOptions, + PrepopulateBlockCache, }, - raw_util::CFOptions, util::{FixedPrefixSliceTransform, FixedSuffixSliceTransform, NoopSliceTransform}, - RaftDBLogger, RangePropertiesCollectorFactory, 
RocksEngine, RocksEventListener, - RocksSstPartitionerFactory, RocksdbLogger, TtlPropertiesCollectorFactory, + RaftDBLogger, RangePropertiesCollectorFactory, RocksCfOptions, RocksDBOptions, RocksEngine, + RocksEventListener, RocksTitanDBOptions, RocksdbLogger, TtlPropertiesCollectorFactory, DEFAULT_PROP_KEYS_INDEX_DISTANCE, DEFAULT_PROP_SIZE_INDEX_DISTANCE, }; use engine_traits::{ CFOptionsExt, ColumnFamilyOptions as ColumnFamilyOptionsTrait, DBOptions as _, DBOptionsExt, - TabletAccessor, TabletErrorCollector, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, + TabletAccessor, TabletErrorCollector, TitanDBOptions as _, CF_DEFAULT, CF_LOCK, CF_RAFT, + CF_WRITE, }; use file_system::IORateLimiter; use keys::region_raft_prefix_len; @@ -178,8 +178,8 @@ impl Default for TitanCfConfig { } impl TitanCfConfig { - fn build_opts(&self) -> TitanDBOptions { - let mut opts = TitanDBOptions::new(); + fn build_opts(&self) -> RocksTitanDBOptions { + let mut opts = RocksTitanDBOptions::new(); opts.set_min_blob_size(self.min_blob_size.0 as u64); opts.set_blob_file_compression(self.blob_file_compression.into()); opts.set_blob_cache(self.blob_cache_size.0 as usize, -1, false, 0.0); @@ -533,7 +533,7 @@ macro_rules! build_cf_opt { block_base_opts.set_prepopulate_block_cache($opt.prepopulate_block_cache); block_base_opts.set_format_version($opt.format_version); block_base_opts.set_checksum($opt.checksum); - let mut cf_opts = ColumnFamilyOptions::new(); + let mut cf_opts = RocksCfOptions::default(); cf_opts.set_block_based_table_factory(&block_base_opts); cf_opts.set_num_levels($opt.num_levels); assert!($opt.compression_per_level.len() >= $opt.num_levels as usize); @@ -592,7 +592,7 @@ macro_rules! 
build_cf_opt { $opt.compaction_guard_min_output_file_size.0, ) .unwrap(); - cf_opts.set_sst_partitioner_factory(RocksSstPartitionerFactory(factory)); + cf_opts.set_sst_partitioner_factory(factory); cf_opts.set_target_file_size_base($opt.compaction_guard_max_output_file_size.0); } else { warn!("compaction guard is disabled due to region info provider not available") @@ -671,7 +671,7 @@ impl DefaultCfConfig { cache: &Option, region_info_accessor: Option<&RegionInfoAccessor>, api_version: ApiVersion, - ) -> ColumnFamilyOptions { + ) -> RocksCfOptions { let mut cf_opts = build_cf_opt!(self, CF_DEFAULT, cache, region_info_accessor); let f = RangePropertiesCollectorFactory { prop_size_index_distance: self.prop_size_index_distance, @@ -782,7 +782,7 @@ impl WriteCfConfig { &self, cache: &Option, region_info_accessor: Option<&RegionInfoAccessor>, - ) -> ColumnFamilyOptions { + ) -> RocksCfOptions { let mut cf_opts = build_cf_opt!(self, CF_WRITE, cache, region_info_accessor); // Prefix extractor(trim the timestamp at tail) for write cf. 
cf_opts @@ -876,7 +876,7 @@ impl Default for LockCfConfig { } impl LockCfConfig { - pub fn build_opt(&self, cache: &Option) -> ColumnFamilyOptions { + pub fn build_opt(&self, cache: &Option) -> RocksCfOptions { let no_region_info_accessor: Option<&RegionInfoAccessor> = None; let mut cf_opts = build_cf_opt!(self, CF_LOCK, cache, no_region_info_accessor); cf_opts @@ -952,7 +952,7 @@ impl Default for RaftCfConfig { } impl RaftCfConfig { - pub fn build_opt(&self, cache: &Option) -> ColumnFamilyOptions { + pub fn build_opt(&self, cache: &Option) -> RocksCfOptions { let no_region_info_accessor: Option<&RegionInfoAccessor> = None; let mut cf_opts = build_cf_opt!(self, CF_RAFT, cache, no_region_info_accessor); cf_opts @@ -991,8 +991,8 @@ impl Default for TitanDBConfig { } impl TitanDBConfig { - fn build_opts(&self) -> TitanDBOptions { - let mut opts = TitanDBOptions::new(); + fn build_opts(&self) -> RocksTitanDBOptions { + let mut opts = RocksTitanDBOptions::new(); opts.set_dirname(&self.dirname); opts.set_disable_background_gc(self.disable_gc); opts.set_max_background_gc(self.max_background_gc); @@ -1131,8 +1131,8 @@ impl Default for DbConfig { } impl DbConfig { - pub fn build_opt(&self) -> DBOptions { - let mut opts = DBOptions::new(); + pub fn build_opt(&self) -> RocksDBOptions { + let mut opts = RocksDBOptions::default(); opts.set_wal_recovery_mode(self.wal_recovery_mode); if !self.wal_dir.is_empty() { opts.set_wal_dir(&self.wal_dir); @@ -1195,20 +1195,20 @@ impl DbConfig { cache: &Option, region_info_accessor: Option<&RegionInfoAccessor>, api_version: ApiVersion, - ) -> Vec> { + ) -> Vec<(&'static str, RocksCfOptions)> { vec![ - CFOptions::new( + ( CF_DEFAULT, self.defaultcf .build_opt(cache, region_info_accessor, api_version), ), - CFOptions::new(CF_LOCK, self.lockcf.build_opt(cache)), - CFOptions::new( + (CF_LOCK, self.lockcf.build_opt(cache)), + ( CF_WRITE, self.writecf.build_opt(cache, region_info_accessor), ), // TODO: remove CF_RAFT. 
- CFOptions::new(CF_RAFT, self.raftcf.build_opt(cache)), + (CF_RAFT, self.raftcf.build_opt(cache)), ] } @@ -1319,7 +1319,7 @@ impl Default for RaftDefaultCfConfig { } impl RaftDefaultCfConfig { - pub fn build_opt(&self, cache: &Option) -> ColumnFamilyOptions { + pub fn build_opt(&self, cache: &Option) -> RocksCfOptions { let no_region_info_accessor: Option<&RegionInfoAccessor> = None; let mut cf_opts = build_cf_opt!(self, CF_DEFAULT, cache, no_region_info_accessor); let f = FixedPrefixSliceTransform::new(region_raft_prefix_len()); @@ -1431,8 +1431,8 @@ impl Default for RaftDbConfig { } impl RaftDbConfig { - pub fn build_opt(&self) -> DBOptions { - let mut opts = DBOptions::new(); + pub fn build_opt(&self) -> RocksDBOptions { + let mut opts = RocksDBOptions::default(); opts.set_wal_recovery_mode(self.wal_recovery_mode); if !self.wal_dir.is_empty() { opts.set_wal_dir(&self.wal_dir); @@ -1473,8 +1473,8 @@ impl RaftDbConfig { opts } - pub fn build_cf_opts(&self, cache: &Option) -> Vec> { - vec![CFOptions::new(CF_DEFAULT, self.defaultcf.build_opt(cache))] + pub fn build_cf_opts(&self, cache: &Option) -> Vec<(&'static str, RocksCfOptions)> { + vec![(CF_DEFAULT, self.defaultcf.build_opt(cache))] } fn validate(&mut self) -> Result<(), Box> { @@ -4048,7 +4048,6 @@ mod tests { use case_macros::*; use engine_traits::{ ColumnFamilyOptions as ColumnFamilyOptionsTrait, DBOptions as DBOptionsTrait, DummyFactory, - ALL_CFS, }; use futures::executor::block_on; use grpcio::ResourceQuota; @@ -4469,15 +4468,14 @@ mod tests { assert_eq!(F::TAG, cfg.storage.api_version()); let engine = RocksDBEngine::new( &cfg.storage.data_dir, - ALL_CFS, - Some(cfg.rocksdb.build_cf_opts( + Some(cfg.rocksdb.build_opt()), + cfg.rocksdb.build_cf_opts( &cfg.storage.block_cache.build_shared_cache(), None, cfg.storage.api_version(), - )), + ), true, None, - Some(cfg.rocksdb.build_opt()), ) .unwrap(); let storage = diff --git a/src/server/debug.rs b/src/server/debug.rs index 93732c9c580..7bfa2aa438e 100644 
--- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -5,15 +5,14 @@ use std::{ iter::FromIterator, path::Path, result, - sync::Arc, thread::{Builder as ThreadBuilder, JoinHandle}, }; use collections::HashSet; use engine_rocks::{ - raw::{CompactOptions, DBBottommostLevelCompaction, DB}, + raw::{CompactOptions, DBBottommostLevelCompaction}, util::get_cf_handle, - Compat, RocksEngine, RocksEngineIterator, RocksMvccProperties, RocksWriteBatchVec, + RocksEngine, RocksEngineIterator, RocksMvccProperties, RocksWriteBatchVec, }; use engine_traits::{ Engines, IterOptions, Iterable, Iterator as EngineIterator, Mutable, MvccProperties, Peekable, @@ -167,9 +166,9 @@ impl Debugger { Ok(regions) } - fn get_db_from_type(&self, db: DBType) -> Result<&Arc> { + fn get_db_from_type(&self, db: DBType) -> Result<&RocksEngine> { match db { - DBType::Kv => Ok(self.engines.kv.as_inner()), + DBType::Kv => Ok(&self.engines.kv), DBType::Raft => Err(box_err!("Get raft db is not allowed")), _ => Err(box_err!("invalid DBType type")), } @@ -178,7 +177,7 @@ impl Debugger { pub fn get(&self, db: DBType, cf: &str, key: &[u8]) -> Result> { validate_db_and_cf(db, cf)?; let db = self.get_db_from_type(db)?; - match db.c().get_value_cf(cf, key) { + match db.get_value_cf(cf, key) { Ok(Some(v)) => Ok(v.to_vec()), Ok(None) => Err(Error::NotFound(format!( "value for key {:?} in db {:?}", @@ -323,7 +322,7 @@ impl Debugger { ) -> Result<()> { validate_db_and_cf(db, cf)?; let db = self.get_db_from_type(db)?; - let handle = box_try!(get_cf_handle(db, cf)); + let handle = box_try!(get_cf_handle(db.as_inner(), cf)); let start = if start.is_empty() { None } else { Some(start) }; let end = if end.is_empty() { None } else { Some(end) }; info!("Debugger starts manual compact"; "db" => ?db, "cf" => cf); @@ -331,7 +330,8 @@ impl Debugger { opts.set_max_subcompactions(threads as i32); opts.set_exclusive_manual_compaction(false); opts.set_bottommost_level_compaction(bottommost.0); - db.compact_range_cf_opt(handle, 
&opts, start, end); + db.as_inner() + .compact_range_cf_opt(handle, &opts, start, end); info!("Debugger finishes manual compact"; "db" => ?db, "cf" => cf); Ok(()) } @@ -346,7 +346,7 @@ impl Debugger { let mut errors = Vec::with_capacity(regions.len()); for region in regions { let region_id = region.get_id(); - if let Err(e) = set_region_tombstone(db.as_inner(), store_id, region, &mut wb) { + if let Err(e) = set_region_tombstone(db, store_id, region, &mut wb) { errors.push((region_id, e)); } } @@ -403,7 +403,7 @@ impl Debugger { for region in regions { let region_id = region.get_id(); if let Err(e) = recover_mvcc_for_range( - db.as_inner(), + db, region.get_start_key(), region.get_end_key(), read_only, @@ -417,18 +417,15 @@ impl Debugger { } pub fn recover_all(&self, threads: usize, read_only: bool) -> Result<()> { - let db = self.engines.kv.clone(); + let db = &self.engines.kv; info!("Calculating split keys..."); - let split_keys = divide_db(db.as_inner(), threads) - .unwrap() - .into_iter() - .map(|k| { - let k = Key::from_encoded(keys::origin_key(&k).to_vec()) - .truncate_ts() - .unwrap(); - k.as_encoded().clone() - }); + let split_keys = divide_db(db, threads).unwrap().into_iter().map(|k| { + let k = Key::from_encoded(keys::origin_key(&k).to_vec()) + .truncate_ts() + .unwrap(); + k.as_encoded().clone() + }); let mut range_borders = vec![b"".to_vec()]; range_borders.extend(split_keys); @@ -454,13 +451,8 @@ impl Debugger { log_wrappers::Value::key(&end_key) ); - let result = recover_mvcc_for_range( - db.as_inner(), - &start_key, - &end_key, - read_only, - thread_index, - ); + let result = + recover_mvcc_for_range(&db, &start_key, &end_key, read_only, thread_index); tikv_alloc::remove_thread_memory_accessor(); result }) @@ -861,8 +853,8 @@ impl Debugger { let start = keys::enc_start_key(region); let end = keys::enc_end_key(region); - let mut res = dump_write_cf_properties(self.engines.kv.as_inner(), &start, &end)?; - let mut res1 = 
dump_default_cf_properties(self.engines.kv.as_inner(), &start, &end)?; + let mut res = dump_write_cf_properties(&self.engines.kv, &start, &end)?; + let mut res1 = dump_default_cf_properties(&self.engines.kv, &start, &end)?; res.append(&mut res1); let middle_key = match box_try!(get_region_approximate_middle(&self.engines.kv, region)) { @@ -885,12 +877,12 @@ impl Debugger { pub fn get_range_properties(&self, start: &[u8], end: &[u8]) -> Result> { let mut props = dump_write_cf_properties( - self.engines.kv.as_inner(), + &self.engines.kv, &keys::data_key(start), &keys::data_end_key(end), )?; let mut props1 = dump_default_cf_properties( - self.engines.kv.as_inner(), + &self.engines.kv, &keys::data_key(start), &keys::data_end_key(end), )?; @@ -904,13 +896,13 @@ impl Debugger { } fn dump_default_cf_properties( - db: &Arc, + db: &RocksEngine, start: &[u8], end: &[u8], ) -> Result> { let mut num_entries = 0; // number of Rocksdb K/V entries. - let collection = box_try!(db.c().get_range_properties_cf(CF_DEFAULT, start, end)); + let collection = box_try!(db.get_range_properties_cf(CF_DEFAULT, start, end)); let num_files = collection.len(); for (_, v) in collection.iter() { @@ -937,13 +929,13 @@ fn dump_default_cf_properties( } fn dump_write_cf_properties( - db: &Arc, + db: &RocksEngine, start: &[u8], end: &[u8], ) -> Result> { let mut num_entries = 0; // number of Rocksdb K/V entries. 
- let collection = box_try!(db.c().get_range_properties_cf(CF_WRITE, start, end)); + let collection = box_try!(db.get_range_properties_cf(CF_WRITE, start, end)); let num_files = collection.len(); let mut mvcc_properties = MvccProperties::new(); @@ -998,19 +990,19 @@ fn dump_write_cf_properties( } fn recover_mvcc_for_range( - db: &Arc, + db: &RocksEngine, start_key: &[u8], end_key: &[u8], read_only: bool, thread_index: usize, ) -> Result<()> { - let mut mvcc_checker = box_try!(MvccChecker::new(Arc::clone(db), start_key, end_key)); + let mut mvcc_checker = box_try!(MvccChecker::new(db.clone(), start_key, end_key)); mvcc_checker.thread_index = thread_index; let wb_limit: usize = 10240; loop { - let mut wb = db.c().write_batch(); + let mut wb = db.write_batch(); mvcc_checker.check_mvcc(&mut wb, Some(wb_limit))?; let batch_size = wb.count(); @@ -1050,7 +1042,7 @@ pub struct MvccChecker { } impl MvccChecker { - fn new(db: Arc, start_key: &[u8], end_key: &[u8]) -> Result { + fn new(db: RocksEngine, start_key: &[u8], end_key: &[u8]) -> Result { let start_key = keys::data_key(start_key); let end_key = keys::data_end_key(end_key); let gen_iter = |cf: &str| -> Result<_> { @@ -1061,7 +1053,7 @@ impl MvccChecker { Some(KeyBuilder::from_vec(to, 0, 0)), false, ); - let mut iter = box_try!(db.c().iterator_opt(cf, readopts)); + let mut iter = box_try!(db.iterator_opt(cf, readopts)); iter.seek_to_first().unwrap(); Ok(iter) }; @@ -1330,7 +1322,7 @@ fn validate_db_and_cf(db: DBType, cf: &str) -> Result<()> { } fn set_region_tombstone( - db: &Arc, + db: &RocksEngine, store_id: u64, region: Region, wb: &mut RocksWriteBatchVec, @@ -1339,7 +1331,6 @@ fn set_region_tombstone( let key = keys::region_state_key(id); let region_state = db - .c() .get_msg_cf::(CF_RAFT, &key) .map_err(|e| box_err!(e)) .and_then(|s| s.ok_or_else(|| Error::Other("Can't find RegionLocalState".into())))?; @@ -1378,25 +1369,19 @@ fn set_region_tombstone( Ok(()) } -fn divide_db(db: &Arc, parts: usize) -> 
raftstore::Result>> { +fn divide_db(db: &RocksEngine, parts: usize) -> raftstore::Result>> { // Empty start and end key cover all range. let start = keys::data_key(b""); let end = keys::data_end_key(b""); let range = Range::new(&start, &end); Ok(box_try!( - RocksEngine::from_db(db.clone()).get_range_approximate_split_keys(range, parts - 1) + db.get_range_approximate_split_keys(range, parts - 1) )) } #[cfg(test)] mod tests { - use std::sync::Arc; - - use engine_rocks::{ - raw::{ColumnFamilyOptions, DBOptions}, - raw_util::{new_engine_opt, CFOptions}, - RocksEngine, - }; + use engine_rocks::{util::new_engine_opt, RocksCfOptions, RocksDBOptions, RocksEngine}; use engine_traits::{Mutable, SyncMutable, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE}; use kvproto::{ kvrpcpb::ApiVersion, @@ -1409,7 +1394,7 @@ mod tests { use crate::storage::mvcc::{Lock, LockType}; fn init_region_state( - engine: &Arc, + engine: &RocksEngine, region_id: u64, stores: &[u64], mut learner: usize, @@ -1430,7 +1415,7 @@ mod tests { region_state.set_state(PeerState::Normal); region_state.set_region(region.clone()); let key = keys::region_state_key(region_id); - engine.c().put_msg_cf(CF_RAFT, &key, ®ion_state).unwrap(); + engine.put_msg_cf(CF_RAFT, &key, ®ion_state).unwrap(); region } @@ -1456,10 +1441,9 @@ mod tests { raft_engine.put_msg(&raft_state_key, &raft_state).unwrap(); } - fn get_region_state(engine: &Arc, region_id: u64) -> RegionLocalState { + fn get_region_state(engine: &RocksEngine, region_id: u64) -> RegionLocalState { let key = keys::region_state_key(region_id); engine - .c() .get_msg_cf::(CF_RAFT, &key) .unwrap() .unwrap() @@ -1535,24 +1519,9 @@ mod tests { fn new_debugger() -> Debugger { let tmp = Builder::new().prefix("test_debug").tempdir().unwrap(); let path = tmp.path().to_str().unwrap(); - let engine = Arc::new( - engine_rocks::raw_util::new_engine_opt( - path, - DBOptions::new(), - vec![ - CFOptions::new(CF_DEFAULT, ColumnFamilyOptions::new()), - CFOptions::new(CF_WRITE, 
ColumnFamilyOptions::new()), - CFOptions::new(CF_LOCK, ColumnFamilyOptions::new()), - CFOptions::new(CF_RAFT, ColumnFamilyOptions::new()), - ], - ) - .unwrap(), - ); + let engine = engine_rocks::util::new_engine(path, ALL_CFS).unwrap(); - let engines = Engines::new( - RocksEngine::from_db(Arc::clone(&engine)), - RocksEngine::from_db(engine), - ); + let engines = Engines::new(engine.clone(), engine); Debugger::new(engines, ConfigController::default()) } @@ -1720,21 +1689,21 @@ mod tests { let engine = &debugger.engines.kv; // region 1 with peers at stores 11, 12, 13. - let region_1 = init_region_state(engine.as_inner(), 1, &[11, 12, 13], 0); + let region_1 = init_region_state(engine, 1, &[11, 12, 13], 0); // Got the target region from pd, which doesn't contains the store. let mut target_region_1 = region_1.clone(); target_region_1.mut_peers().remove(0); target_region_1.mut_region_epoch().set_conf_ver(100); // region 2 with peers at stores 11, 12, 13. - let region_2 = init_region_state(engine.as_inner(), 2, &[11, 12, 13], 0); + let region_2 = init_region_state(engine, 2, &[11, 12, 13], 0); // Got the target region from pd, which has different peer_id. let mut target_region_2 = region_2.clone(); target_region_2.mut_peers()[0].set_id(100); target_region_2.mut_region_epoch().set_conf_ver(100); // region 3 with peers at stores 21, 22, 23. - let region_3 = init_region_state(engine.as_inner(), 3, &[21, 22, 23], 0); + let region_3 = init_region_state(engine, 3, &[21, 22, 23], 0); // Got the target region from pd but the peers are not changed. 
let mut target_region_3 = region_3; target_region_3.mut_region_epoch().set_conf_ver(100); @@ -1748,21 +1717,15 @@ mod tests { let errors = debugger.set_region_tombstone(target_regions).unwrap(); assert_eq!(errors.len(), 1); assert_eq!(errors[0].0, 3); - assert_eq!( - get_region_state(engine.as_inner(), 1).take_region(), - region_1 - ); - assert_eq!( - get_region_state(engine.as_inner(), 2).take_region(), - region_2 - ); + assert_eq!(get_region_state(engine, 1).take_region(), region_1); + assert_eq!(get_region_state(engine, 2).take_region(), region_2); // After set_region_tombstone success, all region should be adjusted. let target_regions = vec![target_region_1, target_region_2]; let errors = debugger.set_region_tombstone(target_regions).unwrap(); assert!(errors.is_empty()); for ®ion_id in &[1, 2] { - let state = get_region_state(engine.as_inner(), region_id).get_state(); + let state = get_region_state(engine, region_id).get_state(); assert_eq!(state, PeerState::Tombstone); } } @@ -1778,19 +1741,19 @@ mod tests { assert!(!errors.is_empty()); // region 1 with peers at stores 11, 12, 13. - init_region_state(engine.as_inner(), 1, &[11, 12, 13], 0); - let mut expected_state = get_region_state(engine.as_inner(), 1); + init_region_state(engine, 1, &[11, 12, 13], 0); + let mut expected_state = get_region_state(engine, 1); expected_state.set_state(PeerState::Tombstone); // tombstone region 1. let errors = debugger.set_region_tombstone_by_id(vec![1]).unwrap(); assert!(errors.is_empty()); - assert_eq!(get_region_state(engine.as_inner(), 1), expected_state); + assert_eq!(get_region_state(engine, 1), expected_state); // tombstone region 1 again. 
let errors = debugger.set_region_tombstone_by_id(vec![1]).unwrap(); assert!(errors.is_empty()); - assert_eq!(get_region_state(engine.as_inner(), 1), expected_state); + assert_eq!(get_region_state(engine, 1), expected_state); } #[test] @@ -1799,7 +1762,7 @@ mod tests { debugger.set_store_id(100); let engine = &debugger.engines.kv; - let get_region_stores = |engine: &Arc, region_id: u64| { + let get_region_stores = |engine: &RocksEngine, region_id: u64| { get_region_state(engine, region_id) .get_region() .get_peers() @@ -1808,7 +1771,7 @@ mod tests { .collect::>() }; - let get_region_learner = |engine: &Arc, region_id: u64| { + let get_region_learner = |engine: &RocksEngine, region_id: u64| { get_region_state(engine, region_id) .get_region() .get_peers() @@ -1818,9 +1781,9 @@ mod tests { }; // region 1 with peers at stores 11, 12, 13 and 14. - init_region_state(engine.as_inner(), 1, &[11, 12, 13, 14], 0); + init_region_state(engine, 1, &[11, 12, 13, 14], 0); // region 2 with peers at stores 21, 22 and 23. - init_region_state(engine.as_inner(), 2, &[21, 22, 23], 0); + init_region_state(engine, 2, &[21, 22, 23], 0); // Only remove specified stores from region 1. debugger @@ -1828,43 +1791,43 @@ mod tests { .unwrap(); // 13 and 14 should be removed from region 1. - assert_eq!(get_region_stores(engine.as_inner(), 1), &[11, 12]); + assert_eq!(get_region_stores(engine, 1), &[11, 12]); // 21 and 23 shouldn't be removed from region 2. - assert_eq!(get_region_stores(engine.as_inner(), 2), &[21, 22, 23]); + assert_eq!(get_region_stores(engine, 2), &[21, 22, 23]); // Remove specified stores from all regions. debugger .remove_failed_stores(vec![11, 23], None, false) .unwrap(); - assert_eq!(get_region_stores(engine.as_inner(), 1), &[12]); - assert_eq!(get_region_stores(engine.as_inner(), 2), &[21, 22]); + assert_eq!(get_region_stores(engine, 1), &[12]); + assert_eq!(get_region_stores(engine, 2), &[21, 22]); // Should fail when the store itself is in the failed list. 
- init_region_state(engine.as_inner(), 3, &[100, 31, 32, 33], 0); + init_region_state(engine, 3, &[100, 31, 32, 33], 0); debugger .remove_failed_stores(vec![100], None, false) .unwrap_err(); // no learner, promote learner does nothing - init_region_state(engine.as_inner(), 4, &[41, 42, 43, 44], 0); + init_region_state(engine, 4, &[41, 42, 43, 44], 0); debugger.remove_failed_stores(vec![44], None, true).unwrap(); - assert_eq!(get_region_stores(engine.as_inner(), 4), &[41, 42, 43]); - assert_eq!(get_region_learner(engine.as_inner(), 4), 0); + assert_eq!(get_region_stores(engine, 4), &[41, 42, 43]); + assert_eq!(get_region_learner(engine, 4), 0); // promote learner - init_region_state(engine.as_inner(), 5, &[51, 52, 53, 54], 1); + init_region_state(engine, 5, &[51, 52, 53, 54], 1); debugger .remove_failed_stores(vec![52, 53, 54], None, true) .unwrap(); - assert_eq!(get_region_stores(engine.as_inner(), 5), &[51]); - assert_eq!(get_region_learner(engine.as_inner(), 5), 0); + assert_eq!(get_region_stores(engine, 5), &[51]); + assert_eq!(get_region_learner(engine, 5), 0); // no need to promote learner - init_region_state(engine.as_inner(), 6, &[61, 62, 63, 64], 1); + init_region_state(engine, 6, &[61, 62, 63, 64], 1); debugger.remove_failed_stores(vec![64], None, true).unwrap(); - assert_eq!(get_region_stores(engine.as_inner(), 6), &[61, 62, 63]); - assert_eq!(get_region_learner(engine.as_inner(), 6), 1); + assert_eq!(get_region_stores(engine, 6), &[61, 62, 63]); + assert_eq!(get_region_learner(engine, 6), 1); } #[test] @@ -1874,8 +1837,8 @@ mod tests { let kv_engine = &debugger.engines.kv; let raft_engine = &debugger.engines.raft; - init_region_state(kv_engine.as_inner(), 1, &[100, 101], 1); - init_region_state(kv_engine.as_inner(), 2, &[100, 103], 1); + init_region_state(kv_engine, 1, &[100, 101], 1); + init_region_state(kv_engine, 2, &[100, 103], 1); init_raft_state(kv_engine, raft_engine, 1, 100, 90, 80); init_raft_state(kv_engine, raft_engine, 2, 80, 80, 80); @@ 
-2026,10 +1989,7 @@ mod tests { remove_region_state(1); remove_region_state(2); assert!(debugger.recreate_region(region.clone()).is_ok()); - assert_eq!( - get_region_state(engine.as_inner(), 100).get_region(), - ®ion - ); + assert_eq!(get_region_state(engine, 100).get_region(), ®ion); region.set_start_key(b"z".to_vec()); region.set_end_key(b"".to_vec()); @@ -2177,27 +2137,28 @@ mod tests { let path_str = path.path().to_str().unwrap(); let cfs_opts = ALL_CFS .iter() - .map(|cf| CFOptions::new(cf, ColumnFamilyOptions::new())) + .map(|cf| (*cf, RocksCfOptions::default())) .collect(); - let db_opt = DBOptions::new(); + let db_opt = RocksDBOptions::default(); db_opt.enable_multi_batch_write(true); - let db = Arc::new(new_engine_opt(path_str, db_opt, cfs_opts).unwrap()); + let db = new_engine_opt(path_str, db_opt, cfs_opts).unwrap(); // Write initial KVs. - let mut wb = RocksEngine::from_db(db.clone()).write_batch(); + let mut wb = db.write_batch(); for &(cf, ref k, ref v, _) in &kv { wb.put_cf(cf, &keys::data_key(k.as_encoded()), v).unwrap(); } wb.write().unwrap(); // Fix problems. - let mut checker = MvccChecker::new(Arc::clone(&db), b"k", b"l").unwrap(); - let mut wb = db.c().write_batch(); + let mut checker = MvccChecker::new(db.clone(), b"k", b"l").unwrap(); + let mut wb = db.write_batch(); checker.check_mvcc(&mut wb, None).unwrap(); wb.write().unwrap(); // Check result. 
for (cf, k, _, expect) in kv { let data = db + .as_inner() .get_cf( - get_cf_handle(&db, cf).unwrap(), + get_cf_handle(db.as_inner(), cf).unwrap(), &keys::data_key(k.as_encoded()), ) .unwrap(); diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 421c0c0f8ba..854c1fdd356 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -144,19 +144,18 @@ impl KvEngineFactory { self.inner.region_info_accessor.as_ref(), self.inner.api_version, ); - let kv_engine = engine_rocks::raw_util::new_engine_opt( + let kv_engine = engine_rocks::util::new_engine_opt( tablet_path.to_str().unwrap(), kv_db_opts, kv_cfs_opts, ); - let kv_engine = match kv_engine { + let mut kv_engine = match kv_engine { Ok(e) => e, Err(e) => { error!("failed to create kv engine"; "path" => %tablet_path.display(), "err" => ?e); return Err(e); } }; - let mut kv_engine = RocksEngine::from_db(Arc::new(kv_engine)); let shared_block_cache = self.inner.block_cache.is_some(); kv_engine.set_shared_block_cache(shared_block_cache); Ok(kv_engine) @@ -184,7 +183,7 @@ impl KvEngineFactory { ); // TODOTODO: call rust-rocks or tirocks to destroy_engine; /* - engine_rocks::raw_util::destroy_engine( + engine_rocks::util::destroy_engine( tablet_path.to_str().unwrap(), kv_db_opts, kv_cfs_opts, diff --git a/src/server/gc_worker/mod.rs b/src/server/gc_worker/mod.rs index 4e2bc6e76de..20de36ef035 100644 --- a/src/server/gc_worker/mod.rs +++ b/src/server/gc_worker/mod.rs @@ -50,9 +50,7 @@ fn check_need_gc(safe_point: TimeStamp, ratio_threshold: f64, props: &MvccProper #[cfg(test)] mod tests { - use std::sync::Arc; - - use engine_rocks::{raw::DB, Compat}; + use engine_rocks::RocksEngine; use engine_traits::{MvccPropertiesExt, CF_WRITE}; use kvproto::metapb::Region; @@ -60,7 +58,7 @@ mod tests { use crate::storage::mvcc::reader_tests::{make_region, open_db, RegionEngine}; fn get_mvcc_properties_and_check_gc( - db: Arc, + db: &RocksEngine, region: Region, safe_point: impl Into, 
need_gc: bool, @@ -70,7 +68,6 @@ mod tests { let start = keys::data_key(region.get_start_key()); let end = keys::data_end_key(region.get_end_key()); let props = db - .c() .get_mvcc_properties_cf(CF_WRITE, safe_point, &start, &end) .unwrap(); assert_eq!(check_need_gc(safe_point, 1.0, &props), need_gc); @@ -100,7 +97,7 @@ mod tests { engine.put(&[3], 4, 4); engine.flush(); engine.compact(); - let props = get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, false); + let props = get_mvcc_properties_and_check_gc(&db, region.clone(), 10, false); assert_eq!(props.min_ts, 1.into()); assert_eq!(props.max_ts, 4.into()); assert_eq!(props.num_rows, 4); @@ -116,7 +113,7 @@ mod tests { engine.flush(); // After this flush, keys 5,6 in the new SST file have more than one // versions, so we need gc. - let props = get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, true); + let props = get_mvcc_properties_and_check_gc(&db, region.clone(), 10, true); assert_eq!(props.min_ts, 1.into()); assert_eq!(props.max_ts, 8.into()); assert_eq!(props.num_rows, 6); @@ -124,7 +121,7 @@ mod tests { assert_eq!(props.num_versions, 8); assert_eq!(props.max_row_versions, 2); // But if the `safe_point` is older than all versions, we don't need gc too. - let props = get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 0, false); + let props = get_mvcc_properties_and_check_gc(&db, region.clone(), 0, false); assert_eq!(props.min_ts, TimeStamp::max()); assert_eq!(props.max_ts, TimeStamp::zero()); assert_eq!(props.num_rows, 0); @@ -138,7 +135,7 @@ mod tests { engine.compact(); // After this compact, all versions of keys 5,6 are deleted, // no keys have more than one versions, so we don't need gc. 
- let props = get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, false); + let props = get_mvcc_properties_and_check_gc(&db, region.clone(), 10, false); assert_eq!(props.min_ts, 1.into()); assert_eq!(props.max_ts, 4.into()); assert_eq!(props.num_rows, 4); @@ -149,7 +146,7 @@ mod tests { // A single lock version need gc. engine.lock(&[7], 9, 9); engine.flush(); - let props = get_mvcc_properties_and_check_gc(Arc::clone(&db), region.clone(), 10, true); + let props = get_mvcc_properties_and_check_gc(&db, region.clone(), 10, true); assert_eq!(props.min_ts, 1.into()); assert_eq!(props.max_ts, 9.into()); assert_eq!(props.num_rows, 5); diff --git a/src/server/reset_to_version.rs b/src/server/reset_to_version.rs index 94e3e38900d..de837bdb1cb 100644 --- a/src/server/reset_to_version.rs +++ b/src/server/reset_to_version.rs @@ -255,12 +255,7 @@ impl ResetToVersionManager { #[cfg(test)] mod tests { - use engine_rocks::{ - raw::{ColumnFamilyOptions, DBOptions}, - raw_util::CFOptions, - Compat, - }; - use engine_traits::{WriteBatch, WriteBatchExt, CF_LOCK, CF_RAFT}; + use engine_traits::{WriteBatch, WriteBatchExt, ALL_CFS, CF_LOCK}; use tempfile::Builder; use txn_types::{Lock, LockType, WriteType}; @@ -270,19 +265,7 @@ mod tests { fn test_basic() { let tmp = Builder::new().prefix("test_basic").tempdir().unwrap(); let path = tmp.path().to_str().unwrap(); - let fake_engine = Arc::new( - engine_rocks::raw_util::new_engine_opt( - path, - DBOptions::new(), - vec![ - CFOptions::new(CF_DEFAULT, ColumnFamilyOptions::new()), - CFOptions::new(CF_WRITE, ColumnFamilyOptions::new()), - CFOptions::new(CF_LOCK, ColumnFamilyOptions::new()), - CFOptions::new(CF_RAFT, ColumnFamilyOptions::new()), - ], - ) - .unwrap(), - ); + let fake_engine = engine_rocks::util::new_engine(path, ALL_CFS).unwrap(); let write = vec![ // key, start_ts, commit_ts @@ -339,19 +322,18 @@ mod tests { ); kv.push((CF_LOCK, Key::from_raw(key), lock.to_bytes())); } - let mut wb = 
fake_engine.c().write_batch(); + let mut wb = fake_engine.write_batch(); for &(cf, ref k, ref v) in &kv { wb.put_cf(cf, &keys::data_key(k.as_encoded()), v).unwrap(); } wb.write().unwrap(); - let manager = ResetToVersionManager::new(fake_engine.c().clone()); + let manager = ResetToVersionManager::new(fake_engine.clone()); manager.start(100.into()); manager.wait(); let readopts = IterOptions::new(None, None, false); let mut write_iter = fake_engine - .c() .iterator_opt(CF_WRITE, readopts.clone()) .unwrap(); write_iter.seek_to_first().unwrap(); @@ -363,7 +345,6 @@ mod tests { remaining_writes.push((key, write)); } let mut default_iter = fake_engine - .c() .iterator_opt(CF_DEFAULT, readopts.clone()) .unwrap(); default_iter.seek_to_first().unwrap(); @@ -375,7 +356,7 @@ mod tests { remaining_defaults.push((key, value)); } - let mut lock_iter = fake_engine.c().iterator_opt(CF_LOCK, readopts).unwrap(); + let mut lock_iter = fake_engine.iterator_opt(CF_LOCK, readopts).unwrap(); lock_iter.seek_to_first().unwrap(); let mut remaining_locks = vec![]; while lock_iter.valid().unwrap() { diff --git a/src/storage/kv/test_engine_builder.rs b/src/storage/kv/test_engine_builder.rs index d5c1180ddf0..e3d1507224b 100644 --- a/src/storage/kv/test_engine_builder.rs +++ b/src/storage/kv/test_engine_builder.rs @@ -6,7 +6,7 @@ use std::{ }; use causal_ts::tests::DummyRawTsTracker; -use engine_rocks::{raw::ColumnFamilyOptions, raw_util::CFOptions}; +use engine_rocks::RocksCfOptions; use engine_traits::{CfName, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE}; use file_system::IORateLimiter; use kvproto::kvrpcpb::ApiVersion; @@ -113,24 +113,18 @@ impl TestEngineBuilder { let cfs_opts = cfs .iter() .map(|cf| match *cf { - CF_DEFAULT => CFOptions::new( + CF_DEFAULT => ( CF_DEFAULT, cfg_rocksdb.defaultcf.build_opt(&cache, None, api_version), ), - CF_LOCK => CFOptions::new(CF_LOCK, cfg_rocksdb.lockcf.build_opt(&cache)), - CF_WRITE => CFOptions::new(CF_WRITE, cfg_rocksdb.writecf.build_opt(&cache, 
None)), - CF_RAFT => CFOptions::new(CF_RAFT, cfg_rocksdb.raftcf.build_opt(&cache)), - _ => CFOptions::new(*cf, ColumnFamilyOptions::new()), + CF_LOCK => (CF_LOCK, cfg_rocksdb.lockcf.build_opt(&cache)), + CF_WRITE => (CF_WRITE, cfg_rocksdb.writecf.build_opt(&cache, None)), + CF_RAFT => (CF_RAFT, cfg_rocksdb.raftcf.build_opt(&cache)), + _ => (*cf, RocksCfOptions::default()), }) .collect(); - let mut engine = RocksEngine::new( - &path, - &cfs, - Some(cfs_opts), - cache.is_some(), - self.io_rate_limiter, - None, /* CFOptions */ - )?; + let mut engine = + RocksEngine::new(&path, None, cfs_opts, cache.is_some(), self.io_rate_limiter)?; if let ApiVersion::V2 = api_version { Self::register_causal_observer(&mut engine); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index a43b5270875..aab89299641 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3101,8 +3101,7 @@ mod tests { use api_version::{test_kv_format_impl, ApiV2}; use collections::HashMap; - use engine_rocks::raw_util::CFOptions; - use engine_traits::{raw_ttl::ttl_current_ts, ALL_CFS, CF_LOCK, CF_RAFT, CF_WRITE}; + use engine_traits::{raw_ttl::ttl_current_ts, CF_LOCK, CF_RAFT, CF_WRITE}; use error_code::ErrorCodeExt; use errors::extract_key_error; use futures::executor::block_on; @@ -3232,7 +3231,10 @@ mod tests { #[test] fn test_cf_error() { // New engine lacks normal column families. 
- let engine = TestEngineBuilder::new().cfs(["foo"]).build().unwrap(); + let engine = TestEngineBuilder::new() + .cfs([CF_DEFAULT, "foo"]) + .build() + .unwrap(); let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) .build() .unwrap(); @@ -3638,27 +3640,25 @@ mod tests { }; let engine = { let path = "".to_owned(); - let cfs = ALL_CFS.to_vec(); let cfg_rocksdb = db_config; let cache = BlockCacheConfig::default().build_shared_cache(); let cfs_opts = vec![ - CFOptions::new( + ( CF_DEFAULT, cfg_rocksdb .defaultcf .build_opt(&cache, None, ApiVersion::V1), ), - CFOptions::new(CF_LOCK, cfg_rocksdb.lockcf.build_opt(&cache)), - CFOptions::new(CF_WRITE, cfg_rocksdb.writecf.build_opt(&cache, None)), - CFOptions::new(CF_RAFT, cfg_rocksdb.raftcf.build_opt(&cache)), + (CF_LOCK, cfg_rocksdb.lockcf.build_opt(&cache)), + (CF_WRITE, cfg_rocksdb.writecf.build_opt(&cache, None)), + (CF_RAFT, cfg_rocksdb.raftcf.build_opt(&cache)), ]; RocksEngine::new( &path, - &cfs, - Some(cfs_opts), + None, + cfs_opts, cache.is_some(), None, /*io_rate_limiter*/ - None, /* CFOptions */ ) } .unwrap(); diff --git a/src/storage/mvcc/consistency_check.rs b/src/storage/mvcc/consistency_check.rs index 7881eb45903..eb788cb4dd3 100644 --- a/src/storage/mvcc/consistency_check.rs +++ b/src/storage/mvcc/consistency_check.rs @@ -480,7 +480,7 @@ mod tests { #[test] fn test_mvcc_info_collector() { - use engine_test::ctor::{CFOptions, ColumnFamilyOptions, DBOptions}; + use engine_test::ctor::{ColumnFamilyOptions, DBOptions}; use engine_traits::SyncMutable; use txn_types::TimeStamp; @@ -495,10 +495,10 @@ mod tests { path, DBOptions::default(), vec![ - CFOptions::new(CF_DEFAULT, ColumnFamilyOptions::new()), - CFOptions::new(CF_WRITE, ColumnFamilyOptions::new()), - CFOptions::new(CF_LOCK, ColumnFamilyOptions::new()), - CFOptions::new(CF_RAFT, ColumnFamilyOptions::new()), + (CF_DEFAULT, ColumnFamilyOptions::new()), + (CF_WRITE, ColumnFamilyOptions::new()), + (CF_LOCK, 
ColumnFamilyOptions::new()), + (CF_RAFT, ColumnFamilyOptions::new()), ], ) .unwrap(); diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 17b02c28ec9..614f8acb147 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -617,18 +617,16 @@ impl MvccReader { #[cfg(test)] pub mod tests { - use std::{ops::Bound, sync::Arc, u64}; + use std::{ops::Bound, u64}; use concurrency_manager::ConcurrencyManager; use engine_rocks::{ - properties::MvccPropertiesCollectorFactory, - raw::{ColumnFamilyOptions, DBOptions, DB}, - raw_util::CFOptions, - Compat, RocksSnapshot, + properties::MvccPropertiesCollectorFactory, RocksCfOptions, RocksDBOptions, RocksEngine, + RocksSnapshot, }; use engine_traits::{ - IterOptions, Mutable, WriteBatch, WriteBatchExt, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, - CF_WRITE, + CompactExt, IterOptions, MiscExt, Mutable, SyncMutable, WriteBatch, WriteBatchExt, ALL_CFS, + CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use kvproto::{ kvrpcpb::{AssertionLevel, Context}, @@ -649,20 +647,20 @@ pub mod tests { }; pub struct RegionEngine { - db: Arc, + db: RocksEngine, region: Region, } impl RegionEngine { - pub fn new(db: &Arc, region: &Region) -> RegionEngine { + pub fn new(db: &RocksEngine, region: &Region) -> RegionEngine { RegionEngine { - db: Arc::clone(db), + db: db.clone(), region: region.clone(), } } pub fn snapshot(&self) -> RegionSnapshot { - let db = self.db.c().clone(); + let db = self.db.clone(); RegionSnapshot::::from_raw(db, self.region.clone()) } @@ -849,7 +847,7 @@ pub mod tests { pub fn write(&mut self, modifies: Vec) { let db = &self.db; - let mut wb = db.c().write_batch(); + let mut wb = db.write_batch(); for rev in modifies { match rev { Modify::Put(cf, k, v) => { @@ -879,22 +877,20 @@ pub mod tests { pub fn flush(&mut self) { for cf in ALL_CFS { - let cf = engine_rocks::util::get_cf_handle(&self.db, cf).unwrap(); self.db.flush_cf(cf, true).unwrap(); } } pub fn compact(&mut 
self) { for cf in ALL_CFS { - let cf = engine_rocks::util::get_cf_handle(&self.db, cf).unwrap(); - self.db.compact_range_cf(cf, None, None); + self.db.compact_range(cf, None, None, false, 1).unwrap(); } } } - pub fn open_db(path: &str, with_properties: bool) -> Arc { - let db_opts = DBOptions::new(); - let mut cf_opts = ColumnFamilyOptions::new(); + pub fn open_db(path: &str, with_properties: bool) -> RocksEngine { + let db_opt = RocksDBOptions::default(); + let mut cf_opts = RocksCfOptions::default(); cf_opts.set_write_buffer_size(32 * 1024 * 1024); if with_properties { cf_opts.add_table_properties_collector_factory( @@ -903,12 +899,12 @@ pub mod tests { ); } let cfs_opts = vec![ - CFOptions::new(CF_DEFAULT, ColumnFamilyOptions::new()), - CFOptions::new(CF_RAFT, ColumnFamilyOptions::new()), - CFOptions::new(CF_LOCK, ColumnFamilyOptions::new()), - CFOptions::new(CF_WRITE, cf_opts), + (CF_DEFAULT, RocksCfOptions::default()), + (CF_RAFT, RocksCfOptions::default()), + (CF_LOCK, RocksCfOptions::default()), + (CF_WRITE, cf_opts), ]; - Arc::new(engine_rocks::raw_util::new_engine_opt(path, db_opts, cfs_opts).unwrap()) + engine_rocks::util::new_engine_opt(path, db_opt, cfs_opts).unwrap() } pub fn make_region(id: u64, start_key: Vec, end_key: Vec) -> Region { @@ -945,7 +941,7 @@ pub mod tests { engine.put(&[12], 11, 12); engine.flush(); - let snap = RegionSnapshot::::from_raw(db.c().clone(), region); + let snap = RegionSnapshot::::from_raw(db, region); let tests = vec![ // set nothing. @@ -1019,7 +1015,7 @@ pub mod tests { iopt.set_hint_min_ts(Bound::Included(1)); iopt.set_hint_max_ts(Bound::Included(6)); - let snap = RegionSnapshot::::from_raw(db.c().clone(), region); + let snap = RegionSnapshot::::from_raw(db, region); let mut iter = snap.iter(CF_WRITE, iopt).unwrap(); // Must not omit the latest deletion of key1 to prevent seeing outdated record. 
@@ -1069,7 +1065,7 @@ pub mod tests { engine.prewrite_pessimistic_lock(m, k, 45); engine.commit(k, 45, 50); - let snap = RegionSnapshot::::from_raw(db.c().clone(), region); + let snap = RegionSnapshot::::from_raw(db, region); let mut reader = MvccReader::new(snap, None, false); // Let's assume `50_45 PUT` means a commit version with start ts is 45 and commit ts @@ -1180,7 +1176,7 @@ pub mod tests { engine.prewrite_pessimistic_lock(m, k, 1); engine.commit(k, 1, 4); - let snap = RegionSnapshot::::from_raw(db.c().clone(), region); + let snap = RegionSnapshot::::from_raw(db, region); let mut reader = MvccReader::new(snap, None, false); let (commit_ts, write_type) = reader @@ -1241,7 +1237,7 @@ pub mod tests { // Let's assume `2_1 PUT` means a commit version with start ts is 1 and commit ts // is 2. // Commit versions: [25_23 PUT, 20_10 PUT, 17_15 PUT, 7_7 Rollback, 5_1 PUT, 3_3 Rollback]. - let snap = RegionSnapshot::::from_raw(db.c().clone(), region.clone()); + let snap = RegionSnapshot::::from_raw(db.clone(), region.clone()); let mut reader = MvccReader::new(snap, None, false); let k = Key::from_raw(k); @@ -1312,7 +1308,7 @@ pub mod tests { engine.prewrite(m2, k2, 1); engine.commit(k2, 1, 2); - let snap = RegionSnapshot::::from_raw(db.c().clone(), region); + let snap = RegionSnapshot::::from_raw(db.clone(), region); let mut reader = MvccReader::new(snap, None, false); let (commit_ts, write) = reader @@ -1334,7 +1330,7 @@ pub mod tests { // Test seek_write touches region's end. 
let region1 = make_region(1, vec![], Key::from_raw(b"k1").into_encoded()); - let snap = RegionSnapshot::::from_raw(db.c().clone(), region1); + let snap = RegionSnapshot::::from_raw(db, region1); let mut reader = MvccReader::new(snap, None, false); assert!(reader.seek_write(&k, 2.into()).unwrap().is_none()); @@ -1384,7 +1380,7 @@ pub mod tests { let m = Mutation::make_put(Key::from_raw(k), v.to_vec()); engine.prewrite(m, k, 24); - let snap = RegionSnapshot::::from_raw(db.c().clone(), region); + let snap = RegionSnapshot::::from_raw(db, region); let mut reader = MvccReader::new(snap, None, false); // Let's assume `2_1 PUT` means a commit version with start ts is 1 and commit ts @@ -1524,7 +1520,7 @@ pub mod tests { limit, expect_res: &[_], expect_is_remain| { - let snap = RegionSnapshot::::from_raw(db.c().clone(), region.clone()); + let snap = RegionSnapshot::::from_raw(db.clone(), region.clone()); let mut reader = MvccReader::new(snap, None, false); let res = reader .scan_locks( @@ -1691,7 +1687,7 @@ pub mod tests { for case in cases { engine.write(case.modifies); - let snap = RegionSnapshot::::from_raw(db.c().clone(), region.clone()); + let snap = RegionSnapshot::::from_raw(db.clone(), region.clone()); let mut reader = MvccReader::new(snap, case.scan_mode, false); let result = reader.load_data(&case.key, case.write); assert_eq!(format!("{:?}", result), format!("{:?}", case.expected)); @@ -1779,7 +1775,7 @@ pub mod tests { for case in cases { engine.write(case.modifies); - let snap = RegionSnapshot::::from_raw(db.c().clone(), region.clone()); + let snap = RegionSnapshot::::from_raw(db.clone(), region.clone()); let mut reader = MvccReader::new(snap, None, false); let result = reader.get(&case.key, case.ts, case.gc_fence_limit); assert_eq!(format!("{:?}", result), format!("{:?}", case.expected)); @@ -1972,8 +1968,7 @@ pub mod tests { fn test_reader_prefix_seek() { let dir = tempfile::TempDir::new().unwrap(); let builder = TestEngineBuilder::new().path(dir.path()); - 
let db = builder.build().unwrap().kv_engine().get_sync_db(); - let cf = engine_rocks::util::get_cf_handle(&db, CF_WRITE).unwrap(); + let db = builder.build().unwrap().kv_engine(); let region = make_region(1, vec![], vec![]); let mut engine = RegionEngine::new(&db, ®ion); @@ -1983,24 +1978,22 @@ pub mod tests { let commit_ts = (i * 2 + 1).into(); let mut k = vec![b'z']; k.extend_from_slice(Key::from_raw(b"k1").append_ts(commit_ts).as_encoded()); - use engine_rocks::raw::Writable; - engine.db.delete_cf(cf, &k).unwrap(); + engine.db.delete_cf(CF_WRITE, &k).unwrap(); } engine.flush(); - #[allow(clippy::useless_vec)] - for (k, scan_mode, tombstones) in vec![ - (b"k0", Some(ScanMode::Forward), 99), + for (k, scan_mode, tombstones) in &[ + (b"k0" as &[u8], Some(ScanMode::Forward), 99), (b"k0", None, 0), (b"k1", Some(ScanMode::Forward), 99), (b"k1", None, 99), (b"k2", Some(ScanMode::Forward), 0), (b"k2", None, 0), ] { - let mut reader = MvccReader::new(engine.snapshot(), scan_mode, false); + let mut reader = MvccReader::new(engine.snapshot(), *scan_mode, false); let (k, ts) = (Key::from_raw(k), 199.into()); reader.seek_write(&k, ts).unwrap(); - assert_eq!(reader.statistics.write.seek_tombstone, tombstones); + assert_eq!(reader.statistics.write.seek_tombstone, *tombstones); } } } diff --git a/src/storage/txn/flow_controller/singleton_flow_controller.rs b/src/storage/txn/flow_controller/singleton_flow_controller.rs index edcac95aa00..056c447aced 100644 --- a/src/storage/txn/flow_controller/singleton_flow_controller.rs +++ b/src/storage/txn/flow_controller/singleton_flow_controller.rs @@ -989,7 +989,7 @@ impl FlowChecker { pub(super) mod tests { use std::sync::atomic::AtomicU64; - use engine_rocks::RocksColumnFamilyOptions; + use engine_rocks::RocksCfOptions; use engine_traits::{CFOptionsExt, Result}; use super::{super::FlowController, *}; @@ -1020,7 +1020,7 @@ pub(super) mod tests { } impl CFOptionsExt for EngineStub { - type ColumnFamilyOptions = RocksColumnFamilyOptions; + 
type ColumnFamilyOptions = RocksCfOptions; fn get_options_cf(&self, _cf: &str) -> Result { unimplemented!(); } diff --git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index c861a251bba..4c94aeb1249 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -3,8 +3,8 @@ use std::sync::Arc; use crossbeam::channel::TrySendError; -use engine_rocks::{raw::DB, RocksEngine, RocksSnapshot}; -use engine_traits::{ALL_CFS, CF_DEFAULT}; +use engine_rocks::{RocksEngine, RocksSnapshot}; +use engine_traits::{KvEngine, ALL_CFS, CF_DEFAULT}; use kvproto::{ kvrpcpb::{Context, ExtraOp as TxnExtraOp}, metapb::Region, @@ -35,12 +35,12 @@ use crate::test; #[derive(Clone)] struct SyncBenchRouter { - db: Arc, + db: RocksEngine, region: Region, } impl SyncBenchRouter { - fn new(region: Region, db: Arc) -> SyncBenchRouter { + fn new(region: Region, db: RocksEngine) -> SyncBenchRouter { SyncBenchRouter { db, region } } } @@ -51,7 +51,7 @@ impl SyncBenchRouter { cmd_resp::bind_term(&mut response, 1); match cmd.callback { Callback::Read(cb) => { - let snapshot = RocksSnapshot::new(Arc::clone(&self.db)); + let snapshot = self.db.snapshot(); let region = Arc::new(self.region.to_owned()); cb(ReadResponse { response, @@ -129,18 +129,18 @@ impl LocalReadRouter for SyncBenchRouter { fn release_snapshot_cache(&self) {} } -fn new_engine() -> (TempDir, Arc) { +fn new_engine() -> (TempDir, RocksEngine) { let dir = Builder::new().prefix("bench_rafkv").tempdir().unwrap(); let path = dir.path().to_str().unwrap().to_string(); - let db = engine_rocks::raw_util::new_engine(&path, None, ALL_CFS, None).unwrap(); - (dir, Arc::new(db)) + let db = engine_rocks::util::new_engine(&path, ALL_CFS).unwrap(); + (dir, db) } // The lower limit of time a async_snapshot may take. 
#[bench] fn bench_async_snapshots_noop(b: &mut test::Bencher) { let (_dir, db) = new_engine(); - let snapshot = RocksSnapshot::new(Arc::clone(&db)); + let snapshot = db.snapshot(); let resp = ReadResponse { response: RaftCmdResponse::default(), snapshot: Some(RegionSnapshot::from_snapshot( @@ -179,10 +179,7 @@ fn bench_async_snapshot(b: &mut test::Bencher) { region.mut_region_epoch().set_version(2); region.mut_region_epoch().set_conf_ver(5); let (_tmp, db) = new_engine(); - let kv = RaftKv::new( - SyncBenchRouter::new(region.clone(), db.clone()), - RocksEngine::from_db(db), - ); + let kv = RaftKv::new(SyncBenchRouter::new(region.clone(), db.clone()), db); let mut ctx = Context::default(); ctx.set_region_id(region.get_id()); @@ -211,10 +208,7 @@ fn bench_async_write(b: &mut test::Bencher) { region.mut_region_epoch().set_version(2); region.mut_region_epoch().set_conf_ver(5); let (_tmp, db) = new_engine(); - let kv = RaftKv::new( - SyncBenchRouter::new(region.clone(), db.clone()), - RocksEngine::from_db(db), - ); + let kv = RaftKv::new(SyncBenchRouter::new(region.clone(), db.clone()), db); let mut ctx = Context::default(); ctx.set_region_id(region.get_id()); diff --git a/tests/benches/misc/writebatch/bench_writebatch.rs b/tests/benches/misc/writebatch/bench_writebatch.rs index 0c6e81a35ca..cde64280184 100644 --- a/tests/benches/misc/writebatch/bench_writebatch.rs +++ b/tests/benches/misc/writebatch/bench_writebatch.rs @@ -1,18 +1,12 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::sync::Arc; - -use engine_rocks::{ - raw::{DBOptions, DB}, - RocksEngine, RocksWriteBatchVec, -}; -use engine_traits::{Mutable, WriteBatch, WriteBatchExt}; +use engine_rocks::{RocksCfOptions, RocksDBOptions, RocksEngine, RocksWriteBatchVec}; +use engine_traits::{Mutable, WriteBatch, WriteBatchExt, CF_DEFAULT}; use tempfile::Builder; use test::Bencher; -fn writebatch(db: &Arc, round: usize, batch_keys: usize) { +fn writebatch(engine: &RocksEngine, round: usize, batch_keys: usize) { let v = b"operators are syntactic sugar for calls to methods of built-in traits"; - let engine = RocksEngine::from_db(db.clone()); for r in 0..round { let mut batch = engine.write_batch(); for i in 0..batch_keys { @@ -28,12 +22,17 @@ fn bench_writebatch_impl(b: &mut Bencher, batch_keys: usize) { .prefix("/tmp/rocksdb_write_batch_bench") .tempdir() .unwrap(); - let mut opts = DBOptions::new(); + let mut opts = RocksDBOptions::default(); opts.create_if_missing(true); opts.enable_unordered_write(false); opts.enable_pipelined_write(false); opts.enable_multi_batch_write(true); - let db = Arc::new(DB::open(opts, path.path().to_str().unwrap()).unwrap()); + let db = engine_rocks::util::new_engine_opt( + path.path().to_str().unwrap(), + opts, + vec![(CF_DEFAULT, RocksCfOptions::default())], + ) + .unwrap(); let key_count = 1 << 13; let round = key_count / batch_keys; b.iter(|| { @@ -112,13 +111,17 @@ fn bench_writebatch_without_capacity(b: &mut Bencher) { .prefix("/tmp/rocksdb_write_batch_bench") .tempdir() .unwrap(); - let mut opts = DBOptions::new(); + let mut opts = RocksDBOptions::default(); opts.create_if_missing(true); opts.enable_unordered_write(false); opts.enable_pipelined_write(false); opts.enable_multi_batch_write(true); - let db = Arc::new(DB::open(opts, path.path().to_str().unwrap()).unwrap()); - let engine = RocksEngine::from_db(db); + let engine = engine_rocks::util::new_engine_opt( + path.path().to_str().unwrap(), + opts, + vec![(CF_DEFAULT, RocksCfOptions::default())], + 
) + .unwrap(); b.iter(|| { let mut wb = engine.write_batch(); fill_writebatch(&mut wb, 4096); @@ -131,13 +134,17 @@ fn bench_writebatch_with_capacity(b: &mut Bencher) { .prefix("/tmp/rocksdb_write_batch_bench") .tempdir() .unwrap(); - let mut opts = DBOptions::new(); + let mut opts = RocksDBOptions::default(); opts.create_if_missing(true); opts.enable_unordered_write(false); opts.enable_pipelined_write(false); opts.enable_multi_batch_write(true); - let db = Arc::new(DB::open(opts, path.path().to_str().unwrap()).unwrap()); - let engine = RocksEngine::from_db(db); + let engine = engine_rocks::util::new_engine_opt( + path.path().to_str().unwrap(), + opts, + vec![(CF_DEFAULT, RocksCfOptions::default())], + ) + .unwrap(); b.iter(|| { let mut wb = engine.write_batch_with_cap(4096); fill_writebatch(&mut wb, 4096); diff --git a/tests/benches/raftstore/mod.rs b/tests/benches/raftstore/mod.rs index 58e674c9d11..05c602824c2 100644 --- a/tests/benches/raftstore/mod.rs +++ b/tests/benches/raftstore/mod.rs @@ -1,17 +1,17 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{fmt, sync::Arc}; +use std::fmt; use criterion::{Bencher, Criterion}; -use engine_rocks::{raw::DB, Compat}; +use engine_rocks::RocksEngine; use engine_traits::{Mutable, WriteBatch, WriteBatchExt}; use test_raftstore::*; use test_util::*; const DEFAULT_DATA_SIZE: usize = 100_000; -fn enc_write_kvs(db: &Arc, kvs: &[(Vec, Vec)]) { - let mut wb = db.c().write_batch(); +fn enc_write_kvs(db: &RocksEngine, kvs: &[(Vec, Vec)]) { + let mut wb = db.write_batch(); for &(ref k, ref v) in kvs { wb.put(&keys::data_key(k), v).unwrap(); } @@ -21,7 +21,7 @@ fn enc_write_kvs(db: &Arc, kvs: &[(Vec, Vec)]) { fn prepare_cluster(cluster: &mut Cluster, initial_kvs: &[(Vec, Vec)]) { cluster.run(); for engines in cluster.engines.values() { - enc_write_kvs(engines.kv.as_inner(), initial_kvs); + enc_write_kvs(&engines.kv, initial_kvs); } cluster.leader_of_region(1).unwrap(); } diff --git a/tests/failpoints/cases/test_async_fetch.rs b/tests/failpoints/cases/test_async_fetch.rs index c6b8a693085..638888e83e2 100644 --- a/tests/failpoints/cases/test_async_fetch.rs +++ b/tests/failpoints/cases/test_async_fetch.rs @@ -32,7 +32,7 @@ fn test_node_async_fetch() { let mut before_states = HashMap::default(); for (&id, engines) in &cluster.engines { - must_get_equal(engines.kv.as_inner(), b"k1", b"v1"); + must_get_equal(&engines.kv, b"k1", b"v1"); let mut state: RaftApplyState = engines .kv .get_msg_cf(CF_RAFT, &keys::apply_state_key(1)) @@ -88,7 +88,7 @@ fn test_node_async_fetch() { for i in 1..60u32 { let k = i.to_string().into_bytes(); let v = k.clone(); - must_get_equal(cluster.engines[&1].kv.as_inner(), &k, &v); + must_get_equal(&cluster.engines[&1].kv, &k, &v); } for i in 60..500u32 { diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index c341d801c9b..5cb7c79011f 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -9,7 +9,6 @@ use std::{ time::Duration, }; -use engine_rocks::Compat; use 
engine_traits::{Peekable, CF_RAFT}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ @@ -76,7 +75,6 @@ fn test_node_merge_rollback() { let state_key = keys::region_state_key(region.get_id()); let state: RegionLocalState = cluster .get_engine(i) - .c() .get_msg_cf(CF_RAFT, &state_key) .unwrap() .unwrap(); @@ -105,7 +103,6 @@ fn test_node_merge_rollback() { let state_key = keys::region_state_key(region.get_id()); let state: RegionLocalState = cluster .get_engine(i) - .c() .get_msg_cf(CF_RAFT, &state_key) .unwrap() .unwrap(); @@ -139,10 +136,10 @@ fn test_node_merge_restart() { cluster.shutdown(); let engine = cluster.get_engine(leader.get_store_id()); let state_key = keys::region_state_key(left.get_id()); - let state: RegionLocalState = engine.c().get_msg_cf(CF_RAFT, &state_key).unwrap().unwrap(); + let state: RegionLocalState = engine.get_msg_cf(CF_RAFT, &state_key).unwrap().unwrap(); assert_eq!(state.get_state(), PeerState::Merging, "{:?}", state); let state_key = keys::region_state_key(right.get_id()); - let state: RegionLocalState = engine.c().get_msg_cf(CF_RAFT, &state_key).unwrap().unwrap(); + let state: RegionLocalState = engine.get_msg_cf(CF_RAFT, &state_key).unwrap().unwrap(); assert_eq!(state.get_state(), PeerState::Normal, "{:?}", state); fail::remove(schedule_merge_fp); cluster.start().unwrap(); @@ -157,7 +154,6 @@ fn test_node_merge_restart() { let state_key = keys::region_state_key(left.get_id()); let state: RegionLocalState = cluster .get_engine(i) - .c() .get_msg_cf(CF_RAFT, &state_key) .unwrap() .unwrap(); @@ -165,7 +161,6 @@ fn test_node_merge_restart() { let state_key = keys::region_state_key(right.get_id()); let state: RegionLocalState = cluster .get_engine(i) - .c() .get_msg_cf(CF_RAFT, &state_key) .unwrap() .unwrap(); diff --git a/tests/failpoints/cases/test_replica_read.rs b/tests/failpoints/cases/test_replica_read.rs index bd5003d23f2..e288828dc66 100644 --- a/tests/failpoints/cases/test_replica_read.rs +++ 
b/tests/failpoints/cases/test_replica_read.rs @@ -7,7 +7,6 @@ use std::{ }; use crossbeam::channel; -use engine_rocks::Compat; use engine_traits::{Peekable, RaftEngineReadOnly, CF_RAFT}; use futures::executor::block_on; use kvproto::raft_serverpb::{PeerState, RaftMessage, RegionLocalState}; @@ -229,7 +228,6 @@ fn test_read_applying_snapshot() { let region_key = keys::region_state_key(r1); let region_state: RegionLocalState = cluster .get_engine(3) - .c() .get_msg_cf(CF_RAFT, ®ion_key) .unwrap() .unwrap(); diff --git a/tests/failpoints/cases/test_sst_recovery.rs b/tests/failpoints/cases/test_sst_recovery.rs index e03e58bfa98..b15a43b3d35 100644 --- a/tests/failpoints/cases/test_sst_recovery.rs +++ b/tests/failpoints/cases/test_sst_recovery.rs @@ -1,17 +1,25 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{io::Write, path::Path, sync::Arc, time::Duration}; +use std::{fmt::Debug, io::Write, path::Path, sync::Arc, time::Duration}; -use engine_rocks::{ - raw::{CompactionOptions, DB}, - util::get_cf_handle, -}; +use engine_rocks::RocksEngine; use engine_rocks_helper::sst_recovery::*; -use engine_traits::CF_DEFAULT; +use engine_traits::{CompactExt, Peekable, CF_DEFAULT}; use test_raftstore::*; const CHECK_DURATION: Duration = Duration::from_millis(50); +#[track_caller] +fn assert_corruption(res: engine_traits::Result) { + match res { + Err(engine_traits::Error::Engine(s)) => { + // TODO: check code instead after using tirocks. 
+ assert!(s.state().contains("Corruption"), "{:?}", s); + } + _ => panic!("expected corruption, got {:?}", res), + } +} + #[test] fn test_sst_recovery_basic() { let (mut cluster, pd_client, engine1) = create_tikv_cluster_with_one_node_damaged(); @@ -43,19 +51,19 @@ fn test_sst_recovery_basic() { std::thread::sleep(CHECK_DURATION); - assert_eq!(&engine1.get(b"z1").unwrap().unwrap().to_owned(), b"val"); - assert_eq!(&engine1.get(b"z7").unwrap().unwrap().to_owned(), b"val"); - assert!(engine1.get(b"z4").unwrap_err().contains("Corruption")); + must_get_equal(&engine1, b"1", b"val"); + must_get_equal(&engine1, b"7", b"val"); + assert_corruption(engine1.get_value(b"z4")); fail::remove("sst_recovery_before_delete_files"); std::thread::sleep(CHECK_DURATION); - assert_eq!(&engine1.get(b"z1").unwrap().unwrap().to_owned(), b"val"); - assert_eq!(&engine1.get(b"z7").unwrap().unwrap().to_owned(), b"val"); - assert!(engine1.get(b"z4").unwrap().is_none()); + must_get_equal(&engine1, b"1", b"val"); + must_get_equal(&engine1, b"7", b"val"); + assert!(engine1.get_value(b"z4").unwrap().is_none()); // Damaged file has been deleted. - let files = engine1.get_live_files(); + let files = engine1.as_inner().get_live_files(); assert_eq!(files.get_files_count(), 2); assert_eq!(store_meta.lock().unwrap().damaged_ranges.len(), 0); @@ -75,7 +83,7 @@ fn test_sst_recovery_overlap_range_sst_exist() { cluster.must_put_cf(CF_DEFAULT, b"7", b"val_1"); cluster.flush_data(); - let files = engine1.get_live_files(); + let files = engine1.as_inner().get_live_files(); assert_eq!(files.get_files_count(), 4); // Remove peers for safe deletion of files in sst recovery. 
@@ -90,13 +98,13 @@ fn test_sst_recovery_overlap_range_sst_exist() { cluster.must_put_cf(CF_DEFAULT, b"4", b"val_2"); std::thread::sleep(CHECK_DURATION); - assert_eq!(&engine1.get(b"z1").unwrap().unwrap().to_owned(), b"val_1"); - assert_eq!(&engine1.get(b"z4").unwrap().unwrap().to_owned(), b"val_1"); - assert_eq!(&engine1.get(b"z7").unwrap().unwrap().to_owned(), b"val_1"); + must_get_equal(&engine1, b"1", b"val_1"); + must_get_equal(&engine1, b"4", b"val_1"); + must_get_equal(&engine1, b"7", b"val_1"); // Validate the damaged sst has been deleted. compact_files_to_target_level(&engine1, true, 3).unwrap(); - let files = engine1.get_live_files(); + let files = engine1.as_inner().get_live_files(); assert_eq!(files.get_files_count(), 1); must_get_equal(&engine1, b"4", b"val_1"); @@ -119,10 +127,10 @@ fn test_sst_recovery_atomic_when_adding_peer() { pd_client.must_remove_peer(region.id, peer.clone()); std::thread::sleep(CHECK_DURATION); - assert_eq!(&engine1.get(b"z1").unwrap().unwrap().to_owned(), b"val"); - assert_eq!(&engine1.get(b"z7").unwrap().unwrap().to_owned(), b"val"); + must_get_equal(&engine1, b"1", b"val"); + must_get_equal(&engine1, b"7", b"val"); // delete file action is paused before. - assert!(engine1.get(b"z4").unwrap_err().contains("Corruption")); + assert_corruption(engine1.get_value(b"z4")); let region = cluster.get_region(b"3"); // add peer back on store 1 to validate atomic of sst recovery. @@ -148,11 +156,11 @@ fn disturb_sst_file(path: &Path) { // To trigger compaction and test background error. // set `compact_all` to `false` only compact the latest flushed file. 
fn compact_files_to_target_level( - engine: &Arc, + engine: &RocksEngine, compact_all: bool, level: i32, -) -> Result<(), String> { - let files = engine.get_live_files(); +) -> engine_traits::Result<()> { + let files = engine.as_inner().get_live_files(); let mut file_names = vec![]; if compact_all { for i in 0..files.get_files_count() { @@ -166,12 +174,11 @@ fn compact_files_to_target_level( file_names.push(name); } - let handle = get_cf_handle(engine, CF_DEFAULT).unwrap(); - engine.compact_files_cf(handle, &CompactionOptions::new(), &file_names, level) + engine.compact_files_cf(CF_DEFAULT, file_names, Some(level), 1, false) } fn create_tikv_cluster_with_one_node_damaged() --> (Cluster, Arc, Arc) { +-> (Cluster, Arc, RocksEngine) { let mut cluster = new_server_cluster(0, 3); let pd_client = cluster.pd_client.clone(); pd_client.disable_default_operator(); @@ -227,7 +234,7 @@ fn create_tikv_cluster_with_one_node_damaged() cluster.must_split(®ion, b"7"); // after 3 flushing and compacts, now 3 sst files exist. - let files = engine1.get_live_files(); + let files = engine1.as_inner().get_live_files(); assert_eq!(files.get_files_count(), 3); // disturb sst file range [3,5] @@ -243,11 +250,7 @@ fn create_tikv_cluster_with_one_node_damaged() disturb_sst_file(&sst_path); // The sst file is damaged, so this action will fail. 
- assert!( - compact_files_to_target_level(&engine1, true, 3) - .unwrap_err() - .contains("Corruption") - ); + assert_corruption(compact_files_to_target_level(&engine1, true, 3)); (cluster, pd_client, engine1) } diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index 8f8238e27db..bae6262aeb4 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -8,7 +8,7 @@ use std::{ use concurrency_manager::ConcurrencyManager; use engine_rocks::RocksEngine; -use engine_traits::{Engines, ALL_CFS}; +use engine_traits::{Engines, ALL_CFS, CF_DEFAULT}; use kvproto::raft_serverpb::RaftMessage; use raftstore::{ coprocessor::CoprocessorHost, @@ -49,25 +49,12 @@ impl Transport for MockTransport { } fn create_tmp_engine(dir: &TempDir) -> Engines { - let db = Arc::new( - engine_rocks::raw_util::new_engine( - dir.path().join("db").to_str().unwrap(), - None, - ALL_CFS, - None, - ) - .unwrap(), - ); - let raft_db = Arc::new( - engine_rocks::raw_util::new_engine( - dir.path().join("raft").to_str().unwrap(), - None, - &[], - None, - ) - .unwrap(), - ); - Engines::new(RocksEngine::from_db(db), RocksEngine::from_db(raft_db)) + let db = + engine_rocks::util::new_engine(dir.path().join("db").to_str().unwrap(), ALL_CFS).unwrap(); + let raft_db = + engine_rocks::util::new_engine(dir.path().join("raft").to_str().unwrap(), &[CF_DEFAULT]) + .unwrap(); + Engines::new(db, raft_db) } fn start_raftstore( diff --git a/tests/integrations/config/dynamic/split_check.rs b/tests/integrations/config/dynamic/split_check.rs index 325ef8e9929..582ce8f115e 100644 --- a/tests/integrations/config/dynamic/split_check.rs +++ b/tests/integrations/config/dynamic/split_check.rs @@ -2,14 +2,12 @@ use std::{ path::Path, - sync::{ - mpsc::{self, sync_channel}, - Arc, - }, + sync::mpsc::{self, sync_channel}, time::Duration, }; -use engine_rocks::{raw::DB, Compat}; +use engine_rocks::RocksEngine; +use 
engine_traits::CF_DEFAULT; use raftstore::{ coprocessor::{ config::{Config, SplitCheckConfigManager}, @@ -20,22 +18,18 @@ use raftstore::{ use tikv::config::{ConfigController, Module, TiKvConfig}; use tikv_util::worker::{LazyWorker, Scheduler, Worker}; -fn tmp_engine>(path: P) -> Arc { - Arc::new( - engine_rocks::raw_util::new_engine( - path.as_ref().to_str().unwrap(), - None, - &["split-check-config"], - None, - ) - .unwrap(), +fn tmp_engine>(path: P) -> RocksEngine { + engine_rocks::util::new_engine( + path.as_ref().to_str().unwrap(), + &[CF_DEFAULT, "split-check-config"], ) + .unwrap() } -fn setup(cfg: TiKvConfig, engine: Arc) -> (ConfigController, LazyWorker) { +fn setup(cfg: TiKvConfig, engine: RocksEngine) -> (ConfigController, LazyWorker) { let (router, _) = sync_channel(1); let runner = Runner::new( - engine.c().clone(), + engine, router.clone(), CoprocessorHost::new(router, cfg.coprocessor.clone()), ); diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 058728cb0a3..f2019d04ea7 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -5,8 +5,7 @@ use std::{ }; use concurrency_manager::ConcurrencyManager; -use engine_rocks::{Compat, RocksEngine}; -use engine_traits::{Engines, Peekable, ALL_CFS, CF_RAFT}; +use engine_traits::{Engines, Peekable, ALL_CFS, CF_DEFAULT, CF_RAFT}; use kvproto::{kvrpcpb::ApiVersion, metapb, raft_serverpb::RegionLocalState}; use raftstore::{ coprocessor::CoprocessorHost, @@ -44,19 +43,12 @@ fn test_node_bootstrap_with_prepared_data() { let (_, system) = fsm::create_raft_batch_system(&cfg.raft_store); let simulate_trans = SimulateTransport::new(ChannelTransport::new()); let tmp_path = Builder::new().prefix("test_cluster").tempdir().unwrap(); - let engine = Arc::new( - engine_rocks::raw_util::new_engine(tmp_path.path().to_str().unwrap(), None, ALL_CFS, None) - .unwrap(), - ); + let engine = + 
engine_rocks::util::new_engine(tmp_path.path().to_str().unwrap(), ALL_CFS).unwrap(); let tmp_path_raft = tmp_path.path().join(Path::new("raft")); - let raft_engine = Arc::new( - engine_rocks::raw_util::new_engine(tmp_path_raft.to_str().unwrap(), None, &[], None) - .unwrap(), - ); - let engines = Engines::new( - RocksEngine::from_db(Arc::clone(&engine)), - RocksEngine::from_db(Arc::clone(&raft_engine)), - ); + let raft_engine = + engine_rocks::util::new_engine(tmp_path_raft.to_str().unwrap(), &[CF_DEFAULT]).unwrap(); + let engines = Engines::new(engine.clone(), raft_engine); let tmp_mgr = Builder::new().prefix("test_cluster").tempdir().unwrap(); let bg_worker = WorkerBuilder::new("background").thread_count(2).create(); let mut node = Node::new( @@ -81,7 +73,6 @@ fn test_node_bootstrap_with_prepared_data() { let region = node.prepare_bootstrap_cluster(&engines, 1).unwrap(); assert!( engine - .c() .get_msg::(keys::PREPARE_BOOTSTRAP_KEY) .unwrap() .is_some() @@ -89,7 +80,6 @@ fn test_node_bootstrap_with_prepared_data() { let region_state_key = keys::region_state_key(region.get_id()); assert!( engine - .c() .get_msg_cf::(CF_RAFT, ®ion_state_key) .unwrap() .is_some() @@ -121,15 +111,13 @@ fn test_node_bootstrap_with_prepared_data() { ) .unwrap(); assert!( - Arc::clone(&engine) - .c() + engine .get_msg::(keys::PREPARE_BOOTSTRAP_KEY) .unwrap() .is_none() ); assert!( engine - .c() .get_msg_cf::(CF_RAFT, ®ion_state_key) .unwrap() .is_none() diff --git a/tests/integrations/raftstore/test_clear_stale_data.rs b/tests/integrations/raftstore/test_clear_stale_data.rs index b67148b473d..8010d4c956c 100644 --- a/tests/integrations/raftstore/test_clear_stale_data.rs +++ b/tests/integrations/raftstore/test_clear_stale_data.rs @@ -1,30 +1,31 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_rocks::raw::{CompactOptions, Writable, DB}; -use engine_traits::{CF_DEFAULT, CF_LOCK}; +use engine_rocks::{raw::CompactOptions, RocksEngine}; +use engine_traits::{MiscExt, Peekable, SyncMutable, CF_DEFAULT, CF_LOCK}; use test_raftstore::*; -fn init_db_with_sst_files(db: &DB, level: i32, n: u8) { +fn init_db_with_sst_files(db: &RocksEngine, level: i32, n: u8) { let mut opts = CompactOptions::new(); opts.set_change_level(true); opts.set_target_level(level); for cf_name in &[CF_DEFAULT, CF_LOCK] { - let handle = db.cf_handle(cf_name).unwrap(); + let handle = db.as_inner().cf_handle(cf_name).unwrap(); // Each SST file has only one kv. for i in 0..n { let k = keys::data_key(&[i]); - db.put_cf(handle, &k, &k).unwrap(); - db.flush_cf(handle, true).unwrap(); - db.compact_range_cf_opt(handle, &opts, None, None); + db.put_cf(cf_name, &k, &k).unwrap(); + db.flush_cf(cf_name, true).unwrap(); + db.as_inner() + .compact_range_cf_opt(handle, &opts, None, None); } } } -fn check_db_files_at_level(db: &DB, level: i32, num_files: u64) { +fn check_db_files_at_level(db: &RocksEngine, level: i32, num_files: u64) { for cf_name in &[CF_DEFAULT, CF_LOCK] { - let handle = db.cf_handle(cf_name).unwrap(); + let handle = db.as_inner().cf_handle(cf_name).unwrap(); let name = format!("rocksdb.num-files-at-level{}", level); - let value = db.get_property_int_cf(handle, &name).unwrap(); + let value = db.as_inner().get_property_int_cf(handle, &name).unwrap(); if value != num_files { panic!( "cf {} level {} should have {} files, got {}", @@ -34,11 +35,10 @@ fn check_db_files_at_level(db: &DB, level: i32, num_files: u64) { } } -fn check_kv_in_all_cfs(db: &DB, i: u8, found: bool) { +fn check_kv_in_all_cfs(db: &RocksEngine, i: u8, found: bool) { for cf_name in &[CF_DEFAULT, CF_LOCK] { - let handle = db.cf_handle(cf_name).unwrap(); let k = keys::data_key(&[i]); - let v = db.get_cf(handle, &k).unwrap(); + let v = db.get_value_cf(cf_name, &k).unwrap(); if found { assert_eq!(v.unwrap(), &k); } 
else { diff --git a/tests/integrations/raftstore/test_compact_after_delete.rs b/tests/integrations/raftstore/test_compact_after_delete.rs index b31b86b3bfb..13cfb535e97 100644 --- a/tests/integrations/raftstore/test_compact_after_delete.rs +++ b/tests/integrations/raftstore/test_compact_after_delete.rs @@ -63,8 +63,7 @@ fn test_compact_after_delete(cluster: &mut Cluster) { cluster.must_delete_cf(CF_WRITE, &k); } for engines in cluster.engines.values() { - let cf = get_cf_handle(engines.kv.as_inner(), CF_WRITE).unwrap(); - engines.kv.as_inner().flush_cf(cf, true).unwrap(); + engines.kv.flush_cf(CF_WRITE, true).unwrap(); } // wait for compaction. diff --git a/tests/integrations/raftstore/test_compact_log.rs b/tests/integrations/raftstore/test_compact_log.rs index abaa18b50fa..e7d14a6eb45 100644 --- a/tests/integrations/raftstore/test_compact_log.rs +++ b/tests/integrations/raftstore/test_compact_log.rs @@ -53,7 +53,7 @@ fn test_compact_count_limit(cluster: &mut Cluster) { let mut before_states = HashMap::default(); for (&id, engines) in &cluster.engines { - must_get_equal(engines.kv.as_inner(), b"k1", b"v1"); + must_get_equal(&engines.kv, b"k1", b"v1"); let mut state: RaftApplyState = get_raft_msg_or_default(engines, &keys::apply_state_key(1)); let state = state.take_truncated_state(); // compact should not start @@ -119,7 +119,7 @@ fn test_compact_many_times(cluster: &mut Cluster) { let mut before_states = HashMap::default(); for (&id, engines) in &cluster.engines { - must_get_equal(engines.kv.as_inner(), b"k1", b"v1"); + must_get_equal(&engines.kv, b"k1", b"v1"); let mut state: RaftApplyState = get_raft_msg_or_default(engines, &keys::apply_state_key(1)); let state = state.take_truncated_state(); // compact should not start @@ -190,7 +190,7 @@ fn test_compact_size_limit(cluster: &mut Cluster) { if id == 1 { continue; } - must_get_equal(engines.kv.as_inner(), b"k1", b"v1"); + must_get_equal(&engines.kv, b"k1", b"v1"); let mut state: RaftApplyState = 
get_raft_msg_or_default(engines, &keys::apply_state_key(1)); let state = state.take_truncated_state(); // compact should not start @@ -263,7 +263,7 @@ fn test_compact_reserve_max_ticks(cluster: &mut Cluster) { let mut before_states = HashMap::default(); for (&id, engines) in &cluster.engines { - must_get_equal(engines.kv.as_inner(), b"k1", b"v1"); + must_get_equal(&engines.kv, b"k1", b"v1"); let mut state: RaftApplyState = get_raft_msg_or_default(engines, &apply_key); let state = state.take_truncated_state(); // compact should not start diff --git a/tests/integrations/raftstore/test_conf_change.rs b/tests/integrations/raftstore/test_conf_change.rs index ab4166d5826..3778794387a 100644 --- a/tests/integrations/raftstore/test_conf_change.rs +++ b/tests/integrations/raftstore/test_conf_change.rs @@ -9,7 +9,6 @@ use std::{ time::Duration, }; -use engine_rocks::Compat; use engine_traits::{Peekable, CF_RAFT}; use futures::executor::block_on; use kvproto::{ @@ -176,7 +175,6 @@ fn test_pd_conf_change(cluster: &mut Cluster) { let engine_2 = cluster.get_engine(peer2.get_store_id()); assert!( engine_2 - .c() .get_value(&keys::data_key(b"k1")) .unwrap() .is_none() @@ -402,7 +400,6 @@ fn test_after_remove_itself(cluster: &mut Cluster) { for _ in 0..250 { let region: RegionLocalState = engine1 - .c() .get_msg_cf(CF_RAFT, &keys::region_state_key(r1)) .unwrap() .unwrap(); @@ -412,7 +409,6 @@ fn test_after_remove_itself(cluster: &mut Cluster) { sleep_ms(20); } let region: RegionLocalState = engine1 - .c() .get_msg_cf(CF_RAFT, &keys::region_state_key(r1)) .unwrap() .unwrap(); diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 4d7914429ab..1146e152681 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -2,7 +2,6 @@ use std::{iter::*, sync::*, thread, time::*}; -use engine_rocks::Compat; use engine_traits::{Peekable, CF_LOCK, CF_RAFT, CF_WRITE}; use kvproto::{ 
kvrpcpb::Context, @@ -93,7 +92,6 @@ fn test_node_base_merge() { for _ in 0..3 { state = cluster .get_engine(i) - .c() .get_msg_cf(CF_RAFT, &state_key) .unwrap() .unwrap(); @@ -534,7 +532,6 @@ fn test_node_merge_brain_split() { let state_key = keys::region_state_key(left.get_id()); let state: RegionLocalState = cluster .get_engine(3) - .c() .get_msg_cf(CF_RAFT, &state_key) .unwrap() .unwrap(); diff --git a/tests/integrations/raftstore/test_multi.rs b/tests/integrations/raftstore/test_multi.rs index 00fb8f99e05..296d6f207cf 100644 --- a/tests/integrations/raftstore/test_multi.rs +++ b/tests/integrations/raftstore/test_multi.rs @@ -6,7 +6,6 @@ use std::{ time::Duration, }; -use engine_rocks::Compat; use engine_traits::Peekable; use kvproto::raft_cmdpb::RaftCmdResponse; use raft::eraftpb::MessageType; @@ -33,7 +32,7 @@ fn test_multi_base_after_bootstrap(cluster: &mut Cluster) { thread::sleep(Duration::from_millis(200)); cluster.assert_quorum( - |engine| match engine.c().get_value(&keys::data_key(key)).unwrap() { + |engine| match engine.get_value(&keys::data_key(key)).unwrap() { None => false, Some(v) => &*v == value, }, @@ -45,13 +44,7 @@ fn test_multi_base_after_bootstrap(cluster: &mut Cluster) { // sleep 200ms in case the commit packet is dropped by simulated transport. thread::sleep(Duration::from_millis(200)); - cluster.assert_quorum(|engine| { - engine - .c() - .get_value(&keys::data_key(key)) - .unwrap() - .is_none() - }); + cluster.assert_quorum(|engine| engine.get_value(&keys::data_key(key)).unwrap().is_none()); // TODO add epoch not match test cases. 
} @@ -79,12 +72,9 @@ fn test_multi_leader_crash(cluster: &mut Cluster) { cluster.must_put(key2, value2); cluster.must_delete(key1); - must_get_none( - cluster.engines[&last_leader.get_store_id()].kv.as_inner(), - key2, - ); + must_get_none(&cluster.engines[&last_leader.get_store_id()].kv, key2); must_get_equal( - cluster.engines[&last_leader.get_store_id()].kv.as_inner(), + &cluster.engines[&last_leader.get_store_id()].kv, key1, value1, ); @@ -93,14 +83,11 @@ fn test_multi_leader_crash(cluster: &mut Cluster) { cluster.run_node(last_leader.get_store_id()).unwrap(); must_get_equal( - cluster.engines[&last_leader.get_store_id()].kv.as_inner(), + &cluster.engines[&last_leader.get_store_id()].kv, key2, value2, ); - must_get_none( - cluster.engines[&last_leader.get_store_id()].kv.as_inner(), - key1, - ); + must_get_none(&cluster.engines[&last_leader.get_store_id()].kv, key1); } fn test_multi_cluster_restart(cluster: &mut Cluster) { diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 2d6657e5a90..53c56510574 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -7,7 +7,6 @@ use std::{ time::Duration, }; -use engine_rocks::Compat; use engine_traits::{Iterable, Peekable, CF_DEFAULT, CF_WRITE}; use keys::data_key; use kvproto::{metapb, pdpb, raft_cmdpb::*, raft_serverpb::RaftMessage}; @@ -295,7 +294,7 @@ fn check_cluster(cluster: &mut Cluster, k: &[u8], v: &[u8], all_ // Note that a follower can still commit the log by an empty MsgAppend // when bcast commit is disabled. A heartbeat response comes to leader // before MsgAppendResponse will trigger MsgAppend. 
- match engine.c().get_value(&keys::data_key(k)).unwrap() { + match engine.get_value(&keys::data_key(k)).unwrap() { Some(res) => assert_eq!(v, &res[..]), None => missing_count += 1, } diff --git a/tests/integrations/raftstore/test_stale_peer.rs b/tests/integrations/raftstore/test_stale_peer.rs index 92e9d6ac77b..e9edcc49966 100644 --- a/tests/integrations/raftstore/test_stale_peer.rs +++ b/tests/integrations/raftstore/test_stale_peer.rs @@ -4,7 +4,6 @@ use std::{sync::Arc, thread, time::*}; -use engine_rocks::Compat; use engine_traits::{Peekable, CF_RAFT}; use kvproto::raft_serverpb::{PeerState, RegionLocalState}; use raft::eraftpb::MessageType; @@ -79,11 +78,7 @@ fn test_stale_peer_out_of_region(cluster: &mut Cluster) { must_get_none(&engine_2, key); must_get_none(&engine_2, key2); let state_key = keys::region_state_key(1); - let state: RegionLocalState = engine_2 - .c() - .get_msg_cf(CF_RAFT, &state_key) - .unwrap() - .unwrap(); + let state: RegionLocalState = engine_2.get_msg_cf(CF_RAFT, &state_key).unwrap().unwrap(); assert_eq!(state.get_state(), PeerState::Tombstone); } @@ -171,11 +166,7 @@ fn test_stale_peer_without_data(cluster: &mut Cluster, right_de // Before peer 4 is destroyed, a tombstone mark will be written into the engine. // So we could check the tombstone mark to make sure peer 4 is destroyed. let state_key = keys::region_state_key(new_region_id); - let state: RegionLocalState = engine3 - .c() - .get_msg_cf(CF_RAFT, &state_key) - .unwrap() - .unwrap(); + let state: RegionLocalState = engine3.get_msg_cf(CF_RAFT, &state_key).unwrap().unwrap(); assert_eq!(state.get_state(), PeerState::Tombstone); // other region should not be affected. @@ -258,11 +249,7 @@ fn test_stale_learner() { // Check not leader should fail, all data should be removed. 
must_get_none(&engine3, b"k1"); let state_key = keys::region_state_key(r1); - let state: RegionLocalState = engine3 - .c() - .get_msg_cf(CF_RAFT, &state_key) - .unwrap() - .unwrap(); + let state: RegionLocalState = engine3.get_msg_cf(CF_RAFT, &state_key).unwrap().unwrap(); assert_eq!(state.get_state(), PeerState::Tombstone); } @@ -317,10 +304,6 @@ fn test_stale_learner_with_read_index() { // Stale learner should be destroyed due to interaction between leader must_get_none(&engine3, b"k1"); let state_key = keys::region_state_key(r1); - let state: RegionLocalState = engine3 - .c() - .get_msg_cf(CF_RAFT, &state_key) - .unwrap() - .unwrap(); + let state: RegionLocalState = engine3.get_msg_cf(CF_RAFT, &state_key).unwrap().unwrap(); assert_eq!(state.get_state(), PeerState::Tombstone); } diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index c9f698edd65..03c0f0a82b2 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -7,6 +7,7 @@ use std::{ }; use api_version::{test_kv_format_impl, KvFormat}; +use engine_traits::MiscExt; use futures::{executor::block_on, SinkExt, StreamExt}; use grpcio::*; use kvproto::{kvrpcpb::*, pdpb::QueryKind, tikvpb::*, tikvpb_grpc::TikvClient}; diff --git a/tests/integrations/raftstore/test_tombstone.rs b/tests/integrations/raftstore/test_tombstone.rs index 18a1e5a96ca..189587dea44 100644 --- a/tests/integrations/raftstore/test_tombstone.rs +++ b/tests/integrations/raftstore/test_tombstone.rs @@ -3,8 +3,7 @@ use std::{sync::Arc, thread, time::Duration}; use crossbeam::channel; -use engine_rocks::{raw::Writable, Compat}; -use engine_traits::{Iterable, Peekable, RaftEngineReadOnly, SyncMutable, CF_RAFT}; +use engine_traits::{CFNamesExt, Iterable, Peekable, RaftEngineReadOnly, SyncMutable, CF_RAFT}; use kvproto::raft_serverpb::{PeerState, RaftMessage, RegionLocalState, StoreIdent}; use protobuf::Message; use raft::eraftpb::MessageType; @@ -49,7 
+48,6 @@ fn test_tombstone(cluster: &mut Cluster) { let mut existing_kvs = vec![]; for cf in engine_2.cf_names() { engine_2 - .c() .scan(cf, b"", &[0xFF], false, |k, v| { existing_kvs.push((k.to_vec(), v.to_vec())); Ok(true) @@ -134,7 +132,7 @@ fn test_fast_destroy(cluster: &mut Cluster) { cluster.stop_node(3); let key = keys::region_state_key(1); - let state: RegionLocalState = engine_3.c().get_msg_cf(CF_RAFT, &key).unwrap().unwrap(); + let state: RegionLocalState = engine_3.get_msg_cf(CF_RAFT, &key).unwrap().unwrap(); assert_eq!(state.get_state(), PeerState::Tombstone); // Force add some dirty data. @@ -245,14 +243,12 @@ fn test_server_stale_meta() { let engine_3 = cluster.get_engine(3); let mut state: RegionLocalState = engine_3 - .c() .get_msg_cf(CF_RAFT, &keys::region_state_key(1)) .unwrap() .unwrap(); state.set_state(PeerState::Tombstone); engine_3 - .c() .put_msg_cf(CF_RAFT, &keys::region_state_key(1), &state) .unwrap(); cluster.clear_send_filters(); @@ -316,7 +312,7 @@ fn test_safe_tombstone_gc() { let mut state: Option = None; let timer = Instant::now(); while timer.saturating_elapsed() < Duration::from_secs(5) { - state = cluster.get_engine(4).c().get_msg_cf(CF_RAFT, &key).unwrap(); + state = cluster.get_engine(4).get_msg_cf(CF_RAFT, &key).unwrap(); if state.is_some() { break; } diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 18f3f7278d5..367f38114f6 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -9,7 +9,6 @@ use std::{ use api_version::{ApiV1, ApiV1Ttl, ApiV2, KvFormat}; use concurrency_manager::ConcurrencyManager; -use engine_rocks::{raw::Writable, Compat}; use engine_traits::{ MiscExt, Peekable, RaftEngine, RaftEngineReadOnly, SyncMutable, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, @@ -709,7 +708,7 @@ fn test_debug_get() { let engine = cluster.get_engine(store_id); let key = keys::data_key(k); engine.put(&key, v).unwrap(); - 
assert_eq!(engine.get(&key).unwrap().unwrap(), v); + assert_eq!(engine.get_value(&key).unwrap().unwrap(), v); // Debug get let mut req = debugpb::GetRequest::default(); @@ -784,12 +783,10 @@ fn test_debug_region_info() { let mut apply_state = raft_serverpb::RaftApplyState::default(); apply_state.set_applied_index(42); kv_engine - .c() .put_msg_cf(CF_RAFT, &apply_state_key, &apply_state) .unwrap(); assert_eq!( kv_engine - .c() .get_msg_cf::(CF_RAFT, &apply_state_key) .unwrap() .unwrap(), @@ -800,12 +797,10 @@ fn test_debug_region_info() { let mut region_state = raft_serverpb::RegionLocalState::default(); region_state.set_state(raft_serverpb::PeerState::Tombstone); kv_engine - .c() .put_msg_cf(CF_RAFT, ®ion_state_key, ®ion_state) .unwrap(); assert_eq!( kv_engine - .c() .get_msg_cf::(CF_RAFT, ®ion_state_key) .unwrap() .unwrap(), @@ -844,7 +839,6 @@ fn test_debug_region_size() { let mut state = RegionLocalState::default(); state.set_region(region); engine - .c() .put_msg_cf(CF_RAFT, ®ion_state_key, &state) .unwrap(); @@ -852,8 +846,7 @@ fn test_debug_region_size() { // At lease 8 bytes for the WRITE cf. 
let (k, v) = (keys::data_key(b"kkkk_kkkk"), b"v"); for cf in &cfs { - let cf_handle = engine.cf_handle(cf).unwrap(); - engine.put_cf(cf_handle, k.as_slice(), v).unwrap(); + engine.put_cf(cf, k.as_slice(), v).unwrap(); } let mut req = debugpb::RegionSizeRequest::default(); @@ -938,8 +931,7 @@ fn test_debug_scan_mvcc() { TimeStamp::zero(), ) .to_bytes(); - let cf_handle = engine.cf_handle(CF_LOCK).unwrap(); - engine.put_cf(cf_handle, k.as_slice(), &v).unwrap(); + engine.put_cf(CF_LOCK, k.as_slice(), &v).unwrap(); } let mut req = debugpb::ScanMvccRequest::default(); diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index cfc250a8e15..cd311386769 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -2,19 +2,17 @@ use std::{ path::{Path, PathBuf}, - sync::Arc, thread, time::Duration, }; use engine_rocks::{ - raw::{IngestExternalFileOptions, Writable}, - util::{get_cf_handle, new_temp_engine}, - Compat, RocksEngine, RocksSnapshot, RocksSstWriterBuilder, + raw::IngestExternalFileOptions, util::new_temp_engine, RocksEngine, RocksSnapshot, + RocksSstWriterBuilder, }; use engine_traits::{ - CompactExt, DeleteStrategy, Engines, KvEngine, MiscExt, Range, SstWriter, SstWriterBuilder, - ALL_CFS, CF_DEFAULT, CF_WRITE, + CFOptionsExt, CompactExt, DeleteStrategy, Engines, KvEngine, MiscExt, Range, SstWriter, + SstWriterBuilder, SyncMutable, CF_DEFAULT, CF_WRITE, }; use keys::data_key; use kvproto::metapb::{Peer, Region}; @@ -64,7 +62,8 @@ fn test_turnoff_titan() { } cluster.must_flush_cf(CF_DEFAULT, true); for i in cluster.get_node_ids().into_iter() { - let db = cluster.get_engine(i); + let engine = cluster.get_engine(i); + let db = engine.as_inner(); assert_eq!( db.get_property_int("rocksdb.num-files-at-level0").unwrap(), 2 @@ -96,9 +95,8 @@ fn test_turnoff_titan() { assert_eq!(cluster.must_get(b"k1"), None); for i in cluster.get_node_ids().into_iter() { let db = 
cluster.get_engine(i); - let handle = get_cf_handle(&db, CF_DEFAULT).unwrap(); let opt = vec![("blob_run_mode", "kFallback")]; - assert!(db.set_options_cf(handle, &opt).is_ok()); + assert!(db.set_options_cf(CF_DEFAULT, &opt).is_ok()); } cluster.compact_data(); let mut all_check_pass = true; @@ -107,7 +105,8 @@ fn test_turnoff_titan() { sleep_ms(10); all_check_pass = true; for i in cluster.get_node_ids().into_iter() { - let db = cluster.get_engine(i); + let engine = cluster.get_engine(i); + let db = engine.as_inner(); if db.get_property_int("rocksdb.num-files-at-level0").unwrap() != 0 { all_check_pass = false; break; @@ -171,24 +170,9 @@ fn test_delete_files_in_range_for_titan() { let raft_path = path.path().join(Path::new("titan")); let engines = Engines::new( - RocksEngine::from_db(Arc::new( - engine_rocks::raw_util::new_engine( - path.path().to_str().unwrap(), - Some(kv_db_opts), - ALL_CFS, - Some(kv_cfs_opts), - ) - .unwrap(), - )), - RocksEngine::from_db(Arc::new( - engine_rocks::raw_util::new_engine( - raft_path.to_str().unwrap(), - None, - &[CF_DEFAULT], - None, - ) + engine_rocks::util::new_engine_opt(path.path().to_str().unwrap(), kv_db_opts, kv_cfs_opts) .unwrap(), - )), + engine_rocks::util::new_engine(raft_path.to_str().unwrap(), &[CF_DEFAULT]).unwrap(), ); // Write some mvcc keys and values into db @@ -197,37 +181,43 @@ fn test_delete_files_in_range_for_titan() { let start_ts = 7.into(); let commit_ts = 8.into(); let write = Write::new(WriteType::Put, start_ts, None); - let db = engines.kv.as_inner(); - let default_cf = db.cf_handle(CF_DEFAULT).unwrap(); - let write_cf = db.cf_handle(CF_WRITE).unwrap(); - db.put_cf( - default_cf, - &data_key(Key::from_raw(b"a").append_ts(start_ts).as_encoded()), - b"a_value", - ) - .unwrap(); - db.put_cf( - write_cf, - &data_key(Key::from_raw(b"a").append_ts(commit_ts).as_encoded()), - &write.as_ref().to_bytes(), - ) - .unwrap(); - db.put_cf( - default_cf, - 
&data_key(Key::from_raw(b"b").append_ts(start_ts).as_encoded()), - b"b_value", - ) - .unwrap(); - db.put_cf( - write_cf, - &data_key(Key::from_raw(b"b").append_ts(commit_ts).as_encoded()), - &write.as_ref().to_bytes(), - ) - .unwrap(); + engines + .kv + .put_cf( + CF_DEFAULT, + &data_key(Key::from_raw(b"a").append_ts(start_ts).as_encoded()), + b"a_value", + ) + .unwrap(); + engines + .kv + .put_cf( + CF_WRITE, + &data_key(Key::from_raw(b"a").append_ts(commit_ts).as_encoded()), + &write.as_ref().to_bytes(), + ) + .unwrap(); + engines + .kv + .put_cf( + CF_DEFAULT, + &data_key(Key::from_raw(b"b").append_ts(start_ts).as_encoded()), + b"b_value", + ) + .unwrap(); + engines + .kv + .put_cf( + CF_WRITE, + &data_key(Key::from_raw(b"b").append_ts(commit_ts).as_encoded()), + &write.as_ref().to_bytes(), + ) + .unwrap(); // Flush and compact the kvs into L6. - db.flush(true).unwrap(); - db.c().compact_files_in_range(None, None, None).unwrap(); + engines.kv.flush(true).unwrap(); + engines.kv.compact_files_in_range(None, None, None).unwrap(); + let db = engines.kv.as_inner(); let value = db.get_property_int("rocksdb.num-files-at-level0").unwrap(); assert_eq!(value, 0); let value = db.get_property_int("rocksdb.num-files-at-level6").unwrap(); @@ -247,7 +237,8 @@ fn test_delete_files_in_range_for_titan() { writer.finish().unwrap(); let mut opts = IngestExternalFileOptions::new(); opts.move_files(true); - db.ingest_external_file_cf(default_cf, &opts, &[sst_file_path.to_str().unwrap()]) + let cf_default = db.cf_handle(CF_DEFAULT).unwrap(); + db.ingest_external_file_cf(cf_default, &opts, &[sst_file_path.to_str().unwrap()]) .unwrap(); // Now the LSM structure of default cf is: @@ -265,12 +256,12 @@ fn test_delete_files_in_range_for_titan() { assert_eq!(value, 1); // Used to trigger titan gc - let db = engines.kv.as_inner(); - db.put(b"1", b"1").unwrap(); - db.flush(true).unwrap(); - db.put(b"2", b"2").unwrap(); - db.flush(true).unwrap(); - db.c() + let engine = &engines.kv; + 
engine.put(b"1", b"1").unwrap(); + engine.flush(true).unwrap(); + engine.put(b"2", b"2").unwrap(); + engine.flush(true).unwrap(); + engine .compact_files_in_range(Some(b"0"), Some(b"3"), Some(1)) .unwrap(); @@ -286,6 +277,7 @@ fn test_delete_files_in_range_for_titan() { // blob2: (1, 1) // blob3: (2, 2) // blob4: (b_7, b_value) + let db = engine.as_inner(); let value = db.get_property_int("rocksdb.num-files-at-level0").unwrap(); assert_eq!(value, 0); let value = db.get_property_int("rocksdb.num-files-at-level1").unwrap(); From 4f8f731485906465b868ee32a4ba7d550c0631e2 Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 28 Jul 2022 14:27:11 +0800 Subject: [PATCH 0118/1149] *: limit comment width by rustfmt (#13139) close tikv/tikv#13150 limit comment width to 80 by rustfmt Signed-off-by: Connor1996 --- cmd/tikv-ctl/src/cmd.rs | 29 +- cmd/tikv-ctl/src/executor.rs | 4 +- cmd/tikv-ctl/src/main.rs | 9 +- components/api_version/src/api_v1.rs | 4 +- components/api_version/src/api_v1ttl.rs | 4 +- components/api_version/src/api_v2.rs | 19 +- components/api_version/src/lib.rs | 27 +- .../backup-stream/src/checkpoint_manager.rs | 15 +- components/backup-stream/src/endpoint.rs | 37 +- components/backup-stream/src/event_loader.rs | 34 +- .../backup-stream/src/metadata/client.rs | 20 +- components/backup-stream/src/metadata/keys.rs | 9 +- .../backup-stream/src/metadata/store/etcd.rs | 20 +- .../src/metadata/store/lazy_etcd.rs | 7 +- .../backup-stream/src/metadata/store/mod.rs | 3 +- .../src/metadata/store/slash_etc.rs | 3 +- components/backup-stream/src/observer.rs | 18 +- components/backup-stream/src/router.rs | 95 ++-- .../backup-stream/src/subscription_manager.rs | 35 +- .../backup-stream/src/subscription_track.rs | 56 +- components/backup-stream/src/utils.rs | 56 +- components/backup-stream/tests/mod.rs | 19 +- components/backup/src/endpoint.rs | 39 +- components/backup/src/softlimit.rs | 20 +- components/backup/src/utils.rs | 18 +- components/backup/src/writer.rs | 9 +- 
.../batch-system/benches/batch-system.rs | 4 +- components/batch-system/src/batch.rs | 43 +- components/batch-system/src/router.rs | 4 +- components/causal_ts/src/config.rs | 5 +- components/causal_ts/src/lib.rs | 3 +- components/causal_ts/src/observer.rs | 26 +- components/causal_ts/src/tso.rs | 23 +- components/cdc/src/channel.rs | 3 +- components/cdc/src/delegate.rs | 44 +- components/cdc/src/endpoint.rs | 38 +- components/cdc/src/initializer.rs | 13 +- components/cdc/src/metrics.rs | 6 +- components/cdc/src/observer.rs | 10 +- components/cdc/src/old_value.rs | 11 +- .../cdc/tests/failpoints/test_endpoint.rs | 16 +- components/cdc/tests/integrations/test_cdc.rs | 53 +- components/cloud/aws/src/kms.rs | 8 +- components/cloud/aws/src/s3.rs | 27 +- components/cloud/azure/src/azblob.rs | 9 +- components/cloud/gcp/src/gcs.rs | 6 +- components/cloud/src/blob.rs | 3 +- components/codec/src/buffer.rs | 24 +- components/codec/src/byte.rs | 119 +++-- components/codec/src/error.rs | 3 +- components/codec/src/number.rs | 58 ++- components/concurrency_manager/src/lib.rs | 8 +- .../concurrency_manager/src/lock_table.rs | 11 +- .../concurrency_manager/tests/memory_usage.rs | 3 +- .../coprocessor_plugin_api/src/allocator.rs | 16 +- .../coprocessor_plugin_api/src/errors.rs | 16 +- components/coprocessor_plugin_api/src/lib.rs | 25 +- .../coprocessor_plugin_api/src/plugin_api.rs | 31 +- .../coprocessor_plugin_api/src/storage_api.rs | 32 +- components/coprocessor_plugin_api/src/util.rs | 47 +- components/encryption/export/src/lib.rs | 3 +- components/encryption/src/crypter.rs | 4 +- .../encryption/src/encrypted_file/mod.rs | 4 +- components/encryption/src/file_dict_file.rs | 51 +- components/encryption/src/io.rs | 10 +- components/encryption/src/manager/mod.rs | 20 +- components/encryption/src/master_key/kms.rs | 10 +- components/encryption/src/master_key/mem.rs | 18 +- components/encryption/src/master_key/mod.rs | 5 +- components/engine_rocks/src/engine.rs | 6 +- 
components/engine_rocks/src/event_listener.rs | 2 + components/engine_rocks/src/file_system.rs | 12 +- components/engine_rocks/src/import.rs | 7 +- components/engine_rocks/src/lib.rs | 5 +- components/engine_rocks/src/misc.rs | 12 +- .../engine_rocks/src/perf_context_impl.rs | 7 +- components/engine_rocks/src/properties.rs | 12 +- components/engine_rocks/src/raft_engine.rs | 3 +- .../engine_rocks/src/range_properties.rs | 7 +- components/engine_rocks/src/rocks_metrics.rs | 8 +- components/engine_rocks/src/sst.rs | 12 +- components/engine_rocks/src/util.rs | 14 +- components/engine_rocks/src/write_batch.rs | 19 +- .../engine_rocks_helper/src/sst_recovery.rs | 10 +- components/engine_test/src/lib.rs | 3 +- components/engine_traits/src/compact.rs | 11 +- components/engine_traits/src/engine.rs | 29 +- components/engine_traits/src/lib.rs | 41 +- components/engine_traits/src/misc.rs | 34 +- components/engine_traits/src/peekable.rs | 3 +- components/engine_traits/src/perf_context.rs | 3 +- components/engine_traits/src/raft_engine.rs | 4 +- .../engine_traits/src/range_properties.rs | 3 +- .../engine_traits/src/sst_partitioner.rs | 4 +- .../external_storage/export/src/export.rs | 9 +- .../external_storage/export/src/request.rs | 3 +- components/external_storage/src/lib.rs | 6 +- components/external_storage/src/local.rs | 11 +- components/external_storage/src/request.rs | 3 +- components/file_system/src/file.rs | 3 +- .../file_system/src/io_stats/biosnoop.rs | 10 +- components/file_system/src/lib.rs | 16 +- components/file_system/src/metrics_manager.rs | 3 +- components/file_system/src/rate_limiter.rs | 80 +-- components/keys/src/lib.rs | 11 +- components/log_wrappers/src/lib.rs | 14 +- components/log_wrappers/src/test_util.rs | 7 +- components/online_config/src/lib.rs | 4 +- components/panic_hook/src/lib.rs | 3 +- components/pd_client/src/config.rs | 11 +- components/pd_client/src/feature_gate.rs | 8 +- components/pd_client/src/lib.rs | 27 +- components/pd_client/src/tso.rs 
| 53 +- components/pd_client/src/util.rs | 29 +- components/profiler/examples/prime.rs | 3 +- components/profiler/src/lib.rs | 9 +- components/profiler/src/profiler_unix.rs | 18 +- components/raft_log_engine/src/engine.rs | 3 +- components/raft_log_engine/src/lib.rs | 3 +- components/raftstore-v2/src/batch/apply.rs | 3 +- components/raftstore-v2/src/batch/store.rs | 6 +- components/raftstore-v2/src/lib.rs | 9 +- components/raftstore-v2/src/raft/peer.rs | 4 +- components/raftstore-v2/src/router/message.rs | 8 +- components/raftstore-v2/src/tablet.rs | 5 +- .../raftstore/src/coprocessor/config.rs | 3 +- .../raftstore/src/coprocessor/dispatcher.rs | 18 +- components/raftstore/src/coprocessor/mod.rs | 46 +- .../src/coprocessor/region_info_accessor.rs | 147 +++--- .../src/coprocessor/split_check/keys.rs | 10 +- .../src/coprocessor/split_check/mod.rs | 4 +- .../src/coprocessor/split_check/size.rs | 13 +- .../src/coprocessor/split_check/table.rs | 5 +- .../raftstore/src/store/async_io/write.rs | 17 +- .../src/store/async_io/write_router.rs | 31 +- components/raftstore/src/store/bootstrap.rs | 4 +- .../raftstore/src/store/compaction_guard.rs | 32 +- components/raftstore/src/store/config.rs | 49 +- .../raftstore/src/store/entry_storage.rs | 41 +- components/raftstore/src/store/fsm/apply.rs | 193 ++++--- components/raftstore/src/store/fsm/peer.rs | 477 ++++++++++-------- components/raftstore/src/store/fsm/store.rs | 80 +-- components/raftstore/src/store/msg.rs | 51 +- components/raftstore/src/store/peer.rs | 455 +++++++++-------- .../raftstore/src/store/peer_storage.rs | 74 +-- components/raftstore/src/store/read_queue.rs | 14 +- .../raftstore/src/store/region_snapshot.rs | 4 +- .../raftstore/src/store/replication_mode.rs | 11 +- components/raftstore/src/store/snap.rs | 47 +- components/raftstore/src/store/snap/io.rs | 7 +- components/raftstore/src/store/txn_ext.rs | 104 ++-- components/raftstore/src/store/util.rs | 147 +++--- .../src/store/worker/check_leader.rs | 11 +- 
.../raftstore/src/store/worker/compact.rs | 24 +- components/raftstore/src/store/worker/pd.rs | 25 +- .../raftstore/src/store/worker/raftlog_gc.rs | 5 +- components/raftstore/src/store/worker/read.rs | 23 +- .../raftstore/src/store/worker/region.rs | 30 +- .../raftstore/src/store/worker/split_check.rs | 9 +- .../src/store/worker/split_config.rs | 31 +- .../src/store/worker/split_controller.rs | 59 ++- components/resolved_ts/src/advance.rs | 16 +- components/resolved_ts/src/cmd.rs | 10 +- components/resolved_ts/src/endpoint.rs | 28 +- components/resolved_ts/src/lib.rs | 13 +- components/resolved_ts/src/observer.rs | 14 +- components/resolved_ts/src/resolver.rs | 6 +- components/resolved_ts/src/scanner.rs | 3 +- components/resource_metering/src/collector.rs | 3 +- components/resource_metering/src/lib.rs | 28 +- components/resource_metering/src/model.rs | 3 +- .../src/recorder/collector_reg.rs | 18 +- .../src/recorder/localstorage.rs | 7 +- .../resource_metering/src/recorder/mod.rs | 5 +- .../src/recorder/sub_recorder/mod.rs | 25 +- .../src/recorder/sub_recorder/summary.rs | 8 +- .../src/reporter/data_sink.rs | 5 +- .../resource_metering/src/reporter/mod.rs | 9 +- .../resource_metering/src/reporter/pubsub.rs | 5 +- .../src/reporter/single_target.rs | 8 +- .../resource_metering/tests/summary_test.rs | 2 +- components/security/src/lib.rs | 3 +- components/server/src/raft_engine_switch.rs | 7 +- components/server/src/server.rs | 45 +- components/server/src/setup.rs | 15 +- components/sst_importer/src/import_file.rs | 8 +- components/sst_importer/src/sst_importer.rs | 19 +- components/sst_importer/src/util.rs | 25 +- components/test_backup/src/lib.rs | 2 +- components/test_coprocessor/src/dag.rs | 3 +- components/test_coprocessor/src/fixture.rs | 3 +- components/test_coprocessor/src/table.rs | 6 +- components/test_raftstore/src/cluster.rs | 26 +- components/test_raftstore/src/node.rs | 4 +- components/test_raftstore/src/pd.rs | 13 +- 
components/test_raftstore/src/server.rs | 3 +- .../test_raftstore/src/transport_simulate.rs | 18 +- components/test_raftstore/src/util.rs | 8 +- components/test_util/src/lib.rs | 6 +- components/test_util/src/runner.rs | 6 +- components/tidb_query_aggr/src/impl_avg.rs | 3 +- components/tidb_query_aggr/src/impl_count.rs | 7 +- components/tidb_query_aggr/src/impl_first.rs | 13 +- .../tidb_query_aggr/src/impl_max_min.rs | 12 +- components/tidb_query_aggr/src/impl_sum.rs | 13 +- .../tidb_query_aggr/src/impl_variance.rs | 21 +- components/tidb_query_aggr/src/lib.rs | 81 +-- components/tidb_query_aggr/src/parser.rs | 29 +- components/tidb_query_aggr/src/util.rs | 9 +- components/tidb_query_codegen/src/lib.rs | 4 +- .../tidb_query_codegen/src/rpn_function.rs | 118 +++-- components/tidb_query_common/src/error.rs | 5 +- .../tidb_query_common/src/execute_stats.rs | 15 +- .../tidb_query_common/src/storage/mod.rs | 4 +- .../src/storage/ranges_iter.rs | 21 +- .../tidb_query_common/src/storage/scanner.rs | 23 +- .../src/storage/test_fixture.rs | 7 +- components/tidb_query_common/src/util.rs | 8 +- .../src/codec/batch/lazy_column.rs | 45 +- .../src/codec/batch/lazy_column_vec.rs | 23 +- .../src/codec/chunk/chunk.rs | 8 +- .../src/codec/chunk/column.rs | 5 +- .../codec/collation/collator/gbk_collation.rs | 15 +- .../codec/collation/collator/latin1_bin.rs | 3 +- .../src/codec/collation/collator/mod.rs | 6 +- .../collation/collator/utf8mb4_binary.rs | 3 +- .../collation/collator/utf8mb4_general_ci.rs | 3 +- .../collation/collator/utf8mb4_unicode_ci.rs | 3 +- .../src/codec/collation/mod.rs | 5 +- .../tidb_query_datatype/src/codec/convert.rs | 67 ++- .../src/codec/data_type/chunked_vec_bytes.rs | 10 +- .../src/codec/data_type/chunked_vec_json.rs | 11 +- .../src/codec/data_type/chunked_vec_set.rs | 3 +- .../src/codec/data_type/chunked_vec_sized.rs | 9 +- .../src/codec/data_type/logical_rows.rs | 3 +- .../src/codec/data_type/mod.rs | 23 +- .../src/codec/data_type/scalar.rs | 17 +- 
.../src/codec/data_type/vector.rs | 28 +- .../tidb_query_datatype/src/codec/datum.rs | 15 +- .../src/codec/datum_codec.rs | 5 +- .../src/codec/mysql/binary_literal.rs | 7 +- .../src/codec/mysql/charset.rs | 3 +- .../src/codec/mysql/decimal.rs | 63 ++- .../src/codec/mysql/duration.rs | 20 +- .../src/codec/mysql/json/comparison.rs | 4 +- .../src/codec/mysql/json/json_extract.rs | 7 +- .../src/codec/mysql/json/json_keys.rs | 3 +- .../src/codec/mysql/json/json_merge.rs | 3 +- .../src/codec/mysql/json/mod.rs | 13 +- .../src/codec/mysql/json/modifier.rs | 10 +- .../src/codec/mysql/json/path_expr.rs | 8 +- .../src/codec/mysql/json/serde.rs | 3 +- .../src/codec/mysql/time/extension.rs | 15 +- .../src/codec/mysql/time/mod.rs | 43 +- .../src/codec/mysql/time/tz.rs | 8 +- .../src/codec/row/v2/compat_v1.rs | 14 +- .../src/codec/row/v2/encoder_for_test.rs | 10 +- .../src/codec/row/v2/mod.rs | 3 +- .../src/codec/row/v2/row_slice.rs | 12 +- .../tidb_query_datatype/src/codec/table.rs | 12 +- .../tidb_query_datatype/src/def/eval_type.rs | 16 +- .../tidb_query_datatype/src/def/field_type.rs | 28 +- .../tidb_query_datatype/src/expr/ctx.rs | 32 +- .../src/fast_hash_aggr_executor.rs | 44 +- .../src/index_scan_executor.rs | 219 ++++---- .../tidb_query_executors/src/interface.rs | 91 ++-- components/tidb_query_executors/src/lib.rs | 11 +- .../src/projection_executor.rs | 13 +- components/tidb_query_executors/src/runner.rs | 43 +- .../src/selection_executor.rs | 43 +- .../src/simple_aggr_executor.rs | 37 +- .../src/slow_hash_aggr_executor.rs | 70 +-- .../src/stream_aggr_executor.rs | 25 +- .../src/table_scan_executor.rs | 99 ++-- .../src/top_n_executor.rs | 65 ++- .../src/util/aggr_executor.rs | 75 +-- .../src/util/hash_aggr_helper.rs | 5 +- .../src/util/mock_executor.rs | 4 +- .../tidb_query_executors/src/util/mod.rs | 4 +- .../src/util/scan_executor.rs | 28 +- components/tidb_query_expr/src/impl_cast.rs | 75 +-- .../tidb_query_expr/src/impl_compare_in.rs | 4 +- 
.../tidb_query_expr/src/impl_encryption.rs | 16 +- components/tidb_query_expr/src/impl_json.rs | 3 +- components/tidb_query_expr/src/impl_math.rs | 16 +- components/tidb_query_expr/src/impl_op.rs | 3 +- components/tidb_query_expr/src/impl_string.rs | 18 +- components/tidb_query_expr/src/impl_time.rs | 54 +- components/tidb_query_expr/src/lib.rs | 17 +- components/tidb_query_expr/src/types/expr.rs | 6 +- .../tidb_query_expr/src/types/expr_builder.rs | 28 +- .../tidb_query_expr/src/types/expr_eval.rs | 51 +- .../tidb_query_expr/src/types/function.rs | 64 ++- .../tidb_query_expr/src/types/test_util.rs | 27 +- components/tikv_alloc/src/error.rs | 3 +- components/tikv_alloc/src/lib.rs | 8 +- components/tikv_alloc/src/trace.rs | 26 +- components/tikv_kv/src/btree_engine.rs | 17 +- components/tikv_kv/src/cursor.rs | 21 +- components/tikv_kv/src/lib.rs | 46 +- components/tikv_kv/src/mock_engine.rs | 3 +- components/tikv_kv/src/rocksdb_engine.rs | 5 +- components/tikv_util/src/buffer_vec.rs | 27 +- components/tikv_util/src/callback.rs | 4 +- components/tikv_util/src/codec/bytes.rs | 38 +- components/tikv_util/src/codec/number.rs | 18 +- components/tikv_util/src/config.rs | 47 +- components/tikv_util/src/deadline.rs | 3 +- components/tikv_util/src/future.rs | 12 +- components/tikv_util/src/lib.rs | 9 +- components/tikv_util/src/log.rs | 11 +- components/tikv_util/src/logger/file_log.rs | 11 +- components/tikv_util/src/logger/formatter.rs | 18 +- components/tikv_util/src/logger/mod.rs | 20 +- components/tikv_util/src/macros.rs | 21 +- components/tikv_util/src/memory.rs | 3 +- .../tikv_util/src/metrics/process_linux.rs | 4 +- .../tikv_util/src/metrics/threads_dummy.rs | 8 +- .../tikv_util/src/metrics/threads_linux.rs | 6 +- components/tikv_util/src/mpsc/batch.rs | 17 +- components/tikv_util/src/mpsc/mod.rs | 13 +- components/tikv_util/src/stream.rs | 7 +- components/tikv_util/src/sys/cgroup.rs | 26 +- components/tikv_util/src/sys/inspector.rs | 4 +- 
components/tikv_util/src/sys/mod.rs | 7 +- components/tikv_util/src/sys/thread.rs | 13 +- components/tikv_util/src/time.rs | 7 +- components/tikv_util/src/timer.rs | 16 +- components/tikv_util/src/topn.rs | 3 +- components/tikv_util/src/worker/mod.rs | 23 +- components/tikv_util/src/worker/pool.rs | 3 +- .../tikv_util/src/yatp_pool/future_pool.rs | 14 +- .../tipb_helper/src/expr_def_builder.rs | 3 +- components/tracker/src/lib.rs | 4 +- components/txn_types/src/lock.rs | 18 +- components/txn_types/src/timestamp.rs | 23 +- components/txn_types/src/types.rs | 51 +- components/txn_types/src/write.rs | 128 ++--- fuzz/cli.rs | 5 +- fuzz/targets/mod.rs | 3 +- rustfmt.toml | 7 + src/config.rs | 141 +++--- src/coprocessor/dag/storage_impl.rs | 3 +- src/coprocessor/endpoint.rs | 81 +-- .../interceptors/concurrency_limiter.rs | 8 +- src/coprocessor/interceptors/deadline.rs | 4 +- src/coprocessor/mod.rs | 28 +- src/coprocessor/statistics/analyze.rs | 53 +- src/coprocessor/statistics/cmsketch.rs | 4 +- src/coprocessor/tracker.rs | 20 +- src/coprocessor_v2/endpoint.rs | 5 +- src/coprocessor_v2/mod.rs | 28 +- src/coprocessor_v2/plugin_registry.rs | 91 ++-- src/coprocessor_v2/raw_storage_impl.rs | 13 +- src/import/duplicate_detect.rs | 16 +- src/import/sst_service.rs | 14 +- src/server/config.rs | 11 +- src/server/debug.rs | 40 +- src/server/engine_factory.rs | 11 +- src/server/engine_factory_v2.rs | 4 +- .../gc_worker/applied_lock_collector.rs | 83 +-- src/server/gc_worker/compaction_filter.rs | 52 +- src/server/gc_worker/config.rs | 5 +- src/server/gc_worker/gc_manager.rs | 163 +++--- src/server/gc_worker/gc_worker.rs | 76 +-- .../gc_worker/rawkv_compaction_filter.rs | 27 +- src/server/load_statistics/linux.rs | 21 +- src/server/load_statistics/mod.rs | 3 +- src/server/lock_manager/client.rs | 3 +- src/server/lock_manager/config.rs | 6 +- src/server/lock_manager/deadlock.rs | 97 ++-- src/server/lock_manager/mod.rs | 19 +- src/server/lock_manager/waiter_manager.rs | 23 +- 
src/server/node.rs | 17 +- src/server/raft_client.rs | 32 +- src/server/raftkv.rs | 6 +- src/server/reset_to_version.rs | 11 +- src/server/server.rs | 3 +- src/server/service/debug.rs | 3 +- src/server/service/diagnostics/log.rs | 7 +- src/server/service/diagnostics/sys.rs | 2 +- src/server/service/kv.rs | 29 +- src/server/snap.rs | 3 +- src/server/status_server/mod.rs | 11 +- src/server/status_server/profile.rs | 6 +- src/storage/config.rs | 8 +- src/storage/errors.rs | 18 +- src/storage/kv/test_engine_builder.rs | 4 +- src/storage/lock_manager.rs | 25 +- src/storage/mod.rs | 251 +++++---- src/storage/mvcc/consistency_check.rs | 5 +- src/storage/mvcc/reader/mod.rs | 22 +- src/storage/mvcc/reader/point_getter.rs | 60 ++- src/storage/mvcc/reader/reader.rs | 126 +++-- src/storage/mvcc/reader/scanner/backward.rs | 97 ++-- src/storage/mvcc/reader/scanner/forward.rs | 107 ++-- src/storage/mvcc/reader/scanner/mod.rs | 105 ++-- src/storage/mvcc/txn.rs | 70 +-- src/storage/raw/raw_mvcc.rs | 11 +- src/storage/raw/store.rs | 18 +- src/storage/read_pool.rs | 6 +- .../txn/actions/acquire_pessimistic_lock.rs | 86 ++-- .../txn/actions/check_data_constraint.rs | 10 +- src/storage/txn/actions/check_txn_status.rs | 32 +- src/storage/txn/actions/cleanup.rs | 19 +- src/storage/txn/actions/commit.rs | 13 +- src/storage/txn/actions/mod.rs | 5 +- src/storage/txn/actions/prewrite.rs | 118 +++-- .../txn/commands/acquire_pessimistic_lock.rs | 5 +- .../txn/commands/check_secondary_locks.rs | 14 +- src/storage/txn/commands/check_txn_status.rs | 86 ++-- src/storage/txn/commands/cleanup.rs | 4 +- src/storage/txn/commands/compare_and_swap.rs | 7 +- src/storage/txn/commands/macros.rs | 14 +- src/storage/txn/commands/mod.rs | 58 ++- .../txn/commands/pessimistic_rollback.rs | 3 +- src/storage/txn/commands/prewrite.rs | 84 +-- src/storage/txn/commands/resolve_lock.rs | 11 +- src/storage/txn/commands/resolve_lock_lite.rs | 4 +- src/storage/txn/commands/rollback.rs | 4 +- 
src/storage/txn/commands/txn_heart_beat.rs | 3 +- src/storage/txn/flow_controller/mod.rs | 4 +- .../singleton_flow_controller.rs | 50 +- .../flow_controller/tablet_flow_controller.rs | 4 +- src/storage/txn/latch.rs | 56 +- src/storage/txn/sched_pool.rs | 3 +- src/storage/txn/scheduler.rs | 145 +++--- src/storage/txn/store.rs | 18 +- src/storage/types.rs | 10 +- .../coprocessor_executors/hash_aggr/mod.rs | 12 +- .../coprocessor_executors/hash_aggr/util.rs | 4 +- .../index_scan/fixture.rs | 4 +- .../coprocessor_executors/index_scan/mod.rs | 12 +- .../coprocessor_executors/integrated/mod.rs | 32 +- .../coprocessor_executors/selection/util.rs | 3 +- .../coprocessor_executors/simple_aggr/util.rs | 4 +- .../coprocessor_executors/stream_aggr/mod.rs | 8 +- .../coprocessor_executors/stream_aggr/util.rs | 4 +- .../table_scan/fixture.rs | 10 +- .../coprocessor_executors/table_scan/mod.rs | 30 +- .../coprocessor_executors/util/fixture.rs | 44 +- .../benches/coprocessor_executors/util/mod.rs | 4 +- .../coprocessor_executors/util/store.rs | 3 +- tests/benches/hierarchy/engine/mod.rs | 2 +- tests/benches/misc/storage/incremental_get.rs | 4 +- tests/failpoints/cases/test_async_fetch.rs | 7 +- .../cases/test_cmd_epoch_checker.rs | 11 +- tests/failpoints/cases/test_conf_change.rs | 3 +- tests/failpoints/cases/test_coprocessor.rs | 9 +- tests/failpoints/cases/test_disk_full.rs | 19 +- tests/failpoints/cases/test_early_apply.rs | 16 +- tests/failpoints/cases/test_encryption.rs | 7 +- tests/failpoints/cases/test_gc_worker.rs | 59 ++- tests/failpoints/cases/test_hibernate.rs | 14 +- tests/failpoints/cases/test_import_service.rs | 19 +- tests/failpoints/cases/test_kv_service.rs | 10 +- .../cases/test_memory_usage_limit.rs | 3 +- tests/failpoints/cases/test_merge.rs | 151 +++--- tests/failpoints/cases/test_pending_peers.rs | 4 +- tests/failpoints/cases/test_rawkv.rs | 6 +- tests/failpoints/cases/test_replica_read.rs | 77 +-- .../cases/test_replica_stale_read.rs | 95 ++-- 
tests/failpoints/cases/test_server.rs | 8 +- tests/failpoints/cases/test_snap.rs | 46 +- tests/failpoints/cases/test_split_region.rs | 61 ++- tests/failpoints/cases/test_sst_recovery.rs | 3 +- tests/failpoints/cases/test_stale_peer.rs | 18 +- tests/failpoints/cases/test_stale_read.rs | 14 +- tests/failpoints/cases/test_storage.rs | 17 +- tests/failpoints/cases/test_transaction.rs | 28 +- .../failpoints/cases/test_transfer_leader.rs | 7 +- .../failpoints/cases/test_unsafe_recovery.rs | 11 +- tests/integrations/backup/mod.rs | 14 +- .../integrations/config/test_config_client.rs | 3 +- tests/integrations/coprocessor/test_select.rs | 36 +- tests/integrations/pd/test_rpc_client.rs | 10 +- .../integrations/raftstore/test_bootstrap.rs | 14 +- .../raftstore/test_compact_lock_cf.rs | 6 +- .../raftstore/test_compact_log.rs | 12 +- .../raftstore/test_conf_change.rs | 16 +- .../raftstore/test_early_apply.rs | 11 +- .../integrations/raftstore/test_hibernate.rs | 23 +- .../integrations/raftstore/test_lease_read.rs | 79 +-- tests/integrations/raftstore/test_merge.rs | 124 +++-- tests/integrations/raftstore/test_multi.rs | 34 +- tests/integrations/raftstore/test_prevote.rs | 28 +- .../raftstore/test_region_change_observer.rs | 6 +- .../raftstore/test_region_heartbeat.rs | 6 +- .../raftstore/test_region_info_accessor.rs | 3 +- .../raftstore/test_replica_read.rs | 35 +- .../raftstore/test_replication_mode.rs | 19 +- tests/integrations/raftstore/test_snap.rs | 29 +- .../raftstore/test_split_region.rs | 33 +- .../integrations/raftstore/test_stale_peer.rs | 51 +- .../integrations/raftstore/test_tombstone.rs | 6 +- .../raftstore/test_transfer_leader.rs | 3 +- .../raftstore/test_unsafe_recovery.rs | 65 ++- tests/integrations/server/gc_worker.rs | 7 +- tests/integrations/server/kv_service.rs | 13 +- tests/integrations/server/lock_manager.rs | 23 +- tests/integrations/server/raft_client.rs | 18 +- tests/integrations/storage/test_storage.rs | 9 +- 502 files changed, 7245 insertions(+), 5434 
deletions(-) diff --git a/cmd/tikv-ctl/src/cmd.rs b/cmd/tikv-ctl/src/cmd.rs index 74cc69034fc..7f459a4c127 100644 --- a/cmd/tikv-ctl/src/cmd.rs +++ b/cmd/tikv-ctl/src/cmd.rs @@ -373,7 +373,8 @@ pub enum Cmd { /// Skip write RocksDB read_only: bool, }, - /// Unsafely recover when the store can not start normally, this recover may lose data + /// Unsafely recover when the store can not start normally, this recover may + /// lose data UnsafeRecover { #[structopt(subcommand)] cmd: UnsafeRecoverCmd, @@ -404,7 +405,9 @@ pub enum Cmd { default_value = crate::executor::METRICS_PROMETHEUS, possible_values = &["prometheus", "jemalloc", "rocksdb_raft", "rocksdb_kv"], )] - /// Set the metrics tag, one of prometheus/jemalloc/rocksdb_raft/rocksdb_kv, if not specified, print prometheus + /// Set the metrics tag + /// Options: prometheus/jemalloc/rocksdb_raft/rocksdb_kv + /// If not specified, print prometheus tag: Vec, }, /// Force a consistency-check for a specified region @@ -415,10 +418,13 @@ pub enum Cmd { }, /// Get all regions with corrupt raft BadRegions {}, - /// Modify tikv config, eg. tikv-ctl --host ip:port modify-tikv-config -n rocksdb.defaultcf.disable-auto-compactions -v true + /// Modify tikv config. + /// Eg. tikv-ctl --host ip:port modify-tikv-config -n + /// rocksdb.defaultcf.disable-auto-compactions -v true ModifyTikvConfig { #[structopt(short = "n")] - /// The config name are same as the name used on config file, eg. raftstore.messages-per-tick, raftdb.max-background-jobs + /// The config name are same as the name used on config file. + /// eg. 
raftstore.messages-per-tick, raftdb.max-background-jobs config_name: String, #[structopt(short = "v")] @@ -431,7 +437,8 @@ pub enum Cmd { /// Output meta file path file: String, }, - /// Compact the whole cluster in a specified range in one or more column families + /// Compact the whole cluster in a specified range in one or more column + /// families CompactCluster { #[structopt( short = "d", @@ -449,7 +456,8 @@ pub enum Cmd { default_value = CF_DEFAULT, possible_values = &["default", "lock", "write"], )] - /// Column family names, for kv db, combine from default/lock/write; for raft db, can only be default + /// Column family names, for kv db, combine from default/lock/write; for + /// raft db, can only be default cf: Vec, #[structopt( @@ -529,12 +537,14 @@ pub enum Cmd { #[structopt(subcommand)] cmd: EncryptionMetaCmd, }, - /// Delete encryption keys that are no longer associated with physical files. + /// Delete encryption keys that are no longer associated with physical + /// files. CleanupEncryptionMeta {}, /// Print bad ssts related infos BadSsts { #[structopt(long)] - /// specify manifest, if not set, it will look up manifest file in db path + /// specify manifest, if not set, it will look up manifest file in db + /// path manifest: Option, #[structopt(long, value_delimiter = ",")] @@ -604,7 +614,8 @@ pub enum RaftCmd { pub enum FailCmd { /// Inject failures Inject { - /// Inject fail point and actions pairs. E.g. tikv-ctl fail inject a=off b=panic + /// Inject fail point and actions pairs. + /// E.g. 
tikv-ctl fail inject a=off b=panic args: Vec, #[structopt(short = "f")] diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 19977924e69..62ce325a130 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -64,7 +64,7 @@ pub fn new_debug_executor( let cache = cfg.storage.block_cache.build_shared_cache(); let shared_block_cache = cache.is_some(); let env = cfg - .build_shared_rocks_env(key_manager.clone(), None /*io_rate_limiter*/) + .build_shared_rocks_env(key_manager.clone(), None /* io_rate_limiter */) .unwrap(); let mut kv_db_opts = cfg.rocksdb.build_opt(); @@ -105,7 +105,7 @@ pub fn new_debug_executor( error!("raft engine not exists: {}", config.dir); tikv_util::logger::exit_process_gracefully(-1); } - let raft_db = RaftLogEngine::new(config, key_manager, None /*io_rate_limiter*/).unwrap(); + let raft_db = RaftLogEngine::new(config, key_manager, None /* io_rate_limiter */).unwrap(); let debugger = Debugger::new(Engines::new(kv_db, raft_db), cfg_controller); Box::new(debugger) as Box } diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 67834db9c5d..00094af8dc6 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -675,7 +675,7 @@ fn run_ldb_command(args: Vec, cfg: &TiKvConfig) { let key_manager = data_key_manager_from_config(&cfg.security.encryption, &cfg.storage.data_dir) .unwrap() .map(Arc::new); - let env = get_env(key_manager, None /*io_rate_limiter*/).unwrap(); + let env = get_env(key_manager, None /* io_rate_limiter */).unwrap(); let mut opts = cfg.rocksdb.build_opt(); opts.set_env(env); @@ -735,7 +735,9 @@ fn print_bad_ssts(data_dir: &str, manifest: Option<&str>, pd_client: RpcClient, for line in corruptions.lines() { println!("--------------------------------------------------------"); // The corruption format may like this: + // ```text // /path/to/db/057155.sst is corrupted: Corruption: block checksum mismatch: expected 3754995957, got 708533950 in 
/path/to/db/057155.sst offset 3126049 size 22724 + // ``` println!("corruption info:\n{}", line); let r = Regex::new(r"/\w*\.sst").unwrap(); @@ -795,8 +797,10 @@ fn print_bad_ssts(data_dir: &str, manifest: Option<&str>, pd_client: RpcClient, println!("\nsst meta:"); // The output may like this: + // ```text // --------------- Column family "write" (ID 2) -------------- // 63:132906243[3555338 .. 3555338]['7A311B40EFCC2CB4C5911ECF3937D728DED26AE53FA5E61BE04F23F2BE54EACC73' seq:3555338, type:1 .. '7A313030302E25CD5F57252E' seq:3555338, type:1] at level 0 + // ``` let column_r = Regex::new(r"--------------- (.*) --------------\n(.*)").unwrap(); if let Some(m) = column_r.captures(&output) { println!( @@ -848,7 +852,8 @@ fn print_bad_ssts(data_dir: &str, manifest: Option<&str>, pd_client: RpcClient, println!("unexpected key {}", log_wrappers::Value(&start)); } } else { - // it is expected when the sst is output of a compaction and the sst isn't added to manifest yet. + // it is expected when the sst is output of a compaction and the sst isn't added + // to manifest yet. 
println!( "sst {} is not found in manifest: {}", sst_file_number, output diff --git a/components/api_version/src/api_v1.rs b/components/api_version/src/api_v1.rs index 5b980ea75f1..1530124d245 100644 --- a/components/api_version/src/api_v1.rs +++ b/components/api_version/src/api_v1.rs @@ -45,7 +45,7 @@ impl KvFormat for ApiV1 { ) -> Result { match src_api { ApiVersion::V1 | ApiVersion::V1ttl => Ok(Key::from_encoded_slice(key)), - ApiVersion::V2 => Err(box_err!("unsupported conversion from v2 to v1")), // reject apiv2 -> apiv1 conversion + ApiVersion::V2 => Err(box_err!("unsupported conversion from v2 to v1")), /* reject apiv2 -> apiv1 conversion */ } } @@ -56,7 +56,7 @@ impl KvFormat for ApiV1 { ) -> Result<(Vec, Vec)> { match src_api { ApiVersion::V1 | ApiVersion::V1ttl => Ok((start_key, end_key)), - ApiVersion::V2 => Err(box_err!("unsupported conversion from v2 to v1")), // reject apiv2 -> apiv1 conversion + ApiVersion::V2 => Err(box_err!("unsupported conversion from v2 to v1")), /* reject apiv2 -> apiv1 conversion */ } } } diff --git a/components/api_version/src/api_v1ttl.rs b/components/api_version/src/api_v1ttl.rs index 65c7f569aa6..2a2df6bfb33 100644 --- a/components/api_version/src/api_v1ttl.rs +++ b/components/api_version/src/api_v1ttl.rs @@ -70,7 +70,7 @@ impl KvFormat for ApiV1Ttl { ) -> Result { match src_api { ApiVersion::V1 | ApiVersion::V1ttl => Ok(Key::from_encoded_slice(key)), - ApiVersion::V2 => Err(box_err!("unsupported conversion from v2 to v1ttl")), // reject apiv2 -> apiv1ttl conversion + ApiVersion::V2 => Err(box_err!("unsupported conversion from v2 to v1ttl")), /* reject apiv2 -> apiv1ttl conversion */ } } @@ -81,7 +81,7 @@ impl KvFormat for ApiV1Ttl { ) -> Result<(Vec, Vec)> { match src_api { ApiVersion::V1 | ApiVersion::V1ttl => Ok((start_key, end_key)), - ApiVersion::V2 => Err(box_err!("unsupported conversion from v2 to v1ttl")), // reject apiv2 -> apiv1ttl conversion + ApiVersion::V2 => Err(box_err!("unsupported conversion from v2 to 
v1ttl")), /* reject apiv2 -> apiv1ttl conversion */ } } } diff --git a/components/api_version/src/api_v2.rs b/components/api_version/src/api_v2.rs index a8a177596ad..712804b3b3a 100644 --- a/components/api_version/src/api_v2.rs +++ b/components/api_version/src/api_v2.rs @@ -143,8 +143,8 @@ impl KvFormat for ApiV2 { } // Note: `user_key` may not be `KeyMode::Raw`. - // E.g., `raw_xxx_range` interfaces accept an exclusive end key just beyond the scope of raw keys. - // The validity is ensured by client & Storage interfaces. + // E.g. `raw_xxx_range` interfaces accept an exclusive end key just beyond the + // scope of raw keys. The validity is ensured by client & Storage interfaces. fn encode_raw_key(user_key: &[u8], ts: Option) -> Key { let encoded_key = Key::from_raw(user_key); if let Some(ts) = ts { @@ -156,13 +156,14 @@ impl KvFormat for ApiV2 { } // Note: `user_key` may not be `KeyMode::Raw`. - // E.g., `raw_xxx_range` interfaces accept an exclusive end key just beyond the scope of raw keys. - // The validity is ensured by client & Storage interfaces. + // E.g. `raw_xxx_range` interfaces accept an exclusive end key just beyond the + // scope of raw keys. The validity is ensured by client & Storage interfaces. fn encode_raw_key_owned(mut user_key: Vec, ts: Option) -> Key { let src_len = user_key.len(); let encoded_len = MemComparableByteCodec::encoded_len(src_len); - // always reserve more U64_SIZE for ts, as it's likely to "append_ts" later, especially in raw write procedures. + // always reserve more U64_SIZE for ts, as it's likely to "append_ts" later, + // especially in raw write procedures. user_key.reserve(encoded_len - src_len + number::U64_SIZE); user_key.resize(encoded_len, 0u8); MemComparableByteCodec::encode_all_in_place(&mut user_key, src_len); @@ -248,8 +249,8 @@ impl ApiV2 { } // Note: `encoded_bytes` may not be `KeyMode::Raw`. -// E.g., backup service accept an exclusive end key just beyond the scope of raw keys. 
-// The validity is ensured by client & Storage interfaces. +// E.g., backup service accept an exclusive end key just beyond the scope of raw +// keys. The validity is ensured by client & Storage interfaces. #[inline] fn is_valid_encoded_bytes(mut encoded_bytes: &[u8], with_ts: bool) -> bool { bytes::decode_bytes(&mut encoded_bytes, false).is_ok() @@ -261,8 +262,8 @@ fn is_valid_encoded_key(encoded_key: &Key, with_ts: bool) -> bool { is_valid_encoded_bytes(encoded_key.as_encoded(), with_ts) } -/// TimeStamp::zero is not acceptable, as such entries can not be retrieved by RawKV MVCC. -/// See `RawMvccSnapshot::seek_first_key_value_cf`. +/// TimeStamp::zero is not acceptable, as such entries can not be retrieved by +/// RawKV MVCC. See `RawMvccSnapshot::seek_first_key_value_cf`. #[inline] fn is_valid_ts(ts: TimeStamp) -> bool { !ts.is_zero() diff --git a/components/api_version/src/lib.rs b/components/api_version/src/lib.rs index 0dbdc833b86..60f23455cc7 100644 --- a/components/api_version/src/lib.rs +++ b/components/api_version/src/lib.rs @@ -18,13 +18,15 @@ pub trait KvFormat: Clone + Copy + 'static + Send + Sync { const CLIENT_TAG: ApiVersion; const IS_TTL_ENABLED: bool; - /// Parse the key prefix and infer key mode. It's safe to parse either raw key or encoded key. + /// Parse the key prefix and infer key mode. It's safe to parse either raw + /// key or encoded key. fn parse_key_mode(key: &[u8]) -> KeyMode; fn parse_range_mode(range: (Option<&[u8]>, Option<&[u8]>)) -> KeyMode; /// Parse from the bytes from storage. fn decode_raw_value(bytes: &[u8]) -> Result>; - /// This is equivalent to `decode_raw_value()` but returns the owned user value. + /// This is equivalent to `decode_raw_value()` but returns the owned user + /// value. 
fn decode_raw_value_owned(mut bytes: Vec) -> Result>> { let (len, expire_ts, is_delete) = { let raw_value = Self::decode_raw_value(&bytes)?; @@ -47,8 +49,8 @@ pub trait KvFormat: Clone + Copy + 'static + Send + Sync { /// This is equivalent to `encode_raw_value` but reduced an allocation. fn encode_raw_value_owned(value: RawValue>) -> Vec; - /// Parse from the txn_types::Key from storage. Default implementation for API V1|V1TTL. - /// Return: (user key, optional timestamp) + /// Parse from the txn_types::Key from storage. Default implementation for + /// API V1|V1TTL. Return: (user key, optional timestamp) fn decode_raw_key(encoded_key: &Key, _with_ts: bool) -> Result<(Vec, Option)> { Ok((encoded_key.as_encoded().clone(), None)) } @@ -59,7 +61,8 @@ pub trait KvFormat: Clone + Copy + 'static + Send + Sync { ) -> Result<(Vec, Option)> { Ok((encoded_key.into_encoded(), None)) } - /// Encode the user key & optional timestamp into txn_types::Key. Default implementation for API V1|V1TTL. + /// Encode the user key & optional timestamp into txn_types::Key. Default + /// implementation for API V1|V1TTL. fn encode_raw_key(user_key: &[u8], _ts: Option) -> Key { Key::from_encoded_slice(user_key) } @@ -138,7 +141,8 @@ macro_rules! match_template_api_version { }} } -/// Dispatch an expression with type `kvproto::kvrpcpb::ApiVersion` to corresponding concrete type of `KvFormat` +/// Dispatch an expression with type `kvproto::kvrpcpb::ApiVersion` to +/// corresponding concrete type of `KvFormat` /// /// For example, the following code /// @@ -197,8 +201,8 @@ pub enum KeyMode { /// /// ### ApiVersion::V1ttl /// -/// 8 bytes representing the unix timestamp in seconds for expiring time will be append -/// to the value of all RawKV kv pairs. +/// 8 bytes representing the unix timestamp in seconds for expiring time will be +/// append to the value of all RawKV kv pairs. 
/// /// ```text /// ------------------------------------------------------------ @@ -221,8 +225,8 @@ pub enum KeyMode { /// ``` /// /// As shown in the example below, the least significant bit of the meta flag -/// indicates whether the value contains 8 bytes expire ts at the very left to the -/// meta flags. +/// indicates whether the value contains 8 bytes expire ts at the very left to +/// the meta flags. /// /// ```text /// -------------------------------------------------------------------------------- @@ -235,7 +239,8 @@ pub enum KeyMode { pub struct RawValue> { /// The user value. pub user_value: T, - /// The unix timestamp in seconds indicating the point of time that this key will be deleted. + /// The unix timestamp in seconds indicating the point of time that this key + /// will be deleted. pub expire_ts: Option, /// Logical deletion flag in ApiV2, should be `false` in ApiV1 and ApiV1Ttl pub is_delete: bool, diff --git a/components/backup-stream/src/checkpoint_manager.rs b/components/backup-stream/src/checkpoint_manager.rs index 96e330f956d..7dae680fa05 100644 --- a/components/backup-stream/src/checkpoint_manager.rs +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -20,8 +20,8 @@ use crate::{ /// A manager for maintaining the last flush ts. /// This information is provided for the `advancer` in checkpoint V3, -/// which involved a central node (typically TiDB) for collecting all regions' checkpoint -/// then advancing the global checkpoint. +/// which involved a central node (typically TiDB) for collecting all regions' +/// checkpoint then advancing the global checkpoint. #[derive(Debug, Default)] pub struct CheckpointManager { items: HashMap, @@ -165,11 +165,13 @@ pub struct LastFlushTsOfRegion { pub trait FlushObserver: Send + 'static { /// The callback when the flush has advanced the resolver. async fn before(&mut self, checkpoints: Vec<(Region, TimeStamp)>); - /// The callback when the flush is done. 
(Files are fully written to external storage.) + /// The callback when the flush is done. (Files are fully written to + /// external storage.) async fn after(&mut self, task: &str, rts: u64) -> Result<()>; /// The optional callback to rewrite the resolved ts of this flush. - /// Because the default method (collect all leader resolved ts in the store, and use the minimal TS.) - /// may lead to resolved ts rolling back, if we desire a stronger consistency, we can rewrite a safer resolved ts here. + /// Because the default method (collect all leader resolved ts in the store, + /// and use the minimal TS.) may lead to resolved ts rolling back, if we + /// desire a stronger consistency, we can rewrite a safer resolved ts here. /// Note the new resolved ts cannot be greater than the old resolved ts. async fn rewrite_resolved_ts( &mut self, @@ -282,7 +284,8 @@ where } } // Optionally upload the region checkpoint. - // Unless in some extreme condition, skipping upload the region checkpoint won't lead to data loss. + // Unless in some extreme condition, skipping upload the region checkpoint won't + // lead to data loss. if let Err(err) = self .meta_cli .upload_region_checkpoint(task, &self.checkpoints) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 2defb88b541..ff1e2a4e66c 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -63,7 +63,8 @@ use crate::{ }; const SLOW_EVENT_THRESHOLD: f64 = 120.0; -/// CHECKPOINT_SAFEPOINT_TTL_IF_ERROR specifies the safe point TTL(24 hour) if task has fatal error. +/// CHECKPOINT_SAFEPOINT_TTL_IF_ERROR specifies the safe point TTL(24 hour) if +/// task has fatal error. const CHECKPOINT_SAFEPOINT_TTL_IF_ERROR: u64 = 24; pub struct Endpoint { @@ -255,7 +256,8 @@ where async fn starts_flush_ticks(router: Router) { loop { // check every 5s. 
- // TODO: maybe use global timer handle in the `tikv_utils::timer` (instead of enabling timing in the current runtime)? + // TODO: maybe use global timer handle in the `tikv_utils::timer` (instead of + // enabling timing in the current runtime)? tokio::time::sleep(Duration::from_secs(5)).await; debug!("backup stream trigger flush tick"); router.tick().await; @@ -415,7 +417,8 @@ where } } - /// Convert a batch of events to the cmd batch, and update the resolver status. + /// Convert a batch of events to the cmd batch, and update the resolver + /// status. fn record_batch(subs: SubscriptionTracer, batch: CmdBatch) -> Option { let region_id = batch.region_id; let mut resolver = match subs.get_subscription_of(region_id) { @@ -425,7 +428,9 @@ where return None; } }; - // Stale data is accpetable, while stale locks may block the checkpoint advancing. + // Stale data is acceptable, while stale locks may block the checkpoint + // advancing. + // ```text // Let L be the instant some key locked, U be the instant it unlocked, // +---------*-------L-----------U--*-------------+ // ^ ^----(1)----^ ^ We get the snapshot for initial scanning at here. @@ -434,6 +439,7 @@ where // ...note that (1) is the last cmd batch of first observing, so the unlock event would never be sent to us. // ...then the lock would get an eternal life in the resolver :| // (Before we refreshing the resolver for this region again) + // ``` if batch.pitr_id != resolver.value().handle.id { debug!("stale command"; "region_id" => %region_id, "now" => ?resolver.value().handle.id, "remote" => ?batch.pitr_id); return None; @@ -529,8 +535,10 @@ where "end_key" => utils::redact(&end_key), ); } - // Assuming the `region info provider` would read region info form `StoreMeta` directly and this would be fast. - // If this gets slow, maybe make it async again. (Will that bring race conditions? say `Start` handled after `ResfreshResolver` of some region.) 
+ // Assuming the `region info provider` would read region info form `StoreMeta` + // directly and this would be fast. If this gets slow, maybe make it async + // again. (Will that bring race conditions? say `Start` handled after + // `ResfreshResolver` of some region.) let range_init_result = init.initialize_range(start_key.clone(), end_key.clone()); match range_init_result { Ok(()) => { @@ -680,7 +688,8 @@ where ); } - /// unload a task from memory: this would stop observe the changes required by the task temporarily. + /// unload a task from memory: this would stop observe the changes required + /// by the task temporarily. fn unload_task(&self, task: &str) -> Option { let router = self.range_router.clone(); @@ -988,8 +997,9 @@ pub enum Task { /// FatalError pauses the task and set the error. FatalError(TaskSelector, Box), /// Run the callback when see this message. Only for test usage. - /// NOTE: Those messages for testing are not guared by `#[cfg(test)]` for now, because - /// the integration test would not enable test config when compiling (why?) + /// NOTE: Those messages for testing are not guarded by `#[cfg(test)]` for + /// now, because the integration test would not enable test config when + /// compiling (why?) Sync( // Run the closure if ... Box, @@ -998,8 +1008,9 @@ pub enum Task { ), /// Mark the store as a failover store. /// This would prevent store from updating its checkpoint ts for a while. - /// Because we are not sure whether the regions in the store have new leader -- - /// we keep a safe checkpoint so they can choose a safe `from_ts` for initial scanning. + /// Because we are not sure whether the regions in the store have new leader + /// -- we keep a safe checkpoint so they can choose a safe `from_ts` for + /// initial scanning. MarkFailover(Instant), /// Flush the task with name. Flush(String), @@ -1032,8 +1043,8 @@ pub enum ObserveOp { }, /// Destroy the region subscription. 
/// Unlike `Stop`, this will assume the region would never go back. - /// For now, the effect of "never go back" is that we won't try to hint other store - /// the checkpoint ts of this region. + /// For now, the effect of "never go back" is that we won't try to hint + /// other store the checkpoint ts of this region. Destroy { region: Region, }, diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 40e0ab5c60b..05b370e2985 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -110,7 +110,8 @@ impl EventLoader { }) } - /// Scan a batch of events from the snapshot, and save them into the internal buffer. + /// Scan a batch of events from the snapshot, and save them into the + /// internal buffer. fn fill_entries(&mut self) -> Result { assert!( self.entry_batch.is_empty(), @@ -278,11 +279,13 @@ where /// and return the current snapshot of that region. fn observe_over(&self, region: &Region, cmd: ChangeObserver) -> Result { // There are 2 ways for getting the initial snapshot of a region: - // 1. the BR method: use the interface in the RaftKv interface, read the key-values directly. - // 2. the CDC method: use the raftstore message `SignificantMsg::CaptureChange` to - // register the region to CDC observer and get a snapshot at the same time. - // Registering the observer to the raftstore is necessary because we should only listen events from leader. - // In CDC, the change observer is per-delegate(i.e. per-region), we can create the command per-region here too. + // - the BR method: use the interface in the RaftKv interface, read the + // key-values directly. + // - the CDC method: use the raftstore message `SignificantMsg::CaptureChange` + // to register the region to CDC observer and get a snapshot at the same time. + // Registering the observer to the raftstore is necessary because we should only + // listen events from leader. 
In CDC, the change observer is + // per-delegate(i.e. per-region), we can create the command per-region here too. let (callback, fut) = tikv_util::future::paired_future_callback::>(); @@ -351,7 +354,8 @@ where raftstore::store::util::compare_region_epoch( region.get_region_epoch(), &v.value().meta, - // No need for checking conf version because conf change won't cancel the observation. + // No need for checking conf version because conf change won't cancel the + // observation. false, true, false, @@ -359,8 +363,8 @@ where Ok(v) }) .map_err(|err| Error::Contextual { - // Both when we cannot find the region in the track and - // the epoch has changed means that we should cancel the current turn of initial scanning. + // Both when we cannot find the region in the track and the epoch has changed means + // that we should cancel the current turn of initial scanning. inner_error: Box::new(Error::ObserveCanceled( region_id, region.get_region_epoch().clone(), @@ -446,7 +450,8 @@ where Ok(stats) } - /// initialize a range: it simply scan the regions with leader role and send them to [`initialize_region`]. + /// initialize a range: it simply scan the regions with leader role and send + /// them to [`initialize_region`]. pub fn initialize_range(&self, start_key: Vec, end_key: Vec) -> Result<()> { let mut pager = RegionPager::scan_from(self.regions.clone(), start_key, end_key); loop { @@ -456,10 +461,11 @@ where break; } for r in regions { - // Note: Even we did the initial scanning, and blocking resolved ts from advancing, - // if the next_backup_ts was updated in some extreme condition, there is still little chance to lost data: - // For example, if a region cannot elect the leader for long time. (say, net work partition) - // At that time, we have nowhere to record the lock status of this region. 
+ // Note: Even we did the initial scanning, and blocking resolved ts from + // advancing, if the next_backup_ts was updated in some extreme condition, there + // is still little chance to lost data: For example, if a region cannot elect + // the leader for long time. (say, net work partition) At that time, we have + // nowhere to record the lock status of this region. let success = try_send!( self.scheduler, Task::ModifyObserve(ObserveOp::Start { region: r.region }) diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index dc21f86b526..2732952930c 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -244,7 +244,8 @@ impl MetadataClient { } /// Initialize a task: execute some general operations over the keys. - /// For now, it sets the checkpoint ts if there isn't one for the current store. + /// For now, it sets the checkpoint ts if there isn't one for the current + /// store. pub async fn init_task(&self, task: &StreamBackupTaskInfo) -> Result<()> { let if_present = Condition::new( MetaKey::next_backup_ts_of(&task.name, self.store_id), @@ -263,7 +264,8 @@ impl MetadataClient { } /// Upload the last error information to the etcd. - /// This won't pause the task. Even this method would usually be paired with `pause`. + /// This won't pause the task. Even this method would usually be paired with + /// `pause`. pub async fn report_last_error(&self, name: &str, last_error: StreamBackupError) -> Result<()> { use protobuf::Message; let now = Instant::now(); @@ -376,7 +378,8 @@ impl MetadataClient { } /// watch event stream from the revision(exclusive). - /// the revision would usually come from a WithRevision struct(which indices the revision of the inner item). + /// the revision would usually come from a WithRevision struct(which indices + /// the revision of the inner item). 
pub async fn events_from(&self, revision: i64) -> Result> { let watcher = self .meta_store @@ -436,7 +439,8 @@ impl MetadataClient { Ok(()) } - /// Get the storage checkpoint from metadata. This function is justly used for test. + /// Get the storage checkpoint from metadata. This function is justly used + /// for test. pub async fn get_storage_checkpoint(&self, task_name: &str) -> Result { let now = Instant::now(); defer! { @@ -508,8 +512,8 @@ impl MetadataClient { }) } - /// Perform a two-phase bisection search algorithm for the intersection of all ranges - /// and the specificated range (usually region range.) + /// Perform a two-phase bisection search algorithm for the intersection of + /// all ranges and the specificated range (usually region range.) /// TODO: explain the algorithm? pub async fn range_overlap_of_task( &self, @@ -637,8 +641,8 @@ impl MetadataClient { } /// insert a task with ranges into the metadata store. - /// the current abstraction of metadata store doesn't support transaction API. - /// Hence this function is non-transactional and only for testing. + /// the current abstraction of metadata store doesn't support transaction + /// API. Hence this function is non-transactional and only for testing. pub async fn insert_task_with_range( &self, task: &StreamTask, diff --git a/components/backup-stream/src/metadata/keys.rs b/components/backup-stream/src/metadata/keys.rs index 4db978c2cb6..32962ec36b0 100644 --- a/components/backup-stream/src/metadata/keys.rs +++ b/components/backup-stream/src/metadata/keys.rs @@ -9,7 +9,8 @@ const PATH_STORAGE_CHECKPOINT: &str = "/storage-checkpoint"; const PATH_RANGES: &str = "/ranges"; const PATH_PAUSE: &str = "/pause"; const PATH_LAST_ERROR: &str = "/last-error"; -// Note: maybe use something like `const_fmt` for concatenating constant strings? +// Note: maybe use something like `const_fmt` for concatenating constant +// strings? 
const TASKS_PREFIX: &str = "/tidb/br-stream/info/"; /// A key that associates to some metadata. @@ -60,7 +61,8 @@ impl KeyValue { } /// Take the start-key and end-key from a metadata key-value pair. - /// example: `KeyValue(/ranges/, ) -> (, )` + /// example: `KeyValue(/ranges/, ) -> + /// (, )` pub fn take_range(&mut self, task_name: &str) -> (Vec, Vec) { let prefix_len = MetaKey::ranges_prefix_len(task_name); (self.take_key()[prefix_len..].to_vec(), self.take_value()) @@ -160,7 +162,8 @@ impl MetaKey { Self(format!("{}{}/{}/{}", PREFIX, PATH_LAST_ERROR, name, store).into_bytes()) } - /// return the key that keeps the range [self, self.next()) contains only `self`. + /// return the key that keeps the range [self, self.next()) contains only + /// `self`. pub fn next(&self) -> Self { let mut next = self.clone(); next.0.push(0); diff --git a/components/backup-stream/src/metadata/store/etcd.rs b/components/backup-stream/src/metadata/store/etcd.rs index 2b940c905cd..556661700f9 100644 --- a/components/backup-stream/src/metadata/store/etcd.rs +++ b/components/backup-stream/src/metadata/store/etcd.rs @@ -61,13 +61,14 @@ impl From for KvEventType { impl From for KeyValue { fn from(kv: etcd_client::KeyValue) -> Self { - // TODO: we can move out the vector in the KeyValue struct here. (instead of copying.) - // But that isn't possible for now because: + // TODO: we can move out the vector in the KeyValue struct here. (instead of + // copying.) But that isn't possible for now because: // - The raw KV pair(defined by the protocol buffer of etcd) is private. - // - That did could be exported by `pub-fields` feature of the client. - // However that feature isn't published in theirs Cargo.toml (Is that a mistake?). - // - Indeed, we can use `mem::transmute` here because `etcd_client::KeyValue` has `#[repr(transparent)]`. - // But before here become a known bottle neck, I'm not sure whether it's worthwhile for involving unsafe code. 
+ // - That did could be exported by `pub-fields` feature of the client. However + // that feature isn't published in theirs Cargo.toml (Is that a mistake?). + // - Indeed, we can use `mem::transmute` here because `etcd_client::KeyValue` + // has `#[repr(transparent)]`. But before here become a known bottle neck, I'm + // not sure whether it's worthwhile for involving unsafe code. KeyValue(MetaKey(kv.key().to_owned()), kv.value().to_owned()) } } @@ -75,7 +76,7 @@ impl From for KeyValue { /// Prepare the etcd options required by the keys. /// Return the start key for requesting. macro_rules! prepare_opt { - ($opt: ident, $keys: expr) => { + ($opt:ident, $keys:expr) => { match $keys { Keys::Prefix(key) => { $opt = $opt.with_prefix(); @@ -203,7 +204,7 @@ impl EtcdStore { Compare::value(cond.over_key, op, cond.arg) } - /// Convert the transcation operations to etcd transcation ops. + /// Convert the transaction operations to etcd transaction ops. fn to_txn(ops: &mut [super::TransactionOp], leases: &HashMap) -> Vec { ops.iter_mut().map(|op| match op { TransactionOp::Put(key, opt) => { @@ -234,7 +235,8 @@ impl EtcdStore { /// Make a conditional txn. /// For now, this wouldn't split huge transaction into smaller ones, - /// so when playing with etcd in PD, conditional transaction should be small. + /// so when playing with etcd in PD, conditional transaction should be + /// small. 
async fn make_conditional_txn( cli: &mut Client, mut txn: super::CondTransaction, diff --git a/components/backup-stream/src/metadata/store/lazy_etcd.rs b/components/backup-stream/src/metadata/store/lazy_etcd.rs index 7e1858b913e..97573ab756e 100644 --- a/components/backup-stream/src/metadata/store/lazy_etcd.rs +++ b/components/backup-stream/src/metadata/store/lazy_etcd.rs @@ -105,9 +105,10 @@ impl LazyEtcdClientInner { async fn connect(&self) -> Result { let store = retry(|| { // For now, the interface of the `etcd_client` doesn't us to control - // how to create channels when connecting, hence we cannot update the tls config at runtime. - // TODO: maybe add some method like `with_channel` for `etcd_client`, and adapt the `SecurityManager` API, - // instead of doing everything by own. + // how to create channels when connecting, hence we cannot update the tls config + // at runtime. + // TODO: maybe add some method like `with_channel` for `etcd_client`, and adapt + // the `SecurityManager` API, instead of doing everything by own. etcd_client::Client::connect(self.endpoints.clone(), Some(self.opt.clone())) }) .await diff --git a/components/backup-stream/src/metadata/store/mod.rs b/components/backup-stream/src/metadata/store/mod.rs index 0855582da59..e5d1f03e715 100644 --- a/components/backup-stream/src/metadata/store/mod.rs +++ b/components/backup-stream/src/metadata/store/mod.rs @@ -48,7 +48,8 @@ impl Condition { } /// A conditional transaction. -/// This would atomicly evalute the condition, and execute corresponding transaction. +/// This would atomically evaluate the condition, and execute corresponding +/// transaction. 
#[derive(Debug)] pub struct CondTransaction { cond: Condition, diff --git a/components/backup-stream/src/metadata/store/slash_etc.rs b/components/backup-stream/src/metadata/store/slash_etc.rs index 1a2f127501c..2ae4c05dfaf 100644 --- a/components/backup-stream/src/metadata/store/slash_etc.rs +++ b/components/backup-stream/src/metadata/store/slash_etc.rs @@ -83,7 +83,8 @@ impl Snapshot for WithRevision { } // use iterator operations (instead of collect all kv pairs in the range) - // if the test case get too slow. (How can we figure out whether there are more?) + // if the test case get too slow. (How can we figure out whether there are + // more?) let more = if extra.limit > 0 { let more = kvs.len() > extra.limit; kvs.truncate(extra.limit); diff --git a/components/backup-stream/src/observer.rs b/components/backup-stream/src/observer.rs index ad22b67e145..36c310d3532 100644 --- a/components/backup-stream/src/observer.rs +++ b/components/backup-stream/src/observer.rs @@ -19,13 +19,15 @@ use crate::{ }; /// The inflight `StartObserve` message count. -/// Currently, we handle the `StartObserve` message in the main loop(endpoint thread), which may -/// take longer time than expected. So when we are starting to observe many region (e.g. failover), -/// there may be many pending messages, those messages won't block the advancing of checkpoint ts. -/// So the checkpoint ts may be too late and losing some data. +/// Currently, we handle the `StartObserve` message in the main loop(endpoint +/// thread), which may take longer time than expected. So when we are starting +/// to observe many region (e.g. failover), there may be many pending messages, +/// those messages won't block the advancing of checkpoint ts. So the checkpoint +/// ts may be too late and losing some data. 
/// -/// This is a temporary solution for this problem: If this greater than (1), then it implies that there are some -/// inflight wait-for-initialized regions, we should block the resolved ts from advancing in that condition. +/// This is a temporary solution for this problem: If this greater than (1), +/// then it implies that there are some inflight wait-for-initialized regions, +/// we should block the resolved ts from advancing in that condition. /// /// FIXME: Move handler of `ModifyObserve` to another thread, and remove this :( pub static IN_FLIGHT_START_OBSERVE_MESSAGE: AtomicUsize = AtomicUsize::new(0); @@ -99,8 +101,8 @@ impl BackupStreamObserver { impl Coprocessor for BackupStreamObserver {} impl CmdObserver for BackupStreamObserver { - // `BackupStreamObserver::on_flush_applied_cmd_batch` should only invoke if `cmd_batches` is not empty - // and only leader will trigger this. + // `BackupStreamObserver::on_flush_applied_cmd_batch` should only invoke if + // `cmd_batches` is not empty and only leader will trigger this. fn on_flush_applied_cmd_batch( &self, max_level: ObserveLevel, diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index b236cefde77..05e49d232a9 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -58,9 +58,10 @@ use crate::{ const FLUSH_FAILURE_BECOME_FATAL_THRESHOLD: usize = 30; -/// FLUSH_LOG_CONCURRENT_BATCH_COUNT specifies the concurrent count to write to storage. -/// 'Log backup' will produce a large mount of small files during flush interval, -/// and storage could take mistaken if writing all of these files to storage concurrently. +/// FLUSH_LOG_CONCURRENT_BATCH_COUNT specifies the concurrent count to write to +/// storage. 'Log backup' will produce a large mount of small files during flush +/// interval, and storage could take mistaken if writing all of these files to +/// storage concurrently. 
const FLUSH_LOG_CONCURRENT_BATCH_COUNT: usize = 128; #[derive(Clone, Debug)] @@ -124,10 +125,11 @@ pub struct ApplyEvents { } impl ApplyEvents { - /// Convert a [CmdBatch] to a vector of events. Ignoring admin / error commands. - /// At the same time, advancing status of the `Resolver` by those keys. - /// Note: the resolved ts cannot be advanced if there is no command, - /// maybe we also need to update resolved_ts when flushing? + /// Convert a [CmdBatch] to a vector of events. Ignoring admin / error + /// commands. At the same time, advancing status of the `Resolver` by + /// those keys. + /// Note: the resolved ts cannot be advanced if there is no command, maybe + /// we also need to update resolved_ts when flushing? pub fn from_cmd_batch(cmd: CmdBatch, resolver: &mut TwoPhaseResolver) -> Self { let region_id = cmd.region_id; let mut result = vec![]; @@ -241,7 +243,8 @@ impl ApplyEvents { >::borrow(&item).clone(), ApplyEvents { events: { - // assuming the keys in the same region would probably be in one group. + // assuming the keys in the same region would probably be in one + // group. let mut v = Vec::with_capacity(event_len); v.push(event); v @@ -336,7 +339,8 @@ pub struct RouterInner { /// The temporary directory for all tasks. prefix: PathBuf, - /// The handle to Endpoint, we should send `Flush` to endpoint if there are too many temporary files. + /// The handle to Endpoint, we should send `Flush` to endpoint if there are + /// too many temporary files. scheduler: Scheduler, /// The size limit of temporary file per task. temp_file_size_limit: u64, @@ -371,8 +375,9 @@ impl RouterInner { } } - /// Find the task for a region. If `end_key` is empty, search from start_key to +inf. - /// It simply search for a random possible overlapping range and get its task. + /// Find the task for a region. If `end_key` is empty, search from start_key + /// to +inf. It simply search for a random possible overlapping range and + /// get its task. 
/// FIXME: If a region crosses many tasks, this can only find one of them. pub fn find_task_by_range(&self, start_key: &[u8], mut end_key: &[u8]) -> Option { let r = self.ranges.rl(); @@ -384,11 +389,13 @@ impl RouterInner { } /// Register some ranges associated to some task. - /// Because the observer interface yields encoded data key, the key should be ENCODED DATA KEY too. - /// (i.e. encoded by `Key::from_raw(key).into_encoded()`, [`utils::wrap_key`] could be a shortcut.). - /// We keep ranges in memory to filter kv events not in these ranges. + /// Because the observer interface yields encoded data key, the key should + /// be ENCODED DATA KEY too. (i.e. encoded by + /// `Key::from_raw(key).into_encoded()`, [`utils::wrap_key`] could be + /// a shortcut.). We keep ranges in memory to filter kv events not in + /// these ranges. fn register_ranges(&self, task_name: &str, ranges: Vec<(Vec, Vec)>) { - // TODO reigister ranges to filter kv event + // TODO register ranges to filter kv event // register ranges has two main purpose. // 1. filter kv event that no need to backup // 2. route kv event to the corresponding file. @@ -494,9 +501,9 @@ impl RouterInner { let task_info = self.get_task_info(&task).await?; task_info.on_events(events).await?; - // When this event make the size of temporary files exceeds the size limit, make a flush. - // Note that we only flush if the size is less than the limit before the event, - // or we may send multiplied flush requests. + // When this event make the size of temporary files exceeds the size limit, make + // a flush. Note that we only flush if the size is less than the limit before + // the event, or we may send multiplied flush requests. debug!( "backup stream statics size"; "task" => ?task, @@ -526,8 +533,8 @@ impl RouterInner { futures::future::join_all(tasks).await } - /// flush the specified task, once once success, return the min resolved ts of this flush. - /// returns `None` if failed. 
+ /// flush the specified task, once once success, return the min resolved ts + /// of this flush. returns `None` if failed. pub async fn do_flush( &self, task_name: &str, @@ -586,7 +593,8 @@ impl RouterInner { error!("backup stream schedule task failed"; "error" => ?e); } - // if stream task need flush this time, schedule Task::Flush, or update time justly. + // if stream task need flush this time, schedule Task::Flush, or update time + // justly. if task_info.should_flush() && task_info.set_flushing_status_cas(false, true).is_ok() { info!( "backup stream trigger flush task by tick"; @@ -618,14 +626,16 @@ pub enum FormatType { } impl TempFileKey { - /// Create the key for an event. The key can be used to find which temporary file the event should be stored. + /// Create the key for an event. The key can be used to find which temporary + /// file the event should be stored. fn of(kv: &ApplyEvent, region_id: u64) -> Self { let table_id = if kv.is_meta() { // Force table id of meta key be zero. 0 } else { - // When we cannot extract the table key, use 0 for the table key(perhaps we insert meta key here.). - // Can we elide the copy here(or at least, take a slice of key instead of decoding the whole key)? + // When we cannot extract the table key, use 0 for the table key(perhaps we + // insert meta key here.). Can we elide the copy here(or at least, + // take a slice of key instead of decoding the whole key)? Key::from_encoded_slice(&kv.key) .into_raw() .ok() @@ -700,11 +710,14 @@ impl TempFileKey { } /// path_to_log_file specifies the path of record log. - /// eg. 
"v1/${date}/${hour}/${store_id}/t00000071/434098800931373064-f0251bd5-1441-499a-8f53-adc0d1057a73.log" + /// ```text + /// v1/${date}/${hour}/${store_id}/t00000071/434098800931373064-f0251bd5-1441-499a-8f53-adc0d1057a73.log + /// ``` fn path_to_log_file(&self, store_id: u64, min_ts: u64, max_ts: u64) -> String { format!( "v1/{}/{}/{}/t{:08}/{:012}-{}.log", - // We may delete a range of files, so using the max_ts for preventing remove some records wrong. + // We may delete a range of files, so using the max_ts for preventing remove some + // records wrong. Self::format_date_time(max_ts, FormatType::Date), Self::format_date_time(max_ts, FormatType::Hour), store_id, @@ -715,7 +728,9 @@ impl TempFileKey { } /// path_to_schema_file specifies the path of schema log. - /// eg. "v1/${date}/${hour}/${store_id}/schema-meta/434055683656384515-cc3cb7a3-e03b-4434-ab6c-907656fddf67.log" + /// ```text + /// v1/${date}/${hour}/${store_id}/schema-meta/434055683656384515-cc3cb7a3-e03b-4434-ab6c-907656fddf67.log + /// ``` fn path_to_schema_file(store_id: u64, min_ts: u64, max_ts: u64) -> String { format!( "v1/{}/{}/{}/schema-meta/{:012}-{}.log", @@ -744,7 +759,8 @@ pub struct StreamTaskInfo { ranges: Vec<(Vec, Vec)>, /// The parent directory of temporary files. temp_dir: PathBuf, - /// The temporary file index. Both meta (m prefixed keys) and data (t prefixed keys). + /// The temporary file index. Both meta (m prefixed keys) and data (t + /// prefixed keys). files: SlotMap, /// flushing_files contains files pending flush. flushing_files: RwLock, DataFileInfo)>>, @@ -756,9 +772,10 @@ pub struct StreamTaskInfo { min_resolved_ts: TimeStamp, /// Total size of all temporary files in byte. total_size: AtomicUsize, - /// This should only be set to `true` by `compare_and_set(current=false, value=ture)`. - /// The thread who setting it to `true` takes the responsibility of sending the request to the - /// scheduler for flushing the files then. 
+ /// This should only be set to `true` by `compare_and_set(current=false, + /// value=true)`. The thread who setting it to `true` takes the + /// responsibility of sending the request to the scheduler for flushing + /// the files then. /// /// If the request failed, that thread can set it to `false` back then. flushing: AtomicBool, @@ -837,7 +854,8 @@ impl StreamTaskInfo { let mut w = self.files.write().await; // double check before insert. there may be someone already insert that // when we are waiting for the write lock. - // slience the lint advising us to use the `Entry` API which may introduce copying. + // silence the lint advising us to use the `Entry` API which may introduce + // copying. #[allow(clippy::map_entry)] if !w.contains_key(&key) { let path = self.temp_dir.join(key.temp_file_name()); @@ -918,8 +936,9 @@ impl StreamTaskInfo { } pub fn should_flush(&self) -> bool { - // When it doesn't flush since 0.8x of auto-flush interval, we get ready to start flushing. - // So that we will get a buffer for the cost of actual flushing. + // When it doesn't flush since 0.8x of auto-flush interval, we get ready to + // start flushing. So that we will get a buffer for the cost of actual + // flushing. self.get_last_flush_time().saturating_elapsed_secs() >= self.flush_interval.as_secs_f64() * 0.8 } @@ -930,7 +949,8 @@ impl StreamTaskInfo { /// move need-flushing files to flushing_files. pub async fn move_to_flushing_files(&self, store_id: u64) -> Result<&Self> { - // if flushing_files is not empty, which represents this flush is a retry operation. + // if flushing_files is not empty, which represents this flush is a retry + // operation. if !self.flushing_files.read().await.is_empty() { return Ok(self); } @@ -1032,8 +1052,9 @@ impl StreamTaskInfo { /// execute the flush: copy local files to external storage. /// if success, return the last resolved ts of this flush. 
- /// The caller can try to advance the resolved ts and provide it to the function, - /// and we would use max(resolved_ts_provided, resolved_ts_from_file). + /// The caller can try to advance the resolved ts and provide it to the + /// function, and we would use `max(resolved_ts_provided, + /// resolved_ts_from_file)`. pub async fn do_flush( &self, store_id: u64, diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index 0b415f95bf6..4555bdbf4ff 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -62,10 +62,10 @@ pub struct ResolvedRegions { } impl ResolvedRegions { - /// compose the calculated global checkpoint and region checkpoints. - /// note: maybe we can compute the global checkpoint internal and getting the interface clear. - /// however we must take the `min_ts` or we cannot provide valid global checkpoint if there - /// isn't any region checkpoint. + /// Compose the calculated global checkpoint and region checkpoints. + /// Note: Maybe we can compute the global checkpoint internal and getting + /// the interface clear. However we must take the `min_ts` or we cannot + /// provide valid global checkpoint if there isn't any region checkpoint. pub fn new(checkpoint: TimeStamp, checkpoints: Vec<(Region, TimeStamp)>) -> Self { Self { items: checkpoints, @@ -128,7 +128,8 @@ where handle: ObserveHandle, ) -> Result { let region_id = region.get_id(); - // Note: we have external retry at `ScanCmd::exec_by_with_retry`, should we keep retrying here? + // Note: we have external retry at `ScanCmd::exec_by_with_retry`, should we keep + // retrying here? let snap = self.observe_over_with_retry(region, move || { ChangeObserver::from_pitr(region_id, handle.clone()) })?; @@ -222,9 +223,11 @@ fn scan_executor_loop( } /// spawn the executors in the scan pool. 
-/// we make workers thread instead of spawn scan task directly into the pool because the [`InitialDataLoader`] isn't `Sync` hence -/// we must use it very carefully or rustc (along with tokio) would complain that we made a `!Send` future. -/// so we have moved the data loader to the synchronous context so its reference won't be shared between threads any more. +/// we make workers thread instead of spawn scan task directly into the pool +/// because the [`InitialDataLoader`] isn't `Sync` hence we must use it very +/// carefully or rustc (along with tokio) would complain that we made a `!Send` +/// future. so we have moved the data loader to the synchronous context so its +/// reference won't be shared between threads any more. fn spawn_executors(init: impl InitialScan + Send + 'static, number: usize) -> ScanPoolHandle { let (tx, rx) = crossbeam::channel::bounded(MESSAGE_BUFFER_SIZE); let pool = create_scan_pool(number); @@ -280,8 +283,9 @@ const MESSAGE_BUFFER_SIZE: usize = 4096; /// The operator for region subscription. /// It make a queue for operations over the `SubscriptionTracer`, generally, -/// we should only modify the `SubscriptionTracer` itself (i.e. insert records, remove records) at here. -/// So the order subscription / desubscription won't be broken. +/// we should only modify the `SubscriptionTracer` itself (i.e. insert records, +/// remove records) at here. So the order subscription / desubscription won't be +/// broken. pub struct RegionSubscriptionManager { // Note: these fields appear everywhere, maybe make them a `context` type? regions: R, @@ -337,7 +341,8 @@ where /// /// # returns /// - /// a two-tuple, the first is the handle to the manager, the second is the operator loop future. + /// a two-tuple, the first is the handle to the manager, the second is the + /// operator loop future. 
pub fn start( initial_loader: InitialDataLoader, observer: BackupStreamObserver, @@ -453,7 +458,8 @@ where } let cps = self.subs.resolve_with(min_ts); let min_region = cps.iter().min_by_key(|(_, rts)| rts); - // If there isn't any region observed, the `min_ts` can be used as resolved ts safely. + // If there isn't any region observed, the `min_ts` can be used as resolved ts + // safely. let rts = min_region.map(|(_, rts)| *rts).unwrap_or(min_ts); info!("getting checkpoint"; "defined_by_region" => ?min_region.map(|r| r.0.get_id()), "checkpoint" => %rts); self.subs.warn_if_gap_too_huge(rts); @@ -621,8 +627,9 @@ where fn spawn_scan(&self, cmd: ScanCmd) { // we should not spawn initial scanning tasks to the tokio blocking pool // because it is also used for converting sync File I/O to async. (for now!) - // In that condition, if we blocking for some resources(for example, the `MemoryQuota`) - // at the block threads, we may meet some ghosty deadlock. + // In that condition, if we blocking for some resources(for example, the + // `MemoryQuota`) at the block threads, we may meet some ghosty + // deadlock. let s = self.scan_pool_handle.request(cmd); if let Err(err) = s { let region_id = err.0.region.get_id(); diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 30063089804..aa9f35705fb 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -17,7 +17,8 @@ pub struct SubscriptionTracer(Arc>); #[derive(Debug, Eq, PartialEq, Clone, Copy)] pub enum SubscriptionState { - /// When it is newly added (maybe after split or leader transfered from other store), without any flush. + /// When it is newly added (maybe after split or leader transfered from + /// other store), without any flush. Fresh, /// It has been flushed, and running normally. Normal, @@ -95,8 +96,9 @@ impl SubscriptionTracer { // Register a region as tracing. 
// The `start_ts` is used to tracking the progress of initial scanning. - // (Note: the `None` case of `start_ts` is for testing / refresh region status when split / merge, - // maybe we'd better provide some special API for those cases and remove the `Option`?) + // Note: the `None` case of `start_ts` is for testing / refresh region status + // when split / merge, maybe we'd better provide some special API for those + // cases and remove the `Option`? pub fn register_region( &self, region: &Region, @@ -132,7 +134,7 @@ impl SubscriptionTracer { pub fn warn_if_gap_too_huge(&self, ts: TimeStamp) { let gap = TimeStamp::physical_now() - ts.physical(); if gap >= 10 * 60 * 1000 - /* 10 mins */ + // 10 mins { let far_resolver = self .0 @@ -155,7 +157,8 @@ impl SubscriptionTracer { } /// try to mark a region no longer be tracked by this observer. - /// returns whether success (it failed if the region hasn't been observed when calling this.) + /// returns whether success (it failed if the region hasn't been observed + /// when calling this.) pub fn deregister_region_if( &self, region: &Region, @@ -170,9 +173,12 @@ impl SubscriptionTracer { // use this method to check whether a key exists: // ``` // let mut present = false; - // deregister_region_if(42, |..| { present = true; }); + // deregister_region_if(42, |..| { + // present = true; + // }); // ``` - // At that time, if we call the callback with stale value, the called may get false positive. + // At that time, if we call the callback with stale value, the called may get + // false positive. if o.state == SubscriptionState::Removal { return false; } @@ -195,7 +201,8 @@ impl SubscriptionTracer { /// /// # return /// - /// Whether the status can be updated internally without deregister-and-register. + /// Whether the status can be updated internally without + /// deregister-and-register. 
pub fn try_update_region(&self, new_region: &Region) -> bool { let mut sub = match self.get_subscription_of(new_region.get_id()) { Some(sub) => sub, @@ -282,7 +289,8 @@ impl SubscriptionTracer { } /// This enhanced version of `Resolver` allow some unordered lock events. -/// The name "2-phase" means this is used for 2 *concurrency* phases of observing a region: +/// The name "2-phase" means this is used for 2 *concurrency* phases of +/// observing a region: /// 1. Doing the initial scanning. /// 2. Listening at the incremental data. /// @@ -294,25 +302,31 @@ impl SubscriptionTracer { /// +-> Phase 1: Initial scanning scans writes between start ts and now. /// ``` /// -/// In backup-stream, we execute these two tasks parallel. Which may make some race conditions: -/// - When doing initial scanning, there may be a flush triggered, but the default resolver -/// would probably resolved to the tip of incremental events. -/// - When doing initial scanning, we meet and track a lock already meet by the incremental events, -/// then the default resolver cannot untrack this lock any more. +/// In backup-stream, we execute these two tasks parallel. Which may make some +/// race conditions: +/// - When doing initial scanning, there may be a flush triggered, but the +/// default resolver would probably resolved to the tip of incremental events. +/// - When doing initial scanning, we meet and track a lock already meet by the +/// incremental events, then the default resolver cannot untrack this lock any +/// more. /// /// This version of resolver did some change for solve these problems: -/// - The resolver won't advance the resolved ts to greater than `stable_ts` if there is some. This -/// can help us prevent resolved ts from advancing when initial scanning hasn't finished yet. -/// - When we `untrack` a lock haven't been tracked, this would record it, and skip this lock if we want to track it then. 
-/// This would be safe because: +/// - The resolver won't advance the resolved ts to greater than `stable_ts` if +/// there is some. This can help us prevent resolved ts from advancing when +/// initial scanning hasn't finished yet. +/// - When we `untrack` a lock haven't been tracked, this would record it, and +/// skip this lock if we want to track it then. This would be safe because: /// - untracking a lock not be tracked is no-op for now. -/// - tracking a lock have already being untracked (unordered call of `track` and `untrack`) wouldn't happen at phase 2 for same region. -/// but only when phase 1 and phase 2 happened concurrently, at that time, we wouldn't and cannot advance the resolved ts. +/// - tracking a lock have already being untracked (unordered call of `track` +/// and `untrack`) wouldn't happen at phase 2 for same region. but only when +/// phase 1 and phase 2 happened concurrently, at that time, we wouldn't and +/// cannot advance the resolved ts. pub struct TwoPhaseResolver { resolver: Resolver, future_locks: Vec, /// When `Some`, is the start ts of the initial scanning. - /// And implies the phase 1 (initial scanning) is keep running asynchronously. + /// And implies the phase 1 (initial scanning) is keep running + /// asynchronously. stable_ts: Option, } diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 5aed8f55f7f..89f21567801 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -46,8 +46,9 @@ pub fn wrap_key(v: Vec) -> Vec { } /// Transform a str to a [`engine_traits::CfName`]\(`&'static str`). -/// If the argument isn't one of `""`, `"DEFAULT"`, `"default"`, `"WRITE"`, `"write"`, `"LOCK"`, `"lock"`... -/// returns "ERR_CF". (Which would be ignored then.) +/// If the argument isn't one of `""`, `"DEFAULT"`, `"default"`, `"WRITE"`, +/// `"write"`, `"LOCK"`, `"lock"`... returns "ERR_CF". (Which would be ignored +/// then.) 
pub fn cf_name(s: &str) -> CfName { match s { "" | "DEFAULT" | "default" => CF_DEFAULT, @@ -149,7 +150,8 @@ pub type Slot = Mutex; /// NOTE: Maybe we can use dashmap for replacing the RwLock. pub type SlotMap = RwLock, S>>; -/// Like `..=val`(a.k.a. `RangeToInclusive`), but allows `val` being a reference to DSTs. +/// Like `..=val`(a.k.a. `RangeToInclusive`), but allows `val` being a reference +/// to DSTs. struct RangeToInclusiveRef<'a, T: ?Sized>(&'a T); impl<'a, T: ?Sized> RangeBounds for RangeToInclusiveRef<'a, T> { @@ -191,7 +193,8 @@ pub type SegmentSet = SegmentMap; impl SegmentMap { /// Try to add a element into the segment tree, with default value. - /// (This is useful when using the segment tree as a `Set`, i.e. `SegmentMap`) + /// (This is useful when using the segment tree as a `Set`, i.e. + /// `SegmentMap`) /// /// - If no overlapping, insert the range into the tree and returns `true`. /// - If overlapping detected, do nothing and return `false`. @@ -267,8 +270,8 @@ impl SegmentMap { return Some(overlap_with_start); } // |--s----+-----+----e----| - // Otherwise, the possibility of being overlapping would be there are some sub range - // of the queried range... + // Otherwise, the possibility of being overlapping would be there are some sub + // range of the queried range... // |--s----+----e----+-----| // ...Or the end key is contained by some Range. // For faster query, we merged the two cases together. @@ -286,7 +289,8 @@ impl SegmentMap { covered_by_the_range.map(|(k, v)| (k, &v.range_end, &v.item)) } - /// Check whether the range is overlapping with any range in the segment tree. + /// Check whether the range is overlapping with any range in the segment + /// tree. pub fn is_overlapping(&self, range: (&R, &R)) -> bool where K: Borrow, @@ -301,8 +305,8 @@ impl SegmentMap { } /// transform a [`RaftCmdRequest`] to `(key, value, cf)` triple. 
-/// once it contains a write request, extract it, and return `Left((key, value, cf))`, -/// otherwise return the request itself via `Right`. +/// once it contains a write request, extract it, and return `Left((key, value, +/// cf))`, otherwise return the request itself via `Right`. pub fn request_to_triple(mut req: Request) -> Either<(Vec, Vec, CfName), Request> { let (key, value, cf) = match req.get_cmd_type() { CmdType::Put => { @@ -319,11 +323,11 @@ pub fn request_to_triple(mut req: Request) -> Either<(Vec, Vec, CfName), } /// `try_send!(s: Scheduler, task: T)` tries to send a task to the scheduler, -/// once meet an error, would report it, with the current file and line (so it is made as a macro). -/// returns whether it success. +/// once meet an error, would report it, with the current file and line (so it +/// is made as a macro). returns whether it success. #[macro_export(crate)] macro_rules! try_send { - ($s: expr, $task: expr) => { + ($s:expr, $task:expr) => { match $s.schedule($task) { Err(err) => { $crate::errors::Error::from(err).report(concat!( @@ -341,9 +345,10 @@ macro_rules! try_send { }; } -/// a hacky macro which allow us enable all debug log via the feature `backup_stream_debug`. -/// because once we enable debug log for all crates, it would soon get too verbose to read. -/// using this macro now we can enable debug log level for the crate only (even compile time...). +/// a hacky macro which allow us enable all debug log via the feature +/// `backup_stream_debug`. because once we enable debug log for all crates, it +/// would soon get too verbose to read. using this macro now we can enable debug +/// log level for the crate only (even compile time...). #[macro_export(crate)] macro_rules! debug { ($($t: tt)+) => { @@ -391,7 +396,8 @@ pub fn record_cf_stat(cf_name: &str, stat: &CfStatistics) { ); } -/// a shortcut for handing the result return from `Router::on_events`, when any faliure, send a fatal error to the `doom_messenger`. 
+/// a shortcut for handing the result return from `Router::on_events`, when any +/// failure, send a fatal error to the `doom_messenger`. pub fn handle_on_event_result(doom_messenger: &Scheduler, result: Vec<(String, Result<()>)>) { for (task, res) in result.into_iter() { if let Err(err) = res { @@ -422,8 +428,8 @@ pub struct CallbackWaitGroup { on_finish_all: std::sync::Mutex>>, } -/// A shortcut for making an opaque future type for return type or argument type, -/// which is sendable and not borrowing any variables. +/// A shortcut for making an opaque future type for return type or argument +/// type, which is sendable and not borrowing any variables. /// /// `fut![T]` == `impl Future + Send + 'static` #[macro_export(crate)] @@ -469,7 +475,8 @@ impl CallbackWaitGroup { Box::pin(rx.map(|_| ())) } - /// make a work, as long as the return value held, mark a work in the group is running. + /// make a work, as long as the return value held, mark a work in the group + /// is running. pub fn work(self: Arc) -> Work { self.running.fetch_add(1, Ordering::SeqCst); Work(self) @@ -520,11 +527,12 @@ impl ReadThroughputRecorder { let begin = self.begin.as_ref()?; let end = ins.io_stat().ok()??; let bytes_read = end.read - begin.read; - // FIXME: In our test environment, there may be too many caches hence - // the `bytes_read` is always zero :( - // For now, we eject here and let rocksDB prove that we did read something - // When the proc think we don't touch the block device (even in fact we didn't). - // NOTE: In the real-world, we would accept the zero `bytes_read` value since the cache did exists. + // FIXME: In our test environment, there may be too many caches hence the + // `bytes_read` is always zero. + // For now, we eject here and let rocksDB prove that we did read something when + // the proc think we don't touch the block device (even in fact we didn't). + // NOTE: In the real-world, we would accept the zero `bytes_read` value since + // the cache did exists. 
#[cfg(test)] if bytes_read == 0 { // use println here so we can get this message even log doesn't enabled. diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index b9559d86c1f..671952dc40d 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -159,9 +159,10 @@ impl SuiteBuilder { suite.start_endpoint(id, use_v3); } // TODO: The current mock metastore (slash_etc) doesn't supports multi-version. - // We must wait until the endpoints get ready to watching the metastore, or some modifies may be lost. - // Either make Endpoint::with_client wait until watch did start or make slash_etc support multi-version, - // then we can get rid of this sleep. + // We must wait until the endpoints get ready to watching the metastore, or some + // modifies may be lost. Either make Endpoint::with_client wait until watch did + // start or make slash_etc support multi-version, then we can get rid of this + // sleep. std::thread::sleep(Duration::from_secs(1)); suite } @@ -671,8 +672,8 @@ mod test { } #[test] - /// This case tests whehter the checkpoint ts (next backup ts) can be advanced correctly - /// when async commit is enabled. + /// This case tests whether the checkpoint ts (next backup ts) can be + /// advanced correctly when async commit is enabled. fn async_commit() { let mut suite = super::SuiteBuilder::new_named("async_commit") .nodes(3) @@ -768,8 +769,9 @@ mod test { suite.force_flush_files("inflight_message"); fail::cfg("delay_on_start_observe", "pause").unwrap(); suite.must_shuffle_leader(1); - // Handling the `StartObserve` message and doing flush are executed asynchronously. - // Make a delay of unblocking flush thread for make sure we have handled the `StartObserve`. + // Handling the `StartObserve` message and doing flush are executed + // asynchronously. Make a delay of unblocking flush thread for make sure + // we have handled the `StartObserve`. 
std::thread::sleep(Duration::from_secs(1)); fail::cfg("delay_on_flush", "off").unwrap(); suite.wait_for_flush(); @@ -790,7 +792,8 @@ mod test { .global_progress_of_task("inflight_message"), ) .unwrap(); - // The checkpoint should be advanced as expection when the inflight message has been consumed. + // The checkpoint should be advanced as expected when the inflight message has + // been consumed. assert!(checkpoint > 512, "checkpoint = {}", checkpoint); } diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 8865aa4f94c..ada36a08615 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -345,7 +345,7 @@ impl BackupRange { snapshot, backup_ts, IsolationLevel::Si, - false, /* fill_cache */ + false, // fill_cache Default::default(), Default::default(), false, @@ -969,10 +969,11 @@ impl Endpoint { } return; } - // Flush causal timestamp to make sure that future writes will have larger timestamps. - // And help TiKV-BR acquire a backup-ts with intact data smaller than it. - // (Note that intactness is not fully ensured now, until the safe-ts of RawKV is implemented. - // TiKV-BR need a workaround by rewinding backup-ts to a small "safe interval"). + // Flush causal timestamp to make sure that future writes will have larger + // timestamps. And help TiKV-BR acquire a backup-ts with intact data + // smaller than it. (Note that intactness is not fully ensured now, + // until the safe-ts of RawKV is implemented. TiKV-BR need a workaround + // by rewinding backup-ts to a small "safe interval"). if request.is_raw_kv { if let Err(e) = self .causal_ts_provider @@ -1088,10 +1089,13 @@ fn get_max_start_key(start_key: Option<&Key>, region: &Region) -> Option { } } -/// Construct an backup file name based on the given store id, region, range start key and local unix timestamp. -/// A name consists with five parts: store id, region_id, a epoch version, the hash of range start key and timestamp. 
-/// range start key is used to keep the unique file name for file, to handle different tables exists on the same region. -/// local unix timestamp is used to keep the unique file name for file, to handle receive the same request after connection reset. +/// Construct an backup file name based on the given store id, region, range +/// start key and local unix timestamp. A name consists with five parts: store +/// id, region_id, a epoch version, the hash of range start key and timestamp. +/// range start key is used to keep the unique file name for file, to handle +/// different tables exists on the same region. local unix timestamp is used to +/// keep the unique file name for file, to handle receive the same request after +/// connection reset. pub fn backup_file_name( store_id: u64, region: &Region, @@ -1536,11 +1540,11 @@ pub mod tests { // flush to disk so that read requests can be traced by TiKV limiter. engine .get_rocksdb() - .flush_cf(engine_traits::CF_DEFAULT, true /*sync*/) + .flush_cf(engine_traits::CF_DEFAULT, true /* sync */) .unwrap(); engine .get_rocksdb() - .flush_cf(engine_traits::CF_WRITE, true /*sync*/) + .flush_cf(engine_traits::CF_WRITE, true /* sync */) .unwrap(); // TODO: check key number for each snapshot. @@ -1575,7 +1579,7 @@ pub mod tests { info!("{:?}", files); assert_eq!( files.len(), - file_len, /* default and write */ + file_len, // default and write "{:?}", resp ); @@ -1651,8 +1655,8 @@ pub mod tests { let start_key_idx: u64 = 100; let end_key_idx: u64 = 110; endpoint.region_info.set_regions(vec![( - vec![], //generate_test_raw_key(start_key_idx).into_bytes(), - vec![], //generate_test_raw_key(end_key_idx).into_bytes(), + vec![], // generate_test_raw_key(start_key_idx).into_bytes(), + vec![], // generate_test_raw_key(end_key_idx).into_bytes(), 1, )]); let ctx = Context::default(); @@ -1679,7 +1683,7 @@ pub mod tests { // flush to disk so that read requests can be traced by TiKV limiter. 
engine .get_rocksdb() - .flush_cf(engine_traits::CF_DEFAULT, true /*sync*/) + .flush_cf(engine_traits::CF_DEFAULT, true /* sync */) .unwrap(); // TODO: check key number for each snapshot. @@ -1730,7 +1734,7 @@ pub mod tests { let file_len = 1; let files = resp.get_files(); info!("{:?}", files); - assert_eq!(files.len(), file_len /* default cf*/, "{:?}", resp); + assert_eq!(files.len(), file_len /* default cf */, "{:?}", resp); assert_eq!(files[0].total_kvs, end_key_idx - start_key_idx); assert_eq!(files[0].crc64xor, checksum); assert_eq!(files[0].get_start_key(), file_start); @@ -2000,7 +2004,8 @@ pub mod tests { assert_eq!(responses.len(), 3, "{:?}", responses); // for testing whether dropping the pool before all tasks finished causes panic. - // but the panic must be checked manually... (It may panic at tokio runtime threads...) + // but the panic must be checked manually. (It may panic at tokio runtime + // threads) let mut pool = ControlThreadPool::new(); pool.adjust_with(1); pool.spawn(async { tokio::time::sleep(Duration::from_millis(100)).await }); diff --git a/components/backup/src/softlimit.rs b/components/backup/src/softlimit.rs index babc13326bd..c3a2fc7c796 100644 --- a/components/backup/src/softlimit.rs +++ b/components/backup/src/softlimit.rs @@ -89,9 +89,10 @@ impl SoftLimit { pub trait CpuStatistics { type Container: IntoIterator; // ThreadInfoStatistics needs &mut self to record the thread information. - // RefCell(internal mutability) would make SoftLimitByCpu !Sync, hence futures contains it become !Send (WHY?) - // Mutex would make this function async or blocking. - // Anyway, &mut here is acceptable, since SoftLimitByCpu won't be shared. (Even the &mut here is a little weird...) + // RefCell(internal mutability) would make SoftLimitByCpu !Sync, hence futures + // contains it become !Send (WHY?) Mutex would make this function async or + // blocking. Anyway, &mut here is acceptable, since SoftLimitByCpu won't be + // shared. 
(Even the &mut here is a little weird...) fn get_cpu_usages(&mut self) -> Self::Container; } @@ -119,7 +120,8 @@ impl SoftLimitByCpu { self.current_idle_exclude(|_| false) } - /// returns the current idle processor, ignoring threads with name matches the predicate. + /// returns the current idle processor, ignoring threads with name matches + /// the predicate. fn current_idle_exclude(&mut self, mut exclude: impl FnMut(&str) -> bool) -> f64 { let usages = self.metrics.get_cpu_usages(); let used = usages @@ -129,15 +131,17 @@ impl SoftLimitByCpu { self.total_time - used } - /// apply the limit to the soft limit according to the current CPU remaining. + /// apply the limit to the soft limit according to the current CPU + /// remaining. #[cfg(test)] pub async fn exec_over(&mut self, limit: &SoftLimit) -> Result<()> { self.exec_over_with_exclude(limit, |_| false).await } - /// apply the limit to the soft limit according to the current CPU remaining. - /// when calculating the CPU usage, ignore threads with name matched by the exclude predicate. - /// This would keep at least one thread working. + /// apply the limit to the soft limit according to the current CPU + /// remaining. when calculating the CPU usage, ignore threads with name + /// matched by the exclude predicate. This would keep at least one + /// thread working. #[cfg(test)] pub async fn exec_over_with_exclude( &mut self, diff --git a/components/backup/src/utils.rs b/components/backup/src/utils.rs index 64425b595c8..de57b9f9081 100644 --- a/components/backup/src/utils.rs +++ b/components/backup/src/utils.rs @@ -12,11 +12,13 @@ use txn_types::{Key, TimeStamp}; use crate::{metrics::*, Result}; -// BACKUP_V1_TO_V2_TS is used as causal timestamp to backup RawKV api version V1/V1Ttl data and save to V2 format. -// Use 1 other than 0 because 0 is not a acceptable value for causal timestamp. See api_version::ApiV2::is_valid_ts. 
+// BACKUP_V1_TO_V2_TS is used as causal timestamp to backup RawKV api version +// V1/V1Ttl data and save to V2 format. Use 1 other than 0 because 0 is not a +// acceptable value for causal timestamp. See api_version::ApiV2::is_valid_ts. pub const BACKUP_V1_TO_V2_TS: u64 = 1; /// DaemonRuntime is a "background" runtime, which contains "daemon" tasks: -/// any task spawn into it would run until finish even the runtime isn't referenced. +/// any task spawn into it would run until finish even the runtime isn't +/// referenced. pub struct DaemonRuntime(Option); impl DaemonRuntime { @@ -109,11 +111,12 @@ pub struct KeyValueCodec { } // Usage of the KeyValueCodec in backup process is as following: -// `new` -> `check_backup_api_version`, return false if not supported or input invalid. -// encode the backup range with `encode_backup_key` +// `new` -> `check_backup_api_version`, return false if not supported or input +// invalid. encode the backup range with `encode_backup_key` // In `backup_raw` process -> use `is_valid_raw_value` & // `convert_encoded_key_to_dst_version` & `convert_encoded_value_to_dst_version` -// In BackupResponse, call `decode_backup_key` & `convert_key_range_to_dst_version` +// In BackupResponse, call `decode_backup_key` & +// `convert_key_range_to_dst_version` impl KeyValueCodec { pub fn new(is_raw_kv: bool, cur_api_ver: ApiVersion, dst_api_ver: ApiVersion) -> Self { KeyValueCodec { @@ -204,7 +207,8 @@ impl KeyValueCodec { }) } - // Input key is encoded key for rawkv apiv2 and txnkv. return the decode dst apiversion key. + // Input key is encoded key for rawkv apiv2 and txnkv. return the decode dst + // apiversion key. 
pub fn decode_backup_key(&self, key: Option) -> Result> { if key.is_none() { return Ok(vec![]); diff --git a/components/backup/src/writer.rs b/components/backup/src/writer.rs index 7127d896314..103ee9c6790 100644 --- a/components/backup/src/writer.rs +++ b/components/backup/src/writer.rs @@ -26,9 +26,8 @@ use crate::{backup_file_name, metrics::*, utils::KeyValueCodec, Error, Result}; #[derive(Debug, Clone, Copy)] /// CfNameWrap wraps the CfName type. -/// For removing the 'static lifetime bound in the async function, -/// which doesn't compile due to 'captures lifetime that does not appear in bounds' :(. -/// see https://github.com/rust-lang/rust/issues/63033 +/// For removing the 'static lifetime bound in the async function, which doesn't +/// compile due to 'captures lifetime that does not appear in bounds', see https://github.com/rust-lang/rust/issues/63033 /// FIXME: remove this. pub struct CfNameWrap(pub &'static str); @@ -99,8 +98,8 @@ impl Writer { Ok(()) } - // FIXME: we cannot get sst_info in [save_and_build_file], which may cause the !Send type - // [RocksEnternalSstFileInfo] sent between threads. + // FIXME: we cannot get sst_info in [save_and_build_file], which may cause the + // !Send type [RocksEnternalSstFileInfo] sent between threads. fn finish_read(writer: RocksSstWriter) -> Result<(u64, impl Read)> { let (sst_info, sst_reader) = writer.finish_read()?; Ok((sst_info.file_size(), sst_reader)) diff --git a/components/batch-system/benches/batch-system.rs b/components/batch-system/benches/batch-system.rs index b4e3ffd03ac..c248eabaf04 100644 --- a/components/batch-system/benches/batch-system.rs +++ b/components/batch-system/benches/batch-system.rs @@ -85,8 +85,8 @@ fn bench_imbalance(c: &mut Criterion) { system.shutdown(); } -/// Bench how it performs when scheduling a lot of quick tasks during an long-polling -/// tasks. +/// Bench how it performs when scheduling a lot of quick tasks during an +/// long-polling tasks. 
/// /// A good scheduling algorithm should not starve the quick tasks. fn bench_fairness(c: &mut Criterion) { diff --git a/components/batch-system/src/batch.rs b/components/batch-system/src/batch.rs index 108058ee5f2..49433a73592 100644 --- a/components/batch-system/src/batch.rs +++ b/components/batch-system/src/batch.rs @@ -1,9 +1,10 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -//! This is the core implementation of a batch system. Generally there will be two -//! different kind of FSMs in TiKV's FSM system. One is normal FSM, which usually -//! represents a peer, the other is control FSM, which usually represents something -//! that controls how the former is created or metrics are collected. +//! This is the core implementation of a batch system. Generally there will be +//! two different kind of FSMs in TiKV's FSM system. One is normal FSM, which +//! usually represents a peer, the other is control FSM, which usually +//! represents something that controls how the former is created or metrics are +//! collected. // #[PerformanceCriticalPath] use std::{ @@ -39,7 +40,7 @@ pub enum FsmTypes { // A macro to introduce common definition of scheduler. macro_rules! impl_sched { - ($name:ident, $ty:path, Fsm = $fsm:tt) => { + ($name:ident, $ty:path,Fsm = $fsm:tt) => { pub struct $name { sender: channel::Sender>, low_sender: channel::Sender>, @@ -205,8 +206,9 @@ impl Batch { /// Schedule the normal FSM located at `index`. /// - /// If `inplace`, the relative position of all fsm will not be changed; otherwise, the fsm - /// will be popped and the last fsm will be swap in to reduce memory copy. + /// If `inplace`, the relative position of all fsm will not be changed; + /// otherwise, the fsm will be popped and the last fsm will be swap in + /// to reduce memory copy. 
pub fn schedule(&mut self, router: &BatchRouter, index: usize, inplace: bool) { let to_schedule = match self.normals[index].take() { Some(f) => f, @@ -267,8 +269,8 @@ pub enum HandleResult { KeepProcessing, /// The Fsm should stop at the progress. StopAt { - /// The count of messages that have been acknowledged by handler. The fsm should be - /// released until new messages arrive. + /// The count of messages that have been acknowledged by handler. The + /// fsm should be released until new messages arrive. progress: usize, /// Whether the fsm should be released before `end`. skip_end: bool, @@ -307,7 +309,7 @@ pub trait PollHandler: Send + 'static { /// This function is called when handling readiness for control FSM. /// /// If returned value is Some, then it represents a length of channel. This - /// function will only be called for the same fsm after channel's lengh is + /// function will only be called for the same fsm after channel's length is /// larger than the value. If it returns None, then this function will /// still be called for the same FSM in the next loop unless the FSM is /// stopped. @@ -318,8 +320,8 @@ pub trait PollHandler: Send + 'static { /// The returned value is handled in the same way as `handle_control`. fn handle_normal(&mut self, normal: &mut impl DerefMut) -> HandleResult; - /// This function is called after `handle_normal` is called for all fsm and before calling - /// `end`. The function is expected to run lightweight work. + /// This function is called after `handle_normal` is called for all fsm and + /// before calling `end`. The function is expected to run lightweight work. fn light_end(&mut self, _batch: &mut [Option>]) {} /// This function is called at the end of every round. @@ -389,13 +391,14 @@ impl> Poller { let mut to_skip_end = Vec::with_capacity(self.max_batch_size); // Fetch batch after every round is finished. It's helpful to protect regions - // from becoming hungry if some regions are hot points. 
Since we fetch new fsm every time - // calling `poll`, we do not need to configure a large value for `self.max_batch_size`. + // from becoming hungry if some regions are hot points. Since we fetch new fsm + // every time calling `poll`, we do not need to configure a large value for + // `self.max_batch_size`. let mut run = true; while run && self.fetch_fsm(&mut batch) { - // If there is some region wait to be deal, we must deal with it even if it has overhead - // max size of batch. It's helpful to protect regions from becoming hungry - // if some regions are hot points. + // If there is some region wait to be deal, we must deal with it even if it has + // overhead max size of batch. It's helpful to protect regions from becoming + // hungry if some regions are hot points. let mut max_batch_size = std::cmp::max(self.max_batch_size, batch.normals.len()); // update some online config if needed. { @@ -454,9 +457,9 @@ impl> Poller { if let Ok(fsm) = self.fsm_receiver.try_recv() { run = batch.push(fsm); } - // If we receive a ControlFsm, break this cycle and call `end`. Because ControlFsm - // may change state of the handler, we shall deal with it immediately after - // calling `begin` of `Handler`. + // If we receive a ControlFsm, break this cycle and call `end`. Because + // ControlFsm may change state of the handler, we shall deal with it immediately + // after calling `begin` of `Handler`. if !run || fsm_cnt >= batch.normals.len() { break; } diff --git a/components/batch-system/src/router.rs b/components/batch-system/src/router.rs index 43067ecb202..9975d66dfdc 100644 --- a/components/batch-system/src/router.rs +++ b/components/batch-system/src/router.rs @@ -346,8 +346,8 @@ where let state_unit = mem::size_of::>(); // Every message in crossbeam sender needs 8 bytes to store state. let message_unit = mem::size_of::() + 8; - // crossbeam unbounded channel sender has a list of blocks. Every block has 31 unit - // and every sender has at least one sender. 
+ // crossbeam unbounded channel sender has a list of blocks. Every block has 31 + // unit and every sender has at least one sender. let sender_block_unit = 31; RouterTrace { alive: (mailbox_unit * 8 / 7 // hashmap uses 7/8 of allocated memory. diff --git a/components/causal_ts/src/config.rs b/components/causal_ts/src/config.rs index a856b5b7358..e75bff62d47 100644 --- a/components/causal_ts/src/config.rs +++ b/components/causal_ts/src/config.rs @@ -16,8 +16,9 @@ pub struct Config { /// The minimal renew batch size of BatchTsoProvider. /// /// Default is 100. - /// One TSO is required for every batch of Raft put messages, so by default 1K tso/s should be enough. - /// Benchmark showed that with a 8.6w raw_put per second, the TSO requirement is 600 per second. + /// One TSO is required for every batch of Raft put messages, so by default + /// 1K tso/s should be enough. Benchmark showed that with a 8.6w raw_put + /// per second, the TSO requirement is 600 per second. pub renew_batch_min_size: u32, } diff --git a/components/causal_ts/src/lib.rs b/components/causal_ts/src/lib.rs index 615f01365cd..05626ce7203 100644 --- a/components/causal_ts/src/lib.rs +++ b/components/causal_ts/src/lib.rs @@ -22,7 +22,8 @@ pub trait CausalTsProvider: Send + Sync { /// Get a new timestamp. fn get_ts(&self) -> Result; - /// Flush (cached) timestamps to keep causality on some events, such as "leader transfer". + /// Flush (cached) timestamps to keep causality on some events, such as + /// "leader transfer". fn flush(&self) -> Result<()> { Ok(()) } diff --git a/components/causal_ts/src/observer.rs b/components/causal_ts/src/observer.rs index 8d2c5abc95c..aeb04bfabf5 100644 --- a/components/causal_ts/src/observer.rs +++ b/components/causal_ts/src/observer.rs @@ -20,8 +20,9 @@ use raftstore::{ use crate::{CausalTsProvider, RawTsTracker}; -/// CausalObserver appends timestamp for RawKV V2 data, -/// and invoke causal_ts_provider.flush() on specified event, e.g. 
leader transfer, snapshot apply. +/// CausalObserver appends timestamp for RawKV V2 data, and invoke +/// causal_ts_provider.flush() on specified event, e.g. leader +/// transfer, snapshot apply. /// Should be used ONLY when API v2 is enabled. pub struct CausalObserver { causal_ts_provider: Arc, @@ -37,7 +38,8 @@ impl Clone for CausalObserver { } } -// Causal observer's priority should be higher than all other observers, to avoid being bypassed. +// Causal observer's priority should be higher than all other observers, to +// avoid being bypassed. const CAUSAL_OBSERVER_PRIORITY: u32 = 0; impl CausalObserver { @@ -97,7 +99,8 @@ impl QueryObserver for CausalObserver RoleObserver for CausalObserver RegionChangeObserver for CausalObse return; } - // In the scenario of region merge, the target region would merge some entries from source - // region with larger timestamps (when leader of source region is in another store with - // larger TSO batch than the store of target region's leader). - // So we need a flush after commit merge. See issue #12680. - // TODO: do not need flush if leaders of source & target region are in the same store. + // In the scenario of region merge, the target region would merge some entries + // from source region with larger timestamps (when leader of source region is in + // another store with larger TSO batch than the store of target region's + // leader). So we need a flush after commit merge. See issue #12680. + // TODO: do not need flush if leaders of source & target region are in the same + // store. if let RegionChangeEvent::Update(RegionChangeReason::CommitMerge) = event { self.flush_timestamp(ctx.region(), REASON_REGION_MERGE); } diff --git a/components/causal_ts/src/tso.rs b/components/causal_ts/src/tso.rs index 917353222fa..35e6bffd11b 100644 --- a/components/causal_ts/src/tso.rs +++ b/components/causal_ts/src/tso.rs @@ -31,8 +31,9 @@ use crate::{ // Renew on every 100ms, to adjust batch size rapidly enough. 
pub(crate) const TSO_BATCH_RENEW_INTERVAL_DEFAULT: u64 = 100; // Batch size on every renew interval. -// One TSO is required for every batch of Raft put messages, so by default 1K tso/s should be enough. -// Benchmark showed that with a 8.6w raw_put per second, the TSO requirement is 600 per second. +// One TSO is required for every batch of Raft put messages, so by default 1K +// tso/s should be enough. Benchmark showed that with a 8.6w raw_put per second, +// the TSO requirement is 600 per second. pub(crate) const TSO_BATCH_MIN_SIZE_DEFAULT: u32 = 100; // Max batch size of TSO requests. Space of logical timestamp is 262144, // exceed this space will cause PD to sleep, waiting for physical clock advance. @@ -89,7 +90,8 @@ impl TsoBatch { Ok(()) } - // Note: batch is "used up" in flush, and batch size will be enlarged in next renew. + // Note: batch is "used up" in flush, and batch size will be enlarged in next + // renew. pub fn flush(&self) { self.logical_start .store(self.logical_end, Ordering::Relaxed); @@ -114,7 +116,8 @@ impl TsoBatch { } } -/// MAX_RENEW_BATCH_SIZE is the batch size of TSO renew. It is an empirical value. +/// MAX_RENEW_BATCH_SIZE is the batch size of TSO renew. It is an empirical +/// value. const MAX_RENEW_BATCH_SIZE: usize = 64; type RenewError = Arc; @@ -363,8 +366,8 @@ impl CausalTsProvider for BatchTsoProvider { break; } if let Err(err) = block_on(self.renew_tso_batch(false, TSO_BATCH_RENEW_FOR_USED_UP)) { - // `renew_tso_batch` failure is likely to be caused by TSO timeout, which would mean that PD is quite busy. - // So do not retry any more. + // `renew_tso_batch` failure is likely to be caused by TSO timeout, which would + // mean that PD is quite busy. So do not retry any more. 
error!("BatchTsoProvider::get_ts, renew_tso_batch fail on batch used-up"; "err" => ?err); break; } @@ -477,8 +480,8 @@ pub mod tests { let pd_cli = Arc::new(TestPdClient::new(1, false)); pd_cli.set_tso(1000.into()); - // Set `renew_interval` to 0 to disable background renew. Invoke `flush()` to renew manually. - // allocated: [1001, 1100] + // Set `renew_interval` to 0 to disable background renew. Invoke `flush()` to + // renew manually. allocated: [1001, 1100] let provider = block_on(BatchTsoProvider::new_opt( pd_cli.clone(), Duration::ZERO, @@ -539,8 +542,8 @@ pub mod tests { ); } - // Set `renew_interval` to 0 to disable background renew. Invoke `flush()` to renew manually. - // allocated: [1001, 1100] + // Set `renew_interval` to 0 to disable background renew. Invoke `flush()` to + // renew manually. allocated: [1001, 1100] let provider = block_on(BatchTsoProvider::new_opt( pd_cli.clone(), Duration::ZERO, diff --git a/components/cdc/src/channel.rs b/components/cdc/src/channel.rs index 94fe0f74c61..3b1894eb6fc 100644 --- a/components/cdc/src/channel.rs +++ b/components/cdc/src/channel.rs @@ -44,8 +44,9 @@ const CDC_RESP_MAX_BYTES: u32 = 6 * 1024 * 1024; /// Assume the average size of batched `CdcEvent::Event`s is 32KB and /// the average count of batched `CdcEvent::Event`s is 64. -/// +/// ```text /// 2 = (CDC_EVENT_MAX_BYTES * CDC_EVENT_MAX_COUNT / CDC_MAX_RESP_SIZE).ceil() + 1 /* reserve for ResolvedTs */; +/// ``` const CDC_RESP_MAX_BATCH_COUNT: usize = 2; pub enum CdcEvent { diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index 752c068e72a..10de563c4fc 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -64,10 +64,11 @@ impl Default for DownstreamID { pub enum DownstreamState { /// It's just created and rejects change events and resolved timestamps. Uninitialized, - /// It has got a snapshot for incremental scan, and change events will be accepted. 
- /// However it still rejects resolved timestamps. + /// It has got a snapshot for incremental scan, and change events will be + /// accepted. However it still rejects resolved timestamps. Initializing, - /// Incremental scan is finished so that resolved timestamps are acceptable now. + /// Incremental scan is finished so that resolved timestamps are acceptable + /// now. Normal, Stopped, } @@ -78,7 +79,8 @@ impl Default for DownstreamState { } } -/// Shold only be called when it's uninitialized or stopped. Return false if it's stopped. +/// Should only be called when it's uninitialized or stopped. Return false if +/// it's stopped. pub(crate) fn on_init_downstream(s: &AtomicCell) -> bool { s.compare_exchange( DownstreamState::Uninitialized, @@ -87,7 +89,8 @@ pub(crate) fn on_init_downstream(s: &AtomicCell) -> bool { .is_ok() } -/// Shold only be called when it's initializing or stopped. Return false if it's stopped. +/// Should only be called when it's initializing or stopped. Return false if +/// it's stopped. pub(crate) fn post_init_downstream(s: &AtomicCell) -> bool { s.compare_exchange(DownstreamState::Initializing, DownstreamState::Normal) .is_ok() @@ -348,9 +351,10 @@ impl Delegate { let _ = self.broadcast(send); } - /// `txn_extra_op` returns a shared flag which is accessed in TiKV's transaction layer to - /// determine whether to capture modifications' old value or not. Unsubsribing all downstreams - /// or calling `Delegate::stop` will store it with `TxnExtraOp::Noop`. + /// `txn_extra_op` returns a shared flag which is accessed in TiKV's + /// transaction layer to determine whether to capture modifications' old + /// value or not. Unsubscribing all downstreams or calling + /// `Delegate::stop` will store it with `TxnExtraOp::Noop`. /// /// NOTE: Dropping a `Delegate` won't update this flag. pub fn txn_extra_op(&self) -> &AtomicCell { @@ -373,7 +377,8 @@ impl Delegate { Ok(()) } - /// Install a resolver. 
Return downstreams which fail because of the region's internal changes. + /// Install a resolver. Return downstreams which fail because of the + /// region's internal changes. pub fn on_region_ready( &mut self, mut resolver: Resolver, @@ -667,8 +672,8 @@ impl Delegate { ..Default::default() }; let send = move |downstream: &Downstream| { - // No ready downstream or a downstream that does not match the kv_api type, will be ignored. - // There will be one region that contains both Txn & Raw entries. + // No ready downstream or a downstream that does not match the kv_api type, will + // be ignored. There will be one region that contains both Txn & Raw entries. // The judgement here is for sending entries to downstreams with correct kv_api. if !downstream.state.load().ready_for_change_events() || downstream.kv_api != kv_api { return Ok(()); @@ -877,9 +882,9 @@ impl Delegate { if let Err(e) = compare_region_epoch( &downstream.region_epoch, region, - false, /* check_conf_ver */ - true, /* check_ver */ - true, /* include_region */ + false, // check_conf_ver + true, // check_ver + true, // include_region ) { info!( "cdc fail to subscribe downstream"; @@ -918,9 +923,10 @@ fn make_overlapped_rollback(key: Key, row: &mut EventRow) { set_event_row_type(row, EventLogType::Rollback); } -/// Decodes the write record and store its information in `row`. This may be called both when -/// doing incremental scan of observing apply events. There's different behavior for the two -/// case, distinguished by the `is_apply` parameter. +/// Decodes the write record and store its information in `row`. This may be +/// called both when doing incremental scan of observing apply events. There's +/// different behavior for the two case, distinguished by the `is_apply` +/// parameter. 
fn decode_write( key: Vec, value: &[u8], @@ -932,8 +938,8 @@ fn decode_write( let write = WriteRef::parse(value).unwrap().to_owned(); // For scanning, ignore the GC fence and read the old data; - // For observed apply, drop the record it self but keep only the overlapped rollback information - // if gc_fence exists. + // For observed apply, drop the record it self but keep only the overlapped + // rollback information if gc_fence exists. if is_apply && write.gc_fence.is_some() { // `gc_fence` is set means the write record has been rewritten. // Currently the only case is writing overlapped_rollback. And in this case diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index fa6dcb97651..22cb5b94922 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -390,8 +390,8 @@ pub struct Endpoint { timer: SteadyTimer, tso_worker: Runtime, store_meta: Arc>, - /// The concurrency manager for transactions. It's needed for CDC to check locks when - /// calculating resolved_ts. + /// The concurrency manager for transactions. It's needed for CDC to check + /// locks when calculating resolved_ts. concurrency_manager: ConcurrencyManager, config: CdcConfig, @@ -458,7 +458,8 @@ impl, E: KvEngine> Endpoint { .build() .unwrap(); - // Initialized for the first time, subsequent adjustments will be made based on configuration updates. + // Initialized for the first time, subsequent adjustments will be made based on + // configuration updates. let scan_concurrency_semaphore = Arc::new(Semaphore::new(config.incremental_scan_concurrency)); let old_value_cache = OldValueCache::new(config.old_value_cache_memory_quota); @@ -534,7 +535,8 @@ impl, E: KvEngine> Endpoint { "current config" => ?self.config, "change" => ?change ); - // Update the config here. The following adjustments will all use the new values. + // Update the config here. The following adjustments will all use the new + // values. 
self.config.update(change.clone()).unwrap(); // Maybe the cache will be lost due to smaller capacity, @@ -544,8 +546,8 @@ impl, E: KvEngine> Endpoint { .resize(self.config.old_value_cache_memory_quota); } - // Maybe the limit will be exceeded for a while after the concurrency becomes smaller, - // but it is acceptable. + // Maybe the limit will be exceeded for a while after the concurrency becomes + // smaller, but it is acceptable. if change.get("incremental_scan_concurrency").is_some() { self.scan_concurrency_semaphore = Arc::new(Semaphore::new(self.config.incremental_scan_concurrency)) @@ -924,7 +926,8 @@ impl, E: KvEngine> Endpoint { // Reset resolved_regions to empty. let resolved_regions = &mut self.resolved_region_heap; resolved_regions.clear(); - // rawkv only, if user does not use rawkv apiv2, raw_resolved_regions should be empty. + // rawkv only, if user does not use rawkv apiv2, raw_resolved_regions should be + // empty. let mut raw_resolved_regions = ResolvedRegionVec { vec: vec![] }; let total_region_count = regions.len(); @@ -949,7 +952,8 @@ impl, E: KvEngine> Endpoint { } resolved_regions.push(region_id, resolved_ts.min()); // The judge of raw region is not accuracy here, and we may miss at most one - // "normal" raw region. But this will not break the correctness of outlier detection. + // "normal" raw region. But this will not break the correctness of outlier + // detection. if resolved_ts.is_min_ts_from_raw() { raw_resolved_regions.push(region_id, resolved_ts.raw_ts) } @@ -997,7 +1001,8 @@ impl, E: KvEngine> Endpoint { self.broadcast_resolved_ts(outlier_min_resolved_ts, outlier_regions); self.broadcast_resolved_ts(normal_min_resolved_ts, normal_regions); - // rawkv only, if user does not use rawkv apiv2, raw_resolved_regions should be empty. + // rawkv only, if user does not use rawkv apiv2, raw_resolved_regions should be + // empty. 
self.handle_raw_outlier_regions(&mut raw_resolved_regions, min_ts); } @@ -1124,8 +1129,8 @@ impl, E: KvEngine> Endpoint { let mut min_ts = min_ts_pd; let mut min_ts_min_lock = min_ts_pd; - // Sync with concurrency manager so that it can work correctly when optimizations - // like async commit is enabled. + // Sync with concurrency manager so that it can work correctly when + // optimizations like async commit is enabled. // Note: This step must be done before scheduling `Task::MinTS` task, and the // resolver must be checked in or after `Task::MinTS`' execution. cm.update_max_ts(min_ts); @@ -1707,7 +1712,8 @@ mod tests { let mut updated_cfg = cfg.clone(); { // Update it to be smaller than incremental_scan_threads, - // which will be an invalid change and will modified to incremental_scan_threads. + // which will be an invalid change and will modified to + // incremental_scan_threads. updated_cfg.incremental_scan_concurrency = 2; } let diff = cfg.diff(&updated_cfg); @@ -2104,7 +2110,8 @@ mod tests { suite.run(Task::RawTrackTs { region_id, ts }); } let delegate = suite.endpoint.capture_regions.get_mut(®ion_id).unwrap(); - // region is not ready, so raw lock in resolver, raw ts is added to delegate.pending. + // region is not ready, so raw lock in resolver, raw ts is added to + // delegate.pending. assert_eq!(delegate.resolver.is_none(), true); // Schedule resolver ready (resolver is built by conn a). let mut region = Region::default(); @@ -2219,7 +2226,7 @@ mod tests { .recv_timeout(Duration::from_millis(500)) .unwrap() .unwrap(); - assert_matches!(task_recv, + assert_matches!(task_recv, Task::Deregister(Deregister::Delegate {region_id, observe_id, ..}) if region_id == dead_lock_region && observe_id == ob_id); let gap = Duration::from_millis(cur_tso.physical() - dead_lock_ts.physical()).as_secs_f64(); @@ -2873,7 +2880,8 @@ mod tests { .is_none(), true ); - // count become 1002, boundary: 2248 - 3 * 502 = 742, but ts gap is not larger than 60s. 
+ // count become 1002, boundary: 2248 - 3 * 502 = 742, but ts gap is not larger + // than 60s. region_vec2.push(741, TimeStamp::compose(741, 0)); assert_eq!( region_vec2 diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 28b7e5f5d0a..3be509e73d0 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -305,8 +305,9 @@ impl Initializer { Ok(()) } - // It's extracted from `Initializer::scan_batch` to avoid becoming an asynchronous block, - // so that we can limit scan speed based on the thread disk I/O or RocksDB block read bytes. + // It's extracted from `Initializer::scan_batch` to avoid becoming an + // asynchronous block, so that we can limit scan speed based on the thread + // disk I/O or RocksDB block read bytes. fn do_scan( &self, scanner: &mut Scanner, @@ -472,10 +473,10 @@ impl Initializer { pub(crate) fn deregister_downstream(&self, err: Error) { let deregister = if self.build_resolver || err.has_region_error() { // Deregister delegate on the conditions, - // * It fails to build a resolver. A delegate requires a resolver - // to advance resolved ts. - // * A region error. It usually mean a peer is not leader or - // a leader meets an error and can not serve. + // * It fails to build a resolver. A delegate requires a resolver to advance + // resolved ts. + // * A region error. It usually mean a peer is not leader or a leader meets an + // error and can not serve. Deregister::Delegate { region_id: self.region_id, observe_id: self.observe_id, diff --git a/components/cdc/src/metrics.rs b/components/cdc/src/metrics.rs index 969e3b371a4..5db91572112 100644 --- a/components/cdc/src/metrics.rs +++ b/components/cdc/src/metrics.rs @@ -8,9 +8,9 @@ use prometheus::*; use prometheus_static_metric::*; use tikv::storage::Statistics; -/// Installing a new capture contains 2 phases, one for incremental scanning and one for -/// fetching delta changes from raftstore. 
They can share some similar metrics, in which -/// case we can use this tag to distinct them. +/// Installing a new capture contains 2 phases, one for incremental scanning and +/// one for fetching delta changes from raftstore. They can share some similar +/// metrics, in which case we can use this tag to distinct them. pub const TAG_DELTA_CHANGE: &str = "delta_change"; pub const TAG_INCREMENTAL_SCAN: &str = "incremental_scan"; diff --git a/components/cdc/src/observer.rs b/components/cdc/src/observer.rs index 6c0771cbc64..18b4d995077 100644 --- a/components/cdc/src/observer.rs +++ b/components/cdc/src/observer.rs @@ -45,8 +45,8 @@ impl CdcObserver { } pub fn register_to(&self, coprocessor_host: &mut CoprocessorHost) { - // use 0 as the priority of the cmd observer. CDC should have a higher priority than - // the `resolved-ts`'s cmd observer + // use 0 as the priority of the cmd observer. CDC should have a higher priority + // than the `resolved-ts`'s cmd observer coprocessor_host .registry .register_cmd_observer(0, BoxCmdObserver::new(self.clone())); @@ -96,7 +96,8 @@ impl CdcObserver { impl Coprocessor for CdcObserver {} impl CmdObserver for CdcObserver { - // `CdcObserver::on_flush_applied_cmd_batch` should only invoke if `cmd_batches` is not empty + // `CdcObserver::on_flush_applied_cmd_batch` should only invoke if `cmd_batches` + // is not empty fn on_flush_applied_cmd_batch( &self, max_level: ObserveLevel, @@ -119,7 +120,8 @@ impl CmdObserver for CdcObserver { let mut region = Region::default(); region.mut_peers().push(Peer::default()); // Create a snapshot here for preventing the old value was GC-ed. - // TODO: only need it after enabling old value, may add a flag to indicate whether to get it. + // TODO: only need it after enabling old value, may add a flag to indicate + // whether to get it. 
let snapshot = RegionSnapshot::from_snapshot(Arc::new(engine.snapshot()), Arc::new(region)); let get_old_value = move |key, query_ts, diff --git a/components/cdc/src/old_value.rs b/components/cdc/src/old_value.rs index caf3060591e..89f78f694c3 100644 --- a/components/cdc/src/old_value.rs +++ b/components/cdc/src/old_value.rs @@ -104,8 +104,8 @@ impl OldValueCache { } } -/// Fetch old value for `key`. If it can't be found in `old_value_cache`, seek and retrieve it with -/// `query_ts` from `snapshot`. +/// Fetch old value for `key`. If it can't be found in `old_value_cache`, seek +/// and retrieve it with `query_ts` from `snapshot`. pub fn get_old_value( snapshot: &S, key: Key, @@ -171,9 +171,10 @@ pub fn new_old_value_cursor(snapshot: &S, cf: &'static str) - /// Gets the latest value to the key with an older or equal version. /// -/// The key passed in should be a key with a timestamp. This function will returns -/// the latest value of the entry if the user key is the same to the given key and -/// the timestamp is older than or equal to the timestamp in the given key. +/// The key passed in should be a key with a timestamp. This function will +/// returns the latest value of the entry if the user key is the same to the +/// given key and the timestamp is older than or equal to the timestamp in the +/// given key. /// /// `load_from_cf_data` indicates how to get value from `CF_DEFAULT`. pub fn near_seek_old_value( diff --git a/components/cdc/tests/failpoints/test_endpoint.rs b/components/cdc/tests/failpoints/test_endpoint.rs index a38c3988bcc..2e9375ce6a5 100644 --- a/components/cdc/tests/failpoints/test_endpoint.rs +++ b/components/cdc/tests/failpoints/test_endpoint.rs @@ -306,7 +306,8 @@ fn do_test_no_resolved_ts_before_downstream_initialized(version: &str) { } let th = thread::spawn(move || { - // The first downstream can receive timestamps but the second should receive nothing. 
+ // The first downstream can receive timestamps but the second should receive + // nothing. let mut rx = event_feeds[0].replace(None).unwrap(); assert!(recv_timeout(&mut rx, Duration::from_secs(1)).is_ok()); let mut rx = event_feeds[1].replace(None).unwrap(); @@ -318,11 +319,11 @@ fn do_test_no_resolved_ts_before_downstream_initialized(version: &str) { suite.stop(); } -// When a new CDC downstream is installed, delta changes for other downstreams on the same -// region should be flushed so that the new downstream can gets a fresh snapshot to performs -// a incremental scan. CDC can ensure that those delta changes are sent to CDC's `Endpoint` -// before the incremental scan, but `Sink` may break this rule. This case tests it won't -// happen any more. +// When a new CDC downstream is installed, delta changes for other downstreams +// on the same region should be flushed so that the new downstream can gets a +// fresh snapshot to performs a incremental scan. CDC can ensure that those +// delta changes are sent to CDC's `Endpoint` before the incremental scan, but +// `Sink` may break this rule. This case tests it won't happen any more. #[test] fn test_cdc_observed_before_incremental_scan_snapshot() { let cluster = new_server_cluster(0, 1); @@ -331,7 +332,8 @@ fn test_cdc_observed_before_incremental_scan_snapshot() { let region = suite.cluster.get_region(b""); let lead_client = PeerClient::new(&suite.cluster, region.id, new_peer(1, 1)); - // So that the second changefeed can get some delta changes elder than its snapshot. + // So that the second changefeed can get some delta changes elder than its + // snapshot. 
let (mut req_tx_0, event_feed_0, _) = new_event_feed(suite.get_region_cdc_client(region.id)); let req_0 = suite.new_changedata_request(region.id); block_on(req_tx_0.send((req_0, WriteFlags::default()))).unwrap(); diff --git a/components/cdc/tests/integrations/test_cdc.rs b/components/cdc/tests/integrations/test_cdc.rs index 5f9f9bf7209..3be68c5905c 100644 --- a/components/cdc/tests/integrations/test_cdc.rs +++ b/components/cdc/tests/integrations/test_cdc.rs @@ -1177,7 +1177,8 @@ fn test_old_value_multi_changefeeds_impl() { } } - // The downstream 2 can also get old values because `req`.`extra_op` field is ignored now. + // The downstream 2 can also get old values because `req`.`extra_op` field is + // ignored now. event_count = 0; loop { let events = receive_event_2(false).events.to_vec(); @@ -1285,9 +1286,9 @@ fn test_cdc_resolve_ts_checking_concurrency_manager_impl() { } let _guard = lock_key(b"xa", 90); - // The resolved_ts should be blocked by the mem lock but it's already greater than 90. - // Retry until receiving an unchanged resolved_ts because the first several resolved ts received - // might be updated before acquiring the lock. + // The resolved_ts should be blocked by the mem lock but it's already greater + // than 90. Retry until receiving an unchanged resolved_ts because the first + // several resolved ts received might be updated before acquiring the lock. let mut last_resolved_ts = 0; let mut success = false; for _ in 0..5 { @@ -1840,9 +1841,10 @@ fn test_cdc_scan_ignore_gc_fence_impl() { let commit_ts2 = block_on(suite.cluster.pd_client.get_tso()).unwrap(); suite.must_kv_commit(1, vec![key.to_vec()], start_ts2, commit_ts2); - // Assume the first version above is written by async commit and it's commit_ts is not unique. - // Use it's commit_ts as another transaction's start_ts. - // Run check_txn_status on commit_ts1 so that gc_fence will be set on the first version. 
+ // Assume the first version above is written by async commit and it's commit_ts + // is not unique. Use it's commit_ts as another transaction's start_ts. + // Run check_txn_status on commit_ts1 so that gc_fence will be set on the first + // version. let caller_start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); let action = suite.must_check_txn_status( 1, @@ -1940,9 +1942,10 @@ fn test_cdc_extract_rollback_if_gc_fence_set_impl() { let commit_ts2 = block_on(suite.cluster.pd_client.get_tso()).unwrap(); suite.must_kv_commit(1, vec![key.to_vec()], start_ts2, commit_ts2); - // We don't care about the events caused by the previous writings in this test case, and it's - // too complicated to check them. Just skip them here, and wait for resolved_ts to be pushed to - // a greater value than the two versions' commit_ts-es. + // We don't care about the events caused by the previous writings in this test + // case, and it's too complicated to check them. Just skip them here, and + // wait for resolved_ts to be pushed to a greater value than the two + // versions' commit_ts-es. let skip_to_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); loop { let e = receive_event(true); @@ -1953,9 +1956,10 @@ fn test_cdc_extract_rollback_if_gc_fence_set_impl() { } } - // Assume the two versions of the key are written by async commit transactions, and their - // commit_ts-es are also other transaction's start_ts-es. Run check_txn_status on the - // commit_ts-es of the two versions to cause overlapping rollback. + // Assume the two versions of the key are written by async commit transactions, + // and their commit_ts-es are also other transaction's start_ts-es. Run + // check_txn_status on the commit_ts-es of the two versions to cause + // overlapping rollback. 
let caller_start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); suite.must_check_txn_status( 1, @@ -2007,9 +2011,9 @@ fn test_cdc_extract_rollback_if_gc_fence_set_impl() { other => panic!("unknown event {:?}", other), }); - // In some special cases, a newly committed record may carry an overlapped rollback initially. - // In this case, gc_fence shouldn't be set, and CDC ignores the rollback and handles the - // committing normally. + // In some special cases, a newly committed record may carry an overlapped + // rollback initially. In this case, gc_fence shouldn't be set, and CDC + // ignores the rollback and handles the committing normally. let start_ts3 = block_on(suite.cluster.pd_client.get_tso()).unwrap(); let mut mutation = Mutation::default(); mutation.set_op(Op::Put); @@ -2031,11 +2035,11 @@ fn test_cdc_extract_rollback_if_gc_fence_set_impl() { other => panic!("unknown event {:?}", other), }); - // Again, assume the transaction is committed with async commit protocol, and the commit_ts is - // also another transaction's start_ts. + // Again, assume the transaction is committed with async commit protocol, and + // the commit_ts is also another transaction's start_ts. let commit_ts3 = block_on(suite.cluster.pd_client.get_tso()).unwrap(); - // Rollback another transaction before committing, then the rolling back information will be - // recorded in the lock. + // Rollback another transaction before committing, then the rolling back + // information will be recorded in the lock. let caller_start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); suite.must_check_txn_status( 1, @@ -2082,10 +2086,11 @@ fn test_cdc_extract_rollback_if_gc_fence_set_impl() { suite.stop(); } -// This test is created for covering the case that term was increased without leader change. 
-// Ideally leader id and term in StoreMeta should be updated together with a yielded SoftState, -// but sometimes the leader was transferred to another store and then changed back, -// a follower would not get a new SoftState. +// This test is created for covering the case that term was increased without +// leader change. Ideally leader id and term in StoreMeta should be updated +// together with a yielded SoftState, but sometimes the leader was transferred +// to another store and then changed back, a follower would not get a new +// SoftState. #[test] fn test_term_change() { let cluster = new_server_cluster(0, 3); diff --git a/components/cloud/aws/src/kms.rs b/components/cloud/aws/src/kms.rs index 11ecf88ddd9..3d5d6a3fdea 100644 --- a/components/cloud/aws/src/kms.rs +++ b/components/cloud/aws/src/kms.rs @@ -82,8 +82,8 @@ impl KmsProvider for AwsKms { ENCRYPTION_VENDOR_NAME_AWS_KMS } - // On decrypt failure, the rule is to return WrongMasterKey error in case it is possible that - // a wrong master key has been used, or other error otherwise. + // On decrypt failure, the rule is to return WrongMasterKey error in case it is + // possible that a wrong master key has been used, or other error otherwise. async fn decrypt_data_key(&self, data_key: &EncryptedKey) -> Result> { let decrypt_request = DecryptRequest { ciphertext_blob: bytes::Bytes::copy_from_slice(&*data_key), @@ -125,8 +125,8 @@ impl KmsProvider for AwsKms { } } -// Rusoto errors Display implementation just gives the cause message and discards the type. -// This is really bad when the cause message is empty! +// Rusoto errors Display implementation just gives the cause message and +// discards the type. This is really bad when the cause message is empty! 
// Use Debug instead: this will show both pub struct FixRusotoErrorDisplay( RusotoError, diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index e2e9919860b..fd5c07c5097 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -259,8 +259,9 @@ impl From> for UploadError { } /// try_read_exact tries to read exact length data as the buffer size. -/// like [`std::io::Read::read_exact`], but won't return `UnexpectedEof` when cannot read anything more from the `Read`. -/// once returning a size less than the buffer length, implies a EOF was meet, or nothing readed. +/// like [`std::io::Read::read_exact`], but won't return `UnexpectedEof` when +/// cannot read anything more from the `Read`. once returning a size less than +/// the buffer length, implies a EOF was meet, or nothing read. async fn try_read_exact( r: &mut R, buf: &mut [u8], @@ -283,7 +284,8 @@ async fn try_read_exact( const MINIMUM_PART_SIZE: usize = 5 * 1024 * 1024; impl<'client> S3Uploader<'client> { - /// Creates a new uploader with a given target location and upload configuration. + /// Creates a new uploader with a given target location and upload + /// configuration. fn new(client: &'client S3Client, config: &Config, key: String) -> Self { Self { client, @@ -370,7 +372,8 @@ impl<'client> S3Uploader<'client> { } } - /// Completes a multipart upload process, asking S3 to join all parts into a single file. + /// Completes a multipart upload process, asking S3 to join all parts into a + /// single file. async fn complete(&self) -> Result<(), RusotoError> { let res = timeout( Self::get_timeout(), @@ -452,8 +455,8 @@ impl<'client> S3Uploader<'client> { /// Uploads a file atomically. /// - /// This should be used only when the data is known to be short, and thus relatively cheap to - /// retry the entire upload. + /// This should be used only when the data is known to be short, and thus + /// relatively cheap to retry the entire upload. 
async fn upload(&self, data: &[u8]) -> Result<(), RusotoError> { let res = timeout(Self::get_timeout(), async { #[cfg(feature = "failpoints")] @@ -540,9 +543,9 @@ impl BlobStorage for S3Storage { } else { io::ErrorKind::Other }; - // Even we can check whether there is an `io::Error` internal and extract it directly, - // We still need to keep the message 'failed to put object' here for adapting the string-matching based - // retry logic in BR :( + // Even we can check whether there is an `io::Error` internal and extract it + // directly, We still need to keep the message 'failed to put object' here for + // adapting the string-matching based retry logic in BR :( io::Error::new(error_code, format!("failed to put object {}", e)) }) } @@ -628,7 +631,8 @@ mod tests { // set multi_part_size to use upload_part function config.multi_part_size = multi_part_size; - // split magic_contents into 3 parts, so we mock 5 requests here(1 begin + 3 part + 1 complete) + // split magic_contents into 3 parts, so we mock 5 requests here(1 begin + 3 + // part + 1 complete) let dispatcher = MultipleMockRequestDispatcher::new(vec![ MockRequestDispatcher::with_status(200).with_body( r#" @@ -904,7 +908,8 @@ mod tests { use self::try_read_exact; - /// ThrottleRead throttles a `Read` -- make it emits 2 chars for each `read` call. + /// ThrottleRead throttles a `Read` -- make it emits 2 chars for each + /// `read` call. struct ThrottleRead(R); impl Read for ThrottleRead { fn read(&mut self, buf: &mut [u8]) -> io::Result { diff --git a/components/cloud/azure/src/azblob.rs b/components/cloud/azure/src/azblob.rs index c322f1d0edc..2d7f2566509 100644 --- a/components/cloud/azure/src/azblob.rs +++ b/components/cloud/azure/src/azblob.rs @@ -247,8 +247,6 @@ impl RetryError for RequestError { const CONNECTION_TIMEOUT: Duration = Duration::from_secs(900); /// A helper for uploading a large file to Azure storage. 
-/// -/// struct AzureUploader { client_builder: Arc, name: String, @@ -257,7 +255,8 @@ struct AzureUploader { } impl AzureUploader { - /// Creates a new uploader with a given target location and upload configuration. + /// Creates a new uploader with a given target location and upload + /// configuration. fn new(client_builder: Arc, config: &Config, name: String) -> Self { AzureUploader { client_builder, @@ -288,8 +287,8 @@ impl AzureUploader { /// Uploads a file atomically. /// - /// This should be used only when the data is known to be short, and thus relatively cheap to - /// retry the entire upload. + /// This should be used only when the data is known to be short, and thus + /// relatively cheap to retry the entire upload. async fn upload(&self, data: &[u8]) -> Result<(), RequestError> { match timeout(Self::get_timeout(), async { self.client_builder diff --git a/components/cloud/gcp/src/gcs.rs b/components/cloud/gcp/src/gcs.rs index 08ee60a52bf..a3401dbf6c8 100644 --- a/components/cloud/gcp/src/gcs.rs +++ b/components/cloud/gcp/src/gcs.rs @@ -424,8 +424,8 @@ impl BlobStorage for GCSStorage { ..Default::default() }; - // FIXME: Switch to upload() API so we don't need to read the entire data into memory - // in order to retry. + // FIXME: Switch to upload() API so we don't need to read the entire data into + // memory in order to retry. 
let mut data = Vec::with_capacity(content_length as usize); reader.read_to_end(&mut data).await?; retry(|| async { @@ -456,7 +456,7 @@ impl BlobStorage for GCSStorage { Ok(oid) => oid, Err(e) => return GCSStorage::error_to_async_read(io::ErrorKind::InvalidInput, e), }; - let request = match Object::download(&oid, None /*optional*/) { + let request = match Object::download(&oid, None /* optional */) { Ok(request) => request.map(|_: io::Empty| Body::empty()), Err(e) => return GCSStorage::error_to_async_read(io::ErrorKind::Other, e), }; diff --git a/components/cloud/src/blob.rs b/components/cloud/src/blob.rs index 4685b5ae851..2e38097e385 100644 --- a/components/cloud/src/blob.rs +++ b/components/cloud/src/blob.rs @@ -15,7 +15,8 @@ pub trait BlobConfig: 'static + Send + Sync { /// It is identity to [external_storage::UnpinReader], /// only for decoupling external_storage and cloud package. /// -/// See the documentation of [external_storage::UnpinReader] for why those wrappers exists. +/// See the documentation of [external_storage::UnpinReader] for why those +/// wrappers exists. pub struct PutResource(pub Box); impl AsyncRead for PutResource { diff --git a/components/codec/src/buffer.rs b/components/codec/src/buffer.rs index e19e66b91e1..4010ecdf04f 100644 --- a/components/codec/src/buffer.rs +++ b/components/codec/src/buffer.rs @@ -23,11 +23,13 @@ pub trait BufferReader { /// TODO: We should make the panic behaviour deterministic. fn advance(&mut self, count: usize); - /// Read next several bytes as a slice and advance the position of internal cursor. + /// Read next several bytes as a slice and advance the position of internal + /// cursor. /// /// # Errors /// - /// Returns `Error::Io` if there is not enough space to read specified number of bytes. + /// Returns `Error::Io` if there is not enough space to read specified + /// number of bytes. 
fn read_bytes(&mut self, count: usize) -> Result<&[u8]>; } @@ -129,14 +131,16 @@ pub trait BufferWriter { /// The caller may hint the underlying buffer to grow according to `size` /// if the underlying buffer is dynamically sized (i.e. is capable to grow). /// - /// The size of the returned slice may be less than `size` given. For example, - /// when underlying buffer is fixed sized and there is no enough space any more. + /// The size of the returned slice may be less than `size` given. For + /// example, when underlying buffer is fixed sized and there is no + /// enough space any more. /// /// # Safety /// - /// The returned mutable slice is for writing only and should be never used for - /// reading since it might contain uninitialized memory when underlying buffer - /// is dynamically sized. For this reason, this function is marked `unsafe`. + /// The returned mutable slice is for writing only and should be never used + /// for reading since it might contain uninitialized memory when + /// underlying buffer is dynamically sized. For this reason, this + /// function is marked `unsafe`. unsafe fn bytes_mut(&mut self, size: usize) -> &mut [u8]; /// Advances the position of internal cursor for a previous write. @@ -490,7 +494,6 @@ mod tests { let mut buffer = base.clone(); let mut buf_slice = buffer.as_mut_slice(); - // let buffer_viewer = std::slice::from_raw_parts(buffer as *const u8, buffer.len()); buf_slice.bytes_mut(13)[..13].clone_from_slice(&base_write[0..13]); assert_eq!(&buf_slice[0..13], &base_write[0..13]); @@ -584,8 +587,8 @@ mod tests { } } - /// Test whether it is safe to store values in `Vec` after `len()`, i.e. during - /// reallocation these values are copied. + /// Test whether it is safe to store values in `Vec` after `len()`, + /// i.e. during reallocation these values are copied. #[test] // FIXME(#4331) Don't ignore this test. #[ignore] @@ -632,7 +635,6 @@ mod tests { // Re-allocate the vector space and ensure that the address is changed. 
vec.reserve(::std::cmp::max(payload_len * 3, 32)); - //assert_ne!(vec_ptr, vec.as_ptr()); if vec_ptr == vec.as_ptr() { in_place_reallocs += 1; } diff --git a/components/codec/src/byte.rs b/components/codec/src/byte.rs index 53b8091ac8c..63143938c13 100644 --- a/components/codec/src/byte.rs +++ b/components/codec/src/byte.rs @@ -21,9 +21,9 @@ impl MemComparableByteCodec { (src_len / MEMCMP_GROUP_SIZE + 1) * (MEMCMP_GROUP_SIZE + 1) } - /// Gets the length of the first encoded byte sequence in the given buffer, which is encoded in - /// the memory-comparable format. If the buffer is not complete, the length of buffer will be - /// returned. + /// Gets the length of the first encoded byte sequence in the given buffer, + /// which is encoded in the memory-comparable format. If the buffer is + /// not complete, the length of buffer will be returned. #[inline] fn get_first_encoded_len_internal(encoded: &[u8]) -> usize { let mut idx = MEMCMP_GROUP_SIZE; @@ -39,23 +39,25 @@ impl MemComparableByteCodec { } } - /// Gets the length of the first encoded byte sequence in the given buffer, which is encoded in - /// the ascending memory-comparable format. + /// Gets the length of the first encoded byte sequence in the given buffer, + /// which is encoded in the ascending memory-comparable format. pub fn get_first_encoded_len(encoded: &[u8]) -> usize { Self::get_first_encoded_len_internal::(encoded) } - /// Gets the length of the first encoded byte sequence in the given buffer, which is encoded in - /// the descending memory-comparable format. + /// Gets the length of the first encoded byte sequence in the given buffer, + /// which is encoded in the descending memory-comparable format. pub fn get_first_encoded_len_desc(encoded: &[u8]) -> usize { Self::get_first_encoded_len_internal::(encoded) } - /// Encodes all bytes in the `src` into `dest` in ascending memory-comparable format. + /// Encodes all bytes in the `src` into `dest` in ascending + /// memory-comparable format. 
/// /// Returns the number of bytes encoded. /// - /// `dest` must not overlaps `src`, otherwise encoded results will be incorrect. + /// `dest` must not overlaps `src`, otherwise encoded results will be + /// incorrect. /// /// # Panics /// @@ -99,7 +101,8 @@ impl MemComparableByteCodec { } } - /// Encodes the bytes `src[..len]` in ascending memory-comparable format in place. + /// Encodes the bytes `src[..len]` in ascending memory-comparable format in + /// place. /// /// Returns the number of bytes encoded. /// @@ -159,11 +162,13 @@ impl MemComparableByteCodec { } } - /// Encodes all bytes in the `src` into `dest` in descending memory-comparable format. + /// Encodes all bytes in the `src` into `dest` in descending + /// memory-comparable format. /// /// Returns the number of bytes encoded. /// - /// `dest` must not overlaps `src`, otherwise encoded results will be incorrect. + /// `dest` must not overlaps `src`, otherwise encoded results will be + /// incorrect. /// /// # Panics /// @@ -176,7 +181,8 @@ impl MemComparableByteCodec { encoded_len } - /// Encodes the bytes `src[..len]` in descending memory-comparable format in place. + /// Encodes the bytes `src[..len]` in descending memory-comparable format in + /// place. /// /// Returns the number of bytes encoded. /// @@ -189,21 +195,25 @@ impl MemComparableByteCodec { encoded_len } - /// Decodes bytes in ascending memory-comparable format in the `src` into `dest`. + /// Decodes bytes in ascending memory-comparable format in the `src` into + /// `dest`. /// - /// If there are multiple encoded byte slices in `src`, only the first one will be decoded. + /// If there are multiple encoded byte slices in `src`, only the first one + /// will be decoded. /// - /// Returns `(read_bytes, written_bytes)` where `read_bytes` is the number of bytes read in - /// `src` and `written_bytes` is the number of bytes written in `dest`. 
+ /// Returns `(read_bytes, written_bytes)` where `read_bytes` is the number + /// of bytes read in `src` and `written_bytes` is the number of bytes + /// written in `dest`. /// - /// Note that actual written data may be larger than `written_bytes`. Bytes more than - /// `written_bytes` are junk and should be ignored. + /// Note that actual written data may be larger than `written_bytes`. Bytes + /// more than `written_bytes` are junk and should be ignored. /// /// If `src == dest`, please use `try_decode_first_in_place`. /// /// # Panics /// - /// Panics if `dest.len() < src.len()`, although actual written data may be less. + /// Panics if `dest.len() < src.len()`, although actual written data may be + /// less. /// /// When there is a panic, `dest` may contain partially written data. /// @@ -223,21 +233,25 @@ impl MemComparableByteCodec { ) } - /// Decodes bytes in descending memory-comparable format in the `src` into `dest`. + /// Decodes bytes in descending memory-comparable format in the `src` into + /// `dest`. /// - /// If there are multiple encoded byte slices in `src`, only the first one will be decoded. + /// If there are multiple encoded byte slices in `src`, only the first one + /// will be decoded. /// - /// Returns `(read_bytes, written_bytes)` where `read_bytes` is the number of bytes read in - /// `src` and `written_bytes` is the number of bytes written in `dest`. + /// Returns `(read_bytes, written_bytes)` where `read_bytes` is the number + /// of bytes read in `src` and `written_bytes` is the number of bytes + /// written in `dest`. /// - /// Note that actual written data may be larger than `written_bytes`. Bytes more than - /// `written_bytes` are junk and should be ignored. + /// Note that actual written data may be larger than `written_bytes`. Bytes + /// more than `written_bytes` are junk and should be ignored. /// /// If `src == dest`, please use `try_decode_first_in_place_desc`. 
/// /// # Panics /// - /// Panics if `dest.len() < src.len()`, although actual written data may be less. + /// Panics if `dest.len() < src.len()`, although actual written data may be + /// less. /// /// When there is a panic, `dest` may contain partially written data. /// @@ -259,16 +273,17 @@ impl MemComparableByteCodec { Ok((read_bytes, written_bytes)) } - /// Decodes bytes in ascending memory-comparable format in place, i.e. decoded data will - /// overwrite the encoded data. + /// Decodes bytes in ascending memory-comparable format in place, i.e. + /// decoded data will overwrite the encoded data. /// - /// If there are multiple encoded byte slices in `buffer`, only the first one will be decoded. + /// If there are multiple encoded byte slices in `buffer`, only the first + /// one will be decoded. /// - /// Returns `(read_bytes, written_bytes)` where `read_bytes` is the number of bytes read - /// and `written_bytes` is the number of bytes written. + /// Returns `(read_bytes, written_bytes)` where `read_bytes` is the number + /// of bytes read and `written_bytes` is the number of bytes written. /// - /// Note that actual written data may be larger than `written_bytes`. Bytes more than - /// `written_bytes` are junk and should be ignored. + /// Note that actual written data may be larger than `written_bytes`. Bytes + /// more than `written_bytes` are junk and should be ignored. /// /// # Errors /// @@ -286,16 +301,17 @@ impl MemComparableByteCodec { ) } - /// Decodes bytes in descending memory-comparable format in place, i.e. decoded data will - /// overwrite the encoded data. + /// Decodes bytes in descending memory-comparable format in place, i.e. + /// decoded data will overwrite the encoded data. /// - /// If there are multiple encoded byte slices in `buffer`, only the first one will be decoded. + /// If there are multiple encoded byte slices in `buffer`, only the first + /// one will be decoded. 
/// - /// Returns `(read_bytes, written_bytes)` where `read_bytes` is the number of bytes read - /// and `written_bytes` is the number of bytes written. + /// Returns `(read_bytes, written_bytes)` where `read_bytes` is the number + /// of bytes read and `written_bytes` is the number of bytes written. /// - /// Note that actual written data may be larger than `written_bytes`. Bytes more than - /// `written_bytes` are junk and should be ignored. + /// Note that actual written data may be larger than `written_bytes`. Bytes + /// more than `written_bytes` are junk and should be ignored. /// /// # Errors /// @@ -323,10 +339,12 @@ impl MemComparableByteCodec { /// /// This function uses pointers to accept the scenario that `src == dest`. /// - /// This function also uses generics to specialize different code path for ascending and - /// descending decoding, which performs better than inlining a flag. + /// This function also uses generics to specialize different code path for + /// ascending and descending decoding, which performs better than + /// inlining a flag. /// - /// Please refer to `try_decode_first` for the meaning of return values, panics and errors. + /// Please refer to `try_decode_first` for the meaning of return values, + /// panics and errors. #[inline] fn try_decode_first_internal( mut src_ptr: *const u8, @@ -395,7 +413,8 @@ impl MemComparableByteCodec { trait MemComparableCodecHelper { const PADDING: [u8; MEMCMP_GROUP_SIZE]; - /// Given a raw padding size byte, interprets the padding size according to correct order. + /// Given a raw padding size byte, interprets the padding size according to + /// correct order. fn parse_padding_size(raw_marker: u8) -> usize; } @@ -476,8 +495,9 @@ impl MemComparableByteDecoder for T {} pub struct CompactByteCodec; impl CompactByteCodec { - /// Gets the length of the first encoded byte sequence in the given buffer, which is encoded in - /// the compact format. 
If the buffer is not complete, the length of buffer will be returned. + /// Gets the length of the first encoded byte sequence in the given buffer, + /// which is encoded in the compact format. If the buffer is not complete, + /// the length of buffer will be returned. pub fn get_first_encoded_len(encoded: &[u8]) -> usize { let result = NumberCodec::try_decode_var_i64(encoded); match result { @@ -968,8 +988,9 @@ mod tests { fn test_memcmp_try_decode_first() { use super::MEMCMP_GROUP_SIZE as N; - // We have ensured correctness in `test_memcmp_encode_all`, so we use `encode_all` to - // generate fixtures in different length, used for decoding. + // We have ensured correctness in `test_memcmp_encode_all`, so we use + // `encode_all` to generate fixtures in different length, used for + // decoding. fn do_test( is_desc: bool, diff --git a/components/codec/src/error.rs b/components/codec/src/error.rs index 2483bd541de..b85d8dd078d 100644 --- a/components/codec/src/error.rs +++ b/components/codec/src/error.rs @@ -27,8 +27,7 @@ impl ErrorInner { } } -// ====== The code below is to box the error so that the it can be as small as possible ====== - +// Box the error so that the it can be as small as possible #[derive(Debug, Error)] #[error(transparent)] pub struct Error(#[from] pub Box); diff --git a/components/codec/src/number.rs b/components/codec/src/number.rs index 4cc114e7ea7..af47905334d 100644 --- a/components/codec/src/number.rs +++ b/components/codec/src/number.rs @@ -403,7 +403,8 @@ impl NumberCodec { } /// Encodes an unsigned 64 bit integer `v` to `buf` in VarInt encoding, - /// which is not memory-comparable. Returns the number of bytes that encoded. + /// which is not memory-comparable. Returns the number of bytes that + /// encoded. /// /// Note: VarInt encoding is slow, try avoid using it. /// @@ -429,13 +430,15 @@ impl NumberCodec { } /// Decodes an unsigned 64 bit integer from `buf` in VarInt encoding. 
- /// Returns decoded result and the number of bytes that successfully decoded. + /// Returns decoded result and the number of bytes that successfully + /// decoded. /// /// This function is more efficient when `buf.len() >= 10`. /// /// # Errors /// - /// Returns `Error::Io` if there is not enough space to decode the whole VarInt. + /// Returns `Error::Io` if there is not enough space to decode the whole + /// VarInt. pub fn try_decode_var_u64(buf: &[u8]) -> Result<(u64, usize)> { #[allow(clippy::cast_lossless)] unsafe { @@ -478,7 +481,8 @@ impl NumberCodec { } /// Encodes a signed 64 bit integer `v` to `buf` in VarInt encoding, - /// which is not memory-comparable. Returns the number of bytes that encoded. + /// which is not memory-comparable. Returns the number of bytes that + /// encoded. /// /// Note: VarInt encoding is slow, try avoid using it. /// @@ -495,13 +499,15 @@ impl NumberCodec { } /// Decodes a signed 64 bit integer from `buf` in VarInt encoding. - /// Returns decoded result and the number of bytes that successfully decoded. + /// Returns decoded result and the number of bytes that successfully + /// decoded. /// /// This function is more efficient when `buf.len() >= 10`. /// /// # Errors /// - /// Returns `Error::Io` if there is not enough space to decode the whole VarInt. + /// Returns `Error::Io` if there is not enough space to decode the whole + /// VarInt. #[inline] pub fn try_decode_var_i64(buf: &[u8]) -> Result<(i64, usize)> { let (uv, decoded_bytes) = Self::try_decode_var_u64(buf)?; @@ -514,8 +520,8 @@ impl NumberCodec { } } - /// Gets the length of the first encoded VarInt in the given buffer. If the buffer is not - /// complete, the length of buffer will be returned. + /// Gets the length of the first encoded VarInt in the given buffer. If the + /// buffer is not complete, the length of buffer will be returned. /// /// This function is more efficient when `buf.len() >= 10`. 
pub fn get_first_encoded_var_int_len(buf: &[u8]) -> usize { @@ -761,7 +767,8 @@ pub trait NumberDecoder: BufferReader { /// /// # Errors /// - /// Returns `Error::Io` if there is not enough space to decode the whole VarInt. + /// Returns `Error::Io` if there is not enough space to decode the whole + /// VarInt. #[inline] fn read_var_u64(&mut self) -> Result { let (v, decoded_bytes) = { @@ -779,7 +786,8 @@ pub trait NumberDecoder: BufferReader { /// /// # Errors /// - /// Returns `Error::Io` if there is not enough space to decode the whole VarInt. + /// Returns `Error::Io` if there is not enough space to decode the whole + /// VarInt. #[inline] fn read_var_i64(&mut self) -> Result { let (v, decoded_bytes) = { @@ -1015,11 +1023,13 @@ pub trait NumberEncoder: BufferWriter { } /// Writes an unsigned 64 bit integer `v` in VarInt encoding, - /// which is not memory-comparable. Returns the number of bytes that encoded. + /// which is not memory-comparable. Returns the number of bytes that + /// encoded. /// /// Note: /// - VarInt encoding is slow, try avoid using it. - /// - The buffer must reserve 10 bytes for writing, although actual written bytes may be less. + /// - The buffer must reserve 10 bytes for writing, although actual written + /// bytes may be less. /// - The buffer will be advanced by actual written bytes. /// /// # Errors @@ -1039,11 +1049,13 @@ pub trait NumberEncoder: BufferWriter { } /// Writes a signed 64 bit integer `v` in VarInt encoding, - /// which is not memory-comparable. Returns the number of bytes that encoded. + /// which is not memory-comparable. Returns the number of bytes that + /// encoded. /// /// Note: /// - VarInt encoding is slow, try avoid using it. - /// - The buffer must reserve 10 bytes for writing, although actual written bytes may be less. + /// - The buffer must reserve 10 bytes for writing, although actual written + /// bytes may be less. /// - The buffer will be advanced by actual written bytes. 
/// /// # Errors @@ -1818,7 +1830,8 @@ mod benches { use crate::ErrorInner; - /// Encode u64 little endian using `NumberCodec` and store position in extra variable. + /// Encode u64 little endian using `NumberCodec` and store position in extra + /// variable. #[bench] fn bench_encode_u64_le_number_codec(b: &mut test::Bencher) { let mut buf: [u8; 10] = [0; 10]; @@ -1834,7 +1847,8 @@ mod benches { }); } - /// Encode u64 little endian using `byteorder::WriteBytesExt` over a `Cursor<&mut [u8]>`. + /// Encode u64 little endian using `byteorder::WriteBytesExt` over a + /// `Cursor<&mut [u8]>`. #[bench] fn bench_encode_u64_le_byteorder(b: &mut test::Bencher) { use byteorder::WriteBytesExt; @@ -1852,7 +1866,8 @@ mod benches { }); } - /// Encode u64 little endian using `NumberEncoder` over a `Cursor<&mut [u8]>`. + /// Encode u64 little endian using `NumberEncoder` over a `Cursor<&mut + /// [u8]>`. #[bench] fn bench_encode_u64_le_buffer_encoder_slice(b: &mut test::Bencher) { use super::NumberEncoder; @@ -1881,7 +1896,8 @@ mod benches { }); } - /// Decode u64 little endian using `NumberCodec` and store position in extra variable. + /// Decode u64 little endian using `NumberCodec` and store position in extra + /// variable. #[bench] fn bench_decode_u64_le_number_codec(b: &mut test::Bencher) { let buf: [u8; 10] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]; @@ -1894,7 +1910,8 @@ mod benches { }); } - /// Decode u64 little endian using `NumberCodec` and store position via slice index. + /// Decode u64 little endian using `NumberCodec` and store position via + /// slice index. #[bench] fn bench_decode_u64_le_number_codec_over_slice(b: &mut test::Bencher) { let buf: Vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 0]; @@ -1907,7 +1924,8 @@ mod benches { }); } - /// Decode u64 little endian using `byteorder::ReadBytesExt` over a `Cursor<&[u8]>`. + /// Decode u64 little endian using `byteorder::ReadBytesExt` over a + /// `Cursor<&[u8]>`. 
#[bench] fn bench_decode_u64_le_byteorder(b: &mut test::Bencher) { use byteorder::ReadBytesExt; diff --git a/components/concurrency_manager/src/lib.rs b/components/concurrency_manager/src/lib.rs index 7865f43fc78..b80501b5433 100644 --- a/components/concurrency_manager/src/lib.rs +++ b/components/concurrency_manager/src/lib.rs @@ -58,8 +58,8 @@ impl ConcurrencyManager { } } - /// Acquires a mutex of the key and returns an RAII guard. When the guard goes - /// out of scope, the mutex will be unlocked. + /// Acquires a mutex of the key and returns an RAII guard. When the guard + /// goes out of scope, the mutex will be unlocked. /// /// The guard can be used to store Lock in the table. The stored lock /// is visible to `read_key_check` and `read_range_check`. @@ -67,8 +67,8 @@ impl ConcurrencyManager { self.lock_table.lock_key(key).await } - /// Acquires mutexes of the keys and returns the RAII guards. The order of the - /// guards is the same with the given keys. + /// Acquires mutexes of the keys and returns the RAII guards. The order of + /// the guards is the same with the given keys. /// /// The guards can be used to store Lock in the table. The stored lock /// is visible to `read_key_check` and `read_range_check`. diff --git a/components/concurrency_manager/src/lock_table.rs b/components/concurrency_manager/src/lock_table.rs index 2b9e87f8f39..da08d9983d1 100644 --- a/components/concurrency_manager/src/lock_table.rs +++ b/components/concurrency_manager/src/lock_table.rs @@ -33,12 +33,13 @@ impl LockTable { let entry = self.0.get_or_insert(key.clone(), weak); if entry.value().ptr_eq(&weak2) { // If the weak ptr returned by `get_or_insert` equals to the one we inserted, - // `guard` refers to the KeyHandle in the lock table. Now, we can bind the handle - // to the table. + // `guard` refers to the KeyHandle in the lock table. Now, we can bind the + // handle to the table. 
- // SAFETY: The `table` field in `KeyHandle` is only accessed through the `set_table` - // or the `drop` method. It's impossible to have a concurrent `drop` here and `set_table` - // is only called here. So there is no concurrent access to the `table` field in `KeyHandle`. + // SAFETY: The `table` field in `KeyHandle` is only accessed through the + // `set_table` or the `drop` method. It's impossible to have a concurrent `drop` + // here and `set_table` is only called here. So there is no concurrent access to + // the `table` field in `KeyHandle`. unsafe { guard.handle().set_table(self.clone()); } diff --git a/components/concurrency_manager/tests/memory_usage.rs b/components/concurrency_manager/tests/memory_usage.rs index b3b62ab5849..34ce9986a61 100644 --- a/components/concurrency_manager/tests/memory_usage.rs +++ b/components/concurrency_manager/tests/memory_usage.rs @@ -11,7 +11,8 @@ use rand::prelude::*; use txn_types::{Key, Lock, LockType}; // This test is heavy so we shouldn't run it daily. -// Run it with the following command (recommending release mode) and see the printed stats: +// Run it with the following command (recommending release mode) and see the +// printed stats: // // ``` // cargo test --package concurrency_manager --test memory_usage --features jemalloc --release -- test_memory_usage --exact --ignored --nocapture diff --git a/components/coprocessor_plugin_api/src/allocator.rs b/components/coprocessor_plugin_api/src/allocator.rs index 7d7140b6170..d8c2ab5062f 100644 --- a/components/coprocessor_plugin_api/src/allocator.rs +++ b/components/coprocessor_plugin_api/src/allocator.rs @@ -9,8 +9,8 @@ type DeallocFn = unsafe fn(*mut u8, Layout); /// Used to initialize the plugin's allocator. /// -/// A `HostAllocatorPtr` contains the relevant pointers to initialize the allocator of -/// to plugin. It will be passed from TiKV to the plugin. +/// A `HostAllocatorPtr` contains the relevant pointers to initialize the +/// allocator of to plugin. 
It will be passed from TiKV to the plugin. #[repr(C)] pub struct HostAllocatorPtr { pub alloc_fn: AllocFn, @@ -26,8 +26,9 @@ pub struct HostAllocator { impl HostAllocator { /// Creates a new [`HostAllocator`]. /// - /// The internal function pointers are initially `None`, so any attempt to allocate memory - /// before a call to [`set_allocator()`] will result in a panic. + /// The internal function pointers are initially `None`, so any attempt to + /// allocate memory before a call to [`set_allocator()`] will result in + /// a panic. pub const fn new() -> Self { HostAllocator { alloc_fn: Atomic::new(None), @@ -35,9 +36,10 @@ impl HostAllocator { } } - /// Updates the function pointers of the [`HostAllocator`] to the given [`HostAllocatorPtr`]. - /// This function needs to be called before _any_ allocation with this allocator is performed, - /// because otherwise the [`HostAllocator`] is in an invalid state. + /// Updates the function pointers of the [`HostAllocator`] to the given + /// [`HostAllocatorPtr`]. This function needs to be called before _any_ + /// allocation with this allocator is performed, because otherwise the + /// [`HostAllocator`] is in an invalid state. pub fn set_allocator(&self, allocator: HostAllocatorPtr) { self.alloc_fn .store(Some(allocator.alloc_fn), Ordering::SeqCst); diff --git a/components/coprocessor_plugin_api/src/errors.rs b/components/coprocessor_plugin_api/src/errors.rs index 7085fa98edd..78961d60df8 100644 --- a/components/coprocessor_plugin_api/src/errors.rs +++ b/components/coprocessor_plugin_api/src/errors.rs @@ -9,9 +9,10 @@ pub type PluginResult = std::result::Result; /// Error returned by operations on [`RawStorage`]. /// -/// If a plugin wants to return a custom error, e.g. an error in the business logic, the plugin should -/// return an appropriately encoded error in [`RawResponse`]; in other words, plugins are responsible -/// for their error handling by themselves. +/// If a plugin wants to return a custom error, e.g. 
an error in the business +/// logic, the plugin should return an appropriately encoded error in +/// [`RawResponse`]; in other words, plugins are responsible for their error +/// handling by themselves. #[derive(Debug)] pub enum PluginError { KeyNotInRegion { @@ -23,11 +24,12 @@ pub enum PluginError { Timeout(Duration), Canceled, - /// Errors that can not be handled by a coprocessor plugin but should instead be returned to the - /// client. + /// Errors that can not be handled by a coprocessor plugin but should + /// instead be returned to the client. /// - /// If such an error appears, plugins can run some cleanup code and return early from the - /// request. The error will be passed to the client and the client might retry the request. + /// If such an error appears, plugins can run some cleanup code and return + /// early from the request. The error will be passed to the client and + /// the client might retry the request. Other(String, Box), } diff --git a/components/coprocessor_plugin_api/src/lib.rs b/components/coprocessor_plugin_api/src/lib.rs index ca61b54c724..7f05840c072 100644 --- a/components/coprocessor_plugin_api/src/lib.rs +++ b/components/coprocessor_plugin_api/src/lib.rs @@ -1,25 +1,30 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -//! This crate contains some necessary types and traits for implementing a custom coprocessor plugin -//! for TiKV. +//! This crate contains some necessary types and traits for implementing a +//! custom coprocessor plugin for TiKV. //! -//! Most notably, if you want to write a custom plugin, your plugin needs to implement the -//! [`CoprocessorPlugin`] trait. The plugin then needs to be compiled to a `dylib`. +//! Most notably, if you want to write a custom plugin, your plugin needs to +//! implement the [`CoprocessorPlugin`] trait. The plugin then needs to be +//! compiled to a `dylib`. //! -//! > Note: Only `dylib` is supported, and not `cdylib` or `staticlib`, because the latter two are -//! 
> not able to use TiKV's allocator. See also the documentation in [`std::alloc`]. +//! > Note: Only `dylib` is supported, and not `cdylib` or `staticlib`, because +//! > the latter two are +//! > not able to use TiKV's allocator. See also the documentation in +//! > [`std::alloc`]. //! -//! In order to make your plugin callable, you need to declare a constructor with the -//! [`declare_plugin`] macro. +//! In order to make your plugin callable, you need to declare a constructor +//! with the [`declare_plugin`] macro. //! -//! A plugin can interact with the underlying storage via the [`RawStorage`] trait. +//! A plugin can interact with the underlying storage via the [`RawStorage`] +//! trait. //! //! # Example //! //! ```no_run -//! use coprocessor_plugin_api::*; //! use std::ops::Range; //! +//! use coprocessor_plugin_api::*; +//! //! #[derive(Default)] //! struct MyPlugin; //! diff --git a/components/coprocessor_plugin_api/src/plugin_api.rs b/components/coprocessor_plugin_api/src/plugin_api.rs index 31f87f3c822..f31c3f9bab2 100644 --- a/components/coprocessor_plugin_api/src/plugin_api.rs +++ b/components/coprocessor_plugin_api/src/plugin_api.rs @@ -7,31 +7,32 @@ use crate::PluginResult; /// Raw bytes of the request payload from the client to the coprocessor. pub type RawRequest = Vec; -/// The response from the coprocessor encoded as raw bytes that are sent back to the client. +/// The response from the coprocessor encoded as raw bytes that are sent back to +/// the client. pub type RawResponse = Vec; /// A plugin that allows users to execute arbitrary code on TiKV nodes. /// -/// If you want to implement a custom coprocessor plugin for TiKV, your plugin needs to implement -/// the [`CoprocessorPlugin`] trait. +/// If you want to implement a custom coprocessor plugin for TiKV, your plugin +/// needs to implement the [`CoprocessorPlugin`] trait. /// -/// Plugins can run setup code in their constructor and teardown code by implementing -/// [`std::ops::Drop`]. 
+/// Plugins can run setup code in their constructor and teardown code by +/// implementing [`std::ops::Drop`]. pub trait CoprocessorPlugin: Send + Sync { /// Handles a request to the coprocessor. /// - /// The data in the `request` parameter is exactly the same data that was passed with the - /// `RawCoprocessorRequest` in the `data` field. Each plugin is responsible to properly decode - /// the raw bytes by itself. - /// The same is true for the return parameter of this function. Upon successful completion, the - /// function should return a properly encoded result as raw bytes which is then sent back to - /// the client. + /// The data in the `request` parameter is exactly the same data that was + /// passed with the `RawCoprocessorRequest` in the `data` field. Each + /// plugin is responsible to properly decode the raw bytes by itself. + /// The same is true for the return parameter of this function. Upon + /// successful completion, the function should return a properly encoded + /// result as raw bytes which is then sent back to the client. /// - /// Most of the time, it's a good idea to use Protobuf for encoding/decoding, but in general you - /// can also send raw bytes. + /// Most of the time, it's a good idea to use Protobuf for + /// encoding/decoding, but in general you can also send raw bytes. /// - /// Plugins can read and write data from the underlying [`RawStorage`] via the `storage` - /// parameter. + /// Plugins can read and write data from the underlying [`RawStorage`] via + /// the `storage` parameter. fn on_raw_coprocessor_request( &self, ranges: Vec>, diff --git a/components/coprocessor_plugin_api/src/storage_api.rs b/components/coprocessor_plugin_api/src/storage_api.rs index 3adfa7c4a7e..08c09ca4a48 100644 --- a/components/coprocessor_plugin_api/src/storage_api.rs +++ b/components/coprocessor_plugin_api/src/storage_api.rs @@ -15,38 +15,44 @@ pub type KvPair = (Key, Value); /// Storage access for coprocessor plugins. 
/// -/// [`RawStorage`] allows coprocessor plugins to interact with TiKV storage on a low level. +/// [`RawStorage`] allows coprocessor plugins to interact with TiKV storage on a +/// low level. /// /// Batch operations should be preferred due to their better performance. #[async_trait(?Send)] pub trait RawStorage { - /// Retrieves the value for a given key from the storage on the current node. - /// Returns [`Option::None`] if the key is not present in the database. + /// Retrieves the value for a given key from the storage on the current + /// node. Returns [`Option::None`] if the key is not present in the + /// database. async fn get(&self, key: Key) -> PluginResult>; - /// Same as [`RawStorage::get()`], but retrieves values for multiple keys at once. + /// Same as [`RawStorage::get()`], but retrieves values for multiple keys at + /// once. async fn batch_get(&self, keys: Vec) -> PluginResult>; - /// Same as [`RawStorage::get()`], but accepts a `key_range` such that values for keys in - /// `[key_range.start, key_range.end)` are retrieved. + /// Same as [`RawStorage::get()`], but accepts a `key_range` such that + /// values for keys in `[key_range.start, key_range.end)` are retrieved. /// The upper bound of the `key_range` is exclusive. async fn scan(&self, key_range: Range) -> PluginResult>; /// Inserts a new key-value pair into the storage on the current node. async fn put(&self, key: Key, value: Value) -> PluginResult<()>; - /// Same as [`RawStorage::put()`], but inserts multiple key-value pairs at once. + /// Same as [`RawStorage::put()`], but inserts multiple key-value pairs at + /// once. async fn batch_put(&self, kv_pairs: Vec) -> PluginResult<()>; - /// Deletes a key-value pair from the storage on the current node given a `key`. - /// Returns [`Result::Ok]` if the key was successfully deleted. + /// Deletes a key-value pair from the storage on the current node given a + /// `key`. Returns [`Result::Ok]` if the key was successfully deleted. 
async fn delete(&self, key: Key) -> PluginResult<()>; - /// Same as [`RawStorage::delete()`], but deletes multiple key-value pairs at once. + /// Same as [`RawStorage::delete()`], but deletes multiple key-value pairs + /// at once. async fn batch_delete(&self, keys: Vec) -> PluginResult<()>; - /// Same as [`RawStorage::delete()`], but deletes multiple key-values pairs at once - /// given a `key_range`. All records with keys in `[key_range.start, key_range.end)` - /// will be deleted. The upper bound of the `key_range` is exclusive. + /// Same as [`RawStorage::delete()`], but deletes multiple key-values pairs + /// at once given a `key_range`. All records with keys in + /// `[key_range.start, key_range.end)` will be deleted. The upper bound + /// of the `key_range` is exclusive. async fn delete_range(&self, key_range: Range) -> PluginResult<()>; } diff --git a/components/coprocessor_plugin_api/src/util.rs b/components/coprocessor_plugin_api/src/util.rs index fd15a26a1c8..816b0d12162 100644 --- a/components/coprocessor_plugin_api/src/util.rs +++ b/components/coprocessor_plugin_api/src/util.rs @@ -2,33 +2,40 @@ use super::{allocator::HostAllocatorPtr, plugin_api::CoprocessorPlugin}; -/// Name of the exported constructor with signature [`PluginConstructorSignature`] for the plugin. +/// Name of the exported constructor with signature +/// [`PluginConstructorSignature`] for the plugin. pub static PLUGIN_CONSTRUCTOR_SYMBOL: &[u8] = b"_plugin_create"; -/// Name of the exported function with signature [`PluginGetBuildInfoSignature`] to get build -/// information about the plugin. +/// Name of the exported function with signature [`PluginGetBuildInfoSignature`] +/// to get build information about the plugin. pub static PLUGIN_GET_BUILD_INFO_SYMBOL: &[u8] = b"_plugin_get_build_info"; -/// Name of the exported function with signature [`PluginGetPluginInfoSignature`] to get some -/// information about the plugin. 
+/// Name of the exported function with signature +/// [`PluginGetPluginInfoSignature`] to get some information about the plugin. pub static PLUGIN_GET_PLUGIN_INFO_SYMBOL: &[u8] = b"_plugin_get_plugin_info"; -/// Type signature of the exported function with symbol [`PLUGIN_CONSTRUCTOR_SYMBOL`]. +/// Type signature of the exported function with symbol +/// [`PLUGIN_CONSTRUCTOR_SYMBOL`]. pub type PluginConstructorSignature = unsafe fn(host_allocator: HostAllocatorPtr) -> *mut dyn CoprocessorPlugin; -/// Type signature of the exported function with symbol [`PLUGIN_GET_BUILD_INFO_SYMBOL`]. +/// Type signature of the exported function with symbol +/// [`PLUGIN_GET_BUILD_INFO_SYMBOL`]. pub type PluginGetBuildInfoSignature = extern "C" fn() -> BuildInfo; -/// Type signature of the exported function with symbol [`PLUGIN_GET_PLUGIN_INFO_SYMBOL`]. +/// Type signature of the exported function with symbol +/// [`PLUGIN_GET_PLUGIN_INFO_SYMBOL`]. pub type PluginGetPluginInfoSignature = extern "C" fn() -> PluginInfo; -/// Automatically collected build information about the plugin that is exposed from the library. +/// Automatically collected build information about the plugin that is exposed +/// from the library. /// -/// Will be automatically created when using [`declare_plugin!(...)`](declare_plugin) and will be -/// used by TiKV when a plugin is loaded to determine whether there are compilation mismatches. +/// Will be automatically created when using +/// [`declare_plugin!(...)`](declare_plugin) and will be used by TiKV when a +/// plugin is loaded to determine whether there are compilation mismatches. #[repr(C)] #[derive(Debug, Clone, PartialEq, Eq)] pub struct BuildInfo { - /// Version of the [`coprocessor_plugin_api`](crate) crate that was used to compile this plugin. + /// Version of the [`coprocessor_plugin_api`](crate) crate that was used to + /// compile this plugin. pub api_version: &'static str, /// Target triple for which platform this plugin was compiled. 
pub target: &'static str, @@ -59,11 +66,15 @@ pub struct PluginInfo { /// Declare a plugin for the library so that it can be loaded by TiKV. /// /// The macro has three different versions: -/// * `declare_plugin!(plugin_name, plugin_version, plugin_ctor)` which gives you full control. -/// * `declare_plugin!(plugin_name, plugin_ctor)` automatically fetches the version from `Cargo.toml`. -/// * `declare_plugin!(plugin_ctor)` automatically fetches plugin name and version from `Cargo.toml`. +/// * `declare_plugin!(plugin_name, plugin_version, plugin_ctor)` which gives +/// you full control. +/// * `declare_plugin!(plugin_name, plugin_ctor)` automatically fetches the +/// version from `Cargo.toml`. +/// * `declare_plugin!(plugin_ctor)` automatically fetches plugin name and +/// version from `Cargo.toml`. /// -/// The types of `plugin_name` and `plugin_version` have to be `&'static str` literals. +/// The types of `plugin_name` and `plugin_version` have to be `&'static str` +/// literals. /// /// # Notes /// This works by automatically generating an `extern "C"` function with a @@ -119,8 +130,8 @@ macro_rules! declare_plugin { /// Transforms the name of a package into the name of the compiled library. /// -/// The result of the function can be used to correctly locate build artifacts of `dylib` on -/// different platforms. +/// The result of the function can be used to correctly locate build artifacts +/// of `dylib` on different platforms. 
/// /// The name of the `dylib` is /// * `lib.so` on Linux diff --git a/components/encryption/export/src/lib.rs b/components/encryption/export/src/lib.rs index 5b84a4a0c34..537eb8785e5 100644 --- a/components/encryption/export/src/lib.rs +++ b/components/encryption/export/src/lib.rs @@ -82,7 +82,8 @@ fn create_backend_inner(config: &MasterKeyConfig) -> Result> { }) } -// CloudKMS adapts the KmsProvider definition from the cloud crate to that of the encryption crate +// CloudKMS adapts the KmsProvider definition from the cloud crate to that of +// the encryption crate #[derive(Debug, Deref)] struct CloudKms(Box); diff --git a/components/encryption/src/crypter.rs b/components/encryption/src/crypter.rs index f869817de2b..1268d0d88f2 100644 --- a/components/encryption/src/crypter.rs +++ b/components/encryption/src/crypter.rs @@ -153,7 +153,7 @@ impl<'k> AesGcmCrypter<'k> { cipher, &self.key.0, Some(self.iv.as_slice()), - &[], /* AAD */ + &[], // AAD pt, &mut tag.0, )?; @@ -166,7 +166,7 @@ impl<'k> AesGcmCrypter<'k> { cipher, &self.key.0, Some(self.iv.as_slice()), - &[], /* AAD */ + &[], // AAD ct, &tag.0, )?; diff --git a/components/encryption/src/encrypted_file/mod.rs b/components/encryption/src/encrypted_file/mod.rs index 7bf31225db8..57b5527b7bf 100644 --- a/components/encryption/src/encrypted_file/mod.rs +++ b/components/encryption/src/encrypted_file/mod.rs @@ -34,8 +34,8 @@ impl<'a> EncryptedFile<'a> { EncryptedFile { base, name } } - /// Read and decrypt the file. Caller need to handle the NotFound io error in case file not - /// exists. + /// Read and decrypt the file. Caller need to handle the NotFound io error + /// in case file not exists. 
pub fn read(&self, master_key: &dyn Backend) -> Result> { let start = Instant::now(); let res = OpenOptions::new() diff --git a/components/encryption/src/file_dict_file.rs b/components/encryption/src/file_dict_file.rs index e2dedfe534e..0884cb1ca04 100644 --- a/components/encryption/src/file_dict_file.rs +++ b/components/encryption/src/file_dict_file.rs @@ -120,7 +120,8 @@ impl FileDictionaryFile { self.base.join(&self.name) } - /// Rewrite the log file to reduce file size and reduce the time of next recovery. + /// Rewrite the log file to reduce file size and reduce the time of next + /// recovery. fn rewrite(&mut self) -> Result<()> { let file_dict_bytes = self.file_dict.write_to_bytes()?; if self.enable_log { @@ -397,7 +398,7 @@ mod tests { tempdir.path(), "test_file_dict_file", enable_log, - 2, /*file_rewrite_threshold*/ + 2, // file_rewrite_threshold ) .unwrap(); let info1 = create_file_info(1, EncryptionMethod::Aes256Ctr); @@ -440,12 +441,12 @@ mod tests { #[test] fn test_file_dict_file_normal_v1() { - test_file_dict_file_normal(false /*enable_log*/); + test_file_dict_file_normal(false /* enable_log */); } #[test] fn test_file_dict_file_normal_v2() { - test_file_dict_file_normal(true /*enable_log*/); + test_file_dict_file_normal(true /* enable_log */); } fn test_file_dict_file_existed(enable_log: bool) { @@ -454,7 +455,7 @@ mod tests { tempdir.path(), "test_file_dict_file", enable_log, - 2, /*file_rewrite_threshold*/ + 2, // file_rewrite_threshold ) .unwrap(); @@ -464,9 +465,9 @@ mod tests { let (_, file_dict) = FileDictionaryFile::open( tempdir.path(), "test_file_dict_file", - true, /*enable_log*/ - 2, /*file_rewrite_threshold*/ - false, /*skip_rewrite*/ + true, // enable_log + 2, // file_rewrite_threshold + false, // skip_rewrite ) .unwrap(); assert_eq!(*file_dict.files.get("info").unwrap(), info); @@ -474,12 +475,12 @@ mod tests { #[test] fn test_file_dict_file_existed_v1() { - test_file_dict_file_existed(false /*enable_log*/); + 
test_file_dict_file_existed(false /* enable_log */); } #[test] fn test_file_dict_file_existed_v2() { - test_file_dict_file_existed(true /*enable_log*/); + test_file_dict_file_existed(true /* enable_log */); } fn test_file_dict_file_not_existed(enable_log: bool) { @@ -488,20 +489,20 @@ mod tests { tempdir.path(), "test_file_dict_file", enable_log, - 2, /*file_rewrite_threshold*/ - false, /*skip_rewrite*/ + 2, // file_rewrite_threshold + false, // skip_rewrite ); assert!(matches!(ret, Err(Error::Io(_)))); } #[test] fn test_file_dict_file_not_existed_v1() { - test_file_dict_file_not_existed(false /*enable_log*/); + test_file_dict_file_not_existed(false /* enable_log */); } #[test] fn test_file_dict_file_not_existed_v2() { - test_file_dict_file_not_existed(true /*enable_log*/); + test_file_dict_file_not_existed(true /* enable_log */); } #[test] @@ -524,9 +525,9 @@ mod tests { let (_, file_dict_read) = FileDictionaryFile::open( tempdir.path(), "test_file_dict_file", - true, /*enable_log*/ - 2, /*file_rewrite_threshold*/ - false, /*skip_rewrite*/ + true, // enable_log + 2, // file_rewrite_threshold + false, // skip_rewrite ) .unwrap(); assert_eq!(file_dict, file_dict_read); @@ -544,8 +545,8 @@ mod tests { let mut file_dict = FileDictionaryFile::new( tempdir.path(), "test_file_dict_file", - true, /*enable_log*/ - 1000, /*file_rewrite_threshold*/ + true, // enable_log + 1000, // file_rewrite_threshold ) .unwrap(); @@ -571,9 +572,9 @@ mod tests { let (_, file_dict) = FileDictionaryFile::open( tempdir.path(), "test_file_dict_file", - true, /*enable_log*/ - 1000, /*file_rewrite_threshold*/ - true, /*skip_rewrite*/ + true, // enable_log + 1000, // file_rewrite_threshold + true, // skip_rewrite ) .unwrap(); assert_eq!(*file_dict.files.get("f1").unwrap(), info1); @@ -586,9 +587,9 @@ mod tests { let (_, file_dict) = FileDictionaryFile::open( tempdir.path(), "test_file_dict_file", - false, /*enable_log*/ - 1000, /*file_rewrite_threshold*/ - false, /*skip_rewrite*/ + false, // 
enable_log + 1000, // file_rewrite_threshold + false, // skip_rewrite ) .unwrap(); assert_eq!(*file_dict.files.get("f1").unwrap(), info1); diff --git a/components/encryption/src/io.rs b/components/encryption/src/io.rs index d62542cb16a..d2c5b6d1546 100644 --- a/components/encryption/src/io.rs +++ b/components/encryption/src/io.rs @@ -409,7 +409,8 @@ impl CrypterCore { } fn reset_buffer(&mut self, size: usize) { - // OCrypter require the output buffer to have block_size extra bytes, or it will panic. + // OCrypter require the output buffer to have block_size extra bytes, or it will + // panic. self.buffer.resize(size + self.block_size, 0); } @@ -436,9 +437,10 @@ impl CrypterCore { Ok(()) } - /// For simplicity, the following implementation rely on the fact that OpenSSL always - /// return exact same size as input in CTR mode. If it is not true in the future, or we - /// want to support other counter modes, this code needs to be updated. + /// For simplicity, the following implementation rely on the fact that + /// OpenSSL always return exact same size as input in CTR mode. If it is + /// not true in the future, or we want to support other counter modes, + /// this code needs to be updated. pub fn do_crypter_in_place(&mut self, buf: &mut [u8]) -> IoResult<()> { if self.crypter.is_none() { self.reset_crypter(0)?; diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index 2240e212b84..79654d9d6a2 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -263,9 +263,9 @@ impl Dicts { return Ok(None); } }; - // When an encrypted file exists in the file system, the file_dict must have info about - // this file. But the opposite is not true, this is because the actual file operation - // and file_dict operation are not atomic. + // When an encrypted file exists in the file system, the file_dict must have + // info about this file. 
But the opposite is not true, this is because the + // actual file operation and file_dict operation are not atomic. check_stale_file_exist(dst_fname, &mut file_dict, &mut file_dict_file)?; let method = file.method; file_dict.files.insert(dst_fname.to_owned(), file.clone()); @@ -466,7 +466,8 @@ impl DataKeyManager { Ok(Some(Self::from_dicts(dicts, args.method, master_key)?)) } - /// Will block file operation for a considerable amount of time. Only used for debugging purpose. + /// Will block file operation for a considerable amount of time. Only used + /// for debugging purpose. pub fn retain_encrypted_files(&self, f: impl Fn(&str) -> bool) { let mut dict = self.dicts.file_dict.lock().unwrap(); let mut file_dict_file = self.dicts.file_dict_file.lock().unwrap(); @@ -592,7 +593,7 @@ impl DataKeyManager { pub fn create_file_for_write>(&self, path: P) -> Result> { let file_writer = File::create(&path)?; - self.open_file_with_writer(path, file_writer, true /*create*/) + self.open_file_with_writer(path, file_writer, true /* create */) } pub fn open_file_with_writer, W: std::io::Write>( @@ -683,9 +684,9 @@ impl DataKeyManager { let (_, file_dict) = FileDictionaryFile::open( dict_path, FILE_DICT_NAME, - true, /*enable_file_dictionary_log*/ + true, // enable_file_dictionary_log 1, - true, /*skip_rewrite*/ + true, // skip_rewrite )?; if let Some(file_path) = file_path { if let Some(info) = file_dict.files.get(file_path) { @@ -1294,7 +1295,8 @@ mod tests { let previous = Box::new(PlaintextBackend::default()) as Box; let result = new_key_manager(&tmp_dir, None, wrong_key, previous); - // When the master key is invalid, the key manager left a empty file dict and return errors. + // When the master key is invalid, the key manager left a empty file dict and + // return errors. 
assert!(result.is_err()); let previous = Box::new(PlaintextBackend::default()) as Box; let result = new_key_manager(&tmp_dir, None, right_key, previous); @@ -1317,7 +1319,7 @@ mod tests { { let raw = File::create(&path).unwrap(); let mut f = manager - .open_file_with_writer(&path, raw, false /*create*/) + .open_file_with_writer(&path, raw, false /* create */) .unwrap(); f.write_all(content.as_bytes()).unwrap(); f.sync_all().unwrap(); diff --git a/components/encryption/src/master_key/kms.rs b/components/encryption/src/master_key/kms.rs index da1b6d80e0a..8520e7a0cbe 100644 --- a/components/encryption/src/master_key/kms.rs +++ b/components/encryption/src/master_key/kms.rs @@ -124,17 +124,17 @@ impl KmsBackend { Ok(content) } - // On decrypt failure, the rule is to return WrongMasterKey error in case it is possible that - // a wrong master key has been used, or other error otherwise. + // On decrypt failure, the rule is to return WrongMasterKey error in case it is + // possible that a wrong master key has been used, or other error otherwise. fn decrypt_content(&self, content: &EncryptedContent) -> Result> { let vendor_name = self.kms_provider.name(); match content.metadata.get(MetadataKey::KmsVendor.as_str()) { Some(val) if val.as_slice() == vendor_name.as_bytes() => (), None => { return Err( - // If vender is missing in metadata, it could be the encrypted content is invalid - // or corrupted, but it is also possible that the content is encrypted using the - // FileBackend. Return WrongMasterKey anyway. + // If vender is missing in metadata, it could be the encrypted content is + // invalid or corrupted, but it is also possible that the content is encrypted + // using the FileBackend. Return WrongMasterKey anyway. 
Error::WrongMasterKey(box_err!("missing KMS vendor")), ); } diff --git a/components/encryption/src/master_key/mem.rs b/components/encryption/src/master_key/mem.rs index 92453dac5f2..8e65b85fff6 100644 --- a/components/encryption/src/master_key/mem.rs +++ b/components/encryption/src/master_key/mem.rs @@ -38,24 +38,25 @@ impl MemAesGcmBackend { Ok(content) } - // On decrypt failure, the rule is to return WrongMasterKey error in case it is possible that - // a wrong master key has been used, or other error otherwise. + // On decrypt failure, the rule is to return WrongMasterKey error in case it is + // possible that a wrong master key has been used, or other error otherwise. pub fn decrypt_content(&self, content: &EncryptedContent) -> Result> { let method = content .get_metadata() .get(MetadataKey::Method.as_str()) .ok_or_else(|| { - // Missing method in metadata. The metadata of the encrypted content is invalid or - // corrupted. + // Missing method in metadata. The metadata of the encrypted content is invalid + // or corrupted. Error::Other(box_err!( "metadata {} not found", MetadataKey::Method.as_str() )) })?; if method.as_slice() != MetadataMethod::Aes256Gcm.as_slice() { - // Currently we only support aes256-gcm. A different method could mean the encrypted - // content is written by a future version of TiKV, and we don't know how to handle it. - // Fail immediately instead of fallback to previous key. + // Currently we only support aes256-gcm. A different method could mean the + // encrypted content is written by a future version of TiKV, and we + // don't know how to handle it. Fail immediately instead of fallback + // to previous key. return Err(Error::Other(box_err!( "encryption method mismatch, expected {:?} vs actual {:?}", MetadataMethod::Aes256Gcm.as_slice(), @@ -75,7 +76,8 @@ impl MemAesGcmBackend { .get_metadata() .get(MetadataKey::AesGcmTag.as_str()) .ok_or_else(|| { - // Tag is missing. The metadata of the encrypted content is invalid or corrupted. 
+ // Tag is missing. The metadata of the encrypted content is invalid or + // corrupted. Error::Other(box_err!("gcm tag not found")) })?; let gcm_tag = AesGcmTag::from(tag.as_slice()); diff --git a/components/encryption/src/master_key/mod.rs b/components/encryption/src/master_key/mod.rs index f975e1de7b9..59578a2bcf0 100644 --- a/components/encryption/src/master_key/mod.rs +++ b/components/encryption/src/master_key/mod.rs @@ -106,8 +106,9 @@ pub mod tests { } impl MockBackend { - // Callers are responsible for enabling tracking on the MockBackend by calling this function - // This names the backend instance, allowiing later fine-grained recall + // Callers are responsible for enabling tracking on the MockBackend by calling + // this function This names the backend instance, allowing later fine-grained + // recall pub fn track(&mut self, name: String) { let track = make_track(&name); self.track = track.clone(); diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 6071f06a646..0e83eb2cdb3 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -56,9 +56,9 @@ impl RocksEngine { return false; } - // If path is not an empty directory, we say db exists. If path is not an empty directory - // but db has not been created, `DB::list_column_families` fails and we can clean up - // the directory by this indication. + // If path is not an empty directory, we say db exists. If path is not an empty + // directory but db has not been created, `DB::list_column_families` fails and + // we can clean up the directory by this indication. 
fs::read_dir(&path).unwrap().next().is_some() } diff --git a/components/engine_rocks/src/event_listener.rs b/components/engine_rocks/src/event_listener.rs index 86b8e4fdcae..5b93ccba637 100644 --- a/components/engine_rocks/src/event_listener.rs +++ b/components/engine_rocks/src/event_listener.rs @@ -162,8 +162,10 @@ impl rocksdb::EventListener for RocksEventListener { } // Here are some expected error examples: +// ```text // 1. Corruption: Sst file size mismatch: /qps/data/tikv-10014/db/000398.sst. Size recorded in manifest 6975, actual size 6959 // 2. Corruption: Bad table magic number: expected 9863518390377041911, found 759105309091689679 in /qps/data/tikv-10014/db/000021.sst +// ``` // // We assume that only the corruption sst file path is printed inside error. fn resolve_sst_filename_from_err(err: &str) -> Option { diff --git a/components/engine_rocks/src/file_system.rs b/components/engine_rocks/src/file_system.rs index c63edb8a117..87f46893774 100644 --- a/components/engine_rocks/src/file_system.rs +++ b/components/engine_rocks/src/file_system.rs @@ -82,21 +82,21 @@ mod tests { db.put(&data_key(b"a1"), &value).unwrap(); db.put(&data_key(b"a2"), &value).unwrap(); assert_eq!(stats.fetch(IOType::Flush, IOOp::Write), 0); - db.flush(true /*sync*/).unwrap(); + db.flush(true /* sync */).unwrap(); assert!(stats.fetch(IOType::Flush, IOOp::Write) > value_size * 2); assert!(stats.fetch(IOType::Flush, IOOp::Write) < value_size * 2 + amplification_bytes); stats.reset(); db.put(&data_key(b"a2"), &value).unwrap(); db.put(&data_key(b"a3"), &value).unwrap(); - db.flush(true /*sync*/).unwrap(); + db.flush(true /* sync */).unwrap(); assert!(stats.fetch(IOType::Flush, IOOp::Write) > value_size * 2); assert!(stats.fetch(IOType::Flush, IOOp::Write) < value_size * 2 + amplification_bytes); stats.reset(); db.compact_range( - CF_DEFAULT, None, /*start_key*/ - None, /*end_key*/ - false, /*exclusive_manual*/ - 1, /*max_subcompactions*/ + CF_DEFAULT, None, // start_key + None, // 
end_key + false, // exclusive_manual + 1, // max_subcompactions ) .unwrap(); assert!(stats.fetch(IOType::LevelZeroCompaction, IOOp::Read) > value_size * 4); diff --git a/components/engine_rocks/src/import.rs b/components/engine_rocks/src/import.rs index 79e6d6c0f49..a64da35ae67 100644 --- a/components/engine_rocks/src/import.rs +++ b/components/engine_rocks/src/import.rs @@ -19,9 +19,10 @@ impl ImportExt for RocksEngine { opts.set_write_global_seqno(false); files.iter().try_for_each(|file| -> Result<()> { let f = File::open(file)?; - // Prior to v5.2.0, TiKV use `write_global_seqno=true` for ingestion. For backward - // compatibility, in case TiKV is retrying an ingestion job generated by older - // version, it needs to reset the global seqno to 0. + // Prior to v5.2.0, TiKV use `write_global_seqno=true` for ingestion. For + // backward compatibility, in case TiKV is retrying an ingestion job + // generated by older version, it needs to reset the global seqno to + // 0. set_external_sst_file_global_seq_no(self.as_inner(), cf, file, 0).map_err(r2e)?; f.sync_all() .map_err(|e| format!("sync {}: {:?}", file, e)) diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index 8ec581c6e86..a2e394bf8c8 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -10,7 +10,8 @@ //! Because there are so many similarly named types across the TiKV codebase, //! and so much "import renaming", this crate consistently explicitly names type //! that implement a trait as `RocksTraitname`, to avoid the need for import -//! renaming and make it obvious what type any particular module is working with. +//! renaming and make it obvious what type any particular module is working +//! with. //! //! Please read the engine_trait crate docs before hacking. 
@@ -112,6 +113,6 @@ pub fn get_env( key_manager: Option>, limiter: Option>, ) -> engine_traits::Result> { - let env = encryption::get_env(None /*base_env*/, key_manager)?; + let env = encryption::get_env(None /* base_env */, key_manager)?; file_system::get_env(Some(env), limiter) } diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index ff465d85dd1..ea6d48adb35 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -19,8 +19,8 @@ impl RocksEngine { self.as_inner().is_titan() } - // We store all data which would be deleted in memory at first because the data of region will never be larger than - // max-region-size. + // We store all data which would be deleted in memory at first because the data + // of region will never be larger than max-region-size. fn delete_all_in_range_cf_by_ingest( &self, cf: &str, @@ -36,8 +36,8 @@ impl RocksEngine { let end = KeyBuilder::from_slice(max_end_key, 0, 0); let mut opts = IterOptions::new(Some(start), Some(end), false); if self.is_titan() { - // Cause DeleteFilesInRange may expose old blob index keys, setting key only for Titan - // to avoid referring to missing blob files. + // Cause DeleteFilesInRange may expose old blob index keys, setting key only for + // Titan to avoid referring to missing blob files. opts.set_key_only(true); } @@ -103,8 +103,8 @@ impl RocksEngine { let end = KeyBuilder::from_slice(range.end_key, 0, 0); let mut opts = IterOptions::new(Some(start), Some(end), false); if self.is_titan() { - // Cause DeleteFilesInRange may expose old blob index keys, setting key only for Titan - // to avoid referring to missing blob files. + // Cause DeleteFilesInRange may expose old blob index keys, setting key only for + // Titan to avoid referring to missing blob files. 
opts.set_key_only(true); } let mut it = self.iterator_opt(cf, opts)?; diff --git a/components/engine_rocks/src/perf_context_impl.rs b/components/engine_rocks/src/perf_context_impl.rs index fe747b21a49..543e116d8ac 100644 --- a/components/engine_rocks/src/perf_context_impl.rs +++ b/components/engine_rocks/src/perf_context_impl.rs @@ -15,7 +15,7 @@ use crate::{ }; macro_rules! report_write_perf_context { - ($ctx: expr, $metric: ident) => { + ($ctx:expr, $metric:ident) => { if $ctx.perf_level != PerfLevel::Disable { $ctx.write = WritePerfContext::capture(); observe_write_time!($ctx, $metric, write_wal_time); @@ -31,7 +31,7 @@ macro_rules! report_write_perf_context { } macro_rules! observe_write_time { - ($ctx:expr, $metric: expr, $v:ident) => { + ($ctx:expr, $metric:expr, $v:ident) => { $metric.$v.observe(($ctx.write.$v) as f64 / 1e9); }; } @@ -169,7 +169,8 @@ pub struct PerfContextStatistics { const FLUSH_METRICS_INTERVAL: Duration = Duration::from_secs(2); impl PerfContextStatistics { - /// Create an instance which stores instant statistics values, retrieved at creation. + /// Create an instance which stores instant statistics values, retrieved at + /// creation. 
pub fn new(perf_level: PerfLevel, kind: PerfContextKind) -> Self { PerfContextStatistics { perf_level, diff --git a/components/engine_rocks/src/properties.rs b/components/engine_rocks/src/properties.rs index 1168182c58e..c142ce01a74 100644 --- a/components/engine_rocks/src/properties.rs +++ b/components/engine_rocks/src/properties.rs @@ -563,15 +563,18 @@ mod tests { ("g", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 2, 1), ("h", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 8, 1), ("i", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 4, 1), - // handle "i": size(size = DISTANCE / 8 * 9 + 4, offset = DISTANCE / 8 * 17 + 9),keys(4,5) + // handle "i": size(size = DISTANCE / 8 * 9 + 4, offset = DISTANCE / 8 * 17 + + // 9),keys(4,5) ("j", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 2, 1), ("k", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 2, 1), // handle "k": size(size = DISTANCE + 2, offset = DISTANCE / 8 * 25 + 11),keys(2,11) ("l", 0, DEFAULT_PROP_KEYS_INDEX_DISTANCE / 2), ("m", 0, DEFAULT_PROP_KEYS_INDEX_DISTANCE / 2), - //handle "m": keys = DEFAULT_PROP_KEYS_INDEX_DISTANCE,offset = 11+DEFAULT_PROP_KEYS_INDEX_DISTANCE + // handle "m": keys = DEFAULT_PROP_KEYS_INDEX_DISTANCE,offset = + // 11+DEFAULT_PROP_KEYS_INDEX_DISTANCE ("n", 1, DEFAULT_PROP_KEYS_INDEX_DISTANCE), - //handle "n": keys = DEFAULT_PROP_KEYS_INDEX_DISTANCE, offset = 11+2*DEFAULT_PROP_KEYS_INDEX_DISTANCE + // handle "n": keys = DEFAULT_PROP_KEYS_INDEX_DISTANCE, offset = + // 11+2*DEFAULT_PROP_KEYS_INDEX_DISTANCE ("o", 1, 1), // handle "o": keys = 1, offset = 12 + 2*DEFAULT_PROP_KEYS_INDEX_DISTANCE ]; @@ -662,7 +665,8 @@ mod tests { ("g", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 2), ("h", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 8), ("i", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 4), - // handle "i": size(size = DISTANCE / 8 * 9 + 4, offset = DISTANCE / 8 * 17 + 9),keys(4,5) + // handle "i": size(size = DISTANCE / 8 * 9 + 4, offset = DISTANCE / 8 * 17 + + // 9),keys(4,5) ("j", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 2), ("k", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 2), // handle 
"k": size(size = DISTANCE + 2, offset = DISTANCE / 8 * 25 + 11),keys(2,11) diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index b6a35f4a4e2..fd52342002f 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -38,7 +38,8 @@ impl RaftEngineReadOnly for RocksEngine { let (max_size, mut total_size, mut count) = (max_size.unwrap_or(usize::MAX), 0, 0); if high - low <= RAFT_LOG_MULTI_GET_CNT { - // If election happens in inactive regions, they will just try to fetch one empty log. + // If election happens in inactive regions, they will just try to fetch one + // empty log. for i in low..high { if total_size > 0 && total_size >= max_size { break; diff --git a/components/engine_rocks/src/range_properties.rs b/components/engine_rocks/src/range_properties.rs index fcd0d2fa863..17d0805340d 100644 --- a/components/engine_rocks/src/range_properties.rs +++ b/components/engine_rocks/src/range_properties.rs @@ -191,8 +191,8 @@ impl RangePropertiesExt for RocksEngine { const SAMPLING_THRESHOLD: usize = 20000; const SAMPLE_RATIO: usize = 1000; - // If there are too many keys, reduce its amount before sorting, or it may take too much - // time to sort the keys. + // If there are too many keys, reduce its amount before sorting, or it may take + // too much time to sort the keys. if keys.len() > SAMPLING_THRESHOLD { let len = keys.len(); keys = keys.into_iter().step_by(len / SAMPLE_RATIO).collect(); @@ -204,7 +204,8 @@ impl RangePropertiesExt for RocksEngine { return Ok(keys); } - // Find `key_count` keys which divides the whole range into `parts` parts evenly. + // Find `key_count` keys which divides the whole range into `parts` parts + // evenly. 
let mut res = Vec::with_capacity(key_count); let section_len = (keys.len() as f64) / ((key_count + 1) as f64); for i in 1..=key_count { diff --git a/components/engine_rocks/src/rocks_metrics.rs b/components/engine_rocks/src/rocks_metrics.rs index 4529b6e9d27..4a88c6675ed 100644 --- a/components/engine_rocks/src/rocks_metrics.rs +++ b/components/engine_rocks/src/rocks_metrics.rs @@ -934,8 +934,8 @@ pub fn flush_engine_iostall_properties(engine: &DB, name: &str) { pub fn flush_engine_properties(engine: &DB, name: &str, shared_block_cache: bool) { for cf in engine.cf_names() { let handle = crate::util::get_cf_handle(engine, cf).unwrap(); - // It is important to monitor each cf's size, especially the "raft" and "lock" column - // families. + // It is important to monitor each cf's size, especially the "raft" and "lock" + // column families. let cf_used_size = crate::util::get_engine_cf_used_size(engine, handle); STORE_ENGINE_SIZE_GAUGE_VEC .with_label_values(&[name, cf]) @@ -1111,8 +1111,8 @@ pub fn flush_engine_properties(engine: &DB, name: &str, shared_block_cache: bool } if shared_block_cache { - // Since block cache is shared, getting cache size from any CF is fine. Here we get from - // default CF. + // Since block cache is shared, getting cache size from any CF is fine. Here we + // get from default CF. let handle = crate::util::get_cf_handle(engine, CF_DEFAULT).unwrap(); let block_cache_usage = engine.get_block_cache_usage_cf(handle); STORE_ENGINE_BLOCK_CACHE_USAGE_GAUGE_VEC diff --git a/components/engine_rocks/src/sst.rs b/components/engine_rocks/src/sst.rs index 68182238161..66e0a974916 100644 --- a/components/engine_rocks/src/sst.rs +++ b/components/engine_rocks/src/sst.rs @@ -93,8 +93,8 @@ impl Iterable for RocksSstReader { // FIXME: See comment on RocksSstReader for why this contains Rc pub struct RocksSstIterator(DBIterator>); -// TODO(5kbpers): Temporarily force to add `Send` here, add a method for creating -// DBIterator> in rust-rocksdb later. 
+// TODO(5kbpers): Temporarily force to add `Send` here, add a method for +// creating DBIterator> in rust-rocksdb later. unsafe impl Send for RocksSstIterator {} impl Iterator for RocksSstIterator { @@ -231,10 +231,10 @@ impl SstWriterBuilder for RocksSstWriterBuilder { io_options.set_compression_options( -14, self.compression_level, - 0, /*strategy*/ - 0, /*max_dict_bytes*/ - 0, /*zstd_max_train_bytes*/ - 1, /*parallel_threads*/ + 0, // strategy + 0, // max_dict_bytes + 0, // zstd_max_train_bytes + 1, // parallel_threads ); } io_options.compression(compress_type); diff --git a/components/engine_rocks/src/util.rs b/components/engine_rocks/src/util.rs index a3b6a2bf4cf..4192eecfcae 100644 --- a/components/engine_rocks/src/util.rs +++ b/components/engine_rocks/src/util.rs @@ -104,7 +104,8 @@ pub fn new_engine_opt( let mut db = DB::open_cf(db_opt, path, cfds).map_err(r2e)?; // Drops discarded column families. - // for cf in existed.iter().filter(|x| needed.iter().find(|y| y == x).is_none()) { + // for cf in existed.iter().filter(|x| needed.iter().find(|y| y == + // x).is_none()) { for cf in cfs_diff(&existed, &needed) { // Never drop default column families. if cf != CF_DEFAULT { @@ -115,8 +116,8 @@ pub fn new_engine_opt( Ok(RocksEngine::new(db)) } -/// Turns "dynamic level size" off for the existing column family which was off before. -/// Column families are small, HashMap isn't necessary. +/// Turns "dynamic level size" off for the existing column family which was off +/// before. Column families are small, HashMap isn't necessary. fn adjust_dynamic_level_bytes( cf_descs: &[CColumnFamilyDescriptor], name: &str, @@ -146,9 +147,10 @@ pub fn db_exist(path: &str) -> bool { return false; } - // If path is not an empty directory, and current file exists, we say db exists. If path is not an empty directory - // but db has not been created, `DB::list_column_families` fails and we can clean up - // the directory by this indication. 
+ // If path is not an empty directory, and current file exists, we say db exists. + // If path is not an empty directory but db has not been created, + // `DB::list_column_families` fails and we can clean up the directory by + // this indication. fs::read_dir(&path).unwrap().next().is_some() } diff --git a/components/engine_rocks/src/write_batch.rs b/components/engine_rocks/src/write_batch.rs index 892dd83321c..f658fb046fb 100644 --- a/components/engine_rocks/src/write_batch.rs +++ b/components/engine_rocks/src/write_batch.rs @@ -29,12 +29,14 @@ impl WriteBatchExt for RocksEngine { } } -/// `RocksWriteBatchVec` is for method `MultiBatchWrite` of RocksDB, which splits a large WriteBatch -/// into many smaller ones and then any thread could help to deal with these small WriteBatch when it -/// is calling `MultiBatchCommit` and wait the front writer to finish writing. `MultiBatchWrite` will -/// perform much better than traditional `pipelined_write` when TiKV writes very large data into RocksDB. -/// We will remove this feature when `unordered_write` of RocksDB becomes more stable and becomes compatible -/// with Titan. +/// `RocksWriteBatchVec` is for method `MultiBatchWrite` of RocksDB, which +/// splits a large WriteBatch into many smaller ones and then any thread could +/// help to deal with these small WriteBatch when it is calling +/// `MultiBatchCommit` and wait the front writer to finish writing. +/// `MultiBatchWrite` will perform much better than traditional +/// `pipelined_write` when TiKV writes very large data into RocksDB. +/// We will remove this feature when `unordered_write` of RocksDB becomes more +/// stable and becomes compatible with Titan. pub struct RocksWriteBatchVec { db: Arc, wbs: Vec, @@ -79,8 +81,9 @@ impl RocksWriteBatchVec { self.db.as_ref() } - /// `check_switch_batch` will split a large WriteBatch into many smaller ones. This is to avoid - /// a large WriteBatch blocking write_thread too long. 
+ /// `check_switch_batch` will split a large WriteBatch into many smaller + /// ones. This is to avoid a large WriteBatch blocking write_thread too + /// long. #[inline(always)] fn check_switch_batch(&mut self) { if self.support_write_batch_vec diff --git a/components/engine_rocks_helper/src/sst_recovery.rs b/components/engine_rocks_helper/src/sst_recovery.rs index bfd39e951b2..7a820e6a79b 100644 --- a/components/engine_rocks_helper/src/sst_recovery.rs +++ b/components/engine_rocks_helper/src/sst_recovery.rs @@ -132,7 +132,8 @@ impl RecoveryRunner { self.damaged_files.iter().any(|f| f.name == sst_path) } - // Cleans up obsolete damaged files and panics if some files are not handled in time. + // Cleans up obsolete damaged files and panics if some files are not handled in + // time. fn check_damaged_files(&mut self) { if self.damaged_files.is_empty() { return; @@ -153,7 +154,8 @@ impl RecoveryRunner { } // Check whether the StoreMeta contains the region range, if it contains, - // recorded fault region ids to report to PD and add file info into `damaged_files`. + // recorded fault region ids to report to PD and add file info into + // `damaged_files`. // // Acquire meta lock. fn check_overlap_damaged_regions(&self, file: &FileInfo) -> bool { @@ -163,8 +165,8 @@ impl RecoveryRunner { meta.update_overlap_damaged_ranges(&file.name, &file.smallest_key, &file.largest_key); if !overlap { fail_point!("sst_recovery_before_delete_files"); - // The sst file can be deleted safely and set `include_end` to `true` otherwise the - // file with the same largest key will be skipped. + // The sst file can be deleted safely and set `include_end` to `true` otherwise + // the file with the same largest key will be skipped. // Here store meta lock should be held to prevent peers from being added back. 
self.db .as_inner() diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index 20645823fd8..ada430261e3 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -328,7 +328,8 @@ pub mod ctor { /// - The column families specified as `cfs`, with default options, or /// - The column families specified as `opts`, with options. /// - /// Note that if `opts` is not `None` then the `cfs` argument is completely ignored. + /// Note that if `opts` is not `None` then the `cfs` argument is + /// completely ignored. /// /// The engine stores its data in the `path` directory. /// If that directory does not exist, then it is created. diff --git a/components/engine_traits/src/compact.rs b/components/engine_traits/src/compact.rs index a7e8636769b..8dd1cc7d9b4 100644 --- a/components/engine_traits/src/compact.rs +++ b/components/engine_traits/src/compact.rs @@ -9,7 +9,8 @@ use crate::errors::Result; pub trait CompactExt { type CompactedEvent: CompactedEvent; - /// Checks whether any column family sets `disable_auto_compactions` to `True` or not. + /// Checks whether any column family sets `disable_auto_compactions` to + /// `True` or not. fn auto_compactions_is_disabled(&self) -> Result; /// Compacts the column families in the specified range by manual or not. @@ -24,7 +25,8 @@ pub trait CompactExt { /// Compacts files in the range and above the output level. /// Compacts all files if the range is not specified. - /// Compacts all files to the bottommost level if the output level is not specified. + /// Compacts all files to the bottommost level if the output level is not + /// specified. fn compact_files_in_range( &self, start: Option<&[u8]>, @@ -32,8 +34,9 @@ pub trait CompactExt { output_level: Option, ) -> Result<()>; - /// Compacts files in the range and above the output level of the given column family. - /// Compacts all files to the bottommost level if the output level is not specified. 
+ /// Compacts files in the range and above the output level of the given + /// column family. Compacts all files to the bottommost level if the + /// output level is not specified. fn compact_files_in_range_cf( &self, cf: &str, diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index c143cf7a194..1ffbdec1df5 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -68,8 +68,9 @@ pub trait KvEngine: /// TabletAccessor is the trait to access all the tablets with provided accessor /// -/// For single rocksdb instance, it essentially accesses the global kvdb with the accessor -/// For multi rocksdb instances, it accesses all the tablets with the accessor +/// For single rocksdb instance, it essentially accesses the global kvdb with +/// the accessor For multi rocksdb instances, it accesses all the tablets with +/// the accessor pub trait TabletAccessor { /// Loop visit all opened tablets by the specified function. fn for_each_opened_tablet(&self, _f: &mut (dyn FnMut(u64, u64, &EK))); @@ -82,9 +83,11 @@ pub trait TabletAccessor { /// max error count to log const MAX_ERROR_COUNT: u32 = 5; -/// TabletErrorCollector is the facility struct to handle errors when using TabletAccessor::for_each_opened_tablet +/// TabletErrorCollector is the facility struct to handle errors when using +/// TabletAccessor::for_each_opened_tablet /// -/// It will choose the last failed result as the final result, meanwhile logging errors up to MAX_ERROR_COUNT. +/// It will choose the last failed result as the final result, meanwhile logging +/// errors up to MAX_ERROR_COUNT. pub struct TabletErrorCollector { errors: Vec, max_error_count: u32, @@ -151,14 +154,14 @@ impl Drop for TabletErrorCollector { } /// A factory trait to create new engine. -/// -// It should be named as `EngineFactory` for consistency, but we are about to rename -// engine to tablet, so always use tablet for new traits/types. 
+// It should be named as `EngineFactory` for consistency, but we are about to +// rename engine to tablet, so always use tablet for new traits/types. pub trait TabletFactory: TabletAccessor { /// Create an tablet by id and suffix. If the tablet exists, it will fail. - /// The id is likely the region Id, the suffix could be the current raft log index. - /// They together could specify a unique path for a region's tablet. - /// The reason to have suffix is that we can keep more than one tablet for a region. + /// The id is likely the region Id, the suffix could be the current raft log + /// index. They together could specify a unique path for a region's + /// tablet. The reason to have suffix is that we can keep more than one + /// tablet for a region. fn create_tablet(&self, id: u64, suffix: u64) -> Result; /// Open a tablet by id and suffix. If the tablet exists, it will open it. @@ -167,7 +170,8 @@ pub trait TabletFactory: TabletAccessor { self.open_tablet_raw(&self.tablet_path(id, suffix), false) } - /// Open a tablet by id and suffix from cache---that means it should already be opened. + /// Open a tablet by id and suffix from cache---that means it should already + /// be opened. fn open_tablet_cache(&self, id: u64, suffix: u64) -> Option { if let Ok(engine) = self.open_tablet_raw(&self.tablet_path(id, suffix), false) { return Some(engine); @@ -204,7 +208,8 @@ pub trait TabletFactory: TabletAccessor { /// Tablets root path fn tablets_path(&self) -> PathBuf; - /// Load the tablet from path for id and suffix--for scenarios such as applying snapshot + /// Load the tablet from path for id and suffix--for scenarios such as + /// applying snapshot fn load_tablet(&self, _path: &Path, _id: u64, _suffix: u64) -> Result { unimplemented!(); } diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 6ba3da2b3d9..191e5dcb204 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -60,14 +60,15 @@ //! 
- [`SyncMutable`] and [`Mutable`] - types to which single key/value pairs //! can be written. This includes engines and write batches. //! -//! - [`WriteBatch`] - types that can commit multiple key/value pairs in batches. -//! A `WriteBatchExt::WriteBtach` commits all pairs in one atomic transaction. -//! A `WriteBatchExt::WriteBatchVec` does not (FIXME: is this correct?). +//! - [`WriteBatch`] - types that can commit multiple key/value pairs in +//! batches. A `WriteBatchExt::WriteBatch` commits all pairs in one atomic +//! transaction. A `WriteBatchExt::WriteBatchVec` does not (FIXME: is this +//! correct?). //! //! The `KvEngine` instance generally acts as a factory for types that implement //! other traits in the crate. These factory methods, associated types, and -//! other associated methods are defined in "extension" traits. For example, methods -//! on engines related to batch writes are in the `WriteBatchExt` trait. +//! other associated methods are defined in "extension" traits. For example, +//! methods on engines related to batch writes are in the `WriteBatchExt` trait. //! //! //! # Design notes @@ -75,19 +76,19 @@ //! - `KvEngine` is the main engine trait. It requires many other traits, which //! have many other associated types that implement yet more traits. //! -//! - Features should be grouped into their own modules with their own -//! traits. A common pattern is to have an associated type that implements -//! a trait, and an "extension" trait that associates that type with `KvEngine`, -//! which is part of `KvEngine's trait requirements. +//! - Features should be grouped into their own modules with their own traits. A +//! common pattern is to have an associated type that implements a trait, and +//! an "extension" trait that associates that type with `KvEngine`, which is +//! part of `KvEngine's trait requirements. //! //! - For now, for simplicity, all extension traits are required by `KvEngine`. //! 
In the future it may be feasible to separate them for engines with //! different feature sets. //! -//! - Associated types generally have the same name as the trait they -//! are required to implement. Engine extensions generally have the same -//! name suffixed with `Ext`. Concrete implementations usually have the -//! same name prefixed with the database name, i.e. `Rocks`. +//! - Associated types generally have the same name as the trait they are +//! required to implement. Engine extensions generally have the same name +//! suffixed with `Ext`. Concrete implementations usually have the same name +//! prefixed with the database name, i.e. `Rocks`. //! //! Example: //! @@ -121,9 +122,9 @@ //! use a standard new method). If future engines require factory methods, the //! traits can be converted then. //! -//! - Types that require a handle to the engine (or some other "parent" type) -//! do so with either Rc or Arc. An example is EngineIterator. The reason -//! for this is that associated types cannot contain lifetimes. That requires +//! - Types that require a handle to the engine (or some other "parent" type) do +//! so with either Rc or Arc. An example is EngineIterator. The reason for +//! this is that associated types cannot contain lifetimes. That requires //! "generic associated types". See //! //! - @@ -221,15 +222,15 @@ //! `RocksDB::from_ref` and `RocksDB::as_inner` methods. //! //! - Down follow the type system too far "down the rabbit hole". When you see -//! that another subsystem is blocking you from refactoring the system you -//! are trying to refactor, stop, stash your changes, and focus on the other +//! that another subsystem is blocking you from refactoring the system you are +//! trying to refactor, stop, stash your changes, and focus on the other //! system instead. //! //! - You will through away branches that lead to dead ends. Learn from the //! experience and try again from a different angle. //! -//! 
- For now, use the same APIs as the RocksDB bindings, as methods -//! on the various engine traits, and with this crate's error type. +//! - For now, use the same APIs as the RocksDB bindings, as methods on the +//! various engine traits, and with this crate's error type. //! //! - When new types are needed from the RocksDB API, add a new module, define a //! new trait (possibly with the same name as the RocksDB type), then define a diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index bc2c3a2b547..67e32e40bdd 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -11,17 +11,20 @@ use crate::{ #[derive(Clone, Debug)] pub enum DeleteStrategy { - /// Delete the SST files that are fullly fit in range. However, the SST files that are partially - /// overlapped with the range will not be touched. + /// Delete the SST files that are fullly fit in range. However, the SST + /// files that are partially overlapped with the range will not be + /// touched. DeleteFiles, /// Delete the data stored in Titan. DeleteBlobs, - /// Scan for keys and then delete. Useful when we know the keys in range are not too many. + /// Scan for keys and then delete. Useful when we know the keys in range are + /// not too many. DeleteByKey, - /// Delete by range. Note that this is experimental and you should check whether it is enbaled - /// in config before using it. + /// Delete by range. Note that this is experimental and you should check + /// whether it is enbaled in config before using it. DeleteByRange, - /// Delete by ingesting a SST file with deletions. Useful when the number of ranges is too many. + /// Delete by ingesting a SST file with deletions. Useful when the number of + /// ranges is too many. 
DeleteByWriter { sst_path: String }, } @@ -44,25 +47,26 @@ pub trait MiscExt: CFNamesExt + FlowControlFactorsExt { ranges: &[Range<'_>], ) -> Result<()>; - /// Return the approximate number of records and size in the range of memtables of the cf. + /// Return the approximate number of records and size in the range of + /// memtables of the cf. fn get_approximate_memtable_stats_cf(&self, cf: &str, range: &Range<'_>) -> Result<(u64, u64)>; fn ingest_maybe_slowdown_writes(&self, cf: &str) -> Result; /// Gets total used size of rocksdb engine, including: - /// * total size (bytes) of all SST files. - /// * total size (bytes) of active and unflushed immutable memtables. - /// * total size (bytes) of all blob files. - /// + /// * total size (bytes) of all SST files. + /// * total size (bytes) of active and unflushed immutable memtables. + /// * total size (bytes) of all blob files. fn get_engine_used_size(&self) -> Result; /// Roughly deletes files in multiple ranges. /// /// Note: - /// - After this operation, some keys in the range might still exist in the database. - /// - After this operation, some keys in the range might be removed from existing snapshot, - /// so you shouldn't expect to be able to read data from the range using existing snapshots - /// any more. + /// - After this operation, some keys in the range might still exist in + /// the database. + /// - After this operation, some keys in the range might be removed from + /// existing snapshot, so you shouldn't expect to be able to read data + /// from the range using existing snapshots any more. /// /// Ref: fn roughly_cleanup_ranges(&self, ranges: &[(Vec, Vec)]) -> Result<()>; diff --git a/components/engine_traits/src/peekable.rs b/components/engine_traits/src/peekable.rs index 7550568396c..23318b2a233 100644 --- a/components/engine_traits/src/peekable.rs +++ b/components/engine_traits/src/peekable.rs @@ -19,7 +19,8 @@ pub trait Peekable { /// Returns `None` if they key does not exist. 
fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result>; - /// Read a value for a key from a given column family, given a set of options. + /// Read a value for a key from a given column family, given a set of + /// options. /// /// Returns `None` if the key does not exist. fn get_value_cf_opt( diff --git a/components/engine_traits/src/perf_context.rs b/components/engine_traits/src/perf_context.rs index c46ec4a95c8..dfa5aa967b7 100644 --- a/components/engine_traits/src/perf_context.rs +++ b/components/engine_traits/src/perf_context.rs @@ -50,7 +50,8 @@ pub enum PerfContextKind { RaftstoreStore, /// Commands in tikv::storage, the inner str is the command tag. Storage(&'static str), - /// Coprocessor requests in tikv::coprocessor, the inner str is the request type. + /// Coprocessor requests in tikv::coprocessor, the inner str is the request + /// type. Coprocessor(&'static str), } diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index 58a78f605f9..d94d69fa335 100644 --- a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -103,8 +103,8 @@ pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send fn put_raft_state(&self, raft_group_id: u64, state: &RaftLocalState) -> Result<()>; - /// Like `cut_logs` but the range could be very large. Return the deleted count. - /// Generally, `from` can be passed in `0`. + /// Like `cut_logs` but the range could be very large. Return the deleted + /// count. Generally, `from` can be passed in `0`. 
fn gc(&self, raft_group_id: u64, from: u64, to: u64) -> Result; fn batch_gc(&self, tasks: Vec) -> Result { diff --git a/components/engine_traits/src/range_properties.rs b/components/engine_traits/src/range_properties.rs index 8c326bd41c7..f97008dd929 100644 --- a/components/engine_traits/src/range_properties.rs +++ b/components/engine_traits/src/range_properties.rs @@ -32,7 +32,8 @@ pub trait RangePropertiesExt { large_threshold: u64, ) -> Result; - /// Get range approximate split keys to split range evenly into key_count + 1 parts . + /// Get range approximate split keys to split range evenly into key_count + + /// 1 parts . fn get_range_approximate_split_keys( &self, range: Range<'_>, diff --git a/components/engine_traits/src/sst_partitioner.rs b/components/engine_traits/src/sst_partitioner.rs index faedd4efb8b..f41664403d1 100644 --- a/components/engine_traits/src/sst_partitioner.rs +++ b/components/engine_traits/src/sst_partitioner.rs @@ -30,8 +30,8 @@ pub trait SstPartitioner { } pub trait SstPartitionerFactory: Sync + Send { - // Lifetime of the partitioner can be changed to be bounded by the factory's lifetime once - // generic associated types is supported. + // Lifetime of the partitioner can be changed to be bounded by the factory's + // lifetime once generic associated types is supported. // https://github.com/rust-lang/rfcs/blob/master/text/1598-generic_associated_types.md type Partitioner: SstPartitioner + 'static; diff --git a/components/external_storage/export/src/export.rs b/components/external_storage/export/src/export.rs index b9d4b098394..00048522752 100644 --- a/components/external_storage/export/src/export.rs +++ b/components/external_storage/export/src/export.rs @@ -1,7 +1,7 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -//! To use External storage with protobufs as an application, import this module. -//! external_storage contains the actual library code +//! 
To use External storage with protobufs as an application, import this +//! module. external_storage contains the actual library code //! Cloud provider backends are under components/cloud use std::{ io::{self, Write}, @@ -55,8 +55,9 @@ pub fn create_storage( } } -// when the flag cloud-storage-dylib or cloud-storage-grpc is set create_storage is automatically wrapped with a client -// This function is used by the library/server to avoid any wrapping +// when the flag cloud-storage-dylib or cloud-storage-grpc is set create_storage +// is automatically wrapped with a client This function is used by the +// library/server to avoid any wrapping pub fn create_storage_no_client( storage_backend: &StorageBackend, config: BackendConfig, diff --git a/components/external_storage/export/src/request.rs b/components/external_storage/export/src/request.rs index eaf618746c0..5623c0732d7 100644 --- a/components/external_storage/export/src/request.rs +++ b/components/external_storage/export/src/request.rs @@ -58,7 +58,8 @@ pub async fn restore_inner( expected_length: u64, ) -> io::Result<()> { let storage = create_storage_no_client(&storage_backend)?; - // TODO: support encryption. The service must be launched with or sent a DataKeyManager + // TODO: support encryption. The service must be launched with or sent a + // DataKeyManager let output: &mut dyn io::Write = &mut File::create(file_name)?; // the minimum speed of reading data, in bytes/second. // if reading speed is slower than this rate, we will stop with diff --git a/components/external_storage/src/lib.rs b/components/external_storage/src/lib.rs index f1d1a617dc8..8c9ea242b98 100644 --- a/components/external_storage/src/lib.rs +++ b/components/external_storage/src/lib.rs @@ -50,9 +50,9 @@ pub fn record_storage_create(start: Instant, storage: &dyn ExternalStorage) { } /// UnpinReader is a simple wrapper for AsyncRead + Unpin + Send. 
-/// This wrapper would remove the lifetime at the argument of the generted async function -/// in order to make rustc happy. (And reduce the length of signture of write.) -/// see https://github.com/rust-lang/rust/issues/63033 +/// This wrapper would remove the lifetime at the argument of the generated +/// async function in order to make rustc happy. (And reduce the length of +/// signature of write.) see https://github.com/rust-lang/rust/issues/63033 pub struct UnpinReader(pub Box); #[derive(Debug, Default)] diff --git a/components/external_storage/src/local.rs b/components/external_storage/src/local.rs index 00cb42cf1a6..f246c808b86 100644 --- a/components/external_storage/src/local.rs +++ b/components/external_storage/src/local.rs @@ -84,8 +84,9 @@ impl ExternalStorage for LocalStorage { )); } // create the parent dir if there isn't one. - // note: we may write to arbitrary directory here if the path contains things like '../' - // but internally the file name should be fully controlled by TiKV, so maybe it is OK? + // note: we may write to arbitrary directory here if the path contains things + // like '../' but internally the file name should be fully controlled by + // TiKV, so maybe it is OK? if let Some(parent) = Path::new(name).parent() { fs::create_dir_all(self.base.join(parent)) .await @@ -102,7 +103,8 @@ impl ExternalStorage for LocalStorage { } // Because s3 could support writing(put_object) a existed object. - // For the interface consistent with s3, local storage need also support write a existed file. + // For the interface consistent with s3, local storage need also support write a + // existed file. 
if fs::metadata(self.base.join(name)).await.is_ok() { info!("[{}] is already exists in {}", name, self.base.display()); } @@ -120,7 +122,8 @@ impl ExternalStorage for LocalStorage { fn read(&self, name: &str) -> Box { debug!("read file from local storage"; "name" => %name, "base" => %self.base.display()); - // We used std i/o here for removing the requirement of tokio reactor when restoring. + // We used std i/o here for removing the requirement of tokio reactor when + // restoring. // FIXME: when restore side get ready, use tokio::fs::File for returning. match StdFile::open(self.base.join(name)) { Ok(file) => Box::new(AllowStdIo::new(file)) as _, diff --git a/components/external_storage/src/request.rs b/components/external_storage/src/request.rs index ef4fa54e448..7f1a81d49b7 100644 --- a/components/external_storage/src/request.rs +++ b/components/external_storage/src/request.rs @@ -24,7 +24,8 @@ pub fn write_sender( // currently it is copying into an intermediate buffer // Writing to a file here uses up disk space // But as a positive it gets the backup data out of the DB the fastest - // Currently this waits for the file to be completely written before sending to storage + // Currently this waits for the file to be completely written before sending to + // storage runtime.enter(|| { block_on(async { let msg = |action: &str| format!("{} file {:?}", action, &file_path); diff --git a/components/file_system/src/file.rs b/components/file_system/src/file.rs index 93269d5da10..1c56b240f1d 100644 --- a/components/file_system/src/file.rs +++ b/components/file_system/src/file.rs @@ -15,7 +15,8 @@ use fs2::FileExt; use super::{get_io_rate_limiter, get_io_type, IOOp, IORateLimiter}; -/// A wrapper around `std::fs::File` with capability to track and regulate IO flow. +/// A wrapper around `std::fs::File` with capability to track and regulate IO +/// flow. 
pub struct File { inner: fs::File, limiter: Option>, diff --git a/components/file_system/src/io_stats/biosnoop.rs b/components/file_system/src/io_stats/biosnoop.rs index cbe622f78f8..d156d94f77c 100644 --- a/components/file_system/src/io_stats/biosnoop.rs +++ b/components/file_system/src/io_stats/biosnoop.rs @@ -29,9 +29,9 @@ use crate::{metrics::*, IOBytes, IOType}; /// by address, then all the IO requests for that thread will be recorded in /// corresponding type's map in BCC. /// -/// With that information, every time calling `IOContext` it get the stored stats -/// from corresponding type's map in BCC. Thus it enables TiKV to get the latency and -/// bytes of read/write request per IO-type. +/// With that information, every time calling `IOContext` it get the stored +/// stats from corresponding type's map in BCC. Thus it enables TiKV to get the +/// latency and bytes of read/write request per IO-type. const MAX_THREAD_IDX: usize = 192; @@ -291,8 +291,8 @@ mod tests { #[test] fn test_biosnoop() { init().unwrap(); - // Test cases are running in parallel, while they depend on the same global variables. - // To make them not affect each other, run them in sequence. + // Test cases are running in parallel, while they depend on the same global + // variables. To make them not affect each other, run them in sequence. test_thread_idx_allocation(); test_io_context(); unsafe { diff --git a/components/file_system/src/lib.rs b/components/file_system/src/lib.rs index d5f8345cae3..104b7371537 100644 --- a/components/file_system/src/lib.rs +++ b/components/file_system/src/lib.rs @@ -281,7 +281,8 @@ pub fn copy, Q: AsRef>(from: P, to: Q) -> io::Result { copy_imp(from.as_ref(), to.as_ref(), false /* sync */) } -/// Copies the contents and permission bits of one file to another, then synchronizes. +/// Copies the contents and permission bits of one file to another, then +/// synchronizes. 
pub fn copy_and_sync, Q: AsRef>(from: P, to: Q) -> io::Result { copy_imp(from.as_ref(), to.as_ref(), true /* sync */) } @@ -296,8 +297,8 @@ pub fn file_exists>(file: P) -> bool { path.exists() && path.is_file() } -/// Deletes given path from file system. Returns `true` on success, `false` if the file doesn't exist. -/// Otherwise the raw error will be returned. +/// Deletes given path from file system. Returns `true` on success, `false` if +/// the file doesn't exist. Otherwise the raw error will be returned. pub fn delete_file_if_exist>(file: P) -> io::Result { match remove_file(&file) { Ok(_) => Ok(true), @@ -306,8 +307,8 @@ pub fn delete_file_if_exist>(file: P) -> io::Result { } } -/// Deletes given path from file system. Returns `true` on success, `false` if the directory doesn't -/// exist. Otherwise the raw error will be returned. +/// Deletes given path from file system. Returns `true` on success, `false` if +/// the directory doesn't exist. Otherwise the raw error will be returned. pub fn delete_dir_if_exist>(dir: P) -> io::Result { match remove_dir_all(&dir) { Ok(_) => Ok(true), @@ -316,8 +317,9 @@ pub fn delete_dir_if_exist>(dir: P) -> io::Result { } } -/// Creates a new, empty directory at the provided path. Returns `true` on success, -/// `false` if the directory already exists. Otherwise the raw error will be returned. +/// Creates a new, empty directory at the provided path. Returns `true` on +/// success, `false` if the directory already exists. Otherwise the raw error +/// will be returned. 
pub fn create_dir_if_not_exist>(dir: P) -> io::Result { match create_dir(&dir) { Ok(_) => Ok(true), diff --git a/components/file_system/src/metrics_manager.rs b/components/file_system/src/metrics_manager.rs index ddc48eb8f86..8ff4bddde47 100644 --- a/components/file_system/src/metrics_manager.rs +++ b/components/file_system/src/metrics_manager.rs @@ -12,7 +12,8 @@ use crate::{ }; pub enum BytesFetcher { - /// Fetch IO statistics from IO rate limiter, which records passed-through IOs in atomic counters. + /// Fetch IO statistics from IO rate limiter, which records passed-through + /// IOs in atomic counters. FromRateLimiter(Arc), /// Fetch IO statistics from OS I/O stats collector. FromIOStatsCollector(), diff --git a/components/file_system/src/rate_limiter.rs b/components/file_system/src/rate_limiter.rs index b6aa0730ac7..51fe8228aef 100644 --- a/components/file_system/src/rate_limiter.rs +++ b/components/file_system/src/rate_limiter.rs @@ -159,15 +159,15 @@ impl Default for IORateLimiterStatistics { } } -/// Used to dynamically adjust the proportion of total budgets allocated for rate limited -/// IO. This is needed when global IOs are only partially rate limited, e.g. when mode is -/// IORateLimitMode::WriteOnly. +/// Used to dynamically adjust the proportion of total budgets allocated for +/// rate limited IO. This is needed when global IOs are only partially rate +/// limited, e.g. when mode is IORateLimitMode::WriteOnly. pub trait IOBudgetAdjustor: Send + Sync { fn adjust(&self, threshold: usize) -> usize; } -/// Limit total IO flow below provided threshold by throttling lower-priority IOs. -/// Rate limit is disabled when total IO threshold is set to zero. +/// Limit total IO flow below provided threshold by throttling lower-priority +/// IOs. Rate limit is disabled when total IO threshold is set to zero. 
struct PriorityBasedIORateLimiter { // High-priority IOs are only limited when strict is true strict: bool, @@ -197,13 +197,13 @@ impl PriorityBasedIORateLimiterProtected { } macro_rules! do_sleep { - ($duration:expr, sync) => { + ($duration:expr,sync) => { std::thread::sleep($duration); }; - ($duration:expr, async) => { + ($duration:expr,async) => { tokio::time::sleep($duration).await; }; - ($duration:expr, skewed_sync) => { + ($duration:expr,skewed_sync) => { use rand::Rng; let mut rng = rand::thread_rng(); let subtraction: bool = rng.gen(); @@ -217,9 +217,10 @@ macro_rules! do_sleep { } /// Actual implementation for requesting IOs from PriorityBasedIORateLimiter. -/// An attempt will first be recorded. If the attempted amount exceeds the available quotas of -/// current epoch, the requester will be queued (logically) and sleep until served. -/// Macro is necessary to de-dup codes used both in async/sync functions. +/// An attempt will first be recorded. If the attempted amount exceeds the +/// available quotas of current epoch, the requester will be queued (logically) +/// and sleep until served. Macro is necessary to de-dup codes used both in +/// async/sync functions. macro_rules! request_imp { ($limiter:ident, $priority:ident, $amount:ident, $mode:tt) => {{ debug_assert!($amount > 0); @@ -244,7 +245,8 @@ macro_rules! request_imp { // The request is already partially fulfilled in current epoch when consumption // overflow bytes are smaller than requested amount. let remains = std::cmp::min(bytes_through - cached_bytes_per_epoch, amount); - // When there is a recent refill, double check if bytes consumption has been reset. + // When there is a recent refill, double check if bytes consumption has been + // reset. if now + DEFAULT_REFILL_PERIOD < locked.next_refill_time + Duration::from_millis(1) && $limiter.bytes_through[priority_idx].fetch_add(remains, Ordering::Relaxed) + remains @@ -252,8 +254,8 @@ macro_rules! 
request_imp { { return amount; } - // Enqueue itself by adding to pending_bytes, whose current value denotes a position - // of logical queue to wait in. + // Enqueue itself by adding to pending_bytes, whose current value denotes a + // position of logical queue to wait in. locked.pending_bytes[priority_idx] += remains; // Calculate wait duration by queue_len / served_per_epoch. let wait = if locked.next_refill_time <= now { @@ -343,11 +345,13 @@ impl PriorityBasedIORateLimiter { /// Updates and refills IO budgets for next epoch based on IO priority. /// Here we provide best-effort priority control: - /// 1) Limited IO budget is assigned to lower priority to ensure higher priority can at least - /// consume the same IO amount as the last few epochs without breaching global threshold. - /// 2) Higher priority may temporarily use lower priority's IO budgets. When this happens, - /// total IO flow could exceed global threshold. - /// 3) Highest priority IO alone must not exceed global threshold (in strict mode). + /// - Limited IO budget is assigned to lower priority to ensure higher + /// priority can at least consume the same IO amount as the last few + /// epochs without breaching global threshold. + /// - Higher priority may temporarily use lower priority's IO budgets. When + /// this happens, total IO flow could exceed global threshold. + /// - Highest priority IO alone must not exceed global threshold (in strict + /// mode). fn refill(&self, locked: &mut PriorityBasedIORateLimiterProtected, now: Instant) { let mut total_budgets = self.bytes_per_epoch[IOPriority::High as usize].load(Ordering::Relaxed); @@ -368,8 +372,8 @@ impl PriorityBasedIORateLimiter { let mut used_budgets = 0; for pri in &[IOPriority::High, IOPriority::Medium] { let p = *pri as usize; - // Skipped epochs can only serve pending requests rather that in-coming ones, catch up - // by subtracting them from pending_bytes. 
+ // Skipped epochs can only serve pending requests rather that in-coming ones, + // catch up by subtracting them from pending_bytes. let served_by_skipped_epochs = std::cmp::min( (remaining_budgets as f32 * skipped_epochs) as usize, locked.pending_bytes[p], @@ -460,8 +464,8 @@ impl IORateLimiter { pub fn new_for_test() -> Self { IORateLimiter::new( IORateLimitMode::AllIo, - true, /*strict*/ - true, /*enable_statistics*/ + true, // strict + true, // enable_statistics ) } @@ -629,15 +633,15 @@ mod tests { let t0 = Instant::now(); let _write_context = start_background_jobs( &limiter, - 1, /*job_count*/ + 1, // job_count Request(IOType::ForegroundWrite, IOOp::Write, 10), - None, /*interval*/ + None, // interval ); let _compaction_context = start_background_jobs( &limiter, - 1, /*job_count*/ + 1, // job_count Request(IOType::Compaction, IOOp::Write, 10), - None, /*interval*/ + None, // interval ); std::thread::sleep(Duration::from_secs(1)); let t1 = Instant::now(); @@ -679,9 +683,9 @@ mod tests { { let _context = start_background_jobs( limiter, - 2, /*job_count*/ + 2, // job_count Request(IOType::ForegroundWrite, IOOp::Write, 10), - None, /*interval*/ + None, // interval ); std::thread::sleep(duration); } @@ -699,8 +703,8 @@ mod tests { let bytes_per_sec = 2000; let limiter = Arc::new(IORateLimiter::new( IORateLimitMode::AllIo, - false, /*strict*/ - true, /*enable_statistics*/ + false, // strict + true, // enable_statistics )); limiter.set_io_priority(IOType::ForegroundWrite, IOPriority::Medium); verify_rate_limit(&limiter, bytes_per_sec, Duration::from_secs(2)); @@ -712,9 +716,9 @@ mod tests { { let _context = start_background_jobs( &limiter, - 2, /*job_count*/ + 2, // job_count Request(IOType::ForegroundWrite, IOOp::Write, 10), - None, /*interval*/ + None, // interval ); std::thread::sleep(Duration::from_secs(2)); } @@ -750,7 +754,7 @@ mod tests { // each thread request at most 1000 bytes per second let _context = start_background_jobs( &limiter, - 
actual_kbytes_per_sec, /*job_count*/ + actual_kbytes_per_sec, // job_count Request(IOType::Compaction, IOOp::Write, 1), Some(Duration::from_millis(1)), ); @@ -781,7 +785,7 @@ mod tests { { let _write = start_background_jobs( &limiter, - 1, /*job_count*/ + 1, // job_count Request( IOType::ForegroundWrite, IOOp::Write, @@ -791,7 +795,7 @@ mod tests { ); let _compaction = start_background_jobs( &limiter, - 1, /*job_count*/ + 1, // job_count Request( IOType::Compaction, IOOp::Write, @@ -801,7 +805,7 @@ mod tests { ); let _import = start_background_jobs( &limiter, - 1, /*job_count*/ + 1, // job_count Request( IOType::Import, IOOp::Write, @@ -826,7 +830,7 @@ mod tests { #[bench] fn bench_critical_section(b: &mut test::Bencher) { - let inner_limiter = PriorityBasedIORateLimiter::new(true /*strict*/); + let inner_limiter = PriorityBasedIORateLimiter::new(true /* strict */); inner_limiter.set_bytes_per_sec(1024); let now = Instant::now_coarse(); b.iter(|| { diff --git a/components/keys/src/lib.rs b/components/keys/src/lib.rs index fa855bbe353..ecb2657de00 100644 --- a/components/keys/src/lib.rs +++ b/components/keys/src/lib.rs @@ -226,16 +226,16 @@ pub fn origin_key(key: &[u8]) -> &[u8] { /// Get the `start_key` of current region in encoded form. pub fn enc_start_key(region: &Region) -> Vec { - // only initialized region's start_key can be encoded, otherwise there must be bugs - // somewhere. + // only initialized region's start_key can be encoded, otherwise there must be + // bugs somewhere. assert!(!region.get_peers().is_empty()); data_key(region.get_start_key()) } /// Get the `end_key` of current region in encoded form. pub fn enc_end_key(region: &Region) -> Vec { - // only initialized region's end_key can be encoded, otherwise there must be bugs - // somewhere. + // only initialized region's end_key can be encoded, otherwise there must be + // bugs somewhere. 
assert!(!region.get_peers().is_empty()); data_end_key(region.get_end_key()) } @@ -439,7 +439,8 @@ mod tests { assert_eq!(buffer, data_key(b"cde")); let mut region = Region::default(); - // uninitialised region should not be passed in `enc_start_key` and `enc_end_key`. + // uninitialised region should not be passed in `enc_start_key` and + // `enc_end_key`. assert!(::panic_hook::recover_safe(|| enc_start_key(®ion)).is_err()); assert!(::panic_hook::recover_safe(|| enc_end_key(®ion)).is_err()); diff --git a/components/log_wrappers/src/lib.rs b/components/log_wrappers/src/lib.rs index 986c1710137..5361eaeee18 100644 --- a/components/log_wrappers/src/lib.rs +++ b/components/log_wrappers/src/lib.rs @@ -1,6 +1,7 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -//! Provides wrappers for types that comes from 3rd-party and does not implement slog::Value. +//! Provides wrappers for types that comes from 3rd-party and does not implement +//! slog::Value. #[macro_use] extern crate slog; @@ -21,10 +22,11 @@ pub mod test_util; /// Wraps any `Display` type, use `Display` as `slog::Value`. /// -/// Usually this wrapper is useful in containers, e.g. `Option>`. +/// Usually this wrapper is useful in containers, e.g. +/// `Option>`. /// -/// If your type `val: T` is directly used as a field value, you may use `"key" => %value` syntax -/// instead. +/// If your type `val: T` is directly used as a field value, you may use `"key" +/// => %value` syntax instead. pub struct DisplayValue(pub T); impl slog::Value for DisplayValue { @@ -43,8 +45,8 @@ impl slog::Value for DisplayValue { /// /// Usually this wrapper is useful in containers, e.g. `Option>`. /// -/// If your type `val: T` is directly used as a field value, you may use `"key" => ?value` syntax -/// instead. +/// If your type `val: T` is directly used as a field value, you may use `"key" +/// => ?value` syntax instead. 
pub struct DebugValue(pub T); impl slog::Value for DebugValue { diff --git a/components/log_wrappers/src/test_util.rs b/components/log_wrappers/src/test_util.rs index a527ac379eb..d455e52c620 100644 --- a/components/log_wrappers/src/test_util.rs +++ b/components/log_wrappers/src/test_util.rs @@ -4,7 +4,8 @@ use std::{io, sync}; -/// A buffer which can be served as a logging destination while being able to access its content. +/// A buffer which can be served as a logging destination while being able to +/// access its content. #[derive(Clone, Default)] pub struct SyncLoggerBuffer(sync::Arc>>); @@ -14,8 +15,8 @@ impl SyncLoggerBuffer { Self::default() } - /// Builds a `slog::Logger` over this buffer which uses compact format and always output `TIME` - /// in the time field. + /// Builds a `slog::Logger` over this buffer which uses compact format and + /// always output `TIME` in the time field. pub fn build_logger(&self) -> slog::Logger { use slog::Drain; diff --git a/components/online_config/src/lib.rs b/components/online_config/src/lib.rs index fae347fee40..2388bf3b3ac 100644 --- a/components/online_config/src/lib.rs +++ b/components/online_config/src/lib.rs @@ -51,7 +51,7 @@ impl Debug for ConfigValue { } macro_rules! impl_from { - ($from: ty, $to: tt) => { + ($from:ty, $to:tt) => { impl From<$from> for ConfigValue { fn from(r: $from) -> ConfigValue { ConfigValue::$to(r) @@ -69,7 +69,7 @@ impl_from!(String, String); impl_from!(ConfigChange, Module); macro_rules! impl_into { - ($into: ty, $from: tt) => { + ($into:ty, $from:tt) => { impl From for $into { fn from(c: ConfigValue) -> $into { if let ConfigValue::$from(v) = c { diff --git a/components/panic_hook/src/lib.rs b/components/panic_hook/src/lib.rs index 12db221dbb5..7e95ea4071a 100644 --- a/components/panic_hook/src/lib.rs +++ b/components/panic_hook/src/lib.rs @@ -55,7 +55,8 @@ fn track_hook(p: &PanicInfo<'_>) { /// Recover from closure which may panic. 
/// -/// This function assumes the closure is able to be forced to implement `UnwindSafe`. +/// This function assumes the closure is able to be forced to implement +/// `UnwindSafe`. /// /// Also see [`AssertUnwindSafe`](https://doc.rust-lang.org/std/panic/struct.AssertUnwindSafe.html). pub fn recover_safe(f: F) -> std::thread::Result diff --git a/components/pd_client/src/config.rs b/components/pd_client/src/config.rs index f11608117e8..a02c2272490 100644 --- a/components/pd_client/src/config.rs +++ b/components/pd_client/src/config.rs @@ -6,8 +6,8 @@ use serde_derive::{Deserialize, Serialize}; use tikv_util::config::ReadableDuration; /// The configuration for a PD Client. /// -/// By default during initialization the client will attempt to reconnect every 300s -/// for infinity, logging only every 10th duplicate error. +/// By default during initialization the client will attempt to reconnect every +/// 300s for infinity, logging only every 10th duplicate error. #[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] #[serde(default)] #[serde(rename_all = "kebab-case")] @@ -24,8 +24,8 @@ pub struct Config { /// /// Default is isize::MAX, represented by -1. pub retry_max_count: isize, - /// If the client observes the same error message on retry, it can repeat the message only - /// every `n` times. + /// If the client observes the same error message on retry, it can repeat + /// the message only every `n` times. /// /// Default is 10. Set to 1 to disable this feature. pub retry_log_every: usize, @@ -33,7 +33,8 @@ pub struct Config { /// /// Default is 10m. pub update_interval: ReadableDuration, - /// The switch to support forwarding requests to follower when the network partition problem happens. + /// The switch to support forwarding requests to follower when the network + /// partition problem happens. /// /// Default is false. 
pub enable_forwarding: bool, diff --git a/components/pd_client/src/feature_gate.rs b/components/pd_client/src/feature_gate.rs index 64ee3067585..dc8bef853de 100644 --- a/components/pd_client/src/feature_gate.rs +++ b/components/pd_client/src/feature_gate.rs @@ -7,8 +7,8 @@ use std::sync::{ use semver::{SemVerError, Version}; -/// The function assumes only major, minor and patch are considered, and they are -/// all less than u16::MAX, which is 65535. +/// The function assumes only major, minor and patch are considered, and they +/// are all less than u16::MAX, which is 65535. const fn ver_to_val(major: u64, minor: u64, patch: u64) -> u64 { major << 32 | minor << 16 | patch } @@ -45,8 +45,8 @@ impl FeatureGate { /// /// # Safety /// - /// Correctness in FeatureGate depends on monotonic increasing of version number, - /// should use `set_version` instead. + /// Correctness in FeatureGate depends on monotonic increasing of version + /// number, should use `set_version` instead. pub unsafe fn reset_version(&self, version: &str) -> Result<(), SemVerError> { let new = Version::parse(version)?; let val = ver_to_val(new.major, new.minor, new.patch); diff --git a/components/pd_client/src/lib.rs b/components/pd_client/src/lib.rs index c68a97f1dec..21c53f07a34 100644 --- a/components/pd_client/src/lib.rs +++ b/components/pd_client/src/lib.rs @@ -224,10 +224,10 @@ pub trait PdClient: Send + Sync { } /// Creates the cluster with cluster ID, node, stores and first Region. - /// If the cluster is already bootstrapped, return ClusterBootstrapped error. - /// When a node starts, if it finds nothing in the node and - /// cluster is not bootstrapped, it begins to create node, stores, first Region - /// and then call bootstrap_cluster to let PD know it. + /// If the cluster is already bootstrapped, return ClusterBootstrapped + /// error. 
When a node starts, if it finds nothing in the node and + /// cluster is not bootstrapped, it begins to create node, stores, first + /// Region and then call bootstrap_cluster to let PD know it. /// It may happen that multi nodes start at same time to try to /// bootstrap, but only one can succeed, while others will fail /// and must remove their created local Region data themselves. @@ -263,11 +263,12 @@ pub trait PdClient: Send + Sync { /// - For bootstrapping, PD knows first Region with `bootstrap_cluster`. /// - For changing Peer, PD determines where to add a new Peer in some store /// for this Region. - /// - For Region splitting, PD determines the new Region id and Peer id for the - /// split Region. - /// - For Region merging, PD knows which two Regions will be merged and which Region - /// and Peers will be removed. - /// - For auto-balance, PD determines how to move the Region from one store to another. + /// - For Region splitting, PD determines the new Region id and Peer id for + /// the split Region. + /// - For Region merging, PD knows which two Regions will be merged and + /// which Region and Peers will be removed. + /// - For auto-balance, PD determines how to move the Region from one store + /// to another. /// Gets store information if it is not a tombstone store. fn get_store(&self, _store_id: u64) -> Result { @@ -380,7 +381,8 @@ pub trait PdClient: Send + Sync { unimplemented!(); } - /// Registers a handler to the client, which will be invoked after reconnecting to PD. + /// Registers a handler to the client, which will be invoked after + /// reconnecting to PD. /// /// Please note that this method should only be called once. fn handle_reconnect(&self, _: F) @@ -409,8 +411,9 @@ pub trait PdClient: Send + Sync { } /// Gets a batch of timestamps from PD. 
- /// Return a timestamp with (physical, logical), indicating that timestamps allocated are: - /// [Timestamp(physical, logical - count + 1), Timestamp(physical, logical)] + /// Return a timestamp with (physical, logical), indicating that timestamps + /// allocated are: [Timestamp(physical, logical - count + 1), + /// Timestamp(physical, logical)] fn batch_get_tso(&self, _count: u32) -> PdFuture { unimplemented!() } diff --git a/components/pd_client/src/tso.rs b/components/pd_client/src/tso.rs index 6c99e87e4e7..a19d7af8f06 100644 --- a/components/pd_client/src/tso.rs +++ b/components/pd_client/src/tso.rs @@ -3,13 +3,15 @@ //! This module is the low-level mechanisms for getting timestamps from a PD //! cluster. It should be used via the `get_tso` API in `PdClient`. //! -//! Once a `TimestampOracle` is created, there will be two futures running in a background working -//! thread created automatically. The `get_timestamp` method creates a oneshot channel whose -//! transmitter is served as a `TimestampRequest`. `TimestampRequest`s are sent to the working -//! thread through a bounded multi-producer, single-consumer channel. Every time the first future -//! is polled, it tries to exhaust the channel to get as many requests as possible and sends a -//! single `TsoRequest` to the PD server. The other future receives `TsoResponse`s from the PD -//! server and allocates timestamps for the requests. +//! Once a `TimestampOracle` is created, there will be two futures running in a +//! background working thread created automatically. The `get_timestamp` method +//! creates a oneshot channel whose transmitter is served as a +//! `TimestampRequest`. `TimestampRequest`s are sent to the working thread +//! through a bounded multi-producer, single-consumer channel. Every time the +//! first future is polled, it tries to exhaust the channel to get as many +//! requests as possible and sends a single `TsoRequest` to the PD server. The +//! 
other future receives `TsoResponse`s from the PD server and allocates +//! timestamps for the requests. use std::{cell::RefCell, collections::VecDeque, pin::Pin, rc::Rc, thread}; @@ -37,13 +39,14 @@ struct TimestampRequest { count: u32, } -/// The timestamp oracle (TSO) which provides monotonically increasing timestamps. +/// The timestamp oracle (TSO) which provides monotonically increasing +/// timestamps. pub struct TimestampOracle { - /// The transmitter of a bounded channel which transports requests of getting a single - /// timestamp to the TSO working thread. A bounded channel is used to prevent using - /// too much memory unexpectedly. - /// In the working thread, the `TimestampRequest`, which is actually a one channel sender, - /// is used to send back the timestamp result. + /// The transmitter of a bounded channel which transports requests of + /// getting a single timestamp to the TSO working thread. A bounded + /// channel is used to prevent using too much memory unexpectedly. + /// In the working thread, the `TimestampRequest`, which is actually a one + /// channel sender, is used to send back the timestamp result. request_tx: mpsc::Sender, close_rx: watch::Receiver<()>, } @@ -113,12 +116,14 @@ async fn run_tso( mut request_rx: mpsc::Receiver, close_tx: watch::Sender<()>, ) { - // The `TimestampRequest`s which are waiting for the responses from the PD server + // The `TimestampRequest`s which are waiting for the responses from the PD + // server let pending_requests = Rc::new(RefCell::new(VecDeque::with_capacity(MAX_PENDING_COUNT))); - // When there are too many pending requests, the `send_request` future will refuse to fetch - // more requests from the bounded channel. This waker is used to wake up the sending future - // if the queue containing pending requests is no longer full. + // When there are too many pending requests, the `send_request` future will + // refuse to fetch more requests from the bounded channel. 
This waker is + // used to wake up the sending future if the queue containing pending + // requests is no longer full. let sending_future_waker = Rc::new(AtomicWaker::new()); let mut request_stream = TsoRequestStream { @@ -139,8 +144,8 @@ async fn run_tso( while let Some(Ok(resp)) = rpc_receiver.next().await { let mut pending_requests = pending_requests.borrow_mut(); - // Wake up the sending future blocked by too many pending requests as we are consuming - // some of them here. + // Wake up the sending future blocked by too many pending requests as we are + // consuming some of them here. if pending_requests.len() >= MAX_PENDING_COUNT { sending_future_waker.wake(); } @@ -204,8 +209,8 @@ impl<'a> Stream for TsoRequestStream<'a> { let write_flags = WriteFlags::default().buffer_hint(false); Poll::Ready(Some((req, write_flags))) } else { - // Set the waker to the context, then the stream can be waked up after the pending queue - // is no longer full. + // Set the waker to the context, then the stream can be waked up after the + // pending queue is no longer full. self.self_waker.register(cx.waker()); Poll::Pending } @@ -216,9 +221,9 @@ fn allocate_timestamps( resp: &TsoResponse, pending_requests: &mut VecDeque, ) -> Result<()> { - // PD returns the timestamp with the biggest logical value. We can send back timestamps - // whose logical value is from `logical - count + 1` to `logical` using the senders - // in `pending`. + // PD returns the timestamp with the biggest logical value. We can send back + // timestamps whose logical value is from `logical - count + 1` to `logical` + // using the senders in `pending`. 
let tail_ts = resp .timestamp .as_ref() diff --git a/components/pd_client/src/util.rs b/components/pd_client/src/util.rs index 5ec629aacdb..e4145f16c0d 100644 --- a/components/pd_client/src/util.rs +++ b/components/pd_client/src/util.rs @@ -43,7 +43,8 @@ use super::{ const RETRY_INTERVAL: Duration = Duration::from_secs(1); // 1s const MAX_RETRY_TIMES: u64 = 5; -// The max duration when retrying to connect to leader. No matter if the MAX_RETRY_TIMES is reached. +// The max duration when retrying to connect to leader. No matter if the +// MAX_RETRY_TIMES is reached. const MAX_RETRY_DURATION: Duration = Duration::from_secs(10); // FIXME: Use a request-independent way to handle reconnection. @@ -317,7 +318,8 @@ impl Client { /// Re-establishes connection with PD leader in asynchronized fashion. /// /// If `force` is false, it will reconnect only when members change. - /// Note: Retrying too quickly will return an error due to cancellation. Please always try to reconnect after sending the request first. + /// Note: Retrying too quickly will return an error due to cancellation. + /// Please always try to reconnect after sending the request first. pub async fn reconnect(&self, force: bool) -> Result<()> { PD_RECONNECT_COUNTER_VEC.with_label_values(&["try"]).inc(); let start = Instant::now(); @@ -477,9 +479,10 @@ where { loop { let ret = { - // Drop the read lock immediately to prevent the deadlock between the caller thread - // which may hold the read lock and wait for PD client thread completing the request - // and the PD client thread which may block on acquiring the write lock. + // Drop the read lock immediately to prevent the deadlock between the caller + // thread which may hold the read lock and wait for PD client thread + // completing the request and the PD client thread which may block + // on acquiring the write lock. 
let client_stub = client.inner.rl().client_stub.clone(); func(&client_stub).map_err(Error::Grpc) }; @@ -610,7 +613,8 @@ impl PdConnector { Ok((_, r)) => { let new_cluster_id = r.get_header().get_cluster_id(); if new_cluster_id == cluster_id { - // check whether the response have leader info, otherwise continue to loop the rest members + // check whether the response have leader info, otherwise continue to + // loop the rest members if r.has_leader() { return Ok(r); } @@ -635,9 +639,11 @@ impl PdConnector { } // There are 3 kinds of situations we will return the new client: - // 1. the force is true which represents the client is newly created or the original connection has some problem - // 2. the previous forwarded host is not empty and it can connect the leader now which represents the network partition problem to leader may be recovered - // 3. the member information of PD has been changed + // 1. the force is true which represents the client is newly created or the + // original connection has some problem 2. the previous forwarded host is + // not empty and it can connect the leader now which represents the network + // partition problem to leader may be recovered 3. the member information of + // PD has been changed async fn reconnect_pd( &self, members_resp: GetMembersResponse, @@ -844,8 +850,9 @@ pub fn find_bucket_index>(key: &[u8], bucket_keys: &[S]) -> Optio ) } -/// Merge incoming bucket stats. If a range in new buckets overlaps with multiple ranges in -/// current buckets, stats of the new range will be added to all stats of current ranges. +/// Merge incoming bucket stats. If a range in new buckets overlaps with +/// multiple ranges in current buckets, stats of the new range will be added to +/// all stats of current ranges. 
pub fn merge_bucket_stats, I: AsRef<[u8]>>( cur: &[C], cur_stats: &mut BucketStats, diff --git a/components/profiler/examples/prime.rs b/components/profiler/examples/prime.rs index fa54b2b2658..ede351acea5 100644 --- a/components/profiler/examples/prime.rs +++ b/components/profiler/examples/prime.rs @@ -24,7 +24,8 @@ //! valgrind --tool=callgrind --instr-atstart=no ../../target/debug/examples/prime //! ``` //! -//! You must not run example via `valgrind cargo run ...`. The framework won't detect Callgrind! +//! You must not run example via `valgrind cargo run ...`. The framework won't +//! detect Callgrind! #[inline(never)] fn is_prime_number(v: usize, prime_numbers: &[usize]) -> bool { diff --git a/components/profiler/src/lib.rs b/components/profiler/src/lib.rs index e3ea0d43a6a..2734d8f7877 100644 --- a/components/profiler/src/lib.rs +++ b/components/profiler/src/lib.rs @@ -30,11 +30,12 @@ //! //! Then, compile the code with `profiling` feature enabled. //! -//! By default, a profile called `app.profile` will be generated by CPU Profiler. -//! You can then analyze the profile using [pprof](https://github.com/google/pprof). +//! By default, a profile called `app.profile` will be generated by CPU +//! Profiler. You can then analyze the profile using [pprof](https://github.com/google/pprof). //! -//! If the application is running in Callgrind, a Callgrind profile dump will be generated instead. -//! Notice that you should run Callgrind with command line option `--instr-atstart=no`, e.g.: +//! If the application is running in Callgrind, a Callgrind profile dump will be +//! generated instead. Notice that you should run Callgrind with command line +//! option `--instr-atstart=no`, e.g.: //! //! ```bash //! 
valgrind --tool=callgrind --instr-atstart=no ./my_example diff --git a/components/profiler/src/profiler_unix.rs b/components/profiler/src/profiler_unix.rs index 822b89619a9..c53f32b3b44 100644 --- a/components/profiler/src/profiler_unix.rs +++ b/components/profiler/src/profiler_unix.rs @@ -16,14 +16,15 @@ lazy_static::lazy_static! { static ref ACTIVE_PROFILER: Mutex = Mutex::new(Profiler::None); } -/// Start profiling. Returns false if failed, i.e. there is already a profiling in progress. +/// Start profiling. Returns false if failed, i.e. there is already a profiling +/// in progress. /// -/// When `profiling` feature is not enabled, this function will do nothing and there is totally -/// zero cost. +/// When `profiling` feature is not enabled, this function will do nothing and +/// there is totally zero cost. /// /// When running in Callgrind, Callgrind instrumentation will be started -/// (`CALLGRIND_START_INSTRUMENTATION`). Otherwise, the CPU Profiler will be started and profile -/// will be generated to the file specified by `name`. +/// (`CALLGRIND_START_INSTRUMENTATION`). Otherwise, the CPU Profiler will be +/// started and profile will be generated to the file specified by `name`. // TODO: Better multi-thread support. #[inline] pub fn start(name: impl AsRef) -> bool { @@ -49,10 +50,11 @@ pub fn start(name: impl AsRef) -> bool { true } -/// Stop profiling. Returns false if failed, i.e. there is no profiling in progress. +/// Stop profiling. Returns false if failed, i.e. there is no profiling in +/// progress. /// -/// When `profiling` feature is not enabled, this function will do nothing and there is totally -/// zero cost. +/// When `profiling` feature is not enabled, this function will do nothing and +/// there is totally zero cost. 
#[inline] pub fn stop() -> bool { let mut profiler = ACTIVE_PROFILER.lock().unwrap(); diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 22d2d645165..628b066029d 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -311,7 +311,8 @@ impl RaftLogBatchTrait for RaftLogBatch { } fn cut_logs(&mut self, _: u64, _: u64, _: u64) { - // It's unnecessary because overlapped entries can be handled in `append`. + // It's unnecessary because overlapped entries can be handled in + // `append`. } fn put_raft_state(&mut self, raft_group_id: u64, state: &RaftLocalState) -> Result<()> { diff --git a/components/raft_log_engine/src/lib.rs b/components/raft_log_engine/src/lib.rs index 41ba961c48a..6156771afa8 100644 --- a/components/raft_log_engine/src/lib.rs +++ b/components/raft_log_engine/src/lib.rs @@ -10,7 +10,8 @@ //! Because there are so many similarly named types across the TiKV codebase, //! and so much "import renaming", this crate consistently explicitly names type //! that implement a trait as `RocksTraitname`, to avoid the need for import -//! renaming and make it obvious what type any particular module is working with. +//! renaming and make it obvious what type any particular module is working +//! with. //! //! Please read the engine_trait crate docs before hacking. diff --git a/components/raftstore-v2/src/batch/apply.rs b/components/raftstore-v2/src/batch/apply.rs index f71c98e5c86..ebc7696aa64 100644 --- a/components/raftstore-v2/src/batch/apply.rs +++ b/components/raftstore-v2/src/batch/apply.rs @@ -3,7 +3,8 @@ //! This module contains all structs related to apply batch system. //! //! After being started, each thread will have its own `ApplyPoller` and poll -//! using `ApplyContext`. For more information, see the documentation of batch-system. +//! using `ApplyContext`. For more information, see the documentation of +//! batch-system. 
use std::{ ops::{Deref, DerefMut}, diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 1d84ba47302..ee063fc15dd 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -34,7 +34,8 @@ pub struct StoreContext { pub trans: T, /// The latest configuration. pub cfg: Config, - /// The tick batch for delay ticking. It will be flushed at the end of every round. + /// The tick batch for delay ticking. It will be flushed at the end of every + /// round. pub tick_batch: Vec, /// The precise timer for scheduling tick. pub timer: SteadyTimer, @@ -236,7 +237,8 @@ impl StorePollerBuilder { } fn clean_up_tablets(&self, peers: &HashMap>) -> Result<()> { - // TODO: list all available tablets and destroy those which are not in the peers. + // TODO: list all available tablets and destroy those which are not in the + // peers. Ok(()) } } diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 71062161384..0739cd61cb7 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -2,10 +2,11 @@ //! Raftstore is the place where we implement multi-raft. //! -//! The thread module of raftstore is batch-system, more check components/batch-system. -//! All state machines are defined in [`fsm`] module. Everything that wrapping raft is -//! implemented in [`raft`] module. And the commands, including split/merge/confchange/read/write, -//! are implemented in [`operation`] module. All state machines are expected to communicate with +//! The thread module of raftstore is batch-system, more check +//! components/batch-system. All state machines are defined in [`fsm`] module. +//! Everything that wrapping raft is implemented in [`raft`] module. And the +//! commands, including split/merge/confchange/read/write, are implemented in +//! [`operation`] module. All state machines are expected to communicate with //! messages. 
They are defined in [`router`] module. #![allow(unused)] diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index c3cede21ebc..aebb1bf7406 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -60,8 +60,8 @@ impl Peer { }; let tablet_index = s.region_state().get_tablet_index(); - // Another option is always create tablet even if tablet index is 0. But this can - // introduce race when gc old tablet and create new peer. + // Another option is always create tablet even if tablet index is 0. But this + // can introduce race when gc old tablet and create new peer. let tablet = if tablet_index != 0 { if !tablet_factory.exists(region_id, tablet_index) { return Err(box_err!( diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 37d9515d301..12041f56fe7 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -60,9 +60,9 @@ where /// Variants of channels for `Msg`. /// - `Read`: a channel for read only requests including `StatusRequest`, -/// `GetRequest` and `SnapRequest` +/// `GetRequest` and `SnapRequest` /// - `Write`: a channel for write only requests including `AdminRequest` -/// `PutRequest`, `DeleteRequest` and `DeleteRangeRequest`. +/// `PutRequest`, `DeleteRequest` and `DeleteRangeRequest`. /// Prefer channel rather than callback because: /// 1. channel can be reused, hence reduce allocations. /// 2. channel may not need dynamic dispatch. @@ -234,8 +234,8 @@ pub enum PeerMsg { /// leader of the target raft group. If it's failed to be sent, callback /// usually needs to be called before dropping in case of resource leak. RaftCommand(RaftCommand), - /// Tick is periodical task. If target peer doesn't exist there is a potential - /// that the raft node will not work anymore. + /// Tick is periodical task. 
If target peer doesn't exist there is a + /// potential that the raft node will not work anymore. Tick(PeerTick), /// Result of applying committed entries. The message can't be lost. ApplyRes { diff --git a/components/raftstore-v2/src/tablet.rs b/components/raftstore-v2/src/tablet.rs index f4f5bdcbc6f..8552b1a1f0f 100644 --- a/components/raftstore-v2/src/tablet.rs +++ b/components/raftstore-v2/src/tablet.rs @@ -10,8 +10,9 @@ struct LatestTablet { version: AtomicU64, } -/// Tablet may change during split, merge and applying snapshot. So we need a shared value to -/// reflect the latest tablet. `CachedTablet` provide cache that can speed up common access. +/// Tablet may change during split, merge and applying snapshot. So we need a +/// shared value to reflect the latest tablet. `CachedTablet` provide cache that +/// can speed up common access. #[derive(Clone)] pub struct CachedTablet { latest: Arc>, diff --git a/components/raftstore/src/coprocessor/config.rs b/components/raftstore/src/coprocessor/config.rs index 1609cc3001a..1087b18c287 100644 --- a/components/raftstore/src/coprocessor/config.rs +++ b/components/raftstore/src/coprocessor/config.rs @@ -53,7 +53,8 @@ pub struct Config { #[online_config(skip)] pub prefer_approximate_bucket: bool, // ratio of region_bucket_size. (0, 0.5) - // The region_bucket_merge_size_ratio * region_bucket_size is threshold to merge with its left neighbor bucket + // The region_bucket_merge_size_ratio * region_bucket_size is threshold to merge with its left + // neighbor bucket pub region_bucket_merge_size_ratio: f64, } diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index cd370e332e3..8122f54b12d 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -38,7 +38,7 @@ pub trait ClonableObserver: 'static + Send { } macro_rules! 
impl_box_observer { - ($name:ident, $ob: ident, $wrapper: ident) => { + ($name:ident, $ob:ident, $wrapper:ident) => { pub struct $name(Box + Send>); impl $name { pub fn new(observer: T) -> $name { @@ -82,7 +82,7 @@ macro_rules! impl_box_observer { // This is the same as impl_box_observer_g except $ob has a typaram macro_rules! impl_box_observer_g { - ($name:ident, $ob: ident, $wrapper: ident) => { + ($name:ident, $ob:ident, $wrapper:ident) => { pub struct $name(Box> + Send>); impl $name { pub fn new + Clone>(observer: T) -> $name { @@ -254,8 +254,9 @@ impl Registry { } } -/// A macro that loops over all observers and returns early when error is found or -/// bypass is set. `try_loop_ob` is expected to be used for hook that returns a `Result`. +/// A macro that loops over all observers and returns early when error is found +/// or bypass is set. `try_loop_ob` is expected to be used for hook that returns +/// a `Result`. macro_rules! try_loop_ob { ($r:expr, $obs:expr, $hook:ident, $($args:tt)*) => { loop_ob!(_imp _res, $r, $obs, $hook, $($args)*) @@ -439,10 +440,11 @@ impl CoprocessorHost { } } - /// `post_exec` should be called immediately after we executed one raft command. - /// It notifies observers side effects of this command before execution of the next command, - /// including req/resp, apply state, modified region state, etc. - /// Return true observers think a persistence is necessary. + /// `post_exec` should be called immediately after we executed one raft + /// command. It notifies observers side effects of this command before + /// execution of the next command, including req/resp, apply state, + /// modified region state, etc. Return true observers think a + /// persistence is necessary. 
pub fn post_exec( &self, region: &Region, diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index b798c7577af..8a4975b1459 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -94,13 +94,15 @@ pub trait AdminObserver: Coprocessor { /// For now, the `region` in `ObserverContext` is an empty region. fn post_apply_admin(&self, _: &mut ObserverContext<'_>, _: &AdminResponse) {} - /// Hook before exec admin request, returns whether we should skip this admin. + /// Hook before exec admin request, returns whether we should skip this + /// admin. fn pre_exec_admin(&self, _: &mut ObserverContext<'_>, _: &AdminRequest) -> bool { false } /// Hook to call immediately after exec command - /// Will be a special persistence after this exec if a observer returns true. + /// Will be a special persistence after this exec if a observer returns + /// true. fn post_exec_admin( &self, _: &mut ObserverContext<'_>, @@ -113,7 +115,8 @@ pub trait AdminObserver: Coprocessor { } pub trait QueryObserver: Coprocessor { - /// Hook when observe applying empty cmd, probably caused by leadership change. + /// Hook when observe applying empty cmd, probably caused by leadership + /// change. fn on_empty_cmd(&self, _: &mut ObserverContext<'_>, _index: u64, _term: u64) {} /// Hook to call before proposing write request. @@ -130,13 +133,15 @@ pub trait QueryObserver: Coprocessor { /// For now, the `region` in `ObserverContext` is an empty region. fn post_apply_query(&self, _: &mut ObserverContext<'_>, _: &Cmd) {} - /// Hook before exec write request, returns whether we should skip this write. + /// Hook before exec write request, returns whether we should skip this + /// write. fn pre_exec_query(&self, _: &mut ObserverContext<'_>, _: &[Request]) -> bool { false } /// Hook to call immediately after exec command. - /// Will be a special persistence after this exec if a observer returns true. 
+ /// Will be a special persistence after this exec if a observer returns + /// true. fn post_exec_query( &self, _: &mut ObserverContext<'_>, @@ -150,12 +155,12 @@ pub trait QueryObserver: Coprocessor { pub trait ApplySnapshotObserver: Coprocessor { /// Hook to call after applying key from plain file. - /// This may be invoked multiple times for each plain file, and each time a batch of key-value - /// pairs will be passed to the function. + /// This may be invoked multiple times for each plain file, and each time a + /// batch of key-value pairs will be passed to the function. fn apply_plain_kvs(&self, _: &mut ObserverContext<'_>, _: CfName, _: &[(Vec, Vec)]) {} - /// Hook to call after applying sst file. Currently the content of the snapshot can't be - /// passed to the observer. + /// Hook to call after applying sst file. Currently the content of the + /// snapshot can't be passed to the observer. fn apply_sst(&self, _: &mut ObserverContext<'_>, _: CfName, _path: &str) {} } @@ -216,8 +221,8 @@ pub trait RoleObserver: Coprocessor { /// Hook to call when role of a peer changes. /// /// Please note that, this hook is not called at realtime. There maybe a - /// situation that the hook is not called yet, however the role of some peers - /// have changed. + /// situation that the hook is not called yet, however the role of some + /// peers have changed. 
fn on_role_change(&self, _: &mut ObserverContext<'_>, _: &RoleChange) {} } @@ -274,8 +279,9 @@ impl ObserveID { } } -/// ObserveHandle is the status of a term of observing, it contains the `ObserveID` -/// and the `observing` flag indicate whether the observing is ongoing +/// ObserveHandle is the status of a term of observing, it contains the +/// `ObserveID` and the `observing` flag indicate whether the observing is +/// ongoing #[derive(Clone, Default, Debug)] pub struct ObserveHandle { pub id: ObserveID, @@ -326,14 +332,15 @@ impl CmdObserveInfo { } } - /// Get the max observe level of the observer info by the observers currently registered. - /// Currently, TiKV uses a static strategy for managing observers. - /// There are a fixed number type of observer being registered in each TiKV node, - /// and normally, observers are singleton. + /// Get the max observe level of the observer info by the observers + /// currently registered. Currently, TiKV uses a static strategy for + /// managing observers. There are a fixed number type of observer being + /// registered in each TiKV node, and normally, observers are singleton. /// The types are: /// CDC: Observer supports the `ChangeData` service. /// PiTR: Observer supports the `backup-log` function. - /// RTS: Observer supports the `resolved-ts` advancing (and follower read, etc.). + /// RTS: Observer supports the `resolved-ts` advancing (and follower read, + /// etc.). 
fn observe_level(&self) -> ObserveLevel { let cdc = if self.cdc_id.is_observing() { // `cdc` observe all data @@ -449,7 +456,8 @@ pub trait CmdObserver: Coprocessor { cmd_batches: &mut Vec, engine: &E, ); - // TODO: maybe shoulde move `on_applied_current_term` to a separated `Coprocessor` + // TODO: maybe should move `on_applied_current_term` to a separated + // `Coprocessor` /// Hook to call at the first time the leader applied on its term fn on_applied_current_term(&self, role: StateRole, region: &Region); } diff --git a/components/raftstore/src/coprocessor/region_info_accessor.rs b/components/raftstore/src/coprocessor/region_info_accessor.rs index c38f1161a1f..e8a5b1ac1c9 100644 --- a/components/raftstore/src/coprocessor/region_info_accessor.rs +++ b/components/raftstore/src/coprocessor/region_info_accessor.rs @@ -24,20 +24,23 @@ use super::{ ObserverContext, RegionChangeEvent, RegionChangeObserver, Result, RoleChange, RoleObserver, }; -/// `RegionInfoAccessor` is used to collect all regions' information on this TiKV into a collection -/// so that other parts of TiKV can get region information from it. It registers a observer to -/// raftstore, which is named `RegionEventListener`. When the events that we are interested in -/// happen (such as creating and deleting regions), `RegionEventListener` simply sends the events -/// through a channel. -/// In the mean time, `RegionCollector` keeps fetching messages from the channel, and mutates -/// the collection according to the messages. When an accessor method of `RegionInfoAccessor` is -/// called, it also simply sends a message to `RegionCollector`, and the result will be sent -/// back through as soon as it's finished. -/// In fact, the channel mentioned above is actually a `util::worker::Worker`. +/// `RegionInfoAccessor` is used to collect all regions' information on this +/// TiKV into a collection so that other parts of TiKV can get region +/// information from it. 
It registers a observer to raftstore, which is named +/// `RegionEventListener`. When the events that we are interested in happen +/// (such as creating and deleting regions), `RegionEventListener` simply +/// sends the events through a channel. +/// In the mean time, `RegionCollector` keeps fetching messages from the +/// channel, and mutates the collection according to the messages. When an +/// accessor method of `RegionInfoAccessor` is called, it also simply sends a +/// message to `RegionCollector`, and the result will be sent back through as +/// soon as it's finished. In fact, the channel mentioned above is actually a +/// `util::worker::Worker`. /// -/// **Caution**: Note that the information in `RegionInfoAccessor` is not perfectly precise. Some -/// regions may be temporarily absent while merging or splitting is in progress. Also, -/// `RegionInfoAccessor`'s information may slightly lag the actual regions on the TiKV. +/// **Caution**: Note that the information in `RegionInfoAccessor` is not +/// perfectly precise. Some regions may be temporarily absent while merging or +/// splitting is in progress. Also, `RegionInfoAccessor`'s information may +/// slightly lag the actual regions on the TiKV. /// `RaftStoreEvent` Represents events dispatched from raftstore coprocessor. #[derive(Debug)] @@ -81,9 +84,10 @@ impl RegionInfo { type RegionsMap = HashMap; type RegionRangesMap = BTreeMap; -// RangeKey is a wrapper used to unify the comparsion between region start key -// and region end key. Region end key is special as empty stands for the infinite, -// so we need to take special care for cases where the end key is empty. +// RangeKey is a wrapper used to unify the comparison between region start key +// and region end key. Region end key is special as empty stands for the +// infinite, so we need to take special care for cases where the end key is +// empty. 
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] pub enum RangeKey { Finite(Vec), @@ -107,8 +111,8 @@ impl RangeKey { pub type Callback = Box; pub type SeekRegionCallback = Box) + Send>; -/// `RegionInfoAccessor` has its own thread. Queries and updates are done by sending commands to the -/// thread. +/// `RegionInfoAccessor` has its own thread. Queries and updates are done by +/// sending commands to the thread. pub enum RegionInfoQuery { RaftStoreEvent(RaftStoreEvent), SeekRegion { @@ -151,8 +155,8 @@ impl Display for RegionInfoQuery { } } -/// `RegionEventListener` implements observer traits. It simply send the events that we are interested in -/// through the `scheduler`. +/// `RegionEventListener` implements observer traits. It simply send the events +/// that we are interested in through the `scheduler`. #[derive(Clone)] struct RegionEventListener { scheduler: Scheduler, @@ -206,9 +210,10 @@ fn register_region_event_listener( .register_region_change_observer(1, BoxRegionChangeObserver::new(listener)); } -/// `RegionCollector` is the place where we hold all region information we collected, and the -/// underlying runner of `RegionInfoAccessor`. It listens on events sent by the `RegionEventListener` and -/// keeps information of all regions. Role of each region are also tracked. +/// `RegionCollector` is the place where we hold all region information we +/// collected, and the underlying runner of `RegionInfoAccessor`. It listens on +/// events sent by the `RegionEventListener` and keeps information of all +/// regions. Role of each region are also tracked. pub struct RegionCollector { // HashMap: region_id -> (Region, State) regions: RegionsMap, @@ -277,9 +282,10 @@ impl RegionCollector { } fn handle_create_region(&mut self, region: Region, role: StateRole) { - // During tests, we found that the `Create` event may arrive multiple times. And when we - // receive an `Update` message, the region may have been deleted for some reason. 
So we - // handle it according to whether the region exists in the collection. + // During tests, we found that the `Create` event may arrive multiple times. And + // when we receive an `Update` message, the region may have been deleted for + // some reason. So we handle it according to whether the region exists in the + // collection. if self.regions.contains_key(®ion.get_id()) { info!( "trying to create region but it already exists, try to update it"; @@ -324,8 +330,8 @@ impl RegionCollector { let removed_id = self.region_ranges.remove(&end_key).unwrap(); assert_eq!(removed_id, region.get_id()); } else { - // It's possible that the region is already removed because it's end_key is used by - // another newer region. + // It's possible that the region is already removed because it's end_key is used + // by another newer region. debug!( "destroying region but it doesn't exist"; "region_id" => region.get_id(), @@ -348,29 +354,33 @@ impl RegionCollector { self.create_region(region, new_role); } - /// Determines whether `region_to_check`'s epoch is stale compared to `current`'s epoch + /// Determines whether `region_to_check`'s epoch is stale compared to + /// `current`'s epoch #[inline] fn is_region_epoch_stale(&self, region_to_check: &Region, current: &Region) -> bool { let epoch = region_to_check.get_region_epoch(); let current_epoch = current.get_region_epoch(); // Only compare conf_ver when they have the same version. - // When a region A merges region B, region B may have a greater conf_ver. Then, the new - // merged region meta has larger version but smaller conf_ver than the original B's. In this - // case, the incoming region meta has a smaller conf_ver but is not stale. + // When a region A merges region B, region B may have a greater conf_ver. Then, + // the new merged region meta has larger version but smaller conf_ver than the + // original B's. In this case, the incoming region meta has a smaller conf_ver + // but is not stale. 
epoch.get_version() < current_epoch.get_version() || (epoch.get_version() == current_epoch.get_version() && epoch.get_conf_ver() < current_epoch.get_conf_ver()) } - /// For all regions whose range overlaps with the given `region` or region_id is the same as - /// `region`'s, checks whether the given `region`'s epoch is not older than theirs. + /// For all regions whose range overlaps with the given `region` or + /// region_id is the same as `region`'s, checks whether the given + /// `region`'s epoch is not older than theirs. /// - /// Returns false if the given `region` is stale, which means, at least one region above has - /// newer epoch. - /// If the given `region` is not stale, all other regions in the collection that overlaps with - /// the given `region` must be stale. Returns true in this case, and if `clear_regions_in_range` - /// is true, those out-of-date regions will be removed from the collection. + /// Returns false if the given `region` is stale, which means, at least one + /// region above has newer epoch. + /// If the given `region` is not stale, all other regions in the collection + /// that overlaps with the given `region` must be stale. Returns true in + /// this case, and if `clear_regions_in_range` is true, those out-of-date + /// regions will be removed from the collection. fn check_region_range(&mut self, region: &Region, clear_regions_in_range: bool) -> bool { if let Some(region_with_same_id) = self.regions.get(®ion.get_id()) { if self.is_region_epoch_stale(region, ®ion_with_same_id.region) { @@ -458,14 +468,14 @@ impl RegionCollector { let region = event.get_region(); if region.get_region_epoch().get_version() == 0 { // Ignore messages with version 0. - // In raftstore `Peer::replicate`, the region meta's fields are all initialized with - // default value except region_id. 
So if there is more than one region replicating - // when the TiKV just starts, the assertion "Any two region with different ids and - // overlapping ranges must have different version" fails. + // In raftstore `Peer::replicate`, the region meta's fields are all initialized + // with default value except region_id. So if there is more than one region + // replicating when the TiKV just starts, the assertion "Any two region with + // different ids and overlapping ranges must have different version" fails. // // Since 0 is actually an invalid value of version, we can simply ignore the - // messages with version 0. The region will be created later when the region's epoch - // is properly set and an Update message was sent. + // messages with version 0. The region will be created later when the region's + // epoch is properly set and an Update message was sent. return; } if !self.check_region_range(region, true) { @@ -564,7 +574,8 @@ impl RunnableWithTimer for RegionCollector { } } -/// `RegionInfoAccessor` keeps all region information separately from raftstore itself. +/// `RegionInfoAccessor` keeps all region information separately from raftstore +/// itself. #[derive(Clone)] pub struct RegionInfoAccessor { // We use a dedicated worker for region info accessor. If we later want to share a worker with @@ -578,8 +589,9 @@ pub struct RegionInfoAccessor { impl RegionInfoAccessor { /// Creates a new `RegionInfoAccessor` and register to `host`. - /// `RegionInfoAccessor` doesn't need, and should not be created more than once. If it's needed - /// in different places, just clone it, and their contents are shared. + /// `RegionInfoAccessor` doesn't need, and should not be created more than + /// once. If it's needed in different places, just clone it, and their + /// contents are shared. 
pub fn new(host: &mut CoprocessorHost) -> Self { let worker = WorkerBuilder::new("region-collector-worker").create(); let scheduler = worker.start_with_timer("region-collector-worker", RegionCollector::new()); @@ -605,8 +617,8 @@ impl RegionInfoAccessor { } pub trait RegionInfoProvider: Send + Sync { - /// Get a iterator of regions that contains `from` or have keys larger than `from`, and invoke - /// the callback to process the result. + /// Get a iterator of regions that contains `from` or have keys larger than + /// `from`, and invoke the callback to process the result. fn seek_region(&self, _from: &[u8], _callback: SeekRegionCallback) -> Result<()> { unimplemented!() } @@ -762,7 +774,8 @@ mod tests { } } - /// Adds a set of regions to an empty collection and check if it's successfully loaded. + /// Adds a set of regions to an empty collection and check if it's + /// successfully loaded. fn must_load_regions(c: &mut RegionCollector, regions: &[Region]) { assert!(c.regions.is_empty()); assert!(c.region_ranges.is_empty()); @@ -819,8 +832,9 @@ mod tests { .get_version(); assert!(region.get_region_epoch().get_version() < version); } - // If end_key is updated and the region_id corresponding to the `old_end_key` doesn't equals - // to `region_id`, it shouldn't be removed since it was used by another region. + // If end_key is updated and the region_id corresponding to the `old_end_key` + // doesn't equals to `region_id`, it shouldn't be removed since it was + // used by another region. if let Some(old_end_key) = old_end_key { if old_end_key.as_slice() != region.get_end_key() { assert!( @@ -849,8 +863,8 @@ mod tests { c.handle_raftstore_event(RaftStoreEvent::DestroyRegion { region }); assert!(c.regions.get(&id).is_none()); - // If the region_id corresponding to the end_key doesn't equals to `id`, it shouldn't be - // removed since it was used by another region. 
+ // If the region_id corresponding to the end_key doesn't equals to `id`, it + // shouldn't be removed since it was used by another region. if let Some(end_key) = end_key { assert!( c.region_ranges @@ -1100,9 +1114,10 @@ mod tests { ); } - /// Simulates splitting a region into 3 regions, and the region with old id will be the - /// `derive_index`-th region of them. The events are triggered in order indicated by `seq`. - /// This is to ensure the collection is correct, no matter what the events' order to happen is. + /// Simulates splitting a region into 3 regions, and the region with old id + /// will be the `derive_index`-th region of them. The events are triggered + /// in order indicated by `seq`. This is to ensure the collection is + /// correct, no matter what the events' order to happen is. /// Values in `seq` and of `derive_index` start from 1. fn test_split_impl(derive_index: usize, seq: &[usize]) { let mut c = RegionCollector::new(); @@ -1210,15 +1225,16 @@ mod tests { ]; must_load_regions(&mut c, init_regions); - // While splitting, region 4 created but region 2 still has an `update` event which haven't - // been handled. + // While splitting, region 4 created but region 2 still has an `update` event + // which haven't been handled. must_create_region(&mut c, &new_region(4, b"k5", b"k9", 2), StateRole::Follower); must_update_region(&mut c, &new_region(2, b"k1", b"k9", 1), StateRole::Follower); must_change_role(&mut c, &new_region(2, b"k1", b"k9", 1), StateRole::Leader); must_update_region(&mut c, &new_region(2, b"k1", b"k5", 2), StateRole::Leader); - // TODO: In fact, region 2's role should be follower. However because it's previous state was - // removed while creating updating region 4, it can't be successfully updated. Fortunately - // this case may hardly happen so it can be fixed later. + // TODO: In fact, region 2's role should be follower. 
However because it's + // previous state was removed while creating updating region 4, it can't be + // successfully updated. Fortunately this case may hardly happen so it can be + // fixed later. check_collection( &c, &[ @@ -1229,8 +1245,9 @@ mod tests { ], ); - // While merging, region 2 expanded and covered region 4 (and their end key become the same) - // but region 4 still has an `update` event which haven't been handled. + // While merging, region 2 expanded and covered region 4 (and their end key + // become the same) but region 4 still has an `update` event which haven't been + // handled. must_update_region(&mut c, &new_region(2, b"k1", b"k9", 3), StateRole::Leader); must_update_region(&mut c, &new_region(4, b"k5", b"k9", 2), StateRole::Follower); must_change_role(&mut c, &new_region(4, b"k5", b"k9", 2), StateRole::Leader); diff --git a/components/raftstore/src/coprocessor/split_check/keys.rs b/components/raftstore/src/coprocessor/split_check/keys.rs index 8c0d7aad86c..892a38a7f48 100644 --- a/components/raftstore/src/coprocessor/split_check/keys.rs +++ b/components/raftstore/src/coprocessor/split_check/keys.rs @@ -62,7 +62,8 @@ where if self.current_count > self.split_threshold && !over_limit { self.split_keys.push(keys::origin_key(key.key()).to_vec()); // if for previous on_kv() self.current_count == self.split_threshold, - // the split key would be pushed this time, but the entry for this time should not be ignored. + // the split key would be pushed this time, but the entry for this time should + // not be ignored. 
self.current_count = 1; over_limit = self.split_keys.len() as u64 >= self.batch_split_limit; } @@ -184,7 +185,8 @@ where REGION_KEYS_HISTOGRAM.observe(region_keys as f64); // if bucket checker using scan is added, to utilize the scan, // add keys checker as well for free - // It has the assumption that the size's checker is before the keys's check in the host + // It has the assumption that the size's checker is before the keys's check in + // the host let need_split_region = region_keys >= host.cfg.region_max_keys(); if need_split_region { info!( @@ -608,8 +610,8 @@ mod tests { let region_size = get_region_approximate_size(&engine, ®ion, ReadableSize::mb(1000).0).unwrap(); // to make the region_max_size < region_split_size + region_size - // The split by keys should still work. But if the bug in on_kv() in size.rs exists, - // it will result in split by keys failed. + // The split by keys should still work. But if the bug in on_kv() in size.rs + // exists, it will result in split by keys failed. 
cfg.region_max_size = Some(ReadableSize(region_size * 6 / 5)); cfg.region_split_size = ReadableSize(region_size * 4 / 5); runnable = SplitCheckRunner::new(engine, tx.clone(), CoprocessorHost::new(tx, cfg)); diff --git a/components/raftstore/src/coprocessor/split_check/mod.rs b/components/raftstore/src/coprocessor/split_check/mod.rs index 9f1cbf17eb1..3978789db91 100644 --- a/components/raftstore/src/coprocessor/split_check/mod.rs +++ b/components/raftstore/src/coprocessor/split_check/mod.rs @@ -92,8 +92,8 @@ impl<'a, E> Host<'a, E> { const MIN_BUCKET_COUNT_PER_REGION: u64 = 2; if region_size >= self.cfg.region_bucket_size.0 * MIN_BUCKET_COUNT_PER_REGION { let mut bucket_checker = size::Checker::new( - self.cfg.region_bucket_size.0, /* not used */ - self.cfg.region_bucket_size.0, /* not used */ + self.cfg.region_bucket_size.0, // not used + self.cfg.region_bucket_size.0, // not used region_size / self.cfg.region_bucket_size.0, CheckPolicy::Approximate, ); diff --git a/components/raftstore/src/coprocessor/split_check/size.rs b/components/raftstore/src/coprocessor/split_check/size.rs index 352e956d43e..faff7b77c0a 100644 --- a/components/raftstore/src/coprocessor/split_check/size.rs +++ b/components/raftstore/src/coprocessor/split_check/size.rs @@ -58,7 +58,8 @@ where if self.current_size > self.split_size && !over_limit { self.split_keys.push(keys::origin_key(entry.key()).to_vec()); // if for previous on_kv() self.current_size == self.split_size, - // the split key would be pushed this time, but the entry size for this time should not be ignored. + // the split key would be pushed this time, but the entry size for this time + // should not be ignored. 
self.current_size = if self.current_size - size == self.split_size { size } else { @@ -615,8 +616,9 @@ pub mod tests { let cop_host = CoprocessorHost::new(tx.clone(), cfg); let mut runnable = SplitCheckRunner::new(engine.clone(), tx, cop_host.clone()); for i in 0..2000 { - // if not mvcc, kv size is (6+1)*2 = 14, given bucket size is 3000, expect each bucket has about 210 keys - // if mvcc, kv size is about 18*2 = 36, expect each bucket has about 80 keys + // if not mvcc, kv size is (6+1)*2 = 14, given bucket size is 3000, expect each + // bucket has about 210 keys if mvcc, kv size is about 18*2 = 36, expect each + // bucket has about 80 keys let s = key_gen(format!("{:04}00", i).as_bytes(), mvcc, i.into()); engine.put_cf(data_cf, &s, &s).unwrap(); if i % 10 == 0 && i > 0 { @@ -645,8 +647,9 @@ pub mod tests { // insert keys into 0000 ~ 0020 with 000000 ~ 002000 for i in 0..2000 { - // kv size is (6+1)*2 = 14, given bucket size is 3000, expect each bucket has about 210 keys - // if mvcc, kv size is about 18*2 = 36, expect each bucket has about 80 keys + // kv size is (6+1)*2 = 14, given bucket size is 3000, expect each bucket has + // about 210 keys if mvcc, kv size is about 18*2 = 36, expect each bucket has + // about 80 keys let s = key_gen(format!("{:06}", i).as_bytes(), mvcc, i.into()); engine.put_cf(data_cf, &s, &s).unwrap(); if i % 10 == 0 { diff --git a/components/raftstore/src/coprocessor/split_check/table.rs b/components/raftstore/src/coprocessor/split_check/table.rs index df2fa0fb7c6..9b5220938fd 100644 --- a/components/raftstore/src/coprocessor/split_check/table.rs +++ b/components/raftstore/src/coprocessor/split_check/table.rs @@ -26,8 +26,9 @@ where E: KvEngine, { /// Feed keys in order to find the split key. - /// If `current_data_key` does not belong to `status.first_encoded_table_prefix`. - /// it returns the encoded table prefix of `current_data_key`. + /// If `current_data_key` does not belong to + /// `status.first_encoded_table_prefix`. 
it returns the encoded table + /// prefix of `current_data_key`. fn on_kv(&mut self, _: &mut ObserverContext<'_>, entry: &KeyEntry) -> bool { if self.split_key.is_some() { return true; diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index c788f7c2d1e..6b652670138 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -81,7 +81,8 @@ where } } -/// WriteTask contains write tasks which need to be persisted to kv db and raft db. +/// WriteTask contains write tasks which need to be persisted to kv db and raft +/// db. pub struct WriteTask where EK: KvEngine, @@ -273,7 +274,8 @@ where } fn clear(&mut self) { - // raft_wb doesn't have clear interface and it should be consumed by raft db before + // raft_wb doesn't have clear interface and it should be consumed by raft db + // before self.kv_wb.clear(); self.raft_states.clear(); self.state_size = 0; @@ -582,11 +584,12 @@ where "error_code" => %e.error_code(), ); self.message_metrics.add(msg_type, false); - // If this msg is snapshot, it is unnecessary to send snapshot - // status to this peer because it has already become follower. - // (otherwise the snapshot msg should be sent in store thread other than here) - // Also, the follower don't need flow control, so don't send - // unreachable msg here. + // If this msg is snapshot, it is unnecessary to send + // snapshot status to this peer because it has already + // become follower. (otherwise the snapshot msg should be + // sent in store thread other than here) Also, the follower + // don't need flow control, so don't send unreachable msg + // here. 
} else { self.message_metrics.add(msg_type, true); } diff --git a/components/raftstore/src/store/async_io/write_router.rs b/components/raftstore/src/store/async_io/write_router.rs index 384273a97ad..6b19212c164 100644 --- a/components/raftstore/src/store/async_io/write_router.rs +++ b/components/raftstore/src/store/async_io/write_router.rs @@ -90,7 +90,8 @@ where } } - /// Send write msg to write worker or push into inner buffer and wait for rescheduling. + /// Send write msg to write worker or push into inner buffer and wait for + /// rescheduling. pub fn send_write_msg>( &mut self, ctx: &mut C, @@ -105,9 +106,9 @@ where } } - /// If there is some msgs need to be rescheduled, check the new persisted number and - /// sending these msgs to a new write worker if persisted number is greater than - /// `self.last_unpersisted`. + /// If there is some msgs need to be rescheduled, check the new persisted + /// number and sending these msgs to a new write worker if persisted + /// number is greater than `self.last_unpersisted`. pub fn check_new_persisted>( &mut self, ctx: &mut C, @@ -117,7 +118,8 @@ where return; } // The peer must be destroyed after all previous write tasks have been finished. - // So do not worry about a destroyed peer being counted in `io_reschedule_concurrent_count`. + // So do not worry about a destroyed peer being counted in + // `io_reschedule_concurrent_count`. ctx.io_reschedule_concurrent_count() .fetch_sub(1, Ordering::SeqCst); @@ -144,10 +146,12 @@ where } } - /// Check if write task can be sent to write worker or pushed into `self.pending_write_msgs`. + /// Check if write task can be sent to write worker or pushed into + /// `self.pending_write_msgs`. /// - /// Returns false if the task should be pushed into `self.pending_write_msgs`. - /// true means the task should be sent to the write worker. + /// Returns false if the task should be pushed into + /// `self.pending_write_msgs`. true means the task should be sent to the + /// write worker. 
fn should_send>( &mut self, ctx: &mut C, @@ -180,7 +184,8 @@ where } if self.next_writer_id.is_none() { // The hot write peers should not be rescheduled entirely. - // So it will not be rescheduled if the random id is the same as the original one. + // So it will not be rescheduled if the random id is the same as the original + // one. let new_id = rand::random::() % ctx.config().store_io_pool_size; if new_id == self.writer_id { // Reset the time @@ -191,8 +196,9 @@ where } // This peer should be rescheduled. // Try to add 1 to `io_reschedule_concurrent_count`. - // The `cfg.io_reschedule_concurrent_max_count` is used for controlling the concurrent count - // of rescheduling peer fsm because rescheduling will introduce performance penalty. + // The `cfg.io_reschedule_concurrent_max_count` is used for controlling the + // concurrent count of rescheduling peer fsm because rescheduling will + // introduce performance penalty. let success = ctx .io_reschedule_concurrent_count() .fetch_update(Ordering::SeqCst, Ordering::Relaxed, |c| { @@ -205,7 +211,8 @@ where .is_ok(); if success { STORE_IO_RESCHEDULE_PEER_TOTAL_GAUGE.inc(); - // Rescheduling succeeds. The task should be pushed into `self.pending_write_msgs`. + // Rescheduling succeeds. The task should be pushed into + // `self.pending_write_msgs`. self.last_unpersisted = last_unpersisted; info!("starts io reschedule"; "tag" => &self.tag); false diff --git a/components/raftstore/src/store/bootstrap.rs b/components/raftstore/src/store/bootstrap.rs index e1c90a177c7..1ee8e9ddc10 100644 --- a/components/raftstore/src/store/bootstrap.rs +++ b/components/raftstore/src/store/bootstrap.rs @@ -44,8 +44,8 @@ fn is_range_empty( // Bootstrap the store, the DB for this store must be empty and has no data. // -// FIXME: ER typaram should just be impl KvEngine, but RaftEngine doesn't support -// the `is_range_empty` query yet. 
+// FIXME: ER typaram should just be impl KvEngine, but RaftEngine doesn't +// support the `is_range_empty` query yet. pub fn bootstrap_store( engines: &Engines, cluster_id: u64, diff --git a/components/raftstore/src/store/compaction_guard.rs b/components/raftstore/src/store/compaction_guard.rs index 4fb4c7feb7a..1aee90b6463 100644 --- a/components/raftstore/src/store/compaction_guard.rs +++ b/components/raftstore/src/store/compaction_guard.rs @@ -47,8 +47,8 @@ impl CompactionGuardGeneratorFactory

{ } } -// Update to implement engine_traits::SstPartitionerFactory instead once we move to use abstracted -// ColumnFamilyOptions in src/config.rs. +// Update to implement engine_traits::SstPartitionerFactory instead once we move +// to use abstracted ColumnFamilyOptions in src/config.rs. impl SstPartitionerFactory for CompactionGuardGeneratorFactory

{ @@ -59,9 +59,9 @@ impl SstPartitionerFactory } fn create_partitioner(&self, context: &SstPartitionerContext<'_>) -> Option { - // create_partitioner can be called in RocksDB while holding db_mutex. It can block - // other operations on RocksDB. To avoid such caces, we defer region info query to - // the first time should_partition is called. + // create_partitioner can be called in RocksDB while holding db_mutex. It can + // block other operations on RocksDB. To avoid such cases, we defer + // region info query to the first time should_partition is called. Some(CompactionGuardGenerator { cf_name: self.cf_name, smallest_key: context.smallest_key.to_vec(), @@ -383,8 +383,8 @@ mod tests { DBCompressionType::No, DBCompressionType::No, ]); - // Make block size small to make sure current_output_file_size passed to SstPartitioner - // is accurate. + // Make block size small to make sure current_output_file_size passed to + // SstPartitioner is accurate. let mut block_based_opts = BlockBasedOptions::new(); block_based_opts.set_block_size(100); cf_opts.set_block_based_table_factory(&block_based_opts); @@ -437,26 +437,26 @@ mod tests { assert_eq!(b"z", DATA_PREFIX_KEY); // Create two overlapping SST files then force compaction. - // Region "a" will share a SST file with region "b", since region "a" is too small. - // Region "c" will be splitted into two SSTs, since its size is larger than - // target_file_size_base. + // Region "a" will share a SST file with region "b", since region "a" is too + // small. Region "c" will be splitted into two SSTs, since its size is + // larger than target_file_size_base. 
let value = vec![b'v'; 1024]; db.put(b"za1", b"").unwrap(); db.put(b"zb1", &value).unwrap(); db.put(b"zc1", &value).unwrap(); - db.flush(true /*sync*/).unwrap(); + db.flush(true /* sync */).unwrap(); db.put(b"zb2", &value).unwrap(); db.put(b"zc2", &value).unwrap(); db.put(b"zc3", &value).unwrap(); db.put(b"zc4", &value).unwrap(); db.put(b"zc5", &value).unwrap(); db.put(b"zc6", &value).unwrap(); - db.flush(true /*sync*/).unwrap(); + db.flush(true /* sync */).unwrap(); db.compact_range( - CF_DEFAULT, None, /*start_key*/ - None, /*end_key*/ - false, /*exclusive_manual*/ - 1, /*max_subcompactions*/ + CF_DEFAULT, None, // start_key + None, // end_key + false, // exclusive_manual + 1, // max_subcompactions ) .unwrap(); diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index fdd47d6c2ae..5d7d89bbc7b 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -37,7 +37,8 @@ with_prefix!(prefix_store "store-"); #[serde(default)] #[serde(rename_all = "kebab-case")] pub struct Config { - // minimizes disruption when a partitioned node rejoins the cluster by using a two phase election. + // minimizes disruption when a partitioned node rejoins the cluster by using a two phase + // election. #[online_config(skip)] pub prevote: bool, #[online_config(skip)] @@ -120,12 +121,13 @@ pub struct Config { /// the peer is considered to be down and is reported to PD. pub max_peer_down_duration: ReadableDuration, - /// If the leader of a peer is missing for longer than max_leader_missing_duration, - /// the peer would ask pd to confirm whether it is valid in any region. - /// If the peer is stale and is not valid in any region, it will destroy itself. + /// If the leader of a peer is missing for longer than + /// max_leader_missing_duration, the peer would ask pd to confirm + /// whether it is valid in any region. If the peer is stale and is not + /// valid in any region, it will destroy itself. 
pub max_leader_missing_duration: ReadableDuration, - /// Similar to the max_leader_missing_duration, instead it will log warnings and - /// try to alert monitoring systems, if there is any. + /// Similar to the max_leader_missing_duration, instead it will log warnings + /// and try to alert monitoring systems, if there is any. pub abnormal_leader_missing_duration: ReadableDuration, pub peer_stale_state_check_interval: ReadableDuration, @@ -156,11 +158,11 @@ pub struct Config { #[online_config(hidden)] pub right_derive_when_split: bool, - /// This setting can only ensure conf remove will not be proposed by the peer - /// being removed. But it can't guarantee the remove is applied when the target - /// is not leader. That means we always need to check if it's working as expected - /// when a leader applies a self-remove conf change. Keep the configuration only - /// for convenient test. + /// This setting can only ensure conf remove will not be proposed by the + /// peer being removed. But it can't guarantee the remove is applied + /// when the target is not leader. That means we always need to check if + /// it's working as expected when a leader applies a self-remove conf + /// change. Keep the configuration only for convenient test. #[cfg(any(test, feature = "testexport"))] pub allow_remove_leader: bool, @@ -213,9 +215,10 @@ pub struct Config { #[doc(hidden)] #[online_config(skip)] /// Disable this feature by set to 0, logic will be removed in other pr. - /// When TiKV memory usage reaches `memory_usage_high_water` it will try to limit memory - /// increasing. For raftstore layer entries will be evicted from entry cache, if they - /// utilize memory more than `evict_cache_on_memory_ratio` * total. + /// When TiKV memory usage reaches `memory_usage_high_water` it will try to + /// limit memory increasing. For raftstore layer entries will be evicted + /// from entry cache, if they utilize memory more than + /// `evict_cache_on_memory_ratio` * total. 
/// /// Set it to 0 can disable cache evict. // By default it's 0.2. So for different system memory capacity, cache evict happens: @@ -226,13 +229,14 @@ pub struct Config { pub cmd_batch: bool, - /// When the count of concurrent ready exceeds this value, command will not be proposed - /// until the previous ready has been persisted. + /// When the count of concurrent ready exceeds this value, command will not + /// be proposed until the previous ready has been persisted. /// If `cmd_batch` is 0, this config will have no effect. /// If it is 0, it means no limit. pub cmd_batch_concurrent_ready_max_count: usize, - /// When the size of raft db writebatch exceeds this value, write will be triggered. + /// When the size of raft db writebatch exceeds this value, write will be + /// triggered. pub raft_write_size_limit: ReadableSize, pub waterfall_metrics: bool, @@ -256,7 +260,8 @@ pub struct Config { #[serde(skip_serializing)] #[online_config(skip)] pub region_split_size: ReadableSize, - // Deprecated! The time to clean stale peer safely can be decided based on RocksDB snapshot sequence number. + // Deprecated! The time to clean stale peer safely can be decided based on RocksDB snapshot + // sequence number. #[doc(hidden)] #[serde(skip_serializing)] #[online_config(skip)] @@ -268,8 +273,8 @@ pub struct Config { // Interval to report min resolved ts, if it is zero, it means disabled. pub report_min_resolved_ts_interval: ReadableDuration, - /// Interval to check whether to reactivate in-memory pessimistic lock after being disabled - /// before transferring leader. + /// Interval to check whether to reactivate in-memory pessimistic lock after + /// being disabled before transferring leader. pub reactive_memory_lock_tick_interval: ReadableDuration, /// Max tick count before reactivating in-memory pessimistic lock. 
pub reactive_memory_lock_timeout_tick: usize, @@ -460,8 +465,8 @@ impl Config { )); } - // The adjustment of this value is related to the number of regions, usually 16384 is - // already a large enough value + // The adjustment of this value is related to the number of regions, usually + // 16384 is already a large enough value if self.raft_max_inflight_msgs == 0 || self.raft_max_inflight_msgs > 16384 { return Err(box_err!( "raft max inflight msgs should be greater than 0 and less than or equal to 16384" diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index 4f751a35b17..03054cfcc16 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -1,7 +1,8 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -//! This module contains the implementation of the `EntryStorage`, which covers a subset of -//! raft storage. This module will be shared between raftstore v1 and v2. +//! This module contains the implementation of the `EntryStorage`, which covers +//! a subset of raft storage. This module will be shared between raftstore v1 +//! and v2. use std::{ cell::{Cell, RefCell}, @@ -60,7 +61,8 @@ impl CachedEntries { } } - /// Take cached entries and dangle size for them. `dangle` means not in entry cache. + /// Take cached entries and dangle size for them. `dangle` means not in + /// entry cache. pub fn take_entries(&self) -> (Vec, usize) { mem::take(&mut *self.entries.lock().unwrap()) } @@ -119,8 +121,8 @@ impl EntryCache { } }) .count(); - // Cache either is empty or contains latest log. Hence we don't need to fetch log - // from rocksdb anymore. + // Cache either is empty or contains latest log. Hence we don't need to fetch + // log from rocksdb anymore. 
assert!(end_idx == limit_idx || fetched_size > max_size); let (first, second) = tikv_util::slices_in_range(&self.cache, start_idx, end_idx); ents.extend_from_slice(first); @@ -172,10 +174,10 @@ impl EntryCache { self.cache.push_back(e.to_owned()); mem_size_change += (bytes_capacity(&e.data) + bytes_capacity(&e.context)) as i64; } - // In the past, the entry cache will be truncated if its size exceeds a certain number. - // However, after introducing async write io, the entry must stay in cache if it's not - // persisted to raft db because the raft-rs may need to read entries.(e.g. leader sends - // MsgAppend to followers) + // In the past, the entry cache will be truncated if its size exceeds a certain + // number. However, after introducing async write io, the entry must stay in + // cache if it's not persisted to raft db because the raft-rs may need to read + // entries.(e.g. leader sends MsgAppend to followers) mem_size_change } @@ -198,9 +200,9 @@ impl EntryCache { let mut mem_size_change = 0; - // Clean cached entries which have been already sent to apply threads. For example, - // if entries [1, 10), [10, 20), [20, 30) are sent to apply threads and `compact_to(15)` - // is called, only [20, 30) will still be kept in cache. + // Clean cached entries which have been already sent to apply threads. For + // example, if entries [1, 10), [10, 20), [20, 30) are sent to apply threads and + // `compact_to(15)` is called, only [20, 30) will still be kept in cache. let old_trace_cap = self.trace.capacity(); while let Some(cached_entries) = self.trace.pop_front() { if cached_entries.range.start >= idx { @@ -227,7 +229,8 @@ impl EntryCache { } let cache_last_idx = self.cache.back().unwrap().get_index(); - // Use `cache_last_idx + 1` to make sure cache can be cleared completely if necessary. + // Use `cache_last_idx + 1` to make sure cache can be cleared completely if + // necessary. 
let compact_to = (cmp::min(cache_last_idx + 1, idx) - cache_first_idx) as usize; for e in self.cache.drain(..compact_to) { mem_size_change -= (bytes_capacity(&e.data) + bytes_capacity(&e.context)) as i64 @@ -564,7 +567,8 @@ impl EntryStorage { return Ok(count); } - // the count of left entries isn't too large, fetch the remaining entries synchronously one by one + // the count of left entries isn't too large, fetch the remaining entries + // synchronously one by one for idx in last + 1..high { let ent = self.raft_engine.get_entry(region_id, idx)?; match ent { @@ -597,7 +601,8 @@ impl EntryStorage { "max_size" => max_size, "res_max_size" => res.max_size, ); - // low index or max size is changed, the result is not fit for the current range, so refetch again. + // low index or max size is changed, the result is not fit for the current + // range, so refetch again. self.raftlog_fetch_stats.fetch_invalid.update(|m| m + 1); res.tried_cnt + 1 } else { @@ -606,7 +611,8 @@ impl EntryStorage { // the first/second try: get [low, high) asynchronously // the third try: - // - if term and low are matched: use result of [low, persisted) and get [persisted, high) synchronously + // - if term and low are matched: use result of [low, persisted) and get + // [persisted, high) synchronously // - else: get [low, high) synchronously if tried_cnt >= MAX_ASYNC_FETCH_TRY_CNT { // even the larger range is invalid again, fallback to fetch in sync way @@ -807,7 +813,8 @@ impl EntryStorage { self.apply_state.get_truncated_state().get_term() } - // Append the given entries to the raft log using previous last index or self.last_index. + // Append the given entries to the raft log using previous last index or + // self.last_index. 
pub fn append(&mut self, entries: Vec, task: &mut WriteTask) { if entries.is_empty() { return; diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index ab73c0bc8c6..284015b0eb8 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -395,14 +395,17 @@ where store_id: u64, /// region_id -> (peer_id, is_splitting) /// Used for handling race between splitting and creating new peer. - /// An uninitialized peer can be replaced to the one from splitting iff they are exactly the same peer. + /// An uninitialized peer can be replaced to the one from splitting iff they + /// are exactly the same peer. pending_create_peers: Arc>>, - /// We must delete the ingested file before calling `callback` so that any ingest-request reaching this - /// peer could see this update if leader had changed. We must also delete them after the applied-index - /// has been persisted to kvdb because this entry may replay because of panic or power-off, which - /// happened before `WriteBatch::write` and after `SstImporter::delete`. We shall make sure that - /// this entry will never apply again at first, then we can delete the ssts files. + /// We must delete the ingested file before calling `callback` so that any + /// ingest-request reaching this peer could see this update if leader + /// had changed. We must also delete them after the applied-index + /// has been persisted to kvdb because this entry may replay because of + /// panic or power-off, which happened before `WriteBatch::write` and + /// after `SstImporter::delete`. We shall make sure that this entry will + /// never apply again at first, then we can delete the ssts files. delete_ssts: Vec, /// The priority of this Handler. @@ -484,10 +487,11 @@ where .push_batch(&delegate.observe_info, delegate.region.get_id()); } - /// Commits all changes have done for delegate. 
`persistent` indicates whether - /// write the changes into rocksdb. + /// Commits all changes have done for delegate. `persistent` indicates + /// whether write the changes into rocksdb. /// - /// This call is valid only when it's between a `prepare_for` and `finish_for`. + /// This call is valid only when it's between a `prepare_for` and + /// `finish_for`. pub fn commit(&mut self, delegate: &mut ApplyDelegate) { if delegate.last_flush_applied_index < delegate.apply_state.get_applied_index() { delegate.write_apply_state(self.kv_wb_mut()); @@ -547,7 +551,8 @@ where // Control the memory usage for the WriteBatch. self.kv_wb = self.engine.write_batch_with_cap(DEFAULT_APPLY_WB_SIZE); } else { - // Clear data, reuse the WriteBatch, this can reduce memory allocations and deallocations. + // Clear data, reuse the WriteBatch, this can reduce memory allocations and + // deallocations. self.kv_wb_mut().clear(); } self.kv_wb_last_bytes = 0; @@ -567,7 +572,8 @@ where batch_max_level, mut cb_batch, } = mem::replace(&mut self.applied_batch, ApplyCallbackBatch::new()); - // Call it before invoking callback for preventing Commit is executed before Prewrite is observed. + // Call it before invoking callback for preventing Commit is executed before + // Prewrite is observed. self.host .on_flush_applied_cmd_batch(batch_max_level, cmd_batch, &self.engine); // Invoke callbacks @@ -750,9 +756,9 @@ fn has_high_latency_operation(cmd: &RaftCmdRequest) -> bool { fn should_sync_log(cmd: &RaftCmdRequest) -> bool { if cmd.has_admin_request() { if cmd.get_admin_request().get_cmd_type() == AdminCmdType::CompactLog { - // We do not need to sync WAL before compact log, because this request will send a msg to - // raft_gc_log thread to delete the entries before this index instead of deleting them in - // apply thread directly. 
+ // We do not need to sync WAL before compact log, because this request will send + // a msg to raft_gc_log thread to delete the entries before this + // index instead of deleting them in apply thread directly. return false; } return true; @@ -780,9 +786,9 @@ fn should_sync_log(cmd: &RaftCmdRequest) -> bool { /// this struct. /// TODO: check whether generator/coroutine is a good choice in this case. struct WaitSourceMergeState { - /// A flag that indicates whether the source peer has applied to the required - /// index. If the source peer is ready, this flag should be set to the region id - /// of source peer. + /// A flag that indicates whether the source peer has applied to the + /// required index. If the source peer is ready, this flag should be set + /// to the region id of source peer. logs_up_to_date: Arc, } @@ -859,12 +865,14 @@ where tag: String, /// If the delegate should be stopped from polling. - /// A delegate can be stopped in conf change, merge or requested by destroy message. + /// A delegate can be stopped in conf change, merge or requested by destroy + /// message. stopped: bool, /// The start time of the current round to execute commands. handle_start: Option, - /// Set to true when removing itself because of `ConfChangeType::RemoveNode`, and then - /// any following committed logs in same Ready should be applied failed. + /// Set to true when removing itself because of + /// `ConfChangeType::RemoveNode`, and then any following committed logs + /// in same Ready should be applied failed. pending_remove: bool, /// The commands waiting to be committed and applied @@ -872,22 +880,25 @@ where /// The counter of pending request snapshots. See more in `Peer`. pending_request_snapshot_count: Arc, - /// Indicates the peer is in merging, if that compact log won't be performed. + /// Indicates the peer is in merging, if that compact log won't be + /// performed. is_merging: bool, /// Records the epoch version after the last merge. 
last_merge_version: u64, yield_state: Option>, - /// A temporary state that keeps track of the progress of the source peer state when - /// CommitMerge is unable to be executed. + /// A temporary state that keeps track of the progress of the source peer + /// state when CommitMerge is unable to be executed. wait_merge_state: Option, // ID of last region that reports ready. ready_source_region_id: u64, - /// TiKV writes apply_state to KV RocksDB, in one write batch together with kv data. + /// TiKV writes apply_state to KV RocksDB, in one write batch together with + /// kv data. /// - /// If we write it to Raft RocksDB, apply_state and kv data (Put, Delete) are in - /// separate WAL file. When power failure, for current raft log, apply_index may synced - /// to file, but KV data may not synced to file, so we will lose data. + /// If we write it to Raft RocksDB, apply_state and kv data (Put, Delete) + /// are in separate WAL file. When power failure, for current raft log, + /// apply_index may synced to file, but KV data may not synced to file, + /// so we will lose data. apply_state: RaftApplyState, /// The term of the raft log at applied index. applied_term: u64, @@ -900,8 +911,9 @@ where /// The local metrics, and it will be flushed periodically. metrics: ApplyMetrics, - /// Priority in batch system. When applying some commands which have high latency, - /// we decrease the priority of current fsm to reduce the impact on other normal commands. + /// Priority in batch system. When applying some commands which have high + /// latency, we decrease the priority of current fsm to reduce the + /// impact on other normal commands. priority: Priority, /// To fetch Raft entries for applying if necessary. @@ -954,7 +966,8 @@ where self.id } - /// Handles all the committed_entries, namely, applies the committed entries. + /// Handles all the committed_entries, namely, applies the committed + /// entries. 
fn handle_raft_committed_entries( &mut self, apply_ctx: &mut ApplyContext, @@ -964,9 +977,9 @@ where return; } apply_ctx.prepare_for(self); - // If we send multiple ConfChange commands, only first one will be proposed correctly, - // others will be saved as a normal entry with no data, so we must re-propose these - // commands again. + // If we send multiple ConfChange commands, only first one will be proposed + // correctly, others will be saved as a normal entry with no data, so we + // must re-propose these commands again. apply_ctx.committed_count += committed_entries_drainer.len(); let mut results = VecDeque::new(); while let Some(entry) = committed_entries_drainer.next() { @@ -986,9 +999,10 @@ where ); } - // NOTE: before v5.0, `EntryType::EntryConfChangeV2` entry is handled by `unimplemented!()`, - // which can break compatibility (i.e. old version tikv running on data written by new version tikv), - // but PD will reject old version tikv join the cluster, so this should not happen. + // NOTE: before v5.0, `EntryType::EntryConfChangeV2` entry is handled by + // `unimplemented!()`, which can break compatibility (i.e. old version tikv + // running on data written by new version tikv), but PD will reject old version + // tikv join the cluster, so this should not happen. let res = match entry.get_entry_type() { EntryType::EntryNormal => self.handle_raft_entry_normal(apply_ctx, &entry), EntryType::EntryConfChange | EntryType::EntryConfChangeV2 => { @@ -1238,11 +1252,13 @@ where /// Applies raft command. /// /// An apply operation can fail in the following situations: - /// 1. it encounters an error that will occur on all stores, it can continue - /// applying next entry safely, like epoch not match for example; - /// 2. it encounters an error that may not occur on all stores, in this case - /// we should try to apply the entry again or panic. Considering that this - /// usually due to disk operation fail, which is rare, so just panic is ok. 
+ /// - it encounters an error that will occur on all stores, it can + /// continue applying next entry safely, like epoch not match for + /// example; + /// - it encounters an error that may not occur on all stores, in this + /// case we should try to apply the entry again or panic. Considering + /// that this usually due to disk operation fail, which is rare, so just + /// panic is ok. fn apply_raft_cmd( &mut self, ctx: &mut ApplyContext, @@ -1359,7 +1375,8 @@ where if let Some(epoch) = origin_epoch { let cmd_type = req.get_admin_request().get_cmd_type(); let epoch_state = admin_cmd_epoch_lookup(cmd_type); - // The change-epoch behavior **MUST BE** equal to the settings in `admin_cmd_epoch_lookup` + // The change-epoch behavior **MUST BE** equal to the settings in + // `admin_cmd_epoch_lookup` if (epoch_state.change_ver && epoch.get_version() == self.region.get_region_epoch().get_version()) || (epoch_state.change_conf_ver @@ -1619,7 +1636,8 @@ where keys::data_key_with_buffer(key, &mut ctx.key_buffer); let key = ctx.key_buffer.as_slice(); - // since size_diff_hint is not accurate, so we just skip calculate the value size. + // since size_diff_hint is not accurate, so we just skip calculate the value + // size. self.metrics.size_diff_hint -= key.len() as i64; if !req.get_delete().get_cf().is_empty() { let cf = req.get_delete().get_cf(); @@ -2236,9 +2254,9 @@ where .mut_splits() .set_right_derive(split.get_right_derive()); admin_req.mut_splits().mut_requests().push(split); - // This method is executed only when there are unapplied entries after being restarted. - // So there will be no callback, it's OK to return a response that does not matched - // with its request. + // This method is executed only when there are unapplied entries after being + // restarted. So there will be no callback, it's OK to return a response + // that does not matched with its request. 
self.exec_batch_split(ctx, &admin_req) } @@ -2301,10 +2319,12 @@ where // Note that the split requests only contain ids for new regions, so we need // to handle new regions and old region separately. if right_derive { - // So the range of new regions is [old_start_key, split_key1, ..., last_split_key]. + // So the range of new regions is [old_start_key, split_key1, ..., + // last_split_key]. keys.push_front(derived.get_start_key().to_vec()); } else { - // So the range of new regions is [split_key1, ..., last_split_key, old_end_key]. + // So the range of new regions is [split_key1, ..., last_split_key, + // old_end_key]. keys.push_back(derived.get_end_key().to_vec()); derived.set_end_key(keys.front().unwrap().to_vec()); regions.push(derived.clone()); @@ -2520,15 +2540,20 @@ where // The target peer should send missing log entries to the source peer. // // So, the merge process order would be: - // 1. `exec_commit_merge` in target apply fsm and send `CatchUpLogs` to source peer fsm - // 2. `on_catch_up_logs_for_merge` in source peer fsm - // 3. if the source peer has already executed the corresponding `on_ready_prepare_merge`, set pending_remove and jump to step 6 - // 4. ... (raft append and apply logs) - // 5. `on_ready_prepare_merge` in source peer fsm and set pending_remove (means source region has finished applying all logs) - // 6. `logs_up_to_date_for_merge` in source apply fsm (destroy its apply fsm and send Noop to trigger the target apply fsm) - // 7. resume `exec_commit_merge` in target apply fsm - // 8. `on_ready_commit_merge` in target peer fsm and send `MergeResult` to source peer fsm - // 9. `on_merge_result` in source peer fsm (destroy itself) + // - `exec_commit_merge` in target apply fsm and send `CatchUpLogs` to source + // peer fsm + // - `on_catch_up_logs_for_merge` in source peer fsm + // - if the source peer has already executed the corresponding + // `on_ready_prepare_merge`, set pending_remove and jump to step 6 + // - ... 
(raft append and apply logs) + // - `on_ready_prepare_merge` in source peer fsm and set pending_remove (means + // source region has finished applying all logs) + // - `logs_up_to_date_for_merge` in source apply fsm (destroy its apply fsm and + // send Noop to trigger the target apply fsm) + // - resume `exec_commit_merge` in target apply fsm + // - `on_ready_commit_merge` in target peer fsm and send `MergeResult` to source + // peer fsm + // - `on_merge_result` in source peer fsm (destroy itself) fn exec_commit_merge( &mut self, ctx: &mut ApplyContext, @@ -3043,7 +3068,8 @@ where pub index: u64, pub term: u64, pub cb: Callback, - /// `propose_time` is set to the last time when a peer starts to renew lease. + /// `propose_time` is set to the last time when a peer starts to renew + /// lease. pub propose_time: Option, pub must_pass_epoch_check: bool, } @@ -3055,8 +3081,8 @@ pub struct Destroy { merge_from_snapshot: bool, } -/// A message that asks the delegate to apply to the given logs and then reply to -/// target mailbox. +/// A message that asks the delegate to apply to the given logs and then reply +/// to target mailbox. #[derive(Default, Debug)] pub struct CatchUpLogs { /// The target region to be notified when given logs are applied. @@ -3337,7 +3363,8 @@ where ) } - /// Handles peer registration. When a peer is created, it will register an apply delegate. + /// Handles peer registration. When a peer is created, it will register an + /// apply delegate. fn handle_registration(&mut self, reg: Registration) { info!( "re-register to apply delegates"; @@ -3351,7 +3378,8 @@ where self.delegate = ApplyDelegate::from_registration(reg); } - /// Handles apply tasks, and uses the apply delegate to handle the committed entries. + /// Handles apply tasks, and uses the apply delegate to handle the committed + /// entries. 
fn handle_apply(&mut self, apply_ctx: &mut ApplyContext, mut apply: Apply) { if apply_ctx.timer.is_none() { apply_ctx.timer = Some(Instant::now_coarse()); @@ -3474,7 +3502,8 @@ where self.delegate.destroy(ctx); } - /// Handles peer destroy. When a peer is destroyed, the corresponding apply delegate should be removed too. + /// Handles peer destroy. When a peer is destroyed, the corresponding apply + /// delegate should be removed too. fn handle_destroy(&mut self, ctx: &mut ApplyContext, d: Destroy) { assert_eq!(d.region_id, self.delegate.region_id()); if d.merge_from_snapshot { @@ -3545,8 +3574,9 @@ where "region_id" => region_id, "peer_id" => self.delegate.id(), ); - // The source peer fsm will be destroyed when the target peer executes `on_ready_commit_merge` - // and sends `merge result` to the source peer fsm. + // The source peer fsm will be destroyed when the target peer executes + // `on_ready_commit_merge` and sends `merge result` to the source peer + // fsm. self.destroy(ctx); catch_up_logs .logs_up_to_date @@ -3650,12 +3680,13 @@ where let resp = match compare_region_epoch( ®ion_epoch, &self.delegate.region, - false, /* check_conf_ver */ - true, /* check_ver */ - true, /* include_region */ + false, // check_conf_ver + true, // check_ver + true, // include_region ) { Ok(()) => { - // Commit the writebatch for ensuring the following snapshot can get all previous writes. + // Commit the writebatch for ensuring the following snapshot can get all + // previous writes. if apply_ctx.kv_wb().count() > 0 { apply_ctx.commit(&mut self.delegate); } @@ -4266,8 +4297,8 @@ mod memtrace { S: Snapshot, { fn heap_size(&self) -> usize { - // Some fields of `PendingCmd` are on stack, but ignore them because they are just - // some small boxed closures. + // Some fields of `PendingCmd` are on stack, but ignore them because they are + // just some small boxed closures. 
self.normals.capacity() * mem::size_of::>() } } @@ -4728,7 +4759,8 @@ mod tests { assert_eq!(apply_res.apply_state, apply_state); assert_eq!(apply_res.apply_state.get_applied_index(), 5); assert!(apply_res.exec_res.is_empty()); - // empty entry will make applied_index step forward and should write apply state to engine. + // empty entry will make applied_index step forward and should write apply state + // to engine. assert_eq!(apply_res.metrics.written_keys, 1); assert_eq!(apply_res.applied_term, 5); validate(&router, 2, |delegate| { @@ -5335,7 +5367,8 @@ mod tests { capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); } let index = write_batch_max_keys + 11; - // The region was rescheduled to normal-priority handler. Discard the first apply_res. + // The region was rescheduled to normal-priority handler. Discard the first + // apply_res. fetch_apply_res(&rx); let apply_res = fetch_apply_res(&rx); assert_eq!(apply_res.apply_state.get_applied_index(), index as u64); @@ -5391,9 +5424,10 @@ mod tests { reg.region.mut_region_epoch().set_version(3); router.schedule_task(1, Msg::Registration(reg)); - // Test whether put commands and ingest commands are applied to engine in a correct order. - // We will generate 5 entries which are put, ingest, put, ingest, put respectively. For a same key, - // it can exist in multiple entries or in a single entries. We will test all all the possible + // Test whether put commands and ingest commands are applied to engine in a + // correct order. We will generate 5 entries which are put, ingest, put, + // ingest, put respectively. For a same key, it can exist in multiple + // entries or in a single entries. We will test all all the possible // keys exsiting combinations. 
let mut keys = Vec::new(); let keys_count = 1 << 5; @@ -5510,8 +5544,8 @@ mod tests { assert!(!resp.get_header().has_error(), "{:?}", resp); } let mut res = fetch_apply_res(&rx); - // There may be one or two ApplyRes which depends on whether these two apply msgs - // are batched together. + // There may be one or two ApplyRes which depends on whether these two apply + // msgs are batched together. if res.apply_state.get_applied_index() == 3 { res = fetch_apply_res(&rx); } @@ -6276,7 +6310,8 @@ mod tests { let res = panic_hook::recover_safe(|| { let _cmd = PendingCmd::::new(1, 1, Callback::None); panic!("Don't abort"); - // It would abort and fail if there was a double-panic in PendingCmd dtor. + // It would abort and fail if there was a double-panic in PendingCmd + // dtor. }); res.unwrap_err(); } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index baccd071690..8d5369aaefa 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -113,7 +113,8 @@ enum DelayReason { /// Limits the maximum number of regions returned by error. /// -/// Another choice is using coprocessor batch limit, but 10 should be a good fit in most case. +/// Another choice is using coprocessor batch limit, but 10 should be a good fit +/// in most case. const MAX_REGIONS_IN_ERROR: usize = 10; const REGION_SPLIT_SKIP_MAX_COUNT: usize = 3; @@ -129,13 +130,15 @@ where ER: RaftEngine, { pub peer: Peer, - /// A registry for all scheduled ticks. This can avoid scheduling ticks twice accidentally. + /// A registry for all scheduled ticks. This can avoid scheduling ticks + /// twice accidentally. tick_registry: [bool; PeerTick::VARIANT_COUNT], /// Ticks for speed up campaign in chaos state. /// - /// Followers will keep ticking in Idle mode to measure how many ticks have been skipped. 
- /// Once it becomes chaos, those skipped ticks will be ticked so that it can campaign - /// quickly instead of waiting an election timeout. + /// Followers will keep ticking in Idle mode to measure how many ticks have + /// been skipped. Once it becomes chaos, those skipped ticks will be + /// ticked so that it can campaign quickly instead of waiting an + /// election timeout. /// /// This will be reset to 0 once it receives any messages from leader. missing_ticks: usize, @@ -144,11 +147,12 @@ where has_ready: bool, mailbox: Option>>, pub receiver: Receiver>, - /// when snapshot is generating or sending, skip split check at most REGION_SPLIT_SKIT_MAX_COUNT times. + /// when snapshot is generating or sending, skip split check at most + /// REGION_SPLIT_SKIT_MAX_COUNT times. skip_split_count: usize, - /// Sometimes applied raft logs won't be compacted in time, because less compact means less - /// sync-log in apply threads. Stale logs will be deleted if the skip time reaches this - /// `skip_gc_raft_log_ticks`. + /// Sometimes applied raft logs won't be compacted in time, because less + /// compact means less sync-log in apply threads. Stale logs will be + /// deleted if the skip time reaches this `skip_gc_raft_log_ticks`. skip_gc_raft_log_ticks: usize, reactivate_memory_lock_ticks: usize, @@ -160,8 +164,8 @@ where /// Destroy is delayed because of some unpersisted readies in Peer. /// Should call `destroy_peer` again after persisting all readies. delayed_destroy: Option, - /// Before actually destroying a peer, ensure all log gc tasks are finished, so we - /// can start destroying without seeking. + /// Before actually destroying a peer, ensure all log gc tasks are finished, + /// so we can start destroying without seeking. 
logs_gc_flushed: bool, } @@ -285,9 +289,9 @@ where )) } - // The peer can be created from another node with raft membership changes, and we only - // know the region_id and peer_id when creating this replicated peer, the region info - // will be retrieved later after applying snapshot. + // The peer can be created from another node with raft membership changes, and + // we only know the region_id and peer_id when creating this replicated peer, + // the region info will be retrieved later after applying snapshot. pub fn replicate( store_id: u64, cfg: &Config, @@ -458,8 +462,8 @@ where fn should_finish(&self, cfg: &Config) -> bool { if let Some(batch_req) = self.request.as_ref() { - // Limit the size of batch request so that it will not exceed raft_entry_max_size after - // adding header. + // Limit the size of batch request so that it will not exceed + // raft_entry_max_size after adding header. if self.batch_req_size > (cfg.raft_entry_max_size.0 as f64 * 0.4) as u64 { return true; } @@ -877,9 +881,9 @@ where return; } let target_index = if self.fsm.peer.force_leader.is_some() { - // For regions that lose quorum (or regions have force leader), whatever has been - // proposed will be committed. Based on that fact, we simply use "last index" here to - // avoid implementing another "wait commit" process. + // For regions that lose quorum (or regions have force leader), whatever has + // been proposed will be committed. Based on that fact, we simply use "last + // index" here to avoid implementing another "wait commit" process. 
self.fsm.peer.raft_group.raft.raft_log.last_index() } else { self.fsm.peer.raft_group.raft.raft_log.committed @@ -891,7 +895,7 @@ where }); self.fsm .peer - .unsafe_recovery_maybe_finish_wait_apply(/*force=*/ self.fsm.stopped); + .unsafe_recovery_maybe_finish_wait_apply(/* force= */ self.fsm.stopped); } fn on_unsafe_recovery_fill_out_report(&mut self, syncer: UnsafeRecoveryFillOutReportSyncer) { @@ -989,8 +993,9 @@ where if is_learner(&self.fsm.peer.peer) { // FIXME: should use `bcast_check_stale_peer_message` instead. - // Sending a new enum type msg to a old tikv may cause panic during rolling update - // we should change the protobuf behavior and check if properly handled in all place + // Sending a new enum type msg to a old tikv may cause panic during rolling + // update we should change the protobuf behavior and check if properly handled + // in all place self.fsm.peer.bcast_wake_up_message(self.ctx); } } @@ -1358,8 +1363,9 @@ where ); return; } - // wait two rounds of election timeout to trigger check quorum to step down the leader - // note: check quorum is triggered every `election_timeout` instead of `randomized_election_timeout` + // wait two rounds of election timeout to trigger check quorum to step down the + // leader note: check quorum is triggered every `election_timeout` instead of + // `randomized_election_timeout` Some( self.fsm.peer.raft_group.raft.election_timeout() * 2 - self.fsm.peer.raft_group.raft.election_elapsed, @@ -1439,7 +1445,8 @@ where // When PD issues force leader on two different peer, it may cause // two force leader in same term. 
self.fsm.peer.raft_group.raft.pre_vote = false; - // trigger vote request to all voters, will check the vote result in `check_force_leader` + // trigger vote request to all voters, will check the vote result in + // `check_force_leader` if let Err(e) = self.fsm.peer.raft_group.campaign() { warn!( "Unsafe recovery, campaign failed"; @@ -1558,7 +1565,8 @@ where self.fsm.peer.raft_group.raft.set_check_quorum(true); self.fsm.peer.raft_group.raft.pre_vote = true; if self.fsm.peer.raft_group.raft.promotable() { - // Do not campaign directly here, otherwise on_role_changed() won't called for follower state + // Do not campaign directly here, otherwise on_role_changed() won't called for + // follower state let _ = self.ctx.router.send( self.region_id(), PeerMsg::CasualMessage(CasualMessage::Campaign), @@ -1939,17 +1947,18 @@ where if self.fsm.hibernate_state.group_state() == GroupState::Idle { // missing_ticks should be less than election timeout ticks otherwise // follower may tick more than an election timeout in chaos state. - // Before stopping tick, `missing_tick` should be `raft_election_timeout_ticks` - 2 - // - `raft_heartbeat_ticks` (default 10 - 2 - 2 = 6) - // and the follower's `election_elapsed` in raft-rs is 1. - // After the group state becomes Chaos, the next tick will call `raft_group.tick` - // `missing_tick` + 1 times(default 7). + // Before stopping tick, `missing_tick` should be `raft_election_timeout_ticks` + // - 2 - `raft_heartbeat_ticks` (default 10 - 2 - 2 = 6) and the follower's + // `election_elapsed` in raft-rs is 1. + // After the group state becomes Chaos, the next tick will call + // `raft_group.tick` `missing_tick` + 1 times(default 7). // Then the follower's `election_elapsed` will be 1 + `missing_tick` + 1 // (default 1 + 6 + 1 = 8) which is less than the min election timeout. 
- // The reason is that we don't want let all followers become (pre)candidate if one - // follower may receive a request, then becomes (pre)candidate and sends (pre)vote msg - // to others. As long as the leader can wake up and broadcast heartbeats in one `raft_heartbeat_ticks` - // time(default 2s), no more followers will wake up and sends vote msg again. + // The reason is that we don't want let all followers become (pre)candidate if + // one follower may receive a request, then becomes (pre)candidate and sends + // (pre)vote msg to others. As long as the leader can wake up and broadcast + // heartbeats in one `raft_heartbeat_ticks` time(default 2s), no more followers + // will wake up and sends vote msg again. if self.fsm.missing_ticks + 1 /* for the next tick after the peer isn't Idle */ + self.fsm.peer.raft_group.raft.election_elapsed + self.ctx.cfg.raft_heartbeat_ticks @@ -1985,7 +1994,8 @@ where self.fsm.peer.mut_store().flush_entry_cache_metrics(); - // Keep ticking if there are still pending read requests or this node is within hibernate timeout. + // Keep ticking if there are still pending read requests or this node is within + // hibernate timeout. if res.is_none() /* hibernate_region is false */ || !self.fsm.peer.check_after_tick(self.fsm.hibernate_state.group_state(), res.unwrap()) || (self.fsm.peer.is_leader() && !self.all_agree_to_hibernate()) @@ -2021,7 +2031,7 @@ where Some(UnsafeRecoveryState::WaitApply { .. }) => self .fsm .peer - .unsafe_recovery_maybe_finish_wait_apply(/*force=*/ false), + .unsafe_recovery_maybe_finish_wait_apply(/* force= */ false), Some(UnsafeRecoveryState::DemoteFailedVoters { syncer, failed_voters, @@ -2378,10 +2388,11 @@ where .retain(|r| self.fsm.region_id() != r.get_id()); } else { // This snapshot may be accepted by raft-rs. - // If it's rejected by raft-rs, the snapshot region in `pending_snapshot_regions` - // will be removed together with the latest snapshot region after applying that snapshot. 
- // But if `regions_to_destroy` is not empty, the pending snapshot must be this msg's snapshot - // because this kind of snapshot is exclusive. + // If it's rejected by raft-rs, the snapshot region in + // `pending_snapshot_regions` will be removed together with the latest snapshot + // region after applying that snapshot. + // But if `regions_to_destroy` is not empty, the pending snapshot must be this + // msg's snapshot because this kind of snapshot is exclusive. self.destroy_regions_for_snapshot(regions_to_destroy); } } @@ -2541,23 +2552,26 @@ where let from_store_id = msg.get_from_peer().get_store_id(); // Let's consider following cases with three nodes [1, 2, 3] and 1 is leader: - // a. 1 removes 2, 2 may still send MsgAppendResponse to 1. + // - 1 removes 2, 2 may still send MsgAppendResponse to 1. // We should ignore this stale message and let 2 remove itself after // applying the ConfChange log. - // b. 2 is isolated, 1 removes 2. When 2 rejoins the cluster, 2 will - // send stale MsgRequestVote to 1 and 3, at this time, we should tell 2 to gc itself. - // c. 2 is isolated but can communicate with 3. 1 removes 3. + // - 2 is isolated, 1 removes 2. When 2 rejoins the cluster, 2 will + // send stale MsgRequestVote to 1 and 3, at this time, we should tell 2 to gc + // itself. + // - 2 is isolated but can communicate with 3. 1 removes 3. // 2 will send stale MsgRequestVote to 3, 3 should ignore this message. - // d. 2 is isolated but can communicate with 3. 1 removes 2, then adds 4, remove 3. + // - 2 is isolated but can communicate with 3. 1 removes 2, then adds 4, remove + // 3. // 2 will send stale MsgRequestVote to 3, 3 should tell 2 to gc itself. - // e. 2 is isolated. 1 adds 4, 5, 6, removes 3, 1. Now assume 4 is leader. + // - 2 is isolated. 1 adds 4, 5, 6, removes 3, 1. Now assume 4 is leader. // After 2 rejoins the cluster, 2 may send stale MsgRequestVote to 1 and 3, // 1 and 3 will ignore this message. 
Later 4 will send messages to 2 and 2 will // rejoin the raft group again. - // f. 2 is isolated. 1 adds 4, 5, 6, removes 3, 1. Now assume 4 is leader, and 4 removes 2. + // - 2 is isolated. 1 adds 4, 5, 6, removes 3, 1. Now assume 4 is leader, and 4 + // removes 2. // unlike case e, 2 will be stale forever. - // TODO: for case f, if 2 is stale for a long time, 2 will communicate with pd and pd will - // tell 2 is stale, so 2 can remove itself. + // TODO: for case f, if 2 is stale for a long time, 2 will communicate with pd + // and pd will tell 2 is stale, so 2 can remove itself. let self_epoch = self.fsm.peer.region().get_region_epoch(); if util::is_epoch_stale(from_epoch, self_epoch) && util::find_peer(self.fsm.peer.region(), from_store_id).is_none() @@ -2625,11 +2639,11 @@ where "merge_target" => ?merge_target, ); - // When receiving message that has a merge target, it indicates that the source peer on this - // store is stale, the peers on other stores are already merged. The epoch in merge target - // is the state of target peer at the time when source peer is merged. So here we record the - // merge target epoch version to let the target peer on this store to decide whether to - // destroy the source peer. + // When receiving message that has a merge target, it indicates that the source + // peer on this store is stale, the peers on other stores are already merged. + // The epoch in merge target is the state of target peer at the time when source + // peer is merged. So here we record the merge target epoch version to let the + // target peer on this store to decide whether to destroy the source peer. 
let mut meta = self.ctx.store_meta.lock().unwrap(); meta.targets_map.insert(self.region_id(), target_region_id); let v = meta @@ -2640,8 +2654,8 @@ where no_range_merge_target.clear_start_key(); no_range_merge_target.clear_end_key(); if let Some(pre_merge_target) = v.insert(self.region_id(), no_range_merge_target) { - // Merge target epoch records the version of target region when source region is merged. - // So it must be same no matter when receiving merge target. + // Merge target epoch records the version of target region when source region is + // merged. So it must be same no matter when receiving merge target. if pre_merge_target.get_region_epoch().get_version() != merge_target.get_region_epoch().get_version() { @@ -2654,7 +2668,8 @@ where } if let Some(r) = meta.regions.get(&target_region_id) { - // In the case that the source peer's range isn't overlapped with target's anymore: + // In the case that the source peer's range isn't overlapped with target's + // anymore: // | region 2 | region 3 | region 1 | // || merge 3 into 2 // \/ @@ -2668,8 +2683,8 @@ where // so the new target peer can't find the source peer. // e.g. new region 2 is overlapped with region 1 // - // If that, source peer still need to decide whether to destroy itself. When the target - // peer has already moved on, source peer can destroy itself. + // If that, source peer still need to decide whether to destroy itself. When the + // target peer has already moved on, source peer can destroy itself. if util::is_epoch_stale(merge_target.get_region_epoch(), r.get_region_epoch()) { return Ok(true); } @@ -2678,8 +2693,8 @@ where drop(meta); // All of the target peers must exist before merging which is guaranteed by PD. 
- // Now the target peer is not in region map, so if everything is ok, the merge target - // region should be staler than the local target region + // Now the target peer is not in region map, so if everything is ok, the merge + // target region should be staler than the local target region if self.is_merge_target_region_stale(merge_target)? { Ok(true) } else { @@ -2719,16 +2734,17 @@ where ); // Destroy peer in next round in order to apply more committed entries if any. - // It depends on the implementation that msgs which are handled in this round have already fetched. + // It depends on the implementation that msgs which are handled in this round + // have already fetched. let _ = self .ctx .router .force_send(self.fsm.region_id(), PeerMsg::Destroy(self.fsm.peer_id())); } - // Returns `Vec<(u64, bool)>` indicated (source_region_id, merge_to_this_peer) if the `msg` - // doesn't contain a snapshot or this snapshot doesn't conflict with any other snapshots or regions. - // Otherwise a `SnapKey` is returned. + // Returns `Vec<(u64, bool)>` indicated (source_region_id, merge_to_this_peer) + // if the `msg` doesn't contain a snapshot or this snapshot doesn't conflict + // with any other snapshots or regions. Otherwise a `SnapKey` is returned. fn check_snapshot(&mut self, msg: &RaftMessage) -> Result>> { if !msg.get_message().has_snapshot() { return Ok(Either::Right(vec![])); @@ -2829,13 +2845,14 @@ where let mut is_overlapped = false; let mut regions_to_destroy = vec![]; - // In some extreme cases, it may cause source peer destroyed improperly so that a later - // CommitMerge may panic because source is already destroyed, so just drop the message: - // 1. A new snapshot is received whereas a snapshot is still in applying, and the snapshot - // under applying is generated before merge and the new snapshot is generated after merge. - // After the applying snapshot is finished, the log may able to catch up and so a - // CommitMerge will be applied. - // 2. 
There is a CommitMerge pending in apply thread. + // In some extreme cases, it may cause source peer destroyed improperly so that + // a later CommitMerge may panic because source is already destroyed, so just + // drop the message: + // - A new snapshot is received whereas a snapshot is still in applying, and the + // snapshot under applying is generated before merge and the new snapshot is + // generated after merge. After the applying snapshot is finished, the log may + // able to catch up and so a CommitMerge will be applied. + // - There is a CommitMerge pending in apply thread. let ready = !self.fsm.peer.is_handling_snapshot() && !self.fsm.peer.has_pending_snapshot() // It must be ensured that all logs have been applied. @@ -2864,9 +2881,9 @@ where snap_region.get_region_epoch().to_owned(), ); if ready && can_destroy { - // The snapshot that we decide to whether destroy peer based on must can be applied. - // So here not to destroy peer immediately, or the snapshot maybe dropped in later - // check but the peer is already destroyed. + // The snapshot that we decide to whether destroy peer based on must can be + // applied. So here not to destroy peer immediately, or the snapshot maybe + // dropped in later check but the peer is already destroyed. regions_to_destroy.push((exist_region.get_id(), merge_to_this_peer)); continue; } @@ -2895,14 +2912,16 @@ where // Now all checking passed. if self.fsm.peer.local_first_replicate && !self.fsm.peer.is_initialized() { - // If the peer is not initialized and passes the snapshot range check, `is_splitting` flag must - // be false. - // 1. If `is_splitting` is set to true, then the uninitialized peer is created before split is applied - // and the peer id is the same as split one. So there should be no initialized peer before. - // 2. If the peer is also created by splitting, then the snapshot range is not overlapped with - // parent peer. It means leader has applied merge and split at least one time. 
However, - // the prerequisite of merge includes the initialization of all target peers and source peers, - // which is conflict with 1. + // If the peer is not initialized and passes the snapshot range check, + // `is_splitting` flag must be false. + // - If `is_splitting` is set to true, then the uninitialized peer is created + // before split is applied and the peer id is the same as split one. So there + // should be no initialized peer before. + // - If the peer is also created by splitting, then the snapshot range is not + // overlapped with parent peer. It means leader has applied merge and split at + // least one time. However, the prerequisite of merge includes the + // initialization of all target peers and source peers, which is conflict with + // 1. let pending_create_peers = self.ctx.pending_create_peers.lock().unwrap(); let status = pending_create_peers.get(®ion_id).cloned(); if status != Some((self.fsm.peer_id(), false)) { @@ -2951,8 +2970,8 @@ where } else { MergeResultKind::Stale }; - // Use `unwrap` is ok because the StoreMeta lock is held and these source peers still - // exist in regions and region_ranges map. + // Use `unwrap` is ok because the StoreMeta lock is held and these source peers + // still exist in regions and region_ranges map. // It depends on the implementation of `destroy_peer`. self.ctx .router @@ -3036,11 +3055,12 @@ where } } - // Returns whether we should propose another TransferLeader command. This is for: - // 1. Considering the amount of pessimistic locks can be big, it can reduce - // unavailable time caused by waiting for the transferree catching up logs. - // 2. Make transferring leader strictly after write commands that executes - // before proposing the locks, preventing unexpected lock loss. + // Returns whether we should propose another TransferLeader command. 
This is + // for: + // - Considering the amount of pessimistic locks can be big, it can reduce + // unavailable time caused by waiting for the transferee catching up logs. + // - Make transferring leader strictly after write commands that executes before + // proposing the locks, preventing unexpected lock loss. fn propose_locks_before_transfer_leader(&mut self, msg: &eraftpb::Message) -> bool { // 1. Disable in-memory pessimistic locks. @@ -3053,20 +3073,22 @@ where // in the TransferringLeader status, we can safely initiate transferring leader // now. // If it's not in TransferringLeader status now, it is probably because several - // ticks have passed after proposing the locks in the last time and we reactivate - // the memory locks. Then, we should propose the locks again. + // ticks have passed after proposing the locks in the last time and we + // reactivate the memory locks. Then, we should propose the locks again. if msg.get_context() == TRANSFER_LEADER_COMMAND_REPLY_CTX && pessimistic_locks.status == LocksStatus::TransferringLeader { return false; } - // If it is not writable, it's probably because it's a retried TransferLeader and the locks - // have been proposed. But we still need to return true to propose another TransferLeader - // command. Otherwise, some write requests that have marked some locks as deleted will fail - // because raft rejects more proposals. - // It is OK to return true here if it's in other states like MergingRegion or NotLeader. - // In those cases, the locks will fail to propose and nothing will happen. + // If it is not writable, it's probably because it's a retried TransferLeader + // and the locks have been proposed. But we still need to return true to + // propose another TransferLeader command. Otherwise, some write requests that + // have marked some locks as deleted will fail because raft rejects more + // proposals. + // It is OK to return true here if it's in other states like MergingRegion or + // NotLeader. 
In those cases, the locks will fail to propose and nothing will + // happen. if !pessimistic_locks.is_writable() { return true; } @@ -3078,11 +3100,12 @@ where if pessimistic_locks.is_empty() { return false; } - // FIXME: Raft command has size limit. Either limit the total size of pessimistic locks - // in a region, or split commands here. + // FIXME: Raft command has size limit. Either limit the total size of + // pessimistic locks in a region, or split commands here. let mut cmd = RaftCmdRequest::default(); { - // Downgrade to a read guard, do not block readers in the scheduler as far as possible. + // Downgrade to a read guard, do not block readers in the scheduler as far as + // possible. let pessimistic_locks = RwLockWriteGuard::downgrade(pessimistic_locks); fail_point!("invalidate_locks_before_transfer_leader"); for (key, (lock, deleted)) in &*pessimistic_locks { @@ -3100,9 +3123,10 @@ where } } if cmd.get_requests().is_empty() { - // If the map is not empty but all locks are deleted, it is possible that a write - // command has just marked locks deleted but not proposed yet. It might cause - // that command to fail if we skip proposing the extra TransferLeader command here. + // If the map is not empty but all locks are deleted, it is possible that a + // write command has just marked locks deleted but not proposed yet. + // It might cause that command to fail if we skip proposing the + // extra TransferLeader command here. return true; } cmd.mut_header().set_region_id(self.fsm.region_id()); @@ -3128,7 +3152,8 @@ where } } - /// Check if destroy can be executed immediately. If it can't, the reason is returned. + /// Check if destroy can be executed immediately. If it can't, the reason is + /// returned. 
fn maybe_delay_destroy(&mut self) -> Option { if self.fsm.peer.has_unpersisted_ready() { assert!(self.ctx.sync_write_worker.is_none()); @@ -3141,9 +3166,9 @@ where let is_initialized = self.fsm.peer.is_initialized(); if !is_initialized { // If the peer is uninitialized, then it can't receive any logs from leader. So - // no need to gc. If there was a peer with same region id on the store, and it had - // logs written, then it must be initialized, hence its log should be gc either - // before it's destroyed or during node restarts. + // no need to gc. If there was a peer with same region id on the store, and it + // had logs written, then it must be initialized, hence its log should be gc + // either before it's destroyed or during node restarts. self.fsm.logs_gc_flushed = true; } if !self.fsm.logs_gc_flushed { @@ -3262,7 +3287,7 @@ where if self.fsm.peer.unsafe_recovery_state.is_some() { self.fsm .peer - .unsafe_recovery_maybe_finish_wait_apply(/*force=*/ true); + .unsafe_recovery_maybe_finish_wait_apply(/* force= */ true); } let mut meta = self.ctx.store_meta.lock().unwrap(); @@ -3318,7 +3343,8 @@ where } // Some places use `force_send().unwrap()` if the StoreMeta lock is held. - // So in here, it's necessary to held the StoreMeta lock when closing the router. + // So in here, it's necessary to held the StoreMeta lock when closing the + // router. self.ctx.router.close(region_id); self.fsm.stop(); @@ -3361,8 +3387,10 @@ where .get_mut(&target) .unwrap() .remove(®ion_id); - // When the target doesn't exist(add peer but the store is isolated), source peer decide to destroy by itself. - // Without target, the `pending_merge_targets` for target won't be removed, so here source peer help target to clear. + // When the target doesn't exist(add peer but the store is isolated), source + // peer decide to destroy by itself. Without target, the + // `pending_merge_targets` for target won't be removed, so here source peer help + // target to clear. 
if meta.regions.get(&target).is_none() && meta.pending_merge_targets.get(&target).unwrap().is_empty() { @@ -3411,7 +3439,8 @@ where _ => unreachable!(), } } else { - // Please take a look at test case test_redundant_conf_change_by_snapshot. + // Please take a look at test case + // test_redundant_conf_change_by_snapshot. } self.update_region(cp.region); @@ -3526,9 +3555,10 @@ where // Most of these functions are only called when the peer is a leader. // (it's pretty reasonable because progress is used to track others' status) // The only exception is `Raft::restore` at the time of writing, which is ok - // because the raft msgs(including snapshot) don't be handled when `pending_remove` - // is true(it will be set in `destroy_peer`). - // TODO: totally avoid calling these raft-rs functions when `pending_remove` is true. + // because the raft msgs(including snapshot) don't be handled when + // `pending_remove` is true(it will be set in `destroy_peer`). + // TODO: totally avoid calling these raft-rs functions when `pending_remove` is + // true. self.fsm .peer .raft_group @@ -3573,9 +3603,10 @@ where let region_id = derived.get_id(); - // Group in-memory pessimistic locks in the original region into new regions. The locks of - // new regions will be put into the corresponding new regions later. And the locks belonging - // to the old region will stay in the original map. + // Group in-memory pessimistic locks in the original region into new regions. + // The locks of new regions will be put into the corresponding new regions + // later. And the locks belonging to the old region will stay in the original + // map. 
let region_locks = { let mut pessimistic_locks = self.fsm.peer.txn_ext.pessimistic_locks.write(); info!("moving {} locks to new regions", pessimistic_locks.len(); "region_id" => region_id); @@ -3732,8 +3763,8 @@ where new_peer.peer.approximate_size = estimated_size; new_peer.peer.approximate_keys = estimated_keys; *new_peer.peer.txn_ext.pessimistic_locks.write() = locks; - // The new peer is likely to become leader, send a heartbeat immediately to reduce - // client query miss. + // The new peer is likely to become leader, send a heartbeat immediately to + // reduce client query miss. new_peer.peer.heartbeat_pd(self.ctx); } @@ -3785,8 +3816,9 @@ where /// Check if merge target region is staler than the local one in kv engine. /// It should be called when target region is not in region map in memory. - /// If everything is ok, the answer should always be true because PD should ensure all target peers exist. - /// So if not, error log will be printed and return false. + /// If everything is ok, the answer should always be true because PD should + /// ensure all target peers exist. So if not, error log will be printed + /// and return false. fn is_merge_target_region_stale(&self, target_region: &metapb::Region) -> Result { let target_region_id = target_region.get_id(); let target_peer_id = util::find_peer(target_region, self.ctx.store_id()) @@ -3805,8 +3837,9 @@ where return Ok(true); } // The local target region epoch is staler than target region's. - // In the case where the peer is destroyed by receiving gc msg rather than applying conf change, - // the epoch may staler but it's legal, so check peer id to assure that. + // In the case where the peer is destroyed by receiving gc msg rather than + // applying conf change, the epoch may staler but it's legal, so check peer id + // to assure that. 
if let Some(local_target_peer_id) = util::find_peer(target_state.get_region(), self.ctx.store_id()).map(|r| r.get_id()) { @@ -3830,8 +3863,8 @@ where // There is a new peer and it's destroyed without being initialised. return Ok(true); } - // The local target peer id is greater than the one in target region, but its epoch - // is staler than target_region's. That is contradictory. + // The local target peer id is greater than the one in target region, but + // its epoch is staler than target_region's. That is contradictory. panic!("{} local target peer id {} is greater than the one in target region {}, but its epoch is staler, local target region {:?}, target region {:?}", self.fsm.peer.tag, local_target_peer_id, target_peer_id, target_state.get_region(), target_region); } @@ -3847,7 +3880,8 @@ where } } } else { - // Can't get local target peer id probably because this target peer is removed by applying conf change + // Can't get local target peer id probably because this target peer is removed + // by applying conf change error!( "the local target peer does not exist in target region state"; "target_region" => ?target_region, @@ -3980,9 +4014,10 @@ where request.set_admin_request(admin); (request, target_id) }; - // Please note that, here assumes that the unit of network isolation is store rather than - // peer. So a quorum stores of source region should also be the quorum stores of target - // region. Otherwise we need to enable proposal forwarding. + // Please note that, here assumes that the unit of network isolation is store + // rather than peer. So a quorum stores of source region should also be the + // quorum stores of target region. Otherwise we need to enable proposal + // forwarding. 
self.ctx .router .force_send( @@ -4204,8 +4239,8 @@ where d.mark_pending_remove(); } - // After the region commit merged, the region's key range is extended and the region's `safe_ts` - // should reset to `min(source_safe_ts, target_safe_ts)` + // After the region commit merged, the region's key range is extended and the + // region's `safe_ts` should reset to `min(source_safe_ts, target_safe_ts)` let source_read_progress = meta.region_read_progress.remove(&source.get_id()).unwrap(); self.fsm .peer @@ -4222,8 +4257,8 @@ where drop(meta); // make approximate size and keys updated in time. - // the reason why follower need to update is that there is a issue that after merge - // and then transfer leader, the new leader may have stale size and keys. + // the reason why follower need to update is that there is a issue that after + // merge and then transfer leader, the new leader may have stale size and keys. self.fsm.peer.size_diff_hint = self.ctx.cfg.region_split_check_diff().0; self.fsm.peer.reset_region_buckets(); if self.fsm.peer.is_leader() { @@ -4255,9 +4290,9 @@ where /// Handle rollbacking Merge result. /// - /// If commit is 0, it means that Merge is rollbacked by a snapshot; otherwise - /// it's rollbacked by a proposal, and its value should be equal to the commit - /// index of previous PrepareMerge. + /// If commit is 0, it means that Merge is rollbacked by a snapshot; + /// otherwise it's rollbacked by a proposal, and its value should be + /// equal to the commit index of previous PrepareMerge. fn on_ready_rollback_merge(&mut self, commit: u64, region: Option) { let pending_commit = self .fsm @@ -4328,9 +4363,9 @@ where ); } // Because of the checking before proposing `PrepareMerge`, which is - // no `CompactLog` proposal between the smallest commit index and the latest index. - // If the merge succeed, all source peers are impossible in apply snapshot state - // and must be initialized. 
+ // no `CompactLog` proposal between the smallest commit index and the latest + // index. If the merge succeed, all source peers are impossible in apply + // snapshot state and must be initialized. { let meta = self.ctx.store_meta.lock().unwrap(); if meta.atomic_snap_regions.contains_key(&self.region_id()) { @@ -4400,9 +4435,9 @@ where "merge_state" => ?self.fsm.peer.pending_merge_state, ); // Because of the checking before proposing `PrepareMerge`, which is - // no `CompactLog` proposal between the smallest commit index and the latest index. - // If the merge succeed, all source peers are impossible in apply snapshot state - // and must be initialized. + // no `CompactLog` proposal between the smallest commit index and the latest + // index. If the merge succeed, all source peers are impossible in apply + // snapshot state and must be initialized. // So `maybe_destroy` must succeed here. let job = self.fsm.peer.maybe_destroy(self.ctx).unwrap(); self.handle_destroy_peer(job); @@ -4442,8 +4477,9 @@ where ); // Remove this region's snapshot region from the `pending_snapshot_regions` - // The `pending_snapshot_regions` is only used to occupy the key range, so if this - // peer is added to `region_ranges`, it can be remove from `pending_snapshot_regions` + // The `pending_snapshot_regions` is only used to occupy the key range, so if + // this peer is added to `region_ranges`, it can be remove from + // `pending_snapshot_regions` meta.pending_snapshot_regions .retain(|r| self.fsm.region_id() != r.get_id()); @@ -4486,7 +4522,8 @@ where } } else if self.fsm.peer.local_first_replicate { // This peer is uninitialized previously. - // More accurately, the `RegionLocalState` has been persisted so the data can be removed from `pending_create_peers`. + // More accurately, the `RegionLocalState` has been persisted so the data can be + // removed from `pending_create_peers`. 
let mut pending_create_peers = self.ctx.pending_create_peers.lock().unwrap(); assert_eq!( pending_create_peers.remove(&self.fsm.region_id()), @@ -4576,14 +4613,15 @@ where } } - // Update metrics only when all exec_results are finished in case the metrics is counted multiple times - // when waiting for commit merge + // Update metrics only when all exec_results are finished in case the metrics is + // counted multiple times when waiting for commit merge self.ctx.store_stat.lock_cf_bytes_written += metrics.lock_cf_written_bytes; self.ctx.store_stat.engine_total_bytes_written += metrics.written_bytes; self.ctx.store_stat.engine_total_keys_written += metrics.written_keys; } - /// Check if a request is valid if it has valid prepare_merge/commit_merge proposal. + /// Check if a request is valid if it has valid prepare_merge/commit_merge + /// proposal. fn check_merge_proposal(&self, msg: &mut RaftCmdRequest) -> Result<()> { if !msg.get_admin_request().has_prepare_merge() && !msg.get_admin_request().has_commit_merge() @@ -4678,7 +4716,8 @@ where let request = msg.get_requests(); if self.fsm.peer.force_leader.is_some() { - // in force leader state, forbid requests to make the recovery progress less error-prone + // in force leader state, forbid requests to make the recovery progress less + // error-prone if !(msg.has_admin_request() && (msg.get_admin_request().get_cmd_type() == AdminCmdType::ChangePeer || msg.get_admin_request().get_cmd_type() == AdminCmdType::ChangePeerV2)) @@ -4724,8 +4763,8 @@ where .region_not_initialized += 1; return Err(Error::RegionNotInitialized(region_id)); } - // If the peer is applying snapshot, it may drop some sending messages, that could - // make clients wait for response until timeout. + // If the peer is applying snapshot, it may drop some sending messages, that + // could make clients wait for response until timeout. 
if self.fsm.peer.is_handling_snapshot() { self.ctx.raft_metrics.invalid_proposal.is_applying_snapshot += 1; // TODO: replace to a more suitable error. @@ -4742,10 +4781,10 @@ where match util::check_region_epoch(msg, self.fsm.peer.region(), true) { Err(Error::EpochNotMatch(m, mut new_regions)) => { - // Attach the region which might be split from the current region. But it doesn't - // matter if the region is not split from the current region. If the region meta - // received by the TiKV driver is newer than the meta cached in the driver, the meta is - // updated. + // Attach the region which might be split from the current region. But it + // doesn't matter if the region is not split from the current region. If the + // region meta received by the TiKV driver is newer than the meta cached in the + // driver, the meta is updated. let requested_version = msg.get_header().get_region_epoch().version; self.collect_sibling_region(requested_version, &mut new_regions); self.ctx.raft_metrics.invalid_proposal.epoch_not_match += 1; @@ -4756,7 +4795,8 @@ where } } - /// Propose batched raft commands(if any) first, then propose the given raft command. + /// Propose batched raft commands(if any) first, then propose the given raft + /// command. fn propose_raft_command( &mut self, msg: RaftCmdRequest, @@ -4773,7 +4813,8 @@ where } /// Propose the raft command directly. - /// Note that this function introduces a reorder between this command and batched commands. + /// Note that this function introduces a reorder between this command and + /// batched commands. fn propose_raft_command_internal( &mut self, mut msg: RaftCmdRequest, @@ -4827,9 +4868,9 @@ where } // Note: - // The peer that is being checked is a leader. It might step down to be a follower later. It - // doesn't matter whether the peer is a leader or not. If it's not a leader, the proposing - // command log entry can't be committed. + // The peer that is being checked is a leader. 
It might step down to be a + // follower later. It doesn't matter whether the peer is a leader or not. If + // it's not a leader, the proposing command log entry can't be committed. let mut resp = RaftCmdResponse::default(); let term = self.fsm.peer.term(); @@ -4875,7 +4916,8 @@ where collect_cnt -= 1; // For example, A is split into B, A, and then B is split into C, B. if r.get_region_epoch().version >= max_version { - // It doesn't matter if it's a false positive, as it's limited by MAX_REGIONS_IN_ERROR. + // It doesn't matter if it's a false positive, as it's limited by + // MAX_REGIONS_IN_ERROR. collect_cnt += r.get_region_epoch().version - max_version; max_version = r.get_region_epoch().version; } @@ -4896,8 +4938,9 @@ where #[allow(clippy::if_same_then_else)] fn on_raft_gc_log_tick(&mut self, force_compact: bool) { if !self.fsm.peer.is_leader() { - // `compact_cache_to` is called when apply, there is no need to call `compact_to` here, - // snapshot generating has already been cancelled when the role becomes follower. + // `compact_cache_to` is called when apply, there is no need to call + // `compact_to` here, snapshot generating has already been cancelled + // when the role becomes follower. return; } if !self.fsm.peer.get_store().is_entry_cache_empty() || !self.ctx.cfg.hibernate_regions { @@ -4907,9 +4950,10 @@ where fail_point!("on_raft_gc_log_tick", |_| {}); debug_assert!(!self.fsm.stopped); - // As leader, we would not keep caches for the peers that didn't response heartbeat in the - // last few seconds. That happens probably because another TiKV is down. In this case if we - // do not clean up the cache, it may keep growing. + // As leader, we would not keep caches for the peers that didn't response + // heartbeat in the last few seconds. That happens probably because + // another TiKV is down. In this case if we do not clean up the cache, + // it may keep growing. 
let drop_cache_duration = self.ctx.cfg.raft_heartbeat_interval() + self.ctx.cfg.raft_entry_cache_life_time.0; let cache_alive_limit = Instant::now() - drop_cache_duration; @@ -4982,11 +5026,13 @@ where { std::cmp::max(first_idx + (last_idx - first_idx) / 2, replicated_idx) } else if replicated_idx < first_idx || last_idx - first_idx < 3 { - // In the current implementation one compaction can't delete all stale Raft logs. - // There will be at least 3 entries left after one compaction: + // In the current implementation one compaction can't delete all stale Raft + // logs. There will be at least 3 entries left after one compaction: + // ``` // |------------- entries needs to be compacted ----------| // [entries...][the entry at `compact_idx`][the last entry][new compaction entry] // |-------------------- entries will be left ----------------------| + // ``` self.ctx.raft_metrics.raft_log_gc_skipped.reserve_log += 1; return; } else if replicated_idx - first_idx < self.ctx.cfg.raft_log_gc_threshold @@ -5073,13 +5119,13 @@ where return; } - // When restart, the may_skip_split_check will be false. The split check will first - // check the region size, and then check whether the region should split. This - // should work even if we change the region max size. + // When restart, the may_skip_split_check will be false. The split check will + // first check the region size, and then check whether the region should split. + // This should work even if we change the region max size. // If peer says should update approximate size, update region size and check // whether the region should split. - // We assume that `may_skip_split_check` is only set true after the split check task is - // scheduled. + // We assume that `may_skip_split_check` is only set true after the split check + // task is scheduled. 
if self.fsm.peer.may_skip_split_check && self.fsm.peer.compaction_declined_bytes < self.ctx.cfg.region_split_check_diff().0 && self.fsm.peer.size_diff_hint < self.ctx.cfg.region_split_check_diff().0 @@ -5097,19 +5143,20 @@ where return; } - // When Lightning or BR is importing data to TiKV, their ingest-request may fail because of - // region-epoch not matched. So we hope TiKV do not check region size and split region during - // importing. + // When Lightning or BR is importing data to TiKV, their ingest-request may fail + // because of region-epoch not matched. So we hope TiKV do not check region size + // and split region during importing. if self.ctx.importer.get_mode() == SwitchMode::Import { return; } - // bulk insert too fast may cause snapshot stale very soon, worst case it stale before - // sending. so when snapshot is generating or sending, skip split check at most 3 times. - // There is a trade off between region size and snapshot success rate. Split check is - // triggered every 10 seconds. If a snapshot can't be generated in 30 seconds, it might be - // just too large to be generated. Split it into smaller size can help generation. check - // issue 330 for more info. + // bulk insert too fast may cause snapshot stale very soon, worst case it stale + // before sending. so when snapshot is generating or sending, skip split check + // at most 3 times. There is a trade off between region size and snapshot + // success rate. Split check is triggered every 10 seconds. If a snapshot can't + // be generated in 30 seconds, it might be just too large to be generated. Split + // it into smaller size can help generation. check issue 330 for more + // info. if self.fsm.peer.get_store().is_generating_snapshot() && self.fsm.skip_split_count < self.region_split_skip_max_count() { @@ -5541,8 +5588,8 @@ where return; } - // Do not check the bucket ranges if we want to split the region with a given key range, - // this is to avoid compatibility issues. 
+ // Do not check the bucket ranges if we want to split the region with a given + // key range, this is to avoid compatibility issues. let split_check_bucket_ranges = if !is_key_range { self.gen_bucket_range_for_update() } else { @@ -5608,8 +5655,8 @@ where } if let Some(ForceLeaderState::ForceLeader { time, .. }) = self.fsm.peer.force_leader { - // Clean up the force leader state after a timeout, since the PD recovery process may - // have been aborted for some reasons. + // Clean up the force leader state after a timeout, since the PD recovery + // process may have been aborted for some reasons. if time.saturating_elapsed() > cmp::max( self.ctx.cfg.peer_stale_state_check_interval.0, @@ -5660,8 +5707,9 @@ where // from the cluster or probably destroyed. // Meantime, D, E, F would not reach B, since it's not in the cluster anymore. // In this case, peer B would notice that the leader is missing for a long time, - // and it would check with pd to confirm whether it's still a member of the cluster. - // If not, it destroys itself as a stale peer which is removed out already. + // and it would check with pd to confirm whether it's still a member of the + // cluster. If not, it destroys itself as a stale peer which is removed out + // already. let state = self.fsm.peer.check_stale_state(self.ctx); fail_point!("peer_check_stale_state", state != StaleState::Valid, |_| {}); match state { @@ -5719,8 +5767,8 @@ where fn on_reactivate_memory_lock_tick(&mut self) { let mut pessimistic_locks = self.fsm.peer.txn_ext.pessimistic_locks.write(); - // If it is not leader, we needn't reactivate by tick. In-memory pessimistic lock will - // be enabled when this region becomes leader again. + // If it is not leader, we needn't reactivate by tick. In-memory pessimistic + // lock will be enabled when this region becomes leader again. // And this tick is currently only used for the leader transfer failure case. 
if !self.fsm.peer.is_leader() || pessimistic_locks.status != LocksStatus::TransferringLeader { @@ -5729,8 +5777,8 @@ where self.fsm.reactivate_memory_lock_ticks += 1; let transferring_leader = self.fsm.peer.raft_group.raft.lead_transferee.is_some(); - // `lead_transferee` is not set immediately after the lock status changes. So, we need - // the tick count condition to avoid reactivating too early. + // `lead_transferee` is not set immediately after the lock status changes. So, + // we need the tick count condition to avoid reactivating too early. if !transferring_leader && self.fsm.reactivate_memory_lock_ticks >= self.ctx.cfg.reactive_memory_lock_timeout_tick @@ -5839,8 +5887,8 @@ where Some(self.fsm.peer.approximate_size.unwrap_or_default() + size); self.fsm.peer.approximate_keys = Some(self.fsm.peer.approximate_keys.unwrap_or_default() + keys); - // The ingested file may be overlapped with the data in engine, so we need to check it - // again to get the accurate value. + // The ingested file may be overlapped with the data in engine, so we need to + // check it again to get the accurate value. self.fsm.peer.may_skip_split_check = false; if self.fsm.peer.is_leader() { self.on_pd_heartbeat_tick(); @@ -5849,13 +5897,13 @@ where } fn on_transfer_leader(&mut self, term: u64) { - // If the term has changed between proposing and executing the TransferLeader request, - // ignore it because this request may be stale. + // If the term has changed between proposing and executing the TransferLeader + // request, ignore it because this request may be stale. if term != self.fsm.peer.term() { return; } - // As the leader can propose the TransferLeader request successfully, the disk of - // the leader is probably not full. + // As the leader can propose the TransferLeader request successfully, the disk + // of the leader is probably not full. 
self.fsm.peer.execute_transfer_leader( self.ctx, self.fsm.peer.leader_id(), @@ -5865,7 +5913,8 @@ where self.fsm.has_ready = true; } - /// Verify and store the hash to state. return true means the hash has been stored successfully. + /// Verify and store the hash to state. return true means the hash has been + /// stored successfully. // TODO: Consider context in the function. fn verify_and_store_hash( &mut self, @@ -5915,8 +5964,9 @@ where if self.fsm.peer.consistency_state.index != INVALID_INDEX && !self.fsm.peer.consistency_state.hash.is_empty() { - // Maybe computing is too slow or computed result is dropped due to channel full. - // If computing is too slow, miss count will be increased twice. + // Maybe computing is too slow or computed result is dropped due to channel + // full. If computing is too slow, miss count will be increased + // twice. REGION_HASH_COUNTER.verify.miss.inc(); warn!( "hash belongs to wrong index, skip."; @@ -5939,15 +5989,17 @@ where } } -/// Checks merge target, returns whether the source peer should be destroyed and whether the source peer is -/// merged to this target peer. +/// Checks merge target, returns whether the source peer should be destroyed and +/// whether the source peer is merged to this target peer. /// /// It returns (`can_destroy`, `merge_to_this_peer`). /// -/// `can_destroy` is true when there is a network isolation which leads to a follower of a merge target -/// Region's log falls behind and then receive a snapshot with epoch version after merge. +/// `can_destroy` is true when there is a network isolation which leads to a +/// follower of a merge target Region's log falls behind and then receive a +/// snapshot with epoch version after merge. /// -/// `merge_to_this_peer` is true when `can_destroy` is true and the source peer is merged to this target peer. +/// `merge_to_this_peer` is true when `can_destroy` is true and the source peer +/// is merged to this target peer. 
pub fn maybe_destroy_source( meta: &StoreMeta, target_region_id: u64, @@ -5964,8 +6016,8 @@ pub fn maybe_destroy_source( region_epoch, target_region.get_region_epoch(), ); - // The target peer will move on, namely, it will apply a snapshot generated after merge, - // so destroy source peer. + // The target peer will move on, namely, it will apply a snapshot generated + // after merge, so destroy source peer. if region_epoch.get_version() > target_region.get_region_epoch().get_version() { return ( true, @@ -5975,7 +6027,8 @@ pub fn maybe_destroy_source( .get_id(), ); } - // Wait till the target peer has caught up logs and source peer will be destroyed at that time. + // Wait till the target peer has caught up logs and source peer will be + // destroyed at that time. return (false, false); } } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index f92d08dd3a4..635ff2c6693 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -124,25 +124,30 @@ pub struct StoreMeta { pub regions: HashMap, /// region_id -> reader pub readers: HashMap, - /// `MsgRequestPreVote`, `MsgRequestVote` or `MsgAppend` messages from newly split Regions shouldn't be - /// dropped if there is no such Region in this store now. So the messages are recorded temporarily and - /// will be handled later. + /// `MsgRequestPreVote`, `MsgRequestVote` or `MsgAppend` messages from newly + /// split Regions shouldn't be dropped if there is no such Region in this + /// store now. So the messages are recorded temporarily and will be handled + /// later. pub pending_msgs: RingQueue, /// The regions with pending snapshots. pub pending_snapshot_regions: Vec, - /// A marker used to indicate the peer of a Region has received a merge target message and waits to be destroyed. 
- /// target_region_id -> (source_region_id -> merge_target_region) + /// A marker used to indicate the peer of a Region has received a merge + /// target message and waits to be destroyed. target_region_id -> + /// (source_region_id -> merge_target_region) pub pending_merge_targets: HashMap>, - /// An inverse mapping of `pending_merge_targets` used to let source peer help target peer to clean up related entry. - /// source_region_id -> target_region_id + /// An inverse mapping of `pending_merge_targets` used to let source peer + /// help target peer to clean up related entry. source_region_id -> + /// target_region_id pub targets_map: HashMap, - /// `atomic_snap_regions` and `destroyed_region_for_snap` are used for making destroy overlapped regions - /// and apply snapshot atomically. + /// `atomic_snap_regions` and `destroyed_region_for_snap` are used for + /// making destroy overlapped regions and apply snapshot atomically. /// region_id -> wait_destroy_regions_map(source_region_id -> is_ready) - /// A target peer must wait for all source peer to ready before applying snapshot. + /// A target peer must wait for all source peer to ready before applying + /// snapshot. pub atomic_snap_regions: HashMap>, /// source_region_id -> need_atomic - /// Used for reminding the source peer to switch to ready in `atomic_snap_regions`. + /// Used for reminding the source peer to switch to ready in + /// `atomic_snap_regions`. pub destroyed_region_for_snap: HashMap, /// region_id -> `RegionReadProgress` pub region_read_progress: RegionReadProgressRegistry, @@ -191,7 +196,8 @@ impl StoreMeta { /// end_key > file.smallestkey /// start_key <= file.largestkey pub fn update_overlap_damaged_ranges(&mut self, fname: &str, start: &[u8], end: &[u8]) -> bool { - // `region_ranges` is promised to have no overlap so just check the first region. + // `region_ranges` is promised to have no overlap so just check the first + // region. 
if let Some((_, id)) = self .region_ranges .range((Excluded(start.to_owned()), Unbounded::>)) @@ -471,11 +477,12 @@ where pub feature_gate: FeatureGate, /// region_id -> (peer_id, is_splitting) /// Used for handling race between splitting and creating new peer. - /// An uninitialized peer can be replaced to the one from splitting iff they are exactly the same peer. + /// An uninitialized peer can be replaced to the one from splitting iff they + /// are exactly the same peer. /// /// WARNING: - /// To avoid deadlock, if you want to use `store_meta` and `pending_create_peers` together, - /// the lock sequence MUST BE: + /// To avoid deadlock, if you want to use `store_meta` and + /// `pending_create_peers` together, the lock sequence MUST BE: /// 1. lock the store_meta. /// 2. lock the pending_create_peers. pub pending_create_peers: Arc>>, @@ -485,8 +492,8 @@ where pub timer: SteadyTimer, pub trans: T, /// WARNING: - /// To avoid deadlock, if you want to use `store_meta` and `global_replication_state` together, - /// the lock sequence MUST BE: + /// To avoid deadlock, if you want to use `store_meta` and + /// `global_replication_state` together, the lock sequence MUST BE: /// 1. lock the store_meta. /// 2. lock the global_replication_state. pub global_replication_state: Arc>, @@ -895,7 +902,8 @@ impl PollHandler, St let mut delegate = PeerFsmDelegate::new(peer, &mut self.poll_ctx); delegate.handle_msgs(&mut self.peer_msg_buf); - // No readiness is generated and using sync write, skipping calling ready and release early. + // No readiness is generated and using sync write, skipping calling ready and + // release early. if !delegate.collect_ready() && self.poll_ctx.sync_write_worker.is_some() { if let HandleResult::StopAt { skip_end, .. 
} = &mut handle_result { *skip_end = true; @@ -1805,8 +1813,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } else { let mut need_gc_msg = util::is_vote_msg(msg.get_message()); if msg.has_extra_msg() { - // A learner can't vote so it sends the check-stale-peer msg to others to find out whether - // it is removed due to conf change or merge. + // A learner can't vote so it sends the check-stale-peer msg to others to find + // out whether it is removed due to conf change or merge. need_gc_msg |= msg.get_extra_msg().get_type() == ExtraMessageType::MsgCheckStalePeer; // For backward compatibility @@ -1834,8 +1842,9 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER return Ok(CheckMsgStatus::DropMsg); } // A tombstone peer may not apply the conf change log which removes itself. - // In this case, the local epoch is stale and the local peer can be found from region. - // We can compare the local peer id with to_peer_id to verify whether it is correct to create a new peer. + // In this case, the local epoch is stale and the local peer can be found from + // region. We can compare the local peer id with to_peer_id to verify whether it + // is correct to create a new peer. if let Some(local_peer_id) = util::find_peer(region, self.ctx.store_id()).map(|r| r.get_id()) { @@ -1980,7 +1989,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } let res = self.maybe_create_peer_internal(region_id, msg, is_local_first); - // If failed, i.e. Err or Ok(false), remove this peer data from `pending_create_peers`. + // If failed, i.e. Err or Ok(false), remove this peer data from + // `pending_create_peers`. 
if res.as_ref().map_or(true, |b| !*b) && is_local_first { let mut pending_create_peers = self.ctx.pending_create_peers.lock().unwrap(); if let Some(status) = pending_create_peers.get(®ion_id) { @@ -2021,13 +2031,16 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER let pending_create_peers = self.ctx.pending_create_peers.lock().unwrap(); match pending_create_peers.get(®ion_id) { Some(status) if *status == (msg.get_to_peer().get_id(), false) => (), - // If changed, it means this peer has been/will be replaced from the new one from splitting. + // If changed, it means this peer has been/will be replaced from the new one from + // splitting. _ => return Ok(false), } - // Note that `StoreMeta` lock is held and status is (peer_id, false) in `pending_create_peers` now. - // If this peer is created from splitting latter and then status in `pending_create_peers` is changed, - // that peer creation in `on_ready_split_region` must be executed **after** current peer creation - // because of the `StoreMeta` lock. + // Note that `StoreMeta` lock is held and status is (peer_id, false) + // in `pending_create_peers` now. If this peer is created from + // splitting latter and then status in `pending_create_peers` is + // changed, that peer creation in `on_ready_split_region` must be + // executed **after** current peer creation because of the + // `StoreMeta` lock. } if meta.overlap_damaged_range( @@ -2096,8 +2109,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER is_overlapped = true; if msg.get_region_epoch().get_version() > exist_region.get_region_epoch().get_version() { - // If new region's epoch version is greater than exist region's, the exist region - // may has been merged/splitted already. + // If new region's epoch version is greater than exist region's, the exist + // region may has been merged/splitted already. 
let _ = self.ctx.router.force_send( exist_region.get_id(), PeerMsg::CasualMessage(CasualMessage::RegionOverlapped), @@ -2538,9 +2551,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } - // When there is an import job running, the region which this sst belongs may has not been - // split from the origin region because the apply thread is so busy that it can not apply - // SplitRequest as soon as possible. So we can not delete this sst file. + // When there is an import job running, the region which this sst belongs may + // has not been split from the origin region because the apply thread is so busy + // that it can not apply SplitRequest as soon as possible. So we can not + // delete this sst file. if !validate_ssts.is_empty() && self.ctx.importer.get_mode() != SwitchMode::Import { let task = CleanupSstTask::ValidateSst { ssts: validate_ssts, diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index e3820a6d3ee..e552229aa0c 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -81,9 +81,9 @@ pub type TestCallback = Box; /// Variants of callbacks for `Msg`. /// - `Read`: a callback for read only requests including `StatusRequest`, -/// `GetRequest` and `SnapRequest` +/// `GetRequest` and `SnapRequest` /// - `Write`: a callback for write only requests including `AdminRequest` -/// `PutRequest`, `DeleteRequest` and `DeleteRangeRequest`. +/// `PutRequest`, `DeleteRequest` and `DeleteRangeRequest`. pub enum Callback { /// No callback. None, @@ -92,12 +92,14 @@ pub enum Callback { /// Write callback. Write { cb: WriteCallback, - /// `proposed_cb` is called after a request is proposed to the raft group successfully. - /// It's used to notify the caller to move on early because it's very likely the request - /// will be applied to the raftstore. + /// `proposed_cb` is called after a request is proposed to the raft + /// group successfully. 
It's used to notify the caller to move on early + /// because it's very likely the request will be applied to the + /// raftstore. proposed_cb: Option, - /// `committed_cb` is called after a request is committed and before it's being applied, and - /// it's guaranteed that the request will be successfully applied soon. + /// `committed_cb` is called after a request is committed and before + /// it's being applied, and it's guaranteed that the request will be + /// successfully applied soon. committed_cb: Option, trackers: SmallVec<[TimeTracker; 4]>, }, @@ -298,18 +300,20 @@ pub enum MergeResultKind { /// Its target peer applys `CommitMerge` log. FromTargetLog, /// Its target peer receives snapshot. - /// In step 1, this peer should mark `pending_move` is true and destroy its apply fsm. - /// Then its target peer will remove this peer data and apply snapshot atomically. + /// In step 1, this peer should mark `pending_move` is true and destroy its + /// apply fsm. Then its target peer will remove this peer data and apply + /// snapshot atomically. FromTargetSnapshotStep1, /// In step 2, this peer should destroy its peer fsm. FromTargetSnapshotStep2, - /// This peer is no longer needed by its target peer so it can be destroyed by itself. - /// It happens if and only if its target peer has been removed by conf change. + /// This peer is no longer needed by its target peer so it can be destroyed + /// by itself. It happens if and only if its target peer has been removed by + /// conf change. Stale, } -/// Some significant messages sent to raftstore. Raftstore will dispatch these messages to Raft -/// groups to update some important internal status. +/// Some significant messages sent to raftstore. Raftstore will dispatch these +/// messages to Raft groups to update some important internal status. #[derive(Debug)] pub enum SignificantMsg where @@ -389,7 +393,8 @@ pub enum CasualMessage { hash: Vec, }, - /// Approximate size of target region. 
This message can only be sent by split-check thread. + /// Approximate size of target region. This message can only be sent by + /// split-check thread. RegionApproximateSize { size: u64, }, @@ -578,15 +583,16 @@ pub enum PeerMsg { /// leader of the target raft group. If it's failed to be sent, callback /// usually needs to be called before dropping in case of resource leak. RaftCommand(RaftCommand), - /// Tick is periodical task. If target peer doesn't exist there is a potential - /// that the raft node will not work anymore. + /// Tick is periodical task. If target peer doesn't exist there is a + /// potential that the raft node will not work anymore. Tick(PeerTick), /// Result of applying committed entries. The message can't be lost. ApplyRes { res: ApplyTaskRes, }, - /// Message that can't be lost but rarely created. If they are lost, real bad - /// things happen like some peers will be considered dead in the group. + /// Message that can't be lost but rarely created. If they are lost, real + /// bad things happen like some peers will be considered dead in the + /// group. SignificantMsg(SignificantMsg), /// Start the FSM. Start, @@ -636,8 +642,9 @@ impl fmt::Debug for PeerMsg { } impl PeerMsg { - /// For some specific kind of messages, it's actually acceptable if failed to send it by - /// `significant_send`. This function determine if the current message is acceptable to fail. + /// For some specific kind of messages, it's actually acceptable if failed + /// to send it by `significant_send`. This function determine if the + /// current message is acceptable to fail. pub fn is_send_failure_ignorable(&self) -> bool { matches!( self, @@ -656,8 +663,8 @@ where invalid_ssts: Vec, }, - // Clear region size and keys for all regions in the range, so we can force them to re-calculate - // their size later. + // Clear region size and keys for all regions in the range, so we can force them to + // re-calculate their size later. 
ClearRegionSizeInRange { start_key: Vec, end_key: Vec, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 2bcaefff762..62721b5c1c9 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -296,8 +296,9 @@ impl ProposedAdminCmd { } struct CmdEpochChecker { - // Although it's a deque, because of the characteristics of the settings from `admin_cmd_epoch_lookup`, - // the max size of admin cmd is 2, i.e. split/merge and change peer. + // Although it's a deque, because of the characteristics of the settings from + // `admin_cmd_epoch_lookup`, the max size of admin cmd is 2, i.e. split/merge and change + // peer. proposed_admin_cmd: VecDeque>, term: u64, } @@ -324,10 +325,11 @@ impl CmdEpochChecker { } } - /// Check if the proposal can be proposed on the basis of its epoch and previous proposed admin cmds. + /// Check if the proposal can be proposed on the basis of its epoch and + /// previous proposed admin cmds. /// - /// Returns None if passing the epoch check, otherwise returns a index which is the last - /// admin cmd index conflicted with this proposal. + /// Returns None if passing the epoch check, otherwise returns a index which + /// is the last admin cmd index conflicted with this proposal. fn propose_check_epoch(&mut self, req: &RaftCmdRequest, term: u64) -> Option { self.maybe_update_term(term); let (check_ver, check_conf_ver) = if !req.has_admin_request() { @@ -473,12 +475,13 @@ pub struct ReadyResult { #[derive(Debug)] /// ForceLeader process would be: -/// 1. If it's hibernated, enter wait ticks state, and wake up the peer -/// 2. Enter pre force leader state, become candidate and send request vote to all peers -/// 3. Wait for the responses of the request vote, no reject should be received. -/// 4. Enter force leader state, become leader without leader lease -/// 5. Execute recovery plan(some remove-peer commands) -/// 6. 
After the plan steps are all applied, exit force leader state +/// - If it's hibernated, enter wait ticks state, and wake up the peer +/// - Enter pre force leader state, become candidate and send request vote to +/// all peers +/// - Wait for the responses of the request vote, no reject should be received. +/// - Enter force leader state, become leader without leader lease +/// - Execute recovery plan(some remove-peer commands) +/// - After the plan steps are all applied, exit force leader state pub enum ForceLeaderState { WaitTicks { syncer: UnsafeRecoveryForceLeaderSyncer, @@ -495,32 +498,34 @@ pub enum ForceLeaderState { }, } -// Following shared states are used while reporting to PD for unsafe recovery and shared among -// all the regions per their life cycle. +// Following shared states are used while reporting to PD for unsafe recovery +// and shared among all the regions per their life cycle. // The work flow is like: -// 1. report phase -// start_unsafe_recovery_report -// -> broadcast wait-apply commands -// -> wait for all the peers' apply indices meet their targets -// -> broadcast fill out report commands -// -> wait for all the peers fill out the reports for themselves -// -> send a store report (through store heartbeat) -// 2. force leader phase -// dispatch force leader commands -// -> wait for all the peers that received the command become force leader -// -> start_unsafe_recovery_report -// 3. 
plan execution phase -// dispatch recovery plans -// -> wait for all the creates, deletes and demotes to finish, for the demotes, -// procedures are: -// -> exit joint state if it is already in joint state -// -> demote failed voters, and promote self to be a voter if it is a learner -// -> exit joint state -// -> start_unsafe_recovery_report - -// Intends to use RAII to sync unsafe recovery procedures between peers, in addition to that, -// it uses a closure to avoid having a raft router as a member variable, which is statically -// dispatched, thus needs to propagate the generics everywhere. +// 1. report phase +// - start_unsafe_recovery_report +// - broadcast wait-apply commands +// - wait for all the peers' apply indices meet their targets +// - broadcast fill out report commands +// - wait for all the peers fill out the reports for themselves +// - send a store report (through store heartbeat) +// 2. force leader phase +// - dispatch force leader commands +// - wait for all the peers that received the command become force leader +// - start_unsafe_recovery_report +// 3. plan execution phase +// - dispatch recovery plans +// - wait for all the creates, deletes and demotes to finish, for the +// demotes, procedures are: +// - exit joint state if it is already in joint state +// - demote failed voters, and promote self to be a voter if it is a +// learner +// - exit joint state +// - start_unsafe_recovery_report +// +// Intends to use RAII to sync unsafe recovery procedures between peers, in +// addition to that, it uses a closure to avoid having a raft router as a member +// variable, which is statically dispatched, thus needs to propagate the +// generics everywhere. pub struct InvokeClosureOnDrop(Box); impl fmt::Debug for InvokeClosureOnDrop { @@ -732,15 +737,17 @@ where pub should_wake_up: bool, /// Whether this peer is destroyed asynchronously. /// If it's true, - /// 1. when merging, its data in storeMeta will be removed early by the target peer. - /// 2. 
all read requests must be rejected. + /// - when merging, its data in storeMeta will be removed early by the + /// target peer. + /// - all read requests must be rejected. pub pending_remove: bool, /// Force leader state is only used in online recovery when the majority of - /// peers are missing. In this state, it forces one peer to become leader out - /// of accordance with Raft election rule, and forbids any read/write proposals. - /// With that, we can further propose remove failed-nodes conf-change, to make - /// the Raft group forms majority and works normally later on. + /// peers are missing. In this state, it forces one peer to become leader + /// out of accordance with Raft election rule, and forbids any + /// read/write proposals. With that, we can further propose remove + /// failed-nodes conf-change, to make the Raft group forms majority and + /// works normally later on. /// /// For details, see the comment of `ForceLeaderState`. pub force_leader: Option, @@ -757,16 +764,17 @@ where /// The count of deleted keys since last reset. delete_keys_hint: u64, /// An inaccurate difference in region size after compaction. - /// It is used to trigger check split to update approximate size and keys after space reclamation - /// of deleted entries. + /// It is used to trigger check split to update approximate size and keys + /// after space reclamation of deleted entries. pub compaction_declined_bytes: u64, /// Approximate size of the region. pub approximate_size: Option, /// Approximate keys of the region. pub approximate_keys: Option, - /// Whether this region has scheduled a split check task. If we just splitted - /// the region or ingested one file which may be overlapped with the existed data, - /// reset the flag so that the region can be splitted again. + /// Whether this region has scheduled a split check task. 
If we just + /// splitted the region or ingested one file which may be overlapped + /// with the existed data, reset the flag so that the region can be + /// splitted again. pub may_skip_split_check: bool, /// The state for consistency check. @@ -776,7 +784,8 @@ where pub pending_request_snapshot_count: Arc, /// The index of last scheduled committed raft log. pub last_applying_idx: u64, - /// The index of last compacted raft log. It is used for the next compact log task. + /// The index of last compacted raft log. It is used for the next compact + /// log task. pub last_compacted_idx: u64, /// The index of the latest urgent proposal index. last_urgent_proposal_idx: u64, @@ -788,9 +797,10 @@ where pub raft_log_size_hint: u64, /// The write fence index. - /// If there are pessimistic locks, PrepareMerge can be proposed after applying to - /// this index. When a pending PrepareMerge exists, no more write commands should be proposed. - /// This avoids proposing pessimistic locks that are already deleted before PrepareMerge. + /// If there are pessimistic locks, PrepareMerge can be proposed after + /// applying to this index. When a pending PrepareMerge exists, no more + /// write commands should be proposed. This avoids proposing pessimistic + /// locks that are already deleted before PrepareMerge. pub prepare_merge_fence: u64, pub pending_prepare_merge: Option, @@ -816,8 +826,8 @@ where pub replication_mode_version: u64, /// The required replication state at current version. pub dr_auto_sync_state: DrAutoSyncState, - /// A flag that caches sync state. It's set to true when required replication - /// state is reached for current region. + /// A flag that caches sync state. It's set to true when required + /// replication state is reached for current region. 
pub replication_sync: bool, /// The known newest conf version and its corresponding peer list @@ -1092,9 +1102,10 @@ where pub fn maybe_append_merge_entries(&mut self, merge: &CommitMergeRequest) -> Option { let mut entries = merge.get_entries(); if entries.is_empty() { - // Though the entries is empty, it is possible that one source peer has caught up the logs - // but commit index is not updated. If other source peers are already destroyed, so the raft - // group will not make any progress, namely the source peer can not get the latest commit index anymore. + // Though the entries is empty, it is possible that one source peer has caught + // up the logs but commit index is not updated. If other source peers are + // already destroyed, so the raft group will not make any progress, namely the + // source peer can not get the latest commit index anymore. // Here update the commit index to let source apply rest uncommitted entries. return if merge.get_commit() > self.raft_group.raft.raft_log.committed { self.raft_group.raft.raft_log.commit_to(merge.get_commit()); @@ -1113,9 +1124,9 @@ where "commit_index" => self.raft_group.raft.raft_log.committed, ); if log_idx < self.raft_group.raft.raft_log.committed { - // There are maybe some logs not included in CommitMergeRequest's entries, like CompactLog, - // so the commit index may exceed the last index of the entires from CommitMergeRequest. - // If that, no need to append + // There are maybe some logs not included in CommitMergeRequest's entries, like + // CompactLog, so the commit index may exceed the last index of the entires from + // CommitMergeRequest. If that, no need to append if self.raft_group.raft.raft_log.committed - log_idx >= entries.len() as u64 { return None; } @@ -1126,9 +1137,10 @@ where let last_log = entries.last().unwrap(); if last_log.term > self.term() { - // Hack: In normal flow, when leader sends the entries, it will use a term that's not less - // than the last log term. 
And follower will update its states correctly. For merge, we append - // the log without raft, so we have to take care of term explicitly to get correct metadata. + // Hack: In normal flow, when leader sends the entries, it will use a term + // that's not less than the last log term. And follower will update its states + // correctly. For merge, we append the log without raft, so we have to take care + // of term explicitly to get correct metadata. info!( "become follower for new logs"; "new_log_term" => last_log.term, @@ -1149,7 +1161,8 @@ where .map(|(_, last_index)| last_index) } - /// Tries to destroy itself. Returns a job (if needed) to do more cleaning tasks. + /// Tries to destroy itself. Returns a job (if needed) to do more cleaning + /// tasks. pub fn maybe_destroy(&mut self, ctx: &PollContext) -> Option { if self.pending_remove { info!( @@ -1193,15 +1206,15 @@ where // There is no applying snapshot or snapshot is canceled so the `apply_snap_ctx` // should be set to None. - // 1. If the snapshot is canceled, the `apply_snap_ctx` should be None. - // Remember the snapshot should not be canceled and the context should - // be None only after applying snapshot in normal case. But here is safe - // becasue this peer is about to destroy and `pending_remove` will be true, - // namely no more ready will be fetched. - // 2. If there is no applying snapshot, the `apply_snap_ctx` should also be None. - // It's possible that the snapshot was canceled successfully before but - // `cancel_applying_snap` returns false. If so, at this time, `apply_snap_ctx` - // is Some and should be set to None. + // - If the snapshot is canceled, the `apply_snap_ctx` should be None. Remember + // the snapshot should not be canceled and the context should be None only + // after applying snapshot in normal case. But here is safe because this peer + // is about to destroy and `pending_remove` will be true, namely no more ready + // will be fetched. 
+ // - If there is no applying snapshot, the `apply_snap_ctx` should also be None. + // It's possible that the snapshot was canceled successfully before but + // `cancel_applying_snap` returns false. If so, at this time, `apply_snap_ctx` + // is Some and should be set to None. self.apply_snap_ctx = None; self.pending_remove = true; @@ -1257,14 +1270,15 @@ where panic!("{} unexpected pending states {:?}", self.tag, status); } } else { - // The status is inserted when it's created. It will be removed in following cases: - // 1. By appy worker as it fails to split due to region state key. This is - // impossible to reach this code path because the delete write batch is not - // persisted yet. - // 2. By store fsm as it fails to create peer, which is also invalid obviously. - // 3. By peer fsm after persisting snapshot, then it should be initialized. - // 4. By peer fsm after split. - // 5. By peer fsm when destroy, which should go the above branch instead. + // The status is inserted when it's created. It will be removed in following + // cases: + // - By apply worker as it fails to split due to region state key. This is + // impossible to reach this code path because the delete write batch is not + // persisted yet. + // - By store fsm as it fails to create peer, which is also invalid obviously. + // - By peer fsm after persisting snapshot, then it should be initialized. + // - By peer fsm after split. + // - By peer fsm when destroy, which should go the above branch instead. (None, false) } } else { @@ -1274,16 +1288,16 @@ where // Set Tombstone state explicitly let mut kv_wb = engines.kv.write_batch(); let mut raft_wb = engines.raft.log_batch(1024); - // Raft log gc should be flushed before being destroyed, so last_compacted_idx has to be - // the minimal index that may still have logs. + // Raft log gc should be flushed before being destroyed, so last_compacted_idx + // has to be the minimal index that may still have logs. 
let last_compacted_idx = self.last_compacted_idx; self.mut_store() .clear_meta(last_compacted_idx, &mut kv_wb, &mut raft_wb)?; - // StoreFsmDelegate::check_msg use both epoch and region peer list to check whether - // a message is targing a staled peer. But for an uninitialized peer, both epoch and - // peer list are empty, so a removed peer will be created again. Saving current peer - // into the peer list of region will fix this problem. + // StoreFsmDelegate::check_msg use both epoch and region peer list to check + // whether a message is targeting a staled peer. But for an uninitialized peer, + // both epoch and peer list are empty, so a removed peer will be created again. + // Saving current peer into the peer list of region will fix this problem. if !self.get_store().is_initialized() { region.mut_peers().push(self.peer.clone()); } @@ -1371,8 +1385,8 @@ where let last_index = self.raft_group.raft.raft_log.last_index(); for (id, pr) in status.progress.unwrap().iter() { // Even a recent inactive node is also considered. If we put leader into sleep, - // followers or learners may not sync its logs for a long time and become unavailable. - // We choose availability instead of performance in this case. + // followers or learners may not sync its logs for a long time and become + // unavailable. We choose availability instead of performance in this case. if *id == self.peer.get_id() { continue; } @@ -1470,13 +1484,13 @@ where ) { if self.region().get_region_epoch().get_version() < region.get_region_epoch().get_version() { - // Epoch version changed, disable read on the localreader for this region. + // Epoch version changed, disable read on the local reader for this region. self.leader_lease.expire_remote_lease(); } self.mut_store().set_region(region.clone()); let progress = ReadProgress::region(region); - // Always update read delegate's region to avoid stale region info after a follower - // becoming a leader. 
+ // Always update read delegate's region to avoid stale region info after a + // follower becoming a leader. self.maybe_update_read_progress(reader, progress); // Update leader info @@ -1535,7 +1549,8 @@ where self.apply_snap_ctx.is_some() || self.get_store().is_applying_snapshot() } - /// Returns `true` if the raft group has replicated a snapshot but not committed it yet. + /// Returns `true` if the raft group has replicated a snapshot but not + /// committed it yet. #[inline] pub fn has_pending_snapshot(&self) -> bool { self.get_pending_snapshot().is_some() @@ -1875,11 +1890,13 @@ where // 1. Current leader hasn't communicated with this peer. // 2. This peer does not exist yet(maybe it is created but not initialized) // - // The correctness of region merge depends on the fact that all target peers must exist during merging. - // (PD rely on `pending_peers` to check whether all target peers exist) + // The correctness of region merge depends on the fact that all target peers + // must exist during merging. (PD rely on `pending_peers` to check whether all + // target peers exist) // // So if the `matched` is 0, it must be a pending peer. - // It can be ensured because `truncated_index` must be greater than `RAFT_INIT_LOG_INDEX`(5). + // It can be ensured because `truncated_index` must be greater than + // `RAFT_INIT_LOG_INDEX`(5). if progress.matched < truncated_idx { if let Some(p) = self.get_peer_from_cache(id) { pending_peers.push(p); @@ -1999,8 +2016,8 @@ where // Updates the `leader_missing_time` according to the current state. // // If we are checking this it means we suspect the leader might be missing. - // Mark down the time when we are called, so we can check later if it's been longer than it - // should be. + // Mark down the time when we are called, so we can check later if it's been + // longer than it should be. 
match self.leader_missing_time { None => { self.leader_missing_time = Instant::now().into(); @@ -2102,27 +2119,30 @@ where self.lead_transferee = self.raft_group.raft.lead_transferee.unwrap_or_default(); } - /// Correctness depends on the order between calling this function and notifying other peers - /// the new commit index. - /// It is due to the interaction between lease and split/merge.(details are decribed below) + /// Correctness depends on the order between calling this function and + /// notifying other peers the new commit index. + /// It is due to the interaction between lease and split/merge.(details are + /// described below) /// - /// Note that in addition to the hearbeat/append msg, the read index response also can notify - /// other peers the new commit index. There are three place where TiKV handles read index resquest. - /// The first place is in raft-rs, so it's like hearbeat/append msg, call this function and - /// then send the response. The second place is in `Step`, we should use the commit index - /// of `PeerStorage` which is the greatest commit index that can be observed outside. - /// The third place is in `read_index`, handle it like the second one. + /// Note that in addition to the heartbeat/append msg, the read index + /// response also can notify other peers the new commit index. There are + /// three place where TiKV handles read index request. The first place is in + /// raft-rs, so it's like heartbeat/append msg, call this function and then + /// send the response. The second place is in `Step`, we should use the + /// commit index of `PeerStorage` which is the greatest commit index that + /// can be observed outside. The third place is in `read_index`, handle it + /// like the second one. 
fn on_leader_commit_idx_changed(&mut self, pre_commit_index: u64, commit_index: u64) { if commit_index <= pre_commit_index || !self.is_leader() { return; } - // The admin cmds in `CmdEpochChecker` are proposed by the current leader so we can - // use it to get the split/prepare-merge cmds which was committed just now. + // The admin cmds in `CmdEpochChecker` are proposed by the current leader so we + // can use it to get the split/prepare-merge cmds which was committed just now. - // BatchSplit and Split cmd are mutually exclusive because they both change epoch's - // version so only one of them can be proposed and the other one will be rejected - // by `CmdEpochChecker`. + // BatchSplit and Split cmd are mutually exclusive because they both change + // epoch's version so only one of them can be proposed and the other one will be + // rejected by `CmdEpochChecker`. let last_split_idx = self .cmd_epoch_checker .last_cmd_index(AdminCmdType::BatchSplit) @@ -2179,7 +2199,8 @@ where // by apply worker. So we have to wait here. // Please note that commit_index can't be used here. When applying a snapshot, // a stale heartbeat can make the leader think follower has already applied - // the snapshot, and send remaining log entries, which may increase commit_index. + // the snapshot, and send remaining log entries, which may increase + // commit_index. // TODO: add more test self.last_applying_idx == self.get_store().applied_index() // Requesting snapshots also triggers apply workers to write @@ -2193,8 +2214,8 @@ where fn ready_to_handle_read(&self) -> bool { // TODO: It may cause read index to wait a long time. - // There may be some values that are not applied by this leader yet but the old leader, - // if applied_term isn't equal to current term. + // There may be some values that are not applied by this leader yet but the old + // leader, if applied_term isn't equal to current term. 
self.get_store().applied_term() == self.term() // There may be stale read if the old leader splits really slow, // the new region may already elected a new leader while @@ -2209,9 +2230,9 @@ where fn ready_to_handle_unsafe_replica_read(&self, read_index: u64) -> bool { // Wait until the follower applies all values before the read. There is still a - // problem if the leader applies fewer values than the follower, the follower read - // could get a newer value, and after that, the leader may read a stale value, - // which violates linearizability. + // problem if the leader applies fewer values than the follower, the follower + // read could get a newer value, and after that, the leader may read a stale + // value, which violates linearizability. self.get_store().applied_index() >= read_index // If it is in pending merge state(i.e. applied PrepareMerge), the data may be stale. // TODO: Add a test to cover this case @@ -2271,17 +2292,19 @@ where /// Returns whether it's valid to handle raft ready. /// /// The snapshot process order would be: - /// 1. Get the snapshot from the ready - /// 2. Wait for the notify of persisting this ready through `Peer::on_persist_ready` - /// 3. Schedule the snapshot task to region worker through `schedule_applying_snapshot` - /// 4. Wait for applying snapshot to complete(`check_snap_status`) + /// - Get the snapshot from the ready + /// - Wait for the notify of persisting this ready through + /// `Peer::on_persist_ready` + /// - Schedule the snapshot task to region worker through + /// `schedule_applying_snapshot` + /// - Wait for applying snapshot to complete(`check_snap_status`) /// Then it's valid to handle the next ready. fn check_snap_status(&mut self, ctx: &mut PollContext) -> bool { if let Some(snap_ctx) = self.apply_snap_ctx.as_ref() { if !snap_ctx.scheduled { // There is a snapshot from ready but it is not scheduled because the ready has - // not been persisted yet. 
We should wait for the notification of persisting ready - // and do not get a new ready. + // not been persisted yet. We should wait for the notification of persisting + // ready and do not get a new ready. return false; } } @@ -2334,7 +2357,7 @@ where if self.unsafe_recovery_state.is_some() { debug!("unsafe recovery finishes applying a snapshot"); - self.unsafe_recovery_maybe_finish_wait_apply(/*force=*/ false); + self.unsafe_recovery_maybe_finish_wait_apply(/* force= */ false); } } // If `apply_snap_ctx` is none, it means this snapshot does not @@ -2402,9 +2425,9 @@ where } let meta = ctx.store_meta.lock().unwrap(); - // For merge process, the stale source peer is destroyed asynchronously when applying - // snapshot or creating new peer. So here checks whether there is any overlap, if so, - // wait and do not handle raft ready. + // For merge process, the stale source peer is destroyed asynchronously when + // applying snapshot or creating new peer. So here checks whether there is any + // overlap, if so, wait and do not handle raft ready. if let Some(wait_destroy_regions) = meta.atomic_snap_regions.get(&self.region_id) { for (source_region_id, is_ready) in wait_destroy_regions { if !is_ready { @@ -2596,8 +2619,9 @@ where last.raft_msgs.push(persisted_msgs); } } else { - // If this ready don't need to be persisted and there is no previous unpersisted ready, - // we can safely consider it is persisted so the persisted msgs can be sent immediately. + // If this ready don't need to be persisted and there is no previous unpersisted + // ready, we can safely consider it is persisted so the persisted msgs can be + // sent immediately. self.persisted_number = ready_number; if !persisted_msgs.is_empty() { @@ -2606,8 +2630,8 @@ where self.send_raft_messages(ctx, msgs); } - // The commit index and messages of light ready should be empty because no data needs - // to be persisted. 
+ // The commit index and messages of light ready should be empty because no data + // needs to be persisted. let mut light_rd = self.raft_group.advance_append(ready); self.add_light_ready_metric(&light_rd, &mut ctx.raft_metrics.ready); @@ -2703,9 +2727,9 @@ where .find_propose_time(entry.get_term(), entry.get_index()); if let Some(propose_time) = propose_time { // We must renew current_time because this value may be created a long time ago. - // If we do not renew it, this time may be smaller than propose_time of a command, - // which was proposed in another thread while this thread receives its AppendEntriesResponse - // and is ready to calculate its commit-log-duration. + // If we do not renew it, this time may be smaller than propose_time of a + // command, which was proposed in another thread while this thread receives its + // AppendEntriesResponse and is ready to calculate its commit-log-duration. ctx.current_time.replace(monotonic_raw_now()); ctx.raft_metrics.commit_log.observe(duration_to_sec( (ctx.current_time.unwrap() - propose_time).to_std().unwrap(), @@ -2880,7 +2904,8 @@ where self.mut_store().update_cache_persisted(persist_index); if let Some(ForceLeaderState::ForceLeader { .. }) = self.force_leader { - // forward commit index, the committed entries will be applied in the next raft base tick round + // forward commit index, the committed entries will be applied in the next raft + // base tick round self.maybe_force_forward_commit_index(); } } @@ -2922,7 +2947,8 @@ where let persist_index = self.raft_group.raft.raft_log.persisted; if let Some(ForceLeaderState::ForceLeader { .. 
}) = self.force_leader { - // forward commit index, the committed entries will be applied in the next raft base tick round + // forward commit index, the committed entries will be applied in the next raft + // base tick round self.maybe_force_forward_commit_index(); } self.mut_store().update_cache_persisted(persist_index); @@ -3022,7 +3048,8 @@ where } } - /// Responses to the ready read index request on the replica, the replica is not a leader. + /// Responses to the ready read index request on the replica, the replica is + /// not a leader. fn post_pending_read_index_on_replica(&mut self, ctx: &mut PollContext) { while let Some(mut read) = self.pending_reads.pop_front() { // The response of this read index request is lost, but we need it for @@ -3101,9 +3128,9 @@ where // update the `read_index` of read request that before this successful // `ready`. if !self.is_leader() { - // NOTE: there could still be some pending reads proposed by the peer when it was - // leader. They will be cleared in `clear_uncommitted_on_role_change` later in - // the function. + // NOTE: there could still be some pending reads proposed by the peer when it + // was leader. They will be cleared in `clear_uncommitted_on_role_change` later + // in the function. self.pending_reads.advance_replica_reads(states); self.post_pending_read_index_on_replica(ctx); } else { @@ -3346,8 +3373,8 @@ where ) { self.propose_normal(ctx, req) } else { - // If leader node is disk full, try to transfer leader to a node with disk usage normal to - // keep write availablity not downback. + // If leader node is disk full, try to transfer leader to a node with disk usage + // normal to keep write availability not downback. // if majority node is disk full, to transfer leader or not is not necessary. // Note: Need to exclude learner node. 
if maybe_transfer_leader && !self.disk_full_peers.majority { @@ -3402,8 +3429,9 @@ where Ok(Either::Left(idx)) => { let has_applied_to_current_term = self.has_applied_to_current_term(); if has_applied_to_current_term { - // After this peer has applied to current term and passed above checking including `cmd_epoch_checker`, - // we can safely guarantee that this proposal will be committed if there is no abnormal leader transfer + // After this peer has applied to current term and passed above checking + // including `cmd_epoch_checker`, we can safely guarantee + // that this proposal will be committed if there is no abnormal leader transfer // in the near future. Thus proposed callback can be called. cb.invoke_proposed(); } @@ -3468,7 +3496,8 @@ where self.proposals.push(p); } - // TODO: set higher election priority of voter/incoming voter than demoting voter + // TODO: set higher election priority of voter/incoming voter than demoting + // voter /// Validate the `ConfChange` requests and check whether it's safe to /// propose these conf change requests. /// It's safe iff at least the quorum of the Raft group is still healthy @@ -3549,8 +3578,9 @@ where } } - // Multiple changes that only effect learner will not product `IncommingVoter` or `DemotingVoter` - // after apply, but raftstore layer and PD rely on these roles to detect joint state + // Multiple changes that only effect learner will not product `IncommingVoter` + // or `DemotingVoter` after apply, but raftstore layer and PD rely on these + // roles to detect joint state if kind != ConfChangeKind::Simple && only_learner_change { return Err(box_err!( "{} invalid conf change request, multiple changes that only effect learner", @@ -3630,8 +3660,8 @@ where msg.set_msg_type(eraftpb::MessageType::MsgTransferLeader); msg.set_from(self.peer_id()); // log term here represents the term of last log. For leader, the term of last - // log is always its current term. 
Not just set term because raft library forbids - // setting it for MsgTransferLeader messages. + // log is always its current term. Not just set term because raft library + // forbids setting it for MsgTransferLeader messages. msg.set_log_term(self.term()); self.raft_group.raft.msgs.push(msg); true @@ -3720,8 +3750,9 @@ where self.pending_reads.has_unresolved() } - /// `ReadIndex` requests could be lost in network, so on followers commands could queue in - /// `pending_reads` forever. Sending a new `ReadIndex` periodically can resolve this. + /// `ReadIndex` requests could be lost in network, so on followers commands + /// could queue in `pending_reads` forever. Sending a new `ReadIndex` + /// periodically can resolve this. pub fn retry_pending_reads(&mut self, cfg: &Config) { if self.is_leader() || !self.pending_reads.check_needs_retry(cfg) @@ -3779,11 +3810,11 @@ where let now = monotonic_raw_now(); if self.is_leader() { match self.inspect_lease() { - // Here combine the new read request with the previous one even if the lease expired is - // ok because in this case, the previous read index must be sent out with a valid - // lease instead of a suspect lease. So there must no pending transfer-leader proposals - // before or after the previous read index, and the lease can be renewed when get - // heartbeat responses. + // Here combine the new read request with the previous one even if the lease expired + // is ok because in this case, the previous read index must be sent out with a valid + // lease instead of a suspect lease. So there must no pending transfer-leader + // proposals before or after the previous read index, and the lease can be renewed + // when get heartbeat responses. LeaseState::Valid | LeaseState::Expired => { // Must use the commit index of `PeerStorage` instead of the commit index // in raft-rs which may be greater than the former one. 
@@ -3796,14 +3827,15 @@ where .get(0) .map(|req| req.has_read_index()) .unwrap_or_default(); - // A read index request or a read with addition request always needs the response of - // checking memory lock for async commit, so we cannot apply the optimization here + // A read index request or a read with addition request always needs the + // response of checking memory lock for async commit, so we cannot apply the + // optimization here if !is_read_index_request && read.addition_request.is_none() && read.propose_time + max_lease > now { - // A read request proposed in the current lease is found; combine the new - // read request to that previous one, so that no proposing needed. + // A read request proposed in the current lease is found; combine the + // new read request to that previous one, so that no proposing needed. read.push_command(req, cb, commit_index); return false; } @@ -3816,9 +3848,9 @@ where } } - // When a replica cannot detect any leader, `MsgReadIndex` will be dropped, which would - // cause a long time waiting for a read response. Then we should return an error directly - // in this situation. + // When a replica cannot detect any leader, `MsgReadIndex` will be dropped, + // which would cause a long time waiting for a read response. Then we + // should return an error directly in this situation. if !self.is_leader() && self.leader_id() == INVALID_ID { poll_ctx.raft_metrics.invalid_proposal.read_index_no_leader += 1; // The leader may be hibernated, send a message for trying to awaken the leader. 
@@ -3959,8 +3991,9 @@ where "min_matched" => min_m, "min_committed" => min_c, ); - // Reset `min_matched` to `min_committed`, since the raft log at `min_committed` is - // known to be committed in all peers, all of the peers should also have replicated it + // Reset `min_matched` to `min_committed`, since the raft log at `min_committed` + // is known to be committed in all peers, all of the peers should also have + // replicated it min_m = min_c; } Ok((min_m, min_c)) @@ -3976,7 +4009,8 @@ where if self.prepare_merge_fence > 0 { let applied_index = self.get_store().applied_index(); if applied_index >= self.prepare_merge_fence { - // Check passed, clear fence and start proposing pessimistic locks and PrepareMerge. + // Check passed, clear fence and start proposing pessimistic locks and + // PrepareMerge. self.prepare_merge_fence = 0; self.pending_prepare_merge = None; passed_merge_fence = true; @@ -4055,10 +4089,10 @@ where )); }; - // Record current proposed index. If there are some in-memory pessimistic locks, we should - // wait until applying to the proposed index before proposing pessimistic locks and - // PrepareMerge. Otherwise, if an already proposed command will remove a pessimistic lock, - // we will make some deleted locks appear again. + // Record current proposed index. If there are some in-memory pessimistic locks, + // we should wait until applying to the proposed index before proposing + // pessimistic locks and PrepareMerge. Otherwise, if an already proposed command + // will remove a pessimistic lock, we will make some deleted locks appear again. if !passed_merge_fence { let pessimistic_locks = self.txn_ext.pessimistic_locks.read(); if !pessimistic_locks.is_empty() { @@ -4104,9 +4138,10 @@ where pessimistic_locks.status = LocksStatus::MergingRegion; return Ok(()); } - // The proposed pessimistic locks here will also be carried in CommitMerge. Check the size - // to avoid CommitMerge exceeding the size limit of a raft entry. 
This check is a inaccurate - // check. We will check the size again accurately later using the protobuf encoding. + // The proposed pessimistic locks here will also be carried in CommitMerge. + // Check the size to avoid CommitMerge exceeding the size limit of a raft entry. + // This check is a inaccurate check. We will check the size again accurately + // later using the protobuf encoding. if pessimistic_locks.memory_size > size_limit { return Err(box_err!( "pessimistic locks size {} exceed size limit {}, skip merging.", @@ -4180,9 +4215,11 @@ where /// Propose normal request to raft /// - /// Returns Ok(Either::Left(index)) means the proposal is proposed successfully and is located on `index` position. - /// Ok(Either::Right(index)) means the proposal is rejected by `CmdEpochChecker` and the `index` is the position of - /// the last conflict admin cmd. + /// Returns Ok(Either::Left(index)) means the proposal is proposed + /// successfully and is located on `index` position. + /// Ok(Either::Right(index)) means the proposal is rejected by + /// `CmdEpochChecker` and the `index` is the position of the last + /// conflict admin cmd. fn propose_normal( &mut self, poll_ctx: &mut PollContext, @@ -4209,8 +4246,8 @@ where poll_ctx.raft_metrics.propose.normal += 1; if self.has_applied_to_current_term() { - // Only when applied index's term is equal to current leader's term, the information - // in epoch checker is up to date and can be used to check epoch. + // Only when applied index's term is equal to current leader's term, the + // information in epoch checker is up to date and can be used to check epoch. if let Some(index) = self .cmd_epoch_checker .propose_check_epoch(&req, self.term()) @@ -4218,8 +4255,9 @@ where return Ok(Either::Right(index)); } } else if req.has_admin_request() { - // The admin request is rejected because it may need to update epoch checker which - // introduces an uncertainty and may breaks the correctness of epoch checker. 
+ // The admin request is rejected because it may need to update epoch checker + // which introduces an uncertainty and may breaks the correctness of epoch + // checker. return Err(box_err!( "{} peer has not applied to current term, applied_term {}, current_term {}", self.tag, @@ -4232,7 +4270,8 @@ where let ctx = match self.pre_propose(poll_ctx, &mut req) { Ok(ctx) => ctx, Err(e) => { - // Skipping PrepareMerge is logged when the PendingPrepareMerge error is generated. + // Skipping PrepareMerge is logged when the PendingPrepareMerge error is + // generated. if !matches!(e, Error::PendingPrepareMerge) { warn!( "skip proposal"; @@ -4401,7 +4440,8 @@ where }; // transfer leader command doesn't need to replicate log and apply, so we - // return immediately. Note that this command may fail, we can view it just as an advice + // return immediately. Note that this command may fail, we can view it just as + // an advice cb.invoke_with_response(make_transfer_leader_response()); transferred @@ -4412,9 +4452,10 @@ where // 2. Removing the leader is not allowed in the configuration; // 3. The conf change makes the raft group not healthy; // 4. The conf change is dropped by raft group internally. - /// Returns Ok(Either::Left(index)) means the proposal is proposed successfully and is located on `index` position. - /// Ok(Either::Right(index)) means the proposal is rejected by `CmdEpochChecker` and the `index` is the position of - /// the last conflict admin cmd. + /// Returns Ok(Either::Left(index)) means the proposal is proposed + /// successfully and is located on `index` position. Ok(Either:: + /// Right(index)) means the proposal is rejected by `CmdEpochChecker` and + /// the `index` is the position of the last conflict admin cmd. 
fn propose_conf_change( &mut self, ctx: &mut PollContext, @@ -4434,9 +4475,10 @@ where self.tag )); } - // Actually, according to the implementation of conf change in raft-rs, this check must be - // passed if the previous check that `pending_conf_index` should be less than or equal to - // `self.get_store().applied_index()` is passed. + // Actually, according to the implementation of conf change in raft-rs, this + // check must be passed if the previous check that `pending_conf_index` + // should be less than or equal to `self.get_store().applied_index()` is + // passed. if self.get_store().applied_term() != self.term() { return Err(box_err!( "{} peer has not applied to current term, applied_term {}, current_term {}", @@ -4618,7 +4660,8 @@ where normal_peers.insert(peer_id); } if let Some(pr) = self.raft_group.raft.prs().get(peer_id) { - // status 3-normal, 2-almostfull, 1-alreadyfull, only for simplying the sort func belowing. + // status 3-normal, 2-almostfull, 1-alreadyfull, only for simplying the sort + // func belowing. let mut status = 3; if let Some(usg) = usage { status = match usg { @@ -4653,7 +4696,8 @@ where return; } - // Reverse sort peers based on `next_idx`, `usage` and `store healthy status`, then try to get a potential quorum. + // Reverse sort peers based on `next_idx`, `usage` and `store healthy status`, + // then try to get a potential quorum. next_idxs.sort_by(|x, y| { if x.3 == y.3 { y.1.cmp(&x.1) @@ -4709,8 +4753,8 @@ where self.dangerous_majority_set = has_dangurous_set; - // For the Peer with AlreadFull in potential quorum set, we still need to send logs to it. - // To support incoming configure change. + // For the Peer with AlreadFull in potential quorum set, we still need to send + // logs to it. To support incoming configure change. 
if quorum_ok { for peer in potential_quorum { if let Some(x) = self.disk_full_peers.peers.get_mut(&peer) { @@ -4763,7 +4807,8 @@ where } } - // if there are some peers with disk already full status in the majority set, should not allowed. + // if there are some peers with disk already full status in the majority set, + // should not allowed. if self.dangerous_majority_set { return false; } @@ -4775,7 +4820,8 @@ where if matches!(disk_full_opt, DiskFullOpt::AllowedOnAlmostFull) && self.disk_full_peers.peers.values().any(|x| x.1) { - // Majority peers are in disk full status but the request carries a special flag. + // Majority peers are in disk full status but the request carries a special + // flag. return true; } false @@ -5039,13 +5085,14 @@ where } // There could be two cases: - // 1. Target peer already exists but has not established communication with leader yet - // 2. Target peer is added newly due to member change or region split, but it's not - // created yet - // For both cases the region start key and end key are attached in RequestVote and - // Heartbeat message for the store of that peer to check whether to create a new peer - // when receiving these messages, or just to wait for a pending region split to perform - // later. + // - Target peer already exists but has not established communication with + // leader yet + // - Target peer is added newly due to member change or region split, but it's + // not created yet + // For both cases the region start key and end key are attached in RequestVote + // and Heartbeat message for the store of that peer to check whether to create a + // new peer when receiving these messages, or just to wait for a pending region + // split to perform later. if self.get_store().is_initialized() && is_initial_msg(&msg) { let region = self.region(); send_msg.set_start_key(region.get_start_key().to_vec()); @@ -5247,7 +5294,8 @@ where Ok(()) } - /// Update states of the peer which can be changed in the previous raft tick. 
+ /// Update states of the peer which can be changed in the previous raft + /// tick. pub fn post_raft_group_tick(&mut self) { self.lead_transferee = self.raft_group.raft.lead_transferee.unwrap_or_default(); } @@ -5446,7 +5494,8 @@ fn make_transfer_leader_response() -> RaftCmdResponse { resp } -// The Raft message context for a MsgTransferLeader if it is a reply of a TransferLeader command. +// The Raft message context for a MsgTransferLeader if it is a reply of a +// TransferLeader command. pub const TRANSFER_LEADER_COMMAND_REPLY_CTX: &[u8] = &[1]; /// A poor version of `Peer` to avoid port generic variables everywhere. diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 76bb95b0d39..cec0d44f081 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -147,17 +147,18 @@ pub fn recover_from_applying_state( let raft_state = box_try!(engines.raft.get_raft_state(region_id)).unwrap_or_default(); - // if we recv append log when applying snapshot, last_index in raft_local_state will - // larger than snapshot_index. since raft_local_state is written to raft engine, and - // raft write_batch is written after kv write_batch, raft_local_state may wrong if - // restart happen between the two write. so we copy raft_local_state to kv engine - // (snapshot_raft_state), and set snapshot_raft_state.last_index = snapshot_index. - // after restart, we need check last_index. + // if we recv append log when applying snapshot, last_index in raft_local_state + // will larger than snapshot_index. since raft_local_state is written to + // raft engine, and raft write_batch is written after kv write_batch, + // raft_local_state may wrong if restart happen between the two write. so we + // copy raft_local_state to kv engine (snapshot_raft_state), and set + // snapshot_raft_state.last_index = snapshot_index. after restart, we need + // check last_index. 
if last_index(&snapshot_raft_state) > last_index(&raft_state) { // There is a gap between existing raft logs and snapshot. Clean them up. engines .raft - .clean(region_id, 0 /*first_index*/, &raft_state, raft_wb)?; + .clean(region_id, 0 /* first_index */, &raft_state, raft_wb)?; raft_wb.put_raft_state(region_id, &snapshot_raft_state)?; } Ok(()) @@ -303,8 +304,9 @@ fn validate_states( state_str() )); } - // Since the entries must be persisted before applying, the term of raft state should also - // be persisted. So it should be greater than the commit term of apply state. + // Since the entries must be persisted before applying, the term of raft state + // should also be persisted. So it should be greater than the commit term of + // apply state. if raft_state.get_hard_state().get_term() < apply_state.get_commit_term() { return Err(box_err!( "term of raft state < commit term of apply state, {}", @@ -552,8 +554,8 @@ where true } - /// Gets a snapshot. Returns `SnapshotTemporarilyUnavailable` if there is no unavailable - /// snapshot. + /// Gets a snapshot. Returns `SnapshotTemporarilyUnavailable` if there is no + /// unavailable snapshot. pub fn snapshot(&self, request_index: u64, to: u64) -> raft::Result { let mut snap_state = self.snap_state.borrow_mut(); let mut tried_cnt = self.snap_tried_cnt.borrow_mut(); @@ -703,14 +705,14 @@ where let first_index = self.entry_storage.first_index(); // It's possible that logs between `last_compacted_idx` and `first_index` are // being deleted in raftlog_gc worker. But it's OK as: - // 1. If the peer accepts a new snapshot, it must start with an index larger than - // this `first_index`; - // 2. If the peer accepts new entries after this snapshot or new snapshot, it must - // start with the new applied index, which is larger than `first_index`. 
+ // - If the peer accepts a new snapshot, it must start with an index larger than + // this `first_index`; + // - If the peer accepts new entries after this snapshot or new snapshot, it + // must start with the new applied index, which is larger than `first_index`. // So new logs won't be deleted by on going raftlog_gc task accidentally. // It's possible that there will be some logs between `last_compacted_idx` and - // `first_index` are not deleted. So a cleanup task for the range should be triggered - // after applying the snapshot. + // `first_index` are not deleted. So a cleanup task for the range should be + // triggered after applying the snapshot. self.clear_meta(first_index, kv_wb, raft_wb)?; } // Write its source peers' `RegionLocalState` together with itself for atomicity @@ -740,10 +742,10 @@ where // Although there is an interval that other metadata are updated while `region` // is not after handing snapshot from ready, at the time of writing, it's no // problem for now. - // The reason why the update of `region` is delayed is that we expect `region` stays - // consistent with the one in `StoreMeta::regions` which should be updated after - // persisting due to atomic snapshot and peer create process. So if we can fix - // these issues in future(maybe not?), the `region` and `StoreMeta::regions` + // The reason why the update of `region` is delayed is that we expect `region` + // stays consistent with the one in `StoreMeta::regions` which should be updated + // after persisting due to atomic snapshot and peer create process. So if we can + // fix these issues in future(maybe not?), the `region` and `StoreMeta::regions` // can updated here immediately. info!( @@ -865,7 +867,8 @@ where res } - /// Cancel applying snapshot, return true if the job can be considered not be run again. + /// Cancel applying snapshot, return true if the job can be considered not + /// be run again. 
pub fn cancel_applying_snap(&mut self) -> bool { let is_canceled = match *self.snap_state.borrow() { SnapState::Applying(ref status) => { @@ -1042,14 +1045,15 @@ where } } - // Note that the correctness depends on the fact that these source regions MUST NOT - // serve read request otherwise a corrupt data may be returned. + // Note that the correctness depends on the fact that these source regions MUST + // NOT serve read request otherwise a corrupt data may be returned. // For now, it is ensured by - // 1. After `PrepareMerge` log is committed, the source region leader's lease will be - // suspected immediately which makes the local reader not serve read request. - // 2. No read request can be responsed in peer fsm during merging. - // These conditions are used to prevent reading **stale** data in the past. - // At present, they are also used to prevent reading **corrupt** data. + // - After `PrepareMerge` log is committed, the source region leader's lease + // will be suspected immediately which makes the local reader not serve read + // request. + // - No read request can be responsed in peer fsm during merging. These + // conditions are used to prevent reading **stale** data in the past. At + // present, they are also used to prevent reading **corrupt** data. for r in &res.destroy_regions { if let Err(e) = self.clear_extra_data(r, &res.region) { error!(?e; @@ -1061,8 +1065,8 @@ where self.schedule_applying_snapshot(); - // The `region` is updated after persisting in order to stay consistent with the one - // in `StoreMeta::regions` (will be updated soon). + // The `region` is updated after persisting in order to stay consistent with the + // one in `StoreMeta::regions` (will be updated soon). // See comments in `apply_snapshot` for more details. self.set_region(res.region.clone()); } @@ -1189,7 +1193,8 @@ where Ok(snapshot) } -// When we bootstrap the region we must call this to initialize region local state first. 
+// When we bootstrap the region we must call this to initialize region local +// state first. pub fn write_initial_raft_state(raft_wb: &mut W, region_id: u64) -> Result<()> { let mut raft_state = RaftLocalState { last_index: RAFT_INIT_LOG_INDEX, @@ -1493,7 +1498,7 @@ pub mod tests { store .engines .raft - .consume(&mut raft_wb, false /*sync*/) + .consume(&mut raft_wb, false /* sync */) .unwrap(); assert_eq!(left, get_meta_key_count(&store)); @@ -1520,7 +1525,8 @@ pub mod tests { where EK: KvEngine, { - /// Sends a significant message. We should guarantee that the message can't be dropped. + /// Sends a significant message. We should guarantee that the message + /// can't be dropped. fn significant_send( &self, _: u64, diff --git a/components/raftstore/src/store/read_queue.rs b/components/raftstore/src/store/read_queue.rs index 9e6c9cf69f0..aa24b4bc3c7 100644 --- a/components/raftstore/src/store/read_queue.rs +++ b/components/raftstore/src/store/read_queue.rs @@ -162,8 +162,9 @@ where self.ready_cnt != self.reads.len() } - /// Clear all commands in the queue. if `notify_removed` contains an `region_id`, - /// notify the request's callback that the region is removed. + /// Clear all commands in the queue. if `notify_removed` contains an + /// `region_id`, notify the request's callback that the region is + /// removed. pub fn clear_all(&mut self, notify_removed: Option) { let mut removed = 0; for mut read in self.reads.drain(..) { @@ -349,7 +350,8 @@ where Some(res) } - /// Raft could have not been ready to handle the poped task. So put it back into the queue. + /// Raft could have not been ready to handle the poped task. So put it back + /// into the queue. pub fn push_front(&mut self, read: ReadIndexRequest) { debug_assert!(read.read_index.is_some()); self.reads.push_front(read); @@ -491,7 +493,8 @@ mod read_index_ctx_tests { } ); - // Old version TiKV should be able to parse context without lock checking fields. 
+ // Old version TiKV should be able to parse context without lock checking + // fields. let bytes = ctx.to_bytes(); assert_eq!(bytes, id.as_bytes()); } @@ -640,7 +643,8 @@ mod tests { ); queue.push_back(req, true); - // Advance on leader, but the peer is not ready to handle it (e.g. it's in merging). + // Advance on leader, but the peer is not ready to handle it (e.g. it's in + // merging). queue.advance_leader_reads("", vec![(id, None, 10)]); // The leader steps down to follower, clear uncommitted reads. diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index cd2bc75d048..056f1f4832d 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -118,8 +118,8 @@ where )) } - // scan scans database using an iterator in range [start_key, end_key), calls function f for - // each iteration, if f returns false, terminates this scan. + // scan scans database using an iterator in range [start_key, end_key), calls + // function f for each iteration, if f returns false, terminates this scan. pub fn scan( &self, cf: &str, diff --git a/components/raftstore/src/store/replication_mode.rs b/components/raftstore/src/store/replication_mode.rs index bf13b9e2364..1f163ccfb9f 100644 --- a/components/raftstore/src/store/replication_mode.rs +++ b/components/raftstore/src/store/replication_mode.rs @@ -93,11 +93,12 @@ impl StoreGroup { /// Gets the group ID of store. /// - /// Different version may indicates different label key. If version is less than - /// recorded one, then label key has to be changed, new value can't be mixed with - /// old values, so `None` is returned. If version is larger, then label key must - /// still matches. Because `recalculate` is called before updating regions' - /// replication status, so unchanged recorded version means unchanged label key. + /// Different version may indicates different label key. 
If version is less + /// than recorded one, then label key has to be changed, new value can't + /// be mixed with old values, so `None` is returned. If version is larger, + /// then label key must still matches. Because `recalculate` is called + /// before updating regions' replication status, so unchanged recorded + /// version means unchanged label key. #[inline] pub fn group_id(&self, version: u64, store_id: u64) -> Option { if version < self.version { diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 6a8aa5ca3bf..aeaf70f5b03 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -371,7 +371,8 @@ impl CfFile { assert!(self.size.len() >= idx); let file_name = self.gen_file_name(idx); if self.size.len() > idx { - // Any logic similar to test_snap_corruption_on_size_or_checksum will trigger this branch + // Any logic similar to test_snap_corruption_on_size_or_checksum will trigger + // this branch self.size[idx] = size; self.checksum[idx] = checksum; self.file_names[idx] = file_name.clone(); @@ -645,8 +646,8 @@ impl Snapshot { Ok(s) } - // If all files of the snapshot exist, return `Ok` directly. Otherwise create a new file at - // the temporary meta file path, so that all other try will fail. + // If all files of the snapshot exist, return `Ok` directly. Otherwise create a + // new file at the temporary meta file path, so that all other try will fail. fn init_for_building(&mut self) -> RaftStoreResult<()> { if self.exists() { return Ok(()); @@ -820,10 +821,10 @@ impl Snapshot { fn save_meta_file(&mut self) -> RaftStoreResult<()> { let v = box_try!(self.meta_file.meta.write_to_bytes()); if let Some(mut f) = self.meta_file.file.take() { - // `meta_file` could be None for this case: in `init_for_building` the snapshot exists - // so no temporary meta file is created, and this field is None. 
However in `do_build` - // it's deleted so we build it again, and then call `save_meta_file` with `meta_file` - // as None. + // `meta_file` could be None for this case: in `init_for_building` the snapshot + // exists so no temporary meta file is created, and this field is + // None. However in `do_build` it's deleted so we build it again, + // and then call `save_meta_file` with `meta_file` as None. // FIXME: We can fix it later by introducing a better snapshot delete mechanism. f.write_all(&v[..])?; f.flush()?; @@ -895,8 +896,8 @@ impl Snapshot { }; cf_file.kv_count = cf_stat.key_count as u64; if cf_file.kv_count > 0 { - // Use `kv_count` instead of file size to check empty files because encrypted sst files - // contain some metadata so their sizes will never be 0. + // Use `kv_count` instead of file size to check empty files because encrypted + // sst files contain some metadata so their sizes will never be 0. self.mgr.rename_tmp_cf_file_for_send(cf_file)?; } else { for tmp_file_path in cf_file.tmp_file_paths() { @@ -936,7 +937,7 @@ impl Snapshot { fn delete(&self) { macro_rules! try_delete_snapshot_files { - ($cf_file: ident, $file_name_func: ident) => { + ($cf_file:ident, $file_name_func:ident) => { let mut file_id = 0; loop { let file_path = $cf_file.path.join($cf_file.$file_name_func(file_id)); @@ -948,7 +949,7 @@ impl Snapshot { } } }; - ($cf_file: ident) => { + ($cf_file:ident) => { let mut file_id = 0; loop { let file_path = $cf_file.path.join($cf_file.gen_file_name(file_id)); @@ -972,7 +973,8 @@ impl Snapshot { for cf_file in &self.cf_files { // Delete cloned files. 
let clone_file_paths = cf_file.clone_file_paths(); - // in case the meta file is corrupted or deleted, delete snapshot files with best effort + // in case the meta file is corrupted or deleted, delete snapshot files with + // best effort if clone_file_paths.is_empty() { try_delete_snapshot_files!(cf_file, gen_clone_file_name); } else { @@ -1409,8 +1411,8 @@ impl SnapManager { Ok(()) } - // [PerformanceCriticalPath]?? I/O involved API should be called in background thread - // Return all snapshots which is idle not being used. + // [PerformanceCriticalPath]?? I/O involved API should be called in background + // thread Return all snapshots which is idle not being used. pub fn list_idle_snap(&self) -> io::Result> { // Use a lock to protect the directory when scanning. let registry = self.core.registry.rl(); @@ -1489,7 +1491,8 @@ impl SnapManager { /// because only one caller can lock temporary disk files. /// /// NOTE: it calculates snapshot size by scanning the base directory. - /// Don't call it in raftstore thread until the size limitation mechanism gets refactored. + /// Don't call it in raftstore thread until the size limitation mechanism + /// gets refactored. pub fn get_snapshot_for_building(&self, key: &SnapKey) -> RaftStoreResult> { let mut old_snaps = None; while self.get_total_snap_size()? > self.max_total_snap_size() { @@ -1559,8 +1562,9 @@ impl SnapManager { Ok(Box::new(s)) } - /// Get a `Snapshot` can be used for writting and then `save`. Concurrent calls - /// are allowed because only one caller can lock temporary disk files. + /// Get a `Snapshot` can be used for writting and then `save`. Concurrent + /// calls are allowed because only one caller can lock temporary disk + /// files. pub fn get_snapshot_for_receiving( &self, key: &SnapKey, @@ -2378,7 +2382,8 @@ pub mod tests { } } - // Make all the snapshot in the specified dir corrupted to have incorrect checksum. 
+ // Make all the snapshot in the specified dir corrupted to have incorrect + // checksum. fn corrupt_snapshot_checksum_in>(dir: T) -> Vec { let dir_path = dir.into(); let mut res = Vec::new(); @@ -2423,7 +2428,8 @@ pub mod tests { res } - // Make all the snapshot meta files in the specified corrupted to have incorrect content. + // Make all the snapshot meta files in the specified corrupted to have incorrect + // content. fn corrupt_snapshot_meta_file>(dir: T) -> usize { let mut total = 0; let dir_path = dir.into(); @@ -2951,7 +2957,8 @@ pub mod tests { let key = SnapKey::new(1, 1, 1); let region = gen_test_region(1, 1, 1); - // Test one snapshot can be built multi times. DataKeyManager should be handled correctly. + // Test one snapshot can be built multi times. DataKeyManager should be handled + // correctly. for _ in 0..2 { let mut s1 = snap_mgr.get_snapshot_for_building(&key).unwrap(); let mut snap_data = RaftSnapshotData::default(); diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 4fb34f15341..c88c1bd3718 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -193,8 +193,8 @@ where Ok(stats) } -/// Apply the given snapshot file into a column family. `callback` will be invoked after each batch of -/// key value pairs written to db. +/// Apply the given snapshot file into a column family. `callback` will be +/// invoked after each batch of key value pairs written to db. pub fn apply_plain_cf_file( path: &str, key_mgr: Option<&Arc>, @@ -226,7 +226,8 @@ where Ok(()) }; - // Collect keys to a vec rather than wb so that we can invoke the callback less times. + // Collect keys to a vec rather than wb so that we can invoke the callback less + // times. 
let mut batch = Vec::with_capacity(1024); let mut batch_data_size = 0; diff --git a/components/raftstore/src/store/txn_ext.rs b/components/raftstore/src/store/txn_ext.rs index 1d8e7ed1981..7b681506f63 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -15,13 +15,13 @@ use txn_types::{Key, PessimisticLock}; /// Transaction extensions related to a peer. #[derive(Default)] pub struct TxnExt { - /// The max timestamp recorded in the concurrency manager is only updated at leader. - /// So if a peer becomes leader from a follower, the max timestamp can be outdated. - /// We need to update the max timestamp with a latest timestamp from PD before this - /// peer can work. - /// From the least significant to the most, 1 bit marks whether the timestamp is - /// updated, 31 bits for the current epoch version, 32 bits for the current term. - /// The version and term are stored to prevent stale UpdateMaxTimestamp task from + /// The max timestamp recorded in the concurrency manager is only updated at + /// leader. So if a peer becomes leader from a follower, the max timestamp + /// can be outdated. We need to update the max timestamp with a latest + /// timestamp from PD before this peer can work. From the least significant + /// to the most, 1 bit marks whether the timestamp is updated, 31 bits for + /// the current epoch version, 32 bits for the current term. The version + /// and term are stored to prevent stale UpdateMaxTimestamp task from /// marking the lowest bit. pub max_ts_sync_status: AtomicU64, @@ -58,7 +58,8 @@ lazy_static! { const GLOBAL_MEM_SIZE_LIMIT: usize = 100 << 20; // 100 MiB -// 512 KiB, so pessimistic locks in one region can be proposed in a single command. +// 512 KiB, so pessimistic locks in one region can be proposed in a single +// command. const PEER_MEM_SIZE_LIMIT: usize = 512 << 10; /// Pessimistic locks of a region peer. 
@@ -66,51 +67,53 @@ const PEER_MEM_SIZE_LIMIT: usize = 512 << 10; pub struct PeerPessimisticLocks { /// The table that stores pessimistic locks. /// - /// The bool marks an ongoing write request (which has been sent to the raftstore while not - /// applied yet) will delete this lock. The lock will be really deleted after applying the - /// write request. The flag will decide whether this lock should be migrated to other peers - /// on leader or region changes: + /// The bool marks an ongoing write request (which has been sent to the + /// raftstore while not applied yet) will delete this lock. The lock will be + /// really deleted after applying the write request. The flag will decide + /// whether this lock should be migrated to other peers on leader or region + /// changes: /// - /// - Transfer leader - /// The lock with the deleted mark SHOULD NOT be proposed before transferring leader. - /// Considering the following cases with different orders: - /// 1. Propose write -> propose locks -> apply write -> apply locks -> transfer leader - /// Because the locks marking deleted will not be proposed. The lock will be deleted when - /// applying the write while not showing up again after applying the locks. - /// 2. Propose locks -> propose write -> transfer leader - /// No lock will be lost in normal cases because the write request has been sent to the - /// raftstore, it is likely to be proposed successfully, while the leader will need at - /// least another round to receive the transfer leader message from the transferree. + /// - Transfer leader The lock with the deleted mark SHOULD NOT be proposed + /// before transferring leader. Considering the following cases with + /// different orders: 1. Propose write -> propose locks -> apply write -> + /// apply locks -> transfer leader Because the locks marking deleted will + /// not be proposed. The lock will be deleted when applying the write + /// while not showing up again after applying the locks. 2. 
Propose locks + /// -> propose write -> transfer leader No lock will be lost in normal + /// cases because the write request has been sent to the raftstore, it is + /// likely to be proposed successfully, while the leader will need at + /// least another round to receive the transfer leader message from the + /// transferee. /// - /// - Split region - /// The lock with the deleted mark SHOULD be moved to new regions on region split. - /// Considering the following cases with different orders: - /// 1. Propose write -> propose split -> apply write -> execute split - /// The write will be applied earlier than split. So, the lock will be deleted earlier - /// than moving locks to new regions. - /// 2. Propose split -> propose write -> ready split -> apply write - /// The write will be skipped because its version is lower than the new region. So, no - /// lock should be deleted in this case. - /// 3. Propose split -> ready split -> propose write - /// The write proposal will be rejected because of version mismatch. + /// - Split region The lock with the deleted mark SHOULD be moved to new + /// regions on region split. Considering the following cases with + /// different orders: 1. Propose write -> propose split -> apply write -> + /// execute split The write will be applied earlier than split. So, the + /// lock will be deleted earlier than moving locks to new regions. 2. + /// Propose split -> propose write -> ready split -> apply write The write + /// will be skipped because its version is lower than the new region. So, + /// no lock should be deleted in this case. 3. Propose split -> ready + /// split -> propose write The write proposal will be rejected because of + /// version mismatch. /// - /// - Merge region - /// The lock with the deleted mark SHOULD be included in the catch up logs on region merge. - /// Considering the following cases with different orders: - /// 1. 
Propose write -> propose prepare merge -> apply write -> execute merge - /// The locks marked deleted will be deleted when applying the write request. So, the - /// deleted locks will not be included again in the commit merge request. - /// 2. Propose prepare merge -> propose write -> execute merge -> apply write - /// Applying the write will be skipped because of version mismatch. So, no lock should - /// be deleted. It's correct that we include the locks that are marked deleted in the - /// commit merge request. + /// - Merge region The lock with the deleted mark SHOULD be included in the + /// catch up logs on region merge. Considering the following cases with + /// different orders: 1. Propose write -> propose prepare merge -> apply + /// write -> execute merge The locks marked deleted will be deleted when + /// applying the write request. So, the deleted locks will not be included + /// again in the commit merge request. 2. Propose prepare merge -> propose + /// write -> execute merge -> apply write Applying the write will be + /// skipped because of version mismatch. So, no lock should be deleted. + /// It's correct that we include the locks that are marked deleted in the + /// commit merge request. map: HashMap, /// Status of the pessimistic lock map. /// The map is writable only in the Normal state. pub status: LocksStatus, /// Refers to the Raft term in which the pessimistic lock table is valid. pub term: u64, - /// Refers to the region version in which the pessimistic lock table is valid. + /// Refers to the region version in which the pessimistic lock table is + /// valid. pub version: u64, /// Estimated memory used by the pessimistic locks. pub memory_size: usize, @@ -158,8 +161,8 @@ impl PeerPessimisticLocks { for pair in &pairs { let (key, lock) = pair.as_pair(); // If the key already exists in the map, it's an overwrite. - // The primary lock does not change during an overwrite, so we don't need to update - // the memory size. 
+ // The primary lock does not change during an overwrite, so we don't need to + // update the memory size. if !self.map.contains_key(key) { incr += key.len() + lock.memory_size(); } @@ -215,11 +218,12 @@ impl PeerPessimisticLocks { /// Group pessimistic locks in the original region to the split regions. /// - /// The given regions MUST be sorted by key in the ascending order. The returned - /// `HashMap`s are in the same order of the given regions. + /// The given regions MUST be sorted by key in the ascending order. The + /// returned `HashMap`s are in the same order of the given regions. /// - /// The locks belonging to the derived region will be kept in the given `locks` map, - /// and the corresponding position in the returned `Vec` will be an empty map. + /// The locks belonging to the derived region will be kept in the given + /// `locks` map, and the corresponding position in the returned `Vec` + /// will be an empty map. pub fn group_by_regions( &mut self, regions: &[metapb::Region], diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 75c620ac12c..2bda7f4794f 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -105,10 +105,10 @@ pub fn check_key_in_region(key: &[u8], region: &metapb::Region) -> Result<()> { } } -/// `is_first_vote_msg` checks `msg` is the first vote (or prevote) message or not. It's used for -/// when the message is received but there is no such region in `Store::region_peers` and the -/// region overlaps with others. In this case we should put `msg` into `pending_msg` instead of -/// create the peer. +/// `is_first_vote_msg` checks `msg` is the first vote (or prevote) message or +/// not. It's used for when the message is received but there is no such region +/// in `Store::region_peers` and the region overlaps with others. In this case +/// we should put `msg` into `pending_msg` instead of create the peer. 
#[inline] fn is_first_vote_msg(msg: &eraftpb::Message) -> bool { match msg.get_msg_type() { @@ -119,10 +119,11 @@ fn is_first_vote_msg(msg: &eraftpb::Message) -> bool { } } -/// `is_first_append_entry` checks `msg` is the first append message or not. This meassge is the first -/// message that the learner peers of the new split region will receive from the leader. It's used for -/// when the message is received but there is no such region in `Store::region_peers`. In this case we -/// should put `msg` into `pending_msg` instead of create the peer. +/// `is_first_append_entry` checks `msg` is the first append message or not. +/// This meassge is the first message that the learner peers of the new split +/// region will receive from the leader. It's used for when the message is +/// received but there is no such region in `Store::region_peers`. In this case +/// we should put `msg` into `pending_msg` instead of create the peer. #[inline] fn is_first_append_entry(msg: &eraftpb::Message) -> bool { match msg.get_msg_type() { @@ -146,7 +147,8 @@ pub fn is_vote_msg(msg: &eraftpb::Message) -> bool { msg_type == MessageType::MsgRequestVote || msg_type == MessageType::MsgRequestPreVote } -/// `is_initial_msg` checks whether the `msg` can be used to initialize a new peer or not. +/// `is_initial_msg` checks whether the `msg` can be used to initialize a new +/// peer or not. // There could be two cases: // 1. Target peer already exists but has not established communication with leader yet // 2. Target peer is added newly due to member change or region split, but it's not @@ -207,12 +209,13 @@ impl AdminCmdEpochState { } /// WARNING: the existing settings below **MUST NOT** be changed!!! -/// Changing any admin cmd's `AdminCmdEpochState` or the epoch-change behavior during applying -/// will break upgrade compatibility and correctness dependency of `CmdEpochChecker`. -/// Please remember it is very difficult to fix the issues arising from not following this rule. 
+/// Changing any admin cmd's `AdminCmdEpochState` or the epoch-change behavior +/// during applying will break upgrade compatibility and correctness dependency +/// of `CmdEpochChecker`. Please remember it is very difficult to fix the issues +/// arising from not following this rule. /// -/// If you really want to change an admin cmd behavior, please add a new admin cmd and **DO NOT** -/// delete the old one. +/// If you really want to change an admin cmd behavior, please add a new admin +/// cmd and **DO NOT** delete the old one. pub fn admin_cmd_epoch_lookup(admin_cmp_type: AdminCmdType) -> AdminCmdEpochState { match admin_cmp_type { AdminCmdType::InvalidAdmin => AdminCmdEpochState::new(false, false, false, false), @@ -234,8 +237,8 @@ pub fn admin_cmd_epoch_lookup(admin_cmp_type: AdminCmdType) -> AdminCmdEpochStat } } -/// WARNING: `NORMAL_REQ_CHECK_VER` and `NORMAL_REQ_CHECK_CONF_VER` **MUST NOT** be changed. -/// The reason is the same as `admin_cmd_epoch_lookup`. +/// WARNING: `NORMAL_REQ_CHECK_VER` and `NORMAL_REQ_CHECK_CONF_VER` **MUST NOT** +/// be changed. The reason is the same as `admin_cmd_epoch_lookup`. pub static NORMAL_REQ_CHECK_VER: bool = true; pub static NORMAL_REQ_CHECK_CONF_VER: bool = false; @@ -396,14 +399,16 @@ pub fn is_region_initialized(r: &metapb::Region) -> bool { !r.get_peers().is_empty() } -/// Lease records an expired time, for examining the current moment is in lease or not. -/// It's dedicated to the Raft leader lease mechanism, contains either state of -/// 1. Suspect Timestamp -/// A suspicious leader lease timestamp, which marks the leader may still hold or lose -/// its lease until the clock time goes over this timestamp. -/// 2. Valid Timestamp -/// A valid leader lease timestamp, which marks the leader holds the lease for now. -/// The lease is valid until the clock time goes over this timestamp. +/// Lease records an expired time, for examining the current moment is in lease +/// or not. 
It's dedicated to the Raft leader lease mechanism, contains either +/// state of +/// - Suspect Timestamp +/// - A suspicious leader lease timestamp, which marks the leader may still +/// hold or lose its lease until the clock time goes over this timestamp. +/// - Valid Timestamp +/// - A valid leader lease timestamp, which marks the leader holds the lease +/// for now. The lease is valid until the clock time goes over this +/// timestamp. /// /// ```text /// Time @@ -419,18 +424,19 @@ pub fn is_region_initialized(r: &metapb::Region) -> bool { /// ``` /// /// Note: -/// - Valid timestamp would increase when raft log entries are applied in current term. -/// - Suspect timestamp would be set after the message `MsgTimeoutNow` is sent by current peer. -/// The message `MsgTimeoutNow` starts a leader transfer procedure. During this procedure, -/// current peer as an old leader may still hold its lease or lose it. -/// It's possible there is a new leader elected and current peer as an old leader -/// doesn't step down due to network partition from the new leader. In that case, -/// current peer lose its leader lease. -/// Within this suspect leader lease expire time, read requests could not be performed -/// locally. -/// - The valid leader lease should be `lease = max_lease - (commit_ts - send_ts)` -/// And the expired timestamp for that leader lease is `commit_ts + lease`, -/// which is `send_ts + max_lease` in short. +/// - Valid timestamp would increase when raft log entries are applied in +/// current term. +/// - Suspect timestamp would be set after the message `MsgTimeoutNow` is sent +/// by current peer. The message `MsgTimeoutNow` starts a leader transfer +/// procedure. During this procedure, current peer as an old leader may +/// still hold its lease or lose it. It's possible there is a new leader +/// elected and current peer as an old leader doesn't step down due to +/// network partition from the new leader. 
In that case, current peer lose +/// its leader lease. Within this suspect leader lease expire time, read +/// requests could not be performed locally. +/// - The valid leader lease should be `lease = max_lease - (commit_ts - +/// send_ts)` And the expired timestamp for that leader lease is `commit_ts +/// + lease`, which is `send_ts + max_lease` in short. pub struct Lease { // A suspect timestamp is in the Either::Left(_), // a valid timestamp is in the Either::Right(_). @@ -466,9 +472,9 @@ impl Lease { } } - /// The valid leader lease should be `lease = max_lease - (commit_ts - send_ts)` - /// And the expired timestamp for that leader lease is `commit_ts + lease`, - /// which is `send_ts + max_lease` in short. + /// The valid leader lease should be `lease = max_lease - (commit_ts - + /// send_ts)` And the expired timestamp for that leader lease is + /// `commit_ts + lease`, which is `send_ts + max_lease` in short. fn next_expired_time(&self, send_ts: Timespec) -> Timespec { send_ts + self.max_lease } @@ -595,8 +601,8 @@ impl fmt::Debug for Lease { } /// A remote lease, it can only be derived by `Lease`. It will be sent -/// to the local read thread, so name it remote. If Lease expires, the remote must -/// expire too. +/// to the local read thread, so name it remote. If Lease expires, the remote +/// must expire too. 
#[derive(Clone)] pub struct RemoteLease { expired_time: Arc, @@ -921,8 +927,8 @@ impl RegionReadProgressRegistry { .map(|rp| rp.safe_ts()) } - // Update `safe_ts` with the provided `LeaderInfo` and return the regions that have the - // same `LeaderInfo` + // Update `safe_ts` with the provided `LeaderInfo` and return the regions that + // have the same `LeaderInfo` pub fn handle_check_leaders(&self, leaders: Vec) -> Vec { let mut regions = Vec::with_capacity(leaders.len()); let registry = self.registry.lock().unwrap(); @@ -949,9 +955,9 @@ impl RegionReadProgressRegistry { info_map } - /// Invoke the provided callback with the registry, an internal lock will hold - /// while invoking the callback so it is important that *not* try to acquiring any - /// lock inside the callback to avoid dead lock + /// Invoke the provided callback with the registry, an internal lock will + /// hold while invoking the callback so it is important that *not* try + /// to acquiring any lock inside the callback to avoid dead lock pub fn with(&self, f: F) -> T where F: FnOnce(&HashMap>) -> T, @@ -967,9 +973,10 @@ impl Default for RegionReadProgressRegistry { } } -/// `RegionReadProgress` is used to keep track of the replica's `safe_ts`, the replica can handle a read -/// request directly without requiring leader lease or read index iff `safe_ts` >= `read_ts` (the `read_ts` -/// is usually stale i.e seconds ago). +/// `RegionReadProgress` is used to keep track of the replica's `safe_ts`, the +/// replica can handle a read request directly without requiring leader lease or +/// read index iff `safe_ts` >= `read_ts` (the `read_ts` is usually stale i.e +/// seconds ago). /// /// `safe_ts` is updated by the `(apply index, safe ts)` item: /// ```ignore @@ -978,13 +985,15 @@ impl Default for RegionReadProgressRegistry { /// } /// ``` /// -/// For the leader, the `(apply index, safe ts)` item is publish by the `resolved-ts` worker periodically. 
-/// For the followers, the item is sync periodically from the leader through the `CheckLeader` rpc. +/// For the leader, the `(apply index, safe ts)` item is publish by the +/// `resolved-ts` worker periodically. For the followers, the item is sync +/// periodically from the leader through the `CheckLeader` rpc. /// -/// The intend is to make the item's `safe ts` larger (more up to date) and `apply index` smaller (require less data) +/// The intend is to make the item's `safe ts` larger (more up to date) and +/// `apply index` smaller (require less data) // -/// TODO: the name `RegionReadProgress` is conflict with the leader lease's `ReadProgress`, shoule change it to another -/// more proper name +/// TODO: the name `RegionReadProgress` is conflict with the leader lease's +/// `ReadProgress`, shoule change it to another more proper name #[derive(Debug)] pub struct RegionReadProgress { // `core` used to keep track and update `safe_ts`, it should @@ -1036,13 +1045,13 @@ impl RegionReadProgress { } } - // Consume the provided `LeaderInfo` to update `safe_ts` and return whether the provided - // `LeaderInfo` is same as ours + // Consume the provided `LeaderInfo` to update `safe_ts` and return whether the + // provided `LeaderInfo` is same as ours pub fn consume_leader_info(&self, mut leader_info: LeaderInfo) -> bool { let mut core = self.core.lock().unwrap(); if leader_info.has_read_state() { - // It is okay to update `safe_ts` without checking the `LeaderInfo`, the `read_state` - // is guaranteed to be valid when it is published by the leader + // It is okay to update `safe_ts` without checking the `LeaderInfo`, the + // `read_state` is guaranteed to be valid when it is published by the leader let rs = leader_info.take_read_state(); let (apply_index, ts) = (rs.get_applied_index(), rs.get_safe_ts()); if apply_index != 0 && ts != 0 && !core.discard { @@ -1123,16 +1132,17 @@ struct RegionReadProgressCore { tag: String, region_id: u64, applied_index: u64, - // A wraper 
of `(apply_index, safe_ts)` item, where the `read_state.ts` is the peer's current `safe_ts` - // and the `read_state.idx` is the smallest `apply_index` required for that `safe_ts` + // A wraper of `(apply_index, safe_ts)` item, where the `read_state.ts` is the peer's current + // `safe_ts` and the `read_state.idx` is the smallest `apply_index` required for that `safe_ts` read_state: ReadState, // The local peer's acknowledge about the leader leader_info: LocalLeaderInfo, // `pending_items` is a *sorted* list of `(apply_index, safe_ts)` item pending_items: VecDeque, - // After the region commit merged, the region's key range is extended and the region's `safe_ts` - // should reset to `min(source_safe_ts, target_safe_ts)`, and start reject stale `read_state` item - // with index smaller than `last_merge_index` to avoid `safe_ts` undo the decrease + // After the region commit merged, the region's key range is extended and the region's + // `safe_ts` should reset to `min(source_safe_ts, target_safe_ts)`, and start reject stale + // `read_state` item with index smaller than `last_merge_index` to avoid `safe_ts` undo the + // decrease last_merge_index: u64, // Stop update `safe_ts` pause: bool, @@ -1210,7 +1220,8 @@ impl RegionReadProgressCore { // The apply index should not decrease assert!(applied >= self.applied_index); self.applied_index = applied; - // Consume pending items with `apply_index` less or equal to `self.applied_index` + // Consume pending items with `apply_index` less or equal to + // `self.applied_index` let mut to_update = self.read_state.clone(); while let Some(item) = self.pending_items.pop_front() { if self.applied_index < item.idx { @@ -1279,7 +1290,8 @@ impl RegionReadProgressCore { } } -/// Represent the duration of all stages of raftstore recorded by one inspecting. +/// Represent the duration of all stages of raftstore recorded by one +/// inspecting. 
#[derive(Default, Debug)] pub struct RaftstoreDuration { pub store_wait_duration: Option, @@ -1432,7 +1444,8 @@ mod tests { let cases = vec![ (Timespec::new(0, 0), 0x0000_0000_0000_0000u64), (Timespec::new(0, 1), 0x0000_0000_0000_0000u64), // 1ns is round down to 0ms. - (Timespec::new(0, 999_999), 0x0000_0000_0000_0000u64), // 999_999ns is round down to 0ms. + (Timespec::new(0, 999_999), 0x0000_0000_0000_0000u64), /* 999_999ns is round down to + * 0ms. */ ( // 1_048_575ns is round down to 0ms. Timespec::new(0, 1_048_575 /* 0x0FFFFF */), @@ -1520,7 +1533,7 @@ mod tests { ) -> metapb::Region { let mut region = metapb::Region::default(); macro_rules! push_peer { - ($ids: ident, $role: expr) => { + ($ids:ident, $role:expr) => { for id in $ids { let mut peer = metapb::Peer::default(); peer.set_id(*id); diff --git a/components/raftstore/src/store/worker/check_leader.rs b/components/raftstore/src/store/worker/check_leader.rs index d5fd6f2c007..355dca4f168 100644 --- a/components/raftstore/src/store/worker/check_leader.rs +++ b/components/raftstore/src/store/worker/check_leader.rs @@ -56,7 +56,8 @@ impl Runner { } } - // Get the minimal `safe_ts` from regions overlap with the key range [`start_key`, `end_key`) + // Get the minimal `safe_ts` from regions overlap with the key range + // [`start_key`, `end_key`) fn get_range_safe_ts(&self, key_range: KeyRange) -> u64 { if key_range.get_start_key().is_empty() && key_range.get_end_key().is_empty() { // Fast path to get the min `safe_ts` of all regions in this store @@ -73,10 +74,10 @@ impl Runner { data_key(key_range.get_start_key()), data_end_key(key_range.get_end_key()), ); - // `store_safe_ts` won't be accessed frequently (like per-request or per-transaction), - // also this branch won't entry because the request key range is empty currently (in v5.1) - // keep this branch for robustness and future use, so it is okay getting `store_safe_ts` - // from `store_meta` (behide a mutex) + // `store_safe_ts` won't be accessed 
frequently (like per-request or + // per-transaction), also this branch won't entry because the request key range + // is empty currently (in v5.1) keep this branch for robustness and future use, + // so it is okay getting `store_safe_ts` from `store_meta` (behide a mutex) let meta = self.store_meta.lock().unwrap(); meta.region_read_progress.with(|registry| { meta.region_ranges diff --git a/components/raftstore/src/store/worker/compact.rs b/components/raftstore/src/store/worker/compact.rs index 88222623084..a829d2fe01c 100644 --- a/components/raftstore/src/store/worker/compact.rs +++ b/components/raftstore/src/store/worker/compact.rs @@ -23,9 +23,12 @@ pub enum Task { }, CheckAndCompact { - cf_names: Vec, // Column families need to compact - ranges: Vec, // Ranges need to check - tombstones_num_threshold: u64, // The minimum RocksDB tombstones a range that need compacting has + // Column families need to compact + cf_names: Vec, + // Ranges need to check + ranges: Vec, + // The minimum RocksDB tombstones a range that need compacting has + tombstones_num_threshold: u64, tombstones_percent_threshold: u64, }, } @@ -181,7 +184,8 @@ fn need_compact( return false; } - // When the number of tombstones exceed threshold and ratio, this range need compacting. + // When the number of tombstones exceed threshold and ratio, this range need + // compacting. let estimate_num_del = num_entires - num_versions; estimate_num_del >= tombstones_num_threshold && estimate_num_del * 100 >= tombstones_percent_threshold * num_entires @@ -193,14 +197,15 @@ fn collect_ranges_need_compact( tombstones_num_threshold: u64, tombstones_percent_threshold: u64, ) -> Result, Error> { - // Check the SST properties for each range, and TiKV will compact a range if the range - // contains too many RocksDB tombstones. TiKV will merge multiple neighboring ranges - // that need compacting into a single range. 
+ // Check the SST properties for each range, and TiKV will compact a range if the + // range contains too many RocksDB tombstones. TiKV will merge multiple + // neighboring ranges that need compacting into a single range. let mut ranges_need_compact = VecDeque::new(); let mut compact_start = None; let mut compact_end = None; for range in ranges.windows(2) { - // Get total entries and total versions in this range and checks if it needs to be compacted. + // Get total entries and total versions in this range and checks if it needs to + // be compacted. if let Some((num_ent, num_ver)) = box_try!(engine.get_range_entries_and_versions(CF_WRITE, &range[0], &range[1])) { @@ -220,7 +225,8 @@ fn collect_ranges_need_compact( } } - // Current range doesn't need compacting, save previous range that need compacting. + // Current range doesn't need compacting, save previous range that need + // compacting. if compact_start.is_some() { assert!(compact_end.is_some()); } diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index afd84ad16dd..d65cbcea8d4 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -738,11 +738,12 @@ fn hotspot_query_num_report_threshold() -> u64 { HOTSPOT_QUERY_RATE_THRESHOLD * 10 } -// Slow score is a value that represents the speed of a store and ranges in [1, 100]. -// It is maintained in the AIMD way. -// If there are some inspecting requests timeout during a round, by default the score -// will be increased at most 1x when above 10% inspecting requests timeout. -// If there is not any timeout inspecting requests, the score will go back to 1 in at least 5min. +// Slow score is a value that represents the speed of a store and ranges in [1, +// 100]. It is maintained in the AIMD way. +// If there are some inspecting requests timeout during a round, by default the +// score will be increased at most 1x when above 10% inspecting requests +// timeout. 
If there is not any timeout inspecting requests, the score will go +// back to 1 in at least 5min. struct SlowScore { value: OrderedFloat, last_record_time: Instant, @@ -1086,9 +1087,10 @@ where Default::default(), ); } - // When rolling update, there might be some old version tikvs that don't support batch split in cluster. - // In this situation, PD version check would refuse `ask_batch_split`. - // But if update time is long, it may cause large Regions, so call `ask_split` instead. + // When rolling update, there might be some old version tikvs that don't support + // batch split in cluster. In this situation, PD version check would refuse + // `ask_batch_split`. But if update time is long, it may cause large Regions, so + // call `ask_split` instead. Err(Error::Incompatible) => { let (region_id, peer_id) = (region.id, peer.id); info!( @@ -1237,7 +1239,8 @@ where stats.set_used_size(used_size); let mut available = capacity.checked_sub(used_size).unwrap_or_default(); - // We only care about rocksdb SST file size, so we should check disk available here. + // We only care about rocksdb SST file size, so we should check disk available + // here. available = cmp::min(available, disk_stats.available_space()); if available == 0 { @@ -2074,8 +2077,8 @@ where self.slow_score.last_tick_finished = false; if self.slow_score.last_tick_id % self.slow_score.round_ticks == 0 { - // `last_update_time` is refreshed every round. If no update happens in a whole round, - // we set the status to unknown. + // `last_update_time` is refreshed every round. If no update happens in a whole + // round, we set the status to unknown. 
if self.curr_health_status == ServingStatus::Serving && self.slow_score.last_record_time < self.slow_score.last_update_time { diff --git a/components/raftstore/src/store/worker/raftlog_gc.rs b/components/raftstore/src/store/worker/raftlog_gc.rs index bf7debfb1d9..88e30e33104 100644 --- a/components/raftstore/src/store/worker/raftlog_gc.rs +++ b/components/raftstore/src/store/worker/raftlog_gc.rs @@ -108,7 +108,8 @@ impl Runner { return; } fail::fail_point!("worker_gc_raft_log_flush"); - // Sync wal of kv_db to make sure the data before apply_index has been persisted to disk. + // Sync wal of kv_db to make sure the data before apply_index has been persisted + // to disk. let start = Instant::now(); self.engines.kv.sync().unwrap_or_else(|e| { panic!("failed to sync kv_engine in raft_log_gc: {:?}", e); @@ -233,7 +234,7 @@ mod tests { e.set_index(i); raft_wb.append(region_id, vec![e]).unwrap(); } - raft_db.consume(&mut raft_wb, false /*sync*/).unwrap(); + raft_db.consume(&mut raft_wb, false /* sync */).unwrap(); let tbls = vec![ (Task::gc(region_id, 0, 10), 10, (0, 10), (10, 100)), diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 1be9cf8b4e9..b7724789d4b 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -514,10 +514,11 @@ where cmd.callback.invoke_read(read_resp); } - // Ideally `get_delegate` should return `Option<&ReadDelegate>`, but if so the lifetime of - // the returned `&ReadDelegate` will bind to `self`, and make it impossible to use `&mut self` - // while the `&ReadDelegate` is alive, a better choice is use `Rc` but `LocalReader: Send` will be - // violated, which is required by `LocalReadRouter: Send`, use `Arc` will introduce extra cost but + // Ideally `get_delegate` should return `Option<&ReadDelegate>`, but if so the + // lifetime of the returned `&ReadDelegate` will bind to `self`, and make it + // impossible to use `&mut self` 
while the `&ReadDelegate` is alive, a better + // choice is use `Rc` but `LocalReader: Send` will be violated, which is + // required by `LocalReadRouter: Send`, use `Arc` will introduce extra cost but // make the logic clear fn get_delegate(&mut self, region_id: u64) -> Option> { let rd = match self.delegates.get(®ion_id) { @@ -669,7 +670,8 @@ where // Getting the snapshot let response = self.execute(&req, &delegate.region, None, read_id); - // Double check in case `safe_ts` change after the first check and before getting snapshot + // Double check in case `safe_ts` change after the first check and before + // getting snapshot if let Err(resp) = delegate.check_stale_read_safe(read_ts, &mut self.metrics) { @@ -705,11 +707,12 @@ where } } - /// If read requests are received at the same RPC request, we can create one snapshot for all - /// of them and check whether the time when the snapshot was created is in lease. We use - /// ThreadReadId to figure out whether this RaftCommand comes from the same RPC request with - /// the last RaftCommand which left a snapshot cached in LocalReader. ThreadReadId is composed - /// by thread_id and a thread_local incremental sequence. + /// If read requests are received at the same RPC request, we can create one + /// snapshot for all of them and check whether the time when the snapshot + /// was created is in lease. We use ThreadReadId to figure out whether this + /// RaftCommand comes from the same RPC request with the last RaftCommand + /// which left a snapshot cached in LocalReader. ThreadReadId is composed by + /// thread_id and a thread_local incremental sequence. 
#[inline] pub fn read( &mut self, diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index cdd0ee5556b..5e2cc8992f5 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -48,7 +48,8 @@ use crate::{ }, }; -// used to periodically check whether we should delete a stale peer's range in region runner +// used to periodically check whether we should delete a stale peer's range in +// region runner #[cfg(test)] pub const STALE_PEER_CHECK_TICK: usize = 1; // 1000 milliseconds @@ -137,7 +138,8 @@ struct StalePeerInfo { } /// A structure records all ranges to be deleted with some delay. -/// The delay is because there may be some coprocessor requests related to these ranges. +/// The delay is because there may be some coprocessor requests related to these +/// ranges. #[derive(Clone, Default)] struct PendingDeleteRanges { ranges: BTreeMap, StalePeerInfo>, // start_key -> StalePeerInfo @@ -202,7 +204,8 @@ impl PendingDeleteRanges { /// Inserts a new range waiting to be deleted. /// - /// Before an insert is called, it must call drain_overlap_ranges to clean the overlapping range. + /// Before an insert is called, it must call drain_overlap_ranges to clean + /// the overlapping range. fn insert(&mut self, region_id: u64, start_key: &[u8], end_key: &[u8], stale_sequence: u64) { if !self.find_overlap_ranges(start_key, end_key).is_empty() { panic!( @@ -290,14 +293,16 @@ where "err" => %e, ); } - // The error can be ignored as snapshot will be sent in next heartbeat in the end. + // The error can be ignored as snapshot will be sent in next heartbeat in the + // end. let _ = self .router .send(region_id, CasualMessage::SnapshotGenerated); Ok(()) } - /// Handles the task of generating snapshot of the Region. It calls `generate_snap` to do the actual work. + /// Handles the task of generating snapshot of the Region. 
It calls + /// `generate_snap` to do the actual work. fn handle_gen( &self, region_id: u64, @@ -425,7 +430,8 @@ where Ok(()) } - /// Tries to apply the snapshot of the specified Region. It calls `apply_snap` to do the actual work. + /// Tries to apply the snapshot of the specified Region. It calls + /// `apply_snap` to do the actual work. fn handle_apply(&mut self, region_id: u64, status: Arc) { let _ = status.compare_exchange( JOB_STATUS_PENDING, @@ -493,7 +499,8 @@ where let mut df_ranges = Vec::with_capacity(overlap_ranges.len()); for (region_id, start_key, end_key, stale_sequence) in overlap_ranges.iter() { // `DeleteFiles` may break current rocksdb snapshots consistency, - // so do not use it unless we can make sure there is no reader of the destroyed peer anymore. + // so do not use it unless we can make sure there is no reader of the destroyed + // peer anymore. if *stale_sequence < oldest_sequence { df_ranges.push(Range::new(start_key, end_key)); } else { @@ -588,8 +595,8 @@ where } } - /// Checks the number of files at level 0 to avoid write stall after ingesting sst. - /// Returns true if the ingestion causes write stall. + /// Checks the number of files at level 0 to avoid write stall after + /// ingesting sst. Returns true if the ingestion causes write stall. fn ingest_maybe_stall(&self) -> bool { for cf in SNAPSHOT_CFS { // no need to check lock cf @@ -679,8 +686,9 @@ where fn handle_pending_applies(&mut self) { fail_point!("apply_pending_snapshot", |_| {}); while !self.pending_applies.is_empty() { - // should not handle too many applies than the number of files that can be ingested. - // check level 0 every time because we can not make sure how does the number of level 0 files change. + // should not handle too many applies than the number of files that can be + // ingested. check level 0 every time because we can not make sure + // how does the number of level 0 files change. 
if self.ctx.ingest_maybe_stall() { break; } diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index e5dde8a910c..14a1a5b7bbc 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -339,7 +339,8 @@ where ); } - /// Checks a Region with split and bucket checkers to produce split keys and buckets keys and generates split admin command. + /// Checks a Region with split and bucket checkers to produce split keys and + /// buckets keys and generates split admin command. fn check_split_and_bucket( &mut self, region: &Region, @@ -553,7 +554,8 @@ where if bucket_range_idx == bucket_range_list.len() { skip_check_bucket = true; } else if origin_key >= bucket_range_list[bucket_range_idx].0.as_slice() { - // e.key() is between bucket_range_list[bucket_range_idx].0, bucket_range_list[bucket_range_idx].1 + // e.key() is between bucket_range_list[bucket_range_idx].0, + // bucket_range_list[bucket_range_idx].1 bucket_size += e.entry_size() as u64; if bucket_size >= host.region_bucket_size() { bucket.keys.push(origin_key.to_vec()); @@ -580,7 +582,8 @@ where } } - // if we scan the whole range, we can update approximate size and keys with accurate value. + // if we scan the whole range, we can update approximate size and keys with + // accurate value. if is_key_range { return; } diff --git a/components/raftstore/src/store/worker/split_config.rs b/components/raftstore/src/store/worker/split_config.rs index 4fe00fff448..7857ae10d8e 100644 --- a/components/raftstore/src/store/worker/split_config.rs +++ b/components/raftstore/src/store/worker/split_config.rs @@ -14,22 +14,35 @@ pub(crate) const DEFAULT_SAMPLE_NUM: usize = 20; const DEFAULT_QPS_THRESHOLD: usize = 3000; const DEFAULT_BYTE_THRESHOLD: usize = 30 * 1024 * 1024; -// We get balance score by abs(sample.left-sample.right)/(sample.right+sample.left). 
It will be used to measure left and right balance +// We get balance score by +// abs(sample.left-sample.right)/(sample.right+sample.left). It will be used to +// measure left and right balance const DEFAULT_SPLIT_BALANCE_SCORE: f64 = 0.25; -// We get contained score by sample.contained/(sample.right+sample.left+sample.contained). It will be used to avoid to split regions requested by range. +// We get contained score by +// sample.contained/(sample.right+sample.left+sample.contained). It will be used +// to avoid to split regions requested by range. const DEFAULT_SPLIT_CONTAINED_SCORE: f64 = 0.5; -// If the `split_balance_score` and `split_contained_score` above could not be satisfied, we will try to split the region according to its CPU load, +// If the `split_balance_score` and `split_contained_score` above could not be +// satisfied, we will try to split the region according to its CPU load, // then these parameters below will start to work. -// When the gRPC poll thread CPU usage (over the past `detect_times` seconds by default) is higher than gRPC poll thread count * `DEFAULT_GRPC_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO`, -// the CPU-based split won't be triggered no matter if the `DEFAULT_UNIFIED_READ_POOL_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO` and `REGION_CPU_OVERLOAD_THRESHOLD_RATIO` are exceeded -// to prevent from increasing the gRPC poll CPU usage. +// When the gRPC poll thread CPU usage (over the past `detect_times` seconds by +// default) is higher than gRPC poll thread count * +// `DEFAULT_GRPC_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO`, the CPU-based split won't +// be triggered no matter if the +// `DEFAULT_UNIFIED_READ_POOL_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO` and +// `REGION_CPU_OVERLOAD_THRESHOLD_RATIO` are exceeded to prevent from increasing +// the gRPC poll CPU usage. 
const DEFAULT_GRPC_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO: f64 = 0.5; -// When the Unified Read Poll thread CPU usage is higher than Unified Read Poll thread count * `DEFAULT_UNIFIED_READ_POOL_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO`, +// When the Unified Read Poll thread CPU usage is higher than Unified Read Poll +// thread count * +// `DEFAULT_UNIFIED_READ_POOL_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO`, // the CPU-based split will try to check and record the top hot CPU region. const DEFAULT_UNIFIED_READ_POOL_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO: f64 = 0.8; -// When the Unified Read Poll is hot and the region's CPU usage reaches `REGION_CPU_OVERLOAD_THRESHOLD_RATIO` as a percentage of the Unified Read Poll, -// it will be added into the hot region list and may be split later as the top hot CPU region. +// When the Unified Read Poll is hot and the region's CPU usage reaches +// `REGION_CPU_OVERLOAD_THRESHOLD_RATIO` as a percentage of the Unified Read +// Poll, it will be added into the hot region list and may be split later as the +// top hot CPU region. pub(crate) const REGION_CPU_OVERLOAD_THRESHOLD_RATIO: f64 = 0.25; lazy_static! { diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 013ac705be9..0f15bcc4805 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -60,7 +60,8 @@ where } // This function uses the distributed/parallel reservoir sampling algorithm. -// It will sample min(sample_num, all_key_ranges_num) key ranges from multiple `key_ranges_provider` with the same possibility. +// It will sample min(sample_num, all_key_ranges_num) key ranges from multiple +// `key_ranges_provider` with the same possibility. fn sample( sample_num: usize, mut key_ranges_providers: Vec, @@ -72,7 +73,8 @@ where let mut sampled_key_ranges = vec![]; // Retain the non-empty key ranges. 
// `key_ranges_provider` may return an empty key ranges vector, which will cause - // the later sampling to fall into a dead loop. So we need to filter it out here. + // the later sampling to fall into a dead loop. So we need to filter it out + // here. key_ranges_providers .retain_mut(|key_ranges_provider| !key_ranges_getter(key_ranges_provider).is_empty()); if key_ranges_providers.is_empty() { @@ -109,8 +111,9 @@ where // Generate a random number in [1, all_key_ranges_num]. // Starting from 1 is to achieve equal probability. // For example, for a `prefix_sum` like [1, 2, 3, 4], - // if we generate a random number in [0, 4], the probability of choosing the first index is 0.4 - // rather than 0.25 due to that 0 and 1 will both make `binary_search` get the same result. + // if we generate a random number in [0, 4], the probability of choosing the + // first index is 0.4 rather than 0.25 due to that 0 and 1 will both + // make `binary_search` get the same result. let i = prefix_sum .binary_search(&rng.gen_range(1..=all_key_ranges_num)) .unwrap_or_else(|i| i); @@ -170,7 +173,8 @@ impl From> for Samples { } impl Samples { - // evaluate the samples according to the given key range, it will update the sample's left, right and contained counter. + // evaluate the samples according to the given key range, it will update the + // sample's left, right and contained counter. fn evaluate(&mut self, key_range: &KeyRange) { for mut sample in self.0.iter_mut() { let order_start = if key_range.start_key.is_empty() { @@ -210,8 +214,9 @@ impl Samples { } let evaluated_key_num = (sample.contained + evaluated_key_num_lr) as f64; - // The balance score is the difference in the number of requested keys between the left and right of a sample key. - // The smaller the balance score, the more balanced the load will be after this splitting. + // The balance score is the difference in the number of requested keys between + // the left and right of a sample key. 
The smaller the balance + // score, the more balanced the load will be after this splitting. let balance_score = (sample.left as f64 - sample.right as f64).abs() / evaluated_key_num_lr as f64; LOAD_BASE_SPLIT_SAMPLE_VEC @@ -222,8 +227,9 @@ impl Samples { continue; } - // The contained score is the ratio of a sample key that are contained in the requested key. - // The larger the contained score, the more RPCs the cluster will receive after this splitting. + // The contained score is the ratio of a sample key that are contained in the + // requested key. The larger the contained score, the more RPCs the + // cluster will receive after this splitting. let contained_score = sample.contained as f64 / evaluated_key_num; LOAD_BASE_SPLIT_SAMPLE_VEC .with_label_values(&["contained_score"]) @@ -233,8 +239,9 @@ impl Samples { continue; } - // We try to find a split key that has the smallest balance score and the smallest contained score - // to make the splitting keep the load balanced while not increasing too many RPCs. + // We try to find a split key that has the smallest balance score and the + // smallest contained score to make the splitting keep the load + // balanced while not increasing too many RPCs. let final_score = balance_score + contained_score; if final_score < best_score { best_index = index as i32; @@ -295,13 +302,14 @@ impl Recorder { // collect the split keys from the recorded key_ranges. // This will start a second-level sampling on the previous sampled key ranges, - // evaluate the samples according to the given key range, and compute the split keys finally. + // evaluate the samples according to the given key range, and compute the split + // keys finally. 
fn collect(&self, config: &SplitConfig) -> Vec { let sampled_key_ranges = sample(config.sample_num, self.key_ranges.clone(), |x| x); let mut samples = Samples::from(sampled_key_ranges); let recorded_key_ranges: Vec<&KeyRange> = self.key_ranges.iter().flatten().collect(); - // Because we need to observe the number of `no_enough_key` of all the actual keys, - // so we do this check after the samples are calculated. + // Because we need to observe the number of `no_enough_key` of all the actual + // keys, so we do this check after the samples are calculated. if (recorded_key_ranges.len() as u64) < config.sample_threshold { LOAD_BASE_SPLIT_EVENT .no_enough_sampled_key @@ -315,8 +323,8 @@ impl Recorder { } } -// RegionInfo will maintain key_ranges with sample_num length by reservoir sampling. -// And it will save qps num and peer. +// RegionInfo will maintain key_ranges with sample_num length by reservoir +// sampling. And it will save qps num and peer. #[derive(Debug, Clone)] pub struct RegionInfo { pub sample_num: usize, @@ -378,7 +386,8 @@ pub struct ReadStats { // 2. add_query_num_batch // 3. add_flow // Among these three methods, `add_flow` will not update `key_ranges` of `RegionInfo`, - // and due to this, an `RegionInfo` without `key_ranges` may occur. The caller should be aware of this. + // and due to this, an `RegionInfo` without `key_ranges` may occur. The caller should be aware + // of this. pub region_infos: HashMap, pub sample_num: usize, pub region_buckets: HashMap, @@ -525,7 +534,8 @@ impl SplitInfo { } // Create a SplitInfo with the given region_id, peer, start_key and end_key. - // This is used to split the region on half within the specified start and end keys later. + // This is used to split the region on half within the specified start and end + // keys later. 
fn with_start_end_key( region_id: u64, peer: Peer, @@ -643,7 +653,8 @@ impl AutoSplitController { >= self.cfg.region_cpu_overload_threshold_ratio } - // collect the read stats from read_stats_vec and dispatch them to a Region HashMap. + // collect the read stats from read_stats_vec and dispatch them to a Region + // HashMap. fn collect_read_stats(read_stats_vec: Vec) -> HashMap> { // RegionID -> Vec, collect the RegionInfo from different threads. let mut region_infos_map = HashMap::default(); @@ -659,12 +670,14 @@ impl AutoSplitController { region_infos_map } - // collect the CPU stats from cpu_stats_vec and dispatch them to a Region HashMap. + // collect the CPU stats from cpu_stats_vec and dispatch them to a Region + // HashMap. fn collect_cpu_stats( &self, cpu_stats_vec: Vec>, ) -> HashMap)> { - // RegionID -> (CPU usage, Hottest Key Range), calculate the CPU usage and its hottest key range. + // RegionID -> (CPU usage, Hottest Key Range), calculate the CPU usage and its + // hottest key range. let mut region_cpu_map = HashMap::default(); if !self.should_check_region_cpu() { return region_cpu_map; @@ -730,8 +743,8 @@ impl AutoSplitController { / 100.0 } - // flush the read stats info into the recorder and check if the region needs to be split - // according to all the stats info the recorder has collected before. + // flush the read stats info into the recorder and check if the region needs to + // be split according to all the stats info the recorder has collected before. pub fn flush( &mut self, read_stats_vec: Vec, diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index ef683724429..e1c23652db8 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -44,8 +44,8 @@ pub struct AdvanceTsWorker { timer: SteadyTimer, worker: Runtime, scheduler: Scheduler>, - /// The concurrency manager for transactions. It's needed for CDC to check locks when - /// calculating resolved_ts. 
+ /// The concurrency manager for transactions. It's needed for CDC to check + /// locks when calculating resolved_ts. concurrency_manager: ConcurrencyManager, // store_id -> client tikv_clients: Arc>>, @@ -104,8 +104,8 @@ impl AdvanceTsWorker { // Ignore get tso errors since we will retry every `advance_ts_interval`. let mut min_ts = pd_client.get_tso().await.unwrap_or_default(); - // Sync with concurrency manager so that it can work correctly when optimizations - // like async commit is enabled. + // Sync with concurrency manager so that it can work correctly when + // optimizations like async commit is enabled. // Note: This step must be done before scheduling `Task::MinTS` task, and the // resolver must be checked in or after `Task::MinTS`' execution. cm.update_max_ts(min_ts); @@ -153,8 +153,9 @@ impl AdvanceTsWorker { } // Confirms leadership of region peer before trying to advance resolved ts. -// This function broadcasts a special message to all stores, gets the leader id of them to confirm whether -// current peer has a quorum which accepts its leadership. +// This function broadcasts a special message to all stores, gets the leader id +// of them to confirm whether current peer has a quorum which accepts its +// leadership. pub async fn region_resolved_ts_store( regions: Vec, store_meta: Arc>, @@ -290,7 +291,8 @@ pub async fn region_resolved_ts_store( .observe(start.saturating_elapsed_secs()); }); for _ in 0..store_count { - // Use `select_all` to avoid the process getting blocked when some TiKVs were down. + // Use `select_all` to avoid the process getting blocked when some TiKVs were + // down. 
let (res, _, remains) = select_all(stores).await; stores = remains; match res { diff --git a/components/resolved_ts/src/cmd.rs b/components/resolved_ts/src/cmd.rs index f561aa07e28..277a31e2001 100644 --- a/components/resolved_ts/src/cmd.rs +++ b/components/resolved_ts/src/cmd.rs @@ -135,7 +135,8 @@ impl ChangeLog { pub(crate) fn decode_write(key: &[u8], value: &[u8], is_apply: bool) -> Option { let write = WriteRef::parse(value).ok()?.to_owned(); - // Drop the record it self but keep only the overlapped rollback information if gc_fence exists. + // Drop the record it self but keep only the overlapped rollback information if + // gc_fence exists. if is_apply && write.gc_fence.is_some() { // `gc_fence` is set means the write record has been rewritten. // Currently the only case is writing overlapped_rollback. And in this case @@ -191,7 +192,8 @@ struct RowChange { fn group_row_changes(requests: Vec) -> HashMap { let mut changes: HashMap = HashMap::default(); - // The changes about default cf was recorded here and need to be matched with a `write` or a `lock`. + // The changes about default cf was recorded here and need to be matched with a + // `write` or a `lock`. 
let mut unmatched_default = HashMap::default(); for mut req in requests { match req.get_cmd_type() { @@ -254,8 +256,8 @@ fn group_row_changes(requests: Vec) -> HashMap { changes } -/// Filter non-lock related data (i.e `default_cf` data), the implement is subject to -/// how `group_row_changes` and `encode_rows` encode `ChangeRow` +/// Filter non-lock related data (i.e `default_cf` data), the implement is +/// subject to how `group_row_changes` and `encode_rows` encode `ChangeRow` pub fn lock_only_filter(mut cmd_batch: CmdBatch) -> Option { if cmd_batch.is_empty() { return None; diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 90e3a3b7912..5a180a9b6c8 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -63,8 +63,8 @@ enum PendingLock { } // Records information related to observed region. -// observe_id is used for avoiding ABA problems in incremental scan task, advance resolved ts task, -// and command observing. +// observe_id is used for avoiding ABA problems in incremental scan task, +// advance resolved ts task, and command observing. struct ObserveRegion { meta: Region, handle: ObserveHandle, @@ -106,8 +106,9 @@ impl ObserveRegion { continue; } ChangeLog::Admin(req_type) => { - // TODO: for admin cmd that won't change the region meta like peer list and key range - // (i.e. `CompactLog`, `ComputeHash`) we may not need to return error + // TODO: for admin cmd that won't change the region meta like peer list + // and key range (i.e. 
`CompactLog`, `ComputeHash`) we may not need to + // return error return Err(format!( "region met admin command {:?} while initializing resolver", req_type @@ -167,8 +168,9 @@ impl ObserveRegion { "region met split/merge command, stop tracking since key range changed, wait for re-register"; "req_type" => ?req_type, ); - // Stop tracking so that `tracked_index` larger than the split/merge command index won't be published - // untill `RegionUpdate` event trigger the region re-register and re-scan the new key range + // Stop tracking so that `tracked_index` larger than the split/merge + // command index won't be published until `RegionUpdate` event + // trigger the region re-register and re-scan the new key range self.resolver.stop_tracking(); } _ => { @@ -421,15 +423,17 @@ where return; } // TODO: may not need to re-register region for some cases: - // - `Split/BatchSplit`, which can be handled by remove out-of-range locks from the `Resolver`'s lock heap + // - `Split/BatchSplit`, which can be handled by remove out-of-range locks from + // the `Resolver`'s lock heap // - `PrepareMerge` and `RollbackMerge`, the key range is unchanged self.deregister_region(region_id); self.register_region(incoming_region); } } - // This function is corresponding to RegionDestroyed event that can be only scheduled by observer. - // To prevent destroying region for wrong peer, it should check the region epoch at first. + // This function is corresponding to RegionDestroyed event that can be only + // scheduled by observer. To prevent destroying region for wrong peer, it + // should check the region epoch at first. fn region_destroyed(&mut self, region: Region) { if let Some(observe_region) = self.regions.get(®ion.id) { if util::compare_region_epoch( @@ -501,7 +505,8 @@ where self.sinker.sink_resolved_ts(regions, ts); } - // Tracking or untracking locks with incoming commands that corresponding observe id is valid. 
+ // Tracking or untracking locks with incoming commands that corresponding + // observe id is valid. #[allow(clippy::drop_ref)] fn handle_change_log( &mut self, @@ -566,7 +571,8 @@ where } fn register_advance_event(&self, cfg_version: usize) { - // Ignore advance event that registered with previous `advance_ts_interval` config + // Ignore advance event that registered with previous `advance_ts_interval` + // config if self.cfg_version != cfg_version { return; } diff --git a/components/resolved_ts/src/lib.rs b/components/resolved_ts/src/lib.rs index 172efbb9c18..5ad2941dde2 100644 --- a/components/resolved_ts/src/lib.rs +++ b/components/resolved_ts/src/lib.rs @@ -1,13 +1,16 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -//! Resolved TS is a timestamp that represents the lower bonud of incoming Commit TS +//! Resolved TS is a timestamp that represents the lower bound of incoming +//! Commit TS // and the upper bound of outgoing Commit TS. -//! Through this timestamp we can get a consistent view in the transaction level. +//! Through this timestamp we can get a consistent view in the transaction +//! level. //! //! To maintain a correct Resolved TS, these premises must be satisfied: -//! 1. Tracing all locks in the region, use the minimal Start TS as Resolved TS. -//! 2. If there is not any lock, use the latest timestamp as Resolved TS. -//! 3. Resolved TS must be advanced by the region leader after it has applied on its term. +//! - Tracing all locks in the region, use the minimal Start TS as Resolved TS. +//! - If there is not any lock, use the latest timestamp as Resolved TS. +//! - Resolved TS must be advanced by the region leader after it has applied on +//! its term. 
#![feature(box_patterns)] #![feature(result_flattening)] diff --git a/components/resolved_ts/src/observer.rs b/components/resolved_ts/src/observer.rs index c9decaadc77..9ff7b976ad4 100644 --- a/components/resolved_ts/src/observer.rs +++ b/components/resolved_ts/src/observer.rs @@ -18,8 +18,9 @@ impl Observer { } pub fn register_to(&self, coprocessor_host: &mut CoprocessorHost) { - // The `resolved-ts` cmd observer will `mem::take` the `Vec`, use a low priority - // to let it be the last observer and avoid affecting other observers + // The `resolved-ts` cmd observer will `mem::take` the `Vec`, use a + // low priority to let it be the last observer and avoid affecting other + // observers coprocessor_host .registry .register_cmd_observer(1000, BoxCmdObserver::new(self.clone())); @@ -84,7 +85,8 @@ impl CmdObserver for Observer { impl RoleObserver for Observer { fn on_role_change(&self, ctx: &mut ObserverContext<'_>, role_change: &RoleChange) { // Stop to advance resolved ts after peer steps down to follower or candidate. - // Do not need to check observe id because we expect all role change events are scheduled in order. + // Do not need to check observe id because we expect all role change events are + // scheduled in order. 
if role_change.state != StateRole::Leader { if let Err(e) = self.scheduler.schedule(Task::DeRegisterRegion { region_id: ctx.region().id, @@ -102,9 +104,9 @@ impl RegionChangeObserver for Observer { event: RegionChangeEvent, role: StateRole, ) { - // If the peer is not leader, it must has not registered the observe region or it is deregistering - // the observe region, so don't need to send `RegionUpdated`/`RegionDestroyed` to update the observe - // region + // If the peer is not leader, it must has not registered the observe region or + // it is deregistering the observe region, so don't need to send + // `RegionUpdated`/`RegionDestroyed` to update the observe region if role != StateRole::Leader { return; } diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 12c7cbe0c56..f1518784a33 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -181,7 +181,8 @@ impl Resolver { self.raw_lock_ts_heap.push(Reverse(ts)); } - // untrack all timestamps smaller than input ts, depend on the raw ts in one region is non-decreasing + // untrack all timestamps smaller than input ts, depend on the raw ts in one + // region is non-decreasing pub fn raw_untrack_lock(&mut self, ts: TimeStamp) { debug!("raw untrack ts before {}, region {}", ts, self.region_id); while let Some(&Reverse(min_ts)) = self.raw_lock_ts_heap.peek() { @@ -197,7 +198,8 @@ impl Resolver { /// `min_ts` advances the resolver even if there is no write. /// Return None means the resolver is not initialized. 
pub fn resolve(&mut self, min_ts: TimeStamp) -> ResolvedTs { - // The `Resolver` is stopped, not need to advance, just return the current `resolved_ts` + // The `Resolver` is stopped, not need to advance, just return the current + // `resolved_ts` if self.stopped { return self.resolved_ts; } diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index 835de79c161..396fc7333da 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -212,7 +212,8 @@ impl, E: KvEngine> ScannerPool { let mut resp = box_try!(fut.await); if resp.response.get_header().has_error() { let err = resp.response.take_header().take_error(); - // These two errors can't handled by retrying since the epoch and observe id is unchanged + // These two errors can't handled by retrying since the epoch and observe id is + // unchanged if err.has_epoch_not_match() || err.get_message().contains("stale observe id") { return Err(Error::request(err)); } diff --git a/components/resource_metering/src/collector.rs b/components/resource_metering/src/collector.rs index 9e1830b8acb..bdadd638f2e 100644 --- a/components/resource_metering/src/collector.rs +++ b/components/resource_metering/src/collector.rs @@ -15,7 +15,8 @@ use crate::RawRecords; /// to the `Scheduler` for processing. /// /// `Reporter` implements [Runnable] and [RunnableWithTimer], aggregates the -/// data sent by the `Collector` internally, and reports it regularly through RPC. +/// data sent by the `Collector` internally, and reports it regularly through +/// RPC. 
/// /// [Recorder]: crate::recorder::Recorder /// [Reporter]: crate::reporter::Reporter diff --git a/components/resource_metering/src/lib.rs b/components/resource_metering/src/lib.rs index 9c1f25e4b0c..bd64d7202ae 100644 --- a/components/resource_metering/src/lib.rs +++ b/components/resource_metering/src/lib.rs @@ -51,9 +51,9 @@ pub const MAX_THREAD_REGISTER_RETRY: u32 = 10; /// This structure is used as a label to distinguish different request contexts. /// -/// In order to associate `ResourceMeteringTag` with a certain piece of code logic, -/// we added a function to [Future] to bind `ResourceMeteringTag` to the specified -/// future context. It is used in the main business logic of TiKV. +/// In order to associate `ResourceMeteringTag` with a certain piece of code +/// logic, we added a function to [Future] to bind `ResourceMeteringTag` to the +/// specified future context. It is used in the main business logic of TiKV. /// /// [Future]: futures::Future pub struct ResourceMeteringTag { @@ -214,14 +214,15 @@ impl ResourceTagFactory { /// This trait extends the standard [Future]. /// -/// When the user imports [FutureExt], all futures in its module (such as async block) -/// will additionally support the [FutureExt::in_resource_metering_tag] method. This method -/// can bind a [ResourceMeteringTag] to the scope of this future (actually, it is stored in -/// the local storage of the thread where `Future` is located). During the polling period of -/// the future, we can continue to observe the system resources used by the thread in which -/// it is located, which is associated with `ResourceMeteringTag` and is also stored in thread -/// local storage. There is a background thread that continuously summarizes the storage of -/// each thread and reports it regularly. +/// When the user imports [FutureExt], all futures in its module (such as async +/// block) will additionally support the [FutureExt::in_resource_metering_tag] +/// method. 
This method can bind a [ResourceMeteringTag] to the scope of this +/// future (actually, it is stored in the local storage of the thread where +/// `Future` is located). During the polling period of the future, we can +/// continue to observe the system resources used by the thread in which it is +/// located, which is associated with `ResourceMeteringTag` and is also stored +/// in thread local storage. There is a background thread that continuously +/// summarizes the storage of each thread and reports it regularly. /// /// [Future]: futures::Future pub trait FutureExt: Sized { @@ -245,8 +246,9 @@ pub trait StreamExt: Sized { impl StreamExt for T {} -/// This structure is the return value of the [FutureExt::in_resource_metering_tag] method, -/// which wraps the original [Future] with a [ResourceMeteringTag]. +/// This structure is the return value of the +/// [FutureExt::in_resource_metering_tag] method, which wraps the original +/// [Future] with a [ResourceMeteringTag]. /// /// see [FutureExt] for more information. /// diff --git a/components/resource_metering/src/model.rs b/components/resource_metering/src/model.rs index 0cacc6930d4..1359e6c3a45 100644 --- a/components/resource_metering/src/model.rs +++ b/components/resource_metering/src/model.rs @@ -71,7 +71,8 @@ impl Default for RawRecords { } impl RawRecords { - /// Keep a maximum of `k` self.records and aggregate the others into returned [RawRecord]. + /// Keep a maximum of `k` self.records and aggregate the others into + /// returned [RawRecord]. 
pub fn keep_top_k(&mut self, k: usize) -> RawRecord { let mut others = RawRecord::default(); if self.records.len() <= k { diff --git a/components/resource_metering/src/recorder/collector_reg.rs b/components/resource_metering/src/recorder/collector_reg.rs index 8205a2290cb..f166101dfe5 100644 --- a/components/resource_metering/src/recorder/collector_reg.rs +++ b/components/resource_metering/src/recorder/collector_reg.rs @@ -30,16 +30,16 @@ impl CollectorRegHandle { } } - /// Register a collector to the recorder. Dropping the returned [CollectorGuard] will - /// preform deregistering. + /// Register a collector to the recorder. Dropping the returned + /// [CollectorGuard] will preform deregistering. /// - /// The second argument `as_observer` indicates that whether the given `collector` will - /// control the enabled state of the recorder: - /// - When `as_observer` is false, the recorder will respect it and begin to profile if it's - /// off before. In other words, if there is at least one non-observed collector, the recorder - /// will keep running. - /// - When `as_observer` is true, whether the recorder to be on or off won't depend on if - /// the collector exists. + /// The second argument `as_observer` indicates that whether the given + /// `collector` will control the enabled state of the recorder: + /// - When `as_observer` is false, the recorder will respect it and begin to + /// profile if it's off before. In other words, if there is at least one + /// non-observed collector, the recorder will keep running. + /// - When `as_observer` is true, whether the recorder to be on or off won't + /// depend on if the collector exists. 
pub fn register(&self, collector: Box, as_observer: bool) -> CollectorGuard { static NEXT_COLLECTOR_ID: AtomicU64 = AtomicU64::new(1); let id = CollectorId(NEXT_COLLECTOR_ID.fetch_add(1, Ordering::SeqCst)); diff --git a/components/resource_metering/src/recorder/localstorage.rs b/components/resource_metering/src/recorder/localstorage.rs index afc9554a212..c9f0b25b478 100644 --- a/components/resource_metering/src/recorder/localstorage.rs +++ b/components/resource_metering/src/recorder/localstorage.rs @@ -16,10 +16,11 @@ thread_local! { pub static STORAGE: RefCell = RefCell::new(LocalStorage::default()); } -/// `LocalStorage` is a thread-local structure that contains all necessary data of submodules. +/// `LocalStorage` is a thread-local structure that contains all necessary data +/// of submodules. /// -/// In order to facilitate mutual reference, the thread-local data of all sub-modules -/// need to be stored centrally in `LocalStorage`. +/// In order to facilitate mutual reference, the thread-local data of all +/// sub-modules need to be stored centrally in `LocalStorage`. #[derive(Clone, Default)] pub struct LocalStorage { pub registered: bool, diff --git a/components/resource_metering/src/recorder/mod.rs b/components/resource_metering/src/recorder/mod.rs index 92e6d094274..9ed6acfb74f 100644 --- a/components/resource_metering/src/recorder/mod.rs +++ b/components/resource_metering/src/recorder/mod.rs @@ -288,8 +288,9 @@ impl ConfigChangeNotifier { } } -/// Constructs a default [Recorder], spawn it and return the corresponding [ConfigChangeNotifier], -/// [CollectorRegHandle], [ResourceTagFactory] and [LazyWorker]. +/// Constructs a default [Recorder], spawn it and return the corresponding +/// [ConfigChangeNotifier], [CollectorRegHandle], [ResourceTagFactory] and +/// [LazyWorker]. /// /// This function is intended to simplify external use. 
pub fn init_recorder( diff --git a/components/resource_metering/src/recorder/sub_recorder/mod.rs b/components/resource_metering/src/recorder/sub_recorder/mod.rs index e36acb26ddb..42647f3486d 100644 --- a/components/resource_metering/src/recorder/sub_recorder/mod.rs +++ b/components/resource_metering/src/recorder/sub_recorder/mod.rs @@ -8,19 +8,22 @@ use crate::{recorder::localstorage::LocalStorage, RawRecords}; pub mod cpu; pub mod summary; -/// This trait defines a general framework that works at a certain frequency. Typically, -/// it describes the recorder(sampler) framework for a specific resource. +/// This trait defines a general framework that works at a certain frequency. +/// Typically, it describes the recorder(sampler) framework for a specific +/// resource. /// -/// [Recorder] will maintain a list of sub-recorders, driving all sub-recorders to work -/// according to the behavior described in this trait. +/// [Recorder] will maintain a list of sub-recorders, driving all sub-recorders +/// to work according to the behavior described in this trait. pub trait SubRecorder: Send { - /// This function is called at a fixed frequency. (A typical frequency is 99hz.) + /// This function is called at a fixed frequency. (A typical frequency is + /// 99hz.) /// - /// The [RawRecords] and [LocalStorage] map of all threads will be passed in through - /// parameters. We need to collect resources (may be from each `LocalStorage`) and - /// write them into `RawRecords`. + /// The [RawRecords] and [LocalStorage] map of all threads will be passed in + /// through parameters. We need to collect resources (may be from each + /// `LocalStorage`) and write them into `RawRecords`. /// - /// The implementation needs to sample the resource in this function (in general). + /// The implementation needs to sample the resource in this function (in + /// general). 
/// /// [RawRecords]: crate::model::RawRecords /// [LocalStorage]: crate::localstorage::LocalStorage @@ -30,8 +33,8 @@ pub trait SubRecorder: Send { /// This function is called every time before reporting to Collector. /// The default period is 1 second. /// - /// The [RawRecords] and [LocalStorage] map of all threads will be passed in through parameters. - /// `usize` is thread_id without platform dependency. + /// The [RawRecords] and [LocalStorage] map of all threads will be passed in + /// through parameters. `usize` is thread_id without platform dependency. /// /// [RawRecords]: crate::model::RawRecords /// [LocalStorage]: crate::localstorage::LocalStorage diff --git a/components/resource_metering/src/recorder/sub_recorder/summary.rs b/components/resource_metering/src/recorder/sub_recorder/summary.rs index 34cf07f9caf..93ba95080e3 100644 --- a/components/resource_metering/src/recorder/sub_recorder/summary.rs +++ b/components/resource_metering/src/recorder/sub_recorder/summary.rs @@ -35,8 +35,9 @@ pub fn record_write_keys(count: u32) { /// An implementation of [SubRecorder] for collecting summary data. /// -/// `SummaryRecorder` uses some special methods ([record_read_keys]/[record_write_keys]) -/// to collect external statistical information. +/// `SummaryRecorder` uses some special methods +/// ([record_read_keys]/[record_write_keys]) to collect external statistical +/// information. /// /// See [SubRecorder] for more relevant designs. /// @@ -59,7 +60,8 @@ impl SubRecorder for SummaryRecorder { } // The request currently being polled has not yet been merged into the hashmap, // so it needs to be processed separately. 
(For example, a slow request that is - // blocking needs to reflect in real time how many keys have been read currently) + // blocking needs to reflect in real time how many keys have been read + // currently) if let Some(t) = ls.attached_tag.load_full() { if t.extra_attachment.is_empty() { return; diff --git a/components/resource_metering/src/reporter/data_sink.rs b/components/resource_metering/src/reporter/data_sink.rs index 1dadc2723bc..e453bdd3371 100644 --- a/components/resource_metering/src/reporter/data_sink.rs +++ b/components/resource_metering/src/reporter/data_sink.rs @@ -9,7 +9,8 @@ use crate::error::Result; /// This trait abstracts the interface to communicate with the remote. /// We can simply mock this interface to test without RPC. pub trait DataSink: Send { - // `try_send` pushes a report data into the sink, which will later be sent to a target - // by the sink. If the sink is kept full, or the sink is closed, an error will be returned. + // `try_send` pushes a report data into the sink, which will later be sent to a + // target by the sink. If the sink is kept full, or the sink is closed, an error + // will be returned. fn try_send(&mut self, records: Arc>) -> Result<()>; } diff --git a/components/resource_metering/src/reporter/mod.rs b/components/resource_metering/src/reporter/mod.rs index 024a79bde53..721fb570b22 100644 --- a/components/resource_metering/src/reporter/mod.rs +++ b/components/resource_metering/src/reporter/mod.rs @@ -30,9 +30,9 @@ use crate::{ /// A structure for reporting statistics through [Client]. /// -/// `Reporter` implements [Runnable] and [RunnableWithTimer] to handle [Task]s from -/// the [Scheduler]. It internally aggregates the reported [RawRecords] into [Records] -/// and upload them to the remote server through the `Client`. +/// `Reporter` implements [Runnable] and [RunnableWithTimer] to handle [Task]s +/// from the [Scheduler]. 
It internally aggregates the reported [RawRecords] +/// into [Records] and upload them to the remote server through the `Client`. /// /// [Runnable]: tikv_util::worker::Runnable /// [RunnableWithTimer]: tikv_util::worker::RunnableWithTimer @@ -205,7 +205,8 @@ impl ConfigChangeNotifier { } } -/// Constructs a default [Recorder], start it and return the corresponding [ConfigChangeNotifier], [DataSinkRegHandle] and [LazyWorker]. +/// Constructs a default [Recorder], start it and return the corresponding +/// [ConfigChangeNotifier], [DataSinkRegHandle] and [LazyWorker]. /// /// This function is intended to simplify external use. pub fn init_reporter( diff --git a/components/resource_metering/src/reporter/pubsub.rs b/components/resource_metering/src/reporter/pubsub.rs index 0112a8b17db..62144ec920c 100644 --- a/components/resource_metering/src/reporter/pubsub.rs +++ b/components/resource_metering/src/reporter/pubsub.rs @@ -22,8 +22,9 @@ use crate::{ /// `PubSubService` implements [ResourceMeteringPubSub]. /// -/// If a client subscribes to resource metering records, the `PubSubService` is responsible for -/// registering them to the reporter. Then the reporter sends data to the client periodically. +/// If a client subscribes to resource metering records, the `PubSubService` is +/// responsible for registering them to the reporter. Then the reporter sends +/// data to the client periodically. 
/// /// [ResourceMeteringPubSub]: kvproto::resource_usage_agent_grpc::ResourceMeteringPubSub #[derive(Clone)] diff --git a/components/resource_metering/src/reporter/single_target.rs b/components/resource_metering/src/reporter/single_target.rs index 69817bc847b..09609b84462 100644 --- a/components/resource_metering/src/reporter/single_target.rs +++ b/components/resource_metering/src/reporter/single_target.rs @@ -41,8 +41,8 @@ impl Runnable for SingleTargetDataSink { } } -/// `SingleTargetDataSink` is the default implementation of [DataSink], which uses gRPC -/// to report data to the remote end. +/// `SingleTargetDataSink` is the default implementation of [DataSink], which +/// uses gRPC to report data to the remote end. pub struct SingleTargetDataSink { scheduler: Scheduler, data_sink_reg: DataSinkRegHandle, @@ -246,8 +246,8 @@ impl Drop for Guard { } } -/// Constructs a default [SingleTargetDataSink], start it and return the corresponding [AddressChangeNotifier] -/// and [LazyWorker]. +/// Constructs a default [SingleTargetDataSink], start it and return the +/// corresponding [AddressChangeNotifier] and [LazyWorker]. /// /// This function is intended to simplify external use. pub fn init_single_target( diff --git a/components/resource_metering/tests/summary_test.rs b/components/resource_metering/tests/summary_test.rs index c5a9ae61ac3..ae647055206 100644 --- a/components/resource_metering/tests/summary_test.rs +++ b/components/resource_metering/tests/summary_test.rs @@ -53,7 +53,7 @@ fn test_summary() { let data_sink = MockDataSink::default(); - /* At this point we are ready for everything except turning on the switch. */ + // At this point we are ready for everything except turning on the switch. 
// expect no data { diff --git a/components/security/src/lib.rs b/components/security/src/lib.rs index ed5ff0d1fa4..d984ccb353d 100644 --- a/components/security/src/lib.rs +++ b/components/security/src/lib.rs @@ -41,7 +41,8 @@ pub struct SecurityConfig { /// /// # Arguments /// -/// - `tag`: only used in the error message, like "ca key", "cert key", "private key", etc. +/// - `tag`: only used in the error message, like "ca key", "cert key", +/// "private key", etc. fn check_key_file(tag: &str, path: &str) -> Result, Box> { if path.is_empty() { return Ok(None); diff --git a/components/server/src/raft_engine_switch.rs b/components/server/src/raft_engine_switch.rs index bf06ecefcea..7ada07d5206 100644 --- a/components/server/src/raft_engine_switch.rs +++ b/components/server/src/raft_engine_switch.rs @@ -158,7 +158,8 @@ fn run_dump_raftdb_worker( let mut state = RaftLocalState::default(); state.merge_from_bytes(value)?; batch.put_raft_state(region_id, &state).unwrap(); - // Assume that we always scan entry first and raft state at the end. + // Assume that we always scan entry first and raft state at the + // end. batch .append(region_id, std::mem::take(&mut entries)) .unwrap(); @@ -237,8 +238,8 @@ mod tests { // Dump logs from RocksEngine to RaftLogEngine. let raft_engine = RaftLogEngine::new( cfg.raft_engine.config(), - None, /*key_manager*/ - None, /*io_rate_limiter*/ + None, // key_manager + None, // io_rate_limiter ) .expect("open raft engine"); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index ad788f2ecec..73269c3f07a 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -2,13 +2,14 @@ //! This module startups all the components of a TiKV server. //! -//! It is responsible for reading from configs, starting up the various server components, -//! and handling errors (mostly by aborting and reporting to the user). +//! It is responsible for reading from configs, starting up the various server +//! 
components, and handling errors (mostly by aborting and reporting to the +//! user). //! //! The entry point is `run_tikv`. //! -//! Components are often used to initialize other components, and/or must be explicitly stopped. -//! We keep these components in the `TiKvServer` struct. +//! Components are often used to initialize other components, and/or must be +//! explicitly stopped. We keep these components in the `TiKvServer` struct. use std::{ cmp, @@ -363,7 +364,8 @@ impl TiKvServer { /// /// # Fatal errors /// - /// - If `dynamic config` feature is enabled and failed to register config to PD + /// - If `dynamic config` feature is enabled and failed to register config + /// to PD /// - If some critical configs (like data dir) are differrent from last run /// - If the config can't pass `validate()` /// - If the max open file descriptor limit is not high enough to support @@ -488,9 +490,10 @@ impl TiKvServer { ); } - // We truncate a big file to make sure that both raftdb and kvdb of TiKV have enough space - // to do compaction and region migration when TiKV recover. This file is created in - // data_dir rather than db_path, because we must not increase store size of db_path. + // We truncate a big file to make sure that both raftdb and kvdb of TiKV have + // enough space to do compaction and region migration when TiKV recover. + // This file is created in data_dir rather than db_path, because we must not + // increase store size of db_path. let disk_stats = fs2::statvfs(&self.config.storage.data_dir).unwrap(); let mut capacity = disk_stats.total_space(); if self.config.raft_store.capacity.0 > 0 { @@ -1017,7 +1020,8 @@ impl TiKvServer { ) .unwrap_or_else(|e| fatal!("failed to start node: {}", e)); - // Start auto gc. Must after `Node::start` because `node_id` is initialized there. + // Start auto gc. Must after `Node::start` because `node_id` is initialized + // there. assert!(node.id() > 0); // Node id should never be 0. 
let auto_gc_config = AutoGcConfig::new( self.pd_client.clone(), @@ -1242,15 +1246,15 @@ impl TiKvServer { self.config .storage .io_rate_limit - .build(!stats_collector_enabled /*enable_statistics*/), + .build(!stats_collector_enabled /* enable_statistics */), ); let fetcher = if stats_collector_enabled { BytesFetcher::FromIOStatsCollector() } else { BytesFetcher::FromRateLimiter(limiter.statistics().unwrap()) }; - // Set up IO limiter even when rate limit is disabled, so that rate limits can be - // dynamically applied later on. + // Set up IO limiter even when rate limit is disabled, so that rate limits can + // be dynamically applied later on. set_io_rate_limiter(Some(limiter)); fetcher } @@ -1286,7 +1290,8 @@ impl TiKvServer { }); } - // Only background cpu quota tuning is implemented at present. iops and frontend quota tuning is on the way + // Only background cpu quota tuning is implemented at present. iops and frontend + // quota tuning is on the way fn init_quota_tuning_task(&self, quota_limiter: Arc) { // No need to do auto tune when capacity is really low if SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_MAX_RATIO @@ -1335,9 +1340,11 @@ impl TiKvServer { }; // Try tuning quota when cpu_usage is correctly collected. // rule based tuning: - // 1) if instance is busy, shrink cpu quota for analyze by one quota pace until lower bound is hit; - // 2) if instance cpu usage is healthy, no op; - // 3) if instance is idle, increase cpu quota by one quota pace until upper bound is hit. + // - if instance is busy, shrink cpu quota for analyze by one quota pace until + // lower bound is hit; + // - if instance cpu usage is healthy, no op; + // - if instance is idle, increase cpu quota by one quota pace until upper + // bound is hit. 
if cpu_usage > 0.0f64 { let mut target_quota = old_quota; @@ -1560,7 +1567,7 @@ impl ConfiguredRaftEngine for RocksEngine { let raft_engine = RaftLogEngine::new(config.raft_engine.config(), key_manager.clone(), None) .expect("failed to open raft engine for migration"); - dump_raft_engine_to_raftdb(&raft_engine, &raftdb, 8 /*threads*/); + dump_raft_engine_to_raftdb(&raft_engine, &raftdb, 8 /* threads */); raft_engine.stop(); drop(raft_engine); raft_data_state_machine.after_dump_data(); @@ -1614,7 +1621,7 @@ impl ConfiguredRaftEngine for RaftLogEngine { raft_cf_opts, ) .expect("failed to open raftdb for migration"); - dump_raftdb_to_raft_engine(&raftdb, &raft_engine, 8 /*threads*/); + dump_raftdb_to_raft_engine(&raftdb, &raft_engine, 8 /* threads */); raftdb.stop(); drop(raftdb); raft_data_state_machine.after_dump_data(); @@ -1674,7 +1681,7 @@ impl TiKvServer { .register_config(cfg_controller, self.config.storage.block_cache.shared); let engines_info = Arc::new(EnginesResourceInfo::new( - &engines, 180, /*max_samples_to_preserve*/ + &engines, 180, // max_samples_to_preserve )); (engines, engines_info) diff --git a/components/server/src/setup.rs b/components/server/src/setup.rs index 3e37d87242c..4f49f6fb86e 100644 --- a/components/server/src/setup.rs +++ b/components/server/src/setup.rs @@ -16,7 +16,8 @@ use tikv_util::{self, config, logger}; // A workaround for checking if log is initialized. pub static LOG_INITIALIZED: AtomicBool = AtomicBool::new(false); -// The info log file names does not end with ".log" since it conflict with rocksdb WAL files. +// The info log file names does not end with ".log" since it conflict with +// rocksdb WAL files. pub const DEFAULT_ROCKSDB_LOG_FILE: &str = "rocksdb.info"; pub const DEFAULT_RAFTDB_LOG_FILE: &str = "raftdb.info"; @@ -33,11 +34,12 @@ macro_rules! fatal { }) } -// TODO: There is a very small chance that duplicate files will be generated if there are -// a lot of logs written in a very short time. 
Consider rename the rotated file with a version -// number while rotate by size. +// TODO: There is a very small chance that duplicate files will be generated if +// there are a lot of logs written in a very short time. Consider rename the +// rotated file with a version number while rotate by size. // -// The file name format after rotated is as follows: "{original name}.{"%Y-%m-%dT%H-%M-%S%.3f"}" +// The file name format after rotated is as follows: +// "{original name}.{"%Y-%m-%dT%H-%M-%S%.3f"}" fn rename_by_timestamp(path: &Path) -> io::Result { let mut new_path = path.parent().unwrap().to_path_buf(); let mut new_fname = path.file_stem().unwrap().to_os_string(); @@ -76,7 +78,8 @@ pub fn initial_logger(config: &TiKvConfig) { let rocksdb_info_log_path = if !config.rocksdb.info_log_dir.is_empty() { make_engine_log_path(&config.rocksdb.info_log_dir, "", DEFAULT_ROCKSDB_LOG_FILE) } else { - // Don't use `DEFAULT_ROCKSDB_SUB_DIR`, because of the logic of `RocksEngine::exists`. + // Don't use `DEFAULT_ROCKSDB_SUB_DIR`, because of the logic of + // `RocksEngine::exists`. make_engine_log_path(&config.storage.data_dir, "", DEFAULT_ROCKSDB_LOG_FILE) }; let raftdb_info_log_path = if !config.raftdb.info_log_dir.is_empty() { diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index be93ded1554..60f72052b10 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -302,7 +302,8 @@ impl ImportDir { for meta in metas { match (api_version, meta.api_version) { (cur_version, meta_version) if cur_version == meta_version => continue, - // sometimes client do not know whether ttl is enabled, so a general V1 is accepted as V1ttl + // sometimes client do not know whether ttl is enabled, so a general V1 is accepted + // as V1ttl (ApiVersion::V1ttl, ApiVersion::V1) => continue, // import V1ttl as V1 will immediatly be rejected because it is never correct. 
(ApiVersion::V1, ApiVersion::V1ttl) => return Ok(false), @@ -451,8 +452,9 @@ pub fn path_to_sst_meta>(path: P) -> Result { meta.mut_region_epoch().set_conf_ver(elems[2].parse()?); meta.mut_region_epoch().set_version(elems[3].parse()?); if elems.len() > 4 { - // If we upgrade TiKV from 3.0.x to 4.0.x and higher version, we can not read cf_name from - // the file path, because TiKV 3.0.x does not encode cf_name to path. + // If we upgrade TiKV from 3.0.x to 4.0.x and higher version, we can not read + // cf_name from the file path, because TiKV 3.0.x does not encode + // cf_name to path. meta.set_cf_name(elems[4].to_owned()); } Ok(meta) diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 1d4e2e916dc..356541cebbb 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -326,8 +326,9 @@ impl SstImporter { path.temp.clone(), backend, expected_sha256, - // kv-files needn't are decrypted with KMS when download currently because these files are not encrypted when log-backup. - // It is different from sst-files because sst-files is encrypted when saved with rocksdb env with KMS. + // kv-files needn't are decrypted with KMS when download currently because these files + // are not encrypted when log-backup. It is different from sst-files + // because sst-files is encrypted when saved with rocksdb env with KMS. // to do: support KMS when log-backup and restore point. false, // don't support encrypt for now. @@ -431,7 +432,8 @@ impl SstImporter { } if check_key_in_range(&key, 0, start_key, end_key).is_err() { // key not in range, we can simply skip this key here. - // the client make sure the correct region will download and apply the same file. + // the client make sure the correct region will download and apply the same + // file. 
INPORTER_APPLY_COUNT .with_label_values(&["key_not_in_region"]) .inc(); @@ -573,7 +575,8 @@ impl SstImporter { return Ok(None); } - // range contained the entire SST, no need to iterate, just moving the file is ok + // range contained the entire SST, no need to iterate, just moving the file is + // ok let mut range = Range::default(); range.set_start(start_key); range.set_end(last_key.to_vec()); @@ -844,7 +847,7 @@ mod tests { // Test ImportDir::ingest() let db_path = temp_dir.path().join("db"); - let env = get_env(key_manager.clone(), None /*io_rate_limiter*/).unwrap(); + let env = get_env(key_manager.clone(), None /* io_rate_limiter */).unwrap(); let db = new_test_engine_with_env(db_path.to_str().unwrap(), &[CF_DEFAULT], env); let cases = vec![(0, 10), (5, 15), (10, 20), (0, 100)]; @@ -1363,7 +1366,7 @@ mod tests { .unwrap(); let db_path = temp_dir.path().join("db"); - let env = get_env(Some(key_manager), None /*io_rate_limiter*/).unwrap(); + let env = get_env(Some(key_manager), None /* io_rate_limiter */).unwrap(); let db = new_test_engine_with_env(db_path.to_str().unwrap(), DATA_CFS, env.clone()); let range = importer @@ -1599,8 +1602,8 @@ mod tests { // key3 = "zt9102_r07", value3 = "pqrst", len = 15 // key4 = "zt9102_r13", value4 = "www", len = 13 // total_bytes = (13 + 13 + 15 + 13) + 4 * 8 = 86 - // don't no why each key has extra 8 byte length in raw_key_size(), but it seems tolerable. - // https://docs.rs/rocks/0.1.0/rocks/table_properties/struct.TableProperties.html#method.raw_key_size + // don't no why each key has extra 8 byte length in raw_key_size(), but it seems + // tolerable. 
https://docs.rs/rocks/0.1.0/rocks/table_properties/struct.TableProperties.html#method.raw_key_size assert_eq!(meta_info.total_bytes, 86); assert_eq!(meta_info.total_kvs, 4); diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index 042b430b811..6ba4d892717 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs @@ -9,8 +9,8 @@ use file_system::File; use super::Result; /// Prepares the SST file for ingestion. -/// The purpose is to make the ingestion retryable when using the `move_files` option. -/// Things we need to consider here: +/// The purpose is to make the ingestion retryable when using the `move_files` +/// option. Things we need to consider here: /// 1. We need to access the original file on retry, so we should make a clone /// before ingestion. /// 2. `RocksDB` will modified the global seqno of the ingested file, so we need @@ -32,8 +32,9 @@ pub fn prepare_sst_for_ingestion, Q: AsRef>( if Path::new(clone).exists() { file_system::remove_file(clone).map_err(|e| format!("remove {}: {:?}", clone, e))?; } - // always try to remove the file from key manager because the clean up in rocksdb is not atomic, - // thus the file may be deleted but key in key manager is not. + // always try to remove the file from key manager because the clean up in + // rocksdb is not atomic, thus the file may be deleted but key in key + // manager is not. if let Some(key_manager) = encryption_key_manager { key_manager.delete_file(clone)?; } @@ -160,8 +161,8 @@ mod tests { .unwrap(); check_db_with_kvs(&db, CF_DEFAULT, &kvs); assert!(!sst_clone.exists()); - // Since we are not using key_manager in db, simulate the db deleting the file from - // key_manager. + // Since we are not using key_manager in db, simulate the db deleting the file + // from key_manager. 
if let Some(manager) = key_manager { manager.delete_file(sst_clone.to_str().unwrap()).unwrap(); } @@ -180,8 +181,8 @@ mod tests { #[test] fn test_prepare_sst_for_ingestion() { check_prepare_sst_for_ingestion( - None, None, None, /*key_manager*/ - false, /* was encrypted*/ + None, None, None, // key_manager + false, // was encrypted ); } @@ -197,8 +198,8 @@ mod tests { check_prepare_sst_for_ingestion( Some(db_opts), Some(vec![(CF_DEFAULT, cf_opts)]), - None, /*key_manager*/ - false, /*was_encrypted*/ + None, // key_manager + false, // was_encrypted ); } @@ -207,7 +208,7 @@ mod tests { let tmp_dir = tempfile::TempDir::new().unwrap(); let key_manager = new_test_key_manager(&tmp_dir, None, None, None); let manager = Arc::new(key_manager.unwrap().unwrap()); - check_prepare_sst_for_ingestion(None, None, Some(&manager), false /*was_encrypted*/); + check_prepare_sst_for_ingestion(None, None, Some(&manager), false /* was_encrypted */); } #[test] @@ -215,6 +216,6 @@ mod tests { let tmp_dir = tempfile::TempDir::new().unwrap(); let key_manager = new_test_key_manager(&tmp_dir, None, None, None); let manager = Arc::new(key_manager.unwrap().unwrap()); - check_prepare_sst_for_ingestion(None, None, Some(&manager), true /*was_encrypted*/); + check_prepare_sst_for_ingestion(None, None, Some(&manager), true /* was_encrypted */); } } diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index dfdffd97105..e6622128243 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -52,7 +52,7 @@ pub struct TestSuite { // Retry if encounter error macro_rules! 
retry_req { - ($call_req: expr, $check_resp: expr, $resp:ident, $retry:literal, $timeout:literal) => { + ($call_req:expr, $check_resp:expr, $resp:ident, $retry:literal, $timeout:literal) => { let start = Instant::now(); let timeout = Duration::from_millis($timeout); let mut tried_times = 0; diff --git a/components/test_coprocessor/src/dag.rs b/components/test_coprocessor/src/dag.rs index 38476f694f5..4165d19bdb4 100644 --- a/components/test_coprocessor/src/dag.rs +++ b/components/test_coprocessor/src/dag.rs @@ -112,7 +112,8 @@ impl DAGSelect { col_expr.mut_val().encode_i64(col_offset).unwrap(); let mut expr = Expr::default(); let mut expr_ft = col.as_field_type(); - // Avg will contains two auxiliary columns (sum, count) and the sum should be a `Decimal` + // Avg will contains two auxiliary columns (sum, count) and the sum should be a + // `Decimal` if aggr_t == ExprType::Avg || aggr_t == ExprType::Sum { expr_ft.set_tp(0xf6); // FieldTypeTp::NewDecimal } diff --git a/components/test_coprocessor/src/fixture.rs b/components/test_coprocessor/src/fixture.rs index c7feacedbfe..55a7f72a07f 100644 --- a/components/test_coprocessor/src/fixture.rs +++ b/components/test_coprocessor/src/fixture.rs @@ -122,7 +122,8 @@ pub fn init_data_with_commit( init_data_with_engine_and_commit(Context::default(), engine, tbl, vals, commit) } -// This function will create a Product table and initialize with the specified data. +// This function will create a Product table and initialize with the specified +// data. pub fn init_with_data( tbl: &ProductTable, vals: &[(i64, Option<&str>, i64)], diff --git a/components/test_coprocessor/src/table.rs b/components/test_coprocessor/src/table.rs index 91910d4c2bf..af070f62759 100644 --- a/components/test_coprocessor/src/table.rs +++ b/components/test_coprocessor/src/table.rs @@ -88,7 +88,8 @@ impl Table { range } - /// Create a `KeyRange` which select records in the range. The end_handle_id is included. 
+ /// Create a `KeyRange` which select records in the range. The end_handle_id + /// is included. pub fn get_record_range(&self, start_handle_id: i64, end_handle_id: i64) -> KeyRange { let mut range = KeyRange::default(); range.set_start(table::encode_row_key(self.id, start_handle_id)); @@ -103,7 +104,8 @@ impl Table { self.get_record_range(handle_id, handle_id) } - /// Create a `KeyRange` which select all index records of a specified index in current table. + /// Create a `KeyRange` which select all index records of a specified index + /// in current table. pub fn get_index_range_all(&self, idx: i64) -> KeyRange { let mut range = KeyRange::default(); let mut buf = Vec::with_capacity(8); diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 0359952d237..301647bf267 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -180,7 +180,8 @@ impl Cluster { pd_client: Arc, api_version: ApiVersion, ) -> Cluster { - // TODO: In the future, maybe it's better to test both case where `use_delete_range` is true and false + // TODO: In the future, maybe it's better to test both case where + // `use_delete_range` is true and false Cluster { cfg: Config { tikv: new_tikv_config_with_api_ver(id, api_version), @@ -221,11 +222,12 @@ impl Cluster { Ok(()) } - /// Engines in a just created cluster are not bootstraped, which means they are not associated - /// with a `node_id`. Call `Cluster::start` can bootstrap all nodes in the cluster. + /// Engines in a just created cluster are not bootstrapped, which means they + /// are not associated with a `node_id`. Call `Cluster::start` can bootstrap + /// all nodes in the cluster. /// - /// However sometimes a node can be bootstrapped externally. This function can be called to - /// mark them as bootstrapped in `Cluster`. + /// However sometimes a node can be bootstrapped externally. 
This function + /// can be called to mark them as bootstrapped in `Cluster`. pub fn set_bootstrapped(&mut self, node_id: u64, offset: usize) { let engines = self.dbs[offset].clone(); let key_mgr = self.key_managers[offset].clone(); @@ -248,7 +250,7 @@ impl Cluster { self.cfg .storage .io_rate_limit - .build(true /*enable_statistics*/), + .build(true /* enable_statistics */), )); for _ in 0..self.count { self.create_engine(None); @@ -304,7 +306,7 @@ impl Cluster { pub fn flush_data(&self) { for engine in self.engines.values() { let db = &engine.kv; - db.flush_cf(CF_DEFAULT, true /*sync*/).unwrap(); + db.flush_cf(CF_DEFAULT, true /* sync */).unwrap(); } } @@ -605,9 +607,9 @@ impl Cluster { assert_eq!(self.pd_client.get_regions_number() as u32, len) } - // For test when a node is already bootstraped the cluster with the first region - // But another node may request bootstrap at same time and get is_bootstrap false - // Add Region but not set bootstrap to true + // For test when a node is already bootstrapped the cluster with the first + // region But another node may request bootstrap at same time and get + // is_bootstrap false Add Region but not set bootstrap to true pub fn add_first_region(&self) -> Result<()> { let mut region = metapb::Region::default(); let region_id = self.pd_client.alloc_id().unwrap(); @@ -1347,8 +1349,8 @@ impl Cluster { } } - // It's similar to `ask_split`, the difference is the msg, it sends, is `Msg::SplitRegion`, - // and `region` will not be embedded to that msg. + // It's similar to `ask_split`, the difference is the msg, it sends, is + // `Msg::SplitRegion`, and `region` will not be embedded to that msg. // Caller must ensure that the `split_key` is in the `region`. 
pub fn split_region( &mut self, diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 27cbd367ba7..ac3e3a6cc6e 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -187,8 +187,8 @@ impl NodeCluster { .unwrap() } - // Set a function that will be invoked after creating each CoprocessorHost. The first argument - // of `op` is the node_id. + // Set a function that will be invoked after creating each CoprocessorHost. The + // first argument of `op` is the node_id. // Set this before invoking `run_node`. #[allow(clippy::type_complexity)] pub fn post_create_coprocessor_host( diff --git a/components/test_raftstore/src/pd.rs b/components/test_raftstore/src/pd.rs index 66823a29708..45a69896296 100644 --- a/components/test_raftstore/src/pd.rs +++ b/components/test_raftstore/src/pd.rs @@ -410,9 +410,9 @@ impl PdCluster { fn put_store(&mut self, store: metapb::Store) -> Result<()> { let store_id = store.get_id(); - // There is a race between put_store and handle_region_heartbeat_response. If store id is - // 0, it means it's a placeholder created by latter, we just need to update the meta. - // Otherwise we should overwrite it. + // There is a race between put_store and handle_region_heartbeat_response. If + // store id is 0, it means it's a placeholder created by latter, we just need to + // update the meta. Otherwise we should overwrite it. if self .stores .get(&store_id) @@ -538,8 +538,8 @@ impl PdCluster { && incoming_epoch.get_conf_ver() == 0; let overlaps = self.get_overlap(start_key, end_key); if created_by_unsafe_recovery { - // Allow recreated region by unsafe recover to overwrite other regions with a "older" - // epoch. + // Allow recreated region by unsafe recover to overwrite other regions with a + // "older" epoch. 
return Ok(overlaps); } for r in overlaps.iter() { @@ -1318,7 +1318,8 @@ impl TestPdClient { self.cluster.wl().check_merge_target_integrity = false; } - /// The next generated TSO will be `ts + 1`. See `get_tso()` and `batch_get_tso()`. + /// The next generated TSO will be `ts + 1`. See `get_tso()` and + /// `batch_get_tso()`. pub fn set_tso(&self, ts: TimeStamp) { let old = self.tso.swap(ts.into_inner(), Ordering::SeqCst); if old > ts.into_inner() { diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 5d85fff86bc..e22b730151a 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -167,7 +167,8 @@ impl ServerCluster { ); let security_mgr = Arc::new(SecurityManager::new(&Default::default()).unwrap()); let map = AddressMap::default(); - // We don't actually need to handle snapshot message, just create a dead worker to make it compile. + // We don't actually need to handle snapshot message, just create a dead worker + // to make it compile. let worker = LazyWorker::new("snap-worker"); let conn_builder = ConnectionBuilder::new( env.clone(), diff --git a/components/test_raftstore/src/transport_simulate.rs b/components/test_raftstore/src/transport_simulate.rs index 9ebba64aa48..e8fba33f65f 100644 --- a/components/test_raftstore/src/transport_simulate.rs +++ b/components/test_raftstore/src/transport_simulate.rs @@ -507,10 +507,11 @@ impl Filter for SnapshotFilter { } } -/// `CollectSnapshotFilter` is a simulation transport filter to simulate the simultaneous delivery -/// of multiple snapshots from different peers. It collects the snapshots from different -/// peers and drop the subsequent snapshots from the same peers. Currently, if there are -/// more than 1 snapshots in this filter, all the snapshots will be dilivered at once. +/// `CollectSnapshotFilter` is a simulation transport filter to simulate the +/// simultaneous delivery of multiple snapshots from different peers. 
It +/// collects the snapshots from different peers and drop the subsequent +/// snapshots from the same peers. Currently, if there are more than 1 snapshots +/// in this filter, all the snapshots will be delivered at once. pub struct CollectSnapshotFilter { dropped: AtomicBool, stale: AtomicBool, @@ -753,10 +754,11 @@ impl Filter for LeadingDuplicatedSnapshotFilter { } } -/// `RandomLatencyFilter` is a transport filter to simulate randomized network latency. -/// Based on a randomized rate, `RandomLatencyFilter` will decide whether to delay -/// the sending of any message. It's could be used to simulate the message sending -/// in a network with random latency, where messages could be delayed, disordered or lost. +/// `RandomLatencyFilter` is a transport filter to simulate randomized network +/// latency. Based on a randomized rate, `RandomLatencyFilter` will decide +/// whether to delay the sending of any message. It's could be used to simulate +/// the message sending in a network with random latency, where messages could +/// be delayed, disordered or lost. pub struct RandomLatencyFilter { delay_rate: u32, delayed_msgs: Mutex>, diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index bdd7c08b7e8..c399b4813f2 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -724,8 +724,9 @@ pub fn configure_for_lease_read( // Adjust max leader lease. cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(election_timeout - base_tick_interval); - // Use large peer check interval, abnormal and max leader missing duration to make a valid config, - // that is election timeout x 2 < peer stale state check < abnormal < max leader missing duration. + // Use large peer check interval, abnormal and max leader missing duration to + // make a valid config, that is election timeout x 2 < peer stale state + // check < abnormal < max leader missing duration. 
cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration(election_timeout * 3); cluster.cfg.raft_store.abnormal_leader_missing_duration = ReadableDuration(election_timeout * 4); @@ -1169,7 +1170,8 @@ pub fn check_compacted( compact_count: u64, must_compacted: bool, ) -> bool { - // Every peer must have compacted logs, so the truncate log state index/term must > than before. + // Every peer must have compacted logs, so the truncate log state index/term + // must > than before. let mut compacted_idx = HashMap::default(); for (&id, engines) in all_engines { diff --git a/components/test_util/src/lib.rs b/components/test_util/src/lib.rs index dc053bd6d20..d2096e74c82 100644 --- a/components/test_util/src/lib.rs +++ b/components/test_util/src/lib.rs @@ -32,9 +32,9 @@ pub use crate::{ }; pub fn setup_for_ci() { - // We use backtrace in tests to record suspicious problems. And loading backtrace - // the first time can take several seconds. Spawning a thread and load it ahead - // of time to avoid causing timeout. + // We use backtrace in tests to record suspicious problems. And loading + // backtrace the first time can take several seconds. Spawning a thread and + // load it ahead of time to avoid causing timeout. thread::Builder::new() .name(tikv_util::thd_name!("backtrace-loader")) .spawn_wrapper(::backtrace::Backtrace::new) diff --git a/components/test_util/src/runner.rs b/components/test_util/src/runner.rs index e7ef1ba0cb5..d05f7e98879 100644 --- a/components/test_util/src/runner.rs +++ b/components/test_util/src/runner.rs @@ -99,9 +99,9 @@ impl TestHook for FailpointHook { } } -/// During panic, due to drop order, failpoints will not be cleared before tests exit. -/// If tests wait for a sleep failpoint, the whole tests will hang. So we need a method -/// to clear failpoints explicitly besides teardown. +/// During panic, due to drop order, failpoints will not be cleared before tests +/// exit. 
If tests wait for a sleep failpoint, the whole tests will hang. So we +/// need a method to clear failpoints explicitly besides teardown. pub fn clear_failpoints() { FS.with(|s| s.borrow_mut().take()); } diff --git a/components/tidb_query_aggr/src/impl_avg.rs b/components/tidb_query_aggr/src/impl_avg.rs index ec4784b24e4..6337c8de6c5 100644 --- a/components/tidb_query_aggr/src/impl_avg.rs +++ b/components/tidb_query_aggr/src/impl_avg.rs @@ -73,7 +73,8 @@ impl super::AggrDefinitionParser for AggrFnDefinitionParserAvg { /// The AVG aggregate function. /// -/// Note that there are `AVG(Decimal) -> (Int, Decimal)` and `AVG(Double) -> (Int, Double)`. +/// Note that there are `AVG(Decimal) -> (Int, Decimal)` and `AVG(Double) -> +/// (Int, Double)`. #[derive(Debug, AggrFunction)] #[aggr_function(state = AggrFnStateAvg::::new())] pub struct AggrFnAvg diff --git a/components/tidb_query_aggr/src/impl_count.rs b/components/tidb_query_aggr/src/impl_count.rs index 0e17f1adfb6..3d49d8b25af 100644 --- a/components/tidb_query_aggr/src/impl_count.rs +++ b/components/tidb_query_aggr/src/impl_count.rs @@ -111,9 +111,10 @@ impl AggrFnStateCount { } } -// Here we manually implement `AggrFunctionStateUpdatePartial` so that `update_repeat` and -// `update_vector` can be faster. Also note that we support all kind of -// `AggrFunctionStateUpdatePartial` for the COUNT aggregate function. +// Here we manually implement `AggrFunctionStateUpdatePartial` so that +// `update_repeat` and `update_vector` can be faster. Also note that we support +// all kind of `AggrFunctionStateUpdatePartial` for the COUNT aggregate +// function. 
impl super::AggrFunctionStateUpdatePartial for AggrFnStateCount where diff --git a/components/tidb_query_aggr/src/impl_first.rs b/components/tidb_query_aggr/src/impl_first.rs index f01546cc5ef..b7ccd077598 100644 --- a/components/tidb_query_aggr/src/impl_first.rs +++ b/components/tidb_query_aggr/src/impl_first.rs @@ -155,19 +155,22 @@ where } } -// Here we manually implement `AggrFunctionStateUpdatePartial` instead of implementing -// `ConcreteAggrFunctionState` so that `update_repeat` and `update_vector` can be faster. +// Here we manually implement `AggrFunctionStateUpdatePartial` instead of +// implementing `ConcreteAggrFunctionState` so that `update_repeat` and +// `update_vector` can be faster. impl super::AggrFunctionStateUpdatePartial for AggrFnStateFirst where T: EvaluableRef<'static> + 'static, VectorValue: VectorValueExt, { - // ChunkedType has been implemented in AggrFunctionStateUpdatePartial for AggrFnStateFirst + // ChunkedType has been implemented in AggrFunctionStateUpdatePartial for + // AggrFnStateFirst impl_state_update_partial! { T } } -// In order to make `AggrFnStateFirst` satisfy the `AggrFunctionState` trait, we default impl all -// `AggrFunctionStateUpdatePartial` of `Evaluable` for all `AggrFnStateFirst`. +// In order to make `AggrFnStateFirst` satisfy the `AggrFunctionState` trait, we +// default impl all `AggrFunctionStateUpdatePartial` of `Evaluable` for all +// `AggrFnStateFirst`. impl_unmatched_function_state! { AggrFnStateFirst } impl super::AggrFunctionState for AggrFnStateFirst diff --git a/components/tidb_query_aggr/src/impl_max_min.rs b/components/tidb_query_aggr/src/impl_max_min.rs index 49eb4d911b8..31ff6acc8aa 100644 --- a/components/tidb_query_aggr/src/impl_max_min.rs +++ b/components/tidb_query_aggr/src/impl_max_min.rs @@ -242,9 +242,9 @@ where /// # Notes /// - /// For MAX(), MySQL currently compares ENUM and SET columns by their string value rather - /// than by the string's relative position in the set. 
This differs from how ORDER BY - /// compares them. + /// For MAX(), MySQL currently compares ENUM and SET columns by their string + /// value rather than by the string's relative position in the set. This + /// differs from how ORDER BY compares them. /// /// ref: https://dev.mysql.com/doc/refman/5.7/en/aggregate-functions.html#function_max #[inline] @@ -331,9 +331,9 @@ where /// # Notes /// - /// For MAX(), MySQL currently compares ENUM and SET columns by their string value rather - /// than by the string's relative position in the set. This differs from how ORDER BY - /// compares them. + /// For MAX(), MySQL currently compares ENUM and SET columns by their string + /// value rather than by the string's relative position in the set. This + /// differs from how ORDER BY compares them. /// /// ref: https://dev.mysql.com/doc/refman/5.7/en/aggregate-functions.html#function_max #[inline] diff --git a/components/tidb_query_aggr/src/impl_sum.rs b/components/tidb_query_aggr/src/impl_sum.rs index 5b0e8334e86..85f31b8f459 100644 --- a/components/tidb_query_aggr/src/impl_sum.rs +++ b/components/tidb_query_aggr/src/impl_sum.rs @@ -52,7 +52,8 @@ impl super::parser::AggrDefinitionParser for AggrFnDefinitionParserSum { out_schema.push(out_ft); out_exp.push(exp); - // Choose a type-aware SUM implementation based on the eval type after rewriting exp. + // Choose a type-aware SUM implementation based on the eval type after rewriting + // exp. Ok(match rewritten_eval_type { EvalType::Decimal => Box::new(AggrFnSum::::new()), EvalType::Real => Box::new(AggrFnSum::::new()), @@ -190,8 +191,9 @@ where /// # Notes /// - /// Functions such as SUM() or AVG() that expect a numeric argument cast the argument to a - /// number if necessary. For ENUM values, the index number is used in the calculation. + /// Functions such as SUM() or AVG() that expect a numeric argument cast the + /// argument to a number if necessary. For ENUM values, the index number is + /// used in the calculation. 
/// /// ref: https://dev.mysql.com/doc/refman/8.0/en/enum.html #[inline] @@ -266,8 +268,9 @@ where /// # Notes /// - /// Functions such as SUM() or AVG() that expect a numeric argument cast the argument to a - /// number if necessary. For ENUM values, the index number is used in the calculation. + /// Functions such as SUM() or AVG() that expect a numeric argument cast the + /// argument to a number if necessary. For ENUM values, the index number is + /// used in the calculation. /// /// ref: https://dev.mysql.com/doc/refman/8.0/en/enum.html #[inline] diff --git a/components/tidb_query_aggr/src/impl_variance.rs b/components/tidb_query_aggr/src/impl_variance.rs index f5b7fcc3bc8..190446c3809 100644 --- a/components/tidb_query_aggr/src/impl_variance.rs +++ b/components/tidb_query_aggr/src/impl_variance.rs @@ -80,7 +80,8 @@ impl super::AggrDefinitionParser for AggrFnDefinitionParserVari let out_ft = root_expr.take_field_type(); let out_et = box_try!(EvalType::try_from(out_ft.as_accessor().tp())); - // Rewrite expression to insert CAST() if needed. The rewrite should always succeed. + // Rewrite expression to insert CAST() if needed. The rewrite should always + // succeed. super::util::rewrite_exp_for_sum_avg(src_schema, &mut exp).unwrap(); let rewritten_eval_type = @@ -103,7 +104,8 @@ impl super::AggrDefinitionParser for AggrFnDefinitionParserVari out_schema.push(out_ft); out_exp.push(exp); - // Choose a type-aware VARIANCE implementation based on the eval type after rewriting exp. + // Choose a type-aware VARIANCE implementation based on the eval type after + // rewriting exp. Ok(match rewritten_eval_type { EvalType::Decimal => Box::new(AggrFnVariance::::new()), EvalType::Real => Box::new(AggrFnVariance::::new()), @@ -117,7 +119,8 @@ impl super::AggrDefinitionParser for AggrFnDefinitionParserVari /// The VARIANCE aggregate function. /// -/// Note that there are `VARIANCE(Decimal) -> Decimal` and `VARIANCE(Double) -> Double`. 
+/// Note that there are `VARIANCE(Decimal) -> Decimal` and `VARIANCE(Double) -> +/// Double`. #[derive(Debug, AggrFunction)] #[aggr_function(state = AggrFnStateVariance::::new())] pub struct AggrFnVariance @@ -276,9 +279,9 @@ where /// # Notes /// - /// Functions such as SUM() or AVG() or VARIANCE() that expect a numeric argument cast the - /// argument to a number if necessary. For ENUM values, the index number is used in the - /// calculation. + /// Functions such as SUM() or AVG() or VARIANCE() that expect a numeric + /// argument cast the argument to a number if necessary. For ENUM values, + /// the index number is used in the calculation. /// /// ref: https://dev.mysql.com/doc/refman/8.0/en/enum.html #[inline] @@ -387,9 +390,9 @@ where /// # Notes /// - /// Functions such as SUM() or AVG() or VARIANCE() that expect a numeric argument cast the - /// argument to a number if necessary. For ENUM values, the index number is used in the - /// calculation. + /// Functions such as SUM() or AVG() or VARIANCE() that expect a numeric + /// argument cast the argument to a number if necessary. For ENUM values, + /// the index number is used in the calculation. /// /// ref: https://dev.mysql.com/doc/refman/8.0/en/enum.html #[inline] diff --git a/components/tidb_query_aggr/src/lib.rs b/components/tidb_query_aggr/src/lib.rs index 65b2da55d03..b9d73b2773a 100644 --- a/components/tidb_query_aggr/src/lib.rs +++ b/components/tidb_query_aggr/src/lib.rs @@ -30,16 +30,18 @@ pub use self::parser::{AggrDefinitionParser, AllAggrDefinitionParser}; /// A trait for all single parameter aggregate functions. /// -/// Unlike ordinary function, aggregate function calculates a summary value over multiple rows. To -/// save memory, this functionality is provided via an incremental update model: +/// Unlike ordinary function, aggregate function calculates a summary value over +/// multiple rows. To save memory, this functionality is provided via an +/// incremental update model: /// -/// 1. 
Each aggregate function associates a state structure, storing partially computed aggregate -/// results. +/// - Each aggregate function associates a state structure, storing partially +/// computed aggregate results. /// -/// 2. The caller calls `update()` or `update_vector()` for each row to update the state. +/// - The caller calls `update()` or `update_vector()` for each row to update +/// the state. /// -/// 3. The caller finally calls `push_result()` to aggregate a summary value and push it into the -/// given data container. +/// - The caller finally calls `push_result()` to aggregate a summary value and +/// push it into the given data container. /// /// This trait can be auto derived by using `tidb_query_codegen::AggrFunction`. pub trait AggrFunction: std::fmt::Debug + Send + 'static { @@ -52,13 +54,15 @@ pub trait AggrFunction: std::fmt::Debug + Send + 'static { /// A trait for all single parameter aggregate function states. /// -/// Aggregate function states are created by corresponding aggregate functions. For each state, -/// it can be updated or aggregated (to finalize a result) independently. +/// Aggregate function states are created by corresponding aggregate functions. +/// For each state, it can be updated or aggregated (to finalize a result) +/// independently. /// -/// Note that aggregate function states are strongly typed, that is, the caller must provide the -/// parameter in the correct data type for an aggregate function states that calculates over this -/// data type. To be safely boxed and placed in a vector, interfaces are provided in a form that -/// accept all kinds of data type. However, unmatched types will result in panics in runtime. +/// Note that aggregate function states are strongly typed, that is, the caller +/// must provide the parameter in the correct data type for an aggregate +/// function states that calculates over this data type. 
To be safely boxed and +/// placed in a vector, interfaces are provided in a form that accept all kinds +/// of data type. However, unmatched types will result in panics in runtime. pub trait AggrFunctionState: std::fmt::Debug + Send @@ -73,17 +77,19 @@ pub trait AggrFunctionState: + AggrFunctionStateUpdatePartial> + AggrFunctionStateUpdatePartial> { - // TODO: A better implementation is to specialize different push result targets. However - // current aggregation executor cannot utilize it. + // TODO: A better implementation is to specialize different push result targets. + // However current aggregation executor cannot utilize it. fn push_result(&self, ctx: &mut EvalContext, target: &mut [VectorValue]) -> Result<()>; } -/// A helper trait for single parameter aggregate function states that only work over concrete eval -/// types. This is the actual and only trait that normal aggregate function states will implement. +/// A helper trait for single parameter aggregate function states that only work +/// over concrete eval types. This is the actual and only trait that normal +/// aggregate function states will implement. /// -/// Unlike `AggrFunctionState`, this trait only provides specialized `update()` and `push_result()` -/// functions according to the associated type. `update()` and `push_result()` functions that accept -/// any eval types (but will panic when eval type does not match expectation) will be generated via +/// Unlike `AggrFunctionState`, this trait only provides specialized `update()` +/// and `push_result()` functions according to the associated type. `update()` +/// and `push_result()` functions that accept any eval types (but will panic +/// when eval type does not match expectation) will be generated via /// implementations over this trait. 
pub trait ConcreteAggrFunctionState: std::fmt::Debug + Send + 'static { type ParameterType: EvaluableRef<'static>; @@ -102,14 +108,14 @@ pub trait ConcreteAggrFunctionState: std::fmt::Debug + Send + 'static { #[macro_export] macro_rules! update_concrete { - ( $state:expr, $ctx:expr, $value:expr ) => { + ($state:expr, $ctx:expr, $value:expr) => { unsafe { $state.update_concrete_unsafe($ctx, $value.unsafe_into()) } }; } #[macro_export] macro_rules! update_vector { - ( $state:expr, $ctx:expr, $physical_values:expr, $logical_rows:expr ) => { + ($state:expr, $ctx:expr, $physical_values:expr, $logical_rows:expr) => { unsafe { $state.update_vector_unsafe( $ctx, @@ -123,21 +129,21 @@ macro_rules! update_vector { #[macro_export] macro_rules! update_repeat { - ( $state:expr, $ctx:expr, $value:expr, $repeat_times:expr ) => { + ($state:expr, $ctx:expr, $value:expr, $repeat_times:expr) => { unsafe { $state.update_repeat_unsafe($ctx, $value.unsafe_into(), $repeat_times) } }; } #[macro_export] macro_rules! update { - ( $state:expr, $ctx:expr, $value:expr ) => { + ($state:expr, $ctx:expr, $value:expr) => { unsafe { $state.update_unsafe($ctx, $value.unsafe_into()) } }; } #[macro_export] macro_rules! impl_state_update_partial { - ( $ty:tt ) => { + ($ty:tt) => { #[inline] unsafe fn update_unsafe( &mut self, @@ -172,7 +178,7 @@ macro_rules! impl_state_update_partial { #[macro_export] macro_rules! impl_concrete_state { - ( $ty:ty ) => { + ($ty:ty) => { #[inline] unsafe fn update_concrete_unsafe( &mut self, @@ -186,7 +192,7 @@ macro_rules! impl_concrete_state { #[macro_export] macro_rules! impl_unmatched_function_state { - ( $ty:ty ) => { + ($ty:ty) => { impl super::AggrFunctionStateUpdatePartial for $ty where T1: EvaluableRef<'static> + 'static, @@ -226,15 +232,15 @@ macro_rules! impl_unmatched_function_state { }; } -/// A helper trait that provides `update()` and `update_vector()` over a concrete type, which will -/// be relied in `AggrFunctionState`. 
+/// A helper trait that provides `update()` and `update_vector()` over a +/// concrete type, which will be relied in `AggrFunctionState`. pub trait AggrFunctionStateUpdatePartial> { /// Updates the internal state giving one row data. /// /// # Panics /// - /// Panics if the aggregate function does not support the supplied concrete data type as its - /// parameter. + /// Panics if the aggregate function does not support the supplied concrete + /// data type as its parameter. /// /// # Safety /// @@ -245,8 +251,8 @@ pub trait AggrFunctionStateUpdatePartial> { /// /// # Panics /// - /// Panics if the aggregate function does not support the supplied concrete data type as its - /// parameter. + /// Panics if the aggregate function does not support the supplied concrete + /// data type as its parameter. /// /// # Safety /// @@ -262,8 +268,8 @@ pub trait AggrFunctionStateUpdatePartial> { /// /// # Panics /// - /// Panics if the aggregate function does not support the supplied concrete data type as its - /// parameter. + /// Panics if the aggregate function does not support the supplied concrete + /// data type as its parameter. /// /// # Safety /// @@ -281,8 +287,9 @@ impl, State> AggrFunctionStateUpdatePartial for Stat where State: ConcreteAggrFunctionState, { - // All `ConcreteAggrFunctionState` implement `AggrFunctionStateUpdatePartial`, which is - // one of the trait bound that `AggrFunctionState` requires. + // All `ConcreteAggrFunctionState` implement + // `AggrFunctionStateUpdatePartial`, which is one of the trait bound that + // `AggrFunctionState` requires. 
#[inline] default unsafe fn update_unsafe( diff --git a/components/tidb_query_aggr/src/parser.rs b/components/tidb_query_aggr/src/parser.rs index 5cbc19961d8..600326edb2f 100644 --- a/components/tidb_query_aggr/src/parser.rs +++ b/components/tidb_query_aggr/src/parser.rs @@ -9,26 +9,29 @@ use crate::{impl_bit_op::*, impl_max_min::*, impl_variance::*, AggrFunction}; /// Parse a specific aggregate function definition from protobuf. /// -/// All aggregate function implementations should include an impl for this trait as well as -/// add a match arm in `map_pb_sig_to_aggr_func_parser` so that the aggregate function can be -/// actually utilized. +/// All aggregate function implementations should include an impl for this trait +/// as well as add a match arm in `map_pb_sig_to_aggr_func_parser` so that the +/// aggregate function can be actually utilized. pub trait AggrDefinitionParser { - /// Checks whether the inner expression of the aggregate function definition is supported. - /// It is ensured that `aggr_def.tp` maps the current parser instance. + /// Checks whether the inner expression of the aggregate function definition + /// is supported. It is ensured that `aggr_def.tp` maps the current + /// parser instance. fn check_supported(&self, aggr_def: &Expr) -> Result<()>; /// Parses and transforms the aggregate function definition. /// - /// The schema of this aggregate function will be appended in `out_schema` and the final - /// RPN expression (maybe wrapped by some casting according to types) will be appended in - /// `out_exp`. + /// The schema of this aggregate function will be appended in `out_schema` + /// and the final RPN expression (maybe wrapped by some casting + /// according to types) will be appended in `out_exp`. /// - /// The parser may choose particular aggregate function implementation based on the data - /// type, so `schema` is also needed in case of data type depending on the column. 
+ /// The parser may choose particular aggregate function implementation based + /// on the data type, so `schema` is also needed in case of data type + /// depending on the column. /// /// # Panic /// - /// May panic if the aggregate function definition is not supported by this parser. + /// May panic if the aggregate function definition is not supported by this + /// parser. fn parse( &self, mut aggr_def: Expr, @@ -100,8 +103,8 @@ impl AggrDefinitionParser for AllAggrDefinitionParser { }) } - /// Parses and transforms the aggregate function definition to generate corresponding - /// `AggrFunction` instance. + /// Parses and transforms the aggregate function definition to generate + /// corresponding `AggrFunction` instance. /// /// # Panic /// diff --git a/components/tidb_query_aggr/src/util.rs b/components/tidb_query_aggr/src/util.rs index 0e9ae390cf1..c4ba7a05766 100644 --- a/components/tidb_query_aggr/src/util.rs +++ b/components/tidb_query_aggr/src/util.rs @@ -7,7 +7,8 @@ use tidb_query_datatype::{builder::FieldTypeBuilder, EvalType, FieldTypeAccessor use tidb_query_expr::{impl_cast::get_cast_fn_rpn_node, RpnExpression, RpnExpressionBuilder}; use tipb::{Expr, FieldType}; -/// Checks whether or not there is only one child and the child expression is supported. +/// Checks whether or not there is only one child and the child expression is +/// supported. pub fn check_aggr_exp_supported_one_child(aggr_def: &Expr) -> Result<()> { if aggr_def.get_children().len() != 1 { return Err(other_err!( @@ -23,7 +24,8 @@ pub fn check_aggr_exp_supported_one_child(aggr_def: &Expr) -> Result<()> { Ok(()) } -/// Rewrites the expression to insert necessary cast functions for SUM and AVG aggregate functions. +/// Rewrites the expression to insert necessary cast functions for SUM and AVG +/// aggregate functions. /// /// See `typeInfer4Sum` and `typeInfer4Avg` in TiDB. 
/// @@ -63,7 +65,8 @@ pub fn rewrite_exp_for_sum_avg(schema: &[FieldType], exp: &mut RpnExpression) -> Ok(()) } -/// Rewrites the expression to insert necessary cast functions for Bit operation family functions. +/// Rewrites the expression to insert necessary cast functions for Bit operation +/// family functions. pub fn rewrite_exp_for_bit_op(schema: &[FieldType], exp: &mut RpnExpression) -> Result<()> { let ret_field_type = exp.ret_field_type(schema); let ret_eval_type = box_try!(EvalType::try_from(ret_field_type.as_accessor().tp())); diff --git a/components/tidb_query_codegen/src/lib.rs b/components/tidb_query_codegen/src/lib.rs index baa9d8522ab..feee1c6afb3 100644 --- a/components/tidb_query_codegen/src/lib.rs +++ b/components/tidb_query_codegen/src/lib.rs @@ -8,8 +8,8 @@ //! //! This crate exports a custom derive for [`AggrFunction`](https://github.com/tikv/tikv/blob/master/components/tidb_query_aggr/src/mod.rs) //! and an attribute macro called `rpn_fn` for use on functions which provide -//! coprocessor functionality. `rpn_fn` is documented in the [rpn_function](rpn_function.rs) -//! module. +//! coprocessor functionality. `rpn_fn` is documented in the +//! [rpn_function](rpn_function.rs) module. #![feature(proc_macro_diagnostic)] #![feature(iter_order_by)] diff --git a/components/tidb_query_codegen/src/rpn_function.rs b/components/tidb_query_codegen/src/rpn_function.rs index 8025fc01588..864fce9afd8 100644 --- a/components/tidb_query_codegen/src/rpn_function.rs +++ b/components/tidb_query_codegen/src/rpn_function.rs @@ -16,13 +16,13 @@ //! ## Arguments to macro //! //! If neither `varg` or `raw_varg` are supplied, then the generated arguments -//! follow from the supplied function's arguments. Each argument must have a type -//! `Option<&T>` for some `T`. +//! follow from the supplied function's arguments. Each argument must have a +//! type `Option<&T>` for some `T`. //! //! ### `varg` //! -//! The RPN operator takes a variable number of arguments. 
The arguments are passed -//! as a `&[Option<&T>]`. E.g., +//! The RPN operator takes a variable number of arguments. The arguments are +//! passed as a `&[Option<&T>]`. E.g., //! //! ```ignore //! #[rpn_fn(varg)] @@ -33,8 +33,8 @@ //! //! ### `raw_varg` //! -//! The RPN operator takes a variable number of arguments. The arguments are passed -//! as a `&[ScalarValueRef]`. E.g., +//! The RPN operator takes a variable number of arguments. The arguments are +//! passed as a `&[ScalarValueRef]`. E.g., //! //! ```ignore //! #[rpn_fn(raw_varg)] @@ -43,8 +43,8 @@ //! } //! ``` //! -//! Use `raw_varg` where the function takes a variable number of arguments and the types -//! are not the same, for example, RPN function `case_when`. +//! Use `raw_varg` where the function takes a variable number of arguments and +//! the types are not the same, for example, RPN function `case_when`. //! //! ### `max_args` //! @@ -61,34 +61,40 @@ //! ### `extra_validator` //! //! A function name for custom validation code to be run when an operation is -//! validated. The validator function should have the signature `&tipb::Expr -> Result<()>`. -//! E.g., `#[rpn_fn(raw_varg, extra_validator = json_object_validator)]` +//! validated. The validator function should have the signature `&tipb::Expr -> +//! Result<()>`. E.g., `#[rpn_fn(raw_varg, extra_validator = +//! json_object_validator)]` //! //! ### `metadata_type` //! //! The type of the metadata structure defined in tipb. -//! If `metadata_mapper` is not specified, the protobuf metadata structure will be used as the metadata directly. +//! If `metadata_mapper` is not specified, the protobuf metadata structure will +//! be used as the metadata directly. //! //! ### `metadata_mapper` //! -//! A function name to construct a new metadata or transform a protobuf metadata structure into a desired form. -//! The function signatures varies according to the existence of `metadata_mapper` and `metadata_type` as follows. +//! 
A function name to construct a new metadata or transform a protobuf metadata +//! structure into a desired form. The function signatures varies according to +//! the existence of `metadata_mapper` and `metadata_type` as follows. //! -//! - `metadata_mapper ` exists, `metadata_type` missing: `fn(&mut tipb::Expr) -> T` +//! - `metadata_mapper ` exists, `metadata_type` missing: `fn(&mut tipb::Expr) +//! -> T` //! //! Constructs a new metadata in type `T`. //! -//! - `metadata_mapper ` exists, `metadata_type` exists: `fn(MetaDataType, &mut tipb::Expr) -> T` +//! - `metadata_mapper ` exists, `metadata_type` exists: `fn(MetaDataType, &mut +//! tipb::Expr) -> T` //! -//! Transforms a protobuf metadata type `MetaDataType` specified by `metadata_type` into a new type `T`. +//! Transforms a protobuf metadata type `MetaDataType` specified by +//! `metadata_type` into a new type `T`. //! //! ### `capture` //! //! An array of argument names which are passed from the caller to the expanded -//! function. The argument names must be in scope in the generated `eval` or `run` -//! methods. Currently, that includes the following arguments (the supplied -//! function must accept these arguments with the corresponding types, in -//! addition to any other arguments): +//! function. The argument names must be in scope in the generated `eval` or +//! `run` methods. Currently, that includes the following arguments (the +//! supplied function must accept these arguments with the corresponding types, +//! in addition to any other arguments): //! //! * `ctx: &mut expr::EvalContext` //! * `output_rows: usize` @@ -111,35 +117,42 @@ //! This includes `varg` and `raw_varg`. //! //! The supplied function is preserved and a constructor function is generated -//! with a `_fn_meta` suffix, e.g., `#[rpn_fn] fn foo ...` will preserve `foo` and -//! generate `foo_fn_meta`. The constructor function returns an `rpn_expr::RpnFnMeta` -//! value. +//! 
with a `_fn_meta` suffix, e.g., `#[rpn_fn] fn foo ...` will preserve `foo` +//! and generate `foo_fn_meta`. The constructor function returns an +//! `rpn_expr::RpnFnMeta` value. //! -//! The constructor function will include code for validating the runtime arguments -//! and running the function, pointers to these functions are stored in the result. +//! The constructor function will include code for validating the runtime +//! arguments and running the function, pointers to these functions are stored +//! in the result. //! //! ### Non-vararg functions //! -//! Generate the following (examples assume a supplied function called `foo_bar`: +//! Generate the following (examples assume a supplied function called +//! `foo_bar`: //! -//! * A trait to represent the function (`FooBar_Fn`) with a single function `eval`. +//! * A trait to represent the function (`FooBar_Fn`) with a single function +//! `eval`. //! - An impl of that trait for all argument types which panics -//! - An impl of that trait for the supported argument type which calls the supplied function. -//! * An evaluator struct (`FooBar_Evaluator`) which implements `rpn_expr::function::Evaluator`, -//! which includes an `eval` method which dispatches to `FooBar_Fn::eval`. +//! - An impl of that trait for the supported argument type which calls the +//! supplied function. +//! * An evaluator struct (`FooBar_Evaluator`) which implements +//! `rpn_expr::function::Evaluator`, which includes an `eval` method which +//! dispatches to `FooBar_Fn::eval`. //! * A constructor function similar to the vararg case. //! //! The supplied function is preserved. //! -//! The supported argument type is represented as a type-level list, for example, a -//! a function which takes two unsigned ints has an argument representation -//! something like `Arg>`. See documentation in -//! `components/tidb_query_expr/src/types/function.rs` for more details. +//! The supported argument type is represented as a type-level list, for +//! 
example, a a function which takes two unsigned ints has an argument +//! representation something like `Arg>`. See +//! documentation in `components/tidb_query_expr/src/types/function.rs` for more +//! details. //! -//! The `_Fn` trait can be customised by implementing it manually. -//! For example, you are going to implement an RPN function called `regex_match` taking two -//! arguments, the regex and the string to match. You want to build the regex only once if the -//! first argument is a scalar. The code may look like: +//! The `_Fn` trait can be customized by implementing it manually. +//! For example, you are going to implement an RPN function called `regex_match` +//! taking two arguments, the regex and the string to match. You want to build +//! the regex only once if the first argument is a scalar. The code may look +//! like: //! //! ```ignore //! fn regex_match_impl(regex: &Regex, text: Option<&Bytes>) -> Result> { @@ -175,8 +188,9 @@ //! } //! ``` //! -//! If the RPN function accepts variable number of arguments and all arguments have the same eval -//! type, like RPN function `coalesce`, you can use `#[rpn_fn(varg)]` like: +//! If the RPN function accepts variable number of arguments and all arguments +//! have the same eval type, like RPN function `coalesce`, you can use +//! `#[rpn_fn(varg)]` like: //! //! ```ignore //! #[rpn_fn(varg)] @@ -220,10 +234,12 @@ mod kw { /// Parses an attribute like `#[rpn_fn(varg, capture = [ctx, output_rows])`. #[derive(Debug)] struct RpnFnAttr { - /// Whether or not the function is a varg function. Varg function accepts `&[&Option]`. + /// Whether or not the function is a varg function. Varg function accepts + /// `&[&Option]`. is_varg: bool, - /// Whether or not the function is a raw varg function. Raw varg function accepts `&[ScalarValueRef]`. + /// Whether or not the function is a raw varg function. Raw varg function + /// accepts `&[ScalarValueRef]`. 
is_raw_varg: bool, /// Whether or not the function needs extra logic on `None` value. @@ -234,8 +250,9 @@ struct RpnFnAttr { /// The maximum accepted arguments, which will be checked by the validator. /// - /// Only varg or raw_varg function accepts a range of number of arguments. Other kind of - /// function strictly stipulates number of arguments according to the function definition. + /// Only varg or raw_varg function accepts a range of number of arguments. + /// Other kind of function strictly stipulates number of arguments + /// according to the function definition. max_args: Option, /// The minimal accepted arguments, which will be checked by the validator. @@ -411,7 +428,8 @@ impl parse::Parse for RpnFnAttr { } } -/// Parses an evaluable type like `Option<&T>`, `Option`, `Option`, `Option` or `Option`. +/// Parses an evaluable type like `Option<&T>`, `Option`, +/// `Option`, `Option` or `Option`. struct RpnFnRefEvaluableTypeWithOption(RpnFnRefEvaluableType); impl parse::Parse for RpnFnRefEvaluableTypeWithOption { @@ -504,8 +522,8 @@ impl parse::Parse for RpnFnRefEvaluableType { } /// Parses a function signature parameter like `val: &Option` or `val: &T`. -/// If input has &Option, set has_option to true; otherwise, set has_option to false. -/// Caller can use has_option to check if input is valid. +/// If input has &Option, set has_option to true; otherwise, set has_option +/// to false. Caller can use has_option to check if input is valid. struct RpnFnSignatureParam { _pat: Pat, has_option: bool, @@ -531,9 +549,9 @@ impl parse::Parse for RpnFnSignatureParam { } } -/// Parses a function signature parameter like `val: &[&Option]` or `val: &[&T]`. -/// If input has &Option, set has_option to true; otherwise, set has_option to false. -/// Caller can use has_option to check if input is valid. +/// Parses a function signature parameter like `val: &[&Option]` or `val: +/// &[&T]`. 
If input has &Option, set has_option to true; otherwise, set +/// has_option to false. Caller can use has_option to check if input is valid. struct VargsRpnFnSignatureParam { _pat: Pat, has_option: bool, diff --git a/components/tidb_query_common/src/error.rs b/components/tidb_query_common/src/error.rs index 8697413f69c..046e2f02059 100644 --- a/components/tidb_query_common/src/error.rs +++ b/components/tidb_query_common/src/error.rs @@ -90,8 +90,9 @@ impl ErrorCodeExt for EvaluateError { #[error(transparent)] pub struct StorageError(#[from] pub anyhow::Error); -/// We want to restrict the type of errors to be either a `StorageError` or `EvaluateError`, thus -/// `failure::Error` is not used. Instead, we introduce our own error enum. +/// We want to restrict the type of errors to be either a `StorageError` or +/// `EvaluateError`, thus `failure::Error` is not used. Instead, we introduce +/// our own error enum. #[derive(Debug, Error)] pub enum ErrorInner { #[error("Storage error: {0}")] diff --git a/components/tidb_query_common/src/execute_stats.rs b/components/tidb_query_common/src/execute_stats.rs index 2318ad43e16..b2740212df0 100644 --- a/components/tidb_query_common/src/execute_stats.rs +++ b/components/tidb_query_common/src/execute_stats.rs @@ -76,7 +76,8 @@ impl ExecSummaryCollector for ExecSummaryCollectorEnabled { } } -/// A `ExecSummaryCollector` that does not collect anything. Acts like `collect = false`. +/// A `ExecSummaryCollector` that does not collect anything. Acts like `collect +/// = false`. pub struct ExecSummaryCollectorDisabled; impl ExecSummaryCollector for ExecSummaryCollectorDisabled { @@ -105,11 +106,11 @@ pub struct WithSummaryCollector { pub inner: T, } -/// Execution statistics to be flowed between parent and child executors at once during -/// `collect_exec_stats()` invocation. +/// Execution statistics to be flowed between parent and child executors at once +/// during `collect_exec_stats()` invocation. 
pub struct ExecuteStats { - /// The execution summary of each executor. If execution summary is not needed, it will - /// be zero sized. + /// The execution summary of each executor. If execution summary is not + /// needed, it will be zero sized. pub summary_per_executor: Vec, /// For each range given in the request, how many rows are scanned. @@ -119,8 +120,8 @@ pub struct ExecuteStats { impl ExecuteStats { /// Creates a new statistics instance. /// - /// If execution summary does not need to be collected, it is safe to pass 0 to the `executors` - /// argument, which will avoid one allocation. + /// If execution summary does not need to be collected, it is safe to pass 0 + /// to the `executors` argument, which will avoid one allocation. pub fn new(executors_len: usize) -> Self { Self { summary_per_executor: vec![ExecSummary::default(); executors_len], diff --git a/components/tidb_query_common/src/storage/mod.rs b/components/tidb_query_common/src/storage/mod.rs index 818b863d0a4..f8d9f37723d 100644 --- a/components/tidb_query_common/src/storage/mod.rs +++ b/components/tidb_query_common/src/storage/mod.rs @@ -11,8 +11,8 @@ pub type Result = std::result::Result; pub type OwnedKvPair = (Vec, Vec); -/// The abstract storage interface. The table scan and index scan executor relies on a `Storage` -/// implementation to provide source data. +/// The abstract storage interface. The table scan and index scan executor +/// relies on a `Storage` implementation to provide source data. pub trait Storage: Send { type Statistics; diff --git a/components/tidb_query_common/src/storage/ranges_iter.rs b/components/tidb_query_common/src/storage/ranges_iter.rs index 061cd339129..6f99249336b 100644 --- a/components/tidb_query_common/src/storage/ranges_iter.rs +++ b/components/tidb_query_common/src/storage/ranges_iter.rs @@ -7,12 +7,12 @@ pub enum IterStatus { /// All ranges are consumed. 
Drained, - /// Last range is drained or this iteration is a fresh start so that caller should scan - /// on a new range. + /// Last range is drained or this iteration is a fresh start so that caller + /// should scan on a new range. NewRange(Range), - /// Last interval range is not drained and the caller should continue scanning without changing - /// the scan range. + /// Last interval range is not drained and the caller should continue + /// scanning without changing the scan range. Continue, } @@ -23,13 +23,14 @@ pub enum IterStatus { /// - a flag indicating continuing last interval range /// - a flag indicating that all ranges are consumed /// -/// If a new range is returned, caller can then scan unknown amount of key(s) within this new range. -/// The caller must inform the structure so that it will emit a new range next time by calling -/// `notify_drained()` after current range is drained. Multiple `notify_drained()` without `next()` -/// will have no effect. +/// If a new range is returned, caller can then scan unknown amount of key(s) +/// within this new range. The caller must inform the structure so that it will +/// emit a new range next time by calling `notify_drained()` after current range +/// is drained. Multiple `notify_drained()` without `next()` will have no +/// effect. pub struct RangesIterator { - /// Whether or not we are processing a valid range. If we are not processing a range, or there - /// is no range any more, this field is `false`. + /// Whether or not we are processing a valid range. If we are not processing + /// a range, or there is no range any more, this field is `false`. 
in_range: bool, iter: std::vec::IntoIter, diff --git a/components/tidb_query_common/src/storage/scanner.rs b/components/tidb_query_common/src/storage/scanner.rs index 1c1a1cea111..851220307b9 100644 --- a/components/tidb_query_common/src/storage/scanner.rs +++ b/components/tidb_query_common/src/storage/scanner.rs @@ -5,8 +5,8 @@ use crate::error::StorageError; const KEY_BUFFER_CAPACITY: usize = 64; -/// A scanner that scans over multiple ranges. Each range can be a point range containing only -/// one row, or an interval range containing multiple rows. +/// A scanner that scans over multiple ranges. Each range can be a point range +/// containing only one row, or an interval range containing multiple rows. pub struct RangesScanner { storage: T, ranges_iter: RangesIterator, @@ -69,7 +69,8 @@ impl RangesScanner { } /// Fetches next row. - /// Note: `update_scanned_range` can control whether update the scanned range when `is_scanned_range_aware` is true. + /// Note: `update_scanned_range` can control whether update the scanned + /// range when `is_scanned_range_aware` is true. pub fn next_opt( &mut self, update_scanned_range: bool, @@ -119,14 +120,14 @@ impl RangesScanner { } } - /// Appends storage statistics collected so far to the given container and clears the - /// collected statistics. + /// Appends storage statistics collected so far to the given container and + /// clears the collected statistics. pub fn collect_storage_stats(&mut self, dest: &mut T::Statistics) { self.storage.collect_statistics(dest) } - /// Appends scanned rows of each range so far to the given container and clears the - /// collected statistics. + /// Appends scanned rows of each range so far to the given container and + /// clears the collected statistics. 
pub fn collect_scanned_rows_per_range(&mut self, dest: &mut Vec) { dest.append(&mut self.scanned_rows_per_range); self.scanned_rows_per_range.push(0); @@ -503,8 +504,8 @@ mod tests { assert_eq!(&r.upper_exclusive, b"foo_8"); // Multiple ranges - // TODO: caller should not pass in unordered ranges otherwise scanned ranges would be - // unsound. + // TODO: caller should not pass in unordered ranges otherwise scanned ranges + // would be unsound. let ranges = vec![ IntervalRange::from(("foo", "foo_3")).into(), IntervalRange::from(("foo_5", "foo_50")).into(), @@ -718,8 +719,8 @@ mod tests { assert_eq!(&r.upper_exclusive, b"foo_8"); // Multiple ranges - // TODO: caller should not pass in unordered ranges otherwise scanned ranges would be - // unsound. + // TODO: caller should not pass in unordered ranges otherwise scanned ranges + // would be unsound. let ranges = vec![ IntervalRange::from(("foo", "foo_3")).into(), IntervalRange::from(("foo_5", "foo_50")).into(), diff --git a/components/tidb_query_common/src/storage/test_fixture.rs b/components/tidb_query_common/src/storage/test_fixture.rs index a10726b5347..305bc5bf168 100644 --- a/components/tidb_query_common/src/storage/test_fixture.rs +++ b/components/tidb_query_common/src/storage/test_fixture.rs @@ -11,7 +11,8 @@ type ErrorBuilder = Box crate::error::StorageError>; type FixtureValue = std::result::Result, ErrorBuilder>; -/// A `Storage` implementation that returns fixed source data (i.e. fixture). Useful in tests. +/// A `Storage` implementation that returns fixed source data (i.e. fixture). +/// Useful in tests. #[derive(Clone)] pub struct FixtureStorage { data: Arc, FixtureValue>>, @@ -69,8 +70,8 @@ impl super::Storage for FixtureStorage { fn scan_next(&mut self) -> Result> { let value = if !self.is_backward_scan { - // During the call of this function, `data` must be valid and we are only returning - // data clones to outside, so this access is safe. 
+ // During the call of this function, `data` must be valid and we are only + // returning data clones to outside, so this access is safe. self.data_view_unsafe.as_mut().unwrap().next() } else { self.data_view_unsafe.as_mut().unwrap().next_back() diff --git a/components/tidb_query_common/src/util.rs b/components/tidb_query_common/src/util.rs index 9ee2a059073..9f9b60bf9f7 100644 --- a/components/tidb_query_common/src/util.rs +++ b/components/tidb_query_common/src/util.rs @@ -40,8 +40,8 @@ pub fn is_prefix_next(key: &[u8], next: &[u8]) -> bool { let mut carry_pos = len; loop { if carry_pos == 0 { - // All bytes of `key` are 255. `next` couldn't be `key`'s prefix_next since their - // lengths are equal. + // All bytes of `key` are 255. `next` couldn't be `key`'s prefix_next since + // their lengths are equal. return false; } @@ -71,8 +71,8 @@ pub fn is_prefix_next(key: &[u8], next: &[u8]) -> bool { && next[carry_pos + 1..].iter().all(|byte| *byte == 0) && key[..carry_pos] == next[..carry_pos] } else if len + 1 == next_len { - // `next` must has one more 0 than `key`, and the first `len` bytes must be all 255. - // The case that `len == 0` is also covered here. + // `next` must has one more 0 than `key`, and the first `len` bytes must be all + // 255. The case that `len == 0` is also covered here. *next.last().unwrap() == 0 && key.iter().all(|byte| *byte == 255) && next.iter().take(len).all(|byte| *byte == 255) diff --git a/components/tidb_query_datatype/src/codec/batch/lazy_column.rs b/components/tidb_query_datatype/src/codec/batch/lazy_column.rs index dcd6328ca18..11d290f9c31 100644 --- a/components/tidb_query_datatype/src/codec/batch/lazy_column.rs +++ b/components/tidb_query_datatype/src/codec/batch/lazy_column.rs @@ -16,13 +16,14 @@ use crate::{ match_template_evaltype, EvalType, FieldTypeAccessor, }; -/// A container stores an array of datums, which can be either raw (not decoded), or decoded into -/// the `VectorValue` type. 
+/// A container stores an array of datums, which can be either raw (not +/// decoded), or decoded into the `VectorValue` type. /// /// TODO: -/// Since currently the data format in response can be the same as in storage, we use this structure -/// to avoid unnecessary repeated serialization / deserialization. In future, Coprocessor will -/// respond all data in Chunk format which is different to the format in storage. At that time, +/// Since currently the data format in response can be the same as in storage, +/// we use this structure to avoid unnecessary repeated serialization / +/// deserialization. In future, Coprocessor will respond all data in Chunk +/// format which is different to the format in storage. At that time, /// this structure is no longer useful and should be removed. #[derive(Clone, Debug)] pub enum LazyBatchColumn { @@ -42,14 +43,16 @@ impl LazyBatchColumn { #[inline] pub fn raw_with_capacity(capacity: usize) -> Self { use codec::number::MAX_VARINT64_LENGTH; - // We assume that each element *may* has a size of MAX_VAR_INT_LEN + Datum Flag (1 byte). + // We assume that each element *may* has a size of MAX_VAR_INT_LEN + Datum Flag + // (1 byte). LazyBatchColumn::Raw(BufferVec::with_capacity( capacity, capacity * (MAX_VARINT64_LENGTH + 1), )) } - /// Creates a new `LazyBatchColumn::Decoded` with specified capacity and eval type. + /// Creates a new `LazyBatchColumn::Decoded` with specified capacity and + /// eval type. #[inline] pub fn decoded_with_capacity_and_tp(capacity: usize, eval_tp: EvalType) -> Self { LazyBatchColumn::Decoded(VectorValue::with_capacity(capacity, eval_tp)) @@ -150,14 +153,16 @@ impl LazyBatchColumn { } } - /// Decodes this column if the column is not decoded, according to the given logical rows map. - /// After decoding, the decoded column will have the same physical layout as the encoded one - /// (i.e. the same logical rows), but elements in unnecessary positions will not be decoded - /// and will be `None`. 
+ /// Decodes this column if the column is not decoded, according to the given + /// logical rows map. After decoding, the decoded column will have the same + /// physical layout as the encoded one (i.e. the same logical rows), but + /// elements in unnecessary positions will not be decoded and will be + /// `None`. /// - /// The field type is needed because we use the same `DateTime` structure when handling - /// Date, Time or Timestamp. - // TODO: Maybe it's a better idea to assign different eval types for different date types. + /// The field type is needed because we use the same `DateTime` structure + /// when handling Date, Time or Timestamp. + // TODO: Maybe it's a better idea to assign different eval types for different + // date types. pub fn ensure_decoded( &mut self, ctx: &mut EvalContext, @@ -358,7 +363,8 @@ mod tests { assert!(col.is_decoded()); assert_eq!(col.len(), 3); assert_eq!(col.capacity(), 3); - // Element 1 is None because it is not referred in `logical_rows` and we don't decode it. + // Element 1 is None because it is not referred in `logical_rows` and we don't + // decode it. assert_eq!(col.decoded().to_int_vec(), &[Some(32), None, Some(10)]); { @@ -370,7 +376,8 @@ mod tests { assert_eq!(col.decoded().to_int_vec(), &[Some(32), None, Some(10)]); } - // Decode a decoded column, even using a different logical rows, does not have effect. + // Decode a decoded column, even using a different logical rows, does not have + // effect. col.ensure_decoded( &mut ctx, &FieldTypeTp::Long.into(), @@ -435,7 +442,8 @@ mod benches { /// Bench performance of decoding a raw batch column. /// - /// Note that there is a clone in the bench suite, whose cost should be excluded. + /// Note that there is a clone in the bench suite, whose cost should be + /// excluded. #[bench] fn bench_lazy_batch_column_clone_and_decode(b: &mut test::Bencher) { use crate::{ @@ -471,7 +479,8 @@ mod benches { /// Bench performance of decoding a decoded lazy batch column. 
/// - /// Note that there is a clone in the bench suite, whose cost should be excluded. + /// Note that there is a clone in the bench suite, whose cost should be + /// excluded. #[bench] fn bench_lazy_batch_column_clone_and_decode_decoded(b: &mut test::Bencher) { use crate::{ diff --git a/components/tidb_query_datatype/src/codec/batch/lazy_column_vec.rs b/components/tidb_query_datatype/src/codec/batch/lazy_column_vec.rs index d4f7ea9044a..55a07e72ae7 100644 --- a/components/tidb_query_datatype/src/codec/batch/lazy_column_vec.rs +++ b/components/tidb_query_datatype/src/codec/batch/lazy_column_vec.rs @@ -13,7 +13,8 @@ use crate::{ /// Stores multiple `LazyBatchColumn`s. Each column has an equal length. #[derive(Clone, Debug)] pub struct LazyBatchColumnVec { - /// Multiple lazy batch columns. Each column is either decoded, or not decoded. + /// Multiple lazy batch columns. Each column is either decoded, or not + /// decoded. /// /// For decoded columns, they may be in different types. If the column is in /// type `LazyBatchColumn::Raw`, it means that it is not decoded. @@ -37,9 +38,11 @@ impl From> for LazyBatchColumnVec { } impl LazyBatchColumnVec { - /// Creates a new empty `LazyBatchColumnVec`, which does not have columns and rows. + /// Creates a new empty `LazyBatchColumnVec`, which does not have columns + /// and rows. /// - /// Because column numbers won't change, it means constructed instance will be always empty. + /// Because column numbers won't change, it means constructed instance will + /// be always empty. #[inline] pub fn empty() -> Self { Self { @@ -47,7 +50,8 @@ impl LazyBatchColumnVec { } } - /// Creates a new empty `LazyBatchColumnVec` with the same number of columns and schema. + /// Creates a new empty `LazyBatchColumnVec` with the same number of columns + /// and schema. 
#[inline] #[must_use] pub fn clone_empty(&self, capacity: usize) -> Self { @@ -60,7 +64,8 @@ impl LazyBatchColumnVec { } } - /// Creates a new `LazyBatchColumnVec`, which contains `columns_count` number of raw columns. + /// Creates a new `LazyBatchColumnVec`, which contains `columns_count` + /// number of raw columns. #[cfg(test)] #[must_use] pub fn with_raw_columns(columns_count: usize) -> Self { @@ -160,8 +165,8 @@ impl LazyBatchColumnVec { Ok(()) } - /// Truncates columns into equal length. The new length of all columns would be the length of - /// the shortest column before calling this function. + /// Truncates columns into equal length. The new length of all columns would + /// be the length of the shortest column before calling this function. pub fn truncate_into_equal_length(&mut self) { let mut min_len = self.rows_len(); for col in &self.columns { @@ -184,8 +189,8 @@ impl LazyBatchColumnVec { } } -// Do not implement Deref, since we want to forbid some misleading function calls like -// `LazyBatchColumnVec.len()`. +// Do not implement Deref, since we want to forbid some misleading function +// calls like `LazyBatchColumnVec.len()`. impl Index for LazyBatchColumnVec { type Output = LazyBatchColumn; diff --git a/components/tidb_query_datatype/src/codec/chunk/chunk.rs b/components/tidb_query_datatype/src/codec/chunk/chunk.rs index 2cf1261f7dc..ee111d11f77 100644 --- a/components/tidb_query_datatype/src/codec/chunk/chunk.rs +++ b/components/tidb_query_datatype/src/codec/chunk/chunk.rs @@ -10,8 +10,9 @@ use super::{ use crate::{codec::Datum, FieldTypeAccessor}; /// `Chunk` stores multiple rows of data. -/// Values are appended in compact format and can be directly accessed without decoding. -/// When the chunk is done processing, we can reuse the allocated memory by resetting it. +/// Values are appended in compact format and can be directly accessed without +/// decoding. 
When the chunk is done processing, we can reuse the allocated +/// memory by resetting it. pub struct Chunk { columns: Vec, } @@ -32,7 +33,8 @@ impl Chunk { } /// Reset the chunk, so the memory it allocated can be reused. - /// Make sure all the data in the chunk is not used anymore before you reuse this chunk. + /// Make sure all the data in the chunk is not used anymore before you reuse + /// this chunk. pub fn reset(&mut self) { for column in &mut self.columns { column.reset(); diff --git a/components/tidb_query_datatype/src/codec/chunk/column.rs b/components/tidb_query_datatype/src/codec/chunk/column.rs index b8f7e4b9da6..f7f13363686 100644 --- a/components/tidb_query_datatype/src/codec/chunk/column.rs +++ b/components/tidb_query_datatype/src/codec/chunk/column.rs @@ -402,7 +402,8 @@ impl Column { self.null_cnt = 0; self.null_bitmap.clear(); if !self.var_offsets.is_empty() { - // The first offset is always 0, it makes slicing the data easier, we need to keep it. + // The first offset is always 0, it makes slicing the data easier, we need to + // keep it. self.var_offsets.truncate(1); } self.data.clear(); @@ -1006,7 +1007,7 @@ pub trait ChunkColumnEncoder: NumberEncoder { } // offsets if !col.is_fixed() { - //let length = (col.length+1)*4; + // let length = (col.length+1)*4; for v in &col.var_offsets { self.write_i64_le(*v as i64)?; } diff --git a/components/tidb_query_datatype/src/codec/collation/collator/gbk_collation.rs b/components/tidb_query_datatype/src/codec/collation/collator/gbk_collation.rs index 9c2dd2497f1..31685ca08d5 100644 --- a/components/tidb_query_datatype/src/codec/collation/collator/gbk_collation.rs +++ b/components/tidb_query_datatype/src/codec/collation/collator/gbk_collation.rs @@ -15,8 +15,8 @@ impl Collator for T { #[inline] fn char_weight(ch: char) -> Self::Weight { - // All GBK code point are in BMP, if the incoming character is not, convert it to '?'. - // This should not happened. 
+ // All GBK code point are in BMP, if the incoming character is not, convert it + // to '?'. This should not happened. let r = ch as usize; if r > 0xFFFF { return '?' as u16; @@ -71,7 +71,8 @@ impl GbkCollator for CollatorGbkBin { const WEIGHT_TABLE: &'static [u8; (0xffff + 1) * 2] = GBK_BIN_TABLE; } -/// Collator for `gbk_chinese_ci` collation with padding behavior (trims right spaces). +/// Collator for `gbk_chinese_ci` collation with padding behavior (trims right +/// spaces). #[derive(Debug)] pub struct CollatorGbkChineseCi; @@ -80,10 +81,12 @@ impl GbkCollator for CollatorGbkChineseCi { const WEIGHT_TABLE: &'static [u8; (0xffff + 1) * 2] = GBK_CHINESE_CI_TABLE; } -// GBK_BIN_TABLE are the encoding tables from Unicode to GBK code, it is totally the same with golang's GBK encoding. -// If there is no mapping code in GBK, use 0x3F(?) instead. It should not happened. +// GBK_BIN_TABLE are the encoding tables from Unicode to GBK code, it is totally +// the same with golang's GBK encoding. If there is no mapping code in GBK, use +// 0x3F(?) instead. It should not happened. const GBK_BIN_TABLE: &[u8; (0xffff + 1) * 2] = include_bytes!("gbk_bin.data"); // GBK_CHINESE_CI_TABLE are the sort key tables for GBK codepoint. -// If there is no mapping code in GBK, use 0x3F(?) instead. It should not happened. +// If there is no mapping code in GBK, use 0x3F(?) instead. It should not +// happened. 
const GBK_CHINESE_CI_TABLE: &[u8; (0xffff + 1) * 2] = include_bytes!("gbk_chinese_ci.data"); diff --git a/components/tidb_query_datatype/src/codec/collation/collator/latin1_bin.rs b/components/tidb_query_datatype/src/codec/collation/collator/latin1_bin.rs index c74ed3687a9..c70deb08cd1 100644 --- a/components/tidb_query_datatype/src/codec/collation/collator/latin1_bin.rs +++ b/components/tidb_query_datatype/src/codec/collation/collator/latin1_bin.rs @@ -4,7 +4,8 @@ use bstr::{ByteSlice, B}; use super::*; -/// Collator for latin1_bin collation with padding behavior (trims right spaces). +/// Collator for latin1_bin collation with padding behavior (trims right +/// spaces). #[derive(Debug)] pub struct CollatorLatin1Bin; diff --git a/components/tidb_query_datatype/src/codec/collation/collator/mod.rs b/components/tidb_query_datatype/src/codec/collation/collator/mod.rs index e12114d9cea..bac55eabea7 100644 --- a/components/tidb_query_datatype/src/codec/collation/collator/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/collator/mod.rs @@ -45,7 +45,8 @@ mod tests { (Collation::GbkChineseCi, 6), ]; let cases = vec![ - // (sa, sb, [Utf8Mb4Bin, Utf8Mb4BinNoPadding, Utf8Mb4GeneralCi, Utf8Mb4UnicodeCi, Latin1, GBKBin, GbkChineseCi]) + // (sa, sb, [Utf8Mb4Bin, Utf8Mb4BinNoPadding, Utf8Mb4GeneralCi, Utf8Mb4UnicodeCi, + // Latin1, GBKBin, GbkChineseCi]) ( "a".as_bytes(), "a".as_bytes(), @@ -232,7 +233,8 @@ mod tests { (Collation::GbkChineseCi, 6), ]; let cases = vec![ - // (str, [Utf8Mb4Bin, Utf8Mb4BinNoPadding, Utf8Mb4GeneralCi, Utf8Mb4UnicodeCi, Latin1, GBKBin, GbkChineseCi]) + // (str, [Utf8Mb4Bin, Utf8Mb4BinNoPadding, Utf8Mb4GeneralCi, Utf8Mb4UnicodeCi, Latin1, + // GBKBin, GbkChineseCi]) ( "a", [ diff --git a/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_binary.rs b/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_binary.rs index bbd7e60a047..959664b1854 100644 --- 
a/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_binary.rs +++ b/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_binary.rs @@ -2,7 +2,8 @@ use super::*; -/// Collator for utf8mb4_bin collation with padding behavior (trims right spaces). +/// Collator for utf8mb4_bin collation with padding behavior (trims right +/// spaces). #[derive(Debug)] pub struct CollatorUtf8Mb4Bin; diff --git a/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_general_ci.rs b/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_general_ci.rs index 50770550f19..2cc9a738372 100644 --- a/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_general_ci.rs +++ b/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_general_ci.rs @@ -2,7 +2,8 @@ use super::*; -/// Collator for utf8mb4_general_ci collation with padding behavior (trims right spaces). +/// Collator for utf8mb4_general_ci collation with padding behavior (trims right +/// spaces). #[derive(Debug)] pub struct CollatorUtf8Mb4GeneralCi; diff --git a/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_unicode_ci.rs b/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_unicode_ci.rs index 9bb44382f53..5a529d48144 100644 --- a/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_unicode_ci.rs +++ b/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_unicode_ci.rs @@ -2,7 +2,8 @@ use super::*; -/// Collator for `utf8mb4_unicode_ci` collation with padding behavior (trims right spaces). +/// Collator for `utf8mb4_unicode_ci` collation with padding behavior (trims +/// right spaces). 
#[derive(Debug)] pub struct CollatorUtf8Mb4UnicodeCi; diff --git a/components/tidb_query_datatype/src/codec/collation/mod.rs b/components/tidb_query_datatype/src/codec/collation/mod.rs index 7d73cce2192..0d6a8e6d9ea 100644 --- a/components/tidb_query_datatype/src/codec/collation/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/mod.rs @@ -149,8 +149,9 @@ where /// /// # Panic /// - /// The `Ord`, `Hash`, `PartialEq` and more implementations assume that the bytes are - /// valid for the certain collator. The violation will cause panic. + /// The `Ord`, `Hash`, `PartialEq` and more implementations assume that the + /// bytes are valid for the certain collator. The violation will cause + /// panic. #[inline] pub fn new_unchecked(inner: T) -> Self { Self { diff --git a/components/tidb_query_datatype/src/codec/convert.rs b/components/tidb_query_datatype/src/codec/convert.rs index bcfc7bb2bbe..c576f14ee5f 100644 --- a/components/tidb_query_datatype/src/codec/convert.rs +++ b/components/tidb_query_datatype/src/codec/convert.rs @@ -280,11 +280,13 @@ impl ToInt for u64 { impl ToInt for f64 { /// This function is ported from TiDB's types.ConvertFloatToInt, - /// which checks whether the number overflows the signed lower and upper boundaries of `tp` + /// which checks whether the number overflows the signed lower and upper + /// boundaries of `tp` /// /// # Notes /// - /// It handles overflows using `ctx` so that the caller would not handle it anymore. + /// It handles overflows using `ctx` so that the caller would not handle it + /// anymore. 
fn to_int(&self, ctx: &mut EvalContext, tp: FieldTypeTp) -> Result { #![allow(clippy::float_cmp)] let val = self.round(); @@ -307,11 +309,13 @@ impl ToInt for f64 { } /// This function is ported from TiDB's types.ConvertFloatToUint, - /// which checks whether the number overflows the unsigned upper boundaries of `tp` + /// which checks whether the number overflows the unsigned upper boundaries + /// of `tp` /// /// # Notes /// - /// It handles overflows using `ctx` so that the caller would not handle it anymore. + /// It handles overflows using `ctx` so that the caller would not handle it + /// anymore. #[allow(clippy::float_cmp)] fn to_uint(&self, ctx: &mut EvalContext, tp: FieldTypeTp) -> Result { let val = self.round(); @@ -444,8 +448,12 @@ impl ToInt for Decimal { impl ToInt for DateTime { // FiXME - // Time::parse_utc_datetime("2000-01-01T12:13:14.6666", 4).unwrap().round_frac(DEFAULT_FSP) - // will get 2000-01-01T12:13:14, this is a bug + // ``` + // Time::parse_utc_datetime("2000-01-01T12:13:14.6666", 4) + // .unwrap() + // .round_frac(DEFAULT_FSP) + // ``` + // will get 2000-01-01T12:13:14, this is a bug #[inline] fn to_int(&self, ctx: &mut EvalContext, tp: FieldTypeTp) -> Result { let t = self.round_frac(ctx, DEFAULT_FSP)?; @@ -664,8 +672,8 @@ pub fn produce_dec_with_specified_tp( } } -/// `produce_float_with_specified_tp`(`ProduceFloatWithSpecifiedTp` in TiDB) produces -/// a new float64 according to `flen` and `decimal` in `self.tp`. +/// `produce_float_with_specified_tp`(`ProduceFloatWithSpecifiedTp` in TiDB) +/// produces a new float64 according to `flen` and `decimal` in `self.tp`. /// TODO port tests from TiDB(TiDB haven't implemented now) pub fn produce_float_with_specified_tp( ctx: &mut EvalContext, @@ -692,8 +700,8 @@ pub fn produce_float_with_specified_tp( Ok(res) } -/// `produce_str_with_specified_tp`(`ProduceStrWithSpecifiedTp` in TiDB) produces -/// a new string according to `flen` and `chs`. 
+/// `produce_str_with_specified_tp`(`ProduceStrWithSpecifiedTp` in TiDB) +/// produces a new string according to `flen` and `chs`. pub fn produce_str_with_specified_tp<'a>( ctx: &mut EvalContext, s: Cow<'a, [u8]>, @@ -705,8 +713,8 @@ pub fn produce_str_with_specified_tp<'a>( return Ok(s); } let flen = flen as usize; - // flen is the char length, not byte length, for UTF8 charset, we need to calculate the - // char count and truncate to flen chars if it is too long. + // flen is the char length, not byte length, for UTF8 charset, we need to + // calculate the char count and truncate to flen chars if it is too long. if chs == charset::CHARSET_UTF8 || chs == charset::CHARSET_UTF8MB4 { let (char_count, truncate_pos) = { let s = &String::from_utf8_lossy(&s); @@ -767,7 +775,8 @@ pub fn pad_zero_for_binary_type(s: &mut Vec, ft: &FieldType) { .unwrap_or(false) && s.len() < flen { - // it seems MaxAllowedPacket has not push down to tikv, so we needn't to handle it + // it seems MaxAllowedPacket has not push down to tikv, so we needn't to handle + // it s.resize(flen, 0); } } @@ -831,8 +840,8 @@ pub fn get_valid_int_prefix<'a>(ctx: &mut EvalContext, s: &'a str) -> Result( ctx: &mut EvalContext, s: &'a str, @@ -868,8 +877,8 @@ pub fn get_valid_float_prefix<'a>(ctx: &mut EvalContext, s: &'a str) -> Result<& get_valid_float_prefix_helper(ctx, s, false) } -// As TiDB code(getValidFloatPrefix()), cast expr should not give error/warning when input is -// empty. +// As TiDB code(getValidFloatPrefix()), cast expr should not give error/warning +// when input is empty. pub fn get_valid_float_prefix_helper<'a>( ctx: &mut EvalContext, s: &'a str, @@ -961,14 +970,14 @@ fn round_int_str(num_next_dot: char, s: &str) -> Cow<'_, str> { } /// It converts a valid float string into valid integer string which can be -/// parsed by `i64::from_str`, we can't parse float first then convert it to string -/// because precision will be lost. 
+/// parsed by `i64::from_str`, we can't parse float first then convert it to +/// string because precision will be lost. /// /// When the float string indicating a value that is overflowing the i64, /// the original float string is returned and an overflow warning is attached. /// -/// This func will find serious overflow such as the len of result > 20 (without prefix `+/-`) -/// however, it will not check whether the result overflow BIGINT. +/// This func will find serious overflow such as the len of result > 20 (without +/// prefix `+/-`) however, it will not check whether the result overflow BIGINT. fn float_str_to_int_string<'a>(ctx: &mut EvalContext, valid_float: &'a str) -> Cow<'a, str> { // this func is complex, to make it same as TiDB's version, // we impl it like TiDB's version(https://github.com/pingcap/tidb/blob/9b521342bf/types/convert.go#L400) @@ -1531,7 +1540,8 @@ mod tests { ("{}", ERR_TRUNCATE_WRONG_VALUE), ("[]", ERR_TRUNCATE_WRONG_VALUE), ]; - // avoid to use EvalConfig::default_for_test() that set Flag::IGNORE_TRUNCATE as true + // avoid to use EvalConfig::default_for_test() that set Flag::IGNORE_TRUNCATE as + // true let mut ctx = EvalContext::new(Arc::new(EvalConfig::new())); for (jstr, exp) in test_cases { let json: Json = jstr.parse().unwrap(); @@ -1865,7 +1875,8 @@ mod tests { ("{}", ERR_TRUNCATE_WRONG_VALUE), ("[]", ERR_TRUNCATE_WRONG_VALUE), ]; - // avoid to use EvalConfig::default_for_test() that set Flag::IGNORE_TRUNCATE as true + // avoid to use EvalConfig::default_for_test() that set Flag::IGNORE_TRUNCATE as + // true let mut ctx = EvalContext::new(Arc::new(EvalConfig::new())); for (jstr, exp) in test_cases { let json: Json = jstr.parse().unwrap(); @@ -2089,7 +2100,8 @@ mod tests { assert_eq!(o.unwrap(), i); } - // Secondly, make sure warnings are attached when the float string cannot be casted to a valid int string + // Secondly, make sure warnings are attached when the float string cannot be + // casted to a valid int string let 
warnings = ctx.take_warnings().warnings; assert_eq!(warnings.len(), 2); for warning in warnings { @@ -2359,8 +2371,8 @@ mod tests { // origin, // (origin_flen, origin_decimal), (res_flen, res_decimal), is_unsigned, // expect, warning_err_code, - // ((InInsertStmt || InUpdateStmt || InDeleteStmt), overflow_as_warning, truncate_as_warning) - // ) + // ((InInsertStmt || InUpdateStmt || InDeleteStmt), overflow_as_warning, + // truncate_as_warning) ) // // The origin_flen, origin_decimal field is to // let the programmer clearly know what the flen and decimal of the decimal is. @@ -2646,7 +2658,8 @@ mod tests { // zero // FIXME: // according to Decimal::prec_and_frac, - // the decimals' prec(the number of all digits) and frac(the number of digit after number point) are + // the decimals' prec(the number of all digits) and frac(the number of digit after + // number point) are: // Decimal::zero()'s is (1, 0) // Decimal::from_bytes(b"00.00")'s is (2, 2) // Decimal::from_bytes(b"000.00")'s is (2, 2) diff --git a/components/tidb_query_datatype/src/codec/data_type/chunked_vec_bytes.rs b/components/tidb_query_datatype/src/codec/data_type/chunked_vec_bytes.rs index 7086e97c23b..4bad0fcc129 100644 --- a/components/tidb_query_datatype/src/codec/data_type/chunked_vec_bytes.rs +++ b/components/tidb_query_datatype/src/codec/data_type/chunked_vec_bytes.rs @@ -13,11 +13,11 @@ pub struct ChunkedVecBytes { /// A vector storing `Option` with a compact layout. /// -/// Inside `ChunkedVecBytes`, `bitmap` indicates if an element at given index is null, -/// and `data` stores actual data. Bytes data are stored adjacent to each other in -/// `data`. If element at a given index is null, then it takes no space in `data`. -/// Otherwise, contents of the `Bytes` are stored, and `var_offset` indicates the starting -/// position of each element. +/// Inside `ChunkedVecBytes`, `bitmap` indicates if an element at given index is +/// null, and `data` stores actual data. 
Bytes data are stored adjacent to each +/// other in `data`. If element at a given index is null, then it takes no space +/// in `data`. Otherwise, contents of the `Bytes` are stored, and `var_offset` +/// indicates the starting position of each element. impl ChunkedVecBytes { #[inline] pub fn push_data_ref(&mut self, value: BytesRef<'_>) { diff --git a/components/tidb_query_datatype/src/codec/data_type/chunked_vec_json.rs b/components/tidb_query_datatype/src/codec/data_type/chunked_vec_json.rs index 52279c5a439..9ef17dc61eb 100644 --- a/components/tidb_query_datatype/src/codec/data_type/chunked_vec_json.rs +++ b/components/tidb_query_datatype/src/codec/data_type/chunked_vec_json.rs @@ -7,11 +7,12 @@ use crate::impl_chunked_vec_common; /// A vector storing `Option` with a compact layout. /// -/// Inside `ChunkedVecJson`, `bitmap` indicates if an element at given index is null, -/// and `data` stores actual data. Json data are stored adjacent to each other in -/// `data`. If element at a given index is null, then it takes no space in `data`. -/// Otherwise, a one byte `json_type` and variable size json data is stored in `data`, -/// and `var_offset` indicates the starting position of each element. +/// Inside `ChunkedVecJson`, `bitmap` indicates if an element at given index is +/// null, and `data` stores actual data. Json data are stored adjacent to each +/// other in `data`. If element at a given index is null, then it takes no space +/// in `data`. Otherwise, a one byte `json_type` and variable size json data is +/// stored in `data`, and `var_offset` indicates the starting position of each +/// element. 
#[derive(Debug, PartialEq, Clone)] pub struct ChunkedVecJson { data: Vec, diff --git a/components/tidb_query_datatype/src/codec/data_type/chunked_vec_set.rs b/components/tidb_query_datatype/src/codec/data_type/chunked_vec_set.rs index 41b523391c2..1a3f6838e96 100644 --- a/components/tidb_query_datatype/src/codec/data_type/chunked_vec_set.rs +++ b/components/tidb_query_datatype/src/codec/data_type/chunked_vec_set.rs @@ -20,7 +20,8 @@ use crate::impl_chunked_vec_common; /// stored representation issue /// /// TODO: add way to set set column data -/// TODO: code fot set/enum looks nearly the same, considering refactor them using macro +/// TODO: code fot set/enum looks nearly the same, considering refactor them +/// using macro #[derive(Debug, Clone)] pub struct ChunkedVecSet { data: Arc, diff --git a/components/tidb_query_datatype/src/codec/data_type/chunked_vec_sized.rs b/components/tidb_query_datatype/src/codec/data_type/chunked_vec_sized.rs index 45e2665ec31..4f614d00be0 100644 --- a/components/tidb_query_datatype/src/codec/data_type/chunked_vec_sized.rs +++ b/components/tidb_query_datatype/src/codec/data_type/chunked_vec_sized.rs @@ -9,10 +9,11 @@ use crate::impl_chunked_vec_common; /// in that structure itself. This includes `Int`, `Real`, `Decimal`, /// `DateTime` and `Duration` in copr framework. /// -/// Inside `ChunkedVecSized`, `bitmap` indicates if an element at given index is null, -/// and `data` stores actual data. If the element at given index is null (or `None`), -/// the corresponding `bitmap` bit is false, and `data` stores zero value for -/// that element. Otherwise, `data` stores actual data, and `bitmap` bit is true. +/// Inside `ChunkedVecSized`, `bitmap` indicates if an element at given index is +/// null, and `data` stores actual data. If the element at given index is null +/// (or `None`), the corresponding `bitmap` bit is false, and `data` stores zero +/// value for that element. 
Otherwise, `data` stores actual data, and `bitmap` +/// bit is true. #[derive(Debug, PartialEq, Clone)] pub struct ChunkedVecSized { data: Vec, diff --git a/components/tidb_query_datatype/src/codec/data_type/logical_rows.rs b/components/tidb_query_datatype/src/codec/data_type/logical_rows.rs index d27a030b817..46b5a64b010 100644 --- a/components/tidb_query_datatype/src/codec/data_type/logical_rows.rs +++ b/components/tidb_query_datatype/src/codec/data_type/logical_rows.rs @@ -1,6 +1,7 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -// TODO: This value is chosen based on MonetDB/X100's research without our own benchmarks. +// TODO: This value is chosen based on MonetDB/X100's research without our own +// benchmarks. pub const BATCH_MAX_SIZE: usize = 1024; /// Identical logical row is a special case in expression evaluation that diff --git a/components/tidb_query_datatype/src/codec/data_type/mod.rs b/components/tidb_query_datatype/src/codec/data_type/mod.rs index 8397a8d2ab5..278ef48469a 100644 --- a/components/tidb_query_datatype/src/codec/data_type/mod.rs +++ b/components/tidb_query_datatype/src/codec/data_type/mod.rs @@ -50,8 +50,8 @@ pub use crate::codec::mysql::{ }; use crate::{codec::convert::ConvertTo, expr::EvalContext, EvalType}; -/// A trait of evaluating current concrete eval type into a MySQL logic value, represented by -/// Rust's `bool` type. +/// A trait of evaluating current concrete eval type into a MySQL logic value, +/// represented by Rust's `bool` type. pub trait AsMySQLBool { /// Evaluates into a MySQL logic value. fn as_mysql_bool(&self, context: &mut EvalContext) -> Result; @@ -187,27 +187,28 @@ pub trait Evaluable: Clone + std::fmt::Debug + Send + Sync + 'static { /// panics if the varient mismatches. fn borrow_scalar_value_ref(v: ScalarValueRef<'_>) -> Option<&Self>; - /// Borrows a slice of this concrete type from a `VectorValue` in the same type; - /// panics if the varient mismatches. 
+ /// Borrows a slice of this concrete type from a `VectorValue` in the same + /// type; panics if the varient mismatches. fn borrow_vector_value(v: &VectorValue) -> &ChunkedVecSized; } pub trait EvaluableRet: Clone + std::fmt::Debug + Send + Sync + 'static { const EVAL_TYPE: EvalType; type ChunkedType: ChunkedVec; - /// Converts a vector of this concrete type into a `VectorValue` in the same type; - /// panics if the varient mismatches. + /// Converts a vector of this concrete type into a `VectorValue` in the same + /// type; panics if the varient mismatches. fn cast_chunk_into_vector_value(vec: Self::ChunkedType) -> VectorValue; } /// # Notes /// -/// Make sure operating `bitmap` and `value` together, so while `bitmap` is 0 and the -/// corresponding value is None. +/// Make sure operating `bitmap` and `value` together, so while `bitmap` is 0 +/// and the corresponding value is None. /// /// With this guaranty, we can avoid the following issue: /// -/// For Data [Some(1), Some(2), None], we could have different stored representation: +/// For Data [Some(1), Some(2), None], we could have different stored +/// representation: /// /// Bitmap: 110, Value: 1, 2, 0 /// Bitmap: 110, Value: 1, 2, 1 @@ -368,8 +369,8 @@ pub trait EvaluableRef<'a>: Clone + std::fmt::Debug + Send + Sync { /// panics if the varient mismatches. fn borrow_scalar_value_ref(v: ScalarValueRef<'a>) -> Option; - /// Borrows a slice of this concrete type from a `VectorValue` in the same type; - /// panics if the varient mismatches. + /// Borrows a slice of this concrete type from a `VectorValue` in the same + /// type; panics if the varient mismatches. 
fn borrow_vector_value(v: &'a VectorValue) -> Self::ChunkedType; /// Convert this reference to owned type diff --git a/components/tidb_query_datatype/src/codec/data_type/scalar.rs b/components/tidb_query_datatype/src/codec/data_type/scalar.rs index 7bf36935f3b..b95dbb63342 100644 --- a/components/tidb_query_datatype/src/codec/data_type/scalar.rs +++ b/components/tidb_query_datatype/src/codec/data_type/scalar.rs @@ -13,17 +13,19 @@ use crate::{ /// A scalar value container, a.k.a. datum, for all concrete eval types. /// -/// In many cases, for example, at the framework level, the concrete eval type is unknown at compile -/// time. So we use this enum container to represent types dynamically. It is similar to trait -/// object `Box` where `T` is a concrete eval type but faster. +/// In many cases, for example, at the framework level, the concrete eval type +/// is unknown at compile time. So we use this enum container to represent types +/// dynamically. It is similar to trait object `Box` where `T` is a concrete +/// eval type but faster. /// /// Like `VectorValue`, the inner concrete value is immutable. /// /// Compared to `VectorValue`, it only contains a single concrete value. -/// Compared to `Datum`, it is a newer encapsulation that naturally wraps `Option<..>`. +/// Compared to `Datum`, it is a newer encapsulation that naturally wraps +/// `Option<..>`. /// -/// TODO: Once we removed the `Option<..>` wrapper, it will be much like `Datum`. At that time, -/// we only need to preserve one of them. +/// TODO: Once we removed the `Option<..>` wrapper, it will be much like +/// `Datum`. At that time, we only need to preserve one of them. #[derive(Clone, Debug, PartialEq)] pub enum ScalarValue { Int(Option), @@ -170,7 +172,8 @@ impl From for Option { } } -/// A scalar value reference container. Can be created from `ScalarValue` or `VectorValue`. +/// A scalar value reference container. Can be created from `ScalarValue` or +/// `VectorValue`. 
#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ScalarValueRef<'a> { Int(Option<&'a super::Int>), diff --git a/components/tidb_query_datatype/src/codec/data_type/vector.rs b/components/tidb_query_datatype/src/codec/data_type/vector.rs index d26067d8219..c7eecf92fa0 100644 --- a/components/tidb_query_datatype/src/codec/data_type/vector.rs +++ b/components/tidb_query_datatype/src/codec/data_type/vector.rs @@ -8,8 +8,8 @@ use crate::{ /// A vector value container, a.k.a. column, for all concrete eval types. /// -/// The inner concrete value is immutable. However it is allowed to push and remove values from -/// this vector container. +/// The inner concrete value is immutable. However it is allowed to push and +/// remove values from this vector container. #[derive(Debug, PartialEq, Clone)] pub enum VectorValue { Int(ChunkedVecSized), @@ -25,8 +25,8 @@ pub enum VectorValue { } impl VectorValue { - /// Creates an empty `VectorValue` according to `eval_tp` and reserves capacity according - /// to `capacity`. + /// Creates an empty `VectorValue` according to `eval_tp` and reserves + /// capacity according to `capacity`. #[inline] pub fn with_capacity(capacity: usize, eval_tp: EvalType) -> Self { match_template_evaltype! { @@ -116,9 +116,11 @@ impl VectorValue { self.len() == 0 } - /// Shortens the column, keeping the first `len` datums and dropping the rest. + /// Shortens the column, keeping the first `len` datums and dropping the + /// rest. /// - /// If `len` is greater than the column's current length, this has no effect. + /// If `len` is greater than the column's current length, this has no + /// effect. #[inline] pub fn truncate(&mut self, len: usize) { match_template_evaltype! { @@ -134,7 +136,8 @@ impl VectorValue { self.truncate(0); } - /// Returns the number of elements this column can hold without reallocating. + /// Returns the number of elements this column can hold without + /// reallocating. 
#[inline] pub fn capacity(&self) -> usize { match_template_evaltype! { @@ -165,7 +168,8 @@ impl VectorValue { /// Evaluates values into MySQL logic values. /// - /// The caller must provide an output buffer which is large enough for holding values. + /// The caller must provide an output buffer which is large enough for + /// holding values. pub fn eval_as_mysql_bools( &self, ctx: &mut EvalContext, @@ -464,7 +468,8 @@ impl VectorValue { macro_rules! impl_as_slice { ($ty:tt, $name:ident) => { impl VectorValue { - /// Extracts a slice of values in specified concrete type from current column. + /// Extracts a slice of values in specified concrete type from current + /// column. /// /// # Panics /// @@ -494,8 +499,9 @@ impl_as_slice! { Json, to_json_vec } impl_as_slice! { Enum, to_enum_vec } impl_as_slice! { Set, to_set_vec } -/// Additional `VectorValue` methods available via generics. These methods support different -/// concrete types but have same names and should be specified via the generic parameter type. +/// Additional `VectorValue` methods available via generics. These methods +/// support different concrete types but have same names and should be specified +/// via the generic parameter type. pub trait VectorValueExt { /// The generic version for `VectorValue::push_xxx()`. fn push(&mut self, v: Option); diff --git a/components/tidb_query_datatype/src/codec/datum.rs b/components/tidb_query_datatype/src/codec/datum.rs index a1cc6460ae2..8d2e62b6ac0 100644 --- a/components/tidb_query_datatype/src/codec/datum.rs +++ b/components/tidb_query_datatype/src/codec/datum.rs @@ -162,7 +162,8 @@ pub fn cmp_f64(l: f64, r: f64) -> Result { .ok_or_else(|| invalid_type!("{} and {} can't be compared", l, r)) } -/// `checked_add_i64` checks and adds `r` to the `l`. Return None if the sum is negative. +/// `checked_add_i64` checks and adds `r` to the `l`. Return None if the sum is +/// negative. 
#[inline] fn checked_add_i64(l: u64, r: i64) -> Option { if r >= 0 { @@ -908,8 +909,8 @@ pub trait DatumDecoder: NIL_FLAG => Datum::Null, FLOAT_FLAG => self.read_f64().map(Datum::F64)?, DURATION_FLAG => { - // Decode the i64 into `Duration` with `MAX_FSP`, then unflatten it with concrete - // `FieldType` information + // Decode the i64 into `Duration` with `MAX_FSP`, then unflatten it with + // concrete `FieldType` information let nanos = self.read_i64()?; let dur = Duration::from_nanos(nanos, MAX_FSP)?; Datum::Dur(dur) @@ -1010,7 +1011,7 @@ pub trait DatumEncoder: self.write_u8(JSON_FLAG)?; self.write_json(j.as_ref())?; } - //TODO: implement datum write here. + // TODO: implement datum write here. Datum::Enum(_) => unimplemented!(), Datum::Set(_) => unimplemented!(), } @@ -1073,7 +1074,8 @@ pub fn encode(ctx: &mut EvalContext, values: &[Datum], comparable: bool) -> Resu Ok(buf) } -/// `encode_key` encodes a datum slice into a memory comparable buffer as the key. +/// `encode_key` encodes a datum slice into a memory comparable buffer as the +/// key. pub fn encode_key(ctx: &mut EvalContext, values: &[Datum]) -> Result> { encode(ctx, values, true) } @@ -1134,7 +1136,8 @@ pub fn split_datum(buf: &[u8], desc: bool) -> Result<(&[u8], &[u8])> { /// `skip_n_datum_slices` skip `n` datum slices within `buf` /// and advances the buffer pointer. -/// If the datum buffer contains less than `n` slices, an error will be returned. +/// If the datum buffer contains less than `n` slices, an error will be +/// returned. pub fn skip_n(buf: &mut &[u8], n: usize) -> Result<()> { let origin = *buf; for i in 0..n { diff --git a/components/tidb_query_datatype/src/codec/datum_codec.rs b/components/tidb_query_datatype/src/codec/datum_codec.rs index 6710029ec99..9d3f5058d0b 100644 --- a/components/tidb_query_datatype/src/codec/datum_codec.rs +++ b/components/tidb_query_datatype/src/codec/datum_codec.rs @@ -1,7 +1,8 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -//! 
The unified entry for encoding and decoding an evaluable type to / from datum bytes. -//! Datum bytes consists of 1 byte datum flag and variable bytes datum payload. +//! The unified entry for encoding and decoding an evaluable type to / from +//! datum bytes. Datum bytes consists of 1 byte datum flag and variable bytes +//! datum payload. use codec::prelude::*; use tipb::FieldType; diff --git a/components/tidb_query_datatype/src/codec/mysql/binary_literal.rs b/components/tidb_query_datatype/src/codec/mysql/binary_literal.rs index 9904ead1098..8d1f5fdd8bb 100644 --- a/components/tidb_query_datatype/src/codec/mysql/binary_literal.rs +++ b/components/tidb_query_datatype/src/codec/mysql/binary_literal.rs @@ -44,9 +44,10 @@ pub fn to_uint(ctx: &mut EvalContext, bytes: &[u8]) -> Result { } impl BinaryLiteral { - /// from_u64 creates a new BinaryLiteral instance by the given uint value in BigEndian. - /// byte size will be used as the length of the new BinaryLiteral, with leading bytes filled to zero. - /// If byte size is -1, the leading zeros in new BinaryLiteral will be trimmed. + /// from_u64 creates a new BinaryLiteral instance by the given uint value in + /// BigEndian. byte size will be used as the length of the new + /// BinaryLiteral, with leading bytes filled to zero. If byte size is -1, + /// the leading zeros in new BinaryLiteral will be trimmed. pub fn from_u64(val: u64, byte_size: isize) -> Result { if byte_size != -1 && !(1..=8).contains(&byte_size) { return Err(box_err!("invalid byte size: {}", byte_size)); diff --git a/components/tidb_query_datatype/src/codec/mysql/charset.rs b/components/tidb_query_datatype/src/codec/mysql/charset.rs index 27ad1b2a44f..0ac2655c619 100644 --- a/components/tidb_query_datatype/src/codec/mysql/charset.rs +++ b/components/tidb_query_datatype/src/codec/mysql/charset.rs @@ -4,7 +4,8 @@ pub const CHARSET_BIN: &str = "binary"; /// `CHARSET_UTF8` is the default charset for string types. 
pub const CHARSET_UTF8: &str = "utf8"; -/// `CHARSET_UTF8MB4` represents 4 bytes utf8, which works the same way as utf8 in Rust. +/// `CHARSET_UTF8MB4` represents 4 bytes utf8, which works the same way as utf8 +/// in Rust. pub const CHARSET_UTF8MB4: &str = "utf8mb4"; /// `CHARSET_ASCII` is a subset of UTF8. pub const CHARSET_ASCII: &str = "ascii"; diff --git a/components/tidb_query_datatype/src/codec/mysql/decimal.rs b/components/tidb_query_datatype/src/codec/mysql/decimal.rs index 2eec85b7e34..a172d2e2723 100644 --- a/components/tidb_query_datatype/src/codec/mysql/decimal.rs +++ b/components/tidb_query_datatype/src/codec/mysql/decimal.rs @@ -58,10 +58,11 @@ impl Res { matches!(*self, Res::Truncated(_)) } - /// Convert `Res` into `Result` with an `EvalContext` that handling the errors - /// If `truncated_err` is None, `ctx` will try to handle the default truncated error: `Error::truncated()`, - /// otherwise handle the specified error inside `truncated_err`. - /// Same does `overflow_err` means. + /// Convert `Res` into `Result` with an `EvalContext` that handling the + /// errors If `truncated_err` is None, `ctx` will try to handle the + /// default truncated error: `Error::truncated()`, otherwise handle the + /// specified error inside `truncated_err`. Same does `overflow_err` + /// means. fn into_result_impl( self, ctx: &mut EvalContext, @@ -186,7 +187,8 @@ pub fn dec_encoded_len(encoded: &[u8]) -> Result { Ok(int_len + frac_len + 2) } -/// `count_leading_zeroes` returns the number of leading zeroes that can be removed from int. +/// `count_leading_zeroes` returns the number of leading zeroes that can be +/// removed from int. fn count_leading_zeroes(i: u8, word: u32) -> u8 { let (mut c, mut i) = (0, i as usize); while TEN_POW[i] > word { @@ -196,7 +198,8 @@ fn count_leading_zeroes(i: u8, word: u32) -> u8 { c } -/// `count_trailing_zeroes` returns the number of trailing zeroes that can be removed from fraction. 
+/// `count_trailing_zeroes` returns the number of trailing zeroes that can be +/// removed from fraction. fn count_trailing_zeroes(i: u8, word: u32) -> u8 { let (mut c, mut i) = (0, i as usize); while word % TEN_POW[i] == 0 { @@ -259,14 +262,15 @@ fn sub2(lhs: u32, rhs: u32, carry: &mut i32, res: &mut u32) { type SubTmp = (usize, usize, u8); -/// calculate the carry for lhs - rhs, returns the carry and needed temporary results for -/// beginning a subtraction. +/// calculate the carry for lhs - rhs, returns the carry and needed temporary +/// results for beginning a subtraction. /// /// The new carry can be: /// 1. None if lhs is equals to rhs. /// 2. Some(0) if abs(lhs) > abs(rhs), /// 3. Some(1) if abs(lhs) < abs(rhs). -/// l_frac_word_cnt and r_frac_word_cnt do not contain the suffix 0 when r_int_word_cnt == l_int_word_cnt. +/// l_frac_word_cnt and r_frac_word_cnt do not contain the suffix 0 when +/// r_int_word_cnt == l_int_word_cnt. #[inline] fn calc_sub_carry(lhs: &Decimal, rhs: &Decimal) -> (Option, u8, SubTmp, SubTmp) { let (l_int_word_cnt, mut l_frac_word_cnt) = (word_cnt!(lhs.int_cnt), word_cnt!(lhs.frac_cnt)); @@ -303,9 +307,11 @@ fn calc_sub_carry(lhs: &Decimal, rhs: &Decimal) -> (Option, u8, SubTmp, Sub while r_idx as isize <= r_end && rhs.word_buf[r_end as usize] == 0 { r_end -= 1; } - // here l_end is the last nonzero index in l.word_buf, attention:it may in the range of (0,l_int_word_cnt) + // here l_end is the last nonzero index in l.word_buf, attention:it may in the + // range of (0,l_int_word_cnt) l_frac_word_cnt = cmp::max(0, l_end + 1 - l_stop as isize) as u8; - // here r_end is the last nonzero index in r.word_buf, attention:it may in the range of (0,r_int_word_cnt) + // here r_end is the last nonzero index in r.word_buf, attention:it may in the + // range of (0,r_int_word_cnt) r_frac_word_cnt = cmp::max(0, r_end + 1 - r_stop as isize) as u8; while l_idx as isize <= l_end && r_idx as isize <= r_end @@ -976,10 +982,10 @@ impl Decimal { } /// 
Given a precision count 'prec', get: - /// 1. the index of first non-zero word in self.word_buf to hold the leading 'prec' number of - /// digits - /// 2. the number of remained digits if we remove all leading zeros for the leading 'prec' - /// number of digits + /// 1. the index of first non-zero word in self.word_buf to hold the + /// leading 'prec' number of digits + /// 2. the number of remained digits if we remove all leading zeros for the + /// leading 'prec' number of digits fn remove_leading_zeroes(&self, prec: u8) -> (usize, u8) { let mut cnt = prec; let mut i = ((cnt + DIGITS_PER_WORD - 1) % DIGITS_PER_WORD) + 1; @@ -1016,7 +1022,8 @@ impl Decimal { (buf, word_start_idx, int_len, int_cnt, frac_cnt) } - /// Get the least precision and fraction count to encode this decimal completely. + /// Get the least precision and fraction count to encode this decimal + /// completely. pub fn prec_and_frac(&self) -> (u8, u8) { let (_, int_cnt) = self.remove_leading_zeroes(self.int_cnt); let prec = int_cnt + self.frac_cnt; @@ -1338,8 +1345,9 @@ impl Decimal { dec } - /// `shift` shifts decimal digits in given number (with rounding if it need), - /// shift > 0 means shift to left shift, shift < 0 means right shift. + /// `shift` shifts decimal digits in given number (with rounding if it + /// need), shift > 0 means shift to left shift, shift < 0 means right + /// shift. /// /// In fact it is multiplying on 10^shift. 
pub fn shift(self, shift: isize) -> Res { @@ -1564,7 +1572,8 @@ impl Decimal { Decimal::from_bytes_with_word_buf(s, WORD_BUF_LEN) } - /// Returns a `Decimal` from a given bytes slice buffer and specified buffer length + /// Returns a `Decimal` from a given bytes slice buffer and specified buffer + /// length /// /// # Notes /// @@ -1574,7 +1583,7 @@ impl Decimal { fn from_bytes_with_word_buf(s: &[u8], word_buf_len: u8) -> Result> { // trim whitespace let mut bs = match s.iter().position(|c| !c.is_ascii_whitespace()) { - //TODO: return badnumber + // TODO: return badnumber None => return Err(box_err!("\"{}\" is empty", escape(s))), Some(pos) => &s[pos..], }; @@ -1618,7 +1627,7 @@ impl Decimal { word += u32::from(c - b'0') * TEN_POW[inner_idx]; inner_idx += 1; if inner_idx == DIGITS_PER_WORD as usize { - //TODO overflow + // TODO overflow word_idx -= 1; d.word_buf[word_idx] = word; word = 0; @@ -2245,7 +2254,8 @@ pub trait DecimalDecoder: NumberDecoder { Ok(d) } - /// `read_decimal_from_chunk` decode Decimal encoded by `write_decimal_to_chunk`. + /// `read_decimal_from_chunk` decode Decimal encoded by + /// `write_decimal_to_chunk`. 
fn read_decimal_from_chunk(&mut self) -> Result { let buf = self.read_bytes(DECIMAL_STRUCT_SIZE)?; let d = unsafe { @@ -2457,12 +2467,15 @@ mod tests { Ok(Decimal::from_str("-18446744073709552000").unwrap()), ), // FIXME: because of rust's bug, - // (1<<64)(18446744073709551616), (1<<65)(36893488147419103232) can not be represent by f64 - // so these cases can not pass + // (1<<64)(18446744073709551616), (1<<65)(36893488147419103232) can not be represent + // by f64 so these cases can not pass // (18446744073709551616.0, Ok(Decimal::from_str("18446744073709551616").unwrap())), // (-18446744073709551616.0, Ok(Decimal::from_str("-18446744073709551616").unwrap())), // (36893488147419103000.0, Ok(Decimal::from_str("36893488147419103000.0").unwrap())), - // (-36893488147419103000.0, Ok(Decimal::from_str("-36893488147419103000.0").unwrap())), + // ( + // -36893488147419103000.0, + // Ok(Decimal::from_str("-36893488147419103000.0").unwrap()) + // ), ( 36893488147419103000.0, Ok(Decimal::from_str("36893488147419103000.0").unwrap()), diff --git a/components/tidb_query_datatype/src/codec/mysql/duration.rs b/components/tidb_query_datatype/src/codec/mysql/duration.rs index e151c8fd0c5..370467b9928 100644 --- a/components/tidb_query_datatype/src/codec/mysql/duration.rs +++ b/components/tidb_query_datatype/src/codec/mysql/duration.rs @@ -150,9 +150,9 @@ mod parser { Ok((rest, hhmmss)) } - /// A string can match datetime format only if it starts with a series of digits - /// whose length matches the full format of DateTime literal (12, 14) - /// or the string starts with a date literal. + /// A string can match datetime format only if it starts with a series of + /// digits whose length matches the full format of DateTime literal (12, + /// 14) or the string starts with a date literal. 
fn format_can_match_datetime(input: &str) -> IResult<(), (), ()> { let (rest, digits) = digit1(input)?; @@ -253,8 +253,9 @@ mod parser { )) }); - // In order to keep compatible with TiDB, when input string can only be partially parsed by `hhmmss_compact` - // and it can match the datetime format, we fallback to parse it using datetime format. + // In order to keep compatible with TiDB, when input string can only be + // partially parsed by `hhmmss_compact` and it can match the datetime + // format, we fallback to parse it using datetime format. if truncated_parse && fallback_to_datetime { return hhmmss_datetime(ctx, rest, fsp).map_or(None, |(_, duration)| Some(duration)); } @@ -363,7 +364,8 @@ impl Duration { } /// Returns the number of seconds contained by this Duration as f64. - /// The returned value does include the fractional (nanosecond) part of the duration. + /// The returned value does include the fractional (nanosecond) part of the + /// duration. #[inline] pub fn to_secs_f64(self) -> f64 { self.nanos as f64 / NANOS_PER_SEC as f64 @@ -507,7 +509,8 @@ impl Duration { Ok(Duration { nanos, fsp }) } - /// Checked duration addition. Computes self + rhs, returning None if overflow occurred. + /// Checked duration addition. Computes self + rhs, returning None if + /// overflow occurred. pub fn checked_add(self, rhs: Duration) -> Option { let nanos = self.nanos.checked_add(rhs.nanos)?; check_nanos(nanos).ok()?; @@ -517,7 +520,8 @@ impl Duration { }) } - /// Checked duration subtraction. Computes self - rhs, returning None if overflow occurred. + /// Checked duration subtraction. Computes self - rhs, returning None if + /// overflow occurred. 
pub fn checked_sub(self, rhs: Duration) -> Option { let nanos = self.nanos.checked_sub(rhs.nanos)?; check_nanos(nanos).ok()?; diff --git a/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs b/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs index 1cad179b475..fe8bb2c35d7 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs @@ -145,8 +145,8 @@ impl<'a> PartialOrd for JsonRef<'a> { let left_data = self.as_f64(); let right_data = right.as_f64(); - // tidb treats boolean as integer, but boolean is different from integer in JSON. - // so we need convert them to same type and then compare. + // tidb treats boolean as integer, but boolean is different from integer in + // JSON. so we need convert them to same type and then compare. if let (Ok(left), Ok(right)) = (left_data, right_data) { return left.partial_cmp(&right); } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/json_extract.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_extract.rs index bc867904fd6..f7c1198c542 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/json_extract.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/json_extract.rs @@ -7,9 +7,10 @@ use super::{ }; impl<'a> JsonRef<'a> { - /// `extract` receives several path expressions as arguments, matches them in j, and returns - /// the target JSON matched any path expressions, which may be autowrapped as an array. - /// If there is no any expression matched, it returns None. + /// `extract` receives several path expressions as arguments, matches them + /// in j, and returns the target JSON matched any path expressions, which + /// may be autowrapped as an array. If there is no any expression matched, + /// it returns None. 
/// /// See `Extract()` in TiDB `json.binary_function.go` pub fn extract(&self, path_expr_list: &[PathExpression]) -> Result> { diff --git a/components/tidb_query_datatype/src/codec/mysql/json/json_keys.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_keys.rs index 96bc9aaf56e..68c361321ad 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/json_keys.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/json_keys.rs @@ -5,7 +5,8 @@ use std::str; use super::{super::Result, path_expr::PathExpression, Json, JsonRef, JsonType}; impl<'a> JsonRef<'a> { - /// Evaluates a (possibly empty) list of values and returns a JSON array containing those values specified by `path_expr_list` + /// Evaluates a (possibly empty) list of values and returns a JSON array + /// containing those values specified by `path_expr_list` pub fn keys(&self, path_expr_list: &[PathExpression]) -> Result> { if !path_expr_list.is_empty() { if path_expr_list.len() > 1 { diff --git a/components/tidb_query_datatype/src/codec/mysql/json/json_merge.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_merge.rs index 3bccdce7017..627daf77722 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/json_merge.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/json_merge.rs @@ -13,7 +13,8 @@ impl Json { /// 1. adjacent arrays are merged to a single array; /// 2. adjacent object are merged to a single object; /// 3. a scalar value is autowrapped as an array before merge; - /// 4. an adjacent array and object are merged by autowrapping the object as an array. + /// 4. an adjacent array and object are merged by autowrapping the object as + /// an array. 
/// /// See `MergeBinary()` in TiDB `json/binary_function.go` #[allow(clippy::comparison_chain)] diff --git a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs index 2b36a4b89d0..7251f5477f6 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs @@ -54,7 +54,6 @@ //! // lengths up to 127, 2 bytes to represent //! // lengths up to 16383, and so on... //! ``` -//! mod binary; mod comparison; @@ -432,7 +431,8 @@ impl ConvertTo for i64 { impl ConvertTo for f64 { #[inline] fn convert(&self, _: &mut EvalContext) -> Result { - // FIXME: `select json_type(cast(1111.11 as json))` should return `DECIMAL`, we return `DOUBLE` now. + // FIXME: `select json_type(cast(1111.11 as json))` should return `DECIMAL`, we + // return `DOUBLE` now. let mut value = vec![0; F64_SIZE]; NumberCodec::encode_f64_le(&mut value, *self); Ok(Json { @@ -445,7 +445,8 @@ impl ConvertTo for f64 { impl ConvertTo for Real { #[inline] fn convert(&self, _: &mut EvalContext) -> Result { - // FIXME: `select json_type(cast(1111.11 as json))` should return `DECIMAL`, we return `DOUBLE` now. + // FIXME: `select json_type(cast(1111.11 as json))` should return `DECIMAL`, we + // return `DOUBLE` now. let mut value = vec![0; F64_SIZE]; NumberCodec::encode_f64_le(&mut value, self.into_inner()); Ok(Json { @@ -458,7 +459,8 @@ impl ConvertTo for Real { impl ConvertTo for Decimal { #[inline] fn convert(&self, ctx: &mut EvalContext) -> Result { - // FIXME: `select json_type(cast(1111.11 as json))` should return `DECIMAL`, we return `DOUBLE` now. + // FIXME: `select json_type(cast(1111.11 as json))` should return `DECIMAL`, we + // return `DOUBLE` now. 
let val: f64 = self.convert(ctx)?; val.convert(ctx) } @@ -589,7 +591,8 @@ mod tests { ("{}", ERR_TRUNCATE_WRONG_VALUE), ("[]", ERR_TRUNCATE_WRONG_VALUE), ]; - // avoid to use EvalConfig::default_for_test() that set Flag::IGNORE_TRUNCATE as true + // avoid to use EvalConfig::default_for_test() that set Flag::IGNORE_TRUNCATE as + // true let mut ctx = EvalContext::new(Arc::new(EvalConfig::new())); for (jstr, exp) in test_cases { let json: Json = jstr.parse().unwrap(); diff --git a/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs b/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs index 5118da55377..ecdec8adad4 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs @@ -52,7 +52,8 @@ impl<'a> BinaryModifier<'a> { self.rebuild() } - /// Replaces the existing value JSON specified by the expression path with `new` + /// Replaces the existing value JSON specified by the expression path with + /// `new` pub fn replace(mut self, path: &PathExpression, new: Json) -> Result { let result = extract_json(self.old, path.legs.as_slice())?; if result.is_empty() { @@ -63,8 +64,8 @@ impl<'a> BinaryModifier<'a> { self.rebuild() } - /// Inserts a `new` into `old` JSON document by given expression path without replacing - /// existing values + /// Inserts a `new` into `old` JSON document by given expression path + /// without replacing existing values pub fn insert(mut self, path: &PathExpression, new: Json) -> Result { let result = extract_json(self.old, path.legs.as_slice())?; if !result.is_empty() { @@ -97,7 +98,8 @@ impl<'a> BinaryModifier<'a> { for i in 0..elem_count { elems.push(parent_node.array_get_elem(i)?); } - // We can ignore the idx in the PathLeg here since we have checked the path-value existence + // We can ignore the idx in the PathLeg here since we have checked the + // path-value existence elems.push(new.as_ref()); self.new_value = 
Some(Json::from_ref_array(elems)?); } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/path_expr.rs b/components/tidb_query_datatype/src/codec/mysql/json/path_expr.rs index 09e524fe373..afb9cafff67 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/path_expr.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/path_expr.rs @@ -15,6 +15,7 @@ // 2) double asterisk(**) could not be last leg; // // Examples: +// ``` // select json_extract('{"a": "b", "c": [1, "2"]}', '$.a') -> "b" // select json_extract('{"a": "b", "c": [1, "2"]}', '$.c') -> [1, "2"] // select json_extract('{"a": "b", "c": [1, "2"]}', '$.a', '$.c') -> ["b", [1, "2"]] @@ -22,6 +23,7 @@ // select json_extract('{"a": "b", "c": [1, "2"]}', '$.c[2]') -> NULL // select json_extract('{"a": "b", "c": [1, "2"]}', '$.c[*]') -> [1, "2"] // select json_extract('{"a": "b", "c": [1, "2"]}', '$.*') -> ["b", [1, "2"]] +// ``` use std::ops::Index; @@ -33,7 +35,8 @@ use crate::codec::Result; pub const PATH_EXPR_ASTERISK: &str = "*"; // [a-zA-Z_][a-zA-Z0-9_]* matches any identifier; -// "[^"\\]*(\\.[^"\\]*)*" matches any string literal which can carry escaped quotes. +// "[^"\\]*(\\.[^"\\]*)*" matches any string literal which can carry escaped +// quotes. const PATH_EXPR_LEG_RE_STR: &str = r#"(\.\s*([a-zA-Z_][a-zA-Z0-9_]*|\*|"[^"\\]*(\\.[^"\\]*)*")|(\[\s*([0-9]+|\*)\s*\])|\*\*)"#; @@ -135,7 +138,8 @@ pub fn parse_json_path_expr(path_expr: &str) -> Result { legs.push(PathLeg::DoubleAsterisk); } } - // Check `!expr.is_empty()` here because "$" is a valid path to specify the current JSON. + // Check `!expr.is_empty()` here because "$" is a valid path to specify the + // current JSON. 
if (last_end == 0) && (!expr.is_empty()) { return Err(box_err!("Invalid JSON path: {}", path_expr)); } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/serde.rs b/components/tidb_query_datatype/src/codec/mysql/json/serde.rs index 984bb151323..1b848c3534f 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/serde.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/serde.rs @@ -58,7 +58,8 @@ impl MySQLFormatter { } impl<'a> ToString for JsonRef<'a> { - /// This function is a simple combination and rewrite of serde_json's `to_writer_pretty` + /// This function is a simple combination and rewrite of serde_json's + /// `to_writer_pretty` fn to_string(&self) -> String { let mut writer = Vec::with_capacity(128); let mut ser = JsonSerializer::with_formatter(&mut writer, MySQLFormatter::new()); diff --git a/components/tidb_query_datatype/src/codec/mysql/time/extension.rs b/components/tidb_query_datatype/src/codec/mysql/time/extension.rs index 816d189c999..7cc233e92d1 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/extension.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/extension.rs @@ -59,11 +59,12 @@ impl DateTimeExtension for Time { } /// returns the week of year and year. should not be called directly. - /// when monday_first == true, Monday is considered as the first day in the week, - /// otherwise Sunday. - /// when week_year == true, week is from 1 to 53, otherwise from 0 to 53. - /// when first_weekday == true, the week that contains the first 'first-day-of-week' is week 1, - /// otherwise weeks are numbered according to ISO 8601:1988. + /// - when monday_first == true, Monday is considered as the first day in + /// the week, otherwise Sunday. + /// - when week_year == true, week is from 1 to 53, otherwise from 0 to 53. + /// - when first_weekday == true, the week that contains the first + /// 'first-day-of-week' is week 1, otherwise weeks are numbered according + /// to ISO 8601:1988. 
fn calc_year_week( &self, monday_first: bool, @@ -104,8 +105,8 @@ impl DateTimeExtension for Time { (year, week) } - /// returns the week of year according to week mode. should not be called directly. - /// implements TiDB calcWeek() + /// returns the week of year according to week mode. should not be called + /// directly. implements TiDB calcWeek() fn calc_year_week_by_week_mode(&self, week_mode: WeekMode) -> (i32, i32) { let mode = week_mode.to_normalized(); let monday_first = mode.contains(WeekMode::BEHAVIOR_MONDAY_FIRST); diff --git a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs index 29b66725e2a..5d387f1cdff 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs @@ -73,9 +73,9 @@ fn last_day_of_month(year: u32, month: u32) -> u32 { /// assert_eq!([2019, 12, 2, 0, 0, 0, 0], parts); /// ``` /// When year, month or day is zero, there can not have a carry. -/// e.g.: `"1998-11-00 23:59:59.999" (fsp = 2, round = true)`, in `hms` it contains a carry, -/// however, the `day` is 0, which is invalid in `MySQL`. When thoese cases encountered, return -/// None. +/// e.g.: `"1998-11-00 23:59:59.999" (fsp = 2, round = true)`, in `hms` it +/// contains a carry, however, the `day` is 0, which is invalid in `MySQL`. When +/// thoese cases encountered, return None. fn round_components(parts: &mut [u32]) -> Option<()> { debug_assert_eq!(parts.len(), 7); let modulus = [ @@ -113,9 +113,10 @@ fn chrono_datetime( second: u32, micro: u32, ) -> Result> { - // NOTE: We are not using `tz::from_ymd_opt` as suggested in chrono's README due to - // chronotope/chrono-tz #23. - // As a workaround, we first build a NaiveDate, then attach time zone information to it. + // NOTE: We are not using `tz::from_ymd_opt` as suggested in chrono's README due + // to chronotope/chrono-tz #23. 
+ // As a workaround, we first build a NaiveDate, then attach time zone + // information to it. NaiveDate::from_ymd_opt(year as i32, month, day) .and_then(|date| date.and_hms_opt(hour, minute, second)) .and_then(|t| t.checked_add_signed(chrono::Duration::microseconds(i64::from(micro)))) @@ -344,7 +345,8 @@ mod parser { /// ```ignore /// split_components_with_tz(b"2020-12-24T15:37:50+0800")?.1 == Some(480*60) /// ``` - /// the second value if not None indicates the offset in seconds of the timezone parsed + /// the second value if not None indicates the offset in seconds of the + /// timezone parsed fn split_components_with_tz(input: &str) -> Option<(Vec<&[u8]>, Option)> { let mut buffer = input.as_bytes(); @@ -508,8 +510,9 @@ mod parser { } } - /// Try to parse a datetime string `input` without fractional part and separators. - /// return an array that stores `[year, month, day, hour, minute, second, 0]` + /// Try to parse a datetime string `input` without fractional part and + /// separators. return an array that stores `[year, month, day, hour, + /// minute, second, 0]` fn parse_whole(input: &[u8]) -> Option<[u32; 7]> { let mut parts = [0u32; 7]; @@ -535,8 +538,8 @@ mod parser { Some(parts) } - /// Try to parse a fractional part from `input` with `fsp`, round the result if `round` is - /// true. + /// Try to parse a fractional part from `input` with `fsp`, round the result + /// if `round` is true. /// NOTE: This function assumes that `fsp` is in range: [0, 6]. 
fn parse_frac(input: &[u8], fsp: u8, round: bool) -> Option<(bool, u32)> { debug_assert!(fsp < 7); @@ -568,8 +571,8 @@ mod parser { let trimmed = input.trim(); (!trimmed.is_empty()).as_option()?; - // to support ISO8601 and MySQL's time zone support, we further parse the following formats - // 2020-12-17T11:55:55Z + // to support ISO8601 and MySQL's time zone support, we further parse the + // following formats 2020-12-17T11:55:55Z // 2020-12-17T11:55:55+0800 // 2020-12-17T11:55:55-08 // 2020-12-17T11:55:55+02:00 @@ -835,8 +838,8 @@ fn handle_invalid_date(ctx: &mut EvalContext, mut args: TimeArgs) -> Result for Time { return Ok(Duration::zero()); } let seconds = i64::from(self.hour() * 3600 + self.minute() * 60 + self.second()); - // `microsecond` returns the number of microseconds since the whole non-leap second. - // Such as for 2019-09-22 07:21:22.670936103 UTC, + // `microsecond` returns the number of microseconds since the whole non-leap + // second. Such as for 2019-09-22 07:21:22.670936103 UTC, // it will return 670936103. let microsecond = i64::from(self.micro()); Duration::from_micros(seconds * 1_000_000 + microsecond, self.fsp() as i8) @@ -2606,7 +2609,8 @@ mod tests { for case in cases { // Enable NO_ZERO_DATE, STRICT_MODE and ALLOW_INVALID_DATE. - // If an invalid date (converted to zero-date) is encountered, an error is returned. + // If an invalid date (converted to zero-date) is encountered, an error is + // returned. let mut ctx = EvalContext::from(TimeEnv { no_zero_date: true, strict_mode: true, @@ -2623,7 +2627,8 @@ mod tests { let cases = vec!["2019-01-00", "2019-00-01"]; for &case in cases.iter() { - // Enable NO_ZERO_IN_DATE only. If zero-date is encountered, a warning is produced. + // Enable NO_ZERO_IN_DATE only. If zero-date is encountered, a warning is + // produced. 
let mut ctx = EvalContext::from(TimeEnv { no_zero_in_date: true, ..TimeEnv::default() diff --git a/components/tidb_query_datatype/src/codec/mysql/time/tz.rs b/components/tidb_query_datatype/src/codec/mysql/time/tz.rs index 1efb2f3997c..7b90e96b78c 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/tz.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/tz.rs @@ -4,8 +4,8 @@ use std::{fmt, str::FromStr}; use chrono::*; -/// A time zone represented by either offset (i.e. +8) or name (i.e. Asia/Shanghai). In addition, -/// local time zone is also valid. +/// A time zone represented by either offset (i.e. +8) or name (i.e. +/// Asia/Shanghai). In addition, local time zone is also valid. #[derive(Clone)] pub enum Tz { /// A time zone specified by offset seconds. @@ -26,8 +26,8 @@ impl Tz { FixedOffset::east_opt(secs as i32).map(Tz::Offset) } - /// Constructs a time zone from the name. If the specified time zone name is `system`, - /// a local time zone will be constructed. + /// Constructs a time zone from the name. If the specified time zone name is + /// `system`, a local time zone will be constructed. pub fn from_tz_name(name: &str) -> Option { if name.to_lowercase() == "system" { Some(Tz::local()) diff --git a/components/tidb_query_datatype/src/codec/row/v2/compat_v1.rs b/components/tidb_query_datatype/src/codec/row/v2/compat_v1.rs index 25c651e1243..2e4a0703d4a 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/compat_v1.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/compat_v1.rs @@ -123,15 +123,17 @@ pub trait V1CompatibleEncoder: DatumFlagAndPayloadEncoder { impl V1CompatibleEncoder for T {} -/// These tests mainly focus on transfer the v2 encoding to v1-compatible encoding. +/// These tests mainly focus on transfer the v2 encoding to v1-compatible +/// encoding. /// /// The test path is: -/// 1. Encode value using v2 -/// 2. Use `V1CompatibleEncoder` to transfer the encoded bytes from v2 to v1-compatible -/// 3. 
Use `RawDatumDecoder` decode the encoded bytes, check the result. +/// - Encode value using v2 +/// - Use `V1CompatibleEncoder` to transfer the encoded bytes from v2 to +/// v1-compatible +/// - Use `RawDatumDecoder` decode the encoded bytes, check the result. /// -/// Note: a value encoded using v2 then transfer to v1-compatible encoding, is not always equals the -/// encoded-bytes using v1 directly. +/// Note: a value encoded using v2 then transfer to v1-compatible encoding, is +/// not always equals the encoded-bytes using v1 directly. #[cfg(test)] mod tests { use std::{f64, i16, i32, i64, i8, u16, u32, u64, u8}; diff --git a/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs b/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs index 09611adfbf6..1ee5104b723 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs @@ -1,18 +1,20 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -//! This `encoder` module is only used for test, so the implementation is very straightforward. +//! This `encoder` module is only used for test, so the implementation is very +//! straightforward. //! //! According to //! //! The row format is: -//! +//! ``` //! | version | flag | number_of_non_null_columns | number_of_null_columns | non_null_column_ids | null_column_ids | value_offsets | values | //! |---------| ---- | -------------------------- | ---------------------- | ------------------- | --------------- | ------------- | ------ | -//! +//! ``` //! length about each field: //! //! * version: 1 byte -//! * flag: 1 byte, when there's id greater than 255 or the total size of the values is greater than 65535 , value is 1, otherwise 0 +//! * flag: 1 byte, when there's id greater than 255 or the total size of the +//! values is greater than 65535 , value is 1, otherwise 0 //! * number of non-null values: 2 bytes //! 
* number of null values: 2 bytes //! * non-null column ids: when flag == 1 (big), id is 4 bytes, otherwise 1 byte diff --git a/components/tidb_query_datatype/src/codec/row/v2/mod.rs b/components/tidb_query_datatype/src/codec/row/v2/mod.rs index 2265cd3803d..b0cec291410 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/mod.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/mod.rs @@ -3,7 +3,8 @@ use bitflags::bitflags; // Prior to v2, the first byte is not version code, but datum type. -// From v2, it's used for version code, and the value starts from 128, to be compatible. +// From v2, it's used for version code, and the value starts from 128, to be +// compatible. pub const CODEC_VERSION: u8 = 128; bitflags! { diff --git a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs index 66aa4df0902..94e9dd0a9ae 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs @@ -61,12 +61,13 @@ impl RowSlice<'_> { /// Search `id` in non-null ids /// - /// Returns the `start` position and `offset` in `values` field if found, otherwise returns `None` + /// Returns the `start` position and `offset` in `values` field if found, + /// otherwise returns `None` /// /// # Errors /// - /// If the id is found with no offset(It will only happen when the row data is broken), - /// `Error::ColumnOffset` will be returned. + /// If the id is found with no offset(It will only happen when the row data + /// is broken), `Error::ColumnOffset` will be returned. pub fn search_in_non_null_ids(&self, id: i64) -> Result> { if !self.id_valid(id) { return Ok(None); @@ -170,7 +171,8 @@ impl RowSlice<'_> { /// Decodes `len` number of ints from `buf` in little endian /// /// Note: -/// This method is only implemented on little endianness currently, since x86 use little endianness. 
+/// This method is only implemented on little endianness currently, since x86 +/// use little endianness. #[cfg(target_endian = "little")] #[inline] fn read_le_bytes<'a, T>(buf: &mut &'a [u8], len: usize) -> Result> @@ -280,7 +282,7 @@ mod tests { let cols = vec![ Column::new(1, 1000), Column::new(356, 2), - Column::new(33, ScalarValue::Int(None)), //0x21 + Column::new(33, ScalarValue::Int(None)), // 0x21 Column::new(3, 3), Column::new(64123, 5), ]; diff --git a/components/tidb_query_datatype/src/codec/table.rs b/components/tidb_query_datatype/src/codec/table.rs index c49fefb4e73..2cb2f055842 100644 --- a/components/tidb_query_datatype/src/codec/table.rs +++ b/components/tidb_query_datatype/src/codec/table.rs @@ -100,8 +100,8 @@ pub fn check_index_key(key: &[u8]) -> Result<()> { check_key_type(key, INDEX_PREFIX_SEP) } -/// `check_key_type` checks if the key is the type we want, `wanted_type` should be -/// `table::RECORD_PREFIX_SEP` or `table::INDEX_PREFIX_SEP` . +/// `check_key_type` checks if the key is the type we want, `wanted_type` should +/// be `table::RECORD_PREFIX_SEP` or `table::INDEX_PREFIX_SEP` . #[inline] fn check_key_type(key: &[u8], wanted_type: &[u8]) -> Result<()> { let mut buf = key; @@ -184,7 +184,8 @@ pub fn encode_common_handle_for_test(table_id: i64, handle: &[u8]) -> Vec { key } -/// `encode_column_key` encodes the table id, row handle and column id into a byte array. +/// `encode_column_key` encodes the table id, row handle and column id into a +/// byte array. pub fn encode_column_key(table_id: i64, handle: i64, column_id: i64) -> Vec { let mut key = Vec::with_capacity(RECORD_ROW_KEY_LEN + ID_LEN); key.append_table_record_prefix(table_id).unwrap(); @@ -391,7 +392,8 @@ impl RowColsDict { self.cols.insert(cid, RowColMeta::new(offset, length)); } - /// Gets binary of cols, keeps the original order, and returns one slice and cols' end offsets. 
+ /// Gets binary of cols, keeps the original order, and returns one slice and + /// cols' end offsets. pub fn get_column_values_and_end_offsets(&self) -> (&[u8], Vec) { let mut start = self.value.len(); let mut length = 0; @@ -789,7 +791,7 @@ mod tests { range.set_start(small_key.clone()); range.set_end(large_key.clone()); assert!(check_table_ranges(&[range]).is_ok()); - //test range.start > range.end + // test range.start > range.end let mut range = KeyRange::default(); range.set_end(small_key.clone()); range.set_start(large_key); diff --git a/components/tidb_query_datatype/src/def/eval_type.rs b/components/tidb_query_datatype/src/def/eval_type.rs index 16ec996b531..9addab99e56 100644 --- a/components/tidb_query_datatype/src/def/eval_type.rs +++ b/components/tidb_query_datatype/src/def/eval_type.rs @@ -4,9 +4,9 @@ use std::fmt; /// Function implementations' parameter data types. /// -/// It is similar to the `EvalType` in TiDB, but doesn't provide type `Timestamp`, which is -/// handled by the same type as `DateTime` here, instead of a new type. Also, `String` is -/// called `Bytes` here to be less confusing. +/// It is similar to the `EvalType` in TiDB, but doesn't provide type +/// `Timestamp`, which is handled by the same type as `DateTime` here, instead +/// of a new type. Also, `String` is called `Bytes` here to be less confusing. #[derive(Debug, PartialEq, Clone, Copy)] pub enum EvalType { Int, @@ -23,8 +23,8 @@ pub enum EvalType { impl EvalType { /// Converts `EvalType` into one of the compatible `FieldTypeTp`s. /// - /// This function should be only useful in test scenarios that only cares about `EvalType` but - /// accepts a `FieldTypeTp`. + /// This function should be only useful in test scenarios that only cares + /// about `EvalType` but accepts a `FieldTypeTp`. 
pub fn into_certain_field_type_tp_for_test(self) -> crate::FieldTypeTp { match self { EvalType::Int => crate::FieldTypeTp::LongLong, @@ -49,7 +49,8 @@ impl fmt::Display for EvalType { impl std::convert::TryFrom for EvalType { type Error = crate::DataTypeError; - // Succeeds for all field types supported as eval types, fails for unsupported types. + // Succeeds for all field types supported as eval types, fails for unsupported + // types. fn try_from(tp: crate::FieldTypeTp) -> Result { let eval_type = match tp { crate::FieldTypeTp::Tiny @@ -76,7 +77,8 @@ impl std::convert::TryFrom for EvalType { | crate::FieldTypeTp::Null => EvalType::Bytes, crate::FieldTypeTp::Enum => EvalType::Enum, _ => { - // TODO: we need to handle FieldTypeTp::{Enum, Set} after we implement encode and decode. + // TODO: we need to handle FieldTypeTp::{Enum, Set} after we implement encode + // and decode. return Err(crate::DataTypeError::UnsupportedType { name: tp.to_string(), }); diff --git a/components/tidb_query_datatype/src/def/field_type.rs b/components/tidb_query_datatype/src/def/field_type.rs index b52065d8a72..ac89ad53318 100644 --- a/components/tidb_query_datatype/src/def/field_type.rs +++ b/components/tidb_query_datatype/src/def/field_type.rs @@ -10,9 +10,10 @@ use crate::error::DataTypeError; /// /// `FieldType` is the field type of a column defined by schema. /// -/// `ColumnInfo` describes a column. It contains `FieldType` and some other column specific -/// information. However for historical reasons, fields in `FieldType` (for example, `tp`) -/// are flattened into `ColumnInfo`. Semantically these fields are identical. +/// `ColumnInfo` describes a column. It contains `FieldType` and some other +/// column specific information. However for historical reasons, fields in +/// `FieldType` (for example, `tp`) are flattened into `ColumnInfo`. +/// Semantically these fields are identical. 
/// /// Please refer to [mysql/type.go](https://github.com/pingcap/parser/blob/master/mysql/type.go). #[derive(PartialEq, Debug, Clone, Copy)] @@ -117,9 +118,9 @@ pub enum Collation { impl Collation { /// Parse from collation id. /// - /// These are magic numbers defined in tidb, where positive numbers are for legacy - /// compatibility, and all new clusters with padding configuration enabled will - /// use negative numbers to indicate the padding behavior. + /// These are magic numbers defined in tidb, where positive numbers are for + /// legacy compatibility, and all new clusters with padding configuration + /// enabled will use negative numbers to indicate the padding behavior. pub fn from_i32(n: i32) -> Result { match n { -33 | -45 => Ok(Collation::Utf8Mb4GeneralCi), @@ -215,8 +216,9 @@ pub trait FieldTypeAccessor { fn set_collation(&mut self, collation: Collation) -> &mut dyn FieldTypeAccessor; - /// Convert reference to `FieldTypeAccessor` interface. Useful when an implementer - /// provides inherent methods with the same name as the accessor trait methods. + /// Convert reference to `FieldTypeAccessor` interface. Useful when an + /// implementer provides inherent methods with the same name as the accessor + /// trait methods. fn as_accessor(&self) -> &dyn FieldTypeAccessor where Self: Sized, @@ -232,8 +234,8 @@ pub trait FieldTypeAccessor { self as &mut dyn FieldTypeAccessor } - /// Whether this type is a hybrid type, which can represent different types of value in - /// specific context. + /// Whether this type is a hybrid type, which can represent different types + /// of value in specific context. /// /// Please refer to `Hybrid` in TiDB. #[inline] @@ -254,7 +256,8 @@ pub trait FieldTypeAccessor { || tp == FieldTypeTp::LongBlob } - /// Whether this type is a char-like type like a string type or a varchar type. + /// Whether this type is a char-like type like a string type or a varchar + /// type. /// /// Please refer to `IsTypeChar` in TiDB. 
#[inline] @@ -263,7 +266,8 @@ pub trait FieldTypeAccessor { tp == FieldTypeTp::String || tp == FieldTypeTp::VarChar } - /// Whether this type is a varchar-like type like a varstring type or a varchar type. + /// Whether this type is a varchar-like type like a varstring type or a + /// varchar type. /// /// Please refer to `IsTypeVarchar` in TiDB. #[inline] diff --git a/components/tidb_query_datatype/src/expr/ctx.rs b/components/tidb_query_datatype/src/expr/ctx.rs index f92c561b013..0e488689fce 100644 --- a/components/tidb_query_datatype/src/expr/ctx.rs +++ b/components/tidb_query_datatype/src/expr/ctx.rs @@ -89,8 +89,8 @@ impl EvalConfig { } else if req.has_time_zone_offset() { box_try!(eval_cfg.set_time_zone_by_offset(req.get_time_zone_offset())); } else { - // This should not be reachable. However we will not panic here in case - // of compatibility issues. + // This should not be reachable. However we will not panic here in + // case of compatibility issues. } if req.has_max_warning_count() { eval_cfg.set_max_warning_cnt(req.get_max_warning_count() as usize); @@ -316,8 +316,8 @@ impl EvalContext { } /// Indicates whether values less than 0 should be clipped to 0 for unsigned - /// integer types. This is the case for `insert`, `update`, `alter table` and - /// `load data infile` statements, when not in strict SQL mode. + /// integer types. This is the case for `insert`, `update`, `alter table` + /// and `load data infile` statements, when not in strict SQL mode. 
/// see pub fn should_clip_to_zero(&self) -> bool { self.cfg.flag.contains(Flag::IN_INSERT_STMT) @@ -370,37 +370,37 @@ mod tests { fn test_handle_division_by_zero() { let cases = vec![ //(flag,sql_mode,is_ok,is_empty) - (Flag::empty(), SqlMode::empty(), true, false), //warning + (Flag::empty(), SqlMode::empty(), true, false), // warning ( Flag::IN_INSERT_STMT, SqlMode::ERROR_FOR_DIVISION_BY_ZERO, true, false, - ), //warning + ), // warning ( Flag::IN_UPDATE_OR_DELETE_STMT, SqlMode::ERROR_FOR_DIVISION_BY_ZERO, true, false, - ), //warning + ), // warning ( Flag::IN_UPDATE_OR_DELETE_STMT, SqlMode::ERROR_FOR_DIVISION_BY_ZERO | SqlMode::STRICT_ALL_TABLES, false, true, - ), //error + ), // error ( Flag::IN_UPDATE_OR_DELETE_STMT, SqlMode::STRICT_ALL_TABLES, true, true, - ), //ok + ), // ok ( Flag::IN_UPDATE_OR_DELETE_STMT | Flag::DIVIDED_BY_ZERO_AS_WARNING, SqlMode::ERROR_FOR_DIVISION_BY_ZERO | SqlMode::STRICT_ALL_TABLES, true, false, - ), //warning + ), // warning ]; for (flag, sql_mode, is_ok, is_empty) in cases { let mut cfg = EvalConfig::new(); @@ -415,12 +415,12 @@ mod tests { fn test_handle_invalid_time_error() { let cases = vec![ //(flag,strict_sql_mode,is_ok,is_empty) - (Flag::empty(), false, true, false), //warning - (Flag::empty(), true, true, false), //warning - (Flag::IN_INSERT_STMT, false, true, false), //warning - (Flag::IN_UPDATE_OR_DELETE_STMT, false, true, false), //warning - (Flag::IN_UPDATE_OR_DELETE_STMT, true, false, true), //error - (Flag::IN_INSERT_STMT, true, false, true), //error + (Flag::empty(), false, true, false), // warning + (Flag::empty(), true, true, false), // warning + (Flag::IN_INSERT_STMT, false, true, false), // warning + (Flag::IN_UPDATE_OR_DELETE_STMT, false, true, false), // warning + (Flag::IN_UPDATE_OR_DELETE_STMT, true, false, true), // error + (Flag::IN_INSERT_STMT, true, false, true), // error ]; for (flag, strict_sql_mode, is_ok, is_empty) in cases { let err = Error::invalid_time_format(""); diff --git 
a/components/tidb_query_executors/src/fast_hash_aggr_executor.rs b/components/tidb_query_executors/src/fast_hash_aggr_executor.rs index c5859e48338..942e61087d3 100644 --- a/components/tidb_query_executors/src/fast_hash_aggr_executor.rs +++ b/components/tidb_query_executors/src/fast_hash_aggr_executor.rs @@ -32,8 +32,8 @@ macro_rules! match_template_hashable { }} } -/// Fast Hash Aggregation Executor uses hash when comparing group key. It only supports one -/// group by column. +/// Fast Hash Aggregation Executor uses hash when comparing group key. It only +/// supports one group by column. pub struct BatchFastHashAggregationExecutor( AggregationExecutor, ); @@ -72,8 +72,8 @@ impl BatchExecutor for BatchFastHashAggregationExecutor } } -// We assign a dummy type `Box>` so that we can omit the type -// when calling `check_supported`. +// We assign a dummy type `Box>` so that we +// can omit the type when calling `check_supported`. impl BatchFastHashAggregationExecutor>> { /// Checks whether this executor can be used. #[inline] @@ -198,8 +198,8 @@ impl BatchFastHashAggregationExecutor { /// All groups. enum Groups { // The value of each hash table is the start index in `FastHashAggregationImpl::states` - // field. When there are new groups (i.e. new entry in the hash table), the states of the groups - // will be appended to `states`. + // field. When there are new groups (i.e. new entry in the hash table), the states of the + // groups will be appended to `states`. Int(HashMap, usize>), Real(HashMap, usize>), Bytes(HashMap, usize>), @@ -388,7 +388,8 @@ impl AggregationExecutorImpl for FastHashAggregationImp Ok(vec![group_by_column]) } - /// Fast hash aggregation can output aggregate results only if the source is drained. + /// Fast hash aggregation can output aggregate results only if the source is + /// drained. 
#[inline] fn is_partial_results_ready(&self) -> bool { false @@ -481,7 +482,8 @@ mod tests { #[test] fn test_it_works_integration() { - // This test creates a hash aggregation executor with the following aggregate functions: + // This test creates a hash aggregation executor with the following aggregate + // functions: // - COUNT(1) // - COUNT(col_1 + 5.0) // - AVG(col_0) @@ -548,18 +550,20 @@ mod tests { assert!(!r.is_drained.unwrap()); let mut r = exec.next_batch(1); - // col_0 + col_1 can result in [NULL, 9.0, 6.0], thus there will be three groups. + // col_0 + col_1 can result in [NULL, 9.0, 6.0], thus there will be three + // groups. assert_eq!(&r.logical_rows, &[0, 1, 2]); assert_eq!(r.physical_columns.rows_len(), 3); assert_eq!(r.physical_columns.columns_len(), 5); // 4 result column, 1 group by column - // Let's check group by column first. Group by column is decoded in fast hash agg, - // but not decoded in slow hash agg. So decode it anyway. + // Let's check group by column first. Group by column is decoded in fast hash + // agg, but not decoded in slow hash agg. So decode it anyway. r.physical_columns[4] .ensure_all_decoded_for_test(&mut EvalContext::default(), &exec.schema()[4]) .unwrap(); - // The row order is not defined. Let's sort it by the group by column before asserting. + // The row order is not defined. Let's sort it by the group by column before + // asserting. 
let mut sort_column: Vec<(usize, _)> = r.physical_columns[4] .decoded() .to_real_vec() @@ -611,7 +615,8 @@ mod tests { #[test] fn test_group_by_a_constant() { - // This test creates a hash aggregation executor with the following aggregate functions: + // This test creates a hash aggregation executor with the following aggregate + // functions: // - COUNT(1) // - COUNT(col_1 + 5.0) // - AVG(col_0) @@ -707,7 +712,8 @@ mod tests { use tipb::ExprType; use tipb_helper::ExprDefBuilder; - // This test creates a hash aggregation executor with the following aggregate functions: + // This test creates a hash aggregation executor with the following aggregate + // functions: // - COUNT(col_0) // - AVG(col_1) // And group by: @@ -769,13 +775,14 @@ mod tests { assert_eq!(r.physical_columns.rows_len(), 3); assert_eq!(r.physical_columns.columns_len(), 4); // 3 result column, 1 group by column - // Let's check group by column first. Group by column is decoded in fast hash agg, - // but not decoded in slow hash agg. So decode it anyway. + // Let's check group by column first. Group by column is decoded in fast hash + // agg, but not decoded in slow hash agg. So decode it anyway. r.physical_columns[3] .ensure_all_decoded_for_test(&mut EvalContext::default(), &exec.schema()[3]) .unwrap(); - // The row order is not defined. Let's sort it by the group by column before asserting. + // The row order is not defined. Let's sort it by the group by column before + // asserting. 
let mut sort_column: Vec<(usize, _)> = r.physical_columns[3] .decoded() .to_bytes_vec() @@ -1079,7 +1086,8 @@ mod tests { #[test] fn test_group_by_enum_column() { - // This test creates a hash aggregation executor with the following aggregate functions: + // This test creates a hash aggregation executor with the following aggregate + // functions: // - COUNT(1) // And group by: // - col_0(enum_type) diff --git a/components/tidb_query_executors/src/index_scan_executor.rs b/components/tidb_query_executors/src/index_scan_executor.rs index ccf57f1235f..bcbf2b8f92b 100644 --- a/components/tidb_query_executors/src/index_scan_executor.rs +++ b/components/tidb_query_executors/src/index_scan_executor.rs @@ -31,8 +31,8 @@ use crate::interface::*; pub struct BatchIndexScanExecutor(ScanExecutor); -// We assign a dummy type `Box>` so that we can omit the type -// when calling `check_supported`. +// We assign a dummy type `Box>` so that we can +// omit the type when calling `check_supported`. impl BatchIndexScanExecutor>> { /// Checks whether this executor can be used. #[inline] @@ -52,32 +52,33 @@ impl BatchIndexScanExecutor { unique: bool, is_scanned_range_aware: bool, ) -> Result { - // Note 1: `unique = true` doesn't completely mean that it is a unique index scan. Instead - // it just means that we can use point-get for this index. In the following scenarios - // `unique` will be `false`: + // Note 1: `unique = true` doesn't completely mean that it is a unique index + // scan. Instead it just means that we can use point-get for this index. + // In the following scenarios `unique` will be `false`: // - scan from a non-unique index // - scan from a unique index with like: where unique-index like xxx // - // Note 2: Unlike table scan executor, the accepted `columns_info` of index scan executor is - // strictly stipulated. The order of columns in the schema must be the same as index data - // stored and if PK handle is needed it must be placed as the last one. 
+ // Note 2: Unlike table scan executor, the accepted `columns_info` of index scan + // executor is strictly stipulated. The order of columns in the schema must be + // the same as index data stored and if PK handle is needed it must be placed as + // the last one. // - // Note 3: Currently TiDB may send multiple PK handles to TiKV (but only the last one is - // real). We accept this kind of request for compatibility considerations, but will be - // forbidden soon. + // Note 3: Currently TiDB may send multiple PK handles to TiKV (but only the + // last one is real). We accept this kind of request for compatibility + // considerations, but will be forbidden soon. // - // Note 4: When process global indexes, an extra partition ID column with column ID - // `table::EXTRA_PARTITION_ID_COL_ID` will append to column info to indicate which partiton - // handles belong to. See https://github.com/pingcap/parser/pull/1010 for more information. + // Note 4: When process global indexes, an extra partition ID column with column + // ID `table::EXTRA_PARTITION_ID_COL_ID` will append to column info to indicate which partiton handles belong to. See https://github.com/pingcap/parser/pull/1010 for more information. // - // Note 5: When process a partitioned table's index under tidb_partition_prune_mode = 'dynamic' - // and with either an active transaction buffer or with a SelectLock/pessimistic lock, we - // need to return the physical table id since several partitions may be included in the - // range. + // Note 5: When process a partitioned table's index under + // tidb_partition_prune_mode = 'dynamic' and with either an active transaction + // buffer or with a SelectLock/pessimistic lock, we need to return the physical + // table id since several partitions may be included in the range. // // Note 6: Also int_handle (-1), EXTRA_PARTITION_ID_COL_ID (-2) and - // EXTRA_PHYSICAL_TABLE_ID_COL_ID (-3) must be requested in this order in columns_info! 
- // since current implementation looks for them backards for -3, -2, -1. + // EXTRA_PHYSICAL_TABLE_ID_COL_ID (-3) must be requested in this order in + // columns_info! since current implementation looks for them backwards for -3, + // -2, -1. let physical_table_id_column_cnt = columns_info.last().map_or(0, |ci| { (ci.get_column_id() == table::EXTRA_PHYSICAL_TABLE_ID_COL_ID) as usize }); @@ -209,7 +210,8 @@ struct IndexScanExecutorImpl { decode_handle_strategy: DecodeHandleStrategy, /// Number of partition ID columns, now it can only be 0 or 1. - /// Must be after all normal columns and handle, but before physical_table_id_column + /// Must be after all normal columns and handle, but before + /// physical_table_id_column pid_column_cnt: usize, /// Number of Physical Table ID columns, can only be 0 or 1. @@ -230,10 +232,11 @@ impl ScanExecutorImpl for IndexScanExecutorImpl { &mut self.context } - /// Constructs empty columns, with PK containing int handle in decoded format and the rest in raw format. + /// Constructs empty columns, with PK containing int handle in decoded + /// format and the rest in raw format. /// - /// Note: the structure of the constructed column is the same as table scan executor but due - /// to different reasons. + /// Note: the structure of the constructed column is the same as table scan + /// executor but due to different reasons. 
fn build_column_vec(&self, scan_rows: usize) -> LazyBatchColumnVec { let columns_len = self.schema.len(); let mut columns = Vec::with_capacity(columns_len); @@ -278,53 +281,55 @@ impl ScanExecutorImpl for IndexScanExecutorImpl { } // Value layout: (see https://docs.google.com/document/d/1Co5iMiaxitv3okJmLYLJxZYCNChcjzswJMRr-_45Eqg/edit?usp=sharing) - // +-- IndexValueVersion0 (with restore data, or common handle, or index is global) - // | - // | Layout: TailLen | Options | Padding | [IntHandle] | [UntouchedFlag] - // | Length: 1 | len(options) | len(padding) | 8 | 1 - // | - // | TailLen: len(padding) + len(IntHandle) + len(UntouchedFlag) - // | Options: Encode some value for new features, such as common handle, new collations or global index. - // | See below for more information. - // | Padding: Ensure length of value always >= 10. (or >= 11 if UntouchedFlag exists.) - // | IntHandle: Only exists when table use int handles and index is unique. - // | UntouchedFlag: Only exists when index is untouched. - // | - // +-- Old Encoding (without restore data, integer handle, local) - // | - // | Layout: [Handle] | [UntouchedFlag] - // | Length: 8 | 1 - // | - // | Handle: Only exists in unique index. - // | UntouchedFlag: Only exists when index is untouched. - // | - // | If neither Handle nor UntouchedFlag exists, value will be one single byte '0' (i.e. []byte{'0'}). - // | Length of value <= 9, use to distinguish from the new encoding. + // ```text + // +-- IndexValueVersion0 (with restore data, or common handle, or index is global) // | - // +-- IndexValueForClusteredIndexVersion1 - // | - // | Layout: TailLen | VersionFlag | Version | Options | [UntouchedFlag] - // | Length: 1 | 1 | 1 | len(options) | 1 - // | - // | TailLen: len(UntouchedFlag) - // | Options: Encode some value for new features, such as common handle, new collations or global index. - // | See below for more information. - // | UntouchedFlag: Only exists when index is untouched. 
- // | - // | Layout of Options: - // | - // | Segment: Common Handle | Global Index | New Collation + // | Layout: TailLen | Options | Padding | [IntHandle] | [UntouchedFlag] + // | Length: 1 | len(options) | len(padding) | 8 | 1 + // | + // | TailLen: len(padding) + len(IntHandle) + len(UntouchedFlag) + // | Options: Encode some value for new features, such as common handle, new collations or global index. + // | See below for more information. + // | Padding: Ensure length of value always >= 10. (or >= 11 if UntouchedFlag exists.) + // | IntHandle: Only exists when table use int handles and index is unique. + // | UntouchedFlag: Only exists when index is untouched. + // | + // +-- Old Encoding (without restore data, integer handle, local) + // | + // | Layout: [Handle] | [UntouchedFlag] + // | Length: 8 | 1 + // | + // | Handle: Only exists in unique index. + // | UntouchedFlag: Only exists when index is untouched. + // | + // | If neither Handle nor UntouchedFlag exists, value will be one single byte '0' (i.e. []byte{'0'}). + // | Length of value <= 9, use to distinguish from the new encoding. + // | + // +-- IndexValueForClusteredIndexVersion1 + // | + // | Layout: TailLen | VersionFlag | Version | Options | [UntouchedFlag] + // | Length: 1 | 1 | 1 | len(options) | 1 + // | + // | TailLen: len(UntouchedFlag) + // | Options: Encode some value for new features, such as common handle, new collations or global index. + // | See below for more information. + // | UntouchedFlag: Only exists when index is untouched. + // | + // | Layout of Options: + // | + // | Segment: Common Handle | Global Index | New Collation // | Layout: CHandle Flag | CHandle Len | CHandle | PidFlag | PartitionID | restoreData - // | Length: 1 | 2 | len(CHandle) | 1 | 8 | len(restoreData) - // | - // | Common Handle Segment: Exists when unique index used common handles. - // | Global Index Segment: Exists when index is global. 
- // | New Collation Segment: Exists when new collation is used and index or handle contains non-binary string. - // | In v4.0, restored data contains all the index values. For example, (a int, b char(10)) and index (a, b). - // | The restored data contains both the values of a and b. - // | In v5.0, restored data contains only non-binary data(except for char and _bin). In the above example, the restored data contains only the value of b. - // | Besides, if the collation of b is _bin, then restored data is an integer indicate the spaces are truncated. Then we use sortKey - // | and the restored data together to restore original data. + // | Length: 1 | 2 | len(CHandle) | 1 | 8 | len(restoreData) + // | + // | Common Handle Segment: Exists when unique index used common handles. + // | Global Index Segment: Exists when index is global. + // | New Collation Segment: Exists when new collation is used and index or handle contains non-binary string. + // | In v4.0, restored data contains all the index values. For example, (a int, b char(10)) and index (a, b). + // | The restored data contains both the values of a and b. + // | In v5.0, restored data contains only non-binary data(except for char and _bin). In the above example, the restored data contains only the value of b. + // | Besides, if the collation of b is _bin, then restored data is an integer indicate the spaces are truncated. Then we use sortKey + // | and the restored data together to restore original data. + // ``` #[inline] fn process_kv_pair( &mut self, @@ -435,8 +440,9 @@ impl IndexScanExecutorImpl { } // Process index values that are in old collation. - // NOTE: We should extract the index columns from the key first, and extract the handles from value if there is no handle in the key. - // Otherwise, extract the handles from the key. + // NOTE: We should extract the index columns from the key first, and extract the + // handles from value if there is no handle in the key. 
Otherwise, extract the + // handles from the key. fn process_old_collation_kv( &mut self, mut key_payload: &[u8], @@ -478,16 +484,23 @@ impl IndexScanExecutorImpl { Ok(()) } - // restore_original_data restores the index values whose format is introduced in TiDB 5.0. - // Unlike the format in TiDB 4.0, the new format is optimized for storage space: - // 1. If the index is a composed index, only the non-binary string column's value need to write to value, not all. - // 2. If a string column's collation is _bin, then we only write the number of the truncated spaces to value. - // 3. If a string column is char, not varchar, then we use the sortKey directly. + // restore_original_data restores the index values whose format is introduced in + // TiDB 5.0. Unlike the format in TiDB 4.0, the new format is optimized for + // storage space: + // - If the index is a composed index, only the non-binary string column's value + // need to write to value, not all. + // - If a string column's collation is _bin, then we only write the number of + // the truncated spaces to value. + // - If a string column is char, not varchar, then we use the sortKey directly. + // // The whole logic of this function is: - // 1. For each column pass in, check if it needs the restored data to get to original data. If not, check the next column. - // 2. Skip if the `sort key` is NULL, because the original data must be NULL. - // 3. Depend on the collation if `_bin` or not. Process them differently to get the correct original data. - // 4. Write the original data into the column, we need to make sure pop() is called. + // - For each column pass in, check if it needs the restored data to get to + // original data. If not, check the next column. + // - Skip if the `sort key` is NULL, because the original data must be NULL. + // - Depend on the collation if `_bin` or not. Process them differently to get + // the correct original data. 
+ // - Write the original data into the column, we need to make sure pop() is + // called. fn restore_original_data<'a>( &self, restored_values: &[u8], @@ -518,7 +531,8 @@ impl IndexScanExecutorImpl { column.mut_raw().pop(); let original_data = if is_bin_collation { - // _bin collation, we need to combine data from key and value to form the original data. + // _bin collation, we need to combine data from key and value to form the + // original data. // Unwrap as checked by `decoded_value.read_datum() == Datum::Null` let truncate_str = decoded_value.as_string()?.unwrap(); @@ -551,7 +565,8 @@ impl IndexScanExecutorImpl { // get_index_version is the same as getIndexVersion() in the TiDB repo. fn get_index_version(value: &[u8]) -> Result { if value.len() == 3 || value.len() == 4 { - // For the unique index with null value or non-unique index, the length can be 3 or 4 if <= 9. + // For the unique index with null value or non-unique index, the length can be 3 + // or 4 if <= 9. return Ok(1); } if value.len() <= MAX_OLD_ENCODED_VALUE_LEN { @@ -689,11 +704,13 @@ impl IndexScanExecutorImpl { // If there are some restore data, we need to process them to get the original data. RestoreData::V4(rst) => { - // 4.0 version format, use the restore data directly. The restore data contain all the indexed values. + // 4.0 version format, use the restore data directly. The restore data contain + // all the indexed values. self.extract_columns_from_row_format(rst, columns)?; } RestoreData::V5(rst) => { - // Extract the data from key, then use the restore data to get the original data. + // Extract the data from key, then use the restore data to get the original + // data. Self::extract_columns_from_datum_format( &mut key_payload, &mut columns[..self.columns_id_without_handle.len()], @@ -924,8 +941,9 @@ mod tests { // Case 1. Normal index. - // For a normal index, the PK handle is stored in the key and nothing interesting is stored - // in the value. 
So let's build corresponding KV data. + // For a normal index, the PK handle is stored in the key and nothing + // interesting is stored in the value. So let's build corresponding KV + // data. let store = { let kv: Vec<_> = data @@ -2023,9 +2041,10 @@ mod tests { fn test_int_handle_char_index() { use tidb_query_datatype::builder::FieldTypeBuilder; - // Schema: create table t(a int, b char(10) collate utf8mb4_bin, c char(10) collate utf8mb4_unicode_ci, key i_a(a), key i_b(b), key i_c(c), key i_abc(a, b, c), unique key i_ua(a), - // unique key i_ub(b), unique key i_uc(c), unique key i_uabc(a,b,c)); - // insert into t values (1, "a ", "A "); + // Schema: create table t(a int, b char(10) collate utf8mb4_bin, c char(10) + // collate utf8mb4_unicode_ci, key i_a(a), key i_b(b), key i_c(c), key i_abc(a, + // b, c), unique key i_ua(a), unique key i_ub(b), unique key i_uc(c), + // unique key i_uabc(a,b,c)); insert into t values (1, "a ", "A "); // i_a and i_ua let mut idx_exe = IndexScanExecutorImpl { @@ -2259,9 +2278,11 @@ mod tests { fn test_int_handle_varchar_index() { use tidb_query_datatype::builder::FieldTypeBuilder; - // Schema: create table t(a int, b varchar(10) collate utf8mb4_bin, c varchar(10) collate utf8mb4_unicode_ci, key i_a(a), key i_b(b), key i_c(c), key i_abc(a, b, c), unique key i_ua(a), - // unique key i_ub(b), unique key i_uc(c), unique key i_uabc(a,b,c)); - // insert into t values (1, "a ", "A "); + // Schema: create table t(a int, b varchar(10) collate utf8mb4_bin, c + // varchar(10) collate utf8mb4_unicode_ci, key i_a(a), key i_b(b), key i_c(c), + // key i_abc(a, b, c), unique key i_ua(a), unique key i_ub(b), unique + // key i_uc(c), unique key i_uabc(a,b,c)); insert into t values (1, "a + // ", "A "); // i_a and i_ua let mut idx_exe = IndexScanExecutorImpl { @@ -2502,9 +2523,12 @@ mod tests { fn test_common_handle_index() { use tidb_query_datatype::builder::FieldTypeBuilder; - // create table t(a int, b char(10) collate utf8mb4_bin, c char(10) 
collate utf8mb4_unicode_ci, d varchar(10) collate utf8mb4_bin, e varchar(10) collate utf8mb4_general_ci - // , primary key(a, b, c, d, e), key i_a(a), key i_b(b), key i_c(c), key i_d(d), key i_e(e), key i_abcde(a, b, c, d, e), unique key i_ua(a), unique key i_ub(b), unique key i_uc( - // c), unique key i_ud(d), unique key i_ue(e), unique key i_uabcde(a,b,c, d, e)); + // create table t(a int, b char(10) collate utf8mb4_bin, c char(10) collate + // utf8mb4_unicode_ci, d varchar(10) collate utf8mb4_bin, e varchar(10) collate + // utf8mb4_general_ci , primary key(a, b, c, d, e), key i_a(a), key + // i_b(b), key i_c(c), key i_d(d), key i_e(e), key i_abcde(a, b, c, d, e), + // unique key i_ua(a), unique key i_ub(b), unique key i_uc( c), unique + // key i_ud(d), unique key i_ue(e), unique key i_uabcde(a,b,c, d, e)); // // CREATE TABLE `t` ( // `a` int(11) NOT NULL, @@ -3277,8 +3301,9 @@ mod tests { fn test_common_handle_index_latin1_bin() { use tidb_query_datatype::builder::FieldTypeBuilder; - // create table t(c1 varchar(200) CHARACTER SET latin1 COLLATE latin1_bin, c2 int, primary key(c1) clustered, key kk(c2)); - // idx_exec for index kk(c2), its columns will be + // create table t(c1 varchar(200) CHARACTER SET latin1 COLLATE latin1_bin, c2 + // int, primary key(c1) clustered, key kk(c2)); idx_exec for index + // kk(c2), its columns will be let mut idx_exe = IndexScanExecutorImpl { context: Default::default(), schema: vec![ diff --git a/components/tidb_query_executors/src/interface.rs b/components/tidb_query_executors/src/interface.rs index cbbe77943b1..1ea5038a2d6 100644 --- a/components/tidb_query_executors/src/interface.rs +++ b/components/tidb_query_executors/src/interface.rs @@ -1,6 +1,7 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -// TODO: Maybe we can find a better place to put these interfaces, e.g. naming it as prelude? +// TODO: Maybe we can find a better place to put these interfaces, e.g. naming +// it as prelude? //! 
Batch executor common structures. @@ -13,8 +14,8 @@ use tidb_query_common::{ use tidb_query_datatype::{codec::batch::LazyBatchColumnVec, expr::EvalWarnings}; use tipb::FieldType; -/// The interface for pull-based executors. It is similar to the Volcano Iterator model, but -/// pulls data in batch and stores data by column. +/// The interface for pull-based executors. It is similar to the Volcano +/// Iterator model, but pulls data in batch and stores data by column. pub trait BatchExecutor: Send { type StorageStats; @@ -23,26 +24,30 @@ pub trait BatchExecutor: Send { /// Pulls next several rows of data (stored by column). /// - /// This function might return zero rows, which doesn't mean that there is no more result. - /// See `is_drained` in `BatchExecuteResult`. + /// This function might return zero rows, which doesn't mean that there is + /// no more result. See `is_drained` in `BatchExecuteResult`. fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult; - /// Collects execution statistics (including but not limited to metrics and execution summaries) - /// accumulated during execution and prepares for next collection. + /// Collects execution statistics (including but not limited to metrics and + /// execution summaries) accumulated during execution and prepares for + /// next collection. /// - /// The executor implementation must invoke this function for each children executor. However - /// the invocation order of children executors is not stipulated. + /// The executor implementation must invoke this function for each children + /// executor. However the invocation order of children executors is not + /// stipulated. /// - /// This function may be invoked several times during execution. For each invocation, it should - /// not contain accumulated meta data in last invocation. Normally the invocation frequency of - /// this function is less than `next_batch()`. + /// This function may be invoked several times during execution. 
For each + /// invocation, it should not contain accumulated meta data in last + /// invocation. Normally the invocation frequency of this function is + /// less than `next_batch()`. fn collect_exec_stats(&mut self, dest: &mut ExecuteStats); - /// Collects underlying storage statistics accumulated during execution and prepares for - /// next collection. + /// Collects underlying storage statistics accumulated during execution and + /// prepares for next collection. /// - /// Similar to `collect_exec_stats()`, the implementation must invoke this function for each - /// children executor and this function may be invoked several times during execution. + /// Similar to `collect_exec_stats()`, the implementation must invoke this + /// function for each children executor and this function may be invoked + /// several times during execution. fn collect_storage_stats(&mut self, dest: &mut Self::StorageStats); fn take_scanned_range(&mut self) -> IntervalRange; @@ -127,26 +132,31 @@ impl BatchExecutor } } -/// Data to be flowed between parent and child executors' single `next_batch()` invocation. +/// Data to be flowed between parent and child executors' single `next_batch()` +/// invocation. /// -/// Note: there are other data flow between executors, like metrics and output statistics. -/// However they are flowed at once, just before response, instead of each step during execution. -/// Hence they are not covered by this structure. See `BatchExecuteMetaData`. +/// Note: there are other data flow between executors, like metrics and output +/// statistics. However they are flowed at once, just before response, instead +/// of each step during execution. Hence they are not covered by this structure. +/// See `BatchExecuteMetaData`. /// -/// It is only `Send` but not `Sync` because executor returns its own data copy. However `Send` -/// enables executors to live in different threads. +/// It is only `Send` but not `Sync` because executor returns its own data copy. 
+/// However `Send` enables executors to live in different threads. /// -/// It is designed to be used in new generation executors, i.e. executors support batch execution. -/// The old executors will not be refined to return this kind of result. +/// It is designed to be used in new generation executors, i.e. executors +/// support batch execution. The old executors will not be refined to return +/// this kind of result. pub struct BatchExecuteResult { /// The *physical* columns data generated during this invocation. /// - /// Note 1: Empty column data doesn't mean that there is no more data. See `is_drained`. + /// Note 1: Empty column data doesn't mean that there is no more data. See + /// `is_drained`. /// - /// Note 2: This is only a *physical* store of data. The data may not be in desired order and - /// there could be filtered out data stored inside. You should access the *logical* - /// data via the `logical_rows` field. For the same reason, `rows_len() > 0` doesn't - /// mean that there is logical data inside. + /// Note 2: This is only a *physical* store of data. The data may not be in + /// desired order and there could be filtered out data stored inside. You + /// should access the *logical* data via the `logical_rows` field. For the + /// same reason, `rows_len() > 0` doesn't mean that there is logical data + /// inside. pub physical_columns: LazyBatchColumnVec, /// Valid row offsets in `physical_columns`, placed in the logical order. @@ -160,16 +170,17 @@ pub struct BatchExecuteResult { /// Whether or not there is no more data. /// /// This structure is a `Result`. When it is: - /// - `Ok(false)`: The normal case, means that there could be more data. The caller should - /// continue calling `next_batch()` although for each call the returned data may - /// be empty. - /// - `Ok(true)`: Means that the executor is drained and no more data will be returned in - /// future. However there could be some (last) data in the `data` field this - /// time. 
The caller should NOT call `next_batch()` any more. - /// - `Err(_)`: Means that there is an error when trying to retrieve more data. In this case, - /// the error is returned and the executor is also drained. Similar to - /// `Ok(true)`, there could be some remaining data in the `data` field which is - /// valid data and should be processed. The caller should NOT call `next_batch()` - /// any more. + /// - `Ok(false)`: The normal case, means that there could be more data. The + /// caller should continue calling `next_batch()` although for each call + /// the returned data may be empty. + /// - `Ok(true)`: Means that the executor is drained and no more data will + /// be returned in future. However there could be some (last) data in the + /// `data` field this time. The caller should NOT call `next_batch()` any + /// more. + /// - `Err(_)`: Means that there is an error when trying to retrieve more + /// data. In this case, the error is returned and the executor is also + /// drained. Similar to `Ok(true)`, there could be some remaining data in + /// the `data` field which is valid data and should be processed. The + /// caller should NOT call `next_batch()` any more. pub is_drained: Result, } diff --git a/components/tidb_query_executors/src/lib.rs b/components/tidb_query_executors/src/lib.rs index b32518c600b..ad86f94f9b8 100644 --- a/components/tidb_query_executors/src/lib.rs +++ b/components/tidb_query_executors/src/lib.rs @@ -1,11 +1,12 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -//! This crate implements a simple SQL query engine to work with TiDB pushed down executors. +//! This crate implements a simple SQL query engine to work with TiDB pushed +//! down executors. //! -//! The query engine is able to scan and understand rows stored by TiDB, run against a -//! series of executors and then return the execution result. The query engine is provided via -//! TiKV Coprocessor interface. 
However standalone UDF functions are also exported and can be used -//! standalone. +//! The query engine is able to scan and understand rows stored by TiDB, run +//! against a series of executors and then return the execution result. The +//! query engine is provided via TiKV Coprocessor interface. However standalone +//! UDF functions are also exported and can be used standalone. #![allow(incomplete_features)] #![feature(proc_macro_hygiene)] diff --git a/components/tidb_query_executors/src/projection_executor.rs b/components/tidb_query_executors/src/projection_executor.rs index 680800859f3..1d6892731ff 100644 --- a/components/tidb_query_executors/src/projection_executor.rs +++ b/components/tidb_query_executors/src/projection_executor.rs @@ -20,8 +20,8 @@ pub struct BatchProjectionExecutor { exprs: Vec, } -// We assign a dummy type `Box>` so that we can omit the type -// when calling `check_supported`. +// We assign a dummy type `Box>` so that we +// can omit the type when calling `check_supported`. impl BatchProjectionExecutor>> { /// Checks whether this executor can be used. #[inline] @@ -209,8 +209,9 @@ mod tests { ], ); - // When source executor returns empty rows, projection executor should process correctly. - // No errors should be generated and the expression functions should not be called. + // When source executor returns empty rows, projection executor should process + // correctly. No errors should be generated and the expression functions + // should not be called. let r = exec.next_batch(1); // The scan rows parameter has no effect for mock executor. We don't care. @@ -507,8 +508,8 @@ mod tests { ], ); - // When evaluating expr[0], there will be no error. However we will meet errors for - // expr[1]. + // When evaluating expr[0], there will be no error. However we will meet errors + // for expr[1]. 
let exprs = (0..=1) .map(|offset| { diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 4a8a3a02851..073fade4b29 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -32,32 +32,34 @@ use super::{ *, }; -// TODO: The value is chosen according to some very subjective experience, which is not tuned -// carefully. We need to benchmark to find a best value. Also we may consider accepting this value -// from TiDB side. +// TODO: The value is chosen according to some very subjective experience, which +// is not tuned carefully. We need to benchmark to find a best value. Also we +// may consider accepting this value from TiDB side. const BATCH_INITIAL_SIZE: usize = 32; -// TODO: This value is chosen based on MonetDB/X100's research without our own benchmarks. +// TODO: This value is chosen based on MonetDB/X100's research without our own +// benchmarks. pub use tidb_query_expr::types::BATCH_MAX_SIZE; // TODO: Maybe there can be some better strategy. Needs benchmarks and tunes. const BATCH_GROW_FACTOR: usize = 2; -/// Batch executors are run in coroutines. `MAX_TIME_SLICE` is the maximum time a coroutine -/// can run without being yielded. +/// Batch executors are run in coroutines. `MAX_TIME_SLICE` is the maximum time +/// a coroutine can run without being yielded. pub const MAX_TIME_SLICE: Duration = Duration::from_millis(1); pub struct BatchExecutorsRunner { - /// The deadline of this handler. For each check point (e.g. each iteration) we need to check - /// whether or not the deadline is exceeded and break the process if so. + /// The deadline of this handler. For each check point (e.g. each iteration) + /// we need to check whether or not the deadline is exceeded and break + /// the process if so. // TODO: Deprecate it using a better deadline mechanism. 
 deadline: Deadline, out_most_executor: Box>, - /// The offset of the columns need to be outputted. For example, TiDB may only needs a subset - /// of the columns in the result so that unrelated columns don't need to be encoded and - /// returned back. + /// The offsets of the columns that need to be outputted. For example, TiDB may + /// only need a subset of the columns in the result so that unrelated + /// columns don't need to be encoded and returned back. output_offsets: Vec, config: Arc, @@ -76,16 +78,18 @@ pub struct BatchExecutorsRunner { /// 2. chunk: result is encoded column by column using chunk format. encode_type: EncodeType, - /// If it's a paging request, paging_size indicates to the required size for current page. + /// If it's a paging request, paging_size indicates the required size for + /// the current page. paging_size: Option, quota_limiter: Arc, } -// We assign a dummy type `()` so that we can omit the type when calling `check_supported`. +// We assign a dummy type `()` so that we can omit the type when calling +// `check_supported`. impl BatchExecutorsRunner<()> { - /// Given a list of executor descriptors and checks whether all executor descriptors can - /// be used to build batch executors. + /// Given a list of executor descriptors, checks whether all executor + /// descriptors can be used to build batch executors. 
 pub fn check_supported(exec_descriptors: &[tipb::Executor]) -> Result<()> { for ed in exec_descriptors { match ed.get_tp() { @@ -387,7 +391,9 @@ impl BatchExecutorsRunner { storage, ranges, config.clone(), - is_streaming || paging_size.is_some(), // For streaming and paging request, executors will continue scan from range end where last scan is finished + is_streaming || paging_size.is_some(), /* For streaming and paging requests, + * executors will continue scanning from the + * range end where the last scan finished */ )?; let encode_type = if !is_arrow_encodable(out_most_executor.schema()) { @@ -434,8 +440,9 @@ impl BatchExecutorsRunner { /// handle_request returns the response of selection and an optional range, /// only paging request will return Some(IntervalRange), /// this should be used when calculating ranges of the next batch. - /// IntervalRange records whole range scanned though there are gaps in multi ranges. - /// e.g.: [(k1 -> k2), (k4 -> k5)] may got response (k1, k2, k4) with IntervalRange like (k1, k4). + /// IntervalRange records the whole range scanned though there are gaps in + /// multi ranges. e.g.: [(k1 -> k2), (k4 -> k5)] may get response (k1, k2, k4) + /// with IntervalRange like (k1, k4). pub async fn handle_request(&mut self) -> Result<(SelectResponse, Option)> { let mut chunks = vec![]; let mut batch_size = Self::batch_initial_size(); diff --git a/components/tidb_query_executors/src/selection_executor.rs b/components/tidb_query_executors/src/selection_executor.rs index e930d6f9d89..61030e593e0 100644 --- a/components/tidb_query_executors/src/selection_executor.rs +++ b/components/tidb_query_executors/src/selection_executor.rs @@ -20,8 +20,8 @@ pub struct BatchSelectionExecutor { conditions: Vec, } -// We assign a dummy type `Box>` so that we can omit the type -// when calling `check_supported`. +// We assign a dummy type `Box>` so that we +// can omit the type when calling `check_supported`. 
impl BatchSelectionExecutor>> { /// Checks whether this executor can be used. #[inline] @@ -62,10 +62,12 @@ impl BatchSelectionExecutor { }) } - /// Accepts source result and mutates its `logical_rows` according to predicates. + /// Accepts source result and mutates its `logical_rows` according to + /// predicates. /// - /// When errors are returned, it means there are errors during the evaluation. Currently - /// we treat this situation as "completely failed". + /// When errors are returned, it means there are errors during the + /// evaluation. Currently we treat this situation as "completely + /// failed". fn handle_src_result(&mut self, src_result: &mut BatchExecuteResult) -> Result<()> { // We handle errors in next_batch, so we can ingore it here. @@ -139,9 +141,9 @@ where let mut err_result = Ok(()); let mut logical_index = 0; logical_rows.retain(|_| { - // We don't care the physical index indicated by `logical_rows`, since what's in there - // does not affect the filtering. Instead, the eval result in corresponding logical index - // matters. + // We don't care the physical index indicated by `logical_rows`, since what's in + // there does not affect the filtering. Instead, the eval result in + // corresponding logical index matters. let eval_result_physical_index = eval_result_logical_rows.get_idx(logical_index); logical_index += 1; @@ -261,8 +263,9 @@ mod tests { ], ); - // When source executor returns empty rows, selection executor should process correctly. - // No errors should be generated and the predicate function should not be called. + // When source executor returns empty rows, selection executor should process + // correctly. No errors should be generated and the predicate function + // should not be called. let r = exec.next_batch(1); // The scan rows parameter has no effect for mock executor. We don't care. 
@@ -330,8 +333,8 @@ mod tests { ) } - /// Tests the scenario that there is no predicate or there is a predicate but always returns - /// true (no data is filtered). + /// Tests the scenario that there is no predicate or there is a predicate + /// but always returns true (no data is filtered). #[test] fn test_no_predicate_or_predicate_always_true() { // Build a selection executor without predicate. @@ -462,8 +465,8 @@ mod tests { ) } - /// Tests the scenario that the predicate returns both true and false. Rows that predicate - /// returns false should be removed from the result. + /// Tests the scenario that the predicate returns both true and false. Rows + /// that predicate returns false should be removed from the result. #[test] fn test_predicate_1() { let src_exec = make_src_executor_using_fixture_2(); @@ -514,8 +517,8 @@ mod tests { assert!(r.is_drained.unwrap()); } - /// Tests the scenario that there are multiple predicates. Only the row that all predicates - /// return true should be remained. + /// Tests the scenario that there are multiple predicates. Only the row that + /// all predicates return true should be remained. #[test] fn test_multiple_predicate_1() { // Use [is_even(column[0]), is_even(column[1])] as the predicate. @@ -634,8 +637,8 @@ mod tests { ], ); - // When evaluating predicates[0], there will be no error. However we will meet errors for - // predicates[1]. + // When evaluating predicates[0], there will be no error. However we will meet + // errors for predicates[1]. let predicates = (0..=1) .map(|offset| { @@ -647,8 +650,8 @@ mod tests { .collect(); let mut exec = BatchSelectionExecutor::new_for_test(src_exec, predicates); - // TODO: A more precise result is that the first two rows are returned and error starts from - // the third row. + // TODO: A more precise result is that the first two rows are returned and error + // starts from the third row. 
let r = exec.next_batch(1); assert!(r.logical_rows.is_empty()); diff --git a/components/tidb_query_executors/src/simple_aggr_executor.rs b/components/tidb_query_executors/src/simple_aggr_executor.rs index 1e1dd48929b..d26d293a274 100644 --- a/components/tidb_query_executors/src/simple_aggr_executor.rs +++ b/components/tidb_query_executors/src/simple_aggr_executor.rs @@ -1,7 +1,7 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -//! Simple aggregation is an aggregation that do not have `GROUP BY`s. It is more even more simpler -//! than stream aggregation. +//! Simple aggregation is an aggregation that do not have `GROUP BY`s. It is +//! more even more simpler than stream aggregation. use std::sync::Arc; @@ -58,8 +58,8 @@ impl BatchExecutor for BatchSimpleAggregationExecutor { } } -// We assign a dummy type `Box>` so that we can omit the type -// when calling `check_supported`. +// We assign a dummy type `Box>` so that we +// can omit the type when calling `check_supported`. impl BatchSimpleAggregationExecutor>> { /// Checks whether this executor can be used. #[inline] @@ -104,8 +104,8 @@ impl BatchSimpleAggregationExecutor { aggr_defs: Vec, aggr_def_parser: impl AggrDefinitionParser, ) -> Result { - // Empty states is fine because it will be re-initialized later according to the content - // in entities. + // Empty states is fine because it will be re-initialized later according to the + // content in entities. let aggr_impl = SimpleAggregationImpl { states: Vec::new(), has_input_rows: false, @@ -222,7 +222,8 @@ impl AggregationExecutorImpl for SimpleAggregationImpl Ok(Vec::new()) } - /// Simple aggregation can output aggregate results only if the source is drained. + /// Simple aggregation can output aggregate results only if the source is + /// drained. 
#[inline] fn is_partial_results_ready(&self) -> bool { false @@ -243,9 +244,11 @@ mod tests { #[test] fn test_it_works_unit() { - /// Aggregate function `Foo` accepts a Bytes column, returns a Int datum. + /// Aggregate function `Foo` accepts a Bytes column, returns a Int + /// datum. /// - /// The returned data is the sum of the length of all accepted bytes datums. + /// The returned data is the sum of the length of all accepted bytes + /// datums. #[derive(Debug, AggrFunction)] #[aggr_function(state = AggrFnFooState::new())] struct AggrFnFoo; @@ -290,9 +293,9 @@ mod tests { output.push(FieldTypeTp::LongLong.into()); } - /// Aggregate function `Bar` accepts a Real column, returns `(a: Int, b: Int, c: Real)`, - /// where `a` is the number of rows including nulls, `b` is the number of rows excluding - /// nulls, `c` is the sum of all values. + /// Aggregate function `Bar` accepts a Real column, returns `(a: Int, b: + /// Int, c: Real)`, where `a` is the number of rows including nulls, `b` + /// is the number of rows excluding nulls, `c` is the sum of all values. #[derive(Debug, AggrFunction)] #[aggr_function(state = AggrFnBarState::new())] struct AggrFnBar; @@ -349,7 +352,8 @@ mod tests { output.push(FieldTypeTp::Double.into()); } - // This test creates a simple aggregation executor with the following aggregate functions: + // This test creates a simple aggregation executor with the following aggregate + // functions: // - Foo("abc") // - Foo(NULL) // - Bar(42.5) @@ -360,8 +364,8 @@ mod tests { let src_exec = make_src_executor_1(); - // As a unit test, let's use the most simple way to build the executor. No complex parsers - // involved. + // As a unit test, let's use the most simple way to build the executor. No + // complex parsers involved. 
let aggr_definitions: Vec<_> = (0..6) .map(|index| { @@ -503,7 +507,8 @@ mod tests { use tipb::ExprType; use tipb_helper::ExprDefBuilder; - // This test creates a simple aggregation executor with the following aggregate functions: + // This test creates a simple aggregation executor with the following aggregate + // functions: // - COUNT(1) // - COUNT(4.5) // - COUNT(NULL) diff --git a/components/tidb_query_executors/src/slow_hash_aggr_executor.rs b/components/tidb_query_executors/src/slow_hash_aggr_executor.rs index bd1e5cf8a80..2502e28f570 100644 --- a/components/tidb_query_executors/src/slow_hash_aggr_executor.rs +++ b/components/tidb_query_executors/src/slow_hash_aggr_executor.rs @@ -23,8 +23,8 @@ use crate::{ util::{aggr_executor::*, hash_aggr_helper::HashAggregationHelper, *}, }; -/// Slow Hash Aggregation Executor supports multiple groups but uses less efficient ways to -/// store group keys in hash tables. +/// Slow Hash Aggregation Executor supports multiple groups but uses less +/// efficient ways to store group keys in hash tables. /// /// FIXME: It is not correct to just store the serialized data as the group key. /// See pingcap/tidb#10467. @@ -66,8 +66,8 @@ impl BatchExecutor for BatchSlowHashAggregationExecutor } } -// We assign a dummy type `Box>` so that we can omit the type -// when calling `check_supported`. +// We assign a dummy type `Box>` so that we +// can omit the type when calling `check_supported`. impl BatchSlowHashAggregationExecutor>> { /// Checks whether this executor can be used. #[inline] @@ -195,34 +195,37 @@ impl BatchSlowHashAggregationExecutor { pub struct SlowHashAggregationImpl { states: Vec>, - /// The value is the group index. `states` and `group_key_offsets` are stored in - /// the order of group index. + /// The value is the group index. `states` and `group_key_offsets` are + /// stored in the order of group index. 
groups: HashMap, group_by_exps: Vec, - /// Extra group by columns store the bytes columns in original data form while - /// default columns store them in sortkey form. - /// The sortkey form is used to aggr on while the original form is to be returned - /// as results. + /// Extra group by columns store the bytes columns in original data form + /// while default columns store them in sortkey form. + /// The sortkey form is used to aggr on while the original form is to be + /// returned as results. /// - /// For example, the bytes column at index i will be stored in sortkey form at column i - /// and in original data form at column `extra_group_by_col_index[i]`. + /// For example, the bytes column at index i will be stored in sortkey form + /// at column i and in original data form at column + /// `extra_group_by_col_index[i]`. extra_group_by_col_index: Vec, - /// The sequence of group by column index which are in original form and are in the - /// same order as group_by_exps by substituting bytes columns index for extra group by column index. + /// The sequence of group by column index which are in original form and are + /// in the same order as group_by_exps by substituting bytes columns + /// index for extra group by column index. original_group_by_col_index: Vec, - /// Encoded group keys are stored in this buffer sequentially. Offsets of each encoded - /// element are stored in `group_key_offsets`. + /// Encoded group keys are stored in this buffer sequentially. Offsets of + /// each encoded element are stored in `group_key_offsets`. /// /// `GroupKeyRefUnsafe` contains a raw pointer to this buffer. #[allow(clippy::box_collection)] group_key_buffer: Box>, - /// The offsets of encoded keys in `group_key_buffer`. This `Vec` always has a leading `0` - /// element. 
Then, the begin and end offsets of the "i"-th column of the group key whose group - /// index is "j" are `group_key_offsets[j * group_by_col_len + i]` and + /// The offsets of encoded keys in `group_key_buffer`. This `Vec` always has + /// a leading `0` element. Then, the begin and end offsets of the "i"-th + /// column of the group key whose group index is "j" are + /// `group_key_offsets[j * group_by_col_len + i]` and /// `group_key_offsets[j * group_by_col_len + i + 1]`. /// /// group_by_col_len = group_by_exps.len() + extra_group_by_col_index.len() @@ -231,8 +234,9 @@ pub struct SlowHashAggregationImpl { states_offset_each_logical_row: Vec, /// Stores evaluation results of group by expressions. - /// It is just used to reduce allocations. The lifetime is not really 'static. The elements - /// are only valid in the same batch where they are added. + /// It is just used to reduce allocations. The lifetime is not really + /// 'static. The elements are only valid in the same batch where they + /// are added. 
group_by_results_unsafe: Vec>, /// Cached encoded results for calculated Scalar results @@ -267,8 +271,8 @@ impl AggregationExecutorImpl for SlowHashAggregationImp let logical_rows_len = input_logical_rows.len(); let aggr_fn_len = entities.each_aggr_fn.len(); - // Decode columns with mutable input first, so subsequent access to input can be immutable - // (and the borrow checker will be happy) + // Decode columns with mutable input first, so subsequent access to input can be + // immutable (and the borrow checker will be happy) ensure_columns_decoded( context, &self.group_by_exps, @@ -330,8 +334,8 @@ impl AggregationExecutorImpl for SlowHashAggregationImp // End of the sortkey columns let group_key_ref_end = self.group_key_buffer.len(); - // Encode bytes column in original form to extra group by columns, which is to be returned - // as group by results + // Encode bytes column in original form to extra group by columns, which is to + // be returned as group by results for (i, col_index) in self.extra_group_by_col_index.iter().enumerate() { let group_by_result = &self.group_by_results_unsafe[*col_index]; match group_by_result { @@ -468,17 +472,19 @@ impl AggregationExecutorImpl for SlowHashAggregationImp Ok(group_by_columns) } - /// Slow hash aggregation can output aggregate results only if the source is drained. + /// Slow hash aggregation can output aggregate results only if the source is + /// drained. #[inline] fn is_partial_results_ready(&self) -> bool { false } } -/// A reference to a group key slice in the `group_key_buffer` of `SlowHashAggregationImpl`. +/// A reference to a group key slice in the `group_key_buffer` of +/// `SlowHashAggregationImpl`. /// -/// It is safe as soon as it doesn't outlive the `SlowHashAggregationImpl` that creates this -/// reference. +/// It is safe as soon as it doesn't outlive the `SlowHashAggregationImpl` that +/// creates this reference. 
struct GroupKeyRefUnsafe { /// Points to the `group_key_buffer` of `SlowHashAggregationImpl` buffer_ptr: NonNull>, @@ -521,7 +527,8 @@ mod tests { use tipb::ExprType; use tipb_helper::ExprDefBuilder; - // This test creates a hash aggregation executor with the following aggregate functions: + // This test creates a hash aggregation executor with the following aggregate + // functions: // - COUNT(1) // - AVG(col_0 + 5.0) // And group by: @@ -596,7 +603,8 @@ mod tests { .ensure_all_decoded_for_test(&mut EvalContext::default(), &exec.schema()[5]) .unwrap(); - // The row order is not defined. Let's sort it by the group by column before asserting. + // The row order is not defined. Let's sort it by the group by column before + // asserting. let mut sort_column: Vec<(usize, _)> = r.physical_columns[3] .decoded() .to_bytes_vec() diff --git a/components/tidb_query_executors/src/stream_aggr_executor.rs b/components/tidb_query_executors/src/stream_aggr_executor.rs index 52f55751442..4b768cd65fe 100644 --- a/components/tidb_query_executors/src/stream_aggr_executor.rs +++ b/components/tidb_query_executors/src/stream_aggr_executor.rs @@ -58,8 +58,8 @@ impl BatchExecutor for BatchStreamAggregationExecutor { } } -// We assign a dummy type `Box>` so that we can omit the type -// when calling `check_supported`. +// We assign a dummy type `Box>` so that we +// can omit the type when calling `check_supported`. impl BatchStreamAggregationExecutor>> { /// Checks whether this executor can be used. #[inline] @@ -95,13 +95,15 @@ pub struct BatchStreamAggregationImpl { states: Vec>, /// Stores evaluation results of group by expressions. - /// It is just used to reduce allocations. The lifetime is not really 'static. The elements - /// are only valid in the same batch where they are added. + /// It is just used to reduce allocations. The lifetime is not really + /// 'static. The elements are only valid in the same batch where they + /// are added. 
group_by_results_unsafe: Vec>, /// Stores evaluation results of aggregate expressions. - /// It is just used to reduce allocations. The lifetime is not really 'static. The elements - /// are only valid in the same batch where they are added. + /// It is just used to reduce allocations. The lifetime is not really + /// 'static. The elements are only valid in the same batch where they + /// are added. aggr_expr_results_unsafe: Vec>, } @@ -226,8 +228,8 @@ impl AggregationExecutorImpl for BatchStreamAggregation let group_by_len = self.group_by_exps.len(); let aggr_fn_len = entities.each_aggr_fn.len(); - // Decode columns with mutable input first, so subsequent access to input can be immutable - // (and the borrow checker will be happy) + // Decode columns with mutable input first, so subsequent access to input can be + // immutable (and the borrow checker will be happy) ensure_columns_decoded( context, &self.group_by_exps, @@ -391,8 +393,8 @@ impl AggregationExecutorImpl for BatchStreamAggregation Ok(group_by_columns) } - /// We cannot ensure the last group is complete, so we can output partial results - /// only if group count >= 2. + /// We cannot ensure the last group is complete, so we can output partial + /// results only if group count >= 2. 
#[inline] fn is_partial_results_ready(&self) -> bool { AggregationExecutorImpl::::groups_len(self) >= 2 @@ -469,7 +471,8 @@ mod tests { use tipb::ExprType; use tipb_helper::ExprDefBuilder; - // This test creates a stream aggregation executor with the following aggregate functions: + // This test creates a stream aggregation executor with the following aggregate + // functions: // - COUNT(1) // - AVG(col_1 + 1.0) // And group by: diff --git a/components/tidb_query_executors/src/table_scan_executor.rs b/components/tidb_query_executors/src/table_scan_executor.rs index 908f0a7146a..c2c310b4018 100644 --- a/components/tidb_query_executors/src/table_scan_executor.rs +++ b/components/tidb_query_executors/src/table_scan_executor.rs @@ -26,8 +26,8 @@ pub struct BatchTableScanExecutor(ScanExecutor; -// We assign a dummy type `Box>` so that we can omit the type -// when calling `check_supported`. +// We assign a dummy type `Box>` so that we can +// omit the type when calling `check_supported`. impl BatchTableScanExecutor>> { /// Checks whether this executor can be used. #[inline] @@ -80,8 +80,9 @@ impl BatchTableScanExecutor { column_id_index.insert(ci.get_column_id(), index); } - // Note: if two PK handles are given, we will only preserve the *last* one. Also if two - // columns with the same column id are given, we will only preserve the *last* one. + // Note: if two PK handles are given, we will only preserve the + // *last* one. Also if two columns with the same column + // id are given, we will only preserve the *last* one. } let no_common_handle = primary_column_ids.is_empty(); @@ -142,30 +143,32 @@ impl BatchExecutor for BatchTableScanExecutor { } struct TableScanExecutorImpl { - /// Note: Although called `EvalContext`, it is some kind of execution context instead. + /// Note: Although called `EvalContext`, it is some kind of execution + /// context instead. // TODO: Rename EvalContext to ExecContext. context: EvalContext, - /// The schema of the output. 
All of the output come from specific columns in the underlying - /// storage. + /// The schema of the output. All of the output come from specific columns + /// in the underlying storage. schema: Vec, - /// The default value of corresponding columns in the schema. When column data is missing, - /// the default value will be used to fill the output. + /// The default value of corresponding columns in the schema. When column + /// data is missing, the default value will be used to fill the output. columns_default_value: Vec>, /// The output position in the schema giving the column id. column_id_index: HashMap, - /// Vec of indices in output row to put the handle. The indices must be sorted in the vec. + /// Vec of indices in output row to put the handle. The indices must be + /// sorted in the vec. handle_indices: HandleIndicesVec, /// Vec of Primary key column's IDs. primary_column_ids: Vec, - /// A vector of flags indicating whether corresponding column is filled in `next_batch`. - /// It is a struct level field in order to prevent repeated memory allocations since its length - /// is fixed for each `next_batch` call. + /// A vector of flags indicating whether corresponding column is filled in + /// `next_batch`. It is a struct level field in order to prevent repeated + /// memory allocations since its length is fixed for each `next_batch` call. is_column_filled: Vec, } @@ -193,8 +196,8 @@ impl TableScanExecutorImpl { remaining = &remaining[1..]; let column_id = box_try!(remaining.read_var_i64()); let (val, new_remaining) = datum::split_datum(remaining, false)?; - // Note: The produced columns may be not in the same length if there is error due - // to corrupted data. It will be handled in `ScanExecutor`. + // Note: The produced columns may be not in the same length if there is error + // due to corrupted data. It will be handled in `ScanExecutor`. 
let some_index = self.column_id_index.get(&column_id); if let Some(index) = some_index { let index = *index; @@ -246,7 +249,8 @@ impl TableScanExecutorImpl { *decoded_columns += 1; self.is_column_filled[*idx] = true; } else { - // This column is missing. It will be filled with default values later. + // This column is missing. It will be filled with default values + // later. } } Ok(()) @@ -264,13 +268,14 @@ impl ScanExecutorImpl for TableScanExecutorImpl { &mut self.context } - /// Constructs empty columns, with PK in decoded format and the rest in raw format. + /// Constructs empty columns, with PK in decoded format and the rest in raw + /// format. fn build_column_vec(&self, scan_rows: usize) -> LazyBatchColumnVec { let columns_len = self.schema.len(); let mut columns = Vec::with_capacity(columns_len); - // If there are any PK columns, for each of them, fill non-PK columns before it and push the - // PK column. + // If there are any PK columns, for each of them, fill non-PK columns before it + // and push the PK column. // For example, consider: // non-pk non-pk non-pk pk non-pk non-pk pk pk non-pk non-pk // handle_indices: ^3 ^6 ^7 @@ -309,9 +314,10 @@ impl ScanExecutorImpl for TableScanExecutorImpl { last_index = *handle_index + 1; } - // Then fill remaining columns after the last handle column. If there are no PK columns, - // the previous loop will be skipped and this loop will be run on 0..columns_len. - // For the example above, this loop will push: [non-pk, non-pk] + // Then fill remaining columns after the last handle column. If there are no PK + // columns, the previous loop will be skipped and this loop will be run + // on 0..columns_len. 
For the example above, this loop will push: + // [non-pk, non-pk] for i in last_index..columns_len { if Some(i) == physical_table_id_column_idx { columns.push(LazyBatchColumn::decoded_with_capacity_and_tp( @@ -352,8 +358,9 @@ impl ScanExecutorImpl for TableScanExecutorImpl { let handle = table::decode_int_handle(key)?; for handle_index in &self.handle_indices { - // TODO: We should avoid calling `push_int` repeatedly. Instead we should specialize - // a `&mut Vec` first. However it is hard to program due to lifetime restriction. + // TODO: We should avoid calling `push_int` repeatedly. Instead we should + // specialize a `&mut Vec` first. However it is hard to program + // due to lifetime restriction. if !self.is_column_filled[*handle_index] { columns[*handle_index].mut_decoded().push_int(Some(handle)); decoded_columns += 1; @@ -361,14 +368,16 @@ impl ScanExecutorImpl for TableScanExecutorImpl { } } } else if !self.primary_column_ids.is_empty() { - // Otherwise, if `primary_column_ids` is not empty, we try to extract the values of the columns from the common handle. + // Otherwise, if `primary_column_ids` is not empty, we try to extract the values + // of the columns from the common handle. let mut handle = table::decode_common_handle(key)?; for primary_id in self.primary_column_ids.iter() { let index = self.column_id_index.get(primary_id); let (datum, remain) = datum::split_datum(handle, false)?; handle = remain; - // If the column info of the corresponding primary column id is missing, we ignore this slice of the datum. + // If the column info of the corresponding primary column id is missing, we + // ignore this slice of the datum. if let Some(&index) = index { if !self.is_column_filled[index] { columns[index].mut_raw().push(datum); @@ -390,8 +399,8 @@ impl ScanExecutorImpl for TableScanExecutorImpl { self.is_column_filled[*idx] = true; } - // Some fields may be missing in the row, we push corresponding default value to make all - // columns in same length. 
+ // Some fields may be missing in the row, we push corresponding default value to + // make all columns in same length. for i in 0..columns_len { if !self.is_column_filled[i] { // Missing fields must not be a primary key, so it must be @@ -585,7 +594,8 @@ mod tests { .collect() } - /// Returns whole table's ranges which include point range and non-point range. + /// Returns whole table's ranges which include point range and non-point + /// range. fn mixed_ranges_for_whole_table(&self) -> Vec { vec![ self.table_range(i64::MIN, 3), @@ -743,9 +753,9 @@ mod tests { vec![0, 1], vec![0, 2], vec![1, 2], - //PK is the last column in schema + // PK is the last column in schema vec![2, 1, 0], - //PK is the first column in schema + // PK is the first column in schema vec![0, 1, 2], // PK is in the middle of the schema vec![1, 0, 2], @@ -802,7 +812,8 @@ mod tests { executor.collect_exec_stats(&mut s); - // Collected statistics remain unchanged because of no newly generated delta statistics. + // Collected statistics remain unchanged because of no newly generated delta + // statistics. assert_eq!(s.scanned_rows_per_range.len(), 2); assert_eq!(s.scanned_rows_per_range[0], 3); assert_eq!(s.scanned_rows_per_range[1], 0); @@ -811,7 +822,8 @@ mod tests { assert_eq!(3, exec_summary.num_produced_rows); assert_eq!(2, exec_summary.num_iterations); - // Reset collected statistics so that now we will only collect statistics in this round. + // Reset collected statistics so that now we will only collect statistics in + // this round. s.clear(); executor.next_batch(10); executor.collect_exec_stats(&mut s); @@ -907,7 +919,8 @@ mod tests { let store = FixtureStorage::from(kv); - // For row 0 + row 1 + (row 2 ~ row 4), we should only get row 0, row 1 and an error. + // For row 0 + row 1 + (row 2 ~ row 4), we should only get row 0, row 1 and an + // error. 
for corrupted_row_index in 2..=4 { let mut executor = BatchTableScanExecutor::new( store.clone(), @@ -1013,8 +1026,8 @@ mod tests { let store = FixtureStorage::new(kv.into_iter().collect()); // Case 1: row 0 + row 1 + row 2 - // We should get row 0 and error because no further rows should be scanned when there is - // an error. + // We should get row 0 and error because no further rows should be scanned when + // there is an error. { let mut executor = BatchTableScanExecutor::new( store.clone(), @@ -1052,8 +1065,8 @@ mod tests { } // Case 1b: row 0 + row 1 + row 2 - // We should get row 0 and error because no further rows should be scanned when there is - // an error. With EXTRA_PHYSICAL_TABLE_ID_COL + // We should get row 0 and error because no further rows should be scanned when + // there is an error. With EXTRA_PHYSICAL_TABLE_ID_COL { let mut columns_info = columns_info.clone(); columns_info.push({ @@ -1228,8 +1241,8 @@ mod tests { // This test makes a pk column with id = 1 and non-pk columns with id // in 10 to 10 + columns_is_pk.len(). - // PK columns will be set to column 1 and others will be set to column 10 + i, where i is - // the index of each column. + // PK columns will be set to column 1 and others will be set to column 10 + i, + // where i is the index of each column. let mut columns_info = Vec::new(); for (i, is_pk) in columns_is_pk.iter().enumerate() { @@ -1378,7 +1391,8 @@ mod tests { assert_eq!(result.is_drained.unwrap(), true); assert_eq!(result.logical_rows.len(), 1); - // We expect we fill the primary column with the value embedded in the common handle. + // We expect we fill the primary column with the value embedded in the common + // handle. 
for i in 0..result.physical_columns.columns_len() { result.physical_columns[i] .ensure_all_decoded_for_test(&mut EvalContext::default(), &schema[i]) @@ -1563,7 +1577,8 @@ mod tests { result.physical_columns.columns_len(), columns.len() - missed_columns_info.len() ); - // We expect we fill the primary column with the value embedded in the common handle. + // We expect we fill the primary column with the value embedded in the common + // handle. for i in 0..result.physical_columns.columns_len() { result.physical_columns[i] .ensure_all_decoded_for_test(&mut EvalContext::default(), &schema[i]) diff --git a/components/tidb_query_executors/src/top_n_executor.rs b/components/tidb_query_executors/src/top_n_executor.rs index 112a3f3c33b..39f009784f0 100644 --- a/components/tidb_query_executors/src/top_n_executor.rs +++ b/components/tidb_query_executors/src/top_n_executor.rs @@ -18,32 +18,34 @@ use crate::{interface::*, util::*}; pub struct BatchTopNExecutor { /// The heap, which contains N rows at most. /// - /// This field is placed before `eval_columns_buffer_unsafe`, `order_exprs`, `order_is_desc` - /// and `src` because it relies on data in those fields and we want this field to be dropped - /// first. + /// This field is placed before `eval_columns_buffer_unsafe`, `order_exprs`, + /// `order_is_desc` and `src` because it relies on data in those fields + /// and we want this field to be dropped first. heap: BinaryHeap, - /// A collection of all evaluated columns. This is to avoid repeated allocations in - /// each `next_batch()`. + /// A collection of all evaluated columns. This is to avoid repeated + /// allocations in each `next_batch()`. /// - /// DO NOT EVER try to read the content of the elements directly, since it is highly unsafe. - /// The lifetime of elements is not really 'static. 
Certain elements are valid only if both - /// of the following conditions are satisfied: + /// DO NOT EVER try to read the content of the elements directly, since it + /// is highly unsafe. The lifetime of elements is not really 'static. + /// Certain elements are valid only if both of the following conditions + /// are satisfied: /// /// 1. `BatchTopNExecutor` is valid (i.e. not dropped). /// - /// 2. The referenced `LazyBatchColumnVec` of the element must be valid, which only happens - /// when at least one of the row is in the `heap`. Note that rows may be swapped out from - /// `heap` at any time. + /// 2. The referenced `LazyBatchColumnVec` of the element must be valid, + /// which only happens when at least one of the row is in the `heap`. + /// Note that rows may be swapped out from `heap` at any time. /// - /// This field is placed before `order_exprs` and `src` because it relies on data in - /// those fields and we want this field to be dropped first. + /// This field is placed before `order_exprs` and `src` because it relies on + /// data in those fields and we want this field to be dropped first. #[allow(clippy::box_collection)] eval_columns_buffer_unsafe: Box>>, order_exprs: Box<[RpnExpression]>, - /// This field stores the field type of the results evaluated by the exprs in `order_exprs`. + /// This field stores the field type of the results evaluated by the exprs + /// in `order_exprs`. order_exprs_field_type: Box<[FieldType]>, /// Whether or not it is descending order for each order by column. @@ -56,13 +58,14 @@ pub struct BatchTopNExecutor { is_ended: bool, } -/// All `NonNull` pointers in `BatchTopNExecutor` cannot be accessed out of the struct and -/// `BatchTopNExecutor` doesn't leak the pointers to other threads. Therefore, with those `NonNull` -/// pointers, BatchTopNExecutor still remains `Send`. 
+/// All `NonNull` pointers in `BatchTopNExecutor` cannot be accessed out of the +/// struct and `BatchTopNExecutor` doesn't leak the pointers to other threads. +/// Therefore, with those `NonNull` pointers, BatchTopNExecutor still remains +/// `Send`. unsafe impl Send for BatchTopNExecutor {} -// We assign a dummy type `Box>` so that we can omit the type -// when calling `check_supported`. +// We assign a dummy type `Box>` so that we +// can omit the type when calling `check_supported`. impl BatchTopNExecutor>> { /// Checks whether this executor can be used. #[inline] @@ -208,8 +211,8 @@ impl BatchTopNExecutor { &logical_rows, )?; - // Pin data behind an Arc, so that they won't be dropped as long as this `pinned_data` - // is kept somewhere. + // Pin data behind an Arc, so that they won't be dropped as long as this + // `pinned_data` is kept somewhere. let pinned_source_data = Arc::new(HeapItemSourceData { physical_columns, logical_rows, @@ -404,8 +407,8 @@ struct HeapItemSourceData { /// The item in the heap of `BatchTopNExecutor`. /// -/// WARN: The content of this structure is valid only if `BatchTopNExecutor` is valid (i.e. -/// not dropped). Thus it is called unsafe. +/// WARN: The content of this structure is valid only if `BatchTopNExecutor` is +/// valid (i.e. not dropped). Thus it is called unsafe. struct HeapItemUnsafe { /// A pointer to the `order_is_desc` field in `BatchTopNExecutor`. order_is_desc_ptr: NonNull<[bool]>, @@ -424,7 +427,8 @@ struct HeapItemUnsafe { /// The length of evaluated columns in the buffer is `order_is_desc.len()`. eval_columns_offset: usize, - /// Which logical row in the evaluated columns this heap item is representing. + /// Which logical row in the evaluated columns this heap item is + /// representing. 
logical_row_index: usize, } @@ -460,8 +464,9 @@ impl HeapItemUnsafe { let lhs = lhs_node.get_logical_scalar_ref(self.logical_row_index); let rhs = rhs_node.get_logical_scalar_ref(other.logical_row_index); - // There is panic inside, but will never panic, since the data type of corresponding - // column should be consistent for each `HeapItemUnsafe`. + // There is panic inside, but will never panic, since the data type of + // corresponding column should be consistent for each + // `HeapItemUnsafe`. let ord = lhs.cmp_sort_key(&rhs, &order_exprs_field_type[column_idx])?; if ord == Ordering::Equal { @@ -478,8 +483,8 @@ impl HeapItemUnsafe { } } -/// WARN: HeapItemUnsafe implements partial ordering. It panics when Collator fails to parse. -/// So make sure that it is valid before putting it into a heap. +/// WARN: HeapItemUnsafe implements partial ordering. It panics when Collator +/// fails to parse. So make sure that it is valid before putting it into a heap. impl Ord for HeapItemUnsafe { fn cmp(&self, other: &Self) -> Ordering { self.cmp_sort_key(other).unwrap() @@ -884,6 +889,7 @@ mod tests { /// Builds an executor that will return these data: /// + /// ```text /// == Schema == /// Col0 (Bytes[Utf8Mb4GeneralCi]) Col1(Bytes[Utf8Mb4Bin]) Col2(Bytes[Binary]) /// == Call #1 == @@ -897,6 +903,7 @@ mod tests { /// "Aa" NULL "aaa" /// "aaa" "Aa" "áa" /// (drained) + /// ``` fn make_bytes_src_executor() -> MockExecutor { MockExecutor::new( vec![ @@ -1139,6 +1146,7 @@ mod tests { /// Builds an executor that will return these data: /// + /// ```text /// == Schema == /// Col0 (LongLong(Unsigned)) Col1(LongLong[Signed]) Col2(Long[Unsigned]) /// == Call #1 == @@ -1152,6 +1160,7 @@ mod tests { /// 300 300 300 /// 9,223,372,036,854,775,808 -9,223,372,036,854,775,808 2,147,483,648 /// (drained) (drained) (drained) + /// ``` fn make_src_executor_unsigned() -> MockExecutor { MockExecutor::new( vec![ diff --git a/components/tidb_query_executors/src/util/aggr_executor.rs 
b/components/tidb_query_executors/src/util/aggr_executor.rs index 74a9429b390..a40c0c9aec4 100644 --- a/components/tidb_query_executors/src/util/aggr_executor.rs +++ b/components/tidb_query_executors/src/util/aggr_executor.rs @@ -9,7 +9,8 @@ //! ^^^^^ ^^^^ : Group By Expressions //! ``` //! -//! The SQL above has 2 GROUP BY columns, so we say it's *group by cardinality* is 2. +//! The SQL above has 2 GROUP BY columns, so we say it's *group by cardinality* +//! is 2. //! //! In the result: //! @@ -22,9 +23,9 @@ //! ^^^^^^ ^^^^^ : Group By Column //! ``` //! -//! Some aggregate function output multiple results, for example, `AVG(Int)` output two results: -//! count and sum. In this case we say that the result of `AVG(Int)` has a *cardinality* of 2. -//! +//! Some aggregate function output multiple results, for example, `AVG(Int)` +//! output two results: count and sum. In this case we say that the result of +//! `AVG(Int)` has a *cardinality* of 2. use std::{convert::TryFrom, sync::Arc}; @@ -44,17 +45,20 @@ use tipb::{Expr, FieldType}; use crate::interface::*; pub trait AggregationExecutorImpl: Send { - /// Accepts entities without any group by columns and modifies them optionally. + /// Accepts entities without any group by columns and modifies them + /// optionally. /// - /// Implementors should modify the `schema` entity when there are group by columns. + /// Implementors should modify the `schema` entity when there are group by + /// columns. /// /// This function will be called only once. fn prepare_entities(&mut self, entities: &mut Entities); - /// Processes a set of columns which are emitted from the underlying executor. + /// Processes a set of columns which are emitted from the underlying + /// executor. /// - /// Implementors should update the aggregate function states according to the data of - /// these columns. + /// Implementors should update the aggregate function states according to + /// the data of these columns. 
fn process_batch_input( &mut self, entities: &mut Entities, @@ -64,19 +68,20 @@ pub trait AggregationExecutorImpl: Send { /// Returns the current number of groups. /// - /// Note that this number can be inaccurate because it is a hint for the capacity of the vector. + /// Note that this number can be inaccurate because it is a hint for the + /// capacity of the vector. fn groups_len(&self) -> usize; /// Iterates aggregate function states for each available group. /// - /// Implementors should call `iteratee` for each group with the aggregate function states of - /// that group as the argument. + /// Implementors should call `iteratee` for each group with the aggregate + /// function states of that group as the argument. /// - /// Implementors may return the content of each group as extra columns in the return value - /// if there are group by columns. + /// Implementors may return the content of each group as extra columns in + /// the return value if there are group by columns. /// - /// Implementors should not iterate the same group multiple times for the same partial - /// input data. + /// Implementors should not iterate the same group multiple times for the + /// same partial input data. fn iterate_available_groups( &mut self, entities: &mut Entities, @@ -84,10 +89,12 @@ pub trait AggregationExecutorImpl: Send { iteratee: impl FnMut(&mut Entities, &[Box]) -> Result<()>, ) -> Result>; - /// Returns whether we can now output partial aggregate results when the source is not drained. + /// Returns whether we can now output partial aggregate results when the + /// source is not drained. /// - /// This method is called only when the source is not drained because aggregate result is always - /// ready if the source is drained and no error occurs. + /// This method is called only when the source is not drained because + /// aggregate result is always ready if the source is drained and no + /// error occurs. 
fn is_partial_results_ready(&self) -> bool; } @@ -97,8 +104,8 @@ pub struct Entities { pub src: Src, pub context: EvalContext, - /// The schema of the aggregation executor. It consists of aggregate result columns and - /// group by columns. + /// The schema of the aggregation executor. It consists of aggregate result + /// columns and group by columns. pub schema: Vec, /// The aggregate function. @@ -110,13 +117,14 @@ pub struct Entities { /// The (input) expression of each aggregate function. pub each_aggr_exprs: Vec, - /// The eval type of the result columns of all aggregate functions. One aggregate function - /// may have multiple result columns. + /// The eval type of the result columns of all aggregate functions. One + /// aggregate function may have multiple result columns. pub all_result_column_types: Vec, } -/// A shared executor implementation for simple aggregation, hash aggregation and -/// stream aggregation. Implementation differences are further given via `AggregationExecutorImpl`. +/// A shared executor implementation for simple aggregation, hash aggregation +/// and stream aggregation. Implementation differences are further given via +/// `AggregationExecutorImpl`. pub struct AggregationExecutor> { imp: I, is_ended: bool, @@ -154,7 +162,8 @@ impl> AggregationExecutor schema_len); - // Currently only support 1 parameter aggregate functions, so let's simply assert it. + // Currently only support 1 parameter aggregate functions, so let's simply + // assert it. 
assert_eq!(each_aggr_exprs.len(), each_aggr_exprs_len + 1); each_aggr_fn.push(aggr_fn); @@ -190,7 +199,8 @@ impl> AggregationExecutor Result<(Option, bool)> { // Use max batch size from the beginning because aggregation @@ -199,12 +209,13 @@ impl> AggregationExecutor MockExecutor { MockExecutor::new( vec![ diff --git a/components/tidb_query_executors/src/util/hash_aggr_helper.rs b/components/tidb_query_executors/src/util/hash_aggr_helper.rs index 7795b1c1062..e357d065030 100644 --- a/components/tidb_query_executors/src/util/hash_aggr_helper.rs +++ b/components/tidb_query_executors/src/util/hash_aggr_helper.rs @@ -16,8 +16,9 @@ pub struct HashAggregationHelper; impl HashAggregationHelper { /// Updates states for each row. /// - /// Each row may belong to a different group. States of all groups should be passed in altogether - /// in a single vector and the states of each row should be specified by an offset vector. + /// Each row may belong to a different group. States of all groups should be + /// passed in altogether in a single vector and the states of each row + /// should be specified by an offset vector. pub fn update_each_row_states_by_offset( entities: &mut Entities, input_physical_columns: &mut LazyBatchColumnVec, diff --git a/components/tidb_query_executors/src/util/mock_executor.rs b/components/tidb_query_executors/src/util/mock_executor.rs index 1f61f811b8c..ae20695033f 100644 --- a/components/tidb_query_executors/src/util/mock_executor.rs +++ b/components/tidb_query_executors/src/util/mock_executor.rs @@ -9,8 +9,8 @@ use tipb::FieldType; use crate::interface::*; -/// A simple mock executor that will return batch data according to a fixture without any -/// modification. +/// A simple mock executor that will return batch data according to a fixture +/// without any modification. /// /// Normally this should be only used in tests. 
pub struct MockExecutor { diff --git a/components/tidb_query_executors/src/util/mod.rs b/components/tidb_query_executors/src/util/mod.rs index cd6c0e1ed5e..6aa578459e2 100644 --- a/components/tidb_query_executors/src/util/mod.rs +++ b/components/tidb_query_executors/src/util/mod.rs @@ -25,8 +25,8 @@ pub fn ensure_columns_decoded( Ok(()) } -/// Evaluates expressions and outputs the result into the given Vec. Lifetime of the expressions -/// are erased. +/// Evaluates expressions and outputs the result into the given Vec. Lifetime of +/// the expressions are erased. pub unsafe fn eval_exprs_decoded_no_lifetime<'a>( ctx: &mut EvalContext, exprs: &[RpnExpression], diff --git a/components/tidb_query_executors/src/util/scan_executor.rs b/components/tidb_query_executors/src/util/scan_executor.rs index 114bc77ee1a..c9a88fb820e 100644 --- a/components/tidb_query_executors/src/util/scan_executor.rs +++ b/components/tidb_query_executors/src/util/scan_executor.rs @@ -25,8 +25,9 @@ pub trait ScanExecutorImpl: Send { /// Accepts a key value pair and fills the column vector. /// - /// The column vector does not need to be regular when there are errors during this process. - /// However if there is no error, the column vector must be regular. + /// The column vector does not need to be regular when there are errors + /// during this process. However if there is no error, the column vector + /// must be regular. fn process_kv_pair( &mut self, key: &[u8], @@ -35,8 +36,9 @@ pub trait ScanExecutorImpl: Send { ) -> Result<()>; } -/// A shared executor implementation for both table scan and index scan. Implementation differences -/// between table scan and index scan are further given via `ScanExecutorImpl`. +/// A shared executor implementation for both table scan and index scan. +/// Implementation differences between table scan and index scan are further +/// given via `ScanExecutorImpl`. pub struct ScanExecutor { /// The internal scanning implementation. 
imp: I, @@ -44,9 +46,9 @@ pub struct ScanExecutor { /// The scanner that scans over ranges. scanner: RangesScanner, - /// A flag indicating whether this executor is ended. When table is drained or there was an - /// error scanning the table, this flag will be set to `true` and `next_batch` should be never - /// called again. + /// A flag indicating whether this executor is ended. When table is drained + /// or there was an error scanning the table, this flag will be set to + /// `true` and `next_batch` should be never called again. is_ended: bool, } @@ -94,7 +96,8 @@ impl ScanExecutor { /// Fills a column vector and returns whether or not all ranges are drained. /// - /// The columns are ensured to be regular even if there are errors during the process. + /// The columns are ensured to be regular even if there are errors during + /// the process. fn fill_column_vec( &mut self, scan_rows: usize, @@ -129,7 +132,8 @@ impl ScanExecutor { } /// Extracts `FieldType` from `ColumnInfo`. -// TODO: Embed FieldType in ColumnInfo directly in Cop DAG v2 to remove this function. +// TODO: Embed FieldType in ColumnInfo directly in Cop DAG v2 to remove this +// function. pub fn field_type_from_column_info(ci: &ColumnInfo) -> FieldType { let mut field_type = FieldType::default(); field_type.set_tp(ci.get_tp()); @@ -176,9 +180,9 @@ impl BatchExecutor for ScanExecutor { let logical_rows = (0..logical_columns.rows_len()).collect(); // TODO - // If `is_drained.is_err()`, it means that there is an error after *successfully* retrieving - // these rows. After that, if we only consumes some of the rows (TopN / Limit), we should - // ignore this error. + // If `is_drained.is_err()`, it means that there is an error after + // *successfully* retrieving these rows. After that, if we only consumes + // some of the rows (TopN / Limit), we should ignore this error. match &is_drained { // Note: `self.is_ended` is only used for assertion purpose. 
diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index e283a78d245..16f6a8f66c2 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -223,8 +223,8 @@ fn get_cast_fn_rpn_meta( /// Gets the cast function between specified data types. /// -/// TODO: This function supports some internal casts performed by TiKV. However it would be better -/// to be done in TiDB. +/// TODO: This function supports some internal casts performed by TiKV. However +/// it would be better to be done in TiDB. pub fn get_cast_fn_rpn_node( is_from_constant: bool, from_field_type: &FieldType, @@ -333,8 +333,9 @@ fn cast_string_as_int( match val { None => Ok(None), Some(val) => { - // TODO: in TiDB, if `b.args[0].GetType().Hybrid()` || `IsBinaryLiteral(b.args[0])`, - // then it will return res from EvalInt() directly. + // TODO: in TiDB, if `b.args[0].GetType().Hybrid()` || + // `IsBinaryLiteral(b.args[0])`, then it will return res from + // EvalInt() directly. 
let is_unsigned = extra.ret_field_type.is_unsigned(); let val = get_valid_utf8_prefix(ctx, val)?; let val = val.trim(); @@ -480,8 +481,8 @@ fn cast_signed_int_as_unsigned_real( } } -// because we needn't to consider if uint overflow upper boundary of signed real, -// so we can merge uint to signed/unsigned real in one function +// because we needn't to consider if uint overflow upper boundary of signed +// real, so we can merge uint to signed/unsigned real in one function #[rpn_fn(nullable)] #[inline] fn cast_unsigned_int_as_signed_or_unsigned_real(val: Option<&Int>) -> Result> { @@ -710,9 +711,10 @@ fn cast_float_real_as_string( } } -// FIXME: We cannot use specialization in current Rust version, so impl ConvertTo for Bytes cannot -// pass compile because of we have impl Convert for T where T: ToString + Evaluable -// Refactor this part after https://github.com/rust-lang/rust/issues/31844 closed +// FIXME: We cannot use specialization in current Rust version, so impl +// ConvertTo for Bytes cannot pass compile because of we have impl +// Convert for T where T: ToString + Evaluable +// Refactor this part after https://github.com/rust-lang/rust/issues/31844 closed #[rpn_fn(nullable, capture = [ctx, extra])] #[inline] fn cast_string_as_string( @@ -841,7 +843,8 @@ fn cast_string_as_unsigned_decimal( match val { None => Ok(None), Some(val) => { - // FIXME: in TiDB, if the param IsBinaryLiteral, then return the result of `evalDecimal` directly + // FIXME: in TiDB, if the param IsBinaryLiteral, then return the result of + // `evalDecimal` directly let d: Decimal = val.convert(ctx)?; let d = if metadata.get_in_union() && d.is_negative() { Decimal::zero() @@ -1302,7 +1305,8 @@ fn cast_string_as_json(extra: &RpnFnCallExtra<'_>, val: Option) -> Res let val: Json = s.parse()?; Ok(Some(val)) } else { - // FIXME: port `JSONBinary` from TiDB to adapt if the bytes is not a valid utf8 string + // FIXME: port `JSONBinary` from TiDB to adapt if the bytes is not a valid utf8 + // 
string let val = unsafe { String::from_utf8_unchecked(val.to_owned()) }; Ok(Some(Json::from_string(val)?)) } @@ -2308,9 +2312,10 @@ mod tests { // and `show warnings` will show // `| Warning | 1292 | Truncated incorrect INTEGER value: '18446744073709551616'` // fix this cast_string_as_int after fix TiDB's - // ("18446744073709551616", 18446744073709551615 as i64, Some(ERR_TRUNCATE_WRONG_VALUE) , Cond::Unsigned) - // FIXME: our cast_string_as_int's err handle is not exactly same as TiDB's - // ("18446744073709551616", 18446744073709551615u64 as i64, Some(ERR_TRUNCATE_WRONG_VALUE), Cond::InSelectStmt), + // ("18446744073709551616", 18446744073709551615 as i64, Some(ERR_TRUNCATE_WRONG_VALUE) + // , Cond::Unsigned) FIXME: our cast_string_as_int's err handle is not + // exactly same as TiDB's ("18446744073709551616", 18446744073709551615u64 + // as i64, Some(ERR_TRUNCATE_WRONG_VALUE), Cond::InSelectStmt), // has prefix `-` and in_union and unsigned ("-10", 0, vec![], Cond::InUnionAndUnsigned), @@ -2559,7 +2564,8 @@ mod tests { fn test_time_as_int_and_uint() { let mut ctx = EvalContext::default(); // TODO: add more test case - // TODO: add test that make cast_any_as_any:: returning truncated error + // TODO: add test that make cast_any_as_any:: returning truncated + // error let cs: Vec<(Time, i64)> = vec![ ( Time::parse_datetime(&mut ctx, "2000-01-01T12:13:14", 0, true).unwrap(), @@ -2570,8 +2576,12 @@ mod tests { 20000101121315, ), // FiXME - // Time::parse_utc_datetime("2000-01-01T12:13:14.6666", 4).unwrap().round_frac(DEFAULT_FSP) - // will get 2000-01-01T12:13:14, this is a bug + // ``` + // Time::parse_utc_datetime("2000-01-01T12:13:14.6666", 4) + // .unwrap() + // .round_frac(DEFAULT_FSP) + // ``` + // will get 2000-01-01T12:13:14, this is a bug // ( // Time::parse_utc_datetime("2000-01-01T12:13:14.6666", 4).unwrap(), // 20000101121315, @@ -2775,7 +2785,7 @@ mod tests { for (input, expected, fsp) in cases { let mut ctx = EvalContext::default(); let time = - 
Time::parse_timestamp(&mut ctx, input, MAX_FSP, /* Enable round*/ true).unwrap(); + Time::parse_timestamp(&mut ctx, input, MAX_FSP, /* Enable round */ true).unwrap(); let actual: Time = RpnFnScalarEvaluator::new() .push_param(time) @@ -3528,9 +3538,11 @@ mod tests { vec![ERR_TRUNCATE_WRONG_VALUE, ERR_DATA_OUT_OF_RANGE], ), // the case below has 3 warning - // 1. from getValidFloatPrefix, because of `-1234abc`'s `abc`, (ERR_TRUNCATE_WRONG_VALUE) - // 2. from ProduceFloatWithSpecifiedTp, because of TruncateFloat (ERR_DATA_OUT_OF_RANGE) - // 3. from ProduceFloatWithSpecifiedTp, because of unsigned but negative (ERR_DATA_OUT_OF_RANGE) + // - from getValidFloatPrefix, because of `-1234abc`'s `abc`, + // (ERR_TRUNCATE_WRONG_VALUE) + // - from ProduceFloatWithSpecifiedTp, because of TruncateFloat (ERR_DATA_OUT_OF_RANGE) + // - from ProduceFloatWithSpecifiedTp, because of unsigned but negative + // (ERR_DATA_OUT_OF_RANGE) ( String::from("-1234abc"), 0.0, @@ -3865,8 +3877,8 @@ mod tests { } /// base_cs: - /// vector of (T, T to bytes(without any other handle do by cast_as_string_helper), - /// T to string for debug output), + /// vector of (T, T to bytes(without any other handle do by + /// cast_as_string_helper), T to string for debug output), /// the object should not be zero len. #[allow(clippy::type_complexity)] fn test_as_string_helper( @@ -4627,8 +4639,8 @@ mod tests { // ( // origin, origin_flen, origin_decimal, res_flen, res_decimal, is_unsigned, // expect, warning_err_code, - // (InInsertStmt || InUpdateStmt || InDeleteStmt), overflow_as_warning, truncate_as_warning - // ) + // (InInsertStmt || InUpdateStmt || InDeleteStmt), overflow_as_warning, + // truncate_as_warning ) // // The origin_flen, origin_decimal here is // to let the programmer clearly know what the flen and decimal of the decimal is. 
@@ -4983,8 +4995,9 @@ mod tests { } // These test depend on the correctness of - // Decimal::from(u64), Decimal::from(i64), Decimal::from_f64(), Decimal::from_bytes() - // Decimal::zero(), Decimal::round, max_or_min_dec, max_decimal + // Decimal::from(u64), Decimal::from(i64), Decimal::from_f64(), + // Decimal::from_bytes() Decimal::zero(), Decimal::round, max_or_min_dec, + // max_decimal #[test] fn test_unsigned_int_as_signed_or_unsigned_decimal() { test_none_with_ctx_and_extra(cast_unsigned_int_as_signed_or_unsigned_decimal); @@ -6087,8 +6100,9 @@ mod tests { { // cast_real_as_duration call `Duration::parse`, directly, // and `Duration::parse`, is test in duration.rs. - // Our test here is to make sure that the result is same as calling `Duration::parse`, - // no matter whether call_real_as_duration call `Duration::parse`, directly. + // Our test here is to make sure that the result is same as calling + // `Duration::parse`, no matter whether call_real_as_duration call + // `Duration::parse`, directly. for val in base_cs { for fsp in MIN_FSP..=MAX_FSP { let mut ctx = CtxConfig { @@ -6756,7 +6770,8 @@ mod tests { // TODO: add more case for other TimeType let cs = vec![ - // Add time_type filed here is to make maintainer know clearly that what is the type of the time. + // Add time_type filed here is to make maintainer know clearly that what is the type of + // the time. ( Time::parse_datetime(&mut ctx, "2000-01-01T12:13:14", 0, true).unwrap(), TimeType::DateTime, diff --git a/components/tidb_query_expr/src/impl_compare_in.rs b/components/tidb_query_expr/src/impl_compare_in.rs index 03b5919b410..d518c9061a0 100644 --- a/components/tidb_query_expr/src/impl_compare_in.rs +++ b/components/tidb_query_expr/src/impl_compare_in.rs @@ -161,8 +161,8 @@ impl InByCompare for Int {} impl InByCompare for Real {} impl InByCompare for Decimal {} impl InByCompare for Duration {} -// DateTime requires TZInfo in context, and we cannot acquire it during metadata_mapper. 
-// TODO: implement InByHash for DateTime. +// DateTime requires TZInfo in context, and we cannot acquire it during +// metadata_mapper. TODO: implement InByHash for DateTime. impl InByCompare for DateTime {} #[derive(Debug)] diff --git a/components/tidb_query_expr/src/impl_encryption.rs b/components/tidb_query_expr/src/impl_encryption.rs index 9bf99d9f52a..3a51f798442 100644 --- a/components/tidb_query_expr/src/impl_encryption.rs +++ b/components/tidb_query_expr/src/impl_encryption.rs @@ -77,8 +77,9 @@ pub fn compress(input: BytesRef, writer: BytesWriter) -> Result { return Ok(writer.write_ref(Some(b""))); } let mut e = ZlibEncoder::new(input, Compression::default()); - // preferred capacity is input length plus four bytes length header and one extra end "." - // max capacity is isize::max_value(), or will panic with "capacity overflow" + // preferred capacity is input length plus four bytes length header and one + // extra end "." max capacity is isize::max_value(), or will panic with + // "capacity overflow" let mut vec = Vec::with_capacity((input.len() + 5).min(isize::max_value() as usize)); vec.resize(4, 0); LittleEndian::write_u32(&mut vec, input.len() as u32); @@ -116,10 +117,11 @@ pub fn uncompress( let mut d = ZlibDecoder::new(&input[4..]); let mut vec = Vec::with_capacity(len); - // if the length of uncompressed string is greater than the length we read from the first - // four bytes, return null and generate a length corrupted warning. - // if the length of uncompressed string is zero or uncompress fail, return null and generate - // a data corrupted warning + // - if the length of uncompressed string is greater than the length we read + // from the first four bytes, return null and generate a length corrupted + // warning. 
+ // - if the length of uncompressed string is zero or uncompress fail, return + // null and generate a data corrupted warning match d.read_to_end(&mut vec) { match d.read_to_end(&mut vec) { Ok(decoded_len) if len >= decoded_len && decoded_len != 0 => { Ok(writer.write_ref(Some(vec.as_ref()))) @@ -458,7 +460,7 @@ mod tests { ); } - //test NULL case + // test NULL case assert!( RpnFnScalarEvaluator::new() .push_param(ScalarValue::Int(None)) diff --git a/components/tidb_query_expr/src/impl_json.rs b/components/tidb_query_expr/src/impl_json.rs index 0b42c953712..5e5595bd3ed 100644 --- a/components/tidb_query_expr/src/impl_json.rs +++ b/components/tidb_query_expr/src/impl_json.rs @@ -66,7 +66,8 @@ fn json_modify(args: &[ScalarValueRef], mt: ModifyType) -> Result> Ok(Some(base.as_ref().modify(&path_expr_list, values, mt)?)) } -/// validate the arguments are `(Option, &[(Option, Option)])` +/// validate the arguments are `(Option, &[(Option, +/// Option)])` fn json_modify_validator(expr: &tipb::Expr) -> Result<()> { let children = expr.get_children(); assert!(children.len() >= 2); diff --git a/components/tidb_query_expr/src/impl_math.rs b/components/tidb_query_expr/src/impl_math.rs index 798ca2b9c6a..80484c224c4 100644 --- a/components/tidb_query_expr/src/impl_math.rs +++ b/components/tidb_query_expr/src/impl_math.rs @@ -639,15 +639,22 @@ fn extract_num(num_s: &str, is_neg: bool, from_base: IntWithSign) -> IntWithSign } } -// Returns (isize, is_positive): convert an i64 to usize, and whether the input is positive +// Returns (isize, is_positive): convert an i64 to usize, and whether the input +// is positive // // # Examples // ``` // assert_eq!(i64_to_usize(1_i64, false), (1_usize, true)); // assert_eq!(i64_to_usize(1_i64, false), (1_usize, true)); // assert_eq!(i64_to_usize(-1_i64, false), (1_usize, false)); -// assert_eq!(i64_to_usize(u64::max_value() as i64, true), (u64::max_value() as usize, true)); -// assert_eq!(i64_to_usize(u64::max_value() as i64, false), 
(1_usize, false)); +// assert_eq!( +// i64_to_usize(u64::max_value() as i64, true), +// (u64::max_value() as usize, true) +// ); +// assert_eq!( +// i64_to_usize(u64::max_value() as i64, false), +// (1_usize, false) +// ); // ``` #[inline] pub fn i64_to_usize(i: i64, is_unsigned: bool) -> (usize, bool) { @@ -1272,7 +1279,8 @@ mod tests { (std::f64::consts::PI, 0.0_f64), ( (std::f64::consts::PI * 3.0) / 4.0, - f64::tan((std::f64::consts::PI * 3.0) / 4.0), //in mysql and rust, it equals -1.0000000000000002, not -1 + f64::tan((std::f64::consts::PI * 3.0) / 4.0), /* in mysql and rust, it equals + * -1.0000000000000002, not -1 */ ), ]; for (input, expect) in test_cases { diff --git a/components/tidb_query_expr/src/impl_op.rs b/components/tidb_query_expr/src/impl_op.rs index dce8920a545..5ecb4e9a7dc 100644 --- a/components/tidb_query_expr/src/impl_op.rs +++ b/components/tidb_query_expr/src/impl_op.rs @@ -29,7 +29,8 @@ pub fn logical_or(arg0: Option<&i64>, arg1: Option<&i64>) -> Result> #[rpn_fn(nullable)] #[inline] pub fn logical_xor(arg0: Option<&i64>, arg1: Option<&i64>) -> Result> { - // evaluates to 1 if an odd number of operands is nonzero, otherwise 0 is returned. + // evaluates to 1 if an odd number of operands is nonzero, otherwise 0 is + // returned. 
Ok(match (arg0, arg1) { (Some(arg0), Some(arg1)) => Some(((*arg0 == 0) ^ (*arg1 == 0)) as i64), _ => None, diff --git a/components/tidb_query_expr/src/impl_string.rs b/components/tidb_query_expr/src/impl_string.rs index c43b0ff2f1f..9ebba24ed43 100644 --- a/components/tidb_query_expr/src/impl_string.rs +++ b/components/tidb_query_expr/src/impl_string.rs @@ -16,10 +16,12 @@ const SPACE: u8 = 0o40u8; const MAX_BLOB_WIDTH: i32 = 16_777_216; // FIXME: Should be isize // see https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_to-base64 -// mysql base64 doc: A newline is added after each 76 characters of encoded output +// mysql base64 doc: A newline is added after each 76 characters of encoded +// output const BASE64_LINE_WRAP_LENGTH: usize = 76; -// mysql base64 doc: Each 3 bytes of the input data are encoded using 4 characters. +// mysql base64 doc: Each 3 bytes of the input data are encoded using 4 +// characters. const BASE64_INPUT_CHUNK_LENGTH: usize = 3; const BASE64_ENCODED_CHUNK_LENGTH: usize = 4; const BASE64_LINE_WRAP: u8 = b'\n'; @@ -379,11 +381,13 @@ pub fn rpad_utf8( } } -// when target_len is 0, return Some(0), means the pad function should return empty string -// currently there are three conditions it return None, which means pad function should return Null -// 1. target_len is negative -// 2. target_len of type in byte is larger then MAX_BLOB_WIDTH -// 3. 
target_len is greater than length of input string, *and* pad string is empty +// when target_len is 0, return Some(0), means the pad function should return +// empty string currently there are three conditions it return None, which means +// pad function should return Null +// - target_len is negative +// - target_len of type in byte is larger then MAX_BLOB_WIDTH +// - target_len is greater than length of input string, *and* pad string is +// empty // otherwise return Some(target_len) #[inline] fn validate_target_len_for_pad( diff --git a/components/tidb_query_expr/src/impl_time.rs b/components/tidb_query_expr/src/impl_time.rs index 5914740c8fa..80912fd6526 100644 --- a/components/tidb_query_expr/src/impl_time.rs +++ b/components/tidb_query_expr/src/impl_time.rs @@ -179,7 +179,8 @@ pub fn week_of_year(ctx: &mut EvalContext, t: Option<&DateTime>) -> Result 198652, here the first 4 digits represents year, and the last 2 digits represents week. +// e.g.: SELECT YEARWEEK('1987-01-01'); -- -> 198652, here the first 4 digits +// represents year, and the last 2 digits represents week. #[rpn_fn(capture = [ctx])] #[inline] pub fn year_week_with_mode(ctx: &mut EvalContext, t: &DateTime, mode: &Int) -> Result> { @@ -810,8 +811,8 @@ pub fn duration_duration_time_diff( ) -> Result> { let res = match arg1.checked_sub(*arg2) { Some(res) => res, - // `check_sub` returns `None` if the sub operation overflow/underflow i64 bound or mysql_time_value bound. - // and we need to treat these two case separately. + // `check_sub` returns `None` if the sub operation overflow/underflow i64 bound or + // mysql_time_value bound. and we need to treat these two case separately. // if `arg1 - arg2` is in (`MAX_NANOS`, `i64::MAX`], return max value of mysql `TIME` type. // if `arg1 - arg2` is in [`i64::MIN`, `-MAX_NANOS`), return min value of mysql `TIME` type. // if `arg1 - arg2` is overflow or underflow i64, return `None`. 
@@ -1075,23 +1076,26 @@ mod tests { assert_eq!(output, expect, "{:?} {:?}", date, format); } - // // TODO: pass this test after refactoring the issue #3953 is fixed. - // { - // let format: Option = Some("abc%b %M %m %c %D %d %e %j".as_bytes().to_vec()); - // let time: Option = Some( DateTime::parse_utc_datetime("0000-00-00 00:00:00", 6).unwrap()); - // - // let mut cfg = EvalConfig::new(); - // cfg.set_flag(Flag::IN_UPDATE_OR_DELETE_STMT) - // .set_sql_mode(SqlMode::NO_ZERO_DATE | SqlMode::STRICT_ALL_TABLES); - // let ctx = EvalContext::new(Arc::new(cfg)); - // - // let output = RpnFnScalarEvaluator::new() - // .context(ctx) - // .push_param(time.clone()) - // .push_param(format) - // .evaluate::(ScalarFuncSig::DateFormatSig); - // assert!(output.is_err()); - // } + // TODO: pass this test after refactoring the issue #3953 is fixed. + // { + // let format: Option = Some( + // "abc%b %M %m %c %D %d %e %j".as_bytes().to_vec()); + // let time: Option = + // Some(DateTime::parse_utc_datetime( + // "0000-00-00 00:00:00", 6).unwrap()); + + // let mut cfg = EvalConfig::new(); + // cfg.set_flag(Flag::IN_UPDATE_OR_DELETE_STMT) + // .set_sql_mode(SqlMode::NO_ZERO_DATE | SqlMode::STRICT_ALL_TABLES); + // let ctx = EvalContext::new(Arc::new(cfg)); + + // let output = RpnFnScalarEvaluator::new() + // .context(ctx) + // .push_param(time.clone()) + // .push_param(format) + // .evaluate::(ScalarFuncSig::DateFormatSig); + // assert!(output.is_err()); + // } { let mut cfg = EvalConfig::new(); @@ -1868,8 +1872,10 @@ mod tests { #[test] fn test_from_days() { let cases = vec![ - (ScalarValue::Int(Some(-140)), Some("0000-00-00")), // mysql FROM_DAYS returns 0000-00-00 for any day <= 365. - (ScalarValue::Int(Some(140)), Some("0000-00-00")), // mysql FROM_DAYS returns 0000-00-00 for any day <= 365. + (ScalarValue::Int(Some(-140)), Some("0000-00-00")), /* mysql FROM_DAYS returns + * 0000-00-00 for any day <= + * 365. 
*/ + (ScalarValue::Int(Some(140)), Some("0000-00-00")), /* mysql FROM_DAYS returns 0000-00-00 for any day <= 365. */ (ScalarValue::Int(Some(735_000)), Some("2012-05-12")), // Leap year. (ScalarValue::Int(Some(735_030)), Some("2012-06-11")), (ScalarValue::Int(Some(735_130)), Some("2012-09-19")), @@ -1882,7 +1888,9 @@ mod tests { (ScalarValue::Int(Some(734_544)), Some("2011-02-11")), (ScalarValue::Int(Some(734_513)), Some("2011-01-11")), (ScalarValue::Int(Some(3_652_424)), Some("9999-12-31")), - (ScalarValue::Int(Some(3_652_425)), Some("0000-00-00")), // mysql FROM_DAYS returns 0000-00-00 for any day >= 3652425 + (ScalarValue::Int(Some(3_652_425)), Some("0000-00-00")), /* mysql FROM_DAYS returns + * 0000-00-00 for any day + * >= 3652425 */ (ScalarValue::Int(None), None), ]; let mut ctx = EvalContext::default(); diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index eec5bdad844..679d4e003f8 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -1,11 +1,12 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -//! This crate implements a simple SQL query engine to work with TiDB pushed down executors. +//! This crate implements a simple SQL query engine to work with TiDB pushed +//! down executors. //! -//! The query engine is able to scan and understand rows stored by TiDB, run against a -//! series of executors and then return the execution result. The query engine is provided via -//! TiKV Coprocessor interface. However standalone UDF functions are also exported and can be used -//! standalone. +//! The query engine is able to scan and understand rows stored by TiDB, run +//! against a series of executors and then return the execution result. The +//! query engine is provided via TiKV Coprocessor interface. However standalone +//! UDF functions are also exported and can be used standalone. 
#![allow(elided_lifetimes_in_paths)] // Necessary until rpn_fn accepts functions annotated with lifetimes. #![allow(incomplete_features)] @@ -141,7 +142,8 @@ fn map_int_sig(value: ScalarFuncSig, children: &[Expr], mapper: F) -> Result< where F: Fn(bool, bool) -> RpnFnMeta, { - // FIXME: The signature for different signed / unsigned int should be inferred at TiDB side. + // FIXME: The signature for different signed / unsigned int should be inferred + // at TiDB side. if children.len() != 2 { return Err(other_err!( "ScalarFunction {:?} (params = {}) is not supported in batch mode", @@ -220,7 +222,8 @@ fn map_rhs_int_sig(value: ScalarFuncSig, children: &[Expr], mapper: F) -> Res where F: Fn(bool) -> RpnFnMeta, { - // FIXME: The signature for different signed / unsigned int should be inferred at TiDB side. + // FIXME: The signature for different signed / unsigned int should be inferred + // at TiDB side. if children.len() != 2 { return Err(other_err!( "ScalarFunction {:?} (params = {}) is not supported in batch mode", diff --git a/components/tidb_query_expr/src/types/expr.rs b/components/tidb_query_expr/src/types/expr.rs index b94c17f8cdf..26689e762ff 100644 --- a/components/tidb_query_expr/src/types/expr.rs +++ b/components/tidb_query_expr/src/types/expr.rs @@ -24,7 +24,8 @@ pub enum RpnExpressionNode { field_type: FieldType, }, - /// Represents a reference to a column in the columns specified in evaluation. + /// Represents a reference to a column in the columns specified in + /// evaluation. ColumnRef { offset: usize }, } @@ -80,7 +81,8 @@ impl RpnExpressionNode { } } -/// An expression in Reverse Polish notation, which is simply a list of RPN expression nodes. +/// An expression in Reverse Polish notation, which is simply a list of RPN +/// expression nodes. /// /// You may want to build it using `RpnExpressionBuilder`. 
#[derive(Debug)] diff --git a/components/tidb_query_expr/src/types/expr_builder.rs b/components/tidb_query_expr/src/types/expr_builder.rs index d6c8aebb0c1..33c9d48de67 100644 --- a/components/tidb_query_expr/src/types/expr_builder.rs +++ b/components/tidb_query_expr/src/types/expr_builder.rs @@ -26,9 +26,9 @@ pub struct RpnExpressionBuilder(Vec); impl RpnExpressionBuilder { /// Checks whether the given expression definition tree is supported. pub fn check_expr_tree_supported(c: &Expr) -> Result<()> { - // TODO: This logic relies on the correctness of the passed in GROUP BY eval type. However - // it can be different from the one we calculated (e.g. pass a column / fn with different - // type). + // TODO: This logic relies on the correctness of the passed in GROUP BY eval + // type. However it can be different from the one we calculated (e.g. + // pass a column / fn with different type). box_try!(EvalType::try_from(c.get_field_type().as_accessor().tp())); match c.get_tp() { @@ -54,8 +54,8 @@ impl RpnExpressionBuilder { Ok(()) } - /// Gets the result type when expression tree is converted to RPN expression and evaluated. - /// The result type will be either scalar or vector. + /// Gets the result type when expression tree is converted to RPN expression + /// and evaluated. The result type will be either scalar or vector. pub fn is_expr_eval_to_scalar(c: &Expr) -> Result { match c.get_tp() { ExprType::Null @@ -157,8 +157,9 @@ impl RpnExpressionBuilder { self } - /// Pushes a `Constant` node. The field type will be auto inferred by choosing an arbitrary - /// field type that matches the field type of the given value. + /// Pushes a `Constant` node. The field type will be auto inferred by + /// choosing an arbitrary field type that matches the field type of the + /// given value. 
#[must_use] pub fn push_constant_for_test(mut self, value: impl Into) -> Self { let value = value.into(); @@ -241,8 +242,8 @@ impl AsRef<[RpnExpressionNode]> for RpnExpressionBuilder { /// B E F G C D A /// ``` /// -/// The transform process is very much like a post-order traversal. This function does it -/// recursively. +/// The transform process is very much like a post-order traversal. This +/// function does it recursively. fn append_rpn_nodes_recursively( tree_node: Expr, rpn_nodes: &mut Vec, @@ -315,7 +316,8 @@ where let args: Vec<_> = tree_node.take_children().into(); let args_len = args.len(); - // Visit children first, then push current node, so that it is a post-order traversal. + // Visit children first, then push current node, so that it is a post-order + // traversal. for arg in args { append_rpn_nodes_recursively(arg, rpn_nodes, ctx, fn_mapper, max_columns)?; } @@ -550,9 +552,9 @@ mod tests { unreachable!() } - /// For testing `append_rpn_nodes_recursively`. It accepts protobuf function sig enum, which - /// cannot be modified by us in tests to support fn_a ~ fn_d. So let's just hard code some - /// substitute. + /// For testing `append_rpn_nodes_recursively`. It accepts protobuf function + /// sig enum, which cannot be modified by us in tests to support fn_a ~ + /// fn_d. So let's just hard code some substitute. fn fn_mapper(expr: &Expr) -> Result { // fn_a: CastIntAsInt // fn_b: CastIntAsReal diff --git a/components/tidb_query_expr/src/types/expr_eval.rs b/components/tidb_query_expr/src/types/expr_eval.rs index c8f9083f64f..2ba3b030ef0 100644 --- a/components/tidb_query_expr/src/types/expr_eval.rs +++ b/components/tidb_query_expr/src/types/expr_eval.rs @@ -22,8 +22,8 @@ use super::{ /// /// It can be either an owned node or a reference node. /// -/// When node comes from a column reference, it is a reference node (both value and field_type -/// are references). 
+/// When node comes from a column reference, it is a reference node (both value +/// and field_type are references). /// /// When nodes comes from an evaluated result, it is an owned node. #[derive(Debug)] @@ -64,17 +64,20 @@ impl<'a> RpnStackNodeVectorValue<'a> { } } -/// A type for each node in the RPN evaluation stack. It can be one of a scalar value node or a -/// vector value node. The vector value node can be either an owned vector value or a reference. +/// A type for each node in the RPN evaluation stack. It can be one of a scalar +/// value node or a vector value node. The vector value node can be either an +/// owned vector value or a reference. #[derive(Debug)] pub enum RpnStackNode<'a> { - /// Represents a scalar value. Comes from a constant node in expression list. + /// Represents a scalar value. Comes from a constant node in expression + /// list. Scalar { value: &'a ScalarValue, field_type: &'a FieldType, }, - /// Represents a vector value. Comes from a column reference or evaluated result. + /// Represents a vector value. Comes from a column reference or evaluated + /// result. Vector { value: RpnStackNodeVectorValue<'a>, field_type: &'a FieldType, @@ -123,7 +126,8 @@ impl<'a> RpnStackNode<'a> { /// Gets a reference of the element by logical index. /// - /// If this is a `Scalar` variant, the returned reference will be the same for any index. + /// If this is a `Scalar` variant, the returned reference will be the same + /// for any index. /// /// # Panics /// @@ -145,13 +149,15 @@ impl<'a> RpnStackNode<'a> { impl RpnExpression { /// Evaluates the expression into a vector. /// - /// If referred columns are not decoded, they will be decoded according to the given schema. + /// If referred columns are not decoded, they will be decoded according to + /// the given schema. /// /// # Panics /// /// Panics if the expression is not valid. /// - /// Panics when referenced column does not have equal length as specified in `rows`. 
+ /// Panics when referenced column does not have equal length as specified in + /// `rows`. pub fn eval<'a>( &'a self, ctx: &mut EvalContext, @@ -160,9 +166,10 @@ impl RpnExpression { input_logical_rows: &'a [usize], output_rows: usize, ) -> Result> { - // We iterate two times. The first time we decode all referred columns. The second time - // we evaluate. This is to make Rust's borrow checker happy because there will be - // mutable reference during the first iteration and we can't keep these references. + // We iterate two times. The first time we decode all referred columns. The + // second time we evaluate. This is to make Rust's borrow checker happy + // because there will be mutable reference during the first iteration + // and we can't keep these references. self.ensure_columns_decoded(ctx, schema, input_physical_columns, input_logical_rows)?; self.eval_decoded( ctx, @@ -194,11 +201,13 @@ impl RpnExpression { Ok(()) } - /// Evaluates the expression into a stack node. The input columns must be already decoded. + /// Evaluates the expression into a stack node. The input columns must be + /// already decoded. /// - /// It differs from `eval` in that `eval_decoded` needn't receive a mutable reference - /// to `LazyBatchColumnVec`. However, since `eval_decoded` doesn't decode columns, - /// it will panic if referred columns are not decoded. + /// It differs from `eval` in that `eval_decoded` needn't receive a mutable + /// reference to `LazyBatchColumnVec`. However, since `eval_decoded` + /// doesn't decode columns, it will panic if referred columns are not + /// decoded. /// /// # Panics /// @@ -206,7 +215,8 @@ impl RpnExpression { /// /// Panics if referred columns are not decoded. /// - /// Panics when referenced column does not have equal length as specified in `rows`. + /// Panics when referenced column does not have equal length as specified in + /// `rows`. 
pub fn eval_decoded<'a>( &'a self, ctx: &mut EvalContext, @@ -400,7 +410,8 @@ mod tests { assert_eq!(val.field_type().as_accessor().tp(), FieldTypeTp::Double); } - /// Single column node but row numbers in `eval()` does not match column length, should panic. + /// Single column node but row numbers in `eval()` does not match column + /// length, should panic. #[test] fn test_eval_single_column_node_mismatch_rows() { let (columns, logical_rows, schema) = new_single_column_node_fixture(); @@ -725,8 +736,8 @@ mod tests { assert_eq!(val.field_type().as_accessor().tp(), FieldTypeTp::LongLong); } - /// Binary function (arguments are both raw columns). The same column is referred multiple times - /// and it should be Ok. + /// Binary function (arguments are both raw columns). The same column is + /// referred multiple times and it should be Ok. #[test] fn test_eval_binary_function_raw_column() { /// foo(v1, v2) performs v1 * v2. diff --git a/components/tidb_query_expr/src/types/function.rs b/components/tidb_query_expr/src/types/function.rs index e657b9fe262..dee74d2a434 100644 --- a/components/tidb_query_expr/src/types/function.rs +++ b/components/tidb_query_expr/src/types/function.rs @@ -1,8 +1,9 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -//! People implementing RPN functions with fixed argument type and count don't necessarily -//! need to understand how `Evaluator` and `RpnDef` work. There's a procedural macro called -//! `rpn_fn` defined in `tidb_query_codegen` to help you create RPN functions. For example: +//! People implementing RPN functions with fixed argument type and count don't +//! necessarily need to understand how `Evaluator` and `RpnDef` work. There's a +//! procedural macro called `rpn_fn` defined in `tidb_query_codegen` to help you +//! create RPN functions. For example: //! //! ```ignore //! use tidb_query_codegen::rpn_fn; @@ -13,9 +14,10 @@ //! } //! ``` //! -//! 
You can still call the `foo` function directly; the macro preserves the original function -//! It creates a `foo_fn_meta()` function (simply add `_fn_meta` to the original -//! function name) which generates an `RpnFnMeta` struct. +//! You can still call the `foo` function directly; the macro preserves the +//! original function It creates a `foo_fn_meta()` function (simply add +//! `_fn_meta` to the original function name) which generates an `RpnFnMeta` +//! struct. //! //! For more information on the procedural macro, see the documentation in //! `components/tidb_query_codegen/src/rpn_function`. @@ -96,7 +98,8 @@ impl<'a, T: EvaluableRef<'a>> ScalarArg<'a, T> { impl<'a, T: EvaluableRef<'a>> RpnFnArg for ScalarArg<'a, T> { type Type = Option; - /// Gets the value in the given row. All rows of a `ScalarArg` share the same value. + /// Gets the value in the given row. All rows of a `ScalarArg` share the + /// same value. #[inline] fn get(&self, _row: usize) -> Option { self.0.clone() @@ -137,17 +140,19 @@ impl<'a, T: EvaluableRef<'a>, C: 'a + ChunkRef<'a, T>> RpnFnArg for VectorArg<'a /// Partial or complete argument definition of an RPN function. /// -/// `ArgDef` is constructed at the beginning of evaluating an RPN function. The types of -/// `RpnFnArg`s are determined at this stage. So there won't be dynamic dispatch or enum matches -/// when the function is applied to each row of the input. +/// `ArgDef` is constructed at the beginning of evaluating an RPN function. The +/// types of `RpnFnArg`s are determined at this stage. So there won't be dynamic +/// dispatch or enum matches when the function is applied to each row of the +/// input. pub trait ArgDef: std::fmt::Debug {} /// RPN function argument definitions in the form of a linked list. /// -/// For example, if an RPN function foo(Int, Real, Decimal) is applied to input of a scalar of -/// integer, a vector of reals and a vector of decimals, the constructed `ArgDef` will be -/// `Arg, Arg, Arg, Null>>>`. 
`Null` -/// indicates the end of the argument list. +/// For example, if an RPN function foo(Int, Real, Decimal) is applied to input +/// of a scalar of integer, a vector of reals and a vector of decimals, the +/// constructed `ArgDef` will be `Arg, Arg, +/// Arg, Null>>>`. `Null` indicates the end of the argument +/// list. #[derive(Debug)] pub struct Arg { arg: A, @@ -157,8 +162,8 @@ pub struct Arg { impl ArgDef for Arg {} impl Arg { - /// Gets the value of the head argument in the given row and returns the remaining argument - /// list. + /// Gets the value of the head argument in the given row and returns the + /// remaining argument list. #[inline] pub fn extract(&self, row: usize) -> (A::Type, &Rem) { (self.arg.get(row), &self.rem) @@ -179,16 +184,18 @@ impl ArgDef for Null {} /// A generic evaluator of an RPN function. /// -/// For every RPN function, the evaluator should be created first. Then, call its `eval` method -/// with the input to get the result vector. +/// For every RPN function, the evaluator should be created first. Then, call +/// its `eval` method with the input to get the result vector. /// /// There are two kinds of evaluators in general: -/// - `ArgConstructor`: It's a provided `Evaluator`. It is used in the `rpn_fn` attribute macro -/// to generate the `ArgDef`. The `def` parameter of its eval method is the already constructed -/// `ArgDef`. If it is the outmost evaluator, `def` should be `Null`. -/// - Custom evaluators which do the actual execution of the RPN function. The `def` parameter of -/// its eval method is the constructed `ArgDef`. Implementors can then extract values from the -/// arguments, execute the RPN function and fill the result vector. +/// - `ArgConstructor`: It's a provided `Evaluator`. It is used in the `rpn_fn` +/// attribute macro to generate the `ArgDef`. The `def` parameter of its eval +/// method is the already constructed `ArgDef`. If it is the outmost +/// evaluator, `def` should be `Null`. 
+/// - Custom evaluators which do the actual execution of the RPN function. The +/// `def` parameter of its eval method is the constructed `ArgDef`. +/// Implementors can then extract values from the arguments, execute the RPN +/// function and fill the result vector. pub trait Evaluator<'a> { fn eval( self, @@ -271,7 +278,8 @@ pub fn validate_expr_return_type(expr: &Expr, et: EvalType) -> Result<()> { } } -/// Validates whether the number of arguments of an expression node meets expectation. +/// Validates whether the number of arguments of an expression node meets +/// expectation. pub fn validate_expr_arguments_eq(expr: &Expr, args: usize) -> Result<()> { let received_args = expr.get_children().len(); if received_args == args { @@ -285,7 +293,8 @@ pub fn validate_expr_arguments_eq(expr: &Expr, args: usize) -> Result<()> { } } -/// Validates whether the number of arguments of an expression node >= expectation. +/// Validates whether the number of arguments of an expression node >= +/// expectation. pub fn validate_expr_arguments_gte(expr: &Expr, args: usize) -> Result<()> { let received_args = expr.get_children().len(); if received_args >= args { @@ -299,7 +308,8 @@ pub fn validate_expr_arguments_gte(expr: &Expr, args: usize) -> Result<()> { } } -/// Validates whether the number of arguments of an expression node <= expectation. +/// Validates whether the number of arguments of an expression node <= +/// expectation. 
pub fn validate_expr_arguments_lte(expr: &Expr, args: usize) -> Result<()> { let received_args = expr.get_children().len(); if received_args <= args { diff --git a/components/tidb_query_expr/src/types/test_util.rs b/components/tidb_query_expr/src/types/test_util.rs index e1f44b6553d..88ec11debc6 100644 --- a/components/tidb_query_expr/src/types/test_util.rs +++ b/components/tidb_query_expr/src/types/test_util.rs @@ -16,7 +16,8 @@ use crate::{types::function::RpnFnMeta, RpnExpressionBuilder}; /// Helper utility to evaluate RPN function over scalar inputs. /// -/// This structure should be only useful in tests because it is not very efficient. +/// This structure should be only useful in tests because it is not very +/// efficient. pub struct RpnFnScalarEvaluator { rpn_expr_builder: RpnExpressionBuilder, return_field_type: Option, @@ -35,9 +36,9 @@ impl RpnFnScalarEvaluator { } } - /// Pushes a parameter as the value of an argument for evaluation. The field type will be auto - /// inferred by choosing an arbitrary field type that matches the field type of the given - /// value. + /// Pushes a parameter as the value of an argument for evaluation. The field + /// type will be auto inferred by choosing an arbitrary field type that + /// matches the field type of the given value. #[must_use] pub fn push_param(mut self, value: impl Into) -> Self { self.rpn_expr_builder = self.rpn_expr_builder.push_constant_for_test(value); @@ -52,7 +53,8 @@ impl RpnFnScalarEvaluator { self } - /// Pushes a parameter as the value of an argument for evaluation using a specified field type. + /// Pushes a parameter as the value of an argument for evaluation using a + /// specified field type. #[must_use] pub fn push_param_with_field_type( mut self, @@ -67,8 +69,9 @@ impl RpnFnScalarEvaluator { /// Sets the return field type. 
/// - /// If not set, the evaluation will use an inferred return field type by choosing an arbitrary - /// field type that matches the field type of the generic type `T` when calling `evaluate()`. + /// If not set, the evaluation will use an inferred return field type by + /// choosing an arbitrary field type that matches the field type of the + /// generic type `T` when calling `evaluate()`. #[must_use] pub fn return_field_type(mut self, field_type: impl Into) -> Self { self.return_field_type = Some(field_type.into()); @@ -93,10 +96,11 @@ impl RpnFnScalarEvaluator { /// Evaluates the given function. /// - /// Note that this function does not respect previous `return_field_type()` call. + /// Note that this function does not respect previous `return_field_type()` + /// call. /// - /// This function exposes low-level evaluate results. Prefer to use `evaluate()` instead for - /// normal use case. + /// This function exposes low-level evaluate results. Prefer to use + /// `evaluate()` instead for normal use case. pub fn evaluate_raw( self, ret_field_type: impl Into, @@ -107,7 +111,8 @@ impl RpnFnScalarEvaluator { None => EvalContext::default(), }; - // Children expr descriptors are needed to map the signature into the actual function impl. + // Children expr descriptors are needed to map the signature into the actual + // function impl. 
let children_ed: Vec<_> = self .rpn_expr_builder .as_ref() diff --git a/components/tikv_alloc/src/error.rs b/components/tikv_alloc/src/error.rs index 68c5338ab7e..c098a387c2e 100644 --- a/components/tikv_alloc/src/error.rs +++ b/components/tikv_alloc/src/error.rs @@ -7,7 +7,8 @@ pub enum ProfError { MemProfilingNotEnabled, IOError(std::io::Error), JemallocError(String), - PathEncodingError(std::ffi::OsString), // When temp files are in a non-unicode directory, OsString.into_string() will cause this error, + PathEncodingError(std::ffi::OsString), /* When temp files are in a non-unicode directory, + * OsString.into_string() will cause this error, */ PathWithNulError(std::ffi::NulError), } diff --git a/components/tikv_alloc/src/lib.rs b/components/tikv_alloc/src/lib.rs index 1435ca2bbd0..507a1195a38 100644 --- a/components/tikv_alloc/src/lib.rs +++ b/components/tikv_alloc/src/lib.rs @@ -26,8 +26,7 @@ //! //! This crate accepts five cargo features: //! -//! - mem-profiling - compiles jemalloc and this crate with profiling -//! capability +//! - mem-profiling - compiles jemalloc and this crate with profiling capability //! //! - jemalloc - compiles tikv-jemallocator (default) //! @@ -134,8 +133,9 @@ mod runner { extern crate test; use test::*; - /// Check for ignored test cases with ignore message "#ifdef ". The test - /// case will be enabled if the specific environment variable is set. + /// Check for ignored test cases with ignore message "#ifdef ". + /// The test case will be enabled if the specific environment variable + /// is set. pub fn run_env_conditional_tests(cases: &[&TestDescAndFn]) { let cases: Vec<_> = cases .iter() diff --git a/components/tikv_alloc/src/trace.rs b/components/tikv_alloc/src/trace.rs index a55988450ee..f58bf31fd06 100644 --- a/components/tikv_alloc/src/trace.rs +++ b/components/tikv_alloc/src/trace.rs @@ -1,20 +1,22 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -//! 
This module provides some utilities to define the tree hierarchy to trace memory. +//! This module provides some utilities to define the tree hierarchy to trace +//! memory. //! -//! A memory trace is a tree that records how much memory its children and itself -//! uses, It doesn't need to match any function stacktrace, instead it should -//! have logically meaningful layout. +//! A memory trace is a tree that records how much memory its children and +//! itself uses, It doesn't need to match any function stacktrace, instead it +//! should have logically meaningful layout. //! -//! For example, memory usage should be divided into several components under the -//! root scope: TiDB EndPoint, Transaction, Raft, gRPC etc. TiDB EndPoint can divide -//! its children by queries, while Raft can divide memory by store and apply. Name -//! are defined as number for better performance. In practice, it can be mapped to -//! enumerates instead. +//! For example, memory usage should be divided into several components under +//! the root scope: TiDB EndPoint, Transaction, Raft, gRPC etc. TiDB EndPoint +//! can divide its children by queries, while Raft can divide memory by store +//! and apply. Name are defined as number for better performance. In practice, +//! it can be mapped to enumerates instead. //! -//! To define a memory trace tree, we can use the `mem_trace` macro. The `mem_trace` -//! macro constructs every node as a `MemoryTrace` which implements `MemoryTrace` trait. -//! We can also define a specified tree node by implementing `MemoryTrace` trait. +//! To define a memory trace tree, we can use the `mem_trace` macro. The +//! `mem_trace` macro constructs every node as a `MemoryTrace` which implements +//! `MemoryTrace` trait. We can also define a specified tree node by +//! implementing `MemoryTrace` trait. 
use std::{ fmt::{self, Debug, Display, Formatter}, diff --git a/components/tikv_kv/src/btree_engine.rs b/components/tikv_kv/src/btree_engine.rs index 9557f945034..b80c32e7088 100644 --- a/components/tikv_kv/src/btree_engine.rs +++ b/components/tikv_kv/src/btree_engine.rs @@ -24,9 +24,10 @@ use crate::{ type RwLockTree = RwLock>; -/// The BTreeEngine(based on `BTreeMap`) is in memory and only used in tests and benchmarks. -/// Note: The `snapshot()` and `async_snapshot()` methods are fake, the returned snapshot is not isolated, -/// they will be affected by the later modifies. +/// The BTreeEngine(based on `BTreeMap`) is in memory and only used in tests and +/// benchmarks. Note: The `snapshot()` and `async_snapshot()` methods are fake, +/// the returned snapshot is not isolated, they will be affected by the later +/// modifies. #[derive(Clone)] pub struct BTreeEngine { cf_names: Vec, @@ -102,7 +103,8 @@ impl Engine for BTreeEngine { Ok(()) } - /// warning: It returns a fake snapshot whose content will be affected by the later modifies! + /// warning: It returns a fake snapshot whose content will be affected by + /// the later modifies! fn async_snapshot( &self, _ctx: SnapContext<'_>, @@ -155,9 +157,10 @@ impl BTreeEngineIterator { } } - /// In general, there are 2 endpoints in a range, the left one and the right one. - /// This method will seek to the left one if left is `true`, else seek to the right one. - /// Returns true when the endpoint is valid, which means the endpoint exist and in `self.bounds`. + /// In general, there are 2 endpoints in a range, the left one and the right + /// one. This method will seek to the left one if left is `true`, else seek + /// to the right one. Returns true when the endpoint is valid, which means + /// the endpoint exist and in `self.bounds`. 
fn seek_to_range_endpoint(&mut self, range: (Bound, Bound), left: bool) -> bool { let tree = self.tree.read().unwrap(); let mut range = tree.range(range); diff --git a/components/tikv_kv/src/cursor.rs b/components/tikv_kv/src/cursor.rs index 44437e60f4c..995f2ed0e21 100644 --- a/components/tikv_kv/src/cursor.rs +++ b/components/tikv_kv/src/cursor.rs @@ -63,13 +63,15 @@ impl Cursor { self.cur_value_has_read.set(false); } - /// Mark key as read. Returns whether key was marked as read before this call. + /// Mark key as read. Returns whether key was marked as read before this + /// call. #[inline] fn mark_key_read(&self) -> bool { self.cur_key_has_read.replace(true) } - /// Mark value as read. Returns whether value was marked as read before this call. + /// Mark value as read. Returns whether value was marked as read before this + /// call. #[inline] fn mark_value_read(&self) -> bool { self.cur_value_has_read.replace(true) @@ -148,7 +150,8 @@ impl Cursor { } } else if self.prefix_seek { // When prefixed seek and prefix_same_as_start enabled - // seek_to_first may return false due to no key's prefix is same as iter lower bound's + // seek_to_first may return false due to no key's prefix is same as iter lower + // bound's return self.seek(key, statistics); } else { assert!(self.seek_to_first(statistics)); @@ -375,9 +378,9 @@ impl Cursor { } #[inline] - // As Rocksdb described, if Iterator::Valid() is false, there are two possibilities: - // (1) We reached the end of the data. In this case, status() is OK(); - // (2) there is an error. In this case status() is not OK(). + // As Rocksdb described, if Iterator::Valid() is false, there are two + // possibilities: (1) We reached the end of the data. In this case, status() + // is OK(); (2) there is an error. In this case status() is not OK(). // So check status when iterator is invalidated. 
pub fn valid(&self) -> Result { match self.iter.valid() { @@ -418,7 +421,8 @@ impl Cursor { } } -/// A handy utility to build a snapshot cursor according to various configurations. +/// A handy utility to build a snapshot cursor according to various +/// configurations. pub struct CursorBuilder<'a, S: Snapshot> { snapshot: &'a S, cf: CfName, @@ -555,7 +559,8 @@ impl<'a, S: 'a + Snapshot> CursorBuilder<'a, S> { iter_opt.set_key_only(self.key_only); iter_opt.set_max_skippable_internal_keys(self.max_skippable_internal_keys); - // prefix_seek is only used for single key, so set prefix_same_as_start for safety. + // prefix_seek is only used for single key, so set prefix_same_as_start for + // safety. if self.prefix_seek { iter_opt.use_prefix_seek(); iter_opt.set_prefix_same_as_start(true); diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index c96d996dc5c..e26318d7b4e 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -1,7 +1,8 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -//! There are multiple [`Engine`](kv::Engine) implementations, [`RaftKv`](crate::server::raftkv::RaftKv) -//! is used by the [`Server`](crate::server::Server). The [`BTreeEngine`](kv::BTreeEngine) and +//! There are multiple [`Engine`](kv::Engine) implementations, +//! [`RaftKv`](crate::server::raftkv::RaftKv) is used by the +//! [`Server`](crate::server::Server). The [`BTreeEngine`](kv::BTreeEngine) and //! [`RocksEngine`](RocksEngine) are used for testing only. #![feature(min_specialization)] @@ -150,7 +151,8 @@ impl From for raft_cmdpb::Request { } // For test purpose only. -// It's used to simulate observer actions in `rocksdb_engine`. See `RocksEngine::async_write_ext()`. +// It's used to simulate observer actions in `rocksdb_engine`. See +// `RocksEngine::async_write_ext()`. 
impl From for Modify { fn from(mut req: raft_cmdpb::Request) -> Modify { let name_to_cf = |name: &str| -> Option { @@ -280,8 +282,8 @@ pub trait Engine: Send + Clone + 'static { /// Writes data to the engine asynchronously with some extensions. /// - /// When the write request is proposed successfully, the `proposed_cb` is invoked. - /// When the write request is finished, the `write_cb` is invoked. + /// When the write request is proposed successfully, the `proposed_cb` is + /// invoked. When the write request is finished, the `write_cb` is invoked. fn async_write_ext( &self, ctx: &Context, @@ -341,10 +343,12 @@ pub trait Engine: Send + Clone + 'static { fn schedule_txn_extra(&self, _txn_extra: TxnExtra) {} } -/// A Snapshot is a consistent view of the underlying engine at a given point in time. +/// A Snapshot is a consistent view of the underlying engine at a given point in +/// time. /// -/// Note that this is not an MVCC snapshot, that is a higher level abstraction of a view of TiKV -/// at a specific timestamp. This snapshot is lower-level, a view of the underlying storage. +/// Note that this is not an MVCC snapshot, that is a higher level abstraction +/// of a view of TiKV at a specific timestamp. This snapshot is lower-level, a +/// view of the underlying storage. pub trait Snapshot: Sync + Send + Clone { type Iter: Iterator; type Ext<'a>: SnapshotExt @@ -357,7 +361,8 @@ pub trait Snapshot: Sync + Send + Clone { /// Get the value associated with `key` in `cf` column family fn get_cf(&self, cf: CfName, key: &Key) -> Result>; - /// Get the value associated with `key` in `cf` column family, with Options in `opts` + /// Get the value associated with `key` in `cf` column family, with Options + /// in `opts` fn get_cf_opt(&self, opts: ReadOptions, cf: CfName, key: &Key) -> Result>; fn iter(&self, cf: CfName, iter_opt: IterOptions) -> Result; // The minimum key this snapshot can retrieve. 
@@ -365,7 +370,8 @@ pub trait Snapshot: Sync + Send + Clone { fn lower_bound(&self) -> Option<&[u8]> { None } - // The maximum key can be fetched from the snapshot should less than the upper bound. + // The maximum key can be fetched from the snapshot should less than the upper + // bound. #[inline] fn upper_bound(&self) -> Option<&[u8]> { None @@ -375,8 +381,9 @@ pub trait Snapshot: Sync + Send + Clone { } pub trait SnapshotExt { - /// Retrieves a version that represents the modification status of the underlying data. - /// Version should be changed when underlying data is changed. + /// Retrieves a version that represents the modification status of the + /// underlying data. Version should be changed when underlying data is + /// changed. /// /// If the engine does not support data version, then `None` is returned. fn get_data_version(&self) -> Option { @@ -533,8 +540,8 @@ where /// /// Postcondition: `TLS_ENGINE_ANY` is non-null. pub fn set_tls_engine(engine: E) { - // Safety: we check that `TLS_ENGINE_ANY` is null to ensure we don't leak an existing - // engine; we ensure there are no other references to `engine`. + // Safety: we check that `TLS_ENGINE_ANY` is null to ensure we don't leak an + // existing engine; we ensure there are no other references to `engine`. TLS_ENGINE_ANY.with(move |e| unsafe { if (*e.get()).is_null() { let engine = Box::into_raw(Box::new(engine)) as *mut (); @@ -552,8 +559,9 @@ pub fn set_tls_engine(engine: E) { /// The current tls engine must have the same type as `E` (or at least /// there destructors must be compatible). pub unsafe fn destroy_tls_engine() { - // Safety: we check that `TLS_ENGINE_ANY` is non-null, we must ensure that references - // to `TLS_ENGINE_ANY` can never be stored outside of `TLS_ENGINE_ANY`. + // Safety: we check that `TLS_ENGINE_ANY` is non-null, we must ensure that + // references to `TLS_ENGINE_ANY` can never be stored outside of + // `TLS_ENGINE_ANY`. 
TLS_ENGINE_ANY.with(|e| { let ptr = *e.get(); if !ptr.is_null() { @@ -856,7 +864,8 @@ pub mod tests { .near_seek(&Key::from_raw(b"z\x00"), &mut statistics) .unwrap() ); - // Insert many key-values between 'x' and 'z' then near_seek will fallback to seek. + // Insert many key-values between 'x' and 'z' then near_seek will fallback to + // seek. for i in 0..super::SEEK_BOUND { let key = format!("y{}", i); must_put(engine, key.as_bytes(), b"3"); @@ -945,7 +954,8 @@ pub mod tests { ForPrev, } - // use step to control the distance between target key and current key in cursor. + // use step to control the distance between target key and current key in + // cursor. fn test_linear_seek( snapshot: &S, mode: ScanMode, diff --git a/components/tikv_kv/src/mock_engine.rs b/components/tikv_kv/src/mock_engine.rs index 3851f5148f4..bec883c1f71 100644 --- a/components/tikv_kv/src/mock_engine.rs +++ b/components/tikv_kv/src/mock_engine.rs @@ -81,7 +81,8 @@ impl ExpectedWrite { } } -/// `ExpectedWriteList` represents a list of writes expected to write to the engine +/// `ExpectedWriteList` represents a list of writes expected to write to the +/// engine struct ExpectedWriteList(Mutex>); // We implement drop here instead of on MockEngine diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index f0331403725..0cc90730acd 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -104,7 +104,7 @@ impl RocksEngine { let worker = Worker::new("engine-rocksdb"); let mut db_opts = db_opts.unwrap_or_default(); if io_rate_limiter.is_some() { - db_opts.set_env(get_env(None /*key_manager*/, io_rate_limiter).unwrap()); + db_opts.set_env(get_env(None /* key_manager */, io_rate_limiter).unwrap()); } let db = engine_rocks::util::new_engine_opt(&path, db_opts, cfs_opts)?; @@ -151,7 +151,8 @@ impl RocksEngine { } /// `pre_propose` is called before propose. 
- /// It's used to trigger "pre_propose_query" observers for RawKV API V2 by now. + /// It's used to trigger "pre_propose_query" observers for RawKV API V2 by + /// now. fn pre_propose(&self, mut batch: WriteData) -> Result { let requests = batch .modifies diff --git a/components/tikv_util/src/buffer_vec.rs b/components/tikv_util/src/buffer_vec.rs index c337e9e1659..d2247c011ec 100644 --- a/components/tikv_util/src/buffer_vec.rs +++ b/components/tikv_util/src/buffer_vec.rs @@ -4,8 +4,8 @@ use std::iter::*; use codec::prelude::BufferWriter; -/// A vector like container storing multiple buffers. Each buffer is a `[u8]` slice in -/// arbitrary length. +/// A vector like container storing multiple buffers. Each buffer is a `[u8]` +/// slice in arbitrary length. #[derive(Default, Clone)] pub struct BufferVec { data: Vec, @@ -38,7 +38,8 @@ impl BufferVec { Self::default() } - /// Constructs a new, empty `BufferVec` with the specified element capacity and data capacity. + /// Constructs a new, empty `BufferVec` with the specified element capacity + /// and data capacity. #[inline] pub fn with_capacity(elements_capacity: usize, data_capacity: usize) -> Self { Self { @@ -47,15 +48,15 @@ impl BufferVec { } } - /// Returns the number of buffers this `BufferVec` can hold without reallocating the - /// offsets array. + /// Returns the number of buffers this `BufferVec` can hold without + /// reallocating the offsets array. #[inline] pub fn capacity(&self) -> usize { self.offsets.capacity() } - /// Returns the number of buffers this `BufferVec` can hold without reallocating the - /// data array. + /// Returns the number of buffers this `BufferVec` can hold without + /// reallocating the data array. #[inline] pub fn data_capacity(&self) -> usize { self.data.capacity() @@ -100,11 +101,12 @@ impl BufferVec { } } - /// Returns a delegator that provides `extend` appends buffers together as one buffer - /// to the back. 
+ /// Returns a delegator that provides `extend` appends buffers together as + /// one buffer to the back. /// - /// Note that this function always creates a new buffer even if you don't call `extend` - /// on the delegator later, which simply results in appending a new empty buffer. + /// Note that this function always creates a new buffer even if you don't + /// call `extend` on the delegator later, which simply results in + /// appending a new empty buffer. #[inline] pub fn begin_concat_extend(&mut self) -> WithConcatExtend<'_> { WithConcatExtend::init(self) @@ -171,7 +173,8 @@ impl BufferVec { } } - /// Shortens the `BufferVec`, keeping the first `n` buffers and dropping the rest. + /// Shortens the `BufferVec`, keeping the first `n` buffers and dropping the + /// rest. /// /// If `n` >= current length, this has no effect. #[inline] diff --git a/components/tikv_util/src/callback.rs b/components/tikv_util/src/callback.rs index 62a39c7d06f..5f33ce10696 100644 --- a/components/tikv_util/src/callback.rs +++ b/components/tikv_util/src/callback.rs @@ -10,8 +10,8 @@ pub type Callback = Box; /// Note that leaking the callback can cause it to be never called but it /// rarely happens. /// -/// Also note that because `callback` and `arg_on_drop` may be called in the `drop` -/// method, do not panic inside them or use `safe_panic` instead. +/// Also note that because `callback` and `arg_on_drop` may be called in the +/// `drop` method, do not panic inside them or use `safe_panic` instead. pub fn must_call( callback: impl FnOnce(T) + Send + 'static, arg_on_drop: impl FnOnce() -> T + Send + 'static, diff --git a/components/tikv_util/src/codec/bytes.rs b/components/tikv_util/src/codec/bytes.rs index 36990ba7d08..034e8e73375 100644 --- a/components/tikv_util/src/codec/bytes.rs +++ b/components/tikv_util/src/codec/bytes.rs @@ -55,8 +55,8 @@ pub trait BytesEncoder: NumberEncoder { } /// Joins bytes with its length into a byte slice. 
It is more - /// efficient in both space and time compared to `encode_bytes`. Note that the encoded - /// result is not memcomparable. + /// efficient in both space and time compared to `encode_bytes`. Note that + /// the encoded result is not memcomparable. fn encode_compact_bytes(&mut self, data: &[u8]) -> Result<()> { self.encode_var_i64(data.len() as i64)?; self.write_all(data).map_err(From::from) @@ -95,13 +95,14 @@ fn encode_order_bytes(bs: &[u8], desc: bool) -> Vec { /// Gets the first encoded bytes' length in compactly encoded data. /// -/// Compact-encoding includes a VarInt encoded length prefix (1 ~ 9 bytes) and N bytes payload. -/// This function gets the total bytes length of compact-encoded data, including the length prefix. +/// Compact-encoding includes a VarInt encoded length prefix (1 ~ 9 bytes) and N +/// bytes payload. This function gets the total bytes length of compact-encoded +/// data, including the length prefix. /// /// Note: /// - This function won't check whether the bytes are encoded correctly. -/// - There can be multiple compact-encoded data, placed one by one. This function only returns -/// the length of the first one. +/// - There can be multiple compact-encoded data, placed one by one. This +/// function only returns the length of the first one. pub fn encoded_compact_len(mut encoded: &[u8]) -> usize { let last_encoded = encoded.as_ptr() as usize; let total_len = encoded.len(); @@ -137,13 +138,14 @@ impl CompactBytesFromFileDecoder for T {} /// Gets the first encoded bytes' length in memcomparable-encoded data. /// -/// Memcomparable-encoding includes a VarInt encoded length prefix (1 ~ 9 bytes) and N bytes payload. -/// This function gets the total bytes length of memcomparable-encoded data, including the length prefix. +/// Memcomparable-encoding includes a VarInt encoded length prefix (1 ~ 9 bytes) +/// and N bytes payload. 
This function gets the total bytes length of +/// memcomparable-encoded data, including the length prefix. /// /// Note: /// - This function won't check whether the bytes are encoded correctly. -/// - There can be multiple memcomparable-encoded data, placed one by one. This function only returns -/// the length of the first one. +/// - There can be multiple memcomparable-encoded data, placed one by one. +/// This function only returns the length of the first one. pub fn encoded_bytes_len(encoded: &[u8], desc: bool) -> usize { let mut idx = ENC_GROUP_SIZE; loop { @@ -221,8 +223,8 @@ pub fn decode_bytes(data: &mut BytesSlice<'_>, desc: bool) -> Result> { } } -/// Decodes bytes which are encoded by `encode_bytes` before just in place without malloc. -/// Please use this instead of `decode_bytes` if possible. +/// Decodes bytes which are encoded by `encode_bytes` before just in place +/// without malloc. Please use this instead of `decode_bytes` if possible. pub fn decode_bytes_in_place(data: &mut Vec, desc: bool) -> Result<()> { let mut write_offset = 0; let mut read_offset = 0; @@ -281,7 +283,8 @@ pub fn decode_bytes_in_place(data: &mut Vec, desc: bool) -> Result<()> { } } -/// Returns whether `encoded` bytes is encoded from `raw`. Returns `false` if `encoded` is invalid. +/// Returns whether `encoded` bytes is encoded from `raw`. Returns `false` if +/// `encoded` is invalid. pub fn is_encoded_from(encoded: &[u8], raw: &[u8], desc: bool) -> bool { let check_single_chunk = |encoded: &[u8], raw: &[u8]| { let len = raw.len(); @@ -310,8 +313,8 @@ pub fn is_encoded_from(encoded: &[u8], raw: &[u8], desc: bool) -> bool { return false; } - // Bytes are compared in reverse order because in real cases like TiDB, if two keys - // are different, the last a few bytes are more likely to be different. + // Bytes are compared in reverse order because in real cases like TiDB, if two + // keys are different, the last a few bytes are more likely to be different. 
let raw_chunks = raw.chunks_exact(ENC_GROUP_SIZE); // Check the last chunk first @@ -320,8 +323,9 @@ pub fn is_encoded_from(encoded: &[u8], raw: &[u8], desc: bool) -> bool { _ => return false, } - // The count of the remaining chunks must be the same. Using `size_hint` here is both safe and - // efficient because chunk iterators implement trait `TrustedLen`. + // The count of the remaining chunks must be the same. Using `size_hint` here is + // both safe and efficient because chunk iterators implement trait + // `TrustedLen`. if rev_encoded_chunks.size_hint() != raw_chunks.size_hint() { return false; } diff --git a/components/tikv_util/src/codec/number.rs b/components/tikv_util/src/codec/number.rs index 2f0b3fbcf3a..840da1cf85d 100644 --- a/components/tikv_util/src/codec/number.rs +++ b/components/tikv_util/src/codec/number.rs @@ -44,27 +44,31 @@ fn order_decode_f64(u: u64) -> f64 { pub trait NumberEncoder: Write { /// Writes the encoded value to buf. - /// It guarantees that the encoded value is in ascending order for comparison. + /// It guarantees that the encoded value is in ascending order for + /// comparison. fn encode_i64(&mut self, v: i64) -> Result<()> { let u = order_encode_i64(v); self.encode_u64(u) } /// Writes the encoded value to buf. - /// It guarantees that the encoded value is in descending order for comparison. + /// It guarantees that the encoded value is in descending order for + /// comparison. fn encode_i64_desc(&mut self, v: i64) -> Result<()> { let u = order_encode_i64(v); self.encode_u64_desc(u) } /// Writes the encoded value to slice buf. - /// It guarantees that the encoded value is in ascending order for comparison. + /// It guarantees that the encoded value is in ascending order for + /// comparison. fn encode_u64(&mut self, v: u64) -> Result<()> { self.write_u64::(v).map_err(From::from) } /// Writes the encoded value to slice buf. - /// It guarantees that the encoded value is in descending order for comparison. 
+ /// It guarantees that the encoded value is in descending order for + /// comparison. fn encode_u64_desc(&mut self, v: u64) -> Result<()> { self.write_u64::(!v).map_err(From::from) } @@ -100,14 +104,16 @@ pub trait NumberEncoder: Write { } /// Writes the encoded value to slice buf. - /// It guarantees that the encoded value is in ascending order for comparison. + /// It guarantees that the encoded value is in ascending order for + /// comparison. fn encode_f64(&mut self, f: f64) -> Result<()> { let u = order_encode_f64(f); self.encode_u64(u) } /// Writes the encoded value to slice buf. - /// It guarantees that the encoded value is in descending order for comparison. + /// It guarantees that the encoded value is in descending order for + /// comparison. fn encode_f64_desc(&mut self, f: f64) -> Result<()> { let u = order_encode_f64(f); self.encode_u64_desc(u) diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index 6982c66b67a..6655531c294 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -539,7 +539,8 @@ pub fn normalize_path>(path: P) -> PathBuf { ret } -/// Normalizes the path and canonicalizes its longest physically existing sub-path. +/// Normalizes the path and canonicalizes its longest physically existing +/// sub-path. 
fn canonicalize_non_existing_path>(path: P) -> std::io::Result { fn try_canonicalize_normalized_path(path: &Path) -> std::io::Result { use std::path::Component; @@ -591,7 +592,8 @@ fn canonicalize_non_existing_path>(path: P) -> std::io::Result>(path: P) -> std::io::Result { match path.as_ref().canonicalize() { Err(e) if e.kind() == std::io::ErrorKind::NotFound => canonicalize_non_existing_path(path), @@ -714,7 +716,8 @@ mod check_kernel { Ok(()) } - /// `check_kernel_params` checks kernel parameters, following are checked so far: + /// `check_kernel_params` checks kernel parameters, following are checked so + /// far: /// - `net.core.somaxconn` should be greater or equal to 32768. /// - `net.ipv4.tcp_syncookies` should be 0 /// - `vm.swappiness` shoud be 0 @@ -1034,7 +1037,8 @@ fn get_file_count(data_path: &str, extension: &str) -> Result Result<(), ConfigError> { let op = "data-dir.empty.check"; let dir = Path::new(data_path); @@ -1052,7 +1056,8 @@ pub fn check_data_dir_empty(data_path: &str, extension: &str) -> Result<(), Conf } /// `check_addr` validates an address. Addresses are formed like "Host:Port". -/// More details about **Host** and **Port** can be found in WHATWG URL Standard. +/// More details about **Host** and **Port** can be found in WHATWG URL +/// Standard. /// /// Return whether the address is unspecified, i.e. `0.0.0.0` or `::0` pub fn check_addr(addr: &str) -> Result { @@ -1238,9 +1243,9 @@ impl TomlLine { } } -/// TomlWriter use to update the config file and only cover the most commom toml -/// format that used by tikv config file, toml format like: quoted keys, multi-line -/// value, inline table, etc, are not supported, see +/// TomlWriter use to update the config file and only cover the most common toml +/// format that used by tikv config file, toml format like: quoted keys, +/// multi-line value, inline table, etc, are not supported, see /// for more detail. pub struct TomlWriter { dst: Vec, @@ -1402,14 +1407,15 @@ macro_rules! 
numeric_enum_serializing_mod { } /// Helper for migrating Raft data safely. Such migration is defined as -/// multiple states that can be uniquely distinguished. And the transtions +/// multiple states that can be uniquely distinguished. And the transitions /// between these states are atomic. /// /// States: /// 1. Init - Only source directory contains Raft data. -/// 2. Migrating - A marker file contains the path of source directory. The source -/// directory contains a complete copy of Raft data. Target directory may exist. -/// 3. Completed - Only target directory contains Raft data. Marker file may exist. +/// 2. Migrating - A marker file contains the path of source directory. The +/// source directory contains a complete copy of Raft data. Target +/// directory may exist. 3. Completed - Only target directory contains Raft +/// data. Marker file may exist. pub struct RaftDataStateMachine { root: PathBuf, in_progress_marker: PathBuf, @@ -1454,8 +1460,9 @@ impl RaftDataStateMachine { Ok(()) } - /// Returns whether a migration is needed. When it's needed, enters the `Migrating` - /// state. Otherwise prepares the target directory for opening. + /// Returns whether a migration is needed. When it's needed, enters the + /// `Migrating` state. Otherwise prepares the target directory for + /// opening. pub fn before_open_target(&mut self) -> bool { // Clean up trash directory if there is any. for p in [&self.source, &self.target] { @@ -1478,8 +1485,8 @@ impl RaftDataStateMachine { Self::must_remove(&self.source); return false; } - // It's actually in Completed state, just in the reverse direction. - // Equivalent to Init state. + // It's actually in Completed state, just in the reverse + // direction. Equivalent to Init state. 
} else { assert!(real_source == self.source); Self::must_remove(&self.target); @@ -1503,8 +1510,8 @@ impl RaftDataStateMachine { Self::must_remove(&self.in_progress_marker); } - // `after_dump_data` involves two atomic operations, insert a check point between - // them to test crash safety. + // `after_dump_data` involves two atomic operations, insert a check point + // between them to test crash safety. #[cfg(test)] fn after_dump_data_with_check(&mut self, check: &F) { assert!(Self::data_exists(&self.source)); @@ -1525,8 +1532,8 @@ impl RaftDataStateMachine { Self::sync_dir(&self.root); } - // Assumes there is a marker file. Returns None when the content of marker file is - // incomplete. + // Assumes there is a marker file. Returns None when the content of marker file + // is incomplete. fn read_marker(&self) -> Option { let marker = fs::read_to_string(&self.in_progress_marker).unwrap(); if marker.ends_with("//") { diff --git a/components/tikv_util/src/deadline.rs b/components/tikv_util/src/deadline.rs index c02d0a19fa9..84463f507b9 100644 --- a/components/tikv_util/src/deadline.rs +++ b/components/tikv_util/src/deadline.rs @@ -31,7 +31,8 @@ impl Deadline { Self { deadline } } - /// Creates a new `Deadline` that will reach after specified amount of time in future. + /// Creates a new `Deadline` that will reach after specified amount of time + /// in future. pub fn from_now(after_duration: Duration) -> Self { let deadline = Instant::now_coarse() + after_duration; Self { deadline } diff --git a/components/tikv_util/src/future.rs b/components/tikv_util/src/future.rs index 8f639a9e5ef..61d6f33ad4c 100644 --- a/components/tikv_util/src/future.rs +++ b/components/tikv_util/src/future.rs @@ -17,8 +17,8 @@ use futures::{ use crate::callback::must_call; -/// Generates a paired future and callback so that when callback is being called, its result -/// is automatically passed as a future result. 
+/// Generates a paired future and callback so that when callback is being +/// called, its result is automatically passed as a future result. pub fn paired_future_callback() -> (Box, futures_oneshot::Receiver) where T: Send + 'static, @@ -52,8 +52,9 @@ where (callback, future) } -/// Create a stream proxy with buffer representing the remote stream. The returned task -/// will receive messages from the remote stream as much as possible. +/// Create a stream proxy with buffer representing the remote stream. The +/// returned task will receive messages from the remote stream as much as +/// possible. pub fn create_stream_with_buffer( s: S, size: usize, @@ -165,7 +166,8 @@ impl PollAtWake { Ok(_) => return, Err(s) => { if s == NOTIFIED { - // Only this thread can change the state from NOTIFIED, so it has to succeed. + // Only this thread can change the state from NOTIFIED, so it has to + // succeed. match arc_self.state.compare_exchange( NOTIFIED, POLLING, diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index 1fec3722a64..ecfeb7253fd 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -498,10 +498,11 @@ pub fn set_panic_hook(panic_abort: bool, data_dir: &str) { ); // There might be remaining logs in the async logger. - // To collect remaining logs and also collect future logs, replace the old one with a - // terminal logger. - // When the old global async logger is replaced, the old async guard will be taken and dropped. - // In the drop() the async guard, it waits for the finish of the remaining logs in the async logger. + // To collect remaining logs and also collect future logs, replace the old one + // with a terminal logger. + // When the old global async logger is replaced, the old async guard will be + // taken and dropped. In the drop() the async guard, it waits for the + // finish of the remaining logs in the async logger. 
if let Some(level) = ::log::max_level().to_level() { let drainer = logger::text_format(logger::term_writer(), true); let _ = logger::init_log( diff --git a/components/tikv_util/src/log.rs b/components/tikv_util/src/log.rs index 208280519e8..10facfa2287 100644 --- a/components/tikv_util/src/log.rs +++ b/components/tikv_util/src/log.rs @@ -6,10 +6,13 @@ macro_rules! crit( ($($args:tt)+) => { ::slog_global::crit!($($args)+) };); -/// Logs a error level message using the slog global logger. /// Use '?' to output error in debug format or '%' to ouput error in display format. -/// As the third and forth rules shown, the last log field should follow a ',' to seperate the 'err' field. eg. `error!(?e, "msg"; "foo" => foo,);` -/// If you don't want to output error code, just use the common form like other macros. -/// Require `slog_global` dependency and `#![feature(min_speacilization)]` in all crates. +/// Logs a error level message using the slog global logger. /// Use '?' to +/// output error in debug format or '%' to output error in display format. As +/// the third and forth rules shown, the last log field should follow a ',' to +/// separate the 'err' field. eg. `error!(?e, "msg"; "foo" => foo,);` +/// If you don't want to output error code, just use the common form like other +/// macros. Require `slog_global` dependency and +/// `#![feature(min_speacilization)]` in all crates. #[macro_export] macro_rules! error { (?$e:expr; $l:literal) => { diff --git a/components/tikv_util/src/logger/file_log.rs b/components/tikv_util/src/logger/file_log.rs index 3b8d4ae3ff0..5d0300ccdc5 100644 --- a/components/tikv_util/src/logger/file_log.rs +++ b/components/tikv_util/src/logger/file_log.rs @@ -41,18 +41,21 @@ pub trait Rotator: Send { /// Return if the file need to be rotated. fn should_rotate(&self) -> bool; - /// Call by operator, update rotators' state while the operator try to write some data. 
+ /// Call by operator, update rotators' state while the operator try to write + /// some data. fn on_write(&mut self, data: &[u8]) -> io::Result<()>; - /// Call by operator, update rotators' state while the operator execute a rotation. + /// Call by operator, update rotators' state while the operator execute a + /// rotation. fn on_rotate(&mut self) -> io::Result<()>; } /// This `FileLogger` will iterate over a series of `Rotators`, /// once the context trigger the `Rotator`, it will execute a rotation. /// -/// After rotating, the original log file would be renamed to "{original name}.{"%Y-%m-%dT%H-%M-%S%.3f"}". -/// Note: log file will *not* be compressed or otherwise modified. +/// After rotating, the original log file would be renamed to "{original +/// name}.{"%Y-%m-%dT%H-%M-%S%.3f"}". Note: log file will *not* be compressed or +/// otherwise modified. pub struct RotatingFileLogger { path: PathBuf, file: File, diff --git a/components/tikv_util/src/logger/formatter.rs b/components/tikv_util/src/logger/formatter.rs index fe536eff2b0..c53c5896519 100644 --- a/components/tikv_util/src/logger/formatter.rs +++ b/components/tikv_util/src/logger/formatter.rs @@ -2,7 +2,8 @@ use std::io; -/// Writes file name into the writer, removes the character which not match `[a-zA-Z0-9\.-_]` +/// Writes file name into the writer, removes the character which not match +/// `[a-zA-Z0-9\.-_]` pub fn write_file_name(writer: &mut W, file_name: &str) -> io::Result<()> where W: io::Write + ?Sized, @@ -30,9 +31,9 @@ where Ok(()) } -/// According to [RFC: Unified Log Format], it returns `true` when this byte stream contains -/// the following characters, which means this input stream needs to be JSON encoded. -/// Otherwise, it returns `false`. +/// According to [RFC: Unified Log Format], it returns `true` when this byte +/// stream contains the following characters, which means this input stream +/// needs to be JSON encoded. Otherwise, it returns `false`. 
/// /// - U+0000 (NULL) ~ U+0020 (SPACE) /// - U+0022 (QUOTATION MARK) @@ -41,7 +42,6 @@ where /// - U+005D (RIGHT SQUARE BRACKET) /// /// [RFC: Unified Log Format]: (https://github.com/tikv/rfcs/blob/master/text/2018-12-19-unified-log-format.md) -/// #[inline] fn need_json_encode(bytes: &[u8]) -> bool { for &byte in bytes { @@ -52,13 +52,13 @@ fn need_json_encode(bytes: &[u8]) -> bool { false } -/// According to [RFC: Unified Log Format], escapes the given data and writes it into a writer. -/// If there is no character [`need json encode`], it writes the data into the writer directly. -/// Else, it serializes the given data structure as JSON into a writer. +/// According to [RFC: Unified Log Format], escapes the given data and writes it +/// into a writer. If there is no character [`need json encode`], it writes the +/// data into the writer directly. Else, it serializes the given data structure +/// as JSON into a writer. /// /// [RFC: Unified Log Format]: (https://github.com/tikv/rfcs/blob/master/text/2018-12-19-unified-log-format.md) /// [`need json encode`]: #method.need_json_encode -/// pub fn write_escaped_str(writer: &mut W, value: &str) -> io::Result<()> where W: io::Write + ?Sized, diff --git a/components/tikv_util/src/logger/mod.rs b/components/tikv_util/src/logger/mod.rs index 35bf5f4c8e0..dc5d4a3b862 100644 --- a/components/tikv_util/src/logger/mod.rs +++ b/components/tikv_util/src/logger/mod.rs @@ -179,7 +179,8 @@ where TikvFormat::new(decorator, true) } -/// Same as text_format, but is adjusted to be closer to vanilla RocksDB logger format. +/// Same as text_format, but is adjusted to be closer to vanilla RocksDB logger +/// format. pub fn rocks_text_format(io: W, enable_timestamp: bool) -> RocksFormat> where W: io::Write, @@ -237,8 +238,8 @@ pub fn get_level_by_string(lv: &str) -> Option { } } -// The `to_string()` function of `slog::Level` produces values like `erro` and `trce` instead of -// the full words. This produces the full word. 
+// The `to_string()` function of `slog::Level` produces values like `erro` and +// `trce` instead of the full words. This produces the full word. pub fn get_string_by_level(lv: Level) -> &'static str { match lv { Level::Critical => "fatal", @@ -421,7 +422,8 @@ where } } -// Filters logs with operation cost lower than threshold. Otherwise output logs to inner drainer +// Filters logs with operation cost lower than threshold. Otherwise output logs +// to inner drainer struct SlowLogFilter { threshold: u64, inner: D, @@ -686,8 +688,8 @@ mod tests { use super::*; - // Due to the requirements of `Logger::root*` on a writer with a 'static lifetime - // we need to make a Thread Local, + // Due to the requirements of `Logger::root*` on a writer with a 'static + // lifetime we need to make a Thread Local, // and implement a custom writer. thread_local! { static BUFFER: RefCell> = RefCell::new(Vec::new()); @@ -861,7 +863,8 @@ mod tests { BUFFER.with(|buffer| { let mut buffer = buffer.borrow_mut(); let output = from_utf8(&*buffer).unwrap(); - // only check the log len here as some field like timestamp, location may change. + // only check the log len here as some field like timestamp, location may + // change. assert_eq!(output.len(), log.len()); buffer.clear(); }); @@ -880,7 +883,8 @@ mod tests { check_log(expected); } - /// Removes the wrapping signs, peels `"[hello]"` to `"hello"`, or peels `"(hello)"` to `"hello"`, + /// Removes the wrapping signs, peels `"[hello]"` to `"hello"`, or peels + /// `"(hello)"` to `"hello"`, fn peel(output: &str) -> &str { assert!(output.len() >= 2); &(output[1..output.len() - 1]) diff --git a/components/tikv_util/src/macros.rs b/components/tikv_util/src/macros.rs index ff32d255276..10889046a3b 100644 --- a/components/tikv_util/src/macros.rs +++ b/components/tikv_util/src/macros.rs @@ -41,7 +41,8 @@ macro_rules! slow_log { } -/// Makes a thread name with an additional tag inherited from the current thread. 
+/// Makes a thread name with an additional tag inherited from the current +/// thread. #[macro_export] macro_rules! thd_name { ($name:expr) => {{ @@ -54,7 +55,8 @@ macro_rules! thd_name { /// Simulates Go's defer. /// /// Please note that, different from go, this defer is bound to scope. -/// When exiting the scope, its deferred calls are executed in last-in-first-out order. +/// When exiting the scope, its deferred calls are executed in last-in-first-out +/// order. #[macro_export] macro_rules! defer { ($t:expr) => { @@ -62,8 +64,8 @@ macro_rules! defer { }; } -/// Waits for async operation. It returns `Option` after the expression gets executed. -/// It only accepts a `Result` expression. +/// Waits for async operation. It returns `Option` after the expression +/// gets executed. It only accepts a `Result` expression. #[macro_export] macro_rules! wait_op { ($expr:expr) => { @@ -87,7 +89,8 @@ macro_rules! wait_op { }}; } -/// Checks `Result>`, and returns early when it meets `Err` or `Ok(None)`. +/// Checks `Result>`, and returns early when it meets `Err` or +/// `Ok(None)`. #[macro_export] macro_rules! try_opt { ($expr:expr) => {{ @@ -99,8 +102,8 @@ macro_rules! try_opt { }}; } -/// Checks `Result>`, and returns early when it meets `Err` or `Ok(None)`. -/// return `Ok(or)` when met `Ok(None)`. +/// Checks `Result>`, and returns early when it meets `Err` or +/// `Ok(None)`. return `Ok(or)` when met `Ok(None)`. #[macro_export] macro_rules! try_opt_or { ($expr:expr, $or:expr) => {{ @@ -115,8 +118,8 @@ macro_rules! try_opt_or { /// A safe panic macro that prevents double panic. /// /// You probably want to use this macro instead of `panic!` in a `drop` method. -/// It checks whether the current thread is unwinding because of panic. If it is, -/// log an error message instead of causing double panic. +/// It checks whether the current thread is unwinding because of panic. If it +/// is, log an error message instead of causing double panic. 
#[macro_export] macro_rules! safe_panic { () => ({ diff --git a/components/tikv_util/src/memory.rs b/components/tikv_util/src/memory.rs index cd067f2c382..0a2f49461c5 100644 --- a/components/tikv_util/src/memory.rs +++ b/components/tikv_util/src/memory.rs @@ -33,7 +33,8 @@ impl HeapSize for Region { let mut size = self.start_key.capacity() + self.end_key.capacity(); size += mem::size_of::(); size += self.peers.capacity() * mem::size_of::(); - // There is still a `bytes` in `EncryptionMeta`. Ignore it becaure it could be shared. + // There is still a `bytes` in `EncryptionMeta`. Ignore it because it could be + // shared. size += mem::size_of::(); size } diff --git a/components/tikv_util/src/metrics/process_linux.rs b/components/tikv_util/src/metrics/process_linux.rs index 0b1c9777b09..9d661d1d434 100644 --- a/components/tikv_util/src/metrics/process_linux.rs +++ b/components/tikv_util/src/metrics/process_linux.rs @@ -1,7 +1,7 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -//! This module is a subset of rust-prometheus's process collector, without the fd collector -//! to avoid memory fragmentation issues when open fd is large. +//! This module is a subset of rust-prometheus's process collector, without the +//! fd collector to avoid memory fragmentation issues when open fd is large. use std::io::{Error, ErrorKind, Result}; diff --git a/components/tikv_util/src/metrics/threads_dummy.rs b/components/tikv_util/src/metrics/threads_dummy.rs index 3bc60a4f5d4..bd718b34b00 100644 --- a/components/tikv_util/src/metrics/threads_dummy.rs +++ b/components/tikv_util/src/metrics/threads_dummy.rs @@ -1,11 +1,7 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. -/*! - -Currently we does not support collecting CPU usage of threads for systems -other than Linux. PRs are welcome! - -*/ +//! Currently we does not support collecting CPU usage of threads for systems +//! other than Linux. PRs are welcome! 
use std::io; diff --git a/components/tikv_util/src/metrics/threads_linux.rs b/components/tikv_util/src/metrics/threads_linux.rs index 4eae41b0f06..608b60949e8 100644 --- a/components/tikv_util/src/metrics/threads_linux.rs +++ b/components/tikv_util/src/metrics/threads_linux.rs @@ -214,7 +214,8 @@ impl Collector for ThreadsCollector { } } -/// Sanitizes the thread name. Keeps `a-zA-Z0-9_:`, replaces `-` and ` ` with `_`, and drops the others. +/// Sanitizes the thread name. Keeps `a-zA-Z0-9_:`, replaces `-` and ` ` with +/// `_`, and drops the others. /// /// Examples: /// @@ -368,7 +369,8 @@ impl ThreadInfoStatistics { self.tid_names.entry(tid).or_insert(name); // To get a percentage result, - // we pre-multiply `cpu_time` by 100 here rather than inside the `update_metric`. + // we pre-multiply `cpu_time` by 100 here rather than inside the + // `update_metric`. let cpu_time = thread::linux::cpu_total(&stat) * 100.0; update_metric( &mut self.metrics_total.cpu_times, diff --git a/components/tikv_util/src/mpsc/batch.rs b/components/tikv_util/src/mpsc/batch.rs index f135c938e42..a635a75d4e4 100644 --- a/components/tikv_util/src/mpsc/batch.rs +++ b/components/tikv_util/src/mpsc/batch.rs @@ -196,8 +196,9 @@ impl Receiver { } } -/// Creates a unbounded channel with a given `notify_size`, which means if there are more pending -/// messages in the channel than `notify_size`, the `Sender` will auto notify the `Receiver`. +/// Creates a unbounded channel with a given `notify_size`, which means if there +/// are more pending messages in the channel than `notify_size`, the `Sender` +/// will auto notify the `Receiver`. /// /// # Panics /// if `notify_size` equals to 0. @@ -215,8 +216,9 @@ pub fn unbounded(notify_size: usize) -> (Sender, Receiver) { ) } -/// Creates a bounded channel with a given `notify_size`, which means if there are more pending -/// messages in the channel than `notify_size`, the `Sender` will auto notify the `Receiver`. 
+/// Creates a bounded channel with a given `notify_size`, which means if there +/// are more pending messages in the channel than `notify_size`, the `Sender` +/// will auto notify the `Receiver`. /// /// # Panics /// if `notify_size` equals to 0. @@ -285,9 +287,10 @@ where I: Fn() -> E + Unpin, C: BatchCollector + Unpin, { - /// Creates a new `BatchReceiver` with given `initializer` and `collector`. `initializer` is - /// used to generate a initial value, and `collector` will collect every (at most - /// `max_batch_size`) raw items into the batched value. + /// Creates a new `BatchReceiver` with given `initializer` and `collector`. + /// `initializer` is used to generate a initial value, and `collector` + /// will collect every (at most `max_batch_size`) raw items into the + /// batched value. pub fn new(rx: Receiver, max_batch_size: usize, initializer: I, collector: C) -> Self { BatchReceiver { rx, diff --git a/components/tikv_util/src/mpsc/mod.rs b/components/tikv_util/src/mpsc/mod.rs index 99dd6b3e5d0..fbd089ebb9e 100644 --- a/components/tikv_util/src/mpsc/mod.rs +++ b/components/tikv_util/src/mpsc/mod.rs @@ -1,12 +1,8 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. -/*! - -This module provides an implementation of mpsc channel based on -crossbeam_channel. Comparing to the crossbeam_channel, this implementation -supports closed detection and try operations. - -*/ +//! This module provides an implementation of mpsc channel based on +//! crossbeam_channel. Comparing to the crossbeam_channel, this implementation +//! supports closed detection and try operations. pub mod batch; use std::{ @@ -99,7 +95,8 @@ impl Sender { self.sender.is_empty() } - /// Blocks the current thread until a message is sent or the channel is disconnected. + /// Blocks the current thread until a message is sent or the channel is + /// disconnected. 
#[inline] pub fn send(&self, t: T) -> Result<(), SendError> { if self.state.is_sender_connected() { diff --git a/components/tikv_util/src/stream.rs b/components/tikv_util/src/stream.rs index d491b73c1b2..b7ba46c45bf 100644 --- a/components/tikv_util/src/stream.rs +++ b/components/tikv_util/src/stream.rs @@ -71,7 +71,8 @@ pub fn error_stream(e: io::Error) -> impl Stream> + Unp /// otherwise the executor's states may be disrupted. /// /// This means the future must only use async functions. -// FIXME: get rid of this function, so that futures_executor::block_on is sufficient. +// FIXME: get rid of this function, so that futures_executor::block_on is +// sufficient. pub fn block_on_external_io(f: F) -> F::Output { // we need a Tokio runtime, Tokio futures require Tokio executor. Builder::new_current_thread() @@ -90,8 +91,8 @@ pub trait RetryError { /// Retries a future execution. /// -/// This method implements truncated exponential back-off retry strategies outlined in -/// and +/// This method implements truncated exponential back-off retry strategies +/// outlined in and /// /// Since rusoto does not have transparent auto-retry /// (), we need to implement this manually. diff --git a/components/tikv_util/src/sys/cgroup.rs b/components/tikv_util/src/sys/cgroup.rs index f475cf3ddda..59830748382 100644 --- a/components/tikv_util/src/sys/cgroup.rs +++ b/components/tikv_util/src/sys/cgroup.rs @@ -37,18 +37,20 @@ use procfs::process::{MountInfo, Process}; // For more details about cgrop v2, PTAL // https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html. // -// The above examples are implicitly based on a premise that paths in `/proc/self/cgroup` -// can be appended to `/sys/fs/cgroup` directly to get the final paths. Generally it's -// correct for Linux hosts but maybe wrong for containers. For containers, cgroup file systems -// can be based on other mount points. 
For example: +// The above examples are implicitly based on a premise that paths in +// `/proc/self/cgroup` can be appended to `/sys/fs/cgroup` directly to get the +// final paths. Generally it's correct for Linux hosts but maybe wrong for +// containers. For containers, cgroup file systems can be based on other mount +// points. For example: // // /proc/self/cgroup: // 4:memory:/path/to/the/controller // /proc/self/mountinfo: -// 34 25 0:30 /path/to/the/controller /sys/fs/cgroup/memory relatime - cgroup cgroup memory -// `path/to/the/controller` is possible to be not accessable in the container. However from the -// `mountinfo` file we can know the path is mounted on `sys/fs/cgroup/memory`, then we can build -// the absolute path based on the mountinfo file. +// 34 25 0:30 /path/to/the/controller /sys/fs/cgroup/memory relatime - cgroup +// cgroup memory `path/to/the/controller` is possible to be not accessable in +// the container. However from the `mountinfo` file we can know the path is +// mounted on `sys/fs/cgroup/memory`, then we can build the absolute path based +// on the mountinfo file. // // For the format of the mountinfo file, PTAL https://man7.org/linux/man-pages/man5/proc.5.html. @@ -175,10 +177,12 @@ fn is_cgroup2_unified_mode() -> Result { } // From cgroup spec: -// "/proc/$PID/cgroup" lists a process’s cgroup membership. If legacy cgroup is in use in -// the system, this file may contain multiple lines, one for each hierarchy. +// "/proc/$PID/cgroup" lists a process’s cgroup membership. If legacy cgroup is +// in use in the system, this file may contain multiple lines, one for each +// hierarchy. // -// The format is "::". For example, "10:cpuset:/test-cpuset". +// The format is "::". For example, +// "10:cpuset:/test-cpuset". 
fn parse_proc_cgroup_v1(lines: &str) -> HashMap { let mut subsystems = HashMap::new(); for line in lines.lines().map(|s| s.trim()).filter(|s| !s.is_empty()) { diff --git a/components/tikv_util/src/sys/inspector.rs b/components/tikv_util/src/sys/inspector.rs index addb99c58d2..7b49b647706 100644 --- a/components/tikv_util/src/sys/inspector.rs +++ b/components/tikv_util/src/sys/inspector.rs @@ -20,8 +20,8 @@ pub struct DiskStat { pub trait ThreadInspector { type DiskID; - /// Disk read and write bytes from the backend storage layer. `None` means it's not available - /// for the platform. + /// Disk read and write bytes from the backend storage layer. `None` means + /// it's not available for the platform. fn io_stat(&self) -> Result, String> { Ok(None) } diff --git a/components/tikv_util/src/sys/mod.rs b/components/tikv_util/src/sys/mod.rs index 2f5d3c98133..8dd7aefa77c 100644 --- a/components/tikv_util/src/sys/mod.rs +++ b/components/tikv_util/src/sys/mod.rs @@ -91,8 +91,8 @@ impl SysQuota { } } -/// Get the current global memory usage in bytes. Users need to call `record_global_memory_usage` -/// to refresh it periodically. +/// Get the current global memory usage in bytes. Users need to call +/// `record_global_memory_usage` to refresh it periodically. pub fn get_global_memory_usage() -> u64 { GLOBAL_MEMORY_USAGE.load(Ordering::Acquire) } @@ -110,7 +110,8 @@ pub fn record_global_memory_usage() { GLOBAL_MEMORY_USAGE.store(0, Ordering::Release); } -/// Register the high water mark so that `memory_usage_reaches_high_water` is available. +/// Register the high water mark so that `memory_usage_reaches_high_water` is +/// available. 
pub fn register_memory_usage_high_water(mark: u64) { MEMORY_USAGE_HIGH_WATER.store(mark, Ordering::Release); } diff --git a/components/tikv_util/src/sys/thread.rs b/components/tikv_util/src/sys/thread.rs index 445fc93974e..00a6e47b409 100644 --- a/components/tikv_util/src/sys/thread.rs +++ b/components/tikv_util/src/sys/thread.rs @@ -1,8 +1,8 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -//! This module provides unified APIs for accessing thread/process related information. -//! Only Linux platform is implemented correctly, for other platform, it only guarantees -//! successful compilation. +//! This module provides unified APIs for accessing thread/process related +//! information. Only Linux platform is implemented correctly, for other +//! platform, it only guarantees successful compilation. use std::{io, io::Result, sync::Mutex, thread}; @@ -82,7 +82,8 @@ mod imp { } /// Gets thread ids of the given process id. - /// WARN: Don't call this function frequently. Otherwise there will be a lot of memory fragments. + /// WARN: Don't call this function frequently. Otherwise there will be a lot + /// of memory fragments. pub fn thread_ids>(pid: Pid) -> io::Result { let dir = fs::read_dir(format!("/proc/{}/task", pid))?; Ok(dir @@ -216,8 +217,8 @@ mod imp { pub command: String, } - /// Unlike Linux, the unit of `stime` and `utime` is microseconds instead of ticks. - /// See [`full_thread_stat()`] + /// Unlike Linux, the unit of `stime` and `utime` is microseconds instead of + /// ticks. 
See [`full_thread_stat()`] #[inline] pub fn ticks_per_second() -> i64 { MICRO_SEC_PER_SEC diff --git a/components/tikv_util/src/time.rs b/components/tikv_util/src/time.rs index 57e9e261444..0ab8240c4f2 100644 --- a/components/tikv_util/src/time.rs +++ b/components/tikv_util/src/time.rs @@ -342,10 +342,11 @@ impl Instant { } } - /// It is similar to `duration_since`, but it won't panic when `self` is less than `other`, - /// and `None` will be returned in this case. + /// It is similar to `duration_since`, but it won't panic when `self` is + /// less than `other`, and `None` will be returned in this case. /// - /// Callers need to ensure that `self` and `other` are same type of Instants. + /// Callers need to ensure that `self` and `other` are same type of + /// Instants. pub fn checked_sub(&self, other: Instant) -> Option { if *self >= other { Some(self.duration_since(other)) diff --git a/components/tikv_util/src/timer.rs b/components/tikv_util/src/timer.rs index 50cfa48f9aa..56a00e01a50 100644 --- a/components/tikv_util/src/timer.rs +++ b/components/tikv_util/src/timer.rs @@ -48,11 +48,11 @@ impl Timer { self.pending.peek().map(|task| task.0.next_tick) } - /// Pops a `TimeoutTask` from the `Timer`, which should be ticked before `instant`. - /// Returns `None` if no tasks should be ticked any more. + /// Pops a `TimeoutTask` from the `Timer`, which should be ticked before + /// `instant`. Returns `None` if no tasks should be ticked any more. /// - /// The normal use case is keeping `pop_task_before` until get `None` in order - /// to retrieve all available events. + /// The normal use case is keeping `pop_task_before` until get `None` in + /// order to retrieve all available events. pub fn pop_task_before(&mut self, instant: Instant) -> Option { if self .pending @@ -121,8 +121,8 @@ fn start_global_timer() -> Handle { struct TimeZero { /// An arbitrary time used as the zero time. 
/// - /// Note that `zero` doesn't have to be related to `steady_time_point`, as what's - /// observed here is elapsed time instead of time point. + /// Note that `zero` doesn't have to be related to `steady_time_point`, as + /// what's observed here is elapsed time instead of time point. zero: std::time::Instant, /// A base time point. /// @@ -135,8 +135,8 @@ struct TimeZero { /// Time produced by the clock is not affected by clock jump or time adjustment. /// Internally it uses CLOCK_MONOTONIC_RAW to get a steady time source. /// -/// `Instant`s produced by this clock can't be compared or used to calculate elapse -/// unless they are produced using the same zero time. +/// `Instant`s produced by this clock can't be compared or used to calculate +/// elapse unless they are produced using the same zero time. #[derive(Clone)] pub struct SteadyClock { zero: Arc, diff --git a/components/tikv_util/src/topn.rs b/components/tikv_util/src/topn.rs index 5147f0d9b86..d6e059d8c42 100644 --- a/components/tikv_util/src/topn.rs +++ b/components/tikv_util/src/topn.rs @@ -58,7 +58,8 @@ impl IntoIterator for TopN { #[allow(clippy::type_complexity)] type IntoIter = iter::Map>, fn(Reverse) -> T>; - // note: IntoIterator doesn't require the result in order, there is an `IntoIterSorted`, implement that if necessary + // note: IntoIterator doesn't require the result in order, there is an + // `IntoIterSorted`, implement that if necessary fn into_iter(self) -> Self::IntoIter { self.heap.into_iter().map(|Reverse(x)| x) } diff --git a/components/tikv_util/src/worker/mod.rs b/components/tikv_util/src/worker/mod.rs index 4c2e3d2473f..a8196dca054 100644 --- a/components/tikv_util/src/worker/mod.rs +++ b/components/tikv_util/src/worker/mod.rs @@ -1,18 +1,15 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. -/*! - -`Worker` provides a mechanism to run tasks asynchronously (i.e. in the background) with some -additional features, for example, ticks. 
- -A worker contains: - -- A runner (which should implement the `Runnable` trait): to run tasks one by one or in batch. -- A scheduler: to send tasks to the runner, returns immediately. - -Briefly speaking, this is a mpsc (multiple-producer-single-consumer) model. - -*/ +//! `Worker` provides a mechanism to run tasks asynchronously (i.e. in the +//! background) with some additional features, for example, ticks. +//! +//! A worker contains: +//! +//! - A runner (which should implement the `Runnable` trait): to run tasks one +//! by one or in batch. +//! - A scheduler: to send tasks to the runner, returns immediately. +//! +//! Briefly speaking, this is a mpsc (multiple-producer-single-consumer) model. mod future; mod metrics; diff --git a/components/tikv_util/src/worker/pool.rs b/components/tikv_util/src/worker/pool.rs index 841a8a2229d..621ac730c30 100644 --- a/components/tikv_util/src/worker/pool.rs +++ b/components/tikv_util/src/worker/pool.rs @@ -117,7 +117,8 @@ impl Scheduler { /// Schedules a task to run. /// - /// If the worker is stopped or number pending tasks exceeds capacity, an error will return. + /// If the worker is stopped or number pending tasks exceeds capacity, an + /// error will return. pub fn schedule(&self, task: T) -> Result<(), ScheduleError> { debug!("scheduling task {}", task); if self.counter.load(Ordering::Acquire) >= self.pending_capacity { diff --git a/components/tikv_util/src/yatp_pool/future_pool.rs b/components/tikv_util/src/yatp_pool/future_pool.rs index a40221e3b6d..6962ae30756 100644 --- a/components/tikv_util/src/yatp_pool/future_pool.rs +++ b/components/tikv_util/src/yatp_pool/future_pool.rs @@ -72,8 +72,8 @@ impl FuturePool { /// Gets current running task count. #[inline] pub fn get_running_task_count(&self) -> usize { - // As long as different future pool has different name prefix, we can safely use the value - // in metrics. 
+ // As long as different future pool has different name prefix, we can safely use + // the value in metrics. self.inner.get_running_task_count() } @@ -85,7 +85,8 @@ impl FuturePool { self.inner.spawn(TrackedFuture::new(future)) } - /// Spawns a future in the pool and returns a handle to the result of the future. + /// Spawns a future in the pool and returns a handle to the result of the + /// future. /// /// The future will not be executed if the handle is not polled. pub fn spawn_handle( @@ -116,8 +117,8 @@ impl PoolInner { } fn get_running_task_count(&self) -> usize { - // As long as different future pool has different name prefix, we can safely use the value - // in metrics. + // As long as different future pool has different name prefix, we can safely use + // the value in metrics. self.env.metrics_running_task_count.get() as usize } @@ -298,7 +299,8 @@ mod tests { // So far we have only elapsed TICK_INTERVAL * 0.2, so no ticks so far. assert!(try_recv_tick().is_err()); - // Even if long enough time has elapsed, tick is not emitted until next task arrives + // Even if long enough time has elapsed, tick is not emitted until next task + // arrives thread::sleep(TICK_INTERVAL * 2); assert!(try_recv_tick().is_err()); diff --git a/components/tipb_helper/src/expr_def_builder.rs b/components/tipb_helper/src/expr_def_builder.rs index 589ee1afbd6..f6c1d26a1ff 100644 --- a/components/tipb_helper/src/expr_def_builder.rs +++ b/components/tipb_helper/src/expr_def_builder.rs @@ -4,7 +4,8 @@ use codec::prelude::NumberEncoder; use tidb_query_datatype::{FieldTypeAccessor, FieldTypeFlag, FieldTypeTp}; use tipb::{Expr, ExprType, FieldType, ScalarFuncSig}; -/// A helper utility to build `tipb::Expr` (a.k.a. expression definition) easily. +/// A helper utility to build `tipb::Expr` (a.k.a. expression definition) +/// easily. 
pub struct ExprDefBuilder(Expr); impl ExprDefBuilder { diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index dbefbbe770c..c37fcde86d1 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -125,8 +125,8 @@ pub struct RequestMetrics { pub block_read_nanos: u64, pub internal_key_skipped_count: u64, pub deleted_key_skipped_count: u64, - // temp instant used in raftstore metrics, first be the instant when creating the write callback, - // then reset when it is ready to apply + // temp instant used in raftstore metrics, first be the instant when creating the write + // callback, then reset when it is ready to apply pub write_instant: Option, pub wf_batch_wait_nanos: u64, pub wf_send_to_queue_nanos: u64, diff --git a/components/txn_types/src/lock.rs b/components/txn_types/src/lock.rs index 1a48a59308b..e0570d900ac 100644 --- a/components/txn_types/src/lock.rs +++ b/components/txn_types/src/lock.rs @@ -331,7 +331,8 @@ impl Lock { info } - /// Checks whether the lock conflicts with the given `ts`. If `ts == TimeStamp::max()`, the primary lock will be ignored. + /// Checks whether the lock conflicts with the given `ts`. If `ts == + /// TimeStamp::max()`, the primary lock will be ignored. fn check_ts_conflict_si( lock: Cow<'_, Self>, key: &Key, @@ -358,8 +359,9 @@ impl Lock { let raw_key = key.to_raw()?; if ts == TimeStamp::max() && raw_key == lock.primary && !lock.use_async_commit { - // When `ts == TimeStamp::max()` (which means to get latest committed version for - // primary key), and current key is the primary key, we ignore this lock. + // When `ts == TimeStamp::max()` (which means to get latest committed version + // for primary key), and current key is the primary key, we ignore + // this lock. return Ok(()); } @@ -421,8 +423,8 @@ impl Lock { } } -/// A specialized lock only for pessimistic lock. This saves memory for cases that only -/// pessimistic locks exist. +/// A specialized lock only for pessimistic lock. 
This saves memory for cases +/// that only pessimistic locks exist. #[derive(Clone, PartialEq, Eq)] pub struct PessimisticLock { /// The primary key in raw format. @@ -803,7 +805,8 @@ mod tests { ) .unwrap(); - // Ignore the primary lock when reading the latest committed version by setting u64::MAX as ts + // Ignore the primary lock when reading the latest committed version by setting + // u64::MAX as ts lock.lock_type = LockType::Put; lock.primary = b"foo".to_vec(); Lock::check_ts_conflict( @@ -815,7 +818,8 @@ mod tests { ) .unwrap(); - // Should not ignore the primary lock of an async commit transaction even if setting u64::MAX as ts + // Should not ignore the primary lock of an async commit transaction even if + // setting u64::MAX as ts let async_commit_lock = lock.clone().use_async_commit(vec![]); Lock::check_ts_conflict( Cow::Borrowed(&async_commit_lock), diff --git a/components/txn_types/src/timestamp.rs b/components/txn_types/src/timestamp.rs index dcb6f6b03dd..593fa2e1d41 100644 --- a/components/txn_types/src/timestamp.rs +++ b/components/txn_types/src/timestamp.rs @@ -122,10 +122,11 @@ const TS_SET_USE_VEC_LIMIT: usize = 8; pub enum TsSet { /// When the set is empty, avoid the useless cloning of Arc. Empty, - /// `Vec` is suitable when the set is small or the set is barely used, and it doesn't worth - /// converting a `Vec` into a `HashSet`. + /// `Vec` is suitable when the set is small or the set is barely used, and + /// it doesn't worth converting a `Vec` into a `HashSet`. Vec(Arc<[TimeStamp]>), - /// `Set` is suitable when there are many timestamps **and** it will be queried multiple times. + /// `Set` is suitable when there are many timestamps **and** it will be + /// queried multiple times. Set(Arc>), } @@ -137,14 +138,15 @@ impl Default for TsSet { } impl TsSet { - /// Create a `TsSet` from the given vec of timestamps. It will select the proper internal - /// collection type according to the size. 
+ /// Create a `TsSet` from the given vec of timestamps. It will select the + /// proper internal collection type according to the size. #[inline] pub fn new(ts: Vec) -> Self { if ts.is_empty() { TsSet::Empty } else if ts.len() <= TS_SET_USE_VEC_LIMIT { - // If there are too few elements in `ts`, use Vec directly instead of making a Set. + // If there are too few elements in `ts`, use Vec directly instead of making a + // Set. TsSet::Vec(ts.into()) } else { TsSet::Set(Arc::new(ts.into_iter().collect())) @@ -161,10 +163,11 @@ impl TsSet { Self::vec(unsafe { tikv_util::memory::vec_transmute(ts) }) } - /// Create a `TsSet` from the given vec of timestamps, but it will be forced to use `Vec` as the - /// internal collection type. When it's sure that the set will be queried at most once, use this - /// is better than `TsSet::new`, since both the querying on `Vec` and the conversion from `Vec` - /// to `HashSet` is O(N). + /// Create a `TsSet` from the given vec of timestamps, but it will be forced + /// to use `Vec` as the internal collection type. When it's sure that the + /// set will be queried at most once, use this is better than `TsSet::new`, + /// since both the querying on `Vec` and the conversion from `Vec` to + /// `HashSet` is O(N). #[inline] pub fn vec(ts: Vec) -> Self { if ts.is_empty() { diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 432f1eafc34..1d3fd775f1b 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -59,7 +59,8 @@ impl Key { Key(encoded) } - /// Creates a key from raw bytes but returns None if the key is an empty slice. + /// Creates a key from raw bytes but returns None if the key is an empty + /// slice. #[inline] pub fn from_raw_maybe_unbounded(key: &[u8]) -> Option { if key.is_empty() { @@ -89,7 +90,8 @@ impl Key { Key(encoded_key) } - /// Creates a key with reserved capacity for timestamp from encoded bytes slice. 
+ /// Creates a key with reserved capacity for timestamp from encoded bytes + /// slice. #[inline] pub fn from_encoded_slice(encoded_key: &[u8]) -> Key { let mut k = Vec::with_capacity(encoded_key.len() + number::U64_SIZE); @@ -128,7 +130,8 @@ impl Key { /// Creates a new key by truncating the timestamp from this key. /// - /// Preconditions: the caller must ensure this is actually a timestamped key. + /// Preconditions: the caller must ensure this is actually a timestamped + /// key. #[inline] pub fn truncate_ts(mut self) -> Result { let len = self.0.len(); @@ -183,14 +186,14 @@ impl Key { Ok(number::decode_u64_desc(&mut ts)?.into()) } - /// Whether the user key part of a ts encoded key `ts_encoded_key` equals to the encoded - /// user key `user_key`. + /// Whether the user key part of a ts encoded key `ts_encoded_key` equals to + /// the encoded user key `user_key`. /// - /// There is an optimization in this function, which is to compare the last 8 encoded bytes - /// first before comparing the rest. It is because in TiDB many records are ended with an 8 - /// byte row id and in many situations only this part is different when calling this function. - // - // TODO: If the last 8 byte is memory aligned, it would be better. + /// There is an optimization in this function, which is to compare the last + /// 8 encoded bytes first before comparing the rest. It is because in TiDB + /// many records are ended with an 8 byte row id and in many situations only + /// this part is different when calling this function. TODO: If the last + /// 8 byte is memory aligned, it would be better. #[inline] pub fn is_user_key_eq(ts_encoded_key: &[u8], user_key: &[u8]) -> bool { let user_key_len = user_key.len(); @@ -199,8 +202,8 @@ impl Key { } if user_key_len >= number::U64_SIZE { // We compare last 8 bytes as u64 first, then compare the rest. - // TODO: Can we just use == to check the left part and right part? `memcmp` might - // be smart enough. 
+ // TODO: Can we just use == to check the left part and right part? `memcmp` + // might be smart enough. let left = NativeEndian::read_u64(&ts_encoded_key[user_key_len - 8..]); let right = NativeEndian::read_u64(&user_key[user_key_len - 8..]); if left != right { @@ -262,10 +265,11 @@ pub enum MutationType { /// A row mutation. /// -/// It may also carry an `Assertion` field, which means it has such an *assertion* to the data -/// (the key already exist or not exist). The assertion should pass if the mutation (in a prewrite -/// request) is going to be finished successfully, otherwise it indicates there should be some bug -/// causing the attempt to write wrong data. +/// It may also carry an `Assertion` field, which means it has such an +/// *assertion* to the data (the key already exist or not exist). The assertion +/// should pass if the mutation (in a prewrite request) is going to be finished +/// successfully, otherwise it indicates there should be some bug causing the +/// attempt to write wrong data. #[derive(Clone)] pub enum Mutation { /// Put `Value` into `Key`, overwriting any existing value. @@ -429,8 +433,8 @@ impl From for Mutation { } } -/// `OldValue` is used by cdc to read the previous value associated with some key during the -/// prewrite process. +/// `OldValue` is used by cdc to read the previous value associated with some +/// key during the prewrite process. #[derive(Debug, Clone, PartialEq)] pub enum OldValue { /// A real `OldValue`. @@ -441,8 +445,8 @@ pub enum OldValue { None, /// The user doesn't care about the previous value. Unspecified, - /// Not sure whether the old value exists or not. users can seek CF_WRITE to the give position - /// to take a look. + /// Not sure whether the old value exists or not. users can seek CF_WRITE to + /// the give position to take a look. SeekWrite(Key), } @@ -470,7 +474,8 @@ impl OldValue { } } - /// The finalized `OldValue::Value` content, or `None` for `OldValue::Unspecified`. 
+ /// The finalized `OldValue::Value` content, or `None` for + /// `OldValue::Unspecified`. /// /// # Panics /// @@ -496,8 +501,8 @@ impl OldValue { } // Returned by MvccTxn when extra_op is set to kvrpcpb::ExtraOp::ReadOldValue. -// key with current ts -> (short value of the prev txn, start ts of the prev txn). -// The value of the map will be None when the mutation is `Insert`. +// key with current ts -> (short value of the prev txn, start ts of the prev +// txn). The value of the map will be None when the mutation is `Insert`. // MutationType is the type of mutation of the current write. pub type OldValues = HashMap)>; diff --git a/components/txn_types/src/write.rs b/components/txn_types/src/write.rs index 73871bf8abe..755207ed3f3 100644 --- a/components/txn_types/src/write.rs +++ b/components/txn_types/src/write.rs @@ -29,7 +29,8 @@ const FLAG_OVERLAPPED_ROLLBACK: u8 = b'R'; const GC_FENCE_PREFIX: u8 = b'F'; -/// The short value for rollback records which are protected from being collapsed. +/// The short value for rollback records which are protected from being +/// collapsed. const PROTECTED_ROLLBACK_SHORT_VALUE: &[u8] = b"p"; impl WriteType { @@ -68,20 +69,22 @@ pub struct Write { pub start_ts: TimeStamp, pub short_value: Option, - /// The `commit_ts` of transactions can be non-globally-unique. But since we store Rollback - /// records in the same CF where Commit records is, and Rollback records are saved with - /// `user_key{start_ts}` as the internal key, the collision between Commit and Rollback - /// records can't be avoided. In this case, we keep the Commit record, and set the - /// `has_overlapped_rollback` flag to indicate that there's also a Rollback record. - /// Also note that `has_overlapped_rollback` field is only necessary when the Rollback record - /// should be protected. + /// The `commit_ts` of transactions can be non-globally-unique. 
But since we + /// store Rollback records in the same CF where Commit records is, and + /// Rollback records are saved with `user_key{start_ts}` as the internal + /// key, the collision between Commit and Rollback records can't be avoided. + /// In this case, we keep the Commit record, and set the + /// `has_overlapped_rollback` flag to indicate that there's also a Rollback + /// record. Also note that `has_overlapped_rollback` field is only necessary + /// when the Rollback record should be protected. pub has_overlapped_rollback: bool, - /// Records the next version after this version when overlapping rollback happens on an already - /// existed commit record. + /// Records the next version after this version when overlapping rollback + /// happens on an already existed commit record. /// - /// When a rollback flag is written on an already-written commit record, it causes rewriting - /// the commit record. It may cause problems with the GC compaction filter. Consider this case: + /// When a rollback flag is written on an already-written commit record, it + /// causes rewriting the commit record. It may cause problems with the + /// GC compaction filter. Consider this case: /// /// ```text /// Key_100_put, Key_120_del @@ -93,51 +96,59 @@ pub struct Write { /// Key_100_put_R, Key_120_del /// ``` /// - /// Then GC with safepoint = 130 may happen. However a follower may not have finished applying - /// the change. So on the follower, it's possible that: + /// Then GC with safepoint = 130 may happen. However a follower may not have + /// finished applying the change. So on the follower, it's possible + /// that: /// /// 1. `Key_100_put`, `Key_120_del` applied - /// 2. GC with safepoint = 130 started and `Key_100_put`, `Key_120_del` are deleted - /// 3. Finished applying `Key_100_put_R`, which means to rewrite `Key_100_put` - /// 4. Read at `140` should get nothing (since it's MVCC-deleted at 120) but finds `Key_100_put` + /// 2. 
GC with safepoint = 130 started and `Key_100_put`, `Key_120_del` are + /// deleted 3. Finished applying `Key_100_put_R`, which means to rewrite + /// `Key_100_put` 4. Read at `140` should get nothing (since it's + /// MVCC-deleted at 120) but finds `Key_100_put` /// - /// To solve the problem, when marking `has_overlapped_rollback` on an already-existed commit - /// record, add a special field `gc_fence` on it. If there is a newer version after the record - /// being rewritten, the next version's `commit_ts` will be recorded. When MVCC reading finds - /// a commit record with a GC fence timestamp but the corresponding version that matches that ts - /// doesn't exist, the current version will be believed to be already GC-ed and ignored. + /// To solve the problem, when marking `has_overlapped_rollback` on an + /// already-existed commit record, add a special field `gc_fence` on it. If + /// there is a newer version after the record being rewritten, the next + /// version's `commit_ts` will be recorded. When MVCC reading finds a commit + /// record with a GC fence timestamp but the corresponding version + /// that matches that ts doesn't exist, the current version will be + /// believed to be already GC-ed and ignored. /// - /// Therefore, for the example above, in the 3rd step it will record the version `120` to the - /// `gc_fence` field: + /// Therefore, for the example above, in the 3rd step it will record the + /// version `120` to the `gc_fence` field: /// /// ```text /// Key_100_put_R_120, Key_120_del /// ``` /// - /// And when the reading in the 4th step finds the `PUT` record but the version at 120 doesn't - /// exist, it will be regarded as already GC-ed and ignored. + /// And when the reading in the 4th step finds the `PUT` record but the + /// version at 120 doesn't exist, it will be regarded as already GC-ed + /// and ignored. 
/// - /// For CDC and TiFlash, when they receives a commit record with `gc_fence` field set, it can - /// determine that it must be caused by an overlapped rollback instead of an actual commit. + /// For CDC and TiFlash, when they receives a commit record with `gc_fence` + /// field set, it can determine that it must be caused by an overlapped + /// rollback instead of an actual commit. /// - /// Note: GC fence will only be written on `PUT` and `DELETE` versions, and may only point to - /// a `PUT` or `DELETE` version. If there are other `Lock` and `Rollback` records after the - /// record that's being rewritten, they will be skipped. For example, in this case: + /// Note: GC fence will only be written on `PUT` and `DELETE` versions, and + /// may only point to a `PUT` or `DELETE` version. If there are other `Lock` + /// and `Rollback` records after the record that's being rewritten, they + /// will be skipped. For example, in this case: /// /// ```text /// Key_100_put, Key_105_lock, Key_110_rollback, Key_120_del /// ``` /// - /// If overlapped rollback happens at 100, the `Key_100_put` will be rewritten as - /// `Key_100_put_R_120`. It points to version 120 instead of the nearest 105. + /// If overlapped rollback happens at 100, the `Key_100_put` will be + /// rewritten as `Key_100_put_R_120`. It points to version 120 instead + /// of the nearest 105. /// /// /// The meaning of the field: /// * `None`: A record that haven't been rewritten - /// * `Some(0)`: A commit record that has been rewritten due to overlapping rollback, but it - /// doesn't have an newer version. - /// * `Some(ts)`: A commit record that has been rewritten due to overlapping rollback, - /// and it's next version's `commit_ts` is `ts` + /// * `Some(0)`: A commit record that has been rewritten due to overlapping + /// rollback, but it doesn't have an newer version. 
+ /// * `Some(ts)`: A commit record that has been rewritten due to overlapping + /// rollback, and it's next version's `commit_ts` is `ts` pub gc_fence: Option, } @@ -229,17 +240,18 @@ pub struct WriteRef<'a> { pub write_type: WriteType, pub start_ts: TimeStamp, pub short_value: Option<&'a [u8]>, - /// The `commit_ts` of transactions can be non-globally-unique. But since we store Rollback - /// records in the same CF where Commit records is, and Rollback records are saved with - /// `user_key{start_ts}` as the internal key, the collision between Commit and Rollback - /// records can't be avoided. In this case, we keep the Commit record, and set the - /// `has_overlapped_rollback` flag to indicate that there's also a Rollback record. - /// Also note that `has_overlapped_rollback` field is only necessary when the Rollback record - /// should be protected. + /// The `commit_ts` of transactions can be non-globally-unique. But since we + /// store Rollback records in the same CF where Commit records is, and + /// Rollback records are saved with `user_key{start_ts}` as the internal + /// key, the collision between Commit and Rollback records can't be avoided. + /// In this case, we keep the Commit record, and set the + /// `has_overlapped_rollback` flag to indicate that there's also a Rollback + /// record. Also note that `has_overlapped_rollback` field is only necessary + /// when the Rollback record should be protected. pub has_overlapped_rollback: bool, - /// Records the next version after this version when overlapping rollback happens on an already - /// existed commit record. + /// Records the next version after this version when overlapping rollback + /// happens on an already existed commit record. /// /// See [`Write::gc_fence`] for more detail. 
pub gc_fence: Option, @@ -333,21 +345,23 @@ impl WriteRef<'_> { } /// Prev Conditions: - /// * The `Write` record `self` is referring to is the latest version found by reading at `read_ts` - /// * The `read_ts` is safe, which means, it's not earlier than the current GC safepoint. + /// * The `Write` record `self` is referring to is the latest version + /// found by reading at `read_ts` + /// * The `read_ts` is safe, which means, it's not earlier than the + /// current GC safepoint. /// Return: - /// Whether the `Write` record is valid, ie. there's no GC fence or GC fence doesn't points to any other - /// version. + /// Whether the `Write` record is valid, ie. there's no GC fence or GC + /// fence doesn't points to any other version. pub fn check_gc_fence_as_latest_version(&self, read_ts: TimeStamp) -> bool { - // It's a valid write record if there's no GC fence or GC fence doesn't points to any other - // version. + // It's a valid write record if there's no GC fence or GC fence doesn't points + // to any other version. // If there is a GC fence that's points to another version, there are two cases: // * If `gc_fence_ts > read_ts`, then since `read_ts` didn't expire the GC - // safepoint, so the current version must be a not-expired version or the latest version - // before safepoint, so it must be a valid version - // * If `gc_fence_ts <= read_ts`, since the current version is the latest version found by - // reading at `read_ts`, the version at `gc_fence_ts` must be missing, so the current - // version must be invalid. + // safepoint, so the current version must be a not-expired version or the + // latest version before safepoint, so it must be a valid version + // * If `gc_fence_ts <= read_ts`, since the current version is the latest + // version found by reading at `read_ts`, the version at `gc_fence_ts` must be + // missing, so the current version must be invalid. 
if let Some(gc_fence_ts) = self.gc_fence { if !gc_fence_ts.is_zero() && gc_fence_ts <= read_ts { return false; diff --git a/fuzz/cli.rs b/fuzz/cli.rs index f70551ac084..3a804be7d17 100644 --- a/fuzz/cli.rs +++ b/fuzz/cli.rs @@ -212,7 +212,10 @@ fn run_afl(target: &str) -> Result<()> { )); } - // 2. cargo afl fuzz -i {seed_dir} -o {corpus_dir} target/debug/{instrumented_binary} + // 2. + // ``` + // cargo afl fuzz -i {seed_dir} -o {corpus_dir} target/debug/{instrumented_binary} + // ``` let instrumented_bin = WORKSPACE_ROOT.join("target/debug").join(target); let fuzzer_bin = Command::new("cargo") .args(&["afl", "fuzz"]) diff --git a/fuzz/targets/mod.rs b/fuzz/targets/mod.rs index 25799ff618f..73e29bef568 100644 --- a/fuzz/targets/mod.rs +++ b/fuzz/targets/mod.rs @@ -1,6 +1,7 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. -//! DO NOT MOVE THIS FILE. IT WILL BE PARSED BY `fuzz/cli.rs`. SEE `discover_fuzz_targets()`. +//! DO NOT MOVE THIS FILE. IT WILL BE PARSED BY `fuzz/cli.rs`. SEE +//! `discover_fuzz_targets()`. mod util; diff --git a/rustfmt.toml b/rustfmt.toml index ccc70980180..68b82c22bd1 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,6 +1,13 @@ version = "Two" unstable_features = true +comment_width = 80 +wrap_comments = true +format_code_in_doc_comments = true +format_macro_bodies = true +format_macro_matchers = true +normalize_comments = true +normalize_doc_attributes = true condense_wildcard_suffixes = true license_template_path = "etc/license.template" newline_style = "Unix" diff --git a/src/config.rs b/src/config.rs index 489609d1196..0fe367c1349 100644 --- a/src/config.rs +++ b/src/config.rs @@ -88,7 +88,8 @@ pub const BLOCK_CACHE_RATE: f64 = 0.45; /// By default, TiKV will try to limit memory usage to 75% of system memory. pub const MEMORY_USAGE_LIMIT_RATE: f64 = 0.75; -/// Min block cache shard's size. If a shard is too small, the index/filter data may not fit one shard +/// Min block cache shard's size. 
If a shard is too small, the index/filter data +/// may not fit one shard pub const MIN_BLOCK_CACHE_SHARD_SIZE: usize = 128 * MIB as usize; /// Maximum of 15% of system memory can be used by Raft Engine. Normally its @@ -240,11 +241,12 @@ fn get_background_job_limits_impl( cpu_num: u32, defaults: &BackgroundJobLimits, ) -> BackgroundJobLimits { - // At the minimum, we should have two background jobs: one for flush and one for compaction. - // Otherwise, the number of background jobs should not exceed cpu_num - 1. + // At the minimum, we should have two background jobs: one for flush and one for + // compaction. Otherwise, the number of background jobs should not exceed + // cpu_num - 1. let max_background_jobs = cmp::max(2, cmp::min(defaults.max_background_jobs, cpu_num - 1)); - // Scale flush threads proportionally to cpu cores. Also make sure the number of flush - // threads doesn't exceed total jobs. + // Scale flush threads proportionally to cpu cores. Also make sure the number of + // flush threads doesn't exceed total jobs. let max_background_flushes = cmp::min( (max_background_jobs + 3) / 4, defaults.max_background_flushes, @@ -540,15 +542,16 @@ macro_rules! build_cf_opt { let compression_per_level = $opt.compression_per_level[..$opt.num_levels as usize].to_vec(); cf_opts.compression_per_level(compression_per_level.as_slice()); cf_opts.bottommost_compression($opt.bottommost_level_compression); - // To set for bottommost level sst compression. The first 3 parameters refer to the - // default value in `CompressionOptions` in `rocksdb/include/rocksdb/advanced_options.h`. + // To set for bottommost level sst compression. The first 3 parameters refer to + // the default value in `CompressionOptions` in + // `rocksdb/include/rocksdb/advanced_options.h`. 
cf_opts.set_bottommost_level_compression_options( - -14, /* window_bits */ - 32767, /* level */ - 0, /* strategy */ + -14, // window_bits + 32767, // level + 0, // strategy $opt.bottommost_zstd_compression_dict_size, $opt.bottommost_zstd_compression_sample_size, - 1, /* parallel_threads */ + 1, // parallel_threads ); cf_opts.set_write_buffer_size($opt.write_buffer_size.0); cf_opts.set_max_write_buffer_number($opt.max_write_buffer_number); @@ -967,8 +970,8 @@ impl RaftCfConfig { #[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] #[serde(default)] #[serde(rename_all = "kebab-case")] -// Note that Titan is still an experimental feature. Once enabled, it can't fall back. -// Forced fallback may result in data loss. +// Note that Titan is still an experimental feature. Once enabled, it can't fall +// back. Forced fallback may result in data loss. pub struct TitanDBConfig { pub enabled: bool, pub dirname: String, @@ -1063,8 +1066,8 @@ pub struct DbConfig { #[online_config(skip)] pub enable_pipelined_write: bool, // deprecated. TiKV will use a new write mode when set `enable_pipelined_write` false and fall - // back to write mode in 3.0 when set `enable_pipelined_write` true. The code of multi-batch-write - // in RocksDB has been removed. + // back to write mode in 3.0 when set `enable_pipelined_write` true. The code of + // multi-batch-write in RocksDB has been removed. #[online_config(skip)] #[serde(skip_serializing)] pub enable_multi_batch_write: bool, @@ -1331,10 +1334,12 @@ impl RaftDefaultCfConfig { } } -// RocksDB Env associate thread pools of multiple instances from the same process. -// When construct Options, options.env is set to same singleton Env::Default() object. -// So total max_background_jobs = max(rocksdb.max_background_jobs, raftdb.max_background_jobs) -// But each instance will limit their background jobs according to their own max_background_jobs +// RocksDB Env associate thread pools of multiple instances from the same +// process. 
When construct Options, options.env is set to same singleton +// Env::Default() object. So total max_background_jobs = +// max(rocksdb.max_background_jobs, raftdb.max_background_jobs) +// But each instance will limit their background jobs according to their own +// max_background_jobs #[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] @@ -1592,7 +1597,8 @@ impl> DBConfigManger { block-cache.capacity in storage module instead" .into()); } - // for multi-rocks, shared block cache has to be enabled and thus should shortcut in the above if statement. + // for multi-rocks, shared block cache has to be enabled and thus should + // shortcut in the above if statement. assert!(self.tablet_accessor.is_single_engine()); let mut error_collector = TabletErrorCollector::new(); self.tablet_accessor @@ -1764,7 +1770,8 @@ fn config_to_slice(config_change: &[(String, String)]) -> Vec<(&str, &str)> { .collect() } -// Convert `ConfigValue` to formatted String that can pass to `DB::set_db_options` +// Convert `ConfigValue` to formatted String that can pass to +// `DB::set_db_options` fn config_value_to_string(config_change: Vec<(String, ConfigValue)>) -> Vec<(String, String)> { config_change .into_iter() @@ -2130,10 +2137,10 @@ macro_rules! readpool_config { const DEFAULT_STORAGE_READPOOL_MIN_CONCURRENCY: usize = 4; const DEFAULT_STORAGE_READPOOL_MAX_CONCURRENCY: usize = 8; -// Assume a request can be finished in 1ms, a request at position x will wait about -// 0.001 * x secs to be actual started. A server-is-busy error will trigger 2 seconds -// backoff. So when it needs to wait for more than 2 seconds, return error won't causse -// larger latency. +// Assume a request can be finished in 1ms, a request at position x will wait +// about 0.001 * x secs to be actual started. A server-is-busy error will +// trigger 2 seconds backoff. 
So when it needs to wait for more than 2 seconds, +// return error won't causse larger latency. const DEFAULT_READPOOL_MAX_TASKS_PER_WORKER: usize = 2 * 1000; const MIN_READPOOL_STACK_SIZE_MB: u64 = 2; @@ -2511,17 +2518,19 @@ pub struct CdcConfig { pub incremental_scan_threads: usize, pub incremental_scan_concurrency: usize, pub incremental_scan_speed_limit: ReadableSize, - /// `TsFilter` can increase speed and decrease resource usage when incremental content is much - /// less than total content. However in other cases, `TsFilter` can make performance worse - /// because it needs to re-fetch old row values if they are required. + /// `TsFilter` can increase speed and decrease resource usage when + /// incremental content is much less than total content. However in + /// other cases, `TsFilter` can make performance worse because it needs + /// to re-fetch old row values if they are required. /// - /// `TsFilter` will be enabled if `incremental/total <= incremental_scan_ts_filter_ratio`. + /// `TsFilter` will be enabled if `incremental/total <= + /// incremental_scan_ts_filter_ratio`. /// Set `incremental_scan_ts_filter_ratio` to 0 will disable it. pub incremental_scan_ts_filter_ratio: f64, - /// Count of threads to confirm Region leadership in TiKV instances, 1 by default. - /// Please consider to increase it if count of regions on one TiKV instance is - /// greater than 20k. + /// Count of threads to confirm Region leadership in TiKV instances, 1 by + /// default. Please consider to increase it if count of regions on one + /// TiKV instance is greater than 20k. #[online_config(skip)] pub tso_worker_threads: usize, @@ -3254,8 +3263,8 @@ impl TiKvConfig { Ok(()) } - // As the init of `logger` is very early, this adjust needs to be separated and called - // immediately after parsing the command line. + // As the init of `logger` is very early, this adjust needs to be separated and + // called immediately after parsing the command line. 
pub fn logger_compatible_adjust(&mut self) { let default_tikv_cfg = TiKvConfig::default(); let default_log_cfg = LogConfig::default(); @@ -3373,8 +3382,8 @@ impl TiKvConfig { "server.end-point-max-tasks", "readpool.coprocessor.max-tasks-per-worker-xxx", ); // Note: - // Our `end_point_max_tasks` is mostly mistakenly configured, so we don't override - // new configuration using old values. + // Our `end_point_max_tasks` is mostly mistakenly configured, so we don't + // override new configuration using old values. self.server.end_point_max_tasks = None; } if self.raft_store.clean_stale_peer_delay.as_secs() > 0 { @@ -3390,9 +3399,9 @@ impl TiKvConfig { ); self.rocksdb.auto_tuned = None; } - // When shared block cache is enabled, if its capacity is set, it overrides individual - // block cache sizes. Otherwise use the sum of block cache size of all column families - // as the shared cache size. + // When shared block cache is enabled, if its capacity is set, it overrides + // individual block cache sizes. Otherwise use the sum of block cache + // size of all column families as the shared cache size. let cache_cfg = &mut self.storage.block_cache; if cache_cfg.shared && cache_cfg.capacity.is_none() { cache_cfg.capacity = Some(ReadableSize( @@ -3679,7 +3688,8 @@ pub fn to_flatten_config_info(cfg: &TiKvConfig) -> Vec { Value::Object(res) } - // configs that should not be flatten because the config type is HashMap instead of submodule. + // configs that should not be flatten because the config type is HashMap instead + // of submodule. lazy_static! { static ref NO_FLATTEN_CFGS: HashSet<&'static str> = { let mut set = HashSet::new(); @@ -3989,7 +3999,8 @@ impl ConfigController { // dispatched to corresponding config manager, to avoid dispatch change twice if let Some(mgr) = inner.config_mgrs.get_mut(&Module::from(name.as_str())) { if let Err(e) = mgr.dispatch(change.clone()) { - // we already verified the correctness at the beginning of this function. 
+ // we already verified the correctness at the beginning of this + // function. inner.current.update(to_update).unwrap(); return Err(e); } @@ -5129,7 +5140,7 @@ mod tests { ..Default::default() }; let provider = Some(MockRegionInfoProvider::new(vec![])); - let cf_opts = build_cf_opt!(config, CF_DEFAULT, None /*cache*/, provider); + let cf_opts = build_cf_opt!(config, CF_DEFAULT, None /* cache */, provider); assert_eq!( config.target_file_size_base.0, cf_opts.get_target_file_size_base() @@ -5143,7 +5154,7 @@ mod tests { ..Default::default() }; let provider: Option = None; - let cf_opts = build_cf_opt!(config, CF_DEFAULT, None /*cache*/, provider); + let cf_opts = build_cf_opt!(config, CF_DEFAULT, None /* cache */, provider); assert_eq!( config.target_file_size_base.0, cf_opts.get_target_file_size_base() @@ -5159,7 +5170,7 @@ mod tests { ..Default::default() }; let provider = Some(MockRegionInfoProvider::new(vec![])); - let cf_opts = build_cf_opt!(config, CF_DEFAULT, None /*cache*/, provider); + let cf_opts = build_cf_opt!(config, CF_DEFAULT, None /* cache */, provider); assert_eq!( config.compaction_guard_max_output_file_size.0, cf_opts.get_target_file_size_base() @@ -5190,7 +5201,8 @@ mod tests { assert!(cfg.validate().is_ok()); assert_eq!(cfg.memory_usage_limit.unwrap(), ReadableSize(5 * GIB)); - // Test memory_usage_limit will fallback to system memory capacity with huge block cache. + // Test memory_usage_limit will fallback to system memory capacity with huge + // block cache. 
cfg.memory_usage_limit = None; let system = SysQuota::memory_limit_in_bytes(); cfg.storage.block_cache.capacity = Some(ReadableSize(system * 3 / 4)); @@ -5263,7 +5275,10 @@ mod tests { fn test_background_job_limits() { // cpu num = 1 assert_eq!( - get_background_job_limits_impl(1 /*cpu_num*/, &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS), + get_background_job_limits_impl( + 1, // cpu_num + &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS + ), BackgroundJobLimits { max_background_jobs: 2, max_background_flushes: 1, @@ -5273,7 +5288,7 @@ mod tests { ); assert_eq!( get_background_job_limits_impl( - 1, /*cpu_num*/ + 1, // cpu_num &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { @@ -5285,7 +5300,10 @@ mod tests { ); // cpu num = 2 assert_eq!( - get_background_job_limits_impl(2 /*cpu_num*/, &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS), + get_background_job_limits_impl( + 2, // cpu_num + &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS + ), BackgroundJobLimits { max_background_jobs: 2, max_background_flushes: 1, @@ -5295,7 +5313,7 @@ mod tests { ); assert_eq!( get_background_job_limits_impl( - 2, /*cpu_num*/ + 2, // cpu_num &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { @@ -5307,7 +5325,10 @@ mod tests { ); // cpu num = 4 assert_eq!( - get_background_job_limits_impl(4 /*cpu_num*/, &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS), + get_background_job_limits_impl( + 4, // cpu_num + &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS + ), BackgroundJobLimits { max_background_jobs: 3, max_background_flushes: 1, @@ -5317,7 +5338,7 @@ mod tests { ); assert_eq!( get_background_job_limits_impl( - 4, /*cpu_num*/ + 4, // cpu_num &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { @@ -5329,7 +5350,10 @@ mod tests { ); // cpu num = 8 assert_eq!( - get_background_job_limits_impl(8 /*cpu_num*/, &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS), + get_background_job_limits_impl( + 8, // cpu_num + &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS + ), BackgroundJobLimits { max_background_jobs: 7, max_background_flushes: 2, @@ -5339,7 +5363,7 
@@ mod tests { ); assert_eq!( get_background_job_limits_impl( - 8, /*cpu_num*/ + 8, // cpu_num &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS ), RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS, @@ -5347,14 +5371,14 @@ mod tests { // cpu num = 16 assert_eq!( get_background_job_limits_impl( - 16, /*cpu_num*/ + 16, // cpu_num &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), KVDB_DEFAULT_BACKGROUND_JOB_LIMITS, ); assert_eq!( get_background_job_limits_impl( - 16, /*cpu_num*/ + 16, // cpu_num &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS ), RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS, @@ -5401,8 +5425,8 @@ mod tests { let mut default_cfg = TiKvConfig::default(); // Some default values are computed based on the environment. - // Because we can't set config values for these in `config-template.toml`, we will handle - // them manually. + // Because we can't set config values for these in `config-template.toml`, we + // will handle them manually. cfg.readpool.unified.max_thread_count = default_cfg.readpool.unified.max_thread_count; cfg.readpool.storage.high_concurrency = default_cfg.readpool.storage.high_concurrency; cfg.readpool.storage.normal_concurrency = default_cfg.readpool.storage.normal_concurrency; @@ -5428,7 +5452,8 @@ mod tests { cfg.backup_stream.num_threads = default_cfg.backup_stream.num_threads; // There is another set of config values that we can't directly compare: - // When the default values are `None`, but are then resolved to `Some(_)` later on. + // When the default values are `None`, but are then resolved to `Some(_)` later + // on. 
default_cfg.readpool.storage.adjust_use_unified_pool(); default_cfg.readpool.coprocessor.adjust_use_unified_pool(); default_cfg.security.redact_info_log = Some(false); diff --git a/src/coprocessor/dag/storage_impl.rs b/src/coprocessor/dag/storage_impl.rs index 883507452ec..46dcf7f570e 100644 --- a/src/coprocessor/dag/storage_impl.rs +++ b/src/coprocessor/dag/storage_impl.rs @@ -68,7 +68,8 @@ impl Storage for TiKvStorage { } fn scan_next(&mut self) -> QEResult> { - // Unwrap is fine because we must have called `reset_range` before calling `scan_next`. + // Unwrap is fine because we must have called `reset_range` before calling + // `scan_next`. let kv = self.scanner.as_mut().unwrap().next().map_err(Error::from)?; Ok(kv.map(|(k, v)| (k.into_raw().unwrap(), v))) } diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 2b2ae03caa2..d07d9bd5bd6 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -32,8 +32,9 @@ use crate::{ }, }; -/// Requests that need time of less than `LIGHT_TASK_THRESHOLD` is considered as light ones, -/// which means they don't need a permit from the semaphore before execution. +/// Requests that need time of less than `LIGHT_TASK_THRESHOLD` is considered as +/// light ones, which means they don't need a permit from the semaphore before +/// execution. const LIGHT_TASK_THRESHOLD: Duration = Duration::from_millis(5); /// A pool to build and run Coprocessor request handlers. @@ -79,9 +80,9 @@ impl Endpoint { resource_tag_factory: ResourceTagFactory, quota_limiter: Arc, ) -> Self { - // FIXME: When yatp is used, we need to limit coprocessor requests in progress to avoid - // using too much memory. However, if there are a number of large requests, small requests - // will still be blocked. This needs to be improved. + // FIXME: When yatp is used, we need to limit coprocessor requests in progress + // to avoid using too much memory. 
However, if there are a number of large + // requests, small requests will still be blocked. This needs to be improved. let semaphore = match &read_pool { ReadPoolHandle::Yatp { .. } => { Some(Arc::new(Semaphore::new(cfg.end_point_max_concurrency))) @@ -139,8 +140,8 @@ impl Endpoint { Ok(()) } - /// Parse the raw `Request` to create `RequestHandlerBuilder` and `ReqContext`. - /// Returns `Err` if fails. + /// Parse the raw `Request` to create `RequestHandlerBuilder` and + /// `ReqContext`. Returns `Err` if fails. /// /// It also checks if there are locks in memory blocking this read request. fn parse_request_and_check_memory_locks( @@ -373,16 +374,17 @@ impl Endpoint { /// The real implementation of handling a unary request. /// - /// It first retrieves a snapshot, then builds the `RequestHandler` over the snapshot and - /// the given `handler_builder`. Finally, it calls the unary request interface of the - /// `RequestHandler` to process the request and produce a result. + /// It first retrieves a snapshot, then builds the `RequestHandler` over the + /// snapshot and the given `handler_builder`. Finally, it calls the unary + /// request interface of the `RequestHandler` to process the request and + /// produce a result. async fn handle_unary_request_impl( semaphore: Option>, mut tracker: Box>, handler_builder: RequestHandlerBuilder, ) -> Result> { - // When this function is being executed, it may be queued for a long time, so that - // deadline may exceed. + // When this function is being executed, it may be queued for a long time, so + // that deadline may exceed. tracker.on_scheduled(); tracker.req_ctx.deadline.check()?; @@ -445,8 +447,8 @@ impl Endpoint { /// Handle a unary request and run on the read pool. /// - /// Returns `Err(err)` if the read pool is full. Returns `Ok(future)` in other cases. - /// The future inside may be an error however. + /// Returns `Err(err)` if the read pool is full. Returns `Ok(future)` in + /// other cases. 
The future inside may be an error however. fn handle_unary_request( &self, req_ctx: ReqContext, @@ -477,9 +479,9 @@ impl Endpoint { async move { res.await? } } - /// Parses and handles a unary request. Returns a future that will never fail. If there are - /// errors during parsing or handling, they will be converted into a `Response` as the success - /// result of the future. + /// Parses and handles a unary request. Returns a future that will never + /// fail. If there are errors during parsing or handling, they will be + /// converted into a `Response` as the success result of the future. #[inline] pub fn parse_and_handle_unary_request( &self, @@ -510,9 +512,10 @@ impl Endpoint { /// The real implementation of handling a stream request. /// - /// It first retrieves a snapshot, then builds the `RequestHandler` over the snapshot and - /// the given `handler_builder`. Finally, it calls the stream request interface of the - /// `RequestHandler` multiple times to process the request and produce multiple results. + /// It first retrieves a snapshot, then builds the `RequestHandler` over the + /// snapshot and the given `handler_builder`. Finally, it calls the stream + /// request interface of the `RequestHandler` multiple times to process the + /// request and produce multiple results. fn handle_stream_request_impl( semaphore: Option>, mut tracker: Box>, @@ -585,8 +588,8 @@ impl Endpoint { /// Handle a stream request and run on the read pool. /// - /// Returns `Err(err)` if the read pool is full. Returns `Ok(stream)` in other cases. - /// The stream inside may produce errors however. + /// Returns `Err(err)` if the read pool is full. Returns `Ok(stream)` in + /// other cases. The stream inside may produce errors however. fn handle_stream_request( &self, req_ctx: ReqContext, @@ -621,9 +624,10 @@ impl Endpoint { Ok(rx) } - /// Parses and handles a stream request. Returns a stream that produce each result in a - /// `Response` and will never fail. 
If there are errors during parsing or handling, they will - /// be converted into a `Response` as the only stream item. + /// Parses and handles a stream request. Returns a stream that produce each + /// result in a `Response` and will never fail. If there are errors during + /// parsing or handling, they will be converted into a `Response` as the + /// only stream item. #[inline] pub fn parse_and_handle_stream_request( &self, @@ -1292,12 +1296,13 @@ mod tests { /// Asserted that the snapshot can be retrieved in 500ms. const SNAPSHOT_DURATION_MS: u64 = 500; - /// Asserted that the delay caused by OS scheduling other tasks is smaller than 200ms. - /// This is mostly for CI. + /// Asserted that the delay caused by OS scheduling other tasks is + /// smaller than 200ms. This is mostly for CI. const HANDLE_ERROR_MS: u64 = 200; - /// The acceptable error range for a coarse timer. Note that we use CLOCK_MONOTONIC_COARSE - /// which can be slewed by time adjustment code (e.g., NTP, PTP). + /// The acceptable error range for a coarse timer. Note that we use + /// CLOCK_MONOTONIC_COARSE which can be slewed by time + /// adjustment code (e.g., NTP, PTP). const COARSE_ERROR_MS: u64 = 50; /// The duration that payload executes. @@ -1460,11 +1465,12 @@ mod tests { // Response 1 // - // Note: `process_wall_time_ms` includes `total_process_time` and `total_suspend_time`. - // Someday it will be separated, but for now, let's just consider the combination. + // Note: `process_wall_time_ms` includes `total_process_time` and + // `total_suspend_time`. Someday it will be separated, but for now, + // let's just consider the combination. // - // In the worst case, `total_suspend_time` could be totally req2 payload. So here: - // req1 payload <= process time <= (req1 payload + req2 payload) + // In the worst case, `total_suspend_time` could be totally req2 payload. 
+ // So here: req1 payload <= process time <= (req1 payload + req2 payload) let resp = &rx.recv().unwrap()[0]; assert!(resp.get_other_error().is_empty()); assert_ge!( @@ -1482,11 +1488,12 @@ mod tests { // Response 2 // - // Note: `process_wall_time_ms` includes `total_process_time` and `total_suspend_time`. - // Someday it will be separated, but for now, let's just consider the combination. + // Note: `process_wall_time_ms` includes `total_process_time` and + // `total_suspend_time`. Someday it will be separated, but for now, + // let's just consider the combination. // - // In the worst case, `total_suspend_time` could be totally req1 payload. So here: - // req2 payload <= process time <= (req1 payload + req2 payload) + // In the worst case, `total_suspend_time` could be totally req1 payload. + // So here: req2 payload <= process time <= (req1 payload + req2 payload) let resp = &rx.recv().unwrap()[0]; assert!(!resp.get_other_error().is_empty()); assert_ge!( diff --git a/src/coprocessor/interceptors/concurrency_limiter.rs b/src/coprocessor/interceptors/concurrency_limiter.rs index d9da8b472bc..aa8b5c72f13 100644 --- a/src/coprocessor/interceptors/concurrency_limiter.rs +++ b/src/coprocessor/interceptors/concurrency_limiter.rs @@ -15,8 +15,8 @@ use tokio::sync::{Semaphore, SemaphorePermit}; use crate::coprocessor::metrics::*; -/// Limits the concurrency of heavy tasks by limiting the time spent on executing `fut` -/// before forcing to acquire a semaphore permit. +/// Limits the concurrency of heavy tasks by limiting the time spent on +/// executing `fut` before forcing to acquire a semaphore permit. /// /// The future `fut` can always run for at least `time_limit_without_permit`, /// but it needs to acquire a permit from the semaphore before it can continue. @@ -159,8 +159,8 @@ mod tests { .is_ok() ); - // Both t1 and t2 need a semaphore permit to finish. 
Although t2 is much shorter than t1, - // it starts with t1 + // Both t1 and t2 need a semaphore permit to finish. Although t2 is much shorter + // than t1, it starts with t1 smp.add_permits(1); let smp2 = smp.clone(); let mut t1 = diff --git a/src/coprocessor/interceptors/deadline.rs b/src/coprocessor/interceptors/deadline.rs index 7c7d44a6b4f..29b673aa487 100644 --- a/src/coprocessor/interceptors/deadline.rs +++ b/src/coprocessor/interceptors/deadline.rs @@ -9,8 +9,8 @@ use std::{ use pin_project::pin_project; use tikv_util::deadline::{Deadline, DeadlineError}; -/// Checks the deadline before every poll of the future. If the deadline is exceeded, -/// `DeadlineError` is returned. +/// Checks the deadline before every poll of the future. If the deadline is +/// exceeded, `DeadlineError` is returned. pub fn check_deadline( fut: F, deadline: Deadline, diff --git a/src/coprocessor/mod.rs b/src/coprocessor/mod.rs index 834033a60e1..0cde193a606 100644 --- a/src/coprocessor/mod.rs +++ b/src/coprocessor/mod.rs @@ -6,16 +6,18 @@ //! By doing so, the CPU of TiKV nodes can be utilized for computing and the //! amount of data to transfer can be reduced (i.e. filtered at TiKV side). //! -//! Notice that Coprocessor handles more than simple SQL query executors (DAG request). It also -//! handles analyzing requests and checksum requests. +//! Notice that Coprocessor handles more than simple SQL query executors (DAG +//! request). It also handles analyzing requests and checksum requests. //! -//! The entry point of handling all coprocessor requests is `Endpoint`. Common steps are: -//! 1. Parse the request into a DAG request, Checksum request or Analyze request. -//! 2. Retrieve a snapshot from the underlying engine according to the given timestamp. -//! 3. Build corresponding request handlers from the snapshot and request detail. -//! 4. Run request handlers once (for unary requests) or multiple times (for streaming requests) -//! on a future thread pool. -//! 5. 
Return handling result as a response. +//! The entry point of handling all coprocessor requests is `Endpoint`. Common +//! steps are: +//! - Parse the request into a DAG request, Checksum request or Analyze request. +//! - Retrieve a snapshot from the underlying engine according to the given +//! timestamp. +//! - Build corresponding request handlers from the snapshot and request detail. +//! - Run request handlers once (for unary requests) or multiple times (for +//! streaming requests) on a future thread pool. +//! - Return handling result as a response. //! //! Please refer to `Endpoint` for more details. @@ -117,11 +119,13 @@ pub struct ReqContext { pub txn_start_ts: TimeStamp, /// The set of timestamps of locks that can be bypassed during the reading - /// because either they will be rolled back or their commit_ts > read request's start_ts. + /// because either they will be rolled back or their commit_ts > read + /// request's start_ts. pub bypass_locks: TsSet, - /// The set of timestamps of locks that value in it can be accessed during the reading - /// because they will be committed and their commit_ts <= read request's start_ts. + /// The set of timestamps of locks that value in it can be accessed during + /// the reading because they will be committed and their commit_ts <= + /// read request's start_ts. pub access_locks: TsSet, /// The data version to match. If it matches the underlying data version, diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index 05a30f64c4d..70144f47ce1 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -138,9 +138,9 @@ impl AnalyzeContext { let mut row_count = 0; let mut time_slice_start = Instant::now(); let mut topn_heap = BinaryHeap::new(); - // cur_val recording the current value's data and its counts when iterating index's rows. 
- // Once we met a new value, the old value will be pushed into the topn_heap to maintain the - // top-n information. + // cur_val recording the current value's data and its counts when iterating + // index's rows. Once we met a new value, the old value will be pushed + // into the topn_heap to maintain the top-n information. let mut cur_val: (u32, Vec) = (0, vec![]); let top_n_size = req.get_top_n_size() as usize; let stats_version = if req.has_version() { @@ -444,7 +444,8 @@ impl RowSampleBuilder { } } - // Don't let analyze bandwidth limit the quota limiter, this is already limited in rate limiter. + // Don't let analyze bandwidth limit the quota limiter, this is already limited + // in rate limiter. let quota_delay = { if !self.is_auto_analyze { self.quota_limiter.consume_sample(sample, true).await @@ -858,10 +859,10 @@ impl SampleBuilder { }) } - // `collect_columns_stats` returns the sample collectors which contain total count, - // null count, distinct values count and count-min sketch. And it also returns the statistic - // builder for PK which contains the histogram. When PK is common handle, it returns index stats - // for PK. + // `collect_columns_stats` returns the sample collectors which contain total + // count, null count, distinct values count and count-min sketch. And it + // also returns the statistic builder for PK which contains the histogram. + // When PK is common handle, it returns index stats for PK. // See https://en.wikipedia.org/wiki/Reservoir_sampling async fn collect_columns_stats( &mut self, @@ -871,8 +872,8 @@ impl SampleBuilder { self.columns_info.len() - self.columns_info[0].get_pk_handle() as usize; // The number of columns need to be sampled is `columns_without_handle_len`. - // It equals to `columns_info.len()` if the first column doesn't contain a handle. - // Otherwise, it equals to `columns_info.len() - 1`. + // It equals to `columns_info.len()` if the first column doesn't contain a + // handle. 
Otherwise, it equals to `columns_info.len() - 1`. let mut pk_builder = Histogram::new(self.max_bucket_size); let mut collectors = vec![ SampleCollector::new( @@ -915,9 +916,9 @@ impl SampleBuilder { } if self.analyze_common_handle { - // cur_val recording the current value's data and its counts when iterating index's rows. - // Once we met a new value, the old value will be pushed into the topn_heap to maintain the - // top-n information. + // cur_val recording the current value's data and its counts when iterating + // index's rows. Once we met a new value, the old value will be pushed into the + // topn_heap to maintain the top-n information. let mut cur_val: (u32, Vec) = (0, vec![]); let mut topn_heap = BinaryHeap::new(); for logical_row in &result.logical_rows { @@ -979,16 +980,21 @@ impl SampleBuilder { &mut val, )?; - // This is a workaround for different encoding methods used by TiDB and TiKV for CM Sketch. - // We need this because we must ensure we are using the same encoding method when we are querying values from - // CM Sketch (in TiDB) and inserting values into CM Sketch (here). - // We are inserting raw bytes from TableScanExecutor into CM Sketch here and query CM Sketch using bytes - // encoded by tablecodec.EncodeValue() in TiDB. Their results are different after row format becomes ver 2. + // This is a workaround for different encoding methods used by TiDB and TiKV for + // CM Sketch. We need this because we must ensure we are using the same encoding + // method when we are querying values from CM Sketch (in TiDB) and inserting + // values into CM Sketch (here). + // We are inserting raw bytes from TableScanExecutor into CM Sketch here and + // query CM Sketch using bytes encoded by tablecodec.EncodeValue() in TiDB. + // Their results are different after row format becomes ver 2. 
// - // Here we (1) convert INT bytes to VAR_INT bytes, (2) convert UINT bytes to VAR_UINT bytes, - // and (3) "flatten" the duration value from DURATION bytes into i64 value, then convert it to VAR_INT bytes. - // These are the only 3 cases we need to care about according to TiDB's tablecodec.EncodeValue() and - // TiKV's V1CompatibleEncoder::write_v2_as_datum(). + // Here we: + // - convert INT bytes to VAR_INT bytes + // - convert UINT bytes to VAR_UINT bytes + // - "flatten" the duration value from DURATION bytes into i64 value, then + // convert it to VAR_INT bytes. + // These are the only 3 cases we need to care about according to TiDB's + // tablecodec.EncodeValue() and TiKV's V1CompatibleEncoder::write_v2_as_datum(). val = match val[0] { INT_FLAG | UINT_FLAG | DURATION_FLAG => { let mut mut_val = &val[..]; @@ -1037,7 +1043,8 @@ impl SampleBuilder { } } -/// `SampleCollector` will collect Samples and calculate the count, ndv and total size of an attribute. +/// `SampleCollector` will collect Samples and calculate the count, ndv and +/// total size of an attribute. #[derive(Clone)] struct SampleCollector { samples: Vec>, diff --git a/src/coprocessor/statistics/cmsketch.rs b/src/coprocessor/statistics/cmsketch.rs index e9da9c8a91d..6a3042c8ee7 100644 --- a/src/coprocessor/statistics/cmsketch.rs +++ b/src/coprocessor/statistics/cmsketch.rs @@ -36,8 +36,8 @@ impl CmSketch { } // `insert` inserts the data into cm sketch. For each row i, the position at - // (h1 + h2*i) % width will be incremented by one, where the (h1, h2) is the hash value - // of data. + // (h1 + h2*i) % width will be incremented by one, where the (h1, h2) is the + // hash value of data. 
pub fn insert(&mut self, bytes: &[u8]) { self.count = self.count.wrapping_add(1); let (h1, h2) = CmSketch::hash(bytes); diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index 064073825f4..f9b908979b8 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -18,7 +18,8 @@ enum TrackerState { /// The tracker is initialized. Initialized, - /// The tracker is notified that the task is scheduled on a thread pool and start running. + /// The tracker is notified that the task is scheduled on a thread pool and + /// start running. Scheduled(Instant), /// The tracker is notified that the snapshot needed by the task is ready. @@ -36,7 +37,8 @@ enum TrackerState { /// The tracker is notified that all items just finished. AllItemFinished, - /// The tracker has finished all tracking and there will be no future operations. + /// The tracker has finished all tracking and there will be no future + /// operations. Tracked, } @@ -50,7 +52,8 @@ pub struct Tracker { wait_time: Duration, // Total wait time schedule_wait_time: Duration, // Wait time spent on waiting for scheduling snapshot_wait_time: Duration, // Wait time spent on waiting for a snapshot - handler_build_time: Duration, // Time spent on building the handler (not included in total wait time) + handler_build_time: Duration, /* Time spent on building the handler (not included in total + * wait time) */ req_lifetime: Duration, // Suspend time between processing two items @@ -75,9 +78,9 @@ pub struct Tracker { } impl Tracker { - /// Initialize the tracker. Normally it is called outside future pool's factory context, - /// because the future pool might be full and we need to wait it. This kind of wait time - /// has to be recorded. + /// Initialize the tracker. Normally it is called outside future pool's + /// factory context, because the future pool might be full and we need + /// to wait it. This kind of wait time has to be recorded. 
pub fn new(req_ctx: ReqContext, slow_log_threshold: Duration) -> Self { let now = Instant::now_coarse(); Tracker { @@ -386,8 +389,9 @@ impl Tracker { } impl Drop for Tracker { - /// `Tracker` may be dropped without even calling `on_begin_all_items`. For example, if - /// get snapshot failed. So we fast-forward if some steps are missing. + /// `Tracker` may be dropped without even calling `on_begin_all_items`. For + /// example, if get snapshot failed. So we fast-forward if some steps + /// are missing. fn drop(&mut self) { if self.current_stage == TrackerState::Initialized { self.on_scheduled(); diff --git a/src/coprocessor_v2/endpoint.rs b/src/coprocessor_v2/endpoint.rs index 6a8f3e8a5f8..da6d4aa8521 100644 --- a/src/coprocessor_v2/endpoint.rs +++ b/src/coprocessor_v2/endpoint.rs @@ -49,8 +49,9 @@ impl Endpoint { /// Handles a request to the coprocessor framework. /// - /// Each request is dispatched to the corresponding coprocessor plugin based on it's `copr_name` - /// field. A plugin with a matching name must be loaded by TiKV, otherwise an error is returned. + /// Each request is dispatched to the corresponding coprocessor plugin based + /// on it's `copr_name` field. A plugin with a matching name must be loaded + /// by TiKV, otherwise an error is returned. #[inline] pub fn handle_request( &self, diff --git a/src/coprocessor_v2/mod.rs b/src/coprocessor_v2/mod.rs index bcadbb72cfc..d1a045b7b0f 100644 --- a/src/coprocessor_v2/mod.rs +++ b/src/coprocessor_v2/mod.rs @@ -2,25 +2,27 @@ //! # TiKV's Coprocessor Framework //! -//! A coprocessor framework that allows custom, pluggable coprocessor plugins to execute arbitrary -//! user requests directly on TiKV nodes. +//! A coprocessor framework that allows custom, pluggable coprocessor plugins to +//! execute arbitrary user requests directly on TiKV nodes. //! -//! *Note: While there currently also exists a different [coprocessor][super::coprocessor] that is -//! 
designed to execute a defined set of functions on TiKV nodes, this coprocessor framework allows -//! to register "coprocessor plugins" that can execute arbitrary code directly on TiKV nodes. -//! The long-term goal is to fully replace the existing coprocessor with an equivalent plugin for -//! this coprocessor.* +//! *Note: While there currently also exists a different +//! [coprocessor][super::coprocessor] that is designed to execute a defined set +//! of functions on TiKV nodes, this coprocessor framework allows to register +//! "coprocessor plugins" that can execute arbitrary code directly on TiKV +//! nodes. The long-term goal is to fully replace the existing coprocessor with +//! an equivalent plugin for this coprocessor.* //! //! ## Background //! //! The design of the coprocessor framework follows closely the principles of -//! [HBase's coprocessor][hbase-copr] which in turn is built on the ideas of the coprocessor -//! framework in Google's BigTable. +//! [HBase's coprocessor][hbase-copr] which in turn is built on the ideas of the +//! coprocessor framework in Google's BigTable. //! -//! By registering new coprocessor plugins, users are able to extend the functionality of TiKV and -//! run code directly on storage nodes. This usually leads to dramatically increased performance -//! because the CPU of TiKV nodes can be utilized for computation and the amount of data transfer -//! can be reduced. +//! By registering new coprocessor plugins, users are able to extend the +//! functionality of TiKV and run code directly on storage nodes. This usually +//! leads to dramatically increased performance because the CPU of TiKV nodes +//! can be utilized for computation and the amount of data transfer can be +//! reduced. //! //! //! 
[hbase-copr]: https://blogs.apache.org/hbase/entry/coprocessor_introduction diff --git a/src/coprocessor_v2/plugin_registry.rs b/src/coprocessor_v2/plugin_registry.rs index bfdc5ac2fc7..c02a652fc88 100644 --- a/src/coprocessor_v2/plugin_registry.rs +++ b/src/coprocessor_v2/plugin_registry.rs @@ -100,13 +100,15 @@ impl PluginRegistry { /// Hot-reloads plugins from a given directory. /// /// All plugins that are already present in the directory will be loaded. - /// A background thread is spawned to watch file system events. If the library file of a loaded - /// plugin is deleted, the corresponding plugin is automatically unloaded; if a new library file - /// is placed into the directory, it will be automatically loaded into TiKV's coprocessor plugin - /// system. + /// A background thread is spawned to watch file system events. If the + /// library file of a loaded plugin is deleted, the corresponding plugin + /// is automatically unloaded; if a new library file is placed into the + /// directory, it will be automatically loaded into TiKV's coprocessor + /// plugin system. /// - /// A file will only be loaded if it has the proper file ending of dynamic link libraries for - /// the current platform (`.so` for Linux, `.dylib` for macOS, `.dll` for Windows). + /// A file will only be loaded if it has the proper file ending of dynamic + /// link libraries for the current platform (`.so` for Linux, `.dylib` + /// for macOS, `.dll` for Windows). pub fn start_hot_reloading( &mut self, plugin_directory: impl Into, @@ -116,9 +118,9 @@ impl PluginRegistry { // Create plugin directory if it doesn't exist. std::fs::create_dir_all(&plugin_directory)?; - // If this is the first call to `start_hot_reloading()`, create a new file system watcher - // and background thread for loading plugins. For later invocations, the same watcher and - // thread will be used. 
+ // If this is the first call to `start_hot_reloading()`, create a new file + // system watcher and background thread for loading plugins. For later + // invocations, the same watcher and thread will be used. if self.fs_watcher.is_none() { let (tx, rx) = mpsc::channel(); let fs_watcher = notify::watcher(tx, Duration::from_secs(3)).unwrap(); @@ -150,7 +152,8 @@ impl PluginRegistry { warn!("a loaded coprocessor plugin is removed. Be aware that original plugin is still running"; "plugin_path" => ?file); } Ok(DebouncedEvent::Rename(old_file, new_file)) => { - // If the file is renamed with a different parent directory, we will receive a `Remove` instead. + // If the file is renamed with a different parent directory, we will + // receive a `Remove` instead. debug_assert!(old_file.parent() == new_file.parent()); rename(&old_file, &new_file); } @@ -180,18 +183,21 @@ impl PluginRegistry { Ok(()) } - /// Finds a plugin by its name. The plugin must have been loaded before with [`load_plugin()`]. + /// Finds a plugin by its name. The plugin must have been loaded before with + /// [`load_plugin()`]. /// - /// Plugins are indexed by the name that is returned by [`CoprocessorPlugin::name()`]. + /// Plugins are indexed by the name that is returned by + /// [`CoprocessorPlugin::name()`]. pub fn get_plugin(&self, plugin_name: &str) -> Option> { self.inner.read().unwrap().get_plugin(plugin_name) } /// finds a plugin by its associated file path, similar to [`get_plugin()`]. /// - /// The given path has to be exactly the same as the one the plugin with loaded with, e.g. - /// `"./coprocessors/plugin1.so"` would be *different* from `"coprocessors/plugin1.so"` - /// (note the leading `./`). The same applies when the associated path was changed with + /// The given path has to be exactly the same as the one the plugin with + /// loaded with, e.g. `"./coprocessors/plugin1.so"` would be *different* + /// from `"coprocessors/plugin1.so"` (note the leading `./`). 
The same + /// applies when the associated path was changed with /// [`update_plugin_path()`]. pub fn get_plugin_by_path>(&self, plugin_path: P) -> Option> { self.inner.read().unwrap().get_plugin_by_path(plugin_path) @@ -200,7 +206,8 @@ impl PluginRegistry { /// Returns the names of the currently loaded plugins. /// The order of plugin names is arbitrary. pub fn loaded_plugin_names(&self) -> Vec { - // Collect names into vector so we can release the `RwLockReadGuard` before we return. + // Collect names into vector so we can release the `RwLockReadGuard` before we + // return. self.inner .read() .unwrap() @@ -211,9 +218,9 @@ impl PluginRegistry { /// Loads a [`CoprocessorPlugin`] from a `dylib`. /// - /// After this function has successfully finished, the plugin is registered with the - /// [`PluginRegistry`] and can later be obtained by calling [`get_plugin()`] with the proper - /// name. + /// After this function has successfully finished, the plugin is registered + /// with the [`PluginRegistry`] and can later be obtained by calling + /// [`get_plugin()`] with the proper name. /// /// Returns the name of the loaded plugin. pub fn load_plugin>(&self, file_name: P) -> Result { @@ -223,10 +230,12 @@ impl PluginRegistry { /// Attempts to load all plugins from a given directory. /// /// Returns a list of the names of all successfully loaded plugins. - /// If a file could not be successfully loaded as a plugin, it will be discarded. + /// If a file could not be successfully loaded as a plugin, it will be + /// discarded. /// - /// The plugins have to follow the system's naming convention in order to be loaded, e.g. `.so` - /// for Linux, `.dylib` for macOS and `.dll` for Windows. + /// The plugins have to follow the system's naming convention in order to be + /// loaded, e.g. `.so` for Linux, `.dylib` for macOS and `.dll` for + /// Windows. 
pub fn load_plugins_from_dir( &self, dir_name: impl Into, @@ -255,8 +264,8 @@ impl PluginRegistry { /// Updates the associated file path for plugin. /// - /// This function should be used to maintain consistent state when the underlying file of a - /// plugin was renamed or moved. + /// This function should be used to maintain consistent state when the + /// underlying file of a plugin was renamed or moved. pub fn update_plugin_path>(&self, plugin_name: &str, new_path: P) { self.inner .write() @@ -264,7 +273,8 @@ impl PluginRegistry { .update_plugin_path(plugin_name, new_path) } - /// Returns the associated file path for the plugin for the given `plugin_name`. + /// Returns the associated file path for the plugin for the given + /// `plugin_name`. pub fn get_path_for_plugin(&self, plugin_name: &str) -> Option { self.inner .read() @@ -368,24 +378,26 @@ pub struct LoadedPlugin { } impl LoadedPlugin { - /// Creates a new `LoadedPlugin` by loading a `dylib` from a file into memory. + /// Creates a new `LoadedPlugin` by loading a `dylib` from a file into + /// memory. /// /// The `file_path` argument may be any of: - /// * A simple filename of a library if the library is in any of the platform-specific locations - /// from where libraries are usually loaded, e.g. the current directory or in - /// `LD_LIBRARY_PATH` on unix systems. + /// * A simple filename of a library if the library is in any of the + /// platform-specific locations from where libraries are usually loaded, + /// e.g. the current directory or in `LD_LIBRARY_PATH` on unix systems. /// * Absolute path to the library /// * Relative (to the current working directory) path to the library /// - /// The function instantiates the plugin by calling `_plugin_create()` to obtain a - /// [`CoprocessorPlugin`]. + /// The function instantiates the plugin by calling `_plugin_create()` to + /// obtain a [`CoprocessorPlugin`]. 
/// /// # Safety /// - /// The library **must** contain a function with name [`PLUGIN_CONSTRUCTOR_SYMBOL`] and the - /// signature of [`PluginConstructorSignature`]. Otherwise, behavior is undefined. - /// See also [`libloading::Library::get()`] for more information on what restrictions apply to - /// [`PLUGIN_CONSTRUCTOR_SYMBOL`]. + /// The library **must** contain a function with name + /// [`PLUGIN_CONSTRUCTOR_SYMBOL`] and the signature of + /// [`PluginConstructorSignature`]. Otherwise, behavior is undefined. + /// See also [`libloading::Library::get()`] for more information on what + /// restrictions apply to [`PLUGIN_CONSTRUCTOR_SYMBOL`]. pub unsafe fn new>(file_path: P) -> Result { let lib = Library::new(&file_path)?; @@ -559,7 +571,8 @@ mod tests { // trigger loading std::fs::copy(&original_library_path, &library_path).unwrap(); - // fs watcher detects changes in every 3 seconds, therefore, wait 4 seconds so as to make sure the watcher is triggered. + // fs watcher detects changes in every 3 seconds, therefore, wait 4 seconds so + // as to make sure the watcher is triggered. std::thread::sleep(Duration::from_secs(4)); assert!(registry.get_plugin(plugin_name).is_some()); @@ -570,7 +583,8 @@ mod tests { // trigger rename std::fs::rename(&library_path, &library_path_2).unwrap(); - // fs watcher detects changes in every 3 seconds, therefore, wait 4 seconds so as to make sure the watcher is triggered. + // fs watcher detects changes in every 3 seconds, therefore, wait 4 seconds so + // as to make sure the watcher is triggered. std::thread::sleep(Duration::from_secs(4)); assert!(registry.get_plugin(plugin_name).is_some()); @@ -580,7 +594,8 @@ mod tests { ); std::fs::remove_file(&library_path_2).unwrap(); - // fs watcher detects changes in every 3 seconds, therefore, wait 4 seconds so as to make sure the watcher is triggered. + // fs watcher detects changes in every 3 seconds, therefore, wait 4 seconds so + // as to make sure the watcher is triggered. 
std::thread::sleep(Duration::from_secs(4)); // plugin will not be unloadad diff --git a/src/coprocessor_v2/raw_storage_impl.rs b/src/coprocessor_v2/raw_storage_impl.rs index 7ef7b59010a..fc505c50312 100644 --- a/src/coprocessor_v2/raw_storage_impl.rs +++ b/src/coprocessor_v2/raw_storage_impl.rs @@ -19,16 +19,18 @@ use crate::storage::{ /// Implementation of the [`RawStorage`] trait. /// -/// It wraps TiKV's [`Storage`] into an API that is exposed to coprocessor plugins. -/// The `RawStorageImpl` should be constructed for every invocation of a [`CoprocessorPlugin`] as -/// it wraps a [`Context`] that is unique for every request. +/// It wraps TiKV's [`Storage`] into an API that is exposed to coprocessor +/// plugins. The `RawStorageImpl` should be constructed for every invocation of +/// a [`CoprocessorPlugin`] as it wraps a [`Context`] that is unique for every +/// request. pub struct RawStorageImpl<'a, E: Engine, L: LockManager, F: KvFormat> { context: Context, storage: &'a Storage, } impl<'a, E: Engine, L: LockManager, F: KvFormat> RawStorageImpl<'a, E, L, F> { - /// Constructs a new `RawStorageImpl` that wraps a given [`Context`] and [`Storage`]. + /// Constructs a new `RawStorageImpl` that wraps a given [`Context`] and + /// [`Storage`]. pub fn new(context: Context, storage: &'a Storage) -> Self { RawStorageImpl { context, storage } } @@ -190,7 +192,8 @@ impl From for PluginErrorShim { storage::errors::ErrorInner::Kv(KvError(box KvErrorInner::Timeout(duration))) => { PluginError::Timeout(duration) } - // Other errors are passed as-is inside their `Result` so we get a `&Result` when using `Any::downcast_ref`. + // Other errors are passed as-is inside their `Result` so we get a `&Result` when using + // `Any::downcast_ref`. 
_ => PluginError::Other( format!("{}", &error), Box::new(storage::Result::<()>::Err(error)), diff --git a/src/import/duplicate_detect.rs b/src/import/duplicate_detect.rs index 3ae9360e727..86e955c6cd2 100644 --- a/src/import/duplicate_detect.rs +++ b/src/import/duplicate_detect.rs @@ -397,13 +397,15 @@ mod tests { } // There are 40 key-value pairs in db, there are - // [100, 101, 102, 103, 104, 105, 106, 107, 108, 109] with commit timestamp 10 - // [104, 105, 106, 107, 108, 109, 110, 111, 112, 113] with commit timestamp 14, these 20 keys - // have existed in db before importing. So we do not think (105,10) is repeated with (105,14). - // [108, 109, 110, 111, 112, 113, 114, 115, 116, 117] with commit timestamp 18 - // [112, 113, 114, 115, 116, 117, 118, 119, 120, 121] with commit timestamp 22, these 20 keys - // are imported by lightning. So (108,18) is repeated with (108,14), but (108,18) is not repeated - // with (108,10). + // - [100, 101, 102, 103, 104, 105, 106, 107, 108, 109] with commit timestamp 10 + // - [104, 105, 106, 107, 108, 109, 110, 111, 112, 113] with commit timestamp + // 14, these 20 keys have existed in db before importing. So we do not think + // (105,10) is repeated with (105,14). + // - [108, 109, 110, 111, 112, 113, 114, 115, 116, 117] with commit timestamp 18 + // - [112, 113, 114, 115, 116, 117, 118, 119, 120, 121] with commit timestamp + // 22, these 20 keys + // are imported by lightning. So (108,18) is repeated with (108,14), but + // (108,18) is not repeated with (108,10). #[test] fn test_duplicate_detect_incremental() { let storage = TestStorageBuilderApiV1::new(DummyLockManager) diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index a81a34b1e71..36089e41fd1 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -422,7 +422,8 @@ where self.threads.spawn_ok(handle_task); } - // Downloads KV file and performs key-rewrite then apply kv into this tikv store. 
+ // Downloads KV file and performs key-rewrite then apply kv into this tikv + // store. fn apply( &mut self, _ctx: RpcContext<'_>, @@ -629,7 +630,6 @@ where } /// Ingest multiple files by sending a raft command to raftstore. - /// fn multi_ingest( &mut self, ctx: RpcContext<'_>, @@ -858,7 +858,8 @@ fn pb_error_inc(type_: &str, e: &errorpb::Error) { enum RequestCollector { /// Retain the last ts of each key in each request. - /// This is used for write CF because resolved ts observer hates duplicated key in the same request. + /// This is used for write CF because resolved ts observer hates duplicated + /// key in the same request. RetainLastTs(HashMap, (Request, u64)>), /// Collector favor that simple collect all items. /// This is used for default CF. @@ -941,9 +942,10 @@ fn make_request(reqs: &mut RequestCollector, context: Context) -> RaftCmdRequest let mut cmd = RaftCmdRequest::default(); let mut header = make_request_header(context); // Set the UUID of header to prevent raftstore batching our requests. - // The current `resolved_ts` observer assumes that each batch of request doesn't has - // two writes to the same key. (Even with 2 different TS). That was true for normal cases - // because the latches reject concurrency write to keys. However we have bypassed the latch layer :( + // The current `resolved_ts` observer assumes that each batch of request doesn't + // has two writes to the same key. (Even with 2 different TS). That was true + // for normal cases because the latches reject concurrency write to keys. 
+ // However we have bypassed the latch layer :( header.set_uuid(uuid::Uuid::new_v4().as_bytes().to_vec()); cmd.set_header(header); cmd.set_requests(reqs.drain().into()); diff --git a/src/server/config.rs b/src/server/config.rs index 8a581d5eeba..88d167d2e64 100644 --- a/src/server/config.rs +++ b/src/server/config.rs @@ -164,9 +164,10 @@ pub struct Config { #[doc(hidden)] #[online_config(skip)] - /// When TiKV memory usage reaches `memory_usage_high_water` it will try to limit memory - /// increasing. For server layer some messages will be rejected or droped, if they utilize - /// memory more than `reject_messages_on_memory_ratio` * total. + /// When TiKV memory usage reaches `memory_usage_high_water` it will try to + /// limit memory increasing. For server layer some messages will be rejected + /// or dropped, if they utilize memory more than + /// `reject_messages_on_memory_ratio` * total. /// /// Set it to 0 can disable message rejecting. // By default it's 0.2. So for different memory capacity, messages are rejected when: @@ -384,8 +385,8 @@ impl Config { } if self.heavy_load_threshold > 100 { - // The configuration has been changed to describe CPU usage of a single thread instead - // of all threads. So migrate from the old style. + // The configuration has been changed to describe CPU usage of a single thread + // instead of all threads. So migrate from the old style. self.heavy_load_threshold = 75; } diff --git a/src/server/debug.rs b/src/server/debug.rs index 7bfa2aa438e..03630cf930a 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -337,7 +337,8 @@ impl Debugger { } /// Set regions to tombstone by manual, and apply other status(such as - /// peers, version, and key range) from `region` which comes from PD normally. + /// peers, version, and key range) from `region` which comes from PD + /// normally. 
pub fn set_region_tombstone(&self, regions: Vec) -> Result> { let store_id = self.get_store_ident()?.get_store_id(); let db = &self.engines.kv; @@ -675,8 +676,9 @@ impl Debugger { for region_id in region_ids { let region_state = self.region_info(region_id)?; - // It's safe to unwrap region_local_state here, because get_all_regions_in_store() - // guarantees that the region state exists in kvdb. + // It's safe to unwrap region_local_state here, because + // get_all_regions_in_store() guarantees that the region state + // exists in kvdb. if region_state.region_local_state.unwrap().state == PeerState::Tombstone { continue; } @@ -1933,7 +1935,8 @@ mod tests { // last index < commit index mock_raft_state(&mut wb1, 10, 100, 110); - // commit index < last index < apply index, or commit index < apply index < last index. + // commit index < last index < apply index, or commit index < apply index < last + // index. mock_raft_state(&mut wb1, 11, 100, 90); mock_apply_state(&mut wb2, 11, 110); mock_raft_state(&mut wb1, 12, 100, 90); @@ -2012,10 +2015,14 @@ mod tests { lock.extend(vec![ // key, start_ts, for_update_ts, lock_type, short_value, check (b"k1", 100, 0, LockType::Put, false, Expect::Remove), // k1: remove orphan lock. - (b"k2", 100, 0, LockType::Delete, false, Expect::Keep), // k2: Delete doesn't need default. - (b"k3", 100, 0, LockType::Put, true, Expect::Keep), // k3: short value doesn't need default. - (b"k4", 100, 0, LockType::Put, false, Expect::Keep), // k4: corresponding default exists. - (b"k5", 100, 0, LockType::Put, false, Expect::Remove), // k5: duplicated lock and write. + (b"k2", 100, 0, LockType::Delete, false, Expect::Keep), /* k2: Delete doesn't need + * default. */ + (b"k3", 100, 0, LockType::Put, true, Expect::Keep), /* k3: short value doesn't need + * default. */ + (b"k4", 100, 0, LockType::Put, false, Expect::Keep), /* k4: corresponding default + * exists. 
*/ + (b"k5", 100, 0, LockType::Put, false, Expect::Remove), /* k5: duplicated lock and + * write. */ ]); write.extend(vec![ // key, start_ts, commit_ts, write_type, short_value, check @@ -2032,11 +2039,14 @@ mod tests { ]); write.extend(vec![ // key, start_ts, commit_ts, write_type, short_value - (b"k6", 100, 101, WriteType::Put, true, Expect::Keep), // short value doesn't need default. - (b"k6", 99, 99, WriteType::Rollback, false, Expect::Keep), // rollback doesn't need default. - (b"k6", 97, 98, WriteType::Delete, false, Expect::Keep), // delete doesn't need default. - (b"k6", 94, 94, WriteType::Put, false, Expect::Keep), // ok. - (b"k6", 92, 93, WriteType::Put, false, Expect::Remove), // extra write. + (b"k6", 100, 101, WriteType::Put, true, Expect::Keep), /* short value doesn't need + * default. */ + (b"k6", 99, 99, WriteType::Rollback, false, Expect::Keep), /* rollback doesn't need + * default. */ + (b"k6", 97, 98, WriteType::Delete, false, Expect::Keep), /* delete doesn't need + * default. */ + (b"k6", 94, 94, WriteType::Put, false, Expect::Keep), // ok. + (b"k6", 92, 93, WriteType::Put, false, Expect::Remove), // extra write. 
(b"k6", 90, 91, WriteType::Delete, false, Expect::Keep), (b"k6", 88, 89, WriteType::Put, true, Expect::Keep), ]); @@ -2066,7 +2076,9 @@ mod tests { lock.extend(vec![ // key, start_ts, for_update_ts, lock_type, short_value, check (b"k8", 90, 105, LockType::Pessimistic, false, Expect::Remove), // newer writes exist - (b"k9", 90, 115, LockType::Put, true, Expect::Keep), // prewritten lock from a pessimistic txn + (b"k9", 90, 115, LockType::Put, true, Expect::Keep), /* prewritten lock + * from a pessimistic + * txn */ ]); write.extend(vec![ // key, start_ts, commit_ts, write_type, short_value diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 854c1fdd356..59315b4732d 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -182,12 +182,11 @@ impl KvEngineFactory { self.inner.api_version, ); // TODOTODO: call rust-rocks or tirocks to destroy_engine; - /* - engine_rocks::util::destroy_engine( - tablet_path.to_str().unwrap(), - kv_db_opts, - kv_cfs_opts, - )?;*/ + // engine_rocks::util::destroy_engine( + // tablet_path.to_str().unwrap(), + // kv_db_opts, + // kv_cfs_opts, + // )?; let _ = std::fs::remove_dir_all(tablet_path); Ok(()) } diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index e5237187886..80366cc17d1 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -300,8 +300,8 @@ mod tests { assert!(!factory.is_tombstoned(1, 10)); assert!(factory.load_tablet(&tablet_path, 1, 10).is_err()); assert!(factory.load_tablet(&tablet_path, 1, 20).is_ok()); - // After we load it as with the new id or suffix, we should be unable to get it with - // the old id and suffix in the cache. + // After we load it as with the new id or suffix, we should be unable to get it + // with the old id and suffix in the cache. 
assert!(factory.open_tablet_cache(1, 10).is_none()); assert!(factory.open_tablet_cache(1, 20).is_some()); diff --git a/src/server/gc_worker/applied_lock_collector.rs b/src/server/gc_worker/applied_lock_collector.rs index 9c30afc350b..a013d742890 100644 --- a/src/server/gc_worker/applied_lock_collector.rs +++ b/src/server/gc_worker/applied_lock_collector.rs @@ -34,9 +34,10 @@ const MAX_COLLECT_SIZE: usize = 1024; struct LockObserverState { max_ts: AtomicU64, - /// `is_clean` is true, only it's sure that all applying of stale locks (locks with start_ts <= - /// specified max_ts) are monitored and collected. If there are too many stale locks or any - /// error happens, `is_clean` must be set to `false`. + /// `is_clean` is true, only it's sure that all applying of stale locks + /// (locks with start_ts <= specified max_ts) are monitored and collected. + /// If there are too many stale locks or any error happens, `is_clean` + /// must be set to `false`. is_clean: AtomicBool, } @@ -112,9 +113,10 @@ impl Display for LockCollectorTask { } } -/// `LockObserver` observes apply events and apply snapshot events. If it happens in CF_LOCK, it -/// checks the `start_ts`s of the locks being written. If a lock's `start_ts` <= specified `max_ts` -/// in the `state`, it will send the lock to through the `sender`, so the receiver can collect it. +/// `LockObserver` observes apply events and apply snapshot events. If it +/// happens in CF_LOCK, it checks the `start_ts`s of the locks being written. If +/// a lock's `start_ts` <= specified `max_ts` in the `state`, it will send the +/// lock to through the `sender`, so the receiver can collect it. #[derive(Clone)] struct LockObserver { state: Arc, @@ -310,9 +312,9 @@ impl LockCollectorRunner { Greater => { info!("start collecting locks"; "max_ts" => max_ts); self.collected_locks.clear(); - // TODO: `is_clean` may be unexpectedly set to false here, if any error happens on a - // previous observing. 
It need to be solved, although it's very unlikely to happen and - // doesn't affect correctness of data. + // TODO: `is_clean` may be unexpectedly set to false here, if any error happens + // on a previous observing. It need to be solved, although it's very unlikely to + // happen and doesn't affect correctness of data. self.observer_state.mark_clean(); self.observer_state.store_max_ts(max_ts); Ok(()) @@ -420,21 +422,22 @@ impl AppliedLockCollector { self.worker.lock().unwrap().stop(); } - /// Starts collecting applied locks whose `start_ts` <= `max_ts`. Only one `max_ts` is valid - /// at one time. + /// Starts collecting applied locks whose `start_ts` <= `max_ts`. Only one + /// `max_ts` is valid at one time. pub fn start_collecting(&self, max_ts: TimeStamp, callback: Callback<()>) -> Result<()> { - // Before starting collecting, check the concurrency manager to avoid later prewrite - // requests uses a min_commit_ts less than the safepoint. + // Before starting collecting, check the concurrency manager to avoid later + // prewrite requests uses a min_commit_ts less than the safepoint. // `max_ts` here is the safepoint of the current round of GC. - // Ths is similar to that we update max_ts and check memory lock when handling other - // transactional read requests. However this is done at start_collecting instead of - // physical_scan_locks. The reason is that, to fully scan a TiKV store, it might needs more - // than one physical_scan_lock requests. However memory lock needs to be checked before - // scanning the locks, and we can't know the `end_key` of the scan range at that time. As - // a result, each physical_scan_lock request will cause scanning memory lock from the - // start_key to the very-end of the TiKV node, which is a waste. But since we always start - // collecting applied locks before physical scan lock, so a better idea is to check the - // memory lock before physical_scan_lock. 
+ // Ths is similar to that we update max_ts and check memory lock when handling + // other transactional read requests. However this is done at start_collecting + // instead of physical_scan_locks. The reason is that, to fully scan a TiKV + // store, it might needs more than one physical_scan_lock requests. However + // memory lock needs to be checked before scanning the locks, and we can't know + // the `end_key` of the scan range at that time. As a result, each + // physical_scan_lock request will cause scanning memory lock from the start_key + // to the very-end of the TiKV node, which is a waste. But since we always start + // collecting applied locks before physical scan lock, so a better idea is to + // check the memory lock before physical_scan_lock. self.concurrency_manager.update_max_ts(max_ts); self.concurrency_manager .read_range_check(None, None, |key, lock| { @@ -453,10 +456,11 @@ impl AppliedLockCollector { .map_err(|e| box_err!("failed to schedule task: {:?}", e)) } - /// Get the collected locks after `start_collecting`. Only valid when `max_ts` matches the - /// `max_ts` provided to `start_collecting`. - /// Collects at most `MAX_COLLECT_SIZE` locks. If there are (even potentially) more locks than - /// `MAX_COLLECT_SIZE` or any error happens, the flag `is_clean` will be unset, which represents + /// Get the collected locks after `start_collecting`. Only valid when + /// `max_ts` matches the `max_ts` provided to `start_collecting`. + /// Collects at most `MAX_COLLECT_SIZE` locks. If there are (even + /// potentially) more locks than `MAX_COLLECT_SIZE` or any error happens, + /// the flag `is_clean` will be unset, which represents /// `AppliedLockCollector` cannot collect all locks. pub fn get_collected_locks( &self, @@ -468,8 +472,8 @@ impl AppliedLockCollector { .map_err(|e| box_err!("failed to schedule task: {:?}", e)) } - /// Stop collecting locks. Only valid when `max_ts` matches the `max_ts` provided to - /// `start_collecting`. 
+ /// Stop collecting locks. Only valid when `max_ts` matches the `max_ts` + /// provided to `start_collecting`. pub fn stop_collecting(&self, max_ts: TimeStamp, callback: Callback<()>) -> Result<()> { self.scheduler .schedule(LockCollectorTask::StopCollecting { max_ts, callback }) @@ -588,8 +592,8 @@ mod tests { get_collected_locks(&c, 2).unwrap_err(); stop_collecting(&c, 2).unwrap_err(); - // When start_collecting is invoked with a larger ts, the later one will ovewrite the - // previous one. + // When start_collecting is invoked with a larger ts, the later one will + // ovewrite the previous one. start_collecting(&c, 3).unwrap(); assert_eq!(c.concurrency_manager.max_ts(), 3.into()); get_collected_locks(&c, 3).unwrap(); @@ -703,7 +707,8 @@ mod tests { (expected_result.clone(), true) ); - // When start collecting with the same max_ts again, shouldn't clean up the observer state. + // When start collecting with the same max_ts again, shouldn't clean up the + // observer state. start_collecting(&c, 100).unwrap(); assert_eq!( get_collected_locks(&c, 100).unwrap(), @@ -727,8 +732,8 @@ mod tests { (expected_result, true) ); - // When start_collecting is double-invoked again with larger ts, the previous results are - // dropped. + // When start_collecting is double-invoked again with larger ts, the previous + // results are dropped. start_collecting(&c, 110).unwrap(); assert_eq!(get_collected_locks(&c, 110).unwrap(), (vec![], true)); coprocessor_host.post_apply(&Region::default(), &make_raft_cmd(req)); @@ -789,8 +794,8 @@ mod tests { (expected_locks.clone(), true) ); - // When stale start_collecting request arrives, the previous collected results shouldn't - // be dropped. + // When stale start_collecting request arrives, the previous collected results + // shouldn't be dropped. 
start_collecting(&c, 100).unwrap(); assert_eq!( get_collected_locks(&c, 100).unwrap(), @@ -802,8 +807,8 @@ mod tests { (expected_locks, true) ); - // When start_collecting is double-invoked again with larger ts, the previous results are - // dropped. + // When start_collecting is double-invoked again with larger ts, the previous + // results are dropped. start_collecting(&c, 110).unwrap(); assert_eq!(get_collected_locks(&c, 110).unwrap(), (vec![], true)); coprocessor_host.post_apply_plain_kvs_from_snapshot(&Region::default(), CF_LOCK, &lock_kvs); @@ -813,8 +818,8 @@ mod tests { coprocessor_host.post_apply_sst_from_snapshot(&Region::default(), CF_DEFAULT, ""); assert_eq!(get_collected_locks(&c, 110).unwrap(), (locks.clone(), true)); - // Apply SST file to lock cf is not supported. This will cause error and therefore - // `is_clean` will be set to false. + // Apply SST file to lock cf is not supported. This will cause error and + // therefore `is_clean` will be set to false. coprocessor_host.post_apply_sst_from_snapshot(&Region::default(), CF_LOCK, ""); assert_eq!(get_collected_locks(&c, 110).unwrap(), (locks, false)); } diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index 8d914080279..165a1f62ddf 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -41,13 +41,15 @@ use crate::{ const DEFAULT_DELETE_BATCH_SIZE: usize = 256 * 1024; pub const DEFAULT_DELETE_BATCH_COUNT: usize = 128; -// The default version that can enable compaction filter for GC. This is necessary because after -// compaction filter is enabled, it's impossible to fallback to ealier version which modifications -// of GC are distributed to other replicas by Raft. +// The default version that can enable compaction filter for GC. 
This is +// necessary because after compaction filter is enabled, it's impossible to +// fallback to earlier version which modifications of GC are distributed to +// other replicas by Raft. const COMPACTION_FILTER_GC_FEATURE: Feature = Feature::require(5, 0, 0); -// Global context to create a compaction filter for write CF. It's necessary as these fields are -// not available when constructing `WriteCompactionFilterFactory`. +// Global context to create a compaction filter for write CF. It's necessary as +// these fields are not available when constructing +// `WriteCompactionFilterFactory`. pub struct GcContext { pub(crate) db: RocksEngine, pub(crate) store_id: u64, @@ -338,8 +340,8 @@ impl WriteCompactionFilter { } } - // `log_on_error` indicates whether to print an error log on scheduling failures. - // It's only enabled for `GcTask::OrphanVersions`. + // `log_on_error` indicates whether to print an error log on scheduling + // failures. It's only enabled for `GcTask::OrphanVersions`. fn schedule_gc_task(&self, task: GcTask, log_on_error: bool) { match self.gc_scheduler.schedule(task) { Ok(_) => {} @@ -432,7 +434,7 @@ impl WriteCompactionFilter { } self.filtered += 1; self.handle_filtered_write(write)?; - self.flush_pending_writes_if_need(false /*force*/)?; + self.flush_pending_writes_if_need(false /* force */)?; let decision = if self.remove_older { // Use `Decision::RemoveAndSkipUntil` instead of `Decision::Remove` to avoid // leaving tombstones, which can only be freed at the bottommost level. @@ -566,8 +568,8 @@ thread_local! { } impl Drop for WriteCompactionFilter { - // NOTE: it's required that `CompactionFilter` is dropped before the compaction result - // becomes installed into the DB instance. + // NOTE: it's required that `CompactionFilter` is dropped before the compaction + // result becomes installed into the DB instance. 
fn drop(&mut self) { if self.mvcc_deletion_overlaps.take() == Some(0) { self.handle_bottommost_delete(); @@ -652,7 +654,7 @@ fn check_need_gc( ratio_threshold: f64, context: &CompactionFilterContext, ) -> bool { - let check_props = |props: &MvccProperties| -> (bool, bool /*skip_more_checks*/) { + let check_props = |props: &MvccProperties| -> (bool, bool /* skip_more_checks */) { if props.min_ts > safe_point { return (false, false); } @@ -668,8 +670,9 @@ fn check_need_gc( return (true, false); } - // When comparing `num_versions` with `num_puts`, trait internal levels specially - // because MVCC-deletion marks can't be handled at those levels. + // When comparing `num_versions` with `num_puts`, trait internal levels + // specially because MVCC-deletion marks can't be handled at those + // levels. let num_rollback_and_locks = (props.num_versions - props.num_deletes) as f64; if num_rollback_and_locks > props.num_puts as f64 * ratio_threshold { return (true, false); @@ -973,7 +976,8 @@ pub mod tests { must_prewrite_delete(&engine, b"zkey", b"zkey", 120); must_commit(&engine, b"zkey", 120, 130); - // No GC task should be emit because the mvcc-deletion mark covers some older versions. + // No GC task should be emit because the mvcc-deletion mark covers some older + // versions. gc_and_check(false, b"zkey"); // A GC task should be emit after older versions are cleaned. gc_and_check(true, b"zkey"); @@ -995,14 +999,15 @@ pub mod tests { must_prewrite_put(&engine, b"zkey2", &value, b"zkey2", 220); must_commit(&engine, b"zkey2", 220, 230); - // No GC task should be emit because the mvcc-deletion mark covers some older versions. + // No GC task should be emit because the mvcc-deletion mark covers some older + // versions. gc_and_check(false, b"zkey1"); // A GC task should be emit after older versions are cleaned. gc_and_check(true, b"zkey1"); } - // Test if there are not enought garbage in SST files involved by a compaction, no compaction - // filter will be created. 
+ // Test if there are not enought garbage in SST files involved by a compaction, + // no compaction filter will be created. #[test] fn test_mvcc_properties() { let mut cfg = DbConfig::default(); @@ -1031,7 +1036,8 @@ pub mod tests { gc_runner.target_level = Some(6); gc_runner.safe_point(100).gc(&raw_engine); - // Can perform GC at the bottommost level even if the threshold can't be reached. + // Can perform GC at the bottommost level even if the threshold can't be + // reached. gc_runner.ratio_threshold = Some(10.0); gc_runner.target_level = Some(6); gc_runner.safe_point(140).gc(&raw_engine); @@ -1062,12 +1068,12 @@ pub mod tests { } } - // If we use `CompactionFilterDecision::RemoveAndSkipUntil` in compaction filters, - // deletion marks can only be handled in the bottommost level. Otherwise dirty - // versions could be exposed incorrectly. + // If we use `CompactionFilterDecision::RemoveAndSkipUntil` in compaction + // filters, deletion marks can only be handled in the bottommost level. + // Otherwise dirty versions could be exposed incorrectly. // - // This case tests that deletion marks won't be handled at internal levels, and at - // the bottommost levels, dirty versions still can't be exposed. + // This case tests that deletion marks won't be handled at internal levels, and + // at the bottommost levels, dirty versions still can't be exposed. #[test] fn test_remove_and_skip_until() { let mut cfg = DbConfig::default(); diff --git a/src/server/gc_worker/config.rs b/src/server/gc_worker/config.rs index 9406e39d993..1816dd845e1 100644 --- a/src/server/gc_worker/config.rs +++ b/src/server/gc_worker/config.rs @@ -18,8 +18,9 @@ pub struct GcConfig { pub batch_keys: usize, pub max_write_bytes_per_sec: ReadableSize, pub enable_compaction_filter: bool, - /// By default compaction_filter can only works if `cluster_version` is greater than 5.0.0. - /// Change `compaction_filter_skip_version_check` can enable it by force. 
+ /// By default compaction_filter can only works if `cluster_version` is + /// greater than 5.0.0. Change `compaction_filter_skip_version_check` + /// can enable it by force. pub compaction_filter_skip_version_check: bool, } diff --git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index b009c80b728..7fdc440527f 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -36,18 +36,19 @@ pub struct AutoGcConfig { pub safe_point_provider: S, pub region_info_provider: R, - /// Used to find which peer of a region is on this TiKV, so that we can compose a `Context`. + /// Used to find which peer of a region is on this TiKV, so that we can + /// compose a `Context`. pub self_store_id: u64, pub poll_safe_point_interval: Duration, - /// If this is set, safe_point will be checked before doing GC on every region while working. - /// Otherwise safe_point will be only checked when `poll_safe_point_interval` has past since - /// last checking. + /// If this is set, safe_point will be checked before doing GC on every + /// region while working. Otherwise safe_point will be only checked when + /// `poll_safe_point_interval` has past since last checking. pub always_check_safe_point: bool, - /// This will be called when a round of GC has finished and goes back to idle state. - /// This field is for test purpose. + /// This will be called when a round of GC has finished and goes back to + /// idle state. This field is for test purpose. pub post_a_round_of_gc: Option>, } @@ -64,8 +65,8 @@ impl AutoGcConfig { } } - /// Creates a config for test purpose. The interval to poll safe point is as short as 0.1s and - /// during GC it never skips checking safe point. + /// Creates a config for test purpose. The interval to poll safe point is as + /// short as 0.1s and during GC it never skips checking safe point. 
pub fn new_test_cfg( safe_point_provider: S, region_info_provider: R, @@ -82,8 +83,8 @@ impl AutoGcConfig { } } -/// The only error that will break `GcManager`'s process is that the `GcManager` is interrupted by -/// others, maybe due to TiKV shutting down. +/// The only error that will break `GcManager`'s process is that the `GcManager` +/// is interrupted by others, maybe due to TiKV shutting down. #[derive(Debug)] enum GcManagerError { Stopped, @@ -93,11 +94,12 @@ type GcManagerResult = std::result::Result; /// Used to check if `GcManager` should be stopped. /// -/// When `GcManager` is running, it might take very long time to GC a round. It should be able to -/// break at any time so that we can shut down TiKV in time. +/// When `GcManager` is running, it might take very long time to GC a round. It +/// should be able to break at any time so that we can shut down TiKV in time. pub(super) struct GcManagerContext { - /// Used to receive stop signal. The sender side is hold in `GcManagerHandle`. - /// If this field is `None`, the `GcManagerContext` will never stop. + /// Used to receive stop signal. The sender side is hold in + /// `GcManagerHandle`. If this field is `None`, the `GcManagerContext` + /// will never stop. stop_signal_receiver: Option>, /// Whether an stop signal is received. is_stopped: bool, @@ -111,14 +113,15 @@ impl GcManagerContext { } } - /// Sets the receiver that used to receive the stop signal. `GcManagerContext` will be - /// considered to be stopped as soon as a message is received from the receiver. + /// Sets the receiver that used to receive the stop signal. + /// `GcManagerContext` will be considered to be stopped as soon as a + /// message is received from the receiver. pub fn set_stop_signal_receiver(&mut self, rx: mpsc::Receiver<()>) { self.stop_signal_receiver = Some(rx); } - /// Sleeps for a while. if a stop message is received, returns immediately with - /// `GcManagerError::Stopped`. + /// Sleeps for a while. 
if a stop message is received, returns immediately + /// with `GcManagerError::Stopped`. fn sleep_or_stop(&mut self, timeout: Duration) -> GcManagerResult<()> { if self.is_stopped { return Err(GcManagerError::Stopped); @@ -141,8 +144,8 @@ impl GcManagerContext { } } - /// Checks if a stop message has been fired. Returns `GcManagerError::Stopped` if there's such - /// a message. + /// Checks if a stop message has been fired. Returns + /// `GcManagerError::Stopped` if there's such a message. fn check_stopped(&mut self) -> GcManagerResult<()> { if self.is_stopped { return Err(GcManagerError::Stopped); @@ -197,7 +200,8 @@ fn set_status_metrics(state: GcManagerState) { } } -/// Wraps `JoinHandle` of `GcManager` and helps to stop the `GcManager` synchronously. +/// Wraps `JoinHandle` of `GcManager` and helps to stop the `GcManager` +/// synchronously. pub(super) struct GcManagerHandle { join_handle: JoinHandle<()>, stop_signal_sender: mpsc::Sender<()>, @@ -218,13 +222,15 @@ impl GcManagerHandle { } /// Controls how GC runs automatically on the TiKV. -/// It polls safe point periodically, and when the safe point is updated, `GcManager` will start to -/// scan all regions (whose leader is on this TiKV), and does GC on all those regions. +/// It polls safe point periodically, and when the safe point is updated, +/// `GcManager` will start to scan all regions (whose leader is on this TiKV), +/// and does GC on all those regions. pub(super) struct GcManager { cfg: AutoGcConfig, - /// The current safe point. `GcManager` will try to update it periodically. When `safe_point` is - /// updated, `GCManager` will start to do GC on all regions. + /// The current safe point. `GcManager` will try to update it periodically. + /// When `safe_point` is updated, `GCManager` will start to do GC on all + /// regions. safe_point: Arc, safe_point_last_check_time: Instant, @@ -232,7 +238,8 @@ pub(super) struct GcManager>, - /// Holds the running status. 
It will tell us if `GcManager` should stop working and exit. + /// Holds the running status. It will tell us if `GcManager` should stop + /// working and exit. gc_manager_ctx: GcManagerContext, cfg_tracker: GcWorkerConfigManager, @@ -268,8 +275,8 @@ impl GcMan .store(ts.into_inner(), AtomicOrdering::Relaxed); } - /// Starts working in another thread. This function moves the `GcManager` and returns a handler - /// of it. + /// Starts working in another thread. This function moves the `GcManager` + /// and returns a handler of it. pub fn start(mut self) -> Result { set_status_metrics(GcManagerState::Init); self.initialize(); @@ -292,8 +299,8 @@ impl GcMan }) } - /// Polls safe point and does GC in a loop, again and again, until interrupted by invoking - /// `GcManagerHandle::stop`. + /// Polls safe point and does GC in a loop, again and again, until + /// interrupted by invoking `GcManagerHandle::stop`. fn run(&mut self) { debug!("gc-manager is started"); self.run_impl().unwrap_err(); @@ -325,9 +332,10 @@ impl GcMan } /// Sets the initial state of the `GCManger`. - /// The only task of initializing is to simply get the current safe point as the initial value - /// of `safe_point`. TiKV won't do any GC automatically until the first time `safe_point` was - /// updated to a greater value than initial value. + /// The only task of initializing is to simply get the current safe point as + /// the initial value of `safe_point`. TiKV won't do any GC + /// automatically until the first time `safe_point` was updated to a + /// greater value than initial value. fn initialize(&mut self) { debug!("gc-manager is initializing"); self.save_safe_point(TimeStamp::zero()); @@ -347,8 +355,9 @@ impl GcMan } } - /// Tries to update the safe point. Returns true if safe point has been updated to a greater - /// value. Returns false if safe point didn't change or we encountered an error. + /// Tries to update the safe point. 
Returns true if safe point has been + /// updated to a greater value. Returns false if safe point didn't + /// change or we encountered an error. fn try_update_safe_point(&mut self) -> bool { self.safe_point_last_check_time = Instant::now(); @@ -380,13 +389,13 @@ impl GcMan } } - /// Scans all regions on the TiKV whose leader is this TiKV, and does GC on all of them. - /// Regions are scanned and GC-ed in lexicographical order. + /// Scans all regions on the TiKV whose leader is this TiKV, and does GC on + /// all of them. Regions are scanned and GC-ed in lexicographical order. /// - /// While the `gc_a_round` function is running, it will periodically check whether safe_point is - /// updated before the function `gc_a_round` finishes. If so, *Rewinding* will occur. For - /// example, when we just starts to do GC, our progress is like this: ('^' means our current - /// progress) + /// While the `gc_a_round` function is running, it will periodically check + /// whether safe_point is updated before the function `gc_a_round` finishes. + /// If so, *Rewinding* will occur. For example, when we just starts to do + /// GC, our progress is like this: ('^' means our current progress) /// /// ```text /// | region 1 | region 2 | region 3| region 4 | region 5 | region 6 | @@ -400,17 +409,18 @@ impl GcMan /// ----------------------^ /// ``` /// - /// At this time we found that safe point was updated, so rewinding will happen. First we - /// continue working to the end: ('#' indicates the position that safe point updates) + /// At this time we found that safe point was updated, so rewinding will + /// happen. First we continue working to the end: ('#' indicates the + /// position that safe point updates) /// /// ```text /// | region 1 | region 2 | region 3| region 4 | region 5 | region 6 | /// ----------------------#------------------------------------------^ /// ``` /// - /// Then region 1-2 were GC-ed with the old safe point and region 3-6 were GC-ed with the new - /// new one. 
Then, we *rewind* to the very beginning and continue GC to the position that safe - /// point updates: + /// Then region 1-2 were GC-ed with the old safe point and region 3-6 were + /// GC-ed with the new one. Then, we *rewind* to the very beginning + /// and continue GC to the position that safe point updates: /// /// ```text /// | region 1 | region 2 | region 3| region 4 | region 5 | region 6 | /// ----------------------------------------------^------------------- /// ``` /// /// Then GC finishes. - /// If safe point updates again at some time, it will still try to GC all regions with the - /// latest safe point. If safe point always updates before `gc_a_round` finishes, `gc_a_round` - /// may never stop, but it doesn't matter. + /// If safe point updates again at some time, it will still try to GC all + /// regions with the latest safe point. If safe point always updates + /// before `gc_a_round` finishes, `gc_a_round` may never stop, but it + /// doesn't matter. fn gc_a_round(&mut self) -> GcManagerResult<()> { let mut need_rewind = false; - // Represents where we should stop doing GC. `None` means the very end of the TiKV. + // Represents where we should stop doing GC. `None` means the very end of the + // TiKV. let mut end = None; // Represents where we have GC-ed to. `None` means the very end of the TiKV. let mut progress = Some(Key::from_encoded(BEGIN_KEY.to_vec())); @@ -434,17 +446,17 @@ impl GcMan info!("gc_worker: auto gc starts"; "safe_point" => self.curr_safe_point()); - // The following loop iterates all regions whose leader is on this TiKV and does GC on them. - // At the same time, check whether safe_point is updated periodically. If it's updated, - // rewinding will happen. + // The following loop iterates all regions whose leader is on this TiKV and does + // GC on them. At the same time, check whether safe_point is updated + // periodically. If it's updated, rewinding will happen. &#13;
loop { self.gc_manager_ctx.check_stopped()?; if is_compaction_filter_allowed(&*self.cfg_tracker.value(), &self.feature_gate) { return Ok(()); } - // Check the current GC progress and determine if we are going to rewind or we have - // finished the round of GC. + // Check the current GC progress and determine if we are going to rewind or we + // have finished the round of GC. if need_rewind { if progress.is_none() { // We have worked to the end and we need to rewind. Restart from beginning. @@ -469,8 +481,8 @@ impl GcMan _ => false, }; if finished { - // We have worked to the end of the TiKV or our progress has reached `end`, and we - // don't need to rewind. In this case, the round of GC has finished. + // We have worked to the end of the TiKV or our progress has reached `end`, and + // we don't need to rewind. In this case, the round of GC has finished. info!("gc_worker: auto gc finishes"; "processed_regions" => processed_regions); return Ok(()); } @@ -478,15 +490,16 @@ impl GcMan assert!(progress.is_some()); - // Before doing GC, check whether safe_point is updated periodically to determine if - // rewinding is needed. + // Before doing GC, check whether safe_point is updated periodically to + // determine if rewinding is needed. self.check_if_need_rewind(&progress, &mut need_rewind, &mut end); progress = self.gc_next_region(progress.unwrap(), &mut processed_regions)?; } } - /// Checks whether we need to rewind in this round of GC. Only used in `gc_a_round`. + /// Checks whether we need to rewind in this round of GC. Only used in + /// `gc_a_round`. fn check_if_need_rewind( &mut self, progress: &Option, @@ -523,8 +536,9 @@ impl GcMan } } - /// Does GC on the next region after `from_key`. Returns the end key of the region it processed. - /// If we have processed to the end of all regions, returns `None`. + /// Does GC on the next region after `from_key`. Returns the end key of the + /// region it processed. 
If we have processed to the end of all regions, + /// returns `None`. fn gc_next_region( &mut self, from_key: Key, @@ -663,8 +677,8 @@ mod tests { impl GcSafePointProvider for MockSafePointProvider { fn get_safe_point(&self) -> Result { - // Error will be ignored by `GcManager`, which is equivalent to that the safe_point - // is not updated. + // Error will be ignored by `GcManager`, which is equivalent to that the + // safe_point is not updated. self.rx.try_recv().map_err(|e| box_err!(e)) } } @@ -755,13 +769,16 @@ mod tests { /// Run a round of auto GC and check if it correctly GC regions as expected. /// - /// Param `regions` is a `Vec` of tuples which is `(start_key, end_key, region_id)` + /// Param `regions` is a `Vec` of tuples which is `(start_key, end_key, + /// region_id)` /// - /// The first value in param `safe_points` will be used to initialize the GcManager, and the remaining - /// values will be checked before every time GC-ing a region. If the length of `safe_points` is - /// less than executed GC tasks, the last value will be used for extra GC tasks. + /// The first value in param `safe_points` will be used to initialize the + /// GcManager, and the remaining values will be checked before every time + /// GC-ing a region. If the length of `safe_points` is less than executed GC + /// tasks, the last value will be used for extra GC tasks. /// - /// Param `expected_gc_tasks` is a `Vec` of tuples which is `(region_id, safe_point)`. + /// Param `expected_gc_tasks` is a `Vec` of tuples which is `(region_id, + /// safe_point)`. fn test_auto_gc( regions: Vec<(Vec, Vec, u64)>, safe_points: Vec + Copy>, @@ -865,7 +882,8 @@ mod tests { vec![(1, 233), (2, 233), (3, 233), (4, 233)], ); - // First region doesn't starts with empty and last region doesn't ends with empty. + // First region doesn't starts with empty and last region doesn't ends with + // empty. 
let regions = vec![ (b"0".to_vec(), b"1".to_vec(), 1), (b"1".to_vec(), b"2".to_vec(), 2), @@ -935,8 +953,9 @@ mod tests { ); let mut safe_points = vec![233, 233, 233, 234, 234, 234, 235]; - // The logic of `gc_a_round` wastes a loop when the last region's end_key is not null, so it - // will check safe point one more time before GC-ing the first region after rewinding. + // The logic of `gc_a_round` wastes a loop when the last region's end_key is not + // null, so it will check safe point one more time before GC-ing the first + // region after rewinding. if !regions.last().unwrap().1.is_empty() { safe_points.insert(5, 234); } diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 7242a984d0d..fe409be3ae4 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -59,12 +59,12 @@ use crate::{ }, }; -/// After the GC scan of a key, output a message to the log if there are at least this many -/// versions of the key. +/// After the GC scan of a key, output a message to the log if there are at +/// least this many versions of the key. const GC_LOG_FOUND_VERSION_THRESHOLD: usize = 30; -/// After the GC delete versions of a key, output a message to the log if at least this many -/// versions are deleted. +/// After the GC delete versions of a key, output a message to the log if at +/// least this many versions are deleted. const GC_LOG_DELETED_VERSION_THRESHOLD: usize = 30; pub const GC_MAX_EXECUTING_TASKS: usize = 10; @@ -120,13 +120,14 @@ where limit: usize, callback: Callback>, }, - /// If GC in compaction filter is enabled, versions on default CF will be handled with - /// `DB::delete` in write CF's compaction filter. However if the compaction filter finds - /// the DB is stalled, it will send the task to GC worker to ensure the compaction can be - /// continued. + /// If GC in compaction filter is enabled, versions on default CF will be + /// handled with `DB::delete` in write CF's compaction filter. 
However if + /// the compaction filter finds the DB is stalled, it will send the task + /// to GC worker to ensure the compaction can be continued. /// - /// NOTE: It's possible that the TiKV instance fails after a compaction result is installed - /// but its orphan versions are not deleted. Those orphan versions will never get cleaned + /// NOTE: It's possible that the TiKV instance fails after a compaction + /// result is installed but its orphan versions are not deleted. Those + /// orphan versions will never get cleaned /// until `DefaultCompactionFilter` is introduced. /// /// The tracking issue: . @@ -308,8 +309,8 @@ where } /// Check need gc without getting snapshot. - /// If this is not supported or any error happens, returns true to do further check after - /// getting snapshot. + /// If this is not supported or any error happens, returns true to do + /// further check after getting snapshot. fn need_gc(&self, start_key: &[u8], end_key: &[u8], safe_point: TimeStamp) -> bool { let props = match self .engine @@ -629,8 +630,8 @@ where let local_storage = self.engine.kv_engine(); // Convert keys to RocksDB layer form - // TODO: Logic coupled with raftstore's implementation. Maybe better design is to do it in - // somewhere of the same layer with apply_worker. + // TODO: Logic coupled with raftstore's implementation. Maybe better design is + // to do it in somewhere of the same layer with apply_worker. let start_data_key = keys::data_key(start_key.as_encoded()); let end_data_key = keys::data_end_key(end_key.as_encoded()); @@ -904,7 +905,8 @@ where } } -/// When we failed to schedule a `GcTask` to `GcRunner`, use this to handle the `ScheduleError`. +/// When we failed to schedule a `GcTask` to `GcRunner`, use this to handle the +/// `ScheduleError`. 
fn handle_gc_task_schedule_error(e: ScheduleError>) -> Result<()> { error!("failed to schedule gc task"; "err" => %e); let res = Err(box_err!("failed to schedule gc task: {:?}", e)); @@ -915,7 +917,8 @@ fn handle_gc_task_schedule_error(e: ScheduleError>) -> Res GcTask::PhysicalScanLock { callback, .. } => { callback(Err(Error::from(ErrorInner::GcWorkerTooBusy))) } - // Attention: If you are adding a new GcTask, do not forget to call the callback if it has a callback. + // Attention: If you are adding a new GcTask, do not forget to call the callback if it has a + // callback. GcTask::GcKeys { .. } | GcTask::RawGcKeys { .. } | GcTask::OrphanVersions { .. } => {} #[cfg(any(test, feature = "testexport"))] GcTask::Validate(_) => {} @@ -968,7 +971,8 @@ where { engine: E, - /// `raft_store_router` is useful to signal raftstore clean region size informations. + /// `raft_store_router` is useful to signal raftstore clean region size + /// information. raft_store_router: RR, /// Used to signal unsafe destroy range is executed. flow_info_sender: Option>, @@ -1150,11 +1154,12 @@ where .or_else(handle_gc_task_schedule_error) } - /// Cleans up all keys in a range and quickly free the disk space. The range might span over - /// multiple regions, and the `ctx` doesn't indicate region. The request will be done directly - /// on RocksDB, bypassing the Raft layer. User must promise that, after calling `destroy_range`, - /// the range will never be accessed any more. However, `destroy_range` is allowed to be called - /// multiple times on an single range. + /// Cleans up all keys in a range and quickly free the disk space. The range + /// might span over multiple regions, and the `ctx` doesn't indicate region. + /// The request will be done directly on RocksDB, bypassing the Raft layer. + /// User must promise that, after calling `destroy_range`, the range will + /// never be accessed any more. However, `destroy_range` is allowed to be + /// called multiple times on a single range. &#13;
pub fn unsafe_destroy_range( &self, ctx: Context, @@ -1287,10 +1292,11 @@ mod tests { }; /// A wrapper of engine that adds the 'z' prefix to keys internally. - /// For test engines, they writes keys into db directly, but in production a 'z' prefix will be - /// added to keys by raftstore layer before writing to db. Some functionalities of `GCWorker` - /// bypasses Raft layer, so they needs to know how data is actually represented in db. This - /// wrapper allows test engines write 'z'-prefixed keys to db. + /// For test engines, they write keys into db directly, but in production a + /// 'z' prefix will be added to keys by raftstore layer before writing to + /// db. Some functionalities of `GCWorker` bypass Raft layer, so they + /// need to know how data is actually represented in db. This wrapper + /// allows test engines to write 'z'-prefixed keys to db. #[derive(Clone)] struct PrefixedEngine(kv::RocksEngine); @@ -1388,8 +1394,8 @@ mod tests { } } - /// Assert the data in `storage` is the same as `expected_data`. Keys in `expected_data` should - /// be encoded form without ts. + /// Assert the data in `storage` is the same as `expected_data`. Keys in + /// `expected_data` should be encoded form without ts. fn check_data( storage: &Storage, expected_data: &BTreeMap, Vec>, @@ -1988,13 +1994,15 @@ mod tests { .unwrap(); assert_eq!(runner.stats.write.seek_tombstone, 0); - // Test rebuilding snapshot when GC write batch limit reached (gc_info.is_completed == false). - // Build a key with versions that will just reach the limit `MAX_TXN_WRITE_SIZE`. + // Test rebuilding snapshot when GC write batch limit reached + // (gc_info.is_completed == false). Build a key with versions that will + // just reach the limit `MAX_TXN_WRITE_SIZE`. let key_size = Modify::Delete(CF_WRITE, Key::from_raw(b"k2").append_ts(1.into())).size(); // versions = ceil(MAX_TXN_WRITE_SIZE/write_size) + 3 // Write CF: Put@N, Put@N-2, Put@N-4, ... &#13;
Put@5, Put@3 // ^ ^^^^^^^^^^^^^^^^^^^ - // safepoint=N-1 Deleted in the first batch, `ceil(MAX_TXN_WRITE_SIZE/write_size)` versions. + // safepoint=N-1 Deleted in the first batch, + // `ceil(MAX_TXN_WRITE_SIZE/write_size)` versions. let versions = (MAX_TXN_WRITE_SIZE - 1) / key_size + 4; for start_ts in (1..versions).map(|x| x as u64 * 2) { let commit_ts = start_ts + 1; @@ -2012,9 +2020,9 @@ mod tests { Some((1, ri_provider)), ) .unwrap(); - // The first batch will leave tombstones that will be seen while processing the second - // batch, but it will be seen in `next` after seeking the latest unexpired version, - // therefore `seek_tombstone` is not affected. + // The first batch will leave tombstones that will be seen while processing the + // second batch, but it will be seen in `next` after seeking the latest + // unexpired version, therefore `seek_tombstone` is not affected. assert_eq!(runner.stats.write.seek_tombstone, 0); // ... and next_tombstone indicates there's indeed more than one batches. assert_eq!(runner.stats.write.next_tombstone, versions - 3); diff --git a/src/server/gc_worker/rawkv_compaction_filter.rs b/src/server/gc_worker/rawkv_compaction_filter.rs index ac16c30bb03..3ed206408e4 100644 --- a/src/server/gc_worker/rawkv_compaction_filter.rs +++ b/src/server/gc_worker/rawkv_compaction_filter.rs @@ -98,8 +98,8 @@ thread_local! { } impl Drop for RawCompactionFilter { - // NOTE: it's required that `CompactionFilter` is dropped before the compaction result - // becomes installed into the DB instance. + // NOTE: it's required that `CompactionFilter` is dropped before the compaction + // result becomes installed into the DB instance. fn drop(&mut self) { self.raw_gc_mvcc_deletions(); @@ -181,7 +181,8 @@ impl RawCompactionFilter { return Ok(CompactionFilterDecision::Keep); } - // If the key mode is not KeyMode::Raw or value_type is not CompactionFilterValueType::Value, it's needed to be retained. 
+ // If the key mode is not KeyMode::Raw or value_type is not + // CompactionFilterValueType::Value, it's needed to be retained. let key_mode = ApiV2::parse_key_mode(keys::origin_key(key)); if key_mode != KeyMode::Raw || value_type != CompactionFilterValueType::Value { return Ok(CompactionFilterDecision::Keep); @@ -199,15 +200,19 @@ impl RawCompactionFilter { self.versions += 1; let raw_value = ApiV2::decode_raw_value(value)?; - // If it's the latest version, and it's deleted or expired, it needs to be sent to GCWorker to be processed asynchronously. + // If it's the latest version, and it's deleted or expired, it needs to be sent + // to GCWorker to be processed asynchronously. if !raw_value.is_valid(self.current_ts) { self.raw_handle_delete(); if self.mvcc_deletions.len() >= DEFAULT_DELETE_BATCH_COUNT { self.raw_gc_mvcc_deletions(); } } - // 1. If it's the latest version, and it's neither deleted nor expired, it's needed to be retained. - // 2. If it's the latest version, and it's deleted or expired, while we do async gctask to deleted or expired records, both put records and deleted/expired records are actually kept within the compaction filter. + // 1. If it's the latest version, and it's neither deleted nor expired, it's + // needed to be retained. 2. If it's the latest version, and it's + // deleted or expired, while we do async gctask to deleted or expired records, + // both put records and deleted/expired records are actually kept within the + // compaction filter. Ok(CompactionFilterDecision::Keep) } else { if commit_ts.into_inner() >= self.safe_point { @@ -216,7 +221,8 @@ impl RawCompactionFilter { self.versions += 1; self.filtered += 1; - // If it's ts < safepoint, and it's not the latest version, it's need to be removed. + // If it's ts < safepoint, and it's not the latest version, it's need to be + // removed. 
Ok(CompactionFilterDecision::Remove) } } @@ -234,8 +240,8 @@ impl RawCompactionFilter { } } - // `log_on_error` indicates whether to print an error log on scheduling failures. - // It's only enabled for `GcTask::OrphanVersions`. + // `log_on_error` indicates whether to print an error log on scheduling + // failures. It's only enabled for `GcTask::OrphanVersions`. fn schedule_gc_task(&self, task: GcTask, log_on_error: bool) { match self.gc_scheduler.schedule(task) { Ok(_) => {} @@ -363,7 +369,8 @@ pub mod tests { gc_runner.safe_point(80).gc_raw(&raw_engine); - // If ts(70) < safepoint(80), and this userkey's latest verion is not deleted or expired, this version will be removed in do_filter. + // If ts(70) < safepoint(80), and this userkey's latest version is not deleted + // or expired, this version will be removed in do_filter. let entry70 = raw_engine .get_value_cf(CF_DEFAULT, make_key(b"r\0a", 70).as_slice()) .unwrap(); diff --git a/src/server/load_statistics/linux.rs b/src/server/load_statistics/linux.rs index f3a12593a51..e0a9b950944 100644 --- a/src/server/load_statistics/linux.rs +++ b/src/server/load_statistics/linux.rs @@ -23,11 +23,12 @@ pub struct ThreadLoadStatistics { } impl ThreadLoadStatistics { - /// Create a thread load statistics for all threads with `prefix`. `ThreadLoad` is stored into - /// `thread_loads` for each thread. At most `slots` old records will be kept, to make the curve - /// more smooth. + /// Create a thread load statistics for all threads with `prefix`. + /// `ThreadLoad` is stored into `thread_loads` for each thread. At most + /// `slots` old records will be kept, to make the curve more smooth. /// - /// Note: call this after the target threads are initialized, otherwise it can't catch them. + /// Note: call this after the target threads are initialized, otherwise it + /// can't catch them. 
pub fn new(slots: usize, prefix: &str, thread_loads: Arc) -> Self { let pid = thread::process_id(); let mut tids = vec![]; @@ -56,17 +57,19 @@ impl ThreadLoadStatistics { } } - /// For every threads with the name prefix given in `ThreadLoadStatistics::new`, - /// gather cpu usage from `/proc//task/` and store it in `thread_load` + /// For every threads with the name prefix given in + /// `ThreadLoadStatistics::new`, gather cpu usage from + /// `/proc//task/` and store it in `thread_load` /// passed in `ThreadLoadStatistics::new`. /// - /// Some old usages and instants (at most `slots`) will be kept internal to make - /// the usage curve more smooth. + /// Some old usages and instants (at most `slots`) will be kept internal to + /// make the usage curve more smooth. pub fn record(&mut self, instant: Instant) { self.instants[self.cur_pos] = instant; self.cpu_usages[self.cur_pos].clear(); for tid in &self.tids { - // TODO: if monitored threads exited and restarted then, we should update `self.tids`. + // TODO: if monitored threads exited and restarted then, we should update + // `self.tids`. if let Ok(stat) = thread::full_thread_stat(self.pid, *tid) { let total = thread::linux::cpu_total(&stat); self.cpu_usages[self.cur_pos].insert(*tid, total); diff --git a/src/server/load_statistics/mod.rs b/src/server/load_statistics/mod.rs index 3b792def94d..5cb856e2948 100644 --- a/src/server/load_statistics/mod.rs +++ b/src/server/load_statistics/mod.rs @@ -44,7 +44,8 @@ impl ThreadLoadPool { }) } - /// Gets the current load. For example, 200 means the threads consuming 200% of the CPU resources. + /// Gets the current load. For example, 200 means the threads consuming 200% + /// of the CPU resources. 
pub fn total_load(&self) -> usize { self.total_load.load(Ordering::Relaxed) } diff --git a/src/server/lock_manager/client.rs b/src/server/lock_manager/client.rs index f3b59c4e97b..c71bec0b63a 100644 --- a/src/server/lock_manager/client.rs +++ b/src/server/lock_manager/client.rs @@ -21,7 +21,8 @@ pub type Callback = Box; const CQ_COUNT: usize = 1; const CLIENT_PREFIX: &str = "deadlock"; -/// Builds the `Environment` of deadlock clients. All clients should use the same instance. +/// Builds the `Environment` of deadlock clients. All clients should use the +/// same instance. pub fn env() -> Arc { Arc::new( EnvBuilder::new() diff --git a/src/server/lock_manager/config.rs b/src/server/lock_manager/config.rs index 8d391e874de..aba08f3d2e7 100644 --- a/src/server/lock_manager/config.rs +++ b/src/server/lock_manager/config.rs @@ -27,9 +27,9 @@ pub struct Config { /// Whether to enable the pipelined pessimistic lock feature. pub pipelined: bool, /// Whether to enable the in-memory pessimistic lock feature. - /// It will take effect only if the `pipelined` config is true because - /// we assume that the success rate of pessimistic transactions is important to - /// people who disable the pipelined pessimistic lock feature. + /// It will take effect only if the `pipelined` config is true because we + /// assume that the success rate of pessimistic transactions is important + /// to people who disable the pipelined pessimistic lock feature. pub in_memory: bool, } diff --git a/src/server/lock_manager/deadlock.rs b/src/server/lock_manager/deadlock.rs index 7cc8978d735..4fee40138c1 100644 --- a/src/server/lock_manager/deadlock.rs +++ b/src/server/lock_manager/deadlock.rs @@ -112,9 +112,11 @@ impl Locks { /// Used to detect the deadlock of wait-for-lock in the cluster. pub struct DetectTable { - /// Keeps the DAG of wait-for-lock. Every edge from `txn_ts` to `lock_ts` has a survival time -- `ttl`. 
- /// When checking the deadlock, if the ttl has elpased, the corresponding edge will be removed. - /// `last_detect_time` is the start time of the edge. `Detect` requests will refresh it. + /// Keeps the DAG of wait-for-lock. Every edge from `txn_ts` to `lock_ts` + /// has a survival time -- `ttl`. When checking the deadlock, if the ttl + /// has elapsed, the corresponding edge will be removed. + /// `last_detect_time` is the start time of the edge. `Detect` requests will + /// refresh it. // txn_ts => (lock_ts => Locks) wait_for_map: HashMap>, @@ -138,11 +140,12 @@ impl DetectTable { } } - /// Returns the key hash which causes deadlock, and the current wait chain that forms the - /// deadlock with `txn_ts`'s waiting for txn at `lock_ts`. - /// Note that the current detecting edge is not included in the returned wait chain. This is - /// intended to reduce RPC message size since the information about current detecting txn is - /// included in a separated field. + /// Returns the key hash which causes deadlock, and the current wait chain + /// that forms the deadlock with `txn_ts`'s waiting for txn at + /// `lock_ts`. Note that the current detecting edge is not included in + /// the returned wait chain. This is intended to reduce RPC message size + /// since the information about current detecting txn is included in a + /// separate field. pub fn detect( &mut self, txn_ts: TimeStamp, @@ -181,12 +184,12 @@ impl DetectTable { let ttl = self.ttl; let mut stack = vec![wait_for_ts]; - // Memorize the pushed vertexes to avoid duplicate search, and maps to the predecessor of - // the vertex. - // Since the graph is a DAG instead of a tree, a vertex may have multiple predecessors. But - // it's ok if we only remember one: for each vertex, if it has a route to the goal (txn_ts), - // we must be able to find the goal and exit this function before visiting the vertex one - // more time. &#13;
+ // Memorize the pushed vertexes to avoid duplicate search, and maps to the + // predecessor of the vertex. + // Since the graph is a DAG instead of a tree, a vertex may have multiple + // predecessors. But it's ok if we only remember one: for each vertex, + // if it has a route to the goal (txn_ts), we must be able to find the + // goal and exit this function before visiting the vertex one more time. let mut pushed: HashMap = HashMap::default(); pushed.insert(wait_for_ts, TimeStamp::zero()); while let Some(curr_ts) = stack.pop() { @@ -220,18 +223,20 @@ impl DetectTable { None } - /// Generate the wait chain after deadlock is detected. This function is part of implementation - /// of `do_detect`. It assumes there's a path from `start` to `end` in the waiting graph, and - /// every single edge `V1 -> V2` has an entry in `vertex_predecessors_map` so that - /// `vertex_predecessors_map[V2] == V1`, and `vertex_predecessors_map[V1] == 0`. + /// Generate the wait chain after deadlock is detected. This function is + /// part of implementation of `do_detect`. It assumes there's a path + /// from `start` to `end` in the waiting graph, and every single edge + /// `V1 -> V2` has an entry in `vertex_predecessors_map` so that + /// `vertex_predecessors_map[V2] == V1`, and `vertex_predecessors_map[V1] == + /// 0`. fn generate_wait_chain( &self, start: TimeStamp, end: TimeStamp, vertex_predecessors_map: HashMap, ) -> Vec { - // It's rare that a deadlock formed by too many transactions. Preallocating a few elements - // should be enough in most cases. + // It's rare that a deadlock formed by too many transactions. Preallocating a + // few elements should be enough in most cases. let mut wait_chain = Vec::with_capacity(3); let mut lock_ts = end; @@ -259,9 +264,9 @@ impl DetectTable { wait_chain } - /// Returns true and adds to the detect table if `txn_ts` is waiting for `lock_ts`. 
- /// When the function returns true, `key` and `resource_group_tag` may be taken to store in the - /// waiting graph. + /// Returns true and adds to the detect table if `txn_ts` is waiting for + /// `lock_ts`. When the function returns true, `key` and + /// `resource_group_tag` may be taken to store in the waiting graph. fn register_if_existed( &mut self, txn_ts: TimeStamp, @@ -280,7 +285,8 @@ impl DetectTable { false } - /// Adds to the detect table. The edge from `txn_ts` to `lock_ts` must not exist. + /// Adds to the detect table. The edge from `txn_ts` to `lock_ts` must not + /// exist. fn register( &mut self, txn_ts: TimeStamp, @@ -402,7 +408,8 @@ pub enum Task { /// If the node has the leader region and the role of the node changes, /// a `ChangeRole` task will be scheduled. /// - /// It's the only way to change the node from leader to follower, and vice versa. + /// It's the only way to change the node from leader to follower, and vice + /// versa. ChangeRole(Role), /// Change the ttl of DetectTable ChangeTtl(Duration), @@ -434,8 +441,8 @@ impl Display for Task { } } -/// `Scheduler` is the wrapper of the `FutureScheduler` to simplify scheduling tasks -/// to the deadlock detector. +/// `Scheduler` is the wrapper of the `FutureScheduler` to simplify +/// scheduling tasks to the deadlock detector. #[derive(Clone)] pub struct Scheduler(FutureScheduler); @@ -498,14 +505,15 @@ impl Scheduler { } } -/// The leader region is the region containing the LEADER_KEY and the leader of the -/// leader region is also the leader of the deadlock detector. +/// The leader region is the region containing the LEADER_KEY and the leader of +/// the leader region is also the leader of the deadlock detector. const LEADER_KEY: &[u8] = b""; -/// `RoleChangeNotifier` observes region or role change events of raftstore. If the -/// region is the leader region and the role of this node is changed, a `ChangeRole` -/// task will be scheduled to the deadlock detector. 
It's the only way to change the -/// node from the leader of deadlock detector to follower, and vice versa. +/// `RoleChangeNotifier` observes region or role change events of raftstore. If +/// the region is the leader region and the role of this node is changed, a +/// `ChangeRole` task will be scheduled to the deadlock detector. It's the only +/// way to change the node from the leader of deadlock detector to follower, and +/// vice versa. #[derive(Clone)] pub(crate) struct RoleChangeNotifier { /// The id of the valid leader region. @@ -755,8 +763,9 @@ where } } } - // If the node is a follower, it will receive a `ChangeRole(Follower)` msg when the leader - // is changed. It should reset itself even if the role of the node is not changed. + // If the node is a follower, it will receive a `ChangeRole(Follower)` msg when + // the leader is changed. It should reset itself even if the role of the + // node is not changed. self.reset(role); } @@ -794,8 +803,9 @@ where /// Returns true if sends successfully. /// - /// If the client is None, reconnects the leader first, then sends the request to the leader. - /// If sends failed, sets the client to None for retry. + /// If the client is None, reconnects the leader first, then sends the + /// request to the leader. If sends failed, sets the client to None for + /// retry. fn send_request_to_leader( &mut self, tp: DetectType, @@ -889,11 +899,13 @@ where if self.send_request_to_leader(tp, txn_ts, lock, diag_ctx.clone()) { return; } - // Because the client is asynchronous, it won't be closed until failing to send a - // request. So retry to refresh the leader info and send it again. + // Because the client is asynchronous, it won't be closed until + // failing to send a request. So retry to + // refresh the leader info and send it again. } - // If a request which causes deadlock is dropped, it leads to the waiter timeout. - // TiDB will retry to acquire the lock and detect deadlock again. 
+ // If a request which causes deadlock is dropped, it leads to the waiter + // timeout. TiDB will retry to acquire the lock and detect deadlock + // again. warn!("detect request dropped"; "tp" => ?tp, "txn_ts" => txn_ts, "lock" => ?lock); ERROR_COUNTER_METRICS.dropped.inc(); } @@ -1304,7 +1316,8 @@ pub mod tests { tag, }; - // Detect specified edges sequentially, and expects the last one will cause the deadlock. + // Detect specified edges sequentially, and expects the last one will cause the + // deadlock. let test_once = |edges: &[Edge<'_>]| { let mut detect_table = DetectTable::new(Duration::from_millis(100)); let mut edge_map = HashMap::default(); diff --git a/src/server/lock_manager/mod.rs b/src/server/lock_manager/mod.rs index 7ce6b50e6c0..91e25a2edeb 100644 --- a/src/server/lock_manager/mod.rs +++ b/src/server/lock_manager/mod.rs @@ -54,7 +54,8 @@ fn detected_slot_idx(txn_ts: TimeStamp) -> usize { /// `LockManager` has two components working in two threads: /// * One is the `WaiterManager` which manages transactions waiting for locks. -/// * The other one is the `Detector` which detects deadlocks between transactions. +/// * The other one is the `Detector` which detects deadlocks between +/// transactions. pub struct LockManager { waiter_mgr_worker: Option>, detector_worker: Option>, @@ -198,8 +199,9 @@ impl LockManager { } } - /// Creates a `RoleChangeNotifier` of the deadlock detector worker and registers it to - /// the `CoprocessorHost` to observe the role change events of the leader region. + /// Creates a `RoleChangeNotifier` of the deadlock detector worker and + /// registers it to the `CoprocessorHost` to observe the role change + /// events of the leader region. pub fn register_detector_role_change_observer( &self, host: &mut CoprocessorHost, @@ -208,7 +210,8 @@ impl LockManager { role_change_notifier.register(host); } - /// Creates a `DeadlockService` to handle deadlock detect requests from other nodes. 
+ /// Creates a `DeadlockService` to handle deadlock detect requests from + /// other nodes. pub fn deadlock_service(&self) -> DeadlockService { DeadlockService::new( self.waiter_mgr_scheduler.clone(), @@ -268,7 +271,8 @@ impl LockManagerTrait for LockManager { self.waiter_mgr_scheduler .wait_for(start_ts, cb, pr, lock, timeout, diag_ctx.clone()); - // If it is the first lock the transaction tries to lock, it won't cause deadlock. + // If it is the first lock the transaction tries to lock, it won't cause + // deadlock. if !is_first_lock { self.add_to_detected(start_ts); self.detector_scheduler.detect(start_ts, lock, diag_ctx); @@ -288,8 +292,9 @@ impl LockManagerTrait for LockManager { self.waiter_mgr_scheduler .wake_up(lock_ts, hashes, commit_ts); } - // If a pessimistic transaction is committed or rolled back and it once sent requests to - // detect deadlock, clean up its wait-for entries in the deadlock detector. + // If a pessimistic transaction is committed or rolled back and it once sent + // requests to detect deadlock, clean up its wait-for entries in the + // deadlock detector. if is_pessimistic_txn && self.remove_from_detected(lock_ts) { self.detector_scheduler.clean_up(lock_ts); } diff --git a/src/server/lock_manager/waiter_manager.rs b/src/server/lock_manager/waiter_manager.rs index 8c3d2c7749d..8e5225bef76 100644 --- a/src/server/lock_manager/waiter_manager.rs +++ b/src/server/lock_manager/waiter_manager.rs @@ -40,11 +40,13 @@ struct DelayInner { cancelled: bool, } -/// `Delay` is a wrapper of `tokio_timer::Delay` which has a resolution of one millisecond. -/// It has some extra features than `tokio_timer::Delay` used by `WaiterManager`. +/// `Delay` is a wrapper of `tokio_timer::Delay` which has a resolution of one +/// millisecond. It has some extra features than `tokio_timer::Delay` used by +/// `WaiterManager`. /// -/// `Delay` performs no work and completes with `true` once the specified deadline has been reached. 
-/// If it has been cancelled, it will complete with `false` at arbitrary time. +/// `Delay` performs no work and completes with `true` once the specified +/// deadline has been reached. If it has been cancelled, it will complete with +/// `false` at arbitrary time. // FIXME: Use `tokio_timer::DelayQueue` instead if https://github.com/tokio-rs/tokio/issues/1700 is fixed. #[derive(Clone)] struct Delay { @@ -325,7 +327,8 @@ impl WaitTable { WAIT_TABLE_STATUS_GAUGE.txns.inc(); None } - // Here we don't increase waiter_count because it's already updated in LockManager::wait_for() + // Here we don't increase waiter_count because it's already updated in + // LockManager::wait_for() } /// Removes all waiters waiting for the lock. @@ -348,10 +351,11 @@ impl WaitTable { Some(waiter) } - /// Removes the `Waiter` with the smallest start ts and returns it with remaining waiters. + /// Removes the `Waiter` with the smallest start ts and returns it with + /// remaining waiters. /// - /// NOTE: Due to the borrow checker, it doesn't remove the entry in the `WaitTable` - /// even if there is no remaining waiter. + /// NOTE: Due to the borrow checker, it doesn't remove the entry in the + /// `WaitTable` even if there is no remaining waiter. 
fn remove_oldest_waiter(&mut self, lock: Lock) -> Option<(Waiter, &mut Waiters)> { let waiters = self.wait_table.get_mut(&lock.hash)?; let oldest_idx = waiters @@ -823,7 +827,8 @@ pub mod tests { waiter_ts: TimeStamp, mut lock_info: LockInfo, deadlock_hash: u64, - expect_wait_chain: &[(u64, u64, &[u8], &[u8])], // (waiter_ts, wait_for_ts, key, resource_group_tag) + expect_wait_chain: &[(u64, u64, &[u8], &[u8])], /* (waiter_ts, wait_for_ts, key, + * resource_group_tag) */ ) { match res { Err(StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc( diff --git a/src/server/node.rs b/src/server/node.rs index eb2cc72e432..84aeb89377d 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -241,7 +241,8 @@ where self.store.get_id() } - /// Gets the Scheduler of RaftstoreConfigTask, it must be called after start. + /// Gets the Scheduler of RaftstoreConfigTask, it must be called after + /// start. pub fn refresh_config_scheduler(&mut self) -> Scheduler { self.system.refresh_config_scheduler() } @@ -251,7 +252,8 @@ where pub fn get_router(&self) -> RaftRouter { self.system.router() } - /// Gets a transmission end of a channel which is used send messages to apply worker. + /// Gets a transmission end of a channel which is used send messages to + /// apply worker. pub fn get_apply_router(&self) -> ApplyRouter { self.system.apply_router() } @@ -289,11 +291,12 @@ where .kv .get_msg::(keys::STORE_IDENT_KEY)? .expect("Store should have bootstrapped"); - // API version is not written into `StoreIdent` in legacy TiKV, thus it will be V1 in - // `StoreIdent` regardless of `storage.enable_ttl`. To allow upgrading from legacy V1 - // TiKV, the config switch between V1 and V1ttl are not checked here. - // It's safe to do so because `storage.enable_ttl` is impossible to change thanks to the - // config check. + // API version is not written into `StoreIdent` in legacy TiKV, thus it will be + // V1 in `StoreIdent` regardless of `storage.enable_ttl`. 
To allow upgrading + // from legacy V1 TiKV, the config switch between V1 and V1ttl are not checked + // here. It's safe to do so because `storage.enable_ttl` is impossible to change + // thanks to the config check. let should_check = match (ident.api_version, + // self.api_version) { let should_check = match (ident.api_version, self.api_version) { (ApiVersion::V1, ApiVersion::V1ttl) | (ApiVersion::V1ttl, ApiVersion::V1) => false, (left, right) => left != right, diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index 4b2815f5d73..bc0e8a59303 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -133,7 +133,8 @@ impl Queue { self.buf.pop() } - /// Same as `try_pop` but register interest on readiness when `None` is returned. + /// Same as `try_pop` but register interest on readiness when `None` is + /// returned. /// /// The method should be called in polling context. If the queue is empty, /// it will register current polling task for notifications. @@ -244,8 +245,8 @@ impl Buffer for BatchMessageBuffer { #[inline] fn push(&mut self, msg: RaftMessage) { let msg_size = Self::message_size(&msg); - // To avoid building too large batch, we limit each batch's size. Since `msg_size` - // is estimated, `GRPC_SEND_MSG_BUF` is reserved for errors. + // To avoid building too large batch, we limit each batch's size. Since + // `msg_size` is estimated, `GRPC_SEND_MSG_BUF` is reserved for errors. if self.size > 0 && (self.size + msg_size + self.cfg.raft_client_grpc_send_msg_buffer >= self.cfg.max_grpc_send_msg_len as usize @@ -276,9 +277,10 @@ impl Buffer for BatchMessageBuffer { self.push(more); } - // try refresh config after flush. `max_grpc_send_msg_len` and `raft_msg_max_batch_size` - // can impact the buffer push logic, but since they are soft restriction, we check config change - // at here to avoid affact performance since `push` is a hot path. + // try refresh config after flush. 
`max_grpc_send_msg_len` and + // `raft_msg_max_batch_size` can impact the buffer push logic, but since + // they are soft restriction, we check config change at here to avoid + // affact performance since `push` is a hot path. self.maybe_refresh_config(); res @@ -533,7 +535,8 @@ where RAFT_MESSAGE_FLUSH_COUNTER.full.inc_by(1); } - // So either enough messages are batched up or don't need to wait or wait timeouts. + // So either enough messages are batched up or don't need to wait or wait + // timeouts. s.flush_timeout.take(); ready!(Poll::Ready(s.buffer.flush(&mut s.sender)))?; continue; @@ -823,9 +826,9 @@ async fn start( let f = back_end.batch_call(&client, addr.clone()); let mut res = f.await; if res == Ok(()) { - // If the call is setup successfully, it will never finish. Returning `Ok(())` means the - // batch_call is not supported, we are probably connect to an old version of TiKV. So we - // need to fallback to use legacy API. + // If the call is setup successfully, it will never finish. Returning `Ok(())` + // means the batch_call is not supported, we are probably connect to + // an old version of TiKV. So we need to fallback to use legacy API. let f = back_end.call(&client, addr.clone()); res = f.await; } @@ -836,7 +839,8 @@ async fn start( Err(_) => { error!("connection abort"; "store_id" => back_end.store_id, "addr" => addr); if retry_times > 1 { - // Clears pending messages to avoid consuming high memory when one node is shutdown. + // Clears pending messages to avoid consuming high memory when one node is + // shutdown. back_end.clear_pending_message("unreachable"); } else { // At least report failure in metrics. @@ -990,9 +994,9 @@ where /// Sends a message. /// - /// If the message fails to be sent, false is returned. Returning true means the message is - /// enqueued to buffer. Caller is expected to call `flush` to ensure all buffered messages - /// are sent out. + /// If the message fails to be sent, false is returned. 
Returning true means + /// the message is enqueued to buffer. Caller is expected to call `flush` to + /// ensure all buffered messages are sent out. pub fn send(&mut self, msg: RaftMessage) -> result::Result<(), DiscardReason> { let store_id = msg.get_to_peer().store_id; let grpc_raft_conn_num = self.builder.cfg.value().grpc_raft_conn_num as u64; diff --git a/src/server/raftkv.rs b/src/server/raftkv.rs index e8c06c220b8..ab60f969493 100644 --- a/src/server/raftkv.rs +++ b/src/server/raftkv.rs @@ -514,7 +514,8 @@ impl Coprocessor for ReplicaReadLockChecker {} impl ReadIndexObserver for ReplicaReadLockChecker { fn on_step(&self, msg: &mut eraftpb::Message, role: StateRole) { // Only check and return result if the current peer is a leader. - // If it's not a leader, the read index request will be redirected to the leader later. + // If it's not a leader, the read index request will be redirected to the leader + // later. if msg.get_msg_type() != MessageType::MsgReadIndex || role != StateRole::Leader { return; } @@ -574,7 +575,8 @@ mod tests { use super::*; - // This test ensures `ReplicaReadLockChecker` won't change UUID context of read index. + // This test ensures `ReplicaReadLockChecker` won't change UUID context of read + // index. #[test] fn test_replica_read_lock_checker_for_single_uuid() { let cm = ConcurrencyManager::new(1.into()); diff --git a/src/server/reset_to_version.rs b/src/server/reset_to_version.rs index de837bdb1cb..20bd65ac17a 100644 --- a/src/server/reset_to_version.rs +++ b/src/server/reset_to_version.rs @@ -22,7 +22,8 @@ const BATCH_SIZE: usize = 256; /// todo: Report this to the user. 
#[derive(Debug, Clone)] pub enum ResetToVersionState { - /// `RemovingWrite` means we are removing stale data in the `WRITE` and `DEFAULT` cf + /// `RemovingWrite` means we are removing stale data in the `WRITE` and + /// `DEFAULT` cf RemovingWrite { scanned: usize }, /// `RemovingWrite` means we are removing stale data in the `LOCK` cf RemovingLock { scanned: usize }, @@ -40,7 +41,8 @@ impl ResetToVersionState { } } -/// `ResetToVersionWorker` is the worker that does the actual reset-to-version work. +/// `ResetToVersionWorker` is the worker that does the actual reset-to-version +/// work. pub struct ResetToVersionWorker { /// `ts` is the timestamp to reset to. ts: TimeStamp, @@ -168,8 +170,9 @@ impl ResetToVersionWorker { } } -/// `ResetToVersionManager` is the manager that manages the reset-to-version process. -/// User should interact with `ResetToVersionManager` instead of using `ResetToVersionWorker` directly. +/// `ResetToVersionManager` is the manager that manages the reset-to-version +/// process. User should interact with `ResetToVersionManager` instead of using +/// `ResetToVersionWorker` directly. pub struct ResetToVersionManager { /// Current state of the reset-to-version process. state: Arc>, diff --git a/src/server/server.rs b/src/server/server.rs index 196a6584be7..c5aa6311193 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -477,7 +477,8 @@ mod tests { } } - // if this failed, unset the environmental variables 'http_proxy' and 'https_proxy', and retry. + // if this failed, unset the environmental variables 'http_proxy' and + // 'https_proxy', and retry. #[test] fn test_peer_resolve() { let cfg = Config { diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index 740e597e5e2..e66bb3ec40c 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -61,7 +61,8 @@ pub struct Service> { } impl> Service { - /// Constructs a new `Service` with `Engines`, a `RaftStoreRouter` and a `GcWorker`. 
+ /// Constructs a new `Service` with `Engines`, a `RaftStoreRouter` and a + /// `GcWorker`. pub fn new( engines: Engines, pool: Handle, diff --git a/src/server/service/diagnostics/log.rs b/src/server/service/diagnostics/log.rs index a79ca0c4e8a..4ab02f819da 100644 --- a/src/server/service/diagnostics/log.rs +++ b/src/server/service/diagnostics/log.rs @@ -173,7 +173,8 @@ impl Iterator for LogIterator { if self.pre_log.time < self.begin_time { continue; } - // treat the invalid log with the pre valid log time and level but its own whole line content + // treat the invalid log with the pre valid log time and level but its own + // whole line content item.set_time(self.pre_log.time); item.set_level(self.pre_log.get_level()); item.set_message(input.to_owned()); @@ -267,8 +268,8 @@ fn parse(input: &str) -> Result<(&str, (i64, LogLevel)), Error> { Ok((content, (timestamp, level))) } -/// Parses the start time and end time of a log file and return the maximal and minimal -/// timestamp in unix milliseconds. +/// Parses the start time and end time of a log file and return the maximal and +/// minimal timestamp in unix milliseconds. fn parse_time_range(file: &std::fs::File) -> Result<(i64, i64), Error> { let file_start_time = parse_start_time(file, 10)?; let file_end_time = parse_end_time(file, 10)?; diff --git a/src/server/service/diagnostics/sys.rs b/src/server/service/diagnostics/sys.rs index c0cc3eb1c6a..9eb88016424 100644 --- a/src/server/service/diagnostics/sys.rs +++ b/src/server/service/diagnostics/sys.rs @@ -37,7 +37,7 @@ impl NicSnapshot { fn into_pairs(self, prev: &NicSnapshot) -> Vec { macro_rules! 
pair { - ($label: literal, $value: expr, $old_value: expr) => {{ + ($label:literal, $value:expr, $old_value:expr) => {{ let mut pair = ServerInfoPair::default(); pair.set_key($label.to_owned()); pair.set_value(format!("{:.2}", ($value - $old_value) as f64)); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 878a138aafe..1ad81ec8900 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -218,8 +218,8 @@ macro_rules! handle_request { } macro_rules! set_total_time { - ($resp: ident, $duration: expr, no_time_detail) => {}; - ($resp: ident, $duration: expr, has_time_detail) => { + ($resp:ident, $duration:expr,no_time_detail) => {}; + ($resp:ident, $duration:expr,has_time_detail) => { let mut $resp = $resp; $resp .mut_exec_details_v2() @@ -627,8 +627,8 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor ) { let begin_instant = Instant::now(); - // DestroyRange is a very dangerous operation. We don't allow passing MIN_KEY as start, or - // MAX_KEY as end here. + // DestroyRange is a very dangerous operation. We don't allow passing MIN_KEY as + // start, or MAX_KEY as end here. assert!(!req.get_start_key().is_empty()); assert!(!req.get_end_key().is_empty()); @@ -726,8 +726,8 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor if let Err(err @ RaftStoreError::StoreNotMatch { .. }) = Self::handle_raft_message(store_id, &ch, msg, reject) { - // Return an error here will break the connection, only do that for `StoreNotMatch` to - // let tikv to resolve a correct address from PD + // Return an error here will break the connection, only do that for + // `StoreNotMatch` to let tikv to resolve a correct address from PD return Err(Error::from(err)); } } @@ -772,8 +772,8 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor if let Err(err @ RaftStoreError::StoreNotMatch { .. 
}) = Self::handle_raft_message(store_id, &ch, msg, reject) { - // Return an error here will break the connection, only do that for `StoreNotMatch` to - // let tikv to resolve a correct address from PD + // Return an error here will break the connection, only do that for + // `StoreNotMatch` to let tikv to resolve a correct address from PD return Err(Error::from(err)); } } @@ -1412,8 +1412,9 @@ async fn future_handle_empty( ) -> ServerResult { let mut res = BatchCommandsEmptyResponse::default(); res.set_test_id(req.get_test_id()); - // `BatchCommandsWaker` processes futures in notify. If delay_time is too small, notify - // can be called immediately, so the future is polled recursively and lead to deadlock. + // `BatchCommandsWaker` processes futures in notify. If delay_time is too small, + // notify can be called immediately, so the future is polled recursively and + // lead to deadlock. if req.get_delay_time() >= 10 { let _ = tikv_util::timer::GLOBAL_TIMER_HANDLE .delay( @@ -1733,9 +1734,11 @@ fn future_raw_batch_put( let pairs_len = req.get_pairs().len(); // The TTL for each key in seconds. // - // In some TiKV of old versions, only one TTL can be provided and the TTL will be applied to all keys in - // the request. For compatibility reasons, if the length of `ttls` is exactly one, then the TTL will be applied - // to all keys. Otherwise, the length mismatch between `ttls` and `pairs` will return an error. + // In some TiKV of old versions, only one TTL can be provided and the TTL will + // be applied to all keys in the request. For compatibility reasons, if the + // length of `ttls` is exactly one, then the TTL will be applied to all keys. + // Otherwise, the length mismatch between `ttls` and `pairs` will return an + // error. 
let ttls = if req.get_ttls().is_empty() { vec![0; pairs_len] } else if req.get_ttls().len() == 1 { diff --git a/src/server/snap.rs b/src/server/snap.rs index 15304c51cdd..f451b6b70e9 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -120,7 +120,8 @@ pub struct SendStat { /// Send the snapshot to specified address. /// -/// It will first send the normal raft snapshot message and then send the snapshot file. +/// It will first send the normal raft snapshot message and then send the +/// snapshot file. pub fn send_snap( env: Arc, mgr: SnapManager, diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index c4cb6a67fbb..13b7b94297d 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -352,7 +352,8 @@ where Ok(val) => val, Err(err) => return Ok(make_response(StatusCode::BAD_REQUEST, err.to_string())), }, - None => 99, // Default frequency of sampling. 99Hz to avoid coincide with special periods + None => 99, /* Default frequency of sampling. 99Hz to avoid coincide with special + * periods */ }; let prototype_content_type: hyper::http::HeaderValue = @@ -565,8 +566,9 @@ where } // 1. POST "/config" will modify the configuration of TiKV. - // 2. GET "/region" will get start key and end key. These keys could be actual - // user data since in some cases the data itself is stored in the key. + // 2. GET "/region" will get start key and end key. These keys could be + // actual user data since in some cases the data itself is stored in the + // key. 
let should_check_cert = !matches!( (&method, path.as_ref()), (&Method::GET, "/metrics") @@ -858,7 +860,8 @@ async fn handle_fail_points_request(req: Request) -> hyper::Result { - // In this scope the path must be like /fail...(/...), which starts with FAIL_POINTS_REQUEST_PATH and may or may not have a sub path + // In this scope the path must be like /fail...(/...), which starts with + // FAIL_POINTS_REQUEST_PATH and may or may not have a sub path // Now we return 404 when path is neither /fail nor /fail/ if path != FAIL_POINTS_REQUEST_PATH && path != fail_path { return Ok(Response::builder() diff --git a/src/server/status_server/profile.rs b/src/server/status_server/profile.rs index 88f45a9ca9e..a37712dfd68 100644 --- a/src/server/status_server/profile.rs +++ b/src/server/status_server/profile.rs @@ -122,7 +122,8 @@ where } /// Activate heap profile and call `callback` if successfully. -/// `deactivate_heap_profile` can only be called after it's notified from `callback`. +/// `deactivate_heap_profile` can only be called after it's notified from +/// `callback`. pub async fn activate_heap_profile( dump_period: S, store_path: PathBuf, @@ -299,7 +300,8 @@ fn extract_thread_name(thread_name: &str) -> String { .unwrap_or_else(|| thread_name.to_owned()) } -// Re-define some heap profiling functions because heap-profiling is not enabled for tests. +// Re-define some heap profiling functions because heap-profiling is not enabled +// for tests. #[cfg(test)] mod test_utils { use std::sync::Mutex; diff --git a/src/storage/config.rs b/src/storage/config.rs index 78850c9964c..2a5ac4840e0 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -282,10 +282,10 @@ pub struct IORateLimitConfig { pub max_bytes_per_sec: ReadableSize, #[online_config(skip)] pub mode: IORateLimitMode, - /// When this flag is off, high-priority IOs are counted but not limited. 
Default - /// set to false because the optimal throughput target provided by user might not be - /// the maximum available bandwidth. For multi-tenancy use case, this flag should be - /// turned on. + /// When this flag is off, high-priority IOs are counted but not limited. + /// Default set to false because the optimal throughput target provided by + /// user might not be the maximum available bandwidth. For multi-tenancy + /// use case, this flag should be turned on. #[online_config(skip)] pub strict: bool, pub foreground_read_priority: IOPriority, diff --git a/src/storage/errors.rs b/src/storage/errors.rs index 8c3ca2c4116..dae61653f07 100644 --- a/src/storage/errors.rs +++ b/src/storage/errors.rs @@ -21,8 +21,9 @@ use crate::storage::{ }; #[derive(Debug, Error)] -/// Detailed errors for storage operations. This enum also unifies code for basic error -/// handling functionality in a single place instead of being spread out. +/// Detailed errors for storage operations. This enum also unifies code for +/// basic error handling functionality in a single place instead of being spread +/// out. pub enum ErrorInner { #[error("{0}")] Kv(#[from] kv::Error), @@ -177,8 +178,9 @@ pub enum ErrorHeaderKind { } impl ErrorHeaderKind { - /// TODO: This function is only used for bridging existing & legacy metric tags. - /// It should be removed once Coprocessor starts using new static metrics. + /// TODO: This function is only used for bridging existing & legacy metric + /// tags. It should be removed once Coprocessor starts using new static + /// metrics. pub fn get_str(&self) -> &'static str { match *self { ErrorHeaderKind::NotLeader => "not_leader", @@ -204,8 +206,8 @@ const SCHEDULER_IS_BUSY: &str = "scheduler is busy"; const GC_WORKER_IS_BUSY: &str = "gc worker is busy"; const DEADLINE_EXCEEDED: &str = "deadline is exceeded"; -/// Get the `ErrorHeaderKind` enum that corresponds to the error in the protobuf message. -/// Returns `ErrorHeaderKind::Other` if no match found. 
+/// Get the `ErrorHeaderKind` enum that corresponds to the error in the protobuf +/// message. Returns `ErrorHeaderKind::Other` if no match found. pub fn get_error_kind_from_header(header: &errorpb::Error) -> ErrorHeaderKind { if header.has_not_leader() { ErrorHeaderKind::NotLeader @@ -266,8 +268,8 @@ pub fn extract_region_error(res: &Result) -> Option { Some(err) } Err(Error(box ErrorInner::Closed)) => { - // TiKV is closing, return an RegionError to tell the client that this region is unavailable - // temporarily, the client should retry the request in other TiKVs. + // TiKV is closing, return an RegionError to tell the client that this region is + // unavailable temporarily, the client should retry the request in other TiKVs. let mut err = errorpb::Error::default(); err.set_message("TiKV is Closing".to_string()); Some(err) diff --git a/src/storage/kv/test_engine_builder.rs b/src/storage/kv/test_engine_builder.rs index e3d1507224b..0867c30fb31 100644 --- a/src/storage/kv/test_engine_builder.rs +++ b/src/storage/kv/test_engine_builder.rs @@ -68,8 +68,8 @@ impl TestEngineBuilder { } /// Register causal observer for RawKV API V2. - // TODO: `RocksEngine` is coupling with RawKV features including GC (compaction filter) & CausalObserver. - // Consider decoupling them. + // TODO: `RocksEngine` is coupling with RawKV features including GC (compaction + // filter) & CausalObserver. Consider decoupling them. fn register_causal_observer(engine: &mut RocksEngine) { let causal_ts_provider = Arc::new(causal_ts::tests::TestProvider::default()); let causal_ob = diff --git a/src/storage/lock_manager.rs b/src/storage/lock_manager.rs index 61d99f1a4dd..def756c921e 100644 --- a/src/storage/lock_manager.rs +++ b/src/storage/lock_manager.rs @@ -20,8 +20,9 @@ pub struct Lock { pub struct DiagnosticContext { /// The key we care about pub key: Vec, - /// This tag is used for aggregate related kv requests (eg. 
generated from same statement) - /// Currently it is the encoded SQL digest if the client is TiDB + /// This tag is used for aggregate related kv requests (eg. generated from + /// same statement) Currently it is the encoded SQL digest if the client + /// is TiDB pub resource_group_tag: Vec, } @@ -41,8 +42,8 @@ impl WaitTimeout { } } - /// Timeouts are encoded as i64s in protobufs where 0 means using default timeout. - /// Negative means no wait. + /// Timeouts are encoded as i64s in protobufs where 0 means using default + /// timeout. Negative means no wait. pub fn from_encoded(i: i64) -> Option { use std::cmp::Ordering::*; @@ -60,15 +61,18 @@ impl From for WaitTimeout { } } -/// `LockManager` manages transactions waiting for locks held by other transactions. -/// It has responsibility to handle deadlocks between transactions. +/// `LockManager` manages transactions waiting for locks held by other +/// transactions. It has responsibility to handle deadlocks between +/// transactions. pub trait LockManager: Clone + Send + 'static { /// Transaction with `start_ts` waits for `lock` released. /// - /// If the lock is released or waiting times out or deadlock occurs, the transaction - /// should be waken up and call `cb` with `pr` to notify the caller. + /// If the lock is released or waiting times out or deadlock occurs, the + /// transaction should be waken up and call `cb` with `pr` to notify the + /// caller. /// - /// If the lock is the first lock the transaction waits for, it won't result in deadlock. + /// If the lock is the first lock the transaction waits for, it won't result + /// in deadlock. fn wait_for( &self, start_ts: TimeStamp, @@ -80,7 +84,8 @@ pub trait LockManager: Clone + Send + 'static { diag_ctx: DiagnosticContext, ); - /// The locks with `lock_ts` and `hashes` are released, tries to wake up transactions. + /// The locks with `lock_ts` and `hashes` are released, tries to wake up + /// transactions. 
fn wake_up( &self, lock_ts: TimeStamp, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index aab89299641..6338525ab02 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -2,40 +2,50 @@ // #[PerformanceCriticalPath] -//! This module contains TiKV's transaction layer. It lowers high-level, transactional -//! commands to low-level (raw key-value) interactions with persistent storage. +//! This module contains TiKV's transaction layer. It lowers high-level, +//! transactional commands to low-level (raw key-value) interactions with +//! persistent storage. //! -//! This module is further split into layers: [`txn`](txn) lowers transactional commands to -//! key-value operations on an MVCC abstraction. [`mvcc`](mvcc) is our MVCC implementation. -//! [`kv`](kv) is an abstraction layer over persistent storage. +//! This module is further split into layers: [`txn`](txn) lowers transactional +//! commands to key-value operations on an MVCC abstraction. [`mvcc`](mvcc) is +//! our MVCC implementation. [`kv`](kv) is an abstraction layer over persistent +//! storage. //! -//! Other responsibilities of this module are managing latches (see [`latch`](txn::latch)), deadlock -//! and wait handling (see [`lock_manager`](lock_manager)), sche -//! duling command execution (see -//! [`txn::scheduler`](txn::scheduler)), and handling commands from the raw and versioned APIs (in -//! the [`Storage`](Storage) struct). +//! Other responsibilities of this module are managing latches (see +//! [`latch`](txn::latch)), deadlock and wait handling (see +//! [`lock_manager`](lock_manager)), sche duling command execution (see +//! [`txn::scheduler`](txn::scheduler)), and handling commands from the raw and +//! versioned APIs (in the [`Storage`](Storage) struct). //! //! For more information about TiKV's transactions, see the [sig-txn docs](https://github.com/tikv/sig-transaction/tree/master/doc). //! //! Some important types are: //! -//! 
* the [`Engine`](kv::Engine) trait and related traits, which abstracts over underlying storage, -//! * the [`MvccTxn`](mvcc::txn::MvccTxn) struct, which is the primary object in the MVCC -//! implementation, -//! * the commands in the [`commands`](txn::commands) module, which are how each command is implemented, -//! * the [`Storage`](Storage) struct, which is the primary entry point for this module. +//! * the [`Engine`](kv::Engine) trait and related traits, which abstracts over +//! underlying storage, +//! * the [`MvccTxn`](mvcc::txn::MvccTxn) struct, which is the primary object in +//! the MVCC implementation, +//! * the commands in the [`commands`](txn::commands) module, which are how each +//! command is implemented, +//! * the [`Storage`](Storage) struct, which is the primary entry point for this +//! module. //! //! Related code: //! -//! * the [`kv`](crate::server::service::kv) module, which is the interface for TiKV's APIs, -//! * the [`lock_manager](crate::server::lock_manager), which takes part in lock and deadlock -//! management, -//! * [`gc_worker`](crate::server::gc_worker), which drives garbage collection of old values, -//! * the [`txn_types](::txn_types) crate, some important types for this module's interface, -//! * the [`kvproto`](::kvproto) crate, which defines TiKV's protobuf API and includes some -//! documentation of the commands implemented here, -//! * the [`test_storage`](::test_storage) crate, integration tests for this module, -//! * the [`engine_traits`](::engine_traits) crate, more detail of the engine abstraction. +//! * the [`kv`](crate::server::service::kv) module, which is the interface for +//! TiKV's APIs, +//! * the [`lock_manager](crate::server::lock_manager), which takes part in lock +//! and deadlock management, +//! * [`gc_worker`](crate::server::gc_worker), which drives garbage collection +//! of old values, +//! * the [`txn_types](::txn_types) crate, some important types for this +//! module's interface, +//! 
* the [`kvproto`](::kvproto) crate, which defines TiKV's protobuf API and +//! includes some documentation of the commands implemented here, +//! * the [`test_storage`](::test_storage) crate, integration tests for this +//! module, +//! * the [`engine_traits`](::engine_traits) crate, more detail of the engine +//! abstraction. pub mod config; pub mod config_manager; @@ -119,27 +129,31 @@ use crate::{ pub type Result = std::result::Result; pub type Callback = Box) + Send>; -/// [`Storage`](Storage) implements transactional KV APIs and raw KV APIs on a given [`Engine`]. -/// An [`Engine`] provides low level KV functionality. [`Engine`] has multiple implementations. -/// When a TiKV server is running, a [`RaftKv`](crate::server::raftkv::RaftKv) will be the -/// underlying [`Engine`] of [`Storage`]. The other two types of engines are for test purpose. +/// [`Storage`](Storage) implements transactional KV APIs and raw KV APIs on a +/// given [`Engine`]. An [`Engine`] provides low level KV functionality. +/// [`Engine`] has multiple implementations. When a TiKV server is running, a +/// [`RaftKv`](crate::server::raftkv::RaftKv) will be the underlying [`Engine`] +/// of [`Storage`]. The other two types of engines are for test purpose. /// -///[`Storage`] is reference counted and cloning [`Storage`] will just increase the reference counter. -/// Storage resources (i.e. threads, engine) will be released when all references are dropped. +/// [`Storage`] is reference counted and cloning [`Storage`] will just increase +/// the reference counter. Storage resources (i.e. threads, engine) will be +/// released when all references are dropped. /// -/// Notice that read and write methods may not be performed over full data in most cases, i.e. when -/// underlying engine is [`RaftKv`](crate::server::raftkv::RaftKv), -/// which limits data access in the range of a single region -/// according to specified `ctx` parameter. 
However, -/// [`unsafe_destroy_range`](crate::server::gc_worker::GcTask::UnsafeDestroyRange) is the only exception. -/// It's always performed on the whole TiKV. +/// Notice that read and write methods may not be performed over full data in +/// most cases, i.e. when underlying engine is +/// [`RaftKv`](crate::server::raftkv::RaftKv), which limits data access in the +/// range of a single region according to specified `ctx` parameter. However, +/// [`unsafe_destroy_range`](crate::server::gc_worker::GcTask:: +/// UnsafeDestroyRange) is the only exception. It's always performed on the +/// whole TiKV. /// -/// Operations of [`Storage`](Storage) can be divided into two types: MVCC operations and raw operations. -/// MVCC operations uses MVCC keys, which usually consist of several physical keys in different -/// CFs. In default CF and write CF, the key will be memcomparable-encoded and append the timestamp -/// to it, so that multiple versions can be saved at the same time. -/// Raw operations use raw keys, which are saved directly to the engine without memcomparable- -/// encoding and appending timestamp. +/// Operations of [`Storage`](Storage) can be divided into two types: MVCC +/// operations and raw operations. MVCC operations uses MVCC keys, which usually +/// consist of several physical keys in different CFs. In default CF and write +/// CF, the key will be memcomparable-encoded and append the timestamp to it, so +/// that multiple versions can be saved at the same time. Raw operations use raw +/// keys, which are saved directly to the engine without memcomparable- encoding +/// and appending timestamp. pub struct Storage { // TODO: Too many Arcs, would be slow when clone. engine: E, @@ -214,7 +228,7 @@ impl Drop for Storage { } macro_rules! 
check_key_size { - ($key_iter: expr, $max_key_size: expr, $callback: ident) => { + ($key_iter:expr, $max_key_size:expr, $callback:ident) => { for k in $key_iter { let key_size = k.len(); if key_size > $max_key_size { @@ -332,7 +346,8 @@ impl Storage { unsafe { with_tls_engine(f) } } - /// Check the given raw kv CF name. If the given cf is empty, CF_DEFAULT will be returned. + /// Check the given raw kv CF name. If the given cf is empty, CF_DEFAULT + /// will be returned. // TODO: refactor to use `Api` parameter. fn rawkv_cf(cf: &str, api_version: ApiVersion) -> Result { match api_version { @@ -360,8 +375,10 @@ impl Storage { /// Check if key range is valid /// - /// - If `reverse` is true, `end_key` is less than `start_key`. `end_key` is the lower bound. - /// - If `reverse` is false, `end_key` is greater than `start_key`. `end_key` is the upper bound. + /// - If `reverse` is true, `end_key` is less than `start_key`. `end_key` is + /// the lower bound. + /// - If `reverse` is false, `end_key` is greater than `start_key`. + /// `end_key` is the upper bound. fn check_key_ranges(ranges: &[KeyRange], reverse: bool) -> bool { let ranges_len = ranges.len(); for i in 0..ranges_len { @@ -415,7 +432,8 @@ impl Storage { /// * Request of V2 with legal prefix. /// See the following for detail: /// * rfc: https://github.com/tikv/rfcs/blob/master/text/0069-api-v2.md. - /// * proto: https://github.com/pingcap/kvproto/blob/master/proto/kvrpcpb.proto, enum APIVersion. + /// * proto: https://github.com/pingcap/kvproto/blob/master/proto/kvrpcpb.proto, + /// enum APIVersion. // TODO: refactor to use `Api` parameter. fn check_api_version( storage_api_version: ApiVersion, @@ -696,9 +714,11 @@ impl Storage { } } - /// Get values of a set of keys with separate context from a snapshot, return a list of `Result`s. + /// Get values of a set of keys with separate context from a snapshot, + /// return a list of `Result`s. 
/// - /// Only writes that are committed before their respective `start_ts` are visible. + /// Only writes that are committed before their respective `start_ts` are + /// visible. pub fn batch_get_command>, Statistics)>>( &self, requests: Vec, @@ -713,15 +733,17 @@ impl Storage { let concurrency_manager = self.concurrency_manager.clone(); let api_version = self.api_version; - // The resource tags of these batched requests are not the same, and it is quite expensive - // to distinguish them, so we can find random one of them as a representative. + // The resource tags of these batched requests are not the same, and it is quite + // expensive to distinguish them, so we can find random one of them as a + // representative. let rand_index = rand::thread_rng().gen_range(0, requests.len()); let rand_ctx = requests[rand_index].get_context(); let rand_key = requests[rand_index].get_key().to_vec(); let resource_tag = self .resource_tag_factory .new_tag_with_key_ranges(rand_ctx, vec![(rand_key.clone(), rand_key)]); - // Unset the TLS tracker because the future below does not belong to any specific request + // Unset the TLS tracker because the future below does not belong to any + // specific request clear_tls_tracker_token(); let res = self.read_pool.spawn_handle( async move { @@ -1044,9 +1066,10 @@ impl Storage { } } - /// Scan keys in [`start_key`, `end_key`) up to `limit` keys from the snapshot. - /// If `reverse_scan` is true, it scans [`end_key`, `start_key`) in descending order. - /// If `end_key` is `None`, it means the upper bound or the lower bound if reverse scan is unbounded. + /// Scan keys in [`start_key`, `end_key`) up to `limit` keys from the + /// snapshot. If `reverse_scan` is true, it scans [`end_key`, + /// `start_key`) in descending order. If `end_key` is `None`, it means + /// the upper bound or the lower bound if reverse scan is unbounded. /// /// Only writes committed before `start_ts` are visible. 
pub fn scan( @@ -1270,15 +1293,16 @@ impl Storage { .inc(); // Do not check_api_version in scan_lock, to be compatible with TiDB gc-worker, - // which resolves locks on regions, and boundary of regions will be out of range of TiDB keys. + // which resolves locks on regions, and boundary of regions will be out of range + // of TiDB keys. let command_duration = tikv_util::time::Instant::now(); concurrency_manager.update_max_ts(max_ts); let begin_instant = Instant::now(); - // TODO: Though it's very unlikely to find a conflicting memory lock here, it's not - // a good idea to return an error to the client, making the GC fail. A better - // approach is to wait for these locks to be unlocked. + // TODO: Though it's very unlikely to find a conflicting memory lock here, it's + // not a good idea to return an error to the client, making the GC fail. A + // better approach is to wait for these locks to be unlocked. concurrency_manager.read_range_check( start_key.as_ref(), end_key.as_ref(), @@ -1364,7 +1388,8 @@ impl Storage { } } - // The entry point of the storage scheduler. Not only transaction commands need to access keys serially. + // The entry point of the storage scheduler. Not only transaction commands need + // to access keys serially. pub fn sched_txn_command( &self, cmd: TypedCommand, @@ -1423,11 +1448,13 @@ impl Storage { /// Delete all keys in the range [`start_key`, `end_key`). /// - /// All keys in the range will be deleted permanently regardless of their timestamps. - /// This means that deleted keys will not be retrievable by specifying an older timestamp. - /// If `notify_only` is set, the data will not be immediately deleted, but the operation will - /// still be replicated via Raft. This is used to notify that the data will be deleted by - /// [`unsafe_destroy_range`](crate::server::gc_worker::GcTask::UnsafeDestroyRange) soon. + /// All keys in the range will be deleted permanently regardless of their + /// timestamps. 
This means that deleted keys will not be retrievable by + /// specifying an older timestamp. If `notify_only` is set, the data will + /// not be immediately deleted, but the operation will still be replicated + /// via Raft. This is used to notify that the data will be deleted by + /// [`unsafe_destroy_range`](crate::server::gc_worker::GcTask:: + /// UnsafeDestroyRange) soon. pub fn delete_range( &self, ctx: Context, @@ -1502,7 +1529,8 @@ impl Storage { let begin_instant = Instant::now(); let mut stats = Statistics::default(); let key = F::encode_raw_key_owned(key, None); - // Keys pass to `tls_collect_query` should be encoded, to get correct keys for region split. + // Keys pass to `tls_collect_query` should be encoded, to get correct keys for + // region split. tls_collect_query( ctx.get_region_id(), ctx.get_peer(), @@ -1555,8 +1583,9 @@ impl Storage { let priority_tag = get_priority_tag(priority); let api_version = self.api_version; - // The resource tags of these batched requests are not the same, and it is quite expensive - // to distinguish them, so we can find random one of them as a representative. + // The resource tags of these batched requests are not the same, and it is quite + // expensive to distinguish them, so we can find random one of them as a + // representative. let rand_index = rand::thread_rng().gen_range(0, gets.len()); let rand_ctx = gets[rand_index].get_context(); let rand_key = gets[rand_index].get_key().to_vec(); @@ -1590,9 +1619,9 @@ impl Storage { for (mut req, id) in gets.into_iter().zip(ids) { let ctx = req.take_context(); let key = F::encode_raw_key_owned(req.take_key(), None); - // Keys pass to `tls_collect_query` should be encoded, to get correct keys for region split. - // Don't place in loop of `snaps`, otherwise `snap.wait` may run in another thread, - // and cause the `thread-local` statistics unstable for test. + // Keys pass to `tls_collect_query` should be encoded, to get correct keys for + // region split. 
Don't place in loop of `snaps`, otherwise `snap.wait` may run + // in another thread, and cause the `thread-local` statistics unstable for test. tls_collect_query( ctx.get_region_id(), ctx.get_peer(), @@ -1890,7 +1919,8 @@ impl Storage { } /// Delete a raw key from the storage. - /// In API V2, data is "logical" deleted, to enable CDC of delete operations. + /// In API V2, data is "logical" deleted, to enable CDC of delete + /// operations. pub fn raw_delete( &self, ctx: Context, @@ -1921,8 +1951,9 @@ impl Storage { } /// Delete all raw keys in [`start_key`, `end_key`). - /// Note that in API V2, data is still "physical" deleted, as "logical" delete for a range will be quite expensive. - /// Notification of range delete operations will be through a special channel (unimplemented yet). + /// Note that in API V2, data is still "physical" deleted, as "logical" + /// delete for a range will be quite expensive. Notification of range delete + /// operations will be through a special channel (unimplemented yet). pub fn raw_delete_range( &self, ctx: Context, @@ -1959,7 +1990,8 @@ impl Storage { } /// Delete some raw keys in a batch. - /// In API V2, data is "logical" deleted, to enable CDC of delete operations. + /// In API V2, data is "logical" deleted, to enable CDC of delete + /// operations. pub fn raw_batch_delete( &self, ctx: Context, @@ -1995,14 +2027,16 @@ impl Storage { /// Scan raw keys in a range. /// - /// If `reverse_scan` is false, the range is [`start_key`, `end_key`); otherwise, the range is - /// [`end_key`, `start_key`) and it scans from `start_key` and goes backwards. If `end_key` is `None`, it - /// means unbounded. + /// If `reverse_scan` is false, the range is [`start_key`, `end_key`); + /// otherwise, the range is [`end_key`, `start_key`) and it scans from + /// `start_key` and goes backwards. If `end_key` is `None`, it means + /// unbounded. /// /// This function scans at most `limit` keys. 
/// /// If `key_only` is true, the value - /// corresponding to the key will not be read out. Only scanned keys will be returned. + /// corresponding to the key will not be read out. Only scanned keys will be + /// returned. pub fn raw_scan( &self, ctx: Context, @@ -2048,7 +2082,8 @@ impl Storage { let start_key = F::encode_raw_key_owned(start_key, None); let end_key = end_key.map(|k| F::encode_raw_key_owned(k, None)); - // Keys pass to `tls_collect_query` should be encoded, to get correct keys for region split. + // Keys pass to `tls_collect_query` should be encoded, to get correct keys for + // region split. tls_collect_query( ctx.get_region_id(), ctx.get_peer(), @@ -2324,7 +2359,8 @@ impl Storage { let begin_instant = Instant::now(); let mut stats = Statistics::default(); let key = F::encode_raw_key_owned(key, None); - // Keys pass to `tls_collect_query` should be encoded, to get correct keys for region split. + // Keys pass to `tls_collect_query` should be encoded, to get correct keys for + // region split. tls_collect_query( ctx.get_region_id(), ctx.get_peer(), @@ -3658,7 +3694,7 @@ mod tests { None, cfs_opts, cache.is_some(), - None, /*io_rate_limiter*/ + None, // io_rate_limiter ) } .unwrap(); @@ -4599,7 +4635,8 @@ mod tests { #[test] fn test_raw_v2_multi_versions() { - // Test update on the same key to verify multi-versions implementation of RawKV V2. + // Test update on the same key to verify multi-versions implementation of RawKV + // V2. let test_data = vec![Some(b"v1"), Some(b"v2"), None, Some(b"v3")]; let k = b"r\0k".to_vec(); @@ -5502,7 +5539,8 @@ mod tests { false ); - // if end_key is omitted, the next start_key is used instead. so, false is returned. + // if end_key is omitted, the next start_key is used instead. so, false is + // returned. 
let ranges = make_ranges(vec![ (b"c".to_vec(), vec![]), (b"b".to_vec(), vec![]), @@ -6386,8 +6424,8 @@ mod tests { }, ); - // We should be able to resolve all locks for transaction ts=100 when there are this - // many locks. + // We should be able to resolve all locks for transaction ts=100 when there are + // this many locks. let scanned_locks_coll = vec![ 1, RESOLVE_LOCK_BATCH_SIZE, @@ -6609,7 +6647,8 @@ mod tests { ) }; - // `advise_ttl` = 90, which is less than current ttl 100. The lock's ttl will remains 100. + // `advise_ttl` = 90, which is less than current ttl 100. The lock's ttl will + // remains 100. storage .sched_txn_command( commands::TxnHeartBeat::new(k.clone(), 10.into(), 90, Context::default()), @@ -6618,8 +6657,8 @@ mod tests { .unwrap(); rx.recv().unwrap(); - // `advise_ttl` = 110, which is greater than current ttl. The lock's ttl will be updated to - // 110. + // `advise_ttl` = 110, which is greater than current ttl. The lock's ttl will be + // updated to 110. storage .sched_txn_command( commands::TxnHeartBeat::new(k.clone(), 10.into(), 110, Context::default()), @@ -6684,8 +6723,8 @@ mod tests { assert_eq!(cm.max_ts(), ts(9, 1)); - // No lock and no commit info. If specified rollback_if_not_exist, the key will be rolled - // back. + // No lock and no commit info. If specified rollback_if_not_exist, the key will + // be rolled back. storage .sched_txn_command( commands::CheckTxnStatus::new( @@ -7959,9 +7998,9 @@ mod tests { } // This is one of the series of tests to test overlapped timestamps. - // Overlapped ts means there is a rollback record and a commit record with the same ts. - // In this test we check that if rollback happens before commit, then they should not have overlapped ts, - // which is an expected property. + // Overlapped ts means there is a rollback record and a commit record with the + // same ts. 
In this test we check that if rollback happens before commit, then + // they should not have overlapped ts, which is an expected property. #[test] fn test_overlapped_ts_rollback_before_prewrite() { let engine = TestEngineBuilder::new().build().unwrap(); @@ -8114,8 +8153,9 @@ mod tests { .unwrap(); assert!(rx.recv().unwrap() > 10); } - // this test shows that the scheduler take `response_policy` in `WriteResult` serious, - // ie. call the callback at expected stage when writing to the engine + // this test shows that the scheduler take `response_policy` in `WriteResult` + // serious, ie. call the callback at expected stage when writing to the + // engine #[test] fn test_scheduler_response_policy() { struct Case { @@ -8279,8 +8319,8 @@ mod tests { .unwrap(); let (tx, rx) = channel(); - // Pessimistically lock k1, k2, k3, k4, after the pessimistic retry k2 is no longer needed - // and the pessimistic lock on k2 is left. + // Pessimistically lock k1, k2, k3, k4, after the pessimistic retry k2 is no + // longer needed and the pessimistic lock on k2 is left. storage .sched_txn_command( new_acquire_pessimistic_lock_command( @@ -8352,7 +8392,8 @@ mod tests { rx.recv().unwrap(); // Pessimistically rollback the k2 lock. - // Non lite lock resolve on k1 and k2, there should no errors as lock on k2 is pessimistic type. + // Non lite lock resolve on k1 and k2, there should no errors as lock on k2 is + // pessimistic type. must_rollback(&storage.engine, b"k2", 10, false); let mut temp_map = HashMap::default(); temp_map.insert(10.into(), 20.into()); @@ -8489,7 +8530,8 @@ mod tests { // Test check_api_version. // See the following for detail: // * rfc: https://github.com/tikv/rfcs/blob/master/text/0069-api-v2.md. - // * proto: https://github.com/pingcap/kvproto/blob/master/proto/kvrpcpb.proto, enum APIVersion. + // * proto: https://github.com/pingcap/kvproto/blob/master/proto/kvrpcpb.proto, + // enum APIVersion. 
#[test] fn test_check_api_version() { use error_code::storage::*; @@ -8871,7 +8913,8 @@ mod tests { } let (tx, rx) = channel(); - // The written in-memory pessimistic lock should be visible, so the new lock request should fail. + // The written in-memory pessimistic lock should be visible, so the new lock + // request should fail. storage .sched_txn_command( new_acquire_pessimistic_lock_command( @@ -8886,7 +8929,8 @@ mod tests { }), ) .unwrap(); - // DummyLockManager just drops the callback, so it will fail to receive anything. + // DummyLockManager just drops the callback, so it will fail to receive + // anything. assert!(rx.recv().is_err()); let (tx, rx) = channel(); @@ -8943,7 +8987,8 @@ mod tests { ) .unwrap(); rx.recv().unwrap(); - // When disabling in-memory pessimistic lock, the lock map should remain unchanged. + // When disabling in-memory pessimistic lock, the lock map should remain + // unchanged. assert!(txn_ext.pessimistic_locks.read().is_empty()); let (tx, rx) = channel(); diff --git a/src/storage/mvcc/consistency_check.rs b/src/storage/mvcc/consistency_check.rs index eb788cb4dd3..d715ec598c2 100644 --- a/src/storage/mvcc/consistency_check.rs +++ b/src/storage/mvcc/consistency_check.rs @@ -28,8 +28,9 @@ use crate::storage::mvcc::{Lock, LockType, WriteRef, WriteType}; const PHYSICAL_SHIFT_BITS: usize = 18; const SAFE_POINT_WINDOW: usize = 120; -// When leader broadcasts a ComputeHash command to followers, it's possible that the safe point -// becomes stale when the command reaches followers. So use a 2 minutes window to reduce this. +// When leader broadcasts a ComputeHash command to followers, it's possible that +// the safe point becomes stale when the command reaches followers. So use a 2 +// minutes window to reduce this. fn get_safe_point_for_check(mut safe_point: u64) -> u64 { safe_point >>= PHYSICAL_SHIFT_BITS; safe_point += (SAFE_POINT_WINDOW * 1000) as u64; // 120s * 1000ms/s. 
diff --git a/src/storage/mvcc/reader/mod.rs b/src/storage/mvcc/reader/mod.rs index 440a1650ca3..2e7d20ccf2b 100644 --- a/src/storage/mvcc/reader/mod.rs +++ b/src/storage/mvcc/reader/mod.rs @@ -24,23 +24,25 @@ pub enum NewerTsCheckState { NotMetYet, } -/// The result of `get_txn_commit_record`, which is used to get the status of a specified -/// transaction from write cf. +/// The result of `get_txn_commit_record`, which is used to get the status of a +/// specified transaction from write cf. #[derive(Debug)] pub enum TxnCommitRecord { - /// The commit record of the given transaction is not found. But it's possible that there's - /// another transaction's commit record, whose `commit_ts` equals to the current transaction's - /// `start_ts`. That kind of record will be returned via the `overlapped_write` field. - /// In this case, if the current transaction is to be rolled back, the `overlapped_write` must not - /// be overwritten. + /// The commit record of the given transaction is not found. But it's + /// possible that there's another transaction's commit record, whose + /// `commit_ts` equals to the current transaction's `start_ts`. That + /// kind of record will be returned via the `overlapped_write` field. + /// In this case, if the current transaction is to be rolled back, the + /// `overlapped_write` must not be overwritten. None { overlapped_write: Option, }, /// Found the transaction's write record. SingleRecord { commit_ts: TimeStamp, write: Write }, - /// The transaction's status is found in another transaction's record's `overlapped_rollback` - /// field. This may happen when the current transaction's `start_ts` is the same as the - /// `commit_ts` of another transaction on this key. + /// The transaction's status is found in another transaction's record's + /// `overlapped_rollback` field. This may happen when the current + /// transaction's `start_ts` is the same as the `commit_ts` of another + /// transaction on this key. 
OverlappedRollback { commit_ts: TimeStamp }, } diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index a9ce84aada7..434d0948310 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -50,8 +50,8 @@ impl PointGetterBuilder { self } - /// Set whether values of the user key should be omitted. When `omit_value` is `true`, the - /// length of returned value will be 0. + /// Set whether values of the user key should be omitted. When `omit_value` + /// is `true`, the length of returned value will be 0. /// /// Previously this option is called `key_only`. /// @@ -93,8 +93,8 @@ impl PointGetterBuilder { self } - /// Check whether there is data with newer ts. The result of `met_newer_ts_data` is Unknown - /// if this option is not set. + /// Check whether there is data with newer ts. The result of + /// `met_newer_ts_data` is Unknown if this option is not set. /// /// Default is false. #[inline] @@ -132,8 +132,9 @@ impl PointGetterBuilder { } } -/// This struct can be used to get the value of user keys. Internally, rollbacks are ignored and -/// smaller version will be tried. If the isolation level is Si, locks will be checked first. +/// This struct can be used to get the value of user keys. Internally, rollbacks +/// are ignored and smaller version will be tried. If the isolation level is Si, +/// locks will be checked first. /// /// Use `PointGetterBuilder` to build `PointGetter`. pub struct PointGetter { @@ -169,7 +170,8 @@ impl PointGetter { fail_point!("point_getter_get"); if need_check_locks(self.isolation_level) { - // Check locks that signal concurrent writes for `Si` or more recent writes for `RcCheckTs`. + // Check locks that signal concurrent writes for `Si` or more recent writes for + // `RcCheckTs`. if let Some(lock) = self.load_and_check_lock(user_key)? 
{ return self.load_data_from_lock(user_key, lock); } @@ -178,13 +180,14 @@ impl PointGetter { self.load_data(user_key) } - /// Get a lock of a user key in the lock CF. If lock exists, it will be checked to - /// see whether it conflicts with the given `ts` and return an error if so. If the - /// lock is in access_locks, it will be returned and caller can read through it. + /// Get a lock of a user key in the lock CF. If lock exists, it will be + /// checked to see whether it conflicts with the given `ts` and return + /// an error if so. If the lock is in access_locks, it will be returned + /// and caller can read through it. /// - /// In common cases we expect to get nothing in lock cf. Using a `get_cf` instead of `seek` - /// is fast in such cases due to no need for RocksDB to continue move and skip deleted entries - /// until find a user key. + /// In common cases we expect to get nothing in lock cf. Using a `get_cf` + /// instead of `seek` is fast in such cases due to no need for RocksDB + /// to continue move and skip deleted entries until find a user key. fn load_and_check_lock(&mut self, user_key: &Key) -> Result> { self.statistics.lock.get += 1; let lock_value = self.snapshot.get_cf(CF_LOCK, user_key)?; @@ -216,8 +219,8 @@ impl PointGetter { /// Load the value. /// - /// First, a correct version info in the Write CF will be sought. Then, value will be loaded - /// from Default CF if necessary. + /// First, a correct version info in the Write CF will be sought. Then, + /// value will be loaded from Default CF if necessary. fn load_data(&mut self, user_key: &Key) -> Result> { let mut use_near_seek = false; let mut seek_key = user_key.clone(); @@ -323,9 +326,10 @@ impl PointGetter { /// Load the value from default CF. /// - /// We assume that mostly the keys given to batch get keys are not very close to each other. - /// `near_seek` will likely fall back to `seek` in such scenario, which takes 2x time - /// compared to `get_cf`. 
Thus we use `get_cf` directly here. + /// We assume that mostly the keys given to batch get keys are not very + /// close to each other. `near_seek` will likely fall back to `seek` in + /// such scenario, which takes 2x time compared to `get_cf`. Thus we use + /// `get_cf` directly here. fn load_data_from_default_cf( &mut self, write_start_ts: TimeStamp, @@ -350,7 +354,8 @@ impl PointGetter { /// Load the value from the lock. /// - /// The lock belongs to a committed transaction and its commit_ts <= read's start_ts. + /// The lock belongs to a committed transaction and its commit_ts <= read's + /// start_ts. fn load_data_from_lock(&mut self, user_key: &Key, lock: Lock) -> Result> { debug_assert!(lock.ts < self.ts && lock.min_commit_ts <= self.ts); match lock.lock_type { @@ -373,8 +378,8 @@ impl PointGetter { } LockType::Delete => Ok(None), LockType::Lock | LockType::Pessimistic => { - // Only when fails to call `Lock::check_ts_conflict()`, the function is called, so it's - // unreachable here. + // Only when fails to call `Lock::check_ts_conflict()`, the function is called, + // so it's unreachable here. unreachable!() } } @@ -552,8 +557,8 @@ mod tests { engine } - /// Builds a sample engine that contains transactions on the way and some short - /// values embedded in the write CF. The data is as follows: + /// Builds a sample engine that contains transactions on the way and some + /// short values embedded in the write CF. The data is as follows: /// DELETE bar (start at 4) /// PUT bar -> barval (commit at 3) /// PUT foo1 -> foo1vv... (commit at 3) @@ -919,8 +924,8 @@ mod tests { must_get_err(&mut getter, key); must_rollback(&engine, key, 40, false); - // Should get the latest committed value if there is a primary lock with a ts less than - // the latest Write's commit_ts. + // Should get the latest committed value if there is a primary lock with a ts + // less than the latest Write's commit_ts. 
// // write.start_ts(10) < primary_lock.start_ts(15) < write.commit_ts(20) must_acquire_pessimistic_lock(&engine, key, key, 15, 50); @@ -1016,7 +1021,7 @@ mod tests { 100, 80.into(), 1, - 100.into(), /* min_commit_ts */ + 100.into(), // min_commit_ts TimeStamp::default(), false, Assertion::None, @@ -1229,7 +1234,8 @@ mod tests { must_get_value(&mut batch_getter, key2, val22); must_get_err(&mut batch_getter, key3); - // Test batch point get. Error should not be reported if the lock type is rollback or lock. + // Test batch point get. Error should not be reported if the lock type is + // rollback or lock. let mut batch_getter_ok = new_point_getter_with_iso(&engine, 70.into(), IsolationLevel::RcCheckTs); must_get_value(&mut batch_getter_ok, key4, val4); diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 614f8acb147..377d2c94022 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -20,15 +20,16 @@ use crate::storage::{ }, }; -/// Read from an MVCC snapshot, i.e., a logical view of the database at a specific timestamp (the -/// start_ts). +/// Read from an MVCC snapshot, i.e., a logical view of the database at a +/// specific timestamp (the start_ts). /// /// This represents the view of the database from a single transaction. /// -/// Confusingly, there are two meanings of the word 'snapshot' here. In the name of the struct, -/// 'snapshot' means an mvcc snapshot. In the type parameter bound (of `S`), 'snapshot' means a view -/// of the underlying storage engine at a given point in time. This latter snapshot will include -/// values for keys at multiple timestamps. +/// Confusingly, there are two meanings of the word 'snapshot' here. In the name +/// of the struct, 'snapshot' means an mvcc snapshot. In the type parameter +/// bound (of `S`), 'snapshot' means a view of the underlying storage engine at +/// a given point in time. 
This latter snapshot will include values for keys at +/// multiple timestamps. pub struct SnapshotReader { pub reader: MvccReader, pub start_ts: TimeStamp, @@ -123,11 +124,12 @@ pub struct MvccReader { lock_cursor: Option>, write_cursor: Option>, - /// None means following operations are performed on a single user key, i.e., - /// different versions of the same key. It can use prefix seek to speed up reads - /// from the write-cf. + /// None means following operations are performed on a single user key, + /// i.e., different versions of the same key. It can use prefix seek to + /// speed up reads from the write-cf. scan_mode: Option, - // Records the current key for prefix seek. Will Reset the write cursor when switching to another key. + // Records the current key for prefix seek. Will Reset the write cursor when switching to + // another key. current_key: Option, fill_cache: bool, @@ -266,28 +268,31 @@ impl MvccReader { } /// Return: - /// (commit_ts, write_record) of the write record for `key` committed before or equal to`ts` - /// Post Condition: - /// leave the write_cursor at the first record which key is less or equal to the `ts` encoded version of `key` + /// (commit_ts, write_record) of the write record for `key` committed + /// before or equal to`ts` Post Condition: + /// leave the write_cursor at the first record which key is less or equal + /// to the `ts` encoded version of `key` pub fn seek_write(&mut self, key: &Key, ts: TimeStamp) -> Result> { // Get the cursor for write record // - // When it switches to another key in prefix seek mode, creates a new cursor for it - // because the current position of the cursor is seldom around `key`. + // When it switches to another key in prefix seek mode, creates a new cursor for + // it because the current position of the cursor is seldom around `key`. 
if self.scan_mode.is_none() && self.current_key.as_ref().map_or(true, |k| k != key) { self.current_key = Some(key.clone()); self.write_cursor.take(); } self.create_write_cursor()?; let cursor = self.write_cursor.as_mut().unwrap(); - // find a `ts` encoded key which is less than the `ts` encoded version of the `key` + // find a `ts` encoded key which is less than the `ts` encoded version of the + // `key` let found = cursor.near_seek(&key.clone().append_ts(ts), &mut self.statistics.write)?; if !found { return Ok(None); } let write_key = cursor.key(&mut self.statistics.write); let commit_ts = Key::decode_ts_from(write_key)?; - // check whether the found written_key's "real key" part equals the `key` we want to find + // check whether the found written_key's "real key" part equals the `key` we + // want to find if !Key::is_user_key_eq(write_key, key.as_encoded()) { return Ok(None); } @@ -296,17 +301,19 @@ impl MvccReader { Ok(Some((commit_ts, write))) } - /// Gets the value of the specified key's latest version before specified `ts`. + /// Gets the value of the specified key's latest version before specified + /// `ts`. /// - /// It tries to ensure the write record's `gc_fence`'s ts, if any, greater than specified - /// `gc_fence_limit`. Pass `None` to `gc_fence_limit` to skip the check. - /// The caller must guarantee that there's no other `PUT` or `DELETE` versions whose `commit_ts` - /// is between the found version and the provided `gc_fence_limit` (`gc_fence_limit` is - /// inclusive). + /// It tries to ensure the write record's `gc_fence`'s ts, if any, greater + /// than specified `gc_fence_limit`. Pass `None` to `gc_fence_limit` to + /// skip the check. The caller must guarantee that there's no other `PUT` or + /// `DELETE` versions whose `commit_ts` is between the found version and + /// the provided `gc_fence_limit` (`gc_fence_limit` is inclusive). /// - /// For transactional reads, the `gc_fence_limit` must be provided to ensure the result is - /// correct. 
Generally, it should be the read_ts of the current transaction, which might be - /// different from the `ts` passed to this function. + /// For transactional reads, the `gc_fence_limit` must be provided to ensure + /// the result is correct. Generally, it should be the read_ts of the + /// current transaction, which might be different from the `ts` passed to + /// this function. /// /// Note that this function does not check for locks on `key`. fn get( @@ -321,15 +328,17 @@ impl MvccReader { }) } - /// Gets the write record of the specified key's latest version before specified `ts`. - /// It tries to ensure the write record's `gc_fence`'s ts, if any, greater than specified - /// `gc_fence_limit`. Pass `None` to `gc_fence_limit` to skip the check. - /// The caller must guarantee that there's no other `PUT` or `DELETE` versions whose `commit_ts` - /// is between the found version and the provided `gc_fence_limit` (`gc_fence_limit` is + /// Gets the write record of the specified key's latest version before + /// specified `ts`. It tries to ensure the write record's `gc_fence`'s + /// ts, if any, greater than specified `gc_fence_limit`. Pass `None` to + /// `gc_fence_limit` to skip the check. The caller must guarantee that + /// there's no other `PUT` or `DELETE` versions whose `commit_ts` is between + /// the found version and the provided `gc_fence_limit` (`gc_fence_limit` is /// inclusive). - /// For transactional reads, the `gc_fence_limit` must be provided to ensure the result is - /// correct. Generally, it should be the read_ts of the current transaction, which might be - /// different from the `ts` passed to this function. + /// For transactional reads, the `gc_fence_limit` must be provided to ensure + /// the result is correct. Generally, it should be the read_ts of the + /// current transaction, which might be different from the `ts` passed to + /// this function. 
pub fn get_write( &mut self, key: &Key, @@ -341,8 +350,8 @@ impl MvccReader { .map(|(w, _)| w)) } - /// Gets the write record of the specified key's latest version before specified `ts`, and - /// additionally the write record's `commit_ts`, if any. + /// Gets the write record of the specified key's latest version before + /// specified `ts`, and additionally the write record's `commit_ts`, if any. /// /// See also [`MvccReader::get_write`]. pub fn get_write_with_commit_ts( @@ -375,8 +384,8 @@ impl MvccReader { } fn get_txn_commit_record(&mut self, key: &Key, start_ts: TimeStamp) -> Result { - // It's possible a txn with a small `start_ts` has a greater `commit_ts` than a txn with - // a greater `start_ts` in pessimistic transaction. + // It's possible a txn with a small `start_ts` has a greater `commit_ts` than a + // txn with a greater `start_ts` in pessimistic transaction. // I.e., txn_1.commit_ts > txn_2.commit_ts > txn_2.start_ts > txn_1.start_ts. // // Scan all the versions from `TimeStamp::max()` to `start_ts`. @@ -462,11 +471,12 @@ impl MvccReader { Ok(None) } - /// Scan locks that satisfies `filter(lock)` returns true, from the given start key `start`. - /// At most `limit` locks will be returned. If `limit` is set to `0`, it means unlimited. + /// Scan locks that satisfies `filter(lock)` returns true, from the given + /// start key `start`. At most `limit` locks will be returned. If `limit` is + /// set to `0`, it means unlimited. /// - /// The return type is `(locks, is_remain)`. `is_remain` indicates whether there MAY be - /// remaining locks that can be scanned. + /// The return type is `(locks, is_remain)`. `is_remain` indicates whether + /// there MAY be remaining locks that can be scanned. 
pub fn scan_locks( &mut self, start: Option<&Key>, @@ -505,7 +515,8 @@ impl MvccReader { cursor.next(&mut self.statistics.lock); } self.statistics.lock.processed_keys += locks.len(); - // If we reach here, `cursor.valid()` is `false`, so there MUST be no more locks. + // If we reach here, `cursor.valid()` is `false`, so there MUST be no more + // locks. Ok((locks, false)) } @@ -1068,9 +1079,10 @@ pub mod tests { let snap = RegionSnapshot::::from_raw(db, region); let mut reader = MvccReader::new(snap, None, false); - // Let's assume `50_45 PUT` means a commit version with start ts is 45 and commit ts - // is 50. - // Commit versions: [50_45 PUT, 45_40 PUT, 40_35 PUT, 30_25 PUT, 20_20 Rollback, 10_1 PUT, 5_5 Rollback]. + // Let's assume `50_45 PUT` means a commit version with start ts is 45 and + // commit ts is 50. + // Commit versions: [50_45 PUT, 45_40 PUT, 40_35 PUT, 30_25 PUT, 20_20 Rollback, + // 10_1 PUT, 5_5 Rollback]. let key = Key::from_raw(k); let overlapped_write = reader .get_txn_commit_record(&key, 55.into()) @@ -1078,8 +1090,8 @@ pub mod tests { .unwrap_none(); assert!(overlapped_write.is_none()); - // When no such record is found but a record of another txn has a write record with - // its commit_ts equals to current start_ts, it + // When no such record is found but a record of another txn has a write record + // with its commit_ts equals to current start_ts, it let overlapped_write = reader .get_txn_commit_record(&key, 50.into()) .unwrap() @@ -1234,9 +1246,10 @@ pub mod tests { engine.prewrite(m, k, 23); engine.commit(k, 23, 25); - // Let's assume `2_1 PUT` means a commit version with start ts is 1 and commit ts - // is 2. - // Commit versions: [25_23 PUT, 20_10 PUT, 17_15 PUT, 7_7 Rollback, 5_1 PUT, 3_3 Rollback]. + // Let's assume `2_1 PUT` means a commit version with start ts is 1 and commit + // ts is 2. + // Commit versions: [25_23 PUT, 20_10 PUT, 17_15 PUT, 7_7 Rollback, 5_1 PUT, 3_3 + // Rollback]. 
let snap = RegionSnapshot::::from_raw(db.clone(), region.clone()); let mut reader = MvccReader::new(snap, None, false); @@ -1383,10 +1396,10 @@ pub mod tests { let snap = RegionSnapshot::::from_raw(db, region); let mut reader = MvccReader::new(snap, None, false); - // Let's assume `2_1 PUT` means a commit version with start ts is 1 and commit ts - // is 2. - // Commit versions: [21_17 LOCK, 20_18 PUT, 15_13 LOCK, 14_12 PUT, 9_8 DELETE, 7_6 LOCK, - // 5_5 Rollback, 2_1 PUT]. + // Let's assume `2_1 PUT` means a commit version with start ts is 1 and commit + // ts is 2. + // Commit versions: [21_17 LOCK, 20_18 PUT, 15_13 LOCK, 14_12 PUT, 9_8 DELETE, + // 7_6 LOCK, 5_5 Rollback, 2_1 PUT]. let key = Key::from_raw(k); assert!(reader.get_write(&key, 1.into(), None).unwrap().is_none()); @@ -1947,7 +1960,8 @@ pub mod tests { } } - // Must return Oldvalue::None when prev_write_loaded is true and prev_write is None. + // Must return Oldvalue::None when prev_write_loaded is true and prev_write is + // None. let engine = TestEngineBuilder::new().build().unwrap(); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new(snapshot, None, true); diff --git a/src/storage/mvcc/reader/scanner/backward.rs b/src/storage/mvcc/reader/scanner/backward.rs index 0b20c94a819..7e3d677ea52 100644 --- a/src/storage/mvcc/reader/scanner/backward.rs +++ b/src/storage/mvcc/reader/scanner/backward.rs @@ -22,11 +22,11 @@ use crate::storage::{ // RocksDB, so don't set REVERSE_SEEK_BOUND too small. const REVERSE_SEEK_BOUND: u64 = 16; -/// This struct can be used to scan keys starting from the given user key in the reverse order -/// (less than). +/// This struct can be used to scan keys starting from the given user key in the +/// reverse order (less than). /// -/// Internally, for each key, rollbacks are ignored and smaller version will be tried. If the -/// isolation level is SI, locks will be checked first. 
+/// Internally, for each key, rollbacks are ignored and smaller version will be +/// tried. If the isolation level is SI, locks will be checked first. /// /// Use `ScannerBuilder` to build `BackwardKvScanner`. pub struct BackwardKvScanner { @@ -81,8 +81,8 @@ impl BackwardKvScanner { // TODO: `seek_to_last` is better, however it has performance issues currently. // TODO: We have no guarantee about whether or not the upper_bound has a // timestamp suffix, so currently it is not safe to change write_cursor's - // reverse_seek to seek_for_prev. However in future, once we have different types - // for them, this can be done safely. + // reverse_seek to seek_for_prev. However in future, once we have different + // types for them, this can be done safely. self.write_cursor.reverse_seek( self.cfg.upper_bound.as_ref().unwrap(), &mut self.statistics.write, @@ -131,9 +131,9 @@ impl BackwardKvScanner { let write_user_key = Key::truncate_ts_for(wk)?; match write_user_key.cmp(lk) { Ordering::Less => { - // We are scanning from largest user key to smallest user key, so this - // indicate that we meet a lock first, thus its corresponding write - // does not exist. + // We are scanning from largest user key to smallest user key, so + // this indicate that we meet a lock first, thus its corresponding + // write does not exist. (lk, false, true) } Ordering::Greater => { @@ -145,8 +145,8 @@ impl BackwardKvScanner { } }; - // Use `from_encoded_slice` to reserve space for ts, so later we can append ts to - // the key or its clones without reallocation. + // Use `from_encoded_slice` to reserve space for ts, so later we can append ts + // to the key or its clones without reallocation. (Key::from_encoded_slice(res.0), res.1, res.2) }; @@ -188,7 +188,8 @@ impl BackwardKvScanner { &mut self.statistics, ); if has_write { - // Skip current_user_key because this key is either blocked or handled. + // Skip current_user_key because this key is either blocked or + // handled. 
has_write = false; self.move_write_cursor_to_prev_user_key(¤t_user_key)?; } @@ -218,9 +219,9 @@ impl BackwardKvScanner { } } - /// Attempt to get the value of a key specified by `user_key` and `self.cfg.ts` in reverse order. - /// This function requires that the write cursor is currently pointing to the earliest version - /// of `user_key`. + /// Attempt to get the value of a key specified by `user_key` and + /// `self.cfg.ts` in reverse order. This function requires that the write + /// cursor is currently pointing to the earliest version of `user_key`. #[inline] fn reverse_get( &mut self, @@ -232,8 +233,8 @@ impl BackwardKvScanner { // At first, we try to use several `prev()` to get the desired version. - // We need to save last desired version, because when we may move to an unwanted version - // at any time. + // We need to save last desired version, because when we may move to an unwanted + // version at any time. let mut last_version = None; let mut last_checked_commit_ts = TimeStamp::zero(); @@ -310,8 +311,8 @@ impl BackwardKvScanner { } assert!(ts > last_checked_commit_ts); - // After several `prev()`, we still not get the latest version for the specified ts, - // use seek to locate the latest version. + // After several `prev()`, we still not get the latest version for the specified + // ts, use seek to locate the latest version. // Check whether newer version exists. let mut use_near_seek = false; @@ -336,8 +337,8 @@ impl BackwardKvScanner { } } - // `user_key` must have reserved space here, so its clone `seek_key` has reserved space - // too. Thus no reallocation happens in `append_ts`. + // `user_key` must have reserved space here, so its clone `seek_key` has + // reserved space too. Thus no reallocation happens in `append_ts`. 
seek_key = seek_key.append_ts(ts); if use_near_seek { self.write_cursor @@ -349,9 +350,9 @@ impl BackwardKvScanner { assert!(self.write_cursor.valid()?); loop { - // After seek, or after some `next()`, we may reach `last_checked_commit_ts` again. It - // means we have checked all versions for this user key. We use `last_version` as - // return. + // After seek, or after some `next()`, we may reach `last_checked_commit_ts` + // again. It means we have checked all versions for this user key. + // We use `last_version` as return. let current_ts = { let current_key = self.write_cursor.key(&mut self.statistics.write); // We should never reach another user key. @@ -387,8 +388,8 @@ impl BackwardKvScanner { } } - /// Handle last version. Last version may be PUT or DELETE. If it is a PUT, value should be - /// load. + /// Handle last version. Last version may be PUT or DELETE. If it is a PUT, + /// value should be load. #[inline] fn handle_last_version( &mut self, @@ -410,8 +411,9 @@ impl BackwardKvScanner { } } - /// Load the value by the given `some_write`. If value is carried in `some_write`, it will be - /// returned directly. Otherwise there will be a default CF look up. + /// Load the value by the given `some_write`. If value is carried in + /// `some_write`, it will be returned directly. Otherwise there will be a + /// default CF look up. /// /// The implementation is similar to `PointGetter::load_data_by_write`. #[inline] @@ -438,13 +440,13 @@ impl BackwardKvScanner { } } - /// After `self.reverse_get()`, our write cursor may be pointing to current user key (if we - /// found a desired version), or previous user key (if there is no desired version), or - /// out of bound. + /// After `self.reverse_get()`, our write cursor may be pointing to current + /// user key (if we found a desired version), or previous user key (if there + /// is no desired version), or out of bound. 
/// - /// If it is pointing to current user key, we need to step it until we meet a new - /// key. We first try to `prev()` a few times. If still not reaching another user - /// key, we `seek_for_prev()`. + /// If it is pointing to current user key, we need to step it until we meet + /// a new key. We first try to `prev()` a few times. If still not reaching + /// another user key, we `seek_for_prev()`. #[inline] fn move_write_cursor_to_prev_user_key(&mut self, current_user_key: &Key) -> Result<()> { for i in 0..SEEK_BOUND { @@ -520,7 +522,8 @@ mod tests { must_commit(&engine, k, ts, ts); } - // Generate REVERSE_SEEK_BOUND / 2 Put and REVERSE_SEEK_BOUND / 2 + 1 Rollback for key [8]. + // Generate REVERSE_SEEK_BOUND / 2 Put and REVERSE_SEEK_BOUND / 2 + 1 Rollback + // for key [8]. let k = &[8_u8]; for ts in 0..=REVERSE_SEEK_BOUND { must_prewrite_put(&engine, k, &[ts as u8], k, ts); @@ -540,8 +543,8 @@ mod tests { } } - // Generate REVERSE_SEEK_BOUND / 2 Put, 1 Delete and REVERSE_SEEK_BOUND / 2 Rollback - // for key [7]. + // Generate REVERSE_SEEK_BOUND / 2 Put, 1 Delete and REVERSE_SEEK_BOUND / 2 + // Rollback for key [7]. let k = &[7_u8]; for ts in 0..REVERSE_SEEK_BOUND / 2 { must_prewrite_put(&engine, k, &[ts as u8], k, ts); @@ -796,8 +799,8 @@ mod tests { assert_eq!(statistics.processed_size, 0); } - /// Check whether everything works as usual when `BackwardKvScanner::reverse_get()` goes - /// out of bound. + /// Check whether everything works as usual when + /// `BackwardKvScanner::reverse_get()` goes out of bound. /// /// Case 1. prev out of bound, next_version is None. #[test] @@ -880,8 +883,8 @@ mod tests { assert_eq!(statistics.processed_size, 0); } - /// Check whether everything works as usual when `BackwardKvScanner::reverse_get()` goes - /// out of bound. + /// Check whether everything works as usual when + /// `BackwardKvScanner::reverse_get()` goes out of bound. /// /// Case 2. prev out of bound, next_version is Some. 
#[test] @@ -973,7 +976,8 @@ mod tests { } /// Check whether everything works as usual when - /// `BackwardKvScanner::move_write_cursor_to_prev_user_key()` goes out of bound. + /// `BackwardKvScanner::move_write_cursor_to_prev_user_key()` goes out of + /// bound. /// /// Case 1. prev() out of bound #[test] @@ -1054,7 +1058,8 @@ mod tests { } /// Check whether everything works as usual when - /// `BackwardKvScanner::move_write_cursor_to_prev_user_key()` goes out of bound. + /// `BackwardKvScanner::move_write_cursor_to_prev_user_key()` goes out of + /// bound. /// /// Case 2. seek_for_prev() out of bound #[test] @@ -1141,7 +1146,8 @@ mod tests { } /// Check whether everything works as usual when - /// `BackwardKvScanner::move_write_cursor_to_prev_user_key()` goes out of bound. + /// `BackwardKvScanner::move_write_cursor_to_prev_user_key()` goes out of + /// bound. /// /// Case 3. a more complicated case #[test] @@ -1167,7 +1173,8 @@ mod tests { .build() .unwrap(); - // The following illustration comments assume that SEEK_BOUND = 4, REVERSE_SEEK_BOUND = 6. + // The following illustration comments assume that SEEK_BOUND = 4, + // REVERSE_SEEK_BOUND = 6. // Initial position: 1 seek_to_last: // b_11 b_10 b_9 b_8 b_7 b_6 b_5 b_4 b_3 b_2 b_1 c_1 diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 1e5163dcd78..d2c5e8b6a1b 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -94,8 +94,8 @@ impl Cursors { // We have not found another user key for now, so we directly `seek()`. // After that, we must pointing to another key, or out of bound. - // `current_user_key` must have reserved space here, so its clone has reserved space too. - // So no reallocation happens in `append_ts`. + // `current_user_key` must have reserved space here, so its clone has reserved + // space too. So no reallocation happens in `append_ts`. 
self.write.internal_seek( ¤t_user_key.clone().append_ts(TimeStamp::zero()), &mut statistics.write, @@ -194,17 +194,17 @@ impl> ForwardScanner { loop { // `current_user_key` is `min(user_key(write_cursor), lock_cursor)`, indicating - // the encoded user key we are currently dealing with. It may not have a write, or - // may not have a lock. It is not a slice to avoid data being invalidated after - // cursor moving. + // the encoded user key we are currently dealing with. It may not have a write, + // or may not have a lock. It is not a slice to avoid data being invalidated + // after cursor moving. // - // `has_write` indicates whether `current_user_key` has at least one corresponding - // `write`. If there is one, it is what current write cursor pointing to. The pointed - // `write` must be the most recent (i.e. largest `commit_ts`) write of - // `current_user_key`. + // `has_write` indicates whether `current_user_key` has at least one + // corresponding `write`. If there is one, it is what current write cursor + // pointing to. The pointed `write` must be the most recent (i.e. largest + // `commit_ts`) write of `current_user_key`. // - // `has_lock` indicates whether `current_user_key` has a corresponding `lock`. If - // there is one, it is what current lock cursor pointing to. + // `has_lock` indicates whether `current_user_key` has a corresponding `lock`. + // If there is one, it is what current lock cursor pointing to. let (mut current_user_key, has_write, has_lock) = { let w_key = if self.cursors.write.valid()? { Some(self.cursors.write.key(&mut self.statistics.write)) @@ -261,8 +261,8 @@ impl> ForwardScanner { } }; - // Use `from_encoded_slice` to reserve space for ts, so later we can append ts to - // the key or its clones without reallocation. + // Use `from_encoded_slice` to reserve space for ts, so later we can append ts + // to the key or its clones without reallocation. 
(Key::from_encoded_slice(res.0), res.1, res.2) }; @@ -303,10 +303,10 @@ impl> ForwardScanner { } } - /// Try to move the write cursor to the `self.cfg.ts` version of the given key. - /// Because it is possible that the cursor is moved to the next user key or - /// the end of key space, the method returns whether the write cursor still - /// points to the given user key. + /// Try to move the write cursor to the `self.cfg.ts` version of the given + /// key. Because it is possible that the cursor is moved to the next user + /// key or the end of key space, the method returns whether the write cursor + /// still points to the given user key. fn move_write_cursor_to_ts(&mut self, user_key: &Key) -> Result { assert!(self.cursors.write.valid()?); @@ -339,7 +339,8 @@ impl> ForwardScanner { self.met_newer_ts_data = NewerTsCheckState::Met; } - // Report error if there's a more recent version if the isolation level is RcCheckTs. + // Report error if there's a more recent version if the isolation level is + // RcCheckTs. if self.cfg.isolation_level == IsolationLevel::RcCheckTs { // TODO: the more write recent version with `LOCK` or `ROLLBACK` write type // could be skipped. @@ -354,10 +355,11 @@ impl> ForwardScanner { } } } - // If we have not found `${user_key}_${ts}` in a few `next()`, directly `seek()`. + // If we have not found `${user_key}_${ts}` in a few `next()`, directly + // `seek()`. if needs_seek { - // `user_key` must have reserved space here, so its clone has reserved space too. So no - // reallocation happens in `append_ts`. + // `user_key` must have reserved space here, so its clone has reserved space + // too. So no reallocation happens in `append_ts`. self.cursors.write.seek( &user_key.clone().append_ts(self.cfg.ts), &mut self.statistics.write, @@ -536,8 +538,9 @@ impl ScanPolicy for LatestEntryPolicy { cursors: &mut Cursors, statistics: &mut Statistics, ) -> Result> { - // Now we must have reached the first key >= `${user_key}_${ts}`. 
However, we may - // meet `Lock` or `Rollback`. In this case, more versions needs to be looked up. + // Now we must have reached the first key >= `${user_key}_${ts}`. However, we + // may meet `Lock` or `Rollback`. In this case, more versions needs to be looked + // up. let mut write_key = cursors.write.key(&mut statistics.write); let entry: Option = loop { if Key::decode_ts_from(write_key)? <= self.after_ts { @@ -648,7 +651,8 @@ fn scan_latest_handle_lock( .map(|_| HandleRes::Skip(current_user_key)) } -/// The ScanPolicy for outputting `TxnEntry` for every locks or commits in specified ts range. +/// The ScanPolicy for outputting `TxnEntry` for every locks or commits in +/// specified ts range. /// /// The `ForwardScanner` with this policy scans all entries whose `commit_ts`s /// (or locks' `start_ts`s) in range (`from_ts`, `cfg.ts`]. @@ -745,8 +749,8 @@ impl ScanPolicy for DeltaEntryPolicy { let write_value = cursors.write.value(&mut statistics.write); let commit_ts = Key::decode_ts_from(cursors.write.key(&mut statistics.write))?; - // commit_ts > cfg.ts never happens since the ForwardScanner will skip those greater - // versions. + // commit_ts > cfg.ts never happens since the ForwardScanner will skip those + // greater versions. if commit_ts <= self.from_ts { cursors.move_write_cursor_to_next_user_key(¤t_user_key, statistics)?; @@ -755,8 +759,9 @@ impl ScanPolicy for DeltaEntryPolicy { let (write_type, start_ts, short_value) = { // DeltaEntryScanner only returns commit records between `from_ts` and `cfg.ts`. - // We can assume that it must ensure GC safepoint doesn't exceed `from_ts`, so GC - // fence checking can be skipped. But it's still needed when loading the old value. + // We can assume that it must ensure GC safepoint doesn't exceed `from_ts`, so + // GC fence checking can be skipped. But it's still needed when loading the old + // value. 
let write_ref = WriteRef::parse(write_value)?; ( write_ref.write_type, @@ -832,10 +837,11 @@ impl ScanPolicy for DeltaEntryPolicy { } } -/// This type can be used to scan keys starting from the given user key (greater than or equal). +/// This type can be used to scan keys starting from the given user key (greater +/// than or equal). /// -/// Internally, for each key, rollbacks are ignored and smaller version will be tried. If the -/// isolation level is SI, locks will be checked first. +/// Internally, for each key, rollbacks are ignored and smaller version will be +/// tried. If the isolation level is SI, locks will be checked first. /// /// Use `ScannerBuilder` to build `ForwardKvScanner`. pub type ForwardKvScanner = ForwardScanner; @@ -843,8 +849,8 @@ pub type ForwardKvScanner = ForwardScanner; /// This scanner is like `ForwardKvScanner` but outputs `TxnEntry`. pub type EntryScanner = ForwardScanner; -/// This scanner scans all entries whose commit_ts (or locks' start_ts) is in range -/// (from_ts, cfg.ts]. +/// This scanner scans all entries whose commit_ts (or locks' start_ts) is in +/// range (from_ts, cfg.ts]. pub type DeltaScanner = ForwardScanner; impl TxnEntryScanner for ForwardScanner @@ -1109,7 +1115,8 @@ mod latest_kv_tests { Scanner, }; - /// Check whether everything works as usual when `ForwardKvScanner::get()` goes out of bound. + /// Check whether everything works as usual when `ForwardKvScanner::get()` + /// goes out of bound. #[test] fn test_get_out_of_bound() { let engine = TestEngineBuilder::new().build().unwrap(); @@ -1175,7 +1182,8 @@ mod latest_kv_tests { } /// Check whether everything works as usual when - /// `ForwardKvScanner::move_write_cursor_to_next_user_key()` goes out of bound. + /// `ForwardKvScanner::move_write_cursor_to_next_user_key()` goes out of + /// bound. /// /// Case 1. 
next() out of bound #[test] @@ -1232,7 +1240,7 @@ mod latest_kv_tests { // a_8 b_2 b_1 b_0 // ^cursor // We should be able to get wanted value without any operation. - // After get the value, use SEEK_BOUND / 2 + 1 next to reach next user key and stop: + // After get the value, use SEEK_BOUND/2+1 next to reach next user key and stop: // a_8 b_2 b_1 b_0 // ^cursor assert_eq!( @@ -1256,7 +1264,8 @@ mod latest_kv_tests { } /// Check whether everything works as usual when - /// `ForwardKvScanner::move_write_cursor_to_next_user_key()` goes out of bound. + /// `ForwardKvScanner::move_write_cursor_to_next_user_key()` goes out of + /// bound. /// /// Case 2. seek() out of bound #[test] @@ -1593,7 +1602,8 @@ mod latest_entry_tests { Engine, Modify, TestEngineBuilder, }; - /// Check whether everything works as usual when `EntryScanner::get()` goes out of bound. + /// Check whether everything works as usual when `EntryScanner::get()` goes + /// out of bound. #[test] fn test_get_out_of_bound() { let engine = TestEngineBuilder::new().build().unwrap(); @@ -1721,7 +1731,7 @@ mod latest_entry_tests { // a_8 b_2 b_1 b_0 // ^cursor // We should be able to get wanted value without any operation. - // After get the value, use SEEK_BOUND / 2 + 1 next to reach next user key and stop: + // After get the value, use SEEK_BOUND/2+1 next to reach next user key and stop: // a_8 b_2 b_1 b_0 // ^cursor let entry = EntryBuilder::default() @@ -2024,7 +2034,8 @@ mod delta_entry_tests { use super::{super::ScannerBuilder, test_util::*, *}; use crate::storage::{mvcc::tests::write, txn::tests::*, Engine, Modify, TestEngineBuilder}; - /// Check whether everything works as usual when `Delta::get()` goes out of bound. + /// Check whether everything works as usual when `Delta::get()` goes out of + /// bound. 
#[test] fn test_get_out_of_bound() { let engine = TestEngineBuilder::new().build().unwrap(); @@ -2151,7 +2162,7 @@ mod delta_entry_tests { // a_8 b_2 b_1 b_0 // ^cursor // We should be able to get wanted value without any operation. - // After get the value, use SEEK_BOUND / 2 + 1 next to reach next user key and stop: + // After get the value, use SEEK_BOUND/2+1 next to reach next user key and stop: // a_8 b_2 b_1 b_0 // ^cursor let entry = EntryBuilder::default() @@ -2189,8 +2200,8 @@ mod delta_entry_tests { must_commit(&engine, b"a", SEEK_BOUND * 2, SEEK_BOUND * 2); // Generate SEEK_BOUND rollback and 1 put for [b] . - // It differs from EntryScanner that this will try to fetch multiple versions of each key. - // So in this test it needs one more next than EntryScanner. + // It differs from EntryScanner that this will try to fetch multiple versions of + // each key. So in this test it needs one more next than EntryScanner. for ts in 1..=SEEK_BOUND { let modifies = vec![ // ts is rather small, so it is ok to `as u8` @@ -2341,8 +2352,8 @@ mod delta_entry_tests { fn test_mess() { // TODO: non-pessimistic lock should be returned enven if its ts < from_ts. // (key, lock, [commit1, commit2, ...]) - // Values ends with 'L' will be made larger than `SHORT_VALUE_MAX_LEN` so it will be saved - // in default cf. + // Values ends with 'L' will be made larger than `SHORT_VALUE_MAX_LEN` so it + // will be saved in default cf. let test_data = vec![ ( b"a" as &[u8], @@ -2555,7 +2566,8 @@ mod delta_entry_tests { while let Some(entry) = scanner.next_entry().unwrap() { actual.push(entry); } - // Do assertions one by one so that if it fails it won't print too long panic message. + // Do assertions one by one so that if it fails it won't print too long panic + // message. 
for i in 0..std::cmp::max(actual.len(), expected.len()) { assert_eq!( actual[i], expected[i], @@ -2695,7 +2707,8 @@ mod delta_entry_tests { // Scanning entries in (10, max] should get all prewrites check(10, vec![&entry_a_5, &entry_b_15, &entry_c_5]); - // Scanning entries include delete in (7, max] should get a_5, b_10, b_15 and c_5 + // Scanning entries include delete in (7, max] should get a_5, b_10, b_15 and + // c_5 check(7, vec![&entry_a_5, &entry_b_15, &entry_b_10, &entry_c_5]); // Scanning entries in (0, max] should get a_1, a_3, a_5, b_2, b_10, and b_15 check( diff --git a/src/storage/mvcc/reader/scanner/mod.rs b/src/storage/mvcc/reader/scanner/mod.rs index a3f759191f0..21626d2b61c 100644 --- a/src/storage/mvcc/reader/scanner/mod.rs +++ b/src/storage/mvcc/reader/scanner/mod.rs @@ -42,8 +42,8 @@ impl ScannerBuilder { self } - /// Set whether values of the user key should be omitted. When `omit_value` is `true`, the - /// length of returned value will be 0. + /// Set whether values of the user key should be omitted. When `omit_value` + /// is `true`, the length of returned value will be 0. /// /// Previously this option is called `key_only`. /// @@ -75,8 +75,8 @@ impl ScannerBuilder { self } - /// Limit the range to `[lower_bound, upper_bound)` in which the `ForwardKvScanner` should scan. - /// `None` means unbounded. + /// Limit the range to `[lower_bound, upper_bound)` in which the + /// `ForwardKvScanner` should scan. `None` means unbounded. /// /// Default is `(None, None)`. #[inline] @@ -87,8 +87,8 @@ impl ScannerBuilder { self } - /// Set locks that the scanner can bypass. Locks with start_ts in the specified set will be - /// ignored during scanning. + /// Set locks that the scanner can bypass. Locks with start_ts in the + /// specified set will be ignored during scanning. /// /// Default is empty. #[inline] @@ -98,8 +98,8 @@ impl ScannerBuilder { self } - /// Set locks that the scanner can read through. 
Locks with start_ts in the specified set will be - /// accessed during scanning. + /// Set locks that the scanner can read through. Locks with start_ts in the + /// specified set will be accessed during scanning. /// /// Default is empty. #[inline] @@ -133,8 +133,8 @@ impl ScannerBuilder { self } - /// Check whether there is data with newer ts. The result of `met_newer_ts_data` is Unknown - /// if this option is not set. + /// Check whether there is data with newer ts. The result of + /// `met_newer_ts_data` is Unknown if this option is not set. /// /// Default is false. #[inline] @@ -237,8 +237,8 @@ impl StoreScanner for Scanner { } } - /// Returns whether data with newer ts is found. The result is meaningful only when - /// `check_has_newer_ts_data` is set to true. + /// Returns whether data with newer ts is found. The result is meaningful + /// only when `check_has_newer_ts_data` is set to true. fn met_newer_ts_data(&self) -> NewerTsCheckState { match self { Scanner::Forward(scanner) => scanner.met_newer_ts_data(), @@ -253,9 +253,10 @@ pub struct ScannerConfig { omit_value: bool, isolation_level: IsolationLevel, - /// `lower_bound` and `upper_bound` is used to create `default_cursor`. `upper_bound` - /// is used in initial seek(or `lower_bound` in initial backward seek) as well. They will be consumed after `default_cursor` is being - /// created. + /// `lower_bound` and `upper_bound` is used to create `default_cursor`. + /// `upper_bound` is used in initial seek(or `lower_bound` in initial + /// backward seek) as well. They will be consumed after `default_cursor` is + /// being created. lower_bound: Option, upper_bound: Option, // hint for we will only scan data with commit ts >= hint_min_ts @@ -306,7 +307,8 @@ impl ScannerConfig { self.create_cf_cursor_with_scan_mode(cf, self.scan_mode()) } - /// Create the cursor with specified scan_mode, instead of inferring scan_mode from the config. 
+ /// Create the cursor with specified scan_mode, instead of inferring + /// scan_mode from the config. #[inline] fn create_cf_cursor_with_scan_mode( &mut self, @@ -340,14 +342,15 @@ impl ScannerConfig { /// /// Internally, there will be a `near_seek` operation. /// -/// Notice that the value may be already carried in the `write` (short value). In this -/// case, you should not call this function. +/// Notice that the value may be already carried in the `write` (short value). +/// In this case, you should not call this function. /// /// # Panics /// /// Panics if there is a short value carried in the given `write`. /// -/// Panics if key in default CF does not exist. This means there is a data corruption. +/// Panics if key in default CF does not exist. This means there is a data +/// corruption. pub fn near_load_data_by_write( default_cursor: &mut Cursor, // TODO: make it `ForwardCursor`. user_key: &Key, @@ -429,14 +432,15 @@ pub fn has_data_in_range( } /// Seek for the next valid (write type == Put or Delete) write record. -/// The write cursor must indicate a data key of the user key of which ts <= after_ts. -/// Return None if cannot find any valid write record. +/// The write cursor must indicate a data key of the user key of which ts <= +/// after_ts. Return None if cannot find any valid write record. /// -/// GC fence will be checked against the specified `gc_fence_limit`. If `gc_fence_limit` is greater -/// than the `commit_ts` of the current write record pointed by the cursor, The caller must -/// guarantee that there are no other versions in range `(current_commit_ts, gc_fence_limit]`. Note -/// that if a record is determined as invalid by checking GC fence, the `write_cursor`'s position -/// will be left remain on it. +/// GC fence will be checked against the specified `gc_fence_limit`. 
If +/// `gc_fence_limit` is greater than the `commit_ts` of the current write record +/// pointed by the cursor, The caller must guarantee that there are no other +/// versions in range `(current_commit_ts, gc_fence_limit]`. Note that if a +/// record is determined as invalid by checking GC fence, the `write_cursor`'s +/// position will be left remain on it. pub fn seek_for_valid_write( write_cursor: &mut Cursor, user_key: &Key, @@ -477,18 +481,21 @@ where } /// Seek for the last written value. -/// The write cursor must indicate a data key of the user key of which ts <= after_ts. -/// Return None if cannot find any valid write record or found a delete record. +/// The write cursor must indicate a data key of the user key of which ts <= +/// after_ts. Return None if cannot find any valid write record or found a +/// delete record. /// -/// GC fence will be checked against the specified `gc_fence_limit`. If `gc_fence_limit` is greater -/// than the `commit_ts` of the current write record pointed by the cursor, The caller must -/// guarantee that there are no other versions in range `(current_commit_ts, gc_fence_limit]`. Note -/// that if a record is determined as invalid by checking GC fence, the `write_cursor`'s position -/// will be left remain on it. +/// GC fence will be checked against the specified `gc_fence_limit`. If +/// `gc_fence_limit` is greater than the `commit_ts` of the current write record +/// pointed by the cursor, The caller must guarantee that there are no other +/// versions in range `(current_commit_ts, gc_fence_limit]`. Note that if a +/// record is determined as invalid by checking GC fence, the `write_cursor`'s +/// position will be left remain on it. /// -/// `write_cursor` maybe created with an `TsFilter`, which can filter out some key-value pairs with -/// less `commit_ts` than `ts_filter`. So if the got value has a less timestamp than `ts_filter`, it -/// should be replaced by None because the real wanted value can have been filtered. 
+/// `write_cursor` maybe created with an `TsFilter`, which can filter out some +/// key-value pairs with less `commit_ts` than `ts_filter`. So if the got value +/// has a less timestamp than `ts_filter`, it should be replaced by None because +/// the real wanted value can have been filtered. pub fn seek_for_valid_value( write_cursor: &mut Cursor, default_cursor: &mut Cursor, @@ -570,8 +577,8 @@ pub(crate) fn load_data_by_lock( } LockType::Delete => Ok(None), LockType::Lock | LockType::Pessimistic => { - // Only when fails to call `Lock::check_ts_conflict()`, the function is called, so it's - // unreachable here. + // Only when fails to call `Lock::check_ts_conflict()`, the function is called, + // so it's unreachable here. unreachable!() } } @@ -592,8 +599,8 @@ mod tests { }, }; - // Collect data from the scanner and assert it equals to `expected`, which is a collection of - // (raw_key, value). + // Collect data from the scanner and assert it equals to `expected`, which is a + // collection of (raw_key, value). // `None` value in `expected` means the key is locked. 
fn check_scan_result( mut scanner: Scanner, @@ -842,15 +849,15 @@ mod tests { let access_locks = TsSet::from_u64s(vec![30, 40, 50, 60, 90]); let mut expected_result = vec![ - (vec![0], Some(vec![b'v', 0, 0])), /* access put if not delete_bound */ - (vec![1], Some(vec![b'v', 1, 1])), /* access put */ - /* vec![2] access delete */ - (vec![3], Some(vec![b'v', 3])), /* ignore LockType::Lock */ - (vec![4], None), /* locked */ - (vec![5], Some(vec![b'v', 5])), /* bypass */ - (vec![6], Some(vec![b'v', 6])), /* ignore lock with larger ts */ - (vec![7], Some(vec![b'v', 7])), /* no lock */ - (vec![8], Some(vec![b'v', 8, 8])), /* access put if not delete_bound*/ + (vec![0], Some(vec![b'v', 0, 0])), // access put if not delete_bound + (vec![1], Some(vec![b'v', 1, 1])), // access put + // vec![2] access delete + (vec![3], Some(vec![b'v', 3])), // ignore LockType::Lock + (vec![4], None), // locked + (vec![5], Some(vec![b'v', 5])), // bypass + (vec![6], Some(vec![b'v', 6])), // ignore lock with larger ts + (vec![7], Some(vec![b'v', 7])), // no lock + (vec![8], Some(vec![b'v', 8, 8])), // access put if not delete_bound ]; if desc { expected_result.reverse(); diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index bf8add1abfd..a5343b234ac 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -28,8 +28,9 @@ impl GcInfo { } } -/// `ReleasedLock` contains the information of the lock released by `commit`, `rollback` and so on. -/// It's used by `LockManager` to wake up transactions waiting for locks. +/// `ReleasedLock` contains the information of the lock released by `commit`, +/// `rollback` and so on. It's used by `LockManager` to wake up transactions +/// waiting for locks. #[derive(Debug, PartialEq)] pub struct ReleasedLock { /// The hash value of the lock. 
@@ -52,8 +53,8 @@ pub struct MvccTxn { pub(crate) start_ts: TimeStamp, pub(crate) write_size: usize, pub(crate) modifies: Vec, - // When 1PC is enabled, locks will be collected here instead of marshalled and put into `writes`, - // so it can be further processed. The elements are tuples representing + // When 1PC is enabled, locks will be collected here instead of marshalled and put into + // `writes`, so it can be further processed. The elements are tuples representing // (key, lock, remove_pessimistic_lock) pub(crate) locks_for_1pc: Vec<(Key, Lock, bool)>, // `concurrency_manager` is used to set memory locks for prewritten keys. @@ -141,14 +142,15 @@ impl MvccTxn { self.modifies.push(write); } - /// Add the timestamp of the current rollback operation to another transaction's lock if - /// necessary. + /// Add the timestamp of the current rollback operation to another + /// transaction's lock if necessary. /// - /// When putting rollback record on a key that's locked by another transaction, the second - /// transaction may overwrite the current rollback record when it's committed. Sometimes it may - /// break consistency. To solve the problem, add the timestamp of the current rollback to the - /// lock. So when the lock is committed, it can check if it will overwrite a rollback record - /// by checking the information in the lock. + /// When putting rollback record on a key that's locked by another + /// transaction, the second transaction may overwrite the current rollback + /// record when it's committed. Sometimes it may break consistency. To solve + /// the problem, add the timestamp of the current rollback to the lock. So + /// when the lock is committed, it can check if it will overwrite a rollback + /// record by checking the information in the lock. 
pub(crate) fn mark_rollback_on_mismatching_lock( &mut self, key: &Key, @@ -158,18 +160,20 @@ impl MvccTxn { assert_ne!(lock.ts, self.start_ts); if !is_protected { - // A non-protected rollback record is ok to be overwritten, so do nothing in this case. + // A non-protected rollback record is ok to be overwritten, so do nothing in + // this case. return; } if self.start_ts < lock.min_commit_ts { - // The rollback will surely not be overwritten by committing the lock. Do nothing. + // The rollback will surely not be overwritten by committing the lock. Do + // nothing. return; } if !lock.use_async_commit { - // Currently only async commit may use calculated commit_ts. Do nothing if it's not a - // async commit transaction. + // Currently only async commit may use calculated commit_ts. Do nothing if it's + // not a async commit transaction. return; } @@ -563,8 +567,8 @@ pub(crate) mod tests { assert_eq!(w1r.set_overlapped_rollback(false, None), w1); let w2r = must_written(&engine, k2, 11, 20, WriteType::Put); - // Rollback is invoked on secondaries, so the rollback is not protected and overlapped_rollback - // won't be set. + // Rollback is invoked on secondaries, so the rollback is not protected and + // overlapped_rollback won't be set. assert_eq!(w2r, w2); } @@ -951,8 +955,8 @@ pub(crate) mod tests { let (k, v) = (b"k", b"v"); - // Pessimistic prewrite keeps the larger TTL of the prewrite request and the original - // pessimisitic lock. + // Pessimistic prewrite keeps the larger TTL of the prewrite request and the + // original pessimisitic lock. must_acquire_pessimistic_lock_with_ttl(&engine, k, k, 10, 10, 100); must_pessimistic_locked(&engine, k, 10, 10); must_pessimistic_prewrite_put_with_ttl(&engine, k, v, k, 10, 10, true, 110); @@ -960,8 +964,8 @@ pub(crate) mod tests { must_rollback(&engine, k, 10, false); - // TTL not changed if the pessimistic lock's TTL is larger than that provided in the - // prewrite request. 
+ // TTL not changed if the pessimistic lock's TTL is larger than that provided in + // the prewrite request. must_acquire_pessimistic_lock_with_ttl(&engine, k, k, 20, 20, 100); must_pessimistic_locked(&engine, k, 20, 20); must_pessimistic_prewrite_put_with_ttl(&engine, k, v, k, 20, 20, true, 90); @@ -1115,8 +1119,8 @@ pub(crate) mod tests { must_pessimistic_prewrite_put(&engine, k3, v3, k1, 10, 20, true); // Write a non-pessimistic lock with for_update_ts 20. must_pessimistic_prewrite_put(&engine, k2, v2, k1, 10, 20, false); - // Roll back the primary key due to timeout, but the non-pessimistic lock is not rolled - // back. + // Roll back the primary key due to timeout, but the non-pessimistic lock is not + // rolled back. must_rollback(&engine, k1, 10, false); // Txn-15 acquires pessimistic locks on k1. @@ -1188,7 +1192,8 @@ pub(crate) mod tests { #[test] fn test_async_prewrite_primary() { - // copy must_prewrite_put_impl, check that the key is written with the correct secondaries and the right timestamp + // copy must_prewrite_put_impl, check that the key is written with the correct + // secondaries and the right timestamp let engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); @@ -1239,7 +1244,8 @@ pub(crate) mod tests { // max_ts in the concurrency manager is 42, so the min_commit_ts is 43. assert_eq!(lock.min_commit_ts, TimeStamp::new(43)); - // A duplicate prewrite request should return the min_commit_ts in the primary key + // A duplicate prewrite request should return the min_commit_ts in the primary + // key assert_eq!(do_prewrite(), 43.into()); } @@ -1296,7 +1302,8 @@ pub(crate) mod tests { // max_ts in the concurrency manager is 42, so the min_commit_ts is 43. 
assert_eq!(lock.min_commit_ts, TimeStamp::new(43)); - // A duplicate prewrite request should return the min_commit_ts in the primary key + // A duplicate prewrite request should return the min_commit_ts in the primary + // key assert_eq!(do_pessimistic_prewrite(), 43.into()); } @@ -1345,8 +1352,8 @@ pub(crate) mod tests { must_unlocked(&engine, k); must_written(&engine, k, 10, 20, WriteType::Put); - // Optimistic transaction allows the start_ts equals to another transaction's commit_ts - // on the same key. + // Optimistic transaction allows the start_ts equals to another transaction's + // commit_ts on the same key. must_prewrite_put(&engine, k, v, k, 20); must_locked(&engine, k, 20); must_commit(&engine, k, 20, 30); @@ -1418,15 +1425,16 @@ pub(crate) mod tests { assert!(w.has_overlapped_rollback); assert!(w.gc_fence.is_none()); - // Do not commit with overlapped_rollback if the rollback ts doesn't equal to commit_ts. + // Do not commit with overlapped_rollback if the rollback ts doesn't equal to + // commit_ts. must_prewrite_put_async_commit(&engine, k, v, k, &Some(vec![]), 40, 0); must_cleanup(&engine, k, 44, 0); must_commit(&engine, k, 40, 45); let w = must_written(&engine, k, 40, 45, WriteType::Put); assert!(!w.has_overlapped_rollback); - // Do not put rollback mark to the lock if the lock is not async commit or if lock.ts is - // before start_ts or min_commit_ts. + // Do not put rollback mark to the lock if the lock is not async commit or if + // lock.ts is before start_ts or min_commit_ts. must_prewrite_put(&engine, k, v, k, 50); must_cleanup(&engine, k, 55, 0); let l = must_locked(&engine, k, 50); diff --git a/src/storage/raw/raw_mvcc.rs b/src/storage/raw/raw_mvcc.rs index 4ddfa68a757..59dd5e8f13d 100644 --- a/src/storage/raw/raw_mvcc.rs +++ b/src/storage/raw/raw_mvcc.rs @@ -151,8 +151,9 @@ impl RawMvccIterator { } // RawMvccIterator always return the latest ts of user key. 
-// ts is desc encoded after user key, so it's placed the first one for the same user key. -// Only one-way direction scan is supported. Like `seek` then `next` or `seek_for_prev` then `prev` +// ts is desc encoded after user key, so it's placed the first one for the same +// user key. Only one-way direction scan is supported. Like `seek` then `next` +// or `seek_for_prev` then `prev` impl Iterator for RawMvccIterator { fn next(&mut self) -> Result { if !self.is_forward { @@ -217,7 +218,8 @@ impl Iterator for RawMvccIterator { } fn key(&self) -> &[u8] { - // need map_or_else to lazy evaluate the default func, as it will abort when invalid. + // need map_or_else to lazy evaluate the default func, as it will abort when + // invalid. self.cur_key.as_deref().unwrap_or_else(|| self.inner.key()) } @@ -259,7 +261,8 @@ mod tests { let (tx, rx) = channel(); let ctx = Context::default(); - // TODO: Consider another way other than hard coding, to generate keys' prefix of test data. + // TODO: Consider another way other than hard coding, to generate keys' prefix + // of test data. let test_data = vec![ (b"r\0a".to_vec(), b"aa".to_vec(), 10), (b"r\0aa".to_vec(), b"aaa".to_vec(), 20), diff --git a/src/storage/raw/store.rs b/src/storage/raw/store.rs index 5caad0dfbb6..4d70c2bf5ff 100644 --- a/src/storage/raw/store.rs +++ b/src/storage/raw/store.rs @@ -21,7 +21,8 @@ use crate::{ const MAX_TIME_SLICE: Duration = Duration::from_millis(2); const MAX_BATCH_SIZE: usize = 1024; -// TODO: refactor to utilize generic type `KvFormat` and eliminate matching `api_version`. +// TODO: refactor to utilize generic type `KvFormat` and eliminate matching +// `api_version`. pub enum RawStore { V1(RawStoreInner), V1Ttl(RawStoreInner, ApiV1Ttl>), @@ -180,11 +181,11 @@ impl<'a, S: Snapshot, F: KvFormat> RawStoreInner { }) } - /// Scan raw keys in [`start_key`, `end_key`), returns at most `limit` keys. If `end_key` is - /// `None`, it means unbounded. 
+ /// Scan raw keys in [`start_key`, `end_key`), returns at most `limit` keys. + /// If `end_key` is `None`, it means unbounded. /// - /// If `key_only` is true, the value corresponding to the key will not be read. Only scanned - /// keys will be returned. + /// If `key_only` is true, the value corresponding to the key will not be + /// read. Only scanned keys will be returned. pub async fn forward_raw_scan( &'a self, cf: CfName, @@ -231,11 +232,12 @@ impl<'a, S: Snapshot, F: KvFormat> RawStoreInner { Ok(pairs) } - /// Scan raw keys in [`end_key`, `start_key`) in reverse order, returns at most `limit` keys. If - /// `start_key` is `None`, it means it's unbounded. + /// Scan raw keys in [`end_key`, `start_key`) in reverse order, returns at + /// most `limit` keys. If `start_key` is `None`, it means it's unbounded. /// /// If `key_only` is true, the value - /// corresponding to the key will not be read out. Only scanned keys will be returned. + /// corresponding to the key will not be read out. Only scanned keys will be + /// returned. pub async fn reverse_raw_scan( &'a self, cf: CfName, diff --git a/src/storage/read_pool.rs b/src/storage/read_pool.rs index f93497b2905..c25ae15d46b 100644 --- a/src/storage/read_pool.rs +++ b/src/storage/read_pool.rs @@ -1,6 +1,7 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -//! Distinct thread pools to handle read commands having different priority levels. +//! Distinct thread pools to handle read commands having different priority +//! levels. use std::sync::{Arc, Mutex}; @@ -26,7 +27,8 @@ impl PoolTicker for FuturePoolTicker { } } -/// Build respective thread pools to handle read commands of different priority levels. +/// Build respective thread pools to handle read commands of different priority +/// levels. 
pub fn build_read_pool( config: &StorageReadPoolConfig, reporter: R, diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index 9cca49c9323..792ed8fcb9a 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -12,15 +12,18 @@ use crate::storage::{ Snapshot, }; -/// Acquires pessimistic lock on a single key. Optionally reads the previous value by the way. +/// Acquires pessimistic lock on a single key. Optionally reads the previous +/// value by the way. /// -/// When `need_value` is set, the first return value will be the previous value of the key (possibly -/// `None`). When `need_value` is not set but `need_check_existence` is set, the first return value -/// will be an empty value (`Some(vec![])`) if the key exists before or `None` if not. If neither -/// `need_value` nor `need_check_existence` is set, the first return value is always `None`. +/// When `need_value` is set, the first return value will be the previous value +/// of the key (possibly `None`). When `need_value` is not set but +/// `need_check_existence` is set, the first return value will be an empty value +/// (`Some(vec![])`) if the key exists before or `None` if not. If neither +/// `need_value` nor `need_check_existence` is set, the first return value is +/// always `None`. /// -/// The second return value will also contains the previous value of the key if `need_old_value` is -/// set, or `OldValue::Unspecified` otherwise. +/// The second return value will also contains the previous value of the key if +/// `need_old_value` is set, or `OldValue::Unspecified` otherwise. 
pub fn acquire_pessimistic_lock( txn: &mut MvccTxn, reader: &mut SnapshotReader, @@ -38,14 +41,16 @@ pub fn acquire_pessimistic_lock( crate::storage::mvcc::txn::make_txn_error(err, &key, reader.start_ts).into() )); - // Update max_ts for Insert operation to guarante linearizability and snapshot isolation + // Update max_ts for Insert operation to guarantee linearizability and snapshot + // isolation if should_not_exist { txn.concurrency_manager.update_max_ts(for_update_ts); } - // When `need_value` is set, the value need to be loaded of course. If `need_check_existence` - // and `need_old_value` are both set, we also load the value even if `need_value` is false, - // so that it avoids `load_old_value` doing repeated work. + // When `need_value` is set, the value need to be loaded of course. If + // `need_check_existence` and `need_old_value` are both set, we also load + // the value even if `need_value` is false, so that it avoids + // `load_old_value` doing repeated work. let need_load_value = need_value || (need_check_existence && need_old_value); fn load_old_value( @@ -72,7 +77,8 @@ pub fn acquire_pessimistic_lock( } } - /// Returns proper result according to the loaded value (if any) the specified settings. + /// Returns proper result according to the loaded value (if any) the + /// specified settings. #[inline] fn ret_val(need_value: bool, need_check_existence: bool, val: Option) -> Option { if need_value { @@ -160,8 +166,8 @@ pub fn acquire_pessimistic_lock( } // Handle rollback. - // The rollback information may come from either a Rollback record or a record with - // `has_overlapped_rollback` flag. + // The rollback information may come from either a Rollback record or a record + // with `has_overlapped_rollback` flag. 
if commit_ts == reader.start_ts && (write.write_type == WriteType::Rollback || write.has_overlapped_rollback) { @@ -172,7 +178,8 @@ pub fn acquire_pessimistic_lock( } .into()); } - // If `commit_ts` we seek is already before `start_ts`, the rollback must not exist. + // If `commit_ts` we seek is already before `start_ts`, the rollback must not + // exist. if commit_ts > reader.start_ts { if let Some((older_commit_ts, older_write)) = reader.seek_write(&key, reader.start_ts)? @@ -480,8 +487,8 @@ pub mod tests { let k = b"k1"; let v = b"v1"; - // TODO: Some corner cases don't give proper results. Although they are not important, we - // should consider whether they are better to be fixed. + // TODO: Some corner cases don't give proper results. Although they are not + // important, we should consider whether they are better to be fixed. // Normal must_succeed(&engine, k, k, 1, 1); @@ -630,8 +637,9 @@ pub mod tests { must_pessimistic_prewrite_put_err(&engine, k, v, k, 36, 38, true); must_locked(&engine, k, 35); - // Commit pessimistic transaction's key but with smaller commit_ts than for_update_ts. - // Currently not checked, so in this case it will actually be successfully committed. + // Commit pessimistic transaction's key but with smaller commit_ts than + // for_update_ts. Currently not checked, so in this case it will + // actually be successfully committed. must_commit(&engine, k, 35, 36); must_unlocked(&engine, k); must_get_commit_ts(&engine, k, 35, 36); @@ -661,17 +669,18 @@ pub mod tests { must_commit(&engine, k, 46, 50); must_unlocked(&engine, k); - // Prewrite on non-pessimistic key meets write with larger commit_ts than current - // for_update_ts (non-pessimistic data conflict). - // Normally non-pessimistic keys in pessimistic transactions are used when we are sure that - // there won't be conflicts. So this case is also not checked, and prewrite will succeeed. 
+ // Prewrite on non-pessimistic key meets write with larger commit_ts than + // current for_update_ts (non-pessimistic data conflict). + // Normally non-pessimistic keys in pessimistic transactions are used when we + // are sure that there won't be conflicts. So this case is also not checked, and + // prewrite will succeeed. must_pessimistic_prewrite_put(&engine, k, v, k, 47, 48, false); must_locked(&engine, k, 47); must_cleanup(&engine, k, 47, 0); must_unlocked(&engine, k); - // The rollback of the primary key in a pessimistic transaction should be protected from - // being collapsed. + // The rollback of the primary key in a pessimistic transaction should be + // protected from being collapsed. must_succeed(&engine, k, k, 49, 60); must_pessimistic_prewrite_put(&engine, k, v, k, 49, 60, true); must_locked(&engine, k, 49); @@ -681,8 +690,9 @@ pub mod tests { must_rollback(&engine, k, 51, false); must_err(&engine, k, k, 49, 60); - // Overlapped rollback record will be written when the current start_ts equals to another write - // records' commit ts. Now there is a commit record with commit_ts = 50. + // Overlapped rollback record will be written when the current start_ts equals + // to another write records' commit ts. Now there is a commit record with + // commit_ts = 50. must_succeed(&engine, k, k, 50, 61); must_pessimistic_prewrite_put(&engine, k, v, k, 50, 61, true); must_locked(&engine, k, 50); @@ -846,9 +856,9 @@ pub mod tests { // PUT, LOCK, READ // `----------^ - // Note that this case is special because usually the `LOCK` is the first write already got - // during prewrite/acquire_pessimistic_lock and will continue searching an older version - // from the `LOCK` record. + // Note that this case is special because usually the `LOCK` is the first write + // already got during prewrite/acquire_pessimistic_lock and will continue + // searching an older version from the `LOCK` record. 
must_prewrite_put(&engine, b"k7", b"v7", b"k7", 16); must_commit(&engine, b"k7", 16, 30); must_prewrite_lock(&engine, b"k7", b"k7", 37); @@ -1072,7 +1082,8 @@ pub mod tests { must_pessimistic_prewrite_put(&engine, key, value, key, 3, 3, true); must_commit(&engine, key, 3, 5); - // T2: start_ts = 15, acquire pessimistic lock on k, with should_not_exist flag set. + // T2: start_ts = 15, acquire pessimistic lock on k, with should_not_exist flag + // set. let snapshot = engine.snapshot(Default::default()).unwrap(); let min_commit_ts = TimeStamp::zero(); let cm = ConcurrencyManager::new(min_commit_ts); @@ -1100,12 +1111,14 @@ pub mod tests { assert_eq!(cm.max_ts().into_inner(), 15); - // T3: start_ts = 8, commit_ts = max_ts + 1 = 16, prewrite a DELETE operation on k + // T3: start_ts = 8, commit_ts = max_ts + 1 = 16, prewrite a DELETE operation on + // k must_succeed(&engine, key, key, 8, 8); must_pessimistic_prewrite_delete(&engine, key, key, 8, 8, true); must_commit(&engine, key, 8, cm.max_ts().into_inner() + 1); - // T1: start_ts = 10, repeatedly acquire pessimistic lock on k, with should_not_exist flag set + // T1: start_ts = 10, repeatedly acquire pessimistic lock on k, with + // should_not_exist flag set let snapshot = engine.snapshot(Default::default()).unwrap(); let start_ts = TimeStamp::new(10); let for_update_ts = TimeStamp::new(10); @@ -1157,9 +1170,10 @@ pub mod tests { // k5: GC fence invalid must_prewrite_put(&engine, b"k5", b"v5", b"k5", 5); must_commit(&engine, b"k5", 5, 6); - // A invalid gc fence is assumed never pointing to a ts greater than GC safepoint, and - // a read operation's ts is assumed never less than the GC safepoint. Therefore since we - // will read at ts=10 later, we can't put a version greater than 10 in this case. + // A invalid gc fence is assumed never pointing to a ts greater than GC + // safepoint, and a read operation's ts is assumed never less than the + // GC safepoint. 
Therefore since we will read at ts=10 later, we can't + // put a version greater than 10 in this case. must_cleanup_with_gc_fence(&engine, b"k5", 6, 0, 8, true); for &need_value in &[false, true] { diff --git a/src/storage/txn/actions/check_data_constraint.rs b/src/storage/txn/actions/check_data_constraint.rs index 3b28d3e4214..35999ee6cb2 100644 --- a/src/storage/txn/actions/check_data_constraint.rs +++ b/src/storage/txn/actions/check_data_constraint.rs @@ -10,7 +10,8 @@ use crate::storage::{ /// Checks the existence of the key according to `should_not_exist`. /// If not, returns an `AlreadyExist` error. -/// The caller must guarantee that the given `write` is the latest version of the key. +/// The caller must guarantee that the given `write` is the latest version of +/// the key. pub(crate) fn check_data_constraint( reader: &mut SnapshotReader, should_not_exist: bool, @@ -18,8 +19,8 @@ pub(crate) fn check_data_constraint( write_commit_ts: TimeStamp, key: &Key, ) -> MvccResult<()> { - // Here we assume `write` is the latest version of the key. So it should not contain a - // GC fence ts. Otherwise, it must be an already-deleted version. + // Here we assume `write` is the latest version of the key. So it should not + // contain a GC fence ts. Otherwise, it must be an already-deleted version. let write_is_invalid = matches!(write.gc_fence, Some(gc_fence_ts) if !gc_fence_ts.is_zero()); if !should_not_exist || write.write_type == WriteType::Delete || write_is_invalid { @@ -28,7 +29,8 @@ pub(crate) fn check_data_constraint( // The current key exists under any of the following conditions: // 1.The current write type is `PUT` - // 2.The current write type is `Rollback` or `Lock`, and the key have an older version. + // 2.The current write type is `Rollback` or `Lock`, and the key have an older + // version. if write.write_type == WriteType::Put || reader.key_exist(key, write_commit_ts.prev())? { return Err(ErrorInner::AlreadyExist { key: key.to_raw()? 
}.into()); } diff --git a/src/storage/txn/actions/check_txn_status.rs b/src/storage/txn/actions/check_txn_status.rs index 295124fde37..2f3a2c84b11 100644 --- a/src/storage/txn/actions/check_txn_status.rs +++ b/src/storage/txn/actions/check_txn_status.rs @@ -11,8 +11,9 @@ use crate::storage::{ Snapshot, TxnStatus, }; -// Check whether there's an overlapped write record, and then perform rollback. The actual behavior -// to do the rollback differs according to whether there's an overlapped write record. +// Check whether there's an overlapped write record, and then perform rollback. +// The actual behavior to do the rollback differs according to whether there's +// an overlapped write record. pub fn check_txn_status_lock_exists( txn: &mut MvccTxn, reader: &mut SnapshotReader, @@ -23,8 +24,9 @@ pub fn check_txn_status_lock_exists( force_sync_commit: bool, resolving_pessimistic_lock: bool, ) -> Result<(TxnStatus, Option)> { - // Never rollback or push forward min_commit_ts in check_txn_status if it's using async commit. - // Rollback of async-commit locks are done during ResolveLock. + // Never rollback or push forward min_commit_ts in check_txn_status if it's + // using async commit. Rollback of async-commit locks are done during + // ResolveLock. if lock.use_async_commit { if force_sync_commit { info!( @@ -40,8 +42,8 @@ pub fn check_txn_status_lock_exists( let is_pessimistic_txn = !lock.for_update_ts.is_zero(); if lock.ts.physical() + lock.ttl < current_ts.physical() { // If the lock is expired, clean it up. - // If the resolving and primary key lock are both pessimistic locks, just unlock the - // primary pessimistic lock and do not write rollback records. + // If the resolving and primary key lock are both pessimistic locks, just unlock + // the primary pessimistic lock and do not write rollback records. 
return if resolving_pessimistic_lock && lock.lock_type == LockType::Pessimistic { let released = txn.unlock_key(primary_key, is_pessimistic_txn); MVCC_CHECK_TXN_STATUS_COUNTER_VEC.pessimistic_rollback.inc(); @@ -54,9 +56,9 @@ pub fn check_txn_status_lock_exists( }; } - // If lock.min_commit_ts is 0, it's not a large transaction and we can't push forward - // its min_commit_ts otherwise the transaction can't be committed by old version TiDB - // during rolling update. + // If lock.min_commit_ts is 0, it's not a large transaction and we can't push + // forward its min_commit_ts otherwise the transaction can't be committed by + // old version TiDB during rolling update. if !lock.min_commit_ts.is_zero() && !caller_start_ts.is_max() // Push forward the min_commit_ts so that reading won't be blocked by locks. @@ -72,8 +74,9 @@ pub fn check_txn_status_lock_exists( MVCC_CHECK_TXN_STATUS_COUNTER_VEC.update_ts.inc(); } - // As long as the primary lock's min_commit_ts > caller_start_ts, locks belong to the same transaction - // can't block reading. Return MinCommitTsPushed result to the client to let it bypass locks. + // As long as the primary lock's min_commit_ts > caller_start_ts, locks belong + // to the same transaction can't block reading. Return MinCommitTsPushed + // result to the client to let it bypass locks. let min_commit_ts_pushed = (!caller_start_ts.is_zero() && lock.min_commit_ts > caller_start_ts) // If the caller_start_ts is max, it's a point get in the autocommit transaction. // We don't push forward lock's min_commit_ts and the point get can ignore the lock @@ -157,7 +160,8 @@ pub fn rollback_lock( _ => return Ok(txn.unlock_key(key, is_pessimistic_txn)), }; - // If prewrite type is DEL or LOCK or PESSIMISTIC, it is no need to delete value. + // If prewrite type is DEL or LOCK or PESSIMISTIC, it is no need to delete + // value. 
if lock.short_value.is_none() && lock.lock_type == LockType::Put { txn.delete_value(key.clone(), lock.ts); } @@ -188,8 +192,8 @@ pub fn collapse_prev_rollback( Ok(()) } -/// Generate the Write record that should be written that means to perform a specified rollback -/// operation. +/// Generate the Write record that should be written that means to perform a +/// specified rollback operation. pub fn make_rollback( start_ts: TimeStamp, protected: bool, diff --git a/src/storage/txn/actions/cleanup.rs b/src/storage/txn/actions/cleanup.rs index be8dc60a768..461b8e2d432 100644 --- a/src/storage/txn/actions/cleanup.rs +++ b/src/storage/txn/actions/cleanup.rs @@ -12,12 +12,13 @@ use crate::storage::{ Snapshot, TxnStatus, }; -/// Cleanup the lock if it's TTL has expired, comparing with `current_ts`. If `current_ts` is 0, -/// cleanup the lock without checking TTL. If the lock is the primary lock of a pessimistic -/// transaction, the rollback record is protected from being collapsed. +/// Cleanup the lock if it's TTL has expired, comparing with `current_ts`. If +/// `current_ts` is 0, cleanup the lock without checking TTL. If the lock is the +/// primary lock of a pessimistic transaction, the rollback record is protected +/// from being collapsed. /// -/// Returns the released lock. Returns error if the key is locked or has already been -/// committed. +/// Returns the released lock. Returns error if the key is locked or has already +/// been committed. pub fn cleanup( txn: &mut MvccTxn, reader: &mut SnapshotReader, @@ -193,8 +194,8 @@ pub mod tests { #[test] fn test_cleanup() { - // Cleanup's logic is mostly similar to rollback, except the TTL check. Tests that not - // related to TTL check should be covered by other test cases. + // Cleanup's logic is mostly similar to rollback, except the TTL check. Tests + // that not related to TTL check should be covered by other test cases. let engine = TestEngineBuilder::new().build().unwrap(); // Shorthand for composing ts. 
@@ -214,8 +215,8 @@ pub mod tests { // Try to cleanup another transaction's lock. Does nothing. must_succeed(&engine, k, ts(10, 1), ts(120, 0)); - // If there is no exisiting lock when cleanup, it may be a pessimistic transaction, - // so the rollback should be protected. + // If there is no existing lock when cleanup, it may be a pessimistic + // transaction, so the rollback should be protected. must_get_rollback_protected(&engine, k, ts(10, 1), true); must_locked(&engine, k, ts(10, 0)); diff --git a/src/storage/txn/actions/commit.rs b/src/storage/txn/actions/commit.rs index 028241155ec..8435479991e 100644 --- a/src/storage/txn/actions/commit.rs +++ b/src/storage/txn/actions/commit.rs @@ -41,11 +41,11 @@ pub fn commit( .into()); } - // It's an abnormal routine since pessimistic locks shouldn't be committed in our - // transaction model. But a pessimistic lock will be left if the pessimistic - // rollback request fails to send and the transaction need not to acquire - // this lock again(due to WriteConflict). If the transaction is committed, we - // should commit this pessimistic lock too. + // It's an abnormal routine since pessimistic locks shouldn't be committed in + // our transaction model. But a pessimistic lock will be left if the pessimistic + // rollback request fails to send and the transaction need not to acquire this + // lock again(due to WriteConflict). If the transaction is committed, we should + // commit this pessimistic lock too. if lock.lock_type == LockType::Pessimistic { warn!( "commit a pessimistic lock with Lock type"; @@ -254,7 +254,8 @@ pub mod tests { ); must_succeed(&engine, k, ts(30, 0), ts(50, 0)); - // If the min_commit_ts of the pessimistic lock is greater than prewrite's, use it. + // If the min_commit_ts of the pessimistic lock is greater than prewrite's, use + // it. 
must_acquire_pessimistic_lock_for_large_txn(&engine, k, k, ts(60, 0), ts(60, 0), 100); check_txn_status::tests::must_success( &engine, diff --git a/src/storage/txn/actions/mod.rs b/src/storage/txn/actions/mod.rs index 518afb5a449..58c27721f56 100644 --- a/src/storage/txn/actions/mod.rs +++ b/src/storage/txn/actions/mod.rs @@ -1,7 +1,8 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -//! This file contains the "actions" we perform on a [`crate::storage::mvcc::MvccTxn`] and related -//! tests. "Actions" here means a group of more basic operations, eg. +//! This file contains the "actions" we perform on a +//! [`crate::storage::mvcc::MvccTxn`] and related tests. "Actions" here means a +//! group of more basic operations, eg. //! [`crate::storage::mvcc::MvccReader::load_lock`], //! [`crate::storage::mvcc::MvccTxn::put_write`], which are methods on //! [`crate::storage::mvcc::MvccTxn`], for archiving a certain target. diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index a96c5eabc8d..e7ca85c8137 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -33,7 +33,8 @@ pub fn prewrite( let mut mutation = PrewriteMutation::from_mutation(mutation, secondary_keys, is_pessimistic_lock, txn_props)?; - // Update max_ts for Insert operation to guarante linearizability and snapshot isolation + // Update max_ts for Insert operation to guarantee linearizability and snapshot + // isolation if mutation.should_not_exist { txn.concurrency_manager.update_max_ts(txn_props.start_ts); } @@ -76,12 +77,13 @@ pub fn prewrite( }; // Check assertion if necessary. There are couple of different cases: - // * If the write is already loaded, then assertion can be checked without introducing too much - // performance overhead. So do assertion in this case. - // * If `amend_pessimistic_lock` has happened, assertion can be done during amending. Skip it. 
- // * If constraint check is skipped thus `prev_write` is not loaded, doing assertion here - // introduces too much overhead. However, we'll do it anyway if `assertion_level` is set to - // `Strict` level. + // * If the write is already loaded, then assertion can be checked without + // introducing too much performance overhead. So do assertion in this case. + // * If `amend_pessimistic_lock` has happened, assertion can be done during + // amending. Skip it. + // * If constraint check is skipped thus `prev_write` is not loaded, doing + // assertion here introduces too much overhead. However, we'll do it anyway if + // `assertion_level` is set to `Strict` level. // Assertion level will be checked within the `check_assertion` function. if !lock_amended { let (reloaded_prev_write, reloaded) = @@ -95,11 +97,13 @@ pub fn prewrite( let prev_write = prev_write.map(|(w, _)| w); if mutation.should_not_write { - // `checkNotExists` is equivalent to a get operation, so it should update the max_ts. + // `checkNotExists` is equivalent to a get operation, so it should update the + // max_ts. txn.concurrency_manager.update_max_ts(txn_props.start_ts); let min_commit_ts = if mutation.need_min_commit_ts() { - // Don't calculate the min_commit_ts according to the concurrency manager's max_ts - // for a should_not_write mutation because it's not persisted and doesn't change data. + // Don't calculate the min_commit_ts according to the concurrency manager's + // max_ts for a should_not_write mutation because it's not persisted and doesn't + // change data. cmp::max(txn_props.min_commit_ts, txn_props.start_ts.next()) } else { TimeStamp::zero() @@ -273,10 +277,11 @@ impl<'a> PrewriteMutation<'a> { }) } - // Pessimistic transactions only acquire pessimistic locks on row keys and unique index keys. - // The corresponding secondary index keys are not locked until pessimistic prewrite. 
- // It's possible that lock conflict occurs on them, but the isolation is - // guaranteed by pessimistic locks, so let TiDB resolves these locks immediately. + // Pessimistic transactions only acquire pessimistic locks on row keys and + // unique index keys. The corresponding secondary index keys are not locked + // until pessimistic prewrite. It's possible that lock conflict occurs on + // them, but the isolation is guaranteed by pessimistic locks, so let TiDB + // resolves these locks immediately. fn lock_info(&self, lock: Lock) -> Result { let mut info = lock.into_lock_info(self.key.to_raw()?); if self.txn_props.is_pessimistic() { @@ -343,8 +348,8 @@ impl<'a> PrewriteMutation<'a> { match reader.seek_write(&self.key, TimeStamp::max())? { Some((commit_ts, write)) => { // Abort on writes after our start/for_update timestamp ... - // If exists a commit version whose commit timestamp is larger than current start/for_update - // timestamp, we should abort current prewrite. + // If exists a commit version whose commit timestamp is larger than current + // start/for_update timestamp, we should abort current prewrite. match self.txn_props.kind { TransactionKind::Optimistic(_) => { if commit_ts > self.txn_props.start_ts { @@ -380,8 +385,8 @@ impl<'a> PrewriteMutation<'a> { // TODO: Maybe we need to add a new error for the rolled back case. self.write_conflict_error(&write, commit_ts)?; } - // Should check it when no lock exists, otherwise it can report error when there is - // a lock belonging to a committed transaction which deletes the key. + // Should check it when no lock exists, otherwise it can report error when there + // is a lock belonging to a committed transaction which deletes the key. 
check_data_constraint(reader, self.should_not_exist, &write, commit_ts, &self.key)?; Ok(Some((write, commit_ts))) @@ -491,12 +496,13 @@ impl<'a> PrewriteMutation<'a> { |(w, _)| matches!(w.gc_fence, Some(gc_fence_ts) if !gc_fence_ts.is_zero()), ) { - // The previously-loaded write record has an invalid gc_fence. Regard it as none. + // The previously-loaded write record has an invalid gc_fence. Regard it as + // none. write = &None; } - // Load the most recent version if prev write is not loaded yet, or the prev write is not - // a data version (`Put` or `Delete`) + // Load the most recent version if prev write is not loaded yet, or the prev + // write is not a data version (`Put` or `Delete`) let need_reload = !write_loaded || write.as_ref().map_or(false, |(w, _)| { w.write_type != WriteType::Put && w.write_type != WriteType::Delete @@ -533,7 +539,8 @@ impl<'a> PrewriteMutation<'a> { _ => Ok(()), }; - // Assertion error can be caused by a rollback. So make up a constraint check if the check was skipped before. + // Assertion error can be caused by a rollback. So make up a constraint check if + // the check was skipped before. if assertion_err.is_err() { if self.skip_constraint_check() { self.check_for_newer_version(reader)?; @@ -583,8 +590,8 @@ impl<'a> PrewriteMutation<'a> { } } -// The final_min_commit_ts will be calculated if either async commit or 1PC is enabled. -// It's allowed to enable 1PC without enabling async commit. +// The final_min_commit_ts will be calculated if either async commit or 1PC is +// enabled. It's allowed to enable 1PC without enabling async commit. fn async_commit_timestamps( key: &Key, lock: &mut Lock, @@ -642,7 +649,8 @@ fn async_commit_timestamps( } // TiKV may fails to write pessimistic locks due to pipelined process. -// If the data is not changed after acquiring the lock, we can still prewrite the key. +// If the data is not changed after acquiring the lock, we can still prewrite +// the key. 
fn amend_pessimistic_lock( mutation: &PrewriteMutation<'_>, reader: &mut SnapshotReader, @@ -652,11 +660,14 @@ fn amend_pessimistic_lock( // The invariants of pessimistic locks are: // 1. lock's for_update_ts >= key's latest commit_ts // 2. lock's for_update_ts >= txn's start_ts - // 3. If the data is changed after acquiring the pessimistic lock, key's new commit_ts > lock's for_update_ts + // 3. If the data is changed after acquiring the pessimistic lock, key's new + // commit_ts > lock's for_update_ts // - // So, if the key's latest commit_ts is still less than or equal to lock's for_update_ts, the data is not changed. - // However, we can't get lock's for_update_ts in current implementation (txn's for_update_ts is updated for each DML), - // we can only use txn's start_ts to check -- If the key's commit_ts is less than txn's start_ts, it's less than + // So, if the key's latest commit_ts is still less than or equal to lock's + // for_update_ts, the data is not changed. However, we can't get lock's + // for_update_ts in current implementation (txn's for_update_ts is updated for + // each DML), we can only use txn's start_ts to check -- If the key's + // commit_ts is less than txn's start_ts, it's less than // lock's for_update_ts too. if *commit_ts >= reader.start_ts { warn!( @@ -676,7 +687,8 @@ fn amend_pessimistic_lock( } } // Used pipelined pessimistic lock acquiring in this txn but failed - // Luckily no other txn modified this lock, amend it by treat it as optimistic txn. + // Luckily no other txn modified this lock, amend it by treat it as optimistic + // txn. MVCC_CONFLICT_COUNTER .pipelined_acquire_pessimistic_lock_amend_success .inc(); @@ -858,8 +870,9 @@ pub mod tests { let cm = ConcurrencyManager::new(41.into()); let snapshot = engine.snapshot(Default::default()).unwrap(); - // should_not_write mutations don't write locks or change data so that they needn't ask - // the concurrency manager for max_ts. 
Its min_commit_ts may be less than or equal to max_ts. + // should_not_write mutations don't write locks or change data so that they + // needn't ask the concurrency manager for max_ts. Its min_commit_ts may + // be less than or equal to max_ts. let mut props = optimistic_async_props(b"k0", 10.into(), 50.into(), 2, false); props.min_commit_ts = 11.into(); let mut txn = MvccTxn::new(10.into(), cm.clone()); @@ -878,7 +891,8 @@ pub mod tests { assert!(min_ts < 41.into()); assert_eq!(old_value, OldValue::Unspecified); - // `checkNotExists` is equivalent to a get operation, so it should update the max_ts. + // `checkNotExists` is equivalent to a get operation, so it should update the + // max_ts. let mut props = optimistic_txn_props(b"k0", 42.into()); props.min_commit_ts = 43.into(); let mut txn = MvccTxn::new(42.into(), cm.clone()); @@ -1220,9 +1234,9 @@ pub mod tests { // PUT, LOCK, READ // `----------^ - // Note that this case is special because usually the `LOCK` is the first write already got - // during prewrite/acquire_pessimistic_lock and will continue searching an older version - // from the `LOCK` record. + // Note that this case is special because usually the `LOCK` is the first write + // already got during prewrite/acquire_pessimistic_lock and will continue + // searching an older version from the `LOCK` record. must_prewrite_put(&engine, b"k7", b"v7", b"k7", 16); must_commit(&engine, b"k7", 16, 30); must_prewrite_lock(&engine, b"k7", b"k7", 37); @@ -1373,9 +1387,9 @@ pub mod tests { must_commit(&engine, b"k1", 10, 20); must_commit(&engine, b"k2", 10, 20); - // This is a re-sent prewrite. It should report a PessimisticLockNotFound. In production, the caller - // will need to check if the current transaction is already committed before, in order to - // provide the idempotency. + // This is a re-sent prewrite. It should report a PessimisticLockNotFound. 
In + // production, the caller will need to check if the current transaction is + // already committed before, in order to provide the idempotency. let err = must_retry_pessimistic_prewrite_put_err( &engine, b"k2", @@ -1405,8 +1419,8 @@ pub mod tests { must_commit(&engine, b"k1", 35, 40); must_commit(&engine, b"k2", 35, 40); - // A retrying non-pessimistic-lock prewrite request should not skip constraint checks. - // It reports a PessimisticLockNotFound. + // A retrying non-pessimistic-lock prewrite request should not skip constraint + // checks. It reports a PessimisticLockNotFound. let err = must_retry_pessimistic_prewrite_put_err( &engine, b"k2", @@ -1434,7 +1448,8 @@ pub mod tests { must_unlocked(&engine, b"k2"); // Committing still does nothing. must_commit(&engine, b"k2", 10, 25); - // Try a different txn start ts (which haven't been successfully committed before). + // Try a different txn start ts (which haven't been successfully committed + // before). let err = must_retry_pessimistic_prewrite_put_err( &engine, b"k2", b"v2", b"k1", &None, 11, 11, false, 0, ); @@ -1443,7 +1458,8 @@ pub mod tests { Error(box ErrorInner::PessimisticLockNotFound { .. }) )); must_unlocked(&engine, b"k2"); - // However conflict still won't be checked if there's a non-retry request arriving. + // However conflict still won't be checked if there's a non-retry request + // arriving. must_prewrite_put_impl( &engine, b"k2", @@ -1464,8 +1480,9 @@ pub mod tests { must_locked(&engine, b"k2", 12); must_rollback(&engine, b"k2", 12, false); - // And conflict check is according to the for_update_ts for pessimistic prewrite. - // So, it will not report error if for_update_ts is large enough. + // And conflict check is according to the for_update_ts for pessimistic + // prewrite. So, it will not report error if for_update_ts is large + // enough. 
must_prewrite_put_impl( &engine, b"k2", @@ -1896,8 +1913,8 @@ pub mod tests { must_rollback(&engine, &k1, 30, true); must_rollback(&engine, &k3, 30, true); - // Pessimistic transaction assertion fail on fast/strict level if assertion happens - // during amending pessimistic lock. + // Pessimistic transaction assertion fail on fast/strict level if assertion + // happens during amending pessimistic lock. let pass = assertion_level == AssertionLevel::Off; prewrite_put( &k2, @@ -1922,7 +1939,8 @@ pub mod tests { must_rollback(&engine, &k2, 30, true); must_rollback(&engine, &k4, 30, true); - // Pessimistic transaction fail on strict level no matter whether `is_pessimistic_lock`. + // Pessimistic transaction fail on strict level no matter whether + // `is_pessimistic_lock`. let pass = assertion_level != AssertionLevel::Strict; prewrite_put( &k1, @@ -1990,8 +2008,8 @@ pub mod tests { must_cleanup_with_gc_fence(&engine, k, 5, 0, 7, true); }; - // Test multiple cases without recreating the engine. So use a increasing key prefix to - // avoid each case interfering each other. + // Test multiple cases without recreating the engine. So use a increasing key + // prefix to avoid each case interfering each other. let mut key_prefix = b'a'; let mut test_all_levels = |prepare| { diff --git a/src/storage/txn/commands/acquire_pessimistic_lock.rs b/src/storage/txn/commands/acquire_pessimistic_lock.rs index d49d759f3a5..1db991f70eb 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock.rs @@ -89,8 +89,9 @@ impl WriteCommand for AcquirePessimisticLock let mut res = if self.return_values { Ok(PessimisticLockRes::Values(vec![])) } else if self.check_existence { - // If return_value is set, the existence status is implicitly included in the result. - // So check_existence only need to be explicitly handled if `return_values` is not set. 
+ // If return_value is set, the existence status is implicitly included in the + // result. So check_existence only need to be explicitly handled if + // `return_values` is not set. Ok(PessimisticLockRes::Existence(vec![])) } else { Ok(PessimisticLockRes::Empty) diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index c27e8dc1bc0..7f6f4879a3d 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -56,8 +56,8 @@ enum SecondaryLockStatus { impl WriteCommand for CheckSecondaryLocks { fn process_write(self, snapshot: S, context: WriteContext<'_, L>) -> Result { - // It is not allowed for commit to overwrite a protected rollback. So we update max_ts - // to prevent this case from happening. + // It is not allowed for commit to overwrite a protected rollback. So we update + // max_ts to prevent this case from happening. context.concurrency_manager.update_max_ts(self.start_ts); let mut txn = MvccTxn::new(self.start_ts, context.concurrency_manager); @@ -83,8 +83,8 @@ impl WriteCommand for CheckSecondaryLocks { (SecondaryLockStatus::Locked(lock), false, None) } } - // Searches the write CF for the commit record of the lock and returns the commit timestamp - // (0 if the lock is not committed). + // Searches the write CF for the commit record of the lock and returns the commit + // timestamp (0 if the lock is not committed). l => { mismatch_lock = l; match reader.get_txn_commit_record(&key)? { @@ -96,9 +96,9 @@ impl WriteCommand for CheckSecondaryLocks { }; // We needn't write a rollback once there is a write record for it: // If it's a committed record, it cannot be changed. - // If it's a rollback record, it either comes from another check_secondary_lock - // (thus protected) or the client stops commit actively. So we don't need - // to make it protected again. 
+ // If it's a rollback record, it either comes from another + // check_secondary_lock (thus protected) or the client stops commit + // actively. So we don't need to make it protected again. (status, false, None) } TxnCommitRecord::OverlappedRollback { .. } => { diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index 5ec0ae5c503..7fd4a45ff8a 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -64,12 +64,13 @@ impl CommandExt for CheckTxnStatus { } impl WriteCommand for CheckTxnStatus { - /// checks whether a transaction has expired its primary lock's TTL, rollback the - /// transaction if expired, or update the transaction's min_commit_ts according to the metadata - /// in the primary lock. - /// When transaction T1 meets T2's lock, it may invoke this on T2's primary key. In this - /// situation, `self.start_ts` is T2's `start_ts`, `caller_start_ts` is T1's `start_ts`, and - /// the `current_ts` is literally the timestamp when this function is invoked; it may not be + /// checks whether a transaction has expired its primary lock's TTL, + /// rollback the transaction if expired, or update the transaction's + /// min_commit_ts according to the metadata in the primary lock. + /// When transaction T1 meets T2's lock, it may invoke this on T2's primary + /// key. In this situation, `self.start_ts` is T2's `start_ts`, + /// `caller_start_ts` is T1's `start_ts`, and the `current_ts` is + /// literally the timestamp when this function is invoked; it may not be /// accurate. fn process_write(self, snapshot: S, context: WriteContext<'_, L>) -> Result { let mut new_max_ts = self.lock_ts; @@ -122,7 +123,8 @@ impl WriteCommand for CheckTxnStatus { let mut released_locks = ReleasedLocks::new(self.lock_ts, TimeStamp::zero()); released_locks.push(released); - // The lock is released here only when the `check_txn_status` returns `TtlExpire`. 
+ // The lock is released here only when the `check_txn_status` returns + // `TtlExpire`. if let TxnStatus::TtlExpire = txn_status { released_locks.wake_up(context.lock_mgr); } @@ -477,7 +479,8 @@ pub mod tests { must_unlocked(&engine, b"k2"); must_get_rollback_protected(&engine, b"k2", 15, true); - // case 3: pessimistic transaction with two keys (large txn), secondary is prewritten first + // case 3: pessimistic transaction with two keys (large txn), secondary is + // prewritten first must_acquire_pessimistic_lock_for_large_txn(&engine, b"k3", b"k3", 20, 20, 100); must_acquire_pessimistic_lock_for_large_txn(&engine, b"k4", b"k3", 20, 25, 100); must_pessimistic_prewrite_put_async_commit( @@ -491,7 +494,8 @@ pub mod tests { true, 28, ); - // the client must call check_txn_status with caller_start_ts == current_ts == 0, should not push + // the client must call check_txn_status with caller_start_ts == current_ts == + // 0, should not push must_success( &engine, b"k3", @@ -504,7 +508,8 @@ pub mod tests { uncommitted(100, 21, false), ); - // case 4: pessimistic transaction with two keys (not large txn), secondary is prewritten first + // case 4: pessimistic transaction with two keys (not large txn), secondary is + // prewritten first must_acquire_pessimistic_lock_with_ttl(&engine, b"k5", b"k5", 30, 30, 100); must_acquire_pessimistic_lock_with_ttl(&engine, b"k6", b"k5", 30, 35, 100); must_pessimistic_prewrite_put_async_commit( @@ -518,7 +523,8 @@ pub mod tests { true, 36, ); - // the client must call check_txn_status with caller_start_ts == current_ts == 0, should not push + // the client must call check_txn_status with caller_start_ts == current_ts == + // 0, should not push must_success( &engine, b"k5", @@ -569,8 +575,8 @@ pub mod tests { // The initial min_commit_ts is start_ts + 1. 
must_large_txn_locked(&engine, k, ts(5, 0), 100, ts(5, 1), false); - // CheckTxnStatus with caller_start_ts = 0 and current_ts = 0 should just return the - // information of the lock without changing it. + // CheckTxnStatus with caller_start_ts = 0 and current_ts = 0 should just return + // the information of the lock without changing it. must_success( &engine, k, @@ -613,8 +619,8 @@ pub mod tests { must_large_txn_locked(&engine, k, ts(5, 0), 100, ts(9, 1), false); // caller_start_ts < lock.min_commit_ts < current_ts - // When caller_start_ts < lock.min_commit_ts, no need to update it, but pushed should be - // true. + // When caller_start_ts < lock.min_commit_ts, no need to update it, but pushed + // should be true. must_success( &engine, k, @@ -642,7 +648,8 @@ pub mod tests { ); must_large_txn_locked(&engine, k, ts(5, 0), 100, ts(11, 1), false); - // For same caller_start_ts and current_ts, update min_commit_ts to caller_start_ts + 1 + // For same caller_start_ts and current_ts, update min_commit_ts to + // caller_start_ts + 1 must_success( &engine, k, @@ -689,7 +696,8 @@ pub mod tests { must_prewrite_put_for_large_txn(&engine, k, v, k, ts(20, 0), 100, 0); - // Check a committed transaction when there is another lock. Expect getting the commit ts. + // Check a committed transaction when there is another lock. Expect getting the + // commit ts. must_success( &engine, k, @@ -702,8 +710,8 @@ pub mod tests { committed(ts(15, 0)), ); - // Check a not existing transaction, the result depends on whether `rollback_if_not_exist` - // is set. + // Check a not existing transaction, the result depends on whether + // `rollback_if_not_exist` is set. if r { must_success( &engine, @@ -729,8 +737,8 @@ pub mod tests { must_err(&engine, k, ts(6, 0), ts(12, 0), ts(12, 0), r, false, false); } - // TTL check is based on physical time (in ms). When logical time's difference is larger - // than TTL, the lock won't be resolved. + // TTL check is based on physical time (in ms). 
When logical time's difference + // is larger than TTL, the lock won't be resolved. must_success( &engine, k, @@ -936,8 +944,10 @@ pub mod tests { 100, TimeStamp::zero(), 1, - /* min_commit_ts */ TimeStamp::zero(), - /* max_commit_ts */ TimeStamp::zero(), + // min_commit_ts + TimeStamp::zero(), + // max_commit_ts + TimeStamp::zero(), false, kvproto::kvrpcpb::Assertion::None, kvproto::kvrpcpb::AssertionLevel::Off, @@ -958,7 +968,8 @@ pub mod tests { must_prewrite_put_for_large_txn(&engine, k, v, k, ts(310, 0), 100, 0); must_large_txn_locked(&engine, k, ts(310, 0), 100, ts(310, 1), false); - // Don't push forward the min_commit_ts if caller_start_ts is max, but pushed should be true. + // Don't push forward the min_commit_ts if caller_start_ts is max, but pushed + // should be true. must_success( &engine, k, @@ -998,7 +1009,8 @@ pub mod tests { let ts = TimeStamp::compose; // Check with resolving_pessimistic_lock flag. - // Path: there is no commit or rollback record, no rollback record should be written. + // Path: there is no commit or rollback record, no rollback record should be + // written. must_success( &engine, k, @@ -1031,8 +1043,9 @@ pub mod tests { uncommitted(10, TimeStamp::zero(), false), ); - // Path: the pessimistic primary key lock does exist, and it's expired, the primary lock will - // be pessimistically rolled back but there will not be a rollback record. + // Path: the pessimistic primary key lock does exist, and it's expired, the + // primary lock will be pessimistically rolled back but there will not + // be a rollback record. 
must_success( &engine, k, @@ -1060,8 +1073,10 @@ pub mod tests { 10, TimeStamp::zero(), 1, - /* min_commit_ts */ TimeStamp::zero(), - /* max_commit_ts */ TimeStamp::zero(), + // min_commit_ts + TimeStamp::zero(), + // max_commit_ts + TimeStamp::zero(), false, kvproto::kvrpcpb::Assertion::None, kvproto::kvrpcpb::AssertionLevel::Off, @@ -1078,8 +1093,9 @@ pub mod tests { uncommitted(10, TimeStamp::zero(), false), ); - // Path: the prewrite primary key expired and the solving key is a pessimistic lock, - // rollback record should be written and the transaction status is certain. + // Path: the prewrite primary key expired and the solving key is a pessimistic + // lock, rollback record should be written and the transaction status is + // certain. must_success( &engine, k, @@ -1094,8 +1110,9 @@ pub mod tests { must_unlocked(&engine, k); must_get_rollback_ts(&engine, k, ts(30, 0)); - // Path: the resolving_pessimistic_lock is false and the primary key lock is pessimistic - // lock, the transaction is in commit phase and the rollback record should be written. + // Path: the resolving_pessimistic_lock is false and the primary key lock is + // pessimistic lock, the transaction is in commit phase and the rollback + // record should be written. 
must_acquire_pessimistic_lock_with_ttl(&engine, k, k, ts(50, 0), ts(50, 0), 10); must_pessimistic_locked(&engine, k, ts(50, 0), ts(50, 0)); must_success( @@ -1106,7 +1123,8 @@ pub mod tests { ts(61, 0), true, false, - /* resolving_pessimistic_lock */ false, + // resolving_pessimistic_lock + false, |s| s == TtlExpire, ); must_unlocked(&engine, k); diff --git a/src/storage/txn/commands/cleanup.rs b/src/storage/txn/commands/cleanup.rs index 62c0aaa98c1..c810c749bd6 100644 --- a/src/storage/txn/commands/cleanup.rs +++ b/src/storage/txn/commands/cleanup.rs @@ -46,8 +46,8 @@ impl CommandExt for Cleanup { impl WriteCommand for Cleanup { fn process_write(self, snapshot: S, context: WriteContext<'_, L>) -> Result { - // It is not allowed for commit to overwrite a protected rollback. So we update max_ts - // to prevent this case from happening. + // It is not allowed for commit to overwrite a protected rollback. So we update + // max_ts to prevent this case from happening. context.concurrency_manager.update_max_ts(self.start_ts); let mut txn = MvccTxn::new(self.start_ts, context.concurrency_manager); diff --git a/src/storage/txn/commands/compare_and_swap.rs b/src/storage/txn/commands/compare_and_swap.rs index 3d3b62ea156..161db528c19 100644 --- a/src/storage/txn/commands/compare_and_swap.rs +++ b/src/storage/txn/commands/compare_and_swap.rs @@ -116,9 +116,10 @@ mod tests { test_kv_format_impl!(test_cas_basic_impl); } - /// Note: for API V2, TestEngine don't support MVCC reading, so `pre_propose` observer is ignored, - /// and no timestamp will be append to key. - /// The full test of `RawCompareAndSwap` is in `src/storage/mod.rs`. + /// Note: for API V2, TestEngine don't support MVCC reading, so + /// `pre_propose` observer is ignored, and no timestamp will be append + /// to key. The full test of `RawCompareAndSwap` is in + /// `src/storage/mod.rs`. 
fn test_cas_basic_impl() { let engine = TestEngineBuilder::new().build().unwrap(); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); diff --git a/src/storage/txn/commands/macros.rs b/src/storage/txn/commands/macros.rs index c505714f2a4..c57e7bcb5fb 100644 --- a/src/storage/txn/commands/macros.rs +++ b/src/storage/txn/commands/macros.rs @@ -23,8 +23,8 @@ macro_rules! ctx { /// value of `cmd` and which accepts one parameter whose type name matches /// the value of `cmd`. /// cmd_ty -> The type of the result of executing this command. -/// display -> Information needed to implement the `Display` trait for the command. -/// content -> The fields of the struct definition for the command. +/// display -> Information needed to implement the `Display` trait for the +/// command. content -> The fields of the struct definition for the command. macro_rules! command { ( $(#[$outer_doc: meta])* @@ -148,12 +148,12 @@ macro_rules! request_type { } macro_rules! write_bytes { - ($field: ident) => { + ($field:ident) => { fn write_bytes(&self) -> usize { self.$field.as_encoded().len() } }; - ($field: ident: multiple) => { + ($field:ident : multiple) => { fn write_bytes(&self) -> usize { self.$field.iter().map(|x| x.as_encoded().len()).sum() } @@ -166,17 +166,17 @@ macro_rules! 
gen_lock { crate::storage::txn::latch::Lock::new::<(), _>(vec![]) } }; - ($field: ident) => { + ($field:ident) => { fn gen_lock(&self) -> crate::storage::txn::latch::Lock { crate::storage::txn::latch::Lock::new(std::iter::once(&self.$field)) } }; - ($field: ident: multiple) => { + ($field:ident : multiple) => { fn gen_lock(&self) -> crate::storage::txn::latch::Lock { crate::storage::txn::latch::Lock::new(&self.$field) } }; - ($field: ident: multiple$transform: tt) => { + ($field:ident : multiple $transform:tt) => { fn gen_lock(&self) -> crate::storage::txn::latch::Lock { #![allow(unused_parens)] let keys = self.$field.iter().map($transform); diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 5cd94b172ff..7f748c352f7 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -70,7 +70,8 @@ use crate::storage::{ /// Learn more about our transaction system at /// [Deep Dive TiKV: Distributed Transactions](https://tikv.org/docs/deep-dive/distributed-transaction/introduction/) /// -/// These are typically scheduled and used through the [`Storage`](crate::storage::Storage) with functions like +/// These are typically scheduled and used through the +/// [`Storage`](crate::storage::Storage) with functions like /// [`prewrite`](prewrite::Prewrite) trait and are executed asynchronously. pub enum Command { Prewrite(Prewrite), @@ -95,22 +96,23 @@ pub enum Command { /// A `Command` with its return type, reified as the generic parameter `T`. /// -/// Incoming grpc requests (like `CommitRequest`, `PrewriteRequest`) are converted to -/// this type via a series of transformations. That process is described below using -/// `CommitRequest` as an example: -/// 1. A `CommitRequest` is handled by the `future_commit` method in kv.rs, where it -/// needs to be transformed to a `TypedCommand` before being passed to the -/// `storage.sched_txn_command` method. -/// 2. 
The `From` impl for `TypedCommand` gets chosen, and its generic -/// parameter indicates that the result type for this instance of `TypedCommand` is -/// going to be `TxnStatus` - one of the variants of the `StorageCallback` enum. -/// 3. In the above `from` method, the details of the commit request are captured by -/// creating an instance of the struct `storage::txn::commands::commit::Command` -/// via its `new` method. -/// 4. This struct is wrapped in a variant of the enum `storage::txn::commands::Command`. -/// This enum exists to facilitate generic operations over different commands. -/// 5. Finally, the `Command` enum variant for `Commit` is converted to the `TypedCommand` -/// using the `From` impl for `TypedCommand`. +/// Incoming grpc requests (like `CommitRequest`, `PrewriteRequest`) are +/// converted to this type via a series of transformations. That process is +/// described below using `CommitRequest` as an example: +/// 1. A `CommitRequest` is handled by the `future_commit` method in kv.rs, +/// where it needs to be transformed to a `TypedCommand` before being passed to +/// the `storage.sched_txn_command` method. +/// 2. The `From` impl for `TypedCommand` gets chosen, and its +/// generic parameter indicates that the result type for this instance of +/// `TypedCommand` is going to be `TxnStatus` - one of the variants of the +/// `StorageCallback` enum. 3. In the above `from` method, the details of the +/// commit request are captured by creating an instance of the struct +/// `storage::txn::commands::commit::Command` via its `new` method. +/// 4. This struct is wrapped in a variant of the enum +/// `storage::txn::commands::Command`. This enum exists to facilitate generic +/// operations over different commands. 5. Finally, the `Command` enum variant +/// for `Commit` is converted to the `TypedCommand` using the `From` +/// impl for `TypedCommand`. 
/// /// For other requests, see the corresponding `future_` method, the `From` trait /// implementation and so on. @@ -350,16 +352,18 @@ pub(super) struct ReleasedLocks { pessimistic: bool, } -/// Represents for a scheduler command, when should the response sent to the client. -/// For most cases, the response should be sent after the result being successfully applied to -/// the storage (if needed). But in some special cases, some optimizations allows the response to be -/// returned at an earlier phase. +/// Represents for a scheduler command, when should the response sent to the +/// client. For most cases, the response should be sent after the result being +/// successfully applied to the storage (if needed). But in some special cases, +/// some optimizations allows the response to be returned at an earlier phase. /// -/// Note that this doesn't affect latch releasing. The latch and the memory lock (if any) are always -/// released after applying, regardless of when the response is sent. +/// Note that this doesn't affect latch releasing. The latch and the memory lock +/// (if any) are always released after applying, regardless of when the response +/// is sent. #[derive(Clone, Copy, Debug, PartialEq)] pub enum ResponsePolicy { - /// Return the response to the client when the command has finished applying. + /// Return the response to the client when the command has finished + /// applying. OnApplied, /// Return the response after finishing Raft committing. OnCommitted, @@ -695,12 +699,14 @@ impl Debug for Command { } } -/// Commands that do not need to modify the database during execution will implement this trait. +/// Commands that do not need to modify the database during execution will +/// implement this trait. pub trait ReadCommand: CommandExt { fn process_read(self, snapshot: S, statistics: &mut Statistics) -> Result; } -/// Commands that need to modify the database during execution will implement this trait. 
+/// Commands that need to modify the database during execution will implement +/// this trait. pub trait WriteCommand: CommandExt { fn process_write(self, snapshot: S, context: WriteContext<'_, L>) -> Result; } diff --git a/src/storage/txn/commands/pessimistic_rollback.rs b/src/storage/txn/commands/pessimistic_rollback.rs index bcafed8b0e6..010238426ee 100644 --- a/src/storage/txn/commands/pessimistic_rollback.rs +++ b/src/storage/txn/commands/pessimistic_rollback.rs @@ -45,7 +45,8 @@ impl CommandExt for PessimisticRollback { } impl WriteCommand for PessimisticRollback { - /// Delete any pessimistic lock with small for_update_ts belongs to this transaction. + /// Delete any pessimistic lock with small for_update_ts belongs to this + /// transaction. fn process_write(mut self, snapshot: S, context: WriteContext<'_, L>) -> Result { let mut txn = MvccTxn::new(self.start_ts, context.concurrency_manager); let mut reader = ReaderWithStats::new( diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index dd9e451e883..a6aa8af6f87 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -1,10 +1,10 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. // #[PerformanceCriticalPath] -//! Functionality for handling optimistic and pessimistic prewrites. These are separate commands -//! (although maybe they shouldn't be since there is only one protobuf), but -//! handling of the commands is similar. We therefore have a single type (Prewriter) to handle both -//! kinds of prewrite. +//! Functionality for handling optimistic and pessimistic prewrites. These are +//! separate commands (although maybe they shouldn't be since there is only one +//! protobuf), but handling of the commands is similar. We therefore have a +//! single type (Prewriter) to handle both kinds of prewrite. 
use std::mem; @@ -410,7 +410,8 @@ impl WriteCommand for PrewritePessimistic { } } -/// Handles both kinds of prewrite (K statically indicates either optimistic or pessimistic). +/// Handles both kinds of prewrite (K statically indicates either optimistic or +/// pessimistic). struct Prewriter { kind: K, mutations: Vec, @@ -444,7 +445,8 @@ impl Prewriter { SnapshotReader::new_with_ctx(self.start_ts, snapshot, &self.ctx), context.statistics, ); - // Set extra op here for getting the write record when check write conflict in prewrite. + // Set extra op here for getting the write record when check write conflict in + // prewrite. let rows = self.mutations.len(); let res = self.prewrite(&mut txn, &mut reader, context.extra_op); @@ -460,9 +462,10 @@ impl Prewriter { )) } - // Async commit requires the max timestamp in the concurrency manager to be up-to-date. - // If it is possibly stale due to leader transfer or region merge, return an error. - // TODO: Fallback to non-async commit if not synced instead of returning an error. + // Async commit requires the max timestamp in the concurrency manager to be + // up-to-date. If it is possibly stale due to leader transfer or region + // merge, return an error. TODO: Fallback to non-async commit if not synced + // instead of returning an error. fn check_max_ts_synced(&self, snapshot: &impl Snapshot) -> Result<()> { if (self.secondary_keys.is_some() || self.try_one_pc) && !snapshot.ext().is_max_ts_synced() { @@ -476,9 +479,10 @@ impl Prewriter { } } - /// The core part of the prewrite action. In the abstract, this method iterates over the mutations - /// in the prewrite and prewrites each one. It keeps track of any locks encountered and (if it's - /// an async commit transaction) the min_commit_ts, these are returned by the method. + /// The core part of the prewrite action. In the abstract, this method + /// iterates over the mutations in the prewrite and prewrites each one. 
+ /// It keeps track of any locks encountered and (if it's an async commit + /// transaction) the min_commit_ts, these are returned by the method. fn prewrite( &mut self, txn: &mut MvccTxn, @@ -710,10 +714,11 @@ impl Prewriter { } } -/// Encapsulates things which must be done differently for optimistic or pessimistic transactions. +/// Encapsulates things which must be done differently for optimistic or +/// pessimistic transactions. trait PrewriteKind { - /// The type of mutation and, optionally, its extra information, differing for the - /// optimistic and pessimistic transaction. + /// The type of mutation and, optionally, its extra information, differing + /// for the optimistic and pessimistic transaction. type Mutation: MutationLock; fn txn_kind(&self) -> TransactionKind; @@ -783,8 +788,8 @@ impl PrewriteKind for Pessimistic { } } -/// The type of mutation and, optionally, its extra information, differing for the -/// optimistic and pessimistic transaction. +/// The type of mutation and, optionally, its extra information, differing for +/// the optimistic and pessimistic transaction. /// For optimistic txns, this is `Mutation`. /// For pessimistic txns, this is `(Mutation, bool)`, where the bool indicates /// whether the mutation takes a pessimistic lock or not. @@ -845,7 +850,8 @@ fn handle_1pc_locks(txn: &mut MvccTxn, commit_ts: TimeStamp) -> ReleasedLocks { txn.start_ts, lock.short_value, ); - // Transactions committed with 1PC should be impossible to overwrite rollback records. + // Transactions committed with 1PC should be impossible to overwrite rollback + // records. txn.put_write(key.clone(), commit_ts, write.as_ref().to_bytes()); if delete_pessimistic_lock { released_locks.push(txn.unlock_key(key, true)); @@ -1044,8 +1050,8 @@ mod tests { .unwrap(); // Rollback to make tombstones in lock-cf. 
rollback(&engine, &mut statistic, keys, 100).unwrap(); - // Gc rollback flags store in write-cf to make sure the next prewrite operation will skip - // seek write cf. + // Gc rollback flags store in write-cf to make sure the next prewrite operation + // will skip seek write cf. gc_by_compact(&engine, pri_key, 101); set_perf_level(PerfLevel::EnableTimeExceptForMutex); let perf = ReadPerfInstant::new(); @@ -1132,9 +1138,9 @@ mod tests { ) .unwrap(); - // Test a 1PC request should not be partially written when encounters error on the halfway. - // If some of the keys are successfully written as committed state, the atomicity will be - // broken. + // Test a 1PC request should not be partially written when encounters error on + // the halfway. If some of the keys are successfully written as committed state, + // the atomicity will be broken. let (k1, v1) = (b"k1", b"v1"); let (k2, v2) = (b"k2", b"v2"); // Lock k2. @@ -1248,9 +1254,9 @@ mod tests { must_rollback(&engine, k1, 20, true); - // Test a 1PC request should not be partially written when encounters error on the halfway. - // If some of the keys are successfully written as committed state, the atomicity will be - // broken. + // Test a 1PC request should not be partially written when encounters error on + // the halfway. If some of the keys are successfully written as committed state, + // the atomicity will be broken. // Lock k2 with a optimistic lock. let mut statistics = Statistics::default(); @@ -1473,7 +1479,7 @@ mod tests { } macro_rules! assert_max_ts_err { - ($e: expr) => { + ($e:expr) => { match $e { Err(Error(box ErrorInner::MaxTimestampNotSynced { .. 
})) => {} _ => panic!("Should have returned an error"), @@ -1676,11 +1682,12 @@ mod tests { assert_eq!(cm.max_ts().into_inner(), 15); - // T3: start_ts = 8, commit_ts = max_ts + 1 = 16, prewrite a DELETE operation on k + // T3: start_ts = 8, commit_ts = max_ts + 1 = 16, prewrite a DELETE operation on + // k must_prewrite_delete(&engine, key, key, 8); must_commit(&engine, key, 8, cm.max_ts().into_inner() + 1); - // T1: start_ts = 10, reapeatly prewrite on k, with should_not_exist flag set + // T1: start_ts = 10, repeatedly prewrite on k, with should_not_exist flag set let res = prewrite_with_cm( &engine, cm, @@ -2019,8 +2026,8 @@ mod tests { must_commit(&engine, b"k1", 35, 40); must_commit(&engine, b"k2", 35, 40); - // A retrying non-pessimistic-lock prewrite request should not skip constraint checks. - // Here it should take no effect, even there's already a newer version + // A retrying non-pessimistic-lock prewrite request should not skip constraint + // checks. Here it should take no effect, even there's already a newer version // after it. (No matter if it's async commit). prewrite_with_retry_flag(b"k2", b"v2", b"k1", Some(vec![]), 10, false, true).unwrap(); must_unlocked(&engine, b"k2"); @@ -2029,8 +2036,8 @@ mod tests { must_unlocked(&engine, b"k2"); // Committing still does nothing. must_commit(&engine, b"k2", 10, 25); - // Try a different txn start ts (which haven't been successfully committed before). - // It should report a PessimisticLockNotFound. + // Try a different txn start ts (which haven't been successfully committed + // before). It should report a PessimisticLockNotFound. let err = prewrite_with_retry_flag(b"k2", b"v2", b"k1", None, 11, false, true).unwrap_err(); assert!(matches!( err, @@ -2039,7 +2046,8 @@ mod tests { ))) )); must_unlocked(&engine, b"k2"); - // However conflict still won't be checked if there's a non-retry request arriving. + // However conflict still won't be checked if there's a non-retry request + // arriving. 
prewrite_with_retry_flag(b"k2", b"v2", b"k1", None, 10, false, false).unwrap(); must_locked(&engine, b"k2", 10); } @@ -2108,8 +2116,8 @@ mod tests { fn test_assertion_fail_on_conflicting_index_key() { let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); - // Simulate two transactions that tries to insert the same row with a secondary index, and - // the second one canceled the first one (by rolling back its lock). + // Simulate two transactions that tries to insert the same row with a secondary + // index, and the second one canceled the first one (by rolling back its lock). let t1_start_ts = TimeStamp::compose(1, 0); let t2_start_ts = TimeStamp::compose(2, 0); @@ -2222,8 +2230,8 @@ mod tests { ))) )); - // If the two keys are sent in different requests, it would be the client's duty to ignore - // the assertion error. + // If the two keys are sent in different requests, it would be the client's duty + // to ignore the assertion error. let err = must_prewrite_put_err_impl( &engine, b"row", diff --git a/src/storage/txn/commands/resolve_lock.rs b/src/storage/txn/commands/resolve_lock.rs index 6638fe5cffd..1d2bfbf49d8 100644 --- a/src/storage/txn/commands/resolve_lock.rs +++ b/src/storage/txn/commands/resolve_lock.rs @@ -100,9 +100,9 @@ impl WriteCommand for ResolveLock { false, )? } else if commit_ts > current_lock.ts { - // Continue to resolve locks if the not found committed locks are pessimistic type. - // They could be left if the transaction is finally committed and pessimistic conflict - // retry happens during execution. + // Continue to resolve locks if the not found committed locks are pessimistic + // type. They could be left if the transaction is finally committed and + // pessimistic conflict retry happens during execution. match commit(&mut txn, &mut reader, current_key.clone(), commit_ts) { Ok(res) => res, Err(MvccError(box MvccErrorInner::TxnLockNotFound { .. 
})) @@ -160,6 +160,7 @@ impl WriteCommand for ResolveLock { } } -// To resolve a key, the write size is about 100~150 bytes, depending on key and value length. -// The write batch will be around 32KB if we scan 256 keys each time. +// To resolve a key, the write size is about 100~150 bytes, depending on key and +// value length. The write batch will be around 32KB if we scan 256 keys each +// time. pub const RESOLVE_LOCK_BATCH_SIZE: usize = 256; diff --git a/src/storage/txn/commands/resolve_lock_lite.rs b/src/storage/txn/commands/resolve_lock_lite.rs index f69d4a107fc..5a0f636d2f6 100644 --- a/src/storage/txn/commands/resolve_lock_lite.rs +++ b/src/storage/txn/commands/resolve_lock_lite.rs @@ -52,8 +52,8 @@ impl WriteCommand for ResolveLockLite { ); let rows = self.resolve_keys.len(); - // ti-client guarantees the size of resolve_keys will not too large, so no necessary - // to control the write_size as ResolveLock. + // ti-client guarantees the size of resolve_keys will not too large, so no + // necessary to control the write_size as ResolveLock. let mut released_locks = ReleasedLocks::new(self.start_ts, self.commit_ts); for key in self.resolve_keys { released_locks.push(if !self.commit_ts.is_zero() { diff --git a/src/storage/txn/commands/rollback.rs b/src/storage/txn/commands/rollback.rs index 70e7fc4a49d..ad22e966590 100644 --- a/src/storage/txn/commands/rollback.rs +++ b/src/storage/txn/commands/rollback.rs @@ -52,8 +52,8 @@ impl WriteCommand for Rollback { let rows = self.keys.len(); let mut released_locks = ReleasedLocks::new(self.start_ts, TimeStamp::zero()); for k in self.keys { - // Rollback is called only if the transaction is known to fail. Under the circumstances, - // the rollback record needn't be protected. + // Rollback is called only if the transaction is known to fail. Under the + // circumstances, the rollback record needn't be protected. 
let released_lock = cleanup(&mut txn, &mut reader, k, TimeStamp::zero(), false)?; released_locks.push(released_lock); } diff --git a/src/storage/txn/commands/txn_heart_beat.rs b/src/storage/txn/commands/txn_heart_beat.rs index e894cc6835e..2149d5571da 100644 --- a/src/storage/txn/commands/txn_heart_beat.rs +++ b/src/storage/txn/commands/txn_heart_beat.rs @@ -210,7 +210,8 @@ pub mod tests { must_err(&engine, k, 5, 100); // Create a lock with TTL=100. - // The initial TTL will be set to 0 after calling must_prewrite_put. Update it first. + // The initial TTL will be set to 0 after calling must_prewrite_put. Update it + // first. must_prewrite_put(&engine, k, v, k, 5); must_locked(&engine, k, 5); must_success(&engine, k, 5, 100, 100); diff --git a/src/storage/txn/flow_controller/mod.rs b/src/storage/txn/flow_controller/mod.rs index f109b9896a3..c0faeac6328 100644 --- a/src/storage/txn/flow_controller/mod.rs +++ b/src/storage/txn/flow_controller/mod.rs @@ -13,7 +13,7 @@ pub enum FlowController { } macro_rules! flow_controller_fn { - ($fn_name: ident, $region_id: ident, $type: ident) => { + ($fn_name:ident, $region_id:ident, $type:ident) => { pub fn $fn_name(&self, $region_id: u64) -> $type { match self { FlowController::Singleton(ref controller) => controller.$fn_name($region_id), @@ -21,7 +21,7 @@ macro_rules! 
flow_controller_fn { } } }; - ($fn_name: ident, $region_id: ident, $bytes: ident, $type: ident) => { + ($fn_name:ident, $region_id:ident, $bytes:ident, $type:ident) => { pub fn $fn_name(&self, $region_id: u64, $bytes: usize) -> $type { match self { FlowController::Singleton(ref controller) => { diff --git a/src/storage/txn/flow_controller/singleton_flow_controller.rs b/src/storage/txn/flow_controller/singleton_flow_controller.rs index 056c447aced..40bb50a88c8 100644 --- a/src/storage/txn/flow_controller/singleton_flow_controller.rs +++ b/src/storage/txn/flow_controller/singleton_flow_controller.rs @@ -45,8 +45,9 @@ enum Trend { NoTrend, } -/// Flow controller is used to throttle the write rate at scheduler level, aiming -/// to substitute the write stall mechanism of RocksDB. It features in two points: +/// Flow controller is used to throttle the write rate at scheduler level, +/// aiming to substitute the write stall mechanism of RocksDB. It features in +/// two points: /// * throttle at scheduler, so raftstore and apply won't be blocked anymore /// * better control on the throttle rate to avoid QPS drop under heavy write /// @@ -54,22 +55,22 @@ enum Trend { /// is limited to 16MB/s by default which doesn't take real disk ability into /// account. It may underestimate the disk's throughout that 16MB/s is too small /// at once, causing a very large jitter on the write duration. -/// Also, it decreases the delayed write rate further if the factors still exceed -/// the threshold. So under heavy write load, the write rate may be throttled to -/// a very low rate from time to time, causing QPS drop eventually. -/// +/// Also, it decreases the delayed write rate further if the factors still +/// exceed the threshold. So under heavy write load, the write rate may be +/// throttled to a very low rate from time to time, causing QPS drop eventually. 
/// For compaction pending bytes, we use discardable ratio to do flow control -/// which is separated mechanism from throttle speed. Compaction pending bytes is -/// a approximate value, usually, changes up and down dramatically, so it's unwise -/// to map compaction pending bytes to a specified throttle speed. Instead, -/// mapping it from soft limit to hard limit as 0% to 100% discardable ratio. With -/// this, there must be a point that foreground write rate is equal to the -/// background compaction pending bytes consuming rate so that compaction pending -/// bytes is kept around a steady level. +/// which is separated mechanism from throttle speed. Compaction pending bytes +/// is a approximate value, usually, changes up and down dramatically, so it's +/// unwise to map compaction pending bytes to a specified throttle speed. +/// Instead, mapping it from soft limit to hard limit as 0% to 100% discardable +/// ratio. With this, there must be a point that foreground write rate is equal +/// to the background compaction pending bytes consuming rate so that compaction +/// pending bytes is kept around a steady level. /// /// Here is a brief flow showing where the mechanism works: -/// grpc -> check should drop(discardable ratio) -> limiter -> async write to raftstore +/// grpc -> check should drop(discardable ratio) -> limiter -> async write to +/// raftstore pub struct EngineFlowController { discard_ratio: Arc, limiter: Arc, @@ -702,7 +703,8 @@ impl FlowChecker { .with_label_values(&[&cf]) .set((checker.long_term_pending_bytes.get_avg() * RATIO_SCALE_FACTOR as f64) as i64); - // do special check on start, see the comment of the variable definition for detail. + // do special check on start, see the comment of the variable definition for + // detail. 
if checker.on_start_pending_bytes { if num < soft || checker.long_term_pending_bytes.trend() == Trend::Increasing { // the write is accumulating, still need to throttle @@ -766,7 +768,8 @@ impl FlowChecker { let prev = checker.last_num_memtables.get_recent(); checker.last_num_memtables.observe(num_memtables); - // do special check on start, see the comment of the variable definition for detail. + // do special check on start, see the comment of the variable definition for + // detail. if checker.on_start_memtable { if num_memtables < self.memtables_threshold || checker.last_num_memtables.trend() == Trend::Increasing @@ -904,7 +907,8 @@ impl FlowChecker { let checker = self.cf_checkers.get_mut(&cf).unwrap(); let num_l0_files = checker.long_term_num_l0_files.get_recent(); - // do special check on start, see the comment of the variable definition for detail. + // do special check on start, see the comment of the variable definition for + // detail. if checker.on_start_l0_files { if num_l0_files < self.l0_files_threshold || checker.long_term_num_l0_files.trend() == Trend::Increasing @@ -1132,7 +1136,8 @@ pub(super) mod tests { tablet_suffix, )) .unwrap(); - // not throttle when the average of the sliding window doesn't exceeds the threshold + // not throttle when the average of the sliding window doesn't exceeds the + // threshold stub.0.num_memtables.store(6, Ordering::Relaxed); tx.send(FlowInfo::Flush( "default".to_string(), @@ -1523,7 +1528,8 @@ pub(super) mod tests { smoother.observe_with_time(4, now); assert_eq!(smoother.trend(), Trend::NoTrend); - // Incresing trend, the left range contains 3 records, the right range contains 1 records. + // Increasing trend, the left range contains 3 records, the right range contains + // 1 records. 
let mut smoother = Smoother::< f64, 6, @@ -1545,7 +1551,8 @@ pub(super) mod tests { smoother.observe_with_time(4.0, now); assert_eq!(smoother.trend(), Trend::Increasing); - // Decreasing trend, the left range contains 1 records, the right range contains 3 records. + // Decreasing trend, the left range contains 1 records, the right range contains + // 3 records. let mut smoother = Smoother::< f32, 6, @@ -1561,7 +1568,8 @@ pub(super) mod tests { smoother.observe_with_time(1.0, now); assert_eq!(smoother.trend(), Trend::Decreasing); - // No trend, the left range contains 1 records, the right range contains 3 records. + // No trend, the left range contains 1 records, the right range contains 3 + // records. let mut smoother = Smoother::< f32, 6, diff --git a/src/storage/txn/flow_controller/tablet_flow_controller.rs b/src/storage/txn/flow_controller/tablet_flow_controller.rs index d177c203ba1..14819127389 100644 --- a/src/storage/txn/flow_controller/tablet_flow_controller.rs +++ b/src/storage/txn/flow_controller/tablet_flow_controller.rs @@ -163,7 +163,8 @@ impl FlowInfoDispatcher { .entry(region_id) .or_insert_with(|| insert_limiter_and_checker(region_id, suffix)); // check if the checker's engine is exactly (region_id, suffix) - // if checker.suffix < suffix, it means its tablet is old and needs the refresh + // if checker.suffix < suffix, it means its tablet is old and needs the + // refresh if checker.tablet_suffix() < suffix { let engine = tablet_factory.open_tablet_cache(region_id, suffix).unwrap(); @@ -332,7 +333,6 @@ mod tests { tablet_suffix, )) .unwrap(); - //assert!(!flow_controller.tablet_exist(region_id)); } #[test] diff --git a/src/storage/txn/latch.rs b/src/storage/txn/latch.rs index 0c2ca7951ff..86d16858bd3 100644 --- a/src/storage/txn/latch.rs +++ b/src/storage/txn/latch.rs @@ -13,13 +13,16 @@ use parking_lot::{Mutex, MutexGuard}; const WAITING_LIST_SHRINK_SIZE: usize = 8; const WAITING_LIST_MAX_CAPACITY: usize = 16; -/// Latch which is used to serialize 
accesses to resources hashed to the same slot. +/// Latch which is used to serialize accesses to resources hashed to the same +/// slot. /// -/// Latches are indexed by slot IDs. The keys of a command are hashed into unsigned numbers, -/// then the command is added to the waiting queues of the latches. +/// Latches are indexed by slot IDs. The keys of a command are hashed into +/// unsigned numbers, then the command is added to the waiting queues of the +/// latches. /// -/// If command A is ahead of command B in one latch, it must be ahead of command B in all the -/// overlapping latches. This is an invariant ensured by the `gen_lock`, `acquire` and `release`. +/// If command A is ahead of command B in one latch, it must be ahead of command +/// B in all the overlapping latches. This is an invariant ensured by the +/// `gen_lock`, `acquire` and `release`. #[derive(Clone)] struct Latch { // store hash value of the key and command ID which requires this key. @@ -34,7 +37,8 @@ impl Latch { } } - /// Find the first command ID in the queue whose hash value is equal to hash. + /// Find the first command ID in the queue whose hash value is equal to + /// hash. pub fn get_first_req_by_hash(&self, hash: u64) -> Option { for (h, cid) in self.waiting.iter().flatten() { if *h == hash { @@ -44,10 +48,11 @@ impl Latch { None } - /// Remove the first command ID in the queue whose hash value is equal to hash_key. - /// If the element which would be removed does not appear at the front of the queue, it will leave - /// a hole in the queue. So we must remove consecutive hole when remove the head of the - /// queue to make the queue not too long. + /// Remove the first command ID in the queue whose hash value is equal to + /// hash_key. If the element which would be removed does not appear at the + /// front of the queue, it will leave a hole in the queue. So we must remove + /// consecutive hole when remove the head of the queue to make the queue not + /// too long. 
pub fn pop_front(&mut self, key_hash: u64) -> Option<(u64, u64)> { if let Some(item) = self.waiting.pop_front() { if let Some((k, _)) = item.as_ref() { @@ -74,8 +79,8 @@ impl Latch { self.waiting.push_back(Some((key_hash, cid))); } - /// For some hot keys, the waiting list maybe very long, so we should shrink the waiting - /// VecDeque after pop. + /// For some hot keys, the waiting list maybe very long, so we should shrink + /// the waiting VecDeque after pop. fn maybe_shrink(&mut self) { // Pop item which is none to make queue not too long. while let Some(item) = self.waiting.front() { @@ -95,7 +100,8 @@ impl Latch { /// Lock required for a command. #[derive(Clone)] pub struct Lock { - /// The hash value of the keys that a command must acquire before being able to be processed. + /// The hash value of the keys that a command must acquire before being able + /// to be processed. pub required_hashes: Vec, /// The number of latches that the command has acquired. @@ -126,7 +132,8 @@ impl Lock { } } - /// Returns true if all the required latches have be acquired, false otherwise. + /// Returns true if all the required latches have be acquired, false + /// otherwise. pub fn acquired(&self) -> bool { self.required_hashes.len() == self.owned_count } @@ -138,8 +145,9 @@ impl Lock { /// Latches which are used for concurrency control in the scheduler. /// -/// Each latch is indexed by a slot ID, hence the term latch and slot are used interchangeably, but -/// conceptually a latch is a queue, and a slot is an index to the queue. +/// Each latch is indexed by a slot ID, hence the term latch and slot are used +/// interchangeably, but conceptually a latch is a queue, and a slot is an index +/// to the queue. pub struct Latches { slots: Vec>>, size: usize, @@ -156,11 +164,13 @@ impl Latches { Latches { slots, size } } - /// Tries to acquire the latches specified by the `lock` for command with ID `who`. 
+ /// Tries to acquire the latches specified by the `lock` for command with ID + /// `who`. /// - /// This method will enqueue the command ID into the waiting queues of the latches. A latch is - /// considered acquired if the command ID is the first one of elements in the queue which have - /// the same hash value. Returns true if all the Latches are acquired, false otherwise. + /// This method will enqueue the command ID into the waiting queues of the + /// latches. A latch is considered acquired if the command ID is the first + /// one of elements in the queue which have the same hash value. Returns + /// true if all the Latches are acquired, false otherwise. pub fn acquire(&self, lock: &mut Lock, who: u64) -> bool { let mut acquired_count: usize = 0; for &key_hash in &lock.required_hashes[lock.owned_count..] { @@ -184,9 +194,11 @@ impl Latches { lock.acquired() } - /// Releases all latches owned by the `lock` of command with ID `who`, returns the wakeup list. + /// Releases all latches owned by the `lock` of command with ID `who`, + /// returns the wakeup list. /// - /// Preconditions: the caller must ensure the command is at the front of the latches. + /// Preconditions: the caller must ensure the command is at the front of the + /// latches. pub fn release(&self, lock: &Lock, who: u64) -> Vec { let mut wakeup_list: Vec = vec![]; for &key_hash in &lock.required_hashes[..lock.owned_count] { diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index 12ff44bbd61..5894efc3226 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -61,7 +61,8 @@ impl SchedPool { name_prefix: &str, ) -> Self { let engine = Arc::new(Mutex::new(engine)); - // for low cpu quota env, set the max-thread-count as 4 to allow potential cases that we need more thread than cpu num. + // for low cpu quota env, set the max-thread-count as 4 to allow potential cases + // that we need more thread than cpu num. 
let max_pool_size = std::cmp::max( pool_size, std::cmp::max(4, SysQuota::cpu_cores_quota() as usize), diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 2588e820d21..fb32f767bd5 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -3,23 +3,25 @@ // #[PerformanceCriticalPath //! Scheduler which schedules the execution of `storage::Command`s. //! -//! There is one scheduler for each store. It receives commands from clients, executes them against -//! the MVCC layer storage engine. +//! There is one scheduler for each store. It receives commands from clients, +//! executes them against the MVCC layer storage engine. //! -//! Logically, the data organization hierarchy from bottom to top is row -> region -> store -> -//! database. But each region is replicated onto N stores for reliability, the replicas form a Raft -//! group, one of which acts as the leader. When the client read or write a row, the command is -//! sent to the scheduler which is on the region leader's store. +//! Logically, the data organization hierarchy from bottom to top is row -> +//! region -> store -> database. But each region is replicated onto N stores for +//! reliability, the replicas form a Raft group, one of which acts as the +//! leader. When the client read or write a row, the command is sent to the +//! scheduler which is on the region leader's store. //! -//! Scheduler runs in a single-thread event loop, but command executions are delegated to a pool of -//! worker thread. +//! Scheduler runs in a single-thread event loop, but command executions are +//! delegated to a pool of worker thread. //! -//! Scheduler keeps track of all the running commands and uses latches to ensure serialized access -//! to the overlapping rows involved in concurrent commands. But note that scheduler only ensures -//! serialized access to the overlapping rows at command level, but a transaction may consist of -//! 
multiple commands, therefore conflicts may happen at transaction level. Transaction semantics -//! is ensured by the transaction protocol implemented in the client library, which is transparent -//! to the scheduler. +//! Scheduler keeps track of all the running commands and uses latches to ensure +//! serialized access to the overlapping rows involved in concurrent commands. +//! But note that scheduler only ensures serialized access to the overlapping +//! rows at command level, but a transaction may consist of multiple commands, +//! therefore conflicts may happen at transaction level. Transaction semantics +//! is ensured by the transaction protocol implemented in the client library, +//! which is transparent to the scheduler. use std::{ marker::PhantomData, @@ -75,8 +77,8 @@ use crate::{ const TASKS_SLOTS_NUM: usize = 1 << 12; // 4096 slots. -// The default limit is set to be very large. Then, requests without `max_exectuion_duration` -// will not be aborted unexpectedly. +// The default limit is set to be very large. Then, requests without +// `max_exectuion_duration` will not be aborted unexpectedly. pub const DEFAULT_EXECUTION_DURATION_LIMIT: Duration = Duration::from_secs(24 * 60 * 60); const IN_MEMORY_PESSIMISTIC_LOCK: Feature = Feature::require(6, 0, 0); @@ -279,18 +281,19 @@ impl SchedulerInner { /// Tries to acquire all the required latches for a command when waken up by /// another finished command. /// - /// Returns a deadline error if the deadline is exceeded. Returns the `Task` if - /// all latches are acquired, returns `None` otherwise. + /// Returns a deadline error if the deadline is exceeded. Returns the `Task` + /// if all latches are acquired, returns `None` otherwise. fn acquire_lock_on_wakeup(&self, cid: u64) -> Result, StorageError> { let mut task_slot = self.get_task_slot(cid); let tctx = task_slot.get_mut(&cid).unwrap(); - // Check deadline early during acquiring latches to avoid expired requests blocking - // other requests. 
+ // Check deadline early during acquiring latches to avoid expired requests + // blocking other requests. if let Err(e) = tctx.task.as_ref().unwrap().cmd.deadline().check() { - // `acquire_lock_on_wakeup` is called when another command releases its locks and wakes up - // command `cid`. This command inserted its lock before and now the lock is at the - // front of the queue. The actual acquired count is one more than the `owned_count` - // recorded in the lock, so we increase one to make `release` work. + // `acquire_lock_on_wakeup` is called when another command releases its locks + // and wakes up command `cid`. This command inserted its lock before + // and now the lock is at the front of the queue. The actual + // acquired count is one more than the `owned_count` recorded in the + // lock, so we increase one to make `release` work. tctx.lock.owned_count += 1; return Err(e.into()); } @@ -463,8 +466,9 @@ impl Scheduler { fail_point!("txn_scheduler_acquire_fail"); } - /// Tries to acquire all the necessary latches. If all the necessary latches are acquired, - /// the method initiates a get snapshot operation for further processing. + /// Tries to acquire all the necessary latches. If all the necessary latches + /// are acquired, the method initiates a get snapshot operation for further + /// processing. fn try_to_wake_up(&self, cid: u64) { match self.inner.acquire_lock_on_wakeup(cid) { Ok(Some(task)) => { @@ -579,8 +583,8 @@ impl Scheduler { /// Event handler for the success of read. /// - /// If a next command is present, continues to execute; otherwise, delivers the result to the - /// callback. + /// If a next command is present, continues to execute; otherwise, delivers + /// the result to the callback. 
fn on_read_finished(&self, cid: u64, pr: ProcessResult, tag: CommandKind) { SCHED_STAGE_COUNTER_VEC.get(tag).read_finish.inc(); @@ -627,9 +631,9 @@ impl Scheduler { drop(lock_guards); let tctx = self.inner.dequeue_task_context(cid); - // If pipelined pessimistic lock or async apply prewrite takes effect, it's not guaranteed - // that the proposed or committed callback is surely invoked, which takes and invokes - // `tctx.cb(tctx.pr)`. + // If pipelined pessimistic lock or async apply prewrite takes effect, it's not + // guaranteed that the proposed or committed callback is surely invoked, which + // takes and invokes `tctx.cb(tctx.pr)`. if let Some(cb) = tctx.cb { let pr = match result { Ok(()) => pr.or(tctx.pr).unwrap(), @@ -742,8 +746,8 @@ impl Scheduler { .await; } - /// Processes a read command within a worker thread, then posts `ReadFinished` message back to the - /// `Scheduler`. + /// Processes a read command within a worker thread, then posts + /// `ReadFinished` message back to the `Scheduler`. fn process_read(self, snapshot: E::Snap, task: Task, statistics: &mut Statistics) { fail_point!("txn_before_process_read"); debug!("process read cmd in worker pool"; "cid" => task.cid); @@ -764,8 +768,9 @@ impl Scheduler { self.on_read_finished(task.cid, pr, tag); } - /// Processes a write command within a worker thread, then posts either a `WriteFinished` - /// message if successful or a `FinishedWithErr` message back to the `Scheduler`. + /// Processes a write command within a worker thread, then posts either a + /// `WriteFinished` message if successful or a `FinishedWithErr` message + /// back to the `Scheduler`. async fn process_write(self, snapshot: E::Snap, task: Task, statistics: &mut Statistics) { fail_point!("txn_before_process_write"); let write_bytes = task.cmd.write_bytes(); @@ -806,7 +811,8 @@ impl Scheduler { }; if write_result.is_ok() { - // TODO: write bytes can be a bit inaccurate due to error requests or in-memory pessimistic locks. 
+ // TODO: write bytes can be a bit inaccurate due to error requests or in-memory + // pessimistic locks. sample.add_write_bytes(write_bytes); } let read_bytes = statistics.cf_statistics(CF_DEFAULT).flow_stats.read_bytes @@ -833,16 +839,16 @@ impl Scheduler { .map_err(StorageError::from) .and(write_result) { - // Write prepare failure typically means conflicting transactions are detected. Delivers the - // error to the callback, and releases the latches. + // Write prepare failure typically means conflicting transactions are detected. Delivers + // the error to the callback, and releases the latches. Err(err) => { SCHED_STAGE_COUNTER_VEC.get(tag).prepare_write_err.inc(); debug!("write command failed"; "cid" => cid, "err" => ?err); scheduler.finish_with_err(cid, err); return; } - // Initiates an async write operation on the storage engine, there'll be a `WriteFinished` - // message when it finishes. + // Initiates an async write operation on the storage engine, there'll be a + // `WriteFinished` message when it finishes. Ok(res) => res, }; let region_id = ctx.get_region_id(); @@ -963,10 +969,10 @@ impl Scheduler { let _ = self.inner.flow_controller.consume(region_id, write_size); } else { let start = Instant::now_coarse(); - // Control mutex is used to ensure there is only one request consuming the quota. - // The delay may exceed 1s, and the speed limit is changed every second. - // If the speed of next second is larger than the one of first second, - // without the mutex, the write flow can't throttled strictly. + // Control mutex is used to ensure there is only one request consuming the + // quota. The delay may exceed 1s, and the speed limit is changed every second. + // If the speed of next second is larger than the one of first second, without + // the mutex, the write flow can't throttled strictly. 
let control_mutex = self.inner.control_mutex.clone(); let _guard = control_mutex.lock().await; let delay = self.inner.flow_controller.consume(region_id, write_size); @@ -994,8 +1000,9 @@ impl Scheduler { let (version, term) = (ctx.get_region_epoch().get_version(), ctx.get_term()); // Mutations on the lock CF should overwrite the memory locks. - // We only set a deleted flag here, and the lock will be finally removed when it finishes - // applying. See the comments in `PeerPessimisticLocks` for how this flag is used. + // We only set a deleted flag here, and the lock will be finally removed when it + // finishes applying. See the comments in `PeerPessimisticLocks` for how this + // flag is used. let txn_ext2 = txn_ext.clone(); let mut pessimistic_locks_guard = txn_ext2 .as_ref() @@ -1021,29 +1028,33 @@ impl Scheduler { } _ => vec![], }; - // Keep the read lock guard of the pessimistic lock table until the request is sent to the raftstore. + // Keep the read lock guard of the pessimistic lock table until the request is + // sent to the raftstore. // - // If some in-memory pessimistic locks need to be proposed, we will propose another TransferLeader - // command. Then, we can guarentee even if the proposed locks don't include the locks deleted here, - // the response message of the transfer leader command must be later than this write command because - // this write command has been sent to the raftstore. Then, we don't need to worry this request will - // fail due to the voluntary leader transfer. + // If some in-memory pessimistic locks need to be proposed, we will propose + // another TransferLeader command. Then, we can guarentee even if the proposed + // locks don't include the locks deleted here, the response message of the + // transfer leader command must be later than this write command because this + // write command has been sent to the raftstore. Then, we don't need to worry + // this request will fail due to the voluntary leader transfer. 
let _downgraded_guard = pessimistic_locks_guard.and_then(|guard| { (!removed_pessimistic_locks.is_empty()).then(|| RwLockWriteGuard::downgrade(guard)) }); - // The callback to receive async results of write prepare from the storage engine. + // The callback to receive async results of write prepare from the storage + // engine. let engine_cb = Box::new(move |result: EngineResult<()>| { let ok = result.is_ok(); if ok && !removed_pessimistic_locks.is_empty() { - // Removing pessimistic locks when it succeeds to apply. This should be done in the apply - // thread, to make sure it happens before other admin commands are executed. + // Removing pessimistic locks when it succeeds to apply. This should be done in + // the apply thread, to make sure it happens before other admin commands are + // executed. if let Some(mut pessimistic_locks) = txn_ext .as_ref() .map(|txn_ext| txn_ext.pessimistic_locks.write()) { - // If epoch version or term does not match, region or leader change has happened, - // so we needn't remove the key. + // If epoch version or term does not match, region or leader change has + // happened, so we needn't remove the key. if pessimistic_locks.term == term && pessimistic_locks.version == version { for key in removed_pessimistic_locks { pessimistic_locks.remove(&key); @@ -1070,8 +1081,9 @@ impl Scheduler { .observe(rows as f64); if !ok { - // Only consume the quota when write succeeds, otherwise failed write requests may exhaust - // the quota and other write requests would be in long delay. + // Only consume the quota when write succeeds, otherwise failed write + // requests may exhaust the quota and other write requests would be in long + // delay. if sched.inner.flow_controller.enabled() { sched.inner.flow_controller.unconsume(region_id, write_size); } @@ -1095,7 +1107,8 @@ impl Scheduler { } } - /// Returns whether it succeeds to write pessimistic locks to the in-memory lock table. 
+ /// Returns whether it succeeds to write pessimistic locks to the in-memory + /// lock table. fn try_write_in_memory_pessimistic_locks( &self, txn_ext: Option<&TxnExt>, @@ -1107,10 +1120,11 @@ impl Scheduler { None => return false, }; let mut pessimistic_locks = txn_ext.pessimistic_locks.write(); - // When not writable, it only means we cannot write locks to the in-memory lock table, - // but it is still possible for the region to propose request. - // When term or epoch version has changed, the request must fail. To be simple, here we just - // let the request fallback to propose and let raftstore generate an appropriate error. + // When not writable, it only means we cannot write locks to the in-memory lock + // table, but it is still possible for the region to propose request. + // When term or epoch version has changed, the request must fail. To be simple, + // here we just let the request fallback to propose and let raftstore generate + // an appropriate error. if !pessimistic_locks.is_writable() || pessimistic_locks.term != context.get_term() || pessimistic_locks.version != context.get_region_epoch().get_version() @@ -1550,7 +1564,8 @@ mod tests { // time limit is 100ms. thread::sleep(Duration::from_millis(200)); - // When releasing the lock, the queuing tasks should be all waken up without stack overflow. + // When releasing the lock, the queuing tasks should be all waken up without + // stack overflow. scheduler.release_lock(&lock, cid); // A new request should not be blocked. diff --git a/src/storage/txn/store.rs b/src/storage/txn/store.rs index 59f9f077aa2..2cd4afaf932 100644 --- a/src/storage/txn/store.rs +++ b/src/storage/txn/store.rs @@ -20,7 +20,8 @@ pub trait Store: Send { /// Fetch the provided key. fn get(&self, key: &Key, statistics: &mut Statistics) -> Result>; - /// Re-use last cursor to incrementally (if possible) fetch the provided key. + /// Re-use last cursor to incrementally (if possible) fetch the provided + /// key. 
fn incremental_get(&mut self, key: &Key) -> Result>; /// Take the statistics. Currently only available for `incremental_get`. @@ -49,13 +50,15 @@ pub trait Store: Send { /// [`Scanner`]s allow retrieving items or batches from a scan result. /// -/// Commonly they are obtained as a result of a [`scanner`](Store::scanner) operation. +/// Commonly they are obtained as a result of a [`scanner`](Store::scanner) +/// operation. pub trait Scanner: Send { /// Get the next [`KvPair`](KvPair) if it exists. fn next(&mut self) -> Result>; /// Get the next [`KvPair`](KvPair)s up to `limit` if they exist. - /// If `sample_step` is greater than 0, skips `sample_step - 1` number of keys after each returned key. + /// If `sample_step` is greater than 0, skips `sample_step - 1` number of + /// keys after each returned key. fn scan(&mut self, limit: usize, sample_step: usize) -> Result>> { let mut row_count = 0; let mut results = Vec::with_capacity(limit); @@ -1162,12 +1165,14 @@ mod tests { Some((Key::from_raw(b"z"), b"beta".to_vec())) ); assert!(scanner.next().is_err()); - // note: mvcc impl does not guarantee to work any more after meeting a non lock error + // note: mvcc impl does not guarantee to work any more after meeting a non lock + // error assert_eq!(scanner.next().unwrap(), None); let mut scanner = store.scanner(true, false, false, None, None).unwrap(); assert!(scanner.next().is_err()); - // note: mvcc impl does not guarantee to work any more after meeting a non lock error + // note: mvcc impl does not guarantee to work any more after meeting a non lock + // error assert_eq!( scanner.next().unwrap(), Some((Key::from_raw(b"z"), b"beta".to_vec())) @@ -1224,7 +1229,8 @@ mod tests { ); assert_eq!(scanner.next().unwrap(), Some((Key::from_raw(b"z"), vec![]))); assert!(scanner.next().is_err()); - // note: mvcc impl does not guarantee to work any more after meeting a non lock error + // note: mvcc impl does not guarantee to work any more after meeting a non lock + // error 
assert_eq!(scanner.next().unwrap(), None); let mut scanner = store diff --git a/src/storage/types.rs b/src/storage/types.rs index fe4319da97c..70cd7d2d991 100644 --- a/src/storage/types.rs +++ b/src/storage/types.rs @@ -123,11 +123,13 @@ pub struct PrewriteResult { #[derive(Clone, Debug, PartialEq)] pub enum PessimisticLockRes { - /// The previous value is loaded while handling the `AcquirePessimisticLock` command. The i-th - /// item is the value of the i-th key in the `AcquirePessimisticLock` command. + /// The previous value is loaded while handling the `AcquirePessimisticLock` + /// command. The i-th item is the value of the i-th key in the + /// `AcquirePessimisticLock` command. Values(Vec>), - /// Checked whether the key exists while handling the `AcquirePessimisticLock` command. The i-th - /// item is true if the i-th key in the `AcquirePessimisticLock` command exists. + /// Checked whether the key exists while handling the + /// `AcquirePessimisticLock` command. The i-th item is true if the i-th key + /// in the `AcquirePessimisticLock` command exists. Existence(Vec), Empty, } diff --git a/tests/benches/coprocessor_executors/hash_aggr/mod.rs b/tests/benches/coprocessor_executors/hash_aggr/mod.rs index f7893e66bdc..07f28c22d63 100644 --- a/tests/benches/coprocessor_executors/hash_aggr/mod.rs +++ b/tests/benches/coprocessor_executors/hash_aggr/mod.rs @@ -40,8 +40,8 @@ fn bench_hash_aggr_count_1_group_by_int_col_2_groups( } /// COUNT(1) GROUP BY COL > X. -/// Half of the row belong to one group and the rest belong to another group. Thus there are -/// totally two groups. +/// Half of the row belong to one group and the rest belong to another group. +/// Thus there are totally two groups. 
fn bench_hash_aggr_count_1_group_by_fn_2_groups( b: &mut criterion::Bencher<'_, M>, input: &Input, @@ -94,8 +94,8 @@ fn bench_hash_aggr_count_1_group_by_decimal_col_2_groups( input.bencher.bench(b, &fb, &group_by, &[expr]); } -/// COUNT(1) GROUP BY COL1, COL2 where COL1 is a int column and COL2 is a real column. -/// Each row is a new group. +/// COUNT(1) GROUP BY COL1, COL2 where COL1 is a int column and COL2 is a real +/// column. Each row is a new group. fn bench_hash_aggr_count_1_group_by_int_col_real_col( b: &mut criterion::Bencher<'_, M>, input: &Input, @@ -115,8 +115,8 @@ fn bench_hash_aggr_count_1_group_by_int_col_real_col( input.bencher.bench(b, &fb, &group_by, &[expr]); } -/// COUNT(1) GROUP BY COL1, COL2 where COL1 is a int column and COL2 is a real column. -/// There will be two groups totally. +/// COUNT(1) GROUP BY COL1, COL2 where COL1 is a int column and COL2 is a real +/// column. There will be two groups totally. fn bench_hash_aggr_count_1_group_by_int_col_real_col_2_groups( b: &mut criterion::Bencher<'_, M>, input: &Input, diff --git a/tests/benches/coprocessor_executors/hash_aggr/util.rs b/tests/benches/coprocessor_executors/hash_aggr/util.rs index efa92ebf0cb..b799276b193 100644 --- a/tests/benches/coprocessor_executors/hash_aggr/util.rs +++ b/tests/benches/coprocessor_executors/hash_aggr/util.rs @@ -39,8 +39,8 @@ where } } -/// A bencher that will use batch hash aggregation executor to bench the giving aggregate -/// expression. +/// A bencher that will use batch hash aggregation executor to bench the giving +/// aggregate expression. 
pub struct BatchBencher; impl HashAggrBencher for BatchBencher diff --git a/tests/benches/coprocessor_executors/index_scan/fixture.rs b/tests/benches/coprocessor_executors/index_scan/fixture.rs index 286a2a22e1e..20ee6d41369 100644 --- a/tests/benches/coprocessor_executors/index_scan/fixture.rs +++ b/tests/benches/coprocessor_executors/index_scan/fixture.rs @@ -3,8 +3,8 @@ use test_coprocessor::*; use tikv::storage::RocksEngine; -/// Builds a fixture table, which contains two columns: id, foo and there is an index over -/// `foo` column. +/// Builds a fixture table, which contains two columns: id, foo and there is an +/// index over `foo` column. pub fn table_with_2_columns_and_one_index(rows: usize) -> (i64, Table, Store) { let index_id = next_id(); let id = ColumnBuilder::new() diff --git a/tests/benches/coprocessor_executors/index_scan/mod.rs b/tests/benches/coprocessor_executors/index_scan/mod.rs index 93a9cd4a3fb..ba29f08bb87 100644 --- a/tests/benches/coprocessor_executors/index_scan/mod.rs +++ b/tests/benches/coprocessor_executors/index_scan/mod.rs @@ -11,8 +11,8 @@ const ROWS: usize = 5000; /// 1 interested column, which is PK (which is in the key). /// -/// This kind of scanner is used in SQLs like `SELECT * FROM .. WHERE index = X`, an index lookup -/// will be performed so that PK is needed. +/// This kind of scanner is used in SQLs like `SELECT * FROM .. WHERE index = +/// X`, an index lookup will be performed so that PK is needed. fn bench_index_scan_primary_key(b: &mut criterion::Bencher<'_, M>, input: &Input) where M: Measurement + 'static, @@ -27,10 +27,12 @@ where ); } -/// 1 interested column, which is the column of the index itself (which is in the key). +/// 1 interested column, which is the column of the index itself (which is in +/// the key). /// -/// This kind of scanner is used in SQLs like `SELECT COUNT(*) FROM .. WHERE index = X` or -/// `SELECT index FROM .. WHERE index = X`. There is no double read. 
+/// This kind of scanner is used in SQLs like `SELECT COUNT(*) FROM .. WHERE +/// index = X` or `SELECT index FROM .. WHERE index = X`. There is no double +/// read. fn bench_index_scan_index(b: &mut criterion::Bencher<'_, M>, input: &Input) where M: Measurement + 'static, diff --git a/tests/benches/coprocessor_executors/integrated/mod.rs b/tests/benches/coprocessor_executors/integrated/mod.rs index e3e64709625..cb7e48f3bd7 100644 --- a/tests/benches/coprocessor_executors/integrated/mod.rs +++ b/tests/benches/coprocessor_executors/integrated/mod.rs @@ -19,7 +19,8 @@ where { let (table, store) = crate::table_scan::fixture::table_with_2_columns(input.rows); - // TODO: Change to use `DAGSelect` helper when it no longer place unnecessary columns. + // TODO: Change to use `DAGSelect` helper when it no longer place unnecessary + // columns. let executors = &[ table_scan(&[table["id"].as_column_info()]), simple_aggregate(&[ @@ -260,7 +261,8 @@ fn bench_select_count_1_group_by_int_col_group_few_stream( bench_select_count_1_group_by_int_col_stream_impl(table, store, b, input); } -/// SELECT COUNT(1) FROM Table GROUP BY int_col (n groups, n = row_count, stream aggregation) +/// SELECT COUNT(1) FROM Table GROUP BY int_col (n groups, n = row_count, stream +/// aggregation) fn bench_select_count_1_group_by_int_col_group_many_stream( b: &mut criterion::Bencher<'_, M>, input: &Input, @@ -365,7 +367,8 @@ fn bench_select_count_1_group_by_2_col_group_few( bench_select_count_1_group_by_2_col_impl(table, store, b, input); } -/// SELECT COUNT(1) FROM Table GROUP BY int_col, int_col + 1 (n groups, n = row_count) +/// SELECT COUNT(1) FROM Table GROUP BY int_col, int_col + 1 (n groups, n = +/// row_count) fn bench_select_count_1_group_by_2_col_group_many( b: &mut criterion::Bencher<'_, M>, input: &Input, @@ -407,7 +410,8 @@ fn bench_select_count_1_group_by_2_col_stream_impl( .bench(b, executors, &[table.get_record_range_all()], &store); } -/// SELECT COUNT(1) FROM Table GROUP BY int_col, 
int_col + 1 (2 groups, stream aggregation) +/// SELECT COUNT(1) FROM Table GROUP BY int_col, int_col + 1 (2 groups, stream +/// aggregation) fn bench_select_count_1_group_by_2_col_group_few_stream( b: &mut criterion::Bencher<'_, M>, input: &Input, @@ -418,7 +422,8 @@ fn bench_select_count_1_group_by_2_col_group_few_stream( bench_select_count_1_group_by_2_col_stream_impl(table, store, b, input); } -/// SELECT COUNT(1) FROM Table GROUP BY int_col, int_col + 1 (n groups, n = row_count, stream aggregation) +/// SELECT COUNT(1) FROM Table GROUP BY int_col, int_col + 1 (n groups, n = +/// row_count, stream aggregation) fn bench_select_count_1_group_by_2_col_group_many_stream( b: &mut criterion::Bencher<'_, M>, input: &Input, @@ -429,7 +434,8 @@ fn bench_select_count_1_group_by_2_col_group_many_stream( bench_select_count_1_group_by_2_col_stream_impl(table, store, b, input); } -/// SELECT COUNT(1) FROM Table WHERE id > X GROUP BY int_col (2 groups, selectivity = 5%) +/// SELECT COUNT(1) FROM Table WHERE id > X GROUP BY int_col (2 groups, +/// selectivity = 5%) fn bench_select_count_1_where_fn_group_by_int_col_group_few_sel_l( b: &mut criterion::Bencher<'_, M>, input: &Input, @@ -531,7 +537,8 @@ fn bench_select_order_by_3_col_impl( .bench(b, executors, &[table.get_record_range_all()], &store); } -/// SELECT id, col1, col2 FROM Table ORDER BY isnull(col1), col1, col2 DESC LIMIT 10 +/// SELECT id, col1, col2 FROM Table ORDER BY isnull(col1), col1, col2 DESC +/// LIMIT 10 fn bench_select_order_by_3_col_limit_small(b: &mut criterion::Bencher<'_, M>, input: &Input) where M: Measurement, @@ -539,7 +546,8 @@ where bench_select_order_by_3_col_impl(10, b, input); } -/// SELECT id, col1, col2 FROM Table ORDER BY isnull(col1), col1, col2 DESC LIMIT 4000 +/// SELECT id, col1, col2 FROM Table ORDER BY isnull(col1), col1, col2 DESC +/// LIMIT 4000 fn bench_select_order_by_3_col_limit_large(b: &mut criterion::Bencher<'_, M>, input: &Input) where M: Measurement, @@ -591,8 +599,8 @@ fn 
bench_select_where_fn_order_by_3_col_impl( .bench(b, executors, &[table.get_record_range_all()], &store); } -/// SELECT id, col1, col2 FROM Table WHERE id > X ORDER BY isnull(col1), col1, col2 DESC LIMIT 10 -/// (selectivity = 0%) +/// SELECT id, col1, col2 FROM Table WHERE id > X ORDER BY isnull(col1), col1, +/// col2 DESC LIMIT 10 (selectivity = 0%) fn bench_select_where_fn_order_by_3_col_limit_small( b: &mut criterion::Bencher<'_, M>, input: &Input, @@ -602,8 +610,8 @@ fn bench_select_where_fn_order_by_3_col_limit_small( bench_select_where_fn_order_by_3_col_impl(10, b, input); } -/// SELECT id, col1, col2 FROM Table WHERE id > X ORDER BY isnull(col1), col1, col2 DESC LIMIT 4000 -/// (selectivity = 0%) +/// SELECT id, col1, col2 FROM Table WHERE id > X ORDER BY isnull(col1), col1, +/// col2 DESC LIMIT 4000 (selectivity = 0%) fn bench_select_where_fn_order_by_3_col_limit_large( b: &mut criterion::Bencher<'_, M>, input: &Input, diff --git a/tests/benches/coprocessor_executors/selection/util.rs b/tests/benches/coprocessor_executors/selection/util.rs index ef2548a3c42..85e39f49cfe 100644 --- a/tests/benches/coprocessor_executors/selection/util.rs +++ b/tests/benches/coprocessor_executors/selection/util.rs @@ -31,7 +31,8 @@ where } } -/// A bencher that will use batch selection aggregation executor to bench the giving expressions. +/// A bencher that will use batch selection aggregation executor to bench the +/// giving expressions. pub struct BatchBencher; impl SelectionBencher for BatchBencher diff --git a/tests/benches/coprocessor_executors/simple_aggr/util.rs b/tests/benches/coprocessor_executors/simple_aggr/util.rs index e3cbe14dd37..e13d1be503f 100644 --- a/tests/benches/coprocessor_executors/simple_aggr/util.rs +++ b/tests/benches/coprocessor_executors/simple_aggr/util.rs @@ -31,8 +31,8 @@ where } } -/// A bencher that will use batch simple aggregation executor to bench the giving aggregate -/// expression. 
+/// A bencher that will use batch simple aggregation executor to bench the +/// giving aggregate expression. pub struct BatchBencher; impl SimpleAggrBencher for BatchBencher diff --git a/tests/benches/coprocessor_executors/stream_aggr/mod.rs b/tests/benches/coprocessor_executors/stream_aggr/mod.rs index 9f0f3a34e66..fa82fa620a7 100644 --- a/tests/benches/coprocessor_executors/stream_aggr/mod.rs +++ b/tests/benches/coprocessor_executors/stream_aggr/mod.rs @@ -74,8 +74,8 @@ fn bench_stream_aggr_count_1_group_by_decimal_col_2_groups( input.bencher.bench(b, &fb, &group_by, &[expr]); } -/// COUNT(1) GROUP BY COL1, COL2 where COL1 is a int column and COL2 is a real column. -/// Each row is a new group. +/// COUNT(1) GROUP BY COL1, COL2 where COL1 is a int column and COL2 is a real +/// column. Each row is a new group. fn bench_stream_aggr_count_1_group_by_int_col_real_col( b: &mut criterion::Bencher<'_, M>, input: &Input, @@ -95,8 +95,8 @@ fn bench_stream_aggr_count_1_group_by_int_col_real_col( input.bencher.bench(b, &fb, &group_by, &[expr]); } -/// COUNT(1) GROUP BY COL1, COL2 where COL1 is a int column and COL2 is a real column. -/// There will be two groups totally. +/// COUNT(1) GROUP BY COL1, COL2 where COL1 is a int column and COL2 is a real +/// column. There will be two groups totally. fn bench_stream_aggr_count_1_group_by_int_col_real_col_2_groups( b: &mut criterion::Bencher<'_, M>, input: &Input, diff --git a/tests/benches/coprocessor_executors/stream_aggr/util.rs b/tests/benches/coprocessor_executors/stream_aggr/util.rs index b31a220b837..cba952150bb 100644 --- a/tests/benches/coprocessor_executors/stream_aggr/util.rs +++ b/tests/benches/coprocessor_executors/stream_aggr/util.rs @@ -37,8 +37,8 @@ where } } -/// A bencher that will use batch stream aggregation executor to bench the giving aggregate -/// expression. +/// A bencher that will use batch stream aggregation executor to bench the +/// giving aggregate expression. 
pub struct BatchBencher; impl StreamAggrBencher for BatchBencher diff --git a/tests/benches/coprocessor_executors/table_scan/fixture.rs b/tests/benches/coprocessor_executors/table_scan/fixture.rs index 8005f6fab8a..7e3dd2bfc32 100644 --- a/tests/benches/coprocessor_executors/table_scan/fixture.rs +++ b/tests/benches/coprocessor_executors/table_scan/fixture.rs @@ -23,7 +23,8 @@ pub fn table_with_2_columns(rows: usize) -> (Table, Store) { (table, store) } -/// Builds a fixture table, which contains specified number of columns: col0, col1, col2, ... +/// Builds a fixture table, which contains specified number of columns: col0, +/// col1, col2, ... pub fn table_with_multi_columns(rows: usize, columns: usize) -> (Table, Store) { let mut table = TableBuilder::new(); for idx in 0..columns { @@ -44,8 +45,8 @@ pub fn table_with_multi_columns(rows: usize, columns: usize) -> (Table, Store (Table, Store) { let mut table = TableBuilder::new(); for idx in 0..columns { @@ -67,7 +68,8 @@ pub fn table_with_missing_column(rows: usize, columns: usize) -> (Table, Store (Table, Store) { let id = ColumnBuilder::new() .col_type(TYPE_LONG) diff --git a/tests/benches/coprocessor_executors/table_scan/mod.rs b/tests/benches/coprocessor_executors/table_scan/mod.rs index 288374ae741..b030a236cbd 100644 --- a/tests/benches/coprocessor_executors/table_scan/mod.rs +++ b/tests/benches/coprocessor_executors/table_scan/mod.rs @@ -26,7 +26,8 @@ where ); } -/// 1 interested column, at the front of each row. Each row contains 100 columns. +/// 1 interested column, at the front of each row. Each row contains 100 +/// columns. /// /// This kind of scanner is used in SQLs like `SELECT COUNT(column)`. fn bench_table_scan_datum_front(b: &mut criterion::Bencher<'_, M>, input: &Input) @@ -43,7 +44,8 @@ where ); } -/// 2 interested columns, at the front of each row. Each row contains 100 columns. +/// 2 interested columns, at the front of each row. Each row contains 100 +/// columns. 
fn bench_table_scan_datum_multi_front(b: &mut criterion::Bencher<'_, M>, input: &Input) where M: Measurement, @@ -76,8 +78,8 @@ where ); } -/// 100 interested columns, all columns in the row are interested (i.e. there are totally 100 -/// columns in the row). +/// 100 interested columns, all columns in the row are interested (i.e. there +/// are totally 100 columns in the row). fn bench_table_scan_datum_all(b: &mut criterion::Bencher<'_, M>, input: &Input) where M: Measurement, @@ -92,7 +94,8 @@ where ); } -/// 3 columns in the row and the last column is very long but only PK is interested. +/// 3 columns in the row and the last column is very long but only PK is +/// interested. fn bench_table_scan_long_datum_primary_key(b: &mut criterion::Bencher<'_, M>, input: &Input) where M: Measurement, @@ -107,7 +110,8 @@ where ); } -/// 3 columns in the row and the last column is very long but a short column is interested. +/// 3 columns in the row and the last column is very long but a short column is +/// interested. fn bench_table_scan_long_datum_normal(b: &mut criterion::Bencher<'_, M>, input: &Input) where M: Measurement, @@ -122,7 +126,8 @@ where ); } -/// 3 columns in the row and the last column is very long and the long column is interested. +/// 3 columns in the row and the last column is very long and the long column is +/// interested. fn bench_table_scan_long_datum_long(b: &mut criterion::Bencher<'_, M>, input: &Input) where M: Measurement, @@ -137,7 +142,8 @@ where ); } -/// 3 columns in the row and the last column is very long and the all columns are interested. +/// 3 columns in the row and the last column is very long and the all columns +/// are interested. fn bench_table_scan_long_datum_all(b: &mut criterion::Bencher<'_, M>, input: &Input) where M: Measurement, @@ -156,8 +162,8 @@ where ); } -/// 1 interested column, but the column is missing from each row (i.e. it's default value is -/// used instead). Each row contains totally 10 columns. 
+/// 1 interested column, but the column is missing from each row (i.e. it's +/// default value is used instead). Each row contains totally 10 columns. fn bench_table_scan_datum_absent(b: &mut criterion::Bencher<'_, M>, input: &Input) where M: Measurement, @@ -172,8 +178,8 @@ where ); } -/// 1 interested column, but the column is missing from each row (i.e. it's default value is -/// used instead). Each row contains totally 100 columns. +/// 1 interested column, but the column is missing from each row (i.e. it's +/// default value is used instead). Each row contains totally 100 columns. fn bench_table_scan_datum_absent_large_row(b: &mut criterion::Bencher<'_, M>, input: &Input) where M: Measurement, diff --git a/tests/benches/coprocessor_executors/util/fixture.rs b/tests/benches/coprocessor_executors/util/fixture.rs index 0836be732f7..5910ab4fc69 100644 --- a/tests/benches/coprocessor_executors/util/fixture.rs +++ b/tests/benches/coprocessor_executors/util/fixture.rs @@ -65,7 +65,8 @@ impl FixtureBuilder { self } - /// Pushes a i64 column that values are randomly sampled from the giving values. + /// Pushes a i64 column that values are randomly sampled from the giving + /// values. pub fn push_column_i64_sampled(mut self, samples: &[i64]) -> Self { let mut rng: XorShiftRng = SeedableRng::seed_from_u64(SEED_1); let mut col = Vec::with_capacity(self.rows); @@ -77,10 +78,12 @@ impl FixtureBuilder { self } - /// Pushes a i64 column that values are filled according to the given values in order. + /// Pushes a i64 column that values are filled according to the given values + /// in order. /// - /// For example, if 3 values `[a, b, c]` are given, then the first 1/3 values in the column are - /// `a`, the second 1/3 values are `b` and the last 1/3 values are `c`. + /// For example, if 3 values `[a, b, c]` are given, then the first 1/3 + /// values in the column are `a`, the second 1/3 values are `b` and the + /// last 1/3 values are `c`. 
pub fn push_column_i64_ordered(mut self, samples: &[i64]) -> Self { let mut col = Vec::with_capacity(self.rows); for i in 0..self.rows { @@ -117,7 +120,8 @@ impl FixtureBuilder { self } - /// Pushes a f64 column that values are randomly sampled from the giving values. + /// Pushes a f64 column that values are randomly sampled from the giving + /// values. pub fn push_column_f64_sampled(mut self, samples: &[f64]) -> Self { let mut rng: XorShiftRng = SeedableRng::seed_from_u64(SEED_1); let mut col = Vec::with_capacity(self.rows); @@ -129,10 +133,12 @@ impl FixtureBuilder { self } - /// Pushes a f64 column that values are filled according to the given values in order. + /// Pushes a f64 column that values are filled according to the given values + /// in order. /// - /// For example, if 3 values `[a, b, c]` are given, then the first 1/3 values in the column are - /// `a`, the second 1/3 values are `b` and the last 1/3 values are `c`. + /// For example, if 3 values `[a, b, c]` are given, then the first 1/3 + /// values in the column are `a`, the second 1/3 values are `b` and the + /// last 1/3 values are `c`. pub fn push_column_f64_ordered(mut self, samples: &[f64]) -> Self { let mut col = Vec::with_capacity(self.rows); for i in 0..self.rows { @@ -157,7 +163,8 @@ impl FixtureBuilder { /// Pushes a decimal column that values are randomly generated. /// - /// Generated decimals have 1 to 30 integer digits and 1 to 20 fractional digits. + /// Generated decimals have 1 to 30 integer digits and 1 to 20 fractional + /// digits. pub fn push_column_decimal_random(mut self) -> Self { let mut rng: XorShiftRng = SeedableRng::seed_from_u64(SEED_2); let mut col = Vec::with_capacity(self.rows); @@ -180,7 +187,8 @@ impl FixtureBuilder { self } - /// Pushes a decimal column that values are randomly sampled from the giving values. + /// Pushes a decimal column that values are randomly sampled from the giving + /// values. 
pub fn push_column_decimal_sampled(mut self, samples: &[&str]) -> Self { let mut rng: XorShiftRng = SeedableRng::seed_from_u64(SEED_2); let mut col = Vec::with_capacity(self.rows); @@ -193,10 +201,12 @@ impl FixtureBuilder { self } - /// Pushes a decimal column that values are filled according to the given values in order. + /// Pushes a decimal column that values are filled according to the given + /// values in order. /// - /// For example, if 3 values `[a, b, c]` are given, then the first 1/3 values in the column are - /// `a`, the second 1/3 values are `b` and the last 1/3 values are `c`. + /// For example, if 3 values `[a, b, c]` are given, then the first 1/3 + /// values in the column are `a`, the second 1/3 values are `b` and the + /// last 1/3 values are `c`. pub fn push_column_decimal_ordered(mut self, samples: &[&str]) -> Self { let mut col = Vec::with_capacity(self.rows); for i in 0..self.rows { @@ -209,8 +219,8 @@ impl FixtureBuilder { self } - /// Pushes a bytes column that values are randomly generated and each value has the same length - /// as specified. + /// Pushes a bytes column that values are randomly generated and each value + /// has the same length as specified. pub fn push_column_bytes_random_fixed_len(mut self, len: usize) -> Self { let mut rng: XorShiftRng = SeedableRng::seed_from_u64(SEED_3); let mut col = Vec::with_capacity(self.rows); @@ -327,8 +337,8 @@ impl BatchExecutor for BatchFixtureExecutor { } } -/// Benches the performance of the batch fixture executor itself. When using it as the source -/// executor in other benchmarks, we need to take out these costs. +/// Benches the performance of the batch fixture executor itself. When using it +/// as the source executor in other benchmarks, we need to take out these costs. 
fn bench_util_batch_fixture_executor_next_1024(b: &mut criterion::Bencher<'_, M>) where M: Measurement, diff --git a/tests/benches/coprocessor_executors/util/mod.rs b/tests/benches/coprocessor_executors/util/mod.rs index f0a64a7e5dd..5ef442a25cd 100644 --- a/tests/benches/coprocessor_executors/util/mod.rs +++ b/tests/benches/coprocessor_executors/util/mod.rs @@ -20,8 +20,8 @@ use tipb::Executor as PbExecutor; pub use self::fixture::FixtureBuilder; -/// Gets the value of `TIKV_BENCH_LEVEL`. The larger value it is, the more comprehensive benchmarks -/// will be. +/// Gets the value of `TIKV_BENCH_LEVEL`. The larger value it is, the more +/// comprehensive benchmarks will be. pub fn bench_level() -> usize { if let Ok(s) = std::env::var("TIKV_BENCH_LEVEL") { s.parse::().unwrap() diff --git a/tests/benches/coprocessor_executors/util/store.rs b/tests/benches/coprocessor_executors/util/store.rs index 057bb2133b4..134b0e1e8d2 100644 --- a/tests/benches/coprocessor_executors/util/store.rs +++ b/tests/benches/coprocessor_executors/util/store.rs @@ -10,7 +10,8 @@ use tikv::storage::{ /// `MemStore` is a store provider that operates directly over a BTreeMap. pub type MemStore = FixtureStore; -/// `RocksStore` is a store provider that operates over a disk-based RocksDB storage. +/// `RocksStore` is a store provider that operates over a disk-based RocksDB +/// storage. 
pub type RocksStore = SnapshotStore>; pub trait StoreDescriber { diff --git a/tests/benches/hierarchy/engine/mod.rs b/tests/benches/hierarchy/engine/mod.rs index f248882a74e..85e6ce77e33 100644 --- a/tests/benches/hierarchy/engine/mod.rs +++ b/tests/benches/hierarchy/engine/mod.rs @@ -48,7 +48,7 @@ fn bench_engine_snapshot>( }); } -//exclude snapshot +// exclude snapshot fn bench_engine_get>( bencher: &mut Bencher<'_>, config: &BenchConfig, diff --git a/tests/benches/misc/storage/incremental_get.rs b/tests/benches/misc/storage/incremental_get.rs index 5c7b8e837a9..eb65f55fd72 100644 --- a/tests/benches/misc/storage/incremental_get.rs +++ b/tests/benches/misc/storage/incremental_get.rs @@ -47,8 +47,8 @@ fn table_lookup_gen_data() -> (SnapshotStore>, Vec) { false, ); - // Keys are given in order, and are far away from each other to simulate a normal table lookup - // scenario. + // Keys are given in order, and are far away from each other to simulate a + // normal table lookup scenario. let mut get_keys = Vec::new(); for i in (0..30000).step_by(30) { get_keys.push(Key::from_raw(&table::encode_row_key(5, i))); diff --git a/tests/failpoints/cases/test_async_fetch.rs b/tests/failpoints/cases/test_async_fetch.rs index 638888e83e2..78517dca8e3 100644 --- a/tests/failpoints/cases/test_async_fetch.rs +++ b/tests/failpoints/cases/test_async_fetch.rs @@ -103,7 +103,7 @@ fn test_node_async_fetch() { &cluster.engines, &before_states, 1, - false, /*must_compacted*/ + false, // must_compacted ) { return; @@ -113,7 +113,7 @@ fn test_node_async_fetch() { &cluster.engines, &before_states, 1, - true, /*must_compacted*/ + true, // must_compacted ); } @@ -256,7 +256,8 @@ fn test_node_compact_entry_cache() { // change one peer to learner cluster.pd_client.add_peer(1, new_learner_peer(5, 5)); - // cause log lag and pause async fetch to check if entry cache is reserved for the learner + // cause log lag and pause async fetch to check if entry cache is reserved for + // the learner for i 
in 1..6 { let k = i.to_string().into_bytes(); let v = k.clone(); diff --git a/tests/failpoints/cases/test_cmd_epoch_checker.rs b/tests/failpoints/cases/test_cmd_epoch_checker.rs index 00b8cd286da..1068b35f8d5 100644 --- a/tests/failpoints/cases/test_cmd_epoch_checker.rs +++ b/tests/failpoints/cases/test_cmd_epoch_checker.rs @@ -362,8 +362,8 @@ fn test_reject_proposal_during_leader_transfer() { cluster.must_put(b"k", b"v"); cluster.transfer_leader(r, new_peer(2, 2)); - // The leader can't change to transferring state immediately due to pre-transfer-leader - // feature, so wait for a while. + // The leader can't change to transferring state immediately due to + // pre-transfer-leader feature, so wait for a while. sleep_ms(100); assert_ne!(cluster.leader_of_region(r).unwrap(), new_peer(2, 2)); @@ -441,7 +441,8 @@ fn test_not_invoke_committed_cb_when_fail_to_commit() { cluster.must_transfer_leader(1, new_peer(1, 1)); cluster.must_put(b"k", b"v"); - // Partiton the leader and followers to let the leader fails to commit the proposal. + // Partition the leader and followers to let the leader fails to commit the + // proposal. cluster.partition(vec![1], vec![2, 3]); let write_req = make_write_req(&mut cluster, b"k1"); let (cb, cb_receivers) = make_cb(&write_req); @@ -462,8 +463,8 @@ fn test_not_invoke_committed_cb_when_fail_to_commit() { * cluster.cfg.raft_store.raft_election_timeout_ticks as u32; std::thread::sleep(2 * election_timeout); - // Make sure a new leader is elected and will discard the previous proposal when partition is - // recovered. + // Make sure a new leader is elected and will discard the previous proposal when + // partition is recovered. 
cluster.must_put(b"k2", b"v"); cluster.clear_send_filters(); diff --git a/tests/failpoints/cases/test_conf_change.rs b/tests/failpoints/cases/test_conf_change.rs index ef85fde1886..70194b194ac 100644 --- a/tests/failpoints/cases/test_conf_change.rs +++ b/tests/failpoints/cases/test_conf_change.rs @@ -211,7 +211,8 @@ fn test_stale_peer_cache() { // 4. peer 1 sends a snapshot with latest configuration [1, 2, 3] to peer 3; // 5. peer 3 restores the snapshot into memory; // 6. then peer 3 calling `Raft::apply_conf_change` to add peer 4; -// 7. so the disk configuration `[1, 2, 3]` is different from memory configuration `[1, 2, 3, 4]`. +// 7. so the disk configuration `[1, 2, 3]` is different from memory +// configuration `[1, 2, 3, 4]`. #[test] fn test_redundant_conf_change_by_snapshot() { let mut cluster = new_node_cluster(0, 4); diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index 60f45ae957a..818c7ba2739 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -35,7 +35,8 @@ fn test_deadline() { #[test] fn test_deadline_2() { - // It should not even take any snapshots when request is outdated from the beginning. + // It should not even take any snapshots when request is outdated from the + // beginning. let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &[]); let req = DAGSelect::from(&product).build(); @@ -198,7 +199,8 @@ fn test_paging_scan() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &data); - // set batch size and grow size to 1, so that only 1 row will be scanned in each batch. + // set batch size and grow size to 1, so that only 1 row will be scanned in each + // batch. 
fail::cfg("copr_batch_initial_size", "return(1)").unwrap(); fail::cfg("copr_batch_grow_size", "return(1)").unwrap(); for desc in [false, true] { @@ -263,7 +265,8 @@ fn test_paging_scan_multi_ranges() { ]; let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &data); - // set batch size and grow size to 1, so that only 1 row will be scanned in each batch. + // set batch size and grow size to 1, so that only 1 row will be scanned in each + // batch. fail::cfg("copr_batch_initial_size", "return(1)").unwrap(); fail::cfg("copr_batch_grow_size", "return(1)").unwrap(); diff --git a/tests/failpoints/cases/test_disk_full.rs b/tests/failpoints/cases/test_disk_full.rs index 5fb4ac7b1ca..be027ae7217 100644 --- a/tests/failpoints/cases/test_disk_full.rs +++ b/tests/failpoints/cases/test_disk_full.rs @@ -303,7 +303,8 @@ fn test_majority_disk_full() { let resp = ch.recv_timeout(Duration::from_secs(1)).unwrap(); assert_eq!(disk_full_stores(&resp), vec![2, 3]); - // Proposals with special `DiskFullOpt`s can be accepted even if all peers are disk full. + // Proposals with special `DiskFullOpt`s can be accepted even if all peers are + // disk full. fail::cfg(get_fp(DiskUsage::AlmostFull, 1), "return").unwrap(); let reqs = vec![new_put_cmd(b"k3", b"v3")]; let put = new_request(1, epoch.clone(), reqs, false); @@ -313,8 +314,9 @@ fn test_majority_disk_full() { let resp = ch.recv_timeout(Duration::from_secs(1)).unwrap(); assert!(!resp.get_header().has_error()); - // Reset disk full status for peer 2 and 3. 2 follower reads must success because the leader - // will continue to append entries to followers after the new disk usages are reported. + // Reset disk full status for peer 2 and 3. 2 follower reads must success + // because the leader will continue to append entries to followers after the + // new disk usages are reported. 
for i in 1..3 { fail::remove(get_fp(DiskUsage::AlmostFull, i + 1)); ensure_disk_usage_is_reported(&mut cluster, i + 1, i + 1, ®ion); @@ -327,8 +329,8 @@ fn test_majority_disk_full() { ensure_disk_usage_is_reported(&mut cluster, i + 1, i + 1, ®ion); } - // Proposals with special `DiskFullOpt`s will still be rejected if majority peers are already - // disk full. + // Proposals with special `DiskFullOpt`s will still be rejected if majority + // peers are already disk full. let reqs = vec![new_put_cmd(b"k3", b"v3")]; let put = new_request(1, epoch.clone(), reqs, false); let mut opts = RaftCmdExtraOpts::default(); @@ -346,8 +348,8 @@ fn test_majority_disk_full() { cluster.pd_client.must_remove_peer(1, new_peer(2, 2)); // After the last configuration change is applied, the raft group will be like - // `[(1, DiskUsage::AlmostFull), (3, DiskUsage::AlreadyFull)]`. So no more proposals - // should be allowed. + // `[(1, DiskUsage::AlmostFull), (3, DiskUsage::AlreadyFull)]`. So no more + // proposals should be allowed. let reqs = vec![new_put_cmd(b"k4", b"v4")]; let put = new_request(1, epoch, reqs, false); let mut opts = RaftCmdExtraOpts::default(); @@ -383,7 +385,8 @@ fn test_disk_full_followers_with_hibernate_regions() { fail::remove(get_fp(DiskUsage::AlmostFull, 2)); thread::sleep(tick_dur * 2); - // The leader should know peer 2's disk usage changes, because it's keeping to tick. + // The leader should know peer 2's disk usage changes, because it's keeping to + // tick. cluster.must_put(b"k2", b"v2"); must_get_equal(&cluster.get_engine(2), b"k2", b"v2"); } diff --git a/tests/failpoints/cases/test_early_apply.rs b/tests/failpoints/cases/test_early_apply.rs index b6ddf136a89..acac65cd397 100644 --- a/tests/failpoints/cases/test_early_apply.rs +++ b/tests/failpoints/cases/test_early_apply.rs @@ -82,16 +82,16 @@ fn test_multi_early_apply() { } /// Test if the commit state check of apply msg is ok. 
-/// In the previous implementation, the commit state check uses the state of last -/// committed entry and it relies on the guarantee that the commit index and term -/// of the last committed entry must be monotonically increasing even between restarting. -/// However, this guarantee can be broken by +/// In the previous implementation, the commit state check uses the state of +/// last committed entry and it relies on the guarantee that the commit index +/// and term of the last committed entry must be monotonically increasing even +/// between restarting. However, this guarantee can be broken by /// 1. memory limitation of fetching committed entries /// 2. batching apply msg -/// Now the commit state uses the minimum of persist index and commit index from the peer -/// to fix this issue. -/// For simplicity, this test uses region merge to ensure that the apply state will be written -/// to kv db before crash. +/// Now the commit state uses the minimum of persist index and commit index from +/// the peer to fix this issue. +/// For simplicity, this test uses region merge to ensure that the apply state +/// will be written to kv db before crash. #[test] fn test_early_apply_yield_followed_with_many_entries() { let mut cluster = new_node_cluster(0, 3); diff --git a/tests/failpoints/cases/test_encryption.rs b/tests/failpoints/cases/test_encryption.rs index c99674aae1e..ccb4d698e3f 100644 --- a/tests/failpoints/cases/test_encryption.rs +++ b/tests/failpoints/cases/test_encryption.rs @@ -10,13 +10,14 @@ fn test_file_dict_file_record_corrupted() { tempdir.path(), "test_file_dict_file_record_corrupted_1", true, - 10, /*file_rewrite_threshold*/ + 10, // file_rewrite_threshold ) .unwrap(); let info1 = create_file_info(1, EncryptionMethod::Aes256Ctr); let info2 = create_file_info(2, EncryptionMethod::Unknown); // 9 represents that the first 9 bytes will be discarded. 
- // Crc32 (4 bytes) + File name length (2 bytes) + FileInfo length (2 bytes) + Log type (1 bytes) + // Crc32 (4 bytes) + File name length (2 bytes) + FileInfo length (2 bytes) + + // Log type (1 bytes) fail::cfg("file_dict_log_append_incomplete", "return(9)").unwrap(); file_dict_file.insert("info1", &info1).unwrap(); fail::remove("file_dict_log_append_incomplete"); @@ -28,7 +29,7 @@ fn test_file_dict_file_record_corrupted() { tempdir.path(), "test_file_dict_file_record_corrupted_2", true, - 10, /*file_rewrite_threshold*/ + 10, // file_rewrite_threshold ) .unwrap(); let info1 = create_file_info(1, EncryptionMethod::Aes256Ctr); diff --git a/tests/failpoints/cases/test_gc_worker.rs b/tests/failpoints/cases/test_gc_worker.rs index 09308646421..c4e3e4dee71 100644 --- a/tests/failpoints/cases/test_gc_worker.rs +++ b/tests/failpoints/cases/test_gc_worker.rs @@ -28,12 +28,12 @@ use tikv::{ use tikv_util::HandyRwLock; use txn_types::{Key, TimeStamp}; -// In theory, raft can propose conf change as long as there is no pending one. Replicas -// don't apply logs synchronously, so it's possible the old leader is removed before the new -// leader applies all logs. -// In the current implementation, the new leader rejects conf change until it applies all logs. -// It guarantees the correctness of green GC. This test is to prevent breaking it in the -// future. +// In theory, raft can propose conf change as long as there is no pending one. +// Replicas don't apply logs synchronously, so it's possible the old leader is +// removed before the new leader applies all logs. +// In the current implementation, the new leader rejects conf change until it +// applies all logs. It guarantees the correctness of green GC. This test is to +// prevent breaking it in the future. 
#[test] fn test_collect_lock_from_stale_leader() { let mut cluster = new_server_cluster(0, 2); @@ -62,7 +62,8 @@ fn test_collect_lock_from_stale_leader() { ctx.set_peer(leader.clone()); ctx.set_region_epoch(cluster.get_region_epoch(region_id)); - // Pause the new peer applying so that when it becomes the leader, it doesn't apply all logs. + // Pause the new peer applying so that when it becomes the leader, it doesn't + // apply all logs. let new_leader_apply_fp = "on_handle_apply_1003"; fail::cfg(new_leader_apply_fp, "pause").unwrap(); must_kv_prewrite( @@ -73,7 +74,8 @@ fn test_collect_lock_from_stale_leader() { 10, ); - // Leader election only considers the progress of appending logs, so it can succeed. + // Leader election only considers the progress of appending logs, so it can + // succeed. cluster.must_transfer_leader(region_id, new_peer.clone()); // It shouldn't succeed in the current implementation. cluster.pd_client.remove_peer(region_id, leader.clone()); @@ -157,7 +159,8 @@ fn test_notify_observer_after_apply() { 10, ); }); - // We can use physical_scan_lock to get the lock because we notify the lock observer after writing data to the rocskdb. + // We can use physical_scan_lock to get the lock because we notify the lock + // observer after writing data to the rocskdb. let mut locks = vec![]; retry_until(|| { assert!(must_check_lock_observer(&client, max_ts, true).is_empty()); @@ -189,7 +192,8 @@ fn test_notify_observer_after_apply() { cluster .pd_client .must_add_peer(ctx.get_region_id(), new_peer(store_id, store_id)); - // We can use physical_scan_lock to get the lock because we notify the lock observer after writing data to the rocksdb. + // We can use physical_scan_lock to get the lock because we notify the lock + // observer after writing data to the rocksdb. 
let mut locks = vec![]; retry_until(|| { assert!(must_check_lock_observer(&replica_client, max_ts, true).is_empty()); @@ -213,13 +217,19 @@ fn test_notify_observer_after_apply() { ); } -// It may cause locks missing during green GC if the raftstore notifies the lock observer before writing data to the rocksdb: -// 1. Store-1 transfers a region to store-2 and store-2 is applying logs. -// 2. GC worker registers lock observer on store-2 after calling lock observer's callback and before finishing applying which means the lock won't be observed. -// 3. GC worker scans locks on each store independently. It's possible GC worker has scanned all locks on store-2 and hasn't scanned locks on store-1. -// 4. Store-2 applies all logs and removes the peer on store-1. -// 5. GC worker can't scan the lock on store-1 because the peer has been destroyed. -// 6. GC worker can't get the lock from store-2 because it can't observe the lock and has scanned it. +// It may cause locks missing during green GC if the raftstore notifies the lock +// observer before writing data to the rocksdb: +// - Store-1 transfers a region to store-2 and store-2 is applying logs. +// - GC worker registers lock observer on store-2 after calling lock observer's +// callback and before finishing applying which means the lock won't be +// observed. +// - GC worker scans locks on each store independently. It's possible GC worker +// has scanned all locks on store-2 and hasn't scanned locks on store-1. +// - Store-2 applies all logs and removes the peer on store-1. +// - GC worker can't scan the lock on store-1 because the peer has been +// destroyed. +// - GC worker can't get the lock from store-2 because it can't observe the lock +// and has scanned it. 
#[test] fn test_collect_applying_locks() { let mut cluster = new_server_cluster(0, 2); @@ -248,7 +258,8 @@ fn test_collect_applying_locks() { ctx.set_peer(leader.clone()); ctx.set_region_epoch(cluster.get_region_epoch(region_id)); - // Pause store-2 after calling observer callbacks and before writing to the rocksdb. + // Pause store-2 after calling observer callbacks and before writing to the + // rocksdb. let new_leader_apply_fp = "post_handle_apply_1003"; fail::cfg(new_leader_apply_fp, "pause").unwrap(); @@ -300,7 +311,8 @@ fn test_collect_applying_locks() { assert_eq!(locks[0].get_key(), b"k1"); } -// Test write CF's compaction filter can call `orphan_versions_handler` correctly. +// Test write CF's compaction filter can call `orphan_versions_handler` +// correctly. #[test] fn test_error_in_compaction_filter() { let engine = TestEngineBuilder::new().build().unwrap(); @@ -333,8 +345,8 @@ fn test_error_in_compaction_filter() { fail::remove(fp); } -// Test GC worker can receive and handle orphan versions emit from write CF's compaction filter -// correctly. +// Test GC worker can receive and handle orphan versions emit from write CF's +// compaction filter correctly. #[test] fn test_orphan_versions_from_compaction_filter() { let (cluster, leader, ctx) = must_new_and_configure_cluster(|cluster| { @@ -390,8 +402,9 @@ fn test_orphan_versions_from_compaction_filter() { fail::remove(fp); } -// Call `start_auto_gc` like `cmd/src/server.rs` does. It will combine compaction filter and GC -// worker so that GC worker can help to process orphan versions on default CF. +// Call `start_auto_gc` like `cmd/src/server.rs` does. It will combine +// compaction filter and GC worker so that GC worker can help to process orphan +// versions on default CF. 
fn init_compaction_filter(cluster: &Cluster, store_id: u64) { #[derive(Clone)] struct MockSafePointProvider; diff --git a/tests/failpoints/cases/test_hibernate.rs b/tests/failpoints/cases/test_hibernate.rs index 94721d0cef5..8ef0f08f19e 100644 --- a/tests/failpoints/cases/test_hibernate.rs +++ b/tests/failpoints/cases/test_hibernate.rs @@ -19,7 +19,8 @@ fn test_break_leadership_on_restart() { cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(base_tick_ms); cluster.cfg.raft_store.raft_heartbeat_ticks = 2; cluster.cfg.raft_store.raft_election_timeout_ticks = 10; - // So the random election timeout will always be 10, which makes the case more stable. + // So the random election timeout will always be 10, which makes the case more + // stable. cluster.cfg.raft_store.raft_min_election_timeout_ticks = 10; cluster.cfg.raft_store.raft_max_election_timeout_ticks = 11; configure_for_hibernate(&mut cluster); @@ -38,8 +39,8 @@ fn test_break_leadership_on_restart() { // Peer 3 will: // 1. steps a heartbeat message from its leader and then ticks 1 time. - // 2. ticks a peer_stale_state_check, which will change state from Idle to PreChaos. - // 3. continues to tick until it hibernates totally. + // 2. ticks a peer_stale_state_check, which will change state from Idle to + // PreChaos. 3. continues to tick until it hibernates totally. let (tx, rx) = mpsc::sync_channel(128); fail::cfg_callback("on_raft_base_tick_idle", move || tx.send(0).unwrap()).unwrap(); let mut raft_msg = RaftMessage::default(); @@ -65,8 +66,8 @@ fn test_break_leadership_on_restart() { // Until here, peer 3 will be like `election_elapsed=3 && missing_ticks=6`. thread::sleep(Duration::from_millis(base_tick_ms * 10)); - // Restart the peer 2 and it will broadcast `MsgRequestPreVote` later, which will wake up - // peer 1 and 3. + // Restart the peer 2 and it will broadcast `MsgRequestPreVote` later, which + // will wake up peer 1 and 3. 
let (tx, rx) = mpsc::sync_channel(128); let filter = RegionPacketFilter::new(1, 3) .direction(Direction::Send) @@ -76,6 +77,7 @@ fn test_break_leadership_on_restart() { cluster.add_send_filter(CloneFilterFactory(filter)); cluster.run_node(2).unwrap(); - // Peer 3 shouldn't start a new election, otherwise the leader may step down incorrectly. + // Peer 3 shouldn't start a new election, otherwise the leader may step down + // incorrectly. assert!(rx.recv_timeout(Duration::from_secs(2)).is_err()); } diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index ec83d8eae75..3fdb464c718 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ b/tests/failpoints/cases/test_import_service.rs @@ -128,8 +128,8 @@ fn test_ingest_reentrant() { let checksum2 = calc_crc32(save_path).unwrap(); // TODO: Remove this once write_global_seqno is deprecated. - // Checksums are the same since the global seqno in the SST file no longer gets updated with the - // default setting, which is write_global_seqno=false. + // Checksums are the same since the global seqno in the SST file no longer gets + // updated with the default setting, which is write_global_seqno=false. assert_eq!(checksum1, checksum2); // Do ingest again and it can be reentrant let resp = import.ingest(&ingest).unwrap(); @@ -155,12 +155,13 @@ fn test_ingest_key_manager_delete_file_failed() { let deregister_fp = "key_manager_fails_before_delete_file"; // the first delete is in check before ingest, the second is in ingest cleanup - // set the ingest clean up failed to trigger remove file but not remove key condition + // set the ingest clean up failed to trigger remove file but not remove key + // condition fail::cfg(deregister_fp, "1*off->1*return->off").unwrap(); - // Do an ingest and verify the result is correct. 
Though the ingest succeeded, the clone file is - // still in the key manager - //TODO: how to check the key manager contains the clone key + // Do an ingest and verify the result is correct. Though the ingest succeeded, + // the clone file is still in the key manager + // TODO: how to check the key manager contains the clone key let mut ingest = IngestRequest::default(); ingest.set_context(ctx.clone()); ingest.set_sst(meta.clone()); @@ -178,7 +179,8 @@ fn test_ingest_key_manager_delete_file_failed() { .get(&node_id) .unwrap() .get_path(&meta); - // wait up to 5 seconds to make sure raw uploaded file is deleted by the async clean up task. + // wait up to 5 seconds to make sure raw uploaded file is deleted by the async + // clean up task. for _ in 0..50 { if !save_path.as_path().exists() { break; @@ -187,7 +189,8 @@ fn test_ingest_key_manager_delete_file_failed() { } assert!(!save_path.as_path().exists()); - // Do upload and ingest again, though key manager contains this file, the ingest action should success. + // Do upload and ingest again, though key manager contains this file, the ingest + // action should success. upload_sst(&import, &meta, &data).unwrap(); let mut ingest = IngestRequest::default(); ingest.set_context(ctx); diff --git a/tests/failpoints/cases/test_kv_service.rs b/tests/failpoints/cases/test_kv_service.rs index bde6e8bb123..1f7e35b5691 100644 --- a/tests/failpoints/cases/test_kv_service.rs +++ b/tests/failpoints/cases/test_kv_service.rs @@ -17,7 +17,8 @@ fn test_batch_get_memory_lock() { fail::cfg("raftkv_async_snapshot_err", "return").unwrap(); let resp = client.kv_batch_get(&req).unwrap(); - // the injected error should be returned at both places for backward compatibility. + // the injected error should be returned at both places for backward + // compatibility. 
assert!(!resp.pairs[0].get_error().get_abort().is_empty()); assert!(!resp.get_error().get_abort().is_empty()); fail::remove("raftkv_async_snapshot_err"); @@ -34,7 +35,8 @@ fn test_kv_scan_memory_lock() { fail::cfg("raftkv_async_snapshot_err", "return").unwrap(); let resp = client.kv_scan(&req).unwrap(); - // the injected error should be returned at both places for backward compatibility. + // the injected error should be returned at both places for backward + // compatibility. assert!(!resp.pairs[0].get_error().get_abort().is_empty()); assert!(!resp.get_error().get_abort().is_empty()); fail::remove("raftkv_async_snapshot_err"); @@ -64,8 +66,8 @@ fn test_scan_lock_push_async_commit() { let k1 = b"k1"; let v1 = b"v1"; - // The following code simulates another case: prewrite is locking the memlock, and then - // another scan lock operation request meets the memlock. + // The following code simulates another case: prewrite is locking the memlock, + // and then another scan lock operation request meets the memlock. fail::cfg("before-set-lock-in-memory", "pause").unwrap(); let client1 = client.clone(); diff --git a/tests/failpoints/cases/test_memory_usage_limit.rs b/tests/failpoints/cases/test_memory_usage_limit.rs index 08c37fb330e..82aa9d5148d 100644 --- a/tests/failpoints/cases/test_memory_usage_limit.rs +++ b/tests/failpoints/cases/test_memory_usage_limit.rs @@ -13,7 +13,8 @@ use raftstore::store::MEMTRACE_ENTRY_CACHE; use test_raftstore::*; use tikv_util::config::ReadableDuration; -// Test even if memory usage reaches high water, committed entries can still get applied slowly. +// Test even if memory usage reaches high water, committed entries can still get +// applied slowly. 
#[test] fn test_memory_usage_reaches_high_water() { let mut cluster = new_node_cluster(0, 1); diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 5cb7c79011f..713ab4c5a5d 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -95,7 +95,8 @@ fn test_node_merge_rollback() { // Wait till rollback. cluster.must_put(b"k12", b"v12"); - // After premerge and rollback, conf_ver becomes 3 + 1 = 4, version becomes 4 + 2 = 6; + // After premerge and rollback, conf_ver becomes 3 + 1 = 4, version becomes 4 + + // 2 = 6; region.mut_region_epoch().set_conf_ver(4); region.mut_region_epoch().set_version(6); for i in 1..3 { @@ -195,7 +196,8 @@ fn test_node_merge_restart() { must_get_none(&cluster.get_engine(3), b"k3"); } -/// Test if merge is still working when restart a cluster during catching up logs for merge. +/// Test if merge is still working when restart a cluster during catching up +/// logs for merge. #[test] fn test_node_merge_catch_up_logs_restart() { let mut cluster = new_node_cluster(0, 3); @@ -340,8 +342,9 @@ fn test_node_merge_catch_up_logs_no_need() { // let source region not merged fail::cfg("before_handle_catch_up_logs_for_merge", "pause").unwrap(); fail::cfg("after_handle_catch_up_logs_for_merge", "pause").unwrap(); - // due to `before_handle_catch_up_logs_for_merge` failpoint, we already pass `apply_index < catch_up_logs.merge.get_commit()` - // so now can let apply index make progress. + // due to `before_handle_catch_up_logs_for_merge` failpoint, we already pass + // `apply_index < catch_up_logs.merge.get_commit()` so now can let apply + // index make progress. 
fail::remove("apply_after_prepare_merge"); // make sure all the logs are committed, including the compact command @@ -405,15 +408,15 @@ fn test_node_merge_recover_snapshot() { cluster.must_put(b"k40", b"v5"); } -// Test if a merge handled properly when there are two different snapshots of one region arrive -// in one raftstore tick. +// Test if a merge handled properly when there are two different snapshots of +// one region arrive in one raftstore tick. #[test] fn test_node_merge_multiple_snapshots_together() { test_node_merge_multiple_snapshots(true) } -// Test if a merge handled properly when there are two different snapshots of one region arrive -// in different raftstore tick. +// Test if a merge handled properly when there are two different snapshots of +// one region arrive in different raftstore tick. #[test] fn test_node_merge_multiple_snapshots_not_together() { test_node_merge_multiple_snapshots(false) @@ -471,7 +474,8 @@ fn test_node_merge_multiple_snapshots(together: bool) { .msg_type(MessageType::MsgAppend), )); - // Add a collect snapshot filter, it will delay snapshots until have collected multiple snapshots from different peers + // Add a collect snapshot filter, it will delay snapshots until have collected + // multiple snapshots from different peers cluster.sim.wl().add_recv_filter( 3, Box::new(LeadingDuplicatedSnapshotFilter::new( @@ -488,17 +492,20 @@ fn test_node_merge_multiple_snapshots(together: bool) { // Wait for snapshot to generate and send thread::sleep(Duration::from_millis(100)); - // Merge left and right region, due to isolation, the regions on store 3 are not merged yet. + // Merge left and right region, due to isolation, the regions on store 3 are not + // merged yet. pd_client.must_merge(left.get_id(), right.get_id()); thread::sleep(Duration::from_millis(200)); - // Let peer of right region on store 3 to make append response to trigger a new snapshot - // one is snapshot before merge, the other is snapshot after merge. 
- // Here blocks raftstore for a while to make it not to apply snapshot and receive new log now. + // Let peer of right region on store 3 to make append response to trigger a new + // snapshot one is snapshot before merge, the other is snapshot after merge. + // Here blocks raftstore for a while to make it not to apply snapshot and + // receive new log now. fail::cfg("on_raft_ready", "sleep(100)").unwrap(); cluster.clear_send_filters(); thread::sleep(Duration::from_millis(200)); - // Filter message again to make sure peer on store 3 can not catch up CommitMerge log + // Filter message again to make sure peer on store 3 can not catch up + // CommitMerge log cluster.add_send_filter(CloneFilterFactory( RegionPacketFilter::new(left.get_id(), 3) .direction(Direction::Recv) @@ -605,7 +612,8 @@ fn test_node_merge_restart_after_apply_premerge_before_apply_compact_log() { must_get_equal(&cluster.get_engine(3), b"k123", b"v2"); } -/// Tests whether stale merge is rollback properly if it merges to the same target region again later. +/// Tests whether stale merge is rollback properly if it merges to the same +/// target region again later. #[test] fn test_node_failed_merge_before_succeed_merge() { let mut cluster = new_node_cluster(0, 3); @@ -668,9 +676,10 @@ fn test_node_failed_merge_before_succeed_merge() { // Wait right region to send CatchUpLogs to left region. sleep_ms(100); // After executing CatchUpLogs in source peer fsm, the committed log will send - // to apply fsm in the end of this batch. So even the first `on_ready_prepare_merge` - // is executed after CatchUplogs, the latter committed logs is still sent to apply fsm - // if CatchUpLogs and `on_ready_prepare_merge` is in different batch. + // to apply fsm in the end of this batch. So even the first + // `on_ready_prepare_merge` is executed after CatchUplogs, the latter + // committed logs is still sent to apply fsm if CatchUpLogs and + // `on_ready_prepare_merge` is in different batch. 
// // In this case, the data is complete because the wrong up-to-date msg from the // first `on_ready_prepare_merge` is sent after all committed log. @@ -688,10 +697,12 @@ fn test_node_failed_merge_before_succeed_merge() { } } -/// Tests whether the source peer is destroyed correctly when transferring leader during committing merge. +/// Tests whether the source peer is destroyed correctly when transferring +/// leader during committing merge. /// -/// In the previous merge flow, target peer deletes meta of source peer without marking it as pending remove. -/// If source peer becomes leader at the same time, it will panic due to corrupted meta. +/// In the previous merge flow, target peer deletes meta of source peer without +/// marking it as pending remove. If source peer becomes leader at the same +/// time, it will panic due to corrupted meta. #[test] fn test_node_merge_transfer_leader() { let mut cluster = new_node_cluster(0, 3); @@ -703,8 +714,8 @@ fn test_node_merge_transfer_leader() { cluster.run(); - // To ensure the region has applied to its current term so that later `split` can success - // without any retries. Then, `left_peer_3` will must be `1003`. + // To ensure the region has applied to its current term so that later `split` + // can success without any retries. Then, `left_peer_3` will must be `1003`. 
let region = pd_client.get_region(b"k1").unwrap(); let peer_1 = find_peer(®ion, 1).unwrap().to_owned(); cluster.must_transfer_leader(region.get_id(), peer_1); @@ -791,7 +802,8 @@ fn test_node_merge_cascade_merge_with_apply_yield() { } } -// Test if the rollback merge proposal is proposed before the majority of peers want to rollback +// Test if the rollback merge proposal is proposed before the majority of peers +// want to rollback #[test] fn test_node_multiple_rollback_merge() { let mut cluster = new_node_cluster(0, 3); @@ -832,8 +844,8 @@ fn test_node_multiple_rollback_merge() { // Only the source leader is running `on_check_merge` fail::cfg(on_check_merge_not_1001_fp, "return()").unwrap(); fail::remove(on_schedule_merge_fp); - // In previous implementation, rollback merge proposal can be proposed by leader itself - // So wait for the leader propose rollback merge if possible + // In previous implementation, rollback merge proposal can be proposed by leader + // itself So wait for the leader propose rollback merge if possible sleep_ms(100); // Check if the source region is still in merging mode. let mut l_r = pd_client.get_region(b"k1").unwrap(); @@ -869,14 +881,14 @@ fn test_node_multiple_rollback_merge() { // In the previous implementation, the source peer will propose rollback merge // after the local target peer's epoch is larger than recorded previously. -// But it's wrong. This test constructs a case that writing data to the source region -// after merging. This operation can succeed in the previous implementation which -// causes data loss. -// In the current implementation, the rollback merge proposal can be proposed only when -// the number of peers who want to rollback merge is greater than the majority of all -// peers. If so, this merge is impossible to succeed. -// PS: A peer who wants to rollback merge means its local target peer's epoch is larger -// than recorded. +// But it's wrong. 
This test constructs a case that writing data to the source +// region after merging. This operation can succeed in the previous +// implementation which causes data loss. +// In the current implementation, the rollback merge proposal can be proposed +// only when the number of peers who want to rollback merge is greater than the +// majority of all peers. If so, this merge is impossible to succeed. +// PS: A peer who wants to rollback merge means its local target peer's epoch is +// larger than recorded. #[test] fn test_node_merge_write_data_to_source_region_after_merging() { let mut cluster = new_node_cluster(0, 3); @@ -971,13 +983,14 @@ fn test_node_merge_write_data_to_source_region_after_merging() { fail::remove(on_handle_apply_2_fp); } -/// In previous implementation, destroying its source peer(s) and applying snapshot is not **atomic**. -/// It may break the rule of our merging process. +/// In previous implementation, destroying its source peer(s) and applying +/// snapshot is not **atomic**. It may break the rule of our merging process. /// -/// A tikv crash after its source peers have destroyed but this target peer does not become to -/// `Applying` state which means it will not apply snapshot after this tikv restarts. -/// After this tikv restarts, a new leader may send logs to this target peer, then the panic may happen -/// because it can not find its source peers when applying `CommitMerge` log. +/// A tikv crash after its source peers have destroyed but this target peer does +/// not become to `Applying` state which means it will not apply snapshot after +/// this tikv restarts. After this tikv restarts, a new leader may send logs to +/// this target peer, then the panic may happen because it can not find its +/// source peers when applying `CommitMerge` log. /// /// This test is to reproduce above situation. 
#[test] @@ -1020,13 +1033,14 @@ fn test_node_merge_crash_before_snapshot_then_catch_up_logs() { pd_client.must_merge(left.get_id(), right.get_id()); region = pd_client.get_region(b"k1").unwrap(); - // Write some logs and the logs' number is greater than `raft_log_gc_count_limit` - // for latter log compaction + // Write some logs and the logs' number is greater than + // `raft_log_gc_count_limit` for latter log compaction for i in 2..15 { cluster.must_put(format!("k{}", i).as_bytes(), b"v"); } - // Aim at making peer 2 only know the compact log but do not know it is committed + // Aim at making peer 2 only know the compact log but do not know it is + // committed let condition = Arc::new(AtomicBool::new(false)); let recv_filter = Box::new( RegionPacketFilter::new(region.get_id(), 2) @@ -1052,15 +1066,16 @@ fn test_node_merge_crash_before_snapshot_then_catch_up_logs() { let peer_on_store3 = find_peer(®ion, 3).unwrap().to_owned(); assert_eq!(peer_on_store3.get_id(), 3); // Make peer 3 do not handle snapshot ready - // In previous implementation, destroying its source peer and applying snapshot is not atomic. - // So making its source peer be destroyed and do not apply snapshot to reproduce the problem + // In previous implementation, destroying its source peer and applying snapshot + // is not atomic. So making its source peer be destroyed and do not apply + // snapshot to reproduce the problem let before_handle_snapshot_ready_3_fp = "before_handle_snapshot_ready_3"; fail::cfg(before_handle_snapshot_ready_3_fp, "return()").unwrap(); cluster.clear_send_filters(); // Peer 1 will send snapshot to peer 3 - // Source peer sends msg to others to get target region info until the election timeout. - // The max election timeout is 2 * 10 * 10 = 200ms + // Source peer sends msg to others to get target region info until the election + // timeout. 
The max election timeout is 2 * 10 * 10 = 200ms let election_timeout = 2 * cluster.cfg.raft_store.raft_base_tick_interval.as_millis() * cluster.cfg.raft_store.raft_election_timeout_ticks as u64; @@ -1245,8 +1260,8 @@ fn test_prewrite_before_max_ts_is_synced() { assert!(!resp.get_region_error().has_max_timestamp_not_synced()); } -/// Testing that the source peer's read delegate should not be removed by the target peer -/// and only removed when the peer is destroyed +/// Testing that the source peer's read delegate should not be removed by the +/// target peer and only removed when the peer is destroyed #[test] fn test_source_peer_read_delegate_after_apply() { let mut cluster = new_node_cluster(0, 3); @@ -1266,10 +1281,12 @@ fn test_source_peer_read_delegate_after_apply() { let on_destroy_peer_fp = "destroy_peer"; fail::cfg(on_destroy_peer_fp, "pause").unwrap(); - // Merge finish means the leader of the target region have call `on_ready_commit_merge` + // Merge finish means the leader of the target region have call + // `on_ready_commit_merge` pd_client.must_merge(source.get_id(), target.get_id()); - // The source peer's `ReadDelegate` should not be removed yet and mark as `pending_remove` + // The source peer's `ReadDelegate` should not be removed yet and mark as + // `pending_remove` assert!( cluster.store_metas[&1] .lock() @@ -1312,8 +1329,8 @@ fn test_merge_with_concurrent_pessimistic_locking() { let left = cluster.get_region(b"k1"); let right = cluster.get_region(b"k3"); - // Transfer the leader of the right region to store 2. The leaders of source and target - // regions don't need to be on the same store. + // Transfer the leader of the right region to store 2. The leaders of source and + // target regions don't need to be on the same store. 
cluster.must_transfer_leader(right.id, new_peer(2, 2)); let snapshot = cluster.must_get_snapshot_of_region(left.id); @@ -1342,7 +1359,8 @@ fn test_merge_with_concurrent_pessimistic_locking() { fail::cfg("before_propose_locks_on_region_merge", "pause").unwrap(); - // 1. Locking before proposing pessimistic locks in the source region can succeed. + // 1. Locking before proposing pessimistic locks in the source region can + // succeed. let client2 = client.clone(); let mut mutation = Mutation::default(); mutation.set_op(Op::PessimisticLock); @@ -1453,7 +1471,8 @@ fn test_merge_pessimistic_locks_with_concurrent_prewrite() { thread::sleep(Duration::from_millis(500)); assert!(txn_ext.pessimistic_locks.read().is_writable()); - // But a later prewrite request should fail because we have already banned all later proposals. + // But a later prewrite request should fail because we have already banned all + // later proposals. req.mut_mutations()[0].set_key(b"k1".to_vec()); let resp2 = thread::spawn(move || client.kv_prewrite(&req).unwrap()); @@ -1515,14 +1534,15 @@ fn test_retry_pending_prepare_merge_fail() { propose_rx.recv_timeout(Duration::from_secs(2)).unwrap(); assert!(rx.recv_timeout(Duration::from_millis(200)).is_err()); - // Then, start merging. PrepareMerge should become pending because applied_index is smaller - // than proposed_index. + // Then, start merging. PrepareMerge should become pending because applied_index + // is smaller than proposed_index. cluster.merge_region(left.id, right.id, Callback::None); propose_rx.recv_timeout(Duration::from_secs(2)).unwrap(); thread::sleep(Duration::from_millis(200)); assert!(txn_ext.pessimistic_locks.read().is_writable()); - // Set disk full error to let PrepareMerge fail. (Set both peer to full to avoid transferring leader) + // Set disk full error to let PrepareMerge fail. 
(Set both peer to full to avoid + // transferring leader) fail::cfg("disk_already_full_peer_1", "return").unwrap(); fail::cfg("disk_already_full_peer_2", "return").unwrap(); fail::remove("on_handle_apply"); @@ -1590,7 +1610,8 @@ fn test_merge_pessimistic_locks_propose_fail() { LocksStatus::MergingRegion ); - // With the fail point set, we will fail to propose the locks or the PrepareMerge request. + // With the fail point set, we will fail to propose the locks or the + // PrepareMerge request. fail::cfg("raft_propose", "return()").unwrap(); // But after that, the pessimistic locks status should remain unchanged. @@ -1606,8 +1627,9 @@ fn test_merge_pessimistic_locks_propose_fail() { ); } -// Testing that when the source peer is destroyed while merging, it should not persist the `merge_state` -// thus won't generate gc message to destroy other peers +// Testing that when the source peer is destroyed while merging, it should not +// persist the `merge_state` thus won't generate gc message to destroy other +// peers #[test] fn test_destroy_source_peer_while_merging() { let mut cluster = new_node_cluster(0, 5); @@ -1671,9 +1693,10 @@ fn test_destroy_source_peer_while_merging() { pd_client.must_add_peer(right.get_id(), new_peer(4, 7)); must_get_equal(&cluster.get_engine(4), b"k4", b"v4"); - // if store 5 have persist the merge state, peer 2 and peer 3 will be destroyed because - // store 5 will response their request vote message with a gc message, and peer 7 will cause - // store 5 panic because peer 7 have larger peer id than the peer in the merge state + // if store 5 have persist the merge state, peer 2 and peer 3 will be destroyed + // because store 5 will response their request vote message with a gc + // message, and peer 7 will cause store 5 panic because peer 7 have larger + // peer id than the peer in the merge state cluster.clear_send_filters(); cluster.add_send_filter(IsolationFilterFactory::new(1)); diff --git a/tests/failpoints/cases/test_pending_peers.rs 
b/tests/failpoints/cases/test_pending_peers.rs index 08f028d8fcb..5618bc9ab8e 100644 --- a/tests/failpoints/cases/test_pending_peers.rs +++ b/tests/failpoints/cases/test_pending_peers.rs @@ -36,8 +36,8 @@ fn test_pending_peers() { assert!(pending_peers.is_empty()); } -// Tests if raftstore and apply worker write truncated_state concurrently could lead to -// dirty write. +// Tests if raftstore and apply worker write truncated_state concurrently could +// lead to dirty write. #[test] fn test_pending_snapshot() { let mut cluster = new_node_cluster(0, 3); diff --git a/tests/failpoints/cases/test_rawkv.rs b/tests/failpoints/cases/test_rawkv.rs index 30d0c1d995f..6db06dee35f 100644 --- a/tests/failpoints/cases/test_rawkv.rs +++ b/tests/failpoints/cases/test_rawkv.rs @@ -19,7 +19,8 @@ struct TestSuite { impl TestSuite { pub fn new(count: usize, api_version: ApiVersion) -> Self { let mut cluster = new_server_cluster_with_api_ver(1, count, api_version); - // Disable background renew by setting `renew_interval` to 0, to make timestamp allocation predictable. + // Disable background renew by setting `renew_interval` to 0, to make timestamp + // allocation predictable. configure_for_causal_ts(&mut cluster, "0s", 100); configure_for_merge(&mut cluster); cluster.run(); @@ -200,7 +201,8 @@ fn test_region_merge() { // Disable CausalObserver::flush_timestamp to produce causality issue. fail::cfg(FP_CAUSAL_OBSERVER_FLUSH_TIMESTAMP, "return").unwrap(); - // Transfer leaders: region 1 -> store 1, region 3 -> store 2, region 5 -> store 3. + // Transfer leaders: region 1 -> store 1, region 3 -> store 2, region 5 -> store + // 3. 
suite.must_transfer_leader(®ion1, 1); suite.must_transfer_leader(®ion3, 2); suite.must_transfer_leader(®ion5, 3); diff --git a/tests/failpoints/cases/test_replica_read.rs b/tests/failpoints/cases/test_replica_read.rs index e288828dc66..7a6da017d99 100644 --- a/tests/failpoints/cases/test_replica_read.rs +++ b/tests/failpoints/cases/test_replica_read.rs @@ -41,8 +41,8 @@ fn test_wait_for_apply_index() { cluster.must_put(b"k1", b"v1"); must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); - // Peer 3 does not apply the cmd of putting 'k1' right now, then the follower read must - // be blocked. + // Peer 3 does not apply the cmd of putting 'k1' right now, then the follower + // read must be blocked. must_get_none(&cluster.get_engine(3), b"k1"); let mut request = new_request( region.get_id(), @@ -354,12 +354,14 @@ fn test_read_after_cleanup_range_for_snap() { rx1.recv_timeout(Duration::from_secs(5)).unwrap(); } -/// Tests the learner of new split region will know its leader without waiting for the leader heartbeat timeout. +/// Tests the learner of new split region will know its leader without waiting +/// for the leader heartbeat timeout. /// /// Before https://github.com/tikv/tikv/pull/8820, -/// the learner of a new split region may not know its leader if it applies log slowly and drops the no-op -/// entry from the new leader, and it had to wait for a heartbeat timeout to know its leader before that it -/// can't handle any read request. +/// the learner of a new split region may not know its leader if it applies log +/// slowly and drops the no-op entry from the new leader, and it had to wait for +/// a heartbeat timeout to know its leader before that it can't handle any read +/// request. 
#[test] fn test_new_split_learner_can_not_find_leader() { let mut cluster = new_node_cluster(0, 4); @@ -383,9 +385,10 @@ fn test_new_split_learner_can_not_find_leader() { let region = cluster.get_region(b"k3"); cluster.must_split(®ion, b"k3"); - // This `put` will not inform learner leadership because the The learner is paused at apply split command, - // so the learner peer of the new split region is not create yet. Also, the leader will not send another - // append request before the previous one response as all peer is initiated with the `Probe` mod + // This `put` will not inform learner leadership because the The learner is + // paused at apply split command, so the learner peer of the new split region is + // not create yet. Also, the leader will not send another append request before + // the previous one response as all peer is initiated with the `Probe` mod cluster.must_put(b"k2", b"v2"); assert_eq!(cluster.get(b"k2"), Some(b"v2".to_vec())); @@ -402,8 +405,8 @@ fn test_new_split_learner_can_not_find_leader() { assert_eq!(exp_value, b"v2"); } -/// Test if the read index request can get a correct response when the commit index of leader -/// if not up-to-date after transferring leader. +/// Test if the read index request can get a correct response when the commit +/// index of leader if not up-to-date after transferring leader. 
#[test] fn test_replica_read_after_transfer_leader() { let mut cluster = new_node_cluster(0, 3); @@ -454,7 +457,8 @@ fn test_replica_read_after_transfer_leader() { // Wait peer 1 and 3 to send heartbeat response to peer 2 sleep_ms(100); - // Pause before collecting message to make the these message be handled in one loop + // Pause before collecting message to make the these message be handled in one + // loop let on_peer_collect_message_2 = "on_peer_collect_message_2"; fail::cfg(on_peer_collect_message_2, "pause").unwrap(); @@ -477,8 +481,8 @@ fn test_replica_read_after_transfer_leader() { assert_eq!(exp_value, b"v2"); } -// This test is for reproducing the bug that some replica reads was sent to a leader and shared a same -// read index because of the optimization on leader. +// This test is for reproducing the bug that some replica reads was sent to a +// leader and shared a same read index because of the optimization on leader. #[test] fn test_read_index_after_transfer_leader() { let mut cluster = new_node_cluster(0, 3); @@ -511,7 +515,8 @@ fn test_read_index_after_transfer_leader() { async_read_index_on_peer(&mut cluster, new_peer(2, 2), region.clone(), b"k1", true); responses.push(resp); } - // Try to split the region to change the peer into `splitting` state then can not handle read requests. + // Try to split the region to change the peer into `splitting` state then can + // not handle read requests. cluster.split_region(®ion, b"k2", raftstore::store::Callback::None); // Wait the split command be sent. sleep_ms(100); @@ -525,12 +530,15 @@ fn test_read_index_after_transfer_leader() { let msg_type = msg.get_message().get_msg_type(); matches!(msg_type, MessageType::MsgAppendResponse) }); - // Transfer leader to peer 1, peer 2 should not change role since we added a recv filter. + // Transfer leader to peer 1, peer 2 should not change role since we added a + // recv filter. 
cluster.transfer_leader(region_id, new_peer(1, 1)); - // Pause before collecting peer messages to make sure all messages can be handled in one batch. + // Pause before collecting peer messages to make sure all messages can be + // handled in one batch. let on_peer_collect_message_2 = "on_peer_collect_message_2"; fail::cfg(on_peer_collect_message_2, "pause").unwrap(); - // Pause apply worker to stop the split command so peer 2 would keep in `splitting` state. + // Pause apply worker to stop the split command so peer 2 would keep in + // `splitting` state. let on_handle_apply_2 = "on_handle_apply_2"; fail::cfg(on_handle_apply_2, "pause").unwrap(); // Send heartbeat and append responses to advance read index. @@ -544,8 +552,8 @@ fn test_read_index_after_transfer_leader() { fail::remove(on_peer_collect_message_2); // Wait for read index has been advanced. sleep_ms(100); - // Filter and send vote message, peer 2 would step down to follower and try to handle read requests - // as a follower. + // Filter and send vote message, peer 2 would step down to follower and try to + // handle read requests as a follower. let msgs = std::mem::take(&mut *dropped_msgs.lock().unwrap()); let vote_msgs = msgs.iter().filter(|msg| { let msg_type = msg.get_message().get_msg_type(); @@ -566,8 +574,8 @@ fn test_read_index_after_transfer_leader() { fail::remove(on_handle_apply_2); } -/// Test if the read index request can get a correct response when the commit index of leader -/// if not up-to-date after transferring leader. +/// Test if the read index request can get a correct response when the commit +/// index of leader if not up-to-date after transferring leader. 
#[test] fn test_batch_read_index_after_transfer_leader() { let mut cluster = new_node_cluster(0, 3); @@ -598,7 +606,8 @@ fn test_batch_read_index_after_transfer_leader() { cluster.must_transfer_leader(1, new_peer(2, 2)); - // Pause before collecting message to make the these message be handled in one loop + // Pause before collecting message to make the these message be handled in one + // loop let on_peer_collect_message_2 = "on_peer_collect_message_2"; fail::cfg(on_peer_collect_message_2, "pause").unwrap(); @@ -627,7 +636,8 @@ fn test_batch_read_index_after_transfer_leader() { .map(|x| x.recv_timeout(Duration::from_secs(5)).unwrap()) .collect::>(); - // `term` in the header is `current_term`, not term of the entry at `read_index`. + // `term` in the header is `current_term`, not term of the entry at + // `read_index`. let term = resps[0].get_header().get_current_term(); assert_eq!(term, resps[1].get_header().get_current_term()); assert_eq!(term, pd_client.get_region_last_report_term(1).unwrap()); @@ -636,8 +646,9 @@ fn test_batch_read_index_after_transfer_leader() { let index = resps[i].responses[0].get_read_index().read_index; let raft_engine = cluster.get_raft_engine(2); let entry = raft_engine.get_entry(1, index).unwrap().unwrap(); - // According to Raft, a peer shouldn't be able to perform read index until it commits - // to the current term. So term of `read_index` must equal to the current one. + // According to Raft, a peer shouldn't be able to perform read index until it + // commits to the current term. So term of `read_index` must equal to + // the current one. assert_eq!(entry.get_term(), term); } } @@ -701,8 +712,8 @@ fn test_read_index_lock_checking_on_follower() { let guard = block_on(leader_cm.lock_key(&Key::from_raw(b"k1"))); guard.with_lock(|l| *l = Some(lock.clone())); - // Now, the leader has been transferred to peer 3. The original read index request - // will be first sent to peer 1 and then redirected to peer 3. 
+ // Now, the leader has been transferred to peer 3. The original read index + // request will be first sent to peer 1 and then redirected to peer 3. // We must make sure the lock check is done on peer 3. fail::remove("before_propose_readindex"); @@ -779,14 +790,14 @@ fn test_read_index_lock_checking_on_false_leader() { let guard = block_on(leader_cm.lock_key(&Key::from_raw(b"k1"))); guard.with_lock(|l| *l = Some(lock.clone())); - // Read index from peer 2, the read index message will be sent to the old leader peer 1. - // But the lease of peer 1 has expired and it cannot get majority of heartbeat. - // So, we cannot get the result here. + // Read index from peer 2, the read index message will be sent to the old leader + // peer 1. But the lease of peer 1 has expired and it cannot get majority of + // heartbeat. So, we cannot get the result here. let resp = async_read_index_on_peer(&mut cluster, new_peer(2, 2), r1, b"k1", true); assert!(resp.recv_timeout(Duration::from_millis(300)).is_err()); - // Now, restore the network partition. Peer 1 should now become follower and drop its - // pending read index request. Peer 2 cannot get the result now. + // Now, restore the network partition. Peer 1 should now become follower and + // drop its pending read index request. Peer 2 cannot get the result now. let recv_filter = Box::new( RegionPacketFilter::new(rid, 2) .direction(Direction::Recv) diff --git a/tests/failpoints/cases/test_replica_stale_read.rs b/tests/failpoints/cases/test_replica_stale_read.rs index ab11b7039fd..a8aaa030bfc 100644 --- a/tests/failpoints/cases/test_replica_stale_read.rs +++ b/tests/failpoints/cases/test_replica_stale_read.rs @@ -111,10 +111,12 @@ fn test_stale_read_basic_flow_lock() { b"key1".to_vec(), ); - // Assert `(key1, value2)` can't be readed with `commit_ts2` due to it's larger than the `start_ts` of `key2`. + // Assert `(key1, value2)` can't be read with `commit_ts2` due to it's larger + // than the `start_ts` of `key2`. 
let resp = follower_client2.kv_read(b"key1".to_vec(), commit_ts2); assert!(resp.get_region_error().has_data_is_not_ready()); - // Still can read `(key1, value1)` since `commit_ts1` is less than the `key2` lock's `start_ts` + // Still can read `(key1, value1)` since `commit_ts1` is less than the `key2` + // lock's `start_ts` follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), commit_ts1); // Prewrite on `key3` but not commit yet @@ -129,7 +131,8 @@ fn test_stale_read_basic_flow_lock() { leader_client.must_kv_commit(vec![b"key2".to_vec()], k2_prewrite_ts, k2_commit_ts); // Although there is still lock on the region, but the min lock is refreshed - // to the `key3`'s lock, now we can read `(key1, value2)` but not `(key2, value1)` + // to the `key3`'s lock, now we can read `(key1, value2)` but not `(key2, + // value1)` follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value2".to_vec(), commit_ts2); let resp = follower_client2.kv_read(b"key2".to_vec(), k2_commit_ts); assert!(resp.get_region_error().has_data_is_not_ready()); @@ -144,9 +147,9 @@ fn test_stale_read_basic_flow_lock() { follower_client2.must_kv_read_equal(b"key3".to_vec(), b"value1".to_vec(), get_tso(&pd_client)); } -// Testing that even leader's `apply_index` updated before sync the `(apply_index, safe_ts)` -// item to other replica, the `apply_index` in the `(apply_index, safe_ts)` item should not -// be updated +// Testing that even leader's `apply_index` updated before sync the +// `(apply_index, safe_ts)` item to other replica, the `apply_index` in the +// `(apply_index, safe_ts)` item should not be updated #[test] fn test_update_apply_index_before_sync_read_state() { let (mut cluster, pd_client, mut leader_client) = prepare_for_stale_read(new_peer(1, 1)); @@ -195,9 +198,9 @@ fn test_update_apply_index_before_sync_read_state() { follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), commit_ts1); } -// Testing that if `resolved_ts` updated before 
`apply_index` update, the `safe_ts` -// won't be updated, hence the leader won't broadcast a wrong `(apply_index, safe_ts)` -// item to other replicas +// Testing that if `resolved_ts` updated before `apply_index` update, the +// `safe_ts` won't be updated, hence the leader won't broadcast a wrong +// `(apply_index, safe_ts)` item to other replicas #[test] fn test_update_resoved_ts_before_apply_index() { let (mut cluster, pd_client, mut leader_client) = prepare_for_stale_read(new_peer(1, 1)); @@ -213,7 +216,8 @@ fn test_update_resoved_ts_before_apply_index() { ); follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), commit_ts1); - // Return before handling `apply_res`, to stop the leader updating the apply index + // Return before handling `apply_res`, to stop the leader updating the apply + // index let on_apply_res_fp = "on_apply_res"; fail::cfg(on_apply_res_fp, "return()").unwrap(); // Stop replicate data to follower 2 @@ -249,7 +253,8 @@ fn test_update_resoved_ts_before_apply_index() { follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value2".to_vec(), commit_ts2); } -// Testing that the new elected leader should initialize the `resolver` correctly +// Testing that the new elected leader should initialize the `resolver` +// correctly #[test] fn test_new_leader_init_resolver() { let (mut cluster, pd_client, mut peer_client1) = prepare_for_stale_read(new_peer(1, 1)); @@ -264,8 +269,8 @@ fn test_new_leader_init_resolver() { b"key1".to_vec(), ); - // There are no lock in the region, the `safe_ts` should keep updating by the new leader, - // so we can read `key1` with the newest ts + // There are no lock in the region, the `safe_ts` should keep updating by the + // new leader, so we can read `key1` with the newest ts cluster.must_transfer_leader(1, new_peer(2, 2)); peer_client1.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), get_tso(&pd_client)); @@ -276,8 +281,8 @@ fn test_new_leader_init_resolver() { get_tso(&pd_client), ); - // 
There are locks in the region, the `safe_ts` can't be updated, so we can't read - // `key1` with the newest ts + // There are locks in the region, the `safe_ts` can't be updated, so we can't + // read `key1` with the newest ts cluster.must_transfer_leader(1, new_peer(1, 1)); let resp = peer_client2.kv_read(b"key1".to_vec(), get_tso(&pd_client)); assert!(resp.get_region_error().has_data_is_not_ready()); @@ -285,8 +290,9 @@ fn test_new_leader_init_resolver() { peer_client2.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), commit_ts1); } -// Testing that while applying snapshot the follower should reset its `safe_ts` to 0 and -// reject incoming stale read request, then resume the `safe_ts` after applying snapshot +// Testing that while applying snapshot the follower should reset its `safe_ts` +// to 0 and reject incoming stale read request, then resume the `safe_ts` after +// applying snapshot #[test] fn test_stale_read_while_applying_snapshot() { let (mut cluster, pd_client, leader_client) = @@ -398,15 +404,17 @@ fn test_stale_read_while_region_merge() { b"key5".to_vec(), ); - // Merge source region into target region, the lock on source region should also merge - // into the target region and cause the target region's `safe_ts` decrease + // Merge source region into target region, the lock on source region should also + // merge into the target region and cause the target region's `safe_ts` + // decrease pd_client.must_merge(source.get_id(), target.get_id()); let mut follower_client2 = PeerClient::new(&cluster, target.get_id(), new_peer(2, 2)); follower_client2.ctx.set_stale_read(true); // We can read `(key5, value1)` with `k1_prewrite_ts` follower_client2.must_kv_read_equal(b"key5".to_vec(), b"value1".to_vec(), k1_prewrite_ts); - // Can't read `key5` with `k5_commit_ts` because `k1_prewrite_ts` is smaller than `k5_commit_ts` + // Can't read `key5` with `k5_commit_ts` because `k1_prewrite_ts` is smaller + // than `k5_commit_ts` let resp = 
follower_client2.kv_read(b"key5".to_vec(), k5_commit_ts); assert!(resp.get_region_error().has_data_is_not_ready()); @@ -417,7 +425,8 @@ fn test_stale_read_while_region_merge() { follower_client2.must_kv_read_equal(b"key5".to_vec(), b"value2".to_vec(), get_tso(&pd_client)); } -// Testing that after region merge, the `safe_ts` could be advanced even without any incoming write +// Testing that after region merge, the `safe_ts` could be advanced even without +// any incoming write #[test] fn test_stale_read_after_merge() { let (mut cluster, pd_client, _) = @@ -444,9 +453,9 @@ fn test_stale_read_after_merge() { follower_client2.must_kv_read_equal(b"key5".to_vec(), b"value1".to_vec(), get_tso(&pd_client)); } -// Testing that during the merge, the leader of the source region won't not update the -// `safe_ts` since it can't know when the merge is completed and whether there are new -// kv write into its key range +// Testing that during the merge, the leader of the source region won't not +// update the `safe_ts` since it can't know when the merge is completed and +// whether there are new kv write into its key range #[test] fn test_read_source_region_after_target_region_merged() { let (mut cluster, pd_client, leader_client) = @@ -462,7 +471,8 @@ fn test_read_source_region_after_target_region_merged() { cluster.must_split(&cluster.get_region(&[]), b"key3"); let source = pd_client.get_region(b"key1").unwrap(); let target = pd_client.get_region(b"key5").unwrap(); - // Transfer the target region leader to store 1 and the source region leader to store 2 + // Transfer the target region leader to store 1 and the source region leader to + // store 2 cluster.must_transfer_leader(target.get_id(), new_peer(1, 1)); cluster.must_transfer_leader(source.get_id(), find_peer(&source, 2).unwrap().clone()); // Get the source region follower on store 3 @@ -481,7 +491,8 @@ fn test_read_source_region_after_target_region_merged() { // Merge source region into target region 
pd_client.must_merge(source.get_id(), target.get_id()); - // Leave a lock on the original source region key range through the target region leader + // Leave a lock on the original source region key range through the target + // region leader let target_leader = PeerClient::new(&cluster, target.get_id(), new_peer(1, 1)); let k1_prewrite_ts2 = get_tso(&pd_client); target_leader.must_kv_prewrite( @@ -495,17 +506,17 @@ fn test_read_source_region_after_target_region_merged() { // We still can read `key1` with `k1_commit_ts1` through source region source_follower_client3.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), k1_commit_ts1); - // But can't read `key2` with `k1_prewrite_ts2` because the source leader can't update - // `safe_ts` after source region is merged into target region even though the source leader - // didn't know the merge is complement + // But can't read `key2` with `k1_prewrite_ts2` because the source leader can't + // update `safe_ts` after source region is merged into target region even + // though the source leader didn't know the merge is complement let resp = source_follower_client3.kv_read(b"key1".to_vec(), k1_prewrite_ts2); assert!(resp.get_region_error().has_data_is_not_ready()); fail::remove(apply_before_prepare_merge_2_3); } -// Testing that altough the source region's `safe_ts` wont't be updated during merge, after merge -// rollbacked it should resume updating +// Testing that altough the source region's `safe_ts` wont't be updated during +// merge, after merge rollbacked it should resume updating #[test] fn test_stale_read_after_rollback_merge() { let (mut cluster, pd_client, leader_client) = @@ -539,12 +550,13 @@ fn test_stale_read_after_rollback_merge() { find_peer(&source, 3).unwrap().clone(), ); source_client3.ctx.set_stale_read(true); - // the `safe_ts` should resume updating after merge rollback so we can read `key1` with the newest ts + // the `safe_ts` should resume updating after merge rollback so we can read + // 
`key1` with the newest ts source_client3.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), get_tso(&pd_client)); } -// Testing that the new leader should ignore the pessimistic lock that wrote by the previous -// leader and keep updating the `safe_ts` +// Testing that the new leader should ignore the pessimistic lock that wrote by +// the previous leader and keep updating the `safe_ts` #[test] fn test_new_leader_ignore_pessimistic_lock() { let (mut cluster, pd_client, leader_client) = prepare_for_stale_read(new_peer(1, 1)); @@ -564,7 +576,8 @@ fn test_new_leader_ignore_pessimistic_lock() { let mut follower_client3 = PeerClient::new(&cluster, 1, new_peer(3, 3)); follower_client3.ctx.set_stale_read(true); - // The new leader should be able to update `safe_ts` so we can read `key1` with the newest ts + // The new leader should be able to update `safe_ts` so we can read `key1` with + // the newest ts follower_client3.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), get_tso(&pd_client)); } @@ -590,7 +603,8 @@ fn test_stale_read_on_learner() { learner_client2.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), get_tso(&pd_client)); } -// Testing that stale read request with a future ts should not update the `concurency_manager`'s `max_ts` +// Testing that stale read request with a future ts should not update the +// `concurrency_manager`'s `max_ts` #[test] fn test_stale_read_future_ts_not_update_max_ts() { let (_cluster, pd_client, mut leader_client) = prepare_for_stale_read(new_peer(1, 1)); @@ -608,8 +622,9 @@ fn test_stale_read_future_ts_not_update_max_ts() { let resp = leader_client.kv_read(b"key1".to_vec(), read_ts); assert!(resp.get_region_error().has_data_is_not_ready()); - // The `max_ts` should not updated by the stale read request, so we can prewrite and commit - // `async_commit` transaction with a ts that smaller than the `read_ts` + // The `max_ts` should not updated by the stale read request, so we can prewrite + // and commit 
`async_commit` transaction with a ts that smaller than the + // `read_ts` let prewrite_ts = get_tso(&pd_client); assert!(prewrite_ts < read_ts); leader_client.must_kv_prewrite_async_commit( @@ -627,8 +642,8 @@ fn test_stale_read_future_ts_not_update_max_ts() { let resp = leader_client.kv_read(b"key1".to_vec(), read_ts); assert!(resp.get_region_error().has_data_is_not_ready()); - // The `max_ts` should not updated by the stale read request, so 1pc transaction with a ts that smaller - // than the `read_ts` should not be fallbacked to 2pc + // The `max_ts` should not updated by the stale read request, so 1pc transaction + // with a ts that smaller than the `read_ts` should not be fallbacked to 2pc let prewrite_ts = get_tso(&pd_client); assert!(prewrite_ts < read_ts); leader_client.must_kv_prewrite_one_pc( diff --git a/tests/failpoints/cases/test_server.rs b/tests/failpoints/cases/test_server.rs index 9d552eadee3..9c34fd13529 100644 --- a/tests/failpoints/cases/test_server.rs +++ b/tests/failpoints/cases/test_server.rs @@ -9,10 +9,10 @@ use raft::eraftpb::MessageType; use test_raftstore::*; use tikv_util::{config::ReadableDuration, HandyRwLock}; -/// When encountering raft/batch_raft mismatch store id error, the service is expected -/// to drop connections in order to let raft_client re-resolve store address from PD -/// This will make the mismatch error be automatically corrected. -/// Ths test verified this case. +/// When encountering raft/batch_raft mismatch store id error, the service is +/// expected to drop connections in order to let raft_client re-resolve store +/// address from PD This will make the mismatch error be automatically +/// corrected. Ths test verified this case. 
#[test] fn test_mismatch_store_node() { let count = 3; diff --git a/tests/failpoints/cases/test_snap.rs b/tests/failpoints/cases/test_snap.rs index a899af3466e..3507fc268d4 100644 --- a/tests/failpoints/cases/test_snap.rs +++ b/tests/failpoints/cases/test_snap.rs @@ -46,8 +46,8 @@ fn test_overlap_cleanup() { cluster.must_split(®ion1, b"k2"); // Wait till the snapshot of split region is applied, whose range is ["", "k2"). must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); - // Resume the fail point and pause it again. So only the paused snapshot is generated. - // And the paused snapshot's range is ["", ""), hence overlap. + // Resume the fail point and pause it again. So only the paused snapshot is + // generated. And the paused snapshot's range is ["", ""), hence overlap. fail::cfg(gen_snapshot_fp, "pause").unwrap(); // Overlap snapshot should be deleted. assert_snapshot(&cluster.get_snap_dir(3), region_id, false); @@ -186,11 +186,12 @@ fn assert_snapshot(snap_dir: &str, region_id: u64, exist: bool) { } } -// A peer on store 3 is isolated and is applying snapshot. (add failpoint so it's always pending) -// Then two conf change happens, this peer is removed and a new peer is added on store 3. -// Then isolation clear, this peer will be destroyed because of a bigger peer id in msg. -// In previous implementation, peer fsm can be destroyed synchronously because snapshot state is -// pending and can be canceled, but panic may happen if the applyfsm runs very slow. +// A peer on store 3 is isolated and is applying snapshot. (add failpoint so +// it's always pending) Then two conf change happens, this peer is removed and a +// new peer is added on store 3. Then isolation clear, this peer will be +// destroyed because of a bigger peer id in msg. In previous implementation, +// peer fsm can be destroyed synchronously because snapshot state is pending and +// can be canceled, but panic may happen if the applyfsm runs very slow. 
#[test] fn test_destroy_peer_on_pending_snapshot() { let mut cluster = new_server_cluster(0, 3); @@ -252,10 +253,11 @@ fn test_destroy_peer_on_pending_snapshot() { } // The peer 3 in store 3 is isolated for a while and then recovered. -// During its applying snapshot, however the peer is destroyed and thus applying snapshot is canceled. -// And when it's destroyed (destroy is not finished either), the machine restarted. -// After the restart, the snapshot should be applied successfully.println! -// And new data should be written to store 3 successfully. +// During its applying snapshot, however the peer is destroyed and thus applying +// snapshot is canceled. And when it's destroyed (destroy is not finished +// either), the machine restarted. After the restart, the snapshot should be +// applied successfully.println! And new data should be written to store 3 +// successfully. #[test] fn test_destroy_peer_on_pending_snapshot_and_restart() { let mut cluster = new_server_cluster(0, 3); @@ -315,7 +317,8 @@ fn test_destroy_peer_on_pending_snapshot_and_restart() { must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); // After peer 3 has applied snapshot, data should be got. must_get_equal(&cluster.get_engine(3), b"k119", b"v1"); - // In the end the snapshot file should be gc-ed anyway, either by new peer or by store + // In the end the snapshot file should be gc-ed anyway, either by new peer or by + // store let now = Instant::now(); loop { let mut snap_files = vec![]; @@ -464,8 +467,9 @@ fn test_receive_old_snapshot() { pd_client.must_add_peer(left.get_id(), new_peer(2, 4)); cluster.must_put(b"k11", b"v1"); - // If peer 2 handles previous old snapshot properly and does not leave over metadata - // in `pending_snapshot_regions`, peer 4 should be created normally. + // If peer 2 handles previous old snapshot properly and does not leave over + // metadata in `pending_snapshot_regions`, peer 4 should be created + // normally. 
must_get_equal(&cluster.get_engine(2), b"k11", b"v1"); fail::remove(peer_2_handle_snap_mgr_gc_fp); @@ -509,7 +513,8 @@ fn test_gen_snapshot_with_no_committed_entries_ready() { // 1. pause snapshot generating with a failpoint, and then add a new peer; // 2. append more Raft logs to the region to trigger raft log compactions; // 3. disable the failpoint to continue snapshot generating; -// 4. the generated snapshot should have a larger index than the latest `truncated_idx`. +// 4. the generated snapshot should have a larger index than the latest +// `truncated_idx`. #[test] fn test_cancel_snapshot_generating() { let mut cluster = new_node_cluster(0, 5); @@ -670,15 +675,17 @@ fn test_sending_fail_with_net_error() { // need to wait receiver handle the snapshot request sleep_ms(100); - // peer2 will not become learner so ti will has k1 key and receiving count will zero + // peer2 will not become learner so ti will has k1 key and receiving count will + // zero let engine2 = cluster.get_engine(2); must_get_none(&engine2, b"k1"); assert_eq!(cluster.get_snap_mgr(2).stats().receiving_count, 0); } /// Logs scan are now moved to raftlog gc threads. The case is to test if logs -/// are still cleaned up when there is stale logs before first index during applying -/// snapshot. It's expected to schedule a gc task after applying snapshot. +/// are still cleaned up when there is stale logs before first index during +/// applying snapshot. It's expected to schedule a gc task after applying +/// snapshot. #[test] fn test_snapshot_clean_up_logs_with_unfinished_log_gc() { let mut cluster = new_node_cluster(0, 3); @@ -730,7 +737,8 @@ fn test_snapshot_clean_up_logs_with_unfinished_log_gc() { assert!(dest[0].get_index() > truncated_index, "{:?}", dest); } -/// Redo snapshot apply after restart when kvdb state is updated but raftdb state is not. +/// Redo snapshot apply after restart when kvdb state is updated but raftdb +/// state is not. 
#[test] fn test_snapshot_recover_from_raft_write_failure() { let mut cluster = new_server_cluster(0, 3); diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 09eb603ff8e..92aee023fa5 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -259,9 +259,10 @@ impl Filter for PrevoteRangeFilter { } } -// Test if a peer is created from splitting when another initialized peer with the same -// region id has already existed. In previous implementation, it can be created and panic -// will happen because there are two initialized peer with the same region id. +// Test if a peer is created from splitting when another initialized peer with +// the same region id has already existed. In previous implementation, it can be +// created and panic will happen because there are two initialized peer with the +// same region id. #[test] fn test_split_not_to_split_existing_region() { let mut cluster = new_node_cluster(0, 4); @@ -333,8 +334,8 @@ fn test_split_not_to_split_existing_region() { must_get_none(&cluster.get_engine(3), b"k0"); } -// Test if a peer is created from splitting when another initialized peer with the same -// region id existed before and has been destroyed now. +// Test if a peer is created from splitting when another initialized peer with +// the same region id existed before and has been destroyed now. #[test] fn test_split_not_to_split_existing_tombstone_region() { let mut cluster = new_node_cluster(0, 3); @@ -401,8 +402,8 @@ fn test_split_not_to_split_existing_tombstone_region() { } // TiKV uses memory lock to control the order between spliting and creating -// new peer. This case test if tikv continues split if the peer is destroyed after -// memory lock check. +// new peer. This case test if tikv continues split if the peer is destroyed +// after memory lock check. 
#[test] fn test_split_continue_when_destroy_peer_after_mem_check() { let mut cluster = new_node_cluster(0, 3); @@ -478,8 +479,8 @@ fn test_split_continue_when_destroy_peer_after_mem_check() { // If value of `k22` is equal to `v22`, the previous split log must be applied. must_get_equal(&cluster.get_engine(2), b"k22", b"v22"); - // Once it's marked split in memcheck, destroy should not write tombstone otherwise it will - // break the region states. Hence split should continue. + // Once it's marked split in memcheck, destroy should not write tombstone + // otherwise it will break the region states. Hence split should continue. must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); cluster.clear_send_filters(); @@ -488,8 +489,8 @@ fn test_split_continue_when_destroy_peer_after_mem_check() { must_get_none(&cluster.get_engine(2), b"k1"); } -// Test if a peer can be created from splitting when another uninitialied peer with the same -// peer id has been created on this store. +// Test if a peer can be created from splitting when another uninitialied peer +// with the same peer id has been created on this store. #[test] fn test_split_should_split_existing_same_uninitialied_peer() { let mut cluster = new_node_cluster(0, 3); @@ -541,8 +542,8 @@ fn test_split_should_split_existing_same_uninitialied_peer() { must_get_equal(&cluster.get_engine(2), b"k11", b"v11"); } -// Test if a peer can be created from splitting when another uninitialied peer with different -// peer id has been created on this store. +// Test if a peer can be created from splitting when another uninitialied peer +// with different peer id has been created on this store. 
#[test] fn test_split_not_to_split_existing_different_uninitialied_peer() { let mut cluster = new_node_cluster(0, 3); @@ -597,7 +598,8 @@ fn test_split_not_to_split_existing_different_uninitialied_peer() { // peer 2 applied snapshot must_get_equal(&cluster.get_engine(2), b"k2", b"v2"); - // But only the right part because there is a peer 4 of region 1000 on local store + // But only the right part because there is a peer 4 of region 1000 on local + // store must_get_none(&cluster.get_engine(2), b"k1"); fail::remove(before_check_snapshot_1000_2_fp); @@ -657,9 +659,9 @@ impl Filter for CollectSnapshotFilter { } } -/// If the uninitialized peer and split peer are fetched into one batch, and the first -/// one doesn't generate ready, the second one does, ready should not be mapped to the -/// first one. +/// If the uninitialized peer and split peer are fetched into one batch, and the +/// first one doesn't generate ready, the second one does, ready should not be +/// mapped to the first one. #[test] fn test_split_duplicated_batch() { let mut cluster = new_node_cluster(0, 3); @@ -696,7 +698,8 @@ fn test_split_duplicated_batch() { if let Err(e) = rx.recv_timeout(Duration::from_secs(1)) { panic!("the snapshot is not sent before split, e: {:?}", e); } - // Split the region range and then there should be another snapshot for the split ranges. + // Split the region range and then there should be another snapshot for the + // split ranges. cluster.must_split(®ion, b"k2"); // Ensure second is also sent and piled in filter. if let Err(e) = rx.recv_timeout(Duration::from_secs(1)) { @@ -764,8 +767,8 @@ fn test_split_duplicated_batch() { must_get_equal(&cluster.get_engine(3), b"k11", b"v11"); } -/// We depend on split-check task to update approximate size of region even if this region does not -/// need to split. +/// We depend on split-check task to update approximate size of region even if +/// this region does not need to split. 
#[test] fn test_report_approximate_size_after_split_check() { let mut cluster = new_server_cluster(0, 3); @@ -881,7 +884,8 @@ fn test_split_with_concurrent_pessimistic_locking() { assert!(resp.get_region_error().has_epoch_not_match(), "{:?}", resp); // 2. Locking happens when split has finished - // It needs to be rejected due to incorrect epoch, otherwise the lock may be written to the wrong region. + // It needs to be rejected due to incorrect epoch, otherwise the lock may be + // written to the wrong region. fail::cfg("txn_before_process_write", "pause").unwrap(); req.set_context(cluster.get_ctx(b"key")); let res = thread::spawn(move || client.kv_pessimistic_lock(&req).unwrap()); @@ -979,7 +983,8 @@ fn test_split_pessimistic_locks_with_concurrent_prewrite() { cluster.split_region(&cluster.get_region(b"key"), b"a", Callback::None); thread::sleep(Duration::from_millis(300)); - // PrewriteResponse should contain an EpochNotMatch instead of PessimisticLockNotFound. + // PrewriteResponse should contain an EpochNotMatch instead of + // PessimisticLockNotFound. fail::remove("txn_before_process_write"); let resp = resp.join().unwrap(); assert!(resp.get_region_error().has_epoch_not_match(), "{:?}", resp); @@ -987,10 +992,11 @@ fn test_split_pessimistic_locks_with_concurrent_prewrite() { fail::remove("on_split_invalidate_locks"); } -/// Logs are gced asynchronously. If an uninitialized peer is destroyed before being replaced by -/// split, then the asynchronous log gc response may arrive after the peer is replaced, hence -/// it will lead to incorrect memory state. Actually, there is nothing to be gc for uninitialized -/// peer. The case is to guarantee such incorrect state will not happen. +/// Logs are gced asynchronously. If an uninitialized peer is destroyed before +/// being replaced by split, then the asynchronous log gc response may arrive +/// after the peer is replaced, hence it will lead to incorrect memory state. 
+/// Actually, there is nothing to be gc for uninitialized peer. The case is to +/// guarantee such incorrect state will not happen. #[test] fn test_split_replace_skip_log_gc() { let mut cluster = new_node_cluster(0, 3); @@ -1023,7 +1029,8 @@ fn test_split_replace_skip_log_gc() { cluster.must_put(b"k3", b"v3"); - // Because a is not initialized, so b must be created using heartbeat on store 3. + // Because a is not initialized, so b must be created using heartbeat on store + // 3. // Simulate raft log gc stall. let gc_fp = "worker_gc_raft_log_flush"; diff --git a/tests/failpoints/cases/test_sst_recovery.rs b/tests/failpoints/cases/test_sst_recovery.rs index b15a43b3d35..f5dadc4205a 100644 --- a/tests/failpoints/cases/test_sst_recovery.rs +++ b/tests/failpoints/cases/test_sst_recovery.rs @@ -24,7 +24,8 @@ fn assert_corruption(res: engine_traits::Result) { fn test_sst_recovery_basic() { let (mut cluster, pd_client, engine1) = create_tikv_cluster_with_one_node_damaged(); - // Test that only sst recovery can delete the sst file, remove peer don't delete it. + // Test that only sst recovery can delete the sst file, remove peer don't delete + // it. fail::cfg("sst_recovery_before_delete_files", "pause").unwrap(); let store_meta = cluster.store_metas.get(&1).unwrap().clone(); diff --git a/tests/failpoints/cases/test_stale_peer.rs b/tests/failpoints/cases/test_stale_peer.rs index 0fba036417b..0321772661d 100644 --- a/tests/failpoints/cases/test_stale_peer.rs +++ b/tests/failpoints/cases/test_stale_peer.rs @@ -25,8 +25,9 @@ fn test_one_node_leader_missing() { let election_timeout = base_tick_interval * 5; cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(election_timeout - base_tick_interval); - // Use large peer check interval, abnormal and max leader missing duration to make a valid config, - // that is election timeout x 2 < peer stale state check < abnormal < max leader missing duration. 
+ // Use large peer check interval, abnormal and max leader missing duration to + // make a valid config, that is election timeout x 2 < peer stale state + // check < abnormal < max leader missing duration. cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration(election_timeout * 3); cluster.cfg.raft_store.abnormal_leader_missing_duration = ReadableDuration(election_timeout * 4); @@ -132,7 +133,8 @@ fn test_stale_learner_restart() { must_get_equal(&cluster.get_engine(2), b"k2", b"v2"); } -/// Test if a peer can be destroyed through tombstone msg when applying snapshot. +/// Test if a peer can be destroyed through tombstone msg when applying +/// snapshot. #[test] fn test_stale_peer_destroy_when_apply_snapshot() { let mut cluster = new_node_cluster(0, 3); @@ -208,7 +210,8 @@ fn test_stale_peer_destroy_when_apply_snapshot() { must_get_none(&cluster.get_engine(3), b"k1"); } -/// Test if destroy a uninitialized peer through tombstone msg would allow a staled peer be created again. +/// Test if destroy a uninitialized peer through tombstone msg would allow a +/// staled peer be created again. #[test] fn test_destroy_uninitialized_peer_when_there_exists_old_peer() { // 4 stores cluster. @@ -286,7 +289,8 @@ fn test_destroy_uninitialized_peer_when_there_exists_old_peer() { } /// Logs scan are now moved to raftlog gc threads. The case is to test if logs -/// are still cleaned up when there is stale logs before first index during destroy. +/// are still cleaned up when there is stale logs before first index during +/// destroy. #[test] fn test_destroy_clean_up_logs_with_unfinished_log_gc() { let mut cluster = new_node_cluster(0, 3); @@ -319,8 +323,8 @@ fn test_destroy_clean_up_logs_with_unfinished_log_gc() { must_get_equal(&cluster.get_engine(1), b"k30", b"v30"); fail::remove(fp); - // So peer (3, 3) will be destroyed by gc message. And all stale logs before first - // index should be cleaned up. + // So peer (3, 3) will be destroyed by gc message. 
And all stale logs before + // first index should be cleaned up. cluster.run_node(3).unwrap(); must_get_none(&cluster.get_engine(3), b"k29"); diff --git a/tests/failpoints/cases/test_stale_read.rs b/tests/failpoints/cases/test_stale_read.rs index 6e504e2f834..9a88a73508c 100644 --- a/tests/failpoints/cases/test_stale_read.rs +++ b/tests/failpoints/cases/test_stale_read.rs @@ -338,8 +338,9 @@ fn test_read_index_when_transfer_leader_2() { must_get_equal(&cluster.get_engine(2), b"k0", b"v0"); must_get_equal(&cluster.get_engine(3), b"k0", b"v0"); - // Put and test again to ensure that peer 3 get the latest writes by message append - // instead of snapshot, so that transfer leader to peer 3 can 100% success. + // Put and test again to ensure that peer 3 get the latest writes by message + // append instead of snapshot, so that transfer leader to peer 3 can 100% + // success. cluster.must_put(b"k1", b"v1"); must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); @@ -403,8 +404,8 @@ fn test_read_index_when_transfer_leader_2() { } } - // Resume reserved messages in one batch to make sure the old leader can get read and role - // change in one `Ready`. + // Resume reserved messages in one batch to make sure the old leader can get + // read and role change in one `Ready`. fail::cfg("pause_on_peer_collect_message", "pause").unwrap(); for raft_msg in reserved_msgs { router.send_raft_message(raft_msg).unwrap(); @@ -472,8 +473,9 @@ fn test_read_after_peer_destroyed() { ); } -/// In previous implementation, we suspect the leader lease at the position of `leader_commit_prepare_merge` -/// failpoint when `PrepareMerge` log is committed, which is too late to prevent stale read. +/// In previous implementation, we suspect the leader lease at the position of +/// `leader_commit_prepare_merge` failpoint when `PrepareMerge` log is +/// committed, which is too late to prevent stale read. 
#[test] fn test_stale_read_during_merging_2() { let mut cluster = new_node_cluster(0, 3); diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index c6872d22dab..85dfe054c63 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -479,8 +479,9 @@ fn test_pipelined_pessimistic_lock() { fail::remove(scheduler_async_write_finish_fp); delete_pessimistic_lock(&storage, key.clone(), 50, 50); - // The proposed callback, which is responsible for returning response, is not guaranteed to be - // invoked. In this case it should still be continued properly. + // The proposed callback, which is responsible for returning response, is not + // guaranteed to be invoked. In this case it should still be continued + // properly. fail::cfg(before_pipelined_write_finish_fp, "return()").unwrap(); storage .sched_txn_command( @@ -1336,10 +1337,11 @@ fn test_resolve_lock_deadline() { /// Checks if concurrent transaction works correctly during shutdown. /// -/// During shutdown, all pending writes will fail with error so its latch will be released. -/// Then other writes in the latch queue will be continued to be processed, which can break -/// the correctness of latch: underlying command result is always determined, it should be -/// either always success written or never be written. +/// During shutdown, all pending writes will fail with error so its latch will +/// be released. Then other writes in the latch queue will be continued to be +/// processed, which can break the correctness of latch: underlying command +/// result is always determined, it should be either always success written or +/// never be written. 
#[test] fn test_mvcc_concurrent_commit_and_rollback_at_shutdown() { let (mut cluster, mut client, mut ctx) = must_new_cluster_and_kv_client_mul(3); @@ -1407,7 +1409,8 @@ fn test_mvcc_concurrent_commit_and_rollback_at_shutdown() { ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); client = TikvClient::new(channel); - // The first request is commit, the second is rollback, the first one should succeed. + // The first request is commit, the second is rollback, the first one should + // succeed. ts += 1; let get_version = ts; let mut get_req = GetRequest::default(); diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 1435fbbe88c..419d923b0d7 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -121,7 +121,8 @@ fn test_snapshot_must_be_later_than_updating_max_ts() { .build() .unwrap(); - // Suppose snapshot was before updating max_ts, after sleeping for 500ms the following prewrite should complete. + // Suppose snapshot was before updating max_ts, after sleeping for 500ms the + // following prewrite should complete. fail::cfg("after-snapshot", "sleep(500)").unwrap(); let read_ts = 20.into(); let get_fut = storage.get(Context::default(), Key::from_raw(b"j"), read_ts); @@ -151,7 +152,8 @@ fn test_snapshot_must_be_later_than_updating_max_ts() { .unwrap(); let has_lock = block_on(get_fut).is_err(); let res = prewrite_rx.recv().unwrap().unwrap(); - // We must make sure either the lock is visible to the reader or min_commit_ts > read_ts. + // We must make sure either the lock is visible to the reader or min_commit_ts > + // read_ts. 
assert!(res.min_commit_ts > read_ts || has_lock); } @@ -197,10 +199,17 @@ fn test_update_max_ts_before_scan_memory_locks() { assert_eq!(res.min_commit_ts, 101.into()); } -/// Generates a test that checks the correct behavior of holding and dropping locks, -/// during the process of a single prewrite command. +/// Generates a test that checks the correct behavior of holding and dropping +/// locks, during the process of a single prewrite command. macro_rules! lock_release_test { - ($test_name:ident, $lock_exists:ident, $before_actions:expr, $middle_actions:expr, $after_actions:expr, $should_succeed:expr) => { + ( + $test_name:ident, + $lock_exists:ident, + $before_actions:expr, + $middle_actions:expr, + $after_actions:expr, + $should_succeed:expr + ) => { #[test] fn $test_name() { let engine = TestEngineBuilder::new().build().unwrap(); @@ -262,7 +271,8 @@ lock_release_test!( false ); -// Must hold lock until prewrite ends. Must release lock after prewrite succeeds. +// Must hold lock until prewrite ends. Must release lock after prewrite +// succeeds. lock_release_test!( test_lock_lifetime_on_prewrite_success, lock_exists, @@ -395,7 +405,8 @@ fn test_exceed_max_commit_ts_in_the_middle_of_prewrite() { assert_eq!(locks[1].get_key(), b"k2"); assert!(!locks[1].get_use_async_commit()); - // Send a duplicated request to test the idempotency of prewrite when falling back to 2PC. + // Send a duplicated request to test the idempotency of prewrite when falling + // back to 2PC. let (prewrite_tx, prewrite_rx) = channel(); storage .sched_txn_command( @@ -583,7 +594,8 @@ fn test_concurrent_write_after_transfer_leader_invalidates_locks() { let mut req = PrewriteRequest::default(); req.set_context(ctx); req.set_mutations(vec![mutation].into()); - // Set a different start_ts. It should fail because the memory lock is still visible. + // Set a different start_ts. It should fail because the memory lock is still + // visible. 
req.set_start_version(20); req.set_primary_lock(b"key".to_vec()); diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index 028ef9f2cef..87b05042a30 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -273,8 +273,8 @@ fn test_read_lock_after_become_follower() { let start_ts = block_on(cluster.pd_client.get_tso()).unwrap(); - // put kv after get start ts, then this commit will cause a PessimisticLockNotFound - // if the pessimistic lock get missing. + // put kv after get start ts, then this commit will cause a + // PessimisticLockNotFound if the pessimistic lock get missing. cluster.must_put(b"key", b"value"); let leader = cluster.leader_of_region(region_id).unwrap(); @@ -334,6 +334,7 @@ fn test_read_lock_after_become_follower() { // Transfer leader will not make the command fail. fail::remove("txn_before_process_write"); let resp = resp_rx.recv().unwrap(); - // The term has changed, so we should get a stale command error instead a PessimisticLockNotFound. + // The term has changed, so we should get a stale command error instead a + // PessimisticLockNotFound. assert!(resp.get_region_error().has_stale_command()); } diff --git a/tests/failpoints/cases/test_unsafe_recovery.rs b/tests/failpoints/cases/test_unsafe_recovery.rs index 290a3561be9..c70ac41d902 100644 --- a/tests/failpoints/cases/test_unsafe_recovery.rs +++ b/tests/failpoints/cases/test_unsafe_recovery.rs @@ -34,7 +34,8 @@ fn test_unsafe_recovery_send_report() { }) .unwrap(); - // Mannually makes an update, and wait for the apply to be triggered, to simulate "some entries are commited but not applied" scenario. + // Manually makes an update, and wait for the apply to be triggered, to + // simulate "some entries are committed but not applied" scenario. 
cluster.put(b"random_key2", b"random_val2").unwrap(); apply_triggered_rx .recv_timeout(Duration::from_secs(1)) @@ -88,8 +89,8 @@ fn test_unsafe_recovery_execution_result_report() { cluster.must_transfer_leader(region.get_id(), store2_peer); cluster.put(b"random_key1", b"random_val1").unwrap(); - // Split the region into 2, and remove one of them, so that we can test both region peer list - // update and region creation. + // Split the region into 2, and remove one of them, so that we can test both + // region peer list update and region creation. pd_client.must_split_region( region, pdpb::CheckPolicy::Usekey, @@ -382,8 +383,8 @@ fn test_unsafe_recovery_create_destroy_reentrancy() { cluster.must_transfer_leader(region.get_id(), store2_peer); cluster.put(b"random_key1", b"random_val1").unwrap(); - // Split the region into 2, and remove one of them, so that we can test both region peer list - // update and region creation. + // Split the region into 2, and remove one of them, so that we can test both + // region peer list update and region creation. pd_client.must_split_region( region, pdpb::CheckPolicy::Usekey, diff --git a/tests/integrations/backup/mod.rs b/tests/integrations/backup/mod.rs index 2990a983974..ff07d8a712a 100644 --- a/tests/integrations/backup/mod.rs +++ b/tests/integrations/backup/mod.rs @@ -33,9 +33,11 @@ fn assert_same_file_name(s1: String, s2: String) { fn assert_same_files(mut files1: Vec, mut files2: Vec) { assert_eq!(files1.len(), files2.len()); - // Sort here by start key in case of unordered response (by pipelined write + scan) - // `sort_by_key` couldn't be used here -- rustc would complain that `file.start_key.as_slice()` - // may not live long enough. (Is that a bug of rustc?) + // Sort here by start key in case of unordered response (by pipelined write + + // scan). + // `sort_by_key` couldn't be used here -- rustc would complain that + // `file.start_key.as_slice()` may not live long enough. (Is that a + // bug of rustc?) 
files1.sort_by(|f1, f2| f1.start_key.cmp(&f2.start_key)); files2.sort_by(|f1, f2| f1.start_key.cmp(&f2.start_key)); @@ -52,7 +54,8 @@ fn assert_same_files(mut files1: Vec, mut files2: Vec 0) when the test failed suite.stop(); diff --git a/tests/integrations/config/test_config_client.rs b/tests/integrations/config/test_config_client.rs index 52cdc9cb012..96299de22a3 100644 --- a/tests/integrations/config/test_config_client.rs +++ b/tests/integrations/config/test_config_client.rs @@ -224,7 +224,8 @@ raft-log-gc-threshold = 2000 ); // config update from config file assert!(cfg_controller.update_from_toml_file().is_ok()); - // after update this configration item should be constant with the modified configuration file + // after update this configuration item should be constant with the modified + // configuration file assert_eq!( cfg_controller .get_current() diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index 317e811ec50..69ce131ec8b 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -37,9 +37,10 @@ fn check_chunk_datum_count(chunks: &[Chunk], datum_limit: usize) { } } -/// sort_by sorts the `$v`(a vector of `Vec`) by the $index elements in `Vec` +/// sort_by sorts the `$v`(a vector of `Vec`) by the $index elements in +/// `Vec` macro_rules! sort_by { - ($v:ident, $index:expr, $t:ident) => { + ($v:ident, $index:expr, $t:ident) => { $v.sort_by(|a, b| match (&a[$index], &b[$index]) { (Datum::Null, Datum::Null) => std::cmp::Ordering::Equal, (Datum::$t(a), Datum::$t(b)) => a.cmp(&b), @@ -1732,8 +1733,8 @@ fn test_cache() { // Cache version must be >= 5 because Raft apply index must be >= 5. assert!(cache_version >= 5); - // Send the request again using is_cache_enabled == false (default) and a matching version. - // The request should be processed as usual. 
+ // Send the request again using is_cache_enabled == false (default) and a + // matching version. The request should be processed as usual. let mut req2 = req.clone(); req2.set_cache_if_match_version(cache_version); @@ -1746,8 +1747,8 @@ fn test_cache() { ); assert_eq!(resp.get_data(), resp2.get_data()); - // Send the request again using is_cached_enabled == true and a matching version. - // The request should be skipped. + // Send the request again using is_cached_enabled == true and a matching + // version. The request should be skipped. let mut req3 = req.clone(); req3.set_is_cache_enabled(true); @@ -1757,7 +1758,8 @@ fn test_cache() { assert!(resp3.get_is_cache_hit()); assert!(resp3.get_data().is_empty()); - // Send the request using a non-matching version. The request should be processed. + // Send the request using a non-matching version. The request should be + // processed. let mut req4 = req; req4.set_is_cache_enabled(true); @@ -1775,12 +1777,12 @@ fn test_cache() { #[test] fn test_copr_bypass_or_access_locks() { let data = vec![ - (1, Some("name:1"), 1), /* no lock */ - (2, Some("name:2"), 2), /* bypass lock */ - (3, Some("name:3"), 3), /* access lock(range) */ - (4, Some("name:4"), 4), /* access lock(range) */ - (6, Some("name:6"), 6), /* access lock(point) */ - (8, Some("name:8"), 8), /* not conflict lock */ + (1, Some("name:1"), 1), // no lock + (2, Some("name:2"), 2), // bypass lock + (3, Some("name:3"), 3), // access lock(range) + (4, Some("name:4"), 4), // access lock(range) + (6, Some("name:6"), 6), // access lock(point) + (8, Some("name:8"), 8), // not conflict lock ]; let product = ProductTable::new(); @@ -1894,10 +1896,10 @@ fn test_copr_bypass_or_access_locks() { #[test] fn test_rc_read() { let data = vec![ - (1, Some("name:1"), 1), /* no lock */ - (2, Some("name:2"), 2), /* no lock */ - (3, Some("name:3"), 3), /* update lock */ - (4, Some("name:4"), 4), /* delete lock */ + (1, Some("name:1"), 1), // no lock + (2, Some("name:2"), 2), // no 
lock + (3, Some("name:3"), 3), // update lock + (4, Some("name:4"), 4), // delete lock ]; let product = ProductTable::new(); diff --git a/tests/integrations/pd/test_rpc_client.rs b/tests/integrations/pd/test_rpc_client.rs index 20fc6b70908..3a3967c25a8 100644 --- a/tests/integrations/pd/test_rpc_client.rs +++ b/tests/integrations/pd/test_rpc_client.rs @@ -128,7 +128,7 @@ fn test_rpc_client() { block_on(client.store_heartbeat( pdpb::StoreStats::default(), - /*store_report=*/ None, + None, // store_report None, )) .unwrap(); @@ -353,7 +353,8 @@ fn test_retry_sync() { fn test_not_retry(func: F) { let eps_count = 1; - // NotRetry mocker returns Ok() with error header first, and next returns Ok() without any error header. + // NotRetry mocker returns Ok() with error header first, and next returns Ok() + // without any error header. let not_retry = Arc::new(NotRetry::new()); let server = MockServer::with_case(eps_count, not_retry); let eps = server.bind_addrs(); @@ -586,7 +587,8 @@ fn test_region_heartbeat_on_leader_change() { // Change PD leader once then heartbeat PD. heartbeat_on_leader_change(1); - // Change PD leader twice without update the heartbeat sender, then heartbeat PD. + // Change PD leader twice without update the heartbeat sender, then heartbeat + // PD. 
heartbeat_on_leader_change(2); } @@ -631,7 +633,7 @@ fn test_cluster_version() { let emit_heartbeat = || { let req = pdpb::StoreStats::default(); - block_on(client.store_heartbeat(req, /*store_report=*/ None, None)).unwrap(); + block_on(client.store_heartbeat(req, /* store_report= */ None, None)).unwrap(); }; let set_cluster_version = |version: &str| { diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index f2019d04ea7..e74f0979241 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -21,10 +21,11 @@ use tikv_util::{ }; fn test_bootstrap_idempotent(cluster: &mut Cluster) { - // assume that there is a node bootstrap the cluster and add region in pd successfully + // assume that there is a node bootstrap the cluster and add region in pd + // successfully cluster.add_first_region().unwrap(); - // now at same time start the another node, and will recive cluster is not bootstrap - // it will try to bootstrap with a new region, but will failed + // now at same time start the another node, and will receive `cluster is not + // bootstrap` it will try to bootstrap with a new region, but will failed // the region number still 1 cluster.start().unwrap(); cluster.check_regions_number(1); @@ -64,11 +65,12 @@ fn test_node_bootstrap_with_prepared_data() { let snap_mgr = SnapManager::new(tmp_mgr.path().to_str().unwrap()); let pd_worker = LazyWorker::new("test-pd-worker"); - // assume there is a node has bootstrapped the cluster and add region in pd successfully + // assume there is a node has bootstrapped the cluster and add region in pd + // successfully bootstrap_with_first_region(Arc::clone(&pd_client)).unwrap(); - // now another node at same time begin bootstrap node, but panic after prepared bootstrap - // now rocksDB must have some prepare data + // now another node at same time begin bootstrap node, but panic after prepared + // bootstrap now rocksDB 
must have some prepare data bootstrap_store(&engines, 0, 1).unwrap(); let region = node.prepare_bootstrap_cluster(&engines, 1).unwrap(); assert!( diff --git a/tests/integrations/raftstore/test_compact_lock_cf.rs b/tests/integrations/raftstore/test_compact_lock_cf.rs index 703e49169ef..c8ee96c7c67 100644 --- a/tests/integrations/raftstore/test_compact_lock_cf.rs +++ b/tests/integrations/raftstore/test_compact_lock_cf.rs @@ -37,12 +37,14 @@ fn test_compact_lock_cf(cluster: &mut Cluster) { cluster.cfg.rocksdb.lockcf.disable_auto_compactions = true; cluster.run(); - // Write 40 bytes, not reach lock_cf_compact_bytes_threshold, so there is no compaction. + // Write 40 bytes, not reach lock_cf_compact_bytes_threshold, so there is no + // compaction. for i in 0..5 { let (k, v) = (format!("k{}", i), format!("value{}", i)); cluster.must_put_cf(CF_LOCK, k.as_bytes(), v.as_bytes()); } - // Generate one sst, if there are datas only in one memtable, no compactions will be triggered. + // Generate one sst, if there are datas only in one memtable, no compactions + // will be triggered. 
flush(cluster); // Write more 40 bytes, still not reach lock_cf_compact_bytes_threshold, diff --git a/tests/integrations/raftstore/test_compact_log.rs b/tests/integrations/raftstore/test_compact_log.rs index e7d14a6eb45..bc097dd27e9 100644 --- a/tests/integrations/raftstore/test_compact_log.rs +++ b/tests/integrations/raftstore/test_compact_log.rs @@ -27,7 +27,7 @@ fn test_compact_log(cluster: &mut Cluster) { &cluster.engines, &before_states, 1, - false, /*must_compacted*/ + false, // must_compacted ) { return; @@ -38,7 +38,7 @@ fn test_compact_log(cluster: &mut Cluster) { &cluster.engines, &before_states, 1, - true, /*must_compacted*/ + true, // must_compacted ); } @@ -93,7 +93,7 @@ fn test_compact_count_limit(cluster: &mut Cluster) { &cluster.engines, &before_states, 1, - false, /*must_compacted*/ + false, // must_compacted ) { return; @@ -103,7 +103,7 @@ fn test_compact_count_limit(cluster: &mut Cluster) { &cluster.engines, &before_states, 1, - true, /*must_compacted*/ + true, // must_compacted ); } @@ -140,7 +140,7 @@ fn test_compact_many_times(cluster: &mut Cluster) { &cluster.engines, &before_states, gc_limit * 2, - false, /*must_compacted*/ + false, // must_compacted ) { return; @@ -151,7 +151,7 @@ fn test_compact_many_times(cluster: &mut Cluster) { &cluster.engines, &before_states, gc_limit * 2, - true, /*must_compacted*/ + true, // must_compacted ); } diff --git a/tests/integrations/raftstore/test_conf_change.rs b/tests/integrations/raftstore/test_conf_change.rs index 3778794387a..b37b207ac11 100644 --- a/tests/integrations/raftstore/test_conf_change.rs +++ b/tests/integrations/raftstore/test_conf_change.rs @@ -574,8 +574,8 @@ fn test_conf_change_safe(cluster: &mut Cluster) { cluster.must_put(b"k3", b"v3"); // Ensure the conf change is safe: - // The "RemoveNode" request which asks to remove one healthy node will be rejected - // if there are only 2 healthy nodes in a cluster of 3 nodes. 
+ // The "RemoveNode" request which asks to remove one healthy node will be + // rejected if there are only 2 healthy nodes in a cluster of 3 nodes. pd_client.remove_peer(region_id, new_peer(2, 2)); cluster.must_put(b"k4", b"v4"); pd_client.must_have_peer(region_id, new_peer(2, 2)); @@ -583,7 +583,8 @@ fn test_conf_change_safe(cluster: &mut Cluster) { // In this case, it's fine to remove one unhealthy node. pd_client.must_remove_peer(region_id, new_peer(1, 1)); - // Ensure it works to remove one node from the cluster that has only two healthy nodes. + // Ensure it works to remove one node from the cluster that has only two healthy + // nodes. pd_client.must_remove_peer(region_id, new_peer(2, 2)); } @@ -913,16 +914,17 @@ where #[test] fn test_conf_change_fast() { let mut cluster = new_server_cluster(0, 3); - // Sets heartbeat timeout to more than 5 seconds. It also changes the election timeout, - // but it's OK as the cluster starts with only one peer, it will campaigns immediately. + // Sets heartbeat timeout to more than 5 seconds. It also changes the election + // timeout, but it's OK as the cluster starts with only one peer, it will + // campaigns immediately. configure_for_lease_read(&mut cluster, Some(5000), None); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); let r1 = cluster.run_conf_change(); cluster.must_put(b"k1", b"v1"); let timer = Instant::now(); - // If conf change relies on heartbeat, it will take more than 5 seconds to finish, - // hence it must timeout. + // If conf change relies on heartbeat, it will take more than 5 seconds to + // finish, hence it must timeout. 
pd_client.must_add_peer(r1, new_learner_peer(2, 2)); pd_client.must_add_peer(r1, new_peer(2, 2)); must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); diff --git a/tests/integrations/raftstore/test_early_apply.rs b/tests/integrations/raftstore/test_early_apply.rs index 4b9a1e40d8b..a88032671a3 100644 --- a/tests/integrations/raftstore/test_early_apply.rs +++ b/tests/integrations/raftstore/test_early_apply.rs @@ -22,9 +22,9 @@ fn delete_old_data(engine: &E, id: u64) { ..Default::default() }; engine - .clean(id, 0 /*first_index*/, &state, &mut deleter) + .clean(id, 0 /* first_index */, &state, &mut deleter) .unwrap(); - engine.consume(&mut deleter, true /*sync*/).unwrap(); + engine.consume(&mut deleter, true /* sync */).unwrap(); } /// Allow lost situation. @@ -89,7 +89,7 @@ where delete_old_data(&cluster.get_raft_engine(*id), *id); cluster .get_raft_engine(*id) - .consume(&mut batch, true /*sync*/) + .consume(&mut batch, true /* sync */) .unwrap(); } for id in &ids { @@ -160,7 +160,8 @@ fn test_follower_commit_early_apply() { test_early_apply(DataLost::FollowerCommit) } -/// Tests whether the cluster can recover from all nodes lost their commit index. +/// Tests whether the cluster can recover from all nodes lost their commit +/// index. 
#[test] fn test_all_node_crash() { test_early_apply(DataLost::AllLost) @@ -202,7 +203,7 @@ fn test_update_internal_apply_index() { delete_old_data(&cluster.get_raft_engine(id), id); cluster .get_raft_engine(id) - .consume(&mut batch, true /*sync*/) + .consume(&mut batch, true /* sync */) .unwrap(); cluster.run_node(id).unwrap(); } diff --git a/tests/integrations/raftstore/test_hibernate.rs b/tests/integrations/raftstore/test_hibernate.rs index 602efc2d9c3..23c859a21bd 100644 --- a/tests/integrations/raftstore/test_hibernate.rs +++ b/tests/integrations/raftstore/test_hibernate.rs @@ -231,8 +231,9 @@ fn test_transfer_leader_delay() { panic!("failed to request after 3 seconds"); } -/// If a learner is isolated before split and then catch up logs by snapshot, then the -/// range for split learner will be missing on the node until leader is waken. +/// If a learner is isolated before split and then catch up logs by snapshot, +/// then the range for split learner will be missing on the node until leader is +/// waken. #[test] fn test_split_delay() { let mut cluster = new_server_cluster(0, 4); @@ -354,9 +355,9 @@ fn test_inconsistent_configuration() { assert_eq!(cluster.leader_of_region(1), Some(new_peer(3, 3))); } -/// Negotiating hibernation is implemented after 5.0.0, for older version binaries, -/// negotiating can cause connection reset due to new enum type. The test ensures -/// negotiation won't happen until cluster is upgraded. +/// Negotiating hibernation is implemented after 5.0.0, for older version +/// binaries, negotiating can cause connection reset due to new enum type. The +/// test ensures negotiation won't happen until cluster is upgraded. #[test] fn test_hibernate_feature_gate() { let mut cluster = new_node_cluster(0, 3); @@ -405,7 +406,8 @@ fn test_hibernate_feature_gate() { assert!(!awakened.load(Ordering::SeqCst)); } -/// Tests when leader is demoted in a hibernated region, the region can recover automatically. 
+/// Tests when leader is demoted in a hibernated region, the region can recover +/// automatically. #[test] fn test_leader_demoted_when_hibernated() { let mut cluster = new_node_cluster(0, 4); @@ -489,10 +491,11 @@ fn test_leader_demoted_when_hibernated() { } cluster.clear_send_filters(); - // If there is no leader in the region, the cluster can't write two kvs successfully. - // The first one is possible to succeed if it's committed with the conf change at the - // same time, but the second one can't be committed or accepted because conf change - // should be applied and the leader should be demoted as learner. + // If there is no leader in the region, the cluster can't write two kvs + // successfully. The first one is possible to succeed if it's committed with + // the conf change at the same time, but the second one can't be committed + // or accepted because conf change should be applied and the leader should + // be demoted as learner. cluster.must_put(b"k1", b"v1"); cluster.must_put(b"k2", b"v2"); } diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index 140cbb98fcd..ae04c0d12f2 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -22,15 +22,16 @@ use tikv_util::{config::*, time::Instant, HandyRwLock}; // The leader keeps a record of its leader lease, and uses the system's // monotonic raw clocktime to check whether its lease has expired. // If the leader lease has not expired, when the leader receives a read request -// 1. with `read_quorum == false`, the leader will serve it by reading local data. -// This way of handling request is called "lease read". -// 2. with `read_quorum == true`, the leader will serve it by doing index read (see raft's doc). -// This way of handling request is called "index read". 
-// If the leader lease has expired, leader will serve both kinds of requests by index read, and -// propose an no-op entry to raft quorum to renew the lease. -// No matter what status the leader lease is, a write request is always served by writing a Raft -// log to the Raft quorum. It is called "consistent write". All writes are consistent writes. -// Every time the leader performs a consistent read/write, it will try to renew its lease. +// - with `read_quorum == false`, the leader will serve it by reading local +// data. This way of handling request is called "lease read". +// - with `read_quorum == true`, the leader will serve it by doing index read +// (see raft's doc). This way of handling request is called "index read". +// If the leader lease has expired, leader will serve both kinds of requests by +// index read, and propose an no-op entry to raft quorum to renew the lease. +// No matter what status the leader lease is, a write request is always served +// by writing a Raft log to the Raft quorum. It is called "consistent write". +// All writes are consistent writes. Every time the leader performs a consistent +// read/write, it will try to renew its lease. fn test_renew_lease(cluster: &mut Cluster) { // Avoid triggering the log compaction in this test case. cluster.cfg.raft_store.raft_log_gc_threshold = 100; @@ -163,11 +164,12 @@ fn test_node_lease_expired() { test_lease_expired(&mut cluster); } -// A helper function for testing the leader holds unsafe lease during the leader transfer -// procedure, so it will not do lease read. -// Since raft will not propose any request during leader transfer procedure, consistent read/write -// could not be performed neither. -// When leader transfer procedure aborts later, the leader would use and update the lease as usual. +// A helper function for testing the leader holds unsafe lease during the leader +// transfer procedure, so it will not do lease read. 
+// Since raft will not propose any request during leader transfer procedure, +// consistent read/write could not be performed neither. +// When leader transfer procedure aborts later, the leader would use and update +// the lease as usual. fn test_lease_unsafe_during_leader_transfers(cluster: &mut Cluster) { // Avoid triggering the log compaction in this test case. cluster.cfg.raft_store.raft_log_gc_threshold = 100; @@ -215,7 +217,8 @@ fn test_lease_unsafe_during_leader_transfers(cluster: &mut Cluster // Ensure peer 3 is ready to transfer leader. must_get_equal(&cluster.get_engine(3), key, b"v1"); - // Drop MsgTimeoutNow to `peer3` so that the leader transfer procedure would abort later. + // Drop MsgTimeoutNow to `peer3` so that the leader transfer procedure would + // abort later. cluster.add_send_filter(CloneFilterFactory( RegionPacketFilter::new(region_id, peer3_store_id) .msg_type(MessageType::MsgTimeoutNow) @@ -225,7 +228,8 @@ fn test_lease_unsafe_during_leader_transfers(cluster: &mut Cluster // Issue a transfer leader request to transfer leader from `peer` to `peer3`. cluster.transfer_leader(region_id, peer3); - // Delay a while to ensure transfer leader procedure is triggered inside raft module. + // Delay a while to ensure transfer leader procedure is triggered inside raft + // module. thread::sleep(election_timeout / 2); // Issue a read request and it will fall back to read index. @@ -239,8 +243,8 @@ fn test_lease_unsafe_during_leader_transfers(cluster: &mut Cluster // Make sure the leader transfer procedure timeouts. thread::sleep(election_timeout * 2); - // Then the leader transfer procedure aborts, now the leader could do lease read or consistent - // read/write and renew/reuse the lease as usual. + // Then the leader transfer procedure aborts, now the leader could do lease read + // or consistent read/write and renew/reuse the lease as usual. // Issue a read request and check the value on response. 
must_read_on_peer(cluster, peer.clone(), region.clone(), key, b"v1"); @@ -354,7 +358,8 @@ fn test_batch_id_in_lease(cluster: &mut Cluster) { }) .collect(); - // Snapshot 0 and 1 will use one RocksSnapshot because we have renew their lease. + // Snapshot 0 and 1 will use one RocksSnapshot because we have renew their + // lease. assert!(std::ptr::eq( snaps[0].get_snapshot(), snaps[1].get_snapshot() @@ -389,8 +394,9 @@ fn test_batch_id_in_lease(cluster: &mut Cluster) { )); } -/// test whether the read index callback will be handled when a region is destroyed. -/// If it's not handled properly, it will cause dead lock in transaction scheduler. +/// test whether the read index callback will be handled when a region is +/// destroyed. If it's not handled properly, it will cause dead lock in +/// transaction scheduler. #[test] fn test_node_callback_when_destroyed() { let count = 3; @@ -465,9 +471,10 @@ fn test_lease_read_callback_destroy() { cluster.must_put(b"k2", b"v2"); } -/// A read index request will be appended to waiting list when there is an on-going request -/// to reduce heartbeat messages. But when leader is in suspect lease, requests should not -/// be batched because lease can be expired at anytime. +/// A read index request will be appended to waiting list when there is an +/// on-going request to reduce heartbeat messages. But when leader is in suspect +/// lease, requests should not be batched because lease can be expired at +/// anytime. #[test] fn test_read_index_stale_in_suspect_lease() { let mut cluster = new_node_cluster(0, 3); @@ -485,8 +492,9 @@ fn test_read_index_stale_in_suspect_lease() { cluster.pd_client.must_add_peer(r1, new_peer(3, 3)); let r1 = cluster.get_region(b"k1"); - // Put and test again to ensure that peer 3 get the latest writes by message append - // instead of snapshot, so that transfer leader to peer 3 can 100% success. 
+ // Put and test again to ensure that peer 3 get the latest writes by message + // append instead of snapshot, so that transfer leader to peer 3 can 100% + // success. cluster.must_put(b"k1", b"v1"); must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); cluster.must_put(b"k2", b"v2"); @@ -650,15 +658,16 @@ fn test_not_leader_read_lease() { } /// Test whether read index is greater than applied index. -/// 1. Add hearbeat msg filter. +/// 1. Add heartbeat msg filter. /// 2. Propose a read index request. /// 3. Put a key and get the latest applied index. /// 4. Propose another read index request. -/// 5. Remove the filter and check whether the latter read index is greater than applied index. +/// 5. Remove the filter and check whether the latter read index is greater than +/// applied index. /// /// In previous implementation, these two read index request will be batched and -/// will get the same read index which breaks the correctness because the latter one -/// is proposed after the applied index has increased and replied to client. +/// will get the same read index which breaks the correctness because the latter +/// one is proposed after the applied index has increased and replied to client. #[test] fn test_read_index_after_write() { let mut cluster = new_node_cluster(0, 3); @@ -675,7 +684,8 @@ fn test_read_index_after_write() { cluster.must_transfer_leader(region.get_id(), region_on_store1.clone()); cluster.add_send_filter(IsolationFilterFactory::new(3)); - // Add heartbeat msg filter to prevent the leader to reply the read index response. + // Add heartbeat msg filter to prevent the leader to reply the read index + // response. let filter = Box::new( RegionPacketFilter::new(region.get_id(), 2) .direction(Direction::Recv) @@ -766,7 +776,8 @@ fn test_infinite_lease() { assert_eq!(cluster.leader_of_region(region_id), Some(peer)); assert_eq!(detector.ctx.rl().len(), 1); - // renew-lease-tick shouldn't propose any request if the leader lease is not expired. 
+ // renew-lease-tick shouldn't propose any request if the leader lease is not + // expired. for _ in 0..4 { cluster.must_put(key, b"v0"); thread::sleep(max_lease / 4); @@ -774,8 +785,8 @@ fn test_infinite_lease() { assert_eq!(detector.ctx.rl().len(), 1); } -// LocalReader will try to renew lease in advance, so the region that has continuous reads -// should not go to hibernate. +// LocalReader will try to renew lease in advance, so the region that has +// continuous reads should not go to hibernate. #[test] fn test_node_local_read_renew_lease() { let mut cluster = new_node_cluster(0, 3); diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 1146e152681..9cff738fdfe 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -205,7 +205,8 @@ fn test_node_merge_prerequisites_check() { cluster.must_transfer_leader(right.get_id(), right_on_store1); // first MsgAppend will append log, second MsgAppend will set commit index, - // So only allowing first MsgAppend to make source peer have uncommitted entries. + // So only allowing first MsgAppend to make source peer have uncommitted + // entries. cluster.add_send_filter(CloneFilterFactory( RegionPacketFilter::new(left.get_id(), 3) .direction(Direction::Recv) @@ -334,7 +335,8 @@ fn test_node_merge_slow_split_left() { test_node_merge_slow_split(false); } -// Test if a merge handled properly when there is a unfinished slow split before merge. +// Test if a merge handled properly when there is a unfinished slow split before +// merge. fn test_node_merge_slow_split(is_right_derive: bool) { let mut cluster = new_node_cluster(0, 3); configure_for_merge(&mut cluster); @@ -635,7 +637,8 @@ fn test_merge_approximate_size_and_keys() { keys ); - // after merge and then transfer leader, if not update new leader's approximate size, it maybe be stale. 
+ // after merge and then transfer leader, if not update new leader's approximate + // size, it maybe be stale. cluster.must_transfer_leader(region.get_id(), region.get_peers()[0].clone()); // make sure split check is invoked thread::sleep(Duration::from_millis(100)); @@ -731,7 +734,8 @@ fn test_node_merge_update_region() { assert_eq!(resp.get_responses()[0].get_get().get_value(), b"v3"); } -/// Test if merge is working properly when merge entries is empty but commit index is not updated. +/// Test if merge is working properly when merge entries is empty but commit +/// index is not updated. #[test] fn test_node_merge_catch_up_logs_empty_entries() { let mut cluster = new_node_cluster(0, 3); @@ -754,20 +758,23 @@ fn test_node_merge_catch_up_logs_empty_entries() { must_get_equal(&cluster.get_engine(3), b"k0", b"v0"); // first MsgAppend will append log, second MsgAppend will set commit index, - // So only allowing first MsgAppend to make source peer have uncommitted entries. + // So only allowing first MsgAppend to make source peer have uncommitted + // entries. cluster.add_send_filter(CloneFilterFactory( RegionPacketFilter::new(left.get_id(), 3) .direction(Direction::Recv) .msg_type(MessageType::MsgAppend) .allow(1), )); - // make the source peer have no way to know the uncommitted entries can be applied from heartbeat. + // make the source peer have no way to know the uncommitted entries can be + // applied from heartbeat. cluster.add_send_filter(CloneFilterFactory( RegionPacketFilter::new(left.get_id(), 3) .msg_type(MessageType::MsgHeartbeat) .direction(Direction::Recv), )); - // make the source peer have no way to know the uncommitted entries can be applied from target region. + // make the source peer have no way to know the uncommitted entries can be + // applied from target region. 
cluster.add_send_filter(CloneFilterFactory( RegionPacketFilter::new(right.get_id(), 3) .msg_type(MessageType::MsgAppend) @@ -820,11 +827,12 @@ fn test_merge_with_slow_promote() { /// Test whether a isolated store recover properly if there is no target peer /// on this store before isolated. -/// A (-∞, k2), B [k2, +∞) on store 1,2,4 -/// store 4 is isolated -/// B merge to A (target peer A is not created on store 4. It‘s just exist logically) -/// A split => C (-∞, k3), A [k3, +∞) -/// Then network recovery +/// - A (-∞, k2), B [k2, +∞) on store 1,2,4 +/// - store 4 is isolated +/// - B merge to A (target peer A is not created on store 4. It‘s just exist +/// logically) +/// - A split => C (-∞, k3), A [k3, +∞) +/// - Then network recovery #[test] fn test_merge_isolated_store_with_no_target_peer() { let mut cluster = new_node_cluster(0, 4); @@ -882,7 +890,8 @@ fn test_merge_isolated_store_with_no_target_peer() { must_get_equal(&cluster.get_engine(4), b"k345", b"v345"); } -/// Test whether a isolated peer can recover when two other regions merge to its region +/// Test whether a isolated peer can recover when two other regions merge to its +/// region #[test] fn test_merge_cascade_merge_isolated() { let mut cluster = new_node_cluster(0, 3); @@ -932,8 +941,8 @@ fn test_merge_cascade_merge_isolated() { must_get_equal(&cluster.get_engine(3), b"k4", b"v4"); } -// Test if a learner can be destroyed properly when it's isolated and removed by conf change -// before its region merge to another region +// Test if a learner can be destroyed properly when it's isolated and removed by +// conf change before its region merge to another region #[test] fn test_merge_isolated_not_in_merge_learner() { let mut cluster = new_node_cluster(0, 3); @@ -967,7 +976,8 @@ fn test_merge_isolated_not_in_merge_learner() { pd_client.must_remove_peer(right.get_id(), right_on_store1); pd_client.must_merge(left.get_id(), right.get_id()); - // Add a new learner on store 2 to trigger peer 2 send 
check-stale-peer msg to other peers + // Add a new learner on store 2 to trigger peer 2 send check-stale-peer msg to + // other peers pd_client.must_add_peer(right.get_id(), new_learner_peer(2, 5)); cluster.must_put(b"k123", b"v123"); @@ -977,8 +987,8 @@ fn test_merge_isolated_not_in_merge_learner() { must_get_equal(&cluster.get_engine(2), b"k123", b"v123"); } -// Test if a learner can be destroyed properly when it's isolated and removed by conf change -// before another region merge to its region +// Test if a learner can be destroyed properly when it's isolated and removed by +// conf change before another region merge to its region #[test] fn test_merge_isolated_stale_learner() { let mut cluster = new_node_cluster(0, 3); @@ -1015,7 +1025,8 @@ fn test_merge_isolated_stale_learner() { let new_left = pd_client.get_region(b"k1").unwrap(); assert_ne!(left.get_id(), new_left.get_id()); - // Add a new learner on store 2 to trigger peer 2 send check-stale-peer msg to other peers + // Add a new learner on store 2 to trigger peer 2 send check-stale-peer msg to + // other peers pd_client.must_add_peer(new_left.get_id(), new_learner_peer(2, 5)); cluster.must_put(b"k123", b"v123"); @@ -1064,15 +1075,16 @@ fn test_merge_isolated_not_in_merge_learner_2() { pd_client.must_merge(left.get_id(), right.get_id()); cluster.run_node(2).unwrap(); - // When the abnormal leader missing duration has passed, the check-stale-peer msg will be sent to peer 1001. - // After that, a new peer list will be returned (2, 2) (3, 3). - // Then peer 2 sends the check-stale-peer msg to peer 3 and it will get a tombstone response. - // Finally peer 2 will be destroyed. + // When the abnormal leader missing duration has passed, the check-stale-peer + // msg will be sent to peer 1001. After that, a new peer list will be + // returned (2, 2) (3, 3). Then peer 2 sends the check-stale-peer msg to + // peer 3 and it will get a tombstone response. Finally peer 2 will be + // destroyed. 
must_get_none(&cluster.get_engine(2), b"k1"); } -/// Test if a peer can be removed if its target peer has been removed and doesn't apply the -/// CommitMerge log. +/// Test if a peer can be removed if its target peer has been removed and +/// doesn't apply the CommitMerge log. #[test] fn test_merge_remove_target_peer_isolated() { let mut cluster = new_node_cluster(0, 4); @@ -1109,7 +1121,8 @@ fn test_merge_remove_target_peer_isolated() { cluster.add_send_filter(IsolationFilterFactory::new(3)); // Make region r2's epoch > r2 peer on store 3. - // r2 peer on store 3 will be removed whose epoch is staler than the epoch when r1 merge to r2. + // r2 peer on store 3 will be removed whose epoch is staler than the epoch when + // r1 merge to r2. pd_client.must_add_peer(r2.get_id(), new_peer(4, 4)); pd_client.must_remove_peer(r2.get_id(), new_peer(4, 4)); @@ -1191,8 +1204,8 @@ fn test_sync_max_ts_after_region_merge() { assert!(new_max_ts > max_ts); } -/// If a follower is demoted by a snapshot, its meta will be changed. The case is to ensure -/// asserts in code can tolerate the change. +/// If a follower is demoted by a snapshot, its meta will be changed. The case +/// is to ensure asserts in code can tolerate the change. #[test] fn test_merge_snapshot_demote() { let mut cluster = new_node_cluster(0, 4); @@ -1267,8 +1280,8 @@ fn test_propose_in_memory_pessimistic_locks() { let left = cluster.get_region(b"k1"); let right = cluster.get_region(b"k3"); - // Transfer the leader of the right region to store 2. The leaders of source and target - // regions don't need to be on the same store. + // Transfer the leader of the right region to store 2. The leaders of source and + // target regions don't need to be on the same store. 
cluster.must_transfer_leader(right.id, new_peer(2, 2)); // Insert lock l1 into the left region @@ -1310,8 +1323,8 @@ fn test_propose_in_memory_pessimistic_locks() { // Merge left region into the right region pd_client.must_merge(left.id, right.id); - // After the left region is merged into the right region, its pessimistic locks should be - // proposed and applied to the storage. + // After the left region is merged into the right region, its pessimistic locks + // should be proposed and applied to the storage. let snapshot = cluster.must_get_snapshot_of_region(right.id); let value = snapshot .get_cf(CF_LOCK, &Key::from_raw(b"k1")) @@ -1334,7 +1347,8 @@ fn test_merge_pessimistic_locks_when_gap_is_too_large() { configure_for_merge(&mut cluster); cluster.cfg.pessimistic_txn.pipelined = true; cluster.cfg.pessimistic_txn.in_memory = true; - // Set raft_entry_max_size to 64 KiB. We will try to make the gap larger than the limit later. + // Set raft_entry_max_size to 64 KiB. We will try to make the gap larger than + // the limit later. cluster.cfg.raft_store.raft_entry_max_size = ReadableSize::kb(64); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -1367,8 +1381,8 @@ fn test_merge_pessimistic_locks_when_gap_is_too_large() { cluster.merge_region(left.id, right.id, Callback::None); thread::sleep(Duration::from_millis(150)); - // The gap is too large, so the previous merge should fail. And this new put request - // should be allowed. + // The gap is too large, so the previous merge should fail. And this new put + // request should be allowed. let res = cluster.async_put(b"k1", b"new_val").unwrap(); cluster.clear_send_filters(); @@ -1442,8 +1456,8 @@ fn test_merge_pessimistic_locks_repeated_merge() { assert_eq!(value, lock.into_lock().to_bytes()); } -/// Check if merge is cleaned up if the merge target is destroyed several times before it's ever -/// scheduled. 
+/// Check if merge is cleaned up if the merge target is destroyed several times +/// before it's ever scheduled. #[test] fn test_node_merge_long_isolated() { let mut cluster = new_node_cluster(0, 3); @@ -1478,7 +1492,8 @@ fn test_node_merge_long_isolated() { let right = pd_client.get_region(b"k1").unwrap(); cluster.must_split(&right, b"k2"); cluster.must_put(b"k4", b"v4"); - // Ensure the node is removed, so it will not catch up any logs but just destroy itself. + // Ensure the node is removed, so it will not catch up any logs but just destroy + // itself. must_get_equal(&cluster.get_engine(3), b"k4", b"v4"); must_get_equal(&cluster.get_engine(2), b"k4", b"v4"); @@ -1527,14 +1542,19 @@ fn test_stale_message_after_merge() { pd_client.must_add_peer(left.get_id(), new_peer(3, 1004)); pd_client.must_merge(left.get_id(), right.get_id()); - // Such stale message can be sent due to network error, consider the following example: - // 1. Store 1 and Store 3 can't reach each other, so peer 1003 start election and send `RequestVote` - // message to peer 1001, and fail due to network error, but this message is keep backoff-retry to send out - // 2. Peer 1002 become the new leader and remove peer 1003 and add peer 1004 on store 3, then the region is - // merged into other region, the merge can success because peer 1002 can reach both peer 1001 and peer 1004 - // 3. 
Network recover, so peer 1003's `RequestVote` message is sent to peer 1001 after it is merged + // Such stale message can be sent due to network error, consider the following + // example: + // - Store 1 and Store 3 can't reach each other, so peer 1003 + // start election and send `RequestVote` message to peer 1001, and fail + // due to network error, but this message is keep backoff-retry to send out + // - Peer 1002 become the new leader and remove peer 1003 and add peer 1004 on + // store 3, then the region is merged into other region, the merge can + // success because peer 1002 can reach both peer 1001 and peer 1004 + // - Network recover, so peer 1003's `RequestVote` message is sent to peer 1001 + // after it is merged // - // the backoff-retry of a stale message is hard to simulated in test, so here just send this stale message directly + // the backoff-retry of a stale message is hard to simulated in test, so here + // just send this stale message directly let mut raft_msg = RaftMessage::default(); raft_msg.set_region_id(left.get_id()); raft_msg.set_from_peer(find_peer(&left, 3).unwrap().to_owned()); @@ -1546,7 +1566,8 @@ fn test_stale_message_after_merge() { must_get_equal(&cluster.get_engine(3), b"k4", b"v4"); } -/// Check whether merge should be prevented if follower may not have enough logs. +/// Check whether merge should be prevented if follower may not have enough +/// logs. #[test] fn test_prepare_merge_with_reset_matched() { let mut cluster = new_server_cluster(0, 3); @@ -1586,15 +1607,16 @@ fn test_prepare_merge_with_reset_matched() { cluster.must_transfer_leader(left.get_id(), left_on_store1); let res = cluster.try_merge(left.get_id(), right.get_id()); // Now leader still knows peer(2, 2) has committed i0 - 1, so the min_match will - // become i0 - 1. But i0 - 1 is not a safe index as peer(3, 3) starts from i0 + 1. + // become i0 - 1. But i0 - 1 is not a safe index as peer(3, 3) starts from i0 + + // 1. 
assert!(res.get_header().has_error(), "{:?}", res); cluster.clear_send_filters(); // Now leader should replicate more logs and figure out a safe index. pd_client.must_merge(left.get_id(), right.get_id()); } -/// Check if prepare merge min index is chosen correctly even if all match indexes are -/// correct. +/// Check if prepare merge min index is chosen correctly even if all match +/// indexes are correct. #[test] fn test_prepare_merge_with_5_nodes_snapshot() { let mut cluster = new_server_cluster(0, 5); @@ -1634,8 +1656,8 @@ fn test_prepare_merge_with_5_nodes_snapshot() { cluster.add_send_filter(IsolationFilterFactory::new(4)); must_get_equal(&cluster.get_engine(5), b"k13", b"v13"); let res = cluster.try_merge(left.get_id(), right.get_id()); - // min_matched from peer 4 is beyond the first index of peer 5, it should not be chosen - // for prepare merge. + // min_matched from peer 4 is beyond the first index of peer 5, it should not be + // chosen for prepare merge. assert!(res.get_header().has_error(), "{:?}", res); cluster.clear_send_filters(); // Now leader should replicate more logs and figure out a safe index. diff --git a/tests/integrations/raftstore/test_multi.rs b/tests/integrations/raftstore/test_multi.rs index 296d6f207cf..d7c527b5fd9 100644 --- a/tests/integrations/raftstore/test_multi.rs +++ b/tests/integrations/raftstore/test_multi.rs @@ -324,8 +324,9 @@ fn test_leader_change_with_uncommitted_log(cluster: &mut Cluster(cluster: &mut Cluster) { // guarantee peer 1 is leader cluster.must_transfer_leader(1, new_peer(1, 1)); - // if peer 2 is unreachable, leader will not send MsgAppend to peer 2, and the leader will - // send MsgAppend with committed information to peer 2 after network recovered, and peer 2 - // will apply the entry regardless of we add an filter, so we put k0/v0 to make sure the - // network is reachable. 
+ // if peer 2 is unreachable, leader will not send MsgAppend to peer 2, and the + // leader will send MsgAppend with committed information to peer 2 after + // network recovered, and peer 2 will apply the entry regardless of we add + // an filter, so we put k0/v0 to make sure the network is reachable. let (k0, v0) = (b"k0", b"v0"); cluster.must_put(k0, v0); @@ -507,8 +508,9 @@ fn test_read_leader_with_unapplied_log(cluster: &mut Cluster) { must_get_equal(&cluster.get_engine(i), k0, v0); } - // hack: first MsgAppend will append log, second MsgAppend will set commit index, - // So only allowing first MsgAppend to make peer 2 have uncommitted entries. + // hack: first MsgAppend will append log, second MsgAppend will set commit + // index, So only allowing first MsgAppend to make peer 2 have uncommitted + // entries. cluster.add_send_filter(CloneFilterFactory( RegionPacketFilter::new(1, 2) .msg_type(MessageType::MsgAppend) @@ -540,12 +542,13 @@ fn test_read_leader_with_unapplied_log(cluster: &mut Cluster) { cluster.must_transfer_leader(1, util::new_peer(2, 2)); - // leader's term not equal applied index's term, if we read local, we may get old value - // in this situation we need use raft read + // leader's term not equal applied index's term, if we read local, we may get + // old value in this situation we need use raft read must_get_none(&cluster.get_engine(2), k); - // internal read will use raft read no matter read_quorum is false or true, cause applied - // index's term not equal leader's term, and will failed with timeout + // internal read will use raft read no matter read_quorum is false or true, + // cause applied index's term not equal leader's term, and will failed with + // timeout let req = get_with_timeout(cluster, k, false, Duration::from_secs(10)).unwrap(); assert!( req.get_header().get_error().has_stale_command(), @@ -691,8 +694,8 @@ fn test_node_dropped_proposal() { ); put_req.mut_header().set_peer(new_peer(1, 1)); // peer (3, 3) won't become leader 
and transfer leader request will be canceled - // after about an election timeout. Before it's canceled, all proposal will be dropped - // silently. + // after about an election timeout. Before it's canceled, all proposal will be + // dropped silently. cluster.transfer_leader(1, new_peer(3, 3)); let (tx, rx) = mpsc::channel(); @@ -841,7 +844,8 @@ fn test_leader_drop_with_pessimistic_lock() { cluster.must_put(b"k1", b"v1"); assert_ne!(cluster.leader_of_region(1).unwrap().id, 1); - // When peer 1 becomes leader again, the pessimistic locks should be cleared before. + // When peer 1 becomes leader again, the pessimistic locks should be cleared + // before. cluster.clear_send_filters(); cluster.must_transfer_leader(1, new_peer(1, 1)); assert!(txn_ext.pessimistic_locks.read().is_empty()); diff --git a/tests/integrations/raftstore/test_prevote.rs b/tests/integrations/raftstore/test_prevote.rs index 6128e8e7dbf..a4336e9f3ed 100644 --- a/tests/integrations/raftstore/test_prevote.rs +++ b/tests/integrations/raftstore/test_prevote.rs @@ -35,7 +35,8 @@ fn attach_prevote_notifiers(cluster: &Cluster, peer: u64) -> mp rx } -// Validate that prevote is used in elections after partition or reboot of some nodes. +// Validate that prevote is used in elections after partition or reboot of some +// nodes. fn test_prevote( cluster: &mut Cluster, failure_type: FailureType<'_>, @@ -44,8 +45,8 @@ fn test_prevote( detect_during_recovery: impl Into>, ) { cluster.cfg.raft_store.prevote = true; - // Disable this feature because the test could run slow, in which case peers shouldn't - // hibernate, otherwise it's possible to detect no vote messages. + // Disable this feature because the test could run slow, in which case peers + // shouldn't hibernate, otherwise it's possible to detect no vote messages. 
cluster.cfg.raft_store.hibernate_regions = false; // To stable the test, we use a large election timeout to make // leader's readiness get handle within an election timeout @@ -149,8 +150,8 @@ fn test_prevote_partition_leader_in_majority_detect_in_majority() { #[test] fn test_prevote_partition_leader_in_majority_detect_in_minority() { let mut cluster = new_node_cluster(0, 5); - // The follower is in the minority and is part of a prevote process. On rejoin it adopts the - // old leader. + // The follower is in the minority and is part of a prevote process. On rejoin + // it adopts the old leader. test_prevote( &mut cluster, FailureType::Partition(&[1, 2, 3], &[4, 5]), @@ -164,8 +165,8 @@ fn test_prevote_partition_leader_in_majority_detect_in_minority() { #[test] fn test_prevote_partition_leader_in_minority_detect_in_majority() { let mut cluster = new_node_cluster(0, 5); - // The follower is in the minority and is part of a prevote process. On rejoin it adopts the - // old leader. + // The follower is in the minority and is part of a prevote process. On rejoin + // it adopts the old leader. test_prevote( &mut cluster, FailureType::Partition(&[1, 2], &[3, 4, 5]), @@ -179,8 +180,8 @@ fn test_prevote_partition_leader_in_minority_detect_in_majority() { #[test] fn test_prevote_partition_leader_in_minority_detect_in_minority() { let mut cluster = new_node_cluster(0, 5); - // The follower is in the minority and is part of a prevote process. On rejoin it adopts the - // old leader. + // The follower is in the minority and is part of a prevote process. On rejoin + // it adopts the old leader. test_prevote( &mut cluster, FailureType::Partition(&[1, 2, 3], &[3, 4, 5]), @@ -216,18 +217,21 @@ fn test_prevote_reboot_minority_followers() { ); } -// Test isolating a minority of the cluster and make sure that the remove themselves. +// Test isolating a minority of the cluster and make sure that the remove +// themselves. 
fn test_pair_isolated(cluster: &mut Cluster) { let region = 1; let pd_client = Arc::clone(&cluster.pd_client); - // Given some nodes A, B, C, D, E, we partition the cluster such that D, E are isolated from the rest. + // Given some nodes A, B, C, D, E, we partition the cluster such that D, E are + // isolated from the rest. cluster.run(); // Choose a predictable leader so we don't accidentally partition the leader. cluster.must_transfer_leader(region, new_peer(1, 1)); cluster.partition(vec![1, 2, 3], vec![4, 5]); - // Then, add a policy to PD that it should ask the Raft leader to remove the peer from the group. + // Then, add a policy to PD that it should ask the Raft leader to remove the + // peer from the group. pd_client.must_remove_peer(region, new_peer(4, 4)); pd_client.must_remove_peer(region, new_peer(5, 5)); diff --git a/tests/integrations/raftstore/test_region_change_observer.rs b/tests/integrations/raftstore/test_region_change_observer.rs index 3a1437e1868..261b1f2370e 100644 --- a/tests/integrations/raftstore/test_region_change_observer.rs +++ b/tests/integrations/raftstore/test_region_change_observer.rs @@ -97,7 +97,8 @@ fn test_region_change_observer_impl(mut cluster: Cluster) { cluster.must_split(&add_peer_event.0, b"k2"); let mut split_update = receiver.recv().unwrap(); let mut split_create = receiver.recv().unwrap(); - // We should receive an `Update` and a `Create`. The order of them is not important. + // We should receive an `Update` and a `Create`. The order of them is not + // important. if split_update.1 != RegionChangeEvent::Update(RegionChangeReason::Split) { mem::swap(&mut split_update, &mut split_create); } @@ -135,7 +136,8 @@ fn test_region_change_observer_impl(mut cluster: Cluster) { ); let mut merge_update = receiver.recv().unwrap(); let mut merge_destroy = receiver.recv().unwrap(); - // We should receive an `Update` and a `Destroy`. The order of them is not important. + // We should receive an `Update` and a `Destroy`. 
The order of them is not + // important. if merge_update.1 != RegionChangeEvent::Update(RegionChangeReason::CommitMerge) { mem::swap(&mut merge_update, &mut merge_destroy); } diff --git a/tests/integrations/raftstore/test_region_heartbeat.rs b/tests/integrations/raftstore/test_region_heartbeat.rs index b558f0800e7..117c10a3d19 100644 --- a/tests/integrations/raftstore/test_region_heartbeat.rs +++ b/tests/integrations/raftstore/test_region_heartbeat.rs @@ -47,9 +47,9 @@ fn test_down_peers(cluster: &mut Cluster) { cluster.stop_node(1); cluster.must_put(b"k1", b"v1"); - // max peer down duration is 500 millis, but we only report down time in seconds, - // so sleep 1 second to make the old down second is always larger than new down second - // by at lease 1 second. + // max peer down duration is 500 millis, but we only report down time in + // seconds, so sleep 1 second to make the old down second is always larger + // than new down second by at lease 1 second. sleep_ms(1000); wait_down_peers(cluster, 1, Some(1)); diff --git a/tests/integrations/raftstore/test_region_info_accessor.rs b/tests/integrations/raftstore/test_region_info_accessor.rs index 45df18d01a2..838e2ea492c 100644 --- a/tests/integrations/raftstore/test_region_info_accessor.rs +++ b/tests/integrations/raftstore/test_region_info_accessor.rs @@ -190,7 +190,8 @@ fn test_node_cluster_region_info_accessor() { })); cluster.run_conf_change(); let c = rx.recv().unwrap(); - // We only created it on the node whose id == 1 so we shouldn't receive more than one item. + // We only created it on the node whose id == 1 so we shouldn't receive more + // than one item. 
assert!(rx.try_recv().is_err()); test_region_info_accessor_impl(&mut cluster, &c); diff --git a/tests/integrations/raftstore/test_replica_read.rs b/tests/integrations/raftstore/test_replica_read.rs index 45e17ae37cf..8961008d4a5 100644 --- a/tests/integrations/raftstore/test_replica_read.rs +++ b/tests/integrations/raftstore/test_replica_read.rs @@ -61,10 +61,10 @@ fn test_replica_read_not_applied() { configure_for_lease_read(&mut cluster, Some(50), Some(30)); let max_lease = Duration::from_secs(1); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(max_lease); - // After the leader has committed to its term, pending reads on followers can be responsed. - // However followers can receive `ReadIndexResp` after become candidate if the leader has - // hibernated. So, disable the feature to avoid read requests on followers to be cleared as - // stale. + // After the leader has committed to its term, pending reads on followers can be + // responsed. However followers can receive `ReadIndexResp` after become + // candidate if the leader has hibernated. So, disable the feature to avoid + // read requests on followers to be cleared as stale. cluster.cfg.raft_store.hibernate_regions = false; cluster.pd_client.disable_default_operator(); @@ -103,13 +103,15 @@ fn test_replica_read_not_applied() { let resp1_ch = async_read_on_peer(&mut cluster, new_peer(3, 3), r1.clone(), b"k1", true, true); assert!(resp1_ch.recv_timeout(Duration::from_secs(1)).is_err()); - // Unpark all append responses so that the new leader can commit its first entry. + // Unpark all append responses so that the new leader can commit its first + // entry. let router = cluster.sim.wl().get_router(2).unwrap(); for raft_msg in mem::take::>(dropped_msgs.lock().unwrap().as_mut()) { router.send_raft_message(raft_msg).unwrap(); } - // The old read index request won't be blocked forever as it's retried internally. 
+ // The old read index request won't be blocked forever as it's retried + // internally. cluster.sim.wl().clear_send_filters(1); cluster.sim.wl().clear_recv_filters(2); let resp1 = resp1_ch.recv_timeout(Duration::from_secs(6)).unwrap(); @@ -128,8 +130,6 @@ fn test_replica_read_on_hibernate() { let mut cluster = new_node_cluster(0, 3); configure_for_lease_read(&mut cluster, Some(50), Some(20)); - // let max_lease = Duration::from_secs(2); - // cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(max_lease); cluster.pd_client.disable_default_operator(); let r1 = cluster.run_conf_change(); @@ -418,7 +418,8 @@ fn test_split_isolation() { let r1 = cluster.get_region(b"k2"); cluster.must_split(&r1, b"k2"); let idx = cluster.truncated_state(1, 1).get_index(); - // Trigger a log compaction, so the left region ['', 'k2'] cannot created through split cmd. + // Trigger a log compaction, so the left region ['', 'k2'] cannot created + // through split cmd. for i in 2..cluster.cfg.raft_store.raft_log_gc_count_limit() * 2 { cluster.must_put(format!("k{}", i).as_bytes(), format!("v{}", i).as_bytes()); } @@ -439,7 +440,8 @@ fn test_split_isolation() { } let peer = peer.unwrap(); cluster.run_node(2).unwrap(); - // Originally leader of region ['', 'k2'] will go to sleep, so the learner peer cannot be created. + // Originally leader of region ['', 'k2'] will go to sleep, so the learner peer + // cannot be created. 
for _ in 0..10 { let resp = async_read_on_peer(&mut cluster, peer.clone(), r2.clone(), b"k1", true, true); let resp = resp.recv_timeout(Duration::from_secs(1)).unwrap(); @@ -451,8 +453,9 @@ fn test_split_isolation() { panic!("test failed"); } -/// Testing after applying snapshot, the `ReadDelegate` stored at `StoreMeta` will be replace with -/// the new `ReadDelegate`, and the `ReadDelegate` stored at `LocalReader` should also be updated +/// Testing after applying snapshot, the `ReadDelegate` stored at `StoreMeta` +/// will be replace with the new `ReadDelegate`, and the `ReadDelegate` stored +/// at `LocalReader` should also be updated #[test] fn test_read_local_after_snapshpot_replace_peer() { let mut cluster = new_node_cluster(0, 3); @@ -472,8 +475,9 @@ fn test_read_local_after_snapshpot_replace_peer() { must_get_equal(&cluster.get_engine(i), b"k1", b"v1"); } - // send read request to peer 3, so the local reader will cache the `ReadDelegate` of peer 3 - // it is okey only send one request because the read pool thread count is 1 + // send read request to peer 3, so the local reader will cache the + // `ReadDelegate` of peer 3 it is okay only send one request because the + // read pool thread count is 1 let r = cluster.get_region(b"k1"); // wait applying snapshot finish sleep_ms(100); @@ -516,7 +520,8 @@ fn test_read_local_after_snapshpot_replace_peer() { assert_eq!(exp_value, b"v3"); } -/// The case checks if a malformed request should not corrupt the leader's read queue. +/// The case checks if a malformed request should not corrupt the leader's read +/// queue. 
#[test] fn test_malformed_read_index() { let mut cluster = new_node_cluster(0, 3); diff --git a/tests/integrations/raftstore/test_replication_mode.rs b/tests/integrations/raftstore/test_replication_mode.rs index dc496ef9637..3eddc7ce40d 100644 --- a/tests/integrations/raftstore/test_replication_mode.rs +++ b/tests/integrations/raftstore/test_replication_mode.rs @@ -38,8 +38,8 @@ fn run_cluster(cluster: &mut Cluster) { cluster.must_put(b"k1", b"v0"); } -/// When using DrAutoSync replication mode, data should be replicated to different labels -/// before committed. +/// When using DrAutoSync replication mode, data should be replicated to +/// different labels before committed. #[test] fn test_dr_auto_sync() { let mut cluster = prepare_cluster(); @@ -212,22 +212,22 @@ fn test_update_group_id() { cluster.must_split(®ion, b"k2"); let left = pd_client.get_region(b"k0").unwrap(); let right = pd_client.get_region(b"k2").unwrap(); - // When a node is started, all store information are loaded at once, so we need an extra node - // to verify resolve will assign group id. + // When a node is started, all store information are loaded at once, so we need + // an extra node to verify resolve will assign group id. cluster.add_label(3, "zone", "WS"); cluster.add_new_engine(); pd_client.must_add_peer(left.id, new_peer(2, 2)); pd_client.must_add_peer(left.id, new_learner_peer(3, 3)); pd_client.must_add_peer(left.id, new_peer(3, 3)); - // If node 3's group id is not assigned, leader will make commit index as the smallest last - // index of all followers. + // If node 3's group id is not assigned, leader will make commit index as the + // smallest last index of all followers. cluster.add_send_filter(IsolationFilterFactory::new(2)); cluster.must_put(b"k11", b"v11"); must_get_equal(&cluster.get_engine(3), b"k11", b"v11"); must_get_equal(&cluster.get_engine(1), b"k11", b"v11"); - // So both node 1 and node 3 have fully resolved all stores. 
Further updates to group ID have - // to be done when applying conf change and snapshot. + // So both node 1 and node 3 have fully resolved all stores. Further updates to + // group ID have to be done when applying conf change and snapshot. cluster.clear_send_filters(); pd_client.must_add_peer(right.id, new_peer(2, 4)); pd_client.must_add_peer(right.id, new_learner_peer(3, 5)); @@ -348,7 +348,8 @@ fn test_replication_mode_allowlist() { must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); } -/// Ensures hibernate region still works properly when switching replication mode. +/// Ensures hibernate region still works properly when switching replication +/// mode. #[test] fn test_switching_replication_mode_hibernate() { let mut cluster = new_server_cluster(0, 3); diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index 180e5fb1334..49ecf13c1d9 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -165,11 +165,13 @@ fn test_server_snap_gc_internal(version: &str) { let actual_max_per_file_size = cluster.get_snap_mgr(1).get_actual_max_per_file_size(true); - // version > 6.0.0 should enable multi_snapshot_file feature, which means actual max_per_file_size equals the config + // version > 6.0.0 should enable multi_snapshot_file feature, which means actual + // max_per_file_size equals the config if version == "6.5.0" { assert!(actual_max_per_file_size == cluster.cfg.raft_store.max_snapshot_file_raw_size.0); } else { - // the feature is disabled, and the actual_max_per_file_size should be u64::MAX (so that only one file is generated) + // the feature is disabled, and the actual_max_per_file_size should be u64::MAX + // (so that only one file is generated) assert!(actual_max_per_file_size == u64::MAX); } @@ -243,7 +245,8 @@ fn test_concurrent_snap(cluster: &mut Cluster) { if let Err(e) = rx.recv_timeout(Duration::from_secs(1)) { panic!("the snapshot is not sent before split, e: 
{:?}", e); } - // Split the region range and then there should be another snapshot for the split ranges. + // Split the region range and then there should be another snapshot for the + // split ranges. cluster.must_split(®ion, b"k2"); must_get_equal(&cluster.get_engine(3), b"k3", b"v3"); // Ensure the regions work after split. @@ -521,10 +524,11 @@ fn test_inspected_snapshot() { // Test snapshot generating and receiving can share one I/O limiter fairly. // 1. Bootstrap a 1 Region, 1 replica cluster; -// 2. Add a peer on store 2 for the Region, so that there is a snapshot received on store 2; -// 3. Rename the received snapshot on store 2, and then keep sending it back to store 1; -// 4. Add another peer for the Region, so store 1 will generate a new snapshot; -// 5. Test the generating can success while the store keeps receiving snapshots from store 2. +// 2. Add a peer on store 2 for the Region, so that there is a snapshot received +// on store 2; 3. Rename the received snapshot on store 2, and then keep sending +// it back to store 1; 4. Add another peer for the Region, so store 1 will +// generate a new snapshot; 5. Test the generating can success while the store +// keeps receiving snapshots from store 2. #[test] fn test_gen_during_heavy_recv() { let mut cluster = new_server_cluster(0, 3); @@ -608,7 +612,8 @@ fn test_gen_during_heavy_recv() { } }); - // While store 1 keeps receiving snapshots, it should still can generate a snapshot on time. + // While store 1 keeps receiving snapshots, it should still can generate a + // snapshot on time. pd_client.must_add_peer(r1, new_learner_peer(3, 3)); sleep_ms(500); must_get_equal(&cluster.get_engine(3), b"zzz-0000", b"value"); @@ -653,8 +658,8 @@ fn random_long_vec(length: usize) -> Vec { value } -/// Snapshot is generated using apply term from apply thread, which should be set -/// correctly otherwise lead to unconsistency. 
+/// Snapshot is generated using apply term from apply thread, which should be +/// set correctly otherwise lead to inconsistency. #[test] fn test_correct_snapshot_term() { // Use five replicas so leader can send a snapshot to a new peer without @@ -697,8 +702,8 @@ fn test_correct_snapshot_term() { // Clears send filters so peer 4 can accept snapshot from peer 5. If peer 5 // didn't set apply index correctly using snapshot in apply worker, the snapshot // will be generated as term 0. Raft consider term of missing index as 0, so - // peer 4 will accept the snapshot and think it has already applied it, hence fast - // forward it then panic. + // peer 4 will accept the snapshot and think it has already applied it, hence + // fast forward it then panic. cluster.clear_send_filters(); must_get_equal(&cluster.get_engine(4), b"k0", b"v0"); cluster.clear_send_filters(); diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 53c56510574..91022892f96 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -572,16 +572,17 @@ fn test_split_region_diff_check(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); - // The default size index distance is too large for small data, - // we flush multiple times to generate more size index handles. + // The default size index distance is too large for small data, we flush + // multiple times to generate more size index handles. for _ in 0..10 { put_till_size(cluster, region_max_size, &mut range); } - // Peer will split when size of region meet region_max_size, - // so assume the last region_max_size of data is not involved in split, - // there will be at least (region_max_size * 10 - region_max_size) / region_split_size regions. - // But region_max_size of data should be split too, so there will be at least 2 more regions. 
+ // Peer will split when size of region meet region_max_size, so assume the last + // region_max_size of data is not involved in split, there will be at least + // `(region_max_size * 10 - region_max_size) / region_split_size` regions. + // But region_max_size of data should be split too, so there will be at + // least 2 more regions. let min_region_cnt = (region_max_size * 10 - region_max_size) / region_split_size + 2; let mut try_cnt = 0; @@ -757,9 +758,10 @@ fn test_node_split_epoch_not_match_right_derive() { test_split_epoch_not_match(&mut cluster, true); } -// For the peer which is the leader of the region before split, -// it should campaigns immediately. and then this peer may take the leadership earlier. -// `test_quick_election_after_split` is a helper function for testing this feature. +// For the peer which is the leader of the region before split, it should +// campaigns immediately. and then this peer may take the leadership +// earlier. `test_quick_election_after_split` is a helper function for testing +// this feature. fn test_quick_election_after_split(cluster: &mut Cluster) { // Calculate the reserved time before a new campaign after split. let reserved_time = @@ -778,8 +780,8 @@ fn test_quick_election_after_split(cluster: &mut Cluster) { // The campaign should always succeeds in the ideal test environment. let new_region = cluster.get_region(b"k3"); - // Ensure the new leader is established for the newly split region, and it shares the - // same store with the leader of old region. + // Ensure the new leader is established for the newly split region, and it + // shares the same store with the leader of old region. let new_leader = cluster.query_leader( old_leader.get_store_id(), new_region.get_id(), @@ -1070,7 +1072,8 @@ fn test_refresh_region_bucket_keys() { ); assert_eq!(bucket_version3, bucket_version2); - // now the buckets is ["", "k12", ""]. 
further split ["", k12], [k12, ""] buckets into more buckets + // now the buckets is ["", "k12", ""]. further split ["", k12], [k12, ""] + // buckets into more buckets let region = pd_client.get_region(b"k11").unwrap(); let bucket_ranges = vec![ BucketRange(vec![], b"k12".to_vec()), @@ -1202,7 +1205,8 @@ fn test_gen_split_check_bucket_ranges() { let mut cluster = new_server_cluster(0, count); cluster.cfg.coprocessor.region_bucket_size = ReadableSize(5); cluster.cfg.coprocessor.enable_region_bucket = true; - // disable report buckets; as it will reset the user traffic stats to randmize the test result + // disable report buckets; as it will reset the user traffic stats to randomize + // the test result cluster.cfg.raft_store.check_leader_lease_interval = ReadableDuration::secs(5); // Make merge check resume quickly. cluster.cfg.raft_store.merge_check_tick_interval = ReadableDuration::millis(100); @@ -1248,7 +1252,8 @@ fn test_gen_split_check_bucket_ranges() { Option::None, Some(expected_buckets.clone()), ); - // because the diff between last_bucket_regions and bucket_regions is zero, bucket range for split check should be empty. + // because the diff between last_bucket_regions and bucket_regions is zero, + // bucket range for split check should be empty. let expected_bucket_ranges = vec![]; cluster.send_half_split_region_message(®ion, Some(expected_bucket_ranges)); diff --git a/tests/integrations/raftstore/test_stale_peer.rs b/tests/integrations/raftstore/test_stale_peer.rs index e9edcc49966..e12584d6c60 100644 --- a/tests/integrations/raftstore/test_stale_peer.rs +++ b/tests/integrations/raftstore/test_stale_peer.rs @@ -15,17 +15,19 @@ use tikv_util::{config::ReadableDuration, HandyRwLock}; /// If a peer detects the leader is missing for a specified long time, /// it should consider itself as a stale peer which is removed from the region. /// This test case covers the following scenario: -/// At first, there are three peer A, B, C in the cluster, and A is leader. 
-/// Peer B gets down. And then A adds D, E, F into the cluster. -/// Peer D becomes leader of the new cluster, and then removes peer A, B, C. -/// After all these peer in and out, now the cluster has peer D, E, F. -/// If peer B goes up at this moment, it still thinks it is one of the cluster -/// and has peers A, C. However, it could not reach A, C since they are removed from -/// the cluster or probably destroyed. -/// Meantime, D, E, F would not reach B, Since it's not in the cluster anymore. -/// In this case, Peer B would notice that the leader is missing for a long time, -/// and it would check with pd to confirm whether it's still a member of the cluster. -/// If not, it should destroy itself as a stale peer which is removed out already. +/// - At first, there are three peer A, B, C in the cluster, and A is leader. +/// - Peer B gets down. And then A adds D, E, F into the cluster. +/// - Peer D becomes leader of the new cluster, and then removes peer A, B, C. +/// - After all these peer in and out, now the cluster has peer D, E, F. +/// - If peer B goes up at this moment, it still thinks it is one of the +/// cluster and has peers A, C. However, it could not reach A, C since they +/// are removed from the cluster or probably destroyed. +/// - Meantime, D, E, F would not reach B, Since it's not in the cluster +/// anymore. +/// In this case, Peer B would notice that the leader is missing for a long +/// time, and it would check with pd to confirm whether it's still a member of +/// the cluster. If not, it should destroy itself as a stale peer which is +/// removed out already. fn test_stale_peer_out_of_region(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer number check. @@ -47,7 +49,8 @@ fn test_stale_peer_out_of_region(cluster: &mut Cluster) { cluster.add_send_filter(IsolationFilterFactory::new(2)); // In case 2 is leader, it will fail to pass the healthy nodes check, - // so remove isolated node first. 
Because 2 is isolated, so it can't remove itself. + // so remove isolated node first. Because 2 is isolated, so it can't remove + // itself. pd_client.must_remove_peer(r1, new_peer(2, 2)); // Add peer [(4, 4), (5, 5), (6, 6)]. @@ -96,18 +99,18 @@ fn test_server_stale_peer_out_of_region() { test_stale_peer_out_of_region(&mut cluster); } -/// A help function for testing the behaviour of the gc of stale peer -/// which is out or region. -/// If a peer detects the leader is missing for a specified long time, -/// it should consider itself as a stale peer which is removed from the region. -/// This test case covers the following scenario: -/// A peer, B is initialized as a replicated peer without data after -/// receiving a single raft AE message. But then it goes through some process like -/// the case of `test_stale_peer_out_of_region`, it's removed out of the region -/// and wouldn't be contacted anymore. -/// In both cases, peer B would notice that the leader is missing for a long time, -/// and it's an initialized peer without any data. It would destroy itself as -/// as stale peer directly and should not impact other region data on the same store. +/// A help function for testing the behaviour of the gc of stale peer which is +/// out or region. If a peer detects the leader is missing for a specified long +/// time, it should consider itself as a stale peer which is removed from the +/// region. This test case covers the following scenario: +/// - A peer, B is initialized as a replicated peer without data after receiving +/// a single raft AE message. But then it goes through some process like the +/// case of `test_stale_peer_out_of_region`, it's removed out of the region +/// and wouldn't be contacted anymore. +/// In both cases, peer B would notice that the leader is missing for a long +/// time, and it's an initialized peer without any data. It would destroy itself +/// as stale peer directly and should not impact other region data on the +/// same store. 
fn test_stale_peer_without_data(cluster: &mut Cluster, right_derive: bool) { cluster.cfg.raft_store.right_derive_when_split = right_derive; diff --git a/tests/integrations/raftstore/test_tombstone.rs b/tests/integrations/raftstore/test_tombstone.rs index 189587dea44..21adc354295 100644 --- a/tests/integrations/raftstore/test_tombstone.rs +++ b/tests/integrations/raftstore/test_tombstone.rs @@ -263,9 +263,9 @@ fn test_server_stale_meta() { /// Tests a tombstone peer won't trigger wrong gc message. /// -/// An uninitialized peer's peer list is empty. If a message from a healthy peer passes -/// all the other checks accidentally, it may trigger a tombstone message which will -/// make the healthy peer destroy all its data. +/// An uninitialized peer's peer list is empty. If a message from a healthy peer +/// passes all the other checks accidentally, it may trigger a tombstone message +/// which will make the healthy peer destroy all its data. #[test] fn test_safe_tombstone_gc() { let mut cluster = new_node_cluster(0, 5); diff --git a/tests/integrations/raftstore/test_transfer_leader.rs b/tests/integrations/raftstore/test_transfer_leader.rs index cb1c970914d..86789fc8f7f 100644 --- a/tests/integrations/raftstore/test_transfer_leader.rs +++ b/tests/integrations/raftstore/test_transfer_leader.rs @@ -361,7 +361,8 @@ fn test_memory_pessimistic_locks_status_after_transfer_leader_failure() { LocksStatus::TransferringLeader ); - // After several ticks, in-memory pessimistic locks should become available again. + // After several ticks, in-memory pessimistic locks should become available + // again. 
thread::sleep(Duration::from_secs(1)); assert_eq!(txn_ext.pessimistic_locks.read().status, LocksStatus::Normal); cluster.reset_leader_of_region(1); diff --git a/tests/integrations/raftstore/test_unsafe_recovery.rs b/tests/integrations/raftstore/test_unsafe_recovery.rs index ebeb99ddfe7..cf2361ebc8e 100644 --- a/tests/integrations/raftstore/test_unsafe_recovery.rs +++ b/tests/integrations/raftstore/test_unsafe_recovery.rs @@ -305,8 +305,9 @@ fn test_unsafe_recovery_already_in_joint_state() { assert!(promoted); } -// Tests whether unsafe recovery behaves correctly when the failed region is already in the -// middle of a joint state, once exit, it recovers itself without any further demotions. +// Tests whether unsafe recovery behaves correctly when the failed region is +// already in the middle of a joint state, once exit, it recovers itself without +// any further demotions. #[test] fn test_unsafe_recovery_early_return_after_exit_joint_state() { let mut cluster = new_server_cluster(0, 3); @@ -789,16 +790,19 @@ fn test_force_leader_trigger_snapshot() { find_peer(®ion, 3).unwrap().clone(), ); let req = new_admin_request(region.get_id(), region.get_region_epoch(), cmd); - // Though it has a force leader now, but the command can't committed because the log is not replicated to all the alive peers. + // Though it has a force leader now, but the command can't committed because the + // log is not replicated to all the alive peers. assert!( cluster .call_command_on_leader(req, Duration::from_millis(1000)) .unwrap() .get_header() - .has_error() // error "there is a pending conf change" indicating no committed log after being the leader + .has_error() /* error "there is a pending conf change" indicating no committed log + * after being the leader */ ); - // Permit snapshot message, snapshot should be applied and advance commit index now. + // Permit snapshot message, snapshot should be applied and advance commit index + // now. 
cluster.sim.wl().clear_recv_filters(2); cluster .pd_client @@ -863,7 +867,8 @@ fn test_force_leader_with_uncommitted_conf_change() { * 2, )); cluster.must_enter_force_leader(region.get_id(), 1, vec![3, 4, 5]); - // the uncommitted conf-change is committed successfully after being force leader + // the uncommitted conf-change is committed successfully after being force + // leader cluster .pd_client .must_none_peer(region.get_id(), find_peer(®ion, 2).unwrap().clone()); @@ -885,12 +890,13 @@ fn test_force_leader_with_uncommitted_conf_change() { assert_eq!(cluster.must_get(b"k4"), Some(b"v4".to_vec())); } -// Test the case that none of five nodes fails and force leader on one of the nodes. -// Note: It still can't defend extreme misuse cases. For example, a group of a, -// b and c. c is isolated from a, a is the leader. If c has increased its term -// by 2 somehow (for example false prevote success twice) and force leader is -// sent to b and break lease constrain, then b will reject a's heartbeat while -// can vote for c. So c becomes leader and there are two leaders in the group. +// Test the case that none of five nodes fails and force leader on one of the +// nodes. Note: It still can't defend extreme misuse cases. For example, a group +// of a, b and c. c is isolated from a, a is the leader. If c has increased its +// term by 2 somehow (for example false prevote success twice) and force leader +// is sent to b and break lease constrain, then b will reject a's heartbeat +// while can vote for c. So c becomes leader and there are two leaders in the +// group. 
#[test] fn test_force_leader_on_healthy_region() { let mut cluster = new_node_cluster(0, 5); @@ -920,7 +926,8 @@ fn test_force_leader_on_healthy_region() { assert_eq!(cluster.must_get(b"k1"), Some(b"v1".to_vec())); cluster.must_put(b"k2", b"v2"); - // try to exit force leader, it will be ignored silently as it's not in the force leader state + // try to exit force leader, it will be ignored silently as it's not in the + // force leader state cluster.exit_force_leader(region.get_id(), 1); cluster.must_put(b"k4", b"v4"); @@ -1147,15 +1154,17 @@ fn test_force_leader_multiple_election_rounds() { } // Tests whether unsafe recovery report sets has_commit_merge correctly. -// This field is used by PD to issue force leader command in order, so that the recovery process -// does not break the merge accidentally, when: -// * The source region and the target region lost their quorum. -// * The living peer(s) of the source region does not have prepare merge message replicated. -// * The living peer(s) of the target region has commit merge messages replicated but -// uncommitted. -// If the living peer(s) of the source region in the above example enters force leader state before -// the peer(s) of the target region, thus proposes a no-op entry (while becoming the leader) which -// is conflict with part of the catch up logs, there will be data loss. +// This field is used by PD to issue force leader command in order, so that the +// recovery process does not break the merge accidentally, when: +// * The source region and the target region lost their quorum. +// * The living peer(s) of the source region does not have prepare merge +// message replicated. +// * The living peer(s) of the target region has commit merge messages +// replicated but uncommitted. 
+// If the living peer(s) of the source region in the above example enters force +// leader state before the peer(s) of the target region, thus proposes a no-op +// entry (while becoming the leader) which is conflict with part of the catch up +// logs, there will be data loss. #[test] fn test_unsafe_recovery_has_commit_merge() { let mut cluster = new_node_cluster(0, 3); @@ -1178,8 +1187,8 @@ fn test_unsafe_recovery_has_commit_merge() { let right_on_store1 = find_peer(&right, 1).unwrap(); cluster.must_transfer_leader(right.get_id(), right_on_store1.clone()); - // Block the target region from receiving MsgAppendResponse, so that the commit merge message - // will only be replicated but not committed. + // Block the target region from receiving MsgAppendResponse, so that the commit + // merge message will only be replicated but not committed. let recv_filter = Box::new( RegionPacketFilter::new(right.get_id(), 1) .direction(Direction::Recv) @@ -1236,15 +1245,15 @@ fn test_unsafe_recovery_during_merge() { let right_on_store1 = find_peer(&right, 1).unwrap(); cluster.must_transfer_leader(right.get_id(), right_on_store1.clone()); - // Blocks the replication of prepare merge message, so that the commit merge back fills it - // in CatchUpLogs. + // Blocks the replication of prepare merge message, so that the commit merge + // back fills it in CatchUpLogs. let append_filter = Box::new( RegionPacketFilter::new(left.get_id(), 2) .direction(Direction::Recv) .msg_type(MessageType::MsgAppend), ); - // Blocks the target region from receiving MsgAppendResponse, so that the commit merge message - // will only be replicated but not committed. + // Blocks the target region from receiving MsgAppendResponse, so that the commit + // merge message will only be replicated but not committed. 
let commit_filter = Box::new( RegionPacketFilter::new(right.get_id(), 1) .direction(Direction::Recv) diff --git a/tests/integrations/server/gc_worker.rs b/tests/integrations/server/gc_worker.rs index 1ce3cc6415a..4f521cb1da7 100644 --- a/tests/integrations/server/gc_worker.rs +++ b/tests/integrations/server/gc_worker.rs @@ -226,7 +226,8 @@ fn test_applied_lock_collector() { assert_eq!(resp.get_locks().len(), 1024); }); - // Register lock observer at a later safe point. Lock observer should reset its state. + // Register lock observer at a later safe point. Lock observer should reset its + // state. safe_point += 1; clients.iter().for_each(|(_, c)| { must_register_lock_observer(c, safe_point); @@ -266,8 +267,8 @@ fn test_applied_lock_collector() { }); } -// Since v5.0 GC bypasses Raft, which means GC scans/deletes records with `keys::DATA_PREFIX`. -// This case ensures it's performed correctly. +// Since v5.0 GC bypasses Raft, which means GC scans/deletes records with +// `keys::DATA_PREFIX`. This case ensures it's performed correctly. #[test] fn test_gc_bypass_raft() { let (cluster, leader, ctx) = must_new_cluster_mul(1); diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 367f38114f6..95d1494c660 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -1444,7 +1444,7 @@ macro_rules! test_func { macro_rules! 
test_func_init { ($client:ident, $ctx:ident, $call_opt:ident, $func:ident, $req:ident) => {{ test_func!($client, $ctx, $call_opt, $func, $req::default()) }}; - ($client:ident, $ctx:ident, $call_opt:ident, $func:ident, $req:ident, batch) => {{ + ($client:ident, $ctx:ident, $call_opt:ident, $func:ident, $req:ident,batch) => {{ test_func!($client, $ctx, $call_opt, $func, { let mut req = $req::default(); req.set_keys(vec![b"key".to_vec()].into()); @@ -1664,7 +1664,8 @@ fn test_tikv_forwarding() { } } -/// Test if forwarding works correctly if the target node is shutdown and restarted. +/// Test if forwarding works correctly if the target node is shutdown and +/// restarted. #[test] fn test_forwarding_reconnect() { let (mut cluster, client, call_opt, ctx) = setup_cluster(); @@ -1753,7 +1754,8 @@ fn test_get_lock_wait_info_api() { // Test API version verification for transaction requests. // See the following for detail: // * rfc: https://github.com/tikv/rfcs/blob/master/text/0069-api-v2.md. -// * proto: https://github.com/pingcap/kvproto/blob/master/proto/kvrpcpb.proto, enum APIVersion. +// * proto: https://github.com/pingcap/kvproto/blob/master/proto/kvrpcpb.proto, +// enum APIVersion. #[test] fn test_txn_api_version() { const TIDB_KEY_CASE: &[u8] = b"t_a"; @@ -1831,7 +1833,7 @@ fn test_txn_api_version() { let expect_prefix = format!("Error({}", errcode); assert!(!errs.is_empty(), "case {}", i); assert!( - errs[0].get_abort().starts_with(&expect_prefix), // e.g. Error(ApiVersionNotMatched { storage_api_version: V1, req_api_version: V2 }) + errs[0].get_abort().starts_with(&expect_prefix), /* e.g. 
Error(ApiVersionNotMatched { storage_api_version: V1, req_api_version: V2 }) */ "case {}: errs[0]: {:?}, expected: {}", i, errs[0], @@ -1956,7 +1958,8 @@ fn test_txn_api_version() { #[test] fn test_storage_with_quota_limiter_enable() { let (cluster, leader, ctx) = must_new_and_configure_cluster(|cluster| { - // write_bandwidth is limited to 1, which means that every write request will trigger the limit. + // write_bandwidth is limited to 1, which means that every write request will + // trigger the limit. let quota_config = QuotaConfig { foreground_cpu_time: 2000, foreground_write_bandwidth: ReadableSize(10), diff --git a/tests/integrations/server/lock_manager.rs b/tests/integrations/server/lock_manager.rs index 4fe3b98ebe1..d796d9c1f66 100644 --- a/tests/integrations/server/lock_manager.rs +++ b/tests/integrations/server/lock_manager.rs @@ -20,8 +20,8 @@ fn deadlock(client: &TikvClient, ctx: Context, key1: &[u8], ts: u64) -> bool { let (client_clone, mut ctx_clone, key1_clone) = (client.clone(), ctx.clone(), key1.clone()); let handle = thread::spawn(move || { - // `resource_group_tag` is set to check if the wait chain reported by the deadlock error - // carries the correct information. + // `resource_group_tag` is set to check if the wait chain reported by the + // deadlock error carries the correct information. ctx_clone.set_resource_group_tag(b"tag1".to_vec()); let resp = kv_pessimistic_lock( &client_clone, @@ -80,8 +80,8 @@ fn build_leader_client(cluster: &mut Cluster, key: &[u8]) -> (Tik /// Creates a deadlock on the store containing key. fn must_detect_deadlock(cluster: &mut Cluster, key: &[u8], ts: u64) { - // Sometimes, deadlocks can't be detected at once due to leader change, but it will be - // detected. + // Sometimes, deadlocks can't be detected at once due to leader change, but it + // will be detected. 
for _ in 0..5 { let (client, ctx) = build_leader_client(cluster, key); if deadlock(&client, ctx, key, ts) { @@ -118,8 +118,8 @@ fn must_transfer_leader(cluster: &mut Cluster, region_key: &[u8], /// Transfers the region containing region_key from source store to target peer. /// -/// REQUIRE: The source store must be the leader the region and the target store must not have -/// this region. +/// REQUIRE: The source store must be the leader the region and the target store +/// must not have this region. fn must_transfer_region( cluster: &mut Cluster, region_key: &[u8], @@ -168,7 +168,8 @@ fn find_peer_of_store(region: &Region, store_id: u64) -> Peer { .clone() } -/// Creates a cluster with only one region and store(1) is the leader of the region. +/// Creates a cluster with only one region and store(1) is the leader of the +/// region. fn new_cluster_for_deadlock_test(count: usize) -> Cluster { let mut cluster = new_server_cluster(0, count); cluster.cfg.pessimistic_txn.wait_for_lock_timeout = ReadableDuration::millis(500); @@ -229,8 +230,8 @@ fn test_detect_deadlock_when_split_region() { #[test] fn test_detect_deadlock_when_transfer_region() { let mut cluster = new_cluster_for_deadlock_test(4); - // Transfer the leader region to store(4) and the leader of deadlock detector should be - // also transfered. + // Transfer the leader region to store(4) and the leader of deadlock detector + // should be also transferred. must_transfer_region(&mut cluster, b"k", 1, 4, 4); deadlock_detector_leader_must_be(&mut cluster, 4); must_detect_deadlock(&mut cluster, b"k", 10); @@ -242,8 +243,8 @@ fn test_detect_deadlock_when_transfer_region() { must_detect_deadlock(&mut cluster, b"k", 10); must_detect_deadlock(&mut cluster, b"k1", 10); - // Transfer the new region back to store(4) which will send a role change message with empty - // key range. It shouldn't affect deadlock detector. 
+ // Transfer the new region back to store(4) which will send a role change + // message with empty key range. It shouldn't affect deadlock detector. must_transfer_region(&mut cluster, b"k1", 1, 4, 6); deadlock_detector_leader_must_be(&mut cluster, 4); must_detect_deadlock(&mut cluster, b"k", 10); diff --git a/tests/integrations/server/raft_client.rs b/tests/integrations/server/raft_client.rs index de7c238e2c3..c3964ab39d8 100644 --- a/tests/integrations/server/raft_client.rs +++ b/tests/integrations/server/raft_client.rs @@ -236,8 +236,8 @@ fn test_batch_size_limit() { assert_eq!(msg_count.load(Ordering::SeqCst), 10); } -/// In edge case that the estimated size may be inaccurate, we need to ensure connection -/// will not be broken in this case. +/// In edge case that the estimated size may be inaccurate, we need to ensure +/// connection will not be broken in this case. #[test] fn test_batch_size_edge_limit() { let msg_count = Arc::new(AtomicUsize::new(0)); @@ -247,13 +247,14 @@ fn test_batch_size_edge_limit() { let mut raft_client = get_raft_client_by_port(port); - // Put them in buffer so sibling messages will be likely be batched during sending. + // Put them in buffer so sibling messages will be likely be batched during + // sending. let mut msgs = Vec::with_capacity(5); for _ in 0..5 { let mut raft_m = RaftMessage::default(); - // Magic number, this can make estimated size about 4940000, hence two messages will be - // batched together, but the total size will be way largher than 10MiB as there are many - // indexes and terms. + // Magic number, this can make estimated size about 4940000, hence two messages + // will be batched together, but the total size will be way larger than + // 10MiB as there are many indexes and terms. for _ in 0..38000 { let mut e = Entry::default(); e.set_term(1); @@ -275,8 +276,9 @@ fn test_batch_size_edge_limit() { assert_eq!(msg_count.load(Ordering::SeqCst), 5); } -// Try to create a mock server with `service`. 
The server will be binded wiht a random -// port chosen between [`min_port`, `max_port`]. Return `None` if no port is available. +// Try to create a mock server with `service`. The server will be bounded with a +// random port chosen between [`min_port`, `max_port`]. Return `None` if no port +// is available. fn create_mock_server(service: T, min_port: u16, max_port: u16) -> Option<(Server, u16)> where T: Tikv + Clone + Send + 'static, diff --git a/tests/integrations/storage/test_storage.rs b/tests/integrations/storage/test_storage.rs index 72eabdb7828..21c9db6fe42 100644 --- a/tests/integrations/storage/test_storage.rs +++ b/tests/integrations/storage/test_storage.rs @@ -913,7 +913,8 @@ const RAW_KEY_CASE: &[u8] = b"r\0_a"; // Test API version verification for txnkv requests. // See the following for detail: // * rfc: https://github.com/tikv/rfcs/blob/master/text/0069-api-v2.md. -// * proto: https://github.com/pingcap/kvproto/blob/master/proto/kvrpcpb.proto, enum APIVersion. +// * proto: https://github.com/pingcap/kvproto/blob/master/proto/kvrpcpb.proto, +// enum APIVersion. #[test] fn test_txn_store_txnkv_api_version() { let test_data = vec![ @@ -967,7 +968,8 @@ fn test_txn_store_txnkv_api_version() { store.scan_err(key, None, 100, 10); - // To compatible with TiDB gc-worker, we remove check_api_version_ranges in scan_lock + // To compatible with TiDB gc-worker, we remove check_api_version_ranges in + // scan_lock store.scan_locks_ok(20, key, &end_key, 10, vec![]); store.delete_range_err(key, key); @@ -979,7 +981,8 @@ fn test_txn_store_txnkv_api_version() { // Test API version verification for rawkv requests. // See the following for detail: // * rfc: https://github.com/tikv/rfcs/blob/master/text/0069-api-v2.md. -// * proto: https://github.com/pingcap/kvproto/blob/master/proto/kvrpcpb.proto, enum APIVersion. +// * proto: https://github.com/pingcap/kvproto/blob/master/proto/kvrpcpb.proto, +// enum APIVersion. 
#[test] fn test_txn_store_rawkv_api_version() { let test_data = vec![ From b22be438650b914231c6ea4b0afb9c72538044c9 Mon Sep 17 00:00:00 2001 From: Jay Date: Thu, 28 Jul 2022 16:53:11 +0800 Subject: [PATCH 0119/1149] *: rename all DB and CF (#13149) close tikv/tikv#12394 to Db and Cf, and ColumnFamily to Cf, IO to Io Signed-off-by: Jay Lee --- cmd/tikv-ctl/src/executor.rs | 16 +- cmd/tikv-ctl/src/main.rs | 14 +- .../backup-stream/src/subscription_manager.rs | 2 +- components/backup/src/endpoint.rs | 18 +- components/backup/src/utils.rs | 4 +- components/batch-system/src/batch.rs | 4 +- components/cloud/gcp/src/gcs.rs | 14 +- components/cloud/gcp/src/lib.rs | 2 +- components/encryption/export/src/lib.rs | 4 +- components/encryption/src/crypter.rs | 34 +-- components/encryption/src/file_dict_file.rs | 4 +- components/encryption/src/lib.rs | 5 +- components/encryption/src/manager/mod.rs | 34 +-- components/engine_panic/src/cf_names.rs | 4 +- components/engine_panic/src/cf_options.rs | 18 +- components/engine_panic/src/db_options.rs | 20 +- components/engine_panic/src/db_vector.rs | 10 +- components/engine_panic/src/engine.rs | 8 +- components/engine_panic/src/snapshot.rs | 8 +- components/engine_rocks/src/cf_names.rs | 4 +- components/engine_rocks/src/cf_options.rs | 16 +- components/engine_rocks/src/compact.rs | 8 +- components/engine_rocks/src/db_options.rs | 46 ++-- components/engine_rocks/src/db_vector.rs | 18 +- components/engine_rocks/src/engine.rs | 12 +- components/engine_rocks/src/event_listener.rs | 28 +- components/engine_rocks/src/file_system.rs | 30 +- components/engine_rocks/src/import.rs | 4 +- components/engine_rocks/src/lib.rs | 2 +- components/engine_rocks/src/logger.rs | 4 +- components/engine_rocks/src/misc.rs | 8 +- components/engine_rocks/src/properties.rs | 4 +- components/engine_rocks/src/raft_engine.rs | 4 +- components/engine_rocks/src/snapshot.rs | 12 +- components/engine_rocks/src/util.rs | 52 ++-- components/engine_rocks/src/write_batch.rs 
| 6 +- components/engine_test/src/lib.rs | 89 +++--- components/engine_traits/src/cf_names.rs | 2 +- components/engine_traits/src/cf_options.rs | 14 +- components/engine_traits/src/db_options.rs | 14 +- components/engine_traits/src/db_vector.rs | 2 +- components/engine_traits/src/engine.rs | 16 +- components/engine_traits/src/file_system.rs | 10 +- components/engine_traits/src/lib.rs | 2 +- components/engine_traits/src/misc.rs | 4 +- components/engine_traits/src/peekable.rs | 10 +- components/engine_traits/src/raft_engine.rs | 4 +- .../engine_traits_tests/src/cf_names.rs | 2 +- components/engine_traits_tests/src/ctor.rs | 23 +- components/engine_traits_tests/src/lib.rs | 7 +- .../src/scenario_writes.rs | 2 +- .../external_storage/export/src/export.rs | 10 +- components/external_storage/src/lib.rs | 4 +- components/file_system/src/file.rs | 22 +- .../file_system/src/io_stats/biosnoop.rs | 76 +++--- components/file_system/src/io_stats/mod.rs | 20 +- components/file_system/src/io_stats/proc.rs | 56 ++-- components/file_system/src/lib.rs | 86 +++--- components/file_system/src/metrics.rs | 30 +- components/file_system/src/metrics_manager.rs | 22 +- components/file_system/src/rate_limiter.rs | 258 +++++++++--------- components/raft_log_engine/src/engine.rs | 22 +- .../src/coprocessor/split_check/half.rs | 6 +- .../src/coprocessor/split_check/keys.rs | 10 +- .../src/coprocessor/split_check/size.rs | 50 ++-- .../raftstore/src/store/compaction_guard.rs | 6 +- components/raftstore/src/store/peer.rs | 4 +- .../raftstore/src/store/peer_storage.rs | 8 +- .../raftstore/src/store/region_snapshot.rs | 6 +- components/raftstore/src/store/snap.rs | 44 ++- components/raftstore/src/store/snap/io.rs | 7 +- .../raftstore/src/store/worker/compact.rs | 12 +- .../raftstore/src/store/worker/raftlog_gc.rs | 10 +- .../src/store/worker/refresh_config.rs | 4 +- .../raftstore/src/store/worker/region.rs | 12 +- .../raftstore/src/store/worker/split_check.rs | 4 +- components/security/src/lib.rs | 
6 +- components/server/src/server.rs | 24 +- components/sst_importer/src/import_mode.rs | 56 ++-- components/sst_importer/src/sst_importer.rs | 10 +- components/sst_importer/src/util.rs | 14 +- components/test_coprocessor/src/dag.rs | 10 +- components/test_raftstore/src/cluster.rs | 4 +- components/test_raftstore/src/util.rs | 4 +- components/test_sst_importer/src/lib.rs | 6 +- .../src/codec/collation/encoding/utf8.rs | 14 +- .../src/codec/collation/mod.rs | 4 +- .../src/codec/data_type/mod.rs | 30 +- .../src/codec/data_type/scalar.rs | 2 +- .../tidb_query_datatype/src/codec/datum.rs | 2 +- .../src/codec/mysql/decimal.rs | 2 +- .../src/codec/mysql/duration.rs | 2 +- .../src/codec/mysql/enums.rs | 2 +- .../src/codec/mysql/json/mod.rs | 2 +- .../src/codec/mysql/json/serde.rs | 10 +- .../src/codec/mysql/set.rs | 2 +- .../src/codec/mysql/time/mod.rs | 2 +- .../src/codec/row/v2/row_slice.rs | 28 +- .../src/index_scan_executor.rs | 6 +- .../src/selection_executor.rs | 2 +- components/tikv_kv/src/cursor.rs | 4 +- components/tikv_kv/src/rocksdb_engine.rs | 8 +- components/tikv_util/src/logger/mod.rs | 6 +- scripts/clippy | 2 +- src/config.rs | 79 +++--- src/coprocessor/dag/mod.rs | 8 +- src/coprocessor/dag/storage_impl.rs | 8 +- src/coprocessor/readpool_impl.rs | 6 +- src/import/sst_service.rs | 6 +- src/read_pool.rs | 4 +- src/server/debug.rs | 54 ++-- src/server/engine_factory.rs | 4 +- src/server/engine_factory_v2.rs | 4 +- .../gc_worker/applied_lock_collector.rs | 2 +- src/server/gc_worker/compaction_filter.rs | 22 +- src/server/gc_worker/gc_manager.rs | 2 +- src/server/gc_worker/gc_worker.rs | 12 +- src/server/gc_worker/mod.rs | 4 +- .../gc_worker/rawkv_compaction_filter.rs | 8 +- src/server/snap.rs | 14 +- src/storage/config.rs | 104 +++---- src/storage/config_manager.rs | 8 +- src/storage/kv/test_engine_builder.rs | 6 +- src/storage/mod.rs | 4 +- src/storage/mvcc/consistency_check.rs | 12 +- src/storage/mvcc/reader/reader.rs | 4 +- src/storage/read_pool.rs | 6 +- 
.../singleton_flow_controller.rs | 18 +- .../flow_controller/tablet_flow_controller.rs | 6 +- src/storage/txn/sched_pool.rs | 4 +- .../misc/writebatch/bench_writebatch.rs | 8 +- tests/failpoints/cases/test_coprocessor.rs | 6 +- tests/failpoints/cases/test_encryption.rs | 4 +- tests/failpoints/cases/test_gc_worker.rs | 6 +- tests/integrations/config/mod.rs | 32 +-- tests/integrations/coprocessor/test_select.rs | 70 ++--- tests/integrations/raftstore/test_snap.rs | 18 +- .../integrations/raftstore/test_tombstone.rs | 2 +- tests/integrations/storage/test_titan.rs | 2 +- 139 files changed, 1141 insertions(+), 1151 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 62ce325a130..df2c3cfbadf 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -13,7 +13,7 @@ use engine_traits::{ use futures::{executor::block_on, future, stream, Stream, StreamExt, TryStreamExt}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ - debugpb::{Db as DBType, *}, + debugpb::{Db as DbType, *}, kvrpcpb::MvccInfo, metapb::{Peer, Region}, raft_cmdpb::RaftCmdRequest, @@ -464,7 +464,7 @@ pub trait DebugExecutor { fn compact( &self, address: Option<&str>, - db: DBType, + db: DbType, cf: &str, from: Option>, to: Option>, @@ -487,7 +487,7 @@ pub trait DebugExecutor { fn compact_region( &self, address: Option<&str>, - db: DBType, + db: DbType, cf: &str, region_id: u64, threads: u32, @@ -604,7 +604,7 @@ pub trait DebugExecutor { fn do_compaction( &self, - db: DBType, + db: DbType, cf: &str, from: &[u8], to: &[u8], @@ -649,7 +649,7 @@ impl DebugExecutor for DebugClient { fn get_value_by_key(&self, cf: &str, key: Vec) -> Vec { let mut req = GetRequest::default(); - req.set_db(DBType::Kv); + req.set_db(DbType::Kv); req.set_cf(cf.to_owned()); req.set_key(key); self.get(&req) @@ -718,7 +718,7 @@ impl DebugExecutor for DebugClient { fn do_compaction( &self, - db: DBType, + db: DbType, cf: &str, from: &[u8], to: &[u8], @@ -858,7 +858,7 @@ 
impl DebugExecutor for Debugger { } fn get_value_by_key(&self, cf: &str, key: Vec) -> Vec { - self.get(DBType::Kv, cf, &key) + self.get(DbType::Kv, cf, &key) .unwrap_or_else(|e| perror_and_exit("Debugger::get", e)) } @@ -902,7 +902,7 @@ impl DebugExecutor for Debugger { fn do_compaction( &self, - db: DBType, + db: DbType, cf: &str, from: &[u8], to: &[u8], diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 00094af8dc6..d37336cbd36 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -23,8 +23,8 @@ use std::{ }; use encryption_export::{ - create_backend, data_key_manager_from_config, encryption_method_from_db_encryption_method, - DataKeyManager, DecrypterReader, Iv, + create_backend, data_key_manager_from_config, from_engine_encryption_method, DataKeyManager, + DecrypterReader, Iv, }; use engine_rocks::get_env; use engine_traits::EncryptionKeyManager; @@ -33,7 +33,7 @@ use futures::executor::block_on; use gag::BufferRedirect; use grpcio::{CallOption, ChannelBuilder, Environment}; use kvproto::{ - debugpb::{Db as DBType, *}, + debugpb::{Db as DbType, *}, encryptionpb::EncryptionMethod, kvrpcpb::SplitRegionRequest, raft_serverpb::SnapshotMeta, @@ -151,7 +151,7 @@ fn main() { let infile1 = Path::new(infile).canonicalize().unwrap(); let file_info = key_manager.get_file(infile1.to_str().unwrap()).unwrap(); - let mthd = encryption_method_from_db_encryption_method(file_info.method); + let mthd = from_engine_encryption_method(file_info.method); if mthd == EncryptionMethod::Plaintext { println!( "{} is not encrypted, skip to decrypt it into {}", @@ -218,7 +218,7 @@ fn main() { bottommost, } => { let pd_client = get_pd_rpc_client(opt.pd, Arc::clone(&mgr)); - let db_type = if db == "kv" { DBType::Kv } else { DBType::Raft }; + let db_type = if db == "kv" { DbType::Kv } else { DbType::Raft }; let cfs = cf.iter().map(|s| s.as_ref()).collect(); let from_key = from.map(|k| unescape(&k)); let to_key = to.map(|k| unescape(&k)); @@ -347,7 +347,7 
@@ fn main() { threads, bottommost, } => { - let db_type = if db == "kv" { DBType::Kv } else { DBType::Raft }; + let db_type = if db == "kv" { DbType::Kv } else { DbType::Raft }; let from_key = from.map(|k| unescape(&k)); let to_key = to.map(|k| unescape(&k)); let bottommost = BottommostLevelCompaction::from(Some(bottommost.as_ref())); @@ -610,7 +610,7 @@ fn compact_whole_cluster( pd_client: &RpcClient, cfg: &TiKvConfig, mgr: Arc, - db_type: DBType, + db_type: DbType, cfs: Vec<&str>, from: Option>, to: Option>, diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index 4555bdbf4ff..c6e928b8201 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -238,7 +238,7 @@ fn spawn_executors(init: impl InitialScan + Send + 'static, number: usize) -> Sc let stopped = stopped.clone(); pool.spawn(move |_: &mut YatpHandle<'_>| { tikv_alloc::add_thread_memory_accessor(); - let _io_guard = file_system::WithIOType::new(file_system::IOType::Replication); + let _io_guard = file_system::WithIoType::new(file_system::IoType::Replication); scan_executor_loop(init, rx, stopped); tikv_alloc::remove_thread_memory_accessor(); }) diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index ada36a08615..7af38d12ac4 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -1181,7 +1181,7 @@ pub mod tests { use api_version::{api_v2::RAW_KEY_PREFIX, dispatch_api_version, KvFormat, RawValue}; use engine_traits::MiscExt; use external_storage_export::{make_local_backend, make_noop_backend}; - use file_system::{IOOp, IORateLimiter, IOType}; + use file_system::{IoOp, IoRateLimiter, IoType}; use futures::{executor::block_on, stream::StreamExt}; use kvproto::metapb; use raftstore::{ @@ -1265,7 +1265,7 @@ pub mod tests { } pub fn new_endpoint_with_limiter( - limiter: Option>, + limiter: Option>, 
api_version: ApiVersion, is_raw_kv: bool, causal_ts_provider: Option>, @@ -1508,7 +1508,7 @@ pub mod tests { #[test] fn test_handle_backup_task() { - let limiter = Arc::new(IORateLimiter::new_for_test()); + let limiter = Arc::new(IoRateLimiter::new_for_test()); let stats = limiter.statistics().unwrap(); let (tmp, endpoint) = new_endpoint_with_limiter(Some(limiter), ApiVersion::V1, false, None); let engine = endpoint.engine.clone(); @@ -1585,8 +1585,8 @@ pub mod tests { ); let (none, _rx) = block_on(rx.into_future()); assert!(none.is_none(), "{:?}", none); - assert_eq!(stats.fetch(IOType::Export, IOOp::Write), 0); - assert_ne!(stats.fetch(IOType::Export, IOOp::Read), 0); + assert_eq!(stats.fetch(IoType::Export, IoOp::Write), 0); + assert_ne!(stats.fetch(IoType::Export, IoOp::Read), 0); } } @@ -1647,7 +1647,7 @@ pub mod tests { } fn test_handle_backup_raw_task_impl(cur_api_ver: ApiVersion, dst_api_ver: ApiVersion) -> bool { - let limiter = Arc::new(IORateLimiter::new_for_test()); + let limiter = Arc::new(IoRateLimiter::new_for_test()); let stats = limiter.statistics().unwrap(); let (tmp, endpoint) = new_endpoint_with_limiter(Some(limiter), cur_api_ver, true, None); let engine = endpoint.engine.clone(); @@ -1759,8 +1759,8 @@ pub mod tests { ); let (none, _rx) = block_on(rx.into_future()); assert!(none.is_none(), "{:?}", none); - assert_eq!(stats.fetch(IOType::Export, IOOp::Write), 0); - assert_ne!(stats.fetch(IOType::Export, IOOp::Read), 0); + assert_eq!(stats.fetch(IoType::Export, IoOp::Write), 0); + assert_ne!(stats.fetch(IoType::Export, IoOp::Read), 0); true } @@ -1788,7 +1788,7 @@ pub mod tests { #[test] fn test_backup_raw_apiv2_causal_ts() { - let limiter = Arc::new(IORateLimiter::new_for_test()); + let limiter = Arc::new(IoRateLimiter::new_for_test()); let ts_provider = Arc::new(causal_ts::tests::TestProvider::default()); let start_ts = ts_provider.get_ts().unwrap(); let (tmp, endpoint) = new_endpoint_with_limiter( diff --git a/components/backup/src/utils.rs 
b/components/backup/src/utils.rs index de57b9f9081..1b8fda5570e 100644 --- a/components/backup/src/utils.rs +++ b/components/backup/src/utils.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use api_version::{dispatch_api_version, ApiV2, KeyMode, KvFormat}; -use file_system::IOType; +use file_system::IoType; use futures::Future; use kvproto::kvrpcpb::ApiVersion; use tikv_util::{error, sys::thread::ThreadBuildWrapper}; @@ -94,7 +94,7 @@ pub fn create_tokio_runtime(thread_count: usize, thread_name: &str) -> TokioResu .enable_time() .after_start_wrapper(|| { tikv_alloc::add_thread_memory_accessor(); - file_system::set_io_type(IOType::Export); + file_system::set_io_type(IoType::Export); }) .before_stop_wrapper(|| { tikv_alloc::remove_thread_memory_accessor(); diff --git a/components/batch-system/src/batch.rs b/components/batch-system/src/batch.rs index 49433a73592..f868b4bfc94 100644 --- a/components/batch-system/src/batch.rs +++ b/components/batch-system/src/batch.rs @@ -17,7 +17,7 @@ use std::{ use crossbeam::channel::{self, SendError}; use fail::fail_point; -use file_system::{set_io_type, IOType}; +use file_system::{set_io_type, IoType}; use tikv_util::{ debug, error, info, mpsc, safe_panic, sys::thread::StdThreadBuildWrapper, thd_name, time::Instant, warn, @@ -589,7 +589,7 @@ where .name(name) .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); - set_io_type(IOType::ForegroundWrite); + set_io_type(IoType::ForegroundWrite); poller.poll(); }) .unwrap(); diff --git a/components/cloud/gcp/src/gcs.rs b/components/cloud/gcp/src/gcs.rs index a3401dbf6c8..799d1b02ee9 100644 --- a/components/cloud/gcp/src/gcs.rs +++ b/components/cloud/gcp/src/gcs.rs @@ -127,7 +127,7 @@ impl BlobConfig for Config { // GCS compatible storage #[derive(Clone)] -pub struct GCSStorage { +pub struct GcsStorage { config: Config, svc_access: Option>, client: Client, Body>, @@ -228,7 +228,7 @@ impl RetryError for RequestError { } } -impl GCSStorage { +impl GcsStorage { pub fn 
from_input(input: InputConfig) -> io::Result { Self::new(Config::from_input(input)?) } @@ -238,7 +238,7 @@ impl GCSStorage { } /// Create a new GCS storage for the given config. - pub fn new(config: Config) -> io::Result { + pub fn new(config: Config) -> io::Result { let svc_access = if let Some(si) = &config.svc_info { Some( ServiceAccountAccess::new(si.clone()) @@ -249,7 +249,7 @@ impl GCSStorage { }; let client = Client::builder().build(HttpsConnector::new()); - Ok(GCSStorage { + Ok(GcsStorage { config, svc_access: svc_access.map(Arc::new), client, @@ -392,7 +392,7 @@ fn parse_predefined_acl(acl: &str) -> Result, &str> { const STORAGE_NAME: &str = "gcs"; #[async_trait] -impl BlobStorage for GCSStorage { +impl BlobStorage for GcsStorage { fn config(&self) -> Box { Box::new(self.config.clone()) as Box } @@ -454,11 +454,11 @@ impl BlobStorage for GCSStorage { debug!("read file from GCS storage"; "key" => %name); let oid = match ObjectId::new(bucket, name) { Ok(oid) => oid, - Err(e) => return GCSStorage::error_to_async_read(io::ErrorKind::InvalidInput, e), + Err(e) => return GcsStorage::error_to_async_read(io::ErrorKind::InvalidInput, e), }; let request = match Object::download(&oid, None /* optional */) { Ok(request) => request.map(|_: io::Empty| Body::empty()), - Err(e) => return GCSStorage::error_to_async_read(io::ErrorKind::Other, e), + Err(e) => return GcsStorage::error_to_async_read(io::ErrorKind::Other, e), }; Box::new( self.make_request(request, tame_gcs::Scopes::ReadOnly) diff --git a/components/cloud/gcp/src/lib.rs b/components/cloud/gcp/src/lib.rs index e023ca9c6eb..4652bbf5b74 100644 --- a/components/cloud/gcp/src/lib.rs +++ b/components/cloud/gcp/src/lib.rs @@ -4,4 +4,4 @@ extern crate slog_global; mod gcs; -pub use gcs::{Config, GCSStorage}; +pub use gcs::{Config, GcsStorage}; diff --git a/components/encryption/export/src/lib.rs b/components/encryption/export/src/lib.rs index 537eb8785e5..e29a41cd07e 100644 --- a/components/encryption/export/src/lib.rs 
+++ b/components/encryption/export/src/lib.rs @@ -14,8 +14,8 @@ use derive_more::Deref; #[cfg(feature = "cloud-aws")] pub use encryption::KmsBackend; pub use encryption::{ - encryption_method_from_db_encryption_method, Backend, DataKeyManager, DataKeyManagerArgs, - DecrypterReader, EncryptionConfig, Error, FileConfig, Iv, KmsConfig, MasterKeyConfig, Result, + from_engine_encryption_method, Backend, DataKeyManager, DataKeyManagerArgs, DecrypterReader, + EncryptionConfig, Error, FileConfig, Iv, KmsConfig, MasterKeyConfig, Result, }; use encryption::{ DataKeyPair, EncryptedKey, FileBackend, KmsProvider, PlainKey, PlaintextBackend, diff --git a/components/encryption/src/crypter.rs b/components/encryption/src/crypter.rs index 1268d0d88f2..13286e416c9 100644 --- a/components/encryption/src/crypter.rs +++ b/components/encryption/src/crypter.rs @@ -2,7 +2,7 @@ use byteorder::{BigEndian, ByteOrder}; use derive_more::Deref; -use engine_traits::EncryptionMethod as DBEncryptionMethod; +use engine_traits::EncryptionMethod as EtEncryptionMethod; use kvproto::encryptionpb::EncryptionMethod; use openssl::symm::{self, Cipher as OCipher}; use rand::{rngs::OsRng, RngCore}; @@ -10,32 +10,28 @@ use tikv_util::{box_err, impl_display_as_debug}; use crate::{Error, Result}; -pub fn encryption_method_to_db_encryption_method(method: EncryptionMethod) -> DBEncryptionMethod { +pub fn to_engine_encryption_method(method: EncryptionMethod) -> EtEncryptionMethod { match method { - EncryptionMethod::Plaintext => DBEncryptionMethod::Plaintext, - EncryptionMethod::Aes128Ctr => DBEncryptionMethod::Aes128Ctr, - EncryptionMethod::Aes192Ctr => DBEncryptionMethod::Aes192Ctr, - EncryptionMethod::Aes256Ctr => DBEncryptionMethod::Aes256Ctr, - EncryptionMethod::Sm4Ctr => DBEncryptionMethod::Sm4Ctr, - EncryptionMethod::Unknown => DBEncryptionMethod::Unknown, + EncryptionMethod::Plaintext => EtEncryptionMethod::Plaintext, + EncryptionMethod::Aes128Ctr => EtEncryptionMethod::Aes128Ctr, + 
EncryptionMethod::Aes192Ctr => EtEncryptionMethod::Aes192Ctr, + EncryptionMethod::Aes256Ctr => EtEncryptionMethod::Aes256Ctr, + EncryptionMethod::Sm4Ctr => EtEncryptionMethod::Sm4Ctr, + EncryptionMethod::Unknown => EtEncryptionMethod::Unknown, } } -pub fn encryption_method_from_db_encryption_method(method: DBEncryptionMethod) -> EncryptionMethod { +pub fn from_engine_encryption_method(method: EtEncryptionMethod) -> EncryptionMethod { match method { - DBEncryptionMethod::Plaintext => EncryptionMethod::Plaintext, - DBEncryptionMethod::Aes128Ctr => EncryptionMethod::Aes128Ctr, - DBEncryptionMethod::Aes192Ctr => EncryptionMethod::Aes192Ctr, - DBEncryptionMethod::Aes256Ctr => EncryptionMethod::Aes256Ctr, - DBEncryptionMethod::Sm4Ctr => EncryptionMethod::Sm4Ctr, - DBEncryptionMethod::Unknown => EncryptionMethod::Unknown, + EtEncryptionMethod::Plaintext => EncryptionMethod::Plaintext, + EtEncryptionMethod::Aes128Ctr => EncryptionMethod::Aes128Ctr, + EtEncryptionMethod::Aes192Ctr => EncryptionMethod::Aes192Ctr, + EtEncryptionMethod::Aes256Ctr => EncryptionMethod::Aes256Ctr, + EtEncryptionMethod::Sm4Ctr => EncryptionMethod::Sm4Ctr, + EtEncryptionMethod::Unknown => EncryptionMethod::Unknown, } } -pub fn compat(method: EncryptionMethod) -> EncryptionMethod { - method -} - pub fn get_method_key_length(method: EncryptionMethod) -> usize { match method { EncryptionMethod::Plaintext => 0, diff --git a/components/encryption/src/file_dict_file.rs b/components/encryption/src/file_dict_file.rs index 0884cb1ca04..653fbf8dbbb 100644 --- a/components/encryption/src/file_dict_file.rs +++ b/components/encryption/src/file_dict_file.rs @@ -390,7 +390,7 @@ mod tests { use kvproto::encryptionpb::EncryptionMethod; use super::*; - use crate::{crypter::compat, encrypted_file::EncryptedFile, Error}; + use crate::{encrypted_file::EncryptedFile, Error}; fn test_file_dict_file_normal(enable_log: bool) { let tempdir = tempfile::tempdir().unwrap(); @@ -614,7 +614,7 @@ mod tests { fn 
create_file_info(id: u64, method: EncryptionMethod) -> FileInfo { FileInfo { key_id: id, - method: compat(method), + method, ..Default::default() } } diff --git a/components/encryption/src/lib.rs b/components/encryption/src/lib.rs index e6498e5d3ab..7f9079ed030 100644 --- a/components/encryption/src/lib.rs +++ b/components/encryption/src/lib.rs @@ -13,9 +13,8 @@ mod metrics; pub use self::{ config::*, crypter::{ - compat, encryption_method_from_db_encryption_method, - encryption_method_to_db_encryption_method, verify_encryption_config, AesGcmCrypter, Iv, - PlainKey, + from_engine_encryption_method, to_engine_encryption_method, verify_encryption_config, + AesGcmCrypter, Iv, PlainKey, }, encrypted_file::EncryptedFile, errors::{Error, Result, RetryCodedError}, diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index 79654d9d6a2..a45f6153358 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -13,7 +13,7 @@ use std::{ use crossbeam::channel::{self, select, tick}; use engine_traits::{ - EncryptionKeyManager, EncryptionMethod as DBEncryptionMethod, FileEncryptionInfo, + EncryptionKeyManager, EncryptionMethod as EtEncryptionMethod, FileEncryptionInfo, }; use fail::fail_point; use file_system::File; @@ -23,7 +23,7 @@ use tikv_util::{box_err, debug, error, info, sys::thread::StdThreadBuildWrapper, use crate::{ config::EncryptionConfig, - crypter::{self, compat, Iv}, + crypter::{self, Iv}, encrypted_file::EncryptedFile, file_dict_file::FileDictionaryFile, io::{DecrypterReader, EncrypterWriter}, @@ -198,7 +198,7 @@ impl Dicts { let file = FileInfo { iv: iv.as_slice().to_vec(), key_id: self.current_key_id.load(Ordering::SeqCst), - method: compat(method), + method, ..Default::default() }; let file_num = { @@ -243,7 +243,7 @@ impl Dicts { file_dict_file.remove(fname)?; ENCRYPTION_FILE_NUM_GAUGE.set(file_num); - if file.method != compat(EncryptionMethod::Plaintext) { + if file.method 
!= EncryptionMethod::Plaintext { debug!("delete encrypted file"; "fname" => fname); } else { debug!("delete plaintext file"; "fname" => fname); @@ -275,7 +275,7 @@ impl Dicts { file_dict_file.insert(dst_fname, &file)?; ENCRYPTION_FILE_NUM_GAUGE.set(file_num); - if method != compat(EncryptionMethod::Plaintext) { + if method != EncryptionMethod::Plaintext { info!("link encrypted file"; "src" => src_fname, "dst" => dst_fname); } else { info!("link plaintext file"; "src" => src_fname, "dst" => dst_fname); @@ -312,7 +312,7 @@ impl Dicts { // Generate a new data key if // 1. encryption method is not the same, or // 2. the current data key was exposed and the new master key is secure. - if compat(method) == key.method && !(key.was_exposed && master_key.is_secure()) { + if method == key.method && !(key.was_exposed && master_key.is_secure()) { let creation_time = UNIX_EPOCH + Duration::from_secs(key.creation_time); match now.duration_since(creation_time) { Ok(duration) => { @@ -336,7 +336,7 @@ impl Dicts { let (key_id, key) = generate_data_key(method); let data_key = DataKey { key, - method: compat(method), + method, creation_time, was_exposed: false, ..Default::default() @@ -615,9 +615,9 @@ impl DataKeyManager { }; EncrypterWriter::new( writer, - crypter::encryption_method_from_db_encryption_method(file.method), + crypter::from_engine_encryption_method(file.method), &file.key, - if file.method == DBEncryptionMethod::Plaintext { + if file.method == EtEncryptionMethod::Plaintext { debug_assert!(file.iv.is_empty()); Iv::Empty } else { @@ -645,9 +645,9 @@ impl DataKeyManager { let file = self.get_file(fname)?; DecrypterReader::new( reader, - crypter::encryption_method_from_db_encryption_method(file.method), + crypter::from_engine_encryption_method(file.method), &file.key, - if file.method == DBEncryptionMethod::Plaintext { + if file.method == EtEncryptionMethod::Plaintext { debug_assert!(file.iv.is_empty()); Iv::Empty } else { @@ -723,7 +723,7 @@ impl DataKeyManager { }; let 
encrypted_file = FileEncryptionInfo { key, - method: crypter::encryption_method_to_db_encryption_method(method), + method: crypter::to_engine_encryption_method(method), iv, }; Ok(Some(encrypted_file)) @@ -750,10 +750,10 @@ impl EncryptionKeyManager for DataKeyManager { // Return Plaintext if file is not found // RocksDB requires this let file = FileInfo::default(); - let method = compat(EncryptionMethod::Plaintext); + let method = EncryptionMethod::Plaintext; Ok(FileEncryptionInfo { key: vec![], - method: crypter::encryption_method_to_db_encryption_method(method), + method: crypter::to_engine_encryption_method(method), iv: file.iv, }) } @@ -767,7 +767,7 @@ impl EncryptionKeyManager for DataKeyManager { let file = self.dicts.new_file(fname, self.method)?; let encrypted_file = FileEncryptionInfo { key, - method: crypter::encryption_method_to_db_encryption_method(file.method), + method: crypter::to_engine_encryption_method(file.method), iv: file.get_iv().to_owned(), }; Ok(encrypted_file) @@ -789,7 +789,7 @@ impl EncryptionKeyManager for DataKeyManager { #[cfg(test)] mod tests { - use engine_traits::EncryptionMethod as DBEncryptionMethod; + use engine_traits::EncryptionMethod as EtEncryptionMethod; use file_system::{remove_file, File}; use matches::assert_matches; use tempfile::TempDir; @@ -912,7 +912,7 @@ mod tests { let foo3 = manager.get_file("foo").unwrap(); assert_eq!(foo1, foo3); let bar = manager.new_file("bar").unwrap(); - assert_eq!(bar.method, DBEncryptionMethod::Plaintext); + assert_eq!(bar.method, EtEncryptionMethod::Plaintext); } // When enabling encryption, using insecure master key is not allowed. diff --git a/components/engine_panic/src/cf_names.rs b/components/engine_panic/src/cf_names.rs index 8697634586b..ee71210f229 100644 --- a/components/engine_panic/src/cf_names.rs +++ b/components/engine_panic/src/cf_names.rs @@ -1,10 +1,10 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::CFNamesExt; +use engine_traits::CfNamesExt; use crate::engine::PanicEngine; -impl CFNamesExt for PanicEngine { +impl CfNamesExt for PanicEngine { fn cf_names(&self) -> Vec<&str> { panic!() } diff --git a/components/engine_panic/src/cf_options.rs b/components/engine_panic/src/cf_options.rs index f00db2eeb4f..1da2473bdaa 100644 --- a/components/engine_panic/src/cf_options.rs +++ b/components/engine_panic/src/cf_options.rs @@ -1,13 +1,13 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{CFOptionsExt, ColumnFamilyOptions, Result, SstPartitionerFactory}; +use engine_traits::{CfOptions, CfOptionsExt, Result, SstPartitionerFactory}; -use crate::{db_options::PanicTitanDBOptions, engine::PanicEngine}; +use crate::{db_options::PanicTitanDbOptions, engine::PanicEngine}; -impl CFOptionsExt for PanicEngine { - type ColumnFamilyOptions = PanicColumnFamilyOptions; +impl CfOptionsExt for PanicEngine { + type CfOptions = PanicCfOptions; - fn get_options_cf(&self, cf: &str) -> Result { + fn get_options_cf(&self, cf: &str) -> Result { panic!() } fn set_options_cf(&self, cf: &str, options: &[(&str, &str)]) -> Result<()> { @@ -15,10 +15,10 @@ impl CFOptionsExt for PanicEngine { } } -pub struct PanicColumnFamilyOptions; +pub struct PanicCfOptions; -impl ColumnFamilyOptions for PanicColumnFamilyOptions { - type TitanDBOptions = PanicTitanDBOptions; +impl CfOptions for PanicCfOptions { + type TitanDbOptions = PanicTitanDbOptions; fn new() -> Self { panic!() @@ -47,7 +47,7 @@ impl ColumnFamilyOptions for PanicColumnFamilyOptions { fn set_block_cache_capacity(&self, capacity: u64) -> Result<()> { panic!() } - fn set_titandb_options(&mut self, opts: &Self::TitanDBOptions) { + fn set_titandb_options(&mut self, opts: &Self::TitanDbOptions) { panic!() } fn get_target_file_size_base(&self) -> u64 { diff --git a/components/engine_panic/src/db_options.rs b/components/engine_panic/src/db_options.rs index f28741ce4c2..4b8eb562018 100644 
--- a/components/engine_panic/src/db_options.rs +++ b/components/engine_panic/src/db_options.rs @@ -1,13 +1,13 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{DBOptions, DBOptionsExt, Result, TitanDBOptions}; +use engine_traits::{DbOptions, DbOptionsExt, Result, TitanDbOptions}; use crate::engine::PanicEngine; -impl DBOptionsExt for PanicEngine { - type DBOptions = PanicDBOptions; +impl DbOptionsExt for PanicEngine { + type DbOptions = PanicDbOptions; - fn get_db_options(&self) -> Self::DBOptions { + fn get_db_options(&self) -> Self::DbOptions { panic!() } fn set_db_options(&self, options: &[(&str, &str)]) -> Result<()> { @@ -15,10 +15,10 @@ impl DBOptionsExt for PanicEngine { } } -pub struct PanicDBOptions; +pub struct PanicDbOptions; -impl DBOptions for PanicDBOptions { - type TitanDBOptions = PanicTitanDBOptions; +impl DbOptions for PanicDbOptions { + type TitanDbOptions = PanicTitanDbOptions; fn new() -> Self { panic!() @@ -44,14 +44,14 @@ impl DBOptions for PanicDBOptions { panic!() } - fn set_titandb_options(&mut self, opts: &Self::TitanDBOptions) { + fn set_titandb_options(&mut self, opts: &Self::TitanDbOptions) { panic!() } } -pub struct PanicTitanDBOptions; +pub struct PanicTitanDbOptions; -impl TitanDBOptions for PanicTitanDBOptions { +impl TitanDbOptions for PanicTitanDbOptions { fn new() -> Self { panic!() } diff --git a/components/engine_panic/src/db_vector.rs b/components/engine_panic/src/db_vector.rs index 83d615dbc4c..3daf6dc9500 100644 --- a/components/engine_panic/src/db_vector.rs +++ b/components/engine_panic/src/db_vector.rs @@ -2,14 +2,14 @@ use std::ops::Deref; -use engine_traits::DBVector; +use engine_traits::DbVector; #[derive(Debug)] -pub struct PanicDBVector; +pub struct PanicDbVector; -impl DBVector for PanicDBVector {} +impl DbVector for PanicDbVector {} -impl Deref for PanicDBVector { +impl Deref for PanicDbVector { type Target = [u8]; fn deref(&self) -> &[u8] { @@ -17,7 +17,7 @@ impl Deref for 
PanicDBVector { } } -impl<'a> PartialEq<&'a [u8]> for PanicDBVector { +impl<'a> PartialEq<&'a [u8]> for PanicDbVector { fn eq(&self, rhs: &&[u8]) -> bool { **rhs == **self } diff --git a/components/engine_panic/src/engine.rs b/components/engine_panic/src/engine.rs index 128cb318ed6..a296c3df9d8 100644 --- a/components/engine_panic/src/engine.rs +++ b/components/engine_panic/src/engine.rs @@ -5,7 +5,7 @@ use engine_traits::{ TabletAccessor, WriteOptions, }; -use crate::{db_vector::PanicDBVector, snapshot::PanicSnapshot, write_batch::PanicWriteBatch}; +use crate::{db_vector::PanicDbVector, snapshot::PanicSnapshot, write_batch::PanicWriteBatch}; #[derive(Clone, Debug)] pub struct PanicEngine; @@ -35,9 +35,9 @@ impl TabletAccessor for PanicEngine { } impl Peekable for PanicEngine { - type DBVector = PanicDBVector; + type DbVector = PanicDbVector; - fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { + fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { panic!() } fn get_value_cf_opt( @@ -45,7 +45,7 @@ impl Peekable for PanicEngine { opts: &ReadOptions, cf: &str, key: &[u8], - ) -> Result> { + ) -> Result> { panic!() } } diff --git a/components/engine_panic/src/snapshot.rs b/components/engine_panic/src/snapshot.rs index e27ed42d093..e573402c6d2 100644 --- a/components/engine_panic/src/snapshot.rs +++ b/components/engine_panic/src/snapshot.rs @@ -4,7 +4,7 @@ use std::ops::Deref; use engine_traits::{IterOptions, Iterable, Iterator, Peekable, ReadOptions, Result, Snapshot}; -use crate::{db_vector::PanicDBVector, engine::PanicEngine}; +use crate::{db_vector::PanicDbVector, engine::PanicEngine}; #[derive(Clone, Debug)] pub struct PanicSnapshot; @@ -16,9 +16,9 @@ impl Snapshot for PanicSnapshot { } impl Peekable for PanicSnapshot { - type DBVector = PanicDBVector; + type DbVector = PanicDbVector; - fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { + fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { 
panic!() } fn get_value_cf_opt( @@ -26,7 +26,7 @@ impl Peekable for PanicSnapshot { opts: &ReadOptions, cf: &str, key: &[u8], - ) -> Result> { + ) -> Result> { panic!() } } diff --git a/components/engine_rocks/src/cf_names.rs b/components/engine_rocks/src/cf_names.rs index b45a3960328..3b2512d0def 100644 --- a/components/engine_rocks/src/cf_names.rs +++ b/components/engine_rocks/src/cf_names.rs @@ -1,10 +1,10 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::CFNamesExt; +use engine_traits::CfNamesExt; use crate::engine::RocksEngine; -impl CFNamesExt for RocksEngine { +impl CfNamesExt for RocksEngine { fn cf_names(&self) -> Vec<&str> { self.as_inner().cf_names() } diff --git a/components/engine_rocks/src/cf_options.rs b/components/engine_rocks/src/cf_options.rs index c6a5390a063..6b3bdcaa11b 100644 --- a/components/engine_rocks/src/cf_options.rs +++ b/components/engine_rocks/src/cf_options.rs @@ -2,19 +2,19 @@ use std::ops::{Deref, DerefMut}; -use engine_traits::{CFOptionsExt, ColumnFamilyOptions, Result, SstPartitionerFactory}; +use engine_traits::{CfOptions, CfOptionsExt, Result, SstPartitionerFactory}; use rocksdb::ColumnFamilyOptions as RawCfOptions; use tikv_util::box_err; use crate::{ - db_options::RocksTitanDBOptions, engine::RocksEngine, r2e, + db_options::RocksTitanDbOptions, engine::RocksEngine, r2e, sst_partitioner::RocksSstPartitionerFactory, util, }; -impl CFOptionsExt for RocksEngine { - type ColumnFamilyOptions = RocksCfOptions; +impl CfOptionsExt for RocksEngine { + type CfOptions = RocksCfOptions; - fn get_options_cf(&self, cf: &str) -> Result { + fn get_options_cf(&self, cf: &str) -> Result { let handle = util::get_cf_handle(self.as_inner(), cf)?; Ok(RocksCfOptions::from_raw( self.as_inner().get_options_cf(handle), @@ -58,8 +58,8 @@ impl DerefMut for RocksCfOptions { } } -impl ColumnFamilyOptions for RocksCfOptions { - type TitanDBOptions = RocksTitanDBOptions; +impl CfOptions for RocksCfOptions { + type 
TitanDbOptions = RocksTitanDbOptions; fn new() -> Self { RocksCfOptions::from_raw(RawCfOptions::default()) @@ -97,7 +97,7 @@ impl ColumnFamilyOptions for RocksCfOptions { self.0.set_block_cache_capacity(capacity).map_err(r2e) } - fn set_titandb_options(&mut self, opts: &Self::TitanDBOptions) { + fn set_titandb_options(&mut self, opts: &Self::TitanDbOptions) { self.0.set_titandb_options(opts.as_raw()) } diff --git a/components/engine_rocks/src/compact.rs b/components/engine_rocks/src/compact.rs index 393377149ff..b9e3e5fe558 100644 --- a/components/engine_rocks/src/compact.rs +++ b/components/engine_rocks/src/compact.rs @@ -2,7 +2,7 @@ use std::cmp; -use engine_traits::{CFNamesExt, CompactExt, Result}; +use engine_traits::{CfNamesExt, CompactExt, Result}; use rocksdb::{CompactOptions, CompactionOptions, DBCompressionType}; use crate::{engine::RocksEngine, r2e, util}; @@ -137,10 +137,10 @@ impl CompactExt for RocksEngine { #[cfg(test)] mod tests { - use engine_traits::{CFNamesExt, CFOptionsExt, CompactExt, MiscExt, SyncMutable}; + use engine_traits::{CfNamesExt, CfOptionsExt, CompactExt, MiscExt, SyncMutable}; use tempfile::Builder; - use crate::{util, RocksCfOptions, RocksDBOptions}; + use crate::{util, RocksCfOptions, RocksDbOptions}; #[test] fn test_compact_files_in_range() { @@ -154,7 +154,7 @@ mod tests { let cfs_opts = vec![("default", cf_opts.clone()), ("test", cf_opts)]; let db = util::new_engine_opt( temp_dir.path().to_str().unwrap(), - RocksDBOptions::default(), + RocksDbOptions::default(), cfs_opts, ) .unwrap(); diff --git a/components/engine_rocks/src/db_options.rs b/components/engine_rocks/src/db_options.rs index 6aaccfee76b..dea87dbb135 100644 --- a/components/engine_rocks/src/db_options.rs +++ b/components/engine_rocks/src/db_options.rs @@ -2,17 +2,17 @@ use std::ops::{Deref, DerefMut}; -use engine_traits::{DBOptions, DBOptionsExt, Result, TitanDBOptions}; +use engine_traits::{DbOptions, DbOptionsExt, Result, TitanDbOptions}; use rocksdb::{DBOptions as 
RawDBOptions, TitanDBOptions as RawTitanDBOptions}; use tikv_util::box_err; use crate::engine::RocksEngine; -impl DBOptionsExt for RocksEngine { - type DBOptions = RocksDBOptions; +impl DbOptionsExt for RocksEngine { + type DbOptions = RocksDbOptions; - fn get_db_options(&self) -> Self::DBOptions { - RocksDBOptions::from_raw(self.as_inner().get_db_options()) + fn get_db_options(&self) -> Self::DbOptions { + RocksDbOptions::from_raw(self.as_inner().get_db_options()) } fn set_db_options(&self, options: &[(&str, &str)]) -> Result<()> { self.as_inner() @@ -22,11 +22,11 @@ impl DBOptionsExt for RocksEngine { } #[derive(Default)] -pub struct RocksDBOptions(RawDBOptions); +pub struct RocksDbOptions(RawDBOptions); -impl RocksDBOptions { - pub fn from_raw(raw: RawDBOptions) -> RocksDBOptions { - RocksDBOptions(raw) +impl RocksDbOptions { + pub fn from_raw(raw: RawDBOptions) -> RocksDbOptions { + RocksDbOptions(raw) } pub fn into_raw(self) -> RawDBOptions { @@ -38,7 +38,7 @@ impl RocksDBOptions { } } -impl Deref for RocksDBOptions { +impl Deref for RocksDbOptions { type Target = RawDBOptions; #[inline] @@ -47,18 +47,18 @@ impl Deref for RocksDBOptions { } } -impl DerefMut for RocksDBOptions { +impl DerefMut for RocksDbOptions { #[inline] fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } } -impl DBOptions for RocksDBOptions { - type TitanDBOptions = RocksTitanDBOptions; +impl DbOptions for RocksDbOptions { + type TitanDbOptions = RocksTitanDbOptions; fn new() -> Self { - RocksDBOptions::from_raw(RawDBOptions::new()) + RocksDbOptions::from_raw(RawDBOptions::new()) } fn get_max_background_jobs(&self) -> i32 { @@ -85,16 +85,16 @@ impl DBOptions for RocksDBOptions { .map_err(|e| box_err!(e)) } - fn set_titandb_options(&mut self, opts: &Self::TitanDBOptions) { + fn set_titandb_options(&mut self, opts: &Self::TitanDbOptions) { self.0.set_titandb_options(opts.as_raw()) } } -pub struct RocksTitanDBOptions(RawTitanDBOptions); +pub struct 
RocksTitanDbOptions(RawTitanDBOptions); -impl RocksTitanDBOptions { - pub fn from_raw(raw: RawTitanDBOptions) -> RocksTitanDBOptions { - RocksTitanDBOptions(raw) +impl RocksTitanDbOptions { + pub fn from_raw(raw: RawTitanDBOptions) -> RocksTitanDbOptions { + RocksTitanDbOptions(raw) } pub fn as_raw(&self) -> &RawTitanDBOptions { @@ -102,7 +102,7 @@ impl RocksTitanDBOptions { } } -impl Deref for RocksTitanDBOptions { +impl Deref for RocksTitanDbOptions { type Target = RawTitanDBOptions; #[inline] @@ -111,16 +111,16 @@ impl Deref for RocksTitanDBOptions { } } -impl DerefMut for RocksTitanDBOptions { +impl DerefMut for RocksTitanDbOptions { #[inline] fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } } -impl TitanDBOptions for RocksTitanDBOptions { +impl TitanDbOptions for RocksTitanDbOptions { fn new() -> Self { - RocksTitanDBOptions::from_raw(RawTitanDBOptions::new()) + RocksTitanDbOptions::from_raw(RawTitanDBOptions::new()) } fn set_min_blob_size(&mut self, size: u64) { diff --git a/components/engine_rocks/src/db_vector.rs b/components/engine_rocks/src/db_vector.rs index cf48bd8da0e..97fa65b7072 100644 --- a/components/engine_rocks/src/db_vector.rs +++ b/components/engine_rocks/src/db_vector.rs @@ -5,20 +5,20 @@ use std::{ ops::Deref, }; -use engine_traits::DBVector; +use engine_traits::DbVector; use rocksdb::DBVector as RawDBVector; -pub struct RocksDBVector(RawDBVector); +pub struct RocksDbVector(RawDBVector); -impl RocksDBVector { - pub fn from_raw(raw: RawDBVector) -> RocksDBVector { - RocksDBVector(raw) +impl RocksDbVector { + pub fn from_raw(raw: RawDBVector) -> RocksDbVector { + RocksDbVector(raw) } } -impl DBVector for RocksDBVector {} +impl DbVector for RocksDbVector {} -impl Deref for RocksDBVector { +impl Deref for RocksDbVector { type Target = [u8]; fn deref(&self) -> &[u8] { @@ -26,13 +26,13 @@ impl Deref for RocksDBVector { } } -impl Debug for RocksDBVector { +impl Debug for RocksDbVector { fn fmt(&self, formatter: &mut Formatter<'_>) -> 
fmt::Result { write!(formatter, "{:?}", &**self) } } -impl<'a> PartialEq<&'a [u8]> for RocksDBVector { +impl<'a> PartialEq<&'a [u8]> for RocksDbVector { fn eq(&self, rhs: &&[u8]) -> bool { **rhs == **self } diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 0e83eb2cdb3..9c995144efa 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -8,7 +8,7 @@ use engine_traits::{ use rocksdb::{DBIterator, Writable, DB}; use crate::{ - db_vector::RocksDBVector, + db_vector::RocksDbVector, options::RocksReadOptions, r2e, rocks_metrics::{ @@ -142,12 +142,12 @@ impl Iterable for RocksEngine { } impl Peekable for RocksEngine { - type DBVector = RocksDBVector; + type DbVector = RocksDbVector; - fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { + fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { let opt: RocksReadOptions = opts.into(); let v = self.db.get_opt(key, &opt.into_raw()).map_err(r2e)?; - Ok(v.map(RocksDBVector::from_raw)) + Ok(v.map(RocksDbVector::from_raw)) } fn get_value_cf_opt( @@ -155,14 +155,14 @@ impl Peekable for RocksEngine { opts: &ReadOptions, cf: &str, key: &[u8], - ) -> Result> { + ) -> Result> { let opt: RocksReadOptions = opts.into(); let handle = get_cf_handle(&self.db, cf)?; let v = self .db .get_cf_opt(handle, key, &opt.into_raw()) .map_err(r2e)?; - Ok(v.map(RocksDBVector::from_raw)) + Ok(v.map(RocksDbVector::from_raw)) } } diff --git a/components/engine_rocks/src/event_listener.rs b/components/engine_rocks/src/event_listener.rs index 5b93ccba637..ad7a9de455f 100644 --- a/components/engine_rocks/src/event_listener.rs +++ b/components/engine_rocks/src/event_listener.rs @@ -1,6 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
-use file_system::{get_io_type, set_io_type, IOType}; +use file_system::{get_io_type, set_io_type, IoType}; use regex::Regex; use rocksdb::{ CompactionJobInfo, DBBackgroundErrorReason, FlushJobInfo, IngestionInfo, MutableStatus, @@ -32,23 +32,23 @@ impl RocksEventListener { impl rocksdb::EventListener for RocksEventListener { fn on_flush_begin(&self, _info: &FlushJobInfo) { - set_io_type(IOType::Flush); + set_io_type(IoType::Flush); } fn on_flush_completed(&self, info: &FlushJobInfo) { STORE_ENGINE_EVENT_COUNTER_VEC .with_label_values(&[&self.db_name, info.cf_name(), "flush"]) .inc(); - if get_io_type() == IOType::Flush { - set_io_type(IOType::Other); + if get_io_type() == IoType::Flush { + set_io_type(IoType::Other); } } fn on_compaction_begin(&self, info: &CompactionJobInfo) { if info.base_input_level() == 0 { - set_io_type(IOType::LevelZeroCompaction); + set_io_type(IoType::LevelZeroCompaction); } else { - set_io_type(IOType::Compaction); + set_io_type(IoType::Compaction); } } @@ -69,26 +69,26 @@ impl rocksdb::EventListener for RocksEventListener { &info.compaction_reason().to_string(), ]) .inc(); - if info.base_input_level() == 0 && get_io_type() == IOType::LevelZeroCompaction - || info.base_input_level() != 0 && get_io_type() == IOType::Compaction + if info.base_input_level() == 0 && get_io_type() == IoType::LevelZeroCompaction + || info.base_input_level() != 0 && get_io_type() == IoType::Compaction { - set_io_type(IOType::Other); + set_io_type(IoType::Other); } } fn on_subcompaction_begin(&self, info: &SubcompactionJobInfo) { if info.base_input_level() == 0 { - set_io_type(IOType::LevelZeroCompaction); + set_io_type(IoType::LevelZeroCompaction); } else { - set_io_type(IOType::Compaction); + set_io_type(IoType::Compaction); } } fn on_subcompaction_completed(&self, info: &SubcompactionJobInfo) { - if info.base_input_level() == 0 && get_io_type() == IOType::LevelZeroCompaction - || info.base_input_level() != 0 && get_io_type() == IOType::Compaction + if 
info.base_input_level() == 0 && get_io_type() == IoType::LevelZeroCompaction + || info.base_input_level() != 0 && get_io_type() == IoType::Compaction { - set_io_type(IOType::Other); + set_io_type(IoType::Other); } } diff --git a/components/engine_rocks/src/file_system.rs b/components/engine_rocks/src/file_system.rs index 87f46893774..614611bc40e 100644 --- a/components/engine_rocks/src/file_system.rs +++ b/components/engine_rocks/src/file_system.rs @@ -10,7 +10,7 @@ use crate::{e2r, r2e, raw::Env}; // Use engine::Env directly since Env is not abstracted. pub(crate) fn get_env( base_env: Option>, - limiter: Option>, + limiter: Option>, ) -> engine_traits::Result> { let base_env = base_env.unwrap_or_else(|| Arc::new(Env::default())); Ok(Arc::new( @@ -43,19 +43,19 @@ mod tests { use std::sync::Arc; use engine_traits::{CompactExt, MiscExt, SyncMutable, CF_DEFAULT}; - use file_system::{IOOp, IORateLimiter, IORateLimiterStatistics, IOType}; + use file_system::{IoOp, IoRateLimiter, IoRateLimiterStatistics, IoType}; use keys::data_key; use tempfile::Builder; use super::*; use crate::{ event_listener::RocksEventListener, raw::DBCompressionType, util::new_engine_opt, - RocksCfOptions, RocksDBOptions, RocksEngine, + RocksCfOptions, RocksDbOptions, RocksEngine, }; - fn new_test_db(dir: &str) -> (RocksEngine, Arc) { - let limiter = Arc::new(IORateLimiter::new_for_test()); - let mut db_opts = RocksDBOptions::default(); + fn new_test_db(dir: &str) -> (RocksEngine, Arc) { + let limiter = Arc::new(IoRateLimiter::new_for_test()); + let mut db_opts = RocksDbOptions::default(); db_opts.add_event_listener(RocksEventListener::new("test_db", None)); let env = get_env(None, Some(limiter.clone())).unwrap(); db_opts.set_env(env); @@ -81,16 +81,16 @@ mod tests { db.put(&data_key(b"a1"), &value).unwrap(); db.put(&data_key(b"a2"), &value).unwrap(); - assert_eq!(stats.fetch(IOType::Flush, IOOp::Write), 0); + assert_eq!(stats.fetch(IoType::Flush, IoOp::Write), 0); db.flush(true /* sync 
*/).unwrap(); - assert!(stats.fetch(IOType::Flush, IOOp::Write) > value_size * 2); - assert!(stats.fetch(IOType::Flush, IOOp::Write) < value_size * 2 + amplification_bytes); + assert!(stats.fetch(IoType::Flush, IoOp::Write) > value_size * 2); + assert!(stats.fetch(IoType::Flush, IoOp::Write) < value_size * 2 + amplification_bytes); stats.reset(); db.put(&data_key(b"a2"), &value).unwrap(); db.put(&data_key(b"a3"), &value).unwrap(); db.flush(true /* sync */).unwrap(); - assert!(stats.fetch(IOType::Flush, IOOp::Write) > value_size * 2); - assert!(stats.fetch(IOType::Flush, IOOp::Write) < value_size * 2 + amplification_bytes); + assert!(stats.fetch(IoType::Flush, IoOp::Write) > value_size * 2); + assert!(stats.fetch(IoType::Flush, IoOp::Write) < value_size * 2 + amplification_bytes); stats.reset(); db.compact_range( CF_DEFAULT, None, // start_key @@ -99,14 +99,14 @@ mod tests { 1, // max_subcompactions ) .unwrap(); - assert!(stats.fetch(IOType::LevelZeroCompaction, IOOp::Read) > value_size * 4); + assert!(stats.fetch(IoType::LevelZeroCompaction, IoOp::Read) > value_size * 4); assert!( - stats.fetch(IOType::LevelZeroCompaction, IOOp::Read) + stats.fetch(IoType::LevelZeroCompaction, IoOp::Read) < value_size * 4 + amplification_bytes ); - assert!(stats.fetch(IOType::LevelZeroCompaction, IOOp::Write) > value_size * 3); + assert!(stats.fetch(IoType::LevelZeroCompaction, IoOp::Write) > value_size * 3); assert!( - stats.fetch(IOType::LevelZeroCompaction, IOOp::Write) + stats.fetch(IoType::LevelZeroCompaction, IoOp::Write) < value_size * 3 + amplification_bytes ); } diff --git a/components/engine_rocks/src/import.rs b/components/engine_rocks/src/import.rs index a64da35ae67..1aa65ec07fa 100644 --- a/components/engine_rocks/src/import.rs +++ b/components/engine_rocks/src/import.rs @@ -70,7 +70,7 @@ mod tests { use tempfile::Builder; use super::*; - use crate::{util::new_engine_opt, RocksCfOptions, RocksDBOptions, RocksSstWriterBuilder}; + use crate::{util::new_engine_opt, 
RocksCfOptions, RocksDbOptions, RocksSstWriterBuilder}; #[test] fn test_ingest_multiple_file() { @@ -90,7 +90,7 @@ mod tests { (*cf, opt) }) .collect(); - let db = new_engine_opt(path_str, RocksDBOptions::default(), cfs_opts).unwrap(); + let db = new_engine_opt(path_str, RocksDbOptions::default(), cfs_opts).unwrap(); let mut wb = db.write_batch(); for i in 1000..5000 { let v = i.to_string(); diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index a2e394bf8c8..f8b32c72a59 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -111,7 +111,7 @@ pub mod raw; pub fn get_env( key_manager: Option>, - limiter: Option>, + limiter: Option>, ) -> engine_traits::Result> { let env = encryption::get_env(None /* base_env */, key_manager)?; file_system::get_env(Some(env), limiter) diff --git a/components/engine_rocks/src/logger.rs b/components/engine_rocks/src/logger.rs index 9482dd12d25..b7b196448c5 100644 --- a/components/engine_rocks/src/logger.rs +++ b/components/engine_rocks/src/logger.rs @@ -21,9 +21,9 @@ impl Logger for RocksdbLogger { } #[derive(Default)] -pub struct RaftDBLogger; +pub struct RaftDbLogger; -impl Logger for RaftDBLogger { +impl Logger for RaftDbLogger { fn logv(&self, log_level: InfoLogLevel, log: &str) { match log_level { InfoLogLevel::Header => info!(#"raftdb_log_header", "{}", log), diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index ea6d48adb35..fd695bb4d2c 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -1,7 +1,7 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
use engine_traits::{ - CFNamesExt, DeleteStrategy, ImportExt, IterOptions, Iterable, Iterator, MiscExt, Mutable, + CfNamesExt, DeleteStrategy, ImportExt, IterOptions, Iterable, Iterator, MiscExt, Mutable, Range, Result, SstWriter, SstWriterBuilder, WriteBatch, WriteBatchExt, ALL_CFS, }; use rocksdb::Range as RocksRange; @@ -343,7 +343,7 @@ mod tests { use crate::{ engine::RocksEngine, util::{new_engine, new_engine_opt}, - RocksCfOptions, RocksDBOptions, + RocksCfOptions, RocksDbOptions, }; fn check_data(db: &RocksEngine, cfs: &[&str], expected: &[(&[u8], &[u8])]) { @@ -520,7 +520,7 @@ mod tests { (*cf, cf_opts) }) .collect(); - let db = new_engine_opt(path_str, RocksDBOptions::default(), cfs_opts).unwrap(); + let db = new_engine_opt(path_str, RocksDbOptions::default(), cfs_opts).unwrap(); let keys = vec![b"k1", b"k2", b"k3", b"k4"]; @@ -552,7 +552,7 @@ mod tests { .unwrap(); let path_str = path.path().to_str().unwrap(); - let mut opts = RocksDBOptions::default(); + let mut opts = RocksDbOptions::default(); opts.create_if_missing(true); opts.enable_multi_batch_write(true); diff --git a/components/engine_rocks/src/properties.rs b/components/engine_rocks/src/properties.rs index c142ce01a74..8d049112f92 100644 --- a/components/engine_rocks/src/properties.rs +++ b/components/engine_rocks/src/properties.rs @@ -545,7 +545,7 @@ mod tests { use super::*; use crate::{ raw::{DBEntryType, TablePropertiesCollector}, - RocksCfOptions, RocksDBOptions, + RocksCfOptions, RocksDbOptions, }; #[allow(clippy::many_single_char_names)] @@ -715,7 +715,7 @@ mod tests { .tempdir() .unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = RocksDBOptions::default(); + let db_opts = RocksDbOptions::default(); let mut cf_opts = RocksCfOptions::default(); cf_opts.set_level_zero_file_num_compaction_trigger(10); cf_opts.add_table_properties_collector_factory( diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index 
fd52342002f..f1e86903e9d 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -3,7 +3,7 @@ // #[PerformanceCriticalPath] use engine_traits::{ Error, Iterable, KvEngine, MiscExt, Mutable, Peekable, RaftEngine, RaftEngineDebug, - RaftEngineReadOnly, RaftLogBatch, RaftLogGCTask, Result, SyncMutable, WriteBatch, + RaftEngineReadOnly, RaftLogBatch, RaftLogGcTask, Result, SyncMutable, WriteBatch, WriteBatchExt, WriteOptions, CF_DEFAULT, RAFT_LOG_MULTI_GET_CNT, }; use kvproto::{ @@ -289,7 +289,7 @@ impl RaftEngine for RocksEngine { self.put_msg(&keys::raft_state_key(raft_group_id), state) } - fn batch_gc(&self, groups: Vec) -> Result { + fn batch_gc(&self, groups: Vec) -> Result { let mut total = 0; let mut raft_wb = self.write_batch_with_cap(4 * 1024); for task in groups { diff --git a/components/engine_rocks/src/snapshot.rs b/components/engine_rocks/src/snapshot.rs index 94724b220f7..c107601c5d6 100644 --- a/components/engine_rocks/src/snapshot.rs +++ b/components/engine_rocks/src/snapshot.rs @@ -9,7 +9,7 @@ use engine_traits::{self, IterOptions, Iterable, Peekable, ReadOptions, Result, use rocksdb::{rocksdb_options::UnsafeSnap, DBIterator, DB}; use crate::{ - db_vector::RocksDBVector, options::RocksReadOptions, r2e, util::get_cf_handle, + db_vector::RocksDbVector, options::RocksReadOptions, r2e, util::get_cf_handle, RocksEngineIterator, }; @@ -71,16 +71,16 @@ impl Iterable for RocksSnapshot { } impl Peekable for RocksSnapshot { - type DBVector = RocksDBVector; + type DbVector = RocksDbVector; - fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { + fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { let opt: RocksReadOptions = opts.into(); let mut opt = opt.into_raw(); unsafe { opt.set_snapshot(&self.snap); } let v = self.db.get_opt(key, &opt).map_err(r2e)?; - Ok(v.map(RocksDBVector::from_raw)) + Ok(v.map(RocksDbVector::from_raw)) } fn get_value_cf_opt( @@ -88,7 +88,7 @@ impl 
Peekable for RocksSnapshot { opts: &ReadOptions, cf: &str, key: &[u8], - ) -> Result> { + ) -> Result> { let opt: RocksReadOptions = opts.into(); let mut opt = opt.into_raw(); unsafe { @@ -96,6 +96,6 @@ impl Peekable for RocksSnapshot { } let handle = get_cf_handle(self.db.as_ref(), cf)?; let v = self.db.get_cf_opt(handle, key, &opt).map_err(r2e)?; - Ok(v.map(RocksDBVector::from_raw)) + Ok(v.map(RocksDbVector::from_raw)) } } diff --git a/components/engine_rocks/src/util.rs b/components/engine_rocks/src/util.rs index 4192eecfcae..ebb18e92de5 100644 --- a/components/engine_rocks/src/util.rs +++ b/components/engine_rocks/src/util.rs @@ -10,7 +10,7 @@ use rocksdb::{ use slog_global::warn; use crate::{ - cf_options::RocksCfOptions, db_options::RocksDBOptions, engine::RocksEngine, r2e, + cf_options::RocksCfOptions, db_options::RocksDbOptions, engine::RocksEngine, r2e, rocks_metrics_defs::*, }; @@ -27,7 +27,7 @@ pub fn new_default_engine(path: &str) -> Result { } pub fn new_engine(path: &str, cfs: &[&str]) -> Result { - let mut db_opts = RocksDBOptions::default(); + let mut db_opts = RocksDbOptions::default(); db_opts.enable_statistics(true); let cf_opts = cfs.iter().map(|name| (*name, Default::default())).collect(); new_engine_opt(path, db_opts, cf_opts) @@ -35,7 +35,7 @@ pub fn new_engine(path: &str, cfs: &[&str]) -> Result { pub fn new_engine_opt( path: &str, - db_opt: RocksDBOptions, + db_opt: RocksDbOptions, cf_opts: Vec<(&str, RocksCfOptions)>, ) -> Result { let mut db_opt = db_opt.into_raw(); @@ -92,25 +92,24 @@ pub fn new_engine_opt( adjust_dynamic_level_bytes(&cf_descs, name, opt); } - // If all column families exist, just open db. - if existed == needed { - let db = DB::open_cf(db_opt, path, cf_opts.into_iter().collect()).map_err(r2e)?; + let cfds: Vec<_> = cf_opts.into_iter().collect(); + // We have added all missing options by iterating `existed`. If two vecs still + // have same length, then they must have same column families dispite their + // orders. 
So just open db. + if needed.len() == existed.len() && needed.len() == cfds.len() { + let db = DB::open_cf(db_opt, path, cfds).map_err(r2e)?; return Ok(RocksEngine::new(db)); } // Opens db. - let cfds = cf_opts.into_iter().collect(); db_opt.create_missing_column_families(true); let mut db = DB::open_cf(db_opt, path, cfds).map_err(r2e)?; // Drops discarded column families. - // for cf in existed.iter().filter(|x| needed.iter().find(|y| y == - // x).is_none()) { for cf in cfs_diff(&existed, &needed) { - // Never drop default column families. - if cf != CF_DEFAULT { - db.drop_cf(cf).map_err(r2e)?; - } + // We have checked it at the very beginning, so it must be needed. + assert_ne!(cf, CF_DEFAULT); + db.drop_cf(cf).map_err(r2e)?; } Ok(RocksEngine::new(db)) @@ -334,7 +333,7 @@ pub fn from_raw_perf_level(level: rocksdb::PerfLevel) -> engine_traits::PerfLeve #[cfg(test)] mod tests { - use engine_traits::{CFOptionsExt, CF_DEFAULT}; + use engine_traits::{CfOptionsExt, Peekable, SyncMutable, CF_DEFAULT}; use rocksdb::DB; use tempfile::Builder; @@ -369,7 +368,7 @@ mod tests { let mut opts = RocksCfOptions::default(); opts.set_level_compaction_dynamic_level_bytes(true); cfs_opts.push(("cf_dynamic_level_bytes", opts.clone())); - let db = new_engine_opt(path_str, RocksDBOptions::default(), cfs_opts).unwrap(); + let db = new_engine_opt(path_str, RocksDbOptions::default(), cfs_opts).unwrap(); column_families_must_eq(path_str, vec![CF_DEFAULT, "cf_dynamic_level_bytes"]); check_dynamic_level_bytes(&db); drop(db); @@ -378,11 +377,28 @@ mod tests { let cfs_opts = vec![ (CF_DEFAULT, opts.clone()), ("cf_dynamic_level_bytes", opts.clone()), - ("cf1", opts), + ("cf1", opts.clone()), ]; - let db = new_engine_opt(path_str, RocksDBOptions::default(), cfs_opts).unwrap(); + let db = new_engine_opt(path_str, RocksDbOptions::default(), cfs_opts).unwrap(); column_families_must_eq(path_str, vec![CF_DEFAULT, "cf_dynamic_level_bytes", "cf1"]); check_dynamic_level_bytes(&db); + for cf in 
&[CF_DEFAULT, "cf_dynamic_level_bytes", "cf1"] { + db.put_cf(cf, b"k", b"v").unwrap(); + } + drop(db); + + // change order should not cause data corruption. + let cfs_opts = vec![ + ("cf_dynamic_level_bytes", opts.clone()), + ("cf1", opts.clone()), + (CF_DEFAULT, opts), + ]; + let db = new_engine_opt(path_str, RocksDbOptions::default(), cfs_opts).unwrap(); + column_families_must_eq(path_str, vec![CF_DEFAULT, "cf_dynamic_level_bytes", "cf1"]); + check_dynamic_level_bytes(&db); + for cf in &[CF_DEFAULT, "cf_dynamic_level_bytes", "cf1"] { + assert_eq!(db.get_value_cf(cf, b"k").unwrap().unwrap(), b"v"); + } drop(db); // drop cf1. @@ -402,7 +418,7 @@ mod tests { } fn column_families_must_eq(path: &str, excepted: Vec<&str>) { - let opts = RocksDBOptions::default(); + let opts = RocksDbOptions::default(); let cfs_list = DB::list_column_families(&opts, path).unwrap(); let mut cfs_existed: Vec<&str> = cfs_list.iter().map(|v| v.as_str()).collect(); diff --git a/components/engine_rocks/src/write_batch.rs b/components/engine_rocks/src/write_batch.rs index f658fb046fb..e4028feb411 100644 --- a/components/engine_rocks/src/write_batch.rs +++ b/components/engine_rocks/src/write_batch.rs @@ -227,7 +227,7 @@ mod tests { use tempfile::Builder; use super::{ - super::{util::new_engine_opt, RocksDBOptions}, + super::{util::new_engine_opt, RocksDbOptions}, *, }; use crate::RocksCfOptions; @@ -244,7 +244,7 @@ mod tests { opt.enable_multi_batch_write(false); let engine = new_engine_opt( path.path().join("db").to_str().unwrap(), - RocksDBOptions::from_raw(opt), + RocksDbOptions::from_raw(opt), vec![(CF_DEFAULT, RocksCfOptions::default())], ) .unwrap(); @@ -290,7 +290,7 @@ mod tests { opt.enable_multi_batch_write(true); let engine = new_engine_opt( path.path().join("db").to_str().unwrap(), - RocksDBOptions::from_raw(opt), + RocksDbOptions::from_raw(opt), vec![(CF_DEFAULT, RocksCfOptions::default())], ) .unwrap(); diff --git a/components/engine_test/src/lib.rs 
b/components/engine_test/src/lib.rs index ada430261e3..979fbda17d0 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -65,9 +65,9 @@ pub mod raft { #[cfg(feature = "test-engine-raft-raft-engine")] pub use raft_log_engine::RaftLogEngine as RaftTestEngine; - use crate::ctor::{RaftDBOptions, RaftEngineConstructorExt}; + use crate::ctor::{RaftDbOptions, RaftEngineConstructorExt}; - pub fn new_engine(path: &str, db_opt: Option) -> Result { + pub fn new_engine(path: &str, db_opt: Option) -> Result { RaftTestEngine::new_raft_engine(path, db_opt) } } @@ -91,11 +91,11 @@ pub mod kv { RocksSnapshot as KvTestSnapshot, RocksWriteBatchVec as KvTestWriteBatch, }; use engine_traits::{ - CFOptionsExt, ColumnFamilyOptions, Result, TabletAccessor, TabletFactory, CF_DEFAULT, + CfOptions, CfOptionsExt, Result, TabletAccessor, TabletFactory, CF_DEFAULT, }; use tikv_util::box_err; - use crate::ctor::{ColumnFamilyOptions as KvTestCFOptions, DBOptions, KvEngineConstructorExt}; + use crate::ctor::{CfOptions as KvTestCfOptions, DbOptions, KvEngineConstructorExt}; pub fn new_engine(path: &str, cfs: &[&str]) -> Result { KvTestEngine::new_kv_engine(path, cfs) @@ -103,8 +103,8 @@ pub mod kv { pub fn new_engine_opt( path: &str, - db_opt: DBOptions, - cfs_opts: Vec<(&str, KvTestCFOptions)>, + db_opt: DbOptions, + cfs_opts: Vec<(&str, KvTestCfOptions)>, ) -> Result { KvTestEngine::new_kv_engine_opt(path, db_opt, cfs_opts) } @@ -114,16 +114,16 @@ pub mod kv { #[derive(Clone)] pub struct TestTabletFactory { root_path: String, - db_opt: DBOptions, - cf_opts: Vec<(&'static str, KvTestCFOptions)>, + db_opt: DbOptions, + cf_opts: Vec<(&'static str, KvTestCfOptions)>, registry: Arc>>, } impl TestTabletFactory { pub fn new( root_path: &str, - db_opt: DBOptions, - cf_opts: Vec<(&'static str, KvTestCFOptions)>, + db_opt: DbOptions, + cf_opts: Vec<(&'static str, KvTestCfOptions)>, ) -> Self { Self { root_path: root_path.to_string(), @@ -312,7 +312,7 @@ pub mod ctor { 
use encryption::DataKeyManager; use engine_traits::Result; - use file_system::IORateLimiter; + use file_system::IoRateLimiter; /// Kv engine construction /// @@ -341,30 +341,30 @@ pub mod ctor { /// If that directory does not exist, then it is created. fn new_kv_engine_opt( path: &str, - db_opt: DBOptions, - cf_opts: Vec<(&str, ColumnFamilyOptions)>, + db_opt: DbOptions, + cf_opts: Vec<(&str, CfOptions)>, ) -> Result; } /// Raft engine construction pub trait RaftEngineConstructorExt: Sized { /// Create a new raft engine. - fn new_raft_engine(path: &str, db_opt: Option) -> Result; + fn new_raft_engine(path: &str, db_opt: Option) -> Result; } #[derive(Clone, Default)] - pub struct DBOptions { + pub struct DbOptions { key_manager: Option>, - rate_limiter: Option>, + rate_limiter: Option>, enable_multi_batch_write: bool, } - impl DBOptions { + impl DbOptions { pub fn set_key_manager(&mut self, key_manager: Option>) { self.key_manager = key_manager; } - pub fn set_rate_limiter(&mut self, rate_limiter: Option>) { + pub fn set_rate_limiter(&mut self, rate_limiter: Option>) { self.rate_limiter = rate_limiter; } @@ -373,7 +373,7 @@ pub mod ctor { } } - pub type RaftDBOptions = DBOptions; + pub type RaftDbOptions = DbOptions; /// Properties for a single column family /// @@ -397,7 +397,7 @@ pub mod ctor { /// In the future TiKV will probably have engine-specific configuration /// options. 
#[derive(Clone)] - pub struct ColumnFamilyOptions { + pub struct CfOptions { disable_auto_compactions: bool, level_zero_file_num_compaction_trigger: Option, level_zero_slowdown_writes_trigger: Option, @@ -409,9 +409,9 @@ pub mod ctor { no_table_properties: bool, } - impl ColumnFamilyOptions { - pub fn new() -> ColumnFamilyOptions { - ColumnFamilyOptions { + impl CfOptions { + pub fn new() -> CfOptions { + CfOptions { disable_auto_compactions: false, level_zero_file_num_compaction_trigger: None, level_zero_slowdown_writes_trigger: None, @@ -461,7 +461,7 @@ pub mod ctor { } } - impl Default for ColumnFamilyOptions { + impl Default for CfOptions { fn default() -> Self { Self::new() } @@ -471,9 +471,7 @@ pub mod ctor { use engine_panic::PanicEngine; use engine_traits::Result; - use super::{ - ColumnFamilyOptions, DBOptions, KvEngineConstructorExt, RaftEngineConstructorExt, - }; + use super::{CfOptions, DbOptions, KvEngineConstructorExt, RaftEngineConstructorExt}; impl KvEngineConstructorExt for engine_panic::PanicEngine { fn new_kv_engine(_path: &str, _cfs: &[&str]) -> Result { @@ -482,15 +480,15 @@ pub mod ctor { fn new_kv_engine_opt( _path: &str, - _db_opt: DBOptions, - _cfs_opts: Vec<(&str, ColumnFamilyOptions)>, + _db_opt: DbOptions, + _cfs_opts: Vec<(&str, CfOptions)>, ) -> Result { Ok(PanicEngine) } } impl RaftEngineConstructorExt for engine_panic::PanicEngine { - fn new_raft_engine(_path: &str, _db_opt: Option) -> Result { + fn new_raft_engine(_path: &str, _db_opt: Option) -> Result { Ok(PanicEngine) } } @@ -501,21 +499,20 @@ pub mod ctor { get_env, properties::{MvccPropertiesCollectorFactory, RangePropertiesCollectorFactory}, util::new_engine_opt as rocks_new_engine_opt, - RocksCfOptions, RocksDBOptions, + RocksCfOptions, RocksDbOptions, }; - use engine_traits::{ColumnFamilyOptions as ColumnFamilyOptionsTrait, Result, CF_DEFAULT}; + use engine_traits::{CfOptions as _, Result, CF_DEFAULT}; use super::{ - ColumnFamilyOptions, DBOptions, KvEngineConstructorExt, 
RaftDBOptions, - RaftEngineConstructorExt, + CfOptions, DbOptions, KvEngineConstructorExt, RaftDbOptions, RaftEngineConstructorExt, }; impl KvEngineConstructorExt for engine_rocks::RocksEngine { // FIXME this is duplicating behavior from engine_rocks::util in order to // call set_standard_cf_opts. fn new_kv_engine(path: &str, cfs: &[&str]) -> Result { - let rocks_db_opt = RocksDBOptions::default(); - let default_cf_opt = ColumnFamilyOptions::new(); + let rocks_db_opt = RocksDbOptions::default(); + let default_cf_opt = CfOptions::new(); let rocks_cfs_opts = cfs .iter() .map(|cf_name| (*cf_name, get_rocks_cf_opts(&default_cf_opt))) @@ -525,8 +522,8 @@ pub mod ctor { fn new_kv_engine_opt( path: &str, - db_opt: DBOptions, - cfs_opts: Vec<(&str, ColumnFamilyOptions)>, + db_opt: DbOptions, + cfs_opts: Vec<(&str, CfOptions)>, ) -> Result { let rocks_db_opts = get_rocks_db_opts(db_opt)?; let rocks_cfs_opts = cfs_opts @@ -538,18 +535,18 @@ pub mod ctor { } impl RaftEngineConstructorExt for engine_rocks::RocksEngine { - fn new_raft_engine(path: &str, db_opt: Option) -> Result { + fn new_raft_engine(path: &str, db_opt: Option) -> Result { let rocks_db_opts = match db_opt { Some(db_opt) => get_rocks_db_opts(db_opt)?, - None => RocksDBOptions::default(), + None => RocksDbOptions::default(), }; - let rocks_cf_opts = get_rocks_cf_opts(&ColumnFamilyOptions::new()); + let rocks_cf_opts = get_rocks_cf_opts(&CfOptions::new()); let default_cfs_opts = vec![(CF_DEFAULT, rocks_cf_opts)]; rocks_new_engine_opt(path, rocks_db_opts, default_cfs_opts) } } - fn get_rocks_cf_opts(cf_opts: &ColumnFamilyOptions) -> RocksCfOptions { + fn get_rocks_cf_opts(cf_opts: &CfOptions) -> RocksCfOptions { let mut rocks_cf_opts = RocksCfOptions::new(); if !cf_opts.get_no_range_properties() { rocks_cf_opts.add_table_properties_collector_factory( @@ -576,8 +573,8 @@ pub mod ctor { rocks_cf_opts } - fn get_rocks_db_opts(db_opts: DBOptions) -> Result { - let mut rocks_db_opts = RocksDBOptions::default(); + fn 
get_rocks_db_opts(db_opts: DbOptions) -> Result { + let mut rocks_db_opts = RocksDbOptions::default(); let env = get_env(db_opts.key_manager.clone(), db_opts.rate_limiter)?; rocks_db_opts.set_env(env); if db_opts.enable_multi_batch_write { @@ -593,10 +590,10 @@ pub mod ctor { use engine_traits::Result; use raft_log_engine::{RaftEngineConfig, RaftLogEngine}; - use super::{RaftDBOptions, RaftEngineConstructorExt}; + use super::{RaftDbOptions, RaftEngineConstructorExt}; impl RaftEngineConstructorExt for raft_log_engine::RaftLogEngine { - fn new_raft_engine(path: &str, db_opts: Option) -> Result { + fn new_raft_engine(path: &str, db_opts: Option) -> Result { let mut config = RaftEngineConfig::default(); config.dir = path.to_owned(); RaftLogEngine::new( diff --git a/components/engine_traits/src/cf_names.rs b/components/engine_traits/src/cf_names.rs index 714139c8530..c33ac11081a 100644 --- a/components/engine_traits/src/cf_names.rs +++ b/components/engine_traits/src/cf_names.rs @@ -1,5 +1,5 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -pub trait CFNamesExt { +pub trait CfNamesExt { fn cf_names(&self) -> Vec<&str>; } diff --git a/components/engine_traits/src/cf_options.rs b/components/engine_traits/src/cf_options.rs index 6498238280f..a43b01670ed 100644 --- a/components/engine_traits/src/cf_options.rs +++ b/components/engine_traits/src/cf_options.rs @@ -1,17 +1,17 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
-use crate::{db_options::TitanDBOptions, sst_partitioner::SstPartitionerFactory, Result}; +use crate::{db_options::TitanDbOptions, sst_partitioner::SstPartitionerFactory, Result}; /// Trait for engines with column family options -pub trait CFOptionsExt { - type ColumnFamilyOptions: ColumnFamilyOptions; +pub trait CfOptionsExt { + type CfOptions: CfOptions; - fn get_options_cf(&self, cf: &str) -> Result; + fn get_options_cf(&self, cf: &str) -> Result; fn set_options_cf(&self, cf: &str, options: &[(&str, &str)]) -> Result<()>; } -pub trait ColumnFamilyOptions { - type TitanDBOptions: TitanDBOptions; +pub trait CfOptions { + type TitanDbOptions: TitanDbOptions; fn new() -> Self; fn get_max_write_buffer_number(&self) -> u32; @@ -22,7 +22,7 @@ pub trait ColumnFamilyOptions { fn get_hard_pending_compaction_bytes_limit(&self) -> u64; fn get_block_cache_capacity(&self) -> u64; fn set_block_cache_capacity(&self, capacity: u64) -> Result<()>; - fn set_titandb_options(&mut self, opts: &Self::TitanDBOptions); + fn set_titandb_options(&mut self, opts: &Self::TitanDbOptions); fn get_target_file_size_base(&self) -> u64; fn set_disable_auto_compactions(&mut self, v: bool); fn get_disable_auto_compactions(&self) -> bool; diff --git a/components/engine_traits/src/db_options.rs b/components/engine_traits/src/db_options.rs index 7a6042d3db4..6fbd61b4833 100644 --- a/components/engine_traits/src/db_options.rs +++ b/components/engine_traits/src/db_options.rs @@ -3,16 +3,16 @@ use crate::errors::Result; /// A trait for engines that support setting global options -pub trait DBOptionsExt { - type DBOptions: DBOptions; +pub trait DbOptionsExt { + type DbOptions: DbOptions; - fn get_db_options(&self) -> Self::DBOptions; + fn get_db_options(&self) -> Self::DbOptions; fn set_db_options(&self, options: &[(&str, &str)]) -> Result<()>; } /// A handle to a database's options -pub trait DBOptions { - type TitanDBOptions: TitanDBOptions; +pub trait DbOptions { + type TitanDbOptions: TitanDbOptions; 
fn new() -> Self; fn get_max_background_jobs(&self) -> i32; @@ -20,11 +20,11 @@ pub trait DBOptions { fn set_rate_bytes_per_sec(&mut self, rate_bytes_per_sec: i64) -> Result<()>; fn get_rate_limiter_auto_tuned(&self) -> Option; fn set_rate_limiter_auto_tuned(&mut self, rate_limiter_auto_tuned: bool) -> Result<()>; - fn set_titandb_options(&mut self, opts: &Self::TitanDBOptions); + fn set_titandb_options(&mut self, opts: &Self::TitanDbOptions); } /// Titan-specefic options -pub trait TitanDBOptions { +pub trait TitanDbOptions { fn new() -> Self; fn set_min_blob_size(&mut self, size: u64); } diff --git a/components/engine_traits/src/db_vector.rs b/components/engine_traits/src/db_vector.rs index 9caf55d9e22..08bea9f11e5 100644 --- a/components/engine_traits/src/db_vector.rs +++ b/components/engine_traits/src/db_vector.rs @@ -6,4 +6,4 @@ use std::{fmt::Debug, ops::Deref}; /// /// The database may optimize this type to be a view into /// its own cache. -pub trait DBVector: Debug + Deref + for<'a> PartialEq<&'a [u8]> {} +pub trait DbVector: Debug + Deref + for<'a> PartialEq<&'a [u8]> {} diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index 1ffbdec1df5..276fb1ed19a 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -23,9 +23,9 @@ pub trait KvEngine: + SyncMutable + Iterable + WriteBatchExt - + DBOptionsExt - + CFNamesExt - + CFOptionsExt + + DbOptionsExt + + CfNamesExt + + CfOptionsExt + ImportExt + SstExt + CompactExt @@ -229,7 +229,7 @@ pub trait TabletFactory: TabletAccessor { pub struct DummyFactory where - EK: CFOptionsExt + Clone + Send + 'static, + EK: CfOptionsExt + Clone + Send + 'static, { pub engine: Option, pub root_path: String, @@ -237,7 +237,7 @@ where impl TabletFactory for DummyFactory where - EK: CFOptionsExt + Clone + Send + 'static, + EK: CfOptionsExt + Clone + Send + 'static, { fn create_tablet(&self, _id: u64, _suffix: u64) -> Result { 
Ok(self.engine.as_ref().unwrap().clone()) @@ -273,7 +273,7 @@ where } impl TabletAccessor for DummyFactory where - EK: CFOptionsExt + Clone + Send + 'static, + EK: CfOptionsExt + Clone + Send + 'static, { fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &EK)) { if let Some(engine) = &self.engine { @@ -288,14 +288,14 @@ where impl DummyFactory where - EK: CFOptionsExt + Clone + Send + 'static, + EK: CfOptionsExt + Clone + Send + 'static, { pub fn new(engine: Option, root_path: String) -> DummyFactory { DummyFactory { engine, root_path } } } -impl Default for DummyFactory { +impl Default for DummyFactory { fn default() -> Self { Self::new(None, "/tmp".to_string()) } diff --git a/components/engine_traits/src/file_system.rs b/components/engine_traits/src/file_system.rs index 1671c1f0aab..51911b1f58e 100644 --- a/components/engine_traits/src/file_system.rs +++ b/components/engine_traits/src/file_system.rs @@ -2,7 +2,7 @@ use std::sync::Arc; -use file_system::{get_io_rate_limiter, get_io_type, IOOp, IORateLimiter}; +use file_system::{get_io_rate_limiter, get_io_type, IoOp, IoRateLimiter}; use crate::Result; @@ -12,7 +12,7 @@ pub trait FileSystemInspector: Sync + Send { } pub struct EngineFileSystemInspector { - limiter: Option>, + limiter: Option>, } impl EngineFileSystemInspector { @@ -23,7 +23,7 @@ impl EngineFileSystemInspector { } } - pub fn from_limiter(limiter: Option>) -> Self { + pub fn from_limiter(limiter: Option>) -> Self { EngineFileSystemInspector { limiter } } } @@ -38,7 +38,7 @@ impl FileSystemInspector for EngineFileSystemInspector { fn read(&self, len: usize) -> Result { if let Some(limiter) = &self.limiter { let io_type = get_io_type(); - Ok(limiter.request(io_type, IOOp::Read, len)) + Ok(limiter.request(io_type, IoOp::Read, len)) } else { Ok(len) } @@ -47,7 +47,7 @@ impl FileSystemInspector for EngineFileSystemInspector { fn write(&self, len: usize) -> Result { if let Some(limiter) = &self.limiter { let io_type = get_io_type(); - 
Ok(limiter.request(io_type, IOOp::Write, len)) + Ok(limiter.request(io_type, IoOp::Write, len)) } else { Ok(len) } diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 191e5dcb204..0e709d164bd 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -328,7 +328,7 @@ pub use crate::range::*; mod raft_engine; pub use raft_engine::{ - CacheStats, RaftEngine, RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch, RaftLogGCTask, + CacheStats, RaftEngine, RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch, RaftLogGcTask, RAFT_LOG_MULTI_GET_CNT, }; diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index 67e32e40bdd..0e6b9600da6 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -6,7 +6,7 @@ //! FIXME: Things here need to be moved elsewhere. use crate::{ - cf_names::CFNamesExt, errors::Result, flow_control_factors::FlowControlFactorsExt, range::Range, + cf_names::CfNamesExt, errors::Result, flow_control_factors::FlowControlFactorsExt, range::Range, }; #[derive(Clone, Debug)] @@ -28,7 +28,7 @@ pub enum DeleteStrategy { DeleteByWriter { sst_path: String }, } -pub trait MiscExt: CFNamesExt + FlowControlFactorsExt { +pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { fn flush(&self, sync: bool) -> Result<()>; fn flush_cf(&self, cf: &str, sync: bool) -> Result<()>; diff --git a/components/engine_traits/src/peekable.rs b/components/engine_traits/src/peekable.rs index 23318b2a233..fe9e3600abe 100644 --- a/components/engine_traits/src/peekable.rs +++ b/components/engine_traits/src/peekable.rs @@ -10,14 +10,14 @@ use crate::*; /// to read from, or to encode the value as a protobuf message. pub trait Peekable { /// The byte-vector type through which the database returns read values. - type DBVector: DBVector; + type DbVector: DbVector; /// Read a value for a key, given a set of options. 
/// /// Reads from the default column family. /// /// Returns `None` if they key does not exist. - fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result>; + fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result>; /// Read a value for a key from a given column family, given a set of /// options. @@ -28,14 +28,14 @@ pub trait Peekable { opts: &ReadOptions, cf: &str, key: &[u8], - ) -> Result>; + ) -> Result>; /// Read a value for a key. /// /// Uses the default options and column family. /// /// Returns `None` if the key does not exist. - fn get_value(&self, key: &[u8]) -> Result> { + fn get_value(&self, key: &[u8]) -> Result> { self.get_value_opt(&ReadOptions::default(), key) } @@ -44,7 +44,7 @@ pub trait Peekable { /// Uses the default options. /// /// Returns `None` if the key does not exist. - fn get_value_cf(&self, cf: &str, key: &[u8]) -> Result> { + fn get_value_cf(&self, cf: &str, key: &[u8]) -> Result> { self.get_value_cf_opt(&ReadOptions::default(), cf, key) } diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index d94d69fa335..a7bd66d3230 100644 --- a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -59,7 +59,7 @@ pub trait RaftEngineDebug: RaftEngine + Sync + Send + 'static { } } -pub struct RaftLogGCTask { +pub struct RaftLogGcTask { pub raft_group_id: u64, pub from: u64, pub to: u64, @@ -107,7 +107,7 @@ pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send /// count. Generally, `from` can be passed in `0`. 
fn gc(&self, raft_group_id: u64, from: u64, to: u64) -> Result; - fn batch_gc(&self, tasks: Vec) -> Result { + fn batch_gc(&self, tasks: Vec) -> Result { let mut total = 0; for task in tasks { total += self.gc(task.raft_group_id, task.from, task.to)?; diff --git a/components/engine_traits_tests/src/cf_names.rs b/components/engine_traits_tests/src/cf_names.rs index 48031275b14..2cac1eaff73 100644 --- a/components/engine_traits_tests/src/cf_names.rs +++ b/components/engine_traits_tests/src/cf_names.rs @@ -1,6 +1,6 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{CFNamesExt, KvEngine, Snapshot, ALL_CFS, CF_DEFAULT}; +use engine_traits::{CfNamesExt, KvEngine, Snapshot, ALL_CFS, CF_DEFAULT}; use super::{default_engine, engine_cfs}; diff --git a/components/engine_traits_tests/src/ctor.rs b/components/engine_traits_tests/src/ctor.rs index 5f39ad4f3a7..2ab7a7360a7 100644 --- a/components/engine_traits_tests/src/ctor.rs +++ b/components/engine_traits_tests/src/ctor.rs @@ -5,7 +5,7 @@ use std::fs; use engine_test::{ - ctor::{ColumnFamilyOptions, DBOptions, KvEngineConstructorExt}, + ctor::{CfOptions, DbOptions, KvEngineConstructorExt}, kv::KvTestEngine, }; use engine_traits::{KvEngine, SyncMutable, ALL_CFS}; @@ -23,11 +23,8 @@ fn new_engine_basic() { fn new_engine_opt_basic() { let dir = tempdir(); let path = dir.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let cf_opts = ALL_CFS - .iter() - .map(|cf| (*cf, ColumnFamilyOptions::new())) - .collect(); + let db_opts = DbOptions::default(); + let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); let _db = KvTestEngine::new_kv_engine_opt(path, db_opts, cf_opts).unwrap(); } @@ -47,11 +44,8 @@ fn new_engine_opt_missing_dir() { let dir = tempdir(); let path = dir.path(); let path = path.join("missing").to_str().unwrap().to_owned(); - let db_opts = DBOptions::default(); - let cf_opts = ALL_CFS - .iter() - .map(|cf| (*cf, ColumnFamilyOptions::new())) - 
.collect(); + let db_opts = DbOptions::default(); + let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); let db = KvTestEngine::new_kv_engine_opt(&path, db_opts, cf_opts).unwrap(); db.put(b"foo", b"bar").unwrap(); db.sync().unwrap(); @@ -90,11 +84,8 @@ fn new_engine_opt_readonly_dir() { fs::set_permissions(&path, perms).unwrap(); let path = path.to_str().unwrap(); - let db_opts = DBOptions::default(); - let cf_opts = ALL_CFS - .iter() - .map(|cf| (*cf, ColumnFamilyOptions::new())) - .collect(); + let db_opts = DbOptions::default(); + let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); let err = KvTestEngine::new_kv_engine_opt(path, db_opts, cf_opts); assert!(err.is_err()); diff --git a/components/engine_traits_tests/src/lib.rs b/components/engine_traits_tests/src/lib.rs index 73c741ff925..d9b6af12f09 100644 --- a/components/engine_traits_tests/src/lib.rs +++ b/components/engine_traits_tests/src/lib.rs @@ -75,8 +75,7 @@ fn default_engine() -> TempDirEnginePair { fn multi_batch_write_engine() -> TempDirEnginePair { use engine_test::{ ctor::{ - ColumnFamilyOptions as KvTestCFOptions, DBOptions as KvTestDBOptions, - KvEngineConstructorExt, + CfOptions as KvTestCfOptions, DbOptions as KvTestDbOptions, KvEngineConstructorExt, }, kv::KvTestEngine, }; @@ -84,10 +83,10 @@ fn multi_batch_write_engine() -> TempDirEnginePair { let dir = tempdir(); let path = dir.path().to_str().unwrap(); - let mut opt = KvTestDBOptions::default(); + let mut opt = KvTestDbOptions::default(); opt.set_enable_multi_batch_write(true); let engine = - KvTestEngine::new_kv_engine_opt(path, opt, vec![(CF_DEFAULT, KvTestCFOptions::new())]) + KvTestEngine::new_kv_engine_opt(path, opt, vec![(CF_DEFAULT, KvTestCfOptions::new())]) .unwrap(); TempDirEnginePair { engine, diff --git a/components/engine_traits_tests/src/scenario_writes.rs b/components/engine_traits_tests/src/scenario_writes.rs index 3e250c21198..c9b1b1d5fb7 100644 --- 
a/components/engine_traits_tests/src/scenario_writes.rs +++ b/components/engine_traits_tests/src/scenario_writes.rs @@ -105,7 +105,7 @@ impl WriteScenarioEngine { } } - fn get_value(&self, key: &[u8]) -> Result::DBVector>> { + fn get_value(&self, key: &[u8]) -> Result::DbVector>> { use WriteScenario::*; match self.scenario { NoCf | DefaultCf | WriteBatchNoCf | WriteBatchDefaultCf => { diff --git a/components/external_storage/export/src/export.rs b/components/external_storage/export/src/export.rs index 00048522752..0fb24ef48ce 100644 --- a/components/external_storage/export/src/export.rs +++ b/components/external_storage/export/src/export.rs @@ -29,7 +29,7 @@ pub use external_storage::{ }; use futures_io::AsyncRead; #[cfg(feature = "cloud-gcp")] -pub use gcp::{Config as GCSConfig, GCSStorage}; +pub use gcp::{Config as GcsConfig, GcsStorage}; pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; #[cfg(any(feature = "cloud-gcp", feature = "cloud-aws", feature = "cloud-azure"))] use kvproto::brpb::{AzureBlobStorage, Gcs, S3}; @@ -139,7 +139,7 @@ fn create_config(backend: &Backend) -> Option>> { } #[cfg(feature = "cloud-gcp")] Backend::Gcs(config) => { - let conf = GCSConfig::from_input(config.clone()); + let conf = GcsConfig::from_input(config.clone()); Some(conf.map(|c| Box::new(c) as Box)) } #[cfg(feature = "cloud-azure")] @@ -155,7 +155,7 @@ fn create_config(backend: &Backend) -> Option>> { } #[cfg(feature = "cloud-gcp")] "gcp" | "gcs" => { - let conf = GCSConfig::from_cloud_dynamic(&dyn_backend); + let conf = GcsConfig::from_cloud_dynamic(&dyn_backend); Some(conf.map(|c| Box::new(c) as Box)) } #[cfg(feature = "cloud-azure")] @@ -191,14 +191,14 @@ fn create_backend_inner( blob_store(s) } #[cfg(feature = "cloud-gcp")] - Backend::Gcs(config) => blob_store(GCSStorage::from_input(config.clone())?), + Backend::Gcs(config) => blob_store(GcsStorage::from_input(config.clone())?), #[cfg(feature = "cloud-azure")] Backend::AzureBlobStorage(config) => 
blob_store(AzureStorage::from_input(config.clone())?), Backend::CloudDynamic(dyn_backend) => match dyn_backend.provider_name.as_str() { #[cfg(feature = "cloud-aws")] "aws" | "s3" => blob_store(S3Storage::from_cloud_dynamic(dyn_backend)?), #[cfg(feature = "cloud-gcp")] - "gcp" | "gcs" => blob_store(GCSStorage::from_cloud_dynamic(dyn_backend)?), + "gcp" | "gcs" => blob_store(GcsStorage::from_cloud_dynamic(dyn_backend)?), #[cfg(feature = "cloud-azure")] "azure" | "azblob" => blob_store(AzureStorage::from_cloud_dynamic(dyn_backend)?), _ => { diff --git a/components/external_storage/src/lib.rs b/components/external_storage/src/lib.rs index 8c9ea242b98..afae433e54a 100644 --- a/components/external_storage/src/lib.rs +++ b/components/external_storage/src/lib.rs @@ -16,7 +16,7 @@ use std::{ }; use async_trait::async_trait; -use encryption::{encryption_method_from_db_encryption_method, DecrypterReader, Iv}; +use encryption::{from_engine_encryption_method, DecrypterReader, Iv}; use engine_traits::FileEncryptionInfo; use file_system::File; use futures_io::AsyncRead; @@ -152,7 +152,7 @@ pub fn encrypt_wrap_reader<'a>( let input = match file_crypter { Some(x) => Box::new(DecrypterReader::new( reader, - encryption_method_from_db_encryption_method(x.method), + from_engine_encryption_method(x.method), &x.key, Iv::from_slice(&x.iv)?, )?), diff --git a/components/file_system/src/file.rs b/components/file_system/src/file.rs index 1c56b240f1d..c072b8f852f 100644 --- a/components/file_system/src/file.rs +++ b/components/file_system/src/file.rs @@ -13,13 +13,13 @@ use std::{ // Extention Traits use fs2::FileExt; -use super::{get_io_rate_limiter, get_io_type, IOOp, IORateLimiter}; +use super::{get_io_rate_limiter, get_io_type, IoOp, IoRateLimiter}; /// A wrapper around `std::fs::File` with capability to track and regulate IO /// flow. 
pub struct File { inner: fs::File, - limiter: Option>, + limiter: Option>, } impl Debug for File { @@ -40,7 +40,7 @@ impl File { #[cfg(test)] pub fn open_with_limiter>( path: P, - limiter: Option>, + limiter: Option>, ) -> io::Result { let inner = fs::File::open(path)?; Ok(File { inner, limiter }) @@ -57,7 +57,7 @@ impl File { #[cfg(test)] pub fn create_with_limiter>( path: P, - limiter: Option>, + limiter: Option>, ) -> io::Result { let inner = fs::File::create(path)?; Ok(File { inner, limiter }) @@ -105,7 +105,7 @@ impl Read for File { let mut remains = buf.len(); let mut pos = 0; while remains > 0 { - let allowed = limiter.request(get_io_type(), IOOp::Read, remains); + let allowed = limiter.request(get_io_type(), IoOp::Read, remains); let read = self.inner.read(&mut buf[pos..pos + allowed])?; pos += read; remains -= read; @@ -132,7 +132,7 @@ impl Write for File { let mut remains = buf.len(); let mut pos = 0; while remains > 0 { - let allowed = limiter.request(get_io_type(), IOOp::Write, remains); + let allowed = limiter.request(get_io_type(), IoOp::Write, remains); let written = self.inner.write(&buf[pos..pos + allowed])?; pos += written; remains -= written; @@ -262,7 +262,7 @@ mod tests { .prefix("test_instrumented_file") .tempdir() .unwrap(); - let limiter = Arc::new(IORateLimiter::new_for_test()); + let limiter = Arc::new(IoRateLimiter::new_for_test()); // make sure read at most one bytes at a time limiter.set_io_rate_limit(20 /* 1s / refill_period */); let stats = limiter.statistics().unwrap(); @@ -270,24 +270,24 @@ mod tests { let tmp_file = tmp_dir.path().join("instrumented.txt"); let content = String::from("drink full and descend"); { - let _guard = WithIOType::new(IOType::ForegroundWrite); + let _guard = WithIoType::new(IoType::ForegroundWrite); let mut f = File::create_with_limiter(&tmp_file, Some(limiter.clone())).unwrap(); f.write_all(content.as_bytes()).unwrap(); f.sync_all().unwrap(); assert_eq!( - stats.fetch(IOType::ForegroundWrite, IOOp::Write), 
+ stats.fetch(IoType::ForegroundWrite, IoOp::Write), content.len() ); } { - let _guard = WithIOType::new(IOType::Export); + let _guard = WithIoType::new(IoType::Export); let mut buffer = String::new(); let mut f = File::open_with_limiter(&tmp_file, Some(limiter)).unwrap(); assert_eq!(f.read_to_string(&mut buffer).unwrap(), content.len()); assert_eq!(buffer, content); // read_to_string only exit when file.read() returns zero, which means // it requires two EOF reads to finish the call. - assert_eq!(stats.fetch(IOType::Export, IOOp::Read), content.len() + 2); + assert_eq!(stats.fetch(IoType::Export, IoOp::Read), content.len() + 2); } } diff --git a/components/file_system/src/io_stats/biosnoop.rs b/components/file_system/src/io_stats/biosnoop.rs index d156d94f77c..6b804bfed87 100644 --- a/components/file_system/src/io_stats/biosnoop.rs +++ b/components/file_system/src/io_stats/biosnoop.rs @@ -14,7 +14,7 @@ use crossbeam_utils::CachePadded; use strum::{EnumCount, IntoEnumIterator}; use tikv_util::sys::thread; -use crate::{metrics::*, IOBytes, IOType}; +use crate::{metrics::*, IoBytes, IoType}; /// Biosnoop leverages BCC to make use of eBPF to get disk IO of TiKV requests. /// The BCC code is in `biosnoop.c` which is compiled and attached kernel on @@ -29,7 +29,7 @@ use crate::{metrics::*, IOBytes, IOType}; /// by address, then all the IO requests for that thread will be recorded in /// corresponding type's map in BCC. /// -/// With that information, every time calling `IOContext` it get the stored +/// With that information, every time calling `IoContext` it get the stored /// stats from corresponding type's map in BCC. Thus it enables TiKV to get the /// latency and bytes of read/write request per IO-type. @@ -37,9 +37,9 @@ const MAX_THREAD_IDX: usize = 192; // Hold the BPF to keep it not dropped. // The two tables are `stats_by_type` and `type_by_pid` respectively. 
-static mut BPF_CONTEXT: Option = None; +static mut BPF_CONTEXT: Option = None; -struct BPFContext { +struct BpfContext { bpf: BPF, stats_table: Table, type_table: Table, @@ -56,9 +56,9 @@ struct BPFContext { // and kernel. Thus no need to make the elements atomic. Also use padding to // avoid false sharing. // Leave the last element as reserved, when there is no available index, all -// other threads will be allocated to that index with IOType::Other always. -static mut IO_TYPE_ARRAY: [CachePadded; MAX_THREAD_IDX + 1] = - [CachePadded::new(IOType::Other); MAX_THREAD_IDX + 1]; +// other threads will be allocated to that index with IoType::Other always. +static mut IO_TYPE_ARRAY: [CachePadded; MAX_THREAD_IDX + 1] = + [CachePadded::new(IoType::Other); MAX_THREAD_IDX + 1]; // The index of the element of IO_TYPE_ARRAY for this thread to access. thread_local! { @@ -71,7 +71,7 @@ thread_local! { &mut tid.to_ne_bytes(), std::slice::from_raw_parts_mut( ptr as *mut u8, - std::mem::size_of::<*const IOType>(), + std::mem::size_of::<*const IoType>(), ), ).unwrap(); } @@ -83,7 +83,7 @@ struct IdxWrapper(usize); impl Drop for IdxWrapper { fn drop(&mut self) { - unsafe { *IO_TYPE_ARRAY[self.0] = IOType::Other }; + unsafe { *IO_TYPE_ARRAY[self.0] = IoType::Other }; IDX_ALLOCATOR.free(self.0); // drop() of static variables won't be called when program exits. 
@@ -134,10 +134,10 @@ impl IdxAllocator { } } -pub fn set_io_type(new_io_type: IOType) { +pub fn set_io_type(new_io_type: IoType) { unsafe { IDX.with(|idx| { - // if MAX_THREAD_IDX, keep IOType::Other always + // if MAX_THREAD_IDX, keep IoType::Other always if idx.0 != MAX_THREAD_IDX { *IO_TYPE_ARRAY[idx.0] = new_io_type; } @@ -145,22 +145,22 @@ pub fn set_io_type(new_io_type: IOType) { }; } -pub fn get_io_type() -> IOType { +pub fn get_io_type() -> IoType { unsafe { *IDX.with(|idx| IO_TYPE_ARRAY[idx.0]) } } -pub fn fetch_io_bytes() -> [IOBytes; IOType::COUNT] { +pub fn fetch_io_bytes() -> [IoBytes; IoType::COUNT] { let mut bytes = Default::default(); unsafe { if let Some(ctx) = BPF_CONTEXT.as_mut() { - for io_type in IOType::iter() { - let io_type_buf_ptr = &mut io_type as *mut IOType as *mut u8; + for io_type in IoType::iter() { + let io_type_buf_ptr = &mut io_type as *mut IoType as *mut u8; let mut io_type_buf = - std::slice::from_raw_parts_mut(io_type_buf_ptr, std::mem::size_of::()); + std::slice::from_raw_parts_mut(io_type_buf_ptr, std::mem::size_of::()); if let Ok(e) = ctx.stats_table.get(&mut io_type_buf) { - assert!(e.len() == std::mem::size_of::()); + assert!(e.len() == std::mem::size_of::()); bytes[io_type as usize] = - std::ptr::read_unaligned(e.as_ptr() as *const IOBytes); + std::ptr::read_unaligned(e.as_ptr() as *const IoBytes); } } } @@ -210,7 +210,7 @@ pub fn init() -> Result<(), String> { let stats_table = bpf.table("stats_by_type").map_err(|e| e.to_string())?; let type_table = bpf.table("type_by_pid").map_err(|e| e.to_string())?; unsafe { - BPF_CONTEXT = Some(BPFContext { + BPF_CONTEXT = Some(BpfContext { bpf, stats_table, type_table, @@ -286,7 +286,7 @@ mod tests { fetch_io_bytes, flush_io_latency_metrics, get_io_type, init, set_io_type, BPF_CONTEXT, MAX_THREAD_IDX, }; - use crate::{metrics::*, IOType, OpenOptions}; + use crate::{metrics::*, IoType, OpenOptions}; #[test] fn test_biosnoop() { @@ -301,8 +301,8 @@ mod tests { } fn test_io_context() { 
- set_io_type(IOType::Compaction); - assert_eq!(get_io_type(), IOType::Compaction); + set_io_type(IoType::Compaction); + assert_eq!(get_io_type(), IoType::Compaction); let tmp = TempDir::new().unwrap(); let file_path = tmp.path().join("test_io_context"); let mut f = OpenOptions::new() @@ -313,18 +313,18 @@ mod tests { .unwrap(); let mut w = vec![A512::default(); 2]; w.as_bytes_mut()[512] = 42; - let mut compaction_bytes_before = fetch_io_bytes()[IOType::Compaction as usize]; + let mut compaction_bytes_before = fetch_io_bytes()[IoType::Compaction as usize]; f.write(w.as_bytes()).unwrap(); f.sync_all().unwrap(); - let compaction_bytes = fetch_io_bytes()[IOType::Compaction as usize]; + let compaction_bytes = fetch_io_bytes()[IoType::Compaction as usize]; assert_ne!((compaction_bytes - compaction_bytes_before).write, 0); assert_eq!((compaction_bytes - compaction_bytes_before).read, 0); compaction_bytes_before = compaction_bytes; drop(f); - let other_bytes_before = fetch_io_bytes()[IOType::Other as usize]; + let other_bytes_before = fetch_io_bytes()[IoType::Other as usize]; std::thread::spawn(move || { - set_io_type(IOType::Other); + set_io_type(IoType::Other); let mut f = OpenOptions::new() .read(true) .custom_flags(O_DIRECT) @@ -337,8 +337,8 @@ mod tests { .join() .unwrap(); - let compaction_bytes = fetch_io_bytes()[IOType::Compaction as usize]; - let other_bytes = fetch_io_bytes()[IOType::Other as usize]; + let compaction_bytes = fetch_io_bytes()[IoType::Compaction as usize]; + let other_bytes = fetch_io_bytes()[IoType::Other as usize]; assert_eq!((compaction_bytes - compaction_bytes_before).write, 0); assert_eq!((compaction_bytes - compaction_bytes_before).read, 0); assert_eq!((other_bytes - other_bytes_before).write, 0); @@ -353,7 +353,7 @@ mod tests { // the thread indexes should be recycled. 
for _ in 1..=MAX_THREAD_IDX * 2 { std::thread::spawn(|| { - set_io_type(IOType::Other); + set_io_type(IoType::Other); }) .join() .unwrap(); @@ -365,7 +365,7 @@ mod tests { for _ in 1..=MAX_THREAD_IDX { let pair1 = pair.clone(); let h = std::thread::spawn(move || { - set_io_type(IOType::Compaction); + set_io_type(IoType::Compaction); let (lock, cvar) = &*pair1; let mut stop = lock.lock().unwrap(); while !*stop { @@ -375,11 +375,11 @@ mod tests { handles.push(h); } - // the reserved index is used, io type should be IOType::Other + // the reserved index is used, io type should be IoType::Other for _ in 1..=MAX_THREAD_IDX { std::thread::spawn(|| { - set_io_type(IOType::Compaction); - assert_eq!(get_io_type(), IOType::Other); + set_io_type(IoType::Compaction); + assert_eq!(get_io_type(), IoType::Other); }) .join() .unwrap(); @@ -399,8 +399,8 @@ mod tests { // the thread indexes should be available again. for _ in 1..=MAX_THREAD_IDX { std::thread::spawn(|| { - set_io_type(IOType::Compaction); - assert_eq!(get_io_type(), IOType::Compaction); + set_io_type(IoType::Compaction); + assert_eq!(get_io_type(), IoType::Compaction); }) .join() .unwrap(); @@ -439,7 +439,7 @@ mod tests { #[ignore] fn bench_flush_io_latency_metrics(b: &mut Bencher) { init().unwrap(); - set_io_type(IOType::ForegroundWrite); + set_io_type(IoType::ForegroundWrite); let tmp = TempDir::new().unwrap(); let file_path = tmp.path().join("bench_flush_io_latency_metrics"); @@ -476,7 +476,7 @@ mod tests { w.as_bytes_mut()[64] = 42; b.iter(|| { - set_io_type(IOType::ForegroundWrite); + set_io_type(IoType::ForegroundWrite); f.write(w.as_bytes()).unwrap(); f.sync_all().unwrap(); }); @@ -509,7 +509,7 @@ mod tests { .unwrap(); let mut r = vec![A512::default(); 2]; b.iter(|| { - set_io_type(IOType::ForegroundRead); + set_io_type(IoType::ForegroundRead); f.seek(SeekFrom::Start(rng.gen_range(0..100) * 512)) .unwrap(); assert_ne!(f.read(&mut r.as_bytes_mut()).unwrap(), 0); diff --git 
a/components/file_system/src/io_stats/mod.rs b/components/file_system/src/io_stats/mod.rs index d9c7ae9d519..e4c0017451f 100644 --- a/components/file_system/src/io_stats/mod.rs +++ b/components/file_system/src/io_stats/mod.rs @@ -6,27 +6,27 @@ mod stub { use strum::EnumCount; - use crate::{IOBytes, IOType}; + use crate::{IoBytes, IoType}; pub fn init() -> Result<(), String> { Err("No I/O tracing tool available".to_owned()) } thread_local! { - static IO_TYPE: Cell = Cell::new(IOType::Other); + static IO_TYPE: Cell = Cell::new(IoType::Other); } - pub fn set_io_type(new_io_type: IOType) { + pub fn set_io_type(new_io_type: IoType) { IO_TYPE.with(|io_type| { io_type.set(new_io_type); }); } - pub fn get_io_type() -> IOType { + pub fn get_io_type() -> IoType { IO_TYPE.with(|io_type| io_type.get()) } - pub fn fetch_io_bytes() -> [IOBytes; IOType::COUNT] { + pub fn fetch_io_bytes() -> [IoBytes; IoType::COUNT] { Default::default() } } @@ -48,7 +48,7 @@ mod tests { use tikv_util::sys::thread::StdThreadBuildWrapper; use super::*; - use crate::IOType; + use crate::IoType; #[bench] fn bench_fetch_io_bytes(b: &mut test::Bencher) { @@ -57,7 +57,7 @@ mod tests { .map(|_| { let tx_clone = tx.clone(); std::thread::Builder::new().spawn_wrapper(move || { - set_io_type(IOType::ForegroundWrite); + set_io_type(IoType::ForegroundWrite); tx_clone.send(()).unwrap(); }) }) @@ -75,14 +75,14 @@ mod tests { .map(|_| { let tx_clone = tx.clone(); std::thread::Builder::new().spawn_wrapper(move || { - set_io_type(IOType::ForegroundWrite); + set_io_type(IoType::ForegroundWrite); tx_clone.send(()).unwrap(); }) }) .collect::>(); b.iter(|| match get_io_type() { - IOType::ForegroundWrite => set_io_type(IOType::ForegroundRead), - _ => set_io_type(IOType::ForegroundWrite), + IoType::ForegroundWrite => set_io_type(IoType::ForegroundRead), + _ => set_io_type(IoType::ForegroundWrite), }); for _ in 0..8 { rx.recv().unwrap(); diff --git a/components/file_system/src/io_stats/proc.rs 
b/components/file_system/src/io_stats/proc.rs index 836b5f5fdf0..07856ebe9c0 100644 --- a/components/file_system/src/io_stats/proc.rs +++ b/components/file_system/src/io_stats/proc.rs @@ -18,18 +18,18 @@ use tikv_util::{ warn, }; -use crate::{IOBytes, IOType}; +use crate::{IoBytes, IoType}; lazy_static! { /// Total I/O bytes read/written by each I/O type. - static ref GLOBAL_IO_STATS: [AtomicIOBytes; IOType::COUNT] = Default::default(); + static ref GLOBAL_IO_STATS: [AtomicIoBytes; IoType::COUNT] = Default::default(); /// Incremental I/O bytes read/written by the thread's own I/O type. - static ref LOCAL_IO_STATS: ThreadLocal>> = ThreadLocal::new(); + static ref LOCAL_IO_STATS: ThreadLocal>> = ThreadLocal::new(); } thread_local! { /// A private copy of I/O type. Optimized for local access. - static IO_TYPE: Cell = Cell::new(IOType::Other); + static IO_TYPE: Cell = Cell::new(IoType::Other); } #[derive(Debug)] @@ -50,7 +50,7 @@ impl ThreadID { } } - fn fetch_io_bytes(&mut self) -> Option { + fn fetch_io_bytes(&mut self) -> Option { if self.proc_reader.is_none() { let path = PathBuf::from("/proc") .join(format!("{}", self.pid)) @@ -73,7 +73,7 @@ impl ThreadID { warn!("failed to seek proc file: {}", e); }) .ok()?; - let mut io_bytes = IOBytes::default(); + let mut io_bytes = IoBytes::default(); for line in reader.lines() { let line = line .map_err(|e| { @@ -101,37 +101,37 @@ impl ThreadID { } } -struct LocalIOStats { +struct LocalIoStats { id: ThreadID, - io_type: IOType, - last_flushed: IOBytes, + io_type: IoType, + last_flushed: IoBytes, } -impl LocalIOStats { +impl LocalIoStats { fn current() -> Self { - LocalIOStats { + LocalIoStats { id: ThreadID::current(), - io_type: IOType::Other, - last_flushed: IOBytes::default(), + io_type: IoType::Other, + last_flushed: IoBytes::default(), } } } #[derive(Default)] -struct AtomicIOBytes { +struct AtomicIoBytes { read: AtomicU64, write: AtomicU64, } -impl AtomicIOBytes { - fn load(&self, order: Ordering) -> IOBytes { - 
IOBytes { +impl AtomicIoBytes { + fn load(&self, order: Ordering) -> IoBytes { + IoBytes { read: self.read.load(order), write: self.write.load(order), } } - fn fetch_add(&self, other: IOBytes, order: Ordering) { + fn fetch_add(&self, other: IoBytes, order: Ordering) { self.read.fetch_add(other.read, order); self.write.fetch_add(other.write, order); } @@ -139,7 +139,7 @@ impl AtomicIOBytes { /// Flushes the local I/O stats to global I/O stats. #[inline] -fn flush_thread_io(sentinel: &mut LocalIOStats) { +fn flush_thread_io(sentinel: &mut LocalIoStats) { if let Some(io_bytes) = sentinel.id.fetch_io_bytes() { GLOBAL_IO_STATS[sentinel.io_type as usize] .fetch_add(io_bytes - sentinel.last_flushed, Ordering::Relaxed); @@ -151,11 +151,11 @@ pub fn init() -> Result<(), String> { Ok(()) } -pub fn set_io_type(new_io_type: IOType) { +pub fn set_io_type(new_io_type: IoType) { IO_TYPE.with(|io_type| { if io_type.get() != new_io_type { let mut sentinel = LOCAL_IO_STATS - .get_or(|| CachePadded::new(Mutex::new(LocalIOStats::current()))) + .get_or(|| CachePadded::new(Mutex::new(LocalIoStats::current()))) .lock(); flush_thread_io(&mut sentinel); sentinel.io_type = new_io_type; @@ -164,16 +164,16 @@ pub fn set_io_type(new_io_type: IOType) { }); } -pub fn get_io_type() -> IOType { +pub fn get_io_type() -> IoType { IO_TYPE.with(|io_type| io_type.get()) } -pub fn fetch_io_bytes() -> [IOBytes; IOType::COUNT] { - let mut bytes: [IOBytes; IOType::COUNT] = Default::default(); +pub fn fetch_io_bytes() -> [IoBytes; IoType::COUNT] { + let mut bytes: [IoBytes; IoType::COUNT] = Default::default(); LOCAL_IO_STATS.iter().for_each(|sentinel| { flush_thread_io(&mut sentinel.lock()); }); - for i in 0..IOType::COUNT { + for i in 0..IoType::COUNT { bytes[i] = GLOBAL_IO_STATS[i].load(Ordering::Relaxed); } bytes @@ -191,14 +191,14 @@ mod tests { use tempfile::{tempdir, tempdir_in}; use super::*; - use crate::{OpenOptions, WithIOType}; + use crate::{OpenOptions, WithIoType}; #[test] fn test_read_bytes() 
{ let tmp = tempdir_in("/var/tmp").unwrap_or_else(|_| tempdir().unwrap()); let file_path = tmp.path().join("test_read_bytes.txt"); let mut id = ThreadID::current(); - let _type = WithIOType::new(IOType::Compaction); + let _type = WithIoType::new(IoType::Compaction); { let mut f = OpenOptions::new() .write(true) @@ -230,7 +230,7 @@ mod tests { let tmp = tempdir_in("/var/tmp").unwrap_or_else(|_| tempdir().unwrap()); let file_path = tmp.path().join("test_write_bytes.txt"); let mut id = ThreadID::current(); - let _type = WithIOType::new(IOType::Compaction); + let _type = WithIoType::new(IoType::Compaction); let mut f = OpenOptions::new() .write(true) .create(true) diff --git a/components/file_system/src/lib.rs b/components/file_system/src/lib.rs index 104b7371537..0bacbdef428 100644 --- a/components/file_system/src/lib.rs +++ b/components/file_system/src/lib.rs @@ -42,21 +42,21 @@ use openssl::{ hash::{self, Hasher, MessageDigest}, }; pub use rate_limiter::{ - get_io_rate_limiter, set_io_rate_limiter, IOBudgetAdjustor, IORateLimitMode, IORateLimiter, - IORateLimiterStatistics, + get_io_rate_limiter, set_io_rate_limiter, IoBudgetAdjustor, IoRateLimitMode, IoRateLimiter, + IoRateLimiterStatistics, }; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use strum::{EnumCount, EnumIter}; #[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum IOOp { +pub enum IoOp { Read, Write, } #[repr(C)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumCount, EnumIter)] -pub enum IOType { +pub enum IoType { Other = 0, // Including coprocessor and storage read. 
ForegroundRead = 1, @@ -74,37 +74,37 @@ pub enum IOType { Export = 10, } -impl IOType { +impl IoType { pub fn as_str(&self) -> &str { match *self { - IOType::Other => "other", - IOType::ForegroundRead => "foreground_read", - IOType::ForegroundWrite => "foreground_write", - IOType::Flush => "flush", - IOType::LevelZeroCompaction => "level_zero_compaction", - IOType::Compaction => "compaction", - IOType::Replication => "replication", - IOType::LoadBalance => "load_balance", - IOType::Gc => "gc", - IOType::Import => "import", - IOType::Export => "export", + IoType::Other => "other", + IoType::ForegroundRead => "foreground_read", + IoType::ForegroundWrite => "foreground_write", + IoType::Flush => "flush", + IoType::LevelZeroCompaction => "level_zero_compaction", + IoType::Compaction => "compaction", + IoType::Replication => "replication", + IoType::LoadBalance => "load_balance", + IoType::Gc => "gc", + IoType::Import => "import", + IoType::Export => "export", } } } -pub struct WithIOType { - previous_io_type: IOType, +pub struct WithIoType { + previous_io_type: IoType, } -impl WithIOType { - pub fn new(new_io_type: IOType) -> WithIOType { +impl WithIoType { + pub fn new(new_io_type: IoType) -> WithIoType { let previous_io_type = get_io_type(); set_io_type(new_io_type); - WithIOType { previous_io_type } + WithIoType { previous_io_type } } } -impl Drop for WithIOType { +impl Drop for WithIoType { fn drop(&mut self) { set_io_type(self.previous_io_type); } @@ -112,12 +112,12 @@ impl Drop for WithIOType { #[repr(C)] #[derive(Debug, Copy, Clone, Default)] -pub struct IOBytes { +pub struct IoBytes { read: u64, write: u64, } -impl std::ops::Sub for IOBytes { +impl std::ops::Sub for IoBytes { type Output = Self; fn sub(self, other: Self) -> Self::Output { @@ -130,18 +130,18 @@ impl std::ops::Sub for IOBytes { #[repr(u32)] #[derive(Debug, Clone, PartialEq, Eq, Copy, EnumCount)] -pub enum IOPriority { +pub enum IoPriority { Low = 0, Medium = 1, High = 2, } -impl IOPriority { 
+impl IoPriority { pub fn as_str(&self) -> &str { match *self { - IOPriority::Low => "low", - IOPriority::Medium => "medium", - IOPriority::High => "high", + IoPriority::Low => "low", + IoPriority::Medium => "medium", + IoPriority::High => "high", } } @@ -150,19 +150,19 @@ impl IOPriority { } } -impl std::str::FromStr for IOPriority { +impl std::str::FromStr for IoPriority { type Err = String; - fn from_str(s: &str) -> Result { + fn from_str(s: &str) -> Result { match s { - "low" => Ok(IOPriority::Low), - "medium" => Ok(IOPriority::Medium), - "high" => Ok(IOPriority::High), + "low" => Ok(IoPriority::Low), + "medium" => Ok(IoPriority::Medium), + "high" => Ok(IoPriority::High), s => Err(format!("expect: low, medium or high, got: {:?}", s)), } } } -impl Serialize for IOPriority { +impl Serialize for IoPriority { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -171,7 +171,7 @@ impl Serialize for IOPriority { } } -impl<'de> Deserialize<'de> for IOPriority { +impl<'de> Deserialize<'de> for IoPriority { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, @@ -179,17 +179,17 @@ impl<'de> Deserialize<'de> for IOPriority { use serde::de::{Error, Unexpected, Visitor}; struct StrVistor; impl<'de> Visitor<'de> for StrVistor { - type Value = IOPriority; + type Value = IoPriority; fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(formatter, "a IO priority") } - fn visit_str(self, value: &str) -> Result + fn visit_str(self, value: &str) -> Result where E: Error, { - let p = match IOPriority::from_str(&*value.trim().to_lowercase()) { + let p = match IoPriority::from_str(&*value.trim().to_lowercase()) { Ok(p) => p, _ => { return Err(E::invalid_value( @@ -206,15 +206,15 @@ impl<'de> Deserialize<'de> for IOPriority { } } -impl From for ConfigValue { - fn from(mode: IOPriority) -> ConfigValue { +impl From for ConfigValue { + fn from(mode: IoPriority) -> ConfigValue { 
ConfigValue::String(mode.as_str().to_owned()) } } -impl TryFrom for IOPriority { +impl TryFrom for IoPriority { type Error = String; - fn try_from(c: ConfigValue) -> Result { + fn try_from(c: ConfigValue) -> Result { if let ConfigValue::String(s) = c { Self::from_str(s.as_str()) } else { diff --git a/components/file_system/src/metrics.rs b/components/file_system/src/metrics.rs index e968eaaece6..8aecc6b21c7 100644 --- a/components/file_system/src/metrics.rs +++ b/components/file_system/src/metrics.rs @@ -6,7 +6,7 @@ use prometheus::{local::*, *}; use prometheus_static_metric::*; make_static_metric! { - pub label_enum IOType { + pub label_enum IoType { other, foreground_read, foreground_write, @@ -20,29 +20,29 @@ make_static_metric! { export, } - pub label_enum IOOp { + pub label_enum IoOp { read, write, } - pub label_enum IOPriority { + pub label_enum IoPriority { low, medium, high, } - pub struct IOLatencyVec : Histogram { - "type" => IOType, - "op" => IOOp, + pub struct IoLatencyVec : Histogram { + "type" => IoType, + "op" => IoOp, } - pub struct IOBytesVec : IntCounter { - "type" => IOType, - "op" => IOOp, + pub struct IoBytesVec : IntCounter { + "type" => IoType, + "op" => IoOp, } - pub struct IOPriorityIntGaugeVec : IntGauge { - "type" => IOPriority, + pub struct IoPriorityIntGaugeVec : IntGauge { + "type" => IoPriority, } } @@ -53,9 +53,9 @@ lazy_static! { &["type", "op"] ).unwrap(); - pub static ref IO_LATENCY_MICROS_VEC: IOLatencyVec = + pub static ref IO_LATENCY_MICROS_VEC: IoLatencyVec = register_static_histogram_vec!( - IOLatencyVec, + IoLatencyVec, "tikv_io_latency_micros", "Duration of disk tikv io.", &["type", "op"], @@ -70,8 +70,8 @@ lazy_static! 
{ ) .unwrap(); - pub static ref RATE_LIMITER_MAX_BYTES_PER_SEC: IOPriorityIntGaugeVec = register_static_int_gauge_vec!( - IOPriorityIntGaugeVec, + pub static ref RATE_LIMITER_MAX_BYTES_PER_SEC: IoPriorityIntGaugeVec = register_static_int_gauge_vec!( + IoPriorityIntGaugeVec, "tikv_rate_limiter_max_bytes_per_sec", "Maximum IO bytes per second", &["type"] diff --git a/components/file_system/src/metrics_manager.rs b/components/file_system/src/metrics_manager.rs index 8ff4bddde47..89e822b24e7 100644 --- a/components/file_system/src/metrics_manager.rs +++ b/components/file_system/src/metrics_manager.rs @@ -8,36 +8,36 @@ use tikv_util::time::Instant; use crate::{ io_stats::fetch_io_bytes, metrics::{tls_flush, IO_BYTES_VEC}, - IOBytes, IOOp, IORateLimiterStatistics, IOType, + IoBytes, IoOp, IoRateLimiterStatistics, IoType, }; pub enum BytesFetcher { /// Fetch IO statistics from IO rate limiter, which records passed-through /// IOs in atomic counters. - FromRateLimiter(Arc), + FromRateLimiter(Arc), /// Fetch IO statistics from OS I/O stats collector. 
- FromIOStatsCollector(), + FromIoStatsCollector(), } impl BytesFetcher { - fn fetch(&self) -> [IOBytes; IOType::COUNT] { + fn fetch(&self) -> [IoBytes; IoType::COUNT] { match *self { BytesFetcher::FromRateLimiter(ref stats) => { - let mut bytes: [IOBytes; IOType::COUNT] = Default::default(); - for t in IOType::iter() { - bytes[t as usize].read = stats.fetch(t, IOOp::Read) as u64; - bytes[t as usize].write = stats.fetch(t, IOOp::Write) as u64; + let mut bytes: [IoBytes; IoType::COUNT] = Default::default(); + for t in IoType::iter() { + bytes[t as usize].read = stats.fetch(t, IoOp::Read) as u64; + bytes[t as usize].write = stats.fetch(t, IoOp::Write) as u64; } bytes } - BytesFetcher::FromIOStatsCollector() => fetch_io_bytes(), + BytesFetcher::FromIoStatsCollector() => fetch_io_bytes(), } } } pub struct MetricsManager { fetcher: BytesFetcher, - last_fetch: [IOBytes; IOType::COUNT], + last_fetch: [IoBytes; IoType::COUNT], } impl MetricsManager { @@ -51,7 +51,7 @@ impl MetricsManager { pub fn flush(&mut self, _now: Instant) { tls_flush(); let latest = self.fetcher.fetch(); - for t in IOType::iter() { + for t in IoType::iter() { let delta_bytes = latest[t as usize] - self.last_fetch[t as usize]; IO_BYTES_VEC .with_label_values(&[t.as_str(), "read"]) diff --git a/components/file_system/src/rate_limiter.rs b/components/file_system/src/rate_limiter.rs index 51fe8228aef..da7fe5fe75c 100644 --- a/components/file_system/src/rate_limiter.rs +++ b/components/file_system/src/rate_limiter.rs @@ -17,7 +17,7 @@ use tikv_util::time::Instant; use super::{ metrics::{tls_collect_rate_limiter_request_wait, RATE_LIMITER_MAX_BYTES_PER_SEC}, - IOOp, IOPriority, IOType, + IoOp, IoPriority, IoType, }; const DEFAULT_REFILL_PERIOD: Duration = Duration::from_millis(50); @@ -25,38 +25,38 @@ const DEFAULT_REFILLS_PER_SEC: usize = (1.0 / DEFAULT_REFILL_PERIOD.as_secs_f32( const MAX_WAIT_DURATION_PER_REQUEST: Duration = Duration::from_millis(500); #[derive(Debug, Clone, PartialEq, Eq, Copy)] -pub 
enum IORateLimitMode { +pub enum IoRateLimitMode { WriteOnly, ReadOnly, AllIo, } -impl IORateLimitMode { +impl IoRateLimitMode { pub fn as_str(&self) -> &str { match *self { - IORateLimitMode::WriteOnly => "write-only", - IORateLimitMode::ReadOnly => "read-only", - IORateLimitMode::AllIo => "all-io", + IoRateLimitMode::WriteOnly => "write-only", + IoRateLimitMode::ReadOnly => "read-only", + IoRateLimitMode::AllIo => "all-io", } } #[inline] - pub fn contains(&self, op: IOOp) -> bool { + pub fn contains(&self, op: IoOp) -> bool { match *self { - IORateLimitMode::WriteOnly => op == IOOp::Write, - IORateLimitMode::ReadOnly => op == IOOp::Read, + IoRateLimitMode::WriteOnly => op == IoOp::Write, + IoRateLimitMode::ReadOnly => op == IoOp::Read, _ => true, } } } -impl FromStr for IORateLimitMode { +impl FromStr for IoRateLimitMode { type Err = String; - fn from_str(s: &str) -> Result { + fn from_str(s: &str) -> Result { match s { - "write-only" => Ok(IORateLimitMode::WriteOnly), - "read-only" => Ok(IORateLimitMode::ReadOnly), - "all-io" => Ok(IORateLimitMode::AllIo), + "write-only" => Ok(IoRateLimitMode::WriteOnly), + "read-only" => Ok(IoRateLimitMode::ReadOnly), + "all-io" => Ok(IoRateLimitMode::AllIo), s => Err(format!( "expect: write-only, read-only or all-io, got: {:?}", s @@ -65,7 +65,7 @@ impl FromStr for IORateLimitMode { } } -impl Serialize for IORateLimitMode { +impl Serialize for IoRateLimitMode { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -74,7 +74,7 @@ impl Serialize for IORateLimitMode { } } -impl<'de> Deserialize<'de> for IORateLimitMode { +impl<'de> Deserialize<'de> for IoRateLimitMode { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, @@ -82,17 +82,17 @@ impl<'de> Deserialize<'de> for IORateLimitMode { use serde::de::{Error, Unexpected, Visitor}; struct StrVistor; impl<'de> Visitor<'de> for StrVistor { - type Value = IORateLimitMode; + type Value = IoRateLimitMode; fn expecting(&self, formatter: &mut 
std::fmt::Formatter<'_>) -> std::fmt::Result { write!(formatter, "a IO rate limit mode") } - fn visit_str(self, value: &str) -> Result + fn visit_str(self, value: &str) -> Result where E: Error, { - let p = match IORateLimitMode::from_str(&*value.trim().to_lowercase()) { + let p = match IoRateLimitMode::from_str(&*value.trim().to_lowercase()) { Ok(p) => p, _ => { return Err(E::invalid_value( @@ -112,48 +112,48 @@ impl<'de> Deserialize<'de> for IORateLimitMode { /// Record accumulated bytes through of different types. /// Used for testing and metrics. #[derive(Debug)] -pub struct IORateLimiterStatistics { - read_bytes: [CachePadded; IOType::COUNT], - write_bytes: [CachePadded; IOType::COUNT], +pub struct IoRateLimiterStatistics { + read_bytes: [CachePadded; IoType::COUNT], + write_bytes: [CachePadded; IoType::COUNT], } -impl IORateLimiterStatistics { +impl IoRateLimiterStatistics { pub fn new() -> Self { - IORateLimiterStatistics { + IoRateLimiterStatistics { read_bytes: Default::default(), write_bytes: Default::default(), } } - pub fn fetch(&self, io_type: IOType, io_op: IOOp) -> usize { + pub fn fetch(&self, io_type: IoType, io_op: IoOp) -> usize { let io_type_idx = io_type as usize; match io_op { - IOOp::Read => self.read_bytes[io_type_idx].load(Ordering::Relaxed), - IOOp::Write => self.write_bytes[io_type_idx].load(Ordering::Relaxed), + IoOp::Read => self.read_bytes[io_type_idx].load(Ordering::Relaxed), + IoOp::Write => self.write_bytes[io_type_idx].load(Ordering::Relaxed), } } - pub fn record(&self, io_type: IOType, io_op: IOOp, bytes: usize) { + pub fn record(&self, io_type: IoType, io_op: IoOp, bytes: usize) { let io_type_idx = io_type as usize; match io_op { - IOOp::Read => { + IoOp::Read => { self.read_bytes[io_type_idx].fetch_add(bytes, Ordering::Relaxed); } - IOOp::Write => { + IoOp::Write => { self.write_bytes[io_type_idx].fetch_add(bytes, Ordering::Relaxed); } } } pub fn reset(&self) { - for i in 0..IOType::COUNT { + for i in 0..IoType::COUNT { 
self.read_bytes[i].store(0, Ordering::Relaxed); self.write_bytes[i].store(0, Ordering::Relaxed); } } } -impl Default for IORateLimiterStatistics { +impl Default for IoRateLimiterStatistics { fn default() -> Self { Self::new() } @@ -161,36 +161,36 @@ impl Default for IORateLimiterStatistics { /// Used to dynamically adjust the proportion of total budgets allocated for /// rate limited IO. This is needed when global IOs are only partially rate -/// limited, e.g. when mode is IORateLimitMode::WriteOnly. -pub trait IOBudgetAdjustor: Send + Sync { +/// limited, e.g. when mode is IoRateLimitMode::WriteOnly. +pub trait IoBudgetAdjustor: Send + Sync { fn adjust(&self, threshold: usize) -> usize; } /// Limit total IO flow below provided threshold by throttling lower-priority /// IOs. Rate limit is disabled when total IO threshold is set to zero. -struct PriorityBasedIORateLimiter { +struct PriorityBasedIoRateLimiter { // High-priority IOs are only limited when strict is true strict: bool, // Total bytes passed through during current epoch - bytes_through: [CachePadded; IOPriority::COUNT], + bytes_through: [CachePadded; IoPriority::COUNT], // Maximum bytes permitted during current epoch - bytes_per_epoch: [CachePadded; IOPriority::COUNT], - protected: Mutex, + bytes_per_epoch: [CachePadded; IoPriority::COUNT], + protected: Mutex, } -struct PriorityBasedIORateLimiterProtected { +struct PriorityBasedIoRateLimiterProtected { next_refill_time: Instant, // Bytes that can't be fulfilled in current epoch - pending_bytes: [usize; IOPriority::COUNT], + pending_bytes: [usize; IoPriority::COUNT], // Adjust low priority IO flow based on system backlog - adjustor: Option>, + adjustor: Option>, } -impl PriorityBasedIORateLimiterProtected { +impl PriorityBasedIoRateLimiterProtected { fn new() -> Self { - PriorityBasedIORateLimiterProtected { + PriorityBasedIoRateLimiterProtected { next_refill_time: Instant::now_coarse() + DEFAULT_REFILL_PERIOD, - pending_bytes: [0; IOPriority::COUNT], + 
pending_bytes: [0; IoPriority::COUNT], adjustor: None, } } @@ -216,7 +216,7 @@ macro_rules! do_sleep { }; } -/// Actual implementation for requesting IOs from PriorityBasedIORateLimiter. +/// Actual implementation for requesting IOs from PriorityBasedIoRateLimiter. /// An attempt will first be recorded. If the attempted amount exceeds the /// available quotas of current epoch, the requester will be queued (logically) /// and sleep until served. Macro is necessary to de-dup codes used both in @@ -235,7 +235,7 @@ macro_rules! request_imp { $limiter.bytes_through[priority_idx].fetch_add(amount, Ordering::Relaxed) + amount; // We prefer not to partially return only a portion of requested bytes. if bytes_through <= cached_bytes_per_epoch - || !$limiter.strict && $priority == IOPriority::High + || !$limiter.strict && $priority == IoPriority::High { return amount; } @@ -296,50 +296,50 @@ macro_rules! request_imp { }}; } -impl PriorityBasedIORateLimiter { +impl PriorityBasedIoRateLimiter { fn new(strict: bool) -> Self { - PriorityBasedIORateLimiter { + PriorityBasedIoRateLimiter { strict, bytes_through: Default::default(), bytes_per_epoch: Default::default(), - protected: Mutex::new(PriorityBasedIORateLimiterProtected::new()), + protected: Mutex::new(PriorityBasedIoRateLimiterProtected::new()), } } /// Dynamically changes the total IO flow threshold. fn set_bytes_per_sec(&self, bytes_per_sec: usize) { let now = (bytes_per_sec as f64 * DEFAULT_REFILL_PERIOD.as_secs_f64()) as usize; - let before = self.bytes_per_epoch[IOPriority::High as usize].swap(now, Ordering::Relaxed); + let before = self.bytes_per_epoch[IoPriority::High as usize].swap(now, Ordering::Relaxed); RATE_LIMITER_MAX_BYTES_PER_SEC .high .set(bytes_per_sec as i64); if now == 0 || before == 0 { // Toggle on or off rate limit. 
let _locked = self.protected.lock(); - self.bytes_per_epoch[IOPriority::Medium as usize].store(now, Ordering::Relaxed); + self.bytes_per_epoch[IoPriority::Medium as usize].store(now, Ordering::Relaxed); RATE_LIMITER_MAX_BYTES_PER_SEC .medium .set(bytes_per_sec as i64); - self.bytes_per_epoch[IOPriority::Low as usize].store(now, Ordering::Relaxed); + self.bytes_per_epoch[IoPriority::Low as usize].store(now, Ordering::Relaxed); RATE_LIMITER_MAX_BYTES_PER_SEC.low.set(bytes_per_sec as i64); } } - fn set_low_priority_io_adjustor(&self, adjustor: Option>) { + fn set_low_priority_io_adjustor(&self, adjustor: Option>) { let mut locked = self.protected.lock(); locked.adjustor = adjustor; } - fn request(&self, priority: IOPriority, amount: usize) -> usize { + fn request(&self, priority: IoPriority, amount: usize) -> usize { request_imp!(self, priority, amount, sync) } - async fn async_request(&self, priority: IOPriority, amount: usize) -> usize { + async fn async_request(&self, priority: IoPriority, amount: usize) -> usize { request_imp!(self, priority, amount, async) } #[cfg(test)] - fn request_with_skewed_clock(&self, priority: IOPriority, amount: usize) -> usize { + fn request_with_skewed_clock(&self, priority: IoPriority, amount: usize) -> usize { request_imp!(self, priority, amount, skewed_sync) } @@ -352,9 +352,9 @@ impl PriorityBasedIORateLimiter { /// this happens, total IO flow could exceed global threshold. /// - Highest priority IO alone must not exceed global threshold (in strict /// mode). - fn refill(&self, locked: &mut PriorityBasedIORateLimiterProtected, now: Instant) { + fn refill(&self, locked: &mut PriorityBasedIoRateLimiterProtected, now: Instant) { let mut total_budgets = - self.bytes_per_epoch[IOPriority::High as usize].load(Ordering::Relaxed); + self.bytes_per_epoch[IoPriority::High as usize].load(Ordering::Relaxed); if total_budgets == 0 { // It's possible that rate limit is toggled off in the meantime. 
return; @@ -365,12 +365,12 @@ impl PriorityBasedIORateLimiter { locked.next_refill_time = now + DEFAULT_REFILL_PERIOD; debug_assert!( - IOPriority::High as usize == IOPriority::Medium as usize + 1 - && IOPriority::Medium as usize == IOPriority::Low as usize + 1 + IoPriority::High as usize == IoPriority::Medium as usize + 1 + && IoPriority::Medium as usize == IoPriority::Low as usize + 1 ); let mut remaining_budgets = total_budgets; let mut used_budgets = 0; - for pri in &[IOPriority::High, IOPriority::Medium] { + for pri in &[IoPriority::High, IoPriority::Medium] { let p = *pri as usize; // Skipped epochs can only serve pending requests rather that in-coming ones, // catch up by subtracting them from pending_bytes. @@ -390,7 +390,7 @@ impl PriorityBasedIORateLimiter { used_budgets += ((served_by_first_epoch + served_by_skipped_epochs) as f32 / (skipped_epochs + 1.0)) as usize; // Only apply rate limit adjustments on low-priority IOs. - if *pri == IOPriority::Medium { + if *pri == IoPriority::Medium { if let Some(adjustor) = &locked.adjustor { total_budgets = adjustor.adjust(total_budgets); } @@ -400,7 +400,7 @@ impl PriorityBasedIORateLimiter { } else { 1 // A small positive value so not to disable flow control. 
}; - if *pri == IOPriority::High { + if *pri == IoPriority::High { RATE_LIMITER_MAX_BYTES_PER_SEC .medium .set((remaining_budgets * DEFAULT_REFILLS_PER_SEC) as i64); @@ -411,7 +411,7 @@ impl PriorityBasedIORateLimiter { } self.bytes_per_epoch[p - 1].store(remaining_budgets, Ordering::Relaxed); } - let p = IOPriority::Low as usize; + let p = IoPriority::Low as usize; let to_serve_pending_bytes = std::cmp::min(locked.pending_bytes[p], remaining_budgets); locked.pending_bytes[p] -= to_serve_pending_bytes; self.bytes_through[p].store(to_serve_pending_bytes, Ordering::Relaxed); @@ -427,7 +427,7 @@ impl PriorityBasedIORateLimiter { #[cfg(test)] fn reset(&self) { let mut locked = self.protected.lock(); - for p in &[IOPriority::High, IOPriority::Medium] { + for p in &[IoPriority::High, IoPriority::Medium] { let p = *p as usize; locked.pending_bytes[p] = 0; } @@ -435,26 +435,26 @@ impl PriorityBasedIORateLimiter { } /// A high-performance IO rate limiter used for prioritized flow control. -/// An instance of `IORateLimiter` can be safely shared between threads. -pub struct IORateLimiter { - mode: IORateLimitMode, - priority_map: [CachePadded; IOType::COUNT], - throughput_limiter: Arc, - stats: Option>, +/// An instance of `IoRateLimiter` can be safely shared between threads. 
+pub struct IoRateLimiter { + mode: IoRateLimitMode, + priority_map: [CachePadded; IoType::COUNT], + throughput_limiter: Arc, + stats: Option>, } -impl IORateLimiter { - pub fn new(mode: IORateLimitMode, strict: bool, enable_statistics: bool) -> Self { - let priority_map: [CachePadded; IOType::COUNT] = Default::default(); +impl IoRateLimiter { + pub fn new(mode: IoRateLimitMode, strict: bool, enable_statistics: bool) -> Self { + let priority_map: [CachePadded; IoType::COUNT] = Default::default(); for p in priority_map.iter() { - p.store(IOPriority::High as u32, Ordering::Relaxed); + p.store(IoPriority::High as u32, Ordering::Relaxed); } - IORateLimiter { + IoRateLimiter { mode, priority_map, - throughput_limiter: Arc::new(PriorityBasedIORateLimiter::new(strict)), + throughput_limiter: Arc::new(PriorityBasedIoRateLimiter::new(strict)), stats: if enable_statistics { - Some(Arc::new(IORateLimiterStatistics::new())) + Some(Arc::new(IoRateLimiterStatistics::new())) } else { None }, @@ -462,14 +462,14 @@ impl IORateLimiter { } pub fn new_for_test() -> Self { - IORateLimiter::new( - IORateLimitMode::AllIo, + IoRateLimiter::new( + IoRateLimitMode::AllIo, true, // strict true, // enable_statistics ) } - pub fn statistics(&self) -> Option> { + pub fn statistics(&self) -> Option> { self.stats.clone() } @@ -477,15 +477,15 @@ impl IORateLimiter { self.throughput_limiter.set_bytes_per_sec(rate); } - pub fn set_io_priority(&self, io_type: IOType, io_priority: IOPriority) { + pub fn set_io_priority(&self, io_type: IoType, io_priority: IoPriority) { self.priority_map[io_type as usize].store(io_priority as u32, Ordering::Relaxed); } pub fn set_low_priority_io_adjustor_if_needed( &self, - adjustor: Option>, + adjustor: Option>, ) { - if self.mode != IORateLimitMode::AllIo { + if self.mode != IoRateLimitMode::AllIo { self.throughput_limiter .set_low_priority_io_adjustor(adjustor); } @@ -494,10 +494,10 @@ impl IORateLimiter { /// Requests for token for bytes and potentially update 
statistics. If this /// request can not be satisfied, the call is blocked. Granted token can be /// less than the requested bytes, but must be greater than zero. - pub fn request(&self, io_type: IOType, io_op: IOOp, mut bytes: usize) -> usize { + pub fn request(&self, io_type: IoType, io_op: IoOp, mut bytes: usize) -> usize { if self.mode.contains(io_op) { bytes = self.throughput_limiter.request( - IOPriority::unsafe_from_u32( + IoPriority::unsafe_from_u32( self.priority_map[io_type as usize].load(Ordering::Relaxed), ), bytes, @@ -513,12 +513,12 @@ impl IORateLimiter { /// statistics. If this request can not be satisfied, the call is blocked. /// Granted token can be less than the requested bytes, but must be greater /// than zero. - pub async fn async_request(&self, io_type: IOType, io_op: IOOp, mut bytes: usize) -> usize { + pub async fn async_request(&self, io_type: IoType, io_op: IoOp, mut bytes: usize) -> usize { if self.mode.contains(io_op) { bytes = self .throughput_limiter .async_request( - IOPriority::unsafe_from_u32( + IoPriority::unsafe_from_u32( self.priority_map[io_type as usize].load(Ordering::Relaxed), ), bytes, @@ -532,10 +532,10 @@ impl IORateLimiter { } #[cfg(test)] - fn request_with_skewed_clock(&self, io_type: IOType, io_op: IOOp, mut bytes: usize) -> usize { + fn request_with_skewed_clock(&self, io_type: IoType, io_op: IoOp, mut bytes: usize) -> usize { if self.mode.contains(io_op) { bytes = self.throughput_limiter.request_with_skewed_clock( - IOPriority::unsafe_from_u32( + IoPriority::unsafe_from_u32( self.priority_map[io_type as usize].load(Ordering::Relaxed), ), bytes, @@ -549,15 +549,15 @@ impl IORateLimiter { } lazy_static! { - static ref IO_RATE_LIMITER: Mutex>> = Mutex::new(None); + static ref IO_RATE_LIMITER: Mutex>> = Mutex::new(None); } // Do NOT use this method in test environment. 
-pub fn set_io_rate_limiter(limiter: Option>) { +pub fn set_io_rate_limiter(limiter: Option>) { *IO_RATE_LIMITER.lock() = limiter; } -pub fn get_io_rate_limiter() -> Option> { +pub fn get_io_rate_limiter() -> Option> { (*IO_RATE_LIMITER.lock()).clone() } @@ -591,10 +591,10 @@ mod tests { } #[derive(Debug, Clone, Copy)] - struct Request(IOType, IOOp, usize); + struct Request(IoType, IoOp, usize); fn start_background_jobs( - limiter: &Arc, + limiter: &Arc, job_count: usize, request: Request, interval: Option, @@ -624,8 +624,8 @@ mod tests { #[test] fn test_rate_limit_toggle() { let bytes_per_sec = 2000; - let limiter = IORateLimiter::new_for_test(); - limiter.set_io_priority(IOType::Compaction, IOPriority::Low); + let limiter = IoRateLimiter::new_for_test(); + limiter.set_io_priority(IoType::Compaction, IoPriority::Low); let limiter = Arc::new(limiter); let stats = limiter.statistics().unwrap(); // enable rate limit @@ -634,19 +634,19 @@ mod tests { let _write_context = start_background_jobs( &limiter, 1, // job_count - Request(IOType::ForegroundWrite, IOOp::Write, 10), + Request(IoType::ForegroundWrite, IoOp::Write, 10), None, // interval ); let _compaction_context = start_background_jobs( &limiter, 1, // job_count - Request(IOType::Compaction, IOOp::Write, 10), + Request(IoType::Compaction, IoOp::Write, 10), None, // interval ); std::thread::sleep(Duration::from_secs(1)); let t1 = Instant::now(); approximate_eq!( - stats.fetch(IOType::ForegroundWrite, IOOp::Write) as f64, + stats.fetch(IoType::ForegroundWrite, IoOp::Write) as f64, bytes_per_sec as f64 * (t1 - t0).as_secs_f64() ); // disable rate limit @@ -655,11 +655,11 @@ mod tests { std::thread::sleep(Duration::from_secs(1)); let t2 = Instant::now(); assert!( - stats.fetch(IOType::ForegroundWrite, IOOp::Write) as f64 + stats.fetch(IoType::ForegroundWrite, IoOp::Write) as f64 > bytes_per_sec as f64 * (t2 - t1).as_secs_f64() * 4.0 ); assert!( - stats.fetch(IOType::Compaction, IOOp::Write) as f64 + 
stats.fetch(IoType::Compaction, IoOp::Write) as f64 > bytes_per_sec as f64 * (t2 - t1).as_secs_f64() * 4.0 ); // enable rate limit @@ -668,12 +668,12 @@ mod tests { std::thread::sleep(Duration::from_secs(1)); let t3 = Instant::now(); approximate_eq!( - stats.fetch(IOType::ForegroundWrite, IOOp::Write) as f64, + stats.fetch(IoType::ForegroundWrite, IoOp::Write) as f64, bytes_per_sec as f64 * (t3 - t2).as_secs_f64() ); } - fn verify_rate_limit(limiter: &Arc, bytes_per_sec: usize, duration: Duration) { + fn verify_rate_limit(limiter: &Arc, bytes_per_sec: usize, duration: Duration) { let stats = limiter.statistics().unwrap(); limiter.set_io_rate_limit(bytes_per_sec); stats.reset(); @@ -684,7 +684,7 @@ mod tests { let _context = start_background_jobs( limiter, 2, // job_count - Request(IOType::ForegroundWrite, IOOp::Write, 10), + Request(IoType::ForegroundWrite, IoOp::Write, 10), None, // interval ); std::thread::sleep(duration); @@ -693,7 +693,7 @@ mod tests { end.duration_since(begin) }; approximate_eq!( - stats.fetch(IOType::ForegroundWrite, IOOp::Write) as f64, + stats.fetch(IoType::ForegroundWrite, IoOp::Write) as f64, bytes_per_sec as f64 * actual_duration.as_secs_f64() ); } @@ -701,14 +701,14 @@ mod tests { #[test] fn test_rate_limit_dynamic_priority() { let bytes_per_sec = 2000; - let limiter = Arc::new(IORateLimiter::new( - IORateLimitMode::AllIo, + let limiter = Arc::new(IoRateLimiter::new( + IoRateLimitMode::AllIo, false, // strict true, // enable_statistics )); - limiter.set_io_priority(IOType::ForegroundWrite, IOPriority::Medium); + limiter.set_io_priority(IoType::ForegroundWrite, IoPriority::Medium); verify_rate_limit(&limiter, bytes_per_sec, Duration::from_secs(2)); - limiter.set_io_priority(IOType::ForegroundWrite, IOPriority::High); + limiter.set_io_priority(IoType::ForegroundWrite, IoPriority::High); let stats = limiter.statistics().unwrap(); stats.reset(); let duration = { @@ -717,7 +717,7 @@ mod tests { let _context = start_background_jobs( &limiter, 
2, // job_count - Request(IOType::ForegroundWrite, IOOp::Write, 10), + Request(IoType::ForegroundWrite, IoOp::Write, 10), None, // interval ); std::thread::sleep(Duration::from_secs(2)); @@ -726,7 +726,7 @@ mod tests { end.duration_since(begin) }; assert!( - stats.fetch(IOType::ForegroundWrite, IOOp::Write) as f64 + stats.fetch(IoType::ForegroundWrite, IoOp::Write) as f64 > bytes_per_sec as f64 * duration.as_secs_f64() * 1.5 ); } @@ -735,7 +735,7 @@ mod tests { fn test_rate_limited_heavy_flow() { let low_bytes_per_sec = 2000; let high_bytes_per_sec = 10000; - let limiter = Arc::new(IORateLimiter::new_for_test()); + let limiter = Arc::new(IoRateLimiter::new_for_test()); verify_rate_limit(&limiter, low_bytes_per_sec, Duration::from_secs(2)); verify_rate_limit(&limiter, high_bytes_per_sec, Duration::from_secs(2)); verify_rate_limit(&limiter, low_bytes_per_sec, Duration::from_secs(2)); @@ -745,7 +745,7 @@ mod tests { fn test_rate_limited_light_flow() { let kbytes_per_sec = 3; let actual_kbytes_per_sec = 2; - let limiter = Arc::new(IORateLimiter::new_for_test()); + let limiter = Arc::new(IoRateLimiter::new_for_test()); limiter.set_io_rate_limit(kbytes_per_sec * 1000); let stats = limiter.statistics().unwrap(); let duration = { @@ -755,7 +755,7 @@ mod tests { let _context = start_background_jobs( &limiter, actual_kbytes_per_sec, // job_count - Request(IOType::Compaction, IOOp::Write, 1), + Request(IoType::Compaction, IoOp::Write, 1), Some(Duration::from_millis(1)), ); std::thread::sleep(Duration::from_secs(2)); @@ -764,7 +764,7 @@ mod tests { end.duration_since(begin) }; approximate_eq!( - stats.fetch(IOType::Compaction, IOOp::Write) as f64, + stats.fetch(IoType::Compaction, IoOp::Write) as f64, actual_kbytes_per_sec as f64 * duration.as_secs_f64() * 1000.0 ); } @@ -775,10 +775,10 @@ mod tests { let write_work = 50; let compaction_work = 80; let import_work = 50; - let limiter = IORateLimiter::new_for_test(); + let limiter = IoRateLimiter::new_for_test(); 
limiter.set_io_rate_limit(bytes_per_sec); - limiter.set_io_priority(IOType::Compaction, IOPriority::Medium); - limiter.set_io_priority(IOType::Import, IOPriority::Low); + limiter.set_io_priority(IoType::Compaction, IoPriority::Medium); + limiter.set_io_priority(IoType::Import, IoPriority::Low); let stats = limiter.statistics().unwrap(); let limiter = Arc::new(limiter); let begin = Instant::now(); @@ -787,8 +787,8 @@ mod tests { &limiter, 1, // job_count Request( - IOType::ForegroundWrite, - IOOp::Write, + IoType::ForegroundWrite, + IoOp::Write, write_work * bytes_per_sec / 100 / 1000, ), Some(Duration::from_millis(1)), @@ -797,8 +797,8 @@ mod tests { &limiter, 1, // job_count Request( - IOType::Compaction, - IOOp::Write, + IoType::Compaction, + IoOp::Write, compaction_work * bytes_per_sec / 100 / 1000, ), Some(Duration::from_millis(1)), @@ -807,8 +807,8 @@ mod tests { &limiter, 1, // job_count Request( - IOType::Import, - IOOp::Write, + IoType::Import, + IoOp::Write, import_work * bytes_per_sec / 100 / 1000, ), Some(Duration::from_millis(1)), @@ -817,20 +817,20 @@ mod tests { } let end = Instant::now(); let duration = end.duration_since(begin); - let write_bytes = stats.fetch(IOType::ForegroundWrite, IOOp::Write); + let write_bytes = stats.fetch(IoType::ForegroundWrite, IoOp::Write); approximate_eq!( write_bytes as f64, (write_work * bytes_per_sec / 100) as f64 * duration.as_secs_f64() ); - let compaction_bytes = stats.fetch(IOType::Compaction, IOOp::Write); - let import_bytes = stats.fetch(IOType::Import, IOOp::Write); + let compaction_bytes = stats.fetch(IoType::Compaction, IoOp::Write); + let import_bytes = stats.fetch(IoType::Import, IoOp::Write); let total_bytes = write_bytes + import_bytes + compaction_bytes; approximate_eq!((compaction_bytes + write_bytes) as f64, total_bytes as f64); } #[bench] fn bench_critical_section(b: &mut test::Bencher) { - let inner_limiter = PriorityBasedIORateLimiter::new(true /* strict */); + let inner_limiter = 
PriorityBasedIoRateLimiter::new(true /* strict */); inner_limiter.set_bytes_per_sec(1024); let now = Instant::now_coarse(); b.iter(|| { diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 628b066029d..49183245785 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -11,9 +11,9 @@ use encryption::{DataKeyManager, DecrypterReader, EncrypterWriter}; use engine_traits::{ CacheStats, EncryptionKeyManager, EncryptionMethod, PerfContextExt, PerfContextKind, PerfLevel, RaftEngine, RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch as RaftLogBatchTrait, - RaftLogGCTask, Result, + RaftLogGcTask, Result, }; -use file_system::{IOOp, IORateLimiter, IOType}; +use file_system::{IoOp, IoRateLimiter, IoType}; use kvproto::{ metapb::Region, raft_serverpb::{RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent}, @@ -47,7 +47,7 @@ pub struct ManagedReader { ::Reader, DecrypterReader<::Reader>, >, - rate_limiter: Option>, + rate_limiter: Option>, } impl Seek for ManagedReader { @@ -63,7 +63,7 @@ impl Read for ManagedReader { fn read(&mut self, buf: &mut [u8]) -> IoResult { let mut size = buf.len(); if let Some(ref mut limiter) = self.rate_limiter { - size = limiter.request(IOType::ForegroundRead, IOOp::Read, size); + size = limiter.request(IoType::ForegroundRead, IoOp::Read, size); } match self.inner.as_mut() { Either::Left(reader) => reader.read(&mut buf[..size]), @@ -77,7 +77,7 @@ pub struct ManagedWriter { ::Writer, EncrypterWriter<::Writer>, >, - rate_limiter: Option>, + rate_limiter: Option>, } impl Seek for ManagedWriter { @@ -93,7 +93,7 @@ impl Write for ManagedWriter { fn write(&mut self, buf: &[u8]) -> IoResult { let mut size = buf.len(); if let Some(ref mut limiter) = self.rate_limiter { - size = limiter.request(IOType::ForegroundWrite, IOOp::Write, size); + size = limiter.request(IoType::ForegroundWrite, IoOp::Write, size); } match self.inner.as_mut() { 
Either::Left(writer) => writer.write(&buf[..size]), @@ -133,13 +133,13 @@ impl WriteExt for ManagedWriter { pub struct ManagedFileSystem { base_file_system: DefaultFileSystem, key_manager: Option>, - rate_limiter: Option>, + rate_limiter: Option>, } impl ManagedFileSystem { pub fn new( key_manager: Option>, - rate_limiter: Option>, + rate_limiter: Option>, ) -> Self { Self { base_file_system: DefaultFileSystem, @@ -256,7 +256,7 @@ impl RaftLogEngine { pub fn new( config: RaftEngineConfig, key_manager: Option>, - rate_limiter: Option>, + rate_limiter: Option>, ) -> Result { let file_system = Arc::new(ManagedFileSystem::new(key_manager, rate_limiter)); Ok(RaftLogEngine(Arc::new( @@ -516,14 +516,14 @@ impl RaftEngine for RaftLogEngine { } fn gc(&self, raft_group_id: u64, from: u64, to: u64) -> Result { - self.batch_gc(vec![RaftLogGCTask { + self.batch_gc(vec![RaftLogGcTask { raft_group_id, from, to, }]) } - fn batch_gc(&self, tasks: Vec) -> Result { + fn batch_gc(&self, tasks: Vec) -> Result { let mut batch = self.log_batch(tasks.len()); let mut old_first_index = Vec::with_capacity(tasks.len()); for task in &tasks { diff --git a/components/raftstore/src/coprocessor/split_check/half.rs b/components/raftstore/src/coprocessor/split_check/half.rs index 57472b5cecf..8f572eb1f9f 100644 --- a/components/raftstore/src/coprocessor/split_check/half.rs +++ b/components/raftstore/src/coprocessor/split_check/half.rs @@ -125,7 +125,7 @@ pub fn get_region_approximate_middle( mod tests { use std::{iter, sync::mpsc}; - use engine_test::ctor::{ColumnFamilyOptions, DBOptions}; + use engine_test::ctor::{CfOptions, DbOptions}; use engine_traits::{MiscExt, SyncMutable, ALL_CFS, CF_DEFAULT, LARGE_CFS}; use kvproto::{ metapb::{Peer, Region}, @@ -485,8 +485,8 @@ mod tests { .unwrap(); let path = tmp.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let mut cf_opts = ColumnFamilyOptions::new(); + let db_opts = DbOptions::default(); + let mut cf_opts = CfOptions::new(); 
cf_opts.set_level_zero_file_num_compaction_trigger(10); let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let engine = engine_test::kv::new_engine_opt(path, db_opts, cfs_opts).unwrap(); diff --git a/components/raftstore/src/coprocessor/split_check/keys.rs b/components/raftstore/src/coprocessor/split_check/keys.rs index 892a38a7f48..e2e58933e57 100644 --- a/components/raftstore/src/coprocessor/split_check/keys.rs +++ b/components/raftstore/src/coprocessor/split_check/keys.rs @@ -232,7 +232,7 @@ pub fn get_region_approximate_keys( mod tests { use std::{cmp, sync::mpsc, u64}; - use engine_test::ctor::{ColumnFamilyOptions, DBOptions}; + use engine_test::ctor::{CfOptions, DbOptions}; use engine_traits::{KvEngine, MiscExt, SyncMutable, ALL_CFS, CF_DEFAULT, CF_WRITE, LARGE_CFS}; use kvproto::{ metapb::{Peer, Region}, @@ -453,8 +453,8 @@ mod tests { .tempdir() .unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let mut cf_opts = ColumnFamilyOptions::new(); + let db_opts = DbOptions::default(); + let mut cf_opts = CfOptions::new(); cf_opts.set_level_zero_file_num_compaction_trigger(10); let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let db = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); @@ -633,8 +633,8 @@ mod tests { .tempdir() .unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let mut cf_opts = ColumnFamilyOptions::new(); + let db_opts = DbOptions::default(); + let mut cf_opts = CfOptions::new(); cf_opts.set_level_zero_file_num_compaction_trigger(10); let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let db = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); diff --git a/components/raftstore/src/coprocessor/split_check/size.rs b/components/raftstore/src/coprocessor/split_check/size.rs index faff7b77c0a..bc9fd855038 100644 --- 
a/components/raftstore/src/coprocessor/split_check/size.rs +++ b/components/raftstore/src/coprocessor/split_check/size.rs @@ -260,7 +260,7 @@ pub mod tests { use collections::HashSet; use engine_test::{ - ctor::{ColumnFamilyOptions, DBOptions}, + ctor::{CfOptions, DbOptions}, kv::KvTestEngine, }; use engine_traits::{ @@ -438,16 +438,16 @@ pub mod tests { fn test_split_check_impl(cfs_with_range_prop: &[CfName], data_cf: CfName) { let path = Builder::new().prefix("test-raftstore").tempdir().unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); + let db_opts = DbOptions::default(); let cfs_with_range_prop: HashSet<_> = cfs_with_range_prop.iter().cloned().collect(); - let mut cf_opt = ColumnFamilyOptions::new(); + let mut cf_opt = CfOptions::new(); cf_opt.set_no_range_properties(true); let cfs_opts = ALL_CFS .iter() .map(|cf| { if cfs_with_range_prop.contains(cf) { - (*cf, ColumnFamilyOptions::new()) + (*cf, CfOptions::new()) } else { (*cf, cf_opt.clone()) } @@ -565,9 +565,9 @@ pub mod tests { fn test_generate_bucket_impl(cfs_with_range_prop: &[CfName], data_cf: CfName, mvcc: bool) { let path = Builder::new().prefix("test-raftstore").tempdir().unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); + let db_opts = DbOptions::default(); let cfs_with_range_prop: HashSet<_> = cfs_with_range_prop.iter().cloned().collect(); - let mut cf_opt = ColumnFamilyOptions::new(); + let mut cf_opt = CfOptions::new(); cf_opt.set_no_range_properties(true); cf_opt.set_disable_auto_compactions(true); @@ -575,7 +575,7 @@ pub mod tests { .iter() .map(|cf| { if cfs_with_range_prop.contains(cf) { - let mut opt = ColumnFamilyOptions::new(); + let mut opt = CfOptions::new(); opt.set_disable_auto_compactions(true); (*cf, opt) } else { @@ -704,9 +704,9 @@ pub mod tests { .tempdir() .unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); + let db_opts = DbOptions::default(); let 
cfs_with_range_prop: HashSet<_> = LARGE_CFS.iter().cloned().collect(); - let mut cf_opt = ColumnFamilyOptions::new(); + let mut cf_opt = CfOptions::new(); cf_opt.set_no_range_properties(true); cf_opt.set_disable_auto_compactions(true); @@ -714,7 +714,7 @@ pub mod tests { .iter() .map(|cf| { if cfs_with_range_prop.contains(cf) { - let mut opt = ColumnFamilyOptions::new(); + let mut opt = CfOptions::new(); opt.set_disable_auto_compactions(true); (*cf, opt) } else { @@ -763,15 +763,15 @@ pub mod tests { fn test_cf_lock_without_range_prop() { let path = Builder::new().prefix("test-raftstore").tempdir().unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let mut cf_opt = ColumnFamilyOptions::new(); + let db_opts = DbOptions::default(); + let mut cf_opt = CfOptions::new(); cf_opt.set_no_range_properties(true); let cfs_opts = ALL_CFS .iter() .map(|cf| { if cf != &CF_LOCK { - (*cf, ColumnFamilyOptions::new()) + (*cf, CfOptions::new()) } else { (*cf, cf_opt.clone()) } @@ -830,13 +830,13 @@ pub mod tests { let cfs_opts = ALL_CFS .iter() .map(|cf| { - let mut cf_opts = ColumnFamilyOptions::new(); + let mut cf_opts = CfOptions::new(); cf_opts.set_no_range_properties(true); (*cf, cf_opts) }) .collect(); let engine = - engine_test::kv::new_engine_opt(path_str, DBOptions::default(), cfs_opts).unwrap(); + engine_test::kv::new_engine_opt(path_str, DbOptions::default(), cfs_opts).unwrap(); let mut runnable = SplitCheckRunner::new(engine.clone(), tx.clone(), CoprocessorHost::new(tx, cfg)); @@ -909,8 +909,8 @@ pub mod tests { .unwrap(); let path = tmp.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let mut cf_opts = ColumnFamilyOptions::new(); + let db_opts = DbOptions::default(); + let mut cf_opts = CfOptions::new(); cf_opts.set_level_zero_file_num_compaction_trigger(10); cf_opts.set_no_range_properties(true); @@ -944,8 +944,8 @@ pub mod tests { .unwrap(); let path = tmp.path().to_str().unwrap(); - let db_opts = 
DBOptions::default(); - let mut cf_opts = ColumnFamilyOptions::new(); + let db_opts = DbOptions::default(); + let mut cf_opts = CfOptions::new(); cf_opts.set_level_zero_file_num_compaction_trigger(10); let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let engine = engine_test::kv::new_engine_opt(path, db_opts, cfs_opts).unwrap(); @@ -1056,8 +1056,8 @@ pub mod tests { .tempdir() .unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let mut cf_opts = ColumnFamilyOptions::new(); + let db_opts = DbOptions::default(); + let mut cf_opts = CfOptions::new(); cf_opts.set_level_zero_file_num_compaction_trigger(10); let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let db = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); @@ -1086,8 +1086,8 @@ pub mod tests { .tempdir() .unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let mut cf_opts = ColumnFamilyOptions::new(); + let db_opts = DbOptions::default(); + let mut cf_opts = CfOptions::new(); cf_opts.set_disable_auto_compactions(true); let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let db = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); @@ -1121,8 +1121,8 @@ pub mod tests { .tempdir() .unwrap(); let path_str = path.path().to_str().unwrap(); - let db_opts = DBOptions::default(); - let mut cf_opts = ColumnFamilyOptions::new(); + let db_opts = DbOptions::default(); + let mut cf_opts = CfOptions::new(); cf_opts.set_disable_auto_compactions(true); let cfs_opts = LARGE_CFS.iter().map(|cf| (*cf, cf_opts.clone())).collect(); let db = engine_test::kv::new_engine_opt(path_str, db_opts, cfs_opts).unwrap(); diff --git a/components/raftstore/src/store/compaction_guard.rs b/components/raftstore/src/store/compaction_guard.rs index 1aee90b6463..c8fb02d424b 100644 --- a/components/raftstore/src/store/compaction_guard.rs +++ 
b/components/raftstore/src/store/compaction_guard.rs @@ -48,7 +48,7 @@ impl CompactionGuardGeneratorFactory

{ } // Update to implement engine_traits::SstPartitionerFactory instead once we move -// to use abstracted ColumnFamilyOptions in src/config.rs. +// to use abstracted CfOptions in src/config.rs. impl SstPartitionerFactory for CompactionGuardGeneratorFactory

{ @@ -200,7 +200,7 @@ mod tests { use engine_rocks::{ raw::{BlockBasedOptions, DBCompressionType}, util::new_engine_opt, - RocksCfOptions, RocksDBOptions, RocksEngine, RocksSstPartitionerFactory, RocksSstReader, + RocksCfOptions, RocksDbOptions, RocksEngine, RocksSstPartitionerFactory, RocksSstReader, }; use engine_traits::{CompactExt, Iterator, MiscExt, SstReader, SyncMutable, CF_DEFAULT}; use keys::DATA_PREFIX_KEY; @@ -391,7 +391,7 @@ mod tests { let db = new_engine_opt( temp_dir.path().to_str().unwrap(), - RocksDBOptions::default(), + RocksDbOptions::default(), vec![(CF_DEFAULT, cf_opts)], ) .unwrap(); diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 62721b5c1c9..6d309afa17f 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -2571,7 +2571,7 @@ where let persisted_msgs = ready.take_persisted_messages(); let mut has_write_ready = false; match &res { - HandleReadyResult::SendIOTask | HandleReadyResult::Snapshot { .. } => { + HandleReadyResult::SendIoTask | HandleReadyResult::Snapshot { .. } => { if !persisted_msgs.is_empty() { task.messages = self.build_raft_messages(ctx, persisted_msgs); } @@ -2602,7 +2602,7 @@ where self.raft_group.advance_append_async(ready); } } - HandleReadyResult::NoIOTask => { + HandleReadyResult::NoIoTask => { if let Some(last) = self.unpersisted_readies.back_mut() { // Attach to the last unpersisted ready so that it can be considered to be // persisted with the last ready at the same time. 
diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index cec0d44f081..aec48c1756f 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -115,7 +115,7 @@ impl From for RaftError { #[derive(PartialEq, Debug)] pub enum HandleReadyResult { - SendIOTask, + SendIoTask, Snapshot { msgs: Vec, snap_region: metapb::Region, @@ -124,7 +124,7 @@ pub enum HandleReadyResult { /// The first index before applying the snapshot. last_first_index: u64, }, - NoIOTask, + NoIoTask, } pub fn recover_from_applying_state( @@ -977,7 +977,7 @@ where let mut write_task = WriteTask::new(region_id, self.peer_id, ready.number()); - let mut res = HandleReadyResult::SendIOTask; + let mut res = HandleReadyResult::SendIoTask; if !ready.snapshot().is_empty() { fail_point!("raft_before_apply_snap"); let last_first_index = self.first_index().unwrap(); @@ -1023,7 +1023,7 @@ where } if !write_task.has_data() { - res = HandleReadyResult::NoIOTask; + res = HandleReadyResult::NoIoTask; } Ok((res, write_task)) diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index 056f1f4832d..64bde3cf88b 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -175,13 +175,13 @@ impl Peekable for RegionSnapshot where S: Snapshot, { - type DBVector = ::DBVector; + type DbVector = ::DbVector; fn get_value_opt( &self, opts: &ReadOptions, key: &[u8], - ) -> EngineResult> { + ) -> EngineResult> { check_key_in_range( key, self.region.get_id(), @@ -200,7 +200,7 @@ where opts: &ReadOptions, cf: &str, key: &[u8], - ) -> EngineResult> { + ) -> EngineResult> { check_key_in_range( key, self.region.get_id(), diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index aeaf70f5b03..e7b024c38eb 100644 --- a/components/raftstore/src/store/snap.rs +++ 
b/components/raftstore/src/store/snap.rs @@ -15,9 +15,7 @@ use std::{ }; use collections::{HashMap, HashMapEntry as Entry}; -use encryption::{ - create_aes_ctr_crypter, encryption_method_from_db_encryption_method, DataKeyManager, Iv, -}; +use encryption::{create_aes_ctr_crypter, from_engine_encryption_method, DataKeyManager, Iv}; use engine_traits::{CfName, EncryptionKeyManager, KvEngine, CF_DEFAULT, CF_LOCK, CF_WRITE}; use error_code::{self, ErrorCode, ErrorCodeExt}; use fail::fail_point; @@ -617,7 +615,7 @@ impl Snapshot { if let Some(mgr) = &s.mgr.encryption_key_manager { let enc_info = mgr.new_file(&file_paths[idx])?; - let mthd = encryption_method_from_db_encryption_method(enc_info.method); + let mthd = from_engine_encryption_method(enc_info.method); if mthd != EncryptionMethod::Plaintext { let file_for_recving = cf_file.file_for_recving.last_mut().unwrap(); file_for_recving.encrypter = Some( @@ -1887,7 +1885,7 @@ pub mod tests { use encryption::{DataKeyManager, EncryptionConfig, FileConfig, MasterKeyConfig}; use encryption_export::data_key_manager_from_config; use engine_test::{ - ctor::{ColumnFamilyOptions, DBOptions, KvEngineConstructorExt, RaftDBOptions}, + ctor::{CfOptions, DbOptions, KvEngineConstructorExt, RaftDbOptions}, kv::KvTestEngine, raft::RaftTestEngine, }; @@ -1921,16 +1919,16 @@ pub mod tests { const TEST_META_FILE_BUFFER_SIZE: usize = 1000; const BYTE_SIZE: usize = 1; - type DBBuilder = fn( + type DbBuilder = fn( p: &Path, - db_opt: Option, - cf_opts: Option>, + db_opt: Option, + cf_opts: Option>, ) -> Result; pub fn open_test_empty_db( path: &Path, - db_opt: Option, - cf_opts: Option>, + db_opt: Option, + cf_opts: Option>, ) -> Result where E: KvEngine + KvEngineConstructorExt, @@ -1940,7 +1938,7 @@ pub mod tests { let cf_opts = cf_opts.unwrap_or_else(|| { ALL_CFS .iter() - .map(|cf| (*cf, ColumnFamilyOptions::default())) + .map(|cf| (*cf, CfOptions::default())) .collect() }); let db = E::new_kv_engine_opt(p, db_opt, cf_opts).unwrap(); @@ 
-1949,8 +1947,8 @@ pub mod tests { pub fn open_test_db( path: &Path, - db_opt: Option, - cf_opts: Option>, + db_opt: Option, + cf_opts: Option>, ) -> Result where E: KvEngine + KvEngineConstructorExt, @@ -1969,8 +1967,8 @@ pub mod tests { pub fn open_test_db_with_100keys( path: &Path, - db_opt: Option, - cf_opts: Option>, + db_opt: Option, + cf_opts: Option>, ) -> Result where E: KvEngine + KvEngineConstructorExt, @@ -1991,9 +1989,9 @@ pub mod tests { pub fn get_test_db_for_regions( path: &TempDir, - raft_db_opt: Option, - kv_db_opt: Option, - kv_cf_opts: Option>, + raft_db_opt: Option, + kv_db_opt: Option, + kv_cf_opts: Option>, regions: &[u64], ) -> Result> { let p = path.path(); @@ -2116,9 +2114,9 @@ pub mod tests { (dir, key_manager.unwrap()) } - pub fn gen_db_options_with_encryption(prefix: &str) -> (TempDir, DBOptions) { + pub fn gen_db_options_with_encryption(prefix: &str) -> (TempDir, DbOptions) { let (_enc_dir, key_manager) = create_encryption_key_manager(prefix); - let mut db_opts = DBOptions::default(); + let mut db_opts = DbOptions::default(); db_opts.set_key_manager(Some(key_manager)); (_enc_dir, db_opts) } @@ -2193,7 +2191,7 @@ pub mod tests { test_snap_file(open_test_db_with_100keys, 500); } - fn test_snap_file(get_db: DBBuilder, max_file_size: u64) { + fn test_snap_file(get_db: DbBuilder, max_file_size: u64) { let region_id = 1; let region = gen_test_region(region_id, 1, 1); let src_db_dir = Builder::new() @@ -2312,7 +2310,7 @@ pub mod tests { test_snap_validation(open_test_db_with_100keys, 500); } - fn test_snap_validation(get_db: DBBuilder, max_file_size: u64) { + fn test_snap_validation(get_db: DbBuilder, max_file_size: u64) { let region_id = 1; let region = gen_test_region(region_id, 1, 1); let db_dir = Builder::new() @@ -2827,7 +2825,7 @@ pub mod tests { let kv_cf_opts = ALL_CFS .iter() .map(|cf| { - let mut cf_opts = ColumnFamilyOptions::new(); + let mut cf_opts = CfOptions::new(); cf_opts.set_no_range_properties(true); 
cf_opts.set_no_table_properties(true); (*cf, cf_opts) diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index c88c1bd3718..61986ffcd78 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -9,8 +9,7 @@ use std::{ }; use encryption::{ - encryption_method_from_db_encryption_method, DataKeyManager, DecrypterReader, EncrypterWriter, - Iv, + from_engine_encryption_method, DataKeyManager, DecrypterReader, EncrypterWriter, Iv, }; use engine_traits::{ CfName, EncryptionKeyManager, Error as EngineError, Iterable, KvEngine, Mutable, @@ -61,7 +60,7 @@ where if let Some(key_mgr) = key_mgr { let enc_info = box_try!(key_mgr.new_file(path)); - let mthd = encryption_method_from_db_encryption_method(enc_info.method); + let mthd = from_engine_encryption_method(enc_info.method); if mthd != EncryptionMethod::Plaintext { let writer = box_try!(EncrypterWriter::new( file.take().unwrap(), @@ -284,7 +283,7 @@ pub fn get_decrypter_reader( encryption_key_manager: &DataKeyManager, ) -> Result, Error> { let enc_info = box_try!(encryption_key_manager.get_file(file)); - let mthd = encryption_method_from_db_encryption_method(enc_info.method); + let mthd = from_engine_encryption_method(enc_info.method); debug!( "get_decrypter_reader gets enc_info for {:?}, method: {:?}", file, mthd diff --git a/components/raftstore/src/store/worker/compact.rs b/components/raftstore/src/store/worker/compact.rs index a829d2fe01c..958da2adaa6 100644 --- a/components/raftstore/src/store/worker/compact.rs +++ b/components/raftstore/src/store/worker/compact.rs @@ -253,7 +253,7 @@ mod tests { use std::{thread::sleep, time::Duration}; use engine_test::{ - ctor::{ColumnFamilyOptions, DBOptions}, + ctor::{CfOptions, DbOptions}, kv::{new_engine, new_engine_opt, KvTestEngine}, }; use engine_traits::{ @@ -325,13 +325,13 @@ mod tests { } fn open_db(path: &str) -> KvTestEngine { - let db_opts = DBOptions::default(); - let mut 
cf_opts = ColumnFamilyOptions::new(); + let db_opts = DbOptions::default(); + let mut cf_opts = CfOptions::new(); cf_opts.set_level_zero_file_num_compaction_trigger(8); let cfs_opts = vec![ - (CF_DEFAULT, ColumnFamilyOptions::new()), - (CF_RAFT, ColumnFamilyOptions::new()), - (CF_LOCK, ColumnFamilyOptions::new()), + (CF_DEFAULT, CfOptions::new()), + (CF_RAFT, CfOptions::new()), + (CF_LOCK, CfOptions::new()), (CF_WRITE, cf_opts), ]; new_engine_opt(path, db_opts, cfs_opts).unwrap() diff --git a/components/raftstore/src/store/worker/raftlog_gc.rs b/components/raftstore/src/store/worker/raftlog_gc.rs index 88e30e33104..f93213dfa0d 100644 --- a/components/raftstore/src/store/worker/raftlog_gc.rs +++ b/components/raftstore/src/store/worker/raftlog_gc.rs @@ -6,8 +6,8 @@ use std::{ sync::mpsc::Sender, }; -use engine_traits::{Engines, KvEngine, RaftEngine, RaftLogGCTask}; -use file_system::{IOType, WithIOType}; +use engine_traits::{Engines, KvEngine, RaftEngine, RaftLogGcTask}; +use file_system::{IoType, WithIoType}; use thiserror::Error; use tikv_util::{ box_try, debug, error, @@ -88,7 +88,7 @@ impl Runner { } /// Does the GC job and returns the count of logs collected. - fn gc_raft_log(&mut self, regions: Vec) -> Result { + fn gc_raft_log(&mut self, regions: Vec) -> Result { fail::fail_point!("worker_gc_raft_log", |s| { Ok(s.and_then(|s| s.parse().ok()).unwrap_or(0)) }); @@ -137,7 +137,7 @@ impl Runner { "end_index" => t.end_idx, ); } - groups.push(RaftLogGCTask { + groups.push(RaftLogGcTask { raft_group_id: t.region_id, from: t.start_idx, to: t.end_idx, @@ -171,7 +171,7 @@ where type Task = Task; fn run(&mut self, task: Task) { - let _io_type_guard = WithIOType::new(IOType::ForegroundWrite); + let _io_type_guard = WithIoType::new(IoType::ForegroundWrite); let flush_now = task.flush; self.tasks.push(task); // TODO: maybe they should also be batched even `flush_now` is true. 
diff --git a/components/raftstore/src/store/worker/refresh_config.rs b/components/raftstore/src/store/worker/refresh_config.rs index d3681654975..6555e96f102 100644 --- a/components/raftstore/src/store/worker/refresh_config.rs +++ b/components/raftstore/src/store/worker/refresh_config.rs @@ -6,7 +6,7 @@ use std::{ }; use batch_system::{BatchRouter, Fsm, FsmTypes, HandlerBuilder, Poller, PoolState, Priority}; -use file_system::{set_io_type, IOType}; +use file_system::{set_io_type, IoType}; use tikv_util::{ debug, error, info, safe_panic, sys::thread::StdThreadBuildWrapper, thd_name, worker::Runnable, }; @@ -74,7 +74,7 @@ where ))) .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); - set_io_type(IOType::ForegroundWrite); + set_io_type(IoType::ForegroundWrite); poller.poll(); }) .unwrap(); diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 5e2cc8992f5..d15e40e6f5e 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -18,7 +18,7 @@ use std::{ use engine_traits::{DeleteStrategy, KvEngine, Mutable, Range, WriteBatch, CF_LOCK, CF_RAFT}; use fail::fail_point; -use file_system::{IOType, WithIOType}; +use file_system::{IoType, WithIoType}; use kvproto::raft_serverpb::{PeerState, RaftApplyState, RegionLocalState}; use pd_client::PdClient; use raft::eraftpb::Snapshot as RaftSnapshot; @@ -322,10 +322,10 @@ where } let start = Instant::now(); - let _io_type_guard = WithIOType::new(if for_balance { - IOType::LoadBalance + let _io_type_guard = WithIoType::new(if for_balance { + IoType::LoadBalance } else { - IOType::Replication + IoType::Replication }); if let Err(e) = self.generate_snap( @@ -821,7 +821,7 @@ mod tests { }; use engine_test::{ - ctor::ColumnFamilyOptions, + ctor::CfOptions, kv::{KvTestEngine, KvTestSnapshot}, }; use engine_traits::{ @@ -990,7 +990,7 @@ mod tests { .tempdir() .unwrap(); - let mut cf_opts = 
ColumnFamilyOptions::new(); + let mut cf_opts = CfOptions::new(); cf_opts.set_level_zero_slowdown_writes_trigger(5); cf_opts.set_disable_auto_compactions(true); let kv_cfs_opts = vec![ diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index 14a1a5b7bbc..81fa843ace0 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -8,7 +8,7 @@ use std::{ }; use engine_traits::{CfName, IterOptions, Iterable, Iterator, KvEngine, CF_WRITE, LARGE_CFS}; -use file_system::{IOType, WithIOType}; +use file_system::{IoType, WithIoType}; use itertools::Itertools; use kvproto::{ metapb::{Region, RegionEpoch}, @@ -639,7 +639,7 @@ where { type Task = Task; fn run(&mut self, task: Task) { - let _io_type_guard = WithIOType::new(IOType::LoadBalance); + let _io_type_guard = WithIoType::new(IoType::LoadBalance); match task { Task::SplitCheckTask { region, diff --git a/components/security/src/lib.rs b/components/security/src/lib.rs index d984ccb353d..c0be3ba276b 100644 --- a/components/security/src/lib.rs +++ b/components/security/src/lib.rs @@ -166,7 +166,7 @@ impl SecurityManager { sb.bind(addr, port) } else { if !self.cfg.cert_allowed_cn.is_empty() { - let cn_checker = CNChecker { + let cn_checker = CnChecker { allowed_cn: Arc::new(self.cfg.cert_allowed_cn.clone()), }; sb = sb.add_checker(cn_checker); @@ -186,11 +186,11 @@ impl SecurityManager { } #[derive(Clone)] -struct CNChecker { +struct CnChecker { allowed_cn: Arc>, } -impl ServerChecker for CNChecker { +impl ServerChecker for CnChecker { fn check(&mut self, ctx: &RpcContext<'_>) -> CheckResult { match check_common_name(&self.allowed_cn, ctx) { Ok(()) => CheckResult::Continue, diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 73269c3f07a..425acf6e15c 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -43,13 +43,13 @@ use 
engine_rocks::{ }; use engine_rocks_helper::sst_recovery::{RecoveryRunner, DEFAULT_CHECK_INTERVAL}; use engine_traits::{ - CFOptionsExt, ColumnFamilyOptions, Engines, FlowControlFactorsExt, KvEngine, MiscExt, - RaftEngine, TabletFactory, CF_DEFAULT, CF_LOCK, CF_WRITE, + CfOptions, CfOptionsExt, Engines, FlowControlFactorsExt, KvEngine, MiscExt, RaftEngine, + TabletFactory, CF_DEFAULT, CF_LOCK, CF_WRITE, }; use error_code::ErrorCodeExt; use file_system::{ - get_io_rate_limiter, set_io_rate_limiter, BytesFetcher, File, IOBudgetAdjustor, - MetricsManager as IOMetricsManager, + get_io_rate_limiter, set_io_rate_limiter, BytesFetcher, File, IoBudgetAdjustor, + MetricsManager as IoMetricsManager, }; use futures::executor::block_on; use grpcio::{EnvBuilder, Environment}; @@ -82,7 +82,7 @@ use raftstore::{ }; use security::SecurityManager; use tikv::{ - config::{ConfigController, DBConfigManger, DBType, LogConfigManager, TiKvConfig}, + config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TiKvConfig}, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, import::{ImportSstService, SstImporter}, @@ -1249,7 +1249,7 @@ impl TiKvServer { .build(!stats_collector_enabled /* enable_statistics */), ); let fetcher = if stats_collector_enabled { - BytesFetcher::FromIOStatsCollector() + BytesFetcher::FromIoStatsCollector() } else { BytesFetcher::FromRateLimiter(limiter.statistics().unwrap()) }; @@ -1267,7 +1267,7 @@ impl TiKvServer { let mut engine_metrics = EngineMetricsManager::::new( self.engines.as_ref().unwrap().engines.clone(), ); - let mut io_metrics = IOMetricsManager::new(fetcher); + let mut io_metrics = IoMetricsManager::new(fetcher); let engines_info_clone = engines_info.clone(); self.background_worker .spawn_interval_task(DEFAULT_METRICS_FLUSH_INTERVAL, move || { @@ -1582,9 +1582,9 @@ impl ConfiguredRaftEngine for RocksEngine { fn register_config(&self, cfg_controller: &mut ConfigController, share_cache: bool) { cfg_controller.register( 
tikv::config::Module::Raftdb, - Box::new(DBConfigManger::new( + Box::new(DbConfigManger::new( Arc::new(self.clone()), - DBType::Raft, + DbType::Raft, share_cache, )), ); @@ -1669,9 +1669,9 @@ impl TiKvServer { let cfg_controller = self.cfg_controller.as_mut().unwrap(); cfg_controller.register( tikv::config::Module::Rocksdb, - Box::new(DBConfigManger::new( + Box::new(DbConfigManger::new( factory.clone(), - DBType::Kv, + DbType::Kv, self.config.storage.block_cache.shared, )), ); @@ -1887,7 +1887,7 @@ impl EnginesResourceInfo { } } -impl IOBudgetAdjustor for EnginesResourceInfo { +impl IoBudgetAdjustor for EnginesResourceInfo { fn adjust(&self, total_budgets: usize) -> usize { let score = self.latest_normalized_pending_bytes.load(Ordering::Relaxed) as f32 / Self::SCALE_FACTOR as f32; diff --git a/components/sst_importer/src/import_mode.rs b/components/sst_importer/src/import_mode.rs index 39dca3bea02..98a4aae7fe8 100644 --- a/components/sst_importer/src/import_mode.rs +++ b/components/sst_importer/src/import_mode.rs @@ -8,7 +8,7 @@ use std::{ time::{Duration, Instant}, }; -use engine_traits::{ColumnFamilyOptions, DBOptions, KvEngine}; +use engine_traits::{CfOptions, DbOptions, KvEngine}; use futures::executor::ThreadPool; use futures_util::compat::Future01CompatExt; use kvproto::import_sstpb::*; @@ -16,19 +16,19 @@ use tikv_util::timer::GLOBAL_TIMER_HANDLE; use super::{Config, Result}; -pub type RocksDBMetricsFn = fn(cf: &str, name: &str, v: f64); +pub type RocksDbMetricsFn = fn(cf: &str, name: &str, v: f64); struct ImportModeSwitcherInner { is_import: Arc, - backup_db_options: ImportModeDBOptions, - backup_cf_options: Vec<(String, ImportModeCFOptions)>, + backup_db_options: ImportModeDbOptions, + backup_cf_options: Vec<(String, ImportModeCfOptions)>, timeout: Duration, next_check: Instant, - metrics_fn: RocksDBMetricsFn, + metrics_fn: RocksDbMetricsFn, } impl ImportModeSwitcherInner { - fn enter_normal_mode(&mut self, db: &E, mf: RocksDBMetricsFn) -> Result { + fn 
enter_normal_mode(&mut self, db: &E, mf: RocksDbMetricsFn) -> Result { if !self.is_import.load(Ordering::Acquire) { return Ok(false); } @@ -43,18 +43,18 @@ impl ImportModeSwitcherInner { Ok(true) } - fn enter_import_mode(&mut self, db: &E, mf: RocksDBMetricsFn) -> Result { + fn enter_import_mode(&mut self, db: &E, mf: RocksDbMetricsFn) -> Result { if self.is_import.load(Ordering::Acquire) { return Ok(false); } - self.backup_db_options = ImportModeDBOptions::new_options(db); + self.backup_db_options = ImportModeDbOptions::new_options(db); self.backup_cf_options.clear(); let import_db_options = self.backup_db_options.optimized_for_import_mode(); import_db_options.set_options(db)?; for cf_name in db.cf_names() { - let cf_opts = ImportModeCFOptions::new_options(db, cf_name); + let cf_opts = ImportModeCfOptions::new_options(db, cf_name); let import_cf_options = cf_opts.optimized_for_import_mode(); self.backup_cf_options.push((cf_name.to_owned(), cf_opts)); import_cf_options.set_options(db, cf_name, mf)?; @@ -79,7 +79,7 @@ impl ImportModeSwitcher { let is_import = Arc::new(AtomicBool::new(false)); let inner = Arc::new(Mutex::new(ImportModeSwitcherInner { is_import: is_import.clone(), - backup_db_options: ImportModeDBOptions::new(), + backup_db_options: ImportModeDbOptions::new(), backup_cf_options: Vec::new(), timeout, next_check: Instant::now() + timeout, @@ -120,14 +120,14 @@ impl ImportModeSwitcher { executor.spawn_ok(timer_loop); } - pub fn enter_normal_mode(&self, db: &E, mf: RocksDBMetricsFn) -> Result { + pub fn enter_normal_mode(&self, db: &E, mf: RocksDbMetricsFn) -> Result { if !self.is_import.load(Ordering::Acquire) { return Ok(false); } self.inner.lock().unwrap().enter_normal_mode(db, mf) } - pub fn enter_import_mode(&self, db: &E, mf: RocksDBMetricsFn) -> Result { + pub fn enter_import_mode(&self, db: &E, mf: RocksDbMetricsFn) -> Result { let mut inner = self.inner.lock().unwrap(); let ret = inner.enter_import_mode(db, mf)?; inner.next_check = Instant::now() 
+ inner.timeout; @@ -144,11 +144,11 @@ impl ImportModeSwitcher { } } -struct ImportModeDBOptions { +struct ImportModeDbOptions { max_background_jobs: i32, } -impl ImportModeDBOptions { +impl ImportModeDbOptions { fn new() -> Self { Self { max_background_jobs: 32, @@ -161,9 +161,9 @@ impl ImportModeDBOptions { } } - fn new_options(db: &impl KvEngine) -> ImportModeDBOptions { + fn new_options(db: &impl KvEngine) -> ImportModeDbOptions { let db_opts = db.get_db_options(); - ImportModeDBOptions { + ImportModeDbOptions { max_background_jobs: db_opts.get_max_background_jobs(), } } @@ -179,14 +179,14 @@ impl ImportModeDBOptions { } } -struct ImportModeCFOptions { +struct ImportModeCfOptions { level0_stop_writes_trigger: u32, level0_slowdown_writes_trigger: u32, soft_pending_compaction_bytes_limit: u64, hard_pending_compaction_bytes_limit: u64, } -impl ImportModeCFOptions { +impl ImportModeCfOptions { fn optimized_for_import_mode(&self) -> Self { Self { level0_stop_writes_trigger: self.level0_stop_writes_trigger.max(1 << 30), @@ -196,10 +196,10 @@ impl ImportModeCFOptions { } } - fn new_options(db: &impl KvEngine, cf_name: &str) -> ImportModeCFOptions { + fn new_options(db: &impl KvEngine, cf_name: &str) -> ImportModeCfOptions { let cf_opts = db.get_options_cf(cf_name).unwrap(); //FIXME unwrap - ImportModeCFOptions { + ImportModeCfOptions { level0_stop_writes_trigger: cf_opts.get_level_zero_stop_writes_trigger(), level0_slowdown_writes_trigger: cf_opts.get_level_zero_slowdown_writes_trigger(), soft_pending_compaction_bytes_limit: cf_opts.get_soft_pending_compaction_bytes_limit(), @@ -207,7 +207,7 @@ impl ImportModeCFOptions { } } - fn set_options(&self, db: &impl KvEngine, cf_name: &str, mf: RocksDBMetricsFn) -> Result<()> { + fn set_options(&self, db: &impl KvEngine, cf_name: &str, mf: RocksDbMetricsFn) -> Result<()> { let opts = [ ( "level0_stop_writes_trigger".to_owned(), @@ -252,8 +252,8 @@ mod tests { fn check_import_options( db: &E, - expected_db_opts: 
&ImportModeDBOptions, - expected_cf_opts: &ImportModeCFOptions, + expected_db_opts: &ImportModeDbOptions, + expected_cf_opts: &ImportModeCfOptions, ) where E: KvEngine, { @@ -292,9 +292,9 @@ mod tests { .unwrap(); let db = new_test_engine(temp_dir.path().to_str().unwrap(), &[CF_DEFAULT, "a", "b"]); - let normal_db_options = ImportModeDBOptions::new_options(&db); + let normal_db_options = ImportModeDbOptions::new_options(&db); let import_db_options = normal_db_options.optimized_for_import_mode(); - let normal_cf_options = ImportModeCFOptions::new_options(&db, "default"); + let normal_cf_options = ImportModeCfOptions::new_options(&db, "default"); let import_cf_options = normal_cf_options.optimized_for_import_mode(); assert!( @@ -333,9 +333,9 @@ mod tests { .unwrap(); let db = new_test_engine(temp_dir.path().to_str().unwrap(), &[CF_DEFAULT, "a", "b"]); - let normal_db_options = ImportModeDBOptions::new_options(&db); + let normal_db_options = ImportModeDbOptions::new_options(&db); let import_db_options = normal_db_options.optimized_for_import_mode(); - let normal_cf_options = ImportModeCFOptions::new_options(&db, "default"); + let normal_cf_options = ImportModeCfOptions::new_options(&db, "default"); let import_cf_options = normal_cf_options.optimized_for_import_mode(); fn mf(_cf: &str, _name: &str, _v: f64) {} @@ -374,7 +374,7 @@ mod tests { |_, opt| opt.set_level_zero_stop_writes_trigger(2_000_000_000), ); - let normal_cf_options = ImportModeCFOptions::new_options(&db, "default"); + let normal_cf_options = ImportModeCfOptions::new_options(&db, "default"); assert_eq!(normal_cf_options.level0_stop_writes_trigger, 2_000_000_000); let import_cf_options = normal_cf_options.optimized_for_import_mode(); assert_eq!(import_cf_options.level0_stop_writes_trigger, 2_000_000_000); diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 356541cebbb..b6d13ac9761 100644 --- a/components/sst_importer/src/sst_importer.rs +++ 
b/components/sst_importer/src/sst_importer.rs @@ -11,7 +11,7 @@ use std::{ }; use dashmap::DashMap; -use encryption::{encryption_method_to_db_encryption_method, DataKeyManager}; +use encryption::{to_engine_encryption_method, DataKeyManager}; use engine_rocks::{get_env, RocksSstReader}; use engine_traits::{ name_to_cf, util::check_key_in_range, CfName, EncryptionKeyManager, FileEncryptionInfo, @@ -33,7 +33,7 @@ use txn_types::{Key, TimeStamp, WriteRef}; use crate::{ import_file::{ImportDir, ImportFile}, - import_mode::{ImportModeSwitcher, RocksDBMetricsFn}, + import_mode::{ImportModeSwitcher, RocksDbMetricsFn}, metrics::*, sst_writer::{RawSstWriter, TxnSstWriter}, Config, Error, Result, @@ -211,11 +211,11 @@ impl SstImporter { } } - pub fn enter_normal_mode(&self, db: E, mf: RocksDBMetricsFn) -> Result { + pub fn enter_normal_mode(&self, db: E, mf: RocksDbMetricsFn) -> Result { self.switcher.enter_normal_mode(&db, mf) } - pub fn enter_import_mode(&self, db: E, mf: RocksDBMetricsFn) -> Result { + pub fn enter_import_mode(&self, db: E, mf: RocksDbMetricsFn) -> Result { self.switcher.enter_import_mode(&db, mf) } @@ -488,7 +488,7 @@ impl SstImporter { let path = self.dir.join(meta)?; let file_crypter = crypter.map(|c| FileEncryptionInfo { - method: encryption_method_to_db_encryption_method(c.cipher_type), + method: to_engine_encryption_method(c.cipher_type), key: c.cipher_key, iv: meta.cipher_iv.to_owned(), }); diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index 6ba4d892717..9266378845d 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs @@ -70,12 +70,12 @@ mod tests { use encryption::DataKeyManager; use engine_rocks::{ - util::new_engine_opt, RocksCfOptions, RocksDBOptions, RocksEngine, RocksSstWriterBuilder, - RocksTitanDBOptions, + util::new_engine_opt, RocksCfOptions, RocksDbOptions, RocksEngine, RocksSstWriterBuilder, + RocksTitanDbOptions, }; use engine_traits::{ - CfName, 
ColumnFamilyOptions, DBOptions, EncryptionKeyManager, ImportExt, Peekable, - SstWriter, SstWriterBuilder, TitanDBOptions, CF_DEFAULT, + CfName, CfOptions, DbOptions, EncryptionKeyManager, ImportExt, Peekable, SstWriter, + SstWriterBuilder, TitanDbOptions, CF_DEFAULT, }; use tempfile::Builder; use test_util::encryption::new_test_key_manager; @@ -115,7 +115,7 @@ mod tests { } fn check_prepare_sst_for_ingestion( - db_opts: Option, + db_opts: Option, cf_opts: Option>, key_manager: Option<&DataKeyManager>, was_encrypted: bool, @@ -188,8 +188,8 @@ mod tests { #[test] fn test_prepare_sst_for_ingestion_titan() { - let mut db_opts = RocksDBOptions::new(); - let mut titan_opts = RocksTitanDBOptions::new(); + let mut db_opts = RocksDbOptions::new(); + let mut titan_opts = RocksTitanDbOptions::new(); // Force all values write out to blob files. titan_opts.set_min_blob_size(0); db_opts.set_titandb_options(&titan_opts); diff --git a/components/test_coprocessor/src/dag.rs b/components/test_coprocessor/src/dag.rs index 4165d19bdb4..740ece83e1a 100644 --- a/components/test_coprocessor/src/dag.rs +++ b/components/test_coprocessor/src/dag.rs @@ -277,15 +277,15 @@ impl DAGSelect { } } -pub struct DAGChunkSpliter { +pub struct DagChunkSpliter { chunks: Vec, datums: Vec, col_cnt: usize, } -impl DAGChunkSpliter { - pub fn new(chunks: Vec, col_cnt: usize) -> DAGChunkSpliter { - DAGChunkSpliter { +impl DagChunkSpliter { + pub fn new(chunks: Vec, col_cnt: usize) -> DagChunkSpliter { + DagChunkSpliter { chunks, col_cnt, datums: Vec::with_capacity(0), @@ -293,7 +293,7 @@ impl DAGChunkSpliter { } } -impl Iterator for DAGChunkSpliter { +impl Iterator for DagChunkSpliter { type Item = Vec; fn next(&mut self) -> Option> { diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 301647bf267..097e74f157b 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -18,7 +18,7 @@ use engine_traits::{ CompactExt, 
Engines, Iterable, MiscExt, Mutable, Peekable, RaftEngineReadOnly, WriteBatch, WriteBatchExt, CF_DEFAULT, CF_RAFT, }; -use file_system::IORateLimiter; +use file_system::IoRateLimiter; use futures::executor::block_on; use kvproto::{ errorpb::Error as PbError, @@ -160,7 +160,7 @@ pub struct Cluster { pub dbs: Vec>, pub store_metas: HashMap>>, key_managers: Vec>>, - pub io_rate_limiter: Option>, + pub io_rate_limiter: Option>, pub engines: HashMap>, key_managers_map: HashMap>>, pub labels: HashMap>, diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index c399b4813f2..e33837ebd76 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -19,7 +19,7 @@ use engine_traits::{ Engines, Iterable, Peekable, RaftEngineDebug, RaftEngineReadOnly, TabletFactory, ALL_CFS, CF_DEFAULT, CF_RAFT, }; -use file_system::IORateLimiter; +use file_system::IoRateLimiter; use futures::executor::block_on; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ @@ -625,7 +625,7 @@ pub fn must_contains_error(resp: &RaftCmdResponse, msg: &str) { pub fn create_test_engine( // TODO: pass it in for all cases. 
router: Option>, - limiter: Option>, + limiter: Option>, cfg: &Config, ) -> ( Engines, diff --git a/components/test_sst_importer/src/lib.rs b/components/test_sst_importer/src/lib.rs index 65d2a3dc70a..9397a6bb35b 100644 --- a/components/test_sst_importer/src/lib.rs +++ b/components/test_sst_importer/src/lib.rs @@ -5,7 +5,7 @@ use std::{collections::HashMap, fs, path::Path, sync::Arc}; use engine_rocks::{ raw::{DBEntryType, Env, TablePropertiesCollector, TablePropertiesCollectorFactory}, util::new_engine_opt, - RocksCfOptions, RocksDBOptions, RocksEngine, RocksSstReader, RocksSstWriterBuilder, + RocksCfOptions, RocksDbOptions, RocksEngine, RocksSstReader, RocksSstWriterBuilder, }; pub use engine_rocks::{RocksEngine as TestEngine, RocksSstWriter}; use engine_traits::{KvEngine, SstWriter, SstWriterBuilder}; @@ -47,8 +47,8 @@ where }) .collect(); - let db_opts = env.map_or_else(RocksDBOptions::default, |e| { - let mut opts = RocksDBOptions::default(); + let db_opts = env.map_or_else(RocksDbOptions::default, |e| { + let mut opts = RocksDbOptions::default(); opts.set_env(e); opts }); diff --git a/components/tidb_query_datatype/src/codec/collation/encoding/utf8.rs b/components/tidb_query_datatype/src/codec/collation/encoding/utf8.rs index b1539e7c581..d06bf49c025 100644 --- a/components/tidb_query_datatype/src/codec/collation/encoding/utf8.rs +++ b/components/tidb_query_datatype/src/codec/collation/encoding/utf8.rs @@ -2,11 +2,11 @@ use super::*; -pub trait UTF8CompatibleEncoding { +pub trait Utf8CompatibleEncoding { const NAME: &'static str; } -impl Encoding for T { +impl Encoding for T { #[inline] fn decode(data: BytesRef<'_>) -> Result { match str::from_utf8(data) { @@ -17,22 +17,22 @@ impl Encoding for T { } #[derive(Debug)] -pub struct EncodingUTF8Mb4; +pub struct EncodingUtf8Mb4; -impl UTF8CompatibleEncoding for EncodingUTF8Mb4 { +impl Utf8CompatibleEncoding for EncodingUtf8Mb4 { const NAME: &'static str = "utf8mb4"; } #[derive(Debug)] -pub struct EncodingUTF8; +pub 
struct EncodingUtf8; -impl UTF8CompatibleEncoding for EncodingUTF8 { +impl Utf8CompatibleEncoding for EncodingUtf8 { const NAME: &'static str = "utf8"; } #[derive(Debug)] pub struct EncodingLatin1; -impl UTF8CompatibleEncoding for EncodingLatin1 { +impl Utf8CompatibleEncoding for EncodingLatin1 { const NAME: &'static str = "latin1"; } diff --git a/components/tidb_query_datatype/src/codec/collation/mod.rs b/components/tidb_query_datatype/src/codec/collation/mod.rs index 0d6a8e6d9ea..b3033c06d84 100644 --- a/components/tidb_query_datatype/src/codec/collation/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/mod.rs @@ -49,8 +49,8 @@ macro_rules! match_template_charset { match_template::match_template! { $t = [ - UTF8 => EncodingUTF8, - UTF8Mb4 => EncodingUTF8Mb4, + UTF8 => EncodingUtf8, + UTF8Mb4 => EncodingUtf8Mb4, Latin1 => EncodingLatin1, GBK => EncodingGBK, Binary => EncodingBinary, diff --git a/components/tidb_query_datatype/src/codec/data_type/mod.rs b/components/tidb_query_datatype/src/codec/data_type/mod.rs index 278ef48469a..930070e87a2 100644 --- a/components/tidb_query_datatype/src/codec/data_type/mod.rs +++ b/components/tidb_query_datatype/src/codec/data_type/mod.rs @@ -52,49 +52,49 @@ use crate::{codec::convert::ConvertTo, expr::EvalContext, EvalType}; /// A trait of evaluating current concrete eval type into a MySQL logic value, /// represented by Rust's `bool` type. -pub trait AsMySQLBool { +pub trait AsMySqlBool { /// Evaluates into a MySQL logic value. 
fn as_mysql_bool(&self, context: &mut EvalContext) -> Result; } -impl AsMySQLBool for Int { +impl AsMySqlBool for Int { #[inline] fn as_mysql_bool(&self, _context: &mut EvalContext) -> Result { Ok(*self != 0) } } -impl AsMySQLBool for Real { +impl AsMySqlBool for Real { #[inline] fn as_mysql_bool(&self, _context: &mut EvalContext) -> Result { Ok(self.into_inner() != 0f64) } } -impl<'a, T: AsMySQLBool> AsMySQLBool for &'a T { +impl<'a, T: AsMySqlBool> AsMySqlBool for &'a T { #[inline] fn as_mysql_bool(&self, context: &mut EvalContext) -> Result { (**self).as_mysql_bool(context) } } -impl AsMySQLBool for Bytes { +impl AsMySqlBool for Bytes { #[inline] fn as_mysql_bool(&self, context: &mut EvalContext) -> Result { self.as_slice().as_mysql_bool(context) } } -impl<'a> AsMySQLBool for BytesRef<'a> { +impl<'a> AsMySqlBool for BytesRef<'a> { #[inline] fn as_mysql_bool(&self, context: &mut EvalContext) -> Result { Ok(!self.is_empty() && ConvertTo::::convert(self, context)? != 0f64) } } -impl<'a, T> AsMySQLBool for Option<&'a T> +impl<'a, T> AsMySqlBool for Option<&'a T> where - T: AsMySQLBool, + T: AsMySqlBool, { fn as_mysql_bool(&self, context: &mut EvalContext) -> Result { match self { @@ -104,25 +104,25 @@ where } } -impl<'a> AsMySQLBool for JsonRef<'a> { +impl<'a> AsMySqlBool for JsonRef<'a> { fn as_mysql_bool(&self, _context: &mut EvalContext) -> Result { Ok(!self.is_zero()) } } -impl<'a> AsMySQLBool for EnumRef<'a> { +impl<'a> AsMySqlBool for EnumRef<'a> { fn as_mysql_bool(&self, _context: &mut EvalContext) -> Result { Ok(!self.is_empty()) } } -impl<'a> AsMySQLBool for SetRef<'a> { +impl<'a> AsMySqlBool for SetRef<'a> { fn as_mysql_bool(&self, _context: &mut EvalContext) -> Result { Ok(!self.is_empty()) } } -impl<'a> AsMySQLBool for Option> { +impl<'a> AsMySqlBool for Option> { fn as_mysql_bool(&self, context: &mut EvalContext) -> Result { match self { None => Ok(false), @@ -131,7 +131,7 @@ impl<'a> AsMySQLBool for Option> { } } -impl<'a> AsMySQLBool for Option> { 
+impl<'a> AsMySqlBool for Option> { fn as_mysql_bool(&self, context: &mut EvalContext) -> Result { match self { None => Ok(false), @@ -140,7 +140,7 @@ impl<'a> AsMySQLBool for Option> { } } -impl<'a> AsMySQLBool for Option> { +impl<'a> AsMySqlBool for Option> { fn as_mysql_bool(&self, context: &mut EvalContext) -> Result { match self { None => Ok(false), @@ -149,7 +149,7 @@ impl<'a> AsMySQLBool for Option> { } } -impl<'a> AsMySQLBool for Option> { +impl<'a> AsMySqlBool for Option> { fn as_mysql_bool(&self, context: &mut EvalContext) -> Result { match self { None => Ok(false), diff --git a/components/tidb_query_datatype/src/codec/data_type/scalar.rs b/components/tidb_query_datatype/src/codec/data_type/scalar.rs index b95dbb63342..d476fd2d370 100644 --- a/components/tidb_query_datatype/src/codec/data_type/scalar.rs +++ b/components/tidb_query_datatype/src/codec/data_type/scalar.rs @@ -83,7 +83,7 @@ impl ScalarValue { } } -impl AsMySQLBool for ScalarValue { +impl AsMySqlBool for ScalarValue { #[inline] fn as_mysql_bool(&self, context: &mut EvalContext) -> Result { match_template_evaltype! 
{ diff --git a/components/tidb_query_datatype/src/codec/datum.rs b/components/tidb_query_datatype/src/codec/datum.rs index 8d2e62b6ac0..9d791d911cd 100644 --- a/components/tidb_query_datatype/src/codec/datum.rs +++ b/components/tidb_query_datatype/src/codec/datum.rs @@ -24,7 +24,7 @@ use super::{ use crate::{ codec::{ convert::{ConvertTo, ToInt}, - data_type::AsMySQLBool, + data_type::AsMySqlBool, }, expr::EvalContext, FieldTypeTp, diff --git a/components/tidb_query_datatype/src/codec/mysql/decimal.rs b/components/tidb_query_datatype/src/codec/mysql/decimal.rs index a172d2e2723..2518e003ba3 100644 --- a/components/tidb_query_datatype/src/codec/mysql/decimal.rs +++ b/components/tidb_query_datatype/src/codec/mysql/decimal.rs @@ -1943,7 +1943,7 @@ impl Display for Decimal { } } -impl crate::codec::data_type::AsMySQLBool for Decimal { +impl crate::codec::data_type::AsMySqlBool for Decimal { #[inline] fn as_mysql_bool(&self, _ctx: &mut EvalContext) -> crate::codec::Result { Ok(!self.is_zero()) diff --git a/components/tidb_query_datatype/src/codec/mysql/duration.rs b/components/tidb_query_datatype/src/codec/mysql/duration.rs index 370467b9928..3869f773020 100644 --- a/components/tidb_query_datatype/src/codec/mysql/duration.rs +++ b/components/tidb_query_datatype/src/codec/mysql/duration.rs @@ -703,7 +703,7 @@ pub trait DurationDecoder: NumberDecoder { impl DurationDecoder for T {} -impl crate::codec::data_type::AsMySQLBool for Duration { +impl crate::codec::data_type::AsMySqlBool for Duration { #[inline] fn as_mysql_bool(&self, _context: &mut crate::expr::EvalContext) -> crate::codec::Result { Ok(!self.is_zero()) diff --git a/components/tidb_query_datatype/src/codec/mysql/enums.rs b/components/tidb_query_datatype/src/codec/mysql/enums.rs index 9a591cf750a..fecada58b1d 100644 --- a/components/tidb_query_datatype/src/codec/mysql/enums.rs +++ b/components/tidb_query_datatype/src/codec/mysql/enums.rs @@ -84,7 +84,7 @@ impl PartialOrd for Enum { } } -impl 
crate::codec::data_type::AsMySQLBool for Enum { +impl crate::codec::data_type::AsMySqlBool for Enum { #[inline] fn as_mysql_bool(&self, _context: &mut crate::expr::EvalContext) -> crate::codec::Result { Ok(self.value != 0) diff --git a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs index 7251f5477f6..2e5abc6f87a 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs @@ -487,7 +487,7 @@ impl ConvertTo for Duration { } } -impl crate::codec::data_type::AsMySQLBool for Json { +impl crate::codec::data_type::AsMySqlBool for Json { #[inline] fn as_mysql_bool(&self, _context: &mut crate::expr::EvalContext) -> crate::codec::Result { // TODO: This logic is not correct. See pingcap/tidb#9593 diff --git a/components/tidb_query_datatype/src/codec/mysql/json/serde.rs b/components/tidb_query_datatype/src/codec/mysql/json/serde.rs index 1b848c3534f..19fec765d1c 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/serde.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/serde.rs @@ -15,9 +15,9 @@ use crate::codec::Error; /// https://github.com/pingcap/tidb/blob/master/types/json/binary.go /// We add a space after `,` and `:`. 
#[derive(Clone, Debug)] -pub struct MySQLFormatter {} +pub struct MySqlFormatter {} -impl serde_json::ser::Formatter for MySQLFormatter { +impl serde_json::ser::Formatter for MySqlFormatter { #[inline] fn begin_object_value(&mut self, writer: &mut W) -> std::io::Result<()> where @@ -51,9 +51,9 @@ impl serde_json::ser::Formatter for MySQLFormatter { } } -impl MySQLFormatter { +impl MySqlFormatter { pub fn new() -> Self { - MySQLFormatter {} + MySqlFormatter {} } } @@ -62,7 +62,7 @@ impl<'a> ToString for JsonRef<'a> { /// `to_writer_pretty` fn to_string(&self) -> String { let mut writer = Vec::with_capacity(128); - let mut ser = JsonSerializer::with_formatter(&mut writer, MySQLFormatter::new()); + let mut ser = JsonSerializer::with_formatter(&mut writer, MySqlFormatter::new()); self.serialize(&mut ser).unwrap(); unsafe { // serde_json will not emit invalid UTF-8 diff --git a/components/tidb_query_datatype/src/codec/mysql/set.rs b/components/tidb_query_datatype/src/codec/mysql/set.rs index 0d5a28e2ba5..62539c1ff2c 100644 --- a/components/tidb_query_datatype/src/codec/mysql/set.rs +++ b/components/tidb_query_datatype/src/codec/mysql/set.rs @@ -69,7 +69,7 @@ impl PartialOrd for Set { } } -impl crate::codec::data_type::AsMySQLBool for Set { +impl crate::codec::data_type::AsMySqlBool for Set { #[inline] fn as_mysql_bool(&self, _context: &mut crate::expr::EvalContext) -> crate::codec::Result { Ok(self.value > 0) diff --git a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs index 5d387f1cdff..79068b38118 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs @@ -1961,7 +1961,7 @@ pub trait TimeDecoder: NumberDecoder { impl TimeDecoder for T {} -impl crate::codec::data_type::AsMySQLBool for Time { +impl crate::codec::data_type::AsMySqlBool for Time { #[inline] fn as_mysql_bool(&self, _context: &mut crate::expr::EvalContext) 
-> crate::codec::Result { Ok(!self.is_zero()) diff --git a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs index 94e9dd0a9ae..463a969284d 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs @@ -13,17 +13,17 @@ use crate::codec::{Error, Result}; pub enum RowSlice<'a> { Small { origin: &'a [u8], - non_null_ids: LEBytes<'a, u8>, - null_ids: LEBytes<'a, u8>, - offsets: LEBytes<'a, u16>, - values: LEBytes<'a, u8>, + non_null_ids: LeBytes<'a, u8>, + null_ids: LeBytes<'a, u8>, + offsets: LeBytes<'a, u16>, + values: LeBytes<'a, u8>, }, Big { origin: &'a [u8], - non_null_ids: LEBytes<'a, u32>, - null_ids: LEBytes<'a, u32>, - offsets: LEBytes<'a, u32>, - values: LEBytes<'a, u8>, + non_null_ids: LeBytes<'a, u32>, + null_ids: LeBytes<'a, u32>, + offsets: LeBytes<'a, u32>, + values: LeBytes<'a, u8>, }, } @@ -45,7 +45,7 @@ impl RowSlice<'_> { non_null_ids: read_le_bytes(&mut data, non_null_cnt)?, null_ids: read_le_bytes(&mut data, null_cnt)?, offsets: read_le_bytes(&mut data, non_null_cnt)?, - values: LEBytes::new(data), + values: LeBytes::new(data), } } else { RowSlice::Small { @@ -53,7 +53,7 @@ impl RowSlice<'_> { non_null_ids: read_le_bytes(&mut data, non_null_cnt)?, null_ids: read_le_bytes(&mut data, null_cnt)?, offsets: read_le_bytes(&mut data, non_null_cnt)?, - values: LEBytes::new(data), + values: LeBytes::new(data), } }; Ok(row) @@ -175,7 +175,7 @@ impl RowSlice<'_> { /// use little endianness. 
#[cfg(target_endian = "little")] #[inline] -fn read_le_bytes<'a, T>(buf: &mut &'a [u8], len: usize) -> Result> +fn read_le_bytes<'a, T>(buf: &mut &'a [u8], len: usize) -> Result> where T: PrimInt, { @@ -185,17 +185,17 @@ where } let slice = &buf[..bytes_len]; buf.advance(bytes_len); - Ok(LEBytes::new(slice)) + Ok(LeBytes::new(slice)) } #[cfg(target_endian = "little")] -pub struct LEBytes<'a, T: PrimInt> { +pub struct LeBytes<'a, T: PrimInt> { slice: &'a [u8], _marker: PhantomData, } #[cfg(target_endian = "little")] -impl<'a, T: PrimInt> LEBytes<'a, T> { +impl<'a, T: PrimInt> LeBytes<'a, T> { fn new(slice: &'a [u8]) -> Self { Self { slice, diff --git a/components/tidb_query_executors/src/index_scan_executor.rs b/components/tidb_query_executors/src/index_scan_executor.rs index bcbf2b8f92b..9f23d434a6c 100644 --- a/components/tidb_query_executors/src/index_scan_executor.rs +++ b/components/tidb_query_executors/src/index_scan_executor.rs @@ -371,7 +371,7 @@ enum RestoreData<'a> { #[derive(PartialEq, Debug, Copy, Clone)] enum DecodePartitionIdOp<'a> { Nop, - PID(&'a [u8]), + Pid(&'a [u8]), } impl IndexScanExecutorImpl { @@ -662,7 +662,7 @@ impl IndexScanExecutorImpl { } else if partition_id_bytes.is_empty() { DecodePartitionIdOp::Nop } else { - DecodePartitionIdOp::PID(partition_id_bytes) + DecodePartitionIdOp::Pid(partition_id_bytes) } }; @@ -803,7 +803,7 @@ impl IndexScanExecutorImpl { ) -> Result<()> { match decode_pid { DecodePartitionIdOp::Nop => {} - DecodePartitionIdOp::PID(pid) => { + DecodePartitionIdOp::Pid(pid) => { // If need partition id, append partition id to the last column // before physical table id column if exists. 
let pid = NumberCodec::decode_i64(pid); diff --git a/components/tidb_query_executors/src/selection_executor.rs b/components/tidb_query_executors/src/selection_executor.rs index 61030e593e0..b7a19da9026 100644 --- a/components/tidb_query_executors/src/selection_executor.rs +++ b/components/tidb_query_executors/src/selection_executor.rs @@ -136,7 +136,7 @@ fn update_logical_rows_by_vector_value<'a, TT: EvaluableRef<'a>, T: 'a + ChunkRe eval_result_logical_rows: LogicalRows<'_>, ) -> tidb_query_common::error::Result<()> where - Option: AsMySQLBool, + Option: AsMySqlBool, { let mut err_result = Ok(()); let mut logical_index = 0; diff --git a/components/tikv_kv/src/cursor.rs b/components/tikv_kv/src/cursor.rs index 995f2ed0e21..cfa171054c9 100644 --- a/components/tikv_kv/src/cursor.rs +++ b/components/tikv_kv/src/cursor.rs @@ -577,7 +577,7 @@ impl<'a, S: 'a + Snapshot> CursorBuilder<'a, S> { mod tests { use engine_rocks::{ util::{new_engine_opt, new_temp_engine, FixedPrefixSliceTransform}, - RocksCfOptions, RocksDBOptions, RocksEngine, RocksSnapshot, + RocksCfOptions, RocksDbOptions, RocksEngine, RocksSnapshot, }; use engine_traits::{IterOptions, SyncMutable, CF_DEFAULT}; use keys::data_key; @@ -623,7 +623,7 @@ mod tests { .unwrap(); let engine = new_engine_opt( path.path().to_str().unwrap(), - RocksDBOptions::default(), + RocksDbOptions::default(), vec![(CF_DEFAULT, cf_opts)], ) .unwrap(); diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 0cc90730acd..44d5e698f5c 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -11,12 +11,12 @@ use std::{ pub use engine_rocks::RocksSnapshot; use engine_rocks::{ - get_env, RocksCfOptions, RocksDBOptions, RocksEngine as BaseRocksEngine, RocksEngineIterator, + get_env, RocksCfOptions, RocksDbOptions, RocksEngine as BaseRocksEngine, RocksEngineIterator, }; use engine_traits::{ CfName, Engines, IterOptions, Iterable, Iterator, 
KvEngine, Peekable, ReadOptions, }; -use file_system::IORateLimiter; +use file_system::IoRateLimiter; use kvproto::{kvrpcpb::Context, metapb, raft_cmdpb}; use raftstore::coprocessor::CoprocessorHost; use tempfile::{Builder, TempDir}; @@ -88,10 +88,10 @@ pub struct RocksEngine { impl RocksEngine { pub fn new( path: &str, - db_opts: Option, + db_opts: Option, cfs_opts: Vec<(CfName, RocksCfOptions)>, shared_block_cache: bool, - io_rate_limiter: Option>, + io_rate_limiter: Option>, ) -> Result { info!("RocksEngine: creating for path"; "path" => path); let (path, temp_dir) = match path { diff --git a/components/tikv_util/src/logger/mod.rs b/components/tikv_util/src/logger/mod.rs index dc5d4a3b862..91ecd803b89 100644 --- a/components/tikv_util/src/logger/mod.rs +++ b/components/tikv_util/src/logger/mod.rs @@ -1014,8 +1014,8 @@ mod tests { } } - struct RaftDBWriter; - impl Write for RaftDBWriter { + struct RaftDbWriter; + impl Write for RaftDbWriter { fn write(&mut self, buf: &[u8]) -> io::Result { RAFTDB_BUFFER.with(|buffer| buffer.borrow_mut().write(buf)) } @@ -1029,7 +1029,7 @@ mod tests { let normal = TikvFormat::new(PlainSyncDecorator::new(NormalWriter), true); let slow = TikvFormat::new(PlainSyncDecorator::new(SlowLogWriter), true); let rocksdb = TikvFormat::new(PlainSyncDecorator::new(RocksdbLogWriter), true); - let raftdb = TikvFormat::new(PlainSyncDecorator::new(RaftDBWriter), true); + let raftdb = TikvFormat::new(PlainSyncDecorator::new(RaftDbWriter), true); let drain = LogDispatcher::new(normal, rocksdb, raftdb, Some(slow)).fuse(); let drain = SlowLogFilter { threshold: 200, diff --git a/scripts/clippy b/scripts/clippy index 58bdafb817b..491362410c1 100755 --- a/scripts/clippy +++ b/scripts/clippy @@ -20,7 +20,6 @@ CLIPPY_LINTS=(-A clippy::module_inception \ -A clippy::too_many_arguments \ -A clippy::blacklisted_name \ -A clippy::redundant_closure \ - -A clippy::upper_case_acronyms \ -A clippy::field_reassign_with_default \ -A clippy::wrong_self_convention \ -A 
clippy::needless_range_loop \ @@ -33,6 +32,7 @@ CLIPPY_LINTS=(-A clippy::module_inception \ -A clippy::enum_variant_names \ -W clippy::dbg_macro \ -W clippy::todo \ + -D clippy::upper_case_acronyms \ -D clippy::disallowed-methods \ -D rust-2018-idioms) diff --git a/src/config.rs b/src/config.rs index 0fe367c1349..6c345b8b773 100644 --- a/src/config.rs +++ b/src/config.rs @@ -31,16 +31,15 @@ use engine_rocks::{ PrepopulateBlockCache, }, util::{FixedPrefixSliceTransform, FixedSuffixSliceTransform, NoopSliceTransform}, - RaftDBLogger, RangePropertiesCollectorFactory, RocksCfOptions, RocksDBOptions, RocksEngine, - RocksEventListener, RocksTitanDBOptions, RocksdbLogger, TtlPropertiesCollectorFactory, + RaftDbLogger, RangePropertiesCollectorFactory, RocksCfOptions, RocksDbOptions, RocksEngine, + RocksEventListener, RocksTitanDbOptions, RocksdbLogger, TtlPropertiesCollectorFactory, DEFAULT_PROP_KEYS_INDEX_DISTANCE, DEFAULT_PROP_SIZE_INDEX_DISTANCE, }; use engine_traits::{ - CFOptionsExt, ColumnFamilyOptions as ColumnFamilyOptionsTrait, DBOptions as _, DBOptionsExt, - TabletAccessor, TabletErrorCollector, TitanDBOptions as _, CF_DEFAULT, CF_LOCK, CF_RAFT, - CF_WRITE, + CfOptions as _, CfOptionsExt, DbOptions as _, DbOptionsExt, TabletAccessor, + TabletErrorCollector, TitanDbOptions as _, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; -use file_system::IORateLimiter; +use file_system::IoRateLimiter; use keys::region_raft_prefix_len; use kvproto::kvrpcpb::ApiVersion; use online_config::{ConfigChange, ConfigManager, ConfigValue, OnlineConfig, Result as CfgResult}; @@ -179,8 +178,8 @@ impl Default for TitanCfConfig { } impl TitanCfConfig { - fn build_opts(&self) -> RocksTitanDBOptions { - let mut opts = RocksTitanDBOptions::new(); + fn build_opts(&self) -> RocksTitanDbOptions { + let mut opts = RocksTitanDbOptions::new(); opts.set_min_blob_size(self.min_blob_size.0 as u64); opts.set_blob_file_compression(self.blob_file_compression.into()); 
opts.set_blob_cache(self.blob_cache_size.0 as usize, -1, false, 0.0); @@ -972,7 +971,7 @@ impl RaftCfConfig { #[serde(rename_all = "kebab-case")] // Note that Titan is still an experimental feature. Once enabled, it can't fall // back. Forced fallback may result in data loss. -pub struct TitanDBConfig { +pub struct TitanDbConfig { pub enabled: bool, pub dirname: String, pub disable_gc: bool, @@ -981,7 +980,7 @@ pub struct TitanDBConfig { pub purge_obsolete_files_period: ReadableDuration, } -impl Default for TitanDBConfig { +impl Default for TitanDbConfig { fn default() -> Self { Self { enabled: false, @@ -993,9 +992,9 @@ impl Default for TitanDBConfig { } } -impl TitanDBConfig { - fn build_opts(&self) -> RocksTitanDBOptions { - let mut opts = RocksTitanDBOptions::new(); +impl TitanDbConfig { + fn build_opts(&self) -> RocksTitanDbOptions { + let mut opts = RocksTitanDbOptions::new(); opts.set_dirname(&self.dirname); opts.set_disable_background_gc(self.disable_gc); opts.set_max_background_gc(self.max_background_gc); @@ -1082,13 +1081,13 @@ pub struct DbConfig { #[online_config(submodule)] pub raftcf: RaftCfConfig, #[online_config(skip)] - pub titan: TitanDBConfig, + pub titan: TitanDbConfig, } impl Default for DbConfig { fn default() -> DbConfig { let bg_job_limits = get_background_job_limits(&KVDB_DEFAULT_BACKGROUND_JOB_LIMITS); - let titan_config = TitanDBConfig { + let titan_config = TitanDbConfig { max_background_gc: bg_job_limits.max_titan_background_gc as i32, ..Default::default() }; @@ -1134,8 +1133,8 @@ impl Default for DbConfig { } impl DbConfig { - pub fn build_opt(&self) -> RocksDBOptions { - let mut opts = RocksDBOptions::default(); + pub fn build_opt(&self) -> RocksDbOptions { + let mut opts = RocksDbOptions::default(); opts.set_wal_recovery_mode(self.wal_recovery_mode); if !self.wal_dir.is_empty() { opts.set_wal_dir(&self.wal_dir); @@ -1392,13 +1391,13 @@ pub struct RaftDbConfig { #[online_config(submodule)] pub defaultcf: RaftDefaultCfConfig, 
#[online_config(skip)] - pub titan: TitanDBConfig, + pub titan: TitanDbConfig, } impl Default for RaftDbConfig { fn default() -> RaftDbConfig { let bg_job_limits = get_background_job_limits(&RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS); - let titan_config = TitanDBConfig { + let titan_config = TitanDbConfig { max_background_gc: bg_job_limits.max_titan_background_gc as i32, ..Default::default() }; @@ -1436,8 +1435,8 @@ impl Default for RaftDbConfig { } impl RaftDbConfig { - pub fn build_opt(&self) -> RocksDBOptions { - let mut opts = RocksDBOptions::default(); + pub fn build_opt(&self) -> RocksDbOptions { + let mut opts = RocksDbOptions::default(); opts.set_wal_recovery_mode(self.wal_recovery_mode); if !self.wal_dir.is_empty() { opts.set_wal_dir(&self.wal_dir); @@ -1457,7 +1456,7 @@ impl RaftDbConfig { opts.set_max_log_file_size(self.info_log_max_size.0); opts.set_log_file_time_to_roll(self.info_log_roll_time.as_secs()); opts.set_keep_log_file_num(self.info_log_keep_log_file_num); - opts.set_info_log(RaftDBLogger::default()); + opts.set_info_log(RaftDbLogger::default()); opts.set_info_log_level(self.info_log_level.into()); opts.set_max_subcompactions(self.max_sub_compactions); opts.set_writable_file_max_buffer_size(self.writable_file_max_buffer_size.0 as i32); @@ -1536,20 +1535,20 @@ impl RaftEngineConfig { } #[derive(Clone, Copy, Debug)] -pub enum DBType { +pub enum DbType { Kv, Raft, } -pub struct DBConfigManger> { +pub struct DbConfigManger> { tablet_accessor: Arc, - db_type: DBType, + db_type: DbType, shared_block_cache: bool, } -impl> DBConfigManger { - pub fn new(tablet_accessor: Arc, db_type: DBType, shared_block_cache: bool) -> Self { - DBConfigManger { +impl> DbConfigManger { + pub fn new(tablet_accessor: Arc, db_type: DbType, shared_block_cache: bool) -> Self { + DbConfigManger { tablet_accessor, db_type, shared_block_cache, @@ -1681,17 +1680,17 @@ impl> DBConfigManger { fn validate_cf(&self, cf: &str) -> Result<(), Box> { match (self.db_type, cf) { - 
(DBType::Kv, CF_DEFAULT) - | (DBType::Kv, CF_WRITE) - | (DBType::Kv, CF_LOCK) - | (DBType::Kv, CF_RAFT) - | (DBType::Raft, CF_DEFAULT) => Ok(()), + (DbType::Kv, CF_DEFAULT) + | (DbType::Kv, CF_WRITE) + | (DbType::Kv, CF_LOCK) + | (DbType::Kv, CF_RAFT) + | (DbType::Raft, CF_DEFAULT) => Ok(()), _ => Err(format!("invalid cf {:?} for db {:?}", cf, self.db_type).into()), } } } -impl + Send + Sync> ConfigManager for DBConfigManger { +impl + Send + Sync> ConfigManager for DbConfigManger { fn dispatch(&mut self, change: ConfigChange) -> Result<(), Box> { let change_str = format!("{:?}", change); let mut change: Vec<(String, ConfigValue)> = change.into_iter().collect(); @@ -3556,7 +3555,7 @@ impl TiKvConfig { pub fn build_shared_rocks_env( &self, key_manager: Option>, - limiter: Option>, + limiter: Option>, ) -> Result, String> { let env = get_env(key_manager, limiter)?; if !self.raft_engine.enable { @@ -4057,9 +4056,7 @@ mod tests { use api_version::{ApiV1, KvFormat}; use case_macros::*; - use engine_traits::{ - ColumnFamilyOptions as ColumnFamilyOptionsTrait, DBOptions as DBOptionsTrait, DummyFactory, - }; + use engine_traits::{CfOptions as _, DbOptions as _, DummyFactory}; use futures::executor::block_on; use grpcio::ResourceQuota; use itertools::Itertools; @@ -4379,7 +4376,7 @@ mod tests { incoming.coprocessor.region_split_keys = Some(10000); incoming.gc.max_write_bytes_per_sec = ReadableSize::mb(100); incoming.rocksdb.defaultcf.block_cache_size = ReadableSize::mb(500); - incoming.storage.io_rate_limit.import_priority = file_system::IOPriority::High; + incoming.storage.io_rate_limit.import_priority = file_system::IoPriority::High; let diff = old.diff(&incoming); let mut change = HashMap::new(); change.insert( @@ -4505,9 +4502,9 @@ mod tests { let (shared, cfg_controller) = (cfg.storage.block_cache.shared, ConfigController::new(cfg)); cfg_controller.register( Module::Rocksdb, - Box::new(DBConfigManger::new( + Box::new(DbConfigManger::new( Arc::new(engine.clone()), - 
DBType::Kv, + DbType::Kv, shared, )), ); diff --git a/src/coprocessor/dag/mod.rs b/src/coprocessor/dag/mod.rs index d0b9d7c381a..8b3f561ce5f 100644 --- a/src/coprocessor/dag/mod.rs +++ b/src/coprocessor/dag/mod.rs @@ -65,7 +65,7 @@ impl DagHandlerBuilder { pub fn build(self) -> Result> { COPR_DAG_REQ_COUNT.with_label_values(&["batch"]).inc(); - Ok(BatchDAGHandler::new( + Ok(BatchDagHandler::new( self.req, self.ranges, self.store, @@ -81,12 +81,12 @@ impl DagHandlerBuilder { } } -pub struct BatchDAGHandler { +pub struct BatchDagHandler { runner: tidb_query_executors::runner::BatchExecutorsRunner, data_version: Option, } -impl BatchDAGHandler { +impl BatchDagHandler { pub fn new( req: DagRequest, ranges: Vec, @@ -116,7 +116,7 @@ impl BatchDAGHandler { } #[async_trait] -impl RequestHandler for BatchDAGHandler { +impl RequestHandler for BatchDagHandler { async fn handle_request(&mut self) -> Result> { let result = self.runner.handle_request().await; handle_qe_response(result, self.runner.can_be_cached(), self.data_version).map(|x| x.into()) diff --git a/src/coprocessor/dag/storage_impl.rs b/src/coprocessor/dag/storage_impl.rs index 46dcf7f570e..7f5e60081e7 100644 --- a/src/coprocessor/dag/storage_impl.rs +++ b/src/coprocessor/dag/storage_impl.rs @@ -1,7 +1,7 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
use tidb_query_common::storage::{ - IntervalRange, OwnedKvPair, PointRange, Result as QEResult, Storage, + IntervalRange, OwnedKvPair, PointRange, Result as QeResult, Storage, }; use txn_types::Key; @@ -41,7 +41,7 @@ impl Storage for TiKvStorage { is_backward_scan: bool, is_key_only: bool, range: IntervalRange, - ) -> QEResult<()> { + ) -> QeResult<()> { if let Some(scanner) = &mut self.scanner { self.cf_stats_backlog.add(&scanner.take_statistics()); if scanner.met_newer_ts_data() == NewerTsCheckState::Met { @@ -67,14 +67,14 @@ impl Storage for TiKvStorage { Ok(()) } - fn scan_next(&mut self) -> QEResult> { + fn scan_next(&mut self) -> QeResult> { // Unwrap is fine because we must have called `reset_range` before calling // `scan_next`. let kv = self.scanner.as_mut().unwrap().next().map_err(Error::from)?; Ok(kv.map(|(k, v)| (k.into_raw().unwrap(), v))) } - fn get(&mut self, _is_key_only: bool, range: PointRange) -> QEResult> { + fn get(&mut self, _is_key_only: bool, range: PointRange) -> QeResult> { // TODO: Default CF does not need to be accessed if KeyOnly. // TODO: No need to check newer ts data if self.scanner has met newer ts data. let key = range.0; diff --git a/src/coprocessor/readpool_impl.rs b/src/coprocessor/readpool_impl.rs index b47ee388f22..45f6b9bcc73 100644 --- a/src/coprocessor/readpool_impl.rs +++ b/src/coprocessor/readpool_impl.rs @@ -2,7 +2,7 @@ use std::sync::{Arc, Mutex}; -use file_system::{set_io_type, IOType}; +use file_system::{set_io_type, IoType}; use tikv_util::yatp_pool::{Config, DefaultTicker, FuturePool, PoolTicker, YatpPoolBuilder}; use super::metrics::*; @@ -45,7 +45,7 @@ pub fn build_read_pool( .name_prefix(name) .after_start(move || { set_tls_engine(engine.lock().unwrap().clone()); - set_io_type(IOType::ForegroundRead); + set_io_type(IoType::ForegroundRead); }) .before_stop(move || unsafe { // Safety: we call `set_` and `destroy_` with the same engine type. 
@@ -71,7 +71,7 @@ pub fn build_read_pool_for_test( .config(config) .after_start(move || { set_tls_engine(engine.lock().unwrap().clone()); - set_io_type(IOType::ForegroundRead); + set_io_type(IoType::ForegroundRead); }) // Safety: we call `set_` and `destroy_` with the same engine type. .before_stop(|| unsafe { destroy_tls_engine::() }) diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 36089e41fd1..fea333903a6 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -9,7 +9,7 @@ use std::{ use collections::HashSet; use engine_traits::{KvEngine, CF_DEFAULT, CF_WRITE}; -use file_system::{set_io_type, IOType}; +use file_system::{set_io_type, IoType}; use futures::{ executor::{ThreadPool, ThreadPoolBuilder}, future::join_all, @@ -87,7 +87,7 @@ where .after_start_wrapper(move || { tikv_util::thread_group::set_properties(props.clone()); tikv_alloc::add_thread_memory_accessor(); - set_io_type(IOType::Import); + set_io_type(IoType::Import); }) .before_stop_wrapper(move || tikv_alloc::remove_thread_memory_accessor()) .create() @@ -587,7 +587,7 @@ where /// /// If the ingestion fails because the region is not found or the epoch does /// not match, the remaining files will eventually be cleaned up by - /// CleanupSSTWorker. + /// CleanupSstWorker. 
fn ingest( &mut self, ctx: RpcContext<'_>, diff --git a/src/read_pool.rs b/src/read_pool.rs index 7409c9a4b6e..9c413de60a7 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -5,7 +5,7 @@ use std::{ sync::{mpsc::SyncSender, Arc, Mutex}, }; -use file_system::{set_io_type, IOType}; +use file_system::{set_io_type, IoType}; use futures::{channel::oneshot, future::TryFutureExt}; use kvproto::kvrpcpb::CommandPri; use online_config::{ConfigChange, ConfigManager, ConfigValue, Result as CfgResult}; @@ -261,7 +261,7 @@ pub fn build_yatp_read_pool( .after_start(move || { let engine = raftkv.lock().unwrap().clone(); set_tls_engine(engine); - set_io_type(IOType::ForegroundRead); + set_io_type(IoType::ForegroundRead); }) .before_stop(|| unsafe { destroy_tls_engine::(); diff --git a/src/server/debug.rs b/src/server/debug.rs index 03630cf930a..933f4308245 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -20,7 +20,7 @@ use engine_traits::{ CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use kvproto::{ - debugpb::{self, Db as DBType}, + debugpb::{self, Db as DbType}, metapb::{PeerRole, Region}, raft_serverpb::*, }; @@ -166,15 +166,15 @@ impl Debugger { Ok(regions) } - fn get_db_from_type(&self, db: DBType) -> Result<&RocksEngine> { + fn get_db_from_type(&self, db: DbType) -> Result<&RocksEngine> { match db { - DBType::Kv => Ok(&self.engines.kv), - DBType::Raft => Err(box_err!("Get raft db is not allowed")), - _ => Err(box_err!("invalid DBType type")), + DbType::Kv => Ok(&self.engines.kv), + DbType::Raft => Err(box_err!("Get raft db is not allowed")), + _ => Err(box_err!("invalid DB type")), } } - pub fn get(&self, db: DBType, cf: &str, key: &[u8]) -> Result> { + pub fn get(&self, db: DbType, cf: &str, key: &[u8]) -> Result> { validate_db_and_cf(db, cf)?; let db = self.get_db_from_type(db)?; match db.get_value_cf(cf, key) { @@ -313,7 +313,7 @@ impl Debugger { /// Compact the cf[start..end) in the db. 
pub fn compact( &self, - db: DBType, + db: DbType, cf: &str, start: &[u8], end: &[u8], @@ -1309,13 +1309,13 @@ fn region_overlap(r1: &Region, r2: &Region) -> bool { && (start_key_2 < end_key_1 || end_key_1.is_empty()) } -fn validate_db_and_cf(db: DBType, cf: &str) -> Result<()> { +fn validate_db_and_cf(db: DbType, cf: &str) -> Result<()> { match (db, cf) { - (DBType::Kv, CF_DEFAULT) - | (DBType::Kv, CF_WRITE) - | (DBType::Kv, CF_LOCK) - | (DBType::Kv, CF_RAFT) - | (DBType::Raft, CF_DEFAULT) => Ok(()), + (DbType::Kv, CF_DEFAULT) + | (DbType::Kv, CF_WRITE) + | (DbType::Kv, CF_LOCK) + | (DbType::Kv, CF_RAFT) + | (DbType::Raft, CF_DEFAULT) => Ok(()), _ => Err(Error::InvalidArgument(format!( "invalid cf {:?} for db {:?}", cf, db @@ -1383,7 +1383,7 @@ fn divide_db(db: &RocksEngine, parts: usize) -> raftstore::Result>> #[cfg(test)] mod tests { - use engine_rocks::{util::new_engine_opt, RocksCfOptions, RocksDBOptions, RocksEngine}; + use engine_rocks::{util::new_engine_opt, RocksCfOptions, RocksDbOptions, RocksEngine}; use engine_traits::{Mutable, SyncMutable, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE}; use kvproto::{ kvrpcpb::ApiVersion, @@ -1496,22 +1496,22 @@ mod tests { #[test] fn test_validate_db_and_cf() { let valid_cases = vec![ - (DBType::Kv, CF_DEFAULT), - (DBType::Kv, CF_WRITE), - (DBType::Kv, CF_LOCK), - (DBType::Kv, CF_RAFT), - (DBType::Raft, CF_DEFAULT), + (DbType::Kv, CF_DEFAULT), + (DbType::Kv, CF_WRITE), + (DbType::Kv, CF_LOCK), + (DbType::Kv, CF_RAFT), + (DbType::Raft, CF_DEFAULT), ]; for (db, cf) in valid_cases { validate_db_and_cf(db, cf).unwrap(); } let invalid_cases = vec![ - (DBType::Raft, CF_WRITE), - (DBType::Raft, CF_LOCK), - (DBType::Raft, CF_RAFT), - (DBType::Invalid, CF_DEFAULT), - (DBType::Invalid, "BAD_CF"), + (DbType::Raft, CF_WRITE), + (DbType::Raft, CF_LOCK), + (DbType::Raft, CF_RAFT), + (DbType::Invalid, CF_DEFAULT), + (DbType::Invalid, "BAD_CF"), ]; for (db, cf) in invalid_cases { validate_db_and_cf(db, cf).unwrap_err(); @@ -1558,10 
+1558,10 @@ mod tests { engine.put(k, v).unwrap(); assert_eq!(&*engine.get_value(k).unwrap().unwrap(), v); - let got = debugger.get(DBType::Kv, CF_DEFAULT, k).unwrap(); + let got = debugger.get(DbType::Kv, CF_DEFAULT, k).unwrap(); assert_eq!(&got, v); - match debugger.get(DBType::Kv, CF_DEFAULT, b"foo") { + match debugger.get(DbType::Kv, CF_DEFAULT, b"foo") { Err(Error::NotFound(_)) => (), _ => panic!("expect Error::NotFound(_)"), } @@ -2151,7 +2151,7 @@ mod tests { .iter() .map(|cf| (*cf, RocksCfOptions::default())) .collect(); - let db_opt = RocksDBOptions::default(); + let db_opt = RocksDbOptions::default(); db_opt.enable_multi_batch_write(true); let db = new_engine_opt(path_str, db_opt, cfs_opts).unwrap(); // Write initial KVs. diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 59315b4732d..4e2edc13569 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -11,8 +11,8 @@ use engine_rocks::{ RocksEventListener, }; use engine_traits::{ - CFOptionsExt, ColumnFamilyOptions, CompactionJobInfo, Result, TabletAccessor, TabletFactory, - CF_DEFAULT, CF_WRITE, + CfOptions, CfOptionsExt, CompactionJobInfo, Result, TabletAccessor, TabletFactory, CF_DEFAULT, + CF_WRITE, }; use kvproto::kvrpcpb::ApiVersion; use raftstore::RegionInfoAccessor; diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index 80366cc17d1..b47fc34cf27 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -7,9 +7,7 @@ use std::{ use collections::HashMap; use engine_rocks::RocksEngine; -use engine_traits::{ - CFOptionsExt, ColumnFamilyOptions, Result, TabletAccessor, TabletFactory, CF_DEFAULT, -}; +use engine_traits::{CfOptions, CfOptionsExt, Result, TabletAccessor, TabletFactory, CF_DEFAULT}; use crate::server::engine_factory::KvEngineFactory; diff --git a/src/server/gc_worker/applied_lock_collector.rs b/src/server/gc_worker/applied_lock_collector.rs index a013d742890..9d0e16f4286 100644 --- 
a/src/server/gc_worker/applied_lock_collector.rs +++ b/src/server/gc_worker/applied_lock_collector.rs @@ -20,7 +20,7 @@ use raftstore::coprocessor::{ use tikv_util::worker::{Builder as WorkerBuilder, Runnable, ScheduleError, Scheduler, Worker}; use txn_types::Key; -// TODO: Use new error type for GCWorker instead of storage::Error. +// TODO: Use new error type for GcWorker instead of storage::Error. use super::{Error, ErrorInner, Result}; use crate::storage::{ mvcc::{ErrorInner as MvccErrorInner, Lock, TimeStamp}, diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index 165a1f62ddf..1c50b56bed1 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -23,7 +23,7 @@ use engine_rocks::{ use engine_traits::{ KvEngine, MiscExt, Mutable, MvccProperties, WriteBatch, WriteBatchExt, WriteOptions, }; -use file_system::{IOType, WithIOType}; +use file_system::{IoType, WithIoType}; use pd_client::{Feature, FeatureGate}; use prometheus::{local::*, *}; use raftstore::coprocessor::RegionInfoProvider; @@ -466,7 +466,7 @@ impl WriteCompactionFilter { wb: &RocksWriteBatchVec, wopts: &WriteOptions, ) -> Result<(), engine_traits::Error> { - let _io_type_guard = WithIOType::new(IOType::Gc); + let _io_type_guard = WithIoType::new(IoType::Gc); fail_point!("write_compaction_filter_flush_write_batch", true, |_| { Err(engine_traits::Error::Engine( engine_traits::Status::with_error( @@ -725,7 +725,7 @@ pub mod test_utils { // Put a new key-value pair to ensure compaction can be triggered correctly. engine.delete_cf("write", b"znot-exists-key").unwrap(); - TestGCRunner::new(safe_point).gc(&engine); + TestGcRunner::new(safe_point).gc(&engine); } lazy_static! 
{ @@ -740,7 +740,7 @@ pub mod test_utils { compact_opts } - pub struct TestGCRunner<'a> { + pub struct TestGcRunner<'a> { pub safe_point: u64, pub ratio_threshold: Option, pub start: Option<&'a [u8]>, @@ -751,11 +751,11 @@ pub mod test_utils { pub(super) callbacks_on_drop: Vec>, } - impl<'a> TestGCRunner<'a> { + impl<'a> TestGcRunner<'a> { pub fn new(safe_point: u64) -> Self { let (gc_scheduler, gc_receiver) = dummy_scheduler(); - TestGCRunner { + TestGcRunner { safe_point, ratio_threshold: None, start: None, @@ -768,7 +768,7 @@ pub mod test_utils { } } - impl<'a> TestGCRunner<'a> { + impl<'a> TestGcRunner<'a> { pub fn safe_point(&mut self, sp: u64) -> &mut Self { self.safe_point = sp; self @@ -915,7 +915,7 @@ pub mod tests { let engine = TestEngineBuilder::new().build().unwrap(); let raw_engine = engine.get_rocksdb(); let value = vec![b'v'; 512]; - let mut gc_runner = TestGCRunner::new(0); + let mut gc_runner = TestGcRunner::new(0); // GC can't delete keys after the given safe point. must_prewrite_put(&engine, b"zkey", &value, b"zkey", 100); @@ -948,7 +948,7 @@ pub mod tests { let value = vec![b'v'; 512]; let engine = TestEngineBuilder::new().build().unwrap(); let raw_engine = engine.get_rocksdb(); - let mut gc_runner = TestGCRunner::new(0); + let mut gc_runner = TestGcRunner::new(0); let mut gc_and_check = |expect_tasks: bool, prefix: &[u8]| { gc_runner.safe_point(500).gc(&raw_engine); @@ -1018,7 +1018,7 @@ pub mod tests { let engine = builder.build_with_cfg(&cfg).unwrap(); let raw_engine = engine.get_rocksdb(); let value = vec![b'v'; 512]; - let mut gc_runner = TestGCRunner::new(0); + let mut gc_runner = TestGcRunner::new(0); for start_ts in &[100, 110, 120, 130] { must_prewrite_put(&engine, b"zkey", &value, b"zkey", *start_ts); @@ -1084,7 +1084,7 @@ pub mod tests { let builder = TestEngineBuilder::new().path(dir.path()); let engine = builder.build_with_cfg(&cfg).unwrap(); let raw_engine = engine.get_rocksdb(); - let mut gc_runner = TestGCRunner::new(0); + let 
mut gc_runner = TestGcRunner::new(0); // So the construction of SST files will be: // L6: |key_110| diff --git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index 7fdc440527f..bcfe87d6783 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -331,7 +331,7 @@ impl GcMan } } - /// Sets the initial state of the `GCManger`. + /// Sets the initial state of the `GcManger`. /// The only task of initializing is to simply get the current safe point as /// the initial value of `safe_point`. TiKV won't do any GC /// automatically until the first time `safe_point` was updated to a diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index fe409be3ae4..7e695430d10 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -19,7 +19,7 @@ use engine_traits::{ raw_ttl::ttl_current_ts, DeleteStrategy, Error as EngineError, KvEngine, MiscExt, Range, WriteBatch, WriteOptions, CF_DEFAULT, CF_LOCK, CF_WRITE, }; -use file_system::{IOType, WithIOType}; +use file_system::{IoType, WithIoType}; use futures::executor::block_on; use kvproto::{ kvrpcpb::{Context, LockInfo}, @@ -767,7 +767,7 @@ where #[inline] fn run(&mut self, task: GcTask) { - let _io_type_guard = WithIOType::new(IOType::Gc); + let _io_type_guard = WithIoType::new(IoType::Gc); let enum_label = task.get_enum_label(); GC_GCTASK_COUNTER_STATIC.get(enum_label).inc(); @@ -1293,10 +1293,10 @@ mod tests { /// A wrapper of engine that adds the 'z' prefix to keys internally. /// For test engines, they writes keys into db directly, but in production a - /// 'z' prefix will be added to keys by raftstore layer before writing to - /// db. Some functionalities of `GCWorker` bypasses Raft layer, so they - /// needs to know how data is actually represented in db. This wrapper - /// allows test engines write 'z'-prefixed keys to db. + /// 'z' prefix will be added to keys by raftstore layer before writing + /// to db. 
Some functionalities of `GcWorker` bypasses Raft layer, so + /// they needs to know how data is actually represented in db. This + /// wrapper allows test engines write 'z'-prefixed keys to db. #[derive(Clone)] struct PrefixedEngine(kv::RocksEngine); diff --git a/src/server/gc_worker/mod.rs b/src/server/gc_worker/mod.rs index 20de36ef035..d6114a5875c 100644 --- a/src/server/gc_worker/mod.rs +++ b/src/server/gc_worker/mod.rs @@ -7,9 +7,9 @@ mod gc_manager; mod gc_worker; mod rawkv_compaction_filter; -// TODO: Use separated error type for GCWorker instead. +// TODO: Use separated error type for GcWorker instead. #[cfg(any(test, feature = "failpoints"))] -pub use compaction_filter::test_utils::{gc_by_compact, TestGCRunner}; +pub use compaction_filter::test_utils::{gc_by_compact, TestGcRunner}; pub use compaction_filter::WriteCompactionFilterFactory; pub use config::{GcConfig, GcWorkerConfigManager, DEFAULT_GC_BATCH_KEYS}; use engine_traits::MvccProperties; diff --git a/src/server/gc_worker/rawkv_compaction_filter.rs b/src/server/gc_worker/rawkv_compaction_filter.rs index 3ed206408e4..49758f5793b 100644 --- a/src/server/gc_worker/rawkv_compaction_filter.rs +++ b/src/server/gc_worker/rawkv_compaction_filter.rs @@ -201,7 +201,7 @@ impl RawCompactionFilter { self.versions += 1; let raw_value = ApiV2::decode_raw_value(value)?; // If it's the latest version, and it's deleted or expired, it needs to be sent - // to GCWorker to be processed asynchronously. + // to GcWorker to be processed asynchronously. 
if !raw_value.is_valid(self.current_ts) { self.raw_handle_delete(); if self.mvcc_deletions.len() >= DEFAULT_DELETE_BATCH_COUNT { @@ -314,7 +314,7 @@ pub mod tests { use super::*; use crate::{ - config::DbConfig, server::gc_worker::TestGCRunner, storage::kv::TestEngineBuilder, + config::DbConfig, server::gc_worker::TestGcRunner, storage::kv::TestEngineBuilder, }; pub fn make_key(key: &[u8], ts: u64) -> Vec { @@ -334,7 +334,7 @@ pub mod tests { .build_with_cfg(&cfg) .unwrap(); let raw_engine = engine.get_rocksdb(); - let mut gc_runner = TestGCRunner::new(0); + let mut gc_runner = TestGcRunner::new(0); let user_key = b"r\0aaaaaaaaaaa"; @@ -399,7 +399,7 @@ pub mod tests { .build() .unwrap(); let raw_engine = engine.get_rocksdb(); - let mut gc_runner = TestGCRunner::new(0); + let mut gc_runner = TestGcRunner::new(0); let mut gc_and_check = |expect_tasks: bool, prefix: &[u8]| { gc_runner.safe_point(500).gc_raw(&raw_engine); diff --git a/src/server/snap.rs b/src/server/snap.rs index f451b6b70e9..b785c455921 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -13,7 +13,7 @@ use std::{ }; use engine_traits::KvEngine; -use file_system::{IOType, WithIOType}; +use file_system::{IoType, WithIoType}; use futures::{ future::{Future, TryFutureExt}, sink::SinkExt, @@ -206,7 +206,7 @@ struct RecvSnapContext { key: SnapKey, file: Option>, raft_msg: RaftMessage, - io_type: IOType, + io_type: IoType, } impl RecvSnapContext { @@ -227,11 +227,11 @@ impl RecvSnapContext { let mut snapshot = RaftSnapshotData::default(); snapshot.merge_from_bytes(data)?; let io_type = if snapshot.get_meta().get_for_balance() { - IOType::LoadBalance + IoType::LoadBalance } else { - IOType::Replication + IoType::Replication }; - let _with_io_type = WithIOType::new(io_type); + let _with_io_type = WithIoType::new(io_type); let snap = { let s = match snap_mgr.get_snapshot_for_receiving(&key, data) { @@ -257,7 +257,7 @@ impl RecvSnapContext { } fn finish>(self, raft_router: R) -> Result<()> { - let 
_with_io_type = WithIOType::new(self.io_type); + let _with_io_type = WithIoType::new(self.io_type); let key = self.key; if let Some(mut file) = self.file { info!("saving snapshot file"; "snap_key" => %key, "file" => file.path()); @@ -300,7 +300,7 @@ fn recv_snap + 'static>( return Err(box_err!("{} receive chunk with empty data", context.key)); } let f = context.file.as_mut().unwrap(); - let _with_io_type = WithIOType::new(context.io_type); + let _with_io_type = WithIoType::new(context.io_type); if let Err(e) = Write::write_all(&mut *f, &data) { let key = &context.key; let path = context.file.as_mut().unwrap().path(); diff --git a/src/storage/config.rs b/src/storage/config.rs index 2a5ac4840e0..9a359310178 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -5,7 +5,7 @@ use std::{cmp::max, error::Error}; use engine_rocks::raw::{Cache, LRUCacheOptions, MemoryAllocator}; -use file_system::{IOPriority, IORateLimitMode, IORateLimiter, IOType}; +use file_system::{IoPriority, IoRateLimitMode, IoRateLimiter, IoType}; use kvproto::kvrpcpb::ApiVersion; use libc::c_int; use online_config::OnlineConfig; @@ -64,7 +64,7 @@ pub struct Config { #[online_config(submodule)] pub block_cache: BlockCacheConfig, #[online_config(submodule)] - pub io_rate_limit: IORateLimitConfig, + pub io_rate_limit: IoRateLimitConfig, } impl Default for Config { @@ -88,7 +88,7 @@ impl Default for Config { ttl_check_poll_interval: ReadableDuration::hours(12), flow_control: FlowControlConfig::default(), block_cache: BlockCacheConfig::default(), - io_rate_limit: IORateLimitConfig::default(), + io_rate_limit: IoRateLimitConfig::default(), background_error_recovery_window: ReadableDuration::hours(1), } } @@ -278,82 +278,82 @@ impl BlockCacheConfig { #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] -pub struct IORateLimitConfig { +pub struct IoRateLimitConfig { pub max_bytes_per_sec: ReadableSize, 
#[online_config(skip)] - pub mode: IORateLimitMode, + pub mode: IoRateLimitMode, /// When this flag is off, high-priority IOs are counted but not limited. - /// Default set to false because the optimal throughput target provided by - /// user might not be the maximum available bandwidth. For multi-tenancy - /// use case, this flag should be turned on. + /// Default set to false because the optimal throughput target provided + /// by user might not be the maximum available bandwidth. For + /// multi-tenancy use case, this flag should be turned on. #[online_config(skip)] pub strict: bool, - pub foreground_read_priority: IOPriority, - pub foreground_write_priority: IOPriority, - pub flush_priority: IOPriority, - pub level_zero_compaction_priority: IOPriority, - pub compaction_priority: IOPriority, - pub replication_priority: IOPriority, - pub load_balance_priority: IOPriority, - pub gc_priority: IOPriority, - pub import_priority: IOPriority, - pub export_priority: IOPriority, - pub other_priority: IOPriority, + pub foreground_read_priority: IoPriority, + pub foreground_write_priority: IoPriority, + pub flush_priority: IoPriority, + pub level_zero_compaction_priority: IoPriority, + pub compaction_priority: IoPriority, + pub replication_priority: IoPriority, + pub load_balance_priority: IoPriority, + pub gc_priority: IoPriority, + pub import_priority: IoPriority, + pub export_priority: IoPriority, + pub other_priority: IoPriority, } -impl Default for IORateLimitConfig { - fn default() -> IORateLimitConfig { - IORateLimitConfig { +impl Default for IoRateLimitConfig { + fn default() -> IoRateLimitConfig { + IoRateLimitConfig { max_bytes_per_sec: ReadableSize::mb(0), - mode: IORateLimitMode::WriteOnly, + mode: IoRateLimitMode::WriteOnly, strict: false, - foreground_read_priority: IOPriority::High, - foreground_write_priority: IOPriority::High, - flush_priority: IOPriority::High, - level_zero_compaction_priority: IOPriority::Medium, - compaction_priority: IOPriority::Low, - 
replication_priority: IOPriority::High, - load_balance_priority: IOPriority::High, - gc_priority: IOPriority::High, - import_priority: IOPriority::Medium, - export_priority: IOPriority::Medium, - other_priority: IOPriority::High, + foreground_read_priority: IoPriority::High, + foreground_write_priority: IoPriority::High, + flush_priority: IoPriority::High, + level_zero_compaction_priority: IoPriority::Medium, + compaction_priority: IoPriority::Low, + replication_priority: IoPriority::High, + load_balance_priority: IoPriority::High, + gc_priority: IoPriority::High, + import_priority: IoPriority::Medium, + export_priority: IoPriority::Medium, + other_priority: IoPriority::High, } } } -impl IORateLimitConfig { - pub fn build(&self, enable_statistics: bool) -> IORateLimiter { - let limiter = IORateLimiter::new(self.mode, self.strict, enable_statistics); +impl IoRateLimitConfig { + pub fn build(&self, enable_statistics: bool) -> IoRateLimiter { + let limiter = IoRateLimiter::new(self.mode, self.strict, enable_statistics); limiter.set_io_rate_limit(self.max_bytes_per_sec.0 as usize); - limiter.set_io_priority(IOType::ForegroundRead, self.foreground_read_priority); - limiter.set_io_priority(IOType::ForegroundWrite, self.foreground_write_priority); - limiter.set_io_priority(IOType::Flush, self.flush_priority); + limiter.set_io_priority(IoType::ForegroundRead, self.foreground_read_priority); + limiter.set_io_priority(IoType::ForegroundWrite, self.foreground_write_priority); + limiter.set_io_priority(IoType::Flush, self.flush_priority); limiter.set_io_priority( - IOType::LevelZeroCompaction, + IoType::LevelZeroCompaction, self.level_zero_compaction_priority, ); - limiter.set_io_priority(IOType::Compaction, self.compaction_priority); - limiter.set_io_priority(IOType::Replication, self.replication_priority); - limiter.set_io_priority(IOType::LoadBalance, self.load_balance_priority); - limiter.set_io_priority(IOType::Gc, self.gc_priority); - 
limiter.set_io_priority(IOType::Import, self.import_priority); - limiter.set_io_priority(IOType::Export, self.export_priority); - limiter.set_io_priority(IOType::Other, self.other_priority); + limiter.set_io_priority(IoType::Compaction, self.compaction_priority); + limiter.set_io_priority(IoType::Replication, self.replication_priority); + limiter.set_io_priority(IoType::LoadBalance, self.load_balance_priority); + limiter.set_io_priority(IoType::Gc, self.gc_priority); + limiter.set_io_priority(IoType::Import, self.import_priority); + limiter.set_io_priority(IoType::Export, self.export_priority); + limiter.set_io_priority(IoType::Other, self.other_priority); limiter } fn validate(&mut self) -> Result<(), Box> { - if self.other_priority != IOPriority::High { + if self.other_priority != IoPriority::High { warn!( "Occasionally some critical IO operations are tagged as IOType::Other, \ e.g. IOs are fired from unmanaged threads, thread-local type storage exceeds \ capacity. To be on the safe side, change priority for IOType::Other from \ {:?} to {:?}", self.other_priority, - IOPriority::High + IoPriority::High ); - self.other_priority = IOPriority::High; + self.other_priority = IoPriority::High; } if self.gc_priority != self.foreground_write_priority { warn!( @@ -363,7 +363,7 @@ impl IORateLimitConfig { ); self.gc_priority = self.foreground_write_priority; } - if self.mode != IORateLimitMode::WriteOnly { + if self.mode != IoRateLimitMode::WriteOnly { return Err( "storage.io-rate-limit.mode other than write-only is not supported.".into(), ); diff --git a/src/storage/config_manager.rs b/src/storage/config_manager.rs index d3d051ac5f9..8bc92a7f697 100644 --- a/src/storage/config_manager.rs +++ b/src/storage/config_manager.rs @@ -4,8 +4,8 @@ use std::{convert::TryInto, sync::Arc}; -use engine_traits::{CFNamesExt, CFOptionsExt, TabletFactory, CF_DEFAULT}; -use file_system::{get_io_rate_limiter, IOPriority, IOType}; +use engine_traits::{CfNamesExt, CfOptionsExt, TabletFactory, 
CF_DEFAULT}; +use file_system::{get_io_rate_limiter, IoPriority, IoType}; use online_config::{ConfigChange, ConfigManager, ConfigValue, Result as CfgResult}; use strum::IntoEnumIterator; use tikv_kv::Engine; @@ -98,10 +98,10 @@ impl ConfigManager for StorageConfigManger { limiter.set_io_rate_limit(limit.0 as usize); } - for t in IOType::iter() { + for t in IoType::iter() { if let Some(priority) = io_rate_limit.remove(&(t.as_str().to_owned() + "_priority")) { - let priority: IOPriority = priority.try_into()?; + let priority: IoPriority = priority.try_into()?; limiter.set_io_priority(t, priority); } } diff --git a/src/storage/kv/test_engine_builder.rs b/src/storage/kv/test_engine_builder.rs index 0867c30fb31..b1b727f898c 100644 --- a/src/storage/kv/test_engine_builder.rs +++ b/src/storage/kv/test_engine_builder.rs @@ -8,7 +8,7 @@ use std::{ use causal_ts::tests::DummyRawTsTracker; use engine_rocks::RocksCfOptions; use engine_traits::{CfName, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE}; -use file_system::IORateLimiter; +use file_system::IoRateLimiter; use kvproto::kvrpcpb::ApiVersion; use tikv_util::config::ReadableSize; @@ -27,7 +27,7 @@ const TEMP_DIR: &str = ""; pub struct TestEngineBuilder { path: Option, cfs: Option>, - io_rate_limiter: Option>, + io_rate_limiter: Option>, api_version: ApiVersion, } @@ -62,7 +62,7 @@ impl TestEngineBuilder { self } - pub fn io_rate_limiter(mut self, limiter: Option>) -> Self { + pub fn io_rate_limiter(mut self, limiter: Option>) -> Self { self.io_rate_limiter = limiter; self } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 6338525ab02..620bca80b32 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3152,7 +3152,7 @@ mod tests { *, }; use crate::{ - config::TitanDBConfig, + config::TitanDbConfig, coprocessor::checksum_crc64_xor, storage::{ config::BlockCacheConfig, @@ -3668,7 +3668,7 @@ mod tests { #[test] fn test_scan_with_key_only() { let db_config = crate::config::DbConfig { - titan: TitanDBConfig { + 
titan: TitanDbConfig { enabled: true, ..Default::default() }, diff --git a/src/storage/mvcc/consistency_check.rs b/src/storage/mvcc/consistency_check.rs index d715ec598c2..c27b96840d0 100644 --- a/src/storage/mvcc/consistency_check.rs +++ b/src/storage/mvcc/consistency_check.rs @@ -481,7 +481,7 @@ mod tests { #[test] fn test_mvcc_info_collector() { - use engine_test::ctor::{ColumnFamilyOptions, DBOptions}; + use engine_test::ctor::{CfOptions, DbOptions}; use engine_traits::SyncMutable; use txn_types::TimeStamp; @@ -494,12 +494,12 @@ mod tests { let path = tmp.path().to_str().unwrap(); let engine = engine_test::kv::new_engine_opt( path, - DBOptions::default(), + DbOptions::default(), vec![ - (CF_DEFAULT, ColumnFamilyOptions::new()), - (CF_WRITE, ColumnFamilyOptions::new()), - (CF_LOCK, ColumnFamilyOptions::new()), - (CF_RAFT, ColumnFamilyOptions::new()), + (CF_DEFAULT, CfOptions::new()), + (CF_WRITE, CfOptions::new()), + (CF_LOCK, CfOptions::new()), + (CF_RAFT, CfOptions::new()), ], ) .unwrap(); diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 377d2c94022..c45fabe2540 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -632,7 +632,7 @@ pub mod tests { use concurrency_manager::ConcurrencyManager; use engine_rocks::{ - properties::MvccPropertiesCollectorFactory, RocksCfOptions, RocksDBOptions, RocksEngine, + properties::MvccPropertiesCollectorFactory, RocksCfOptions, RocksDbOptions, RocksEngine, RocksSnapshot, }; use engine_traits::{ @@ -900,7 +900,7 @@ pub mod tests { } pub fn open_db(path: &str, with_properties: bool) -> RocksEngine { - let db_opt = RocksDBOptions::default(); + let db_opt = RocksDbOptions::default(); let mut cf_opts = RocksCfOptions::default(); cf_opts.set_write_buffer_size(32 * 1024 * 1024); if with_properties { diff --git a/src/storage/read_pool.rs b/src/storage/read_pool.rs index c25ae15d46b..a0aee5a185f 100644 --- a/src/storage/read_pool.rs +++ 
b/src/storage/read_pool.rs @@ -5,7 +5,7 @@ use std::sync::{Arc, Mutex}; -use file_system::{set_io_type, IOType}; +use file_system::{set_io_type, IoType}; use tikv_util::yatp_pool::{Config, DefaultTicker, FuturePool, PoolTicker, YatpPoolBuilder}; use crate::{ @@ -49,7 +49,7 @@ pub fn build_read_pool( .config(config) .after_start(move || { set_tls_engine(engine.lock().unwrap().clone()); - set_io_type(IOType::ForegroundRead); + set_io_type(IoType::ForegroundRead); }) .before_stop(move || unsafe { // Safety: we call `set_` and `destroy_` with the same engine type. @@ -79,7 +79,7 @@ pub fn build_read_pool_for_test( .name_prefix(name) .after_start(move || { set_tls_engine(engine.lock().unwrap().clone()); - set_io_type(IOType::ForegroundRead); + set_io_type(IoType::ForegroundRead); }) // Safety: we call `set_` and `destroy_` with the same engine type. .before_stop(|| unsafe { destroy_tls_engine::() }) diff --git a/src/storage/txn/flow_controller/singleton_flow_controller.rs b/src/storage/txn/flow_controller/singleton_flow_controller.rs index 40bb50a88c8..8cb901187dd 100644 --- a/src/storage/txn/flow_controller/singleton_flow_controller.rs +++ b/src/storage/txn/flow_controller/singleton_flow_controller.rs @@ -17,7 +17,7 @@ use std::{ use collections::HashMap; use engine_rocks::FlowInfo; -use engine_traits::{CFNamesExt, FlowControlFactorsExt}; +use engine_traits::{CfNamesExt, FlowControlFactorsExt}; use getset::{CopyGetters, Setters}; use num_traits::cast::{AsPrimitive, FromPrimitive}; use rand::Rng; @@ -115,7 +115,7 @@ impl EngineFlowController { } } - pub fn new( + pub fn new( config: &FlowControlConfig, engine: E, flow_info_receiver: Receiver, @@ -443,7 +443,7 @@ impl Default for CfFlowChecker { } #[derive(CopyGetters, Setters)] -pub(super) struct FlowChecker { +pub(super) struct FlowChecker { pub soft_pending_compaction_bytes_limit: u64, hard_pending_compaction_bytes_limit: u64, memtables_threshold: u64, @@ -473,7 +473,7 @@ pub(super) struct FlowChecker FlowChecker { 
+impl FlowChecker { pub fn new( config: &FlowControlConfig, engine: E, @@ -994,7 +994,7 @@ pub(super) mod tests { use std::sync::atomic::AtomicU64; use engine_rocks::RocksCfOptions; - use engine_traits::{CFOptionsExt, Result}; + use engine_traits::{CfOptionsExt, Result}; use super::{super::FlowController, *}; @@ -1017,15 +1017,15 @@ pub(super) mod tests { } } - impl CFNamesExt for EngineStub { + impl CfNamesExt for EngineStub { fn cf_names(&self) -> Vec<&str> { vec!["default"] } } - impl CFOptionsExt for EngineStub { - type ColumnFamilyOptions = RocksCfOptions; - fn get_options_cf(&self, _cf: &str) -> Result { + impl CfOptionsExt for EngineStub { + type CfOptions = RocksCfOptions; + fn get_options_cf(&self, _cf: &str) -> Result { unimplemented!(); } diff --git a/src/storage/txn/flow_controller/tablet_flow_controller.rs b/src/storage/txn/flow_controller/tablet_flow_controller.rs index 14819127389..a35517246c5 100644 --- a/src/storage/txn/flow_controller/tablet_flow_controller.rs +++ b/src/storage/txn/flow_controller/tablet_flow_controller.rs @@ -13,7 +13,7 @@ use std::{ use collections::HashMap; use engine_rocks::FlowInfo; -use engine_traits::{CFNamesExt, FlowControlFactorsExt, TabletFactory}; +use engine_traits::{CfNamesExt, FlowControlFactorsExt, TabletFactory}; use rand::Rng; use tikv_util::{sys::thread::StdThreadBuildWrapper, time::Limiter}; @@ -47,7 +47,7 @@ impl Drop for TabletFlowController { } impl TabletFlowController { - pub fn new( + pub fn new( config: &FlowControlConfig, tablet_factory: Arc + Send + Sync>, flow_info_receiver: Receiver, @@ -86,7 +86,7 @@ impl TabletFlowController { struct FlowInfoDispatcher; impl FlowInfoDispatcher { - fn start( + fn start( rx: Receiver, flow_info_receiver: Receiver, tablet_factory: Arc + Send + Sync>, diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index 5894efc3226..78a891b650e 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -7,7 +7,7 @@ use std::{ }; use 
collections::HashMap; -use file_system::{set_io_type, IOType}; +use file_system::{set_io_type, IoType}; use kvproto::pdpb::QueryKind; use prometheus::local::*; use raftstore::store::WriteStats; @@ -74,7 +74,7 @@ impl SchedPool { // the tls_engine invariants. .after_start(move || { set_tls_engine(engine.lock().unwrap().clone()); - set_io_type(IOType::ForegroundWrite); + set_io_type(IoType::ForegroundWrite); }) .before_stop(move || unsafe { // Safety: we ensure the `set_` and `destroy_` calls use the same engine type. diff --git a/tests/benches/misc/writebatch/bench_writebatch.rs b/tests/benches/misc/writebatch/bench_writebatch.rs index cde64280184..f396976e3c1 100644 --- a/tests/benches/misc/writebatch/bench_writebatch.rs +++ b/tests/benches/misc/writebatch/bench_writebatch.rs @@ -1,6 +1,6 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. -use engine_rocks::{RocksCfOptions, RocksDBOptions, RocksEngine, RocksWriteBatchVec}; +use engine_rocks::{RocksCfOptions, RocksDbOptions, RocksEngine, RocksWriteBatchVec}; use engine_traits::{Mutable, WriteBatch, WriteBatchExt, CF_DEFAULT}; use tempfile::Builder; use test::Bencher; @@ -22,7 +22,7 @@ fn bench_writebatch_impl(b: &mut Bencher, batch_keys: usize) { .prefix("/tmp/rocksdb_write_batch_bench") .tempdir() .unwrap(); - let mut opts = RocksDBOptions::default(); + let mut opts = RocksDbOptions::default(); opts.create_if_missing(true); opts.enable_unordered_write(false); opts.enable_pipelined_write(false); @@ -111,7 +111,7 @@ fn bench_writebatch_without_capacity(b: &mut Bencher) { .prefix("/tmp/rocksdb_write_batch_bench") .tempdir() .unwrap(); - let mut opts = RocksDBOptions::default(); + let mut opts = RocksDbOptions::default(); opts.create_if_missing(true); opts.enable_unordered_write(false); opts.enable_pipelined_write(false); @@ -134,7 +134,7 @@ fn bench_writebatch_with_capacity(b: &mut Bencher) { .prefix("/tmp/rocksdb_write_batch_bench") .tempdir() .unwrap(); - let mut opts = RocksDBOptions::default(); 
+ let mut opts = RocksDbOptions::default(); opts.create_if_missing(true); opts.enable_unordered_write(false); opts.enable_pipelined_write(false); diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index 818c7ba2739..10192db7bf0 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -219,7 +219,7 @@ fn test_paging_scan() { select_resp.merge_from_bytes(resp.get_data()).unwrap(); let mut row_count = 0; - let spliter = DAGChunkSpliter::new(select_resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(select_resp.take_chunks().into(), 3); for (row, (id, name, cnt)) in spliter.zip(exp) { let name_datum = name.unwrap().as_bytes().into(); let expected_encoded = datum::encode_value( @@ -293,7 +293,7 @@ fn test_paging_scan_multi_ranges() { select_resp.merge_from_bytes(resp.get_data()).unwrap(); let mut row_count = 0; - let spliter = DAGChunkSpliter::new(select_resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(select_resp.take_chunks().into(), 3); for (row, (id, name, cnt)) in spliter.zip(exp) { let name_datum = name.unwrap().as_bytes().into(); let expected_encoded = datum::encode_value( @@ -349,7 +349,7 @@ fn test_paging_scan_multi_ranges() { select_resp.merge_from_bytes(resp.get_data()).unwrap(); let mut row_count = 0; - let spliter = DAGChunkSpliter::new(select_resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(select_resp.take_chunks().into(), 3); for (row, (id, name, cnt)) in spliter.zip(exp) { let name_datum = name.unwrap().as_bytes().into(); let expected_encoded = datum::encode_value( diff --git a/tests/failpoints/cases/test_encryption.rs b/tests/failpoints/cases/test_encryption.rs index ccb4d698e3f..502e31afff9 100644 --- a/tests/failpoints/cases/test_encryption.rs +++ b/tests/failpoints/cases/test_encryption.rs @@ -1,6 +1,6 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use encryption::{compat, FileDictionaryFile}; +use encryption::FileDictionaryFile; use kvproto::encryptionpb::{EncryptionMethod, FileInfo}; #[test] @@ -47,7 +47,7 @@ fn test_file_dict_file_record_corrupted() { fn create_file_info(id: u64, method: EncryptionMethod) -> FileInfo { FileInfo { key_id: id, - method: compat(method), + method, ..Default::default() } } diff --git a/tests/failpoints/cases/test_gc_worker.rs b/tests/failpoints/cases/test_gc_worker.rs index c4e3e4dee71..73031b10283 100644 --- a/tests/failpoints/cases/test_gc_worker.rs +++ b/tests/failpoints/cases/test_gc_worker.rs @@ -17,7 +17,7 @@ use raftstore::coprocessor::{ use test_raftstore::*; use tikv::{ server::gc_worker::{ - AutoGcConfig, GcSafePointProvider, GcTask, Result as GcWorkerResult, TestGCRunner, + AutoGcConfig, GcSafePointProvider, GcTask, Result as GcWorkerResult, TestGcRunner, }, storage::{ kv::TestEngineBuilder, @@ -329,7 +329,7 @@ fn test_error_in_compaction_filter() { let fp = "write_compaction_filter_flush_write_batch"; fail::cfg(fp, "return").unwrap(); - let mut gc_runner = TestGCRunner::new(200); + let mut gc_runner = TestGcRunner::new(200); gc_runner.gc(&raw_engine); match gc_runner.gc_receiver.recv().unwrap() { @@ -382,7 +382,7 @@ fn test_orphan_versions_from_compaction_filter() { let fp = "write_compaction_filter_flush_write_batch"; fail::cfg(fp, "return").unwrap(); - let mut gc_runner = TestGCRunner::new(100); + let mut gc_runner = TestGcRunner::new(100); gc_runner.gc_scheduler = cluster.sim.rl().get_gc_worker(1).scheduler(); gc_runner.gc(&engine.kv); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 8c1be52be78..e8449624a0f 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -14,7 +14,7 @@ use engine_rocks::{ }, }; use engine_traits::PerfLevel; -use file_system::{IOPriority, IORateLimitMode}; +use file_system::{IoPriority, IoRateLimitMode}; use kvproto::encryptionpb::EncryptionMethod; use 
pd_client::Config as PdConfig; use raft_log_engine::{ReadableSize as RaftEngineReadableSize, RecoveryMode}; @@ -32,7 +32,7 @@ use tikv::{ lock_manager::Config as PessimisticTxnConfig, Config as ServerConfig, }, storage::config::{ - BlockCacheConfig, Config as StorageConfig, FlowControlConfig, IORateLimitConfig, + BlockCacheConfig, Config as StorageConfig, FlowControlConfig, IoRateLimitConfig, }, }; use tikv_util::config::{LogFormat, ReadableDuration, ReadableSize}; @@ -261,7 +261,7 @@ fn test_serde_custom_tikv_config() { max_sorted_runs: 100, gc_merge_rewrite: false, }; - let titan_db_config = TitanDBConfig { + let titan_db_config = TitanDbConfig { enabled: true, dirname: "bar".to_owned(), disable_gc: false, @@ -681,21 +681,21 @@ fn test_serde_custom_tikv_config() { high_pri_pool_ratio: 0.8, memory_allocator: Some(String::from("nodump")), }, - io_rate_limit: IORateLimitConfig { + io_rate_limit: IoRateLimitConfig { max_bytes_per_sec: ReadableSize::mb(1000), - mode: IORateLimitMode::AllIo, + mode: IoRateLimitMode::AllIo, strict: true, - foreground_read_priority: IOPriority::Low, - foreground_write_priority: IOPriority::Low, - flush_priority: IOPriority::Low, - level_zero_compaction_priority: IOPriority::Low, - compaction_priority: IOPriority::High, - replication_priority: IOPriority::Low, - load_balance_priority: IOPriority::Low, - gc_priority: IOPriority::High, - import_priority: IOPriority::High, - export_priority: IOPriority::High, - other_priority: IOPriority::Low, + foreground_read_priority: IoPriority::Low, + foreground_write_priority: IoPriority::Low, + flush_priority: IoPriority::Low, + level_zero_compaction_priority: IoPriority::Low, + compaction_priority: IoPriority::High, + replication_priority: IoPriority::Low, + load_balance_priority: IoPriority::Low, + gc_priority: IoPriority::High, + import_priority: IoPriority::High, + export_priority: IoPriority::High, + other_priority: IoPriority::Low, }, background_error_recovery_window: ReadableDuration::hours(1), 
}; diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index 69ce131ec8b..024ebddbdea 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -65,7 +65,7 @@ fn test_select() { // for dag selection let req = DAGSelect::from(&product).build(); let mut resp = handle_select(&endpoint, req); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); for (row, (id, name, cnt)) in spliter.zip(data) { let name_datum = name.map(|s| s.as_bytes()).into(); let expected_encoded = datum::encode_value( @@ -100,7 +100,7 @@ fn test_batch_row_limit() { let req = DAGSelect::from(&product).build(); let mut resp = handle_select(&endpoint, req); check_chunk_datum_count(resp.get_chunks(), chunk_datum_limit); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); for (row, (id, name, cnt)) in spliter.zip(data) { let name_datum = name.map(|s| s.as_bytes()).into(); let expected_encoded = datum::encode_value( @@ -171,7 +171,7 @@ fn test_stream_batch_row_limit() { let chunk_data_limit = stream_row_limit * 3; // we have 3 fields. 
check_chunk_datum_count(&chunks, chunk_data_limit); - let spliter = DAGChunkSpliter::new(chunks, 3); + let spliter = DagChunkSpliter::new(chunks, 3); let j = cmp::min((i + 1) * stream_row_limit, data.len()); let cur_data = &data[i * stream_row_limit..j]; for (row, &(id, name, cnt)) in spliter.zip(cur_data) { @@ -205,7 +205,7 @@ fn test_select_after_lease() { thread::sleep(cluster.cfg.raft_store.raft_store_max_leader_lease.0); let req = DAGSelect::from(&product).build_with(ctx, &[0]); let mut resp = handle_select(&endpoint, req); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); for (row, (id, name, cnt)) in spliter.zip(data) { let name_datum = name.map(|s| s.as_bytes()).into(); let expected_encoded = datum::encode_value( @@ -279,7 +279,7 @@ fn test_group_by() { let mut resp = handle_select(&endpoint, req); // should only have name:0, name:2 and name:1 let mut row_count = 0; - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 1); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 1); let mut results = spliter.collect::>>(); sort_by!(results, 0, Bytes); for (row, name) in results.iter().zip(&[b"name:0", b"name:1", b"name:2"]) { @@ -322,7 +322,7 @@ fn test_aggr_count() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let exp_len = exp.len(); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 2); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 2); let mut results = spliter.collect::>>(); sort_by!(results, 1, Bytes); for (row, (name, cnt)) in results.iter().zip(exp) { @@ -351,7 +351,7 @@ fn test_aggr_count() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let exp_len = exp.len(); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); let mut results = spliter.collect::>>(); sort_by!(results, 1, Bytes); for 
(row, (gk_data, cnt)) in results.iter().zip(exp) { @@ -400,7 +400,7 @@ fn test_aggr_first() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let exp_len = exp.len(); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 2); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 2); let mut results = spliter.collect::>>(); sort_by!(results, 1, Bytes); for (row, (name, id)) in results.iter().zip(exp) { @@ -431,7 +431,7 @@ fn test_aggr_first() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let exp_len = exp.len(); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 2); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 2); let mut results = spliter.collect::>>(); sort_by!(results, 0, Bytes); for (row, (count, name)) in results.iter().zip(exp) { @@ -483,7 +483,7 @@ fn test_aggr_avg() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let exp_len = exp.len(); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); let mut results = spliter.collect::>>(); sort_by!(results, 2, Bytes); for (row, (name, (sum, cnt))) in results.iter().zip(exp) { @@ -526,7 +526,7 @@ fn test_aggr_sum() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let exp_len = exp.len(); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 2); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 2); let mut results = spliter.collect::>>(); sort_by!(results, 1, Bytes); for (row, (name, cnt)) in results.iter().zip(exp) { @@ -594,7 +594,7 @@ fn test_aggr_extre() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let exp_len = exp.len(); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); let mut results = spliter.collect::>>(); sort_by!(results, 2, Bytes); for (row, (name, max, 
min)) in results.iter().zip(exp) { @@ -672,7 +672,7 @@ fn test_aggr_bit_ops() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let exp_len = exp.len(); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 4); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 4); let mut results = spliter.collect::>>(); sort_by!(results, 3, Bytes); for (row, (name, bitand, bitor, bitxor)) in results.iter().zip(exp) { @@ -716,7 +716,7 @@ fn test_order_by_column() { .build(); let mut resp = handle_select(&endpoint, req); let mut row_count = 0; - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); for (row, (id, name, cnt)) in spliter.zip(exp) { let name_datum = name.map(|s| s.as_bytes()).into(); let expected_encoded = datum::encode_value( @@ -753,7 +753,7 @@ fn test_order_by_pk_with_select_from_index() { .build(); let mut resp = handle_select(&endpoint, req); let mut row_count = 0; - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); for (row, (id, name, cnt)) in spliter.zip(expect) { let name_datum = name.map(|s| s.as_bytes()).into(); let expected_encoded = datum::encode_value( @@ -786,7 +786,7 @@ fn test_limit() { let req = DAGSelect::from(&product).limit(5).build(); let mut resp = handle_select(&endpoint, req); let mut row_count = 0; - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); for (row, (id, name, cnt)) in spliter.zip(expect) { let name_datum = name.map(|s| s.as_bytes()).into(); let expected_encoded = datum::encode_value( @@ -823,7 +823,7 @@ fn test_reverse() { .build(); let mut resp = handle_select(&endpoint, req); let mut row_count = 0; - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); for (row, 
(id, name, cnt)) in spliter.zip(expect) { let name_datum = name.map(|s| s.as_bytes()).into(); let expected_encoded = datum::encode_value( @@ -855,7 +855,7 @@ fn test_index() { let req = DAGSelect::from_index(&product, &product["id"]).build(); let mut resp = handle_select(&endpoint, req); let mut row_count = 0; - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 1); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 1); for (row, (id, ..)) in spliter.zip(data) { let expected_encoded = datum::encode_value(&mut EvalContext::default(), &[id.into()]).unwrap(); @@ -889,7 +889,7 @@ fn test_index_reverse_limit() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 1); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 1); for (row, (id, ..)) in spliter.zip(expect) { let expected_encoded = datum::encode_value(&mut EvalContext::default(), &[id.into()]).unwrap(); @@ -919,7 +919,7 @@ fn test_limit_oom() { .build(); let mut resp = handle_select(&endpoint, req); let mut row_count = 0; - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 1); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 1); for (row, (id, ..)) in spliter.zip(data) { let expected_encoded = datum::encode_value(&mut EvalContext::default(), &[id.into()]).unwrap(); @@ -959,7 +959,7 @@ fn test_del_select() { let resp = handle_request(&endpoint, req); let mut sel_resp = SelectResponse::default(); sel_resp.merge_from_bytes(resp.get_data()).unwrap(); - let spliter = DAGChunkSpliter::new(sel_resp.take_chunks().into(), 1); + let spliter = DagChunkSpliter::new(sel_resp.take_chunks().into(), 1); let mut row_count = 0; for _ in spliter { row_count += 1; @@ -992,7 +992,7 @@ fn test_index_group_by() { let mut resp = handle_select(&endpoint, req); // should only have name:0, name:2 and name:1 let mut row_count = 0; - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 1); + 
let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 1); let mut results = spliter.collect::>>(); sort_by!(results, 0, Bytes); for (row, name) in results.iter().zip(&[b"name:0", b"name:1", b"name:2"]) { @@ -1025,7 +1025,7 @@ fn test_index_aggr_count() { .output_offsets(Some(vec![0])) .build(); let mut resp = handle_select(&endpoint, req); - let mut spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 1); + let mut spliter = DagChunkSpliter::new(resp.take_chunks().into(), 1); let expected_encoded = datum::encode_value( &mut EvalContext::default(), &[Datum::U64(data.len() as u64)], @@ -1053,7 +1053,7 @@ fn test_index_aggr_count() { resp = handle_select(&endpoint, req); let mut row_count = 0; let exp_len = exp.len(); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 2); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 2); let mut results = spliter.collect::>>(); sort_by!(results, 1, Bytes); for (row, (name, cnt)) in results.iter().zip(exp) { @@ -1080,7 +1080,7 @@ fn test_index_aggr_count() { resp = handle_select(&endpoint, req); let mut row_count = 0; let exp_len = exp.len(); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); let mut results = spliter.collect::>>(); sort_by!(results, 1, Bytes); for (row, (gk_data, cnt)) in results.iter().zip(exp) { @@ -1124,7 +1124,7 @@ fn test_index_aggr_first() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let exp_len = exp.len(); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 2); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 2); let mut results = spliter.collect::>>(); sort_by!(results, 1, Bytes); for (row, (name, id)) in results.iter().zip(exp) { @@ -1182,7 +1182,7 @@ fn test_index_aggr_avg() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let exp_len = exp.len(); - let spliter = 
DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); let mut results = spliter.collect::>>(); sort_by!(results, 2, Bytes); for (row, (name, (sum, cnt))) in results.iter().zip(exp) { @@ -1225,7 +1225,7 @@ fn test_index_aggr_sum() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let exp_len = exp.len(); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 2); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 2); let mut results = spliter.collect::>>(); sort_by!(results, 1, Bytes); for (row, (name, cnt)) in results.iter().zip(exp) { @@ -1292,7 +1292,7 @@ fn test_index_aggr_extre() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let exp_len = exp.len(); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); let mut results = spliter.collect::>>(); sort_by!(results, 2, Bytes); for (row, (name, max, min)) in results.iter().zip(exp) { @@ -1359,7 +1359,7 @@ fn test_where() { let req = DAGSelect::from(&product).where_expr(cond).build(); let mut resp = handle_select(&endpoint, req); - let mut spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let mut spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); let row = spliter.next().unwrap(); let (id, name, cnt) = data[2]; let name_datum = name.map(|s| s.as_bytes()).into(); @@ -1504,7 +1504,7 @@ fn test_handle_truncate() { assert!(!resp.has_error()); assert!(!resp.get_warnings().is_empty()); // check data - let mut spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let mut spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); let row = spliter.next().unwrap(); let (id, name, cnt) = data[2]; let name_datum = name.map(|s| s.as_bytes()).into(); @@ -1554,7 +1554,7 @@ fn test_default_val() { let req = DAGSelect::from(&tbl).limit(5).build(); let mut resp = handle_select(&endpoint, 
req); let mut row_count = 0; - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 4); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 4); for (row, (id, name, cnt)) in spliter.zip(expect) { let name_datum = name.map(|s| s.as_bytes()).into(); let expected_encoded = datum::encode_value( @@ -1585,7 +1585,7 @@ fn test_output_offsets() { .output_offsets(Some(vec![1])) .build(); let mut resp = handle_select(&endpoint, req); - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 1); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 1); for (row, (_, name, _)) in spliter.zip(data) { let name_datum = name.map(|s| s.as_bytes()).into(); let expected_encoded = @@ -1845,7 +1845,7 @@ fn test_copr_bypass_or_access_locks() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); for (row, (id, name, cnt)) in spliter.zip(expected_data) { let name_datum = name.map(|s| s.as_bytes()).into(); let expected_encoded = datum::encode_value( @@ -1950,7 +1950,7 @@ fn test_rc_read() { let mut resp = handle_select(&endpoint, req); let mut row_count = 0; - let spliter = DAGChunkSpliter::new(resp.take_chunks().into(), 3); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); for (row, (id, name, cnt)) in spliter.zip(expected_data.clone()) { let name_datum = name.map(|s| s.as_bytes()).into(); let expected_encoded = datum::encode_value( diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index 49ecf13c1d9..2bc05726bfc 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -12,7 +12,7 @@ use std::{ }; use engine_traits::{KvEngine, RaftEngineReadOnly}; -use file_system::{IOOp, IOType}; +use file_system::{IoOp, IoType}; use futures::executor::block_on; use grpcio::Environment; use 
kvproto::raft_serverpb::*; @@ -503,23 +503,23 @@ fn test_inspected_snapshot() { .unwrap() .statistics() .unwrap(); - assert_eq!(stats.fetch(IOType::Replication, IOOp::Read), 0); - assert_eq!(stats.fetch(IOType::Replication, IOOp::Write), 0); + assert_eq!(stats.fetch(IoType::Replication, IoOp::Read), 0); + assert_eq!(stats.fetch(IoType::Replication, IoOp::Write), 0); // Make sure snapshot read hits disk cluster.flush_data(); // Let store 3 inform leader to generate a snapshot. cluster.run_node(3).unwrap(); must_get_equal(&cluster.get_engine(3), b"k2", b"v2"); - assert_ne!(stats.fetch(IOType::Replication, IOOp::Read), 0); - assert_ne!(stats.fetch(IOType::Replication, IOOp::Write), 0); + assert_ne!(stats.fetch(IoType::Replication, IoOp::Read), 0); + assert_ne!(stats.fetch(IoType::Replication, IoOp::Write), 0); pd_client.must_remove_peer(1, new_peer(2, 2)); - assert_eq!(stats.fetch(IOType::LoadBalance, IOOp::Read), 0); - assert_eq!(stats.fetch(IOType::LoadBalance, IOOp::Write), 0); + assert_eq!(stats.fetch(IoType::LoadBalance, IoOp::Read), 0); + assert_eq!(stats.fetch(IoType::LoadBalance, IoOp::Write), 0); pd_client.must_add_peer(1, new_peer(2, 2)); must_get_equal(&cluster.get_engine(2), b"k2", b"v2"); - assert_ne!(stats.fetch(IOType::LoadBalance, IOOp::Read), 0); - assert_ne!(stats.fetch(IOType::LoadBalance, IOOp::Write), 0); + assert_ne!(stats.fetch(IoType::LoadBalance, IoOp::Read), 0); + assert_ne!(stats.fetch(IoType::LoadBalance, IoOp::Write), 0); } // Test snapshot generating and receiving can share one I/O limiter fairly. 
diff --git a/tests/integrations/raftstore/test_tombstone.rs b/tests/integrations/raftstore/test_tombstone.rs index 21adc354295..3d7fc235cad 100644 --- a/tests/integrations/raftstore/test_tombstone.rs +++ b/tests/integrations/raftstore/test_tombstone.rs @@ -3,7 +3,7 @@ use std::{sync::Arc, thread, time::Duration}; use crossbeam::channel; -use engine_traits::{CFNamesExt, Iterable, Peekable, RaftEngineReadOnly, SyncMutable, CF_RAFT}; +use engine_traits::{CfNamesExt, Iterable, Peekable, RaftEngineReadOnly, SyncMutable, CF_RAFT}; use kvproto::raft_serverpb::{PeerState, RaftMessage, RegionLocalState, StoreIdent}; use protobuf::Message; use raft::eraftpb::MessageType; diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index cd311386769..ec8bf906e1c 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -11,7 +11,7 @@ use engine_rocks::{ RocksSstWriterBuilder, }; use engine_traits::{ - CFOptionsExt, CompactExt, DeleteStrategy, Engines, KvEngine, MiscExt, Range, SstWriter, + CfOptionsExt, CompactExt, DeleteStrategy, Engines, KvEngine, MiscExt, Range, SstWriter, SstWriterBuilder, SyncMutable, CF_DEFAULT, CF_WRITE, }; use keys::data_key; From 84654c87d6def968a197d47babad8e08acdf685a Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Fri, 29 Jul 2022 09:01:11 +0800 Subject: [PATCH 0120/1149] raftstore: add more arguments to pre_exec observer (#13158) ref tikv/tikv#12849 Add more arguments to pre_exec observer Signed-off-by: CalvinNeo --- .../raftstore/src/coprocessor/dispatcher.rs | 27 ++++++++++++++----- components/raftstore/src/coprocessor/mod.rs | 10 +++++-- components/raftstore/src/store/fsm/apply.rs | 10 +++++-- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index 8122f54b12d..c752e629af1 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ 
b/components/raftstore/src/coprocessor/dispatcher.rs @@ -417,13 +417,14 @@ impl CoprocessorHost { } } - pub fn pre_exec(&self, region: &Region, cmd: &RaftCmdRequest) -> bool { + // (index, term) is for the applying entry. + pub fn pre_exec(&self, region: &Region, cmd: &RaftCmdRequest, index: u64, term: u64) -> bool { let mut ctx = ObserverContext::new(region); if !cmd.has_admin_request() { let query = cmd.get_requests(); for observer in &self.registry.query_observers { let observer = observer.observer.inner(); - if observer.pre_exec_query(&mut ctx, query) { + if observer.pre_exec_query(&mut ctx, query, index, term) { return true; } } @@ -432,7 +433,7 @@ impl CoprocessorHost { let admin = cmd.get_admin_request(); for observer in &self.registry.admin_observers { let observer = observer.observer.inner(); - if observer.pre_exec_admin(&mut ctx, admin) { + if observer.pre_exec_admin(&mut ctx, admin, index, term) { return true; } } @@ -665,7 +666,13 @@ mod tests { ctx.bypass = self.bypass.load(Ordering::SeqCst); } - fn pre_exec_admin(&self, ctx: &mut ObserverContext<'_>, _: &AdminRequest) -> bool { + fn pre_exec_admin( + &self, + ctx: &mut ObserverContext<'_>, + _: &AdminRequest, + _: u64, + _: u64, + ) -> bool { self.called.fetch_add(16, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); false @@ -696,7 +703,13 @@ mod tests { ctx.bypass = self.bypass.load(Ordering::SeqCst); } - fn pre_exec_query(&self, ctx: &mut ObserverContext<'_>, _: &[Request]) -> bool { + fn pre_exec_query( + &self, + ctx: &mut ObserverContext<'_>, + _: &[Request], + _: u64, + _: u64, + ) -> bool { self.called.fetch_add(15, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); false @@ -839,12 +852,12 @@ mod tests { let mut query_req = RaftCmdRequest::default(); query_req.set_requests(vec![Request::default()].into()); - host.pre_exec(®ion, &query_req); + host.pre_exec(®ion, &query_req, 0, 0); assert_all!([&ob.called], &[103]); // 15 let mut admin_req = 
RaftCmdRequest::default(); admin_req.set_admin_request(AdminRequest::default()); - host.pre_exec(®ion, &admin_req); + host.pre_exec(®ion, &admin_req, 0, 0); assert_all!([&ob.called], &[119]); // 16 } diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 8a4975b1459..e7c351262fa 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -96,7 +96,13 @@ pub trait AdminObserver: Coprocessor { /// Hook before exec admin request, returns whether we should skip this /// admin. - fn pre_exec_admin(&self, _: &mut ObserverContext<'_>, _: &AdminRequest) -> bool { + fn pre_exec_admin( + &self, + _: &mut ObserverContext<'_>, + _: &AdminRequest, + _: u64, + _: u64, + ) -> bool { false } @@ -135,7 +141,7 @@ pub trait QueryObserver: Coprocessor { /// Hook before exec write request, returns whether we should skip this /// write. - fn pre_exec_query(&self, _: &mut ObserverContext<'_>, _: &[Request]) -> bool { + fn pre_exec_query(&self, _: &mut ObserverContext<'_>, _: &[Request], _: u64, _: u64) -> bool { false } diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 284015b0eb8..1b64c9a2787 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -1273,7 +1273,7 @@ where // E.g. `RaftApplyState` must not be changed. let mut origin_epoch = None; - let (resp, exec_result) = if ctx.host.pre_exec(&self.region, req) { + let (resp, exec_result) = if ctx.host.pre_exec(&self.region, req, index, term) { // One of the observers want to filter execution of the command. 
let mut resp = RaftCmdResponse::default(); if !req.get_header().get_uuid().is_empty() { @@ -5000,7 +5000,13 @@ mod tests { } } - fn pre_exec_admin(&self, _: &mut ObserverContext<'_>, req: &AdminRequest) -> bool { + fn pre_exec_admin( + &self, + _: &mut ObserverContext<'_>, + req: &AdminRequest, + _: u64, + _: u64, + ) -> bool { let cmd_type = req.get_cmd_type(); if cmd_type == AdminCmdType::CompactLog && self.filter_compact_log.deref().load(Ordering::SeqCst) From 1e13ddf3bf12c00afb4d049d05978fecae9a6067 Mon Sep 17 00:00:00 2001 From: Zwb Date: Fri, 29 Jul 2022 15:11:12 +0800 Subject: [PATCH 0121/1149] Make max_subcompactions dynamically changeable (#13151) close tikv/tikv#13145 Make max_subcompactions dynamically changeable Signed-off-by: Wenbo Zhang Co-authored-by: Ti Chi Robot --- src/config.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/config.rs b/src/config.rs index 6c345b8b773..23dea43d47a 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1239,6 +1239,15 @@ impl DbConfig { ) .into()); } + if self.max_sub_compactions == 0 + || self.max_sub_compactions as i32 > self.max_background_jobs + { + return Err(format!( + "max_sub_compactions should be greater than 0 and less than or equal to {:?}", + self.max_background_jobs, + ) + .into()); + } if self.max_background_flushes <= 0 || self.max_background_flushes > limit { return Err(format!( "max_background_flushes should be greater than 0 and less than or equal to {:?}", @@ -1375,7 +1384,6 @@ pub struct RaftDbConfig { pub info_log_dir: String, #[online_config(skip)] pub info_log_level: RocksLogLevel, - #[online_config(skip)] pub max_sub_compactions: u32, pub writable_file_max_buffer_size: ReadableSize, #[online_config(skip)] @@ -1678,6 +1686,11 @@ impl> DbConfigManger { Ok(()) } + fn set_max_subcompactions(&self, max_subcompactions: u32) -> Result<(), Box> { + self.set_db_config(&[("max_subcompactions", &max_subcompactions.to_string())])?; + Ok(()) + } + fn 
validate_cf(&self, cf: &str) -> Result<(), Box> { match (self.db_type, cf) { (DbType::Kv, CF_DEFAULT) @@ -1740,6 +1753,14 @@ impl + Send + Sync> ConfigManager for DbConfigMan self.set_max_background_jobs(max_background_jobs)?; } + if let Some(background_subcompactions_config) = change + .drain_filter(|(name, _)| name == "max_sub_compactions") + .next() + { + let max_subcompactions = background_subcompactions_config.1.into(); + self.set_max_subcompactions(max_subcompactions)?; + } + if let Some(background_flushes_config) = change .drain_filter(|(name, _)| name == "max_background_flushes") .next() From 2f42bc9ce1a2e457ec2a49820ab56ae52adcc6d2 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Fri, 29 Jul 2022 17:47:13 +0800 Subject: [PATCH 0122/1149] tests: do not use `assert!(r.is_ok())` in tests (#13009) ref tikv/tikv#13008 None Signed-off-by: tabokie --- components/backup-stream/src/errors.rs | 5 +- components/backup/src/endpoint.rs | 3 +- .../cdc/tests/failpoints/test_endpoint.rs | 4 +- components/cloud/aws/src/s3.rs | 31 +++---- .../concurrency_manager/src/lock_table.rs | 28 +++--- components/encryption/src/file_dict_file.rs | 5 +- components/encryption/src/io.rs | 19 ++-- components/encryption/src/manager/mod.rs | 3 +- components/engine_traits/src/engine.rs | 2 +- .../src/scenario_writes.rs | 3 +- components/external_storage/src/local.rs | 17 ++-- .../raftstore/src/coprocessor/config.rs | 12 +-- .../src/coprocessor/split_observer.rs | 7 +- components/raftstore/src/store/bootstrap.rs | 10 +-- components/raftstore/src/store/config.rs | 32 +++---- components/raftstore/src/store/fsm/apply.rs | 2 +- components/raftstore/src/store/txn_ext.rs | 18 ++-- components/resource_metering/src/config.rs | 4 +- components/sst_importer/src/sst_importer.rs | 1 - components/tidb_query_aggr/src/lib.rs | 58 +++++------- .../tidb_query_datatype/src/codec/convert.rs | 7 +- .../src/codec/mysql/decimal.rs | 16 ++-- .../src/codec/mysql/json/serde.rs | 8 +- 
.../tidb_query_datatype/src/codec/table.rs | 6 +- .../tidb_query_datatype/src/expr/ctx.rs | 16 ++-- .../src/table_scan_executor.rs | 4 +- .../tidb_query_expr/src/impl_compare_in.rs | 17 ++-- .../tidb_query_expr/src/types/expr_builder.rs | 90 ++++++------------- .../tidb_query_expr/src/types/expr_eval.rs | 51 ++++++----- components/tikv_util/src/config.rs | 15 ++-- components/tikv_util/src/mpsc/batch.rs | 8 +- components/tikv_util/src/worker/mod.rs | 2 +- .../tikv_util/src/yatp_pool/future_pool.rs | 8 +- src/config.rs | 48 +++++----- .../interceptors/concurrency_limiter.rs | 13 ++- src/coprocessor/interceptors/deadline.rs | 10 ++- src/read_pool.rs | 18 ++-- src/server/debug.rs | 2 +- src/server/engine_factory_v2.rs | 12 +-- src/server/gc_worker/gc_worker.rs | 46 +++++----- src/server/resolve.rs | 4 +- src/server/server.rs | 2 +- src/server/service/diagnostics/log.rs | 1 - src/server/status_server/mod.rs | 4 +- src/server/status_server/profile.rs | 6 +- src/storage/config.rs | 4 +- src/storage/mod.rs | 22 ++--- src/storage/mvcc/consistency_check.rs | 2 +- src/storage/mvcc/mod.rs | 5 +- src/storage/mvcc/txn.rs | 10 +-- src/storage/txn/scheduler.rs | 8 +- src/storage/txn/store.rs | 48 +++++----- tests/benches/misc/raftkv/mod.rs | 2 +- tests/failpoints/cases/test_disk_full.rs | 2 +- tests/failpoints/cases/test_merge.rs | 72 +++++++-------- tests/failpoints/cases/test_pd_client.rs | 1 - tests/failpoints/cases/test_split_region.rs | 18 ++-- tests/failpoints/cases/test_storage.rs | 2 +- tests/failpoints/cases/test_transaction.rs | 14 ++- .../failpoints/cases/test_transfer_leader.rs | 90 +++++++++---------- .../integrations/config/test_config_client.rs | 4 +- tests/integrations/pd/test_rpc_client.rs | 4 +- tests/integrations/raftstore/test_merge.rs | 38 ++++---- tests/integrations/raftstore/test_multi.rs | 30 +++---- .../raftstore/test_replica_read.rs | 4 +- .../raftstore/test_split_region.rs | 14 ++- .../raftstore/test_transfer_leader.rs | 22 ++--- 
tests/integrations/server/gc_worker.rs | 2 +- tests/integrations/server/kv_service.rs | 2 +- tests/integrations/server/security.rs | 3 +- tests/integrations/server/status_server.rs | 2 +- .../integrations/storage/test_raft_storage.rs | 5 +- tests/integrations/storage/test_titan.rs | 30 +++---- 73 files changed, 502 insertions(+), 636 deletions(-) diff --git a/components/backup-stream/src/errors.rs b/components/backup-stream/src/errors.rs index b049b0a29be..493cf28babc 100644 --- a/components/backup-stream/src/errors.rs +++ b/components/backup-stream/src/errors.rs @@ -285,8 +285,9 @@ mod test { b.iter(|| { let result: Result<()> = Ok(()); let lucky_number = rand::random::(); - let result = result.context_with(|| format!("lucky: the number is {}", lucky_number)); - assert!(result.is_ok()); + result + .context_with(|| format!("lucky: the number is {}", lucky_number)) + .unwrap(); }) } } diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 7af38d12ac4..35a08c81a2d 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -1676,8 +1676,7 @@ pub mod tests { dst_user_key.as_encoded(), dst_value, ); - let ret = engine.put(&ctx, key, value); - assert!(ret.is_ok()); + engine.put(&ctx, key, value).unwrap(); i += 1; } // flush to disk so that read requests can be traced by TiKV limiter. diff --git a/components/cdc/tests/failpoints/test_endpoint.rs b/components/cdc/tests/failpoints/test_endpoint.rs index 2e9375ce6a5..9a1053681f1 100644 --- a/components/cdc/tests/failpoints/test_endpoint.rs +++ b/components/cdc/tests/failpoints/test_endpoint.rs @@ -309,9 +309,9 @@ fn do_test_no_resolved_ts_before_downstream_initialized(version: &str) { // The first downstream can receive timestamps but the second should receive // nothing. 
let mut rx = event_feeds[0].replace(None).unwrap(); - assert!(recv_timeout(&mut rx, Duration::from_secs(1)).is_ok()); + recv_timeout(&mut rx, Duration::from_secs(1)).unwrap(); let mut rx = event_feeds[1].replace(None).unwrap(); - assert!(recv_timeout(&mut rx, Duration::from_secs(3)).is_err()); + recv_timeout(&mut rx, Duration::from_secs(3)).unwrap_err(); }); th.join().unwrap(); diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index fd5c07c5097..ef13749ccea 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -651,14 +651,13 @@ mod tests { let s = S3Storage::new_creds_dispatcher(config, dispatcher, credentials_provider).unwrap(); - let resp = s - .put( - "mykey", - PutResource(Box::new(magic_contents.as_bytes())), - magic_contents.len() as u64, - ) - .await; - assert!(resp.is_ok()); + s.put( + "mykey", + PutResource(Box::new(magic_contents.as_bytes())), + magic_contents.len() as u64, + ) + .await + .unwrap(); assert_eq!( CLOUD_REQUEST_HISTOGRAM_VEC .get_metric_with_label_values(&["s3", "upload_part"]) @@ -739,17 +738,15 @@ mod tests { // inject 50ms delay fail::cfg(s3_sleep_injected_fp, "return(50)").unwrap(); - let resp = s - .put( - "mykey", - PutResource(Box::new(magic_contents.as_bytes())), - magic_contents.len() as u64, - ) - .await; + s.put( + "mykey", + PutResource(Box::new(magic_contents.as_bytes())), + magic_contents.len() as u64, + ) + .await + .unwrap(); fail::remove(s3_sleep_injected_fp); fail::remove(s3_timeout_injected_fp); - // no timeout - assert!(resp.is_ok()); } #[test] diff --git a/components/concurrency_manager/src/lock_table.rs b/components/concurrency_manager/src/lock_table.rs index da08d9983d1..4169537840e 100644 --- a/components/concurrency_manager/src/lock_table.rs +++ b/components/concurrency_manager/src/lock_table.rs @@ -172,7 +172,7 @@ mod test { let key_k = Key::from_raw(b"k"); // no lock found - assert!(lock_table.check_key(&key_k, |_| Err(())).is_ok()); + 
lock_table.check_key(&key_k, |_| Err(())).unwrap(); let lock = Lock::new( LockType::Lock, @@ -190,7 +190,7 @@ mod test { }); // lock passes check_fn - assert!(lock_table.check_key(&key_k, |l| ts_check(l, 5)).is_ok()); + lock_table.check_key(&key_k, |l| ts_check(l, 5)).unwrap(); // lock does not pass check_fn assert_eq!(lock_table.check_key(&key_k, |l| ts_check(l, 20)), Err(lock)); @@ -231,22 +231,18 @@ mod test { }); // no lock found - assert!( - lock_table - .check_range( - Some(&Key::from_raw(b"m")), - Some(&Key::from_raw(b"n")), - |_, _| Err(()) - ) - .is_ok() - ); + lock_table + .check_range( + Some(&Key::from_raw(b"m")), + Some(&Key::from_raw(b"n")), + |_, _| Err(()), + ) + .unwrap(); // lock passes check_fn - assert!( - lock_table - .check_range(None, Some(&Key::from_raw(b"z")), |_, l| ts_check(l, 5)) - .is_ok() - ); + lock_table + .check_range(None, Some(&Key::from_raw(b"z")), |_, l| ts_check(l, 5)) + .unwrap(); // first lock does not pass check_fn assert_eq!( diff --git a/components/encryption/src/file_dict_file.rs b/components/encryption/src/file_dict_file.rs index 653fbf8dbbb..4a2609cacb5 100644 --- a/components/encryption/src/file_dict_file.rs +++ b/components/encryption/src/file_dict_file.rs @@ -600,10 +600,9 @@ mod tests { // Try open as v1 file. Should success. 
{ let file_dict_file = EncryptedFile::new(tempdir.path(), "test_file_dict_file"); - let file_bytes = file_dict_file.read(&PlaintextBackend::default()); - assert!(file_bytes.is_ok()); + let file_bytes = file_dict_file.read(&PlaintextBackend::default()).unwrap(); let mut file_dict = FileDictionary::default(); - file_dict.merge_from_bytes(&file_bytes.unwrap()).unwrap(); + file_dict.merge_from_bytes(&file_bytes).unwrap(); assert_eq!(*file_dict.files.get("f1").unwrap(), info1); assert_eq!(file_dict.files.get("f2"), None); assert_eq!(file_dict.files.get("f3"), None); diff --git a/components/encryption/src/io.rs b/components/encryption/src/io.rs index d2c5b6d1546..e02aafabe88 100644 --- a/components/encryption/src/io.rs +++ b/components/encryption/src/io.rs @@ -694,9 +694,8 @@ mod tests { buf: &mut [u8], ) -> Poll> { let len = min(self.read_maxsize_once, buf.len()); - let r = self.cursor.read(&mut buf[..len]); - assert!(r.is_ok()); - Poll::Ready(IoResult::Ok(r.unwrap())) + let r = self.cursor.read(&mut buf[..len]).unwrap(); + Poll::Ready(IoResult::Ok(r)) } } @@ -727,11 +726,10 @@ mod tests { let mut encrypt_read_len = 0; loop { - let s = encrypt_reader + let read_len = encrypt_reader .read(&mut encrypt_text[encrypt_read_len..]) - .await; - assert!(s.is_ok()); - let read_len = s.unwrap(); + .await + .unwrap(); if read_len == 0 { break; } @@ -757,11 +755,10 @@ mod tests { .unwrap(); loop { - let s = decrypt_reader + let read_len = decrypt_reader .read(&mut decrypt_text[decrypt_read_len..]) - .await; - assert!(s.is_ok()); - let read_len = s.unwrap(); + .await + .unwrap(); if read_len == 0 { break; } diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index a45f6153358..0dcdbffdb95 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -1299,8 +1299,7 @@ mod tests { // return errors. 
assert!(result.is_err()); let previous = Box::new(PlaintextBackend::default()) as Box; - let result = new_key_manager(&tmp_dir, None, right_key, previous); - assert!(result.is_ok()); + new_key_manager(&tmp_dir, None, right_key, previous).unwrap(); } #[test] diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index 276fb1ed19a..dc09b54fb6e 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -309,7 +309,7 @@ mod tests { fn test_tablet_error_collector_ok() { let mut err = TabletErrorCollector::new(); err.add_result(1, 1, Ok(())); - assert!(err.take_result().is_ok()); + err.take_result().unwrap(); assert_eq!(err.get_error_count(), 0); } diff --git a/components/engine_traits_tests/src/scenario_writes.rs b/components/engine_traits_tests/src/scenario_writes.rs index c9b1b1d5fb7..eb05c107c1d 100644 --- a/components/engine_traits_tests/src/scenario_writes.rs +++ b/components/engine_traits_tests/src/scenario_writes.rs @@ -213,8 +213,7 @@ scenario_test! { put_get { scenario_test! { delete_none { let db = write_scenario_engine(); - let res = db.delete(b"foo"); - assert!(res.is_ok()); + db.delete(b"foo").unwrap(); }} scenario_test! 
{ delete { diff --git a/components/external_storage/src/local.rs b/components/external_storage/src/local.rs index f246c808b86..80c22929525 100644 --- a/components/external_storage/src/local.rs +++ b/components/external_storage/src/local.rs @@ -227,18 +227,15 @@ mod tests { let filename = "existed.file"; let buf1: &[u8] = b"pingcap"; let buf2: &[u8] = b"tikv"; - let r = ls - .write(filename, UnpinReader(Box::new(buf1)), buf1.len() as _) - .await; - assert!(r.is_ok()); - let r = ls - .write(filename, UnpinReader(Box::new(buf2)), buf2.len() as _) - .await; - assert!(r.is_ok()); + ls.write(filename, UnpinReader(Box::new(buf1)), buf1.len() as _) + .await + .unwrap(); + ls.write(filename, UnpinReader(Box::new(buf2)), buf2.len() as _) + .await + .unwrap(); let mut read_buff: Vec = Vec::new(); - let r = ls.read(filename).read_to_end(&mut read_buff).await; - assert!(r.is_ok()); + ls.read(filename).read_to_end(&mut read_buff).await.unwrap(); assert_eq!(read_buff.len(), 4); assert_eq!(&read_buff, buf2); } diff --git a/components/raftstore/src/coprocessor/config.rs b/components/raftstore/src/coprocessor/config.rs index 1087b18c287..fb1fc35345f 100644 --- a/components/raftstore/src/coprocessor/config.rs +++ b/components/raftstore/src/coprocessor/config.rs @@ -209,34 +209,34 @@ mod tests { cfg = Config::default(); cfg.region_max_size = Some(ReadableSize(10)); cfg.region_split_size = ReadableSize(20); - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); cfg = Config::default(); cfg.region_max_size = None; cfg.region_split_size = ReadableSize(20); - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); assert_eq!(cfg.region_max_size, Some(ReadableSize(30))); cfg = Config::default(); cfg.region_max_keys = Some(10); cfg.region_split_keys = Some(20); - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); cfg = Config::default(); cfg.region_max_keys = None; cfg.region_split_keys = Some(20); - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); 
assert_eq!(cfg.region_max_keys, Some(30)); cfg = Config::default(); cfg.enable_region_bucket = false; cfg.region_split_size = ReadableSize(20); cfg.region_bucket_size = ReadableSize(30); - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); cfg = Config::default(); cfg.region_split_size = ReadableSize::mb(20); - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); assert_eq!(cfg.region_split_keys, Some(200000)); } } diff --git a/components/raftstore/src/coprocessor/split_observer.rs b/components/raftstore/src/coprocessor/split_observer.rs index e763c83a37c..7f844f4b069 100644 --- a/components/raftstore/src/coprocessor/split_observer.rs +++ b/components/raftstore/src/coprocessor/split_observer.rs @@ -240,14 +240,13 @@ mod tests { let observer = SplitObserver; - let resp = observer.pre_propose_admin(&mut ctx, &mut req); // since no split is defined, actual coprocessor won't be invoke. - assert!(resp.is_ok()); + observer.pre_propose_admin(&mut ctx, &mut req).unwrap(); assert!(!req.has_split(), "only split req should be handle."); req = new_split_request(new_row_key(1, 2, 0)); // For compatible reason, split should supported too. - assert!(observer.pre_propose_admin(&mut ctx, &mut req).is_ok()); + observer.pre_propose_admin(&mut ctx, &mut req).unwrap(); // Empty key should be skipped. let mut split_keys = vec![vec![]]; @@ -257,7 +256,7 @@ mod tests { req = new_batch_split_request(split_keys.clone()); // Although invalid keys should be skipped, but if all keys are // invalid, errors should be reported. 
- assert!(observer.pre_propose_admin(&mut ctx, &mut req).is_err()); + observer.pre_propose_admin(&mut ctx, &mut req).unwrap_err(); let mut key = new_row_key(1, 2, 0); let mut expected_key = key[..key.len() - 8].to_vec(); diff --git a/components/raftstore/src/store/bootstrap.rs b/components/raftstore/src/store/bootstrap.rs index 1ee8e9ddc10..f6e3a266f01 100644 --- a/components/raftstore/src/store/bootstrap.rs +++ b/components/raftstore/src/store/bootstrap.rs @@ -143,10 +143,10 @@ mod tests { let engines = Engines::new(kv_engine.clone(), raft_engine.clone()); let region = initial_region(1, 1, 1); - assert!(bootstrap_store(&engines, 1, 1).is_ok()); - assert!(bootstrap_store(&engines, 1, 1).is_err()); + bootstrap_store(&engines, 1, 1).unwrap(); + bootstrap_store(&engines, 1, 1).unwrap_err(); - assert!(prepare_bootstrap_cluster(&engines, ®ion).is_ok()); + prepare_bootstrap_cluster(&engines, ®ion).unwrap(); assert!( kv_engine .get_value(keys::PREPARE_BOOTSTRAP_KEY) @@ -167,8 +167,8 @@ mod tests { ); assert!(raft_engine.get_raft_state(1).unwrap().is_some()); - assert!(clear_prepare_bootstrap_key(&engines).is_ok()); - assert!(clear_prepare_bootstrap_cluster(&engines, 1).is_ok()); + clear_prepare_bootstrap_key(&engines).unwrap(); + clear_prepare_bootstrap_cluster(&engines, 1).unwrap(); assert!( is_range_empty( &kv_engine, diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 5d7d89bbc7b..6b59eaf71bb 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -1023,10 +1023,8 @@ mod tests { cfg = Config::new(); cfg.raft_log_gc_size_limit = None; - assert!( - cfg.validate(ReadableSize(20), false, ReadableSize(0)) - .is_ok() - ); + cfg.validate(ReadableSize(20), false, ReadableSize(0)) + .unwrap(); assert_eq!(cfg.raft_log_gc_size_limit, Some(ReadableSize(15))); cfg = Config::new(); @@ -1042,10 +1040,8 @@ mod tests { cfg = Config::new(); cfg.raft_log_gc_count_limit = None; - assert!( - 
cfg.validate(ReadableSize::mb(1), false, ReadableSize(0)) - .is_ok() - ); + cfg.validate(ReadableSize::mb(1), false, ReadableSize(0)) + .unwrap(); assert_eq!(cfg.raft_log_gc_count_limit, Some(768)); cfg = Config::new(); @@ -1098,13 +1094,13 @@ mod tests { cfg = Config::new(); cfg.hibernate_regions = true; - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_ok()); + cfg.validate(split_size, false, ReadableSize(0)).unwrap(); assert_eq!(cfg.store_batch_system.max_batch_size, Some(256)); assert_eq!(cfg.apply_batch_system.max_batch_size, Some(256)); cfg = Config::new(); cfg.hibernate_regions = false; - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_ok()); + cfg.validate(split_size, false, ReadableSize(0)).unwrap(); assert_eq!(cfg.store_batch_system.max_batch_size, Some(1024)); assert_eq!(cfg.apply_batch_system.max_batch_size, Some(256)); @@ -1112,7 +1108,7 @@ mod tests { cfg.hibernate_regions = true; cfg.store_batch_system.max_batch_size = Some(123); cfg.apply_batch_system.max_batch_size = Some(234); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_ok()); + cfg.validate(split_size, false, ReadableSize(0)).unwrap(); assert_eq!(cfg.store_batch_system.max_batch_size, Some(123)); assert_eq!(cfg.apply_batch_system.max_batch_size, Some(234)); @@ -1134,7 +1130,7 @@ mod tests { cfg.hibernate_regions = true; cfg.max_peer_down_duration = ReadableDuration::minutes(5); cfg.peer_stale_state_check_interval = ReadableDuration::minutes(5); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_ok()); + cfg.validate(split_size, false, ReadableSize(0)).unwrap(); assert_eq!(cfg.max_peer_down_duration, ReadableDuration::minutes(10)); cfg = Config::new(); @@ -1143,7 +1139,7 @@ mod tests { cfg.raft_max_size_per_msg = ReadableSize::gb(64); assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); cfg.raft_max_size_per_msg = ReadableSize::gb(3); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_ok()); + cfg.validate(split_size, 
false, ReadableSize(0)).unwrap(); cfg = Config::new(); cfg.raft_entry_max_size = ReadableSize(0); @@ -1151,23 +1147,23 @@ mod tests { cfg.raft_entry_max_size = ReadableSize::mb(3073); assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); cfg.raft_entry_max_size = ReadableSize::gb(3); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_ok()); + cfg.validate(split_size, false, ReadableSize(0)).unwrap(); cfg = Config::new(); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_ok()); + cfg.validate(split_size, false, ReadableSize(0)).unwrap(); assert_eq!(cfg.region_split_check_diff(), split_size / 16); cfg = Config::new(); - assert!(cfg.validate(split_size, true, split_size / 8).is_ok()); + cfg.validate(split_size, true, split_size / 8).unwrap(); assert_eq!(cfg.region_split_check_diff(), split_size / 16); cfg = Config::new(); - assert!(cfg.validate(split_size, true, split_size / 20).is_ok()); + cfg.validate(split_size, true, split_size / 20).unwrap(); assert_eq!(cfg.region_split_check_diff(), split_size / 20); cfg = Config::new(); cfg.region_split_check_diff = Some(ReadableSize(1)); - assert!(cfg.validate(split_size, true, split_size / 20).is_ok()); + cfg.validate(split_size, true, split_size / 20).unwrap(); assert_eq!(cfg.region_split_check_diff(), ReadableSize(1)); } } diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 1b64c9a2787..3b9546a460c 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -4728,7 +4728,7 @@ mod tests { }); let cc_resp = cc_rx.try_recv().unwrap(); assert!(cc_resp.get_header().get_error().has_stale_command()); - assert!(rx.recv_timeout(Duration::from_secs(3)).is_ok()); + rx.recv_timeout(Duration::from_secs(3)).unwrap(); // Make sure Apply and Snapshot are in the same batch. 
let (snap_tx, _) = mpsc::sync_channel(0); diff --git a/components/raftstore/src/store/txn_ext.rs b/components/raftstore/src/store/txn_ext.rs index 7b681506f63..078d3114060 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -338,10 +338,10 @@ mod tests { let k3 = Key::from_raw(b"k333"); // Test the memory size of peer pessimistic locks after inserting. - assert!(locks1.insert(vec![(k1.clone(), lock(b"k1"))]).is_ok()); + locks1.insert(vec![(k1.clone(), lock(b"k1"))]).unwrap(); assert_eq!(locks1.get(&k1), Some(&(lock(b"k1"), false))); assert_eq!(locks1.memory_size, k1.len() + lock(b"k1").memory_size()); - assert!(locks1.insert(vec![(k2.clone(), lock(b"k1"))]).is_ok()); + locks1.insert(vec![(k2.clone(), lock(b"k1"))]).unwrap(); assert_eq!(locks1.get(&k2), Some(&(lock(b"k1"), false))); assert_eq!( locks1.memory_size, @@ -349,7 +349,7 @@ mod tests { ); // Test the global memory size after inserting. - assert!(locks2.insert(vec![(k3.clone(), lock(b"k1"))]).is_ok()); + locks2.insert(vec![(k3.clone(), lock(b"k1"))]).unwrap(); assert_eq!(locks2.get(&k3), Some(&(lock(b"k1"), false))); assert_eq!( GLOBAL_MEM_SIZE.get() as usize, @@ -357,7 +357,7 @@ mod tests { ); // Test the memory size after replacing, it should not change. 
- assert!(locks1.insert(vec![(k2.clone(), lock(b"k2"))]).is_ok()); + locks1.insert(vec![(k2.clone(), lock(b"k2"))]).unwrap(); assert_eq!(locks1.get(&k2), Some(&(lock(b"k2"), false))); assert_eq!( locks1.memory_size, @@ -395,12 +395,14 @@ mod tests { defer!(GLOBAL_MEM_SIZE.set(0)); let mut locks = PeerPessimisticLocks::default(); - let res = locks.insert(vec![(Key::from_raw(b"k1"), lock(&[0; 512000]))]); - assert!(res.is_ok()); + locks + .insert(vec![(Key::from_raw(b"k1"), lock(&[0; 512000]))]) + .unwrap(); // Exceeding the region limit - let res = locks.insert(vec![(Key::from_raw(b"k2"), lock(&[0; 32000]))]); - assert!(res.is_err()); + locks + .insert(vec![(Key::from_raw(b"k2"), lock(&[0; 32000]))]) + .unwrap_err(); assert!(locks.get(&Key::from_raw(b"k2")).is_none()); // Not exceeding the region limit, but exceeding the global limit diff --git a/components/resource_metering/src/config.rs b/components/resource_metering/src/config.rs index 90b09588e3a..69d7c78cb2f 100644 --- a/components/resource_metering/src/config.rs +++ b/components/resource_metering/src/config.rs @@ -133,14 +133,14 @@ mod tests { #[test] fn test_config_validate() { let cfg = Config::default(); - assert!(cfg.validate().is_ok()); // Empty address is allowed. + cfg.validate().unwrap(); // Empty address is allowed. 
let cfg = Config { receiver_address: "127.0.0.1:6666".to_string(), report_receiver_interval: ReadableDuration::minutes(1), max_resource_groups: 2000, precision: ReadableDuration::secs(1), }; - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); let cfg = Config { receiver_address: "127.0.0.1:6666".to_string(), report_receiver_interval: ReadableDuration::days(999), // invalid diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index b6d13ac9761..71a58a33dc3 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -966,7 +966,6 @@ mod tests { // test with tde let tmp_dir = tempfile::TempDir::new().unwrap(); let key_manager = new_test_key_manager(&tmp_dir, None, None, None); - assert!(key_manager.is_ok()); (tmp_dir, Arc::new(key_manager.unwrap().unwrap())) } diff --git a/components/tidb_query_aggr/src/lib.rs b/components/tidb_query_aggr/src/lib.rs index b9d73b2773a..1eda14a0697 100644 --- a/components/tidb_query_aggr/src/lib.rs +++ b/components/tidb_query_aggr/src/lib.rs @@ -416,22 +416,18 @@ mod tests { let mut s = AggrFnStateFoo::new(); // Update using `Int` should success. - assert!( - update!( - &mut s as &mut dyn AggrFunctionStateUpdatePartial<_>, - &mut ctx, - Some(&1) - ) - .is_ok() - ); - assert!( - update!( - &mut s as &mut dyn AggrFunctionStateUpdatePartial<_>, - &mut ctx, - Some(&3) - ) - .is_ok() - ); + update!( + &mut s as &mut dyn AggrFunctionStateUpdatePartial<_>, + &mut ctx, + Some(&1) + ) + .unwrap(); + update!( + &mut s as &mut dyn AggrFunctionStateUpdatePartial<_>, + &mut ctx, + Some(&3) + ) + .unwrap(); // Update using other data type should panic. let result = panic_hook::recover_safe(|| { @@ -457,27 +453,21 @@ mod tests { // Push result to Real VectorValue should success. 
let mut target = vec![VectorValue::with_capacity(0, EvalType::Real)]; - assert!( - (&mut s as &mut dyn AggrFunctionState) - .push_result(&mut ctx, &mut target) - .is_ok() - ); + (&mut s as &mut dyn AggrFunctionState) + .push_result(&mut ctx, &mut target) + .unwrap(); assert_eq!(target[0].to_real_vec(), &[Real::new(4.0).ok()]); // Calling push result multiple times should also success. - assert!( - update!( - &mut s as &mut dyn AggrFunctionStateUpdatePartial<_>, - &mut ctx, - Some(&1) - ) - .is_ok() - ); - assert!( - (&mut s as &mut dyn AggrFunctionState) - .push_result(&mut ctx, &mut target) - .is_ok() - ); + update!( + &mut s as &mut dyn AggrFunctionStateUpdatePartial<_>, + &mut ctx, + Some(&1) + ) + .unwrap(); + (&mut s as &mut dyn AggrFunctionState) + .push_result(&mut ctx, &mut target) + .unwrap(); assert_eq!( target[0].to_real_vec(), &[Real::new(4.0).ok(), Real::new(5.0).ok()] diff --git a/components/tidb_query_datatype/src/codec/convert.rs b/components/tidb_query_datatype/src/codec/convert.rs index c576f14ee5f..41f0794950d 100644 --- a/components/tidb_query_datatype/src/codec/convert.rs +++ b/components/tidb_query_datatype/src/codec/convert.rs @@ -1965,20 +1965,17 @@ mod tests { let mut ctx = EvalContext::new(Arc::new(EvalConfig::from_flag(Flag::TRUNCATE_AS_WARNING))); let val: Result = b"".to_vec().convert(&mut ctx); - assert!(val.is_ok()); assert_eq!(val.unwrap(), 0.0); assert_eq!(ctx.warnings.warnings.len(), 1); let mut ctx = EvalContext::new(Arc::new(EvalConfig::from_flag(Flag::TRUNCATE_AS_WARNING))); let val: Result = b"1.1a".to_vec().convert(&mut ctx); - assert!(val.is_ok()); assert_eq!(val.unwrap(), 1.1); assert_eq!(ctx.warnings.warnings.len(), 1); // IGNORE_TRUNCATE let mut ctx = EvalContext::new(Arc::new(EvalConfig::from_flag(Flag::IGNORE_TRUNCATE))); let val: Result = b"1.2a".to_vec().convert(&mut ctx); - assert!(val.is_ok()); assert_eq!(val.unwrap(), 1.2); assert_eq!(ctx.warnings.warnings.len(), 0); } @@ -2356,9 +2353,7 @@ mod tests { for (dec, 
flen, decimal, want) in cases { ft.set_flen(flen); ft.set_decimal(decimal); - let nd = produce_dec_with_specified_tp(&mut ctx, dec, &ft); - assert!(nd.is_ok()); - let nd = nd.unwrap(); + let nd = produce_dec_with_specified_tp(&mut ctx, dec, &ft).unwrap(); assert_eq!(nd, want, "{}, {}, {}, {}, {}", dec, nd, want, flen, decimal); } } diff --git a/components/tidb_query_datatype/src/codec/mysql/decimal.rs b/components/tidb_query_datatype/src/codec/mysql/decimal.rs index 2518e003ba3..7cd1c239bb1 100644 --- a/components/tidb_query_datatype/src/codec/mysql/decimal.rs +++ b/components/tidb_query_datatype/src/codec/mysql/decimal.rs @@ -3734,11 +3734,9 @@ mod tests { ))); let truncated_res = Res::Truncated(2333); - assert!( - truncated_res - .into_result_impl(&mut ctx, Some(Error::truncated()), None) - .is_ok() - ); + truncated_res + .into_result_impl(&mut ctx, Some(Error::truncated()), None) + .unwrap(); // Overflow cases let mut ctx = EvalContext::default(); @@ -3757,10 +3755,8 @@ mod tests { Flag::OVERFLOW_AS_WARNING, ))); let error = Error::overflow("", ""); - assert!( - overflow_res - .into_result_impl(&mut ctx, None, Some(error)) - .is_ok() - ); + overflow_res + .into_result_impl(&mut ctx, None, Some(error)) + .unwrap(); } } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/serde.rs b/components/tidb_query_datatype/src/codec/mysql/json/serde.rs index 19fec765d1c..a4c33944e21 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/serde.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/serde.rs @@ -240,8 +240,7 @@ mod tests { ]; for json_str in legal_cases { - let resp = Json::from_str(json_str); - assert!(resp.is_ok()); + Json::from_str(json_str).unwrap(); } let cases = vec![ @@ -256,9 +255,8 @@ mod tests { ]; for (json_str, json) in cases { - let resp = Json::from_str(json_str); - assert!(resp.is_ok()); - assert_eq!(resp.unwrap(), json.unwrap()); + let resp = Json::from_str(json_str).unwrap(); + assert_eq!(resp, json.unwrap()); } let 
illegal_cases = vec!["[pxx,apaa]", "hpeheh", ""]; diff --git a/components/tidb_query_datatype/src/codec/table.rs b/components/tidb_query_datatype/src/codec/table.rs index 2cb2f055842..052ad8bf927 100644 --- a/components/tidb_query_datatype/src/codec/table.rs +++ b/components/tidb_query_datatype/src/codec/table.rs @@ -790,7 +790,7 @@ mod tests { let mut range = KeyRange::default(); range.set_start(small_key.clone()); range.set_end(large_key.clone()); - assert!(check_table_ranges(&[range]).is_ok()); + check_table_ranges(&[range]).unwrap(); // test range.start > range.end let mut range = KeyRange::default(); range.set_end(small_key.clone()); @@ -819,13 +819,13 @@ mod tests { #[test] fn test_check_key_type() { let record_key = encode_row_key(TABLE_ID, 1); - assert!(check_key_type(record_key.as_slice(), RECORD_PREFIX_SEP).is_ok()); + check_key_type(record_key.as_slice(), RECORD_PREFIX_SEP).unwrap(); assert!(check_key_type(record_key.as_slice(), INDEX_PREFIX_SEP).is_err()); let (_, index_key) = generate_index_data_for_test(TABLE_ID, INDEX_ID, 1, &Datum::I64(1), true); assert!(check_key_type(index_key.as_slice(), RECORD_PREFIX_SEP).is_err()); - assert!(check_key_type(index_key.as_slice(), INDEX_PREFIX_SEP).is_ok()); + check_key_type(index_key.as_slice(), INDEX_PREFIX_SEP).unwrap(); let too_small_key = vec![0]; assert!(check_key_type(too_small_key.as_slice(), RECORD_PREFIX_SEP).is_err()); diff --git a/components/tidb_query_datatype/src/expr/ctx.rs b/components/tidb_query_datatype/src/expr/ctx.rs index 0e488689fce..a3e175a3867 100644 --- a/components/tidb_query_datatype/src/expr/ctx.rs +++ b/components/tidb_query_datatype/src/expr/ctx.rs @@ -335,19 +335,19 @@ mod tests { fn test_handle_truncate() { // ignore_truncate = false, truncate_as_warning = false let mut ctx = EvalContext::new(Arc::new(EvalConfig::new())); - assert!(ctx.handle_truncate(false).is_ok()); + ctx.handle_truncate(false).unwrap(); assert!(ctx.handle_truncate(true).is_err()); 
assert!(ctx.take_warnings().warnings.is_empty()); // ignore_truncate = false; let mut ctx = EvalContext::new(Arc::new(EvalConfig::default_for_test())); - assert!(ctx.handle_truncate(false).is_ok()); - assert!(ctx.handle_truncate(true).is_ok()); + ctx.handle_truncate(false).unwrap(); + ctx.handle_truncate(true).unwrap(); assert!(ctx.take_warnings().warnings.is_empty()); // ignore_truncate = false, truncate_as_warning = true let mut ctx = EvalContext::new(Arc::new(EvalConfig::from_flag(Flag::TRUNCATE_AS_WARNING))); - assert!(ctx.handle_truncate(false).is_ok()); - assert!(ctx.handle_truncate(true).is_ok()); + ctx.handle_truncate(false).unwrap(); + ctx.handle_truncate(true).unwrap(); assert!(!ctx.take_warnings().warnings.is_empty()); } @@ -355,11 +355,11 @@ mod tests { fn test_max_warning_cnt() { let eval_cfg = Arc::new(EvalConfig::from_flag(Flag::TRUNCATE_AS_WARNING)); let mut ctx = EvalContext::new(Arc::clone(&eval_cfg)); - assert!(ctx.handle_truncate(true).is_ok()); - assert!(ctx.handle_truncate(true).is_ok()); + ctx.handle_truncate(true).unwrap(); + ctx.handle_truncate(true).unwrap(); assert_eq!(ctx.take_warnings().warnings.len(), 2); for _ in 0..2 * DEFAULT_MAX_WARNING_CNT { - assert!(ctx.handle_truncate(true).is_ok()); + ctx.handle_truncate(true).unwrap(); } let warnings = ctx.take_warnings(); assert_eq!(warnings.warning_cnt, 2 * DEFAULT_MAX_WARNING_CNT); diff --git a/components/tidb_query_executors/src/table_scan_executor.rs b/components/tidb_query_executors/src/table_scan_executor.rs index c2c310b4018..a4f7e957663 100644 --- a/components/tidb_query_executors/src/table_scan_executor.rs +++ b/components/tidb_query_executors/src/table_scan_executor.rs @@ -1136,7 +1136,7 @@ mod tests { .unwrap(); let mut result = executor.next_batch(1); - assert!(result.is_drained.is_ok()); + result.is_drained.unwrap(); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 1); assert!(result.physical_columns[0].is_decoded()); @@ 
-1196,7 +1196,7 @@ mod tests { .unwrap(); let mut result = executor.next_batch(10); - assert!(result.is_drained.is_ok()); + result.is_drained.unwrap(); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 2); assert!(result.physical_columns[0].is_decoded()); diff --git a/components/tidb_query_expr/src/impl_compare_in.rs b/components/tidb_query_expr/src/impl_compare_in.rs index d518c9061a0..312943a276a 100644 --- a/components/tidb_query_expr/src/impl_compare_in.rs +++ b/components/tidb_query_expr/src/impl_compare_in.rs @@ -821,14 +821,15 @@ mod tests { let logical_rows: &[usize] = &(0..1024).collect::>(); profiler::start("./bench_compare_in.profile"); b.iter(|| { - let result = black_box(&exp).eval( - black_box(&mut ctx), - black_box(schema), - black_box(&mut columns), - black_box(logical_rows), - black_box(1024), - ); - assert!(result.is_ok()); + black_box(&exp) + .eval( + black_box(&mut ctx), + black_box(schema), + black_box(&mut columns), + black_box(logical_rows), + black_box(1024), + ) + .unwrap(); }); profiler::stop(); } diff --git a/components/tidb_query_expr/src/types/expr_builder.rs b/components/tidb_query_expr/src/types/expr_builder.rs index 33c9d48de67..0546fe43f08 100644 --- a/components/tidb_query_expr/src/types/expr_builder.rs +++ b/components/tidb_query_expr/src/types/expr_builder.rs @@ -584,39 +584,34 @@ mod tests { .push_child(ExprDefBuilder::constant_int(1)) .push_child(ExprDefBuilder::constant_real(3.0)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_ok()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap(); // Incorrect return type let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsTime, FieldTypeTp::LongLong) .push_child(ExprDefBuilder::constant_int(1)) .push_child(ExprDefBuilder::constant_real(3.0)) .build(); - let exp = 
RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_err()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap_err(); // Incorrect number of arguments let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsTime, FieldTypeTp::VarChar) .push_child(ExprDefBuilder::constant_int(1)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_err()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap_err(); let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsTime, FieldTypeTp::VarChar) .push_child(ExprDefBuilder::constant_int(1)) .push_child(ExprDefBuilder::constant_real(3.0)) .push_child(ExprDefBuilder::constant_real(1.0)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_err()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap_err(); // Incorrect argument type let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsTime, FieldTypeTp::VarChar) .push_child(ExprDefBuilder::constant_int(1)) .push_child(ExprDefBuilder::constant_int(5)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_err()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap_err(); } #[test] @@ -626,16 +621,14 @@ mod tests { ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsDuration, FieldTypeTp::Double) .push_child(ExprDefBuilder::constant_int(1)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_ok()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap(); let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsDuration, FieldTypeTp::Double) .push_child(ExprDefBuilder::constant_int(1)) 
.push_child(ExprDefBuilder::constant_int(5)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_ok()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap(); let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsDuration, FieldTypeTp::Double) @@ -643,40 +636,35 @@ mod tests { .push_child(ExprDefBuilder::constant_int(5)) .push_child(ExprDefBuilder::constant_int(4)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_ok()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap(); // Incorrect return type let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsDuration, FieldTypeTp::LongLong) .push_child(ExprDefBuilder::constant_int(1)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_err()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap_err(); // Incorrect argument type let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsDuration, FieldTypeTp::Double) .push_child(ExprDefBuilder::constant_real(1.0)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_err()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap_err(); let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsDuration, FieldTypeTp::Double) .push_child(ExprDefBuilder::constant_int(1)) .push_child(ExprDefBuilder::constant_real(1.0)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_err()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap_err(); let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsDuration, FieldTypeTp::Double) 
.push_child(ExprDefBuilder::constant_real(3.0)) .push_child(ExprDefBuilder::constant_real(1.0)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_err()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap_err(); let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsDuration, FieldTypeTp::Double) @@ -684,8 +672,7 @@ mod tests { .push_child(ExprDefBuilder::constant_real(1.0)) .push_child(ExprDefBuilder::constant_int(1)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_err()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap_err(); } #[test] @@ -695,23 +682,20 @@ mod tests { .push_child(ExprDefBuilder::constant_real(3.0)) .push_child(ExprDefBuilder::constant_real(5.0)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_ok()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap(); // Insufficient arguments let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsJson, FieldTypeTp::LongLong) .push_child(ExprDefBuilder::constant_real(3.0)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_err()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap_err(); // Incorrect return type let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsJson, FieldTypeTp::Double) .push_child(ExprDefBuilder::constant_real(3.0)) .push_child(ExprDefBuilder::constant_real(5.0)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_err()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap_err(); // Incorrect types let node = 
ExprDefBuilder::scalar_func(ScalarFuncSig::CastIntAsJson, FieldTypeTp::LongLong) @@ -719,8 +703,7 @@ mod tests { .push_child(ExprDefBuilder::constant_real(5.0)) .push_child(ExprDefBuilder::constant_int(42)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_err()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap_err(); } #[test] @@ -730,22 +713,19 @@ mod tests { .push_child(ExprDefBuilder::constant_real(3.0)) .push_child(ExprDefBuilder::constant_int(5)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_ok()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap(); // Insufficient arguments let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastRealAsInt, FieldTypeTp::Double).build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_err()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap_err(); // Incorrect return type let node = ExprDefBuilder::scalar_func(ScalarFuncSig::CastRealAsInt, FieldTypeTp::LongLong) .push_child(ExprDefBuilder::constant_real(3.0)) .push_child(ExprDefBuilder::constant_int(5)) .build(); - let exp = RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0); - assert!(exp.is_err()); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node, fn_mapper, 0).unwrap_err(); } #[test] @@ -851,14 +831,8 @@ mod tests { .is_err() ); for i in 1..10 { - assert!( - RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper( - node.clone(), - fn_mapper, - i - ) - .is_ok() - ); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node.clone(), fn_mapper, i) + .unwrap(); } // Col offset = 3. The minimum success max_columns is 4. 
@@ -874,14 +848,8 @@ mod tests { ); } for i in 4..10 { - assert!( - RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper( - node.clone(), - fn_mapper, - i - ) - .is_ok() - ); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node.clone(), fn_mapper, i) + .unwrap(); } // Col offset = 1, 2, 5. The minimum success max_columns is 6. @@ -903,14 +871,8 @@ mod tests { ); } for i in 6..10 { - assert!( - RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper( - node.clone(), - fn_mapper, - i - ) - .is_ok() - ); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node.clone(), fn_mapper, i) + .unwrap(); } } diff --git a/components/tidb_query_expr/src/types/expr_eval.rs b/components/tidb_query_expr/src/types/expr_eval.rs index 2ba3b030ef0..442c0f8486b 100644 --- a/components/tidb_query_expr/src/types/expr_eval.rs +++ b/components/tidb_query_expr/src/types/expr_eval.rs @@ -1246,14 +1246,15 @@ mod tests { profiler::start("./bench_eval_plus_1024_rows.profile"); b.iter(|| { - let result = black_box(&exp).eval( - black_box(&mut ctx), - black_box(schema), - black_box(&mut columns), - black_box(&logical_rows), - black_box(1024), - ); - assert!(result.is_ok()); + black_box(&exp) + .eval( + black_box(&mut ctx), + black_box(schema), + black_box(&mut columns), + black_box(&logical_rows), + black_box(1024), + ) + .unwrap(); }); profiler::stop(); } @@ -1283,14 +1284,15 @@ mod tests { profiler::start("./eval_compare_1024_rows.profile"); b.iter(|| { - let result = black_box(&exp).eval( - black_box(&mut ctx), - black_box(schema), - black_box(&mut columns), - black_box(&logical_rows), - black_box(1024), - ); - assert!(result.is_ok()); + black_box(&exp) + .eval( + black_box(&mut ctx), + black_box(schema), + black_box(&mut columns), + black_box(&logical_rows), + black_box(1024), + ) + .unwrap(); }); profiler::stop(); } @@ -1320,14 +1322,15 @@ mod tests { profiler::start("./bench_eval_compare_5_rows.profile"); b.iter(|| { - let result = black_box(&exp).eval( - 
black_box(&mut ctx), - black_box(schema), - black_box(&mut columns), - black_box(&logical_rows), - black_box(5), - ); - assert!(result.is_ok()); + black_box(&exp) + .eval( + black_box(&mut ctx), + black_box(schema), + black_box(&mut columns), + black_box(&logical_rows), + black_box(5), + ) + .unwrap(); }); profiler::stop(); } diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index 6655531c294..8fa7c8492d0 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -1933,25 +1933,20 @@ mod tests { #[test] fn test_check_data_dir_empty() { // test invalid data_path - let ret = check_data_dir_empty("/sys/invalid", "txt"); - assert!(ret.is_ok()); + check_data_dir_empty("/sys/invalid", "txt").unwrap(); // test empty data_path let tmp_path = Builder::new() .prefix("test-get-file-count") .tempdir() .unwrap() .into_path(); - let ret = check_data_dir_empty(tmp_path.to_str().unwrap(), "txt"); - assert!(ret.is_ok()); + check_data_dir_empty(tmp_path.to_str().unwrap(), "txt").unwrap(); // test non-empty data_path let tmp_file = format!("{}", tmp_path.join("test-get-file-count.txt").display()); create_file(&tmp_file, b""); - let ret = check_data_dir_empty(tmp_path.to_str().unwrap(), ""); - assert!(ret.is_err()); - let ret = check_data_dir_empty(tmp_path.to_str().unwrap(), "txt"); - assert!(ret.is_err()); - let ret = check_data_dir_empty(tmp_path.to_str().unwrap(), "xt"); - assert!(ret.is_ok()); + check_data_dir_empty(tmp_path.to_str().unwrap(), "").unwrap_err(); + check_data_dir_empty(tmp_path.to_str().unwrap(), "txt").unwrap_err(); + check_data_dir_empty(tmp_path.to_str().unwrap(), "xt").unwrap(); } #[test] diff --git a/components/tikv_util/src/mpsc/batch.rs b/components/tikv_util/src/mpsc/batch.rs index a635a75d4e4..e8d54c514a1 100644 --- a/components/tikv_util/src/mpsc/batch.rs +++ b/components/tikv_util/src/mpsc/batch.rs @@ -391,7 +391,7 @@ mod tests { } // Send without notify, the receiver can't get batched 
messages. - assert!(tx.send(0).is_ok()); + tx.send(0).unwrap(); thread::sleep(time::Duration::from_millis(10)); assert_eq!(msg_counter.load(Ordering::Acquire), 0); @@ -404,7 +404,7 @@ mod tests { // Auto notify with more sendings. for _ in 0..4 { - assert!(tx.send(0).is_ok()); + tx.send(0).unwrap(); } thread::sleep(time::Duration::from_millis(10)); assert_eq!(msg_counter.load(Ordering::Acquire), 5); @@ -442,7 +442,7 @@ mod tests { polled.recv().unwrap(); // Send without notify, the receiver can't get batched messages. - assert!(tx.send(0).is_ok()); + tx.send(0).unwrap(); thread::sleep(time::Duration::from_millis(10)); assert_eq!(msg_counter.load(Ordering::Acquire), 0); @@ -455,7 +455,7 @@ mod tests { // Auto notify with more sendings. for _ in 0..16 { - assert!(tx.send(0).is_ok()); + tx.send(0).unwrap(); } thread::sleep(time::Duration::from_millis(10)); assert_eq!(msg_counter.load(Ordering::Acquire), 17); diff --git a/components/tikv_util/src/worker/mod.rs b/components/tikv_util/src/worker/mod.rs index a8196dca054..cba3a9989cb 100644 --- a/components/tikv_util/src/worker/mod.rs +++ b/components/tikv_util/src/worker/mod.rs @@ -134,7 +134,7 @@ mod tests { let (tx, rx) = mpsc::channel(); lazy_worker.start(BatchRunner { ch: tx }); - assert!(rx.recv_timeout(Duration::from_secs(3)).is_ok()); + rx.recv_timeout(Duration::from_secs(3)).unwrap(); worker.stop(); drop(rx); diff --git a/components/tikv_util/src/yatp_pool/future_pool.rs b/components/tikv_util/src/yatp_pool/future_pool.rs index 6962ae30756..1f9c74dd709 100644 --- a/components/tikv_util/src/yatp_pool/future_pool.rs +++ b/components/tikv_util/src/yatp_pool/future_pool.rs @@ -474,10 +474,10 @@ mod tests { // full assert!(spawn_long_time_future(&read_pool, 8, 100).is_err()); - assert!(rx.recv().is_ok()); - assert!(rx.recv().is_ok()); - assert!(rx.recv().is_ok()); - assert!(rx.recv().is_ok()); + rx.recv().unwrap().unwrap(); + rx.recv().unwrap().unwrap(); + rx.recv().unwrap().unwrap(); + rx.recv().unwrap().unwrap(); // 
no more results assert!(rx.recv_timeout(Duration::from_millis(500)).is_err()); diff --git a/src/config.rs b/src/config.rs index 23dea43d47a..80e763e6981 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1920,7 +1920,7 @@ mod unified_read_pool_tests { stack_size: ReadableSize::mb(2), max_tasks_per_worker: 2000, }; - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); let cfg = UnifiedReadPoolConfig { min_thread_count: 1, max_thread_count: cmp::max( @@ -1929,7 +1929,7 @@ mod unified_read_pool_tests { ), ..cfg }; - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); let invalid_cfg = UnifiedReadPoolConfig { min_thread_count: 0, @@ -2103,7 +2103,7 @@ macro_rules! readpool_config { #[test] fn test_validate() { let cfg = $struct_name::default(); - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); let mut invalid_cfg = cfg.clone(); invalid_cfg.high_concurrency = 0; @@ -2127,7 +2127,7 @@ macro_rules! readpool_config { invalid_cfg.max_tasks_per_worker_high = 1; assert!(invalid_cfg.validate().is_err()); invalid_cfg.max_tasks_per_worker_high = 100; - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); let mut invalid_cfg = cfg.clone(); invalid_cfg.max_tasks_per_worker_normal = 0; @@ -2135,7 +2135,7 @@ macro_rules! readpool_config { invalid_cfg.max_tasks_per_worker_normal = 1; assert!(invalid_cfg.validate().is_err()); invalid_cfg.max_tasks_per_worker_normal = 100; - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); let mut invalid_cfg = cfg.clone(); invalid_cfg.max_tasks_per_worker_low = 0; @@ -2143,12 +2143,12 @@ macro_rules! 
readpool_config { invalid_cfg.max_tasks_per_worker_low = 1; assert!(invalid_cfg.validate().is_err()); invalid_cfg.max_tasks_per_worker_low = 100; - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); let mut invalid_but_unified = cfg.clone(); invalid_but_unified.use_unified_pool = Some(true); invalid_but_unified.low_concurrency = 0; - assert!(invalid_but_unified.validate().is_ok()); + invalid_but_unified.validate().unwrap(); } } }; @@ -2263,23 +2263,23 @@ mod readpool_tests { use_unified_pool: Some(false), ..Default::default() }; - assert!(storage.validate().is_ok()); + storage.validate().unwrap(); let coprocessor = CoprReadPoolConfig { use_unified_pool: Some(false), ..Default::default() }; - assert!(coprocessor.validate().is_ok()); + coprocessor.validate().unwrap(); let cfg = ReadPoolConfig { unified, storage, coprocessor, }; assert!(!cfg.is_unified_pool_enabled()); - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); // Storage and coprocessor config must be valid when yatp is not used. 
let unified = UnifiedReadPoolConfig::default(); - assert!(unified.validate().is_ok()); + unified.validate().unwrap(); let storage = StorageReadPoolConfig { use_unified_pool: Some(false), high_concurrency: 0, @@ -2312,9 +2312,9 @@ mod readpool_tests { use_unified_pool: Some(true), ..Default::default() }; - assert!(storage.validate().is_ok()); + storage.validate().unwrap(); let coprocessor = CoprReadPoolConfig::default(); - assert!(coprocessor.validate().is_ok()); + coprocessor.validate().unwrap(); let mut cfg = ReadPoolConfig { unified, storage, @@ -2368,7 +2368,7 @@ mod readpool_tests { assert!(cfg.is_unified_pool_enabled()); assert!(cfg.validate().is_err()); cfg.storage.low_concurrency = 1; - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); let storage = StorageReadPoolConfig { use_unified_pool: Some(true), @@ -2389,7 +2389,7 @@ mod readpool_tests { assert!(cfg.is_unified_pool_enabled()); assert!(cfg.validate().is_err()); cfg.coprocessor.low_concurrency = 1; - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); } } @@ -4220,13 +4220,13 @@ mod tests { let first_modified = last_cfg_metadata.modified().unwrap(); // not write to file when config is the equivalent of last one. - assert!(persist_config(&cfg).is_ok()); + persist_config(&cfg).unwrap(); last_cfg_metadata = last_cfg_path.metadata().unwrap(); assert_eq!(last_cfg_metadata.modified().unwrap(), first_modified); // write to file when config is the inequivalent of last one. 
cfg.log_level = slog::Level::Warning.into(); - assert!(persist_config(&cfg).is_ok()); + persist_config(&cfg).unwrap(); last_cfg_metadata = last_cfg_path.metadata().unwrap(); assert_ne!(last_cfg_metadata.modified().unwrap(), first_modified); } @@ -4305,7 +4305,7 @@ mod tests { let mut tikv_cfg = TiKvConfig::default(); tikv_cfg.storage.data_dir = path.as_path().to_str().unwrap().to_owned(); - assert!(persist_config(&tikv_cfg).is_ok()); + persist_config(&tikv_cfg).unwrap(); } #[test] @@ -5199,11 +5199,11 @@ mod tests { #[test] fn test_validate_tikv_config() { let mut cfg = TiKvConfig::default(); - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); let default_region_split_check_diff = cfg.raft_store.region_split_check_diff().0; cfg.raft_store.region_split_check_diff = Some(ReadableSize(cfg.raft_store.region_split_check_diff().0 + 1)); - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); assert_eq!( cfg.raft_store.region_split_check_diff().0, default_region_split_check_diff + 1 @@ -5216,7 +5216,7 @@ mod tests { // Test memory_usage_limit is based on block cache size if it's not configured. 
cfg.memory_usage_limit = None; cfg.storage.block_cache.capacity = Some(ReadableSize(3 * GIB)); - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); assert_eq!(cfg.memory_usage_limit.unwrap(), ReadableSize(5 * GIB)); // Test memory_usage_limit will fallback to system memory capacity with huge @@ -5224,7 +5224,7 @@ mod tests { cfg.memory_usage_limit = None; let system = SysQuota::memory_limit_in_bytes(); cfg.storage.block_cache.capacity = Some(ReadableSize(system * 3 / 4)); - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); assert_eq!(cfg.memory_usage_limit.unwrap(), ReadableSize(system)); } @@ -5243,7 +5243,7 @@ mod tests { { let mut cfg = TiKvConfig::default(); - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); } { @@ -5285,7 +5285,7 @@ mod tests { tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); cfg.rocksdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "kvdb", "db"); cfg.raftdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); } } diff --git a/src/coprocessor/interceptors/concurrency_limiter.rs b/src/coprocessor/interceptors/concurrency_limiter.rs index aa8b5c72f13..c77eab86f16 100644 --- a/src/coprocessor/interceptors/concurrency_limiter.rs +++ b/src/coprocessor/interceptors/concurrency_limiter.rs @@ -151,13 +151,12 @@ mod tests { // Light tasks should run without any semaphore permit let smp2 = smp.clone(); - assert!( - tokio::spawn(timeout(Duration::from_millis(250), async move { - limit_concurrency(work(2), &*smp2, Duration::from_millis(500)).await - })) - .await - .is_ok() - ); + tokio::spawn(timeout(Duration::from_millis(250), async move { + limit_concurrency(work(2), &*smp2, Duration::from_millis(500)).await + })) + .await + .unwrap() + .unwrap(); // Both t1 and t2 need a semaphore permit to finish. 
Although t2 is much shorter // than t1, it starts with t1 diff --git a/src/coprocessor/interceptors/deadline.rs b/src/coprocessor/interceptors/deadline.rs index 29b673aa487..b88e6d5f0c9 100644 --- a/src/coprocessor/interceptors/deadline.rs +++ b/src/coprocessor/interceptors/deadline.rs @@ -57,10 +57,12 @@ mod tests { } } - let res = check_deadline(work(5), Deadline::from_now(Duration::from_millis(500))).await; - assert!(res.is_ok()); + check_deadline(work(5), Deadline::from_now(Duration::from_millis(500))) + .await + .unwrap(); - let res = check_deadline(work(100), Deadline::from_now(Duration::from_millis(500))).await; - assert!(res.is_err()); + check_deadline(work(100), Deadline::from_now(Duration::from_millis(500))) + .await + .unwrap_err(); } } diff --git a/src/read_pool.rs b/src/read_pool.rs index 9c413de60a7..ded1308beb2 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -380,8 +380,8 @@ mod tests { let (task3, _tx3) = gen_task(); let (task4, _tx4) = gen_task(); - assert!(handle.spawn(task1, CommandPri::Normal, 1).is_ok()); - assert!(handle.spawn(task2, CommandPri::Normal, 2).is_ok()); + handle.spawn(task1, CommandPri::Normal, 1).unwrap(); + handle.spawn(task2, CommandPri::Normal, 2).unwrap(); thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3) { @@ -391,7 +391,7 @@ mod tests { tx1.send(()).unwrap(); thread::sleep(Duration::from_millis(300)); - assert!(handle.spawn(task4, CommandPri::Normal, 4).is_ok()); + handle.spawn(task4, CommandPri::Normal, 4).unwrap(); } #[test] @@ -422,8 +422,8 @@ mod tests { let (task4, _tx4) = gen_task(); let (task5, _tx5) = gen_task(); - assert!(handle.spawn(task1, CommandPri::Normal, 1).is_ok()); - assert!(handle.spawn(task2, CommandPri::Normal, 2).is_ok()); + handle.spawn(task1, CommandPri::Normal, 1).unwrap(); + handle.spawn(task2, CommandPri::Normal, 2).unwrap(); thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3) { @@ -434,7 +434,7 @@ mod tests 
{ handle.scale_pool_size(3); assert_eq!(handle.get_normal_pool_size(), 3); - assert!(handle.spawn(task4, CommandPri::Normal, 4).is_ok()); + handle.spawn(task4, CommandPri::Normal, 4).unwrap(); thread::sleep(Duration::from_millis(300)); match handle.spawn(task5, CommandPri::Normal, 5) { @@ -471,8 +471,8 @@ mod tests { let (task4, _tx4) = gen_task(); let (task5, _tx5) = gen_task(); - assert!(handle.spawn(task1, CommandPri::Normal, 1).is_ok()); - assert!(handle.spawn(task2, CommandPri::Normal, 2).is_ok()); + handle.spawn(task1, CommandPri::Normal, 1).unwrap(); + handle.spawn(task2, CommandPri::Normal, 2).unwrap(); thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3) { @@ -487,7 +487,7 @@ mod tests { handle.scale_pool_size(1); assert_eq!(handle.get_normal_pool_size(), 1); - assert!(handle.spawn(task4, CommandPri::Normal, 4).is_ok()); + handle.spawn(task4, CommandPri::Normal, 4).unwrap(); thread::sleep(Duration::from_millis(300)); match handle.spawn(task5, CommandPri::Normal, 5) { diff --git a/src/server/debug.rs b/src/server/debug.rs index 933f4308245..831a2b85255 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -1991,7 +1991,7 @@ mod tests { remove_region_state(1); remove_region_state(2); - assert!(debugger.recreate_region(region.clone()).is_ok()); + debugger.recreate_region(region.clone()).unwrap(); assert_eq!(get_region_state(engine, 100).get_region(), ®ion); region.set_start_key(b"z".to_vec()); diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index b47fc34cf27..094f6f5d5e6 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -229,9 +229,7 @@ mod tests { } let factory = builder.build(); let shared_db = factory.create_shared_db().unwrap(); - let tablet = TabletFactory::create_tablet(&factory, 1, 10); - assert!(tablet.is_ok()); - let tablet = tablet.unwrap(); + let tablet = TabletFactory::create_tablet(&factory, 1, 10).unwrap(); let tablet2 = 
factory.open_tablet(1, 10).unwrap(); assert_eq!(tablet.as_inner().path(), shared_db.as_inner().path()); assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); @@ -272,9 +270,7 @@ mod tests { } let inner_factory = builder.build(); let factory = KvEngineFactoryV2::new(inner_factory); - let tablet = factory.create_tablet(1, 10); - assert!(tablet.is_ok()); - let tablet = tablet.unwrap(); + let tablet = factory.create_tablet(1, 10).unwrap(); let tablet2 = factory.open_tablet(1, 10).unwrap(); assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); let tablet2 = factory.open_tablet_cache(1, 10).unwrap(); @@ -296,8 +292,8 @@ mod tests { assert!(!factory.exists(2, 11)); assert!(factory.exists_raw(&tablet_path)); assert!(!factory.is_tombstoned(1, 10)); - assert!(factory.load_tablet(&tablet_path, 1, 10).is_err()); - assert!(factory.load_tablet(&tablet_path, 1, 20).is_ok()); + factory.load_tablet(&tablet_path, 1, 10).unwrap_err(); + factory.load_tablet(&tablet_path, 1, 20).unwrap(); // After we load it as with the new id or suffix, we should be unable to get it // with the old id and suffix in the cache. assert!(factory.open_tablet_cache(1, 10).is_none()); diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 7e695430d10..dcdb075d256 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -2051,18 +2051,16 @@ mod tests { // Before starting gc_worker, fill the scheduler to full. for _ in 0..GC_MAX_PENDING_TASKS { - assert!( - gc_worker - .scheduler() - .schedule(GcTask::Gc { - region_id: 0, - start_key: vec![], - end_key: vec![], - safe_point: TimeStamp::from(100), - callback: Box::new(|_res| {}) - }) - .is_ok() - ); + gc_worker + .scheduler() + .schedule(GcTask::Gc { + region_id: 0, + start_key: vec![], + end_key: vec![], + safe_point: TimeStamp::from(100), + callback: Box::new(|_res| {}), + }) + .unwrap(); } // Then, it will fail to schedule another gc command. 
let (tx, rx) = mpsc::channel(); @@ -2081,24 +2079,22 @@ mod tests { let (tx, rx) = mpsc::channel(); // When the gc_worker is full, scheduling an unsafe destroy range task should be // still allowed. - assert!( - gc_worker - .unsafe_destroy_range( - Context::default(), - Key::from_raw(b"a"), - Key::from_raw(b"z"), - Box::new(move |res| { - tx.send(res).unwrap(); - }) - ) - .is_ok() - ); + gc_worker + .unsafe_destroy_range( + Context::default(), + Key::from_raw(b"a"), + Key::from_raw(b"z"), + Box::new(move |res| { + tx.send(res).unwrap(); + }), + ) + .unwrap(); gc_worker.start().unwrap(); // After the worker starts running, the destroy range task should run, // and the key in the range will be deleted. - assert!(rx.recv_timeout(Duration::from_secs(10)).unwrap().is_ok()); + rx.recv_timeout(Duration::from_secs(10)).unwrap().unwrap(); must_get_none(&engine, b"key", 30); } } diff --git a/src/server/resolve.rs b/src/server/resolve.rs index ccee5c52f82..404cee0e613 100644 --- a/src/server/resolve.rs +++ b/src/server/resolve.rs @@ -256,14 +256,14 @@ mod tests { fn test_resolve_store_state_up() { let store = new_store(STORE_ADDR, metapb::StoreState::Up); let runner = new_runner(store); - assert!(runner.get_address(0).is_ok()); + runner.get_address(0).unwrap(); } #[test] fn test_resolve_store_state_offline() { let store = new_store(STORE_ADDR, metapb::StoreState::Offline); let runner = new_runner(store); - assert!(runner.get_address(0).is_ok()); + runner.get_address(0).unwrap(); } #[test] diff --git a/src/server/server.rs b/src/server/server.rs index c5aa6311193..5c0ace9d7b1 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -581,7 +581,7 @@ mod tests { trans.send(msg.clone()).unwrap(); trans.flush(); - assert!(rx.recv_timeout(Duration::from_secs(5)).is_ok()); + rx.recv_timeout(Duration::from_secs(5)).unwrap(); msg.mut_to_peer().set_store_id(2); msg.set_region_id(2); diff --git a/src/server/service/diagnostics/log.rs b/src/server/service/diagnostics/log.rs index 
4ab02f819da..232ddd58b4b 100644 --- a/src/server/service/diagnostics/log.rs +++ b/src/server/service/diagnostics/log.rs @@ -481,7 +481,6 @@ mod tests { ]; for (input, time, level, content) in cs.into_iter() { let result = parse(input); - assert!(result.is_ok(), "expected OK, but got: {:?}", result); let timestamp = timestamp(time); let log = result.unwrap(); assert_eq!(log.0, content); diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 13b7b94297d..7911808e86b 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -1420,7 +1420,7 @@ mod tests { let resp = block_on(handle).unwrap(); assert_eq!(resp.status(), StatusCode::OK); let body_bytes = block_on(hyper::body::to_bytes(resp.into_body())).unwrap(); - assert!(String::from_utf8(body_bytes.as_ref().to_owned()).is_ok()); + String::from_utf8(body_bytes.as_ref().to_owned()).unwrap(); // test gzip let handle = status_server.thread_pool.spawn(async move { @@ -1440,7 +1440,7 @@ mod tests { GzDecoder::new(body_bytes.reader()) .read_to_end(&mut decoded_bytes) .unwrap(); - assert!(String::from_utf8(decoded_bytes).is_ok()); + String::from_utf8(decoded_bytes).unwrap(); status_server.stop(); } diff --git a/src/server/status_server/profile.rs b/src/server/status_server/profile.rs index a37712dfd68..446711bef30 100644 --- a/src/server/status_server/profile.rs +++ b/src/server/status_server/profile.rs @@ -409,7 +409,7 @@ mod tests { let (tx, rx) = mpsc::channel(1); let res = rt.spawn(activate_heap_profile(rx, std::env::temp_dir(), || {})); drop(tx); - assert!(block_on(res).unwrap().is_ok()); + block_on(res).unwrap().unwrap(); // Test activated profiling can be stopped by the handle. 
let (tx, rx) = sync_channel::(1); @@ -424,7 +424,7 @@ mod tests { )); assert!(check_activated()); assert!(deactivate_heap_profile()); - assert!(block_on(res).unwrap().is_ok()); + block_on(res).unwrap().unwrap(); } #[test] @@ -454,6 +454,6 @@ mod tests { )); assert!(check_activated()); assert!(deactivate_heap_profile()); - assert!(block_on(res).unwrap().is_ok()); + block_on(res).unwrap().unwrap(); } } diff --git a/src/storage/config.rs b/src/storage/config.rs index 9a359310178..4bfc664629f 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -379,11 +379,11 @@ mod tests { #[test] fn test_validate_storage_config() { let mut cfg = Config::default(); - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); let max_pool_size = std::cmp::max(4, SysQuota::cpu_cores_quota() as usize); cfg.scheduler_worker_pool_size = max_pool_size; - assert!(cfg.validate().is_ok()); + cfg.validate().unwrap(); cfg.scheduler_worker_pool_size = 0; assert!(cfg.validate().is_err()); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 620bca80b32..ef9aecf02ad 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -7758,7 +7758,7 @@ mod tests { assert_eq!(key_error.get_locked().get_key(), b"key"); // Ignore memory locks in resolved or committed locks. ctx.set_resolved_locks(vec![10]); - assert!(block_on(storage.get(ctx.clone(), Key::from_raw(b"key"), 100.into())).is_ok()); + block_on(storage.get(ctx.clone(), Key::from_raw(b"key"), 100.into())).unwrap(); ctx.take_resolved_locks(); // Test batch_get @@ -7773,7 +7773,7 @@ mod tests { assert_eq!(key_error.get_locked().get_key(), b"key"); // Ignore memory locks in resolved locks. 
ctx.set_resolved_locks(vec![10]); - assert!(batch_get(ctx.clone()).is_ok()); + batch_get(ctx.clone()).unwrap(); ctx.take_resolved_locks(); // Test scan @@ -7784,13 +7784,13 @@ mod tests { extract_key_error(&scan(ctx.clone(), Key::from_raw(b"a"), None, false).unwrap_err()); assert_eq!(key_error.get_locked().get_key(), b"key"); ctx.set_resolved_locks(vec![10]); - assert!(scan(ctx.clone(), Key::from_raw(b"a"), None, false).is_ok()); + scan(ctx.clone(), Key::from_raw(b"a"), None, false).unwrap(); ctx.take_resolved_locks(); let key_error = extract_key_error(&scan(ctx.clone(), Key::from_raw(b"\xff"), None, true).unwrap_err()); assert_eq!(key_error.get_locked().get_key(), b"key"); ctx.set_resolved_locks(vec![10]); - assert!(scan(ctx.clone(), Key::from_raw(b"\xff"), None, false).is_ok()); + scan(ctx.clone(), Key::from_raw(b"\xff"), None, false).unwrap(); ctx.take_resolved_locks(); // Ignore memory locks in resolved or committed locks. @@ -7816,14 +7816,14 @@ mod tests { consumer.take_data() }; let res = batch_get_command(req2.clone()); - assert!(res[0].is_ok()); + res[0].as_ref().unwrap(); let key_error = extract_key_error(res[1].as_ref().unwrap_err()); assert_eq!(key_error.get_locked().get_key(), b"key"); // Ignore memory locks in resolved or committed locks. 
req2.mut_context().set_resolved_locks(vec![10]); let res = batch_get_command(req2.clone()); - assert!(res[0].is_ok()); - assert!(res[1].is_ok()); + res[0].as_ref().unwrap(); + res[1].as_ref().unwrap(); req2.mut_context().take_resolved_locks(); } @@ -8661,7 +8661,7 @@ mod tests { assert!(res.is_err(), "case {}", i); assert_eq!(res.unwrap_err().error_code(), err, "case {}", i); } else { - assert!(res.is_ok(), "case {}", i); + assert!(res.is_ok(), "case {} {:?}", i, res); } } } @@ -8717,7 +8717,7 @@ mod tests { assert!(res.is_err()); assert_eq!(res.unwrap_err().error_code(), err); } else { - assert!(res.is_ok()); + res.unwrap(); } }; @@ -8955,7 +8955,7 @@ mod tests { }), ) .unwrap(); - assert!(rx.recv().unwrap().is_ok()); + rx.recv().unwrap().unwrap(); // After prewrite, the memory lock should be removed. { let pessimistic_locks = txn_ext.pessimistic_locks.read(); @@ -9014,6 +9014,6 @@ mod tests { ) .unwrap(); // Prewrite still succeeds - assert!(rx.recv().unwrap().is_ok()); + rx.recv().unwrap().unwrap(); } } diff --git a/src/storage/mvcc/consistency_check.rs b/src/storage/mvcc/consistency_check.rs index c27b96840d0..fba4f207054 100644 --- a/src/storage/mvcc/consistency_check.rs +++ b/src/storage/mvcc/consistency_check.rs @@ -567,7 +567,7 @@ mod tests { let mut count = 0; for key_and_mvcc in scan_mvcc(b"z", &[], 30) { - assert!(key_and_mvcc.is_ok()); + key_and_mvcc.unwrap(); count += 1; } assert_eq!(count, 7); diff --git a/src/storage/mvcc/mod.rs b/src/storage/mvcc/mod.rs index 31631f34152..07d0093e71c 100644 --- a/src/storage/mvcc/mod.rs +++ b/src/storage/mvcc/mod.rs @@ -631,9 +631,8 @@ pub mod tests { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = SnapshotReader::new(start_ts.into(), snapshot, true); - let ret = reader.get_txn_commit_record(&Key::from_raw(key)); - assert!(ret.is_ok()); - match ret.unwrap().info() { + let ret = reader.get_txn_commit_record(&Key::from_raw(key)).unwrap(); + match ret.info() { None => {} Some((_, 
write_type)) => { assert_eq!(write_type, WriteType::Rollback); diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index a5343b234ac..3dd95d4045d 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -413,7 +413,7 @@ pub(crate) mod tests { must_commit(&engine, k1, 4, 5); // After delete "k1", insert returns ok. - assert!(try_prewrite_insert(&engine, k1, v2, k1, 6).is_ok()); + try_prewrite_insert(&engine, k1, v2, k1, 6).unwrap(); must_commit(&engine, k1, 6, 7); // Rollback @@ -434,7 +434,7 @@ pub(crate) mod tests { must_rollback(&engine, k1, 12, false); // After delete "k1", insert returns ok. - assert!(try_prewrite_insert(&engine, k1, v2, k1, 13).is_ok()); + try_prewrite_insert(&engine, k1, v2, k1, 13).unwrap(); must_commit(&engine, k1, 13, 14); } @@ -453,9 +453,9 @@ pub(crate) mod tests { must_commit(&engine, k1, 4, 5); // After delete "k1", check_not_exists returns ok. - assert!(try_prewrite_check_not_exists(&engine, k1, k1, 6).is_ok()); + try_prewrite_check_not_exists(&engine, k1, k1, 6).unwrap(); - assert!(try_prewrite_insert(&engine, k1, v2, k1, 7).is_ok()); + try_prewrite_insert(&engine, k1, v2, k1, 7).unwrap(); must_commit(&engine, k1, 7, 8); // Rollback @@ -472,7 +472,7 @@ pub(crate) mod tests { must_rollback(&engine, k1, 13, false); // After delete "k1", check_not_exists returns ok. 
- assert!(try_prewrite_check_not_exists(&engine, k1, k1, 14).is_ok()); + try_prewrite_check_not_exists(&engine, k1, k1, 14).unwrap(); } #[test] diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index fb32f767bd5..66194cd08fa 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -1392,7 +1392,7 @@ mod tests { let cmd: TypedCommand<()> = req.into(); let (cb, f) = paired_future_callback(); scheduler.run_cmd(cmd.cmd, StorageCallback::Boolean(cb)); - assert!(block_on(f).unwrap().is_ok()); + block_on(f).unwrap().unwrap(); } #[test] @@ -1450,7 +1450,7 @@ mod tests { let cmd: TypedCommand<()> = req.into(); let (cb, f) = paired_future_callback(); scheduler.run_cmd(cmd.cmd, StorageCallback::Boolean(cb)); - assert!(block_on(f).unwrap().is_ok()); + block_on(f).unwrap().unwrap(); } #[test] @@ -1516,7 +1516,7 @@ mod tests { let cmd: TypedCommand = req.into(); let (cb, f) = paired_future_callback(); scheduler.run_cmd(cmd.cmd, StorageCallback::TxnStatus(cb)); - assert!(block_on(f).unwrap().is_ok()); + block_on(f).unwrap().unwrap(); } #[test] @@ -1574,7 +1574,7 @@ mod tests { let cmd: TypedCommand<()> = req.into(); let (cb, f) = paired_future_callback(); scheduler.run_cmd(cmd.cmd, StorageCallback::Boolean(cb)); - assert!(block_on(f).is_ok()); + block_on(f).unwrap().unwrap(); } #[test] diff --git a/src/storage/txn/store.rs b/src/storage/txn/store.rs index 2cd4afaf932..c85bd828c08 100644 --- a/src/storage/txn/store.rs +++ b/src/storage/txn/store.rs @@ -970,18 +970,16 @@ mod tests { let bound_b = Key::from_encoded(b"b".to_vec()); let bound_c = Key::from_encoded(b"c".to_vec()); let bound_d = Key::from_encoded(b"d".to_vec()); - assert!(store.scanner(false, false, false, None, None).is_ok()); - assert!( - store - .scanner( - false, - false, - false, - Some(bound_b.clone()), - Some(bound_c.clone()) - ) - .is_ok() - ); + store.scanner(false, false, false, None, None).unwrap(); + store + .scanner( + false, + false, + false, + 
Some(bound_b.clone()), + Some(bound_c.clone()), + ) + .unwrap(); assert!( store .scanner( @@ -1021,22 +1019,16 @@ mod tests { Default::default(), false, ); - assert!(store2.scanner(false, false, false, None, None).is_ok()); - assert!( - store2 - .scanner(false, false, false, Some(bound_a.clone()), None) - .is_ok() - ); - assert!( - store2 - .scanner(false, false, false, Some(bound_a), Some(bound_b)) - .is_ok() - ); - assert!( - store2 - .scanner(false, false, false, None, Some(bound_c)) - .is_ok() - ); + store2.scanner(false, false, false, None, None).unwrap(); + store2 + .scanner(false, false, false, Some(bound_a.clone()), None) + .unwrap(); + store2 + .scanner(false, false, false, Some(bound_a), Some(bound_b)) + .unwrap(); + store2 + .scanner(false, false, false, None, Some(bound_c)) + .unwrap(); } fn gen_fixture_store() -> FixtureStore { diff --git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index 4c94aeb1249..c97bdd72fac 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -152,7 +152,7 @@ fn bench_async_snapshots_noop(b: &mut test::Bencher) { b.iter(|| { let cb1: EngineCallback> = Box::new(move |res| { - assert!(res.is_ok()); + res.unwrap(); }); let cb2: EngineCallback> = Box::new(move |res| { if let Ok(CmdRes::Snap(snap)) = res { diff --git a/tests/failpoints/cases/test_disk_full.rs b/tests/failpoints/cases/test_disk_full.rs index be027ae7217..f1b135ef86a 100644 --- a/tests/failpoints/cases/test_disk_full.rs +++ b/tests/failpoints/cases/test_disk_full.rs @@ -67,7 +67,7 @@ fn ensure_disk_usage_is_reported( let peer = new_peer(store_id, peer_id); let key = region.get_start_key(); let ch = async_read_on_peer(cluster, peer, region.clone(), key, true, true); - assert!(ch.recv_timeout(Duration::from_secs(1)).is_ok()); + ch.recv_timeout(Duration::from_secs(1)).unwrap(); } fn test_disk_full_leader_behaviors(usage: DiskUsage) { diff --git a/tests/failpoints/cases/test_merge.rs 
b/tests/failpoints/cases/test_merge.rs index 713ab4c5a5d..92785fcfa1e 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -1335,22 +1335,20 @@ fn test_merge_with_concurrent_pessimistic_locking() { let snapshot = cluster.must_get_snapshot_of_region(left.id); let txn_ext = snapshot.txn_ext.unwrap(); - assert!( - txn_ext - .pessimistic_locks - .write() - .insert(vec![( - Key::from_raw(b"k0"), - PessimisticLock { - primary: b"k0".to_vec().into_boxed_slice(), - start_ts: 10.into(), - ttl: 3000, - for_update_ts: 20.into(), - min_commit_ts: 30.into(), - }, - )]) - .is_ok() - ); + txn_ext + .pessimistic_locks + .write() + .insert(vec![( + Key::from_raw(b"k0"), + PessimisticLock { + primary: b"k0".to_vec().into_boxed_slice(), + start_ts: 10.into(), + ttl: 3000, + for_update_ts: 20.into(), + min_commit_ts: 30.into(), + }, + )]) + .unwrap(); let addr = cluster.sim.rl().get_addr(1); let env = Arc::new(Environment::new(1)); @@ -1436,16 +1434,14 @@ fn test_merge_pessimistic_locks_with_concurrent_prewrite() { for_update_ts: 20.into(), min_commit_ts: 30.into(), }; - assert!( - txn_ext - .pessimistic_locks - .write() - .insert(vec![ - (Key::from_raw(b"k0"), lock.clone()), - (Key::from_raw(b"k1"), lock), - ]) - .is_ok() - ); + txn_ext + .pessimistic_locks + .write() + .insert(vec![ + (Key::from_raw(b"k0"), lock.clone()), + (Key::from_raw(b"k1"), lock), + ]) + .unwrap(); let mut mutation = Mutation::default(); mutation.set_op(Op::Put); @@ -1517,13 +1513,11 @@ fn test_retry_pending_prepare_merge_fail() { for_update_ts: 20.into(), min_commit_ts: 30.into(), }; - assert!( - txn_ext - .pessimistic_locks - .write() - .insert(vec![(Key::from_raw(b"k1"), l1)]) - .is_ok() - ); + txn_ext + .pessimistic_locks + .write() + .insert(vec![(Key::from_raw(b"k1"), l1)]) + .unwrap(); // Pause apply and write some data to the left region fail::cfg("on_handle_apply", "pause").unwrap(); @@ -1593,13 +1587,11 @@ fn test_merge_pessimistic_locks_propose_fail() { 
for_update_ts: 20.into(), min_commit_ts: 30.into(), }; - assert!( - txn_ext - .pessimistic_locks - .write() - .insert(vec![(Key::from_raw(b"k1"), lock)]) - .is_ok() - ); + txn_ext + .pessimistic_locks + .write() + .insert(vec![(Key::from_raw(b"k1"), lock)]) + .unwrap(); fail::cfg("raft_propose", "pause").unwrap(); diff --git a/tests/failpoints/cases/test_pd_client.rs b/tests/failpoints/cases/test_pd_client.rs index 5eba2b298a1..22871994f82 100644 --- a/tests/failpoints/cases/test_pd_client.rs +++ b/tests/failpoints/cases/test_pd_client.rs @@ -118,7 +118,6 @@ fn test_load_global_config() { ) .await }); - assert!(res.is_ok()); for (k, v) in res.unwrap() { assert_eq!(k, format!("/global/config/{}", v)) } diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 92aee023fa5..6a67e83ef1b 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -80,7 +80,7 @@ fn test_follower_slow_split() { // After the follower split success, it will response to the pending vote. fail::cfg("apply_before_split_1_3", "off").unwrap(); - assert!(rx.recv_timeout(Duration::from_millis(100)).is_ok()); + rx.recv_timeout(Duration::from_millis(100)).unwrap(); } #[test] @@ -164,7 +164,7 @@ fn test_split_lost_request_vote() { // After the follower split success, it will response to the pending vote. 
fail::cfg("apply_after_split_1_3", "off").unwrap(); - assert!(rx.recv_timeout(Duration::from_millis(100)).is_ok()); + rx.recv_timeout(Duration::from_millis(100)).unwrap(); } fn gen_split_region() -> (Region, Region, Region) { @@ -951,14 +951,12 @@ fn test_split_pessimistic_locks_with_concurrent_prewrite() { }; { let mut locks = txn_ext.pessimistic_locks.write(); - assert!( - locks - .insert(vec![ - (Key::from_raw(b"a"), lock_a), - (Key::from_raw(b"c"), lock_c) - ]) - .is_ok() - ); + locks + .insert(vec![ + (Key::from_raw(b"a"), lock_a), + (Key::from_raw(b"c"), lock_c), + ]) + .unwrap(); } let mut mutation = Mutation::default(); diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 85dfe054c63..17e9957d947 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -1311,7 +1311,7 @@ fn test_resolve_lock_deadline() { }), ) .unwrap(); - assert!(rx.recv().unwrap().is_ok()); + rx.recv().unwrap().unwrap(); // Resolve lock, this needs two rounds, two process_read and two process_write. // So it needs more than 400ms. It will exceed the deadline. diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 419d923b0d7..c9f7a70ee09 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -334,7 +334,7 @@ fn test_max_commit_ts_error() { assert!(res.one_pc_commit_ts.is_zero()); // There should not be any memory lock left. - assert!(cm.read_range_check(None, None, |_, _| Err(())).is_ok()); + cm.read_range_check(None, None, |_, _| Err(())).unwrap(); // Two locks should be written, the second one does not async commit. 
let l1 = must_locked(&storage.get_engine(), b"k1", 10); @@ -566,13 +566,11 @@ fn test_concurrent_write_after_transfer_leader_invalidates_locks() { for_update_ts: 20.into(), min_commit_ts: 30.into(), }; - assert!( - txn_ext - .pessimistic_locks - .write() - .insert(vec![(Key::from_raw(b"key"), lock.clone())]) - .is_ok() - ); + txn_ext + .pessimistic_locks + .write() + .insert(vec![(Key::from_raw(b"key"), lock.clone())]) + .unwrap(); let region = cluster.get_region(b""); let leader = region.get_peers()[0].clone(); diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index 87b05042a30..9ad2816d3d3 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -118,22 +118,20 @@ fn test_delete_lock_proposed_after_proposing_locks_impl(transfer_msg_count: usiz let snapshot = cluster.must_get_snapshot_of_region(region_id); let txn_ext = snapshot.txn_ext.unwrap(); - assert!( - txn_ext - .pessimistic_locks - .write() - .insert(vec![( - Key::from_raw(b"key"), - PessimisticLock { - primary: b"key".to_vec().into_boxed_slice(), - start_ts: 10.into(), - ttl: 1000, - for_update_ts: 10.into(), - min_commit_ts: 20.into(), - }, - )]) - .is_ok() - ); + txn_ext + .pessimistic_locks + .write() + .insert(vec![( + Key::from_raw(b"key"), + PessimisticLock { + primary: b"key".to_vec().into_boxed_slice(), + start_ts: 10.into(), + ttl: 1000, + for_update_ts: 10.into(), + min_commit_ts: 20.into(), + }, + )]) + .unwrap(); let addr = cluster.sim.rl().get_addr(1); let env = Arc::new(Environment::new(1)); @@ -197,22 +195,20 @@ fn test_delete_lock_proposed_before_proposing_locks() { let snapshot = cluster.must_get_snapshot_of_region(region_id); let txn_ext = snapshot.txn_ext.unwrap(); - assert!( - txn_ext - .pessimistic_locks - .write() - .insert(vec![( - Key::from_raw(b"key"), - PessimisticLock { - primary: b"key".to_vec().into_boxed_slice(), - start_ts: 10.into(), - ttl: 1000, - 
for_update_ts: 10.into(), - min_commit_ts: 20.into(), - }, - )]) - .is_ok() - ); + txn_ext + .pessimistic_locks + .write() + .insert(vec![( + Key::from_raw(b"key"), + PessimisticLock { + primary: b"key".to_vec().into_boxed_slice(), + start_ts: 10.into(), + ttl: 1000, + for_update_ts: 10.into(), + min_commit_ts: 20.into(), + }, + )]) + .unwrap(); let addr = cluster.sim.rl().get_addr(1); let env = Arc::new(Environment::new(1)); @@ -281,22 +277,20 @@ fn test_read_lock_after_become_follower() { let snapshot = cluster.must_get_snapshot_of_region(region_id); let txn_ext = snapshot.txn_ext.unwrap(); let for_update_ts = block_on(cluster.pd_client.get_tso()).unwrap(); - assert!( - txn_ext - .pessimistic_locks - .write() - .insert(vec![( - Key::from_raw(b"key"), - PessimisticLock { - primary: b"key".to_vec().into_boxed_slice(), - start_ts, - ttl: 1000, - for_update_ts, - min_commit_ts: for_update_ts, - }, - )]) - .is_ok() - ); + txn_ext + .pessimistic_locks + .write() + .insert(vec![( + Key::from_raw(b"key"), + PessimisticLock { + primary: b"key".to_vec().into_boxed_slice(), + start_ts, + ttl: 1000, + for_update_ts, + min_commit_ts: for_update_ts, + }, + )]) + .unwrap(); let addr = cluster.sim.rl().get_addr(3); let env = Arc::new(Environment::new(1)); diff --git a/tests/integrations/config/test_config_client.rs b/tests/integrations/config/test_config_client.rs index 96299de22a3..fa45d08b24a 100644 --- a/tests/integrations/config/test_config_client.rs +++ b/tests/integrations/config/test_config_client.rs @@ -223,8 +223,8 @@ raft-log-gc-threshold = 2000 50 ); // config update from config file - assert!(cfg_controller.update_from_toml_file().is_ok()); - // after update this configuration item should be constant with the modified + cfg_controller.update_from_toml_file().unwrap(); + // after update this configration item should be constant with the modified // configuration file assert_eq!( cfg_controller diff --git a/tests/integrations/pd/test_rpc_client.rs 
b/tests/integrations/pd/test_rpc_client.rs index 3a3967c25a8..a6ac43235f3 100644 --- a/tests/integrations/pd/test_rpc_client.rs +++ b/tests/integrations/pd/test_rpc_client.rs @@ -32,11 +32,11 @@ fn test_retry_rpc_client() { server.stop(); let child = thread::spawn(move || { let cfg = new_config(m_eps); - assert_eq!(RpcClient::new(&cfg, None, m_mgr).is_ok(), true); + RpcClient::new(&cfg, None, m_mgr).unwrap(); }); thread::sleep(Duration::from_millis(500)); server.start(&mgr, eps); - assert_eq!(child.join().is_ok(), true); + child.join().unwrap(); } #[test] diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 9cff738fdfe..d378c55c5e6 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -1294,13 +1294,11 @@ fn test_propose_in_memory_pessimistic_locks() { for_update_ts: 20.into(), min_commit_ts: 30.into(), }; - assert!( - txn_ext - .pessimistic_locks - .write() - .insert(vec![(Key::from_raw(b"k1"), l1.clone())]) - .is_ok() - ); + txn_ext + .pessimistic_locks + .write() + .insert(vec![(Key::from_raw(b"k1"), l1.clone())]) + .unwrap(); // Insert lock l2 into the right region let snapshot = cluster.must_get_snapshot_of_region(right.id); @@ -1312,13 +1310,11 @@ fn test_propose_in_memory_pessimistic_locks() { for_update_ts: 20.into(), min_commit_ts: 30.into(), }; - assert!( - txn_ext - .pessimistic_locks - .write() - .insert(vec![(Key::from_raw(b"k3"), l2.clone())]) - .is_ok() - ); + txn_ext + .pessimistic_locks + .write() + .insert(vec![(Key::from_raw(b"k3"), l2.clone())]) + .unwrap(); // Merge left region into the right region pd_client.must_merge(left.id, right.id); @@ -1386,7 +1382,7 @@ fn test_merge_pessimistic_locks_when_gap_is_too_large() { let res = cluster.async_put(b"k1", b"new_val").unwrap(); cluster.clear_send_filters(); - assert!(res.recv().is_ok()); + res.recv().unwrap(); assert_eq!(cluster.must_get(b"k1").unwrap(), b"new_val"); } @@ -1421,13 +1417,11 @@ 
fn test_merge_pessimistic_locks_repeated_merge() { for_update_ts: 20.into(), min_commit_ts: 30.into(), }; - assert!( - txn_ext - .pessimistic_locks - .write() - .insert(vec![(Key::from_raw(b"k1"), lock.clone())]) - .is_ok() - ); + txn_ext + .pessimistic_locks + .write() + .insert(vec![(Key::from_raw(b"k1"), lock.clone())]) + .unwrap(); // Filter MsgAppend, so the proposed PrepareMerge will not succeed cluster.add_send_filter(CloneFilterFactory( diff --git a/tests/integrations/raftstore/test_multi.rs b/tests/integrations/raftstore/test_multi.rs index d7c527b5fd9..656f6d57d2d 100644 --- a/tests/integrations/raftstore/test_multi.rs +++ b/tests/integrations/raftstore/test_multi.rs @@ -822,22 +822,20 @@ fn test_leader_drop_with_pessimistic_lock() { .get_txn_ext() .unwrap() .clone(); - assert!( - txn_ext - .pessimistic_locks - .write() - .insert(vec![( - Key::from_raw(b"k1"), - PessimisticLock { - primary: b"k1".to_vec().into_boxed_slice(), - start_ts: 10.into(), - ttl: 1000, - for_update_ts: 10.into(), - min_commit_ts: 10.into(), - }, - )]) - .is_ok() - ); + txn_ext + .pessimistic_locks + .write() + .insert(vec![( + Key::from_raw(b"k1"), + PessimisticLock { + primary: b"k1".to_vec().into_boxed_slice(), + start_ts: 10.into(), + ttl: 1000, + for_update_ts: 10.into(), + min_commit_ts: 10.into(), + }, + )]) + .unwrap(); // Isolate node 1, leader should be transferred to another node. cluster.add_send_filter(IsolationFilterFactory::new(1)); diff --git a/tests/integrations/raftstore/test_replica_read.rs b/tests/integrations/raftstore/test_replica_read.rs index 8961008d4a5..a2ae4ab0f31 100644 --- a/tests/integrations/raftstore/test_replica_read.rs +++ b/tests/integrations/raftstore/test_replica_read.rs @@ -317,8 +317,8 @@ fn test_read_index_out_of_order() { // After peer 2 is removed, we can get 2 read responses. 
let resp2 = async_read_on_peer(&mut cluster, new_peer(1, 1), r1, b"k1", true, true); - assert!(resp2.recv_timeout(Duration::from_secs(1)).is_ok()); - assert!(resp1.recv_timeout(Duration::from_secs(1)).is_ok()); + resp2.recv_timeout(Duration::from_secs(1)).unwrap(); + resp1.recv_timeout(Duration::from_secs(1)).unwrap(); } #[test] diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 91022892f96..a7664e8ccf0 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -973,14 +973,12 @@ fn test_split_with_in_memory_pessimistic_locks() { }; { let mut locks = txn_ext.pessimistic_locks.write(); - assert!( - locks - .insert(vec![ - (Key::from_raw(b"a"), lock_a.clone()), - (Key::from_raw(b"c"), lock_c.clone()) - ]) - .is_ok() - ); + locks + .insert(vec![ + (Key::from_raw(b"a"), lock_a.clone()), + (Key::from_raw(b"c"), lock_c.clone()), + ]) + .unwrap(); } let region = cluster.get_region(b""); diff --git a/tests/integrations/raftstore/test_transfer_leader.rs b/tests/integrations/raftstore/test_transfer_leader.rs index 86789fc8f7f..b360bd3da58 100644 --- a/tests/integrations/raftstore/test_transfer_leader.rs +++ b/tests/integrations/raftstore/test_transfer_leader.rs @@ -215,7 +215,7 @@ fn test_transfer_leader_during_snapshot(cluster: &mut Cluster) cluster.transfer_leader(r1, new_peer(2, 2)); let resp = cluster.call_command_on_leader(put, Duration::from_secs(5)); // if it's transferring leader, resp will timeout. 
- assert!(resp.is_ok(), "{:?}", resp); + resp.unwrap(); must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); } @@ -299,11 +299,9 @@ fn test_propose_in_memory_pessimistic_locks() { { let mut pessimistic_locks = txn_ext.pessimistic_locks.write(); assert!(pessimistic_locks.is_writable()); - assert!( - pessimistic_locks - .insert(vec![(Key::from_raw(b"key"), lock.clone())]) - .is_ok() - ); + pessimistic_locks + .insert(vec![(Key::from_raw(b"key"), lock.clone())]) + .unwrap(); } cluster.must_transfer_leader(1, new_peer(2, 2)); @@ -338,13 +336,11 @@ fn test_memory_pessimistic_locks_status_after_transfer_leader_failure() { min_commit_ts: 30.into(), }; // Write a pessimistic lock to the in-memory pessimistic lock table. - assert!( - txn_ext - .pessimistic_locks - .write() - .insert(vec![(Key::from_raw(b"key"), lock)]) - .is_ok() - ); + txn_ext + .pessimistic_locks + .write() + .insert(vec![(Key::from_raw(b"key"), lock)]) + .unwrap(); // Make it fail to transfer leader cluster.add_send_filter(CloneFilterFactory( diff --git a/tests/integrations/server/gc_worker.rs b/tests/integrations/server/gc_worker.rs index 4f521cb1da7..59dc776dcca 100644 --- a/tests/integrations/server/gc_worker.rs +++ b/tests/integrations/server/gc_worker.rs @@ -301,7 +301,7 @@ fn test_gc_bypass_raft() { } let gc_sched = cluster.sim.rl().get_gc_worker(1).scheduler(); - assert!(sync_gc(&gc_sched, 0, b"k1".to_vec(), b"k2".to_vec(), 200.into()).is_ok()); + sync_gc(&gc_sched, 0, b"k1".to_vec(), b"k2".to_vec(), 200.into()).unwrap(); for &start_ts in &[10, 20, 30] { let commit_ts = start_ts + 5; diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 95d1494c660..366de3c0493 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -1345,7 +1345,7 @@ fn test_prewrite_check_max_commit_ts() { } // There shouldn't be locks remaining in the lock table. 
- assert!(cm.read_range_check(None, None, |_, _| Err(())).is_ok()); + cm.read_range_check(None, None, |_, _| Err(())).unwrap(); } #[test] diff --git a/tests/integrations/server/security.rs b/tests/integrations/server/security.rs index 8243aca6c46..71a0979a005 100644 --- a/tests/integrations/server/security.rs +++ b/tests/integrations/server/security.rs @@ -24,8 +24,7 @@ fn test_check_cn_success() { let channel = ChannelBuilder::new(env).secure_connect(&addr, cred); let client = TikvClient::new(channel); - let status = client.kv_get(&GetRequest::default()); - assert!(status.is_ok()); + client.kv_get(&GetRequest::default()).unwrap(); } #[test] diff --git a/tests/integrations/server/status_server.rs b/tests/integrations/server/status_server.rs index ac9139a6374..84a4de39b25 100644 --- a/tests/integrations/server/status_server.rs +++ b/tests/integrations/server/status_server.rs @@ -52,7 +52,7 @@ fn test_region_meta_endpoint() { ) .unwrap(); let addr = format!("127.0.0.1:{}", test_util::alloc_port()); - assert!(status_server.start(addr).is_ok()); + status_server.start(addr).unwrap(); let check_task = check(status_server.listening_addr(), region_id); let rt = tokio::runtime::Runtime::new().unwrap(); if let Err(err) = rt.block_on(check_task) { diff --git a/tests/integrations/storage/test_raft_storage.rs b/tests/integrations/storage/test_raft_storage.rs index f828870e964..ef1ee5402e6 100644 --- a/tests/integrations/storage/test_raft_storage.rs +++ b/tests/integrations/storage/test_raft_storage.rs @@ -98,8 +98,9 @@ fn test_raft_storage_get_after_lease() { #[test] fn test_raft_storage_rollback_before_prewrite() { let (_cluster, storage, ctx) = new_raft_storage(); - let ret = storage.rollback(ctx.clone(), vec![Key::from_raw(b"key")], 10); - assert!(ret.is_ok()); + storage + .rollback(ctx.clone(), vec![Key::from_raw(b"key")], 10) + .unwrap(); let ret = storage.prewrite( ctx, vec![Mutation::make_put(Key::from_raw(b"key"), b"value".to_vec())], diff --git 
a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index ec8bf906e1c..7b1aab71183 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -40,25 +40,21 @@ fn test_turnoff_titan() { let size = 5; for i in 0..size { - assert!( - cluster - .put( - format!("k{:02}0", i).as_bytes(), - format!("v{}", i).as_bytes(), - ) - .is_ok() - ); + cluster + .put( + format!("k{:02}0", i).as_bytes(), + format!("v{}", i).as_bytes(), + ) + .unwrap(); } cluster.must_flush_cf(CF_DEFAULT, true); for i in 0..size { - assert!( - cluster - .put( - format!("k{:02}1", i).as_bytes(), - format!("v{}", i).as_bytes(), - ) - .is_ok() - ); + cluster + .put( + format!("k{:02}1", i).as_bytes(), + format!("v{}", i).as_bytes(), + ) + .unwrap(); } cluster.must_flush_cf(CF_DEFAULT, true); for i in cluster.get_node_ids().into_iter() { @@ -96,7 +92,7 @@ fn test_turnoff_titan() { for i in cluster.get_node_ids().into_iter() { let db = cluster.get_engine(i); let opt = vec![("blob_run_mode", "kFallback")]; - assert!(db.set_options_cf(CF_DEFAULT, &opt).is_ok()); + db.set_options_cf(CF_DEFAULT, &opt).unwrap(); } cluster.compact_data(); let mut all_check_pass = true; From 940e1395869e2d92aa91eb2d59380ce894125b70 Mon Sep 17 00:00:00 2001 From: 5kbpers Date: Fri, 29 Jul 2022 18:41:13 +0800 Subject: [PATCH 0123/1149] raftstore: use force_send to send ApplyRes (#13168) close tikv/tikv#13160 Use force_send to send ApplyRes Signed-off-by: 5kbpers Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/fsm/apply.rs | 1 + components/raftstore/src/store/fsm/store.rs | 14 +++++--- components/tikv_util/src/mpsc/mod.rs | 7 +++- tests/failpoints/cases/test_split_region.rs | 39 ++++++++++++++++++++- 4 files changed, 55 insertions(+), 6 deletions(-) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 3b9546a460c..938ea526894 100644 --- a/components/raftstore/src/store/fsm/apply.rs 
+++ b/components/raftstore/src/store/fsm/apply.rs @@ -644,6 +644,7 @@ where let is_synced = self.write_to_db(); if !self.apply_res.is_empty() { + fail_point!("before_nofity_apply_res"); let apply_res = mem::take(&mut self.apply_res); self.notifier.notify(apply_res); } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 635ff2c6693..28abf24083b 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -292,16 +292,21 @@ where { fn notify(&self, apply_res: Vec>) { for r in apply_res { - self.router.try_send( - r.region_id, + let region_id = r.region_id; + if let Err(e) = self.router.force_send( + region_id, PeerMsg::ApplyRes { res: ApplyTaskRes::Apply(r), }, - ); + ) { + error!("failed to send apply result"; "region_id" => region_id, "err" => ?e); + } } } fn notify_one(&self, region_id: u64, msg: PeerMsg) { - self.router.try_send(region_id, msg); + if let Err(e) = self.router.force_send(region_id, msg) { + error!("failed to notify apply msg"; "region_id" => region_id, "err" => ?e); + } } fn clone_box(&self) -> Box> { @@ -795,6 +800,7 @@ impl PollHandler, St where for<'a> F: FnOnce(&'a BatchSystemConfig), { + fail_point!("begin_raft_poller"); self.previous_metrics = self.poll_ctx.raft_metrics.ready.clone(); self.poll_ctx.pending_count = 0; self.poll_ctx.ready_count = 0; diff --git a/components/tikv_util/src/mpsc/mod.rs b/components/tikv_util/src/mpsc/mod.rs index fbd089ebb9e..ccec5448d0b 100644 --- a/components/tikv_util/src/mpsc/mod.rs +++ b/components/tikv_util/src/mpsc/mod.rs @@ -17,6 +17,7 @@ use std::{ use crossbeam::channel::{ self, RecvError, RecvTimeoutError, SendError, TryRecvError, TrySendError, }; +use fail::fail_point; struct State { sender_cnt: AtomicIsize, @@ -236,7 +237,11 @@ impl LooseBoundedSender { #[inline] pub fn try_send(&self, t: T) -> Result<(), TrySendError> { let cnt = self.tried_cnt.get(); - if cnt < CHECK_INTERVAL { + let 
check_interval = || { + fail_point!("loose_bounded_sender_check_interval", |_| 0); + CHECK_INTERVAL + }; + if cnt < check_interval() { self.tried_cnt.set(cnt + 1); } else if self.len() < self.limit { self.tried_cnt.set(1); diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 6a67e83ef1b..aab1fe3d879 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -21,7 +21,7 @@ use kvproto::{ use pd_client::PdClient; use raft::eraftpb::MessageType; use raftstore::{ - store::{config::Config as RaftstoreConfig, util::is_vote_msg, Callback}, + store::{config::Config as RaftstoreConfig, util::is_vote_msg, Callback, PeerMsg}, Result, }; use test_raftstore::*; @@ -1061,3 +1061,40 @@ fn test_split_replace_skip_log_gc() { cluster.must_put(b"k4", b"v4"); must_get_equal(&cluster.get_engine(2), b"k4", b"v4"); } + +#[test] +fn test_split_store_channel_full() { + let mut cluster = new_node_cluster(0, 1); + cluster.cfg.raft_store.notify_capacity = 10; + cluster.cfg.raft_store.store_batch_system.max_batch_size = Some(1); + cluster.cfg.raft_store.messages_per_tick = 1; + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + cluster.run(); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k2", b"v2"); + let region = pd_client.get_region(b"k2").unwrap(); + let apply_fp = "before_nofity_apply_res"; + fail::cfg(apply_fp, "pause").unwrap(); + let (tx, rx) = mpsc::channel(); + cluster.split_region( + ®ion, + b"k2", + Callback::write(Box::new(move |_| tx.send(()).unwrap())), + ); + rx.recv().unwrap(); + let sender_fp = "loose_bounded_sender_check_interval"; + fail::cfg(sender_fp, "return").unwrap(); + let store_fp = "begin_raft_poller"; + fail::cfg(store_fp, "pause").unwrap(); + let raft_router = cluster.sim.read().unwrap().get_router(1).unwrap(); + for _ in 0..50 { + raft_router.force_send(1, PeerMsg::Noop).unwrap(); + } + fail::remove(apply_fp); + 
fail::remove(store_fp); + sleep_ms(300); + let region = pd_client.get_region(b"k1").unwrap(); + assert_ne!(region.id, 1); + fail::remove(sender_fp); +} From f96c66015da0961a5e2836c1e72d165004b471e6 Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 29 Jul 2022 21:45:13 +0800 Subject: [PATCH 0124/1149] metrics: fix wrong expression for cdc cpu usage (#13148) close tikv/tikv#13147 Signed-off-by: glorv Co-authored-by: Ti Chi Robot --- metrics/grafana/tikv_details.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 5da0ca7c0d3..8189e45d3d2 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -7717,7 +7717,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$tikv_instance\", name=~\"cdcwkr.*\"}[1m])) by (instance)", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cdcwkr.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} - worker", @@ -7725,7 +7725,7 @@ "step": 4 }, { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$tikv_instance\", name=~\"tso\"}[1m])) by (instance)", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"tso\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} - tso", @@ -7823,7 +7823,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$tikv_instance\", name=~\"cdc_.*\"}[1m])) by (instance)", + "expr": 
"sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cdc_.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", From 9120fe675cf6389d880959caf101726869a9a92e Mon Sep 17 00:00:00 2001 From: haojinming Date: Mon, 1 Aug 2022 10:18:04 +0800 Subject: [PATCH 0125/1149] CDC: fix rawkv resolved ts issue (#13142) close tikv/tikv#13144 Signed-off-by: haojinming --- components/cdc/src/delegate.rs | 57 ++++++- components/cdc/src/endpoint.rs | 84 +++++++-- components/cdc/src/initializer.rs | 1 + components/cdc/src/observer.rs | 273 +++++++++++++++++++++++++++++- components/cdc/tests/mod.rs | 2 +- components/server/src/server.rs | 2 +- 6 files changed, 391 insertions(+), 28 deletions(-) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index 10de563c4fc..f6ef0659fe0 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -24,7 +24,7 @@ use kvproto::{ }, }; use raftstore::{ - coprocessor::{Cmd, CmdBatch, ObserveHandle}, + coprocessor::{Cmd, CmdBatch, ObserveHandle, ObserveID}, store::util::compare_region_epoch, Error as RaftStoreError, }; @@ -614,19 +614,20 @@ impl Delegate { rows.push(v); } self.sink_downstream(rows, index, ChangeDataRequestKvApi::TiDb)?; - self.sink_raw_downstream(raw_rows, index) + self.sink_downstream(raw_rows, index, ChangeDataRequestKvApi::RawKv) } - fn sink_raw_downstream(&mut self, entries: Vec, index: u64) -> Result<()> { - if entries.is_empty() { - return Ok(()); + pub fn raw_untrack_ts(&mut self, cdc_id: ObserveID, max_ts: TimeStamp) { + // Stale CmdBatch, drop it silently. + if cdc_id != self.handle.id { + return; } // the entry's timestamp is non-decreasing, the last has the max ts. 
- let max_raw_ts = TimeStamp::from(entries.last().unwrap().commit_ts); + // use prev ts, see reason at CausalObserver::pre_propose_query + let max_raw_ts = max_ts.prev(); match self.resolver { Some(ref mut resolver) => { - // use prev ts, see reason at CausalObserver::pre_propose_query - resolver.raw_untrack_lock(max_raw_ts.prev()); + resolver.raw_untrack_lock(max_raw_ts); } None => { assert!(self.pending.is_some(), "region resolver not ready"); @@ -636,7 +637,6 @@ impl Delegate { .push(PendingLock::RawUntrack { ts: max_raw_ts }); } } - self.sink_downstream(entries, index, ChangeDataRequestKvApi::RawKv) } pub fn raw_track_ts(&mut self, ts: TimeStamp) { @@ -908,6 +908,16 @@ impl Delegate { // To inform transaction layer no more old values are required for the region. self.txn_extra_op.store(TxnExtraOp::Noop); } + + // if raw data and tidb data both exist in this region, it will return false. + pub fn is_raw_region(&self) -> bool { + if let Some(region) = &self.region { + ApiV2::parse_range_mode((Some(®ion.start_key), Some(®ion.end_key))) + == KeyMode::Raw + } else { + false + } + } } fn set_event_row_type(row: &mut EventRow, ty: EventLogType) { @@ -1265,4 +1275,33 @@ mod tests { } } } + + #[test] + fn test_is_raw_region() { + let region_id = 10; + let mut region = Region::default(); + region.set_id(region_id); + + // start-key, end-key, is_raw + let test_cases = vec![ + (vec![b'r', 0, 0, 0, b'a'], vec![b'r', 0, 0, 0, b'z'], true), + (vec![b'a', 0, 0, 0, b'a'], vec![b'r', 0, 0, 0, b'z'], false), + (vec![b'r', 0, 0, 0, b'a'], vec![b'z', 0, 0, 0, b'z'], false), + (vec![b'r', 0, 0, 0, b'a'], vec![b's'], true), + (vec![b'r', 0, 0, 0, b'a'], vec![], false), + (vec![], vec![], false), + ]; + for (start_key, end_key, is_raw) in &test_cases { + region.set_start_key(start_key.clone()); + region.set_end_key(end_key.clone()); + let resolver = Resolver::new(region_id); + let mut delegate = Delegate::new(region_id, Default::default()); + assert!( + delegate + 
.on_region_ready(resolver, region.clone()) + .is_empty() + ); + assert_eq!(delegate.is_raw_region(), *is_raw); + } + } } diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 22cb5b94922..4a957774a23 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -59,6 +59,7 @@ use crate::{ delegate::{on_init_downstream, Delegate, Downstream, DownstreamID, DownstreamState}, initializer::Initializer, metrics::*, + observer::RawRegionTs, old_value::{OldValueCache, OldValueCallback}, service::{Conn, ConnID, FeatureGate}, CdcObserver, Error, @@ -177,6 +178,9 @@ pub enum Task { region_id: u64, ts: TimeStamp, }, + RawUntrackTs { + raw_region_ts: Vec, + }, } impl_display_as_debug!(Task); @@ -256,6 +260,10 @@ impl fmt::Debug for Task { .field("region_id", ®ion_id) .field("ts", &ts) .finish(), + Task::RawUntrackTs { ref raw_region_ts } => de + .field("type", &"raw_untrack_ts") + .field("raw_ts", raw_region_ts) + .finish(), } } } @@ -859,6 +867,19 @@ impl, E: KvEngine> Endpoint { flush_oldvalue_stats(&statistics, TAG_DELTA_CHANGE); } + pub fn on_raw_untrack_ts(&mut self, batch_region_ts: Vec) { + for region_ts in batch_region_ts { + let region_id = region_ts.region_id; + if let Some(delegate) = self.capture_regions.get_mut(®ion_id) { + if delegate.has_failed() { + // Skip the batch if the delegate has failed. + continue; + } + delegate.raw_untrack_ts(region_ts.cdc_id, region_ts.max_ts); + } + } + } + fn on_region_ready(&mut self, observe_id: ObserveID, resolver: Resolver, region: Region) { let region_id = region.get_id(); let mut failed_downstreams = Vec::new(); @@ -954,7 +975,7 @@ impl, E: KvEngine> Endpoint { // The judge of raw region is not accuracy here, and we may miss at most one // "normal" raw region. But this will not break the correctness of outlier // detection. 
- if resolved_ts.is_min_ts_from_raw() { + if resolved_ts.is_min_ts_from_raw() || delegate.is_raw_region() { raw_resolved_regions.push(region_id, resolved_ts.raw_ts) } @@ -1335,6 +1356,7 @@ impl, E: KvEngine> Runnable for Endpoint { }, Task::ChangeConfig(change) => self.on_change_cfg(change), Task::RawTrackTs { region_id, ts } => self.on_raw_track_ts(region_id, ts), + Task::RawUntrackTs { raw_region_ts } => self.on_raw_untrack_ts(raw_region_ts), } } } @@ -1507,7 +1529,7 @@ mod tests { .unwrap() .kv_engine() }), - CdcObserver::new(task_sched), + CdcObserver::new(task_sched, api_version), Arc::new(StdMutex::new(StoreMeta::new(0))), ConcurrencyManager::new(1.into()), Arc::new(Environment::new(1)), @@ -2109,6 +2131,13 @@ mod tests { let ts = TimeStamp::compose(i, 0); suite.run(Task::RawTrackTs { region_id, ts }); } + suite.run(Task::RawUntrackTs { + raw_region_ts: vec![RawRegionTs { + region_id, + cdc_id: observe_id, + max_ts: TimeStamp::compose(125, 0), + }], + }); // untrack ts before 125 let delegate = suite.endpoint.capture_regions.get_mut(®ion_id).unwrap(); // region is not ready, so raw lock in resolver, raw ts is added to // delegate.pending. 
@@ -2131,7 +2160,7 @@ mod tests { let delegate = suite.endpoint.capture_regions.get_mut(®ion_id).unwrap(); let resolver = delegate.resolver.as_mut().unwrap(); let raw_resolved_ts = resolver.resolve(TimeStamp::compose(200, 0)).min(); - assert_eq!(raw_resolved_ts, TimeStamp::compose(100, 0)); + assert_eq!(raw_resolved_ts, TimeStamp::compose(125, 0)); } #[test] @@ -2144,7 +2173,7 @@ mod tests { let quota = crate::channel::MemoryQuota::new(usize::MAX); let (tx, _) = channel::channel(1, quota); let mut region_cnt = 0; - let mut start_ts: u64 = 200; + let start_ts: u64 = 200; let region_ids: Vec = (1..50).collect(); let dead_lock_region = 1; let dead_lock_ts = TimeStamp::compose(1, 0); @@ -2185,6 +2214,8 @@ mod tests { let mut region = Region::default(); region.id = region_id; region.set_region_epoch(region_epoch); + region.set_start_key(vec![b'r', 0, 0, 0, b'a']); + region.set_end_key(vec![b'r', 0, 0, 0, b'z']); let resolver = Resolver::new(region_id); suite.run(Task::ResolverReady { observe_id, @@ -2200,14 +2231,17 @@ mod tests { let ts = if region_id == dead_lock_region { dead_lock_ts } else { - TimeStamp::compose(start_ts, 0) + TimeStamp::compose(start_ts + 1, 0) }; - start_ts += 1; - suite.run(Task::RawTrackTs { region_id, ts }); - let delegate = suite.endpoint.capture_regions.get_mut(®ion_id).unwrap(); - let resolver = delegate.resolver.as_mut().unwrap(); - let raw_resolved_ts = resolver.resolve(cur_tso).min(); - assert_eq!(raw_resolved_ts, ts); + // Only 9 region is min_ts_from_raw, but other regions are raw regions, + // Them can also be counted. 
+ if region_id < 10 { + suite.run(Task::RawTrackTs { region_id, ts }); + let delegate = suite.endpoint.capture_regions.get_mut(®ion_id).unwrap(); + let resolver = delegate.resolver.as_mut().unwrap(); + let raw_resolved_ts = resolver.resolve(cur_tso).min(); + assert_eq!(raw_resolved_ts, ts); + } } let ob_id = suite .endpoint @@ -2245,6 +2279,34 @@ mod tests { .is_none(), true ); + let untrack_region_id = 20; + let cdc_id = suite + .endpoint + .capture_regions + .get(&untrack_region_id) + .unwrap() + .handle + .id; + let region_ts = RawRegionTs { + region_id: untrack_region_id, + cdc_id, + max_ts: TimeStamp::compose(1000, 0), + }; + suite.run(Task::RawUntrackTs { + raw_region_ts: vec![region_ts], + }); + suite + .task_rx + .recv_timeout(Duration::from_millis(100)) + .unwrap_err(); + let delegate = suite + .endpoint + .capture_regions + .get_mut(&untrack_region_id) + .unwrap(); + let resolver = delegate.resolver.as_mut().unwrap(); + let raw_resolved_ts = resolver.resolve(cur_tso).min(); + assert_eq!(raw_resolved_ts, cur_tso); // region is untracked. 
} #[test] diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 3be509e73d0..98720b7cf0c 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -237,6 +237,7 @@ impl Initializer { Scanner::TxnKvScanner(txnkv_scanner) } else { let mut iter_opt = IterOptions::default(); + iter_opt.set_fill_cache(false); let (raw_key_prefix, raw_key_prefix_end) = ApiV2::get_rawkv_range(); iter_opt.set_lower_bound(&[raw_key_prefix], DATA_KEY_PREFIX_LEN); iter_opt.set_upper_bound(&[raw_key_prefix_end], DATA_KEY_PREFIX_LEN); diff --git a/components/cdc/src/observer.rs b/components/cdc/src/observer.rs index 18b4d995077..124757d7697 100644 --- a/components/cdc/src/observer.rs +++ b/components/cdc/src/observer.rs @@ -2,16 +2,21 @@ use std::sync::{Arc, RwLock}; +use api_version::{ApiV2, KeyMode, KvFormat}; use causal_ts::{Error as CausalTsError, RawTsTracker, Result as CausalTsResult}; use collections::HashMap; use engine_traits::KvEngine; use fail::fail_point; -use kvproto::metapb::{Peer, Region}; +use kvproto::{ + kvrpcpb::ApiVersion, + metapb::{Peer, Region}, + raft_cmdpb::CmdType, +}; use raft::StateRole; use raftstore::{coprocessor::*, store::RegionSnapshot, Error as RaftStoreError}; use tikv::storage::Statistics; -use tikv_util::{box_err, error, warn, worker::Scheduler}; -use txn_types::TimeStamp; +use tikv_util::{box_err, defer, error, warn, worker::Scheduler}; +use txn_types::{Key, TimeStamp}; use crate::{ endpoint::{Deregister, Task}, @@ -19,6 +24,14 @@ use crate::{ Error as CdcError, }; +// max_ts presents the max ts in one batch. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct RawRegionTs { + pub region_id: u64, + pub cdc_id: ObserveID, + pub max_ts: TimeStamp, +} + /// An Observer for CDC. /// /// It observes raftstore internal events, such as: @@ -30,6 +43,7 @@ pub struct CdcObserver { // A shared registry for managing observed regions. 
// TODO: it may become a bottleneck, find a better way to manage the registry. observe_regions: Arc>>, + api_version: ApiVersion, } impl CdcObserver { @@ -37,10 +51,11 @@ impl CdcObserver { /// /// Events are strong ordered, so `sched` must be implemented as /// a FIFO queue. - pub fn new(sched: Scheduler) -> CdcObserver { + pub fn new(sched: Scheduler, api_version: ApiVersion) -> CdcObserver { CdcObserver { sched, observe_regions: Arc::default(), + api_version, } } @@ -91,6 +106,66 @@ impl CdcObserver { .get(®ion_id) .cloned() } + + fn untrack_raw_ts(&self, raw_region_ts: Vec) { + if raw_region_ts.is_empty() { + return; + } + if let Err(e) = self.sched.schedule(Task::RawUntrackTs { raw_region_ts }) { + warn!("cdc schedule task failed"; "error" => ?e); + } + } + + // parse rawkv cmd from CmdBatch Vec and return the max ts of every region. + pub fn get_raw_region_ts(&self, cmd_batches: &Vec) -> Vec { + if self.api_version != ApiVersion::V2 { + return vec![]; + } + let mut region_ts = vec![]; + for batch in cmd_batches { + if batch.is_empty() { + continue; + } + let region_id = batch.region_id; + let cdc_id = batch.cdc_id; + if !self + .is_subscribed(region_id) + .map_or(false, |ob_id| ob_id == cdc_id) + { + continue; + } + // Find the max ts in one batch + // The raw request's ts is non-decreasing, only need find the last one. 
+ batch.cmds.iter().rfind(|cmd| { + if let Some(last_key) = cmd + .request + .get_requests() + .iter() + .rfind(|req| { + CmdType::Put == req.get_cmd_type() + && ApiV2::parse_key_mode(req.get_put().get_key()) == KeyMode::Raw + }) + .map(|req| req.get_put().get_key()) + { + match ApiV2::decode_raw_key_owned(Key::from_encoded_slice(last_key), true) { + Ok((_, ts)) => { + region_ts.push(RawRegionTs { + region_id, + cdc_id, + max_ts: ts.unwrap(), + }); + } + // error is ignored, raw dead lock is resolved in Endpoint::on_min_ts + Err(e) => warn!("decode raw key fails"; "err" => ?e), + } + true + } else { + false + } + }); + } + region_ts + } } impl Coprocessor for CdcObserver {} @@ -106,6 +181,13 @@ impl CmdObserver for CdcObserver { ) { assert!(!cmd_batches.is_empty()); fail_point!("before_cdc_flush_apply"); + + // Untrack raw ts regardless of the ob level. + // Because RawKV locks is tracked regardless of observe level as it is in Raft + // propose procedure and can not get an accurate observe level. 
+ let raw_region_ts = self.get_raw_region_ts(cmd_batches); + defer!(self.untrack_raw_ts(raw_region_ts)); + if max_level < ObserveLevel::All { return; } @@ -219,7 +301,11 @@ mod tests { use std::time::Duration; use engine_rocks::RocksEngine; - use kvproto::metapb::Region; + use engine_traits::CF_WRITE; + use kvproto::{ + metapb::Region, + raft_cmdpb::{RaftCmdRequest, RaftCmdResponse, Request}, + }; use raftstore::{coprocessor::RoleChange, store::util::new_peer}; use tikv::storage::kv::TestEngineBuilder; @@ -228,7 +314,7 @@ mod tests { #[test] fn test_register_and_deregister() { let (scheduler, mut rx) = tikv_util::worker::dummy_scheduler(); - let observer = CdcObserver::new(scheduler); + let observer = CdcObserver::new(scheduler, ApiVersion::V1); let observe_info = CmdObserveInfo::from_handle( ObserveHandle::new(), ObserveHandle::new(), @@ -368,4 +454,179 @@ mod tests { observer.on_role_change(&mut ctx, &RoleChange::new(StateRole::Follower)); rx.recv_timeout(Duration::from_millis(10)).unwrap_err(); } + + fn put_cf(cf: &str, key: &[u8], value: &[u8]) -> Request { + let mut cmd = Request::default(); + cmd.set_cmd_type(CmdType::Put); + cmd.mut_put().set_cf(cf.to_owned()); + cmd.mut_put().set_key(key.to_vec()); + cmd.mut_put().set_value(value.to_vec()); + cmd + } + + #[test] + fn test_get_raw_region_ts() { + let (scheduler, mut rx) = tikv_util::worker::dummy_scheduler(); + let observer = CdcObserver::new(scheduler, ApiVersion::V2); + let region_id = 1; + let mut cmd = Cmd::new(0, 0, RaftCmdRequest::default(), RaftCmdResponse::default()); + cmd.request.mut_requests().clear(); + // Both cdc and resolved-ts worker are observing + let observe_info = CmdObserveInfo::from_handle( + ObserveHandle::new(), + ObserveHandle::new(), + ObserveHandle::default(), + ); + let mut cb = CmdBatch::new(&observe_info, region_id); + cb.push(&observe_info, region_id, cmd.clone()); + let cmd_batches = vec![cb]; + let ret = observer.get_raw_region_ts(&cmd_batches); + assert!(ret.is_empty()); + + 
let data = vec![put_cf(CF_WRITE, b"k7", b"v"), put_cf(CF_WRITE, b"k8", b"v")]; + for put in &data { + cmd.request.mut_requests().push(put.clone()); + } + let mut cb = CmdBatch::new(&observe_info, region_id); + cb.push(&observe_info, region_id, cmd.clone()); + let cmd_batches = vec![cb]; + let ret = observer.get_raw_region_ts(&cmd_batches); + assert!(ret.is_empty()); // no apiv2 key + cmd.request.mut_requests().clear(); + let data = vec![ + put_cf( + CF_WRITE, + ApiV2::encode_raw_key(b"ra", Some(TimeStamp::from(100))).as_encoded(), + b"v1", + ), + put_cf( + CF_WRITE, + ApiV2::encode_raw_key(b"rb", Some(TimeStamp::from(200))).as_encoded(), + b"v2", + ), + ]; + for put in &data { + cmd.request.mut_requests().push(put.clone()); + } + let mut cb1 = CmdBatch::new(&observe_info, region_id); + cb1.push(&observe_info, region_id, cmd.clone()); + let mut cmd2 = Cmd::new(0, 0, RaftCmdRequest::default(), RaftCmdResponse::default()); + cmd2.request.mut_requests().clear(); + let data2 = vec![ + put_cf( + CF_WRITE, + ApiV2::encode_raw_key(b"ra", Some(TimeStamp::from(300))).as_encoded(), + b"v1", + ), + put_cf( + CF_WRITE, + ApiV2::encode_raw_key(b"rb", Some(TimeStamp::from(400))).as_encoded(), + b"v2", + ), + ]; + for put in &data2 { + cmd2.request.mut_requests().push(put.clone()); + } + let mut cb2 = CmdBatch::new(&observe_info, region_id + 1); + cb2.push(&observe_info, region_id + 1, cmd2.clone()); + let mut cmd_batches = vec![cb1.clone(), cb2.clone()]; + let ret = observer.get_raw_region_ts(&cmd_batches); + assert_eq!(ret.len(), 0); // region is not subscribed. + observer.subscribe_region(region_id, observe_info.cdc_id.id); + observer.subscribe_region(region_id + 1, observe_info.cdc_id.id); + let ret = observer.get_raw_region_ts(&cmd_batches); + assert_eq!(ret.len(), 2); // two batch and both subscribed. 
+ assert_eq!( + ret[0], + RawRegionTs { + region_id, + cdc_id: observe_info.cdc_id.id, + max_ts: TimeStamp::from(200) + } + ); + assert_eq!( + ret[1], + RawRegionTs { + region_id: region_id + 1, + cdc_id: observe_info.cdc_id.id, + max_ts: TimeStamp::from(400) + } + ); + let engine = TestEngineBuilder::new().build().unwrap().get_rocksdb(); + >::on_flush_applied_cmd_batch( + &observer, + ObserveLevel::LockRelated, + &mut cmd_batches, + &engine, + ); + // schedule task even if max level is not `All`. + match rx + .recv_timeout(Duration::from_millis(100)) + .unwrap() + .unwrap() + { + Task::RawUntrackTs { raw_region_ts } => { + assert_eq!(raw_region_ts.len(), 2); // two batch and both subscribed. + assert_eq!( + raw_region_ts[0], + RawRegionTs { + region_id, + cdc_id: observe_info.cdc_id.id, + max_ts: TimeStamp::from(200) + } + ); + assert_eq!( + raw_region_ts[1], + RawRegionTs { + region_id: region_id + 1, + cdc_id: observe_info.cdc_id.id, + max_ts: TimeStamp::from(400) + } + ); + } + _ => panic!("unexpected task"), + }; + + // non-rawkv + let data3 = vec![ + put_cf( + CF_WRITE, + ApiV2::encode_raw_key(b"ra", Some(TimeStamp::from(500))).as_encoded(), + b"v1", + ), + put_cf( + CF_WRITE, // this is non-rawkv + ApiV2::encode_raw_key(b"b", Some(TimeStamp::from(600))).as_encoded(), + b"v2", + ), + ]; + let mut cmd3 = Cmd::new(0, 0, RaftCmdRequest::default(), RaftCmdResponse::default()); + for put in &data3 { + cmd3.request.mut_requests().push(put.clone()); + } + cb2.push(&observe_info, region_id + 1, cmd3.clone()); + let cmd_batches = vec![cb1, cb2]; + let ret = observer.get_raw_region_ts(&cmd_batches); + assert_eq!(ret.len(), 2); // two batch and both subscribed. 
+ assert_eq!( + ret[0], + RawRegionTs { + region_id, + cdc_id: observe_info.cdc_id.id, + max_ts: TimeStamp::from(200) + } + ); + assert_eq!( + ret[1], + RawRegionTs { + region_id: region_id + 1, + cdc_id: observe_info.cdc_id.id, + max_ts: TimeStamp::from(500) // 600 is not rawkey + } + ); + let (scheduler, _) = tikv_util::worker::dummy_scheduler(); + let observer = CdcObserver::new(scheduler, ApiVersion::V1); + let ret = observer.get_raw_region_ts(&cmd_batches); + assert!(ret.is_empty()); // v1 does nothing. + } } diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index 25283951450..63c06551a80 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -156,7 +156,7 @@ impl TestSuiteBuilder { Arc::new(cdc::CdcTxnExtraScheduler::new(worker.scheduler().clone())), ); let scheduler = worker.scheduler(); - let cdc_ob = cdc::CdcObserver::new(scheduler.clone()); + let cdc_ob = cdc::CdcObserver::new(scheduler.clone(), ApiVersion::V1); obs.insert(id, cdc_ob.clone()); sim.coprocessor_hooks.entry(id).or_default().push(Box::new( move |host: &mut CoprocessorHost| { diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 425acf6e15c..d8824453a24 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -802,7 +802,7 @@ impl TiKvServer { } // Register cdc. - let cdc_ob = cdc::CdcObserver::new(cdc_scheduler.clone()); + let cdc_ob = cdc::CdcObserver::new(cdc_scheduler.clone(), F::TAG); cdc_ob.register_to(self.coprocessor_host.as_mut().unwrap()); // Register cdc config manager. 
cfg_controller.register( From 829e5396cb8741ae8b5b33a7d2ebed33d46fd7ed Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Mon, 1 Aug 2022 15:04:05 +0800 Subject: [PATCH 0126/1149] charset: update the error message about can not convert error (#13155) close tikv/tikv#13156 Signed-off-by: xiongjiwei Co-authored-by: Ti Chi Robot --- .../src/codec/collation/encoding/ascii.rs | 5 ++++- .../src/codec/collation/encoding/gbk.rs | 5 ++++- .../src/codec/collation/encoding/mod.rs | 21 +++++++++++++++++++ .../src/codec/collation/encoding/utf8.rs | 5 ++++- .../tidb_query_datatype/src/codec/error.rs | 4 ++-- 5 files changed, 35 insertions(+), 5 deletions(-) diff --git a/components/tidb_query_datatype/src/codec/collation/encoding/ascii.rs b/components/tidb_query_datatype/src/codec/collation/encoding/ascii.rs index fac8c8f3b58..be1b91ae1ea 100644 --- a/components/tidb_query_datatype/src/codec/collation/encoding/ascii.rs +++ b/components/tidb_query_datatype/src/codec/collation/encoding/ascii.rs @@ -20,7 +20,10 @@ impl Encoding for EncodingAscii { fn decode(data: BytesRef<'_>) -> Result { for x in data { if !x.is_ascii() { - return Err(Error::cannot_convert_string("ascii")); + return Err(Error::cannot_convert_string( + format_invalid_char(data).as_str(), + "ascii", + )); } } Ok(Bytes::from(data)) diff --git a/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs b/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs index 26f61da7536..43a6289e640 100644 --- a/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs +++ b/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs @@ -13,7 +13,10 @@ impl Encoding for EncodingGBK { fn decode(data: BytesRef<'_>) -> Result { match GBK.decode_without_bom_handling_and_without_replacement(data) { Some(v) => Ok(Bytes::from(v.as_bytes())), - None => Err(Error::cannot_convert_string("gbk")), + None => Err(Error::cannot_convert_string( + format_invalid_char(data).as_str(), + "gbk", + )), } } diff --git 
a/components/tidb_query_datatype/src/codec/collation/encoding/mod.rs b/components/tidb_query_datatype/src/codec/collation/encoding/mod.rs index 2647446ab7f..b2434105ce5 100644 --- a/components/tidb_query_datatype/src/codec/collation/encoding/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/encoding/mod.rs @@ -15,3 +15,24 @@ use crate::codec::{ data_type::{Bytes, BytesRef}, Error, Result, }; + +fn format_invalid_char(data: BytesRef<'_>) -> String { + // Max length of the invalid string is '\x00\x00\x00\x00\x00...'(25) we set 32 + // here. + let mut buf = String::with_capacity(32); + const MAX_BYTES_TO_SHOW: usize = 5; + buf.push('\''); + for i in 0..data.len() { + if i > MAX_BYTES_TO_SHOW { + buf.push_str("..."); + break; + } + if data[i].is_ascii() { + buf.push(char::from(data[i])); + } else { + buf.push_str(format!("\\x{:X}", data[i]).as_str()); + } + } + buf.push('\''); + buf +} diff --git a/components/tidb_query_datatype/src/codec/collation/encoding/utf8.rs b/components/tidb_query_datatype/src/codec/collation/encoding/utf8.rs index d06bf49c025..e83d6e3eb22 100644 --- a/components/tidb_query_datatype/src/codec/collation/encoding/utf8.rs +++ b/components/tidb_query_datatype/src/codec/collation/encoding/utf8.rs @@ -11,7 +11,10 @@ impl Encoding for T { fn decode(data: BytesRef<'_>) -> Result { match str::from_utf8(data) { Ok(v) => Ok(Bytes::from(v)), - Err(_) => Err(Error::cannot_convert_string(T::NAME)), + Err(_) => Err(Error::cannot_convert_string( + format_invalid_char(data).as_str(), + T::NAME, + )), } } } diff --git a/components/tidb_query_datatype/src/codec/error.rs b/components/tidb_query_datatype/src/codec/error.rs index 9cb0ee50d18..23e76a124b8 100644 --- a/components/tidb_query_datatype/src/codec/error.rs +++ b/components/tidb_query_datatype/src/codec/error.rs @@ -95,8 +95,8 @@ impl Error { } } - pub fn cannot_convert_string(charset: &str) -> Error { - let msg = format!("cannot convert string from binary to {}", charset); + pub fn 
cannot_convert_string(s: &str, charset: &str) -> Error { + let msg = format!("Cannot convert string {} from binary to {}", s, charset); Error::Eval(msg, ERR_CANNOT_CONVERT_STRING) } From 677548c4ea1676a944cc650eb82275eaee41f551 Mon Sep 17 00:00:00 2001 From: 5kbpers Date: Mon, 1 Aug 2022 23:42:05 +0800 Subject: [PATCH 0127/1149] raftstore: make `UNREACHABLE_BACKOFF` configurable (#13193) close tikv/tikv#13054 make `UNREACHABLE_BACKOFF` configurable. Signed-off-by: 5kbpers --- components/raftstore/src/store/config.rs | 3 +++ components/raftstore/src/store/fsm/store.rs | 6 +++--- components/raftstore/src/store/worker/check_leader.rs | 2 +- tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 5 files changed, 9 insertions(+), 4 deletions(-) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 6b59eaf71bb..32141a23542 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -283,6 +283,8 @@ pub struct Config { #[doc(hidden)] pub max_snapshot_file_raw_size: ReadableSize, + + pub unreachable_backoff: ReadableDuration, } impl Default for Config { @@ -372,6 +374,7 @@ impl Default for Config { renew_leader_lease_advance_duration: ReadableDuration::secs(0), report_region_buckets_tick_interval: ReadableDuration::secs(10), max_snapshot_file_raw_size: ReadableSize::mb(100), + unreachable_backoff: ReadableDuration::secs(10), } } } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 28abf24083b..52d9bebd0ab 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -105,7 +105,6 @@ use crate::{ type Key = Vec; pub const PENDING_MSG_CAP: usize = 100; -const UNREACHABLE_BACKOFF: Duration = Duration::from_secs(10); const ENTRY_CACHE_EVICT_TICK_DURATION: Duration = Duration::from_secs(1); pub const MULTI_FILES_SNAPSHOT_FEATURE: Feature = 
Feature::require(6, 1, 0); // it only makes sense for large region @@ -2682,13 +2681,14 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER fn on_store_unreachable(&mut self, store_id: u64) { let now = Instant::now(); + let unreachable_backoff = self.ctx.cfg.unreachable_backoff.0; if self .fsm .store .last_unreachable_report .get(&store_id) - .map_or(UNREACHABLE_BACKOFF, |t| now.saturating_duration_since(*t)) - < UNREACHABLE_BACKOFF + .map_or(unreachable_backoff, |t| now.saturating_duration_since(*t)) + < unreachable_backoff { return; } diff --git a/components/raftstore/src/store/worker/check_leader.rs b/components/raftstore/src/store/worker/check_leader.rs index 355dca4f168..8821bb6118d 100644 --- a/components/raftstore/src/store/worker/check_leader.rs +++ b/components/raftstore/src/store/worker/check_leader.rs @@ -83,7 +83,7 @@ impl Runner { meta.region_ranges // get overlapped regions .range((Excluded(start_key), Unbounded)) - .take_while(|(_, id)| end_key > enc_start_key(&meta.regions[id])) + .take_while(|(_, id)| end_key > enc_start_key(&meta.regions[*id])) // get the min `safe_ts` .map(|(_, id)| { registry.get(id).unwrap().safe_ts() diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index e8449624a0f..2988b0cf0a3 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -244,6 +244,7 @@ fn test_serde_custom_tikv_config() { reactive_memory_lock_timeout_tick: 8, report_region_buckets_tick_interval: ReadableDuration::secs(1234), max_snapshot_file_raw_size: ReadableSize::gb(10), + unreachable_backoff: ReadableDuration::secs(111), }; value.pd = PdConfig::new(vec!["example.com:443".to_owned()]); let titan_cf_config = TitanCfConfig { diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index ea9cf8e4062..0221446683a 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -219,6 
+219,7 @@ reactive-memory-lock-timeout-tick = 8 report-min-resolved-ts-interval = "233ms" report-region-buckets-tick-interval = "1234s" max-snapshot-file-raw-size = "10GB" +unreachable-backoff = "111s" [coprocessor] split-region-on-table = false From 4dbb057238b45b31658eba1111978d6c87fb09b8 Mon Sep 17 00:00:00 2001 From: cosven Date: Tue, 2 Aug 2022 11:30:05 +0800 Subject: [PATCH 0128/1149] raftstore: add metrics/logs to help debug high commit log duration (#13120) ref tikv/tikv#13060, ref tikv/tikv#13078 In some cases, such as the one mentioned in #13078, the commit log duration became high. In the case, the needed log is not in entry cache and there are many raftlog async fetch tasks. This commit adds a log to show the cache first index and peers' progress when there is any long uncommitted proposal. It also adds a metric to show the duration of the async fetch tasks. Signed-off-by: cosven Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/router/message.rs | 3 + components/raftstore/src/store/config.rs | 15 ++ .../raftstore/src/store/entry_storage.rs | 54 ++-- components/raftstore/src/store/fsm/peer.rs | 14 + components/raftstore/src/store/fsm/store.rs | 2 + components/raftstore/src/store/metrics.rs | 8 + components/raftstore/src/store/msg.rs | 3 + components/raftstore/src/store/peer.rs | 62 +++++ metrics/grafana/tikv_details.json | 252 ++++++++++++++++++ tests/integrations/config/mod.rs | 2 + tests/integrations/config/test-custom.toml | 6 +- 11 files changed, 402 insertions(+), 19 deletions(-) diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 12041f56fe7..87187b30e75 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -145,6 +145,7 @@ pub enum PeerTick { CheckLeaderLease = 7, ReactivateMemoryLock = 8, ReportBuckets = 9, + CheckLongUncommitted = 10, } impl PeerTick { @@ -163,6 +164,7 @@ impl PeerTick { PeerTick::CheckLeaderLease 
=> "check_leader_lease", PeerTick::ReactivateMemoryLock => "reactivate_memory_lock", PeerTick::ReportBuckets => "report_buckets", + PeerTick::CheckLongUncommitted => "check_long_uncommitted", } } @@ -178,6 +180,7 @@ impl PeerTick { PeerTick::CheckLeaderLease, PeerTick::ReactivateMemoryLock, PeerTick::ReportBuckets, + PeerTick::CheckLongUncommitted, ]; TICKS } diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 32141a23542..ad89d5e7e70 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -281,6 +281,13 @@ pub struct Config { // Interval of scheduling a tick to report region buckets. pub report_region_buckets_tick_interval: ReadableDuration, + /// Interval to check long uncommitted proposals. + #[doc(hidden)] + pub check_long_uncommitted_interval: ReadableDuration, + /// Base threshold of long uncommitted proposal. + #[doc(hidden)] + pub long_uncommitted_base_threshold: ReadableDuration, + #[doc(hidden)] pub max_snapshot_file_raw_size: ReadableSize, @@ -363,6 +370,14 @@ impl Default for Config { raft_msg_flush_interval: ReadableDuration::micros(250), reactive_memory_lock_tick_interval: ReadableDuration::secs(2), reactive_memory_lock_timeout_tick: 5, + check_long_uncommitted_interval: ReadableDuration::secs(10), + /// In some cases, such as rolling upgrade, some regions' commit log + /// duration can be 12 seconds. Before #13078 is merged, + /// the commit log duration can be 2.8 minutes. So maybe + /// 20s is a relatively reasonable base threshold. Generally, + /// the log commit duration is less than 1s. Feel free to adjust + /// this config :) + long_uncommitted_base_threshold: ReadableDuration::secs(20), // They are preserved for compatibility check. 
region_max_size: ReadableSize(0), diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index 03054cfcc16..c73e12013fe 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -20,7 +20,7 @@ use kvproto::raft_serverpb::{RaftApplyState, RaftLocalState}; use protobuf::Message; use raft::{prelude::*, util::limit_size, GetEntriesContext, StorageError}; use tikv_alloc::TraceEvent; -use tikv_util::{debug, info, worker::Scheduler}; +use tikv_util::{debug, info, time::Instant, warn, worker::Scheduler}; use super::{metrics::*, peer_storage::storage_error, WriteTask, MEMTRACE_ENTRY_CACHE}; use crate::{bytes_capacity, store::worker::RaftlogFetchTask}; @@ -364,9 +364,10 @@ impl Drop for EntryCache { } } -#[derive(Debug, PartialEq)] +#[derive(Debug)] pub enum RaftlogFetchState { - Fetching, + // The Instant records the start time of the fetching. + Fetching(Instant), Fetched(Box), } @@ -481,26 +482,40 @@ impl EntryStorage { // None indicates cleanning the fetched result. pub fn update_async_fetch_res(&mut self, low: u64, res: Option>) { // If it's in fetching, don't clean the async fetch result. 
- if self.async_fetch_results.borrow().get(&low) == Some(&RaftlogFetchState::Fetching) - && res.is_none() - { - return; + if let Some(RaftlogFetchState::Fetching(_)) = self.async_fetch_results.borrow().get(&low) { + if res.is_none() { + return; + } } match res { Some(res) => { - if let Some(RaftlogFetchState::Fetched(prev)) = self + match self .async_fetch_results .borrow_mut() .insert(low, RaftlogFetchState::Fetched(res)) { - info!( - "unconsumed async fetch res"; - "region_id" => self.region_id, - "peer_id" => self.peer_id, - "res" => ?prev, - "low" => low, - ); + Some(RaftlogFetchState::Fetching(start)) => { + RAFT_ENTRY_FETCHES_TASK_DURATION_HISTOGRAM + .observe(start.saturating_elapsed_secs()); + } + Some(RaftlogFetchState::Fetched(prev)) => { + info!( + "unconsumed async fetch res"; + "region_id" => self.region_id, + "peer_id" => self.peer_id, + "res" => ?prev, + "low" => low, + ); + } + _ => { + warn!( + "unknown async fetch res"; + "region_id" => self.region_id, + "peer_id" => self.peer_id, + "low" => low, + ); + } } } None => { @@ -521,7 +536,7 @@ impl EntryStorage { context: GetEntriesContext, buf: &mut Vec, ) -> raft::Result { - if let Some(RaftlogFetchState::Fetching) = self.async_fetch_results.borrow().get(&low) { + if let Some(RaftlogFetchState::Fetching(_)) = self.async_fetch_results.borrow().get(&low) { // already an async fetch in flight return Err(raft::Error::Store( raft::StorageError::LogTemporarilyUnavailable, @@ -630,7 +645,7 @@ impl EntryStorage { self.raftlog_fetch_stats.async_fetch.update(|m| m + 1); self.async_fetch_results .borrow_mut() - .insert(low, RaftlogFetchState::Fetching); + .insert(low, RaftlogFetchState::Fetching(Instant::now_coarse())); self.raftlog_fetch_scheduler .schedule(RaftlogFetchTask::PeerStorage { region_id, @@ -851,6 +866,11 @@ impl EntryStorage { self.cache.is_empty() } + #[inline] + pub fn entry_cache_first_index(&self) -> Option { + self.cache.first_index() + } + /// Evict entries from the cache. 
pub fn evict_entry_cache(&mut self, half: bool) { if !self.is_entry_cache_empty() { diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 8d5369aaefa..1d02b723cf6 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -1076,6 +1076,7 @@ where PeerTick::CheckLeaderLease => self.on_check_leader_lease_tick(), PeerTick::ReactivateMemoryLock => self.on_reactivate_memory_lock_tick(), PeerTick::ReportBuckets => self.on_report_region_buckets_tick(), + PeerTick::CheckLongUncommitted => self.on_check_long_uncommitted_tick(), } } @@ -5091,6 +5092,19 @@ where } } + fn register_check_long_uncommitted_tick(&mut self) { + self.schedule_tick(PeerTick::CheckLongUncommitted) + } + + fn on_check_long_uncommitted_tick(&mut self) { + if !self.fsm.peer.is_leader() || self.fsm.hibernate_state.group_state() == GroupState::Idle + { + return; + } + self.fsm.peer.check_long_uncommitted_proposals(self.ctx); + self.register_check_long_uncommitted_tick(); + } + fn register_check_leader_lease_tick(&mut self) { self.schedule_tick(PeerTick::CheckLeaderLease) } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 52d9bebd0ab..5235f90e156 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -555,6 +555,8 @@ where self.cfg.reactive_memory_lock_tick_interval.0; self.tick_batch[PeerTick::ReportBuckets as usize].wait_duration = self.cfg.report_region_buckets_tick_interval.0; + self.tick_batch[PeerTick::CheckLongUncommitted as usize].wait_duration = + self.cfg.check_long_uncommitted_interval.0; } } diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index a983feb7909..9691d5be0db 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -605,6 +605,14 @@ lazy_static! 
{ pub static ref RAFT_ENTRY_FETCHES: RaftEntryFetches = auto_flush_from!(RAFT_ENTRY_FETCHES_VEC, RaftEntryFetches); + // The max task duration can be a few minutes. + pub static ref RAFT_ENTRY_FETCHES_TASK_DURATION_HISTOGRAM: Histogram = + register_histogram!( + "tikv_raftstore_entry_fetches_task_duration_seconds", + "Bucketed histogram of raft entry fetches task duration.", + exponential_buckets(0.0005, 2.0, 21).unwrap() // 500us ~ 8.7m + ).unwrap(); + pub static ref LEADER_MISSING: IntGauge = register_int_gauge!( "tikv_raftstore_leader_missing", diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index e552229aa0c..43126d1def5 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -233,6 +233,7 @@ pub enum PeerTick { CheckLeaderLease = 7, ReactivateMemoryLock = 8, ReportBuckets = 9, + CheckLongUncommitted = 10, } impl PeerTick { @@ -251,6 +252,7 @@ impl PeerTick { PeerTick::CheckLeaderLease => "check_leader_lease", PeerTick::ReactivateMemoryLock => "reactivate_memory_lock", PeerTick::ReportBuckets => "report_buckets", + PeerTick::CheckLongUncommitted => "check_long_uncommitted", } } @@ -266,6 +268,7 @@ impl PeerTick { PeerTick::CheckLeaderLease, PeerTick::ReactivateMemoryLock, PeerTick::ReportBuckets, + PeerTick::CheckLongUncommitted, ]; TICKS } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 6d309afa17f..9a8fd7d0605 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -192,6 +192,11 @@ impl ProposalQueue { None } + #[inline] + fn oldest(&self) -> Option<&Proposal> { + self.queue.front() + } + fn push(&mut self, p: Proposal) { if let Some(f) = self.queue.back() { // The term must be increasing among all log entries and the index @@ -730,6 +735,11 @@ where #[getset(get = "pub")] leader_lease: Lease, pending_reads: ReadIndexQueue, + /// Threshold of long uncommitted proposals. 
+ /// + /// Note that this is a dynamically changing value. Check the + /// `has_long_uncommitted_proposals` method for details. + long_uncommitted_threshold: Duration, /// If it fails to send messages to leader. pub leader_unreachable: bool, @@ -937,6 +947,7 @@ where raft_max_inflight_msgs: cfg.raft_max_inflight_msgs, proposals: ProposalQueue::new(tag.clone()), pending_reads: Default::default(), + long_uncommitted_threshold: cfg.long_uncommitted_base_threshold.0, peer_cache: RefCell::new(HashMap::default()), peer_heartbeats: HashMap::default(), peers_start_pending_time: vec![], @@ -2810,6 +2821,57 @@ where fail_point!("after_send_to_apply_1003", self.peer_id() == 1003, |_| {}); } + /// Check long uncommitted proposals and log some info to help find why. + pub fn check_long_uncommitted_proposals(&mut self, ctx: &mut PollContext) { + if self.has_long_uncommitted_proposals(ctx) { + let status = self.raft_group.status(); + let mut buffer: Vec<(u64, u64, u64)> = Vec::new(); + if let Some(prs) = status.progress { + for (id, p) in prs.iter() { + buffer.push((*id, p.commit_group_id, p.matched)); + } + } + warn!( + "found long uncommitted proposals"; + "region_id" => self.region_id, + "peer_id" => self.peer.get_id(), + "progress" => ?buffer, + "cache_first_index" => ?self.get_store().entry_cache_first_index(), + "next_turn_threshold" => ?self.long_uncommitted_threshold, + ); + } + } + + /// Check if there is long uncommitted proposal. + /// + /// This will increase the threshold when a long uncommitted proposal is + /// detected, and reset the threshold when there is no long uncommitted + /// proposal. + fn has_long_uncommitted_proposals(&mut self, ctx: &mut PollContext) -> bool { + let mut has_long_uncommitted = false; + let base_threshold = ctx.cfg.long_uncommitted_base_threshold.0; + if let Some(propose_time) = self.proposals.oldest().and_then(|p| p.propose_time) { + // When a proposal was proposed with this ctx before, the current_time can be + // some. 
+ let current_time = *ctx.current_time.get_or_insert_with(monotonic_raw_now); + let elapsed = match (current_time - propose_time).to_std() { + Ok(elapsed) => elapsed, + Err(_) => return false, + }; + // Increase the threshold for next turn when a long uncommitted proposal is + // detected. + if elapsed >= self.long_uncommitted_threshold { + has_long_uncommitted = true; + self.long_uncommitted_threshold += base_threshold; + } else if elapsed < base_threshold { + self.long_uncommitted_threshold = base_threshold; + } + } else { + self.long_uncommitted_threshold = base_threshold; + } + has_long_uncommitted + } + fn on_persist_snapshot( &mut self, ctx: &mut PollContext, diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 8189e45d3d2..0291aa87590 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -7872,6 +7872,111 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 55 + }, + "hiddenSeries": false, + "id": 23763572511, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", name=~\"raftlog_fetch.*\"}[1m])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Raftlog fetch Worker CPU", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, @@ -17210,6 +17315,153 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 56 + }, + "hiddenSeries": false, + "id": 23763572555, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": false + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:521", + "alias": "/pending-task/", + "transform": "negative-Y", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, 
+ "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_entry_fetches_task_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "99%", + "refId": "A", + "step": 10 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(tikv_raftstore_entry_fetches_task_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "95%", + "refId": "B", + "step": 10 + }, + { + "exemplar": true, + "expr": "sum(rate(tikv_raftstore_entry_fetches_task_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_raftstore_entry_fetches_task_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "avg", + "refId": "C", + "step": 10 + }, + { + "exemplar": true, + "expr": "sum(tikv_worker_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\", name=~\"raftlog-fetch-worker\"})", + "hide": false, + "interval": "", + "legendFormat": "pending-task", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Raft log async fetch task duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:86", + "decimals": null, + "format": "s", + "label": null, + 
"logBase": 10, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:87", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "title": "Raft Log", diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 2988b0cf0a3..98bb55625fa 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -243,6 +243,8 @@ fn test_serde_custom_tikv_config() { reactive_memory_lock_tick_interval: ReadableDuration::millis(566), reactive_memory_lock_timeout_tick: 8, report_region_buckets_tick_interval: ReadableDuration::secs(1234), + check_long_uncommitted_interval: ReadableDuration::secs(1), + long_uncommitted_base_threshold: ReadableDuration::secs(1), max_snapshot_file_raw_size: ReadableSize::gb(10), unreachable_backoff: ReadableDuration::secs(111), }; diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 0221446683a..c653e9c500d 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -216,6 +216,8 @@ check-leader-lease-interval = "123ms" renew-leader-lease-advance-duration = "456ms" reactive-memory-lock-tick-interval = "566ms" reactive-memory-lock-timeout-tick = 8 +check-long-uncommitted-interval = "1s" +long-uncommitted-base-threshold = "1s" report-min-resolved-ts-interval = "233ms" report-region-buckets-tick-interval = "1234s" max-snapshot-file-raw-size = "10GB" @@ -229,7 +231,7 @@ region-split-size = "12MB" region-max-keys = 100000 region-split-keys = 100000 consistency-check-method = "raw" -enable-region-bucket = true +enable-region-bucket = true region-bucket-size = "1MB" region-size-threshold-for-approximate = "3MB" region-bucket-merge-size-ratio = 0.4 @@ -374,7 +376,7 @@ num-levels = 4 max-bytes-for-level-multiplier = 8 compaction-style = "universal" disable-auto-compactions = 
true -disable-write-stall = true +disable-write-stall = true soft-pending-compaction-bytes-limit = "12GB" hard-pending-compaction-bytes-limit = "12GB" force-consistency-checks = true From 0576484eed99a6126511136f1af9ded029b9154c Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 2 Aug 2022 04:32:05 -0700 Subject: [PATCH 0129/1149] raftstore: refactor async write to allow write all states to raft engine (#13157) ref tikv/tikv#12842 In v2, all states are moved to raft engine, so it doesn't need to write to kv db anymore. Signed-off-by: Jay Lee --- .../raftstore/src/store/async_io/write.rs | 334 ++++++++++++++---- .../src/store/async_io/write_tests.rs | 188 +++++++--- .../raftstore/src/store/entry_storage.rs | 147 +++++++- components/raftstore/src/store/fsm/store.rs | 15 +- components/raftstore/src/store/mod.rs | 13 +- .../raftstore/src/store/peer_storage.rs | 157 +------- 6 files changed, 573 insertions(+), 281 deletions(-) diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 6b652670138..72fd52ea4d4 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -8,7 +8,7 @@ //! raft db and then invoking callback or sending msgs if any. 
use std::{ - fmt, + fmt, mem, sync::Arc, thread::{self, JoinHandle}, }; @@ -16,12 +16,11 @@ use std::{ use collections::HashMap; use crossbeam::channel::{bounded, Receiver, Sender, TryRecvError}; use engine_traits::{ - Engines, KvEngine, PerfContext, PerfContextKind, RaftEngine, RaftLogBatch, WriteBatch, - WriteOptions, + KvEngine, PerfContext, PerfContextKind, RaftEngine, RaftLogBatch, WriteBatch, WriteOptions, }; use error_code::ErrorCodeExt; use fail::fail_point; -use kvproto::raft_serverpb::{RaftLocalState, RaftMessage}; +use kvproto::raft_serverpb::{RaftApplyState, RaftLocalState, RaftMessage, RegionLocalState}; use protobuf::Message; use raft::eraftpb::Entry; use tikv_util::{ @@ -53,16 +52,16 @@ const RAFT_WB_SHRINK_SIZE: usize = 10 * 1024 * 1024; const RAFT_WB_DEFAULT_SIZE: usize = 256 * 1024; /// Notify the event to the specified region. -pub trait Notifier: Clone + Send + 'static { - fn notify_persisted(&self, region_id: u64, peer_id: u64, ready_number: u64); +pub trait PersistedNotifier: Clone + Send + 'static { + fn notify(&self, region_id: u64, peer_id: u64, ready_number: u64); } -impl Notifier for RaftRouter +impl PersistedNotifier for RaftRouter where EK: KvEngine, ER: RaftEngine, { - fn notify_persisted(&self, region_id: u64, peer_id: u64, ready_number: u64) { + fn notify(&self, region_id: u64, peer_id: u64, ready_number: u64) { if let Err(e) = self.force_send( region_id, PeerMsg::Persisted { @@ -81,6 +80,79 @@ where } } +/// Extra writes besides raft engine. +/// +/// For now, applying snapshot needs to persist some extra states. For v1, +/// these states are written to KvEngine. For v2, they are written to +/// RaftEngine. +// TODO: perhaps we should always pass states instead of a write batch even +// for v1. 
+pub enum ExtraWrite { + None, + V1(W), + V2(ExtraStates), +} + +impl ExtraWrite { + #[inline] + pub fn is_empty(&self) -> bool { + match self { + ExtraWrite::None => true, + ExtraWrite::V1(w) => w.is_empty(), + _ => false, + } + } + + #[inline] + fn data_size(&self) -> usize { + match self { + ExtraWrite::None => 0, + ExtraWrite::V1(w) => w.data_size(), + ExtraWrite::V2(m) => mem::size_of_val(m), + } + } + + #[inline] + pub fn ensure_v1(&mut self, write_batch: impl FnOnce() -> W) -> &mut W { + if let ExtraWrite::None = self { + *self = ExtraWrite::V1(write_batch()); + } else if let ExtraWrite::V2(_) = self { + unreachable!("v1 and v2 are mixed used"); + } + match self { + ExtraWrite::V1(w) => w, + _ => unreachable!(), + } + } + + #[inline] + pub fn v1_mut(&mut self) -> Option<&mut W> { + if let ExtraWrite::V1(w) = self { + Some(w) + } else { + None + } + } + + #[inline] + pub fn set_v2(&mut self, extra_states: ExtraStates) { + if let ExtraWrite::V1(_) = self { + unreachable!("v1 and v2 are mixed used"); + } else { + *self = ExtraWrite::V2(extra_states); + } + } + + #[inline] + pub fn v2_mut(&mut self) -> Option<&mut ExtraStates> { + if let ExtraWrite::V2(m) = self { + Some(m) + } else { + None + } + } +} + /// WriteTask contains write tasks which need to be persisted to kv db and raft /// db. 
pub struct WriteTask @@ -92,11 +164,11 @@ where peer_id: u64, ready_number: u64, pub send_time: Instant, - pub kv_wb: Option, pub raft_wb: Option, pub entries: Vec, pub cut_logs: Option<(u64, u64)>, pub raft_state: Option, + pub extra_write: ExtraWrite, pub messages: Vec, pub trackers: Vec, } @@ -112,11 +184,11 @@ where peer_id, ready_number, send_time: Instant::now(), - kv_wb: None, raft_wb: None, entries: vec![], cut_logs: None, raft_state: None, + extra_write: ExtraWrite::None, messages: vec![], trackers: vec![], } @@ -126,10 +198,15 @@ where !(self.raft_state.is_none() && self.entries.is_empty() && self.cut_logs.is_none() - && self.kv_wb.as_ref().map_or(true, |wb| wb.is_empty()) + && self.extra_write.is_empty() && self.raft_wb.as_ref().map_or(true, |wb| wb.is_empty())) } + #[inline] + pub fn ready_number(&self) -> u64 { + self.ready_number + } + /// Sanity check for robustness. pub fn valid(&self) -> Result<()> { if self.region_id == 0 || self.peer_id == 0 || self.ready_number == 0 { @@ -189,16 +266,96 @@ where } } +/// These states are set only in raftstore V2. +#[derive(Default)] +pub struct ExtraStates { + apply_state: RaftApplyState, + region_state: Option, +} + +impl ExtraStates { + #[inline] + pub fn new(apply_state: RaftApplyState) -> Self { + Self { + apply_state, + region_state: None, + } + } + + #[inline] + pub fn set_region_state(&mut self, region_state: RegionLocalState) { + self.region_state = Some(region_state); + } +} + +pub enum ExtraBatchWrite { + None, + V1(W), + V2(HashMap), +} + +impl ExtraBatchWrite { + #[inline] + fn clear(&mut self) { + match self { + ExtraBatchWrite::None => {} + ExtraBatchWrite::V1(w) => w.clear(), + ExtraBatchWrite::V2(m) => m.clear(), + } + } + + /// Merge the extra_write with this batch. + /// + /// If there is any new states inserted, return the size of the state. 
+ fn merge(&mut self, region_id: u64, extra_write: &mut ExtraWrite) -> usize { + let mut inserted = false; + match mem::replace(extra_write, ExtraWrite::None) { + ExtraWrite::None => (), + ExtraWrite::V1(wb) => match self { + ExtraBatchWrite::None => *self = ExtraBatchWrite::V1(wb), + ExtraBatchWrite::V1(kv_wb) => kv_wb.merge(wb).unwrap(), + ExtraBatchWrite::V2(_) => unreachable!("v2 and v1 are mixed used"), + }, + ExtraWrite::V2(extra_states) => match self { + ExtraBatchWrite::None => { + let mut map = HashMap::default(); + map.insert(region_id, extra_states); + *self = ExtraBatchWrite::V2(map); + inserted = true; + } + ExtraBatchWrite::V1(_) => unreachable!("v2 and v1 are mixed used"), + ExtraBatchWrite::V2(extra_states_map) => match extra_states_map.entry(region_id) { + collections::HashMapEntry::Occupied(mut slot) => { + slot.get_mut().apply_state = extra_states.apply_state; + if let Some(region_state) = extra_states.region_state { + slot.get_mut().region_state = Some(region_state); + } + } + collections::HashMapEntry::Vacant(slot) => { + slot.insert(extra_states); + inserted = true; + } + }, + }, + }; + if inserted { + std::mem::size_of::() + } else { + 0 + } + } +} + /// WriteTaskBatch is used for combining several WriteTask into one. 
struct WriteTaskBatch where EK: KvEngine, ER: RaftEngine, { - pub kv_wb: EK::WriteBatch, pub raft_wb: ER::LogBatch, // Write raft state once for a region everytime writing to disk pub raft_states: HashMap, + pub extra_batch_write: ExtraBatchWrite, pub state_size: usize, pub tasks: Vec>, // region_id -> (peer_id, ready_number) @@ -210,11 +367,11 @@ where EK: KvEngine, ER: RaftEngine, { - fn new(kv_wb: EK::WriteBatch, raft_wb: ER::LogBatch) -> Self { + fn new(raft_wb: ER::LogBatch) -> Self { Self { - kv_wb, raft_wb, raft_states: HashMap::default(), + extra_batch_write: ExtraBatchWrite::None, state_size: 0, tasks: vec![], readies: HashMap::default(), @@ -226,9 +383,6 @@ where if let Err(e) = task.valid() { panic!("task is not valid: {:?}", e); } - if let Some(kv_wb) = task.kv_wb.take() { - self.kv_wb.merge(kv_wb).unwrap(); - } if let Some(raft_wb) = task.raft_wb.take() { self.raft_wb.merge(raft_wb).unwrap(); } @@ -249,6 +403,10 @@ where } } + self.state_size += self + .extra_batch_write + .merge(task.region_id, &mut task.extra_write); + if let Some(prev_readies) = self .readies .insert(task.region_id, (task.peer_id, task.ready_number)) @@ -276,8 +434,8 @@ where fn clear(&mut self) { // raft_wb doesn't have clear interface and it should be consumed by raft db // before - self.kv_wb.clear(); self.raft_states.clear(); + self.extra_batch_write.clear(); self.state_size = 0; self.tasks.clear(); self.readies.clear(); @@ -298,6 +456,18 @@ where for (region_id, state) in self.raft_states.drain() { self.raft_wb.put_raft_state(region_id, &state).unwrap(); } + if let ExtraBatchWrite::V2(extra_states_map) = &mut self.extra_batch_write { + for (region_id, state) in extra_states_map.drain() { + self.raft_wb + .put_apply_state(region_id, &state.apply_state) + .unwrap(); + if let Some(region_state) = state.region_state { + self.raft_wb + .put_region_state(region_id, ®ion_state) + .unwrap(); + } + } + } self.state_size = 0; if metrics.waterfall_metrics { let now = 
std::time::Instant::now(); @@ -342,11 +512,12 @@ pub struct Worker where EK: KvEngine, ER: RaftEngine, - N: Notifier, + N: PersistedNotifier, { store_id: u64, tag: String, - engines: Engines, + raft_engine: ER, + kv_engine: Option, receiver: Receiver>, notifier: N, trans: T, @@ -363,30 +534,28 @@ impl Worker where EK: KvEngine, ER: RaftEngine, - N: Notifier, + N: PersistedNotifier, T: Transport, { pub fn new( store_id: u64, tag: String, - engines: Engines, + raft_engine: ER, + kv_engine: Option, receiver: Receiver>, notifier: N, trans: T, cfg: &Arc>, ) -> Self { - let batch = WriteTaskBatch::new( - engines.kv.write_batch_with_cap(KV_WB_DEFAULT_SIZE), - engines.raft.log_batch(RAFT_WB_DEFAULT_SIZE), - ); - let perf_context = engines - .raft - .get_perf_context(cfg.value().perf_level, PerfContextKind::RaftstoreStore); + let batch = WriteTaskBatch::new(raft_engine.log_batch(RAFT_WB_DEFAULT_SIZE)); + let perf_context = + raft_engine.get_perf_context(cfg.value().perf_level, PerfContextKind::RaftstoreStore); let cfg_tracker = cfg.clone().tracker(tag.clone()); Self { store_id, tag, - engines, + raft_engine, + kv_engine, receiver, notifier, trans, @@ -455,7 +624,7 @@ where "region_id" => task.region_id, "peer_id" => task.peer_id, "ready_number" => task.ready_number, - "kv_wb_size" => task.kv_wb.as_ref().map_or(0, |wb| wb.data_size()), + "extra_write_size" => task.extra_write.data_size(), "raft_wb_size" => task.raft_wb.as_ref().map_or(0, |wb| wb.persist_size()), "entry_count" => task.entries.len(), ); @@ -491,29 +660,37 @@ where fail_point!("raft_before_save"); let mut write_kv_time = 0f64; - if !self.batch.kv_wb.is_empty() { - let raft_before_save_kv_on_store_3 = || { - fail_point!("raft_before_save_kv_on_store_3", self.store_id == 3, |_| {}); - }; - raft_before_save_kv_on_store_3(); - let now = Instant::now(); - let mut write_opts = WriteOptions::new(); - write_opts.set_sync(true); - // TODO: Add perf context - self.batch.kv_wb.write_opt(&write_opts).unwrap_or_else(|e| { - 
panic!( - "store {}: {} failed to write to kv engine: {:?}", - self.store_id, self.tag, e - ); - }); - if self.batch.kv_wb.data_size() > KV_WB_SHRINK_SIZE { - self.batch.kv_wb = self.engines.kv.write_batch_with_cap(KV_WB_DEFAULT_SIZE); + if let ExtraBatchWrite::V1(kv_wb) = &mut self.batch.extra_batch_write { + if !kv_wb.is_empty() { + let store_id = self.store_id; + let raft_before_save_kv_on_store_3 = || { + fail_point!("raft_before_save_kv_on_store_3", store_id == 3, |_| {}); + }; + raft_before_save_kv_on_store_3(); + let now = Instant::now(); + let mut write_opts = WriteOptions::new(); + write_opts.set_sync(true); + // TODO: Add perf context + let tag = &self.tag; + kv_wb.write_opt(&write_opts).unwrap_or_else(|e| { + panic!( + "store {}: {} failed to write to kv engine: {:?}", + store_id, tag, e + ); + }); + if kv_wb.data_size() > KV_WB_SHRINK_SIZE { + *kv_wb = self + .kv_engine + .as_ref() + .unwrap() + .write_batch_with_cap(KV_WB_DEFAULT_SIZE); + } + write_kv_time = duration_to_sec(now.saturating_elapsed()); + STORE_WRITE_KVDB_DURATION_HISTOGRAM.observe(write_kv_time); } - write_kv_time = duration_to_sec(now.saturating_elapsed()); - STORE_WRITE_KVDB_DURATION_HISTOGRAM.observe(write_kv_time); - } - self.batch.after_write_to_kv_db(&self.metrics); + self.batch.after_write_to_kv_db(&self.metrics); + } fail_point!("raft_between_save"); @@ -523,8 +700,7 @@ where let now = Instant::now(); self.perf_context.start_observe(); - self.engines - .raft + self.raft_engine .consume_and_shrink( &mut self.batch.raft_wb, true, @@ -606,8 +782,7 @@ where let mut callback_time = 0f64; if notify { for (region_id, (peer_id, ready_number)) in &self.batch.readies { - self.notifier - .notify_persisted(*region_id, *peer_id, *ready_number); + self.notifier.notify(*region_id, *peer_id, *ready_number); } now = Instant::now(); callback_time = duration_to_sec(now.saturating_duration_since(now2)); @@ -665,26 +840,29 @@ where handlers: Vec>, } -impl StoreWriters -where - EK: KvEngine, - ER: 
RaftEngine, -{ - pub fn new() -> Self { +impl Default for StoreWriters { + fn default() -> Self { Self { writers: vec![], handlers: vec![], } } +} +impl StoreWriters +where + EK: KvEngine, + ER: RaftEngine, +{ pub fn senders(&self) -> &Vec>> { &self.writers } - pub fn spawn( + pub fn spawn( &mut self, store_id: u64, - engines: &Engines, + raft_engine: ER, + kv_engine: Option, notifier: &N, trans: &T, cfg: &Arc>, @@ -696,7 +874,8 @@ where let mut worker = Worker::new( store_id, tag.clone(), - engines.clone(), + raft_engine.clone(), + kv_engine.clone(), rx, notifier.clone(), trans.clone(), @@ -726,23 +905,24 @@ where /// Used for test to write task to kv db and raft db. #[cfg(test)] -pub fn write_to_db_for_test(engines: &Engines, task: WriteTask) -where +pub fn write_to_db_for_test( + engines: &engine_traits::Engines, + task: WriteTask, +) where EK: KvEngine, ER: RaftEngine, { - let mut batch = WriteTaskBatch::new( - engines.kv.write_batch(), - engines.raft.log_batch(RAFT_WB_DEFAULT_SIZE), - ); + let mut batch = WriteTaskBatch::new(engines.raft.log_batch(RAFT_WB_DEFAULT_SIZE)); batch.add_write_task(task); batch.before_write_to_db(&StoreWriteMetrics::new(false)); - if !batch.kv_wb.is_empty() { - let mut write_opts = WriteOptions::new(); - write_opts.set_sync(true); - batch.kv_wb.write_opt(&write_opts).unwrap_or_else(|e| { - panic!("test failed to write to kv engine: {:?}", e); - }); + if let ExtraBatchWrite::V1(kv_wb) = &mut batch.extra_batch_write { + if !kv_wb.is_empty() { + let mut write_opts = WriteOptions::new(); + write_opts.set_sync(true); + kv_wb.write_opt(&write_opts).unwrap_or_else(|e| { + panic!("test failed to write to kv engine: {:?}", e); + }); + } } if !batch.raft_wb.is_empty() { engines diff --git a/components/raftstore/src/store/async_io/write_tests.rs b/components/raftstore/src/store/async_io/write_tests.rs index 04ece802a45..aaaed69c555 100644 --- a/components/raftstore/src/store/async_io/write_tests.rs +++ 
b/components/raftstore/src/store/async_io/write_tests.rs @@ -5,7 +5,7 @@ use std::time::Duration; use collections::HashSet; use crossbeam::channel::unbounded; use engine_test::{kv::KvTestEngine, new_temp_engine, raft::RaftTestEngine}; -use engine_traits::{Mutable, Peekable, RaftEngineReadOnly, WriteBatchExt}; +use engine_traits::{Engines, Mutable, Peekable, RaftEngineReadOnly, WriteBatchExt}; use kvproto::raft_serverpb::RaftMessage; use tempfile::Builder; @@ -15,6 +15,9 @@ use crate::{ Result, }; +type TestKvWriteBatch = ::WriteBatch; +type TestRaftLogBatch = ::LogBatch; + fn must_have_entries_and_state( raft_engine: &RaftTestEngine, entries_state: Vec<(u64, Vec, RaftLocalState)>, @@ -56,8 +59,8 @@ struct TestNotifier { tx: Sender<(u64, (u64, u64))>, } -impl Notifier for TestNotifier { - fn notify_persisted(&self, region_id: u64, peer_id: u64, ready_number: u64) { +impl PersistedNotifier for TestNotifier { + fn notify(&self, region_id: u64, peer_id: u64, ready_number: u64) { self.tx.send((region_id, (peer_id, ready_number))).unwrap() } } @@ -146,42 +149,30 @@ fn init_write_batch( engines: &Engines, task: &mut WriteTask, ) { - task.kv_wb = Some(engines.kv.write_batch()); + task.extra_write.ensure_v1(|| engines.kv.write_batch()); task.raft_wb = Some(engines.raft.log_batch(0)); } /// Help function for less code /// Option must not be none -fn put_kv(wb: &mut Option<::WriteBatch>, key: &[u8], value: &[u8]) { - wb.as_mut().unwrap().put(key, value).unwrap(); +fn put_kv(wb: Option<&mut TestKvWriteBatch>, key: &[u8], value: &[u8]) { + wb.unwrap().put(key, value).unwrap(); } /// Help function for less code /// Option must not be none -fn delete_kv(wb: &mut Option<::WriteBatch>, key: &[u8]) { - wb.as_mut().unwrap().delete(key).unwrap(); +fn delete_kv(wb: Option<&mut TestKvWriteBatch>, key: &[u8]) { + wb.unwrap().delete(key).unwrap(); } /// Simulate kv puts on raft engine. 
-fn put_raft_kv(wb: &mut Option<::LogBatch>, key: u64) { - wb.as_mut() - .unwrap() - .append(key, vec![new_entry(key, key)]) - .unwrap(); +fn put_raft_kv(wb: Option<&mut TestRaftLogBatch>, key: u64) { + wb.unwrap().append(key, vec![new_entry(key, key)]).unwrap(); } -fn delete_raft_kv( - engine: &RaftTestEngine, - wb: &mut Option<::LogBatch>, - key: u64, -) { +fn delete_raft_kv(engine: &RaftTestEngine, wb: Option<&mut TestRaftLogBatch>, key: u64) { engine - .clean( - key, - key, - &new_raft_state(key, key, key, key), - wb.as_mut().unwrap(), - ) + .clean(key, key, &new_raft_state(key, key, key, key), wb.unwrap()) .unwrap(); } @@ -212,7 +203,8 @@ impl TestWorker { worker: Worker::new( 1, "writer".to_string(), - engines.clone(), + engines.raft.clone(), + Some(engines.kv.clone()), task_rx, notifier, trans, @@ -236,11 +228,12 @@ impl TestWriters { let trans = TestTransport { tx: msg_tx }; let (notify_tx, notify_rx) = unbounded(); let notifier = TestNotifier { tx: notify_tx }; - let mut writers = StoreWriters::new(); + let mut writers = StoreWriters::default(); writers .spawn( 1, - engines, + engines.raft.clone(), + Some(engines.kv.clone()), ¬ifier, &trans, &Arc::new(VersionTrack::new(cfg.clone())), @@ -269,8 +262,8 @@ fn test_worker() { let mut task_1 = WriteTask::::new(region_1, 1, 10); init_write_batch(&engines, &mut task_1); - put_kv(&mut task_1.kv_wb, b"kv_k1", b"kv_v1"); - put_raft_kv(&mut task_1.raft_wb, 17); + put_kv(task_1.extra_write.v1_mut(), b"kv_k1", b"kv_v1"); + put_raft_kv(task_1.raft_wb.as_mut(), 17); task_1.entries.append(&mut vec![ new_entry(5, 5), new_entry(6, 5), @@ -284,8 +277,8 @@ fn test_worker() { let mut task_2 = WriteTask::::new(region_2, 2, 15); init_write_batch(&engines, &mut task_2); - put_kv(&mut task_2.kv_wb, b"kv_k2", b"kv_v2"); - put_raft_kv(&mut task_2.raft_wb, 27); + put_kv(task_2.extra_write.v1_mut(), b"kv_k2", b"kv_v2"); + put_raft_kv(task_2.raft_wb.as_mut(), 27); task_2 .entries .append(&mut vec![new_entry(20, 15), new_entry(21, 
15)]); @@ -298,9 +291,9 @@ fn test_worker() { let mut task_3 = WriteTask::::new(region_1, 1, 11); init_write_batch(&engines, &mut task_3); - put_kv(&mut task_3.kv_wb, b"kv_k3", b"kv_v3"); - put_raft_kv(&mut task_3.raft_wb, 37); - delete_raft_kv(&engines.raft, &mut task_3.raft_wb, 17); + put_kv(task_3.extra_write.v1_mut(), b"kv_k3", b"kv_v3"); + put_raft_kv(task_3.raft_wb.as_mut(), 37); + delete_raft_kv(&engines.raft, task_3.raft_wb.as_mut(), 17); task_3 .entries .append(&mut vec![new_entry(6, 6), new_entry(7, 7)]); @@ -357,8 +350,8 @@ fn test_basic_flow() { let mut task_1 = WriteTask::::new(region_1, 1, 10); init_write_batch(&engines, &mut task_1); - put_kv(&mut task_1.kv_wb, b"kv_k1", b"kv_v1"); - put_raft_kv(&mut task_1.raft_wb, 17); + put_kv(task_1.extra_write.v1_mut(), b"kv_k1", b"kv_v1"); + put_raft_kv(task_1.raft_wb.as_mut(), 17); task_1 .entries .append(&mut vec![new_entry(5, 5), new_entry(6, 5), new_entry(7, 5)]); @@ -371,8 +364,8 @@ fn test_basic_flow() { let mut task_2 = WriteTask::::new(2, 2, 20); init_write_batch(&engines, &mut task_2); - put_kv(&mut task_2.kv_wb, b"kv_k2", b"kv_v2"); - put_raft_kv(&mut task_2.raft_wb, 27); + put_kv(task_2.extra_write.v1_mut(), b"kv_k2", b"kv_v2"); + put_raft_kv(task_2.raft_wb.as_mut(), 27); task_2 .entries .append(&mut vec![new_entry(50, 12), new_entry(51, 13)]); @@ -385,10 +378,10 @@ fn test_basic_flow() { let mut task_3 = WriteTask::::new(region_1, 1, 15); init_write_batch(&engines, &mut task_3); - put_kv(&mut task_3.kv_wb, b"kv_k3", b"kv_v3"); - delete_kv(&mut task_3.kv_wb, b"kv_k1"); - put_raft_kv(&mut task_3.raft_wb, 37); - delete_raft_kv(&engines.raft, &mut task_3.raft_wb, 17); + put_kv(task_3.extra_write.v1_mut(), b"kv_k3", b"kv_v3"); + delete_kv(task_3.extra_write.v1_mut(), b"kv_k1"); + put_raft_kv(task_3.raft_wb.as_mut(), 37); + delete_raft_kv(&engines.raft, task_3.raft_wb.as_mut(), 17); task_3.entries.append(&mut vec![new_entry(6, 6)]); task_3.cut_logs = Some((7, 8)); task_3.raft_state = 
Some(new_raft_state(6, 345, 6, 6)); @@ -429,3 +422,114 @@ fn test_basic_flow() { t.writers.shutdown(); } + +#[test] +fn test_basic_flow_with_states() { + let region_1 = 1; + let region_2 = 2; + + let path = Builder::new() + .prefix("async-io-basic-states") + .tempdir() + .unwrap(); + let engines = new_temp_engine(&path); + let mut cfg = Config::default(); + cfg.store_io_pool_size = 2; + let mut t = TestWriters::new(&cfg, &engines); + + let mut task_1 = WriteTask::::new(region_1, 1, 10); + task_1.raft_wb = Some(engines.raft.log_batch(0)); + let mut apply_state_1 = RaftApplyState::default(); + apply_state_1.applied_index = 2; + let mut extra_state = ExtraStates::new(apply_state_1); + let mut region_state_1 = RegionLocalState::default(); + region_state_1 + .mut_region() + .mut_region_epoch() + .set_version(3); + extra_state.region_state = Some(region_state_1.clone()); + task_1.extra_write.set_v2(extra_state); + put_raft_kv(task_1.raft_wb.as_mut(), 17); + task_1 + .entries + .append(&mut vec![new_entry(5, 5), new_entry(6, 5), new_entry(7, 5)]); + task_1.raft_state = Some(new_raft_state(5, 234, 6, 7)); + task_1 + .messages + .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); + + t.write_sender(0).send(WriteMsg::WriteTask(task_1)).unwrap(); + + let mut task_2 = WriteTask::::new(2, 2, 20); + task_2.raft_wb = Some(engines.raft.log_batch(0)); + let mut apply_state_2 = RaftApplyState::default(); + apply_state_2.applied_index = 30; + let extra_state = ExtraStates::new(apply_state_2.clone()); + task_2.extra_write.set_v2(extra_state); + put_raft_kv(task_2.raft_wb.as_mut(), 27); + task_2 + .entries + .append(&mut vec![new_entry(50, 12), new_entry(51, 13)]); + task_2.raft_state = Some(new_raft_state(13, 567, 49, 51)); + task_2 + .messages + .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); + + t.write_sender(1).send(WriteMsg::WriteTask(task_2)).unwrap(); + + let mut task_3 = WriteTask::::new(region_1, 1, 15); + task_3.raft_wb = 
Some(engines.raft.log_batch(0)); + let mut apply_state_3 = RaftApplyState::default(); + apply_state_3.applied_index = 5; + let extra_state = ExtraStates::new(apply_state_3.clone()); + task_3.extra_write.set_v2(extra_state); + put_raft_kv(task_3.raft_wb.as_mut(), 37); + delete_raft_kv(&engines.raft, task_3.raft_wb.as_mut(), 17); + task_3.entries.append(&mut vec![new_entry(6, 6)]); + task_3.cut_logs = Some((7, 8)); + task_3.raft_state = Some(new_raft_state(6, 345, 6, 6)); + task_3 + .messages + .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); + + t.write_sender(0).send(WriteMsg::WriteTask(task_3)).unwrap(); + + must_wait_same_notifies(vec![(region_1, (1, 15)), (region_2, (2, 20))], &t.notify_rx); + + assert_eq!(test_raft_kv(&engines.raft, 17), false); + assert_eq!(test_raft_kv(&engines.raft, 27), true); + assert_eq!(test_raft_kv(&engines.raft, 37), true); + + must_have_entries_and_state( + &engines.raft, + vec![ + ( + region_1, + vec![new_entry(5, 5), new_entry(6, 6)], + new_raft_state(6, 345, 6, 6), + ), + ( + region_2, + vec![new_entry(50, 12), new_entry(51, 13)], + new_raft_state(13, 567, 49, 51), + ), + ], + ); + assert_eq!( + engines.raft.get_apply_state(region_1).unwrap().unwrap(), + apply_state_3 + ); + assert_eq!( + engines.raft.get_apply_state(region_2).unwrap().unwrap(), + apply_state_2 + ); + assert_eq!( + engines.raft.get_region_state(region_1).unwrap().unwrap(), + region_state_1 + ); + assert_eq!(engines.raft.get_region_state(region_2).unwrap(), None); + + must_have_same_count_msg(6, &t.msg_rx); + + t.writers.shutdown(); +} diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index c73e12013fe..33b504127f8 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -16,14 +16,20 @@ use std::{ use collections::HashMap; use engine_traits::{KvEngine, RaftEngine, RAFT_LOG_MULTI_GET_CNT}; use fail::fail_point; -use 
kvproto::raft_serverpb::{RaftApplyState, RaftLocalState}; +use kvproto::{ + metapb, + raft_serverpb::{RaftApplyState, RaftLocalState}, +}; use protobuf::Message; use raft::{prelude::*, util::limit_size, GetEntriesContext, StorageError}; use tikv_alloc::TraceEvent; -use tikv_util::{debug, info, time::Instant, warn, worker::Scheduler}; +use tikv_util::{box_err, debug, info, time::Instant, warn, worker::Scheduler}; -use super::{metrics::*, peer_storage::storage_error, WriteTask, MEMTRACE_ENTRY_CACHE}; -use crate::{bytes_capacity, store::worker::RaftlogFetchTask}; +use super::{ + metrics::*, peer_storage::storage_error, WriteTask, MEMTRACE_ENTRY_CACHE, RAFT_INIT_LOG_INDEX, + RAFT_INIT_LOG_TERM, +}; +use crate::{bytes_capacity, store::worker::RaftlogFetchTask, Result}; const MAX_ASYNC_FETCH_TRY_CNT: usize = 3; const SHRINK_CACHE_CAPACITY: usize = 64; @@ -415,6 +421,115 @@ impl AsyncFetchStats { } } +fn validate_states( + region_id: u64, + raft_engine: &ER, + raft_state: &mut RaftLocalState, + apply_state: &RaftApplyState, +) -> Result<()> { + let last_index = raft_state.get_last_index(); + let mut commit_index = raft_state.get_hard_state().get_commit(); + let recorded_commit_index = apply_state.get_commit_index(); + let state_str = || -> String { + format!( + "region {}, raft state {:?}, apply state {:?}", + region_id, raft_state, apply_state + ) + }; + // The commit index of raft state may be less than the recorded commit index. + // If so, forward the commit index. 
+ if commit_index < recorded_commit_index { + let entry = raft_engine.get_entry(region_id, recorded_commit_index)?; + if entry.map_or(true, |e| e.get_term() != apply_state.get_commit_term()) { + return Err(box_err!( + "log at recorded commit index [{}] {} doesn't exist, may lose data, {}", + apply_state.get_commit_term(), + recorded_commit_index, + state_str() + )); + } + info!("updating commit index"; "region_id" => region_id, "old" => commit_index, "new" => recorded_commit_index); + commit_index = recorded_commit_index; + } + // Invariant: applied index <= max(commit index, recorded commit index) + if apply_state.get_applied_index() > commit_index { + return Err(box_err!( + "applied index > max(commit index, recorded commit index), {}", + state_str() + )); + } + // Invariant: max(commit index, recorded commit index) <= last index + if commit_index > last_index { + return Err(box_err!( + "max(commit index, recorded commit index) > last index, {}", + state_str() + )); + } + // Since the entries must be persisted before applying, the term of raft state + // should also be persisted. So it should be greater than the commit term of + // apply state. 
+ if raft_state.get_hard_state().get_term() < apply_state.get_commit_term() { + return Err(box_err!( + "term of raft state < commit term of apply state, {}", + state_str() + )); + } + + raft_state.mut_hard_state().set_commit(commit_index); + + Ok(()) +} + +pub fn init_last_term( + raft_engine: &ER, + region: &metapb::Region, + raft_state: &RaftLocalState, + apply_state: &RaftApplyState, +) -> Result { + let last_idx = raft_state.get_last_index(); + if last_idx == 0 { + return Ok(0); + } else if last_idx == RAFT_INIT_LOG_INDEX { + return Ok(RAFT_INIT_LOG_TERM); + } else if last_idx == apply_state.get_truncated_state().get_index() { + return Ok(apply_state.get_truncated_state().get_term()); + } else { + assert!(last_idx > RAFT_INIT_LOG_INDEX); + } + let entry = raft_engine.get_entry(region.get_id(), last_idx)?; + match entry { + None => Err(box_err!( + "[region {}] entry at {} doesn't exist, may lose data.", + region.get_id(), + last_idx + )), + Some(e) => Ok(e.get_term()), + } +} + +pub fn init_applied_term( + raft_engine: &ER, + region: &metapb::Region, + apply_state: &RaftApplyState, +) -> Result { + if apply_state.applied_index == RAFT_INIT_LOG_INDEX { + return Ok(RAFT_INIT_LOG_TERM); + } + let truncated_state = apply_state.get_truncated_state(); + if apply_state.applied_index == truncated_state.get_index() { + return Ok(truncated_state.get_term()); + } + + match raft_engine.get_entry(region.get_id(), apply_state.applied_index)? { + Some(e) => Ok(e.term), + None => Err(box_err!( + "[region {}] entry at apply index {} doesn't exist, may lose data.", + region.get_id(), + apply_state.applied_index + )), + } +} + /// A subset of `PeerStorage` that focus on accessing log entries. 
pub struct EntryStorage { region_id: u64, @@ -432,17 +547,25 @@ pub struct EntryStorage { impl EntryStorage { pub fn new( - region_id: u64, peer_id: u64, raft_engine: ER, - raft_state: RaftLocalState, + mut raft_state: RaftLocalState, apply_state: RaftApplyState, - last_term: u64, - applied_term: u64, + region: &metapb::Region, raftlog_fetch_scheduler: Scheduler, - ) -> Self { - EntryStorage { - region_id, + ) -> Result { + if let Err(e) = validate_states(region.id, &raft_engine, &mut raft_state, &apply_state) { + return Err(box_err!( + "[region {}] {} validate state fail: {:?}", + region.id, + peer_id, + e + )); + } + let last_term = init_last_term(&raft_engine, region, &raft_state, &apply_state)?; + let applied_term = init_applied_term(&raft_engine, region, &apply_state)?; + Ok(Self { + region_id: region.id, peer_id, raft_engine, cache: EntryCache::default(), @@ -453,7 +576,7 @@ impl EntryStorage { raftlog_fetch_scheduler, raftlog_fetch_stats: AsyncFetchStats::default(), async_fetch_results: RefCell::new(HashMap::default()), - } + }) } fn check_range(&self, low: u64, high: u64) -> raft::Result<()> { diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 5235f90e156..d4bb0a32266 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -1261,7 +1261,8 @@ where Some(WriteWorker::new( self.store.get_id(), "sync-writer".to_string(), - self.engines.clone(), + self.engines.raft.clone(), + Some(self.engines.kv.clone()), rx, self.router.clone(), self.trans.clone(), @@ -1526,8 +1527,14 @@ impl RaftBatchSystem { .background_worker .start("consistency-check", consistency_check_runner); - self.store_writers - .spawn(meta.get_id(), &engines, &self.router, &trans, &cfg)?; + self.store_writers.spawn( + meta.get_id(), + engines.raft.clone(), + Some(engines.kv.clone()), + &self.router, + &trans, + &cfg, + )?; let region_read_progress = 
store_meta.lock().unwrap().region_read_progress.clone(); let mut builder = RaftPollerBuilder { @@ -1717,7 +1724,7 @@ pub fn create_raft_batch_system( apply_router, apply_system, router: raft_router.clone(), - store_writers: StoreWriters::new(), + store_writers: StoreWriters::default(), }; (raft_router, system) } diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index bd9564b1a63..d75fef94323 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -15,7 +15,7 @@ mod async_io; mod bootstrap; mod compaction_guard; mod hibernate_state; -mod local_metrics; +pub mod local_metrics; mod peer; mod peer_storage; mod read_queue; @@ -29,8 +29,8 @@ mod worker; pub use self::msg::PeerInternalStat; pub use self::{ async_io::{ - write::{Worker as WriteWorker, WriteMsg, WriteTask}, - write_router::WriteRouter, + write::{PersistedNotifier, StoreWriters, Worker as WriteWorker, WriteMsg, WriteTask}, + write_router::{WriteRouter, WriteRouterContext}, }, bootstrap::{ bootstrap_store, clear_prepare_bootstrap_cluster, clear_prepare_bootstrap_key, @@ -68,8 +68,9 @@ pub use self::{ util::{RegionReadProgress, RegionReadProgressRegistry}, worker::{ AutoSplitController, Bucket, BucketRange, CheckLeaderRunner, CheckLeaderTask, - FlowStatistics, FlowStatsReporter, KeyEntry, LocalReader, PdTask, QueryStats, ReadDelegate, - ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, - SplitConfigManager, TrackVer, WriteStats, + FlowStatistics, FlowStatsReporter, KeyEntry, LocalReader, PdTask, QueryStats, + RaftlogFetchRunner, RaftlogFetchTask, ReadDelegate, ReadStats, RefreshConfigTask, + RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, TrackVer, + WriteStats, }, }; diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index aec48c1756f..83363d65ac8 100644 --- 
a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -164,32 +164,6 @@ pub fn recover_from_applying_state( Ok(()) } -fn init_applied_term( - engines: &Engines, - region: &Region, - apply_state: &RaftApplyState, -) -> Result { - if apply_state.applied_index == RAFT_INIT_LOG_INDEX { - return Ok(RAFT_INIT_LOG_TERM); - } - let truncated_state = apply_state.get_truncated_state(); - if apply_state.applied_index == truncated_state.get_index() { - return Ok(truncated_state.get_term()); - } - - match engines - .raft - .get_entry(region.get_id(), apply_state.applied_index)? - { - Some(e) => Ok(e.term), - None => Err(box_err!( - "[region {}] entry at apply index {} doesn't exist, may lose data.", - region.get_id(), - apply_state.applied_index - )), - } -} - fn init_raft_state( engines: &Engines, region: &Region, @@ -233,92 +207,6 @@ fn init_apply_state( ) } -fn init_last_term( - engines: &Engines, - region: &Region, - raft_state: &RaftLocalState, - apply_state: &RaftApplyState, -) -> Result { - let last_idx = raft_state.get_last_index(); - if last_idx == 0 { - return Ok(0); - } else if last_idx == RAFT_INIT_LOG_INDEX { - return Ok(RAFT_INIT_LOG_TERM); - } else if last_idx == apply_state.get_truncated_state().get_index() { - return Ok(apply_state.get_truncated_state().get_term()); - } else { - assert!(last_idx > RAFT_INIT_LOG_INDEX); - } - let entry = engines.raft.get_entry(region.get_id(), last_idx)?; - match entry { - None => Err(box_err!( - "[region {}] entry at {} doesn't exist, may lose data.", - region.get_id(), - last_idx - )), - Some(e) => Ok(e.get_term()), - } -} - -fn validate_states( - region_id: u64, - engines: &Engines, - raft_state: &mut RaftLocalState, - apply_state: &RaftApplyState, -) -> Result<()> { - let last_index = raft_state.get_last_index(); - let mut commit_index = raft_state.get_hard_state().get_commit(); - let recorded_commit_index = apply_state.get_commit_index(); - let state_str = || -> String { - 
format!( - "region {}, raft state {:?}, apply state {:?}", - region_id, raft_state, apply_state - ) - }; - // The commit index of raft state may be less than the recorded commit index. - // If so, forward the commit index. - if commit_index < recorded_commit_index { - let entry = engines.raft.get_entry(region_id, recorded_commit_index)?; - if entry.map_or(true, |e| e.get_term() != apply_state.get_commit_term()) { - return Err(box_err!( - "log at recorded commit index [{}] {} doesn't exist, may lose data, {}", - apply_state.get_commit_term(), - recorded_commit_index, - state_str() - )); - } - info!("updating commit index"; "region_id" => region_id, "old" => commit_index, "new" => recorded_commit_index); - commit_index = recorded_commit_index; - } - // Invariant: applied index <= max(commit index, recorded commit index) - if apply_state.get_applied_index() > commit_index { - return Err(box_err!( - "applied index > max(commit index, recorded commit index), {}", - state_str() - )); - } - // Invariant: max(commit index, recorded commit index) <= last index - if commit_index > last_index { - return Err(box_err!( - "max(commit index, recorded commit index) > last index, {}", - state_str() - )); - } - // Since the entries must be persisted before applying, the term of raft state - // should also be persisted. So it should be greater than the commit term of - // apply state. 
- if raft_state.get_hard_state().get_term() < apply_state.get_commit_term() { - return Err(box_err!( - "term of raft state < commit term of apply state, {}", - state_str() - )); - } - - raft_state.mut_hard_state().set_commit(commit_index); - - Ok(()) -} - pub struct PeerStorage where EK: KvEngine, @@ -411,23 +299,17 @@ where "peer_id" => peer_id, "path" => ?engines.kv.path(), ); - let mut raft_state = init_raft_state(&engines, region)?; + let raft_state = init_raft_state(&engines, region)?; let apply_state = init_apply_state(&engines, region)?; - if let Err(e) = validate_states(region.get_id(), &engines, &mut raft_state, &apply_state) { - return Err(box_err!("{} validate state fail: {:?}", tag, e)); - } - let last_term = init_last_term(&engines, region, &raft_state, &apply_state)?; - let applied_term = init_applied_term(&engines, region, &apply_state)?; + let entry_storage = EntryStorage::new( - region.id, peer_id, engines.raft.clone(), raft_state, apply_state, - last_term, - applied_term, + region, raftlog_fetch_scheduler, - ); + )?; Ok(PeerStorage { engines, @@ -694,11 +576,8 @@ where if task.raft_wb.is_none() { task.raft_wb = Some(self.engines.raft.log_batch(64)); } - if task.kv_wb.is_none() { - task.kv_wb = Some(self.engines.kv.write_batch()); - } let raft_wb = task.raft_wb.as_mut().unwrap(); - let kv_wb = task.kv_wb.as_mut().unwrap(); + let kv_wb = task.extra_write.ensure_v1(|| self.engines.kv.write_batch()); if self.is_initialized() { // we can only delete the old data when the peer is initialized. @@ -1017,9 +896,9 @@ where // in case of recv raft log after snapshot. 
self.save_snapshot_raft_state_to( ready.snapshot().get_metadata().get_index(), - write_task.kv_wb.as_mut().unwrap(), + write_task.extra_write.v1_mut().unwrap(), )?; - self.save_apply_state_to(write_task.kv_wb.as_mut().unwrap())?; + self.save_apply_state_to(write_task.extra_write.v1_mut().unwrap())?; } if !write_task.has_data() { @@ -1325,7 +1204,8 @@ pub mod tests { ents: &[Entry], ) -> PeerStorage { let mut store = new_storage(region_scheduler, raftlog_fetch_scheduler, path); - let mut write_task = WriteTask::new(store.get_region_id(), store.peer_id, 1); + let mut write_task: WriteTask = + WriteTask::new(store.get_region_id(), store.peer_id, 1); store.append(ents[1..].to_vec(), &mut write_task); store.update_cache_persisted(ents.last().unwrap().get_index()); store @@ -1339,12 +1219,10 @@ pub mod tests { store .apply_state_mut() .set_applied_index(ents.last().unwrap().get_index()); - if write_task.kv_wb.is_none() { - write_task.kv_wb = Some(store.engines.kv.write_batch()); - } - store - .save_apply_state_to(write_task.kv_wb.as_mut().unwrap()) - .unwrap(); + let kv_wb = write_task + .extra_write + .ensure_v1(|| store.engines.kv.write_batch()); + store.save_apply_state_to(kv_wb).unwrap(); write_task.raft_state = Some(store.raft_state().clone()); write_to_db_for_test(&store.engines, write_task); store @@ -1779,11 +1657,10 @@ pub mod tests { s.raft_state_mut().set_last_index(7); s.apply_state_mut().set_applied_index(7); write_task.raft_state = Some(s.raft_state().clone()); - if write_task.kv_wb.is_none() { - write_task.kv_wb = Some(s.engines.kv.write_batch()); - } - s.save_apply_state_to(write_task.kv_wb.as_mut().unwrap()) - .unwrap(); + let kv_wb = write_task + .extra_write + .ensure_v1(|| s.engines.kv.write_batch()); + s.save_apply_state_to(kv_wb).unwrap(); write_to_db_for_test(&s.engines, write_task); let term = s.term(7).unwrap(); compact_raft_log(&s.tag, s.entry_storage.apply_state_mut(), 7, term).unwrap(); From 8479cebfaac80672e7ef5a1ad40e940f7f1b7aba Mon Sep 17 
00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 3 Aug 2022 12:00:06 +0800 Subject: [PATCH 0130/1149] engine_test: add single-rocksdb TestTabletFactory (#13163) close tikv/tikv#13162 Signed-off-by: SpadeA-Tang --- components/engine_test/src/lib.rs | 127 +++++++++++++++++++++---- components/engine_traits/src/engine.rs | 26 +++-- src/server/engine_factory.rs | 43 ++++++--- src/server/engine_factory_v2.rs | 30 ++---- 4 files changed, 165 insertions(+), 61 deletions(-) diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index 979fbda17d0..7bdd87827e7 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -116,7 +116,7 @@ pub mod kv { root_path: String, db_opt: DbOptions, cf_opts: Vec<(&'static str, KvTestCfOptions)>, - registry: Arc>>, + root_db: Arc>>, } impl TestTabletFactory { @@ -129,7 +129,104 @@ pub mod kv { root_path: root_path.to_string(), db_opt, cf_opts, - registry: Arc::new(Mutex::new(HashMap::default())), + root_db: Arc::new(Mutex::default()), + } + } + + fn create_tablet(&self, tablet_path: &Path) -> Result { + let kv_engine = KvTestEngine::new_kv_engine_opt( + tablet_path.to_str().unwrap(), + self.db_opt.clone(), + self.cf_opts.clone(), + )?; + Ok(kv_engine) + } + } + + impl TabletFactory for TestTabletFactory { + fn create_shared_db(&self) -> Result { + let tablet_path = self.tablet_path(0, 0); + let tablet = self.create_tablet(&tablet_path)?; + let mut root_db = self.root_db.lock().unwrap(); + root_db.replace(tablet.clone()); + Ok(tablet) + } + + fn create_tablet(&self, _id: u64, _suffix: u64) -> Result { + let db = self.root_db.lock().unwrap(); + if let Some(cp) = db.as_ref() { + return Ok(cp.clone()); + } + + self.create_shared_db() + } + + fn open_tablet_cache(&self, _id: u64, _suffix: u64) -> Option { + self.open_tablet_raw(&self.tablet_path(0, 0), false).ok() + } + + fn open_tablet_cache_any(&self, _id: u64) -> Option { + 
self.open_tablet_cache(0, 0) + } + + fn open_tablet_raw(&self, _path: &Path, _readonly: bool) -> Result { + TabletFactory::create_tablet(self, 0, 0) + } + + fn exists_raw(&self, _path: &Path) -> bool { + false + } + + #[inline] + fn tablet_path(&self, id: u64, suffix: u64) -> PathBuf { + Path::new(&self.root_path).join(format!("tablets/{}_{}", id, suffix)) + } + + #[inline] + fn tablets_path(&self) -> PathBuf { + Path::new(&self.root_path).join("tablets") + } + + #[inline] + fn destroy_tablet(&self, _id: u64, _suffix: u64) -> engine_traits::Result<()> { + Ok(()) + } + + fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()> { + let db = self.root_db.lock().unwrap(); + let opt = db.as_ref().unwrap().get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap + opt.set_block_cache_capacity(capacity)?; + Ok(()) + } + } + + impl TabletAccessor for TestTabletFactory { + fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &KvTestEngine)) { + let db = self.root_db.lock().unwrap(); + let db = db.as_ref().unwrap(); + f(0, 0, db); + } + + fn is_single_engine(&self) -> bool { + true + } + } + + #[derive(Clone)] + pub struct TestTabletFactoryV2 { + inner: TestTabletFactory, + registry: Arc>>, + } + + impl TestTabletFactoryV2 { + pub fn new( + root_path: &str, + db_opt: DbOptions, + cf_opts: Vec<(&'static str, KvTestCfOptions)>, + ) -> Self { + Self { + inner: TestTabletFactory::new(root_path, db_opt, cf_opts), + registry: Arc::default(), } } } @@ -145,7 +242,7 @@ pub mod kv { (tablet_id, tablet_suffix) } - impl TabletFactory for TestTabletFactory { + impl TabletFactory for TestTabletFactoryV2 { fn create_tablet(&self, id: u64, suffix: u64) -> Result { let mut reg = self.registry.lock().unwrap(); if let Some(db) = reg.get(&(id, suffix)) { @@ -155,35 +252,27 @@ pub mod kv { db.as_inner().path() )); } + let tablet_path = self.tablet_path(id, suffix); - let tablet_path = tablet_path.to_str().unwrap(); - let kv_engine = KvTestEngine::new_kv_engine_opt( - 
tablet_path, - self.db_opt.clone(), - self.cf_opts.clone(), - )?; + let kv_engine = self.inner.create_tablet(&tablet_path)?; reg.insert((id, suffix), kv_engine.clone()); + Ok(kv_engine) } fn open_tablet(&self, id: u64, suffix: u64) -> Result { - let mut reg = self.registry.lock().unwrap(); + let reg = self.registry.lock().unwrap(); if let Some(db) = reg.get(&(id, suffix)) { return Ok(db.clone()); } let db_path = self.tablet_path(id, suffix); let db = self.open_tablet_raw(db_path.as_path(), false)?; - reg.insert((id, suffix), db.clone()); Ok(db) } fn open_tablet_cache(&self, id: u64, suffix: u64) -> Option { - let reg = self.registry.lock().unwrap(); - if let Some(db) = reg.get(&(id, suffix)) { - return Some(db.clone()); - } - None + self.registry.lock().unwrap().get(&(id, suffix)).cloned() } fn open_tablet_cache_any(&self, id: u64) -> Option { @@ -217,12 +306,12 @@ pub mod kv { #[inline] fn tablets_path(&self) -> PathBuf { - Path::new(&self.root_path).join("tablets") + Path::new(&self.inner.root_path).join("tablets") } #[inline] fn tablet_path(&self, id: u64, suffix: u64) -> PathBuf { - Path::new(&self.root_path).join(format!("tablets/{}_{}", id, suffix)) + Path::new(&self.inner.root_path).join(format!("tablets/{}_{}", id, suffix)) } #[inline] @@ -281,7 +370,7 @@ pub mod kv { } } - impl TabletAccessor for TestTabletFactory { + impl TabletAccessor for TestTabletFactoryV2 { #[inline] fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &KvTestEngine)) { let reg = self.registry.lock().unwrap(); diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index dc09b54fb6e..7add5e4d9b2 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -172,17 +172,10 @@ pub trait TabletFactory: TabletAccessor { /// Open a tablet by id and suffix from cache---that means it should already /// be opened. 
- fn open_tablet_cache(&self, id: u64, suffix: u64) -> Option { - if let Ok(engine) = self.open_tablet_raw(&self.tablet_path(id, suffix), false) { - return Some(engine); - } - None - } + fn open_tablet_cache(&self, id: u64, suffix: u64) -> Option; /// Open a tablet by id and any suffix from cache - fn open_tablet_cache_any(&self, id: u64) -> Option { - self.open_tablet_cache(id, 0) - } + fn open_tablet_cache_any(&self, id: u64) -> Option; /// Open tablet by path and readonly flag fn open_tablet_raw(&self, path: &Path, readonly: bool) -> Result; @@ -242,21 +235,35 @@ where fn create_tablet(&self, _id: u64, _suffix: u64) -> Result { Ok(self.engine.as_ref().unwrap().clone()) } + fn open_tablet_raw(&self, _path: &Path, _readonly: bool) -> Result { Ok(self.engine.as_ref().unwrap().clone()) } + + fn open_tablet_cache(&self, _id: u64, _suffix: u64) -> Option { + Some(self.engine.as_ref().unwrap().clone()) + } + + fn open_tablet_cache_any(&self, _id: u64) -> Option { + Some(self.engine.as_ref().unwrap().clone()) + } + fn create_shared_db(&self) -> Result { Ok(self.engine.as_ref().unwrap().clone()) } + fn destroy_tablet(&self, _id: u64, _suffix: u64) -> Result<()> { Ok(()) } + fn exists_raw(&self, _path: &Path) -> bool { true } + fn tablet_path(&self, _id: u64, _suffix: u64) -> PathBuf { PathBuf::from(&self.root_path) } + fn tablets_path(&self) -> PathBuf { PathBuf::from(&self.root_path) } @@ -271,6 +278,7 @@ where opt.set_block_cache_capacity(capacity) } } + impl TabletAccessor for DummyFactory where EK: CfOptionsExt + Clone + Send + 'static, diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 4e2edc13569..7ddf338d870 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -18,6 +18,7 @@ use kvproto::kvrpcpb::ApiVersion; use raftstore::RegionInfoAccessor; use tikv_util::worker::Scheduler; +use super::engine_factory_v2::KvEngineFactoryV2; use crate::config::{DbConfig, TiKvConfig, DEFAULT_ROCKSDB_SUB_DIR}; struct 
FactoryInner { @@ -89,6 +90,17 @@ impl KvEngineFactoryBuilder { compact_event_sender: self.compact_event_sender.clone(), } } + + pub fn build_v2(self) -> KvEngineFactoryV2 { + let factory = KvEngineFactory { + inner: Arc::new(self.inner), + compact_event_sender: self.compact_event_sender.clone(), + }; + KvEngineFactoryV2 { + inner: factory, + registry: Arc::default(), + } + } } #[derive(Clone)] @@ -219,13 +231,22 @@ impl TabletFactory for KvEngineFactory { } fn create_tablet(&self, _id: u64, _suffix: u64) -> Result { - if let Ok(db) = self.inner.root_db.lock() { - let cp = db.as_ref().unwrap().clone(); - return Ok(cp); + let db = self.inner.root_db.lock().unwrap(); + if let Some(cp) = db.as_ref() { + return Ok(cp.clone()); } + self.create_shared_db() } + fn open_tablet_cache(&self, _id: u64, _suffix: u64) -> Option { + self.open_tablet_raw(&self.tablet_path(0, 0), false).ok() + } + + fn open_tablet_cache_any(&self, _id: u64) -> Option { + self.open_tablet_cache(0, 0) + } + fn open_tablet_raw(&self, _path: &Path, _readonly: bool) -> Result { TabletFactory::create_tablet(self, 0, 0) } @@ -233,9 +254,11 @@ impl TabletFactory for KvEngineFactory { fn exists_raw(&self, _path: &Path) -> bool { false } + fn tablet_path(&self, _id: u64, _suffix: u64) -> PathBuf { self.kv_engine_path() } + fn tablets_path(&self) -> PathBuf { self.kv_engine_path() } @@ -246,20 +269,18 @@ impl TabletFactory for KvEngineFactory { } fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()> { - if let Ok(db) = self.inner.root_db.lock() { - let opt = db.as_ref().unwrap().get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap - opt.set_block_cache_capacity(capacity)?; - } + let db = self.inner.root_db.lock().unwrap(); + let opt = db.as_ref().unwrap().get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap + opt.set_block_cache_capacity(capacity)?; Ok(()) } } impl TabletAccessor for KvEngineFactory { fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &RocksEngine)) { - if let 
Ok(db) = self.inner.root_db.lock() { - let db = db.as_ref().unwrap(); - f(0, 0, db); - } + let db = self.inner.root_db.lock().unwrap(); + let db = db.as_ref().unwrap(); + f(0, 0, db); } fn is_single_engine(&self) -> bool { diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index 094f6f5d5e6..5d26958ea41 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -15,8 +15,8 @@ const TOMBSTONE_MARK: &str = "TOMBSTONE_TABLET"; #[derive(Clone)] pub struct KvEngineFactoryV2 { - inner: KvEngineFactory, - registry: Arc>>, + pub inner: KvEngineFactory, + pub registry: Arc>>, } // Extract tablet id and tablet suffix from the path. @@ -49,7 +49,7 @@ impl TabletFactory for KvEngineFactoryV2 { } fn open_tablet(&self, id: u64, suffix: u64) -> Result { - let mut reg = self.registry.lock().unwrap(); + let reg = self.registry.lock().unwrap(); if let Some(db) = reg.get(&(id, suffix)) { return Ok(db.clone()); } @@ -57,16 +57,11 @@ impl TabletFactory for KvEngineFactoryV2 { let db_path = self.tablet_path(id, suffix); let db = self.open_tablet_raw(db_path.as_path(), false)?; debug!("open tablet"; "key" => ?(id, suffix)); - reg.insert((id, suffix), db.clone()); Ok(db) } fn open_tablet_cache(&self, id: u64, suffix: u64) -> Option { - let reg = self.registry.lock().unwrap(); - if let Some(db) = reg.get(&(id, suffix)) { - return Some(db.clone()); - } - None + self.registry.lock().unwrap().get(&(id, suffix)).cloned() } fn open_tablet_cache_any(&self, id: u64) -> Option { @@ -153,6 +148,7 @@ impl TabletFactory for KvEngineFactoryV2 { let new_engine = self.open_tablet_raw(db_path.as_path(), false); if new_engine.is_ok() { let (old_id, old_suffix) = get_id_and_suffix_from_path(path); + assert!(suffix > old_suffix); self.registry.lock().unwrap().remove(&(old_id, old_suffix)); } new_engine @@ -206,15 +202,6 @@ mod tests { }; } - impl KvEngineFactoryV2 { - pub fn new(inner: KvEngineFactory) -> Self { - KvEngineFactoryV2 { - inner, - 
registry: Arc::new(Mutex::new(HashMap::default())), - } - } - } - #[test] fn test_kvengine_factory() { let cfg = TEST_CONFIG.clone(); @@ -268,8 +255,8 @@ mod tests { if let Some(cache) = cache { builder = builder.block_cache(cache); } - let inner_factory = builder.build(); - let factory = KvEngineFactoryV2::new(inner_factory); + + let factory = builder.build_v2(); let tablet = factory.create_tablet(1, 10).unwrap(); let tablet2 = factory.open_tablet(1, 10).unwrap(); assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); @@ -314,8 +301,7 @@ mod tests { let env = cfg.build_shared_rocks_env(None, None).unwrap(); let builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); - let inner_factory = builder.build(); - let factory = KvEngineFactoryV2::new(inner_factory); + let factory = builder.build_v2(); factory.create_tablet(1, 10).unwrap(); factory.create_tablet(2, 10).unwrap(); let mut count = 0; From 5c866c4685b27bf52296dc1d38e2671dc04fbe01 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Wed, 3 Aug 2022 14:32:06 +0800 Subject: [PATCH 0131/1149] raftstore: Implement observer on_compute_engine_size (#12948) ref tikv/tikv#12849 Implement observer on_compute_engine_size Signed-off-by: CalvinNeo Co-authored-by: Xinye Tao Co-authored-by: Ti Chi Robot --- .../raftstore/src/coprocessor/dispatcher.rs | 27 ++++ components/raftstore/src/coprocessor/mod.rs | 22 ++- components/raftstore/src/store/fsm/store.rs | 2 + components/raftstore/src/store/worker/pd.rs | 149 ++++++++++++------ 4 files changed, 152 insertions(+), 48 deletions(-) diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index c752e629af1..6297722a996 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -143,6 +143,7 @@ impl_box_observer_g!( SplitCheckObserver, WrappedSplitCheckObserver ); +impl_box_observer!(BoxPdTaskObserver, PdTaskObserver, WrappedPdTaskObserver); 
impl_box_observer!(BoxRoleObserver, RoleObserver, WrappedRoleObserver); impl_box_observer!( BoxRegionChangeObserver, @@ -176,6 +177,7 @@ where region_change_observers: Vec>, cmd_observers: Vec>>, read_index_observers: Vec>, + pd_task_observers: Vec>, // TODO: add endpoint } @@ -191,6 +193,7 @@ impl Default for Registry { region_change_observers: Default::default(), cmd_observers: Default::default(), read_index_observers: Default::default(), + pd_task_observers: Default::default(), } } } @@ -237,6 +240,10 @@ impl Registry { push!(priority, cco, self.consistency_check_observers); } + pub fn register_pd_task_observer(&mut self, priority: u32, ro: BoxPdTaskObserver) { + push!(priority, ro, self.pd_task_observers); + } + pub fn register_role_observer(&mut self, priority: u32, ro: BoxRoleObserver) { push!(priority, ro, self.role_observers); } @@ -548,6 +555,15 @@ impl CoprocessorHost { Ok(hashes) } + pub fn on_compute_engine_size(&self) -> Option { + let mut store_size = None; + for observer in &self.registry.pd_task_observers { + let observer = observer.observer.inner(); + observer.on_compute_engine_size(&mut store_size); + } + store_size + } + pub fn on_role_change(&self, region: &Region, role_change: RoleChange) { loop_ob!( region, @@ -721,6 +737,12 @@ mod tests { } } + impl PdTaskObserver for TestCoprocessor { + fn on_compute_engine_size(&self, _: &mut Option) { + self.called.fetch_add(19, Ordering::SeqCst); + } + } + impl RoleObserver for TestCoprocessor { fn on_role_change(&self, ctx: &mut ObserverContext<'_>, _: &RoleChange) { self.called.fetch_add(7, Ordering::SeqCst); @@ -795,6 +817,8 @@ mod tests { .register_query_observer(1, BoxQueryObserver::new(ob.clone())); host.registry .register_apply_snapshot_observer(1, BoxApplySnapshotObserver::new(ob.clone())); + host.registry + .register_pd_task_observer(1, BoxPdTaskObserver::new(ob.clone())); host.registry .register_role_observer(1, BoxRoleObserver::new(ob.clone())); host.registry @@ -859,6 +883,9 @@ mod tests { 
admin_req.set_admin_request(AdminRequest::default()); host.pre_exec(®ion, &admin_req, 0, 0); assert_all!([&ob.called], &[119]); // 16 + + host.on_compute_engine_size(); + assert_all!([&ob.called], &[138]); // 19 } #[test] diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index e7c351262fa..9f82c90968b 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -32,8 +32,8 @@ pub use self::{ consistency_check::{ConsistencyCheckObserver, Raw as RawConsistencyCheckObserver}, dispatcher::{ BoxAdminObserver, BoxApplySnapshotObserver, BoxCmdObserver, BoxConsistencyCheckObserver, - BoxQueryObserver, BoxRegionChangeObserver, BoxRoleObserver, BoxSplitCheckObserver, - CoprocessorHost, Registry, + BoxPdTaskObserver, BoxQueryObserver, BoxRegionChangeObserver, BoxRoleObserver, + BoxSplitCheckObserver, CoprocessorHost, Registry, }, error::{Error, Result}, region_info_accessor::{ @@ -203,6 +203,24 @@ pub trait SplitCheckObserver: Coprocessor { ); } +/// Describes size information about all stores. +/// There is guarantee that capacity >= used + avail. +/// since some space can be reserved. +#[derive(Debug, Default)] +pub struct StoreSizeInfo { + /// The capacity of the store. + pub capacity: u64, + /// Size of actual data. + pub used: u64, + /// Available space that can be written with actual data. + pub avail: u64, +} + +pub trait PdTaskObserver: Coprocessor { + /// Compute capacity/used/available size of this store. 
+ fn on_compute_engine_size(&self, _: &mut Option) {} +} + pub struct RoleChange { pub state: StateRole, pub leader_id: u64, diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index d4bb0a32266..b058d0bb35e 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -1622,6 +1622,7 @@ impl RaftBatchSystem { let (raft_builder, apply_builder) = (builder.clone(), apply_poller_builder.clone()); let tag = format!("raftstore-{}", store.get_id()); + let coprocessor_host = builder.coprocessor_host.clone(); self.system.spawn(tag, builder); let mut mailboxes = Vec::with_capacity(region_peers.len()); let mut address = Vec::with_capacity(region_peers.len()); @@ -1669,6 +1670,7 @@ impl RaftBatchSystem { collector_reg_handle, region_read_progress, health_service, + coprocessor_host, ); assert!(workers.pd_worker.start_with_timer(pd_runner)); diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index d65cbcea8d4..9e5e54c185e 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -49,19 +49,22 @@ use tikv_util::{ }; use yatp::Remote; -use crate::store::{ - cmd_resp::new_error, - metrics::*, - peer::{UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryForceLeaderSyncer}, - transport::SignificantRouter, - util::{is_epoch_stale, KeysInfoFormatter, LatencyInspector, RaftstoreDuration}, - worker::{ - query_stats::QueryStats, - split_controller::{SplitInfo, TOP_N}, - AutoSplitController, ReadStats, SplitConfigChange, WriteStats, +use crate::{ + coprocessor::CoprocessorHost, + store::{ + cmd_resp::new_error, + metrics::*, + peer::{UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryForceLeaderSyncer}, + transport::SignificantRouter, + util::{is_epoch_stale, KeysInfoFormatter, LatencyInspector, RaftstoreDuration}, + worker::{ + query_stats::QueryStats, + split_controller::{SplitInfo, TOP_N}, + 
AutoSplitController, ReadStats, SplitConfigChange, WriteStats, + }, + Callback, CasualMessage, Config, PeerMsg, RaftCmdExtraOpts, RaftCommand, RaftRouter, + RegionReadProgressRegistry, SignificantMsg, SnapManager, StoreInfo, StoreMsg, TxnExt, }, - Callback, CasualMessage, Config, PeerMsg, RaftCmdExtraOpts, RaftCommand, RaftRouter, - RegionReadProgressRegistry, SignificantMsg, SnapManager, StoreInfo, StoreMsg, TxnExt, }; type RecordPairVec = Vec; @@ -902,6 +905,7 @@ where // The health status of the store is updated by the slow score mechanism. health_service: Option, curr_health_status: ServingStatus, + coprocessor_host: CoprocessorHost, } impl Runner @@ -926,6 +930,7 @@ where collector_reg_handle: CollectorRegHandle, region_read_progress: RegionReadProgressRegistry, health_service: Option, + coprocessor_host: CoprocessorHost, ) -> Runner { // Register the region CPU records collector. let mut region_cpu_records_collector = None; @@ -969,6 +974,7 @@ where slow_score: SlowScore::new(cfg.inspect_interval.0), health_service, curr_health_status: ServingStatus::Serving, + coprocessor_host, } } @@ -1179,18 +1185,6 @@ where store_report: Option, dr_autosync_status: Option, ) { - let disk_stats = match fs2::statvfs(store_info.kv_engine.path()) { - Err(e) => { - error!( - "get disk stat for rocksdb failed"; - "engine_path" => store_info.kv_engine.path(), - "err" => ?e - ); - return; - } - Ok(stats) => stats, - }; - let mut report_peers = HashMap::default(); for (region_id, region_peer) in &mut self.region_peers { let read_bytes = region_peer.read_bytes - region_peer.last_store_report_read_bytes; @@ -1218,35 +1212,21 @@ where } stats = collect_report_read_peer_stats(HOTSPOT_REPORT_CAPACITY, report_peers, stats); - - let disk_cap = disk_stats.total_space(); - let capacity = if store_info.capacity == 0 || disk_cap < store_info.capacity { - disk_cap - } else { - store_info.capacity + let (capacity, used_size, available) = match collect_engine_size( + &self.coprocessor_host, + 
Some(&store_info), + self.snap_mgr.get_total_snap_size().unwrap(), + ) { + Some((capacity, used_size, available)) => (capacity, used_size, available), + None => return, }; - stats.set_capacity(capacity); - let used_size = self.snap_mgr.get_total_snap_size().unwrap() - + store_info - .kv_engine - .get_engine_used_size() - .expect("kv engine used size") - + store_info - .raft_engine - .get_engine_size() - .expect("raft engine used size"); + stats.set_capacity(capacity); stats.set_used_size(used_size); - let mut available = capacity.checked_sub(used_size).unwrap_or_default(); - // We only care about rocksdb SST file size, so we should check disk available - // here. - available = cmp::min(available, disk_stats.available_space()); - if available == 0 { warn!("no available space"); } - stats.set_available(available); stats.set_bytes_read( self.store_stat.engine_total_bytes_read - self.store_stat.engine_last_total_bytes_read, @@ -2304,6 +2284,48 @@ fn collect_report_read_peer_stats( stats } +fn collect_engine_size( + coprocessor_host: &CoprocessorHost, + store_info: Option<&StoreInfo>, + snap_mgr_size: u64, +) -> Option<(u64, u64, u64)> { + if let Some(engine_size) = coprocessor_host.on_compute_engine_size() { + return Some((engine_size.capacity, engine_size.used, engine_size.avail)); + } + let store_info = store_info.unwrap(); + let disk_stats = match fs2::statvfs(store_info.kv_engine.path()) { + Err(e) => { + error!( + "get disk stat for rocksdb failed"; + "engine_path" => store_info.kv_engine.path(), + "err" => ?e + ); + return None; + } + Ok(stats) => stats, + }; + let disk_cap = disk_stats.total_space(); + let capacity = if store_info.capacity == 0 || disk_cap < store_info.capacity { + disk_cap + } else { + store_info.capacity + }; + let used_size = snap_mgr_size + + store_info + .kv_engine + .get_engine_used_size() + .expect("kv engine used size") + + store_info + .raft_engine + .get_engine_size() + .expect("raft engine used size"); + let mut available = 
capacity.checked_sub(used_size).unwrap_or_default(); + // We only care about rocksdb SST file size, so we should check disk available + // here. + available = cmp::min(available, disk_stats.available_space()); + Some((capacity, used_size, available)) +} + fn get_read_query_num(stat: &pdpb::QueryStats) -> u64 { stat.get_get() + stat.get_coprocessor() + stat.get_scan() } @@ -2494,9 +2516,12 @@ mod tests { ); } + use engine_test::{kv::KvTestEngine, raft::RaftTestEngine}; use metapb::Peer; use resource_metering::{RawRecord, TagInfos}; + use crate::coprocessor::{BoxPdTaskObserver, Coprocessor, PdTaskObserver, StoreSizeInfo}; + #[test] fn test_calculate_region_cpu_records() { // region_id -> total_cpu_time_ms @@ -2600,4 +2625,36 @@ mod tests { assert_eq!(report.stats.get_read_qps(), expected); } } + + #[derive(Debug, Clone, Default)] + struct PdObserver {} + + impl Coprocessor for PdObserver {} + + impl PdTaskObserver for PdObserver { + fn on_compute_engine_size(&self, s: &mut Option) { + let _ = s.insert(StoreSizeInfo { + capacity: 444, + used: 111, + avail: 333, + }); + } + } + + #[test] + fn test_pd_task_observer() { + let mut host = CoprocessorHost::::default(); + let obs = PdObserver::default(); + host.registry + .register_pd_task_observer(1, BoxPdTaskObserver::new(obs)); + let store_size = collect_engine_size::(&host, None, 0); + let (cap, used, avail) = if let Some((cap, used, avail)) = store_size { + (cap, used, avail) + } else { + panic!("store_size should not be none"); + }; + assert_eq!(cap, 444); + assert_eq!(used, 111); + assert_eq!(avail, 333); + } } From 8be0b14d34382eaa7ff9814714af19043547466e Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 3 Aug 2022 18:26:06 +0800 Subject: [PATCH 0132/1149] *: update toolchain to 2022-07-31 (#13190) ref tikv/tikv#13008, ref tikv/tikv#13009 Signed-off-by: tabokie --- Cargo.lock | 8 +- Makefile | 1 + cmd/tikv-ctl/src/cmd.rs | 4 +- components/api_version/src/lib.rs | 4 +- .../backup-stream/src/checkpoint_manager.rs | 2 
+- .../backup-stream/src/metadata/client.rs | 4 +- .../src/metadata/store/slash_etc.rs | 2 +- components/backup-stream/src/metrics.rs | 2 +- components/backup-stream/src/router.rs | 17 +-- .../backup-stream/src/subscription_track.rs | 2 +- components/backup-stream/src/utils.rs | 2 +- components/batch-system/src/fsm.rs | 4 +- components/causal_ts/src/tso.rs | 14 +- components/cdc/src/channel.rs | 2 +- components/cdc/src/delegate.rs | 4 +- components/cdc/src/endpoint.rs | 2 +- .../cdc/tests/failpoints/test_resolve.rs | 2 +- components/cloud/aws/src/kms.rs | 2 +- components/cloud/aws/src/s3.rs | 32 +++-- components/codec/src/buffer.rs | 10 +- components/codec/src/byte.rs | 8 +- .../concurrency_manager/src/key_handle.rs | 4 +- components/concurrency_manager/src/lib.rs | 8 +- .../concurrency_manager/src/lock_table.rs | 2 +- components/coprocessor_plugin_api/src/util.rs | 4 +- components/encryption/src/config.rs | 6 +- .../encryption/src/encrypted_file/header.rs | 4 +- components/encryption/src/manager/mod.rs | 4 +- .../encryption/src/master_key/metadata.rs | 4 +- .../engine_rocks/src/compact_listener.rs | 2 +- components/engine_rocks/src/engine.rs | 2 +- components/engine_rocks/src/misc.rs | 5 +- .../engine_rocks/src/perf_context_impl.rs | 4 +- components/engine_rocks/src/properties.rs | 2 +- .../engine_rocks/src/range_properties.rs | 8 +- components/engine_rocks/src/ttl_properties.rs | 4 +- components/engine_traits/src/cf_defs.rs | 8 +- components/engine_traits/src/encryption.rs | 4 +- components/engine_traits/src/engine.rs | 3 +- components/engine_traits/src/errors.rs | 6 +- components/engine_traits/src/perf_context.rs | 2 +- .../engine_traits/src/sst_partitioner.rs | 6 +- components/engine_traits_tests/src/ctor.rs | 4 +- .../engine_traits_tests/src/delete_range.rs | 10 +- .../engine_traits_tests/src/iterator.rs | 64 ++++----- .../src/scenario_writes.rs | 6 +- components/engine_traits_tests/src/sst.rs | 24 ++-- .../engine_traits_tests/src/write_batch.rs | 40 +++--- 
components/error_code/src/lib.rs | 2 +- components/file_system/src/lib.rs | 10 +- components/file_system/src/rate_limiter.rs | 4 +- components/keys/src/lib.rs | 10 +- components/keys/src/rewrite.rs | 2 +- .../online_config_derive/src/lib.rs | 6 +- components/online_config/src/lib.rs | 4 +- components/raftstore-v2/src/router/message.rs | 2 +- .../src/coprocessor/region_info_accessor.rs | 2 +- .../src/coprocessor/split_check/size.rs | 2 +- components/raftstore/src/lib.rs | 1 - components/raftstore/src/store/config.rs | 75 +++++++---- .../raftstore/src/store/entry_storage.rs | 2 +- components/raftstore/src/store/fsm/apply.rs | 20 +-- components/raftstore/src/store/fsm/peer.rs | 2 +- components/raftstore/src/store/fsm/store.rs | 8 +- components/raftstore/src/store/msg.rs | 2 +- components/raftstore/src/store/peer.rs | 4 +- .../raftstore/src/store/peer_storage.rs | 6 +- components/raftstore/src/store/read_queue.rs | 2 +- .../raftstore/src/store/region_snapshot.rs | 2 +- .../raftstore/src/store/replication_mode.rs | 3 +- components/raftstore/src/store/snap.rs | 22 ++-- components/raftstore/src/store/txn_ext.rs | 2 +- components/raftstore/src/store/util.rs | 4 +- .../src/store/worker/cleanup_snapshot.rs | 2 +- components/raftstore/src/store/worker/pd.rs | 12 +- .../raftstore/src/store/worker/split_check.rs | 2 +- .../src/store/worker/split_controller.rs | 2 +- components/resource_metering/src/config.rs | 6 +- components/resource_metering/src/lib.rs | 15 +-- components/resource_metering/src/model.rs | 4 +- components/sst_importer/src/sst_importer.rs | 10 +- components/sst_importer/src/sst_writer.rs | 6 +- components/test_raftstore/src/pd.rs | 2 +- .../test_raftstore/src/transport_simulate.rs | 4 +- components/test_sst_importer/src/lib.rs | 2 +- components/test_storage/src/assert_storage.rs | 73 +++++------ .../tidb_query_aggr/src/impl_max_min.rs | 2 +- components/tidb_query_aggr/src/lib.rs | 8 +- .../tidb_query_common/src/execute_stats.rs | 2 +- 
.../tidb_query_common/src/storage/range.rs | 6 +- .../src/storage/ranges_iter.rs | 2 +- .../tidb_query_datatype/src/codec/convert.rs | 6 +- .../src/codec/data_type/chunked_vec_bytes.rs | 2 +- .../src/codec/data_type/mod.rs | 6 +- .../src/codec/data_type/vector.rs | 2 +- .../tidb_query_datatype/src/codec/datum.rs | 4 +- .../src/codec/mysql/binary_literal.rs | 12 +- .../src/codec/mysql/decimal.rs | 2 +- .../src/codec/mysql/duration.rs | 2 +- .../src/codec/mysql/enums.rs | 6 +- .../src/codec/mysql/json/mod.rs | 4 +- .../src/codec/mysql/json/modifier.rs | 4 +- .../src/codec/mysql/json/serde.rs | 2 +- .../src/codec/mysql/time/mod.rs | 16 +-- .../src/codec/mysql/time/tz.rs | 2 +- .../src/codec/row/v2/row_slice.rs | 4 +- .../tidb_query_datatype/src/codec/table.rs | 14 +- .../tidb_query_datatype/src/def/eval_type.rs | 2 +- .../tidb_query_datatype/src/def/field_type.rs | 4 +- .../tidb_query_datatype/src/expr/ctx.rs | 2 +- .../src/index_scan_executor.rs | 38 +++--- .../src/limit_executor.rs | 2 +- .../src/projection_executor.rs | 2 +- .../src/selection_executor.rs | 2 +- .../src/table_scan_executor.rs | 12 +- .../tidb_query_expr/src/impl_arithmetic.rs | 14 +- components/tidb_query_expr/src/impl_cast.rs | 6 +- .../tidb_query_expr/src/impl_compare.rs | 2 +- .../tidb_query_expr/src/impl_compare_in.rs | 4 +- .../tidb_query_expr/src/impl_encryption.rs | 10 +- components/tidb_query_expr/src/impl_json.rs | 4 +- components/tidb_query_expr/src/impl_math.rs | 24 ++-- components/tidb_query_expr/src/impl_op.rs | 32 ++--- components/tidb_query_expr/src/impl_string.rs | 20 +-- components/tidb_query_expr/src/impl_time.rs | 4 +- .../tidb_query_expr/src/types/expr_builder.rs | 26 +--- .../tidb_query_expr/src/types/expr_eval.rs | 12 +- components/tikv_kv/src/btree_engine.rs | 2 +- components/tikv_kv/src/cursor.rs | 24 ++-- components/tikv_kv/src/lib.rs | 14 +- components/tikv_util/src/codec/bytes.rs | 4 +- components/tikv_util/src/config.rs | 8 +- components/tikv_util/src/future.rs | 2 +- 
components/tikv_util/src/lib.rs | 2 +- components/tikv_util/src/logger/file_log.rs | 2 +- components/tikv_util/src/logger/mod.rs | 10 +- components/tikv_util/src/metrics/mod.rs | 2 +- .../tikv_util/src/metrics/threads_linux.rs | 2 +- components/tikv_util/src/mpsc/batch.rs | 2 +- components/tikv_util/src/time.rs | 2 +- components/tikv_util/src/timer.rs | 2 +- components/tikv_util/src/worker/pool.rs | 2 +- .../tikv_util/src/yatp_pool/future_pool.rs | 30 ++--- components/tracker/src/lib.rs | 4 +- components/tracker/src/slab.rs | 2 +- components/txn_types/src/lock.rs | 6 +- components/txn_types/src/timestamp.rs | 2 +- components/txn_types/src/types.rs | 20 ++- fuzz/cli.rs | 2 +- rust-toolchain | 2 +- rustfmt.toml | 1 - scripts/check-docker-build | 22 ++-- scripts/check-license | 14 ++ scripts/check-redact-log | 16 +-- scripts/clippy | 58 ++++---- scripts/clippy-all | 18 +-- scripts/run-cargo.sh | 2 +- src/config.rs | 124 ++++++++---------- src/coprocessor/endpoint.rs | 4 +- .../interceptors/concurrency_limiter.rs | 6 +- src/server/config.rs | 22 ++-- src/server/debug.rs | 12 +- src/server/engine_factory_v2.rs | 4 +- src/server/gc_worker/gc_manager.rs | 4 +- src/server/gc_worker/gc_worker.rs | 20 ++- src/server/lock_manager/client.rs | 7 +- src/server/raftkv.rs | 10 +- src/server/resolve.rs | 2 +- src/server/server.rs | 2 +- src/server/snap.rs | 4 +- src/server/status_server/profile.rs | 6 +- src/storage/config.rs | 4 +- src/storage/mod.rs | 2 +- src/storage/mvcc/mod.rs | 2 +- src/storage/mvcc/reader/point_getter.rs | 10 +- src/storage/mvcc/reader/scanner/backward.rs | 4 +- src/storage/mvcc/reader/scanner/forward.rs | 4 +- src/storage/mvcc/txn.rs | 28 ++-- src/storage/txn/actions/check_txn_status.rs | 2 +- src/storage/txn/actions/commit.rs | 2 +- src/storage/txn/actions/tests.rs | 38 +++--- .../singleton_flow_controller.rs | 2 +- src/storage/txn/store.rs | 24 ++-- tests/failpoints/cases/test_conf_change.rs | 2 +- tests/failpoints/cases/test_encryption.rs | 2 +- 
tests/failpoints/cases/test_hibernate.rs | 2 +- tests/failpoints/cases/test_merge.rs | 2 +- tests/failpoints/cases/test_pd_client.rs | 2 +- tests/failpoints/cases/test_replica_read.rs | 6 +- tests/failpoints/cases/test_split_region.rs | 2 +- tests/failpoints/cases/test_storage.rs | 2 +- tests/failpoints/cases/test_transaction.rs | 6 +- .../failpoints/cases/test_transfer_leader.rs | 6 +- tests/failpoints/cases/test_ttl.rs | 4 +- .../failpoints/cases/test_unsafe_recovery.rs | 24 ++-- .../integrations/config/dynamic/gc_worker.rs | 2 +- .../config/dynamic/pessimistic_txn.rs | 2 +- .../integrations/config/dynamic/raftstore.rs | 2 +- .../integrations/config/test_config_client.rs | 12 +- tests/integrations/pd/test_rpc_client.rs | 4 +- .../integrations/raftstore/test_bootstrap.rs | 2 +- .../raftstore/test_early_apply.rs | 2 +- .../integrations/raftstore/test_lease_read.rs | 4 +- tests/integrations/raftstore/test_merge.rs | 6 +- .../raftstore/test_replica_read.rs | 14 +- .../raftstore/test_split_region.rs | 2 +- .../raftstore/test_unsafe_recovery.rs | 24 ++-- tests/integrations/server/kv_service.rs | 2 +- tests/integrations/server/raft_client.rs | 2 +- tests/integrations/server/security.rs | 2 +- .../integrations/storage/test_raft_storage.rs | 28 ++-- tests/integrations/storage/test_raftkv.rs | 2 +- tests/integrations/storage/test_titan.rs | 2 +- 213 files changed, 869 insertions(+), 970 deletions(-) create mode 100755 scripts/check-license diff --git a/Cargo.lock b/Cargo.lock index 9e0303726fb..c5d22fc6e61 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3676,18 +3676,18 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58ad3879ad3baf4e44784bc6a718a8698867bb991f8ce24d1bcbe2cfb4c3a75e" +checksum = "78203e83c48cffbe01e4a2d35d566ca4de445d79a85372fc64e378bfc812a260" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = 
"1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744b6f092ba29c3650faf274db506afd39944f48420f6c86b17cfe0ee1cb36bb" +checksum = "710faf75e1b33345361201d36d04e98ac1ed8909151a017ed384700836104c74" dependencies = [ "proc-macro2", "quote", diff --git a/Makefile b/Makefile index fb7bbf6052e..3229a307e7f 100644 --- a/Makefile +++ b/Makefile @@ -347,6 +347,7 @@ pre-clippy: unset-override clippy: pre-clippy @./scripts/check-redact-log @./scripts/check-docker-build + @./scripts/check-license @./scripts/clippy-all pre-audit: diff --git a/cmd/tikv-ctl/src/cmd.rs b/cmd/tikv-ctl/src/cmd.rs index 7f459a4c127..2fec7ea9cef 100644 --- a/cmd/tikv-ctl/src/cmd.rs +++ b/cmd/tikv-ctl/src/cmd.rs @@ -1,13 +1,13 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -use std::{borrow::ToOwned, lazy::SyncLazy, str, string::ToString, u64}; +use std::{borrow::ToOwned, str, string::ToString, sync::LazyLock, u64}; use clap::{crate_authors, AppSettings}; use engine_traits::CF_DEFAULT; use structopt::StructOpt; const RAW_KEY_HINT: &str = "Raw key (generally starts with \"z\") in escaped form"; -static VERSION_INFO: SyncLazy = SyncLazy::new(|| { +static VERSION_INFO: LazyLock = LazyLock::new(|| { let build_timestamp = option_env!("TIKV_BUILD_TIME"); tikv::tikv_version_info(build_timestamp) }); diff --git a/components/api_version/src/lib.rs b/components/api_version/src/lib.rs index 60f23455cc7..fb8fd13cbfd 100644 --- a/components/api_version/src/lib.rs +++ b/components/api_version/src/lib.rs @@ -176,7 +176,7 @@ macro_rules! dispatch_api_version { } /// The key mode inferred from the key prefix. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq)] pub enum KeyMode { /// Raw key. 
Raw, @@ -235,7 +235,7 @@ pub enum KeyMode { /// | 0x12 0x34 0x56 | 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff | 0x01 (0b00000001) | /// -------------------------------------------------------------------------------- /// ``` -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq)] pub struct RawValue> { /// The user value. pub user_value: T, diff --git a/components/backup-stream/src/checkpoint_manager.rs b/components/backup-stream/src/checkpoint_manager.rs index 7dae680fa05..2874d548c5a 100644 --- a/components/backup-stream/src/checkpoint_manager.rs +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -138,7 +138,7 @@ fn epoch_not_match(id: u64, sent: u64, real: u64) -> PbError { err } -#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] +#[derive(Debug, PartialEq, Hash, Clone, Copy)] /// A simple region id, but versioned. pub struct RegionIdWithVersion { pub region_id: u64, diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index 2732952930c..e92addd2992 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -72,7 +72,7 @@ impl PartialEq for MetadataEvent { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq)] pub enum CheckpointProvider { Store(u64), Region { id: u64, version: u64 }, @@ -82,7 +82,7 @@ pub enum CheckpointProvider { /// The polymorphic checkpoint. /// The global checkpoint should be the minimal checkpoint of all checkpoints. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq)] pub struct Checkpoint { pub provider: CheckpointProvider, pub ts: TimeStamp, diff --git a/components/backup-stream/src/metadata/store/slash_etc.rs b/components/backup-stream/src/metadata/store/slash_etc.rs index 2ae4c05dfaf..0d6484b0c1e 100644 --- a/components/backup-stream/src/metadata/store/slash_etc.rs +++ b/components/backup-stream/src/metadata/store/slash_etc.rs @@ -49,7 +49,7 @@ impl std::fmt::Debug for Key { } /// A value (maybe tombstone.) -#[derive(Debug, Eq, PartialEq, Clone)] +#[derive(Debug, PartialEq, Clone)] enum Value { Val(Vec), Del, diff --git a/components/backup-stream/src/metrics.rs b/components/backup-stream/src/metrics.rs index de150ef2395..c3f99b8617e 100644 --- a/components/backup-stream/src/metrics.rs +++ b/components/backup-stream/src/metrics.rs @@ -6,7 +6,7 @@ use prometheus::*; /// The status of a task. /// The ordering of this imples the priority for presenting to the user. /// max(TASK_STATUS) of all stores would be probably the state of the task. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq)] pub enum TaskStatus { Running = 0, Paused, diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 05e49d232a9..d5486cecddb 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -76,8 +76,8 @@ impl TaskSelector { pub fn reference(&self) -> TaskSelectorRef<'_> { match self { TaskSelector::ByName(s) => TaskSelectorRef::ByName(s), - TaskSelector::ByKey(k) => TaskSelectorRef::ByKey(&*k), - TaskSelector::ByRange(s, e) => TaskSelectorRef::ByRange(&*s, &*e), + TaskSelector::ByKey(k) => TaskSelectorRef::ByKey(k), + TaskSelector::ByRange(s, e) => TaskSelectorRef::ByRange(s, e), TaskSelector::All => TaskSelectorRef::All, } } @@ -99,9 +99,9 @@ impl<'a> TaskSelectorRef<'a> { ) -> bool { match self { TaskSelectorRef::ByName(name) => task_name == name, - TaskSelectorRef::ByKey(k) => task_range.any(|(s, e)| utils::is_in_range(k, (&*s, &*e))), + TaskSelectorRef::ByKey(k) => task_range.any(|(s, e)| utils::is_in_range(k, (s, e))), TaskSelectorRef::ByRange(x1, y1) => { - task_range.any(|(x2, y2)| utils::is_overlapping((x1, y1), (&*x2, &*y2))) + task_range.any(|(x2, y2)| utils::is_overlapping((x1, y1), (x2, y2))) } TaskSelectorRef::All => true, } @@ -652,15 +652,14 @@ impl TempFileKey { } fn get_file_type(&self) -> FileType { - let file_type = match self.cmd_type { + match self.cmd_type { CmdType::Put => FileType::Put, CmdType::Delete => FileType::Delete, _ => { warn!("error cmdtype"; "cmdtype" => ?self.cmd_type); panic!("error CmdType"); } - }; - file_type + } } /// The full name of the file owns the key. 
@@ -1787,9 +1786,7 @@ mod tests { reader: UnpinReader, content_length: u64, ) -> io::Result<()> { - if let Err(e) = (self.error_on_write)() { - return Err(e); - } + (self.error_on_write)()?; self.inner.write(name, reader, content_length).await } diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index aa9f35705fb..2287dedc6c5 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -15,7 +15,7 @@ use crate::{debug, metrics::TRACK_REGION, utils}; #[derive(Clone, Default, Debug)] pub struct SubscriptionTracer(Arc>); -#[derive(Debug, Eq, PartialEq, Clone, Copy)] +#[derive(Debug, PartialEq, Clone, Copy)] pub enum SubscriptionState { /// When it is newly added (maybe after split or leader transfered from /// other store), without any flush. diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 89f21567801..ac1b3dec168 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -698,7 +698,7 @@ mod test { drop(work); }); } - let _ = block_on(tokio::time::timeout(Duration::from_secs(20), wg.wait())).unwrap(); + block_on(tokio::time::timeout(Duration::from_secs(20), wg.wait())).unwrap(); assert_eq!(cnt.load(Ordering::SeqCst), 0, "{:?}@{}", c, i); } } diff --git a/components/batch-system/src/fsm.rs b/components/batch-system/src/fsm.rs index cee3a7b4020..6fb4fe91539 100644 --- a/components/batch-system/src/fsm.rs +++ b/components/batch-system/src/fsm.rs @@ -19,7 +19,7 @@ const NOTIFYSTATE_IDLE: usize = 1; // The FSM is expected to be dropped. 
const NOTIFYSTATE_DROP: usize = 2; -#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub enum Priority { Low, Normal, @@ -155,7 +155,7 @@ impl FsmState { let ptr = self.data.swap(ptr::null_mut(), Ordering::SeqCst); if !ptr.is_null() { unsafe { - Box::from_raw(ptr); + let _ = Box::from_raw(ptr); } } } diff --git a/components/causal_ts/src/tso.rs b/components/causal_ts/src/tso.rs index 35e6bffd11b..b6ee5d177e1 100644 --- a/components/causal_ts/src/tso.rs +++ b/components/causal_ts/src/tso.rs @@ -433,7 +433,7 @@ pub mod tests { batch.renew(10, TimeStamp::compose(1, 110)).unwrap(); // timestamp fall back - assert!(batch.renew(10, TimeStamp::compose(1, 119)).is_err()); + batch.renew(10, TimeStamp::compose(1, 119)).unwrap_err(); batch.renew(10, TimeStamp::compose(1, 200)).unwrap(); for logical in 191..=195 { @@ -500,7 +500,7 @@ pub mod tests { for ts in 1101..=1200u64 { assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } - assert!(provider.get_ts().is_err()); + provider.get_ts().unwrap_err(); provider.flush().unwrap(); // allocated: [1201, 1400] assert_eq!(provider.batch_size(), 200); @@ -517,7 +517,7 @@ pub mod tests { for ts in 1401..=1500u64 { assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } - assert!(provider.get_ts().is_err()); + provider.get_ts().unwrap_err(); // renew on used-up for ts in 1501..=2500u64 { @@ -560,23 +560,23 @@ pub mod tests { assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } - assert!(provider.flush().is_err()); + provider.flush().unwrap_err(); for ts in 1101..=1300u64 { // renew on used-up, allocated: [1101, 1300] assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } pd_cli.trigger_tso_failure(); - assert!(provider.get_ts().is_err()); // renew fail on used-up + provider.get_ts().unwrap_err(); // renew fail on used-up pd_cli.trigger_tso_failure(); - assert!(provider.flush().is_err()); + provider.flush().unwrap_err(); provider.flush().unwrap(); // allocated: [1301, 
1700] pd_cli.trigger_tso_failure(); // make renew fail to verify used-up for ts in 1301..=1700u64 { assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } - assert!(provider.get_ts().is_err()); + provider.get_ts().unwrap_err(); } } diff --git a/components/cdc/src/channel.rs b/components/cdc/src/channel.rs index 3b1894eb6fc..595632c306e 100644 --- a/components/cdc/src/channel.rs +++ b/components/cdc/src/channel.rs @@ -266,7 +266,7 @@ pub fn channel(buffer: usize, memory_quota: MemoryQuota) -> (Sink, Drain) { ) } -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub enum SendError { Full, Disconnected, diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index f6ef0659fe0..fc379916232 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -45,7 +45,7 @@ use crate::{ static DOWNSTREAM_ID_ALLOC: AtomicUsize = AtomicUsize::new(0); /// A unique identifier of a Downstream. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Hash)] pub struct DownstreamID(usize); impl DownstreamID { @@ -1229,7 +1229,7 @@ mod tests { assert!(delegate.handle.is_observing()); // Subscribe with an invalid epoch. - assert!(delegate.subscribe(new_downstream(1, 2)).is_err()); + delegate.subscribe(new_downstream(1, 2)).unwrap_err(); assert_eq!(delegate.downstreams().len(), 1); // Unsubscribe all downstreams. 
diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 4a957774a23..9d15c347e32 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -2568,7 +2568,7 @@ mod tests { err: Some(Error::request(err_header.clone())), }; suite.run(Task::Deregister(deregister)); - assert!(channel::recv_timeout(&mut rx, Duration::from_millis(200)).is_err()); + channel::recv_timeout(&mut rx, Duration::from_millis(200)).unwrap_err(); assert_eq!(suite.endpoint.capture_regions.len(), 1); let deregister = Deregister::Downstream { diff --git a/components/cdc/tests/failpoints/test_resolve.rs b/components/cdc/tests/failpoints/test_resolve.rs index 75326ac0fb5..560eb68ba44 100644 --- a/components/cdc/tests/failpoints/test_resolve.rs +++ b/components/cdc/tests/failpoints/test_resolve.rs @@ -260,7 +260,7 @@ fn test_joint_confchange() { receive_resolved_ts(&receive_event); tx.send(()).unwrap(); }); - assert!(rx.recv_timeout(Duration::from_secs(2)).is_err()); + rx.recv_timeout(Duration::from_secs(2)).unwrap_err(); fail::remove(update_region_fp); fail::remove(deregister_fp); diff --git a/components/cloud/aws/src/kms.rs b/components/cloud/aws/src/kms.rs index 3d5d6a3fdea..040db46bb53 100644 --- a/components/cloud/aws/src/kms.rs +++ b/components/cloud/aws/src/kms.rs @@ -86,7 +86,7 @@ impl KmsProvider for AwsKms { // possible that a wrong master key has been used, or other error otherwise. async fn decrypt_data_key(&self, data_key: &EncryptedKey) -> Result> { let decrypt_request = DecryptRequest { - ciphertext_blob: bytes::Bytes::copy_from_slice(&*data_key), + ciphertext_blob: bytes::Bytes::copy_from_slice(data_key), // Use default algorithm SYMMETRIC_DEFAULT. encryption_algorithm: None, // Use key_id encoded in ciphertext. 
diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index ef13749ccea..25499d89c61 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -707,15 +707,14 @@ mod tests { // inject put error let s3_put_obj_err_fp = "s3_put_obj_err"; fail::cfg(s3_put_obj_err_fp, "return").unwrap(); - let resp = s - .put( - "mykey", - PutResource(Box::new(magic_contents.as_bytes())), - magic_contents.len() as u64, - ) - .await; + s.put( + "mykey", + PutResource(Box::new(magic_contents.as_bytes())), + magic_contents.len() as u64, + ) + .await + .unwrap_err(); fail::remove(s3_put_obj_err_fp); - assert!(resp.is_err()); // test timeout let s3_timeout_injected_fp = "s3_timeout_injected"; @@ -725,16 +724,15 @@ mod tests { fail::cfg(s3_timeout_injected_fp, "return(100)").unwrap(); // inject 200ms delay fail::cfg(s3_sleep_injected_fp, "return(200)").unwrap(); - let resp = s - .put( - "mykey", - PutResource(Box::new(magic_contents.as_bytes())), - magic_contents.len() as u64, - ) - .await; - fail::remove(s3_sleep_injected_fp); // timeout occur due to delay 200ms - assert!(resp.is_err()); + s.put( + "mykey", + PutResource(Box::new(magic_contents.as_bytes())), + magic_contents.len() as u64, + ) + .await + .unwrap_err(); + fail::remove(s3_sleep_injected_fp); // inject 50ms delay fail::cfg(s3_sleep_injected_fp, "return(50)").unwrap(); diff --git a/components/codec/src/buffer.rs b/components/codec/src/buffer.rs index 4010ecdf04f..f40ee1fae4f 100644 --- a/components/codec/src/buffer.rs +++ b/components/codec/src/buffer.rs @@ -343,7 +343,7 @@ mod tests { // Read more bytes than available buffer.set_position(39); - assert!(buffer.read_bytes(2).is_err()); + buffer.read_bytes(2).unwrap_err(); assert_eq!(buffer.position(), 39); assert_eq!(buffer.bytes(), &base[39..40]); } @@ -378,14 +378,14 @@ mod tests { assert_eq!(buffer, &base[21..40]); assert_eq!(buffer.bytes(), &base[21..40]); - assert!(buffer.read_bytes(20).is_err()); + 
buffer.read_bytes(20).unwrap_err(); buffer.advance(19); assert_eq!(buffer, &[]); assert_eq!(buffer.bytes(), &[]); assert_eq!(buffer.read_bytes(0).unwrap(), &[]); - assert!(buffer.read_bytes(1).is_err()); + buffer.read_bytes(1).unwrap_err(); } #[test] @@ -424,7 +424,7 @@ mod tests { assert_eq!(buffer.position(), 20); // Write more bytes than available size - assert!(buffer.write_bytes(&base_write[20..]).is_err()); + buffer.write_bytes(&base_write[20..]).unwrap_err(); assert_eq!(&buffer.get_ref()[0..20], &base_write[0..20]); assert_eq!(&buffer.get_ref()[20..], &base[20..]); assert_eq!(buffer.position(), 20); @@ -522,7 +522,7 @@ mod tests { let mut buf_slice = &mut buffer[20..]; // Buffer remain 20, write 21 bytes shall fail. - assert!(buf_slice.write_bytes(&base_write[20..41]).is_err()); + buf_slice.write_bytes(&base_write[20..41]).unwrap_err(); // Write remaining 20 bytes buf_slice.bytes_mut(20)[..20].clone_from_slice(&base_write[20..40]); diff --git a/components/codec/src/byte.rs b/components/codec/src/byte.rs index 63143938c13..aa7baba9e75 100644 --- a/components/codec/src/byte.rs +++ b/components/codec/src/byte.rs @@ -971,7 +971,7 @@ mod tests { let result = panic_hook::recover_safe(move || { let _ = MemComparableByteCodec::encode_all(src.as_slice(), dest.as_mut_slice()); }); - assert!(result.is_err()); + result.unwrap_err(); let mut src_in_place = vec![0; dest_len]; let result = panic_hook::recover_safe(move || { @@ -980,7 +980,7 @@ mod tests { src_len, ); }); - assert!(result.is_err()); + result.unwrap_err(); } } @@ -1141,7 +1141,7 @@ mod tests { invalid_src.as_slice(), dest.as_mut_slice(), ); - assert!(result.is_err()); + result.unwrap_err(); } } @@ -1162,7 +1162,7 @@ mod tests { dest.as_mut_slice(), ); }); - assert!(result.is_err()); + result.unwrap_err(); } { let mut dest = vec![0; src.len()]; diff --git a/components/concurrency_manager/src/key_handle.rs b/components/concurrency_manager/src/key_handle.rs index f34b29b0f37..c7aebbc49e0 100644 --- 
a/components/concurrency_manager/src/key_handle.rs +++ b/components/concurrency_manager/src/key_handle.rs @@ -39,7 +39,7 @@ impl KeyHandle { } pub fn with_lock(&self, f: impl FnOnce(&Option) -> T) -> T { - f(&*self.lock_store.lock()) + f(&self.lock_store.lock()) } /// Set the LockTable that the KeyHandle is in. @@ -80,7 +80,7 @@ impl KeyHandleGuard { } pub fn with_lock(&self, f: impl FnOnce(&mut Option) -> T) -> T { - f(&mut *self.handle.lock_store.lock()) + f(&mut self.handle.lock_store.lock()) } pub(crate) fn handle(&self) -> &Arc { diff --git a/components/concurrency_manager/src/lib.rs b/components/concurrency_manager/src/lib.rs index b80501b5433..342f2139e08 100644 --- a/components/concurrency_manager/src/lib.rs +++ b/components/concurrency_manager/src/lib.rs @@ -137,7 +137,8 @@ mod tests { let concurrency_manager = ConcurrencyManager::new(1.into()); let keys: Vec<_> = [b"c", b"a", b"b"] .iter() - .map(|k| Key::from_raw(*k)) + .copied() + .map(|k| Key::from_raw(k)) .collect(); let guards = concurrency_manager.lock_keys(keys.iter()).await; for (key, guard) in keys.iter().zip(&guards) { @@ -181,8 +182,9 @@ mod tests { vec![20, 40, 30], vec![30, 20, 40], ]; - let keys: Vec<_> = vec![b"a", b"b", b"c"] - .into_iter() + let keys: Vec<_> = [b"a", b"b", b"c"] + .iter() + .copied() .map(|k| Key::from_raw(k)) .collect(); diff --git a/components/concurrency_manager/src/lock_table.rs b/components/concurrency_manager/src/lock_table.rs index 4169537840e..bf7a224aa28 100644 --- a/components/concurrency_manager/src/lock_table.rs +++ b/components/concurrency_manager/src/lock_table.rs @@ -57,7 +57,7 @@ impl LockTable { ) -> Result<(), E> { if let Some(lock_ref) = self.get(key) { return lock_ref.with_lock(|lock| { - if let Some(lock) = &*lock { + if let Some(lock) = lock { return check_fn(lock); } Ok(()) diff --git a/components/coprocessor_plugin_api/src/util.rs b/components/coprocessor_plugin_api/src/util.rs index 816b0d12162..606082c0c4e 100644 --- 
a/components/coprocessor_plugin_api/src/util.rs +++ b/components/coprocessor_plugin_api/src/util.rs @@ -32,7 +32,7 @@ pub type PluginGetPluginInfoSignature = extern "C" fn() -> PluginInfo; /// [`declare_plugin!(...)`](declare_plugin) and will be used by TiKV when a /// plugin is loaded to determine whether there are compilation mismatches. #[repr(C)] -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq)] pub struct BuildInfo { /// Version of the [`coprocessor_plugin_api`](crate) crate that was used to /// compile this plugin. @@ -55,7 +55,7 @@ impl BuildInfo { /// Information about the plugin, like its name and version. #[repr(C)] -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq)] pub struct PluginInfo { /// The name of the plugin. pub name: &'static str, diff --git a/components/encryption/src/config.rs b/components/encryption/src/config.rs index 4f83a72855f..3fff9064f58 100644 --- a/components/encryption/src/config.rs +++ b/components/encryption/src/config.rs @@ -39,14 +39,14 @@ impl Default for EncryptionConfig { } } -#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq)] #[serde(default)] #[serde(rename_all = "kebab-case")] pub struct FileConfig { pub path: String, } -#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq, OnlineConfig)] +#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] pub struct KmsConfig { @@ -68,7 +68,7 @@ impl KmsConfig { } } -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "kebab-case", tag = "type")] pub enum MasterKeyConfig { // Store encryption metadata as plaintext. Data still get encrypted. 
Not allowed to use if diff --git a/components/encryption/src/encrypted_file/header.rs b/components/encryption/src/encrypted_file/header.rs index 1456f451f62..420b3076adb 100644 --- a/components/encryption/src/encrypted_file/header.rs +++ b/components/encryption/src/encrypted_file/header.rs @@ -7,7 +7,7 @@ use tikv_util::box_err; use crate::Result; -#[derive(Clone, Copy, PartialEq, Eq, Debug)] +#[derive(Clone, Copy, PartialEq, Debug)] pub enum Version { // The content only contains the encrypted part. V1 = 1, @@ -39,7 +39,7 @@ impl Version { /// | | Reserved (3 bytes) /// | Version (1 bytes) /// ``` -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Clone)] pub struct Header { version: Version, crc32: u32, diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index 0dcdbffdb95..58a3a7a66e5 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -494,7 +494,7 @@ impl DataKeyManager { Dicts::open( &args.dict_path, args.rotation_period, - &*master_key, + master_key, args.enable_file_dictionary_log, args.file_dictionary_rewrite_threshold, ), @@ -560,7 +560,7 @@ impl DataKeyManager { )) })?; // Rewrite key_dict after replace master key. - dicts.save_key_dict(&*master_key)?; + dicts.save_key_dict(master_key)?; info!("encryption: persisted result after replace master key."); Ok(dicts) diff --git a/components/encryption/src/master_key/metadata.rs b/components/encryption/src/master_key/metadata.rs index 8537a2416e3..38518cf0b34 100644 --- a/components/encryption/src/master_key/metadata.rs +++ b/components/encryption/src/master_key/metadata.rs @@ -1,6 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
-#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, Hash, PartialEq)] pub enum MetadataKey { Method, Iv, @@ -27,7 +27,7 @@ impl MetadataKey { } } -#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, Hash, PartialEq)] pub enum MetadataMethod { Plaintext, Aes256Gcm, diff --git a/components/engine_rocks/src/compact_listener.rs b/components/engine_rocks/src/compact_listener.rs index 5fc7a4e92f2..e679410c8b9 100644 --- a/components/engine_rocks/src/compact_listener.rs +++ b/components/engine_rocks/src/compact_listener.rs @@ -197,7 +197,7 @@ impl CompactedEvent for RocksCompactedEvent { } fn cf(&self) -> &str { - &*self.cf + &self.cf } } diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 9c995144efa..13ae38b6afb 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -250,7 +250,7 @@ mod tests { engine.put_cf(cf, b"k1", b"v2").unwrap(); assert_eq!(&*engine.get_value(b"k1").unwrap().unwrap(), b"v1"); - assert!(engine.get_value_cf("foo", b"k1").is_err()); + engine.get_value_cf("foo", b"k1").unwrap_err(); assert_eq!(&*engine.get_value_cf(cf, b"k1").unwrap().unwrap(), b"v2"); } diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index fd695bb4d2c..3e204bbc49f 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -400,10 +400,7 @@ mod tests { let mut kvs_left: Vec<_> = kvs; for r in ranges { - kvs_left = kvs_left - .into_iter() - .filter(|k| k.0 < r.start_key || k.0 >= r.end_key) - .collect(); + kvs_left.retain(|k| k.0 < r.start_key || k.0 >= r.end_key); } check_data(&db, ALL_CFS, kvs_left.as_slice()); } diff --git a/components/engine_rocks/src/perf_context_impl.rs b/components/engine_rocks/src/perf_context_impl.rs index 543e116d8ac..59086127154 100644 --- a/components/engine_rocks/src/perf_context_impl.rs +++ 
b/components/engine_rocks/src/perf_context_impl.rs @@ -185,10 +185,10 @@ impl PerfContextStatistics { if self.perf_level == PerfLevel::Uninitialized { match self.kind { PerfContextKind::Storage(_) | PerfContextKind::Coprocessor(_) => { - set_perf_flags(&*DEFAULT_READ_PERF_FLAGS) + set_perf_flags(&DEFAULT_READ_PERF_FLAGS) } PerfContextKind::RaftstoreStore | PerfContextKind::RaftstoreApply => { - set_perf_flags(&*DEFAULT_WRITE_PERF_FLAGS) + set_perf_flags(&DEFAULT_WRITE_PERF_FLAGS) } } } else { diff --git a/components/engine_rocks/src/properties.rs b/components/engine_rocks/src/properties.rs index 8d049112f92..41e13a813e6 100644 --- a/components/engine_rocks/src/properties.rs +++ b/components/engine_rocks/src/properties.rs @@ -130,7 +130,7 @@ impl<'a> DecodeProperties for UserCollectedPropertiesDecoder<'a> { } } -#[derive(Debug, Clone, PartialEq, Eq, Copy)] +#[derive(Debug, Clone, PartialEq, Copy)] pub enum RangeOffsetKind { Size, Keys, diff --git a/components/engine_rocks/src/range_properties.rs b/components/engine_rocks/src/range_properties.rs index 17d0805340d..101a004982a 100644 --- a/components/engine_rocks/src/range_properties.rs +++ b/components/engine_rocks/src/range_properties.rs @@ -58,10 +58,10 @@ impl RangePropertiesExt for RocksEngine { let keys = props.get_approximate_keys_in_range(start_key, end_key); format!( "{}:{}", - Path::new(&*k) + Path::new(k) .file_name() .map(|f| f.to_str().unwrap()) - .unwrap_or(&*k), + .unwrap_or(k), keys ) }) @@ -118,10 +118,10 @@ impl RangePropertiesExt for RocksEngine { let size = props.get_approximate_size_in_range(start_key, end_key); format!( "{}:{}", - Path::new(&*k) + Path::new(k) .file_name() .map(|f| f.to_str().unwrap()) - .unwrap_or(&*k), + .unwrap_or(k), size ) }) diff --git a/components/engine_rocks/src/ttl_properties.rs b/components/engine_rocks/src/ttl_properties.rs index 5dd51d8cd97..eb4641cc102 100644 --- a/components/engine_rocks/src/ttl_properties.rs +++ b/components/engine_rocks/src/ttl_properties.rs @@ 
-182,10 +182,10 @@ mod tests { } let case2 = [("zr\0a", 0)]; - assert!(get_properties(&case2).is_err()); + get_properties(&case2).unwrap_err(); let case3 = []; - assert!(get_properties(&case3).is_err()); + get_properties(&case3).unwrap_err(); let case4 = [("zr\0a", 1)]; let props = get_properties(&case4).unwrap(); diff --git a/components/engine_traits/src/cf_defs.rs b/components/engine_traits/src/cf_defs.rs index f47a63e69e3..e3fe95ec3b6 100644 --- a/components/engine_traits/src/cf_defs.rs +++ b/components/engine_traits/src/cf_defs.rs @@ -14,11 +14,5 @@ pub fn name_to_cf(name: &str) -> Option { if name.is_empty() { return Some(CF_DEFAULT); } - for c in ALL_CFS { - if name == *c { - return Some(c); - } - } - - None + ALL_CFS.iter().copied().find(|c| name == *c) } diff --git a/components/engine_traits/src/encryption.rs b/components/engine_traits/src/encryption.rs index 41a0f97fb36..16f29d16d75 100644 --- a/components/engine_traits/src/encryption.rs +++ b/components/engine_traits/src/encryption.rs @@ -12,7 +12,7 @@ pub trait EncryptionKeyManager: Sync + Send { fn link_file(&self, src_fname: &str, dst_fname: &str) -> Result<()>; } -#[derive(Clone, PartialEq, Eq)] +#[derive(Clone, PartialEq)] pub struct FileEncryptionInfo { pub method: EncryptionMethod, pub key: Vec, @@ -46,7 +46,7 @@ impl FileEncryptionInfo { } } -#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, PartialEq)] pub enum EncryptionMethod { Unknown = 0, Plaintext = 1, diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index 7add5e4d9b2..8d991f1cfeb 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -328,8 +328,7 @@ mod tests { err.add_result(1, 1, Err(Status::with_code(Code::Aborted).into())); err.add_result(1, 1, Err(Status::with_code(Code::NotFound).into())); err.add_result(1, 1, Ok(())); - let r = err.take_result(); - assert!(r.is_err()); + err.take_result().unwrap_err(); 
assert_eq!(err.get_error_count(), 2); } } diff --git a/components/engine_traits/src/errors.rs b/components/engine_traits/src/errors.rs index 6348db22174..6784891921b 100644 --- a/components/engine_traits/src/errors.rs +++ b/components/engine_traits/src/errors.rs @@ -7,7 +7,7 @@ use raft::{Error as RaftError, StorageError}; use thiserror::Error; #[repr(u8)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +#[derive(Debug, Copy, Clone, Hash, PartialEq)] pub enum Code { Ok = 0, NotFound = 1, @@ -28,7 +28,7 @@ pub enum Code { } #[repr(u8)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +#[derive(Debug, Copy, Clone, Hash, PartialEq)] pub enum SubCode { None = 0, MutexTimeout = 1, @@ -43,7 +43,7 @@ pub enum SubCode { } #[repr(u8)] -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +#[derive(Debug, Copy, Clone, Hash, PartialEq)] pub enum Severity { NoError = 0, SoftError = 1, diff --git a/components/engine_traits/src/perf_context.rs b/components/engine_traits/src/perf_context.rs index dfa5aa967b7..56351fbeca5 100644 --- a/components/engine_traits/src/perf_context.rs +++ b/components/engine_traits/src/perf_context.rs @@ -44,7 +44,7 @@ pub trait PerfContextExt { /// /// This is a leaky abstraction that supports the encapsulation of metrics /// reporting by the subsystems that use PerfContext. 
-#[derive(Eq, PartialEq, Copy, Clone, Debug)] +#[derive(PartialEq, Copy, Clone, Debug)] pub enum PerfContextKind { RaftstoreApply, RaftstoreStore, diff --git a/components/engine_traits/src/sst_partitioner.rs b/components/engine_traits/src/sst_partitioner.rs index f41664403d1..bc6ec13a4eb 100644 --- a/components/engine_traits/src/sst_partitioner.rs +++ b/components/engine_traits/src/sst_partitioner.rs @@ -2,20 +2,20 @@ use std::ffi::CString; -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub struct SstPartitionerRequest<'a> { pub prev_user_key: &'a [u8], pub current_user_key: &'a [u8], pub current_output_file_size: u64, } -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub enum SstPartitionerResult { NotRequired, Required, } -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub struct SstPartitionerContext<'a> { pub is_full_compaction: bool, pub is_manual_compaction: bool, diff --git a/components/engine_traits_tests/src/ctor.rs b/components/engine_traits_tests/src/ctor.rs index 2ab7a7360a7..ab1eea4d958 100644 --- a/components/engine_traits_tests/src/ctor.rs +++ b/components/engine_traits_tests/src/ctor.rs @@ -67,7 +67,7 @@ fn new_engine_readonly_dir() { let path = path.to_str().unwrap(); let err = KvTestEngine::new_kv_engine(path, ALL_CFS); - assert!(err.is_err()); + err.unwrap_err(); } #[test] @@ -88,5 +88,5 @@ fn new_engine_opt_readonly_dir() { let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); let err = KvTestEngine::new_kv_engine_opt(path, db_opts, cf_opts); - assert!(err.is_err()); + err.unwrap_err(); } diff --git a/components/engine_traits_tests/src/delete_range.rs b/components/engine_traits_tests/src/delete_range.rs index c2b87395d6a..bdfba737048 100644 --- a/components/engine_traits_tests/src/delete_range.rs +++ b/components/engine_traits_tests/src/delete_range.rs @@ -8,10 +8,8 @@ use super::default_engine; #[test] fn delete_range_cf_bad_cf() { let db = 
default_engine(); - assert!( - recover_safe(|| { - db.engine.delete_range_cf("bogus", b"a", b"b").unwrap(); - }) - .is_err() - ); + recover_safe(|| { + db.engine.delete_range_cf("bogus", b"a", b"b").unwrap(); + }) + .unwrap_err(); } diff --git a/components/engine_traits_tests/src/iterator.rs b/components/engine_traits_tests/src/iterator.rs index 96709c3fe29..714ca4cb0b4 100644 --- a/components/engine_traits_tests/src/iterator.rs +++ b/components/engine_traits_tests/src/iterator.rs @@ -15,20 +15,16 @@ where assert_eq!(iter.valid().unwrap(), false); - assert!(iter.prev().is_err()); - assert!(iter.next().is_err()); - assert!( - recover_safe(|| { - iter.key(); - }) - .is_err() - ); - assert!( - recover_safe(|| { - iter.value(); - }) - .is_err() - ); + iter.prev().unwrap_err(); + iter.next().unwrap_err(); + recover_safe(|| { + iter.key(); + }) + .unwrap_err(); + recover_safe(|| { + iter.value(); + }) + .unwrap_err(); assert_eq!(iter.seek_to_first().unwrap(), false); assert_eq!(iter.seek_to_last().unwrap(), false); @@ -84,18 +80,14 @@ where assert!(!iter.valid().unwrap()); - assert!( - recover_safe(|| { - iter.key(); - }) - .is_err() - ); - assert!( - recover_safe(|| { - iter.value(); - }) - .is_err() - ); + recover_safe(|| { + iter.key(); + }) + .unwrap_err(); + recover_safe(|| { + iter.value(); + }) + .unwrap_err(); } #[test] @@ -146,18 +138,14 @@ where assert!(!iter.valid().unwrap()); - assert!( - recover_safe(|| { - iter.key(); - }) - .is_err() - ); - assert!( - recover_safe(|| { - iter.value(); - }) - .is_err() - ); + recover_safe(|| { + iter.key(); + }) + .unwrap_err(); + recover_safe(|| { + iter.value(); + }) + .unwrap_err(); } #[test] diff --git a/components/engine_traits_tests/src/scenario_writes.rs b/components/engine_traits_tests/src/scenario_writes.rs index eb05c107c1d..1e52f9400d2 100644 --- a/components/engine_traits_tests/src/scenario_writes.rs +++ b/components/engine_traits_tests/src/scenario_writes.rs @@ -10,7 +10,7 @@ use panic_hook::recover_safe; use 
super::engine_cfs; #[allow(clippy::enum_variant_names)] -#[derive(Eq, PartialEq)] +#[derive(PartialEq)] enum WriteScenario { NoCf, DefaultCf, @@ -279,9 +279,9 @@ scenario_test! { delete_range_reverse_range { db.put(b"c", b"").unwrap(); db.put(b"d", b"").unwrap(); - assert!(recover_safe(|| { + recover_safe(|| { db.delete_range(b"d", b"b").unwrap(); - }).is_err()); + }).unwrap_err(); assert!(db.get_value(b"b").unwrap().is_some()); assert!(db.get_value(b"c").unwrap().is_some()); diff --git a/components/engine_traits_tests/src/sst.rs b/components/engine_traits_tests/src/sst.rs index 231e12ea785..ce4160e5ddc 100644 --- a/components/engine_traits_tests/src/sst.rs +++ b/components/engine_traits_tests/src/sst.rs @@ -158,20 +158,16 @@ fn delete() -> Result<()> { assert_eq!(iter.valid()?, false); - assert!(iter.prev().is_err()); - assert!(iter.next().is_err()); - assert!( - recover_safe(|| { - iter.key(); - }) - .is_err() - ); - assert!( - recover_safe(|| { - iter.value(); - }) - .is_err() - ); + iter.prev().unwrap_err(); + iter.next().unwrap_err(); + recover_safe(|| { + iter.key(); + }) + .unwrap_err(); + recover_safe(|| { + iter.value(); + }) + .unwrap_err(); assert_eq!(iter.seek_to_first()?, false); assert_eq!(iter.seek_to_last()?, false); diff --git a/components/engine_traits_tests/src/write_batch.rs b/components/engine_traits_tests/src/write_batch.rs index dc966cf03b6..e99245adb4b 100644 --- a/components/engine_traits_tests/src/write_batch.rs +++ b/components/engine_traits_tests/src/write_batch.rs @@ -717,12 +717,10 @@ fn write_batch_delete_range_backward_range() { let mut wb = db.engine.write_batch(); wb.delete_range(b"c", b"a").unwrap(); - assert!( - recover_safe(|| { - wb.write().unwrap(); - }) - .is_err() - ); + recover_safe(|| { + wb.write().unwrap(); + }) + .unwrap_err(); assert!(db.engine.get_value(b"a").unwrap().is_some()); assert!(db.engine.get_value(b"b").unwrap().is_some()); @@ -745,12 +743,10 @@ fn write_batch_delete_range_backward_range() { 
wb.delete_range(&256_usize.to_be_bytes(), &0_usize.to_be_bytes()) .unwrap(); - assert!( - recover_safe(|| { - wb.write().unwrap(); - }) - .is_err() - ); + recover_safe(|| { + wb.write().unwrap(); + }) + .unwrap_err(); assert!(db.engine.get_value(b"a").unwrap().is_some()); assert!(db.engine.get_value(b"b").unwrap().is_some()); @@ -787,12 +783,10 @@ fn write_batch_delete_range_backward_range_partial_commit() { wb.put(b"f", b"").unwrap(); wb.delete(b"a").unwrap(); - assert!( - recover_safe(|| { - wb.write().unwrap(); - }) - .is_err() - ); + recover_safe(|| { + wb.write().unwrap(); + }) + .unwrap_err(); assert!(db.engine.get_value(b"a").unwrap().is_some()); assert!(db.engine.get_value(b"b").unwrap().is_some()); @@ -835,12 +829,10 @@ fn write_batch_delete_range_backward_range_partial_commit() { wb.delete(&i.to_be_bytes()).unwrap(); } - assert!( - recover_safe(|| { - wb.write().unwrap(); - }) - .is_err() - ); + recover_safe(|| { + wb.write().unwrap(); + }) + .unwrap_err(); assert!(db.engine.get_value(b"a").unwrap().is_some()); assert!(db.engine.get_value(b"b").unwrap().is_some()); diff --git a/components/error_code/src/lib.rs b/components/error_code/src/lib.rs index 8ad7f3e1f23..0747b3fd2fb 100644 --- a/components/error_code/src/lib.rs +++ b/components/error_code/src/lib.rs @@ -43,7 +43,7 @@ pub mod storage; use std::fmt::{self, Display, Formatter}; -#[derive(PartialEq, Eq, Debug, Clone, Copy)] +#[derive(PartialEq, Debug, Clone, Copy)] pub struct ErrorCode { pub code: &'static str, pub description: &'static str, diff --git a/components/file_system/src/lib.rs b/components/file_system/src/lib.rs index 0bacbdef428..36acbc65a91 100644 --- a/components/file_system/src/lib.rs +++ b/components/file_system/src/lib.rs @@ -48,14 +48,14 @@ pub use rate_limiter::{ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use strum::{EnumCount, EnumIter}; -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub enum IoOp { Read, Write, } 
#[repr(C)] -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, EnumCount, EnumIter)] +#[derive(Clone, Copy, Debug, PartialEq, Hash, EnumCount, EnumIter)] pub enum IoType { Other = 0, // Including coprocessor and storage read. @@ -129,7 +129,7 @@ impl std::ops::Sub for IoBytes { } #[repr(u32)] -#[derive(Debug, Clone, PartialEq, Eq, Copy, EnumCount)] +#[derive(Debug, Clone, PartialEq, Copy, EnumCount)] pub enum IoPriority { Low = 0, Medium = 1, @@ -189,7 +189,7 @@ impl<'de> Deserialize<'de> for IoPriority { where E: Error, { - let p = match IoPriority::from_str(&*value.trim().to_lowercase()) { + let p = match IoPriority::from_str(&value.trim().to_lowercase()) { Ok(p) => p, _ => { return Err(E::invalid_value( @@ -483,7 +483,7 @@ mod tests { // Ensure it works for non-existent file. let non_existent_file = dir_path.join("non_existent_file"); - assert!(get_file_size(&non_existent_file).is_err()); + get_file_size(&non_existent_file).unwrap_err(); } #[test] diff --git a/components/file_system/src/rate_limiter.rs b/components/file_system/src/rate_limiter.rs index da7fe5fe75c..f3ec05a4314 100644 --- a/components/file_system/src/rate_limiter.rs +++ b/components/file_system/src/rate_limiter.rs @@ -24,7 +24,7 @@ const DEFAULT_REFILL_PERIOD: Duration = Duration::from_millis(50); const DEFAULT_REFILLS_PER_SEC: usize = (1.0 / DEFAULT_REFILL_PERIOD.as_secs_f32()) as usize; const MAX_WAIT_DURATION_PER_REQUEST: Duration = Duration::from_millis(500); -#[derive(Debug, Clone, PartialEq, Eq, Copy)] +#[derive(Debug, Clone, PartialEq, Copy)] pub enum IoRateLimitMode { WriteOnly, ReadOnly, @@ -92,7 +92,7 @@ impl<'de> Deserialize<'de> for IoRateLimitMode { where E: Error, { - let p = match IoRateLimitMode::from_str(&*value.trim().to_lowercase()) { + let p = match IoRateLimitMode::from_str(&value.trim().to_lowercase()) { Ok(p) => p, _ => { return Err(E::invalid_value( diff --git a/components/keys/src/lib.rs b/components/keys/src/lib.rs index ecb2657de00..f62ffc6f8ab 100644 --- 
a/components/keys/src/lib.rs +++ b/components/keys/src/lib.rs @@ -415,17 +415,17 @@ mod tests { let state_key = raft_state_key(1); // invalid length - assert!(decode_raft_log_key(&state_key).is_err()); + decode_raft_log_key(&state_key).unwrap_err(); let mut state_key = state_key.to_vec(); state_key.write_u64::(2).unwrap(); // invalid suffix - assert!(decode_raft_log_key(&state_key).is_err()); + decode_raft_log_key(&state_key).unwrap_err(); let mut region_state_key = region_state_key(1).to_vec(); region_state_key.write_u64::(2).unwrap(); // invalid prefix - assert!(decode_raft_log_key(®ion_state_key).is_err()); + decode_raft_log_key(®ion_state_key).unwrap_err(); } #[test] @@ -441,8 +441,8 @@ mod tests { let mut region = Region::default(); // uninitialised region should not be passed in `enc_start_key` and // `enc_end_key`. - assert!(::panic_hook::recover_safe(|| enc_start_key(®ion)).is_err()); - assert!(::panic_hook::recover_safe(|| enc_end_key(®ion)).is_err()); + ::panic_hook::recover_safe(|| enc_start_key(®ion)).unwrap_err(); + ::panic_hook::recover_safe(|| enc_end_key(®ion)).unwrap_err(); region.mut_peers().push(Peer::default()); assert_eq!(enc_start_key(®ion), vec![DATA_PREFIX]); diff --git a/components/keys/src/rewrite.rs b/components/keys/src/rewrite.rs index 03b6ea27c4f..51f588e9732 100644 --- a/components/keys/src/rewrite.rs +++ b/components/keys/src/rewrite.rs @@ -8,7 +8,7 @@ use std::ops::Bound::{self, *}; /// An error indicating the key cannot be rewritten because it does not start /// with the given prefix. -#[derive(PartialEq, Eq, Debug, Clone)] +#[derive(PartialEq, Debug, Clone)] pub struct WrongPrefix; /// Rewrites the prefix of a byte array. 
diff --git a/components/online_config/online_config_derive/src/lib.rs b/components/online_config/online_config_derive/src/lib.rs index ed37aeac40c..5518aa0e5e6 100644 --- a/components/online_config/online_config_derive/src/lib.rs +++ b/components/online_config/online_config_derive/src/lib.rs @@ -123,11 +123,7 @@ fn encoder( } }; // Only reserve attributes that related to `serde` - field.attrs = field - .attrs - .into_iter() - .filter(|f| is_attr("serde", f)) - .collect(); + field.attrs.retain(|f| is_attr("serde", f)); serialize_fields.push(field); } // Only reserve attributes that related to `serde` diff --git a/components/online_config/src/lib.rs b/components/online_config/src/lib.rs index 2388bf3b3ac..18d9cc0fd71 100644 --- a/components/online_config/src/lib.rs +++ b/components/online_config/src/lib.rs @@ -296,7 +296,7 @@ mod tests { ); } - #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize)] + #[derive(Clone, Copy, Debug, PartialEq, Serialize)] pub enum TestEnum { First, Second, @@ -364,6 +364,6 @@ mod tests { let mut diff = HashMap::new(); diff.insert("e".to_owned(), ConfigValue::String("invalid".into())); - assert!(config.update(diff).is_err()); + config.update(diff).unwrap_err(); } } diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 87187b30e75..37b34bcb666 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -132,7 +132,7 @@ where } } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Hash)] #[repr(u8)] pub enum PeerTick { Raft = 0, diff --git a/components/raftstore/src/coprocessor/region_info_accessor.rs b/components/raftstore/src/coprocessor/region_info_accessor.rs index e8a5b1ac1c9..fb6defbc375 100644 --- a/components/raftstore/src/coprocessor/region_info_accessor.rs +++ b/components/raftstore/src/coprocessor/region_info_accessor.rs @@ -1167,7 +1167,7 @@ mod tests { for index in indices { 
for order in orders { - test_split_impl(*index, *order); + test_split_impl(*index, order.as_slice()); } } } diff --git a/components/raftstore/src/coprocessor/split_check/size.rs b/components/raftstore/src/coprocessor/split_check/size.rs index bc9fd855038..44318a27b60 100644 --- a/components/raftstore/src/coprocessor/split_check/size.rs +++ b/components/raftstore/src/coprocessor/split_check/size.rs @@ -1045,7 +1045,7 @@ pub mod tests { #[test] fn test_get_approximate_split_keys() { for cf in LARGE_CFS { - test_get_approximate_split_keys_impl(*cf); + test_get_approximate_split_keys_impl(cf); } } diff --git a/components/raftstore/src/lib.rs b/components/raftstore/src/lib.rs index f26022efe64..e5906719109 100644 --- a/components/raftstore/src/lib.rs +++ b/components/raftstore/src/lib.rs @@ -6,7 +6,6 @@ #![feature(min_specialization)] #![feature(box_patterns)] #![feature(hash_drain_filter)] -#![feature(let_chains)] #![recursion_limit = "256"] #[cfg(test)] diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index ad89d5e7e70..89b5cfc1ac9 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -1011,12 +1011,14 @@ mod tests { ); cfg.raft_heartbeat_ticks = 0; - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.raft_election_timeout_ticks = 10; cfg.raft_heartbeat_ticks = 10; - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.raft_min_election_timeout_ticks = 5; @@ -1029,15 +1031,18 @@ mod tests { cfg.validate(split_size, false, ReadableSize(0)).unwrap(); cfg.raft_heartbeat_ticks = 11; - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); 
cfg.raft_log_gc_threshold = 0; - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.raft_log_gc_size_limit = Some(ReadableSize(0)); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.raft_log_gc_size_limit = None; @@ -1049,12 +1054,14 @@ mod tests { cfg.raft_base_tick_interval = ReadableDuration::secs(1); cfg.raft_election_timeout_ticks = 10; cfg.raft_store_max_leader_lease = ReadableDuration::secs(20); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.raft_log_gc_count_limit = Some(100); cfg.merge_max_log_gap = 110; - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.raft_log_gc_count_limit = None; @@ -1064,51 +1071,62 @@ mod tests { cfg = Config::new(); cfg.merge_check_tick_interval = ReadableDuration::secs(0); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.raft_base_tick_interval = ReadableDuration::secs(1); cfg.raft_election_timeout_ticks = 10; cfg.peer_stale_state_check_interval = ReadableDuration::secs(5); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.peer_stale_state_check_interval = ReadableDuration::minutes(2); cfg.abnormal_leader_missing_duration = ReadableDuration::minutes(1); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.abnormal_leader_missing_duration = 
ReadableDuration::minutes(2); cfg.max_leader_missing_duration = ReadableDuration::minutes(1); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.local_read_batch_size = 0; - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.apply_batch_system.max_batch_size = Some(0); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.apply_batch_system.pool_size = 0; - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.store_batch_system.max_batch_size = Some(0); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.store_batch_system.pool_size = 0; - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.apply_batch_system.max_batch_size = Some(10241); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.store_batch_system.max_batch_size = Some(10241); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.hibernate_regions = true; @@ -1132,17 +1150,20 @@ mod tests { cfg = Config::new(); cfg.future_poll_size = 0; - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.snap_generator_pool_size = 0; - 
assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.raft_base_tick_interval = ReadableDuration::secs(1); cfg.raft_election_timeout_ticks = 11; cfg.raft_store_max_leader_lease = ReadableDuration::secs(11); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg = Config::new(); cfg.hibernate_regions = true; @@ -1153,17 +1174,21 @@ mod tests { cfg = Config::new(); cfg.raft_max_size_per_msg = ReadableSize(0); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg.raft_max_size_per_msg = ReadableSize::gb(64); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg.raft_max_size_per_msg = ReadableSize::gb(3); cfg.validate(split_size, false, ReadableSize(0)).unwrap(); cfg = Config::new(); cfg.raft_entry_max_size = ReadableSize(0); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg.raft_entry_max_size = ReadableSize::mb(3073); - assert!(cfg.validate(split_size, false, ReadableSize(0)).is_err()); + cfg.validate(split_size, false, ReadableSize(0)) + .unwrap_err(); cfg.raft_entry_max_size = ReadableSize::gb(3); cfg.validate(split_size, false, ReadableSize(0)).unwrap(); diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index 33b504127f8..e5c617ec91b 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -1155,7 +1155,7 @@ pub mod tests { assert_eq!(e, cache.entry(e.get_index()).unwrap()); } let res = panic_hook::recover_safe(|| cache.entry(7)); - assert!(res.is_err()); + res.unwrap_err(); } #[test] diff 
--git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 938ea526894..e2db05db143 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -3794,7 +3794,7 @@ where } => self.handle_change(apply_ctx, cmd, region_epoch, cb), #[cfg(any(test, feature = "testexport"))] Msg::Validate(_, f) => { - let delegate: *const u8 = unsafe { mem::transmute(&self.delegate) }; + let delegate = &self.delegate as *const ApplyDelegate as *const u8; f(delegate) } } @@ -4705,7 +4705,7 @@ mod tests { // unregistered region should be ignored and notify failed. let resp = resp_rx.recv_timeout(Duration::from_secs(3)).unwrap(); assert!(resp.get_header().get_error().has_region_not_found()); - assert!(rx.try_recv().is_err()); + rx.try_recv().unwrap_err(); let (cc_tx, cc_rx) = mpsc::channel(); let pops = vec![ @@ -4808,7 +4808,7 @@ mod tests { "{:?}", resp ); - assert!(rx.try_recv().is_err()); + rx.try_recv().unwrap_err(); system.shutdown(); } @@ -5982,29 +5982,29 @@ mod tests { let mut region = Region::default(); // Check uuid and cf name - assert!(check_sst_for_ingestion(&sst, ®ion).is_err()); + check_sst_for_ingestion(&sst, ®ion).unwrap_err(); sst.set_uuid(Uuid::new_v4().as_bytes().to_vec()); sst.set_cf_name(CF_DEFAULT.to_owned()); check_sst_for_ingestion(&sst, ®ion).unwrap(); sst.set_cf_name("test".to_owned()); - assert!(check_sst_for_ingestion(&sst, ®ion).is_err()); + check_sst_for_ingestion(&sst, ®ion).unwrap_err(); sst.set_cf_name(CF_WRITE.to_owned()); check_sst_for_ingestion(&sst, ®ion).unwrap(); // Check region id region.set_id(1); sst.set_region_id(2); - assert!(check_sst_for_ingestion(&sst, ®ion).is_err()); + check_sst_for_ingestion(&sst, ®ion).unwrap_err(); sst.set_region_id(1); check_sst_for_ingestion(&sst, ®ion).unwrap(); // Check region epoch region.mut_region_epoch().set_conf_ver(1); - assert!(check_sst_for_ingestion(&sst, ®ion).is_err()); + check_sst_for_ingestion(&sst, 
®ion).unwrap_err(); sst.mut_region_epoch().set_conf_ver(1); check_sst_for_ingestion(&sst, ®ion).unwrap(); region.mut_region_epoch().set_version(1); - assert!(check_sst_for_ingestion(&sst, ®ion).is_err()); + check_sst_for_ingestion(&sst, ®ion).unwrap_err(); sst.mut_region_epoch().set_version(1); check_sst_for_ingestion(&sst, ®ion).unwrap(); @@ -6013,9 +6013,9 @@ mod tests { region.set_end_key(vec![8]); sst.mut_range().set_start(vec![1]); sst.mut_range().set_end(vec![8]); - assert!(check_sst_for_ingestion(&sst, ®ion).is_err()); + check_sst_for_ingestion(&sst, ®ion).unwrap_err(); sst.mut_range().set_start(vec![2]); - assert!(check_sst_for_ingestion(&sst, ®ion).is_err()); + check_sst_for_ingestion(&sst, ®ion).unwrap_err(); sst.mut_range().set_end(vec![7]); check_sst_for_ingestion(&sst, ®ion).unwrap(); } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 1d02b723cf6..3ae6b74a13c 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -3742,7 +3742,7 @@ where } }; let mut replication_state = self.ctx.global_replication_state.lock().unwrap(); - new_peer.peer.init_replication_mode(&mut *replication_state); + new_peer.peer.init_replication_mode(&mut replication_state); drop(replication_state); let meta_peer = new_peer.peer.peer.clone(); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index b058d0bb35e..9e126d4d141 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -1138,7 +1138,7 @@ impl RaftPollerBuilder { self.engines.clone(), region, )); - peer.peer.init_replication_mode(&mut *replication_state); + peer.peer.init_replication_mode(&mut replication_state); if local_state.get_state() == PeerState::Merging { info!("region is merging"; "region" => ?region, "store_id" => store_id); merging_count += 1; @@ -1178,7 +1178,7 @@ impl RaftPollerBuilder { 
self.engines.clone(), ®ion, )?; - peer.peer.init_replication_mode(&mut *replication_state); + peer.peer.init_replication_mode(&mut replication_state); peer.schedule_applying_snapshot(); meta.region_ranges .insert(enc_end_key(®ion), region.get_id()); @@ -2168,7 +2168,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER // Now all checking passed let mut replication_state = self.ctx.global_replication_state.lock().unwrap(); - peer.peer.init_replication_mode(&mut *replication_state); + peer.peer.init_replication_mode(&mut replication_state); drop(replication_state); peer.peer.local_first_replicate = is_local_first; @@ -2790,7 +2790,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } }; let mut replication_state = self.ctx.global_replication_state.lock().unwrap(); - peer.peer.init_replication_mode(&mut *replication_state); + peer.peer.init_replication_mode(&mut replication_state); drop(replication_state); peer.peer.activate(self.ctx); diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 43126d1def5..ce812d5ef24 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -220,7 +220,7 @@ where } } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Hash)] #[repr(u8)] pub enum PeerTick { Raft = 0, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 9a8fd7d0605..7c57eeb9ae4 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -113,7 +113,7 @@ const REGION_READ_PROGRESS_CAP: usize = 128; pub const MAX_COMMITTED_SIZE_PER_READY: u64 = 16 * 1024 * 1024; /// The returned states of the peer after checking whether it is stale -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq)] pub enum StaleState { Valid, ToValidate, @@ -5808,7 +5808,7 @@ mod tests { applied_to_index_term: true, 
lease_state: LeaseState::Valid, }; - assert!(inspector.inspect(&req).is_err()); + inspector.inspect(&req).unwrap_err(); } } diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 83363d65ac8..5ad6395dd33 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -1818,7 +1818,7 @@ pub mod tests { Option::>::None, ); worker.start(runner); - assert!(s1.snapshot(0, 0).is_err()); + s1.snapshot(0, 0).unwrap_err(); let gen_task = s1.gen_snap_task.borrow_mut().take().unwrap(); generate_and_schedule_snapshot(gen_task, &s1.engines, &sched).unwrap(); @@ -1909,7 +1909,7 @@ pub mod tests { JOB_STATUS_FAILED, )))); let res = panic_hook::recover_safe(|| s.cancel_applying_snap()); - assert!(res.is_err()); + res.unwrap_err(); } #[test] @@ -1959,7 +1959,7 @@ pub mod tests { JOB_STATUS_FAILED, )))); let res = panic_hook::recover_safe(|| s.check_applying_snap()); - assert!(res.is_err()); + res.unwrap_err(); } #[test] diff --git a/components/raftstore/src/store/read_queue.rs b/components/raftstore/src/store/read_queue.rs index aa24b4bc3c7..d9261b9fde3 100644 --- a/components/raftstore/src/store/read_queue.rs +++ b/components/raftstore/src/store/read_queue.rs @@ -82,7 +82,7 @@ where } pub fn cmds(&self) -> &[(RaftCmdRequest, Callback, Option)] { - &*self.cmds + &self.cmds } pub fn take_cmds(&mut self) -> MustConsumeVec<(RaftCmdRequest, Callback, Option)> { diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index 64bde3cf88b..86d89fad051 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -492,7 +492,7 @@ mod tests { assert!(v0.is_none()); let v4 = snap.get_value(b"key5"); - assert!(v4.is_err()); + v4.unwrap_err(); } #[allow(clippy::type_complexity)] diff --git a/components/raftstore/src/store/replication_mode.rs 
b/components/raftstore/src/store/replication_mode.rs index 1f163ccfb9f..5cc0364b79a 100644 --- a/components/raftstore/src/store/replication_mode.rs +++ b/components/raftstore/src/store/replication_mode.rs @@ -192,7 +192,6 @@ impl GlobalReplicationState { #[cfg(test)] mod tests { - use std::panic; use kvproto::{ metapb, @@ -334,6 +333,6 @@ mod tests { .group .register_store(1, vec![label1.clone(), label3.clone()]) }); - assert!(res.is_err(), "existing group id can't be changed."); + res.unwrap_err(); } } diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index e7b024c38eb..9a279029fd5 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -445,7 +445,7 @@ pub struct Snapshot { mgr: SnapManagerCore, } -#[derive(PartialEq, Eq, Clone, Copy)] +#[derive(PartialEq, Clone, Copy)] enum CheckPolicy { ErrAllowed, ErrNotAllowed, @@ -2516,7 +2516,7 @@ pub mod tests { corrupt_snapshot_size_in(dir.path()); - assert!(Snapshot::new_for_sending(dir.path(), &key, &mgr_core,).is_err()); + Snapshot::new_for_sending(dir.path(), &key, &mgr_core).unwrap_err(); let mut s2 = Snapshot::new_for_building(dir.path(), &key, &mgr_core).unwrap(); assert!(!s2.exists()); @@ -2563,11 +2563,11 @@ pub mod tests { write_batch_size: TEST_WRITE_BATCH_SIZE, coprocessor_host: CoprocessorHost::::default(), }; - assert!(s5.apply(options).is_err()); + s5.apply(options).unwrap_err(); corrupt_snapshot_size_in(dst_dir.path()); - assert!(Snapshot::new_for_receiving(dst_dir.path(), &key, &mgr_core, snap_meta,).is_err()); - assert!(Snapshot::new_for_applying(dst_dir.path(), &key, &mgr_core).is_err()); + Snapshot::new_for_receiving(dst_dir.path(), &key, &mgr_core, snap_meta).unwrap_err(); + Snapshot::new_for_applying(dst_dir.path(), &key, &mgr_core).unwrap_err(); } #[test] @@ -2607,7 +2607,7 @@ pub mod tests { assert_eq!(1, corrupt_snapshot_meta_file(dir.path())); - assert!(Snapshot::new_for_sending(dir.path(), &key, 
&mgr_core,).is_err()); + Snapshot::new_for_sending(dir.path(), &key, &mgr_core).unwrap_err(); let mut s2 = Snapshot::new_for_building(dir.path(), &key, &mgr_core).unwrap(); assert!(!s2.exists()); @@ -2637,11 +2637,9 @@ pub mod tests { assert_eq!(1, corrupt_snapshot_meta_file(dst_dir.path())); - assert!(Snapshot::new_for_applying(dst_dir.path(), &key, &mgr_core,).is_err()); - assert!( - Snapshot::new_for_receiving(dst_dir.path(), &key, &mgr_core, snap_data.take_meta(),) - .is_err() - ); + Snapshot::new_for_applying(dst_dir.path(), &key, &mgr_core).unwrap_err(); + Snapshot::new_for_receiving(dst_dir.path(), &key, &mgr_core, snap_data.take_meta()) + .unwrap_err(); } #[test] @@ -2663,7 +2661,7 @@ pub mod tests { let path2 = temp_path2.to_str().unwrap().to_owned(); File::create(temp_path2).unwrap(); mgr = SnapManager::new(path2); - assert!(mgr.init().is_err()); + mgr.init().unwrap_err(); } #[test] diff --git a/components/raftstore/src/store/txn_ext.rs b/components/raftstore/src/store/txn_ext.rs index 078d3114060..1270ae104c9 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -408,7 +408,7 @@ mod tests { // Not exceeding the region limit, but exceeding the global limit GLOBAL_MEM_SIZE.set(101 << 20); let res = locks.insert(vec![(Key::from_raw(b"k2"), lock(b"abc"))]); - assert!(res.is_err()); + res.unwrap_err(); assert!(locks.get(&Key::from_raw(b"k2")).is_none()); } diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 2bda7f4794f..1b707a42921 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -449,7 +449,7 @@ pub struct Lease { remote: Option, } -#[derive(Clone, Copy, PartialEq, Eq, Debug)] +#[derive(Clone, Copy, PartialEq, Debug)] pub enum LeaseState { /// The lease is suspicious, may be invalid. 
Suspect, @@ -794,7 +794,7 @@ impl< } } -#[derive(PartialEq, Eq, Debug)] +#[derive(PartialEq, Debug)] pub enum ConfChangeKind { // Only contains one configuration change Simple, diff --git a/components/raftstore/src/store/worker/cleanup_snapshot.rs b/components/raftstore/src/store/worker/cleanup_snapshot.rs index 07d2ac001d4..c84d6ddb4d3 100644 --- a/components/raftstore/src/store/worker/cleanup_snapshot.rs +++ b/components/raftstore/src/store/worker/cleanup_snapshot.rs @@ -25,7 +25,7 @@ pub enum Task { impl fmt::Display for Task { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &*self { + match self { Task::GcSnapshot => write!(f, "Gc Snapshot"), Task::DeleteSnapshotFiles { key, .. } => write!(f, "Delete Snapshot Files for {}", key), } diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 9e5e54c185e..6a6aa53103d 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1955,12 +1955,12 @@ where unix_secs_now.into_inner() - last_report_ts.into_inner(); // Keep consistent with the calculation of cpu_usages in a store heartbeat. // See components/tikv_util/src/metrics/threads_linux.rs for more details. 
- (interval_second > 0) - .then(|| { - ((cpu_time_duration.as_secs_f64() * 100.0) / interval_second as f64) - as u64 - }) - .unwrap_or(0) + if interval_second > 0 { + ((cpu_time_duration.as_secs_f64() * 100.0) / interval_second as f64) + as u64 + } else { + 0 + } }; ( read_bytes_delta, diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index 81fa843ace0..d1c531070ac 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -105,7 +105,7 @@ where iter.key().to_vec(), pos, iter.value().len(), - *cf, + cf, )); } iters.push((*cf, iter)); diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 0f15bcc4805..addedc3d653 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -251,7 +251,7 @@ impl Samples { if best_index >= 0 { return self.0[best_index as usize].key.clone(); } - return vec![]; + vec![] } } diff --git a/components/resource_metering/src/config.rs b/components/resource_metering/src/config.rs index 69d7c78cb2f..090768a9493 100644 --- a/components/resource_metering/src/config.rs +++ b/components/resource_metering/src/config.rs @@ -147,20 +147,20 @@ mod tests { max_resource_groups: 2000, precision: ReadableDuration::secs(1), }; - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); let cfg = Config { receiver_address: "127.0.0.1:6666".to_string(), report_receiver_interval: ReadableDuration::minutes(1), max_resource_groups: usize::MAX, // invalid precision: ReadableDuration::secs(1), }; - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); let cfg = Config { receiver_address: "127.0.0.1:6666".to_string(), report_receiver_interval: ReadableDuration::minutes(1), max_resource_groups: 2000, precision: ReadableDuration::days(999), // invalid }; - 
assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); } } diff --git a/components/resource_metering/src/lib.rs b/components/resource_metering/src/lib.rs index bd64d7202ae..ba8e2174e19 100644 --- a/components/resource_metering/src/lib.rs +++ b/components/resource_metering/src/lib.rs @@ -143,15 +143,12 @@ impl Drop for Guard { return; } let mut records = ls.summary_records.lock().unwrap(); - match records.get(&tag) { - Some(record) => { - record.merge(&cur_record); - } - None => { - // See MAX_SUMMARY_RECORDS_LEN. - if records.len() < MAX_SUMMARY_RECORDS_LEN { - records.insert(tag, cur_record); - } + if let Some(record) = records.get(&tag) { + record.merge(&cur_record); + } else { + // See MAX_SUMMARY_RECORDS_LEN. + if records.len() < MAX_SUMMARY_RECORDS_LEN { + records.insert(tag, cur_record); } } }) diff --git a/components/resource_metering/src/model.rs b/components/resource_metering/src/model.rs index 1359e6c3a45..6f7118ef9e1 100644 --- a/components/resource_metering/src/model.rs +++ b/components/resource_metering/src/model.rs @@ -20,7 +20,7 @@ thread_local! { } /// Raw resource statistics record. -#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] +#[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct RawRecord { pub cpu_time: u32, // ms pub read_keys: u32, @@ -48,7 +48,7 @@ impl RawRecord { /// [Recorder]: crate::recorder::Recorder /// [Reporter]: crate::reporter::Reporter /// [Collector]: crate::collector::Collector -#[derive(Debug, Eq, PartialEq, Clone)] +#[derive(Debug, PartialEq, Clone)] pub struct RawRecords { pub begin_unix_time_secs: u64, pub duration: Duration, diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 71a58a33dc3..ce55e7beb41 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -823,7 +823,7 @@ mod tests { check_file_not_exists(&path.clone, key_manager.as_deref()); // Cannot create the same file again. 
- assert!(dir.create(&meta, key_manager.clone()).is_err()); + dir.create(&meta, key_manager.clone()).unwrap_err(); } // Test ImportDir::delete() @@ -912,12 +912,10 @@ mod tests { let mut f = ImportFile::create(meta.clone(), path.clone(), data_key_manager.clone()).unwrap(); // Cannot create the same file again. - assert!( - ImportFile::create(meta.clone(), path.clone(), data_key_manager.clone()).is_err() - ); + ImportFile::create(meta.clone(), path.clone(), data_key_manager.clone()).unwrap_err(); f.append(data).unwrap(); // Invalid crc32 and length. - assert!(f.finish().is_err()); + f.finish().unwrap_err(); check_file_exists(&path.temp, data_key_manager.as_deref()); check_file_not_exists(&path.save, data_key_manager.as_deref()); } @@ -1595,7 +1593,7 @@ mod tests { meta.set_length(0); // disable validation. meta.set_crc32(0); let meta_info = importer.validate(&meta).unwrap(); - let _ = importer.ingest(&[meta_info.clone()], &db).unwrap(); + importer.ingest(&[meta_info.clone()], &db).unwrap(); // key1 = "zt9102_r01", value1 = "abc", len = 13 // key2 = "zt9102_r04", value2 = "xyz", len = 13 // key3 = "zt9102_r07", value3 = "pqrst", len = 15 diff --git a/components/sst_importer/src/sst_writer.rs b/components/sst_importer/src/sst_writer.rs index 60fc1b9e2ab..210f17fc168 100644 --- a/components/sst_importer/src/sst_writer.rs +++ b/components/sst_importer/src/sst_writer.rs @@ -434,7 +434,7 @@ mod tests { let (mut w, _handle) = new_writer(SstImporter::new_raw_writer, ApiVersion::V1); let mut batch = RawWriteBatch::default(); batch.set_ttl(10); - assert!(w.write(batch).is_err()); + w.write(batch).unwrap_err(); } #[test] @@ -462,7 +462,7 @@ mod tests { let pairs = vec![pair]; batch.set_pairs(pairs.into()); - assert!(w.write(batch).is_err()); + w.write(batch).unwrap_err(); } #[test] @@ -478,7 +478,7 @@ mod tests { let pairs = vec![pair]; batch.set_pairs(pairs.into()); - assert!(w.write(batch.clone()).is_err()); + w.write(batch.clone()).unwrap_err(); // put a valid key let mut 
pair = Pair::default(); diff --git a/components/test_raftstore/src/pd.rs b/components/test_raftstore/src/pd.rs index 45a69896296..33241862e07 100644 --- a/components/test_raftstore/src/pd.rs +++ b/components/test_raftstore/src/pd.rs @@ -811,7 +811,7 @@ pub struct TestPdClient { pub gc_safepoints: RwLock>, } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Clone)] pub struct GcSafePoint { pub serivce: String, pub ttl: Duration, diff --git a/components/test_raftstore/src/transport_simulate.rs b/components/test_raftstore/src/transport_simulate.rs index e8fba33f65f..0aa778d01b0 100644 --- a/components/test_raftstore/src/transport_simulate.rs +++ b/components/test_raftstore/src/transport_simulate.rs @@ -314,9 +314,9 @@ impl FilterFactory for PartitionFilterFactory { node_ids: self.s2.clone(), })]; } - return vec![Box::new(PartitionFilter { + vec![Box::new(PartitionFilter { node_ids: self.s1.clone(), - })]; + })] } } diff --git a/components/test_sst_importer/src/lib.rs b/components/test_sst_importer/src/lib.rs index 9397a6bb35b..2f8c195a6bf 100644 --- a/components/test_sst_importer/src/lib.rs +++ b/components/test_sst_importer/src/lib.rs @@ -38,7 +38,7 @@ where if let Some(ref env) = env { opt.set_env(env.clone()); } - apply(*cf, &mut opt); + apply(cf, &mut opt); opt.add_table_properties_collector_factory( "tikv.test_properties", TestPropertiesCollectorFactory::new(*cf), diff --git a/components/test_storage/src/assert_storage.rs b/components/test_storage/src/assert_storage.rs index 7f057971785..5cb6e43d8cb 100644 --- a/components/test_storage/src/assert_storage.rs +++ b/components/test_storage/src/assert_storage.rs @@ -240,7 +240,9 @@ impl AssertionStorage { pub fn get_err(&self, key: &[u8], ts: impl Into) { let key = Key::from_raw(key); - assert!(self.store.get(self.ctx.clone(), &key, ts.into()).is_err()); + self.store + .get(self.ctx.clone(), &key, ts.into()) + .unwrap_err(); } pub fn get_ok(&self, key: &[u8], ts: impl Into, expect: &[u8]) { @@ 
-271,11 +273,9 @@ impl AssertionStorage { pub fn batch_get_err(&self, keys: &[&[u8]], ts: impl Into) { let keys: Vec = keys.iter().map(|x| Key::from_raw(x)).collect(); - assert!( - self.store - .batch_get(self.ctx.clone(), &keys, ts.into()) - .is_err() - ); + self.store + .batch_get(self.ctx.clone(), &keys, ts.into()) + .unwrap_err(); } pub fn batch_get_command_ok(&self, keys: &[&[u8]], ts: u64, expect: Vec<&[u8]>) { @@ -293,11 +293,9 @@ impl AssertionStorage { } pub fn batch_get_command_err(&self, keys: &[&[u8]], ts: u64) { - assert!( - self.store - .batch_get_command(self.ctx.clone(), keys, ts) - .is_err() - ); + self.store + .batch_get_command(self.ctx.clone(), keys, ts) + .unwrap_err(); } fn expect_not_leader_or_stale_command(&self, err: storage::Error) { @@ -332,7 +330,6 @@ impl AssertionStorage { ) where T: std::fmt::Debug, { - assert!(resp.is_err()); let err = resp.unwrap_err(); match err { StorageError(box StorageErrorInner::Txn(TxnError( @@ -384,16 +381,14 @@ impl AssertionStorage { _commit_ts: impl Into, ) { let start_ts = start_ts.into(); - assert!( - self.store - .prewrite( - self.ctx.clone(), - vec![Mutation::make_put(Key::from_raw(key), value.to_vec())], - key.to_vec(), - start_ts, - ) - .is_err() - ); + self.store + .prewrite( + self.ctx.clone(), + vec![Mutation::make_put(Key::from_raw(key), value.to_vec())], + key.to_vec(), + start_ts, + ) + .unwrap_err(); } pub fn delete_ok( @@ -683,16 +678,14 @@ impl AssertionStorage { start_ts: impl Into, current_ts: impl Into, ) { - assert!( - self.store - .cleanup( - self.ctx.clone(), - Key::from_raw(key), - start_ts.into(), - current_ts.into() - ) - .is_err() - ); + self.store + .cleanup( + self.ctx.clone(), + Key::from_raw(key), + start_ts.into(), + current_ts.into(), + ) + .unwrap_err(); } pub fn rollback_ok(&self, keys: Vec<&[u8]>, start_ts: impl Into) { @@ -704,11 +697,9 @@ impl AssertionStorage { pub fn rollback_err(&self, keys: Vec<&[u8]>, start_ts: impl Into) { let keys: Vec = keys.iter().map(|x| 
Key::from_raw(x)).collect(); - assert!( - self.store - .rollback(self.ctx.clone(), keys, start_ts.into()) - .is_err() - ); + self.store + .rollback(self.ctx.clone(), keys, start_ts.into()) + .unwrap_err(); } pub fn scan_locks_ok( @@ -890,11 +881,9 @@ impl AssertionStorage { } pub fn raw_batch_get_command_err(&self, cf: String, keys: Vec>) { - assert!( - self.store - .raw_batch_get_command(self.ctx.clone(), cf, keys) - .is_err() - ); + self.store + .raw_batch_get_command(self.ctx.clone(), cf, keys) + .unwrap_err(); } pub fn raw_put_ok(&self, cf: String, key: Vec, value: Vec) { diff --git a/components/tidb_query_aggr/src/impl_max_min.rs b/components/tidb_query_aggr/src/impl_max_min.rs index 31ff6acc8aa..f4046c35440 100644 --- a/components/tidb_query_aggr/src/impl_max_min.rs +++ b/components/tidb_query_aggr/src/impl_max_min.rs @@ -937,7 +937,7 @@ mod tests { min_state.push_result(&mut ctx, &mut aggr_result).unwrap(); } - assert_eq!(aggr_result[0].to_int_vec(), &(*expected_res)); + assert_eq!(aggr_result[0].to_int_vec(), expected_res); } #[test] diff --git a/components/tidb_query_aggr/src/lib.rs b/components/tidb_query_aggr/src/lib.rs index 1eda14a0697..c6ddfb96d2f 100644 --- a/components/tidb_query_aggr/src/lib.rs +++ b/components/tidb_query_aggr/src/lib.rs @@ -438,7 +438,7 @@ mod tests { Real::new(1.0).ok().as_ref() ); }); - assert!(result.is_err()); + result.unwrap_err(); let result = panic_hook::recover_safe(|| { let mut s = s.clone(); @@ -448,7 +448,7 @@ mod tests { Some(&[1u8] as BytesRef<'_>) ); }); - assert!(result.is_err()); + result.unwrap_err(); // Push result to Real VectorValue should success. 
let mut target = vec![VectorValue::with_capacity(0, EvalType::Real)]; @@ -479,13 +479,13 @@ mod tests { let mut target: Vec = Vec::new(); let _ = (&mut s as &mut dyn AggrFunctionState).push_result(&mut ctx, &mut target[..]); }); - assert!(result.is_err()); + result.unwrap_err(); let result = panic_hook::recover_safe(|| { let mut s = s.clone(); let mut target: Vec = vec![VectorValue::with_capacity(0, EvalType::Int)]; let _ = (&mut s as &mut dyn AggrFunctionState).push_result(&mut ctx, &mut target[..]); }); - assert!(result.is_err()); + result.unwrap_err(); } } diff --git a/components/tidb_query_common/src/execute_stats.rs b/components/tidb_query_common/src/execute_stats.rs index b2740212df0..55d31dfb8f5 100644 --- a/components/tidb_query_common/src/execute_stats.rs +++ b/components/tidb_query_common/src/execute_stats.rs @@ -4,7 +4,7 @@ use derive_more::{Add, AddAssign}; /// Execution summaries to support `EXPLAIN ANALYZE` statements. We don't use /// `ExecutorExecutionSummary` directly since it is less efficient. -#[derive(Debug, Default, Copy, Clone, Add, AddAssign, PartialEq, Eq)] +#[derive(Debug, Default, Copy, Clone, Add, AddAssign, PartialEq)] pub struct ExecSummary { /// Total time cost in this executor. pub time_processed_ns: usize, diff --git a/components/tidb_query_common/src/storage/range.rs b/components/tidb_query_common/src/storage/range.rs index b4075fb3b60..b826f55fe46 100644 --- a/components/tidb_query_common/src/storage/range.rs +++ b/components/tidb_query_common/src/storage/range.rs @@ -4,7 +4,7 @@ use kvproto::coprocessor::KeyRange; // TODO: Remove this module after switching to DAG v2. 
-#[derive(PartialEq, Eq, Clone)] +#[derive(PartialEq, Clone)] pub enum Range { Point(PointRange), Interval(IntervalRange), @@ -41,7 +41,7 @@ impl From for Range { } } -#[derive(Default, PartialEq, Eq, Clone)] +#[derive(Default, PartialEq, Clone)] pub struct IntervalRange { pub lower_inclusive: Vec, pub upper_exclusive: Vec, @@ -87,7 +87,7 @@ impl<'a, 'b> From<(&'a str, &'b str)> for IntervalRange { } } -#[derive(Default, PartialEq, Eq, Clone)] +#[derive(Default, PartialEq, Clone)] pub struct PointRange(pub Vec); impl std::fmt::Debug for PointRange { diff --git a/components/tidb_query_common/src/storage/ranges_iter.rs b/components/tidb_query_common/src/storage/ranges_iter.rs index 6f99249336b..b872d8c5bc5 100644 --- a/components/tidb_query_common/src/storage/ranges_iter.rs +++ b/components/tidb_query_common/src/storage/ranges_iter.rs @@ -2,7 +2,7 @@ use super::range::Range; -#[derive(PartialEq, Eq, Clone, Debug)] +#[derive(PartialEq, Clone, Debug)] pub enum IterStatus { /// All ranges are consumed. 
Drained, diff --git a/components/tidb_query_datatype/src/codec/convert.rs b/components/tidb_query_datatype/src/codec/convert.rs index 41f0794950d..67620510ef8 100644 --- a/components/tidb_query_datatype/src/codec/convert.rs +++ b/components/tidb_query_datatype/src/codec/convert.rs @@ -1589,7 +1589,7 @@ mod tests { // SHOULD_CLIP_TO_ZERO let mut ctx = EvalContext::new(Arc::new(EvalConfig::from_flag(Flag::IN_INSERT_STMT))); let r = (-12345_i64).to_uint(&mut ctx, FieldTypeTp::LongLong); - assert!(r.is_err()); + r.unwrap_err(); // SHOULD_CLIP_TO_ZERO | OVERFLOW_AS_WARNING let mut ctx = EvalContext::new(Arc::new(EvalConfig::from_flag( @@ -1928,11 +1928,11 @@ mod tests { // test overflow let mut ctx = EvalContext::default(); let val: Result = f64::INFINITY.to_string().as_bytes().convert(&mut ctx); - assert!(val.is_err()); + val.unwrap_err(); let mut ctx = EvalContext::default(); let val: Result = f64::NEG_INFINITY.to_string().as_bytes().convert(&mut ctx); - assert!(val.is_err()); + val.unwrap_err(); // TRUNCATE_AS_WARNING let mut ctx = EvalContext::new(Arc::new(EvalConfig::from_flag(Flag::TRUNCATE_AS_WARNING))); diff --git a/components/tidb_query_datatype/src/codec/data_type/chunked_vec_bytes.rs b/components/tidb_query_datatype/src/codec/data_type/chunked_vec_bytes.rs index 4bad0fcc129..c4f5abbc122 100644 --- a/components/tidb_query_datatype/src/codec/data_type/chunked_vec_bytes.rs +++ b/components/tidb_query_datatype/src/codec/data_type/chunked_vec_bytes.rs @@ -177,7 +177,7 @@ impl BytesWriter { } } -impl<'a> PartialBytesWriter { +impl PartialBytesWriter { pub fn partial_write(&mut self, data: BytesRef<'_>) { self.chunked_vec.data.extend_from_slice(data); } diff --git a/components/tidb_query_datatype/src/codec/data_type/mod.rs b/components/tidb_query_datatype/src/codec/data_type/mod.rs index 930070e87a2..8ca36790824 100644 --- a/components/tidb_query_datatype/src/codec/data_type/mod.rs +++ b/components/tidb_query_datatype/src/codec/data_type/mod.rs @@ -410,7 +410,7 @@ 
impl<'a, T: Evaluable + EvaluableRet> EvaluableRef<'a> for &'a T { } } -impl<'a, A: UnsafeRefInto, B> UnsafeRefInto> for Option { +impl, B> UnsafeRefInto> for Option { unsafe fn unsafe_into(self) -> Option { self.map(|x| x.unsafe_into()) } @@ -698,7 +698,7 @@ mod tests { .as_bytes() .to_vec() .as_mysql_bool(&mut ctx); - assert!(val.is_err()); + val.unwrap_err(); let mut ctx = EvalContext::default(); let val: Result = f64::NEG_INFINITY @@ -706,7 +706,7 @@ mod tests { .as_bytes() .to_vec() .as_mysql_bool(&mut ctx); - assert!(val.is_err()); + val.unwrap_err(); } #[test] diff --git a/components/tidb_query_datatype/src/codec/data_type/vector.rs b/components/tidb_query_datatype/src/codec/data_type/vector.rs index c7eecf92fa0..49a4e3a1cff 100644 --- a/components/tidb_query_datatype/src/codec/data_type/vector.rs +++ b/components/tidb_query_datatype/src/codec/data_type/vector.rs @@ -366,7 +366,7 @@ impl VectorValue { output.write_evaluable_datum_null()?; } Some(val) => { - output.write_evaluable_datum_decimal(*val)?; + output.write_evaluable_datum_decimal(val)?; } } Ok(()) diff --git a/components/tidb_query_datatype/src/codec/datum.rs b/components/tidb_query_datatype/src/codec/datum.rs index 9d791d911cd..c953e9e7269 100644 --- a/components/tidb_query_datatype/src/codec/datum.rs +++ b/components/tidb_query_datatype/src/codec/datum.rs @@ -1975,7 +1975,7 @@ mod tests { ]; for d in illegal_cases { - assert!(d.cast_as_json().is_err()); + d.cast_as_json().unwrap_err(); } } @@ -1996,7 +1996,7 @@ mod tests { let illegal_cases = vec![Datum::Max, Datum::Min]; for d in illegal_cases { - assert!(d.into_json().is_err()); + d.into_json().unwrap_err(); } } diff --git a/components/tidb_query_datatype/src/codec/mysql/binary_literal.rs b/components/tidb_query_datatype/src/codec/mysql/binary_literal.rs index 8d1f5fdd8bb..3ab44ad40df 100644 --- a/components/tidb_query_datatype/src/codec/mysql/binary_literal.rs +++ b/components/tidb_query_datatype/src/codec/mysql/binary_literal.rs @@ -277,7 
+277,7 @@ mod tests { } let lit = BinaryLiteral::from_u64(100, -2); - assert!(lit.is_err()); + lit.unwrap_err(); } #[test] @@ -463,12 +463,10 @@ mod tests { let mut ctx = EvalContext::default(); for (s, expected, err) in cs { if err { - assert!( - BinaryLiteral::from_hex_str(s) - .unwrap() - .to_uint(&mut ctx) - .is_err() - ); + BinaryLiteral::from_hex_str(s) + .unwrap() + .to_uint(&mut ctx) + .unwrap_err(); } else { let lit = BinaryLiteral::from_hex_str(s).unwrap(); assert_eq!(lit.to_uint(&mut ctx).unwrap(), expected) diff --git a/components/tidb_query_datatype/src/codec/mysql/decimal.rs b/components/tidb_query_datatype/src/codec/mysql/decimal.rs index 7cd1c239bb1..135a3cd2ce7 100644 --- a/components/tidb_query_datatype/src/codec/mysql/decimal.rs +++ b/components/tidb_query_datatype/src/codec/mysql/decimal.rs @@ -3045,7 +3045,7 @@ mod tests { // error cases let cases = vec![b"1e18446744073709551620"]; for case in cases { - assert!(Decimal::from_bytes(case).is_err()); + Decimal::from_bytes(case).unwrap_err(); } } diff --git a/components/tidb_query_datatype/src/codec/mysql/duration.rs b/components/tidb_query_datatype/src/codec/mysql/duration.rs index 3869f773020..520c985f4b5 100644 --- a/components/tidb_query_datatype/src/codec/mysql/duration.rs +++ b/components/tidb_query_datatype/src/codec/mysql/duration.rs @@ -81,7 +81,7 @@ fn check_nanos_part(nanos: u32) -> Result { #[inline] fn check_nanos(nanos: i64) -> Result { - if nanos < -MAX_NANOS || nanos > MAX_NANOS { + if !(-MAX_NANOS..=MAX_NANOS).contains(&nanos) { Err(Error::truncated_wrong_val("NANOS", nanos)) } else { Ok(nanos) diff --git a/components/tidb_query_datatype/src/codec/mysql/enums.rs b/components/tidb_query_datatype/src/codec/mysql/enums.rs index fecada58b1d..6c39d7f8a95 100644 --- a/components/tidb_query_datatype/src/codec/mysql/enums.rs +++ b/components/tidb_query_datatype/src/codec/mysql/enums.rs @@ -467,7 +467,7 @@ mod tests { 1, 0, 0, 0, 0, 0, 0, 0, 99, // 3rd ]; for data in &src { - 
dest.write_enum_to_chunk_by_datum_payload_compact_bytes(*data, &field_type) + dest.write_enum_to_chunk_by_datum_payload_compact_bytes(data, &field_type) .expect("write_enum_to_chunk_by_payload_compact_bytes"); } assert_eq!(&dest, res); @@ -490,7 +490,7 @@ mod tests { 1, 0, 0, 0, 0, 0, 0, 0, 99, // 3rd ]; for data in &src { - dest.write_enum_to_chunk_by_datum_payload_uint(*data, &field_type) + dest.write_enum_to_chunk_by_datum_payload_uint(data, &field_type) .expect("write_enum_to_chunk_by_payload_uint"); } assert_eq!(&dest, res); @@ -513,7 +513,7 @@ mod tests { 1, 0, 0, 0, 0, 0, 0, 0, 99, // 3rd ]; for data in &src { - dest.write_enum_to_chunk_by_datum_payload_var_uint(*data, &field_type) + dest.write_enum_to_chunk_by_datum_payload_var_uint(data, &field_type) .expect("write_enum_to_chunk_by_payload_var_uint"); } assert_eq!(&dest, res); diff --git a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs index 2e5abc6f87a..8967ab71eeb 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs @@ -100,7 +100,7 @@ use crate::{ const ERR_CONVERT_FAILED: &str = "Can not covert from "; /// The types of `Json` which follows -#[derive(Eq, PartialEq, FromPrimitive, Clone, Debug, Copy)] +#[derive(PartialEq, FromPrimitive, Clone, Debug, Copy)] pub enum JsonType { Object = 0x01, Array = 0x03, @@ -536,7 +536,7 @@ mod tests { ], ]; for d in cases { - assert!(json_object(d).is_err()); + json_object(d).unwrap_err(); } let cases = vec![ diff --git a/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs b/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs index ecdec8adad4..8d1b5c0d453 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs @@ -87,7 +87,7 @@ impl<'a> BinaryModifier<'a> { return Ok(()); } let parent_node = 
&result[0]; - match &*last_leg { + match last_leg { PathLeg::Index(_) => { // Record the parent node value offset, as it's actually relative to `old` self.to_be_modified_ptr = parent_node.as_ptr(); @@ -167,7 +167,7 @@ impl<'a> BinaryModifier<'a> { return Ok(()); } let parent_node = &result[0]; - match &*last_leg { + match last_leg { PathLeg::Index(remove_idx) => { if parent_node.get_type() == JsonType::Array { self.to_be_modified_ptr = parent_node.as_ptr(); diff --git a/components/tidb_query_datatype/src/codec/mysql/json/serde.rs b/components/tidb_query_datatype/src/codec/mysql/json/serde.rs index a4c33944e21..b2b2f421bcb 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/serde.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/serde.rs @@ -262,7 +262,7 @@ mod tests { let illegal_cases = vec!["[pxx,apaa]", "hpeheh", ""]; for json_str in illegal_cases { let resp = Json::from_str(json_str); - assert!(resp.is_err()); + resp.unwrap_err(); } } } diff --git a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs index 79068b38118..88c08f16b20 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs @@ -2044,7 +2044,7 @@ mod tests { let should_fail = vec![-1111, 1, 100, 700_100, 100_000_000, 100_000_101_000_000]; for case in should_fail { - assert!(Time::parse_from_i64(&mut ctx, case, TimeType::DateTime, 0).is_err()); + Time::parse_from_i64(&mut ctx, case, TimeType::DateTime, 0).unwrap_err(); } Ok(()) } @@ -2079,9 +2079,7 @@ mod tests { ]; for case in should_fail { let case: Decimal = case.parse().unwrap(); - assert!( - Time::parse_from_decimal(&mut ctx, &case, TimeType::DateTime, 0, true).is_err() - ); + Time::parse_from_decimal(&mut ctx, &case, TimeType::DateTime, 0, true).unwrap_err(); } Ok(()) } @@ -2155,7 +2153,7 @@ mod tests { ]; for case in should_fail { - assert!(Time::parse_date(&mut ctx, 
case).is_err()); + Time::parse_date(&mut ctx, case).unwrap_err(); } Ok(()) } @@ -2287,7 +2285,7 @@ mod tests { ]; for (case, fsp) in should_fail { - assert!(Time::parse_datetime(&mut ctx, case, fsp, false).is_err()); + Time::parse_datetime(&mut ctx, case, fsp, false).unwrap_err(); } Ok(()) } @@ -2583,7 +2581,7 @@ mod tests { ..TimeEnv::default() }); - assert!(Time::parse_datetime(&mut ctx, "0000-00-00 00:00:00", 0, false).is_err()); + Time::parse_datetime(&mut ctx, "0000-00-00 00:00:00", 0, false).unwrap_err(); // Enable NO_ZERO_DATE, STRICT_MODE and IGNORE_TRUNCATE. // If zero-date is encountered, an error is returned. @@ -2616,7 +2614,7 @@ mod tests { strict_mode: true, ..TimeEnv::default() }); - assert!(Time::parse_datetime(&mut ctx, case, 0, false).is_err()); + Time::parse_datetime(&mut ctx, case, 0, false).unwrap_err(); } Ok(()) @@ -2663,7 +2661,7 @@ mod tests { strict_mode: true, ..TimeEnv::default() }); - assert!(Time::parse_datetime(&mut ctx, case, 0, false).is_err()); + Time::parse_datetime(&mut ctx, case, 0, false).unwrap_err(); } Ok(()) diff --git a/components/tidb_query_datatype/src/codec/mysql/time/tz.rs b/components/tidb_query_datatype/src/codec/mysql/time/tz.rs index 7b90e96b78c..25b35a90fc0 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/tz.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/tz.rs @@ -188,7 +188,7 @@ impl TimeZone for Tz { /// `Tz::Local` -> `TzOffset::Local` /// `Tz::Offset` -> `TzOffset::Fixed` /// `Tz::Name` -> `TzOffset::NonFixed` -#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[derive(Copy, Clone, PartialEq, Debug)] pub enum TzOffset { Local(FixedOffset), Fixed(FixedOffset), diff --git a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs index 463a969284d..5d0c7329d54 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs @@ -151,8 +151,8 @@ impl 
RowSlice<'_> { #[inline] pub fn origin(&self) -> &[u8] { match self { - RowSlice::Big { origin, .. } => *origin, - RowSlice::Small { origin, .. } => *origin, + RowSlice::Big { origin, .. } => origin, + RowSlice::Small { origin, .. } => origin, } } diff --git a/components/tidb_query_datatype/src/codec/table.rs b/components/tidb_query_datatype/src/codec/table.rs index 052ad8bf927..7155748571f 100644 --- a/components/tidb_query_datatype/src/codec/table.rs +++ b/components/tidb_query_datatype/src/codec/table.rs @@ -795,13 +795,13 @@ mod tests { let mut range = KeyRange::default(); range.set_end(small_key.clone()); range.set_start(large_key); - assert!(check_table_ranges(&[range]).is_err()); + check_table_ranges(&[range]).unwrap_err(); // test invalid end let mut range = KeyRange::default(); range.set_start(small_key); range.set_end(b"xx".to_vec()); - assert!(check_table_ranges(&[range]).is_err()); + check_table_ranges(&[range]).unwrap_err(); } #[test] @@ -812,7 +812,7 @@ mod tests { assert_eq!(tid, decode_table_id(&k).unwrap()); let k = encode_index_seek_key(tid, 1, &k); assert_eq!(tid, decode_table_id(&k).unwrap()); - assert!(decode_table_id(b"xxx").is_err()); + decode_table_id(b"xxx").unwrap_err(); } } @@ -820,15 +820,15 @@ mod tests { fn test_check_key_type() { let record_key = encode_row_key(TABLE_ID, 1); check_key_type(record_key.as_slice(), RECORD_PREFIX_SEP).unwrap(); - assert!(check_key_type(record_key.as_slice(), INDEX_PREFIX_SEP).is_err()); + check_key_type(record_key.as_slice(), INDEX_PREFIX_SEP).unwrap_err(); let (_, index_key) = generate_index_data_for_test(TABLE_ID, INDEX_ID, 1, &Datum::I64(1), true); - assert!(check_key_type(index_key.as_slice(), RECORD_PREFIX_SEP).is_err()); + check_key_type(index_key.as_slice(), RECORD_PREFIX_SEP).unwrap_err(); check_key_type(index_key.as_slice(), INDEX_PREFIX_SEP).unwrap(); let too_small_key = vec![0]; - assert!(check_key_type(too_small_key.as_slice(), RECORD_PREFIX_SEP).is_err()); - 
assert!(check_key_type(too_small_key.as_slice(), INDEX_PREFIX_SEP).is_err()); + check_key_type(too_small_key.as_slice(), RECORD_PREFIX_SEP).unwrap_err(); + check_key_type(too_small_key.as_slice(), INDEX_PREFIX_SEP).unwrap_err(); } } diff --git a/components/tidb_query_datatype/src/def/eval_type.rs b/components/tidb_query_datatype/src/def/eval_type.rs index 9addab99e56..855802119b9 100644 --- a/components/tidb_query_datatype/src/def/eval_type.rs +++ b/components/tidb_query_datatype/src/def/eval_type.rs @@ -137,7 +137,7 @@ mod tests { if let Some(etype) = etype { assert_eq!(ftt.unwrap(), etype); } else { - assert!(ftt.is_err()); + ftt.unwrap_err(); } } } diff --git a/components/tidb_query_datatype/src/def/field_type.rs b/components/tidb_query_datatype/src/def/field_type.rs index ac89ad53318..417d7b0d146 100644 --- a/components/tidb_query_datatype/src/def/field_type.rs +++ b/components/tidb_query_datatype/src/def/field_type.rs @@ -548,7 +548,7 @@ mod tests { if let Some(c) = expected { assert_eq!(coll.unwrap(), c); } else { - assert!(coll.is_err()); + coll.unwrap_err(); } } } @@ -574,7 +574,7 @@ mod tests { if let Some(c) = expected { assert_eq!(charset.unwrap(), c); } else { - assert!(charset.is_err()); + charset.unwrap_err(); } } } diff --git a/components/tidb_query_datatype/src/expr/ctx.rs b/components/tidb_query_datatype/src/expr/ctx.rs index a3e175a3867..ffaf63a9774 100644 --- a/components/tidb_query_datatype/src/expr/ctx.rs +++ b/components/tidb_query_datatype/src/expr/ctx.rs @@ -336,7 +336,7 @@ mod tests { // ignore_truncate = false, truncate_as_warning = false let mut ctx = EvalContext::new(Arc::new(EvalConfig::new())); ctx.handle_truncate(false).unwrap(); - assert!(ctx.handle_truncate(true).is_err()); + ctx.handle_truncate(true).unwrap_err(); assert!(ctx.take_warnings().warnings.is_empty()); // ignore_truncate = false; let mut ctx = EvalContext::new(Arc::new(EvalConfig::default_for_test())); diff --git 
a/components/tidb_query_executors/src/index_scan_executor.rs b/components/tidb_query_executors/src/index_scan_executor.rs index 9f23d434a6c..8492a928a8d 100644 --- a/components/tidb_query_executors/src/index_scan_executor.rs +++ b/components/tidb_query_executors/src/index_scan_executor.rs @@ -817,7 +817,7 @@ impl IndexScanExecutorImpl { #[inline] fn split_common_handle(value: &[u8]) -> Result<(&[u8], &[u8])> { if value - .get(0) + .first() .map_or(false, |c| *c == table::INDEX_VALUE_COMMON_HANDLE_FLAG) { let handle_len = (&value[1..]).read_u16().map_err(|_| { @@ -839,7 +839,7 @@ impl IndexScanExecutorImpl { #[inline] fn split_partition_id(value: &[u8]) -> Result<(&[u8], &[u8])> { if value - .get(0) + .first() .map_or(false, |c| *c == table::INDEX_VALUE_PARTITION_ID_FLAG) { if value.len() < 9 { @@ -858,7 +858,7 @@ impl IndexScanExecutorImpl { fn split_restore_data(value: &[u8]) -> Result<(&[u8], &[u8])> { Ok( if value - .get(0) + .first() .map_or(false, |c| *c == table::INDEX_VALUE_RESTORED_DATA_FLAG) { (value, &value[value.len()..]) @@ -1106,17 +1106,13 @@ mod tests { assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 3); assert!(result.physical_columns[0].is_raw()); - assert!( - result.physical_columns[0] - .ensure_all_decoded_for_test(&mut ctx, &schema[1]) - .is_err() - ); + result.physical_columns[0] + .ensure_all_decoded_for_test(&mut ctx, &schema[1]) + .unwrap_err(); assert!(result.physical_columns[1].is_raw()); - assert!( - result.physical_columns[1] - .ensure_all_decoded_for_test(&mut ctx, &schema[0]) - .is_err() - ); + result.physical_columns[1] + .ensure_all_decoded_for_test(&mut ctx, &schema[0]) + .unwrap_err(); } { @@ -1163,17 +1159,13 @@ mod tests { &[Some(5), Some(5), Some(-5)] ); assert!(result.physical_columns[1].is_raw()); - assert!( - result.physical_columns[1] - .ensure_all_decoded_for_test(&mut ctx, &schema[3]) - .is_err() - ); + result.physical_columns[1] + .ensure_all_decoded_for_test(&mut ctx, 
&schema[3]) + .unwrap_err(); assert!(result.physical_columns[2].is_raw()); - assert!( - result.physical_columns[2] - .ensure_all_decoded_for_test(&mut ctx, &schema[1]) - .is_err() - ); + result.physical_columns[2] + .ensure_all_decoded_for_test(&mut ctx, &schema[1]) + .unwrap_err(); } { diff --git a/components/tidb_query_executors/src/limit_executor.rs b/components/tidb_query_executors/src/limit_executor.rs index 864b32ecd6b..a1917e1b17b 100644 --- a/components/tidb_query_executors/src/limit_executor.rs +++ b/components/tidb_query_executors/src/limit_executor.rs @@ -124,7 +124,7 @@ mod tests { let r = exec.next_batch(1); assert_eq!(&r.logical_rows, &[1, 2]); assert_eq!(r.physical_columns.rows_len(), 3); - assert!(r.is_drained.is_err()); + r.is_drained.unwrap_err(); } #[test] diff --git a/components/tidb_query_executors/src/projection_executor.rs b/components/tidb_query_executors/src/projection_executor.rs index 1d6892731ff..7304ed1b1e3 100644 --- a/components/tidb_query_executors/src/projection_executor.rs +++ b/components/tidb_query_executors/src/projection_executor.rs @@ -523,6 +523,6 @@ mod tests { let r = exec.next_batch(1); assert!(r.logical_rows.is_empty()); - assert!(r.is_drained.is_err()); + r.is_drained.unwrap_err(); } } diff --git a/components/tidb_query_executors/src/selection_executor.rs b/components/tidb_query_executors/src/selection_executor.rs index b7a19da9026..d3a2d97ef4b 100644 --- a/components/tidb_query_executors/src/selection_executor.rs +++ b/components/tidb_query_executors/src/selection_executor.rs @@ -655,6 +655,6 @@ mod tests { let r = exec.next_batch(1); assert!(r.logical_rows.is_empty()); - assert!(r.is_drained.is_err()); + r.is_drained.unwrap_err(); } } diff --git a/components/tidb_query_executors/src/table_scan_executor.rs b/components/tidb_query_executors/src/table_scan_executor.rs index a4f7e957663..3ddb20b3e4d 100644 --- a/components/tidb_query_executors/src/table_scan_executor.rs +++ 
b/components/tidb_query_executors/src/table_scan_executor.rs @@ -939,7 +939,7 @@ mod tests { .unwrap(); let mut result = executor.next_batch(10); - assert!(result.is_drained.is_err()); + result.is_drained.unwrap_err(); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 2); assert!(result.physical_columns[0].is_decoded()); @@ -1046,7 +1046,7 @@ mod tests { .unwrap(); let mut result = executor.next_batch(10); - assert!(result.is_drained.is_err()); + result.is_drained.unwrap_err(); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 1); assert!(result.physical_columns[0].is_decoded()); @@ -1094,7 +1094,7 @@ mod tests { .unwrap(); let mut result = executor.next_batch(10); - assert!(result.is_drained.is_err()); + result.is_drained.unwrap_err(); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); assert!(result.physical_columns[0].is_decoded()); @@ -1154,7 +1154,7 @@ mod tests { ); let result = executor.next_batch(1); - assert!(result.is_drained.is_err()); + result.is_drained.unwrap_err(); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 0); } @@ -1175,7 +1175,7 @@ mod tests { .unwrap(); let result = executor.next_batch(10); - assert!(result.is_drained.is_err()); + result.is_drained.unwrap_err(); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 0); } @@ -1230,7 +1230,7 @@ mod tests { .unwrap(); let result = executor.next_batch(10); - assert!(result.is_drained.is_err()); + result.is_drained.unwrap_err(); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 0); } diff --git a/components/tidb_query_expr/src/impl_arithmetic.rs b/components/tidb_query_expr/src/impl_arithmetic.rs index 2500ebc311c..01776c1ad7a 100644 --- a/components/tidb_query_expr/src/impl_arithmetic.rs +++ 
b/components/tidb_query_expr/src/impl_arithmetic.rs @@ -1200,13 +1200,11 @@ mod tests { let overflow = vec![(f64::MAX, 0.0001)]; for (lhs, rhs) in overflow { - assert!( - RpnFnScalarEvaluator::new() - .push_param(lhs) - .push_param(rhs) - .evaluate::(ScalarFuncSig::DivideReal) - .is_err() - ) + RpnFnScalarEvaluator::new() + .push_param(lhs) + .push_param(rhs) + .evaluate::(ScalarFuncSig::DivideReal) + .unwrap_err(); } } @@ -1275,7 +1273,7 @@ mod tests { if is_ok { assert!(result.unwrap().is_none()); } else { - assert!(result.is_err()); + result.unwrap_err(); } if has_warning { diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index 16f6a8f66c2..7fb118dfbec 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -1189,7 +1189,7 @@ fn cast_string_as_time( let val = String::from_utf8_lossy(val); Time::parse( ctx, - &*val, + &val, extra.ret_field_type.as_accessor().tp().try_into()?, extra.ret_field_type.get_decimal() as i8, // Enable round @@ -2425,7 +2425,7 @@ mod tests { assert!(output.is_ok(), "input: {:?}", input); assert_eq!(output.unwrap().unwrap(), exp, "input={:?}", input); } else { - assert!(output.is_err()); + output.unwrap_err(); } } } @@ -3661,7 +3661,7 @@ mod tests { input ); } else { - assert!(output.is_err()); + output.unwrap_err(); } } } diff --git a/components/tidb_query_expr/src/impl_compare.rs b/components/tidb_query_expr/src/impl_compare.rs index 858e1bcb3ec..350b36a3a99 100644 --- a/components/tidb_query_expr/src/impl_compare.rs +++ b/components/tidb_query_expr/src/impl_compare.rs @@ -545,7 +545,7 @@ mod tests { use super::*; use crate::test_util::RpnFnScalarEvaluator; - #[derive(Clone, Copy, PartialEq, Eq)] + #[derive(Clone, Copy, PartialEq)] enum TestCaseCmpOp { GT, GE, diff --git a/components/tidb_query_expr/src/impl_compare_in.rs b/components/tidb_query_expr/src/impl_compare_in.rs index 312943a276a..6de0ba33cfb 100644 --- 
a/components/tidb_query_expr/src/impl_compare_in.rs +++ b/components/tidb_query_expr/src/impl_compare_in.rs @@ -65,11 +65,11 @@ pub trait Extract: Sized { #[inline] fn type_error(eval_type: EvalType, expr_type: ExprType) -> Error { - return other_err!( + other_err!( "Unexpected ExprType {:?} and EvalType {:?}", expr_type, eval_type - ); + ) } impl Extract for Int { diff --git a/components/tidb_query_expr/src/impl_encryption.rs b/components/tidb_query_expr/src/impl_encryption.rs index 3a51f798442..9c26826c03b 100644 --- a/components/tidb_query_expr/src/impl_encryption.rs +++ b/components/tidb_query_expr/src/impl_encryption.rs @@ -452,12 +452,10 @@ mod tests { ]; for len in overflow_tests { - assert!( - RpnFnScalarEvaluator::new() - .push_param(len) - .evaluate::(ScalarFuncSig::RandomBytes) - .is_err(), - ); + RpnFnScalarEvaluator::new() + .push_param(len) + .evaluate::(ScalarFuncSig::RandomBytes) + .unwrap_err(); } // test NULL case diff --git a/components/tidb_query_expr/src/impl_json.rs b/components/tidb_query_expr/src/impl_json.rs index 5e5595bd3ed..60f784dc604 100644 --- a/components/tidb_query_expr/src/impl_json.rs +++ b/components/tidb_query_expr/src/impl_json.rs @@ -584,7 +584,7 @@ mod tests { .push_params(err_args) .evaluate(ScalarFuncSig::JsonObjectSig); - assert!(output.is_err()); + output.unwrap_err(); } } @@ -948,7 +948,7 @@ mod tests { if is_success { assert_eq!(output.unwrap(), expected, "{:?}", vargs); } else { - assert!(output.is_err()); + output.unwrap_err(); } } } diff --git a/components/tidb_query_expr/src/impl_math.rs b/components/tidb_query_expr/src/impl_math.rs index 80484c224c4..55e86ee14d0 100644 --- a/components/tidb_query_expr/src/impl_math.rs +++ b/components/tidb_query_expr/src/impl_math.rs @@ -1204,7 +1204,7 @@ mod tests { let output: Result> = RpnFnScalarEvaluator::new() .push_param(Some(Real::new(x).unwrap())) .evaluate(ScalarFuncSig::Exp); - assert!(output.is_err()); + output.unwrap_err(); } } @@ -1317,12 +1317,10 @@ mod tests { 
.unwrap(); assert!((output.unwrap().into_inner() - expect).abs() < f64::EPSILON); } - assert!( - RpnFnScalarEvaluator::new() - .push_param(Some(Real::new(0.0_f64).unwrap())) - .evaluate::(ScalarFuncSig::Cot) - .is_err() - ); + RpnFnScalarEvaluator::new() + .push_param(Some(Real::new(0.0_f64).unwrap())) + .evaluate::(ScalarFuncSig::Cot) + .unwrap_err(); } #[test] @@ -1374,13 +1372,11 @@ mod tests { ]; for (lhs, rhs) in invalid_cases { - assert!( - RpnFnScalarEvaluator::new() - .push_param(lhs) - .push_param(rhs) - .evaluate::(ScalarFuncSig::Pow) - .is_err() - ); + RpnFnScalarEvaluator::new() + .push_param(lhs) + .push_param(rhs) + .evaluate::(ScalarFuncSig::Pow) + .unwrap_err(); } } diff --git a/components/tidb_query_expr/src/impl_op.rs b/components/tidb_query_expr/src/impl_op.rs index 5ecb4e9a7dc..9081f623b8e 100644 --- a/components/tidb_query_expr/src/impl_op.rs +++ b/components/tidb_query_expr/src/impl_op.rs @@ -402,18 +402,16 @@ mod tests { .unwrap(); assert_eq!(output, expect_output, "{:?}", arg); } - assert!( - RpnFnScalarEvaluator::new() - .push_param_with_field_type( - Some((i64::MAX as u64 + 2) as i64), - FieldTypeBuilder::new() - .tp(FieldTypeTp::LongLong) - .flag(FieldTypeFlag::UNSIGNED) - .build() - ) - .evaluate::(ScalarFuncSig::UnaryMinusInt) - .is_err() - ); + RpnFnScalarEvaluator::new() + .push_param_with_field_type( + Some((i64::MAX as u64 + 2) as i64), + FieldTypeBuilder::new() + .tp(FieldTypeTp::LongLong) + .flag(FieldTypeFlag::UNSIGNED) + .build(), + ) + .evaluate::(ScalarFuncSig::UnaryMinusInt) + .unwrap_err(); let signed_test_cases = vec![ (None, None), @@ -429,12 +427,10 @@ mod tests { .unwrap(); assert_eq!(output, expect_output, "{:?}", arg); } - assert!( - RpnFnScalarEvaluator::new() - .push_param(i64::MIN) - .evaluate::(ScalarFuncSig::UnaryMinusInt) - .is_err() - ); + RpnFnScalarEvaluator::new() + .push_param(i64::MIN) + .evaluate::(ScalarFuncSig::UnaryMinusInt) + .unwrap_err(); } #[test] diff --git 
a/components/tidb_query_expr/src/impl_string.rs b/components/tidb_query_expr/src/impl_string.rs index 9ebba24ed43..f3b9b03c287 100644 --- a/components/tidb_query_expr/src/impl_string.rs +++ b/components/tidb_query_expr/src/impl_string.rs @@ -284,8 +284,8 @@ pub fn lpad_utf8( pad: BytesRef, writer: BytesWriter, ) -> Result { - let input = str::from_utf8(&*arg)?; - let pad = str::from_utf8(&*pad)?; + let input = str::from_utf8(arg)?; + let pad = str::from_utf8(pad)?; let input_len = input.chars().count(); let pad_len = pad.chars().count(); @@ -350,8 +350,8 @@ pub fn rpad_utf8( pad: BytesRef, writer: BytesWriter, ) -> Result { - let input = str::from_utf8(&*arg)?; - let pad = str::from_utf8(&*pad)?; + let input = str::from_utf8(arg)?; + let pad = str::from_utf8(pad)?; let input_len = input.chars().count(); let pad_len = pad.chars().count(); @@ -451,7 +451,7 @@ pub fn left_utf8(lhs: BytesRef, rhs: &Int, writer: BytesWriter) -> Result Result { - let s = str::from_utf8(&*s_utf8)?; - let newstr = str::from_utf8(&*newstr_utf8)?; + let s = str::from_utf8(s_utf8)?; + let newstr = str::from_utf8(newstr_utf8)?; let pos = *pos; let len = *len; let upos: usize = pos as usize; @@ -543,7 +543,7 @@ pub fn right_utf8(lhs: BytesRef, rhs: &Int, writer: BytesWriter) -> Result(ScalarFuncSig::CharLengthUtf8); - assert!(output.is_err()); + output.unwrap_err(); } } diff --git a/components/tidb_query_expr/src/impl_time.rs b/components/tidb_query_expr/src/impl_time.rs index 80912fd6526..0f55e21bab5 100644 --- a/components/tidb_query_expr/src/impl_time.rs +++ b/components/tidb_query_expr/src/impl_time.rs @@ -523,7 +523,7 @@ pub fn from_days(ctx: &mut EvalContext, arg: &Int) -> Result> { pub fn make_date(ctx: &mut EvalContext, year: &Int, day: &Int) -> Result> { let mut year = *year; let mut day = *day; - if day <= 0 || year < 0 || year > 9999 || day > 366 * 9999 { + if !(1..=366 * 9999).contains(&day) || !(0..=9999).contains(&year) { return Ok(None); } if year < 70 { @@ -2394,7 +2394,7 @@ mod 
tests { .build(), ) .evaluate::(ScalarFuncSig::MakeTime); - assert!(output.is_err()); + output.unwrap_err(); } } diff --git a/components/tidb_query_expr/src/types/expr_builder.rs b/components/tidb_query_expr/src/types/expr_builder.rs index 0546fe43f08..5311a2c03d9 100644 --- a/components/tidb_query_expr/src/types/expr_builder.rs +++ b/components/tidb_query_expr/src/types/expr_builder.rs @@ -826,10 +826,8 @@ mod tests { fn test_max_columns_check() { // Col offset = 0. The minimum success max_columns is 1. let node = ExprDefBuilder::column_ref(0, FieldTypeTp::LongLong).build(); - assert!( - RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node.clone(), fn_mapper, 0) - .is_err() - ); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node.clone(), fn_mapper, 0) + .unwrap_err(); for i in 1..10 { RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node.clone(), fn_mapper, i) .unwrap(); @@ -838,14 +836,8 @@ mod tests { // Col offset = 3. The minimum success max_columns is 4. 
let node = ExprDefBuilder::column_ref(3, FieldTypeTp::LongLong).build(); for i in 0..=3 { - assert!( - RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper( - node.clone(), - fn_mapper, - i - ) - .is_err() - ); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node.clone(), fn_mapper, i) + .unwrap_err(); } for i in 4..10 { RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node.clone(), fn_mapper, i) @@ -861,14 +853,8 @@ mod tests { .build(); for i in 0..=5 { - assert!( - RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper( - node.clone(), - fn_mapper, - i - ) - .is_err() - ); + RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node.clone(), fn_mapper, i) + .unwrap_err(); } for i in 6..10 { RpnExpressionBuilder::build_from_expr_tree_with_fn_mapper(node.clone(), fn_mapper, i) diff --git a/components/tidb_query_expr/src/types/expr_eval.rs b/components/tidb_query_expr/src/types/expr_eval.rs index 442c0f8486b..078bbf1bb80 100644 --- a/components/tidb_query_expr/src/types/expr_eval.rs +++ b/components/tidb_query_expr/src/types/expr_eval.rs @@ -43,7 +43,7 @@ impl<'a> RpnStackNodeVectorValue<'a> { pub fn as_ref(&self) -> &VectorValue { match self { RpnStackNodeVectorValue::Generated { physical_value, .. } => physical_value, - RpnStackNodeVectorValue::Ref { physical_value, .. } => *physical_value, + RpnStackNodeVectorValue::Ref { physical_value, .. } => physical_value, } } @@ -425,7 +425,7 @@ mod tests { // smaller row number let _ = exp.eval(&mut ctx, &schema, &mut c, &logical_rows, 4); }); - assert!(hooked_eval.is_err()); + hooked_eval.unwrap_err(); let mut c = columns; let exp = RpnExpressionBuilder::new_for_test() @@ -436,7 +436,7 @@ mod tests { // larger row number let _ = exp.eval(&mut ctx, &schema, &mut c, &logical_rows, 6); }); - assert!(hooked_eval.is_err()); + hooked_eval.unwrap_err(); } /// Single function call node (i.e. 
nullary function) @@ -930,7 +930,7 @@ mod tests { let hooked_eval = panic_hook::recover_safe(|| { let _ = exp.eval(&mut ctx, &[], &mut columns, &[], 3); }); - assert!(hooked_eval.is_err()); + hooked_eval.unwrap_err(); } /// Irregular RPN expression (contains unused node). Should panic. @@ -954,7 +954,7 @@ mod tests { let hooked_eval = panic_hook::recover_safe(|| { let _ = exp.eval(&mut ctx, &[], &mut columns, &[], 3); }); - assert!(hooked_eval.is_err()); + hooked_eval.unwrap_err(); } /// Eval type does not match. Should panic. @@ -976,7 +976,7 @@ mod tests { let hooked_eval = panic_hook::recover_safe(|| { let _ = exp.eval(&mut ctx, &[], &mut columns, &[], 3); }); - assert!(hooked_eval.is_err()); + hooked_eval.unwrap_err(); } /// Parse from an expression tree then evaluate. diff --git a/components/tikv_kv/src/btree_engine.rs b/components/tikv_kv/src/btree_engine.rs index b80c32e7088..757c3e2c378 100644 --- a/components/tikv_kv/src/btree_engine.rs +++ b/components/tikv_kv/src/btree_engine.rs @@ -428,6 +428,6 @@ pub mod tests { #[test] fn test_get_not_exist_cf() { let engine = BTreeEngine::new(&[]); - assert!(::panic_hook::recover_safe(|| engine.get_cf("not_exist_cf")).is_err()); + ::panic_hook::recover_safe(|| engine.get_cf("not_exist_cf")).unwrap_err(); } } diff --git a/components/tikv_kv/src/cursor.rs b/components/tikv_kv/src/cursor.rs index cfa171054c9..2d0dd77e9d3 100644 --- a/components/tikv_kv/src/cursor.rs +++ b/components/tikv_kv/src/cursor.rs @@ -647,10 +647,8 @@ mod tests { iter.seek(&Key::from_encoded_slice(b"a3"), &mut statistics) .unwrap() ); - assert!( - iter.seek(&Key::from_encoded_slice(b"a9"), &mut statistics) - .is_err() - ); + iter.seek(&Key::from_encoded_slice(b"a9"), &mut statistics) + .unwrap_err(); assert!( !iter @@ -661,10 +659,8 @@ mod tests { iter.seek_for_prev(&Key::from_encoded_slice(b"a3"), &mut statistics) .unwrap() ); - assert!( - iter.seek_for_prev(&Key::from_encoded_slice(b"a1"), &mut statistics) - .is_err() - ); + 
iter.seek_for_prev(&Key::from_encoded_slice(b"a1"), &mut statistics) + .unwrap_err(); } #[test] @@ -705,14 +701,10 @@ mod tests { .reverse_seek(&Key::from_encoded_slice(b"a3"), &mut statistics) .unwrap() ); - assert!( - iter.reverse_seek(&Key::from_encoded_slice(b"a1"), &mut statistics) - .is_err() - ); - assert!( - iter.reverse_seek(&Key::from_encoded_slice(b"a8"), &mut statistics) - .is_err() - ); + iter.reverse_seek(&Key::from_encoded_slice(b"a1"), &mut statistics) + .unwrap_err(); + iter.reverse_seek(&Key::from_encoded_slice(b"a8"), &mut statistics) + .unwrap_err(); assert!(iter.seek_to_last(&mut statistics)); let mut res = vec![]; diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index e26318d7b4e..dea3c0dc745 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -67,7 +67,7 @@ pub type Callback = Box) + Send>; pub type ExtCallback = Box; pub type Result = result::Result; -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Clone)] pub enum Modify { Delete(CfName, Key), Put(CfName, Key, Value), @@ -156,14 +156,8 @@ impl From for raft_cmdpb::Request { impl From for Modify { fn from(mut req: raft_cmdpb::Request) -> Modify { let name_to_cf = |name: &str| -> Option { - engine_traits::name_to_cf(name).or_else(|| { - for c in TEST_ENGINE_CFS { - if name == *c { - return Some(c); - } - } - None - }) + engine_traits::name_to_cf(name) + .or_else(|| TEST_ENGINE_CFS.iter().copied().find(|c| name == *c)) }; match req.get_cmd_type() { @@ -947,7 +941,7 @@ pub mod tests { }}; } - #[derive(PartialEq, Eq, Clone, Copy)] + #[derive(PartialEq, Clone, Copy)] enum SeekMode { Normal, Reverse, diff --git a/components/tikv_util/src/codec/bytes.rs b/components/tikv_util/src/codec/bytes.rs index 034e8e73375..df23090c9c7 100644 --- a/components/tikv_util/src/codec/bytes.rs +++ b/components/tikv_util/src/codec/bytes.rs @@ -448,8 +448,8 @@ mod tests { ]; for mut x in invalid_bytes { - assert!(decode_bytes(&mut 
x.as_slice(), false).is_err()); - assert!(decode_bytes_in_place(&mut x, false).is_err()); + decode_bytes(&mut x.as_slice(), false).unwrap_err(); + decode_bytes_in_place(&mut x, false).unwrap_err(); } } diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index 8fa7c8492d0..7e9f22dcb01 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -59,7 +59,7 @@ const MINUTE: u64 = SECOND * TIME_MAGNITUDE_2; const HOUR: u64 = MINUTE * TIME_MAGNITUDE_2; const DAY: u64 = HOUR * TIME_MAGNITUDE_3; -#[derive(Clone, Copy, Debug, Serialize, Deserialize, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "kebab-case")] pub enum LogFormat { Text, @@ -937,14 +937,14 @@ securityfs /sys/kernel/security securityfs rw,nosuid,nodev,noexec,relatime 0 0 // not found let f2 = get_fs_info("/tmp", &mnt_file); - assert!(f2.is_err()); + f2.unwrap_err(); } #[test] fn test_get_rotational_info() { // test device not exist let ret = get_rotational_info("/dev/invalid"); - assert!(ret.is_err()); + ret.unwrap_err(); } #[test] @@ -1823,7 +1823,7 @@ mod tests { { File::create(&path2).unwrap(); } - assert!(canonicalize_path(&path2).is_err()); + canonicalize_path(&path2).unwrap_err(); assert!(Path::new(&path2).exists()); } diff --git a/components/tikv_util/src/future.rs b/components/tikv_util/src/future.rs index 61d6f33ad4c..5f4c5b43817 100644 --- a/components/tikv_util/src/future.rs +++ b/components/tikv_util/src/future.rs @@ -147,7 +147,7 @@ impl PollAtWake { }; let waker = task::waker_ref(arc_self); - let cx = &mut Context::from_waker(&*waker); + let cx = &mut Context::from_waker(&waker); loop { match fut.as_mut().poll(cx) { // Likely pending diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index ecfeb7253fd..a75c4756b9c 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -728,7 +728,7 @@ mod tests { match foo(&mu.rl()) { 
Some(_) | None => { let res = mu.try_write(); - assert!(res.is_err()); + res.unwrap_err(); } } } diff --git a/components/tikv_util/src/logger/file_log.rs b/components/tikv_util/src/logger/file_log.rs index 5d0300ccdc5..5b575638c19 100644 --- a/components/tikv_util/src/logger/file_log.rs +++ b/components/tikv_util/src/logger/file_log.rs @@ -376,7 +376,7 @@ mod tests { // Rename failed. logger.write_all(&[0xff; 1025]).unwrap(); - assert!(logger.flush().is_err()); + logger.flush().unwrap_err(); // dropping the logger still should not panic. drop(logger); diff --git a/components/tikv_util/src/logger/mod.rs b/components/tikv_util/src/logger/mod.rs index 91ecd803b89..5ebe9468a50 100644 --- a/components/tikv_util/src/logger/mod.rs +++ b/components/tikv_util/src/logger/mod.rs @@ -781,7 +781,7 @@ mod tests { BUFFER.with(|buffer| { let mut buffer = buffer.borrow_mut(); - let output = from_utf8(&*buffer).unwrap(); + let output = from_utf8(&buffer).unwrap(); assert_eq!(output.lines().count(), expect.lines().count()); let re = Regex::new(r"(?P\[.*?\])\s(?P\[.*?\])\s(?P\[.*?\])\s(?P\[.*?\])\s?(?P\[.*\])?").unwrap(); @@ -829,7 +829,7 @@ mod tests { BUFFER.with(|buffer| { let mut buffer = buffer.borrow_mut(); - let output = from_utf8(&*buffer).unwrap(); + let output = from_utf8(&buffer).unwrap(); assert_eq!(output.lines().count(), expect.lines().count()); for (output_line, expect_line) in output.lines().zip(expect.lines()) { @@ -862,7 +862,7 @@ mod tests { let check_log = |log: &str| { BUFFER.with(|buffer| { let mut buffer = buffer.borrow_mut(); - let output = from_utf8(&*buffer).unwrap(); + let output = from_utf8(&buffer).unwrap(); // only check the log len here as some field like timestamp, location may // change. 
assert_eq!(output.len(), log.len()); @@ -1048,7 +1048,7 @@ mod tests { let re = Regex::new(r"(?P\[.*?\])\s(?P\[.*?\])\s(?P\[.*?\])\s(?P\[.*?\])\s?(?P\[.*\])?").unwrap(); NORMAL_BUFFER.with(|buffer| { let buffer = buffer.borrow_mut(); - let output = from_utf8(&*buffer).unwrap(); + let output = from_utf8(&buffer).unwrap(); let output_segments = re.captures(output).unwrap(); assert_eq!(output_segments["msg"].to_owned(), r#"["Hello World"]"#); }); @@ -1060,7 +1060,7 @@ mod tests { "#; SLOW_BUFFER.with(|buffer| { let buffer = buffer.borrow_mut(); - let output = from_utf8(&*buffer).unwrap(); + let output = from_utf8(&buffer).unwrap(); let expect_re = Regex::new(r"(?P\[.*?\])\s?(?P\[.*\])?").unwrap(); assert_eq!(output.lines().count(), slow_expect.lines().count()); for (output, expect) in output.lines().zip(slow_expect.lines()) { diff --git a/components/tikv_util/src/metrics/mod.rs b/components/tikv_util/src/metrics/mod.rs index 4b5a9abc2f7..3a9964bd8d2 100644 --- a/components/tikv_util/src/metrics/mod.rs +++ b/components/tikv_util/src/metrics/mod.rs @@ -46,7 +46,7 @@ pub fn dump_to(w: &mut impl Write, should_simplify: bool) { let encoder = TextEncoder::new(); let metric_families = prometheus::gather(); if !should_simplify { - if let Err(e) = encoder.encode(&*metric_families, w) { + if let Err(e) = encoder.encode(&metric_families, w) { warn!("prometheus encoding error"; "err" => ?e); } return; diff --git a/components/tikv_util/src/metrics/threads_linux.rs b/components/tikv_util/src/metrics/threads_linux.rs index 608b60949e8..9f85425b0ba 100644 --- a/components/tikv_util/src/metrics/threads_linux.rs +++ b/components/tikv_util/src/metrics/threads_linux.rs @@ -706,7 +706,7 @@ mod tests { let (raw_name, _) = get_thread_name("(@#)").unwrap(); assert_eq!(sanitize_thread_name(1, raw_name), "1"); - assert!(get_thread_name("invalid_stat").is_err()); + get_thread_name("invalid_stat").unwrap_err(); } #[test] diff --git a/components/tikv_util/src/mpsc/batch.rs 
b/components/tikv_util/src/mpsc/batch.rs index e8d54c514a1..0415f9376af 100644 --- a/components/tikv_util/src/mpsc/batch.rs +++ b/components/tikv_util/src/mpsc/batch.rs @@ -489,7 +489,7 @@ mod tests { let mut future_slot = self.future.lock().unwrap(); if let Some(mut future) = future_slot.take() { let waker = task::waker_ref(&task); - let cx = &mut Context::from_waker(&*waker); + let cx = &mut Context::from_waker(&waker); match future.as_mut().poll(cx) { Poll::Pending => { *future_slot = Some(future); diff --git a/components/tikv_util/src/time.rs b/components/tikv_util/src/time.rs index 0ab8240c4f2..0df4ed4adac 100644 --- a/components/tikv_util/src/time.rs +++ b/components/tikv_util/src/time.rs @@ -506,7 +506,7 @@ pub type Limiter = async_speed_limit::Limiter; pub type Consume = async_speed_limit::limiter::Consume; /// ReadId to judge whether the read requests come from the same GRPC stream. -#[derive(Eq, PartialEq, Clone, Debug)] +#[derive(PartialEq, Clone, Debug)] pub struct ThreadReadId { sequence: u64, pub create_time: Timespec, diff --git a/components/tikv_util/src/timer.rs b/components/tikv_util/src/timer.rs index 56a00e01a50..f47cdaf21e9 100644 --- a/components/tikv_util/src/timer.rs +++ b/components/tikv_util/src/timer.rs @@ -221,7 +221,7 @@ mod tests { use super::*; - #[derive(Debug, PartialEq, Eq, Copy, Clone)] + #[derive(Debug, PartialEq, Copy, Clone)] enum Task { A, B, diff --git a/components/tikv_util/src/worker/pool.rs b/components/tikv_util/src/worker/pool.rs index 621ac730c30..ba4b1e27f41 100644 --- a/components/tikv_util/src/worker/pool.rs +++ b/components/tikv_util/src/worker/pool.rs @@ -29,7 +29,7 @@ use crate::{ yatp_pool::{DefaultTicker, YatpPoolBuilder}, }; -#[derive(Eq, PartialEq)] +#[derive(PartialEq)] pub enum ScheduleError { Stopped(T), Full(T), diff --git a/components/tikv_util/src/yatp_pool/future_pool.rs b/components/tikv_util/src/yatp_pool/future_pool.rs index 1f9c74dd709..9de2d49cb07 100644 --- 
a/components/tikv_util/src/yatp_pool/future_pool.rs +++ b/components/tikv_util/src/yatp_pool/future_pool.rs @@ -187,7 +187,7 @@ impl PoolInner { } } -#[derive(Clone, Copy, PartialEq, Eq, Debug)] +#[derive(Clone, Copy, PartialEq, Debug)] pub struct Full { pub current_tasks: usize, pub max_tasks: usize, @@ -285,11 +285,11 @@ mod tests { .unwrap() }; - assert!(try_recv_tick().is_err()); + try_recv_tick().unwrap_err(); // Tick is emitted because long enough time has elapsed since pool is created spawn_future_and_wait(&pool, TICK_INTERVAL / 20); - assert!(try_recv_tick().is_err()); + try_recv_tick().unwrap_err(); spawn_future_and_wait(&pool, TICK_INTERVAL / 20); spawn_future_and_wait(&pool, TICK_INTERVAL / 20); @@ -297,30 +297,30 @@ mod tests { spawn_future_and_wait(&pool, TICK_INTERVAL / 20); // So far we have only elapsed TICK_INTERVAL * 0.2, so no ticks so far. - assert!(try_recv_tick().is_err()); + try_recv_tick().unwrap_err(); // Even if long enough time has elapsed, tick is not emitted until next task // arrives thread::sleep(TICK_INTERVAL * 2); - assert!(try_recv_tick().is_err()); + try_recv_tick().unwrap_err(); spawn_future_and_wait(&pool, TICK_INTERVAL / 20); assert_eq!(try_recv_tick().unwrap(), 0); - assert!(try_recv_tick().is_err()); + try_recv_tick().unwrap_err(); // Tick is not emitted if there is no task thread::sleep(TICK_INTERVAL * 2); - assert!(try_recv_tick().is_err()); + try_recv_tick().unwrap_err(); // Tick is emitted since long enough time has passed spawn_future_and_wait(&pool, TICK_INTERVAL / 20); assert_eq!(try_recv_tick().unwrap(), 1); - assert!(try_recv_tick().is_err()); + try_recv_tick().unwrap_err(); // Tick is emitted immediately after a long task spawn_future_and_wait(&pool, TICK_INTERVAL * 2); assert_eq!(try_recv_tick().unwrap(), 2); - assert!(try_recv_tick().is_err()); + try_recv_tick().unwrap_err(); } #[test] @@ -337,18 +337,18 @@ mod tests { .thread_count(2, 2, 2) .build_future_pool(); - assert!(rx.try_recv().is_err()); + 
rx.try_recv().unwrap_err(); // Spawn two tasks, each will be processed in one worker thread. spawn_future_without_wait(&pool, TICK_INTERVAL / 2); spawn_future_without_wait(&pool, TICK_INTERVAL / 2); - assert!(rx.try_recv().is_err()); + rx.try_recv().unwrap_err(); // Wait long enough time to trigger a tick. thread::sleep(TICK_INTERVAL * 2); - assert!(rx.try_recv().is_err()); + rx.try_recv().unwrap_err(); // These two tasks should both trigger a tick. spawn_future_without_wait(&pool, TICK_INTERVAL); @@ -359,7 +359,7 @@ mod tests { assert_eq!(rx.try_recv().unwrap(), 0); assert_eq!(rx.try_recv().unwrap(), 1); - assert!(rx.try_recv().is_err()); + rx.try_recv().unwrap_err(); } #[test] @@ -457,7 +457,7 @@ mod tests { spawn_long_time_future(&read_pool, 4, 400).unwrap(), ); // no available results (running = 4) - assert!(rx.recv_timeout(Duration::from_millis(50)).is_err()); + rx.recv_timeout(Duration::from_millis(50)).unwrap_err(); // full assert!(spawn_long_time_future(&read_pool, 5, 100).is_err()); @@ -480,7 +480,7 @@ mod tests { rx.recv().unwrap().unwrap(); // no more results - assert!(rx.recv_timeout(Duration::from_millis(500)).is_err()); + rx.recv_timeout(Duration::from_millis(500)).unwrap_err(); } #[test] diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index c37fcde86d1..e0a9b9de24f 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -1,7 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-#![feature(array_from_fn)] - mod metrics; mod slab; mod tls; @@ -92,7 +90,7 @@ impl RequestInfo { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +#[derive(Debug, Clone, Copy, PartialEq, Default)] pub enum RequestType { #[default] Unknown, diff --git a/components/tracker/src/slab.rs b/components/tracker/src/slab.rs index f737ee1ed1e..9b4be50796b 100644 --- a/components/tracker/src/slab.rs +++ b/components/tracker/src/slab.rs @@ -144,7 +144,7 @@ struct SlabEntry { pub const INVALID_TRACKER_TOKEN: TrackerToken = TrackerToken(u64::MAX); -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, PartialEq)] pub struct TrackerToken(u64); impl TrackerToken { diff --git a/components/txn_types/src/lock.rs b/components/txn_types/src/lock.rs index e0570d900ac..4c784e31318 100644 --- a/components/txn_types/src/lock.rs +++ b/components/txn_types/src/lock.rs @@ -425,7 +425,7 @@ impl Lock { /// A specialized lock only for pessimistic lock. This saves memory for cases /// that only pessimistic locks exist. -#[derive(Clone, PartialEq, Eq)] +#[derive(Clone, PartialEq)] pub struct PessimisticLock { /// The primary key in raw format. pub primary: Box<[u8]>, @@ -695,7 +695,7 @@ mod tests { } // Test `Lock::parse()` handles incorrect input. - assert!(Lock::parse(b"").is_err()); + Lock::parse(b"").unwrap_err(); let lock = Lock::new( LockType::Lock, @@ -708,7 +708,7 @@ mod tests { TimeStamp::zero(), ); let mut v = lock.to_bytes(); - assert!(Lock::parse(&v[..4]).is_err()); + Lock::parse(&v[..4]).unwrap_err(); // Test `Lock::parse()` ignores unknown bytes. 
v.extend(b"unknown"); let l = Lock::parse(&v).unwrap(); diff --git a/components/txn_types/src/timestamp.rs b/components/txn_types/src/timestamp.rs index 593fa2e1d41..946ccfbbdcb 100644 --- a/components/txn_types/src/timestamp.rs +++ b/components/txn_types/src/timestamp.rs @@ -211,7 +211,7 @@ mod tests { fn test_split_ts() { let k = b"k"; let ts = TimeStamp(123); - assert!(Key::split_on_ts_for(k).is_err()); + Key::split_on_ts_for(k).unwrap_err(); let enc = Key::from_encoded_slice(k).append_ts(ts); let res = Key::split_on_ts_for(enc.as_encoded()).unwrap(); assert_eq!(res, (k.as_ref(), ts)); diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 1d3fd775f1b..75df337f80c 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -572,17 +572,15 @@ mod tests { #[test] fn test_flags_panic() { for _ in 0..100 { - assert!( - panic_hook::recover_safe(|| { - // r must be an invalid flags if it is not zero - let r = rand::random::() & !WriteBatchFlags::all().bits(); - WriteBatchFlags::from_bits_check(r); - if r == 0 { - panic!("panic for zero"); - } - }) - .is_err() - ); + panic_hook::recover_safe(|| { + // r must be an invalid flags if it is not zero + let r = rand::random::() & !WriteBatchFlags::all().bits(); + WriteBatchFlags::from_bits_check(r); + if r == 0 { + panic!("panic for zero"); + } + }) + .unwrap_err(); } } diff --git a/fuzz/cli.rs b/fuzz/cli.rs index 3a804be7d17..96972d94565 100644 --- a/fuzz/cli.rs +++ b/fuzz/cli.rs @@ -57,7 +57,7 @@ enum Cli { } arg_enum! 
{ - #[derive(Debug, PartialEq, Eq, Clone, Copy)] + #[derive(Debug, PartialEq, Clone, Copy)] enum Fuzzer { Afl, Honggfuzz, diff --git a/rust-toolchain b/rust-toolchain index b91c1b17580..2181086f8d2 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly-2022-05-01 +nightly-2022-07-31 diff --git a/rustfmt.toml b/rustfmt.toml index 68b82c22bd1..3de3c63c441 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -9,7 +9,6 @@ format_macro_matchers = true normalize_comments = true normalize_doc_attributes = true condense_wildcard_suffixes = true -license_template_path = "etc/license.template" newline_style = "Unix" use_field_init_shorthand = true use_try_shorthand = true diff --git a/scripts/check-docker-build b/scripts/check-docker-build index 26a53cc1ef6..6a505f31a89 100755 --- a/scripts/check-docker-build +++ b/scripts/check-docker-build @@ -2,18 +2,16 @@ # This script checks if all cargo targets have path specifications. set -euo pipefail -for i in $(find . -type f -name 'Cargo.toml'); do - # These folders are excluded from docker build. - if echo $i | grep -q "./fuzz/\|./profiler/"; then - continue - fi - for target in "test" "bench" "bin" "example"; do - matches=$(sed -n "/\[\[$target\]\]/,/^$/ p" $i) - if [ $(echo "$matches" | grep -c "[[$target]]") != $(echo "$matches" | grep -c "^path =") ]; then - echo "Path has not been specified for a $target target in $i, this will break docker build." - exit 1 - fi - done +for i in $(git ls-files | grep 'Cargo.toml' | grep -v 'fuzz/\|./profiler/'); do + for target in "test" "bench" "bin" "example"; do + # from "[[test]]" to the first trailing empty line + matches=$(sed -n "/\[\[$target\]\]/,/^$/ p" $i) + # check equal amount of "[[test]]" and "path =" + if [ $(echo "$matches" | grep -c "[[$target]]") != $(echo "$matches" | grep -c "^path =") ]; then + echo "Path has not been specified for a $target target in $i, this will break docker build." + exit 1 + fi + done done echo "Docker build check passed." 
diff --git a/scripts/check-license b/scripts/check-license new file mode 100755 index 00000000000..0b35ef67177 --- /dev/null +++ b/scripts/check-license @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Check all source files have a license header. +set -euo pipefail + +for i in $(git ls-files | grep "\.rs"); do + # first line -> match -> print line -> quit + matches=$(sed -n "1{/Copyright [0-9]\{4\} TiKV Project Authors. Licensed under Apache-2.0./p;};q;" $i) + if [ -z "${matches}" ]; then + echo "License header is missing from $i." + exit 1 + fi +done + +echo "License check passed." diff --git a/scripts/check-redact-log b/scripts/check-redact-log index 880de323700..8ec3141ad4a 100755 --- a/scripts/check-redact-log +++ b/scripts/check-redact-log @@ -3,19 +3,19 @@ set -euo pipefail function error_msg() { - echo "To print user data into info logs or error messages, use log_wrappers::Value() instead of hex::encode_upper. The former will respect \`security.redact-info-log\` config and filter out user data from info log if needed. Otherwise, use \`log_wrappers::hex_encode_upper\` to get around the lint error. See https://github.com/tikv/tikv/pull/9250 for more information." >&2 + echo "To print user data into info logs or error messages, use log_wrappers::Value() instead of hex::encode_upper. The former will respect \`security.redact-info-log\` config and filter out user data from info log if needed. Otherwise, use \`log_wrappers::hex_encode_upper\` to get around the lint error. See https://github.com/tikv/tikv/pull/9250 for more information." >&2 } if [[ "$(uname)" == "Darwin" ]] ; then - if grep -r -n --color=always --include '*.rs' --exclude hex.rs --exclude-dir tikv-ctl --exclude-dir target 'encode_upper' . | grep -v log_wrappers ; then - error_msg - exit 1 - fi + if grep -r -n --color=always --include '*.rs' --exclude hex.rs --exclude-dir tikv-ctl --exclude-dir target 'encode_upper' . 
| grep -v log_wrappers ; then + error_msg + exit 1 + fi else if grep -r -n -P '(?/dev/null +# cd $pkg +# cargo clippy --all-targets --no-default-features \ +# --features "${TIKV_ENABLE_FEATURES}" -- "${ALLOWED_CLIPPY_LINTS[@]}" +# cd - >/dev/null # done # for pkg in "fuzz"; do -# cd $pkg -# cargo clippy --all-targets -- "${ALLOWED_CLIPPY_LINTS[@]}" -# cd - >/dev/null +# cd $pkg +# cargo clippy --all-targets -- "${ALLOWED_CLIPPY_LINTS[@]}" +# cd - >/dev/null # done diff --git a/scripts/run-cargo.sh b/scripts/run-cargo.sh index 8c309645f6a..0002c054927 100644 --- a/scripts/run-cargo.sh +++ b/scripts/run-cargo.sh @@ -16,7 +16,7 @@ set -e if [[ -e .cargo/config ]]; then - rm .cargo/config + rm .cargo/config fi args="" diff --git a/src/config.rs b/src/config.rs index 80e763e6981..8a9bf2d2468 100644 --- a/src/config.rs +++ b/src/config.rs @@ -213,7 +213,7 @@ impl TitanCfConfig { } } -#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] struct BackgroundJobLimits { max_background_jobs: u32, max_background_flushes: u32, @@ -1935,26 +1935,26 @@ mod unified_read_pool_tests { min_thread_count: 0, ..cfg }; - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); let invalid_cfg = UnifiedReadPoolConfig { min_thread_count: 2, max_thread_count: 1, ..cfg }; - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); let invalid_cfg = UnifiedReadPoolConfig { stack_size: ReadableSize::mb(1), ..cfg }; - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); let invalid_cfg = UnifiedReadPoolConfig { max_tasks_per_worker: 1, ..cfg }; - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); let invalid_cfg = UnifiedReadPoolConfig { min_thread_count: 1, max_thread_count: cmp::max( @@ -1963,7 +1963,7 @@ mod unified_read_pool_tests { ) + 1, ..cfg }; - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); } } @@ -2258,7 +2258,7 @@ 
mod readpool_tests { stack_size: ReadableSize::mb(0), max_tasks_per_worker: 0, }; - assert!(unified.validate().is_err()); + unified.validate().unwrap_err(); let storage = StorageReadPoolConfig { use_unified_pool: Some(false), ..Default::default() @@ -2285,7 +2285,7 @@ mod readpool_tests { high_concurrency: 0, ..Default::default() }; - assert!(storage.validate().is_err()); + storage.validate().unwrap_err(); let coprocessor = CoprReadPoolConfig { use_unified_pool: Some(false), ..Default::default() @@ -2296,7 +2296,7 @@ mod readpool_tests { coprocessor, }; assert!(!invalid_cfg.is_unified_pool_enabled()); - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); } #[test] @@ -2307,7 +2307,7 @@ mod readpool_tests { max_thread_count: 0, ..Default::default() }; - assert!(unified.validate().is_err()); + unified.validate().unwrap_err(); let storage = StorageReadPoolConfig { use_unified_pool: Some(true), ..Default::default() @@ -2322,7 +2322,7 @@ mod readpool_tests { }; cfg.adjust_use_unified_pool(); assert!(cfg.is_unified_pool_enabled()); - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); } #[test] @@ -2366,7 +2366,7 @@ mod readpool_tests { ..Default::default() }; assert!(cfg.is_unified_pool_enabled()); - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); cfg.storage.low_concurrency = 1; cfg.validate().unwrap(); @@ -2387,7 +2387,7 @@ mod readpool_tests { ..Default::default() }; assert!(cfg.is_unified_pool_enabled()); - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); cfg.coprocessor.low_concurrency = 1; cfg.validate().unwrap(); } @@ -2711,7 +2711,7 @@ pub struct LogConfig { } /// LogLevel is a wrapper type of `slog::Level` -#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, PartialEq)] pub struct LogLevel(slog::Level); impl From for slog::Level { @@ -4131,7 +4131,7 @@ mod tests { let mut last_cfg = TiKvConfig::default(); tikv_cfg.rocksdb.wal_dir = "/data/wal_dir".to_owned(); 
tikv_cfg.validate().unwrap(); - assert!(tikv_cfg.check_critical_cfg_with(&last_cfg).is_err()); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap_err(); last_cfg.rocksdb.wal_dir = "/data/wal_dir".to_owned(); tikv_cfg.validate().unwrap(); @@ -4141,7 +4141,7 @@ mod tests { let mut last_cfg = TiKvConfig::default(); tikv_cfg.storage.data_dir = "/data1".to_owned(); tikv_cfg.validate().unwrap(); - assert!(tikv_cfg.check_critical_cfg_with(&last_cfg).is_err()); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap_err(); last_cfg.storage.data_dir = "/data1".to_owned(); tikv_cfg.validate().unwrap(); @@ -4155,7 +4155,7 @@ mod tests { tikv_cfg.raft_engine.mut_config().dir = "/raft/wal_dir".to_owned(); tikv_cfg.validate().unwrap(); - assert!(tikv_cfg.check_critical_cfg_with(&last_cfg).is_err()); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap_err(); last_cfg.raft_engine.mut_config().dir = "/raft/wal_dir".to_owned(); tikv_cfg.validate().unwrap(); @@ -4169,7 +4169,7 @@ mod tests { tikv_cfg.raftdb.wal_dir = "/raft/wal_dir".to_owned(); tikv_cfg.validate().unwrap(); - assert!(tikv_cfg.check_critical_cfg_with(&last_cfg).is_err()); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap_err(); last_cfg.raftdb.wal_dir = "/raft/wal_dir".to_owned(); tikv_cfg.validate().unwrap(); @@ -4177,7 +4177,7 @@ mod tests { tikv_cfg.raft_store.raftdb_path = "/raft_path".to_owned(); tikv_cfg.validate().unwrap(); - assert!(tikv_cfg.check_critical_cfg_with(&last_cfg).is_err()); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap_err(); last_cfg.raft_store.raftdb_path = "/raft_path".to_owned(); tikv_cfg.validate().unwrap(); @@ -4314,7 +4314,7 @@ mod tests { tikv_cfg.pd.endpoints = vec!["".to_owned()]; let dur = tikv_cfg.raft_store.raft_heartbeat_interval(); tikv_cfg.server.grpc_keepalive_time = ReadableDuration(dur); - assert!(tikv_cfg.validate().is_err()); + tikv_cfg.validate().unwrap_err(); tikv_cfg.server.grpc_keepalive_time = ReadableDuration(dur * 2); tikv_cfg.validate().unwrap(); } @@ -4328,7 
+4328,7 @@ mod tests { tikv_cfg.rocksdb.writecf.block_size = ReadableSize::gb(10); tikv_cfg.rocksdb.raftcf.block_size = ReadableSize::gb(10); tikv_cfg.raftdb.defaultcf.block_size = ReadableSize::gb(10); - assert!(tikv_cfg.validate().is_err()); + tikv_cfg.validate().unwrap_err(); tikv_cfg.rocksdb.defaultcf.block_size = ReadableSize::kb(10); tikv_cfg.rocksdb.lockcf.block_size = ReadableSize::kb(10); tikv_cfg.rocksdb.writecf.block_size = ReadableSize::kb(10); @@ -4442,7 +4442,7 @@ mod tests { for (name, value) in cases { let mut change = HashMap::new(); change.insert(name, value); - assert!(to_config_change(change).is_err()); + to_config_change(change).unwrap_err(); } } @@ -4610,21 +4610,15 @@ mod tests { cfg_controller.register(Module::ResolvedTs, Box::new(TestConfigManager(tx))); // Return error if try to update not support config or unknow config - assert!( - cfg_controller - .update_config("resolved-ts.enable", "false") - .is_err() - ); - assert!( - cfg_controller - .update_config("resolved-ts.scan-lock-pool-size", "10") - .is_err() - ); - assert!( - cfg_controller - .update_config("resolved-ts.xxx", "false") - .is_err() - ); + cfg_controller + .update_config("resolved-ts.enable", "false") + .unwrap_err(); + cfg_controller + .update_config("resolved-ts.scan-lock-pool-size", "10") + .unwrap_err(); + cfg_controller + .update_config("resolved-ts.xxx", "false") + .unwrap_err(); let mut resolved_ts_cfg = cfg_controller.get_current().resolved_ts; // Default value @@ -4644,11 +4638,9 @@ mod tests { ); // Return error if try to update `advance-ts-interval` to an invalid value - assert!( - cfg_controller - .update_config("resolved-ts.advance-ts-interval", "0m") - .is_err() - ); + cfg_controller + .update_config("resolved-ts.advance-ts-interval", "0m") + .unwrap_err(); assert_eq!( resolved_ts_cfg.advance_ts_interval, ReadableDuration::millis(100) @@ -4738,11 +4730,9 @@ mod tests { // Can not update block cache through storage module // when shared block cache is disabled - 
assert!( - cfg_controller - .update_config("storage.block-cache.capacity", "512MB") - .is_err() - ); + cfg_controller + .update_config("storage.block-cache.capacity", "512MB") + .unwrap_err(); } #[test] @@ -4778,11 +4768,9 @@ mod tests { let db = storage.get_engine().get_rocksdb(); // Can not update shared block cache through rocksdb module - assert!( - cfg_controller - .update_config("rocksdb.defaultcf.block-cache-size", "256MB") - .is_err() - ); + cfg_controller + .update_config("rocksdb.defaultcf.block-cache-size", "256MB") + .unwrap_err(); cfg_controller .update_config("storage.block-cache.capacity", "256MB") @@ -4809,11 +4797,9 @@ mod tests { LogLevel(Level::Warning) ); - assert!( - cfg_controller - .update_config("log.level", "invalid") - .is_err() - ); + cfg_controller + .update_config("log.level", "invalid") + .unwrap_err(); assert_eq!( cfg_controller.get_current().log.level, LogLevel(Level::Warning) @@ -4882,7 +4868,7 @@ mod tests { res.unwrap(); (size, std::cmp::max(size / 2, 1)) } else { - assert!(res.is_err()); + res.unwrap_err(); (origin_pool_size, origin_pool_size_high) }; assert_eq!( @@ -4938,11 +4924,9 @@ mod tests { assert_eq!(cfg_controller.get_current(), cfg); // u64::MAX ns casts to 213503d. - assert!( - cfg_controller - .update_config("quota.max-delay-duration", "213504d") - .is_err() - ); + cfg_controller + .update_config("quota.max-delay-duration", "213504d") + .unwrap_err(); assert_eq!(cfg_controller.get_current(), cfg); cfg_controller @@ -5211,7 +5195,7 @@ mod tests { // Test validating memory_usage_limit when it's greater than max. cfg.memory_usage_limit = Some(ReadableSize(SysQuota::memory_limit_in_bytes() * 2)); - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); // Test memory_usage_limit is based on block cache size if it's not configured. 
cfg.memory_usage_limit = None; @@ -5250,7 +5234,7 @@ mod tests { let mut cfg = TiKvConfig::default(); cfg.storage.data_dir = tmp_path_string_generate!(tmp_path, "data"); cfg.raft_store.raftdb_path = tmp_path_string_generate!(tmp_path, "data", "db"); - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); } { @@ -5259,7 +5243,7 @@ mod tests { cfg.raft_store.raftdb_path = tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); cfg.rocksdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); } { @@ -5268,14 +5252,14 @@ mod tests { cfg.raft_store.raftdb_path = tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); cfg.raftdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "kvdb", "db"); - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); } { let mut cfg = TiKvConfig::default(); cfg.rocksdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "wal"); cfg.raftdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "wal"); - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); } { @@ -5643,7 +5627,7 @@ mod tests { let r = panic_hook::recover_safe(|| { let _: DefaultCfConfig = toml::from_str(bad_string_config).unwrap(); }); - assert!(r.is_err()); + r.unwrap_err(); let bad_string_config = r#" compaction-style = 4 @@ -5651,7 +5635,7 @@ mod tests { let r = panic_hook::recover_safe(|| { let _: DefaultCfConfig = toml::from_str(bad_string_config).unwrap(); }); - assert!(r.is_err()); + r.unwrap_err(); // rate-limiter-mode default values is 2 let config_str = r#" diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index d07d9bd5bd6..677490a4b31 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -887,7 +887,7 @@ mod tests { None, PerfLevel::EnableCount, ); - assert!(block_on(copr.handle_unary_request(outdated_req_ctx, handler_builder)).is_err()); + 
block_on(copr.handle_unary_request(outdated_req_ctx, handler_builder)).unwrap_err(); } #[test] @@ -1038,7 +1038,7 @@ mod tests { // verify for _ in 2..5 { - assert!(rx.recv().unwrap().is_err()); + rx.recv().unwrap().unwrap_err(); } for i in 0..2 { let resp = rx.recv().unwrap().unwrap(); diff --git a/src/coprocessor/interceptors/concurrency_limiter.rs b/src/coprocessor/interceptors/concurrency_limiter.rs index c77eab86f16..590dd5d7180 100644 --- a/src/coprocessor/interceptors/concurrency_limiter.rs +++ b/src/coprocessor/interceptors/concurrency_limiter.rs @@ -152,7 +152,7 @@ mod tests { // Light tasks should run without any semaphore permit let smp2 = smp.clone(); tokio::spawn(timeout(Duration::from_millis(250), async move { - limit_concurrency(work(2), &*smp2, Duration::from_millis(500)).await + limit_concurrency(work(2), &smp2, Duration::from_millis(500)).await })) .await .unwrap() @@ -164,7 +164,7 @@ mod tests { let smp2 = smp.clone(); let mut t1 = tokio::spawn( - async move { limit_concurrency(work(8), &*smp2, Duration::default()).await }, + async move { limit_concurrency(work(8), &smp2, Duration::default()).await }, ) .fuse(); @@ -172,7 +172,7 @@ mod tests { let smp2 = smp.clone(); let mut t2 = tokio::spawn( - async move { limit_concurrency(work(2), &*smp2, Duration::default()).await }, + async move { limit_concurrency(work(2), &smp2, Duration::default()).await }, ) .fuse(); diff --git a/src/server/config.rs b/src/server/config.rs index 88d167d2e64..1959b77df00 100644 --- a/src/server/config.rs +++ b/src/server/config.rs @@ -495,27 +495,27 @@ mod tests { let mut invalid_cfg = cfg.clone(); invalid_cfg.concurrent_send_snap_limit = 0; - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); let mut invalid_cfg = cfg.clone(); invalid_cfg.concurrent_recv_snap_limit = 0; - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); let mut invalid_cfg = cfg.clone(); invalid_cfg.end_point_recursion_limit = 0; - 
assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); let mut invalid_cfg = cfg.clone(); invalid_cfg.grpc_memory_pool_quota = ReadableSize::mb(0); - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); let mut invalid_cfg = cfg.clone(); invalid_cfg.end_point_request_max_handle_duration = ReadableDuration::secs(0); - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); invalid_cfg = Config::default(); invalid_cfg.addr = "0.0.0.0:1000".to_owned(); - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); invalid_cfg.advertise_addr = "127.0.0.1:1000".to_owned(); invalid_cfg.validate().unwrap(); @@ -526,25 +526,25 @@ mod tests { } assert!(invalid_cfg.advertise_status_addr.is_empty()); invalid_cfg.advertise_status_addr = "0.0.0.0:1000".to_owned(); - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); invalid_cfg = Config::default(); invalid_cfg.advertise_addr = "127.0.0.1:1000".to_owned(); invalid_cfg.advertise_status_addr = "127.0.0.1:1000".to_owned(); - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); invalid_cfg = Config::default(); invalid_cfg.max_grpc_send_msg_len = 0; - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); invalid_cfg = Config::default(); invalid_cfg.grpc_stream_initial_window_size = ReadableSize(i32::MAX as u64 + 1); - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); cfg.labels.insert("k1".to_owned(), "v1".to_owned()); cfg.validate().unwrap(); cfg.labels.insert("k2".to_owned(), "v2?".to_owned()); - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); } #[test] diff --git a/src/server/debug.rs b/src/server/debug.rs index 831a2b85255..77f6962deb9 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -913,10 +913,10 @@ fn dump_default_cf_properties( let sst_files = collection .iter() .map(|(k, _)| { - 
Path::new(&*k) + Path::new(k) .file_name() .map(|f| f.to_str().unwrap()) - .unwrap_or(&*k) + .unwrap_or(k) .to_string() }) .collect::>() @@ -950,10 +950,10 @@ fn dump_write_cf_properties( let sst_files = collection .iter() .map(|(k, _)| { - Path::new(&*k) + Path::new(k) .file_name() .map(|f| f.to_str().unwrap()) - .unwrap_or(&*k) + .unwrap_or(k) .to_string() }) .collect::>() @@ -1987,7 +1987,7 @@ mod tests { region.set_start_key(b"k".to_vec()); region.set_end_key(b"z".to_vec()); - assert!(debugger.recreate_region(region.clone()).is_err()); + debugger.recreate_region(region.clone()).unwrap_err(); remove_region_state(1); remove_region_state(2); @@ -1996,7 +1996,7 @@ mod tests { region.set_start_key(b"z".to_vec()); region.set_end_key(b"".to_vec()); - assert!(debugger.recreate_region(region).is_err()); + debugger.recreate_region(region).unwrap_err(); } #[test] diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index 5d26958ea41..cf988f9da37 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -266,7 +266,7 @@ mod tests { assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); let tablet_path = factory.tablet_path(1, 10); let result = factory.open_tablet_raw(&tablet_path, false); - assert!(result.is_err()); + result.unwrap_err(); factory .set_shared_block_cache_capacity(1024 * 1024) .unwrap(); @@ -290,7 +290,7 @@ mod tests { assert!(factory.is_tombstoned(1, 20)); factory.destroy_tablet(1, 20).unwrap(); let result = factory.open_tablet(1, 20); - assert!(result.is_err()); + result.unwrap_err(); assert!(!factory.is_single_engine()); } diff --git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index bcfe87d6783..b2a6a9d02dc 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -321,7 +321,7 @@ impl GcMan self.wait_for_next_safe_point()?; // Don't need to run GC any more if compaction filter is enabled. 
- if !is_compaction_filter_allowed(&*self.cfg_tracker.value(), &self.feature_gate) { + if !is_compaction_filter_allowed(&self.cfg_tracker.value(), &self.feature_gate) { set_status_metrics(GcManagerState::Working); self.gc_a_round()?; if let Some(on_finished) = self.cfg.post_a_round_of_gc.as_ref() { @@ -451,7 +451,7 @@ impl GcMan // periodically. If it's updated, rewinding will happen. loop { self.gc_manager_ctx.check_stopped()?; - if is_compaction_filter_allowed(&*self.cfg_tracker.value(), &self.feature_gate) { + if is_compaction_filter_allowed(&self.cfg_tracker.value(), &self.feature_gate) { return Ok(()); } diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index dcdb075d256..131efd68fac 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -2064,17 +2064,15 @@ mod tests { } // Then, it will fail to schedule another gc command. let (tx, rx) = mpsc::channel(); - assert!( - gc_worker - .gc( - TimeStamp::from(1), - Box::new(move |res| { - tx.send(res).unwrap(); - }) - ) - .is_err() - ); - assert!(rx.recv().unwrap().is_err()); + gc_worker + .gc( + TimeStamp::from(1), + Box::new(move |res| { + tx.send(res).unwrap(); + }), + ) + .unwrap_err(); + rx.recv().unwrap().unwrap_err(); let (tx, rx) = mpsc::channel(); // When the gc_worker is full, scheduling an unsafe destroy range task should be diff --git a/src/server/lock_manager/client.rs b/src/server/lock_manager/client.rs index c71bec0b63a..ba4e77810c3 100644 --- a/src/server/lock_manager/client.rs +++ b/src/server/lock_manager/client.rs @@ -59,14 +59,13 @@ impl Client { let (sink, receiver) = self.client.detect().unwrap(); let send_task = Box::pin(async move { let mut sink = sink.sink_map_err(Error::Grpc); - let res = sink - .send_all(&mut rx.map(|r| Ok((r, WriteFlags::default())))) + + sink.send_all(&mut rx.map(|r| Ok((r, WriteFlags::default())))) .await .map(|_| { info!("cancel detect sender"); sink.get_mut().cancel(); - }); - res + }) }); self.sender 
= Some(tx); diff --git a/src/server/raftkv.rs b/src/server/raftkv.rs index ab60f969493..de72a642837 100644 --- a/src/server/raftkv.rs +++ b/src/server/raftkv.rs @@ -128,9 +128,7 @@ fn on_write_result(mut write_resp: WriteResponse) -> Result> where S: Snapshot, { - if let Err(e) = check_raft_cmd_response(&mut write_resp.response) { - return Err(e); - } + check_raft_cmd_response(&mut write_resp.response)?; let resps = write_resp.response.take_responses(); Ok(CmdRes::Resp(resps.into())) } @@ -139,9 +137,7 @@ fn on_read_result(mut read_resp: ReadResponse) -> Result> where S: Snapshot, { - if let Err(e) = check_raft_cmd_response(&mut read_resp.response) { - return Err(e); - } + check_raft_cmd_response(&mut read_resp.response)?; let resps = read_resp.response.take_responses(); if let Some(mut snapshot) = read_resp.snapshot { snapshot.term = NonZeroU64::new(read_resp.response.get_header().get_current_term()); @@ -201,7 +197,7 @@ where req: Request, cb: Callback>, ) -> Result<()> { - let mut header = self.new_request_header(&*ctx.pb_ctx); + let mut header = self.new_request_header(ctx.pb_ctx); if ctx.pb_ctx.get_stale_read() && !ctx.start_ts.is_zero() { let mut data = [0u8; 8]; (&mut data[..]) diff --git a/src/server/resolve.rs b/src/server/resolve.rs index 404cee0e613..acf60ae783f 100644 --- a/src/server/resolve.rs +++ b/src/server/resolve.rs @@ -270,7 +270,7 @@ mod tests { fn test_resolve_store_state_tombstone() { let store = new_store(STORE_ADDR, metapb::StoreState::Tombstone); let runner = new_runner(store); - assert!(runner.get_address(0).is_err()); + runner.get_address(0).unwrap_err(); } #[test] diff --git a/src/server/server.rs b/src/server/server.rs index 5c0ace9d7b1..f202e30e761 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -320,7 +320,7 @@ impl + Unpin, S: StoreAddrResolver + 'static, E: En server.shutdown(); } if let Some(pool) = self.stats_pool.take() { - let _ = pool.shutdown_background(); + pool.shutdown_background(); } let _ = 
self.yatp_read_pool.take(); self.health_service.shutdown(); diff --git a/src/server/snap.rs b/src/server/snap.rs index b785c455921..e88fbd21fc9 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -186,7 +186,7 @@ pub fn send_snap( match recv_result { Ok(_) => { fail_point!("snapshot_delete_after_send"); - mgr.delete_snapshot(&key, &*chunks.snap, true); + mgr.delete_snapshot(&key, &chunks.snap, true); // TODO: improve it after rustc resolves the bug. // Call `info` in the closure directly will cause rustc // panic with `Cannot create local mono-item for DefId`. @@ -292,7 +292,7 @@ fn recv_snap + 'static>( defer!(snap_mgr.deregister(&context_key, &SnapEntry::Receiving)); while let Some(item) = stream.next().await { fail_point!("receiving_snapshot_net_error", |_| { - return Err(box_err!("{} failed to receive snapshot", context_key)); + Err(box_err!("{} failed to receive snapshot", context_key)) }); let mut chunk = item?; let data = chunk.take_data(); diff --git a/src/server/status_server/profile.rs b/src/server/status_server/profile.rs index 446711bef30..3419c7df0c8 100644 --- a/src/server/status_server/profile.rs +++ b/src/server/status_server/profile.rs @@ -278,7 +278,7 @@ where { let mut id = 0; while let Some(res) = period.next().await { - let _ = res?; + res?; id += 1; let path = format!("{}/{:0>6}{}", dir, id, HEAP_PROFILE_SUFFIX); dump_prof(&path).map_err(|e| format!("dump_prof: {}", e))?; @@ -394,7 +394,7 @@ mod tests { assert_eq!(block_on(res2).unwrap().unwrap_err(), expected); drop(tx1); - assert!(block_on(res1).unwrap().is_err()); + block_on(res1).unwrap().unwrap_err(); } #[test] @@ -439,7 +439,7 @@ mod tests { let (mut tx, rx) = mpsc::channel(1); let res = rt.spawn(activate_heap_profile(rx, std::env::temp_dir(), || {})); block_on(tx.send(Err("test".to_string()))).unwrap(); - assert!(block_on(res).unwrap().is_err()); + block_on(res).unwrap().unwrap_err(); // Test heap profiling can be activated again. 
let (tx, rx) = sync_channel::(1); diff --git a/src/storage/config.rs b/src/storage/config.rs index 4bfc664629f..7f2e6820201 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -386,10 +386,10 @@ mod tests { cfg.validate().unwrap(); cfg.scheduler_worker_pool_size = 0; - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); cfg.scheduler_worker_pool_size = max_pool_size + 1; - assert!(cfg.validate().is_err()); + cfg.validate().unwrap_err(); } #[test] diff --git a/src/storage/mod.rs b/src/storage/mod.rs index ef9aecf02ad..966b6095310 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -8931,7 +8931,7 @@ mod tests { .unwrap(); // DummyLockManager just drops the callback, so it will fail to receive // anything. - assert!(rx.recv().is_err()); + rx.recv().unwrap_err(); let (tx, rx) = channel(); storage diff --git a/src/storage/mvcc/mod.rs b/src/storage/mvcc/mod.rs index 07d0093e71c..1a554a4410b 100644 --- a/src/storage/mvcc/mod.rs +++ b/src/storage/mvcc/mod.rs @@ -488,7 +488,7 @@ pub mod tests { if check_lock(&mut reader, key, ts).is_err() { return; } - assert!(reader.get(key, ts).is_err()); + reader.get(key, ts).unwrap_err(); } pub fn must_locked(engine: &E, key: &[u8], start_ts: impl Into) -> Lock { diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index 434d0948310..7c521bb5952 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -465,7 +465,7 @@ mod tests { } fn must_get_err(point_getter: &mut PointGetter, key: &[u8]) { - assert!(point_getter.get(&Key::from_raw(key)).is_err()); + point_getter.get(&Key::from_raw(key)).unwrap_err(); } fn assert_seek_next_prev(stat: &CfStatistics, seek: usize, next: usize, prev: usize) { @@ -1152,15 +1152,15 @@ mod tests { (b"k9", None), ]; - for (k, v) in &expected_results { + for (k, v) in expected_results.iter().copied() { let mut single_getter = new_point_getter(&engine, 40.into()); - let value = 
single_getter.get(&Key::from_raw(*k)).unwrap(); + let value = single_getter.get(&Key::from_raw(k)).unwrap(); assert_eq!(value, v.map(|v| v.to_vec())); } let mut getter = new_point_getter(&engine, 40.into()); - for (k, v) in &expected_results { - let value = getter.get(&Key::from_raw(*k)).unwrap(); + for (k, v) in expected_results { + let value = getter.get(&Key::from_raw(k)).unwrap(); assert_eq!(value, v.map(|v| v.to_vec())); } } diff --git a/src/storage/mvcc/reader/scanner/backward.rs b/src/storage/mvcc/reader/scanner/backward.rs index 7e3d677ea52..6ade614e848 100644 --- a/src/storage/mvcc/reader/scanner/backward.rs +++ b/src/storage/mvcc/reader/scanner/backward.rs @@ -1499,7 +1499,7 @@ mod tests { scanner.next().unwrap(), Some((Key::from_raw(key2), val22.to_vec())) ); - assert!(scanner.next().is_err()); + scanner.next().unwrap_err(); // Scanner has met a lock though lock.ts > read_ts. let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1529,6 +1529,6 @@ mod tests { scanner.next().unwrap(), Some((Key::from_raw(key1), val1.to_vec())) ); - assert!(scanner.next().is_err()); + scanner.next().unwrap_err(); } } diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index d2c5e8b6a1b..a7a839cf2e7 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -1550,7 +1550,7 @@ mod latest_kv_tests { scanner.next().unwrap(), Some((Key::from_raw(key1), val1.to_vec())) ); - assert!(scanner.next().is_err()); + scanner.next().unwrap_err(); // Scanner has met a lock though lock.ts > read_ts. 
let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1583,7 +1583,7 @@ mod latest_kv_tests { scanner.next().unwrap(), Some((Key::from_raw(key5), val5.to_vec())) ); - assert!(scanner.next().is_err()); + scanner.next().unwrap_err(); } } diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index 3dd95d4045d..1517ad67c78 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -446,7 +446,7 @@ pub(crate) mod tests { must_commit(&engine, k1, 1, 2); // "k1" already exist, returns AlreadyExist error. - assert!(try_prewrite_check_not_exists(&engine, k1, k1, 3).is_err()); + try_prewrite_check_not_exists(&engine, k1, k1, 3).unwrap_err(); // Delete "k1" must_prewrite_delete(&engine, k1, k1, 4); @@ -461,7 +461,7 @@ pub(crate) mod tests { // Rollback must_prewrite_put(&engine, k1, v3, k1, 9); must_rollback(&engine, k1, 9, false); - assert!(try_prewrite_check_not_exists(&engine, k1, k1, 10).is_err()); + try_prewrite_check_not_exists(&engine, k1, k1, 10).unwrap_err(); // Delete "k1" again must_prewrite_delete(&engine, k1, k1, 11); @@ -479,7 +479,7 @@ pub(crate) mod tests { fn test_mvcc_txn_pessmistic_prewrite_check_not_exist() { let engine = TestEngineBuilder::new().build().unwrap(); let k = b"k1"; - assert!(try_pessimistic_prewrite_check_not_exists(&engine, k, k, 3).is_err()) + try_pessimistic_prewrite_check_not_exists(&engine, k, k, 3).unwrap_err(); } #[test] @@ -792,17 +792,15 @@ pub(crate) mod tests { let cm = ConcurrencyManager::new(10.into()); let mut txn = MvccTxn::new(5.into(), cm.clone()); let mut reader = SnapshotReader::new(5.into(), snapshot, true); - assert!( - prewrite( - &mut txn, - &mut reader, - &txn_props(5.into(), key, CommitKind::TwoPc, None, 0, false), - Mutation::make_put(Key::from_raw(key), value.to_vec()), - &None, - false, - ) - .is_err() - ); + prewrite( + &mut txn, + &mut reader, + &txn_props(5.into(), key, CommitKind::TwoPc, None, 0, false), + Mutation::make_put(Key::from_raw(key), value.to_vec()), + &None, + false, + 
) + .unwrap_err(); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut txn = MvccTxn::new(5.into(), cm); @@ -990,7 +988,7 @@ pub(crate) mod tests { // start_ts = 5, commit_ts = 15, Lock must_get(&engine, k, 19, v); - assert!(try_prewrite_insert(&engine, k, v, k, 20).is_err()); + try_prewrite_insert(&engine, k, v, k, 20).unwrap_err(); } #[test] diff --git a/src/storage/txn/actions/check_txn_status.rs b/src/storage/txn/actions/check_txn_status.rs index 2f3a2c84b11..f80e61f93ad 100644 --- a/src/storage/txn/actions/check_txn_status.rs +++ b/src/storage/txn/actions/check_txn_status.rs @@ -213,7 +213,7 @@ pub fn make_rollback( } } -#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[derive(Debug, Copy, Clone, PartialEq)] pub enum MissingLockAction { Rollback, ProtectedRollback, diff --git a/src/storage/txn/actions/commit.rs b/src/storage/txn/actions/commit.rs index 8435479991e..456757285e0 100644 --- a/src/storage/txn/actions/commit.rs +++ b/src/storage/txn/actions/commit.rs @@ -151,7 +151,7 @@ pub mod tests { let cm = ConcurrencyManager::new(start_ts); let mut txn = MvccTxn::new(start_ts, cm); let mut reader = SnapshotReader::new(start_ts, snapshot, true); - assert!(commit(&mut txn, &mut reader, Key::from_raw(key), commit_ts.into()).is_err()); + commit(&mut txn, &mut reader, Key::from_raw(key), commit_ts.into()).unwrap_err(); } #[cfg(test)] diff --git a/src/storage/txn/actions/tests.rs b/src/storage/txn/actions/tests.rs index acbd7a7f1a7..e5e4b57054c 100644 --- a/src/storage/txn/actions/tests.rs +++ b/src/storage/txn/actions/tests.rs @@ -486,17 +486,15 @@ pub fn must_prewrite_lock_err( let mut txn = MvccTxn::new(ts, cm); let mut reader = SnapshotReader::new(ts, snapshot, true); - assert!( - prewrite( - &mut txn, - &mut reader, - &default_txn_props(ts, pk, TimeStamp::zero()), - Mutation::make_lock(Key::from_raw(key)), - &None, - false, - ) - .is_err() - ); + prewrite( + &mut txn, + &mut reader, + &default_txn_props(ts, pk, TimeStamp::zero()), + 
Mutation::make_lock(Key::from_raw(key)), + &None, + false, + ) + .unwrap_err(); } pub fn must_pessimistic_prewrite_lock( @@ -539,14 +537,12 @@ pub fn must_rollback_err(engine: &E, key: &[u8], start_ts: impl Into< let cm = ConcurrencyManager::new(start_ts); let mut txn = MvccTxn::new(start_ts, cm); let mut reader = SnapshotReader::new(start_ts, snapshot, true); - assert!( - txn::cleanup( - &mut txn, - &mut reader, - Key::from_raw(key), - TimeStamp::zero(), - false, - ) - .is_err() - ); + txn::cleanup( + &mut txn, + &mut reader, + Key::from_raw(key), + TimeStamp::zero(), + false, + ) + .unwrap_err(); } diff --git a/src/storage/txn/flow_controller/singleton_flow_controller.rs b/src/storage/txn/flow_controller/singleton_flow_controller.rs index 8cb901187dd..2b36d6d8821 100644 --- a/src/storage/txn/flow_controller/singleton_flow_controller.rs +++ b/src/storage/txn/flow_controller/singleton_flow_controller.rs @@ -38,7 +38,7 @@ const MAX_THROTTLE_SPEED: f64 = 200.0 * 1024.0 * 1024.0; // 200MB const EMA_FACTOR: f64 = 0.6; // EMA stands for Exponential Moving Average -#[derive(Eq, PartialEq, Debug)] +#[derive(PartialEq, Debug)] enum Trend { Increasing, Decreasing, diff --git a/src/storage/txn/store.rs b/src/storage/txn/store.rs index c85bd828c08..0cd6c5b173b 100644 --- a/src/storage/txn/store.rs +++ b/src/storage/txn/store.rs @@ -1084,7 +1084,9 @@ mod tests { store.get(&Key::from_raw(b"ca"), &mut statistics).unwrap(), Some(b"hello".to_vec()) ); - assert!(store.get(&Key::from_raw(b"bba"), &mut statistics).is_err()); + store + .get(&Key::from_raw(b"bba"), &mut statistics) + .unwrap_err(); assert_eq!( store.get(&Key::from_raw(b"bbaa"), &mut statistics).unwrap(), None @@ -1115,7 +1117,9 @@ mod tests { store.get(&Key::from_raw(b"ab"), &mut statistics).unwrap(), Some(b"bar".to_vec()) ); - assert!(store.get(&Key::from_raw(b"zz"), &mut statistics).is_err()); + store + .get(&Key::from_raw(b"zz"), &mut statistics) + .unwrap_err(); assert_eq!( store.get(&Key::from_raw(b"z"), &mut 
statistics).unwrap(), Some(b"beta".to_vec()) @@ -1147,7 +1151,7 @@ mod tests { scanner.next().unwrap(), Some((Key::from_raw(b"bb"), b"alphaalpha".to_vec())) ); - assert!(scanner.next().is_err()); + scanner.next().unwrap_err(); assert_eq!( scanner.next().unwrap(), Some((Key::from_raw(b"ca"), b"hello".to_vec())) @@ -1156,13 +1160,13 @@ mod tests { scanner.next().unwrap(), Some((Key::from_raw(b"z"), b"beta".to_vec())) ); - assert!(scanner.next().is_err()); + scanner.next().unwrap_err(); // note: mvcc impl does not guarantee to work any more after meeting a non lock // error assert_eq!(scanner.next().unwrap(), None); let mut scanner = store.scanner(true, false, false, None, None).unwrap(); - assert!(scanner.next().is_err()); + scanner.next().unwrap_err(); // note: mvcc impl does not guarantee to work any more after meeting a non lock // error assert_eq!( @@ -1173,7 +1177,7 @@ mod tests { scanner.next().unwrap(), Some((Key::from_raw(b"ca"), b"hello".to_vec())) ); - assert!(scanner.next().is_err()); + scanner.next().unwrap_err(); assert_eq!( scanner.next().unwrap(), Some((Key::from_raw(b"bb"), b"alphaalpha".to_vec())) @@ -1214,13 +1218,13 @@ mod tests { scanner.next().unwrap(), Some((Key::from_raw(b"bb"), vec![])) ); - assert!(scanner.next().is_err()); + scanner.next().unwrap_err(); assert_eq!( scanner.next().unwrap(), Some((Key::from_raw(b"ca"), vec![])) ); assert_eq!(scanner.next().unwrap(), Some((Key::from_raw(b"z"), vec![]))); - assert!(scanner.next().is_err()); + scanner.next().unwrap_err(); // note: mvcc impl does not guarantee to work any more after meeting a non lock // error assert_eq!(scanner.next().unwrap(), None); @@ -1278,7 +1282,7 @@ mod tests { scanner.next().unwrap(), Some((Key::from_raw(b"bb"), vec![])) ); - assert!(scanner.next().is_err()); + scanner.next().unwrap_err(); assert_eq!(scanner.next().unwrap(), None); let mut scanner = store @@ -1316,7 +1320,7 @@ mod tests { Some(Key::from_raw(b"bba")), ) .unwrap(); - assert!(scanner.next().is_err()); + 
scanner.next().unwrap_err(); assert_eq!( scanner.next().unwrap(), Some((Key::from_raw(b"bb"), vec![])) diff --git a/tests/failpoints/cases/test_conf_change.rs b/tests/failpoints/cases/test_conf_change.rs index 70194b194ac..d4219808af0 100644 --- a/tests/failpoints/cases/test_conf_change.rs +++ b/tests/failpoints/cases/test_conf_change.rs @@ -270,7 +270,7 @@ fn test_redundant_conf_change_by_snapshot() { fail::cfg("apply_on_conf_change_3_1", "off").unwrap(); cluster.must_transfer_leader(1, new_peer(3, 3)); - assert!(rx.try_recv().is_err()); + rx.try_recv().unwrap_err(); fail::remove("apply_on_conf_change_3_1"); } diff --git a/tests/failpoints/cases/test_encryption.rs b/tests/failpoints/cases/test_encryption.rs index 502e31afff9..8b73188e569 100644 --- a/tests/failpoints/cases/test_encryption.rs +++ b/tests/failpoints/cases/test_encryption.rs @@ -23,7 +23,7 @@ fn test_file_dict_file_record_corrupted() { fail::remove("file_dict_log_append_incomplete"); file_dict_file.insert("info2", &info2).unwrap(); // Intermediate record damage is not allowed. - assert!(file_dict_file.recovery().is_err()); + file_dict_file.recovery().unwrap_err(); let mut file_dict_file = FileDictionaryFile::new( tempdir.path(), diff --git a/tests/failpoints/cases/test_hibernate.rs b/tests/failpoints/cases/test_hibernate.rs index 8ef0f08f19e..6bbed4ac641 100644 --- a/tests/failpoints/cases/test_hibernate.rs +++ b/tests/failpoints/cases/test_hibernate.rs @@ -79,5 +79,5 @@ fn test_break_leadership_on_restart() { // Peer 3 shouldn't start a new election, otherwise the leader may step down // incorrectly. 
- assert!(rx.recv_timeout(Duration::from_secs(2)).is_err()); + rx.recv_timeout(Duration::from_secs(2)).unwrap_err(); } diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 92785fcfa1e..32bd2f05228 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -1526,7 +1526,7 @@ fn test_retry_pending_prepare_merge_fail() { let rx = cluster.async_put(b"k1", b"v11").unwrap(); propose_rx.recv_timeout(Duration::from_secs(2)).unwrap(); - assert!(rx.recv_timeout(Duration::from_millis(200)).is_err()); + rx.recv_timeout(Duration::from_millis(200)).unwrap_err(); // Then, start merging. PrepareMerge should become pending because applied_index // is smaller than proposed_index. diff --git a/tests/failpoints/cases/test_pd_client.rs b/tests/failpoints/cases/test_pd_client.rs index 22871994f82..eb22ac29e45 100644 --- a/tests/failpoints/cases/test_pd_client.rs +++ b/tests/failpoints/cases/test_pd_client.rs @@ -129,7 +129,7 @@ fn test_watch_global_config_on_closed_server() { let client = Arc::new(client); use futures::StreamExt; let j = std::thread::spawn(move || { - let _ = futures::executor::block_on(async move { + futures::executor::block_on(async move { let mut r = client.watch_global_config().unwrap(); let mut i: usize = 0; while let Some(r) = r.next().await { diff --git a/tests/failpoints/cases/test_replica_read.rs b/tests/failpoints/cases/test_replica_read.rs index 7a6da017d99..5fe71834e45 100644 --- a/tests/failpoints/cases/test_replica_read.rs +++ b/tests/failpoints/cases/test_replica_read.rs @@ -59,7 +59,7 @@ fn test_wait_for_apply_index() { .async_command_on_node(3, request, cb) .unwrap(); // Must timeout here - assert!(rx.recv_timeout(Duration::from_millis(500)).is_err()); + rx.recv_timeout(Duration::from_millis(500)).unwrap_err(); fail::remove("on_apply_write_cmd"); // After write cmd applied, the follower read will be executed. 
@@ -794,7 +794,7 @@ fn test_read_index_lock_checking_on_false_leader() { // peer 1. But the lease of peer 1 has expired and it cannot get majority of // heartbeat. So, we cannot get the result here. let resp = async_read_index_on_peer(&mut cluster, new_peer(2, 2), r1, b"k1", true); - assert!(resp.recv_timeout(Duration::from_millis(300)).is_err()); + resp.recv_timeout(Duration::from_millis(300)).unwrap_err(); // Now, restore the network partition. Peer 1 should now become follower and // drop its pending read index request. Peer 2 cannot get the result now. @@ -805,7 +805,7 @@ fn test_read_index_lock_checking_on_false_leader() { ); cluster.sim.wl().add_recv_filter(2, recv_filter); cluster.clear_send_filters(); - assert!(resp.recv_timeout(Duration::from_millis(300)).is_err()); + resp.recv_timeout(Duration::from_millis(300)).unwrap_err(); // After cleaning all filters, peer 2 will retry and will get error. cluster.sim.wl().clear_recv_filters(2); diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index aab1fe3d879..bf23267a06a 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -131,7 +131,7 @@ fn test_split_lost_request_vote() { assert_eq!(range.1, b"k2"); // Make sure the message has sent to peer 3. 
- let _sent = after_sent_rx + after_sent_rx .recv_timeout(Duration::from_millis(100)) .unwrap(); diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 17e9957d947..7502fe6be4e 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -324,7 +324,7 @@ fn test_scale_scheduler_pool() { scale_pool(1); fail::cfg(snapshot_fp, "1*pause").unwrap(); // propose one prewrite to block the only worker - assert!(do_prewrite(b"k1", b"v1").is_err()); + do_prewrite(b"k1", b"v1").unwrap_err(); scale_pool(2); diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index c9f7a70ee09..de19d1a790c 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -323,10 +323,8 @@ fn test_max_commit_ts_error() { ) .unwrap(); thread::sleep(Duration::from_millis(200)); - assert!( - cm.read_key_check(&Key::from_raw(b"k1"), |_| Err(())) - .is_err() - ); + cm.read_key_check(&Key::from_raw(b"k1"), |_| Err(())) + .unwrap_err(); cm.update_max_ts(200.into()); let res = prewrite_rx.recv().unwrap().unwrap(); diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index 9ad2816d3d3..556549b8141 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -154,7 +154,7 @@ fn test_delete_lock_proposed_after_proposing_locks_impl(transfer_msg_count: usiz thread::spawn(move || tx.send(client.kv_cleanup(&req).unwrap()).unwrap()); thread::sleep(Duration::from_millis(200)); - assert!(resp_rx.try_recv().is_err()); + resp_rx.try_recv().unwrap_err(); for _ in 0..transfer_msg_count { cluster.transfer_leader(1, new_peer(2, 2)); @@ -231,7 +231,7 @@ fn test_delete_lock_proposed_before_proposing_locks() { thread::spawn(move || tx.send(client.kv_cleanup(&req).unwrap()).unwrap()); thread::sleep(Duration::from_millis(200)); - 
assert!(resp_rx.try_recv().is_err()); + resp_rx.try_recv().unwrap_err(); cluster.transfer_leader(1, new_peer(2, 2)); thread::sleep(Duration::from_millis(200)); @@ -318,7 +318,7 @@ fn test_read_lock_after_become_follower() { thread::spawn(move || tx.send(client.kv_prewrite(&req).unwrap()).unwrap()); thread::sleep(Duration::from_millis(200)); - assert!(resp_rx.try_recv().is_err()); + resp_rx.try_recv().unwrap_err(); // And pause applying the write on the leader. fail::cfg("on_apply_write_cmd", "pause").unwrap(); diff --git a/tests/failpoints/cases/test_ttl.rs b/tests/failpoints/cases/test_ttl.rs index 4748b1d0bbf..25ffcf6ff4c 100644 --- a/tests/failpoints/cases/test_ttl.rs +++ b/tests/failpoints/cases/test_ttl.rs @@ -87,14 +87,14 @@ fn test_ttl_checker_impl() { assert!(kvdb.get_value_cf(CF_DEFAULT, key4).unwrap().is_some()); assert!(kvdb.get_value_cf(CF_DEFAULT, key5).unwrap().is_some()); - let _ = check_ttl_and_compact_files(&kvdb, b"zr\0key1", b"zr\0key25", false); + check_ttl_and_compact_files(&kvdb, b"zr\0key1", b"zr\0key25", false); assert!(kvdb.get_value_cf(CF_DEFAULT, key1).unwrap().is_none()); assert!(kvdb.get_value_cf(CF_DEFAULT, key2).unwrap().is_some()); assert!(kvdb.get_value_cf(CF_DEFAULT, key3).unwrap().is_none()); assert!(kvdb.get_value_cf(CF_DEFAULT, key4).unwrap().is_some()); assert!(kvdb.get_value_cf(CF_DEFAULT, key5).unwrap().is_some()); - let _ = check_ttl_and_compact_files(&kvdb, b"zr\0key2", b"zr\0key6", false); + check_ttl_and_compact_files(&kvdb, b"zr\0key2", b"zr\0key6", false); assert!(kvdb.get_value_cf(CF_DEFAULT, key1).unwrap().is_none()); assert!(kvdb.get_value_cf(CF_DEFAULT, key2).unwrap().is_some()); assert!(kvdb.get_value_cf(CF_DEFAULT, key3).unwrap().is_none()); diff --git a/tests/failpoints/cases/test_unsafe_recovery.rs b/tests/failpoints/cases/test_unsafe_recovery.rs index c70ac41d902..20bb666ff3e 100644 --- a/tests/failpoints/cases/test_unsafe_recovery.rs +++ b/tests/failpoints/cases/test_unsafe_recovery.rs @@ -114,11 +114,9 @@ fn 
test_unsafe_recovery_execution_result_report() { true, ); // marjority is lost, can't propose command successfully. - assert!( - cluster - .call_command_on_leader(req, Duration::from_millis(10)) - .is_err() - ); + cluster + .call_command_on_leader(req, Duration::from_millis(10)) + .unwrap_err(); } cluster.must_enter_force_leader(region2.get_id(), nodes[0], vec![nodes[1], nodes[2]]); @@ -303,11 +301,9 @@ fn test_unsafe_recovery_demotion_reentrancy() { true, ); // marjority is lost, can't propose command successfully. - assert!( - cluster - .call_command_on_leader(req, Duration::from_millis(10)) - .is_err() - ); + cluster + .call_command_on_leader(req, Duration::from_millis(10)) + .unwrap_err(); } cluster.must_enter_force_leader(region.get_id(), nodes[0], vec![nodes[1], nodes[2]]); @@ -408,11 +404,9 @@ fn test_unsafe_recovery_create_destroy_reentrancy() { true, ); // marjority is lost, can't propose command successfully. - assert!( - cluster - .call_command_on_leader(req, Duration::from_millis(10)) - .is_err() - ); + cluster + .call_command_on_leader(req, Duration::from_millis(10)) + .unwrap_err(); } cluster.must_enter_force_leader(region2.get_id(), nodes[0], vec![nodes[1], nodes[2]]); diff --git a/tests/integrations/config/dynamic/gc_worker.rs b/tests/integrations/config/dynamic/gc_worker.rs index 19e97058616..e3603d8cbab 100644 --- a/tests/integrations/config/dynamic/gc_worker.rs +++ b/tests/integrations/config/dynamic/gc_worker.rs @@ -17,7 +17,7 @@ fn test_gc_config_validate() { let mut invalid_cfg = GcConfig::default(); invalid_cfg.batch_keys = 0; - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); } fn setup_cfg_controller( diff --git a/tests/integrations/config/dynamic/pessimistic_txn.rs b/tests/integrations/config/dynamic/pessimistic_txn.rs index 78824d6ee95..b7496de182d 100644 --- a/tests/integrations/config/dynamic/pessimistic_txn.rs +++ b/tests/integrations/config/dynamic/pessimistic_txn.rs @@ -24,7 +24,7 @@ fn 
test_config_validate() { let mut invalid_cfg = Config::default(); invalid_cfg.wait_for_lock_timeout = ReadableDuration::millis(0); - assert!(invalid_cfg.validate().is_err()); + invalid_cfg.validate().unwrap_err(); } #[derive(Clone)] diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index bae6262aeb4..d1b34a3a498 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -191,7 +191,7 @@ fn test_update_raftstore_config() { ]; for cfg in invalid_cfgs { let change = new_changes(vec![cfg]); - assert!(cfg_controller.update(change).is_err()); + cfg_controller.update(change).unwrap_err(); // update failed, original config should not be changed. validate_store_cfg(&raft_store); diff --git a/tests/integrations/config/test_config_client.rs b/tests/integrations/config/test_config_client.rs index fa45d08b24a..4ceb5d3affc 100644 --- a/tests/integrations/config/test_config_client.rs +++ b/tests/integrations/config/test_config_client.rs @@ -33,23 +33,23 @@ fn test_update_config() { // update not support config let res = cfg_controller.update(change("server.addr", "localhost:3000")); - assert!(res.is_err()); + res.unwrap_err(); assert_eq!(cfg_controller.get_current(), cfg); // update to invalid config let res = cfg_controller.update(change("raftstore.raft-log-gc-threshold", "0")); - assert!(res.is_err()); + res.unwrap_err(); assert_eq!(cfg_controller.get_current(), cfg); // bad update request let res = cfg_controller.update(change("xxx.yyy", "0")); - assert!(res.is_err()); + res.unwrap_err(); let res = cfg_controller.update(change("raftstore.xxx", "0")); - assert!(res.is_err()); + res.unwrap_err(); let res = cfg_controller.update(change("raftstore.raft-log-gc-threshold", "10MB")); - assert!(res.is_err()); + res.unwrap_err(); let res = cfg_controller.update(change("raft-log-gc-threshold", "10MB")); - assert!(res.is_err()); + res.unwrap_err(); 
assert_eq!(cfg_controller.get_current(), cfg); } diff --git a/tests/integrations/pd/test_rpc_client.rs b/tests/integrations/pd/test_rpc_client.rs index a6ac43235f3..57566b91e75 100644 --- a/tests/integrations/pd/test_rpc_client.rs +++ b/tests/integrations/pd/test_rpc_client.rs @@ -509,7 +509,7 @@ fn test_pd_client_heartbeat_send_failed() { assert!(rsp.is_ok()); assert_eq!(rsp.unwrap().get_region_id(), 1); } else { - assert!(rsp.is_err()); + rsp.unwrap_err(); } let region = block_on(client.get_region_by_id(1)); @@ -519,7 +519,7 @@ fn test_pd_client_heartbeat_send_failed() { assert!(r.is_some()); assert_eq!(1, r.unwrap().get_id()); } else { - assert!(region.is_err()); + region.unwrap_err(); } }; // send fail if network is block. diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index e74f0979241..1caf4e31ea3 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -181,7 +181,7 @@ fn test_node_switch_api_version() { cluster.shutdown(); } else { // Should not be able to switch to `to_api`. - assert!(cluster.start().is_err()); + cluster.start().unwrap_err(); } } } diff --git a/tests/integrations/raftstore/test_early_apply.rs b/tests/integrations/raftstore/test_early_apply.rs index a88032671a3..b30a861e2fe 100644 --- a/tests/integrations/raftstore/test_early_apply.rs +++ b/tests/integrations/raftstore/test_early_apply.rs @@ -28,7 +28,7 @@ fn delete_old_data(engine: &E, id: u64) { } /// Allow lost situation. -#[derive(PartialEq, Eq, Clone, Copy)] +#[derive(PartialEq, Clone, Copy)] enum DataLost { /// The leader loses commit index. 
/// diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index ae04c0d12f2..4b69bd4129e 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -302,8 +302,8 @@ fn test_batch_id_in_lease(cluster: &mut Cluster) { let (split_key1, split_key2) = (b"k22", b"k44"); let keys = vec![b"k11", b"k33", b"k55"]; - let _ = keys.iter().map(|key| { - cluster.must_put(*key, b"v1"); + let _ = keys.iter().map(|&key| { + cluster.must_put(key, b"v1"); }); let region = pd_client.get_region(keys[0]).unwrap(); diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index d378c55c5e6..f44b2f99642 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -1636,9 +1636,9 @@ fn test_prepare_merge_with_5_nodes_snapshot() { pd_client.add_peer(left.get_id(), new_peer(5, 16)); // Make sure there will be no admin entries after min_matched. - for (k, v) in &[(b"k11", b"v11"), (b"k12", b"v12")] { - cluster.must_put(*k, *v); - must_get_equal(&cluster.get_engine(4), *k, *v); + for (k, v) in [(b"k11", b"v11"), (b"k12", b"v12")] { + cluster.must_put(k, v); + must_get_equal(&cluster.get_engine(4), k, v); } cluster.add_send_filter(IsolationFilterFactory::new(4)); // So index of peer 4 becomes min_matched. diff --git a/tests/integrations/raftstore/test_replica_read.rs b/tests/integrations/raftstore/test_replica_read.rs index a2ae4ab0f31..6deccad3a5e 100644 --- a/tests/integrations/raftstore/test_replica_read.rs +++ b/tests/integrations/raftstore/test_replica_read.rs @@ -101,7 +101,7 @@ fn test_replica_read_not_applied() { // Read index on follower should be blocked instead of get an old value. 
let resp1_ch = async_read_on_peer(&mut cluster, new_peer(3, 3), r1.clone(), b"k1", true, true); - assert!(resp1_ch.recv_timeout(Duration::from_secs(1)).is_err()); + resp1_ch.recv_timeout(Duration::from_secs(1)).unwrap_err(); // Unpark all append responses so that the new leader can commit its first // entry. @@ -151,7 +151,7 @@ fn test_replica_read_on_hibernate() { // Read index on follower should be blocked. let resp1_ch = async_read_on_peer(&mut cluster, new_peer(1, 1), r1, b"k1", true, true); - assert!(resp1_ch.recv_timeout(Duration::from_secs(1)).is_err()); + resp1_ch.recv_timeout(Duration::from_secs(1)).unwrap_err(); let (tx, rx) = mpsc::sync_channel(1024); let cb = Arc::new(move |msg: &RaftMessage| { @@ -278,7 +278,9 @@ fn test_replica_read_on_stale_peer() { cluster.must_put(b"k2", b"v2"); let resp1_ch = async_read_on_peer(&mut cluster, peer_on_store3, region, b"k2", true, true); // must be timeout - assert!(resp1_ch.recv_timeout(Duration::from_micros(100)).is_err()); + resp1_ch + .recv_timeout(Duration::from_micros(100)) + .unwrap_err(); } #[test] @@ -311,7 +313,7 @@ fn test_read_index_out_of_order() { // Can't get read resonse because heartbeat responses are blocked. 
let r1 = cluster.get_region(b"k1"); let resp1 = async_read_on_peer(&mut cluster, new_peer(1, 1), r1.clone(), b"k1", true, true); - assert!(resp1.recv_timeout(Duration::from_secs(2)).is_err()); + resp1.recv_timeout(Duration::from_secs(2)).unwrap_err(); pd_client.must_remove_peer(rid, new_peer(2, 2)); @@ -353,8 +355,8 @@ fn test_read_index_retry_lock_checking() { let r1 = cluster.get_region(b"k1"); let resp1 = async_read_index_on_peer(&mut cluster, new_peer(2, 2), r1.clone(), b"k1", true); let resp2 = async_read_index_on_peer(&mut cluster, new_peer(2, 2), r1, b"k2", true); - assert!(resp1.recv_timeout(Duration::from_secs(2)).is_err()); - assert!(resp2.try_recv().is_err()); + resp1.recv_timeout(Duration::from_secs(2)).unwrap_err(); + resp2.try_recv().unwrap_err(); // k1 has a memory lock let leader_cm = cluster.sim.rl().get_concurrency_manager(1); diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index a7664e8ccf0..6ac72f668db 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -689,7 +689,7 @@ fn test_split_epoch_not_match(cluster: &mut Cluster, right_deri cluster.must_split(&r, b"k4"); let regions: Vec<_> = [b"k0", b"k2", b"k3", b"k4"] .iter() - .map(|k| pd_client.get_region(*k).unwrap()) + .map(|&k| pd_client.get_region(k).unwrap()) .collect(); let new = regions[3].clone(); diff --git a/tests/integrations/raftstore/test_unsafe_recovery.rs b/tests/integrations/raftstore/test_unsafe_recovery.rs index cf2361ebc8e..a9cd40d2fff 100644 --- a/tests/integrations/raftstore/test_unsafe_recovery.rs +++ b/tests/integrations/raftstore/test_unsafe_recovery.rs @@ -19,11 +19,9 @@ fn confirm_quorum_is_lost(cluster: &mut Cluster, region: &metap true, ); // marjority is lost, can't propose command successfully. 
- assert!( - cluster - .call_command_on_leader(req, Duration::from_millis(10)) - .is_err() - ); + cluster + .call_command_on_leader(req, Duration::from_millis(10)) + .unwrap_err(); } #[test] @@ -854,11 +852,9 @@ fn test_force_leader_with_uncommitted_conf_change() { find_peer(®ion, 2).unwrap().clone(), ); let req = new_admin_request(region.get_id(), region.get_region_epoch(), cmd); - assert!( - cluster - .call_command_on_leader(req, Duration::from_millis(10)) - .is_err() - ); + cluster + .call_command_on_leader(req, Duration::from_millis(10)) + .unwrap_err(); // wait election timeout std::thread::sleep(Duration::from_millis( @@ -973,11 +969,9 @@ fn test_force_leader_on_wrong_leader() { find_peer(®ion, 3).unwrap().clone(), ); let req = new_admin_request(region.get_id(), region.get_region_epoch(), cmd); - assert!( - cluster - .call_command_on_leader(req, Duration::from_millis(10)) - .is_err() - ); + cluster + .call_command_on_leader(req, Duration::from_millis(10)) + .unwrap_err(); cluster.exit_force_leader(region.get_id(), 2); // peer on node2 still doesn't have the latest committed log. 
diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 366de3c0493..17b1e49f2e0 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -1936,7 +1936,7 @@ fn test_txn_api_version() { // Pessimistic Lock ts += 1; let lock_ts = ts; - let _resp = must_kv_pessimistic_lock(&client, ctx.clone(), k.clone(), lock_ts); + must_kv_pessimistic_lock(&client, ctx.clone(), k.clone(), lock_ts); // Prewrite Pessimistic let mut mutation = Mutation::default(); diff --git a/tests/integrations/server/raft_client.rs b/tests/integrations/server/raft_client.rs index c3964ab39d8..7ee38a72c87 100644 --- a/tests/integrations/server/raft_client.rs +++ b/tests/integrations/server/raft_client.rs @@ -423,7 +423,7 @@ fn test_store_allowlist() { for _ in 0..3 { let mut raft_m = RaftMessage::default(); raft_m.mut_to_peer().set_store_id(1); - assert!(raft_client.send(raft_m).is_err()); + raft_client.send(raft_m).unwrap_err(); } for _ in 0..5 { let mut raft_m = RaftMessage::default(); diff --git a/tests/integrations/server/security.rs b/tests/integrations/server/security.rs index 71a0979a005..a0d7d53186d 100644 --- a/tests/integrations/server/security.rs +++ b/tests/integrations/server/security.rs @@ -44,5 +44,5 @@ fn test_check_cn_fail() { let client = TikvClient::new(channel); let status = client.kv_get(&GetRequest::default()); - assert!(status.is_err()); + status.unwrap_err(); } diff --git a/tests/integrations/storage/test_raft_storage.rs b/tests/integrations/storage/test_raft_storage.rs index ef1ee5402e6..98e60386884 100644 --- a/tests/integrations/storage/test_raft_storage.rs +++ b/tests/integrations/storage/test_raft_storage.rs @@ -56,10 +56,14 @@ fn test_raft_storage() { // Test wrong region id. 
let region_id = ctx.get_region_id(); ctx.set_region_id(region_id + 1); - assert!(storage.get(ctx.clone(), &key, 20).is_err()); - assert!(storage.batch_get(ctx.clone(), &[key.clone()], 20).is_err()); - assert!(storage.scan(ctx.clone(), key, None, 1, false, 20).is_err()); - assert!(storage.scan_locks(ctx, 20, None, None, 100).is_err()); + storage.get(ctx.clone(), &key, 20).unwrap_err(); + storage + .batch_get(ctx.clone(), &[key.clone()], 20) + .unwrap_err(); + storage + .scan(ctx.clone(), key, None, 1, false, 20) + .unwrap_err(); + storage.scan_locks(ctx, 20, None, None, 100).unwrap_err(); } #[test] @@ -147,7 +151,7 @@ fn test_raft_storage_store_not_match() { peer.set_store_id(store_id + 1); ctx.set_peer(peer); - assert!(storage.get(ctx.clone(), &key, 20).is_err()); + storage.get(ctx.clone(), &key, 20).unwrap_err(); let res = storage.get(ctx.clone(), &key, 20); if let StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Engine(KvError( box KvErrorInner::Request(ref e), @@ -157,9 +161,13 @@ fn test_raft_storage_store_not_match() { } else { panic!("expect store_not_match, but got {:?}", res); } - assert!(storage.batch_get(ctx.clone(), &[key.clone()], 20).is_err()); - assert!(storage.scan(ctx.clone(), key, None, 1, false, 20).is_err()); - assert!(storage.scan_locks(ctx, 20, None, None, 100).is_err()); + storage + .batch_get(ctx.clone(), &[key.clone()], 20) + .unwrap_err(); + storage + .scan(ctx.clone(), key, None, 1, false, 20) + .unwrap_err(); + storage.scan_locks(ctx, 20, None, None, 100).unwrap_err(); } #[test] @@ -350,8 +358,8 @@ fn test_auto_gc() { let split_keys: &[&[u8]] = &[b"k2", b"k4", b"k6", b"k8"]; for k in split_keys { - let region = cluster.get_region(*k); - cluster.must_split(®ion, *k); + let region = cluster.get_region(k); + cluster.must_split(®ion, k); } check_data(&mut cluster, &storages, &test_data, 50, true); diff --git a/tests/integrations/storage/test_raftkv.rs b/tests/integrations/storage/test_raftkv.rs index 
4f48cb72920..f99d9348616 100644 --- a/tests/integrations/storage/test_raftkv.rs +++ b/tests/integrations/storage/test_raftkv.rs @@ -530,5 +530,5 @@ fn wrong_context(ctx: &Context, engine: &E) { let region_id = ctx.get_region_id(); let mut ctx = ctx.to_owned(); ctx.set_region_id(region_id + 1); - assert!(engine.write(&ctx, WriteData::default()).is_err()); + engine.write(&ctx, WriteData::default()).unwrap_err(); } diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 7b1aab71183..b0c95eb9f7a 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -83,7 +83,7 @@ fn test_turnoff_titan() { // try reopen db when titan isn't properly turned off. configure_for_disable_titan(&mut cluster); - assert!(cluster.pre_start_check().is_err()); + cluster.pre_start_check().unwrap_err(); configure_for_enable_titan(&mut cluster, ReadableSize::kb(0)); cluster.pre_start_check().unwrap(); From dcb5e2ccd2582fc2f8d3425c27f9891368658154 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 4 Aug 2022 12:24:06 +0800 Subject: [PATCH 0133/1149] tikv_kv: support tablet level snapshot acquisition (#13039) close tikv/tikv#13050 Signed-off-by: SpadeA-Tang --- Cargo.lock | 7 + components/raftstore-v2/Cargo.toml | 6 + components/raftstore-v2/src/fsm/mod.rs | 2 +- components/raftstore-v2/src/fsm/store.rs | 30 +- components/raftstore-v2/src/operation/mod.rs | 2 + .../raftstore-v2/src/operation/read/mod.rs | 3 + .../raftstore-v2/src/operation/read/read.rs | 256 ++++++++++ components/raftstore-v2/src/tablet.rs | 3 +- components/raftstore/src/router.rs | 23 +- components/raftstore/src/store/fsm/store.rs | 1 - components/raftstore/src/store/mod.rs | 11 +- components/raftstore/src/store/peer.rs | 12 +- components/raftstore/src/store/worker/mod.rs | 5 +- components/raftstore/src/store/worker/read.rs | 460 +++++++++++++++--- components/server/src/server.rs | 13 +- 
components/test_raftstore/src/node.rs | 6 +- components/test_raftstore/src/server.rs | 8 +- components/tikv_kv/Cargo.toml | 1 + src/server/engine_factory.rs | 5 +- src/server/engine_factory_v2.rs | 13 +- 20 files changed, 760 insertions(+), 107 deletions(-) create mode 100644 components/raftstore-v2/src/operation/read/mod.rs create mode 100644 components/raftstore-v2/src/operation/read/read.rs diff --git a/Cargo.lock b/Cargo.lock index c5d22fc6e61..52b39154e91 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4183,12 +4183,15 @@ dependencies = [ "batch-system", "collections", "crossbeam", + "engine_rocks", "engine_test", "engine_traits", "error_code", "fail", "futures-util", + "keys", "kvproto", + "log_wrappers", "pd_client", "raft", "raft-proto", @@ -4199,7 +4202,10 @@ dependencies = [ "tempfile", "test_pd", "test_util", + "tikv_kv", "tikv_util", + "time", + "txn_types", ] [[package]] @@ -6217,6 +6223,7 @@ dependencies = [ "backtrace", "engine_panic", "engine_rocks", + "engine_test", "engine_traits", "error_code", "fail", diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 5cdd2ee747f..f6a827d7424 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -32,18 +32,24 @@ cloud-azure = ["raftstore/cloud-azure"] batch-system = { path = "../batch-system", default-features = false } collections = { path = "../collections" } crossbeam = "0.8" +engine_rocks = { path = "../engine_rocks", default-features = false } engine_traits = { path = "../engine_traits" } error_code = { path = "../error_code" } fail = "0.5" futures-util = { version = "0.3", features = ["compat"] } +keys = { path = "../keys", default-features = false } kvproto = { git = "https://github.com/pingcap/kvproto.git" } +log_wrappers = { path = "../log_wrappers" } pd_client = { path = "../pd_client" } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raft-proto = { version = "0.7.0" } raftstore = { path = 
"../raftstore" } slog = "2.3" smallvec = "1.4" +tikv_kv = { path = "../tikv_kv", default-features = false } tikv_util = { path = "../tikv_util", default-features = false } +time = "0.1" +txn_types = { path = "../txn_types", default-features = false } [dev-dependencies] engine_test = { path = "../engine_test", default-features = false } diff --git a/components/raftstore-v2/src/fsm/mod.rs b/components/raftstore-v2/src/fsm/mod.rs index 02f788d3be2..8126c8a868a 100644 --- a/components/raftstore-v2/src/fsm/mod.rs +++ b/components/raftstore-v2/src/fsm/mod.rs @@ -11,4 +11,4 @@ mod store; pub use apply::{ApplyFsm, ApplyFsmDelegate}; pub use peer::{PeerFsm, PeerFsmDelegate, SenderFsmPair}; -pub use store::{StoreFsm, StoreFsmDelegate}; +pub use store::{StoreFsm, StoreFsmDelegate, StoreMeta}; diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 257028f1630..886478a3036 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -1,12 +1,38 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use batch_system::Fsm; +use collections::HashMap; use crossbeam::channel::TryRecvError; +use engine_traits::KvEngine; use kvproto::metapb::Store; -use raftstore::store::Config; +use raftstore::store::{Config, ReadDelegate}; use tikv_util::mpsc::{self, LooseBoundedSender, Receiver}; -use crate::{batch::StoreContext, StoreMsg}; +use crate::{batch::StoreContext, tablet::CachedTablet, StoreMsg}; + +pub struct StoreMeta +where + E: KvEngine, +{ + pub store_id: Option, + /// region_id -> reader + pub readers: HashMap, + /// region_id -> tablet cache + pub tablet_caches: HashMap>, +} + +impl StoreMeta +where + E: KvEngine, +{ + pub fn new() -> StoreMeta { + StoreMeta { + store_id: None, + readers: HashMap::default(), + tablet_caches: HashMap::default(), + } + } +} pub struct StoreFsm { store: Store, diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index bb3db8c75d3..8c427378da3 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -1 +1,3 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +mod read; diff --git a/components/raftstore-v2/src/operation/read/mod.rs b/components/raftstore-v2/src/operation/read/mod.rs new file mode 100644 index 00000000000..8c427378da3 --- /dev/null +++ b/components/raftstore-v2/src/operation/read/mod.rs @@ -0,0 +1,3 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +mod read; diff --git a/components/raftstore-v2/src/operation/read/read.rs b/components/raftstore-v2/src/operation/read/read.rs new file mode 100644 index 00000000000..63878beeb22 --- /dev/null +++ b/components/raftstore-v2/src/operation/read/read.rs @@ -0,0 +1,256 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +// #[PerformanceCriticalPath] +use std::{ + cell::Cell, + collections::HashMap, + fmt::{self, Display, Formatter}, + marker::PhantomData, + ops::Deref, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, Mutex, + }, + time::Duration, +}; + +use crossbeam::{atomic::AtomicCell, channel::TrySendError}; +use engine_traits::{KvEngine, RaftEngine, Snapshot, TabletFactory}; +use fail::fail_point; +use kvproto::{ + errorpb, + kvrpcpb::ExtraOp as TxnExtraOp, + metapb, + raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse, ReadIndexResponse, Request, Response}, +}; +use pd_client::BucketMeta; +use raftstore::{ + errors::RAFTSTORE_IS_BUSY, + store::{ + cmd_resp, + util::{self, LeaseState, RegionReadProgress, RemoteLease}, + ReadDelegate, ReadExecutor, ReadExecutorProvider, ReadMetrics, ReadProgress, ReadResponse, + RegionSnapshot, RequestInspector, RequestPolicy, TrackVer, TxnExt, + }, + Error, Result, +}; +use slog::{debug, error, info, o, warn, Logger}; +use tikv_util::{ + codec::number::decode_u64, + lru::LruCache, + time::{monotonic_raw_now, Instant, ThreadReadId}, +}; +use time::Timespec; + +use crate::{fsm::StoreMeta, tablet::CachedTablet}; + +/// CachedReadDelegate is a wrapper the ReadDelegate and CachedTablet. +/// CachedTablet can fetch the latest tablet of this ReadDelegate's region. The +/// main purpose of this wrapping is to implement ReadExecutor where the latest +/// tablet is needed. 
+pub struct CachedReadDelegate +where + E: KvEngine, +{ + // The reason for this to be Arc, see the comment on get_delegate in + // raftstore/src/store/worker/read.rs + delegate: Arc, + cached_tablet: CachedTablet, +} + +impl Deref for CachedReadDelegate +where + E: KvEngine, +{ + type Target = ReadDelegate; + + fn deref(&self) -> &Self::Target { + self.delegate.as_ref() + } +} + +impl Clone for CachedReadDelegate +where + E: KvEngine, +{ + fn clone(&self) -> Self { + CachedReadDelegate { + delegate: Arc::clone(&self.delegate), + cached_tablet: self.cached_tablet.clone(), + } + } +} + +impl ReadExecutor for CachedReadDelegate +where + E: KvEngine, +{ + fn get_tablet(&mut self) -> &E { + self.cached_tablet.latest().unwrap() + } + + fn get_snapshot( + &mut self, + _: Option, + _: &mut Option>, + ) -> Arc { + Arc::new(self.cached_tablet.latest().unwrap().snapshot()) + } +} + +#[derive(Clone)] +struct StoreMetaDelegate +where + E: KvEngine, +{ + store_meta: Arc>>, +} + +impl StoreMetaDelegate +where + E: KvEngine, +{ + pub fn new(store_meta: Arc>>) -> StoreMetaDelegate { + StoreMetaDelegate { store_meta } + } +} + +impl ReadExecutorProvider for StoreMetaDelegate +where + E: KvEngine, +{ + type Executor = CachedReadDelegate; + + fn store_id(&self) -> Option { + self.store_meta.as_ref().lock().unwrap().store_id + } + + /// get the ReadDelegate with region_id and the number of delegates in the + /// StoreMeta + fn get_executor_and_len(&self, region_id: u64) -> (usize, Option) { + let meta = self.store_meta.as_ref().lock().unwrap(); + let reader = meta.readers.get(®ion_id).cloned(); + if let Some(reader) = reader { + // If reader is not None, cache must not be None. 
+ let cached_tablet = meta.tablet_caches.get(®ion_id).cloned().unwrap(); + return ( + meta.readers.len(), + Some(CachedReadDelegate { + delegate: Arc::new(reader), + cached_tablet, + }), + ); + } + (meta.readers.len(), None) + } +} + +#[cfg(test)] +mod tests { + use std::{borrow::Borrow, sync::mpsc::*, thread}; + + use crossbeam::channel::TrySendError; + use engine_test::{ + ctor::{CfOptions, DbOptions}, + kv::{KvTestEngine, KvTestSnapshot, TestTabletFactoryV2}, + }; + use engine_traits::{Peekable, SyncMutable, ALL_CFS, CF_DEFAULT}; + use kvproto::{metapb::Region, raft_cmdpb::*}; + use raftstore::store::{ + util::Lease, Callback, CasualMessage, CasualRouter, LocalReader, ProposalRouter, + RaftCommand, + }; + use tempfile::{Builder, TempDir}; + use tikv_kv::Snapshot; + use tikv_util::{codec::number::NumberEncoder, time::monotonic_raw_now}; + use time::Duration; + use txn_types::{Key, Lock, LockType, WriteBatchFlags}; + + use super::*; + + fn new_read_delegate( + region: &Region, + peer_id: u64, + term: u64, + applied_index_term: u64, + ) -> ReadDelegate { + let mut read_delegate_core = ReadDelegate::mock(region.id); + read_delegate_core.peer_id = peer_id; + read_delegate_core.term = term; + read_delegate_core.applied_term = applied_index_term; + read_delegate_core.region = Arc::new(region.clone()); + read_delegate_core + } + + #[test] + fn test_read_delegate() { + // Building a tablet factory + let ops = DbOptions::default(); + let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); + let path = Builder::new() + .prefix("test-local-reader") + .tempdir() + .unwrap(); + let factory = Arc::new(TestTabletFactoryV2::new( + path.path().to_str().unwrap(), + ops, + cf_opts, + )); + + let store_meta = + StoreMetaDelegate::new(Arc::new(Mutex::new(StoreMeta::::new()))); + + let tablet1; + let tablet2; + { + let mut meta = store_meta.store_meta.as_ref().lock().unwrap(); + + // Create read_delegate with region id 1 + let mut read_delegate = 
ReadDelegate::mock(1); + meta.readers.insert(1, read_delegate); + + // create tablet with region_id 1 and prepare some data + tablet1 = factory.create_tablet(1, 10).unwrap(); + tablet1.put_cf(CF_DEFAULT, b"a1", b"val1").unwrap(); + let cache = CachedTablet::new(Some(tablet1.clone())); + meta.tablet_caches.insert(1, cache); + + // Create read_delegate with region id 1 + let mut read_delegate = ReadDelegate::mock(2); + let cache = CachedTablet::new(Some(read_delegate.clone())); + meta.readers.insert(2, read_delegate); + + // create tablet with region_id 1 and prepare some data + tablet2 = factory.create_tablet(2, 10).unwrap(); + tablet2.put_cf(CF_DEFAULT, b"a2", b"val2").unwrap(); + let cache = CachedTablet::new(Some(tablet2.clone())); + meta.tablet_caches.insert(2, cache); + } + + let (_, delegate) = store_meta.get_executor_and_len(1); + let mut delegate = delegate.unwrap(); + let tablet = delegate.get_tablet(); + assert_eq!(tablet1.as_inner().path(), tablet.as_inner().path()); + let snapshot = delegate.get_snapshot(None, &mut None); + assert_eq!( + b"val1".to_vec(), + snapshot + .get(&Key::from_encoded(b"a1".to_vec())) + .unwrap() + .unwrap() + ); + + let (_, delegate) = store_meta.get_executor_and_len(2); + let mut delegate = delegate.unwrap(); + let tablet = delegate.get_tablet(); + assert_eq!(tablet2.as_inner().path(), tablet.as_inner().path()); + let snapshot = delegate.get_snapshot(None, &mut None); + assert_eq!( + b"val2".to_vec(), + snapshot + .get(&Key::from_encoded(b"a2".to_vec())) + .unwrap() + .unwrap() + ); + } +} diff --git a/components/raftstore-v2/src/tablet.rs b/components/raftstore-v2/src/tablet.rs index 8552b1a1f0f..7765f5c07b6 100644 --- a/components/raftstore-v2/src/tablet.rs +++ b/components/raftstore-v2/src/tablet.rs @@ -5,6 +5,7 @@ use std::sync::{ Arc, Mutex, }; +#[derive(Debug)] struct LatestTablet { data: Mutex>, version: AtomicU64, @@ -13,7 +14,7 @@ struct LatestTablet { /// Tablet may change during split, merge and applying snapshot. 
So we need a /// shared value to reflect the latest tablet. `CachedTablet` provide cache that /// can speed up common access. -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct CachedTablet { latest: Arc>, cache: Option, diff --git a/components/raftstore/src/router.rs b/components/raftstore/src/router.rs index 72d2bf8ca2b..400fee65813 100644 --- a/components/raftstore/src/router.rs +++ b/components/raftstore/src/router.rs @@ -12,9 +12,9 @@ use tikv_util::time::ThreadReadId; use crate::{ store::{ fsm::RaftRouter, - transport::{CasualRouter, ProposalRouter, SignificantRouter, StoreRouter}, - Callback, CasualMessage, LocalReader, PeerMsg, RaftCmdExtraOpts, RaftCommand, - SignificantMsg, StoreMsg, + transport::{CasualRouter, ProposalRouter, SignificantRouter}, + CachedReadDelegate, Callback, CasualMessage, LocalReader, PeerMsg, RaftCmdExtraOpts, + RaftCommand, SignificantMsg, StoreMetaDelegate, StoreMsg, StoreRouter, }, DiscardReason, Error as RaftStoreError, Result as RaftStoreResult, }; @@ -168,12 +168,21 @@ where } /// A router that routes messages to the raftstore -pub struct ServerRaftStoreRouter { +pub struct ServerRaftStoreRouter +where + EK: KvEngine, + ER: RaftEngine, +{ router: RaftRouter, - local_reader: RefCell, EK>>, + local_reader: + RefCell, EK, CachedReadDelegate, StoreMetaDelegate>>, } -impl Clone for ServerRaftStoreRouter { +impl Clone for ServerRaftStoreRouter +where + EK: KvEngine, + ER: RaftEngine, +{ fn clone(&self) -> Self { ServerRaftStoreRouter { router: self.router.clone(), @@ -186,7 +195,7 @@ impl ServerRaftStoreRouter { /// Creates a new router. 
pub fn new( router: RaftRouter, - reader: LocalReader, EK>, + reader: LocalReader, EK, CachedReadDelegate, StoreMetaDelegate>, ) -> ServerRaftStoreRouter { let local_reader = RefCell::new(reader); ServerRaftStoreRouter { diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 9e126d4d141..ecdb8653147 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -115,7 +115,6 @@ pub struct StoreInfo { } pub struct StoreMeta { - /// store id pub store_id: Option, /// region_end_key -> region_id pub region_ranges: BTreeMap, u64>, diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index d75fef94323..d47cc892033 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -67,10 +67,11 @@ pub use self::{ txn_ext::{LocksStatus, PeerPessimisticLocks, PessimisticLockPair, TxnExt}, util::{RegionReadProgress, RegionReadProgressRegistry}, worker::{ - AutoSplitController, Bucket, BucketRange, CheckLeaderRunner, CheckLeaderTask, - FlowStatistics, FlowStatsReporter, KeyEntry, LocalReader, PdTask, QueryStats, - RaftlogFetchRunner, RaftlogFetchTask, ReadDelegate, ReadStats, RefreshConfigTask, - RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, TrackVer, - WriteStats, + AutoSplitController, Bucket, BucketRange, CachedReadDelegate, CheckLeaderRunner, + CheckLeaderTask, FlowStatistics, FlowStatsReporter, KeyEntry, LocalReadContext, + LocalReader, PdTask, QueryStats, RaftlogFetchRunner, RaftlogFetchTask, ReadDelegate, + ReadExecutor, ReadExecutorProvider, ReadMetrics, ReadProgress, ReadStats, + RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, + SplitConfigManager, StoreMetaDelegate, TrackVer, WriteStats, }, }; diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 7c57eeb9ae4..99287ca493c 100644 --- 
a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -79,7 +79,7 @@ use super::{ self, check_region_epoch, is_initial_msg, AdminCmdEpochState, ChangePeerI, ConfChangeKind, Lease, LeaseState, NORMAL_REQ_CHECK_CONF_VER, NORMAL_REQ_CHECK_VER, }, - DestroyPeerJob, + DestroyPeerJob, LocalReadContext, }; use crate::{ coprocessor::{CoprocessorHost, RegionChangeEvent, RegionChangeReason, RoleChange}, @@ -4653,7 +4653,7 @@ where } } - let mut resp = ctx.execute(&req, &Arc::new(region), read_index, None); + let mut resp = ctx.execute(&req, &Arc::new(region), read_index, None, None); if let Some(snap) = resp.snapshot.as_mut() { snap.txn_ext = Some(self.txn_ext.clone()); snap.bucket_meta = self.region_buckets.as_ref().map(|b| b.meta.clone()); @@ -5485,11 +5485,15 @@ where EK: KvEngine, ER: RaftEngine, { - fn get_engine(&self) -> &EK { + fn get_tablet(&mut self) -> &EK { &self.engines.kv } - fn get_snapshot(&mut self, _: Option) -> Arc { + fn get_snapshot( + &mut self, + _: Option, + _: &mut Option>, + ) -> Arc { Arc::new(self.engines.kv.snapshot()) } } diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index 583e9341f0d..1651183f976 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -32,7 +32,10 @@ pub use self::{ query_stats::QueryStats, raftlog_fetch::{Runner as RaftlogFetchRunner, Task as RaftlogFetchTask}, raftlog_gc::{Runner as RaftlogGcRunner, Task as RaftlogGcTask}, - read::{LocalReader, Progress as ReadProgress, ReadDelegate, ReadExecutor, TrackVer}, + read::{ + CachedReadDelegate, LocalReadContext, LocalReader, Progress as ReadProgress, ReadDelegate, + ReadExecutor, ReadExecutorProvider, ReadMetrics, StoreMetaDelegate, TrackVer, + }, refresh_config::{ BatchComponent as RaftStoreBatchComponent, Runner as RefreshConfigRunner, Task as RefreshConfigTask, diff --git a/components/raftstore/src/store/worker/read.rs 
b/components/raftstore/src/store/worker/read.rs index b7724789d4b..f3d52be5044 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -4,6 +4,7 @@ use std::{ cell::Cell, fmt::{self, Display, Formatter}, + ops::Deref, sync::{ atomic::{AtomicU64, Ordering}, Arc, Mutex, @@ -42,16 +43,21 @@ use crate::{ Error, Result, }; +/// #[RaftstoreCommon] pub trait ReadExecutor { - fn get_engine(&self) -> &E; - fn get_snapshot(&mut self, ts: Option) -> Arc; + fn get_tablet(&mut self) -> &E; + fn get_snapshot( + &mut self, + ts: Option, + read_context: &mut Option>, + ) -> Arc; - fn get_value(&self, req: &Request, region: &metapb::Region) -> Result { + fn get_value(&mut self, req: &Request, region: &metapb::Region) -> Result { let key = req.get_get().get_key(); // region key range has no data prefix, so we must use origin key to check. util::check_key_in_region(key, region)?; - let engine = self.get_engine(); + let engine = self.get_tablet(); let mut resp = Response::default(); let res = if !req.get_get().get_cf().is_empty() { let cf = req.get_get().get_cf(); @@ -89,6 +95,7 @@ pub trait ReadExecutor { region: &Arc, read_index: Option, mut ts: Option, + mut read_context: Option>, ) -> ReadResponse { let requests = msg.get_requests(); let mut response = ReadResponse { @@ -112,8 +119,10 @@ pub trait ReadExecutor { } }, CmdType::Snap => { - let snapshot = - RegionSnapshot::from_snapshot(self.get_snapshot(ts.take()), region.clone()); + let snapshot = RegionSnapshot::from_snapshot( + self.get_snapshot(ts.take(), &mut read_context), + region.clone(), + ); response.snapshot = Some(snapshot); Response::default() } @@ -143,7 +152,7 @@ pub trait ReadExecutor { } } -/// A read only delegate of `Peer`. +/// #[RaftstoreCommon]: A read only delegate of `Peer`. 
#[derive(Clone, Debug)] pub struct ReadDelegate { pub region: Arc, @@ -165,6 +174,50 @@ pub struct ReadDelegate { pub track_ver: TrackVer, } +/// CachedReadDelegate is a wrapper the ReadDelegate and kv_engine. LocalReader +/// dispatch local read requests to ReadDeleage according to the region_id where +/// ReadDelegate needs kv_engine to read data or fetch snapshot. +pub struct CachedReadDelegate +where + E: KvEngine, +{ + delegate: Arc, + kv_engine: E, +} + +impl Deref for CachedReadDelegate +where + E: KvEngine, +{ + type Target = ReadDelegate; + + fn deref(&self) -> &Self::Target { + self.delegate.as_ref() + } +} + +impl Clone for CachedReadDelegate +where + E: KvEngine, +{ + fn clone(&self) -> Self { + CachedReadDelegate { + delegate: Arc::clone(&self.delegate), + kv_engine: self.kv_engine.clone(), + } + } +} + +/// #[RaftstoreCommon]: LocalReadContext combines some LocalReader's fields for temporary usage. +pub struct LocalReadContext<'a, E> +where + E: KvEngine, +{ + metrics: &'a mut ReadMetrics, + read_id: &'a mut ThreadReadId, + snap_cache: &'a mut Box>>, +} + impl Drop for ReadDelegate { fn drop(&mut self) { // call `inc` to notify the source `ReadDelegate` is dropped @@ -172,6 +225,70 @@ impl Drop for ReadDelegate { } } +/// #[RaftstoreCommon] +pub trait ReadExecutorProvider: Send + Clone + 'static +where + E: KvEngine, +{ + type Executor: ReadExecutor; + + fn store_id(&self) -> Option; + + /// get the ReadDelegate with region_id and the number of delegates in the + /// StoreMeta + fn get_executor_and_len(&self, region_id: u64) -> (usize, Option); +} + +#[derive(Clone)] +pub struct StoreMetaDelegate +where + E: KvEngine, +{ + store_meta: Arc>, + kv_engine: E, +} + +impl StoreMetaDelegate +where + E: KvEngine, +{ + pub fn new(store_meta: Arc>, kv_engine: E) -> Self { + StoreMetaDelegate { + store_meta, + kv_engine, + } + } +} + +impl ReadExecutorProvider for StoreMetaDelegate +where + E: KvEngine, +{ + type Executor = CachedReadDelegate; + + fn 
store_id(&self) -> Option { + self.store_meta.as_ref().lock().unwrap().store_id + } + + /// get the ReadDelegate with region_id and the number of delegates in the + /// StoreMeta + fn get_executor_and_len(&self, region_id: u64) -> (usize, Option) { + let meta = self.store_meta.as_ref().lock().unwrap(); + let reader = meta.readers.get(®ion_id).cloned(); + if let Some(reader) = reader { + return ( + meta.readers.len(), + Some(CachedReadDelegate { + delegate: Arc::new(reader), + kv_engine: self.kv_engine.clone(), + }), + ); + } + (meta.readers.len(), None) + } +} + +/// #[RaftstoreCommon] #[derive(Debug)] pub struct TrackVer { version: Arc, @@ -193,14 +310,14 @@ impl TrackVer { } // Take `&mut self` to prevent calling `inc` and `clone` at the same time - fn inc(&mut self) { + pub fn inc(&mut self) { // Only the source `TrackVer` can increase version if self.source { self.version.fetch_add(1, Ordering::Relaxed); } } - fn any_new(&self) -> bool { + pub fn any_new(&self) -> bool { self.version.load(Ordering::Relaxed) > self.local_ver } } @@ -243,7 +360,7 @@ impl ReadDelegate { } } - fn fresh_valid_ts(&mut self) { + pub fn fresh_valid_ts(&mut self) { self.last_valid_ts = monotonic_raw_now(); } @@ -276,7 +393,7 @@ impl ReadDelegate { // If the remote lease will be expired in near future send message // to `raftstore` renew it - fn maybe_renew_lease_advance( + pub fn maybe_renew_lease_advance( &self, router: &dyn CasualRouter, ts: Timespec, @@ -301,7 +418,7 @@ impl ReadDelegate { } } - fn is_in_leader_lease(&self, ts: Timespec, metrics: &mut ReadMetrics) -> bool { + pub fn is_in_leader_lease(&self, ts: Timespec, metrics: &mut ReadMetrics) -> bool { if let Some(ref lease) = self.leader_lease { let term = lease.term(); if term == self.term { @@ -320,7 +437,7 @@ impl ReadDelegate { false } - fn check_stale_read_safe( + pub fn check_stale_read_safe( &self, read_ts: u64, metrics: &mut ReadMetrics, @@ -387,6 +504,7 @@ impl Display for ReadDelegate { } } +/// #[RaftstoreCommon] 
#[derive(Debug)] pub enum Progress { Region(metapb::Region), @@ -418,63 +536,74 @@ impl Progress { } } -pub struct LocalReader +/// #[RaftstoreCommon]: LocalReader is an entry point where local read requests are dipatch to the +/// relevant regions by LocalReader so that these requests can be handled by the +/// relevant ReadDelegate respectively. +pub struct LocalReader where C: ProposalRouter + CasualRouter, E: KvEngine, + D: ReadExecutor + Deref, + S: ReadExecutorProvider, { - store_id: Cell>, - store_meta: Arc>, + pub store_id: Cell>, + store_meta: S, kv_engine: E, - metrics: ReadMetrics, + pub metrics: ReadMetrics, // region id -> ReadDelegate // The use of `Arc` here is a workaround, see the comment at `get_delegate` - delegates: LruCache>, - snap_cache: Option>, + pub delegates: LruCache, + snap_cache: Box>>, cache_read_id: ThreadReadId, // A channel to raftstore. router: C, } -impl ReadExecutor for LocalReader +impl ReadExecutor for CachedReadDelegate where - C: ProposalRouter + CasualRouter, E: KvEngine, { - fn get_engine(&self) -> &E { + fn get_tablet(&mut self) -> &E { &self.kv_engine } - fn get_snapshot(&mut self, create_time: Option) -> Arc { - self.metrics.local_executed_requests += 1; + fn get_snapshot( + &mut self, + create_time: Option, + read_context: &mut Option>, + ) -> Arc { + let ctx = read_context.as_mut().unwrap(); + ctx.metrics.local_executed_requests += 1; if let Some(ts) = create_time { - if ts == self.cache_read_id { - if let Some(snap) = self.snap_cache.as_ref() { - self.metrics.local_executed_snapshot_cache_hit += 1; + if ts == *ctx.read_id { + if let Some(snap) = ctx.snap_cache.as_ref().as_ref() { + ctx.metrics.local_executed_snapshot_cache_hit += 1; return snap.clone(); } } let snap = Arc::new(self.kv_engine.snapshot()); - self.cache_read_id = ts; - self.snap_cache = Some(snap.clone()); + *ctx.read_id = ts; + *ctx.snap_cache = Box::new(Some(snap.clone())); return snap; } Arc::new(self.kv_engine.snapshot()) } } -impl LocalReader +impl 
LocalReader where C: ProposalRouter + CasualRouter, E: KvEngine, + D: ReadExecutor + Deref + Clone, + S: ReadExecutorProvider, { - pub fn new(kv_engine: E, store_meta: Arc>, router: C) -> Self { + pub fn new(kv_engine: E, store_meta: S, router: C) -> Self { let cache_read_id = ThreadReadId::new(); LocalReader { store_meta, kv_engine, router, - snap_cache: None, + snap_cache: Box::new(None), cache_read_id, store_id: Cell::new(None), metrics: Default::default(), @@ -520,28 +649,22 @@ where // choice is use `Rc` but `LocalReader: Send` will be violated, which is // required by `LocalReadRouter: Send`, use `Arc` will introduce extra cost but // make the logic clear - fn get_delegate(&mut self, region_id: u64) -> Option> { + pub fn get_delegate(&mut self, region_id: u64) -> Option { let rd = match self.delegates.get(®ion_id) { // The local `ReadDelegate` is up to date - Some(d) if !d.track_ver.any_new() => Some(Arc::clone(d)), + Some(d) if !d.track_ver.any_new() => Some(d.clone()), _ => { debug!("update local read delegate"; "region_id" => region_id); self.metrics.rejected_by_cache_miss += 1; - let (meta_len, meta_reader) = { - let meta = self.store_meta.lock().unwrap(); - ( - meta.readers.len(), - meta.readers.get(®ion_id).cloned().map(Arc::new), - ) - }; + let (meta_len, meta_reader) = { self.store_meta.get_executor_and_len(region_id) }; // Remove the stale delegate self.delegates.remove(®ion_id); self.delegates.resize(meta_len); match meta_reader { Some(reader) => { - self.delegates.insert(region_id, Arc::clone(&reader)); + self.delegates.insert(region_id, reader.clone()); Some(reader) } None => None, @@ -552,13 +675,13 @@ where rd.filter(|r| !r.pending_remove) } - fn pre_propose_raft_command( + pub fn pre_propose_raft_command( &mut self, req: &RaftCmdRequest, - ) -> Result, RequestPolicy)>> { + ) -> Result> { // Check store id. 
if self.store_id.get().is_none() { - let store_id = self.store_meta.lock().unwrap().store_id; + let store_id = self.store_meta.store_id(); self.store_id.set(store_id); } let store_id = self.store_id.get().unwrap(); @@ -627,7 +750,8 @@ where cb: Callback, ) { match self.pre_propose_raft_command(&req) { - Ok(Some((delegate, policy))) => { + Ok(Some((mut delegate, policy))) => { + let delegate_ext: LocalReadContext<'_, E>; let mut response = match policy { // Leader can read local if and only if it is in lease. RequestPolicy::ReadLocal => { @@ -647,8 +771,18 @@ where self.redirect(RaftCommand::new(req, cb)); return; } - let response = self.execute(&req, &delegate.region, None, read_id); + + delegate_ext = LocalReadContext { + metrics: &mut self.metrics, + snap_cache: &mut self.snap_cache, + read_id: &mut self.cache_read_id, + }; + + let region = Arc::clone(&delegate.region); + let response = + delegate.execute(&req, ®ion, None, read_id, Some(delegate_ext)); // Try renew lease in advance + delegate.maybe_renew_lease_advance( &self.router, snapshot_ts, @@ -667,8 +801,16 @@ where return; } + delegate_ext = LocalReadContext { + metrics: &mut self.metrics, + snap_cache: &mut self.snap_cache, + read_id: &mut self.cache_read_id, + }; + + let region = Arc::clone(&delegate.region); // Getting the snapshot - let response = self.execute(&req, &delegate.region, None, read_id); + let response = + delegate.execute(&req, ®ion, None, read_id, Some(delegate_ext)); // Double check in case `safe_ts` change after the first check and before // getting snapshot @@ -725,14 +867,16 @@ where } pub fn release_snapshot_cache(&mut self) { - self.snap_cache.take(); + self.snap_cache.as_mut().take(); } } -impl Clone for LocalReader +impl Clone for LocalReader where C: ProposalRouter + CasualRouter + Clone, E: KvEngine, + D: ReadExecutor + Deref, + S: ReadExecutorProvider, { fn clone(&self) -> Self { LocalReader { @@ -748,6 +892,7 @@ where } } +/// #[RaftstoreCommon] struct Inspector<'r, 'm> { 
delegate: &'r ReadDelegate, metrics: &'m mut ReadMetrics, @@ -786,26 +931,27 @@ impl<'r, 'm> RequestInspector for Inspector<'r, 'm> { const METRICS_FLUSH_INTERVAL: u64 = 15_000; // 15s +/// #[RaftstoreCommon] #[derive(Clone)] -struct ReadMetrics { - local_executed_requests: u64, - local_executed_stale_read_requests: u64, - local_executed_snapshot_cache_hit: u64, +pub struct ReadMetrics { + pub local_executed_requests: u64, + pub local_executed_stale_read_requests: u64, + pub local_executed_snapshot_cache_hit: u64, // TODO: record rejected_by_read_quorum. - rejected_by_store_id_mismatch: u64, - rejected_by_peer_id_mismatch: u64, - rejected_by_term_mismatch: u64, - rejected_by_lease_expire: u64, - rejected_by_no_region: u64, - rejected_by_no_lease: u64, - rejected_by_epoch: u64, - rejected_by_applied_term: u64, - rejected_by_channel_full: u64, - rejected_by_cache_miss: u64, - rejected_by_safe_timestamp: u64, - renew_lease_advance: u64, - - last_flush_time: Instant, + pub rejected_by_store_id_mismatch: u64, + pub rejected_by_peer_id_mismatch: u64, + pub rejected_by_term_mismatch: u64, + pub rejected_by_lease_expire: u64, + pub rejected_by_no_region: u64, + pub rejected_by_no_lease: u64, + pub rejected_by_epoch: u64, + pub rejected_by_applied_term: u64, + pub rejected_by_channel_full: u64, + pub rejected_by_cache_miss: u64, + pub rejected_by_safe_timestamp: u64, + pub renew_lease_advance: u64, + + pub last_flush_time: Instant, } impl Default for ReadMetrics { @@ -923,7 +1069,7 @@ mod tests { use crossbeam::channel::TrySendError; use engine_test::kv::{KvTestEngine, KvTestSnapshot}; - use engine_traits::ALL_CFS; + use engine_traits::{Peekable, SyncMutable, ALL_CFS, CF_DEFAULT}; use kvproto::raft_cmdpb::*; use tempfile::{Builder, TempDir}; use tikv_util::{codec::number::NumberEncoder, time::monotonic_raw_now}; @@ -980,13 +1126,18 @@ mod tests { store_meta: Arc>, ) -> ( TempDir, - LocalReader, + LocalReader< + MockRouter, + KvTestEngine, + CachedReadDelegate, + 
StoreMetaDelegate, + >, Receiver>, ) { let path = Builder::new().prefix(path).tempdir().unwrap(); let db = engine_test::kv::new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap(); let (ch, rx, _) = MockRouter::new(); - let mut reader = LocalReader::new(db, store_meta, ch); + let mut reader = LocalReader::new(db.clone(), StoreMetaDelegate::new(store_meta, db), ch); reader.store_id = Cell::new(Some(store_id)); (path, reader, rx) } @@ -1004,7 +1155,12 @@ mod tests { } fn must_redirect( - reader: &mut LocalReader, + reader: &mut LocalReader< + MockRouter, + KvTestEngine, + CachedReadDelegate, + StoreMetaDelegate, + >, rx: &Receiver>, cmd: RaftCmdRequest, ) { @@ -1024,7 +1180,12 @@ mod tests { } fn must_not_redirect( - reader: &mut LocalReader, + reader: &mut LocalReader< + MockRouter, + KvTestEngine, + CachedReadDelegate, + StoreMetaDelegate, + >, rx: &Receiver>, task: RaftCommand, ) { @@ -1386,4 +1547,157 @@ mod tests { let d = reader.get_delegate(1).unwrap(); assert_eq!(d.leader_lease.clone().unwrap().term(), 3); } + + #[test] + fn test_read_delegate() { + let path = Builder::new() + .prefix("test-local-reader") + .tempdir() + .unwrap(); + let kv_engine = + engine_test::kv::new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap(); + kv_engine.put_cf(CF_DEFAULT, b"a1", b"val1").unwrap(); + let store_meta = + StoreMetaDelegate::new(Arc::new(Mutex::new(StoreMeta::new(0))), kv_engine.clone()); + + { + let mut meta = store_meta.store_meta.as_ref().lock().unwrap(); + + // Create read_delegate with region id 1 + let read_delegate = ReadDelegate::mock(1); + meta.readers.insert(1, read_delegate); + + // Create read_delegate with region id 1 + let read_delegate = ReadDelegate::mock(2); + meta.readers.insert(2, read_delegate); + } + + let mut read_id = ThreadReadId::new(); + let mut read_metrics = ReadMetrics::default(); + let mut snap_cache = Box::new(None); + + let read_id_copy = Some(read_id.clone()); + + let mut read_context = Some(LocalReadContext { + metrics: &mut 
read_metrics, + read_id: &mut read_id, + snap_cache: &mut snap_cache, + }); + + let (_, delegate) = store_meta.get_executor_and_len(1); + let mut delegate = delegate.unwrap(); + let tablet = delegate.get_tablet(); + assert_eq!(kv_engine.as_inner().path(), tablet.as_inner().path()); + let snapshot = delegate.get_snapshot(read_id_copy.clone(), &mut read_context); + let val = snapshot.get_value(b"a1").unwrap().unwrap(); + assert_eq!(b"val1", val.deref()); + + let (_, delegate) = store_meta.get_executor_and_len(2); + let mut delegate = delegate.unwrap(); + let tablet = delegate.get_tablet(); + assert_eq!(kv_engine.as_inner().path(), tablet.as_inner().path()); + let snapshot = delegate.get_snapshot(read_id_copy, &mut read_context); + let val = snapshot.get_value(b"a1").unwrap().unwrap(); + assert_eq!(b"val1", val.deref()); + + assert!(snap_cache.as_ref().is_some()); + assert_eq!(read_metrics.local_executed_requests, 2); + assert_eq!(read_metrics.local_executed_snapshot_cache_hit, 1); + } + + #[test] + fn test_snap_cache_hit() { + let store_meta = Arc::new(Mutex::new(StoreMeta::new(0))); + let (_tmp, mut reader, _) = new_reader("test-local-reader", 1, store_meta.clone()); + + let mut region1 = metapb::Region::default(); + region1.set_id(1); + + // Register region 1 + { + let mut meta = store_meta.lock().unwrap(); + let read_delegate = ReadDelegate { + tag: String::new(), + region: Arc::new(region1.clone()), + peer_id: 1, + term: 1, + applied_term: 1, + leader_lease: None, + last_valid_ts: Timespec::new(0, 0), + txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::default())), + txn_ext: Arc::new(TxnExt::default()), + read_progress: Arc::new(RegionReadProgress::new(®ion1, 1, 1, "".to_owned())), + pending_remove: false, + track_ver: TrackVer::new(), + bucket_meta: None, + }; + meta.readers.insert(1, read_delegate); + } + + let mut delegate = reader.get_delegate(region1.id).unwrap(); + let read_id = Some(ThreadReadId::new()); + + { + let mut read_context = 
Some(LocalReadContext { + metrics: &mut reader.metrics, + snap_cache: &mut reader.snap_cache, + read_id: &mut reader.cache_read_id, + }); + + for _ in 0..10 { + // Different region id should reuse the cache + let _ = delegate.get_snapshot(read_id.clone(), &mut read_context); + } + } + // We should hit cache 9 times + assert_eq!(reader.metrics.local_executed_snapshot_cache_hit, 9); + + let read_id = Some(ThreadReadId::new()); + + { + let read_context = LocalReadContext { + metrics: &mut reader.metrics, + snap_cache: &mut reader.snap_cache, + read_id: &mut reader.cache_read_id, + }; + + let _ = delegate.get_snapshot(read_id.clone(), &mut Some(read_context)); + } + // This time, we will miss the cache + assert_eq!(reader.metrics.local_executed_snapshot_cache_hit, 9); + + { + let read_context = LocalReadContext { + metrics: &mut reader.metrics, + snap_cache: &mut reader.snap_cache, + read_id: &mut reader.cache_read_id, + }; + let _ = delegate.get_snapshot(read_id.clone(), &mut Some(read_context)); + // We can hit it again. + assert_eq!(reader.metrics.local_executed_snapshot_cache_hit, 10); + } + + reader.release_snapshot_cache(); + { + let read_context = LocalReadContext { + metrics: &mut reader.metrics, + snap_cache: &mut reader.snap_cache, + read_id: &mut reader.cache_read_id, + }; + let _ = delegate.get_snapshot(read_id.clone(), &mut Some(read_context)); + } + // After release, we will mss the cache even with the prevsiou read_id. + assert_eq!(reader.metrics.local_executed_snapshot_cache_hit, 10); + + { + let read_context = LocalReadContext { + metrics: &mut reader.metrics, + snap_cache: &mut reader.snap_cache, + read_id: &mut reader.cache_read_id, + }; + let _ = delegate.get_snapshot(read_id, &mut Some(read_context)); + } + // We can hit it again. 
+ assert_eq!(reader.metrics.local_executed_snapshot_cache_hit, 11); + } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index d8824453a24..fe2b0dd1c26 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -76,7 +76,7 @@ use raftstore::{ }, memory::MEMTRACE_ROOT as MEMTRACE_RAFTSTORE, AutoSplitController, CheckLeaderRunner, GlobalReplicationState, LocalReader, SnapManager, - SnapManagerBuilder, SplitCheckRunner, SplitConfigManager, + SnapManagerBuilder, SplitCheckRunner, SplitConfigManager, StoreMetaDelegate, }, RaftRouterCompactedEventSender, }; @@ -257,7 +257,10 @@ type LocalServer = Server, resolve::PdStoreAddrResolver, LocalRaftKv>; type LocalRaftKv = RaftKv>; -impl TiKvServer { +impl TiKvServer +where + ER: RaftEngine, +{ fn init(mut config: TiKvConfig) -> TiKvServer { tikv_util::thread_group::set_properties(Some(GroupProperties::default())); // It is okay use pd config and security config before `init_config`, @@ -561,7 +564,11 @@ impl TiKvServer { let engine = RaftKv::new( ServerRaftStoreRouter::new( self.router.clone(), - LocalReader::new(engines.kv.clone(), store_meta.clone(), self.router.clone()), + LocalReader::new( + engines.kv.clone(), + StoreMetaDelegate::new(store_meta.clone(), engines.kv.clone()), + self.router.clone(), + ), ), engines.kv.clone(), ); diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index ac3e3a6cc6e..2584d29629e 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -290,7 +290,11 @@ impl Simulator for NodeCluster { Arc::new(SstImporter::new(&cfg.import, dir, None, cfg.storage.api_version()).unwrap()) }; - let local_reader = LocalReader::new(engines.kv.clone(), store_meta.clone(), router.clone()); + let local_reader = LocalReader::new( + engines.kv.clone(), + StoreMetaDelegate::new(store_meta.clone(), engines.kv.clone()), + router.clone(), + ); let cfg_controller = 
ConfigController::new(cfg.tikv.clone()); let split_check_runner = diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index e22b730151a..7107c668c3d 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -38,7 +38,7 @@ use raftstore::{ fsm::{store::StoreMeta, ApplyRouter, RaftBatchSystem, RaftRouter}, msg::RaftCmdExtraOpts, AutoSplitController, Callback, CheckLeaderRunner, LocalReader, RegionSnapshot, SnapManager, - SnapManagerBuilder, SplitCheckRunner, SplitConfigManager, + SnapManagerBuilder, SplitCheckRunner, SplitConfigManager, StoreMetaDelegate, }, Result, }; @@ -284,7 +284,11 @@ impl ServerCluster { } } - let local_reader = LocalReader::new(engines.kv.clone(), store_meta.clone(), router.clone()); + let local_reader = LocalReader::new( + engines.kv.clone(), + StoreMetaDelegate::new(store_meta.clone(), engines.kv.clone()), + router.clone(), + ); let raft_router = ServerRaftStoreRouter::new(router.clone(), local_reader); let sim_router = SimulateTransport::new(raft_router.clone()); diff --git a/components/tikv_kv/Cargo.toml b/components/tikv_kv/Cargo.toml index 5b640d3b0b7..cd6543dafe8 100644 --- a/components/tikv_kv/Cargo.toml +++ b/components/tikv_kv/Cargo.toml @@ -28,6 +28,7 @@ test-engines-panic = [ backtrace = "0.3" engine_panic = { path = "../engine_panic", default-features = false } engine_rocks = { path = "../engine_rocks", default-features = false } +engine_test = { path = "../engine_test", default-features = false } engine_traits = { path = "../engine_traits", default-features = false } error_code = { path = "../error_code", default-features = false } fail = "0.5" diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 7ddf338d870..bd94a3638d4 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -96,10 +96,7 @@ impl KvEngineFactoryBuilder { inner: Arc::new(self.inner), compact_event_sender: 
self.compact_event_sender.clone(), }; - KvEngineFactoryV2 { - inner: factory, - registry: Arc::default(), - } + KvEngineFactoryV2::new(factory) } } diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index cf988f9da37..4132b2e4c25 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -15,8 +15,17 @@ const TOMBSTONE_MARK: &str = "TOMBSTONE_TABLET"; #[derive(Clone)] pub struct KvEngineFactoryV2 { - pub inner: KvEngineFactory, - pub registry: Arc>>, + inner: KvEngineFactory, + registry: Arc>>, +} + +impl KvEngineFactoryV2 { + pub fn new(inner: KvEngineFactory) -> Self { + KvEngineFactoryV2 { + inner, + registry: Arc::new(Mutex::new(HashMap::default())), + } + } } // Extract tablet id and tablet suffix from the path. From 1c87fbe045b5b34f446101142a7ebb26b0dc4c92 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 4 Aug 2022 17:34:07 +0800 Subject: [PATCH 0134/1149] *: all in camel case (#13215) close tikv/tikv#13216 Signed-off-by: tabokie --- clippy.toml | 4 +- cmd/tikv-ctl/src/executor.rs | 6 +- cmd/tikv-ctl/src/main.rs | 14 +- cmd/tikv-ctl/src/util.rs | 4 +- cmd/tikv-server/src/main.rs | 8 +- components/api_version/src/api_v2.rs | 2 +- components/api_version/src/lib.rs | 14 +- components/backup/src/errors.rs | 6 +- components/cdc/src/delegate.rs | 38 +-- components/cdc/src/endpoint.rs | 74 +++--- components/cdc/src/initializer.rs | 18 +- components/cdc/src/observer.rs | 18 +- components/cdc/src/service.rs | 30 +-- components/engine_rocks/src/util.rs | 4 +- components/engine_traits/src/errors.rs | 4 +- components/engine_traits/src/lib.rs | 2 +- components/engine_traits/src/perf_context.rs | 4 +- components/error_code/src/backup_stream.rs | 6 +- components/error_code/src/causal_ts.rs | 6 +- components/error_code/src/cloud.rs | 4 +- components/error_code/src/encryption.rs | 2 +- components/error_code/src/engine.rs | 4 +- components/error_code/src/pd.rs | 6 +- components/error_code/src/raftstore.rs | 2 +- 
components/error_code/src/sst_importer.rs | 6 +- components/error_code/src/storage.rs | 6 +- components/file_system/src/io_stats/proc.rs | 18 +- components/raftstore/src/coprocessor/mod.rs | 22 +- components/raftstore/src/store/util.rs | 2 +- components/raftstore/src/store/worker/pd.rs | 42 ++-- .../src/store/worker/split_controller.rs | 10 +- components/resolved_ts/src/advance.rs | 4 +- components/resolved_ts/src/endpoint.rs | 10 +- components/resolved_ts/src/scanner.rs | 4 +- components/resolved_ts/src/sinker.rs | 4 +- components/server/src/raft_engine_switch.rs | 4 +- components/server/src/server.rs | 44 ++-- components/server/src/setup.rs | 10 +- components/sst_importer/src/errors.rs | 8 +- components/sst_importer/src/sst_writer.rs | 2 +- components/test_backup/src/lib.rs | 4 +- components/test_coprocessor/src/dag.rs | 48 ++-- components/test_raftstore/src/config.rs | 10 +- components/test_raftstore/src/util.rs | 8 +- .../src/codec/chunk/chunk.rs | 8 +- .../src/codec/chunk/column.rs | 4 +- .../src/codec/collation/encoding/gbk.rs | 4 +- .../src/codec/collation/mod.rs | 6 +- .../src/codec/row/v2/compat_v1.rs | 4 +- .../tidb_query_datatype/src/codec/table.rs | 4 +- .../tidb_query_datatype/src/def/eval_type.rs | 6 +- .../tidb_query_datatype/src/def/field_type.rs | 26 +- .../tidb_query_expr/src/impl_compare.rs | 224 +++++++++--------- components/tidb_query_expr/src/impl_math.rs | 18 +- components/tidb_query_expr/src/lib.rs | 98 ++++---- .../tidb_query_expr/src/types/expr_eval.rs | 4 +- components/tikv_alloc/src/error.rs | 6 +- etc/error_code.toml | 8 +- src/config.rs | 168 ++++++------- src/coprocessor/checksum.rs | 6 +- src/coprocessor/dag/mod.rs | 4 +- src/coprocessor/dag/storage_impl.rs | 6 +- src/coprocessor/statistics/analyze.rs | 16 +- src/import/duplicate_detect.rs | 4 +- src/server/engine_factory.rs | 4 +- src/server/engine_factory_v2.rs | 6 +- src/server/errors.rs | 4 +- src/server/service/diagnostics/log.rs | 4 +- src/server/service/kv.rs | 2 +- 
src/server/status_server/mod.rs | 6 +- src/storage/mod.rs | 4 +- .../coprocessor_executors/index_scan/mod.rs | 8 +- .../coprocessor_executors/index_scan/util.rs | 12 +- .../coprocessor_executors/integrated/mod.rs | 10 +- .../coprocessor_executors/integrated/util.rs | 12 +- .../coprocessor_executors/table_scan/mod.rs | 8 +- .../coprocessor_executors/table_scan/util.rs | 12 +- .../coprocessor_executors/util/bencher.rs | 8 +- .../util/scan_bencher.rs | 12 +- .../misc/coprocessor/codec/chunk/mod.rs | 2 +- tests/failpoints/cases/test_coprocessor.rs | 28 +-- .../integrations/config/dynamic/gc_worker.rs | 10 +- .../config/dynamic/pessimistic_txn.rs | 4 +- .../integrations/config/dynamic/raftstore.rs | 6 +- tests/integrations/config/dynamic/snap.rs | 6 +- .../config/dynamic/split_check.rs | 6 +- tests/integrations/config/mod.rs | 24 +- .../integrations/config/test_config_client.rs | 8 +- .../integrations/coprocessor/test_checksum.rs | 4 +- tests/integrations/coprocessor/test_select.rs | 94 ++++---- tests/integrations/import/test_sst_service.rs | 4 +- tests/integrations/import/util.rs | 10 +- .../resource_metering/test_cpu.rs | 4 +- .../resource_metering/test_read_keys.rs | 4 +- .../resource_metering/test_suite/mod.rs | 4 +- tests/integrations/storage/test_titan.rs | 4 +- 96 files changed, 753 insertions(+), 751 deletions(-) diff --git a/clippy.toml b/clippy.toml index 2a4bb3e82b2..1530b3cb60b 100644 --- a/clippy.toml +++ b/clippy.toml @@ -6,4 +6,6 @@ disallowed-methods = [ { path = "futures_executor::thread_pool::ThreadPoolBuilder::after_start", reason = "Wrapper function `::after_start_wrapper` should be used instead, refer to https://github.com/tikv/tikv/pull/12442 for more details." }, { path = "futures_executor::thread_pool::ThreadPoolBuilder::before_stop", reason = "Wrapper function `::before_stop_wrapper` should be used instead, refer to https://github.com/tikv/tikv/pull/12442 for more details." 
}, -] \ No newline at end of file +] +avoid-breaking-exported-api = false +upper-case-acronyms-aggressive = true diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index df2c3cfbadf..aa2f604b547 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -27,7 +27,7 @@ use raftstore::store::INIT_EPOCH_CONF_VER; use security::SecurityManager; use serde_json::json; use tikv::{ - config::{ConfigController, TiKvConfig}, + config::{ConfigController, TikvConfig}, server::debug::{BottommostLevelCompaction, Debugger, RegionInfo}, }; use tikv_util::escape; @@ -43,7 +43,7 @@ pub const LOCK_FILE_ERROR: &str = "IO error: While lock file"; type MvccInfoStream = Pin, MvccInfo), String>>>>; pub fn new_debug_executor( - cfg: &TiKvConfig, + cfg: &TikvConfig, data_dir: Option<&str>, skip_paranoid_checks: bool, host: Option<&str>, @@ -359,7 +359,7 @@ pub trait DebugExecutor { region: u64, to_host: Option<&str>, to_data_dir: Option<&str>, - to_config: &TiKvConfig, + to_config: &TikvConfig, mgr: Arc, ) { let rhs_debug_executor = new_debug_executor(to_config, to_data_dir, false, to_host, mgr); diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index d37336cbd36..ce39c121300 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -45,7 +45,7 @@ use raft_log_engine::ManagedFileSystem; use regex::Regex; use security::{SecurityConfig, SecurityManager}; use structopt::{clap::ErrorKind, StructOpt}; -use tikv::{config::TiKvConfig, server::debug::BottommostLevelCompaction}; +use tikv::{config::TikvConfig, server::debug::BottommostLevelCompaction}; use tikv_util::{escape, run_and_wait_child_process, sys::thread::StdThreadBuildWrapper, unescape}; use txn_types::Key; @@ -61,7 +61,7 @@ fn main() { let cfg_path = opt.config.as_ref(); let cfg = cfg_path.map_or_else( || { - let mut cfg = TiKvConfig::default(); + let mut cfg = TikvConfig::default(); cfg.log.level = tikv_util::logger::get_level_by_string("warn") .unwrap() .into(); 
@@ -332,7 +332,7 @@ fn main() { } => { let to_data_dir = to_data_dir.as_deref(); let to_host = to_host.as_deref(); - let to_config = to_config.map_or_else(TiKvConfig::default, |path| { + let to_config = to_config.map_or_else(TikvConfig::default, |path| { let s = fs::read_to_string(&path).unwrap(); toml::from_str(&s).unwrap() }); @@ -608,7 +608,7 @@ fn split_region(pd_client: &RpcClient, mgr: Arc, region_id: u64 fn compact_whole_cluster( pd_client: &RpcClient, - cfg: &TiKvConfig, + cfg: &TikvConfig, mgr: Arc, db_type: DbType, cfs: Vec<&str>, @@ -671,7 +671,7 @@ fn read_fail_file(path: &str) -> Vec<(String, String)> { list } -fn run_ldb_command(args: Vec, cfg: &TiKvConfig) { +fn run_ldb_command(args: Vec, cfg: &TikvConfig) { let key_manager = data_key_manager_from_config(&cfg.security.encryption, &cfg.storage.data_dir) .unwrap() .map(Arc::new); @@ -682,12 +682,12 @@ fn run_ldb_command(args: Vec, cfg: &TiKvConfig) { engine_rocks::raw::run_ldb_tool(&args, &opts); } -fn run_sst_dump_command(args: Vec, cfg: &TiKvConfig) { +fn run_sst_dump_command(args: Vec, cfg: &TikvConfig) { let opts = cfg.rocksdb.build_opt(); engine_rocks::raw::run_sst_dump_tool(&args, &opts); } -fn print_bad_ssts(data_dir: &str, manifest: Option<&str>, pd_client: RpcClient, cfg: &TiKvConfig) { +fn print_bad_ssts(data_dir: &str, manifest: Option<&str>, pd_client: RpcClient, cfg: &TikvConfig) { let db = &cfg.infer_kv_engine_path(Some(data_dir)).unwrap(); println!( "\nstart to print bad ssts; data_dir:{}; db:{}", diff --git a/cmd/tikv-ctl/src/util.rs b/cmd/tikv-ctl/src/util.rs index 36091b5a930..d7e83511d3e 100644 --- a/cmd/tikv-ctl/src/util.rs +++ b/cmd/tikv-ctl/src/util.rs @@ -3,13 +3,13 @@ use std::{borrow::ToOwned, error::Error, str, str::FromStr, u64}; use server::setup::initial_logger; -use tikv::config::TiKvConfig; +use tikv::config::TikvConfig; const LOG_DIR: &str = "./ctl-engine-info-log"; #[allow(clippy::field_reassign_with_default)] pub fn init_ctl_logger(level: &str) { - let mut cfg = 
TiKvConfig::default(); + let mut cfg = TikvConfig::default(); cfg.log.level = slog::Level::from_str(level).unwrap().into(); cfg.rocksdb.info_log_dir = LOG_DIR.to_owned(); cfg.raftdb.info_log_dir = LOG_DIR.to_owned(); diff --git a/cmd/tikv-server/src/main.rs b/cmd/tikv-server/src/main.rs index 0d6e472a602..b366cd7849f 100644 --- a/cmd/tikv-server/src/main.rs +++ b/cmd/tikv-server/src/main.rs @@ -7,7 +7,7 @@ use std::{path::Path, process}; use clap::{crate_authors, App, Arg}; use serde_json::{Map, Value}; use server::setup::{ensure_no_unrecognized_config, validate_and_persist_config}; -use tikv::config::{to_flatten_config_info, TiKvConfig}; +use tikv::config::{to_flatten_config_info, TikvConfig}; fn main() { let build_timestamp = option_env!("TIKV_BUILD_TIME"); @@ -157,7 +157,7 @@ fn main() { .get_matches(); if matches.is_present("print-sample-config") { - let config = TiKvConfig::default(); + let config = TikvConfig::default(); println!("{}", toml::to_string_pretty(&config).unwrap()); process::exit(0); } @@ -167,9 +167,9 @@ fn main() { let mut config = matches .value_of_os("config") - .map_or_else(TiKvConfig::default, |path| { + .map_or_else(TikvConfig::default, |path| { let path = Path::new(path); - TiKvConfig::from_file( + TikvConfig::from_file( path, if is_config_check { Some(&mut unrecognized_keys) diff --git a/components/api_version/src/api_v2.rs b/components/api_version/src/api_v2.rs index 712804b3b3a..a56d5deac30 100644 --- a/components/api_version/src/api_v2.rs +++ b/components/api_version/src/api_v2.rs @@ -50,7 +50,7 @@ impl KvFormat for ApiV2 { match key[0] { RAW_KEY_PREFIX => KeyMode::Raw, TXN_KEY_PREFIX => KeyMode::Txn, - TIDB_META_KEY_PREFIX | TIDB_TABLE_KEY_PREFIX => KeyMode::TiDB, + TIDB_META_KEY_PREFIX | TIDB_TABLE_KEY_PREFIX => KeyMode::Tidb, _ => KeyMode::Unknown, } } diff --git a/components/api_version/src/lib.rs b/components/api_version/src/lib.rs index fb8fd13cbfd..ceb18b4bddb 100644 --- a/components/api_version/src/lib.rs +++ 
b/components/api_version/src/lib.rs @@ -188,7 +188,7 @@ pub enum KeyMode { /// TiDB, but instead, it means that the key matches the definition of /// TiDB key in API V2, therefore, the key is treated as TiDB data in /// order to fulfill compatibility. - TiDB, + Tidb, /// Unrecognised key mode. Unknown, } @@ -271,8 +271,8 @@ mod tests { ); assert_eq!(ApiV2::parse_key_mode(&[RAW_KEY_PREFIX]), KeyMode::Raw); assert_eq!(ApiV2::parse_key_mode(&[TXN_KEY_PREFIX]), KeyMode::Txn); - assert_eq!(ApiV2::parse_key_mode(&b"t_a"[..]), KeyMode::TiDB); - assert_eq!(ApiV2::parse_key_mode(&b"m"[..]), KeyMode::TiDB); + assert_eq!(ApiV2::parse_key_mode(&b"t_a"[..]), KeyMode::Tidb); + assert_eq!(ApiV2::parse_key_mode(&b"m"[..]), KeyMode::Tidb); assert_eq!(ApiV2::parse_key_mode(&b"ot"[..]), KeyMode::Unknown); } @@ -289,19 +289,19 @@ mod tests { ); assert_eq!( ApiV2::parse_range_mode((Some(b"t_a"), Some(b"t_z"))), - KeyMode::TiDB + KeyMode::Tidb ); assert_eq!( ApiV2::parse_range_mode((Some(b"t"), Some(b"u"))), - KeyMode::TiDB + KeyMode::Tidb ); assert_eq!( ApiV2::parse_range_mode((Some(b"m"), Some(b"n"))), - KeyMode::TiDB + KeyMode::Tidb ); assert_eq!( ApiV2::parse_range_mode((Some(b"m_a"), Some(b"m_z"))), - KeyMode::TiDB + KeyMode::Tidb ); assert_eq!( ApiV2::parse_range_mode((Some(b"x\0a"), Some(b"x\0z"))), diff --git a/components/backup/src/errors.rs b/components/backup/src/errors.rs index 4f290262c57..413f4ee77f9 100644 --- a/components/backup/src/errors.rs +++ b/components/backup/src/errors.rs @@ -24,7 +24,7 @@ impl From for ErrorPb { fn from(e: Error) -> ErrorPb { let mut err = ErrorPb::default(); match e { - Error::ClusterID { current, request } => { + Error::ClusterId { current, request } => { BACKUP_RANGE_ERROR_VEC .with_label_values(&["cluster_mismatch"]) .inc(); @@ -114,8 +114,8 @@ pub enum Error { EngineTrait(#[from] EngineTraitError), #[error("Transaction error {0}")] Txn(#[from] TxnError), - #[error("ClusterID error current {current}, request {request}")] - ClusterID { 
current: u64, request: u64 }, + #[error("ClusterId error current {current}, request {request}")] + ClusterId { current: u64, request: u64 }, #[error("Invalid cf {cf}")] InvalidCf { cf: String }, #[error("Failed to acquire the semaphore {0}")] diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index fc379916232..1928cd3257a 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -24,7 +24,7 @@ use kvproto::{ }, }; use raftstore::{ - coprocessor::{Cmd, CmdBatch, ObserveHandle, ObserveID}, + coprocessor::{Cmd, CmdBatch, ObserveHandle, ObserveId}, store::util::compare_region_epoch, Error as RaftStoreError, }; @@ -38,7 +38,7 @@ use crate::{ initializer::KvEntry, metrics::*, old_value::{OldValueCache, OldValueCallback}, - service::ConnID, + service::ConnId, Error, Result, }; @@ -46,15 +46,15 @@ static DOWNSTREAM_ID_ALLOC: AtomicUsize = AtomicUsize::new(0); /// A unique identifier of a Downstream. #[derive(Clone, Copy, Debug, PartialEq, Hash)] -pub struct DownstreamID(usize); +pub struct DownstreamId(usize); -impl DownstreamID { - pub fn new() -> DownstreamID { - DownstreamID(DOWNSTREAM_ID_ALLOC.fetch_add(1, Ordering::SeqCst)) +impl DownstreamId { + pub fn new() -> DownstreamId { + DownstreamId(DOWNSTREAM_ID_ALLOC.fetch_add(1, Ordering::SeqCst)) } } -impl Default for DownstreamID { +impl Default for DownstreamId { fn default() -> Self { Self::new() } @@ -119,10 +119,10 @@ impl DownstreamState { pub struct Downstream { // TODO: include cdc request. /// A unique identifier of the Downstream. - id: DownstreamID, + id: DownstreamId, // The request ID set by CDC to identify events corresponding different requests. req_id: u64, - conn_id: ConnID, + conn_id: ConnId, // The IP address of downstream. 
peer: String, region_epoch: RegionEpoch, @@ -140,11 +140,11 @@ impl Downstream { peer: String, region_epoch: RegionEpoch, req_id: u64, - conn_id: ConnID, + conn_id: ConnId, kv_api: ChangeDataRequestKvApi, ) -> Downstream { Downstream { - id: DownstreamID::new(), + id: DownstreamId::new(), req_id, conn_id, peer, @@ -199,7 +199,7 @@ impl Downstream { self.sink = Some(sink); } - pub fn get_id(&self) -> DownstreamID { + pub fn get_id(&self) -> DownstreamId { self.id } @@ -207,7 +207,7 @@ impl Downstream { self.state.clone() } - pub fn get_conn_id(&self) -> ConnID { + pub fn get_conn_id(&self) -> ConnId { self.conn_id } } @@ -277,7 +277,7 @@ impl Delegate { Ok(()) } - pub fn downstream(&self, downstream_id: DownstreamID) -> Option<&Downstream> { + pub fn downstream(&self, downstream_id: DownstreamId) -> Option<&Downstream> { self.downstreams().iter().find(|d| d.id == downstream_id) } @@ -297,7 +297,7 @@ impl Delegate { /// Let downstream unsubscribe the delegate. /// Return whether the delegate is empty or not. - pub fn unsubscribe(&mut self, id: DownstreamID, err: Option) -> bool { + pub fn unsubscribe(&mut self, id: DownstreamId, err: Option) -> bool { let error_event = err.map(|err| err.into_error_event(self.region_id)); let region_id = self.region_id; if let Some(d) = self.remove_downstream(id) { @@ -617,7 +617,7 @@ impl Delegate { self.sink_downstream(raw_rows, index, ChangeDataRequestKvApi::RawKv) } - pub fn raw_untrack_ts(&mut self, cdc_id: ObserveID, max_ts: TimeStamp) { + pub fn raw_untrack_ts(&mut self, cdc_id: ObserveId, max_ts: TimeStamp) { // Stale CmdBatch, drop it silently. 
if cdc_id != self.handle.id { return; @@ -863,7 +863,7 @@ impl Delegate { self.txn_extra_op.store(TxnExtraOp::ReadOldValue); } - fn remove_downstream(&mut self, id: DownstreamID) -> Option { + fn remove_downstream(&mut self, id: DownstreamId) -> Option { let downstreams = self.downstreams_mut(); if let Some(index) = downstreams.iter().position(|x| x.id == id) { let downstream = downstreams.swap_remove(index); @@ -1070,7 +1070,7 @@ mod tests { String::new(), region_epoch, request_id, - ConnID::new(), + ConnId::new(), ChangeDataRequestKvApi::TiDb, ); downstream.set_sink(sink); @@ -1189,7 +1189,7 @@ mod tests { let mut epoch = RegionEpoch::default(); epoch.set_conf_ver(region_version); epoch.set_version(region_version); - Downstream::new(peer, epoch, id, ConnID::new(), ChangeDataRequestKvApi::TiDb) + Downstream::new(peer, epoch, id, ConnId::new(), ChangeDataRequestKvApi::TiDb) }; // Create a new delegate. diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 9d15c347e32..d9938006ca1 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -29,7 +29,7 @@ use kvproto::{ use online_config::{ConfigChange, OnlineConfig}; use pd_client::{Feature, PdClient}; use raftstore::{ - coprocessor::{CmdBatch, ObserveID}, + coprocessor::{CmdBatch, ObserveId}, router::RaftStoreRouter, store::{ fsm::{ChangeObserver, StoreMeta}, @@ -56,12 +56,12 @@ use txn_types::{TimeStamp, TxnExtra, TxnExtraScheduler}; use crate::{ channel::{CdcEvent, MemoryQuota, SendError}, - delegate::{on_init_downstream, Delegate, Downstream, DownstreamID, DownstreamState}, + delegate::{on_init_downstream, Delegate, Downstream, DownstreamId, DownstreamState}, initializer::Initializer, metrics::*, observer::RawRegionTs, old_value::{OldValueCache, OldValueCallback}, - service::{Conn, ConnID, FeatureGate}, + service::{Conn, ConnId, FeatureGate}, CdcObserver, Error, }; @@ -78,16 +78,16 @@ const RAW_RESOLVED_TS_OUTLIER_COUNT_THRESHOLD: usize = 10; pub enum 
Deregister { Downstream { region_id: u64, - downstream_id: DownstreamID, - conn_id: ConnID, + downstream_id: DownstreamId, + conn_id: ConnId, err: Option, }, Delegate { region_id: u64, - observe_id: ObserveID, + observe_id: ObserveId, err: Error, }, - Conn(ConnID), + Conn(ConnId), } impl_display_as_debug!(Deregister); @@ -137,7 +137,7 @@ pub enum Task { Register { request: ChangeDataRequest, downstream: Downstream, - conn_id: ConnID, + conn_id: ConnId, version: semver::Version, }, Deregister(Deregister), @@ -148,13 +148,13 @@ pub enum Task { multi: Vec, old_value_cb: OldValueCallback, }, - MinTS { + MinTs { regions: Vec, min_ts: TimeStamp, current_ts: TimeStamp, }, ResolverReady { - observe_id: ObserveID, + observe_id: ObserveId, region: Region, resolver: Resolver, }, @@ -163,7 +163,7 @@ pub enum Task { // the downstream switches to Normal after the previous commands was sunk. InitDownstream { region_id: u64, - downstream_id: DownstreamID, + downstream_id: DownstreamId, downstream_state: Arc>, // `incremental_scan_barrier` will be sent into `sink` to ensure all delta changes // are delivered to the downstream. And then incremental scan can start. @@ -215,7 +215,7 @@ impl fmt::Debug for Task { .field("type", &"multi_batch") .field("multi_batch", &multi.len()) .finish(), - Task::MinTS { + Task::MinTs { ref min_ts, ref current_ts, .. @@ -388,7 +388,7 @@ pub struct Endpoint { cluster_id: u64, capture_regions: HashMap, - connections: HashMap, + connections: HashMap, scheduler: Scheduler, raft_router: T, engine: E, @@ -611,7 +611,7 @@ impl, E: KvEngine> Endpoint { let oid = self.observer.unsubscribe_region(region_id, id); assert!( oid.is_some(), - "unsubscribe region {} failed, ObserveID {:?}", + "unsubscribe region {} failed, ObserveId {:?}", region_id, id ); @@ -624,7 +624,7 @@ impl, E: KvEngine> Endpoint { } => { // Something went wrong, deregister all downstreams of the region. - // To avoid ABA problem, we must check the unique ObserveID. 
+ // To avoid ABA problem, we must check the unique ObserveId. let need_remove = self .capture_regions .get(®ion_id) @@ -642,7 +642,7 @@ impl, E: KvEngine> Endpoint { assert_eq!( need_remove, oid.is_some(), - "unsubscribe region {} failed, ObserveID {:?}", + "unsubscribe region {} failed, ObserveId {:?}", region_id, observe_id ); @@ -661,7 +661,7 @@ impl, E: KvEngine> Endpoint { let oid = self.observer.unsubscribe_region(region_id, id); assert!( oid.is_some(), - "unsubscribe region {} failed, ObserveID {:?}", + "unsubscribe region {} failed, ObserveId {:?}", region_id, id ); @@ -678,7 +678,7 @@ impl, E: KvEngine> Endpoint { &mut self, mut request: ChangeDataRequest, mut downstream: Downstream, - conn_id: ConnID, + conn_id: ConnId, version: semver::Version, ) { let region_id = request.region_id; @@ -784,7 +784,7 @@ impl, E: KvEngine> Endpoint { let old_observe_id = self.observer.subscribe_region(region_id, observe_id); assert!( old_observe_id.is_none(), - "region {} must not be observed twice, old ObserveID {:?}, new ObserveID {:?}", + "region {} must not be observed twice, old ObserveId {:?}, new ObserveId {:?}", region_id, old_observe_id, observe_id @@ -880,7 +880,7 @@ impl, E: KvEngine> Endpoint { } } - fn on_region_ready(&mut self, observe_id: ObserveID, resolver: Resolver, region: Region) { + fn on_region_ready(&mut self, observe_id: ObserveId, resolver: Resolver, region: Region) { let region_id = region.get_id(); let mut failed_downstreams = Vec::new(); if let Some(delegate) = self.capture_regions.get_mut(®ion_id) { @@ -1129,7 +1129,7 @@ impl, E: KvEngine> Endpoint { let pd_client = self.pd_client.clone(); let scheduler = self.scheduler.clone(); let raft_router = self.raft_router.clone(); - let regions: Vec<(u64, ObserveID)> = self + let regions: Vec<(u64, ObserveId)> = self .capture_regions .iter() .map(|(region_id, delegate)| (*region_id, delegate.handle.id)) @@ -1152,8 +1152,8 @@ impl, E: KvEngine> Endpoint { // Sync with concurrency manager so that it can 
work correctly when // optimizations like async commit is enabled. - // Note: This step must be done before scheduling `Task::MinTS` task, and the - // resolver must be checked in or after `Task::MinTS`' execution. + // Note: This step must be done before scheduling `Task::MinTs` task, and the + // resolver must be checked in or after `Task::MinTs`' execution. cm.update_max_ts(min_ts); if let Some(min_mem_lock_ts) = cm.global_min_lock_ts() { if min_mem_lock_ts < min_ts { @@ -1169,7 +1169,7 @@ impl, E: KvEngine> Endpoint { Err(err) => panic!("failed to regiester min ts event, error: {:?}", err), } - // If flush_causal_timestamp fails, cannot schedule MinTS task + // If flush_causal_timestamp fails, cannot schedule MinTs task // as new coming raw data may use timestamp smaller than min_ts if let Err(e) = causal_ts_provider.map_or(Ok(()), |provider| provider.flush()) { error!("cdc flush causal timestamp failed"; "err" => ?e); @@ -1202,7 +1202,7 @@ impl, E: KvEngine> Endpoint { }; if !regions.is_empty() { - match scheduler.schedule(Task::MinTS { + match scheduler.schedule(Task::MinTs { regions, min_ts, current_ts: min_ts_pd, @@ -1225,7 +1225,7 @@ impl, E: KvEngine> Endpoint { } async fn region_resolved_ts_raft( - regions: Vec<(u64, ObserveID)>, + regions: Vec<(u64, ObserveId)>, scheduler: &Scheduler, raft_router: T, min_ts: TimeStamp, @@ -1293,7 +1293,7 @@ impl, E: KvEngine> Runnable for Endpoint { debug!("cdc run task"; "task" => %task); match task { - Task::MinTS { + Task::MinTs { regions, min_ts, current_ts, @@ -2250,7 +2250,7 @@ mod tests { .unwrap() .handle .id; - suite.run(Task::MinTS { + suite.run(Task::MinTs { regions: region_ids, min_ts: cur_tso, current_ts: cur_tso, @@ -2371,7 +2371,7 @@ mod tests { let resolver = Resolver::new(1); let observe_id = suite.endpoint.capture_regions[&1].handle.id; suite.on_region_ready(observe_id, resolver, region.clone()); - suite.run(Task::MinTS { + suite.run(Task::MinTs { regions: vec![1], min_ts: TimeStamp::from(1), 
current_ts: TimeStamp::zero(), @@ -2407,7 +2407,7 @@ mod tests { region.set_id(2); let observe_id = suite.endpoint.capture_regions[&2].handle.id; suite.on_region_ready(observe_id, resolver, region); - suite.run(Task::MinTS { + suite.run(Task::MinTs { regions: vec![1, 2], min_ts: TimeStamp::from(2), current_ts: TimeStamp::zero(), @@ -2452,7 +2452,7 @@ mod tests { region.set_id(3); let observe_id = suite.endpoint.capture_regions[&3].handle.id; suite.on_region_ready(observe_id, resolver, region); - suite.run(Task::MinTS { + suite.run(Task::MinTs { regions: vec![1, 2, 3], min_ts: TimeStamp::from(3), current_ts: TimeStamp::zero(), @@ -2612,8 +2612,8 @@ mod tests { assert_eq!(suite.endpoint.capture_regions.len(), 1); let deregister = Deregister::Delegate { region_id: 1, - // A stale ObserveID (different from the actual one). - observe_id: ObserveID::new(), + // A stale ObserveId (different from the actual one). + observe_id: ObserveId::new(), err: Error::request(err_header), }; suite.run(Task::Deregister(deregister)); @@ -2686,7 +2686,7 @@ mod tests { } }; - suite.run(Task::MinTS { + suite.run(Task::MinTs { regions: vec![1], min_ts: TimeStamp::from(1), current_ts: TimeStamp::zero(), @@ -2700,7 +2700,7 @@ mod tests { ) .unwrap_err(); - suite.run(Task::MinTS { + suite.run(Task::MinTs { regions: vec![1, 2], min_ts: TimeStamp::from(2), current_ts: TimeStamp::zero(), @@ -2714,7 +2714,7 @@ mod tests { ) .unwrap_err(); - suite.run(Task::MinTS { + suite.run(Task::MinTs { regions: vec![1, 2, 3], min_ts: TimeStamp::from(3), current_ts: TimeStamp::zero(), @@ -2724,7 +2724,7 @@ mod tests { // conn b must receive a resolved ts that contains region 3. 
assert_batch_resolved_ts(conn_rxs.get_mut(1).unwrap(), vec![3], 3); - suite.run(Task::MinTS { + suite.run(Task::MinTs { regions: vec![1, 3], min_ts: TimeStamp::from(4), current_ts: TimeStamp::zero(), diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 98720b7cf0c..f6a2ce2885c 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -16,7 +16,7 @@ use kvproto::{ metapb::{Region, RegionEpoch}, }; use raftstore::{ - coprocessor::ObserveID, + coprocessor::ObserveId, router::RaftStoreRouter, store::{ fsm::ChangeObserver, @@ -47,11 +47,11 @@ use txn_types::{Key, KvPair, Lock, LockType, OldValue, TimeStamp}; use crate::{ channel::CdcEvent, - delegate::{post_init_downstream, Delegate, DownstreamID, DownstreamState}, + delegate::{post_init_downstream, Delegate, DownstreamId, DownstreamState}, endpoint::Deregister, metrics::*, old_value::{near_seek_old_value, new_old_value_cursor, OldValueCursors}, - service::ConnID, + service::ConnId, Error, Result, Task, }; @@ -81,10 +81,10 @@ pub(crate) struct Initializer { pub(crate) region_id: u64, pub(crate) region_epoch: RegionEpoch, - pub(crate) observe_id: ObserveID, - pub(crate) downstream_id: DownstreamID, + pub(crate) observe_id: ObserveId, + pub(crate) downstream_id: DownstreamId, pub(crate) downstream_state: Arc>, - pub(crate) conn_id: ConnID, + pub(crate) conn_id: ConnId, pub(crate) request_id: u64, pub(crate) checkpoint_ts: TimeStamp, @@ -632,10 +632,10 @@ mod tests { region_id: 1, region_epoch: RegionEpoch::default(), - observe_id: ObserveID::new(), - downstream_id: DownstreamID::new(), + observe_id: ObserveId::new(), + downstream_id: DownstreamId::new(), downstream_state, - conn_id: ConnID::new(), + conn_id: ConnId::new(), request_id: 0, checkpoint_ts: 1.into(), speed_limiter: Limiter::new(speed_limit as _), diff --git a/components/cdc/src/observer.rs b/components/cdc/src/observer.rs index 124757d7697..d7da79c0361 100644 --- 
a/components/cdc/src/observer.rs +++ b/components/cdc/src/observer.rs @@ -28,7 +28,7 @@ use crate::{ #[derive(Clone, Debug, Eq, PartialEq)] pub struct RawRegionTs { pub region_id: u64, - pub cdc_id: ObserveID, + pub cdc_id: ObserveId, pub max_ts: TimeStamp, } @@ -42,7 +42,7 @@ pub struct CdcObserver { sched: Scheduler, // A shared registry for managing observed regions. // TODO: it may become a bottleneck, find a better way to manage the registry. - observe_regions: Arc>>, + observe_regions: Arc>>, api_version: ApiVersion, } @@ -76,8 +76,8 @@ impl CdcObserver { /// Subscribe an region, the observer will sink events of the region into /// its scheduler. /// - /// Return previous ObserveID if there is one. - pub fn subscribe_region(&self, region_id: u64, observe_id: ObserveID) -> Option { + /// Return previous ObserveId if there is one. + pub fn subscribe_region(&self, region_id: u64, observe_id: ObserveId) -> Option { self.observe_regions .write() .unwrap() @@ -87,9 +87,9 @@ impl CdcObserver { /// Stops observe the region. /// /// Return ObserverID if unsubscribe successfully. - pub fn unsubscribe_region(&self, region_id: u64, observe_id: ObserveID) -> Option { + pub fn unsubscribe_region(&self, region_id: u64, observe_id: ObserveId) -> Option { let mut regions = self.observe_regions.write().unwrap(); - // To avoid ABA problem, we must check the unique ObserveID. + // To avoid ABA problem, we must check the unique ObserveId. if let Some(oid) = regions.get(®ion_id) { if *oid == observe_id { return regions.remove(®ion_id); @@ -99,7 +99,7 @@ impl CdcObserver { } /// Check whether the region is subscribed or not. 
- pub fn is_subscribed(&self, region_id: u64) -> Option { + pub fn is_subscribed(&self, region_id: u64) -> Option { self.observe_regions .read() .unwrap() @@ -364,7 +364,7 @@ mod tests { observer.on_role_change(&mut ctx, &RoleChange::new(StateRole::Follower)); rx.recv_timeout(Duration::from_millis(10)).unwrap_err(); - let oid = ObserveID::new(); + let oid = ObserveId::new(); observer.subscribe_region(1, oid); let mut ctx = ObserverContext::new(®ion); @@ -440,7 +440,7 @@ mod tests { }; // unsubscribed fail if observer id is different. - assert_eq!(observer.unsubscribe_region(1, ObserveID::new()), None); + assert_eq!(observer.unsubscribe_region(1, ObserveId::new()), None); // No event if it is unsubscribed. let oid_ = observer.unsubscribe_region(1, oid).unwrap(); diff --git a/components/cdc/src/service.rs b/components/cdc/src/service.rs index 80d0f8c47a4..e7bec568f67 100644 --- a/components/cdc/src/service.rs +++ b/components/cdc/src/service.rs @@ -26,7 +26,7 @@ use tikv_util::{error, info, warn, worker::*}; use crate::{ channel::{channel, MemoryQuota, Sink, CDC_CHANNLE_CAPACITY}, - delegate::{Downstream, DownstreamID, DownstreamState}, + delegate::{Downstream, DownstreamId, DownstreamState}, endpoint::{Deregister, Task}, }; @@ -34,15 +34,15 @@ static CONNECTION_ID_ALLOC: AtomicUsize = AtomicUsize::new(0); /// A unique identifier of a Connection. 
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] -pub struct ConnID(usize); +pub struct ConnId(usize); -impl ConnID { - pub fn new() -> ConnID { - ConnID(CONNECTION_ID_ALLOC.fetch_add(1, Ordering::SeqCst)) +impl ConnId { + pub fn new() -> ConnId { + ConnId(CONNECTION_ID_ALLOC.fetch_add(1, Ordering::SeqCst)) } } -impl Default for ConnID { +impl Default for ConnId { fn default() -> Self { Self::new() } @@ -74,10 +74,10 @@ impl FeatureGate { } pub struct Conn { - id: ConnID, + id: ConnId, sink: Sink, - // region id -> DownstreamID - downstreams: HashMap>)>, + // region id -> DownstreamId + downstreams: HashMap>)>, peer: String, version: Option<(semver::Version, FeatureGate)>, } @@ -85,7 +85,7 @@ pub struct Conn { impl Conn { pub fn new(sink: Sink, peer: String) -> Conn { Conn { - id: ConnID::new(), + id: ConnId::new(), sink, downstreams: HashMap::default(), version: None, @@ -132,19 +132,19 @@ impl Conn { &self.peer } - pub fn get_id(&self) -> ConnID { + pub fn get_id(&self) -> ConnId { self.id } pub fn get_downstreams( &self, - ) -> &HashMap>)> { + ) -> &HashMap>)> { &self.downstreams } pub fn take_downstreams( self, - ) -> HashMap>)> { + ) -> HashMap>)> { self.downstreams } @@ -155,7 +155,7 @@ impl Conn { pub fn subscribe( &mut self, region_id: u64, - downstream_id: DownstreamID, + downstream_id: DownstreamId, downstream_state: Arc>, ) -> bool { match self.downstreams.entry(region_id) { @@ -171,7 +171,7 @@ impl Conn { self.downstreams.remove(®ion_id); } - pub fn downstream_id(&self, region_id: u64) -> Option { + pub fn downstream_id(&self, region_id: u64) -> Option { self.downstreams.get(®ion_id).map(|x| x.0) } } diff --git a/components/engine_rocks/src/util.rs b/components/engine_rocks/src/util.rs index ebb18e92de5..f749f78851c 100644 --- a/components/engine_rocks/src/util.rs +++ b/components/engine_rocks/src/util.rs @@ -307,7 +307,7 @@ pub fn to_raw_perf_level(level: engine_traits::PerfLevel) -> rocksdb::PerfLevel 
engine_traits::PerfLevel::EnableTimeExceptForMutex => { rocksdb::PerfLevel::EnableTimeExceptForMutex } - engine_traits::PerfLevel::EnableTimeAndCPUTimeExceptForMutex => { + engine_traits::PerfLevel::EnableTimeAndCpuTimeExceptForMutex => { rocksdb::PerfLevel::EnableTimeAndCPUTimeExceptForMutex } engine_traits::PerfLevel::EnableTime => rocksdb::PerfLevel::EnableTime, @@ -324,7 +324,7 @@ pub fn from_raw_perf_level(level: rocksdb::PerfLevel) -> engine_traits::PerfLeve engine_traits::PerfLevel::EnableTimeExceptForMutex } rocksdb::PerfLevel::EnableTimeAndCPUTimeExceptForMutex => { - engine_traits::PerfLevel::EnableTimeAndCPUTimeExceptForMutex + engine_traits::PerfLevel::EnableTimeAndCpuTimeExceptForMutex } rocksdb::PerfLevel::EnableTime => engine_traits::PerfLevel::EnableTime, rocksdb::PerfLevel::OutOfBounds => engine_traits::PerfLevel::OutOfBounds, diff --git a/components/engine_traits/src/errors.rs b/components/engine_traits/src/errors.rs index 6784891921b..c9960b50753 100644 --- a/components/engine_traits/src/errors.rs +++ b/components/engine_traits/src/errors.rs @@ -137,7 +137,7 @@ pub enum Error { #[error("{0:?}")] Other(#[from] Box), #[error("CF {0} not found")] - CFName(String), + CfName(String), #[error("Codec {0}")] Codec(#[from] tikv_util::codec::Error), #[error("The entries of region is unavailable")] @@ -155,7 +155,7 @@ impl ErrorCodeExt for Error { Error::NotInRange { .. 
} => error_code::engine::NOT_IN_RANGE, Error::Protobuf(_) => error_code::engine::PROTOBUF, Error::Io(_) => error_code::engine::IO, - Error::CFName(_) => error_code::engine::CF_NAME, + Error::CfName(_) => error_code::engine::CF_NAME, Error::Codec(_) => error_code::engine::CODEC, Error::Other(_) => error_code::UNKNOWN, Error::EntriesUnavailable => error_code::engine::DATALOSS, diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 0e709d164bd..72794fba5cd 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -191,7 +191,7 @@ //! //! At the end of this phase the `engine` crate will be deleted. //! -//! ## 3) "Pulling up" the generic abstractions through TiKv +//! ## 3) "Pulling up" the generic abstractions through TiKV //! //! With all of TiKV using the `engine_traits` traits in conjunction with the //! concrete `engine_rocks` types, we can push generic type parameters up diff --git a/components/engine_traits/src/perf_context.rs b/components/engine_traits/src/perf_context.rs index 56351fbeca5..ba48974a460 100644 --- a/components/engine_traits/src/perf_context.rs +++ b/components/engine_traits/src/perf_context.rs @@ -8,7 +8,7 @@ pub enum PerfLevel { Disable, EnableCount, EnableTimeExceptForMutex, - EnableTimeAndCPUTimeExceptForMutex, + EnableTimeAndCpuTimeExceptForMutex, EnableTime, OutOfBounds, } @@ -18,7 +18,7 @@ numeric_enum_serializing_mod! {perf_level_serde PerfLevel { Disable = 1, EnableCount = 2, EnableTimeExceptForMutex = 3, - EnableTimeAndCPUTimeExceptForMutex = 4, + EnableTimeAndCpuTimeExceptForMutex = 4, EnableTime = 5, OutOfBounds = 6, }} diff --git a/components/error_code/src/backup_stream.rs b/components/error_code/src/backup_stream.rs index fa11ff5b37d..9448169cc05 100644 --- a/components/error_code/src/backup_stream.rs +++ b/components/error_code/src/backup_stream.rs @@ -3,7 +3,7 @@ define_error_codes! 
{ "KV:LogBackup:", - ETCD => ("ETCD", + ETCD => ("Etcd", "Error during requesting the meta store(etcd)", "Please check the connectivity between TiKV and PD."), PROTO => ("Proto", @@ -23,7 +23,7 @@ define_error_codes! { "Malformed metadata found.", "The metadata format is unexpected, please check the compatibility between TiKV / BR." ), - IO => ("IO", + IO => ("Io", "Error during doing Input / Output operations.", "This is a generic error, please check the error message for further information." ), @@ -35,7 +35,7 @@ define_error_codes! { "Error during scheduling internal task.", "This is an internal error, and may happen if there are too many changes to observe, please ask the community for help." ), - PD => ("PD", + PD => ("Pd", "Error during requesting the Placement Driver.", "Please check the connectivity between TiKV and PD." ), diff --git a/components/error_code/src/causal_ts.rs b/components/error_code/src/causal_ts.rs index a5b2884a151..3f7f4e2a17e 100644 --- a/components/error_code/src/causal_ts.rs +++ b/components/error_code/src/causal_ts.rs @@ -4,9 +4,9 @@ define_error_codes!( "KV:CausalTs:", PD => ("PdClient", "", ""), - TSO => ("TSO", "", ""), - TSO_BATCH_USED_UP => ("TSO batch used up", "", ""), - BATCH_RENEW => ("Batch renew", "", ""), + TSO => ("Tso", "", ""), + TSO_BATCH_USED_UP => ("TsoBatchUsedUp", "", ""), + BATCH_RENEW => ("BatchRenew", "", ""), UNKNOWN => ("Unknown", "", "") ); diff --git a/components/error_code/src/cloud.rs b/components/error_code/src/cloud.rs index 63841761e7c..510481679dd 100644 --- a/components/error_code/src/cloud.rs +++ b/components/error_code/src/cloud.rs @@ -3,8 +3,8 @@ define_error_codes!( "KV:Cloud:", - IO => ("IO", "", ""), - SSL => ("SSL", "", ""), + IO => ("Io", "", ""), + SSL => ("Ssl", "", ""), PROTO => ("Proto", "", ""), UNKNOWN => ("Unknown", "", ""), TIMEOUT => ("Timeout", "", ""), diff --git a/components/error_code/src/encryption.rs b/components/error_code/src/encryption.rs index 069e98e3e6c..4204db84864 100644 
--- a/components/error_code/src/encryption.rs +++ b/components/error_code/src/encryption.rs @@ -4,7 +4,7 @@ define_error_codes!( "KV:Encryption:", ROCKS => ("Rocks", "", ""), - IO => ("IO", "", ""), + IO => ("Io", "", ""), CRYPTER => ("Crypter", "", ""), PROTO => ("Proto", "", ""), UNKNOWN_ENCRYPTION => ("UnknownEncryption", "", ""), diff --git a/components/error_code/src/engine.rs b/components/error_code/src/engine.rs index d29d658cb69..4bb66f09753 100644 --- a/components/error_code/src/engine.rs +++ b/components/error_code/src/engine.rs @@ -6,8 +6,8 @@ define_error_codes!( ENGINE => ("Engine", "", ""), NOT_IN_RANGE => ("NotInRange", "", ""), PROTOBUF => ("Protobuf", "", ""), - IO => ("IO", "", ""), - CF_NAME => ("CFName", "", ""), + IO => ("Io", "", ""), + CF_NAME => ("CfName", "", ""), CODEC => ("Codec", "", ""), DATALOSS => ("DataLoss", "", ""), DATACOMPACTED => ("DataCompacted", "", "") diff --git a/components/error_code/src/pd.rs b/components/error_code/src/pd.rs index 018c86c3d39..3ca2ac0b29f 100644 --- a/components/error_code/src/pd.rs +++ b/components/error_code/src/pd.rs @@ -1,13 +1,13 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
define_error_codes!( - "KV:PD:", + "KV:Pd:", - IO => ("IO", "", ""), + IO => ("Io", "", ""), CLUSTER_BOOTSTRAPPED => ("ClusterBootstraped", "", ""), CLUSTER_NOT_BOOTSTRAPPED => ("ClusterNotBootstraped", "", ""), INCOMPATIBLE => ("Imcompatible", "", ""), - GRPC => ("gRPC", "", ""), + GRPC => ("Grpc", "", ""), STREAM_DISCONNECT => ("StreamDisconnect","",""), REGION_NOT_FOUND => ("RegionNotFound", "", ""), STORE_TOMBSTONE => ("StoreTombstone", "", ""), diff --git a/components/error_code/src/raftstore.rs b/components/error_code/src/raftstore.rs index 4d38de92284..2926c69c21e 100644 --- a/components/error_code/src/raftstore.rs +++ b/components/error_code/src/raftstore.rs @@ -19,7 +19,7 @@ define_error_codes!( STALE_COMMAND => ("StaleCommand", "", ""), TRANSPORT => ("Transport", "", ""), COPROCESSOR => ("Coprocessor", "", ""), - IO => ("IO", "", ""), + IO => ("Io", "", ""), PROTOBUF => ("Protobuf", "", ""), ADDR_PARSE => ("AddressParse", "", ""), TIMEOUT => ("Timeout", "", ""), diff --git a/components/error_code/src/sst_importer.rs b/components/error_code/src/sst_importer.rs index e24209c92a1..2eb6177458b 100644 --- a/components/error_code/src/sst_importer.rs +++ b/components/error_code/src/sst_importer.rs @@ -1,13 +1,13 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
define_error_codes!( - "KV:SSTImporter:", + "KV:SstImporter:", IO => ("Io", "", ""), - GRPC => ("gRPC", "", ""), + GRPC => ("Grpc", "", ""), UUID => ("Uuid", "", ""), FUTURE => ("Future", "", ""), - ROCKSDB => ("RocksDB", "", ""), + ROCKSDB => ("RocksDb", "", ""), PARSE_INT_ERROR => ("ParseIntError", "", ""), FILE_EXISTS => ("FileExists", "", ""), FILE_CORRUPTED => ("FileCorrupted", "", ""), diff --git a/components/error_code/src/storage.rs b/components/error_code/src/storage.rs index 5336ab80bb0..61b81215438 100644 --- a/components/error_code/src/storage.rs +++ b/components/error_code/src/storage.rs @@ -10,12 +10,12 @@ define_error_codes!( SCHED_TOO_BUSY => ("SchedTooBusy", "", ""), GC_WORKER_TOO_BUSY => ("GcWorkerTooBusy", "", ""), KEY_TOO_LARGE => ("KeyTooLarge", "", ""), - INVALID_CF => ("InvalidCF", "", ""), - CF_DEPRECATED => ("CFDeprecated", "", ""), + INVALID_CF => ("InvalidCf", "", ""), + CF_DEPRECATED => ("CfDeprecated", "", ""), TTL_NOT_ENABLED => ("TtlNotEnabled", "", ""), TTL_LEN_NOT_EQUALS_TO_PAIRS => ("TtlLenNotEqualsToPairs", "", ""), PROTOBUF => ("Protobuf", "", ""), - INVALID_TXN_TSO => ("INVALIDTXNTSO", "", ""), + INVALID_TXN_TSO => ("InvalidTxnTso", "", ""), INVALID_REQ_RANGE => ("InvalidReqRange", "", ""), BAD_FORMAT_LOCK => ("BadFormatLock", "", ""), BAD_FORMAT_WRITE => ("BadFormatWrite", "",""), diff --git a/components/file_system/src/io_stats/proc.rs b/components/file_system/src/io_stats/proc.rs index 07856ebe9c0..ceb772bee6e 100644 --- a/components/file_system/src/io_stats/proc.rs +++ b/components/file_system/src/io_stats/proc.rs @@ -33,17 +33,17 @@ thread_local! 
{ } #[derive(Debug)] -struct ThreadID { +struct ThreadId { pid: Pid, tid: Pid, proc_reader: Option>, } -impl ThreadID { - fn current() -> ThreadID { +impl ThreadId { + fn current() -> ThreadId { let pid = thread::process_id(); let tid = thread::thread_id(); - ThreadID { + ThreadId { pid, tid, proc_reader: None, @@ -102,7 +102,7 @@ impl ThreadID { } struct LocalIoStats { - id: ThreadID, + id: ThreadId, io_type: IoType, last_flushed: IoBytes, } @@ -110,7 +110,7 @@ struct LocalIoStats { impl LocalIoStats { fn current() -> Self { LocalIoStats { - id: ThreadID::current(), + id: ThreadId::current(), io_type: IoType::Other, last_flushed: IoBytes::default(), } @@ -197,7 +197,7 @@ mod tests { fn test_read_bytes() { let tmp = tempdir_in("/var/tmp").unwrap_or_else(|_| tempdir().unwrap()); let file_path = tmp.path().join("test_read_bytes.txt"); - let mut id = ThreadID::current(); + let mut id = ThreadId::current(); let _type = WithIoType::new(IoType::Compaction); { let mut f = OpenOptions::new() @@ -229,7 +229,7 @@ mod tests { fn test_write_bytes() { let tmp = tempdir_in("/var/tmp").unwrap_or_else(|_| tempdir().unwrap()); let file_path = tmp.path().join("test_write_bytes.txt"); - let mut id = ThreadID::current(); + let mut id = ThreadId::current(); let _type = WithIoType::new(IoType::Compaction); let mut f = OpenOptions::new() .write(true) @@ -250,7 +250,7 @@ mod tests { #[bench] fn bench_fetch_thread_io_bytes(b: &mut test::Bencher) { - let mut id = ThreadID::current(); + let mut id = ThreadId::current(); b.iter(|| id.fetch_io_bytes().unwrap()); } } diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 9f82c90968b..82313ae7d4e 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -295,34 +295,34 @@ static OBSERVE_ID_ALLOC: AtomicUsize = AtomicUsize::new(0); /// A unique identifier for checking stale observed commands. 
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub struct ObserveID(usize); +pub struct ObserveId(usize); -impl ObserveID { - pub fn new() -> ObserveID { - ObserveID(OBSERVE_ID_ALLOC.fetch_add(1, Ordering::SeqCst)) +impl ObserveId { + pub fn new() -> ObserveId { + ObserveId(OBSERVE_ID_ALLOC.fetch_add(1, Ordering::SeqCst)) } } /// ObserveHandle is the status of a term of observing, it contains the -/// `ObserveID` and the `observing` flag indicate whether the observing is +/// `ObserveId` and the `observing` flag indicate whether the observing is /// ongoing #[derive(Clone, Default, Debug)] pub struct ObserveHandle { - pub id: ObserveID, + pub id: ObserveId, observing: Arc, } impl ObserveHandle { pub fn new() -> ObserveHandle { ObserveHandle { - id: ObserveID::new(), + id: ObserveId::new(), observing: Arc::new(AtomicBool::new(true)), } } pub fn with_id(id: usize) -> ObserveHandle { ObserveHandle { - id: ObserveID(id), + id: ObserveId(id), observing: Arc::new(AtomicBool::new(true)), } } @@ -412,9 +412,9 @@ pub enum ObserveLevel { #[derive(Clone, Debug)] pub struct CmdBatch { pub level: ObserveLevel, - pub cdc_id: ObserveID, - pub rts_id: ObserveID, - pub pitr_id: ObserveID, + pub cdc_id: ObserveId, + pub rts_id: ObserveId, + pub pitr_id: ObserveId, pub region_id: u64, pub cmds: Vec, } diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 1b707a42921..db62674e6a5 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -692,7 +692,7 @@ fn timespec_to_u64(ts: Timespec) -> u64 { /// /// # Panics /// -/// If nsec is negative or GE than 1_000_000_000(nano seconds pre second). +/// If nsec (nano seconds pre second) is not in [0, 1_000_000_000) range. 
#[inline] pub(crate) fn u64_to_timespec(u: u64) -> Timespec { let sec = u >> TIMESPEC_SEC_SHIFT; diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 6a6aa53103d..97e8ee85d86 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -186,9 +186,9 @@ where id: u64, duration: RaftstoreDuration, }, - UpdateRegionCPUCollector(bool), - RegionCPURecords(Arc), - ReportMinResolvedTS { + UpdateRegionCpuCollector(bool), + RegionCpuRecords(Arc), + ReportMinResolvedTs { store_id: u64, min_resolved_ts: u64, }, @@ -409,16 +409,16 @@ where Task::UpdateSlowScore { id, ref duration } => { write!(f, "compute slow score: id {}, duration {:?}", id, duration) } - Task::UpdateRegionCPUCollector(is_register) => { + Task::UpdateRegionCpuCollector(is_register) => { if is_register { return write!(f, "register region cpu collector"); } write!(f, "deregister region cpu collector") } - Task::RegionCPURecords(ref cpu_records) => { + Task::RegionCpuRecords(ref cpu_records) => { write!(f, "get region cpu records: {:?}", cpu_records) } - Task::ReportMinResolvedTS { + Task::ReportMinResolvedTs { store_id, min_resolved_ts, } => { @@ -625,8 +625,8 @@ where ) { let start_time = TiInstant::now(); match auto_split_controller.refresh_and_check_cfg() { - SplitConfigChange::UpdateRegionCPUCollector(is_register) => { - if let Err(e) = scheduler.schedule(Task::UpdateRegionCPUCollector(is_register)) { + SplitConfigChange::UpdateRegionCpuCollector(is_register) => { + if let Err(e) = scheduler.schedule(Task::UpdateRegionCpuCollector(is_register)) { error!( "failed to register or deregister the region cpu collector"; "is_register" => is_register, @@ -680,7 +680,7 @@ where .min() .unwrap_or(0) }); - let task = Task::ReportMinResolvedTS { + let task = Task::ReportMinResolvedTs { store_id, min_resolved_ts, }; @@ -839,8 +839,8 @@ impl SlowScore { } } -// RegionCPUMeteringCollector is used to collect the 
region-related CPU info. -struct RegionCPUMeteringCollector +// RegionCpuMeteringCollector is used to collect the region-related CPU info. +struct RegionCpuMeteringCollector where EK: KvEngine, ER: RaftEngine, @@ -848,24 +848,24 @@ where scheduler: Scheduler>, } -impl RegionCPUMeteringCollector +impl RegionCpuMeteringCollector where EK: KvEngine, ER: RaftEngine, { - fn new(scheduler: Scheduler>) -> RegionCPUMeteringCollector { - RegionCPUMeteringCollector { scheduler } + fn new(scheduler: Scheduler>) -> RegionCpuMeteringCollector { + RegionCpuMeteringCollector { scheduler } } } -impl Collector for RegionCPUMeteringCollector +impl Collector for RegionCpuMeteringCollector where EK: KvEngine, ER: RaftEngine, { fn collect(&self, records: Arc) { self.scheduler - .schedule(Task::RegionCPURecords(records)) + .schedule(Task::RegionCpuRecords(records)) .ok(); } } @@ -940,7 +940,7 @@ where > 0.0 { region_cpu_records_collector = Some(collector_reg_handle.register( - Box::new(RegionCPUMeteringCollector::new(scheduler.clone())), + Box::new(RegionCpuMeteringCollector::new(scheduler.clone())), false, )); } @@ -1040,7 +1040,7 @@ where return; } self.region_cpu_records_collector = Some(self.collector_reg_handle.register( - Box::new(RegionCPUMeteringCollector::new(self.scheduler.clone())), + Box::new(RegionCpuMeteringCollector::new(self.scheduler.clone())), false, )); } @@ -2015,11 +2015,11 @@ where } => self.handle_update_max_timestamp(region_id, initial_status, txn_ext), Task::QueryRegionLeader { region_id } => self.handle_query_region_leader(region_id), Task::UpdateSlowScore { id, duration } => self.slow_score.record(id, duration.sum()), - Task::UpdateRegionCPUCollector(is_register) => { + Task::UpdateRegionCpuCollector(is_register) => { self.handle_update_region_cpu_collector(is_register) } - Task::RegionCPURecords(records) => self.handle_region_cpu_records(records), - Task::ReportMinResolvedTS { + Task::RegionCpuRecords(records) => self.handle_region_cpu_records(records), + 
Task::ReportMinResolvedTs { store_id, min_resolved_ts, } => self.handle_report_min_resolved_ts(store_id, min_resolved_ts), diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index addedc3d653..7c698905b72 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -555,7 +555,7 @@ impl SplitInfo { #[derive(PartialEq, Debug)] pub enum SplitConfigChange { Noop, - UpdateRegionCPUCollector(bool), + UpdateRegionCpuCollector(bool), } pub struct AutoSplitController { @@ -927,12 +927,12 @@ impl AutoSplitController { if self.cfg.region_cpu_overload_threshold_ratio <= 0.0 && incoming.region_cpu_overload_threshold_ratio > 0.0 { - cfg_change = SplitConfigChange::UpdateRegionCPUCollector(true); + cfg_change = SplitConfigChange::UpdateRegionCpuCollector(true); } if self.cfg.region_cpu_overload_threshold_ratio > 0.0 && incoming.region_cpu_overload_threshold_ratio <= 0.0 { - cfg_change = SplitConfigChange::UpdateRegionCPUCollector(false); + cfg_change = SplitConfigChange::UpdateRegionCpuCollector(false); } self.cfg = incoming.clone(); } @@ -1638,7 +1638,7 @@ mod tests { ); assert_eq!( auto_split_controller.refresh_and_check_cfg(), - SplitConfigChange::UpdateRegionCPUCollector(false), + SplitConfigChange::UpdateRegionCpuCollector(false), ); assert_eq!( auto_split_controller @@ -1658,7 +1658,7 @@ mod tests { ); assert_eq!( auto_split_controller.refresh_and_check_cfg(), - SplitConfigChange::UpdateRegionCPUCollector(true), + SplitConfigChange::UpdateRegionCpuCollector(true), ); assert_eq!( auto_split_controller diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index e1c23652db8..57bf20e7d0b 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -106,8 +106,8 @@ impl AdvanceTsWorker { // Sync with concurrency manager so that it can work correctly 
when // optimizations like async commit is enabled. - // Note: This step must be done before scheduling `Task::MinTS` task, and the - // resolver must be checked in or after `Task::MinTS`' execution. + // Note: This step must be done before scheduling `Task::MinTs` task, and the + // resolver must be checked in or after `Task::MinTs`' execution. cm.update_max_ts(min_ts); if let Some(min_mem_lock_ts) = cm.global_min_lock_ts() { if min_mem_lock_ts < min_ts { diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 5a180a9b6c8..f2920e2af69 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -18,7 +18,7 @@ use kvproto::{metapb::Region, raft_cmdpb::AdminCmdType}; use online_config::{self, ConfigChange, ConfigManager, OnlineConfig}; use pd_client::PdClient; use raftstore::{ - coprocessor::{CmdBatch, ObserveHandle, ObserveID}, + coprocessor::{CmdBatch, ObserveHandle, ObserveId}, router::RaftStoreRouter, store::{ fsm::StoreMeta, @@ -458,7 +458,7 @@ where } // Deregister current observed region and try to register it again. 
- fn re_register_region(&mut self, region_id: u64, observe_id: ObserveID, cause: String) { + fn re_register_region(&mut self, region_id: u64, observe_id: ObserveId, cause: String) { if let Some(observe_region) = self.regions.get(®ion_id) { if observe_region.handle.id != observe_id { warn!("resolved ts deregister region failed due to observe_id not match"); @@ -554,7 +554,7 @@ where fn handle_scan_locks( &mut self, region_id: u64, - observe_id: ObserveID, + observe_id: ObserveId, entries: Vec, apply_index: u64, ) { @@ -622,7 +622,7 @@ pub enum Task { }, ReRegisterRegion { region_id: u64, - observe_id: ObserveID, + observe_id: ObserveId, cause: String, }, RegisterAdvanceEvent { @@ -638,7 +638,7 @@ pub enum Task { }, ScanLocks { region_id: u64, - observe_id: ObserveID, + observe_id: ObserveId, entries: Vec, apply_index: u64, }, diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index 396fc7333da..4266103933f 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -6,7 +6,7 @@ use engine_traits::KvEngine; use futures::compat::Future01CompatExt; use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb::Region}; use raftstore::{ - coprocessor::{ObserveHandle, ObserveID}, + coprocessor::{ObserveHandle, ObserveId}, router::RaftStoreRouter, store::{ fsm::ChangeObserver, @@ -33,7 +33,7 @@ const GET_SNAPSHOT_RETRY_TIME: u32 = 3; const GET_SNAPSHOT_RETRY_BACKOFF_STEP: Duration = Duration::from_millis(25); pub type BeforeStartCallback = Box; -pub type OnErrorCallback = Box; +pub type OnErrorCallback = Box; pub type OnEntriesCallback = Box, u64) + Send>; pub type IsCancelledCallback = Box bool + Send>; diff --git a/components/resolved_ts/src/sinker.rs b/components/resolved_ts/src/sinker.rs index 29eebce02ed..383e5f7acc7 100644 --- a/components/resolved_ts/src/sinker.rs +++ b/components/resolved_ts/src/sinker.rs @@ -3,14 +3,14 @@ use std::marker::PhantomData; use engine_traits::Snapshot; -use 
raftstore::{coprocessor::ObserveID, store::RegionSnapshot}; +use raftstore::{coprocessor::ObserveId, store::RegionSnapshot}; use txn_types::TimeStamp; use crate::cmd::ChangeLog; pub struct SinkCmd { pub region_id: u64, - pub observe_id: ObserveID, + pub observe_id: ObserveId, pub logs: Vec, } diff --git a/components/server/src/raft_engine_switch.rs b/components/server/src/raft_engine_switch.rs index 7ada07d5206..29144c8ca18 100644 --- a/components/server/src/raft_engine_switch.rs +++ b/components/server/src/raft_engine_switch.rs @@ -215,7 +215,7 @@ fn run_dump_raft_engine_worker( #[cfg(test)] mod tests { - use tikv::config::TiKvConfig; + use tikv::config::TikvConfig; use super::*; @@ -230,7 +230,7 @@ mod tests { raftdb_wal_path.push("test-wal"); } - let mut cfg = TiKvConfig::default(); + let mut cfg = TikvConfig::default(); cfg.raft_store.raftdb_path = raftdb_path.to_str().unwrap().to_owned(); cfg.raftdb.wal_dir = raftdb_wal_path.to_str().unwrap().to_owned(); cfg.raft_engine.mut_config().dir = raft_engine_path.to_str().unwrap().to_owned(); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index fe2b0dd1c26..1cb6a9b3b65 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -9,7 +9,7 @@ //! The entry point is `run_tikv`. //! //! Components are often used to initialize other components, and/or must be -//! explicitly stopped. We keep these components in the `TiKvServer` struct. +//! explicitly stopped. We keep these components in the `TikvServer` struct. 
use std::{ cmp, @@ -82,7 +82,7 @@ use raftstore::{ }; use security::SecurityManager; use tikv::{ - config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TiKvConfig}, + config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, import::{ImportSstService, SstImporter}, @@ -140,10 +140,10 @@ const SYSTEM_HEALTHY_THRESHOLD: f64 = 0.50; const CPU_QUOTA_ADJUSTMENT_PACE: f64 = 200.0; // 0.2 vcpu #[inline] -fn run_impl(config: TiKvConfig) { - let mut tikv = TiKvServer::::init::(config); +fn run_impl(config: TikvConfig) { + let mut tikv = TikvServer::::init::(config); - // Must be called after `TiKvServer::init`. + // Must be called after `TikvServer::init`. let memory_limit = tikv.config.memory_usage_limit.unwrap().0; let high_water = (tikv.config.memory_usage_high_water * memory_limit as f64) as u64; register_memory_usage_high_water(high_water); @@ -170,7 +170,7 @@ fn run_impl(config: TiKvConfig) { /// Run a TiKV server. Returns when the server is shutdown by the user, in which /// case the server will be properly stopped. -pub fn run_tikv(config: TiKvConfig) { +pub fn run_tikv(config: TikvConfig) { // Sets the global logger ASAP. // It is okay to use the config w/o `validate()`, // because `initial_logger()` handles various conditions. @@ -207,8 +207,8 @@ const DEFAULT_STORAGE_STATS_INTERVAL: Duration = Duration::from_secs(1); const DEFAULT_QUOTA_LIMITER_TUNE_INTERVAL: Duration = Duration::from_secs(5); /// A complete TiKV server. -struct TiKvServer { - config: TiKvConfig, +struct TikvServer { + config: TikvConfig, cfg_controller: Option, security_mgr: Arc, pd_client: Arc, @@ -221,7 +221,7 @@ struct TiKvServer { store_path: PathBuf, snap_mgr: Option, // Will be filled in `init_servers`. 
encryption_key_manager: Option>, - engines: Option>, + engines: Option>, servers: Option>, region_info_accessor: RegionInfoAccessor, coprocessor_host: Option>, @@ -236,7 +236,7 @@ struct TiKvServer { tablet_factory: Option + Send + Sync>>, } -struct TiKvEngines { +struct TikvEngines { engines: Engines, store_meta: Arc>, engine: RaftKv>, @@ -257,11 +257,11 @@ type LocalServer = Server, resolve::PdStoreAddrResolver, LocalRaftKv>; type LocalRaftKv = RaftKv>; -impl TiKvServer +impl TikvServer where ER: RaftEngine, { - fn init(mut config: TiKvConfig) -> TiKvServer { + fn init(mut config: TikvConfig) -> TikvServer { tikv_util::thread_group::set_properties(Some(GroupProperties::default())); // It is okay use pd config and security config before `init_config`, // because these configs must be provided by command line, and only @@ -331,7 +331,7 @@ where info!("Causal timestamp provider startup."); } - TiKvServer { + TikvServer { config, cfg_controller: Some(cfg_controller), security_mgr, @@ -373,7 +373,7 @@ where /// - If the config can't pass `validate()` /// - If the max open file descriptor limit is not high enough to support /// the main database and the raft database. - fn init_config(mut config: TiKvConfig) -> ConfigController { + fn init_config(mut config: TikvConfig) -> ConfigController { validate_and_persist_config(&mut config, true); ensure_dir_exist(&config.storage.data_dir).unwrap(); @@ -408,7 +408,7 @@ where } fn connect_to_pd_cluster( - config: &mut TiKvConfig, + config: &mut TikvConfig, env: Arc, security_mgr: Arc, ) -> Arc { @@ -573,7 +573,7 @@ where engines.kv.clone(), ); - self.engines = Some(TiKvEngines { + self.engines = Some(TikvEngines { engines, store_meta, engine, @@ -813,7 +813,7 @@ where cdc_ob.register_to(self.coprocessor_host.as_mut().unwrap()); // Register cdc config manager. 
cfg_controller.register( - tikv::config::Module::CDC, + tikv::config::Module::Cdc, Box::new(CdcConfigManager(cdc_worker.scheduler())), ); @@ -1535,7 +1535,7 @@ where pub trait ConfiguredRaftEngine: RaftEngine { fn build( - _: &TiKvConfig, + _: &TikvConfig, _: &Arc, _: &Option>, _: &Option, @@ -1548,7 +1548,7 @@ pub trait ConfiguredRaftEngine: RaftEngine { impl ConfiguredRaftEngine for RocksEngine { fn build( - config: &TiKvConfig, + config: &TikvConfig, env: &Arc, key_manager: &Option>, block_cache: &Option, @@ -1600,7 +1600,7 @@ impl ConfiguredRaftEngine for RocksEngine { impl ConfiguredRaftEngine for RaftLogEngine { fn build( - config: &TiKvConfig, + config: &TikvConfig, env: &Arc, key_manager: &Option>, block_cache: &Option, @@ -1637,7 +1637,7 @@ impl ConfiguredRaftEngine for RaftLogEngine { } } -impl TiKvServer { +impl TikvServer { fn init_raw_engines( &mut self, flow_listener: engine_rocks::FlowListener, @@ -1724,7 +1724,7 @@ fn pre_start() { } } -fn check_system_config(config: &TiKvConfig) { +fn check_system_config(config: &TikvConfig) { info!("beginning system configuration check"); let mut rocksdb_max_open_files = config.rocksdb.max_open_files; if config.rocksdb.titan.enabled { diff --git a/components/server/src/setup.rs b/components/server/src/setup.rs index 4f49f6fb86e..5742eda8bc8 100644 --- a/components/server/src/setup.rs +++ b/components/server/src/setup.rs @@ -10,7 +10,7 @@ use std::{ use chrono::Local; use clap::ArgMatches; use collections::HashMap; -use tikv::config::{check_critical_config, persist_config, MetricConfig, TiKvConfig}; +use tikv::config::{check_critical_config, persist_config, MetricConfig, TikvConfig}; use tikv_util::{self, config, logger}; // A workaround for checking if log is initialized. 
@@ -74,7 +74,7 @@ fn make_engine_log_path(path: &str, sub_path: &str, filename: &str) -> String { } #[allow(dead_code)] -pub fn initial_logger(config: &TiKvConfig) { +pub fn initial_logger(config: &TikvConfig) { let rocksdb_info_log_path = if !config.rocksdb.info_log_dir.is_empty() { make_engine_log_path(&config.rocksdb.info_log_dir, "", DEFAULT_ROCKSDB_LOG_FILE) } else { @@ -142,7 +142,7 @@ pub fn initial_logger(config: &TiKvConfig) { rocksdb: R, raftdb: T, slow: Option, - config: &TiKvConfig, + config: &TikvConfig, ) where N: slog::Drain + Send + 'static, R: slog::Drain + Send + 'static, @@ -238,7 +238,7 @@ pub fn initial_metric(cfg: &MetricConfig) { } #[allow(dead_code)] -pub fn overwrite_config_with_cmd_args(config: &mut TiKvConfig, matches: &ArgMatches<'_>) { +pub fn overwrite_config_with_cmd_args(config: &mut TikvConfig, matches: &ArgMatches<'_>) { if let Some(level) = matches.value_of("log-level") { config.log.level = logger::get_level_by_string(level).unwrap().into(); config.log_level = slog::Level::Info.into(); @@ -303,7 +303,7 @@ pub fn overwrite_config_with_cmd_args(config: &mut TiKvConfig, matches: &ArgMatc } #[allow(dead_code)] -pub fn validate_and_persist_config(config: &mut TiKvConfig, persist: bool) { +pub fn validate_and_persist_config(config: &mut TikvConfig, persist: bool) { config.compatible_adjust(); if let Err(e) = config.validate() { fatal!("invalid configuration: {}", e); diff --git a/components/sst_importer/src/errors.rs b/components/sst_importer/src/errors.rs index 3fc229aa6ee..51aabcbec01 100644 --- a/components/sst_importer/src/errors.rs +++ b/components/sst_importer/src/errors.rs @@ -19,7 +19,7 @@ pub fn error_inc(type_: &str, err: &Error) { Error::Io(..) => "io", Error::Grpc(..) => "grpc", Error::Uuid(..) => "uuid", - Error::RocksDB(..) => "rocksdb", + Error::RocksDb(..) => "rocksdb", Error::EngineTraits(..) => "engine_traits", Error::ParseIntError(..) => "parse_int", Error::FileExists(..) 
=> "file_exists", @@ -52,7 +52,7 @@ pub enum Error { // FIXME: Remove concrete 'rocks' type #[error("RocksDB {0}")] - RocksDB(String), + RocksDb(String), #[error("Engine {0:?}")] EngineTraits(#[from] engine_traits::Error), @@ -140,7 +140,7 @@ impl Error { impl From for Error { fn from(msg: String) -> Self { - Self::RocksDB(msg) + Self::RocksDb(msg) } } @@ -161,7 +161,7 @@ impl ErrorCodeExt for Error { Error::Grpc(_) => error_code::sst_importer::GRPC, Error::Uuid(_) => error_code::sst_importer::UUID, Error::Future(_) => error_code::sst_importer::FUTURE, - Error::RocksDB(_) => error_code::sst_importer::ROCKSDB, + Error::RocksDb(_) => error_code::sst_importer::ROCKSDB, Error::EngineTraits(e) => e.error_code(), Error::ParseIntError(_) => error_code::sst_importer::PARSE_INT_ERROR, Error::FileExists(..) => error_code::sst_importer::FILE_EXISTS, diff --git a/components/sst_importer/src/sst_writer.rs b/components/sst_importer/src/sst_writer.rs index 210f17fc168..70d30569557 100644 --- a/components/sst_importer/src/sst_writer.rs +++ b/components/sst_importer/src/sst_writer.rs @@ -61,7 +61,7 @@ impl TxnSstWriter { fn check_api_version(&self, key: &[u8]) -> Result<()> { let mode = K::parse_key_mode(key); - if self.api_version == ApiVersion::V2 && mode != KeyMode::Txn && mode != KeyMode::TiDB { + if self.api_version == ApiVersion::V2 && mode != KeyMode::Txn && mode != KeyMode::Tidb { return Err(Error::invalid_key_mode( SstWriterType::Txn, self.api_version, diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index e6622128243..d7bed05eddd 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -24,7 +24,7 @@ use tidb_query_common::storage::{ }; use tikv::{ config::BackupConfig, - coprocessor::{checksum_crc64_xor, dag::TiKvStorage}, + coprocessor::{checksum_crc64_xor, dag::TikvStorage}, storage::{ kv::{Engine, SnapContext}, SnapshotStore, @@ -355,7 +355,7 @@ impl TestSuite { false, ); let mut scanner = 
RangesScanner::new(RangesScannerOptions { - storage: TiKvStorage::new(snap_store, false), + storage: TikvStorage::new(snap_store, false), ranges: vec![Range::Interval(IntervalRange::from((start, end)))], scan_backward_in_range: false, is_key_only: false, diff --git a/components/test_coprocessor/src/dag.rs b/components/test_coprocessor/src/dag.rs index 740ece83e1a..76e91cc6ef5 100644 --- a/components/test_coprocessor/src/dag.rs +++ b/components/test_coprocessor/src/dag.rs @@ -15,7 +15,7 @@ use tipb::{ use super::*; -pub struct DAGSelect { +pub struct DagSelect { pub execs: Vec, pub cols: Vec, pub order_by: Vec, @@ -27,8 +27,8 @@ pub struct DAGSelect { pub paging_size: Option, } -impl DAGSelect { - pub fn from(table: &Table) -> DAGSelect { +impl DagSelect { + pub fn from(table: &Table) -> DagSelect { let mut exec = Executor::default(); exec.set_tp(ExecType::TypeTableScan); let mut tbl_scan = TableScan::default(); @@ -38,7 +38,7 @@ impl DAGSelect { tbl_scan.set_columns(columns_info); exec.set_tbl_scan(tbl_scan); - DAGSelect { + DagSelect { execs: vec![exec], cols: table.columns_info(), order_by: vec![], @@ -51,7 +51,7 @@ impl DAGSelect { } } - pub fn from_index(table: &Table, index: &Column) -> DAGSelect { + pub fn from_index(table: &Table, index: &Column) -> DagSelect { let idx = index.index; let mut exec = Executor::default(); exec.set_tp(ExecType::TypeIndexScan); @@ -65,7 +65,7 @@ impl DAGSelect { exec.set_idx_scan(scan); let range = table.get_index_range_all(idx); - DAGSelect { + DagSelect { execs: vec![exec], cols: columns_info.to_vec(), order_by: vec![], @@ -79,13 +79,13 @@ impl DAGSelect { } #[must_use] - pub fn limit(mut self, n: u64) -> DAGSelect { + pub fn limit(mut self, n: u64) -> DagSelect { self.limit = Some(n); self } #[must_use] - pub fn order_by(mut self, col: &Column, desc: bool) -> DAGSelect { + pub fn order_by(mut self, col: &Column, desc: bool) -> DagSelect { let col_offset = offset_for_column(&self.cols, col.id); let mut item = ByItem::default(); 
let mut expr = Expr::default(); @@ -99,12 +99,12 @@ impl DAGSelect { } #[must_use] - pub fn count(self, col: &Column) -> DAGSelect { + pub fn count(self, col: &Column) -> DagSelect { self.aggr_col(col, ExprType::Count) } #[must_use] - pub fn aggr_col(mut self, col: &Column, aggr_t: ExprType) -> DAGSelect { + pub fn aggr_col(mut self, col: &Column, aggr_t: ExprType) -> DagSelect { let col_offset = offset_for_column(&self.cols, col.id); let mut col_expr = Expr::default(); col_expr.set_field_type(col.as_field_type()); @@ -125,47 +125,47 @@ impl DAGSelect { } #[must_use] - pub fn first(self, col: &Column) -> DAGSelect { + pub fn first(self, col: &Column) -> DagSelect { self.aggr_col(col, ExprType::First) } #[must_use] - pub fn sum(self, col: &Column) -> DAGSelect { + pub fn sum(self, col: &Column) -> DagSelect { self.aggr_col(col, ExprType::Sum) } #[must_use] - pub fn avg(self, col: &Column) -> DAGSelect { + pub fn avg(self, col: &Column) -> DagSelect { self.aggr_col(col, ExprType::Avg) } #[must_use] - pub fn max(self, col: &Column) -> DAGSelect { + pub fn max(self, col: &Column) -> DagSelect { self.aggr_col(col, ExprType::Max) } #[must_use] - pub fn min(self, col: &Column) -> DAGSelect { + pub fn min(self, col: &Column) -> DagSelect { self.aggr_col(col, ExprType::Min) } #[must_use] - pub fn bit_and(self, col: &Column) -> DAGSelect { + pub fn bit_and(self, col: &Column) -> DagSelect { self.aggr_col(col, ExprType::AggBitAnd) } #[must_use] - pub fn bit_or(self, col: &Column) -> DAGSelect { + pub fn bit_or(self, col: &Column) -> DagSelect { self.aggr_col(col, ExprType::AggBitOr) } #[must_use] - pub fn bit_xor(self, col: &Column) -> DAGSelect { + pub fn bit_xor(self, col: &Column) -> DagSelect { self.aggr_col(col, ExprType::AggBitXor) } #[must_use] - pub fn group_by(mut self, cols: &[&Column]) -> DAGSelect { + pub fn group_by(mut self, cols: &[&Column]) -> DagSelect { for col in cols { let offset = offset_for_column(&self.cols, col.id); let mut expr = Expr::default(); @@ 
-178,13 +178,13 @@ impl DAGSelect { } #[must_use] - pub fn output_offsets(mut self, output_offsets: Option>) -> DAGSelect { + pub fn output_offsets(mut self, output_offsets: Option>) -> DagSelect { self.output_offsets = output_offsets; self } #[must_use] - pub fn where_expr(mut self, expr: Expr) -> DAGSelect { + pub fn where_expr(mut self, expr: Expr) -> DagSelect { let mut exec = Executor::default(); exec.set_tp(ExecType::TypeSelection); let mut selection = Selection::default(); @@ -195,20 +195,20 @@ impl DAGSelect { } #[must_use] - pub fn desc(mut self, desc: bool) -> DAGSelect { + pub fn desc(mut self, desc: bool) -> DagSelect { self.execs[0].mut_tbl_scan().set_desc(desc); self } #[must_use] - pub fn paging_size(mut self, paging_size: u64) -> DAGSelect { + pub fn paging_size(mut self, paging_size: u64) -> DagSelect { assert_ne!(paging_size, 0); self.paging_size = Some(paging_size); self } #[must_use] - pub fn key_ranges(mut self, key_ranges: Vec) -> DAGSelect { + pub fn key_ranges(mut self, key_ranges: Vec) -> DagSelect { self.key_ranges = key_ranges; self } diff --git a/components/test_raftstore/src/config.rs b/components/test_raftstore/src/config.rs index 15748773409..a86b8eb1bf0 100644 --- a/components/test_raftstore/src/config.rs +++ b/components/test_raftstore/src/config.rs @@ -2,25 +2,25 @@ use std::ops::{Deref, DerefMut}; -use tikv::config::TiKvConfig; +use tikv::config::TikvConfig; #[derive(Clone)] pub struct Config { - pub tikv: TiKvConfig, + pub tikv: TikvConfig, pub prefer_mem: bool, } impl Deref for Config { - type Target = TiKvConfig; + type Target = TikvConfig; #[inline] - fn deref(&self) -> &TiKvConfig { + fn deref(&self) -> &TikvConfig { &self.tikv } } impl DerefMut for Config { #[inline] - fn deref_mut(&mut self) -> &mut TiKvConfig { + fn deref_mut(&mut self) -> &mut TikvConfig { &mut self.tikv } } diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index e33837ebd76..eaeaf6a4e0f 100644 --- 
a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -131,10 +131,10 @@ pub fn must_region_cleared(engine: &Engines, region } lazy_static! { - static ref TEST_CONFIG: TiKvConfig = { + static ref TEST_CONFIG: TikvConfig = { let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); let common_test_cfg = manifest_dir.join("src/common-test.toml"); - TiKvConfig::from_file(&common_test_cfg, None).unwrap_or_else(|e| { + TikvConfig::from_file(&common_test_cfg, None).unwrap_or_else(|e| { panic!( "invalid auto generated configuration file {}, err {}", manifest_dir.display(), @@ -144,13 +144,13 @@ lazy_static! { }; } -pub fn new_tikv_config(cluster_id: u64) -> TiKvConfig { +pub fn new_tikv_config(cluster_id: u64) -> TikvConfig { let mut cfg = TEST_CONFIG.clone(); cfg.server.cluster_id = cluster_id; cfg } -pub fn new_tikv_config_with_api_ver(cluster_id: u64, api_ver: ApiVersion) -> TiKvConfig { +pub fn new_tikv_config_with_api_ver(cluster_id: u64, api_ver: ApiVersion) -> TikvConfig { let mut cfg = TEST_CONFIG.clone(); cfg.server.cluster_id = cluster_id; cfg.storage.set_api_version(api_ver); diff --git a/components/tidb_query_datatype/src/codec/chunk/chunk.rs b/components/tidb_query_datatype/src/codec/chunk/chunk.rs index ee111d11f77..b4478c8a4d3 100644 --- a/components/tidb_query_datatype/src/codec/chunk/chunk.rs +++ b/components/tidb_query_datatype/src/codec/chunk/chunk.rs @@ -188,7 +188,7 @@ mod tests { FieldTypeTp::DateTime.into(), FieldTypeTp::Duration.into(), FieldTypeTp::NewDecimal.into(), - FieldTypeTp::JSON.into(), + FieldTypeTp::Json.into(), FieldTypeTp::String.into(), ]; let json: Json = r#"{"k1":"v1"}"#.parse().unwrap(); @@ -229,7 +229,7 @@ mod tests { FieldTypeTp::DateTime.into(), FieldTypeTp::Duration.into(), FieldTypeTp::NewDecimal.into(), - FieldTypeTp::JSON.into(), + FieldTypeTp::Json.into(), FieldTypeTp::String.into(), ]; let json: Json = r#"{"k1":"v1"}"#.parse().unwrap(); @@ -329,7 +329,7 @@ mod tests { fn 
bench_encode_from_raw_json_datum(b: &mut Bencher) { let json: Json = r#"{"k1":"v1"}"#.parse().unwrap(); let datum = Datum::Json(json); - bench_encode_from_raw_datum_impl(b, datum, FieldTypeTp::JSON); + bench_encode_from_raw_datum_impl(b, datum, FieldTypeTp::Json); } #[test] @@ -341,7 +341,7 @@ mod tests { FieldTypeTp::VarChar.into(), FieldTypeTp::VarChar.into(), FieldTypeTp::NewDecimal.into(), - FieldTypeTp::JSON.into(), + FieldTypeTp::Json.into(), ]; let mut chunk = Chunk::new(&fields, rows); diff --git a/components/tidb_query_datatype/src/codec/chunk/column.rs b/components/tidb_query_datatype/src/codec/chunk/column.rs index f7f13363686..ef1c2602864 100644 --- a/components/tidb_query_datatype/src/codec/chunk/column.rs +++ b/components/tidb_query_datatype/src/codec/chunk/column.rs @@ -316,7 +316,7 @@ impl Column { } FieldTypeTp::Duration => Datum::Dur(self.get_duration(idx, field_type.decimal())?), FieldTypeTp::NewDecimal => Datum::Dec(self.get_decimal(idx)?), - FieldTypeTp::JSON => Datum::Json(self.get_json(idx)?), + FieldTypeTp::Json => Datum::Json(self.get_json(idx)?), FieldTypeTp::Enum => Datum::Enum(self.get_enum(idx)?), FieldTypeTp::Bit => Datum::Bytes(self.get_bytes(idx).to_vec()), FieldTypeTp::Set => { @@ -1142,7 +1142,7 @@ mod tests { #[test] fn test_column_json() { - let fields: Vec = vec![FieldTypeTp::JSON.into()]; + let fields: Vec = vec![FieldTypeTp::Json.into()]; let json: Json = r#"{"k1":"v1"}"#.parse().unwrap(); let data = vec![Datum::Null, Datum::Json(json)]; diff --git a/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs b/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs index 43a6289e640..6f27475ff2c 100644 --- a/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs +++ b/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs @@ -6,9 +6,9 @@ use super::*; use crate::codec::data_type::{BytesGuard, BytesWriter}; #[derive(Debug)] -pub struct EncodingGBK; +pub struct EncodingGbk; -impl 
Encoding for EncodingGBK { +impl Encoding for EncodingGbk { #[inline] fn decode(data: BytesRef<'_>) -> Result { match GBK.decode_without_bom_handling_and_without_replacement(data) { diff --git a/components/tidb_query_datatype/src/codec/collation/mod.rs b/components/tidb_query_datatype/src/codec/collation/mod.rs index b3033c06d84..cdc21cbe35a 100644 --- a/components/tidb_query_datatype/src/codec/collation/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/mod.rs @@ -49,10 +49,10 @@ macro_rules! match_template_charset { match_template::match_template! { $t = [ - UTF8 => EncodingUtf8, - UTF8Mb4 => EncodingUtf8Mb4, + Utf8 => EncodingUtf8, + Utf8Mb4 => EncodingUtf8Mb4, Latin1 => EncodingLatin1, - GBK => EncodingGBK, + Gbk => EncodingGbk, Binary => EncodingBinary, Ascii => EncodingAscii, ], diff --git a/components/tidb_query_datatype/src/codec/row/v2/compat_v1.rs b/components/tidb_query_datatype/src/codec/row/v2/compat_v1.rs index 2e4a0703d4a..79c08ec5404 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/compat_v1.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/compat_v1.rs @@ -102,7 +102,7 @@ pub trait V1CompatibleEncoder: DatumFlagAndPayloadEncoder { // Copy datum payload as it is self.write_bytes(src)?; } - FieldTypeTp::JSON => { + FieldTypeTp::Json => { self.write_u8(datum::JSON_FLAG)?; // Copy datum payload as it is self.write_bytes(src)?; @@ -288,7 +288,7 @@ mod tests { let mut ctx = EvalContext::default(); for value in cases { - let col = Column::new(1, value.clone()).with_tp(FieldTypeTp::JSON); + let col = Column::new(1, value.clone()).with_tp(FieldTypeTp::Json); let buf = encode_to_v1_compatible(&mut ctx, &col); let got: Json = buf.decode(col.ft(), &mut ctx).unwrap().unwrap(); assert_eq!(value, got); diff --git a/components/tidb_query_datatype/src/codec/table.rs b/components/tidb_query_datatype/src/codec/table.rs index 7155748571f..0c995487b3d 100644 --- a/components/tidb_query_datatype/src/codec/table.rs +++ 
b/components/tidb_query_datatype/src/codec/table.rs @@ -284,7 +284,7 @@ fn unflatten( FieldTypeTp::VarChar, FieldTypeTp::String, FieldTypeTp::NewDecimal, - FieldTypeTp::JSON + FieldTypeTp::Json, ] .contains(&t), "unknown type {} {}", @@ -631,7 +631,7 @@ mod tests { (1, FieldTypeTp::LongLong.into()), (2, FieldTypeTp::VarChar.into()), (3, FieldTypeTp::NewDecimal.into()), - (5, FieldTypeTp::JSON.into()), + (5, FieldTypeTp::Json.into()), (6, duration_col), ]); diff --git a/components/tidb_query_datatype/src/def/eval_type.rs b/components/tidb_query_datatype/src/def/eval_type.rs index 855802119b9..e6cd7da1b6a 100644 --- a/components/tidb_query_datatype/src/def/eval_type.rs +++ b/components/tidb_query_datatype/src/def/eval_type.rs @@ -33,7 +33,7 @@ impl EvalType { EvalType::Bytes => crate::FieldTypeTp::String, EvalType::DateTime => crate::FieldTypeTp::DateTime, EvalType::Duration => crate::FieldTypeTp::Duration, - EvalType::Json => crate::FieldTypeTp::JSON, + EvalType::Json => crate::FieldTypeTp::Json, EvalType::Enum => crate::FieldTypeTp::Enum, EvalType::Set => crate::FieldTypeTp::Set, } @@ -66,7 +66,7 @@ impl std::convert::TryFrom for EvalType { | crate::FieldTypeTp::Date | crate::FieldTypeTp::DateTime => EvalType::DateTime, crate::FieldTypeTp::Duration => EvalType::Duration, - crate::FieldTypeTp::JSON => EvalType::Json, + crate::FieldTypeTp::Json => EvalType::Json, crate::FieldTypeTp::VarChar | crate::FieldTypeTp::TinyBlob | crate::FieldTypeTp::MediumBlob @@ -115,7 +115,7 @@ mod tests { (NewDate, None), (VarChar, Some(EvalType::Bytes)), (Bit, Some(EvalType::Int)), - (JSON, Some(EvalType::Json)), + (Json, Some(EvalType::Json)), (NewDecimal, Some(EvalType::Decimal)), (Enum, Some(EvalType::Enum)), (Set, None), diff --git a/components/tidb_query_datatype/src/def/field_type.rs b/components/tidb_query_datatype/src/def/field_type.rs index 417d7b0d146..903ec738e89 100644 --- a/components/tidb_query_datatype/src/def/field_type.rs +++ 
b/components/tidb_query_datatype/src/def/field_type.rs @@ -36,7 +36,7 @@ pub enum FieldTypeTp { NewDate = 14, VarChar = 15, Bit = 16, - JSON = 0xf5, + Json = 0xf5, NewDecimal = 0xf6, Enum = 0xf7, Set = 0xf8, @@ -52,7 +52,7 @@ pub enum FieldTypeTp { impl FieldTypeTp { fn from_i32(i: i32) -> Option { if (i >= FieldTypeTp::Unspecified as i32 && i <= FieldTypeTp::Bit as i32) - || (i >= FieldTypeTp::JSON as i32 && i <= FieldTypeTp::Geometry as i32) + || (i >= FieldTypeTp::Json as i32 && i <= FieldTypeTp::Geometry as i32) { Some(unsafe { ::std::mem::transmute::(i) }) } else { @@ -61,7 +61,7 @@ impl FieldTypeTp { } pub fn from_u8(i: u8) -> Option { - if i <= FieldTypeTp::Bit as u8 || i >= FieldTypeTp::JSON as u8 { + if i <= FieldTypeTp::Bit as u8 || i >= FieldTypeTp::Json as u8 { Some(unsafe { ::std::mem::transmute::(i32::from(i)) }) } else { None @@ -148,10 +148,10 @@ impl fmt::Display for Collation { #[derive(PartialEq, Debug, Clone, Copy)] pub enum Charset { - UTF8, - UTF8Mb4, + Utf8, + Utf8Mb4, Latin1, - GBK, + Gbk, Binary, Ascii, } @@ -159,10 +159,10 @@ pub enum Charset { impl Charset { pub fn from_name(name: &str) -> Result { match name { - "utf8mb4" => Ok(Charset::UTF8Mb4), - "utf8" => Ok(Charset::UTF8), + "utf8mb4" => Ok(Charset::Utf8Mb4), + "utf8" => Ok(Charset::Utf8), "latin1" => Ok(Charset::Latin1), - "gbk" => Ok(Charset::GBK), + "gbk" => Ok(Charset::Gbk), "binary" => Ok(Charset::Binary), "ascii" => Ok(Charset::Ascii), _ => Err(DataTypeError::UnsupportedCharset { @@ -471,7 +471,7 @@ mod tests { FieldTypeTp::NewDate, FieldTypeTp::VarChar, FieldTypeTp::Bit, - FieldTypeTp::JSON, + FieldTypeTp::Json, FieldTypeTp::NewDecimal, FieldTypeTp::Enum, FieldTypeTp::Set, @@ -556,9 +556,9 @@ mod tests { #[test] fn test_charset_from_str() { let cases = vec![ - ("gbk", Some(Charset::GBK)), - ("utf8mb4", Some(Charset::UTF8Mb4)), - ("utf8", Some(Charset::UTF8)), + ("gbk", Some(Charset::Gbk)), + ("utf8mb4", Some(Charset::Utf8Mb4)), + ("utf8", Some(Charset::Utf8)), ("binary", 
Some(Charset::Binary)), ("latin1", Some(Charset::Latin1)), ("ascii", Some(Charset::Ascii)), diff --git a/components/tidb_query_expr/src/impl_compare.rs b/components/tidb_query_expr/src/impl_compare.rs index 350b36a3a99..a8dbf96d1cb 100644 --- a/components/tidb_query_expr/src/impl_compare.rs +++ b/components/tidb_query_expr/src/impl_compare.rs @@ -153,63 +153,63 @@ pub trait CmpOp { fn compare_order(ordering: std::cmp::Ordering) -> bool; } -pub struct CmpOpLT; +pub struct CmpOpLt; -impl CmpOp for CmpOpLT { +impl CmpOp for CmpOpLt { #[inline] fn compare_order(ordering: Ordering) -> bool { ordering == Ordering::Less } } -pub struct CmpOpLE; +pub struct CmpOpLe; -impl CmpOp for CmpOpLE { +impl CmpOp for CmpOpLe { #[inline] fn compare_order(ordering: Ordering) -> bool { ordering != Ordering::Greater } } -pub struct CmpOpGT; +pub struct CmpOpGt; -impl CmpOp for CmpOpGT { +impl CmpOp for CmpOpGt { #[inline] fn compare_order(ordering: Ordering) -> bool { ordering == Ordering::Greater } } -pub struct CmpOpGE; +pub struct CmpOpGe; -impl CmpOp for CmpOpGE { +impl CmpOp for CmpOpGe { #[inline] fn compare_order(ordering: Ordering) -> bool { ordering != Ordering::Less } } -pub struct CmpOpNE; +pub struct CmpOpNe; -impl CmpOp for CmpOpNE { +impl CmpOp for CmpOpNe { #[inline] fn compare_order(ordering: Ordering) -> bool { ordering != Ordering::Equal } } -pub struct CmpOpEQ; +pub struct CmpOpEq; -impl CmpOp for CmpOpEQ { +impl CmpOp for CmpOpEq { #[inline] fn compare_order(ordering: Ordering) -> bool { ordering == Ordering::Equal } } -pub struct CmpOpNullEQ; +pub struct CmpOpNullEq; -impl CmpOp for CmpOpNullEQ { +impl CmpOp for CmpOpNullEq { #[inline] fn compare_null() -> Option { Some(1) @@ -547,220 +547,220 @@ mod tests { #[derive(Clone, Copy, PartialEq)] enum TestCaseCmpOp { - GT, - GE, - LT, - LE, - EQ, - NE, - NullEQ, + Gt, + Ge, + Lt, + Le, + Eq, + Ne, + NullEq, } #[allow(clippy::type_complexity)] fn generate_numeric_compare_cases() -> Vec<(Option, Option, TestCaseCmpOp, 
Option)> { vec![ - (None, None, TestCaseCmpOp::GT, None), - (Real::new(3.5).ok(), None, TestCaseCmpOp::GT, None), - (Real::new(-2.1).ok(), None, TestCaseCmpOp::GT, None), - (None, Real::new(3.5).ok(), TestCaseCmpOp::GT, None), - (None, Real::new(-2.1).ok(), TestCaseCmpOp::GT, None), + (None, None, TestCaseCmpOp::Gt, None), + (Real::new(3.5).ok(), None, TestCaseCmpOp::Gt, None), + (Real::new(-2.1).ok(), None, TestCaseCmpOp::Gt, None), + (None, Real::new(3.5).ok(), TestCaseCmpOp::Gt, None), + (None, Real::new(-2.1).ok(), TestCaseCmpOp::Gt, None), ( Real::new(3.5).ok(), Real::new(-2.1).ok(), - TestCaseCmpOp::GT, + TestCaseCmpOp::Gt, Some(1), ), ( Real::new(-2.1).ok(), Real::new(3.5).ok(), - TestCaseCmpOp::GT, + TestCaseCmpOp::Gt, Some(0), ), ( Real::new(3.5).ok(), Real::new(3.5).ok(), - TestCaseCmpOp::GT, + TestCaseCmpOp::Gt, Some(0), ), ( Real::new(-2.1).ok(), Real::new(-2.1).ok(), - TestCaseCmpOp::GT, + TestCaseCmpOp::Gt, Some(0), ), - (None, None, TestCaseCmpOp::GE, None), - (Real::new(3.5).ok(), None, TestCaseCmpOp::GE, None), - (Real::new(-2.1).ok(), None, TestCaseCmpOp::GE, None), - (None, Real::new(3.5).ok(), TestCaseCmpOp::GE, None), - (None, Real::new(-2.1).ok(), TestCaseCmpOp::GE, None), + (None, None, TestCaseCmpOp::Ge, None), + (Real::new(3.5).ok(), None, TestCaseCmpOp::Ge, None), + (Real::new(-2.1).ok(), None, TestCaseCmpOp::Ge, None), + (None, Real::new(3.5).ok(), TestCaseCmpOp::Ge, None), + (None, Real::new(-2.1).ok(), TestCaseCmpOp::Ge, None), ( Real::new(3.5).ok(), Real::new(-2.1).ok(), - TestCaseCmpOp::GE, + TestCaseCmpOp::Ge, Some(1), ), ( Real::new(-2.1).ok(), Real::new(3.5).ok(), - TestCaseCmpOp::GE, + TestCaseCmpOp::Ge, Some(0), ), ( Real::new(3.5).ok(), Real::new(3.5).ok(), - TestCaseCmpOp::GE, + TestCaseCmpOp::Ge, Some(1), ), ( Real::new(-2.1).ok(), Real::new(-2.1).ok(), - TestCaseCmpOp::GE, + TestCaseCmpOp::Ge, Some(1), ), - (None, None, TestCaseCmpOp::LT, None), - (Real::new(3.5).ok(), None, TestCaseCmpOp::LT, None), - (Real::new(-2.1).ok(), 
None, TestCaseCmpOp::LT, None), - (None, Real::new(3.5).ok(), TestCaseCmpOp::LT, None), - (None, Real::new(-2.1).ok(), TestCaseCmpOp::LT, None), + (None, None, TestCaseCmpOp::Lt, None), + (Real::new(3.5).ok(), None, TestCaseCmpOp::Lt, None), + (Real::new(-2.1).ok(), None, TestCaseCmpOp::Lt, None), + (None, Real::new(3.5).ok(), TestCaseCmpOp::Lt, None), + (None, Real::new(-2.1).ok(), TestCaseCmpOp::Lt, None), ( Real::new(3.5).ok(), Real::new(-2.1).ok(), - TestCaseCmpOp::LT, + TestCaseCmpOp::Lt, Some(0), ), ( Real::new(-2.1).ok(), Real::new(3.5).ok(), - TestCaseCmpOp::LT, + TestCaseCmpOp::Lt, Some(1), ), ( Real::new(3.5).ok(), Real::new(3.5).ok(), - TestCaseCmpOp::LT, + TestCaseCmpOp::Lt, Some(0), ), ( Real::new(-2.1).ok(), Real::new(-2.1).ok(), - TestCaseCmpOp::LT, + TestCaseCmpOp::Lt, Some(0), ), - (None, None, TestCaseCmpOp::LE, None), - (Real::new(3.5).ok(), None, TestCaseCmpOp::LE, None), - (Real::new(-2.1).ok(), None, TestCaseCmpOp::LE, None), - (None, Real::new(3.5).ok(), TestCaseCmpOp::LE, None), - (None, Real::new(-2.1).ok(), TestCaseCmpOp::LE, None), + (None, None, TestCaseCmpOp::Le, None), + (Real::new(3.5).ok(), None, TestCaseCmpOp::Le, None), + (Real::new(-2.1).ok(), None, TestCaseCmpOp::Le, None), + (None, Real::new(3.5).ok(), TestCaseCmpOp::Le, None), + (None, Real::new(-2.1).ok(), TestCaseCmpOp::Le, None), ( Real::new(3.5).ok(), Real::new(-2.1).ok(), - TestCaseCmpOp::LE, + TestCaseCmpOp::Le, Some(0), ), ( Real::new(-2.1).ok(), Real::new(3.5).ok(), - TestCaseCmpOp::LE, + TestCaseCmpOp::Le, Some(1), ), ( Real::new(3.5).ok(), Real::new(3.5).ok(), - TestCaseCmpOp::LE, + TestCaseCmpOp::Le, Some(1), ), ( Real::new(-2.1).ok(), Real::new(-2.1).ok(), - TestCaseCmpOp::LE, + TestCaseCmpOp::Le, Some(1), ), - (None, None, TestCaseCmpOp::EQ, None), - (Real::new(3.5).ok(), None, TestCaseCmpOp::EQ, None), - (Real::new(-2.1).ok(), None, TestCaseCmpOp::EQ, None), - (None, Real::new(3.5).ok(), TestCaseCmpOp::EQ, None), - (None, Real::new(-2.1).ok(), TestCaseCmpOp::EQ, 
None), + (None, None, TestCaseCmpOp::Eq, None), + (Real::new(3.5).ok(), None, TestCaseCmpOp::Eq, None), + (Real::new(-2.1).ok(), None, TestCaseCmpOp::Eq, None), + (None, Real::new(3.5).ok(), TestCaseCmpOp::Eq, None), + (None, Real::new(-2.1).ok(), TestCaseCmpOp::Eq, None), ( Real::new(3.5).ok(), Real::new(-2.1).ok(), - TestCaseCmpOp::EQ, + TestCaseCmpOp::Eq, Some(0), ), ( Real::new(-2.1).ok(), Real::new(3.5).ok(), - TestCaseCmpOp::EQ, + TestCaseCmpOp::Eq, Some(0), ), ( Real::new(3.5).ok(), Real::new(3.5).ok(), - TestCaseCmpOp::EQ, + TestCaseCmpOp::Eq, Some(1), ), ( Real::new(-2.1).ok(), Real::new(-2.1).ok(), - TestCaseCmpOp::EQ, + TestCaseCmpOp::Eq, Some(1), ), - (None, None, TestCaseCmpOp::NE, None), - (Real::new(3.5).ok(), None, TestCaseCmpOp::NE, None), - (Real::new(-2.1).ok(), None, TestCaseCmpOp::NE, None), - (None, Real::new(3.5).ok(), TestCaseCmpOp::NE, None), - (None, Real::new(-2.1).ok(), TestCaseCmpOp::NE, None), + (None, None, TestCaseCmpOp::Ne, None), + (Real::new(3.5).ok(), None, TestCaseCmpOp::Ne, None), + (Real::new(-2.1).ok(), None, TestCaseCmpOp::Ne, None), + (None, Real::new(3.5).ok(), TestCaseCmpOp::Ne, None), + (None, Real::new(-2.1).ok(), TestCaseCmpOp::Ne, None), ( Real::new(3.5).ok(), Real::new(-2.1).ok(), - TestCaseCmpOp::NE, + TestCaseCmpOp::Ne, Some(1), ), ( Real::new(-2.1).ok(), Real::new(3.5).ok(), - TestCaseCmpOp::NE, + TestCaseCmpOp::Ne, Some(1), ), ( Real::new(3.5).ok(), Real::new(3.5).ok(), - TestCaseCmpOp::NE, + TestCaseCmpOp::Ne, Some(0), ), ( Real::new(-2.1).ok(), Real::new(-2.1).ok(), - TestCaseCmpOp::NE, + TestCaseCmpOp::Ne, Some(0), ), - (None, None, TestCaseCmpOp::NullEQ, Some(1)), - (Real::new(3.5).ok(), None, TestCaseCmpOp::NullEQ, Some(0)), - (Real::new(-2.1).ok(), None, TestCaseCmpOp::NullEQ, Some(0)), - (None, Real::new(3.5).ok(), TestCaseCmpOp::NullEQ, Some(0)), - (None, Real::new(-2.1).ok(), TestCaseCmpOp::NullEQ, Some(0)), + (None, None, TestCaseCmpOp::NullEq, Some(1)), + (Real::new(3.5).ok(), None, 
TestCaseCmpOp::NullEq, Some(0)), + (Real::new(-2.1).ok(), None, TestCaseCmpOp::NullEq, Some(0)), + (None, Real::new(3.5).ok(), TestCaseCmpOp::NullEq, Some(0)), + (None, Real::new(-2.1).ok(), TestCaseCmpOp::NullEq, Some(0)), ( Real::new(3.5).ok(), Real::new(-2.1).ok(), - TestCaseCmpOp::NullEQ, + TestCaseCmpOp::NullEq, Some(0), ), ( Real::new(-2.1).ok(), Real::new(3.5).ok(), - TestCaseCmpOp::NullEQ, + TestCaseCmpOp::NullEq, Some(0), ), ( Real::new(3.5).ok(), Real::new(3.5).ok(), - TestCaseCmpOp::NullEQ, + TestCaseCmpOp::NullEq, Some(1), ), ( Real::new(-2.1).ok(), Real::new(-2.1).ok(), - TestCaseCmpOp::NullEQ, + TestCaseCmpOp::NullEq, Some(1), ), ] @@ -770,13 +770,13 @@ mod tests { fn test_compare_real() { for (arg0, arg1, cmp_op, expect_output) in generate_numeric_compare_cases() { let sig = match cmp_op { - TestCaseCmpOp::GT => ScalarFuncSig::GtReal, - TestCaseCmpOp::GE => ScalarFuncSig::GeReal, - TestCaseCmpOp::LT => ScalarFuncSig::LtReal, - TestCaseCmpOp::LE => ScalarFuncSig::LeReal, - TestCaseCmpOp::EQ => ScalarFuncSig::EqReal, - TestCaseCmpOp::NE => ScalarFuncSig::NeReal, - TestCaseCmpOp::NullEQ => ScalarFuncSig::NullEqReal, + TestCaseCmpOp::Gt => ScalarFuncSig::GtReal, + TestCaseCmpOp::Ge => ScalarFuncSig::GeReal, + TestCaseCmpOp::Lt => ScalarFuncSig::LtReal, + TestCaseCmpOp::Le => ScalarFuncSig::LeReal, + TestCaseCmpOp::Eq => ScalarFuncSig::EqReal, + TestCaseCmpOp::Ne => ScalarFuncSig::NeReal, + TestCaseCmpOp::NullEq => ScalarFuncSig::NullEqReal, }; let output = RpnFnScalarEvaluator::new() .push_param(arg0) @@ -795,13 +795,13 @@ mod tests { for (arg0, arg1, cmp_op, expect_output) in generate_numeric_compare_cases() { let sig = match cmp_op { - TestCaseCmpOp::GT => ScalarFuncSig::GtDuration, - TestCaseCmpOp::GE => ScalarFuncSig::GeDuration, - TestCaseCmpOp::LT => ScalarFuncSig::LtDuration, - TestCaseCmpOp::LE => ScalarFuncSig::LeDuration, - TestCaseCmpOp::EQ => ScalarFuncSig::EqDuration, - TestCaseCmpOp::NE => ScalarFuncSig::NeDuration, - TestCaseCmpOp::NullEQ 
=> ScalarFuncSig::NullEqDuration, + TestCaseCmpOp::Gt => ScalarFuncSig::GtDuration, + TestCaseCmpOp::Ge => ScalarFuncSig::GeDuration, + TestCaseCmpOp::Lt => ScalarFuncSig::LtDuration, + TestCaseCmpOp::Le => ScalarFuncSig::LeDuration, + TestCaseCmpOp::Eq => ScalarFuncSig::EqDuration, + TestCaseCmpOp::Ne => ScalarFuncSig::NeDuration, + TestCaseCmpOp::NullEq => ScalarFuncSig::NullEqDuration, }; let output = RpnFnScalarEvaluator::new() .push_param(arg0.map(map_double_to_duration)) @@ -822,13 +822,13 @@ mod tests { let mut ctx = EvalContext::default(); for (arg0, arg1, cmp_op, expect_output) in generate_numeric_compare_cases() { let sig = match cmp_op { - TestCaseCmpOp::GT => ScalarFuncSig::GtDecimal, - TestCaseCmpOp::GE => ScalarFuncSig::GeDecimal, - TestCaseCmpOp::LT => ScalarFuncSig::LtDecimal, - TestCaseCmpOp::LE => ScalarFuncSig::LeDecimal, - TestCaseCmpOp::EQ => ScalarFuncSig::EqDecimal, - TestCaseCmpOp::NE => ScalarFuncSig::NeDecimal, - TestCaseCmpOp::NullEQ => ScalarFuncSig::NullEqDecimal, + TestCaseCmpOp::Gt => ScalarFuncSig::GtDecimal, + TestCaseCmpOp::Ge => ScalarFuncSig::GeDecimal, + TestCaseCmpOp::Lt => ScalarFuncSig::LtDecimal, + TestCaseCmpOp::Le => ScalarFuncSig::LeDecimal, + TestCaseCmpOp::Eq => ScalarFuncSig::EqDecimal, + TestCaseCmpOp::Ne => ScalarFuncSig::NeDecimal, + TestCaseCmpOp::NullEq => ScalarFuncSig::NullEqDecimal, }; let output = RpnFnScalarEvaluator::new() .push_param(arg0.map(|v| f64_to_decimal(&mut ctx, v.into_inner()).unwrap())) @@ -843,13 +843,13 @@ mod tests { fn test_compare_signed_int() { for (arg0, arg1, cmp_op, expect_output) in generate_numeric_compare_cases() { let sig = match cmp_op { - TestCaseCmpOp::GT => ScalarFuncSig::GtInt, - TestCaseCmpOp::GE => ScalarFuncSig::GeInt, - TestCaseCmpOp::LT => ScalarFuncSig::LtInt, - TestCaseCmpOp::LE => ScalarFuncSig::LeInt, - TestCaseCmpOp::EQ => ScalarFuncSig::EqInt, - TestCaseCmpOp::NE => ScalarFuncSig::NeInt, - TestCaseCmpOp::NullEQ => ScalarFuncSig::NullEqInt, + TestCaseCmpOp::Gt => 
ScalarFuncSig::GtInt, + TestCaseCmpOp::Ge => ScalarFuncSig::GeInt, + TestCaseCmpOp::Lt => ScalarFuncSig::LtInt, + TestCaseCmpOp::Le => ScalarFuncSig::LeInt, + TestCaseCmpOp::Eq => ScalarFuncSig::EqInt, + TestCaseCmpOp::Ne => ScalarFuncSig::NeInt, + TestCaseCmpOp::NullEq => ScalarFuncSig::NullEqInt, }; let output = RpnFnScalarEvaluator::new() .push_param(arg0.map(|v| v.into_inner() as i64)) diff --git a/components/tidb_query_expr/src/impl_math.rs b/components/tidb_query_expr/src/impl_math.rs index 55e86ee14d0..abd190d077a 100644 --- a/components/tidb_query_expr/src/impl_math.rs +++ b/components/tidb_query_expr/src/impl_math.rs @@ -345,7 +345,7 @@ fn rand() -> Result> { #[inline] #[rpn_fn(nullable)] fn rand_with_seed_first_gen(seed: Option<&i64>) -> Result> { - let mut rng = MySQLRng::new_with_seed(seed.cloned().unwrap_or(0)); + let mut rng = MySqlRng::new_with_seed(seed.cloned().unwrap_or(0)); let res = rng.gen(); Ok(Real::new(res).ok()) } @@ -548,7 +548,7 @@ pub fn round_with_frac_real(arg0: &Real, arg1: &Int) -> Result> { } thread_local! 
{ - static MYSQL_RNG: RefCell = RefCell::new(MySQLRng::new()) + static MYSQL_RNG: RefCell = RefCell::new(MySqlRng::new()) } #[derive(Copy, Clone)] @@ -672,12 +672,12 @@ pub fn i64_to_usize(i: i64, is_unsigned: bool) -> (usize, bool) { } } -pub struct MySQLRng { +pub struct MySqlRng { seed1: u32, seed2: u32, } -impl MySQLRng { +impl MySqlRng { fn new() -> Self { let current_time = time::get_time(); let nsec = i64::from(current_time.nsec); @@ -687,7 +687,7 @@ impl MySQLRng { fn new_with_seed(seed: i64) -> Self { let seed1 = (seed.wrapping_mul(0x10001).wrapping_add(55555555)) as u32 % MAX_RAND_VALUE; let seed2 = (seed.wrapping_mul(0x10000001)) as u32 % MAX_RAND_VALUE; - MySQLRng { seed1, seed2 } + MySqlRng { seed1, seed2 } } fn gen(&mut self) -> f64 { @@ -697,7 +697,7 @@ impl MySQLRng { } } -impl Default for MySQLRng { +impl Default for MySqlRng { fn default() -> Self { Self::new() } @@ -2030,9 +2030,9 @@ mod tests { #[test] #[allow(clippy::float_cmp)] fn test_rand_new() { - let mut rng1 = MySQLRng::new(); + let mut rng1 = MySqlRng::new(); std::thread::sleep(std::time::Duration::from_millis(100)); - let mut rng2 = MySQLRng::new(); + let mut rng2 = MySqlRng::new(); let got1 = rng1.gen(); let got2 = rng2.gen(); assert!(got1 < 1.0); @@ -2054,7 +2054,7 @@ mod tests { (9223372036854775807, 0.9050373219931845, 0.37014932126752037), ]; for (seed, exp1, exp2) in tests { - let mut rand = MySQLRng::new_with_seed(seed); + let mut rand = MySqlRng::new_with_seed(seed); let res1 = rand.gen(); assert_eq!(res1, exp1); let res2 = rand.gen(); diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index 679d4e003f8..b5a2ce226c5 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -430,20 +430,20 @@ fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { ScalarFuncSig::FromBinary => map_from_binary_fn_sig(expr)?, // impl_compare - ScalarFuncSig::LtInt => map_int_sig(value, children, compare_mapper::)?, - 
ScalarFuncSig::LtReal => compare_fn_meta::>(), - ScalarFuncSig::LtDecimal => compare_fn_meta::>(), - ScalarFuncSig::LtString => map_string_compare_sig::(ft)?, - ScalarFuncSig::LtTime => compare_fn_meta::>(), - ScalarFuncSig::LtDuration => compare_fn_meta::>(), - ScalarFuncSig::LtJson => compare_json_fn_meta::(), - ScalarFuncSig::LeInt => map_int_sig(value, children, compare_mapper::)?, - ScalarFuncSig::LeReal => compare_fn_meta::>(), - ScalarFuncSig::LeDecimal => compare_fn_meta::>(), - ScalarFuncSig::LeString => map_string_compare_sig::(ft)?, - ScalarFuncSig::LeTime => compare_fn_meta::>(), - ScalarFuncSig::LeDuration => compare_fn_meta::>(), - ScalarFuncSig::LeJson => compare_json_fn_meta::(), + ScalarFuncSig::LtInt => map_int_sig(value, children, compare_mapper::)?, + ScalarFuncSig::LtReal => compare_fn_meta::>(), + ScalarFuncSig::LtDecimal => compare_fn_meta::>(), + ScalarFuncSig::LtString => map_string_compare_sig::(ft)?, + ScalarFuncSig::LtTime => compare_fn_meta::>(), + ScalarFuncSig::LtDuration => compare_fn_meta::>(), + ScalarFuncSig::LtJson => compare_json_fn_meta::(), + ScalarFuncSig::LeInt => map_int_sig(value, children, compare_mapper::)?, + ScalarFuncSig::LeReal => compare_fn_meta::>(), + ScalarFuncSig::LeDecimal => compare_fn_meta::>(), + ScalarFuncSig::LeString => map_string_compare_sig::(ft)?, + ScalarFuncSig::LeTime => compare_fn_meta::>(), + ScalarFuncSig::LeDuration => compare_fn_meta::>(), + ScalarFuncSig::LeJson => compare_json_fn_meta::(), ScalarFuncSig::GreatestInt => greatest_int_fn_meta(), ScalarFuncSig::GreatestDecimal => greatest_decimal_fn_meta(), ScalarFuncSig::GreatestString => greatest_string_fn_meta(), @@ -464,41 +464,41 @@ fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { ScalarFuncSig::LeastCmpStringAsTime=> least_cmp_string_as_time_fn_meta(), ScalarFuncSig::LeastDuration => least_duration_fn_meta(), ScalarFuncSig::IntervalReal => interval_real_fn_meta(), - ScalarFuncSig::GtInt => map_int_sig(value, children, 
compare_mapper::)?, - ScalarFuncSig::GtReal => compare_fn_meta::>(), - ScalarFuncSig::GtDecimal => compare_fn_meta::>(), - ScalarFuncSig::GtString => map_string_compare_sig::(ft)?, - ScalarFuncSig::GtTime => compare_fn_meta::>(), - ScalarFuncSig::GtDuration => compare_fn_meta::>(), - ScalarFuncSig::GtJson => compare_json_fn_meta::(), - ScalarFuncSig::GeInt => map_int_sig(value, children, compare_mapper::)?, - ScalarFuncSig::GeReal => compare_fn_meta::>(), - ScalarFuncSig::GeDecimal => compare_fn_meta::>(), - ScalarFuncSig::GeString => map_string_compare_sig::(ft)?, - ScalarFuncSig::GeTime => compare_fn_meta::>(), - ScalarFuncSig::GeDuration => compare_fn_meta::>(), - ScalarFuncSig::GeJson => compare_json_fn_meta::(), - ScalarFuncSig::NeInt => map_int_sig(value, children, compare_mapper::)?, - ScalarFuncSig::NeReal => compare_fn_meta::>(), - ScalarFuncSig::NeDecimal => compare_fn_meta::>(), - ScalarFuncSig::NeString => map_string_compare_sig::(ft)?, - ScalarFuncSig::NeTime => compare_fn_meta::>(), - ScalarFuncSig::NeDuration => compare_fn_meta::>(), - ScalarFuncSig::NeJson => compare_json_fn_meta::(), - ScalarFuncSig::EqInt => map_int_sig(value, children, compare_mapper::)?, - ScalarFuncSig::EqReal => compare_fn_meta::>(), - ScalarFuncSig::EqDecimal => compare_fn_meta::>(), - ScalarFuncSig::EqString => map_string_compare_sig::(ft)?, - ScalarFuncSig::EqTime => compare_fn_meta::>(), - ScalarFuncSig::EqDuration => compare_fn_meta::>(), - ScalarFuncSig::EqJson => compare_json_fn_meta::(), - ScalarFuncSig::NullEqInt => map_int_sig(value, children, compare_mapper::)?, - ScalarFuncSig::NullEqReal => compare_fn_meta::>(), - ScalarFuncSig::NullEqDecimal => compare_fn_meta::>(), - ScalarFuncSig::NullEqString => map_string_compare_sig::(ft)?, - ScalarFuncSig::NullEqTime => compare_fn_meta::>(), - ScalarFuncSig::NullEqDuration => compare_fn_meta::>(), - ScalarFuncSig::NullEqJson => compare_json_fn_meta::(), + ScalarFuncSig::GtInt => map_int_sig(value, children, 
compare_mapper::)?, + ScalarFuncSig::GtReal => compare_fn_meta::>(), + ScalarFuncSig::GtDecimal => compare_fn_meta::>(), + ScalarFuncSig::GtString => map_string_compare_sig::(ft)?, + ScalarFuncSig::GtTime => compare_fn_meta::>(), + ScalarFuncSig::GtDuration => compare_fn_meta::>(), + ScalarFuncSig::GtJson => compare_json_fn_meta::(), + ScalarFuncSig::GeInt => map_int_sig(value, children, compare_mapper::)?, + ScalarFuncSig::GeReal => compare_fn_meta::>(), + ScalarFuncSig::GeDecimal => compare_fn_meta::>(), + ScalarFuncSig::GeString => map_string_compare_sig::(ft)?, + ScalarFuncSig::GeTime => compare_fn_meta::>(), + ScalarFuncSig::GeDuration => compare_fn_meta::>(), + ScalarFuncSig::GeJson => compare_json_fn_meta::(), + ScalarFuncSig::NeInt => map_int_sig(value, children, compare_mapper::)?, + ScalarFuncSig::NeReal => compare_fn_meta::>(), + ScalarFuncSig::NeDecimal => compare_fn_meta::>(), + ScalarFuncSig::NeString => map_string_compare_sig::(ft)?, + ScalarFuncSig::NeTime => compare_fn_meta::>(), + ScalarFuncSig::NeDuration => compare_fn_meta::>(), + ScalarFuncSig::NeJson => compare_json_fn_meta::(), + ScalarFuncSig::EqInt => map_int_sig(value, children, compare_mapper::)?, + ScalarFuncSig::EqReal => compare_fn_meta::>(), + ScalarFuncSig::EqDecimal => compare_fn_meta::>(), + ScalarFuncSig::EqString => map_string_compare_sig::(ft)?, + ScalarFuncSig::EqTime => compare_fn_meta::>(), + ScalarFuncSig::EqDuration => compare_fn_meta::>(), + ScalarFuncSig::EqJson => compare_json_fn_meta::(), + ScalarFuncSig::NullEqInt => map_int_sig(value, children, compare_mapper::)?, + ScalarFuncSig::NullEqReal => compare_fn_meta::>(), + ScalarFuncSig::NullEqDecimal => compare_fn_meta::>(), + ScalarFuncSig::NullEqString => map_string_compare_sig::(ft)?, + ScalarFuncSig::NullEqTime => compare_fn_meta::>(), + ScalarFuncSig::NullEqDuration => compare_fn_meta::>(), + ScalarFuncSig::NullEqJson => compare_json_fn_meta::(), ScalarFuncSig::CoalesceInt => coalesce_fn_meta::(), 
ScalarFuncSig::CoalesceReal => coalesce_fn_meta::(), ScalarFuncSig::CoalesceString => coalesce_bytes_fn_meta(), diff --git a/components/tidb_query_expr/src/types/expr_eval.rs b/components/tidb_query_expr/src/types/expr_eval.rs index 078bbf1bb80..b892333b0ef 100644 --- a/components/tidb_query_expr/src/types/expr_eval.rs +++ b/components/tidb_query_expr/src/types/expr_eval.rs @@ -1274,7 +1274,7 @@ mod tests { .push_column_ref_for_test(0) .push_column_ref_for_test(0) .push_fn_call_for_test( - compare_fn_meta::>(), + compare_fn_meta::>(), 2, FieldTypeTp::LongLong, ) @@ -1312,7 +1312,7 @@ mod tests { .push_column_ref_for_test(0) .push_column_ref_for_test(0) .push_fn_call_for_test( - compare_fn_meta::>(), + compare_fn_meta::>(), 2, FieldTypeTp::LongLong, ) diff --git a/components/tikv_alloc/src/error.rs b/components/tikv_alloc/src/error.rs index c098a387c2e..a8912389784 100644 --- a/components/tikv_alloc/src/error.rs +++ b/components/tikv_alloc/src/error.rs @@ -5,7 +5,7 @@ use std::{error, fmt}; #[derive(Debug)] pub enum ProfError { MemProfilingNotEnabled, - IOError(std::io::Error), + IoError(std::io::Error), JemallocError(String), PathEncodingError(std::ffi::OsString), /* When temp files are in a non-unicode directory, * OsString.into_string() will cause this error, */ @@ -18,7 +18,7 @@ impl fmt::Display for ProfError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { ProfError::MemProfilingNotEnabled => write!(f, "mem-profiling was not enabled"), - ProfError::IOError(e) => write!(f, "io error occurred {:?}", e), + ProfError::IoError(e) => write!(f, "io error occurred {:?}", e), ProfError::JemallocError(e) => write!(f, "jemalloc error {}", e), ProfError::PathEncodingError(path) => { write!(f, "Dump target path {:?} is not unicode encoding", path) @@ -32,7 +32,7 @@ impl fmt::Display for ProfError { impl From for ProfError { fn from(e: std::io::Error) -> Self { - ProfError::IOError(e) + ProfError::IoError(e) } } diff --git a/etc/error_code.toml 
b/etc/error_code.toml index 20887f27abd..9a42cc3769a 100644 --- a/etc/error_code.toml +++ b/etc/error_code.toml @@ -198,9 +198,9 @@ error = ''' KV:Engine:IO ''' -["KV:Engine:CFName"] +["KV:Engine:CfName"] error = ''' -KV:Engine:CFName +KV:Engine:CfName ''' ["KV:Engine:Codec"] @@ -463,9 +463,9 @@ error = ''' KV:SSTImporter:Future ''' -["KV:SSTImporter:RocksDB"] +["KV:SSTImporter:RocksDb"] error = ''' -KV:SSTImporter:RocksDB +KV:SSTImporter:RocksDb ''' ["KV:SSTImporter:ParseIntError"] diff --git a/src/config.rs b/src/config.rs index 8a9bf2d2468..3f609f6c10d 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2,7 +2,7 @@ //! Configuration for the entire server. //! -//! TiKV is configured through the `TiKvConfig` type, which is in turn +//! TiKV is configured through the `TikvConfig` type, which is in turn //! made up of many other configuration types. use std::{ @@ -2850,7 +2850,7 @@ impl QuotaConfig { #[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] -pub struct TiKvConfig { +pub struct TikvConfig { #[doc(hidden)] #[serde(skip_serializing)] #[online_config(hidden)] @@ -2975,9 +2975,9 @@ pub struct TiKvConfig { pub causal_ts: CausalTsConfig, } -impl Default for TiKvConfig { - fn default() -> TiKvConfig { - TiKvConfig { +impl Default for TikvConfig { + fn default() -> TikvConfig { + TikvConfig { cfg_path: "".to_owned(), log_level: slog::Level::Info.into(), log_file: "".to_owned(), @@ -3019,7 +3019,7 @@ impl Default for TiKvConfig { } } -impl TiKvConfig { +impl TikvConfig { pub fn infer_raft_db_path(&self, data_dir: Option<&str>) -> Result> { if self.raft_store.raftdb_path.is_empty() { let data_dir = data_dir.unwrap_or(&self.storage.data_dir); @@ -3286,7 +3286,7 @@ impl TiKvConfig { // As the init of `logger` is very early, this adjust needs to be separated and // called immediately after parsing the command line. 
pub fn logger_compatible_adjust(&mut self) { - let default_tikv_cfg = TiKvConfig::default(); + let default_tikv_cfg = TikvConfig::default(); let default_log_cfg = LogConfig::default(); if self.log_level != default_tikv_cfg.log_level { eprintln!("deprecated configuration, log-level has been moved to log.level"); @@ -3538,7 +3538,7 @@ impl TiKvConfig { let mut cfg = if let Some(keys) = unrecognized_keys { serde_ignored::deserialize(&mut deserializer, |key| keys.push(key.to_string())) } else { - ::deserialize(&mut deserializer) + ::deserialize(&mut deserializer) }?; deserializer.end()?; cfg.cfg_path = path.display().to_string(); @@ -3559,9 +3559,9 @@ impl TiKvConfig { self.rocksdb.write_into_metrics(); } - pub fn with_tmp() -> Result<(TiKvConfig, tempfile::TempDir), IoError> { + pub fn with_tmp() -> Result<(TikvConfig, tempfile::TempDir), IoError> { let tmp = tempfile::tempdir()?; - let mut cfg = TiKvConfig::default(); + let mut cfg = TikvConfig::default(); cfg.storage.data_dir = tmp.path().display().to_string(); cfg.cfg_path = tmp.path().join(LAST_CONFIG_FILE).display().to_string(); Ok((cfg, tmp)) @@ -3597,7 +3597,7 @@ impl TiKvConfig { /// Loads the previously-loaded configuration from `last_tikv.toml`, /// compares key configuration items and fails if they are not /// identical. -pub fn check_critical_config(config: &TiKvConfig) -> Result<(), String> { +pub fn check_critical_config(config: &TikvConfig) -> Result<(), String> { // Check current critical configurations with last time, if there are some // changes, user must guarantee relevant works have been done. 
if let Some(mut cfg) = get_last_config(&config.storage.data_dir) { @@ -3610,12 +3610,12 @@ pub fn check_critical_config(config: &TiKvConfig) -> Result<(), String> { Ok(()) } -fn get_last_config(data_dir: &str) -> Option { +fn get_last_config(data_dir: &str) -> Option { let store_path = Path::new(data_dir); let last_cfg_path = store_path.join(LAST_CONFIG_FILE); if last_cfg_path.exists() { return Some( - TiKvConfig::from_file(&last_cfg_path, None).unwrap_or_else(|e| { + TikvConfig::from_file(&last_cfg_path, None).unwrap_or_else(|e| { panic!( "invalid auto generated configuration file {}, err {}", last_cfg_path.display(), @@ -3628,7 +3628,7 @@ fn get_last_config(data_dir: &str) -> Option { } /// Persists config to `last_tikv.toml` -pub fn persist_config(config: &TiKvConfig) -> Result<(), String> { +pub fn persist_config(config: &TikvConfig) -> Result<(), String> { let store_path = Path::new(&config.storage.data_dir); let last_cfg_path = store_path.join(LAST_CONFIG_FILE); let tmp_cfg_path = store_path.join(TMP_CONFIG_FILE); @@ -3694,7 +3694,7 @@ pub fn write_config>(path: P, content: &[u8]) -> CfgResult<()> { } // convert tikv config to a flatten array. -pub fn to_flatten_config_info(cfg: &TiKvConfig) -> Vec { +pub fn to_flatten_config_info(cfg: &TikvConfig) -> Vec { fn to_cfg_value(default_value: &Value, cfg_value: Option<&Value>, key: &str) -> Value { let mut res = Map::with_capacity(2); res.insert("Name".into(), Value::String(key.into())); @@ -3746,7 +3746,7 @@ pub fn to_flatten_config_info(cfg: &TiKvConfig) -> Vec { } let cfg_value = to_value(cfg).unwrap(); - let default_value = to_value(TiKvConfig::default()).unwrap(); + let default_value = to_value(TikvConfig::default()).unwrap(); let mut key_buf = String::new(); let mut res = Vec::new(); @@ -3760,7 +3760,7 @@ pub fn to_flatten_config_info(cfg: &TiKvConfig) -> Vec { } lazy_static! 
{ - pub static ref TIKVCONFIG_TYPED: ConfigChange = TiKvConfig::default().typed(); + pub static ref TIKVCONFIG_TYPED: ConfigChange = TikvConfig::default().typed(); } fn serde_to_online_config(name: String) -> String { @@ -3918,7 +3918,7 @@ pub enum Module { PessimisticTxn, Gc, Split, - CDC, + Cdc, ResolvedTs, ResourceMetering, BackupStream, @@ -3947,7 +3947,7 @@ impl From<&str> for Module { "backup_stream" => Module::BackupStream, "pessimistic_txn" => Module::PessimisticTxn, "gc" => Module::Gc, - "cdc" => Module::CDC, + "cdc" => Module::Cdc, "resolved_ts" => Module::ResolvedTs, "resource_metering" => Module::ResourceMetering, "quota" => Module::Quota, @@ -3967,12 +3967,12 @@ pub struct ConfigController { #[derive(Default)] struct ConfigInner { - current: TiKvConfig, + current: TikvConfig, config_mgrs: HashMap>, } impl ConfigController { - pub fn new(current: TiKvConfig) -> Self { + pub fn new(current: TikvConfig) -> Self { ConfigController { inner: Arc::new(RwLock::new(ConfigInner { current, @@ -3988,7 +3988,7 @@ impl ConfigController { pub fn update_from_toml_file(&self) -> CfgResult<()> { let current = self.get_current(); - match TiKvConfig::from_file(Path::new(¤t.cfg_path), None) { + match TikvConfig::from_file(Path::new(¤t.cfg_path), None) { Ok(incoming) => { let diff = current.diff(&incoming); self.update_impl(diff, None) @@ -4066,7 +4066,7 @@ impl ConfigController { } } - pub fn get_current(&self) -> TiKvConfig { + pub fn get_current(&self) -> TikvConfig { self.inner.read().unwrap().current.clone() } } @@ -4122,13 +4122,13 @@ mod tests { #[test] fn test_check_critical_cfg_with() { - let mut tikv_cfg = TiKvConfig::default(); - let last_cfg = TiKvConfig::default(); + let mut tikv_cfg = TikvConfig::default(); + let last_cfg = TikvConfig::default(); tikv_cfg.validate().unwrap(); tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); - let mut tikv_cfg = TiKvConfig::default(); - let mut last_cfg = TiKvConfig::default(); + let mut tikv_cfg = TikvConfig::default(); + 
let mut last_cfg = TikvConfig::default(); tikv_cfg.rocksdb.wal_dir = "/data/wal_dir".to_owned(); tikv_cfg.validate().unwrap(); tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap_err(); @@ -4137,8 +4137,8 @@ mod tests { tikv_cfg.validate().unwrap(); tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); - let mut tikv_cfg = TiKvConfig::default(); - let mut last_cfg = TiKvConfig::default(); + let mut tikv_cfg = TikvConfig::default(); + let mut last_cfg = TikvConfig::default(); tikv_cfg.storage.data_dir = "/data1".to_owned(); tikv_cfg.validate().unwrap(); tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap_err(); @@ -4148,8 +4148,8 @@ mod tests { tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); // Enable Raft Engine. - let mut tikv_cfg = TiKvConfig::default(); - let mut last_cfg = TiKvConfig::default(); + let mut tikv_cfg = TikvConfig::default(); + let mut last_cfg = TikvConfig::default(); tikv_cfg.raft_engine.enable = true; last_cfg.raft_engine.enable = true; @@ -4162,8 +4162,8 @@ mod tests { tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); // Disable Raft Engine and uses RocksDB. 
- let mut tikv_cfg = TiKvConfig::default(); - let mut last_cfg = TiKvConfig::default(); + let mut tikv_cfg = TikvConfig::default(); + let mut last_cfg = TikvConfig::default(); tikv_cfg.raft_engine.enable = false; last_cfg.raft_engine.enable = false; @@ -4210,7 +4210,7 @@ mod tests { #[test] fn test_last_cfg_modified() { - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); let store_path = Path::new(&cfg.storage.data_dir); let last_cfg_path = store_path.join(LAST_CONFIG_FILE); @@ -4238,12 +4238,12 @@ mod tests { let file = path_buf.as_path(); let (s1, s2) = ("/xxx/wal_dir".to_owned(), "/yyy/wal_dir".to_owned()); - let mut tikv_cfg = TiKvConfig::default(); + let mut tikv_cfg = TikvConfig::default(); tikv_cfg.rocksdb.wal_dir = s1.clone(); tikv_cfg.raftdb.wal_dir = s2.clone(); tikv_cfg.write_to_file(file).unwrap(); - let cfg_from_file = TiKvConfig::from_file(file, None).unwrap_or_else(|e| { + let cfg_from_file = TikvConfig::from_file(file, None).unwrap_or_else(|e| { panic!( "invalid auto generated configuration file {}, err {}", file.display(), @@ -4257,7 +4257,7 @@ mod tests { tikv_cfg.rocksdb.wal_dir = s2.clone(); tikv_cfg.raftdb.wal_dir = s1.clone(); tikv_cfg.write_to_file(file).unwrap(); - let cfg_from_file = TiKvConfig::from_file(file, None).unwrap_or_else(|e| { + let cfg_from_file = TikvConfig::from_file(file, None).unwrap_or_else(|e| { panic!( "invalid auto generated configuration file {}, err {}", file.display(), @@ -4270,7 +4270,7 @@ mod tests { #[test] fn test_flatten_cfg() { - let mut cfg = TiKvConfig::default(); + let mut cfg = TikvConfig::default(); cfg.server.labels.insert("zone".into(), "test".into()); cfg.raft_store.raft_log_gc_count_limit = Some(123); @@ -4303,14 +4303,14 @@ mod tests { .unwrap(); let path = root_path.path().join("not_exist_dir"); - let mut tikv_cfg = TiKvConfig::default(); + let mut tikv_cfg = TikvConfig::default(); tikv_cfg.storage.data_dir = 
path.as_path().to_str().unwrap().to_owned(); persist_config(&tikv_cfg).unwrap(); } #[test] fn test_keepalive_check() { - let mut tikv_cfg = TiKvConfig::default(); + let mut tikv_cfg = TikvConfig::default(); tikv_cfg.pd.endpoints = vec!["".to_owned()]; let dur = tikv_cfg.raft_store.raft_heartbeat_interval(); tikv_cfg.server.grpc_keepalive_time = ReadableDuration(dur); @@ -4321,7 +4321,7 @@ mod tests { #[test] fn test_block_size() { - let mut tikv_cfg = TiKvConfig::default(); + let mut tikv_cfg = TikvConfig::default(); tikv_cfg.pd.endpoints = vec!["".to_owned()]; tikv_cfg.rocksdb.defaultcf.block_size = ReadableSize::gb(10); tikv_cfg.rocksdb.lockcf.block_size = ReadableSize::gb(10); @@ -4392,8 +4392,8 @@ mod tests { ConfigValue::from(10000u64) ); - let old = TiKvConfig::default(); - let mut incoming = TiKvConfig::default(); + let old = TikvConfig::default(); + let mut incoming = TikvConfig::default(); incoming.coprocessor.region_split_keys = Some(10000); incoming.gc.max_write_bytes_per_sec = ReadableSize::mb(100); incoming.rocksdb.defaultcf.block_cache_size = ReadableSize::mb(500); @@ -4487,7 +4487,7 @@ mod tests { #[allow(clippy::type_complexity)] fn new_engines( - cfg: TiKvConfig, + cfg: TikvConfig, ) -> ( Storage, ConfigController, @@ -4545,7 +4545,7 @@ mod tests { #[test] fn test_flow_control() { - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); cfg.storage.flow_control.l0_files_threshold = 50; cfg.validate().unwrap(); let (storage, cfg_controller, _, flow_controller) = new_engines::(cfg); @@ -4604,7 +4604,7 @@ mod tests { } } - let (cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (cfg, _dir) = TikvConfig::with_tmp().unwrap(); let cfg_controller = ConfigController::new(cfg); let (tx, rx) = channel::unbounded(); cfg_controller.register(Module::ResolvedTs, Box::new(TestConfigManager(tx))); @@ -4659,7 +4659,7 @@ mod tests { #[test] fn test_change_rocksdb_config() { - let (mut cfg, _dir) = 
TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); cfg.rocksdb.max_background_jobs = 4; cfg.rocksdb.max_background_flushes = 2; cfg.rocksdb.defaultcf.disable_auto_compactions = false; @@ -4737,7 +4737,7 @@ mod tests { #[test] fn test_change_rate_limiter_auto_tuned() { - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); // vanilla limiter does not support dynamically changing auto-tuned mode. cfg.rocksdb.rate_limiter_auto_tuned = true; cfg.validate().unwrap(); @@ -4761,7 +4761,7 @@ mod tests { #[test] fn test_change_shared_block_cache() { - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); cfg.storage.block_cache.shared = true; cfg.validate().unwrap(); let (storage, cfg_controller, ..) = new_engines::(cfg); @@ -4785,7 +4785,7 @@ mod tests { #[test] fn test_change_logconfig() { - let (cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (cfg, _dir) = TikvConfig::with_tmp().unwrap(); let cfg_controller = ConfigController::new(cfg); cfg_controller.register(Module::Log, Box::new(LogConfigManager)); @@ -4808,7 +4808,7 @@ mod tests { #[test] fn test_dispatch_titan_blob_run_mode_config() { - let mut cfg = TiKvConfig::default(); + let mut cfg = TikvConfig::default(); let mut incoming = cfg.clone(); cfg.rocksdb.defaultcf.titan.blob_run_mode = BlobRunMode::Normal; incoming.rocksdb.defaultcf.titan.blob_run_mode = BlobRunMode::Fallback; @@ -4828,7 +4828,7 @@ mod tests { #[test] fn test_change_ttl_check_poll_interval() { - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); cfg.storage.block_cache.shared = true; cfg.validate().unwrap(); let (_, cfg_controller, mut rx, _) = new_engines::(cfg); @@ -4845,7 +4845,7 @@ mod tests { #[test] fn test_change_store_scheduler_worker_pool_size() { - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, 
_dir) = TikvConfig::with_tmp().unwrap(); cfg.storage.scheduler_worker_pool_size = 4; cfg.validate().unwrap(); let (storage, cfg_controller, ..) = new_engines::(cfg); @@ -4895,7 +4895,7 @@ mod tests { #[test] fn test_change_quota_config() { - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); cfg.quota.foreground_cpu_time = 1000; cfg.quota.foreground_write_bandwidth = ReadableSize::mb(128); cfg.quota.foreground_read_bandwidth = ReadableSize::mb(256); @@ -5008,7 +5008,7 @@ mod tests { #[test] fn test_change_server_config() { - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); cfg.validate().unwrap(); let cfg_controller = ConfigController::new(cfg.clone()); let (scheduler, _receiver) = dummy_scheduler(); @@ -5022,7 +5022,7 @@ mod tests { )), ); - let check_cfg = |cfg: &TiKvConfig| { + let check_cfg = |cfg: &TikvConfig| { assert_eq!(&cfg_controller.get_current(), cfg); assert_eq!(&*version_tracker.value(), &cfg.server); }; @@ -5045,7 +5045,7 @@ mod tests { fn test_compatible_adjust_validate_equal() { // After calling many time of `compatible_adjust` and `validate` should has // the same effect as calling `compatible_adjust` and `validate` one time - let mut c = TiKvConfig::default(); + let mut c = TikvConfig::default(); let mut cfg = c.clone(); c.compatible_adjust(); c.validate().unwrap(); @@ -5063,7 +5063,7 @@ mod tests { [readpool.storage] [readpool.coprocessor] "#; - let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.compatible_adjust(); assert_eq!(cfg.readpool.storage.use_unified_pool, Some(true)); assert_eq!(cfg.readpool.coprocessor.use_unified_pool, Some(true)); @@ -5074,7 +5074,7 @@ mod tests { [readpool.coprocessor] normal-concurrency = 1 "#; - let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); 
cfg.compatible_adjust(); assert_eq!(cfg.readpool.storage.use_unified_pool, Some(false)); assert_eq!(cfg.readpool.coprocessor.use_unified_pool, Some(false)); @@ -5104,7 +5104,7 @@ mod tests { temp_config_writer.sync_data().unwrap(); let mut unrecognized_keys = Vec::new(); - let _ = TiKvConfig::from_file(temp_config_file.path(), Some(&mut unrecognized_keys)); + let _ = TikvConfig::from_file(temp_config_file.path(), Some(&mut unrecognized_keys)); assert_eq!( unrecognized_keys, @@ -5124,7 +5124,7 @@ mod tests { [raft-engine] enable = true "#; - let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.validate().unwrap(); assert_eq!( cfg.raft_engine.config.dir, @@ -5182,7 +5182,7 @@ mod tests { #[test] fn test_validate_tikv_config() { - let mut cfg = TiKvConfig::default(); + let mut cfg = TikvConfig::default(); cfg.validate().unwrap(); let default_region_split_check_diff = cfg.raft_store.region_split_check_diff().0; cfg.raft_store.region_split_check_diff = @@ -5226,19 +5226,19 @@ mod tests { } { - let mut cfg = TiKvConfig::default(); + let mut cfg = TikvConfig::default(); cfg.validate().unwrap(); } { - let mut cfg = TiKvConfig::default(); + let mut cfg = TikvConfig::default(); cfg.storage.data_dir = tmp_path_string_generate!(tmp_path, "data"); cfg.raft_store.raftdb_path = tmp_path_string_generate!(tmp_path, "data", "db"); cfg.validate().unwrap_err(); } { - let mut cfg = TiKvConfig::default(); + let mut cfg = TikvConfig::default(); cfg.storage.data_dir = tmp_path_string_generate!(tmp_path, "data", "kvdb"); cfg.raft_store.raftdb_path = tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); @@ -5247,7 +5247,7 @@ mod tests { } { - let mut cfg = TiKvConfig::default(); + let mut cfg = TikvConfig::default(); cfg.storage.data_dir = tmp_path_string_generate!(tmp_path, "data", "kvdb"); cfg.raft_store.raftdb_path = tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); @@ -5256,14 +5256,14 @@ mod 
tests { } { - let mut cfg = TiKvConfig::default(); + let mut cfg = TikvConfig::default(); cfg.rocksdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "wal"); cfg.raftdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "wal"); cfg.validate().unwrap_err(); } { - let mut cfg = TiKvConfig::default(); + let mut cfg = TikvConfig::default(); cfg.storage.data_dir = tmp_path_string_generate!(tmp_path, "data", "kvdb"); cfg.raft_store.raftdb_path = tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); @@ -5394,7 +5394,7 @@ mod tests { .map(|l| l.strip_prefix('#').unwrap_or(l)) .join("\n"); - let mut cfg: TiKvConfig = toml::from_str(&template_config).unwrap(); + let mut cfg: TikvConfig = toml::from_str(&template_config).unwrap(); cfg.validate().unwrap(); } @@ -5407,7 +5407,7 @@ mod tests { let mut deserializer = toml::Deserializer::new(&template_config); let mut unrecognized_keys = Vec::new(); - let _: TiKvConfig = serde_ignored::deserialize(&mut deserializer, |key| { + let _: TikvConfig = serde_ignored::deserialize(&mut deserializer, |key| { unrecognized_keys.push(key.to_string()) }) .unwrap(); @@ -5423,8 +5423,8 @@ mod tests { .map(|l| l.strip_prefix('#').unwrap_or(l)) .join("\n"); - let mut cfg: TiKvConfig = toml::from_str(&template_config).unwrap(); - let mut default_cfg = TiKvConfig::default(); + let mut cfg: TikvConfig = toml::from_str(&template_config).unwrap(); + let mut default_cfg = TikvConfig::default(); // Some default values are computed based on the environment. 
// Because we can't set config values for these in `config-template.toml`, we @@ -5514,7 +5514,7 @@ mod tests { .lines() .map(|l| l.strip_prefix('#').unwrap_or(l)) .join("\n"); - let _: TiKvConfig = toml::from_str(&template_config).unwrap(); + let _: TikvConfig = toml::from_str(&template_config).unwrap(); } Err(e) => { if e.is_timeout() { @@ -5531,7 +5531,7 @@ mod tests { let content = r#" [cdc] "#; - let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.validate().unwrap(); // old-value-cache-size is deprecated, 0 must not report error. @@ -5539,28 +5539,28 @@ mod tests { [cdc] old-value-cache-size = 0 "#; - let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.validate().unwrap(); let content = r#" [cdc] min-ts-interval = "0s" "#; - let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.validate().unwrap(); let content = r#" [cdc] incremental-scan-threads = 0 "#; - let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.validate().unwrap(); let content = r#" [cdc] incremental-scan-concurrency = 0 "#; - let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.validate().unwrap(); let content = r#" @@ -5568,7 +5568,7 @@ mod tests { incremental-scan-concurrency = 1 incremental-scan-threads = 2 "#; - let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.validate().unwrap(); } @@ -5591,7 +5591,7 @@ mod tests { ("backup_stream", Module::BackupStream), ("pessimistic_txn", Module::PessimisticTxn), ("gc", Module::Gc), - ("cdc", Module::CDC), + ("cdc", Module::Cdc), ("resolved_ts", Module::ResolvedTs), ("resource_metering", Module::ResourceMetering), 
("unknown", Module::Unknown("unknown".to_string())), @@ -5722,7 +5722,7 @@ mod tests { l0-files-threshold = 77 soft-pending-compaction-bytes-limit = "777GB" "#; - let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.validate().unwrap(); assert_eq!( cfg.rocksdb.defaultcf.level0_slowdown_writes_trigger, @@ -5744,7 +5744,7 @@ mod tests { soft-pending-compaction-bytes-limit = "888GB" [rocksdb.writecf] "#; - let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.validate().unwrap(); assert_eq!( cfg.rocksdb.defaultcf.level0_slowdown_writes_trigger, @@ -5767,7 +5767,7 @@ mod tests { level0-slowdown-writes-trigger = 66 soft-pending-compaction-bytes-limit = "666GB" "#; - let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.validate().unwrap(); assert_eq!( cfg.rocksdb.defaultcf.level0_slowdown_writes_trigger, @@ -5788,7 +5788,7 @@ mod tests { level0-slowdown-writes-trigger = 88 soft-pending-compaction-bytes-limit = "888GB" "#; - let mut cfg: TiKvConfig = toml::from_str(content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.validate().unwrap(); assert_eq!( cfg.rocksdb.defaultcf.level0_slowdown_writes_trigger, diff --git a/src/coprocessor/checksum.rs b/src/coprocessor/checksum.rs index 32819879188..f208b87ee0f 100644 --- a/src/coprocessor/checksum.rs +++ b/src/coprocessor/checksum.rs @@ -15,14 +15,14 @@ use tipb::{ChecksumAlgorithm, ChecksumRequest, ChecksumResponse}; use yatp::task::future::reschedule; use crate::{ - coprocessor::{dag::TiKvStorage, *}, + coprocessor::{dag::TikvStorage, *}, storage::{Snapshot, SnapshotStore, Statistics}, }; // `ChecksumContext` is used to handle `ChecksumRequest` pub struct ChecksumContext { req: ChecksumRequest, - scanner: RangesScanner>>, + scanner: RangesScanner>>, } impl ChecksumContext { @@ -43,7 
+43,7 @@ impl ChecksumContext { false, ); let scanner = RangesScanner::new(RangesScannerOptions { - storage: TiKvStorage::new(store, false), + storage: TikvStorage::new(store, false), ranges: ranges .into_iter() .map(|r| Range::from_pb_range(r, false)) diff --git a/src/coprocessor/dag/mod.rs b/src/coprocessor/dag/mod.rs index 8b3f561ce5f..5b06638f244 100644 --- a/src/coprocessor/dag/mod.rs +++ b/src/coprocessor/dag/mod.rs @@ -11,7 +11,7 @@ use tidb_query_common::{execute_stats::ExecSummary, storage::IntervalRange}; use tikv_alloc::trace::MemoryTraceGuard; use tipb::{DagRequest, SelectResponse, StreamResponse}; -pub use self::storage_impl::TiKvStorage; +pub use self::storage_impl::TikvStorage; use crate::{ coprocessor::{metrics::*, Deadline, RequestHandler, Result}, storage::{Statistics, Store}, @@ -103,7 +103,7 @@ impl BatchDagHandler { runner: tidb_query_executors::runner::BatchExecutorsRunner::from_request( req, ranges, - TiKvStorage::new(store, is_cache_enabled), + TikvStorage::new(store, is_cache_enabled), deadline, streaming_batch_limit, is_streaming, diff --git a/src/coprocessor/dag/storage_impl.rs b/src/coprocessor/dag/storage_impl.rs index 7f5e60081e7..6d819b7b94f 100644 --- a/src/coprocessor/dag/storage_impl.rs +++ b/src/coprocessor/dag/storage_impl.rs @@ -11,14 +11,14 @@ use crate::{ }; /// A `Storage` implementation over TiKV's storage. 
-pub struct TiKvStorage { +pub struct TikvStorage { store: S, scanner: Option, cf_stats_backlog: Statistics, met_newer_ts_data_backlog: NewerTsCheckState, } -impl TiKvStorage { +impl TikvStorage { pub fn new(store: S, check_can_be_cached: bool) -> Self { Self { store, @@ -33,7 +33,7 @@ impl TiKvStorage { } } -impl Storage for TiKvStorage { +impl Storage for TikvStorage { type Statistics = Statistics; fn begin_scan( diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index 70144f47ce1..e11558e73b3 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -37,7 +37,7 @@ use yatp::task::future::reschedule; use super::{cmsketch::CmSketch, fmsketch::FmSketch, histogram::Histogram}; use crate::{ - coprocessor::{dag::TiKvStorage, MEMTRACE_ANALYZE, *}, + coprocessor::{dag::TikvStorage, MEMTRACE_ANALYZE, *}, storage::{Snapshot, SnapshotStore, Statistics}, }; @@ -47,7 +47,7 @@ const ANALYZE_VERSION_V2: i32 = 2; // `AnalyzeContext` is used to handle `AnalyzeReq` pub struct AnalyzeContext { req: AnalyzeReq, - storage: Option>>, + storage: Option>>, ranges: Vec, storage_stats: Statistics, quota_limiter: Arc, @@ -76,7 +76,7 @@ impl AnalyzeContext { Ok(Self { req, - storage: Some(TiKvStorage::new(store, false)), + storage: Some(TikvStorage::new(store, false)), ranges, storage_stats: Statistics::default(), quota_limiter, @@ -126,7 +126,7 @@ impl AnalyzeContext { // it would build a histogram and count-min sketch of index values. 
async fn handle_index( req: AnalyzeIndexReq, - scanner: &mut RangesScanner>>, + scanner: &mut RangesScanner>>, is_common_handle: bool, ) -> Result> { let mut hist = Histogram::new(req.get_bucket_size() as usize); @@ -317,7 +317,7 @@ impl RequestHandler for AnalyzeContext { } struct RowSampleBuilder { - data: BatchTableScanExecutor>>, + data: BatchTableScanExecutor>>, max_sample_size: usize, max_fm_sketch_size: usize, @@ -331,7 +331,7 @@ struct RowSampleBuilder { impl RowSampleBuilder { fn new( mut req: AnalyzeColumnsReq, - storage: TiKvStorage>, + storage: TikvStorage>, ranges: Vec, quota_limiter: Arc, is_auto_analyze: bool, @@ -797,7 +797,7 @@ impl Drop for BaseRowSampleCollector { } struct SampleBuilder { - data: BatchTableScanExecutor>>, + data: BatchTableScanExecutor>>, max_bucket_size: usize, max_sample_size: usize, @@ -818,7 +818,7 @@ impl SampleBuilder { fn new( mut req: AnalyzeColumnsReq, common_handle_req: Option, - storage: TiKvStorage>, + storage: TikvStorage>, ranges: Vec, ) -> Result { let columns_info: Vec<_> = req.take_columns_info().into(); diff --git a/src/import/duplicate_detect.rs b/src/import/duplicate_detect.rs index 86e955c6cd2..c5429315938 100644 --- a/src/import/duplicate_detect.rs +++ b/src/import/duplicate_detect.rs @@ -181,7 +181,7 @@ impl DuplicateDetector { .map_err(from_kv_error)?; match value { Some(val) => pair.set_value(val.to_vec()), - None => return Err(Error::RocksDB("Not found defaultcf value".to_owned())), + None => return Err(Error::RocksDb("Not found defaultcf value".to_owned())), } } } @@ -217,7 +217,7 @@ impl Iterator for DuplicateDetector { fn from_kv_error(e: tikv_kv::Error) -> Error { match e { tikv_kv::Error(box tikv_kv::ErrorInner::Other(err)) => Error::Engine(err), - _ => Error::RocksDB("unkown error when request rocksdb".to_owned()), + _ => Error::RocksDb("unkown error when request rocksdb".to_owned()), } } diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index bd94a3638d4..fad5cd25ba8 100644 
--- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -19,7 +19,7 @@ use raftstore::RegionInfoAccessor; use tikv_util::worker::Scheduler; use super::engine_factory_v2::KvEngineFactoryV2; -use crate::config::{DbConfig, TiKvConfig, DEFAULT_ROCKSDB_SUB_DIR}; +use crate::config::{DbConfig, TikvConfig, DEFAULT_ROCKSDB_SUB_DIR}; struct FactoryInner { env: Arc, @@ -39,7 +39,7 @@ pub struct KvEngineFactoryBuilder { } impl KvEngineFactoryBuilder { - pub fn new(env: Arc, config: &TiKvConfig, store_path: impl Into) -> Self { + pub fn new(env: Arc, config: &TikvConfig, store_path: impl Into) -> Self { Self { inner: FactoryInner { env, diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index 4132b2e4c25..7f3bcaafe4f 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -194,14 +194,14 @@ mod tests { use engine_traits::{TabletFactory, CF_WRITE}; use super::*; - use crate::{config::TiKvConfig, server::KvEngineFactoryBuilder}; + use crate::{config::TikvConfig, server::KvEngineFactoryBuilder}; lazy_static! 
{ - static ref TEST_CONFIG: TiKvConfig = { + static ref TEST_CONFIG: TikvConfig = { let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); let common_test_cfg = manifest_dir.join("components/test_raftstore/src/common-test.toml"); - TiKvConfig::from_file(&common_test_cfg, None).unwrap_or_else(|e| { + TikvConfig::from_file(&common_test_cfg, None).unwrap_or_else(|e| { panic!( "invalid auto generated configuration file {}, err {}", manifest_dir.display(), diff --git a/src/server/errors.rs b/src/server/errors.rs index 8932de2dc38..c7a41947f79 100644 --- a/src/server/errors.rs +++ b/src/server/errors.rs @@ -6,7 +6,7 @@ use engine_traits::Error as EngineTraitError; use futures::channel::oneshot::Canceled; use grpcio::Error as GrpcError; use hyper::Error as HttpError; -use openssl::error::ErrorStack as OpenSSLError; +use openssl::error::ErrorStack as OpenSslError; use pd_client::Error as PdError; use protobuf::ProtobufError; use raftstore::Error as RaftServerError; @@ -65,7 +65,7 @@ pub enum Error { Http(#[from] HttpError), #[error("{0:?}")] - OpenSSL(#[from] OpenSSLError), + OpenSsl(#[from] OpenSslError), } pub type Result = result::Result; diff --git a/src/server/service/diagnostics/log.rs b/src/server/service/diagnostics/log.rs index 232ddd58b4b..6f06bf17b30 100644 --- a/src/server/service/diagnostics/log.rs +++ b/src/server/service/diagnostics/log.rs @@ -41,12 +41,12 @@ pub enum Error { InvalidRequest(String), ParseError(String), SearchError(String), - IOError(std::io::Error), + IoError(std::io::Error), } impl From for Error { fn from(err: std::io::Error) -> Self { - Error::IOError(err) + Error::IoError(err) } } diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 1ad81ec8900..8f0f9a23cae 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1,6 +1,6 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
-// #[PerformanceCriticalPath]: Tikv gRPC APIs implementation +// #[PerformanceCriticalPath]: TiKV gRPC APIs implementation use std::{mem, sync::Arc}; use api_version::KvFormat; diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 7911808e86b..3df7bf212d9 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -952,7 +952,7 @@ mod tests { use tikv_util::logger::get_log_level; use crate::{ - config::{ConfigController, TiKvConfig}, + config::{ConfigController, TikvConfig}, server::status_server::{profile::TEST_PROFILE_MUTEX, LogLevelRequest, StatusServer}, }; @@ -1045,12 +1045,12 @@ mod tests { .await .unwrap(); let resp_json = String::from_utf8_lossy(&v).to_string(); - let cfg = TiKvConfig::default(); + let cfg = TikvConfig::default(); serde_json::to_string(&cfg.get_encoder()) .map(|cfg_json| { assert_eq!(resp_json, cfg_json); }) - .expect("Could not convert TiKvConfig to string"); + .expect("Could not convert TikvConfig to string"); }); block_on(handle).unwrap(); status_server.stop(); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 966b6095310..6c4374f7c76 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -449,7 +449,7 @@ impl Storage { (ApiVersion::V2, ApiVersion::V1) if Self::is_txn_command(cmd) => { // For compatibility, accept TiDB request only. 
for key in keys { - if ApiV2::parse_key_mode(key.as_ref()) != KeyMode::TiDB { + if ApiV2::parse_key_mode(key.as_ref()) != KeyMode::Tidb { return Err(ErrorInner::invalid_key_mode( cmd, storage_api_version, @@ -513,7 +513,7 @@ impl Storage { range.0.as_ref().map(AsRef::as_ref), range.1.as_ref().map(AsRef::as_ref), ); - if ApiV2::parse_range_mode(range) != KeyMode::TiDB { + if ApiV2::parse_range_mode(range) != KeyMode::Tidb { return Err(ErrorInner::invalid_key_range_mode( cmd, storage_api_version, diff --git a/tests/benches/coprocessor_executors/index_scan/mod.rs b/tests/benches/coprocessor_executors/index_scan/mod.rs index ba29f08bb87..eb9f98ae73b 100644 --- a/tests/benches/coprocessor_executors/index_scan/mod.rs +++ b/tests/benches/coprocessor_executors/index_scan/mod.rs @@ -76,14 +76,14 @@ where { let mut inputs = vec![ Input::new(util::BatchIndexScanNext1024Bencher::::new()), - Input::new(util::IndexScanDAGBencher::::new(false, ROWS)), - Input::new(util::IndexScanDAGBencher::::new(true, ROWS)), + Input::new(util::IndexScanDagBencher::::new(false, ROWS)), + Input::new(util::IndexScanDagBencher::::new(true, ROWS)), ]; if crate::util::bench_level() >= 2 { let mut additional_inputs = vec![ Input::new(util::BatchIndexScanNext1024Bencher::::new()), - Input::new(util::IndexScanDAGBencher::::new(false, ROWS)), - Input::new(util::IndexScanDAGBencher::::new(true, ROWS)), + Input::new(util::IndexScanDagBencher::::new(false, ROWS)), + Input::new(util::IndexScanDagBencher::::new(true, ROWS)), ]; inputs.append(&mut additional_inputs); } diff --git a/tests/benches/coprocessor_executors/index_scan/util.rs b/tests/benches/coprocessor_executors/index_scan/util.rs index 87ca1086353..19c2be94195 100644 --- a/tests/benches/coprocessor_executors/index_scan/util.rs +++ b/tests/benches/coprocessor_executors/index_scan/util.rs @@ -8,7 +8,7 @@ use test_coprocessor::*; use tidb_query_datatype::expr::EvalConfig; use tidb_query_executors::{interface::*, BatchIndexScanExecutor}; use tikv::{ - 
coprocessor::{dag::TiKvStorage, RequestHandler}, + coprocessor::{dag::TikvStorage, RequestHandler}, storage::{RocksEngine, Statistics, Store as TxnStore}, }; use tipb::ColumnInfo; @@ -33,7 +33,7 @@ impl scan_bencher::ScanExecutorBuilder for BatchIndexScan unique: bool, ) -> Self::E { let mut executor = BatchIndexScanExecutor::new( - black_box(TiKvStorage::new( + black_box(TikvStorage::new( ToTxnStore::::to_store(store), false, )), @@ -53,12 +53,12 @@ impl scan_bencher::ScanExecutorBuilder for BatchIndexScan } } -pub struct IndexScanExecutorDAGBuilder { +pub struct IndexScanExecutorDagBuilder { _phantom: PhantomData, } -impl scan_bencher::ScanExecutorDAGHandlerBuilder - for IndexScanExecutorDAGBuilder +impl scan_bencher::ScanExecutorDagHandlerBuilder + for IndexScanExecutorDagBuilder { type T = T; type P = IndexScanParam; @@ -77,4 +77,4 @@ impl scan_bencher::ScanExecutorDAGHandlerBuilder pub type BatchIndexScanNext1024Bencher = scan_bencher::BatchScanNext1024Bencher>; -pub type IndexScanDAGBencher = scan_bencher::ScanDAGBencher>; +pub type IndexScanDagBencher = scan_bencher::ScanDagBencher>; diff --git a/tests/benches/coprocessor_executors/integrated/mod.rs b/tests/benches/coprocessor_executors/integrated/mod.rs index cb7e48f3bd7..0b3d638e854 100644 --- a/tests/benches/coprocessor_executors/integrated/mod.rs +++ b/tests/benches/coprocessor_executors/integrated/mod.rs @@ -19,7 +19,7 @@ where { let (table, store) = crate::table_scan::fixture::table_with_2_columns(input.rows); - // TODO: Change to use `DAGSelect` helper when it no longer place unnecessary + // TODO: Change to use `DagSelect` helper when it no longer place unnecessary // columns. 
let executors = &[ table_scan(&[table["id"].as_column_info()]), @@ -706,15 +706,15 @@ where rows_options.push(1); } let mut bencher_options: Vec>> = vec![ - Box::new(util::DAGBencher::::new(false)), - Box::new(util::DAGBencher::::new(true)), + Box::new(util::DagBencher::::new(false)), + Box::new(util::DagBencher::::new(true)), ]; if crate::util::bench_level() >= 2 { let mut additional_inputs: Vec>> = vec![ Box::new(util::BatchBencher::::new()), Box::new(util::BatchBencher::::new()), - Box::new(util::DAGBencher::::new(false)), - Box::new(util::DAGBencher::::new(true)), + Box::new(util::DagBencher::::new(false)), + Box::new(util::DagBencher::::new(true)), ]; bencher_options.append(&mut additional_inputs); } diff --git a/tests/benches/coprocessor_executors/integrated/util.rs b/tests/benches/coprocessor_executors/integrated/util.rs index d0c6bedaecd..d9cb5fd2138 100644 --- a/tests/benches/coprocessor_executors/integrated/util.rs +++ b/tests/benches/coprocessor_executors/integrated/util.rs @@ -7,7 +7,7 @@ use kvproto::coprocessor::KeyRange; use test_coprocessor::*; use tidb_query_datatype::expr::EvalConfig; use tikv::{ - coprocessor::dag::TiKvStorage, + coprocessor::dag::TikvStorage, storage::{RocksEngine, Store as TxnStore}, }; use tipb::Executor as PbExecutor; @@ -73,7 +73,7 @@ where crate::util::bencher::BatchNextAllBencher::new(|| { tidb_query_executors::runner::build_executors( black_box(executors.to_vec()), - black_box(TiKvStorage::new(ToTxnStore::::to_store(store), false)), + black_box(TikvStorage::new(ToTxnStore::::to_store(store), false)), black_box(ranges.to_vec()), black_box(Arc::new(EvalConfig::default())), black_box(false), @@ -88,12 +88,12 @@ where } } -pub struct DAGBencher { +pub struct DagBencher { pub batch: bool, _phantom: PhantomData, } -impl DAGBencher { +impl DagBencher { pub fn new(batch: bool) -> Self { Self { batch, @@ -102,7 +102,7 @@ impl DAGBencher { } } -impl IntegratedBencher for DAGBencher +impl IntegratedBencher for DagBencher where T: 
TxnStore + 'static, M: Measurement, @@ -119,7 +119,7 @@ where ranges: &[KeyRange], store: &Store, ) { - crate::util::bencher::DAGHandleBencher::new(|| { + crate::util::bencher::DagHandleBencher::new(|| { crate::util::build_dag_handler::(executors, ranges, store) }) .bench(b); diff --git a/tests/benches/coprocessor_executors/table_scan/mod.rs b/tests/benches/coprocessor_executors/table_scan/mod.rs index b030a236cbd..63cba5f1d7e 100644 --- a/tests/benches/coprocessor_executors/table_scan/mod.rs +++ b/tests/benches/coprocessor_executors/table_scan/mod.rs @@ -240,14 +240,14 @@ where { let mut inputs = vec![ Input::new(util::BatchTableScanNext1024Bencher::::new()), - Input::new(util::TableScanDAGBencher::::new(false, ROWS)), - Input::new(util::TableScanDAGBencher::::new(true, ROWS)), + Input::new(util::TableScanDagBencher::::new(false, ROWS)), + Input::new(util::TableScanDagBencher::::new(true, ROWS)), ]; if crate::util::bench_level() >= 2 { let mut additional_inputs = vec![ Input::new(util::BatchTableScanNext1024Bencher::::new()), - Input::new(util::TableScanDAGBencher::::new(false, ROWS)), - Input::new(util::TableScanDAGBencher::::new(true, ROWS)), + Input::new(util::TableScanDagBencher::::new(false, ROWS)), + Input::new(util::TableScanDagBencher::::new(true, ROWS)), ]; inputs.append(&mut additional_inputs); } diff --git a/tests/benches/coprocessor_executors/table_scan/util.rs b/tests/benches/coprocessor_executors/table_scan/util.rs index e66af09dc67..7bcfe436d62 100644 --- a/tests/benches/coprocessor_executors/table_scan/util.rs +++ b/tests/benches/coprocessor_executors/table_scan/util.rs @@ -8,7 +8,7 @@ use test_coprocessor::*; use tidb_query_datatype::expr::EvalConfig; use tidb_query_executors::{interface::*, BatchTableScanExecutor}; use tikv::{ - coprocessor::{dag::TiKvStorage, RequestHandler}, + coprocessor::{dag::TikvStorage, RequestHandler}, storage::{RocksEngine, Statistics, Store as TxnStore}, }; use tipb::ColumnInfo; @@ -33,7 +33,7 @@ impl 
scan_bencher::ScanExecutorBuilder for BatchTableScan _: (), ) -> Self::E { let mut executor = BatchTableScanExecutor::new( - black_box(TiKvStorage::new( + black_box(TikvStorage::new( ToTxnStore::::to_store(store), false, )), @@ -53,12 +53,12 @@ impl scan_bencher::ScanExecutorBuilder for BatchTableScan } } -pub struct TableScanExecutorDAGBuilder { +pub struct TableScanExecutorDagBuilder { _phantom: PhantomData, } -impl scan_bencher::ScanExecutorDAGHandlerBuilder - for TableScanExecutorDAGBuilder +impl scan_bencher::ScanExecutorDagHandlerBuilder + for TableScanExecutorDagBuilder { type T = T; type P = TableScanParam; @@ -77,4 +77,4 @@ impl scan_bencher::ScanExecutorDAGHandlerBuilder pub type BatchTableScanNext1024Bencher = scan_bencher::BatchScanNext1024Bencher>; -pub type TableScanDAGBencher = scan_bencher::ScanDAGBencher>; +pub type TableScanDagBencher = scan_bencher::ScanDagBencher>; diff --git a/tests/benches/coprocessor_executors/util/bencher.rs b/tests/benches/coprocessor_executors/util/bencher.rs index cfbd2c90bc2..64862582bd8 100644 --- a/tests/benches/coprocessor_executors/util/bencher.rs +++ b/tests/benches/coprocessor_executors/util/bencher.rs @@ -76,17 +76,17 @@ impl E> Bencher for BatchNextAllBencher { } /// Invoke handle request for a DAG handler. 
-pub struct DAGHandleBencher Box> { +pub struct DagHandleBencher Box> { handler_builder: F, } -impl Box> DAGHandleBencher { +impl Box> DagHandleBencher { pub fn new(handler_builder: F) -> Self { Self { handler_builder } } } -impl Box> Bencher for DAGHandleBencher { +impl Box> Bencher for DagHandleBencher { fn bench(&mut self, b: &mut criterion::Bencher<'_, M>) where M: Measurement, @@ -94,7 +94,7 @@ impl Box> Bencher for DAGHandleBencher { b.iter_batched_ref( &mut self.handler_builder, |handler| { - profiler::start("./DAGHandleBencher.profile"); + profiler::start("./DagHandleBencher.profile"); black_box(block_on(handler.handle_request()).unwrap()); profiler::stop(); }, diff --git a/tests/benches/coprocessor_executors/util/scan_bencher.rs b/tests/benches/coprocessor_executors/util/scan_bencher.rs index 64f65712d54..affc19436bb 100644 --- a/tests/benches/coprocessor_executors/util/scan_bencher.rs +++ b/tests/benches/coprocessor_executors/util/scan_bencher.rs @@ -26,7 +26,7 @@ pub trait ScanExecutorBuilder: 'static { ) -> Self::E; } -pub trait ScanExecutorDAGHandlerBuilder: 'static { +pub trait ScanExecutorDagHandlerBuilder: 'static { type T: TxnStore + 'static; type P: Copy + 'static; fn build( @@ -118,13 +118,13 @@ where } } -pub struct ScanDAGBencher { +pub struct ScanDagBencher { batch: bool, display_table_rows: usize, _phantom: PhantomData, } -impl ScanDAGBencher { +impl ScanDagBencher { pub fn new(batch: bool, display_table_rows: usize) -> Self { Self { batch, @@ -134,9 +134,9 @@ impl ScanDAGBencher { } } -impl ScanBencher for ScanDAGBencher +impl ScanBencher for ScanDagBencher where - B: ScanExecutorDAGHandlerBuilder, + B: ScanExecutorDagHandlerBuilder, M: Measurement, { fn name(&self) -> String { @@ -157,7 +157,7 @@ where store: &Store, parameters: B::P, ) { - crate::util::bencher::DAGHandleBencher::new(|| { + crate::util::bencher::DagHandleBencher::new(|| { B::build(self.batch, columns, ranges, store, parameters) }) .bench(b); diff --git 
a/tests/benches/misc/coprocessor/codec/chunk/mod.rs b/tests/benches/misc/coprocessor/codec/chunk/mod.rs index 84e524031d5..f956e2cb14e 100644 --- a/tests/benches/misc/coprocessor/codec/chunk/mod.rs +++ b/tests/benches/misc/coprocessor/codec/chunk/mod.rs @@ -22,7 +22,7 @@ fn bench_encode_chunk(b: &mut Bencher) { FieldTypeTp::VarChar.into(), FieldTypeTp::VarChar.into(), FieldTypeTp::NewDecimal.into(), - FieldTypeTp::JSON.into(), + FieldTypeTp::Json.into(), ]; let mut chunk = Chunk::new(&fields, rows); for row_id in 0..rows { diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index 10192db7bf0..4371e8999ce 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -25,7 +25,7 @@ use txn_types::{Key, Lock, LockType}; fn test_deadline() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &[]); - let req = DAGSelect::from(&product).build(); + let req = DagSelect::from(&product).build(); fail::cfg("deadline_check_fail", "return()").unwrap(); let resp = handle_request(&endpoint, req); @@ -39,7 +39,7 @@ fn test_deadline_2() { // beginning. 
let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &[]); - let req = DAGSelect::from(&product).build(); + let req = DagSelect::from(&product).build(); fail::cfg("rockskv_async_snapshot", "panic").unwrap(); fail::cfg("deadline_check_fail", "return()").unwrap(); @@ -68,7 +68,7 @@ fn test_deadline_3() { }; init_data_with_details(Context::default(), engine, &product, &data, true, &cfg) }; - let req = DAGSelect::from(&product).build(); + let req = DagSelect::from(&product).build(); fail::cfg("kv_cursor_seek", "sleep(2000)").unwrap(); fail::cfg("copr_batch_initial_size", "return(1)").unwrap(); @@ -89,7 +89,7 @@ fn test_deadline_3() { fn test_parse_request_failed() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &[]); - let req = DAGSelect::from(&product).build(); + let req = DagSelect::from(&product).build(); fail::cfg("coprocessor_parse_request", "return()").unwrap(); let resp = handle_request(&endpoint, req); @@ -102,7 +102,7 @@ fn test_parse_request_failed_2() { // It should not even take any snapshots when parse failed. 
let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &[]); - let req = DAGSelect::from(&product).build(); + let req = DagSelect::from(&product).build(); fail::cfg("rockskv_async_snapshot", "panic").unwrap(); fail::cfg("coprocessor_parse_request", "return()").unwrap(); @@ -115,7 +115,7 @@ fn test_parse_request_failed_2() { fn test_readpool_full() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &[]); - let req = DAGSelect::from(&product).build(); + let req = DagSelect::from(&product).build(); fail::cfg("future_pool_spawn_full", "return()").unwrap(); let resp = handle_request(&endpoint, req); @@ -127,7 +127,7 @@ fn test_readpool_full() { fn test_snapshot_failed() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &[]); - let req = DAGSelect::from(&product).build(); + let req = DagSelect::from(&product).build(); fail::cfg("rockskv_async_snapshot", "return()").unwrap(); let resp = handle_request(&endpoint, req); @@ -139,7 +139,7 @@ fn test_snapshot_failed() { fn test_snapshot_failed_2() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &[]); - let req = DAGSelect::from(&product).build(); + let req = DagSelect::from(&product).build(); fail::cfg("rockskv_async_snapshot_not_leader", "return()").unwrap(); let resp = handle_request(&endpoint, req); @@ -153,7 +153,7 @@ fn test_storage_error() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &data); - let req = DAGSelect::from(&product).build(); + let req = DagSelect::from(&product).build(); fail::cfg("kv_cursor_seek", "return()").unwrap(); let resp = handle_request(&endpoint, req); @@ -178,7 +178,7 @@ fn test_region_error_in_scan() { init_data_with_engine_and_commit(ctx.clone(), raft_engine, &product, &data, true); fail::cfg("region_snapshot_seek", "return()").unwrap(); - let req = DAGSelect::from(&product).build_with(ctx, &[0]); + let req = 
DagSelect::from(&product).build_with(ctx, &[0]); let resp = handle_request(&endpoint, req); assert!( @@ -210,7 +210,7 @@ fn test_paging_scan() { exp.reverse(); } - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .paging_size(paging_size as u64) .desc(desc) .build(); @@ -278,7 +278,7 @@ fn test_paging_scan_multi_ranges() { exp.reverse(); } - let builder = DAGSelect::from(&product) + let builder = DagSelect::from(&product) .paging_size(paging_size) .desc(desc); let mut range1 = builder.key_ranges[0].clone(); @@ -334,7 +334,7 @@ fn test_paging_scan_multi_ranges() { exp.reverse(); } - let builder = DAGSelect::from(&product) + let builder = DagSelect::from(&product) .paging_size(paging_size) .desc(desc); let mut range1 = builder.key_ranges[0].clone(); @@ -409,7 +409,7 @@ fn test_read_index_lock_checking_on_follower() { ctx.set_replica_read(true); let product = ProductTable::new(); - let mut req = DAGSelect::from(&product).build(); + let mut req = DagSelect::from(&product).build(); req.set_context(ctx); req.set_start_ts(100); diff --git a/tests/integrations/config/dynamic/gc_worker.rs b/tests/integrations/config/dynamic/gc_worker.rs index e3603d8cbab..3014ebc3ba2 100644 --- a/tests/integrations/config/dynamic/gc_worker.rs +++ b/tests/integrations/config/dynamic/gc_worker.rs @@ -4,7 +4,7 @@ use std::{sync::mpsc::channel, time::Duration}; use raftstore::router::RaftStoreBlackHole; use tikv::{ - config::{ConfigController, Module, TiKvConfig}, + config::{ConfigController, Module, TikvConfig}, server::gc_worker::{GcConfig, GcTask, GcWorker}, storage::kv::TestEngineBuilder, }; @@ -21,7 +21,7 @@ fn test_gc_config_validate() { } fn setup_cfg_controller( - cfg: TiKvConfig, + cfg: TikvConfig, ) -> ( GcWorker, ConfigController, @@ -62,7 +62,7 @@ where #[allow(clippy::float_cmp)] #[test] fn test_gc_worker_config_update() { - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); 
cfg.validate().unwrap(); let (gc_worker, cfg_controller) = setup_cfg_controller(cfg); let scheduler = gc_worker.scheduler(); @@ -96,7 +96,7 @@ fn test_gc_worker_config_update() { #[test] #[allow(clippy::float_cmp)] fn test_change_io_limit_by_config_manager() { - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); cfg.validate().unwrap(); let (gc_worker, cfg_controller) = setup_cfg_controller(cfg); let scheduler = gc_worker.scheduler(); @@ -134,7 +134,7 @@ fn test_change_io_limit_by_config_manager() { #[allow(clippy::float_cmp)] fn test_change_io_limit_by_debugger() { // Debugger use GcWorkerConfigManager to change io limit - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); cfg.validate().unwrap(); let (gc_worker, _) = setup_cfg_controller(cfg); let scheduler = gc_worker.scheduler(); diff --git a/tests/integrations/config/dynamic/pessimistic_txn.rs b/tests/integrations/config/dynamic/pessimistic_txn.rs index b7496de182d..49bedd38c73 100644 --- a/tests/integrations/config/dynamic/pessimistic_txn.rs +++ b/tests/integrations/config/dynamic/pessimistic_txn.rs @@ -36,7 +36,7 @@ impl StoreAddrResolver for MockResolver { } fn setup( - cfg: TiKvConfig, + cfg: TikvConfig, ) -> ( ConfigController, WaiterMgrScheduler, @@ -95,7 +95,7 @@ where fn test_lock_manager_cfg_update() { const DEFAULT_TIMEOUT: u64 = 3000; const DEFAULT_DELAY: u64 = 100; - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); cfg.pessimistic_txn.wait_for_lock_timeout = ReadableDuration::millis(DEFAULT_TIMEOUT); cfg.pessimistic_txn.wake_up_delay_duration = ReadableDuration::millis(DEFAULT_DELAY); cfg.pessimistic_txn.pipelined = false; diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index d1b34a3a498..35d5fe23e49 100644 --- 
a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -23,7 +23,7 @@ use resource_metering::CollectorRegHandle; use tempfile::TempDir; use test_raftstore::TestPdClient; use tikv::{ - config::{ConfigController, Module, TiKvConfig}, + config::{ConfigController, Module, TikvConfig}, import::SstImporter, }; use tikv_util::{ @@ -58,7 +58,7 @@ fn create_tmp_engine(dir: &TempDir) -> Engines { } fn start_raftstore( - cfg: TiKvConfig, + cfg: TikvConfig, dir: &TempDir, ) -> ( ConfigController, @@ -142,7 +142,7 @@ where #[test] fn test_update_raftstore_config() { - let (mut config, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut config, _dir) = TikvConfig::with_tmp().unwrap(); config.validate().unwrap(); let (cfg_controller, router, _, mut system) = start_raftstore(config.clone(), &_dir); diff --git a/tests/integrations/config/dynamic/snap.rs b/tests/integrations/config/dynamic/snap.rs index 2594c4ffcaf..5b9ef72b4c3 100644 --- a/tests/integrations/config/dynamic/snap.rs +++ b/tests/integrations/config/dynamic/snap.rs @@ -12,7 +12,7 @@ use raftstore::store::{fsm::create_raft_batch_system, SnapManager}; use security::SecurityManager; use tempfile::TempDir; use tikv::{ - config::{ConfigController, TiKvConfig}, + config::{ConfigController, TikvConfig}, server::{ config::{Config as ServerConfig, ServerConfigManager}, snap::{Runner as SnapHandler, Task as SnapTask}, @@ -24,7 +24,7 @@ use tikv_util::{ }; fn start_server( - cfg: TiKvConfig, + cfg: TikvConfig, dir: &TempDir, ) -> (ConfigController, LazyWorker, SnapManager) { let snap_mgr = { @@ -85,7 +85,7 @@ where #[test] fn test_update_server_config() { - let (mut config, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut config, _dir) = TikvConfig::with_tmp().unwrap(); config.validate().unwrap(); let (cfg_controller, snap_worker, snap_mgr) = start_server(config.clone(), &_dir); let mut svr_cfg = config.server.clone(); diff --git 
a/tests/integrations/config/dynamic/split_check.rs b/tests/integrations/config/dynamic/split_check.rs index 582ce8f115e..eb9b1a63986 100644 --- a/tests/integrations/config/dynamic/split_check.rs +++ b/tests/integrations/config/dynamic/split_check.rs @@ -15,7 +15,7 @@ use raftstore::{ }, store::{SplitCheckRunner as Runner, SplitCheckTask as Task}, }; -use tikv::config::{ConfigController, Module, TiKvConfig}; +use tikv::config::{ConfigController, Module, TikvConfig}; use tikv_util::worker::{LazyWorker, Scheduler, Worker}; fn tmp_engine>(path: P) -> RocksEngine { @@ -26,7 +26,7 @@ fn tmp_engine>(path: P) -> RocksEngine { .unwrap() } -fn setup(cfg: TiKvConfig, engine: RocksEngine) -> (ConfigController, LazyWorker) { +fn setup(cfg: TikvConfig, engine: RocksEngine) -> (ConfigController, LazyWorker) { let (router, _) = sync_channel(1); let runner = Runner::new( engine, @@ -62,7 +62,7 @@ where #[test] fn test_update_split_check_config() { - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); cfg.validate().unwrap(); let engine = tmp_engine(&cfg.storage.data_dir); let (cfg_controller, mut worker) = setup(cfg.clone(), engine); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 98bb55625fa..b8899a1de4f 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -42,7 +42,7 @@ mod test_config_client; #[test] fn test_toml_serde() { - let value = TiKvConfig::default(); + let value = TikvConfig::default(); let dump = toml::to_string_pretty(&value).unwrap(); let load = toml::from_str(&dump).unwrap(); assert_eq!(value, load); @@ -62,7 +62,7 @@ fn read_file_in_project_dir(path: &str) -> String { #[test] fn test_serde_custom_tikv_config() { - let mut value = TiKvConfig::default(); + let mut value = TikvConfig::default(); value.log_rotation_timespan = ReadableDuration::days(1); value.log.level = Level::Critical.into(); value.log.file.filename = "foo".to_owned(); 
@@ -808,7 +808,7 @@ fn test_serde_custom_tikv_config() { } } -fn diff_config(lhs: &TiKvConfig, rhs: &TiKvConfig) { +fn diff_config(lhs: &TikvConfig, rhs: &TikvConfig) { let lhs_str = format!("{:?}", lhs); let rhs_str = format!("{:?}", rhs); @@ -840,12 +840,12 @@ fn diff_config(lhs: &TiKvConfig, rhs: &TiKvConfig) { #[test] fn test_serde_default_config() { - let cfg: TiKvConfig = toml::from_str("").unwrap(); - assert_eq!(cfg, TiKvConfig::default()); + let cfg: TikvConfig = toml::from_str("").unwrap(); + assert_eq!(cfg, TikvConfig::default()); let content = read_file_in_project_dir("integrations/config/test-default.toml"); - let cfg: TiKvConfig = toml::from_str(&content).unwrap(); - assert_eq!(cfg, TiKvConfig::default()); + let cfg: TikvConfig = toml::from_str(&content).unwrap(); + assert_eq!(cfg, TikvConfig::default()); } #[test] @@ -854,8 +854,8 @@ fn test_readpool_default_config() { [readpool.unified] max-thread-count = 1 "#; - let cfg: TiKvConfig = toml::from_str(content).unwrap(); - let mut expected = TiKvConfig::default(); + let cfg: TikvConfig = toml::from_str(content).unwrap(); + let mut expected = TikvConfig::default(); expected.readpool.unified.max_thread_count = 1; assert_eq!(cfg, expected); } @@ -869,14 +869,14 @@ fn test_do_not_use_unified_readpool_with_legacy_config() { [readpool.coprocessor] normal-concurrency = 1 "#; - let cfg: TiKvConfig = toml::from_str(content).unwrap(); + let cfg: TikvConfig = toml::from_str(content).unwrap(); assert!(!cfg.readpool.is_unified_pool_enabled()); } #[test] fn test_block_cache_backward_compatible() { let content = read_file_in_project_dir("integrations/config/test-cache-compatible.toml"); - let mut cfg: TiKvConfig = toml::from_str(&content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(&content).unwrap(); assert!(cfg.storage.block_cache.shared); assert!(cfg.storage.block_cache.capacity.is_none()); cfg.compatible_adjust(); @@ -893,7 +893,7 @@ fn test_block_cache_backward_compatible() { #[test] fn 
test_log_backward_compatible() { let content = read_file_in_project_dir("integrations/config/test-log-compatible.toml"); - let mut cfg: TiKvConfig = toml::from_str(&content).unwrap(); + let mut cfg: TikvConfig = toml::from_str(&content).unwrap(); assert_eq!(cfg.log.level, slog::Level::Info.into()); assert_eq!(cfg.log.file.filename, ""); assert_eq!(cfg.log.format, LogFormat::Text); diff --git a/tests/integrations/config/test_config_client.rs b/tests/integrations/config/test_config_client.rs index 4ceb5d3affc..6faa68f3932 100644 --- a/tests/integrations/config/test_config_client.rs +++ b/tests/integrations/config/test_config_client.rs @@ -19,7 +19,7 @@ fn change(name: &str, value: &str) -> HashMap { #[test] fn test_update_config() { - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); cfg.validate().unwrap(); let cfg_controller = ConfigController::new(cfg); let mut cfg = cfg_controller.get_current(); @@ -68,7 +68,7 @@ fn test_dispatch_change() { } } - let (mut cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); cfg.validate().unwrap(); let cfg_controller = ConfigController::new(cfg); let mut cfg = cfg_controller.get_current(); @@ -89,7 +89,7 @@ fn test_dispatch_change() { #[test] fn test_write_update_to_file() { - let (mut cfg, tmp_dir) = TiKvConfig::with_tmp().unwrap(); + let (mut cfg, tmp_dir) = TikvConfig::with_tmp().unwrap(); cfg.cfg_path = tmp_dir.path().join("cfg_file").to_str().unwrap().to_owned(); { let c = r#" @@ -201,7 +201,7 @@ fn test_update_from_toml_file() { } } - let (cfg, _dir) = TiKvConfig::with_tmp().unwrap(); + let (cfg, _dir) = TikvConfig::with_tmp().unwrap(); let cfg_controller = ConfigController::new(cfg); let cfg = cfg_controller.get_current(); let mgr = CfgManager(Arc::new(Mutex::new(cfg.raft_store.clone()))); diff --git a/tests/integrations/coprocessor/test_checksum.rs b/tests/integrations/coprocessor/test_checksum.rs index 
3e08cfd22e9..db96393c860 100644 --- a/tests/integrations/coprocessor/test_checksum.rs +++ b/tests/integrations/coprocessor/test_checksum.rs @@ -13,7 +13,7 @@ use tidb_query_common::storage::{ Range, }; use tikv::{ - coprocessor::{dag::TiKvStorage, *}, + coprocessor::{dag::TikvStorage, *}, storage::{Engine, SnapshotStore}, }; use tipb::{ChecksumAlgorithm, ChecksumRequest, ChecksumResponse, ChecksumScanOn}; @@ -79,7 +79,7 @@ fn reversed_checksum_crc64_xor(store: &Store, range: KeyRange) -> false, ); let mut scanner = RangesScanner::new(RangesScannerOptions { - storage: TiKvStorage::new(store, false), + storage: TikvStorage::new(store, false), ranges: vec![Range::from_pb_range(range, false)], scan_backward_in_range: true, is_key_only: false, diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index 024ebddbdea..660e88905e4 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -63,7 +63,7 @@ fn test_select() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &data); // for dag selection - let req = DAGSelect::from(&product).build(); + let req = DagSelect::from(&product).build(); let mut resp = handle_select(&endpoint, req); let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); for (row, (id, name, cnt)) in spliter.zip(data) { @@ -97,7 +97,7 @@ fn test_batch_row_limit() { }; // for dag selection - let req = DAGSelect::from(&product).build(); + let req = DagSelect::from(&product).build(); let mut resp = handle_select(&endpoint, req); check_chunk_datum_count(resp.get_chunks(), chunk_datum_limit); let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); @@ -132,7 +132,7 @@ fn test_stream_batch_row_limit() { init_data_with_details(Context::default(), engine, &product, &data, true, &cfg) }; - let req = DAGSelect::from(&product).build(); + let req = DagSelect::from(&product).build(); 
assert_eq!(req.get_ranges().len(), 1); // only ignore first 7 bytes of the row id @@ -203,7 +203,7 @@ fn test_select_after_lease() { // Sleep until the leader lease is expired. thread::sleep(cluster.cfg.raft_store.raft_store_max_leader_lease.0); - let req = DAGSelect::from(&product).build_with(ctx, &[0]); + let req = DagSelect::from(&product).build_with(ctx, &[0]); let mut resp = handle_select(&endpoint, req); let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); for (row, (id, name, cnt)) in spliter.zip(data) { @@ -236,8 +236,8 @@ fn test_scan_detail() { }; let reqs = vec![ - DAGSelect::from(&product).build(), - DAGSelect::from_index(&product, &product["name"]).build(), + DagSelect::from(&product).build(), + DagSelect::from_index(&product, &product["name"]).build(), ]; for mut req in reqs { @@ -272,7 +272,7 @@ fn test_group_by() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &data); // for dag - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .group_by(&[&product["name"]]) .output_offsets(Some(vec![0])) .build(); @@ -314,7 +314,7 @@ fn test_aggr_count() { ]; // for dag - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .count(&product["count"]) .group_by(&[&product["name"]]) .output_offsets(Some(vec![0, 1])) @@ -344,7 +344,7 @@ fn test_aggr_count() { ]; // for dag - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .count(&product["id"]) .group_by(&[&product["name"], &product["count"]]) .build(); @@ -392,7 +392,7 @@ fn test_aggr_first() { ]; // for dag - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .first(&product["id"]) .group_by(&[&product["name"]]) .output_offsets(Some(vec![0, 1])) @@ -423,7 +423,7 @@ fn test_aggr_first() { ]; // for dag - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .first(&product["name"]) .group_by(&[&product["count"]]) .output_offsets(Some(vec![0, 1])) @@ -476,7 
+476,7 @@ fn test_aggr_avg() { (Datum::Bytes(b"name:5".to_vec()), (Datum::Dec(8.into()), 2)), ]; // for dag - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .avg(&product["count"]) .group_by(&[&product["name"]]) .build(); @@ -518,7 +518,7 @@ fn test_aggr_sum() { (Datum::Bytes(b"name:5".to_vec()), 8), ]; // for dag - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .sum(&product["count"]) .group_by(&[&product["name"]]) .output_offsets(Some(vec![0, 1])) @@ -586,7 +586,7 @@ fn test_aggr_extre() { ]; // for dag - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .max(&product["count"]) .min(&product["count"]) .group_by(&[&product["name"]]) @@ -662,7 +662,7 @@ fn test_aggr_bit_ops() { ]; // for dag - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .bit_and(&product["count"]) .bit_or(&product["count"]) .bit_xor(&product["count"]) @@ -709,7 +709,7 @@ fn test_order_by_column() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &data); // for dag - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .order_by(&product["count"], true) .order_by(&product["name"], false) .limit(5) @@ -747,7 +747,7 @@ fn test_order_by_pk_with_select_from_index() { let (_, endpoint) = init_with_data(&product, &data); let expect: Vec<_> = data.drain(..5).collect(); // for dag - let req = DAGSelect::from_index(&product, &product["name"]) + let req = DagSelect::from_index(&product, &product["name"]) .order_by(&product["id"], true) .limit(5) .build(); @@ -783,7 +783,7 @@ fn test_limit() { let (_, endpoint) = init_with_data(&product, &data); let expect: Vec<_> = data.drain(..5).collect(); // for dag - let req = DAGSelect::from(&product).limit(5).build(); + let req = DagSelect::from(&product).limit(5).build(); let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); @@ 
-817,7 +817,7 @@ fn test_reverse() { data.reverse(); let expect: Vec<_> = data.drain(..5).collect(); // for dag - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .limit(5) .order_by(&product["id"], true) .build(); @@ -852,7 +852,7 @@ fn test_index() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &data); // for dag - let req = DAGSelect::from_index(&product, &product["id"]).build(); + let req = DagSelect::from_index(&product, &product["id"]).build(); let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 1); @@ -882,7 +882,7 @@ fn test_index_reverse_limit() { data.reverse(); let expect: Vec<_> = data.drain(..5).collect(); // for dag - let req = DAGSelect::from_index(&product, &product["id"]) + let req = DagSelect::from_index(&product, &product["id"]) .limit(5) .order_by(&product["id"], true) .build(); @@ -914,7 +914,7 @@ fn test_limit_oom() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &data); // for dag - let req = DAGSelect::from_index(&product, &product["id"]) + let req = DagSelect::from_index(&product, &product["id"]) .limit(100000000) .build(); let mut resp = handle_select(&endpoint, req); @@ -953,7 +953,7 @@ fn test_del_select() { store.commit(); // for dag - let mut req = DAGSelect::from_index(&product, &product["id"]).build(); + let mut req = DagSelect::from_index(&product, &product["id"]).build(); req.mut_context().set_record_scan_stat(true); let resp = handle_request(&endpoint, req); @@ -985,7 +985,7 @@ fn test_index_group_by() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &data); // for dag - let req = DAGSelect::from_index(&product, &product["name"]) + let req = DagSelect::from_index(&product, &product["name"]) .group_by(&[&product["name"]]) .output_offsets(Some(vec![0])) .build(); @@ -1020,7 +1020,7 @@ fn test_index_aggr_count() { let product = 
ProductTable::new(); let (_, endpoint) = init_with_data(&product, &data); // for dag - let req = DAGSelect::from_index(&product, &product["name"]) + let req = DagSelect::from_index(&product, &product["name"]) .count(&product["id"]) .output_offsets(Some(vec![0])) .build(); @@ -1045,7 +1045,7 @@ fn test_index_aggr_count() { (Datum::Bytes(b"name:5".to_vec()), 2), ]; // for dag - let req = DAGSelect::from_index(&product, &product["name"]) + let req = DagSelect::from_index(&product, &product["name"]) .count(&product["id"]) .group_by(&[&product["name"]]) .output_offsets(Some(vec![0, 1])) @@ -1073,7 +1073,7 @@ fn test_index_aggr_count() { (vec![Datum::Bytes(b"name:3".to_vec()), Datum::I64(3)], 1), (vec![Datum::Bytes(b"name:5".to_vec()), Datum::I64(4)], 2), ]; - let req = DAGSelect::from_index(&product, &product["name"]) + let req = DagSelect::from_index(&product, &product["name"]) .count(&product["id"]) .group_by(&[&product["name"], &product["count"]]) .build(); @@ -1116,7 +1116,7 @@ fn test_index_aggr_first() { (Datum::Bytes(b"name:5".to_vec()), 5), ]; // for dag - let req = DAGSelect::from_index(&product, &product["name"]) + let req = DagSelect::from_index(&product, &product["name"]) .first(&product["id"]) .group_by(&[&product["name"]]) .output_offsets(Some(vec![0, 1])) @@ -1175,7 +1175,7 @@ fn test_index_aggr_avg() { (Datum::Bytes(b"name:5".to_vec()), (Datum::Dec(8.into()), 2)), ]; // for dag - let req = DAGSelect::from_index(&product, &product["name"]) + let req = DagSelect::from_index(&product, &product["name"]) .avg(&product["count"]) .group_by(&[&product["name"]]) .build(); @@ -1217,7 +1217,7 @@ fn test_index_aggr_sum() { (Datum::Bytes(b"name:5".to_vec()), 8), ]; // for dag - let req = DAGSelect::from_index(&product, &product["name"]) + let req = DagSelect::from_index(&product, &product["name"]) .sum(&product["count"]) .group_by(&[&product["name"]]) .output_offsets(Some(vec![0, 1])) @@ -1284,7 +1284,7 @@ fn test_index_aggr_extre() { 
(Datum::Bytes(b"name:6".to_vec()), Datum::Null, Datum::Null), ]; // for dag - let req = DAGSelect::from_index(&product, &product["name"]) + let req = DagSelect::from_index(&product, &product["name"]) .max(&product["count"]) .min(&product["count"]) .group_by(&[&product["name"]]) @@ -1357,7 +1357,7 @@ fn test_where() { cond }; - let req = DAGSelect::from(&product).where_expr(cond).build(); + let req = DagSelect::from(&product).where_expr(cond).build(); let mut resp = handle_select(&endpoint, req); let mut spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); let row = spliter.next().unwrap(); @@ -1489,7 +1489,7 @@ fn test_handle_truncate() { for cond in cases { // Ignore truncate error. - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .where_expr(cond.clone()) .build_with(Context::default(), &[FLAG_IGNORE_TRUNCATE]); let resp = handle_select(&endpoint, req); @@ -1497,7 +1497,7 @@ fn test_handle_truncate() { assert!(resp.get_warnings().is_empty()); // truncate as warning - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .where_expr(cond.clone()) .build_with(Context::default(), &[FLAG_TRUNCATE_AS_WARNING]); let mut resp = handle_select(&endpoint, req); @@ -1518,7 +1518,7 @@ fn test_handle_truncate() { assert_eq!(spliter.next().is_none(), true); // Do NOT ignore truncate error. 
- let req = DAGSelect::from(&product).where_expr(cond.clone()).build(); + let req = DagSelect::from(&product).where_expr(cond.clone()).build(); let resp = handle_select(&endpoint, req); assert!(resp.has_error()); assert!(resp.get_warnings().is_empty()); @@ -1551,7 +1551,7 @@ fn test_default_val() { let (_, endpoint) = init_with_data(&product, &data); let expect: Vec<_> = data.drain(..5).collect(); - let req = DAGSelect::from(&tbl).limit(5).build(); + let req = DagSelect::from(&tbl).limit(5).build(); let mut resp = handle_select(&endpoint, req); let mut row_count = 0; let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 4); @@ -1581,7 +1581,7 @@ fn test_output_offsets() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &data); - let req = DAGSelect::from(&product) + let req = DagSelect::from(&product) .output_offsets(Some(vec![1])) .build(); let mut resp = handle_select(&endpoint, req); @@ -1607,7 +1607,7 @@ fn test_key_is_locked_for_primary() { let product = ProductTable::new(); let (_, endpoint) = init_data_with_commit(&product, &data, false); - let req = DAGSelect::from(&product).build(); + let req = DagSelect::from(&product).build(); let resp = handle_request(&endpoint, req); assert!(resp.get_data().is_empty(), "{:?}", resp); assert!(resp.has_locked(), "{:?}", resp); @@ -1625,7 +1625,7 @@ fn test_key_is_locked_for_index() { let product = ProductTable::new(); let (_, endpoint) = init_data_with_commit(&product, &data, false); - let req = DAGSelect::from_index(&product, &product["name"]).build(); + let req = DagSelect::from_index(&product, &product["name"]).build(); let resp = handle_request(&endpoint, req); assert!(resp.get_data().is_empty(), "{:?}", resp); assert!(resp.has_locked(), "{:?}", resp); @@ -1643,7 +1643,7 @@ fn test_output_counts() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &data); - let req = DAGSelect::from(&product).build(); + let req = DagSelect::from(&product).build(); 
let resp = handle_select(&endpoint, req); assert_eq!(resp.get_output_counts(), &[data.len() as i64]); } @@ -1663,7 +1663,7 @@ fn test_exec_details() { let flags = &[0]; let ctx = Context::default(); - let req = DAGSelect::from(&product).build_with(ctx, flags); + let req = DagSelect::from(&product).build_with(ctx, flags); let resp = handle_request(&endpoint, req); assert!(resp.has_exec_details()); let exec_details = resp.get_exec_details(); @@ -1687,7 +1687,7 @@ fn test_invalid_range() { let product = ProductTable::new(); let (_, endpoint) = init_with_data(&product, &data); - let mut select = DAGSelect::from(&product); + let mut select = DagSelect::from(&product); select.key_ranges[0].set_start(b"xxx".to_vec()); select.key_ranges[0].set_end(b"zzz".to_vec()); let req = select.build(); @@ -1703,7 +1703,7 @@ fn test_snapshot_failed() { let (_, endpoint) = init_data_with_engine_and_commit(ctx, raft_engine, &product, &[], true); // Use an invalid context to make errors. - let req = DAGSelect::from(&product).build_with(Context::default(), &[0]); + let req = DagSelect::from(&product).build_with(Context::default(), &[0]); let resp = handle_request(&endpoint, req); assert!(resp.get_region_error().has_store_not_match()); @@ -1724,7 +1724,7 @@ fn test_cache() { let (_, endpoint) = init_data_with_engine_and_commit(ctx.clone(), raft_engine, &product, &data, true); - let req = DAGSelect::from(&product).build_with(ctx, &[0]); + let req = DagSelect::from(&product).build_with(ctx, &[0]); let resp = handle_request(&endpoint, req.clone()); assert!(!resp.get_is_cache_hit()); @@ -1839,7 +1839,7 @@ fn test_copr_bypass_or_access_locks() { // DAG { - let mut req = DAGSelect::from(&product).build_with(ctx.clone(), &[0]); + let mut req = DagSelect::from(&product).build_with(ctx.clone(), &[0]); req.set_start_ts(read_ts.into_inner()); req.set_ranges(ranges.clone().into()); @@ -1944,7 +1944,7 @@ fn test_rc_read() { ctx.set_isolation_level(IsolationLevel::Rc); let ranges = 
vec![product.get_record_range(1, 4)]; - let mut req = DAGSelect::from(&product).build_with(ctx.clone(), &[0]); + let mut req = DagSelect::from(&product).build_with(ctx.clone(), &[0]); req.set_start_ts(u64::MAX - 1); req.set_ranges(ranges.into()); @@ -1973,7 +1973,7 @@ fn test_buckets() { let (_, endpoint) = init_data_with_engine_and_commit(ctx.clone(), raft_engine, &product, &[], true); - let req = DAGSelect::from(&product).build_with(ctx, &[0]); + let req = DagSelect::from(&product).build_with(ctx, &[0]); let resp = handle_request(&endpoint, req.clone()); assert_eq!(resp.get_latest_buckets_version(), 0); diff --git a/tests/integrations/import/test_sst_service.rs b/tests/integrations/import/test_sst_service.rs index 0174d0ef53f..a47c817d2af 100644 --- a/tests/integrations/import/test_sst_service.rs +++ b/tests/integrations/import/test_sst_service.rs @@ -5,7 +5,7 @@ use kvproto::{import_sstpb::*, kvrpcpb::Context, tikvpb::*}; use pd_client::PdClient; use tempfile::Builder; use test_sst_importer::*; -use tikv::config::TiKvConfig; +use tikv::config::TikvConfig; use super::util::*; @@ -84,7 +84,7 @@ fn test_write_and_ingest_with_tde() { #[test] fn test_ingest_sst() { - let mut cfg = TiKvConfig::default(); + let mut cfg = TikvConfig::default(); cfg.server.grpc_concurrency = 1; let (_cluster, ctx, _tikv, import) = open_cluster_and_tikv_import_client(Some(cfg)); diff --git a/tests/integrations/import/util.rs b/tests/integrations/import/util.rs index 363e3292ec6..e757e7685ba 100644 --- a/tests/integrations/import/util.rs +++ b/tests/integrations/import/util.rs @@ -7,13 +7,13 @@ use grpcio::{ChannelBuilder, Environment, Result, WriteFlags}; use kvproto::{import_sstpb::*, kvrpcpb::*, tikvpb::*}; use security::SecurityConfig; use test_raftstore::*; -use tikv::config::TiKvConfig; +use tikv::config::TikvConfig; use tikv_util::HandyRwLock; use uuid::Uuid; const CLEANUP_SST_MILLIS: u64 = 10; -pub fn new_cluster(cfg: TiKvConfig) -> (Cluster, Context) { +pub fn new_cluster(cfg: 
TikvConfig) -> (Cluster, Context) { let count = 1; let mut cluster = new_server_cluster(0, count); cluster.cfg = Config { @@ -34,10 +34,10 @@ pub fn new_cluster(cfg: TiKvConfig) -> (Cluster, Context) { } pub fn open_cluster_and_tikv_import_client( - cfg: Option, + cfg: Option, ) -> (Cluster, Context, TikvClient, ImportSstClient) { let cfg = cfg.unwrap_or_else(|| { - let mut config = TiKvConfig::default(); + let mut config = TikvConfig::default(); config.server.addr = "127.0.0.1:0".to_owned(); let cleanup_interval = Duration::from_millis(CLEANUP_SST_MILLIS); config.raft_store.cleanup_import_sst_interval.0 = cleanup_interval; @@ -84,7 +84,7 @@ pub fn new_cluster_and_tikv_import_client_tde() -> ( let encryption_cfg = test_util::new_file_security_config(&tmp_dir); let mut security = test_util::new_security_cfg(None); security.encryption = encryption_cfg; - let mut config = TiKvConfig::default(); + let mut config = TikvConfig::default(); config.server.addr = "127.0.0.1:0".to_owned(); let cleanup_interval = Duration::from_millis(CLEANUP_SST_MILLIS); config.raft_store.cleanup_import_sst_interval.0 = cleanup_interval; diff --git a/tests/integrations/resource_metering/test_cpu.rs b/tests/integrations/resource_metering/test_cpu.rs index abbfcdf3d17..9ead51f5ef5 100644 --- a/tests/integrations/resource_metering/test_cpu.rs +++ b/tests/integrations/resource_metering/test_cpu.rs @@ -12,7 +12,7 @@ use std::{ use concurrency_manager::ConcurrencyManager; use futures::{executor::block_on, StreamExt}; use kvproto::kvrpcpb::Context; -use test_coprocessor::{DAGSelect, Insert, ProductTable, Store}; +use test_coprocessor::{DagSelect, Insert, ProductTable, Store}; use tidb_query_datatype::codec::Datum; use tikv::{ config::CoprReadPoolConfig, @@ -92,7 +92,7 @@ pub fn test_reschedule_coprocessor() { insert.execute(); store.commit(); - let mut req = DAGSelect::from(&table).build(); + let mut req = DagSelect::from(&table).build(); let mut ctx = Context::default(); 
ctx.set_resource_group_tag(tag.as_bytes().to_vec()); req.set_context(ctx); diff --git a/tests/integrations/resource_metering/test_read_keys.rs b/tests/integrations/resource_metering/test_read_keys.rs index d5306ef21f5..87ad50024ad 100644 --- a/tests/integrations/resource_metering/test_read_keys.rs +++ b/tests/integrations/resource_metering/test_read_keys.rs @@ -8,7 +8,7 @@ use grpcio::{ChannelBuilder, Environment}; use kvproto::{coprocessor, kvrpcpb::*, resource_usage_agent::ResourceUsageRecord, tikvpb::*}; use protobuf::Message; use resource_metering::ResourceTagFactory; -use test_coprocessor::{DAGSelect, ProductTable, Store}; +use test_coprocessor::{DagSelect, ProductTable, Store}; use test_raftstore::*; use test_util::alloc_port; use tidb_query_datatype::codec::Datum; @@ -202,7 +202,7 @@ fn test_read_keys_coprocessor() { .unwrap(); // Do DAG select to register runtime thread. - let mut req = DAGSelect::from(&product).build(); + let mut req = DagSelect::from(&product).build(); let mut ctx = Context::default(); ctx.set_resource_group_tag("TEST-TAG".into()); req.set_context(ctx); diff --git a/tests/integrations/resource_metering/test_suite/mod.rs b/tests/integrations/resource_metering/test_suite/mod.rs index 88ffa9494ab..667c86d230a 100644 --- a/tests/integrations/resource_metering/test_suite/mod.rs +++ b/tests/integrations/resource_metering/test_suite/mod.rs @@ -19,7 +19,7 @@ use resource_metering::{Config, ResourceTagFactory}; use tempfile::TempDir; use test_util::alloc_port; use tikv::{ - config::{ConfigController, TiKvConfig}, + config::{ConfigController, TikvConfig}, storage::{ lock_manager::DummyLockManager, RocksEngine, StorageApiV1, TestEngineBuilder, TestStorageBuilderApiV1, @@ -50,7 +50,7 @@ pub struct TestSuite { impl TestSuite { pub fn new(cfg: resource_metering::Config) -> Self { - let (mut tikv_cfg, dir) = TiKvConfig::with_tmp().unwrap(); + let (mut tikv_cfg, dir) = TikvConfig::with_tmp().unwrap(); tikv_cfg.resource_metering = cfg.clone(); let 
cfg_controller = ConfigController::new(tikv_cfg); diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index b0c95eb9f7a..f5e642f161b 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -20,7 +20,7 @@ use raftstore::store::{apply_sst_cf_file, build_sst_cf_file_list, CfFile, Region use tempfile::Builder; use test_raftstore::*; use tikv::{ - config::TiKvConfig, + config::TikvConfig, storage::{mvcc::ScannerBuilder, txn::Scanner}, }; use tikv_util::{ @@ -148,7 +148,7 @@ fn test_delete_files_in_range_for_titan() { .unwrap(); // Set configs and create engines - let mut cfg = TiKvConfig::default(); + let mut cfg = TikvConfig::default(); let cache = cfg.storage.block_cache.build_shared_cache(); cfg.rocksdb.titan.enabled = true; cfg.rocksdb.titan.disable_gc = true; From 71caf10cb7c35a3dcfeea945d3bc6437e3a314b2 Mon Sep 17 00:00:00 2001 From: Jarvis Date: Thu, 4 Aug 2022 17:56:07 +0800 Subject: [PATCH 0135/1149] encryption: Set Iv to empty if using plaintext encryption (#13083) close tikv/tikv#13081 Using empty IV for plaintext encryption Signed-off-by: Jarvis Zheng Co-authored-by: Xinye Tao --- components/encryption/src/manager/mod.rs | 114 ++++++++++++++++++++++- 1 file changed, 113 insertions(+), 1 deletion(-) diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index 58a3a7a66e5..fb6b2312027 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -194,7 +194,11 @@ impl Dicts { fn new_file(&self, fname: &str, method: EncryptionMethod) -> Result { let mut file_dict_file = self.file_dict_file.lock().unwrap(); - let iv = Iv::new_ctr(); + let iv = if method != EncryptionMethod::Plaintext { + Iv::new_ctr() + } else { + Iv::Empty + }; let file = FileInfo { iv: iv.as_slice().to_vec(), key_id: self.current_key_id.load(Ordering::SeqCst), @@ -1336,4 +1340,112 @@ mod tests { assert_eq!(buffer, 
content); } } + + fn generate_mock_file>(dkm: Option<&DataKeyManager>, path: P, content: &String) { + use std::io::Write; + match dkm { + Some(manager) => { + // Encryption enabled. Use DataKeyManager to manage file. + let mut f = manager.create_file_for_write(&path).unwrap(); + f.write_all(content.as_bytes()).unwrap(); + f.sync_all().unwrap(); + } + None => { + // Encryption disabled. Write content in plaintext. + let mut f = File::create(&path).unwrap(); + f.write_all(content.as_bytes()).unwrap(); + f.sync_all().unwrap(); + } + } + } + + fn check_mock_file_content>( + dkm: Option<&DataKeyManager>, + path: P, + expected: &String, + ) { + use std::io::Read; + + match dkm { + Some(manager) => { + let mut buffer = String::new(); + let mut f = manager.open_file_for_read(&path).unwrap(); + assert_eq!(f.read_to_string(&mut buffer).unwrap(), expected.len()); + assert_eq!(buffer, expected.to_string()); + } + None => { + let mut buffer = String::new(); + let mut f = File::open(&path).unwrap(); + assert_eq!(f.read_to_string(&mut buffer).unwrap(), expected.len()); + assert_eq!(buffer, expected.to_string()); + } + } + } + + fn test_change_method(from: EncryptionMethod, to: EncryptionMethod) { + if from == to { + return; + } + + let generate_file_name = |method| format!("{:?}", method); + let generate_file_content = |method| format!("Encrypted with {:?}", method); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let (key_path, _tmp_key_dir) = create_key_file("key"); + let master_key_backend = + Box::new(FileBackend::new(key_path.as_path()).unwrap()) as Box; + let previous = new_mock_backend() as Box; + let path_to_file1 = tmp_dir.path().join(generate_file_name(from)); + let content1 = generate_file_content(from); + + if from == EncryptionMethod::Plaintext { + // encryption not enabled. 
+ let mut args = def_data_key_args(&tmp_dir); + args.method = EncryptionMethod::Plaintext; + let manager = + DataKeyManager::new(master_key_backend, Box::new(move || Ok(previous)), args) + .unwrap(); + assert!(manager.is_none()); + generate_mock_file(None, &path_to_file1, &content1); + check_mock_file_content(None, &path_to_file1, &content1); + } else { + let manager = + new_key_manager(&tmp_dir, Some(from), master_key_backend, previous).unwrap(); + + generate_mock_file(Some(&manager), &path_to_file1, &content1); + check_mock_file_content(Some(&manager), &path_to_file1, &content1); + // Close old manager + drop(manager); + } + + // re-open with new encryption/plaintext algorithm. + let master_key_backend = + Box::new(FileBackend::new(key_path.as_path()).unwrap()) as Box; + let previous = new_mock_backend() as Box; + let manager = new_key_manager(&tmp_dir, Some(to), master_key_backend, previous).unwrap(); + let path_to_file2 = tmp_dir.path().join(generate_file_name(to)); + + let content2 = generate_file_content(to); + generate_mock_file(Some(&manager), &path_to_file2, &content2); + check_mock_file_content(Some(&manager), &path_to_file2, &content2); + // check old file content + check_mock_file_content(Some(&manager), &path_to_file1, &content1); + } + + #[test] + fn test_encryption_algorithm_switch() { + let _guard = LOCK_FOR_GAUGE.lock().unwrap(); + + let method_list = [ + EncryptionMethod::Plaintext, + EncryptionMethod::Aes128Ctr, + EncryptionMethod::Aes192Ctr, + EncryptionMethod::Aes256Ctr, + EncryptionMethod::Sm4Ctr, + ]; + for from in method_list { + for to in method_list { + test_change_method(from, to) + } + } + } } From 25b45d939800089cdaf4c19c2d54e3420e75038e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boqin=20Qin=28=E7=A7=A6=20=E4=BC=AF=E9=92=A6=29?= Date: Thu, 4 Aug 2022 20:26:06 +0800 Subject: [PATCH 0136/1149] components/pd_client: fix double-read-lock in client (#13188) close tikv/tikv#12933 Signed-off-by: Burton Qin Co-authored-by: Shirly Co-authored-by: 
Ti Chi Robot --- components/pd_client/src/client.rs | 193 ++++++++++++++++------------- 1 file changed, 107 insertions(+), 86 deletions(-) diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 173b25357c4..04fd6350ca1 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -37,7 +37,7 @@ use yatp::{task::future::TaskCell, ThreadPool}; use super::{ metrics::*, - util::{check_resp_header, sync_request, Client, PdConnector}, + util::{check_resp_header, sync_request, Client, Inner, PdConnector}, BucketStat, Config, Error, FeatureGate, PdClient, PdFuture, RegionInfo, RegionStat, Result, UnixSecs, REQUEST_TIMEOUT, }; @@ -192,9 +192,12 @@ impl RpcClient { /// Creates a new call option with default request timeout. #[inline] pub fn call_option(client: &Client) -> CallOption { - client - .inner - .rl() + Self::call_option_inner(&client.inner.rl()) + } + + #[inline] + fn call_option_inner(inner: &Inner) -> CallOption { + inner .target_info() .call_option() .timeout(Duration::from_secs(REQUEST_TIMEOUT)) @@ -214,14 +217,15 @@ impl RpcClient { req.set_region_key(key.to_vec()); let executor = move |client: &Client, req: pdpb::GetRegionRequest| { - let handler = client - .inner - .rl() - .client_stub - .get_region_async_opt(&req, Self::call_option(client)) - .unwrap_or_else(|e| { - panic!("fail to request PD {} err {:?}", "get_region_async_opt", e) - }); + let handler = { + let inner = client.inner.rl(); + inner + .client_stub + .get_region_async_opt(&req, Self::call_option_inner(&inner)) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "get_region_async_opt", e) + }) + }; Box::pin(async move { let mut resp = handler.await?; @@ -253,12 +257,15 @@ impl RpcClient { req.set_store_id(store_id); let executor = move |client: &Client, req: pdpb::GetStoreRequest| { - let handler = client - .inner - .rl() - .client_stub - .get_store_async_opt(&req, Self::call_option(client)) - .unwrap_or_else(|e| 
panic!("fail to request PD {} err {:?}", "get_store_async", e)); + let handler = { + let inner = client.inner.rl(); + inner + .client_stub + .get_store_async_opt(&req, Self::call_option_inner(&inner)) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "get_store_async", e) + }) + }; Box::pin(async move { let mut resp = handler.await?; @@ -496,14 +503,15 @@ impl PdClient for RpcClient { req.set_region_id(region_id); let executor = move |client: &Client, req: pdpb::GetRegionByIdRequest| { - let handler = client - .inner - .rl() - .client_stub - .get_region_by_id_async_opt(&req, Self::call_option(client)) - .unwrap_or_else(|e| { - panic!("fail to request PD {} err {:?}", "get_region_by_id", e) - }); + let handler = { + let inner = client.inner.rl(); + inner + .client_stub + .get_region_by_id_async_opt(&req, Self::call_option_inner(&inner)) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "get_region_by_id", e); + }) + }; Box::pin(async move { let mut resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC @@ -534,14 +542,15 @@ impl PdClient for RpcClient { req.set_region_id(region_id); let executor = move |client: &Client, req: pdpb::GetRegionByIdRequest| { - let handler = client - .inner - .rl() - .client_stub - .get_region_by_id_async_opt(&req, Self::call_option(client)) - .unwrap_or_else(|e| { - panic!("fail to request PD {} err {:?}", "get_region_by_id", e) - }); + let handler = { + let inner = client.inner.rl(); + inner + .client_stub + .get_region_by_id_async_opt(&req, Self::call_option_inner(&inner)) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "get_region_by_id", e) + }) + }; Box::pin(async move { let mut resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC @@ -671,12 +680,13 @@ impl PdClient for RpcClient { req.set_region(region); let executor = move |client: &Client, req: pdpb::AskSplitRequest| { - let handler = client - .inner - .rl() - .client_stub - .ask_split_async_opt(&req, Self::call_option(client)) - 
.unwrap_or_else(|e| panic!("fail to request PD {} err {:?}", "ask_split", e)); + let handler = { + let inner = client.inner.rl(); + inner + .client_stub + .ask_split_async_opt(&req, Self::call_option_inner(&inner)) + .unwrap_or_else(|e| panic!("fail to request PD {} err {:?}", "ask_split", e)) + }; Box::pin(async move { let resp = handler.await?; @@ -706,12 +716,15 @@ impl PdClient for RpcClient { req.set_split_count(count as u32); let executor = move |client: &Client, req: pdpb::AskBatchSplitRequest| { - let handler = client - .inner - .rl() - .client_stub - .ask_batch_split_async_opt(&req, Self::call_option(client)) - .unwrap_or_else(|e| panic!("fail to request PD {} err {:?}", "ask_batch_split", e)); + let handler = { + let inner = client.inner.rl(); + inner + .client_stub + .ask_batch_split_async_opt(&req, Self::call_option_inner(&inner)) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "ask_batch_split", e) + }) + }; Box::pin(async move { let resp = handler.await?; @@ -750,12 +763,15 @@ impl PdClient for RpcClient { } let executor = move |client: &Client, req: pdpb::StoreHeartbeatRequest| { let feature_gate = client.feature_gate.clone(); - let handler = client - .inner - .rl() - .client_stub - .store_heartbeat_async_opt(&req, Self::call_option(client)) - .unwrap_or_else(|e| panic!("fail to request PD {} err {:?}", "store_heartbeat", e)); + let handler = { + let inner = client.inner.rl(); + inner + .client_stub + .store_heartbeat_async_opt(&req, Self::call_option_inner(&inner)) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "store_heartbeat", e) + }) + }; Box::pin(async move { let resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC @@ -784,14 +800,15 @@ impl PdClient for RpcClient { req.set_regions(regions.into()); let executor = move |client: &Client, req: pdpb::ReportBatchSplitRequest| { - let handler = client - .inner - .rl() - .client_stub - .report_batch_split_async_opt(&req, Self::call_option(client)) - 
.unwrap_or_else(|e| { - panic!("fail to request PD {} err {:?}", "report_batch_split", e) - }); + let handler = { + let inner = client.inner.rl(); + inner + .client_stub + .report_batch_split_async_opt(&req, Self::call_option_inner(&inner)) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "report_batch_split", e) + }) + }; Box::pin(async move { let resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC @@ -837,15 +854,15 @@ impl PdClient for RpcClient { req.set_header(self.header()); let executor = move |client: &Client, req: pdpb::GetGcSafePointRequest| { - let option = Self::call_option(client); - let handler = client - .inner - .rl() - .client_stub - .get_gc_safe_point_async_opt(&req, option) - .unwrap_or_else(|e| { - panic!("fail to request PD {} err {:?}", "get_gc_saft_point", e) - }); + let handler = { + let inner = client.inner.rl(); + inner + .client_stub + .get_gc_safe_point_async_opt(&req, Self::call_option_inner(&inner)) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "get_gc_saft_point", e) + }) + }; Box::pin(async move { let resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC @@ -925,17 +942,18 @@ impl PdClient for RpcClient { req.set_ttl(ttl.as_secs() as _); req.set_safe_point(safe_point.into_inner()); let executor = move |client: &Client, r: pdpb::UpdateServiceGcSafePointRequest| { - let handler = client - .inner - .rl() - .client_stub - .update_service_gc_safe_point_async_opt(&r, Self::call_option(client)) - .unwrap_or_else(|e| { - panic!( - "fail to request PD {} err {:?}", - "update_service_safe_point", e - ) - }); + let handler = { + let inner = client.inner.rl(); + inner + .client_stub + .update_service_gc_safe_point_async_opt(&r, Self::call_option_inner(&inner)) + .unwrap_or_else(|e| { + panic!( + "fail to request PD {} err {:?}", + "update_service_safe_point", e + ) + }) + }; Box::pin(async move { let resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC @@ -963,12 +981,15 @@ impl PdClient for RpcClient { 
req.set_min_resolved_ts(min_resolved_ts); let executor = move |client: &Client, req: pdpb::ReportMinResolvedTsRequest| { - let handler = client - .inner - .rl() - .client_stub - .report_min_resolved_ts_async_opt(&req, Self::call_option(client)) - .unwrap_or_else(|e| panic!("fail to request PD {} err {:?}", "min_resolved_ts", e)); + let handler = { + let inner = client.inner.rl(); + inner + .client_stub + .report_min_resolved_ts_async_opt(&req, Self::call_option_inner(&inner)) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "min_resolved_ts", e) + }) + }; Box::pin(async move { let resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC From 83d17c39cd0dc3bdb3c6b1f7e6206d4b322c3a37 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Fri, 5 Aug 2022 14:52:06 +0800 Subject: [PATCH 0137/1149] server: raise error when bootstrap with a zero store-id (#13010) close tikv/tikv#13011 raise error when bootstrap with a zero store-id Signed-off-by: CalvinNeo Co-authored-by: Ti Chi Robot --- components/pd_client/src/client.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 04fd6350ca1..ca997e473e9 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -398,7 +398,11 @@ impl PdClient for RpcClient { })?; check_resp_header(resp.get_header())?; - Ok(resp.get_id()) + let id = resp.get_id(); + if id == 0 { + return Err(box_err!("pd alloc weird id 0")); + } + Ok(id) } fn put_store(&self, store: metapb::Store) -> Result> { From 9df16e04376952b34936209e8476350dc273d007 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boqin=20Qin=28=E7=A7=A6=20=E4=BC=AF=E9=92=A6=29?= Date: Fri, 5 Aug 2022 15:20:06 +0800 Subject: [PATCH 0138/1149] components/engine_test: fix double-lock in open_tablet (#13187) ref tikv/tikv#13186 Signed-off-by: Burton Qin Co-authored-by: Ti Chi Robot --- components/engine_test/src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 
deletions(-) diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index 7bdd87827e7..c3c8cc598ad 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -261,8 +261,7 @@ pub mod kv { } fn open_tablet(&self, id: u64, suffix: u64) -> Result { - let reg = self.registry.lock().unwrap(); - if let Some(db) = reg.get(&(id, suffix)) { + if let Some(db) = self.registry.lock().unwrap().get(&(id, suffix)) { return Ok(db.clone()); } From 3800412c49479441738da7caf408bba88a92c62d Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Fri, 5 Aug 2022 15:44:06 +0800 Subject: [PATCH 0139/1149] server: Support default metapb::Store when register to pd (#13192) ref tikv/tikv#12849 Support default metapb::Store when register to pd Signed-off-by: CalvinNeo Co-authored-by: Ti Chi Robot --- components/server/src/server.rs | 1 + components/test_raftstore/src/node.rs | 1 + components/test_raftstore/src/server.rs | 1 + src/server/node.rs | 22 +++++++++++++------ .../integrations/raftstore/test_bootstrap.rs | 1 + 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 1cb6a9b3b65..4a4cadeb639 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -867,6 +867,7 @@ where self.state.clone(), self.background_worker.clone(), Some(health_service.clone()), + None, ); node.try_bootstrap_store(engines.engines.clone()) .unwrap_or_else(|e| fatal!("failed to bootstrap node id: {}", e)); diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 2584d29629e..be361db3185 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -247,6 +247,7 @@ impl Simulator for NodeCluster { Arc::default(), bg_worker.clone(), None, + None, ); let (snap_mgr, snap_mgr_path) = if node_id == 0 diff --git a/components/test_raftstore/src/server.rs 
b/components/test_raftstore/src/server.rs index 7107c668c3d..da81606d2dd 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -497,6 +497,7 @@ impl ServerCluster { state, bg_worker.clone(), Some(health_service.clone()), + None, ); node.try_bootstrap_store(engines.clone())?; let node_id = node.id(); diff --git a/src/server/node.rs b/src/server/node.rs index 84aeb89377d..a282bcded37 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -112,8 +112,12 @@ where state: Arc>, bg_worker: Worker, health_service: Option, + default_store: Option, ) -> Node { - let mut store = metapb::Store::default(); + let mut store = match default_store { + None => metapb::Store::default(), + Some(s) => s, + }; store.set_id(INVALID_ID); if cfg.advertise_addr.is_empty() { store.set_address(cfg.addr.clone()); @@ -125,7 +129,9 @@ where } else { store.set_status_address(cfg.advertise_status_addr.clone()) } - store.set_version(env!("CARGO_PKG_VERSION").to_string()); + if store.get_version() == "" { + store.set_version(env!("CARGO_PKG_VERSION").to_string()); + } if let Ok(path) = std::env::current_exe() { if let Some(path) = path.parent() { @@ -134,11 +140,13 @@ where }; store.set_start_timestamp(chrono::Local::now().timestamp()); - store.set_git_hash( - option_env!("TIKV_BUILD_GIT_HASH") - .unwrap_or("Unknown git hash") - .to_string(), - ); + if store.get_git_hash() == "" { + store.set_git_hash( + option_env!("TIKV_BUILD_GIT_HASH") + .unwrap_or("Unknown git hash") + .to_string(), + ); + } let mut labels = Vec::new(); for (k, v) in &cfg.labels { diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 1caf4e31ea3..92e4422c57f 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -61,6 +61,7 @@ fn test_node_bootstrap_with_prepared_data() { Arc::default(), bg_worker, None, + None, ); let snap_mgr = 
SnapManager::new(tmp_mgr.path().to_str().unwrap()); let pd_worker = LazyWorker::new("test-pd-worker"); From 68397e8c7fe1842635b29675f9cc01f534fc1d3e Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 5 Aug 2022 16:46:07 +0800 Subject: [PATCH 0140/1149] tablet: fix the potential dead lock in open_tablet (#13165) close tikv/tikv#13213 Signed-off-by: SpadeA-Tang --- components/engine_test/src/lib.rs | 156 +++++++---- components/engine_traits/src/engine.rs | 111 ++++++-- .../raftstore-v2/src/operation/read/read.rs | 10 +- components/raftstore-v2/src/raft/peer.rs | 8 +- src/server/engine_factory.rs | 51 ++-- src/server/engine_factory_v2.rs | 251 +++++++++++++----- .../flow_controller/tablet_flow_controller.rs | 19 +- 7 files changed, 428 insertions(+), 178 deletions(-) diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index c3c8cc598ad..18d89b1c2fb 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -91,7 +91,7 @@ pub mod kv { RocksSnapshot as KvTestSnapshot, RocksWriteBatchVec as KvTestWriteBatch, }; use engine_traits::{ - CfOptions, CfOptionsExt, Result, TabletAccessor, TabletFactory, CF_DEFAULT, + CfOptions, CfOptionsExt, OpenOptions, Result, TabletAccessor, TabletFactory, CF_DEFAULT, }; use tikv_util::box_err; @@ -134,12 +134,11 @@ pub mod kv { } fn create_tablet(&self, tablet_path: &Path) -> Result { - let kv_engine = KvTestEngine::new_kv_engine_opt( + KvTestEngine::new_kv_engine_opt( tablet_path.to_str().unwrap(), self.db_opt.clone(), self.cf_opts.clone(), - )?; - Ok(kv_engine) + ) } } @@ -152,25 +151,36 @@ pub mod kv { Ok(tablet) } - fn create_tablet(&self, _id: u64, _suffix: u64) -> Result { - let db = self.root_db.lock().unwrap(); - if let Some(cp) = db.as_ref() { - return Ok(cp.clone()); + /// See the comment above the same name method in KvEngineFactory + fn open_tablet( + &self, + _id: u64, + _suffix: Option, + options: OpenOptions, + ) -> 
Result { + if let Some(db) = self.root_db.lock().unwrap().as_ref() { + if options.create_new() { + return Err(box_err!( + "root tablet {} already exists", + db.as_inner().path() + )); + } + return Ok(db.clone()); + } else if options.create_new() || options.create() { + return self.create_shared_db(); } - self.create_shared_db() + Err(box_err!("root tablet has not been initialized")) } - fn open_tablet_cache(&self, _id: u64, _suffix: u64) -> Option { - self.open_tablet_raw(&self.tablet_path(0, 0), false).ok() - } - - fn open_tablet_cache_any(&self, _id: u64) -> Option { - self.open_tablet_cache(0, 0) - } - - fn open_tablet_raw(&self, _path: &Path, _readonly: bool) -> Result { - TabletFactory::create_tablet(self, 0, 0) + fn open_tablet_raw( + &self, + _path: &Path, + _id: u64, + _suffix: u64, + _options: OpenOptions, + ) -> Result { + self.create_shared_db() } fn exists_raw(&self, _path: &Path) -> bool { @@ -243,59 +253,86 @@ pub mod kv { } impl TabletFactory for TestTabletFactoryV2 { - fn create_tablet(&self, id: u64, suffix: u64) -> Result { - let mut reg = self.registry.lock().unwrap(); - if let Some(db) = reg.get(&(id, suffix)) { - return Err(box_err!( - "region {} {} already exists", - id, - db.as_inner().path() - )); - } - - let tablet_path = self.tablet_path(id, suffix); - let kv_engine = self.inner.create_tablet(&tablet_path)?; - reg.insert((id, suffix), kv_engine.clone()); - - Ok(kv_engine) - } - - fn open_tablet(&self, id: u64, suffix: u64) -> Result { - if let Some(db) = self.registry.lock().unwrap().get(&(id, suffix)) { - return Ok(db.clone()); + /// See the comment above the same name method in KvEngineFactoryV2 + fn open_tablet( + &self, + id: u64, + suffix: Option, + mut options: OpenOptions, + ) -> Result { + if options.create_new() || options.create() { + options = options.set_cache_only(false); } - let db_path = self.tablet_path(id, suffix); - let db = self.open_tablet_raw(db_path.as_path(), false)?; - Ok(db) - } - - fn open_tablet_cache(&self, id: 
u64, suffix: u64) -> Option { - self.registry.lock().unwrap().get(&(id, suffix)).cloned() - } + let mut reg = self.registry.lock().unwrap(); + if let Some(suffix) = suffix { + if let Some(tablet) = reg.get(&(id, suffix)) { + // Target tablet exist in the cache + + if options.create_new() { + return Err(box_err!( + "region {} {} already exists", + id, + tablet.as_inner().path() + )); + } + return Ok(tablet.clone()); + } else if !options.cache_only() { + let tablet_path = self.tablet_path(id, suffix); + let tablet = self.open_tablet_raw(&tablet_path, id, suffix, options.clone())?; + if !options.skip_cache() { + reg.insert((id, suffix), tablet.clone()); + } + return Ok(tablet); + } + } else if options.cache_only() { + // This branch reads an arbitrary tablet with region id `id` - fn open_tablet_cache_any(&self, id: u64) -> Option { - let reg = self.registry.lock().unwrap(); - if let Some(k) = reg.keys().find(|k| k.0 == id) { - return Some(reg.get(k).unwrap().clone()); + if let Some(k) = reg.keys().find(|k| k.0 == id) { + return Ok(reg.get(k).unwrap().clone()); + } } - None - } - fn open_tablet_raw(&self, path: &Path, _readonly: bool) -> Result { - if !KvTestEngine::exists(path.to_str().unwrap_or_default()) { + Err(box_err!( + "tablet with region id {} suffix {:?} does not exist", + id, + suffix + )) + } + + fn open_tablet_raw( + &self, + path: &Path, + id: u64, + _suffix: u64, + options: OpenOptions, + ) -> Result { + let engine_exist = KvTestEngine::exists(path.to_str().unwrap_or_default()); + // Even though neither options.create nor options.create_new are true, if the + // tablet files already exists, we will open it by calling + // inner.create_tablet. In this case, the tablet exists but not in the cache + // (registry). 
+ if !options.create() && !options.create_new() && !engine_exist { return Err(box_err!( "path {} does not have db", path.to_str().unwrap_or_default() )); + }; + + if options.create_new() && engine_exist { + return Err(box_err!( + "region {} {} already exists", + id, + path.to_str().unwrap() + )); } - let (tablet_id, tablet_suffix) = get_id_and_suffix_from_path(path); - self.create_tablet(tablet_id, tablet_suffix) + + self.inner.create_tablet(path) } #[inline] fn create_shared_db(&self) -> Result { - self.create_tablet(0, 0) + self.open_tablet(0, Some(0), OpenOptions::default().set_create_new(true)) } #[inline] @@ -350,7 +387,8 @@ pub mod kv { let db_path = self.tablet_path(id, suffix); std::fs::rename(path, &db_path)?; - let new_engine = self.open_tablet_raw(db_path.as_path(), false); + let new_engine = + self.open_tablet(id, Some(suffix), OpenOptions::default().set_create(true)); if new_engine.is_ok() { let (old_id, old_suffix) = get_id_and_suffix_from_path(path); self.registry.lock().unwrap().remove(&(old_id, old_suffix)); diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index 8d991f1cfeb..e59d9104e56 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -153,32 +153,89 @@ impl Drop for TabletErrorCollector { } } +/// OpenOptionsn is used for specifiying the way of opening a tablet. +#[derive(Default, Clone)] +pub struct OpenOptions { + // create tablet if non-exist + create: bool, + create_new: bool, + read_only: bool, + cache_only: bool, + skip_cache: bool, +} + +impl OpenOptions { + /// Sets the option to create a tablet, or open it if it already exists. + pub fn set_create(mut self, create: bool) -> Self { + self.create = create; + self + } + + /// Sets the option to create a new tablet, failing if it already exists. 
+ pub fn set_create_new(mut self, create_new: bool) -> Self { + self.create_new = create_new; + self + } + + /// Sets the option for read only + pub fn set_read_only(mut self, read_only: bool) -> Self { + self.read_only = read_only; + self + } + + /// Sets the option for only reading from cache. + pub fn set_cache_only(mut self, cache_only: bool) -> Self { + self.cache_only = cache_only; + self + } + + /// Sets the option to open a tablet without updating the cache. + pub fn set_skip_cache(mut self, skip_cache: bool) -> Self { + self.skip_cache = skip_cache; + self + } + + pub fn create(&self) -> bool { + self.create + } + + pub fn create_new(&self) -> bool { + self.create_new + } + + pub fn read_only(&self) -> bool { + self.read_only + } + + pub fn cache_only(&self) -> bool { + self.cache_only + } + + pub fn skip_cache(&self) -> bool { + self.skip_cache + } +} + /// A factory trait to create new engine. // It should be named as `EngineFactory` for consistency, but we are about to // rename engine to tablet, so always use tablet for new traits/types. pub trait TabletFactory: TabletAccessor { - /// Create an tablet by id and suffix. If the tablet exists, it will fail. + /// Open the tablet with id and suffix according to the OpenOptions. + /// /// The id is likely the region Id, the suffix could be the current raft log /// index. They together could specify a unique path for a region's /// tablet. The reason to have suffix is that we can keep more than one /// tablet for a region. - fn create_tablet(&self, id: u64, suffix: u64) -> Result; - - /// Open a tablet by id and suffix. If the tablet exists, it will open it. - /// If the tablet does not exist, it will create it. - fn open_tablet(&self, id: u64, suffix: u64) -> Result { - self.open_tablet_raw(&self.tablet_path(id, suffix), false) - } + fn open_tablet(&self, id: u64, suffix: Option, options: OpenOptions) -> Result; - /// Open a tablet by id and suffix from cache---that means it should already - /// be opened. 
- fn open_tablet_cache(&self, id: u64, suffix: u64) -> Option; - - /// Open a tablet by id and any suffix from cache - fn open_tablet_cache_any(&self, id: u64) -> Option; - - /// Open tablet by path and readonly flag - fn open_tablet_raw(&self, path: &Path, readonly: bool) -> Result; + /// Open tablet by raw path without updating cache. + fn open_tablet_raw( + &self, + path: &Path, + id: u64, + suffix: u64, + options: OpenOptions, + ) -> Result; /// Create the shared db for v1 fn create_shared_db(&self) -> Result; @@ -232,23 +289,21 @@ impl TabletFactory for DummyFactory where EK: CfOptionsExt + Clone + Send + 'static, { - fn create_tablet(&self, _id: u64, _suffix: u64) -> Result { + fn create_shared_db(&self) -> Result { Ok(self.engine.as_ref().unwrap().clone()) } - fn open_tablet_raw(&self, _path: &Path, _readonly: bool) -> Result { + fn open_tablet(&self, _id: u64, _suffix: Option, _options: OpenOptions) -> Result { Ok(self.engine.as_ref().unwrap().clone()) } - fn open_tablet_cache(&self, _id: u64, _suffix: u64) -> Option { - Some(self.engine.as_ref().unwrap().clone()) - } - - fn open_tablet_cache_any(&self, _id: u64) -> Option { - Some(self.engine.as_ref().unwrap().clone()) - } - - fn create_shared_db(&self) -> Result { + fn open_tablet_raw( + &self, + _path: &Path, + _id: u64, + _suffix: u64, + _options: OpenOptions, + ) -> Result { Ok(self.engine.as_ref().unwrap().clone()) } diff --git a/components/raftstore-v2/src/operation/read/read.rs b/components/raftstore-v2/src/operation/read/read.rs index 63878beeb22..bc3903e12fd 100644 --- a/components/raftstore-v2/src/operation/read/read.rs +++ b/components/raftstore-v2/src/operation/read/read.rs @@ -154,7 +154,7 @@ mod tests { ctor::{CfOptions, DbOptions}, kv::{KvTestEngine, KvTestSnapshot, TestTabletFactoryV2}, }; - use engine_traits::{Peekable, SyncMutable, ALL_CFS, CF_DEFAULT}; + use engine_traits::{OpenOptions, Peekable, SyncMutable, ALL_CFS, CF_DEFAULT}; use kvproto::{metapb::Region, raft_cmdpb::*}; use 
raftstore::store::{ util::Lease, Callback, CasualMessage, CasualRouter, LocalReader, ProposalRouter, @@ -210,7 +210,9 @@ mod tests { meta.readers.insert(1, read_delegate); // create tablet with region_id 1 and prepare some data - tablet1 = factory.create_tablet(1, 10).unwrap(); + tablet1 = factory + .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) + .unwrap(); tablet1.put_cf(CF_DEFAULT, b"a1", b"val1").unwrap(); let cache = CachedTablet::new(Some(tablet1.clone())); meta.tablet_caches.insert(1, cache); @@ -221,7 +223,9 @@ mod tests { meta.readers.insert(2, read_delegate); // create tablet with region_id 1 and prepare some data - tablet2 = factory.create_tablet(2, 10).unwrap(); + tablet2 = factory + .open_tablet(2, Some(10), OpenOptions::default().set_create_new(true)) + .unwrap(); tablet2.put_cf(CF_DEFAULT, b"a2", b"val2").unwrap(); let cache = CachedTablet::new(Some(tablet2.clone())); meta.tablet_caches.insert(2, cache); diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index aebb1bf7406..70dccd284fa 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -2,7 +2,7 @@ use std::sync::Arc; -use engine_traits::{KvEngine, RaftEngine, TabletFactory}; +use engine_traits::{KvEngine, OpenOptions, RaftEngine, TabletFactory}; use kvproto::{metapb, raft_serverpb::RegionLocalState}; use raft::{RawNode, INVALID_ID}; use raftstore::store::{util::find_peer, Config}; @@ -71,7 +71,11 @@ impl Peer { )); } // TODO: Perhaps we should stop create the tablet automatically. - Some(tablet_factory.open_tablet(region_id, tablet_index)?) + Some(tablet_factory.open_tablet( + region_id, + Some(tablet_index), + OpenOptions::default().set_create(true), + )?) 
} else { None }; diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index fad5cd25ba8..968e8fa04d8 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -11,8 +11,8 @@ use engine_rocks::{ RocksEventListener, }; use engine_traits::{ - CfOptions, CfOptionsExt, CompactionJobInfo, Result, TabletAccessor, TabletFactory, CF_DEFAULT, - CF_WRITE, + CfOptions, CfOptionsExt, CompactionJobInfo, OpenOptions, Result, TabletAccessor, TabletFactory, + CF_DEFAULT, CF_WRITE, }; use kvproto::kvrpcpb::ApiVersion; use raftstore::RegionInfoAccessor; @@ -227,25 +227,42 @@ impl TabletFactory for KvEngineFactory { Ok(tablet) } - fn create_tablet(&self, _id: u64, _suffix: u64) -> Result { - let db = self.inner.root_db.lock().unwrap(); - if let Some(cp) = db.as_ref() { - return Ok(cp.clone()); + /// Open the root tablet according to the OpenOptions. + /// + /// If options.create_new is true, create the root tablet. If the tablet + /// exists, it will fail. + /// + /// If options.create is true, open the the root tablet if it exists or + /// create it otherwise. 
+ fn open_tablet( + &self, + _id: u64, + _suffix: Option, + options: OpenOptions, + ) -> Result { + if let Some(db) = self.inner.root_db.lock().unwrap().as_ref() { + if options.create_new() { + return Err(box_err!( + "root tablet {} already exists", + db.as_inner().path() + )); + } + return Ok(db.clone()); + } else if options.create_new() || options.create() { + return self.create_shared_db(); } - self.create_shared_db() - } - - fn open_tablet_cache(&self, _id: u64, _suffix: u64) -> Option { - self.open_tablet_raw(&self.tablet_path(0, 0), false).ok() - } - - fn open_tablet_cache_any(&self, _id: u64) -> Option { - self.open_tablet_cache(0, 0) + Err(box_err!("root tablet has not been initialized")) } - fn open_tablet_raw(&self, _path: &Path, _readonly: bool) -> Result { - TabletFactory::create_tablet(self, 0, 0) + fn open_tablet_raw( + &self, + _path: &Path, + _id: u64, + _suffix: u64, + _options: OpenOptions, + ) -> Result { + self.create_shared_db() } fn exists_raw(&self, _path: &Path) -> bool { diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index 7f3bcaafe4f..e3f57d4f244 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -7,7 +7,9 @@ use std::{ use collections::HashMap; use engine_rocks::RocksEngine; -use engine_traits::{CfOptions, CfOptionsExt, Result, TabletAccessor, TabletFactory, CF_DEFAULT}; +use engine_traits::{ + CfOptions, CfOptionsExt, OpenOptions, Result, TabletAccessor, TabletFactory, CF_DEFAULT, +}; use crate::server::engine_factory::KvEngineFactory; @@ -40,62 +42,104 @@ fn get_id_and_suffix_from_path(path: &Path) -> (u64, u64) { } impl TabletFactory for KvEngineFactoryV2 { - fn create_tablet(&self, id: u64, suffix: u64) -> Result { - let mut reg = self.registry.lock().unwrap(); - if let Some(db) = reg.get(&(id, suffix)) { - return Err(box_err!( - "region {} {} already exists", - id, - db.as_inner().path() - )); - } - let tablet_path = self.tablet_path(id, suffix); - let kv_engine = 
self.inner.create_tablet(&tablet_path, id, suffix)?; - debug!("inserting tablet"; "key" => ?(id, suffix)); - reg.insert((id, suffix), kv_engine.clone()); - self.inner.on_tablet_created(id, suffix); - Ok(kv_engine) - } - - fn open_tablet(&self, id: u64, suffix: u64) -> Result { - let reg = self.registry.lock().unwrap(); - if let Some(db) = reg.get(&(id, suffix)) { - return Ok(db.clone()); + /// open a tablet according to the OpenOptions. + /// + /// If options.cache_only is true, only open the relevant tablet from + /// `registry`, and if suffix is None, return an arbitrary tablet with the + /// target region id if there are any. + /// + /// If options.create_new is true, create a tablet by id and suffix. If the + /// tablet exists, it will fail. + /// + /// If options.create is true, open the tablet with id and suffix if it + /// exists or create it otherwise. + /// + /// Note: options.cache_only and options.create and/or options.create_new + /// cannot be true simultaneously + fn open_tablet( + &self, + id: u64, + suffix: Option, + mut options: OpenOptions, + ) -> Result { + if options.create() || options.create_new() { + options = options.set_cache_only(false); } - let db_path = self.tablet_path(id, suffix); - let db = self.open_tablet_raw(db_path.as_path(), false)?; - debug!("open tablet"; "key" => ?(id, suffix)); - Ok(db) - } - - fn open_tablet_cache(&self, id: u64, suffix: u64) -> Option { - self.registry.lock().unwrap().get(&(id, suffix)).cloned() - } + let mut reg = self.registry.lock().unwrap(); + if let Some(suffix) = suffix { + if let Some(tablet) = reg.get(&(id, suffix)) { + // Target tablet exist in the cache + + if options.create_new() { + return Err(box_err!( + "region {} {} already exists", + id, + tablet.as_inner().path() + )); + } + return Ok(tablet.clone()); + } else if !options.cache_only() { + let tablet_path = self.tablet_path(id, suffix); + let tablet = self.open_tablet_raw(&tablet_path, id, suffix, options.clone())?; + if 
!options.skip_cache() { + debug!("Insert a tablet"; "key" => ?(id, suffix)); + reg.insert((id, suffix), tablet.clone()); + } + return Ok(tablet); + } + } else if options.cache_only() { + // This branch reads an arbitrary tablet with region id `id` - fn open_tablet_cache_any(&self, id: u64) -> Option { - let reg = self.registry.lock().unwrap(); - if let Some(k) = reg.keys().find(|k| k.0 == id) { - debug!("choose a random tablet"; "key" => ?k); - return Some(reg.get(k).unwrap().clone()); + if let Some(k) = reg.keys().find(|k| k.0 == id) { + debug!("choose a random tablet"; "key" => ?k); + return Ok(reg.get(k).unwrap().clone()); + } } - None + + Err(box_err!( + "tablet with region id {} suffix {:?} does not exist", + id, + suffix + )) } - fn open_tablet_raw(&self, path: &Path, _readonly: bool) -> Result { - if !RocksEngine::exists(path.to_str().unwrap_or_default()) { + fn open_tablet_raw( + &self, + path: &Path, + id: u64, + suffix: u64, + options: OpenOptions, + ) -> Result { + let engine_exist = RocksEngine::exists(path.to_str().unwrap_or_default()); + // Even though neither options.create nor options.create_new are true, if the + // tablet files already exists, we will open it by calling + // inner.create_tablet. In this case, the tablet exists but not in the cache + // (registry). 
+ if !options.create() && !options.create_new() && !engine_exist { return Err(box_err!( "path {} does not have db", path.to_str().unwrap_or_default() )); + }; + + if options.create_new() && engine_exist { + return Err(box_err!( + "region {} {} already exists", + id, + path.to_str().unwrap() + )); } - let (tablet_id, tablet_suffix) = get_id_and_suffix_from_path(path); - self.create_tablet(tablet_id, tablet_suffix) + + let tablet = self.inner.create_tablet(path, id, suffix)?; + debug!("open tablet"; "key" => ?(id, suffix)); + self.inner.on_tablet_created(id, suffix); + Ok(tablet) } #[inline] fn create_shared_db(&self) -> Result { - self.create_tablet(0, 0) + self.open_tablet(0, Some(0), OpenOptions::default().set_create_new(true)) } #[inline] @@ -154,10 +198,10 @@ impl TabletFactory for KvEngineFactoryV2 { let db_path = self.tablet_path(id, suffix); std::fs::rename(path, &db_path)?; - let new_engine = self.open_tablet_raw(db_path.as_path(), false); + let new_engine = + self.open_tablet(id, Some(suffix), OpenOptions::default().set_create(true)); if new_engine.is_ok() { let (old_id, old_suffix) = get_id_and_suffix_from_path(path); - assert!(suffix > old_suffix); self.registry.lock().unwrap().remove(&(old_id, old_suffix)); } new_engine @@ -191,7 +235,7 @@ impl TabletAccessor for KvEngineFactoryV2 { #[cfg(test)] mod tests { - use engine_traits::{TabletFactory, CF_WRITE}; + use engine_traits::{OpenOptions, TabletFactory, CF_WRITE}; use super::*; use crate::{config::TikvConfig, server::KvEngineFactoryBuilder}; @@ -225,17 +269,24 @@ mod tests { } let factory = builder.build(); let shared_db = factory.create_shared_db().unwrap(); - let tablet = TabletFactory::create_tablet(&factory, 1, 10).unwrap(); - let tablet2 = factory.open_tablet(1, 10).unwrap(); + + // V1 can only create tablet once + factory + .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) + .unwrap_err(); + + let tablet = factory + .open_tablet(1, Some(10), 
OpenOptions::default().set_create(true)) + .unwrap(); + assert_eq!(tablet.as_inner().path(), shared_db.as_inner().path()); + let tablet = factory + .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) + .unwrap(); + assert_eq!(tablet.as_inner().path(), shared_db.as_inner().path()); + let tablet = factory + .open_tablet(1, None, OpenOptions::default().set_cache_only(true)) + .unwrap(); assert_eq!(tablet.as_inner().path(), shared_db.as_inner().path()); - assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); - let tablet2 = factory.open_tablet_cache(1, 10).unwrap(); - assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); - let tablet2 = factory.open_tablet_cache_any(1).unwrap(); - assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); - let tablet_path = factory.tablet_path(1, 10); - let tablet2 = factory.open_tablet_raw(&tablet_path, false).unwrap(); - assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); let mut count = 0; factory.for_each_opened_tablet(&mut |id, suffix, _tablet| { assert!(id == 0); @@ -266,16 +317,26 @@ mod tests { } let factory = builder.build_v2(); - let tablet = factory.create_tablet(1, 10).unwrap(); - let tablet2 = factory.open_tablet(1, 10).unwrap(); + let tablet = factory + .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) + .unwrap(); + let tablet2 = factory + .open_tablet(1, Some(10), OpenOptions::default().set_create(true)) + .unwrap(); assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); - let tablet2 = factory.open_tablet_cache(1, 10).unwrap(); + let tablet2 = factory + .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) + .unwrap(); assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); - let tablet2 = factory.open_tablet_cache_any(1).unwrap(); + let tablet2 = factory + .open_tablet(1, None, OpenOptions::default().set_cache_only(true)) + .unwrap(); assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); + 
let tablet_path = factory.tablet_path(1, 10); - let result = factory.open_tablet_raw(&tablet_path, false); + let result = factory.open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)); result.unwrap_err(); + factory .set_shared_block_cache_capacity(1024 * 1024) .unwrap(); @@ -292,17 +353,73 @@ mod tests { factory.load_tablet(&tablet_path, 1, 20).unwrap(); // After we load it as with the new id or suffix, we should be unable to get it // with the old id and suffix in the cache. - assert!(factory.open_tablet_cache(1, 10).is_none()); - assert!(factory.open_tablet_cache(1, 20).is_some()); + factory + .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) + .unwrap_err(); + factory + .open_tablet(1, Some(20), OpenOptions::default().set_cache_only(true)) + .unwrap(); factory.mark_tombstone(1, 20); assert!(factory.is_tombstoned(1, 20)); factory.destroy_tablet(1, 20).unwrap(); - let result = factory.open_tablet(1, 20); + + let result = factory.open_tablet(1, Some(20), OpenOptions::default()); result.unwrap_err(); + assert!(!factory.is_single_engine()); } + #[test] + fn test_existed_db_not_in_registry() { + let cfg = TEST_CONFIG.clone(); + assert!(cfg.storage.block_cache.shared); + let cache = cfg.storage.block_cache.build_shared_cache(); + let dir = test_util::temp_dir("test_kvengine_factory_v2", false); + let env = cfg.build_shared_rocks_env(None, None).unwrap(); + + let mut builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); + if let Some(cache) = cache { + builder = builder.block_cache(cache); + } + + let factory = builder.build_v2(); + let tablet = factory + .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) + .unwrap(); + drop(tablet); + let tablet = factory.registry.lock().unwrap().remove(&(1, 10)).unwrap(); + drop(tablet); + factory + .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) + .unwrap_err(); + + let tablet_path = factory.tablet_path(1, 10); + let tablet = factory + 
.open_tablet_raw(&tablet_path, 1, 10, OpenOptions::default()) + .unwrap(); + // the tablet will not inserted in the cache + factory + .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) + .unwrap_err(); + drop(tablet); + + let tablet_path = factory.tablet_path(1, 20); + // No such tablet, so error will be returned. + factory + .open_tablet_raw(&tablet_path, 1, 10, OpenOptions::default()) + .unwrap_err(); + + let _ = factory + .open_tablet(1, Some(10), OpenOptions::default().set_create(true)) + .unwrap(); + + // Now, it should be in the cache. + factory + .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) + .unwrap(); + } + #[test] fn test_get_live_tablets() { let cfg = TEST_CONFIG.clone(); @@ -311,8 +428,12 @@ mod tests { let builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); let factory = builder.build_v2(); - factory.create_tablet(1, 10).unwrap(); - factory.create_tablet(2, 10).unwrap(); + factory + .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) + .unwrap(); + factory + .open_tablet(2, Some(10), OpenOptions::default().set_create_new(true)) + .unwrap(); let mut count = 0; factory.for_each_opened_tablet(&mut |id, suffix, _tablet| { assert!(id == 1 || id == 2); diff --git a/src/storage/txn/flow_controller/tablet_flow_controller.rs b/src/storage/txn/flow_controller/tablet_flow_controller.rs index a35517246c5..17a5900bea7 100644 --- a/src/storage/txn/flow_controller/tablet_flow_controller.rs +++ b/src/storage/txn/flow_controller/tablet_flow_controller.rs @@ -13,7 +13,7 @@ use std::{ use collections::HashMap; use engine_rocks::FlowInfo; -use engine_traits::{CfNamesExt, FlowControlFactorsExt, TabletFactory}; +use engine_traits::{CfNamesExt, FlowControlFactorsExt, OpenOptions, TabletFactory}; use rand::Rng; use tikv_util::{sys::thread::StdThreadBuildWrapper, time::Limiter}; @@ -117,7 +117,13 @@ impl FlowInfoDispatcher { } let insert_limiter_and_checker = |region_id, suffix| -> FlowChecker { - let 
engine = tablet_factory.open_tablet_cache(region_id, suffix).unwrap(); + let engine = tablet_factory + .open_tablet( + region_id, + Some(suffix), + OpenOptions::default().set_cache_only(true), + ) + .unwrap(); let mut v = limiters.as_ref().write().unwrap(); let discard_ratio = Arc::new(AtomicU32::new(0)); let limiter = v.entry(region_id).or_insert(( @@ -166,8 +172,13 @@ impl FlowInfoDispatcher { // if checker.suffix < suffix, it means its tablet is old and needs the // refresh if checker.tablet_suffix() < suffix { - let engine = - tablet_factory.open_tablet_cache(region_id, suffix).unwrap(); + let engine = tablet_factory + .open_tablet( + region_id, + Some(suffix), + OpenOptions::default().set_cache_only(true), + ) + .unwrap(); checker.set_engine(engine); checker.set_tablet_suffix(suffix); } From bec2627bb826b9e3c6266ec82f5aef5cdb4de7bd Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com> Date: Mon, 8 Aug 2022 11:08:47 +0800 Subject: [PATCH 0141/1149] diagnostics: support cgroup limit memory (#13237) close tikv/tikv#13217, ref tikv/tikv#13217 support cgroup limit memory in diagnostics service Signed-off-by: Lloyd-Pottiger Co-authored-by: Lloyd-Pottiger --- src/server/service/diagnostics/sys.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/service/diagnostics/sys.rs b/src/server/service/diagnostics/sys.rs index 9eb88016424..f39da646ad1 100644 --- a/src/server/service/diagnostics/sys.rs +++ b/src/server/service/diagnostics/sys.rs @@ -350,7 +350,7 @@ fn mem_hardware_info(collector: &mut Vec) { system.refresh_memory(); let mut pair = ServerInfoPair::default(); pair.set_key("capacity".to_string()); - pair.set_value((system.get_total_memory() * KIB).to_string()); + pair.set_value(SysQuota::memory_limit_in_bytes().to_string()); let mut item = ServerInfoItem::default(); item.set_tp("memory".to_string()); item.set_name("memory".to_string()); From dcbeb16f8cf5fb9e8811a9b030b75cd71710949f Mon Sep 17 
00:00:00 2001 From: lhy1024 Date: Mon, 8 Aug 2022 11:50:47 +0800 Subject: [PATCH 0142/1149] dr-auto-sync: judge whether `RocksWriteBatchVec` is empty (#13238) close tikv/tikv#13194 Signed-off-by: lhy1024 --- src/server/reset_to_version.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/server/reset_to_version.rs b/src/server/reset_to_version.rs index 20bd65ac17a..e1faccd9b3f 100644 --- a/src/server/reset_to_version.rs +++ b/src/server/reset_to_version.rs @@ -134,8 +134,10 @@ impl ResetToVersionWorker { box_try!(wb.delete_cf(CF_WRITE, &key)); box_try!(wb.delete_cf(CF_DEFAULT, default_key.as_encoded())); } - wb.write().unwrap(); - wb.clear(); + if !wb.is_empty() { + wb.write().unwrap(); + wb.clear(); + } Ok(has_more) } @@ -165,7 +167,10 @@ impl ResetToVersionWorker { break; } } - wb.write().unwrap(); + if !wb.is_empty() { + wb.write().unwrap(); + wb.clear(); + } Ok(has_more) } } From 0b4231ac9ed76575d0fe4e3b6ba93efac4b50431 Mon Sep 17 00:00:00 2001 From: Ping Yu Date: Mon, 8 Aug 2022 12:14:46 +0800 Subject: [PATCH 0143/1149] causal_ts: Implement TSO batch list (#12970) close tikv/tikv#12794, ref tikv/tikv#12794 Implement TSO batch list to improve tolerance to TSO service fault. 
Signed-off-by: pingyu Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + Cargo.toml | 2 +- components/causal_ts/Cargo.toml | 6 +- components/causal_ts/src/config.rs | 33 +- components/causal_ts/src/lib.rs | 3 + components/causal_ts/src/metrics.rs | 61 +- components/causal_ts/src/observer.rs | 12 +- components/causal_ts/src/tso.rs | 753 ++++++++++++++++++------ components/server/src/server.rs | 2 + components/test_raftstore/src/server.rs | 2 + metrics/grafana/tikv_raw.json | 243 ++++++++ tests/integrations/config/mod.rs | 2 + 12 files changed, 933 insertions(+), 187 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 52b39154e91..f258fbdcf69 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -733,6 +733,7 @@ dependencies = [ "parking_lot 0.12.0", "pd_client", "prometheus", + "prometheus-static-metric", "raft", "raftstore", "serde", diff --git a/Cargo.toml b/Cargo.toml index fd7af73bdf4..9bbea00262c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,7 +38,7 @@ cloud-azure = [ "encryption_export/cloud-azure", "sst_importer/cloud-azure", ] -testexport = ["raftstore/testexport", "api_version/testexport"] +testexport = ["raftstore/testexport", "api_version/testexport", "causal_ts/testexport"] test-engine-kv-rocksdb = [ "engine_test/test-engine-kv-rocksdb" ] diff --git a/components/causal_ts/Cargo.toml b/components/causal_ts/Cargo.toml index 08027941f03..b1ad4ed449a 100644 --- a/components/causal_ts/Cargo.toml +++ b/components/causal_ts/Cargo.toml @@ -4,6 +4,9 @@ version = "0.0.1" edition = "2018" publish = false +[features] +testexport = [] + [dependencies] api_version = { path = "../api_version", default-features = false } engine_rocks = { path = "../engine_rocks", default-features = false } @@ -17,6 +20,7 @@ log_wrappers = { path = "../log_wrappers" } parking_lot = "0.12" pd_client = { path = "../pd_client", default-features = false } prometheus = { version = "0.13", features = ["nightly"] } +prometheus-static-metric = "0.5" raft = { version = "0.7.0", default-features = 
false, features = ["protobuf-codec"] } raftstore = { path = "../raftstore", default-features = false } serde = "1.0" @@ -30,4 +34,4 @@ tokio = { version = "1", features = ["sync"] } txn_types = { path = "../txn_types", default-features = false } [dev-dependencies] -test_raftstore = { path = "../test_raftstore", default-features = false } +test_raftstore = { path = "../test_raftstore" } diff --git a/components/causal_ts/src/config.rs b/components/causal_ts/src/config.rs index e75bff62d47..0b08fecc7d6 100644 --- a/components/causal_ts/src/config.rs +++ b/components/causal_ts/src/config.rs @@ -20,6 +20,21 @@ pub struct Config { /// 1K tso/s should be enough. Benchmark showed that with a 8.6w raw_put /// per second, the TSO requirement is 600 per second. pub renew_batch_min_size: u32, + /// The maximum renew batch size of BatchTsoProvider. + /// + /// Default is 8192. + /// PD provides 262144 TSO per 50ms for the whole cluster. Exceed this space + /// will cause PD to sleep for 50ms, waiting for physical update + /// interval. The 50ms limitation can not be broken through now (see + /// `tso-update-physical-interval`). + pub renew_batch_max_size: u32, + /// The available interval of BatchTsoProvider. + /// + /// Default is 3s. + /// The longer of the value can provide better "high-availability" against + /// PD failure, but more overhead of `TsoBatchList` & pressure to TSO + /// service. 
+ pub available_interval: ReadableDuration, } impl Config { @@ -28,7 +43,13 @@ impl Config { return Err("causal-ts.renew_interval can't be zero".into()); } if self.renew_batch_min_size == 0 { - return Err("causal-ts.renew_batch_init_size should be greater than 0".into()); + return Err("causal-ts.renew_batch_min_size should be greater than 0".into()); + } + if self.renew_batch_max_size == 0 { + return Err("causal-ts.renew_batch_max_size should be greater than 0".into()); + } + if self.available_interval.is_zero() { + return Err("causal-ts.available-interval can't be zero".into()); } Ok(()) } @@ -37,8 +58,14 @@ impl Config { impl Default for Config { fn default() -> Self { Self { - renew_interval: ReadableDuration::millis(crate::tso::TSO_BATCH_RENEW_INTERVAL_DEFAULT), - renew_batch_min_size: crate::tso::TSO_BATCH_MIN_SIZE_DEFAULT, + renew_interval: ReadableDuration::millis( + crate::tso::DEFAULT_TSO_BATCH_RENEW_INTERVAL_MS, + ), + renew_batch_min_size: crate::tso::DEFAULT_TSO_BATCH_MIN_SIZE, + renew_batch_max_size: crate::tso::DEFAULT_TSO_BATCH_MAX_SIZE, + available_interval: ReadableDuration::millis( + crate::tso::DEFAULT_TSO_BATCH_AVAILABLE_INTERVAL_MS, + ), } } } diff --git a/components/causal_ts/src/lib.rs b/components/causal_ts/src/lib.rs index 05626ce7203..9d77818d253 100644 --- a/components/causal_ts/src/lib.rs +++ b/components/causal_ts/src/lib.rs @@ -1,5 +1,8 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +#![feature(map_first_last)] // For `BTreeMap::pop_first`. +#![feature(div_duration)] + #[macro_use] extern crate tikv_util; diff --git a/components/causal_ts/src/metrics.rs b/components/causal_ts/src/metrics.rs index 072f7325dc0..52f352ccfe5 100644 --- a/components/causal_ts/src/metrics.rs +++ b/components/causal_ts/src/metrics.rs @@ -2,6 +2,7 @@ use lazy_static::*; use prometheus::*; +use prometheus_static_metric::*; lazy_static! 
{ pub static ref TS_PROVIDER_TSO_BATCH_SIZE: IntGauge = register_int_gauge!( @@ -20,7 +21,65 @@ lazy_static! { "tikv_causal_ts_provider_tso_batch_renew_duration_seconds", "Histogram of the duration of TSO batch renew", &["result", "reason"], - exponential_buckets(1e-6, 2.0, 20).unwrap() // 1us ~ 1s + exponential_buckets(1e-4, 2.0, 20).unwrap() // 0.1ms ~ 104s ) .unwrap(); + pub static ref TS_PROVIDER_TSO_BATCH_LIST_COUNTING: HistogramVec = register_histogram_vec!( + "tikv_causal_ts_provider_tso_batch_list_counting", + "Histogram of TSO batch list counting", + &["type"], + exponential_buckets(10.0, 2.0, 20).unwrap() // 10 ~ 10,000,000 + ) + .unwrap(); +} + +make_auto_flush_static_metric! { + pub label_enum TsoBatchRenewReason { + init, + background, + used_up, + flush, + } + + pub label_enum TsoBatchCountingKind { + tso_usage, + tso_remain, + new_batch_size, + } + + pub label_enum ResultKind { + ok, + err, + } + + pub struct TsProviderGetTsDurationVec: LocalHistogram { + "result" => ResultKind, + } + + pub struct TsoBatchRenewDurationVec: LocalHistogram { + "result" => ResultKind, + "reason" => TsoBatchRenewReason, + } + + pub struct TsoBatchListCountingVec: LocalHistogram { + "type" => TsoBatchCountingKind, + } +} + +impl From<&std::result::Result> for ResultKind { + #[inline] + fn from(res: &std::result::Result) -> Self { + if res.is_ok() { Self::ok } else { Self::err } + } +} + +lazy_static! 
{ + pub static ref TS_PROVIDER_GET_TS_DURATION_STATIC: TsProviderGetTsDurationVec = + auto_flush_from!(TS_PROVIDER_GET_TS_DURATION, TsProviderGetTsDurationVec); + pub static ref TS_PROVIDER_TSO_BATCH_RENEW_DURATION_STATIC: TsoBatchRenewDurationVec = auto_flush_from!( + TS_PROVIDER_TSO_BATCH_RENEW_DURATION, + TsoBatchRenewDurationVec + ); + pub static ref TS_PROVIDER_TSO_BATCH_LIST_COUNTING_STATIC: TsoBatchListCountingVec = + auto_flush_from!(TS_PROVIDER_TSO_BATCH_LIST_COUNTING, TsoBatchListCountingVec); } diff --git a/components/causal_ts/src/observer.rs b/components/causal_ts/src/observer.rs index aeb04bfabf5..f648d8cba08 100644 --- a/components/causal_ts/src/observer.rs +++ b/components/causal_ts/src/observer.rs @@ -175,8 +175,16 @@ pub mod tests { fn init() -> CausalObserver, DummyRawTsTracker> { let pd_cli = Arc::new(TestPdClient::new(0, true)); pd_cli.set_tso(100.into()); - let causal_ts_provider = - Arc::new(block_on(BatchTsoProvider::new_opt(pd_cli, Duration::ZERO, 100)).unwrap()); + let causal_ts_provider = Arc::new( + block_on(BatchTsoProvider::new_opt( + pd_cli, + Duration::ZERO, + Duration::from_secs(3), + 100, + 8192, + )) + .unwrap(), + ); CausalObserver::new(causal_ts_provider, DummyRawTsTracker::default()) } diff --git a/components/causal_ts/src/tso.rs b/components/causal_ts/src/tso.rs index b6ee5d177e1..6eabf8bf351 100644 --- a/components/causal_ts/src/tso.rs +++ b/components/causal_ts/src/tso.rs @@ -1,9 +1,31 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +//! ## The algorithm to make the TSO cache tolerate failure of TSO service +//! +//! 1. The scale of High-Available is specified by config item +//! `causal-ts.available-interval`. +//! +//! 2. Count usage of TSO on every renew interval. +//! +//! 3. Calculate `cache_multiplier` by `causal-ts.available-interval / +//! causal-ts.renew-interval`. +//! +//! 4. Then `tso_usage x cache_multiplier` is the expected number of TSO should +//! be cached. +//! +//! 5. 
And `tso_usage x cache_multiplier - tso_remain` is the expected number of +//! TSO to be requested from TSO service (if it's not a flush). +//! +//! Others: +//! * `cache_multiplier` is also used as capacity of TSO batch list, as we +//! append an item to the list on every renew. + use std::{ + borrow::Borrow, + collections::BTreeMap, error, result, sync::{ - atomic::{AtomicU64, Ordering}, + atomic::{AtomicI32, AtomicU32, AtomicU64, Ordering}, Arc, }, }; @@ -28,24 +50,28 @@ use crate::{ CausalTsProvider, }; -// Renew on every 100ms, to adjust batch size rapidly enough. -pub(crate) const TSO_BATCH_RENEW_INTERVAL_DEFAULT: u64 = 100; -// Batch size on every renew interval. -// One TSO is required for every batch of Raft put messages, so by default 1K -// tso/s should be enough. Benchmark showed that with a 8.6w raw_put per second, -// the TSO requirement is 600 per second. -pub(crate) const TSO_BATCH_MIN_SIZE_DEFAULT: u32 = 100; -// Max batch size of TSO requests. Space of logical timestamp is 262144, -// exceed this space will cause PD to sleep, waiting for physical clock advance. -const TSO_BATCH_MAX_SIZE: u32 = 20_0000; - -const TSO_BATCH_RENEW_ON_INITIALIZE: &str = "init"; -const TSO_BATCH_RENEW_BY_BACKGROUND: &str = "background"; -const TSO_BATCH_RENEW_FOR_USED_UP: &str = "used-up"; -const TSO_BATCH_RENEW_FOR_FLUSH: &str = "flush"; +/// Renew on every 100ms, to adjust batch size rapidly enough. +pub(crate) const DEFAULT_TSO_BATCH_RENEW_INTERVAL_MS: u64 = 100; +/// Minimal batch size of TSO requests. This is an empirical value. +pub(crate) const DEFAULT_TSO_BATCH_MIN_SIZE: u32 = 100; +/// Maximum batch size of TSO requests. +/// As PD provides 262144 TSO per 50ms, conservatively set to 1/16 of 262144. +/// Exceed this space will cause PD to sleep for 50ms, waiting for physical +/// update interval. The 50ms limitation can not be broken through now (see +/// `tso-update-physical-interval`). 
+pub(crate) const DEFAULT_TSO_BATCH_MAX_SIZE: u32 = 8192; +/// Maximum available interval of TSO cache. +/// It means the duration that TSO we cache would be available despite failure +/// of PD. The longer of the value can provide better "High-Availability" +/// against PD failure, but more overhead of `TsoBatchList` & pressure to TSO +/// service. +pub(crate) const DEFAULT_TSO_BATCH_AVAILABLE_INTERVAL_MS: u64 = 3000; +/// Just a limitation for safety, in case user specify a too big +/// `available_interval`. +const MAX_TSO_BATCH_LIST_CAPACITY: u32 = 1024; /// TSO range: [(physical, logical_start), (physical, logical_end)) -#[derive(Default, Debug)] +#[derive(Debug)] struct TsoBatch { size: u32, physical: u64, @@ -54,7 +80,7 @@ struct TsoBatch { } impl TsoBatch { - pub fn pop(&self) -> Option { + pub fn pop(&self) -> Option<(TimeStamp, bool /* is_used_up */)> { let mut logical = self.logical_start.load(Ordering::Relaxed); while logical < self.logical_end { match self.logical_start.compare_exchange_weak( @@ -63,7 +89,12 @@ impl TsoBatch { Ordering::Relaxed, Ordering::Relaxed, ) { - Ok(_) => return Some(TimeStamp::compose(self.physical, logical)), + Ok(_) => { + return Some(( + TimeStamp::compose(self.physical, logical), + logical + 1 == self.logical_end, + )); + } Err(x) => logical = x, } } @@ -71,48 +102,177 @@ impl TsoBatch { } // `last_ts` is the last timestamp of the new batch. 
- pub fn renew(&mut self, batch_size: u32, last_ts: TimeStamp) -> Result<()> { - let (physical, logical) = (last_ts.physical(), last_ts.logical() + 1); - let logical_start = logical.checked_sub(batch_size as u64).unwrap(); + pub fn new(batch_size: u32, last_ts: TimeStamp) -> Self { + let (physical, logical_end) = (last_ts.physical(), last_ts.logical() + 1); + let logical_start = logical_end.checked_sub(batch_size as u64).unwrap(); + + Self { + size: batch_size, + physical, + logical_end, + logical_start: AtomicU64::new(logical_start), + } + } + + /// Number of remaining (available) TSO in the batch. + pub fn remain(&self) -> u32 { + self.logical_end + .saturating_sub(self.logical_start.load(Ordering::Relaxed)) as u32 + } + + /// The original start timestamp in the batch. + pub fn original_start(&self) -> TimeStamp { + TimeStamp::compose(self.physical, self.logical_end - self.size as u64) + } + + /// The excluded end timestamp after the last in batch. + pub fn excluded_end(&self) -> TimeStamp { + TimeStamp::compose(self.physical, self.logical_end) + } +} + +/// `TsoBatchList` is a ordered list of `TsoBatch`. It aims to: +/// +/// 1. Cache more number of TSO to improve high availability. See issue #12794. +/// `TsoBatch` can only cache at most 262144 TSO as logical clock is 18 bits. +/// +/// 2. Fully utilize cached TSO when some regions require latest TSO (e.g. in +/// the scenario of leader transfer). Other regions without the requirement can +/// still use older TSO cache. +#[derive(Default, Debug)] +struct TsoBatchList { + inner: RwLock, + + /// Number of remaining (available) TSO. + /// Using signed integer for avoiding a wrap around huge value as it's not + /// precisely counted. + tso_remain: AtomicI32, + + /// Statistics of TSO usage. + tso_usage: AtomicU32, - if physical < self.physical - || (physical == self.physical && logical_start < self.logical_end) + /// Length of batch list. It is used to limit size for efficiency, and keep + /// batches fresh. 
+ capacity: u32, +} + +/// Inner data structure of batch list. +/// The reasons why `crossbeam_skiplist::SkipMap` is not chosen: +/// +/// 1. In `flush()` procedure, a reader of `SkipMap` can still acquire a batch +/// after the it is removed, which would violate the causality requirement. +/// The `RwLock` avoid this scenario by lock synchronization. +/// +/// 2. It is a scenario with much more reads than writes. The `RwLock` would not +/// be less efficient than lock free implementation. +type TsoBatchListInner = BTreeMap; + +impl TsoBatchList { + pub fn new(capacity: u32) -> Self { + Self { + capacity: std::cmp::min(capacity, MAX_TSO_BATCH_LIST_CAPACITY), + ..Default::default() + } + } + + pub fn remain(&self) -> u32 { + std::cmp::max(self.tso_remain.load(Ordering::Relaxed), 0) as u32 + } + + pub fn usage(&self) -> u32 { + self.tso_usage.load(Ordering::Relaxed) + } + + pub fn take_and_report_usage(&self) -> u32 { + let usage = self.tso_usage.swap(0, Ordering::Relaxed); + TS_PROVIDER_TSO_BATCH_LIST_COUNTING_STATIC + .tso_usage + .observe(usage as f64); + usage + } + + // TODO: make it async + fn remove_batch(&self, key: u64) { + if let Some(batch) = self.inner.write().remove(&key) { + self.tso_remain + .fetch_sub(batch.remain() as i32, Ordering::Relaxed); + } + } + + /// Pop timestamp. + /// When `after_ts.is_some()`, it will pop timestamp larger that `after_ts`. + /// It is used for the scenario that some regions have causality + /// requirement (e.g. after transfer, the next timestamp of new leader + /// should be larger than the store where it is transferred from). + /// `after_ts` is included. 
+ pub fn pop(&self, after_ts: Option) -> Option { + let inner = self.inner.read(); + let range = match after_ts { + Some(after_ts) => inner.range(&after_ts.into_inner()..), + None => inner.range(..), + }; + for (key, batch) in range { + if let Some((ts, is_used_up)) = batch.pop() { + let key = *key; + drop(inner); + self.tso_usage.fetch_add(1, Ordering::Relaxed); + self.tso_remain.fetch_sub(1, Ordering::Relaxed); + if is_used_up { + // TODO: make it async + self.remove_batch(key); + } + return Some(ts); + } + } + None + } + + pub fn push(&self, batch_size: u32, last_ts: TimeStamp, need_flush: bool) -> Result { + let new_batch = TsoBatch::new(batch_size, last_ts); + + if let Some((_, last_batch)) = self.inner.read().iter().next_back() { + if new_batch.original_start() < last_batch.excluded_end() { + error!("timestamp fall back"; "batch_size" => batch_size, "last_ts" => ?last_ts, + "last_batch" => ?last_batch, "new_batch" => ?new_batch); + return Err(box_err!("timestamp fall back")); + } + } + + let key = new_batch.original_start().into_inner(); { - error!("timestamp fall back"; "last_ts" => ?last_ts, "batch" => ?self, - "physical" => physical, "logical" => logical, "logical_start" => logical_start); - return Err(box_err!("timestamp fall back")); + // Hold the write lock until new batch is inserted. + // Otherwise a `pop()` would acquire the lock, meet no TSO available, and invoke + // renew request. + let mut inner = self.inner.write(); + if need_flush { + self.flush_internal(&mut inner); + } + + inner.insert(key, new_batch); + self.tso_remain + .fetch_add(batch_size as i32, Ordering::Relaxed); } - self.size = batch_size; - self.physical = physical; - self.logical_end = logical; - self.logical_start.store(logical_start, Ordering::Relaxed); - Ok(()) + // remove items out of capacity limitation. 
+ // TODO: make it async + if self.inner.read().len() > self.capacity as usize { + if let Some((_, batch)) = self.inner.write().pop_first() { + self.tso_remain + .fetch_sub(batch.remain() as i32, Ordering::Relaxed); + } + } + + Ok(key) + } + + fn flush_internal(&self, inner: &mut TsoBatchListInner) { + inner.clear(); + self.tso_remain.store(0, Ordering::Relaxed); } - // Note: batch is "used up" in flush, and batch size will be enlarged in next - // renew. pub fn flush(&self) { - self.logical_start - .store(self.logical_end, Ordering::Relaxed); - } - - // Return None if TsoBatch is empty. - // Note that `logical_start` will be larger than `logical_end`. See `pop()`. - pub fn used_size(&self) -> Option { - if self.size > 0 { - Some( - self.size - .checked_sub( - self.logical_end - .saturating_sub(self.logical_start.load(Ordering::Relaxed)) - as u32, - ) - .unwrap(), - ) - } else { - None - } + let mut inner = self.inner.write(); + self.flush_internal(&mut inner); } } @@ -128,53 +288,92 @@ struct RenewRequest { sender: oneshot::Sender, } +#[derive(Clone, Copy, Debug)] +struct RenewParameter { + batch_min_size: u32, + batch_max_size: u32, + // `cache_multiplier` indicates that times on usage of TSO it should cache. + // It is also used as capacity of `TsoBatchList`. 
+ cache_multiplier: u32, +} + pub struct BatchTsoProvider { pd_client: Arc, - batch: Arc>, - batch_min_size: u32, + batch_list: Arc, causal_ts_worker: Worker, renew_interval: Duration, - renew_request_tx: mpsc::Sender, + renew_parameter: RenewParameter, + renew_request_tx: Sender, +} + +impl std::fmt::Debug for BatchTsoProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("BatchTsoProvider") + .field("batch_list", &self.batch_list) + .field("renew_interval", &self.renew_interval) + .field("renew_parameter", &self.renew_parameter) + .finish() + } } impl BatchTsoProvider { pub async fn new(pd_client: Arc) -> Result { Self::new_opt( pd_client, - Duration::from_millis(TSO_BATCH_RENEW_INTERVAL_DEFAULT), - TSO_BATCH_MIN_SIZE_DEFAULT, + Duration::from_millis(DEFAULT_TSO_BATCH_RENEW_INTERVAL_MS), + Duration::from_millis(DEFAULT_TSO_BATCH_AVAILABLE_INTERVAL_MS), + DEFAULT_TSO_BATCH_MIN_SIZE, + DEFAULT_TSO_BATCH_MAX_SIZE, ) .await } + #[allow(unused_mut)] + fn calc_cache_multiplier(mut renew_interval: Duration, available_interval: Duration) -> u32 { + #[cfg(any(test, feature = "testexport"))] + if renew_interval.is_zero() { + // Should happen in test only. 
+ renew_interval = Duration::from_millis(DEFAULT_TSO_BATCH_RENEW_INTERVAL_MS); + } + available_interval.div_duration_f64(renew_interval).ceil() as u32 + } + pub async fn new_opt( pd_client: Arc, renew_interval: Duration, + available_interval: Duration, batch_min_size: u32, + batch_max_size: u32, ) -> Result { + let cache_multiplier = Self::calc_cache_multiplier(renew_interval, available_interval); + let renew_parameter = RenewParameter { + batch_min_size, + batch_max_size, + cache_multiplier, + }; let (renew_request_tx, renew_request_rx) = mpsc::channel(MAX_RENEW_BATCH_SIZE); let s = Self { pd_client: pd_client.clone(), - batch: Arc::new(RwLock::new(TsoBatch::default())), - batch_min_size, + batch_list: Arc::new(TsoBatchList::new(cache_multiplier)), causal_ts_worker: WorkerBuilder::new("causal_ts_batch_tso_worker").create(), renew_interval, + renew_parameter, renew_request_tx, }; s.init(renew_request_rx).await?; Ok(s) } - async fn renew_tso_batch(&self, need_flush: bool, reason: &str) -> Result<()> { + async fn renew_tso_batch(&self, need_flush: bool, reason: TsoBatchRenewReason) -> Result<()> { Self::renew_tso_batch_internal(self.renew_request_tx.clone(), need_flush, reason).await } async fn renew_tso_batch_internal( renew_request_tx: Sender, need_flush: bool, - reason: &str, + reason: TsoBatchRenewReason, ) -> Result<()> { - let start = Instant::now(); + let start = Instant::now_coarse(); let (request, response) = oneshot::channel(); renew_request_tx .send(RenewRequest { @@ -188,60 +387,70 @@ impl BatchTsoProvider { .map_err(|_| box_err!("renew response channel is dropped")) .and_then(|r| r.map_err(|err| Error::BatchRenew(err))); - let label = if res.is_ok() { "ok" } else { "err" }; - TS_PROVIDER_TSO_BATCH_RENEW_DURATION - .with_label_values(&[label, reason]) + TS_PROVIDER_TSO_BATCH_RENEW_DURATION_STATIC + .get(res.borrow().into()) + .get(reason) .observe(start.saturating_elapsed_secs()); res } async fn renew_tso_batch_impl( pd_client: Arc, - tso_batch: Arc>, - 
batch_min_size: u32, + tso_batch_list: Arc, + renew_parameter: RenewParameter, need_flush: bool, ) -> Result<()> { - let new_batch_size = { - let batch = tso_batch.read(); - match batch.used_size() { - None => batch_min_size, - Some(used_size) => { - debug!("CachedTsoProvider::renew_tso_batch"; "batch before" => ?batch, "need_flush" => need_flush, "used size" => used_size); - Self::calc_new_batch_size(batch.size, used_size, batch_min_size) - } - } - }; - - match pd_client.batch_get_tso(new_batch_size).await { + let tso_remain = tso_batch_list.remain(); + let new_batch_size = + Self::calc_new_batch_size(tso_batch_list.clone(), renew_parameter, need_flush); + + TS_PROVIDER_TSO_BATCH_LIST_COUNTING_STATIC + .tso_remain + .observe(tso_remain as f64); + TS_PROVIDER_TSO_BATCH_LIST_COUNTING_STATIC + .new_batch_size + .observe(new_batch_size as f64); + + let res = match pd_client.batch_get_tso(new_batch_size).await { Err(err) => { - warn!("BatchTsoProvider::renew_tso_batch, pd_client.batch_get_tso error"; "error" => ?err, "need_flash" => need_flush); + warn!("BatchTsoProvider::renew_tso_batch, pd_client.batch_get_tso error"; + "new_batch_size" => new_batch_size, "error" => ?err, "need_flash" => need_flush); if need_flush { - let batch = tso_batch.write(); - batch.flush(); + tso_batch_list.flush(); } Err(err.into()) } Ok(ts) => { - { - let mut batch = tso_batch.write(); - batch.renew(new_batch_size, ts).map_err(|e| { + tso_batch_list + .push(new_batch_size, ts, need_flush) + .map_err(|e| { if need_flush { - batch.flush(); + tso_batch_list.flush(); } e })?; - debug!("BatchTsoProvider::renew_tso_batch"; "batch renew" => ?batch, "ts" => ?ts); - } - TS_PROVIDER_TSO_BATCH_SIZE.set(new_batch_size as i64); + debug!("BatchTsoProvider::renew_tso_batch"; + "tso_batch_list.remain" => tso_batch_list.remain(), "ts" => ?ts); + + // Should only be invoked after successful renew. Otherwise the TSO usage will + // be lost, and batch size requirement will be less than expected. 
Note that + // invoked here is not precise. There would be `get_ts()` before here after + // above `tso_batch_list.push()`, and make `tso_usage` a little bigger. This + // error is acceptable. + tso_batch_list.take_and_report_usage(); + Ok(()) } - } + }; + let total_batch_size = tso_batch_list.remain() + tso_batch_list.usage(); + TS_PROVIDER_TSO_BATCH_SIZE.set(total_batch_size as i64); + res } async fn renew_thread( pd_client: Arc, - tso_batch: Arc>, - batch_min_size: u32, + tso_batch_list: Arc, + renew_parameter: RenewParameter, mut rx: Receiver, ) { loop { @@ -270,8 +479,8 @@ impl BatchTsoProvider { let res = Self::renew_tso_batch_impl( pd_client.clone(), - tso_batch.clone(), - batch_min_size, + tso_batch_list.clone(), + renew_parameter, need_flush, ) .await @@ -286,28 +495,36 @@ impl BatchTsoProvider { } } - fn calc_new_batch_size(batch_size: u32, used_size: u32, batch_min_size: u32) -> u32 { - if used_size > batch_size * 3 / 4 { - // Enlarge to double if used more than 3/4. - std::cmp::min(batch_size << 1, TSO_BATCH_MAX_SIZE) - } else if used_size < batch_size / 4 { - // Shrink to half if used less than 1/4. - std::cmp::max(batch_size >> 1, batch_min_size) - } else { - batch_size + fn calc_new_batch_size( + tso_batch_list: Arc, + renew_parameter: RenewParameter, + need_flush: bool, + ) -> u32 { + // The expected number of TSO is `cache_multiplier` times on latest usage. + // Note: There is a `batch_max_size` limitation, so the request batch size will + // be less than expected, and will be fulfill in next renew. + // TODO: consider schedule TSO requests exceed `batch_max_size` limitation to + // fulfill requirement in time. 
+ let mut new_batch_size = tso_batch_list.usage() * renew_parameter.cache_multiplier; + if !need_flush { + new_batch_size = new_batch_size.saturating_sub(tso_batch_list.remain()) } + std::cmp::min( + std::cmp::max(new_batch_size, renew_parameter.batch_min_size), + renew_parameter.batch_max_size, + ) } async fn init(&self, renew_request_rx: Receiver) -> Result<()> { // Spawn renew thread. let pd_client = self.pd_client.clone(); - let tso_batch = self.batch.clone(); - let batch_min_size = self.batch_min_size; + let tso_batch_list = self.batch_list.clone(); + let renew_parameter = self.renew_parameter; self.causal_ts_worker.remote().spawn(async move { - Self::renew_thread(pd_client, tso_batch, batch_min_size, renew_request_rx).await; + Self::renew_thread(pd_client, tso_batch_list, renew_parameter, renew_request_rx).await; }); - self.renew_tso_batch(true, TSO_BATCH_RENEW_ON_INITIALIZE) + self.renew_tso_batch(true, TsoBatchRenewReason::init) .await?; let request_tx = self.renew_request_tx.clone(); @@ -317,7 +534,7 @@ impl BatchTsoProvider { let _ = Self::renew_tso_batch_internal( request_tx, false, - TSO_BATCH_RENEW_BY_BACKGROUND, + TsoBatchRenewReason::background, ) .await; } @@ -331,33 +548,38 @@ impl BatchTsoProvider { Ok(()) } - // Get current batch_size, for test purpose. - pub fn batch_size(&self) -> u32 { - self.batch.read().size + #[cfg(test)] + pub fn tso_remain(&self) -> u32 { + self.batch_list.remain() + } + + #[cfg(test)] + pub fn tso_usage(&self) -> u32 { + self.batch_list.usage() } } const GET_TS_MAX_RETRY: u32 = 3; impl CausalTsProvider for BatchTsoProvider { + // TODO: support `after_ts` argument. 
fn get_ts(&self) -> Result { let start = Instant::now(); let mut retries = 0; let mut last_batch_size: u32; loop { { - let batch = self.batch.read(); - last_batch_size = batch.size; - match batch.pop() { + last_batch_size = self.batch_list.remain() + self.batch_list.usage(); + match self.batch_list.pop(None) { Some(ts) => { trace!("BatchTsoProvider::get_ts: {:?}", ts); - TS_PROVIDER_GET_TS_DURATION - .with_label_values(&["ok"]) + TS_PROVIDER_GET_TS_DURATION_STATIC + .ok .observe(start.saturating_elapsed_secs()); return Ok(ts); } None => { - warn!("BatchTsoProvider::get_ts, batch used up"; "batch.size" => batch.size, "retries" => retries); + warn!("BatchTsoProvider::get_ts, batch used up"; "last_batch_size" => last_batch_size, "retries" => retries); } } } @@ -365,7 +587,7 @@ impl CausalTsProvider for BatchTsoProvider { if retries >= GET_TS_MAX_RETRY { break; } - if let Err(err) = block_on(self.renew_tso_batch(false, TSO_BATCH_RENEW_FOR_USED_UP)) { + if let Err(err) = block_on(self.renew_tso_batch(false, TsoBatchRenewReason::used_up)) { // `renew_tso_batch` failure is likely to be caused by TSO timeout, which would // mean that PD is quite busy. So do not retry any more. 
error!("BatchTsoProvider::get_ts, renew_tso_batch fail on batch used-up"; "err" => ?err); @@ -373,15 +595,16 @@ impl CausalTsProvider for BatchTsoProvider { } retries += 1; } - error!("BatchTsoProvider::get_ts, batch used up"; "batch.size" => last_batch_size, "retries" => retries); - TS_PROVIDER_GET_TS_DURATION - .with_label_values(&["err"]) + error!("BatchTsoProvider::get_ts, batch used up"; "last_batch_size" => last_batch_size, "retries" => retries); + TS_PROVIDER_GET_TS_DURATION_STATIC + .err .observe(start.saturating_elapsed_secs()); Err(Error::TsoBatchUsedUp(last_batch_size)) } + // TODO: provide asynchronous method fn flush(&self) -> Result<()> { - block_on(self.renew_tso_batch(true, TSO_BATCH_RENEW_FOR_FLUSH)) + block_on(self.renew_tso_batch(true, TsoBatchRenewReason::flush)) } } @@ -413,57 +636,209 @@ pub mod tests { #[test] fn test_tso_batch() { - let mut batch = TsoBatch::default(); + let batch = TsoBatch::new(10, TimeStamp::compose(1, 100)); - assert_eq!(batch.used_size(), None); - assert_eq!(batch.pop(), None); - batch.flush(); + assert_eq!(batch.original_start(), TimeStamp::compose(1, 91)); + assert_eq!(batch.excluded_end(), TimeStamp::compose(1, 101)); + assert_eq!(batch.remain(), 10); - batch.renew(10, TimeStamp::compose(1, 100)).unwrap(); - for logical in 91..=95 { - assert_eq!(batch.pop(), Some(TimeStamp::compose(1, logical))); + for logical in 91..=93 { + assert_eq!(batch.pop(), Some((TimeStamp::compose(1, logical), false))); } - assert_eq!(batch.used_size(), Some(5)); + assert_eq!(batch.remain(), 7); - for logical in 96..=100 { - assert_eq!(batch.pop(), Some(TimeStamp::compose(1, logical))); + for logical in 94..=99 { + assert_eq!(batch.pop(), Some((TimeStamp::compose(1, logical), false))); } - assert_eq!(batch.used_size(), Some(10)); - assert_eq!(batch.pop(), None); + assert_eq!(batch.remain(), 1); - batch.renew(10, TimeStamp::compose(1, 110)).unwrap(); - // timestamp fall back - batch.renew(10, TimeStamp::compose(1, 119)).unwrap_err(); - - 
batch.renew(10, TimeStamp::compose(1, 200)).unwrap(); - for logical in 191..=195 { - assert_eq!(batch.pop(), Some(TimeStamp::compose(1, logical))); - } - batch.flush(); - assert_eq!(batch.used_size(), Some(10)); + assert_eq!(batch.pop(), Some((TimeStamp::compose(1, 100), true))); assert_eq!(batch.pop(), None); + assert_eq!(batch.remain(), 0); } #[test] fn test_cals_new_batch_size() { + let cache_multiplier = 30; let cases = vec![ - (100, 0, 100), - (100, 76, 200), - (200, 49, 100), - (200, 50, 200), - (200, 150, 200), - (200, 151, 400), - (200, 200, 400), - (TSO_BATCH_MAX_SIZE, TSO_BATCH_MAX_SIZE, TSO_BATCH_MAX_SIZE), + (0, 0, true, 100), + (50, 0, true, 100), + (1000, 100, true, 3000), + ( + 1000, + DEFAULT_TSO_BATCH_MAX_SIZE, + true, + DEFAULT_TSO_BATCH_MAX_SIZE, + ), + (0, 0, false, 100), + (1000, 0, false, 100), + (1000, 100, false, 2000), + (5000, 100, false, 100), + ( + 1000, + DEFAULT_TSO_BATCH_MAX_SIZE, + false, + DEFAULT_TSO_BATCH_MAX_SIZE, + ), ]; - for (i, (batch_size, used_size, expected)) in cases.into_iter().enumerate() { - let new_size = - BatchTsoProvider::::calc_new_batch_size(batch_size, used_size, 100); + for (i, (remain, usage, need_flush, expected)) in cases.into_iter().enumerate() { + let batch_list = Arc::new(TsoBatchList { + inner: Default::default(), + tso_remain: AtomicI32::new(remain as i32), + tso_usage: AtomicU32::new(usage), + capacity: cache_multiplier, + }); + let renew_parameter = RenewParameter { + batch_min_size: DEFAULT_TSO_BATCH_MIN_SIZE, + batch_max_size: DEFAULT_TSO_BATCH_MAX_SIZE, + cache_multiplier, + }; + let new_size = BatchTsoProvider::::calc_new_batch_size( + batch_list, + renew_parameter, + need_flush, + ); assert_eq!(new_size, expected, "case {}", i); } } + #[test] + fn test_tso_batch_list_basic() { + let batch_list = TsoBatchList::new(10); + + assert_eq!(batch_list.remain(), 0); + assert_eq!(batch_list.usage(), 0); + assert_eq!(batch_list.pop(None), None); + + batch_list + .push(10, TimeStamp::compose(1, 100), false) 
+ .unwrap(); + assert_eq!(batch_list.remain(), 10); + assert_eq!(batch_list.usage(), 0); + + for logical in 91..=94 { + assert_eq!(batch_list.pop(None), Some(TimeStamp::compose(1, logical))); + } + assert_eq!(batch_list.remain(), 6); + assert_eq!(batch_list.usage(), 4); + + for logical in 95..=100 { + assert_eq!(batch_list.pop(None), Some(TimeStamp::compose(1, logical))); + } + assert_eq!(batch_list.remain(), 0); + assert_eq!(batch_list.usage(), 10); + assert_eq!(batch_list.pop(None), None); + assert_eq!(batch_list.remain(), 0); + assert_eq!(batch_list.usage(), 10); + + batch_list + .push(10, TimeStamp::compose(1, 110), false) + .unwrap(); + assert_eq!(batch_list.remain(), 10); + assert_eq!(batch_list.usage(), 10); + // timestamp fall back + batch_list + .push(10, TimeStamp::compose(1, 119), false) + .unwrap_err(); + batch_list + .push(10, TimeStamp::compose(1, 200), false) + .unwrap(); + assert_eq!(batch_list.remain(), 20); + assert_eq!(batch_list.usage(), 10); + + for logical in 101..=110 { + assert_eq!(batch_list.pop(None), Some(TimeStamp::compose(1, logical))); + } + for logical in 191..=195 { + assert_eq!(batch_list.pop(None), Some(TimeStamp::compose(1, logical))); + } + assert_eq!(batch_list.remain(), 5); + assert_eq!(batch_list.usage(), 25); + + batch_list.flush(); + assert_eq!(batch_list.pop(None), None); + assert_eq!(batch_list.remain(), 0); + assert_eq!(batch_list.take_and_report_usage(), 25); + assert_eq!(batch_list.usage(), 0); + + // need_flush + batch_list + .push(10, TimeStamp::compose(1, 300), false) + .unwrap(); + let key391 = batch_list + .push(10, TimeStamp::compose(1, 400), true) + .unwrap(); + assert_eq!(key391, TimeStamp::compose(1, 391).into_inner()); + assert_eq!(batch_list.remain(), 10); + assert_eq!(batch_list.usage(), 0); + + for logical in 391..=400 { + assert_eq!(batch_list.pop(None), Some(TimeStamp::compose(1, logical))); + } + assert_eq!(batch_list.remain(), 0); + assert_eq!(batch_list.usage(), 10); + } + + #[test] + fn 
test_tso_batch_list_max_batch_count() { + let batch_list = TsoBatchList::new(3); + + batch_list + .push(10, TimeStamp::compose(1, 100), false) + .unwrap(); // will be remove after the 4th push. + batch_list + .push(10, TimeStamp::compose(1, 200), false) + .unwrap(); + batch_list + .push(10, TimeStamp::compose(1, 300), false) + .unwrap(); + batch_list + .push(10, TimeStamp::compose(1, 400), false) + .unwrap(); + + for logical in 191..=195 { + assert_eq!(batch_list.pop(None), Some(TimeStamp::compose(1, logical))); + } + assert_eq!(batch_list.remain(), 25); + assert_eq!(batch_list.usage(), 5); + } + + #[test] + fn test_tso_batch_list_pop_after_ts() { + let batch_list = TsoBatchList::new(10); + + batch_list + .push(10, TimeStamp::compose(1, 100), false) + .unwrap(); + batch_list + .push(10, TimeStamp::compose(1, 200), false) + .unwrap(); + batch_list + .push(10, TimeStamp::compose(1, 300), false) + .unwrap(); + batch_list + .push(10, TimeStamp::compose(1, 400), false) + .unwrap(); + + let after_ts = TimeStamp::compose(1, 291); + for logical in 291..=300 { + assert_eq!( + batch_list.pop(Some(after_ts)), + Some(TimeStamp::compose(1, logical)) + ); + } + for logical in 391..=400 { + assert_eq!( + batch_list.pop(Some(after_ts)), + Some(TimeStamp::compose(1, logical)) + ); + } + assert_eq!(batch_list.pop(Some(after_ts)), None); + assert_eq!(batch_list.remain(), 20); + assert_eq!(batch_list.usage(), 20); + } + #[test] fn test_simple_tso_provider() { let pd_cli = Arc::new(TestPdClient::new(1, false)); @@ -485,44 +860,62 @@ pub mod tests { let provider = block_on(BatchTsoProvider::new_opt( pd_cli.clone(), Duration::ZERO, + Duration::from_secs(1), // cache_multiplier = 10 100, + 80000, )) .unwrap(); - assert_eq!(provider.batch_size(), 100); + assert_eq!(provider.tso_remain(), 100); + assert_eq!(provider.tso_usage(), 0); + for ts in 1001..=1010u64 { assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } + assert_eq!(provider.tso_remain(), 90); + 
assert_eq!(provider.tso_usage(), 10); provider.flush().unwrap(); // allocated: [1101, 1200] - assert_eq!(provider.batch_size(), 100); + assert_eq!(provider.tso_remain(), 100); + assert_eq!(provider.tso_usage(), 0); // used up pd_cli.trigger_tso_failure(); // make renew fail to verify used-up for ts in 1101..=1200u64 { assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } + assert_eq!(provider.tso_remain(), 0); + assert_eq!(provider.tso_usage(), 100); provider.get_ts().unwrap_err(); + assert_eq!(provider.tso_remain(), 0); + assert_eq!(provider.tso_usage(), 100); - provider.flush().unwrap(); // allocated: [1201, 1400] - assert_eq!(provider.batch_size(), 200); - - // used < 20% - for ts in 1201..=1249u64 { + provider.flush().unwrap(); // allocated: [1201, 2200] + for ts in 1201..=1260u64 { assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } + assert_eq!(provider.tso_remain(), 940); + assert_eq!(provider.tso_usage(), 60); - provider.flush().unwrap(); // allocated: [1401, 1500] - assert_eq!(provider.batch_size(), 100); + // allocated: [2201, 2300] + block_on(provider.renew_tso_batch(false, TsoBatchRenewReason::background)).unwrap(); + assert_eq!(provider.tso_remain(), 1040); // 940 + 100 + assert_eq!(provider.tso_usage(), 0); pd_cli.trigger_tso_failure(); // make renew fail to verify used-up - for ts in 1401..=1500u64 { + for ts in 1261..=2300u64 { assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } provider.get_ts().unwrap_err(); + assert_eq!(provider.tso_remain(), 0); + assert_eq!(provider.tso_usage(), 1040); // renew on used-up - for ts in 1501..=2500u64 { + for ts in 2301..=100_000u64 { assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } + // batch size: 10400, 80000, 80000 + // batch boundary: 2301, 12700, 92700, 100_000 + assert_eq!(provider.tso_remain(), 72700); + assert_eq!(provider.tso_usage(), 7300); } #[test] @@ -532,14 +925,14 @@ pub mod tests { { pd_cli.trigger_tso_failure(); - assert!( - 
block_on(BatchTsoProvider::new_opt( - pd_cli.clone(), - Duration::ZERO, - 100 - )) - .is_err() - ); + block_on(BatchTsoProvider::new_opt( + pd_cli.clone(), + Duration::ZERO, + Duration::from_secs(3), + 100, + 8192, + )) + .unwrap_err(); } // Set `renew_interval` to 0 to disable background renew. Invoke `flush()` to @@ -547,10 +940,12 @@ pub mod tests { let provider = block_on(BatchTsoProvider::new_opt( pd_cli.clone(), Duration::ZERO, + Duration::from_secs(1), // cache_multiplier=10 100, + 8192, )) .unwrap(); - assert_eq!(provider.batch_size(), 100); + assert_eq!(provider.tso_remain(), 100); for ts in 1001..=1010u64 { assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } @@ -572,9 +967,9 @@ pub mod tests { pd_cli.trigger_tso_failure(); provider.flush().unwrap_err(); - provider.flush().unwrap(); // allocated: [1301, 1700] + provider.flush().unwrap(); // allocated: [1301, 3300] pd_cli.trigger_tso_failure(); // make renew fail to verify used-up - for ts in 1301..=1700u64 { + for ts in 1301..=3300u64 { assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } provider.get_ts().unwrap_err(); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 4a4cadeb639..fd079764027 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -322,7 +322,9 @@ where let tso = block_on(causal_ts::BatchTsoProvider::new_opt( pd_client.clone(), config.causal_ts.renew_interval.0, + config.causal_ts.available_interval.0, config.causal_ts.renew_batch_min_size, + config.causal_ts.renew_batch_max_size, )); if let Err(e) = tso { fatal!("Causal timestamp provider initialize failed: {:?}", e); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index da81606d2dd..f69ef253e5b 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -364,7 +364,9 @@ impl ServerCluster { block_on(causal_ts::BatchTsoProvider::new_opt( self.pd_client.clone(), 
cfg.causal_ts.renew_interval.0, + cfg.causal_ts.available_interval.0, cfg.causal_ts.renew_batch_min_size, + cfg.causal_ts.renew_batch_max_size, )) .unwrap(), ); diff --git a/metrics/grafana/tikv_raw.json b/metrics/grafana/tikv_raw.json index f81ac801173..6664dad2734 100644 --- a/metrics/grafana/tikv_raw.json +++ b/metrics/grafana/tikv_raw.json @@ -464,6 +464,26 @@ "legendFormat": "{{result}}-P99", "refId": "A", "step": 10 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.999, sum(rate(tikv_causal_ts_provider_get_ts_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, result))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{result}}-P999", + "refId": "B", + "step": 10 + }, + { + "exemplar": true, + "expr": "histogram_quantile(1, sum(rate(tikv_causal_ts_provider_get_ts_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, result))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{result}}-MAX", + "refId": "C", + "step": 10 } ], "thresholds": [], @@ -720,6 +740,229 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The TSO batch list counting", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 23 + }, + "hiddenSeries": false, + "id": 64, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 300, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + 
"alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_causal_ts_provider_tso_batch_list_counting_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}-P99", + "refId": "A", + "step": 10 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.5, sum(rate(tikv_causal_ts_provider_tso_batch_list_counting_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}-P50", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TSO batch list counting", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "TSO batch list counting frequency", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 23 + }, + "hiddenSeries": false, + "id": 65, + "legend": 
{ + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 300, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_causal_ts_provider_tso_batch_list_counting_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{result}}", + "metric": "tikv_causal_ts_provider_tso_batch_list_counting_count", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TSO batch list counting frequency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index b8899a1de4f..247b06834b0 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -794,6 +794,8 @@ fn test_serde_custom_tikv_config() { value.causal_ts = CausalTsConfig { renew_interval: ReadableDuration::millis(100), renew_batch_min_size: 100, + 
renew_batch_max_size: 8192, + available_interval: ReadableDuration::millis(3000), }; let custom = read_file_in_project_dir("integrations/config/test-custom.toml"); From 2d2f6d50477d70d210f95e7f53eeb6aa173ded8f Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 9 Aug 2022 14:10:47 +0800 Subject: [PATCH 0144/1149] file_system: detect procfs accessibility before using it (#13117) close tikv/tikv#13116 None Signed-off-by: tabokie --- components/file_system/src/io_stats/proc.rs | 72 +++++++++------------ 1 file changed, 31 insertions(+), 41 deletions(-) diff --git a/components/file_system/src/io_stats/proc.rs b/components/file_system/src/io_stats/proc.rs index ceb772bee6e..60c8cac9c36 100644 --- a/components/file_system/src/io_stats/proc.rs +++ b/components/file_system/src/io_stats/proc.rs @@ -13,10 +13,7 @@ use crossbeam_utils::CachePadded; use parking_lot::Mutex; use strum::EnumCount; use thread_local::ThreadLocal; -use tikv_util::{ - sys::thread::{self, Pid}, - warn, -}; +use tikv_util::sys::thread::{self, Pid}; use crate::{IoBytes, IoType}; @@ -50,54 +47,44 @@ impl ThreadId { } } - fn fetch_io_bytes(&mut self) -> Option { + fn fetch_io_bytes(&mut self) -> Result { if self.proc_reader.is_none() { let path = PathBuf::from("/proc") .join(format!("{}", self.pid)) .join("task") .join(format!("{}", self.tid)) .join("io"); - match File::open(path) { - Ok(file) => { - self.proc_reader = Some(BufReader::new(file)); - } - Err(e) => { - warn!("failed to open proc file: {}", e); - } - } + self.proc_reader = Some(BufReader::new( + File::open(path).map_err(|e| format!("open: {}", e))?, + )); } - if let Some(ref mut reader) = self.proc_reader { - reader - .seek(std::io::SeekFrom::Start(0)) - .map_err(|e| { - warn!("failed to seek proc file: {}", e); - }) - .ok()?; - let mut io_bytes = IoBytes::default(); - for line in reader.lines() { - let line = line - .map_err(|e| { - // ESRCH 3 No such process - if e.raw_os_error() != Some(3) { - warn!("failed to read proc file: {}", e); - } 
- }) - .ok()?; - if line.len() > 11 { - let mut s = line.split_whitespace(); - if let (Some(field), Some(value)) = (s.next(), s.next()) { - if field.starts_with("read_bytes") { - io_bytes.read = u64::from_str(value).ok()?; - } else if field.starts_with("write_bytes") { - io_bytes.write = u64::from_str(value).ok()?; + let reader = self.proc_reader.as_mut().unwrap(); + reader + .seek(std::io::SeekFrom::Start(0)) + .map_err(|e| format!("seek: {}", e))?; + let mut io_bytes = IoBytes::default(); + for line in reader.lines() { + match line { + Ok(line) => { + if line.len() > 11 { + let mut s = line.split_whitespace(); + if let (Some(field), Some(value)) = (s.next(), s.next()) { + if field.starts_with("read_bytes") { + io_bytes.read = u64::from_str(value) + .map_err(|e| format!("parse read_bytes: {}", e))?; + } else if field.starts_with("write_bytes") { + io_bytes.write = u64::from_str(value) + .map_err(|e| format!("parse write_bytes: {}", e))?; + } } } } + // ESRCH 3 No such process + Err(e) if e.raw_os_error() == Some(3) => break, + Err(e) => return Err(format!("read: {}", e)), } - Some(io_bytes) - } else { - None } + Ok(io_bytes) } } @@ -140,7 +127,7 @@ impl AtomicIoBytes { /// Flushes the local I/O stats to global I/O stats. 
#[inline] fn flush_thread_io(sentinel: &mut LocalIoStats) { - if let Some(io_bytes) = sentinel.id.fetch_io_bytes() { + if let Ok(io_bytes) = sentinel.id.fetch_io_bytes() { GLOBAL_IO_STATS[sentinel.io_type as usize] .fetch_add(io_bytes - sentinel.last_flushed, Ordering::Relaxed); sentinel.last_flushed = io_bytes; @@ -148,6 +135,9 @@ fn flush_thread_io(sentinel: &mut LocalIoStats) { } pub fn init() -> Result<(), String> { + ThreadId::current() + .fetch_io_bytes() + .map_err(|e| format!("failed to fetch I/O bytes from proc: {}", e))?; Ok(()) } From 3d521a08f0be88a43062fbbe3c15e784c939942f Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 10 Aug 2022 15:18:49 +0800 Subject: [PATCH 0145/1149] raftstore-v2: add module docs and remove inappropriate module dependencies (#13241) close tikv/tikv#13050 Signed-off-by: SpadeA-Tang Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 -- components/raftstore-v2/Cargo.toml | 2 -- .../src/operation/read/{read.rs => local.rs} | 15 ++++----------- .../raftstore-v2/src/operation/read/mod.rs | 8 +++++++- components/raftstore/src/store/worker/read.rs | 16 ++++++++++------ 5 files changed, 21 insertions(+), 22 deletions(-) rename components/raftstore-v2/src/operation/read/{read.rs => local.rs} (94%) diff --git a/Cargo.lock b/Cargo.lock index f258fbdcf69..1537c75bbff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4184,7 +4184,6 @@ dependencies = [ "batch-system", "collections", "crossbeam", - "engine_rocks", "engine_test", "engine_traits", "error_code", @@ -4203,7 +4202,6 @@ dependencies = [ "tempfile", "test_pd", "test_util", - "tikv_kv", "tikv_util", "time", "txn_types", diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index f6a827d7424..8551864a444 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -32,7 +32,6 @@ cloud-azure = ["raftstore/cloud-azure"] batch-system = { path = "../batch-system", default-features = false 
} collections = { path = "../collections" } crossbeam = "0.8" -engine_rocks = { path = "../engine_rocks", default-features = false } engine_traits = { path = "../engine_traits" } error_code = { path = "../error_code" } fail = "0.5" @@ -46,7 +45,6 @@ raft-proto = { version = "0.7.0" } raftstore = { path = "../raftstore" } slog = "2.3" smallvec = "1.4" -tikv_kv = { path = "../tikv_kv", default-features = false } tikv_util = { path = "../tikv_util", default-features = false } time = "0.1" txn_types = { path = "../txn_types", default-features = false } diff --git a/components/raftstore-v2/src/operation/read/read.rs b/components/raftstore-v2/src/operation/read/local.rs similarity index 94% rename from components/raftstore-v2/src/operation/read/read.rs rename to components/raftstore-v2/src/operation/read/local.rs index bc3903e12fd..56a5f01a7fd 100644 --- a/components/raftstore-v2/src/operation/read/read.rs +++ b/components/raftstore-v2/src/operation/read/local.rs @@ -161,7 +161,6 @@ mod tests { RaftCommand, }; use tempfile::{Builder, TempDir}; - use tikv_kv::Snapshot; use tikv_util::{codec::number::NumberEncoder, time::monotonic_raw_now}; use time::Duration; use txn_types::{Key, Lock, LockType, WriteBatchFlags}; @@ -213,7 +212,7 @@ mod tests { tablet1 = factory .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) .unwrap(); - tablet1.put_cf(CF_DEFAULT, b"a1", b"val1").unwrap(); + tablet1.put(b"a1", b"val1").unwrap(); let cache = CachedTablet::new(Some(tablet1.clone())); meta.tablet_caches.insert(1, cache); @@ -226,7 +225,7 @@ mod tests { tablet2 = factory .open_tablet(2, Some(10), OpenOptions::default().set_create_new(true)) .unwrap(); - tablet2.put_cf(CF_DEFAULT, b"a2", b"val2").unwrap(); + tablet2.put(b"a2", b"val2").unwrap(); let cache = CachedTablet::new(Some(tablet2.clone())); meta.tablet_caches.insert(2, cache); } @@ -238,10 +237,7 @@ mod tests { let snapshot = delegate.get_snapshot(None, &mut None); assert_eq!( b"val1".to_vec(), - snapshot - 
.get(&Key::from_encoded(b"a1".to_vec())) - .unwrap() - .unwrap() + *snapshot.get_value(b"a1").unwrap().unwrap() ); let (_, delegate) = store_meta.get_executor_and_len(2); @@ -251,10 +247,7 @@ mod tests { let snapshot = delegate.get_snapshot(None, &mut None); assert_eq!( b"val2".to_vec(), - snapshot - .get(&Key::from_encoded(b"a2".to_vec())) - .unwrap() - .unwrap() + *snapshot.get_value(b"a2").unwrap().unwrap() ); } } diff --git a/components/raftstore-v2/src/operation/read/mod.rs b/components/raftstore-v2/src/operation/read/mod.rs index 8c427378da3..efbe6af1a5a 100644 --- a/components/raftstore-v2/src/operation/read/mod.rs +++ b/components/raftstore-v2/src/operation/read/mod.rs @@ -1,3 +1,9 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -mod read; +//! There are two types of read: +//! - If the ReadDelegate is in the leader lease status, the read is operated +//! locally and need not to go through the raft layer (namely local read). +//! - Otherwise, redirect the request to the raftstore and proposed as a +//! RaftCommand in the raft layer. 
+ +mod local; diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index f3d52be5044..de1cb1011ae 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -1069,7 +1069,7 @@ mod tests { use crossbeam::channel::TrySendError; use engine_test::kv::{KvTestEngine, KvTestSnapshot}; - use engine_traits::{Peekable, SyncMutable, ALL_CFS, CF_DEFAULT}; + use engine_traits::{Peekable, SyncMutable, ALL_CFS}; use kvproto::raft_cmdpb::*; use tempfile::{Builder, TempDir}; use tikv_util::{codec::number::NumberEncoder, time::monotonic_raw_now}; @@ -1556,7 +1556,7 @@ mod tests { .unwrap(); let kv_engine = engine_test::kv::new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap(); - kv_engine.put_cf(CF_DEFAULT, b"a1", b"val1").unwrap(); + kv_engine.put(b"a1", b"val1").unwrap(); let store_meta = StoreMetaDelegate::new(Arc::new(Mutex::new(StoreMeta::new(0))), kv_engine.clone()); @@ -1589,16 +1589,20 @@ mod tests { let tablet = delegate.get_tablet(); assert_eq!(kv_engine.as_inner().path(), tablet.as_inner().path()); let snapshot = delegate.get_snapshot(read_id_copy.clone(), &mut read_context); - let val = snapshot.get_value(b"a1").unwrap().unwrap(); - assert_eq!(b"val1", val.deref()); + assert_eq!( + b"val1".to_vec(), + *snapshot.get_value(b"a1").unwrap().unwrap() + ); let (_, delegate) = store_meta.get_executor_and_len(2); let mut delegate = delegate.unwrap(); let tablet = delegate.get_tablet(); assert_eq!(kv_engine.as_inner().path(), tablet.as_inner().path()); let snapshot = delegate.get_snapshot(read_id_copy, &mut read_context); - let val = snapshot.get_value(b"a1").unwrap().unwrap(); - assert_eq!(b"val1", val.deref()); + assert_eq!( + b"val1".to_vec(), + *snapshot.get_value(b"a1").unwrap().unwrap() + ); assert!(snap_cache.as_ref().is_some()); assert_eq!(read_metrics.local_executed_requests, 2); From 6b3ca45c8e828d5ef8b36492791237ea9d62cfca Mon Sep 17 00:00:00 2001 From: 
Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 11 Aug 2022 12:00:48 +0800 Subject: [PATCH 0146/1149] server: make EnginesResourcesInfo be compatible with Multi-Rocks DB version. (#13206) close tikv/tikv#13214 Signed-off-by: SpadeA-Tang Co-authored-by: Ti Chi Robot --- components/server/src/server.rs | 65 +++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index fd079764027..e925a663943 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -13,6 +13,7 @@ use std::{ cmp, + collections::HashMap, convert::TryFrom, env, fmt, net::SocketAddr, @@ -44,7 +45,7 @@ use engine_rocks::{ use engine_rocks_helper::sst_recovery::{RecoveryRunner, DEFAULT_CHECK_INTERVAL}; use engine_traits::{ CfOptions, CfOptionsExt, Engines, FlowControlFactorsExt, KvEngine, MiscExt, RaftEngine, - TabletFactory, CF_DEFAULT, CF_LOCK, CF_WRITE, + TabletAccessor, TabletFactory, CF_DEFAULT, CF_LOCK, CF_WRITE, }; use error_code::ErrorCodeExt; use file_system::{ @@ -97,8 +98,8 @@ use tikv::{ service::{DebugService, DiagnosticsService}, status_server::StatusServer, ttl::TtlChecker, - KvEngineFactoryBuilder, Node, RaftKv, Server, CPU_CORES_QUOTA_GAUGE, DEFAULT_CLUSTER_ID, - GRPC_THREAD_PREFIX, + KvEngineFactory, KvEngineFactoryBuilder, Node, RaftKv, Server, CPU_CORES_QUOTA_GAUGE, + DEFAULT_CLUSTER_ID, GRPC_THREAD_PREFIX, }, storage::{ self, @@ -1685,13 +1686,15 @@ impl TikvServer { self.config.storage.block_cache.shared, )), ); - self.tablet_factory = Some(factory); + self.tablet_factory = Some(factory.clone()); engines .raft .register_config(cfg_controller, self.config.storage.block_cache.shared); let engines_info = Arc::new(EnginesResourceInfo::new( - &engines, 180, // max_samples_to_preserve + factory, + engines.raft.as_rocks_engine().cloned(), + 180, // max_samples_to_preserve )); (engines, engines_info) @@ -1841,8 +1844,13 @@ impl 
EngineMetricsManager { } pub struct EnginesResourceInfo { - kv_engine: RocksEngine, + tablet_factory: Arc, raft_engine: Option, + // region_id -> (suffix, tablet) + // `update` is called perodically which needs this map for recording the latest tablet for each + // region and cached_latest_tablets is used to avoid memory allocation each time when + // calling `update`. + cached_latest_tablets: Arc>>, latest_normalized_pending_bytes: AtomicU32, normalized_pending_bytes_collector: MovingAvgU32, } @@ -1850,14 +1858,15 @@ pub struct EnginesResourceInfo { impl EnginesResourceInfo { const SCALE_FACTOR: u64 = 100; - fn new( - engines: &Engines, + fn new( + tablet_factory: Arc, + raft_engine: Option, max_samples_to_preserve: usize, ) -> Self { - let raft_engine = engines.raft.as_rocks_engine().cloned(); EnginesResourceInfo { - kv_engine: engines.kv.clone(), + tablet_factory, raft_engine, + cached_latest_tablets: Arc::default(), latest_normalized_pending_bytes: AtomicU32::new(0), normalized_pending_bytes_collector: MovingAvgU32::new(max_samples_to_preserve), } @@ -1884,9 +1893,41 @@ impl EnginesResourceInfo { if let Some(raft_engine) = &self.raft_engine { fetch_engine_cf(raft_engine, CF_DEFAULT, &mut normalized_pending_bytes); } - for cf in &[CF_DEFAULT, CF_WRITE, CF_LOCK] { - fetch_engine_cf(&self.kv_engine, cf, &mut normalized_pending_bytes); + + let mut cached_latest_tablets = self.cached_latest_tablets.as_ref().lock().unwrap(); + + self.tablet_factory + .for_each_opened_tablet( + &mut |id, suffix, db: &RocksEngine| match cached_latest_tablets.entry(id) { + collections::HashMapEntry::Occupied(mut slot) => { + if slot.get().0 < suffix { + slot.insert((suffix, db.clone())); + } + } + collections::HashMapEntry::Vacant(slot) => { + slot.insert((suffix, db.clone())); + } + }, + ); + + // todo(SpadeA): Now, there's a potential race condition problem where the + // tablet could be destroyed after the clone and before the fetching + // which could result in programme panic. 
It's okay now as the single global + // kv_engine will not be destroyed in normal operation and v2 is not + // ready for operation. Furthermore, this race condition is general to v2 as + // tablet clone is not a case exclusively happened here. We should + // propose another PR to tackle it such as destory tablet lazily in a GC + // thread. + + for (_, (_, tablet)) in cached_latest_tablets.iter() { + for cf in &[CF_DEFAULT, CF_WRITE, CF_LOCK] { + fetch_engine_cf(tablet, cf, &mut normalized_pending_bytes); + } } + + // Clear ensures that these tablets are not hold forever. + cached_latest_tablets.clear(); + let (_, avg) = self .normalized_pending_bytes_collector .add(normalized_pending_bytes); From 1ec844528144ddb900a39960e30207624df40571 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 11 Aug 2022 16:50:49 +0800 Subject: [PATCH 0147/1149] log-backup: add timeout for operations that may stuck (#13255) close tikv/tikv#13251 Added a timeout of `30s` for every etcd gRPC request. Moved `on_update_global_checkpoint` to background. 
Signed-off-by: Yu Juncen Co-authored-by: Ti Chi Robot --- Cargo.lock | 3 +- components/backup-stream/Cargo.toml | 4 +- components/backup-stream/src/endpoint.rs | 43 +++++++++++++++---- .../src/metadata/store/lazy_etcd.rs | 8 +++- components/backup-stream/tests/mod.rs | 34 ++++++++++++++- 5 files changed, 78 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1537c75bbff..41a5df4c1ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1620,8 +1620,7 @@ dependencies = [ [[package]] name = "etcd-client" version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76b9f5b0b4f53cf836bef05b22cd5239479700bc8d44a04c3c77f1ba6c2c73e9" +source = "git+https://github.com/yujuncen/etcd-client?rev=e0321a1990ee561cf042973666c0db61c8d82364#e0321a1990ee561cf042973666c0db61c8d82364" dependencies = [ "http", "prost", diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index e2b23ccf5db..7fe221842ce 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -29,7 +29,9 @@ dashmap = "5" engine_rocks = { path = "../engine_rocks", default-features = false } engine_traits = { path = "../engine_traits", default-features = false } error_code = { path = "../error_code" } -etcd-client = { version = "0.7", features = ["pub-response-field", "tls"] } +# We cannot update the etcd-client to latest version because of the cyclic requirement. +# Also we need wait until https://github.com/etcdv3/etcd-client/pull/43/files to be merged. 
+etcd-client = { git = "https://github.com/yujuncen/etcd-client", rev = "e0321a1990ee561cf042973666c0db61c8d82364", features = ["pub-response-field", "tls"] } external_storage = { path = "../external_storage", default-features = false } external_storage_export = { path = "../external_storage/export", default-features = false } fail = { version = "0.5", optional = true } diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index ff1e2a4e66c..281bf2e77f6 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -66,6 +66,10 @@ const SLOW_EVENT_THRESHOLD: f64 = 120.0; /// CHECKPOINT_SAFEPOINT_TTL_IF_ERROR specifies the safe point TTL(24 hour) if /// task has fatal error. const CHECKPOINT_SAFEPOINT_TTL_IF_ERROR: u64 = 24; +/// The timeout for tick updating the checkpoint. +/// Generally, it would take ~100ms. +/// 5s would be enough for it. +const TICK_UPDATE_TIMEOUT: Duration = Duration::from_secs(5); pub struct Endpoint { // Note: those fields are more like a shared context between components. @@ -810,19 +814,29 @@ where })); } - fn on_update_global_checkpoint(&self, task: String) { - self.pool.block_on(async move { - let ts = self.meta_client.global_progress_of_task(&task).await; + fn update_global_checkpoint(&self, task: String) -> future![()] { + let meta_client = self.meta_client.clone(); + let router = self.range_router.clone(); + let store_id = self.store_id; + async move { + #[cfg(feature = "failpoints")] + { + // fail-rs doesn't support async code blocks now. 
+ // let's borrow the feature name and do it ourselves :3 + if std::env::var("LOG_BACKUP_UGC_SLEEP_AND_RETURN").is_ok() { + tokio::time::sleep(Duration::from_secs(100)).await; + return; + } + } + let ts = meta_client.global_progress_of_task(&task).await; match ts { Ok(global_checkpoint) => { - let r = self - .range_router - .update_global_checkpoint(&task, global_checkpoint, self.store_id) + let r = router + .update_global_checkpoint(&task, global_checkpoint, store_id) .await; match r { Ok(true) => { - if let Err(err) = self - .meta_client + if let Err(err) = meta_client .set_storage_checkpoint(&task, global_checkpoint) .await { @@ -854,7 +868,18 @@ where ); } } - }); + } + } + + fn on_update_global_checkpoint(&self, task: String) { + let _guard = self.pool.handle().enter(); + let result = self.pool.block_on(tokio::time::timeout( + TICK_UPDATE_TIMEOUT, + self.update_global_checkpoint(task), + )); + if let Err(err) = result { + warn!("log backup update global checkpoint timed out"; "err" => %err) + } } /// Modify observe over some region. 
diff --git a/components/backup-stream/src/metadata/store/lazy_etcd.rs b/components/backup-stream/src/metadata/store/lazy_etcd.rs index 97573ab756e..8cd6b87ec71 100644 --- a/components/backup-stream/src/metadata/store/lazy_etcd.rs +++ b/components/backup-stream/src/metadata/store/lazy_etcd.rs @@ -10,6 +10,8 @@ use tokio::sync::OnceCell; use super::{etcd::EtcdSnapshot, EtcdStore, MetaStore}; use crate::errors::{ContextualResultExt, Result}; +const RPC_TIMEOUT: Duration = Duration::from_secs(30); + #[derive(Clone)] pub struct LazyEtcdClient(Arc); @@ -26,7 +28,11 @@ impl ConnectionConfig { if let Some(tls) = &self.tls { opts = opts.with_tls(tls.clone()) } - opts = opts.with_keep_alive(self.keep_alive_interval, self.keep_alive_timeout); + opts = opts + .with_keep_alive(self.keep_alive_interval, self.keep_alive_timeout) + .with_timeout(RPC_TIMEOUT) + .keep_alive_while_idle(false); + opts } } diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index 671952dc40d..f838e96ddbf 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -598,7 +598,7 @@ fn run_async_test(test: impl Future) -> T { #[cfg(test)] mod test { - use std::time::Duration; + use std::time::{Duration, Instant}; use backup_stream::{ errors::Error, metadata::MetadataClient, router::TaskSelector, GetCheckpointResult, @@ -874,6 +874,38 @@ mod test { ); } + #[test] + fn upload_checkpoint_exits_in_time() { + defer! 
{{ + std::env::remove_var("LOG_BACKUP_UGC_SLEEP_AND_RETURN"); + }} + let suite = SuiteBuilder::new_named("upload_checkpoint_exits_in_time") + .nodes(1) + .build(); + std::env::set_var("LOG_BACKUP_UGC_SLEEP_AND_RETURN", "meow"); + let (_, victim) = suite.endpoints.iter().next().unwrap(); + let sched = victim.scheduler(); + sched + .schedule(Task::UpdateGlobalCheckpoint("greenwoods".to_owned())) + .unwrap(); + let start = Instant::now(); + let (tx, rx) = tokio::sync::oneshot::channel(); + sched + .schedule(Task::Sync( + Box::new(move || { + tx.send(Instant::now()).unwrap(); + }), + Box::new(|_| true), + )) + .unwrap(); + let end = run_async_test(rx).unwrap(); + assert!( + end - start < Duration::from_secs(10), + "take = {:?}", + end - start + ); + } + #[test] fn failed_during_refresh_region() { defer! { From 693ae46f2739d3f8a493589aff57edcdafc8e12a Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Thu, 11 Aug 2022 18:10:49 +0800 Subject: [PATCH 0148/1149] pd-client: tikv should continue if cluster-id is zero. (#13242) close tikv/tikv#13240 using warn to replace panic. 
Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Ti Chi Robot --- components/pd_client/src/util.rs | 47 ++++++++++++++++++------ tests/integrations/pd/test_rpc_client.rs | 14 +++++++ 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/components/pd_client/src/util.rs b/components/pd_client/src/util.rs index e4145f16c0d..7f7ef9a5db5 100644 --- a/components/pd_client/src/util.rs +++ b/components/pd_client/src/util.rs @@ -585,6 +585,12 @@ impl PdConnector { .keepalive_timeout(Duration::from_secs(3)); self.security_mgr.connect(cb, addr_trim) }; + fail_point!("cluster_id_is_not_ready", |_| { + Ok(( + PdClientStub::new(channel.clone()), + GetMembersResponse::default(), + )) + }); let client = PdClientStub::new(channel); let option = CallOption::default().timeout(Duration::from_secs(REQUEST_TIMEOUT)); let response = client @@ -597,6 +603,13 @@ impl PdConnector { } } + // load_members returns the PD members by calling getMember, there are two + // abnormal scenes for the reponse: + // 1. header has an error: the PD is not ready to serve. + // 2. cluster id is zero: etcd start server but the follower did not get + // cluster id yet. + // In this case, load_members should return an error, so the client + // will not update client address. pub async fn load_members(&self, previous: &GetMembersResponse) -> Result { let previous_leader = previous.get_leader(); let members = previous.get_members(); @@ -611,18 +624,30 @@ impl PdConnector { for ep in m.get_client_urls() { match self.connect(ep.as_str()).await { Ok((_, r)) => { - let new_cluster_id = r.get_header().get_cluster_id(); - if new_cluster_id == cluster_id { - // check whether the response have leader info, otherwise continue to - // loop the rest members - if r.has_leader() { - return Ok(r); - } + let header = r.get_header(); + // Try next follower endpoint if the cluster has not ready since this pr: + // pd#5412. 
+ if let Err(e) = check_resp_header(header) { + error!("connect pd failed";"endpoints" => ep, "error" => ?e); } else { - panic!( - "{} no longer belongs to cluster {}, it is in {}", - ep, cluster_id, new_cluster_id - ); + let new_cluster_id = header.get_cluster_id(); + // it is new cluster if the new cluster id is zero. + if cluster_id == 0 || new_cluster_id == cluster_id { + // check whether the response have leader info, otherwise continue + // to loop the rest members + if r.has_leader() { + return Ok(r); + } + // Try next endpoint if PD server returns the + // cluster id is zero without any error. + } else if new_cluster_id == 0 { + error!("{} connect success, but cluster id is not ready", ep); + } else { + panic!( + "{} no longer belongs to cluster {}, it is in {}", + ep, cluster_id, new_cluster_id + ); + } } } Err(e) => { diff --git a/tests/integrations/pd/test_rpc_client.rs b/tests/integrations/pd/test_rpc_client.rs index 57566b91e75..5f44cc0137b 100644 --- a/tests/integrations/pd/test_rpc_client.rs +++ b/tests/integrations/pd/test_rpc_client.rs @@ -476,6 +476,20 @@ fn test_change_leader_async() { panic!("failed, leader should changed"); } +#[test] +fn test_pd_client_ok_when_cluster_not_ready() { + let pd_client_cluster_id_zero = "cluster_id_is_not_ready"; + let server = MockServer::with_case(3, Arc::new(AlreadyBootstrapped)); + let eps = server.bind_addrs(); + + let client = new_client(eps, None); + fail::cfg(pd_client_cluster_id_zero, "return()").unwrap(); + // wait 100ms to let client load member. 
+ thread::sleep(Duration::from_millis(101)); + assert_eq!(client.reconnect().is_err(), true); + fail::remove(pd_client_cluster_id_zero); +} + #[test] fn test_pd_client_heartbeat_send_failed() { let pd_client_send_fail_fp = "region_heartbeat_send_failed"; From 1ffa3034bec416268af751d319c8d7a2bc2ca464 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Thu, 11 Aug 2022 18:32:49 +0800 Subject: [PATCH 0149/1149] pd-client: remove `call_option` to avoid deadlock(RWR). (#13249) close tikv/tikv#13191, ref rust-lang/rust#93740 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Ti Chi Robot --- components/pd_client/src/client.rs | 78 ++++++++++++------------------ components/pd_client/src/util.rs | 17 +++++-- 2 files changed, 46 insertions(+), 49 deletions(-) diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index ca997e473e9..942ab0269be 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -19,7 +19,7 @@ use futures::{ sink::SinkExt, stream::StreamExt, }; -use grpcio::{CallOption, EnvBuilder, Environment, WriteFlags}; +use grpcio::{EnvBuilder, Environment, WriteFlags}; use kvproto::{ metapb, pdpb::{self, Member}, @@ -37,7 +37,7 @@ use yatp::{task::future::TaskCell, ThreadPool}; use super::{ metrics::*, - util::{check_resp_header, sync_request, Client, Inner, PdConnector}, + util::{call_option_inner, check_resp_header, sync_request, Client, PdConnector}, BucketStat, Config, Error, FeatureGate, PdClient, PdFuture, RegionInfo, RegionStat, Result, UnixSecs, REQUEST_TIMEOUT, }; @@ -189,20 +189,6 @@ impl RpcClient { block_on(self.pd_client.reconnect(true)) } - /// Creates a new call option with default request timeout. 
- #[inline] - pub fn call_option(client: &Client) -> CallOption { - Self::call_option_inner(&client.inner.rl()) - } - - #[inline] - fn call_option_inner(inner: &Inner) -> CallOption { - inner - .target_info() - .call_option() - .timeout(Duration::from_secs(REQUEST_TIMEOUT)) - } - /// Gets given key's Region and Region's leader from PD. fn get_region_and_leader( &self, @@ -221,7 +207,7 @@ impl RpcClient { let inner = client.inner.rl(); inner .client_stub - .get_region_async_opt(&req, Self::call_option_inner(&inner)) + .get_region_async_opt(&req, call_option_inner(&inner)) .unwrap_or_else(|e| { panic!("fail to request PD {} err {:?}", "get_region_async_opt", e) }) @@ -261,7 +247,7 @@ impl RpcClient { let inner = client.inner.rl(); inner .client_stub - .get_store_async_opt(&req, Self::call_option_inner(&inner)) + .get_store_async_opt(&req, call_option_inner(&inner)) .unwrap_or_else(|e| { panic!("fail to request PD {} err {:?}", "get_store_async", e) }) @@ -339,7 +325,7 @@ impl PdClient for RpcClient { ) -> Result> { use kvproto::pdpb::WatchGlobalConfigRequest; let req = WatchGlobalConfigRequest::default(); - sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client| { + sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client, _| { client.watch_global_config(&req) }) } @@ -362,8 +348,8 @@ impl PdClient for RpcClient { req.set_store(stores); req.set_region(region); - let mut resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client| { - client.bootstrap_opt(&req, Self::call_option(&self.pd_client)) + let mut resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client, option| { + client.bootstrap_opt(&req, option) })?; check_resp_header(resp.get_header())?; Ok(resp.replication_status.take()) @@ -377,8 +363,8 @@ impl PdClient for RpcClient { let mut req = pdpb::IsBootstrappedRequest::default(); req.set_header(self.header()); - let resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client| { - client.is_bootstrapped_opt(&req, 
Self::call_option(&self.pd_client)) + let resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client, option| { + client.is_bootstrapped_opt(&req, option) })?; check_resp_header(resp.get_header())?; @@ -393,8 +379,8 @@ impl PdClient for RpcClient { let mut req = pdpb::AllocIdRequest::default(); req.set_header(self.header()); - let resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client| { - client.alloc_id_opt(&req, Self::call_option(&self.pd_client)) + let resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client, option| { + client.alloc_id_opt(&req, option) })?; check_resp_header(resp.get_header())?; @@ -414,8 +400,8 @@ impl PdClient for RpcClient { req.set_header(self.header()); req.set_store(store); - let mut resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client| { - client.put_store_opt(&req, Self::call_option(&self.pd_client)) + let mut resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client, option| { + client.put_store_opt(&req, option) })?; check_resp_header(resp.get_header())?; @@ -431,8 +417,8 @@ impl PdClient for RpcClient { req.set_header(self.header()); req.set_store_id(store_id); - let mut resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client| { - client.get_store_opt(&req, Self::call_option(&self.pd_client)) + let mut resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client, option| { + client.get_store_opt(&req, option) })?; check_resp_header(resp.get_header())?; @@ -457,8 +443,8 @@ impl PdClient for RpcClient { req.set_header(self.header()); req.set_exclude_tombstone_stores(exclude_tombstone); - let mut resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client| { - client.get_all_stores_opt(&req, Self::call_option(&self.pd_client)) + let mut resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client, option| { + client.get_all_stores_opt(&req, option) })?; check_resp_header(resp.get_header())?; @@ -473,8 +459,8 @@ impl PdClient for RpcClient { let mut req = 
pdpb::GetClusterConfigRequest::default(); req.set_header(self.header()); - let mut resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client| { - client.get_cluster_config_opt(&req, Self::call_option(&self.pd_client)) + let mut resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client, option| { + client.get_cluster_config_opt(&req, option) })?; check_resp_header(resp.get_header())?; @@ -511,7 +497,7 @@ impl PdClient for RpcClient { let inner = client.inner.rl(); inner .client_stub - .get_region_by_id_async_opt(&req, Self::call_option_inner(&inner)) + .get_region_by_id_async_opt(&req, call_option_inner(&inner)) .unwrap_or_else(|e| { panic!("fail to request PD {} err {:?}", "get_region_by_id", e); }) @@ -550,7 +536,7 @@ impl PdClient for RpcClient { let inner = client.inner.rl(); inner .client_stub - .get_region_by_id_async_opt(&req, Self::call_option_inner(&inner)) + .get_region_by_id_async_opt(&req, call_option_inner(&inner)) .unwrap_or_else(|e| { panic!("fail to request PD {} err {:?}", "get_region_by_id", e) }) @@ -688,7 +674,7 @@ impl PdClient for RpcClient { let inner = client.inner.rl(); inner .client_stub - .ask_split_async_opt(&req, Self::call_option_inner(&inner)) + .ask_split_async_opt(&req, call_option_inner(&inner)) .unwrap_or_else(|e| panic!("fail to request PD {} err {:?}", "ask_split", e)) }; @@ -724,7 +710,7 @@ impl PdClient for RpcClient { let inner = client.inner.rl(); inner .client_stub - .ask_batch_split_async_opt(&req, Self::call_option_inner(&inner)) + .ask_batch_split_async_opt(&req, call_option_inner(&inner)) .unwrap_or_else(|e| { panic!("fail to request PD {} err {:?}", "ask_batch_split", e) }) @@ -771,7 +757,7 @@ impl PdClient for RpcClient { let inner = client.inner.rl(); inner .client_stub - .store_heartbeat_async_opt(&req, Self::call_option_inner(&inner)) + .store_heartbeat_async_opt(&req, call_option_inner(&inner)) .unwrap_or_else(|e| { panic!("fail to request PD {} err {:?}", "store_heartbeat", e) }) @@ -808,7 +794,7 
@@ impl PdClient for RpcClient { let inner = client.inner.rl(); inner .client_stub - .report_batch_split_async_opt(&req, Self::call_option_inner(&inner)) + .report_batch_split_async_opt(&req, call_option_inner(&inner)) .unwrap_or_else(|e| { panic!("fail to request PD {} err {:?}", "report_batch_split", e) }) @@ -841,8 +827,8 @@ impl PdClient for RpcClient { } req.set_region(region.region); - let resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client| { - client.scatter_region_opt(&req, Self::call_option(&self.pd_client)) + let resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client, option| { + client.scatter_region_opt(&req, option) })?; check_resp_header(resp.get_header()) } @@ -862,7 +848,7 @@ impl PdClient for RpcClient { let inner = client.inner.rl(); inner .client_stub - .get_gc_safe_point_async_opt(&req, Self::call_option_inner(&inner)) + .get_gc_safe_point_async_opt(&req, call_option_inner(&inner)) .unwrap_or_else(|e| { panic!("fail to request PD {} err {:?}", "get_gc_saft_point", e) }) @@ -895,8 +881,8 @@ impl PdClient for RpcClient { req.set_header(self.header()); req.set_region_id(region_id); - let resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client| { - client.get_operator_opt(&req, Self::call_option(&self.pd_client)) + let resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client, option| { + client.get_operator_opt(&req, option) })?; check_resp_header(resp.get_header())?; @@ -950,7 +936,7 @@ impl PdClient for RpcClient { let inner = client.inner.rl(); inner .client_stub - .update_service_gc_safe_point_async_opt(&r, Self::call_option_inner(&inner)) + .update_service_gc_safe_point_async_opt(&r, call_option_inner(&inner)) .unwrap_or_else(|e| { panic!( "fail to request PD {} err {:?}", @@ -989,7 +975,7 @@ impl PdClient for RpcClient { let inner = client.inner.rl(); inner .client_stub - .report_min_resolved_ts_async_opt(&req, Self::call_option_inner(&inner)) + .report_min_resolved_ts_async_opt(&req, 
call_option_inner(&inner)) .unwrap_or_else(|e| { panic!("fail to request PD {} err {:?}", "min_resolved_ts", e) }) diff --git a/components/pd_client/src/util.rs b/components/pd_client/src/util.rs index 7f7ef9a5db5..fec63383891 100644 --- a/components/pd_client/src/util.rs +++ b/components/pd_client/src/util.rs @@ -472,10 +472,17 @@ where } } +pub fn call_option_inner(inner: &Inner) -> CallOption { + inner + .target_info() + .call_option() + .timeout(Duration::from_secs(REQUEST_TIMEOUT)) +} + /// Do a request in synchronized fashion. pub fn sync_request(client: &Client, mut retry: usize, func: F) -> Result where - F: Fn(&PdClientStub) -> GrpcResult, + F: Fn(&PdClientStub, CallOption) -> GrpcResult, { loop { let ret = { @@ -483,8 +490,12 @@ where // thread which may hold the read lock and wait for PD client thread // completing the request and the PD client thread which may block // on acquiring the write lock. - let client_stub = client.inner.rl().client_stub.clone(); - func(&client_stub).map_err(Error::Grpc) + let (client_stub, option) = { + let inner = client.inner.rl(); + (inner.client_stub.clone(), call_option_inner(&inner)) + }; + + func(&client_stub, option).map_err(Error::Grpc) }; match ret { Ok(r) => { From 38655bff985289560d6a6095bff54665d9cfd254 Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 12 Aug 2022 09:46:49 +0800 Subject: [PATCH 0150/1149] raftstore: Use thread-local metrics for local read (#13244) ref tikv/tikv#12876 Use tls local read metrics Signed-off-by: Connor1996 --- .../raftstore-v2/src/operation/read/local.rs | 2 +- components/raftstore/src/store/mod.rs | 6 +- .../raftstore/src/store/worker/metrics.rs | 51 ++- components/raftstore/src/store/worker/mod.rs | 2 +- components/raftstore/src/store/worker/read.rs | 348 +++++++----------- 5 files changed, 183 insertions(+), 226 deletions(-) diff --git a/components/raftstore-v2/src/operation/read/local.rs b/components/raftstore-v2/src/operation/read/local.rs index 56a5f01a7fd..2e694f11ebc 100644 --- 
a/components/raftstore-v2/src/operation/read/local.rs +++ b/components/raftstore-v2/src/operation/read/local.rs @@ -29,7 +29,7 @@ use raftstore::{ store::{ cmd_resp, util::{self, LeaseState, RegionReadProgress, RemoteLease}, - ReadDelegate, ReadExecutor, ReadExecutorProvider, ReadMetrics, ReadProgress, ReadResponse, + ReadDelegate, ReadExecutor, ReadExecutorProvider, ReadProgress, ReadResponse, RegionSnapshot, RequestInspector, RequestPolicy, TrackVer, TxnExt, }, Error, Result, diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index d47cc892033..ad730206175 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -70,8 +70,8 @@ pub use self::{ AutoSplitController, Bucket, BucketRange, CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, FlowStatistics, FlowStatsReporter, KeyEntry, LocalReadContext, LocalReader, PdTask, QueryStats, RaftlogFetchRunner, RaftlogFetchTask, ReadDelegate, - ReadExecutor, ReadExecutorProvider, ReadMetrics, ReadProgress, ReadStats, - RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, - SplitConfigManager, StoreMetaDelegate, TrackVer, WriteStats, + ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, + SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, StoreMetaDelegate, + TrackVer, WriteStats, }, }; diff --git a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index e119fcdc3ab..fa27ea340b8 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -1,8 +1,11 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. +use std::{cell::RefCell, time::Duration}; + use lazy_static::lazy_static; -use prometheus::*; +use prometheus::{local::LocalIntCounter, *}; use prometheus_static_metric::*; +use tikv_util::time::Instant; make_auto_flush_static_metric! 
{ pub label_enum SnapType { @@ -44,14 +47,54 @@ make_static_metric! { epoch, applied_term, channel_full, + cache_miss, safe_ts, } - pub struct ReadRejectCounter : IntCounter { - "reason" => RejectReason + pub struct LocalReadRejectCounter : LocalIntCounter { + "reason" => RejectReason, } } +pub struct LocalReadMetrics { + pub local_executed_requests: LocalIntCounter, + pub local_executed_stale_read_requests: LocalIntCounter, + pub local_executed_snapshot_cache_hit: LocalIntCounter, + pub reject_reason: LocalReadRejectCounter, + pub renew_lease_advance: LocalIntCounter, + last_flush_time: Instant, +} + +thread_local! { + pub static TLS_LOCAL_READ_METRICS: RefCell = RefCell::new( + LocalReadMetrics { + local_executed_requests: LOCAL_READ_EXECUTED_REQUESTS.local(), + local_executed_stale_read_requests: LOCAL_READ_EXECUTED_STALE_READ_REQUESTS.local(), + local_executed_snapshot_cache_hit: LOCAL_READ_EXECUTED_CACHE_REQUESTS.local(), + reject_reason: LocalReadRejectCounter::from(&LOCAL_READ_REJECT_VEC), + renew_lease_advance: LOCAL_READ_RENEW_LEASE_ADVANCE_COUNTER.local(), + last_flush_time: Instant::now_coarse(), + } + ); +} + +const METRICS_FLUSH_INTERVAL: u64 = 15_000; // 15s + +pub fn maybe_tls_local_read_metrics_flush() { + TLS_LOCAL_READ_METRICS.with(|m| { + let mut m = m.borrow_mut(); + + if m.last_flush_time.saturating_elapsed() >= Duration::from_millis(METRICS_FLUSH_INTERVAL) { + m.local_executed_requests.flush(); + m.local_executed_stale_read_requests.flush(); + m.local_executed_snapshot_cache_hit.flush(); + m.reject_reason.flush(); + m.renew_lease_advance.flush(); + m.last_flush_time = Instant::now_coarse(); + } + }); +} + lazy_static! { pub static ref SNAP_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( "tikv_raftstore_snapshot_total", @@ -111,8 +154,6 @@ lazy_static! 
{ &["reason"] ) .unwrap(); - pub static ref LOCAL_READ_REJECT: ReadRejectCounter = - ReadRejectCounter::from(&LOCAL_READ_REJECT_VEC); pub static ref LOCAL_READ_EXECUTED_REQUESTS: IntCounter = register_int_counter!( "tikv_raftstore_local_read_executed_requests", "Total number of requests directly executed by local reader." diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index 1651183f976..2298710ad63 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -34,7 +34,7 @@ pub use self::{ raftlog_gc::{Runner as RaftlogGcRunner, Task as RaftlogGcTask}, read::{ CachedReadDelegate, LocalReadContext, LocalReader, Progress as ReadProgress, ReadDelegate, - ReadExecutor, ReadExecutorProvider, ReadMetrics, StoreMetaDelegate, TrackVer, + ReadExecutor, ReadExecutorProvider, StoreMetaDelegate, TrackVer, }, refresh_config::{ BatchComponent as RaftStoreBatchComponent, Runner as RefreshConfigRunner, diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index de1cb1011ae..3c5c05f4717 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -9,7 +9,6 @@ use std::{ atomic::{AtomicU64, Ordering}, Arc, Mutex, }, - time::Duration, }; use crossbeam::{atomic::AtomicCell, channel::TrySendError}; @@ -26,7 +25,7 @@ use tikv_util::{ codec::number::decode_u64, debug, error, lru::LruCache, - time::{monotonic_raw_now, Instant, ThreadReadId}, + time::{monotonic_raw_now, ThreadReadId}, }; use time::Timespec; @@ -213,7 +212,6 @@ pub struct LocalReadContext<'a, E> where E: KvEngine, { - metrics: &'a mut ReadMetrics, read_id: &'a mut ThreadReadId, snap_cache: &'a mut Box>>, } @@ -397,7 +395,6 @@ impl ReadDelegate { &self, router: &dyn CasualRouter, ts: Timespec, - metrics: &mut ReadMetrics, ) { if !self .leader_lease @@ -407,7 +404,7 @@ impl ReadDelegate { { return; } - 
metrics.renew_lease_advance += 1; + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().renew_lease_advance.inc()); let region_id = self.region.get_id(); if let Err(e) = router.send(region_id, CasualMessage::RenewLease) { debug!( @@ -418,18 +415,19 @@ impl ReadDelegate { } } - pub fn is_in_leader_lease(&self, ts: Timespec, metrics: &mut ReadMetrics) -> bool { + pub fn is_in_leader_lease(&self, ts: Timespec) -> bool { if let Some(ref lease) = self.leader_lease { let term = lease.term(); if term == self.term { if lease.inspect(Some(ts)) == LeaseState::Valid { return true; } else { - metrics.rejected_by_lease_expire += 1; + TLS_LOCAL_READ_METRICS + .with(|m| m.borrow_mut().reject_reason.lease_expire.inc()); debug!("rejected by lease expire"; "tag" => &self.tag); } } else { - metrics.rejected_by_term_mismatch += 1; + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.term_mismatch.inc()); debug!("rejected by term mismatch"; "tag" => &self.tag); } } @@ -440,7 +438,6 @@ impl ReadDelegate { pub fn check_stale_read_safe( &self, read_ts: u64, - metrics: &mut ReadMetrics, ) -> std::result::Result<(), ReadResponse> { let safe_ts = self.read_progress.safe_ts(); if safe_ts >= read_ts { @@ -452,7 +449,7 @@ impl ReadDelegate { "safe ts" => safe_ts, "read ts" => read_ts ); - metrics.rejected_by_safe_timestamp += 1; + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.safe_ts.inc()); let mut response = cmd_resp::new_error(Error::DataIsNotReady { region_id: self.region.get_id(), peer_id: self.peer_id, @@ -549,7 +546,6 @@ where pub store_id: Cell>, store_meta: S, kv_engine: E, - pub metrics: ReadMetrics, // region id -> ReadDelegate // The use of `Arc` here is a workaround, see the comment at `get_delegate` pub delegates: LruCache, @@ -573,11 +569,12 @@ where read_context: &mut Option>, ) -> Arc { let ctx = read_context.as_mut().unwrap(); - ctx.metrics.local_executed_requests += 1; + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().local_executed_requests.inc()); if let 
Some(ts) = create_time { if ts == *ctx.read_id { if let Some(snap) = ctx.snap_cache.as_ref().as_ref() { - ctx.metrics.local_executed_snapshot_cache_hit += 1; + TLS_LOCAL_READ_METRICS + .with(|m| m.borrow_mut().local_executed_snapshot_cache_hit.inc()); return snap.clone(); } } @@ -606,7 +603,6 @@ where snap_cache: Box::new(None), cache_read_id, store_id: Cell::new(None), - metrics: Default::default(), delegates: LruCache::with_capacity_and_sample(0, 7), } } @@ -618,14 +614,14 @@ where match ProposalRouter::send(&self.router, cmd) { Ok(()) => return, Err(TrySendError::Full(c)) => { - self.metrics.rejected_by_channel_full += 1; + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.channel_full.inc()); err.set_message(RAFTSTORE_IS_BUSY.to_owned()); err.mut_server_is_busy() .set_reason(RAFTSTORE_IS_BUSY.to_owned()); cmd = c; } Err(TrySendError::Disconnected(c)) => { - self.metrics.rejected_by_no_region += 1; + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.no_region.inc()); err.set_message(format!("region {} is missing", region_id)); err.mut_region_not_found().set_region_id(region_id); cmd = c; @@ -655,7 +651,7 @@ where Some(d) if !d.track_ver.any_new() => Some(d.clone()), _ => { debug!("update local read delegate"; "region_id" => region_id); - self.metrics.rejected_by_cache_miss += 1; + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.cache_miss.inc()); let (meta_len, meta_reader) = { self.store_meta.get_executor_and_len(region_id) }; @@ -687,7 +683,7 @@ where let store_id = self.store_id.get().unwrap(); if let Err(e) = util::check_store_id(req, store_id) { - self.metrics.rejected_by_store_id_mismatch += 1; + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.store_id_mismatch.inc()); debug!("rejected by store id not match"; "err" => %e); return Err(e); } @@ -697,7 +693,7 @@ where let delegate = match self.get_delegate(region_id) { Some(d) => d, None => { - self.metrics.rejected_by_no_region += 1; + 
TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.no_region.inc()); debug!("rejected by no region"; "region_id" => region_id); return Ok(None); } @@ -707,7 +703,7 @@ where // Check peer id. if let Err(e) = util::check_peer_id(req, delegate.peer_id) { - self.metrics.rejected_by_peer_id_mismatch += 1; + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.peer_id_mismatch.inc()); return Err(e); } @@ -718,13 +714,13 @@ where "delegate_term" => delegate.term, "header_term" => req.get_header().get_term(), ); - self.metrics.rejected_by_term_mismatch += 1; + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.term_mismatch.inc()); return Err(e); } // Check region epoch. if util::check_region_epoch(req, &delegate.region, false).is_err() { - self.metrics.rejected_by_epoch += 1; + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.epoch.inc()); // Stale epoch, redirect it to raftstore to get the latest region. debug!("rejected by epoch not match"; "tag" => &delegate.tag); return Ok(None); @@ -732,7 +728,6 @@ where let mut inspector = Inspector { delegate: &delegate, - metrics: &mut self.metrics, }; match inspector.inspect(req) { Ok(RequestPolicy::ReadLocal) => Ok(Some((delegate, RequestPolicy::ReadLocal))), @@ -766,14 +761,13 @@ where } None => monotonic_raw_now(), }; - if !delegate.is_in_leader_lease(snapshot_ts, &mut self.metrics) { + if !delegate.is_in_leader_lease(snapshot_ts) { // Forward to raftstore. 
self.redirect(RaftCommand::new(req, cb)); return; } delegate_ext = LocalReadContext { - metrics: &mut self.metrics, snap_cache: &mut self.snap_cache, read_id: &mut self.cache_read_id, }; @@ -782,27 +776,19 @@ where let response = delegate.execute(&req, ®ion, None, read_id, Some(delegate_ext)); // Try renew lease in advance - - delegate.maybe_renew_lease_advance( - &self.router, - snapshot_ts, - &mut self.metrics, - ); + delegate.maybe_renew_lease_advance(&self.router, snapshot_ts); response } // Replica can serve stale read if and only if its `safe_ts` >= `read_ts` RequestPolicy::StaleRead => { let read_ts = decode_u64(&mut req.get_header().get_flag_data()).unwrap(); assert!(read_ts > 0); - if let Err(resp) = - delegate.check_stale_read_safe(read_ts, &mut self.metrics) - { + if let Err(resp) = delegate.check_stale_read_safe(read_ts) { cb.invoke_read(resp); return; } delegate_ext = LocalReadContext { - metrics: &mut self.metrics, snap_cache: &mut self.snap_cache, read_id: &mut self.cache_read_id, }; @@ -814,13 +800,12 @@ where // Double check in case `safe_ts` change after the first check and before // getting snapshot - if let Err(resp) = - delegate.check_stale_read_safe(read_ts, &mut self.metrics) - { + if let Err(resp) = delegate.check_stale_read_safe(read_ts) { cb.invoke_read(resp); return; } - self.metrics.local_executed_stale_read_requests += 1; + TLS_LOCAL_READ_METRICS + .with(|m| m.borrow_mut().local_executed_stale_read_requests.inc()); response } _ => unreachable!(), @@ -863,7 +848,7 @@ where cb: Callback, ) { self.propose_raft_command(read_id, req, cb); - self.metrics.maybe_flush(); + maybe_tls_local_read_metrics_flush(); } pub fn release_snapshot_cache(&mut self) { @@ -884,7 +869,6 @@ where kv_engine: self.kv_engine.clone(), router: self.router.clone(), store_id: self.store_id.clone(), - metrics: Default::default(), delegates: LruCache::with_capacity_and_sample(0, 7), snap_cache: self.snap_cache.clone(), cache_read_id: self.cache_read_id.clone(), @@ 
-893,12 +877,11 @@ where } /// #[RaftstoreCommon] -struct Inspector<'r, 'm> { +struct Inspector<'r> { delegate: &'r ReadDelegate, - metrics: &'m mut ReadMetrics, } -impl<'r, 'm> RequestInspector for Inspector<'r, 'm> { +impl<'r> RequestInspector for Inspector<'r> { fn has_applied_to_current_term(&mut self) -> bool { if self.delegate.applied_term == self.delegate.term { true @@ -911,7 +894,7 @@ impl<'r, 'm> RequestInspector for Inspector<'r, 'm> { ); // only for metric. - self.metrics.rejected_by_applied_term += 1; + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.applied_term.inc()); false } } @@ -923,146 +906,12 @@ impl<'r, 'm> RequestInspector for Inspector<'r, 'm> { LeaseState::Valid } else { debug!("rejected by leader lease"; "tag" => &self.delegate.tag); - self.metrics.rejected_by_no_lease += 1; + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.no_lease.inc()); LeaseState::Expired } } } -const METRICS_FLUSH_INTERVAL: u64 = 15_000; // 15s - -/// #[RaftstoreCommon] -#[derive(Clone)] -pub struct ReadMetrics { - pub local_executed_requests: u64, - pub local_executed_stale_read_requests: u64, - pub local_executed_snapshot_cache_hit: u64, - // TODO: record rejected_by_read_quorum. 
- pub rejected_by_store_id_mismatch: u64, - pub rejected_by_peer_id_mismatch: u64, - pub rejected_by_term_mismatch: u64, - pub rejected_by_lease_expire: u64, - pub rejected_by_no_region: u64, - pub rejected_by_no_lease: u64, - pub rejected_by_epoch: u64, - pub rejected_by_applied_term: u64, - pub rejected_by_channel_full: u64, - pub rejected_by_cache_miss: u64, - pub rejected_by_safe_timestamp: u64, - pub renew_lease_advance: u64, - - pub last_flush_time: Instant, -} - -impl Default for ReadMetrics { - fn default() -> ReadMetrics { - ReadMetrics { - local_executed_requests: 0, - local_executed_stale_read_requests: 0, - local_executed_snapshot_cache_hit: 0, - rejected_by_store_id_mismatch: 0, - rejected_by_peer_id_mismatch: 0, - rejected_by_term_mismatch: 0, - rejected_by_lease_expire: 0, - rejected_by_no_region: 0, - rejected_by_no_lease: 0, - rejected_by_epoch: 0, - rejected_by_applied_term: 0, - rejected_by_channel_full: 0, - rejected_by_cache_miss: 0, - rejected_by_safe_timestamp: 0, - renew_lease_advance: 0, - last_flush_time: Instant::now(), - } - } -} - -impl ReadMetrics { - pub fn maybe_flush(&mut self) { - if self.last_flush_time.saturating_elapsed() - >= Duration::from_millis(METRICS_FLUSH_INTERVAL) - { - self.flush(); - self.last_flush_time = Instant::now(); - } - } - - fn flush(&mut self) { - if self.rejected_by_store_id_mismatch > 0 { - LOCAL_READ_REJECT - .store_id_mismatch - .inc_by(self.rejected_by_store_id_mismatch); - self.rejected_by_store_id_mismatch = 0; - } - if self.rejected_by_peer_id_mismatch > 0 { - LOCAL_READ_REJECT - .peer_id_mismatch - .inc_by(self.rejected_by_peer_id_mismatch); - self.rejected_by_peer_id_mismatch = 0; - } - if self.rejected_by_term_mismatch > 0 { - LOCAL_READ_REJECT - .term_mismatch - .inc_by(self.rejected_by_term_mismatch); - self.rejected_by_term_mismatch = 0; - } - if self.rejected_by_lease_expire > 0 { - LOCAL_READ_REJECT - .lease_expire - .inc_by(self.rejected_by_lease_expire); - self.rejected_by_lease_expire = 0; 
- } - if self.rejected_by_no_region > 0 { - LOCAL_READ_REJECT - .no_region - .inc_by(self.rejected_by_no_region); - self.rejected_by_no_region = 0; - } - if self.rejected_by_no_lease > 0 { - LOCAL_READ_REJECT.no_lease.inc_by(self.rejected_by_no_lease); - self.rejected_by_no_lease = 0; - } - if self.rejected_by_epoch > 0 { - LOCAL_READ_REJECT.epoch.inc_by(self.rejected_by_epoch); - self.rejected_by_epoch = 0; - } - if self.rejected_by_applied_term > 0 { - LOCAL_READ_REJECT - .applied_term - .inc_by(self.rejected_by_applied_term); - self.rejected_by_applied_term = 0; - } - if self.rejected_by_channel_full > 0 { - LOCAL_READ_REJECT - .channel_full - .inc_by(self.rejected_by_channel_full); - self.rejected_by_channel_full = 0; - } - if self.rejected_by_safe_timestamp > 0 { - LOCAL_READ_REJECT - .safe_ts - .inc_by(self.rejected_by_safe_timestamp); - self.rejected_by_safe_timestamp = 0; - } - if self.local_executed_snapshot_cache_hit > 0 { - LOCAL_READ_EXECUTED_CACHE_REQUESTS.inc_by(self.local_executed_snapshot_cache_hit); - self.local_executed_snapshot_cache_hit = 0; - } - if self.local_executed_requests > 0 { - LOCAL_READ_EXECUTED_REQUESTS.inc_by(self.local_executed_requests); - self.local_executed_requests = 0; - } - if self.local_executed_stale_read_requests > 0 { - LOCAL_READ_EXECUTED_STALE_READ_REQUESTS.inc_by(self.local_executed_stale_read_requests); - self.local_executed_stale_read_requests = 0; - } - if self.renew_lease_advance > 0 { - LOCAL_READ_RENEW_LEASE_ADVANCE_COUNTER.inc_by(self.renew_lease_advance); - self.renew_lease_advance = 0; - } - } -} - #[cfg(test)] mod tests { use std::{sync::mpsc::*, thread}; @@ -1234,8 +1083,14 @@ mod tests { // The region is not register yet. 
must_redirect(&mut reader, &rx, cmd.clone()); - assert_eq!(reader.metrics.rejected_by_no_region, 1); - assert_eq!(reader.metrics.rejected_by_cache_miss, 1); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.no_region.get()), + 1 + ); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), + 1 + ); assert!(reader.delegates.get(&1).is_none()); // Register region 1 @@ -1264,8 +1119,14 @@ mod tests { // The applied_term is stale must_redirect(&mut reader, &rx, cmd.clone()); - assert_eq!(reader.metrics.rejected_by_cache_miss, 2); - assert_eq!(reader.metrics.rejected_by_applied_term, 1); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), + 2 + ); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.applied_term.get()), + 1 + ); // Make the applied_term matches current term. let pg = Progress::applied_term(term6); @@ -1276,7 +1137,10 @@ mod tests { let task = RaftCommand::::new(cmd.clone(), Callback::Read(Box::new(move |_| {}))); must_not_redirect(&mut reader, &rx, task); - assert_eq!(reader.metrics.rejected_by_cache_miss, 3); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), + 3 + ); // Let's read. let task = RaftCommand::::new( @@ -1291,7 +1155,10 @@ mod tests { // Wait for expiration. thread::sleep(Duration::seconds(1).to_std().unwrap()); must_redirect(&mut reader, &rx, cmd.clone()); - assert_eq!(reader.metrics.rejected_by_lease_expire, 1); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.lease_expire.get()), + 1 + ); // Renew lease. 
lease.renew(monotonic_raw_now()); @@ -1311,8 +1178,14 @@ mod tests { assert!(resp.snapshot.is_none()); })), ); - assert_eq!(reader.metrics.rejected_by_store_id_mismatch, 1); - assert_eq!(reader.metrics.rejected_by_cache_miss, 3); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.store_id_mismatch.get()), + 1 + ); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), + 3 + ); // metapb::Peer id mismatch. let mut cmd_peer_id = cmd.clone(); @@ -1332,7 +1205,10 @@ mod tests { assert!(resp.snapshot.is_none()); })), ); - assert_eq!(reader.metrics.rejected_by_peer_id_mismatch, 1); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.peer_id_mismatch.get()), + 1 + ); // Read quorum. let mut cmd_read_quorum = cmd.clone(); @@ -1351,7 +1227,10 @@ mod tests { assert!(resp.snapshot.is_none()); })), ); - assert_eq!(reader.metrics.rejected_by_term_mismatch, 1); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.term_mismatch.get()), + 1 + ); // Stale epoch. let mut epoch12 = epoch13; @@ -1359,15 +1238,19 @@ mod tests { let mut cmd_epoch = cmd.clone(); cmd_epoch.mut_header().set_region_epoch(epoch12); must_redirect(&mut reader, &rx, cmd_epoch); - assert_eq!(reader.metrics.rejected_by_epoch, 1); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.epoch.get()), + 1 + ); // Expire lease manually, and it can not be renewed. 
- let previous_lease_rejection = reader.metrics.rejected_by_lease_expire; + let previous_lease_rejection = + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.lease_expire.get()); lease.expire(); lease.renew(monotonic_raw_now()); must_redirect(&mut reader, &rx, cmd.clone()); assert_eq!( - reader.metrics.rejected_by_lease_expire, + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.lease_expire.get()), previous_lease_rejection + 1 ); @@ -1384,10 +1267,14 @@ mod tests { ); rx.try_recv().unwrap(); assert_eq!(rx.try_recv().unwrap_err(), TryRecvError::Empty); - assert_eq!(reader.metrics.rejected_by_channel_full, 1); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.channel_full.get()), + 1 + ); // Reject by term mismatch in lease. - let previous_term_rejection = reader.metrics.rejected_by_term_mismatch; + let previous_term_rejection = + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.term_mismatch.get()); let mut cmd9 = cmd.clone(); cmd9.mut_header().set_term(term6 + 3); { @@ -1415,10 +1302,13 @@ mod tests { cmd9 ); assert_eq!( - reader.metrics.rejected_by_term_mismatch, - previous_term_rejection + 1, + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.term_mismatch.get()), + previous_term_rejection + 1 + ); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), + 4 ); - assert_eq!(reader.metrics.rejected_by_cache_miss, 4); // Stale local ReadDelegate cmd.mut_header().set_term(term6 + 3); @@ -1432,10 +1322,16 @@ mod tests { let task = RaftCommand::::new(cmd.clone(), Callback::Read(Box::new(move |_| {}))); must_not_redirect(&mut reader, &rx, task); - assert_eq!(reader.metrics.rejected_by_cache_miss, 5); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), + 5 + ); // Stale read - assert_eq!(reader.metrics.rejected_by_safe_timestamp, 0); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.safe_ts.get()), + 0 + ); 
read_progress.update_safe_ts(1, 1); assert_eq!(read_progress.safe_ts(), 1); @@ -1456,13 +1352,19 @@ mod tests { })), ); must_not_redirect(&mut reader, &rx, task); - assert_eq!(reader.metrics.rejected_by_safe_timestamp, 1); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.safe_ts.get()), + 1 + ); read_progress.update_safe_ts(1, 2); assert_eq!(read_progress.safe_ts(), 2); let task = RaftCommand::::new(cmd, Callback::Read(Box::new(move |_| {}))); must_not_redirect(&mut reader, &rx, task); - assert_eq!(reader.metrics.rejected_by_safe_timestamp, 1); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.safe_ts.get()), + 1 + ); // Remove invalid delegate let reader_clone = store_meta.lock().unwrap().readers.get(&1).unwrap().clone(); @@ -1573,13 +1475,11 @@ mod tests { } let mut read_id = ThreadReadId::new(); - let mut read_metrics = ReadMetrics::default(); let mut snap_cache = Box::new(None); let read_id_copy = Some(read_id.clone()); let mut read_context = Some(LocalReadContext { - metrics: &mut read_metrics, read_id: &mut read_id, snap_cache: &mut snap_cache, }); @@ -1605,8 +1505,14 @@ mod tests { ); assert!(snap_cache.as_ref().is_some()); - assert_eq!(read_metrics.local_executed_requests, 2); - assert_eq!(read_metrics.local_executed_snapshot_cache_hit, 1); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().local_executed_requests.get()), + 2 + ); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().local_executed_snapshot_cache_hit.get()), + 1 + ); } #[test] @@ -1643,7 +1549,6 @@ mod tests { { let mut read_context = Some(LocalReadContext { - metrics: &mut reader.metrics, snap_cache: &mut reader.snap_cache, read_id: &mut reader.cache_read_id, }); @@ -1654,13 +1559,15 @@ mod tests { } } // We should hit cache 9 times - assert_eq!(reader.metrics.local_executed_snapshot_cache_hit, 9); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().local_executed_snapshot_cache_hit.get()), + 9 + ); let read_id = 
Some(ThreadReadId::new()); { let read_context = LocalReadContext { - metrics: &mut reader.metrics, snap_cache: &mut reader.snap_cache, read_id: &mut reader.cache_read_id, }; @@ -1668,40 +1575,49 @@ mod tests { let _ = delegate.get_snapshot(read_id.clone(), &mut Some(read_context)); } // This time, we will miss the cache - assert_eq!(reader.metrics.local_executed_snapshot_cache_hit, 9); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().local_executed_snapshot_cache_hit.get()), + 9 + ); { let read_context = LocalReadContext { - metrics: &mut reader.metrics, snap_cache: &mut reader.snap_cache, read_id: &mut reader.cache_read_id, }; let _ = delegate.get_snapshot(read_id.clone(), &mut Some(read_context)); // We can hit it again. - assert_eq!(reader.metrics.local_executed_snapshot_cache_hit, 10); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().local_executed_snapshot_cache_hit.get()), + 10 + ); } reader.release_snapshot_cache(); { let read_context = LocalReadContext { - metrics: &mut reader.metrics, snap_cache: &mut reader.snap_cache, read_id: &mut reader.cache_read_id, }; let _ = delegate.get_snapshot(read_id.clone(), &mut Some(read_context)); } // After release, we will mss the cache even with the prevsiou read_id. - assert_eq!(reader.metrics.local_executed_snapshot_cache_hit, 10); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().local_executed_snapshot_cache_hit.get()), + 10 + ); { let read_context = LocalReadContext { - metrics: &mut reader.metrics, snap_cache: &mut reader.snap_cache, read_id: &mut reader.cache_read_id, }; let _ = delegate.get_snapshot(read_id, &mut Some(read_context)); } // We can hit it again. 
- assert_eq!(reader.metrics.local_executed_snapshot_cache_hit, 11); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().local_executed_snapshot_cache_hit.get()), + 11 + ); } } From a8cd9645ef27617e12f73b2e25de1ba9793ecf82 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Fri, 12 Aug 2022 16:40:50 +0800 Subject: [PATCH 0151/1149] metric: fix the panel description for gc compaction filter (#13275) close tikv/tikv#13274 fix the panel description for gc compaction filter Signed-off-by: cfzjywxk --- metrics/grafana/tikv_details.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 0291aa87590..b47c226cb02 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -22518,7 +22518,7 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 0, - "description": "SafePoint used for TiKV's Auto GC", + "description": "Keys handled in GC compaction filter", "fill": 0, "gridPos": { "h": 7, @@ -22569,14 +22569,14 @@ "expr": "sum(rate(tikv_gc_compaction_filter_skip{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "skip", + "legendFormat": "skipped", "refId": "B" }, { "expr": "sum(rate(tikv_gc_compaction_mvcc_rollback{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "rollback/lock", + "legendFormat": "mvcc-rollback/mvcc-lock", "refId": "C" }, { @@ -22590,7 +22590,7 @@ "expr": "sum(rate(tikv_gc_compaction_filter_perform{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "perform", + "legendFormat": "performed-times", "refId": "E" }, { From 594fca7348fb756bb44b2a6778a3e938aa5215b4 Mon Sep 17 00:00:00 2001 From: Xinye 
Tao Date: Fri, 12 Aug 2022 18:10:49 +0800 Subject: [PATCH 0152/1149] raftstore: simplify v1 propose path (#13230) ref tikv/tikv#12842 Signed-off-by: tabokie --- components/backup-stream/Cargo.toml | 4 +- components/backup-stream/src/endpoint.rs | 1 - components/backup-stream/src/event_loader.rs | 1 - components/backup-stream/src/router.rs | 14 -- .../backup-stream/src/subscription_manager.rs | 36 ++--- components/batch-system/src/batch.rs | 120 +++++++------- components/batch-system/src/fsm.rs | 56 ++++--- components/batch-system/src/mailbox.rs | 21 ++- components/batch-system/src/router.rs | 32 ++-- components/cloud/aws/src/s3.rs | 1 - components/raftstore-v2/src/batch/store.rs | 45 +++--- components/raftstore-v2/src/fsm/peer.rs | 4 +- components/raftstore/src/store/fsm/apply.rs | 10 +- components/raftstore/src/store/fsm/peer.rs | 100 ++++++------ components/raftstore/src/store/peer.rs | 153 ++++++++---------- components/raftstore/src/store/worker/pd.rs | 6 - .../src/store/worker/split_controller.rs | 3 - src/server/raft_client.rs | 1 - .../cases/test_cmd_epoch_checker.rs | 28 ++-- 19 files changed, 307 insertions(+), 329 deletions(-) diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 7fe221842ce..b0b6fc3f13f 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -8,7 +8,7 @@ default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] test-engine-kv-rocksdb = ["tikv/test-engine-kv-rocksdb"] test-engine-raft-raft-engine = ["tikv/test-engine-raft-raft-engine"] test-engines-rocksdb = ["tikv/test-engines-rocksdb"] -failpoints = ["tikv/failpoints", "fail/failpoints", "fail"] +failpoints = ["tikv/failpoints", "fail/failpoints"] backup-stream-debug = [] [[test]] @@ -34,7 +34,7 @@ error_code = { path = "../error_code" } etcd-client = { git = "https://github.com/yujuncen/etcd-client", rev = "e0321a1990ee561cf042973666c0db61c8d82364", features = ["pub-response-field", "tls"] } 
external_storage = { path = "../external_storage", default-features = false } external_storage_export = { path = "../external_storage/export", default-features = false } -fail = { version = "0.5", optional = true } +fail = "0.5" file_system = { path = "../file_system" } futures = "0.3" futures-io = "0.3" diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 281bf2e77f6..81374484463 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -766,7 +766,6 @@ where async move { let mut resolved = get_rts.await?; let mut new_rts = resolved.global_checkpoint(); - #[cfg(feature = "failpoints")] fail::fail_point!("delay_on_flush"); flush_ob.before(resolved.take_region_checkpoints()).await; if let Some(rewritten_rts) = flush_ob.rewrite_resolved_ts(&task).await { diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 05b370e2985..61e227af1ac 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -383,7 +383,6 @@ where let mut stats = StatisticsSummary::default(); let start = Instant::now(); loop { - #[cfg(feature = "failpoints")] fail::fail_point!("scan_and_async_send", |msg| Err(Error::Other(box_err!( "{:?}", msg )))); diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index d5486cecddb..f1280103e89 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -688,20 +688,6 @@ impl TempFileKey { use chrono::prelude::*; let millis = TimeStamp::physical(ts.into()); let dt = Utc.timestamp_millis(millis as _); - - #[cfg(feature = "failpoints")] - { - fail::fail_point!("stream_format_date_time", |s| { - return dt - .format(&s.unwrap_or_else(|| "%Y%m".to_owned())) - .to_string(); - }); - match t { - FormatType::Date => dt.format("%Y%m%d").to_string(), - FormatType::Hour => 
dt.format("%H").to_string(), - } - } - #[cfg(not(feature = "failpoints"))] match t { FormatType::Date => dt.format("%Y%m%d"), FormatType::Hour => dt.format("%H"), diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index c6e928b8201..751f41ee587 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -202,7 +202,6 @@ fn scan_executor_loop( canceled: Arc, ) { while let Ok(cmd) = cmds.recv() { - #[cfg(feature = "failpoints")] fail::fail_point!("execute_scan_command"); debug!("handling initial scan request"; "region_id" => %cmd.region.get_id()); metrics::PENDING_INITIAL_SCAN_LEN @@ -393,7 +392,6 @@ where info!("backup stream: on_modify_observe"; "op" => ?op); match op { ObserveOp::Start { region } => { - #[cfg(feature = "failpoints")] fail::fail_point!("delay_on_start_observe"); self.start_observe(region).await; metrics::INITIAL_SCAN_REASON @@ -522,7 +520,6 @@ where } Some(for_task) => { - #[cfg(feature = "failpoints")] fail::fail_point!("try_start_observe", |_| { Err(Error::Other(box_err!("Nature is boring"))) }); @@ -604,7 +601,6 @@ where } async fn get_last_checkpoint_of(&self, task: &str, region: &Region) -> Result { - #[cfg(feature = "failpoints")] fail::fail_point!("get_last_checkpoint_of", |hint| Err(Error::Other( box_err!( "get_last_checkpoint_of({}, {:?}) failed because {:?}", @@ -666,8 +662,6 @@ mod test { use tikv::storage::Statistics; use super::InitialScan; - #[cfg(feature = "failpoints")] - use crate::{subscription_manager::spawn_executors, utils::CallbackWaitGroup}; #[derive(Clone, Copy)] struct NoopInitialScan; @@ -687,27 +681,27 @@ mod test { } } - #[cfg(feature = "failpoints")] - fn should_finish_in(f: impl FnOnce() + Send + 'static, d: std::time::Duration) { - let (tx, rx) = futures::channel::oneshot::channel(); - std::thread::spawn(move || { - f(); - tx.send(()).unwrap(); - }); - let pool = 
tokio::runtime::Builder::new_current_thread() - .enable_time() - .build() - .unwrap(); - let _e = pool.handle().enter(); - pool.block_on(tokio::time::timeout(d, rx)).unwrap().unwrap(); - } - #[test] #[cfg(feature = "failpoints")] fn test_message_delay_and_exit() { use std::time::Duration; use super::ScanCmd; + use crate::{subscription_manager::spawn_executors, utils::CallbackWaitGroup}; + + fn should_finish_in(f: impl FnOnce() + Send + 'static, d: std::time::Duration) { + let (tx, rx) = futures::channel::oneshot::channel(); + std::thread::spawn(move || { + f(); + tx.send(()).unwrap(); + }); + let pool = tokio::runtime::Builder::new_current_thread() + .enable_time() + .build() + .unwrap(); + let _e = pool.handle().enter(); + pool.block_on(tokio::time::timeout(d, rx)).unwrap().unwrap(); + } let pool = spawn_executors(NoopInitialScan, 1); let wg = CallbackWaitGroup::new(); diff --git a/components/batch-system/src/batch.rs b/components/batch-system/src/batch.rs index f868b4bfc94..4d935ad4819 100644 --- a/components/batch-system/src/batch.rs +++ b/components/batch-system/src/batch.rs @@ -132,7 +132,7 @@ pub struct Batch { } impl Batch { - /// Create a a batch with given batch size. + /// Creates a batch with given batch size. pub fn with_capacity(cap: usize) -> Batch { Batch { normals: Vec::with_capacity(cap), @@ -163,15 +163,16 @@ impl Batch { self.control.take(); } - /// Put back the FSM located at index. + /// Releases the ownership of `fsm` so that it can be scheduled in another + /// poller. /// - /// Only when channel length is larger than `checked_len` will trigger - /// further notification. This function may fail if channel length is - /// larger than the given value before FSM is released. - fn release(&mut self, mut fsm: NormalFsm, checked_len: usize) -> Option> { + /// When pending messages of the FSM is different than `expected_len`, + /// attempts to schedule it in this poller again. Returns the `fsm` if the + /// re-scheduling suceeds. 
+ fn release(&mut self, mut fsm: NormalFsm, expected_len: usize) -> Option> { let mailbox = fsm.take_mailbox().unwrap(); mailbox.release(fsm.fsm); - if mailbox.len() == checked_len { + if mailbox.len() == expected_len { None } else { match mailbox.take_fsm() { @@ -186,7 +187,7 @@ impl Batch { } } - /// Remove the normal FSM located at `index`. + /// Removes the normal FSM. /// /// This method should only be called when the FSM is stopped. /// If there are still messages in channel, the FSM is untouched and @@ -204,18 +205,11 @@ impl Batch { } } - /// Schedule the normal FSM located at `index`. - /// - /// If `inplace`, the relative position of all fsm will not be changed; - /// otherwise, the fsm will be popped and the last fsm will be swap in - /// to reduce memory copy. - pub fn schedule(&mut self, router: &BatchRouter, index: usize, inplace: bool) { + /// Schedules the normal FSM located at `index`. + pub fn schedule(&mut self, router: &BatchRouter, index: usize) { let to_schedule = match self.normals[index].take() { Some(f) => f, None => { - if !inplace { - self.normals.swap_remove(index); - } return; } }; @@ -232,12 +226,19 @@ impl Batch { // failed to reschedule f.policy.take(); self.normals[index] = res; - } else if !inplace { + } + } + + /// Reclaims the slot storage if there is no FSM located at `index`. It will + /// alter the positions of some other FSMs with index larger than `index`. + #[inline] + pub fn swap_reclaim(&mut self, index: usize) { + if self.normals[index].is_none() { self.normals.swap_remove(index); } } - /// Same as `release`, but working on control FSM. + /// Same as [`release`], but works with control FSM. pub fn release_control(&mut self, control_box: &BasicMailbox, checked_len: usize) -> bool { let s = self.control.take().unwrap(); control_box.release(s); @@ -254,7 +255,7 @@ impl Batch { } } - /// Same as `remove`, but working on control FSM. + /// Same as [`remove`], but works with control FSM. 
pub fn remove_control(&mut self, control_box: &BasicMailbox) { if control_box.is_empty() { let s = self.control.take().unwrap(); @@ -265,14 +266,14 @@ impl Batch { /// The result for `PollHandler::handle_control`. pub enum HandleResult { - /// The Fsm still needs to be processed. + /// The FSM still needs to be handled in the next run. KeepProcessing, - /// The Fsm should stop at the progress. + /// The FSM should stop at the progress. StopAt { - /// The count of messages that have been acknowledged by handler. The - /// fsm should be released until new messages arrive. + /// The amount of messages acknowledged by the handler. The FSM + /// should be released unless new messages arrive. progress: usize, - /// Whether the fsm should be released before `end`. + /// Whether the FSM should be passed in to `end` call. skip_end: bool, }, } @@ -284,9 +285,10 @@ impl HandleResult { } } -/// A handler that poll all FSM in ready. +/// A handler that polls all FSMs in ready. +/// +/// A general process works like the following: /// -/// A General process works like following: /// ```text /// loop { /// begin @@ -294,34 +296,34 @@ impl HandleResult { /// handle_control /// foreach ready normal: /// handle_normal +/// light_end /// end /// } /// ``` /// -/// Note that, every poll thread has its own handler, which doesn't have to be -/// Sync. +/// A [`PollHandler`] doesn't have to be [`Sync`] because each poll thread has +/// its own handler. pub trait PollHandler: Send + 'static { /// This function is called at the very beginning of every round. fn begin(&mut self, _batch_size: usize, update_cfg: F) where for<'a> F: FnOnce(&'a Config); - /// This function is called when handling readiness for control FSM. + /// This function is called when the control FSM is ready. /// - /// If returned value is Some, then it represents a length of channel. This - /// function will only be called for the same fsm after channel's length is - /// larger than the value. 
If it returns None, then this function will - /// still be called for the same FSM in the next loop unless the FSM is - /// stopped. + /// If `Some(len)` is returned, this function will not be called again until + /// there are more than `len` pending messages in `control` FSM. + /// + /// If `None` is returned, this function will be called again with the same + /// FSM `control` in the next round, unless it is stopped. fn handle_control(&mut self, control: &mut C) -> Option; - /// This function is called when handling readiness for normal FSM. - /// - /// The returned value is handled in the same way as `handle_control`. + /// This function is called when some normal FSMs are ready. fn handle_normal(&mut self, normal: &mut impl DerefMut) -> HandleResult; - /// This function is called after `handle_normal` is called for all fsm and - /// before calling `end`. The function is expected to run lightweight work. + /// This function is called after [`handle_normal`] is called for all FSMs + /// and before calling [`end`]. The function is expected to run lightweight + /// works. fn light_end(&mut self, _batch: &mut [Option>]) {} /// This function is called at the end of every round. @@ -383,7 +385,8 @@ impl> Poller { !batch.is_empty() } - // Poll for readiness and forward to handler. Remove stale peer if necessary. + /// Polls for readiness and forwards them to handler. Removes stale peers if + /// necessary. pub fn poll(&mut self) { fail_point!("poll"); let mut batch = Batch::with_capacity(self.max_batch_size); @@ -391,7 +394,7 @@ impl> Poller { let mut to_skip_end = Vec::with_capacity(self.max_batch_size); // Fetch batch after every round is finished. It's helpful to protect regions - // from becoming hungry if some regions are hot points. Since we fetch new fsm + // from becoming hungry if some regions are hot points. Since we fetch new FSM // every time calling `poll`, we do not need to configure a large value for // `self.max_batch_size`. 
let mut run = true; @@ -400,7 +403,7 @@ impl> Poller { // overhead max size of batch. It's helpful to protect regions from becoming // hungry if some regions are hot points. let mut max_batch_size = std::cmp::max(self.max_batch_size, batch.normals.len()); - // update some online config if needed. + // Update some online config if needed. { // TODO: rust 2018 does not support capture disjoint field within a closure. // See https://github.com/rust-lang/rust/issues/53488 for more details. @@ -457,9 +460,11 @@ impl> Poller { if let Ok(fsm) = self.fsm_receiver.try_recv() { run = batch.push(fsm); } - // If we receive a ControlFsm, break this cycle and call `end`. Because - // ControlFsm may change state of the handler, we shall deal with it immediately - // after calling `begin` of `Handler`. + // When `fsm_cnt >= batch.normals.len()`: + // - No more FSMs in `fsm_receiver`. + // - We receive a control FSM. Break the loop because ControlFsm may change + // state of the handler, we shall deal with it immediately after calling + // `begin` of `Handler`. if !run || fsm_cnt >= batch.normals.len() { break; } @@ -478,17 +483,19 @@ impl> Poller { fsm_cnt += 1; } self.handler.light_end(&mut batch.normals); - for offset in &to_skip_end { - batch.schedule(&self.router, *offset, true); + for index in &to_skip_end { + batch.schedule(&self.router, *index); } to_skip_end.clear(); self.handler.end(&mut batch.normals); - // Because release use `swap_remove` internally, so using pop here - // to remove the correct FSM. - while let Some(r) = reschedule_fsms.pop() { - batch.schedule(&self.router, r, false); + // Iterate larger index first, so that `swap_reclaim` won't affect other FSMs + // in the list. 
+ for index in reschedule_fsms.iter().rev() { + batch.schedule(&self.router, *index); + batch.swap_reclaim(*index); } + reschedule_fsms.clear(); } if let Some(fsm) = batch.control.take() { self.router.control_scheduler.schedule(fsm); @@ -521,9 +528,9 @@ pub trait HandlerBuilder { /// A system that can poll FSMs concurrently and in batch. /// -/// To use the system, two type of FSMs and their PollHandlers need -/// to be defined: Normal and Control. Normal FSM handles the general -/// task while Control FSM creates normal FSM instances. +/// To use the system, two type of FSMs and their PollHandlers need to be +/// defined: Normal and Control. Normal FSM handles the general task while +/// Control FSM creates normal FSM instances. pub struct BatchSystem { name_prefix: Option, router: BatchRouter, @@ -694,7 +701,8 @@ pub type BatchRouter = Router, ControlSchedule /// Create a batch system with the given thread name prefix and pool size. /// -/// `sender` and `controller` should be paired. +/// `sender` and `controller` should be paired: all messages sent on the +/// `sender` will become available to the `controller`. pub fn create_system( cfg: &Config, sender: mpsc::LooseBoundedSender, diff --git a/components/batch-system/src/fsm.rs b/components/batch-system/src/fsm.rs index 6fb4fe91539..09e32333c96 100644 --- a/components/batch-system/src/fsm.rs +++ b/components/batch-system/src/fsm.rs @@ -12,44 +12,37 @@ use std::{ use crate::mailbox::BasicMailbox; -// The FSM is notified. -const NOTIFYSTATE_NOTIFIED: usize = 0; -// The FSM is idle. -const NOTIFYSTATE_IDLE: usize = 1; -// The FSM is expected to be dropped. -const NOTIFYSTATE_DROP: usize = 2; - #[derive(Clone, Copy, Debug, PartialEq)] pub enum Priority { Low, Normal, } -/// `FsmScheduler` schedules `Fsm` for later handles. +/// `FsmScheduler` schedules `Fsm` for later handling. pub trait FsmScheduler { type Fsm: Fsm; - /// Schedule a Fsm for later handles. + /// Schedule a Fsm for later handling. 
fn schedule(&self, fsm: Box); /// Shutdown the scheduler, which indicates that resources like /// background thread pool should be released. fn shutdown(&self); } -/// A Fsm is a finite state machine. It should be able to be notified for +/// A `Fsm` is a finite state machine. It should be able to be notified for /// updating internal state according to incoming messages. pub trait Fsm { type Message: Send; fn is_stopped(&self) -> bool; - /// Set a mailbox to Fsm, which should be used to send message to itself. + /// Set a mailbox to FSM, which should be used to send message to itself. fn set_mailbox(&mut self, _mailbox: Cow<'_, BasicMailbox>) where Self: Sized, { } - /// Take the mailbox from Fsm. Implementation should ensure there will be + /// Take the mailbox from FSM. Implementation should ensure there will be /// no reference to mailbox after calling this method. fn take_mailbox(&mut self) -> Option> where @@ -63,17 +56,30 @@ pub trait Fsm { } } +/// A holder of FSM. +/// +/// There are three possible states: +/// +/// 1. NOTIFYSTATE_NOTIFIED: The FSM is taken by an external executor. `data` +/// holds a null pointer. +/// 2. NOTIFYSTATE_IDLE: No actor is using the FSM. `data` owns the FSM. +/// 3. NOTIFYSTATE_DROP: The FSM is dropped. `data` holds a null pointer. pub struct FsmState { status: AtomicUsize, data: AtomicPtr, + /// A counter shared with other `FsmState`s. state_cnt: Arc, } impl FsmState { + const NOTIFYSTATE_NOTIFIED: usize = 0; + const NOTIFYSTATE_IDLE: usize = 1; + const NOTIFYSTATE_DROP: usize = 2; + pub fn new(data: Box, state_cnt: Arc) -> FsmState { state_cnt.fetch_add(1, Ordering::Relaxed); FsmState { - status: AtomicUsize::new(NOTIFYSTATE_IDLE), + status: AtomicUsize::new(Self::NOTIFYSTATE_IDLE), data: AtomicPtr::new(Box::into_raw(data)), state_cnt, } @@ -82,8 +88,8 @@ impl FsmState { /// Take the fsm if it's IDLE. 
pub fn take_fsm(&self) -> Option> { let res = self.status.compare_exchange( - NOTIFYSTATE_IDLE, - NOTIFYSTATE_NOTIFIED, + Self::NOTIFYSTATE_IDLE, + Self::NOTIFYSTATE_NOTIFIED, Ordering::AcqRel, Ordering::Acquire, ); @@ -99,7 +105,7 @@ impl FsmState { } } - /// Notify fsm via a `FsmScheduler`. + /// Notifies FSM via a `FsmScheduler`. #[inline] pub fn notify>( &self, @@ -115,25 +121,25 @@ impl FsmState { } } - /// Put the owner back to the state. + /// Releases the FSM ownership back to this state. /// /// It's not required that all messages should be consumed before - /// releasing a fsm. However, a fsm is guaranteed to be notified only + /// releasing a FSM. However, a FSM is guaranteed to be notified only /// when new messages arrives after it's released. #[inline] pub fn release(&self, fsm: Box) { let previous = self.data.swap(Box::into_raw(fsm), Ordering::AcqRel); - let mut previous_status = NOTIFYSTATE_NOTIFIED; + let mut previous_status = Self::NOTIFYSTATE_NOTIFIED; if previous.is_null() { let res = self.status.compare_exchange( - NOTIFYSTATE_NOTIFIED, - NOTIFYSTATE_IDLE, + Self::NOTIFYSTATE_NOTIFIED, + Self::NOTIFYSTATE_IDLE, Ordering::AcqRel, Ordering::Acquire, ); previous_status = match res { Ok(_) => return, - Err(NOTIFYSTATE_DROP) => { + Err(Self::NOTIFYSTATE_DROP) => { let ptr = self.data.swap(ptr::null_mut(), Ordering::AcqRel); unsafe { Box::from_raw(ptr) }; return; @@ -144,11 +150,11 @@ impl FsmState { panic!("invalid release state: {:?} {}", previous, previous_status); } - /// Clear the fsm. + /// Clears the FSM. 
#[inline] pub fn clear(&self) { - match self.status.swap(NOTIFYSTATE_DROP, Ordering::AcqRel) { - NOTIFYSTATE_NOTIFIED | NOTIFYSTATE_DROP => return, + match self.status.swap(Self::NOTIFYSTATE_DROP, Ordering::AcqRel) { + Self::NOTIFYSTATE_NOTIFIED | Self::NOTIFYSTATE_DROP => return, _ => {} } diff --git a/components/batch-system/src/mailbox.rs b/components/batch-system/src/mailbox.rs index 219edb2e2af..5afddf73c14 100644 --- a/components/batch-system/src/mailbox.rs +++ b/components/batch-system/src/mailbox.rs @@ -13,12 +13,21 @@ use crate::fsm::{Fsm, FsmScheduler, FsmState}; /// A basic mailbox. /// -/// Every mailbox should have one and only one owner, who will receive all -/// messages sent to this mailbox. +/// A mailbox holds an FSM owner, and the sending end of a channel to send +/// messages to that owner. Multiple producers share the same mailbox to +/// communicate with a FSM. /// -/// When a message is sent to a mailbox, its owner will be checked whether it's -/// idle. An idle owner will be scheduled via `FsmScheduler` immediately, which -/// will drive the fsm to poll for messages. +/// The mailbox's FSM owner needs to be scheduled to a [`Poller`] to handle its +/// pending messages. Therefore, the producer of messages also needs to provide +/// a channel to a poller ([`FsmScheduler`]), so that the mailbox can schedule +/// its FSM owner. When a message is sent to a mailbox, the mailbox will check +/// whether its FSM owner is idle, i.e. not already taken and scheduled. If the +/// FSM is idle, it will be scheduled immediately. By doing so, the mailbox +/// temporarily transfers its ownership of the FSM to the poller. The +/// implementation must make sure the same FSM is returned afterwards via the +/// [`release`] method. +/// +/// [`Poller`]: crate::batch::Poller pub struct BasicMailbox { sender: mpsc::LooseBoundedSender, state: Arc>, @@ -103,7 +112,7 @@ impl Clone for BasicMailbox { } } -/// A more high level mailbox. 
+/// A more high level mailbox that is paired with a [`FsmScheduler`]. pub struct Mailbox where Owner: Fsm, diff --git a/components/batch-system/src/router.rs b/components/batch-system/src/router.rs index 9975d66dfdc..8b0936a9faa 100644 --- a/components/batch-system/src/router.rs +++ b/components/batch-system/src/router.rs @@ -39,17 +39,20 @@ enum CheckDoResult { Valid(T), } -/// Router route messages to its target mailbox. -/// -/// Every fsm has a mailbox, hence it's necessary to have an address book -/// that can deliver messages to specified fsm, which is exact router. +/// Router routes messages to its target FSM's mailbox. /// /// In our abstract model, every batch system has two different kind of -/// fsms. First is normal fsm, which does the common work like peers in a -/// raftstore model or apply delegate in apply model. Second is control fsm, +/// FSMs. First is normal FSM, which does the common work like peers in a +/// raftstore model or apply delegate in apply model. Second is control FSM, /// which does some work that requires a global view of resources or creates -/// missing fsm for specified address. Normal fsm and control fsm can have -/// different scheduler, but this is not required. +/// missing FSM for specified address. +/// +/// There are one control FSM and multiple normal FSMs in a system. Each FSM +/// has its own mailbox. We maintain an address book to deliver messages to the +/// specified normal FSM. +/// +/// Normal FSM and control FSM can have different scheduler, but this is not +/// required. pub struct Router { normals: Arc>>, caches: Cell>>, @@ -60,8 +63,9 @@ pub struct Router { pub(crate) normal_scheduler: Ns, pub(crate) control_scheduler: Cs, - // Count of Mailboxes that is not destroyed. - // Added when a Mailbox created, and subtracted it when a Mailbox destroyed. + // Number of active mailboxes. + // Added when a mailbox is created, and subtracted it when a mailbox is + // destroyed. 
state_cnt: Arc, // Indicates the router is shutdown down or not. shutdown: Arc, @@ -198,7 +202,7 @@ where } } - /// Get the mailbox of control fsm. + /// Get the mailbox of control FSM. pub fn control_mailbox(&self) -> Mailbox { Mailbox::new(self.control_box.clone(), self.control_scheduler.clone()) } @@ -269,7 +273,7 @@ where } } - /// Force sending message to control fsm. + /// Force sending message to control FSM. #[inline] pub fn send_control(&self, msg: C::Message) -> Result<(), TrySendError> { match self.control_box.try_send(msg, &self.control_scheduler) { @@ -284,7 +288,7 @@ where } } - /// Try to notify all normal fsm a message. + /// Try to notify all normal FSMs a message. pub fn broadcast_normal(&self, mut msg_gen: impl FnMut() -> N::Message) { let mailboxes = self.normals.lock().unwrap(); for mailbox in mailboxes.map.values() { @@ -292,7 +296,7 @@ where } } - /// Try to notify all fsm that the cluster is being shutdown. + /// Try to notify all FSMs that the cluster is being shutdown. pub fn broadcast_shutdown(&self) { info!("broadcasting shutdown"); self.shutdown.store(true, Ordering::SeqCst); diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index 25499d89c61..991ae154427 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -474,7 +474,6 @@ impl<'client> S3Uploader<'client> { sleep(delay_duration).await; } - #[cfg(feature = "failpoints")] fail_point!("s3_put_obj_err", |_| { Err(RusotoError::ParseError("failed to put object".to_owned())) }); diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index ee063fc15dd..9c1f60ba947 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -26,9 +26,9 @@ use crate::{ Error, PeerMsg, PeerTick, Result, StoreMsg, }; -/// A per-thread context used for handling raft messages. +/// A per-thread context shared by the [`StoreFsm`] and multiple [`PeerFsm`]s. 
pub struct StoreContext { - /// A logger without any KV. It's clean for creating new PeerFSM. + /// A logger without any KV. It's clean for creating new PeerFsm. pub logger: Logger, /// The transport for sending messages to peers on other stores. pub trans: T, @@ -53,12 +53,20 @@ impl StoreContext { } } -/// Poller for polling raft state machines. +/// A [`PollHandler`] that handles updates of [`StoreFsm`]s and [`PeerFsm`]s. +/// +/// It is responsible for: +/// +/// - Keeping the local [`StoreContext`] up-to-date. +/// - Receiving and sending messages in and out of these FSMs. struct StorePoller { - store_msg_buf: Vec, - peer_msg_buf: Vec>, poll_ctx: StoreContext, cfg_tracker: Tracker, + /// Buffers to hold in-coming messages. + store_msg_buf: Vec, + peer_msg_buf: Vec>, + /// These fields controls the timing of flushing messages generated by + /// FSMs. last_flush_time: TiInstant, need_flush_events: bool, } @@ -66,10 +74,10 @@ struct StorePoller { impl StorePoller { pub fn new(poll_ctx: StoreContext, cfg_tracker: Tracker) -> Self { Self { - store_msg_buf: Vec::new(), - peer_msg_buf: Vec::new(), poll_ctx, cfg_tracker, + store_msg_buf: Vec::new(), + peer_msg_buf: Vec::new(), last_flush_time: TiInstant::now(), need_flush_events: false, } @@ -106,8 +114,8 @@ impl PollHandler F: FnOnce(&'a batch_system::Config), { - let cfg = self.cfg_tracker.any_new().map(|c| c.clone()); - if let Some(cfg) = cfg { + // Apply configuration changes. 
+ if let Some(cfg) = self.cfg_tracker.any_new().map(|c| c.clone()) { let last_messages_per_tick = self.messages_per_tick(); self.poll_ctx.cfg = cfg; if self.poll_ctx.cfg.messages_per_tick != last_messages_per_tick { @@ -117,31 +125,28 @@ impl PollHandler Option { + fn handle_control(&mut self, fsm: &mut StoreFsm) -> Option { debug_assert!(self.store_msg_buf.is_empty()); - let received_cnt = store.recv(&mut self.store_msg_buf); + let received_cnt = fsm.recv(&mut self.store_msg_buf); let expected_msg_count = if received_cnt == self.messages_per_tick() { None } else { Some(0) }; - let mut delegate = StoreFsmDelegate::new(store, &mut self.poll_ctx); + let mut delegate = StoreFsmDelegate::new(fsm, &mut self.poll_ctx); delegate.handle_msgs(&mut self.store_msg_buf); expected_msg_count } - fn handle_normal( - &mut self, - peer: &mut impl DerefMut>, - ) -> HandleResult { + fn handle_normal(&mut self, fsm: &mut impl DerefMut>) -> HandleResult { debug_assert!(self.peer_msg_buf.is_empty()); - let received_cnt = peer.recv(&mut self.peer_msg_buf); + let received_cnt = fsm.recv(&mut self.peer_msg_buf); let handle_result = if received_cnt == self.messages_per_tick() { HandleResult::KeepProcessing } else { HandleResult::stop_at(0, false) }; - let mut delegate = PeerFsmDelegate::new(peer, &mut self.poll_ctx); + let mut delegate = PeerFsmDelegate::new(fsm, &mut self.poll_ctx); delegate.handle_msgs(&mut self.peer_msg_buf); handle_result } @@ -204,7 +209,7 @@ impl StorePollerBuilder { } } - /// Initializes all the existing raft machines and cleanup stale tablets. + /// Initializes all the existing raft machines and cleans up stale tablets. fn init(&self) -> Result>> { let mut regions = HashMap::default(); let cfg = self.cfg.value(); @@ -262,7 +267,7 @@ where } } -/// The system used for poll raft activities. +/// The system used for polling Raft activities. 
pub struct StoreSystem { system: BatchSystem, StoreFsm>, apply_router: ApplyRouter, diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 88d7b479e49..a8fb67aa121 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -76,7 +76,7 @@ impl Fsm for PeerFsm { self.is_stopped } - /// Set a mailbox to Fsm, which should be used to send message to itself. + /// Set a mailbox to FSM, which should be used to send message to itself. fn set_mailbox(&mut self, mailbox: Cow<'_, BasicMailbox>) where Self: Sized, @@ -84,7 +84,7 @@ impl Fsm for PeerFsm { self.mailbox = Some(mailbox.into_owned()); } - /// Take the mailbox from Fsm. Implementation should ensure there will be + /// Take the mailbox from FSM. Implementation should ensure there will be /// no reference to mailbox after calling this method. fn take_mailbox(&mut self) -> Option> where diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index e2db05db143..0d97137bab1 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -3594,19 +3594,21 @@ where } } - #[allow(unused_mut, clippy::redundant_closure_call)] fn handle_snapshot(&mut self, apply_ctx: &mut ApplyContext, snap_task: GenSnapTask) { if self.delegate.pending_remove || self.delegate.stopped { return; } let applied_index = self.delegate.apply_state.get_applied_index(); - let mut need_sync = apply_ctx + let need_sync = apply_ctx .apply_res .iter() .any(|res| res.region_id == self.delegate.region_id()) && self.delegate.last_flush_applied_index != applied_index; - (|| fail_point!("apply_on_handle_snapshot_sync", |_| { need_sync = true }))(); - if need_sync { + let force_sync_fp = || { + fail_point!("apply_on_handle_snapshot_sync", |_| true); + false + }; + if need_sync || force_sync_fp() { if apply_ctx.timer.is_none() { apply_ctx.timer = Some(Instant::now_coarse()); } diff 
--git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 3ae6b74a13c..e4707947fbb 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -572,7 +572,7 @@ where self.stopped } - /// Set a mailbox to Fsm, which should be used to send message to itself. + /// Set a mailbox to FSM, which should be used to send message to itself. #[inline] fn set_mailbox(&mut self, mailbox: Cow<'_, BasicMailbox>) where @@ -581,7 +581,7 @@ where self.mailbox = Some(mailbox.into_owned()); } - /// Take the mailbox from Fsm. Implementation should ensure there will be + /// Take the mailbox from FSM. Implementation should ensure there will be /// no reference to mailbox after calling this method. #[inline] fn take_mailbox(&mut self) -> Option> @@ -631,6 +631,7 @@ where .propose .request_wait_time .observe(duration_to_sec(cmd.send_time.saturating_elapsed()) as f64); + if let Some(Err(e)) = cmd.extra_opts.deadline.map(|deadline| deadline.check()) { cmd.callback.invoke_with_response(new_error(e.into())); continue; @@ -648,14 +649,14 @@ where { self.fsm.batch_req_builder.add(cmd, req_size); if self.fsm.batch_req_builder.should_finish(&self.ctx.cfg) { - self.propose_batch_raft_command(true); + self.propose_pending_batch_raft_command(); } } else { self.propose_raft_command( cmd.request, cmd.callback, cmd.extra_opts.disk_full_opt, - ) + ); } } PeerMsg::Tick(tick) => self.on_tick(tick), @@ -688,53 +689,57 @@ where } } } + self.on_loop_finished(); + } + + #[inline] + fn on_loop_finished(&mut self) { + let ready_concurrency = self.ctx.cfg.cmd_batch_concurrent_ready_max_count; + let should_propose = self.ctx.sync_write_worker.is_some() + || ready_concurrency == 0 + || self.fsm.peer.unpersisted_ready_len() < ready_concurrency; + let force_delay_fp = || { + fail_point!( + "force_delay_propose_batch_raft_command", + self.ctx.sync_write_worker.is_none(), + |_| true + ); + false + }; // Propose batch request 
which may be still waiting for more raft-command - if self.ctx.sync_write_worker.is_some() { - self.propose_batch_raft_command(true); - } else { - self.propose_batch_raft_command(false); - self.check_batch_cmd_and_proposed_cb(); + if should_propose && !force_delay_fp() { + self.propose_pending_batch_raft_command(); + } else if self.fsm.batch_req_builder.has_proposed_cb + && self.fsm.batch_req_builder.propose_checked.is_none() + && let Some(cmd) = self.fsm.batch_req_builder.request.take() + { + // We are delaying these requests to next loop. Try to fulfill their + // proposed callback early. + self.fsm.batch_req_builder.propose_checked = Some(false); + if let Ok(None) = self.pre_propose_raft_command(&cmd) { + if self.fsm.peer.will_likely_propose(&cmd) { + self.fsm.batch_req_builder.propose_checked = Some(true); + for cb in &mut self.fsm.batch_req_builder.callbacks { + cb.invoke_proposed(); + } + } + } + self.fsm.batch_req_builder.request = Some(cmd); } } - fn propose_batch_raft_command(&mut self, force: bool) { + /// Flushes all pending raft commands for immediate execution. 
+ #[inline] + fn propose_pending_batch_raft_command(&mut self) { if self.fsm.batch_req_builder.request.is_none() { return; } - if !force - && self.ctx.cfg.cmd_batch_concurrent_ready_max_count != 0 - && self.fsm.peer.unpersisted_ready_len() - >= self.ctx.cfg.cmd_batch_concurrent_ready_max_count - { - return; - } - fail_point!("propose_batch_raft_command", !force, |_| {}); let (request, callback) = self .fsm .batch_req_builder .build(&mut self.ctx.raft_metrics) .unwrap(); - self.propose_raft_command_internal(request, callback, DiskFullOpt::NotAllowedOnFull) - } - - fn check_batch_cmd_and_proposed_cb(&mut self) { - if self.fsm.batch_req_builder.request.is_none() - || !self.fsm.batch_req_builder.has_proposed_cb - || self.fsm.batch_req_builder.propose_checked.is_some() - { - return; - } - let cmd = self.fsm.batch_req_builder.request.take().unwrap(); - self.fsm.batch_req_builder.propose_checked = Some(false); - if let Ok(None) = self.pre_propose_raft_command(&cmd) { - if self.fsm.peer.will_likely_propose(&cmd) { - self.fsm.batch_req_builder.propose_checked = Some(true); - for cb in &mut self.fsm.batch_req_builder.callbacks { - cb.invoke_proposed(); - } - } - } - self.fsm.batch_req_builder.request = Some(cmd); + self.propose_raft_command_internal(request, callback, DiskFullOpt::NotAllowedOnFull); } fn on_update_replication_mode(&mut self) { @@ -3016,9 +3021,7 @@ where ); } None => { - if self.fsm.batch_req_builder.request.is_some() { - self.propose_batch_raft_command(true); - } + self.propose_pending_batch_raft_command(); if self.propose_locks_before_transfer_leader(msg) { // If some pessimistic locks are just proposed, we propose another // TransferLeader command instead of transferring leader immediately. @@ -4796,20 +4799,17 @@ where } } - /// Propose batched raft commands(if any) first, then propose the given raft - /// command. + /// Proposes pending batch raft commands (if any), then proposes the + /// provided raft command. 
+ #[inline] fn propose_raft_command( &mut self, msg: RaftCmdRequest, cb: Callback, diskfullopt: DiskFullOpt, ) { - if let Some((request, callback)) = - self.fsm.batch_req_builder.build(&mut self.ctx.raft_metrics) - { - self.propose_raft_command_internal(request, callback, DiskFullOpt::NotAllowedOnFull); - } - + // Propose pending commands before processing new one. + self.propose_pending_batch_raft_command(); self.propose_raft_command_internal(msg, cb, diskfullopt); } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 99287ca493c..17fe22926d1 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -3385,16 +3385,16 @@ where true } - /// Propose a request. + /// Proposes a request. /// - /// Return true means the request has been proposed successfully. + /// Return whether the request has been proposed successfully. pub fn propose( &mut self, ctx: &mut PollContext, mut cb: Callback, req: RaftCmdRequest, mut err_resp: RaftCmdResponse, - disk_full_opt: DiskFullOpt, + mut disk_full_opt: DiskFullOpt, ) -> bool { if self.pending_remove { return false; @@ -3421,53 +3421,11 @@ where } Ok(RequestPolicy::ProposeNormal) => { // For admin cmds, only region split/merge comes here. - let mut stores = Vec::new(); - let mut opt = disk_full_opt; - let mut maybe_transfer_leader = false; if req.has_admin_request() { - opt = DiskFullOpt::AllowedOnAlmostFull; - } - if self.check_proposal_normal_with_disk_usage( - ctx, - opt, - &mut stores, - &mut maybe_transfer_leader, - ) { - self.propose_normal(ctx, req) - } else { - // If leader node is disk full, try to transfer leader to a node with disk usage - // normal to keep write availability not downback. - // if majority node is disk full, to transfer leader or not is not necessary. - // Note: Need to exclude learner node. 
- if maybe_transfer_leader && !self.disk_full_peers.majority { - let target_peer = self - .get_store() - .region() - .get_peers() - .iter() - .find(|x| { - !self.disk_full_peers.has(x.get_id()) - && x.get_id() != self.peer.get_id() - && !self.down_peer_ids.contains(&x.get_id()) - && !matches!(x.get_role(), PeerRole::Learner) - }) - .cloned(); - if let Some(p) = target_peer { - debug!( - "try to transfer leader because of current leader disk full: region id = {}, peer id = {}; target peer id = {}", - self.region_id, - self.peer.get_id(), - p.get_id() - ); - self.pre_transfer_leader(&p); - } - } - let errmsg = format!( - "propose failed: tikv disk full, cmd diskFullOpt={:?}, leader diskUsage={:?}", - disk_full_opt, ctx.self_disk_usage - ); - Err(Error::DiskFull(stores, errmsg)) + disk_full_opt = DiskFullOpt::AllowedOnAlmostFull; } + self.check_normal_proposal_with_disk_full_opt(ctx, disk_full_opt) + .and_then(|_| self.propose_normal(ctx, req)) } Ok(RequestPolicy::ProposeConfChange) => self.propose_conf_change(ctx, &req), Err(e) => Err(e), @@ -4837,56 +4795,74 @@ where // Check disk usages for the peer itself and other peers in the raft group. // The return value indicates whether the proposal is allowed or not. - fn check_proposal_normal_with_disk_usage( + fn check_normal_proposal_with_disk_full_opt( &mut self, ctx: &mut PollContext, disk_full_opt: DiskFullOpt, - disk_full_stores: &mut Vec, - maybe_transfer_leader: &mut bool, - ) -> bool { - // check self disk status. - let allowed = match ctx.self_disk_usage { + ) -> Result<()> { + let leader_allowed = match ctx.self_disk_usage { DiskUsage::Normal => true, DiskUsage::AlmostFull => !matches!(disk_full_opt, DiskFullOpt::NotAllowedOnFull), DiskUsage::AlreadyFull => false, }; - - if !allowed { + let mut disk_full_stores = Vec::new(); + if !leader_allowed { disk_full_stores.push(ctx.store.id); - *maybe_transfer_leader = true; - return false; - } - - // If all followers diskusage normal, then allowed. 
- if self.disk_full_peers.is_empty() { - return true; - } - - for peer in self.get_store().region().get_peers() { - let (peer_id, store_id) = (peer.get_id(), peer.get_store_id()); - if self.disk_full_peers.peers.get(&peer_id).is_some() { - disk_full_stores.push(store_id); + // Try to transfer leader to a node with disk usage normal to maintain write + // availability. If majority node is disk full, to transfer leader or not is not + // necessary. Note: Need to exclude learner node. + if !self.disk_full_peers.majority { + let target_peer = self + .get_store() + .region() + .get_peers() + .iter() + .find(|x| { + !self.disk_full_peers.has(x.get_id()) + && x.get_id() != self.peer.get_id() + && !self.down_peer_ids.contains(&x.get_id()) + && !matches!(x.get_role(), PeerRole::Learner) + }) + .cloned(); + if let Some(p) = target_peer { + debug!( + "try to transfer leader because of current leader disk full"; + "region_id" => self.region_id, + "peer_id" => self.peer.get_id(), + "target_peer_id" => p.get_id(), + ); + self.pre_transfer_leader(&p); + } + } + } else { + // Check followers. + if self.disk_full_peers.is_empty() { + return Ok(()); + } + if !self.dangerous_majority_set { + if !self.disk_full_peers.majority { + return Ok(()); + } + // Majority peers are in disk full status but the request carries a special + // flag. + if matches!(disk_full_opt, DiskFullOpt::AllowedOnAlmostFull) + && self.disk_full_peers.peers.values().any(|x| x.1) + { + return Ok(()); + } + } + for peer in self.get_store().region().get_peers() { + let (peer_id, store_id) = (peer.get_id(), peer.get_store_id()); + if self.disk_full_peers.peers.get(&peer_id).is_some() { + disk_full_stores.push(store_id); + } } } - - // if there are some peers with disk already full status in the majority set, - // should not allowed. 
- if self.dangerous_majority_set { - return false; - } - - if !self.disk_full_peers.majority { - return true; - } - - if matches!(disk_full_opt, DiskFullOpt::AllowedOnAlmostFull) - && self.disk_full_peers.peers.values().any(|x| x.1) - { - // Majority peers are in disk full status but the request carries a special - // flag. - return true; - } - false + let errmsg = format!( + "propose failed: tikv disk full, cmd diskFullOpt={:?}, leader diskUsage={:?}", + disk_full_opt, ctx.self_disk_usage + ); + Err(Error::DiskFull(disk_full_stores, errmsg)) } /// Check if the command will be likely to pass all the check and propose. @@ -5322,6 +5298,7 @@ where self.raft_group.raft.r.max_msg_size = ctx.cfg.raft_max_size_per_msg.0; } + #[inline] fn maybe_inject_propose_error( &self, #[allow(unused_variables)] req: &RaftCmdRequest, diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 97e8ee85d86..4ac03e2578b 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -17,7 +17,6 @@ use std::{ use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; use engine_traits::{KvEngine, RaftEngine}; -#[cfg(feature = "failpoints")] use fail::fail_point; use futures::{compat::Future01CompatExt, FutureExt}; use grpcio_health::{HealthService, ServingStatus}; @@ -439,7 +438,6 @@ const DEFAULT_LOAD_BASE_SPLIT_CHECK_INTERVAL: Duration = Duration::from_secs(1); const DEFAULT_COLLECT_TICK_INTERVAL: Duration = Duration::from_secs(1); fn default_collect_tick_interval() -> Duration { - #[cfg(feature = "failpoints")] fail_point!("mock_collect_tick_interval", |_| { Duration::from_millis(1) }); @@ -447,7 +445,6 @@ fn default_collect_tick_interval() -> Duration { } fn config(interval: Duration) -> Duration { - #[cfg(feature = "failpoints")] fail_point!("mock_min_resolved_ts_interval", |_| { Duration::from_millis(50) }); @@ -721,21 +718,18 @@ const HOTSPOT_REPORT_CAPACITY: usize 
= 1000; // TODO: support dynamic configure threshold in future. fn hotspot_key_report_threshold() -> u64 { - #[cfg(feature = "failpoints")] fail_point!("mock_hotspot_threshold", |_| { 0 }); HOTSPOT_KEY_RATE_THRESHOLD * 10 } fn hotspot_byte_report_threshold() -> u64 { - #[cfg(feature = "failpoints")] fail_point!("mock_hotspot_threshold", |_| { 0 }); HOTSPOT_BYTE_RATE_THRESHOLD * 10 } fn hotspot_query_num_report_threshold() -> u64 { - #[cfg(feature = "failpoints")] fail_point!("mock_hotspot_threshold", |_| { 0 }); HOTSPOT_QUERY_RATE_THRESHOLD * 10 diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 7c698905b72..fc984dd1a50 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -616,7 +616,6 @@ impl AutoSplitController { } fn is_grpc_poll_busy(&self, avg_grpc_thread_usage: f64) -> bool { - #[cfg(feature = "failpoints")] fail::fail_point!("mock_grpc_poll_is_not_busy", |_| { false }); if self.max_grpc_thread_count == 0 { return false; @@ -629,7 +628,6 @@ impl AutoSplitController { } fn is_unified_read_pool_busy(&self, unified_read_pool_thread_usage: f64) -> bool { - #[cfg(feature = "failpoints")] fail::fail_point!("mock_unified_read_pool_is_busy", |_| { true }); if self.max_unified_read_pool_thread_count == 0 { return false; @@ -644,7 +642,6 @@ impl AutoSplitController { } fn is_region_busy(&self, unified_read_pool_thread_usage: f64, region_cpu_usage: f64) -> bool { - #[cfg(feature = "failpoints")] fail::fail_point!("mock_region_is_busy", |_| { true }); if unified_read_pool_thread_usage <= 0.0 || !self.should_check_region_cpu() { return false; diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index bc0e8a59303..df1a18ab06d 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -1012,7 +1012,6 @@ where self.last_hash.1 as usize }; - #[allow(unused_mut)] let mut 
transport_on_send_store_fp = || { fail_point!( "transport_on_send_snapshot", diff --git a/tests/failpoints/cases/test_cmd_epoch_checker.rs b/tests/failpoints/cases/test_cmd_epoch_checker.rs index 1068b35f8d5..9de8911754b 100644 --- a/tests/failpoints/cases/test_cmd_epoch_checker.rs +++ b/tests/failpoints/cases/test_cmd_epoch_checker.rs @@ -111,12 +111,12 @@ fn test_reject_proposal_during_region_split() { .unwrap_err(); // Try to put a key. - let propose_batch_raft_command_fp = "propose_batch_raft_command"; + let force_delay_propose_batch_raft_command_fp = "force_delay_propose_batch_raft_command"; let mut receivers = vec![]; for i in 0..2 { if i == 1 { // Test another path of calling proposed callback. - fail::cfg(propose_batch_raft_command_fp, "2*return").unwrap(); + fail::cfg(force_delay_propose_batch_raft_command_fp, "2*return").unwrap(); } let write_req = make_write_req(&mut cluster, b"k1"); let (cb, cb_receivers) = make_cb(&write_req); @@ -190,12 +190,12 @@ fn test_reject_proposal_during_region_merge() { .unwrap_err(); // Try to put a key on the source region. - let propose_batch_raft_command_fp = "propose_batch_raft_command"; + let force_delay_propose_batch_raft_command_fp = "force_delay_propose_batch_raft_command"; let mut receivers = vec![]; for i in 0..2 { if i == 1 { // Test another path of calling proposed callback. - fail::cfg(propose_batch_raft_command_fp, "2*return").unwrap(); + fail::cfg(force_delay_propose_batch_raft_command_fp, "2*return").unwrap(); } let write_req = make_write_req(&mut cluster, b"a"); let (cb, cb_receivers) = make_cb(&write_req); @@ -231,7 +231,7 @@ fn test_reject_proposal_during_region_merge() { for i in 0..2 { if i == 1 { // Test another path of calling proposed callback. 
- fail::cfg(propose_batch_raft_command_fp, "2*return").unwrap(); + fail::cfg(force_delay_propose_batch_raft_command_fp, "2*return").unwrap(); } let write_req = make_write_req(&mut cluster, b"a"); let (cb, cb_receivers) = make_cb(&write_req); @@ -248,7 +248,7 @@ fn test_reject_proposal_during_region_merge() { for i in 0..2 { if i == 1 { // Test another path of calling proposed callback. - fail::cfg(propose_batch_raft_command_fp, "2*return").unwrap(); + fail::cfg(force_delay_propose_batch_raft_command_fp, "2*return").unwrap(); } let write_req = make_write_req(&mut cluster, b"k"); let (cb, cb_receivers) = make_cb(&write_req); @@ -314,11 +314,11 @@ fn test_reject_proposal_during_rollback_region_merge() { // Write request is rejected because the source region is merging. // It's not handled by epoch checker now. - let propose_batch_raft_command_fp = "propose_batch_raft_command"; + let force_delay_propose_batch_raft_command_fp = "force_delay_propose_batch_raft_command"; for i in 0..2 { if i == 1 { // Test another path of calling proposed callback. - fail::cfg(propose_batch_raft_command_fp, "2*return").unwrap(); + fail::cfg(force_delay_propose_batch_raft_command_fp, "2*return").unwrap(); } let write_req = make_write_req(&mut cluster, b"a"); let (cb, cb_receivers) = make_cb(&write_req); @@ -367,11 +367,11 @@ fn test_reject_proposal_during_leader_transfer() { sleep_ms(100); assert_ne!(cluster.leader_of_region(r).unwrap(), new_peer(2, 2)); - let propose_batch_raft_command_fp = "propose_batch_raft_command"; + let force_delay_propose_batch_raft_command_fp = "force_delay_propose_batch_raft_command"; for i in 0..2 { if i == 1 { // Test another path of calling proposed callback. 
- fail::cfg(propose_batch_raft_command_fp, "2*return").unwrap(); + fail::cfg(force_delay_propose_batch_raft_command_fp, "2*return").unwrap(); } let write_req = make_write_req(&mut cluster, b"k"); let (cb, cb_receivers) = make_cb(&write_req); @@ -485,8 +485,8 @@ fn test_propose_before_transfer_leader() { cluster.must_transfer_leader(1, new_peer(1, 1)); cluster.must_put(b"k", b"v"); - let propose_batch_raft_command_fp = "propose_batch_raft_command"; - fail::cfg(propose_batch_raft_command_fp, "return").unwrap(); + let force_delay_propose_batch_raft_command_fp = "force_delay_propose_batch_raft_command"; + fail::cfg(force_delay_propose_batch_raft_command_fp, "return").unwrap(); let write_req = make_write_req(&mut cluster, b"k1"); let (cb, cb_receivers) = make_cb(&write_req); @@ -514,8 +514,8 @@ fn test_propose_before_split_and_merge() { cluster.must_transfer_leader(1, new_peer(1, 1)); cluster.must_put(b"k", b"v"); - let propose_batch_raft_command_fp = "propose_batch_raft_command"; - fail::cfg(propose_batch_raft_command_fp, "return").unwrap(); + let force_delay_propose_batch_raft_command_fp = "force_delay_propose_batch_raft_command"; + fail::cfg(force_delay_propose_batch_raft_command_fp, "return").unwrap(); let write_req = make_write_req(&mut cluster, b"k1"); let (cb, cb_receivers) = make_cb(&write_req); From 2780bbaf812d36b59e4586f2c6ce5b9c4d5ec03f Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 12 Aug 2022 18:38:44 +0800 Subject: [PATCH 0153/1149] engine: upgrade raft-engine to support log recycling (#13231) * Update dependency on RaftEngine for supporting to open `Recycle Log Files` feature. Signed-off-by: Lucasliang * Supply extra implementations to the ENV::APIs in raft_log_engine. Signed-off-by: lucasliang * Refine the code-path in engine.rs. Signed-off-by: lucasliang * Update Cargo.toml. Signed-off-by: lucasliang * Bugfix for lacking atomicities in the processing of `rename` and `reuse`. 
This commit includes: * Fix the bug of locking atocmicity in the operations of `rename` and `reuse`; * Meanwhile, the related callings of `link_file` have been enhanced with safety. Signed-off-by: lucasliang * Refine the format of annoations in etc/config-template.toml. Signed-off-by: lucasliang * Make the annotations of `reuse` and configurations in RaftEngine more readable. Signed-off-by: lucasliang * Refine the annotation in `reuse`. Signed-off-by: lucasliang * Remove unnecessary restraints of raft-engine lib in cargo.toml. Signed-off-by: lucasliang Signed-off-by: Lucasliang Signed-off-by: lucasliang Co-authored-by: Xinye Tao Co-authored-by: Ti Chi Robot --- Cargo.lock | 52 +++++++++++++++++++-- components/raft_log_engine/src/engine.rs | 42 +++++++++++++++++ components/raftstore/src/store/snap.rs | 12 +++-- components/sst_importer/src/import_file.rs | 22 +++++++-- components/sst_importer/src/sst_importer.rs | 10 +++- etc/config-template.toml | 17 +++++++ 6 files changed, 141 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 41a5df4c1ed..802a0e19487 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1770,7 +1770,7 @@ dependencies = [ "serde", "slog", "slog-global", - "strum", + "strum 0.20.0", "tempfile", "thread_local", "tikv_alloc", @@ -2465,6 +2465,12 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "if_chain" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" + [[package]] name = "indexmap" version = "1.6.2" @@ -4036,7 +4042,7 @@ dependencies = [ [[package]] name = "raft-engine" version = "0.2.0" -source = "git+https://github.com/tikv/raft-engine.git#7a436eae40a6b62371123c96941e058b7fe52b63" +source = "git+https://github.com/tikv/raft-engine.git#6a6fe3bd2e0a1ca0b4fc643800ddc93abe74cd87" dependencies = [ "byteorder", "crc32fast", @@ -4045,6 +4051,7 @@ dependencies = [ "fs2", "hashbrown 0.12.0", "hex 0.4.2", + 
"if_chain", "lazy_static", "libc 0.2.125", "log", @@ -4061,13 +4068,15 @@ dependencies = [ "rhai", "scopeguard", "serde", + "serde_repr", + "strum 0.24.1", "thiserror", ] [[package]] name = "raft-engine-ctl" version = "0.2.0" -source = "git+https://github.com/tikv/raft-engine.git#7a436eae40a6b62371123c96941e058b7fe52b63" +source = "git+https://github.com/tikv/raft-engine.git#6a6fe3bd2e0a1ca0b4fc643800ddc93abe74cd87" dependencies = [ "clap 3.1.6", "env_logger", @@ -5001,6 +5010,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_repr" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fe39d9fbb0ebf5eb2c7cb7e2a47e4f462fad1379f1166b8ae49ad9eae89a7ca" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde_urlencoded" version = "0.7.0" @@ -5380,7 +5400,16 @@ version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7318c509b5ba57f18533982607f24070a55d353e90d4cae30c467cdb2ad5ac5c" dependencies = [ - "strum_macros", + "strum_macros 0.20.1", +] + +[[package]] +name = "strum" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +dependencies = [ + "strum_macros 0.24.2", ] [[package]] @@ -5395,6 +5424,19 @@ dependencies = [ "syn", ] +[[package]] +name = "strum_macros" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4faebde00e8ff94316c01800f9054fd2ba77d30d9e922541913051d1d978918b" +dependencies = [ + "heck 0.4.0", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + [[package]] name = "subtle" version = "2.3.0" @@ -6072,7 +6114,7 @@ dependencies = [ "slog", "slog-global", "sst_importer", - "strum", + "strum 0.20.0", "sysinfo", "tempfile", "test_sst_importer", diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 49183245785..dd7c222845c 
100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -194,6 +194,48 @@ impl FileSystem for ManagedFileSystem { self.base_file_system.delete(path) } + fn rename>(&self, src_path: P, dst_path: P) -> IoResult<()> { + if let Some(ref manager) = self.key_manager { + // Note: `rename` will reuse the old entryption info from `src_path`. + let src_str = src_path.as_ref().to_str().unwrap(); + let dst_str = dst_path.as_ref().to_str().unwrap(); + manager.link_file(src_str, dst_str)?; + let r = self + .base_file_system + .rename(src_path.as_ref(), dst_path.as_ref()); + let del_file = if r.is_ok() { src_str } else { dst_str }; + if let Err(e) = manager.delete_file(del_file) { + warn!("fail to remove encryption metadata during 'rename'"; "err" => ?e); + } + r + } else { + self.base_file_system.rename(src_path, dst_path) + } + } + + fn reuse>(&self, src_path: P, dst_path: P) -> IoResult<()> { + if let Some(ref manager) = self.key_manager { + // Note: In contrast to `rename`, `reuse` will make sure the encryption + // metadata is properly updated by rotating the encryption key for safety, + // when encryption flag is true. It won't rewrite the data blocks with + // the updated encryption metadata. Therefore, the old encrypted data + // won't be accessible after this calling. 
+ let src_str = src_path.as_ref().to_str().unwrap(); + let dst_str = dst_path.as_ref().to_str().unwrap(); + manager.new_file(dst_path.as_ref().to_str().unwrap())?; + let r = self + .base_file_system + .rename(src_path.as_ref(), dst_path.as_ref()); + let del_file = if r.is_ok() { src_str } else { dst_str }; + if let Err(e) = manager.delete_file(del_file) { + warn!("fail to remove encryption metadata during 'reuse'"; "err" => ?e); + } + r + } else { + self.base_file_system.rename(src_path, dst_path) + } + } + fn exists_metadata>(&self, path: P) -> bool { if let Some(ref manager) = self.key_manager { if let Ok(info) = manager.get_file(path.as_ref().to_str().unwrap()) { diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 9a279029fd5..74cfd5ab0d6 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1771,8 +1771,6 @@ impl SnapManagerCore { let tmp_file_paths = cf_file.tmp_file_paths(); let file_paths = cf_file.file_paths(); for (i, tmp_file_path) in tmp_file_paths.iter().enumerate() { - file_system::rename(&tmp_file_path, &file_paths[i])?; - let mgr = self.encryption_key_manager.as_ref(); if let Some(mgr) = &mgr { let src = &tmp_file_path; @@ -1786,7 +1784,15 @@ impl SnapManagerCore { } return Err(e.into()); } - mgr.delete_file(src)?; + let r = file_system::rename(src, dst); + let del_file = if r.is_ok() { src } else { dst }; + if let Err(e) = mgr.delete_file(del_file) { + warn!("fail to remove encryption metadata during 'rename_tmp_cf_file_for_send'"; + "err" => ?e); + } + r?; + } else { + file_system::rename(&tmp_file_path, &file_paths[i])?; } let file = Path::new(&file_paths[i]); let (checksum, size) = calc_checksum_and_size(file, mgr)?; diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index 60f72052b10..e83255942fd 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ 
-50,7 +50,6 @@ pub struct ImportPath { impl ImportPath { // move file from temp to save. pub fn save(mut self, key_manager: Option<&DataKeyManager>) -> Result<()> { - file_system::rename(&self.temp, &self.save)?; if let Some(key_manager) = key_manager { let temp_str = self .temp @@ -61,7 +60,15 @@ impl ImportPath { .to_str() .ok_or_else(|| Error::InvalidSstPath(self.save.clone()))?; key_manager.link_file(temp_str, save_str)?; - key_manager.delete_file(temp_str)?; + let r = file_system::rename(&self.temp, &self.save); + let del_file = if r.is_ok() { temp_str } else { save_str }; + if let Err(e) = key_manager.delete_file(del_file) { + warn!("fail to remove encryption metadata during 'save'"; + "file" => ?self, "err" => ?e); + } + r?; + } else { + file_system::rename(&self.temp, &self.save)?; } // sync the directory after rename self.save.pop(); @@ -137,12 +144,19 @@ impl ImportFile { "finalize SST write cache", )); } - file_system::rename(&self.path.temp, &self.path.save)?; if let Some(ref manager) = self.key_manager { let tmp_str = self.path.temp.to_str().unwrap(); let save_str = self.path.save.to_str().unwrap(); manager.link_file(tmp_str, save_str)?; - manager.delete_file(self.path.temp.to_str().unwrap())?; + let r = file_system::rename(&self.path.temp, &self.path.save); + let del_file = if r.is_ok() { tmp_str } else { save_str }; + if let Err(e) = manager.delete_file(del_file) { + warn!("fail to remove encryption metadata during finishing importing files."; + "err" => ?e); + } + r?; + } else { + file_system::rename(&self.path.temp, &self.path.save)?; } Ok(()) } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index ce55e7beb41..7e40859b127 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -584,7 +584,6 @@ impl SstImporter { })()?; if let Some(range) = direct_retval { - file_system::rename(&path.temp, &path.save)?; if let Some(key_manager) = 
&self.key_manager { let temp_str = path .temp @@ -595,7 +594,14 @@ impl SstImporter { .to_str() .ok_or_else(|| Error::InvalidSstPath(path.save.clone()))?; key_manager.link_file(temp_str, save_str)?; - key_manager.delete_file(temp_str)?; + let r = file_system::rename(&path.temp, &path.save); + let del_file = if r.is_ok() { temp_str } else { save_str }; + if let Err(e) = key_manager.delete_file(del_file) { + warn!("fail to remove encryption metadata during 'do_download'"; "err" => ?e); + } + r?; + } else { + file_system::rename(&path.temp, &path.save)?; } IMPORTER_DOWNLOAD_DURATION .with_label_values(&["rename"]) diff --git a/etc/config-template.toml b/etc/config-template.toml index 795a82f371c..558612151ec 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -1086,6 +1086,23 @@ ## When it's not set, 15% of available system memory will be used. # memory-limit = "1GB" +## Version of the log file in Raft Engine. +## +## Candidates: +## 1: Can be read by TiKV release 6.1 and above. +## 2: Can be read by TiKV release 6.3 and above. Supports log recycling. +## +## Default: 1. +# format-version = 1 + +## Whether to recycle stale log files in Raft Engine. +## If `true`, logically purged log files will be reserved for recycling. +## Only available for `format-version` >= 2. This option is only +## available when TiKV >= 6.3.x. +## +## Default: false. +# enable-log-recycle = false + [security] ## The path for TLS certificates. Empty string means disabling secure connections. # ca-path = "" From 8be9d449b6cf86a5e4731d032ba1d8cd7f736da8 Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 15 Aug 2022 01:46:51 -0700 Subject: [PATCH 0154/1149] engine_traits: clean up sst iterator (#13277) ref tikv/tikv#13058 There are 3 notable changes: - Correctly implement iterator for `SstReader`. - Remove unnecessary methods. - Make interface taking mutable references correctly. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/engine_panic/src/lib.rs | 1 + components/engine_panic/src/misc.rs | 4 +- components/engine_panic/src/snapshot.rs | 6 +-- components/engine_panic/src/sst.rs | 21 +++++----- components/engine_panic/src/write_batch.rs | 2 +- components/engine_rocks/src/file_system.rs | 4 +- components/engine_rocks/src/lib.rs | 1 + components/engine_rocks/src/misc.rs | 12 ++++-- components/engine_rocks/src/snapshot.rs | 6 +-- components/engine_rocks/src/sst.rs | 39 +++++++------------ components/engine_rocks/src/write_batch.rs | 2 +- components/engine_traits/src/engines.rs | 4 +- components/engine_traits/src/errors.rs | 5 +++ components/engine_traits/src/iterable.rs | 25 ++++++++++-- components/engine_traits/src/lib.rs | 1 + components/engine_traits/src/misc.rs | 4 +- components/engine_traits/src/snapshot.rs | 1 - components/engine_traits/src/sst.rs | 6 +-- components/engine_traits/src/write_batch.rs | 4 +- .../engine_traits_tests/src/cf_names.rs | 22 +---------- components/engine_traits_tests/src/sst.rs | 15 +++---- .../engine_traits_tests/src/write_batch.rs | 4 +- .../src/coprocessor/consistency_check.rs | 6 +-- .../raftstore/src/store/compaction_guard.rs | 11 ++++-- components/raftstore/src/store/fsm/apply.rs | 2 +- .../raftstore/src/store/region_snapshot.rs | 2 +- .../src/store/worker/consistency_check.rs | 4 +- components/sst_importer/src/import_file.rs | 17 ++++---- components/sst_importer/src/sst_importer.rs | 28 ++++++------- src/server/gc_worker/compaction_filter.rs | 4 +- src/server/gc_worker/gc_worker.rs | 2 +- tests/integrations/raftstore/test_stats.rs | 6 +-- .../raftstore/test_update_region_size.rs | 2 +- tests/integrations/storage/test_titan.rs | 6 +-- 34 files changed, 132 insertions(+), 147 deletions(-) diff --git a/components/engine_panic/src/lib.rs b/components/engine_panic/src/lib.rs index 761b31af1d8..0573c936135 100644 --- a/components/engine_panic/src/lib.rs +++ 
b/components/engine_panic/src/lib.rs @@ -9,6 +9,7 @@ //! with your engine's own name; then fill in the implementations; remove //! the allow(unused) attribute; +#![feature(generic_associated_types)] #![allow(unused)] mod cf_names; diff --git a/components/engine_panic/src/misc.rs b/components/engine_panic/src/misc.rs index 9a5cc310fc3..5a78ea66e5a 100644 --- a/components/engine_panic/src/misc.rs +++ b/components/engine_panic/src/misc.rs @@ -5,11 +5,11 @@ use engine_traits::{DeleteStrategy, MiscExt, Range, Result}; use crate::engine::PanicEngine; impl MiscExt for PanicEngine { - fn flush(&self, sync: bool) -> Result<()> { + fn flush_cfs(&self, wait: bool) -> Result<()> { panic!() } - fn flush_cf(&self, cf: &str, sync: bool) -> Result<()> { + fn flush_cf(&self, cf: &str, wait: bool) -> Result<()> { panic!() } diff --git a/components/engine_panic/src/snapshot.rs b/components/engine_panic/src/snapshot.rs index e573402c6d2..cf651db4956 100644 --- a/components/engine_panic/src/snapshot.rs +++ b/components/engine_panic/src/snapshot.rs @@ -9,11 +9,7 @@ use crate::{db_vector::PanicDbVector, engine::PanicEngine}; #[derive(Clone, Debug)] pub struct PanicSnapshot; -impl Snapshot for PanicSnapshot { - fn cf_names(&self) -> Vec<&str> { - panic!() - } -} +impl Snapshot for PanicSnapshot {} impl Peekable for PanicSnapshot { type DbVector = PanicDbVector; diff --git a/components/engine_panic/src/sst.rs b/components/engine_panic/src/sst.rs index d1e5f4b331c..a0f1479604c 100644 --- a/components/engine_panic/src/sst.rs +++ b/components/engine_panic/src/sst.rs @@ -1,10 +1,10 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::path::PathBuf; +use std::{marker::PhantomData, path::PathBuf}; use engine_traits::{ - CfName, ExternalSstFileInfo, IterOptions, Iterable, Iterator, Result, SstCompressionType, - SstExt, SstReader, SstWriter, SstWriterBuilder, + CfName, ExternalSstFileInfo, IterOptions, Iterable, Iterator, RefIterable, Result, + SstCompressionType, SstExt, SstReader, SstWriter, SstWriterBuilder, }; use crate::engine::PanicEngine; @@ -24,22 +24,21 @@ impl SstReader for PanicSstReader { fn verify_checksum(&self) -> Result<()> { panic!() } - fn iter(&self) -> Self::Iterator { - panic!() - } } -impl Iterable for PanicSstReader { - type Iterator = PanicSstReaderIterator; +impl RefIterable for PanicSstReader { + type Iterator<'a> = PanicSstReaderIterator<'a>; - fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { + fn iter(&self, opts: IterOptions) -> Result> { panic!() } } -pub struct PanicSstReaderIterator; +pub struct PanicSstReaderIterator<'a> { + _phantom: PhantomData<&'a ()>, +} -impl Iterator for PanicSstReaderIterator { +impl Iterator for PanicSstReaderIterator<'_> { fn seek(&mut self, key: &[u8]) -> Result { panic!() } diff --git a/components/engine_panic/src/write_batch.rs b/components/engine_panic/src/write_batch.rs index d2dc866ca31..e8ba326590c 100644 --- a/components/engine_panic/src/write_batch.rs +++ b/components/engine_panic/src/write_batch.rs @@ -20,7 +20,7 @@ impl WriteBatchExt for PanicEngine { pub struct PanicWriteBatch; impl WriteBatch for PanicWriteBatch { - fn write_opt(&self, _: &WriteOptions) -> Result<()> { + fn write_opt(&mut self, _: &WriteOptions) -> Result<()> { panic!() } diff --git a/components/engine_rocks/src/file_system.rs b/components/engine_rocks/src/file_system.rs index 614611bc40e..f3211d52d68 100644 --- a/components/engine_rocks/src/file_system.rs +++ b/components/engine_rocks/src/file_system.rs @@ -82,13 +82,13 @@ mod tests { db.put(&data_key(b"a1"), &value).unwrap(); db.put(&data_key(b"a2"), &value).unwrap(); 
assert_eq!(stats.fetch(IoType::Flush, IoOp::Write), 0); - db.flush(true /* sync */).unwrap(); + db.flush_cfs(true /* wait */).unwrap(); assert!(stats.fetch(IoType::Flush, IoOp::Write) > value_size * 2); assert!(stats.fetch(IoType::Flush, IoOp::Write) < value_size * 2 + amplification_bytes); stats.reset(); db.put(&data_key(b"a2"), &value).unwrap(); db.put(&data_key(b"a3"), &value).unwrap(); - db.flush(true /* sync */).unwrap(); + db.flush_cfs(true /* wait */).unwrap(); assert!(stats.fetch(IoType::Flush, IoOp::Write) > value_size * 2); assert!(stats.fetch(IoType::Flush, IoOp::Write) < value_size * 2 + amplification_bytes); stats.reset(); diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index f8b32c72a59..b0e7012bad7 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -16,6 +16,7 @@ //! Please read the engine_trait crate docs before hacking. #![cfg_attr(test, feature(test))] +#![feature(generic_associated_types)] #[allow(unused_extern_crates)] extern crate tikv_alloc; diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index 3e204bbc49f..7cf5d771486 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -127,13 +127,17 @@ impl RocksEngine { } impl MiscExt for RocksEngine { - fn flush(&self, sync: bool) -> Result<()> { - self.as_inner().flush(sync).map_err(r2e) + fn flush_cfs(&self, wait: bool) -> Result<()> { + let mut handles = vec![]; + for cf in self.cf_names() { + handles.push(util::get_cf_handle(self.as_inner(), cf)?); + } + self.as_inner().flush_cfs(&handles, wait).map_err(r2e) } - fn flush_cf(&self, cf: &str, sync: bool) -> Result<()> { + fn flush_cf(&self, cf: &str, wait: bool) -> Result<()> { let handle = util::get_cf_handle(self.as_inner(), cf)?; - self.as_inner().flush_cf(handle, sync).map_err(r2e) + self.as_inner().flush_cf(handle, wait).map_err(r2e) } fn delete_ranges_cf( diff --git 
a/components/engine_rocks/src/snapshot.rs b/components/engine_rocks/src/snapshot.rs index c107601c5d6..b19a32fd739 100644 --- a/components/engine_rocks/src/snapshot.rs +++ b/components/engine_rocks/src/snapshot.rs @@ -32,11 +32,7 @@ impl RocksSnapshot { } } -impl Snapshot for RocksSnapshot { - fn cf_names(&self) -> Vec<&str> { - self.db.cf_names() - } -} +impl Snapshot for RocksSnapshot {} impl Debug for RocksSnapshot { fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { diff --git a/components/engine_rocks/src/sst.rs b/components/engine_rocks/src/sst.rs index 66e0a974916..0518dd7feb5 100644 --- a/components/engine_rocks/src/sst.rs +++ b/components/engine_rocks/src/sst.rs @@ -1,9 +1,9 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::{path::PathBuf, rc::Rc, sync::Arc}; +use std::{path::PathBuf, sync::Arc}; use engine_traits::{ - Error, ExternalSstFileInfo, IterOptions, Iterable, Iterator, Result, SstCompressionType, + Error, ExternalSstFileInfo, IterOptions, Iterator, RefIterable, Result, SstCompressionType, SstExt, SstMetaInfo, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, }; use fail::fail_point; @@ -22,11 +22,8 @@ impl SstExt for RocksEngine { type SstWriterBuilder = RocksSstWriterBuilder; } -// FIXME: like in RocksEngineIterator and elsewhere, here we are using -// Rc to avoid putting references in an associated type, which -// requires generic associated types. 
pub struct RocksSstReader { - inner: Rc, + inner: SstFileReader, } impl RocksSstReader { @@ -50,8 +47,7 @@ impl RocksSstReader { } let mut reader = SstFileReader::new(cf_options); reader.open(path).map_err(r2e)?; - let inner = Rc::new(reader); - Ok(RocksSstReader { inner }) + Ok(RocksSstReader { inner: reader }) } pub fn compression_name(&self) -> String { @@ -71,33 +67,26 @@ impl SstReader for RocksSstReader { self.inner.verify_checksum().map_err(r2e)?; Ok(()) } - fn iter(&self) -> Self::Iterator { - RocksSstIterator(SstFileReader::iter_rc(self.inner.clone())) - } } -impl Iterable for RocksSstReader { - type Iterator = RocksSstIterator; +impl RefIterable for RocksSstReader { + type Iterator<'a> = RocksSstIterator<'a>; - /// Cf is ignored as there is only one cf in sst. - fn iterator_opt(&self, _cf: &str, opts: IterOptions) -> Result { + #[inline] + fn iter(&self, opts: IterOptions) -> Result> { let opt: RocksReadOptions = opts.into(); let opt = opt.into_raw(); - Ok(RocksSstIterator(SstFileReader::iter_opt_rc( - self.inner.clone(), - opt, - ))) + Ok(RocksSstIterator(SstFileReader::iter_opt(&self.inner, opt))) } } -// FIXME: See comment on RocksSstReader for why this contains Rc -pub struct RocksSstIterator(DBIterator>); +pub struct RocksSstIterator<'a>(DBIterator<&'a SstFileReader>); -// TODO(5kbpers): Temporarily force to add `Send` here, add a method for -// creating DBIterator> in rust-rocksdb later. -unsafe impl Send for RocksSstIterator {} +// It's OK to send the iterator around. +// TODO: remove this when using tirocks. 
+unsafe impl Send for RocksSstIterator<'_> {} -impl Iterator for RocksSstIterator { +impl Iterator for RocksSstIterator<'_> { fn seek(&mut self, key: &[u8]) -> Result { self.0.seek(rocksdb::SeekKey::Key(key)).map_err(r2e) } diff --git a/components/engine_rocks/src/write_batch.rs b/components/engine_rocks/src/write_batch.rs index e4028feb411..f617608119b 100644 --- a/components/engine_rocks/src/write_batch.rs +++ b/components/engine_rocks/src/write_batch.rs @@ -99,7 +99,7 @@ impl RocksWriteBatchVec { } impl engine_traits::WriteBatch for RocksWriteBatchVec { - fn write_opt(&self, opts: &WriteOptions) -> Result<()> { + fn write_opt(&mut self, opts: &WriteOptions) -> Result<()> { let opt: RocksWriteOptions = opts.into(); if self.support_write_batch_vec { self.get_db() diff --git a/components/engine_traits/src/engines.rs b/components/engine_traits/src/engines.rs index 4e4089d52dc..d5928a9783a 100644 --- a/components/engine_traits/src/engines.rs +++ b/components/engine_traits/src/engines.rs @@ -20,11 +20,11 @@ impl Engines { } } - pub fn write_kv(&self, wb: &K::WriteBatch) -> Result<()> { + pub fn write_kv(&self, wb: &mut K::WriteBatch) -> Result<()> { wb.write() } - pub fn write_kv_opt(&self, wb: &K::WriteBatch, opts: &WriteOptions) -> Result<()> { + pub fn write_kv_opt(&self, wb: &mut K::WriteBatch, opts: &WriteOptions) -> Result<()> { wb.write_opt(opts) } diff --git a/components/engine_traits/src/errors.rs b/components/engine_traits/src/errors.rs index c9960b50753..6ef46ff7a70 100644 --- a/components/engine_traits/src/errors.rs +++ b/components/engine_traits/src/errors.rs @@ -40,6 +40,11 @@ pub enum SubCode { MemoryLimit = 7, SpaceLimit = 8, PathNotFound = 9, + MergeOperandsInsufficientCapacity = 10, + ManualCompactionPaused = 11, + Overwritten = 12, + TxnNotPrepared = 13, + IoFenced = 14, } #[repr(u8)] diff --git a/components/engine_traits/src/iterable.rs b/components/engine_traits/src/iterable.rs index 9d45fc5b0ac..50fcfc2344b 100644 --- 
a/components/engine_traits/src/iterable.rs +++ b/components/engine_traits/src/iterable.rs @@ -109,6 +109,14 @@ pub trait Iterator: Send { fn valid(&self) -> Result; } +pub trait RefIterable { + type Iterator<'a>: Iterator + where + Self: 'a; + + fn iter(&self, opts: IterOptions) -> Result>; +} + pub trait Iterable { type Iterator: Iterator; @@ -131,10 +139,7 @@ pub trait Iterable { where F: FnMut(&[u8], &[u8]) -> Result, { - let start = KeyBuilder::from_slice(start_key, DATA_KEY_PREFIX_LEN, 0); - let end = - (!end_key.is_empty()).then(|| KeyBuilder::from_slice(end_key, DATA_KEY_PREFIX_LEN, 0)); - let iter_opt = IterOptions::new(Some(start), end, fill_cache); + let iter_opt = iter_option(start_key, end_key, fill_cache); scan_impl(self.iterator_opt(cf, iter_opt)?, start_key, f) } @@ -175,3 +180,15 @@ pub fn collect(mut it: I) -> Vec<(Vec, Vec)> { } v } + +/// Build an `IterOptions` using giving data key bound. Empty upper bound will +/// be ignored. +pub fn iter_option(lower_bound: &[u8], upper_bound: &[u8], fill_cache: bool) -> IterOptions { + let lower_bound = Some(KeyBuilder::from_slice(lower_bound, 0, 0)); + let upper_bound = if upper_bound.is_empty() { + None + } else { + Some(KeyBuilder::from_slice(upper_bound, 0, 0)) + }; + IterOptions::new(lower_bound, upper_bound, fill_cache) +} diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 72794fba5cd..b140da14969 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -250,6 +250,7 @@ //! Likewise `engine_rocks` can temporarily call code from inside `engine`. 
#![feature(min_specialization)] #![feature(assert_matches)] +#![feature(generic_associated_types)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index 0e6b9600da6..f0ba9d03c39 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -29,9 +29,9 @@ pub enum DeleteStrategy { } pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { - fn flush(&self, sync: bool) -> Result<()>; + fn flush_cfs(&self, wait: bool) -> Result<()>; - fn flush_cf(&self, cf: &str, sync: bool) -> Result<()>; + fn flush_cf(&self, cf: &str, wait: bool) -> Result<()>; fn delete_all_in_range(&self, strategy: DeleteStrategy, ranges: &[Range<'_>]) -> Result<()> { for cf in self.cf_names() { diff --git a/components/engine_traits/src/snapshot.rs b/components/engine_traits/src/snapshot.rs index 93ef451209c..7907abd1445 100644 --- a/components/engine_traits/src/snapshot.rs +++ b/components/engine_traits/src/snapshot.rs @@ -12,5 +12,4 @@ pub trait Snapshot where Self: 'static + Peekable + Iterable + Send + Sync + Sized + Debug, { - fn cf_names(&self) -> Vec<&str>; } diff --git a/components/engine_traits/src/sst.rs b/components/engine_traits/src/sst.rs index fb37c918886..a97fe7a8b87 100644 --- a/components/engine_traits/src/sst.rs +++ b/components/engine_traits/src/sst.rs @@ -4,7 +4,7 @@ use std::path::PathBuf; use kvproto::import_sstpb::SstMeta; -use crate::{errors::Result, iterable::Iterable}; +use crate::{errors::Result, RefIterable}; #[derive(Clone, Debug)] pub struct SstMetaInfo { @@ -20,11 +20,9 @@ pub trait SstExt: Sized { } /// SstReader is used to read an SST file. -pub trait SstReader: Iterable + Sized { +pub trait SstReader: RefIterable + Sized { fn open(path: &str) -> Result; fn verify_checksum(&self) -> Result<()>; - // FIXME: Shouldn't this me a method on Iterable? 
- fn iter(&self) -> Self::Iterator; } /// SstWriter is used to create sst files that can be added to database later. diff --git a/components/engine_traits/src/write_batch.rs b/components/engine_traits/src/write_batch.rs index 5d6824a7207..4dc8e47e823 100644 --- a/components/engine_traits/src/write_batch.rs +++ b/components/engine_traits/src/write_batch.rs @@ -71,10 +71,10 @@ pub trait Mutable: Send { /// save point, and pops the save point from the stack. pub trait WriteBatch: Mutable { /// Commit the WriteBatch to disk with the given options - fn write_opt(&self, opts: &WriteOptions) -> Result<()>; + fn write_opt(&mut self, opts: &WriteOptions) -> Result<()>; /// Commit the WriteBatch to disk atomically - fn write(&self) -> Result<()> { + fn write(&mut self) -> Result<()> { self.write_opt(&WriteOptions::default()) } diff --git a/components/engine_traits_tests/src/cf_names.rs b/components/engine_traits_tests/src/cf_names.rs index 2cac1eaff73..f85c2f5df97 100644 --- a/components/engine_traits_tests/src/cf_names.rs +++ b/components/engine_traits_tests/src/cf_names.rs @@ -1,6 +1,6 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{CfNamesExt, KvEngine, Snapshot, ALL_CFS, CF_DEFAULT}; +use engine_traits::{CfNamesExt, ALL_CFS, CF_DEFAULT}; use super::{default_engine, engine_cfs}; @@ -21,23 +21,3 @@ fn cf_names() { assert!(names.contains(cf)); } } - -#[test] -fn default_names_snapshot() { - let db = default_engine(); - let snapshot = db.engine.snapshot(); - let names = snapshot.cf_names(); - assert_eq!(names.len(), 1); - assert_eq!(names[0], CF_DEFAULT); -} - -#[test] -fn cf_names_snapshot() { - let db = engine_cfs(ALL_CFS); - let snapshot = db.engine.snapshot(); - let names = snapshot.cf_names(); - assert_eq!(names.len(), ALL_CFS.len()); - for cf in ALL_CFS { - assert!(names.contains(cf)); - } -} diff --git a/components/engine_traits_tests/src/sst.rs b/components/engine_traits_tests/src/sst.rs index ce4160e5ddc..26ed686aad4 100644 --- a/components/engine_traits_tests/src/sst.rs +++ b/components/engine_traits_tests/src/sst.rs @@ -6,7 +6,8 @@ use std::fs; use engine_test::kv::KvTestEngine; use engine_traits::{ - Error, ExternalSstFileInfo, Iterator, Result, SstExt, SstReader, SstWriter, SstWriterBuilder, + Error, ExternalSstFileInfo, IterOptions, Iterator, RefIterable, Result, SstExt, SstReader, + SstWriter, SstWriterBuilder, }; use panic_hook::recover_safe; @@ -48,7 +49,7 @@ fn basic() -> Result<()> { sst_writer.finish()?; let sst_reader = ::SstReader::open(&sst_path)?; - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; let key = iter.key(); @@ -77,7 +78,7 @@ fn forward() -> Result<()> { sst_writer.finish()?; let sst_reader = ::SstReader::open(&sst_path)?; - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -114,7 +115,7 @@ fn reverse() -> Result<()> { sst_writer.finish()?; let sst_reader = ::SstReader::open(&sst_path)?; - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); 
iter.seek_to_last()?; @@ -152,7 +153,7 @@ fn delete() -> Result<()> { sst_writer.finish()?; let sst_reader = ::SstReader::open(&sst_path)?; - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -206,7 +207,7 @@ fn same_key() -> Result<()> { sst_writer.finish()?; let sst_reader = ::SstReader::open(&sst_path)?; - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; let key = iter.key(); @@ -248,7 +249,7 @@ fn reverse_key() -> Result<()> { sst_writer.finish()?; let sst_reader = ::SstReader::open(&sst_path)?; - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; let key = iter.key(); diff --git a/components/engine_traits_tests/src/write_batch.rs b/components/engine_traits_tests/src/write_batch.rs index e99245adb4b..f13cec0845a 100644 --- a/components/engine_traits_tests/src/write_batch.rs +++ b/components/engine_traits_tests/src/write_batch.rs @@ -20,11 +20,11 @@ fn write_batch_none_no_commit() { #[test] fn write_batch_none() { let db = default_engine(); - let wb = db.engine.write_batch(); + let mut wb = db.engine.write_batch(); wb.write().unwrap(); let db = multi_batch_write_engine(); - let wb = db.engine.write_batch_with_cap(1024); + let mut wb = db.engine.write_batch_with_cap(1024); wb.write().unwrap(); } diff --git a/components/raftstore/src/coprocessor/consistency_check.rs b/components/raftstore/src/coprocessor/consistency_check.rs index 70b55db41f4..5ba97089f85 100644 --- a/components/raftstore/src/coprocessor/consistency_check.rs +++ b/components/raftstore/src/coprocessor/consistency_check.rs @@ -2,7 +2,7 @@ use std::marker::PhantomData; -use engine_traits::{KvEngine, Snapshot, CF_RAFT}; +use engine_traits::{KvEngine, Snapshot, ALL_CFS, CF_RAFT}; use kvproto::metapb::Region; use crate::{ @@ -60,12 +60,10 @@ impl ConsistencyCheckObserver for Raw { 
fn compute_hash_on_raw(region: &Region, snap: &S) -> Result { let region_id = region.get_id(); let mut digest = crc32fast::Hasher::new(); - let mut cf_names = snap.cf_names(); - cf_names.sort_unstable(); let start_key = keys::enc_start_key(region); let end_key = keys::enc_end_key(region); - for cf in cf_names { + for cf in ALL_CFS { snap.scan(cf, &start_key, &end_key, false, |k, v| { digest.update(k); digest.update(v); diff --git a/components/raftstore/src/store/compaction_guard.rs b/components/raftstore/src/store/compaction_guard.rs index c8fb02d424b..78dbccbf585 100644 --- a/components/raftstore/src/store/compaction_guard.rs +++ b/components/raftstore/src/store/compaction_guard.rs @@ -202,7 +202,9 @@ mod tests { util::new_engine_opt, RocksCfOptions, RocksDbOptions, RocksEngine, RocksSstPartitionerFactory, RocksSstReader, }; - use engine_traits::{CompactExt, Iterator, MiscExt, SstReader, SyncMutable, CF_DEFAULT}; + use engine_traits::{ + CompactExt, IterOptions, Iterator, MiscExt, RefIterable, SstReader, SyncMutable, CF_DEFAULT, + }; use keys::DATA_PREFIX_KEY; use kvproto::metapb::Region; use tempfile::TempDir; @@ -399,7 +401,8 @@ mod tests { } fn collect_keys(path: &str) -> Vec> { - let mut sst_reader = RocksSstReader::open(path).unwrap().iter(); + let reader = RocksSstReader::open(path).unwrap(); + let mut sst_reader = reader.iter(IterOptions::default()).unwrap(); let mut valid = sst_reader.seek_to_first().unwrap(); let mut ret = vec![]; while valid { @@ -444,14 +447,14 @@ mod tests { db.put(b"za1", b"").unwrap(); db.put(b"zb1", &value).unwrap(); db.put(b"zc1", &value).unwrap(); - db.flush(true /* sync */).unwrap(); + db.flush_cfs(true /* wait */).unwrap(); db.put(b"zb2", &value).unwrap(); db.put(b"zc2", &value).unwrap(); db.put(b"zc3", &value).unwrap(); db.put(b"zc4", &value).unwrap(); db.put(b"zc5", &value).unwrap(); db.put(b"zc6", &value).unwrap(); - db.flush(true /* sync */).unwrap(); + db.flush_cfs(true /* wait */).unwrap(); db.compact_range( CF_DEFAULT, 
None, // start_key None, // end_key diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 0d97137bab1..d33a262cf6a 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -533,7 +533,7 @@ where self.perf_context.start_observe(); let mut write_opts = engine_traits::WriteOptions::new(); write_opts.set_sync(need_sync); - self.kv_wb().write_opt(&write_opts).unwrap_or_else(|e| { + self.kv_wb_mut().write_opt(&write_opts).unwrap_or_else(|e| { panic!("failed to write to engine: {:?}", e); }); let trackers: Vec<_> = self diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index 86d89fad051..fe58a2587a7 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -460,7 +460,7 @@ mod tests { let db = &engines.kv; for &(ref k, level) in &levels { db.put(&data_key(k), k).unwrap(); - db.flush(true).unwrap(); + db.flush_cfs(true).unwrap(); data.push((k.to_vec(), k.to_vec())); db.compact_files_in_range(Some(&data_key(k)), Some(&data_key(k)), Some(level)) .unwrap(); diff --git a/components/raftstore/src/store/worker/consistency_check.rs b/components/raftstore/src/store/worker/consistency_check.rs index 154f1816dbf..b3bd7ef32d0 100644 --- a/components/raftstore/src/store/worker/consistency_check.rs +++ b/components/raftstore/src/store/worker/consistency_check.rs @@ -128,7 +128,7 @@ mod tests { use byteorder::{BigEndian, WriteBytesExt}; use engine_test::kv::{new_engine, KvTestEngine}; - use engine_traits::{KvEngine, SyncMutable, CF_DEFAULT, CF_RAFT}; + use engine_traits::{KvEngine, SyncMutable, ALL_CFS}; use kvproto::metapb::*; use tempfile::Builder; use tikv_util::worker::Runnable; @@ -141,7 +141,7 @@ mod tests { #[test] fn test_consistency_check() { let path = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); - let db = 
new_engine(path.path().to_str().unwrap(), &[CF_DEFAULT, CF_RAFT]).unwrap(); + let db = new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap(); let mut region = Region::default(); region.mut_peers().push(Peer::default()); diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index e83255942fd..f5292b70075 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -11,7 +11,9 @@ use std::{ use api_version::api_v2::TIDB_RANGES_COMPLEMENT; use encryption::{DataKeyManager, EncrypterWriter}; use engine_rocks::{get_env, RocksSstReader}; -use engine_traits::{EncryptionKeyManager, Iterable, KvEngine, SstMetaInfo, SstReader}; +use engine_traits::{ + iter_option, EncryptionKeyManager, Iterator, KvEngine, RefIterable, SstMetaInfo, SstReader, +}; use file_system::{get_io_rate_limiter, sync_dir, File, OpenOptions}; use kvproto::{import_sstpb::*, kvrpcpb::ApiVersion}; use tikv_util::time::Instant; @@ -330,19 +332,14 @@ impl ImportDir { let sst_reader = RocksSstReader::open_with_env(path_str, Some(env))?; for &(start, end) in TIDB_RANGES_COMPLEMENT { - let mut unexpected_data_key = None; - // No CF in sst. - sst_reader.scan("", start, end, false, |key, _| { - unexpected_data_key = Some(key.to_vec()); - Ok(false) - })?; - - if let Some(unexpected_data_key) = unexpected_data_key { + let opt = iter_option(start, end, false); + let mut iter = sst_reader.iter(opt)?; + if iter.seek(start)? 
{ error!( "unable to import: switch api version with non-tidb key"; "sst" => ?meta.api_version, "current" => ?api_version, - "key" => ?log_wrappers::hex_encode_upper(&unexpected_data_key) + "key" => ?log_wrappers::hex_encode_upper(iter.key()) ); return Ok(false); } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 7e40859b127..806066bd202 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -15,8 +15,8 @@ use encryption::{to_engine_encryption_method, DataKeyManager}; use engine_rocks::{get_env, RocksSstReader}; use engine_traits::{ name_to_cf, util::check_key_in_range, CfName, EncryptionKeyManager, FileEncryptionInfo, - Iterator, KvEngine, SstCompressionType, SstExt, SstMetaInfo, SstReader, SstWriter, - SstWriterBuilder, CF_DEFAULT, CF_WRITE, + IterOptions, Iterator, KvEngine, RefIterable, SstCompressionType, SstExt, SstMetaInfo, + SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, CF_WRITE, }; use file_system::{get_io_rate_limiter, OpenOptions}; use futures::executor::ThreadPool; @@ -548,7 +548,7 @@ impl SstImporter { let start_rename_rewrite = Instant::now(); // read the first and last keys from the SST, determine if we could // simply move the entire SST instead of iterating and generate a new one. - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default())?; let direct_retval = (|| -> Result> { if rewrite_rule.old_key_prefix != rewrite_rule.new_key_prefix || rewrite_rule.new_timestamp != 0 @@ -798,7 +798,7 @@ mod tests { use engine_traits::{ collect, EncryptionMethod, Error as TraitError, ExternalSstFileInfo, Iterable, Iterator, - SstReader, SstWriter, CF_DEFAULT, DATA_CFS, + RefIterable, SstReader, SstWriter, CF_DEFAULT, DATA_CFS, }; use file_system::File; use openssl::hash::{Hasher, MessageDigest}; @@ -1338,7 +1338,7 @@ mod tests { // verifies the SST content is correct. 
let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first().unwrap(); assert_eq!( collect(iter), @@ -1397,7 +1397,7 @@ mod tests { // verifies the SST content is correct. let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), Some(env)); sst_reader.verify_checksum().unwrap(); - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first().unwrap(); assert_eq!( collect(iter), @@ -1445,7 +1445,7 @@ mod tests { // verifies the SST content is correct. let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first().unwrap(); assert_eq!( collect(iter), @@ -1490,7 +1490,7 @@ mod tests { // verifies the SST content is correct. let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first().unwrap(); assert_eq!( collect(iter), @@ -1534,7 +1534,7 @@ mod tests { // verifies the SST content is correct. let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first().unwrap(); assert_eq!( collect(iter), @@ -1675,7 +1675,7 @@ mod tests { // verifies the SST content is correct. 
let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first().unwrap(); assert_eq!( collect(iter), @@ -1719,7 +1719,7 @@ mod tests { // verifies the SST content is correct. let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first().unwrap(); assert_eq!( collect(iter), @@ -1854,7 +1854,7 @@ mod tests { // verifies the SST content is correct. let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first().unwrap(); assert_eq!( collect(iter), @@ -1912,7 +1912,7 @@ mod tests { // verifies the SST content is correct. let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first().unwrap(); assert_eq!( collect(iter), @@ -1967,7 +1967,7 @@ mod tests { // verifies the SST content is correct. 
let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); sst_reader.verify_checksum().unwrap(); - let mut iter = sst_reader.iter(); + let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first().unwrap(); assert_eq!( collect(iter), diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index 1c50b56bed1..ef190f4760e 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -463,7 +463,7 @@ impl WriteCompactionFilter { } fn do_flush( - wb: &RocksWriteBatchVec, + wb: &mut RocksWriteBatchVec, wopts: &WriteOptions, ) -> Result<(), engine_traits::Error> { let _io_type_guard = WithIoType::new(IoType::Gc); @@ -481,7 +481,7 @@ impl WriteCompactionFilter { if self.write_batch.count() > DEFAULT_DELETE_BATCH_COUNT || force { let mut wopts = WriteOptions::default(); wopts.set_no_slowdown(true); - if let Err(e) = do_flush(&self.write_batch, &wopts) { + if let Err(e) = do_flush(&mut self.write_batch, &wopts) { let wb = mem::replace( &mut self.write_batch, self.engine.write_batch_with_cap(DEFAULT_DELETE_BATCH_SIZE), diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 131efd68fac..eaa55c9c69c 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -882,7 +882,7 @@ where limit, ); } - GcTask::OrphanVersions { wb, id } => { + GcTask::OrphanVersions { mut wb, id } => { info!("handling GcTask::OrphanVersions"; "id" => id); let mut wopts = WriteOptions::default(); wopts.set_sync(true); diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index 03c0f0a82b2..2af595c4e5a 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -28,7 +28,7 @@ fn check_available(cluster: &mut Cluster) { for i in 0..1000 { let last_available = stats.get_available(); cluster.must_put(format!("k{}", i).as_bytes(), 
&value); - engine.flush(true).unwrap(); + engine.flush_cfs(true).unwrap(); sleep_ms(20); let stats = pd_client.get_store_stats(1).unwrap(); @@ -59,7 +59,7 @@ fn test_simple_store_stats(cluster: &mut Cluster) { } let engine = cluster.get_engine(1); - engine.flush(true).unwrap(); + engine.flush_cfs(true).unwrap(); let last_stats = pd_client.get_store_stats(1).unwrap(); assert_eq!(last_stats.get_region_count(), 1); @@ -68,7 +68,7 @@ fn test_simple_store_stats(cluster: &mut Cluster) { let region = pd_client.get_region(b"").unwrap(); cluster.must_split(®ion, b"k2"); - engine.flush(true).unwrap(); + engine.flush_cfs(true).unwrap(); // wait report region count after split for _ in 0..100 { diff --git a/tests/integrations/raftstore/test_update_region_size.rs b/tests/integrations/raftstore/test_update_region_size.rs index 4aab144ff27..ee4fb79ac62 100644 --- a/tests/integrations/raftstore/test_update_region_size.rs +++ b/tests/integrations/raftstore/test_update_region_size.rs @@ -9,7 +9,7 @@ use tikv_util::config::*; fn flush(cluster: &mut Cluster) { for engines in cluster.engines.values() { - engines.kv.flush(true).unwrap(); + engines.kv.flush_cfs(true).unwrap(); } } diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index f5e642f161b..5b957b88822 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -211,7 +211,7 @@ fn test_delete_files_in_range_for_titan() { .unwrap(); // Flush and compact the kvs into L6. 
- engines.kv.flush(true).unwrap(); + engines.kv.flush_cfs(true).unwrap(); engines.kv.compact_files_in_range(None, None, None).unwrap(); let db = engines.kv.as_inner(); let value = db.get_property_int("rocksdb.num-files-at-level0").unwrap(); @@ -254,9 +254,9 @@ fn test_delete_files_in_range_for_titan() { // Used to trigger titan gc let engine = &engines.kv; engine.put(b"1", b"1").unwrap(); - engine.flush(true).unwrap(); + engine.flush_cfs(true).unwrap(); engine.put(b"2", b"2").unwrap(); - engine.flush(true).unwrap(); + engine.flush_cfs(true).unwrap(); engine .compact_files_in_range(Some(b"0"), Some(b"3"), Some(1)) .unwrap(); From 73c0bacf2be12c93bbb26727f9d713783dbaa05f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 16 Aug 2022 14:14:51 +0800 Subject: [PATCH 0155/1149] log-backup: fix early return (#13288) close tikv/tikv#13281 Fixed a bug that may cause data loss in log backup. Signed-off-by: Yu Juncen --- Cargo.lock | 2 +- components/backup-stream/Cargo.toml | 2 +- components/backup-stream/src/event_loader.rs | 9 ++- components/backup-stream/tests/mod.rs | 73 +++++++++++++++++++- 4 files changed, 80 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 802a0e19487..1efe0607541 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1620,7 +1620,7 @@ dependencies = [ [[package]] name = "etcd-client" version = "0.7.2" -source = "git+https://github.com/yujuncen/etcd-client?rev=e0321a1990ee561cf042973666c0db61c8d82364#e0321a1990ee561cf042973666c0db61c8d82364" +source = "git+https://github.com/pingcap/etcd-client?rev=e0321a1990ee561cf042973666c0db61c8d82364#e0321a1990ee561cf042973666c0db61c8d82364" dependencies = [ "http", "prost", diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index b0b6fc3f13f..e5bb889420d 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -31,7 +31,7 @@ engine_traits = { path = 
"../engine_traits", default-features = false } error_code = { path = "../error_code" } # We cannot update the etcd-client to latest version because of the cyclic requirement. # Also we need wait until https://github.com/etcdv3/etcd-client/pull/43/files to be merged. -etcd-client = { git = "https://github.com/yujuncen/etcd-client", rev = "e0321a1990ee561cf042973666c0db61c8d82364", features = ["pub-response-field", "tls"] } +etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "e0321a1990ee561cf042973666c0db61c8d82364", features = ["pub-response-field", "tls"] } external_storage = { path = "../external_storage", default-features = false } external_storage_export = { path = "../external_storage/export", default-features = false } fail = "0.5" diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 61e227af1ac..5aade374249 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -391,9 +391,16 @@ where // we only need to record the disk throughput of this. let (stat, disk_read) = utils::with_record_read_throughput(|| event_loader.fill_entries()); + // We must use the size of entry batch here to check whether we have progress. + // Or we may exit too early if there are only records: + // - can be inlined to `write` CF (hence it won't be written to default CF) + // - are prewritten. (hence it will only contains `Prewrite` records). 
+ // In this condition, ALL records generate no ApplyEvent(only lock change), + // and we would exit after the first run of loop :( + let no_progress = event_loader.entry_batch.is_empty(); let stat = stat?; self.with_resolver(region, |r| event_loader.emit_entries_to(&mut events, r))?; - if events.is_empty() { + if no_progress { metrics::INITIAL_SCAN_DURATION.observe(start.saturating_elapsed_secs()); return Ok(stats.stat); } diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index f838e96ddbf..c5d3442fb84 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -158,7 +158,6 @@ impl SuiteBuilder { for id in 1..=(n as u64) { suite.start_endpoint(id, use_v3); } - // TODO: The current mock metastore (slash_etc) doesn't supports multi-version. // We must wait until the endpoints get ready to watching the metastore, or some // modifies may be lost. Either make Endpoint::with_client wait until watch did // start or make slash_etc support multi-version, then we can get rid of this @@ -318,6 +317,19 @@ impl Suite { inserted } + fn commit_keys(&mut self, keys: Vec>, start_ts: TimeStamp, commit_ts: TimeStamp) { + let mut region_keys = HashMap::>>::new(); + for k in keys { + let enc_key = Key::from_raw(&k).into_encoded(); + let region = self.cluster.get_region_id(&enc_key); + region_keys.entry(region).or_default().push(k); + } + + for (region, keys) in region_keys { + self.must_kv_commit(region, keys, start_ts, commit_ts); + } + } + fn just_commit_a_key(&mut self, key: Vec, start_ts: TimeStamp, commit_ts: TimeStamp) { let enc_key = Key::from_raw(&key).into_encoded(); let region = self.cluster.get_region_id(&enc_key); @@ -604,10 +616,13 @@ mod test { errors::Error, metadata::MetadataClient, router::TaskSelector, GetCheckpointResult, RegionCheckpointOperation, RegionSet, Task, }; + use pd_client::PdClient; use tikv_util::{box_err, defer, info, HandyRwLock}; - use txn_types::TimeStamp; + use 
txn_types::{Key, TimeStamp}; - use crate::{make_record_key, make_split_key_at_record, run_async_test, SuiteBuilder}; + use crate::{ + make_record_key, make_split_key_at_record, mutation, run_async_test, SuiteBuilder, + }; #[test] fn basic() { @@ -650,6 +665,58 @@ mod test { suite.cluster.shutdown(); } + /// This test tests whether we can handle some weird transactions and their + /// race with initial scanning. + /// Generally, those transactions: + /// - Has N mutations, which's values are all short enough to be inlined in + /// the `Write` CF. (N > 1024) + /// - Commit the mutation set M first. (for all m in M: Nth-Of-Key(m) > + /// 1024) + /// ```text + /// |--...-----^------*---*-*--*-*-*-> (The line is the Key Space - from "" to inf) + /// +The 1024th key (* = committed mutation) + /// ``` + /// - Before committing remaining mutations, PiTR triggered initial + /// scanning. + /// - The remaining mutations are committed before the instant when initial + /// scanning get the snapshot. 
+ #[test] + fn with_split_txn() { + let mut suite = super::SuiteBuilder::new_named("split_txn").use_v3().build(); + run_async_test(async { + let start_ts = suite.cluster.pd_client.get_tso().await.unwrap(); + let keys = (1..1960).map(|i| make_record_key(1, i)).collect::>(); + suite.must_kv_prewrite( + 1, + keys.clone() + .into_iter() + .map(|k| mutation(k, b"hello, world".to_vec())) + .collect(), + make_record_key(1, 1913), + start_ts, + ); + let commit_ts = suite.cluster.pd_client.get_tso().await.unwrap(); + suite.commit_keys(keys[1913..].to_vec(), start_ts, commit_ts); + suite.must_register_task(1, "test_split_txn"); + suite.commit_keys(keys[..1913].to_vec(), start_ts, commit_ts); + suite.force_flush_files("test_split_txn"); + suite.wait_for_flush(); + let keys_encoded = keys + .iter() + .map(|v| { + Key::from_raw(v.as_slice()) + .append_ts(commit_ts) + .into_encoded() + }) + .collect::>(); + suite.check_for_write_records( + suite.flushed_files.path(), + keys_encoded.iter().map(Vec::as_slice), + ); + }); + suite.cluster.shutdown(); + } + #[test] /// This case tests whether the backup can continue when the leader failes. 
fn leader_down() { From dabf29e4178f3f9b86f0a0eb5cac1a131df2d377 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Tue, 16 Aug 2022 15:32:51 +0800 Subject: [PATCH 0156/1149] raftstore: allow exec observers delay deletion of applied ssts (#13061) ref tikv/tikv#12849 allow exec observers delay deletion of applied ssts Signed-off-by: CalvinNeo Co-authored-by: Ti Chi Robot --- .../raftstore/src/coprocessor/dispatcher.rs | 166 ++++++++++++++---- components/raftstore/src/coprocessor/mod.rs | 13 +- components/raftstore/src/store/fsm/apply.rs | 146 +++++++++++++-- components/raftstore/src/store/metrics.rs | 6 + components/sst_importer/src/import_file.rs | 4 + 5 files changed, 285 insertions(+), 50 deletions(-) diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index 6297722a996..7eea973997b 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -459,12 +459,13 @@ impl CoprocessorHost { cmd: &Cmd, apply_state: &RaftApplyState, region_state: &RegionState, + apply_ctx: &mut ApplyCtxInfo<'_>, ) -> bool { let mut ctx = ObserverContext::new(region); if !cmd.response.has_admin_response() { for observer in &self.registry.query_observers { let observer = observer.observer.inner(); - if observer.post_exec_query(&mut ctx, cmd, apply_state, region_state) { + if observer.post_exec_query(&mut ctx, cmd, apply_state, region_state, apply_ctx) { return true; } } @@ -472,7 +473,7 @@ impl CoprocessorHost { } else { for observer in &self.registry.admin_observers { let observer = observer.observer.inner(); - if observer.post_exec_admin(&mut ctx, cmd, apply_state, region_state) { + if observer.post_exec_admin(&mut ctx, cmd, apply_state, region_state, apply_ctx) { return true; } } @@ -656,6 +657,26 @@ mod tests { return_err: Arc, } + enum ObserverIndex { + PreProposeAdmin = 1, + PreApplyAdmin = 2, + PostApplyAdmin = 3, + PreProposeQuery = 4, + PreApplyQuery = 5, + 
PostApplyQuery = 6, + OnRoleChange = 7, + OnRegionChanged = 8, + ApplyPlainKvs = 9, + ApplySst = 10, + OnFlushAppliedCmdBatch = 13, + OnEmptyCmd = 14, + PreExecQuery = 15, + PreExecAdmin = 16, + PostExecQuery = 17, + PostExecAdmin = 18, + OnComputeEngineSize = 19, + } + impl Coprocessor for TestCoprocessor {} impl AdminObserver for TestCoprocessor { @@ -664,7 +685,8 @@ mod tests { ctx: &mut ObserverContext<'_>, _: &mut AdminRequest, ) -> Result<()> { - self.called.fetch_add(1, Ordering::SeqCst); + self.called + .fetch_add(ObserverIndex::PreProposeAdmin as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); if self.return_err.load(Ordering::SeqCst) { return Err(box_err!("error")); @@ -673,12 +695,14 @@ mod tests { } fn pre_apply_admin(&self, ctx: &mut ObserverContext<'_>, _: &AdminRequest) { - self.called.fetch_add(2, Ordering::SeqCst); + self.called + .fetch_add(ObserverIndex::PreApplyAdmin as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); } fn post_apply_admin(&self, ctx: &mut ObserverContext<'_>, _: &AdminResponse) { - self.called.fetch_add(3, Ordering::SeqCst); + self.called + .fetch_add(ObserverIndex::PostApplyAdmin as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); } @@ -689,7 +713,22 @@ mod tests { _: u64, _: u64, ) -> bool { - self.called.fetch_add(16, Ordering::SeqCst); + self.called + .fetch_add(ObserverIndex::PreExecAdmin as usize, Ordering::SeqCst); + ctx.bypass = self.bypass.load(Ordering::SeqCst); + false + } + + fn post_exec_admin( + &self, + ctx: &mut ObserverContext<'_>, + _: &Cmd, + _: &RaftApplyState, + _: &RegionState, + _: &mut ApplyCtxInfo<'_>, + ) -> bool { + self.called + .fetch_add(ObserverIndex::PostExecAdmin as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); false } @@ -701,7 +740,8 @@ mod tests { ctx: &mut ObserverContext<'_>, _: &mut Vec, ) -> Result<()> { - self.called.fetch_add(4, Ordering::SeqCst); + self.called + 
.fetch_add(ObserverIndex::PreProposeQuery as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); if self.return_err.load(Ordering::SeqCst) { return Err(box_err!("error")); @@ -710,12 +750,14 @@ mod tests { } fn pre_apply_query(&self, ctx: &mut ObserverContext<'_>, _: &[Request]) { - self.called.fetch_add(5, Ordering::SeqCst); + self.called + .fetch_add(ObserverIndex::PreApplyQuery as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); } fn post_apply_query(&self, ctx: &mut ObserverContext<'_>, _: &Cmd) { - self.called.fetch_add(6, Ordering::SeqCst); + self.called + .fetch_add(ObserverIndex::PostApplyQuery as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); } @@ -726,26 +768,46 @@ mod tests { _: u64, _: u64, ) -> bool { - self.called.fetch_add(15, Ordering::SeqCst); + self.called + .fetch_add(ObserverIndex::PreExecQuery as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); false } fn on_empty_cmd(&self, ctx: &mut ObserverContext<'_>, _index: u64, _term: u64) { - self.called.fetch_add(14, Ordering::SeqCst); + self.called + .fetch_add(ObserverIndex::OnEmptyCmd as usize, Ordering::SeqCst); + ctx.bypass = self.bypass.load(Ordering::SeqCst); + } + + fn post_exec_query( + &self, + ctx: &mut ObserverContext<'_>, + _: &Cmd, + _: &RaftApplyState, + _: &RegionState, + _: &mut ApplyCtxInfo<'_>, + ) -> bool { + self.called + .fetch_add(ObserverIndex::PostExecQuery as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); + false } } impl PdTaskObserver for TestCoprocessor { fn on_compute_engine_size(&self, _: &mut Option) { - self.called.fetch_add(19, Ordering::SeqCst); + self.called.fetch_add( + ObserverIndex::OnComputeEngineSize as usize, + Ordering::SeqCst, + ); } } impl RoleObserver for TestCoprocessor { fn on_role_change(&self, ctx: &mut ObserverContext<'_>, _: &RoleChange) { - self.called.fetch_add(7, Ordering::SeqCst); + self.called + 
.fetch_add(ObserverIndex::OnRoleChange as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); } } @@ -757,7 +819,8 @@ mod tests { _: RegionChangeEvent, _: StateRole, ) { - self.called.fetch_add(8, Ordering::SeqCst); + self.called + .fetch_add(ObserverIndex::OnRegionChanged as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); } } @@ -769,12 +832,14 @@ mod tests { _: CfName, _: &[(Vec, Vec)], ) { - self.called.fetch_add(9, Ordering::SeqCst); + self.called + .fetch_add(ObserverIndex::ApplyPlainKvs as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); } fn apply_sst(&self, ctx: &mut ObserverContext<'_>, _: CfName, _: &str) { - self.called.fetch_add(10, Ordering::SeqCst); + self.called + .fetch_add(ObserverIndex::ApplySst as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); } } @@ -786,7 +851,10 @@ mod tests { _: &mut Vec, _: &PanicEngine, ) { - self.called.fetch_add(13, Ordering::SeqCst); + self.called.fetch_add( + ObserverIndex::OnFlushAppliedCmdBatch as usize, + Ordering::SeqCst, + ); } fn on_applied_current_term(&self, _: StateRole, _: &Region) {} } @@ -825,38 +893,50 @@ mod tests { .register_region_change_observer(1, BoxRegionChangeObserver::new(ob.clone())); host.registry .register_cmd_observer(1, BoxCmdObserver::new(ob.clone())); + + let mut index: usize = 0; let region = Region::default(); let mut admin_req = RaftCmdRequest::default(); admin_req.set_admin_request(AdminRequest::default()); host.pre_propose(®ion, &mut admin_req).unwrap(); - assert_all!([&ob.called], &[1]); + index += ObserverIndex::PreProposeAdmin as usize; + assert_all!([&ob.called], &[index]); host.pre_apply(®ion, &admin_req); - assert_all!([&ob.called], &[3]); + index += ObserverIndex::PreApplyAdmin as usize; + assert_all!([&ob.called], &[index]); let mut admin_resp = RaftCmdResponse::default(); admin_resp.set_admin_response(AdminResponse::default()); host.post_apply(®ion, &Cmd::new(0, 0, admin_req, 
admin_resp)); - assert_all!([&ob.called], &[6]); + index += ObserverIndex::PostApplyAdmin as usize; + assert_all!([&ob.called], &[index]); let mut query_req = RaftCmdRequest::default(); query_req.set_requests(vec![Request::default()].into()); host.pre_propose(®ion, &mut query_req).unwrap(); - assert_all!([&ob.called], &[10]); + index += ObserverIndex::PreProposeQuery as usize; + assert_all!([&ob.called], &[index]); + index += ObserverIndex::PreApplyQuery as usize; host.pre_apply(®ion, &query_req); - assert_all!([&ob.called], &[15]); + assert_all!([&ob.called], &[index]); let query_resp = RaftCmdResponse::default(); host.post_apply(®ion, &Cmd::new(0, 0, query_req, query_resp)); - assert_all!([&ob.called], &[21]); + index += ObserverIndex::PostApplyQuery as usize; + assert_all!([&ob.called], &[index]); host.on_role_change(®ion, RoleChange::new(StateRole::Leader)); - assert_all!([&ob.called], &[28]); + index += ObserverIndex::OnRoleChange as usize; + assert_all!([&ob.called], &[index]); host.on_region_changed(®ion, RegionChangeEvent::Create, StateRole::Follower); - assert_all!([&ob.called], &[36]); + index += ObserverIndex::OnRegionChanged as usize; + assert_all!([&ob.called], &[index]); host.post_apply_plain_kvs_from_snapshot(®ion, "default", &[]); - assert_all!([&ob.called], &[45]); + index += ObserverIndex::ApplyPlainKvs as usize; + assert_all!([&ob.called], &[index]); host.post_apply_sst_from_snapshot(®ion, "default", ""); - assert_all!([&ob.called], &[55]); + index += ObserverIndex::ApplySst as usize; + assert_all!([&ob.called], &[index]); let observe_info = CmdObserveInfo::from_handle( ObserveHandle::new(), @@ -866,26 +946,46 @@ mod tests { let mut cb = CmdBatch::new(&observe_info, 0); cb.push(&observe_info, 0, Cmd::default()); host.on_flush_applied_cmd_batch(cb.level, vec![cb], &PanicEngine); - // `post_apply` + `on_flush_applied_cmd_batch` => 13 + 6 = 19 - assert_all!([&ob.called], &[74]); + index += ObserverIndex::PostApplyQuery as usize; + index += 
ObserverIndex::OnFlushAppliedCmdBatch as usize; + assert_all!([&ob.called], &[index]); let mut empty_req = RaftCmdRequest::default(); empty_req.set_requests(vec![Request::default()].into()); host.on_empty_cmd(®ion, 0, 0); - assert_all!([&ob.called], &[88]); // 14 + index += ObserverIndex::OnEmptyCmd as usize; + assert_all!([&ob.called], &[index]); let mut query_req = RaftCmdRequest::default(); query_req.set_requests(vec![Request::default()].into()); host.pre_exec(®ion, &query_req, 0, 0); - assert_all!([&ob.called], &[103]); // 15 + index += ObserverIndex::PreExecQuery as usize; + assert_all!([&ob.called], &[index]); let mut admin_req = RaftCmdRequest::default(); admin_req.set_admin_request(AdminRequest::default()); host.pre_exec(®ion, &admin_req, 0, 0); - assert_all!([&ob.called], &[119]); // 16 + index += ObserverIndex::PreExecAdmin as usize; + assert_all!([&ob.called], &[index]); host.on_compute_engine_size(); - assert_all!([&ob.called], &[138]); // 19 + index += ObserverIndex::OnComputeEngineSize as usize; + assert_all!([&ob.called], &[index]); + + let mut pending_handle_ssts = None; + let mut delete_ssts = vec![]; + let mut pending_delete_ssts = vec![]; + let mut info = ApplyCtxInfo { + pending_handle_ssts: &mut pending_handle_ssts, + pending_delete_ssts: &mut pending_delete_ssts, + delete_ssts: &mut delete_ssts, + }; + let apply_state = RaftApplyState::default(); + let region_state = RegionState::default(); + let cmd = Cmd::default(); + host.post_exec(®ion, &cmd, &apply_state, ®ion_state, &mut info); + index += ObserverIndex::PostExecQuery as usize; + assert_all!([&ob.called], &[index]); } #[test] diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 82313ae7d4e..fcbfcfc98ff 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -9,7 +9,7 @@ use std::{ vec::IntoIter, }; -use engine_traits::CfName; +use engine_traits::{CfName, SstMetaInfo}; use 
kvproto::{ metapb::Region, pdpb::CheckPolicy, @@ -75,12 +75,21 @@ impl<'a> ObserverContext<'a> { } } +/// Context of a region provided for observers. +#[derive(Default, Clone)] pub struct RegionState { pub peer_id: u64, pub pending_remove: bool, pub modified_region: Option, } +/// Context for exec observers of mutation to be applied to ApplyContext. +pub struct ApplyCtxInfo<'a> { + pub pending_handle_ssts: &'a mut Option>, + pub delete_ssts: &'a mut Vec, + pub pending_delete_ssts: &'a mut Vec, +} + pub trait AdminObserver: Coprocessor { /// Hook to call before proposing admin request. fn pre_propose_admin(&self, _: &mut ObserverContext<'_>, _: &mut AdminRequest) -> Result<()> { @@ -115,6 +124,7 @@ pub trait AdminObserver: Coprocessor { _: &Cmd, _: &RaftApplyState, _: &RegionState, + _: &mut ApplyCtxInfo<'_>, ) -> bool { false } @@ -154,6 +164,7 @@ pub trait QueryObserver: Coprocessor { _: &Cmd, _: &RaftApplyState, _: &RegionState, + _: &mut ApplyCtxInfo<'_>, ) -> bool { false } diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index d33a262cf6a..3f841e699bb 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -73,7 +73,8 @@ use super::metrics::*; use crate::{ bytes_capacity, coprocessor::{ - Cmd, CmdBatch, CmdObserveInfo, CoprocessorHost, ObserveHandle, ObserveLevel, RegionState, + ApplyCtxInfo, Cmd, CmdBatch, CmdObserveInfo, CoprocessorHost, ObserveHandle, ObserveLevel, + RegionState, }, store::{ cmd_resp, @@ -408,6 +409,11 @@ where /// never apply again at first, then we can delete the ssts files. delete_ssts: Vec, + /// A self-defined engine may be slow to ingest ssts. + /// It may move some elements of `delete_ssts` into `pending_delete_ssts` to + /// delay deletion. Otherwise we may lost data. + pending_delete_ssts: Vec, + /// The priority of this Handler. priority: Priority, /// Whether to yield high-latency operation to low-priority handler. 
@@ -465,6 +471,7 @@ where perf_context: engine.get_perf_context(cfg.perf_level, PerfContextKind::RaftstoreApply), yield_duration: cfg.apply_yield_duration.0, delete_ssts: vec![], + pending_delete_ssts: vec![], store_id, pending_create_peers, priority, @@ -1244,7 +1251,6 @@ where .applied_batch .push(cmd_cb, cmd, &self.observe_info, self.region_id()); if should_write { - debug!("persist data and apply state"; "region_id" => self.region_id(), "peer_id" => self.id(), "state" => ?self.apply_state); apply_ctx.commit(self); } exec_result @@ -1323,6 +1329,22 @@ where self.applied_term = term; let cmd = Cmd::new(index, term, req.clone(), resp.clone()); + let (modified_region, mut pending_handle_ssts) = match exec_result { + ApplyResult::Res(ref e) => match e { + ExecResult::SplitRegion { ref derived, .. } => (Some(derived.clone()), None), + ExecResult::PrepareMerge { ref region, .. } => (Some(region.clone()), None), + ExecResult::CommitMerge { ref region, .. } => (Some(region.clone()), None), + ExecResult::RollbackMerge { ref region, .. } => (Some(region.clone()), None), + ExecResult::IngestSst { ref ssts } => (None, Some(ssts.clone())), + _ => (None, None), + }, + _ => (None, None), + }; + let mut apply_ctx_info = ApplyCtxInfo { + pending_handle_ssts: &mut pending_handle_ssts, + delete_ssts: &mut ctx.delete_ssts, + pending_delete_ssts: &mut ctx.pending_delete_ssts, + }; let should_write = ctx.host.post_exec( &self.region, &cmd, @@ -1330,18 +1352,25 @@ where &RegionState { peer_id: self.id(), pending_remove: self.pending_remove, - modified_region: match exec_result { - ApplyResult::Res(ref e) => match e { - ExecResult::SplitRegion { ref derived, .. } => Some(derived.clone()), - ExecResult::PrepareMerge { ref region, .. } => Some(region.clone()), - ExecResult::CommitMerge { ref region, .. } => Some(region.clone()), - ExecResult::RollbackMerge { ref region, .. 
} => Some(region.clone()), - _ => None, - }, - _ => None, - }, + modified_region, }, + &mut apply_ctx_info, ); + match pending_handle_ssts { + None => (), + Some(mut v) => { + if !v.is_empty() { + // All elements in `pending_handle_ssts` should be moved into either + // `delete_ssts` or `pending_delete_ssts`, once handled by by any of the + // `post_exec` observers. So a non-empty + // `pending_handle_ssts` here indicates no `post_exec` handled. + ctx.delete_ssts.append(&mut v); + } + RAFT_APPLYING_SST_GAUGE + .with_label_values(&["pending_delete"]) + .set(ctx.pending_delete_ssts.len() as i64); + } + } if let ApplyResult::Res(ref exec_result) = exec_result { match *exec_result { @@ -1564,7 +1593,6 @@ where }; dont_delete_ingested_sst_fp(); } - ctx.delete_ssts.append(&mut ssts.clone()); ApplyResult::Res(ExecResult::IngestSst { ssts }) } else { ApplyResult::None @@ -4967,6 +4995,10 @@ mod tests { cmd_sink: Option>>>, filter_compact_log: Arc, filter_consistency_check: Arc, + delay_remove_ssts: Arc, + last_delete_sst_count: Arc, + last_pending_delete_sst_count: Arc, + last_pending_handle_sst_count: Arc, } impl Coprocessor for ApplyObserver {} @@ -4979,6 +5011,43 @@ mod tests { fn post_apply_query(&self, _: &mut ObserverContext<'_>, _: &Cmd) { self.post_query_count.fetch_add(1, Ordering::SeqCst); } + + fn post_exec_query( + &self, + _: &mut ObserverContext<'_>, + _: &Cmd, + _: &RaftApplyState, + _: &RegionState, + apply_info: &mut ApplyCtxInfo<'_>, + ) -> bool { + match apply_info.pending_handle_ssts { + Some(v) => { + // If it is a ingest sst + let mut ssts = std::mem::take(v); + assert_ne!(ssts.len(), 0); + if self.delay_remove_ssts.load(Ordering::SeqCst) { + apply_info.pending_delete_ssts.append(&mut ssts); + } else { + apply_info.delete_ssts.append(&mut ssts); + } + } + None => (), + } + self.last_delete_sst_count + .store(apply_info.delete_ssts.len() as u64, Ordering::SeqCst); + self.last_pending_delete_sst_count.store( + apply_info.pending_delete_ssts.len() as 
u64, + Ordering::SeqCst, + ); + self.last_pending_handle_sst_count.store( + match apply_info.pending_handle_ssts { + Some(ref v) => v.len() as u64, + None => 0, + }, + Ordering::SeqCst, + ); + false + } } impl AdminObserver for ApplyObserver { @@ -4988,6 +5057,7 @@ mod tests { cmd: &Cmd, _: &RaftApplyState, region_state: &RegionState, + _: &mut ApplyCtxInfo<'_>, ) -> bool { let request = cmd.request.get_admin_request(); match request.get_cmd_type() { @@ -5664,11 +5734,13 @@ mod tests { #[test] fn test_exec_observer() { let (_path, engine) = create_tmp_engine("test-exec-observer"); - let (_import_dir, importer) = create_tmp_importer("test-exec-observer"); + let (import_dir, importer) = create_tmp_importer("test-exec-observer"); let mut host = CoprocessorHost::::default(); let obs = ApplyObserver::default(); host.registry .register_admin_observer(1, BoxAdminObserver::new(obs.clone())); + host.registry + .register_query_observer(1, BoxQueryObserver::new(obs.clone())); let (tx, rx) = mpsc::channel(); let (region_scheduler, _) = dummy_scheduler(); @@ -5682,7 +5754,7 @@ mod tests { sender, region_scheduler, coprocessor_host: host, - importer, + importer: importer.clone(), engine: engine.clone(), router: router.clone(), store_id: 1, @@ -5783,7 +5855,7 @@ mod tests { let apply_res = fetch_apply_res(&rx); assert_eq!(apply_res.apply_state.get_applied_index(), index_id); assert_eq!(apply_res.applied_term, 1); - let (_, r8) = if let ExecResult::SplitRegion { + let (r1, r8) = if let ExecResult::SplitRegion { regions, derived: _, new_split_regions: _, @@ -5814,6 +5886,48 @@ mod tests { .unwrap_or_default(); assert_eq!(apply_res.apply_state, state); + // Phase 3: we test if we can delay deletion of some sst files. 
+ let r1_epoch = r1.get_region_epoch(); + index_id += 1; + let kvs: Vec<(&[u8], &[u8])> = vec![(b"k3", b"2")]; + let sst_path = import_dir.path().join("test.sst"); + let (mut meta, data) = gen_sst_file_with_kvs(&sst_path, &kvs); + meta.set_region_id(1); + meta.set_region_epoch(r1_epoch.clone()); + let mut file = importer.create(&meta).unwrap(); + file.append(&data).unwrap(); + file.finish().unwrap(); + let src = sst_path.clone(); + let dst = file.get_import_path().save.to_str().unwrap(); + std::fs::copy(src, dst).unwrap(); + assert!(sst_path.as_path().exists()); + let ingestsst = EntryBuilder::new(index_id, 1) + .ingest_sst(&meta) + .epoch(r1_epoch.get_conf_ver(), r1_epoch.get_version()) + .build(); + + obs.delay_remove_ssts.store(true, Ordering::SeqCst); + router.schedule_task(1, Msg::apply(apply(peer_id, 1, 1, vec![ingestsst], vec![]))); + fetch_apply_res(&rx); + let apply_res = fetch_apply_res(&rx); + assert_eq!(apply_res.exec_res.len(), 1); + assert_eq!(obs.last_pending_handle_sst_count.load(Ordering::SeqCst), 0); + assert_eq!(obs.last_delete_sst_count.load(Ordering::SeqCst), 0); + assert_eq!(obs.last_pending_delete_sst_count.load(Ordering::SeqCst), 1); + + index_id += 1; + let ingestsst = EntryBuilder::new(index_id, 1) + .ingest_sst(&meta) + .epoch(r1_epoch.get_conf_ver(), r1_epoch.get_version()) + .build(); + obs.delay_remove_ssts.store(false, Ordering::SeqCst); + router.schedule_task(1, Msg::apply(apply(peer_id, 1, 1, vec![ingestsst], vec![]))); + let apply_res = fetch_apply_res(&rx); + assert_eq!(apply_res.exec_res.len(), 1); + assert_eq!(obs.last_pending_handle_sst_count.load(Ordering::SeqCst), 0); + assert_eq!(obs.last_delete_sst_count.load(Ordering::SeqCst), 1); + assert_eq!(obs.last_pending_delete_sst_count.load(Ordering::SeqCst), 1); + system.shutdown(); } diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 9691d5be0db..587b9ad3af7 100644 --- a/components/raftstore/src/store/metrics.rs +++ 
b/components/raftstore/src/store/metrics.rs @@ -777,4 +777,10 @@ lazy_static! { .unwrap(); pub static ref RAFT_LOG_GC_SKIPPED: RaftLogGcSkippedVec = auto_flush_from!(RAFT_LOG_GC_SKIPPED_VEC, RaftLogGcSkippedVec); + + pub static ref RAFT_APPLYING_SST_GAUGE: IntGaugeVec = register_int_gauge_vec!( + "tikv_raft_applying_sst", + "Sum of applying sst.", + &["type"] + ).unwrap(); } diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index f5292b70075..c4a0498a9a6 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -183,6 +183,10 @@ impl ImportFile { } Ok(()) } + + pub fn get_import_path(&self) -> &ImportPath { + &self.path + } } impl fmt::Debug for ImportFile { From 117805f8ebf9eede138ce7a7345445cac251b45d Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 16 Aug 2022 16:04:52 +0800 Subject: [PATCH 0157/1149] server: make tablet cache a local variable (#13273) ref tikv/tikv#13214 Signed-off-by: SpadeA-Tang Co-authored-by: Ti Chi Robot --- components/server/src/server.rs | 123 ++++++++++++++++++++++++++++---- 1 file changed, 108 insertions(+), 15 deletions(-) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index e925a663943..f61d981a912 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -45,7 +45,7 @@ use engine_rocks::{ use engine_rocks_helper::sst_recovery::{RecoveryRunner, DEFAULT_CHECK_INTERVAL}; use engine_traits::{ CfOptions, CfOptionsExt, Engines, FlowControlFactorsExt, KvEngine, MiscExt, RaftEngine, - TabletAccessor, TabletFactory, CF_DEFAULT, CF_LOCK, CF_WRITE, + TabletFactory, CF_DEFAULT, CF_LOCK, CF_WRITE, }; use error_code::ErrorCodeExt; use file_system::{ @@ -98,8 +98,8 @@ use tikv::{ service::{DebugService, DiagnosticsService}, status_server::StatusServer, ttl::TtlChecker, - KvEngineFactory, KvEngineFactoryBuilder, Node, RaftKv, Server, 
CPU_CORES_QUOTA_GAUGE, - DEFAULT_CLUSTER_ID, GRPC_THREAD_PREFIX, + KvEngineFactoryBuilder, Node, RaftKv, Server, CPU_CORES_QUOTA_GAUGE, DEFAULT_CLUSTER_ID, + GRPC_THREAD_PREFIX, }, storage::{ self, @@ -1280,12 +1280,19 @@ where ); let mut io_metrics = IoMetricsManager::new(fetcher); let engines_info_clone = engines_info.clone(); + + // region_id -> (suffix, tablet) + // `update` of EnginesResourceInfo is called perodically which needs this map + // for recording the latest tablet for each region. + // `cached_latest_tablets` is passed to `update` to avoid memory + // allocation each time when calling `update`. + let mut cached_latest_tablets: HashMap = HashMap::new(); self.background_worker .spawn_interval_task(DEFAULT_METRICS_FLUSH_INTERVAL, move || { let now = Instant::now(); engine_metrics.flush(now); io_metrics.flush(now); - engines_info_clone.update(now); + engines_info_clone.update(now, &mut cached_latest_tablets); }); if let Some(limiter) = get_io_rate_limiter() { limiter.set_low_priority_io_adjustor_if_needed(Some(engines_info)); @@ -1844,13 +1851,8 @@ impl EngineMetricsManager { } pub struct EnginesResourceInfo { - tablet_factory: Arc, + tablet_factory: Arc + Sync + Send>, raft_engine: Option, - // region_id -> (suffix, tablet) - // `update` is called perodically which needs this map for recording the latest tablet for each - // region and cached_latest_tablets is used to avoid memory allocation each time when - // calling `update`. 
- cached_latest_tablets: Arc>>, latest_normalized_pending_bytes: AtomicU32, normalized_pending_bytes_collector: MovingAvgU32, } @@ -1859,20 +1861,23 @@ impl EnginesResourceInfo { const SCALE_FACTOR: u64 = 100; fn new( - tablet_factory: Arc, + tablet_factory: Arc + Sync + Send>, raft_engine: Option, max_samples_to_preserve: usize, ) -> Self { EnginesResourceInfo { tablet_factory, raft_engine, - cached_latest_tablets: Arc::default(), latest_normalized_pending_bytes: AtomicU32::new(0), normalized_pending_bytes_collector: MovingAvgU32::new(max_samples_to_preserve), } } - pub fn update(&self, _now: Instant) { + pub fn update( + &self, + _now: Instant, + cached_latest_tablets: &mut HashMap, + ) { let mut normalized_pending_bytes = 0; fn fetch_engine_cf(engine: &RocksEngine, cf: &str, normalized_pending_bytes: &mut u32) { @@ -1894,8 +1899,6 @@ impl EnginesResourceInfo { fetch_engine_cf(raft_engine, CF_DEFAULT, &mut normalized_pending_bytes); } - let mut cached_latest_tablets = self.cached_latest_tablets.as_ref().lock().unwrap(); - self.tablet_factory .for_each_opened_tablet( &mut |id, suffix, db: &RocksEngine| match cached_latest_tablets.entry(id) { @@ -1955,3 +1958,93 @@ impl IoBudgetAdjustor for EnginesResourceInfo { (total_budgets as f32 * score) as usize } } + +#[cfg(test)] +mod test { + use std::{ + collections::HashMap, + sync::{atomic::Ordering, Arc}, + }; + + use engine_rocks::{raw::Env, RocksEngine}; + use engine_traits::{ + FlowControlFactorsExt, MiscExt, OpenOptions, SyncMutable, TabletFactory, CF_DEFAULT, + }; + use tempfile::Builder; + use tikv::{config::TikvConfig, server::KvEngineFactoryBuilder}; + use tikv_util::{config::ReadableSize, time::Instant}; + + use super::EnginesResourceInfo; + + #[test] + fn test_engines_resource_info_update() { + let mut config = TikvConfig::default(); + config.rocksdb.defaultcf.disable_auto_compactions = true; + config.rocksdb.defaultcf.soft_pending_compaction_bytes_limit = Some(ReadableSize(1)); + 
config.rocksdb.writecf.soft_pending_compaction_bytes_limit = Some(ReadableSize(1)); + config.rocksdb.lockcf.soft_pending_compaction_bytes_limit = Some(ReadableSize(1)); + let env = Arc::new(Env::default()); + let path = Builder::new().prefix("test-update").tempdir().unwrap(); + + let builder = KvEngineFactoryBuilder::new(env, &config, path.path()); + let factory = builder.build_v2(); + + for i in 1..6 { + let _ = factory + .open_tablet(i, Some(10), OpenOptions::default().set_create_new(true)) + .unwrap(); + } + + let tablet = factory + .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) + .unwrap(); + // Prepare some data for two tablets of the same region. So we can test whether + // we fetch the bytes from the latest one. + for i in 1..21 { + tablet.put_cf(CF_DEFAULT, b"key", b"val").unwrap(); + if i % 2 == 0 { + tablet.flush_cf(CF_DEFAULT, true).unwrap(); + } + } + let old_pending_compaction_bytes = tablet + .get_cf_pending_compaction_bytes(CF_DEFAULT) + .unwrap() + .unwrap(); + + let tablet = factory + .open_tablet(1, Some(20), OpenOptions::default().set_create_new(true)) + .unwrap(); + + for i in 1..11 { + tablet.put_cf(CF_DEFAULT, b"key", b"val").unwrap(); + if i % 2 == 0 { + tablet.flush_cf(CF_DEFAULT, true).unwrap(); + } + } + let new_pending_compaction_bytes = tablet + .get_cf_pending_compaction_bytes(CF_DEFAULT) + .unwrap() + .unwrap(); + + assert!(old_pending_compaction_bytes > new_pending_compaction_bytes); + + let engines_info = Arc::new(EnginesResourceInfo::new(Arc::new(factory), None, 10)); + + let mut cached_latest_tablets: HashMap = HashMap::new(); + engines_info.update(Instant::now(), &mut cached_latest_tablets); + + // The memory allocation should be reserved + assert!(cached_latest_tablets.capacity() >= 5); + // The tablet cache should be cleared + assert!(cached_latest_tablets.is_empty()); + + // The latest_normalized_pending_bytes should be equal to the pending compaction + // bytes of tablet_1_20 + assert_eq!( + 
(new_pending_compaction_bytes * 100) as u32, + engines_info + .latest_normalized_pending_bytes + .load(Ordering::Relaxed) + ); + } +} From 73c5d13455e0f07e5b38dd5de2d346afa2f1ddef Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 16 Aug 2022 19:32:50 -0700 Subject: [PATCH 0158/1149] raftstore-v2: add ready processing (#13227) ref tikv/tikv#12842 This PR adds the basic ready processing for v2. Note test case can't be run for now as there is still API missing from raft engine. Compared to v1, v2 always use async raft. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/engine_test/src/lib.rs | 14 +- components/raft_log_engine/src/engine.rs | 25 +- components/raftstore-v2/Cargo.toml | 5 + components/raftstore-v2/src/batch/mod.rs | 2 +- components/raftstore-v2/src/batch/store.rs | 146 ++++++++- components/raftstore-v2/src/fsm/peer.rs | 126 +++++++- components/raftstore-v2/src/fsm/store.rs | 10 +- components/raftstore-v2/src/lib.rs | 15 +- components/raftstore-v2/src/operation/mod.rs | 3 + .../raftstore-v2/src/operation/read/local.rs | 6 +- .../src/operation/ready/async_writer.rs | 199 ++++++++++++ .../raftstore-v2/src/operation/ready/mod.rs | 304 ++++++++++++++++++ components/raftstore-v2/src/raft/apply.rs | 2 +- components/raftstore-v2/src/raft/peer.rs | 124 ++++++- components/raftstore-v2/src/raft/storage.rs | 144 ++++++--- components/raftstore-v2/src/router/imp.rs | 12 + components/raftstore-v2/src/router/message.rs | 5 +- components/raftstore-v2/src/router/mod.rs | 1 + .../raftstore-v2/tests/integrations/mod.rs | 174 ++++++++++ .../tests/integrations/test_election.rs | 10 + .../raftstore/src/store/async_io/write.rs | 5 +- .../src/store/async_io/write_router.rs | 72 +++-- .../src/store/async_io/write_tests.rs | 4 +- components/raftstore/src/store/fsm/peer.rs | 4 +- components/raftstore/src/store/fsm/store.rs | 23 +- components/raftstore/src/store/mod.rs | 12 +- components/raftstore/src/store/msg.rs | 11 +- 
.../raftstore/src/store/peer_storage.rs | 49 +-- components/raftstore/src/store/transport.rs | 9 + components/raftstore/src/store/worker/mod.rs | 4 +- .../src/store/worker/raftlog_fetch.rs | 42 +-- 32 files changed, 1346 insertions(+), 217 deletions(-) create mode 100644 components/raftstore-v2/src/operation/ready/async_writer.rs create mode 100644 components/raftstore-v2/src/operation/ready/mod.rs create mode 100644 components/raftstore-v2/src/router/imp.rs create mode 100644 components/raftstore-v2/tests/integrations/mod.rs create mode 100644 components/raftstore-v2/tests/integrations/test_election.rs diff --git a/Cargo.lock b/Cargo.lock index 1efe0607541..93ac7ddd600 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4201,6 +4201,7 @@ dependencies = [ "kvproto", "log_wrappers", "pd_client", + "protobuf", "raft", "raft-proto", "raftstore", diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index 18d89b1c2fb..bc723dbb76a 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -113,7 +113,7 @@ pub mod kv { #[derive(Clone)] pub struct TestTabletFactory { - root_path: String, + root_path: PathBuf, db_opt: DbOptions, cf_opts: Vec<(&'static str, KvTestCfOptions)>, root_db: Arc>>, @@ -121,12 +121,12 @@ pub mod kv { impl TestTabletFactory { pub fn new( - root_path: &str, + root_path: &Path, db_opt: DbOptions, cf_opts: Vec<(&'static str, KvTestCfOptions)>, ) -> Self { Self { - root_path: root_path.to_string(), + root_path: root_path.to_path_buf(), db_opt, cf_opts, root_db: Arc::new(Mutex::default()), @@ -230,7 +230,7 @@ pub mod kv { impl TestTabletFactoryV2 { pub fn new( - root_path: &str, + root_path: &Path, db_opt: DbOptions, cf_opts: Vec<(&'static str, KvTestCfOptions)>, ) -> Self { @@ -342,12 +342,14 @@ pub mod kv { #[inline] fn tablets_path(&self) -> PathBuf { - Path::new(&self.inner.root_path).join("tablets") + self.inner.root_path.join("tablets") } #[inline] fn tablet_path(&self, id: u64, suffix: u64) -> 
PathBuf { - Path::new(&self.inner.root_path).join(format!("tablets/{}_{}", id, suffix)) + self.inner + .root_path + .join(format!("tablets/{}_{}", id, suffix)) } #[inline] diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index dd7c222845c..8991a6f6838 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -28,8 +28,8 @@ use tikv_util::Either; use crate::perf_context::RaftEnginePerfContext; -// A special region ID representing global state. -const STORE_REGION_ID: u64 = 0; +// A special region ID representing store state. +const STORE_STATE_ID: u64 = 0; #[derive(Clone)] pub struct MessageExtTyped; @@ -377,14 +377,14 @@ impl RaftLogBatchTrait for RaftLogBatch { fn put_store_ident(&mut self, ident: &StoreIdent) -> Result<()> { self.0 - .put_message(STORE_REGION_ID, STORE_IDENT_KEY.to_vec(), ident) + .put_message(STORE_STATE_ID, STORE_IDENT_KEY.to_vec(), ident) .map_err(transfer_error) } fn put_prepare_bootstrap_region(&mut self, region: &Region) -> Result<()> { self.0 .put_message( - STORE_REGION_ID, + STORE_STATE_ID, PREPARE_BOOTSTRAP_REGION_KEY.to_vec(), region, ) @@ -393,7 +393,7 @@ impl RaftLogBatchTrait for RaftLogBatch { fn remove_prepare_bootstrap_region(&mut self) -> Result<()> { self.0 - .delete(STORE_REGION_ID, PREPARE_BOOTSTRAP_REGION_KEY.to_vec()); + .delete(STORE_STATE_ID, PREPARE_BOOTSTRAP_REGION_KEY.to_vec()); Ok(()) } @@ -451,13 +451,13 @@ impl RaftEngineReadOnly for RaftLogEngine { fn get_store_ident(&self) -> Result> { self.0 - .get_message(STORE_REGION_ID, STORE_IDENT_KEY) + .get_message(STORE_STATE_ID, STORE_IDENT_KEY) .map_err(transfer_error) } fn get_prepare_bootstrap_region(&self) -> Result> { self.0 - .get_message(STORE_REGION_ID, PREPARE_BOOTSTRAP_REGION_KEY) + .get_message(STORE_STATE_ID, PREPARE_BOOTSTRAP_REGION_KEY) .map_err(transfer_error) } @@ -541,7 +541,7 @@ impl RaftEngine for RaftLogEngine { let mut batch = Self::LogBatch::default(); batch 
.0 - .put_message(STORE_REGION_ID, STORE_IDENT_KEY.to_vec(), ident) + .put_message(STORE_STATE_ID, STORE_IDENT_KEY.to_vec(), ident) .map_err(transfer_error)?; self.0.write(&mut batch.0, true).map_err(transfer_error)?; Ok(()) @@ -605,12 +605,17 @@ impl RaftEngine for RaftLogEngine { Ok(self.0.get_used_size() as u64) } - fn for_each_raft_group(&self, _f: &mut F) -> std::result::Result<(), E> + fn for_each_raft_group(&self, f: &mut F) -> std::result::Result<(), E> where F: FnMut(u64) -> std::result::Result<(), E>, E: From, { - unimplemented!() + for id in self.0.raft_groups() { + if id != STORE_STATE_ID { + f(id)?; + } + } + Ok(()) } } diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 8551864a444..29e68517441 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -40,6 +40,7 @@ keys = { path = "../keys", default-features = false } kvproto = { git = "https://github.com/pingcap/kvproto.git" } log_wrappers = { path = "../log_wrappers" } pd_client = { path = "../pd_client" } +protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raft-proto = { version = "0.7.0" } raftstore = { path = "../raftstore" } @@ -60,3 +61,7 @@ test_util = { path = "../test_util" } name = "raftstore-v2-failpoints" path = "tests/failpoints/mod.rs" required-features = ["failpoints"] + +[[test]] +name = "raftstore-v2-integrations" +path = "tests/integrations/mod.rs" diff --git a/components/raftstore-v2/src/batch/mod.rs b/components/raftstore-v2/src/batch/mod.rs index 0f4b9fba3d3..7e00932d1e1 100644 --- a/components/raftstore-v2/src/batch/mod.rs +++ b/components/raftstore-v2/src/batch/mod.rs @@ -9,4 +9,4 @@ mod apply; mod store; pub(crate) use apply::ApplyContext; -pub use store::{create_store_batch_system, StoreContext, StoreSystem}; +pub use store::{create_store_batch_system, StoreContext, StoreRouter, StoreSystem}; diff --git 
a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 9c1f60ba947..739240f84e0 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -1,15 +1,25 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{mem, ops::DerefMut, sync::Arc, time::Duration}; +use std::{ + mem, + ops::{Deref, DerefMut}, + sync::{atomic::AtomicUsize, Arc}, + time::Duration, +}; use batch_system::{ BasicMailbox, BatchRouter, BatchSystem, HandleResult, HandlerBuilder, PollHandler, }; use collections::HashMap; +use crossbeam::channel::Sender; use engine_traits::{Engines, KvEngine, RaftEngine, TabletFactory}; use futures_util::{compat::Future01CompatExt, FutureExt}; use kvproto::{metapb::Store, raft_serverpb::PeerState}; -use raftstore::store::{fsm::store::PeerTickBatch, Config, Transport}; +use raft::INVALID_ID; +use raftstore::store::{ + fsm::store::PeerTickBatch, local_metrics::RaftMetrics, Config, RaftlogFetchRunner, + RaftlogFetchTask, StoreWriters, Transport, WriteMsg, WriteSenders, +}; use slog::Logger; use tikv_util::{ box_err, @@ -17,6 +27,7 @@ use tikv_util::{ future::poll_future_notify, time::Instant as TiInstant, timer::SteadyTimer, + worker::{Scheduler, Worker}, }; use super::apply::{create_apply_batch_system, ApplyPollerBuilder, ApplyRouter, ApplySystem}; @@ -27,28 +38,42 @@ use crate::{ }; /// A per-thread context shared by the [`StoreFsm`] and multiple [`PeerFsm`]s. -pub struct StoreContext { - /// A logger without any KV. It's clean for creating new PeerFsm. +pub struct StoreContext { + /// A logger without any KV. It's clean for creating new PeerFSM. pub logger: Logger, /// The transport for sending messages to peers on other stores. pub trans: T, + pub has_ready: bool, + pub raft_metrics: RaftMetrics, /// The latest configuration. pub cfg: Config, + pub router: StoreRouter, /// The tick batch for delay ticking. It will be flushed at the end of every /// round. 
pub tick_batch: Vec, /// The precise timer for scheduling tick. pub timer: SteadyTimer, + pub write_senders: WriteSenders, } -impl StoreContext { - fn new(cfg: Config, trans: T, logger: Logger) -> Self { +impl StoreContext { + fn new( + cfg: Config, + trans: T, + router: StoreRouter, + write_senders: WriteSenders, + logger: Logger, + ) -> Self { Self { logger, trans, + has_ready: false, + raft_metrics: RaftMetrics::new(cfg.waterfall_metrics), cfg, + router, tick_batch: vec![PeerTickBatch::default(); PeerTick::VARIANT_COUNT], timer: SteadyTimer::default(), + write_senders, } } } @@ -59,8 +84,8 @@ impl StoreContext { /// /// - Keeping the local [`StoreContext`] up-to-date. /// - Receiving and sending messages in and out of these FSMs. -struct StorePoller { - poll_ctx: StoreContext, +struct StorePoller { + poll_ctx: StoreContext, cfg_tracker: Tracker, /// Buffers to hold in-coming messages. store_msg_buf: Vec, @@ -71,8 +96,8 @@ struct StorePoller { need_flush_events: bool, } -impl StorePoller { - pub fn new(poll_ctx: StoreContext, cfg_tracker: Tracker) -> Self { +impl StorePoller { + pub fn new(poll_ctx: StoreContext, cfg_tracker: Tracker) -> Self { Self { poll_ctx, cfg_tracker, @@ -108,7 +133,7 @@ impl StorePoller { } impl PollHandler, StoreFsm> - for StorePoller + for StorePoller { fn begin(&mut self, _batch_size: usize, update_cfg: F) where @@ -147,7 +172,11 @@ impl PollHandler { engine: ER, tablet_factory: Arc>, trans: T, + router: StoreRouter, + log_fetch_scheduler: Scheduler, + write_senders: WriteSenders, logger: Logger, } @@ -197,6 +229,9 @@ impl StorePollerBuilder { engine: ER, tablet_factory: Arc>, trans: T, + router: StoreRouter, + log_fetch_scheduler: Scheduler, + store_writers: &mut StoreWriters, logger: Logger, ) -> Self { StorePollerBuilder { @@ -205,7 +240,10 @@ impl StorePollerBuilder { engine, tablet_factory, trans, + router, + log_fetch_scheduler, logger, + write_senders: store_writers.senders(), } } @@ -215,12 +253,14 @@ impl StorePollerBuilder { 
let cfg = self.cfg.value(); self.engine .for_each_raft_group::(&mut |region_id| { + assert_ne!(region_id, INVALID_ID); let peer = match Peer::new( &cfg, region_id, self.store_id, self.tablet_factory.as_ref(), self.engine.clone(), + self.log_fetch_scheduler.clone(), &self.logger, )? { Some(peer) => peer, @@ -254,12 +294,14 @@ where EK: KvEngine, T: Transport + 'static, { - type Handler = StorePoller; + type Handler = StorePoller; fn build(&mut self, priority: batch_system::Priority) -> Self::Handler { let poll_ctx = StoreContext::new( self.cfg.value().clone(), self.trans.clone(), + self.router.clone(), + self.write_senders.clone(), self.logger.clone(), ); let cfg_tracker = self.cfg.clone().tracker("raftstore".to_string()); @@ -267,11 +309,29 @@ where } } +/// A set of background threads that will processing offloaded work from +/// raftstore. +struct Workers { + /// Worker for fetching raft logs asynchronously + log_fetch_worker: Worker, + store_writers: StoreWriters, +} + +impl Default for Workers { + fn default() -> Self { + Self { + log_fetch_worker: Worker::new("raftlog-fetch-worker"), + store_writers: StoreWriters::default(), + } + } +} + /// The system used for polling Raft activities. 
pub struct StoreSystem { system: BatchSystem, StoreFsm>, apply_router: ApplyRouter, apply_system: ApplySystem, + workers: Option>, logger: Logger, } @@ -288,14 +348,32 @@ impl StoreSystem { where T: Transport + 'static, { + let mut workers = Workers::default(); + workers.store_writers.spawn( + store.get_id(), + raft_engine.clone(), + None, + router, + &trans, + &cfg, + )?; + let log_fetch_scheduler = workers.log_fetch_worker.start( + "raftlog-fetch-worker", + RaftlogFetchRunner::new(router.clone(), raft_engine.clone()), + ); + let mut builder = StorePollerBuilder::new( cfg.clone(), store.get_id(), raft_engine, tablet_factory, trans, + router.clone(), + log_fetch_scheduler, + &mut workers.store_writers, self.logger.clone(), ); + self.workers = Some(workers); let peers = builder.init()?; self.apply_system .schedule_all(peers.values().map(|pair| pair.1.peer())); @@ -328,12 +406,47 @@ impl StoreSystem { } pub fn shutdown(&mut self) { + if self.workers.is_none() { + return; + } + let mut workers = self.workers.take().unwrap(); + self.apply_system.shutdown(); self.system.shutdown(); + + workers.store_writers.shutdown(); + workers.log_fetch_worker.stop(); + } +} + +#[derive(Clone)] +pub struct StoreRouter { + router: BatchRouter, StoreFsm>, + logger: Logger, +} + +impl StoreRouter { + #[inline] + pub fn logger(&self) -> &Logger { + &self.logger } } -pub type StoreRouter = BatchRouter, StoreFsm>; +impl Deref for StoreRouter { + type Target = BatchRouter, StoreFsm>; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.router + } +} + +impl DerefMut for StoreRouter { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.router + } +} /// Creates the batch system for polling raft activities. 
pub fn create_store_batch_system( @@ -353,7 +466,8 @@ where system, apply_router, apply_system, - logger, + workers: None, + logger: logger.clone(), }; - (router, system) + (StoreRouter { router, logger }, system) } diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index a8fb67aa121..696a1e5ddf4 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -1,37 +1,43 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +//! This module contains the peer implementation for batch system. + use std::borrow::Cow; use batch_system::{BasicMailbox, Fsm}; use crossbeam::channel::TryRecvError; use engine_traits::{KvEngine, RaftEngine}; use kvproto::metapb; -use raftstore::store::Config; -use slog::{info, Logger}; -use tikv_util::mpsc::{self, LooseBoundedSender, Receiver, Sender}; +use raftstore::store::{Config, Transport}; +use slog::{debug, error, info, trace, Logger}; +use tikv_util::{ + is_zero_duration, + mpsc::{self, LooseBoundedSender, Receiver, Sender}, +}; -use crate::{batch::StoreContext, raft::Peer, PeerMsg, Result}; +use crate::{batch::StoreContext, raft::Peer, PeerMsg, PeerTick, Result}; pub type SenderFsmPair = (LooseBoundedSender>, Box>); pub struct PeerFsm { peer: Peer, - logger: Logger, mailbox: Option>>, receiver: Receiver>, + /// A registry for all scheduled ticks. This can avoid scheduling ticks + /// twice accidentally. 
+ tick_registry: u16, is_stopped: bool, } impl PeerFsm { pub fn new(cfg: &Config, peer: Peer) -> Result> { - let logger = peer.logger().clone(); - info!(logger, "create peer"); + info!(peer.logger, "create peer"); let (tx, rx) = mpsc::loose_bounded(cfg.notify_capacity); let fsm = Box::new(PeerFsm { - logger, peer, mailbox: None, receiver: rx, + tick_registry: 0, is_stopped: false, }); Ok((tx, fsm)) @@ -42,9 +48,14 @@ impl PeerFsm { &self.peer } + #[inline] + pub fn peer_mut(&mut self) -> &mut Peer { + &mut self.peer + } + #[inline] pub fn logger(&self) -> &Logger { - self.peer.logger() + &self.peer.logger } /// Fetches messages to `peer_msg_buf`. It will stop when the buffer @@ -95,18 +106,103 @@ impl Fsm for PeerFsm { } pub struct PeerFsmDelegate<'a, EK: KvEngine, ER: RaftEngine, T> { - fsm: &'a mut PeerFsm, - store_ctx: &'a mut StoreContext, + pub fsm: &'a mut PeerFsm, + pub store_ctx: &'a mut StoreContext, } -impl<'a, EK: KvEngine, ER: RaftEngine, T> PeerFsmDelegate<'a, EK, ER, T> { - pub fn new(fsm: &'a mut PeerFsm, store_ctx: &'a mut StoreContext) -> Self { +impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { + pub fn new(fsm: &'a mut PeerFsm, store_ctx: &'a mut StoreContext) -> Self { Self { fsm, store_ctx } } - pub fn handle_msgs(&self, peer_msgs_buf: &mut Vec>) { + pub fn schedule_tick(&mut self, tick: PeerTick) { + assert!(PeerTick::VARIANT_COUNT <= u16::BITS as usize); + let idx = tick as usize; + let key = 1u16 << (idx as u16); + if self.fsm.tick_registry & key != 0 { + return; + } + if is_zero_duration(&self.store_ctx.tick_batch[idx].wait_duration) { + return; + } + trace!( + self.fsm.logger(), + "schedule tick"; + "tick" => ?tick, + "timeout" => ?self.store_ctx.tick_batch[idx].wait_duration, + ); + + let region_id = self.fsm.peer.region_id(); + let mb = match self.store_ctx.router.mailbox(region_id) { + Some(mb) => mb, + None => { + error!( + self.fsm.logger(), + "failed to get mailbox"; + "tick" => ?tick, + ); + 
return; + } + }; + self.fsm.tick_registry |= key; + let logger = self.fsm.logger().clone(); + // TODO: perhaps following allocation can be removed. + let cb = Box::new(move || { + // This can happen only when the peer is about to be destroyed + // or the node is shutting down. So it's OK to not to clean up + // registry. + if let Err(e) = mb.force_send(PeerMsg::Tick(tick)) { + debug!( + logger, + "failed to schedule peer tick"; + "tick" => ?tick, + "err" => %e, + ); + } + }); + self.store_ctx.tick_batch[idx].ticks.push(cb); + } + + fn on_start(&mut self) { + self.schedule_tick(PeerTick::Raft); + } + + fn on_tick(&mut self, tick: PeerTick) { + match tick { + PeerTick::Raft => self.on_raft_tick(), + PeerTick::RaftLogGc => unimplemented!(), + PeerTick::SplitRegionCheck => unimplemented!(), + PeerTick::PdHeartbeat => unimplemented!(), + PeerTick::CheckMerge => unimplemented!(), + PeerTick::CheckPeerStaleState => unimplemented!(), + PeerTick::EntryCacheEvict => unimplemented!(), + PeerTick::CheckLeaderLease => unimplemented!(), + PeerTick::ReactivateMemoryLock => unimplemented!(), + PeerTick::ReportBuckets => unimplemented!(), + PeerTick::CheckLongUncommitted => unimplemented!(), + } + } + + pub fn on_msgs(&mut self, peer_msgs_buf: &mut Vec>) { for msg in peer_msgs_buf.drain(..) { - // TODO: handle the messages. 
+ match msg { + PeerMsg::RaftMessage(_) => unimplemented!(), + PeerMsg::RaftCommand(_) => unimplemented!(), + PeerMsg::Tick(tick) => self.on_tick(tick), + PeerMsg::ApplyRes { res } => unimplemented!(), + PeerMsg::Start => self.on_start(), + PeerMsg::Noop => unimplemented!(), + PeerMsg::Persisted { + peer_id, + ready_number, + } => self + .fsm + .peer_mut() + .on_persisted(self.store_ctx, peer_id, ready_number), + PeerMsg::FetchedLogs(fetched_logs) => { + self.fsm.peer_mut().on_fetched_logs(fetched_logs) + } + } } } } diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 886478a3036..d80cd90d80b 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -3,7 +3,7 @@ use batch_system::Fsm; use collections::HashMap; use crossbeam::channel::TryRecvError; -use engine_traits::KvEngine; +use engine_traits::{KvEngine, RaftEngine}; use kvproto::metapb::Store; use raftstore::store::{Config, ReadDelegate}; use tikv_util::mpsc::{self, LooseBoundedSender, Receiver}; @@ -74,13 +74,13 @@ impl Fsm for StoreFsm { } } -pub struct StoreFsmDelegate<'a, T> { +pub struct StoreFsmDelegate<'a, EK: KvEngine, ER: RaftEngine, T> { fsm: &'a mut StoreFsm, - store_ctx: &'a mut StoreContext, + store_ctx: &'a mut StoreContext, } -impl<'a, T> StoreFsmDelegate<'a, T> { - pub fn new(fsm: &'a mut StoreFsm, store_ctx: &'a mut StoreContext) -> Self { +impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { + pub fn new(fsm: &'a mut StoreFsm, store_ctx: &'a mut StoreContext) -> Self { Self { fsm, store_ctx } } diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 0739cd61cb7..43998160638 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -9,7 +9,20 @@ //! [`operation`] module. All state machines are expected to communicate with //! messages. They are defined in [`router`] module. 
+// You may get confused about the peer, or other structs like apply, in fsm and +// peer in raft module. The guideline is that if any field doesn't depend on +// the details of batch system, then it should be defined for peer in raft +// module. +// +// If we change to other concurrent programming solution, we can easily just +// change the peer in fsm. +// +// Any accessors should be defined in the file where the struct is defined. +// Functionalities like read, write, etc should be implemented in [`operation`] +// using a standalone modules. + #![allow(unused)] +#![feature(let_else)] mod batch; mod bootstrap; @@ -20,7 +33,7 @@ mod router; mod tablet; pub(crate) use batch::StoreContext; -pub use batch::{create_store_batch_system, StoreSystem}; +pub use batch::{create_store_batch_system, StoreRouter, StoreSystem}; pub use bootstrap::Bootstrap; pub use raftstore::{Error, Result}; pub use router::{PeerMsg, PeerTick, StoreMsg, StoreTick}; diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 8c427378da3..583053dd551 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -1,3 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
mod read; +mod ready; + +pub use ready::AsyncWriter; diff --git a/components/raftstore-v2/src/operation/read/local.rs b/components/raftstore-v2/src/operation/read/local.rs index 2e694f11ebc..6601477f8c3 100644 --- a/components/raftstore-v2/src/operation/read/local.rs +++ b/components/raftstore-v2/src/operation/read/local.rs @@ -190,11 +190,7 @@ mod tests { .prefix("test-local-reader") .tempdir() .unwrap(); - let factory = Arc::new(TestTabletFactoryV2::new( - path.path().to_str().unwrap(), - ops, - cf_opts, - )); + let factory = Arc::new(TestTabletFactoryV2::new(path.path(), ops, cf_opts)); let store_meta = StoreMetaDelegate::new(Arc::new(Mutex::new(StoreMeta::::new()))); diff --git a/components/raftstore-v2/src/operation/ready/async_writer.rs b/components/raftstore-v2/src/operation/ready/async_writer.rs new file mode 100644 index 00000000000..457df9307ba --- /dev/null +++ b/components/raftstore-v2/src/operation/ready/async_writer.rs @@ -0,0 +1,199 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + collections::VecDeque, + sync::{atomic::AtomicUsize, Arc}, +}; + +use crossbeam::channel::Sender; +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::raft_serverpb::RaftMessage; +use raftstore::store::{ + local_metrics::RaftMetrics, Config, PersistedNotifier, WriteMsg, WriteRouter, + WriteRouterContext, WriteSenders, WriteTask, +}; +use slog::{warn, Logger}; + +use crate::{ + batch::{StoreContext, StoreRouter}, + PeerMsg, +}; + +#[derive(Debug)] +struct UnpersistedReady { + /// Number of ready. + number: u64, + /// Max number of following ready whose data to be persisted is empty. + max_empty_number: u64, + raft_msgs: Vec>, +} + +/// A writer that handles asynchronous writes. 
+pub struct AsyncWriter { + write_router: WriteRouter, + unpersisted_readies: VecDeque, + persisted_number: u64, +} + +impl AsyncWriter { + pub fn new(region_id: u64, peer_id: u64) -> Self { + let write_router = WriteRouter::new(format!("[region {}] {}", region_id, peer_id)); + Self { + write_router, + unpersisted_readies: VecDeque::new(), + persisted_number: 0, + } + } + + /// Execute the task. + /// + /// If the task takes some time to finish, `None` is returned. Otherwise, + pub fn write( + &mut self, + ctx: &mut impl WriteRouterContext, + task: WriteTask, + ) -> Option> { + if task.has_data() { + self.send(ctx, task); + None + } else { + self.merge(task) + } + } + + pub fn known_largest_number(&self) -> u64 { + self.unpersisted_readies + .back() + .map(|r| r.number) + .unwrap_or(self.persisted_number) + } + + fn send(&mut self, ctx: &mut impl WriteRouterContext, task: WriteTask) { + let ready_number = task.ready_number(); + self.write_router.send_write_msg( + ctx, + self.unpersisted_readies.back().map(|r| r.number), + raftstore::store::WriteMsg::WriteTask(task), + ); + self.unpersisted_readies.push_back(UnpersistedReady { + number: ready_number, + max_empty_number: ready_number, + raft_msgs: vec![], + }); + } + + fn merge(&mut self, task: WriteTask) -> Option> { + let ready_number = task.ready_number(); + if self.unpersisted_readies.is_empty() { + // If this ready don't need to be persisted and there is no previous unpersisted + // ready, we can safely consider it is persisted so the persisted msgs can be + // sent immediately. + self.persisted_number = task.ready_number(); + return Some(task); + } + + // Attach to the last unpersisted ready so that it can be considered to be + // persisted with the last ready at the same time. 
+ let last = self.unpersisted_readies.back_mut().unwrap(); + last.max_empty_number = task.ready_number(); + if !task.messages.is_empty() { + last.raft_msgs.push(task.messages); + } + None + } + + /// Called when an asynchronous write has finished. + pub fn on_persisted( + &mut self, + ctx: &mut impl WriteRouterContext, + ready_number: u64, + logger: &Logger, + ) -> Vec> { + if self.persisted_number >= ready_number { + return vec![]; + } + + let last_unpersisted = self.unpersisted_readies.back(); + if last_unpersisted.map_or(true, |u| u.number < ready_number) { + panic!( + "{:?} ready number is too large {:?} vs {}", + logger.list(), + last_unpersisted, + ready_number + ); + } + + let mut raft_messages = vec![]; + // There must be a match in `self.unpersisted_readies`. + loop { + let Some(v) = self.unpersisted_readies.pop_front() else { + panic!("{:?} ready number not found {}", logger.list(), ready_number); + }; + if v.number > ready_number { + panic!( + "{:?} ready number not matched {:?} vs {}", + logger.list(), + v, + ready_number + ); + } + if raft_messages.is_empty() { + raft_messages = v.raft_msgs; + } else { + raft_messages.extend(v.raft_msgs); + } + if v.number == ready_number { + self.persisted_number = v.max_empty_number; + break; + } + } + + self.write_router + .check_new_persisted(ctx, self.persisted_number); + + raft_messages + } + + pub fn persisted_number(&self) -> u64 { + self.persisted_number + } +} + +impl WriteRouterContext for StoreContext +where + EK: KvEngine, + ER: RaftEngine, +{ + fn write_senders(&self) -> &WriteSenders { + &self.write_senders + } + + fn config(&self) -> &Config { + &self.cfg + } + + fn raft_metrics(&self) -> &RaftMetrics { + &self.raft_metrics + } +} + +impl PersistedNotifier for StoreRouter { + fn notify(&self, region_id: u64, peer_id: u64, ready_number: u64) { + if let Err(e) = self.force_send( + region_id, + PeerMsg::Persisted { + peer_id, + ready_number, + }, + ) { + warn!( + self.logger(), + "failed to send noop to 
trigger persisted ready"; + "region_id" => region_id, + "peer_id" => peer_id, + "ready_number" => ready_number, + "error" => ?e, + ); + } + } +} diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs new file mode 100644 index 00000000000..668453e708b --- /dev/null +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -0,0 +1,304 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module contains the actions that will drive a raft state machine. +//! +//! # Raft Ready +//! +//! Every messages or ticks may have side affect. Handling all those side +//! affect immediately is not efficient. Instead, tikv uses `Ready` to batch up +//! all the side affects and handle them at once for throughput. +//! +//! As raft store is the critical path in the whole system, we avoid most +//! blocking IO. So a typical processing is divided into two steps: +//! +//! - Handle raft ready to process the side affect and send IO tasks to +//! background threads +//! - Receive IO tasks completion and update the raft state machine +//! +//! There two steps can be processed concurrently. + +mod async_writer; + +use engine_traits::{KvEngine, RaftEngine}; +use error_code::ErrorCodeExt; +use kvproto::raft_serverpb::RaftMessage; +use protobuf::Message as _; +use raft::{eraftpb, Ready}; +use raftstore::store::{FetchedLogs, Transport, WriteTask}; +use slog::{debug, error, trace, warn}; + +pub use self::async_writer::AsyncWriter; +use crate::{ + batch::StoreContext, + fsm::{PeerFsm, PeerFsmDelegate}, + raft::{Peer, Storage}, + PeerTick, +}; + +impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { + /// Raft relies on periodic ticks to keep the state machine sync with other + /// peers. 
+ pub fn on_raft_tick(&mut self) { + if self.fsm.peer_mut().tick() { + self.fsm.peer_mut().set_has_ready(); + } + self.schedule_tick(PeerTick::Raft); + } +} + +impl Peer { + #[inline] + fn tick(&mut self) -> bool { + self.raft_group_mut().tick() + } + + /// Callback for fetching logs asynchronously. + pub fn on_fetched_logs(&mut self, fetched_logs: FetchedLogs) { + let FetchedLogs { context, logs } = fetched_logs; + let low = logs.low; + if !self.is_leader() { + self.entry_storage_mut().clean_async_fetch_res(low); + return; + } + if self.term() != logs.term { + self.entry_storage_mut().clean_async_fetch_res(low); + } else { + self.entry_storage_mut() + .update_async_fetch_res(low, Some(logs)); + } + self.raft_group_mut().on_entries_fetched(context); + // clean the async fetch result immediately if not used to free memory + self.entry_storage_mut().update_async_fetch_res(low, None); + self.set_has_ready(); + } + + /// Partially filled a raft message that will be sent to other peer. + fn prepare_raft_message(&mut self) -> RaftMessage { + let mut raft_msg = RaftMessage::new(); + raft_msg.set_region_id(self.region().id); + raft_msg.set_from_peer(self.peer().clone()); + // set current epoch + let epoch = self.storage().region().get_region_epoch(); + let msg_epoch = raft_msg.mut_region_epoch(); + msg_epoch.set_version(epoch.get_version()); + msg_epoch.set_conf_ver(epoch.get_conf_ver()); + raft_msg + } + + /// Transform a message from raft lib to a message that can be sent to other + /// peers. + /// + /// If the recipient can't be found, `None` is returned. 
+ #[inline] + fn build_raft_message( + &mut self, + ctx: &mut StoreContext, + msg: eraftpb::Message, + ) -> Option { + let to_peer = match self.get_peer_from_cache(msg.to) { + Some(p) => p, + None => { + warn!(self.logger, "failed to look up recipient peer"; "to_peer" => msg.to); + return None; + } + }; + + let mut raft_msg = self.prepare_raft_message(); + + raft_msg.set_to_peer(to_peer); + if msg.from != self.peer().id { + debug!( + self.logger, + "redirecting message"; + "msg_type" => ?msg.get_msg_type(), + "from" => msg.get_from(), + "to" => msg.get_to(), + ); + } + raft_msg.set_message(msg); + Some(raft_msg) + } + + /// Send a message. + /// + /// The message is pushed into the send buffer, it may not be sent out until + /// transport is flushed explicitly. + fn send_raft_message( + &mut self, + ctx: &mut StoreContext, + msg: RaftMessage, + ) { + let msg_type = msg.get_message().get_msg_type(); + let to_peer_id = msg.get_to_peer().get_id(); + let to_store_id = msg.get_to_peer().get_store_id(); + + trace!( + self.logger, + "send raft msg"; + "msg_type" => ?msg_type, + "msg_size" => msg.get_message().compute_size(), + "to" => to_peer_id, + ); + + match ctx.trans.send(msg) { + Ok(()) => ctx.raft_metrics.send_message.add(msg_type, true), + Err(e) => { + // We use metrics to observe failure on production. + debug!( + self.logger, + "failed to send msg to other peer"; + "target_peer_id" => to_peer_id, + "target_store_id" => to_store_id, + "err" => ?e, + "error_code" => %e.error_code(), + ); + // unreachable store + self.raft_group_mut().report_unreachable(to_peer_id); + ctx.raft_metrics.send_message.add(msg_type, false); + } + } + } + + fn handle_raft_committed_entries( + &self, + _ctx: &mut crate::batch::StoreContext, + _take_committed_entries: Vec, + ) { + unimplemented!() + } + + /// Processing the ready of raft. A detail description of how it's handled + /// can be found at https://docs.rs/raft/latest/raft/#processing-the-ready-state. 
+ /// + /// It's should be called at the end of every round of processing. Any + /// writes will be handled asynchronously, and be notified once writes + /// are persisted. + #[inline] + pub fn handle_raft_ready(&mut self, ctx: &mut StoreContext) { + let has_ready = self.reset_has_ready(); + if !has_ready { + return; + } + ctx.has_ready = true; + + if !self.raft_group().has_ready() { + return; + } + + debug!(self.logger, "handle raft ready"); + + let mut ready = self.raft_group_mut().ready(); + // Update it after unstable entries pagination is introduced. + debug_assert!(ready.entries().last().map_or_else( + || true, + |entry| entry.index == self.raft_group().raft.raft_log.last_index() + )); + + if !ready.messages().is_empty() { + debug_assert!(self.is_leader()); + for msg in ready.take_messages() { + if let Some(msg) = self.build_raft_message(ctx, msg) { + self.send_raft_message(ctx, msg); + } + } + } + + if !ready.committed_entries().is_empty() { + self.handle_raft_committed_entries(ctx, ready.take_committed_entries()); + } + + let ready_number = ready.number(); + let mut write_task = WriteTask::new(self.region_id(), self.peer_id(), ready_number); + self.storage_mut() + .handle_raft_ready(&mut ready, &mut write_task); + if !ready.persisted_messages().is_empty() { + write_task.messages = ready + .take_persisted_messages() + .into_iter() + .flat_map(|m| self.build_raft_message(ctx, m)) + .collect(); + } + // Ready number should increase monotonically. + assert!(self.async_writer.known_largest_number() < ready.number()); + if let Some(task) = self.async_writer.write(ctx, write_task) { + // So the task doesn't need to be process asynchronously, directly advance. 
+ let mut light_rd = self.raft_group_mut().advance_append(ready); + if !task.messages.is_empty() { + for m in task.messages { + self.send_raft_message(ctx, m); + } + } + if !light_rd.messages().is_empty() || light_rd.commit_index().is_some() { + panic!( + "{:?} unexpected messages [{}] commit index [{:?}]", + self.logger.list(), + light_rd.messages().len(), + light_rd.commit_index() + ); + } + if !light_rd.committed_entries().is_empty() { + self.handle_raft_committed_entries(ctx, light_rd.take_committed_entries()); + } + } else { + // The task will be written asynchronously. Once it's persisted, it will be + // notified by `on_persisted`. + self.raft_group_mut().advance_append_async(ready); + } + + ctx.raft_metrics.ready.has_ready_region += 1; + } + + /// Called when an asynchronously write finishes. + pub fn on_persisted( + &mut self, + ctx: &mut StoreContext, + peer_id: u64, + ready_number: u64, + ) { + if peer_id != self.peer_id() { + error!(self.logger, "peer id not matched"; "persisted_peer_id" => peer_id, "persisted_number" => ready_number); + return; + } + let persisted_message = self + .async_writer + .on_persisted(ctx, ready_number, &self.logger); + for msgs in persisted_message { + for msg in msgs { + self.send_raft_message(ctx, msg); + } + } + let persisted_number = self.async_writer.persisted_number(); + self.raft_group_mut().on_persist_ready(persisted_number); + let persisted_index = self.raft_group().raft.raft_log.persisted; + self.storage_mut() + .entry_storage_mut() + .update_cache_persisted(persisted_index); + // We may need to check if there is persisted committed logs. + self.set_has_ready(); + } +} + +impl Storage { + /// Apply the ready to the storage. If there is any states need to be + /// persisted, it will be written to `write_task`. 
+ fn handle_raft_ready( + &mut self, + ready: &mut Ready, + write_task: &mut WriteTask, + ) { + let prev_raft_state = self.entry_storage().raft_state().clone(); + + // TODO: handle snapshot + + let entry_storage = self.entry_storage_mut(); + if !ready.entries().is_empty() { + entry_storage.append(ready.take_entries(), write_task); + } + if let Some(hs) = ready.hs() { + entry_storage.raft_state_mut().set_hard_state(hs.clone()); + } + if prev_raft_state != *entry_storage.raft_state() { + write_task.raft_state = Some(entry_storage.raft_state().clone()); + } + } +} diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 0c7abf52b58..09646965bda 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -17,7 +17,7 @@ impl Apply { pub fn new(peer: &Peer) -> Self { Apply { tablet: peer.tablet().clone(), - logger: peer.logger().clone(), + logger: peer.logger.clone(), } } } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 70dccd284fa..eb61d744774 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -1,16 +1,17 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::sync::Arc; +use std::{collections::VecDeque, mem, sync::Arc}; use engine_traits::{KvEngine, OpenOptions, RaftEngine, TabletFactory}; use kvproto::{metapb, raft_serverpb::RegionLocalState}; -use raft::{RawNode, INVALID_ID}; -use raftstore::store::{util::find_peer, Config}; +use raft::{RawNode, StateRole, INVALID_ID}; +use raftstore::store::{util::find_peer, Config, EntryStorage, RaftlogFetchTask, WriteRouter}; use slog::{o, Logger}; -use tikv_util::{box_err, config::ReadableSize}; +use tikv_util::{box_err, config::ReadableSize, worker::Scheduler}; use super::storage::Storage; use crate::{ + operation::AsyncWriter, tablet::{self, CachedTablet}, Result, }; @@ -19,7 +20,13 @@ use crate::{ pub struct Peer { raft_group: RawNode>, tablet: CachedTablet, - logger: Logger, + /// We use a cache for looking up peers. Not all peers exist in region's + /// peer list, for example, an isolated peer may need to send/receive + /// messages with unknown peers after recovery. + peer_cache: Vec, + pub(crate) async_writer: AsyncWriter, + has_ready: bool, + pub(crate) logger: Logger, } impl Peer { @@ -32,9 +39,10 @@ impl Peer { store_id: u64, tablet_factory: &dyn TabletFactory, engine: ER, + scheduler: Scheduler, logger: &Logger, ) -> Result> { - let s = match Storage::new(region_id, store_id, engine, logger)? { + let s = match Storage::new(region_id, store_id, engine, scheduler, logger)? 
{ Some(s) => s, None => return Ok(None), }; @@ -83,18 +91,31 @@ impl Peer { Ok(Some(Peer { raft_group: RawNode::new(&raft_cfg, s, &logger)?, tablet: CachedTablet::new(tablet), + has_ready: false, + async_writer: AsyncWriter::new(region_id, peer_id), logger, + peer_cache: vec![], })) } + #[inline] + pub fn region(&self) -> &metapb::Region { + self.raft_group.store().region() + } + #[inline] pub fn region_id(&self) -> u64 { - self.raft_group.store().region_state().get_region().get_id() + self.region().get_id() + } + + #[inline] + pub fn peer(&self) -> &metapb::Peer { + self.raft_group.store().peer() } #[inline] pub fn peer_id(&self) -> u64 { - self.raft_group.store().peer().get_id() + self.peer().get_id() } #[inline] @@ -102,13 +123,96 @@ impl Peer { self.raft_group.store() } + #[inline] + pub fn storage_mut(&mut self) -> &mut Storage { + self.raft_group.mut_store() + } + + #[inline] + pub fn entry_storage(&self) -> &EntryStorage { + self.raft_group.store().entry_storage() + } + + #[inline] + pub fn entry_storage_mut(&mut self) -> &mut EntryStorage { + self.raft_group.mut_store().entry_storage_mut() + } + #[inline] pub fn tablet(&self) -> &CachedTablet { &self.tablet } #[inline] - pub fn logger(&self) -> &Logger { - &self.logger + pub fn tablet_mut(&mut self) -> &mut CachedTablet { + &mut self.tablet + } + + #[inline] + pub fn raft_group(&self) -> &RawNode> { + &self.raft_group + } + + #[inline] + pub fn raft_group_mut(&mut self) -> &mut RawNode> { + &mut self.raft_group + } + + /// Mark the peer has a ready so it will be checked at the end of every + /// processing round. + #[inline] + pub fn set_has_ready(&mut self) { + self.has_ready = true; + } + + /// Mark the peer has no ready and return its previous state. 
+ #[inline] + pub fn reset_has_ready(&mut self) -> bool { + mem::take(&mut self.has_ready) + } + + #[inline] + pub fn insert_peer_cache(&mut self, peer: metapb::Peer) { + for p in self.raft_group.store().region().get_peers() { + if p.get_id() == peer.get_id() { + return; + } + } + for p in &mut self.peer_cache { + if p.get_id() == peer.get_id() { + *p = peer; + return; + } + } + self.peer_cache.push(peer); + } + + #[inline] + pub fn clear_peer_cache(&mut self) { + self.peer_cache.clear(); + } + + #[inline] + pub fn get_peer_from_cache(&self, peer_id: u64) -> Option { + for p in self.raft_group.store().region().get_peers() { + if p.get_id() == peer_id { + return Some(p.clone()); + } + } + self.peer_cache + .iter() + .find(|p| p.get_id() == peer_id) + .cloned() + } + + #[inline] + pub fn is_leader(&self) -> bool { + self.raft_group.raft.state == StateRole::Leader + } + + /// Term of the state machine. + #[inline] + pub fn term(&self) -> u64 { + self.raft_group.raft.term } } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index ff0bd64cd01..4f625b751ac 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -1,17 +1,22 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+use std::fmt::{self, Debug, Formatter}; + use engine_traits::{RaftEngine, RaftLogBatch}; use kvproto::{ metapb::{self, Region}, raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState}, }; use raft::{ - eraftpb::{Entry, Snapshot}, + eraftpb::{ConfState, Entry, HardState, Snapshot}, GetEntriesContext, RaftState, INVALID_ID, }; -use raftstore::store::{util::find_peer, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM}; +use raftstore::store::{ + util::{self, find_peer}, + EntryStorage, RaftlogFetchTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, +}; use slog::{o, Logger}; -use tikv_util::box_err; +use tikv_util::{box_err, worker::Scheduler}; use crate::{Error, Result}; @@ -45,16 +50,56 @@ pub fn write_initial_states(wb: &mut impl RaftLogBatch, region: Region) -> Resul /// A storage for raft. /// /// It's similar to `PeerStorage` in v1. -#[derive(Debug)] pub struct Storage { - engine: ER, + entry_storage: EntryStorage, peer: metapb::Peer, region_state: RegionLocalState, - raft_state: RaftLocalState, - apply_state: RaftApplyState, logger: Logger, } +impl Debug for Storage { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "Storage of [region {}] {}", + self.region().get_id(), + self.peer.get_id() + ) + } +} + +impl Storage { + #[inline] + pub fn entry_storage(&self) -> &EntryStorage { + &self.entry_storage + } + + #[inline] + pub fn entry_storage_mut(&mut self) -> &mut EntryStorage { + &mut self.entry_storage + } + + #[inline] + pub fn region_state(&self) -> &RegionLocalState { + &self.region_state + } + + #[inline] + pub fn region(&self) -> &metapb::Region { + self.region_state.get_region() + } + + #[inline] + pub fn peer(&self) -> &metapb::Peer { + &self.peer + } + + #[inline] + pub fn logger(&self) -> &Logger { + &self.logger + } +} + impl Storage { /// Creates a new storage. 
/// @@ -64,12 +109,17 @@ impl Storage { region_id: u64, store_id: u64, engine: ER, + log_fetch_scheduler: Scheduler, logger: &Logger, ) -> Result>> { - let region_state = match engine.get_region_state(region_id) { + let region_state: RegionLocalState = match engine.get_region_state(region_id) { Ok(Some(s)) => s, res => { - return Err(box_err!("failed to get region state: {:?}", res)); + return Err(box_err!( + "failed to get region state for region {}: {:?}", + region_id, + res + )); } }; @@ -101,53 +151,67 @@ impl Storage { } }; - let mut s = Storage { + let region = region_state.get_region(); + + let entry_storage = EntryStorage::new( + peer.get_id(), engine, - peer: peer.clone(), - region_state, raft_state, apply_state, - logger, - }; - s.validate_state()?; - Ok(Some(s)) - } - - fn validate_state(&mut self) -> Result<()> { - unimplemented!() - } + region, + log_fetch_scheduler, + )?; - #[inline] - pub fn region_state(&self) -> &RegionLocalState { - &self.region_state + Ok(Some(Storage { + entry_storage, + peer: peer.clone(), + region_state, + logger, + })) } #[inline] pub fn raft_state(&self) -> &RaftLocalState { - &self.raft_state + self.entry_storage.raft_state() } #[inline] pub fn apply_state(&self) -> &RaftApplyState { - &self.apply_state + self.entry_storage.apply_state() } #[inline] - pub fn peer(&self) -> &metapb::Peer { - &self.peer - } - - #[inline] - pub fn logger(&self) -> &Logger { - &self.logger + pub fn is_initialized(&self) -> bool { + self.region_state.get_tablet_index() != 0 } } impl raft::Storage for Storage { fn initial_state(&self) -> raft::Result { - unimplemented!() + let hard_state = self.raft_state().get_hard_state().clone(); + // We will persist hard state no matter if it's initialized or not in + // v2, So hard state may not be empty. But when it becomes initialized, + // commit must be changed. 
+ assert_eq!( + hard_state.commit == 0, + !self.is_initialized(), + "region state doesn't match raft state {:?} vs {:?}", + self.region_state(), + self.raft_state() + ); + + if hard_state.commit == 0 { + // If it's uninitialized, return empty state as we consider every + // states are empty at the very beginning. + return Ok(RaftState::new(hard_state, ConfState::default())); + } + Ok(RaftState::new( + hard_state, + util::conf_state_from_region(self.region()), + )) } + #[inline] fn entries( &self, low: u64, @@ -155,19 +219,23 @@ impl raft::Storage for Storage { max_size: impl Into>, context: GetEntriesContext, ) -> raft::Result> { - unimplemented!() + self.entry_storage + .entries(low, high, max_size.into().unwrap_or(u64::MAX), context) } + #[inline] fn term(&self, idx: u64) -> raft::Result { - unimplemented!() + self.entry_storage.term(idx) } + #[inline] fn first_index(&self) -> raft::Result { - unimplemented!() + Ok(self.entry_storage.first_index()) } + #[inline] fn last_index(&self) -> raft::Result { - unimplemented!() + Ok(self.entry_storage.last_index()) } fn snapshot(&self, request_index: u64, to: u64) -> raft::Result { diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs new file mode 100644 index 00000000000..1288f14c3da --- /dev/null +++ b/components/raftstore-v2/src/router/imp.rs @@ -0,0 +1,12 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{KvEngine, RaftEngine}; +use raftstore::store::{FetchedLogs, LogFetchedNotifier}; + +use crate::{batch::StoreRouter, PeerMsg}; + +impl LogFetchedNotifier for StoreRouter { + fn notify(&self, region_id: u64, fetched: FetchedLogs) { + let _ = self.force_send(region_id, PeerMsg::FetchedLogs(fetched)); + } +} diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 37b34bcb666..a71bdc89283 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -10,7 +10,8 @@ use kvproto::{ raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, }; use raftstore::store::{ - fsm::ApplyTaskRes, metrics::RaftEventDurationType, InspectedRaftMessage, RegionSnapshot, + fsm::ApplyTaskRes, metrics::RaftEventDurationType, FetchedLogs, InspectedRaftMessage, + RegionSnapshot, }; use tikv_util::{memory::HeapSize, time::Instant}; @@ -244,6 +245,7 @@ pub enum PeerMsg { ApplyRes { res: ApplyTaskRes, }, + FetchedLogs(FetchedLogs), /// Start the FSM. Start, /// A message only used to notify a peer. @@ -275,6 +277,7 @@ impl fmt::Debug for PeerMsg { "Persisted peer_id {}, ready_number {}", peer_id, ready_number ), + PeerMsg::FetchedLogs(fetched) => write!(fmt, "FetchedLogs {:?}", fetched), } } } diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs index a7c7672b835..11df3cbbabd 100644 --- a/components/raftstore-v2/src/router/mod.rs +++ b/components/raftstore-v2/src/router/mod.rs @@ -1,5 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +mod imp; mod internal_message; mod message; diff --git a/components/raftstore-v2/tests/integrations/mod.rs b/components/raftstore-v2/tests/integrations/mod.rs new file mode 100644 index 00000000000..d93cd09fc62 --- /dev/null +++ b/components/raftstore-v2/tests/integrations/mod.rs @@ -0,0 +1,174 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +#![feature(test)] +#![feature(assert_matches)] +#![feature(custom_test_frameworks)] +#![test_runner(test_util::run_tests)] +// TODO: remove following when tests can be run. +#![allow(dead_code)] +#![allow(unused_imports)] + +use std::sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, +}; + +use crossbeam::channel::{self, Receiver, Sender}; +use engine_test::{ + ctor::{CfOptions, DbOptions}, + kv::{KvTestEngine, TestTabletFactoryV2}, + raft::RaftTestEngine, +}; +use engine_traits::{OpenOptions, TabletFactory, ALL_CFS}; +use kvproto::{metapb::Store, raft_serverpb::RaftMessage}; +use pd_client::RpcClient; +use raftstore::store::{Config, Transport, RAFT_INIT_LOG_INDEX}; +use raftstore_v2::{create_store_batch_system, Bootstrap, StoreRouter, StoreSystem}; +use slog::{o, Logger}; +use tempfile::TempDir; +use test_pd::mocker::Service; +use tikv_util::config::VersionTrack; + +mod test_election; + +type TestRouter = StoreRouter; + +struct TestNode { + _pd_server: test_pd::Server, + _pd_client: RpcClient, + _path: TempDir, + store: Store, + raft_engine: Option, + factory: Option>, + system: Option>, + logger: Logger, +} + +impl TestNode { + fn new() -> TestNode { + let logger = slog_global::borrow_global().new(o!()); + let pd_server = test_pd::Server::new(1); + let pd_client = test_pd::util::new_client(pd_server.bind_addrs(), None); + let path = TempDir::new().unwrap(); + + let cf_opts = ALL_CFS + .iter() + .copied() + .map(|cf| (cf, CfOptions::default())) + .collect(); + let factory = Arc::new(TestTabletFactoryV2::new( + path.path(), + DbOptions::default(), + cf_opts, + )); + let raft_engine = + engine_test::raft::new_engine(&format!("{}", path.path().join("raft").display()), None) + .unwrap(); + let mut bootstrap = Bootstrap::new(&raft_engine, 0, &pd_client, logger.clone()); + let store_id = bootstrap.bootstrap_store().unwrap(); + let mut store = Store::default(); + store.set_id(store_id); + let region = bootstrap + .bootstrap_first_region(&store, store_id) + .unwrap() + 
.unwrap(); + if factory.exists(region.get_id(), RAFT_INIT_LOG_INDEX) { + factory + .destroy_tablet(region.get_id(), RAFT_INIT_LOG_INDEX) + .unwrap(); + } + factory + .open_tablet( + region.get_id(), + Some(RAFT_INIT_LOG_INDEX), + OpenOptions::default().set_create_new(true), + ) + .unwrap(); + + TestNode { + _pd_server: pd_server, + _pd_client: pd_client, + _path: path, + store, + raft_engine: Some(raft_engine), + factory: Some(factory), + system: None, + logger, + } + } + + fn start( + &mut self, + cfg: &Arc>, + trans: impl Transport + 'static, + ) -> TestRouter { + let (router, mut system) = create_store_batch_system::( + &cfg.value(), + self.store.clone(), + self.logger.clone(), + ); + system + .start( + self.store.clone(), + cfg.clone(), + self.raft_engine.clone().unwrap(), + self.factory.clone().unwrap(), + trans, + &router, + ) + .unwrap(); + self.system = Some(system); + router + } + + fn stop(&mut self) { + if let Some(mut system) = self.system.take() { + system.shutdown(); + } + } +} + +impl Drop for TestNode { + fn drop(&mut self) { + self.stop(); + self.raft_engine.take(); + self.factory.take(); + } +} + +#[derive(Clone)] +pub struct TestTransport { + tx: Sender, + flush_cnt: Arc, +} + +fn new_test_transport() -> (TestTransport, Receiver) { + let (tx, rx) = channel::unbounded(); + let flush_cnt = Default::default(); + (TestTransport { tx, flush_cnt }, rx) +} + +impl Transport for TestTransport { + fn send(&mut self, msg: RaftMessage) -> raftstore_v2::Result<()> { + let _ = self.tx.send(msg); + Ok(()) + } + + fn set_store_allowlist(&mut self, _stores: Vec) {} + + fn need_flush(&self) -> bool { + !self.tx.is_empty() + } + + fn flush(&mut self) { + self.flush_cnt.fetch_add(1, Ordering::SeqCst); + } +} + +fn setup_default_cluster() -> (TestNode, Receiver, TestRouter) { + let mut node = TestNode::new(); + let cfg = Default::default(); + let (tx, rx) = new_test_transport(); + let router = node.start(&cfg, tx); + (node, rx, router) +} diff --git 
a/components/raftstore-v2/tests/integrations/test_election.rs b/components/raftstore-v2/tests/integrations/test_election.rs new file mode 100644 index 00000000000..cf3a0cc4906 --- /dev/null +++ b/components/raftstore-v2/tests/integrations/test_election.rs @@ -0,0 +1,10 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use raftstore_v2::PeerMsg; + +// TODO: finish test case when callback is added. +#[test] +fn test_smoke() { + let (_node, _transport, router) = super::setup_default_cluster(); + router.send(2, PeerMsg::Noop).unwrap(); +} diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 72fd52ea4d4..a007d168474 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -33,6 +33,7 @@ use tikv_util::{ warn, }; +use super::write_router::WriteSenders; use crate::{ store::{ config::Config, @@ -854,8 +855,8 @@ where EK: KvEngine, ER: RaftEngine, { - pub fn senders(&self) -> &Vec>> { - &self.writers + pub fn senders(&self) -> WriteSenders { + WriteSenders::new(self.writers.clone()) } pub fn spawn( diff --git a/components/raftstore/src/store/async_io/write_router.rs b/components/raftstore/src/store/async_io/write_router.rs index 6b19212c164..6c1db6419cf 100644 --- a/components/raftstore/src/store/async_io/write_router.rs +++ b/components/raftstore/src/store/async_io/write_router.rs @@ -5,6 +5,7 @@ use std::{ mem, + ops::Index, sync::{ atomic::{AtomicUsize, Ordering}, Arc, @@ -28,8 +29,7 @@ where EK: KvEngine, ER: RaftEngine, { - fn write_senders(&self) -> &Vec>>; - fn io_reschedule_concurrent_count(&self) -> &Arc; + fn write_senders(&self) -> &WriteSenders; fn config(&self) -> &Config; fn raft_metrics(&self) -> &RaftMetrics; } @@ -39,14 +39,10 @@ where EK: KvEngine, ER: RaftEngine, { - fn write_senders(&self) -> &Vec>> { + fn write_senders(&self) -> &WriteSenders { &self.write_senders } - fn 
io_reschedule_concurrent_count(&self) -> &Arc { - &self.io_reschedule_concurrent_count - } - fn config(&self) -> &Config { &self.cfg } @@ -120,7 +116,8 @@ where // The peer must be destroyed after all previous write tasks have been finished. // So do not worry about a destroyed peer being counted in // `io_reschedule_concurrent_count`. - ctx.io_reschedule_concurrent_count() + ctx.write_senders() + .io_reschedule_concurrent_count .fetch_sub(1, Ordering::SeqCst); STORE_IO_RESCHEDULE_PEER_TOTAL_GAUGE.dec(); @@ -200,7 +197,8 @@ where // concurrent count of rescheduling peer fsm because rescheduling will // introduce performance penalty. let success = ctx - .io_reschedule_concurrent_count() + .write_senders() + .io_reschedule_concurrent_count .fetch_update(Ordering::SeqCst, Ordering::Relaxed, |c| { if c < ctx.config().io_reschedule_concurrent_max_count { Some(c + 1) @@ -245,6 +243,37 @@ where } } +/// Senders for asynchronous writes. There can be multiple senders, generally +/// you should use `WriteRouter` to decide which sender to be used. 
+#[derive(Clone)] +pub struct WriteSenders { + write_senders: Vec>>, + io_reschedule_concurrent_count: Arc, +} + +impl WriteSenders { + pub fn new(write_senders: Vec>>) -> Self { + WriteSenders { + write_senders, + io_reschedule_concurrent_count: Arc::default(), + } + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.write_senders.is_empty() + } +} + +impl Index for WriteSenders { + type Output = Sender>; + + #[inline] + fn index(&self, index: usize) -> &Sender> { + &self.write_senders[index] + } +} + #[cfg(test)] mod tests { use std::thread; @@ -257,8 +286,7 @@ mod tests { struct TestWriteRouter { receivers: Vec>>, - senders: Vec>>, - io_reschedule_concurrent_count: Arc, + senders: WriteSenders, config: Config, raft_metrics: RaftMetrics, } @@ -273,8 +301,7 @@ mod tests { } Self { receivers, - senders, - io_reschedule_concurrent_count: Arc::new(AtomicUsize::new(0)), + senders: WriteSenders::new(senders), config, raft_metrics: RaftMetrics::new(true), } @@ -293,7 +320,10 @@ mod tests { } fn must_same_reschedule_count(&self, count: usize) { - let cnt = self.io_reschedule_concurrent_count.load(Ordering::Relaxed); + let cnt = self + .senders + .io_reschedule_concurrent_count + .load(Ordering::Relaxed); if cnt != count { panic!("reschedule count not same, {} != {}", cnt, count); } @@ -301,14 +331,10 @@ mod tests { } impl WriteRouterContext for TestWriteRouter { - fn write_senders(&self) -> &Vec>> { + fn write_senders(&self) -> &WriteSenders { &self.senders } - fn io_reschedule_concurrent_count(&self) -> &Arc { - &self.io_reschedule_concurrent_count - } - fn config(&self) -> &Config { &self.config } @@ -407,7 +433,9 @@ mod tests { t.must_same_reschedule_count(0); thread::sleep(Duration::from_millis(10)); - t.io_reschedule_concurrent_count.store(4, Ordering::Relaxed); + t.senders + .io_reschedule_concurrent_count + .store(4, Ordering::Relaxed); // Should retry reschedule next time because the limitation of concurrent count. 
// However it's possible that it will not scheduled due to random // so using loop here. @@ -428,7 +456,9 @@ mod tests { thread::sleep(Duration::from_millis(10)); } - t.io_reschedule_concurrent_count.store(3, Ordering::Relaxed); + t.senders + .io_reschedule_concurrent_count + .store(3, Ordering::Relaxed); thread::sleep(Duration::from_millis(RETRY_SCHEDULE_MILLISECONS + 2)); // Should reschedule now r.send_write_msg(&mut t, Some(40), WriteMsg::Shutdown); diff --git a/components/raftstore/src/store/async_io/write_tests.rs b/components/raftstore/src/store/async_io/write_tests.rs index aaaed69c555..1642c90d075 100644 --- a/components/raftstore/src/store/async_io/write_tests.rs +++ b/components/raftstore/src/store/async_io/write_tests.rs @@ -246,8 +246,8 @@ impl TestWriters { } } - fn write_sender(&self, id: usize) -> &Sender> { - &self.writers.senders()[id] + fn write_sender(&self, id: usize) -> Sender> { + self.writers.senders()[id].clone() } } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index e4707947fbb..2452f177cff 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -1306,8 +1306,8 @@ where SignificantMsg::RaftLogGcFlushed => { self.on_raft_log_gc_flushed(); } - SignificantMsg::RaftlogFetched { context, res } => { - self.on_raft_log_fetched(context, res); + SignificantMsg::RaftlogFetched(fetched_logs) => { + self.on_raft_log_fetched(fetched_logs.context, fetched_logs.logs); } SignificantMsg::EnterForceLeaderState { syncer, diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index ecdb8653147..3c4e77ff4b9 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -10,10 +10,7 @@ use std::{ }, mem, ops::{Deref, DerefMut}, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, Mutex, - }, + sync::{atomic::Ordering, Arc, Mutex}, time::{Duration, Instant}, u64, 
}; @@ -24,7 +21,7 @@ use batch_system::{ }; use collections::{HashMap, HashMapEntry, HashSet}; use concurrency_manager::ConcurrencyManager; -use crossbeam::channel::{unbounded, Sender, TryRecvError, TrySendError}; +use crossbeam::channel::{unbounded, TryRecvError, TrySendError}; use engine_traits::{ CompactedEvent, DeleteStrategy, Engines, KvEngine, Mutable, PerfContextKind, RaftEngine, RaftLogBatch, Range, WriteBatch, WriteOptions, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, @@ -71,7 +68,10 @@ use crate::{ RegionChangeReason, }, store::{ - async_io::write::{StoreWriters, Worker as WriteWorker, WriteMsg}, + async_io::{ + write::{StoreWriters, Worker as WriteWorker, WriteMsg}, + write_router::WriteSenders, + }, config::Config, fsm::{ create_apply_batch_system, @@ -518,9 +518,8 @@ where /// Disk usage for other stores. The store itself is not included. /// Only contains items which is not `DiskUsage::Normal`. pub store_disk_usages: HashMap, - pub write_senders: Vec>>, + pub write_senders: WriteSenders, pub sync_write_worker: Option, T>>, - pub io_reschedule_concurrent_count: Arc, pub pending_latency_inspect: Vec, } @@ -1072,8 +1071,7 @@ pub struct RaftPollerBuilder { pub engines: Engines, global_replication_state: Arc>, feature_gate: FeatureGate, - write_senders: Vec>>, - io_reschedule_concurrent_count: Arc, + write_senders: WriteSenders, } impl RaftPollerBuilder { @@ -1313,7 +1311,6 @@ where store_disk_usages: Default::default(), write_senders: self.write_senders.clone(), sync_write_worker, - io_reschedule_concurrent_count: self.io_reschedule_concurrent_count.clone(), pending_latency_inspect: vec![], }; ctx.update_ticks_timeout(); @@ -1364,7 +1361,6 @@ where global_replication_state: self.global_replication_state.clone(), feature_gate: self.feature_gate.clone(), write_senders: self.write_senders.clone(), - io_reschedule_concurrent_count: self.io_reschedule_concurrent_count.clone(), } } } @@ -1558,8 +1554,7 @@ impl RaftBatchSystem { store_meta, pending_create_peers: 
Arc::new(Mutex::new(HashMap::default())), feature_gate: pd_client.feature_gate().clone(), - write_senders: self.store_writers.senders().clone(), - io_reschedule_concurrent_count: Arc::new(AtomicUsize::new(0)), + write_senders: self.store_writers.senders(), }; let region_peers = builder.init()?; self.start_system::( diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index ad730206175..b5a35461728 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -30,7 +30,7 @@ pub use self::msg::PeerInternalStat; pub use self::{ async_io::{ write::{PersistedNotifier, StoreWriters, Worker as WriteWorker, WriteMsg, WriteTask}, - write_router::{WriteRouter, WriteRouterContext}, + write_router::{WriteRouter, WriteRouterContext, WriteSenders}, }, bootstrap::{ bootstrap_store, clear_prepare_bootstrap_cluster, clear_prepare_bootstrap_key, @@ -68,10 +68,10 @@ pub use self::{ util::{RegionReadProgress, RegionReadProgressRegistry}, worker::{ AutoSplitController, Bucket, BucketRange, CachedReadDelegate, CheckLeaderRunner, - CheckLeaderTask, FlowStatistics, FlowStatsReporter, KeyEntry, LocalReadContext, - LocalReader, PdTask, QueryStats, RaftlogFetchRunner, RaftlogFetchTask, ReadDelegate, - ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, - SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, StoreMetaDelegate, - TrackVer, WriteStats, + CheckLeaderTask, FetchedLogs, FlowStatistics, FlowStatsReporter, KeyEntry, + LocalReadContext, LocalReader, LogFetchedNotifier, PdTask, QueryStats, RaftlogFetchRunner, + RaftlogFetchTask, ReadDelegate, ReadExecutor, ReadExecutorProvider, ReadProgress, + ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, + SplitConfigManager, StoreMetaDelegate, TrackVer, WriteStats, }, }; diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 
ce812d5ef24..947e9e074fd 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -19,12 +19,12 @@ use kvproto::{ }; #[cfg(any(test, feature = "testexport"))] use pd_client::BucketMeta; -use raft::{GetEntriesContext, SnapshotStatus}; +use raft::SnapshotStatus; use smallvec::{smallvec, SmallVec}; use tikv_util::{deadline::Deadline, escape, memory::HeapSize, time::Instant}; use tracker::{get_tls_tracker_token, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}; -use super::{local_metrics::TimeTracker, AbstractPeer, RegionSnapshot}; +use super::{local_metrics::TimeTracker, worker::FetchedLogs, AbstractPeer, RegionSnapshot}; use crate::store::{ fsm::apply::{CatchUpLogs, ChangeObserver, TaskRes as ApplyTaskRes}, metrics::RaftEventDurationType, @@ -34,7 +34,7 @@ use crate::store::{ }, util::{KeysInfoFormatter, LatencyInspector}, worker::{Bucket, BucketRange}, - RaftlogFetchResult, SnapKey, + SnapKey, }; #[derive(Debug)] @@ -357,10 +357,7 @@ where LeaderCallback(Callback), RaftLogGcFlushed, // Reports the result of asynchronous Raft logs fetching. 
- RaftlogFetched { - context: GetEntriesContext, - res: Box, - }, + RaftlogFetched(FetchedLogs), EnterForceLeaderState { syncer: UnsafeRecoveryForceLeaderSyncer, failed_stores: HashSet, diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 5ad6395dd33..129dac6dbb5 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -1160,7 +1160,9 @@ pub mod tests { entry_storage::tests::validate_cache, fsm::apply::compact_raft_log, initial_region, prepare_bootstrap_cluster, - worker::{RaftlogFetchRunner, RegionRunner, RegionTask}, + worker::{ + FetchedLogs, LogFetchedNotifier, RaftlogFetchRunner, RegionRunner, RegionTask, + }, }, }; @@ -1383,35 +1385,20 @@ pub mod tests { } } - use crate::{ - store::{SignificantMsg, SignificantRouter}, - Result as RaftStoreResult, - }; - - pub struct TestRouter { - ch: SyncSender>, + pub struct TestRouter { + ch: SyncSender, } - impl TestRouter { - pub fn new() -> (Self, Receiver>) { + impl TestRouter { + pub fn new() -> (Self, Receiver) { let (tx, rx) = sync_channel(1); (Self { ch: tx }, rx) } } - impl SignificantRouter for TestRouter - where - EK: KvEngine, - { - /// Sends a significant message. We should guarantee that the message - /// can't be dropped. 
- fn significant_send( - &self, - _: u64, - msg: SignificantMsg, - ) -> RaftStoreResult<()> { - self.ch.send(msg).unwrap(); - Ok(()) + impl LogFetchedNotifier for TestRouter { + fn notify(&self, _region_id: u64, fetched_logs: FetchedLogs) { + self.ch.send(fetched_logs).unwrap(); } } @@ -1486,24 +1473,16 @@ pub mod tests { let raftlog_fetch_scheduler = raftlog_fetch_worker.scheduler(); let mut store = new_storage_from_ents(region_scheduler, raftlog_fetch_scheduler, &td, &ents); - raftlog_fetch_worker.start(RaftlogFetchRunner::::new( - router, - store.engines.raft.clone(), - )); + raftlog_fetch_worker.start(RaftlogFetchRunner::new(router, store.engines.raft.clone())); store.compact_entry_cache(5); let mut e = store.entries(lo, hi, maxsize, GetEntriesContext::empty(true)); if e == Err(raft::Error::Store( raft::StorageError::LogTemporarilyUnavailable, )) { let res = rx.recv().unwrap(); - match res { - SignificantMsg::RaftlogFetched { res, context } => { - store.update_async_fetch_res(lo, Some(res)); - count += 1; - e = store.entries(lo, hi, maxsize, context); - } - _ => unreachable!(), - }; + store.update_async_fetch_res(lo, Some(res.logs)); + count += 1; + e = store.entries(lo, hi, maxsize, res.context); } if e != wentries { panic!("#{}: expect entries {:?}, got {:?}", i, wentries, e); diff --git a/components/raftstore/src/store/transport.rs b/components/raftstore/src/store/transport.rs index f64fbae037e..19b825ac20c 100644 --- a/components/raftstore/src/store/transport.rs +++ b/components/raftstore/src/store/transport.rs @@ -8,6 +8,7 @@ use engine_traits::{KvEngine, RaftEngine, Snapshot}; use kvproto::raft_serverpb::RaftMessage; use tikv_util::{error, warn}; +use super::worker::{FetchedLogs, LogFetchedNotifier}; use crate::{ store::{CasualMessage, PeerMsg, RaftCommand, RaftRouter, SignificantMsg, StoreMsg}, DiscardReason, Error, Result, @@ -171,3 +172,11 @@ where } } } + +impl LogFetchedNotifier for RaftRouter { + #[inline] + fn notify(&self, region_id: u64, fetched: 
FetchedLogs) { + // Ignore region not found as it may be removed. + let _ = self.significant_send(region_id, SignificantMsg::RaftlogFetched(fetched)); + } +} diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index 2298710ad63..4910f3fdd2b 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -30,7 +30,9 @@ pub use self::{ Runner as PdRunner, Task as PdTask, }, query_stats::QueryStats, - raftlog_fetch::{Runner as RaftlogFetchRunner, Task as RaftlogFetchTask}, + raftlog_fetch::{ + FetchedLogs, LogFetchedNotifier, Runner as RaftlogFetchRunner, Task as RaftlogFetchTask, + }, raftlog_gc::{Runner as RaftlogGcRunner, Task as RaftlogGcTask}, read::{ CachedReadDelegate, LocalReadContext, LocalReader, Progress as ReadProgress, ReadDelegate, diff --git a/components/raftstore/src/store/worker/raftlog_fetch.rs b/components/raftstore/src/store/worker/raftlog_fetch.rs index 63bccf6324a..b3de87f7715 100644 --- a/components/raftstore/src/store/worker/raftlog_fetch.rs +++ b/components/raftstore/src/store/worker/raftlog_fetch.rs @@ -2,12 +2,12 @@ use std::fmt; -use engine_traits::{KvEngine, RaftEngine}; +use engine_traits::RaftEngine; use fail::fail_point; use raft::GetEntriesContext; use tikv_util::worker::Runnable; -use crate::store::{RaftlogFetchResult, SignificantMsg, SignificantRouter, MAX_INIT_ENTRY_COUNT}; +use crate::store::{RaftlogFetchResult, MAX_INIT_ENTRY_COUNT}; pub enum Task { PeerStorage { @@ -42,32 +42,39 @@ impl fmt::Display for Task { } } -pub struct Runner +#[derive(Debug)] +pub struct FetchedLogs { + pub context: GetEntriesContext, + pub logs: Box, +} + +/// A router for receiving fetched result. 
+pub trait LogFetchedNotifier: Send { + fn notify(&self, region_id: u64, fetched: FetchedLogs); +} + +pub struct Runner where - EK: KvEngine, ER: RaftEngine, - R: SignificantRouter, + N: LogFetchedNotifier, { - router: R, + notifier: N, raft_engine: ER, - _phantom: std::marker::PhantomData, } -impl> Runner { - pub fn new(router: R, raft_engine: ER) -> Runner { +impl Runner { + pub fn new(notifier: N, raft_engine: ER) -> Runner { Runner { - router, + notifier, raft_engine, - _phantom: std::marker::PhantomData, } } } -impl Runnable for Runner +impl Runnable for Runner where - EK: KvEngine, ER: RaftEngine, - R: SignificantRouter, + N: LogFetchedNotifier, { type Task = Task; @@ -97,12 +104,11 @@ where .map(|c| (*c as u64) != high - low) .unwrap_or(false); fail_point!("worker_async_fetch_raft_log"); - // it may return a region not found error as the region could be merged. - let _ = self.router.significant_send( + self.notifier.notify( region_id, - SignificantMsg::RaftlogFetched { + FetchedLogs { context, - res: Box::new(RaftlogFetchResult { + logs: Box::new(RaftlogFetchResult { ents: res.map(|_| ents).map_err(|e| e.into()), low, max_size: max_size as u64, From 1ea26a2ac8761af356cc5c0825eb89a0b8fc9749 Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 17 Aug 2022 02:08:51 -0700 Subject: [PATCH 0159/1149] raftstore-v2: add read write channel (#13245) ref tikv/tikv#12842 v2 uses channel instead of callbacks for proposals, so async/await has first class support. We can further reduce allocations by introducing channel pool. 
Signed-off-by: Jay Lee --- Cargo.lock | 2 +- components/raftstore-v2/Cargo.toml | 2 +- components/raftstore-v2/src/batch/store.rs | 4 +- components/raftstore-v2/src/fsm/peer.rs | 13 +- components/raftstore-v2/src/router/imp.rs | 3 +- .../src/router/internal_message.rs | 3 + components/raftstore-v2/src/router/message.rs | 201 +++----- components/raftstore-v2/src/router/mod.rs | 9 +- .../src/router/response_channel.rs | 477 ++++++++++++++++++ components/raftstore/src/lib.rs | 1 + components/raftstore/src/store/fsm/apply.rs | 141 +++--- components/raftstore/src/store/fsm/peer.rs | 30 +- components/raftstore/src/store/msg.rs | 125 ++++- components/raftstore/src/store/peer.rs | 51 +- components/raftstore/src/store/read_queue.rs | 88 ++-- 15 files changed, 778 insertions(+), 372 deletions(-) create mode 100644 components/raftstore-v2/src/router/response_channel.rs diff --git a/Cargo.lock b/Cargo.lock index 93ac7ddd600..3083e56ef23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4196,7 +4196,7 @@ dependencies = [ "engine_traits", "error_code", "fail", - "futures-util", + "futures 0.3.15", "keys", "kvproto", "log_wrappers", diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 29e68517441..f526aeda9c4 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -35,7 +35,7 @@ crossbeam = "0.8" engine_traits = { path = "../engine_traits" } error_code = { path = "../error_code" } fail = "0.5" -futures-util = { version = "0.3", features = ["compat"] } +futures = { version = "0.3", features = ["compat"] } keys = { path = "../keys", default-features = false } kvproto = { git = "https://github.com/pingcap/kvproto.git" } log_wrappers = { path = "../log_wrappers" } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 739240f84e0..d4cba3d9381 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -13,7 +13,7 @@ 
use batch_system::{ use collections::HashMap; use crossbeam::channel::Sender; use engine_traits::{Engines, KvEngine, RaftEngine, TabletFactory}; -use futures_util::{compat::Future01CompatExt, FutureExt}; +use futures::{compat::Future01CompatExt, FutureExt}; use kvproto::{metapb::Store, raft_serverpb::PeerState}; use raft::INVALID_ID; use raftstore::store::{ @@ -89,7 +89,7 @@ struct StorePoller { cfg_tracker: Tracker, /// Buffers to hold in-coming messages. store_msg_buf: Vec, - peer_msg_buf: Vec>, + peer_msg_buf: Vec, /// These fields controls the timing of flushing messages generated by /// FSMs. last_flush_time: TiInstant, diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 696a1e5ddf4..307da362330 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -17,12 +17,12 @@ use tikv_util::{ use crate::{batch::StoreContext, raft::Peer, PeerMsg, PeerTick, Result}; -pub type SenderFsmPair = (LooseBoundedSender>, Box>); +pub type SenderFsmPair = (LooseBoundedSender, Box>); pub struct PeerFsm { peer: Peer, mailbox: Option>>, - receiver: Receiver>, + receiver: Receiver, /// A registry for all scheduled ticks. This can avoid scheduling ticks /// twice accidentally. tick_registry: u16, @@ -62,7 +62,7 @@ impl PeerFsm { /// capacity is reached or there is no more pending messages. /// /// Returns how many messages are fetched. 
- pub fn recv(&mut self, peer_msg_buf: &mut Vec>) -> usize { + pub fn recv(&mut self, peer_msg_buf: &mut Vec) -> usize { let l = peer_msg_buf.len(); for i in l..peer_msg_buf.capacity() { match self.receiver.try_recv() { @@ -80,7 +80,7 @@ impl PeerFsm { } impl Fsm for PeerFsm { - type Message = PeerMsg; + type Message = PeerMsg; #[inline] fn is_stopped(&self) -> bool { @@ -183,13 +183,14 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, } } - pub fn on_msgs(&mut self, peer_msgs_buf: &mut Vec>) { + pub fn on_msgs(&mut self, peer_msgs_buf: &mut Vec) { for msg in peer_msgs_buf.drain(..) { match msg { PeerMsg::RaftMessage(_) => unimplemented!(), + PeerMsg::RaftQuery(_) => unimplemented!(), PeerMsg::RaftCommand(_) => unimplemented!(), PeerMsg::Tick(tick) => self.on_tick(tick), - PeerMsg::ApplyRes { res } => unimplemented!(), + PeerMsg::ApplyRes(res) => unimplemented!(), PeerMsg::Start => self.on_start(), PeerMsg::Noop => unimplemented!(), PeerMsg::Persisted { diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 1288f14c3da..401961dfdb1 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -3,7 +3,8 @@ use engine_traits::{KvEngine, RaftEngine}; use raftstore::store::{FetchedLogs, LogFetchedNotifier}; -use crate::{batch::StoreRouter, PeerMsg}; +use super::PeerMsg; +use crate::batch::StoreRouter; impl LogFetchedNotifier for StoreRouter { fn notify(&self, region_id: u64, fetched: FetchedLogs) { diff --git a/components/raftstore-v2/src/router/internal_message.rs b/components/raftstore-v2/src/router/internal_message.rs index f5ef72d8e30..05653e4fdcc 100644 --- a/components/raftstore-v2/src/router/internal_message.rs +++ b/components/raftstore-v2/src/router/internal_message.rs @@ -1,3 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
pub enum ApplyTask {} + +#[derive(Debug)] +pub enum ApplyRes {} diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index a71bdc89283..3f0dadaed04 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -1,137 +1,23 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. // #[PerformanceCriticalPath] -use std::{fmt, marker::PhantomData}; +use std::fmt; use engine_traits::{KvEngine, Snapshot}; use kvproto::{ - kvrpcpb::ExtraOp as TxnExtraOp, + cdcpb::Event, metapb, raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, }; use raftstore::store::{ - fsm::ApplyTaskRes, metrics::RaftEventDurationType, FetchedLogs, InspectedRaftMessage, - RegionSnapshot, + metrics::RaftEventDurationType, FetchedLogs, InspectedRaftMessage, RegionSnapshot, }; -use tikv_util::{memory::HeapSize, time::Instant}; +use tikv_util::time::Instant; -pub struct WriteResponseChannel; - -impl WriteResponseChannel { - /// Called after a request is proposed to the raft group successfully. It's - /// used to notify the caller to move on early because it's very likely the - /// request will be applied to the raftstore. - pub fn notify_proposed(&self) {} - - /// Called after a request is committed and before it's being applied, and - /// it's guaranteed that the request will be successfully applied soon. - pub fn notify_committed(&self) {} - - pub fn notify_applied(&self, _res: Result<(), RaftCmdResponse>) {} -} - -pub struct ReadResponseChannel { - _snap: PhantomData, -} - -pub struct ReadResponse { - pub snapshot: RegionSnapshot, - // What is this? - pub txn_extra_op: TxnExtraOp, -} - -impl ReadResponseChannel { - pub fn notify_read(&self, _res: Result, RaftCmdResponse>) {} -} - -// This is only necessary because of seeming limitations in derive(Clone) w/r/t -// generics. If it can be deleted in the future in favor of derive, it should -// be. 
-impl Clone for ReadResponse -where - S: Snapshot, -{ - fn clone(&self) -> ReadResponse { - ReadResponse { - snapshot: self.snapshot.clone(), - txn_extra_op: self.txn_extra_op, - } - } -} - -/// Variants of channels for `Msg`. -/// - `Read`: a channel for read only requests including `StatusRequest`, -/// `GetRequest` and `SnapRequest` -/// - `Write`: a channel for write only requests including `AdminRequest` -/// `PutRequest`, `DeleteRequest` and `DeleteRangeRequest`. -/// Prefer channel rather than callback because: -/// 1. channel can be reused, hence reduce allocations. -/// 2. channel may not need dynamic dispatch. -/// 3. caller can use async fashion. -/// 4. there will be no callback leak. -pub enum ResponseChannel { - /// No callback. - None, - /// Read callback. - Read(ReadResponseChannel), - /// Write callback. - Write(WriteResponseChannel), -} - -impl HeapSize for ResponseChannel {} - -impl ResponseChannel -where - S: Snapshot, -{ - pub fn notify_applied(self, resp: RaftCmdResponse) { - match self { - ResponseChannel::None => (), - ResponseChannel::Read(read) => { - read.notify_read(Err(resp)); - } - ResponseChannel::Write(write) => { - write.notify_applied(Err(resp)); - } - } - } - - pub fn notify_proposed(&mut self) { - if let ResponseChannel::Write(write) = self { - write.notify_proposed(); - } - } - - pub fn notify_committed(&mut self) { - if let ResponseChannel::Write(write) = self { - write.notify_committed(); - } - } - - pub fn invoke_read(self, args: ReadResponse) { - match self { - ResponseChannel::Read(read) => read.notify_read(Ok(args)), - other => panic!("expect Callback::Read(..), got {:?}", other), - } - } - - pub fn is_none(&self) -> bool { - matches!(self, ResponseChannel::None) - } -} - -impl fmt::Debug for ResponseChannel -where - S: Snapshot, -{ - fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - ResponseChannel::None => write!(fmt, "Callback::None"), - ResponseChannel::Read(_) => write!(fmt, 
"Callback::Read(..)"), - ResponseChannel::Write { .. } => write!(fmt, "Callback::Write(..)"), - } - } -} +use super::{ + response_channel::{CmdResChannel, QueryResChannel}, + ApplyRes, +}; #[derive(Debug, Clone, Copy, PartialEq, Hash)] #[repr(u8)] @@ -208,65 +94,96 @@ impl StoreTick { } } -/// Raft command is the command that is expected to be proposed by the -/// leader of the target raft group. -#[derive(Debug)] -pub struct RaftCommand { +/// Command that can be handled by raftstore. +pub struct RaftRequest { pub send_time: Instant, pub request: RaftCmdRequest, - pub ch: ResponseChannel, } -impl RaftCommand { - #[inline] - pub fn new(request: RaftCmdRequest, ch: ResponseChannel) -> RaftCommand { - RaftCommand { +impl RaftRequest { + pub fn new(request: RaftCmdRequest) -> Self { + RaftRequest { + send_time: Instant::now(), request, + } + } +} + +/// A query that won't change any state. So it doesn't have to be replicated to +/// all replicas. +pub struct RaftQuery { + pub req: RaftRequest, + pub ch: QueryResChannel, +} + +impl RaftQuery { + #[inline] + pub fn new(request: RaftCmdRequest, ch: QueryResChannel) -> Self { + Self { + req: RaftRequest::new(request), + ch, + } + } +} + +/// Commands that change the inernal states. It will be transformed into logs +/// and reach consensus in the raft group. +pub struct RaftCommand { + pub cmd: RaftRequest, + pub ch: CmdResChannel, +} + +impl RaftCommand { + #[inline] + pub fn new(request: RaftCmdRequest, ch: CmdResChannel) -> Self { + Self { + cmd: RaftRequest::new(request), ch, - send_time: Instant::now(), } } } /// Message that can be sent to a peer. -pub enum PeerMsg { +pub enum PeerMsg { /// Raft message is the message sent between raft nodes in the same /// raft group. Messages need to be redirected to raftstore if target /// peer doesn't exist. RaftMessage(InspectedRaftMessage), - /// Raft command is the command that is expected to be proposed by the - /// leader of the target raft group. 
If it's failed to be sent, callback - /// usually needs to be called before dropping in case of resource leak. - RaftCommand(RaftCommand), + /// Read command only involves read operations, they are usually processed + /// using lease or read index. + RaftQuery(RaftQuery), + /// Proposal needs to be processed by all peers in a raft group. They will + /// be transformed into logs and be proposed by the leader peer. + RaftCommand(RaftCommand), /// Tick is periodical task. If target peer doesn't exist there is a /// potential that the raft node will not work anymore. Tick(PeerTick), /// Result of applying committed entries. The message can't be lost. - ApplyRes { - res: ApplyTaskRes, - }, + ApplyRes(ApplyRes), FetchedLogs(FetchedLogs), /// Start the FSM. Start, /// A message only used to notify a peer. Noop, + /// A message that indicates an asynchronous write has finished. Persisted { peer_id: u64, ready_number: u64, }, } -impl fmt::Debug for PeerMsg { +impl fmt::Debug for PeerMsg { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { match self { PeerMsg::RaftMessage(_) => write!(fmt, "Raft Message"), + PeerMsg::RaftQuery(_) => write!(fmt, "Raft Query"), PeerMsg::RaftCommand(_) => write!(fmt, "Raft Command"), PeerMsg::Tick(tick) => write! 
{ fmt, "{:?}", tick }, - PeerMsg::ApplyRes { res } => write!(fmt, "ApplyRes {:?}", res), + PeerMsg::ApplyRes(res) => write!(fmt, "ApplyRes {:?}", res), PeerMsg::Start => write!(fmt, "Startup"), PeerMsg::Noop => write!(fmt, "Noop"), PeerMsg::Persisted { diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs index 11df3cbbabd..4a1df09fa44 100644 --- a/components/raftstore-v2/src/router/mod.rs +++ b/components/raftstore-v2/src/router/mod.rs @@ -3,6 +3,11 @@ mod imp; mod internal_message; mod message; +mod response_channel; -pub(crate) use internal_message::ApplyTask; -pub use message::{PeerMsg, PeerTick, StoreMsg, StoreTick}; +pub(crate) use self::internal_message::ApplyTask; +pub use self::{ + internal_message::ApplyRes, + message::{PeerMsg, PeerTick, RaftCommand, RaftQuery, RaftRequest, StoreMsg, StoreTick}, + response_channel::{CmdResChannel, QueryResChannel, QueryResult}, +}; diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs new file mode 100644 index 00000000000..fe84ae3c3ef --- /dev/null +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -0,0 +1,477 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! Variants of channels for `Msg`. +//! - `Read`: a channel for read only requests including `StatusRequest`, +//! `GetRequest` and `SnapRequest` +//! - `Write`: a channel for write only requests including `AdminRequest` +//! `PutRequest`, `DeleteRequest` and `DeleteRangeRequest`. +//! +//! Prefer channel over callback because: +//! 1. channel can be reused, hence reduce allocations (not yet implemented). +//! 2. channel may not need dynamic dispatch. +//! 3. caller can use async fashion. +//! 4. there will be no callback leak. 
+ +use std::{ + cell::UnsafeCell, + fmt, + future::Future, + mem::{self, ManuallyDrop}, + pin::Pin, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + task::{Context, Poll}, +}; + +use engine_traits::Snapshot; +use futures::task::AtomicWaker; +use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, raft_cmdpb::RaftCmdResponse}; +use raftstore::store::{ + local_metrics::TimeTracker, msg::ErrorCallback, ReadCallback, RegionSnapshot, WriteCallback, +}; +use smallvec::SmallVec; +use tikv_util::memory::HeapSize; + +/// A struct allows to watch and notify specific events. +/// +/// There are two different events: state and payload. Obviously, state events +/// have no payload. At most 30 states can be defined. There can be only one +/// type of payload. +struct EventCore { + /// Every event will have two bits. + /// - 0b00 means the event is not fired and not subscribed. + /// - 0b01 means the event is fired and not subscribed. + /// - 0b10 means the event is not fired and subscribed. + /// - 0b11 means the event is fired and subscribed. + /// Event 0 and Event 31 is reserved as payload and cancel respectively. + /// Other events should be defined within [1, 30]. + event: AtomicU64, + res: UnsafeCell>, + // Waker can be changed, need to use `AtomicWaker` to guarantee no data race. + waker: AtomicWaker, +} + +unsafe impl Send for EventCore {} + +const PAYLOAD_EVENT: u64 = 0; +const CANCEL_EVENT: u64 = 31; + +#[inline] +const fn subscribed_bit_of(event: u64) -> u64 { + 1 << (event * 2) +} + +#[inline] +const fn fired_bit_of(event: u64) -> u64 { + 1 << (event * 2 + 1) +} + +impl EventCore { + #[inline] + fn notify_event(&self, event: u64) { + let previous = self.event.fetch_or(fired_bit_of(event), Ordering::AcqRel); + if previous & subscribed_bit_of(event) != 0 { + self.waker.wake() + } + } + + /// Set the result. + /// + /// After this call, no events should be notified. 
+ #[inline] + fn set_result(&self, result: Res) { + unsafe { + *self.res.get() = Some(result); + } + let previous = self.event.fetch_or( + fired_bit_of(PAYLOAD_EVENT) | fired_bit_of(CANCEL_EVENT), + Ordering::AcqRel, + ); + if previous & subscribed_bit_of(PAYLOAD_EVENT) != 0 { + self.waker.wake() + } + } + + /// Cancel all subscribers. + /// + /// After this call, no events should be notified and no result should be + /// set. + #[inline] + fn cancel(&self) { + let mut previous = self + .event + .fetch_or(fired_bit_of(CANCEL_EVENT), Ordering::AcqRel); + let subscribed_bit = subscribed_bit_of(0); + while previous != 0 { + // Not notified yet. + if previous & 0b11 == subscribed_bit { + self.waker.wake(); + return; + } + previous >>= 2; + } + } +} + +struct WaitEvent<'a, Res> { + event: u64, + core: &'a EventCore, +} + +#[inline] +fn check_bit(e: u64, fired_bit: u64) -> Option { + if e & fired_bit != 0 { + return Some(true); + } + let cancel_bit = fired_bit_of(CANCEL_EVENT); + if e & cancel_bit != 0 { + return Some(false); + } + None +} + +impl<'a, Res> Future for WaitEvent<'a, Res> { + type Output = bool; + + #[inline] + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let event = &self.core.event; + let mut e = event.load(Ordering::Relaxed); + let fired_bit = fired_bit_of(self.event); + if let Some(b) = check_bit(e, fired_bit) { + return Poll::Ready(b); + } + self.core.waker.register(cx.waker()); + let subscribed_bit = subscribed_bit_of(self.event); + loop { + match event.compare_exchange_weak( + e, + e | subscribed_bit, + Ordering::AcqRel, + Ordering::Relaxed, + ) { + Ok(_) => return Poll::Pending, + Err(v) => e = v, + }; + if let Some(b) = check_bit(e, fired_bit) { + return Poll::Ready(b); + } + } + } +} + +struct WaitResult<'a, Res> { + core: &'a EventCore, +} + +impl<'a, Res> Future for WaitResult<'a, Res> { + type Output = Option; + + #[inline] + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let event = &self.core.event; + let 
fired_bit = fired_bit_of(PAYLOAD_EVENT); + let mut e = event.load(Ordering::Relaxed); + if check_bit(e, fired_bit).is_some() { + unsafe { + return Poll::Ready((*self.core.res.get()).take()); + } + } + let subscribed_bit = subscribed_bit_of(PAYLOAD_EVENT); + self.core.waker.register(cx.waker()); + loop { + match event.compare_exchange_weak( + e, + e | subscribed_bit, + Ordering::AcqRel, + Ordering::Relaxed, + ) { + Ok(_) => return Poll::Pending, + Err(v) => e = v, + }; + if check_bit(e, fired_bit).is_some() { + unsafe { + return Poll::Ready((*self.core.res.get()).take()); + } + } + } + } +} + +pub struct CommandResultSubscriber { + core: Arc>, +} + +impl CommandResultSubscriber { + pub async fn wait_proposed(&mut self) -> bool { + WaitEvent { + event: CmdResChannel::PROPOSED_EVENT, + core: &self.core, + } + .await + } + + pub async fn wait_committed(&mut self) -> bool { + WaitEvent { + event: CmdResChannel::COMMITTED_EVENT, + core: &self.core, + } + .await + } + + pub async fn result(mut self) -> Option { + WaitResult { core: &self.core }.await + } +} + +unsafe impl Send for CommandResultSubscriber {} +unsafe impl Sync for CommandResultSubscriber {} + +pub struct CmdResChannel { + core: ManuallyDrop>>, +} + +impl CmdResChannel { + // Valid range is [1, 30] + const PROPOSED_EVENT: u64 = 1; + const COMMITTED_EVENT: u64 = 2; + + #[inline] + pub fn pair() -> (Self, CommandResultSubscriber) { + let core = Arc::new(EventCore { + event: AtomicU64::new(0), + res: UnsafeCell::new(None), + waker: AtomicWaker::new(), + }); + ( + Self { + core: ManuallyDrop::new(core.clone()), + }, + CommandResultSubscriber { core }, + ) + } +} + +impl ErrorCallback for CmdResChannel { + fn report_error(self, err: RaftCmdResponse) { + self.set_result(err); + } + + fn is_none(&self) -> bool { + false + } +} + +impl WriteCallback for CmdResChannel { + type Response = RaftCmdResponse; + + /// Called after a request is proposed to the raft group successfully. 
It's + /// used to notify the caller to move on early because it's very likely the + /// request will be applied to the raftstore. + #[inline] + fn notify_proposed(&mut self) { + self.core.notify_event(Self::PROPOSED_EVENT); + } + + /// Called after a request is committed and before it's being applied, and + /// it's guaranteed that the request will be successfully applied soon. + #[inline] + fn notify_committed(&mut self) { + self.core.notify_event(Self::COMMITTED_EVENT); + } + + fn trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>> { + None + } + + fn trackers_mut(&mut self) -> Option<&mut SmallVec<[TimeTracker; 4]>> { + None + } + + // TODO: support executing hooks inside setting result. + #[inline] + fn set_result(mut self, res: RaftCmdResponse) { + self.core.set_result(res); + unsafe { + ManuallyDrop::drop(&mut self.core); + } + mem::forget(self); + } +} + +impl Drop for CmdResChannel { + #[inline] + fn drop(&mut self) { + self.core.cancel(); + unsafe { + ManuallyDrop::drop(&mut self.core); + } + } +} + +unsafe impl Send for CmdResChannel {} +unsafe impl Sync for CmdResChannel {} + +/// Response for Read. +/// +/// Unlike v1, snapshot are always taken in LocalReader, hence snapshot doesn't +/// need to be a field of the struct. +#[derive(Clone, PartialEq, Debug)] +pub struct ReadResponse { + pub txn_extra_op: TxnExtraOp, +} + +/// Possible result of a raft query. +#[derive(Clone, Debug, PartialEq)] +pub enum QueryResult { + /// If it's a read like get or snapshot, `ReadResponse` is returned on + /// success. + Read(ReadResponse), + /// If it's a status query, `RaftCmdResponse` is returned. If it's a read + /// like query, `RaftCmdResponse` is returned on error. 
+ Response(RaftCmdResponse), +} + +impl QueryResult { + pub fn read(&self) -> Option<&ReadResponse> { + match self { + QueryResult::Read(r) => Some(r), + _ => None, + } + } + + pub fn response(&self) -> Option<&RaftCmdResponse> { + match self { + QueryResult::Response(r) => Some(r), + _ => None, + } + } +} + +pub struct QueryResChannel { + core: ManuallyDrop>>, +} + +impl QueryResChannel { + pub fn pair() -> (Self, QueryResSubscriber) { + let core = Arc::new(EventCore { + event: AtomicU64::new(0), + res: UnsafeCell::new(None), + waker: AtomicWaker::new(), + }); + ( + Self { + core: ManuallyDrop::new(core.clone()), + }, + QueryResSubscriber { core }, + ) + } +} + +impl ErrorCallback for QueryResChannel { + #[inline] + fn report_error(self, err: RaftCmdResponse) { + self.set_result(QueryResult::Response(err)); + } + + #[inline] + fn is_none(&self) -> bool { + false + } +} + +impl ReadCallback for QueryResChannel { + type Response = QueryResult; + + #[inline] + fn set_result(mut self, res: QueryResult) { + self.core.set_result(res); + unsafe { + ManuallyDrop::drop(&mut self.core); + } + mem::forget(self); + } +} + +impl Drop for QueryResChannel { + #[inline] + fn drop(&mut self) { + self.core.cancel(); + unsafe { + ManuallyDrop::drop(&mut self.core); + } + } +} + +unsafe impl Send for QueryResChannel {} +unsafe impl Sync for QueryResChannel {} + +pub struct QueryResSubscriber { + core: Arc>, +} + +impl QueryResSubscriber { + pub async fn result(mut self) -> Option { + WaitResult { core: &self.core }.await + } +} + +unsafe impl Send for QueryResSubscriber {} +unsafe impl Sync for QueryResSubscriber {} + +#[cfg(test)] +mod tests { + use engine_test::kv::KvTestSnapshot; + use futures::executor::block_on; + + use super::*; + + #[test] + fn test_cancel() { + let (mut chan, mut sub) = CmdResChannel::pair(); + drop(chan); + assert!(!block_on(sub.wait_proposed())); + assert!(!block_on(sub.wait_committed())); + assert!(block_on(sub.result()).is_none()); + + let (mut chan, mut 
sub) = CmdResChannel::pair(); + chan.notify_proposed(); + let mut result = RaftCmdResponse::default(); + result.mut_header().set_current_term(4); + chan.set_result(result.clone()); + assert!(block_on(sub.wait_proposed())); + assert!(!block_on(sub.wait_committed())); + assert_eq!(block_on(sub.result()), Some(result)); + + let (mut chan, mut sub) = QueryResChannel::pair(); + drop(chan); + assert!(block_on(sub.result()).is_none()); + } + + #[test] + fn test_channel() { + let (mut chan, mut sub) = CmdResChannel::pair(); + chan.notify_proposed(); + chan.notify_committed(); + let mut result = RaftCmdResponse::default(); + result.mut_header().set_current_term(2); + chan.set_result(result.clone()); + assert!(block_on(sub.wait_proposed())); + assert!(block_on(sub.wait_committed())); + assert_eq!(block_on(sub.result()), Some(result.clone())); + + let (mut chan, mut sub) = QueryResChannel::pair(); + let resp = QueryResult::Response(result.clone()); + chan.set_result(resp.clone()); + assert_eq!(block_on(sub.result()).unwrap(), resp); + + let (mut chan, mut sub) = QueryResChannel::pair(); + let read = QueryResult::Read(ReadResponse { + txn_extra_op: TxnExtraOp::ReadOldValue, + }); + chan.set_result(read.clone()); + assert_eq!(block_on(sub.result()).unwrap(), read); + } +} diff --git a/components/raftstore/src/lib.rs b/components/raftstore/src/lib.rs index e5906719109..66fdbc00546 100644 --- a/components/raftstore/src/lib.rs +++ b/components/raftstore/src/lib.rs @@ -6,6 +6,7 @@ #![feature(min_specialization)] #![feature(box_patterns)] #![feature(hash_drain_filter)] +#![feature(let_else)] #![recursion_limit = "256"] #[cfg(test)] diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 3f841e699bb..d44cca3668b 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -83,7 +83,7 @@ use crate::{ local_metrics::{RaftMetrics, TimeTracker}, memory::*, metrics::*, - msg::{Callback, 
PeerMsg, ReadResponse, SignificantMsg}, + msg::{Callback, ErrorCallback, PeerMsg, ReadResponse, SignificantMsg}, peer::Peer, peer_storage::{write_initial_apply_state, write_peer_state}, util, @@ -91,7 +91,7 @@ use crate::{ admin_cmd_epoch_lookup, check_region_epoch, compare_region_epoch, is_learner, ChangePeerI, ConfChangeKind, KeysInfoFormatter, LatencyInspector, }, - Config, RegionSnapshot, RegionTask, + Config, RegionSnapshot, RegionTask, WriteCallback, }, Error, Result, }; @@ -101,20 +101,14 @@ const APPLY_WB_SHRINK_SIZE: usize = 1024 * 1024; const SHRINK_PENDING_CMD_QUEUE_CAP: usize = 64; const MAX_APPLY_BATCH_SIZE: usize = 64 * 1024 * 1024; -pub struct PendingCmd -where - S: Snapshot, -{ +pub struct PendingCmd { pub index: u64, pub term: u64, - pub cb: Option>, + pub cb: Option, } -impl PendingCmd -where - S: Snapshot, -{ - fn new(index: u64, term: u64, cb: Callback) -> PendingCmd { +impl PendingCmd { + fn new(index: u64, term: u64, cb: C) -> PendingCmd { PendingCmd { index, term, @@ -123,10 +117,7 @@ where } } -impl Drop for PendingCmd -where - S: Snapshot, -{ +impl Drop for PendingCmd { fn drop(&mut self) { if self.cb.is_some() { safe_panic!( @@ -138,10 +129,7 @@ where } } -impl Debug for PendingCmd -where - S: Snapshot, -{ +impl Debug for PendingCmd { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!( f, @@ -153,30 +141,24 @@ where } } -impl HeapSize for PendingCmd {} +impl HeapSize for PendingCmd {} /// Commands waiting to be committed and applied. 
#[derive(Debug)] -pub struct PendingCmdQueue -where - S: Snapshot, -{ - normals: VecDeque>, - conf_change: Option>, +pub struct PendingCmdQueue { + normals: VecDeque>, + conf_change: Option>, } -impl PendingCmdQueue -where - S: Snapshot, -{ - fn new() -> PendingCmdQueue { +impl PendingCmdQueue { + fn new() -> PendingCmdQueue { PendingCmdQueue { normals: VecDeque::new(), conf_change: None, } } - fn pop_normal(&mut self, index: u64, term: u64) -> Option> { + fn pop_normal(&mut self, index: u64, term: u64) -> Option> { self.normals.pop_front().and_then(|cmd| { if self.normals.capacity() > SHRINK_PENDING_CMD_QUEUE_CAP && self.normals.len() < SHRINK_PENDING_CMD_QUEUE_CAP @@ -191,18 +173,18 @@ where }) } - fn append_normal(&mut self, cmd: PendingCmd) { + fn append_normal(&mut self, cmd: PendingCmd) { self.normals.push_back(cmd); } - fn take_conf_change(&mut self) -> Option> { + fn take_conf_change(&mut self) -> Option> { // conf change will not be affected when changing between follower and leader, // so there is no need to check term. self.conf_change.take() } // TODO: seems we don't need to separate conf change from normal entries. - fn set_conf_change(&mut self, cmd: PendingCmd) { + fn set_conf_change(&mut self, cmd: PendingCmd) { self.conf_change = Some(cmd); } } @@ -547,7 +529,7 @@ where .applied_batch .cb_batch .iter() - .flat_map(|(cb, _)| cb.get_trackers()) + .flat_map(|(cb, _)| cb.trackers()) .flat_map(|trackers| trackers.iter().map(|t| t.as_tracker_token())) .flatten() .collect(); @@ -586,7 +568,7 @@ where // Invoke callbacks let now = std::time::Instant::now(); for (cb, resp) in cb_batch.drain(..) { - for tracker in cb.get_trackers().iter().flat_map(|v| *v) { + for tracker in cb.trackers().iter().flat_map(|v| *v) { tracker.observe(now, &self.apply_time, |t| &mut t.metrics.apply_time_nanos); } cb.invoke_with_response(resp); @@ -675,7 +657,7 @@ where } /// Calls the callback of `cmd` when the Region is removed. 
-fn notify_region_removed(region_id: u64, peer_id: u64, mut cmd: PendingCmd) { +fn notify_region_removed(region_id: u64, peer_id: u64, mut cmd: PendingCmd) { debug!( "region is removed, notify commands"; "region_id" => region_id, @@ -686,10 +668,10 @@ fn notify_region_removed(region_id: u64, peer_id: u64, mut cmd: PendingCmd) { +pub fn notify_req_region_removed(region_id: u64, cb: impl ErrorCallback) { let region_not_found = Error::RegionNotFound(region_id); let resp = cmd_resp::new_error(region_not_found); - cb.invoke_with_response(resp); + cb.report_error(resp); } /// Calls the callback of `cmd` when it can not be processed further. @@ -697,7 +679,7 @@ fn notify_stale_command( region_id: u64, peer_id: u64, term: u64, - mut cmd: PendingCmd, + mut cmd: PendingCmd, ) { info!( "command is stale, skip"; @@ -709,15 +691,15 @@ fn notify_stale_command( notify_stale_req(term, cmd.cb.take().unwrap()); } -pub fn notify_stale_req(term: u64, cb: Callback) { +pub fn notify_stale_req(term: u64, cb: impl ErrorCallback) { let resp = cmd_resp::err_resp(Error::StaleCommand, term); - cb.invoke_with_response(resp); + cb.report_error(resp); } -pub fn notify_stale_req_with_msg(term: u64, msg: String, cb: Callback) { +pub fn notify_stale_req_with_msg(term: u64, msg: String, cb: impl ErrorCallback) { let mut resp = cmd_resp::err_resp(Error::StaleCommand, term); resp.mut_header().mut_error().set_message(msg); - cb.invoke_with_response(resp); + cb.report_error(resp); } /// Checks if a write is needed to be issued before handling the command. @@ -884,7 +866,7 @@ where pending_remove: bool, /// The commands waiting to be committed and applied - pending_cmds: PendingCmdQueue, + pending_cmds: PendingCmdQueue>, /// The counter of pending request snapshots. See more in `Peer`. 
pending_request_snapshot_count: Arc, @@ -2974,10 +2956,7 @@ pub fn compact_raft_log( Ok(()) } -pub struct Apply -where - S: Snapshot, -{ +pub struct Apply { pub peer_id: u64, pub region_id: u64, pub term: u64, @@ -2985,11 +2964,11 @@ where pub commit_term: u64, pub entries: SmallVec<[CachedEntries; 1]>, pub entries_size: usize, - pub cbs: Vec>, + pub cbs: Vec>, pub bucket_meta: Option>, } -impl Apply { +impl Apply { pub(crate) fn new( peer_id: u64, region_id: u64, @@ -2997,9 +2976,9 @@ impl Apply { commit_index: u64, commit_term: u64, entries: Vec, - cbs: Vec>, + cbs: Vec>, buckets: Option>, - ) -> Apply { + ) -> Apply { let mut entries_size = 0; for e in &entries { entries_size += bytes_capacity(&e.data) + bytes_capacity(&e.context); @@ -3021,7 +3000,7 @@ impl Apply { pub fn on_schedule(&mut self, metrics: &RaftMetrics) { let now = std::time::Instant::now(); for cb in &mut self.cbs { - if let Callback::Write { trackers, .. } = &mut cb.cb { + if let Some(trackers) = cb.cb.trackers_mut() { for tracker in trackers { tracker.observe(now, &metrics.store_time, |t| { t.metrics.write_instant = Some(now); @@ -3035,7 +3014,7 @@ impl Apply { } } - fn try_batch(&mut self, other: &mut Apply) -> bool { + fn try_batch(&mut self, other: &mut Apply) -> bool { assert_eq!(self.region_id, other.region_id); assert_eq!(self.peer_id, other.peer_id); if self.entries_size + other.entries_size <= MAX_APPLY_BATCH_SIZE { @@ -3089,21 +3068,18 @@ impl Registration { } #[derive(Debug)] -pub struct Proposal -where - S: Snapshot, -{ +pub struct Proposal { pub is_conf_change: bool, pub index: u64, pub term: u64, - pub cb: Callback, + pub cb: C, /// `propose_time` is set to the last time when a peer starts to renew /// lease. 
pub propose_time: Option, pub must_pass_epoch_check: bool, } -impl HeapSize for Proposal {} +impl HeapSize for Proposal {} pub struct Destroy { region_id: u64, @@ -3252,7 +3228,7 @@ where { Apply { start: Instant, - apply: Apply, + apply: Apply>, }, Registration(Registration), LogsUpToDate(CatchUpLogs), @@ -3273,7 +3249,7 @@ impl Msg where EK: KvEngine, { - pub fn apply(apply: Apply) -> Msg { + pub fn apply(apply: Apply>) -> Msg { Msg::Apply { start: Instant::now(), apply, @@ -3409,7 +3385,11 @@ where /// Handles apply tasks, and uses the apply delegate to handle the committed /// entries. - fn handle_apply(&mut self, apply_ctx: &mut ApplyContext, mut apply: Apply) { + fn handle_apply( + &mut self, + apply_ctx: &mut ApplyContext, + mut apply: Apply>, + ) { if apply_ctx.timer.is_none() { apply_ctx.timer = Some(Instant::now_coarse()); } @@ -3483,12 +3463,12 @@ where } /// Handles proposals, and appends the commands to the apply delegate. - fn append_proposal(&mut self, props_drainer: Drain<'_, Proposal>) { + fn append_proposal(&mut self, props_drainer: Drain<'_, Proposal>>) { let (region_id, peer_id) = (self.delegate.region_id(), self.delegate.id()); let propose_num = props_drainer.len(); if self.delegate.stopped { for p in props_drainer { - let cmd = PendingCmd::::new(p.index, p.term, p.cb); + let cmd = PendingCmd::new(p.index, p.term, p.cb); notify_stale_command(region_id, peer_id, self.delegate.term, cmd); } return; @@ -3790,7 +3770,7 @@ where for tracker in apply .cbs .iter() - .flat_map(|p| p.cb.get_trackers()) + .flat_map(|p| p.cb.trackers()) .flat_map(|ts| ts.iter().flat_map(|t| t.as_tracker_token())) { GLOBAL_TRACKERS.with_tracker(tracker, |t| { @@ -4191,7 +4171,7 @@ where // So only shutdown needs to be checked here. if !tikv_util::thread_group::is_shutdown(!cfg!(test)) { for p in apply.cbs.drain(..) 
{ - let cmd = PendingCmd::::new(p.index, p.term, p.cb); + let cmd = PendingCmd::new(p.index, p.term, p.cb); notify_region_removed(apply.region_id, apply.peer_id, cmd); } } @@ -4323,14 +4303,11 @@ mod memtrace { pub merge_yield: usize, } - impl HeapSize for PendingCmdQueue - where - S: Snapshot, - { + impl HeapSize for PendingCmdQueue { fn heap_size(&self) -> usize { // Some fields of `PendingCmd` are on stack, but ignore them because they are // just some small boxed closures. - self.normals.capacity() * mem::size_of::>() + self.normals.capacity() * mem::size_of::>() } } @@ -4642,7 +4619,7 @@ mod tests { index: u64, term: u64, cb: Callback, - ) -> Proposal { + ) -> Proposal> { Proposal { is_conf_change, index, @@ -4653,13 +4630,13 @@ mod tests { } } - fn apply( + fn apply( peer_id: u64, region_id: u64, term: u64, entries: Vec, - cbs: Vec>, - ) -> Apply { + cbs: Vec>, + ) -> Apply { let (commit_index, commit_term) = entries .last() .map(|e| (e.get_index(), e.get_term())) @@ -4843,7 +4820,7 @@ mod tests { system.shutdown(); } - fn cb(idx: u64, term: u64, tx: Sender) -> Proposal { + fn cb(idx: u64, term: u64, tx: Sender) -> Proposal> { proposal( false, idx, @@ -6423,7 +6400,7 @@ mod tests { #[test] fn pending_cmd_leak() { let res = panic_hook::recover_safe(|| { - let _cmd = PendingCmd::::new(1, 1, Callback::None); + let _cmd = PendingCmd::new(1, 1, Callback::::None); }); res.unwrap_err(); } @@ -6431,7 +6408,7 @@ mod tests { #[test] fn pending_cmd_leak_dtor_not_abort() { let res = panic_hook::recover_safe(|| { - let _cmd = PendingCmd::::new(1, 1, Callback::None); + let _cmd = PendingCmd::new(1, 1, Callback::::None); panic!("Don't abort"); // It would abort and fail if there was a double-panic in PendingCmd // dtor. 
diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 2452f177cff..66ceeea7967 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -93,7 +93,7 @@ use crate::{ }, AbstractPeer, CasualMessage, Config, LocksStatus, MergeResultKind, PdTask, PeerMsg, PeerTick, ProposalContext, RaftCmdExtraOpts, RaftCommand, RaftlogFetchResult, - SignificantMsg, SnapKey, StoreMsg, + SignificantMsg, SnapKey, StoreMsg, WriteCallback, }, Error, Result, }; @@ -490,13 +490,7 @@ where let mut cbs = std::mem::take(&mut self.callbacks); let proposed_cbs: Vec = cbs .iter_mut() - .filter_map(|cb| { - if let Callback::Write { proposed_cb, .. } = cb { - proposed_cb.take() - } else { - None - } - }) + .filter_map(|cb| cb.take_proposed_cb()) .collect(); let proposed_cb: Option = if proposed_cbs.is_empty() { None @@ -509,13 +503,7 @@ where }; let committed_cbs: Vec<_> = cbs .iter_mut() - .filter_map(|cb| { - if let Callback::Write { committed_cb, .. } = cb { - committed_cb.take() - } else { - None - } - }) + .filter_map(|cb| cb.take_committed_cb()) .collect(); let committed_cb: Option = if committed_cbs.is_empty() { None @@ -529,13 +517,7 @@ where let tokens: SmallVec<[TimeTracker; 4]> = cbs .iter_mut() - .filter_map(|cb| { - if let Callback::Write { trackers, .. } = cb { - Some(trackers[0]) - } else { - None - } - }) + .filter_map(|cb| cb.trackers().map(|t| t[0])) .collect(); let mut cb = Callback::write_ext( @@ -550,7 +532,7 @@ where committed_cb, ); - if let Callback::Write { trackers, .. 
} = &mut cb { + if let Some(trackers) = cb.trackers_mut() { *trackers = tokens; } @@ -4829,7 +4811,7 @@ where if self.ctx.raft_metrics.waterfall_metrics { let now = Instant::now(); - for tracker in cb.get_trackers().iter().flat_map(|v| *v) { + for tracker in cb.trackers().iter().flat_map(|v| *v) { tracker.observe(now, &self.ctx.raft_metrics.wf_batch_wait, |t| { &mut t.metrics.wf_batch_wait_nanos }); diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 947e9e074fd..619a18e3fb5 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -73,9 +73,10 @@ where } } -pub type ReadCallback = Box) + Send>; -pub type WriteCallback = Box; +pub type BoxReadCallback = Box) + Send>; +pub type BoxWriteCallback = Box; pub type ExtCallback = Box; + #[cfg(any(test, feature = "testexport"))] pub type TestCallback = Box; @@ -88,10 +89,10 @@ pub enum Callback { /// No callback. None, /// Read callback. - Read(ReadCallback), + Read(BoxReadCallback), /// Write callback. Write { - cb: WriteCallback, + cb: BoxWriteCallback, /// `proposed_cb` is called after a request is proposed to the raft /// group successfully. It's used to notify the caller to move on early /// because it's very likely the request will be applied to the @@ -101,6 +102,7 @@ pub enum Callback { /// it's being applied, and it's guaranteed that the request will be /// successfully applied soon. committed_cb: Option, + trackers: SmallVec<[TimeTracker; 4]>, }, #[cfg(any(test, feature = "testexport"))] @@ -114,12 +116,12 @@ impl Callback where S: Snapshot, { - pub fn write(cb: WriteCallback) -> Self { + pub fn write(cb: BoxWriteCallback) -> Self { Self::write_ext(cb, None, None) } pub fn write_ext( - cb: WriteCallback, + cb: BoxWriteCallback, proposed_cb: Option, committed_cb: Option, ) -> Self { @@ -142,13 +144,6 @@ where } } - pub fn get_trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>> { - match self { - Callback::Write { trackers, .. 
} => Some(trackers), - _ => None, - } - } - pub fn invoke_with_response(self, resp: RaftCmdResponse) { match self { Callback::None => (), @@ -169,27 +164,22 @@ where } } - pub fn has_proposed_cb(&mut self) -> bool { - if let Callback::Write { proposed_cb, .. } = self { - proposed_cb.is_some() - } else { - false - } + pub fn has_proposed_cb(&self) -> bool { + let Callback::Write { proposed_cb, .. } = self else { return false }; + proposed_cb.is_some() } pub fn invoke_proposed(&mut self) { - if let Callback::Write { proposed_cb, .. } = self { - if let Some(cb) = proposed_cb.take() { - cb() - } + let Callback::Write { proposed_cb, .. } = self else { return }; + if let Some(cb) = proposed_cb.take() { + cb(); } } pub fn invoke_committed(&mut self) { - if let Callback::Write { committed_cb, .. } = self { - if let Some(cb) = committed_cb.take() { - cb() - } + let Callback::Write { committed_cb, .. } = self else { return }; + if let Some(cb) = committed_cb.take() { + cb(); } } @@ -200,7 +190,86 @@ where } } - pub fn is_none(&self) -> bool { + pub fn take_proposed_cb(&mut self) -> Option { + let Callback::Write { proposed_cb, .. } = self else { return None }; + proposed_cb.take() + } + + pub fn take_committed_cb(&mut self) -> Option { + let Callback::Write { committed_cb, .. 
} = self else { return None }; + committed_cb.take() + } +} + +pub trait ReadCallback: ErrorCallback { + type Response; + + fn set_result(self, result: Self::Response); +} + +pub trait WriteCallback: ErrorCallback { + type Response; + + fn notify_proposed(&mut self); + fn notify_committed(&mut self); + fn trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>>; + fn trackers_mut(&mut self) -> Option<&mut SmallVec<[TimeTracker; 4]>>; + fn set_result(self, result: Self::Response); +} + +pub trait ErrorCallback: Send { + fn report_error(self, err: RaftCmdResponse); + fn is_none(&self) -> bool; +} + +impl ReadCallback for Callback { + type Response = ReadResponse; + + #[inline] + fn set_result(self, result: Self::Response) { + self.invoke_read(result); + } +} + +impl WriteCallback for Callback { + type Response = RaftCmdResponse; + + #[inline] + fn notify_proposed(&mut self) { + self.invoke_proposed(); + } + + #[inline] + fn notify_committed(&mut self) { + self.invoke_committed(); + } + + #[inline] + fn trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>> { + let Callback::Write { trackers, .. } = self else { return None }; + Some(trackers) + } + + #[inline] + fn trackers_mut(&mut self) -> Option<&mut SmallVec<[TimeTracker; 4]>> { + let Callback::Write { trackers, .. 
} = self else { return None }; + Some(trackers) + } + + #[inline] + fn set_result(self, result: Self::Response) { + self.invoke_with_response(result); + } +} + +impl ErrorCallback for Callback { + #[inline] + fn report_error(self, err: RaftCmdResponse) { + self.invoke_with_response(err); + } + + #[inline] + fn is_none(&self) -> bool { matches!(self, Callback::None) } } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 17fe22926d1..b109d107c4f 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -93,7 +93,7 @@ use crate::{ }, hibernate_state::GroupState, memory::{needs_evict_entry_cache, MEMTRACE_RAFT_ENTRIES}, - msg::{PeerMsg, RaftCommand, SignificantMsg, StoreMsg}, + msg::{ErrorCallback, PeerMsg, RaftCommand, SignificantMsg, StoreMsg}, txn_ext::LocksStatus, util::{admin_cmd_epoch_lookup, RegionReadProgress}, worker::{ @@ -101,7 +101,7 @@ use crate::{ ReadProgress, RegionTask, SplitCheckTask, }, Callback, Config, GlobalReplicationState, PdTask, ReadIndexContext, ReadResponse, TxnExt, - RAFT_INIT_LOG_INDEX, + WriteCallback, RAFT_INIT_LOG_INDEX, }, Error, Result, }; @@ -121,16 +121,13 @@ pub enum StaleState { } #[derive(Debug)] -struct ProposalQueue -where - S: Snapshot, -{ +pub struct ProposalQueue { tag: String, - queue: VecDeque>, + queue: VecDeque>, } -impl ProposalQueue { - fn new(tag: String) -> ProposalQueue { +impl ProposalQueue { + fn new(tag: String) -> ProposalQueue { ProposalQueue { tag, queue: VecDeque::new(), @@ -146,7 +143,7 @@ impl ProposalQueue { .and_then(|i| { self.queue[i] .cb - .get_trackers() + .trackers() .map(|ts| (self.queue[i].term, ts)) }) } @@ -159,7 +156,7 @@ impl ProposalQueue { } // Find proposal in front or at the given term and index - fn pop(&mut self, term: u64, index: u64) -> Option> { + fn pop(&mut self, term: u64, index: u64) -> Option> { self.queue.pop_front().and_then(|p| { // Comparing the term first then the index, because the term 
is // increasing among all log entries and the index is increasing @@ -174,7 +171,7 @@ impl ProposalQueue { /// Find proposal at the given term and index and notify stale proposals /// in front that term and index - fn find_proposal(&mut self, term: u64, index: u64, current_term: u64) -> Option> { + fn find_proposal(&mut self, term: u64, index: u64, current_term: u64) -> Option> { while let Some(p) = self.pop(term, index) { if p.term == term { if p.index == index { @@ -193,11 +190,11 @@ impl ProposalQueue { } #[inline] - fn oldest(&self) -> Option<&Proposal> { + fn oldest(&self) -> Option<&Proposal> { self.queue.front() } - fn push(&mut self, p: Proposal) { + fn push(&mut self, p: Proposal) { if let Some(f) = self.queue.back() { // The term must be increasing among all log entries and the index // must be increasing inside a given term @@ -217,7 +214,7 @@ impl ProposalQueue { } } - fn back(&self) -> Option<&Proposal> { + fn back(&self) -> Option<&Proposal> { self.queue.back() } } @@ -730,11 +727,11 @@ where /// Record the last instant of each peer's heartbeat response. pub peer_heartbeats: HashMap, - proposals: ProposalQueue, + proposals: ProposalQueue>, leader_missing_time: Option, #[getset(get = "pub")] leader_lease: Lease, - pending_reads: ReadIndexQueue, + pending_reads: ReadIndexQueue>, /// Threshold of long uncommitted proposals. /// /// Note that this is a dynamically changing value. 
Check the @@ -1646,7 +1643,7 @@ where { let proposal = &self.proposals.queue[idx]; if term == proposal.term { - for tracker in proposal.cb.get_trackers().iter().flat_map(|v| v.iter()) { + for tracker in proposal.cb.trackers().iter().flat_map(|v| v.iter()) { tracker.observe(std_now, &ctx.raft_metrics.wf_send_proposal, |t| { &mut t.metrics.wf_send_proposal_nanos }); @@ -3054,7 +3051,7 @@ where fn response_read( &self, - read: &mut ReadIndexRequest, + read: &mut ReadIndexRequest>, ctx: &mut PollContext, replica_read: bool, ) { @@ -3505,7 +3502,7 @@ where fn post_propose( &mut self, poll_ctx: &mut PollContext, - mut p: Proposal, + mut p: Proposal>, ) { // Try to renew leader lease on every consistent read/write request. if poll_ctx.current_time.is_none() { @@ -3797,7 +3794,11 @@ where ); } - pub fn push_pending_read(&mut self, read: ReadIndexRequest, is_leader: bool) { + pub fn push_pending_read( + &mut self, + read: ReadIndexRequest>, + is_leader: bool, + ) { self.pending_reads.push_back(read, is_leader); } @@ -3822,7 +3823,7 @@ where ); poll_ctx.raft_metrics.propose.unsafe_read_index += 1; cmd_resp::bind_error(&mut err_resp, e); - cb.invoke_with_response(err_resp); + cb.report_error(err_resp); self.should_wake_up = true; return false; } @@ -3899,7 +3900,7 @@ where } self.should_wake_up = true; cmd_resp::bind_error(&mut err_resp, Error::NotLeader(self.region_id, None)); - cb.invoke_with_response(err_resp); + cb.report_error(err_resp); return false; } @@ -5795,7 +5796,7 @@ mod tests { #[test] fn test_propose_queue_find_proposal() { - let mut pq: ProposalQueue = + let mut pq: ProposalQueue> = ProposalQueue::new("tag".to_owned()); let gen_term = |index: u64| (index / 10) + 1; let push_proposal = |pq: &mut ProposalQueue<_>, index: u64| { @@ -5858,7 +5859,7 @@ mod tests { fn must_not_call() -> ExtCallback { Box::new(move || unreachable!()) } - let mut pq: ProposalQueue = + let mut pq: ProposalQueue> = ProposalQueue::new("tag".to_owned()); // (1, 4) and (1, 5) is not 
committed diff --git a/components/raftstore/src/store/read_queue.rs b/components/raftstore/src/store/read_queue.rs index d9261b9fde3..6af9c151810 100644 --- a/components/raftstore/src/store/read_queue.rs +++ b/components/raftstore/src/store/read_queue.rs @@ -4,7 +4,6 @@ use std::{cmp, collections::VecDeque, mem, u64, usize}; use collections::HashMap; -use engine_traits::Snapshot; use kvproto::{ kvrpcpb::LockInfo, raft_cmdpb::{self, RaftCmdRequest}, @@ -21,19 +20,17 @@ use tikv_util::{ use time::Timespec; use uuid::Uuid; +use super::msg::ErrorCallback; use crate::{ - store::{fsm::apply, metrics::*, Callback, Config}, + store::{fsm::apply, metrics::*, Config}, Result, }; const READ_QUEUE_SHRINK_SIZE: usize = 64; -pub struct ReadIndexRequest -where - S: Snapshot, -{ +pub struct ReadIndexRequest { pub id: Uuid, - cmds: MustConsumeVec<(RaftCmdRequest, Callback, Option)>, + cmds: MustConsumeVec<(RaftCmdRequest, C, Option)>, pub propose_time: Timespec, pub read_index: Option, pub addition_request: Option>, @@ -44,24 +41,16 @@ where cmds_heap_size: usize, } -impl ReadIndexRequest -where - S: Snapshot, -{ - const CMD_SIZE: usize = mem::size_of::<(RaftCmdRequest, Callback, Option)>(); +impl ReadIndexRequest { + const CMD_SIZE: usize = mem::size_of::<(RaftCmdRequest, C, Option)>(); - pub fn push_command(&mut self, req: RaftCmdRequest, cb: Callback, read_index: u64) { + pub fn push_command(&mut self, req: RaftCmdRequest, cb: C, read_index: u64) { RAFT_READ_INDEX_PENDING_COUNT.inc(); self.cmds_heap_size += req.heap_size(); self.cmds.push((req, cb, Some(read_index))); } - pub fn with_command( - id: Uuid, - req: RaftCmdRequest, - cb: Callback, - propose_time: Timespec, - ) -> Self { + pub fn with_command(id: Uuid, req: RaftCmdRequest, cb: C, propose_time: Timespec) -> Self { RAFT_READ_INDEX_PENDING_COUNT.inc(); // Ignore heap allocations for `Callback`. 
@@ -81,31 +70,25 @@ where } } - pub fn cmds(&self) -> &[(RaftCmdRequest, Callback, Option)] { + pub fn cmds(&self) -> &[(RaftCmdRequest, C, Option)] { &self.cmds } - pub fn take_cmds(&mut self) -> MustConsumeVec<(RaftCmdRequest, Callback, Option)> { + pub fn take_cmds(&mut self) -> MustConsumeVec<(RaftCmdRequest, C, Option)> { self.cmds_heap_size = 0; self.cmds.take() } } -impl Drop for ReadIndexRequest -where - S: Snapshot, -{ +impl Drop for ReadIndexRequest { fn drop(&mut self) { let dur = (monotonic_raw_now() - self.propose_time).to_std().unwrap(); RAFT_READ_INDEX_PENDING_DURATION.observe(duration_to_sec(dur)); } } -pub struct ReadIndexQueue -where - S: Snapshot, -{ - reads: VecDeque>, +pub struct ReadIndexQueue { + reads: VecDeque>, ready_cnt: usize, // How many requests are handled. handled_cnt: usize, @@ -115,11 +98,8 @@ where retry_countdown: usize, } -impl Default for ReadIndexQueue -where - S: Snapshot, -{ - fn default() -> ReadIndexQueue { +impl Default for ReadIndexQueue { + fn default() -> ReadIndexQueue { ReadIndexQueue { reads: VecDeque::new(), ready_cnt: 0, @@ -130,10 +110,7 @@ where } } -impl ReadIndexQueue -where - S: Snapshot, -{ +impl ReadIndexQueue { /// Check it's necessary to retry pending read requests or not. /// Return true if all such conditions are satisfied: /// 1. 
more than an election timeout elapsed from the last request push; @@ -196,7 +173,7 @@ where self.contexts.clear(); } - pub fn push_back(&mut self, mut read: ReadIndexRequest, is_leader: bool) { + pub fn push_back(&mut self, mut read: ReadIndexRequest, is_leader: bool) { if !is_leader { read.in_contexts = true; let offset = self.handled_cnt + self.reads.len(); @@ -206,15 +183,15 @@ where self.retry_countdown = usize::MAX; } - pub fn back_mut(&mut self) -> Option<&mut ReadIndexRequest> { + pub fn back_mut(&mut self) -> Option<&mut ReadIndexRequest> { self.reads.back_mut() } - pub fn back(&self) -> Option<&ReadIndexRequest> { + pub fn back(&self) -> Option<&ReadIndexRequest> { self.reads.back() } - pub fn last_ready(&self) -> Option<&ReadIndexRequest> { + pub fn last_ready(&self) -> Option<&ReadIndexRequest> { if self.ready_cnt > 0 { return Some(&self.reads[self.ready_cnt - 1]); } @@ -333,7 +310,7 @@ where } } - pub fn pop_front(&mut self) -> Option> { + pub fn pop_front(&mut self) -> Option> { if self.ready_cnt == 0 { return None; } @@ -352,7 +329,7 @@ where /// Raft could have not been ready to handle the poped task. So put it back /// into the queue. 
- pub fn push_front(&mut self, read: ReadIndexRequest) { + pub fn push_front(&mut self, read: ReadIndexRequest) { debug_assert!(read.read_index.is_some()); self.reads.push_front(read); self.ready_cnt += 1; @@ -444,10 +421,7 @@ mod memtrace { use super::*; - impl HeapSize for ReadIndexRequest - where - S: Snapshot, - { + impl HeapSize for ReadIndexRequest { fn heap_size(&self) -> usize { let mut size = self.cmds_heap_size + Self::CMD_SIZE * self.cmds.capacity(); if let Some(ref add) = self.addition_request { @@ -457,13 +431,10 @@ mod memtrace { } } - impl HeapSize for ReadIndexQueue - where - S: Snapshot, - { + impl HeapSize for ReadIndexQueue { #[inline] fn heap_size(&self) -> usize { - let mut size = self.reads.capacity() * mem::size_of::>() + let mut size = self.reads.capacity() * mem::size_of::>() // For one Uuid and one usize. + 24 * self.contexts.len(); for read in &self.reads { @@ -522,10 +493,11 @@ mod tests { use engine_test::kv::KvTestSnapshot; use super::*; + use crate::store::Callback; #[test] fn test_read_queue_fold() { - let mut queue = ReadIndexQueue:: { + let mut queue = ReadIndexQueue::> { handled_cnt: 125, ..Default::default() }; @@ -584,7 +556,7 @@ mod tests { #[test] fn test_become_leader_then_become_follower() { - let mut queue = ReadIndexQueue:: { + let mut queue = ReadIndexQueue::> { handled_cnt: 100, ..Default::default() }; @@ -628,7 +600,7 @@ mod tests { #[test] fn test_retake_leadership() { - let mut queue = ReadIndexQueue:: { + let mut queue = ReadIndexQueue::> { handled_cnt: 100, ..Default::default() }; @@ -670,7 +642,7 @@ mod tests { #[test] fn test_advance_replica_reads_out_of_order() { - let mut queue = ReadIndexQueue:: { + let mut queue = ReadIndexQueue::> { handled_cnt: 100, ..Default::default() }; From 6c06f99c1b9dc10811e689cfc979a1b8d4287dd6 Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 19 Aug 2022 15:38:52 +0800 Subject: [PATCH 0160/1149] raftstore: Simplify raft local metrics (#13307) ref tikv/tikv#12876 Simplify raft local 
metrics by using local counter provided by rust-prometheus Signed-off-by: Connor1996 --- .../raftstore-v2/src/operation/ready/mod.rs | 2 +- .../raftstore/src/store/async_io/write.rs | 2 +- components/raftstore/src/store/fsm/peer.rs | 79 ++- components/raftstore/src/store/fsm/store.rs | 63 ++- .../raftstore/src/store/local_metrics.rs | 461 +++--------------- components/raftstore/src/store/metrics.rs | 220 ++++----- components/raftstore/src/store/peer.rs | 55 ++- .../raftstore/src/store/worker/metrics.rs | 2 +- 8 files changed, 321 insertions(+), 563 deletions(-) diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 668453e708b..156ea55a414 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -245,7 +245,7 @@ impl Peer { self.raft_group_mut().advance_append_async(ready); } - ctx.raft_metrics.ready.has_ready_region += 1; + ctx.raft_metrics.ready.has_ready_region.inc(); } /// Called when an asynchronously write finishes. 
diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index a007d168474..ea796117e2c 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -564,7 +564,7 @@ where cfg_tracker, raft_write_size_limit: cfg.value().raft_write_size_limit.0 as usize, metrics: StoreWriteMetrics::new(cfg.value().waterfall_metrics), - message_metrics: Default::default(), + message_metrics: RaftSendMessageMetrics::default(), perf_context, pending_latency_inspect: vec![], } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 66ceeea7967..9b354fb0842 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -486,7 +486,7 @@ where let cb = self.callbacks.pop().unwrap(); return Some((req, cb)); } - metric.propose.batch += self.callbacks.len() - 1; + metric.propose.batch.inc_by(self.callbacks.len() as u64 - 1); let mut cbs = std::mem::take(&mut self.callbacks); let proposed_cbs: Vec = cbs .iter_mut() @@ -610,8 +610,7 @@ where PeerMsg::RaftCommand(cmd) => { self.ctx .raft_metrics - .propose - .request_wait_time + .propose_wait_time .observe(duration_to_sec(cmd.send_time.saturating_elapsed()) as f64); if let Some(Err(e)) = cmd.extra_opts.deadline.map(|deadline| deadline.check()) { @@ -662,7 +661,7 @@ where PeerMsg::Destroy(peer_id) => { if self.fsm.peer.peer_id() == peer_id { match self.fsm.peer.maybe_destroy(self.ctx) { - None => self.ctx.raft_metrics.message_dropped.applying_snap += 1, + None => self.ctx.raft_metrics.message_dropped.applying_snap.inc(), Some(job) => { self.handle_destroy_peer(job); } @@ -1820,7 +1819,7 @@ where self.register_entry_cache_evict_tick(); } self.ctx.ready_count += 1; - self.ctx.raft_metrics.ready.has_ready_region += 1; + self.ctx.raft_metrics.ready.has_ready_region.inc(); if self.fsm.peer.leader_unreachable { 
self.fsm.reset_hibernate_state(GroupState::Chaos); @@ -2187,7 +2186,7 @@ where "peer_id" => self.fsm.peer_id(), "err" => ?e, ); - self.ctx.raft_metrics.propose.unsafe_read_index += 1; + self.ctx.raft_metrics.propose.unsafe_read_index.inc(); return; } @@ -2290,7 +2289,7 @@ where "skip {:?} because of disk full", msg_type; "region_id" => self.region_id(), "peer_id" => self.fsm.peer_id() ); - self.ctx.raft_metrics.message_dropped.disk_full += 1; + self.ctx.raft_metrics.message_dropped.disk_full.inc(); return Ok(()); } @@ -2360,7 +2359,7 @@ where && (msg.get_message().get_from() == raft::INVALID_ID || msg.get_message().get_from() == self.fsm.peer_id()) { - self.ctx.raft_metrics.message_dropped.stale_msg += 1; + self.ctx.raft_metrics.message_dropped.stale_msg.inc(); return Ok(()); } self.fsm.peer.step(self.ctx, msg.take_message()) @@ -2516,7 +2515,11 @@ where "to_store_id" => to.get_store_id(), "my_store_id" => self.store_id(), ); - self.ctx.raft_metrics.message_dropped.mismatch_store_id += 1; + self.ctx + .raft_metrics + .message_dropped + .mismatch_store_id + .inc(); return false; } @@ -2525,7 +2528,11 @@ where "missing epoch in raft message, ignore it"; "region_id" => region_id, ); - self.ctx.raft_metrics.message_dropped.mismatch_region_epoch += 1; + self.ctx + .raft_metrics + .message_dropped + .mismatch_region_epoch + .inc(); return false; } @@ -2577,7 +2584,7 @@ where "peer_id" => self.fsm.peer_id(), "target_peer" => ?target, ); - self.ctx.raft_metrics.message_dropped.stale_msg += 1; + self.ctx.raft_metrics.message_dropped.stale_msg.inc(); true } cmp::Ordering::Greater => { @@ -2605,7 +2612,7 @@ where } } } - None => self.ctx.raft_metrics.message_dropped.applying_snap += 1, + None => self.ctx.raft_metrics.message_dropped.applying_snap.inc(), } true } @@ -2710,7 +2717,7 @@ where "region_id" => self.fsm.region_id(), "peer_id" => self.fsm.peer_id(), ); - self.ctx.raft_metrics.message_dropped.stale_msg += 1; + self.ctx.raft_metrics.message_dropped.stale_msg.inc(); 
return; } // TODO: ask pd to guarantee we are stale now. @@ -2780,7 +2787,7 @@ where "snap" => ?snap_region, "to_peer" => ?msg.get_to_peer(), ); - self.ctx.raft_metrics.message_dropped.region_no_peer += 1; + self.ctx.raft_metrics.message_dropped.region_no_peer.inc(); return Ok(Either::Left(key)); } @@ -2792,7 +2799,7 @@ where "region_id" => self.fsm.region_id(), "peer_id" => self.fsm.peer_id(), ); - self.ctx.raft_metrics.message_dropped.stale_msg += 1; + self.ctx.raft_metrics.message_dropped.stale_msg.inc(); return Ok(Either::Left(key)); } else { panic!( @@ -2826,7 +2833,7 @@ where "region" => ?region, "snap" => ?snap_region, ); - self.ctx.raft_metrics.message_dropped.region_overlap += 1; + self.ctx.raft_metrics.message_dropped.region_overlap.inc(); return Ok(Either::Left(key)); } } @@ -2889,7 +2896,7 @@ where } } if is_overlapped { - self.ctx.raft_metrics.message_dropped.region_overlap += 1; + self.ctx.raft_metrics.message_dropped.region_overlap.inc(); return Ok(Either::Left(key)); } @@ -4687,7 +4694,11 @@ where ) -> Result> { // Check store_id, make sure that the msg is dispatched to the right place. if let Err(e) = util::check_store_id(msg, self.store_id()) { - self.ctx.raft_metrics.invalid_proposal.mismatch_store_id += 1; + self.ctx + .raft_metrics + .invalid_proposal + .mismatch_store_id + .inc(); return Err(e); } if msg.has_status_request() { @@ -4730,7 +4741,7 @@ where && !allow_replica_read && !allow_stale_read { - self.ctx.raft_metrics.invalid_proposal.not_leader += 1; + self.ctx.raft_metrics.invalid_proposal.not_leader.inc(); let leader = self.fsm.peer.get_peer_from_cache(leader_id); self.fsm.reset_hibernate_state(GroupState::Chaos); self.register_raft_base_tick(); @@ -4738,7 +4749,11 @@ where } // peer_id must be the same as peer's. 
if let Err(e) = util::check_peer_id(msg, self.fsm.peer.peer_id()) { - self.ctx.raft_metrics.invalid_proposal.mismatch_peer_id += 1; + self.ctx + .raft_metrics + .invalid_proposal + .mismatch_peer_id + .inc(); return Err(e); } // check whether the peer is initialized. @@ -4746,13 +4761,18 @@ where self.ctx .raft_metrics .invalid_proposal - .region_not_initialized += 1; + .region_not_initialized + .inc(); return Err(Error::RegionNotInitialized(region_id)); } // If the peer is applying snapshot, it may drop some sending messages, that // could make clients wait for response until timeout. if self.fsm.peer.is_handling_snapshot() { - self.ctx.raft_metrics.invalid_proposal.is_applying_snapshot += 1; + self.ctx + .raft_metrics + .invalid_proposal + .is_applying_snapshot + .inc(); // TODO: replace to a more suitable error. return Err(Error::Other(box_err!( "{} peer is applying snapshot", @@ -4761,7 +4781,7 @@ where } // Check whether the term is stale. if let Err(e) = util::check_term(msg, self.fsm.peer.term()) { - self.ctx.raft_metrics.invalid_proposal.stale_command += 1; + self.ctx.raft_metrics.invalid_proposal.stale_command.inc(); return Err(e); } @@ -4773,7 +4793,7 @@ where // driver, the meta is updated. 
let requested_version = msg.get_header().get_region_epoch().version; self.collect_sibling_region(requested_version, &mut new_regions); - self.ctx.raft_metrics.invalid_proposal.epoch_not_match += 1; + self.ctx.raft_metrics.invalid_proposal.epoch_not_match.inc(); Err(Error::EpochNotMatch(m, new_regions)) } Err(e) => Err(e), @@ -5016,12 +5036,16 @@ where // [entries...][the entry at `compact_idx`][the last entry][new compaction entry] // |-------------------- entries will be left ----------------------| // ``` - self.ctx.raft_metrics.raft_log_gc_skipped.reserve_log += 1; + self.ctx.raft_metrics.raft_log_gc_skipped.reserve_log.inc(); return; } else if replicated_idx - first_idx < self.ctx.cfg.raft_log_gc_threshold && self.fsm.skip_gc_raft_log_ticks < self.ctx.cfg.raft_log_reserve_max_ticks { - self.ctx.raft_metrics.raft_log_gc_skipped.threshold_limit += 1; + self.ctx + .raft_metrics + .raft_log_gc_skipped + .threshold_limit + .inc(); // Logs will only be kept `max_ticks` * `raft_log_gc_tick_interval`. 
self.fsm.skip_gc_raft_log_ticks += 1; self.register_raft_gc_log_tick(); @@ -5037,7 +5061,8 @@ where self.ctx .raft_metrics .raft_log_gc_skipped - .compact_idx_too_small += 1; + .compact_idx_too_small + .inc(); return; } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 3c4e77ff4b9..d6faf92ca85 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -82,7 +82,7 @@ use crate::{ ApplyBatchSystem, ApplyNotifier, ApplyPollerBuilder, ApplyRes, ApplyRouter, ApplyTaskRes, }, - local_metrics::{RaftMetrics, RaftReadyMetrics}, + local_metrics::RaftMetrics, memory::*, metrics::*, peer_storage, @@ -598,7 +598,7 @@ where "msg_type" => ?msg_type, ); - self.raft_metrics.message_dropped.stale_msg += 1; + self.raft_metrics.message_dropped.stale_msg.inc(); let mut gc_msg = RaftMessage::default(); gc_msg.set_region_id(region_id); @@ -765,7 +765,6 @@ pub struct RaftPoller>, peer_msg_buf: Vec>, - previous_metrics: RaftReadyMetrics, timer: TiInstant, poll_ctx: PollContext, messages_per_tick: usize, @@ -773,12 +772,17 @@ pub struct RaftPoller RaftPoller { fn flush_events(&mut self) { self.flush_ticks(); - self.poll_ctx.raft_metrics.flush(); + self.poll_ctx.raft_metrics.maybe_flush(); self.poll_ctx.store_stat.flush(); MEMTRACE_PEERS.trace(mem::take(&mut self.trace_event)); @@ -800,7 +804,9 @@ impl PollHandler, St for<'a> F: FnOnce(&'a BatchSystemConfig), { fail_point!("begin_raft_poller"); - self.previous_metrics = self.poll_ctx.raft_metrics.ready.clone(); + self.previous_append = self.poll_ctx.raft_metrics.ready.append.get(); + self.previous_message = self.poll_ctx.raft_metrics.ready.message.get(); + self.previous_snapshot = self.poll_ctx.raft_metrics.ready.snapshot.get(); self.poll_ctx.pending_count = 0; self.poll_ctx.ready_count = 0; self.poll_ctx.has_ready = false; @@ -1010,17 +1016,20 @@ impl PollHandler, St .raft_metrics .ready .append - 
.saturating_sub(self.previous_metrics.append), + .get() + .saturating_sub(self.previous_append), self.poll_ctx .raft_metrics .ready .message - .saturating_sub(self.previous_metrics.message), + .get() + .saturating_sub(self.previous_message), self.poll_ctx .raft_metrics .ready .snapshot - .saturating_sub(self.previous_metrics.snapshot), + .get() + .saturating_sub(self.previous_snapshot), ); } @@ -1319,7 +1328,6 @@ where tag: tag.clone(), store_msg_buf: Vec::with_capacity(ctx.cfg.messages_per_tick), peer_msg_buf: Vec::with_capacity(ctx.cfg.messages_per_tick), - previous_metrics: ctx.raft_metrics.ready.clone(), timer: TiInstant::now(), messages_per_tick: ctx.cfg.messages_per_tick, poll_ctx: ctx, @@ -1327,6 +1335,9 @@ where trace_event: TraceEvent::default(), last_flush_time: TiInstant::now(), need_flush_events: false, + previous_append: 0, + previous_message: 0, + previous_snapshot: 0, } } } @@ -1757,7 +1768,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER if local_state.get_state() != PeerState::Tombstone { // Maybe split, but not registered yet. 
if !util::is_first_message(msg.get_message()) { - self.ctx.raft_metrics.message_dropped.region_nonexistent += 1; + self.ctx + .raft_metrics + .message_dropped + .region_nonexistent + .inc(); return Err(box_err!( "[region {}] region not exist but not tombstone: {:?}", region_id, @@ -1810,7 +1825,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } // The region in this peer is already destroyed if util::is_epoch_stale(from_epoch, region_epoch) { - self.ctx.raft_metrics.message_dropped.region_tombstone_peer += 1; + self.ctx + .raft_metrics + .message_dropped + .region_tombstone_peer + .inc(); info!( "tombstone peer receives a stale message"; "region_id" => region_id, @@ -1859,7 +1878,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER util::find_peer(region, self.ctx.store_id()).map(|r| r.get_id()) { if to_peer_id <= local_peer_id { - self.ctx.raft_metrics.message_dropped.region_tombstone_peer += 1; + self.ctx + .raft_metrics + .message_dropped + .region_tombstone_peer + .inc(); info!( "tombstone peer receives a stale message, local_peer_id >= to_peer_id in msg"; "region_id" => region_id, @@ -1907,7 +1930,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER "to_store_id" => msg.get_to_peer().get_store_id(), "region_id" => region_id, ); - self.ctx.raft_metrics.message_dropped.mismatch_store_id += 1; + self.ctx + .raft_metrics + .message_dropped + .mismatch_store_id + .inc(); return Ok(()); } @@ -1916,7 +1943,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER "missing epoch in raft message, ignore it"; "region_id" => region_id, ); - self.ctx.raft_metrics.message_dropped.mismatch_region_epoch += 1; + self.ctx + .raft_metrics + .message_dropped + .mismatch_region_epoch + .inc(); return Ok(()); } if msg.get_is_tombstone() || msg.has_merge_target() { @@ -1986,7 +2017,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> 
StoreFsmDelegate<'a, EK, ER "region_id" => region_id, "msg_type" => ?msg_type, ); - self.ctx.raft_metrics.message_dropped.stale_msg += 1; + self.ctx.raft_metrics.message_dropped.stale_msg.inc(); return Ok(false); } @@ -2129,7 +2160,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } if is_overlapped { - self.ctx.raft_metrics.message_dropped.region_overlap += 1; + self.ctx.raft_metrics.message_dropped.region_overlap.inc(); return Ok(false); } diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index 923fb8ffc26..aa33ae49fea 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -6,112 +6,51 @@ use std::sync::{Arc, Mutex}; use collections::HashSet; use prometheus::local::LocalHistogram; use raft::eraftpb::MessageType; +use tikv_util::time::{Duration, Instant}; use tracker::{Tracker, TrackerToken, GLOBAL_TRACKERS}; use super::metrics::*; -/// The buffered metrics counters for raft ready handling. 
-#[derive(Debug, Default, Clone)] -pub struct RaftReadyMetrics { - pub message: u64, - pub commit: u64, - pub append: u64, - pub snapshot: u64, - pub pending_region: u64, - pub has_ready_region: u64, -} +const METRICS_FLUSH_INTERVAL: u64 = 10_000; // 10s -impl RaftReadyMetrics { - /// Flushes all metrics - fn flush(&mut self) { - // reset all buffered metrics once they have been added - if self.message > 0 { - STORE_RAFT_READY_COUNTER.message.inc_by(self.message); - self.message = 0; - } - if self.commit > 0 { - STORE_RAFT_READY_COUNTER.commit.inc_by(self.commit); - self.commit = 0; - } - if self.append > 0 { - STORE_RAFT_READY_COUNTER.append.inc_by(self.append); - self.append = 0; - } - if self.snapshot > 0 { - STORE_RAFT_READY_COUNTER.snapshot.inc_by(self.snapshot); - self.snapshot = 0; - } - if self.pending_region > 0 { - STORE_RAFT_READY_COUNTER - .pending_region - .inc_by(self.pending_region); - self.pending_region = 0; - } - if self.has_ready_region > 0 { - STORE_RAFT_READY_COUNTER - .has_ready_region - .inc_by(self.has_ready_region); - self.has_ready_region = 0; - } - } -} - -pub type SendStatus = [u64; 2]; - -macro_rules! flush_send_status { - ($metrics:ident, $self:ident) => {{ - if $self.$metrics[0] > 0 { - STORE_RAFT_SENT_MESSAGE_COUNTER - .$metrics - .drop - .inc_by($self.$metrics[0]); - $self.$metrics[0] = 0; - } - if $self.$metrics[1] > 0 { - STORE_RAFT_SENT_MESSAGE_COUNTER - .$metrics - .accept - .inc_by($self.$metrics[1]); - $self.$metrics[1] = 0; +macro_rules! set_send_status { + ($metrics:expr, $success:ident) => {{ + if $success { + $metrics.accept.inc(); + } else { + $metrics.drop.inc(); } }}; } -/// The buffered metrics counters for raft message. 
-#[derive(Debug, Default, Clone)] -pub struct RaftSendMessageMetrics { - pub append: SendStatus, - pub append_resp: SendStatus, - pub prevote: SendStatus, - pub prevote_resp: SendStatus, - pub vote: SendStatus, - pub vote_resp: SendStatus, - pub snapshot: SendStatus, - pub heartbeat: SendStatus, - pub heartbeat_resp: SendStatus, - pub transfer_leader: SendStatus, - pub timeout_now: SendStatus, - pub read_index: SendStatus, - pub read_index_resp: SendStatus, +pub struct RaftSendMessageMetrics(RaftSentMessageCounterVec); + +impl Default for RaftSendMessageMetrics { + fn default() -> Self { + Self(RaftSentMessageCounterVec::from( + &STORE_RAFT_SENT_MESSAGE_COUNTER_VEC, + )) + } } impl RaftSendMessageMetrics { pub fn add(&mut self, msg_type: MessageType, success: bool) { - let i = success as usize; match msg_type { - MessageType::MsgAppend => self.append[i] += 1, - MessageType::MsgAppendResponse => self.append_resp[i] += 1, - MessageType::MsgRequestPreVote => self.prevote[i] += 1, - MessageType::MsgRequestPreVoteResponse => self.prevote_resp[i] += 1, - MessageType::MsgRequestVote => self.vote[i] += 1, - MessageType::MsgRequestVoteResponse => self.vote_resp[i] += 1, - MessageType::MsgSnapshot => self.snapshot[i] += 1, - MessageType::MsgHeartbeat => self.heartbeat[i] += 1, - MessageType::MsgHeartbeatResponse => self.heartbeat_resp[i] += 1, - MessageType::MsgTransferLeader => self.transfer_leader[i] += 1, - MessageType::MsgReadIndex => self.read_index[i] += 1, - MessageType::MsgReadIndexResp => self.read_index_resp[i] += 1, - MessageType::MsgTimeoutNow => self.timeout_now[i] += 1, + MessageType::MsgAppend => set_send_status!(self.0.append, success), + MessageType::MsgAppendResponse => set_send_status!(self.0.append_resp, success), + MessageType::MsgRequestPreVote => set_send_status!(self.0.prevote, success), + MessageType::MsgRequestPreVoteResponse => { + set_send_status!(self.0.prevote_resp, success) + } + MessageType::MsgRequestVote => set_send_status!(self.0.vote, 
success), + MessageType::MsgRequestVoteResponse => set_send_status!(self.0.vote_resp, success), + MessageType::MsgSnapshot => set_send_status!(self.0.snapshot, success), + MessageType::MsgHeartbeat => set_send_status!(self.0.heartbeat, success), + MessageType::MsgHeartbeatResponse => set_send_status!(self.0.heartbeat_resp, success), + MessageType::MsgTransferLeader => set_send_status!(self.0.transfer_leader, success), + MessageType::MsgReadIndex => set_send_status!(self.0.read_index, success), + MessageType::MsgReadIndexResp => set_send_status!(self.0.read_index_resp, success), + MessageType::MsgTimeoutNow => set_send_status!(self.0.timeout_now, success), // We do not care about these message types for metrics. // Explicitly declare them so when we add new message types we are forced to // decide. @@ -123,293 +62,30 @@ impl RaftSendMessageMetrics { | MessageType::MsgCheckQuorum => {} } } - /// Flushes all metrics - pub fn flush(&mut self) { - // reset all buffered metrics once they have been added - flush_send_status!(append, self); - flush_send_status!(append_resp, self); - flush_send_status!(prevote, self); - flush_send_status!(prevote_resp, self); - flush_send_status!(vote, self); - flush_send_status!(vote_resp, self); - flush_send_status!(snapshot, self); - flush_send_status!(heartbeat, self); - flush_send_status!(heartbeat_resp, self); - flush_send_status!(transfer_leader, self); - flush_send_status!(timeout_now, self); - flush_send_status!(read_index, self); - flush_send_status!(read_index_resp, self); - } -} - -#[derive(Debug, Default, Clone)] -pub struct RaftMessageDropMetrics { - pub mismatch_store_id: u64, - pub mismatch_region_epoch: u64, - pub stale_msg: u64, - pub region_overlap: u64, - pub region_no_peer: u64, - pub region_tombstone_peer: u64, - pub region_nonexistent: u64, - pub applying_snap: u64, - pub disk_full: u64, -} - -impl RaftMessageDropMetrics { - fn flush(&mut self) { - if self.mismatch_store_id > 0 { - STORE_RAFT_DROPPED_MESSAGE_COUNTER - 
.mismatch_store_id - .inc_by(self.mismatch_store_id); - self.mismatch_store_id = 0; - } - if self.mismatch_region_epoch > 0 { - STORE_RAFT_DROPPED_MESSAGE_COUNTER - .mismatch_region_epoch - .inc_by(self.mismatch_region_epoch); - self.mismatch_region_epoch = 0; - } - if self.stale_msg > 0 { - STORE_RAFT_DROPPED_MESSAGE_COUNTER - .stale_msg - .inc_by(self.stale_msg); - self.stale_msg = 0; - } - if self.region_overlap > 0 { - STORE_RAFT_DROPPED_MESSAGE_COUNTER - .region_overlap - .inc_by(self.region_overlap); - self.region_overlap = 0; - } - if self.region_no_peer > 0 { - STORE_RAFT_DROPPED_MESSAGE_COUNTER - .region_no_peer - .inc_by(self.region_no_peer); - self.region_no_peer = 0; - } - if self.region_tombstone_peer > 0 { - STORE_RAFT_DROPPED_MESSAGE_COUNTER - .region_tombstone_peer - .inc_by(self.region_tombstone_peer); - self.region_tombstone_peer = 0; - } - if self.region_nonexistent > 0 { - STORE_RAFT_DROPPED_MESSAGE_COUNTER - .region_nonexistent - .inc_by(self.region_nonexistent); - self.region_nonexistent = 0; - } - if self.applying_snap > 0 { - STORE_RAFT_DROPPED_MESSAGE_COUNTER - .applying_snap - .inc_by(self.applying_snap); - self.applying_snap = 0; - } - if self.disk_full > 0 { - STORE_RAFT_DROPPED_MESSAGE_COUNTER - .disk_full - .inc_by(self.disk_full); - self.disk_full = 0; - } - } -} - -/// The buffered metrics counters for raft propose. 
-#[derive(Clone)] -pub struct RaftProposeMetrics { - pub all: u64, - pub local_read: u64, - pub read_index: u64, - pub unsafe_read_index: u64, - pub dropped_read_index: u64, - pub normal: u64, - pub batch: usize, - pub transfer_leader: u64, - pub conf_change: u64, - pub request_wait_time: LocalHistogram, -} - -impl Default for RaftProposeMetrics { - fn default() -> RaftProposeMetrics { - RaftProposeMetrics { - all: 0, - local_read: 0, - read_index: 0, - unsafe_read_index: 0, - normal: 0, - transfer_leader: 0, - conf_change: 0, - batch: 0, - dropped_read_index: 0, - request_wait_time: REQUEST_WAIT_TIME_HISTOGRAM.local(), - } - } -} - -impl RaftProposeMetrics { - /// Flushes all metrics - fn flush(&mut self) { - // reset all buffered metrics once they have been added - if self.all > 0 { - PEER_PROPOSAL_COUNTER.all.inc_by(self.all); - self.all = 0; - } - if self.local_read > 0 { - PEER_PROPOSAL_COUNTER.local_read.inc_by(self.local_read); - self.local_read = 0; - } - if self.read_index > 0 { - PEER_PROPOSAL_COUNTER.read_index.inc_by(self.read_index); - self.read_index = 0; - } - if self.unsafe_read_index > 0 { - PEER_PROPOSAL_COUNTER - .unsafe_read_index - .inc_by(self.unsafe_read_index); - self.unsafe_read_index = 0; - } - if self.dropped_read_index > 0 { - PEER_PROPOSAL_COUNTER - .dropped_read_index - .inc_by(self.dropped_read_index); - self.dropped_read_index = 0; - } - if self.normal > 0 { - PEER_PROPOSAL_COUNTER.normal.inc_by(self.normal); - self.normal = 0; - } - if self.transfer_leader > 0 { - PEER_PROPOSAL_COUNTER - .transfer_leader - .inc_by(self.transfer_leader); - self.transfer_leader = 0; - } - if self.conf_change > 0 { - PEER_PROPOSAL_COUNTER.conf_change.inc_by(self.conf_change); - self.conf_change = 0; - } - if self.batch > 0 { - PEER_PROPOSAL_COUNTER.batch.inc_by(self.batch as u64); - self.batch = 0; - } - self.request_wait_time.flush(); - } -} -/// The buffered metrics counter for invalid propose -#[derive(Clone, Default)] -pub struct 
RaftInvalidProposeMetrics { - pub mismatch_store_id: u64, - pub region_not_found: u64, - pub not_leader: u64, - pub mismatch_peer_id: u64, - pub stale_command: u64, - pub epoch_not_match: u64, - pub read_index_no_leader: u64, - pub region_not_initialized: u64, - pub is_applying_snapshot: u64, -} - -impl RaftInvalidProposeMetrics { - fn flush(&mut self) { - if self.mismatch_store_id > 0 { - RAFT_INVALID_PROPOSAL_COUNTER - .mismatch_store_id - .inc_by(self.mismatch_store_id); - self.mismatch_store_id = 0; - } - if self.region_not_found > 0 { - RAFT_INVALID_PROPOSAL_COUNTER - .region_not_found - .inc_by(self.region_not_found); - self.region_not_found = 0; - } - if self.not_leader > 0 { - RAFT_INVALID_PROPOSAL_COUNTER - .not_leader - .inc_by(self.not_leader); - self.not_leader = 0; - } - if self.mismatch_peer_id > 0 { - RAFT_INVALID_PROPOSAL_COUNTER - .mismatch_peer_id - .inc_by(self.mismatch_peer_id); - self.mismatch_peer_id = 0; - } - if self.stale_command > 0 { - RAFT_INVALID_PROPOSAL_COUNTER - .stale_command - .inc_by(self.stale_command); - self.stale_command = 0; - } - if self.epoch_not_match > 0 { - RAFT_INVALID_PROPOSAL_COUNTER - .epoch_not_match - .inc_by(self.epoch_not_match); - self.epoch_not_match = 0; - } - if self.read_index_no_leader > 0 { - RAFT_INVALID_PROPOSAL_COUNTER - .read_index_no_leader - .inc_by(self.read_index_no_leader); - self.read_index_no_leader = 0; - } - if self.region_not_initialized > 0 { - RAFT_INVALID_PROPOSAL_COUNTER - .region_not_initialized - .inc_by(self.region_not_initialized); - self.region_not_initialized = 0; - } - if self.is_applying_snapshot > 0 { - RAFT_INVALID_PROPOSAL_COUNTER - .is_applying_snapshot - .inc_by(self.is_applying_snapshot); - self.is_applying_snapshot = 0; - } - } -} - -#[derive(Clone, Default)] -pub struct RaftLogGcSkippedMetrics { - pub reserve_log: u64, - pub threshold_limit: u64, - pub compact_idx_too_small: u64, -} - -impl RaftLogGcSkippedMetrics { - fn flush(&mut self) { - if self.reserve_log > 0 { - 
RAFT_LOG_GC_SKIPPED.reserve_log.inc_by(self.reserve_log); - self.reserve_log = 0; - } - if self.threshold_limit > 0 { - RAFT_LOG_GC_SKIPPED - .threshold_limit - .inc_by(self.threshold_limit); - self.threshold_limit = 0; - } - if self.compact_idx_too_small > 0 { - RAFT_LOG_GC_SKIPPED - .compact_idx_too_small - .inc_by(self.compact_idx_too_small); - self.compact_idx_too_small = 0; - } + pub fn flush(&mut self) { + self.0.flush(); } } /// The buffered metrics counters for raft. -#[derive(Clone)] pub struct RaftMetrics { - pub store_time: LocalHistogram, - pub ready: RaftReadyMetrics, + // local counter + pub ready: RaftReadyCounterVec, pub send_message: RaftSendMessageMetrics, - pub message_dropped: RaftMessageDropMetrics, - pub propose: RaftProposeMetrics, + pub message_dropped: RaftDroppedMessageCounterVec, + pub propose: RaftProposalCounterVec, + pub invalid_proposal: RaftInvalidProposalCounterVec, + pub raft_log_gc_skipped: RaftLogGcSkippedCounterVec, + + // local histogram + pub store_time: LocalHistogram, + pub propose_wait_time: LocalHistogram, pub process_ready: LocalHistogram, pub commit_log: LocalHistogram, - pub leader_missing: Arc>>, - pub invalid_proposal: RaftInvalidProposeMetrics, pub write_block_wait: LocalHistogram, + + // waterfall metrics pub waterfall_metrics: bool, pub wf_batch_wait: LocalHistogram, pub wf_send_to_queue: LocalHistogram, @@ -417,23 +93,31 @@ pub struct RaftMetrics { pub wf_persist_log: LocalHistogram, pub wf_commit_log: LocalHistogram, pub wf_commit_not_persist_log: LocalHistogram, - pub raft_log_gc_skipped: RaftLogGcSkippedMetrics, + + pub leader_missing: Arc>>, + + last_flush_time: Instant, } impl RaftMetrics { pub fn new(waterfall_metrics: bool) -> Self { Self { + ready: RaftReadyCounterVec::from(&STORE_RAFT_READY_COUNTER_VEC), + send_message: RaftSendMessageMetrics::default(), + message_dropped: RaftDroppedMessageCounterVec::from( + &STORE_RAFT_DROPPED_MESSAGE_COUNTER_VEC, + ), + propose: 
RaftProposalCounterVec::from(&PEER_PROPOSAL_COUNTER_VEC), + invalid_proposal: RaftInvalidProposalCounterVec::from( + &RAFT_INVALID_PROPOSAL_COUNTER_VEC, + ), + raft_log_gc_skipped: RaftLogGcSkippedCounterVec::from(&RAFT_LOG_GC_SKIPPED_VEC), store_time: STORE_TIME_HISTOGRAM.local(), - ready: Default::default(), - send_message: Default::default(), - message_dropped: Default::default(), - propose: Default::default(), + propose_wait_time: REQUEST_WAIT_TIME_HISTOGRAM.local(), process_ready: PEER_RAFT_PROCESS_DURATION .with_label_values(&["ready"]) .local(), commit_log: PEER_COMMIT_LOG_HISTOGRAM.local(), - leader_missing: Arc::default(), - invalid_proposal: Default::default(), write_block_wait: STORE_WRITE_MSG_BLOCK_WAIT_DURATION_HISTOGRAM.local(), waterfall_metrics, wf_batch_wait: STORE_WF_BATCH_WAIT_DURATION_HISTOGRAM.local(), @@ -442,22 +126,32 @@ impl RaftMetrics { wf_persist_log: STORE_WF_PERSIST_LOG_DURATION_HISTOGRAM.local(), wf_commit_log: STORE_WF_COMMIT_LOG_DURATION_HISTOGRAM.local(), wf_commit_not_persist_log: STORE_WF_COMMIT_NOT_PERSIST_LOG_DURATION_HISTOGRAM.local(), - raft_log_gc_skipped: RaftLogGcSkippedMetrics::default(), + leader_missing: Arc::default(), + last_flush_time: Instant::now_coarse(), } } - /// Flushs all metrics - pub fn flush(&mut self) { - self.store_time.flush(); + /// Flushes all metrics + pub fn maybe_flush(&mut self) { + if self.last_flush_time.saturating_elapsed() < Duration::from_millis(METRICS_FLUSH_INTERVAL) + { + return; + } + self.last_flush_time = Instant::now_coarse(); + self.ready.flush(); self.send_message.flush(); + self.message_dropped.flush(); self.propose.flush(); + self.invalid_proposal.flush(); + self.raft_log_gc_skipped.flush(); + + self.store_time.flush(); + self.propose_wait_time.flush(); self.process_ready.flush(); self.commit_log.flush(); - self.message_dropped.flush(); - self.invalid_proposal.flush(); self.write_block_wait.flush(); - self.raft_log_gc_skipped.flush(); + if self.waterfall_metrics { 
self.wf_batch_wait.flush(); self.wf_send_to_queue.flush(); @@ -466,6 +160,7 @@ impl RaftMetrics { self.wf_commit_log.flush(); self.wf_commit_not_persist_log.flush(); } + let mut missing = self.leader_missing.lock().unwrap(); LEADER_MISSING.set(missing.len() as i64); missing.clear(); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 587b9ad3af7..ad4ee7e7f98 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -15,17 +15,6 @@ make_auto_flush_static_metric! { write_thread_wait, db_mutex_lock_nanos, } - pub label_enum ProposalType { - all, - local_read, - read_index, - unsafe_read_index, - normal, - transfer_leader, - conf_change, - batch, - dropped_read_index, - } pub label_enum WriteCmdType { put, @@ -53,43 +42,6 @@ make_auto_flush_static_metric! { success, } - pub label_enum RaftReadyType { - message, - commit, - append, - snapshot, - pending_region, - has_ready_region, - } - - pub label_enum MessageCounterType { - append, - append_resp, - prevote, - prevote_resp, - vote, - vote_resp, - snapshot, - heartbeat, - heartbeat_resp, - transfer_leader, - timeout_now, - read_index, - read_index_resp, - } - - pub label_enum RaftDroppedMessage { - mismatch_store_id, - mismatch_region_epoch, - stale_msg, - region_overlap, - region_no_peer, - region_tombstone_peer, - region_nonexistent, - applying_snap, - disk_full, - } - pub label_enum SnapValidationType { stale, decode, @@ -126,17 +78,7 @@ make_auto_flush_static_metric! { fetch_unused, } - pub label_enum RaftInvalidProposal { - mismatch_store_id, - region_not_found, - not_leader, - mismatch_peer_id, - stale_command, - epoch_not_match, - read_index_no_leader, - region_not_initialized, - is_applying_snapshot, - } + pub label_enum RaftEventDurationType { compact_check, pd_store_heartbeat, @@ -154,23 +96,10 @@ make_auto_flush_static_metric! 
{ skip_partition, } - pub label_enum SendStatus { - accept, - drop, - } - - pub label_enum RaftLogGcSkippedReason { - reserve_log, - compact_idx_too_small, - threshold_limit, - } - pub struct RaftEventDuration : LocalHistogram { "type" => RaftEventDurationType } - pub struct RaftInvalidProposalCount : LocalIntCounter { - "type" => RaftInvalidProposal - } + pub struct RaftEntryFetches : LocalIntCounter { "type" => RaftEntryType } @@ -184,9 +113,6 @@ make_auto_flush_static_metric! { "type" => RegionHashType, "result" => RegionHashResult, } - pub struct ProposalVec: LocalIntCounter { - "type" => ProposalType, - } pub struct AdminCmdVec : LocalIntCounter { "type" => AdminCmdType, @@ -197,19 +123,6 @@ make_auto_flush_static_metric! { "type" => WriteCmdType, } - pub struct RaftReadyVec : LocalIntCounter { - "type" => RaftReadyType, - } - - pub struct MessageCounterVec : LocalIntCounter { - "type" => MessageCounterType, - "status" => SendStatus, - } - - pub struct RaftDropedVec : LocalIntCounter { - "type" => RaftDroppedMessage, - } - pub struct SnapValidVec : LocalIntCounter { "type" => SnapValidationType } @@ -221,18 +134,79 @@ make_auto_flush_static_metric! { "cf" => CfNames, "type" => CompactionGuardAction, } - - pub struct RaftLogGcSkippedVec: LocalIntCounter { - "reason" => RaftLogGcSkippedReason, - } } make_static_metric! 
{ - pub struct HibernatedPeerStateGauge: IntGauge { - "state" => { - awaken, - hibernated, - }, + pub label_enum RaftReadyType { + message, + commit, + append, + snapshot, + pending_region, + has_ready_region, + } + + pub label_enum RaftSentMessageCounterType { + append, + append_resp, + prevote, + prevote_resp, + vote, + vote_resp, + snapshot, + heartbeat, + heartbeat_resp, + transfer_leader, + timeout_now, + read_index, + read_index_resp, + } + + pub label_enum SendStatus { + accept, + drop, + } + + pub label_enum RaftDroppedMessage { + mismatch_store_id, + mismatch_region_epoch, + stale_msg, + region_overlap, + region_no_peer, + region_tombstone_peer, + region_nonexistent, + applying_snap, + disk_full, + } + + pub label_enum ProposalType { + all, + local_read, + read_index, + unsafe_read_index, + normal, + transfer_leader, + conf_change, + batch, + dropped_read_index, + } + + pub label_enum RaftInvalidProposal { + mismatch_store_id, + region_not_found, + not_leader, + mismatch_peer_id, + stale_command, + epoch_not_match, + read_index_no_leader, + region_not_initialized, + is_applying_snapshot, + } + + pub label_enum RaftLogGcSkippedReason { + reserve_log, + compact_idx_too_small, + threshold_limit, } pub label_enum LoadBaseSplitEventType { @@ -262,9 +236,42 @@ make_static_metric! 
{ unable_to_split_cpu_top, } + pub struct HibernatedPeerStateGauge: IntGauge { + "state" => { + awaken, + hibernated, + }, + } + + pub struct RaftReadyCounterVec : LocalIntCounter { + "type" => RaftReadyType, + } + + pub struct RaftSentMessageCounterVec : LocalIntCounter { + "type" => RaftSentMessageCounterType, + "status" => SendStatus, + } + + pub struct RaftDroppedMessageCounterVec : LocalIntCounter { + "type" => RaftDroppedMessage, + } + + pub struct RaftProposalCounterVec: LocalIntCounter { + "type" => ProposalType, + } + + pub struct RaftInvalidProposalCounterVec : LocalIntCounter { + "type" => RaftInvalidProposal + } + + pub struct RaftLogGcSkippedCounterVec: LocalIntCounter { + "reason" => RaftLogGcSkippedReason, + } + pub struct LoadBaseSplitEventCounterVec: IntCounter { "type" => LoadBaseSplitEventType, } + } lazy_static! { @@ -404,8 +411,6 @@ lazy_static! { "Total number of proposal made.", &["type"] ).unwrap(); - pub static ref PEER_PROPOSAL_COUNTER: ProposalVec = - auto_flush_from!(PEER_PROPOSAL_COUNTER_VEC, ProposalVec); pub static ref PEER_ADMIN_CMD_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( @@ -452,8 +457,6 @@ lazy_static! { "Total number of raft ready handled.", &["type"] ).unwrap(); - pub static ref STORE_RAFT_READY_COUNTER: RaftReadyVec = - auto_flush_from!(STORE_RAFT_READY_COUNTER_VEC, RaftReadyVec); pub static ref STORE_RAFT_SENT_MESSAGE_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( @@ -461,8 +464,6 @@ lazy_static! { "Total number of raft ready sent messages.", &["type", "status"] ).unwrap(); - pub static ref STORE_RAFT_SENT_MESSAGE_COUNTER: MessageCounterVec = - auto_flush_from!(STORE_RAFT_SENT_MESSAGE_COUNTER_VEC, MessageCounterVec); pub static ref STORE_RAFT_DROPPED_MESSAGE_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( @@ -470,8 +471,6 @@ lazy_static! 
{ "Total number of raft dropped messages.", &["type"] ).unwrap(); - pub static ref STORE_RAFT_DROPPED_MESSAGE_COUNTER: RaftDropedVec = - auto_flush_from!(STORE_RAFT_DROPPED_MESSAGE_COUNTER_VEC, RaftDropedVec); pub static ref STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC: IntGaugeVec = register_int_gauge_vec!( @@ -632,8 +631,6 @@ lazy_static! { "Total number of raft invalid proposal.", &["type"] ).unwrap(); - pub static ref RAFT_INVALID_PROPOSAL_COUNTER: RaftInvalidProposalCount = - auto_flush_from!(RAFT_INVALID_PROPOSAL_COUNTER_VEC, RaftInvalidProposalCount); pub static ref RAFT_EVENT_DURATION_VEC: HistogramVec = register_histogram_vec!( @@ -714,11 +711,10 @@ lazy_static! { exponential_buckets(8.0, 2.0, 24).unwrap() ).unwrap(); - pub static ref RAFT_ENTRIES_CACHES_GAUGE: IntGauge = register_int_gauge!( "tikv_raft_entries_caches", "Total memory size of raft entries caches." - ).unwrap(); + ).unwrap(); pub static ref RAFT_ENTRIES_EVICT_BYTES: IntCounter = register_int_counter!( "tikv_raft_entries_evict_bytes", @@ -775,12 +771,10 @@ lazy_static! 
{ &["reason"] ) .unwrap(); - pub static ref RAFT_LOG_GC_SKIPPED: RaftLogGcSkippedVec = - auto_flush_from!(RAFT_LOG_GC_SKIPPED_VEC, RaftLogGcSkippedVec); pub static ref RAFT_APPLYING_SST_GAUGE: IntGaugeVec = register_int_gauge_vec!( "tikv_raft_applying_sst", "Sum of applying sst.", &["type"] - ).unwrap(); + ).unwrap(); } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index b109d107c4f..89ed6eeef7d 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -70,7 +70,7 @@ use uuid::Uuid; use super::{ cmd_resp, - local_metrics::{RaftMetrics, RaftReadyMetrics, TimeTracker}, + local_metrics::{RaftMetrics, TimeTracker}, metrics::*, peer_storage::{write_peer_state, CheckApplyingSnapStatus, HandleReadyResult, PeerStorage}, read_queue::{ReadIndexQueue, ReadIndexRequest}, @@ -1569,19 +1569,28 @@ where self.raft_group.snap() } - fn add_ready_metric(&self, ready: &Ready, metrics: &mut RaftReadyMetrics) { - metrics.message += ready.messages().len() as u64; - metrics.commit += ready.committed_entries().len() as u64; - metrics.append += ready.entries().len() as u64; + fn add_ready_metric(&self, ready: &Ready, metrics: &mut RaftMetrics) { + metrics.ready.message.inc_by(ready.messages().len() as u64); + metrics + .ready + .commit + .inc_by(ready.committed_entries().len() as u64); + metrics.ready.append.inc_by(ready.entries().len() as u64); if !ready.snapshot().is_empty() { - metrics.snapshot += 1; + metrics.ready.snapshot.inc(); } } - fn add_light_ready_metric(&self, light_ready: &LightReady, metrics: &mut RaftReadyMetrics) { - metrics.message += light_ready.messages().len() as u64; - metrics.commit += light_ready.committed_entries().len() as u64; + fn add_light_ready_metric(&self, light_ready: &LightReady, metrics: &mut RaftMetrics) { + metrics + .ready + .message + .inc_by(light_ready.messages().len() as u64); + metrics + .ready + .commit + .inc_by(light_ready.committed_entries().len() as u64); 
} #[inline] @@ -2490,7 +2499,7 @@ where let mut ready = self.raft_group.ready(); - self.add_ready_metric(&ready, &mut ctx.raft_metrics.ready); + self.add_ready_metric(&ready, &mut ctx.raft_metrics); // Update it after unstable entries pagination is introduced. debug_assert!(ready.entries().last().map_or_else( @@ -2642,7 +2651,7 @@ where // needs to be persisted. let mut light_rd = self.raft_group.advance_append(ready); - self.add_light_ready_metric(&light_rd, &mut ctx.raft_metrics.ready); + self.add_light_ready_metric(&light_rd, &mut ctx.raft_metrics); if let Some(idx) = light_rd.commit_index() { panic!( @@ -3012,7 +3021,7 @@ where } self.mut_store().update_cache_persisted(persist_index); - self.add_light_ready_metric(&light_rd, &mut ctx.raft_metrics.ready); + self.add_light_ready_metric(&light_rd, &mut ctx.raft_metrics); if let Some(commit_index) = light_rd.commit_index() { let pre_commit_index = self.get_store().commit_index(); @@ -3397,7 +3406,7 @@ where return false; } - ctx.raft_metrics.propose.all += 1; + ctx.raft_metrics.propose.all.inc(); let req_admin_cmd_type = if !req.has_admin_request() { None @@ -3730,7 +3739,7 @@ where req: RaftCmdRequest, cb: Callback, ) { - ctx.raft_metrics.propose.local_read += 1; + ctx.raft_metrics.propose.local_read.inc(); cb.invoke_read(self.handle_read(ctx, req, false, Some(self.get_store().commit_index()))) } @@ -3821,7 +3830,7 @@ where "peer_id" => self.peer.get_id(), "err" => ?e, ); - poll_ctx.raft_metrics.propose.unsafe_read_index += 1; + poll_ctx.raft_metrics.propose.unsafe_read_index.inc(); cmd_resp::bind_error(&mut err_resp, e); cb.report_error(err_resp); self.should_wake_up = true; @@ -3873,7 +3882,11 @@ where // which would cause a long time waiting for a read response. Then we // should return an error directly in this situation. 
if !self.is_leader() && self.leader_id() == INVALID_ID { - poll_ctx.raft_metrics.invalid_proposal.read_index_no_leader += 1; + poll_ctx + .raft_metrics + .invalid_proposal + .read_index_no_leader + .inc(); // The leader may be hibernated, send a message for trying to awaken the leader. if self.bcast_wake_up_time.is_none() || self @@ -3904,7 +3917,7 @@ where return false; } - poll_ctx.raft_metrics.propose.read_index += 1; + poll_ctx.raft_metrics.propose.read_index.inc(); self.bcast_wake_up_time = None; let request = req @@ -3916,7 +3929,7 @@ where if dropped && self.is_leader() { // The message gets dropped silently, can't be handled anymore. apply::notify_stale_req(self.term(), cb); - poll_ctx.raft_metrics.propose.dropped_read_index += 1; + poll_ctx.raft_metrics.propose.dropped_read_index.inc(); return false; } @@ -4264,7 +4277,7 @@ where return Err(Error::ProposalInMergingMode(self.region_id)); } - poll_ctx.raft_metrics.propose.normal += 1; + poll_ctx.raft_metrics.propose.normal.inc(); if self.has_applied_to_current_term() { // Only when applied index's term is equal to current leader's term, the @@ -4425,7 +4438,7 @@ where req: RaftCmdRequest, cb: Callback, ) -> bool { - ctx.raft_metrics.propose.transfer_leader += 1; + ctx.raft_metrics.propose.transfer_leader.inc(); let transfer_leader = get_transfer_leader_cmd(&req).unwrap(); let prs = self.raft_group.raft.prs(); @@ -4544,7 +4557,7 @@ where self.check_conf_change(ctx, changes.as_ref(), &cc)?; - ctx.raft_metrics.propose.conf_change += 1; + ctx.raft_metrics.propose.conf_change.inc(); // TODO: use local histogram metrics PEER_PROPOSE_LOG_SIZE_HISTOGRAM.observe(data_size as f64); info!( diff --git a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index fa27ea340b8..a0732043d1b 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -78,7 +78,7 @@ thread_local! 
{ ); } -const METRICS_FLUSH_INTERVAL: u64 = 15_000; // 15s +const METRICS_FLUSH_INTERVAL: u64 = 10_000; // 10s pub fn maybe_tls_local_read_metrics_flush() { TLS_LOCAL_READ_METRICS.with(|m| { From 5fe01e05a373b13f0bf41025df2c6da2da3835ee Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Fri, 19 Aug 2022 16:02:52 +0800 Subject: [PATCH 0161/1149] util: use local histogram to record schedule wait duration (#13285) closes tikv/tikv#13293 It is too expensive to record the schedule wait duration of thread pools with shared atomics. The better way is to use local metrics and flush them at intervals. Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- components/tikv_util/src/yatp_pool/mod.rs | 18 ++++++++++++------ src/server/raft_client.rs | 1 + 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index e2e57c9fbce..6e246d6cddf 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -7,7 +7,7 @@ use std::sync::Arc; use fail::fail_point; pub use future_pool::{Full, FuturePool}; -use prometheus::Histogram; +use prometheus::{local::LocalHistogram, Histogram}; use yatp::{ pool::{CloneRunnerBuilder, Local, Runner}, queue::{multilevel, QueueType, TaskCell as _}, @@ -45,13 +45,15 @@ impl TickerWrapper { } } - pub fn try_tick(&mut self) { + // Returns whether tick has been triggered. + pub fn try_tick(&mut self) -> bool { let now = Instant::now_coarse(); if now.saturating_duration_since(self.last_tick_time) < tick_interval() { - return; + return false; } self.last_tick_time = now; self.ticker.on_tick(); + true } pub fn on_tick(&mut self) { @@ -93,7 +95,7 @@ pub struct YatpPoolRunner { before_pause: Option>, // Statistics about the schedule wait duration. 
- schedule_wait_duration: Histogram, + schedule_wait_duration: LocalHistogram, } impl Runner for YatpPoolRunner { @@ -118,7 +120,9 @@ impl Runner for YatpPoolRunner { .observe(schedule_time.elapsed().as_secs_f64()); } let finished = self.inner.handle(local, task_cell); - self.ticker.try_tick(); + if self.ticker.try_tick() { + self.schedule_wait_duration.flush(); + } finished } @@ -160,7 +164,7 @@ impl YatpPoolRunner { after_start, before_stop, before_pause, - schedule_wait_duration, + schedule_wait_duration: schedule_wait_duration.local(), } } } @@ -334,6 +338,8 @@ mod tests { for _ in 0..3 { rx.recv().unwrap(); } + // Drop the pool so the local metrics are flushed. + drop(pool); let histogram = metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[name]); assert_eq!(histogram.get_sample_count() as u32, 6, "{:?}", histogram); } diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index df1a18ab06d..bc0e8a59303 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -1012,6 +1012,7 @@ where self.last_hash.1 as usize }; + #[allow(unused_mut)] let mut transport_on_send_store_fp = || { fail_point!( "transport_on_send_snapshot", From 58fa80e0de0d43d473dc456081a4d2b08939e0aa Mon Sep 17 00:00:00 2001 From: cosven Date: Fri, 19 Aug 2022 17:52:52 +0800 Subject: [PATCH 0162/1149] storage: precheck whether the peer is leader when acquiring latches failed (#13254) close tikv/tikv#12966, ref tikv/tikv#12966 When a tikv is isolated from other tikv instances, some requests will be blocked in raftstore and the corresponding latches are not released. Following requests which require the latches will receive ServerIsBusy error and keep retrying. However, In such case, peers on the tikv are not leader anymore. The client is supposed to receive NotLeader error immediately. This commit introduces fail fast mode to scheduler. When a request fails to acquire any latch, scheduler checks if the peer is still leader. 
If it still the leader, schedule the request as usual, fail fast otherwise. Signed-off-by: cosven Co-authored-by: Ti Chi Robot --- components/backup/src/endpoint.rs | 11 +- .../src/coprocessor/region_info_accessor.rs | 67 +++++-- components/server/src/server.rs | 1 + components/test_raftstore/src/server.rs | 18 +- components/tikv_kv/src/lib.rs | 5 + components/tikv_kv/src/rocksdb_engine.rs | 25 ++- src/server/raftkv.rs | 15 +- src/storage/metrics.rs | 2 + src/storage/txn/scheduler.rs | 164 ++++++++++++++++-- tests/benches/misc/raftkv/mod.rs | 15 +- tests/integrations/storage/test_raftkv.rs | 49 ++++++ 11 files changed, 313 insertions(+), 59 deletions(-) diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 35a08c81a2d..e0ea9e3ae28 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -849,8 +849,8 @@ impl Endpoint { self.pool.borrow_mut().spawn(async move { loop { - // when get the guard, release it until we finish scanning a batch, - // because if we were suspended during scanning, + // when get the guard, release it until we finish scanning a batch, + // because if we were suspended during scanning, // the region info have higher possibility to change (then we must compensate that by the fine-grained backup). 
let guard = limit.guard().await; if let Err(e) = guard { @@ -1174,11 +1174,12 @@ pub mod tests { use std::{ fs, path::{Path, PathBuf}, - sync::Mutex, + sync::{Mutex, RwLock}, time::Duration, }; use api_version::{api_v2::RAW_KEY_PREFIX, dispatch_api_version, KvFormat, RawValue}; + use collections::HashSet; use engine_traits::MiscExt; use external_storage_export::{make_local_backend, make_noop_backend}; use file_system::{IoOp, IoRateLimiter, IoType}; @@ -1213,7 +1214,9 @@ pub mod tests { impl MockRegionInfoProvider { pub fn new(encode_key: bool) -> Self { MockRegionInfoProvider { - regions: Arc::new(Mutex::new(RegionCollector::new())), + regions: Arc::new(Mutex::new(RegionCollector::new(Arc::new(RwLock::new( + HashSet::default(), + ))))), cancel: None, need_encode_key: encode_key, } diff --git a/components/raftstore/src/coprocessor/region_info_accessor.rs b/components/raftstore/src/coprocessor/region_info_accessor.rs index fb6defbc375..8f9021c8e60 100644 --- a/components/raftstore/src/coprocessor/region_info_accessor.rs +++ b/components/raftstore/src/coprocessor/region_info_accessor.rs @@ -6,11 +6,11 @@ use std::{ Bound::{Excluded, Unbounded}, }, fmt::{Display, Formatter, Result as FmtResult}, - sync::{mpsc, Mutex}, + sync::{mpsc, Arc, Mutex, RwLock}, time::Duration, }; -use collections::HashMap; +use collections::{HashMap, HashSet}; use engine_traits::KvEngine; use kvproto::metapb::Region; use raft::StateRole; @@ -219,11 +219,14 @@ pub struct RegionCollector { regions: RegionsMap, // BTreeMap: data_end_key -> region_id region_ranges: RegionRangesMap, + + region_leaders: Arc>>, } impl RegionCollector { - pub fn new() -> Self { + pub fn new(region_leaders: Arc>>) -> Self { Self { + region_leaders, regions: HashMap::default(), region_ranges: BTreeMap::default(), } @@ -337,11 +340,21 @@ impl RegionCollector { "region_id" => region.get_id(), ) } + self.region_leaders + .write() + .unwrap() + .remove(®ion.get_id()); } fn handle_role_change(&mut self, region: Region, 
new_role: StateRole) { let region_id = region.get_id(); + if new_role == StateRole::Leader { + self.region_leaders.write().unwrap().insert(region_id); + } else { + self.region_leaders.write().unwrap().remove(®ion_id); + } + if let Some(r) = self.regions.get_mut(®ion_id) { r.role = new_role; return; @@ -507,12 +520,6 @@ impl RegionCollector { } } -impl Default for RegionCollector { - fn default() -> Self { - Self::new() - } -} - impl Runnable for RegionCollector { type Task = RegionInfoQuery; @@ -585,6 +592,11 @@ pub struct RegionInfoAccessor { // https://github.com/tikv/tikv/issues/9044 worker: Worker, scheduler: Scheduler, + + /// Region leader ids set on the store. + /// + /// Others can access this info directly, such as RaftKV. + region_leaders: Arc>>, } impl RegionInfoAccessor { @@ -593,11 +605,24 @@ impl RegionInfoAccessor { /// once. If it's needed in different places, just clone it, and their /// contents are shared. pub fn new(host: &mut CoprocessorHost) -> Self { + let region_leaders = Arc::new(RwLock::new(HashSet::default())); let worker = WorkerBuilder::new("region-collector-worker").create(); - let scheduler = worker.start_with_timer("region-collector-worker", RegionCollector::new()); + let scheduler = worker.start_with_timer( + "region-collector-worker", + RegionCollector::new(region_leaders.clone()), + ); register_region_event_listener(host, scheduler.clone()); - Self { worker, scheduler } + Self { + worker, + scheduler, + region_leaders, + } + } + + /// Get a set of region leader ids. + pub fn region_leaders(&self) -> Arc>> { + self.region_leaders.clone() } /// Stops the `RegionInfoAccessor`. It should be stopped after raftstore. 
@@ -711,6 +736,10 @@ impl RegionInfoProvider for MockRegionInfoProvider { mod tests { use super::*; + fn new_region_collector() -> RegionCollector { + RegionCollector::new(Arc::new(RwLock::new(HashSet::default()))) + } + fn new_region(id: u64, start_key: &[u8], end_key: &[u8], version: u64) -> Region { let mut region = Region::default(); region.set_id(id); @@ -910,7 +939,7 @@ mod tests { #[test] fn test_ignore_invalid_version() { - let mut c = RegionCollector::new(); + let mut c = new_region_collector(); c.handle_raftstore_event(RaftStoreEvent::CreateRegion { region: new_region(1, b"k1", b"k3", 0), @@ -939,7 +968,7 @@ mod tests { region_with_conf(6, b"k7", b"", 20, 10), ]; - let mut c = RegionCollector::new(); + let mut c = new_region_collector(); must_load_regions(&mut c, regions); assert!(c.check_region_range(®ion_with_conf(1, b"", b"k1", 10, 10), false)); @@ -1002,7 +1031,7 @@ mod tests { new_region(6, b"k7", b"", 1), ]; - let mut c = RegionCollector::new(); + let mut c = new_region_collector(); must_load_regions(&mut c, &init_regions); let mut regions: Vec<_> = init_regions .iter() @@ -1033,7 +1062,7 @@ mod tests { check_collection(&c, &[]); // Test that the region with the same id will be kept in the collection - c = RegionCollector::new(); + c = new_region_collector(); must_load_regions(&mut c, &init_regions); c.check_region_range(&new_region(3, b"k1", b"k7", 2), true); @@ -1052,7 +1081,7 @@ mod tests { #[test] fn test_basic_updating() { - let mut c = RegionCollector::new(); + let mut c = new_region_collector(); let init_regions = &[ new_region(1, b"", b"k1", 1), new_region(2, b"k1", b"k9", 1), @@ -1120,7 +1149,7 @@ mod tests { /// correct, no matter what the events' order to happen is. /// Values in `seq` and of `derive_index` start from 1. 
fn test_split_impl(derive_index: usize, seq: &[usize]) { - let mut c = RegionCollector::new(); + let mut c = new_region_collector(); let init_regions = &[ new_region(1, b"", b"k1", 1), new_region(2, b"k1", b"k9", 1), @@ -1173,7 +1202,7 @@ mod tests { } fn test_merge_impl(to_left: bool, update_first: bool) { - let mut c = RegionCollector::new(); + let mut c = new_region_collector(); let init_regions = &[ region_with_conf(1, b"", b"k1", 1, 1), region_with_conf(2, b"k1", b"k2", 1, 100), @@ -1217,7 +1246,7 @@ mod tests { #[test] fn test_extreme_cases() { - let mut c = RegionCollector::new(); + let mut c = new_region_collector(); let init_regions = &[ new_region(1, b"", b"k1", 1), new_region(2, b"k1", b"k9", 1), diff --git a/components/server/src/server.rs b/components/server/src/server.rs index f61d981a912..35a06d1321f 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -574,6 +574,7 @@ where ), ), engines.kv.clone(), + self.region_info_accessor.region_leaders(), ); self.engines = Some(TikvEngines { diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index f69ef253e5b..683de2e5a7d 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -289,15 +289,19 @@ impl ServerCluster { StoreMetaDelegate::new(store_meta.clone(), engines.kv.clone()), router.clone(), ); - let raft_router = ServerRaftStoreRouter::new(router.clone(), local_reader); - let sim_router = SimulateTransport::new(raft_router.clone()); - - let raft_engine = RaftKv::new(sim_router.clone(), engines.kv.clone()); // Create coprocessor. 
let mut coprocessor_host = CoprocessorHost::new(router.clone(), cfg.coprocessor.clone()); let region_info_accessor = RegionInfoAccessor::new(&mut coprocessor_host); + let raft_router = ServerRaftStoreRouter::new(router.clone(), local_reader); + let sim_router = SimulateTransport::new(raft_router.clone()); + let raft_engine = RaftKv::new( + sim_router.clone(), + engines.kv.clone(), + region_info_accessor.region_leaders(), + ); + if let Some(hooks) = self.coprocessor_hooks.get(&node_id) { for hook in hooks { hook(&mut coprocessor_host); @@ -313,7 +317,11 @@ impl ServerCluster { raft_engine.clone(), )); - let mut engine = RaftKv::new(sim_router.clone(), engines.kv.clone()); + let mut engine = RaftKv::new( + sim_router.clone(), + engines.kv.clone(), + region_info_accessor.region_leaders(), + ); if let Some(scheduler) = self.txn_extra_schedulers.remove(&node_id) { engine.set_txn_extra_scheduler(scheduler); } diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index dea3c0dc745..466bd973906 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -272,6 +272,11 @@ pub trait Engine: Send + Clone + 'static { fn async_snapshot(&self, ctx: SnapContext<'_>, cb: Callback) -> Result<()>; + /// Precheck request which has write with it's context. + fn precheck_write_with_ctx(&self, _ctx: &Context) -> Result<()> { + Ok(()) + } + fn async_write(&self, ctx: &Context, batch: WriteData, write_cb: Callback<()>) -> Result<()>; /// Writes data to the engine asynchronously with some extensions. 
diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 44d5e698f5c..031b182b9fe 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -129,6 +129,15 @@ impl RocksEngine { self.not_leader.store(true, Ordering::SeqCst); } + fn not_leader_error(&self) -> Error { + let not_leader = { + let mut header = kvproto::errorpb::Error::default(); + header.mut_not_leader().set_region_id(100); + header + }; + Error::from(ErrorInner::Request(not_leader)) + } + pub fn pause(&self, dur: Duration) { self.sched.schedule(Task::Pause(dur)).unwrap(); } @@ -209,6 +218,13 @@ impl Engine for RocksEngine { write_modifies(&self.engines.kv, modifies) } + fn precheck_write_with_ctx(&self, _ctx: &Context) -> Result<()> { + if self.not_leader.load(Ordering::SeqCst) { + return Err(self.not_leader_error()); + } + Ok(()) + } + fn async_write(&self, ctx: &Context, batch: WriteData, cb: Callback<()>) -> Result<()> { self.async_write_ext(ctx, batch, cb, None, None) } @@ -243,16 +259,11 @@ impl Engine for RocksEngine { fail_point!("rockskv_async_snapshot", |_| Err(box_err!( "snapshot failed" ))); - let not_leader = { - let mut header = kvproto::errorpb::Error::default(); - header.mut_not_leader().set_region_id(100); - header - }; fail_point!("rockskv_async_snapshot_not_leader", |_| { - Err(Error::from(ErrorInner::Request(not_leader.clone()))) + Err(self.not_leader_error()) }); if self.not_leader.load(Ordering::SeqCst) { - return Err(Error::from(ErrorInner::Request(not_leader))); + return Err(self.not_leader_error()); } box_try!(self.sched.schedule(Task::Snapshot(cb))); Ok(()) diff --git a/src/server/raftkv.rs b/src/server/raftkv.rs index de72a642837..a314315985c 100644 --- a/src/server/raftkv.rs +++ b/src/server/raftkv.rs @@ -8,10 +8,11 @@ use std::{ mem, num::NonZeroU64, result, - sync::Arc, + sync::{Arc, RwLock}, time::Duration, }; +use collections::HashSet; use concurrency_manager::ConcurrencyManager; 
use engine_traits::{CfName, KvEngine, MvccProperties, Snapshot}; use kvproto::{ @@ -158,6 +159,7 @@ where router: S, engine: E, txn_extra_scheduler: Option>, + region_leaders: Arc>>, } impl RaftKv @@ -166,11 +168,12 @@ where S: RaftStoreRouter + LocalReadRouter + 'static, { /// Create a RaftKv using specified configuration. - pub fn new(router: S, engine: E) -> RaftKv { + pub fn new(router: S, engine: E, region_leaders: Arc>>) -> RaftKv { RaftKv { router, engine, txn_extra_scheduler: None, + region_leaders, } } @@ -353,6 +356,14 @@ where write_modifies(&self.engine, modifies) } + fn precheck_write_with_ctx(&self, ctx: &Context) -> kv::Result<()> { + let region_id = ctx.get_region_id(); + match self.region_leaders.read().unwrap().get(®ion_id) { + Some(_) => Ok(()), + None => Err(RaftServerError::NotLeader(region_id, None).into()), + } + } + fn async_write( &self, ctx: &Context, diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index 07f1143bcb0..e58f7862b37 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -159,6 +159,8 @@ make_auto_flush_static_metric! 
{ new, snapshot, async_snapshot_err, + precheck_write_ok, + precheck_write_err, snapshot_ok, snapshot_err, read_finish, diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 66194cd08fa..2d9d3610432 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -48,7 +48,9 @@ use pd_client::{Feature, FeatureGate}; use raftstore::store::TxnExt; use resource_metering::{FutureExt, ResourceTagFactory}; use tikv_kv::{Modify, Snapshot, SnapshotExt, WriteData}; -use tikv_util::{quota_limiter::QuotaLimiter, time::Instant, timer::GLOBAL_TIMER_HANDLE}; +use tikv_util::{ + deadline::Deadline, quota_limiter::QuotaLimiter, time::Instant, timer::GLOBAL_TIMER_HANDLE, +}; use tracker::{get_tls_tracker_token, set_tls_tracker_token, TrackerToken}; use txn_types::TimeStamp; @@ -261,6 +263,17 @@ impl SchedulerInner { tctx } + /// Try to own the corresponding task context and take the callback. + /// + /// If the task is been processing, it should be owned. + /// If it has been finished, then it is not in the slot. + /// In both cases, cb should be None. Otherwise, cb should be some. + fn try_own_and_take_cb(&self, cid: u64) -> Option { + self.get_task_slot(cid) + .get_mut(&cid) + .and_then(|tctx| if tctx.try_own() { tctx.cb.take() } else { None }) + } + fn take_task_cb_and_pr(&self, cid: u64) -> (Option, Option) { self.get_task_slot(cid) .get_mut(&cid) @@ -431,7 +444,7 @@ impl Scheduler { self.inner .new_task_context(Task::new(cid, tracker, cmd), callback) }); - let deadline = tctx.task.as_ref().unwrap().cmd.deadline(); + if self.inner.latches.acquire(&mut tctx.lock, cid) { fail_point!("txn_scheduler_acquire_success"); tctx.on_schedule(); @@ -440,30 +453,66 @@ impl Scheduler { self.execute(task); return; } - // Check deadline in background. 
+ let task = tctx.task.as_ref().unwrap(); + let deadline = task.cmd.deadline(); + let cmd_ctx = task.cmd.ctx().clone(); + self.fail_fast_or_check_deadline(cid, tag, cmd_ctx, deadline); + fail_point!("txn_scheduler_acquire_fail"); + } + + fn fail_fast_or_check_deadline( + &self, + cid: u64, + tag: CommandKind, + cmd_ctx: Context, + deadline: Deadline, + ) { let sched = self.clone(); self.inner .high_priority_pool .pool .spawn(async move { - GLOBAL_TIMER_HANDLE - .delay(deadline.to_std_instant()) - .compat() - .await - .unwrap(); - let cb = sched - .inner - .get_task_slot(cid) - .get_mut(&cid) - .and_then(|tctx| if tctx.try_own() { tctx.cb.take() } else { None }); - if let Some(cb) = cb { - cb.execute(ProcessResult::Failed { - err: StorageErrorInner::DeadlineExceeded.into(), - }) + match unsafe { + with_tls_engine(|engine: &E| engine.precheck_write_with_ctx(&cmd_ctx)) + } { + // Precheck failed, try to return err early. + Err(e) => { + let cb = sched.inner.try_own_and_take_cb(cid); + // The task is not processing or finished currently. It's safe + // to response early here. In the future, the task will be waked up + // and it will finished with DeadlineExceeded error. + // As the cb is taken here, it will not be executed anymore. + if let Some(cb) = cb { + let pr = ProcessResult::Failed { + err: StorageError::from(e), + }; + Self::early_response( + cid, + cb, + pr, + tag, + CommandStageKind::precheck_write_err, + ); + } + } + Ok(()) => { + SCHED_STAGE_COUNTER_VEC.get(tag).precheck_write_ok.inc(); + // Check deadline in background. + GLOBAL_TIMER_HANDLE + .delay(deadline.to_std_instant()) + .compat() + .await + .unwrap(); + let cb = sched.inner.try_own_and_take_cb(cid); + if let Some(cb) = cb { + cb.execute(ProcessResult::Failed { + err: StorageErrorInner::DeadlineExceeded.into(), + }) + } + } } }) .unwrap(); - fail_point!("txn_scheduler_acquire_fail"); } /// Tries to acquire all the necessary latches. 
If all the necessary latches @@ -1201,6 +1250,7 @@ mod tests { use super::*; use crate::storage::{ + kv::{Error as KvError, ErrorInner as KvErrorInner}, lock_manager::DummyLockManager, mvcc::{self, Mutation}, test_util::latest_feature_gate, @@ -1210,7 +1260,7 @@ mod tests { flow_controller::{EngineFlowController, FlowController}, latch::*, }, - TestEngineBuilder, TxnStatus, + RocksEngine, TestEngineBuilder, TxnStatus, }; #[derive(Clone)] @@ -1221,6 +1271,36 @@ mod tests { fn report_write_stats(&self, _write_stats: WriteStats) {} } + // TODO(cosven): use this in the following test cases to reduce duplicate code. + fn new_test_scheduler() -> (Scheduler, RocksEngine) { + let engine = TestEngineBuilder::new().build().unwrap(); + let config = Config { + scheduler_concurrency: 1024, + scheduler_worker_pool_size: 1, + scheduler_pending_write_threshold: ReadableSize(100 * 1024 * 1024), + enable_async_apply_prewrite: false, + ..Default::default() + }; + ( + Scheduler::new( + engine.clone(), + DummyLockManager, + ConcurrencyManager::new(1.into()), + &config, + DynamicConfigs { + pipelined_pessimistic_lock: Arc::new(AtomicBool::new(true)), + in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), + }, + Arc::new(FlowController::Singleton(EngineFlowController::empty())), + DummyReporter, + ResourceTagFactory::new_for_test(), + Arc::new(QuotaLimiter::default()), + latest_feature_gate(), + ), + engine, + ) + } + #[test] fn test_command_latches() { let mut temp_map = HashMap::default(); @@ -1395,6 +1475,52 @@ mod tests { block_on(f).unwrap().unwrap(); } + /// When all latches are acquired, the command should be executed directly. + /// When any latch is not acquired, the command should be prechecked. + #[test] + fn test_schedule_command_with_fail_fast_mode() { + let (scheduler, engine) = new_test_scheduler(); + + // req can acquire all latches, so it should be executed directly. 
+ let mut req = BatchRollbackRequest::default(); + req.mut_context().max_execution_duration_ms = 10000; + req.set_keys(vec![b"a".to_vec(), b"b".to_vec(), b"c".to_vec()].into()); + let cmd: TypedCommand<()> = req.into(); + let (cb, f) = paired_future_callback(); + scheduler.run_cmd(cmd.cmd, StorageCallback::Boolean(cb)); + // It must be executed (and succeed). + block_on(f).unwrap().unwrap(); + + // Acquire the latch, so that next command(req2) can't require all latches. + let mut lock = Lock::new(&[Key::from_raw(b"d")]); + let cid = scheduler.inner.gen_id(); + assert!(scheduler.inner.latches.acquire(&mut lock, cid)); + + engine.trigger_not_leader(); + + // req2 can't acquire all latches, req2 will be prechecked. + let mut req2 = BatchRollbackRequest::default(); + req2.mut_context().max_execution_duration_ms = 10000; + req2.set_keys(vec![b"a".to_vec(), b"b".to_vec(), b"d".to_vec()].into()); + let cmd2: TypedCommand<()> = req2.into(); + let (cb2, f2) = paired_future_callback(); + scheduler.run_cmd(cmd2.cmd, StorageCallback::Boolean(cb2)); + + // Precheck should return NotLeader error. + assert!(matches!( + block_on(f2).unwrap(), + Err(StorageError(box StorageErrorInner::Kv(KvError( + box KvErrorInner::Request(ref e), + )))) if e.has_not_leader(), + )); + // The task context should be owned, and it's cb should be taken. + let cid2 = cid + 1; // Hack: get the cid of req2. + let mut task_slot = scheduler.inner.get_task_slot(cid2); + let tctx = task_slot.get_mut(&cid2).unwrap(); + assert!(!tctx.try_own()); + assert!(tctx.cb.is_none()); + } + #[test] fn test_pool_available_deadline() { let engine = TestEngineBuilder::new().build().unwrap(); diff --git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index c97bdd72fac..1143600920f 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -1,7 +1,8 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::sync::Arc; +use std::sync::{Arc, RwLock}; +use collections::HashSet; use crossbeam::channel::TrySendError; use engine_rocks::{RocksEngine, RocksSnapshot}; use engine_traits::{KvEngine, ALL_CFS, CF_DEFAULT}; @@ -179,7 +180,11 @@ fn bench_async_snapshot(b: &mut test::Bencher) { region.mut_region_epoch().set_version(2); region.mut_region_epoch().set_conf_ver(5); let (_tmp, db) = new_engine(); - let kv = RaftKv::new(SyncBenchRouter::new(region.clone(), db.clone()), db); + let kv = RaftKv::new( + SyncBenchRouter::new(region.clone(), db.clone()), + db, + Arc::new(RwLock::new(HashSet::default())), + ); let mut ctx = Context::default(); ctx.set_region_id(region.get_id()); @@ -208,7 +213,11 @@ fn bench_async_write(b: &mut test::Bencher) { region.mut_region_epoch().set_version(2); region.mut_region_epoch().set_conf_ver(5); let (_tmp, db) = new_engine(); - let kv = RaftKv::new(SyncBenchRouter::new(region.clone(), db.clone()), db); + let kv = RaftKv::new( + SyncBenchRouter::new(region.clone(), db.clone()), + db, + Arc::new(RwLock::new(HashSet::default())), + ); let mut ctx = Context::default(); ctx.set_region_id(region.get_id()); diff --git a/tests/integrations/storage/test_raftkv.rs b/tests/integrations/storage/test_raftkv.rs index f99d9348616..420f9bd7765 100644 --- a/tests/integrations/storage/test_raftkv.rs +++ b/tests/integrations/storage/test_raftkv.rs @@ -330,6 +330,55 @@ fn test_invalid_read_index_when_no_leader() { ); } +/// RaftKV precheck_write_with_ctx checks if the current role is leader. +/// When it is not, it should return NotLeader error during prechecking. +#[test] +fn test_raftkv_precheck_write_with_ctx() { + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + + // make sure leader has been elected. 
+ assert_eq!(cluster.must_get(b"k1"), None); + + let region = cluster.get_region(b""); + let leader = cluster.leader_of_region(region.get_id()).unwrap(); + let follower = region + .get_peers() + .iter() + .find(|p| p.get_id() != leader.get_id()) + .unwrap(); + + let leader_storage = cluster.sim.rl().storages[&leader.get_id()].clone(); + let follower_storage = cluster.sim.rl().storages[&follower.get_id()].clone(); + + // Assume this is a write request. + let mut ctx = Context::default(); + ctx.set_region_id(region.get_id()); + ctx.set_region_epoch(region.get_region_epoch().clone()); + ctx.set_peer(region.get_peers()[0].clone()); + + // The (write) request can be sent to the leader. + leader_storage.precheck_write_with_ctx(&ctx).unwrap(); + // The (write) request should not be send to a follower. + follower_storage.precheck_write_with_ctx(&ctx).unwrap_err(); + + // Leader has network partition and it must be not leader any more. + let filter = Box::new(RegionPacketFilter::new( + region.get_id(), + leader.get_store_id(), + )); + cluster + .sim + .wl() + .add_recv_filter(leader.get_store_id(), filter.clone()); + cluster + .sim + .wl() + .add_send_filter(leader.get_store_id(), filter); + sleep_until_election_triggered(&cluster.cfg); + leader_storage.precheck_write_with_ctx(&ctx).unwrap_err(); +} + fn must_put(ctx: &Context, engine: &E, key: &[u8], value: &[u8]) { engine.put(ctx, Key::from_raw(key), value.to_vec()).unwrap(); } From 9d658db6f861653125e53c0709795c49a316e301 Mon Sep 17 00:00:00 2001 From: Potato Date: Mon, 22 Aug 2022 16:40:20 +0800 Subject: [PATCH 0163/1149] storage: record and return pessimistic_lock_wait time (#13309) ref pingcap/kvproto#965, ref tikv/tikv#12362 This commit record the pessimistic_lock_wait time for pessimistic transactions in the waitManager. 
Signed-off-by: OneSizeFitQuorum --- Cargo.lock | 2 +- components/pd_client/src/util.rs | 3 +- components/tracker/src/lib.rs | 2 ++ components/tracker/src/slab.rs | 6 ++++ src/server/lock_manager/mod.rs | 8 +++-- src/server/lock_manager/waiter_manager.rs | 22 +++++++++++--- src/server/service/kv.rs | 16 +++++----- src/storage/lock_manager.rs | 3 ++ src/storage/txn/scheduler.rs | 2 ++ tests/integrations/server/kv_service.rs | 37 +++++++++++++++++++++++ 10 files changed, 84 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3083e56ef23..b067e3337e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2627,7 +2627,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#d88fa382391ec305e879be7635e39beae6a19890" +source = "git+https://github.com/pingcap/kvproto.git#affce57868b9f8befac389559d372369b2cb616f" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/pd_client/src/util.rs b/components/pd_client/src/util.rs index fec63383891..2aa74176627 100644 --- a/components/pd_client/src/util.rs +++ b/components/pd_client/src/util.rs @@ -848,11 +848,12 @@ pub fn check_resp_header(header: &ResponseHeader) -> Result<()> { ErrorType::IncompatibleVersion => Err(Error::Incompatible), ErrorType::StoreTombstone => Err(Error::StoreTombstone(err.get_message().to_owned())), ErrorType::RegionNotFound => Err(Error::RegionNotFound(vec![])), - ErrorType::Unknown => Err(box_err!(err.get_message())), ErrorType::GlobalConfigNotFound => { Err(Error::GlobalConfigNotFound(err.get_message().to_owned())) } ErrorType::Ok => Ok(()), + ErrorType::DuplicatedEntry | ErrorType::EntryNotFound => Err(box_err!(err.get_message())), + ErrorType::Unknown => Err(box_err!(err.get_message())), } } diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index e0a9b9de24f..be099beadde 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -40,6 +40,7 @@ impl Tracker { } pub 
fn write_write_detail(&self, detail: &mut pb::WriteDetail) { + detail.set_pessimistic_lock_wait_nanos(self.metrics.pessimistic_lock_wait_nanos); detail.set_store_batch_wait_nanos(self.metrics.wf_batch_wait_nanos); detail.set_propose_send_wait_nanos( self.metrics @@ -123,6 +124,7 @@ pub struct RequestMetrics { pub block_read_nanos: u64, pub internal_key_skipped_count: u64, pub deleted_key_skipped_count: u64, + pub pessimistic_lock_wait_nanos: u64, // temp instant used in raftstore metrics, first be the instant when creating the write // callback, then reset when it is ready to apply pub write_instant: Option, diff --git a/components/tracker/src/slab.rs b/components/tracker/src/slab.rs index 9b4be50796b..c7b9efa9944 100644 --- a/components/tracker/src/slab.rs +++ b/components/tracker/src/slab.rs @@ -182,6 +182,12 @@ impl fmt::Debug for TrackerToken { } } +impl Default for TrackerToken { + fn default() -> Self { + INVALID_TRACKER_TOKEN + } +} + #[cfg(test)] mod tests { use std::{sync::Arc, thread}; diff --git a/src/server/lock_manager/mod.rs b/src/server/lock_manager/mod.rs index 91e25a2edeb..e437cea2bf1 100644 --- a/src/server/lock_manager/mod.rs +++ b/src/server/lock_manager/mod.rs @@ -320,6 +320,7 @@ mod tests { use raftstore::coprocessor::RegionChangeEvent; use security::SecurityConfig; use tikv_util::config::ReadableDuration; + use tracker::{TrackerToken, INVALID_TRACKER_TOKEN}; use self::{deadlock::tests::*, metrics::*, waiter_manager::tests::*}; use super::*; @@ -361,10 +362,11 @@ mod tests { lock_mgr } - fn diag_ctx(key: &[u8], resource_group_tag: &[u8]) -> DiagnosticContext { + fn diag_ctx(key: &[u8], resource_group_tag: &[u8], tracker: TrackerToken) -> DiagnosticContext { DiagnosticContext { key: key.to_owned(), resource_group_tag: resource_group_tag.to_owned(), + tracker, } } @@ -428,7 +430,7 @@ mod tests { waiter1.lock, false, Some(WaitTimeout::Default), - diag_ctx(b"k1", b"tag1"), + diag_ctx(b"k1", b"tag1", INVALID_TRACKER_TOKEN), ); 
assert!(lock_mgr.has_waiter()); let (waiter2, lock_info2, f2) = new_test_waiter(20.into(), 10.into(), 10); @@ -439,7 +441,7 @@ mod tests { waiter2.lock, false, Some(WaitTimeout::Default), - diag_ctx(b"k2", b"tag2"), + diag_ctx(b"k2", b"tag2", INVALID_TRACKER_TOKEN), ); assert!(lock_mgr.has_waiter()); assert_elapsed( diff --git a/src/server/lock_manager/waiter_manager.rs b/src/server/lock_manager/waiter_manager.rs index 8e5225bef76..b0e05091267 100644 --- a/src/server/lock_manager/waiter_manager.rs +++ b/src/server/lock_manager/waiter_manager.rs @@ -19,13 +19,14 @@ use futures::{ task::{Context, Poll}, }; use kvproto::deadlock::WaitForEntry; -use prometheus::HistogramTimer; use tikv_util::{ config::ReadableDuration, + time::{duration_to_sec, InstantExt}, timer::GLOBAL_TIMER_HANDLE, worker::{FutureRunnable, FutureScheduler, Stopped}, }; use tokio::task::spawn_local; +use tracker::GLOBAL_TRACKERS; use super::{config::Config, deadlock::Scheduler as DetectorScheduler, metrics::*}; use crate::storage::{ @@ -110,6 +111,7 @@ pub enum Task { lock: Lock, timeout: WaitTimeout, diag_ctx: DiagnosticContext, + start_waiting_time: Instant, }, WakeUp { // lock info @@ -181,7 +183,7 @@ pub(crate) struct Waiter { pub(crate) lock: Lock, pub diag_ctx: DiagnosticContext, delay: Delay, - _lifetime_timer: HistogramTimer, + start_waiting_time: Instant, } impl Waiter { @@ -192,6 +194,7 @@ impl Waiter { lock: Lock, deadline: Instant, diag_ctx: DiagnosticContext, + start_waiting_time: Instant, ) -> Self { Self { start_ts, @@ -200,7 +203,7 @@ impl Waiter { lock, delay: Delay::new(deadline), diag_ctx, - _lifetime_timer: WAITER_LIFETIME_HISTOGRAM.start_coarse_timer(), + start_waiting_time, } } @@ -224,6 +227,11 @@ impl Waiter { /// `Notify` consumes the `Waiter` to notify the corresponding transaction /// going on. 
fn notify(self) { + let elapsed = self.start_waiting_time.saturating_elapsed(); + GLOBAL_TRACKERS.with_tracker(self.diag_ctx.tracker, |tracker| { + tracker.metrics.pessimistic_lock_wait_nanos = elapsed.as_nanos() as u64; + }); + WAITER_LIFETIME_HISTOGRAM.observe(duration_to_sec(elapsed)); // Cancel the delay timer to prevent removing the same `Waiter` earlier. self.delay.cancel(); self.cb.execute(self.pr); @@ -424,6 +432,7 @@ impl Scheduler { lock, timeout, diag_ctx, + start_waiting_time: Instant::now(), }); } @@ -597,6 +606,7 @@ impl FutureRunnable for WaiterManager { lock, timeout, diag_ctx, + start_waiting_time, } => { let waiter = Waiter::new( start_ts, @@ -605,6 +615,7 @@ impl FutureRunnable for WaiterManager { lock, self.normalize_deadline(timeout), diag_ctx, + start_waiting_time, ); self.handle_wait_for(waiter); TASK_COUNTER_METRICS.wait_for.inc(); @@ -662,7 +673,7 @@ pub mod tests { lock: Lock { ts: lock_ts, hash }, diag_ctx: DiagnosticContext::default(), delay: Delay::new(Instant::now()), - _lifetime_timer: WAITER_LIFETIME_HISTOGRAM.start_coarse_timer(), + start_waiting_time: Instant::now(), } } @@ -764,6 +775,7 @@ pub mod tests { lock, Instant::now() + Duration::from_millis(3000), DiagnosticContext::default(), + Instant::now(), ); (waiter, info, f) } @@ -977,7 +989,7 @@ pub mod tests { .remove_waiter( Lock { ts: TimeStamp::zero(), - hash: 0 + hash: 0, }, TimeStamp::zero(), ) diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 8f0f9a23cae..fa743911b40 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -2080,20 +2080,22 @@ txn_command_future!(future_prewrite, PrewriteRequest, PrewriteResponse, (v, resp } resp.set_errors(extract_key_errors(v.map(|v| v.locks)).into()); }}); -txn_command_future!(future_acquire_pessimistic_lock, PessimisticLockRequest, PessimisticLockResponse, (v, resp, tracker) { +txn_command_future!(future_acquire_pessimistic_lock, PessimisticLockRequest, PessimisticLockResponse, (v, resp, tracker) {{ 
match v { Ok(Ok(res)) => { let (values, not_founds) = res.into_values_and_not_founds(); resp.set_values(values.into()); resp.set_not_founds(not_founds); - GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { - tracker.write_scan_detail(resp.mut_exec_details_v2().mut_scan_detail_v2()); - tracker.write_write_detail(resp.mut_exec_details_v2().mut_write_detail()); - }); }, - Err(e) | Ok(Err(e)) => resp.set_errors(vec![extract_key_error(&e)].into()), + Err(e) | Ok(Err(e)) => { + resp.set_errors(vec![extract_key_error(&e)].into()) + }, } -}); + GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { + tracker.write_scan_detail(resp.mut_exec_details_v2().mut_scan_detail_v2()); + tracker.write_write_detail(resp.mut_exec_details_v2().mut_write_detail()); + }); +}}); txn_command_future!(future_pessimistic_rollback, PessimisticRollbackRequest, PessimisticRollbackResponse, (v, resp) { resp.set_errors(extract_key_errors(v).into()) }); diff --git a/src/storage/lock_manager.rs b/src/storage/lock_manager.rs index def756c921e..79a9d0572f3 100644 --- a/src/storage/lock_manager.rs +++ b/src/storage/lock_manager.rs @@ -2,6 +2,7 @@ use std::time::Duration; +use tracker::TrackerToken; use txn_types::TimeStamp; use crate::{ @@ -24,6 +25,8 @@ pub struct DiagnosticContext { /// same statement) Currently it is the encoded SQL digest if the client /// is TiDB pub resource_group_tag: Vec, + /// The tracker is used to track and collect the lock wait details. + pub tracker: TrackerToken, } /// Time to wait for lock released when encountering locks. 
diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 2d9d3610432..382979b7815 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -827,6 +827,7 @@ impl Scheduler { let cid = task.cid; let priority = task.cmd.priority(); let ts = task.cmd.ts(); + let tracker = task.tracker; let scheduler = self.clone(); let quota_limiter = self.inner.quota_limiter.clone(); let mut sample = quota_limiter.new_sample(true); @@ -913,6 +914,7 @@ impl Scheduler { let diag_ctx = DiagnosticContext { key, resource_group_tag: ctx.get_resource_group_tag().into(), + tracker, }; scheduler.on_wait_for_lock(cid, ts, pr, lock, is_first_lock, wait_timeout, diag_ctx); return; diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 17b1e49f2e0..8095ebdf2ca 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -2148,3 +2148,40 @@ fn test_rpc_wall_time() { ); } } + +#[test] +fn test_pessimistic_lock_execution_tracking() { + let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (k, v) = (b"k1".to_vec(), b"k2".to_vec()); + + // Add a prewrite lock. 
+ let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(k.clone()); + mutation.set_value(v); + must_kv_prewrite(&client, ctx.clone(), vec![mutation], k.clone(), 10); + + let block_duration = Duration::from_millis(300); + let client_clone = client.clone(); + let ctx_clone = ctx.clone(); + let k_clone = k.clone(); + let handle = thread::spawn(move || { + thread::sleep(block_duration); + must_kv_commit(&client_clone, ctx_clone, vec![k_clone], 10, 30, 30); + }); + + let resp = kv_pessimistic_lock(&client, ctx, vec![k], 20, 20, false); + assert!( + resp.get_exec_details_v2() + .get_write_detail() + .get_pessimistic_lock_wait_nanos() + > 0, + "resp lock wait time={:?}, block_duration={:?}", + resp.get_exec_details_v2() + .get_write_detail() + .get_pessimistic_lock_wait_nanos(), + block_duration + ); + + handle.join().unwrap(); +} From f6159555995c156dcbfc741ccd93a59948d9d5de Mon Sep 17 00:00:00 2001 From: haojinming Date: Mon, 22 Aug 2022 22:06:21 +0800 Subject: [PATCH 0164/1149] rawkv: Reuse scheduler worker pool for raw modify command (#13286) ref tikv/tikv#13284 Signed-off-by: haojinming Co-authored-by: Ti Chi Robot --- src/storage/mod.rs | 284 ++++++++++++++++--------- tests/failpoints/cases/test_storage.rs | 33 +++ 2 files changed, 218 insertions(+), 99 deletions(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 6c4374f7c76..d974c731db0 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -87,6 +87,7 @@ use rand::prelude::*; use resource_metering::{FutureExt, ResourceTagFactory}; use tikv_kv::SnapshotExt; use tikv_util::{ + deadline::Deadline, quota_limiter::QuotaLimiter, time::{duration_to_ms, Instant, ThreadReadId}, }; @@ -1446,6 +1447,29 @@ impl Storage { Ok(()) } + // Schedule raw modify commands, which reuse the scheduler worker pool. + // TODO: separate the txn and raw commands if needed in the future. 
+ fn sched_raw_command(&self, tag: CommandKind, future: T) -> Result<()> + where + T: Future + Send + 'static, + { + SCHED_STAGE_COUNTER_VEC.get(tag).new.inc(); + self.sched + .get_sched_pool(CommandPri::Normal) + .pool + .spawn(future) + .map_err(|_| Error::from(ErrorInner::SchedTooBusy)) + } + + fn get_deadline(ctx: &Context) -> Deadline { + let execution_duration_limit = if ctx.max_execution_duration_ms == 0 { + crate::storage::txn::scheduler::DEFAULT_EXECUTION_DURATION_LIMIT + } else { + ::std::time::Duration::from_millis(ctx.max_execution_duration_ms) + }; + Deadline::from_now(execution_duration_limit) + } + /// Delete all keys in the range [`start_key`, `end_key`). /// /// All keys in the range will be deleted permanently regardless of their @@ -1817,44 +1841,60 @@ impl Storage { if !F::IS_TTL_ENABLED && ttl != 0 { return Err(Error::from(ErrorInner::TtlNotEnabled)); } + let deadline = Self::get_deadline(&ctx); + let cf = Self::rawkv_cf(&cf, self.api_version)?; + let engine = self.engine.clone(); + self.sched_raw_command(CMD, async move { + if let Err(e) = deadline.check() { + return callback(Err(Error::from(e))); + } + let command_duration = tikv_util::time::Instant::now(); + let raw_value = RawValue { + user_value: value, + expire_ts: ttl_to_expire_ts(ttl), + is_delete: false, + }; + let m = Modify::Put( + cf, + F::encode_raw_key_owned(key, None), + F::encode_raw_value_owned(raw_value), + ); - let raw_value = RawValue { - user_value: value, - expire_ts: ttl_to_expire_ts(ttl), - is_delete: false, - }; - let m = Modify::Put( - Self::rawkv_cf(&cf, self.api_version)?, - F::encode_raw_key_owned(key, None), - F::encode_raw_value_owned(raw_value), - ); - - let mut batch = WriteData::from_modifies(vec![m]); - batch.set_allowed_on_disk_almost_full(); - - self.engine.async_write( - &ctx, - batch, - Box::new(|res| callback(res.map_err(Error::from))), - )?; - KV_COMMAND_COUNTER_VEC_STATIC.raw_put.inc(); - Ok(()) + let mut batch = WriteData::from_modifies(vec![m]); + 
batch.set_allowed_on_disk_almost_full(); + let (cb, f) = tikv_util::future::paired_future_callback(); + let async_ret = + engine.async_write(&ctx, batch, Box::new(|res| cb(res.map_err(Error::from)))); + let v: Result<()> = match async_ret { + Err(e) => Err(Error::from(e)), + Ok(_) => f.await.unwrap(), + }; + callback(v); + KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); + SCHED_STAGE_COUNTER_VEC.get(CMD).write_finish.inc(); + SCHED_HISTOGRAM_VEC_STATIC + .get(CMD) + .observe(command_duration.saturating_elapsed().as_secs_f64()); + }) } - fn raw_batch_put_requests_to_modifies( - cf: CfName, - pairs: Vec, - ttls: Vec, - ) -> Result> { + fn check_ttl_valid(key_cnt: usize, ttls: &Vec) -> Result<()> { if !F::IS_TTL_ENABLED { if ttls.iter().any(|&x| x != 0) { return Err(Error::from(ErrorInner::TtlNotEnabled)); } - } else if ttls.len() != pairs.len() { + } else if ttls.len() != key_cnt { return Err(Error::from(ErrorInner::TtlLenNotEqualsToPairs)); } + Ok(()) + } - let modifies = pairs + fn raw_batch_put_requests_to_modifies( + cf: CfName, + pairs: Vec, + ttls: Vec, + ) -> Vec { + pairs .into_iter() .zip(ttls) .map(|((k, v), ttl)| { @@ -1869,8 +1909,7 @@ impl Storage { F::encode_raw_value_owned(raw_value), ) }) - .collect(); - Ok(modifies) + .collect() } /// Write some keys to the storage in a batch. 
@@ -1882,10 +1921,11 @@ impl Storage { ttls: Vec, callback: Callback<()>, ) -> Result<()> { + const CMD: CommandKind = CommandKind::raw_batch_put; Self::check_api_version( self.api_version, ctx.api_version, - CommandKind::raw_batch_put, + CMD, pairs.iter().map(|(ref k, _)| k), )?; @@ -1896,18 +1936,32 @@ impl Storage { self.max_key_size, callback ); + Self::check_ttl_valid(pairs.len(), &ttls)?; - let modifies = Self::raw_batch_put_requests_to_modifies(cf, pairs, ttls)?; - let mut batch = WriteData::from_modifies(modifies); - batch.set_allowed_on_disk_almost_full(); - - self.engine.async_write( - &ctx, - batch, - Box::new(|res| callback(res.map_err(Error::from))), - )?; - KV_COMMAND_COUNTER_VEC_STATIC.raw_batch_put.inc(); - Ok(()) + let engine = self.engine.clone(); + let deadline = Self::get_deadline(&ctx); + self.sched_raw_command(CMD, async move { + if let Err(e) = deadline.check() { + return callback(Err(Error::from(e))); + } + let command_duration = tikv_util::time::Instant::now(); + let modifies = Self::raw_batch_put_requests_to_modifies(cf, pairs, ttls); + let mut batch = WriteData::from_modifies(modifies); + batch.set_allowed_on_disk_almost_full(); + let (cb, f) = tikv_util::future::paired_future_callback(); + let async_ret = + engine.async_write(&ctx, batch, Box::new(|res| cb(res.map_err(Error::from)))); + let v: Result<()> = match async_ret { + Err(e) => Err(Error::from(e)), + Ok(_) => f.await.unwrap(), + }; + callback(v); + KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); + SCHED_STAGE_COUNTER_VEC.get(CMD).write_finish.inc(); + SCHED_HISTOGRAM_VEC_STATIC + .get(CMD) + .observe(command_duration.saturating_elapsed().as_secs_f64()); + }) } fn raw_delete_request_to_modify(cf: CfName, key: Vec) -> Modify { @@ -1928,26 +1982,35 @@ impl Storage { key: Vec, callback: Callback<()>, ) -> Result<()> { - Self::check_api_version( - self.api_version, - ctx.api_version, - CommandKind::raw_delete, - [&key], - )?; + const CMD: CommandKind = CommandKind::raw_delete; + 
Self::check_api_version(self.api_version, ctx.api_version, CMD, [&key])?; check_key_size!(Some(&key).into_iter(), self.max_key_size, callback); - - let m = Self::raw_delete_request_to_modify(Self::rawkv_cf(&cf, self.api_version)?, key); - let mut batch = WriteData::from_modifies(vec![m]); - batch.set_allowed_on_disk_almost_full(); - - self.engine.async_write( - &ctx, - batch, - Box::new(|res| callback(res.map_err(Error::from))), - )?; - KV_COMMAND_COUNTER_VEC_STATIC.raw_delete.inc(); - Ok(()) + let cf = Self::rawkv_cf(&cf, self.api_version)?; + let engine = self.engine.clone(); + let deadline = Self::get_deadline(&ctx); + self.sched_raw_command(CMD, async move { + if let Err(e) = deadline.check() { + return callback(Err(Error::from(e))); + } + let command_duration = tikv_util::time::Instant::now(); + let m = Self::raw_delete_request_to_modify(cf, key); + let mut batch = WriteData::from_modifies(vec![m]); + batch.set_allowed_on_disk_almost_full(); + let (cb, f) = tikv_util::future::paired_future_callback(); + let async_ret = + engine.async_write(&ctx, batch, Box::new(|res| cb(res.map_err(Error::from)))); + let v: Result<()> = match async_ret { + Err(e) => Err(Error::from(e)), + Ok(_) => f.await.unwrap(), + }; + callback(v); + KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); + SCHED_STAGE_COUNTER_VEC.get(CMD).write_finish.inc(); + SCHED_HISTOGRAM_VEC_STATIC + .get(CMD) + .observe(command_duration.saturating_elapsed().as_secs_f64()); + }) } /// Delete all raw keys in [`start_key`, `end_key`). 
@@ -1962,31 +2025,45 @@ impl Storage { end_key: Vec, callback: Callback<()>, ) -> Result<()> { + const CMD: CommandKind = CommandKind::raw_delete_range; check_key_size!([&start_key, &end_key], self.max_key_size, callback); Self::check_api_version_ranges( self.api_version, ctx.api_version, - CommandKind::raw_delete_range, + CMD, [(Some(&start_key), Some(&end_key))], )?; let cf = Self::rawkv_cf(&cf, self.api_version)?; - let start_key = F::encode_raw_key_owned(start_key, None); - let end_key = F::encode_raw_key_owned(end_key, None); - - let mut batch = - WriteData::from_modifies(vec![Modify::DeleteRange(cf, start_key, end_key, false)]); - batch.set_allowed_on_disk_almost_full(); - - // TODO: special notification channel for API V2. - - self.engine.async_write( - &ctx, - batch, - Box::new(|res| callback(res.map_err(Error::from))), - )?; - KV_COMMAND_COUNTER_VEC_STATIC.raw_delete_range.inc(); - Ok(()) + let engine = self.engine.clone(); + let deadline = Self::get_deadline(&ctx); + self.sched_raw_command(CMD, async move { + if let Err(e) = deadline.check() { + return callback(Err(Error::from(e))); + } + let command_duration = tikv_util::time::Instant::now(); + let start_key = F::encode_raw_key_owned(start_key, None); + let end_key = F::encode_raw_key_owned(end_key, None); + + let mut batch = + WriteData::from_modifies(vec![Modify::DeleteRange(cf, start_key, end_key, false)]); + batch.set_allowed_on_disk_almost_full(); + + // TODO: special notification channel for API V2. 
+ let (cb, f) = tikv_util::future::paired_future_callback(); + let async_ret = + engine.async_write(&ctx, batch, Box::new(|res| cb(res.map_err(Error::from)))); + let v: Result<()> = match async_ret { + Err(e) => Err(Error::from(e)), + Ok(_) => f.await.unwrap(), + }; + callback(v); + KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); + SCHED_STAGE_COUNTER_VEC.get(CMD).write_finish.inc(); + SCHED_HISTOGRAM_VEC_STATIC + .get(CMD) + .observe(command_duration.saturating_elapsed().as_secs_f64()); + }) } /// Delete some raw keys in a batch. @@ -1999,30 +2076,38 @@ impl Storage { keys: Vec>, callback: Callback<()>, ) -> Result<()> { - Self::check_api_version( - self.api_version, - ctx.api_version, - CommandKind::raw_batch_delete, - &keys, - )?; + const CMD: CommandKind = CommandKind::raw_batch_delete; + Self::check_api_version(self.api_version, ctx.api_version, CMD, &keys)?; let cf = Self::rawkv_cf(&cf, self.api_version)?; check_key_size!(keys.iter(), self.max_key_size, callback); - - let modifies = keys - .into_iter() - .map(|k| Self::raw_delete_request_to_modify(cf, k)) - .collect(); - let mut batch = WriteData::from_modifies(modifies); - batch.set_allowed_on_disk_almost_full(); - - self.engine.async_write( - &ctx, - batch, - Box::new(|res| callback(res.map_err(Error::from))), - )?; - KV_COMMAND_COUNTER_VEC_STATIC.raw_batch_delete.inc(); - Ok(()) + let engine = self.engine.clone(); + let deadline = Self::get_deadline(&ctx); + self.sched_raw_command(CMD, async move { + if let Err(e) = deadline.check() { + return callback(Err(Error::from(e))); + } + let command_duration = tikv_util::time::Instant::now(); + let modifies = keys + .into_iter() + .map(|k| Self::raw_delete_request_to_modify(cf, k)) + .collect(); + let mut batch = WriteData::from_modifies(modifies); + batch.set_allowed_on_disk_almost_full(); + let (cb, f) = tikv_util::future::paired_future_callback(); + let async_ret = + engine.async_write(&ctx, batch, Box::new(|res| cb(res.map_err(Error::from)))); + let v: 
Result<()> = match async_ret { + Err(e) => Err(Error::from(e)), + Ok(_) => f.await.unwrap(), + }; + callback(v); + KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); + SCHED_STAGE_COUNTER_VEC.get(CMD).write_finish.inc(); + SCHED_HISTOGRAM_VEC_STATIC + .get(CMD) + .observe(command_duration.saturating_elapsed().as_secs_f64()); + }) } /// Scan raw keys in a range. @@ -2444,7 +2529,8 @@ impl Storage { )?; let cf = Self::rawkv_cf(&cf, self.api_version)?; - let modifies = Self::raw_batch_put_requests_to_modifies(cf, pairs, ttls)?; + Self::check_ttl_valid(pairs.len(), &ttls)?; + let modifies = Self::raw_batch_put_requests_to_modifies(cf, pairs, ttls); let cmd = RawAtomicStore::new(cf, modifies, ctx); self.sched_txn_command(cmd, callback) } diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 7502fe6be4e..40ba7297b7c 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -1425,3 +1425,36 @@ fn test_mvcc_concurrent_commit_and_rollback_at_shutdown() { ); assert_eq!(get_resp.value, v); } + +#[test] +fn test_raw_put_deadline() { + let deadline_fp = "deadline_check_fail"; + let mut cluster = new_server_cluster(0, 1); + cluster.run(); + let region = cluster.get_region(b""); + let leader = region.get_peers()[0].clone(); + + let env = Arc::new(Environment::new(1)); + let channel = + ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); + let client = TikvClient::new(channel); + + let mut ctx = Context::default(); + ctx.set_region_id(region.get_id()); + ctx.set_region_epoch(region.get_region_epoch().clone()); + ctx.set_peer(leader); + + let mut put_req = RawPutRequest::default(); + put_req.set_context(ctx); + put_req.key = b"k3".to_vec(); + put_req.value = b"v3".to_vec(); + fail::cfg(deadline_fp, "return()").unwrap(); + let put_resp = client.raw_put(&put_req).unwrap(); + assert!(put_resp.has_region_error(), "{:?}", put_resp); + must_get_none(&cluster.get_engine(1), 
b"k3"); + + fail::remove(deadline_fp); + let put_resp = client.raw_put(&put_req).unwrap(); + assert!(!put_resp.has_region_error(), "{:?}", put_resp); + must_get_equal(&cluster.get_engine(1), b"k3", b"v3"); +} From 252b44288c7eaa1943b5b871018d80d63a7af88f Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Mon, 22 Aug 2022 18:54:20 -0700 Subject: [PATCH 0165/1149] make read quota limiter work for coprocessor as well (#13257) close tikv/tikv#13256 The coprocessor's read bytes are not calculated by foreground quota limiter. Signed-off-by: qi.xu Co-authored-by: qi.xu Co-authored-by: 5kbpers --- components/test_coprocessor/src/fixture.rs | 20 ++++++++--- components/tidb_query_executors/src/runner.rs | 3 ++ components/tikv_util/src/quota_limiter.rs | 6 ++++ tests/failpoints/cases/test_coprocessor.rs | 4 +-- .../integrations/coprocessor/test_analyze.rs | 16 ++++----- .../integrations/coprocessor/test_checksum.rs | 2 +- tests/integrations/coprocessor/test_select.rs | 34 +++++++++++-------- 7 files changed, 55 insertions(+), 30 deletions(-) diff --git a/components/test_coprocessor/src/fixture.rs b/components/test_coprocessor/src/fixture.rs index 55a7f72a07f..23fc877a996 100644 --- a/components/test_coprocessor/src/fixture.rs +++ b/components/test_coprocessor/src/fixture.rs @@ -67,7 +67,7 @@ pub fn init_data_with_engine_and_commit( tbl: &ProductTable, vals: &[(i64, Option<&str>, i64)], commit: bool, -) -> (Store, Endpoint) { +) -> (Store, Endpoint, Arc) { init_data_with_details(ctx, engine, tbl, vals, commit, &Config::default()) } @@ -78,7 +78,7 @@ pub fn init_data_with_details( vals: &[(i64, Option<&str>, i64)], commit: bool, cfg: &Config, -) -> (Store, Endpoint) { +) -> (Store, Endpoint, Arc) { let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) .build() .unwrap(); @@ -103,21 +103,22 @@ pub fn init_data_with_details( store.get_engine(), )); let cm = ConcurrencyManager::new(1.into()); + let limiter = Arc::new(QuotaLimiter::default()); let 
copr = Endpoint::new( cfg, pool.handle(), cm, ResourceTagFactory::new_for_test(), - Arc::new(QuotaLimiter::default()), + limiter.clone(), ); - (store, copr) + (store, copr, limiter) } pub fn init_data_with_commit( tbl: &ProductTable, vals: &[(i64, Option<&str>, i64)], commit: bool, -) -> (Store, Endpoint) { +) -> (Store, Endpoint, Arc) { let engine = TestEngineBuilder::new().build().unwrap(); init_data_with_engine_and_commit(Context::default(), engine, tbl, vals, commit) } @@ -128,5 +129,14 @@ pub fn init_with_data( tbl: &ProductTable, vals: &[(i64, Option<&str>, i64)], ) -> (Store, Endpoint) { + let (store, endpoint, _) = init_data_with_commit(tbl, vals, true); + (store, endpoint) +} + +// Same as init_with_data except returned values include Arc +pub fn init_with_data_ext( + tbl: &ProductTable, + vals: &[(i64, Option<&str>, i64)], +) -> (Store, Endpoint, Arc) { init_data_with_commit(tbl, vals, true) } diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 073fade4b29..9f32aaa180e 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -471,6 +471,9 @@ impl BatchExecutorsRunner { &mut ctx, )? }; + if chunk.has_rows_data() { + sample.add_read_bytes(chunk.get_rows_data().len()); + } let quota_delay = self.quota_limiter.consume_sample(sample, true).await; if !quota_delay.is_zero() { diff --git a/components/tikv_util/src/quota_limiter.rs b/components/tikv_util/src/quota_limiter.rs index f382964c4d1..4d5ca82c7d9 100644 --- a/components/tikv_util/src/quota_limiter.rs +++ b/components/tikv_util/src/quota_limiter.rs @@ -233,6 +233,12 @@ impl QuotaLimiter { self.enable_auto_tune.load(Ordering::Relaxed) } + pub fn total_read_bytes_consumed(&self, is_foreground: bool) -> usize { + self.get_limiters(is_foreground) + .read_bandwidth_limiter + .total_bytes_consumed() + } + // To generate a sampler. 
pub fn new_sample(&self, is_foreground: bool) -> Sample { Sample { diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index 4371e8999ce..481e533a879 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -60,7 +60,7 @@ fn test_deadline_3() { ]; let product = ProductTable::new(); - let (_, endpoint) = { + let (_, endpoint, _) = { let engine = tikv::storage::TestEngineBuilder::new().build().unwrap(); let cfg = tikv::server::Config { end_point_request_max_handle_duration: tikv_util::config::ReadableDuration::secs(1), @@ -174,7 +174,7 @@ fn test_region_error_in_scan() { let (_cluster, raft_engine, mut ctx) = new_raft_engine(1, ""); ctx.set_isolation_level(IsolationLevel::Si); - let (_, endpoint) = + let (_, endpoint, _) = init_data_with_engine_and_commit(ctx.clone(), raft_engine, &product, &data, true); fail::cfg("region_snapshot_seek", "return()").unwrap(); diff --git a/tests/integrations/coprocessor/test_analyze.rs b/tests/integrations/coprocessor/test_analyze.rs index 04f10fa08f1..0ce4623ac15 100644 --- a/tests/integrations/coprocessor/test_analyze.rs +++ b/tests/integrations/coprocessor/test_analyze.rs @@ -114,7 +114,7 @@ fn test_analyze_column_with_lock() { let product = ProductTable::new(); for &iso_level in &[IsolationLevel::Si, IsolationLevel::Rc] { - let (_, endpoint) = init_data_with_commit(&product, &data, false); + let (_, endpoint, _) = init_data_with_commit(&product, &data, false); let mut req = new_analyze_column_req(&product, 3, 3, 3, 3, 4, 32); let mut ctx = Context::default(); @@ -149,7 +149,7 @@ fn test_analyze_column() { ]; let product = ProductTable::new(); - let (_, endpoint) = init_data_with_commit(&product, &data, true); + let (_, endpoint, _) = init_data_with_commit(&product, &data, true); let req = new_analyze_column_req(&product, 3, 3, 3, 3, 4, 32); let resp = handle_request(&endpoint, req); @@ -181,7 +181,7 @@ fn 
test_analyze_single_primary_column() { ]; let product = ProductTable::new(); - let (_, endpoint) = init_data_with_commit(&product, &data, true); + let (_, endpoint, _) = init_data_with_commit(&product, &data, true); let req = new_analyze_column_req(&product, 1, 3, 3, 3, 4, 32); let resp = handle_request(&endpoint, req); @@ -206,7 +206,7 @@ fn test_analyze_index_with_lock() { let product = ProductTable::new(); for &iso_level in &[IsolationLevel::Si, IsolationLevel::Rc] { - let (_, endpoint) = init_data_with_commit(&product, &data, false); + let (_, endpoint, _) = init_data_with_commit(&product, &data, false); let mut req = new_analyze_index_req(&product, 3, product["name"].index, 4, 32, 0, 1); let mut ctx = Context::default(); @@ -246,7 +246,7 @@ fn test_analyze_index() { ]; let product = ProductTable::new(); - let (_, endpoint) = init_data_with_commit(&product, &data, true); + let (_, endpoint, _) = init_data_with_commit(&product, &data, true); let req = new_analyze_index_req(&product, 3, product["name"].index, 4, 32, 2, 2); let resp = handle_request(&endpoint, req); @@ -288,7 +288,7 @@ fn test_analyze_sampling_reservoir() { ]; let product = ProductTable::new(); - let (_, endpoint) = init_data_with_commit(&product, &data, true); + let (_, endpoint, _) = init_data_with_commit(&product, &data, true); // Pass the 2nd column as a column group. let req = new_analyze_sampling_req(&product, 1, 5, 0.0); @@ -320,7 +320,7 @@ fn test_analyze_sampling_bernoulli() { ]; let product = ProductTable::new(); - let (_, endpoint) = init_data_with_commit(&product, &data, true); + let (_, endpoint, _) = init_data_with_commit(&product, &data, true); // Pass the 2nd column as a column group. 
let req = new_analyze_sampling_req(&product, 1, 0, 0.5); @@ -346,7 +346,7 @@ fn test_invalid_range() { ]; let product = ProductTable::new(); - let (_, endpoint) = init_data_with_commit(&product, &data, true); + let (_, endpoint, _) = init_data_with_commit(&product, &data, true); let mut req = new_analyze_index_req(&product, 3, product["name"].index, 4, 32, 0, 1); let mut key_range = KeyRange::default(); key_range.set_start(b"xxx".to_vec()); diff --git a/tests/integrations/coprocessor/test_checksum.rs b/tests/integrations/coprocessor/test_checksum.rs index db96393c860..2983414b9cc 100644 --- a/tests/integrations/coprocessor/test_checksum.rs +++ b/tests/integrations/coprocessor/test_checksum.rs @@ -46,7 +46,7 @@ fn test_checksum() { ]; let product = ProductTable::new(); - let (store, endpoint) = init_data_with_commit(&product, &data, true); + let (store, endpoint, _) = init_data_with_commit(&product, &data, true); for column in &[&product["id"], &product["name"], &product["count"]] { assert!(column.index >= 0); diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index 660e88905e4..952516daf35 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -19,7 +19,7 @@ use tikv::{ server::Config, storage::TestEngineBuilder, }; -use tikv_util::codec::number::*; +use tikv_util::{codec::number::*, config::ReadableSize}; use tipb::{ AnalyzeColumnsReq, AnalyzeReq, AnalyzeType, ChecksumRequest, Chunk, Expr, ExprType, ScalarFuncSig, SelectResponse, @@ -61,10 +61,15 @@ fn test_select() { ]; let product = ProductTable::new(); - let (_, endpoint) = init_with_data(&product, &data); + let (_, endpoint, limiter) = init_with_data_ext(&product, &data); + limiter.set_read_bandwidth_limit(ReadableSize::kb(1), true); // for dag selection let req = DagSelect::from(&product).build(); let mut resp = handle_select(&endpoint, req); + let mut total_chunk_size = 0; + for chunk in 
resp.get_chunks() { + total_chunk_size += chunk.get_rows_data().len(); + } let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); for (row, (id, name, cnt)) in spliter.zip(data) { let name_datum = name.map(|s| s.as_bytes()).into(); @@ -76,6 +81,7 @@ fn test_select() { let result_encoded = datum::encode_value(&mut EvalContext::default(), &row).unwrap(); assert_eq!(result_encoded, &*expected_encoded); } + assert_eq!(limiter.total_read_bytes_consumed(true), total_chunk_size); // the consume_sample is called due to read bytes quota } #[test] @@ -89,7 +95,7 @@ fn test_batch_row_limit() { let batch_row_limit = 3; let chunk_datum_limit = batch_row_limit * 3; // we have 3 fields. let product = ProductTable::new(); - let (_, endpoint) = { + let (_, endpoint, _) = { let engine = TestEngineBuilder::new().build().unwrap(); let mut cfg = Config::default(); cfg.end_point_batch_row_limit = batch_row_limit; @@ -125,7 +131,7 @@ fn test_stream_batch_row_limit() { let product = ProductTable::new(); let stream_row_limit = 2; - let (_, endpoint) = { + let (_, endpoint, _) = { let engine = TestEngineBuilder::new().build().unwrap(); let mut cfg = Config::default(); cfg.end_point_stream_batch_row_limit = stream_row_limit; @@ -198,7 +204,7 @@ fn test_select_after_lease() { let product = ProductTable::new(); let (cluster, raft_engine, ctx) = new_raft_engine(1, ""); - let (_, endpoint) = + let (_, endpoint, _) = init_data_with_engine_and_commit(ctx.clone(), raft_engine, &product, &data, true); // Sleep until the leader lease is expired. 
@@ -228,7 +234,7 @@ fn test_scan_detail() { ]; let product = ProductTable::new(); - let (_, endpoint) = { + let (_, endpoint, _) = { let engine = TestEngineBuilder::new().build().unwrap(); let mut cfg = Config::default(); cfg.end_point_batch_row_limit = 50; @@ -1605,7 +1611,7 @@ fn test_key_is_locked_for_primary() { ]; let product = ProductTable::new(); - let (_, endpoint) = init_data_with_commit(&product, &data, false); + let (_, endpoint, _) = init_data_with_commit(&product, &data, false); let req = DagSelect::from(&product).build(); let resp = handle_request(&endpoint, req); @@ -1623,7 +1629,7 @@ fn test_key_is_locked_for_index() { ]; let product = ProductTable::new(); - let (_, endpoint) = init_data_with_commit(&product, &data, false); + let (_, endpoint, _) = init_data_with_commit(&product, &data, false); let req = DagSelect::from_index(&product, &product["name"]).build(); let resp = handle_request(&endpoint, req); @@ -1700,7 +1706,7 @@ fn test_snapshot_failed() { let product = ProductTable::new(); let (_cluster, raft_engine, ctx) = new_raft_engine(1, ""); - let (_, endpoint) = init_data_with_engine_and_commit(ctx, raft_engine, &product, &[], true); + let (_, endpoint, _) = init_data_with_engine_and_commit(ctx, raft_engine, &product, &[], true); // Use an invalid context to make errors. 
let req = DagSelect::from(&product).build_with(Context::default(), &[0]); @@ -1721,7 +1727,7 @@ fn test_cache() { let product = ProductTable::new(); let (_cluster, raft_engine, ctx) = new_raft_engine(1, ""); - let (_, endpoint) = + let (_, endpoint, _) = init_data_with_engine_and_commit(ctx.clone(), raft_engine, &product, &data, true); let req = DagSelect::from(&product).build_with(ctx, &[0]); @@ -1796,7 +1802,7 @@ fn test_copr_bypass_or_access_locks() { (8, Some("name:8"), 8), ]; // lock row 3, 4, 6 - let (mut store, endpoint) = init_data_with_engine_and_commit( + let (mut store, endpoint, _) = init_data_with_engine_and_commit( Default::default(), store.get_engine(), &product, @@ -1912,7 +1918,7 @@ fn test_rc_read() { ]; // uncommitted lock to be ignored - let (store, _) = init_data_with_engine_and_commit( + let (store, ..) = init_data_with_engine_and_commit( Default::default(), store.get_engine(), &product, @@ -1921,7 +1927,7 @@ fn test_rc_read() { ); // committed lock to be read - let (mut store, endpoint) = init_data_with_engine_and_commit( + let (mut store, endpoint, _) = init_data_with_engine_and_commit( Default::default(), store.get_engine(), &product, @@ -1970,7 +1976,7 @@ fn test_buckets() { let product = ProductTable::new(); let (mut cluster, raft_engine, ctx) = new_raft_engine(1, ""); - let (_, endpoint) = + let (_, endpoint, _) = init_data_with_engine_and_commit(ctx.clone(), raft_engine, &product, &[], true); let req = DagSelect::from(&product).build_with(ctx, &[0]); From 2c2c005b2ed609e60717fab25e7da9b708dac265 Mon Sep 17 00:00:00 2001 From: YangKeao Date: Tue, 23 Aug 2022 02:40:21 -0400 Subject: [PATCH 0166/1149] copr: fix compatiblity of `json_extract` function (#13299) close tikv/tikv#13297 Signed-off-by: YangKeao --- .../src/codec/mysql/json/json_extract.rs | 196 ++++++++++++++++-- .../src/codec/mysql/json/mod.rs | 13 ++ 2 files changed, 186 insertions(+), 23 deletions(-) diff --git 
a/components/tidb_query_datatype/src/codec/mysql/json/json_extract.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_extract.rs index f7c1198c542..d40451fc9b5 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/json_extract.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/json_extract.rs @@ -1,5 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. +use collections::HashSet; + use super::{ super::Result, path_expr::{PathExpression, PathLeg, PATH_EXPR_ARRAY_INDEX_ASTERISK, PATH_EXPR_ASTERISK}, @@ -14,21 +16,59 @@ impl<'a> JsonRef<'a> { /// /// See `Extract()` in TiDB `json.binary_function.go` pub fn extract(&self, path_expr_list: &[PathExpression]) -> Result> { + let mut could_return_multiple_matches = path_expr_list.len() > 1; + let mut elem_list = Vec::with_capacity(path_expr_list.len()); for path_expr in path_expr_list { + could_return_multiple_matches |= path_expr.contains_any_asterisk(); elem_list.append(&mut extract_json(*self, &path_expr.legs)?) } if elem_list.is_empty() { - return Ok(None); + Ok(None) + } else if could_return_multiple_matches { + Ok(Some(Json::from_array( + elem_list.drain(..).map(|j| j.to_owned()).collect(), + )?)) + } else { + Ok(Some(elem_list.remove(0).to_owned())) } - if path_expr_list.len() == 1 && elem_list.len() == 1 { - // If path_expr contains asterisks, elem_list.len() won't be 1 - // even if path_expr_list.len() equals to 1. 
- return Ok(Some(elem_list.remove(0).to_owned())); + } +} + +#[derive(Eq)] +struct RefEqualJsonWrapper<'a>(JsonRef<'a>); + +impl<'a> PartialEq for RefEqualJsonWrapper<'a> { + fn eq(&self, other: &Self) -> bool { + self.0.ref_eq(&other.0) + } +} + +impl<'a> std::hash::Hash for RefEqualJsonWrapper<'a> { + fn hash(&self, state: &mut H) { + self.0.value.as_ptr().hash(state) + } +} + +// append the elem_list vector, if the referenced json object doesn't exist +// unlike the append in std, this function **doesn't** set the `other` length to +// 0 +// +// To use this function, you have to ensure both `elem_list` and `other` are +// unique. +fn append_if_ref_unique<'a>(elem_list: &mut Vec>, other: &Vec>) { + elem_list.reserve(other.len()); + + let mut unique_verifier = HashSet::>::with_hasher(Default::default()); + for elem in elem_list.iter() { + unique_verifier.insert(RefEqualJsonWrapper(*elem)); + } + + for elem in other { + let elem = RefEqualJsonWrapper(*elem); + if !unique_verifier.contains(&elem) { + elem_list.push(elem.0); } - Ok(Some(Json::from_array( - elem_list.drain(..).map(|j| j.to_owned()).collect(), - )?)) } } @@ -45,18 +85,21 @@ pub fn extract_json<'a>(j: JsonRef<'a>, path_legs: &[PathLeg]) -> Result { if i as usize == 0 { - ret.append(&mut extract_json(j, sub_path_legs)?) + append_if_ref_unique(&mut ret, &extract_json(j, sub_path_legs)?) } } }, @@ -65,27 +108,36 @@ pub fn extract_json<'a>(j: JsonRef<'a>, path_legs: &[PathLeg]) -> Result { - ret.append(&mut extract_json(j, sub_path_legs)?); + append_if_ref_unique(&mut ret, &extract_json(j, sub_path_legs)?); match j.get_type() { JsonType::Array => { let elem_count = j.get_elem_count(); for k in 0..elem_count { - ret.append(&mut extract_json(j.array_get_elem(k)?, sub_path_legs)?) 
+ append_if_ref_unique( + &mut ret, + &extract_json(j.array_get_elem(k)?, path_legs)?, + ) } } JsonType::Object => { let elem_count = j.get_elem_count(); for i in 0..elem_count { - ret.append(&mut extract_json(j.object_get_val(i)?, sub_path_legs)?) + append_if_ref_unique( + &mut ret, + &extract_json(j.object_get_val(i)?, path_legs)?, + ) } } _ => {} @@ -257,7 +309,7 @@ mod tests { legs: vec![PathLeg::DoubleAsterisk, PathLeg::Key(String::from("c"))], flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }], - Some("false"), + Some("[false]"), ), ( r#"[{"a": "a1", "b": 20.08, "c": false}, true]"#, @@ -265,7 +317,101 @@ mod tests { legs: vec![PathLeg::DoubleAsterisk, PathLeg::Key(String::from("c"))], flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }], - Some("false"), + Some("[false]"), + ), + ( + r#"[[0, 1], [2, 3], [4, [5, 6]]]"#, + vec![PathExpression { + legs: vec![PathLeg::DoubleAsterisk, PathLeg::Index(0)], + flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, + }], + Some("[[0, 1], 0, 1, 2, 3, 4, 5, 6]"), + ), + ( + r#"[[0, 1], [2, 3], [4, [5, 6]]]"#, + vec![ + PathExpression { + legs: vec![PathLeg::DoubleAsterisk, PathLeg::Index(0)], + flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, + }, + PathExpression { + legs: vec![PathLeg::DoubleAsterisk, PathLeg::Index(0)], + flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, + }, + ], + Some("[[0, 1], 0, 1, 2, 3, 4, 5, 6, [0, 1], 0, 1, 2, 3, 4, 5, 6]"), + ), + ( + "[1]", + vec![PathExpression { + legs: vec![PathLeg::DoubleAsterisk, PathLeg::Index(0)], + flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, + }], + Some("[1]"), + ), + ( + r#"{"a": 1}"#, + vec![PathExpression { + legs: vec![PathLeg::Key(String::from("a")), PathLeg::Index(0)], + flags: PathExpressionFlag::default(), + }], + Some("1"), + ), + ( + r#"{"a": 1}"#, + vec![PathExpression { + legs: vec![PathLeg::DoubleAsterisk, PathLeg::Index(0)], + flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, + }], + Some(r#"[{"a": 1}, 1]"#), + ), + ( + r#"{"a": 1}"#, + 
vec![PathExpression { + legs: vec![ + PathLeg::Index(0), + PathLeg::Index(0), + PathLeg::Index(0), + PathLeg::Key(String::from("a")), + ], + flags: PathExpressionFlag::default(), + }], + Some(r#"1"#), + ), + ( + r#"[1, [[{"x": [{"a":{"b":{"c":42}}}]}]]]"#, + vec![PathExpression { + legs: vec![ + PathLeg::DoubleAsterisk, + PathLeg::Key(String::from("a")), + PathLeg::Key(String::from("*")), + ], + flags: PATH_EXPRESSION_CONTAINS_ASTERISK + | PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, + }], + Some(r#"[{"c": 42}]"#), + ), + ( + r#"[{"a": [3,4]}, {"b": 2 }]"#, + vec![ + PathExpression { + legs: vec![PathLeg::Index(0), PathLeg::Key(String::from("a"))], + flags: PathExpressionFlag::default(), + }, + PathExpression { + legs: vec![PathLeg::Index(1), PathLeg::Key(String::from("a"))], + flags: PathExpressionFlag::default(), + }, + ], + Some("[[3, 4]]"), + ), + ( + r#"[{"a": [1,1,1,1]}]"#, + vec![PathExpression { + legs: vec![PathLeg::Index(0), PathLeg::Key(String::from("a"))], + flags: PathExpressionFlag::default(), + }], + Some("[1, 1, 1, 1]"), ), ]; for (i, (js, exprs, expected)) in test_cases.drain(..).enumerate() { @@ -276,11 +422,15 @@ mod tests { Some(es) => { let e = Json::from_str(es); assert!(e.is_ok(), "#{} expect parse json ok but got {:?}", i, e); - Some(e.unwrap()) + Some(e.unwrap().to_string()) } None => None, }; - let got = j.as_ref().extract(&exprs[..]).unwrap(); + let got = j + .as_ref() + .extract(&exprs[..]) + .unwrap() + .map(|got| got.to_string()); assert_eq!( got, expected, "#{} expect {:?}, but got {:?}", diff --git a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs index 8967ab71eeb..480ac5db129 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs @@ -219,6 +219,19 @@ impl<'a> JsonRef<'a> { JsonType::String => false, } } + + // Returns whether the two JsonRef references to the same + // json object. 
+ // + // As the JsonRef exists and holds the reference to the Json + // , the `Vec` inside the Json cannot be changed, so comparing + // the pointer is enough to represent the reference equality. + // + // PartialEq and PartialCmp have been implemented for JsonRef + // to compare the value. + pub(crate) fn ref_eq(&self, other: &JsonRef<'a>) -> bool { + std::ptr::eq(self.value, other.value) + } } /// Json implements type json used in tikv by Binary Json. From 7861f56f6249ea6b4cc19a6b2ba7d7dbd2a63c25 Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 22 Aug 2022 23:56:20 -0700 Subject: [PATCH 0167/1149] raftstore-v2: support status query (#13300) ref tikv/tikv#12842 And as an example to show how to setup test case. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/batch/store.rs | 16 ++- components/raftstore-v2/src/fsm/peer.rs | 33 +++++- components/raftstore-v2/src/fsm/store.rs | 8 +- components/raftstore-v2/src/lib.rs | 3 +- components/raftstore-v2/src/operation/mod.rs | 2 +- .../src/operation/{read => query}/local.rs | 0 .../raftstore-v2/src/operation/query/mod.rs | 82 ++++++++++++++ .../raftstore-v2/src/operation/read/mod.rs | 9 -- .../src/operation/ready/async_writer.rs | 2 +- .../raftstore-v2/src/operation/ready/mod.rs | 4 +- components/raftstore-v2/src/raft/peer.rs | 32 +++++- components/raftstore-v2/src/router/message.rs | 66 ++++------- components/raftstore-v2/src/router/mod.rs | 2 +- .../src/router/response_channel.rs | 12 +- .../raftstore-v2/tests/integrations/mod.rs | 105 ++++++++++++++++-- .../tests/integrations/test_election.rs | 10 -- .../tests/integrations/test_status.rs | 50 +++++++++ components/tikv_util/src/config.rs | 2 + scripts/check-license | 2 +- 19 files changed, 334 insertions(+), 106 deletions(-) rename components/raftstore-v2/src/operation/{read => query}/local.rs (100%) create mode 100644 components/raftstore-v2/src/operation/query/mod.rs delete mode 100644 components/raftstore-v2/src/operation/read/mod.rs 
delete mode 100644 components/raftstore-v2/tests/integrations/test_election.rs create mode 100644 components/raftstore-v2/tests/integrations/test_status.rs diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index d4cba3d9381..d30490f50d5 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -34,7 +34,8 @@ use super::apply::{create_apply_batch_system, ApplyPollerBuilder, ApplyRouter, A use crate::{ fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate}, raft::Peer, - Error, PeerMsg, PeerTick, Result, StoreMsg, + router::{PeerMsg, PeerTick, StoreMsg}, + Error, Result, }; /// A per-thread context shared by the [`StoreFsm`] and multiple [`PeerFsm`]s. @@ -139,6 +140,9 @@ impl PollHandler F: FnOnce(&'a batch_system::Config), { + if self.store_msg_buf.capacity() == 0 || self.peer_msg_buf.capacity() == 0 { + self.apply_buf_capacity(); + } // Apply configuration changes. if let Some(cfg) = self.cfg_tracker.any_new().map(|c| c.clone()) { let last_messages_per_tick = self.messages_per_tick(); @@ -152,8 +156,9 @@ impl PollHandler Option { debug_assert!(self.store_msg_buf.is_empty()); - let received_cnt = fsm.recv(&mut self.store_msg_buf); - let expected_msg_count = if received_cnt == self.messages_per_tick() { + let batch_size = self.messages_per_tick(); + let received_cnt = fsm.recv(&mut self.store_msg_buf, batch_size); + let expected_msg_count = if received_cnt == batch_size { None } else { Some(0) @@ -165,8 +170,9 @@ impl PollHandler>) -> HandleResult { debug_assert!(self.peer_msg_buf.is_empty()); - let received_cnt = fsm.recv(&mut self.peer_msg_buf); - let handle_result = if received_cnt == self.messages_per_tick() { + let batch_size = self.messages_per_tick(); + let received_cnt = fsm.recv(&mut self.peer_msg_buf, batch_size); + let handle_result = if received_cnt == batch_size { HandleResult::KeepProcessing } else { HandleResult::stop_at(0, false) 
diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 307da362330..886d8b2323a 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -13,9 +13,15 @@ use slog::{debug, error, info, trace, Logger}; use tikv_util::{ is_zero_duration, mpsc::{self, LooseBoundedSender, Receiver, Sender}, + time::{duration_to_sec, Instant}, }; -use crate::{batch::StoreContext, raft::Peer, PeerMsg, PeerTick, Result}; +use crate::{ + batch::StoreContext, + raft::Peer, + router::{PeerMsg, PeerTick}, + Result, +}; pub type SenderFsmPair = (LooseBoundedSender, Box>); @@ -62,9 +68,9 @@ impl PeerFsm { /// capacity is reached or there is no more pending messages. /// /// Returns how many messages are fetched. - pub fn recv(&mut self, peer_msg_buf: &mut Vec) -> usize { + pub fn recv(&mut self, peer_msg_buf: &mut Vec, batch_size: usize) -> usize { let l = peer_msg_buf.len(); - for i in l..peer_msg_buf.capacity() { + for i in l..batch_size { match self.receiver.try_recv() { Ok(msg) => peer_msg_buf.push(msg), Err(e) => { @@ -75,7 +81,7 @@ impl PeerFsm { } } } - peer_msg_buf.capacity() - l + batch_size - l } } @@ -167,6 +173,14 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, self.schedule_tick(PeerTick::Raft); } + #[inline] + fn on_receive_command(&self, send_time: Instant) { + self.store_ctx + .raft_metrics + .propose_wait_time + .observe(duration_to_sec(send_time.saturating_elapsed()) as f64); + } + fn on_tick(&mut self, tick: PeerTick) { match tick { PeerTick::Raft => self.on_raft_tick(), @@ -187,8 +201,15 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, for msg in peer_msgs_buf.drain(..) 
{ match msg { PeerMsg::RaftMessage(_) => unimplemented!(), - PeerMsg::RaftQuery(_) => unimplemented!(), - PeerMsg::RaftCommand(_) => unimplemented!(), + PeerMsg::RaftQuery(cmd) => { + self.on_receive_command(cmd.send_time); + self.on_query(cmd.request, cmd.ch) + } + PeerMsg::RaftCommand(cmd) => { + self.on_receive_command(cmd.send_time); + // self.on_command(cmd.cmd.request, cmd.ch) + unimplemented!() + } PeerMsg::Tick(tick) => self.on_tick(tick), PeerMsg::ApplyRes(res) => unimplemented!(), PeerMsg::Start => self.on_start(), diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index d80cd90d80b..61a3f76b138 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -8,7 +8,7 @@ use kvproto::metapb::Store; use raftstore::store::{Config, ReadDelegate}; use tikv_util::mpsc::{self, LooseBoundedSender, Receiver}; -use crate::{batch::StoreContext, tablet::CachedTablet, StoreMsg}; +use crate::{batch::StoreContext, router::StoreMsg, tablet::CachedTablet}; pub struct StoreMeta where @@ -53,15 +53,15 @@ impl StoreFsm { /// capacity is reached or there is no more pending messages. /// /// Returns how many messages are fetched. 
- pub fn recv(&self, store_msg_buf: &mut Vec) -> usize { + pub fn recv(&self, store_msg_buf: &mut Vec, batch_size: usize) -> usize { let l = store_msg_buf.len(); - for i in l..store_msg_buf.capacity() { + for i in l..batch_size { match self.receiver.try_recv() { Ok(msg) => store_msg_buf.push(msg), Err(_) => return i - l, } } - store_msg_buf.capacity() - l + batch_size - l } } diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 43998160638..0b890d4a177 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -29,11 +29,10 @@ mod bootstrap; mod fsm; mod operation; mod raft; -mod router; +pub mod router; mod tablet; pub(crate) use batch::StoreContext; pub use batch::{create_store_batch_system, StoreRouter, StoreSystem}; pub use bootstrap::Bootstrap; pub use raftstore::{Error, Result}; -pub use router::{PeerMsg, PeerTick, StoreMsg, StoreTick}; diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 583053dd551..c352ffe0cc1 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -1,6 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -mod read; +mod query; mod ready; pub use ready::AsyncWriter; diff --git a/components/raftstore-v2/src/operation/read/local.rs b/components/raftstore-v2/src/operation/query/local.rs similarity index 100% rename from components/raftstore-v2/src/operation/read/local.rs rename to components/raftstore-v2/src/operation/query/local.rs diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs new file mode 100644 index 00000000000..ff03117419b --- /dev/null +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -0,0 +1,82 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! There are two types of Query: KV read and status query. +//! +//! 
KV Read is implemented in local module and lease module (not implemented +//! yet). Read will be executed in callee thread if in lease, which is +//! implemented in local module. If lease is expired, it will extend the lease +//! first. Lease maintainance is implemented in lease module. +//! +//! Status query is implemented in the root module directly. + +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::raft_cmdpb::{RaftCmdRequest, RaftCmdResponse, StatusCmdType}; +use raftstore::{ + store::{cmd_resp, util, ReadCallback}, + Error, Result, +}; +use tikv_util::box_err; + +use crate::{ + fsm::PeerFsmDelegate, + raft::Peer, + router::{QueryResChannel, QueryResult}, +}; + +mod local; + +impl<'a, EK: KvEngine, ER: RaftEngine, T> PeerFsmDelegate<'a, EK, ER, T> { + #[inline] + pub fn on_query(&mut self, req: RaftCmdRequest, ch: QueryResChannel) { + if !req.has_status_request() { + unimplemented!(); + } else { + self.fsm.peer_mut().on_query_status(&req, ch); + } + } +} + +impl Peer { + /// Status command is used to query target region information. 
+ #[inline] + fn on_query_status(&mut self, req: &RaftCmdRequest, ch: QueryResChannel) { + let mut response = RaftCmdResponse::default(); + if let Err(e) = self.query_status(req, &mut response) { + cmd_resp::bind_error(&mut response, e); + } + ch.set_result(QueryResult::Response(response)); + } + + fn query_status(&mut self, req: &RaftCmdRequest, resp: &mut RaftCmdResponse) -> Result<()> { + util::check_store_id(req, self.peer().get_store_id())?; + let cmd_type = req.get_status_request().get_cmd_type(); + let status_resp = resp.mut_status_response(); + status_resp.set_cmd_type(cmd_type); + match cmd_type { + StatusCmdType::RegionLeader => { + if let Some(leader) = self.leader() { + status_resp.mut_region_leader().set_leader(leader); + } + } + StatusCmdType::RegionDetail => { + if !self.storage().is_initialized() { + let region_id = req.get_header().get_region_id(); + return Err(Error::RegionNotInitialized(region_id)); + } + status_resp + .mut_region_detail() + .set_region(self.region().clone()); + if let Some(leader) = self.leader() { + status_resp.mut_region_detail().set_leader(leader); + } + } + StatusCmdType::InvalidStatus => { + return Err(box_err!("{:?} invalid status command!", self.logger.list())); + } + } + + // Bind peer current term here. + cmd_resp::bind_term(resp, self.term()); + Ok(()) + } +} diff --git a/components/raftstore-v2/src/operation/read/mod.rs b/components/raftstore-v2/src/operation/read/mod.rs deleted file mode 100644 index efbe6af1a5a..00000000000 --- a/components/raftstore-v2/src/operation/read/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. - -//! There are two types of read: -//! - If the ReadDelegate is in the leader lease status, the read is operated -//! locally and need not to go through the raft layer (namely local read). -//! - Otherwise, redirect the request to the raftstore and proposed as a -//! RaftCommand in the raft layer. 
- -mod local; diff --git a/components/raftstore-v2/src/operation/ready/async_writer.rs b/components/raftstore-v2/src/operation/ready/async_writer.rs index 457df9307ba..e0b2a1c4802 100644 --- a/components/raftstore-v2/src/operation/ready/async_writer.rs +++ b/components/raftstore-v2/src/operation/ready/async_writer.rs @@ -16,7 +16,7 @@ use slog::{warn, Logger}; use crate::{ batch::{StoreContext, StoreRouter}, - PeerMsg, + router::PeerMsg, }; #[derive(Debug)] diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 156ea55a414..1be4b0ee546 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -32,7 +32,7 @@ use crate::{ batch::StoreContext, fsm::{PeerFsm, PeerFsmDelegate}, raft::{Peer, Storage}, - PeerTick, + router::PeerTick, }; impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { @@ -95,7 +95,7 @@ impl Peer { ctx: &mut StoreContext, msg: eraftpb::Message, ) -> Option { - let to_peer = match self.get_peer_from_cache(msg.to) { + let to_peer = match self.peer_from_cache(msg.to) { Some(p) => p, None => { warn!(self.logger, "failed to look up recipient peer"; "to_peer" => msg.to); diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index eb61d744774..6fd7b4b444c 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -88,14 +88,22 @@ impl Peer { None }; - Ok(Some(Peer { + let mut peer = Peer { raft_group: RawNode::new(&raft_cfg, s, &logger)?, tablet: CachedTablet::new(tablet), has_ready: false, async_writer: AsyncWriter::new(region_id, peer_id), logger, peer_cache: vec![], - })) + }; + + // If this region has only one peer and I am the one, campaign directly. 
+ let region = peer.region(); + if region.get_peers().len() == 1 && region.get_peers()[0].get_store_id() == store_id { + peer.raft_group.campaign()?; + } + + Ok(Some(peer)) } #[inline] @@ -193,7 +201,7 @@ impl Peer { } #[inline] - pub fn get_peer_from_cache(&self, peer_id: u64) -> Option { + pub fn peer_from_cache(&self, peer_id: u64) -> Option { for p in self.raft_group.store().region().get_peers() { if p.get_id() == peer_id { return Some(p.clone()); @@ -210,6 +218,24 @@ impl Peer { self.raft_group.raft.state == StateRole::Leader } + #[inline] + pub fn leader_id(&self) -> u64 { + self.raft_group.raft.leader_id + } + + /// Get the leader peer meta. + /// + /// `None` is returned if there is no leader or the meta can't be found. + #[inline] + pub fn leader(&self) -> Option { + let leader_id = self.leader_id(); + if leader_id != 0 { + self.peer_from_cache(leader_id) + } else { + None + } + } + /// Term of the state machine. #[inline] pub fn term(&self) -> u64 { diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 3f0dadaed04..72e6149d7ad 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -15,7 +15,7 @@ use raftstore::store::{ use tikv_util::time::Instant; use super::{ - response_channel::{CmdResChannel, QueryResChannel}, + response_channel::{CmdResChannel, CmdResSubscriber, QueryResChannel, QueryResSubscriber}, ApplyRes, }; @@ -95,49 +95,17 @@ impl StoreTick { } /// Command that can be handled by raftstore. -pub struct RaftRequest { +pub struct RaftRequest { pub send_time: Instant, pub request: RaftCmdRequest, + pub ch: C, } -impl RaftRequest { - pub fn new(request: RaftCmdRequest) -> Self { +impl RaftRequest { + pub fn new(request: RaftCmdRequest, ch: C) -> Self { RaftRequest { send_time: Instant::now(), request, - } - } -} - -/// A query that won't change any state. So it doesn't have to be replicated to -/// all replicas. 
-pub struct RaftQuery { - pub req: RaftRequest, - pub ch: QueryResChannel, -} - -impl RaftQuery { - #[inline] - pub fn new(request: RaftCmdRequest, ch: QueryResChannel) -> Self { - Self { - req: RaftRequest::new(request), - ch, - } - } -} - -/// Commands that change the inernal states. It will be transformed into logs -/// and reach consensus in the raft group. -pub struct RaftCommand { - pub cmd: RaftRequest, - pub ch: CmdResChannel, -} - -impl RaftCommand { - #[inline] - pub fn new(request: RaftCmdRequest, ch: CmdResChannel) -> Self { - Self { - cmd: RaftRequest::new(request), ch, } } @@ -149,12 +117,12 @@ pub enum PeerMsg { /// raft group. Messages need to be redirected to raftstore if target /// peer doesn't exist. RaftMessage(InspectedRaftMessage), - /// Read command only involves read operations, they are usually processed - /// using lease or read index. - RaftQuery(RaftQuery), - /// Proposal needs to be processed by all peers in a raft group. They will - /// be transformed into logs and be proposed by the leader peer. - RaftCommand(RaftCommand), + /// Query won't change any state. A typical query is KV read. In most cases, + /// it will be processed using lease or read index. + RaftQuery(RaftRequest), + /// Command changes the inernal states. It will be transformed into logs and + /// applied on all replicas. + RaftCommand(RaftRequest), /// Tick is periodical task. If target peer doesn't exist there is a /// potential that the raft node will not work anymore. 
Tick(PeerTick), @@ -172,6 +140,18 @@ pub enum PeerMsg { }, } +impl PeerMsg { + pub fn raft_query(req: RaftCmdRequest) -> (Self, QueryResSubscriber) { + let (ch, sub) = QueryResChannel::pair(); + (PeerMsg::RaftQuery(RaftRequest::new(req, ch)), sub) + } + + pub fn raft_command(req: RaftCmdRequest) -> (Self, CmdResSubscriber) { + let (ch, sub) = CmdResChannel::pair(); + (PeerMsg::RaftCommand(RaftRequest::new(req, ch)), sub) + } +} + impl fmt::Debug for PeerMsg { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { match self { diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs index 4a1df09fa44..17250833168 100644 --- a/components/raftstore-v2/src/router/mod.rs +++ b/components/raftstore-v2/src/router/mod.rs @@ -8,6 +8,6 @@ mod response_channel; pub(crate) use self::internal_message::ApplyTask; pub use self::{ internal_message::ApplyRes, - message::{PeerMsg, PeerTick, RaftCommand, RaftQuery, RaftRequest, StoreMsg, StoreTick}, + message::{PeerMsg, PeerTick, RaftRequest, StoreMsg, StoreTick}, response_channel::{CmdResChannel, QueryResChannel, QueryResult}, }; diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index fe84ae3c3ef..ae43bd07c25 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -200,11 +200,11 @@ impl<'a, Res> Future for WaitResult<'a, Res> { } } -pub struct CommandResultSubscriber { +pub struct CmdResSubscriber { core: Arc>, } -impl CommandResultSubscriber { +impl CmdResSubscriber { pub async fn wait_proposed(&mut self) -> bool { WaitEvent { event: CmdResChannel::PROPOSED_EVENT, @@ -226,8 +226,8 @@ impl CommandResultSubscriber { } } -unsafe impl Send for CommandResultSubscriber {} -unsafe impl Sync for CommandResultSubscriber {} +unsafe impl Send for CmdResSubscriber {} +unsafe impl Sync for CmdResSubscriber {} pub struct CmdResChannel { core: 
ManuallyDrop>>, @@ -239,7 +239,7 @@ impl CmdResChannel { const COMMITTED_EVENT: u64 = 2; #[inline] - pub fn pair() -> (Self, CommandResultSubscriber) { + pub fn pair() -> (Self, CmdResSubscriber) { let core = Arc::new(EventCore { event: AtomicU64::new(0), res: UnsafeCell::new(None), @@ -249,7 +249,7 @@ impl CmdResChannel { Self { core: ManuallyDrop::new(core.clone()), }, - CommandResultSubscriber { core }, + CmdResSubscriber { core }, ) } } diff --git a/components/raftstore-v2/tests/integrations/mod.rs b/components/raftstore-v2/tests/integrations/mod.rs index d93cd09fc62..d922020cbcb 100644 --- a/components/raftstore-v2/tests/integrations/mod.rs +++ b/components/raftstore-v2/tests/integrations/mod.rs @@ -8,9 +8,13 @@ #![allow(dead_code)] #![allow(unused_imports)] -use std::sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, +use std::{ + ops::{Deref, DerefMut}, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + time::Duration, }; use crossbeam::channel::{self, Receiver, Sender}; @@ -20,18 +24,55 @@ use engine_test::{ raft::RaftTestEngine, }; use engine_traits::{OpenOptions, TabletFactory, ALL_CFS}; -use kvproto::{metapb::Store, raft_serverpb::RaftMessage}; +use futures::executor::block_on; +use kvproto::{ + metapb::Store, + raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, + raft_serverpb::RaftMessage, +}; use pd_client::RpcClient; use raftstore::store::{Config, Transport, RAFT_INIT_LOG_INDEX}; -use raftstore_v2::{create_store_batch_system, Bootstrap, StoreRouter, StoreSystem}; +use raftstore_v2::{ + create_store_batch_system, + router::{PeerMsg, QueryResult}, + Bootstrap, StoreRouter, StoreSystem, +}; use slog::{o, Logger}; use tempfile::TempDir; use test_pd::mocker::Service; -use tikv_util::config::VersionTrack; +use tikv_util::config::{ReadableDuration, VersionTrack}; + +mod test_status; + +struct TestRouter(StoreRouter); + +impl Deref for TestRouter { + type Target = StoreRouter; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for 
TestRouter { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} -mod test_election; +impl TestRouter { + fn query(&self, region_id: u64, req: RaftCmdRequest) -> Option { + let (msg, sub) = PeerMsg::raft_query(req); + self.send(region_id, msg).unwrap(); + block_on(sub.result()) + } -type TestRouter = StoreRouter; + fn command(&self, region_id: u64, req: RaftCmdRequest) -> Option { + let (msg, sub) = PeerMsg::raft_command(req); + self.send(region_id, msg).unwrap(); + block_on(sub.result()) + } +} struct TestNode { _pd_server: test_pd::Server, @@ -41,6 +82,7 @@ struct TestNode { raft_engine: Option, factory: Option>, system: Option>, + cfg: Option>>, logger: Logger, } @@ -93,13 +135,14 @@ impl TestNode { raft_engine: Some(raft_engine), factory: Some(factory), system: None, + cfg: None, logger, } } fn start( &mut self, - cfg: &Arc>, + cfg: Arc>, trans: impl Transport + 'static, ) -> TestRouter { let (router, mut system) = create_store_batch_system::( @@ -117,8 +160,13 @@ impl TestNode { &router, ) .unwrap(); + self.cfg = Some(cfg); self.system = Some(system); - router + TestRouter(router) + } + + fn config(&self) -> &Arc> { + self.cfg.as_ref().unwrap() } fn stop(&mut self) { @@ -165,10 +213,43 @@ impl Transport for TestTransport { } } +// TODO: remove following when we finally integrate it in tikv-server binary. +fn v2_default_config() -> Config { + let mut config = Config::default(); + config.store_io_pool_size = 1; + config +} + +/// Disable all ticks, so test case can schedule manually. 
+fn disable_all_auto_ticks(cfg: &mut Config) { + cfg.raft_base_tick_interval = ReadableDuration::ZERO; + cfg.raft_log_gc_tick_interval = ReadableDuration::ZERO; + cfg.raft_log_compact_sync_interval = ReadableDuration::ZERO; + cfg.raft_engine_purge_interval = ReadableDuration::ZERO; + cfg.split_region_check_tick_interval = ReadableDuration::ZERO; + cfg.region_compact_check_interval = ReadableDuration::ZERO; + cfg.pd_heartbeat_tick_interval = ReadableDuration::ZERO; + cfg.pd_store_heartbeat_tick_interval = ReadableDuration::ZERO; + cfg.snap_mgr_gc_tick_interval = ReadableDuration::ZERO; + cfg.lock_cf_compact_interval = ReadableDuration::ZERO; + cfg.peer_stale_state_check_interval = ReadableDuration::ZERO; + cfg.consistency_check_interval = ReadableDuration::ZERO; + cfg.report_region_flow_interval = ReadableDuration::ZERO; + cfg.check_leader_lease_interval = ReadableDuration::ZERO; + cfg.merge_check_tick_interval = ReadableDuration::ZERO; + cfg.cleanup_import_sst_interval = ReadableDuration::ZERO; + cfg.inspect_interval = ReadableDuration::ZERO; + cfg.report_min_resolved_ts_interval = ReadableDuration::ZERO; + cfg.reactive_memory_lock_tick_interval = ReadableDuration::ZERO; + cfg.report_region_buckets_tick_interval = ReadableDuration::ZERO; + cfg.check_long_uncommitted_interval = ReadableDuration::ZERO; +} + fn setup_default_cluster() -> (TestNode, Receiver, TestRouter) { let mut node = TestNode::new(); - let cfg = Default::default(); + let mut cfg = v2_default_config(); + disable_all_auto_ticks(&mut cfg); let (tx, rx) = new_test_transport(); - let router = node.start(&cfg, tx); + let router = node.start(Arc::new(VersionTrack::new(cfg)), tx); (node, rx, router) } diff --git a/components/raftstore-v2/tests/integrations/test_election.rs b/components/raftstore-v2/tests/integrations/test_election.rs deleted file mode 100644 index cf3a0cc4906..00000000000 --- a/components/raftstore-v2/tests/integrations/test_election.rs +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright 2022 
TiKV Project Authors. Licensed under Apache-2.0. - -use raftstore_v2::PeerMsg; - -// TODO: finish test case when callback is added. -#[test] -fn test_smoke() { - let (_node, _transport, router) = super::setup_default_cluster(); - router.send(2, PeerMsg::Noop).unwrap(); -} diff --git a/components/raftstore-v2/tests/integrations/test_status.rs b/components/raftstore-v2/tests/integrations/test_status.rs new file mode 100644 index 00000000000..7b0d71c9589 --- /dev/null +++ b/components/raftstore-v2/tests/integrations/test_status.rs @@ -0,0 +1,50 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::assert_matches::assert_matches; + +use futures::executor::block_on; +use kvproto::raft_cmdpb::{RaftCmdRequest, StatusCmdType}; +use raftstore::store::util::new_peer; +use raftstore_v2::router::{PeerMsg, PeerTick, QueryResChannel, QueryResult, RaftRequest}; + +#[test] +fn test_status() { + let (_node, _transport, router) = super::setup_default_cluster(); + // When there is only one peer, it should campaign immediately. 
+ let mut req = RaftCmdRequest::default(); + req.mut_header().set_peer(new_peer(1, 3)); + req.mut_status_request() + .set_cmd_type(StatusCmdType::RegionLeader); + let res = router.query(2, req.clone()).unwrap(); + let status_resp = res.response().unwrap().get_status_response(); + assert_eq!( + *status_resp.get_region_leader().get_leader(), + new_peer(1, 3) + ); + + req.mut_status_request() + .set_cmd_type(StatusCmdType::RegionDetail); + let res = router.query(2, req.clone()).unwrap(); + let status_resp = res.response().unwrap().get_status_response(); + let detail = status_resp.get_region_detail(); + assert_eq!(*detail.get_leader(), new_peer(1, 3)); + let region = detail.get_region(); + assert_eq!(region.get_id(), 2); + assert!(region.get_start_key().is_empty()); + assert!(region.get_end_key().is_empty()); + assert_eq!(*region.get_peers(), vec![new_peer(1, 3)]); + assert_eq!(region.get_region_epoch().get_version(), 1); + assert_eq!(region.get_region_epoch().get_conf_ver(), 1); + + // Invalid store id should return error. + req.mut_header().mut_peer().set_store_id(4); + let res = router.query(2, req).unwrap(); + let resp = res.response().unwrap(); + assert!( + resp.get_header().get_error().has_store_not_match(), + "{:?}", + resp + ); + + // TODO: add a peer then check for region change and leadership change. 
+} diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index 7e9f22dcb01..e11a4799bc0 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -391,6 +391,8 @@ impl FromStr for ReadableDuration { } impl ReadableDuration { + pub const ZERO: ReadableDuration = ReadableDuration(Duration::ZERO); + pub const fn micros(micros: u64) -> ReadableDuration { ReadableDuration(Duration::from_micros(micros)) } diff --git a/scripts/check-license b/scripts/check-license index 0b35ef67177..c22e712780f 100755 --- a/scripts/check-license +++ b/scripts/check-license @@ -2,7 +2,7 @@ # Check all source files have a license header. set -euo pipefail -for i in $(git ls-files | grep "\.rs"); do +for i in $(git ls-files -o --exclude-standard | grep "\.rs"); do # first line -> match -> print line -> quit matches=$(sed -n "1{/Copyright [0-9]\{4\} TiKV Project Authors. Licensed under Apache-2.0./p;};q;" $i) if [ -z "${matches}" ]; then From 6dcd0b3d66945583a133e60838533a5d12b3a487 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Tue, 23 Aug 2022 15:28:20 +0800 Subject: [PATCH 0168/1149] raftstore: Implement coprocessor observer pre(post)_apply_snapshot (#12889) ref tikv/tikv#12849 Support new observers pre(post)_apply_snapshot. 
Signed-off-by: CalvinNeo Co-authored-by: Ti Chi Robot --- .../raftstore/src/coprocessor/dispatcher.rs | 97 +++++++++- components/raftstore/src/coprocessor/mod.rs | 29 +++ components/raftstore/src/store/mod.rs | 2 +- .../raftstore/src/store/peer_storage.rs | 1 + .../raftstore/src/store/worker/region.rs | 175 +++++++++++++++--- 5 files changed, 278 insertions(+), 26 deletions(-) diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index 7eea973997b..d2c4e14567a 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -506,6 +506,50 @@ impl CoprocessorHost { ); } + pub fn should_pre_apply_snapshot(&self) -> bool { + for observer in &self.registry.apply_snapshot_observers { + let observer = observer.observer.inner(); + if observer.should_pre_apply_snapshot() { + return true; + } + } + false + } + + pub fn pre_apply_snapshot( + &self, + region: &Region, + peer_id: u64, + snap_key: &crate::store::SnapKey, + snap: Option<&crate::store::Snapshot>, + ) { + loop_ob!( + region, + &self.registry.apply_snapshot_observers, + pre_apply_snapshot, + peer_id, + snap_key, + snap, + ); + } + + pub fn post_apply_snapshot( + &self, + region: &Region, + peer_id: u64, + snap_key: &crate::store::SnapKey, + snap: Option<&crate::store::Snapshot>, + ) { + loop_ob!( + region, + &self.registry.apply_snapshot_observers, + post_apply_snapshot, + peer_id, + snap_key, + snap, + ); + } + pub fn new_split_checker_host<'a>( &'a self, region: &Region, @@ -648,7 +692,10 @@ mod tests { }; use tikv_util::box_err; - use crate::coprocessor::*; + use crate::{ + coprocessor::*, + store::{SnapKey, Snapshot}, + }; #[derive(Clone, Default)] struct TestCoprocessor { @@ -675,6 +722,9 @@ mod tests { PostExecQuery = 17, PostExecAdmin = 18, OnComputeEngineSize = 19, + PreApplySnapshot = 20, + PostApplySnapshot = 21, + ShouldPreApplySnapshot = 22, } impl Coprocessor for TestCoprocessor {} 
@@ -842,6 +892,38 @@ mod tests { .fetch_add(ObserverIndex::ApplySst as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); } + + fn pre_apply_snapshot( + &self, + ctx: &mut ObserverContext<'_>, + _: u64, + _: &SnapKey, + _: Option<&Snapshot>, + ) { + self.called + .fetch_add(ObserverIndex::PreApplySnapshot as usize, Ordering::SeqCst); + ctx.bypass = self.bypass.load(Ordering::SeqCst); + } + + fn post_apply_snapshot( + &self, + ctx: &mut ObserverContext<'_>, + _: u64, + _: &crate::store::SnapKey, + _: Option<&Snapshot>, + ) { + self.called + .fetch_add(ObserverIndex::PostApplySnapshot as usize, Ordering::SeqCst); + ctx.bypass = self.bypass.load(Ordering::SeqCst); + } + + fn should_pre_apply_snapshot(&self) -> bool { + self.called.fetch_add( + ObserverIndex::ShouldPreApplySnapshot as usize, + Ordering::SeqCst, + ); + false + } } impl CmdObserver for TestCoprocessor { @@ -986,6 +1068,19 @@ mod tests { host.post_exec(®ion, &cmd, &apply_state, ®ion_state, &mut info); index += ObserverIndex::PostExecQuery as usize; assert_all!([&ob.called], &[index]); + + let key = SnapKey::new(region.get_id(), 1, 1); + host.pre_apply_snapshot(®ion, 0, &key, None); + index += ObserverIndex::PreApplySnapshot as usize; + assert_all!([&ob.called], &[index]); + + host.post_apply_snapshot(®ion, 0, &key, None); + index += ObserverIndex::PostApplySnapshot as usize; + assert_all!([&ob.called], &[index]); + + host.should_pre_apply_snapshot(); + index += ObserverIndex::ShouldPreApplySnapshot as usize; + assert_all!([&ob.called], &[index]); } #[test] diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index fcbfcfc98ff..cc6bfb91b06 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -179,6 +179,35 @@ pub trait ApplySnapshotObserver: Coprocessor { /// Hook to call after applying sst file. Currently the content of the /// snapshot can't be passed to the observer. 
fn apply_sst(&self, _: &mut ObserverContext<'_>, _: CfName, _path: &str) {} + + /// Hook when receiving Task::Apply. + /// Should pass valid snapshot, the option is only for testing. + /// Notice that we can call `pre_apply_snapshot` to multiple snapshots at + /// the same time. + fn pre_apply_snapshot( + &self, + _: &mut ObserverContext<'_>, + _peer_id: u64, + _: &crate::store::SnapKey, + _: Option<&crate::store::Snapshot>, + ) { + } + + /// Hook when the whole snapshot is applied. + /// Should pass valid snapshot, the option is only for testing. + fn post_apply_snapshot( + &self, + _: &mut ObserverContext<'_>, + _: u64, + _: &crate::store::SnapKey, + _snapshot: Option<&crate::store::Snapshot>, + ) { + } + + /// We call pre_apply_snapshot only when one of the observer returns true. + fn should_pre_apply_snapshot(&self) -> bool { + false + } } /// SplitChecker is invoked during a split check scan, and decides to use diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index b5a35461728..878c7c3b9f8 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -21,7 +21,7 @@ mod peer_storage; mod read_queue; mod region_snapshot; mod replication_mode; -mod snap; +pub mod snap; mod txn_ext; mod worker; diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 129dac6dbb5..4a36f385648 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -827,6 +827,7 @@ where let task = RegionTask::Apply { region_id: self.get_region_id(), status, + peer_id: self.peer_id, }; // Don't schedule the snapshot to region worker. 
diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index d15e40e6f5e..f167a2c90bf 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -84,6 +84,7 @@ pub enum Task { Apply { region_id: u64, status: Arc, + peer_id: u64, }, /// Destroy data between [start_key, end_key). /// @@ -347,13 +348,9 @@ where .observe(start.saturating_elapsed_secs()); } - /// Applies snapshot data of the Region. - fn apply_snap(&mut self, region_id: u64, abort: Arc) -> Result<()> { - info!("begin apply snap data"; "region_id" => region_id); - fail_point!("region_apply_snap", |_| { Ok(()) }); - check_abort(&abort)?; + fn region_state(&self, region_id: u64) -> Result { let region_key = keys::region_state_key(region_id); - let mut region_state: RegionLocalState = + let region_state: RegionLocalState = match box_try!(self.engine.get_msg_cf(CF_RAFT, ®ion_key)) { Some(state) => state, None => { @@ -363,6 +360,31 @@ where )); } }; + Ok(region_state) + } + + fn apply_state(&self, region_id: u64) -> Result { + let state_key = keys::apply_state_key(region_id); + let apply_state: RaftApplyState = + match box_try!(self.engine.get_msg_cf(CF_RAFT, &state_key)) { + Some(state) => state, + None => { + return Err(box_err!( + "failed to get apply_state from {}", + log_wrappers::Value::key(&state_key) + )); + } + }; + Ok(apply_state) + } + + /// Applies snapshot data of the Region. + fn apply_snap(&mut self, region_id: u64, peer_id: u64, abort: Arc) -> Result<()> { + info!("begin apply snap data"; "region_id" => region_id, "peer_id" => peer_id); + fail_point!("region_apply_snap", |_| { Ok(()) }); + check_abort(&abort)?; + let region_key = keys::region_state_key(region_id); + let mut region_state = self.region_state(region_id)?; // clear up origin data. 
let region = region_state.get_region().clone(); @@ -382,17 +404,8 @@ where check_abort(&abort)?; fail_point!("apply_snap_cleanup_range"); - let state_key = keys::apply_state_key(region_id); - let apply_state: RaftApplyState = - match box_try!(self.engine.get_msg_cf(CF_RAFT, &state_key)) { - Some(state) => state, - None => { - return Err(box_err!( - "failed to get raftstate from {}", - log_wrappers::Value::key(&state_key) - )); - } - }; + let apply_state = self.apply_state(region_id)?; + let term = apply_state.get_truncated_state().get_term(); let idx = apply_state.get_truncated_state().get_index(); let snap_key = SnapKey::new(region_id, term, idx); @@ -408,12 +421,14 @@ where let timer = Instant::now(); let options = ApplyOptions { db: self.engine.clone(), - region, + region: region.clone(), abort: Arc::clone(&abort), write_batch_size: self.batch_size, coprocessor_host: self.coprocessor_host.clone(), }; s.apply(options)?; + self.coprocessor_host + .post_apply_snapshot(®ion, peer_id, &snap_key, Some(&s)); let mut wb = self.engine.write_batch(); region_state.set_state(PeerState::Normal); @@ -432,7 +447,7 @@ where /// Tries to apply the snapshot of the specified Region. It calls /// `apply_snap` to do the actual work. - fn handle_apply(&mut self, region_id: u64, status: Arc) { + fn handle_apply(&mut self, region_id: u64, peer_id: u64, status: Arc) { let _ = status.compare_exchange( JOB_STATUS_PENDING, JOB_STATUS_RUNNING, @@ -444,7 +459,7 @@ where // let timer = apply_histogram.start_coarse_timer(); let start = Instant::now(); - match self.apply_snap(region_id, Arc::clone(&status)) { + match self.apply_snap(region_id, peer_id, Arc::clone(&status)) { Ok(()) => { status.swap(JOB_STATUS_FINISHED, Ordering::SeqCst); SNAP_COUNTER.apply.success.inc(); @@ -627,6 +642,46 @@ where Ok(()) } + + /// Calls observer `pre_apply_snapshot` for every task. + /// Multiple task can be `pre_apply_snapshot` at the same time. 
+ fn pre_apply_snapshot(&self, task: &Task) -> Result<()> { + let (region_id, abort, peer_id) = match task { + Task::Apply { + region_id, + status, + peer_id, + } => (region_id, status.clone(), peer_id), + _ => panic!("invalid apply snapshot task"), + }; + + let region_state = self.region_state(*region_id)?; + let apply_state = self.apply_state(*region_id)?; + + check_abort(&abort)?; + + let term = apply_state.get_truncated_state().get_term(); + let idx = apply_state.get_truncated_state().get_index(); + let snap_key = SnapKey::new(*region_id, term, idx); + let s = box_try!(self.mgr.get_snapshot_for_applying(&snap_key)); + if !s.exists() { + self.coprocessor_host.pre_apply_snapshot( + region_state.get_region(), + *peer_id, + &snap_key, + None, + ); + return Err(box_err!("missing snapshot file {}", s.path())); + } + check_abort(&abort)?; + self.coprocessor_host.pre_apply_snapshot( + region_state.get_region(), + *peer_id, + &snap_key, + Some(&s), + ); + Ok(()) + } } pub struct Runner @@ -692,8 +747,13 @@ where if self.ctx.ingest_maybe_stall() { break; } - if let Some(Task::Apply { region_id, status }) = self.pending_applies.pop_front() { - self.ctx.handle_apply(region_id, status); + if let Some(Task::Apply { + region_id, + status, + peer_id, + }) = self.pending_applies.pop_front() + { + self.ctx.handle_apply(region_id, peer_id, status); } } } @@ -763,6 +823,9 @@ where } task @ Task::Apply { .. } => { fail_point!("on_region_worker_apply", true, |_| {}); + if self.ctx.coprocessor_host.should_pre_apply_snapshot() { + let _ = self.ctx.pre_apply_snapshot(&task); + } // to makes sure applying snapshots in order. 
self.pending_applies.push_back(task); self.handle_pending_applies(); @@ -836,7 +899,10 @@ mod tests { use super::*; use crate::{ - coprocessor::CoprocessorHost, + coprocessor::{ + ApplySnapshotObserver, BoxApplySnapshotObserver, Coprocessor, CoprocessorHost, + ObserverContext, + }, store::{ peer_storage::JOB_STATUS_PENDING, snap::tests::get_test_db_for_regions, worker::RegionRunner, CasualMessage, SnapKey, SnapManager, @@ -989,6 +1055,10 @@ mod tests { .prefix("test_pending_applies") .tempdir() .unwrap(); + let obs = MockApplySnapshotObserver::default(); + let mut host = CoprocessorHost::::default(); + host.registry + .register_apply_snapshot_observer(1, BoxApplySnapshotObserver::new(obs.clone())); let mut cf_opts = CfOptions::new(); cf_opts.set_level_zero_slowdown_writes_trigger(5); @@ -1043,7 +1113,7 @@ mod tests { 0, true, 2, - CoprocessorHost::::default(), + host, router, Option::>::None, ); @@ -1104,6 +1174,7 @@ mod tests { .schedule(Task::Apply { region_id: id, status, + peer_id: 1, }) .unwrap(); }; @@ -1170,6 +1241,12 @@ mod tests { ); wait_apply_finish(&[1]); + assert_eq!(obs.pre_apply_count.load(Ordering::SeqCst), 1); + assert_eq!(obs.post_apply_count.load(Ordering::SeqCst), 1); + assert_eq!( + obs.pre_apply_hash.load(Ordering::SeqCst), + obs.post_apply_hash.load(Ordering::SeqCst) + ); // the pending apply task should be finished and snapshots are ingested. 
// note that when ingest sst, it may flush memtable if overlap, @@ -1276,4 +1353,54 @@ mod tests { thread::sleep(Duration::from_millis(PENDING_APPLY_CHECK_INTERVAL * 2)); assert!(!check_region_exist(6)); } + + #[derive(Clone, Default)] + struct MockApplySnapshotObserver { + pub pre_apply_count: Arc, + pub post_apply_count: Arc, + pub pre_apply_hash: Arc, + pub post_apply_hash: Arc, + } + + impl Coprocessor for MockApplySnapshotObserver {} + + impl ApplySnapshotObserver for MockApplySnapshotObserver { + fn pre_apply_snapshot( + &self, + _: &mut ObserverContext<'_>, + peer_id: u64, + key: &crate::store::SnapKey, + snapshot: Option<&crate::store::Snapshot>, + ) { + let code = snapshot.unwrap().total_size().unwrap() + + key.term + + key.region_id + + key.idx + + peer_id; + self.pre_apply_count.fetch_add(1, Ordering::SeqCst); + self.pre_apply_hash + .fetch_add(code as usize, Ordering::SeqCst); + } + + fn post_apply_snapshot( + &self, + _: &mut ObserverContext<'_>, + peer_id: u64, + key: &crate::store::SnapKey, + snapshot: Option<&crate::store::Snapshot>, + ) { + let code = snapshot.unwrap().total_size().unwrap() + + key.term + + key.region_id + + key.idx + + peer_id; + self.post_apply_count.fetch_add(1, Ordering::SeqCst); + self.post_apply_hash + .fetch_add(code as usize, Ordering::SeqCst); + } + + fn should_pre_apply_snapshot(&self) -> bool { + true + } + } } From 7cc8a39e316eae3e07276fc1f42b1a21fc8964de Mon Sep 17 00:00:00 2001 From: ystaticy Date: Wed, 24 Aug 2022 18:02:22 +0800 Subject: [PATCH 0169/1149] Divided Gc metrics in different labels by keymode (#12853) ref tikv/tikv#12852 Signed-off-by: ystaticy Co-authored-by: Ping Yu --- components/tikv_kv/src/metrics.rs | 8 + metrics/grafana/tikv_details.json | 48 +-- src/server/gc_worker/compaction_filter.rs | 92 +++-- src/server/gc_worker/gc_worker.rs | 241 +++++++----- src/server/gc_worker/mod.rs | 10 +- .../gc_worker/rawkv_compaction_filter.rs | 53 ++- src/server/metrics.rs | 2 +- src/storage/mvcc/metrics.rs | 6 +- 
src/storage/mvcc/txn.rs | 10 +- src/storage/txn/actions/gc.rs | 11 +- tests/failpoints/cases/mod.rs | 1 + tests/failpoints/cases/test_gc_metrics.rs | 364 ++++++++++++++++++ 12 files changed, 667 insertions(+), 179 deletions(-) create mode 100644 tests/failpoints/cases/test_gc_metrics.rs diff --git a/components/tikv_kv/src/metrics.rs b/components/tikv_kv/src/metrics.rs index 3b63c4ab1a3..c57e4dcf496 100644 --- a/components/tikv_kv/src/metrics.rs +++ b/components/tikv_kv/src/metrics.rs @@ -3,6 +3,13 @@ use prometheus_static_metric::*; make_auto_flush_static_metric! { + pub label_enum GcKeyMode { + // The enum 'txn' contains both TiDB and TxnKV scenarios statistics, + // as they have the same storage format, and use the same GC procedures. + txn, + raw, + } + pub label_enum GcKeysCF { default, lock, @@ -25,6 +32,7 @@ make_auto_flush_static_metric! { } pub struct GcKeysCounterVec: LocalIntCounter { + "key_mode" => GcKeyMode, "cf" => GcKeysCF, "tag" => GcKeysDetail, } diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index b47c226cb02..d871603c134 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -22194,10 +22194,10 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_storage_mvcc_gc_delete_versions_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "expr": "sum(rate(tikv_storage_mvcc_gc_delete_versions_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "keys/s", + "legendFormat": "{{key_mode}}_keys/s", "refId": "E" } ], @@ -22555,70 +22555,70 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_gc_compaction_filtered{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "expr": "sum(rate(tikv_gc_compaction_filtered{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", "format": "time_series", "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "filtered", + "legendFormat": "{{key_mode}}_filtered", "metric": "tikv_storage_command_total", "refId": "A", "step": 4 }, { - "expr": "sum(rate(tikv_gc_compaction_filter_skip{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "expr": "sum(rate(tikv_gc_compaction_filter_skip{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "skipped", + "legendFormat": "{{key_mode}}_skipped", "refId": "B" }, { - "expr": "sum(rate(tikv_gc_compaction_mvcc_rollback{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "expr": "sum(rate(tikv_gc_compaction_mvcc_rollback{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "mvcc-rollback/mvcc-lock", + "legendFormat": "{{key_mode}}_mvcc-rollback/mvcc-lock", "refId": "C" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_orphan_versions{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "expr": "sum(rate(tikv_gc_compaction_filter_orphan_versions{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "orphan-versions", + "legendFormat": "{{key_mode}}_orphan-versions", "refId": "D" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_perform{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "expr": "sum(rate(tikv_gc_compaction_filter_perform{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by 
(key_mode)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "performed-times", + "legendFormat": "{{key_mode}}_performed-times", "refId": "E" }, { - "expr": "sum(rate(tikv_gc_compaction_failure{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "expr": "sum(rate(tikv_gc_compaction_failure{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode,type)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "failure-{{type}}", + "legendFormat": "{{key_mode}}_failure-{{type}}", "refId": "F" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_mvcc_deletion_met{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "expr": "sum(rate(tikv_gc_compaction_filter_mvcc_deletion_met{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "mvcc-deletion-met", + "legendFormat": "{{key_mode}}_mvcc-deletion-met", "refId": "G" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_mvcc_deletion_handled{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "expr": "sum(rate(tikv_gc_compaction_filter_mvcc_deletion_handled{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "mvcc-deletion-handled", + "legendFormat": "{{key_mode}}_mvcc-deletion-handled", "refId": "H" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_mvcc_deletion_wasted{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "expr": "sum(rate(tikv_gc_compaction_filter_mvcc_deletion_wasted{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", "format": "time_series", 
"intervalFactor": 1, - "legendFormat": "mvcc-deletion-wasted", + "legendFormat": "{{key_mode}}_mvcc-deletion-wasted", "refId": "I" } ], @@ -22708,10 +22708,10 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_gcworker_gc_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\", cf=\"write\"}[1m])) by (tag)", + "expr": "sum(rate(tikv_gcworker_gc_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\", cf=\"write\"}[1m])) by (key_mode,tag)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{tag}}", + "legendFormat": "{{key_mode}}_{{tag}}", "refId": "A" } ], @@ -22801,10 +22801,10 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_gcworker_gc_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\", cf=\"default\"}[1m])) by (tag)", + "expr": "sum(rate(tikv_gcworker_gc_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\", cf=\"default\"}[1m])) by (key_mode,tag)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{tag}}", + "legendFormat": "{{key_mode}}_{{tag}}", "refId": "A" } ], diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index ef190f4760e..23f007eb8be 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -34,7 +34,7 @@ use tikv_util::{ use txn_types::{Key, TimeStamp, WriteRef, WriteType}; use crate::{ - server::gc_worker::{GcConfig, GcTask, GcWorkerConfigManager}, + server::gc_worker::{GcConfig, GcTask, GcWorkerConfigManager, STAT_TXN_KEYMODE}, storage::mvcc::{GC_DELETE_VERSIONS_HISTOGRAM, MVCC_VERSIONS_HISTOGRAM}, }; @@ -69,62 +69,73 @@ lazy_static! { pub static ref GC_CONTEXT: Mutex> = Mutex::new(None); // Filtered keys in `WriteCompactionFilter::filter_v2`. 
- pub static ref GC_COMPACTION_FILTERED: IntCounter = register_int_counter!( + pub static ref GC_COMPACTION_FILTERED: IntCounterVec = register_int_counter_vec!( "tikv_gc_compaction_filtered", - "Filtered versions by compaction" + "Filtered versions by compaction", + &["key_mode"] ) .unwrap(); // A counter for errors met by `WriteCompactionFilter`. + //TODO: Add test case to check the correctness of GC_COMPACTION_FAILURE pub static ref GC_COMPACTION_FAILURE: IntCounterVec = register_int_counter_vec!( "tikv_gc_compaction_failure", "Compaction filter meets failure", - &["type"] + &["key_mode", "type"] ) .unwrap(); // A counter for skip performing GC in compactions. - static ref GC_COMPACTION_FILTER_SKIP: IntCounter = register_int_counter!( + pub static ref GC_COMPACTION_FILTER_SKIP: IntCounterVec = register_int_counter_vec!( "tikv_gc_compaction_filter_skip", - "Skip to create compaction filter for GC because of table properties" + "Skip to create compaction filter for GC because of table properties", + &["key_mode"] ) .unwrap(); - static ref GC_COMPACTION_FILTER_PERFORM: IntCounter = register_int_counter!( + pub static ref GC_COMPACTION_FILTER_PERFORM: IntCounterVec = register_int_counter_vec!( "tikv_gc_compaction_filter_perform", - "perfrom GC in compaction filter" + "perform GC in compaction filter", + &["key_mode"] ) .unwrap(); // `WriteType::Rollback` and `WriteType::Lock` are handled in different ways. 
- pub static ref GC_COMPACTION_MVCC_ROLLBACK: IntCounter = register_int_counter!( + //TODO: Add test case to check the correctness of GC_COMPACTION_MVCC_ROLLBACK + pub static ref GC_COMPACTION_MVCC_ROLLBACK: IntCounterVec = register_int_counter_vec!( "tikv_gc_compaction_mvcc_rollback", - "Compaction of mvcc rollbacks" + "Compaction of mvcc rollbacks", + &["key_mode"] ) .unwrap(); + //TODO: Add test case to check the correctness of GC_COMPACTION_FILTER_ORPHAN_VERSIONS pub static ref GC_COMPACTION_FILTER_ORPHAN_VERSIONS: IntCounterVec = register_int_counter_vec!( "tikv_gc_compaction_filter_orphan_versions", "Compaction filter orphan versions for default CF", - &["tag"] + &["key_mode", "tag"] ).unwrap(); /// Counter of mvcc deletions met in compaction filter. - pub static ref GC_COMPACTION_FILTER_MVCC_DELETION_MET: IntCounter = register_int_counter!( + pub static ref GC_COMPACTION_FILTER_MVCC_DELETION_MET: IntCounterVec = register_int_counter_vec!( "tikv_gc_compaction_filter_mvcc_deletion_met", - "MVCC deletion from compaction filter met" + "MVCC deletion from compaction filter met", + &["key_mode"] ).unwrap(); /// Counter of mvcc deletions handled in gc worker. - pub static ref GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED: IntCounter = register_int_counter!( + pub static ref GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED: IntCounterVec = register_int_counter_vec!( "tikv_gc_compaction_filter_mvcc_deletion_handled", - "MVCC deletion from compaction filter handled" + "MVCC deletion from compaction filter handled", + &["key_mode"] ) .unwrap(); /// Mvcc deletions sent to gc worker can have already been cleared, in which case resources are /// wasted to seek them. 
- pub static ref GC_COMPACTION_FILTER_MVCC_DELETION_WASTED: IntCounter = register_int_counter!( + //TODO: Add test case to check the correctness of GC_COMPACTION_FILTER_MVCC_DELETION_WASTED + pub static ref GC_COMPACTION_FILTER_MVCC_DELETION_WASTED: IntCounterVec = register_int_counter_vec!( "tikv_gc_compaction_filter_mvcc_deletion_wasted", - "MVCC deletion from compaction filter wasted" + "MVCC deletion from compaction filter wasted", + &["key_mode"] ).unwrap(); } @@ -236,11 +247,14 @@ impl CompactionFilterFactory for WriteCompactionFilterFactory { return std::ptr::null_mut(); } drop(gc_context_option); - - GC_COMPACTION_FILTER_PERFORM.inc(); + GC_COMPACTION_FILTER_PERFORM + .with_label_values(&[STAT_TXN_KEYMODE]) + .inc(); if !check_need_gc(safe_point.into(), ratio_threshold, context) { debug!("skip gc in compaction filter because it's not necessary"); - GC_COMPACTION_FILTER_SKIP.inc(); + GC_COMPACTION_FILTER_SKIP + .with_label_values(&[STAT_TXN_KEYMODE]) + .inc(); return std::ptr::null_mut(); } @@ -289,8 +303,8 @@ struct WriteCompactionFilter { total_filtered: usize, mvcc_rollback_and_locks: usize, orphan_versions: usize, - versions_hist: LocalHistogram, - filtered_hist: LocalHistogram, + versions_hist: LocalHistogramVec, + filtered_hist: LocalHistogramVec, #[cfg(any(test, feature = "failpoints"))] callbacks_on_drop: Vec>, @@ -351,10 +365,14 @@ impl WriteCompactionFilter { } match e { ScheduleError::Full(_) => { - GC_COMPACTION_FAILURE.with_label_values(&["full"]).inc(); + GC_COMPACTION_FAILURE + .with_label_values(&[STAT_TXN_KEYMODE, "full"]) + .inc(); } ScheduleError::Stopped(_) => { - GC_COMPACTION_FAILURE.with_label_values(&["stopped"]).inc(); + GC_COMPACTION_FAILURE + .with_label_values(&[STAT_TXN_KEYMODE, "stopped"]) + .inc(); } } } @@ -423,7 +441,9 @@ impl WriteCompactionFilter { self.remove_older = true; if self.is_bottommost_level { self.mvcc_deletion_overlaps = Some(0); - GC_COMPACTION_FILTER_MVCC_DELETION_MET.inc(); + 
GC_COMPACTION_FILTER_MVCC_DELETION_MET + .with_label_values(&[STAT_TXN_KEYMODE]) + .inc(); } } } @@ -503,22 +523,30 @@ impl WriteCompactionFilter { fn switch_key_metrics(&mut self) { if self.versions != 0 { - self.versions_hist.observe(self.versions as f64); + self.versions_hist + .with_label_values(&[STAT_TXN_KEYMODE]) + .observe(self.versions as f64); self.total_versions += self.versions; self.versions = 0; } if self.filtered != 0 { - self.filtered_hist.observe(self.filtered as f64); + self.filtered_hist + .with_label_values(&[STAT_TXN_KEYMODE]) + .observe(self.filtered as f64); self.total_filtered += self.filtered; self.filtered = 0; } } fn flush_metrics(&self) { - GC_COMPACTION_FILTERED.inc_by(self.total_filtered as u64); - GC_COMPACTION_MVCC_ROLLBACK.inc_by(self.mvcc_rollback_and_locks as u64); + GC_COMPACTION_FILTERED + .with_label_values(&[STAT_TXN_KEYMODE]) + .inc_by(self.total_filtered as u64); + GC_COMPACTION_MVCC_ROLLBACK + .with_label_values(&[STAT_TXN_KEYMODE]) + .inc_by(self.mvcc_rollback_and_locks as u64); GC_COMPACTION_FILTER_ORPHAN_VERSIONS - .with_label_values(&["generated"]) + .with_label_values(&[STAT_TXN_KEYMODE, "generated"]) .inc_by(self.orphan_versions as u64); if let Some((versions, filtered)) = STATS.with(|stats| { stats.versions.update(|x| x + self.total_versions); @@ -609,7 +637,9 @@ impl CompactionFilter for WriteCompactionFilter { Ok(decision) => decision, Err(e) => { warn!("compaction filter meet error: {}", e); - GC_COMPACTION_FAILURE.with_label_values(&["filter"]).inc(); + GC_COMPACTION_FAILURE + .with_label_values(&[STAT_TXN_KEYMODE, "filter"]) + .inc(); self.encountered_errors = true; CompactionFilterDecision::Keep } diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index eaa55c9c69c..eadd1d77fb2 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -13,6 +13,7 @@ use std::{ }; use api_version::{ApiV2, KvFormat}; +use collections::HashMap; use 
concurrency_manager::ConcurrencyManager; use engine_rocks::FlowInfo; use engine_traits::{ @@ -53,7 +54,7 @@ use super::{ use crate::{ server::metrics::*, storage::{ - kv::{Engine, ScanMode, Statistics}, + kv::{metrics::GcKeyMode, Engine, ScanMode, Statistics}, mvcc::{GcInfo, MvccReader, MvccTxn}, txn::{gc, Error as TxnError}, }, @@ -67,10 +68,12 @@ const GC_LOG_FOUND_VERSION_THRESHOLD: usize = 30; /// least this many versions are deleted. const GC_LOG_DELETED_VERSION_THRESHOLD: usize = 30; -pub const GC_MAX_EXECUTING_TASKS: usize = 10; const GC_TASK_SLOW_SECONDS: u64 = 30; const GC_MAX_PENDING_TASKS: usize = 4096; +pub const STAT_TXN_KEYMODE: &str = "txn"; +pub const STAT_RAW_KEYMODE: &str = "raw"; + /// Provides safe point. pub trait GcSafePointProvider: Send + 'static { fn get_safe_point(&self) -> Result; @@ -196,7 +199,7 @@ where } /// Used to perform GC operations on the engine. -struct GcRunner +pub struct GcRunner where E: Engine, RR: RaftStoreRouter, @@ -212,7 +215,7 @@ where cfg: GcConfig, cfg_tracker: Tracker, - stats: Statistics, + stats_map: HashMap, } pub const MAX_RAW_WRITE_SIZE: usize = 32 * 1024; @@ -304,7 +307,7 @@ where limiter, cfg, cfg_tracker, - stats: Statistics::default(), + stats_map: Default::default(), } } @@ -336,7 +339,7 @@ where gc_info.deleted_versions += next_gc_info.deleted_versions; gc_info.is_completed = next_gc_info.is_completed; let stats = mem::take(&mut reader.statistics); - self.stats.add(&stats); + self.mut_stats(GcKeyMode::txn).add(&stats); Ok(()) } @@ -383,7 +386,7 @@ where self.gc_keys(keys, safe_point, None)?; } - self.stats.add(&reader.statistics); + self.mut_stats(GcKeyMode::txn).add(&reader.statistics); debug!( "gc has finished"; "start_key" => log_wrappers::Value::key(start_key), @@ -393,7 +396,7 @@ where Ok(()) } - fn gc_keys( + pub fn gc_keys( &mut self, keys: Vec, safe_point: TimeStamp, @@ -527,7 +530,7 @@ where wasted_keys += 1; } - gc_info.report_metrics(); + gc_info.report_metrics(STAT_RAW_KEYMODE); next_gc_key = 
keys.next(); gc_info = GcInfo::default(); @@ -569,7 +572,8 @@ where } if raw_modifies.write_size >= MAX_RAW_WRITE_SIZE { - self.stats.data.add(&statistics); + let cf_stats = self.mut_stats(GcKeyMode::raw).mut_cf_statistics(CF_DEFAULT); + cf_stats.add(&statistics); return Ok(()); } @@ -589,7 +593,8 @@ where gc_info.is_completed = true; - self.stats.data.add(&statistics); + let cf_stats = self.mut_stats(GcKeyMode::raw).mut_cf_statistics(CF_DEFAULT); + cf_stats.add(&statistics); if let Some(to_del_key) = latest_version_key { self.delete_raws(to_del_key, raw_modifies, gc_info); @@ -598,6 +603,14 @@ where Ok(()) } + pub fn mut_stats(&mut self, key_mode: GcKeyMode) -> &mut Statistics { + let stats = self + .stats_map + .entry(key_mode) + .or_insert_with(Default::default); + stats + } + fn delete_raws(&mut self, key: Key, raw_modifies: &mut MvccRaw, gc_info: &mut GcInfo) { let write = Modify::Delete(CF_DEFAULT, key); raw_modifies.write_size += write.size(); @@ -732,15 +745,17 @@ where Ok(lock_infos) } - fn update_statistics_metrics(&mut self) { - let stats = mem::take(&mut self.stats); - - for (cf, details) in stats.details_enum().iter() { - for (tag, count) in details.iter() { - GC_KEYS_COUNTER_STATIC - .get(*cf) - .get(*tag) - .inc_by(*count as u64); + fn update_statistics_metrics(&mut self, key_mode: GcKeyMode) { + if let Some(mut_stats) = self.stats_map.get_mut(&key_mode) { + let stats = mem::take(mut_stats); + for (cf, cf_details) in stats.details_enum().iter() { + for (tag, count) in cf_details.iter() { + GC_KEYS_COUNTER_STATIC + .get(key_mode) + .get(*cf) + .get(*tag) + .inc_by(*count as u64); + } } } } @@ -797,7 +812,7 @@ where let res = self.gc(&start_key, &end_key, safe_point); update_metrics(res.is_err()); callback(res); - self.update_statistics_metrics(); + self.update_statistics_metrics(GcKeyMode::txn); slow_log!( T timer, "GC on range [{}, {}), safe_point {}", @@ -812,11 +827,15 @@ where store_id, region_info_provider, } => { - let old_seek_tombstone = 
self.stats.write.seek_tombstone; + let old_seek_tombstone = self.mut_stats(GcKeyMode::txn).write.seek_tombstone; match self.gc_keys(keys, safe_point, Some((store_id, region_info_provider))) { Ok((handled, wasted)) => { - GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED.inc_by(handled as _); - GC_COMPACTION_FILTER_MVCC_DELETION_WASTED.inc_by(wasted as _); + GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED + .with_label_values(&[STAT_TXN_KEYMODE]) + .inc_by(handled as _); + GC_COMPACTION_FILTER_MVCC_DELETION_WASTED + .with_label_values(&[STAT_TXN_KEYMODE]) + .inc_by(wasted as _); update_metrics(false); } Err(e) => { @@ -824,10 +843,10 @@ where update_metrics(true); } } - let new_seek_tombstone = self.stats.write.seek_tombstone; + let new_seek_tombstone = self.mut_stats(GcKeyMode::txn).write.seek_tombstone; let seek_tombstone = new_seek_tombstone - old_seek_tombstone; slow_log!(T timer, "GC keys, seek_tombstone {}", seek_tombstone); - self.update_statistics_metrics(); + self.update_statistics_metrics(GcKeyMode::txn); } GcTask::RawGcKeys { keys, @@ -837,8 +856,12 @@ where } => { match self.raw_gc_keys(keys, safe_point, Some((store_id, region_info_provider))) { Ok((handled, wasted)) => { - GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED.inc_by(handled as _); - GC_COMPACTION_FILTER_MVCC_DELETION_WASTED.inc_by(wasted as _); + GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED + .with_label_values(&[STAT_RAW_KEYMODE]) + .inc_by(handled as _); + GC_COMPACTION_FILTER_MVCC_DELETION_WASTED + .with_label_values(&[STAT_RAW_KEYMODE]) + .inc_by(wasted as _); update_metrics(false); } Err(e) => { @@ -846,7 +869,7 @@ where update_metrics(true); } } - self.update_statistics_metrics(); + self.update_statistics_metrics(GcKeyMode::raw); } GcTask::UnsafeDestroyRange { ctx, @@ -893,7 +916,7 @@ where } info!("write GcTask::OrphanVersions success"; "id" => id); GC_COMPACTION_FILTER_ORPHAN_VERSIONS - .with_label_values(&["cleaned"]) + .with_label_values(&[STAT_TXN_KEYMODE, "cleaned"]) .inc_by(wb.count() as u64); 
update_metrics(false); } @@ -1243,51 +1266,25 @@ where } } -#[cfg(test)] -mod tests { - - use std::{ - collections::BTreeMap, - sync::mpsc::{self, channel}, - thread, - time::Duration, - }; +#[cfg(any(test, feature = "testexport"))] +pub mod test_gc_worker { + use std::sync::Arc; - use api_version::{ApiV2, KvFormat, RawValue}; - use engine_rocks::{util::get_cf_handle, RocksEngine, RocksSnapshot}; + use engine_rocks::{RocksEngine, RocksSnapshot}; use engine_traits::KvEngine; - use futures::executor::block_on; use kvproto::{ - kvrpcpb::{ApiVersion, Op}, - metapb::Peer, + kvrpcpb::Context, + metapb::{Peer, Region}, }; - use raft::StateRole; - use raftstore::{ - coprocessor::{region_info_accessor::RegionInfoAccessor, RegionChangeEvent}, - router::RaftStoreBlackHole, - store::RegionSnapshot, - }; - use tikv_kv::Snapshot; - use tikv_util::{codec::number::NumberEncoder, future::paired_future_callback}; - use txn_types::Mutation; + use raftstore::store::RegionSnapshot; + use tikv_kv::{write_modifies, Engine, Modify, SnapContext, WriteData}; + use txn_types::{Key, TimeStamp}; - use super::*; use crate::{ - config::DbConfig, + server::gc_worker::{GcSafePointProvider, Result as GcWorkerResult}, storage::{ - kv::{ - self, write_modifies, Callback as EngineCallback, Modify, Result as EngineResult, - SnapContext, TestEngineBuilder, WriteData, - }, - lock_manager::DummyLockManager, - mvcc::{tests::must_get_none, MAX_TXN_WRITE_SIZE}, - txn::{ - commands, - tests::{ - must_commit, must_gc, must_prewrite_delete, must_prewrite_put, must_rollback, - }, - }, - Engine, Storage, TestStorageBuilderApiV1, + kv, + kv::{Callback as EngineCallback, Result as EngineResult}, }, }; @@ -1298,7 +1295,7 @@ mod tests { /// they needs to know how data is actually represented in db. This /// wrapper allows test engines write 'z'-prefixed keys to db. 
#[derive(Clone)] - struct PrefixedEngine(kv::RocksEngine); + pub struct PrefixedEngine(pub kv::RocksEngine); impl Engine for PrefixedEngine { // Use RegionSnapshot which can remove the z prefix internally. @@ -1394,6 +1391,59 @@ mod tests { } } + pub struct MockSafePointProvider(pub u64); + + impl GcSafePointProvider for MockSafePointProvider { + fn get_safe_point(&self) -> GcWorkerResult { + Ok(self.0.into()) + } + } +} + +#[cfg(test)] +mod tests { + + use std::{ + collections::BTreeMap, + sync::mpsc::{self, channel}, + thread, + time::Duration, + }; + + use api_version::{ApiV2, KvFormat, RawValue}; + use engine_rocks::{util::get_cf_handle, RocksEngine}; + use futures::executor::block_on; + use kvproto::{ + kvrpcpb::{ApiVersion, Op}, + metapb::Peer, + }; + use raft::StateRole; + use raftstore::{ + coprocessor::{region_info_accessor::RegionInfoAccessor, RegionChangeEvent}, + router::RaftStoreBlackHole, + }; + use tikv_kv::Snapshot; + use tikv_util::{codec::number::NumberEncoder, future::paired_future_callback}; + use txn_types::Mutation; + + use super::*; + use crate::{ + config::DbConfig, + server::gc_worker::{MockSafePointProvider, PrefixedEngine}, + storage::{ + kv::{metrics::GcKeyMode, Modify, TestEngineBuilder, WriteData}, + lock_manager::DummyLockManager, + mvcc::{tests::must_get_none, MAX_TXN_WRITE_SIZE}, + txn::{ + commands, + tests::{ + must_commit, must_gc, must_prewrite_delete, must_prewrite_put, must_rollback, + }, + }, + Engine, Storage, TestStorageBuilderApiV1, + }, + }; + /// Assert the data in `storage` is the same as `expected_data`. Keys in /// `expected_data` should be encoded form without ts. 
fn check_data( @@ -1666,13 +1716,6 @@ mod tests { assert_eq!(res[..], expected_lock_info[3..9]); } - struct MockSafePointProvider(u64); - impl GcSafePointProvider for MockSafePointProvider { - fn get_safe_point(&self) -> Result { - Ok(self.0.into()) - } - } - #[test] fn test_gc_keys_with_region_info_provider() { let engine = TestEngineBuilder::new().build().unwrap(); @@ -1808,13 +1851,13 @@ mod tests { } db.flush_cf(cf, true).unwrap(); - assert_eq!(runner.stats.write.seek, 0); - assert_eq!(runner.stats.write.next, 0); + assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek, 0); + assert_eq!(runner.mut_stats(GcKeyMode::txn).write.next, 0); runner .gc_keys(keys, TimeStamp::new(200), Some((1, Arc::new(ri_provider)))) .unwrap(); - assert_eq!(runner.stats.write.seek, 1); - assert_eq!(runner.stats.write.next, 100 * 2); + assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek, 1); + assert_eq!(runner.mut_stats(GcKeyMode::txn).write.next, 100 * 2); } #[test] @@ -1906,8 +1949,8 @@ mod tests { .raw_gc_keys(to_gc_keys, TimeStamp::new(120), Some((1, ri_provider))) .unwrap(); - assert_eq!(7, runner.stats.data.next); - assert_eq!(2, runner.stats.data.seek); + assert_eq!(7, runner.mut_stats(GcKeyMode::raw).data.next); + assert_eq!(2, runner.mut_stats(GcKeyMode::raw).data.seek); let snapshot = prefixed_engine.snapshot_on_kv_engine(&[], &[]).unwrap(); @@ -1960,7 +2003,7 @@ mod tests { must_gc(&prefixed_engine, b"k2\x00", 30); // Test tombstone counter works - assert_eq!(runner.stats.write.seek_tombstone, 0); + assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek_tombstone, 0); runner .gc_keys( vec![Key::from_raw(b"k2\x00")], @@ -1968,11 +2011,14 @@ mod tests { Some((1, ri_provider.clone())), ) .unwrap(); - assert_eq!(runner.stats.write.seek_tombstone, 20); + assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek_tombstone, 20); // gc_keys with single key - runner.stats.write.seek_tombstone = 0; - assert_eq!(runner.stats.write.seek_tombstone, 0); + runner + 
.mut_stats(GcKeyMode::txn) + .mut_cf_statistics(CF_WRITE) + .seek_tombstone = 0; + assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek_tombstone, 0); runner .gc_keys( vec![Key::from_raw(b"k2")], @@ -1980,11 +2026,14 @@ mod tests { Some((1, ri_provider.clone())), ) .unwrap(); - assert_eq!(runner.stats.write.seek_tombstone, 0); + assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek_tombstone, 0); // gc_keys with multiple key - runner.stats.write.seek_tombstone = 0; - assert_eq!(runner.stats.write.seek_tombstone, 0); + runner + .mut_stats(GcKeyMode::txn) + .mut_cf_statistics(CF_WRITE) + .seek_tombstone = 0; + assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek_tombstone, 0); runner .gc_keys( vec![Key::from_raw(b"k1"), Key::from_raw(b"k2")], @@ -1992,7 +2041,7 @@ mod tests { Some((1, ri_provider.clone())), ) .unwrap(); - assert_eq!(runner.stats.write.seek_tombstone, 0); + assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek_tombstone, 0); // Test rebuilding snapshot when GC write batch limit reached // (gc_info.is_completed == false). Build a key with versions that will @@ -2012,7 +2061,10 @@ mod tests { db.flush_cf(cf, true).unwrap(); let safepoint = versions as u64 * 2; - runner.stats.write.seek_tombstone = 0; + runner + .mut_stats(GcKeyMode::txn) + .mut_cf_statistics(CF_DEFAULT) + .seek_tombstone = 0; runner .gc_keys( vec![Key::from_raw(b"k2")], @@ -2023,9 +2075,12 @@ mod tests { // The first batch will leave tombstones that will be seen while processing the // second batch, but it will be seen in `next` after seeking the latest // unexpired version, therefore `seek_tombstone` is not affected. - assert_eq!(runner.stats.write.seek_tombstone, 0); + assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek_tombstone, 0); // ... and next_tombstone indicates there's indeed more than one batches. 
- assert_eq!(runner.stats.write.next_tombstone, versions - 3); + assert_eq!( + runner.mut_stats(GcKeyMode::txn).write.next_tombstone, + versions - 3 + ); } #[test] diff --git a/src/server/gc_worker/mod.rs b/src/server/gc_worker/mod.rs index d6114a5875c..5b43b9b4be3 100644 --- a/src/server/gc_worker/mod.rs +++ b/src/server/gc_worker/mod.rs @@ -1,11 +1,11 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. mod applied_lock_collector; -mod compaction_filter; +pub mod compaction_filter; mod config; mod gc_manager; mod gc_worker; -mod rawkv_compaction_filter; +pub mod rawkv_compaction_filter; // TODO: Use separated error type for GcWorker instead. #[cfg(any(test, feature = "failpoints"))] @@ -14,7 +14,11 @@ pub use compaction_filter::WriteCompactionFilterFactory; pub use config::{GcConfig, GcWorkerConfigManager, DEFAULT_GC_BATCH_KEYS}; use engine_traits::MvccProperties; pub use gc_manager::AutoGcConfig; -pub use gc_worker::{sync_gc, GcSafePointProvider, GcTask, GcWorker, GC_MAX_EXECUTING_TASKS}; +#[cfg(any(test, feature = "testexport"))] +pub use gc_worker::test_gc_worker::{MockSafePointProvider, PrefixedEngine}; +pub use gc_worker::{ + sync_gc, GcSafePointProvider, GcTask, GcWorker, STAT_RAW_KEYMODE, STAT_TXN_KEYMODE, +}; pub use rawkv_compaction_filter::RawCompactionFilterFactory; use txn_types::TimeStamp; diff --git a/src/server/gc_worker/rawkv_compaction_filter.rs b/src/server/gc_worker/rawkv_compaction_filter.rs index 49758f5793b..e50e33c1b38 100644 --- a/src/server/gc_worker/rawkv_compaction_filter.rs +++ b/src/server/gc_worker/rawkv_compaction_filter.rs @@ -16,7 +16,7 @@ use engine_rocks::{ RocksEngine, }; use engine_traits::{raw_ttl::ttl_current_ts, MiscExt}; -use prometheus::local::LocalHistogram; +use prometheus::local::LocalHistogramVec; use raftstore::coprocessor::RegionInfoProvider; use tikv_util::worker::{ScheduleError, Scheduler}; use txn_types::Key; @@ -25,9 +25,10 @@ use crate::{ server::gc_worker::{ compaction_filter::{ 
CompactionFilterStats, DEFAULT_DELETE_BATCH_COUNT, GC_COMPACTION_FAILURE, - GC_COMPACTION_FILTERED, GC_COMPACTION_FILTER_ORPHAN_VERSIONS, GC_CONTEXT, + GC_COMPACTION_FILTERED, GC_COMPACTION_FILTER_MVCC_DELETION_MET, + GC_COMPACTION_FILTER_ORPHAN_VERSIONS, GC_CONTEXT, }, - GcTask, + GcTask, STAT_RAW_KEYMODE, }, storage::mvcc::{GC_DELETE_VERSIONS_HISTOGRAM, MVCC_VERSIONS_HISTOGRAM}, }; @@ -87,8 +88,8 @@ struct RawCompactionFilter { total_versions: usize, total_filtered: usize, orphan_versions: usize, - versions_hist: LocalHistogram, - filtered_hist: LocalHistogram, + versions_hist: LocalHistogramVec, + filtered_hist: LocalHistogramVec, encountered_errors: bool, } @@ -128,7 +129,9 @@ impl CompactionFilter for RawCompactionFilter { Ok(decision) => decision, Err(e) => { warn!("compaction filter meet error: {}", e); - GC_COMPACTION_FAILURE.with_label_values(&["filter"]).inc(); + GC_COMPACTION_FAILURE + .with_label_values(&[STAT_RAW_KEYMODE, "filter"]) + .inc(); self.encountered_errors = true; CompactionFilterDecision::Keep } @@ -203,6 +206,9 @@ impl RawCompactionFilter { // If it's the latest version, and it's deleted or expired, it needs to be sent // to GcWorker to be processed asynchronously. if !raw_value.is_valid(self.current_ts) { + GC_COMPACTION_FILTER_MVCC_DELETION_MET + .with_label_values(&[STAT_RAW_KEYMODE]) + .inc(); self.raw_handle_delete(); if self.mvcc_deletions.len() >= DEFAULT_DELETE_BATCH_COUNT { self.raw_gc_mvcc_deletions(); @@ -251,10 +257,14 @@ impl RawCompactionFilter { } match e { ScheduleError::Full(_) => { - GC_COMPACTION_FAILURE.with_label_values(&["full"]).inc(); + GC_COMPACTION_FAILURE + .with_label_values(&[STAT_RAW_KEYMODE, "full"]) + .inc(); } ScheduleError::Stopped(_) => { - GC_COMPACTION_FAILURE.with_label_values(&["stopped"]).inc(); + GC_COMPACTION_FAILURE + .with_label_values(&[STAT_RAW_KEYMODE, "stopped"]) + .inc(); } } } @@ -270,21 +280,27 @@ impl RawCompactionFilter { // TODO some refactor to avoid duplicated codes. 
fn switch_key_metrics(&mut self) { if self.versions != 0 { - self.versions_hist.observe(self.versions as f64); + self.versions_hist + .with_label_values(&[STAT_RAW_KEYMODE]) + .observe(self.versions as f64); self.total_versions += self.versions; self.versions = 0; } if self.filtered != 0 { - self.filtered_hist.observe(self.filtered as f64); + self.filtered_hist + .with_label_values(&[STAT_RAW_KEYMODE]) + .observe(self.filtered as f64); self.total_filtered += self.filtered; self.filtered = 0; } } fn flush_metrics(&self) { - GC_COMPACTION_FILTERED.inc_by(self.total_filtered as u64); + GC_COMPACTION_FILTERED + .with_label_values(&[STAT_RAW_KEYMODE]) + .inc_by(self.total_filtered as u64); GC_COMPACTION_FILTER_ORPHAN_VERSIONS - .with_label_values(&["generated"]) + .with_label_values(&[STAT_RAW_KEYMODE, "generated"]) .inc_by(self.orphan_versions as u64); if let Some((versions, filtered)) = STATS.with(|stats| { stats.versions.update(|x| x + self.total_versions); @@ -301,6 +317,13 @@ impl RawCompactionFilter { } } +#[cfg(any(test, feature = "testexport"))] +pub fn make_key(key: &[u8], ts: u64) -> Vec { + let encode_key = ApiV2::encode_raw_key(key, Some(ts.into())); + let res = keys::data_key(encode_key.as_encoded()); + res +} + #[cfg(test)] pub mod tests { @@ -317,12 +340,6 @@ pub mod tests { config::DbConfig, server::gc_worker::TestGcRunner, storage::kv::TestEngineBuilder, }; - pub fn make_key(key: &[u8], ts: u64) -> Vec { - let encode_key = ApiV2::encode_raw_key(key, Some(ts.into())); - let res = keys::data_key(encode_key.as_encoded()); - res - } - #[test] fn test_raw_compaction_filter() { let mut cfg = DbConfig::default(); diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 0d24c9f798b..86ca07f38b4 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -212,7 +212,7 @@ lazy_static! 
{ pub static ref GC_KEYS_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( "tikv_gcworker_gc_keys", "Counter of keys affected during gc", - &["cf", "tag"] + &["key_mode", "cf", "tag"] ) .unwrap(); pub static ref GC_KEY_FAILURES: IntCounter = register_int_counter!( diff --git a/src/storage/mvcc/metrics.rs b/src/storage/mvcc/metrics.rs index 3fa98e8979a..ddfdc14f5ef 100644 --- a/src/storage/mvcc/metrics.rs +++ b/src/storage/mvcc/metrics.rs @@ -54,15 +54,17 @@ make_static_metric! { } lazy_static! { - pub static ref MVCC_VERSIONS_HISTOGRAM: Histogram = register_histogram!( + pub static ref MVCC_VERSIONS_HISTOGRAM: HistogramVec = register_histogram_vec!( "tikv_storage_mvcc_versions", "Histogram of versions for each key", + &["key_mode"], exponential_buckets(1.0, 2.0, 30).unwrap() ) .unwrap(); - pub static ref GC_DELETE_VERSIONS_HISTOGRAM: Histogram = register_histogram!( + pub static ref GC_DELETE_VERSIONS_HISTOGRAM: HistogramVec = register_histogram_vec!( "tikv_storage_mvcc_gc_delete_versions", "Histogram of versions deleted by gc for each key", + &["key_mode"], exponential_buckets(1.0, 2.0, 30).unwrap() ) .unwrap(); diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index 1517ad67c78..b0a64d83f22 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -20,10 +20,14 @@ pub struct GcInfo { } impl GcInfo { - pub fn report_metrics(&self) { - MVCC_VERSIONS_HISTOGRAM.observe(self.found_versions as f64); + pub fn report_metrics(&self, key_mode: &str) { + MVCC_VERSIONS_HISTOGRAM + .with_label_values(&[key_mode]) + .observe(self.found_versions as f64); if self.deleted_versions > 0 { - GC_DELETE_VERSIONS_HISTOGRAM.observe(self.deleted_versions as f64); + GC_DELETE_VERSIONS_HISTOGRAM + .with_label_values(&[key_mode]) + .observe(self.deleted_versions as f64); } } } diff --git a/src/storage/txn/actions/gc.rs b/src/storage/txn/actions/gc.rs index 07a95f4b06b..29264c7df90 100644 --- a/src/storage/txn/actions/gc.rs +++ b/src/storage/txn/actions/gc.rs 
@@ -2,9 +2,12 @@ use txn_types::{Key, TimeStamp, Write, WriteType}; -use crate::storage::{ - mvcc::{GcInfo, MvccReader, MvccTxn, Result as MvccResult, MAX_TXN_WRITE_SIZE}, - Snapshot, +use crate::{ + server::gc_worker::STAT_TXN_KEYMODE, + storage::{ + mvcc::{GcInfo, MvccReader, MvccTxn, Result as MvccResult, MAX_TXN_WRITE_SIZE}, + Snapshot, + }, }; pub fn gc<'a, S: Snapshot>( @@ -15,7 +18,7 @@ pub fn gc<'a, S: Snapshot>( ) -> MvccResult { let gc = Gc::new(txn, reader, key); let info = gc.run(safe_point)?; - info.report_metrics(); + info.report_metrics(STAT_TXN_KEYMODE); Ok(info) } diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index 33063777e01..1c38571e280 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -10,6 +10,7 @@ mod test_coprocessor; mod test_disk_full; mod test_early_apply; mod test_encryption; +mod test_gc_metrics; mod test_gc_worker; mod test_hibernate; mod test_import_service; diff --git a/tests/failpoints/cases/test_gc_metrics.rs b/tests/failpoints/cases/test_gc_metrics.rs new file mode 100644 index 00000000000..ede14988744 --- /dev/null +++ b/tests/failpoints/cases/test_gc_metrics.rs @@ -0,0 +1,364 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + sync::{atomic::AtomicU64, mpsc, Arc}, + thread, + time::Duration, +}; + +use api_version::{ApiV2, KvFormat, RawValue}; +use engine_rocks::{util::get_cf_handle, RocksEngine}; +use engine_traits::{CF_DEFAULT, CF_WRITE}; +use kvproto::{ + kvrpcpb::*, + metapb::{Peer, Region}, +}; +use pd_client::FeatureGate; +use raft::StateRole; +use raftstore::{ + coprocessor::{CoprocessorHost, RegionChangeEvent}, + router::RaftStoreBlackHole, + RegionInfoAccessor, +}; +use tikv::{ + config::DbConfig, + server::gc_worker::{ + compaction_filter::{ + GC_COMPACTION_FILTERED, GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED, + GC_COMPACTION_FILTER_MVCC_DELETION_MET, GC_COMPACTION_FILTER_PERFORM, + GC_COMPACTION_FILTER_SKIP, + }, + rawkv_compaction_filter::make_key, + AutoGcConfig, GcConfig, GcWorker, MockSafePointProvider, PrefixedEngine, TestGcRunner, + STAT_RAW_KEYMODE, STAT_TXN_KEYMODE, + }, + storage::{ + kv::{Modify, TestEngineBuilder, WriteData}, + mvcc::{tests::must_get, MVCC_VERSIONS_HISTOGRAM}, + txn::tests::{must_commit, must_prewrite_delete, must_prewrite_put}, + Engine, + }, +}; +use txn_types::{Key, TimeStamp}; + +#[test] +fn test_txn_create_compaction_filter() { + GC_COMPACTION_FILTER_PERFORM.reset(); + GC_COMPACTION_FILTER_SKIP.reset(); + + let mut cfg = DbConfig::default(); + cfg.writecf.disable_auto_compactions = true; + cfg.writecf.dynamic_level_bytes = false; + let dir = tempfile::TempDir::new().unwrap(); + let builder = TestEngineBuilder::new().path(dir.path()); + let engine = builder.build_with_cfg(&cfg).unwrap(); + let raw_engine = engine.get_rocksdb(); + + let mut gc_runner = TestGcRunner::new(0); + let value = vec![b'v'; 512]; + + must_prewrite_put(&engine, b"zkey", &value, b"zkey", 100); + must_commit(&engine, b"zkey", 100, 110); + + gc_runner + .safe_point(TimeStamp::new(1).into_inner()) + .gc(&raw_engine); + assert_eq!( + GC_COMPACTION_FILTER_PERFORM + .with_label_values(&[STAT_TXN_KEYMODE]) + .get(), + 1 + ); + assert_eq!( + 
GC_COMPACTION_FILTER_SKIP + .with_label_values(&[STAT_TXN_KEYMODE]) + .get(), + 1 + ); + + GC_COMPACTION_FILTER_PERFORM.reset(); + GC_COMPACTION_FILTER_SKIP.reset(); +} + +#[test] +fn test_txn_mvcc_filtered() { + MVCC_VERSIONS_HISTOGRAM.reset(); + GC_COMPACTION_FILTERED.reset(); + + let engine = TestEngineBuilder::new().build().unwrap(); + let raw_engine = engine.get_rocksdb(); + let value = vec![b'v'; 512]; + let mut gc_runner = TestGcRunner::new(0); + + // GC can't delete keys after the given safe point. + must_prewrite_put(&engine, b"zkey", &value, b"zkey", 100); + must_commit(&engine, b"zkey", 100, 110); + gc_runner.safe_point(50).gc(&raw_engine); + must_get(&engine, b"zkey", 110, &value); + + // GC can't delete keys before the safe ponit if they are latest versions. + gc_runner.safe_point(200).gc(&raw_engine); + must_get(&engine, b"zkey", 110, &value); + + must_prewrite_put(&engine, b"zkey", &value, b"zkey", 120); + must_commit(&engine, b"zkey", 120, 130); + + // GC can't delete the latest version before the safe ponit. + gc_runner.safe_point(115).gc(&raw_engine); + must_get(&engine, b"zkey", 110, &value); + + // GC a version will also delete the key on default CF. 
+ gc_runner.safe_point(200).gc(&raw_engine); + assert_eq!( + MVCC_VERSIONS_HISTOGRAM + .with_label_values(&[STAT_TXN_KEYMODE]) + .get_sample_sum(), + 4_f64 + ); + assert_eq!( + GC_COMPACTION_FILTERED + .with_label_values(&[STAT_TXN_KEYMODE]) + .get(), + 1 + ); + + MVCC_VERSIONS_HISTOGRAM.reset(); + GC_COMPACTION_FILTERED.reset(); +} + +#[test] +fn test_txn_gc_keys_handled() { + GC_COMPACTION_FILTER_MVCC_DELETION_MET.reset(); + GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED.reset(); + + let engine = TestEngineBuilder::new().build().unwrap(); + let prefixed_engine = PrefixedEngine(engine.clone()); + + let (tx, _rx) = mpsc::channel(); + let feature_gate = FeatureGate::default(); + feature_gate.set_version("5.0.0").unwrap(); + let mut gc_worker = GcWorker::new( + prefixed_engine.clone(), + RaftStoreBlackHole, + tx, + GcConfig::default(), + feature_gate, + ); + gc_worker.start().unwrap(); + + let mut r1 = Region::default(); + r1.set_id(1); + r1.mut_region_epoch().set_version(1); + r1.set_start_key(b"".to_vec()); + r1.set_end_key(b"".to_vec()); + r1.mut_peers().push(Peer::default()); + r1.mut_peers()[0].set_store_id(1); + + let sp_provider = MockSafePointProvider(200); + let mut host = CoprocessorHost::::default(); + let ri_provider = RegionInfoAccessor::new(&mut host); + let auto_gc_cfg = AutoGcConfig::new(sp_provider, ri_provider, 1); + let safe_point = Arc::new(AtomicU64::new(500)); + gc_worker.start_auto_gc(auto_gc_cfg, safe_point).unwrap(); + host.on_region_changed(&r1, RegionChangeEvent::Create, StateRole::Leader); + + let db = engine.kv_engine().as_inner().clone(); + let cf = get_cf_handle(&db, CF_WRITE).unwrap(); + + for i in 0..3 { + let k = format!("k{:02}", i).into_bytes(); + must_prewrite_put(&prefixed_engine, &k, b"value", &k, 101); + must_commit(&prefixed_engine, &k, 101, 102); + must_prewrite_delete(&prefixed_engine, &k, &k, 151); + must_commit(&prefixed_engine, &k, 151, 152); + } + + db.flush_cf(cf, true).unwrap(); + + db.compact_range_cf(cf, None, None); + + 
// This compaction can schedule gc task + db.compact_range_cf(cf, None, None); + thread::sleep(Duration::from_millis(100)); + + assert_eq!( + GC_COMPACTION_FILTER_MVCC_DELETION_MET + .with_label_values(&[STAT_TXN_KEYMODE]) + .get(), + 6 + ); + + assert_eq!( + GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED + .with_label_values(&[STAT_TXN_KEYMODE]) + .get(), + 3 + ); + + GC_COMPACTION_FILTER_MVCC_DELETION_MET.reset(); + GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED.reset(); +} + +#[test] +fn test_raw_mvcc_filtered() { + MVCC_VERSIONS_HISTOGRAM.reset(); + GC_COMPACTION_FILTERED.reset(); + + let mut cfg = DbConfig::default(); + cfg.defaultcf.disable_auto_compactions = true; + cfg.defaultcf.dynamic_level_bytes = false; + + let engine = TestEngineBuilder::new() + .api_version(ApiVersion::V2) + .build_with_cfg(&cfg) + .unwrap(); + let raw_engine = engine.get_rocksdb(); + let mut gc_runner = TestGcRunner::new(0); + + let user_key = b"r\0aaaaaaaaaaa"; + + let test_raws = vec![ + (user_key, 100, false), + (user_key, 90, false), + (user_key, 70, false), + ]; + + let modifies = test_raws + .into_iter() + .map(|(key, ts, is_delete)| { + ( + make_key(key, ts), + ApiV2::encode_raw_value(RawValue { + user_value: &[0; 10][..], + expire_ts: Some(TimeStamp::max().into_inner()), + is_delete, + }), + ) + }) + .map(|(k, v)| Modify::Put(CF_DEFAULT, Key::from_encoded_slice(k.as_slice()), v)) + .collect(); + + let ctx = Context { + api_version: ApiVersion::V2, + ..Default::default() + }; + let batch = WriteData::from_modifies(modifies); + + engine.write(&ctx, batch).unwrap(); + + gc_runner.safe_point(80).gc_raw(&raw_engine); + + assert_eq!( + MVCC_VERSIONS_HISTOGRAM + .with_label_values(&[STAT_RAW_KEYMODE]) + .get_sample_sum(), + 1_f64 + ); + assert_eq!( + GC_COMPACTION_FILTERED + .with_label_values(&[STAT_RAW_KEYMODE]) + .get(), + 1 + ); + + MVCC_VERSIONS_HISTOGRAM.reset(); + GC_COMPACTION_FILTERED.reset(); +} + +#[test] +fn test_raw_gc_keys_handled() { + 
GC_COMPACTION_FILTER_MVCC_DELETION_MET.reset(); + GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED.reset(); + + let engine = TestEngineBuilder::new() + .api_version(ApiVersion::V2) + .build() + .unwrap(); + let prefixed_engine = PrefixedEngine(engine.clone()); + + let (tx, _rx) = mpsc::channel(); + let feature_gate = FeatureGate::default(); + feature_gate.set_version("5.0.0").unwrap(); + let mut gc_worker = GcWorker::new( + prefixed_engine, + RaftStoreBlackHole, + tx, + GcConfig::default(), + feature_gate, + ); + gc_worker.start().unwrap(); + + let mut r1 = Region::default(); + r1.set_id(1); + r1.mut_region_epoch().set_version(1); + r1.set_start_key(b"".to_vec()); + r1.set_end_key(b"".to_vec()); + r1.mut_peers().push(Peer::default()); + r1.mut_peers()[0].set_store_id(1); + + let sp_provider = MockSafePointProvider(200); + let mut host = CoprocessorHost::::default(); + let ri_provider = RegionInfoAccessor::new(&mut host); + let auto_gc_cfg = AutoGcConfig::new(sp_provider, ri_provider, 1); + let safe_point = Arc::new(AtomicU64::new(500)); + gc_worker.start_auto_gc(auto_gc_cfg, safe_point).unwrap(); + host.on_region_changed(&r1, RegionChangeEvent::Create, StateRole::Leader); + + let db = engine.kv_engine().as_inner().clone(); + + let user_key_del = b"r\0aaaaaaaaaaa"; + + // If it's deleted, it will call async scheduler GcTask. 
+ let test_raws = vec![ + (user_key_del, 9, true), + (user_key_del, 5, false), + (user_key_del, 1, false), + ]; + + let modifies = test_raws + .into_iter() + .map(|(key, ts, is_delete)| { + ( + make_key(key, ts), + ApiV2::encode_raw_value(RawValue { + user_value: &[0; 10][..], + expire_ts: Some(TimeStamp::max().into_inner()), + is_delete, + }), + ) + }) + .map(|(k, v)| Modify::Put(CF_DEFAULT, Key::from_encoded_slice(k.as_slice()), v)) + .collect(); + + let ctx = Context { + api_version: ApiVersion::V2, + ..Default::default() + }; + + let batch = WriteData::from_modifies(modifies); + + engine.write(&ctx, batch).unwrap(); + + let cf = get_cf_handle(&db, CF_DEFAULT).unwrap(); + db.flush_cf(cf, true).unwrap(); + + db.compact_range_cf(cf, None, None); + + thread::sleep(Duration::from_millis(100)); + + assert_eq!( + GC_COMPACTION_FILTER_MVCC_DELETION_MET + .with_label_values(&[STAT_RAW_KEYMODE]) + .get(), + 1 + ); + assert_eq!( + GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED + .with_label_values(&[STAT_RAW_KEYMODE]) + .get(), + 1 + ); + + GC_COMPACTION_FILTER_MVCC_DELETION_MET.reset(); + GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED.reset(); +} From b5bc5078a1fd767390c8d696cf0d965623b962f7 Mon Sep 17 00:00:00 2001 From: hehechen Date: Wed, 24 Aug 2022 19:50:21 +0800 Subject: [PATCH 0170/1149] resolved_ts: fix check_leader to tiflash proxy (#13312) ref tikv/tikv#12092, close tikv/tikv#13310 For TiFlash proxy, should use store.peer_address instead of store.address. For TiKV, always use the same peer_address and address, so the change won't affect. 
Signed-off-by: hehechen Co-authored-by: Ti Chi Robot --- components/resolved_ts/src/advance.rs | 2 +- src/server/node.rs | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index 57bf20e7d0b..190c4474711 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -397,7 +397,7 @@ async fn get_tikv_client( CString::new("random id").unwrap(), CONN_ID.fetch_add(1, Ordering::SeqCst), ); - let channel = security_mgr.connect(cb, &store.address); + let channel = security_mgr.connect(cb, &store.peer_address); let cli = TikvClient::new(channel); clients.insert(store_id, cli.clone()); RTS_TIKV_CLIENT_INIT_DURATION_HISTOGRAM.observe(start.saturating_elapsed_secs()); diff --git a/src/server/node.rs b/src/server/node.rs index a282bcded37..d8bee9abfd7 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -121,8 +121,10 @@ where store.set_id(INVALID_ID); if cfg.advertise_addr.is_empty() { store.set_address(cfg.addr.clone()); + store.set_peer_address(cfg.addr.clone()); } else { - store.set_address(cfg.advertise_addr.clone()) + store.set_address(cfg.advertise_addr.clone()); + store.set_peer_address(cfg.advertise_addr.clone()); } if cfg.advertise_status_addr.is_empty() { store.set_status_address(cfg.status_addr.clone()); From afbacfc4a864080f958497ce9a387df854f62f0f Mon Sep 17 00:00:00 2001 From: ekexium Date: Thu, 25 Aug 2022 10:08:21 +0800 Subject: [PATCH 0171/1149] txn: deferred constraint check (#13121) close tikv/tikv#13128, ref pingcap/tidb#36579 Signed-off-by: ekexium Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/cdc/src/old_value.rs | 3 +- components/cdc/tests/mod.rs | 6 +- components/resolved_ts/src/cmd.rs | 4 +- components/resolved_ts/tests/mod.rs | 9 +- components/test_raftstore/src/util.rs | 6 +- src/storage/mod.rs | 51 +++- src/storage/mvcc/reader/point_getter.rs | 6 +- src/storage/mvcc/reader/reader.rs | 6 +- 
src/storage/mvcc/reader/scanner/forward.rs | 36 ++- src/storage/mvcc/txn.rs | 58 ++-- .../txn/actions/acquire_pessimistic_lock.rs | 62 ++-- src/storage/txn/actions/cleanup.rs | 4 +- src/storage/txn/actions/commit.rs | 4 +- src/storage/txn/actions/prewrite.rs | 197 ++++++++----- src/storage/txn/actions/tests.rs | 272 +++++++++++++++--- src/storage/txn/commands/check_txn_status.rs | 14 +- src/storage/txn/commands/mod.rs | 8 +- src/storage/txn/commands/prewrite.rs | 171 +++++++---- src/storage/txn/commands/rollback.rs | 4 +- src/storage/txn/store.rs | 4 +- tests/benches/hierarchy/mvcc/mod.rs | 14 +- tests/benches/hierarchy/txn/mod.rs | 14 +- tests/failpoints/cases/test_merge.rs | 4 +- tests/failpoints/cases/test_split_region.rs | 6 +- tests/failpoints/cases/test_storage.rs | 20 +- tests/failpoints/cases/test_transaction.rs | 9 +- tests/integrations/server/kv_service.rs | 4 +- 28 files changed, 714 insertions(+), 284 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b067e3337e5..52ad7912203 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2627,7 +2627,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#affce57868b9f8befac389559d372369b2cb616f" +source = "git+https://github.com/pingcap/kvproto.git#a0f02b6efcee6112bdc313988bf6c0ae3f83c07d" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/cdc/src/old_value.rs b/components/cdc/src/old_value.rs index 89f78f694c3..9d60474b952 100644 --- a/components/cdc/src/old_value.rs +++ b/components/cdc/src/old_value.rs @@ -293,6 +293,7 @@ mod tests { use engine_rocks::{ReadPerfInstant, RocksEngine}; use engine_traits::{KvEngine, MiscExt}; + use kvproto::kvrpcpb::PrewriteRequestPessimisticAction::*; use tikv::{ config::DbConfig, storage::{kv::TestEngineBuilder, txn::tests::*}, @@ -415,7 +416,7 @@ mod tests { must_commit(&engine, k, 7, 9); must_acquire_pessimistic_lock(&engine, k, k, 8, 10); - must_pessimistic_prewrite_put(&engine, k, b"v5", k, 8, 
10, true); + must_pessimistic_prewrite_put(&engine, k, b"v5", k, 8, 10, DoPessimisticCheck); must_get_eq(&kv_engine, &key, 10, Some(b"v4".to_vec())); must_commit(&engine, k, 8, 11); } diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index 63c06551a80..89eebcceec7 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -11,7 +11,7 @@ use grpcio::{ }; use kvproto::{ cdcpb::{create_change_data, ChangeDataClient, ChangeDataEvent, ChangeDataRequest}, - kvrpcpb::*, + kvrpcpb::{PrewriteRequestPessimisticAction::*, *}, tikvpb::TikvClient, }; use online_config::OnlineConfig; @@ -418,7 +418,9 @@ impl TestSuite { prewrite_req.start_version = ts.into_inner(); prewrite_req.lock_ttl = prewrite_req.start_version + 1; prewrite_req.for_update_ts = for_update_ts.into_inner(); - prewrite_req.mut_is_pessimistic_lock().push(true); + prewrite_req + .mut_pessimistic_actions() + .push(DoPessimisticCheck); let prewrite_resp = self .get_tikv_client(region_id) .kv_prewrite(&prewrite_req) diff --git a/components/resolved_ts/src/cmd.rs b/components/resolved_ts/src/cmd.rs index 277a31e2001..0bb22e0a21e 100644 --- a/components/resolved_ts/src/cmd.rs +++ b/components/resolved_ts/src/cmd.rs @@ -286,7 +286,7 @@ pub fn lock_only_filter(mut cmd_batch: CmdBatch) -> Option { #[cfg(test)] mod tests { use concurrency_manager::ConcurrencyManager; - use kvproto::kvrpcpb::AssertionLevel; + use kvproto::kvrpcpb::{AssertionLevel, PrewriteRequestPessimisticAction::*}; use tikv::storage::{ kv::{MockEngineBuilder, TestEngineBuilder}, lock_manager::DummyLockManager, @@ -405,7 +405,7 @@ mod tests { }, Mutation::make_put(k1.clone(), b"v4".to_vec()), &None, - false, + SkipPessimisticCheck, ) .unwrap(); one_pc_commit_ts(true, &mut txn, 10.into(), &DummyLockManager); diff --git a/components/resolved_ts/tests/mod.rs b/components/resolved_ts/tests/mod.rs index 3d7fdb87569..0e6d8bbc9f8 100644 --- a/components/resolved_ts/tests/mod.rs +++ b/components/resolved_ts/tests/mod.rs @@ 
-6,7 +6,10 @@ use collections::HashMap; use concurrency_manager::ConcurrencyManager; use engine_rocks::{RocksEngine, RocksSnapshot}; use grpcio::{ChannelBuilder, ClientUnaryReceiver, Environment}; -use kvproto::{kvrpcpb::*, tikvpb::TikvClient}; +use kvproto::{ + kvrpcpb::{PrewriteRequestPessimisticAction::*, *}, + tikvpb::TikvClient, +}; use online_config::ConfigValue; use raftstore::coprocessor::CoprocessorHost; use resolved_ts::{Observer, Task}; @@ -261,7 +264,9 @@ impl TestSuite { prewrite_req.start_version = ts.into_inner(); prewrite_req.lock_ttl = prewrite_req.start_version + 1; prewrite_req.for_update_ts = for_update_ts.into_inner(); - prewrite_req.mut_is_pessimistic_lock().push(true); + prewrite_req + .mut_pessimistic_actions() + .push(DoPessimisticCheck); let prewrite_resp = self .get_tikv_client(region_id) .kv_prewrite(&prewrite_req) diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index eaeaf6a4e0f..8cac947dc57 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -24,7 +24,7 @@ use futures::executor::block_on; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ encryptionpb::EncryptionMethod, - kvrpcpb::*, + kvrpcpb::{PrewriteRequestPessimisticAction::*, *}, metapb::{self, RegionEpoch}, pdpb::{ ChangePeer, ChangePeerV2, CheckPolicy, Merge, RegionHeartbeatResponse, SplitRegion, @@ -894,7 +894,7 @@ pub fn must_kv_prewrite_with( let mut prewrite_req = PrewriteRequest::default(); prewrite_req.set_context(ctx); if for_update_ts != 0 { - prewrite_req.is_pessimistic_lock = vec![true; muts.len()]; + prewrite_req.pessimistic_actions = vec![DoPessimisticCheck; muts.len()]; } prewrite_req.set_mutations(muts.into_iter().collect()); prewrite_req.primary_lock = pk; @@ -931,7 +931,7 @@ pub fn try_kv_prewrite_with( let mut prewrite_req = PrewriteRequest::default(); prewrite_req.set_context(ctx); if for_update_ts != 0 { - prewrite_req.is_pessimistic_lock = vec![true; muts.len()]; 
+ prewrite_req.pessimistic_actions = vec![DoPessimisticCheck; muts.len()]; } prewrite_req.set_mutations(muts.into_iter().collect()); prewrite_req.primary_lock = pk; diff --git a/src/storage/mod.rs b/src/storage/mod.rs index d974c731db0..3024a05381f 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3227,7 +3227,7 @@ mod tests { use error_code::ErrorCodeExt; use errors::extract_key_error; use futures::executor::block_on; - use kvproto::kvrpcpb::{AssertionLevel, CommandPri, Op}; + use kvproto::kvrpcpb::{AssertionLevel, CommandPri, Op, PrewriteRequestPessimisticAction::*}; use tikv_util::config::ReadableSize; use tracker::INVALID_TRACKER_TOKEN; use txn_types::{Mutation, PessimisticLock, WriteType}; @@ -7199,8 +7199,14 @@ mod tests { .sched_txn_command( commands::PrewritePessimistic::new( vec![ - (Mutation::make_put(key.clone(), val.clone()), true), - (Mutation::make_put(key2.clone(), val2.clone()), false), + ( + Mutation::make_put(key.clone(), val.clone()), + DoPessimisticCheck, + ), + ( + Mutation::make_put(key2.clone(), val2.clone()), + SkipPessimisticCheck, + ), ], key.to_raw().unwrap(), 10.into(), @@ -8059,8 +8065,14 @@ mod tests { .sched_txn_command( commands::PrewritePessimistic::new( vec![ - (Mutation::make_put(Key::from_raw(b"d"), b"v".to_vec()), true), - (Mutation::make_put(Key::from_raw(b"e"), b"v".to_vec()), true), + ( + Mutation::make_put(Key::from_raw(b"d"), b"v".to_vec()), + DoPessimisticCheck, + ), + ( + Mutation::make_put(Key::from_raw(b"e"), b"v".to_vec()), + DoPessimisticCheck, + ), ], b"d".to_vec(), 200.into(), @@ -8152,7 +8164,10 @@ mod tests { storage .sched_txn_command( commands::PrewritePessimistic::new( - vec![(Mutation::make_put(key2.clone(), value2.clone()), true)], + vec![( + Mutation::make_put(key2.clone(), value2.clone()), + DoPessimisticCheck, + )], k2.to_vec(), 10.into(), 0, @@ -8197,8 +8212,11 @@ mod tests { .sched_txn_command( commands::PrewritePessimistic::new( vec![ - (Mutation::make_put(key1.clone(), value1), true), - 
(Mutation::make_put(key2.clone(), value2), false), + (Mutation::make_put(key1.clone(), value1), DoPessimisticCheck), + ( + Mutation::make_put(key2.clone(), value2), + SkipPessimisticCheck, + ), ], k1.to_vec(), 1.into(), @@ -8435,23 +8453,23 @@ mod tests { vec![ ( Mutation::make_put(Key::from_raw(b"k1"), b"v1".to_vec()), - true, + DoPessimisticCheck, ), ( Mutation::make_put(Key::from_raw(b"k3"), b"v2".to_vec()), - true, + DoPessimisticCheck, ), ( Mutation::make_put(Key::from_raw(b"k4"), b"v4".to_vec()), - true, + DoPessimisticCheck, ), ( Mutation::make_put(Key::from_raw(b"k5"), b"v5".to_vec()), - true, + DoPessimisticCheck, ), ( Mutation::make_put(Key::from_raw(b"k6"), b"v6".to_vec()), - true, + DoPessimisticCheck, ), ], b"k1".to_vec(), @@ -9023,7 +9041,10 @@ mod tests { storage .sched_txn_command( commands::PrewritePessimistic::new( - vec![(Mutation::make_put(k1.clone(), b"v".to_vec()), true)], + vec![( + Mutation::make_put(k1.clone(), b"v".to_vec()), + DoPessimisticCheck, + )], b"k1".to_vec(), 10.into(), 3000, @@ -9081,7 +9102,7 @@ mod tests { storage .sched_txn_command( commands::PrewritePessimistic::new( - vec![(Mutation::make_put(k1, b"v".to_vec()), true)], + vec![(Mutation::make_put(k1, b"v".to_vec()), DoPessimisticCheck)], b"k1".to_vec(), 10.into(), 3000, diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index 7c521bb5952..2a231b42823 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -389,7 +389,7 @@ impl PointGetter { #[cfg(test)] mod tests { use engine_rocks::ReadPerfInstant; - use kvproto::kvrpcpb::{Assertion, AssertionLevel}; + use kvproto::kvrpcpb::{Assertion, AssertionLevel, PrewriteRequestPessimisticAction::*}; use txn_types::SHORT_VALUE_MAX_LEN; use super::*; @@ -929,7 +929,7 @@ mod tests { // // write.start_ts(10) < primary_lock.start_ts(15) < write.commit_ts(20) must_acquire_pessimistic_lock(&engine, key, key, 15, 50); - 
must_pessimistic_prewrite_delete(&engine, key, key, 15, 50, true); + must_pessimistic_prewrite_delete(&engine, key, key, 15, 50, DoPessimisticCheck); let mut getter = new_point_getter(&engine, TimeStamp::max()); must_get_value(&mut getter, key, val); } @@ -1017,7 +1017,7 @@ mod tests { key, &None, 80.into(), - false, + SkipPessimisticCheck, 100, 80.into(), 1, diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index c45fabe2540..f1ed7748a15 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -640,7 +640,7 @@ pub mod tests { CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use kvproto::{ - kvrpcpb::{AssertionLevel, Context}, + kvrpcpb::{AssertionLevel, Context, PrewriteRequestPessimisticAction::*}, metapb::{Peer, Region}, }; use raftstore::store::RegionSnapshot; @@ -749,7 +749,7 @@ pub mod tests { &Self::txn_props(start_ts, pk, false), m, &None, - false, + SkipPessimisticCheck, ) .unwrap(); self.write(txn.into_modifies()); @@ -773,7 +773,7 @@ pub mod tests { &Self::txn_props(start_ts, pk, true), m, &None, - true, + DoPessimisticCheck, ) .unwrap(); self.write(txn.into_modifies()); diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index a7a839cf2e7..6bed0289053 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -2029,7 +2029,7 @@ mod latest_entry_tests { #[cfg(test)] mod delta_entry_tests { use engine_traits::{CF_LOCK, CF_WRITE}; - use kvproto::kvrpcpb::Context; + use kvproto::kvrpcpb::{Context, PrewriteRequestPessimisticAction::*}; use txn_types::{is_short_value, SHORT_VALUE_MAX_LEN}; use super::{super::ScannerBuilder, test_util::*, *}; @@ -2486,7 +2486,7 @@ mod delta_entry_tests { key, start_ts, commit_ts - 1, - true, + DoPessimisticCheck, ), WriteType::Delete => must_pessimistic_prewrite_delete( &engine, @@ -2494,7 +2494,7 @@ mod delta_entry_tests { key, start_ts, commit_ts - 1, - true, + 
DoPessimisticCheck, ), WriteType::Lock => must_pessimistic_prewrite_lock( &engine, @@ -2502,7 +2502,7 @@ mod delta_entry_tests { key, start_ts, commit_ts - 1, - true, + DoPessimisticCheck, ), WriteType::Rollback => must_rollback(&engine, key, start_ts, false), } @@ -2528,14 +2528,24 @@ mod delta_entry_tests { key, ts, for_update_ts, - true, + DoPessimisticCheck, + ), + LockType::Delete => must_pessimistic_prewrite_delete( + &engine, + key, + key, + ts, + for_update_ts, + DoPessimisticCheck, + ), + LockType::Lock => must_pessimistic_prewrite_lock( + &engine, + key, + key, + ts, + for_update_ts, + DoPessimisticCheck, ), - LockType::Delete => { - must_pessimistic_prewrite_delete(&engine, key, key, ts, for_update_ts, true) - } - LockType::Lock => { - must_pessimistic_prewrite_lock(&engine, key, key, ts, for_update_ts, true) - } LockType::Pessimistic => {} } } @@ -2631,12 +2641,12 @@ mod delta_entry_tests { // Generate put for [b] at 15. must_acquire_pessimistic_lock(&engine, b"b", b"b", 9, 15); - must_pessimistic_prewrite_put(&engine, b"b", b"b_15", b"b", 9, 15, true); + must_pessimistic_prewrite_put(&engine, b"b", b"b_15", b"b", 9, 15, DoPessimisticCheck); must_prewrite_put(&engine, b"c", b"c_4", b"c", 4); must_commit(&engine, b"c", 4, 6); must_acquire_pessimistic_lock(&engine, b"c", b"c", 5, 15); - must_pessimistic_prewrite_put(&engine, b"c", b"c_5", b"c", 5, 15, true); + must_pessimistic_prewrite_put(&engine, b"c", b"c_5", b"c", 5, 15, DoPessimisticCheck); must_cleanup(&engine, b"c", 20, 0); let entry_a_1 = EntryBuilder::default() diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index b0a64d83f22..a9032d1b463 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -274,7 +274,7 @@ pub(crate) fn make_txn_error( #[cfg(test)] pub(crate) mod tests { - use kvproto::kvrpcpb::{AssertionLevel, Context}; + use kvproto::kvrpcpb::{AssertionLevel, Context, PrewriteRequestPessimisticAction::*}; use txn_types::{TimeStamp, WriteType, 
SHORT_VALUE_MAX_LEN}; use super::*; @@ -341,7 +341,7 @@ pub(crate) mod tests { must_commit(&engine, k1, 25, 27); must_acquire_pessimistic_lock(&engine, k1, k1, 23, 29); must_get(&engine, k1, 30, v); - must_pessimistic_prewrite_delete(&engine, k1, k1, 23, 29, true); + must_pessimistic_prewrite_delete(&engine, k1, k1, 23, 29, DoPessimisticCheck); must_get_err(&engine, k1, 30); // should read the latest record when `ts == u64::max_value()` // even if lock.start_ts(23) < latest write.commit_ts(27) @@ -521,8 +521,8 @@ pub(crate) mod tests { must_acquire_pessimistic_lock(&engine, k1, k1, 15, 15); must_acquire_pessimistic_lock(&engine, k2, k1, 15, 17); - must_pessimistic_prewrite_put(&engine, k1, v, k1, 15, 17, true); - must_pessimistic_prewrite_put(&engine, k2, v, k1, 15, 17, true); + must_pessimistic_prewrite_put(&engine, k1, v, k1, 15, 17, DoPessimisticCheck); + must_pessimistic_prewrite_put(&engine, k2, v, k1, 15, 17, DoPessimisticCheck); must_rollback(&engine, k1, 15, false); must_rollback(&engine, k2, 15, false); // The rollback of the primary key should be protected @@ -758,7 +758,7 @@ pub(crate) mod tests { &txn_props(10.into(), pk, CommitKind::TwoPc, None, 0, false), Mutation::make_put(key.clone(), v.to_vec()), &None, - false, + SkipPessimisticCheck, ) .unwrap(); assert!(txn.write_size() > 0); @@ -802,7 +802,7 @@ pub(crate) mod tests { &txn_props(5.into(), key, CommitKind::TwoPc, None, 0, false), Mutation::make_put(Key::from_raw(key), value.to_vec()), &None, - false, + SkipPessimisticCheck, ) .unwrap_err(); @@ -815,7 +815,7 @@ pub(crate) mod tests { &txn_props(5.into(), key, CommitKind::TwoPc, None, 0, true), Mutation::make_put(Key::from_raw(key), value.to_vec()), &None, - false, + SkipPessimisticCheck, ) .unwrap(); } @@ -961,7 +961,7 @@ pub(crate) mod tests { // original pessimisitic lock. 
must_acquire_pessimistic_lock_with_ttl(&engine, k, k, 10, 10, 100); must_pessimistic_locked(&engine, k, 10, 10); - must_pessimistic_prewrite_put_with_ttl(&engine, k, v, k, 10, 10, true, 110); + must_pessimistic_prewrite_put_with_ttl(&engine, k, v, k, 10, 10, DoPessimisticCheck, 110); must_locked_with_ttl(&engine, k, 10, 110); must_rollback(&engine, k, 10, false); @@ -970,7 +970,7 @@ pub(crate) mod tests { // the prewrite request. must_acquire_pessimistic_lock_with_ttl(&engine, k, k, 20, 20, 100); must_pessimistic_locked(&engine, k, 20, 20); - must_pessimistic_prewrite_put_with_ttl(&engine, k, v, k, 20, 20, true, 90); + must_pessimistic_prewrite_put_with_ttl(&engine, k, v, k, 20, 20, DoPessimisticCheck, 90); must_locked_with_ttl(&engine, k, 20, 100); } @@ -984,7 +984,7 @@ pub(crate) mod tests { must_prewrite_put(&engine, k, v, k, 10); must_commit(&engine, k, 10, 11); must_acquire_pessimistic_lock(&engine, k, k, 5, 12); - must_pessimistic_prewrite_lock(&engine, k, k, 5, 12, true); + must_pessimistic_prewrite_lock(&engine, k, k, 5, 12, DoPessimisticCheck); must_commit(&engine, k, 5, 15); // Now in write cf: @@ -1025,7 +1025,7 @@ pub(crate) mod tests { expected_lock_info.get_primary_lock(), &None, expected_lock_info.get_lock_version().into(), - false, + SkipPessimisticCheck, expected_lock_info.get_lock_ttl(), TimeStamp::zero(), expected_lock_info.get_txn_size(), @@ -1068,7 +1068,7 @@ pub(crate) mod tests { expected_lock_info.set_lock_ttl(0); assert_lock_info_eq( - must_pessimistic_prewrite_put_err(&engine, k, v, k, 40, 40, false), + must_pessimistic_prewrite_put_err(&engine, k, v, k, 40, 40, SkipPessimisticCheck), &expected_lock_info, ); @@ -1095,8 +1095,8 @@ pub(crate) mod tests { must_prewrite_put(&engine, k, v, k, 2); must_locked(&engine, k, 2); - must_pessimistic_prewrite_put_err(&engine, k, v, k, 1, 1, false); - must_pessimistic_prewrite_put_err(&engine, k, v, k, 3, 3, false); + must_pessimistic_prewrite_put_err(&engine, k, v, k, 1, 1, SkipPessimisticCheck); + 
must_pessimistic_prewrite_put_err(&engine, k, v, k, 3, 3, SkipPessimisticCheck); } #[test] @@ -1117,19 +1117,19 @@ pub(crate) mod tests { must_acquire_pessimistic_lock_err(&engine, k3, k1, 10, 10); // Update for_update_ts to 20 due to write conflict must_acquire_pessimistic_lock(&engine, k3, k1, 10, 20); - must_pessimistic_prewrite_put(&engine, k1, v1, k1, 10, 20, true); - must_pessimistic_prewrite_put(&engine, k3, v3, k1, 10, 20, true); + must_pessimistic_prewrite_put(&engine, k1, v1, k1, 10, 20, DoPessimisticCheck); + must_pessimistic_prewrite_put(&engine, k3, v3, k1, 10, 20, DoPessimisticCheck); // Write a non-pessimistic lock with for_update_ts 20. - must_pessimistic_prewrite_put(&engine, k2, v2, k1, 10, 20, false); + must_pessimistic_prewrite_put(&engine, k2, v2, k1, 10, 20, SkipPessimisticCheck); // Roll back the primary key due to timeout, but the non-pessimistic lock is not // rolled back. must_rollback(&engine, k1, 10, false); // Txn-15 acquires pessimistic locks on k1. must_acquire_pessimistic_lock(&engine, k1, k1, 15, 15); - must_pessimistic_prewrite_put(&engine, k1, v1, k1, 15, 15, true); + must_pessimistic_prewrite_put(&engine, k1, v1, k1, 15, 15, DoPessimisticCheck); // There is a non-pessimistic lock conflict here. - match must_pessimistic_prewrite_put_err(&engine, k2, v2, k1, 15, 15, false) { + match must_pessimistic_prewrite_put_err(&engine, k2, v2, k1, 15, 15, SkipPessimisticCheck) { Error(box ErrorInner::KeyIsLocked(info)) => assert_eq!(info.get_lock_ttl(), 0), e => panic!("unexpected error: {}", e), }; @@ -1166,30 +1166,30 @@ pub(crate) mod tests { // Key not exist; should succeed. fail_to_write_pessimistic_lock(&engine, k, 10, 10); - must_pessimistic_prewrite_put(&engine, k, &v, k, 10, 10, true); + must_pessimistic_prewrite_put(&engine, k, &v, k, 10, 10, DoPessimisticCheck); must_commit(&engine, k, 10, 20); must_get(&engine, k, 20, &v); // for_update_ts(30) >= start_ts(30) > commit_ts(20); should succeed. 
v.push(0); fail_to_write_pessimistic_lock(&engine, k, 30, 30); - must_pessimistic_prewrite_put(&engine, k, &v, k, 30, 30, true); + must_pessimistic_prewrite_put(&engine, k, &v, k, 30, 30, DoPessimisticCheck); must_commit(&engine, k, 30, 40); must_get(&engine, k, 40, &v); // for_update_ts(40) >= commit_ts(40) > start_ts(35); should fail. fail_to_write_pessimistic_lock(&engine, k, 35, 40); - must_pessimistic_prewrite_put_err(&engine, k, &v, k, 35, 40, true); + must_pessimistic_prewrite_put_err(&engine, k, &v, k, 35, 40, DoPessimisticCheck); // KeyIsLocked; should fail. must_acquire_pessimistic_lock(&engine, k, k, 50, 50); - must_pessimistic_prewrite_put_err(&engine, k, &v, k, 60, 60, true); + must_pessimistic_prewrite_put_err(&engine, k, &v, k, 60, 60, DoPessimisticCheck); pessimistic_rollback::tests::must_success(&engine, k, 50, 50); // The txn has been rolled back; should fail. must_acquire_pessimistic_lock(&engine, k, k, 80, 80); must_cleanup(&engine, k, 80, TimeStamp::max()); - must_pessimistic_prewrite_put_err(&engine, k, &v, k, 80, 80, true); + must_pessimistic_prewrite_put_err(&engine, k, &v, k, 80, 80, DoPessimisticCheck); } #[test] @@ -1219,7 +1219,7 @@ pub(crate) mod tests { ), mutation, &Some(vec![b"key1".to_vec(), b"key2".to_vec(), b"key3".to_vec()]), - false, + SkipPessimisticCheck, ) .unwrap(); let modifies = txn.into_modifies(); @@ -1277,7 +1277,7 @@ pub(crate) mod tests { ), mutation, &Some(vec![b"key1".to_vec(), b"key2".to_vec(), b"key3".to_vec()]), - true, + DoPessimisticCheck, ) .unwrap(); let modifies = txn.into_modifies(); @@ -1336,7 +1336,7 @@ pub(crate) mod tests { ), mutation, &Some(vec![b"key1".to_vec(), b"key2".to_vec(), b"key3".to_vec()]), - true, + DoPessimisticCheck, ) .unwrap(); assert_eq!(min_commit_ts.into_inner(), 100); @@ -1379,7 +1379,7 @@ pub(crate) mod tests { // Pessimistic transaction also works in the same case. 
must_acquire_pessimistic_lock(&engine, k, k, 50, 50); must_pessimistic_locked(&engine, k, 50, 50); - must_pessimistic_prewrite_put(&engine, k, v, k, 50, 50, true); + must_pessimistic_prewrite_put(&engine, k, v, k, 50, 50, DoPessimisticCheck); must_commit(&engine, k, 50, 60); must_unlocked(&engine, k); must_written(&engine, k, 50, 60, WriteType::Put); @@ -1562,7 +1562,7 @@ pub(crate) mod tests { // T2, start_ts = 20 must_acquire_pessimistic_lock(&engine, k2, k2, 20, 25); - must_pessimistic_prewrite_put(&engine, k2, v2, k2, 20, 25, true); + must_pessimistic_prewrite_put(&engine, k2, v2, k2, 20, 25, DoPessimisticCheck); must_cleanup(&engine, k2, 20, 0); diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index 792ed8fcb9a..9df4d9ebce9 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -252,6 +252,8 @@ pub fn acquire_pessimistic_lock( pub mod tests { use concurrency_manager::ConcurrencyManager; use kvproto::kvrpcpb::Context; + #[cfg(test)] + use kvproto::kvrpcpb::PrewriteRequestPessimisticAction::*; use txn_types::TimeStamp; use super::*; @@ -493,7 +495,7 @@ pub mod tests { // Normal must_succeed(&engine, k, k, 1, 1); must_pessimistic_locked(&engine, k, 1, 1); - must_pessimistic_prewrite_put(&engine, k, v, k, 1, 1, true); + must_pessimistic_prewrite_put(&engine, k, v, k, 1, 1, DoPessimisticCheck); must_locked(&engine, k, 1); must_commit(&engine, k, 1, 2); must_unlocked(&engine, k); @@ -516,7 +518,7 @@ pub mod tests { must_prewrite_lock_err(&engine, k, k, 8); must_err(&engine, k, k, 8, 8); must_succeed(&engine, k, k, 8, 9); - must_pessimistic_prewrite_put(&engine, k, v, k, 8, 8, true); + must_pessimistic_prewrite_put(&engine, k, v, k, 8, 8, DoPessimisticCheck); must_commit(&engine, k, 8, 10); must_unlocked(&engine, k); @@ -525,16 +527,16 @@ pub mod tests { must_pessimistic_locked(&engine, k, 11, 11); must_cleanup(&engine, k, 11, 
0); must_err(&engine, k, k, 11, 11); - must_pessimistic_prewrite_put_err(&engine, k, v, k, 11, 11, true); + must_pessimistic_prewrite_put_err(&engine, k, v, k, 11, 11, DoPessimisticCheck); must_prewrite_lock_err(&engine, k, k, 11); must_unlocked(&engine, k); must_succeed(&engine, k, k, 12, 12); - must_pessimistic_prewrite_put(&engine, k, v, k, 12, 12, true); + must_pessimistic_prewrite_put(&engine, k, v, k, 12, 12, DoPessimisticCheck); must_locked(&engine, k, 12); must_cleanup(&engine, k, 12, 0); must_err(&engine, k, k, 12, 12); - must_pessimistic_prewrite_put_err(&engine, k, v, k, 12, 12, true); + must_pessimistic_prewrite_put_err(&engine, k, v, k, 12, 12, DoPessimisticCheck); must_prewrite_lock_err(&engine, k, k, 12); must_unlocked(&engine, k); @@ -543,9 +545,9 @@ pub mod tests { must_pessimistic_locked(&engine, k, 13, 13); must_succeed(&engine, k, k, 13, 13); must_pessimistic_locked(&engine, k, 13, 13); - must_pessimistic_prewrite_put(&engine, k, v, k, 13, 13, true); + must_pessimistic_prewrite_put(&engine, k, v, k, 13, 13, DoPessimisticCheck); must_locked(&engine, k, 13); - must_pessimistic_prewrite_put(&engine, k, v, k, 13, 13, true); + must_pessimistic_prewrite_put(&engine, k, v, k, 13, 13, DoPessimisticCheck); must_locked(&engine, k, 13); must_commit(&engine, k, 13, 14); must_unlocked(&engine, k); @@ -556,7 +558,7 @@ pub mod tests { must_succeed(&engine, k, k, 15, 15); must_pessimistic_locked(&engine, k, 15, 15); must_get(&engine, k, 16, v); - must_pessimistic_prewrite_delete(&engine, k, k, 15, 15, true); + must_pessimistic_prewrite_delete(&engine, k, k, 15, 15, DoPessimisticCheck); must_get_err(&engine, k, 16); must_commit(&engine, k, 15, 17); @@ -582,7 +584,7 @@ pub mod tests { // Acquire lock on a prewritten key should fail. 
must_succeed(&engine, k, k, 26, 26); must_pessimistic_locked(&engine, k, 26, 26); - must_pessimistic_prewrite_delete(&engine, k, k, 26, 26, true); + must_pessimistic_prewrite_delete(&engine, k, k, 26, 26, DoPessimisticCheck); must_locked(&engine, k, 26); must_err(&engine, k, k, 26, 26); must_locked(&engine, k, 26); @@ -595,7 +597,7 @@ pub mod tests { must_unlocked(&engine, k); must_get_none(&engine, k, 28); // Pessimistic prewrite on a committed key should fail. - must_pessimistic_prewrite_put_err(&engine, k, v, k, 26, 26, true); + must_pessimistic_prewrite_put_err(&engine, k, v, k, 26, 26, DoPessimisticCheck); must_unlocked(&engine, k); must_get_none(&engine, k, 28); // Currently we cannot avoid this. @@ -604,7 +606,7 @@ pub mod tests { must_unlocked(&engine, k); // Non pessimistic key in pessimistic transaction. - must_pessimistic_prewrite_put(&engine, k, v, k, 30, 30, false); + must_pessimistic_prewrite_put(&engine, k, v, k, 30, 30, SkipPessimisticCheck); must_locked(&engine, k, 30); must_commit(&engine, k, 30, 31); must_unlocked(&engine, k); @@ -628,13 +630,13 @@ pub mod tests { must_pessimistic_locked(&engine, k, 35, 37); // Cannot prewrite when there is another transaction's pessimistic lock. - must_pessimistic_prewrite_put_err(&engine, k, v, k, 36, 36, true); - must_pessimistic_prewrite_put_err(&engine, k, v, k, 36, 38, true); + must_pessimistic_prewrite_put_err(&engine, k, v, k, 36, 36, DoPessimisticCheck); + must_pessimistic_prewrite_put_err(&engine, k, v, k, 36, 38, DoPessimisticCheck); must_pessimistic_locked(&engine, k, 35, 37); // Cannot prewrite when there is another transaction's non-pessimistic lock. 
- must_pessimistic_prewrite_put(&engine, k, v, k, 35, 37, true); + must_pessimistic_prewrite_put(&engine, k, v, k, 35, 37, DoPessimisticCheck); must_locked(&engine, k, 35); - must_pessimistic_prewrite_put_err(&engine, k, v, k, 36, 38, true); + must_pessimistic_prewrite_put_err(&engine, k, v, k, 36, 38, DoPessimisticCheck); must_locked(&engine, k, 35); // Commit pessimistic transaction's key but with smaller commit_ts than @@ -648,7 +650,7 @@ pub mod tests { // Currently not checked, so prewrite will success. must_succeed(&engine, k, k, 40, 40); must_pessimistic_locked(&engine, k, 40, 40); - must_pessimistic_prewrite_put(&engine, k, v, k, 40, 40, false); + must_pessimistic_prewrite_put(&engine, k, v, k, 40, 40, SkipPessimisticCheck); must_locked(&engine, k, 40); must_commit(&engine, k, 40, 41); must_unlocked(&engine, k); @@ -657,14 +659,14 @@ pub mod tests { // Currently not checked. must_succeed(&engine, k, k, 42, 45); must_pessimistic_locked(&engine, k, 42, 45); - must_pessimistic_prewrite_put(&engine, k, v, k, 42, 43, true); + must_pessimistic_prewrite_put(&engine, k, v, k, 42, 43, DoPessimisticCheck); must_locked(&engine, k, 42); must_commit(&engine, k, 42, 45); must_unlocked(&engine, k); must_succeed(&engine, k, k, 46, 47); must_pessimistic_locked(&engine, k, 46, 47); - must_pessimistic_prewrite_put(&engine, k, v, k, 46, 48, true); + must_pessimistic_prewrite_put(&engine, k, v, k, 46, 48, DoPessimisticCheck); must_locked(&engine, k, 46); must_commit(&engine, k, 46, 50); must_unlocked(&engine, k); @@ -674,7 +676,7 @@ pub mod tests { // Normally non-pessimistic keys in pessimistic transactions are used when we // are sure that there won't be conflicts. So this case is also not checked, and // prewrite will succeeed. 
- must_pessimistic_prewrite_put(&engine, k, v, k, 47, 48, false); + must_pessimistic_prewrite_put(&engine, k, v, k, 47, 48, SkipPessimisticCheck); must_locked(&engine, k, 47); must_cleanup(&engine, k, 47, 0); must_unlocked(&engine, k); @@ -682,7 +684,7 @@ pub mod tests { // The rollback of the primary key in a pessimistic transaction should be // protected from being collapsed. must_succeed(&engine, k, k, 49, 60); - must_pessimistic_prewrite_put(&engine, k, v, k, 49, 60, true); + must_pessimistic_prewrite_put(&engine, k, v, k, 49, 60, DoPessimisticCheck); must_locked(&engine, k, 49); must_cleanup(&engine, k, 49, 0); must_get_rollback_protected(&engine, k, 49, true); @@ -694,7 +696,7 @@ pub mod tests { // to another write records' commit ts. Now there is a commit record with // commit_ts = 50. must_succeed(&engine, k, k, 50, 61); - must_pessimistic_prewrite_put(&engine, k, v, k, 50, 61, true); + must_pessimistic_prewrite_put(&engine, k, v, k, 50, 61, DoPessimisticCheck); must_locked(&engine, k, 50); must_cleanup(&engine, k, 50, 0); must_get_overlapped_rollback(&engine, k, 50, 46, WriteType::Put, Some(0)); @@ -704,7 +706,15 @@ pub mod tests { let for_update_ts = start_ts + 48; let commit_ts = start_ts + 50; must_succeed(&engine, k, k, *start_ts, for_update_ts); - must_pessimistic_prewrite_put(&engine, k, v, k, *start_ts, for_update_ts, true); + must_pessimistic_prewrite_put( + &engine, + k, + v, + k, + *start_ts, + for_update_ts, + DoPessimisticCheck, + ); must_commit(&engine, k, *start_ts, commit_ts); must_get(&engine, k, commit_ts + 1, v); } @@ -946,13 +956,13 @@ pub mod tests { // Put v1 @ start ts 1, commit ts 2 must_succeed(&engine, k, k, 1, 1); - must_pessimistic_prewrite_put(&engine, k, v1, k, 1, 1, true); + must_pessimistic_prewrite_put(&engine, k, v1, k, 1, 1, DoPessimisticCheck); must_commit(&engine, k, 1, 2); let v2 = b"v2"; // Put v2 @ start ts 10, commit ts 11 must_succeed(&engine, k, k, 10, 10); - must_pessimistic_prewrite_put(&engine, k, v2, k, 10, 10, 
true); + must_pessimistic_prewrite_put(&engine, k, v2, k, 10, 10, DoPessimisticCheck); must_commit(&engine, k, 10, 11); // Lock @ start ts 9, for update ts 12, commit ts 13 @@ -1079,7 +1089,7 @@ pub mod tests { // T1: start_ts = 3, commit_ts = 5, put key:value must_succeed(&engine, key, key, 3, 3); - must_pessimistic_prewrite_put(&engine, key, value, key, 3, 3, true); + must_pessimistic_prewrite_put(&engine, key, value, key, 3, 3, DoPessimisticCheck); must_commit(&engine, key, 3, 5); // T2: start_ts = 15, acquire pessimistic lock on k, with should_not_exist flag @@ -1114,7 +1124,7 @@ pub mod tests { // T3: start_ts = 8, commit_ts = max_ts + 1 = 16, prewrite a DELETE operation on // k must_succeed(&engine, key, key, 8, 8); - must_pessimistic_prewrite_delete(&engine, key, key, 8, 8, true); + must_pessimistic_prewrite_delete(&engine, key, key, 8, 8, DoPessimisticCheck); must_commit(&engine, key, 8, cm.max_ts().into_inner() + 1); // T1: start_ts = 10, repeatedly acquire pessimistic lock on k, with diff --git a/src/storage/txn/actions/cleanup.rs b/src/storage/txn/actions/cleanup.rs index 461b8e2d432..19cb90f0a22 100644 --- a/src/storage/txn/actions/cleanup.rs +++ b/src/storage/txn/actions/cleanup.rs @@ -82,6 +82,8 @@ pub mod tests { use concurrency_manager::ConcurrencyManager; use engine_traits::CF_WRITE; use kvproto::kvrpcpb::Context; + #[cfg(test)] + use kvproto::kvrpcpb::PrewriteRequestPessimisticAction::*; use txn_types::TimeStamp; use super::*; @@ -233,7 +235,7 @@ pub mod tests { must_get_rollback_protected(&engine, k, ts(11, 1), true); must_acquire_pessimistic_lock(&engine, k, k, ts(13, 1), ts(14, 1)); - must_pessimistic_prewrite_put(&engine, k, v, k, ts(13, 1), ts(14, 1), true); + must_pessimistic_prewrite_put(&engine, k, v, k, ts(13, 1), ts(14, 1), DoPessimisticCheck); must_succeed(&engine, k, ts(13, 1), ts(120, 0)); must_get_rollback_protected(&engine, k, ts(13, 1), true); } diff --git a/src/storage/txn/actions/commit.rs b/src/storage/txn/actions/commit.rs 
index 456757285e0..2351e0c3282 100644 --- a/src/storage/txn/actions/commit.rs +++ b/src/storage/txn/actions/commit.rs @@ -107,6 +107,8 @@ pub fn commit( pub mod tests { use concurrency_manager::ConcurrencyManager; use kvproto::kvrpcpb::Context; + #[cfg(test)] + use kvproto::kvrpcpb::PrewriteRequestPessimisticAction::*; use txn_types::TimeStamp; use super::*; @@ -275,7 +277,7 @@ pub mod tests { k, &None, ts(60, 0), - true, + DoPessimisticCheck, 50, ts(60, 0), 1, diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index e7ca85c8137..7b562af8b43 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -4,7 +4,10 @@ use std::cmp; use fail::fail_point; -use kvproto::kvrpcpb::{Assertion, AssertionLevel}; +use kvproto::kvrpcpb::{ + Assertion, AssertionLevel, + PrewriteRequestPessimisticAction::{self, *}, +}; use txn_types::{ is_short_value, Key, Mutation, MutationType, OldValue, TimeStamp, Value, Write, WriteType, }; @@ -28,10 +31,10 @@ pub fn prewrite( txn_props: &TransactionProperties<'_>, mutation: Mutation, secondary_keys: &Option>>, - is_pessimistic_lock: bool, + pessimistic_action: PrewriteRequestPessimisticAction, ) -> Result<(TimeStamp, OldValue)> { let mut mutation = - PrewriteMutation::from_mutation(mutation, secondary_keys, is_pessimistic_lock, txn_props)?; + PrewriteMutation::from_mutation(mutation, secondary_keys, pessimistic_action, txn_props)?; // Update max_ts for Insert operation to guarantee linearizability and snapshot // isolation @@ -56,8 +59,8 @@ pub fn prewrite( let mut lock_amended = false; let lock_status = match reader.load_lock(&mutation.key)? 
{ - Some(lock) => mutation.check_lock(lock, is_pessimistic_lock)?, - None if is_pessimistic_lock => { + Some(lock) => mutation.check_lock(lock, pessimistic_action)?, + None if matches!(pessimistic_action, DoPessimisticCheck) => { amend_pessimistic_lock(&mutation, reader)?; lock_amended = true; LockStatus::None @@ -228,7 +231,7 @@ struct PrewriteMutation<'a> { mutation_type: MutationType, secondary_keys: &'a Option>>, min_commit_ts: TimeStamp, - is_pessimistic_lock: bool, + pessimistic_action: PrewriteRequestPessimisticAction, lock_type: Option, lock_ttl: u64, @@ -243,7 +246,7 @@ impl<'a> PrewriteMutation<'a> { fn from_mutation( mutation: Mutation, secondary_keys: &'a Option>>, - is_pessimistic_lock: bool, + pessimistic_action: PrewriteRequestPessimisticAction, txn_props: &'a TransactionProperties<'a>, ) -> Result> { let should_not_write = mutation.should_not_write(); @@ -265,7 +268,7 @@ impl<'a> PrewriteMutation<'a> { mutation_type, secondary_keys, min_commit_ts: txn_props.min_commit_ts, - is_pessimistic_lock, + pessimistic_action, lock_type, lock_ttl: txn_props.lock_ttl, @@ -291,11 +294,15 @@ impl<'a> PrewriteMutation<'a> { } /// Check whether the current key is locked at any timestamp. - fn check_lock(&mut self, lock: Lock, is_pessimistic_lock: bool) -> Result { + fn check_lock( + &mut self, + lock: Lock, + pessimistic_action: PrewriteRequestPessimisticAction, + ) -> Result { if lock.ts != self.txn_props.start_ts { // Abort on lock belonging to other transaction if // prewrites a pessimistic lock. - if is_pessimistic_lock { + if matches!(pessimistic_action, DoPessimisticCheck) { warn!( "prewrite failed (pessimistic lock not found)"; "start_ts" => self.txn_props.start_ts, @@ -360,7 +367,12 @@ impl<'a> PrewriteMutation<'a> { // Note: PessimisticLockNotFound can happen on a non-pessimistically locked key, // if it is a retrying prewrite request. 
TransactionKind::Pessimistic(for_update_ts) => { - if commit_ts > for_update_ts { + if let DoConstraintCheck = self.pessimistic_action { + if commit_ts > self.txn_props.start_ts { + MVCC_CONFLICT_COUNTER.prewrite_write_conflict.inc(); + self.write_conflict_error(&write, commit_ts)?; + } + } else if commit_ts > for_update_ts { warn!("conflicting write was found, pessimistic lock must be lost for the corresponding row key"; "key" => %self.key, "start_ts" => self.txn_props.start_ts, @@ -570,10 +582,16 @@ impl<'a> PrewriteMutation<'a> { match &self.txn_props.kind { TransactionKind::Optimistic(s) => *s, TransactionKind::Pessimistic(_) => { - // For non-pessimistic-locked keys, do not skip constraint check when retrying. - // This intents to protect idempotency. - // Ref: https://github.com/tikv/tikv/issues/11187 - self.is_pessimistic_lock || !self.txn_props.is_retry_request + match self.pessimistic_action { + DoPessimisticCheck => true, + // For non-pessimistic-locked keys, do not skip constraint check when retrying. + // This intents to protect idempotency. + // Ref: https://github.com/tikv/tikv/issues/11187 + SkipPessimisticCheck => !self.txn_props.is_retry_request, + // For keys that postpones constraint check to prewrite, do not skip constraint + // check. + PrewriteRequestPessimisticAction::DoConstraintCheck => false, + } } } } @@ -782,7 +800,7 @@ pub mod tests { &props, Mutation::make_insert(Key::from_raw(key), value.to_vec()), &None, - false, + SkipPessimisticCheck, )?; // Insert must be None if the key is not lock, or be Unspecified if the // key is already locked. 
@@ -813,7 +831,7 @@ pub mod tests { &optimistic_txn_props(pk, ts), Mutation::make_check_not_exists(Key::from_raw(key)), &None, - true, + DoPessimisticCheck, )?; assert_eq!(old_value, OldValue::Unspecified); Ok(()) @@ -835,7 +853,7 @@ pub mod tests { &optimistic_async_props(b"k1", 10.into(), 50.into(), 2, false), Mutation::make_put(Key::from_raw(b"k1"), b"v1".to_vec()), &Some(vec![b"k2".to_vec()]), - false, + SkipPessimisticCheck, ) .unwrap(); assert_eq!(old_value, OldValue::None); @@ -848,7 +866,7 @@ pub mod tests { &optimistic_async_props(b"k1", 10.into(), 50.into(), 1, false), Mutation::make_put(Key::from_raw(b"k2"), b"v2".to_vec()), &Some(vec![]), - false, + SkipPessimisticCheck, ) .unwrap_err(); assert!(matches!( @@ -883,7 +901,7 @@ pub mod tests { &props, Mutation::make_check_not_exists(Key::from_raw(b"k0")), &Some(vec![]), - false, + SkipPessimisticCheck, ) .unwrap(); assert!(min_ts > props.start_ts); @@ -903,7 +921,7 @@ pub mod tests { &props, Mutation::make_check_not_exists(Key::from_raw(b"k0")), &Some(vec![]), - false, + SkipPessimisticCheck, ) .unwrap(); assert_eq!(cm.max_ts(), props.start_ts); @@ -918,7 +936,7 @@ pub mod tests { &optimistic_async_props(b"k1", 10.into(), 50.into(), 2, false), Mutation::make_put(Key::from_raw(b"k1"), b"v1".to_vec()), &Some(vec![b"k2".to_vec()]), - false, + SkipPessimisticCheck, ) .unwrap(); assert!(min_ts > 42.into()); @@ -941,7 +959,7 @@ pub mod tests { &optimistic_async_props(b"k3", 44.into(), 50.into(), 2, false), mutation.clone(), &Some(vec![b"k4".to_vec()]), - false, + SkipPessimisticCheck, ) .unwrap(); assert!(min_ts > 44.into()); @@ -963,7 +981,7 @@ pub mod tests { &props, mutation.clone(), &Some(vec![b"k6".to_vec()]), - false, + SkipPessimisticCheck, ) .unwrap(); assert!(min_ts > 45.into()); @@ -982,7 +1000,7 @@ pub mod tests { &props, mutation.clone(), &Some(vec![b"k8".to_vec()]), - false, + SkipPessimisticCheck, ) .unwrap(); assert!(min_ts >= 46.into()); @@ -1012,7 +1030,7 @@ pub mod tests { 
&optimistic_async_props(b"k1", 10.into(), 50.into(), 2, true), Mutation::make_put(Key::from_raw(b"k1"), b"v1".to_vec()), &None, - false, + SkipPessimisticCheck, ) .unwrap(); assert_eq!(old_value, OldValue::None); @@ -1025,7 +1043,7 @@ pub mod tests { &optimistic_async_props(b"k1", 10.into(), 50.into(), 1, true), Mutation::make_put(Key::from_raw(b"k2"), b"v2".to_vec()), &None, - false, + SkipPessimisticCheck, ) .unwrap_err(); assert!(matches!( @@ -1071,7 +1089,7 @@ pub mod tests { }, Mutation::make_check_not_exists(Key::from_raw(key)), &None, - false, + SkipPessimisticCheck, )?; assert_eq!(old_value, OldValue::Unspecified); Ok(()) @@ -1108,7 +1126,7 @@ pub mod tests { &txn_props, Mutation::make_put(Key::from_raw(b"k1"), b"v1".to_vec()), &Some(vec![b"k2".to_vec()]), - true, + DoPessimisticCheck, ) .unwrap(); // Pessimistic txn skips constraint check, does not read previous write. @@ -1122,7 +1140,7 @@ pub mod tests { &txn_props, Mutation::make_put(Key::from_raw(b"k2"), b"v2".to_vec()), &Some(vec![]), - true, + DoPessimisticCheck, ) .unwrap_err(); } @@ -1158,7 +1176,7 @@ pub mod tests { &txn_props, Mutation::make_put(Key::from_raw(b"k1"), b"v1".to_vec()), &None, - true, + DoPessimisticCheck, ) .unwrap(); // Pessimistic txn skips constraint check, does not read previous write. 
@@ -1172,7 +1190,7 @@ pub mod tests { &txn_props, Mutation::make_put(Key::from_raw(b"k2"), b"v2".to_vec()), &None, - true, + DoPessimisticCheck, ) .unwrap_err(); } @@ -1278,7 +1296,7 @@ pub mod tests { &txn_props, Mutation::make_check_not_exists(Key::from_raw(key)), &None, - false, + SkipPessimisticCheck, ); if success { let res = res.unwrap(); @@ -1293,7 +1311,7 @@ pub mod tests { &txn_props, Mutation::make_insert(Key::from_raw(key), b"value".to_vec()), &None, - false, + SkipPessimisticCheck, ); if success { let res = res.unwrap(); @@ -1348,7 +1366,7 @@ pub mod tests { &txn_props, Mutation::make_put(key.clone(), b"value".to_vec()), &None, - false, + SkipPessimisticCheck, ) .unwrap(); assert_eq!(&old_value, expected_value, "key: {}", key); @@ -1368,7 +1386,7 @@ pub mod tests { &Some(vec![b"k2".to_vec()]), 10, 10, - true, + DoPessimisticCheck, 15, ); must_pessimistic_prewrite_put_async_commit( @@ -1379,7 +1397,7 @@ pub mod tests { &Some(vec![]), 10, 10, - false, + SkipPessimisticCheck, 15, ); @@ -1398,7 +1416,7 @@ pub mod tests { &Some(vec![]), 10, 10, - false, + SkipPessimisticCheck, 0, ); assert!(matches!( @@ -1429,7 +1447,7 @@ pub mod tests { &Some(vec![]), 10, 10, - false, + SkipPessimisticCheck, 0, ); assert!(matches!( @@ -1439,7 +1457,15 @@ pub mod tests { must_unlocked(&engine, b"k2"); let err = must_retry_pessimistic_prewrite_put_err( - &engine, b"k2", b"v2", b"k1", &None, 10, 10, false, 0, + &engine, + b"k2", + b"v2", + b"k1", + &None, + 10, + 10, + SkipPessimisticCheck, + 0, ); assert!(matches!( err, @@ -1451,7 +1477,15 @@ pub mod tests { // Try a different txn start ts (which haven't been successfully committed // before). 
let err = must_retry_pessimistic_prewrite_put_err( - &engine, b"k2", b"v2", b"k1", &None, 11, 11, false, 0, + &engine, + b"k2", + b"v2", + b"k1", + &None, + 11, + 11, + SkipPessimisticCheck, + 0, ); assert!(matches!( err, @@ -1467,7 +1501,7 @@ pub mod tests { b"k1", &None, 12.into(), - false, + SkipPessimisticCheck, 100, 12.into(), 1, @@ -1490,7 +1524,7 @@ pub mod tests { b"k1", &None, 13.into(), - false, + SkipPessimisticCheck, 100, 55.into(), 1, @@ -1545,7 +1579,7 @@ pub mod tests { &txn_props, Mutation::make_put(Key::from_raw(b"k1"), b"value".to_vec()), &None, - false, + SkipPessimisticCheck, ) .unwrap(); assert_eq!( @@ -1599,7 +1633,7 @@ pub mod tests { &txn_props, Mutation::make_insert(Key::from_raw(b"k1"), b"v2".to_vec()), &None, - false, + SkipPessimisticCheck, ) .unwrap(); assert_eq!(old_value, OldValue::None); @@ -1736,7 +1770,7 @@ pub mod tests { &txn_props, Mutation::make_put(Key::from_raw(key), b"v2".to_vec()), &None, - false, + SkipPessimisticCheck, )?; Ok(old_value) })], @@ -1772,7 +1806,7 @@ pub mod tests { &txn_props, Mutation::make_insert(Key::from_raw(key), b"v2".to_vec()), &None, - false, + SkipPessimisticCheck, )?; Ok(old_value) })], @@ -1786,7 +1820,7 @@ pub mod tests { let prewrite_put = |key: &'_ _, value, ts: u64, - is_pessimistic_lock, + pessimistic_action, for_update_ts: u64, assertion, assertion_level, @@ -1799,7 +1833,7 @@ pub mod tests { key, &None, ts.into(), - is_pessimistic_lock, + pessimistic_action, 100, for_update_ts.into(), 1, @@ -1818,7 +1852,7 @@ pub mod tests { &None, ts, for_update_ts, - is_pessimistic_lock, + pessimistic_action, 0, false, assertion, @@ -1843,7 +1877,7 @@ pub mod tests { &k1, b"v1", 10, - false, + SkipPessimisticCheck, 0, Assertion::NotExist, assertion_level, @@ -1855,7 +1889,7 @@ pub mod tests { &k1, b"v1", 20, - false, + SkipPessimisticCheck, 0, Assertion::Exist, assertion_level, @@ -1868,7 +1902,7 @@ pub mod tests { &k2, b"v2", 10, - true, + DoPessimisticCheck, 11, Assertion::NotExist, assertion_level, @@ 
-1880,7 +1914,7 @@ pub mod tests { &k2, b"v2", 20, - true, + DoPessimisticCheck, 21, Assertion::Exist, assertion_level, @@ -1894,7 +1928,7 @@ pub mod tests { &k1, b"v1", 30, - false, + SkipPessimisticCheck, 0, Assertion::NotExist, assertion_level, @@ -1904,7 +1938,7 @@ pub mod tests { &k3, b"v3", 30, - false, + SkipPessimisticCheck, 0, Assertion::Exist, assertion_level, @@ -1920,7 +1954,7 @@ pub mod tests { &k2, b"v2", 30, - true, + DoPessimisticCheck, 31, Assertion::NotExist, assertion_level, @@ -1930,7 +1964,7 @@ pub mod tests { &k4, b"v4", 30, - true, + DoPessimisticCheck, 31, Assertion::Exist, assertion_level, @@ -1939,14 +1973,14 @@ pub mod tests { must_rollback(&engine, &k2, 30, true); must_rollback(&engine, &k4, 30, true); - // Pessimistic transaction fail on strict level no matter whether - // `is_pessimistic_lock`. + // Pessimistic transaction fail on strict level no matter what + // `pessimistic_action` is. let pass = assertion_level != AssertionLevel::Strict; prewrite_put( &k1, b"v1", 40, - false, + SkipPessimisticCheck, 41, Assertion::NotExist, assertion_level, @@ -1956,7 +1990,7 @@ pub mod tests { &k3, b"v3", 40, - false, + SkipPessimisticCheck, 41, Assertion::Exist, assertion_level, @@ -1971,7 +2005,7 @@ pub mod tests { &k2, b"v2", 40, - true, + DoPessimisticCheck, 41, Assertion::NotExist, assertion_level, @@ -1981,7 +2015,7 @@ pub mod tests { &k4, b"v4", 40, - true, + DoPessimisticCheck, 41, Assertion::Exist, assertion_level, @@ -2027,4 +2061,39 @@ pub mod tests { test_all_levels(&prepare_delete); test_all_levels(&prepare_gc_fence); } + + #[test] + fn test_deferred_constraint_check() { + let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let key = b"key"; + let key2 = b"key2"; + let value = b"value"; + + // 1. 
write conflict + must_prewrite_put(&engine, key, value, key, 1); + must_commit(&engine, key, 1, 5); + must_pessimistic_prewrite_insert(&engine, key2, value, key, 3, 3, SkipPessimisticCheck); + let err = + must_pessimistic_prewrite_insert_err(&engine, key, value, key, 3, 3, DoConstraintCheck); + assert!(matches!(err, Error(box ErrorInner::WriteConflict { .. }))); + + // 2. unique constraint fail + must_prewrite_put(&engine, key, value, key, 11); + must_commit(&engine, key, 11, 12); + let err = must_pessimistic_prewrite_insert_err( + &engine, + key, + value, + key, + 13, + 13, + DoConstraintCheck, + ); + assert!(matches!(err, Error(box ErrorInner::AlreadyExist { .. }))); + + // 3. success + must_prewrite_delete(&engine, key, key, 21); + must_commit(&engine, key, 21, 22); + must_pessimistic_prewrite_insert(&engine, key, value, key, 23, 23, DoConstraintCheck); + } } diff --git a/src/storage/txn/actions/tests.rs b/src/storage/txn/actions/tests.rs index e5e4b57054c..523d4b9e8ac 100644 --- a/src/storage/txn/actions/tests.rs +++ b/src/storage/txn/actions/tests.rs @@ -3,7 +3,10 @@ //! 
This file contains tests and testing tools which affects multiple actions use concurrency_manager::ConcurrencyManager; -use kvproto::kvrpcpb::{Assertion, AssertionLevel, Context}; +use kvproto::kvrpcpb::{ + Assertion, AssertionLevel, Context, + PrewriteRequestPessimisticAction::{self, *}, +}; use prewrite::{prewrite, CommitKind, TransactionKind, TransactionProperties}; use super::*; @@ -20,7 +23,7 @@ pub fn must_prewrite_put_impl( pk: &[u8], secondary_keys: &Option>>, ts: TimeStamp, - is_pessimistic_lock: bool, + pessimistic_action: PrewriteRequestPessimisticAction, lock_ttl: u64, for_update_ts: TimeStamp, txn_size: u64, @@ -29,6 +32,81 @@ pub fn must_prewrite_put_impl( is_retry_request: bool, assertion: Assertion, assertion_level: AssertionLevel, +) { + must_prewrite_put_impl_with_should_not_exist( + engine, + key, + value, + pk, + secondary_keys, + ts, + pessimistic_action, + lock_ttl, + for_update_ts, + txn_size, + min_commit_ts, + max_commit_ts, + is_retry_request, + assertion, + assertion_level, + false, + ); +} + +pub fn must_prewrite_insert_impl( + engine: &E, + key: &[u8], + value: &[u8], + pk: &[u8], + secondary_keys: &Option>>, + ts: TimeStamp, + pessimistic_action: PrewriteRequestPessimisticAction, + lock_ttl: u64, + for_update_ts: TimeStamp, + txn_size: u64, + min_commit_ts: TimeStamp, + max_commit_ts: TimeStamp, + is_retry_request: bool, + assertion: Assertion, + assertion_level: AssertionLevel, +) { + must_prewrite_put_impl_with_should_not_exist( + engine, + key, + value, + pk, + secondary_keys, + ts, + pessimistic_action, + lock_ttl, + for_update_ts, + txn_size, + min_commit_ts, + max_commit_ts, + is_retry_request, + assertion, + assertion_level, + true, + ); +} + +pub fn must_prewrite_put_impl_with_should_not_exist( + engine: &E, + key: &[u8], + value: &[u8], + pk: &[u8], + secondary_keys: &Option>>, + ts: TimeStamp, + pessimistic_action: PrewriteRequestPessimisticAction, + lock_ttl: u64, + for_update_ts: TimeStamp, + txn_size: u64, + min_commit_ts: 
TimeStamp, + max_commit_ts: TimeStamp, + is_retry_request: bool, + assertion: Assertion, + assertion_level: AssertionLevel, + should_not_exist: bool, ) { let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -36,7 +114,11 @@ pub fn must_prewrite_put_impl( let mut txn = MvccTxn::new(ts, cm); let mut reader = SnapshotReader::new(ts, snapshot, true); - let mutation = Mutation::Put((Key::from_raw(key), value.to_vec()), assertion); + let mutation = if should_not_exist { + Mutation::Insert((Key::from_raw(key), value.to_vec()), assertion) + } else { + Mutation::Put((Key::from_raw(key), value.to_vec()), assertion) + }; let txn_kind = if for_update_ts.is_zero() { TransactionKind::Optimistic(false) } else { @@ -64,7 +146,7 @@ pub fn must_prewrite_put_impl( }, mutation, secondary_keys, - is_pessimistic_lock, + pessimistic_action, ) .unwrap(); write(engine, &ctx, txn.into_modifies()); @@ -84,7 +166,7 @@ pub fn must_prewrite_put( pk, &None, ts.into(), - false, + SkipPessimisticCheck, 0, TimeStamp::default(), 0, @@ -103,7 +185,7 @@ pub fn must_pessimistic_prewrite_put( pk: &[u8], ts: impl Into, for_update_ts: impl Into, - is_pessimistic_lock: bool, + pessimistic_action: PrewriteRequestPessimisticAction, ) { must_prewrite_put_impl( engine, @@ -112,7 +194,35 @@ pub fn must_pessimistic_prewrite_put( pk, &None, ts.into(), - is_pessimistic_lock, + pessimistic_action, + 0, + for_update_ts.into(), + 0, + TimeStamp::default(), + TimeStamp::default(), + false, + Assertion::None, + AssertionLevel::Off, + ); +} + +pub fn must_pessimistic_prewrite_insert( + engine: &E, + key: &[u8], + value: &[u8], + pk: &[u8], + ts: impl Into, + for_update_ts: impl Into, + pessimistic_action: PrewriteRequestPessimisticAction, +) { + must_prewrite_insert_impl( + engine, + key, + value, + pk, + &None, + ts.into(), + pessimistic_action, 0, for_update_ts.into(), 0, @@ -131,7 +241,7 @@ pub fn must_pessimistic_prewrite_put_with_ttl( pk: &[u8], ts: impl Into, for_update_ts: 
impl Into, - is_pessimistic_lock: bool, + pessimistic_action: PrewriteRequestPessimisticAction, lock_ttl: u64, ) { must_prewrite_put_impl( @@ -141,7 +251,7 @@ pub fn must_pessimistic_prewrite_put_with_ttl( pk, &None, ts.into(), - is_pessimistic_lock, + pessimistic_action, lock_ttl, for_update_ts.into(), 0, @@ -166,6 +276,11 @@ pub fn must_prewrite_put_for_large_txn( let ts = ts.into(); let min_commit_ts = (ts.into_inner() + 1).into(); let for_update_ts = for_update_ts.into(); + let pessimistic_action = if !for_update_ts.is_zero() { + DoPessimisticCheck + } else { + SkipPessimisticCheck + }; must_prewrite_put_impl( engine, key, @@ -173,7 +288,7 @@ pub fn must_prewrite_put_for_large_txn( pk, &None, ts, - !for_update_ts.is_zero(), + pessimistic_action, lock_ttl, for_update_ts, 0, @@ -202,7 +317,7 @@ pub fn must_prewrite_put_async_commit( pk, secondary_keys, ts.into(), - false, + SkipPessimisticCheck, 100, TimeStamp::default(), 0, @@ -222,7 +337,7 @@ pub fn must_pessimistic_prewrite_put_async_commit( secondary_keys: &Option>>, ts: impl Into, for_update_ts: impl Into, - is_pessimistic_lock: bool, + pessimistic_action: PrewriteRequestPessimisticAction, min_commit_ts: impl Into, ) { assert!(secondary_keys.is_some()); @@ -233,7 +348,7 @@ pub fn must_pessimistic_prewrite_put_async_commit( pk, secondary_keys, ts.into(), - is_pessimistic_lock, + pessimistic_action, 100, for_update_ts.into(), 0, @@ -269,6 +384,7 @@ fn default_txn_props( assertion_level: AssertionLevel::Off, } } + pub fn must_prewrite_put_err_impl( engine: &E, key: &[u8], @@ -277,11 +393,74 @@ pub fn must_prewrite_put_err_impl( secondary_keys: &Option>>, ts: impl Into, for_update_ts: impl Into, - is_pessimistic_lock: bool, + pessimistic_action: PrewriteRequestPessimisticAction, + max_commit_ts: impl Into, + is_retry_request: bool, + assertion: Assertion, + assertion_level: AssertionLevel, +) -> Error { + must_prewrite_put_err_impl_with_should_not_exist( + engine, + key, + value, + pk, + secondary_keys, + 
ts.into(), + for_update_ts.into(), + pessimistic_action, + max_commit_ts.into(), + is_retry_request, + assertion, + assertion_level, + false, + ) +} + +pub fn must_prewrite_insert_err_impl( + engine: &E, + key: &[u8], + value: &[u8], + pk: &[u8], + secondary_keys: &Option>>, + ts: impl Into, + for_update_ts: impl Into, + pessimistic_action: PrewriteRequestPessimisticAction, + max_commit_ts: impl Into, + is_retry_request: bool, + assertion: Assertion, + assertion_level: AssertionLevel, +) -> Error { + must_prewrite_put_err_impl_with_should_not_exist( + engine, + key, + value, + pk, + secondary_keys, + ts.into(), + for_update_ts.into(), + pessimistic_action, + max_commit_ts.into(), + is_retry_request, + assertion, + assertion_level, + true, + ) +} + +pub fn must_prewrite_put_err_impl_with_should_not_exist( + engine: &E, + key: &[u8], + value: &[u8], + pk: &[u8], + secondary_keys: &Option>>, + ts: impl Into, + for_update_ts: impl Into, + pessimistic_action: PrewriteRequestPessimisticAction, max_commit_ts: impl Into, is_retry_request: bool, assertion: Assertion, assertion_level: AssertionLevel, + should_not_exist: bool, ) -> Error { let snapshot = engine.snapshot(Default::default()).unwrap(); let for_update_ts = for_update_ts.into(); @@ -289,7 +468,11 @@ pub fn must_prewrite_put_err_impl( let ts = ts.into(); let mut txn = MvccTxn::new(ts, cm); let mut reader = SnapshotReader::new(ts, snapshot, true); - let mutation = Mutation::Put((Key::from_raw(key), value.to_vec()), assertion); + let mutation = if should_not_exist { + Mutation::Insert((Key::from_raw(key), value.to_vec()), assertion) + } else { + Mutation::Put((Key::from_raw(key), value.to_vec()), assertion) + }; let commit_kind = if secondary_keys.is_some() { CommitKind::Async(max_commit_ts.into()) } else { @@ -306,7 +489,7 @@ pub fn must_prewrite_put_err_impl( &props, mutation, &None, - is_pessimistic_lock, + pessimistic_action, ) .unwrap_err() } @@ -326,7 +509,7 @@ pub fn must_prewrite_put_err( &None, ts, 
TimeStamp::zero(), - false, + SkipPessimisticCheck, 0, false, Assertion::None, @@ -341,7 +524,7 @@ pub fn must_pessimistic_prewrite_put_err( pk: &[u8], ts: impl Into, for_update_ts: impl Into, - is_pessimistic_lock: bool, + pessimistic_action: PrewriteRequestPessimisticAction, ) -> Error { must_prewrite_put_err_impl( engine, @@ -351,7 +534,32 @@ pub fn must_pessimistic_prewrite_put_err( &None, ts, for_update_ts, - is_pessimistic_lock, + pessimistic_action, + 0, + false, + Assertion::None, + AssertionLevel::Off, + ) +} + +pub fn must_pessimistic_prewrite_insert_err( + engine: &E, + key: &[u8], + value: &[u8], + pk: &[u8], + ts: impl Into, + for_update_ts: impl Into, + pessimistic_action: PrewriteRequestPessimisticAction, +) -> Error { + must_prewrite_insert_err_impl( + engine, + key, + value, + pk, + &None, + ts, + for_update_ts, + pessimistic_action, 0, false, Assertion::None, @@ -367,7 +575,7 @@ pub fn must_retry_pessimistic_prewrite_put_err( secondary_keys: &Option>>, ts: impl Into, for_update_ts: impl Into, - is_pessimistic_lock: bool, + pessimistic_action: PrewriteRequestPessimisticAction, max_commit_ts: impl Into, ) -> Error { must_prewrite_put_err_impl( @@ -378,7 +586,7 @@ pub fn must_retry_pessimistic_prewrite_put_err( secondary_keys, ts, for_update_ts, - is_pessimistic_lock, + pessimistic_action, max_commit_ts, true, Assertion::None, @@ -392,7 +600,7 @@ fn must_prewrite_delete_impl( pk: &[u8], ts: impl Into, for_update_ts: impl Into, - is_pessimistic_lock: bool, + pessimistic_action: PrewriteRequestPessimisticAction, ) { let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -409,7 +617,7 @@ fn must_prewrite_delete_impl( &default_txn_props(ts, pk, for_update_ts), mutation, &None, - is_pessimistic_lock, + pessimistic_action, ) .unwrap(); @@ -424,7 +632,7 @@ pub fn must_prewrite_delete( pk: &[u8], ts: impl Into, ) { - must_prewrite_delete_impl(engine, key, pk, ts, TimeStamp::zero(), false); + 
must_prewrite_delete_impl(engine, key, pk, ts, TimeStamp::zero(), SkipPessimisticCheck); } pub fn must_pessimistic_prewrite_delete( @@ -433,9 +641,9 @@ pub fn must_pessimistic_prewrite_delete( pk: &[u8], ts: impl Into, for_update_ts: impl Into, - is_pessimistic_lock: bool, + pessimistic_action: PrewriteRequestPessimisticAction, ) { - must_prewrite_delete_impl(engine, key, pk, ts, for_update_ts, is_pessimistic_lock); + must_prewrite_delete_impl(engine, key, pk, ts, for_update_ts, pessimistic_action); } fn must_prewrite_lock_impl( @@ -444,7 +652,7 @@ fn must_prewrite_lock_impl( pk: &[u8], ts: impl Into, for_update_ts: impl Into, - is_pessimistic_lock: bool, + pessimistic_action: PrewriteRequestPessimisticAction, ) { let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -461,7 +669,7 @@ fn must_prewrite_lock_impl( &default_txn_props(ts, pk, for_update_ts), mutation, &None, - is_pessimistic_lock, + pessimistic_action, ) .unwrap(); @@ -471,7 +679,7 @@ fn must_prewrite_lock_impl( } pub fn must_prewrite_lock(engine: &E, key: &[u8], pk: &[u8], ts: impl Into) { - must_prewrite_lock_impl(engine, key, pk, ts, TimeStamp::zero(), false); + must_prewrite_lock_impl(engine, key, pk, ts, TimeStamp::zero(), SkipPessimisticCheck); } pub fn must_prewrite_lock_err( @@ -492,7 +700,7 @@ pub fn must_prewrite_lock_err( &default_txn_props(ts, pk, TimeStamp::zero()), Mutation::make_lock(Key::from_raw(key)), &None, - false, + SkipPessimisticCheck, ) .unwrap_err(); } @@ -503,9 +711,9 @@ pub fn must_pessimistic_prewrite_lock( pk: &[u8], ts: impl Into, for_update_ts: impl Into, - is_pessimistic_lock: bool, + pessimistic_action: PrewriteRequestPessimisticAction, ) { - must_prewrite_lock_impl(engine, key, pk, ts, for_update_ts, is_pessimistic_lock); + must_prewrite_lock_impl(engine, key, pk, ts, for_update_ts, pessimistic_action); } pub fn must_rollback( diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs 
index 7fd4a45ff8a..24f69e9a237 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -147,7 +147,7 @@ impl WriteCommand for CheckTxnStatus { #[cfg(test)] pub mod tests { use concurrency_manager::ConcurrencyManager; - use kvproto::kvrpcpb::Context; + use kvproto::kvrpcpb::{Context, PrewriteRequestPessimisticAction::*}; use tikv_util::deadline::Deadline; use txn_types::{Key, WriteType}; @@ -388,7 +388,7 @@ pub mod tests { &Some(vec![]), 15, 16, - true, + DoPessimisticCheck, 17, ); // All following check_txn_status should return the unchanged lock information @@ -491,7 +491,7 @@ pub mod tests { &Some(vec![]), 20, 25, - true, + DoPessimisticCheck, 28, ); // the client must call check_txn_status with caller_start_ts == current_ts == @@ -520,7 +520,7 @@ pub mod tests { &Some(vec![]), 30, 35, - true, + DoPessimisticCheck, 36, ); // the client must call check_txn_status with caller_start_ts == current_ts == @@ -791,7 +791,7 @@ pub mod tests { must_large_txn_locked(&engine, k, ts(4, 0), 200, ts(135, 1), true); // Commit the key. 
- must_pessimistic_prewrite_put(&engine, k, v, k, ts(4, 0), ts(130, 0), true); + must_pessimistic_prewrite_put(&engine, k, v, k, ts(4, 0), ts(130, 0), DoPessimisticCheck); must_commit(&engine, k, ts(4, 0), ts(140, 0)); must_unlocked(&engine, k); must_get_commit_ts(&engine, k, ts(4, 0), ts(140, 0)); @@ -940,7 +940,7 @@ pub mod tests { k, &None, ts(300, 0), - false, + SkipPessimisticCheck, 100, TimeStamp::zero(), 1, @@ -1069,7 +1069,7 @@ pub mod tests { k, &None, ts(30, 0), - false, + SkipPessimisticCheck, 10, TimeStamp::zero(), 1, diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 7f748c352f7..3dc1a37697e 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -162,12 +162,12 @@ impl From for TypedCommand { req.take_context(), ) } else { - let is_pessimistic_lock = req.take_is_pessimistic_lock(); + let pessimistic_actions = req.take_pessimistic_actions(); let mutations = req .take_mutations() .into_iter() .map(Into::into) - .zip(is_pessimistic_lock.into_iter()) + .zip(pessimistic_actions) .collect(); PrewritePessimistic::new( mutations, @@ -803,7 +803,7 @@ pub mod test_util { pub fn pessimistic_prewrite( engine: &E, statistics: &mut Statistics, - mutations: Vec<(Mutation, bool)>, + mutations: Vec<(Mutation, PrewriteRequestPessimisticAction)>, primary: Vec, start_ts: u64, for_update_ts: u64, @@ -826,7 +826,7 @@ pub mod test_util { engine: &E, cm: ConcurrencyManager, statistics: &mut Statistics, - mutations: Vec<(Mutation, bool)>, + mutations: Vec<(Mutation, PrewriteRequestPessimisticAction)>, primary: Vec, start_ts: u64, for_update_ts: u64, diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index a6aa8af6f87..deca5733eb0 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -9,7 +9,10 @@ use std::mem; use engine_traits::CF_WRITE; -use kvproto::kvrpcpb::{AssertionLevel, ExtraOp}; +use kvproto::kvrpcpb::{ + AssertionLevel, 
ExtraOp, + PrewriteRequestPessimisticAction::{self, *}, +}; use tikv_kv::SnapshotExt; use txn_types::{Key, Mutation, OldValue, OldValues, TimeStamp, TxnExtra, Write, WriteType}; @@ -254,7 +257,7 @@ command! { cmd_ty => PrewriteResult, content => { /// The set of mutations to apply; the bool = is pessimistic lock. - mutations: Vec<(Mutation, bool)>, + mutations: Vec<(Mutation, PrewriteRequestPessimisticAction)>, /// The primary lock. Secondary locks (from `mutations`) will refer to the primary lock. primary: Vec, /// The transaction timestamp. @@ -308,7 +311,7 @@ impl std::fmt::Debug for PrewritePessimistic { impl PrewritePessimistic { #[cfg(test)] pub fn with_defaults( - mutations: Vec<(Mutation, bool)>, + mutations: Vec<(Mutation, PrewriteRequestPessimisticAction)>, primary: Vec, start_ts: TimeStamp, for_update_ts: TimeStamp, @@ -331,7 +334,7 @@ impl PrewritePessimistic { #[cfg(test)] pub fn with_1pc( - mutations: Vec<(Mutation, bool)>, + mutations: Vec<(Mutation, PrewriteRequestPessimisticAction)>, primary: Vec, start_ts: TimeStamp, for_update_ts: TimeStamp, @@ -549,7 +552,7 @@ impl Prewriter { let mut assertion_failure = None; for m in mem::take(&mut self.mutations) { - let is_pessimistic_lock = m.is_pessimistic_lock(); + let pessimistic_action = m.pessimistic_action(); let m = m.into_mutation(); let key = m.key().clone(); let mutation_type = m.mutation_type(); @@ -560,8 +563,7 @@ impl Prewriter { } let need_min_commit_ts = secondaries.is_some() || self.try_one_pc; - let prewrite_result = - prewrite(txn, reader, &props, m, secondaries, is_pessimistic_lock); + let prewrite_result = prewrite(txn, reader, &props, m, secondaries, pessimistic_action); match prewrite_result { Ok((ts, old_value)) if !(need_min_commit_ts && ts.is_zero()) => { if need_min_commit_ts && final_min_commit_ts < ts { @@ -781,7 +783,7 @@ struct Pessimistic { } impl PrewriteKind for Pessimistic { - type Mutation = (Mutation, bool); + type Mutation = (Mutation, PrewriteRequestPessimisticAction); 
fn txn_kind(&self) -> TransactionKind { TransactionKind::Pessimistic(self.for_update_ts) @@ -791,16 +793,17 @@ impl PrewriteKind for Pessimistic { /// The type of mutation and, optionally, its extra information, differing for /// the optimistic and pessimistic transaction. /// For optimistic txns, this is `Mutation`. -/// For pessimistic txns, this is `(Mutation, bool)`, where the bool indicates -/// whether the mutation takes a pessimistic lock or not. +/// For pessimistic txns, this is `(Mutation, PessimisticAction)`, where the +/// action indicates what kind of operations(checks) need to be performed. +/// The action also implies the type of the lock status. trait MutationLock { - fn is_pessimistic_lock(&self) -> bool; + fn pessimistic_action(&self) -> PrewriteRequestPessimisticAction; fn into_mutation(self) -> Mutation; } impl MutationLock for Mutation { - fn is_pessimistic_lock(&self) -> bool { - false + fn pessimistic_action(&self) -> PrewriteRequestPessimisticAction { + SkipPessimisticCheck } fn into_mutation(self) -> Mutation { @@ -808,8 +811,8 @@ impl MutationLock for Mutation { } } -impl MutationLock for (Mutation, bool) { - fn is_pessimistic_lock(&self) -> bool { +impl MutationLock for (Mutation, PrewriteRequestPessimisticAction) { + fn pessimistic_action(&self) -> PrewriteRequestPessimisticAction { self.1 } @@ -1185,7 +1188,10 @@ mod tests { must_acquire_pessimistic_lock(&engine, key, key, 10, 10); - let mutations = vec![(Mutation::make_put(Key::from_raw(key), value.to_vec()), true)]; + let mutations = vec![( + Mutation::make_put(Key::from_raw(key), value.to_vec()), + DoPessimisticCheck, + )]; let mut statistics = Statistics::default(); pessimistic_prewrite_with_cm( &engine, @@ -1209,8 +1215,14 @@ mod tests { must_acquire_pessimistic_lock(&engine, k1, k1, 8, 12); let mutations = vec![ - (Mutation::make_put(Key::from_raw(k1), v1.to_vec()), true), - (Mutation::make_put(Key::from_raw(k2), v2.to_vec()), false), + ( + Mutation::make_put(Key::from_raw(k1), 
v1.to_vec()), + DoPessimisticCheck, + ), + ( + Mutation::make_put(Key::from_raw(k2), v2.to_vec()), + SkipPessimisticCheck, + ), ]; statistics = Statistics::default(); pessimistic_prewrite_with_cm( @@ -1235,7 +1247,10 @@ mod tests { cm.update_max_ts(50.into()); must_acquire_pessimistic_lock(&engine, k1, k1, 20, 20); - let mutations = vec![(Mutation::make_put(Key::from_raw(k1), v1.to_vec()), true)]; + let mutations = vec![( + Mutation::make_put(Key::from_raw(k1), v1.to_vec()), + DoPessimisticCheck, + )]; statistics = Statistics::default(); let res = pessimistic_prewrite_with_cm( &engine, @@ -1272,8 +1287,14 @@ mod tests { .unwrap(); // Try 1PC on the two keys and it will fail on the second one. let mutations = vec![ - (Mutation::make_put(Key::from_raw(k1), v1.to_vec()), true), - (Mutation::make_put(Key::from_raw(k2), v2.to_vec()), false), + ( + Mutation::make_put(Key::from_raw(k1), v1.to_vec()), + DoPessimisticCheck, + ), + ( + Mutation::make_put(Key::from_raw(k2), v2.to_vec()), + SkipPessimisticCheck, + ), ]; must_acquire_pessimistic_lock(&engine, k1, k1, 60, 60); pessimistic_prewrite_with_cm( @@ -1369,7 +1390,10 @@ mod tests { must_acquire_pessimistic_lock(&engine, key, key, 10, 10); - let mutations = vec![(Mutation::make_put(Key::from_raw(key), value.to_vec()), true)]; + let mutations = vec![( + Mutation::make_put(Key::from_raw(key), value.to_vec()), + DoPessimisticCheck, + )]; let mut statistics = Statistics::default(); let cmd = super::PrewritePessimistic::new( mutations, @@ -1400,8 +1424,14 @@ mod tests { must_acquire_pessimistic_lock(&engine, k2, k1, 20, 20); let mutations = vec![ - (Mutation::make_put(Key::from_raw(k1), v1.to_vec()), true), - (Mutation::make_put(Key::from_raw(k2), v2.to_vec()), true), + ( + Mutation::make_put(Key::from_raw(k1), v1.to_vec()), + DoPessimisticCheck, + ), + ( + Mutation::make_put(Key::from_raw(k2), v2.to_vec()), + DoPessimisticCheck, + ), ]; let mut statistics = Statistics::default(); // calculated_ts > max_commit_ts @@ -1605,7 
+1635,10 @@ mod tests { }; let cmd = if case.pessimistic { PrewritePessimistic::new( - mutations.iter().map(|it| (it.clone(), false)).collect(), + mutations + .iter() + .map(|it| (it.clone(), SkipPessimisticCheck)) + .collect(), keys[0].to_vec(), start_ts, 0, @@ -1813,7 +1846,7 @@ mod tests { &Some(vec![]), 5, 5, - true, + DoPessimisticCheck, 10, ); must_commit(&engine, key, 5, 10); @@ -1821,7 +1854,10 @@ mod tests { // T2: start_ts = 15, commit_ts = 16, 1PC must_acquire_pessimistic_lock(&engine, key, key, 15, 15); let cmd = PrewritePessimistic::with_1pc( - vec![(Mutation::make_put(Key::from_raw(key), b"v2".to_vec()), true)], + vec![( + Mutation::make_put(Key::from_raw(key), b"v2".to_vec()), + DoPessimisticCheck, + )], key.to_vec(), 15.into(), 15.into(), @@ -1836,7 +1872,10 @@ mod tests { // Repeating the T1 prewrite request let cmd = PrewritePessimistic::new( - vec![(Mutation::make_put(Key::from_raw(key), b"v1".to_vec()), true)], + vec![( + Mutation::make_put(Key::from_raw(key), b"v1".to_vec()), + DoPessimisticCheck, + )], key.to_vec(), 5.into(), 200, @@ -1871,7 +1910,10 @@ mod tests { // Repeating the T2 prewrite request let cmd = PrewritePessimistic::with_1pc( - vec![(Mutation::make_put(Key::from_raw(key), b"v2".to_vec()), true)], + vec![( + Mutation::make_put(Key::from_raw(key), b"v2".to_vec()), + DoPessimisticCheck, + )], key.to_vec(), 15.into(), 15.into(), @@ -1909,11 +1951,11 @@ mod tests { let mutations = vec![ ( Mutation::make_put(Key::from_raw(b"k1"), b"v1".to_vec()), - false, + SkipPessimisticCheck, ), ( Mutation::make_put(Key::from_raw(b"k2"), b"v2".to_vec()), - true, + DoPessimisticCheck, ), ]; let res = pessimistic_prewrite_with_cm( @@ -1960,13 +2002,13 @@ mod tests { pk: &[u8], secondary_keys, ts: u64, - is_pessimistic_lock, + pessimistic_action, is_retry_request| { let mutation = Mutation::make_put(Key::from_raw(key), value.to_vec()); let mut ctx = Context::default(); ctx.set_is_retry_request(is_retry_request); let cmd = PrewritePessimistic::new( - 
vec![(mutation, is_pessimistic_lock)], + vec![(mutation, pessimistic_action)], pk.to_vec(), ts.into(), 100, @@ -1991,7 +2033,7 @@ mod tests { &Some(vec![b"k2".to_vec()]), 10, 10, - true, + DoPessimisticCheck, 15, ); must_pessimistic_prewrite_put_async_commit( @@ -2002,7 +2044,7 @@ mod tests { &Some(vec![]), 10, 10, - false, + SkipPessimisticCheck, 15, ); @@ -2011,7 +2053,16 @@ mod tests { must_commit(&engine, b"k2", 10, 20); // This is a re-sent prewrite. - prewrite_with_retry_flag(b"k2", b"v2", b"k1", Some(vec![]), 10, false, true).unwrap(); + prewrite_with_retry_flag( + b"k2", + b"v2", + b"k1", + Some(vec![]), + 10, + SkipPessimisticCheck, + true, + ) + .unwrap(); // Commit repeatedly, these operations should have no effect. must_commit(&engine, b"k1", 10, 25); must_commit(&engine, b"k2", 10, 25); @@ -2029,16 +2080,28 @@ mod tests { // A retrying non-pessimistic-lock prewrite request should not skip constraint // checks. Here it should take no effect, even there's already a newer version // after it. (No matter if it's async commit). - prewrite_with_retry_flag(b"k2", b"v2", b"k1", Some(vec![]), 10, false, true).unwrap(); + prewrite_with_retry_flag( + b"k2", + b"v2", + b"k1", + Some(vec![]), + 10, + SkipPessimisticCheck, + true, + ) + .unwrap(); must_unlocked(&engine, b"k2"); - prewrite_with_retry_flag(b"k2", b"v2", b"k1", None, 10, false, true).unwrap(); + prewrite_with_retry_flag(b"k2", b"v2", b"k1", None, 10, SkipPessimisticCheck, true) + .unwrap(); must_unlocked(&engine, b"k2"); // Committing still does nothing. must_commit(&engine, b"k2", 10, 25); // Try a different txn start ts (which haven't been successfully committed // before). It should report a PessimisticLockNotFound. 
- let err = prewrite_with_retry_flag(b"k2", b"v2", b"k1", None, 11, false, true).unwrap_err(); + let err = + prewrite_with_retry_flag(b"k2", b"v2", b"k1", None, 11, SkipPessimisticCheck, true) + .unwrap_err(); assert!(matches!( err, Error(box ErrorInner::Mvcc(MvccError( @@ -2048,7 +2111,8 @@ mod tests { must_unlocked(&engine, b"k2"); // However conflict still won't be checked if there's a non-retry request // arriving. - prewrite_with_retry_flag(b"k2", b"v2", b"k1", None, 10, false, false).unwrap(); + prewrite_with_retry_flag(b"k2", b"v2", b"k1", None, 10, SkipPessimisticCheck, false) + .unwrap(); must_locked(&engine, b"k2", 10); } @@ -2096,7 +2160,10 @@ mod tests { must_rollback(&engine, k1, 10, true); must_acquire_pessimistic_lock(&engine, k1, v1, 15, 15); let prewrite_cmd = PrewritePessimistic::with_defaults( - vec![(Mutation::make_put(Key::from_raw(k1), v1.to_vec()), true)], + vec![( + Mutation::make_put(Key::from_raw(k1), v1.to_vec()), + DoPessimisticCheck, + )], k1.to_vec(), 10.into(), 10.into(), @@ -2149,7 +2216,7 @@ mod tests { b"row", &None, t2_start_ts, - true, + DoPessimisticCheck, 1000, t2_start_ts, 1, @@ -2166,7 +2233,7 @@ mod tests { b"row", &None, t2_start_ts, - false, + SkipPessimisticCheck, 1000, t2_start_ts, 1, @@ -2188,11 +2255,11 @@ mod tests { vec![ ( Mutation::make_put(Key::from_raw(b"row"), b"value".to_vec()), - true, + DoPessimisticCheck, ), ( Mutation::make_put(Key::from_raw(b"index"), b"value".to_vec()), - false, + SkipPessimisticCheck, ), ], b"row".to_vec(), @@ -2211,11 +2278,11 @@ mod tests { vec![ ( Mutation::make_put(Key::from_raw(b"index"), b"value".to_vec()), - false, + SkipPessimisticCheck, ), ( Mutation::make_put(Key::from_raw(b"row"), b"value".to_vec()), - true, + DoPessimisticCheck, ), ], b"row".to_vec(), @@ -2240,7 +2307,7 @@ mod tests { &None, t1_start_ts, t1_start_ts, - true, + DoPessimisticCheck, 0, false, Assertion::NotExist, @@ -2258,7 +2325,7 @@ mod tests { &None, t1_start_ts, t1_start_ts, - false, + SkipPessimisticCheck, 
0, false, Assertion::NotExist, @@ -2335,7 +2402,7 @@ mod tests { &Some(vec![b"k2".to_vec()]), 5, 10, - true, + DoPessimisticCheck, 15, ); must_prewrite_put_impl( @@ -2345,7 +2412,7 @@ mod tests { b"k1", &Some(vec![]), 5.into(), - false, + SkipPessimisticCheck, 100, 10.into(), 1, @@ -2365,7 +2432,7 @@ mod tests { // (is_retry_request flag is not set, here we don't rely on it.) let mutation = Mutation::make_put(Key::from_raw(b"k2"), b"v2".to_vec()); let cmd = PrewritePessimistic::new( - vec![(mutation, false)], + vec![(mutation, SkipPessimisticCheck)], b"k1".to_vec(), 5.into(), 100, diff --git a/src/storage/txn/commands/rollback.rs b/src/storage/txn/commands/rollback.rs index ad22e966590..7e93e77dee6 100644 --- a/src/storage/txn/commands/rollback.rs +++ b/src/storage/txn/commands/rollback.rs @@ -75,6 +75,8 @@ impl WriteCommand for Rollback { #[cfg(test)] mod tests { + use kvproto::kvrpcpb::PrewriteRequestPessimisticAction::*; + use crate::storage::{txn::tests::*, TestEngineBuilder}; #[test] @@ -87,7 +89,7 @@ mod tests { must_rollback(&engine, k1, 10, false); must_rollback(&engine, k2, 10, false); - must_pessimistic_prewrite_put(&engine, k2, v, k1, 10, 10, false); + must_pessimistic_prewrite_put(&engine, k2, v, k1, 10, 10, SkipPessimisticCheck); must_rollback(&engine, k2, 10, false); } } diff --git a/src/storage/txn/store.rs b/src/storage/txn/store.rs index 0cd6c5b173b..2af968c21be 100644 --- a/src/storage/txn/store.rs +++ b/src/storage/txn/store.rs @@ -636,7 +636,7 @@ mod tests { use concurrency_manager::ConcurrencyManager; use engine_traits::{CfName, IterOptions, ReadOptions}; - use kvproto::kvrpcpb::{AssertionLevel, Context}; + use kvproto::kvrpcpb::{AssertionLevel, Context, PrewriteRequestPessimisticAction::*}; use tikv_kv::DummySnapshotExt; use super::*; @@ -708,7 +708,7 @@ mod tests { }, Mutation::make_put(Key::from_raw(key), key.to_vec()), &None, - false, + SkipPessimisticCheck, ) .unwrap(); } diff --git a/tests/benches/hierarchy/mvcc/mod.rs 
b/tests/benches/hierarchy/mvcc/mod.rs index e982465c621..f88533171c3 100644 --- a/tests/benches/hierarchy/mvcc/mod.rs +++ b/tests/benches/hierarchy/mvcc/mod.rs @@ -2,7 +2,7 @@ use concurrency_manager::ConcurrencyManager; use criterion::{black_box, BatchSize, Bencher, Criterion}; -use kvproto::kvrpcpb::{AssertionLevel, Context}; +use kvproto::kvrpcpb::{AssertionLevel, Context, PrewriteRequestPessimisticAction::*}; use test_util::KvGenerator; use tikv::storage::{ kv::{Engine, WriteData}, @@ -54,7 +54,7 @@ where &txn_props, Mutation::make_put(Key::from_raw(k), v.clone()), &None, - false, + SkipPessimisticCheck, ) .unwrap(); } @@ -98,7 +98,15 @@ fn mvcc_prewrite>(b: &mut Bencher<'_>, config: &B is_retry_request: false, assertion_level: AssertionLevel::Off, }; - prewrite(&mut txn, &mut reader, &txn_props, mutation, &None, false).unwrap(); + prewrite( + &mut txn, + &mut reader, + &txn_props, + mutation, + &None, + SkipPessimisticCheck, + ) + .unwrap(); } }, BatchSize::SmallInput, diff --git a/tests/benches/hierarchy/txn/mod.rs b/tests/benches/hierarchy/txn/mod.rs index 723d0eb3745..840d4ac81fa 100644 --- a/tests/benches/hierarchy/txn/mod.rs +++ b/tests/benches/hierarchy/txn/mod.rs @@ -2,7 +2,7 @@ use concurrency_manager::ConcurrencyManager; use criterion::{black_box, BatchSize, Bencher, Criterion}; -use kvproto::kvrpcpb::{AssertionLevel, Context}; +use kvproto::kvrpcpb::{AssertionLevel, Context, PrewriteRequestPessimisticAction::*}; use test_util::KvGenerator; use tikv::storage::{ kv::{Engine, WriteData}, @@ -50,7 +50,7 @@ where &txn_props, Mutation::make_put(Key::from_raw(k), v.clone()), &None, - false, + SkipPessimisticCheck, ) .unwrap(); } @@ -91,7 +91,15 @@ fn txn_prewrite>(b: &mut Bencher<'_>, config: &Be is_retry_request: false, assertion_level: AssertionLevel::Off, }; - prewrite(&mut txn, &mut reader, &txn_props, mutation, &None, false).unwrap(); + prewrite( + &mut txn, + &mut reader, + &txn_props, + mutation, + &None, + SkipPessimisticCheck, + ) + .unwrap(); let 
write_data = WriteData::from_modifies(txn.into_modifies()); black_box(engine.write(&ctx, write_data)).unwrap(); } diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 32bd2f05228..c602fc6e4f7 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -12,7 +12,7 @@ use std::{ use engine_traits::{Peekable, CF_RAFT}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ - kvrpcpb::*, + kvrpcpb::{PrewriteRequestPessimisticAction::*, *}, raft_serverpb::{PeerState, RaftMessage, RegionLocalState}, tikvpb::TikvClient, }; @@ -1450,7 +1450,7 @@ fn test_merge_pessimistic_locks_with_concurrent_prewrite() { let mut req = PrewriteRequest::default(); req.set_context(cluster.get_ctx(b"k0")); req.set_mutations(vec![mutation].into()); - req.set_is_pessimistic_lock(vec![true]); + req.set_pessimistic_actions(vec![DoPessimisticCheck]); req.set_start_version(10); req.set_for_update_ts(40); req.set_primary_lock(b"k0".to_vec()); diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index bf23267a06a..9ed57b94091 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -13,7 +13,9 @@ use collections::HashMap; use engine_traits::CF_WRITE; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ - kvrpcpb::{Mutation, Op, PessimisticLockRequest, PrewriteRequest}, + kvrpcpb::{ + Mutation, Op, PessimisticLockRequest, PrewriteRequest, PrewriteRequestPessimisticAction::*, + }, metapb::Region, raft_serverpb::RaftMessage, tikvpb::TikvClient, @@ -966,7 +968,7 @@ fn test_split_pessimistic_locks_with_concurrent_prewrite() { let mut req = PrewriteRequest::default(); req.set_context(cluster.get_ctx(b"a")); req.set_mutations(vec![mutation].into()); - req.set_is_pessimistic_lock(vec![true]); + req.set_pessimistic_actions(vec![DoPessimisticCheck]); req.set_start_version(10); req.set_for_update_ts(commit_ts + 20); 
req.set_primary_lock(b"a".to_vec()); diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 40ba7297b7c..7b92cc7065e 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -19,7 +19,7 @@ use grpcio::*; use kvproto::{ kvrpcpb::{ self, AssertionLevel, BatchRollbackRequest, CommandPri, CommitRequest, Context, GetRequest, - Op, PrewriteRequest, RawPutRequest, + Op, PrewriteRequest, PrewriteRequestPessimisticAction::*, RawPutRequest, }, tikvpb::TikvClient, }; @@ -398,7 +398,10 @@ fn test_pipelined_pessimistic_lock() { storage .sched_txn_command( commands::PrewritePessimistic::new( - vec![(Mutation::make_put(key.clone(), val.clone()), true)], + vec![( + Mutation::make_put(key.clone(), val.clone()), + DoPessimisticCheck, + )], key.to_raw().unwrap(), 10.into(), 3000, @@ -571,7 +574,7 @@ fn test_async_commit_prewrite_with_stale_max_ts() { commands::PrewritePessimistic::new( vec![( Mutation::make_put(Key::from_raw(b"k1"), b"v".to_vec()), - true, + DoPessimisticCheck, )], b"k1".to_vec(), 10.into(), @@ -705,7 +708,11 @@ fn test_async_apply_prewrite_impl( commands::PrewritePessimistic::new( vec![( Mutation::make_put(Key::from_raw(key), value.to_vec()), - need_lock, + if need_lock { + DoPessimisticCheck + } else { + SkipPessimisticCheck + }, )], key.to_vec(), start_ts, @@ -1036,7 +1043,10 @@ fn test_async_apply_prewrite_1pc_impl( storage .sched_txn_command( commands::PrewritePessimistic::new( - vec![(Mutation::make_put(Key::from_raw(key), value.to_vec()), true)], + vec![( + Mutation::make_put(Key::from_raw(key), value.to_vec()), + DoPessimisticCheck, + )], key.to_vec(), start_ts, 0, diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index de19d1a790c..cd5bec990c8 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -12,7 +12,10 @@ use std::{ use futures::executor::block_on; use 
grpcio::{ChannelBuilder, Environment}; use kvproto::{ - kvrpcpb::{self as pb, AssertionLevel, Context, Op, PessimisticLockRequest, PrewriteRequest}, + kvrpcpb::{ + self as pb, AssertionLevel, Context, Op, PessimisticLockRequest, PrewriteRequest, + PrewriteRequestPessimisticAction::*, + }, tikvpb::TikvClient, }; use raftstore::store::{util::new_peer, LocksStatus}; @@ -53,10 +56,10 @@ fn test_txn_failpoints() { let (k2, v2) = (b"k2", b"v2"); must_acquire_pessimistic_lock(&engine, k, k, 30, 30); fail::cfg("pessimistic_prewrite", "return()").unwrap(); - must_pessimistic_prewrite_put_err(&engine, k, v1, k, 30, 30, true); + must_pessimistic_prewrite_put_err(&engine, k, v1, k, 30, 30, DoPessimisticCheck); must_prewrite_put(&engine, k2, v2, k2, 31); fail::remove("pessimistic_prewrite"); - must_pessimistic_prewrite_put(&engine, k, v1, k, 30, 30, true); + must_pessimistic_prewrite_put(&engine, k, v1, k, 30, 30, DoPessimisticCheck); must_commit(&engine, k, 30, 40); must_commit(&engine, k2, 31, 41); must_get(&engine, k, 50, v1); diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 8095ebdf2ca..9a946a806bc 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -19,7 +19,7 @@ use grpcio_health::{proto::HealthCheckRequest, *}; use kvproto::{ coprocessor::*, debugpb, - kvrpcpb::{self, *}, + kvrpcpb::{self, PrewriteRequestPessimisticAction::*, *}, metapb, raft_serverpb, raft_serverpb::*, tikvpb::*, @@ -2073,7 +2073,7 @@ fn test_commands_write_detail() { mutation.set_op(Op::Put); mutation.set_value(v); prewrite_req.set_mutations(vec![mutation].into()); - prewrite_req.set_is_pessimistic_lock(vec![true]); + prewrite_req.set_pessimistic_actions(vec![DoPessimisticCheck]); prewrite_req.set_context(ctx.clone()); prewrite_req.set_primary_lock(k.clone()); prewrite_req.set_start_version(20); From 0030aeb90a840140a935fbb0181b6a62b5e680b3 Mon Sep 17 00:00:00 2001 From: BornChanger 
<97348524+BornChanger@users.noreply.github.com> Date: Thu, 25 Aug 2022 22:50:21 +0800 Subject: [PATCH 0172/1149] *: support read quota limit for analyze (#13302) ref tikv/tikv#13257, close tikv/tikv#13301 Signed-off-by: BornChanger Co-authored-by: Ti Chi Robot --- src/coprocessor/statistics/analyze.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index e11558e73b3..8f7b8c57dde 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -392,6 +392,7 @@ impl RowSampleBuilder { } let mut sample = self.quota_limiter.new_sample(!self.is_auto_analyze); + let mut read_size: usize = 0; { let _guard = sample.observe_cpu(); let result = self.data.next_batch(BATCH_MAX_SIZE); @@ -431,6 +432,7 @@ impl RowSampleBuilder { } else { collation_key_vals.push(Vec::new()); } + read_size += val.len(); column_vals.push(val); } collector.mut_base().count += 1; @@ -444,6 +446,7 @@ impl RowSampleBuilder { } } + sample.add_read_bytes(read_size); // Don't let analyze bandwidth limit the quota limiter, this is already limited // in rate limiter. let quota_delay = { From 7415946640f817245ccedbd95397991ec0650877 Mon Sep 17 00:00:00 2001 From: Lintian Shi Date: Mon, 29 Aug 2022 13:18:23 +0800 Subject: [PATCH 0173/1149] raftstore: fix checking for snapshot last index (#13088) close tikv/tikv#12618 using commit instead of last_index to check gap between existing raft logs and snapshot when recovering from applying state. 
Signed-off-by: LintianShi Signed-off-by: Lintian Shi Co-authored-by: LintianShi Co-authored-by: Ti Chi Robot Co-authored-by: Jay --- .../raftstore/src/store/peer_storage.rs | 21 ++-- tests/failpoints/cases/test_snap.rs | 104 ++++++++++++++++++ 2 files changed, 113 insertions(+), 12 deletions(-) diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 4a36f385648..cf70234c841 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -33,10 +33,7 @@ use tikv_util::{ box_err, box_try, debug, defer, error, info, time::Instant, warn, worker::Scheduler, }; -use super::{ - entry_storage::last_index, metrics::*, worker::RegionTask, SnapEntry, SnapKey, SnapManager, - SnapshotStatistics, -}; +use super::{metrics::*, worker::RegionTask, SnapEntry, SnapKey, SnapManager, SnapshotStatistics}; use crate::{ store::{ async_io::write::WriteTask, entry_storage::EntryStorage, fsm::GenSnapTask, @@ -147,14 +144,14 @@ pub fn recover_from_applying_state( let raft_state = box_try!(engines.raft.get_raft_state(region_id)).unwrap_or_default(); - // if we recv append log when applying snapshot, last_index in raft_local_state - // will larger than snapshot_index. since raft_local_state is written to - // raft engine, and raft write_batch is written after kv write_batch, - // raft_local_state may wrong if restart happen between the two write. so we - // copy raft_local_state to kv engine (snapshot_raft_state), and set - // snapshot_raft_state.last_index = snapshot_index. after restart, we need - // check last_index. - if last_index(&snapshot_raft_state) > last_index(&raft_state) { + // since raft_local_state is written to raft engine, and + // raft write_batch is written after kv write_batch. raft_local_state may wrong + // if restart happen between the two write. 
so we copy raft_local_state to + // kv engine (snapshot_raft_state), and set + // snapshot_raft_state.hard_state.commit = snapshot_index. after restart, we + // need check commit. + if snapshot_raft_state.get_hard_state().get_commit() > raft_state.get_hard_state().get_commit() + { // There is a gap between existing raft logs and snapshot. Clean them up. engines .raft diff --git a/tests/failpoints/cases/test_snap.rs b/tests/failpoints/cases/test_snap.rs index 3507fc268d4..93acfffc258 100644 --- a/tests/failpoints/cases/test_snap.rs +++ b/tests/failpoints/cases/test_snap.rs @@ -793,3 +793,107 @@ fn test_snapshot_recover_from_raft_write_failure() { cluster.must_put(format!("k1{}", i).as_bytes(), b"v1"); } } + +/// Test whether applying snapshot is resumed properly when last_index before +/// applying snapshot is larger than the snapshot index and applying is aborted +/// between kv write and raft write. +#[test] +fn test_snapshot_recover_from_raft_write_failure_with_uncommitted_log() { + let mut cluster = new_server_cluster(0, 3); + configure_for_snapshot(&mut cluster); + // Avoid triggering snapshot at final step. + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(10); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + // We use three peers([1, 2, 3]) for this test. + cluster.run(); + + sleep_ms(500); + + // Guarantee peer 1 is leader. + cluster.must_transfer_leader(1, new_peer(1, 1)); + + cluster.must_put(b"k1", b"v1"); + for i in 1..4 { + must_get_equal(&cluster.get_engine(i), b"k1", b"v1"); + } + + // Guarantee that peer 2 and 3 won't receive any entries, + // so these entries cannot be committed. + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(1, 1) + .msg_type(MessageType::MsgAppend) + .direction(Direction::Send), + )); + + // Peer 1 appends entries which is never committed. 
+ for i in 1..20 { + let region = cluster.get_region(b""); + let reqs = vec![new_put_cmd(format!("k2{}", i).as_bytes(), b"v2")]; + let mut put = new_request( + region.get_id(), + region.get_region_epoch().clone(), + reqs, + false, + ); + put.mut_header().set_peer(new_peer(1, 1)); + let _ = cluster.call_command_on_node(1, put, Duration::from_secs(1)); + } + + for i in 1..4 { + must_get_none(&cluster.get_engine(i), b"k210"); + } + // Now peer 1 should have much longer log than peer 2 and 3. + + // Hack: down peer 1 in order to change leader to peer 3. + cluster.stop_node(1); + sleep_ms(100); + cluster.clear_send_filters(); + sleep_ms(100); + cluster.must_transfer_leader(1, new_peer(3, 3)); + + for i in 0..20 { + cluster.must_put(format!("k3{}", i).as_bytes(), b"v3"); + } + + // Peer 1 back to cluster + cluster.add_send_filter(IsolationFilterFactory::new(1)); + sleep_ms(100); + cluster.run_node(1).unwrap(); + sleep_ms(100); + must_get_none(&cluster.get_engine(1), b"k319"); + must_get_equal(&cluster.get_engine(2), b"k319", b"v3"); + must_get_equal(&cluster.get_engine(3), b"k319", b"v3"); + + // Raft writes are dropped. + let raft_before_save_on_store_1_fp = "raft_before_save_on_store_1"; + fail::cfg(raft_before_save_on_store_1_fp, "return").unwrap(); + // Skip applying snapshot into RocksDB to keep peer status in Applying. + let apply_snapshot_fp = "apply_pending_snapshot"; + fail::cfg(apply_snapshot_fp, "return()").unwrap(); + cluster.clear_send_filters(); + // Wait for leader send snapshot. + sleep_ms(100); + + cluster.stop_node(1); + fail::remove(raft_before_save_on_store_1_fp); + fail::remove(apply_snapshot_fp); + // Recover from applying state and validate states, + // may fail in this step due to invalid states. + cluster.run_node(1).unwrap(); + // Snapshot is applied. + must_get_equal(&cluster.get_engine(1), b"k319", b"v3"); + let mut ents = Vec::new(); + cluster + .get_raft_engine(1) + .get_all_entries_to(1, &mut ents) + .unwrap(); + // Raft logs are cleared. 
+ assert!(ents.is_empty()); + + // Final step: append some more entries to make sure raftdb is healthy. + for i in 20..25 { + cluster.must_put(format!("k1{}", i).as_bytes(), b"v1"); + } +} From 68f99ae034376f5629d80fa7712796a61dc5d50e Mon Sep 17 00:00:00 2001 From: Potato Date: Mon, 29 Aug 2022 19:28:23 +0800 Subject: [PATCH 0174/1149] storage: record and return asycn snapshot metric (#13358) ref pingcap/kvproto#974, ref pingcap/kvproto#978, ref tikv/tikv#12362 This commit records read_index_propose_wait_nanos, read_index_confirm_wait_nanos and read_pool_schedule_wait_nanos Signed-off-by: OneSizeFitQuorum --- Cargo.lock | 3 +- Cargo.toml | 2 +- components/backup-stream/src/event_loader.rs | 2 +- components/cdc/src/endpoint.rs | 2 +- components/cdc/src/initializer.rs | 2 +- components/raftstore-v2/Cargo.toml | 1 + .../src/router/response_channel.rs | 9 +- components/raftstore/src/store/fsm/apply.rs | 16 +-- components/raftstore/src/store/fsm/peer.rs | 31 +++-- components/raftstore/src/store/msg.rs | 51 +++++--- components/raftstore/src/store/peer.rs | 29 +++-- components/raftstore/src/store/worker/read.rs | 22 ++-- components/resolved_ts/src/scanner.rs | 2 +- components/test_raftstore/src/util.rs | 22 +++- components/tracker/src/lib.rs | 6 + src/coprocessor/endpoint.rs | 13 +- src/coprocessor/tracker.rs | 18 ++- src/import/sst_service.rs | 2 +- src/server/raftkv.rs | 2 +- src/server/service/debug.rs | 4 +- src/server/service/kv.rs | 2 +- src/storage/mod.rs | 8 ++ tests/benches/misc/raftkv/mod.rs | 4 +- tests/failpoints/cases/mod.rs | 1 + .../cases/test_cmd_epoch_checker.rs | 4 +- .../cases/test_read_execution_tracker.rs | 121 ++++++++++++++++++ tests/failpoints/cases/test_stale_read.rs | 2 +- .../integrations/raftstore/test_lease_read.rs | 2 +- tests/integrations/server/kv_service.rs | 2 +- 29 files changed, 293 insertions(+), 92 deletions(-) create mode 100644 tests/failpoints/cases/test_read_execution_tracker.rs diff --git a/Cargo.lock b/Cargo.lock index 
52ad7912203..a5c71cef10d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2627,7 +2627,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#a0f02b6efcee6112bdc313988bf6c0ae3f83c07d" +source = "git+https://github.com/pingcap/kvproto.git#9cc5e1ddfda3aec6eddfc09de1d0072ebbd7bb21" dependencies = [ "futures 0.3.15", "grpcio", @@ -4213,6 +4213,7 @@ dependencies = [ "test_util", "tikv_util", "time", + "tracker", "txn_types", ] diff --git a/Cargo.toml b/Cargo.toml index 9bbea00262c..e1dad6c5fa3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -207,7 +207,7 @@ procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229 # kvproto at the same time. # After the PR to kvproto is merged, remember to comment this out and run `cargo update -p kvproto`. # [patch.'https://github.com/pingcap/kvproto'] -# kvproto = {git = "https://github.com/your_github_id/kvproto", branch="your_branch"} +# kvproto = { git = "https://github.com/your_github_id/kvproto", branch="your_branch" } [workspace] # See https://github.com/rust-lang/rfcs/blob/master/text/2957-cargo-features2.md diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 5aade374249..0f83d4726e4 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -295,7 +295,7 @@ where SignificantMsg::CaptureChange { cmd, region_epoch: region.get_region_epoch().clone(), - callback: Callback::Read(Box::new(|snapshot| { + callback: Callback::read(Box::new(|snapshot| { if snapshot.response.get_header().has_error() { callback(Err(Error::RaftRequest( snapshot.response.get_header().get_error().clone(), diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index d9938006ca1..2e0253b23a9 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -1242,7 +1242,7 @@ impl, E: KvEngine> Endpoint { let (tx, rx) = 
tokio::sync::oneshot::channel(); if let Err(e) = raft_router_clone.significant_send( region_id, - SignificantMsg::LeaderCallback(Callback::Read(Box::new(move |resp| { + SignificantMsg::LeaderCallback(Callback::read(Box::new(move |resp| { let resp = if resp.response.get_header().has_error() { None } else { diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index f6a2ce2885c..418e0c23a0a 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -144,7 +144,7 @@ impl Initializer { SignificantMsg::CaptureChange { cmd: change_cmd, region_epoch, - callback: Callback::Read(Box::new(move |resp| { + callback: Callback::read(Box::new(move |resp| { if let Err(e) = sched.schedule(Task::InitDownstream { region_id, downstream_id, diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index f526aeda9c4..c7d920e4011 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -48,6 +48,7 @@ slog = "2.3" smallvec = "1.4" tikv_util = { path = "../tikv_util", default-features = false } time = "0.1" +tracker = { path = "../tracker" } txn_types = { path = "../txn_types", default-features = false } [dev-dependencies] diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index ae43bd07c25..e87095215b8 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -33,6 +33,7 @@ use raftstore::store::{ }; use smallvec::SmallVec; use tikv_util::memory::HeapSize; +use tracker::TrackerToken; /// A struct allows to watch and notify specific events. 
/// @@ -282,11 +283,11 @@ impl WriteCallback for CmdResChannel { self.core.notify_event(Self::COMMITTED_EVENT); } - fn trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>> { + fn write_trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>> { None } - fn trackers_mut(&mut self) -> Option<&mut SmallVec<[TimeTracker; 4]>> { + fn write_trackers_mut(&mut self) -> Option<&mut SmallVec<[TimeTracker; 4]>> { None } @@ -393,6 +394,10 @@ impl ReadCallback for QueryResChannel { } mem::forget(self); } + + fn read_tracker(&self) -> Option<&TrackerToken> { + None + } } impl Drop for QueryResChannel { diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index d44cca3668b..6d1d1881046 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -529,7 +529,7 @@ where .applied_batch .cb_batch .iter() - .flat_map(|(cb, _)| cb.trackers()) + .flat_map(|(cb, _)| cb.write_trackers()) .flat_map(|trackers| trackers.iter().map(|t| t.as_tracker_token())) .flatten() .collect(); @@ -568,7 +568,7 @@ where // Invoke callbacks let now = std::time::Instant::now(); for (cb, resp) in cb_batch.drain(..) 
{ - for tracker in cb.trackers().iter().flat_map(|v| *v) { + for tracker in cb.write_trackers().iter().flat_map(|v| *v) { tracker.observe(now, &self.apply_time, |t| &mut t.metrics.apply_time_nanos); } cb.invoke_with_response(resp); @@ -3000,7 +3000,7 @@ impl Apply { pub fn on_schedule(&mut self, metrics: &RaftMetrics) { let now = std::time::Instant::now(); for cb in &mut self.cbs { - if let Some(trackers) = cb.cb.trackers_mut() { + if let Some(trackers) = cb.cb.write_trackers_mut() { for tracker in trackers { tracker.observe(now, &metrics.store_time, |t| { t.metrics.write_instant = Some(now); @@ -3770,7 +3770,7 @@ where for tracker in apply .cbs .iter() - .flat_map(|p| p.cb.trackers()) + .flat_map(|p| p.cb.write_trackers()) .flat_map(|ts| ts.iter().flat_map(|t| t.as_tracker_token())) { GLOBAL_TRACKERS.with_tracker(tracker, |t| { @@ -5985,7 +5985,7 @@ mod tests { Msg::Change { region_epoch: region_epoch.clone(), cmd: ChangeObserver::from_cdc(1, observe_handle.clone()), - cb: Callback::Read(Box::new(|resp: ReadResponse| { + cb: Callback::read(Box::new(|resp: ReadResponse| { assert!(!resp.response.get_header().has_error()); assert!(resp.snapshot.is_some()); let snap = resp.snapshot.unwrap(); @@ -6054,7 +6054,7 @@ mod tests { Msg::Change { region_epoch, cmd: ChangeObserver::from_cdc(2, observe_handle), - cb: Callback::Read(Box::new(|resp: ReadResponse<_>| { + cb: Callback::read(Box::new(|resp: ReadResponse<_>| { assert!( resp.response .get_header() @@ -6226,7 +6226,7 @@ mod tests { Msg::Change { region_epoch: region_epoch.clone(), cmd: ChangeObserver::from_cdc(1, observe_handle.clone()), - cb: Callback::Read(Box::new(|resp: ReadResponse<_>| { + cb: Callback::read(Box::new(|resp: ReadResponse<_>| { assert!(!resp.response.get_header().has_error(), "{:?}", resp); assert!(resp.snapshot.is_some()); })), @@ -6381,7 +6381,7 @@ mod tests { Msg::Change { region_epoch, cmd: ChangeObserver::from_cdc(1, observe_handle), - cb: Callback::Read(Box::new(move |resp: ReadResponse<_>| { 
+ cb: Callback::read(Box::new(move |resp: ReadResponse<_>| { assert!( resp.response.get_header().get_error().has_epoch_not_match(), "{:?}", diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 9b354fb0842..5497d2ad1d9 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -52,11 +52,12 @@ use tikv_util::{ box_err, debug, defer, error, escape, info, is_zero_duration, mpsc::{self, LooseBoundedSender, Receiver}, sys::{disk::DiskUsage, memory_usage_reaches_high_water}, - time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant}, + time::{monotonic_raw_now, Instant as TiInstant}, trace, warn, worker::{ScheduleError, Scheduler}, Either, }; +use tracker::GLOBAL_TRACKERS; use txn_types::WriteBatchFlags; use self::memtrace::*; @@ -92,7 +93,7 @@ use crate::{ RegionTask, SplitCheckTask, }, AbstractPeer, CasualMessage, Config, LocksStatus, MergeResultKind, PdTask, PeerMsg, - PeerTick, ProposalContext, RaftCmdExtraOpts, RaftCommand, RaftlogFetchResult, + PeerTick, ProposalContext, RaftCmdExtraOpts, RaftCommand, RaftlogFetchResult, ReadCallback, SignificantMsg, SnapKey, StoreMsg, WriteCallback, }, Error, Result, @@ -517,7 +518,7 @@ where let tokens: SmallVec<[TimeTracker; 4]> = cbs .iter_mut() - .filter_map(|cb| cb.trackers().map(|t| t[0])) + .filter_map(|cb| cb.write_trackers().map(|t| t[0])) .collect(); let mut cb = Callback::write_ext( @@ -532,7 +533,7 @@ where committed_cb, ); - if let Some(trackers) = cb.trackers_mut() { + if let Some(trackers) = cb.write_trackers_mut() { *trackers = tokens; } @@ -608,10 +609,17 @@ where } } PeerMsg::RaftCommand(cmd) => { + let propose_time = cmd.send_time.saturating_elapsed(); self.ctx .raft_metrics .propose_wait_time - .observe(duration_to_sec(cmd.send_time.saturating_elapsed()) as f64); + .observe(propose_time.as_secs_f64()); + cmd.callback.read_tracker().map(|tracker| { + GLOBAL_TRACKERS.with_tracker(*tracker, |t| { + 
t.metrics.read_index_propose_wait_nanos = + propose_time.as_nanos() as u64; + }) + }); if let Some(Err(e)) = cmd.extra_opts.deadline.map(|deadline| deadline.check()) { cmd.callback.invoke_with_response(new_error(e.into())); @@ -625,8 +633,8 @@ where // so that normal writes can be rejected when proposing if the // store's disk is full. && ((self.ctx.self_disk_usage == DiskUsage::Normal - && !self.fsm.peer.disk_full_peers.majority()) - || cmd.extra_opts.disk_full_opt == DiskFullOpt::NotAllowedOnFull) + && !self.fsm.peer.disk_full_peers.majority()) + || cmd.extra_opts.disk_full_opt == DiskFullOpt::NotAllowedOnFull) { self.fsm.batch_req_builder.add(cmd, req_size); if self.fsm.batch_req_builder.should_finish(&self.ctx.cfg) { @@ -1001,8 +1009,7 @@ where || util::is_epoch_stale( region.get_region_epoch(), self.fsm.peer.region().get_region_epoch(), - ) - { + ) { // Stale message return; } @@ -1205,7 +1212,7 @@ where let apply_router = self.ctx.apply_router.clone(); self.propose_raft_command_internal( msg, - Callback::Read(Box::new(move |resp| { + Callback::read(Box::new(move |resp| { // Return the error if resp.response.get_header().has_error() { cb.invoke_read(resp); @@ -2200,7 +2207,7 @@ where cmd.mut_header().set_read_quorum(true); self.propose_raft_command_internal( cmd, - Callback::Read(Box::new(|_| ())), + Callback::read(Box::new(|_| ())), DiskFullOpt::AllowedOnAlmostFull, ); } @@ -4831,7 +4838,7 @@ where if self.ctx.raft_metrics.waterfall_metrics { let now = Instant::now(); - for tracker in cb.trackers().iter().flat_map(|v| *v) { + for tracker in cb.write_trackers().iter().flat_map(|v| *v) { tracker.observe(now, &self.ctx.raft_metrics.wf_batch_wait, |t| { &mut t.metrics.wf_batch_wait_nanos }); diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 619a18e3fb5..5b3221e8c19 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -22,7 +22,7 @@ use pd_client::BucketMeta; use 
raft::SnapshotStatus; use smallvec::{smallvec, SmallVec}; use tikv_util::{deadline::Deadline, escape, memory::HeapSize, time::Instant}; -use tracker::{get_tls_tracker_token, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}; +use tracker::{get_tls_tracker_token, TrackerToken, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}; use super::{local_metrics::TimeTracker, worker::FetchedLogs, AbstractPeer, RegionSnapshot}; use crate::store::{ @@ -89,7 +89,11 @@ pub enum Callback { /// No callback. None, /// Read callback. - Read(BoxReadCallback), + Read { + cb: BoxReadCallback, + + tracker: TrackerToken, + }, /// Write callback. Write { cb: BoxWriteCallback, @@ -116,6 +120,11 @@ impl Callback where S: Snapshot, { + pub fn read(cb: BoxReadCallback) -> Self { + let tracker = get_tls_tracker_token(); + Callback::Read { cb, tracker } + } + pub fn write(cb: BoxWriteCallback) -> Self { Self::write_ext(cb, None, None) } @@ -147,13 +156,13 @@ where pub fn invoke_with_response(self, resp: RaftCmdResponse) { match self { Callback::None => (), - Callback::Read(read) => { + Callback::Read { cb, .. } => { let resp = ReadResponse { response: resp, snapshot: None, txn_extra_op: TxnExtraOp::Noop, }; - read(resp); + cb(resp); } Callback::Write { cb, .. } => { let resp = WriteResponse { response: resp }; @@ -165,19 +174,19 @@ where } pub fn has_proposed_cb(&self) -> bool { - let Callback::Write { proposed_cb, .. } = self else { return false }; + let Callback::Write { proposed_cb, .. } = self else { return false; }; proposed_cb.is_some() } pub fn invoke_proposed(&mut self) { - let Callback::Write { proposed_cb, .. } = self else { return }; + let Callback::Write { proposed_cb, .. } = self else { return; }; if let Some(cb) = proposed_cb.take() { cb(); } } pub fn invoke_committed(&mut self) { - let Callback::Write { committed_cb, .. } = self else { return }; + let Callback::Write { committed_cb, .. 
} = self else { return; }; if let Some(cb) = committed_cb.take() { cb(); } @@ -185,18 +194,18 @@ where pub fn invoke_read(self, args: ReadResponse) { match self { - Callback::Read(read) => read(args), - other => panic!("expect Callback::Read(..), got {:?}", other), + Callback::Read { cb, .. } => cb(args), + other => panic!("expect Callback::read(..), got {:?}", other), } } pub fn take_proposed_cb(&mut self) -> Option { - let Callback::Write { proposed_cb, .. } = self else { return None }; + let Callback::Write { proposed_cb, .. } = self else { return None; }; proposed_cb.take() } pub fn take_committed_cb(&mut self) -> Option { - let Callback::Write { committed_cb, .. } = self else { return None }; + let Callback::Write { committed_cb, .. } = self else { return None; }; committed_cb.take() } } @@ -205,6 +214,7 @@ pub trait ReadCallback: ErrorCallback { type Response; fn set_result(self, result: Self::Response); + fn read_tracker(&self) -> Option<&TrackerToken>; } pub trait WriteCallback: ErrorCallback { @@ -212,8 +222,8 @@ pub trait WriteCallback: ErrorCallback { fn notify_proposed(&mut self); fn notify_committed(&mut self); - fn trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>>; - fn trackers_mut(&mut self) -> Option<&mut SmallVec<[TimeTracker; 4]>>; + fn write_trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>>; + fn write_trackers_mut(&mut self) -> Option<&mut SmallVec<[TimeTracker; 4]>>; fn set_result(self, result: Self::Response); } @@ -229,6 +239,11 @@ impl ReadCallback for Callback { fn set_result(self, result: Self::Response) { self.invoke_read(result); } + + fn read_tracker(&self) -> Option<&TrackerToken> { + let Callback::Read { tracker, .. } = self else { return None; }; + Some(tracker) + } } impl WriteCallback for Callback { @@ -245,14 +260,14 @@ impl WriteCallback for Callback { } #[inline] - fn trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>> { - let Callback::Write { trackers, .. 
} = self else { return None }; + fn write_trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>> { + let Callback::Write { trackers, .. } = self else { return None; }; Some(trackers) } #[inline] - fn trackers_mut(&mut self) -> Option<&mut SmallVec<[TimeTracker; 4]>> { - let Callback::Write { trackers, .. } = self else { return None }; + fn write_trackers_mut(&mut self) -> Option<&mut SmallVec<[TimeTracker; 4]>> { + let Callback::Write { trackers, .. } = self else { return None; }; Some(trackers) } @@ -281,7 +296,7 @@ where fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Callback::None => write!(fmt, "Callback::None"), - Callback::Read(_) => write!(fmt, "Callback::Read(..)"), + Callback::Read { .. } => write!(fmt, "Callback::Read(..)"), Callback::Write { .. } => write!(fmt, "Callback::Write(..)"), #[cfg(any(test, feature = "testexport"))] Callback::Test { .. } => write!(fmt, "Callback::Test(..)"), diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 89ed6eeef7d..6b3ec4c3456 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -65,6 +65,7 @@ use tikv_util::{ Either, }; use time::Timespec; +use tracker::GLOBAL_TRACKERS; use txn_types::WriteBatchFlags; use uuid::Uuid; @@ -100,14 +101,15 @@ use crate::{ HeartbeatTask, RaftlogFetchTask, RaftlogGcTask, ReadDelegate, ReadExecutor, ReadProgress, RegionTask, SplitCheckTask, }, - Callback, Config, GlobalReplicationState, PdTask, ReadIndexContext, ReadResponse, TxnExt, - WriteCallback, RAFT_INIT_LOG_INDEX, + Callback, Config, GlobalReplicationState, PdTask, ReadCallback, ReadIndexContext, + ReadResponse, TxnExt, WriteCallback, RAFT_INIT_LOG_INDEX, }, Error, Result, }; const SHRINK_CACHE_CAPACITY: usize = 64; -const MIN_BCAST_WAKE_UP_INTERVAL: u64 = 1_000; // 1s +const MIN_BCAST_WAKE_UP_INTERVAL: u64 = 1_000; +// 1s const REGION_READ_PROGRESS_CAP: usize = 128; #[doc(hidden)] pub const 
MAX_COMMITTED_SIZE_PER_READY: u64 = 16 * 1024 * 1024; @@ -143,7 +145,7 @@ impl ProposalQueue { .and_then(|i| { self.queue[i] .cb - .trackers() + .write_trackers() .map(|ts| (self.queue[i].term, ts)) }) } @@ -1652,7 +1654,7 @@ where { let proposal = &self.proposals.queue[idx]; if term == proposal.term { - for tracker in proposal.cb.trackers().iter().flat_map(|v| v.iter()) { + for tracker in proposal.cb.write_trackers().iter().flat_map(|v| v.iter()) { tracker.observe(std_now, &ctx.raft_metrics.wf_send_proposal, |t| { &mut t.metrics.wf_send_proposal_nanos }); @@ -2504,7 +2506,7 @@ where // Update it after unstable entries pagination is introduced. debug_assert!(ready.entries().last().map_or_else( || true, - |entry| entry.index == self.raft_group.raft.raft_log.last_index() + |entry| entry.index == self.raft_group.raft.raft_log.last_index(), )); if self.memtrace_raft_entries != 0 { MEMTRACE_RAFT_ENTRIES.trace(TraceEvent::Sub(self.memtrace_raft_entries)); @@ -3071,7 +3073,14 @@ where "peer_id" => self.peer.get_id(), ); RAFT_READ_INDEX_PENDING_COUNT.sub(read.cmds().len() as i64); + let time = monotonic_raw_now(); for (req, cb, mut read_index) in read.take_cmds().drain(..) 
{ + cb.read_tracker().map(|tracker| { + GLOBAL_TRACKERS.with_tracker(*tracker, |t| { + t.metrics.read_index_confirm_wait_nanos = + (time - read.propose_time).to_std().unwrap().as_nanos() as u64; + }) + }); // leader reports key is locked if let Some(locked) = read.locked.take() { let mut response = raft_cmdpb::Response::default(); @@ -3588,9 +3597,9 @@ where if peer.get_id() == self.peer_id() && (change_type == ConfChangeType::RemoveNode - // In Joint confchange, the leader is allowed to be DemotingVoter - || (kind == ConfChangeKind::Simple - && change_type == ConfChangeType::AddLearnerNode)) + // In Joint confchange, the leader is allowed to be DemotingVoter + || (kind == ConfChangeKind::Simple + && change_type == ConfChangeType::AddLearnerNode)) && !ctx.cfg.allow_remove_leader() { return Err(box_err!( @@ -5415,6 +5424,8 @@ pub trait RequestInspector { return Ok(RequestPolicy::ProposeNormal); } + fail_point!("perform_read_index", |_| Ok(RequestPolicy::ReadIndex)); + let flags = WriteBatchFlags::from_bits_check(req.get_header().get_flags()); if flags.contains(WriteBatchFlags::STALE_READ) { return Ok(RequestPolicy::StaleRead); diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 3c5c05f4717..a3c3878cf68 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -1016,7 +1016,7 @@ mod tests { reader.propose_raft_command( None, cmd.clone(), - Callback::Read(Box::new(|resp| { + Callback::read(Box::new(|resp| { panic!("unexpected invoke, {:?}", resp); })), ); @@ -1135,7 +1135,7 @@ mod tests { meta.readers.get_mut(&1).unwrap().update(pg); } let task = - RaftCommand::::new(cmd.clone(), Callback::Read(Box::new(move |_| {}))); + RaftCommand::::new(cmd.clone(), Callback::read(Box::new(move |_| {}))); must_not_redirect(&mut reader, &rx, task); assert_eq!( TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), @@ -1145,7 +1145,7 @@ mod tests { 
// Let's read. let task = RaftCommand::::new( cmd.clone(), - Callback::Read(Box::new(move |resp: ReadResponse| { + Callback::read(Box::new(move |resp: ReadResponse| { let snap = resp.snapshot.unwrap(); assert_eq!(snap.get_region(), ®ion1); })), @@ -1172,7 +1172,7 @@ mod tests { reader.propose_raft_command( None, cmd_store_id, - Callback::Read(Box::new(move |resp: ReadResponse| { + Callback::read(Box::new(move |resp: ReadResponse| { let err = resp.response.get_header().get_error(); assert!(err.has_store_not_match()); assert!(resp.snapshot.is_none()); @@ -1196,7 +1196,7 @@ mod tests { reader.propose_raft_command( None, cmd_peer_id, - Callback::Read(Box::new(move |resp: ReadResponse| { + Callback::read(Box::new(move |resp: ReadResponse| { assert!( resp.response.get_header().has_error(), "{:?}", @@ -1221,7 +1221,7 @@ mod tests { reader.propose_raft_command( None, cmd_term, - Callback::Read(Box::new(move |resp: ReadResponse| { + Callback::read(Box::new(move |resp: ReadResponse| { let err = resp.response.get_header().get_error(); assert!(err.has_stale_command(), "{:?}", resp); assert!(resp.snapshot.is_none()); @@ -1259,7 +1259,7 @@ mod tests { reader.propose_raft_command( None, cmd.clone(), - Callback::Read(Box::new(move |resp: ReadResponse| { + Callback::read(Box::new(move |resp: ReadResponse| { let err = resp.response.get_header().get_error(); assert!(err.has_server_is_busy(), "{:?}", resp); assert!(resp.snapshot.is_none()); @@ -1291,7 +1291,7 @@ mod tests { reader.propose_raft_command( None, cmd9.clone(), - Callback::Read(Box::new(|resp| { + Callback::read(Box::new(|resp| { panic!("unexpected invoke, {:?}", resp); })), ); @@ -1320,7 +1320,7 @@ mod tests { meta.readers.get_mut(&1).unwrap().update(pg); } let task = - RaftCommand::::new(cmd.clone(), Callback::Read(Box::new(move |_| {}))); + RaftCommand::::new(cmd.clone(), Callback::read(Box::new(move |_| {}))); must_not_redirect(&mut reader, &rx, task); assert_eq!( TLS_LOCAL_READ_METRICS.with(|m| 
m.borrow().reject_reason.cache_miss.get()), @@ -1345,7 +1345,7 @@ mod tests { cmd.mut_header().set_flag_data(data.into()); let task = RaftCommand::::new( cmd.clone(), - Callback::Read(Box::new(move |resp: ReadResponse| { + Callback::read(Box::new(move |resp: ReadResponse| { let err = resp.response.get_header().get_error(); assert!(err.has_data_is_not_ready()); assert!(resp.snapshot.is_none()); @@ -1359,7 +1359,7 @@ mod tests { read_progress.update_safe_ts(1, 2); assert_eq!(read_progress.safe_ts(), 2); - let task = RaftCommand::::new(cmd, Callback::Read(Box::new(move |_| {}))); + let task = RaftCommand::::new(cmd, Callback::read(Box::new(move |_| {}))); must_not_redirect(&mut reader, &rx, task); assert_eq!( TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.safe_ts.get()), diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index 4266103933f..7877de718ba 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -206,7 +206,7 @@ impl, E: KvEngine> ScannerPool { SignificantMsg::CaptureChange { cmd: change_cmd, region_epoch: task.region.get_region_epoch().clone(), - callback: Callback::Read(Box::new(cb)), + callback: Callback::read(Box::new(cb)), }, )?; let mut resp = box_try!(fut.await); diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 8cac947dc57..9b653ac2096 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -40,6 +40,7 @@ use kvproto::{ tikvpb::TikvClient, }; use pd_client::PdClient; +use protobuf::RepeatedField; use raft::eraftpb::ConfChangeType; pub use raftstore::store::util::{find_peer, new_learner_peer, new_peer}; use raftstore::{ @@ -420,7 +421,7 @@ pub fn make_cb(cmd: &RaftCmdRequest) -> (Callback, mpsc::Receiver let (tx, rx) = mpsc::channel(); let mut detector = CallbackLeakDetector::default(); let cb = if is_read { - Callback::Read(Box::new(move |resp: ReadResponse| { + 
Callback::read(Box::new(move |resp: ReadResponse| { detector.called = true; // we don't care error actually. let _ = tx.send(resp.response); @@ -485,7 +486,7 @@ pub fn async_read_on_peer( request.mut_header().set_peer(peer); request.mut_header().set_replica_read(replica_read); let (tx, rx) = mpsc::sync_channel(1); - let cb = Callback::Read(Box::new(move |resp| drop(tx.send(resp.response)))); + let cb = Callback::read(Box::new(move |resp| drop(tx.send(resp.response)))); cluster.sim.wl().async_read(node_id, None, request, cb); rx } @@ -508,7 +509,7 @@ pub fn batch_read_on_peer( ); request.mut_header().set_peer(peer.clone()); let t = tx.clone(); - let cb = Callback::Read(Box::new(move |resp| { + let cb = Callback::read(Box::new(move |resp| { t.send((len, resp)).unwrap(); })); cluster @@ -562,7 +563,7 @@ pub fn async_read_index_on_peer( ); request.mut_header().set_peer(peer); let (tx, rx) = mpsc::sync_channel(1); - let cb = Callback::Read(Box::new(move |resp| drop(tx.send(resp.response)))); + let cb = Callback::read(Box::new(move |resp| drop(tx.send(resp.response)))); cluster.sim.wl().async_read(node_id, None, request, cb); rx } @@ -881,6 +882,19 @@ pub fn kv_read(client: &TikvClient, ctx: Context, key: Vec, ts: u64) -> GetR client.kv_get(&get_req).unwrap() } +pub fn kv_batch_read( + client: &TikvClient, + ctx: Context, + keys: Vec>, + ts: u64, +) -> BatchGetResponse { + let mut batch_get_req = BatchGetRequest::default(); + batch_get_req.set_context(ctx); + batch_get_req.set_keys(RepeatedField::from(keys)); + batch_get_req.set_version(ts); + client.kv_batch_get(&batch_get_req).unwrap() +} + pub fn must_kv_prewrite_with( client: &TikvClient, ctx: Context, diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index be099beadde..664dc1e6767 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -37,6 +37,9 @@ impl Tracker { detail_v2.set_rocksdb_key_skipped_count(self.metrics.internal_key_skipped_count); 
detail_v2.set_rocksdb_delete_skipped_count(self.metrics.deleted_key_skipped_count); detail_v2.set_get_snapshot_nanos(self.metrics.get_snapshot_nanos); + detail_v2.set_read_index_propose_wait_nanos(self.metrics.read_index_propose_wait_nanos); + detail_v2.set_read_index_confirm_wait_nanos(self.metrics.read_index_confirm_wait_nanos); + detail_v2.set_read_pool_schedule_wait_nanos(self.metrics.read_pool_schedule_wait_nanos); } pub fn write_write_detail(&self, detail: &mut pb::WriteDetail) { @@ -118,6 +121,9 @@ pub enum RequestType { #[derive(Debug, Default, Clone)] pub struct RequestMetrics { pub get_snapshot_nanos: u64, + pub read_index_propose_wait_nanos: u64, + pub read_index_confirm_wait_nanos: u64, + pub read_pool_schedule_wait_nanos: u64, pub block_cache_hit_count: u64, pub block_read_count: u64, pub block_read_byte: u64, diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 677490a4b31..5bd05bd29cd 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -501,9 +501,16 @@ impl Endpoint { async move { let res = match result_of_future { Err(e) => make_error_response(e).into(), - Ok(handle_fut) => handle_fut - .await - .unwrap_or_else(|e| make_error_response(e).into()), + Ok(handle_fut) => { + let mut response = handle_fut + .await + .unwrap_or_else(|e| make_error_response(e).into()); + let scan_detail_v2 = response.mut_exec_details_v2().mut_scan_detail_v2(); + GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { + tracker.write_scan_detail(scan_detail_v2); + }); + response + } }; GLOBAL_TRACKERS.remove(tracker); res diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index f9b908979b8..0547d2088f0 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -82,7 +82,7 @@ impl Tracker { /// factory context, because the future pool might be full and we need /// to wait it. This kind of wait time has to be recorded. 
pub fn new(req_ctx: ReqContext, slow_log_threshold: Duration) -> Self { - let now = Instant::now_coarse(); + let now = Instant::now(); Tracker { request_begin_at: now, current_stage: TrackerState::Initialized, @@ -106,14 +106,18 @@ impl Tracker { pub fn on_scheduled(&mut self) { assert_eq!(self.current_stage, TrackerState::Initialized); - let now = Instant::now_coarse(); + let now = Instant::now(); self.schedule_wait_time = now - self.request_begin_at; + with_tls_tracker(|tracker| { + tracker.metrics.read_pool_schedule_wait_nanos = + self.schedule_wait_time.as_nanos() as u64; + }); self.current_stage = TrackerState::Scheduled(now); } pub fn on_snapshot_finished(&mut self) { if let TrackerState::Scheduled(at) = self.current_stage { - let now = Instant::now_coarse(); + let now = Instant::now(); self.snapshot_wait_time = now - at; self.wait_time = now - self.request_begin_at; self.current_stage = TrackerState::SnapshotRetrieved(now); @@ -124,7 +128,7 @@ impl Tracker { pub fn on_begin_all_items(&mut self) { if let TrackerState::SnapshotRetrieved(at) = self.current_stage { - let now = Instant::now_coarse(); + let now = Instant::now(); self.handler_build_time = now - at; self.current_stage = TrackerState::AllItemsBegan; } else { @@ -133,7 +137,7 @@ impl Tracker { } pub fn on_begin_item(&mut self) { - let now = Instant::now_coarse(); + let now = Instant::now(); match self.current_stage { TrackerState::AllItemsBegan => {} TrackerState::ItemFinished(at) => { @@ -149,7 +153,7 @@ impl Tracker { pub fn on_finish_item(&mut self, some_storage_stats: Option) { if let TrackerState::ItemBegan(at) = self.current_stage { - let now = Instant::now_coarse(); + let now = Instant::now(); self.item_process_time = now - at; self.total_process_time += self.item_process_time; if let Some(storage_stats) = some_storage_stats { @@ -227,7 +231,7 @@ impl Tracker { _ => unreachable!(), } - self.req_lifetime = Instant::now_coarse() - self.request_begin_at; + self.req_lifetime = Instant::now() - 
self.request_begin_at; self.current_stage = TrackerState::AllItemFinished; self.track(); } diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index fea333903a6..fff9c79cec2 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -127,7 +127,7 @@ where cmd.set_header(header); cmd.set_requests(vec![req].into()); let (cb, future) = paired_future_callback(); - if let Err(e) = router.send_command(cmd, Callback::Read(cb), RaftCmdExtraOpts::default()) { + if let Err(e) = router.send_command(cmd, Callback::read(cb), RaftCmdExtraOpts::default()) { return Err(e.into()); } let mut res = future.await.map_err(|_| { diff --git a/src/server/raftkv.rs b/src/server/raftkv.rs index a314315985c..0a3f2fdd742 100644 --- a/src/server/raftkv.rs +++ b/src/server/raftkv.rs @@ -216,7 +216,7 @@ where .read( ctx.read_id, cmd, - StoreCallback::Read(Box::new(move |resp| { + StoreCallback::read(Box::new(move |resp| { cb(on_read_result(resp).map_err(Error::into)); })), ) diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index e66bb3ec40c..30cc8342959 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -552,7 +552,7 @@ fn region_detail>( raft_cmd.set_status_request(status_request); let (tx, rx) = oneshot::channel(); - let cb = Callback::Read(Box::new(|resp| tx.send(resp).unwrap())); + let cb = Callback::read(Box::new(|resp| tx.send(resp).unwrap())); async move { raft_router @@ -592,7 +592,7 @@ fn consistency_check>( raft_cmd.set_admin_request(admin_request); let (tx, rx) = oneshot::channel(); - let cb = Callback::Read(Box::new(|resp| tx.send(resp).unwrap())); + let cb = Callback::read(Box::new(|resp| tx.send(resp).unwrap())); async move { raft_router diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index fa743911b40..79fbd9c6624 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -951,7 +951,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor // so just send it as an command. 
if let Err(e) = self .ch - .send_command(cmd, Callback::Read(cb), RaftCmdExtraOpts::default()) + .send_command(cmd, Callback::read(cb), RaftCmdExtraOpts::default()) { // Retrun region error instead a gRPC error. let mut resp = ReadIndexResponse::default(); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 3024a05381f..8dbb8a69361 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -696,6 +696,10 @@ impl Storage { wait_wall_time_ms: duration_to_ms(wait_wall_time), process_wall_time_ms: duration_to_ms(process_wall_time), }; + with_tls_tracker(|tracker| { + tracker.metrics.read_pool_schedule_wait_nanos = + schedule_wait_time.as_nanos() as u64; + }); Ok(( result?, KvGetStatistics { @@ -1041,6 +1045,10 @@ impl Storage { stage_snap_recv_ts.saturating_duration_since(stage_begin_ts); let process_wall_time = stage_finished_ts.saturating_duration_since(stage_snap_recv_ts); + with_tls_tracker(|tracker| { + tracker.metrics.read_pool_schedule_wait_nanos = + schedule_wait_time.as_nanos() as u64; + }); let latency_stats = StageLatencyStats { schedule_wait_time_ms: duration_to_ms(schedule_wait_time), snapshot_wait_time_ms: duration_to_ms(snapshot_wait_time), diff --git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index 1143600920f..223b692d579 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -51,7 +51,7 @@ impl SyncBenchRouter { let mut response = RaftCmdResponse::default(); cmd_resp::bind_term(&mut response, 1); match cmd.callback { - Callback::Read(cb) => { + Callback::Read { cb, .. 
} => { let snapshot = self.db.snapshot(); let region = Arc::new(self.region.to_owned()); cb(ReadResponse { @@ -161,7 +161,7 @@ fn bench_async_snapshots_noop(b: &mut test::Bencher) { } }); let cb: Callback = - Callback::Read(Box::new(move |resp: ReadResponse| { + Callback::read(Box::new(move |resp: ReadResponse| { let res = CmdRes::Snap(resp.snapshot.unwrap()); cb2(Ok(res)); })); diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index 1c38571e280..1ef0471152f 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -21,6 +21,7 @@ mod test_metrics_overflow; mod test_pd_client; mod test_pending_peers; mod test_rawkv; +mod test_read_execution_tracker; mod test_replica_read; mod test_replica_stale_read; mod test_server; diff --git a/tests/failpoints/cases/test_cmd_epoch_checker.rs b/tests/failpoints/cases/test_cmd_epoch_checker.rs index 9de8911754b..d96c467d487 100644 --- a/tests/failpoints/cases/test_cmd_epoch_checker.rs +++ b/tests/failpoints/cases/test_cmd_epoch_checker.rs @@ -101,7 +101,7 @@ fn test_reject_proposal_during_region_split() { // Try to split region. let (split_tx, split_rx) = mpsc::channel(); - let cb = Callback::Read(Box::new(move |resp: ReadResponse| { + let cb = Callback::read(Box::new(move |resp: ReadResponse| { split_tx.send(resp.response).unwrap() })); let r = cluster.get_region(b""); @@ -179,7 +179,7 @@ fn test_reject_proposal_during_region_merge() { fail::cfg(prepare_merge_fp, "pause").unwrap(); // Try to merge region. 
let (merge_tx, merge_rx) = mpsc::channel(); - let cb = Callback::Read(Box::new(move |resp: ReadResponse| { + let cb = Callback::read(Box::new(move |resp: ReadResponse| { merge_tx.send(resp.response).unwrap() })); let source = cluster.get_region(b""); diff --git a/tests/failpoints/cases/test_read_execution_tracker.rs b/tests/failpoints/cases/test_read_execution_tracker.rs new file mode 100644 index 00000000000..4357d65af5f --- /dev/null +++ b/tests/failpoints/cases/test_read_execution_tracker.rs @@ -0,0 +1,121 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use kvproto::kvrpcpb::*; +use test_coprocessor::{init_with_data, DagSelect, ProductTable}; +use test_raftstore::{ + kv_batch_read, kv_read, must_kv_commit, must_kv_prewrite, must_new_cluster_and_kv_client, +}; + +#[test] +fn test_read_execution_tracking() { + let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (k1, v1) = (b"k1".to_vec(), b"v1".to_vec()); + let (k2, v2) = (b"k2".to_vec(), b"v2".to_vec()); + + // write entries + let mut mutation1 = Mutation::default(); + mutation1.set_op(Op::Put); + mutation1.set_key(k1.clone()); + mutation1.set_value(v1); + + let mut mutation2 = Mutation::default(); + mutation2.set_op(Op::Put); + mutation2.set_key(k2.clone()); + mutation2.set_value(v2); + + must_kv_prewrite( + &client, + ctx.clone(), + vec![mutation1, mutation2], + k1.clone(), + 10, + ); + must_kv_commit( + &client, + ctx.clone(), + vec![k1.clone(), k2.clone()], + 10, + 30, + 30, + ); + + let lease_read_checker = |scan_detail: &ScanDetailV2| { + assert!( + scan_detail.get_read_index_propose_wait_nanos() == 0, + "resp lease read propose wait time={:?}", + scan_detail.get_read_index_propose_wait_nanos() + ); + + assert!( + scan_detail.get_read_index_confirm_wait_nanos() == 0, + "resp lease read confirm wait time={:?}", + scan_detail.get_read_index_confirm_wait_nanos() + ); + + assert!( + scan_detail.get_read_pool_schedule_wait_nanos() > 0, + "resp read pool scheduling wait 
time={:?}", + scan_detail.get_read_pool_schedule_wait_nanos() + ); + }; + + // should perform lease read + let resp = kv_read(&client, ctx.clone(), k1.clone(), 100); + + lease_read_checker(resp.get_exec_details_v2().get_scan_detail_v2()); + + // should perform lease read + let resp = kv_batch_read(&client, ctx.clone(), vec![k1.clone(), k2.clone()], 100); + + lease_read_checker(resp.get_exec_details_v2().get_scan_detail_v2()); + + let product = ProductTable::new(); + init_with_data(&product, &[(1, Some("name:0"), 2)]); + let mut coprocessor_request = DagSelect::from(&product).build(); + coprocessor_request.set_context(ctx.clone()); + coprocessor_request.set_start_ts(100); + + // should perform lease read + let resp = client.coprocessor(&coprocessor_request).unwrap(); + + lease_read_checker(resp.get_exec_details_v2().get_scan_detail_v2()); + + let read_index_checker = |scan_detail: &ScanDetailV2| { + assert!( + scan_detail.get_read_index_propose_wait_nanos() > 0, + "resp lease read propose wait time={:?}", + scan_detail.get_read_index_propose_wait_nanos() + ); + + assert!( + scan_detail.get_read_index_confirm_wait_nanos() > 0, + "resp lease read confirm wait time={:?}", + scan_detail.get_read_index_confirm_wait_nanos() + ); + + assert!( + scan_detail.get_read_pool_schedule_wait_nanos() > 0, + "resp read pool scheduling wait time={:?}", + scan_detail.get_read_pool_schedule_wait_nanos() + ); + }; + + fail::cfg("perform_read_index", "return()").unwrap(); + + // should perform read index + let resp = kv_read(&client, ctx.clone(), k1.clone(), 100); + + read_index_checker(resp.get_exec_details_v2().get_scan_detail_v2()); + + // should perform read index + let resp = kv_batch_read(&client, ctx, vec![k1, k2], 100); + + read_index_checker(resp.get_exec_details_v2().get_scan_detail_v2()); + + // should perform read index + let resp = client.coprocessor(&coprocessor_request).unwrap(); + + read_index_checker(resp.get_exec_details_v2().get_scan_detail_v2()); + + 
fail::remove("perform_read_index"); +} diff --git a/tests/failpoints/cases/test_stale_read.rs b/tests/failpoints/cases/test_stale_read.rs index 9a88a73508c..18ddb865fd9 100644 --- a/tests/failpoints/cases/test_stale_read.rs +++ b/tests/failpoints/cases/test_stale_read.rs @@ -362,7 +362,7 @@ fn test_read_index_when_transfer_leader_2() { sim.async_command_on_node( old_leader.get_id(), read_request, - Callback::Read(Box::new(move |resp| tx.send(resp.response).unwrap())), + Callback::read(Box::new(move |resp| tx.send(resp.response).unwrap())), ) .unwrap(); rx diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index 4b69bd4129e..80b90d78045 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -526,7 +526,7 @@ fn test_read_index_stale_in_suspect_lease() { sim.async_command_on_node( old_leader.get_id(), read_request, - Callback::Read(Box::new(move |resp| tx.send(resp.response).unwrap())), + Callback::read(Box::new(move |resp| tx.send(resp.response).unwrap())), ) .unwrap(); rx diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 9a946a806bc..262060b4491 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -666,7 +666,7 @@ fn test_split_region_impl(is_raw_kv: bool) { .collect(); assert_eq!( result_split_keys, - vec![b"b", b"c", b"d", b"e",] + vec![b"b", b"c", b"d", b"e"] .into_iter() .map(|k| encode_key(&k[..])) .collect::>() From 40192af85a92ed0b6d29af0059c837620ef4eb6c Mon Sep 17 00:00:00 2001 From: JmPotato Date: Tue, 30 Aug 2022 11:36:23 +0800 Subject: [PATCH 0175/1149] storage: implement the FlashbackToVersion txn command (#13345) ref tikv/tikv#13303 Implement the `FlashbackToVersion` txn command, which contains two phases: `command::FlashbackToVersionReadPhase` and `command::FlashbackToVersion`. 
Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- components/tracker/src/lib.rs | 1 + src/storage/metrics.rs | 1 + src/storage/mod.rs | 330 +++++++++++++++++- src/storage/mvcc/reader/reader.rs | 228 +++++++++++- .../txn/commands/flashback_to_version.rs | 134 +++++++ .../flashback_to_version_read_phase.rs | 118 +++++++ src/storage/txn/commands/mod.rs | 12 + 7 files changed, 816 insertions(+), 8 deletions(-) create mode 100644 src/storage/txn/commands/flashback_to_version.rs create mode 100644 src/storage/txn/commands/flashback_to_version_read_phase.rs diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index 664dc1e6767..56ce2aa3280 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -113,6 +113,7 @@ pub enum RequestType { KvTxnHeartBeat, KvRollback, KvPessimisticRollback, + KvFlashbackToVersion, CoprocessorDag, CoprocessorAnalyze, CoprocessorChecksum, diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index e58f7862b37..c1076dca604 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -140,6 +140,7 @@ make_auto_flush_static_metric! 
{ pause, key_mvcc, start_ts_mvcc, + flashback_to_version, raw_get, raw_batch_get, raw_scan, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 8dbb8a69361..162a58b4801 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3235,14 +3235,17 @@ mod tests { use error_code::ErrorCodeExt; use errors::extract_key_error; use futures::executor::block_on; - use kvproto::kvrpcpb::{AssertionLevel, CommandPri, Op, PrewriteRequestPessimisticAction::*}; + use kvproto::kvrpcpb::{ + Assertion, AssertionLevel, CommandPri, Op, PrewriteRequestPessimisticAction::*, + }; use tikv_util::config::ReadableSize; use tracker::INVALID_TRACKER_TOKEN; - use txn_types::{Mutation, PessimisticLock, WriteType}; + use txn_types::{Mutation, PessimisticLock, WriteType, SHORT_VALUE_MAX_LEN}; use super::{ mvcc::tests::{must_unlocked, must_written}, test_util::*, + txn::commands::FLASHBACK_BATCH_SIZE, *, }; use crate::{ @@ -4407,6 +4410,329 @@ mod tests { ); } + #[test] + fn test_flashback_to_version() { + let storage = TestStorageBuilderApiV1::new(DummyLockManager) + .build() + .unwrap(); + let writes = vec![ + // (Mutation, StartTS, CommitTS) + ( + Mutation::Put((Key::from_raw(b"k"), b"v@1".to_vec()), Assertion::None), + 1, + 2, + ), + ( + Mutation::Put((Key::from_raw(b"k"), b"v@3".to_vec()), Assertion::None), + 3, + 4, + ), + ( + Mutation::Put((Key::from_raw(b"k"), b"v@5".to_vec()), Assertion::None), + 5, + 6, + ), + ( + Mutation::Put((Key::from_raw(b"k"), b"v@7".to_vec()), Assertion::None), + 7, + 8, + ), + ( + Mutation::Delete(Key::from_raw(b"k"), Assertion::None), + 9, + 10, + ), + ( + Mutation::Put((Key::from_raw(b"k"), b"v@11".to_vec()), Assertion::None), + 11, + 12, + ), + // Non-short value + ( + Mutation::Put( + (Key::from_raw(b"k"), vec![b'v'; SHORT_VALUE_MAX_LEN + 1]), + Assertion::None, + ), + 13, + 14, + ), + ]; + let (tx, rx) = channel(); + // Prewrite and commit. 
+ for write in writes.iter() { + let (key, value) = write.0.clone().into_key_value(); + let start_ts = write.1.into(); + let commit_ts = write.2.into(); + storage + .sched_txn_command( + commands::Prewrite::with_defaults( + vec![write.0.clone()], + key.clone().to_raw().unwrap(), + start_ts, + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + storage + .sched_txn_command( + commands::Commit::new( + vec![key.clone()], + start_ts, + commit_ts, + Context::default(), + ), + expect_value_callback(tx.clone(), 1, TxnStatus::committed(commit_ts)), + ) + .unwrap(); + rx.recv().unwrap(); + if let Mutation::Put(..) = write.0 { + expect_value( + value.unwrap(), + block_on(storage.get(Context::default(), key.clone(), commit_ts)) + .unwrap() + .0, + ); + } else { + expect_none( + block_on(storage.get(Context::default(), key, commit_ts)) + .unwrap() + .0, + ); + } + } + // Flashback. + for idx in (0..writes.len()).rev() { + let write = &writes[idx]; + let key = write.0.key(); + let start_ts = write.1.into(); + let commit_ts = write.2.into(); + storage + .sched_txn_command( + commands::FlashbackToVersionReadPhase::new( + start_ts, + None, + Some(key.clone()), + Some(key.clone()), + Context::default(), + ), + expect_ok_callback(tx.clone(), 2), + ) + .unwrap(); + rx.recv().unwrap(); + if idx == 0 || matches!(writes[idx - 1].0, Mutation::Delete(..)) { + expect_none( + block_on(storage.get(Context::default(), key.clone(), commit_ts)) + .unwrap() + .0, + ); + } else { + let (_, old_value) = writes[idx - 1].0.clone().into_key_value(); + expect_value( + old_value.unwrap(), + block_on(storage.get(Context::default(), key.clone(), commit_ts)) + .unwrap() + .0, + ); + } + } + } + + #[test] + fn test_flashback_to_version_lock() { + let storage = TestStorageBuilderApiV1::new(DummyLockManager) + .build() + .unwrap(); + let (tx, rx) = channel(); + storage + .sched_txn_command( + commands::Prewrite::with_defaults( + vec![Mutation::make_put(Key::from_raw(b"k"), 
b"v@1".to_vec())], + b"k".to_vec(), + 1.into(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + storage + .sched_txn_command( + commands::Commit::new( + vec![Key::from_raw(b"k")], + 1.into(), + 2.into(), + Context::default(), + ), + expect_value_callback(tx.clone(), 1, TxnStatus::committed(2.into())), + ) + .unwrap(); + rx.recv().unwrap(); + expect_value( + b"v@1".to_vec(), + block_on(storage.get(Context::default(), Key::from_raw(b"k"), 2.into())) + .unwrap() + .0, + ); + storage + .sched_txn_command( + commands::Prewrite::with_defaults( + vec![Mutation::make_put(Key::from_raw(b"k"), b"v@3".to_vec())], + b"k".to_vec(), + 3.into(), + ), + expect_ok_callback(tx.clone(), 2), + ) + .unwrap(); + rx.recv().unwrap(); + expect_error( + |e| match e { + Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(mvcc::Error( + box mvcc::ErrorInner::KeyIsLocked { .. }, + ))))) => (), + e => panic!("unexpected error chain: {:?}", e), + }, + block_on(storage.get(Context::default(), Key::from_raw(b"k"), 3.into())), + ); + + storage + .sched_txn_command( + commands::FlashbackToVersionReadPhase::new( + 2.into(), + None, + Some(Key::from_raw(b"k")), + Some(Key::from_raw(b"k")), + Context::default(), + ), + expect_ok_callback(tx.clone(), 3), + ) + .unwrap(); + rx.recv().unwrap(); + expect_value( + b"v@1".to_vec(), + block_on(storage.get(Context::default(), Key::from_raw(b"k"), 3.into())) + .unwrap() + .0, + ); + storage + .sched_txn_command( + commands::FlashbackToVersionReadPhase::new( + 1.into(), + None, + Some(Key::from_raw(b"k")), + Some(Key::from_raw(b"k")), + Context::default(), + ), + expect_ok_callback(tx, 3), + ) + .unwrap(); + rx.recv().unwrap(); + expect_none( + block_on(storage.get(Context::default(), Key::from_raw(b"k"), 3.into())) + .unwrap() + .0, + ); + } + + #[test] + fn test_flashback_to_version_in_multi_batch() { + let storage = TestStorageBuilderApiV1::new(DummyLockManager) + .build() + .unwrap(); + let (tx, rx) = channel(); + // 
Add (FLASHBACK_BATCH_SIZE * 2) lock records. + for i in 1..=FLASHBACK_BATCH_SIZE * 2 { + let start_ts = (i as u64).into(); + let key = Key::from_raw(format!("k{}", i).as_bytes()); + storage + .sched_txn_command( + commands::Prewrite::with_defaults( + vec![Mutation::make_put( + key.clone(), + format!("v@{}", i).as_bytes().to_vec(), + )], + key.to_raw().unwrap(), + start_ts, + ), + expect_ok_callback(tx.clone(), i as i32), + ) + .unwrap(); + rx.recv().unwrap(); + expect_error( + |e| match e { + Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(mvcc::Error( + box mvcc::ErrorInner::KeyIsLocked { .. }, + ))))) => (), + e => panic!("unexpected error chain: {:?}", e), + }, + block_on(storage.get(Context::default(), key, start_ts)), + ); + } + // Add (FLASHBACK_BATCH_SIZE * 2) write records. + for i in FLASHBACK_BATCH_SIZE * 2 + 1..=FLASHBACK_BATCH_SIZE * 4 { + let start_ts = (i as u64).into(); + let commit_ts = ((i + 1) as u64).into(); + let key = Key::from_raw(format!("k{}", i).as_bytes()); + let value = format!("v@{}", i).as_bytes().to_vec(); + storage + .sched_txn_command( + commands::Prewrite::with_defaults( + vec![Mutation::make_put(key.clone(), value.clone())], + key.to_raw().unwrap(), + start_ts, + ), + expect_ok_callback(tx.clone(), i as i32), + ) + .unwrap(); + rx.recv().unwrap(); + storage + .sched_txn_command( + commands::Commit::new( + vec![key.clone()], + start_ts, + commit_ts, + Context::default(), + ), + expect_value_callback(tx.clone(), i as i32, TxnStatus::committed(commit_ts)), + ) + .unwrap(); + rx.recv().unwrap(); + expect_value( + value, + block_on(storage.get(Context::default(), key, commit_ts)) + .unwrap() + .0, + ); + } + // Flashback all records. 
+ storage + .sched_txn_command( + commands::FlashbackToVersionReadPhase::new( + TimeStamp::zero(), + None, + Some(Key::from_raw(b"k")), + Some(Key::from_raw(b"k")), + Context::default(), + ), + expect_ok_callback(tx, 2), + ) + .unwrap(); + rx.recv().unwrap(); + expect_none( + block_on(storage.get(Context::default(), Key::from_raw(b"k1"), 1.into())) + .unwrap() + .0, + ); + expect_none( + block_on(storage.get( + Context::default(), + Key::from_raw(format!("k{}", FLASHBACK_BATCH_SIZE * 4).as_bytes()), + ((FLASHBACK_BATCH_SIZE * 4 + 1) as u64).into(), + )) + .unwrap() + .0, + ); + } + #[test] fn test_high_priority_get_put() { let storage = TestStorageBuilderApiV1::new(DummyLockManager) diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index f1ed7748a15..2a43ac24583 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -281,7 +281,7 @@ impl MvccReader { self.current_key = Some(key.clone()); self.write_cursor.take(); } - self.create_write_cursor()?; + self.create_write_cursor(None)?; let cursor = self.write_cursor.as_mut().unwrap(); // find a `ts` encoded key which is less than the `ts` encoded version of the // `key` @@ -427,13 +427,14 @@ impl MvccReader { Ok(()) } - fn create_write_cursor(&mut self) -> Result<()> { + fn create_write_cursor(&mut self, hint_min_ts: Option) -> Result<()> { if self.write_cursor.is_none() { let cursor = CursorBuilder::new(&self.snapshot, CF_WRITE) .fill_cache(self.fill_cache) // Only use prefix seek in non-scan mode. 
.prefix_seek(self.scan_mode.is_none()) .scan_mode(self.get_scan_mode(true)) + .hint_min_ts(hint_min_ts) .build()?; self.write_cursor = Some(cursor); } @@ -454,7 +455,7 @@ impl MvccReader { /// Return the first committed key for which `start_ts` equals to `ts` pub fn seek_ts(&mut self, ts: TimeStamp) -> Result> { assert!(self.scan_mode.is_some()); - self.create_write_cursor()?; + self.create_write_cursor(None)?; let cursor = self.write_cursor.as_mut().unwrap(); let mut ok = cursor.seek_to_first(&mut self.statistics.write); @@ -471,11 +472,11 @@ impl MvccReader { Ok(None) } - /// Scan locks that satisfies `filter(lock)` returns true, from the given - /// start key `start`. At most `limit` locks will be returned. If `limit` is + /// Scan locks that satisfies `filter(lock)` returns true in the key range + /// [start, end). At most `limit` locks will be returned. If `limit` is /// set to `0`, it means unlimited. /// - /// The return type is `(locks, is_remain)`. `is_remain` indicates whether + /// The return type is `(locks, has_remain)`. `has_remain` indicates whether /// there MAY be remaining locks that can be scanned. pub fn scan_locks( &mut self, @@ -520,6 +521,57 @@ impl MvccReader { Ok((locks, false)) } + /// Scan writes that satisfies `filter(key)` returns true in the key range + /// [start, end). At most `limit` locks will be returned. If `limit` is + /// set to `0`, it means unlimited. + /// + /// The return type is `(writes, has_remain)`. `has_remain` indicates + /// whether there MAY be remaining writes that can be scanned. 
+ pub fn scan_writes( + &mut self, + start: Option<&Key>, + end: Option<&Key>, + filter: F, + limit: usize, + hint_min_ts: Option, + ) -> Result<(Vec<(Key, Write)>, bool)> + where + F: Fn(&Key) -> bool, + { + self.create_write_cursor(hint_min_ts)?; + let cursor = self.write_cursor.as_mut().unwrap(); + let ok = match start { + Some(x) => cursor.seek(x, &mut self.statistics.write)?, + None => cursor.seek_to_first(&mut self.statistics.write), + }; + if !ok { + return Ok((vec![], false)); + } + let mut writes = Vec::with_capacity(limit); + while cursor.valid()? { + let key = Key::from_encoded_slice(cursor.key(&mut self.statistics.write)); + if let Some(end) = end { + if key >= *end { + return Ok((writes, false)); + } + } + + if filter(&key) { + writes.push(( + key, + WriteRef::parse(cursor.value(&mut self.statistics.write))?.to_owned(), + )); + if limit > 0 && writes.len() == limit { + return Ok((writes, true)); + } + } + cursor.next(&mut self.statistics.lock); + } + self.statistics.write.processed_keys += writes.len(); + resource_metering::record_read_keys(writes.len() as u32); + Ok((writes, false)) + } + pub fn scan_keys( &mut self, mut start: Option, @@ -1604,6 +1656,170 @@ pub mod tests { ); } + #[test] + fn test_scan_writes() { + let path = tempfile::Builder::new() + .prefix("_test_storage_mvcc_reader_scan_writes") + .tempdir() + .unwrap(); + let path = path.path().to_str().unwrap(); + let region = make_region(1, vec![], vec![]); + let db = open_db(path, true); + let mut engine = RegionEngine::new(&db, ®ion); + + // Put some writes to the db. 
+ engine.prewrite( + Mutation::make_put(Key::from_raw(b"k1"), b"v1@1".to_vec()), + b"k1", + 1, + ); + engine.commit(b"k1", 1, 2); + engine.prewrite( + Mutation::make_put(Key::from_raw(b"k1"), b"v1@3".to_vec()), + b"k1", + 3, + ); + engine.commit(b"k1", 3, 4); + engine.prewrite( + Mutation::make_put(Key::from_raw(b"k1"), b"v1@5".to_vec()), + b"k1", + 5, + ); + engine.prewrite( + Mutation::make_put(Key::from_raw(b"k2"), b"v2@1".to_vec()), + b"k2", + 1, + ); + engine.commit(b"k2", 1, 2); + engine.prewrite( + Mutation::make_put(Key::from_raw(b"k2"), b"v2@3".to_vec()), + b"k2", + 3, + ); + engine.commit(b"k2", 3, 4); + + // Creates a reader and scan writes. + let check_scan_write = |start_key: Option, + end_key: Option, + filter: Box bool>, + limit, + expect_res: &[_], + expect_is_remain: bool| { + let snap = RegionSnapshot::::from_raw(db.clone(), region.clone()); + let mut reader = MvccReader::new(snap, Some(ScanMode::Forward), false); + let res = reader + .scan_writes(start_key.as_ref(), end_key.as_ref(), filter, limit, None) + .unwrap(); + assert_eq!(res.0, expect_res); + assert_eq!(res.1, expect_is_remain); + }; + + check_scan_write( + None, + None, + Box::new(|key| key.decode_ts().unwrap() >= 1.into()), + 1, + &[( + Key::from_raw(b"k1").append_ts(4.into()), + Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec())), + )], + true, + ); + check_scan_write( + None, + None, + Box::new(|key| key.decode_ts().unwrap() >= 1.into()), + 5, + &[ + ( + Key::from_raw(b"k1").append_ts(4.into()), + Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec())), + ), + ( + Key::from_raw(b"k1").append_ts(2.into()), + Write::new(WriteType::Put, 1.into(), Some(b"v1@1".to_vec())), + ), + ( + Key::from_raw(b"k2").append_ts(4.into()), + Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec())), + ), + ( + Key::from_raw(b"k2").append_ts(2.into()), + Write::new(WriteType::Put, 1.into(), Some(b"v2@1".to_vec())), + ), + ], + false, + ); + check_scan_write( + Some(Key::from_raw(b"k2")), 
+ None, + Box::new(|key| key.decode_ts().unwrap() >= 1.into()), + 3, + &[ + ( + Key::from_raw(b"k2").append_ts(4.into()), + Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec())), + ), + ( + Key::from_raw(b"k2").append_ts(2.into()), + Write::new(WriteType::Put, 1.into(), Some(b"v2@1".to_vec())), + ), + ], + false, + ); + check_scan_write( + None, + Some(Key::from_raw(b"k2")), + Box::new(|key| key.decode_ts().unwrap() >= 1.into()), + 4, + &[ + ( + Key::from_raw(b"k1").append_ts(4.into()), + Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec())), + ), + ( + Key::from_raw(b"k1").append_ts(2.into()), + Write::new(WriteType::Put, 1.into(), Some(b"v1@1".to_vec())), + ), + ], + false, + ); + check_scan_write( + Some(Key::from_raw(b"k1")), + Some(Key::from_raw(b"k2")), + Box::new(|key| key.decode_ts().unwrap() >= 1.into()), + 4, + &[ + ( + Key::from_raw(b"k1").append_ts(4.into()), + Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec())), + ), + ( + Key::from_raw(b"k1").append_ts(2.into()), + Write::new(WriteType::Put, 1.into(), Some(b"v1@1".to_vec())), + ), + ], + false, + ); + check_scan_write( + None, + None, + Box::new(|key| key.decode_ts().unwrap() < 4.into()), + 4, + &[ + ( + Key::from_raw(b"k1").append_ts(2.into()), + Write::new(WriteType::Put, 1.into(), Some(b"v1@1".to_vec())), + ), + ( + Key::from_raw(b"k2").append_ts(2.into()), + Write::new(WriteType::Put, 1.into(), Some(b"v2@1".to_vec())), + ), + ], + false, + ); + } + #[test] fn test_load_data() { let path = tempfile::Builder::new() diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs new file mode 100644 index 00000000000..058758888d5 --- /dev/null +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -0,0 +1,134 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +// #[PerformanceCriticalPath] +use txn_types::{Key, Lock, LockType, TimeStamp, Write, WriteType}; + +use crate::storage::{ + kv::WriteData, + lock_manager::LockManager, + mvcc::{MvccTxn, MAX_TXN_WRITE_SIZE}, + txn::{ + commands::{ + Command, CommandExt, FlashbackToVersionReadPhase, ResponsePolicy, TypedCommand, + WriteCommand, WriteContext, WriteResult, + }, + latch, Result, + }, + ProcessResult, Snapshot, +}; + +command! { + FlashbackToVersion: + cmd_ty => (), + display => "kv::command::flashback_to_version @{} | {:?}", (version ,ctx), + content => { + version: TimeStamp, + end_key: Option, + next_lock_key: Option, + next_write_key: Option, + key_locks: Vec<(Key, Lock)>, + key_writes: Vec<(Key, Write)>, + } +} + +impl CommandExt for FlashbackToVersion { + ctx!(); + tag!(flashback_to_version); + request_type!(KvFlashbackToVersion); + + fn gen_lock(&self) -> latch::Lock { + latch::Lock::new( + self.key_locks + .iter() + .map(|(key, _)| key) + .chain(self.key_writes.iter().map(|(key, _)| key)), + ) + } + + fn write_bytes(&self) -> usize { + self.key_locks + .iter() + .map(|(key, _)| key.as_encoded().len()) + .chain( + self.key_writes + .iter() + .map(|(key, _)| key.as_encoded().len()), + ) + .sum() + } +} + +impl WriteCommand for FlashbackToVersion { + fn process_write(mut self, _snapshot: S, context: WriteContext<'_, L>) -> Result { + let mut txn = MvccTxn::new(TimeStamp::zero(), context.concurrency_manager); + + let mut rows = 0; + let mut next_lock_key = self.next_lock_key.take(); + let mut next_write_key = self.next_write_key.take(); + // To flashback the `CF_LOCK`, we need to delete all locks records whose + // `start_ts` is greater than the specified version, and if it's not a + // short-value `LockType::Put`, we need to delete the actual data from + // `CF_DEFAULT` as well. + // TODO: `resolved_ts` should be taken into account. 
+ for (key, lock) in self.key_locks { + if txn.write_size() >= MAX_TXN_WRITE_SIZE { + next_lock_key = Some(key); + break; + } + txn.unlock_key(key.clone(), lock.is_pessimistic_txn()); + rows += 1; + // If the short value is none and it's a `LockType::Put`, we should delete the + // corresponding key from `CF_DEFAULT` as well. + if lock.short_value.is_none() && lock.lock_type == LockType::Put { + txn.delete_value(key, lock.ts); + rows += 1; + } + } + // To flashback the `CF_WRITE`, we need to delete all write records whose + // `commit_ts` is greater than the specified version, and if it's not a + // short-value `WriteType::Put`, we need to delete the actual data from + // `CF_DEFAULT` as well. + for (key, write) in self.key_writes { + if txn.write_size() >= MAX_TXN_WRITE_SIZE { + next_write_key = Some(key); + break; + } + let encoded_key = key.clone().truncate_ts()?; + let commit_ts = key.decode_ts()?; + txn.delete_write(encoded_key.clone(), commit_ts); + rows += 1; + // If the short value is none and it's a `WriteType::Put`, we should delete the + // corresponding key from `CF_DEFAULT` as well. 
+ if write.short_value.is_none() && write.write_type == WriteType::Put { + txn.delete_value(encoded_key, write.start_ts); + rows += 1; + } + } + + let mut write_data = WriteData::from_modifies(txn.into_modifies()); + write_data.set_allowed_on_disk_almost_full(); + Ok(WriteResult { + ctx: self.ctx.clone(), + to_be_write: write_data, + rows, + pr: if next_lock_key.is_none() && next_write_key.is_none() { + ProcessResult::Res + } else { + let next_cmd = FlashbackToVersionReadPhase { + ctx: self.ctx.clone(), + deadline: self.deadline, + version: self.version, + end_key: self.end_key, + next_lock_key, + next_write_key, + }; + ProcessResult::NextCommand { + cmd: Command::FlashbackToVersionReadPhase(next_cmd), + } + }, + lock_info: None, + lock_guards: vec![], + response_policy: ResponsePolicy::OnApplied, + }) + } +} diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs new file mode 100644 index 00000000000..5feedd80eb8 --- /dev/null +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -0,0 +1,118 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +// #[PerformanceCriticalPath] +use txn_types::{Key, TimeStamp}; + +use crate::storage::{ + mvcc::MvccReader, + txn::{ + commands::{ + Command, CommandExt, FlashbackToVersion, ProcessResult, ReadCommand, TypedCommand, + }, + sched_pool::tls_collect_keyread_histogram_vec, + Result, + }, + ScanMode, Snapshot, Statistics, +}; + +command! 
{ + FlashbackToVersionReadPhase: + cmd_ty => (), + display => "kv::command::flashback_to_version_read_phase | {:?}", (ctx), + content => { + version: TimeStamp, + end_key: Option, + next_lock_key: Option, + next_write_key: Option, + } +} + +impl CommandExt for FlashbackToVersionReadPhase { + ctx!(); + tag!(flashback_to_version); + request_type!(KvFlashbackToVersion); + property!(readonly); + gen_lock!(empty); + + fn write_bytes(&self) -> usize { + 0 + } +} + +pub const FLASHBACK_BATCH_SIZE: usize = 256; + +impl ReadCommand for FlashbackToVersionReadPhase { + fn process_read(self, snapshot: S, statistics: &mut Statistics) -> Result { + let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &self.ctx); + // Scan the locks. + let mut key_locks = Vec::with_capacity(0); + let mut has_remain_locks = false; + if self.next_lock_key.is_some() { + let key_locks_result = reader.scan_locks( + self.next_lock_key.as_ref(), + self.end_key.as_ref(), + // To flashback `CF_LOCK`, we need to delete all locks. + |_| true, + FLASHBACK_BATCH_SIZE, + ); + statistics.add(&reader.statistics); + (key_locks, has_remain_locks) = key_locks_result?; + } + // Scan the writes. + let mut key_writes = Vec::with_capacity(0); + let mut has_remain_writes = false; + // The batch is not full, we can still read. + if self.next_write_key.is_some() && key_locks.len() < FLASHBACK_BATCH_SIZE { + let key_writes_result = reader.scan_writes( + self.next_write_key.as_ref(), + self.end_key.as_ref(), + // To flashback `CF_WRITE` and `CF_DEFAULT`, we need to delete all keys whose + // commit_ts is greater than the specified version. 
+ |key| key.decode_ts().unwrap() > self.version, + FLASHBACK_BATCH_SIZE - key_locks.len(), + Some(self.version), + ); + statistics.add(&reader.statistics); + (key_writes, has_remain_writes) = key_writes_result?; + } else if self.next_write_key.is_some() && key_locks.len() >= FLASHBACK_BATCH_SIZE { + // The batch is full, we need to read the writes in the next batch later. + has_remain_writes = true; + } + tls_collect_keyread_histogram_vec( + self.tag().get_str(), + (key_locks.len() + key_writes.len()) as f64, + ); + + if key_locks.is_empty() && key_writes.is_empty() { + Ok(ProcessResult::Res) + } else { + let next_lock_key = if has_remain_locks { + key_locks.last().map(|(key, _)| key.clone()) + } else { + None + }; + let next_write_key = if has_remain_writes && !key_writes.is_empty() { + key_writes.last().map(|(key, _)| key.clone()) + } else if has_remain_writes && key_writes.is_empty() { + // We haven't read any write yet, so we need to read the writes in the next + // batch later. + self.next_write_key + } else { + None + }; + let next_cmd = FlashbackToVersion { + ctx: self.ctx, + deadline: self.deadline, + version: self.version, + end_key: self.end_key, + key_locks, + key_writes, + next_lock_key, + next_write_key, + }; + Ok(ProcessResult::NextCommand { + cmd: Command::FlashbackToVersion(next_cmd), + }) + } + } +} diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 3dc1a37697e..f4794d6a0db 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -11,6 +11,8 @@ pub(crate) mod check_txn_status; pub(crate) mod cleanup; pub(crate) mod commit; pub(crate) mod compare_and_swap; +pub(crate) mod flashback_to_version; +pub(crate) mod flashback_to_version_read_phase; pub(crate) mod mvcc_by_key; pub(crate) mod mvcc_by_start_ts; pub(crate) mod pause; @@ -37,6 +39,8 @@ pub use cleanup::Cleanup; pub use commit::Commit; pub use compare_and_swap::RawCompareAndSwap; use concurrency_manager::{ConcurrencyManager, 
KeyHandleGuard}; +pub use flashback_to_version::FlashbackToVersion; +pub use flashback_to_version_read_phase::{FlashbackToVersionReadPhase, FLASHBACK_BATCH_SIZE}; use kvproto::kvrpcpb::*; pub use mvcc_by_key::MvccByKey; pub use mvcc_by_start_ts::MvccByStartTs; @@ -92,6 +96,8 @@ pub enum Command { MvccByStartTs(MvccByStartTs), RawCompareAndSwap(RawCompareAndSwap), RawAtomicStore(RawAtomicStore), + FlashbackToVersionReadPhase(FlashbackToVersionReadPhase), + FlashbackToVersion(FlashbackToVersion), } /// A `Command` with its return type, reified as the generic parameter `T`. @@ -567,6 +573,8 @@ impl Command { Command::MvccByStartTs(t) => t, Command::RawCompareAndSwap(t) => t, Command::RawAtomicStore(t) => t, + Command::FlashbackToVersionReadPhase(t) => t, + Command::FlashbackToVersion(t) => t, } } @@ -590,6 +598,8 @@ impl Command { Command::MvccByStartTs(t) => t, Command::RawCompareAndSwap(t) => t, Command::RawAtomicStore(t) => t, + Command::FlashbackToVersionReadPhase(t) => t, + Command::FlashbackToVersion(t) => t, } } @@ -602,6 +612,7 @@ impl Command { Command::ResolveLockReadPhase(t) => t.process_read(snapshot, statistics), Command::MvccByKey(t) => t.process_read(snapshot, statistics), Command::MvccByStartTs(t) => t.process_read(snapshot, statistics), + Command::FlashbackToVersionReadPhase(t) => t.process_read(snapshot, statistics), _ => panic!("unsupported read command"), } } @@ -627,6 +638,7 @@ impl Command { Command::Pause(t) => t.process_write(snapshot, context), Command::RawCompareAndSwap(t) => t.process_write(snapshot, context), Command::RawAtomicStore(t) => t.process_write(snapshot, context), + Command::FlashbackToVersion(t) => t.process_write(snapshot, context), _ => panic!("unsupported write command"), } } From f208e1b921333dd347195a086852419718c3bf2a Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Tue, 30 Aug 2022 13:02:24 +0800 Subject: [PATCH 0176/1149] dr-auto-sync: enable min-resolved-ts report by default (#13305) ref tikv/tikv#13219 Signed-off-by: 
lhy1024 --- components/raftstore/src/store/config.rs | 2 +- components/raftstore/src/store/worker/pd.rs | 3 +++ components/resolved_ts/tests/failpoints/mod.rs | 13 +++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 89b5cfc1ac9..34805e4c9ca 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -384,7 +384,7 @@ impl Default for Config { region_split_size: ReadableSize(0), clean_stale_peer_delay: ReadableDuration::minutes(0), inspect_interval: ReadableDuration::millis(500), - report_min_resolved_ts_interval: ReadableDuration::millis(0), + report_min_resolved_ts_interval: ReadableDuration::secs(1), check_leader_lease_interval: ReadableDuration::secs(0), renew_leader_lease_advance_duration: ReadableDuration::secs(0), report_region_buckets_tick_interval: ReadableDuration::secs(10), diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 4ac03e2578b..45a3827e8f5 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -448,6 +448,9 @@ fn config(interval: Duration) -> Duration { fail_point!("mock_min_resolved_ts_interval", |_| { Duration::from_millis(50) }); + fail_point!("mock_min_resolved_ts_interval_disable", |_| { + Duration::from_millis(0) + }); interval } diff --git a/components/resolved_ts/tests/failpoints/mod.rs b/components/resolved_ts/tests/failpoints/mod.rs index e734864471a..ab4e88f9d25 100644 --- a/components/resolved_ts/tests/failpoints/mod.rs +++ b/components/resolved_ts/tests/failpoints/mod.rs @@ -7,6 +7,7 @@ use kvproto::kvrpcpb::*; use pd_client::PdClient; use test_raftstore::{new_peer, sleep_ms}; pub use testsuite::*; +use tikv_util::config::ReadableDuration; use txn_types::TimeStamp; #[test] @@ -57,6 +58,16 @@ fn test_report_min_resolved_ts() { fail::cfg("mock_collect_tick_interval", 
"return(0)").unwrap(); fail::cfg("mock_min_resolved_ts_interval", "return(0)").unwrap(); let mut suite = TestSuite::new(1); + // default config is 1s + assert_eq!( + suite + .cluster + .cfg + .tikv + .raft_store + .report_min_resolved_ts_interval, + ReadableDuration::secs(1) + ); let region = suite.cluster.get_region(&[]); let ts1 = suite.cluster.pd_client.get_min_resolved_ts(); @@ -89,6 +100,7 @@ fn test_report_min_resolved_ts() { fn test_report_min_resolved_ts_disable() { fail::cfg("mock_tick_interval", "return(0)").unwrap(); fail::cfg("mock_collect_tick_interval", "return(0)").unwrap(); + fail::cfg("mock_min_resolved_ts_interval_disable", "return(0)").unwrap(); let mut suite = TestSuite::new(1); let region = suite.cluster.get_region(&[]); let ts1 = suite.cluster.pd_client.get_min_resolved_ts(); @@ -113,5 +125,6 @@ fn test_report_min_resolved_ts_disable() { assert!(ts3 == ts1); fail::remove("mock_tick_interval"); fail::remove("mock_collect_tick_interval"); + fail::remove("mock_min_resolved_ts_interval_disable"); suite.stop(); } From 564f6e3ae3da0022c4de78f4345516ced147b58b Mon Sep 17 00:00:00 2001 From: TonsnakeLin <87681388+TonsnakeLin@users.noreply.github.com> Date: Tue, 30 Aug 2022 15:00:23 +0800 Subject: [PATCH 0177/1149] PessmistincLock: lock the key if exists (#13211) close tikv/tikv#13210 Signed-off-by: Jay Lee Signed-off-by: TonsnakeLin Signed-off-by: CalvinNeo Signed-off-by: ystaticy Signed-off-by: hehechen Signed-off-by: ekexium Signed-off-by: BornChanger Signed-off-by: LintianShi Signed-off-by: Lintian Shi Signed-off-by: OneSizeFitQuorum Co-authored-by: Jay Co-authored-by: Calvin Neo Co-authored-by: ystaticy Co-authored-by: hehechen Co-authored-by: ekexium Co-authored-by: BornChanger <97348524+BornChanger@users.noreply.github.com> Co-authored-by: Lintian Shi Co-authored-by: Potato Co-authored-by: Ti Chi Robot Co-authored-by: Ping Yu Co-authored-by: Yilin Chen Co-authored-by: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Co-authored-by: 
LintianShi --- components/error_code/src/storage.rs | 1 + src/storage/mod.rs | 6 + src/storage/mvcc/mod.rs | 13 ++ src/storage/mvcc/reader/reader.rs | 1 + src/storage/mvcc/txn.rs | 3 +- .../txn/actions/acquire_pessimistic_lock.rs | 156 ++++++++++++++++-- .../txn/commands/acquire_pessimistic_lock.rs | 6 +- src/storage/txn/commands/mod.rs | 1 + src/storage/txn/scheduler.rs | 1 + tests/failpoints/cases/test_storage.rs | 2 + 10 files changed, 172 insertions(+), 18 deletions(-) diff --git a/components/error_code/src/storage.rs b/components/error_code/src/storage.rs index 61b81215438..ff994032dea 100644 --- a/components/error_code/src/storage.rs +++ b/components/error_code/src/storage.rs @@ -40,6 +40,7 @@ define_error_codes!( COMMIT_TS_TOO_LARGE => ("CommitTsTooLarge", "", ""), ASSERTION_FAILED => ("AssertionFailed", "", ""), + LOCK_IF_EXISTS_FAILED => ("LockIfExistsFailed", "", ""), UNKNOWN => ("Unknown", "", "") ); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 162a58b4801..0a7801848b9 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3120,6 +3120,7 @@ pub mod test_util { for_update_ts.next(), OldValues::default(), check_existence, + false, Context::default(), ) } @@ -7775,6 +7776,7 @@ mod tests { 21.into(), OldValues::default(), false, + false, Context::default(), ), expect_ok_callback(tx, 0), @@ -8465,6 +8467,7 @@ mod tests { 0.into(), OldValues::default(), false, + false, Default::default(), ), expect_ok_callback(tx.clone(), 0), @@ -8487,6 +8490,7 @@ mod tests { 0.into(), OldValues::default(), false, + false, Default::default(), ), expect_ok_callback(tx.clone(), 0), @@ -8714,6 +8718,7 @@ mod tests { TimeStamp::new(12), OldValues::default(), false, + false, Context::default(), ), pipelined_pessimistic_lock: true, @@ -8739,6 +8744,7 @@ mod tests { TimeStamp::new(12), OldValues::default(), false, + false, Context::default(), ), pipelined_pessimistic_lock: false, diff --git a/src/storage/mvcc/mod.rs b/src/storage/mvcc/mod.rs index 
1a554a4410b..f787014fd01 100644 --- a/src/storage/mvcc/mod.rs +++ b/src/storage/mvcc/mod.rs @@ -158,6 +158,12 @@ pub enum ErrorInner { existing_commit_ts: TimeStamp, }, + #[error( + "Lock_only_if_exists of a pessimistic lock request is set to true, but return_value is not, start_ts:{}, key:{}", + .start_ts, log_wrappers::Value::key(.key) + )] + LockIfExistsFailed { start_ts: TimeStamp, key: Vec }, + #[error("{0:?}")] Other(#[from] Box), } @@ -276,6 +282,12 @@ impl ErrorInner { existing_start_ts: *existing_start_ts, existing_commit_ts: *existing_commit_ts, }), + ErrorInner::LockIfExistsFailed { start_ts, key } => { + Some(ErrorInner::LockIfExistsFailed { + start_ts: *start_ts, + key: key.clone(), + }) + } ErrorInner::Io(_) | ErrorInner::Other(_) => None, } } @@ -375,6 +387,7 @@ impl ErrorCodeExt for Error { } ErrorInner::CommitTsTooLarge { .. } => error_code::storage::COMMIT_TS_TOO_LARGE, ErrorInner::AssertionFailed { .. } => error_code::storage::ASSERTION_FAILED, + ErrorInner::LockIfExistsFailed { .. 
} => error_code::storage::LOCK_IF_EXISTS_FAILED, ErrorInner::Other(_) => error_code::storage::UNKNOWN, } } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 2a43ac24583..eb83af270a1 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -856,6 +856,7 @@ pub mod tests { false, TimeStamp::zero(), true, + false, ) .unwrap(); self.write(txn.into_modifies()); diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index a9032d1b463..c02d8ef97c8 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -1050,6 +1050,7 @@ pub(crate) mod tests { false, false, TimeStamp::zero(), + false, ); } @@ -1316,7 +1317,7 @@ pub(crate) mod tests { // Simulate that min_commit_ts is pushed forward larger than latest_ts must_acquire_pessimistic_lock_impl( - &engine, b"key", b"key", 2, false, 20000, 2, false, false, 100, + &engine, b"key", b"key", 2, false, 20000, 2, false, false, 100, false, ); let snapshot = engine.snapshot(Default::default()).unwrap(); diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index 9df4d9ebce9..7e30dcdd37c 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -36,11 +36,23 @@ pub fn acquire_pessimistic_lock( need_check_existence: bool, min_commit_ts: TimeStamp, need_old_value: bool, + lock_only_if_exists: bool, ) -> MvccResult<(Option, OldValue)> { fail_point!("acquire_pessimistic_lock", |err| Err( crate::storage::mvcc::txn::make_txn_error(err, &key, reader.start_ts).into() )); - + if lock_only_if_exists && !need_value { + error!( + "lock_only_if_exists of a pessimistic lock request is set to true, but return_value is not"; + "start_ts" => reader.start_ts, + "key" => log_wrappers::Value::key(key.as_encoded()), + ); + return Err(ErrorInner::LockIfExistsFailed { + start_ts: reader.start_ts, + key: key.into_raw()?, + } + .into()); + 
} // Update max_ts for Insert operation to guarantee linearizability and snapshot // isolation if should_not_exist { @@ -243,7 +255,12 @@ pub fn acquire_pessimistic_lock( for_update_ts, min_commit_ts, }; - txn.put_pessimistic_lock(key, lock); + + // When lock_only_if_exists is false, always accquire pessimitic lock, otherwise + // do it when val exists + if !lock_only_if_exists || val.is_some() { + txn.put_pessimistic_lock(key, lock); + } // TODO don't we need to commit the modifies in txn? Ok((ret_val(need_value, need_check_existence, val), old_value)) @@ -284,6 +301,7 @@ pub mod tests { need_value: bool, need_check_existence: bool, min_commit_ts: impl Into, + lock_only_if_exists: bool, ) -> Option { let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -304,6 +322,7 @@ pub mod tests { need_check_existence, min_commit_ts, false, + lock_only_if_exists, ) .unwrap(); let modifies = txn.into_modifies(); @@ -331,6 +350,7 @@ pub mod tests { pk: &[u8], start_ts: impl Into, for_update_ts: impl Into, + lock_only_if_exists: bool, ) -> Option { must_succeed_impl( engine, @@ -343,6 +363,7 @@ pub mod tests { true, false, TimeStamp::zero(), + lock_only_if_exists, ) } @@ -366,6 +387,7 @@ pub mod tests { false, false, TimeStamp::zero(), + false, ) .is_none() ); @@ -392,6 +414,7 @@ pub mod tests { false, false, min_commit_ts, + false, ); } @@ -412,6 +435,7 @@ pub mod tests { false, false, TimeStamp::zero(), + false, ) } @@ -421,6 +445,7 @@ pub mod tests { pk: &[u8], start_ts: impl Into, for_update_ts: impl Into, + lock_only_if_exists: bool, ) -> MvccError { must_err_impl( engine, @@ -432,6 +457,7 @@ pub mod tests { true, false, TimeStamp::zero(), + lock_only_if_exists, ) } @@ -445,6 +471,7 @@ pub mod tests { need_value: bool, need_check_existence: bool, min_commit_ts: impl Into, + lock_only_if_exists: bool, ) -> MvccError { let snapshot = engine.snapshot(Default::default()).unwrap(); let min_commit_ts = min_commit_ts.into(); @@ -464,6 +491,7 
@@ pub mod tests { need_check_existence, min_commit_ts, false, + lock_only_if_exists, ) .unwrap_err() } @@ -737,25 +765,28 @@ pub mod tests { let engine = TestEngineBuilder::new().build().unwrap(); let (k, v) = (b"k", b"v"); - assert_eq!(must_succeed_return_value(&engine, k, k, 10, 10), None); + assert_eq!( + must_succeed_return_value(&engine, k, k, 10, 10, false), + None + ); must_pessimistic_locked(&engine, k, 10, 10); pessimistic_rollback::tests::must_success(&engine, k, 10, 10); // Put must_prewrite_put(&engine, k, v, k, 10); // KeyIsLocked - match must_err_return_value(&engine, k, k, 20, 20) { + match must_err_return_value(&engine, k, k, 20, 20, false) { MvccError(box ErrorInner::KeyIsLocked(_)) => (), e => panic!("unexpected error: {}", e), }; must_commit(&engine, k, 10, 20); // WriteConflict - match must_err_return_value(&engine, k, k, 15, 15) { + match must_err_return_value(&engine, k, k, 15, 15, false) { MvccError(box ErrorInner::WriteConflict { .. }) => (), e => panic!("unexpected error: {}", e), }; assert_eq!( - must_succeed_return_value(&engine, k, k, 25, 25), + must_succeed_return_value(&engine, k, k, 25, 25, false), Some(v.to_vec()) ); must_pessimistic_locked(&engine, k, 25, 25); @@ -765,7 +796,7 @@ pub mod tests { must_prewrite_lock(&engine, k, k, 30); must_commit(&engine, k, 30, 40); assert_eq!( - must_succeed_return_value(&engine, k, k, 45, 45), + must_succeed_return_value(&engine, k, k, 45, 45, false), Some(v.to_vec()) ); must_pessimistic_locked(&engine, k, 45, 45); @@ -774,7 +805,7 @@ pub mod tests { // Skip Write::Rollback must_rollback(&engine, k, 50, false); assert_eq!( - must_succeed_return_value(&engine, k, k, 55, 55), + must_succeed_return_value(&engine, k, k, 55, 55, false), Some(v.to_vec()) ); must_pessimistic_locked(&engine, k, 55, 55); @@ -783,17 +814,99 @@ pub mod tests { // Delete must_prewrite_delete(&engine, k, k, 60); must_commit(&engine, k, 60, 70); - assert_eq!(must_succeed_return_value(&engine, k, k, 75, 75), None); + 
assert_eq!( + must_succeed_return_value(&engine, k, k, 75, 75, false), + None + ); // Duplicated command - assert_eq!(must_succeed_return_value(&engine, k, k, 75, 75), None); assert_eq!( - must_succeed_return_value(&engine, k, k, 75, 55), + must_succeed_return_value(&engine, k, k, 75, 75, false), + None + ); + assert_eq!( + must_succeed_return_value(&engine, k, k, 75, 55, false), Some(v.to_vec()) ); must_pessimistic_locked(&engine, k, 75, 75); pessimistic_rollback::tests::must_success(&engine, k, 75, 75); } + #[test] + fn test_pessimistic_lock_only_if_exists() { + let engine = TestEngineBuilder::new().build().unwrap(); + let (k, v) = (b"k", b"v"); + + // The key doesn't exist, no pessimistic lock is generated + assert_eq!(must_succeed_return_value(&engine, k, k, 10, 10, true), None); + must_unlocked(&engine, k); + + match must_err_impl( + &engine, + k, + k, + 10, + false, + 10, + false, + false, + TimeStamp::zero(), + true, + ) { + MvccError(box ErrorInner::LockIfExistsFailed { + start_ts: _, + key: _, + }) => (), + e => panic!("unexpected error: {}", e), + }; + + // Put the value, writecf: k_20_put_v + must_prewrite_put(&engine, k, v, k, 10); + must_commit(&engine, k, 10, 20); + // Pessimistic lock generated + assert_eq!( + must_succeed_return_value(&engine, k, k, 25, 25, true), + Some(v.to_vec()) + ); + must_pessimistic_locked(&engine, k, 25, 25); + pessimistic_rollback::tests::must_success(&engine, k, 25, 25); + + // Skip Write::Lock, WriteRecord: k_20_put_v k_40_lock + must_prewrite_lock(&engine, k, k, 30); + must_commit(&engine, k, 30, 40); + assert_eq!( + must_succeed_return_value(&engine, k, k, 45, 45, true), + Some(v.to_vec()) + ); + must_pessimistic_locked(&engine, k, 45, 45); + pessimistic_rollback::tests::must_success(&engine, k, 45, 45); + + // Skip Write::Rollback WriteRecord: k_20_put_v k_40_lock k_50_R + must_rollback(&engine, k, 50, false); + assert_eq!( + must_succeed_return_value(&engine, k, k, 55, 55, true), + Some(v.to_vec()) + ); + 
must_pessimistic_locked(&engine, k, 55, 55); + pessimistic_rollback::tests::must_success(&engine, k, 55, 55); + + // Delete WriteRecord: k_20_put_v k_40_lock k_50_R k_70_delete + must_prewrite_delete(&engine, k, k, 60); + must_commit(&engine, k, 60, 70); + assert_eq!(must_succeed_return_value(&engine, k, k, 75, 75, true), None); + must_unlocked(&engine, k); + + // Duplicated command + assert_eq!( + must_succeed_return_value(&engine, k, k, 75, 75, false), + None + ); + must_pessimistic_locked(&engine, k, 75, 75); + assert_eq!(must_succeed_return_value(&engine, k, k, 75, 85, true), None); + must_pessimistic_locked(&engine, k, 75, 85); + pessimistic_rollback::tests::must_success(&engine, k, 75, 85); + must_unlocked(&engine, k); + } + #[test] fn test_overwrite_pessimistic_lock() { let engine = TestEngineBuilder::new().build().unwrap(); @@ -889,23 +1002,25 @@ pub mod tests { // Test constraint check with `should_not_exist`. if expected_value.is_none() { assert!( - must_succeed_impl(&engine, key, key, 50, true, 0, 50, false, false, 51) + must_succeed_impl(&engine, key, key, 50, true, 0, 50, false, false, 51, false) .is_none() ); must_pessimistic_rollback(&engine, key, 50, 51); } else { - must_err_impl(&engine, key, key, 50, true, 50, false, false, 51); + must_err_impl(&engine, key, key, 50, true, 50, false, false, 51, false); } must_unlocked(&engine, key); // Test getting value. - let res = must_succeed_impl(&engine, key, key, 50, false, 0, 50, true, false, 51); + let res = + must_succeed_impl(&engine, key, key, 50, false, 0, 50, true, false, 51, false); assert_eq!(res, expected_value.map(|v| v.to_vec())); must_pessimistic_rollback(&engine, key, 50, 51); // Test getting value when already locked. 
must_succeed(&engine, key, key, 50, 51); - let res2 = must_succeed_impl(&engine, key, key, 50, false, 0, 50, true, false, 51); + let res2 = + must_succeed_impl(&engine, key, key, 50, false, 0, 50, true, false, 51, false); assert_eq!(res2, expected_value.map(|v| v.to_vec())); must_pessimistic_rollback(&engine, key, 50, 51); } @@ -939,6 +1054,7 @@ pub mod tests { *need_check_existence, min_commit_ts, need_old_value, + false, ) .unwrap(); assert_eq!(old_value, OldValue::None); @@ -989,6 +1105,7 @@ pub mod tests { need_check_existence, min_commit_ts, need_old_value, + false, ) .unwrap(); assert_eq!( @@ -1022,6 +1139,7 @@ pub mod tests { false, min_commit_ts, true, + false, ) .unwrap(); assert_eq!( @@ -1064,6 +1182,7 @@ pub mod tests { *need_check_existence, min_commit_ts, need_old_value, + false, )?; Ok(old_value) }); @@ -1116,6 +1235,7 @@ pub mod tests { need_check_existence, min_commit_ts, need_old_value, + false, ) .unwrap_err(); @@ -1149,6 +1269,7 @@ pub mod tests { check_existence, min_commit_ts, need_old_value, + false, ) .unwrap_err(); } @@ -1221,6 +1342,7 @@ pub mod tests { need_value, need_check_existence, 0, + false, ); assert_eq!(value1, None); must_pessimistic_rollback(&engine, b"k1", start_ts, 30); @@ -1236,6 +1358,7 @@ pub mod tests { need_value, need_check_existence, 0, + false, ); assert_eq!(value2, expected_value(Some(b"v2"))); must_pessimistic_rollback(&engine, b"k2", start_ts, 30); @@ -1251,6 +1374,7 @@ pub mod tests { need_value, need_check_existence, 0, + false, ); assert_eq!(value3, None); must_pessimistic_rollback(&engine, b"k3", start_ts, 30); @@ -1266,6 +1390,7 @@ pub mod tests { need_value, need_check_existence, 0, + false, ); assert_eq!(value4, expected_value(Some(b"v4"))); must_pessimistic_rollback(&engine, b"k4", start_ts, 30); @@ -1281,6 +1406,7 @@ pub mod tests { need_value, need_check_existence, 0, + false, ); assert_eq!(value5, None); must_pessimistic_rollback(&engine, b"k5", start_ts, 30); diff --git 
a/src/storage/txn/commands/acquire_pessimistic_lock.rs b/src/storage/txn/commands/acquire_pessimistic_lock.rs index 1db991f70eb..3632d847e59 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock.rs @@ -26,8 +26,8 @@ command! { /// This can be rolled back with a [`PessimisticRollback`](Command::PessimisticRollback) command. AcquirePessimisticLock: cmd_ty => StorageResult, - display => "kv::command::acquirepessimisticlock keys({:?}) @ {} {} {} {:?} {} {} | {:?}", - (keys, start_ts, lock_ttl, for_update_ts, wait_timeout, min_commit_ts, check_existence, ctx), + display => "kv::command::acquirepessimisticlock keys({:?}) @ {} {} {} {:?} {} {} {} | {:?}", + (keys, start_ts, lock_ttl, for_update_ts, wait_timeout, min_commit_ts, check_existence, lock_only_if_exists, ctx), content => { /// The set of keys to lock. keys: Vec<(Key, bool)>, @@ -47,6 +47,7 @@ command! { min_commit_ts: TimeStamp, old_values: OldValues, check_existence: bool, + lock_only_if_exists: bool, } } @@ -110,6 +111,7 @@ impl WriteCommand for AcquirePessimisticLock self.check_existence, self.min_commit_ts, need_old_value, + self.lock_only_if_exists, ) { Ok((val, old_value)) => { if self.return_values || self.check_existence { diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index f4794d6a0db..a204ab4f30f 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -219,6 +219,7 @@ impl From for TypedCommand( 0.into(), OldValues::default(), false, + false, ctx.clone(), ), Box::new(move |r| tx.send(r).unwrap()), @@ -1005,6 +1006,7 @@ fn test_async_apply_prewrite_1pc_impl( 0.into(), OldValues::default(), false, + false, ctx.clone(), ), Box::new(move |r| tx.send(r).unwrap()), From 66edf9c2f62a9741eba596c8b687801418ed589a Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 30 Aug 2022 15:44:23 +0800 Subject: [PATCH 0178/1149] raftstore: prettify snapshot build flow (#13377) ref tikv/tikv#12876 
prettify snapshot build flow Signed-off-by: Connor1996 --- .../raftstore/src/store/peer_storage.rs | 107 ++++---- components/raftstore/src/store/snap.rs | 239 +++++------------- .../raftstore/src/store/worker/region.rs | 38 ++- components/test_raftstore/src/node.rs | 6 +- src/server/snap.rs | 4 +- 5 files changed, 138 insertions(+), 256 deletions(-) diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index cf70234c841..c99b7644321 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -33,7 +33,7 @@ use tikv_util::{ box_err, box_try, debug, defer, error, info, time::Instant, warn, worker::Scheduler, }; -use super::{metrics::*, worker::RegionTask, SnapEntry, SnapKey, SnapManager, SnapshotStatistics}; +use super::{metrics::*, worker::RegionTask, SnapEntry, SnapKey, SnapManager}; use crate::{ store::{ async_io::write::WriteTask, entry_storage::EntryStorage, fsm::GenSnapTask, @@ -439,7 +439,8 @@ where let mut snap_state = self.snap_state.borrow_mut(); let mut tried_cnt = self.snap_tried_cnt.borrow_mut(); - let (mut tried, mut last_canceled, mut snap) = (false, false, None); + let mut tried = false; + let mut last_canceled = false; if let SnapState::Generating { ref canceled, ref receiver, @@ -450,24 +451,19 @@ where last_canceled = canceled.load(Ordering::SeqCst); match receiver.try_recv() { Err(TryRecvError::Empty) => { - let e = raft::StorageError::SnapshotTemporarilyUnavailable; - return Err(raft::Error::Store(e)); + return Err(raft::Error::Store( + raft::StorageError::SnapshotTemporarilyUnavailable, + )); } - Ok(s) if !last_canceled => snap = Some(s), - Err(TryRecvError::Disconnected) | Ok(_) => {} - } - } - - if tried { - *snap_state = SnapState::Relax; - match snap { - Some(s) => { + Ok(s) if !last_canceled => { + *snap_state = SnapState::Relax; *tried_cnt = 0; if self.validate_snap(&s, request_index) { return Ok(s); } } - None => { + 
Err(TryRecvError::Disconnected) | Ok(_) => { + *snap_state = SnapState::Relax; warn!( "failed to try generating snapshot"; "region_id" => self.region.get_id(), @@ -491,6 +487,9 @@ where cnt ))); } + if !tried || !last_canceled { + *tried_cnt += 1; + } info!( "requesting snapshot"; @@ -500,10 +499,6 @@ where "request_peer" => to, ); - if !tried || !last_canceled { - *tried_cnt += 1; - } - let (sender, receiver) = mpsc::sync_channel(1); let canceled = Arc::new(AtomicBool::new(false)); let index = Arc::new(AtomicU64::new(0)); @@ -512,11 +507,15 @@ where index: index.clone(), receiver, }; - let mut to_store_id = 0; - if let Some(peer) = self.region().get_peers().iter().find(|p| p.id == to) { - to_store_id = peer.store_id; - } - let task = GenSnapTask::new(self.region.get_id(), index, canceled, sender, to_store_id); + + let store_id = self + .region() + .get_peers() + .iter() + .find(|p| p.id == to) + .map(|p| p.store_id) + .unwrap_or(0); + let task = GenSnapTask::new(self.region.get_id(), index, canceled, sender, store_id); let mut gen_snap_task = self.gen_snap_task.borrow_mut(); assert!(gen_snap_task.is_none()); @@ -1000,18 +999,14 @@ where "region_id" => region_id, ); - let msg = kv_snap + let apply_state: RaftApplyState = kv_snap .get_msg_cf(CF_RAFT, &keys::apply_state_key(region_id)) - .map_err(into_other::<_, raft::Error>)?; - let apply_state: RaftApplyState = match msg { - None => { - return Err(storage_error(format!( - "could not load raft state of region {}", - region_id - ))); - } - Some(state) => state, - }; + .map_err(into_other::<_, raft::Error>) + .and_then(|v| { + v.ok_or_else(|| { + storage_error(format!("could not load raft state of region {}", region_id)) + }) + })?; assert_eq!(apply_state, last_applied_state); let key = SnapKey::new( @@ -1019,19 +1014,18 @@ where last_applied_term, apply_state.get_applied_index(), ); - mgr.register(key.clone(), SnapEntry::Generating); defer!(mgr.deregister(&key, &SnapEntry::Generating)); - let state: RegionLocalState 
= kv_snap + let region_state: RegionLocalState = kv_snap .get_msg_cf(CF_RAFT, &keys::region_state_key(key.region_id)) - .and_then(|res| match res { - None => Err(box_err!("region {} could not find region info", region_id)), - Some(state) => Ok(state), - }) - .map_err(into_other::<_, raft::Error>)?; - - if state.get_state() != PeerState::Normal { + .map_err(into_other::<_, raft::Error>) + .and_then(|v| { + v.ok_or_else(|| { + storage_error(format!("region {} could not find region info", region_id)) + }) + })?; + if region_state.get_state() != PeerState::Normal { return Err(storage_error(format!( "snap job for {} seems stale, skip.", region_id @@ -1039,33 +1033,22 @@ where } let mut snapshot = Snapshot::default(); - // Set snapshot metadata. snapshot.mut_metadata().set_index(key.idx); snapshot.mut_metadata().set_term(key.term); - - let conf_state = util::conf_state_from_region(state.get_region()); - snapshot.mut_metadata().set_conf_state(conf_state); - - let mut s = mgr.get_snapshot_for_building(&key)?; + snapshot + .mut_metadata() + .set_conf_state(util::conf_state_from_region(region_state.get_region())); // Set snapshot data. 
- let mut snap_data = RaftSnapshotData::default(); - snap_data.set_region(state.get_region().clone()); - let mut stat = SnapshotStatistics::new(); - s.build( + let mut s = mgr.get_snapshot_for_building(&key)?; + let snap_data = s.build( engine, &kv_snap, - state.get_region(), - &mut snap_data, - &mut stat, + region_state.get_region(), allow_multi_files_snapshot, + for_balance, )?; - snap_data.mut_meta().set_for_balance(for_balance); - let v = snap_data.write_to_bytes()?; - snapshot.set_data(v.into()); - - SNAPSHOT_KV_COUNT_HISTOGRAM.observe(stat.kv_count as f64); - SNAPSHOT_SIZE_HISTOGRAM.observe(stat.size as f64); + snapshot.set_data(snap_data.write_to_bytes()?.into()); Ok(snapshot) } diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 74cfd5ab0d6..8b063e9e1f0 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -41,13 +41,7 @@ use tikv_util::{ use crate::{ coprocessor::CoprocessorHost, - store::{ - metrics::{ - CfNames, INGEST_SST_DURATION_SECONDS, SNAPSHOT_BUILD_TIME_HISTOGRAM, - SNAPSHOT_CF_KV_COUNT, SNAPSHOT_CF_SIZE, - }, - peer_storage::JOB_STATUS_CANCELLING, - }, + store::{metrics::*, peer_storage::JOB_STATUS_CANCELLING}, Error as RaftStoreError, Result as RaftStoreResult, }; @@ -211,7 +205,7 @@ fn retry_delete_snapshot(mgr: &SnapManagerCore, key: &SnapKey, snap: &Snapshot) false } -fn gen_snapshot_meta(cf_files: &[CfFile]) -> RaftStoreResult { +fn gen_snapshot_meta(cf_files: &[CfFile], for_balance: bool) -> RaftStoreResult { let mut meta = Vec::with_capacity(cf_files.len()); for cf_file in cf_files { if !SNAPSHOT_CFS.iter().any(|cf| cf_file.cf == *cf) { @@ -239,6 +233,7 @@ fn gen_snapshot_meta(cf_files: &[CfFile]) -> RaftStoreResult { } let mut snapshot_meta = SnapshotMeta::default(); snapshot_meta.set_cf_files(meta.into()); + snapshot_meta.set_for_balance(for_balance); Ok(snapshot_meta) } @@ -424,7 +419,7 @@ impl CfFile { #[derive(Default)] struct MetaFile { - pub 
meta: SnapshotMeta, + pub meta: Option, pub path: PathBuf, pub file: Option, @@ -735,7 +730,7 @@ impl Snapshot { } } } - self.meta_file.meta = snapshot_meta; + self.meta_file.meta = Some(snapshot_meta); Ok(()) } @@ -754,7 +749,7 @@ impl Snapshot { } pub fn load_snapshot_meta_if_necessary(&mut self) -> RaftStoreResult<()> { - if self.meta_file.meta.get_cf_files().is_empty() && file_exists(&self.meta_file.path) { + if self.meta_file.meta.is_none() && file_exists(&self.meta_file.path) { return self.load_snapshot_meta(); } Ok(()) @@ -817,7 +812,7 @@ impl Snapshot { // Only called in `do_build`. fn save_meta_file(&mut self) -> RaftStoreResult<()> { - let v = box_try!(self.meta_file.meta.write_to_bytes()); + let v = box_try!(self.meta_file.meta.as_ref().unwrap().write_to_bytes()); if let Some(mut f) = self.meta_file.file.take() { // `meta_file` could be None for this case: in `init_for_building` the snapshot // exists so no temporary meta file is created, and this field is @@ -843,8 +838,8 @@ impl Snapshot { engine: &EK, kv_snap: &EK::Snapshot, region: &Region, - stat: &mut SnapshotStatistics, allow_multi_files_snapshot: bool, + for_balance: bool, ) -> RaftStoreResult<()> where EK: KvEngine, @@ -925,10 +920,8 @@ impl Snapshot { ); } - stat.kv_count = self.cf_files.iter().map(|cf| cf.kv_count as usize).sum(); // save snapshot meta to meta file - let snapshot_meta = gen_snapshot_meta(&self.cf_files[..])?; - self.meta_file.meta = snapshot_meta; + self.meta_file.meta = Some(gen_snapshot_meta(&self.cf_files[..], for_balance)?); self.save_meta_file()?; Ok(()) } @@ -1031,31 +1024,41 @@ impl Snapshot { engine: &EK, kv_snap: &EK::Snapshot, region: &Region, - snap_data: &mut RaftSnapshotData, - stat: &mut SnapshotStatistics, allow_multi_files_snapshot: bool, - ) -> RaftStoreResult<()> { + for_balance: bool, + ) -> RaftStoreResult { + let mut snap_data = RaftSnapshotData::default(); + snap_data.set_region(region.clone()); + let t = Instant::now(); - self.do_build::(engine, kv_snap, 
region, stat, allow_multi_files_snapshot)?; + self.do_build::( + engine, + kv_snap, + region, + allow_multi_files_snapshot, + for_balance, + )?; - let total_size = self.total_size()?; - stat.size = total_size; + let total_size = self.total_size(); + let total_count = self.total_count(); // set snapshot meta data snap_data.set_file_size(total_size); snap_data.set_version(SNAPSHOT_VERSION); - snap_data.set_meta(self.meta_file.meta.clone()); + snap_data.set_meta(self.meta_file.meta.as_ref().unwrap().clone()); SNAPSHOT_BUILD_TIME_HISTOGRAM.observe(duration_to_sec(t.saturating_elapsed()) as f64); + SNAPSHOT_KV_COUNT_HISTOGRAM.observe(total_count as f64); + SNAPSHOT_SIZE_HISTOGRAM.observe(total_size as f64); info!( "scan snapshot"; "region_id" => region.get_id(), "snapshot" => self.path(), - "key_count" => stat.kv_count, + "key_count" => total_count, "size" => total_size, "takes" => ?t.saturating_elapsed(), ); - Ok(()) + Ok(snap_data) } pub fn apply(&mut self, options: ApplyOptions) -> Result<()> { @@ -1119,11 +1122,15 @@ impl Snapshot { file_system::metadata(&self.meta_file.path) } - pub fn total_size(&self) -> io::Result { - Ok(self - .cf_files + pub fn total_size(&self) -> u64 { + self.cf_files .iter() - .fold(0, |acc, x| acc + x.size.iter().sum::())) + .map(|cf| cf.size.iter().sum::()) + .sum() + } + + pub fn total_count(&self) -> u64 { + self.cf_files.iter().map(|cf| cf.kv_count).sum() } pub fn save(&mut self) -> io::Result<()> { @@ -1182,7 +1189,7 @@ impl Snapshot { sync_dir(&self.dir_path)?; // write meta file - let v = self.meta_file.meta.write_to_bytes()?; + let v = self.meta_file.meta.as_ref().unwrap().write_to_bytes()?; { let mut meta_file = self.meta_file.file.take().unwrap(); meta_file.write_all(&v[..])?; @@ -1560,19 +1567,17 @@ impl SnapManager { Ok(Box::new(s)) } - /// Get a `Snapshot` can be used for writting and then `save`. Concurrent + /// Get a `Snapshot` can be used for writing and then `save`. 
Concurrent /// calls are allowed because only one caller can lock temporary disk /// files. pub fn get_snapshot_for_receiving( &self, key: &SnapKey, - data: &[u8], + snapshot_meta: SnapshotMeta, ) -> RaftStoreResult> { let _lock = self.core.registry.rl(); - let mut snapshot_data = RaftSnapshotData::default(); - snapshot_data.merge_from_bytes(data)?; let base = &self.core.base; - let f = Snapshot::new_for_receiving(base, key, &self.core, snapshot_data.take_meta())?; + let f = Snapshot::new_for_receiving(base, key, &self.core, snapshot_meta)?; Ok(Box::new(f)) } @@ -1902,7 +1907,7 @@ pub mod tests { use kvproto::{ encryptionpb::EncryptionMethod, metapb::{Peer, Region}, - raft_serverpb::{RaftApplyState, RaftSnapshotData, RegionLocalState, SnapshotMeta}, + raft_serverpb::{RaftApplyState, RegionLocalState, SnapshotMeta}, }; use protobuf::Message; use raft::eraftpb::Entry; @@ -2024,7 +2029,7 @@ pub mod tests { Ok(Engines::new(kv, raft)) } - pub fn get_kv_count(snap: &impl EngineSnapshot) -> usize { + pub fn get_kv_count(snap: &impl EngineSnapshot) -> u64 { let mut kv_count = 0; for cf in SNAPSHOT_CFS { snap.scan( @@ -2139,7 +2144,7 @@ pub mod tests { }; cf_file.push(f); } - let meta = super::gen_snapshot_meta(&cf_file).unwrap(); + let meta = super::gen_snapshot_meta(&cf_file, false).unwrap(); let cf_files = meta.get_cf_files(); assert_eq!(cf_files.len(), super::SNAPSHOT_CFS.len() * 2); // each CF has two snapshot files; for (i, cf_file_meta) in meta.get_cf_files().iter().enumerate() { @@ -2221,28 +2226,14 @@ pub mod tests { assert!(!s1.exists()); assert_eq!(mgr_core.get_total_snap_size().unwrap(), 0); - let mut snap_data = RaftSnapshotData::default(); - snap_data.set_region(region.clone()); - let mut stat = SnapshotStatistics::new(); - Snapshot::build::( - &mut s1, - &db, - &snapshot, - ®ion, - &mut snap_data, - &mut stat, - true, - ) - .unwrap(); + let mut snap_data = s1.build(&db, &snapshot, ®ion, true, false).unwrap(); // Ensure that this snapshot file does exist after 
being built. assert!(s1.exists()); - let total_size = s1.total_size().unwrap(); + let size = s1.total_size(); // Ensure the `size_track` is modified correctly. - let size = mgr_core.get_total_snap_size().unwrap(); - assert_eq!(size, total_size); - assert_eq!(stat.size as u64, size); - assert_eq!(stat.kv_count, get_kv_count(&snapshot)); + assert_eq!(size, mgr_core.get_total_snap_size().unwrap()); + assert_eq!(s1.total_count(), get_kv_count(&snapshot)); // Ensure this snapshot could be read for sending. let mut s2 = Snapshot::new_for_sending(src_dir.path(), &key, &mgr_core).unwrap(); @@ -2335,34 +2326,13 @@ pub mod tests { let mut s1 = Snapshot::new_for_building(dir.path(), &key, &mgr_core).unwrap(); assert!(!s1.exists()); - let mut snap_data = RaftSnapshotData::default(); - snap_data.set_region(region.clone()); - let mut stat = SnapshotStatistics::new(); - Snapshot::build::( - &mut s1, - &db, - &snapshot, - ®ion, - &mut snap_data, - &mut stat, - true, - ) - .unwrap(); + let _ = s1.build(&db, &snapshot, ®ion, true, false).unwrap(); assert!(s1.exists()); let mut s2 = Snapshot::new_for_building(dir.path(), &key, &mgr_core).unwrap(); assert!(s2.exists()); - Snapshot::build::( - &mut s2, - &db, - &snapshot, - ®ion, - &mut snap_data, - &mut stat, - true, - ) - .unwrap(); + let _ = s2.build(&db, &snapshot, ®ion, true, false).unwrap(); assert!(s2.exists()); } @@ -2505,19 +2475,7 @@ pub mod tests { let mut s1 = Snapshot::new_for_building(dir.path(), &key, &mgr_core).unwrap(); assert!(!s1.exists()); - let mut snap_data = RaftSnapshotData::default(); - snap_data.set_region(region.clone()); - let mut stat = SnapshotStatistics::new(); - Snapshot::build::( - &mut s1, - &db, - &snapshot, - ®ion, - &mut snap_data, - &mut stat, - true, - ) - .unwrap(); + let _ = s1.build(&db, &snapshot, ®ion, true, false).unwrap(); assert!(s1.exists()); corrupt_snapshot_size_in(dir.path()); @@ -2526,16 +2484,7 @@ pub mod tests { let mut s2 = Snapshot::new_for_building(dir.path(), &key, 
&mgr_core).unwrap(); assert!(!s2.exists()); - Snapshot::build::( - &mut s2, - &db, - &snapshot, - ®ion, - &mut snap_data, - &mut stat, - true, - ) - .unwrap(); + let snap_data = s2.build(&db, &snapshot, ®ion, true, false).unwrap(); assert!(s2.exists()); let dst_dir = Builder::new() @@ -2596,19 +2545,7 @@ pub mod tests { let mut s1 = Snapshot::new_for_building(dir.path(), &key, &mgr_core).unwrap(); assert!(!s1.exists()); - let mut snap_data = RaftSnapshotData::default(); - snap_data.set_region(region.clone()); - let mut stat = SnapshotStatistics::new(); - Snapshot::build::( - &mut s1, - &db, - &snapshot, - ®ion, - &mut snap_data, - &mut stat, - true, - ) - .unwrap(); + let _ = s1.build(&db, &snapshot, ®ion, true, false).unwrap(); assert!(s1.exists()); assert_eq!(1, corrupt_snapshot_meta_file(dir.path())); @@ -2617,16 +2554,7 @@ pub mod tests { let mut s2 = Snapshot::new_for_building(dir.path(), &key, &mgr_core).unwrap(); assert!(!s2.exists()); - Snapshot::build::( - &mut s2, - &db, - &snapshot, - ®ion, - &mut snap_data, - &mut stat, - true, - ) - .unwrap(); + let mut snap_data = s2.build(&db, &snapshot, ®ion, true, false).unwrap(); assert!(s2.exists()); let dst_dir = Builder::new() @@ -2688,21 +2616,9 @@ pub mod tests { let mgr_core = create_manager_core(&path, u64::MAX); let mut s1 = Snapshot::new_for_building(&path, &key1, &mgr_core).unwrap(); let mut region = gen_test_region(1, 1, 1); - let mut snap_data = RaftSnapshotData::default(); - snap_data.set_region(region.clone()); - let mut stat = SnapshotStatistics::new(); - Snapshot::build::( - &mut s1, - &db, - &snapshot, - ®ion, - &mut snap_data, - &mut stat, - true, - ) - .unwrap(); + let mut snap_data = s1.build(&db, &snapshot, ®ion, true, false).unwrap(); let mut s = Snapshot::new_for_sending(&path, &key1, &mgr_core).unwrap(); - let expected_size = s.total_size().unwrap(); + let expected_size = s.total_size(); let mut s2 = Snapshot::new_for_receiving(&path, &key1, &mgr_core, snap_data.get_meta().clone()) 
.unwrap(); @@ -2772,19 +2688,14 @@ pub mod tests { // Ensure the snapshot being built will not be deleted on GC. src_mgr.register(key.clone(), SnapEntry::Generating); let mut s1 = src_mgr.get_snapshot_for_building(&key).unwrap(); - let mut snap_data = RaftSnapshotData::default(); - snap_data.set_region(region.clone()); - let mut stat = SnapshotStatistics::new(); - s1.build(&db, &snapshot, ®ion, &mut snap_data, &mut stat, true) - .unwrap(); - let v = snap_data.write_to_bytes().unwrap(); + let mut snap_data = s1.build(&db, &snapshot, ®ion, true, false).unwrap(); check_registry_around_deregister(&src_mgr, &key, &SnapEntry::Generating); // Ensure the snapshot being sent will not be deleted on GC. src_mgr.register(key.clone(), SnapEntry::Sending); let mut s2 = src_mgr.get_snapshot_for_sending(&key).unwrap(); - let expected_size = s2.total_size().unwrap(); + let expected_size = s2.total_size(); let dst_temp_dir = Builder::new() .prefix("test-snap-deletion-on-registry-dst") @@ -2796,7 +2707,9 @@ pub mod tests { // Ensure the snapshot being received will not be deleted on GC. dst_mgr.register(key.clone(), SnapEntry::Receiving); - let mut s3 = dst_mgr.get_snapshot_for_receiving(&key, &v[..]).unwrap(); + let mut s3 = dst_mgr + .get_snapshot_for_receiving(&key, snap_data.take_meta()) + .unwrap(); let n = io::copy(&mut s2, &mut s3).unwrap(); assert_eq!(n, expected_size); s3.save().unwrap(); @@ -2850,20 +2763,16 @@ pub mod tests { // Add an oldest snapshot for receiving. 
let recv_key = SnapKey::new(100, 100, 100); - let recv_head = { - let mut stat = SnapshotStatistics::new(); - let mut snap_data = RaftSnapshotData::default(); + let mut recv_head = { let mut s = snap_mgr.get_snapshot_for_building(&recv_key).unwrap(); s.build( &engine.kv, &snapshot, &gen_test_region(100, 1, 1), - &mut snap_data, - &mut stat, true, + false, ) - .unwrap(); - snap_data.write_to_bytes().unwrap() + .unwrap() }; let recv_remain = { let mut data = Vec::with_capacity(1024); @@ -2873,7 +2782,7 @@ pub mod tests { data }; let mut s = snap_mgr - .get_snapshot_for_receiving(&recv_key, &recv_head) + .get_snapshot_for_receiving(&recv_key, recv_head.take_meta()) .unwrap(); s.write_all(&recv_remain).unwrap(); s.save().unwrap(); @@ -2884,17 +2793,9 @@ pub mod tests { let key = SnapKey::new(region_id, 1, 1); let region = gen_test_region(region_id, 1, 1); let mut s = snap_mgr.get_snapshot_for_building(&key).unwrap(); - let mut snap_data = RaftSnapshotData::default(); - let mut stat = SnapshotStatistics::new(); - s.build( - &engine.kv, - &snapshot, - ®ion, - &mut snap_data, - &mut stat, - true, - ) - .unwrap(); + let _ = s + .build(&engine.kv, &snapshot, ®ion, true, false) + .unwrap(); // The first snap_size is for region 100. // That snapshot won't be deleted because it's not for generating. @@ -2963,11 +2864,7 @@ pub mod tests { // correctly. 
for _ in 0..2 { let mut s1 = snap_mgr.get_snapshot_for_building(&key).unwrap(); - let mut snap_data = RaftSnapshotData::default(); - snap_data.set_region(region.clone()); - let mut stat = SnapshotStatistics::new(); - s1.build(&db, &snapshot, ®ion, &mut snap_data, &mut stat, true) - .unwrap(); + let _ = s1.build(&db, &snapshot, ®ion, true, false).unwrap(); assert!(snap_mgr.delete_snapshot(&key, &s1, false)); } } diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index f167a2c90bf..244ca514924 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -383,14 +383,14 @@ where info!("begin apply snap data"; "region_id" => region_id, "peer_id" => peer_id); fail_point!("region_apply_snap", |_| { Ok(()) }); check_abort(&abort)?; - let region_key = keys::region_state_key(region_id); - let mut region_state = self.region_state(region_id)?; - // clear up origin data. + let mut region_state = self.region_state(region_id)?; let region = region_state.get_region().clone(); let start_key = keys::enc_start_key(®ion); let end_key = keys::enc_end_key(®ion); check_abort(&abort)?; + + // clear up origin data. let overlap_ranges = self .pending_delete_ranges .drain_overlap_ranges(&start_key, &end_key); @@ -404,8 +404,8 @@ where check_abort(&abort)?; fail_point!("apply_snap_cleanup_range"); + // apply snapshot let apply_state = self.apply_state(region_id)?; - let term = apply_state.get_truncated_state().get_term(); let idx = apply_state.get_truncated_state().get_index(); let snap_key = SnapKey::new(region_id, term, idx); @@ -430,9 +430,10 @@ where self.coprocessor_host .post_apply_snapshot(®ion, peer_id, &snap_key, Some(&s)); + // delete snapshot state. 
let mut wb = self.engine.write_batch(); region_state.set_state(PeerState::Normal); - box_try!(wb.put_msg_cf(CF_RAFT, ®ion_key, ®ion_state)); + box_try!(wb.put_msg_cf(CF_RAFT, &keys::region_state_key(region_id), ®ion_state)); box_try!(wb.delete_cf(CF_RAFT, &keys::snapshot_raft_state_key(region_id))); wb.write().unwrap_or_else(|e| { panic!("{} failed to save apply_snap result: {:?}", region_id, e); @@ -455,8 +456,7 @@ where Ordering::SeqCst, ); SNAP_COUNTER.apply.all.inc(); - // let apply_histogram = SNAP_HISTOGRAM.with_label_values(&["apply"]); - // let timer = apply_histogram.start_coarse_timer(); + let start = Instant::now(); match self.apply_snap(region_id, peer_id, Arc::clone(&status)) { @@ -892,8 +892,9 @@ mod tests { RaftEngineReadOnly, SyncMutable, WriteBatch, WriteBatchExt, CF_DEFAULT, CF_WRITE, }; use keys::data_key; - use kvproto::raft_serverpb::{PeerState, RaftApplyState, RegionLocalState}; + use kvproto::raft_serverpb::{PeerState, RaftApplyState, RaftSnapshotData, RegionLocalState}; use pd_client::RpcClient; + use protobuf::Message; use tempfile::Builder; use tikv_util::worker::{LazyWorker, Worker}; @@ -1148,11 +1149,14 @@ mod tests { } msg => panic!("expected SnapshotGenerated, but got {:?}", msg), } - let data = s1.get_data(); + let mut data = RaftSnapshotData::default(); + data.merge_from_bytes(s1.get_data()).unwrap(); let key = SnapKey::from_snap(&s1).unwrap(); let mgr = SnapManager::new(snap_dir.path().to_str().unwrap()); let mut s2 = mgr.get_snapshot_for_sending(&key).unwrap(); - let mut s3 = mgr.get_snapshot_for_receiving(&key, data).unwrap(); + let mut s3 = mgr + .get_snapshot_for_receiving(&key, data.take_meta()) + .unwrap(); io::copy(&mut s2, &mut s3).unwrap(); s3.save().unwrap(); @@ -1372,11 +1376,8 @@ mod tests { key: &crate::store::SnapKey, snapshot: Option<&crate::store::Snapshot>, ) { - let code = snapshot.unwrap().total_size().unwrap() - + key.term - + key.region_id - + key.idx - + peer_id; + let code = + snapshot.unwrap().total_size() + 
key.term + key.region_id + key.idx + peer_id; self.pre_apply_count.fetch_add(1, Ordering::SeqCst); self.pre_apply_hash .fetch_add(code as usize, Ordering::SeqCst); @@ -1389,11 +1390,8 @@ mod tests { key: &crate::store::SnapKey, snapshot: Option<&crate::store::Snapshot>, ) { - let code = snapshot.unwrap().total_size().unwrap() - + key.term - + key.region_id - + key.idx - + peer_id; + let code = + snapshot.unwrap().total_size() + key.term + key.region_id + key.idx + peer_id; self.post_apply_count.fetch_add(1, Ordering::SeqCst); self.post_apply_hash .fetch_add(code as usize, Ordering::SeqCst); diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index be361db3185..11a5dda87bd 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -17,6 +17,7 @@ use kvproto::{ raft_cmdpb::*, raft_serverpb::{self, RaftMessage}, }; +use protobuf::Message; use raft::{eraftpb::MessageType, SnapshotStatus}; use raftstore::{ coprocessor::{config::SplitCheckConfigManager, CoprocessorHost}, @@ -94,7 +95,10 @@ impl Transport for ChannelTransport { Some(p) => { p.0.register(key.clone(), SnapEntry::Receiving); let data = msg.get_message().get_snapshot().get_data(); - p.0.get_snapshot_for_receiving(&key, data).unwrap() + let mut snapshot_data = raft_serverpb::RaftSnapshotData::default(); + snapshot_data.merge_from_bytes(data).unwrap(); + p.0.get_snapshot_for_receiving(&key, snapshot_data.take_meta()) + .unwrap() } None => return Err(box_err!("missing temp dir for store {}", to_store)), }; diff --git a/src/server/snap.rs b/src/server/snap.rs index e88fbd21fc9..b651d2d0c82 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -150,7 +150,7 @@ pub fn send_snap( if !s.exists() { return Err(box_err!("missing snap file: {:?}", s.path())); } - let total_size = s.total_size()?; + let total_size = s.total_size(); let mut chunks = { let mut first_chunk = SnapshotChunk::default(); @@ -234,7 +234,7 @@ impl RecvSnapContext { 
let _with_io_type = WithIoType::new(io_type); let snap = { - let s = match snap_mgr.get_snapshot_for_receiving(&key, data) { + let s = match snap_mgr.get_snapshot_for_receiving(&key, snapshot.take_meta()) { Ok(s) => s, Err(e) => return Err(box_err!("{} failed to create snapshot file: {:?}", key, e)), }; From aed265824757fe74287a80e3b36e72da0ceee5ee Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 30 Aug 2022 07:50:23 -0700 Subject: [PATCH 0179/1149] *: move RegionMeta to raftstore (#13335) ref tikv/tikv#12842, ref tikv/tikv#13334 This PR moves `RegionMeta` to raftstore module, so that both v1 and v2 can use it for debugging. Several fields are re-arranged to be more concise and more informations. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- .../raftstore-v2/src/operation/query/mod.rs | 16 +- components/raftstore-v2/src/router/mod.rs | 4 +- .../src/router/response_channel.rs | 162 ++++++++---------- components/raftstore/src/store/fsm/peer.rs | 61 ++++--- components/raftstore/src/store/mod.rs | 8 +- components/raftstore/src/store/msg.rs | 6 +- components/raftstore/src/store/peer.rs | 11 -- .../raftstore/src/store}/region_meta.rs | 70 +++++--- src/server/status_server/mod.rs | 5 +- tests/integrations/server/status_server.rs | 6 +- 10 files changed, 173 insertions(+), 176 deletions(-) rename {src/server/status_server => components/raftstore/src/store}/region_meta.rs (81%) diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index ff03117419b..bb8467fbc5c 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -12,7 +12,7 @@ use engine_traits::{KvEngine, RaftEngine}; use kvproto::raft_cmdpb::{RaftCmdRequest, RaftCmdResponse, StatusCmdType}; use raftstore::{ - store::{cmd_resp, util, ReadCallback}, + store::{cmd_resp, region_meta::RegionMeta, util, GroupState, ReadCallback}, Error, Result, }; use tikv_util::box_err; @@ -20,7 +20,7 @@ 
use tikv_util::box_err; use crate::{ fsm::PeerFsmDelegate, raft::Peer, - router::{QueryResChannel, QueryResult}, + router::{DebugInfoChannel, QueryResChannel, QueryResult}, }; mod local; @@ -79,4 +79,16 @@ impl Peer { cmd_resp::bind_term(resp, self.term()); Ok(()) } + + /// Query internal states for debugging purpose. + pub fn on_query_debug_info(&self, ch: DebugInfoChannel) { + let entry_storage = self.storage().entry_storage(); + let meta = RegionMeta::new( + self.storage().region_state(), + entry_storage.apply_state(), + GroupState::Ordered, + self.raft_group().status(), + ); + ch.set_result(meta); + } } diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs index 17250833168..8c1ba338642 100644 --- a/components/raftstore-v2/src/router/mod.rs +++ b/components/raftstore-v2/src/router/mod.rs @@ -9,5 +9,7 @@ pub(crate) use self::internal_message::ApplyTask; pub use self::{ internal_message::ApplyRes, message::{PeerMsg, PeerTick, RaftRequest, StoreMsg, StoreTick}, - response_channel::{CmdResChannel, QueryResChannel, QueryResult}, + response_channel::{ + CmdResChannel, DebugInfoChannel, DebugInfoSubscriber, QueryResChannel, QueryResult, + }, }; diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index e87095215b8..2e0908aa7d0 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -16,7 +16,7 @@ use std::{ cell::UnsafeCell, fmt, future::Future, - mem::{self, ManuallyDrop}, + mem, pin::Pin, sync::{ atomic::{AtomicU64, Ordering}, @@ -29,7 +29,8 @@ use engine_traits::Snapshot; use futures::task::AtomicWaker; use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, raft_cmdpb::RaftCmdResponse}; use raftstore::store::{ - local_metrics::TimeTracker, msg::ErrorCallback, ReadCallback, RegionSnapshot, WriteCallback, + local_metrics::TimeTracker, msg::ErrorCallback, region_meta::RegionMeta, 
ReadCallback, + RegionSnapshot, WriteCallback, }; use smallvec::SmallVec; use tikv_util::memory::HeapSize; @@ -69,6 +70,17 @@ const fn fired_bit_of(event: u64) -> u64 { 1 << (event * 2 + 1) } +impl Default for EventCore { + #[inline] + fn default() -> Self { + Self { + event: AtomicU64::new(0), + res: UnsafeCell::new(None), + waker: AtomicWaker::new(), + } + } +} + impl EventCore { #[inline] fn notify_event(&self, event: u64) { @@ -201,10 +213,54 @@ impl<'a, Res> Future for WaitResult<'a, Res> { } } -pub struct CmdResSubscriber { - core: Arc>, +/// A base subscriber that contains most common implementation of subscribers. +pub struct BaseSubscriber { + core: Arc>, } +impl BaseSubscriber { + /// Wait for the result. + #[inline] + pub async fn result(mut self) -> Option { + WaitResult { core: &self.core }.await + } +} + +unsafe impl Send for BaseSubscriber {} +unsafe impl Sync for BaseSubscriber {} + +/// A base channel that contains most common implementation of channels. +pub struct BaseChannel { + core: Arc>, +} + +impl BaseChannel { + /// Creates a pair of channel and subscriber. + #[inline] + pub fn pair() -> (Self, BaseSubscriber) { + let core: Arc> = Arc::default(); + (Self { core: core.clone() }, BaseSubscriber { core }) + } + + /// Sets the final result. 
+ #[inline] + pub fn set_result(mut self, res: Res) { + self.core.set_result(res); + } +} + +impl Drop for BaseChannel { + #[inline] + fn drop(&mut self) { + self.core.cancel(); + } +} + +unsafe impl Send for BaseChannel {} +unsafe impl Sync for BaseChannel {} + +pub type CmdResSubscriber = BaseSubscriber; + impl CmdResSubscriber { pub async fn wait_proposed(&mut self) -> bool { WaitEvent { @@ -221,38 +277,14 @@ impl CmdResSubscriber { } .await } - - pub async fn result(mut self) -> Option { - WaitResult { core: &self.core }.await - } } -unsafe impl Send for CmdResSubscriber {} -unsafe impl Sync for CmdResSubscriber {} - -pub struct CmdResChannel { - core: ManuallyDrop>>, -} +pub type CmdResChannel = BaseChannel; impl CmdResChannel { // Valid range is [1, 30] const PROPOSED_EVENT: u64 = 1; const COMMITTED_EVENT: u64 = 2; - - #[inline] - pub fn pair() -> (Self, CmdResSubscriber) { - let core = Arc::new(EventCore { - event: AtomicU64::new(0), - res: UnsafeCell::new(None), - waker: AtomicWaker::new(), - }); - ( - Self { - core: ManuallyDrop::new(core.clone()), - }, - CmdResSubscriber { core }, - ) - } } impl ErrorCallback for CmdResChannel { @@ -294,27 +326,10 @@ impl WriteCallback for CmdResChannel { // TODO: support executing hooks inside setting result. #[inline] fn set_result(mut self, res: RaftCmdResponse) { - self.core.set_result(res); - unsafe { - ManuallyDrop::drop(&mut self.core); - } - mem::forget(self); - } -} - -impl Drop for CmdResChannel { - #[inline] - fn drop(&mut self) { - self.core.cancel(); - unsafe { - ManuallyDrop::drop(&mut self.core); - } + self.set_result(res); } } -unsafe impl Send for CmdResChannel {} -unsafe impl Sync for CmdResChannel {} - /// Response for Read. 
/// /// Unlike v1, snapshot are always taken in LocalReader, hence snapshot doesn't @@ -351,25 +366,7 @@ impl QueryResult { } } -pub struct QueryResChannel { - core: ManuallyDrop>>, -} - -impl QueryResChannel { - pub fn pair() -> (Self, QueryResSubscriber) { - let core = Arc::new(EventCore { - event: AtomicU64::new(0), - res: UnsafeCell::new(None), - waker: AtomicWaker::new(), - }); - ( - Self { - core: ManuallyDrop::new(core.clone()), - }, - QueryResSubscriber { core }, - ) - } -} +pub type QueryResChannel = BaseChannel; impl ErrorCallback for QueryResChannel { #[inline] @@ -388,11 +385,7 @@ impl ReadCallback for QueryResChannel { #[inline] fn set_result(mut self, res: QueryResult) { - self.core.set_result(res); - unsafe { - ManuallyDrop::drop(&mut self.core); - } - mem::forget(self); + self.set_result(res); } fn read_tracker(&self) -> Option<&TrackerToken> { @@ -400,31 +393,10 @@ impl ReadCallback for QueryResChannel { } } -impl Drop for QueryResChannel { - #[inline] - fn drop(&mut self) { - self.core.cancel(); - unsafe { - ManuallyDrop::drop(&mut self.core); - } - } -} - -unsafe impl Send for QueryResChannel {} -unsafe impl Sync for QueryResChannel {} - -pub struct QueryResSubscriber { - core: Arc>, -} - -impl QueryResSubscriber { - pub async fn result(mut self) -> Option { - WaitResult { core: &self.core }.await - } -} +pub type QueryResSubscriber = BaseSubscriber; -unsafe impl Send for QueryResSubscriber {} -unsafe impl Sync for QueryResSubscriber {} +pub type DebugInfoChannel = BaseChannel; +pub type DebugInfoSubscriber = BaseSubscriber; #[cfg(test)] mod tests { diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 5497d2ad1d9..c587ea5f32c 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -33,8 +33,8 @@ use kvproto::{ StatusCmdType, StatusResponse, }, raft_serverpb::{ - ExtraMessage, ExtraMessageType, MergeState, PeerState, RaftApplyState, 
RaftMessage, - RaftSnapshotData, RaftTruncatedState, RegionLocalState, + ExtraMessage, ExtraMessageType, MergeState, PeerState, RaftMessage, RaftSnapshotData, + RaftTruncatedState, RegionLocalState, }, replication_modepb::{DrAutoSyncState, ReplicationMode}, }; @@ -84,6 +84,7 @@ use crate::{ UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryState, UnsafeRecoveryWaitApplySyncer, TRANSFER_LEADER_COMMAND_REPLY_CTX, }, + region_meta::RegionMeta, transport::Transport, util, util::{is_learner, KeysInfoFormatter, LeaseState}, @@ -92,8 +93,8 @@ use crate::{ GcSnapshotTask, RaftlogFetchTask, RaftlogGcTask, ReadDelegate, ReadProgress, RegionTask, SplitCheckTask, }, - AbstractPeer, CasualMessage, Config, LocksStatus, MergeResultKind, PdTask, PeerMsg, - PeerTick, ProposalContext, RaftCmdExtraOpts, RaftCommand, RaftlogFetchResult, ReadCallback, + CasualMessage, Config, LocksStatus, MergeResultKind, PdTask, PeerMsg, PeerTick, + ProposalContext, RaftCmdExtraOpts, RaftCommand, RaftlogFetchResult, ReadCallback, SignificantMsg, SnapKey, StoreMsg, WriteCallback, }, Error, Result, @@ -1001,7 +1002,24 @@ where CasualMessage::ForceCompactRaftLogs => { self.on_raft_gc_log_tick(true); } - CasualMessage::AccessPeer(cb) => cb(self.fsm as &mut dyn AbstractPeer), + CasualMessage::AccessPeer(cb) => { + let peer = &self.fsm.peer; + let store = peer.get_store(); + let mut local_state = RegionLocalState::default(); + local_state.set_region(store.region().clone()); + if let Some(s) = &peer.pending_merge_state { + local_state.set_merge_state(s.clone()); + } + if store.is_applying_snapshot() { + local_state.set_state(PeerState::Applying); + } + cb(RegionMeta::new( + &local_state, + store.apply_state(), + self.fsm.hibernate_state.group_state(), + peer.raft_group.status(), + )) + } CasualMessage::QueryRegionLeaderResp { region, leader } => { // the leader already updated if self.fsm.peer.raft_group.raft.leader_id != raft::INVALID_ID @@ -3754,8 +3772,13 @@ where // New peer derive write flow from parent 
region, // this will be used by balance write flow. new_peer.peer.peer_stat = self.fsm.peer.peer_stat.clone(); - new_peer.peer.last_compacted_idx = - new_peer.apply_state().get_truncated_state().get_index() + 1; + new_peer.peer.last_compacted_idx = new_peer + .peer + .get_store() + .apply_state() + .get_truncated_state() + .get_index() + + 1; let campaigned = new_peer.peer.maybe_campaign(is_leader); new_peer.has_ready |= campaigned; @@ -6220,30 +6243,6 @@ where } } -impl AbstractPeer for PeerFsm { - fn meta_peer(&self) -> &metapb::Peer { - &self.peer.peer - } - fn group_state(&self) -> GroupState { - self.hibernate_state.group_state() - } - fn region(&self) -> &metapb::Region { - self.peer.raft_group.store().region() - } - fn apply_state(&self) -> &RaftApplyState { - self.peer.raft_group.store().apply_state() - } - fn raft_status(&self) -> raft::Status<'_> { - self.peer.raft_group.status() - } - fn raft_commit_index(&self) -> u64 { - self.peer.raft_group.store().commit_index() - } - fn pending_merge_state(&self) -> Option<&MergeState> { - self.peer.pending_merge_state.as_ref() - } -} - mod memtrace { use memory_trace_macros::MemoryTraceHelper; diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 878c7c3b9f8..cac2e36d5eb 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -7,6 +7,7 @@ pub mod fsm; pub mod memory; pub mod metrics; pub mod msg; +pub mod region_meta; pub mod transport; #[macro_use] pub mod util; @@ -29,7 +30,10 @@ mod worker; pub use self::msg::PeerInternalStat; pub use self::{ async_io::{ - write::{PersistedNotifier, StoreWriters, Worker as WriteWorker, WriteMsg, WriteTask}, + write::{ + ExtraStates, PersistedNotifier, StoreWriters, Worker as WriteWorker, WriteMsg, + WriteTask, + }, write_router::{WriteRouter, WriteRouterContext, WriteSenders}, }, bootstrap::{ @@ -48,7 +52,7 @@ pub use self::{ PeerTick, RaftCmdExtraOpts, RaftCommand, ReadCallback, ReadResponse, 
SignificantMsg, StoreMsg, StoreTick, WriteCallback, WriteResponse, }, - peer::{AbstractPeer, Peer, PeerStat, ProposalContext, RequestInspector, RequestPolicy}, + peer::{Peer, PeerStat, ProposalContext, RequestInspector, RequestPolicy}, peer_storage::{ clear_meta, do_snapshot, write_initial_apply_state, write_initial_raft_state, write_peer_state, PeerStorage, SnapState, INIT_EPOCH_CONF_VER, INIT_EPOCH_VER, diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 5b3221e8c19..251094e6475 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -24,7 +24,9 @@ use smallvec::{smallvec, SmallVec}; use tikv_util::{deadline::Deadline, escape, memory::HeapSize, time::Instant}; use tracker::{get_tls_tracker_token, TrackerToken, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}; -use super::{local_metrics::TimeTracker, worker::FetchedLogs, AbstractPeer, RegionSnapshot}; +use super::{ + local_metrics::TimeTracker, region_meta::RegionMeta, worker::FetchedLogs, RegionSnapshot, +}; use crate::store::{ fsm::apply::{CatchUpLogs, ChangeObserver, TaskRes as ApplyTaskRes}, metrics::RaftEventDurationType, @@ -517,7 +519,7 @@ pub enum CasualMessage { ForceCompactRaftLogs, /// A message to access peer's internal state. - AccessPeer(Box), + AccessPeer(Box), /// Region info from PD QueryRegionLeaderResp { diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 6b3ec4c3456..91698be98e9 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -5566,17 +5566,6 @@ fn make_transfer_leader_response() -> RaftCmdResponse { // TransferLeader command. pub const TRANSFER_LEADER_COMMAND_REPLY_CTX: &[u8] = &[1]; -/// A poor version of `Peer` to avoid port generic variables everywhere. 
-pub trait AbstractPeer { - fn meta_peer(&self) -> &metapb::Peer; - fn group_state(&self) -> GroupState; - fn region(&self) -> &metapb::Region; - fn apply_state(&self) -> &RaftApplyState; - fn raft_status(&self) -> raft::Status<'_>; - fn raft_commit_index(&self) -> u64; - fn pending_merge_state(&self) -> Option<&MergeState>; -} - mod memtrace { use std::mem; diff --git a/src/server/status_server/region_meta.rs b/components/raftstore/src/store/region_meta.rs similarity index 81% rename from src/server/status_server/region_meta.rs rename to components/raftstore/src/store/region_meta.rs index cd78e7382c9..9af541cbfd9 100644 --- a/src/server/status_server/region_meta.rs +++ b/components/raftstore/src/store/region_meta.rs @@ -2,9 +2,11 @@ use std::collections::HashMap; -use kvproto::metapb::PeerRole; -use raft::{Progress, ProgressState, StateRole}; -use raftstore::store::{AbstractPeer, GroupState}; +use kvproto::{metapb::PeerRole, raft_serverpb}; +use raft::{Progress, ProgressState, StateRole, Status}; +use serde::{Deserialize, Serialize}; + +use super::GroupState; #[derive(Debug, Copy, Clone, Serialize, Deserialize)] pub enum RaftProgressState { @@ -179,22 +181,34 @@ pub struct RaftApplyState { } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RegionMeta { +pub struct RegionLocalState { pub id: u64, - pub group_state: GroupState, pub start_key: Vec, pub end_key: Vec, pub epoch: Epoch, pub peers: Vec, pub merge_state: Option, + pub tablet_index: u64, +} + +/// A serializeable struct that exposes the internal debug information of a +/// peer. TODO: make protobuf generated code derive serde directly. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RegionMeta { + pub group_state: GroupState, pub raft_status: RaftStatus, pub raft_apply: RaftApplyState, + pub region_state: RegionLocalState, } impl RegionMeta { - pub fn new(abstract_peer: &dyn AbstractPeer) -> Self { - let region = abstract_peer.region(); - let apply_state = abstract_peer.apply_state(); + pub fn new( + local_state: &raft_serverpb::RegionLocalState, + apply_state: &raft_serverpb::RaftApplyState, + group_state: GroupState, + raft_status: Status<'_>, + ) -> Self { + let region = local_state.get_region(); let epoch = region.get_region_epoch(); let start_key = region.get_start_key(); let end_key = region.get_end_key(); @@ -207,25 +221,15 @@ impl RegionMeta { role: peer.get_role().into(), }); } + let merge_state = if local_state.has_merge_state() { + Some(local_state.get_merge_state()) + } else { + None + }; Self { - id: region.get_id(), - group_state: abstract_peer.group_state(), - start_key: start_key.to_owned(), - end_key: end_key.to_owned(), - epoch: Epoch { - conf_ver: epoch.get_conf_ver(), - version: epoch.get_version(), - }, - peers, - merge_state: abstract_peer - .pending_merge_state() - .map(|state| RegionMergeState { - min_index: state.get_min_index(), - commit: state.get_commit(), - region_id: state.get_target().get_id(), - }), - raft_status: abstract_peer.raft_status().into(), + group_state, + raft_status: raft_status.into(), raft_apply: RaftApplyState { applied_index: apply_state.get_applied_index(), commit_index: apply_state.get_commit_index(), @@ -235,6 +239,22 @@ impl RegionMeta { term: apply_state.get_truncated_state().get_term(), }, }, + region_state: RegionLocalState { + id: region.get_id(), + start_key: start_key.to_owned(), + end_key: end_key.to_owned(), + epoch: Epoch { + conf_ver: epoch.get_conf_ver(), + version: epoch.get_version(), + }, + peers, + merge_state: merge_state.map(|state| RegionMergeState { + min_index: state.get_min_index(), + commit: state.get_commit(), 
+ region_id: state.get_target().get_id(), + }), + tablet_index: local_state.get_tablet_index(), + }, } } } diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 3df7bf212d9..7c001baec1e 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -1,7 +1,6 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. mod profile; -pub mod region_meta; use std::{ error::Error as StdError, marker::PhantomData, @@ -454,8 +453,8 @@ where let (tx, rx) = oneshot::channel(); match router.send( id, - CasualMessage::AccessPeer(Box::new(move |peer| { - if let Err(meta) = tx.send(region_meta::RegionMeta::new(peer)) { + CasualMessage::AccessPeer(Box::new(move |meta| { + if let Err(meta) = tx.send(meta) { error!("receiver dropped, region meta: {:?}", meta) } })), diff --git a/tests/integrations/server/status_server.rs b/tests/integrations/server/status_server.rs index 84a4de39b25..455465d87cb 100644 --- a/tests/integrations/server/status_server.rs +++ b/tests/integrations/server/status_server.rs @@ -3,12 +3,10 @@ use std::{error::Error, net::SocketAddr, sync::Arc}; use hyper::{body, Client, StatusCode, Uri}; +use raftstore::store::region_meta::RegionMeta; use security::SecurityConfig; use test_raftstore::{new_server_cluster, Simulator}; -use tikv::{ - config::ConfigController, - server::status_server::{region_meta::RegionMeta, StatusServer}, -}; +use tikv::{config::ConfigController, server::status_server::StatusServer}; use tikv_util::HandyRwLock; async fn check(authority: SocketAddr, region_id: u64) -> Result<(), Box> { From d2cc9550d1e253499039c5fe6508d9cb6ca24f0c Mon Sep 17 00:00:00 2001 From: glorv Date: Wed, 31 Aug 2022 21:24:24 +0800 Subject: [PATCH 0180/1149] coprocessor: move task reschedule from runner to scanner (#13337) ref tikv/tikv#13313 Signed-off-by: glorv Co-authored-by: Ti Chi Robot --- Cargo.lock | 6 + components/test_backup/src/lib.rs | 4 +- components/tidb_query_common/Cargo.toml | 3 + 
.../tidb_query_common/src/execute_stats.rs | 2 +- .../tidb_query_common/src/storage/scanner.rs | 241 ++++++++++++------ components/tidb_query_executors/Cargo.toml | 1 + .../src/fast_hash_aggr_executor.rs | 43 ++-- .../src/index_scan_executor.rs | 35 +-- .../tidb_query_executors/src/interface.rs | 14 +- .../src/limit_executor.rs | 33 +-- .../src/projection_executor.rs | 33 +-- components/tidb_query_executors/src/runner.rs | 63 ++--- .../src/selection_executor.rs | 51 ++-- .../src/simple_aggr_executor.rs | 23 +- .../src/slow_hash_aggr_executor.rs | 13 +- .../src/stream_aggr_executor.rs | 19 +- .../src/table_scan_executor.rs | 37 +-- .../src/top_n_executor.rs | 65 ++--- .../src/util/aggr_executor.rs | 19 +- .../src/util/mock_executor.rs | 7 +- .../src/util/scan_executor.rs | 10 +- components/tikv_util/Cargo.toml | 1 + components/tikv_util/src/quota_limiter.rs | 49 +++- src/coprocessor/checksum.rs | 17 +- src/coprocessor/dag/mod.rs | 4 +- src/coprocessor/endpoint.rs | 8 +- src/coprocessor/mod.rs | 2 +- src/coprocessor/statistics/analyze.rs | 41 +-- tests/Cargo.toml | 1 + .../coprocessor_executors/index_scan/util.rs | 3 +- .../coprocessor_executors/table_scan/util.rs | 3 +- .../coprocessor_executors/util/bencher.rs | 4 +- .../coprocessor_executors/util/fixture.rs | 4 +- .../integrations/coprocessor/test_checksum.rs | 3 +- 34 files changed, 509 insertions(+), 353 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a5c71cef10d..7ed11da4cd7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5760,6 +5760,7 @@ version = "0.0.1" dependencies = [ "api_version", "arrow", + "async-trait", "batch-system", "byteorder", "causal_ts", @@ -5906,9 +5907,11 @@ name = "tidb_query_common" version = "0.0.1" dependencies = [ "anyhow", + "async-trait", "byteorder", "derive_more", "error_code", + "futures 0.3.15", "kvproto", "lazy_static", "log_wrappers", @@ -5918,6 +5921,7 @@ dependencies = [ "thiserror", "tikv_util", "time", + "yatp", ] [[package]] @@ -5963,6 +5967,7 @@ name = 
"tidb_query_executors" version = "0.0.1" dependencies = [ "anyhow", + "async-trait", "codec", "collections", "fail", @@ -6324,6 +6329,7 @@ dependencies = [ "openssl", "page_size", "panic_hook", + "pin-project", "procfs", "procinfo", "prometheus", diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index d7bed05eddd..5447e8f2b37 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -13,7 +13,7 @@ use backup::Task; use collections::HashMap; use engine_traits::{CfName, IterOptions, CF_DEFAULT, CF_WRITE, DATA_KEY_PREFIX_LEN}; use external_storage_export::make_local_backend; -use futures::channel::mpsc as future_mpsc; +use futures::{channel::mpsc as future_mpsc, executor::block_on}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{brpb::*, kvrpcpb::*, tikvpb::TikvClient}; use rand::Rng; @@ -362,7 +362,7 @@ impl TestSuite { is_scanned_range_aware: false, }); let digest = crc64fast::Digest::new(); - while let Some((k, v)) = scanner.next().unwrap() { + while let Some((k, v)) = block_on(scanner.next()).unwrap() { checksum = checksum_crc64_xor(checksum, digest.clone(), &k, &v); total_kvs += 1; total_bytes += (k.len() + v.len()) as u64; diff --git a/components/tidb_query_common/Cargo.toml b/components/tidb_query_common/Cargo.toml index 2f42c226327..0efadbd48e9 100644 --- a/components/tidb_query_common/Cargo.toml +++ b/components/tidb_query_common/Cargo.toml @@ -7,8 +7,10 @@ description = "Common utility of a query engine to run TiDB pushed down executor [dependencies] anyhow = "1.0" +async-trait = "0.1" derive_more = "0.99.3" error_code = { path = "../error_code", default-features = false } +futures = "0.3" kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" log_wrappers = { path = "../log_wrappers" } @@ -18,6 +20,7 @@ serde_json = "1.0" thiserror = "1.0" tikv_util = { path = "../tikv_util", default-features = false } time = "0.1" +yatp = { git = 
"https://github.com/tikv/yatp.git", branch = "master" } [dev-dependencies] byteorder = "1.2" diff --git a/components/tidb_query_common/src/execute_stats.rs b/components/tidb_query_common/src/execute_stats.rs index 55d31dfb8f5..122363eed98 100644 --- a/components/tidb_query_common/src/execute_stats.rs +++ b/components/tidb_query_common/src/execute_stats.rs @@ -18,7 +18,7 @@ pub struct ExecSummary { /// A trait for all execution summary collectors. pub trait ExecSummaryCollector: Send { - type DurationRecorder; + type DurationRecorder: Send; /// Creates a new instance with specified output slot index. fn new(output_index: usize) -> Self diff --git a/components/tidb_query_common/src/storage/scanner.rs b/components/tidb_query_common/src/storage/scanner.rs index 851220307b9..e12659f329b 100644 --- a/components/tidb_query_common/src/storage/scanner.rs +++ b/components/tidb_query_common/src/storage/scanner.rs @@ -1,9 +1,19 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. +use std::time::Duration; + +use tikv_util::time::Instant; +use yatp::task::future::reschedule; + use super::{range::*, ranges_iter::*, OwnedKvPair, Storage}; use crate::error::StorageError; const KEY_BUFFER_CAPACITY: usize = 64; +/// Batch executors are run in coroutines. `MAX_TIME_SLICE` is the maximum time +/// a coroutine can run without being yielded. +const MAX_TIME_SLICE: Duration = Duration::from_millis(1); +/// the number of scanned keys that should trigger a reschedule. +const CHECK_KEYS: usize = 32; /// A scanner that scans over multiple ranges. Each range can be a point range /// containing only one row, or an interval range containing multiple rows. @@ -23,6 +33,35 @@ pub struct RangesScanner { current_range: IntervalRange, working_range_begin_key: Vec, working_range_end_key: Vec, + rescheduler: RescheduleChecker, +} + +// TODO: maybe it's better to make it generic to avoid directly depending +// on yatp's rescheduler. 
+struct RescheduleChecker { + prev_start: Instant, + prev_key_count: usize, +} + +impl RescheduleChecker { + fn new() -> Self { + Self { + prev_start: Instant::now(), + prev_key_count: 0, + } + } + + #[inline(always)] + async fn check_reschedule(&mut self, force_check: bool) { + self.prev_key_count += 1; + if (force_check || self.prev_key_count % CHECK_KEYS == 0) + && self.prev_start.saturating_elapsed() > MAX_TIME_SLICE + { + reschedule().await; + self.prev_start = Instant::now(); + self.prev_key_count = 0; + } + } } pub struct RangesScannerOptions { @@ -58,24 +97,26 @@ impl RangesScanner { }, working_range_begin_key: Vec::with_capacity(KEY_BUFFER_CAPACITY), working_range_end_key: Vec::with_capacity(KEY_BUFFER_CAPACITY), + rescheduler: RescheduleChecker::new(), } } /// Fetches next row. // Note: This is not implemented over `Iterator` since it can fail. // TODO: Change to use reference to avoid allocation and copy. - pub fn next(&mut self) -> Result, StorageError> { - self.next_opt(true) + pub async fn next(&mut self) -> Result, StorageError> { + self.next_opt(true).await } /// Fetches next row. /// Note: `update_scanned_range` can control whether update the scanned /// range when `is_scanned_range_aware` is true. - pub fn next_opt( + pub async fn next_opt( &mut self, update_scanned_range: bool, ) -> Result, StorageError> { loop { + let mut force_check = true; let range = self.ranges_iter.next(); let some_row = match range { IterStatus::NewRange(Range::Point(r)) => { @@ -95,7 +136,10 @@ impl RangesScanner { .begin_scan(self.scan_backward_in_range, self.is_key_only, r)?; self.storage.scan_next()? } - IterStatus::Continue => self.storage.scan_next()?, + IterStatus::Continue => { + force_check = false; + self.storage.scan_next()? 
+ } IterStatus::Drained => { if self.is_scanned_range_aware { self.update_working_range_end_key(); @@ -111,6 +155,7 @@ impl RangesScanner { if let Some(r) = self.scanned_rows_per_range.last_mut() { *r += 1; } + self.rescheduler.check_reschedule(force_check).await; return Ok(some_row); } else { @@ -243,6 +288,8 @@ impl RangesScanner { #[cfg(test)] mod tests { + use futures::executor::block_on; + use super::*; use crate::storage::{test_fixture::FixtureStorage, IntervalRange, PointRange, Range}; @@ -276,26 +323,26 @@ mod tests { is_scanned_range_aware: false, }); assert_eq!( - scanner.next().unwrap(), + block_on(scanner.next()).unwrap(), Some((b"foo".to_vec(), b"1".to_vec())) ); assert_eq!( - scanner.next().unwrap(), + block_on(scanner.next()).unwrap(), Some((b"foo_2".to_vec(), b"3".to_vec())) ); assert_eq!( - scanner.next().unwrap(), + block_on(scanner.next()).unwrap(), Some((b"foo_3".to_vec(), b"5".to_vec())) ); assert_eq!( - scanner.next().unwrap(), + block_on(scanner.next()).unwrap(), Some((b"bar".to_vec(), b"2".to_vec())) ); assert_eq!( - scanner.next().unwrap(), + block_on(scanner.next()).unwrap(), Some((b"bar_2".to_vec(), b"4".to_vec())) ); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(block_on(scanner.next()).unwrap(), None); // Backward in range let ranges: Vec = vec![ @@ -312,22 +359,22 @@ mod tests { is_scanned_range_aware: false, }); assert_eq!( - scanner.next().unwrap(), + block_on(scanner.next()).unwrap(), Some((b"foo_2".to_vec(), b"3".to_vec())) ); assert_eq!( - scanner.next().unwrap(), + block_on(scanner.next()).unwrap(), Some((b"foo".to_vec(), b"1".to_vec())) ); assert_eq!( - scanner.next().unwrap(), + block_on(scanner.next()).unwrap(), Some((b"foo_3".to_vec(), b"5".to_vec())) ); assert_eq!( - scanner.next().unwrap(), + block_on(scanner.next()).unwrap(), Some((b"bar".to_vec(), b"2".to_vec())) ); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(block_on(scanner.next()).unwrap(), None); // Key only let ranges: Vec = vec![ @@ -342,21 
+389,27 @@ mod tests { is_key_only: true, is_scanned_range_aware: false, }); - assert_eq!(scanner.next().unwrap(), Some((b"bar".to_vec(), Vec::new()))); assert_eq!( - scanner.next().unwrap(), + block_on(scanner.next()).unwrap(), + Some((b"bar".to_vec(), Vec::new())) + ); + assert_eq!( + block_on(scanner.next()).unwrap(), Some((b"bar_2".to_vec(), Vec::new())) ); - assert_eq!(scanner.next().unwrap(), Some((b"foo".to_vec(), Vec::new()))); assert_eq!( - scanner.next().unwrap(), + block_on(scanner.next()).unwrap(), + Some((b"foo".to_vec(), Vec::new())) + ); + assert_eq!( + block_on(scanner.next()).unwrap(), Some((b"foo_2".to_vec(), Vec::new())) ); assert_eq!( - scanner.next().unwrap(), + block_on(scanner.next()).unwrap(), Some((b"foo_3".to_vec(), Vec::new())) ); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(block_on(scanner.next()).unwrap(), None); } #[test] @@ -378,9 +431,9 @@ mod tests { }); let mut scanned_rows_per_range = Vec::new(); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo"); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo_2"); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo_3"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_3"); scanner.collect_scanned_rows_per_range(&mut scanned_rows_per_range); assert_eq!(scanned_rows_per_range, vec![2, 0, 1]); @@ -390,28 +443,28 @@ mod tests { assert_eq!(scanned_rows_per_range, vec![0]); scanned_rows_per_range.clear(); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"bar"); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"bar_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"bar"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"bar_2"); scanner.collect_scanned_rows_per_range(&mut scanned_rows_per_range); assert_eq!(scanned_rows_per_range, vec![0, 2]); scanned_rows_per_range.clear(); - 
assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo"); scanner.collect_scanned_rows_per_range(&mut scanned_rows_per_range); assert_eq!(scanned_rows_per_range, vec![1]); scanned_rows_per_range.clear(); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo_2"); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo_3"); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_3"); + assert_eq!(block_on(scanner.next()).unwrap(), None); scanner.collect_scanned_rows_per_range(&mut scanned_rows_per_range); assert_eq!(scanned_rows_per_range, vec![2]); scanned_rows_per_range.clear(); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(block_on(scanner.next()).unwrap(), None); scanner.collect_scanned_rows_per_range(&mut scanned_rows_per_range); assert_eq!(scanned_rows_per_range, vec![0]); @@ -436,7 +489,7 @@ mod tests { assert_eq!(&r.lower_inclusive, b""); assert_eq!(&r.upper_exclusive, b""); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(block_on(scanner.next()).unwrap(), None); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b""); @@ -452,7 +505,7 @@ mod tests { is_scanned_range_aware: true, }); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(block_on(scanner.next()).unwrap(), None); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"x"); @@ -468,7 +521,7 @@ mod tests { is_scanned_range_aware: true, }); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(block_on(scanner.next()).unwrap(), None); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"x"); @@ -484,20 +537,20 @@ mod tests { is_scanned_range_aware: true, }); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo"); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo_2"); + 
assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_2"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo"); assert_eq!(&r.upper_exclusive, b"foo_2\0"); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo_3"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_3"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo_2\0"); assert_eq!(&r.upper_exclusive, b"foo_3\0"); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(block_on(scanner.next()).unwrap(), None); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo_3\0"); @@ -522,31 +575,31 @@ mod tests { is_scanned_range_aware: true, }); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo"); assert_eq!(&r.upper_exclusive, b"foo\0"); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_2"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo\0"); assert_eq!(&r.upper_exclusive, b"foo_2\0"); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"bar"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"bar"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo_2\0"); assert_eq!(&r.upper_exclusive, b"bar\0"); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"bar_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"bar_2"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"bar\0"); assert_eq!(&r.upper_exclusive, b"bar_2\0"); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(block_on(scanner.next()).unwrap(), None); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"bar_2\0"); @@ -571,7 +624,7 @@ mod tests { 
assert_eq!(&r.lower_inclusive, b""); assert_eq!(&r.upper_exclusive, b""); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(block_on(scanner.next()).unwrap(), None); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b""); @@ -587,7 +640,7 @@ mod tests { is_scanned_range_aware: true, }); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(block_on(scanner.next()).unwrap(), None); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"x"); @@ -603,7 +656,7 @@ mod tests { is_scanned_range_aware: true, }); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(block_on(scanner.next()).unwrap(), None); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"x"); @@ -619,20 +672,20 @@ mod tests { is_scanned_range_aware: true, }); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo_3"); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_3"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_2"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo_2"); assert_eq!(&r.upper_exclusive, b"foo_8"); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo"); assert_eq!(&r.upper_exclusive, b"foo_2"); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(block_on(scanner.next()).unwrap(), None); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo"); @@ -655,26 +708,26 @@ mod tests { is_scanned_range_aware: true, }); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"bar_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"bar_2"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"bar_2"); assert_eq!(&r.upper_exclusive, b"box"); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"bar"); + 
assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"bar"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"bar"); assert_eq!(&r.upper_exclusive, b"bar_2"); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo_2"); - assert_eq!(&scanner.next().unwrap().unwrap().0, b"foo"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo"); assert_eq!(&r.upper_exclusive, b"bar"); - assert_eq!(scanner.next().unwrap(), None); + assert_eq!(block_on(scanner.next()).unwrap(), None); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo"); @@ -695,22 +748,31 @@ mod tests { }); // Only lower_inclusive is updated. - assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"foo"); + assert_eq!( + &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + b"foo" + ); assert_eq!(&scanner.working_range_begin_key, b"foo"); assert_eq!(&scanner.working_range_end_key, b""); // Upper_exclusive is updated. - assert_eq!(&scanner.next_opt(true).unwrap().unwrap().0, b"foo_2"); + assert_eq!( + &block_on(scanner.next_opt(true)).unwrap().unwrap().0, + b"foo_2" + ); assert_eq!(&scanner.working_range_begin_key, b"foo"); assert_eq!(&scanner.working_range_end_key, b"foo_2\0"); // Upper_exclusive is not updated. - assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"foo_3"); + assert_eq!( + &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + b"foo_3" + ); assert_eq!(&scanner.working_range_begin_key, b"foo"); assert_eq!(&scanner.working_range_end_key, b"foo_2\0"); // Drained. - assert_eq!(scanner.next_opt(false).unwrap(), None); + assert_eq!(block_on(scanner.next_opt(false)).unwrap(), None); assert_eq!(&scanner.working_range_begin_key, b"foo"); assert_eq!(&scanner.working_range_end_key, b"foo_8"); @@ -738,27 +800,39 @@ mod tests { }); // Only lower_inclusive is updated. 
- assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"foo"); + assert_eq!( + &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + b"foo" + ); assert_eq!(&scanner.working_range_begin_key, b"foo"); assert_eq!(&scanner.working_range_end_key, b""); // Upper_exclusive is updated. Updated by scanned row. - assert_eq!(&scanner.next_opt(true).unwrap().unwrap().0, b"foo_2"); + assert_eq!( + &block_on(scanner.next_opt(true)).unwrap().unwrap().0, + b"foo_2" + ); assert_eq!(&scanner.working_range_begin_key, b"foo"); assert_eq!(&scanner.working_range_end_key, b"foo_2\0"); // Upper_exclusive is not updated. - assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"bar"); + assert_eq!( + &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + b"bar" + ); assert_eq!(&scanner.working_range_begin_key, b"foo"); assert_eq!(&scanner.working_range_end_key, b"foo_2\0"); // Upper_exclusive is not updated. - assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"bar_2"); + assert_eq!( + &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + b"bar_2" + ); assert_eq!(&scanner.working_range_begin_key, b"foo"); assert_eq!(&scanner.working_range_end_key, b"foo_2\0"); // Drain. - assert_eq!(scanner.next_opt(false).unwrap(), None); + assert_eq!(block_on(scanner.next_opt(false)).unwrap(), None); assert_eq!(&scanner.working_range_begin_key, b"foo"); assert_eq!(&scanner.working_range_end_key, b"box"); @@ -781,22 +855,31 @@ mod tests { }); // Only lower_inclusive is updated. - assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"foo_3"); + assert_eq!( + &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + b"foo_3" + ); assert_eq!(&scanner.working_range_begin_key, b"foo_8"); assert_eq!(&scanner.working_range_end_key, b""); // Upper_exclusive is updated. 
- assert_eq!(&scanner.next_opt(true).unwrap().unwrap().0, b"foo_2"); + assert_eq!( + &block_on(scanner.next_opt(true)).unwrap().unwrap().0, + b"foo_2" + ); assert_eq!(&scanner.working_range_begin_key, b"foo_8"); assert_eq!(&scanner.working_range_end_key, b"foo_2"); // Upper_exclusive is not updated. - assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"foo"); + assert_eq!( + &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + b"foo" + ); assert_eq!(&scanner.working_range_begin_key, b"foo_8"); assert_eq!(&scanner.working_range_end_key, b"foo_2"); // Drained. - assert_eq!(scanner.next_opt(false).unwrap(), None); + assert_eq!(block_on(scanner.next_opt(false)).unwrap(), None); assert_eq!(&scanner.working_range_begin_key, b"foo_8"); assert_eq!(&scanner.working_range_end_key, b"foo"); @@ -822,27 +905,39 @@ mod tests { }); // Lower_inclusive is updated. Upper_exclusive is not update. - assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"bar_2"); + assert_eq!( + &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + b"bar_2" + ); assert_eq!(&scanner.working_range_begin_key, b"box"); assert_eq!(&scanner.working_range_end_key, b""); // Upper_exclusive is updated. Updated by scanned row. - assert_eq!(&scanner.next_opt(true).unwrap().unwrap().0, b"bar"); + assert_eq!( + &block_on(scanner.next_opt(true)).unwrap().unwrap().0, + b"bar" + ); assert_eq!(&scanner.working_range_begin_key, b"box"); assert_eq!(&scanner.working_range_end_key, b"bar"); // Upper_exclusive is not update. - assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"foo_2"); + assert_eq!( + &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + b"foo_2" + ); assert_eq!(&scanner.working_range_begin_key, b"box"); assert_eq!(&scanner.working_range_end_key, b"bar"); // Upper_exclusive is not update. 
- assert_eq!(&scanner.next_opt(false).unwrap().unwrap().0, b"foo"); + assert_eq!( + &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + b"foo" + ); assert_eq!(&scanner.working_range_begin_key, b"box"); assert_eq!(&scanner.working_range_end_key, b"bar"); // Drain. - assert_eq!(scanner.next_opt(false).unwrap(), None); + assert_eq!(block_on(scanner.next_opt(false)).unwrap(), None); assert_eq!(&scanner.working_range_begin_key, b"box"); assert_eq!(&scanner.working_range_end_key, b"foo"); diff --git a/components/tidb_query_executors/Cargo.toml b/components/tidb_query_executors/Cargo.toml index 923696606ed..ada01c8aef0 100644 --- a/components/tidb_query_executors/Cargo.toml +++ b/components/tidb_query_executors/Cargo.toml @@ -6,6 +6,7 @@ publish = false description = "A vector query engine to run TiDB pushed down executors" [dependencies] +async-trait = "0.1" codec = { path = "../codec", default-features = false } collections = { path = "../collections" } fail = "0.5" diff --git a/components/tidb_query_executors/src/fast_hash_aggr_executor.rs b/components/tidb_query_executors/src/fast_hash_aggr_executor.rs index 942e61087d3..174912ca0b0 100644 --- a/components/tidb_query_executors/src/fast_hash_aggr_executor.rs +++ b/components/tidb_query_executors/src/fast_hash_aggr_executor.rs @@ -2,6 +2,7 @@ use std::{convert::TryFrom, hash::Hash, sync::Arc}; +use async_trait::async_trait; use collections::HashMap; use tidb_query_aggr::*; use tidb_query_common::{storage::IntervalRange, Result}; @@ -38,6 +39,7 @@ pub struct BatchFastHashAggregationExecutor( AggregationExecutor, ); +#[async_trait] impl BatchExecutor for BatchFastHashAggregationExecutor { type StorageStats = Src::StorageStats; @@ -47,8 +49,8 @@ impl BatchExecutor for BatchFastHashAggregationExecutor } #[inline] - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { - self.0.next_batch(scan_rows) + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + 
self.0.next_batch(scan_rows).await } #[inline] @@ -464,6 +466,7 @@ where #[cfg(test)] mod tests { + use futures::executor::block_on; use tidb_query_datatype::{expr::EvalWarnings, FieldTypeTp}; use tidb_query_expr::{ impl_arithmetic::{arithmetic_fn_meta, RealPlus}, @@ -539,17 +542,17 @@ mod tests { let src_exec = make_src_executor_1(); let mut exec = exec_builder(src_exec); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let mut r = exec.next_batch(1); + let mut r = block_on(exec.next_batch(1)); // col_0 + col_1 can result in [NULL, 9.0, 6.0], thus there will be three // groups. assert_eq!(&r.logical_rows, &[0, 1, 2]); @@ -675,17 +678,17 @@ mod tests { let src_exec = make_src_executor_1(); let mut exec = exec_builder(src_exec); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let mut r = exec.next_batch(1); + let mut r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); assert_eq!(r.physical_columns.rows_len(), 1); assert_eq!(r.physical_columns.columns_len(), 5); // 4 result column, 1 group by column @@ -759,17 +762,17 @@ mod tests { let src_exec = make_src_executor_1(); let mut exec = exec_builder(src_exec); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = 
exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let mut r = exec.next_batch(1); + let mut r = block_on(exec.next_batch(1)); // col_4 can result in [NULL, "aa", "aaa"], thus there will be three groups. assert_eq!(&r.logical_rows, &[0, 1, 2]); assert_eq!(r.physical_columns.rows_len(), 3); @@ -944,12 +947,12 @@ mod tests { ); let mut exec = exec_builder(src_exec); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(r.is_drained.unwrap()); @@ -992,17 +995,17 @@ mod tests { let src_exec = make_src_executor_1(); let mut exec = exec_builder(src_exec); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let mut r = exec.next_batch(1); + let mut r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2]); assert_eq!(r.physical_columns.rows_len(), 3); assert_eq!(r.physical_columns.columns_len(), 1); // 0 result column, 1 group by column @@ -1063,17 +1066,17 @@ mod tests { let src_exec = make_src_executor_1(); let mut exec = exec_builder(src_exec); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); 
assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let mut r = exec.next_batch(1); + let mut r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); assert_eq!(r.physical_columns.rows_len(), 1); assert_eq!(r.physical_columns.columns_len(), 1); // 0 result column, 1 group by column @@ -1136,7 +1139,7 @@ mod tests { }], ); let mut exec = exec_builder(src_exec); - let r = exec.next_batch(4); + let r = block_on(exec.next_batch(4)); assert_eq!(r.physical_columns.rows_len(), 4); assert_eq!(r.physical_columns.columns_len(), 2); diff --git a/components/tidb_query_executors/src/index_scan_executor.rs b/components/tidb_query_executors/src/index_scan_executor.rs index 8492a928a8d..ae04ffe03e6 100644 --- a/components/tidb_query_executors/src/index_scan_executor.rs +++ b/components/tidb_query_executors/src/index_scan_executor.rs @@ -2,6 +2,7 @@ use std::sync::Arc; +use async_trait::async_trait; use codec::{number::NumberCodec, prelude::NumberDecoder}; use itertools::izip; use kvproto::coprocessor::KeyRange; @@ -152,6 +153,7 @@ impl BatchIndexScanExecutor { } } +#[async_trait] impl BatchExecutor for BatchIndexScanExecutor { type StorageStats = S::Statistics; @@ -161,8 +163,8 @@ impl BatchExecutor for BatchIndexScanExecutor { } #[inline] - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { - self.0.next_batch(scan_rows) + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + self.0.next_batch(scan_rows).await } #[inline] @@ -874,6 +876,7 @@ mod tests { use std::sync::Arc; use codec::prelude::NumberEncoder; + use futures::executor::block_on; use kvproto::coprocessor::KeyRange; use tidb_query_common::{storage::test_fixture::FixtureStorage, util::convert_to_prefix_next}; use tidb_query_datatype::{ @@ -984,7 +987,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert!(result.is_drained.as_ref().unwrap()); 
assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 3); @@ -1041,7 +1044,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert!(result.is_drained.as_ref().unwrap()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 3); @@ -1101,7 +1104,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert!(result.is_drained.as_ref().unwrap()); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 3); @@ -1146,7 +1149,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert!(result.is_drained.as_ref().unwrap()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 3); @@ -1198,7 +1201,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert!(result.is_drained.as_ref().unwrap()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 2); @@ -1275,7 +1278,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert!(result.is_drained.as_ref().unwrap()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 2); @@ -1332,7 +1335,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert!(result.is_drained.as_ref().unwrap()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); @@ -1442,7 +1445,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = 
block_on(executor.next_batch(10)); assert!(result.is_drained.as_ref().unwrap()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); @@ -1485,7 +1488,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert!(result.is_drained.as_ref().unwrap()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); @@ -1581,7 +1584,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert!(result.is_drained.as_ref().unwrap()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); @@ -1681,7 +1684,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert!(result.is_drained.as_ref().unwrap()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); @@ -1775,7 +1778,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert!(result.is_drained.as_ref().unwrap()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); @@ -1868,7 +1871,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert!(result.is_drained.as_ref().unwrap()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); @@ -1994,7 +1997,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert!(result.is_drained.as_ref().unwrap()); assert_eq!(result.physical_columns.columns_len(), 4); assert_eq!(result.physical_columns.rows_len(), 1); diff --git 
a/components/tidb_query_executors/src/interface.rs b/components/tidb_query_executors/src/interface.rs index 1ea5038a2d6..611516ab6bc 100644 --- a/components/tidb_query_executors/src/interface.rs +++ b/components/tidb_query_executors/src/interface.rs @@ -5,6 +5,7 @@ //! Batch executor common structures. +use async_trait::async_trait; pub use tidb_query_common::execute_stats::{ ExecSummaryCollector, ExecuteStats, WithSummaryCollector, }; @@ -16,6 +17,7 @@ use tipb::FieldType; /// The interface for pull-based executors. It is similar to the Volcano /// Iterator model, but pulls data in batch and stores data by column. +#[async_trait] pub trait BatchExecutor: Send { type StorageStats; @@ -26,7 +28,7 @@ pub trait BatchExecutor: Send { /// /// This function might return zero rows, which doesn't mean that there is /// no more result. See `is_drained` in `BatchExecuteResult`. - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult; + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult; /// Collects execution statistics (including but not limited to metrics and /// execution summaries) accumulated during execution and prepares for @@ -68,6 +70,7 @@ pub trait BatchExecutor: Send { } } +#[async_trait] impl BatchExecutor for Box { type StorageStats = T::StorageStats; @@ -75,8 +78,8 @@ impl BatchExecutor for Box { (**self).schema() } - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { - (**self).next_batch(scan_rows) + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + (**self).next_batch(scan_rows).await } fn collect_exec_stats(&mut self, dest: &mut ExecuteStats) { @@ -96,6 +99,7 @@ impl BatchExecutor for Box { } } +#[async_trait] impl BatchExecutor for WithSummaryCollector { @@ -105,9 +109,9 @@ impl BatchExecutor self.inner.schema() } - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { let timer = 
self.summary_collector.on_start_iterate(); - let result = self.inner.next_batch(scan_rows); + let result = self.inner.next_batch(scan_rows).await; self.summary_collector .on_finish_iterate(timer, result.logical_rows.len()); result diff --git a/components/tidb_query_executors/src/limit_executor.rs b/components/tidb_query_executors/src/limit_executor.rs index a1917e1b17b..a9cd2cae482 100644 --- a/components/tidb_query_executors/src/limit_executor.rs +++ b/components/tidb_query_executors/src/limit_executor.rs @@ -1,5 +1,6 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. +use async_trait::async_trait; use tidb_query_common::{storage::IntervalRange, Result}; use tipb::FieldType; @@ -23,6 +24,7 @@ impl BatchLimitExecutor { } } +#[async_trait] impl BatchExecutor for BatchLimitExecutor { type StorageStats = Src::StorageStats; @@ -32,13 +34,13 @@ impl BatchExecutor for BatchLimitExecutor { } #[inline] - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { let real_scan_rows = if self.is_src_scan_executor { std::cmp::min(scan_rows, self.remaining_rows) } else { scan_rows }; - let mut result = self.src.next_batch(real_scan_rows); + let mut result = self.src.next_batch(real_scan_rows).await; if result.logical_rows.len() < self.remaining_rows { self.remaining_rows -= result.logical_rows.len(); } else { @@ -74,6 +76,7 @@ impl BatchExecutor for BatchLimitExecutor { #[cfg(test)] mod tests { + use futures::executor::block_on; use tidb_query_datatype::{ codec::{batch::LazyBatchColumnVec, data_type::VectorValue}, expr::EvalWarnings, @@ -99,7 +102,7 @@ mod tests { let mut exec = BatchLimitExecutor::new(src_exec, 0, false).unwrap(); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 3); assert!(r.is_drained.unwrap()); @@ -121,7 +124,7 @@ mod tests { let mut exec = 
BatchLimitExecutor::new(src_exec, 10, false).unwrap(); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[1, 2]); assert_eq!(r.physical_columns.rows_len(), 3); r.is_drained.unwrap_err(); @@ -153,12 +156,12 @@ mod tests { let mut exec = BatchLimitExecutor::new(src_exec, 10, false).unwrap(); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 3); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[1, 2]); assert_eq!(r.physical_columns.rows_len(), 3); assert!(r.is_drained.unwrap()); @@ -190,12 +193,12 @@ mod tests { let mut exec = BatchLimitExecutor::new(src_exec, 4, false).unwrap(); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[1, 2]); assert_eq!(r.physical_columns.rows_len(), 3); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 2]); assert_eq!(r.physical_columns.rows_len(), 3); assert!(r.is_drained.unwrap()); // No errors @@ -233,17 +236,17 @@ mod tests { let mut exec = BatchLimitExecutor::new(src_exec, 4, false).unwrap(); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[1, 2]); assert_eq!(r.physical_columns.rows_len(), 3); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 4]); assert_eq!(r.physical_columns.rows_len(), 5); assert!(r.is_drained.unwrap()); @@ -256,9 +259,9 @@ mod tests { let src_exec = MockScanExecutor::new(rows, schema); let mut exec = BatchLimitExecutor::new(src_exec, 5, 
true).unwrap(); - let r = exec.next_batch(100); + let r = block_on(exec.next_batch(100)); assert_eq!(r.logical_rows, &[0, 1, 2, 3, 4]); - let r = exec.next_batch(2); + let r = block_on(exec.next_batch(2)); assert_eq!(r.is_drained.unwrap(), true); let schema = vec![FieldTypeTp::LongLong.into()]; @@ -266,10 +269,10 @@ mod tests { let src_exec = MockScanExecutor::new(rows, schema); let mut exec = BatchLimitExecutor::new(src_exec, 1024, true).unwrap(); for _i in 0..1023 { - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(r.is_drained.unwrap(), false); } - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(r.is_drained.unwrap(), true); } } diff --git a/components/tidb_query_executors/src/projection_executor.rs b/components/tidb_query_executors/src/projection_executor.rs index 7304ed1b1e3..962cd8698e5 100644 --- a/components/tidb_query_executors/src/projection_executor.rs +++ b/components/tidb_query_executors/src/projection_executor.rs @@ -2,6 +2,7 @@ use std::sync::Arc; +use async_trait::async_trait; use tidb_query_common::{storage::IntervalRange, Result}; use tidb_query_datatype::{ codec::{batch::LazyBatchColumnVec, data_type::*}, @@ -75,6 +76,7 @@ impl BatchProjectionExecutor { } } +#[async_trait] impl BatchExecutor for BatchProjectionExecutor { type StorageStats = Src::StorageStats; @@ -84,8 +86,8 @@ impl BatchExecutor for BatchProjectionExecutor { } #[inline] - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { - let mut src_result = self.src.next_batch(scan_rows); + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + let mut src_result = self.src.next_batch(scan_rows).await; let child_schema = self.src.schema(); let mut eval_result = Vec::with_capacity(self.schema().len()); let BatchExecuteResult { @@ -159,6 +161,7 @@ impl BatchExecutor for BatchProjectionExecutor { #[cfg(test)] mod tests { + use futures::executor::block_on; use tidb_query_codegen::rpn_fn; use 
tidb_query_datatype::{codec::batch::LazyBatchColumnVec, expr::EvalWarnings, FieldTypeTp}; @@ -213,7 +216,7 @@ mod tests { // correctly. No errors should be generated and the expression functions // should not be called. - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); // The scan rows parameter has no effect for mock executor. We don't care. // FIXME: A compiler bug prevented us write: // | assert_eq!(r.logical_rows.as_slice(), &[]); @@ -221,11 +224,11 @@ mod tests { assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(r.is_drained.unwrap()); } @@ -289,7 +292,7 @@ mod tests { ]; let mut exec = BatchProjectionExecutor::new_for_test(src_exec, exprs); assert_eq!(exec.schema().len(), 1); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[2, 0]); assert_eq!(r.physical_columns.columns_len(), 1); assert_eq!( @@ -298,12 +301,12 @@ mod tests { ); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.columns_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[1]); assert_eq!(r.physical_columns.columns_len(), 1); assert_eq!( @@ -326,7 +329,7 @@ mod tests { ]; let mut exec = BatchProjectionExecutor::new_for_test(src_exec, exprs); assert_eq!(exec.schema().len(), 2); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[2, 0]); assert_eq!(r.physical_columns.columns_len(), 2); assert_eq!( @@ -339,12 +342,12 @@ mod tests { ); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = 
block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.columns_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[1]); assert_eq!(r.physical_columns.columns_len(), 2); assert_eq!( @@ -438,7 +441,7 @@ mod tests { .build_for_test(); let mut exec = BatchProjectionExecutor::new_for_test(src_exec, vec![expr1, expr2]); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[3, 4, 0, 2]); assert_eq!(r.physical_columns.columns_len(), 2); assert_eq!( @@ -451,11 +454,11 @@ mod tests { ); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(r.logical_rows, &[0]); assert_eq!(r.physical_columns[0].decoded().to_int_vec(), vec![None]); assert_eq!(r.physical_columns[1].decoded().to_int_vec(), vec![Some(1)]); @@ -521,7 +524,7 @@ mod tests { .collect(); let mut exec = BatchProjectionExecutor::new_for_test(src_exec, exprs); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); r.is_drained.unwrap_err(); } diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 9f32aaa180e..551c3da8a7e 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -1,6 +1,6 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{convert::TryFrom, sync::Arc, time::Duration}; +use std::{convert::TryFrom, sync::Arc}; use fail::fail_point; use kvproto::coprocessor::KeyRange; @@ -19,13 +19,11 @@ use tikv_util::{ deadline::Deadline, metrics::{ThrottleType, NON_TXN_COMMAND_THROTTLE_TIME_COUNTER_VEC_STATIC}, quota_limiter::QuotaLimiter, - time::Instant, }; use tipb::{ self, Chunk, DagRequest, EncodeType, ExecType, ExecutorExecutionSummary, FieldType, SelectResponse, StreamResponse, }; -use yatp::task::future::reschedule; use super::{ interface::{BatchExecutor, ExecuteStats}, @@ -44,10 +42,6 @@ pub use tidb_query_expr::types::BATCH_MAX_SIZE; // TODO: Maybe there can be some better strategy. Needs benchmarks and tunes. const BATCH_GROW_FACTOR: usize = 2; -/// Batch executors are run in coroutines. `MAX_TIME_SLICE` is the maximum time -/// a coroutine can run without being yielded. -pub const MAX_TIME_SLICE: Duration = Duration::from_millis(1); - pub struct BatchExecutorsRunner { /// The deadline of this handler. For each check point (e.g. each iteration) /// we need to check whether or not the deadline is exceeded and break @@ -450,26 +444,21 @@ impl BatchExecutorsRunner { let mut ctx = EvalContext::new(self.config.clone()); let mut record_all = 0; - let mut time_slice_start = Instant::now(); loop { - // Check whether we should yield from the execution - if need_reschedule(time_slice_start) { - reschedule().await; - time_slice_start = Instant::now(); - } - let mut chunk = Chunk::default(); - let mut sample = self.quota_limiter.new_sample(true); let (drained, record_len) = { - let _guard = sample.observe_cpu(); - self.internal_handle_request( - false, - batch_size, - &mut chunk, - &mut warnings, - &mut ctx, - )? + let (cpu_time, res) = sample + .observe_cpu_async(self.internal_handle_request( + false, + batch_size, + &mut chunk, + &mut warnings, + &mut ctx, + )) + .await; + sample.add_cpu_time(cpu_time); + res? 
}; if chunk.has_rows_data() { sample.add_read_bytes(chunk.get_rows_data().len()); @@ -534,7 +523,7 @@ impl BatchExecutorsRunner { } } - pub fn handle_streaming_request( + pub async fn handle_streaming_request( &mut self, ) -> Result<(Option<(StreamResponse, IntervalRange)>, bool)> { let mut warnings = self.config.new_eval_warnings(); @@ -548,13 +537,15 @@ impl BatchExecutorsRunner { while record_len < self.stream_row_limit && !is_drained { let mut current_chunk = Chunk::default(); // TODO: Streaming coprocessor on TiKV is just not enabled in TiDB now. - let (drained, len) = self.internal_handle_request( - true, - batch_size.min(self.stream_row_limit - record_len), - &mut current_chunk, - &mut warnings, - &mut ctx, - )?; + let (drained, len) = self + .internal_handle_request( + true, + batch_size.min(self.stream_row_limit - record_len), + &mut current_chunk, + &mut warnings, + &mut ctx, + ) + .await?; chunk .mut_rows_data() .extend_from_slice(current_chunk.get_rows_data()); @@ -586,7 +577,7 @@ impl BatchExecutorsRunner { } } - fn internal_handle_request( + async fn internal_handle_request( &mut self, is_streaming: bool, batch_size: usize, @@ -598,7 +589,7 @@ impl BatchExecutorsRunner { self.deadline.check()?; - let mut result = self.out_most_executor.next_batch(batch_size); + let mut result = self.out_most_executor.next_batch(batch_size).await; let is_drained = result.is_drained?; @@ -690,9 +681,3 @@ fn grow_batch_size(batch_size: &mut usize) { } } } - -#[inline] -fn need_reschedule(time_slice_start: Instant) -> bool { - fail_point!("copr_reschedule", |_| true); - time_slice_start.saturating_elapsed() > MAX_TIME_SLICE -} diff --git a/components/tidb_query_executors/src/selection_executor.rs b/components/tidb_query_executors/src/selection_executor.rs index d3a2d97ef4b..60459229f4f 100644 --- a/components/tidb_query_executors/src/selection_executor.rs +++ b/components/tidb_query_executors/src/selection_executor.rs @@ -2,6 +2,7 @@ use std::sync::Arc; +use 
async_trait::async_trait; use tidb_query_common::{storage::IntervalRange, Result}; use tidb_query_datatype::{ codec::data_type::*, @@ -164,6 +165,7 @@ where err_result } +#[async_trait] impl BatchExecutor for BatchSelectionExecutor { type StorageStats = Src::StorageStats; @@ -174,8 +176,8 @@ impl BatchExecutor for BatchSelectionExecutor { } #[inline] - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { - let mut src_result = self.src.next_batch(scan_rows); + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + let mut src_result = self.src.next_batch(scan_rows).await; if let Err(e) = self.handle_src_result(&mut src_result) { // TODO: Rows before we meeting an evaluation error are innocent. @@ -213,6 +215,7 @@ impl BatchExecutor for BatchSelectionExecutor { #[cfg(test)] mod tests { + use futures::executor::block_on; use tidb_query_codegen::rpn_fn; use tidb_query_datatype::{codec::batch::LazyBatchColumnVec, expr::EvalWarnings, FieldTypeTp}; @@ -267,7 +270,7 @@ mod tests { // correctly. No errors should be generated and the predicate function // should not be called. - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); // The scan rows parameter has no effect for mock executor. We don't care. // FIXME: A compiler bug prevented us write: // | assert_eq!(r.logical_rows.as_slice(), &[]); @@ -275,11 +278,11 @@ mod tests { assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(r.is_drained.unwrap()); } @@ -359,15 +362,15 @@ mod tests { // The selection executor should return data as it is. 
- let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[2, 0]); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[1]); assert!(r.is_drained.unwrap()); } @@ -385,15 +388,15 @@ mod tests { // The selection executor should always return empty rows. - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(r.is_drained.unwrap()); } @@ -479,15 +482,15 @@ mod tests { .build_for_test(); let mut exec = BatchSelectionExecutor::new_for_test(src_exec, vec![predicate]); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[3, 0]); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(r.is_drained.unwrap()); } @@ -504,15 +507,15 @@ mod tests { .build_for_test(); let mut exec = BatchSelectionExecutor::new_for_test(src_exec, vec![predicate]); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 2]); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); 
assert!(r.logical_rows.is_empty()); assert!(r.is_drained.unwrap()); } @@ -542,15 +545,15 @@ mod tests { let src_exec = make_src_executor_using_fixture_2(); let mut exec = BatchSelectionExecutor::new_for_test(src_exec, predicates); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(r.is_drained.unwrap()); } @@ -577,15 +580,15 @@ mod tests { let src_exec = make_src_executor_using_fixture_2(); let mut exec = BatchSelectionExecutor::new_for_test(src_exec, predicates); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(r.is_drained.unwrap()); } @@ -653,7 +656,7 @@ mod tests { // TODO: A more precise result is that the first two rows are returned and error // starts from the third row. 
- let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); r.is_drained.unwrap_err(); } diff --git a/components/tidb_query_executors/src/simple_aggr_executor.rs b/components/tidb_query_executors/src/simple_aggr_executor.rs index d26d293a274..75790428187 100644 --- a/components/tidb_query_executors/src/simple_aggr_executor.rs +++ b/components/tidb_query_executors/src/simple_aggr_executor.rs @@ -5,6 +5,7 @@ use std::sync::Arc; +use async_trait::async_trait; use tidb_query_aggr::*; use tidb_query_common::{storage::IntervalRange, Result}; use tidb_query_datatype::{ @@ -24,6 +25,7 @@ pub struct BatchSimpleAggregationExecutor( AggregationExecutor, ); +#[async_trait] impl BatchExecutor for BatchSimpleAggregationExecutor { type StorageStats = Src::StorageStats; @@ -33,8 +35,8 @@ impl BatchExecutor for BatchSimpleAggregationExecutor { } #[inline] - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { - self.0.next_batch(scan_rows) + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + self.0.next_batch(scan_rows).await } #[inline] @@ -232,6 +234,7 @@ impl AggregationExecutorImpl for SimpleAggregationImpl #[cfg(test)] mod tests { + use futures::executor::block_on; use tidb_query_codegen::AggrFunction; use tidb_query_datatype::{ expr::{EvalContext, EvalWarnings}, @@ -460,15 +463,15 @@ mod tests { BatchSimpleAggregationExecutor::new_for_test(src_exec, aggr_definitions, MyParser); // The scan rows parameter has no effect for mock executor. We don't care. 
- let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); assert_eq!(r.physical_columns.rows_len(), 1); assert_eq!(r.physical_columns.columns_len(), 12); @@ -548,15 +551,15 @@ mod tests { AllAggrDefinitionParser, ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); assert_eq!(r.physical_columns.rows_len(), 1); assert_eq!(r.physical_columns.columns_len(), 10); @@ -665,12 +668,12 @@ mod tests { let mut exec = BatchSimpleAggregationExecutor::new_for_test(src_exec, vec![Expr::default()], MyParser); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(r.is_drained.unwrap()); diff --git a/components/tidb_query_executors/src/slow_hash_aggr_executor.rs b/components/tidb_query_executors/src/slow_hash_aggr_executor.rs index 2502e28f570..ee076b652a7 100644 --- a/components/tidb_query_executors/src/slow_hash_aggr_executor.rs +++ b/components/tidb_query_executors/src/slow_hash_aggr_executor.rs @@ -7,6 +7,7 @@ use std::{ sync::Arc, }; +use async_trait::async_trait; use collections::{HashMap, HashMapEntry}; use tidb_query_aggr::*; use 
tidb_query_common::{storage::IntervalRange, Result}; @@ -32,6 +33,7 @@ pub struct BatchSlowHashAggregationExecutor( AggregationExecutor, ); +#[async_trait] impl BatchExecutor for BatchSlowHashAggregationExecutor { type StorageStats = Src::StorageStats; @@ -41,8 +43,8 @@ impl BatchExecutor for BatchSlowHashAggregationExecutor } #[inline] - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { - self.0.next_batch(scan_rows) + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + self.0.next_batch(scan_rows).await } #[inline] @@ -511,6 +513,7 @@ impl Eq for GroupKeyRefUnsafe {} #[cfg(test)] mod tests { + use futures::executor::block_on; use tidb_query_datatype::{codec::data_type::*, FieldTypeTp}; use tidb_query_expr::{ impl_arithmetic::{arithmetic_fn_meta, RealPlus}, @@ -571,17 +574,17 @@ mod tests { AllAggrDefinitionParser, ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let mut r = exec.next_batch(1); + let mut r = block_on(exec.next_batch(1)); // col_4 (sort_key), col_0 + 1 can result in: // NULL, NULL // aa, NULL diff --git a/components/tidb_query_executors/src/stream_aggr_executor.rs b/components/tidb_query_executors/src/stream_aggr_executor.rs index 4b768cd65fe..d8a0599bf87 100644 --- a/components/tidb_query_executors/src/stream_aggr_executor.rs +++ b/components/tidb_query_executors/src/stream_aggr_executor.rs @@ -2,6 +2,7 @@ use std::{cmp::Ordering, convert::TryFrom, sync::Arc}; +use async_trait::async_trait; use tidb_query_aggr::*; use tidb_query_common::{storage::IntervalRange, Result}; use tidb_query_datatype::{ @@ -24,6 +25,7 @@ pub struct BatchStreamAggregationExecutor( AggregationExecutor, ); 
+#[async_trait] impl BatchExecutor for BatchStreamAggregationExecutor { type StorageStats = Src::StorageStats; @@ -33,8 +35,8 @@ impl BatchExecutor for BatchStreamAggregationExecutor { } #[inline] - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { - self.0.next_batch(scan_rows) + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + self.0.next_batch(scan_rows).await } #[inline] @@ -454,6 +456,7 @@ fn update_current_states( #[cfg(test)] mod tests { + use futures::executor::block_on; use tidb_query_datatype::{ builder::FieldTypeBuilder, expr::EvalWarnings, Collation, FieldTypeTp, }; @@ -511,7 +514,7 @@ mod tests { AllAggrDefinitionParser, ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1]); assert_eq!(r.physical_columns.rows_len(), 2); assert_eq!(r.physical_columns.columns_len(), 5); @@ -542,12 +545,12 @@ mod tests { &[None, Real::new(3.5).ok()] ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); assert_eq!(r.physical_columns.rows_len(), 1); assert_eq!(r.physical_columns.columns_len(), 5); @@ -595,7 +598,7 @@ mod tests { AllAggrDefinitionParser, ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1]); assert_eq!(r.physical_columns.rows_len(), 2); assert_eq!(r.physical_columns.columns_len(), 2); @@ -611,12 +614,12 @@ mod tests { &[None, Real::new(1.5).ok()] ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); 
assert_eq!(r.physical_columns.rows_len(), 1); assert_eq!(r.physical_columns.columns_len(), 2); diff --git a/components/tidb_query_executors/src/table_scan_executor.rs b/components/tidb_query_executors/src/table_scan_executor.rs index 3ddb20b3e4d..957a23ba8c0 100644 --- a/components/tidb_query_executors/src/table_scan_executor.rs +++ b/components/tidb_query_executors/src/table_scan_executor.rs @@ -2,6 +2,7 @@ use std::{collections::HashSet, sync::Arc}; +use async_trait::async_trait; use collections::HashMap; use kvproto::coprocessor::KeyRange; use smallvec::SmallVec; @@ -108,6 +109,7 @@ impl BatchTableScanExecutor { } } +#[async_trait] impl BatchExecutor for BatchTableScanExecutor { type StorageStats = S::Statistics; @@ -117,8 +119,8 @@ impl BatchExecutor for BatchTableScanExecutor { } #[inline] - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { - self.0.next_batch(scan_rows) + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + self.0.next_batch(scan_rows).await } #[inline] @@ -438,6 +440,7 @@ impl ScanExecutorImpl for TableScanExecutorImpl { mod tests { use std::{iter, sync::Arc}; + use futures::executor::block_on; use kvproto::coprocessor::KeyRange; use tidb_query_common::{ execute_stats::*, storage::test_fixture::FixtureStorage, util::convert_to_prefix_next, @@ -716,7 +719,7 @@ mod tests { for expect_rows in batch_expect_rows { let expect_rows = *expect_rows; let expect_drained = start_row + expect_rows > total_rows; - let result = executor.next_batch(expect_rows); + let result = block_on(executor.next_batch(expect_rows)); assert_eq!(*result.is_drained.as_ref().unwrap(), expect_drained); if expect_drained { // all remaining rows are fetched @@ -796,8 +799,8 @@ mod tests { .unwrap() .collect_summary(1); - executor.next_batch(1); - executor.next_batch(2); + block_on(executor.next_batch(1)); + block_on(executor.next_batch(2)); let mut s = ExecuteStats::new(2); executor.collect_exec_stats(&mut s); @@ -825,7 +828,7 @@ mod 
tests { // Reset collected statistics so that now we will only collect statistics in // this round. s.clear(); - executor.next_batch(10); + block_on(executor.next_batch(10)); executor.collect_exec_stats(&mut s); assert_eq!(s.scanned_rows_per_range.len(), 1); @@ -938,7 +941,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); result.is_drained.unwrap_err(); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 2); @@ -1045,7 +1048,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); result.is_drained.unwrap_err(); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 1); @@ -1093,7 +1096,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); result.is_drained.unwrap_err(); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); @@ -1135,7 +1138,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(1); + let mut result = block_on(executor.next_batch(1)); result.is_drained.unwrap(); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 1); @@ -1153,7 +1156,7 @@ mod tests { &[Some(7)] ); - let result = executor.next_batch(1); + let result = block_on(executor.next_batch(1)); result.is_drained.unwrap_err(); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 0); @@ -1174,7 +1177,7 @@ mod tests { ) .unwrap(); - let result = executor.next_batch(10); + let result = block_on(executor.next_batch(10)); result.is_drained.unwrap_err(); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 0); @@ -1195,7 +1198,7 @@ mod tests { ) .unwrap(); - let mut result = 
executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); result.is_drained.unwrap(); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 2); @@ -1229,7 +1232,7 @@ mod tests { ) .unwrap(); - let result = executor.next_batch(10); + let result = block_on(executor.next_batch(10)); result.is_drained.unwrap_err(); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 0); @@ -1279,7 +1282,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert_eq!(result.is_drained.unwrap(), true); assert_eq!(result.logical_rows.len(), 1); assert_eq!(result.physical_columns.columns_len(), columns_is_pk.len()); @@ -1387,7 +1390,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert_eq!(result.is_drained.unwrap(), true); assert_eq!(result.logical_rows.len(), 1); @@ -1568,7 +1571,7 @@ mod tests { ) .unwrap(); - let mut result = executor.next_batch(10); + let mut result = block_on(executor.next_batch(10)); assert_eq!(result.is_drained.unwrap(), true); if !columns_info.is_empty() { assert_eq!(result.logical_rows.len(), 1); diff --git a/components/tidb_query_executors/src/top_n_executor.rs b/components/tidb_query_executors/src/top_n_executor.rs index 39f009784f0..06dc1ce956b 100644 --- a/components/tidb_query_executors/src/top_n_executor.rs +++ b/components/tidb_query_executors/src/top_n_executor.rs @@ -2,6 +2,7 @@ use std::{cmp::Ordering, collections::BinaryHeap, ptr::NonNull, sync::Arc}; +use async_trait::async_trait; use tidb_query_common::{storage::IntervalRange, Result}; use tidb_query_datatype::{ codec::{ @@ -178,10 +179,10 @@ impl BatchTopNExecutor { } #[inline] - fn handle_next_batch(&mut self) -> Result> { + async fn handle_next_batch(&mut self) -> Result> { // Use max batch size from the beginning because top N 
// always needs to calculate over all data. - let src_result = self.src.next_batch(crate::runner::BATCH_MAX_SIZE); + let src_result = self.src.next_batch(crate::runner::BATCH_MAX_SIZE).await; self.context.warnings = src_result.warnings; @@ -319,6 +320,7 @@ impl BatchTopNExecutor { } } +#[async_trait] impl BatchExecutor for BatchTopNExecutor { type StorageStats = Src::StorageStats; @@ -328,7 +330,7 @@ impl BatchExecutor for BatchTopNExecutor { } #[inline] - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { assert!(!self.is_ended); if self.n == 0 { @@ -343,11 +345,11 @@ impl BatchExecutor for BatchTopNExecutor { if let Some(paging_size) = self.context.cfg.paging_size { if self.n > paging_size as usize { - return self.src.next_batch(scan_rows); + return self.src.next_batch(scan_rows).await; } } - let result = self.handle_next_batch(); + let result = self.handle_next_batch().await; match result { Err(e) => { @@ -507,6 +509,7 @@ impl Eq for HeapItemUnsafe {} #[cfg(test)] mod tests { + use futures::executor::block_on; use tidb_query_datatype::{ builder::FieldTypeBuilder, expr::EvalWarnings, Collation, FieldTypeFlag, FieldTypeTp, }; @@ -540,7 +543,7 @@ mod tests { 0, ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(r.physical_columns.rows_len(), 0); assert!(r.is_drained.unwrap()); } @@ -578,11 +581,11 @@ mod tests { 10, ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(r.physical_columns.rows_len(), 0); assert!(r.is_drained.unwrap()); } @@ -699,17 +702,17 @@ mod tests { 100, ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r 
= exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4, 5, 6]); assert_eq!(r.physical_columns.rows_len(), 7); assert_eq!(r.physical_columns.columns_len(), 3); @@ -769,17 +772,17 @@ mod tests { 7, ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4, 5, 6]); assert_eq!(r.physical_columns.rows_len(), 7); assert_eq!(r.physical_columns.columns_len(), 3); @@ -852,17 +855,17 @@ mod tests { 5, ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); assert_eq!(r.physical_columns.rows_len(), 5); assert_eq!(r.physical_columns.columns_len(), 3); @@ -1016,17 +1019,17 @@ mod tests { 5, ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); 
assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); assert_eq!(r.physical_columns.rows_len(), 5); assert_eq!(r.physical_columns.columns_len(), 3); @@ -1097,17 +1100,17 @@ mod tests { 5, ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); assert_eq!(r.physical_columns.rows_len(), 5); assert_eq!(r.physical_columns.columns_len(), 3); @@ -1258,17 +1261,17 @@ mod tests { 5, ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); assert_eq!(r.physical_columns.rows_len(), 5); assert_eq!(r.physical_columns.columns_len(), 3); @@ -1372,17 +1375,17 @@ mod tests { 5, ); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); assert!(!r.is_drained.unwrap()); - let r = exec.next_batch(1); + let r = 
block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); assert_eq!(r.physical_columns.rows_len(), 5); assert_eq!(r.physical_columns.columns_len(), 3); @@ -1485,8 +1488,8 @@ mod tests { let mut exec2 = build_src_executor(); loop { - let r1 = exec.next_batch(1); - let r2 = exec2.next_batch(1); + let r1 = block_on(exec.next_batch(1)); + let r2 = block_on(exec2.next_batch(1)); assert_eq!(r1.logical_rows, r2.logical_rows); assert_eq!( r1.physical_columns.rows_len(), diff --git a/components/tidb_query_executors/src/util/aggr_executor.rs b/components/tidb_query_executors/src/util/aggr_executor.rs index a40c0c9aec4..ceb9949f83b 100644 --- a/components/tidb_query_executors/src/util/aggr_executor.rs +++ b/components/tidb_query_executors/src/util/aggr_executor.rs @@ -29,6 +29,7 @@ use std::{convert::TryFrom, sync::Arc}; +use async_trait::async_trait; use tidb_query_aggr::*; use tidb_query_common::{storage::IntervalRange, Result}; use tidb_query_datatype::{ @@ -202,10 +203,14 @@ impl> AggregationExecutor Result<(Option, bool)> { + async fn handle_next_batch(&mut self) -> Result<(Option, bool)> { // Use max batch size from the beginning because aggregation // always needs to calculate over all data. 
- let src_result = self.entities.src.next_batch(crate::runner::BATCH_MAX_SIZE); + let src_result = self + .entities + .src + .next_batch(crate::runner::BATCH_MAX_SIZE) + .await; self.entities.context.warnings = src_result.warnings; @@ -290,6 +295,7 @@ impl> AggregationExecutor> BatchExecutor for AggregationExecutor { @@ -301,10 +307,10 @@ impl> BatchExecutor } #[inline] - fn next_batch(&mut self, _scan_rows: usize) -> BatchExecuteResult { + async fn next_batch(&mut self, _scan_rows: usize) -> BatchExecuteResult { assert!(!self.is_ended); - let result = self.handle_next_batch(); + let result = self.handle_next_batch().await; match result { Err(e) => { @@ -581,6 +587,7 @@ pub mod tests { fn test_agg_paging() { use std::sync::Arc; + use futures::executor::block_on; use tidb_query_datatype::expr::EvalConfig; use tidb_query_expr::RpnExpressionBuilder; use tipb::ExprType; @@ -642,7 +649,7 @@ pub mod tests { let src_exec = make_src_executor_2(); let mut exec = exec_builder(src_exec, Some(paging_size)); for nth_call in 0..call_num { - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); if nth_call == call_num - 1 { assert!(r.is_drained.unwrap()); } else { @@ -672,7 +679,7 @@ pub mod tests { let row_num = &expect_row_num2[test_case]; let mut exec = exec_stream(make_src_executor_2(), Some(paging_size)); for nth_call in 0..call_num { - let r = exec.next_batch(1); + let r = block_on(exec.next_batch(1)); if nth_call == call_num - 1 { assert!(r.is_drained.unwrap()); } else { diff --git a/components/tidb_query_executors/src/util/mock_executor.rs b/components/tidb_query_executors/src/util/mock_executor.rs index ae20695033f..a6f11904b33 100644 --- a/components/tidb_query_executors/src/util/mock_executor.rs +++ b/components/tidb_query_executors/src/util/mock_executor.rs @@ -1,5 +1,6 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
+use async_trait::async_trait; use tidb_query_common::storage::IntervalRange; use tidb_query_datatype::{ codec::{batch::LazyBatchColumnVec, data_type::VectorValue}, @@ -28,6 +29,7 @@ impl MockExecutor { } } +#[async_trait] impl BatchExecutor for MockExecutor { type StorageStats = (); @@ -35,7 +37,7 @@ impl BatchExecutor for MockExecutor { &self.schema } - fn next_batch(&mut self, _scan_rows: usize) -> BatchExecuteResult { + async fn next_batch(&mut self, _scan_rows: usize) -> BatchExecuteResult { self.results.next().unwrap() } @@ -73,6 +75,7 @@ impl MockScanExecutor { } } +#[async_trait] impl BatchExecutor for MockScanExecutor { type StorageStats = (); @@ -80,7 +83,7 @@ impl BatchExecutor for MockScanExecutor { &self.schema } - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { let real_scan_rows = std::cmp::min(scan_rows, self.rows.len()); // just one column let mut res_col = Vec::new(); diff --git a/components/tidb_query_executors/src/util/scan_executor.rs b/components/tidb_query_executors/src/util/scan_executor.rs index c9a88fb820e..935db5dd392 100644 --- a/components/tidb_query_executors/src/util/scan_executor.rs +++ b/components/tidb_query_executors/src/util/scan_executor.rs @@ -1,5 +1,6 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. +use async_trait::async_trait; use kvproto::coprocessor::KeyRange; use tidb_query_common::{ storage::{ @@ -98,7 +99,7 @@ impl ScanExecutor { /// /// The columns are ensured to be regular even if there are errors during /// the process. 
- fn fill_column_vec( + async fn fill_column_vec( &mut self, scan_rows: usize, columns: &mut LazyBatchColumnVec, @@ -106,7 +107,7 @@ impl ScanExecutor { assert!(scan_rows > 0); for i in 0..scan_rows { - let some_row = self.scanner.next_opt(i == scan_rows - 1)?; + let some_row = self.scanner.next_opt(i == scan_rows - 1).await?; if let Some((key, value)) = some_row { // Retrieved one row from point range or non-point range. @@ -160,6 +161,7 @@ pub fn check_columns_info_supported(columns_info: &[ColumnInfo]) -> Result<()> { Ok(()) } +#[async_trait] impl BatchExecutor for ScanExecutor { type StorageStats = S::Statistics; @@ -169,12 +171,12 @@ impl BatchExecutor for ScanExecutor { } #[inline] - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { assert!(!self.is_ended); assert!(scan_rows > 0); let mut logical_columns = self.imp.build_column_vec(scan_rows); - let is_drained = self.fill_column_vec(scan_rows, &mut logical_columns); + let is_drained = self.fill_column_vec(scan_rows, &mut logical_columns).await; logical_columns.assert_columns_equal_length(); let logical_rows = (0..logical_columns.rows_len()).collect(); diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index befe6559e32..d8964cf0301 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -36,6 +36,7 @@ num-traits = "0.2" num_cpus = "1" online_config = { path = "../online_config" } openssl = "0.10" +pin-project = "1.0" prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" protobuf = "2" diff --git a/components/tikv_util/src/quota_limiter.rs b/components/tikv_util/src/quota_limiter.rs index 4d5ca82c7d9..818ec0ea60c 100644 --- a/components/tikv_util/src/quota_limiter.rs +++ b/components/tikv_util/src/quota_limiter.rs @@ -1,16 +1,20 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ + future::Future, + pin::Pin, sync::{ atomic::{AtomicBool, AtomicU64, Ordering}, Arc, }, + task::{Context, Poll}, time::Duration, }; use cpu_time::ThreadTime; use futures::compat::Future01CompatExt; use online_config::{ConfigChange, ConfigManager}; +use pin_project::pin_project; use super::{ config::{ReadableDuration, ReadableSize}, @@ -110,7 +114,19 @@ impl<'a> Sample { } } - fn add_cpu_time(&mut self, time: Duration) { + /// Record thread cpu time in async manner. The function creates a future + /// that can track the cpu time used during the future's poll, caller + /// should explicitly call `add_cpu_time` after the future is ready. + pub fn observe_cpu_async(&self, f: F) -> CpuObserveFuture { + CpuObserveFuture { + enabled: self.enable_cpu_limit, + total_duration: Duration::ZERO, + timer: None, + delegate: f, + } + } + + pub fn add_cpu_time(&mut self, time: Duration) { self.cpu_time += time; } } @@ -128,6 +144,37 @@ impl<'a> Drop for CpuObserveGuard<'a> { } } +/// CpuObserveFuture is a future that used to track thread cpu time. +#[pin_project] +pub struct CpuObserveFuture { + enabled: bool, + total_duration: Duration, + timer: Option, + #[pin] + delegate: F, +} + +// `ThreadTime` is not Send, but is safe here because we only use it duration +// each poll. 
+unsafe impl Send for CpuObserveFuture {} + +impl Future for CpuObserveFuture { + type Output = (Duration, F::Output); + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let this = self.project(); + if *this.enabled { + *this.timer = Some(ThreadTime::now()); + } + let res = this.delegate.poll(cx); + if let Some(timer) = this.timer { + *this.total_duration += timer.elapsed(); + } + let dur = *this.total_duration; + res.map(|r| (dur, r)) + } +} + impl Default for QuotaLimiter { fn default() -> Self { let foreground_limiters = LimiterItems::default(); diff --git a/src/coprocessor/checksum.rs b/src/coprocessor/checksum.rs index f208b87ee0f..52bd0a60184 100644 --- a/src/coprocessor/checksum.rs +++ b/src/coprocessor/checksum.rs @@ -7,12 +7,8 @@ use tidb_query_common::storage::{ scanner::{RangesScanner, RangesScannerOptions}, Range, }; -use tidb_query_executors::runner::MAX_TIME_SLICE; -use tidb_query_expr::BATCH_MAX_SIZE; use tikv_alloc::trace::MemoryTraceGuard; -use tikv_util::time::Instant; use tipb::{ChecksumAlgorithm, ChecksumRequest, ChecksumResponse}; -use yatp::task::future::reschedule; use crate::{ coprocessor::{dag::TikvStorage, *}, @@ -77,18 +73,7 @@ impl RequestHandler for ChecksumContext { let mut prefix_digest = crc64fast::Digest::new(); prefix_digest.write(&old_prefix); - let mut row_count = 0; - let mut time_slice_start = Instant::now(); - while let Some((k, v)) = self.scanner.next()? { - row_count += 1; - if row_count >= BATCH_MAX_SIZE { - if time_slice_start.saturating_elapsed() > MAX_TIME_SLICE { - reschedule().await; - time_slice_start = Instant::now(); - } - row_count = 0; - } - + while let Some((k, v)) = self.scanner.next().await? 
{ if !k.starts_with(&new_prefix) { return Err(box_err!("Wrong prefix expect: {:?}", new_prefix)); } diff --git a/src/coprocessor/dag/mod.rs b/src/coprocessor/dag/mod.rs index 5b06638f244..ce575859e59 100644 --- a/src/coprocessor/dag/mod.rs +++ b/src/coprocessor/dag/mod.rs @@ -122,8 +122,8 @@ impl RequestHandler for BatchDagHandler { handle_qe_response(result, self.runner.can_be_cached(), self.data_version).map(|x| x.into()) } - fn handle_streaming_request(&mut self) -> Result<(Option, bool)> { - handle_qe_stream_response(self.runner.handle_streaming_request()) + async fn handle_streaming_request(&mut self) -> Result<(Option, bool)> { + handle_qe_stream_response(self.runner.handle_streaming_request().await) } fn collect_scan_statistics(&mut self, dest: &mut Statistics) { diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 5bd05bd29cd..8c2e6d571c0 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -558,7 +558,7 @@ impl Endpoint { let result = { tracker.on_begin_item(); - let result = handler.handle_streaming_request(); + let result = handler.handle_streaming_request().await; let mut storage_stats = Statistics::default(); handler.collect_scan_statistics(&mut storage_stats); @@ -803,8 +803,9 @@ mod tests { } } + #[async_trait] impl RequestHandler for StreamFixture { - fn handle_streaming_request(&mut self) -> Result<(Option, bool)> { + async fn handle_streaming_request(&mut self) -> Result<(Option, bool)> { let is_finished = if self.result_len == 0 { true } else { @@ -848,8 +849,9 @@ mod tests { } } + #[async_trait] impl RequestHandler for StreamFromClosure { - fn handle_streaming_request(&mut self) -> Result<(Option, bool)> { + async fn handle_streaming_request(&mut self) -> Result<(Option, bool)> { let result = (self.result_generator)(self.nth); self.nth += 1; result diff --git a/src/coprocessor/mod.rs b/src/coprocessor/mod.rs index 0cde193a606..8acd5325a1e 100644 --- a/src/coprocessor/mod.rs +++ 
b/src/coprocessor/mod.rs @@ -69,7 +69,7 @@ pub trait RequestHandler: Send { } /// Processes current request and produces streaming responses. - fn handle_streaming_request(&mut self) -> HandlerStreamStepResult { + async fn handle_streaming_request(&mut self) -> HandlerStreamStepResult { panic!("streaming request is not supported for this handler"); } diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index 8f7b8c57dde..ade8a007383 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -22,18 +22,14 @@ use tidb_query_datatype::{ expr::{EvalConfig, EvalContext}, FieldTypeAccessor, }; -use tidb_query_executors::{ - interface::BatchExecutor, runner::MAX_TIME_SLICE, BatchTableScanExecutor, -}; +use tidb_query_executors::{interface::BatchExecutor, BatchTableScanExecutor}; use tidb_query_expr::BATCH_MAX_SIZE; use tikv_alloc::trace::{MemoryTraceGuard, TraceEvent}; use tikv_util::{ metrics::{ThrottleType, NON_TXN_COMMAND_THROTTLE_TIME_COUNTER_VEC_STATIC}, quota_limiter::QuotaLimiter, - time::Instant, }; use tipb::{self, AnalyzeColumnsReq, AnalyzeIndexReq, AnalyzeReq, AnalyzeType}; -use yatp::task::future::reschedule; use super::{cmsketch::CmSketch, fmsketch::FmSketch, histogram::Histogram}; use crate::{ @@ -135,8 +131,6 @@ impl AnalyzeContext { req.get_cmsketch_width() as usize, ); let mut fms = FmSketch::new(req.get_sketch_size() as usize); - let mut row_count = 0; - let mut time_slice_start = Instant::now(); let mut topn_heap = BinaryHeap::new(); // cur_val recording the current value's data and its counts when iterating // index's rows. Once we met a new value, the old value will be pushed @@ -148,15 +142,7 @@ impl AnalyzeContext { } else { ANALYZE_VERSION_V1 }; - while let Some((key, _)) = scanner.next()? 
{ - row_count += 1; - if row_count >= BATCH_MAX_SIZE { - if time_slice_start.saturating_elapsed() > MAX_TIME_SLICE { - reschedule().await; - time_slice_start = Instant::now(); - } - row_count = 0; - } + while let Some((key, _)) = scanner.next().await? { let mut key = &key[..]; if is_common_handle { table::check_record_key(key)?; @@ -382,20 +368,19 @@ impl RowSampleBuilder { use tidb_query_datatype::{codec::collation::Collator, match_template_collator}; let mut is_drained = false; - let mut time_slice_start = Instant::now(); let mut collector = self.new_collector(); while !is_drained { - let time_slice_elapsed = time_slice_start.saturating_elapsed(); - if time_slice_elapsed > MAX_TIME_SLICE { - reschedule().await; - time_slice_start = Instant::now(); - } - let mut sample = self.quota_limiter.new_sample(!self.is_auto_analyze); let mut read_size: usize = 0; { + let result = { + let (duration, res) = sample + .observe_cpu_async(self.data.next_batch(BATCH_MAX_SIZE)) + .await; + sample.add_cpu_time(duration); + res + }; let _guard = sample.observe_cpu(); - let result = self.data.next_batch(BATCH_MAX_SIZE); is_drained = result.is_drained?; let columns_slice = result.physical_columns.as_slice(); @@ -888,17 +873,11 @@ impl SampleBuilder { columns_without_handle_len ]; let mut is_drained = false; - let mut time_slice_start = Instant::now(); let mut common_handle_hist = Histogram::new(self.max_bucket_size); let mut common_handle_cms = CmSketch::new(self.cm_sketch_depth, self.cm_sketch_width); let mut common_handle_fms = FmSketch::new(self.max_fm_sketch_size); while !is_drained { - let time_slice_elapsed = time_slice_start.saturating_elapsed(); - if time_slice_elapsed > MAX_TIME_SLICE { - reschedule().await; - time_slice_start = Instant::now(); - } - let result = self.data.next_batch(BATCH_MAX_SIZE); + let result = self.data.next_batch(BATCH_MAX_SIZE).await; is_drained = result.is_drained?; let mut columns_slice = result.physical_columns.as_slice(); diff --git 
a/tests/Cargo.toml b/tests/Cargo.toml index 11dbfc09f2f..14bf818aaf0 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -74,6 +74,7 @@ portable = ["tikv/portable"] [dependencies] api_version = { path = "../components/api_version", default-features = false } +async-trait = "0.1" batch-system = { path = "../components/batch-system", default-features = false } cdc = { path = "../components/cdc", default-features = false } collections = { path = "../components/collections" } diff --git a/tests/benches/coprocessor_executors/index_scan/util.rs b/tests/benches/coprocessor_executors/index_scan/util.rs index 19c2be94195..7531fb68944 100644 --- a/tests/benches/coprocessor_executors/index_scan/util.rs +++ b/tests/benches/coprocessor_executors/index_scan/util.rs @@ -3,6 +3,7 @@ use std::{marker::PhantomData, sync::Arc}; use criterion::black_box; +use futures::executor::block_on; use kvproto::coprocessor::KeyRange; use test_coprocessor::*; use tidb_query_datatype::expr::EvalConfig; @@ -48,7 +49,7 @@ impl scan_bencher::ScanExecutorBuilder for BatchIndexScan .unwrap(); // There is a step of building scanner in the first `next()` which cost time, // so we next() before hand. - executor.next_batch(1); + block_on(executor.next_batch(1)); Box::new(executor) as Box> } } diff --git a/tests/benches/coprocessor_executors/table_scan/util.rs b/tests/benches/coprocessor_executors/table_scan/util.rs index 7bcfe436d62..2fe7c4fc4c0 100644 --- a/tests/benches/coprocessor_executors/table_scan/util.rs +++ b/tests/benches/coprocessor_executors/table_scan/util.rs @@ -3,6 +3,7 @@ use std::{marker::PhantomData, sync::Arc}; use criterion::black_box; +use futures::executor::block_on; use kvproto::coprocessor::KeyRange; use test_coprocessor::*; use tidb_query_datatype::expr::EvalConfig; @@ -48,7 +49,7 @@ impl scan_bencher::ScanExecutorBuilder for BatchTableScan .unwrap(); // There is a step of building scanner in the first `next()` which cost time, // so we next() before hand. 
- executor.next_batch(1); + block_on(executor.next_batch(1)); Box::new(executor) as Box> } } diff --git a/tests/benches/coprocessor_executors/util/bencher.rs b/tests/benches/coprocessor_executors/util/bencher.rs index 64862582bd8..246510f991b 100644 --- a/tests/benches/coprocessor_executors/util/bencher.rs +++ b/tests/benches/coprocessor_executors/util/bencher.rs @@ -32,7 +32,7 @@ impl E> Bencher for BatchNext1024Bencher { |executor| { profiler::start("./BatchNext1024Bencher.profile"); let iter_times = black_box(1024); - let r = black_box(executor.next_batch(iter_times)); + let r = black_box(block_on(executor.next_batch(iter_times))); r.is_drained.unwrap(); profiler::stop(); }, @@ -62,7 +62,7 @@ impl E> Bencher for BatchNextAllBencher { |executor| { profiler::start("./BatchNextAllBencher.profile"); loop { - let r = executor.next_batch(1024); + let r = block_on(executor.next_batch(1024)); black_box(&r); if r.is_drained.unwrap() { break; diff --git a/tests/benches/coprocessor_executors/util/fixture.rs b/tests/benches/coprocessor_executors/util/fixture.rs index 5910ab4fc69..24062c7a2da 100644 --- a/tests/benches/coprocessor_executors/util/fixture.rs +++ b/tests/benches/coprocessor_executors/util/fixture.rs @@ -2,6 +2,7 @@ use std::str::FromStr; +use async_trait::async_trait; use criterion::measurement::Measurement; use rand::{seq::SliceRandom, Rng, SeedableRng}; use rand_xorshift::XorShiftRng; @@ -283,6 +284,7 @@ pub struct BatchFixtureExecutor { columns: Vec, } +#[async_trait] impl BatchExecutor for BatchFixtureExecutor { type StorageStats = Statistics; @@ -292,7 +294,7 @@ impl BatchExecutor for BatchFixtureExecutor { } #[inline] - fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { let mut columns = Vec::with_capacity(self.columns.len()); for col in &mut self.columns { let mut column = LazyBatchColumn::raw_with_capacity(scan_rows); diff --git 
a/tests/integrations/coprocessor/test_checksum.rs b/tests/integrations/coprocessor/test_checksum.rs index 2983414b9cc..66df6b2832c 100644 --- a/tests/integrations/coprocessor/test_checksum.rs +++ b/tests/integrations/coprocessor/test_checksum.rs @@ -2,6 +2,7 @@ use std::u64; +use futures::executor::block_on; use kvproto::{ coprocessor::{KeyRange, Request}, kvrpcpb::{Context, IsolationLevel}, @@ -88,7 +89,7 @@ fn reversed_checksum_crc64_xor(store: &Store, range: KeyRange) -> let mut checksum = 0; let digest = crc64fast::Digest::new(); - while let Some((k, v)) = scanner.next().unwrap() { + while let Some((k, v)) = block_on(scanner.next()).unwrap() { let mut digest = digest.clone(); digest.write(&k); digest.write(&v); From 90c4a0602040102fabf45ea8e8bfac33f4472d07 Mon Sep 17 00:00:00 2001 From: YangKeao Date: Thu, 1 Sep 2022 05:08:24 -0400 Subject: [PATCH 0181/1149] copr: add json opaque value and implement conversion, comparison... (#13342) close tikv/tikv#13340 Signed-off-by: YangKeao Co-authored-by: Liqi Geng --- Cargo.lock | 1 + components/tidb_query_datatype/Cargo.toml | 1 + .../tidb_query_datatype/src/codec/convert.rs | 4 +- .../src/codec/mysql/json/binary.rs | 8 + .../src/codec/mysql/json/comparison.rs | 10 ++ .../src/codec/mysql/json/jcodec.rs | 19 ++- .../src/codec/mysql/json/json_type.rs | 17 +++ .../src/codec/mysql/json/mod.rs | 30 +++- .../src/codec/mysql/json/modifier.rs | 3 +- .../src/codec/mysql/json/serde.rs | 11 ++ .../src/codec/row/v2/encoder_for_test.rs | 2 +- components/tidb_query_expr/src/impl_cast.rs | 141 ++++++++++++++++-- 12 files changed, 225 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7ed11da4cd7..e76166d88c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5928,6 +5928,7 @@ dependencies = [ name = "tidb_query_datatype" version = "0.0.1" dependencies = [ + "base64", "bitfield", "bitflags", "boolinator", diff --git a/components/tidb_query_datatype/Cargo.toml b/components/tidb_query_datatype/Cargo.toml index 
2e748d26d8d..7eb9a296ac2 100644 --- a/components/tidb_query_datatype/Cargo.toml +++ b/components/tidb_query_datatype/Cargo.toml @@ -6,6 +6,7 @@ publish = false description = "Data type of a query engine to run TiDB pushed down executors" [dependencies] +base64 = "0.13" bitfield = "0.13.2" bitflags = "1.0.1" boolinator = "2.4.0" diff --git a/components/tidb_query_datatype/src/codec/convert.rs b/components/tidb_query_datatype/src/codec/convert.rs index 67620510ef8..efd99f5317a 100644 --- a/components/tidb_query_datatype/src/codec/convert.rs +++ b/components/tidb_query_datatype/src/codec/convert.rs @@ -510,7 +510,7 @@ impl<'a> ToInt for JsonRef<'a> { // TiDB: 5 // MySQL: 4 let val = match self.get_type() { - JsonType::Object | JsonType::Array => Ok(ctx + JsonType::Object | JsonType::Array | JsonType::Opaque => Ok(ctx .handle_truncate_err(Error::truncated_wrong_val("Integer", self.to_string())) .map(|_| 0)?), JsonType::Literal => Ok(self.get_literal().map_or(0, |x| x as i64)), @@ -526,7 +526,7 @@ impl<'a> ToInt for JsonRef<'a> { #[inline] fn to_uint(&self, ctx: &mut EvalContext, tp: FieldTypeTp) -> Result { let val = match self.get_type() { - JsonType::Object | JsonType::Array => Ok(ctx + JsonType::Object | JsonType::Array | JsonType::Opaque => Ok(ctx .handle_truncate_err(Error::truncated_wrong_val("Integer", self.to_string())) .map(|_| 0)?), JsonType::Literal => Ok(self.get_literal().map_or(0, |x| x as u64)), diff --git a/components/tidb_query_datatype/src/codec/mysql/json/binary.rs b/components/tidb_query_datatype/src/codec/mysql/json/binary.rs index af66980460e..9b8264ee3fb 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/binary.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/binary.rs @@ -80,6 +80,14 @@ impl<'a> JsonRef<'a> { &self.value()[val_offset..val_offset + str_len as usize + len_len], ) } + JsonType::Opaque => { + let (opaque_bytes_len, len_len) = + NumberCodec::try_decode_var_u64(&self.value()[val_offset + 1..])?; + 
JsonRef::new( + val_type, + &self.value()[val_offset..val_offset + opaque_bytes_len as usize + len_len + 1], + ) + } _ => { let data_size = NumberCodec::decode_u32_le(&self.value()[val_offset + ELEMENT_COUNT_LEN..]) diff --git a/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs b/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs index fe8bb2c35d7..f948a172ef0 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs @@ -37,6 +37,7 @@ impl<'a> JsonRef<'a> { .map_or(PRECEDENCE_NULL, |_| PRECEDENCE_BOOLEAN), JsonType::I64 | JsonType::U64 | JsonType::Double => PRECEDENCE_NUMBER, JsonType::String => PRECEDENCE_STRING, + JsonType::Opaque => PRECEDENCE_OPAQUE, } } @@ -140,6 +141,15 @@ impl<'a> PartialOrd for JsonRef<'a> { } Some(left_count.cmp(&right_count)) } + JsonType::Opaque => { + if let (Ok(left), Ok(right)) = + (self.get_opaque_bytes(), right.get_opaque_bytes()) + { + left.partial_cmp(right) + } else { + return None; + } + } }; } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs b/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs index 4e4094f0ae3..51ca3ba0da0 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs @@ -5,7 +5,10 @@ use std::{collections::BTreeMap, convert::TryInto, f64, str}; use codec::{number::NumberCodec, prelude::*}; use super::{constants::*, Json, JsonRef, JsonType}; -use crate::codec::{Error, Result}; +use crate::{ + codec::{Error, Result}, + FieldTypeTp, +}; impl<'a> JsonRef<'a> { fn encoded_len(&self) -> usize { @@ -211,6 +214,14 @@ pub trait JsonEncoder: NumberEncoder { self.write_bytes(bytes)?; Ok(()) } + + fn write_json_opaque(&mut self, typ: FieldTypeTp, bytes: &[u8]) -> Result<()> { + self.write_u8(typ.to_u8().unwrap())?; + let bytes_len = bytes.len() as u64; + 
self.write_var_u64(bytes_len)?; + self.write_bytes(bytes)?; + Ok(()) + } } pub trait JsonDatumPayloadChunkEncoder: BufferWriter { @@ -243,6 +254,12 @@ pub trait JsonDecoder: NumberDecoder { } JsonType::I64 | JsonType::U64 | JsonType::Double => self.read_bytes(NUMBER_LEN)?, JsonType::Literal => self.read_bytes(LITERAL_LEN)?, + JsonType::Opaque => { + let value = self.bytes(); + // the first byte of opaque stores the MySQL type code + let (opaque_bytes_len, len_len) = NumberCodec::try_decode_var_u64(&value[1..])?; + self.read_bytes(opaque_bytes_len as usize + len_len + 1)? + } }; Ok(Json::new(tp, Vec::from(value))) } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/json_type.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_type.rs index c6fd25ec688..28c4d275471 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/json_type.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/json_type.rs @@ -1,6 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
use super::{JsonRef, JsonType}; +use crate::FieldTypeTp; const JSON_TYPE_BOOLEAN: &[u8] = b"BOOLEAN"; const JSON_TYPE_NONE: &[u8] = b"NULL"; @@ -10,6 +11,9 @@ const JSON_TYPE_DOUBLE: &[u8] = b"DOUBLE"; const JSON_TYPE_STRING: &[u8] = b"STRING"; const JSON_TYPE_OBJECT: &[u8] = b"OBJECT"; const JSON_TYPE_ARRAY: &[u8] = b"ARRAY"; +const JSON_TYPE_BIT: &[u8] = b"BIT"; +const JSON_TYPE_BLOB: &[u8] = b"BLOB"; +const JSON_TYPE_OPAQUE: &[u8] = b"OPAQUE"; impl<'a> JsonRef<'a> { /// `json_type` is the implementation for @@ -26,6 +30,19 @@ impl<'a> JsonRef<'a> { Some(_) => JSON_TYPE_BOOLEAN, None => JSON_TYPE_NONE, }, + JsonType::Opaque => match self.get_opaque_type() { + Ok( + FieldTypeTp::TinyBlob + | FieldTypeTp::MediumBlob + | FieldTypeTp::LongBlob + | FieldTypeTp::Blob + | FieldTypeTp::String + | FieldTypeTp::VarString + | FieldTypeTp::VarChar, + ) => JSON_TYPE_BLOB, + Ok(FieldTypeTp::Bit) => JSON_TYPE_BIT, + _ => JSON_TYPE_OPAQUE, + }, } } } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs index 480ac5db129..c4e3a9ebf5c 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs @@ -90,11 +90,12 @@ use super::super::{datum::Datum, Error, Result}; use crate::{ codec::{ convert::ConvertTo, - data_type::{Decimal, Real}, + data_type::{BytesRef, Decimal, Real}, mysql, mysql::{Duration, Time, TimeType}, }, expr::EvalContext, + FieldTypeTp, }; const ERR_CONVERT_FAILED: &str = "Can not covert from "; @@ -109,6 +110,10 @@ pub enum JsonType { U64 = 0x0a, Double = 0x0b, String = 0x0c, + + // It's a special value for the compatibility with MySQL. + // It will store the raw buffer containing unexpected type (e.g. Binary). + Opaque = 0x0d, } impl TryFrom for JsonType { @@ -206,6 +211,20 @@ impl<'a> JsonRef<'a> { Ok(str::from_utf8(self.get_str_bytes()?)?) 
} + // Returns the opaque value in bytes + pub(crate) fn get_opaque_bytes(&self) -> Result<&'a [u8]> { + assert_eq!(self.type_code, JsonType::Opaque); + let val = self.value(); + let (str_len, len_len) = NumberCodec::try_decode_var_u64(&val[1..])?; + Ok(&val[(len_len + 1)..len_len + 1 + str_len as usize]) + } + + pub(crate) fn get_opaque_type(&self) -> Result { + assert_eq!(self.type_code, JsonType::Opaque); + let val = self.value(); + FieldTypeTp::from_u8(val[0]).ok_or(box_err!("invalid opaque type code")) + } + // Return whether the value is zero. // https://dev.mysql.com/doc/refman/8.0/en/json.html#Converting%20between%20JSON%20and%20non-JSON%20values pub(crate) fn is_zero(&self) -> bool { @@ -217,6 +236,7 @@ impl<'a> JsonRef<'a> { JsonType::U64 => self.get_u64() == 0, JsonType::Double => self.get_double() == 0f64, JsonType::String => false, + JsonType::Opaque => false, } } @@ -284,6 +304,12 @@ impl Json { Ok(Self::new(JsonType::String, value)) } + pub fn from_opaque(typ: FieldTypeTp, bytes: BytesRef<'_>) -> Result { + let mut value = vec![]; + value.write_json_opaque(typ, bytes)?; + Ok(Self::new(JsonType::Opaque, value)) + } + /// Creates a `literal` JSON from a `bool` pub fn from_bool(b: bool) -> Result { let mut value = vec![]; @@ -414,7 +440,7 @@ impl<'a> ConvertTo for JsonRef<'a> { #[inline] fn convert(&self, ctx: &mut EvalContext) -> Result { let d = match self.get_type() { - JsonType::Array | JsonType::Object => ctx + JsonType::Array | JsonType::Object | JsonType::Opaque => ctx .handle_truncate_err(Error::truncated_wrong_val("Float", self.to_string())) .map(|_| 0f64)?, JsonType::U64 => self.get_u64() as f64, diff --git a/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs b/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs index 8d1b5c0d453..8c88153defc 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs @@ -232,7 +232,8 @@ impl<'a> 
BinaryModifier<'a> { | JsonType::I64 | JsonType::U64 | JsonType::Double - | JsonType::String => { + | JsonType::String + | JsonType::Opaque => { buf.extend_from_slice(self.old.value); } JsonType::Object | JsonType::Array => { diff --git a/components/tidb_query_datatype/src/codec/mysql/json/serde.rs b/components/tidb_query_datatype/src/codec/mysql/json/serde.rs index b2b2f421bcb..d15f728ed10 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/serde.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/serde.rs @@ -107,6 +107,17 @@ impl<'a> Serialize for JsonRef<'a> { } tup.end() } + JsonType::Opaque => { + let bytes = self + .get_opaque_bytes() + .map_err(|_| SerError::custom("invalid opaque value"))?; + let typ = self + .get_opaque_type() + .map_err(|_| SerError::custom("invalid opaque type code"))?; + + let str = format!("base64:type{}:{}", typ, base64::encode(bytes)); + serializer.serialize_str(&str) + } } } } diff --git a/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs b/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs index 1ee5104b723..bedbc7324ce 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs @@ -6,7 +6,7 @@ //! According to //! //! The row format is: -//! ``` +//! ```ignore //! | version | flag | number_of_non_null_columns | number_of_null_columns | non_null_column_ids | null_column_ids | value_offsets | values | //! |---------| ---- | -------------------------- | ---------------------- | ------------------- | --------------- | ------------- | ------ | //! 
``` diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index 7fb118dfbec..50ea93d0ade 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -28,7 +28,9 @@ use tidb_query_datatype::{ }; use tipb::{Expr, FieldType}; -use crate::{types::RpnExpressionBuilder, RpnExpressionNode, RpnFnCallExtra, RpnFnMeta}; +use crate::{ + types::RpnExpressionBuilder, RpnExpressionNode, RpnFnCallExtra, RpnFnMeta, RpnStackNode, +}; fn get_cast_fn_rpn_meta( is_from_constant: bool, @@ -1288,13 +1290,30 @@ fn cast_uint_as_json(val: Option<&Int>) -> Result> { } } -#[rpn_fn(nullable, capture = [extra])] +#[rpn_fn(nullable, capture = [args, extra])] #[inline] -fn cast_string_as_json(extra: &RpnFnCallExtra<'_>, val: Option) -> Result> { +fn cast_string_as_json( + args: &[RpnStackNode<'_>], + extra: &RpnFnCallExtra<'_>, + val: Option, +) -> Result> { match val { None => Ok(None), Some(val) => { - if extra + let typ = args[0].field_type(); + if typ.is_binary_string_like() { + let mut buf = val; + + let mut vec; + if typ.tp() == FieldTypeTp::String { + vec = (*val).to_owned(); + // the `flen` of string is always greater than zero + vec.resize(typ.flen().try_into().unwrap(), 0); + buf = &vec; + } + + Ok(Some(Json::from_opaque(typ.tp(), buf)?)) + } else if extra .ret_field_type .as_accessor() .flag() @@ -1467,12 +1486,16 @@ fn cast_enum_as_time( } } -#[rpn_fn(nullable, capture = [extra])] +#[rpn_fn(nullable, capture = [args, extra])] #[inline] -fn cast_enum_as_json(extra: &RpnFnCallExtra, val: Option) -> Result> { +fn cast_enum_as_json( + args: &[RpnStackNode<'_>], + extra: &RpnFnCallExtra, + val: Option, +) -> Result> { match val { None => Ok(None), - Some(val) => cast_string_as_json(extra, Some(val.name())), + Some(val) => cast_string_as_json(args, extra, Some(val.name())), } } @@ -1557,6 +1580,24 @@ mod tests { assert!(r.is_none()); } + fn test_none_with_args_and_extra(func: F) + where + F: 
Fn(&[RpnStackNode<'_>], &RpnFnCallExtra, Option) -> Result>, + { + let value = ScalarValue::Bytes(None); + let field_type = FieldType::default(); + let args: [RpnStackNode<'_>; 1] = [RpnStackNode::Scalar { + value: &value, + field_type: &field_type, + }]; + let ret_field_type: FieldType = FieldType::default(); + let extra = RpnFnCallExtra { + ret_field_type: &ret_field_type, + }; + let r = func(&args, &extra, None).unwrap(); + assert!(r.is_none()); + } + fn test_none_with_metadata(func: F) where F: Fn(&tipb::InUnionMetadata, Option) -> Result>, @@ -2028,7 +2069,7 @@ mod tests { #[test] fn test_enum_as_json() { - test_none_with_extra(cast_enum_as_json); + test_none_with_args_and_extra(cast_enum_as_json); let mut jo1: BTreeMap = BTreeMap::new(); jo1.insert( @@ -2107,13 +2148,20 @@ mod tests { ), ]; for (input, expect, parse_to_json) in cs { + let arg_type = FieldType::default(); + let arg_value = ScalarValue::Enum(Some(input.to_owned())); + let args = [RpnStackNode::Scalar { + value: &arg_value, + field_type: &arg_type, + }]; + let mut rft = FieldType::default(); if parse_to_json { let fta = rft.as_mut_accessor(); fta.set_flag(FieldTypeFlag::PARSE_TO_JSON); } let extra = make_extra(&rft); - let result = cast_enum_as_json(&extra, Some(input)); + let result = cast_enum_as_json(&args, &extra, Some(input)); let result_str = result.as_ref().map(|x| x.as_ref().map(|x| x.to_string())); let log = format!( "input: {}, parse_to_json: {}, expect: {:?}, result: {:?}", @@ -6647,7 +6695,7 @@ mod tests { #[test] fn test_string_as_json() { - test_none_with_extra(cast_string_as_json); + test_none_with_args_and_extra(cast_string_as_json); let mut jo1: BTreeMap = BTreeMap::new(); jo1.insert( @@ -6657,16 +6705,19 @@ mod tests { // HasParseToJSONFlag let cs = vec![ ( + FieldType::default(), "{\"a\": \"b\"}".to_string(), Json::from_object(jo1).unwrap(), true, ), ( + FieldType::default(), "{}".to_string(), Json::from_object(BTreeMap::new()).unwrap(), true, ), ( + FieldType::default(), "[1, 
2, 3]".to_string(), Json::from_array(vec![ Json::from_i64(1).unwrap(), @@ -6677,49 +6728,109 @@ mod tests { true, ), ( + FieldType::default(), "[]".to_string(), Json::from_array(Vec::new()).unwrap(), true, ), ( + FieldType::default(), "9223372036854775807".to_string(), Json::from_i64(9223372036854775807).unwrap(), true, ), ( + FieldType::default(), "-9223372036854775808".to_string(), Json::from_i64(-9223372036854775808).unwrap(), true, ), ( + FieldType::default(), "18446744073709551615".to_string(), Json::from_f64(18446744073709552000.0).unwrap(), true, ), // FIXME: f64::MAX.to_string() to json should success // (f64::MAX.to_string(), Json::from_f64(f64::MAX), true), - ("0.0".to_string(), Json::from_f64(0.0).unwrap(), true), ( + FieldType::default(), + "0.0".to_string(), + Json::from_f64(0.0).unwrap(), + true, + ), + ( + FieldType::default(), "\"abcde\"".to_string(), Json::from_string("abcde".to_string()).unwrap(), true, ), ( + FieldType::default(), "\"\"".to_string(), Json::from_string("".to_string()).unwrap(), true, ), - ("true".to_string(), Json::from_bool(true).unwrap(), true), - ("false".to_string(), Json::from_bool(false).unwrap(), true), + ( + FieldType::default(), + "true".to_string(), + Json::from_bool(true).unwrap(), + true, + ), + ( + FieldType::default(), + "false".to_string(), + Json::from_bool(false).unwrap(), + true, + ), + ( + FieldTypeBuilder::new() + .tp(FieldTypeTp::String) + .flen(4) + .charset(CHARSET_BIN) + .collation(Collation::Binary) + .build(), + "a".to_string(), + Json::from_opaque(FieldTypeTp::String, &[97, 0, 0, 0]).unwrap(), + true, + ), + ( + FieldTypeBuilder::new() + .tp(FieldTypeTp::String) + .flen(256) + .charset(CHARSET_BIN) + .collation(Collation::Binary) + .build(), + "".to_string(), + Json::from_opaque(FieldTypeTp::String, &[0; 256]).unwrap(), + true, + ), + ( + FieldTypeBuilder::new() + .tp(FieldTypeTp::VarChar) + .flen(256) + .charset(CHARSET_BIN) + .collation(Collation::Binary) + .build(), + "a".to_string(), + 
Json::from_opaque(FieldTypeTp::String, &[97]).unwrap(), + true, + ), ]; - for (input, expect, parse_to_json) in cs { + for (arg_type, input, expect, parse_to_json) in cs { + let arg_value = ScalarValue::Bytes(Some(input.clone().into_bytes())); + let args = [RpnStackNode::Scalar { + value: &arg_value, + field_type: &arg_type, + }]; + let mut rft = FieldType::default(); if parse_to_json { let fta = rft.as_mut_accessor(); fta.set_flag(FieldTypeFlag::PARSE_TO_JSON); } let extra = make_extra(&rft); - let result = cast_string_as_json(&extra, Some(&input.clone().into_bytes())); + let result = cast_string_as_json(&args, &extra, Some(&input.clone().into_bytes())); let result_str = result.as_ref().map(|x| x.as_ref().map(|x| x.to_string())); let log = format!( "input: {}, parse_to_json: {}, expect: {:?}, result: {:?}", From aaf124e24bfceff0839ffed127b8fbe70a18321f Mon Sep 17 00:00:00 2001 From: Hu# Date: Fri, 2 Sep 2022 11:18:24 +0800 Subject: [PATCH 0182/1149] *: Block reads, writes and schedules before finishing flashback (#13348) ref tikv/tikv#13303 Add Msg and peer's flashback state field Signed-off-by: husharp Co-authored-by: Ti Chi Robot --- components/error_code/src/raftstore.rs | 2 + components/raftstore/src/errors.rs | 9 + components/raftstore/src/store/fsm/peer.rs | 56 +++- components/raftstore/src/store/metrics.rs | 3 +- components/raftstore/src/store/msg.rs | 3 + components/raftstore/src/store/peer.rs | 57 +++- components/test_raftstore/src/cluster.rs | 24 +- components/tikv_kv/src/lib.rs | 2 + components/txn_types/src/types.rs | 4 + src/server/raftkv.rs | 15 +- .../txn/commands/acquire_pessimistic_lock.rs | 1 + .../txn/commands/flashback_to_version.rs | 1 + src/storage/txn/commands/prewrite.rs | 1 + src/storage/txn/scheduler.rs | 5 +- tests/integrations/raftstore/mod.rs | 1 + .../integrations/raftstore/test_flashback.rs | 289 ++++++++++++++++++ 16 files changed, 462 insertions(+), 11 deletions(-) create mode 100644 tests/integrations/raftstore/test_flashback.rs 
diff --git a/components/error_code/src/raftstore.rs b/components/error_code/src/raftstore.rs index 2926c69c21e..29c4c3c1849 100644 --- a/components/error_code/src/raftstore.rs +++ b/components/error_code/src/raftstore.rs @@ -30,6 +30,8 @@ define_error_codes!( DEADLINE_EXCEEDED => ("DeadlineExceeded", "", ""), PENDING_PREPARE_MERGE => ("PendingPrepareMerge", "", ""), RECOVERY_IN_PROGRESS => ("RecoveryInProgress", "", ""), + // TODO: add FLASHBACK in errorpb + FLASHBACK_IN_PROGRESS => ("RecoveryInProgress", "", ""), SNAP_ABORT => ("SnapAbort", "", ""), SNAP_TOO_MANY => ("SnapTooMany", "", ""), diff --git a/components/raftstore/src/errors.rs b/components/raftstore/src/errors.rs index 89648de7731..1adaef08c3f 100644 --- a/components/raftstore/src/errors.rs +++ b/components/raftstore/src/errors.rs @@ -58,6 +58,9 @@ pub enum Error { #[error("region {0} is in the recovery progress")] RecoveryInProgress(u64), + #[error("region {0} is in the flashback progress")] + FlashbackInProgress(u64), + #[error( "key {} is not in region key range [{}, {}) for region {}", log_wrappers::Value::key(.0), @@ -241,6 +244,11 @@ impl From for errorpb::Error { e.set_region_id(region_id); errorpb.set_recovery_in_progress(e); } + Error::FlashbackInProgress(region_id) => { + let mut e = errorpb::RecoveryInProgress::default(); + e.set_region_id(region_id); + errorpb.set_recovery_in_progress(e); + } _ => {} }; @@ -275,6 +283,7 @@ impl ErrorCodeExt for Error { Error::NotLeader(..) => error_code::raftstore::NOT_LEADER, Error::DiskFull(..) => error_code::raftstore::DISK_FULL, Error::RecoveryInProgress(..) => error_code::raftstore::RECOVERY_IN_PROGRESS, + Error::FlashbackInProgress(..) => error_code::raftstore::FLASHBACK_IN_PROGRESS, Error::StaleCommand => error_code::raftstore::STALE_COMMAND, Error::RegionNotInitialized(_) => error_code::raftstore::REGION_NOT_INITIALIZED, Error::KeyNotInRegion(..) 
=> error_code::raftstore::KEY_NOT_IN_REGION, diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index c587ea5f32c..eb79965d617 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -21,6 +21,7 @@ use collections::{HashMap, HashSet}; use engine_traits::{Engines, KvEngine, RaftEngine, SstMetaInfo, WriteBatchExt, CF_LOCK, CF_RAFT}; use error_code::ErrorCodeExt; use fail::fail_point; +use futures::channel::oneshot::Sender; use keys::{self, enc_end_key, enc_start_key}; use kvproto::{ errorpb, @@ -79,8 +80,8 @@ use crate::{ metrics::*, msg::{Callback, ExtCallback, InspectedRaftMessage}, peer::{ - ConsistencyState, ForceLeaderState, Peer, PersistSnapshotResult, StaleState, - UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, + ConsistencyState, FlashbackState, ForceLeaderState, Peer, PersistSnapshotResult, + StaleState, UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryState, UnsafeRecoveryWaitApplySyncer, TRANSFER_LEADER_COMMAND_REPLY_CTX, }, @@ -923,6 +924,38 @@ where syncer.report_for_self(self_report); } + // Call msg PrepareFlashback to stop the scheduling and RW tasks. + // Once called, it will wait for the channel's notification in FlashbackState to + // finish. We place a flag in the request, which is checked when the + // pre_propose_raft_command is called. Stopping tasks is done by applying + // the flashback-only command in this way, But for RW local reads which need + // to be considered, we let the leader lease to None to ensure that local reads + // are not executed. 
+ fn on_prepare_flashback(&mut self, ch: Sender) { + info!( + "prepare flashback"; + "region_id" => self.region().get_id(), + "peer_id" => self.fsm.peer.peer_id(), + ); + if self.fsm.peer.flashback_state.is_some() { + ch.send(false).unwrap(); + return; + } + self.fsm.peer.flashback_state = Some(FlashbackState::new(ch)); + // Let the leader lease to None to ensure that local reads are not executed. + self.fsm.peer.leader_lease_mut().expire_remote_lease(); + self.fsm.peer.maybe_finish_flashback_wait_apply(); + } + + fn on_finish_flashback(&mut self) { + info!( + "finish flashback"; + "region_id" => self.region().get_id(), + "peer_id" => self.fsm.peer.peer_id(), + ); + self.fsm.peer.flashback_state.take(); + } + fn on_casual_msg(&mut self, msg: CasualMessage) { match msg { CasualMessage::SplitRegion { @@ -1335,6 +1368,8 @@ where SignificantMsg::UnsafeRecoveryFillOutReport(syncer) => { self.on_unsafe_recovery_fill_out_report(syncer) } + SignificantMsg::PrepareFlashback(ch) => self.on_prepare_flashback(ch), + SignificantMsg::FinishFlashback => self.on_finish_flashback(), } } @@ -2172,6 +2207,10 @@ where if self.fsm.peer.unsafe_recovery_state.is_some() { self.check_unsafe_recovery_state(); } + // TODO: combine recovery state and flashback state as a wait apply queue. + if self.fsm.peer.flashback_state.is_some() { + self.fsm.peer.maybe_finish_flashback_wait_apply(); + } } fn retry_pending_prepare_merge(&mut self, applied_index: u64) { @@ -4737,12 +4776,23 @@ where return Ok(Some(resp)); } - // Check whether the store has the right peer to handle the request. let region_id = self.region_id(); + // When in the flashback state, we should not allow any other request to be + // proposed. 
+ if self.fsm.peer.flashback_state.is_some() { + self.ctx.raft_metrics.invalid_proposal.flashback.inc(); + let flags = WriteBatchFlags::from_bits_truncate(msg.get_header().get_flags()); + if !flags.contains(WriteBatchFlags::FLASHBACK) { + return Err(Error::FlashbackInProgress(self.region_id())); + } + } + + // Check whether the store has the right peer to handle the request. let leader_id = self.fsm.peer.leader_id(); let request = msg.get_requests(); if self.fsm.peer.force_leader.is_some() { + self.ctx.raft_metrics.invalid_proposal.force_leader.inc(); // in force leader state, forbid requests to make the recovery progress less // error-prone if !(msg.has_admin_request() diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index ad4ee7e7f98..719a2d8c09a 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -201,6 +201,8 @@ make_static_metric! { read_index_no_leader, region_not_initialized, is_applying_snapshot, + force_leader, + flashback, } pub label_enum RaftLogGcSkippedReason { @@ -271,7 +273,6 @@ make_static_metric! { pub struct LoadBaseSplitEventCounterVec: IntCounter { "type" => LoadBaseSplitEventType, } - } lazy_static! 
{ diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 251094e6475..bb8c2c0bd89 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -7,6 +7,7 @@ use std::{borrow::Cow, fmt}; use collections::HashSet; use engine_traits::{CompactedEvent, KvEngine, Snapshot}; +use futures::channel::oneshot::Sender; use kvproto::{ import_sstpb::SstMeta, kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp}, @@ -456,6 +457,8 @@ where UnsafeRecoveryDestroy(UnsafeRecoveryExecutePlanSyncer), UnsafeRecoveryWaitApply(UnsafeRecoveryWaitApplySyncer), UnsafeRecoveryFillOutReport(UnsafeRecoveryFillOutReportSyncer), + PrepareFlashback(Sender), + FinishFlashback, } /// Message that will be sent to a peer. diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 91698be98e9..53747f082e4 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -23,7 +23,8 @@ use engine_traits::{ }; use error_code::ErrorCodeExt; use fail::fail_point; -use getset::Getters; +use futures::channel::oneshot::Sender; +use getset::{Getters, MutGetters}; use kvproto::{ errorpb, kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp, LockInfo}, @@ -706,7 +707,33 @@ pub enum UnsafeRecoveryState { Destroy(UnsafeRecoveryExecutePlanSyncer), } -#[derive(Getters)] +// This state is set by the peer fsm when invoke msg PrepareFlashback. Once set, +// it is checked every time this peer applies a new entry or a snapshot, +// if the latest committed index is met, the syncer will be called to notify the +// result. 
+#[derive(Debug)] +pub struct FlashbackState(Option>); + +impl FlashbackState { + pub fn new(ch: Sender) -> Self { + FlashbackState(Some(ch)) + } + + pub fn finish_wait_apply(&mut self) { + if self.0.is_none() { + return; + } + let ch = self.0.take().unwrap(); + match ch.send(true) { + Ok(_) => {} + Err(e) => { + error!("Fail to notify flashback state"; "err" => ?e); + } + } + } +} + +#[derive(Getters, MutGetters)] pub struct Peer where EK: KvEngine, @@ -731,7 +758,7 @@ where proposals: ProposalQueue>, leader_missing_time: Option, - #[getset(get = "pub")] + #[getset(get = "pub", get_mut = "pub")] leader_lease: Lease, pending_reads: ReadIndexQueue>, /// Threshold of long uncommitted proposals. @@ -887,6 +914,7 @@ where /// lead_transferee if the peer is in a leadership transferring. pub lead_transferee: u64, pub unsafe_recovery_state: Option, + pub flashback_state: Option, } impl Peer @@ -1018,6 +1046,7 @@ where last_region_buckets: None, lead_transferee: raft::INVALID_ID, unsafe_recovery_state: None, + flashback_state: None, }; // If this region has only one peer and I am the one, campaign directly. @@ -2378,6 +2407,10 @@ where debug!("unsafe recovery finishes applying a snapshot"); self.unsafe_recovery_maybe_finish_wait_apply(/* force= */ false); } + if self.flashback_state.is_some() { + debug!("flashback finishes applying a snapshot"); + self.maybe_finish_flashback_wait_apply(); + } } // If `apply_snap_ctx` is none, it means this snapshot does not // come from the ready but comes from the unfinished snapshot task @@ -3352,6 +3385,13 @@ where "peer_id" => self.peer.get_id(), ); None + } else if self.flashback_state.is_some() { + debug!( + "prevents renew lease while in flashback state"; + "region_id" => self.region_id, + "peer_id" => self.peer.get_id(), + ); + None } else { self.leader_lease.renew(ts); let term = self.term(); @@ -4272,6 +4312,7 @@ where // In `pre_propose_raft_command`, it rejects all the requests expect conf-change // if in force leader state. 
if self.force_leader.is_some() { + poll_ctx.raft_metrics.invalid_proposal.force_leader.inc(); panic!( "{} propose normal in force leader state {:?}", self.tag, self.force_leader @@ -4945,6 +4986,16 @@ where } } } + + pub fn maybe_finish_flashback_wait_apply(&mut self) { + let finished = + self.raft_group.raft.raft_log.applied == self.raft_group.raft.raft_log.last_index(); + if finished { + if let Some(flashback_state) = self.flashback_state.as_mut() { + flashback_state.finish_wait_apply(); + } + } + } } #[derive(Default, Debug)] diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 097e74f157b..79f0b8ef709 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -19,7 +19,7 @@ use engine_traits::{ WriteBatchExt, CF_DEFAULT, CF_RAFT, }; use file_system::IoRateLimiter; -use futures::executor::block_on; +use futures::{self, channel::oneshot, executor::block_on}; use kvproto::{ errorpb::Error as PbError, kvrpcpb::{ApiVersion, Context}, @@ -1411,6 +1411,28 @@ impl Cluster { .unwrap(); } + pub async fn call_and_wait_prepare_flashback(&mut self, region_id: u64, store_id: u64) { + let router = self.sim.rl().get_router(store_id).unwrap(); + let (tx, rx) = oneshot::channel(); + + router + .significant_send(region_id, SignificantMsg::PrepareFlashback(tx)) + .unwrap(); + + let prepared = rx.await.unwrap(); + if !prepared { + panic!("prepare flashback failed"); + } + } + + pub fn call_finish_flashback(&mut self, region_id: u64, store_id: u64) { + let router = self.sim.rl().get_router(store_id).unwrap(); + + router + .significant_send(region_id, SignificantMsg::FinishFlashback) + .unwrap(); + } + pub fn must_split(&mut self, region: &metapb::Region, split_key: &[u8]) { let mut try_cnt = 0; let split_count = self.pd_client.get_split_count(); diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 466bd973906..64a05a98622 100644 --- 
a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -255,6 +255,8 @@ pub struct SnapContext<'a> { // `key_ranges` is used in replica read. It will send to // the leader via raft "read index" to check memory locks. pub key_ranges: Vec, + // Marks that this read is a FlashbackToVersionReadPhase. + pub for_flashback: bool, } /// Engine defines the common behaviour for a storage engine type. diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 75df337f80c..9496994f38f 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -513,6 +513,8 @@ pub struct TxnExtra { // Marks that this transaction is a 1PC transaction. RaftKv should set this flag // in the raft command request. pub one_pc: bool, + // Marks that this transaction is a flashback transaction. + pub for_flashback: bool, } impl TxnExtra { @@ -537,6 +539,8 @@ bitflags! { /// Indicates this request is a transfer leader command that needs to be proposed /// like a normal command. const TRANSFER_LEADER_PROPOSAL = 0b00000100; + /// Indicates this request is a flashback transaction. 
+ const FLASHBACK = 0b00001000; } } diff --git a/src/server/raftkv.rs b/src/server/raftkv.rs index 0a3f2fdd742..9443ba26cd4 100644 --- a/src/server/raftkv.rs +++ b/src/server/raftkv.rs @@ -201,14 +201,20 @@ where cb: Callback>, ) -> Result<()> { let mut header = self.new_request_header(ctx.pb_ctx); + let mut flags = 0; if ctx.pb_ctx.get_stale_read() && !ctx.start_ts.is_zero() { let mut data = [0u8; 8]; (&mut data[..]) .encode_u64(ctx.start_ts.into_inner()) .unwrap(); - header.set_flags(WriteBatchFlags::STALE_READ.bits()); + flags |= WriteBatchFlags::STALE_READ.bits(); header.set_flag_data(data.into()); } + if ctx.for_flashback { + flags |= WriteBatchFlags::FLASHBACK.bits(); + } + header.set_flags(flags); + let mut cmd = RaftCmdRequest::default(); cmd.set_header(header); cmd.set_requests(vec![req].into()); @@ -252,9 +258,14 @@ where let reqs: Vec = batch.modifies.into_iter().map(Into::into).collect(); let txn_extra = batch.extra; let mut header = self.new_request_header(ctx); + let mut flags = 0; if txn_extra.one_pc { - header.set_flags(WriteBatchFlags::ONE_PC.bits()); + flags |= WriteBatchFlags::ONE_PC.bits(); + } + if txn_extra.for_flashback { + flags |= WriteBatchFlags::FLASHBACK.bits(); } + header.set_flags(flags); let mut cmd = RaftCmdRequest::default(); cmd.set_header(header); diff --git a/src/storage/txn/commands/acquire_pessimistic_lock.rs b/src/storage/txn/commands/acquire_pessimistic_lock.rs index 3632d847e59..949b347f251 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock.rs @@ -150,6 +150,7 @@ impl WriteCommand for AcquirePessimisticLock old_values: self.old_values, // One pc status is unkown AcquirePessimisticLock stage. 
one_pc: false, + for_flashback: false, }; let write_data = WriteData::new(txn.into_modifies(), extra); (pr, write_data, rows, ctx, None) diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index 058758888d5..3bb6f3aa268 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -107,6 +107,7 @@ impl WriteCommand for FlashbackToVersion { let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); + write_data.extra.for_flashback = true; Ok(WriteResult { ctx: self.ctx.clone(), to_be_write: write_data, diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index deca5733eb0..333d3eb1aca 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -670,6 +670,7 @@ impl Prewriter { old_values: self.old_values, // Set one_pc flag in TxnExtra to let CDC skip handling the resolver. one_pc: self.try_one_pc, + for_flashback: false, }; // Here the lock guards are taken and will be released after the write finishes. // If an error (KeyIsLocked or WriteConflict) occurs before, these lock guards diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 0bad0078821..a72bd671d0a 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -564,10 +564,13 @@ impl Scheduler { let tag = task.cmd.tag(); SCHED_STAGE_COUNTER_VEC.get(tag).snapshot.inc(); - let snap_ctx = SnapContext { + let mut snap_ctx = SnapContext { pb_ctx: task.cmd.ctx(), ..Default::default() }; + if let Command::FlashbackToVersionReadPhase { .. } = task.cmd { + snap_ctx.for_flashback = true; + } // The program is currently in scheduler worker threads. // Safety: `self.inner.worker_pool` should ensure that a TLS engine exists. 
match unsafe { with_tls_engine(|engine: &E| kv::snapshot(engine, snap_ctx)) }.await diff --git a/tests/integrations/raftstore/mod.rs b/tests/integrations/raftstore/mod.rs index efa118fb8f1..d34aae05e77 100644 --- a/tests/integrations/raftstore/mod.rs +++ b/tests/integrations/raftstore/mod.rs @@ -7,6 +7,7 @@ mod test_compact_lock_cf; mod test_compact_log; mod test_conf_change; mod test_early_apply; +mod test_flashback; mod test_hibernate; mod test_joint_consensus; mod test_lease_read; diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs new file mode 100644 index 00000000000..e4d0276f9e6 --- /dev/null +++ b/tests/integrations/raftstore/test_flashback.rs @@ -0,0 +1,289 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::time::Duration; + +use futures::executor::block_on; +use kvproto::metapb; +use test_raftstore::*; +use txn_types::WriteBatchFlags; + +#[test] +fn test_flahsback_for_applied_index() { + let mut cluster = new_node_cluster(0, 3); + cluster.run(); + + // write for cluster. + let value = vec![1_u8; 8096]; + multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); + + // prepare for flashback + let region = cluster.get_region(b"k1"); + block_on(cluster.call_and_wait_prepare_flashback(region.get_id(), 1)); + + let last_index = cluster + .raft_local_state(region.get_id(), 1) + .get_last_index(); + let appied_index = cluster.apply_state(region.get_id(), 1).get_applied_index(); + + assert_eq!(last_index, appied_index); +} + +#[test] +fn test_flashback_for_schedule() { + let mut cluster = new_node_cluster(0, 3); + cluster.run(); + + cluster.must_transfer_leader(1, new_peer(2, 2)); + cluster.must_transfer_leader(1, new_peer(1, 1)); + + // prepare for flashback + let region = cluster.get_region(b"k1"); + block_on(cluster.call_and_wait_prepare_flashback(region.get_id(), 1)); + + // verify the schedule is unabled. 
+ let mut region = cluster.get_region(b"k3"); + let admin_req = new_transfer_leader_cmd(new_peer(2, 2)); + let mut transfer_leader = + new_admin_request(region.get_id(), ®ion.take_region_epoch(), admin_req); + transfer_leader.mut_header().set_peer(new_peer(1, 1)); + let resp = cluster + .call_command_on_leader(transfer_leader, Duration::from_secs(3)) + .unwrap(); + let e = resp.get_header().get_error(); + // reuse recovery_in_progress error code. + assert_eq!( + e.get_recovery_in_progress(), + &kvproto::errorpb::RecoveryInProgress { + region_id: region.get_id(), + ..Default::default() + } + ); + + // verify the schedule can be executed if add flashback flag in request's + // header. + let mut region = cluster.get_region(b"k3"); + let admin_req = new_transfer_leader_cmd(new_peer(2, 2)); + let mut transfer_leader = + new_admin_request(region.get_id(), ®ion.take_region_epoch(), admin_req); + transfer_leader.mut_header().set_peer(new_peer(1, 1)); + transfer_leader + .mut_header() + .set_flags(WriteBatchFlags::FLASHBACK.bits()); + let resp = cluster + .call_command_on_leader(transfer_leader, Duration::from_secs(5)) + .unwrap(); + assert!(!resp.get_header().has_error()); + + cluster.call_finish_flashback(region.get_id(), 1); + // transfer leader to (1, 1) + cluster.must_transfer_leader(1, new_peer(1, 1)); +} + +#[test] +fn test_flahsback_for_write() { + let mut cluster = new_node_cluster(0, 3); + cluster.run(); + + // write for cluster + let value = vec![1_u8; 8096]; + multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); + + // prepare for flashback + let region = cluster.get_region(b"k1"); + block_on(cluster.call_and_wait_prepare_flashback(region.get_id(), 1)); + + // write will be blocked + let value = vec![1_u8; 8096]; + must_get_error_recovery_in_progress(&mut cluster, ®ion, new_put_cmd(b"k1", &value)); + + must_cmd_add_flashback_flag( + &mut cluster, + &mut region.clone(), + new_put_cmd(b"k1", &value), + ); + + 
cluster.call_finish_flashback(region.get_id(), 1); + + multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); +} + +#[test] +fn test_flahsback_for_read() { + let mut cluster = new_node_cluster(0, 3); + cluster.run(); + + // write for cluster + let value = vec![1_u8; 8096]; + multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); + // read for cluster + multi_do_cmd(&mut cluster, new_get_cf_cmd("write", b"k1")); + + // prepare for flashback + let region = cluster.get_region(b"k1"); + block_on(cluster.call_and_wait_prepare_flashback(region.get_id(), 1)); + + // read will be blocked + must_get_error_recovery_in_progress(&mut cluster, ®ion, new_get_cf_cmd("write", b"k1")); + + // verify the read can be executed if add flashback flag in request's + // header. + must_cmd_add_flashback_flag( + &mut cluster, + &mut region.clone(), + new_get_cf_cmd("write", b"k1"), + ); + + cluster.call_finish_flashback(region.get_id(), 1); + + multi_do_cmd(&mut cluster, new_get_cf_cmd("write", b"k1")); +} + +// LocalReader will attempt to renew the lease. +// However, when flashback is enabled, it will make the lease None and prevent +// renew lease. +#[test] +fn test_flahsback_for_local_read() { + let mut cluster = new_node_cluster(0, 3); + let election_timeout = configure_for_lease_read(&mut cluster, Some(50), None); + + // Avoid triggering the log compaction in this test case. + cluster.cfg.raft_store.raft_log_gc_threshold = 100; + + let node_id = 3u64; + let store_id = 3u64; + let peer = new_peer(store_id, node_id); + cluster.run(); + + cluster.must_put(b"k1", b"v1"); + let region = cluster.get_region(b"k1"); + cluster.must_transfer_leader(region.get_id(), peer.clone()); + + // check local read before prepare flashback + let state = cluster.raft_local_state(region.get_id(), store_id); + let last_index = state.get_last_index(); + // Make sure the leader transfer procedure timeouts. 
+ std::thread::sleep(election_timeout * 2); + must_read_on_peer(&mut cluster, peer.clone(), region.clone(), b"k1", b"v1"); + // Check the leader does a local read. + let state = cluster.raft_local_state(region.get_id(), store_id); + assert_eq!(state.get_last_index(), last_index); + + // prepare for flashback + block_on(cluster.call_and_wait_prepare_flashback(region.get_id(), store_id)); + + must_error_read_on_peer( + &mut cluster, + peer.clone(), + region.clone(), + b"k1", + Duration::from_secs(1), + ); + + // Wait for the leader's lease to expire to ensure that a renew lease interval + // has elapsed. + std::thread::sleep(election_timeout * 2); + must_error_read_on_peer( + &mut cluster, + peer.clone(), + region.clone(), + b"k1", + Duration::from_secs(1), + ); + + // Also check read by propose was blocked + let state = cluster.raft_local_state(region.get_id(), store_id); + assert_eq!(state.get_last_index(), last_index); + + cluster.call_finish_flashback(region.get_id(), store_id); + + // check local read after finish flashback + let state = cluster.raft_local_state(region.get_id(), store_id); + let last_index = state.get_last_index(); + // Make sure the leader transfer procedure timeouts. + std::thread::sleep(election_timeout * 2); + must_read_on_peer(&mut cluster, peer, region.clone(), b"k1", b"v1"); + + // Check the leader does a local read. 
+ let state = cluster.raft_local_state(region.get_id(), store_id); + assert_eq!(state.get_last_index(), last_index); +} + +#[test] +fn test_flahsback_for_status_cmd_as_region_detail() { + let mut cluster = new_node_cluster(0, 3); + cluster.run(); + + let region = cluster.get_region(b"k1"); + block_on(cluster.call_and_wait_prepare_flashback(region.get_id(), 1)); + + let leader = cluster.leader_of_region(1).unwrap(); + let region_detail = cluster.region_detail(1, 1); + assert!(region_detail.has_region()); + let region = region_detail.get_region(); + assert_eq!(region.get_id(), 1); + assert!(region.get_start_key().is_empty()); + assert!(region.get_end_key().is_empty()); + assert_eq!(region.get_peers().len(), 3); + let epoch = region.get_region_epoch(); + assert_eq!(epoch.get_conf_ver(), 1); + assert_eq!(epoch.get_version(), 1); + + assert!(region_detail.has_leader()); + assert_eq!(region_detail.get_leader(), &leader); +} + +fn multi_do_cmd(cluster: &mut Cluster, cmd: kvproto::raft_cmdpb::Request) { + for _ in 0..100 { + let mut reqs = vec![]; + for _ in 0..100 { + reqs.push(cmd.clone()); + } + cluster.batch_put(b"k1", reqs).unwrap(); + } +} + +fn must_cmd_add_flashback_flag( + cluster: &mut Cluster, + region: &mut metapb::Region, + cmd: kvproto::raft_cmdpb::Request, +) { + // verify the read can be executed if add flashback flag in request's + // header. 
+ let mut req = new_request( + region.get_id(), + region.take_region_epoch(), + vec![cmd], + false, + ); + let new_leader = cluster.query_leader(1, region.get_id(), Duration::from_secs(1)); + req.mut_header().set_peer(new_leader.unwrap()); + req.mut_header() + .set_flags(WriteBatchFlags::FLASHBACK.bits()); + let resp = cluster.call_command(req, Duration::from_secs(5)).unwrap(); + assert!(!resp.get_header().has_error()); +} + +fn must_get_error_recovery_in_progress( + cluster: &mut Cluster, + region: &metapb::Region, + cmd: kvproto::raft_cmdpb::Request, +) { + for _ in 0..100 { + let mut reqs = vec![]; + for _ in 0..100 { + reqs.push(cmd.clone()); + } + match cluster.batch_put(b"k1", reqs) { + Ok(_) => {} + Err(e) => { + assert_eq!( + e.get_recovery_in_progress(), + &kvproto::errorpb::RecoveryInProgress { + region_id: region.get_id(), + ..Default::default() + } + ); + } + } + } +} From a1d7b93635c06608d5b00592a6627c8ab5ad8a58 Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 2 Sep 2022 11:46:24 +0800 Subject: [PATCH 0183/1149] engine: default enable raft engine log recycling (#13372) ref tikv/tikv#13229 Signed-off-by: Lucasliang Co-authored-by: Xinye Tao --- Cargo.lock | 204 +++++++++++++++++++++------------------ etc/config-template.toml | 8 +- 2 files changed, 113 insertions(+), 99 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e76166d88c8..9463bbd717b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -209,7 +209,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d962799a5863fdf06fbf594e04102130582d010379137e9a98a7e2e693a5885" dependencies = [ "error-code", - "libc 0.2.125", + "libc 0.2.132", "wasm-bindgen", "winapi 0.3.9", ] @@ -240,15 +240,15 @@ version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "winapi 0.3.9", ] [[package]] name = "autocfg" -version = "1.0.0" +version 
= "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "aws" @@ -383,7 +383,7 @@ dependencies = [ "addr2line", "cc", "cfg-if 1.0.0", - "libc 0.2.125", + "libc 0.2.132", "miniz_oxide 0.4.4", "object", "rustc-demangle", @@ -533,7 +533,7 @@ dependencies = [ "bcc-sys", "bitflags", "byteorder", - "libc 0.2.125", + "libc 0.2.132", "regex", "thiserror", ] @@ -665,7 +665,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" dependencies = [ "cc", - "libc 0.2.125", + "libc 0.2.132", "pkg-config", ] @@ -691,7 +691,7 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7f788eaf239475a3c1e1acf89951255a46c4b9b46cf3e866fc4d0707b4b9e36" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "valgrind_request", ] @@ -861,7 +861,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f54d78e30b388d4815220c8dd03fea5656b6c6d32adb59e89061552a102f8da1" dependencies = [ "glob", - "libc 0.2.125", + "libc 0.2.132", "libloading", ] @@ -946,7 +946,7 @@ dependencies = [ "byteorder", "bytes", "error_code", - "libc 0.2.125", + "libc 0.2.132", "panic_hook", "protobuf", "rand 0.8.3", @@ -1005,7 +1005,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a89e2ae426ea83155dccf10c0fa6b1463ef6d5fcb44cee0b224a408fa640a62" dependencies = [ "core-foundation-sys", - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -1020,7 +1020,7 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9e393a7668fe1fad3075085b86c781883000b4ede868f43627b34a87c8b7ded" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "winapi 0.3.9", ] @@ -1078,7 +1078,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "63aaaf47e457badbcb376c65a49d0f182c317ebd97dc6d1ced94c8e1d09c0f3a" dependencies = [ "criterion", - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -1348,7 +1348,7 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "redox_users", "winapi 0.3.9", ] @@ -1601,7 +1601,7 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5115567ac25674e0043e472be13d14e537f37ea8aa4bdc4aef0c89add1db1ff" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "str-buf", ] @@ -1703,10 +1703,10 @@ dependencies = [ "grpcio", "kvproto", "lazy_static", - "libc 0.2.125", + "libc 0.2.132", "libloading", "matches", - "nix", + "nix 0.24.1", "once_cell", "protobuf", "rust-ini", @@ -1759,7 +1759,7 @@ dependencies = [ "crossbeam-utils 0.8.8", "fs2", "lazy_static", - "libc 0.2.125", + "libc 0.2.132", "maligned", "online_config", "openssl", @@ -1784,7 +1784,7 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed3d8a5e20435ff00469e51a0d82049bae66504b5c429920dadf9bb54d47b3f" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "thiserror", "winapi 0.3.9", ] @@ -1796,7 +1796,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d34cfa13a63ae058bfa601fe9e313bbdb3746427c1459185464ce0fcf62e1e8" dependencies = [ "cfg-if 1.0.0", - "libc 0.2.125", + "libc 0.2.132", "redox_syscall 0.2.11", "winapi 0.3.9", ] @@ -1809,7 +1809,7 @@ checksum = "d691fdb3f817632d259d09220d4cf0991dbb2c9e59e044a02a59194bf6e14484" dependencies = [ "cc", "lazy_static", - "libc 0.2.125", + "libc 0.2.132", "winapi 0.3.9", ] @@ -1837,7 +1837,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2adaffba6388640136149e18ed080b77a78611c1e1d6de75aedcdf78df5d4682" 
dependencies = [ "crc32fast", - "libc 0.2.125", + "libc 0.2.132", "libz-sys", "miniz_oxide 0.3.7", ] @@ -1878,7 +1878,7 @@ name = "fs2" version = "0.4.3" source = "git+https://github.com/tabokie/fs2-rs?branch=tikv#cd503764a19a99d74c1ab424dd13d6bcd093fcae" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "winapi 0.3.9", ] @@ -1904,7 +1904,7 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f41b048a94555da0f42f1d632e2e19510084fb8e303b0daa2816e733fb3644a0" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -2139,7 +2139,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "473a1265acc8ff1e808cd0a1af8cee3c2ee5200916058a2ca113c29f2d903571" dependencies = [ "cfg-if 0.1.10", - "libc 0.2.125", + "libc 0.2.132", "wasi 0.7.0", ] @@ -2151,7 +2151,7 @@ checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ "cfg-if 1.0.0", "js-sys", - "libc 0.2.125", + "libc 0.2.132", "wasi 0.10.2+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -2200,7 +2200,7 @@ dependencies = [ "futures-executor", "futures-util", "grpcio-sys", - "libc 0.2.125", + "libc 0.2.132", "log", "parking_lot 0.11.1", "protobuf", @@ -2237,7 +2237,7 @@ dependencies = [ "bindgen 0.59.2", "cc", "cmake", - "libc 0.2.125", + "libc 0.2.132", "libz-sys", "openssl-sys", "pkg-config", @@ -2305,7 +2305,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "307c3c9f937f38e3534b1d6447ecf090cafcc9744e4a6360e8b037b2cf5af120" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -2507,7 +2507,7 @@ checksum = "4816c66d2c8ae673df83366c18341538f234a26d65a9ecea5c348b453ac1d02f" dependencies = [ "bitflags", "inotify-sys", - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -2516,7 +2516,7 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -2543,7 +2543,7 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -2589,7 +2589,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2b1d42ef453b30b7387e113da1c83ab1605d90c5b4e0eb8e96d016ed3b8c160" dependencies = [ "getrandom 0.1.12", - "libc 0.2.125", + "libc 0.2.132", "log", ] @@ -2720,9 +2720,9 @@ checksum = "e32a70cf75e5846d53a673923498228bbec6a8624708a9ea5645f075d6276122" [[package]] name = "libc" -version = "0.2.125" +version = "0.2.132" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5916d2ae698f6de9bfb891ad7a8d65c09d232dc58cc4ac433c7da3b2fd84bc2b" +checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" [[package]] name = "libfuzzer-sys" @@ -2762,7 +2762,7 @@ dependencies = [ "bzip2-sys", "cc", "cmake", - "libc 0.2.125", + "libc 0.2.132", "libtitan_sys", "libz-sys", "lz4-sys", @@ -2780,7 +2780,7 @@ dependencies = [ "bzip2-sys", "cc", "cmake", - "libc 0.2.125", + "libc 0.2.132", "libz-sys", "lz4-sys", "snappy-sys", @@ -2794,7 +2794,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de5435b8549c16d423ed0c03dbaafe57cf6c3344744f1242520d59c9d8ecec66" dependencies = [ "cc", - "libc 0.2.125", + "libc 0.2.132", "pkg-config", "vcpkg", ] @@ -2850,7 +2850,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dca79aa95d8b3226213ad454d328369853be3a1382d89532a854f4d69640acae" dependencies = [ "cc", - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -2899,7 +2899,7 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -2908,7 +2908,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "winapi 0.3.9", ] @@ -2918,7 +2918,7 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -2989,7 +2989,7 @@ dependencies = [ "fuchsia-zircon-sys", "iovec", "kernel32-sys", - "libc 0.2.125", + "libc 0.2.132", "log", "miow 0.2.2", "net2", @@ -3003,7 +3003,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba272f85fa0b41fc91872be579b3bbe0f56b792aa361a380eb669469f68dafb2" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "log", "miow 0.3.7", "ntapi", @@ -3101,7 +3101,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8d96b2e1c8da3957d58100b09f102c6d9cfdfced01b7ec5a8974044bb09dbd4" dependencies = [ "lazy_static", - "libc 0.2.125", + "libc 0.2.132", "log", "openssl", "openssl-probe", @@ -3119,7 +3119,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "391630d12b68002ae1e25e8f974306474966550ad82dac6886fb8910c19568ae" dependencies = [ "cfg-if 0.1.10", - "libc 0.2.125", + "libc 0.2.132", "winapi 0.3.9", ] @@ -3131,8 +3131,22 @@ checksum = "8f17df307904acd05aa8e32e97bb20f2a0df1728bbc2d771ae8f9a90463441e9" dependencies = [ "bitflags", "cfg-if 1.0.0", - "libc 0.2.125", + "libc 0.2.132", + "memoffset", +] + +[[package]] +name = "nix" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e322c04a9e3440c327fca7b6c8a63e6890a32fa2ad689db972425f07e0d22abb" +dependencies = [ + 
"autocfg", + "bitflags", + "cfg-if 1.0.0", + "libc 0.2.132", "memoffset", + "pin-utils", ] [[package]] @@ -3189,7 +3203,7 @@ dependencies = [ "fsevent", "fsevent-sys", "inotify", - "libc 0.2.125", + "libc 0.2.132", "mio 0.6.23", "mio-extras", "walkdir", @@ -3342,7 +3356,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" dependencies = [ "hermit-abi", - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -3420,7 +3434,7 @@ dependencies = [ "bitflags", "cfg-if 1.0.0", "foreign-types", - "libc 0.2.125", + "libc 0.2.132", "once_cell", "openssl-macros", "openssl-sys", @@ -3460,7 +3474,7 @@ checksum = "e5f9bd0c2710541a3cda73d6f9ac4f1b240de4ae261065d309dbe73d9dceb42f" dependencies = [ "autocfg", "cc", - "libc 0.2.125", + "libc 0.2.132", "openssl-src", "pkg-config", "vcpkg", @@ -3490,7 +3504,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "winapi 0.3.9", ] @@ -3527,7 +3541,7 @@ checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" dependencies = [ "cfg-if 1.0.0", "instant", - "libc 0.2.125", + "libc 0.2.132", "redox_syscall 0.2.11", "smallvec", "winapi 0.3.9", @@ -3540,7 +3554,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28141e0cc4143da2443301914478dc976a61ffdb3f043058310c70df2fed8954" dependencies = [ "cfg-if 1.0.0", - "libc 0.2.125", + "libc 0.2.132", "redox_syscall 0.2.11", "smallvec", "windows-sys", @@ -3616,7 +3630,7 @@ checksum = "b8f94885300e262ef461aa9fd1afbf7df3caf9e84e271a74925d1c6c8b24830f" dependencies = [ "bitflags", "byteorder", - "libc 0.2.125", + "libc 0.2.132", "mmap", "nom 4.2.3", "phf", @@ -3759,7 +3773,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d27361d7578b410d0eb5fe815c2b2105b01ab770a7c738cb9a231457a809fcc7" dependencies = [ "ipnetwork", - "libc 0.2.125", + "libc 0.2.132", "pnet_base", "pnet_sys", "winapi 0.2.8", @@ -3771,7 +3785,7 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82f881a6d75ac98c5541db6144682d1773bb14c6fc50c6ebac7086c8f7f23c29" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "winapi 0.2.8", "ws2_32-sys", ] @@ -3785,9 +3799,9 @@ dependencies = [ "cfg-if 1.0.0", "findshlibs", "inferno", - "libc 0.2.125", + "libc 0.2.132", "log", - "nix", + "nix 0.24.1", "once_cell", "parking_lot 0.12.0", "protobuf", @@ -3859,7 +3873,7 @@ dependencies = [ "byteorder", "hex 0.4.2", "lazy_static", - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -3868,7 +3882,7 @@ version = "0.4.2" source = "git+https://github.com/tikv/procinfo-rs?rev=6599eb9dca74229b2c1fcc44118bef7eff127128#6599eb9dca74229b2c1fcc44118bef7eff127128" dependencies = [ "byteorder", - "libc 0.2.125", + "libc 0.2.132", "nom 2.2.1", "rustc_version 0.2.3", ] @@ -3893,7 +3907,7 @@ dependencies = [ "cfg-if 1.0.0", "fnv", "lazy_static", - "libc 0.2.125", + "libc 0.2.132", "memchr", "parking_lot 0.11.1", "protobuf", @@ -4042,7 +4056,7 @@ dependencies = [ [[package]] name = "raft-engine" version = "0.2.0" -source = "git+https://github.com/tikv/raft-engine.git#6a6fe3bd2e0a1ca0b4fc643800ddc93abe74cd87" +source = "git+https://github.com/tikv/raft-engine.git#9751c6dd5c20a056570c9fbfe62bad6e0d585094" dependencies = [ "byteorder", "crc32fast", @@ -4053,11 +4067,11 @@ dependencies = [ "hex 0.4.2", "if_chain", "lazy_static", - "libc 0.2.125", + "libc 0.2.132", "log", "lz4-sys", "memmap2", - "nix", + "nix 0.25.0", "num-derive", "num-traits", "parking_lot 0.12.0", @@ -4076,7 +4090,7 @@ dependencies = [ [[package]] name = "raft-engine-ctl" version = "0.2.0" -source = "git+https://github.com/tikv/raft-engine.git#6a6fe3bd2e0a1ca0b4fc643800ddc93abe74cd87" +source = 
"git+https://github.com/tikv/raft-engine.git#9751c6dd5c20a056570c9fbfe62bad6e0d585094" dependencies = [ "clap 3.1.6", "env_logger", @@ -4224,7 +4238,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" dependencies = [ "fuchsia-cprng", - "libc 0.2.125", + "libc 0.2.132", "rand_core 0.3.1", "rdrand", "winapi 0.3.9", @@ -4237,7 +4251,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" dependencies = [ "getrandom 0.1.12", - "libc 0.2.125", + "libc 0.2.132", "rand_chacha 0.2.1", "rand_core 0.5.1", "rand_hc 0.2.0", @@ -4249,7 +4263,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "rand_chacha 0.3.0", "rand_core 0.6.2", "rand_hc 0.3.0", @@ -4529,7 +4543,7 @@ dependencies = [ "grpcio", "kvproto", "lazy_static", - "libc 0.2.125", + "libc 0.2.132", "log", "online_config", "pdqselect", @@ -4592,7 +4606,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b72b84d47e8ec5a4f2872e8262b8f8256c5be1c938a7d6d3a867a3ba8f722f74" dependencies = [ "cc", - "libc 0.2.125", + "libc 0.2.132", "once_cell", "spin", "untrusted", @@ -4605,7 +4619,7 @@ name = "rocksdb" version = "0.3.0" source = "git+https://github.com/tikv/rust-rocksdb.git#827a5df22cd59dc708c4c6a87dd8735a2312773d" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "librocksdb_sys", ] @@ -4877,7 +4891,7 @@ dependencies = [ "bitflags", "core-foundation", "core-foundation-sys", - "libc 0.2.125", + "libc 0.2.132", "security-framework-sys", ] @@ -4888,7 +4902,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3676258fd3cfe2c9a0ec99ce3038798d847ce3e4bb17746373eb9f0f1ac16339" dependencies = [ "core-foundation-sys", - 
"libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -5084,7 +5098,7 @@ dependencies = [ "hex 0.4.2", "keys", "kvproto", - "libc 0.2.125", + "libc 0.2.132", "log", "log_wrappers", "pd_client", @@ -5142,7 +5156,7 @@ version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "signal-hook-registry", ] @@ -5152,7 +5166,7 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -5255,7 +5269,7 @@ version = "0.1.0" source = "git+https://github.com/busyjay/rust-snappy.git?branch=static-link#8c12738bad811397600455d6982aff754ea2ac44" dependencies = [ "cmake", - "libc 0.2.125", + "libc 0.2.132", "pkg-config", ] @@ -5283,7 +5297,7 @@ version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "winapi 0.3.9", ] @@ -5487,7 +5501,7 @@ dependencies = [ "cfg-if 1.0.0", "core-foundation-sys", "doc-comment", - "libc 0.2.125", + "libc 0.2.132", "ntapi", "once_cell", "rayon", @@ -5570,7 +5584,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" dependencies = [ "cfg-if 1.0.0", - "libc 0.2.125", + "libc 0.2.132", "rand 0.8.3", "redox_syscall 0.2.11", "remove_dir_all", @@ -5786,7 +5800,7 @@ dependencies = [ "hyper", "keys", "kvproto", - "libc 0.2.125", + "libc 0.2.132", "log_wrappers", "more-asserts", "online_config", @@ -6078,7 +6092,7 @@ dependencies = [ "keys", "kvproto", "lazy_static", - "libc 0.2.125", + "libc 0.2.132", "libloading", "log", "log_wrappers", @@ -6174,7 +6188,7 @@ dependencies = [ "hex 0.4.2", 
"keys", "kvproto", - "libc 0.2.125", + "libc 0.2.132", "log", "log_wrappers", "pd_client", @@ -6209,7 +6223,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e37706572f4b151dff7a0146e040804e9c26fe3a3118591112f05cf12a4216c1" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "paste", "tikv-jemalloc-sys", ] @@ -6222,7 +6236,7 @@ checksum = "aeab4310214fe0226df8bfeb893a291a58b19682e8a07e1e1d4483ad4200d315" dependencies = [ "cc", "fs_extra", - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -6231,7 +6245,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20612db8a13a6c06d57ec83953694185a367e16945f66565e8028d2c0bd76979" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "tikv-jemalloc-sys", ] @@ -6254,7 +6268,7 @@ version = "0.1.0" dependencies = [ "fxhash", "lazy_static", - "libc 0.2.125", + "libc 0.2.132", "mimalloc", "snmalloc-rs", "tcmalloc", @@ -6320,10 +6334,10 @@ dependencies = [ "http", "kvproto", "lazy_static", - "libc 0.2.125", + "libc 0.2.132", "log", "log_wrappers", - "nix", + "nix 0.24.1", "num-traits", "num_cpus", "online_config", @@ -6367,7 +6381,7 @@ version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" dependencies = [ - "libc 0.2.125", + "libc 0.2.132", "redox_syscall 0.1.56", "winapi 0.3.9", ] @@ -6409,7 +6423,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2af73ac49756f3f7c01172e34a23e5d0216f6c32333757c2c61feb2bbff5a5ee" dependencies = [ "bytes", - "libc 0.2.125", + "libc 0.2.132", "memchr", "mio 0.8.0", "num_cpus", @@ -6800,7 +6814,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "055058552ca15c566082fc61da433ae678f78986a6f16957e33162d1b218792a" dependencies = [ "kernel32-sys", - "libc 0.2.125", + "libc 0.2.132", "winapi 0.2.8", ] @@ -6983,7 +6997,7 @@ checksum = 
"2a5a7e487e921cf220206864a94a89b6c6905bfc19f1057fa26a4cb360e5c1d2" dependencies = [ "either", "lazy_static", - "libc 0.2.125", + "libc 0.2.132", ] [[package]] @@ -7155,5 +7169,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" dependencies = [ "cc", - "libc 0.2.125", + "libc 0.2.132", ] diff --git a/etc/config-template.toml b/etc/config-template.toml index 558612151ec..674eaa1a149 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -1092,16 +1092,16 @@ ## 1: Can be read by TiKV release 6.1 and above. ## 2: Can be read by TiKV release 6.3 and above. Supports log recycling. ## -## Default: 1. -# format-version = 1 +## Default: 2. +# format-version = 2 ## Whether to recycle stale log files in Raft Engine. ## If `true`, logically purged log files will be reserved for recycling. ## Only available for `format-version` >= 2. This option is only ## available when TiKV >= 6.3.x. ## -## Default: false. -# enable-log-recycle = false +## Default: true. +# enable-log-recycle = true [security] ## The path for TLS certificates. Empty string means disabling secure connections. From e8679338dcf63f152dbc525f4c7d44bbc0743df4 Mon Sep 17 00:00:00 2001 From: Ping Yu Date: Fri, 2 Sep 2022 14:32:25 +0800 Subject: [PATCH 0184/1149] causal_ts: add benchmark (#13389) ref tikv/tikv#12794, ref tikv/tikv#12970 1. Add benchmark for `causal_ts.BatchTsoProvider`. 2. Change implementation of `TestPdClient.batch_get_tso` to meet interface convention of real PD. 3. Remove "TODO" of making batch removal async for `causal_ts.TsoBatchList`. 
Signed-off-by: pingyu Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/causal_ts/Cargo.toml | 6 ++ components/causal_ts/benches/tso.rs | 123 ++++++++++++++++++++++++++ components/causal_ts/src/tso.rs | 13 +-- components/test_raftstore/src/pd.rs | 36 +++++++- components/txn_types/src/lib.rs | 2 +- components/txn_types/src/timestamp.rs | 2 +- 7 files changed, 173 insertions(+), 10 deletions(-) create mode 100644 components/causal_ts/benches/tso.rs diff --git a/Cargo.lock b/Cargo.lock index 9463bbd717b..bf5a40762e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -722,6 +722,7 @@ name = "causal_ts" version = "0.0.1" dependencies = [ "api_version", + "criterion", "engine_rocks", "engine_traits", "error_code", diff --git a/components/causal_ts/Cargo.toml b/components/causal_ts/Cargo.toml index b1ad4ed449a..7505a043a69 100644 --- a/components/causal_ts/Cargo.toml +++ b/components/causal_ts/Cargo.toml @@ -34,4 +34,10 @@ tokio = { version = "1", features = ["sync"] } txn_types = { path = "../txn_types", default-features = false } [dev-dependencies] +criterion = "0.3" test_raftstore = { path = "../test_raftstore" } + +[[bench]] +name = "tso" +path = "benches/tso.rs" +harness = false diff --git a/components/causal_ts/benches/tso.rs b/components/causal_ts/benches/tso.rs new file mode 100644 index 00000000000..86d7ed9b9ea --- /dev/null +++ b/components/causal_ts/benches/tso.rs @@ -0,0 +1,123 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{sync::Arc, time::Duration}; + +use causal_ts::{BatchTsoProvider, CausalTsProvider, TsoBatchList}; +use criterion::*; +use futures::executor::block_on; +use test_raftstore::TestPdClient; +use txn_types::TimeStamp; + +fn bench_batch_tso_list_pop(c: &mut Criterion) { + const CAPACITY: u64 = 10_000; + let cases = vec![("100", 100), ("10k", 10_000)]; // (id, batch_size) + + let bench_func = |b: &mut Bencher<'_>, batch_size: u64| { + let batch_list = TsoBatchList::new(CAPACITY as u32); + b.iter_batched( + || { + batch_list.flush(); + for i in 0..CAPACITY { + batch_list + .push( + batch_size as u32, + TimeStamp::compose(i as u64, batch_size), + false, + ) + .unwrap(); + } + }, + |_| { + black_box(batch_list.pop(None).unwrap()); + }, + BatchSize::NumIterations(CAPACITY * batch_size), + ) + }; + + let mut group = c.benchmark_group("batch_tso_list_pop"); + for (id, batch_size) in cases { + group.bench_function(id, |b| { + bench_func(b, batch_size); + }); + } +} + +fn bench_batch_tso_list_push(c: &mut Criterion) { + const BATCH_SIZE: u64 = 8192; + let cases = vec![("50", 50), ("1024", 1024)]; // (id, capacity) + + let bench_func = |b: &mut Bencher<'_>, capacity: u64| { + let batch_list = TsoBatchList::new(capacity as u32); + let mut i = 0; + b.iter(|| { + i += 1; + black_box( + batch_list + .push( + BATCH_SIZE as u32, + TimeStamp::compose(i as u64, BATCH_SIZE), + false, + ) + .unwrap(), + ); + }) + }; + + let mut group = c.benchmark_group("batch_tso_list_push"); + for (id, capacity) in cases { + group.bench_function(id, |b| { + bench_func(b, capacity); + }); + } +} + +fn bench_batch_tso_provider_get_ts(c: &mut Criterion) { + let pd_cli = Arc::new(TestPdClient::new(1, false)); + + // Disable background renew by setting `renew_interval` to 0 to make test result + // stable. 
+ let provider = block_on(BatchTsoProvider::new_opt( + pd_cli, + Duration::ZERO, + Duration::from_secs(1), // cache_multiplier = 10 + 100, + 80000, + )) + .unwrap(); + + c.bench_function("bench_batch_tso_provider_get_ts", |b| { + b.iter(|| { + black_box(provider.get_ts().unwrap()); + }) + }); +} + +fn bench_batch_tso_provider_flush(c: &mut Criterion) { + let pd_cli = Arc::new(TestPdClient::new(1, false)); + + // Disable background renew by setting `renew_interval` to 0 to make test result + // stable. + let provider = block_on(BatchTsoProvider::new_opt( + pd_cli, + Duration::ZERO, + Duration::from_secs(1), // cache_multiplier = 10 + 100, + 80000, + )) + .unwrap(); + + c.bench_function("bench_batch_tso_provider_flush", |b| { + b.iter(|| { + black_box(provider.flush()).unwrap(); + }) + }); +} + +criterion_group!( + benches, + bench_batch_tso_list_pop, + bench_batch_tso_list_push, + bench_batch_tso_provider_get_ts, + bench_batch_tso_provider_flush, +); +criterion_main!(benches); diff --git a/components/causal_ts/src/tso.rs b/components/causal_ts/src/tso.rs index 6eabf8bf351..3bb0034af8f 100644 --- a/components/causal_ts/src/tso.rs +++ b/components/causal_ts/src/tso.rs @@ -140,7 +140,7 @@ impl TsoBatch { /// the scenario of leader transfer). Other regions without the requirement can /// still use older TSO cache. #[derive(Default, Debug)] -struct TsoBatchList { +pub struct TsoBatchList { inner: RwLock, /// Number of remaining (available) TSO. @@ -191,7 +191,6 @@ impl TsoBatchList { usage } - // TODO: make it async fn remove_batch(&self, key: u64) { if let Some(batch) = self.inner.write().remove(&key) { self.tso_remain @@ -218,7 +217,9 @@ impl TsoBatchList { self.tso_usage.fetch_add(1, Ordering::Relaxed); self.tso_remain.fetch_sub(1, Ordering::Relaxed); if is_used_up { - // TODO: make it async + // Note: do NOT try to make it async. + // According to benchmark, `remove_batch` can be done in ~50ns, while async + // implemented by `Worker` costs ~1us. 
self.remove_batch(key); } return Some(ts); @@ -253,8 +254,10 @@ impl TsoBatchList { .fetch_add(batch_size as i32, Ordering::Relaxed); } - // remove items out of capacity limitation. - // TODO: make it async + // Remove items out of capacity limitation. + // Note: do NOT try to make it async. + // According to benchmark, `write().pop_first()` can be done in ~50ns, while + // async implemented by `Worker` costs ~1us. if self.inner.read().len() > self.capacity as usize { if let Some((_, batch)) = self.inner.write().pop_first() { self.tso_remain diff --git a/components/test_raftstore/src/pd.rs b/components/test_raftstore/src/pd.rs index 33241862e07..75ea189c312 100644 --- a/components/test_raftstore/src/pd.rs +++ b/components/test_raftstore/src/pd.rs @@ -46,7 +46,7 @@ use tikv_util::{ Either, HandyRwLock, }; use tokio_timer::timer::Handle; -use txn_types::TimeStamp; +use txn_types::{TimeStamp, TSO_PHYSICAL_SHIFT_BITS}; use super::*; @@ -1698,8 +1698,38 @@ impl PdClient for TestPdClient { )), ))); } - let tso = self.tso.fetch_add(count as u64, Ordering::SeqCst); - Box::pin(ok(TimeStamp::new(tso + count as u64))) + + assert!(count > 0); + assert!(count < (1 << TSO_PHYSICAL_SHIFT_BITS)); + + let mut old_tso = self.tso.load(Ordering::SeqCst); + loop { + let ts: TimeStamp = old_tso.into(); + + // Add to logical part first. + let (mut physical, mut logical) = (ts.physical(), ts.logical() + count as u64); + + // When logical part is overflow, add to physical part. + // Moreover, logical part must not less than `count-1`, as the + // generated batch of TSO is treated as of the same physical time. 
+ // Refer to real PD's implementation: + // https://github.com/tikv/pd/blob/v6.2.0/server/tso/tso.go#L361 + if logical >= (1 << TSO_PHYSICAL_SHIFT_BITS) { + physical += 1; + logical = (count - 1) as u64; + } + + let new_tso = TimeStamp::compose(physical, logical); + match self.tso.compare_exchange_weak( + old_tso, + new_tso.into_inner(), + Ordering::SeqCst, + Ordering::SeqCst, + ) { + Ok(_) => return Box::pin(ok(new_tso)), + Err(x) => old_tso = x, + } + } } fn update_service_safe_point( diff --git a/components/txn_types/src/lib.rs b/components/txn_types/src/lib.rs index be99fcc30c4..2f018c23923 100644 --- a/components/txn_types/src/lib.rs +++ b/components/txn_types/src/lib.rs @@ -16,7 +16,7 @@ use std::io; use error_code::{self, ErrorCode, ErrorCodeExt}; pub use lock::{Lock, LockType, PessimisticLock}; use thiserror::Error; -pub use timestamp::{TimeStamp, TsSet}; +pub use timestamp::{TimeStamp, TsSet, TSO_PHYSICAL_SHIFT_BITS}; pub use types::{ is_short_value, Key, KvPair, Mutation, MutationType, OldValue, OldValues, TxnExtra, TxnExtraScheduler, Value, WriteBatchFlags, SHORT_VALUE_MAX_LEN, diff --git a/components/txn_types/src/timestamp.rs b/components/txn_types/src/timestamp.rs index 946ccfbbdcb..fb0cd900123 100644 --- a/components/txn_types/src/timestamp.rs +++ b/components/txn_types/src/timestamp.rs @@ -12,7 +12,7 @@ use collections::HashSet; #[repr(transparent)] pub struct TimeStamp(u64); -const TSO_PHYSICAL_SHIFT_BITS: u64 = 18; +pub const TSO_PHYSICAL_SHIFT_BITS: u64 = 18; impl TimeStamp { /// Create a time stamp from physical and logical components. From b8315adf8dbd0c594d40d30c726a80de4c55c100 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Fri, 2 Sep 2022 17:08:25 +0800 Subject: [PATCH 0185/1149] service: add the kv_flashback_to_version interface (#13378) ref tikv/tikv#13303 Implement the `kv_flashback_to_version` interface. 
Signed-off-by: JmPotato --- Cargo.lock | 2 +- components/test_raftstore/src/cluster.rs | 7 ++ components/test_raftstore/src/util.rs | 1 - src/server/metrics.rs | 1 + src/server/service/kv.rs | 96 ++++++++++++++- src/storage/txn/commands/mod.rs | 12 ++ src/storage/txn/scheduler.rs | 6 +- tests/integrations/server/kv_service.rs | 149 +++++++++++++++++++++++ 8 files changed, 269 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bf5a40762e4..34795afc974 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2628,7 +2628,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#9cc5e1ddfda3aec6eddfc09de1d0072ebbd7bb21" +source = "git+https://github.com/pingcap/kvproto.git#f95ac338b3312e0a9bd7c33c9647a87a74314567" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 79f0b8ef709..9b5aa1a6646 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1334,6 +1334,13 @@ impl Cluster { } } + pub fn try_transfer_leader(&mut self, region_id: u64, leader: metapb::Peer) -> RaftCmdResponse { + let epoch = self.get_region_epoch(region_id); + let transfer_leader = new_admin_request(region_id, &epoch, new_transfer_leader_cmd(leader)); + self.call_command_on_leader(transfer_leader, Duration::from_secs(5)) + .unwrap() + } + pub fn get_snap_dir(&self, node_id: u64) -> String { self.sim.rl().get_snap_dir(node_id) } diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 9b653ac2096..882095c5a7d 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -931,7 +931,6 @@ pub fn must_kv_prewrite_with( ); } -// Disk full test interface. 
pub fn try_kv_prewrite_with( client: &TikvClient, ctx: Context, diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 86ca07f38b4..6df6f0e96a8 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -35,6 +35,7 @@ make_auto_flush_static_metric! { kv_resolve_lock, kv_gc, kv_delete_range, + kv_flashback_to_version, raw_get, raw_batch_get, raw_batch_get_command, diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 79fbd9c6624..ab2fc41c47c 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -6,6 +6,7 @@ use std::{mem, sync::Arc}; use api_version::KvFormat; use fail::fail_point; use futures::{ + channel::oneshot, compat::Future01CompatExt, future::{self, Future, FutureExt, TryFutureExt}, sink::SinkExt, @@ -31,7 +32,7 @@ use raftstore::{ store::{ memory::{MEMTRACE_APPLYS, MEMTRACE_RAFT_ENTRIES, MEMTRACE_RAFT_MESSAGES}, metrics::RAFT_ENTRIES_CACHES_GAUGE, - Callback, CasualMessage, CheckLeaderTask, RaftCmdExtraOpts, + Callback, CasualMessage, CheckLeaderTask, RaftCmdExtraOpts, SignificantMsg, }, DiscardReason, Error as RaftStoreError, Result as RaftStoreResult, }; @@ -400,6 +401,37 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor ); } + fn kv_flashback_to_version( + &mut self, + ctx: RpcContext<'_>, + mut req: FlashbackToVersionRequest, + sink: UnarySink, + ) { + let begin_instant = Instant::now(); + + let source = req.mut_context().take_request_source(); + let resp = future_flashback_to_version(&self.storage, &self.ch, req); + let task = async move { + let resp = resp.await?; + let elapsed = begin_instant.saturating_elapsed(); + sink.success(resp).await?; + GRPC_MSG_HISTOGRAM_STATIC + .kv_flashback_to_version + .observe(elapsed.as_secs_f64()); + record_request_source_metrics(source, elapsed); + ServerResult::Ok(()) + } + .map_err(|e| { + log_net_error!(e, "kv rpc failed"; + "request" => stringify!($fn_name) + ); + GRPC_MSG_FAIL_COUNTER.kv_flashback_to_version.inc(); + }) + .map(|_| ()); + + ctx.spawn(task); 
+ } + fn coprocessor(&mut self, ctx: RpcContext<'_>, mut req: Request, sink: UnarySink) { forward_unary!(self.proxy, coprocessor, ctx, req, sink); let source = req.mut_context().take_request_source(); @@ -1026,6 +1058,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor let copr_v2 = self.copr_v2.clone(); let pool_size = storage.get_normal_pool_size(); let batch_builder = BatcherBuilder::new(self.enable_req_batch, pool_size); + let ch = self.ch.clone(); let request_handler = stream.try_for_each(move |mut req| { let request_ids = req.take_request_ids(); let requests: Vec<_> = req.take_requests().into(); @@ -1042,6 +1075,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor id, req, &tx, + &ch, ); if let Some(batch) = batcher.as_mut() { batch.maybe_commit(&storage, &tx); @@ -1242,7 +1276,12 @@ fn response_batch_commands_request( poll_future_notify(task); } -fn handle_batch_commands_request( +fn handle_batch_commands_request< + T: RaftStoreRouter + 'static, + E: Engine, + L: LockManager, + F: KvFormat, +>( batcher: &mut Option, storage: &Storage, copr: &Endpoint, @@ -1251,6 +1290,7 @@ fn handle_batch_commands_request( id: u64, req: batch_commands_request::Request, tx: &Sender, + ch: &T, ) { // To simplify code and make the logic more clear. macro_rules! 
oneof { @@ -1353,6 +1393,7 @@ fn handle_batch_commands_request( ResolveLock, future_resolve_lock(storage), kv_resolve_lock; Gc, future_gc(), kv_gc; DeleteRange, future_delete_range(storage), kv_delete_range; + FlashbackToVersion, future_flashback_to_version(storage, ch), kv_flashback_to_version; RawBatchGet, future_raw_batch_get(storage), raw_batch_get; RawPut, future_raw_put(storage), raw_put; RawBatchPut, future_raw_batch_put(storage), raw_batch_put; @@ -1645,6 +1686,57 @@ fn future_delete_range( } } +fn future_flashback_to_version< + T: RaftStoreRouter + 'static, + E: Engine, + L: LockManager, + F: KvFormat, +>( + storage: &Storage, + raft_router: &T, + req: FlashbackToVersionRequest, +) -> impl Future> { + let storage_clone = storage.clone(); + let raft_router_clone = raft_router.clone(); + async move { + // Send a `SignificantMsg::PrepareFlashback` to prepare the raftstore for the + // later flashback. This will first block all scheduling, read and write + // operations and then wait for the latest Raft log to be applied before + // we start the flashback command. + let region_id = req.get_context().get_region_id(); + let (result_tx, result_rx) = oneshot::channel(); + raft_router_clone + .significant_send(region_id, SignificantMsg::PrepareFlashback(result_tx))?; + if !result_rx.await? { + return Err(Error::Other(box_err!( + "failed to prepare the region {} for flashback", + region_id + ))); + } + let (cb, f) = paired_future_callback(); + let res = storage_clone.sched_txn_command(req.into(), cb); + // Avoid crossing `.await` to bypass the `Send` constraint. + drop(storage_clone); + let v = match res { + Err(e) => Err(e), + Ok(_) => f.await?, + }; + fail_point!("skip_finish_flashback_to_version", |_| { + Ok(FlashbackToVersionResponse::default()) + }); + // Send a `SignificantMsg::FinishFlashback` to notify the raftstore that the + // flashback has been finished. 
+ raft_router_clone.significant_send(region_id, SignificantMsg::FinishFlashback)?; + let mut resp = FlashbackToVersionResponse::default(); + if let Some(err) = extract_region_error(&v) { + resp.set_region_error(err); + } else if let Err(e) = v { + resp.set_error(format!("{}", e)); + } + Ok(resp) + } +} + fn future_raw_get( storage: &Storage, mut req: RawGetRequest, diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index a204ab4f30f..c15b27deb66 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -351,6 +351,18 @@ impl From for TypedCommand> { } } +impl From for TypedCommand<()> { + fn from(mut req: FlashbackToVersionRequest) -> Self { + FlashbackToVersionReadPhase::new( + req.get_version().into(), + Some(Key::from_raw(req.get_end_key())), + Some(Key::from_raw(req.get_start_key())), + Some(Key::from_raw(req.get_start_key())), + req.take_context(), + ) + } +} + #[derive(Default)] pub(super) struct ReleasedLocks { start_ts: TimeStamp, diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index a72bd671d0a..a7c38e147ee 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -568,7 +568,11 @@ impl Scheduler { pb_ctx: task.cmd.ctx(), ..Default::default() }; - if let Command::FlashbackToVersionReadPhase { .. } = task.cmd { + if matches!( + task.cmd, + Command::FlashbackToVersionReadPhase { .. } + | Command::FlashbackToVersion { .. } + ) { snap_ctx.for_flashback = true; } // The program is currently in scheduler worker threads. 
diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 262060b4491..d60edf7bc97 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -596,6 +596,155 @@ fn test_mvcc_resolve_lock_gc_and_delete() { assert!(del_resp.error.is_empty()); } +#[test] +fn test_mvcc_flashback() { + let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let mut ts = 0; + let k = b"key".to_vec(); + for i in 0..10 { + let v = format!("value@{}", i).into_bytes(); + // Prewrite + ts += 1; + let prewrite_start_version = ts; + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(k.clone()); + mutation.set_value(v.clone()); + must_kv_prewrite( + &client, + ctx.clone(), + vec![mutation], + k.clone(), + prewrite_start_version, + ); + // Commit + ts += 1; + let commit_version = ts; + must_kv_commit( + &client, + ctx.clone(), + vec![k.clone()], + prewrite_start_version, + commit_version, + commit_version, + ); + // Get + ts += 1; + must_kv_read_equal(&client, ctx.clone(), k.clone(), v.clone(), ts) + } + // Prewrite to leave a lock. 
+ ts += 1; + let prewrite_start_version = ts; + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(k.clone()); + mutation.set_value(b"value@latest".to_vec()); + must_kv_prewrite( + &client, + ctx.clone(), + vec![mutation], + k.clone(), + prewrite_start_version, + ); + ts += 1; + let get_version = ts; + let mut get_req = GetRequest::default(); + get_req.set_context(ctx.clone()); + get_req.key = k.clone(); + get_req.version = get_version; + let get_resp = client.kv_get(&get_req).unwrap(); + assert!(!get_resp.has_region_error()); + assert!(get_resp.get_error().has_locked()); + assert!(get_resp.value.is_empty()); + // Flashback + let mut flashback_to_version_req = FlashbackToVersionRequest::default(); + flashback_to_version_req.set_context(ctx.clone()); + flashback_to_version_req.version = 5; + flashback_to_version_req.start_key = b"a".to_vec(); + flashback_to_version_req.end_key = b"z".to_vec(); + let flashback_resp = client + .kv_flashback_to_version(&flashback_to_version_req) + .unwrap(); + assert!(!flashback_resp.has_region_error()); + assert!(flashback_resp.get_error().is_empty()); + // Should not meet the lock and can not get the latest data any more. + must_kv_read_equal(&client, ctx, k, b"value@1".to_vec(), ts); +} + +#[test] +#[cfg(feature = "failpoints")] +fn test_mvcc_flashback_block_rw() { + let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + fail::cfg("skip_finish_flashback_to_version", "return").unwrap(); + // Flashback + let mut flashback_to_version_req = FlashbackToVersionRequest::default(); + flashback_to_version_req.set_context(ctx.clone()); + flashback_to_version_req.version = 0; + flashback_to_version_req.start_key = b"a".to_vec(); + flashback_to_version_req.end_key = b"z".to_vec(); + let flashback_resp = client + .kv_flashback_to_version(&flashback_to_version_req) + .unwrap(); + assert!(!flashback_resp.has_region_error()); + assert!(flashback_resp.get_error().is_empty()); + // Try to read. 
+ let (k, v) = (b"key".to_vec(), b"value".to_vec()); + // Get + let mut get_req = GetRequest::default(); + get_req.set_context(ctx.clone()); + get_req.key = k.clone(); + get_req.version = 1; + let get_resp = client.kv_get(&get_req).unwrap(); + assert!(get_resp.get_region_error().has_recovery_in_progress()); + assert!(!get_resp.has_error()); + assert!(get_resp.value.is_empty()); + // Scan + let mut scan_req = ScanRequest::default(); + scan_req.set_context(ctx.clone()); + scan_req.start_key = k.clone(); + scan_req.limit = 1; + scan_req.version = 1; + let scan_resp = client.kv_scan(&scan_req).unwrap(); + assert!(scan_resp.get_region_error().has_recovery_in_progress()); + assert!(scan_resp.pairs.is_empty()); + // Try to write. + // Prewrite + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(k.clone()); + mutation.set_value(v); + let prewrite_resp = try_kv_prewrite(&client, ctx, vec![mutation], k, 1); + assert!(prewrite_resp.get_region_error().has_recovery_in_progress()); + fail::remove("skip_finish_flashback_to_version"); +} + +#[test] +#[cfg(feature = "failpoints")] +fn test_mvcc_flashback_block_scheduling() { + let (mut cluster, client, ctx) = must_new_cluster_and_kv_client(); + fail::cfg("skip_finish_flashback_to_version", "return").unwrap(); + // Flashback + let mut flashback_to_version_req = FlashbackToVersionRequest::default(); + flashback_to_version_req.set_context(ctx); + flashback_to_version_req.version = 0; + flashback_to_version_req.start_key = b"a".to_vec(); + flashback_to_version_req.end_key = b"z".to_vec(); + let flashback_resp = client + .kv_flashback_to_version(&flashback_to_version_req) + .unwrap(); + assert!(!flashback_resp.has_region_error()); + assert!(flashback_resp.get_error().is_empty()); + // Try to transfer leader. 
+ let transfer_leader_resp = cluster.try_transfer_leader(1, new_peer(2, 2)); + assert!( + transfer_leader_resp + .get_header() + .get_error() + .has_recovery_in_progress() + ); + fail::remove("skip_finish_flashback_to_version"); +} + // raft related RPC is tested as parts of test_snapshot.rs, so skip here. #[test] From d471b933d6eaaa041b32ab0613cf844294567d2e Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Mon, 5 Sep 2022 14:52:55 +0800 Subject: [PATCH 0186/1149] engine: toggle purge worker with `need_manual_purge` (#13253) ref tikv/tikv#11119 None Signed-off-by: tabokie --- components/engine_panic/src/raft_engine.rs | 6 ++- components/engine_rocks/src/raft_engine.rs | 4 -- components/engine_traits/src/raft_engine.rs | 8 ++- components/raft_log_engine/src/engine.rs | 6 ++- components/raftstore/src/store/fsm/store.rs | 56 ++++++++++++--------- 5 files changed, 48 insertions(+), 32 deletions(-) diff --git a/components/engine_panic/src/raft_engine.rs b/components/engine_panic/src/raft_engine.rs index 2fffb544fe3..bb501007a76 100644 --- a/components/engine_panic/src/raft_engine.rs +++ b/components/engine_panic/src/raft_engine.rs @@ -114,7 +114,11 @@ impl RaftEngine for PanicEngine { panic!() } - fn purge_expired_files(&self) -> Result> { + fn need_manual_purge(&self) -> bool { + panic!() + } + + fn manual_purge(&self) -> Result> { panic!() } diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index f1e86903e9d..605ef4c5514 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -312,10 +312,6 @@ impl RaftEngine for RocksEngine { Ok(total) } - fn purge_expired_files(&self) -> Result> { - Ok(vec![]) - } - fn flush_metrics(&self, instance: &str) { KvEngine::flush_metrics(self, instance) } diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index a7bd66d3230..e64bbe18018 100644 --- 
a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -115,9 +115,15 @@ pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send Ok(total) } + fn need_manual_purge(&self) -> bool { + false + } + /// Purge expired logs files and return a set of Raft group ids /// which needs to be compacted ASAP. - fn purge_expired_files(&self) -> Result>; + fn manual_purge(&self) -> Result> { + unimplemented!() + } fn flush_metrics(&self, _instance: &str) {} fn flush_stats(&self) -> Option { diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 8991a6f6838..2cd27d89538 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -587,7 +587,11 @@ impl RaftEngine for RaftLogEngine { Ok(total as usize) } - fn purge_expired_files(&self) -> Result> { + fn need_manual_purge(&self) -> bool { + true + } + + fn manual_purge(&self) -> Result> { self.0.purge_expired_files().map_err(transfer_error) } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index d6faf92ca85..5743b0ec3a5 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -1385,9 +1385,9 @@ struct Workers { // blocking operation, which can take an extensive amount of time. cleanup_worker: Worker, region_worker: Worker, - // Used for calling `purge_expired_files`, which can be time-consuming for certain - // engine implementations. - purge_worker: Worker, + // Used for calling `manual_purge` if the specific engine implementation requires it + // (`need_manual_purge`). 
+ purge_worker: Option, raftlog_fetch_worker: Worker, @@ -1452,12 +1452,36 @@ impl RaftBatchSystem { .registry .register_admin_observer(100, BoxAdminObserver::new(SplitObserver)); + let purge_worker = if engines.raft.need_manual_purge() { + let worker = Worker::new("purge-worker"); + let raft_clone = engines.raft.clone(); + let router_clone = self.router(); + worker.spawn_interval_task(cfg.value().raft_engine_purge_interval.0, move || { + match raft_clone.manual_purge() { + Ok(regions) => { + for region_id in regions { + let _ = router_clone.send( + region_id, + PeerMsg::CasualMessage(CasualMessage::ForceCompactRaftLogs), + ); + } + } + Err(e) => { + warn!("purge expired files"; "err" => %e); + } + }; + }); + Some(worker) + } else { + None + }; + let workers = Workers { pd_worker, background_worker, cleanup_worker: Worker::new("cleanup-worker"), region_worker: Worker::new("region-worker"), - purge_worker: Worker::new("purge-worker"), + purge_worker, raftlog_fetch_worker: Worker::new("raftlog-fetch-worker"), coprocessor_host: coprocessor_host.clone(), refresh_config_worker: LazyWorker::new("refreash-config-worker"), @@ -1484,26 +1508,6 @@ impl RaftBatchSystem { let raftlog_gc_scheduler = workers .background_worker .start_with_timer("raft-gc-worker", raftlog_gc_runner); - let router_clone = self.router(); - let engines_clone = engines.clone(); - workers.purge_worker.spawn_interval_task( - cfg.value().raft_engine_purge_interval.0, - move || { - match engines_clone.raft.purge_expired_files() { - Ok(regions) => { - for region_id in regions { - let _ = router_clone.send( - region_id, - PeerMsg::CasualMessage(CasualMessage::ForceCompactRaftLogs), - ); - } - } - Err(e) => { - warn!("purge expired files"; "err" => %e); - } - }; - }, - ); let raftlog_fetch_scheduler = workers.raftlog_fetch_worker.start( "raftlog-fetch-worker", @@ -1711,7 +1715,9 @@ impl RaftBatchSystem { workers.cleanup_worker.stop(); workers.region_worker.stop(); workers.background_worker.stop(); - 
workers.purge_worker.stop(); + if let Some(w) = workers.purge_worker { + w.stop(); + } workers.refresh_config_worker.stop(); workers.raftlog_fetch_worker.stop(); } From 14a8a9c4522e5be7a5e571dbaffe0278ec87bbf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 5 Sep 2022 15:04:55 +0800 Subject: [PATCH 0187/1149] log-backup: fixed pessimistic lock in initial scanning (#13354) close tikv/tikv#13304 Signed-off-by: Yu Juncen Co-authored-by: Ti Chi Robot --- components/backup-stream/src/event_loader.rs | 4 +- components/backup-stream/tests/mod.rs | 73 +++++++++++++++++++- 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 0f83d4726e4..fc34b65eead 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -152,7 +152,9 @@ impl EventLoader { ) })?; debug!("meet lock during initial scanning."; "key" => %utils::redact(&lock_at), "ts" => %lock.ts); - resolver.track_phase_one_lock(lock.ts, lock_at) + if utils::should_track_lock(&lock) { + resolver.track_phase_one_lock(lock.ts, lock_at); + } } TxnEntry::Commit { default, write, .. } => { result.push(ApplyEvent { diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index c5d3442fb84..4a437421dac 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -44,8 +44,12 @@ use txn_types::{Key, TimeStamp, WriteRef}; use walkdir::WalkDir; fn mutation(k: Vec, v: Vec) -> Mutation { + mutation_op(k, v, Op::Put) +} + +fn mutation_op(k: Vec, v: Vec, op: Op) -> Mutation { let mut mutation = Mutation::default(); - mutation.set_op(Op::Put); + mutation.set_op(op); mutation.key = k; mutation.value = v; mutation @@ -419,6 +423,36 @@ impl Suite { // Copy & Paste from cdc::tests::TestSuite, maybe make it a mixin? 
impl Suite { + pub fn tso(&self) -> TimeStamp { + run_async_test(self.cluster.pd_client.get_tso()).unwrap() + } + + pub fn must_kv_pessimistic_lock( + &mut self, + region_id: u64, + keys: Vec>, + ts: TimeStamp, + pk: Vec, + ) { + let mut lock_req = PessimisticLockRequest::new(); + lock_req.set_context(self.get_context(region_id)); + let mut mutations = vec![]; + for key in keys { + mutations.push(mutation_op(key, vec![], Op::PessimisticLock)); + } + lock_req.set_mutations(mutations.into()); + lock_req.primary_lock = pk; + lock_req.start_version = ts.into_inner(); + lock_req.lock_ttl = ts.into_inner() + 1; + let resp = self + .get_tikv_client(region_id) + .kv_pessimistic_lock(&lock_req) + .unwrap(); + + assert!(!resp.has_region_error(), "{:?}", resp.get_region_error()); + assert!(resp.errors.is_empty(), "{:?}", resp.get_errors()); + } + pub fn must_kv_prewrite( &mut self, region_id: u64, @@ -1020,4 +1054,41 @@ mod test { regions ); } + + /// This test case tests whether we correctly handle the pessimistic locks. + #[test] + fn pessimistic_lock() { + let mut suite = SuiteBuilder::new_named("pessimistic_lock").nodes(3).build(); + suite.must_kv_pessimistic_lock( + 1, + vec![make_record_key(1, 42)], + suite.tso(), + make_record_key(1, 42), + ); + suite.must_register_task(1, "pessimistic_lock"); + suite.must_kv_pessimistic_lock( + 1, + vec![make_record_key(1, 43)], + suite.tso(), + make_record_key(1, 43), + ); + let expected_tso = suite.tso().into_inner(); + suite.force_flush_files("pessimistic_lock"); + suite.wait_for_flush(); + std::thread::sleep(Duration::from_secs(1)); + let checkpoint = run_async_test( + suite + .get_meta_cli() + .global_progress_of_task("pessimistic_lock"), + ) + .unwrap(); + // The checkpoint should be advanced: because PiTR is "Read" operation, + // which shouldn't be blocked by pessimistic locks. 
+ assert!( + checkpoint > expected_tso, + "expected = {}; checkpoint = {}", + expected_tso, + checkpoint + ); + } } From 7d36f3490570444c944e4c7cafb7887642b695ea Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 5 Sep 2022 15:20:56 +0800 Subject: [PATCH 0188/1149] gc_worker: use async_snapshot instead of raw API in GC (#13322) close tikv/tikv#13319 Signed-off-by: SpadeA-Tang Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/backup-stream/Cargo.toml | 1 + components/backup-stream/src/event_loader.rs | 8 +- components/engine_panic/src/snapshot.rs | 1 + .../src/coprocessor/region_info_accessor.rs | 154 +++- components/raftstore/src/store/worker/read.rs | 1 - components/server/src/server.rs | 25 +- components/test_coprocessor/src/store.rs | 2 +- components/test_raftstore/src/server.rs | 3 +- components/test_storage/src/assert_storage.rs | 48 +- components/test_storage/src/sync_storage.rs | 34 +- components/test_storage/src/util.rs | 4 +- components/tikv_kv/src/btree_engine.rs | 8 +- components/tikv_kv/src/lib.rs | 9 +- components/tikv_kv/src/mock_engine.rs | 4 - components/tikv_kv/src/rocksdb_engine.rs | 4 - src/coprocessor/endpoint.rs | 2 +- src/lib.rs | 1 + src/server/gc_worker/compaction_filter.rs | 1 - src/server/gc_worker/gc_manager.rs | 43 +- src/server/gc_worker/gc_worker.rs | 728 ++++++++++++------ .../gc_worker/rawkv_compaction_filter.rs | 1 - src/server/raftkv.rs | 27 +- src/server/server.rs | 10 +- src/storage/mod.rs | 16 +- src/storage/mvcc/reader/reader.rs | 15 + src/storage/txn/store.rs | 6 + tests/benches/hierarchy/storage/mod.rs | 6 +- tests/benches/misc/storage/incremental_get.rs | 2 +- tests/benches/misc/storage/mvcc_reader.rs | 2 +- tests/benches/misc/storage/scan.rs | 2 +- tests/failpoints/cases/test_gc_metrics.rs | 30 +- tests/failpoints/cases/test_gc_worker.rs | 2 + .../integrations/config/dynamic/gc_worker.rs | 12 +- .../integrations/raftstore/test_lease_read.rs | 46 +- 
tests/integrations/server/gc_worker.rs | 34 +- tests/integrations/server/kv_service.rs | 3 +- .../integrations/storage/test_raft_storage.rs | 6 +- tests/integrations/storage/test_raftkv.rs | 2 +- .../storage/test_region_info_accessor.rs | 36 + tests/integrations/storage/test_storage.rs | 24 +- 41 files changed, 960 insertions(+), 404 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 34795afc974..0aa7586a608 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -485,6 +485,7 @@ dependencies = [ "tidb_query_datatype", "tikv", "tikv_alloc", + "tikv_kv", "tikv_util", "tokio", "tokio-stream", diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index e5bb889420d..6090d929291 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -59,6 +59,7 @@ thiserror = "1" tidb_query_datatype = { path = "../tidb_query_datatype", default-features = false } tikv = { path = "../../", default-features = false } tikv_alloc = { path = "../tikv_alloc" } +tikv_kv = { path = "../tikv_kv" } tikv_util = { path = "../tikv_util" } tokio = { version = "1.5", features = ["rt-multi-thread", "macros", "time", "sync"] } tokio-stream = "0.1" diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index fc34b65eead..fc84fab0635 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -489,8 +489,10 @@ where #[cfg(test)] mod tests { + use futures::executor::block_on; use kvproto::metapb::*; - use tikv::storage::{txn::tests::*, Engine, TestEngineBuilder}; + use tikv::storage::{txn::tests::*, TestEngineBuilder}; + use tikv_kv::SnapContext; use txn_types::TimeStamp; use super::EventLoader; @@ -517,7 +519,9 @@ mod tests { r.set_id(42); r.set_start_key(b"".to_vec()); r.set_end_key(b"".to_vec()); - let snap = engine.snapshot_on_kv_engine(b"", b"").unwrap(); + + let snap = + block_on(async { tikv_kv::snapshot(&engine, 
SnapContext::default()).await }).unwrap(); let mut loader = EventLoader::load_from(snap, TimeStamp::zero(), TimeStamp::max(), &r).unwrap(); diff --git a/components/engine_panic/src/snapshot.rs b/components/engine_panic/src/snapshot.rs index cf651db4956..296d7ce617a 100644 --- a/components/engine_panic/src/snapshot.rs +++ b/components/engine_panic/src/snapshot.rs @@ -17,6 +17,7 @@ impl Peekable for PanicSnapshot { fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { panic!() } + fn get_value_cf_opt( &self, opts: &ReadOptions, diff --git a/components/raftstore/src/coprocessor/region_info_accessor.rs b/components/raftstore/src/coprocessor/region_info_accessor.rs index 8f9021c8e60..338cf3962c4 100644 --- a/components/raftstore/src/coprocessor/region_info_accessor.rs +++ b/components/raftstore/src/coprocessor/region_info_accessor.rs @@ -12,6 +12,7 @@ use std::{ use collections::{HashMap, HashSet}; use engine_traits::KvEngine; +use itertools::Itertools; use kvproto::metapb::Region; use raft::StateRole; use tikv_util::{ @@ -656,6 +657,10 @@ pub trait RegionInfoProvider: Send + Sync { unimplemented!() } + fn find_region_by_key(&self, _key: &[u8]) -> Result { + unimplemented!() + } + fn get_regions_in_range(&self, _start_key: &[u8], _end_key: &[u8]) -> Result> { unimplemented!() } @@ -686,6 +691,27 @@ impl RegionInfoProvider for RegionInfoAccessor { .map_err(|e| box_err!("failed to send request to region collector: {:?}", e)) } + fn find_region_by_key(&self, key: &[u8]) -> Result { + let key_in_vec = key.to_vec(); + let (tx, rx) = mpsc::channel(); + self.seek_region( + key, + Box::new(move |iter| { + if let Some(info) = iter.next() && info.region.get_start_key() <= key_in_vec.as_slice() { + if let Err(e) = tx.send(info.region.clone()) { + warn!("failed to send find_region_by_key result: {:?}", e); + } + } + }), + )?; + rx.recv().map_err(|e| { + box_err!( + "failed to receive find_region_by_key result from region collector: {:?}", + e + ) + }) + } + fn 
get_regions_in_range(&self, start_key: &[u8], end_key: &[u8]) -> Result> { let (tx, rx) = mpsc::channel(); let msg = RegionInfoQuery::GetRegionsInRange { @@ -712,28 +738,87 @@ impl RegionInfoProvider for RegionInfoAccessor { } // Use in tests only. -pub struct MockRegionInfoProvider(Mutex>); +// Note: The `StateRole` in RegionInfo here should not be used +pub struct MockRegionInfoProvider(Mutex>); impl MockRegionInfoProvider { pub fn new(regions: Vec) -> Self { - MockRegionInfoProvider(Mutex::new(regions)) + MockRegionInfoProvider(Mutex::new( + regions + .into_iter() + .map(|region| RegionInfo::new(region, StateRole::Leader)) + .collect_vec(), + )) } } impl Clone for MockRegionInfoProvider { fn clone(&self) -> Self { - MockRegionInfoProvider::new(self.0.lock().unwrap().clone()) + MockRegionInfoProvider::new( + self.0 + .lock() + .unwrap() + .iter() + .map(|region_info| region_info.region.clone()) + .collect_vec(), + ) } } impl RegionInfoProvider for MockRegionInfoProvider { - fn get_regions_in_range(&self, _start_key: &[u8], _end_key: &[u8]) -> Result> { - Ok(self.0.lock().unwrap().clone()) + fn get_regions_in_range(&self, start_key: &[u8], end_key: &[u8]) -> Result> { + let mut regions = Vec::new(); + let (tx, rx) = mpsc::channel(); + let end_key = RangeKey::from_end_key(end_key.to_vec()); + + self.seek_region( + start_key, + Box::new(move |iter| { + for region_info in iter { + if RangeKey::from_start_key(region_info.region.get_start_key().to_vec()) + > end_key + { + continue; + } + tx.send(region_info.region.clone()).unwrap(); + } + }), + )?; + + for region in rx { + regions.push(region); + } + Ok(regions) + } + + fn seek_region(&self, from: &[u8], callback: SeekRegionCallback) -> Result<()> { + let region_infos = self.0.lock().unwrap(); + let mut iter = region_infos.iter().filter(|®ion_info| { + RangeKey::from_end_key(region_info.region.get_end_key().to_vec()) + > RangeKey::from_start_key(from.to_vec()) + }); + callback(&mut iter); + Ok(()) + } + + fn 
find_region_by_key(&self, key: &[u8]) -> Result { + let region_infos = self.0.lock().unwrap(); + let key = RangeKey::from_start_key(key.to_vec()); + region_infos + .iter() + .find(|region_info| { + RangeKey::from_start_key(region_info.region.get_start_key().to_vec()) <= key + && key < RangeKey::from_end_key(region_info.region.get_end_key().to_vec()) + }) + .map(|region_info| region_info.region.clone()) + .ok_or(box_err!("Not found region containing {:?}", key)) } } #[cfg(test)] mod tests { + use txn_types::Key; + use super::*; fn new_region_collector() -> RegionCollector { @@ -1290,4 +1375,63 @@ mod tests { ], ); } + + #[test] + fn test_mock_region_info_provider() { + fn init_region(start_key: &[u8], end_key: &[u8], region_id: u64) -> Region { + let start_key = Key::from_encoded(start_key.to_vec()); + let end_key = Key::from_encoded(end_key.to_vec()); + let mut region = Region::default(); + region.set_start_key(start_key.as_encoded().clone()); + region.set_end_key(end_key.as_encoded().clone()); + region.id = region_id; + region + } + + let regions = vec![ + init_region(b"k01", b"k03", 1), + init_region(b"k05", b"k10", 2), + init_region(b"k10", b"k15", 3), + ]; + + let provider = MockRegionInfoProvider::new(regions); + + // Test ranges covering all regions + let regions = provider.get_regions_in_range(b"k01", b"k15").unwrap(); + assert!(regions.len() == 3); + assert!(regions[0].id == 1); + assert!(regions[1].id == 2); + assert!(regions[2].id == 3); + + // Test ranges covering partial regions + let regions = provider.get_regions_in_range(b"k04", b"k10").unwrap(); + assert!(regions.len() == 2); + assert!(regions[0].id == 2); + assert!(regions[1].id == 3); + + // Test seek for all regions + provider + .seek_region( + b"k02", + Box::new(|iter| { + assert!(iter.next().unwrap().region.id == 1); + assert!(iter.next().unwrap().region.id == 2); + assert!(iter.next().unwrap().region.id == 3); + assert!(iter.next().is_none()); + }), + ) + .unwrap(); + + // Test seek for 
partial regions + provider + .seek_region( + b"k04", + Box::new(|iter| { + assert!(iter.next().unwrap().region.id == 2); + assert!(iter.next().unwrap().region.id == 3); + assert!(iter.next().is_none()); + }), + ) + .unwrap(); + } } diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index a3c3878cf68..9c5889f876e 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -782,7 +782,6 @@ where // Replica can serve stale read if and only if its `safe_ts` >= `read_ts` RequestPolicy::StaleRead => { let read_ts = decode_u64(&mut req.get_header().get_flag_data()).unwrap(); - assert!(read_ts > 0); if let Err(resp) = delegate.check_stale_read_safe(read_ts) { cb.invoke_read(resp); return; diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 35a06d1321f..ca95ddaf310 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -591,22 +591,14 @@ where RaftRouter, > { let engines = self.engines.as_ref().unwrap(); - let mut gc_worker = GcWorker::new( + let gc_worker = GcWorker::new( engines.engine.clone(), self.router.clone(), self.flow_info_sender.take().unwrap(), self.config.gc.clone(), self.pd_client.feature_gate().clone(), + Arc::new(self.region_info_accessor.clone()), ); - gc_worker - .start() - .unwrap_or_else(|e| fatal!("failed to start gc worker: {}", e)); - gc_worker - .start_observe_lock_apply( - self.coprocessor_host.as_mut().unwrap(), - self.concurrency_manager.clone(), - ) - .unwrap_or_else(|e| fatal!("gc worker failed to observe lock apply: {}", e)); let cfg_controller = self.cfg_controller.as_mut().unwrap(); cfg_controller.register( @@ -623,7 +615,7 @@ where self.engines.as_ref().unwrap().engine.kv_engine(), self.flow_info_receiver.take().unwrap(), ))); - let gc_worker = self.init_gc_worker(); + let mut gc_worker = self.init_gc_worker(); let mut ttl_checker = 
Box::new(LazyWorker::new("ttl-checker")); let ttl_scheduler = ttl_checker.scheduler(); @@ -1040,7 +1032,16 @@ where self.region_info_accessor.clone(), node.id(), ); - if let Err(e) = gc_worker.start_auto_gc(auto_gc_config, safe_point) { + gc_worker + .start(node.id()) + .unwrap_or_else(|e| fatal!("failed to start gc worker: {}", e)); + gc_worker + .start_observe_lock_apply( + self.coprocessor_host.as_mut().unwrap(), + self.concurrency_manager.clone(), + ) + .unwrap_or_else(|e| fatal!("gc worker failed to observe lock apply: {}", e)); + if let Err(e) = gc_worker.start_auto_gc(&engines.engines.kv, auto_gc_config, safe_point) { fatal!("failed to start auto_gc on storage, error: {}", e); } diff --git a/components/test_coprocessor/src/store.rs b/components/test_coprocessor/src/store.rs index a85f75c422e..f19b0a113bd 100644 --- a/components/test_coprocessor/src/store.rs +++ b/components/test_coprocessor/src/store.rs @@ -132,7 +132,7 @@ impl Default for Store { impl Store { pub fn from_storage(storage: StorageApiV1) -> Self { Self { - store: SyncTestStorageApiV1::from_storage(storage, GcConfig::default()).unwrap(), + store: SyncTestStorageApiV1::from_storage(0, storage, GcConfig::default()).unwrap(), current_ts: 1.into(), last_committed_ts: TimeStamp::zero(), handles: vec![], diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 683de2e5a7d..2c1798877d9 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -337,8 +337,9 @@ impl ServerCluster { tx, cfg.gc.clone(), Default::default(), + Arc::new(region_info_accessor.clone()), ); - gc_worker.start().unwrap(); + gc_worker.start(node_id).unwrap(); gc_worker .start_observe_lock_apply(&mut coprocessor_host, concurrency_manager.clone()) .unwrap(); diff --git a/components/test_storage/src/assert_storage.rs b/components/test_storage/src/assert_storage.rs index 5cb6e43d8cb..3a641a322a2 100644 --- 
a/components/test_storage/src/assert_storage.rs +++ b/components/test_storage/src/assert_storage.rs @@ -1,7 +1,10 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. use api_version::{ApiV1, KvFormat}; -use kvproto::kvrpcpb::{Context, KeyRange, LockInfo}; +use kvproto::{ + kvrpcpb::{Context, KeyRange, LockInfo}, + metapb, +}; use test_raftstore::{Cluster, ServerCluster, SimulateEngine}; use tikv::storage::{ self, @@ -27,7 +30,7 @@ impl Default for AssertionStorage { fn default() -> Self { AssertionStorage { ctx: Context::default(), - store: SyncTestStorageBuilder::default().build().unwrap(), + store: SyncTestStorageBuilder::default().build(0).unwrap(), } } } @@ -36,7 +39,7 @@ impl AssertionStorage { pub fn new() -> Self { AssertionStorage { ctx: Context::default(), - store: SyncTestStorageBuilder::new().build().unwrap(), + store: SyncTestStorageBuilder::new().build(0).unwrap(), } } } @@ -51,19 +54,27 @@ impl AssertionStorage { (cluster, storage) } - pub fn update_with_key_byte(&mut self, cluster: &mut Cluster, key: &[u8]) { + pub fn update_with_key_byte( + &mut self, + cluster: &mut Cluster, + key: &[u8], + ) -> metapb::Region { // ensure the leader of range which contains current key has been elected cluster.must_get(key); let region = cluster.get_region(key); let leader = cluster.leader_of_region(region.get_id()).unwrap(); if leader.get_store_id() == self.ctx.get_peer().get_store_id() { - return; + return region; } + let store_id = leader.store_id; let engine = cluster.sim.rl().storages[&leader.get_id()].clone(); self.ctx.set_region_id(region.get_id()); self.ctx.set_region_epoch(region.get_region_epoch().clone()); self.ctx.set_peer(leader); - self.store = SyncTestStorageBuilder::from_engine(engine).build().unwrap(); + self.store = SyncTestStorageBuilder::from_engine(engine) + .build(store_id) + .unwrap(); + region } pub fn delete_ok_for_cluster( @@ -173,7 +184,7 @@ impl AssertionStorage { break; } 
self.expect_not_leader_or_stale_command(res.unwrap_err()); - self.update_with_key_byte(cluster, key) + self.update_with_key_byte(cluster, key); } assert!(success); @@ -188,7 +199,7 @@ impl AssertionStorage { break; } self.expect_not_leader_or_stale_command(res.unwrap_err()); - self.update_with_key_byte(cluster, key) + self.update_with_key_byte(cluster, key); } assert!(success); } @@ -197,16 +208,17 @@ impl AssertionStorage { &mut self, cluster: &mut Cluster, region_key: &[u8], + mut region: metapb::Region, safe_point: impl Into, ) { let safe_point = safe_point.into(); for _ in 0..3 { - let ret = self.store.gc(self.ctx.clone(), safe_point); + let ret = self.store.gc(region, self.ctx.clone(), safe_point); if ret.is_ok() { return; } self.expect_not_leader_or_stale_command(ret.unwrap_err()); - self.update_with_key_byte(cluster, region_key); + region = self.update_with_key_byte(cluster, region_key); } panic!("failed with 3 retry!"); } @@ -224,7 +236,9 @@ impl AssertionStorage { self.delete_ok_for_cluster(cluster, &key, 1000, 1050); self.get_none_from_cluster(cluster, &key, 2000); - self.gc_ok_for_cluster(cluster, &key, 2000); + + let region = cluster.get_region(&key); + self.gc_ok_for_cluster(cluster, &key, region, 2000); self.get_none_from_cluster(cluster, &key, 3000); } } @@ -793,8 +807,10 @@ impl AssertionStorage { self.expect_invalid_tso_err(resp, start_ts, commit_ts.unwrap()) } - pub fn gc_ok(&self, safe_point: impl Into) { - self.store.gc(self.ctx.clone(), safe_point.into()).unwrap(); + pub fn gc_ok(&self, region: metapb::Region, safe_point: impl Into) { + self.store + .gc(region, self.ctx.clone(), safe_point.into()) + .unwrap(); } pub fn delete_range_ok(&self, start_key: &[u8], end_key: &[u8]) { @@ -1069,11 +1085,11 @@ impl AssertionStorage { .unwrap_err(); } - pub fn test_txn_store_gc(&self, key: &str) { + pub fn test_txn_store_gc(&self, key: &str, region: metapb::Region) { let key_bytes = key.as_bytes(); self.put_ok(key_bytes, b"v1", 5, 10); 
self.put_ok(key_bytes, b"v2", 15, 20); - self.gc_ok(30); + self.gc_ok(region, 30); self.get_none(key_bytes, 15); self.get_ok(key_bytes, 25, b"v2"); } @@ -1086,7 +1102,7 @@ impl AssertionStorage { } self.delete_ok(&key, 1000, 1050); self.get_none(&key, 2000); - self.gc_ok(2000); + self.gc_ok(metapb::Region::default(), 2000); self.get_none(&key, 3000); } } diff --git a/components/test_storage/src/sync_storage.rs b/components/test_storage/src/sync_storage.rs index b32dbe08fd5..c0b47bab2cf 100644 --- a/components/test_storage/src/sync_storage.rs +++ b/components/test_storage/src/sync_storage.rs @@ -8,8 +8,14 @@ use std::{ use api_version::{ApiV1, KvFormat}; use collections::HashMap; use futures::executor::block_on; -use kvproto::kvrpcpb::{ChecksumAlgorithm, Context, GetRequest, KeyRange, LockInfo, RawGetRequest}; -use raftstore::{coprocessor::RegionInfoProvider, router::RaftStoreBlackHole}; +use kvproto::{ + kvrpcpb::{ChecksumAlgorithm, Context, GetRequest, KeyRange, LockInfo, RawGetRequest}, + metapb, +}; +use raftstore::{ + coprocessor::{region_info_accessor::MockRegionInfoProvider, RegionInfoProvider}, + router::RaftStoreBlackHole, +}; use tikv::{ server::gc_worker::{AutoGcConfig, GcConfig, GcSafePointProvider, GcWorker}, storage::{ @@ -78,7 +84,7 @@ impl SyncTestStorageBuilder { self } - pub fn build(mut self) -> Result> { + pub fn build(mut self, store_id: u64) -> Result> { let mut builder = TestStorageBuilder::<_, _, F>::from_engine_and_lock_mgr( self.engine.clone(), DummyLockManager, @@ -87,7 +93,11 @@ impl SyncTestStorageBuilder { builder = builder.config(config); } builder = builder.set_api_version(F::TAG); - SyncTestStorage::from_storage(builder.build()?, self.gc_config.unwrap_or_default()) + SyncTestStorage::from_storage( + store_id, + builder.build()?, + self.gc_config.unwrap_or_default(), + ) } } @@ -106,6 +116,7 @@ pub type SyncTestStorageApiV1 = SyncTestStorage; impl SyncTestStorage { pub fn from_storage( + store_id: u64, storage: Storage, config: 
GcConfig, ) -> Result { @@ -116,8 +127,9 @@ impl SyncTestStorage { tx, config, Default::default(), + Arc::new(MockRegionInfoProvider::new(Vec::new())), ); - gc_worker.start()?; + gc_worker.start(store_id)?; Ok(Self { gc_worker, store: storage, @@ -126,10 +138,11 @@ impl SyncTestStorage { pub fn start_auto_gc( &mut self, + kv_engine: &E::Local, cfg: AutoGcConfig, ) { self.gc_worker - .start_auto_gc(cfg, Arc::new(AtomicU64::new(0))) + .start_auto_gc(kv_engine, cfg, Arc::new(AtomicU64::new(0))) .unwrap(); } @@ -334,8 +347,13 @@ impl SyncTestStorage { .unwrap() } - pub fn gc(&self, _: Context, safe_point: impl Into) -> Result<()> { - wait_op!(|cb| self.gc_worker.gc(safe_point.into(), cb)).unwrap() + pub fn gc( + &self, + region: metapb::Region, + _: Context, + safe_point: impl Into, + ) -> Result<()> { + wait_op!(|cb| self.gc_worker.gc(region, safe_point.into(), cb)).unwrap() } pub fn delete_range( diff --git a/components/test_storage/src/util.rs b/components/test_storage/src/util.rs index 62b46ffd082..032fe24c60c 100644 --- a/components/test_storage/src/util.rs +++ b/components/test_storage/src/util.rs @@ -36,7 +36,9 @@ pub fn new_raft_storage_with_store_count( let (cluster, engine, ctx) = new_raft_engine(count, key); ( cluster, - SyncTestStorageBuilder::from_engine(engine).build().unwrap(), + SyncTestStorageBuilder::from_engine(engine) + .build(ctx.peer.as_ref().unwrap().store_id) + .unwrap(), ctx, ) } diff --git a/components/tikv_kv/src/btree_engine.rs b/components/tikv_kv/src/btree_engine.rs index 757c3e2c378..5fa9b3bd7f0 100644 --- a/components/tikv_kv/src/btree_engine.rs +++ b/components/tikv_kv/src/btree_engine.rs @@ -81,10 +81,6 @@ impl Engine for BTreeEngine { unimplemented!(); } - fn snapshot_on_kv_engine(&self, _: &[u8], _: &[u8]) -> EngineResult { - unimplemented!(); - } - fn modify_on_kv_engine(&self, _: Vec) -> EngineResult<()> { unimplemented!(); } @@ -237,6 +233,7 @@ impl Snapshot for BTreeEngineSnapshot { fn get(&self, key: &Key) -> EngineResult> { 
self.get_cf(CF_DEFAULT, key) } + fn get_cf(&self, cf: CfName, key: &Key) -> EngineResult> { let tree_cf = self.inner_engine.get_cf(cf); let tree = tree_cf.read().unwrap(); @@ -246,14 +243,17 @@ impl Snapshot for BTreeEngineSnapshot { Some(v) => Ok(Some(v.clone())), } } + fn get_cf_opt(&self, _: ReadOptions, cf: CfName, key: &Key) -> EngineResult> { self.get_cf(cf, key) } + #[inline] fn iter(&self, cf: CfName, iter_opt: IterOptions) -> EngineResult { let tree = self.inner_engine.get_cf(cf); Ok(BTreeEngineIterator::new(tree, iter_opt)) } + fn ext(&self) -> DummySnapshotExt { DummySnapshotExt } diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 64a05a98622..b72a2e487b0 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -251,7 +251,9 @@ impl WriteData { pub struct SnapContext<'a> { pub pb_ctx: &'a Context, pub read_id: Option, - pub start_ts: TimeStamp, + // When start_ts is None and `stale_read` is true, it means acquire a snapshot without any + // consistency guarantee. + pub start_ts: Option, // `key_ranges` is used in replica read. It will send to // the leader via raft "read index" to check memory locks. pub key_ranges: Vec, @@ -267,8 +269,6 @@ pub trait Engine: Send + Clone + 'static { /// Local storage engine. fn kv_engine(&self) -> Self::Local; - fn snapshot_on_kv_engine(&self, start_key: &[u8], end_key: &[u8]) -> Result; - /// Write modifications into internal local engine directly. fn modify_on_kv_engine(&self, modifies: Vec) -> Result<()>; @@ -365,12 +365,15 @@ pub trait Snapshot: Sync + Send + Clone { /// Get the value associated with `key` in `cf` column family, with Options /// in `opts` fn get_cf_opt(&self, opts: ReadOptions, cf: CfName, key: &Key) -> Result>; + fn iter(&self, cf: CfName, iter_opt: IterOptions) -> Result; + // The minimum key this snapshot can retrieve. 
#[inline] fn lower_bound(&self) -> Option<&[u8]> { None } + // The maximum key can be fetched from the snapshot should less than the upper // bound. #[inline] diff --git a/components/tikv_kv/src/mock_engine.rs b/components/tikv_kv/src/mock_engine.rs index bec883c1f71..71d424b1b0f 100644 --- a/components/tikv_kv/src/mock_engine.rs +++ b/components/tikv_kv/src/mock_engine.rs @@ -152,10 +152,6 @@ impl Engine for MockEngine { self.base.kv_engine() } - fn snapshot_on_kv_engine(&self, start_key: &[u8], end_key: &[u8]) -> Result { - self.base.snapshot_on_kv_engine(start_key, end_key) - } - fn modify_on_kv_engine(&self, modifies: Vec) -> Result<()> { self.base.modify_on_kv_engine(modifies) } diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 031b182b9fe..82ebfe0e1bd 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -210,10 +210,6 @@ impl Engine for RocksEngine { self.engines.kv.clone() } - fn snapshot_on_kv_engine(&self, _: &[u8], _: &[u8]) -> Result { - self.snapshot(Default::default()) - } - fn modify_on_kv_engine(&self, modifies: Vec) -> Result<()> { write_modifies(&self.engines.kv, modifies) } diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 8c2e6d571c0..5f1027e738a 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -355,7 +355,7 @@ impl Endpoint { ) -> impl std::future::Future> { let mut snap_ctx = SnapContext { pb_ctx: &ctx.context, - start_ts: ctx.txn_start_ts, + start_ts: Some(ctx.txn_start_ts), ..Default::default() }; // need to pass start_ts and ranges to check memory locks for replica read diff --git a/src/lib.rs b/src/lib.rs index 5b7bf6e2ac1..a961abc7d38 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,6 +26,7 @@ #![feature(drain_filter)] #![feature(deadline_api)] #![feature(generic_associated_types)] +#![feature(let_else)] #[macro_use(fail_point)] extern crate fail; diff --git 
a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index 23f007eb8be..7a5d62ee79d 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -392,7 +392,6 @@ impl WriteCompactionFilter { let task = GcTask::GcKeys { keys: mem::replace(&mut self.mvcc_deletions, empty), safe_point: self.safe_point.into(), - store_id: self.regions_provider.0, region_info_provider: self.regions_provider.1.clone(), }; self.schedule_gc_task(task, false); diff --git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index b2a6a9d02dc..d4c1a8fd830 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -11,6 +11,7 @@ use std::{ }; use engine_traits::KvEngine; +use kvproto::metapb::Region; use pd_client::FeatureGate; use raftstore::{coprocessor::RegionInfoProvider, store::util::find_peer}; use tikv_util::{time::Instant, worker::Scheduler}; @@ -545,23 +546,14 @@ impl GcMan processed_regions: &mut usize, ) -> GcManagerResult> { // Get the information of the next region to do GC. 
- let (range, next_key) = self.get_next_gc_context(from_key); - let (region_id, start, end) = match range { - Some((r, s, e)) => (r, s, e), - None => return Ok(None), - }; + let (region, next_key) = self.get_next_gc_context(from_key); + let Some(region) = region else { return Ok(None) }; + + let hex_start = format!("{:?}", log_wrappers::Value::key(region.get_start_key())); + let hex_end = format!("{:?}", log_wrappers::Value::key(region.get_end_key())); + debug!("trying gc"; "region_id" => region.id, "start_key" => &hex_start, "end_key" => &hex_end); - let hex_start = format!("{:?}", log_wrappers::Value::key(&start)); - let hex_end = format!("{:?}", log_wrappers::Value::key(&end)); - debug!("trying gc"; "start_key" => &hex_start, "end_key" => &hex_end); - - if let Err(e) = sync_gc( - &self.worker_scheduler, - region_id, - start, - end, - self.curr_safe_point(), - ) { + if let Err(e) = sync_gc(&self.worker_scheduler, region, self.curr_safe_point()) { // Ignore the error and continue, since it's useless to retry this. // TODO: Find a better way to handle errors. Maybe we should retry. warn!("failed gc"; "start_key" => &hex_start, "end_key" => &hex_end, "err" => ?e); @@ -580,7 +572,7 @@ impl GcMan /// the first is the next region can be sent to GC worker; /// the second is the next key which can be passed into this method later. 
#[allow(clippy::type_complexity)] - fn get_next_gc_context(&mut self, key: Key) -> (Option<(u64, Vec, Vec)>, Option) { + fn get_next_gc_context(&mut self, key: Key) -> (Option, Option) { let (tx, rx) = mpsc::channel(); let store_id = self.cfg.self_store_id; @@ -612,15 +604,14 @@ impl GcMan }); match seek_region_res { - Ok(Some(mut region)) => { - let r = region.get_id(); - let (s, e) = (region.take_start_key(), region.take_end_key()); - let next_key = if e.is_empty() { + Ok(Some(region)) => { + let end_key = region.get_end_key(); + let next_key = if end_key.is_empty() { None } else { - Some(Key::from_encoded_slice(&e)) + Some(Key::from_encoded_slice(end_key)) }; - (Some((r, s, e)), next_key) + (Some(region), next_key) } Ok(None) => (None, None), Err(e) => { @@ -812,10 +803,8 @@ mod tests { .iter() .map(|task| match task { GcTask::Gc { - region_id, - safe_point, - .. - } => (*region_id, *safe_point), + region, safe_point, .. + } => (region.id, *safe_point), _ => unreachable!(), }) .collect(); diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index eadd1d77fb2..dfa0dec4ddc 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -32,11 +32,12 @@ use raftstore::{ router::RaftStoreRouter, store::{msg::StoreMsg, util::find_peer}, }; -use tikv_kv::{CfStatistics, CursorBuilder, Modify}; +use tikv_kv::{CfStatistics, CursorBuilder, Modify, SnapContext}; use tikv_util::{ config::{Tracker, VersionTrack}, time::{duration_to_sec, Instant, Limiter, SlowTimer}, worker::{Builder as WorkerBuilder, LazyWorker, Runnable, ScheduleError, Scheduler}, + Either, }; use txn_types::{Key, TimeStamp}; @@ -92,22 +93,18 @@ where E: KvEngine, { Gc { - region_id: u64, - start_key: Vec, - end_key: Vec, + region: Region, safe_point: TimeStamp, callback: Callback<()>, }, GcKeys { keys: Vec, safe_point: TimeStamp, - store_id: u64, region_info_provider: Arc, }, RawGcKeys { keys: Vec, safe_point: TimeStamp, - store_id: u64, 
region_info_provider: Arc, }, UnsafeDestroyRange { @@ -115,6 +112,7 @@ where start_key: Key, end_key: Key, callback: Callback<()>, + region_info_provider: Arc, }, PhysicalScanLock { ctx: Context, @@ -122,6 +120,7 @@ where start_key: Key, limit: usize, callback: Callback>, + region_info_provider: Arc, }, /// If GC in compaction filter is enabled, versions on default CF will be /// handled with `DB::delete` in write CF's compaction filter. However if @@ -164,14 +163,10 @@ where fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { GcTask::Gc { - start_key, - end_key, - safe_point, - .. + region, safe_point, .. } => f .debug_struct("Gc") - .field("start_key", &log_wrappers::Value::key(start_key)) - .field("end_key", &log_wrappers::Value::key(end_key)) + .field("region", region) .field("safe_point", safe_point) .finish(), GcTask::GcKeys { .. } => f.debug_struct("GcKeys").finish(), @@ -204,6 +199,7 @@ where E: Engine, RR: RaftStoreRouter, { + store_id: u64, engine: E, raft_store_router: RR, @@ -241,46 +237,66 @@ impl MvccRaw { } } -struct KeysInRegions> { - keys: Peekable>, - regions: Peekable, -} +// Return regions that keys are related to. 
+fn get_regions_for_gc( + store_id: u64, + keys: &[Key], + region_provider: Arc, +) -> Result> { + assert!(!keys.is_empty()); -impl> Iterator for KeysInRegions { - type Item = Key; - fn next(&mut self) -> Option { - loop { - let region = self.regions.peek()?; - let key = self.keys.peek()?.as_encoded().as_slice(); - if key < region.get_start_key() { - self.keys.next(); - } else if region.get_end_key().is_empty() || key < region.get_end_key() { - return self.keys.next(); - } else { - self.regions.next(); - } + if keys.len() >= 2 { + let start = keys.first().unwrap().as_encoded(); + let end = keys.last().unwrap().as_encoded(); + let regions = box_try!(region_provider.get_regions_in_range(start, end)) + .into_iter() + .filter(|r| find_peer(r, store_id).is_some()) + .peekable() + .collect(); + + Ok(regions) + } else { + // We only have one key. + let key = keys[0].as_encoded(); + let region = box_try!(region_provider.find_region_by_key(key)); + if find_peer(®ion, store_id).is_none() { + return Ok(Vec::new()); } + + Ok(vec![region]) } } -fn get_keys_in_regions( - keys: Vec, - regions_provider: Option<(u64, Arc)>, -) -> Result>> { - if keys.len() >= 2 { - if let Some((store_id, region_info_provider)) = regions_provider { - let start = keys.first().unwrap().as_encoded(); - let end = keys.last().unwrap().as_encoded(); - let regions = box_try!(region_info_provider.get_regions_in_range(start, end)) - .into_iter() - .filter(move |r| find_peer(r, store_id).is_some()) - .peekable(); +fn get_keys_in_region(keys: &mut Peekable>, region: &Region) -> Vec { + let mut keys_in_region = Vec::new(); + + loop { + let Some(key) = keys.peek() else {break}; + let key = key.as_encoded().as_slice(); - let keys = keys.into_iter().peekable(); - return Ok(Box::new(KeysInRegions { keys, regions })); + if key < region.get_start_key() { + keys.next(); + } else if region.get_end_key().is_empty() || key < region.get_end_key() { + keys_in_region.push(keys.next().unwrap()); + } else { + break; } } - 
Ok(Box::new(keys.into_iter())) + + keys_in_region +} + +fn init_snap_ctx(store_id: u64, region: &Region) -> Context { + let mut ctx = Context::default(); + ctx.region_id = region.id; + ctx.region_epoch = region.region_epoch.clone(); + ctx.stale_read = true; + + if let Some(peer) = region.peers.iter().find(|peer| peer.store_id == store_id) { + ctx.set_peer(peer.clone()); + } + + ctx } impl GcRunner @@ -289,6 +305,7 @@ where RR: RaftStoreRouter, { pub fn new( + store_id: u64, engine: E, raft_store_router: RR, flow_info_sender: Sender, @@ -301,6 +318,7 @@ where f64::INFINITY }); Self { + store_id, engine, raft_store_router, flow_info_sender, @@ -359,19 +377,19 @@ where Ok(()) } - fn gc(&mut self, start_key: &[u8], end_key: &[u8], safe_point: TimeStamp) -> Result<()> { - if !self.need_gc(start_key, end_key, safe_point) { + fn gc(&mut self, region: Region, safe_point: TimeStamp) -> Result<()> { + if !self.need_gc(region.get_start_key(), region.get_end_key(), safe_point) { GC_SKIPPED_COUNTER.inc(); return Ok(()); } let mut reader = MvccReader::new( - self.engine.snapshot_on_kv_engine(start_key, end_key)?, + self.get_snapshot(self.store_id, ®ion)?, Some(ScanMode::Forward), false, ); - let mut next_key = Some(Key::from_encoded_slice(start_key)); + let mut next_key = Some(Key::from_encoded_slice(region.get_start_key())); while next_key.is_some() { // Scans at most `GcConfig.batch_keys` keys. 
let (keys, updated_next_key) = reader @@ -383,14 +401,14 @@ where GC_EMPTY_RANGE_COUNTER.inc(); break; } - self.gc_keys(keys, safe_point, None)?; + self.gc_keys(keys, safe_point, Either::Left(region.clone()))?; } self.mut_stats(GcKeyMode::txn).add(&reader.statistics); debug!( "gc has finished"; - "start_key" => log_wrappers::Value::key(start_key), - "end_key" => log_wrappers::Value::key(end_key), + "start_key" => log_wrappers::Value::key(region.get_start_key()), + "end_key" => log_wrappers::Value::key(region.get_end_key()), "safe_point" => safe_point ); Ok(()) @@ -400,10 +418,11 @@ where &mut self, keys: Vec, safe_point: TimeStamp, - regions_provider: Option<(u64, Arc)>, + region_or_provider: Either>, ) -> Result<(usize, usize)> { + let store_id = self.store_id; let count = keys.len(); - let range_start_key = keys.first().unwrap().clone().into_encoded(); + let range_start_key = keys.first().unwrap().clone(); let range_end_key = { let mut k = keys .last() @@ -411,77 +430,110 @@ where .to_raw() .map_err(|e| EngineError::Codec(e))?; k.push(0); - Key::from_raw(&k).into_encoded() + Key::from_raw(&k) }; - let snapshot = self - .engine - .snapshot_on_kv_engine(&range_start_key, &range_end_key)?; - let mut keys = get_keys_in_regions(keys, regions_provider)?; - - let mut txn = Self::new_txn(); - let mut reader = if count <= 1 { - MvccReader::new(snapshot, None, false) - } else { - // keys are closing to each other in one batch of gc keys, so do not use - // prefix seek here to avoid too many seeks - MvccReader::new(snapshot, Some(ScanMode::Forward), false) + let (mut handled_keys, mut wasted_keys) = (0, 0); + let regions = match region_or_provider { + Either::Left(region) => vec![region], + Either::Right(region_provider) => get_regions_for_gc(store_id, &keys, region_provider)?, }; - let (mut handled_keys, mut wasted_keys) = (0, 0); - let mut gc_info = GcInfo::default(); - let mut next_gc_key = keys.next(); - while let Some(ref key) = next_gc_key { - if let Err(e) = 
self.gc_key(safe_point, key, &mut gc_info, &mut txn, &mut reader) { - GC_KEY_FAILURES.inc(); - error!(?e; "GC meets failure"; "key" => %key,); - // Switch to the next key if meets failure. - gc_info.is_completed = true; - } + // First item is fetched to initialize the reader and kv_engine + if regions.is_empty() { + return Ok((handled_keys, wasted_keys)); + } - if gc_info.is_completed { - if gc_info.found_versions >= GC_LOG_FOUND_VERSION_THRESHOLD { - debug!( - "GC found plenty versions for a key"; - "key" => %key, - "versions" => gc_info.found_versions, - ); - } - if gc_info.deleted_versions as usize >= GC_LOG_DELETED_VERSION_THRESHOLD { - debug!( - "GC deleted plenty versions for a key"; - "key" => %key, - "versions" => gc_info.deleted_versions, - ); + let mut txn = Self::new_txn(); + let mut gc_info = GcInfo::default(); + let mut keys = keys.into_iter().peekable(); + for region in regions { + let mut reader = self.create_reader( + count, + ®ion, + range_start_key.clone(), + range_end_key.clone(), + )?; + + let mut keys_in_region = get_keys_in_region(&mut keys, ®ion).into_iter(); + let mut next_gc_key = keys_in_region.next(); + while let Some(ref key) = next_gc_key { + if let Err(e) = self.gc_key(safe_point, key, &mut gc_info, &mut txn, &mut reader) { + GC_KEY_FAILURES.inc(); + error!(?e; "GC meets failure"; "key" => %key,); + // Switch to the next key if meets failure. 
+ gc_info.is_completed = true; } - if gc_info.found_versions > 0 { - handled_keys += 1; + if gc_info.is_completed { + if gc_info.found_versions >= GC_LOG_FOUND_VERSION_THRESHOLD { + debug!( + "GC found plenty versions for a key"; + "key" => %key, + "versions" => gc_info.found_versions, + ); + } + if gc_info.deleted_versions as usize >= GC_LOG_DELETED_VERSION_THRESHOLD { + debug!( + "GC deleted plenty versions for a key"; + "key" => %key, + "versions" => gc_info.deleted_versions, + ); + } + + if gc_info.found_versions > 0 { + handled_keys += 1; + } else { + wasted_keys += 1; + } + next_gc_key = keys_in_region.next(); + gc_info = GcInfo::default(); } else { - wasted_keys += 1; + Self::flush_txn(txn, &self.limiter, &self.engine)?; + reader = self.create_reader( + count, + ®ion, + range_start_key.clone(), + range_end_key.clone(), + )?; + txn = Self::new_txn(); } - next_gc_key = keys.next(); - gc_info = GcInfo::default(); - } else { - Self::flush_txn(txn, &self.limiter, &self.engine)?; - let snapshot = self - .engine - .snapshot_on_kv_engine(&range_start_key, &range_end_key)?; - txn = Self::new_txn(); - reader = MvccReader::new(snapshot, Some(ScanMode::Forward), false); } } + Self::flush_txn(txn, &self.limiter, &self.engine)?; Ok((handled_keys, wasted_keys)) } + fn create_reader( + &self, + key_count: usize, + region: &Region, + range_start_key: Key, + range_end_key: Key, + ) -> Result> { + let mut reader = { + let snapshot = self.get_snapshot(self.store_id, region)?; + + if key_count <= 1 { + MvccReader::new(snapshot, None, false) + } else { + // keys are closing to each other in one batch of gc keys, so do not use + // prefix seek here to avoid too many seeks + MvccReader::new(snapshot, Some(ScanMode::Forward), false) + } + }; + reader.set_range(Some(range_start_key), Some(range_end_key)); + Ok(reader) + } + fn raw_gc_keys( &mut self, keys: Vec, safe_point: TimeStamp, - regions_provider: Option<(u64, Arc)>, + regions_provider: Arc, ) -> Result<(usize, usize)> { - let 
range_start_key = keys.first().unwrap().clone().into_encoded(); + let range_start_key = keys.first().unwrap().clone(); let range_end_key = { let mut k = keys .last() @@ -489,56 +541,64 @@ where .to_raw() .map_err(|e| EngineError::Codec(e))?; k.push(0); - Key::from_raw(&k).into_encoded() + Key::from_raw(&k) }; - let mut snapshot = self - .engine - .snapshot_on_kv_engine(&range_start_key, &range_end_key)?; - let mut raw_modifies = MvccRaw::new(); - let mut keys = get_keys_in_regions(keys, regions_provider)?; - let (mut handled_keys, mut wasted_keys) = (0, 0); - let mut gc_info = GcInfo::default(); - let mut next_gc_key = keys.next(); - while let Some(ref key) = next_gc_key { - if let Err(e) = self.raw_gc_key( - safe_point, - key, - &mut raw_modifies, - &mut snapshot, - &mut gc_info, - ) { - GC_KEY_FAILURES.inc(); - error!(?e; "Raw GC meets failure"; "key" => %key,); - // Switch to the next key if meets failure. - gc_info.is_completed = true; - } + let regions = get_regions_for_gc(self.store_id, &keys, regions_provider)?; - if gc_info.is_completed { - if gc_info.found_versions >= GC_LOG_FOUND_VERSION_THRESHOLD { - debug!( - "RawKV GC found plenty versions for a key"; - "key" => %key, - "versions" => gc_info.found_versions, - ); - } - if gc_info.found_versions > 0 { - handled_keys += 1; - } else { - wasted_keys += 1; + if regions.is_empty() { + return Ok((handled_keys, wasted_keys)); + } + + let mut gc_info = GcInfo::default(); + let mut keys = keys.into_iter().peekable(); + for region in regions { + let mut snapshot = self.get_snapshot(self.store_id, ®ion)?; + + let mut keys_in_region = get_keys_in_region(&mut keys, ®ion).into_iter(); + let mut next_gc_key = keys_in_region.next(); + while let Some(ref key) = next_gc_key { + if let Err(e) = self.raw_gc_key( + safe_point, + key, + &range_start_key, + &range_end_key, + &mut raw_modifies, + &mut snapshot, + &mut gc_info, + ) { + GC_KEY_FAILURES.inc(); + error!(?e; "Raw GC meets failure"; "key" => %key,); + // Switch to 
the next key if meets failure. + gc_info.is_completed = true; } - gc_info.report_metrics(STAT_RAW_KEYMODE); + if gc_info.is_completed { + if gc_info.found_versions >= GC_LOG_FOUND_VERSION_THRESHOLD { + debug!( + "RawKV GC found plenty versions for a key"; + "key" => %key, + "versions" => gc_info.found_versions, + ); + } + if gc_info.found_versions > 0 { + handled_keys += 1; + } else { + wasted_keys += 1; + } - next_gc_key = keys.next(); - gc_info = GcInfo::default(); - } else { - // Flush writeBatch to engine. - Self::flush_raw_gc(raw_modifies, &self.limiter, &self.engine)?; - // After flush, reset raw_modifies. - raw_modifies = MvccRaw::new(); + gc_info.report_metrics(STAT_RAW_KEYMODE); + + next_gc_key = keys_in_region.next(); + gc_info = GcInfo::default(); + } else { + // Flush writeBatch to engine. + Self::flush_raw_gc(raw_modifies, &self.limiter, &self.engine)?; + // After flush, reset raw_modifies. + raw_modifies = MvccRaw::new(); + } } } @@ -551,12 +611,16 @@ where &mut self, safe_point: TimeStamp, key: &Key, + range_start_key: &Key, + range_end_key: &Key, raw_modifies: &mut MvccRaw, kv_snapshot: &mut ::Snap, gc_info: &mut GcInfo, ) -> Result<()> { let start_key = key.clone().append_ts(safe_point.prev()); - let mut cursor = CursorBuilder::new(kv_snapshot, CF_DEFAULT).build()?; + let mut cursor = CursorBuilder::new(kv_snapshot, CF_DEFAULT) + .range(Some(range_start_key.clone()), Some(range_end_key.clone())) + .build()?; let mut statistics = CfStatistics::default(); cursor.seek(&start_key, &mut statistics)?; @@ -629,7 +693,13 @@ where Ok(()) } - fn unsafe_destroy_range(&self, ctx: &Context, start_key: &Key, end_key: &Key) -> Result<()> { + fn unsafe_destroy_range( + &self, + ctx: &Context, + start_key: &Key, + end_key: &Key, + _regions_provider: Arc, + ) -> Result<()> { info!( "unsafe destroy range started"; "start_key" => %start_key, "end_key" => %end_key @@ -727,15 +797,31 @@ where max_ts: TimeStamp, start_key: &Key, limit: usize, + regions_provider: Arc, ) 
-> Result> { - let snap = self - .engine - .snapshot_on_kv_engine(start_key.as_encoded(), &[]) - .unwrap(); - let mut reader = MvccReader::new(snap, Some(ScanMode::Forward), false); - let (locks, _) = reader - .scan_locks(Some(start_key), None, |l| l.ts <= max_ts, limit) - .map_err(TxnError::from_mvcc)?; + let regions = box_try!(regions_provider.get_regions_in_range(start_key.as_encoded(), &[])) + .into_iter() + .filter(move |r| find_peer(r, self.store_id).is_some()); + + let mut first_round = true; + let mut locks = Vec::new(); + for region in regions { + let start_key = { + if first_round { + first_round = false; + start_key.clone() + } else { + Key::from_raw(region.get_start_key()) + } + }; + let snap = self.get_snapshot(self.store_id, ®ion)?; + let mut reader = MvccReader::new(snap, Some(ScanMode::Forward), false); + let (locks_this_region, _) = reader + .scan_locks(Some(&start_key), None, |l| l.ts <= max_ts, limit) + .map_err(TxnError::from_mvcc)?; + + locks.extend(locks_this_region); + } let mut lock_infos = Vec::with_capacity(locks.len()); for (key, lock) in locks { @@ -771,6 +857,18 @@ where self.cfg = incoming.clone(); } } + + fn get_snapshot(&self, store_id: u64, region: &Region) -> Result<::Snap> { + let ctx = init_snap_ctx(store_id, region); + let snap_ctx = SnapContext { + pb_ctx: &ctx, + ..Default::default() + }; + + Ok(block_on(async { + tikv_kv::snapshot(&self.engine, snap_ctx).await + })?) + } } impl Runnable for GcRunner @@ -803,32 +901,30 @@ where match task { GcTask::Gc { - start_key, - end_key, + region, safe_point, callback, - .. 
} => { - let res = self.gc(&start_key, &end_key, safe_point); + let res = self.gc(region.clone(), safe_point); update_metrics(res.is_err()); callback(res); self.update_statistics_metrics(GcKeyMode::txn); slow_log!( T timer, "GC on range [{}, {}), safe_point {}", - log_wrappers::Value::key(&start_key), - log_wrappers::Value::key(&end_key), + log_wrappers::Value::key(region.get_start_key()), + log_wrappers::Value::key(region.get_end_key()), safe_point ); } GcTask::GcKeys { keys, safe_point, - store_id, region_info_provider, } => { let old_seek_tombstone = self.mut_stats(GcKeyMode::txn).write.seek_tombstone; - match self.gc_keys(keys, safe_point, Some((store_id, region_info_provider))) { + + match self.gc_keys(keys, safe_point, Either::Right(region_info_provider)) { Ok((handled, wasted)) => { GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED .with_label_values(&[STAT_TXN_KEYMODE]) @@ -851,10 +947,9 @@ where GcTask::RawGcKeys { keys, safe_point, - store_id, region_info_provider, } => { - match self.raw_gc_keys(keys, safe_point, Some((store_id, region_info_provider))) { + match self.raw_gc_keys(keys, safe_point, region_info_provider) { Ok((handled, wasted)) => { GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED .with_label_values(&[STAT_RAW_KEYMODE]) @@ -876,8 +971,10 @@ where start_key, end_key, callback, + region_info_provider, } => { - let res = self.unsafe_destroy_range(&ctx, &start_key, &end_key); + let res = + self.unsafe_destroy_range(&ctx, &start_key, &end_key, region_info_provider); update_metrics(res.is_err()); callback(res); slow_log!( @@ -893,8 +990,15 @@ where start_key, limit, callback, + region_info_provider, } => { - let res = self.handle_physical_scan_lock(&ctx, max_ts, &start_key, limit); + let res = self.handle_physical_scan_lock( + &ctx, + max_ts, + &start_key, + limit, + region_info_provider, + ); update_metrics(res.is_err()); callback(res); slow_log!( @@ -952,17 +1056,13 @@ fn handle_gc_task_schedule_error(e: ScheduleError>) -> Res /// Schedules a `GcTask` to 
the `GcRunner`. fn schedule_gc( scheduler: &Scheduler>, - region_id: u64, - start_key: Vec, - end_key: Vec, + region: Region, safe_point: TimeStamp, callback: Callback<()>, ) -> Result<()> { scheduler .schedule(GcTask::Gc { - region_id, - start_key, - end_key, + region, safe_point, callback, }) @@ -972,15 +1072,10 @@ fn schedule_gc( /// Does GC synchronously. pub fn sync_gc( scheduler: &Scheduler>, - region_id: u64, - start_key: Vec, - end_key: Vec, + region: Region, safe_point: TimeStamp, ) -> Result<()> { - wait_op!(|callback| schedule_gc( - scheduler, region_id, start_key, end_key, safe_point, callback - )) - .unwrap_or_else(|| { + wait_op!(|callback| schedule_gc(scheduler, region, safe_point, callback)).unwrap_or_else(|| { error!("failed to receive result of gc"); Err(box_err!("gc_worker: failed to receive result of gc")) }) @@ -999,6 +1094,7 @@ where raft_store_router: RR, /// Used to signal unsafe destroy range is executed. flow_info_sender: Option>, + region_info_provider: Arc, config_manager: GcWorkerConfigManager, @@ -1034,6 +1130,7 @@ where applied_lock_collector: self.applied_lock_collector.clone(), gc_manager_handle: self.gc_manager_handle.clone(), feature_gate: self.feature_gate.clone(), + region_info_provider: self.region_info_provider.clone(), } } } @@ -1069,6 +1166,7 @@ where flow_info_sender: Sender, cfg: GcConfig, feature_gate: FeatureGate, + region_info_provider: Arc, ) -> GcWorker { let worker_builder = WorkerBuilder::new("gc-worker").pending_capacity(GC_MAX_PENDING_TASKS); let worker = worker_builder.create().lazy_build("gc-worker"); @@ -1084,11 +1182,13 @@ where applied_lock_collector: None, gc_manager_handle: Arc::new(Mutex::new(None)), feature_gate, + region_info_provider, } } pub fn start_auto_gc( &self, + kv_engine: &E::Local, cfg: AutoGcConfig, safe_point: Arc, // Store safe point here. 
) -> Result<()> { @@ -1098,7 +1198,7 @@ where ); info!("initialize compaction filter to perform GC when necessary"); - self.engine.kv_engine().init_compaction_filter( + kv_engine.init_compaction_filter( cfg.self_store_id, safe_point.clone(), self.config_manager.clone(), @@ -1122,8 +1222,9 @@ where Ok(()) } - pub fn start(&mut self) -> Result<()> { + pub fn start(&mut self, store_id: u64) -> Result<()> { let runner = GcRunner::new( + store_id, self.engine.clone(), self.raft_store_router.clone(), self.flow_info_sender.take().unwrap(), @@ -1163,14 +1264,10 @@ where } /// Only for tests. - pub fn gc(&self, safe_point: TimeStamp, callback: Callback<()>) -> Result<()> { - let start_key = vec![]; - let end_key = vec![]; + pub fn gc(&self, region: Region, safe_point: TimeStamp, callback: Callback<()>) -> Result<()> { self.worker_scheduler .schedule(GcTask::Gc { - region_id: 0, - start_key, - end_key, + region, safe_point, callback, }) @@ -1203,6 +1300,7 @@ where start_key, end_key, callback, + region_info_provider: self.region_info_provider.clone(), }) .or_else(handle_gc_task_schedule_error) } @@ -1228,6 +1326,7 @@ where start_key, limit, callback, + region_info_provider: self.region_info_provider.clone(), }) .or_else(handle_gc_task_schedule_error) } @@ -1271,20 +1370,22 @@ pub mod test_gc_worker { use std::sync::Arc; use engine_rocks::{RocksEngine, RocksSnapshot}; - use engine_traits::KvEngine; use kvproto::{ kvrpcpb::Context, metapb::{Peer, Region}, }; use raftstore::store::RegionSnapshot; - use tikv_kv::{write_modifies, Engine, Modify, SnapContext, WriteData}; + use tikv_kv::write_modifies; use txn_types::{Key, TimeStamp}; use crate::{ server::gc_worker::{GcSafePointProvider, Result as GcWorkerResult}, storage::{ - kv, - kv::{Callback as EngineCallback, Result as EngineResult}, + kv::{ + self, Callback as EngineCallback, Modify, Result as EngineResult, SnapContext, + WriteData, + }, + Engine, }, }; @@ -1306,22 +1407,6 @@ pub mod test_gc_worker { self.0.kv_engine() } - 
fn snapshot_on_kv_engine( - &self, - start_key: &[u8], - end_key: &[u8], - ) -> kv::Result { - let mut region = Region::default(); - region.set_start_key(start_key.to_owned()); - region.set_end_key(end_key.to_owned()); - // Use a fake peer to avoid panic. - region.mut_peers().push(Default::default()); - Ok(RegionSnapshot::from_snapshot( - Arc::new(self.kv_engine().snapshot()), - Arc::new(region), - )) - } - fn modify_on_kv_engine(&self, mut modifies: Vec) -> kv::Result<()> { for modify in &mut modifies { match modify { @@ -1419,8 +1504,12 @@ mod tests { }; use raft::StateRole; use raftstore::{ - coprocessor::{region_info_accessor::RegionInfoAccessor, RegionChangeEvent}, + coprocessor::{ + region_info_accessor::{MockRegionInfoProvider, RegionInfoAccessor}, + RegionChangeEvent, + }, router::RaftStoreBlackHole, + store::util::new_peer, }; use tikv_kv::Snapshot; use tikv_util::{codec::number::NumberEncoder, future::paired_future_callback}; @@ -1444,6 +1533,64 @@ mod tests { }, }; + #[test] + fn test_get_regions_for_gc() { + fn init_region( + start_key: &[u8], + end_key: &[u8], + region_id: u64, + store_id: Option, + ) -> Region { + let start_key = Key::from_encoded(start_key.to_vec()); + let end_key = Key::from_encoded(end_key.to_vec()); + let mut region = Region::default(); + region.set_start_key(start_key.as_encoded().clone()); + region.set_end_key(end_key.as_encoded().clone()); + region.id = region_id; + if let Some(store_id) = store_id { + region.mut_peers().push(Peer::default()); + region.mut_peers()[0].set_store_id(store_id); + } + region + } + + let store_id = 1; + + let r1 = init_region(b"", b"k10", 1, None); + let r2 = init_region(b"k20", b"k30", 2, Some(store_id)); + let r3 = init_region(b"k30", b"", 3, Some(store_id)); + + let ri_provider = Arc::new(MockRegionInfoProvider::new(vec![ + r1, + r2.clone(), + r3.clone(), + ])); + + let keys = vec![Key::from_encoded(b"k05".to_vec())]; + let regions = get_regions_for_gc(store_id, &keys, 
ri_provider.clone()).unwrap(); + // store id not match + assert!(regions.is_empty()); + + let keys = vec![ + Key::from_encoded(b"k05".to_vec()), + Key::from_encoded(b"k10".to_vec()), + Key::from_encoded(b"k25".to_vec()), + ]; + let regions = get_regions_for_gc(store_id, &keys, ri_provider.clone()).unwrap(); + let rs = vec![r2.clone()]; + assert_eq!(regions, rs); + + let keys = vec![ + Key::from_encoded(b"k05".to_vec()), + Key::from_encoded(b"k10".to_vec()), + Key::from_encoded(b"k25".to_vec()), + Key::from_encoded(b"k35".to_vec()), + ]; + let regions = get_regions_for_gc(store_id, &keys, ri_provider).unwrap(); + let rs = vec![r2, r3]; + assert_eq!(regions, rs); + } + /// Assert the data in `storage` is the same as `expected_data`. Keys in /// `expected_data` should be encoded form without ts. fn check_data( @@ -1476,8 +1623,10 @@ mod tests { commit_ts: impl Into, start_key: &[u8], end_key: &[u8], + split_key: &[u8], ) -> Result<()> { // Return Result from this function so we can use the `wait_op` macro here. + let store_id = 1; let engine = TestEngineBuilder::new().build().unwrap(); let storage = @@ -1486,10 +1635,26 @@ mod tests { .unwrap(); let gate = FeatureGate::default(); gate.set_version("5.0.0").unwrap(); + let (tx, _rx) = mpsc::channel(); - let mut gc_worker = - GcWorker::new(engine, RaftStoreBlackHole, tx, GcConfig::default(), gate); - gc_worker.start().unwrap(); + + let mut region1 = Region::default(); + region1.mut_peers().push(new_peer(store_id, 1)); + region1.set_end_key(split_key.to_vec()); + + let mut region2 = Region::default(); + region2.mut_peers().push(new_peer(store_id, 2)); + region2.set_start_key(split_key.to_vec()); + + let mut gc_worker = GcWorker::new( + engine, + RaftStoreBlackHole, + tx, + GcConfig::default(), + gate, + Arc::new(MockRegionInfoProvider::new(vec![region1, region2])), + ); + gc_worker.start(store_id).unwrap(); // Convert keys to key value pairs, where the value is "value-{key}". 
let data: BTreeMap<_, _> = init_keys .iter() @@ -1567,6 +1732,7 @@ mod tests { 10, b"key2", b"key4", + b"key3", ) .unwrap(); @@ -1576,6 +1742,7 @@ mod tests { 10, b"key3", b"key7", + b"key5", ) .unwrap(); @@ -1591,6 +1758,7 @@ mod tests { 10, b"key1", b"key9", + b"key5", ) .unwrap(); @@ -1606,6 +1774,7 @@ mod tests { 10, b"key2\x00", b"key4", + b"key3", ) .unwrap(); @@ -1620,6 +1789,7 @@ mod tests { 10, b"key1\x00", b"key1\x00\x00", + b"key1", ) .unwrap(); @@ -1634,12 +1804,14 @@ mod tests { 10, b"key1\x00", b"key1\x00", + b"key1", ) .unwrap(); } #[test] fn test_physical_scan_lock() { + let store_id = 1; let engine = TestEngineBuilder::new().build().unwrap(); let prefixed_engine = PrefixedEngine(engine); let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr( @@ -1649,14 +1821,17 @@ mod tests { .build() .unwrap(); let (tx, _rx) = mpsc::channel(); + let mut region = Region::default(); + region.mut_peers().push(new_peer(store_id, 0)); let mut gc_worker = GcWorker::new( prefixed_engine, RaftStoreBlackHole, tx, GcConfig::default(), FeatureGate::default(), + Arc::new(MockRegionInfoProvider::new(vec![region])), ); - gc_worker.start().unwrap(); + gc_worker.start(store_id).unwrap(); let physical_scan_lock = |max_ts: u64, start_key, limit| { let (cb, f) = paired_future_callback(); @@ -1718,20 +1893,27 @@ mod tests { #[test] fn test_gc_keys_with_region_info_provider() { + let store_id = 1; let engine = TestEngineBuilder::new().build().unwrap(); let prefixed_engine = PrefixedEngine(engine.clone()); let (tx, _rx) = mpsc::channel(); let feature_gate = FeatureGate::default(); feature_gate.set_version("5.0.0").unwrap(); + + let sp_provider = MockSafePointProvider(200); + let mut host = CoprocessorHost::::default(); + let ri_provider = RegionInfoAccessor::new(&mut host); + let mut gc_worker = GcWorker::new( prefixed_engine.clone(), RaftStoreBlackHole, tx, GcConfig::default(), feature_gate, + Arc::new(ri_provider.clone()), ); - gc_worker.start().unwrap(); + 
gc_worker.start(store_id).unwrap(); let mut r1 = Region::default(); r1.set_id(1); @@ -1745,7 +1927,7 @@ mod tests { r2.set_start_key(format!("k{:02}", 20).into_bytes()); r2.set_end_key(format!("k{:02}", 30).into_bytes()); r2.mut_peers().push(Peer::default()); - r2.mut_peers()[0].set_store_id(1); + r2.mut_peers()[0].set_store_id(store_id); let mut r3 = Region::default(); r3.set_id(3); @@ -1753,14 +1935,14 @@ mod tests { r3.set_start_key(format!("k{:02}", 30).into_bytes()); r3.set_end_key(b"".to_vec()); r3.mut_peers().push(Peer::default()); - r3.mut_peers()[0].set_store_id(1); + r3.mut_peers()[0].set_store_id(store_id); - let sp_provider = MockSafePointProvider(200); - let mut host = CoprocessorHost::::default(); - let ri_provider = RegionInfoAccessor::new(&mut host); let auto_gc_cfg = AutoGcConfig::new(sp_provider, ri_provider, 1); let safe_point = Arc::new(AtomicU64::new(0)); - gc_worker.start_auto_gc(auto_gc_cfg, safe_point).unwrap(); + let kv_engine = engine.get_rocksdb(); + gc_worker + .start_auto_gc(&kv_engine, auto_gc_cfg, safe_point) + .unwrap(); host.on_region_changed(&r1, RegionChangeEvent::Create, StateRole::Leader); host.on_region_changed(&r2, RegionChangeEvent::Create, StateRole::Leader); host.on_region_changed(&r3, RegionChangeEvent::Create, StateRole::Leader); @@ -1811,12 +1993,14 @@ mod tests { #[test] fn test_gc_keys_statistics() { + let store_id = 1; let engine = TestEngineBuilder::new().build().unwrap(); let prefixed_engine = PrefixedEngine(engine.clone()); let (tx, _rx) = mpsc::channel(); let cfg = GcConfig::default(); let mut runner = GcRunner::new( + store_id, prefixed_engine.clone(), RaftStoreBlackHole, tx, @@ -1832,7 +2016,7 @@ mod tests { r1.set_start_key(b"".to_vec()); r1.set_end_key(b"".to_vec()); r1.mut_peers().push(Peer::default()); - r1.mut_peers()[0].set_store_id(1); + r1.mut_peers()[0].set_store_id(store_id); let mut host = CoprocessorHost::::default(); let ri_provider = RegionInfoAccessor::new(&mut host); @@ -1854,7 +2038,11 @@ mod 
tests { assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek, 0); assert_eq!(runner.mut_stats(GcKeyMode::txn).write.next, 0); runner - .gc_keys(keys, TimeStamp::new(200), Some((1, Arc::new(ri_provider)))) + .gc_keys( + keys, + TimeStamp::new(200), + Either::Right(Arc::new(ri_provider)), + ) .unwrap(); assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek, 1); assert_eq!(runner.mut_stats(GcKeyMode::txn).write.next, 100 * 2); @@ -1862,6 +2050,7 @@ mod tests { #[test] fn test_raw_gc_keys() { + let store_id = 1; // init engine and gc runner let mut cfg = DbConfig::default(); cfg.defaultcf.disable_auto_compactions = true; @@ -1874,6 +2063,7 @@ mod tests { let (tx, _rx) = mpsc::channel(); let cfg = GcConfig::default(); let mut runner = GcRunner::new( + store_id, prefixed_engine.clone(), RaftStoreBlackHole, tx, @@ -1889,7 +2079,7 @@ mod tests { r1.set_start_key(b"".to_vec()); r1.set_end_key(b"".to_vec()); r1.mut_peers().push(Peer::default()); - r1.mut_peers()[0].set_store_id(1); + r1.mut_peers()[0].set_store_id(store_id); let mut host = CoprocessorHost::::default(); let ri_provider = Arc::new(RegionInfoAccessor::new(&mut host)); @@ -1946,13 +2136,15 @@ mod tests { .collect(); runner - .raw_gc_keys(to_gc_keys, TimeStamp::new(120), Some((1, ri_provider))) + .raw_gc_keys(to_gc_keys, TimeStamp::new(120), ri_provider) .unwrap(); assert_eq!(7, runner.mut_stats(GcKeyMode::raw).data.next); assert_eq!(2, runner.mut_stats(GcKeyMode::raw).data.seek); - let snapshot = prefixed_engine.snapshot_on_kv_engine(&[], &[]).unwrap(); + let snapshot = + block_on(async { tikv_kv::snapshot(&prefixed_engine, SnapContext::default()).await }) + .unwrap(); test_raws .clone() @@ -1972,6 +2164,7 @@ mod tests { let (tx, _rx) = mpsc::channel(); let cfg = GcConfig::default(); let mut runner = GcRunner::new( + 1, prefixed_engine.clone(), RaftStoreBlackHole, tx, @@ -2008,7 +2201,7 @@ mod tests { .gc_keys( vec![Key::from_raw(b"k2\x00")], TimeStamp::new(200), - Some((1, ri_provider.clone())), + 
Either::Right(ri_provider.clone()), ) .unwrap(); assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek_tombstone, 20); @@ -2023,7 +2216,7 @@ mod tests { .gc_keys( vec![Key::from_raw(b"k2")], TimeStamp::new(200), - Some((1, ri_provider.clone())), + Either::Right(ri_provider.clone()), ) .unwrap(); assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek_tombstone, 0); @@ -2038,7 +2231,7 @@ mod tests { .gc_keys( vec![Key::from_raw(b"k1"), Key::from_raw(b"k2")], TimeStamp::new(200), - Some((1, ri_provider.clone())), + Either::Right(ri_provider.clone()), ) .unwrap(); assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek_tombstone, 0); @@ -2069,7 +2262,7 @@ mod tests { .gc_keys( vec![Key::from_raw(b"k2")], safepoint.into(), - Some((1, ri_provider)), + Either::Right(ri_provider), ) .unwrap(); // The first batch will leave tombstones that will be seen while processing the @@ -2085,6 +2278,7 @@ mod tests { #[test] fn delete_range_when_worker_is_full() { + let store_id = 1; let engine = PrefixedEngine(TestEngineBuilder::new().build().unwrap()); must_prewrite_put(&engine, b"key", b"value", b"key", 10); must_commit(&engine, b"key", 10, 20); @@ -2096,12 +2290,16 @@ mod tests { gate.set_version("5.0.0").unwrap(); let (tx, _rx) = mpsc::channel(); + let mut region = Region::default(); + region.mut_peers().push(new_peer(store_id, 1)); + let mut gc_worker = GcWorker::new( engine.clone(), RaftStoreBlackHole, tx, GcConfig::default(), gate, + Arc::new(MockRegionInfoProvider::new(vec![region.clone()])), ); // Before starting gc_worker, fill the scheduler to full. 
@@ -2109,9 +2307,7 @@ mod tests { gc_worker .scheduler() .schedule(GcTask::Gc { - region_id: 0, - start_key: vec![], - end_key: vec![], + region: region.clone(), safe_point: TimeStamp::from(100), callback: Box::new(|_res| {}), }) @@ -2121,6 +2317,7 @@ mod tests { let (tx, rx) = mpsc::channel(); gc_worker .gc( + Region::default(), TimeStamp::from(1), Box::new(move |res| { tx.send(res).unwrap(); @@ -2143,11 +2340,74 @@ mod tests { ) .unwrap(); - gc_worker.start().unwrap(); + gc_worker.start(store_id).unwrap(); // After the worker starts running, the destroy range task should run, // and the key in the range will be deleted. rx.recv_timeout(Duration::from_secs(10)).unwrap().unwrap(); must_get_none(&engine, b"key", 30); } + + #[test] + fn test_keys_in_regions_iteration() { + fn init_region(start_key: &[u8], end_key: &[u8]) -> Region { + let start_key = Key::from_raw(start_key); + let end_key = Key::from_raw(end_key); + let mut region = Region::default(); + region.set_start_key(start_key.as_encoded().clone()); + region.set_end_key(end_key.as_encoded().clone()); + region + } + + fn generate_keys(start: u64, end: u64) -> Vec { + (start..end) + .into_iter() + .map(|i| { + let key = format!("k{:02}", i); + Key::from_raw(key.as_bytes()) + }) + .collect::>() + } + + // One region cover all keys + let keys = generate_keys(1, 4); + let region = init_region(b"k01", b"k04"); + let mut iter = keys.clone().into_iter().peekable(); + let ks = get_keys_in_region(&mut iter, ®ion); + assert!(iter.peek().is_none()); + assert_eq!(ks, keys); + + // More than one regions cover all keys + let keys = generate_keys(1, 9); + let region1 = init_region(b"k01", b"k04"); + let region2 = init_region(b"k04", b"k06"); + let region3 = init_region(b"k06", b"k09"); + let mut iter = keys.into_iter().peekable(); + let ks = get_keys_in_region(&mut iter, ®ion1); + assert_eq!(ks, generate_keys(1, 4)); + let ks = get_keys_in_region(&mut iter, ®ion2); + assert_eq!(ks, generate_keys(4, 6)); + let ks = 
get_keys_in_region(&mut iter, ®ion3); + assert_eq!(ks, generate_keys(6, 9)); + assert!(iter.peek().is_none()); + + // Cover partial keys + let keys = generate_keys(1, 9); + let region1 = init_region(b"k01", b"k04"); + let region2 = init_region(b"k06", b"k09"); + let mut iter = keys.into_iter().peekable(); + let ks = get_keys_in_region(&mut iter, ®ion1); + assert_eq!(ks, generate_keys(1, 4)); + let ks = get_keys_in_region(&mut iter, ®ion2); + assert_eq!(ks, generate_keys(6, 9)); + assert!(iter.peek().is_none()); + + // No key + let keys = generate_keys(1, 9); + let region = init_region(b"k11", b"k20"); + let mut iter = keys.into_iter().peekable(); + let ks = get_keys_in_region(&mut iter, ®ion); + assert!(iter.peek().is_none()); + assert!(ks.is_empty()); + } } diff --git a/src/server/gc_worker/rawkv_compaction_filter.rs b/src/server/gc_worker/rawkv_compaction_filter.rs index e50e33c1b38..652b2cc54ac 100644 --- a/src/server/gc_worker/rawkv_compaction_filter.rs +++ b/src/server/gc_worker/rawkv_compaction_filter.rs @@ -239,7 +239,6 @@ impl RawCompactionFilter { let task = GcTask::RawGcKeys { keys: mem::replace(&mut self.mvcc_deletions, empty), safe_point: self.safe_point.into(), - store_id: self.regions_provider.0, region_info_provider: self.regions_provider.1.clone(), }; self.schedule_gc_task(task, false); diff --git a/src/server/raftkv.rs b/src/server/raftkv.rs index 9443ba26cd4..85aedb4d538 100644 --- a/src/server/raftkv.rs +++ b/src/server/raftkv.rs @@ -18,7 +18,6 @@ use engine_traits::{CfName, KvEngine, MvccProperties, Snapshot}; use kvproto::{ errorpb, kvrpcpb::{Context, IsolationLevel}, - metapb, raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse, RaftRequestHeader, Request, Response}, }; use raft::{ @@ -37,6 +36,7 @@ use raftstore::{ }, }; use thiserror::Error; +use tikv_kv::write_modifies; use tikv_util::{codec::number::NumberEncoder, time::Instant}; use txn_types::{Key, TimeStamp, TxnExtra, TxnExtraScheduler, WriteBatchFlags}; @@ -44,8 +44,8 @@ use 
super::metrics::*; use crate::storage::{ self, kv, kv::{ - write_modifies, Callback, Engine, Error as KvError, ErrorInner as KvErrorInner, - ExtCallback, Modify, SnapContext, WriteData, + Callback, Engine, Error as KvError, ErrorInner as KvErrorInner, ExtCallback, Modify, + SnapContext, WriteData, }, }; @@ -202,10 +202,10 @@ where ) -> Result<()> { let mut header = self.new_request_header(ctx.pb_ctx); let mut flags = 0; - if ctx.pb_ctx.get_stale_read() && !ctx.start_ts.is_zero() { + if ctx.pb_ctx.get_stale_read() && ctx.start_ts.map_or(true, |ts| !ts.is_zero()) { let mut data = [0u8; 8]; (&mut data[..]) - .encode_u64(ctx.start_ts.into_inner()) + .encode_u64(ctx.start_ts.unwrap_or_default().into_inner()) .unwrap(); flags |= WriteBatchFlags::STALE_READ.bits(); header.set_flag_data(data.into()); @@ -329,18 +329,6 @@ where self.engine.clone() } - fn snapshot_on_kv_engine(&self, start_key: &[u8], end_key: &[u8]) -> kv::Result { - let mut region = metapb::Region::default(); - region.set_start_key(start_key.to_owned()); - region.set_end_key(end_key.to_owned()); - // Use a fake peer to avoid panic. 
- region.mut_peers().push(Default::default()); - Ok(RegionSnapshot::::from_raw( - self.engine.clone(), - region, - )) - } - fn modify_on_kv_engine(&self, mut modifies: Vec) -> kv::Result<()> { for modify in &mut modifies { match modify { @@ -438,8 +426,9 @@ where let mut req = Request::default(); req.set_cmd_type(CmdType::Snap); - if !ctx.key_ranges.is_empty() && !ctx.start_ts.is_zero() { - req.mut_read_index().set_start_ts(ctx.start_ts.into_inner()); + if !ctx.key_ranges.is_empty() && ctx.start_ts.map_or(false, |ts| !ts.is_zero()) { + req.mut_read_index() + .set_start_ts(ctx.start_ts.as_ref().unwrap().into_inner()); req.mut_read_index() .set_key_ranges(mem::take(&mut ctx.key_ranges).into()); } diff --git a/src/server/server.rs b/src/server/server.rs index f202e30e761..23c52793c5f 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -421,7 +421,10 @@ mod tests { use engine_rocks::RocksSnapshot; use grpcio::EnvBuilder; use kvproto::raft_serverpb::RaftMessage; - use raftstore::store::{transport::Transport, *}; + use raftstore::{ + coprocessor::region_info_accessor::MockRegionInfoProvider, + store::{transport::Transport, *}, + }; use resource_metering::ResourceTagFactory; use security::SecurityConfig; use tikv_util::quota_limiter::QuotaLimiter; @@ -481,6 +484,7 @@ mod tests { // 'https_proxy', and retry. 
#[test] fn test_peer_resolve() { + let mock_store_id = 5; let cfg = Config { addr: "127.0.0.1:0".to_owned(), ..Default::default() @@ -507,8 +511,9 @@ mod tests { tx, Default::default(), Default::default(), + Arc::new(MockRegionInfoProvider::new(Vec::new())), ); - gc_worker.start().unwrap(); + gc_worker.start(mock_store_id).unwrap(); let quick_fail = Arc::new(AtomicBool::new(false)); let cfg = Arc::new(VersionTrack::new(cfg)); @@ -535,7 +540,6 @@ mod tests { .build() .unwrap(), ); - let mock_store_id = 5; let addr = Arc::new(Mutex::new(None)); let (check_leader_scheduler, _) = tikv_util::worker::dummy_scheduler(); let mut server = Server::new( diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 0a7801848b9..dd9a1a01c33 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1179,7 +1179,7 @@ impl Storage { let mut snap_ctx = SnapContext { pb_ctx: &ctx, - start_ts, + start_ts: Some(start_ts), ..Default::default() }; let mut key_range = KeyRange::default(); @@ -2715,7 +2715,7 @@ fn prepare_snap_ctx<'a>( let mut snap_ctx = SnapContext { pb_ctx, - start_ts, + start_ts: Some(start_ts), ..Default::default() }; if need_check_locks_in_replica_read(pb_ctx) { @@ -2791,18 +2791,6 @@ impl Engine for TxnTestEngine { self.engine.kv_engine() } - fn snapshot_on_kv_engine( - &self, - start_key: &[u8], - end_key: &[u8], - ) -> tikv_kv::Result { - let snapshot = self.engine.snapshot_on_kv_engine(start_key, end_key)?; - Ok(TxnTestSnapshot { - snapshot, - txn_ext: self.txn_ext.clone(), - }) - } - fn modify_on_kv_engine(&self, modifies: Vec) -> tikv_kv::Result<()> { self.engine.modify_on_kv_engine(modifies) } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index eb83af270a1..3f2771f0b59 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -124,6 +124,9 @@ pub struct MvccReader { lock_cursor: Option>, write_cursor: Option>, + lower_bound: Option, + upper_bound: Option, + /// None means following operations 
are performed on a single user key, /// i.e., different versions of the same key. It can use prefix seek to /// speed up reads from the write-cf. @@ -149,6 +152,8 @@ impl MvccReader { data_cursor: None, lock_cursor: None, write_cursor: None, + lower_bound: None, + upper_bound: None, scan_mode, current_key: None, fill_cache, @@ -164,6 +169,8 @@ impl MvccReader { data_cursor: None, lock_cursor: None, write_cursor: None, + lower_bound: None, + upper_bound: None, scan_mode, current_key: None, fill_cache: !ctx.get_not_fill_cache(), @@ -421,6 +428,7 @@ impl MvccReader { let cursor = CursorBuilder::new(&self.snapshot, CF_DEFAULT) .fill_cache(self.fill_cache) .scan_mode(self.get_scan_mode(true)) + .range(self.lower_bound.clone(), self.upper_bound.clone()) .build()?; self.data_cursor = Some(cursor); } @@ -434,6 +442,7 @@ impl MvccReader { // Only use prefix seek in non-scan mode. .prefix_seek(self.scan_mode.is_none()) .scan_mode(self.get_scan_mode(true)) + .range(self.lower_bound.clone(), self.upper_bound.clone()) .hint_min_ts(hint_min_ts) .build()?; self.write_cursor = Some(cursor); @@ -446,6 +455,7 @@ impl MvccReader { let cursor = CursorBuilder::new(&self.snapshot, CF_LOCK) .fill_cache(self.fill_cache) .scan_mode(self.get_scan_mode(true)) + .range(self.lower_bound.clone(), self.upper_bound.clone()) .build()?; self.lock_cursor = Some(cursor); } @@ -676,6 +686,11 @@ impl MvccReader { None => OldValue::None, }) } + + pub fn set_range(&mut self, lower: Option, upper: Option) { + self.lower_bound = lower; + self.upper_bound = upper; + } } #[cfg(test)] diff --git a/src/storage/txn/store.rs b/src/storage/txn/store.rs index 2af968c21be..7300074bfde 100644 --- a/src/storage/txn/store.rs +++ b/src/storage/txn/store.rs @@ -809,21 +809,27 @@ mod tests { fn get(&self, _: &Key) -> EngineResult> { Ok(None) } + fn get_cf(&self, _: CfName, _: &Key) -> EngineResult> { Ok(None) } + fn get_cf_opt(&self, _: ReadOptions, _: CfName, _: &Key) -> EngineResult> { Ok(None) } + fn iter(&self, _: 
CfName, _: IterOptions) -> EngineResult { Ok(MockRangeSnapshotIter::default()) } + fn lower_bound(&self) -> Option<&[u8]> { Some(self.start.as_slice()) } + fn upper_bound(&self) -> Option<&[u8]> { Some(self.end.as_slice()) } + fn ext(&self) -> DummySnapshotExt { DummySnapshotExt } diff --git a/tests/benches/hierarchy/storage/mod.rs b/tests/benches/hierarchy/storage/mod.rs index 3b906f0fffc..15873e2e424 100644 --- a/tests/benches/hierarchy/storage/mod.rs +++ b/tests/benches/hierarchy/storage/mod.rs @@ -13,7 +13,7 @@ use super::{BenchConfig, EngineFactory, DEFAULT_ITERATIONS}; fn storage_raw_get>(b: &mut Bencher<'_>, config: &BenchConfig) { let engine = config.engine_factory.build(); let store = SyncTestStorageBuilderApiV1::from_engine(engine) - .build() + .build(0) .unwrap(); b.iter_batched( || { @@ -37,7 +37,7 @@ fn storage_raw_get>(b: &mut Bencher<'_>, config: fn storage_prewrite>(b: &mut Bencher<'_>, config: &BenchConfig) { let engine = config.engine_factory.build(); let store = SyncTestStorageBuilderApiV1::from_engine(engine) - .build() + .build(0) .unwrap(); b.iter_batched( || { @@ -68,7 +68,7 @@ fn storage_prewrite>(b: &mut Bencher<'_>, config: fn storage_commit>(b: &mut Bencher<'_>, config: &BenchConfig) { let engine = config.engine_factory.build(); let store = SyncTestStorageBuilderApiV1::from_engine(engine) - .build() + .build(0) .unwrap(); b.iter_batched( || { diff --git a/tests/benches/misc/storage/incremental_get.rs b/tests/benches/misc/storage/incremental_get.rs index eb65f55fd72..a57bd3c90d5 100644 --- a/tests/benches/misc/storage/incremental_get.rs +++ b/tests/benches/misc/storage/incremental_get.rs @@ -11,7 +11,7 @@ use tikv::storage::{Engine, SnapshotStore, Statistics, Store}; use txn_types::{Key, Mutation}; fn table_lookup_gen_data() -> (SnapshotStore>, Vec) { - let store = SyncTestStorageBuilder::default().build().unwrap(); + let store = SyncTestStorageBuilder::default().build(0).unwrap(); let mut mutations = Vec::new(); let mut keys = Vec::new(); 
for i in 0..30000 { diff --git a/tests/benches/misc/storage/mvcc_reader.rs b/tests/benches/misc/storage/mvcc_reader.rs index df0f1d662d3..3e784ef6b73 100644 --- a/tests/benches/misc/storage/mvcc_reader.rs +++ b/tests/benches/misc/storage/mvcc_reader.rs @@ -7,7 +7,7 @@ use tikv::storage::{kv::RocksEngine, mvcc::SnapshotReader, Engine}; use txn_types::{Key, Mutation}; fn prepare_mvcc_data(key: &Key, n: u64) -> SyncTestStorageApiV1 { - let store = SyncTestStorageBuilderApiV1::default().build().unwrap(); + let store = SyncTestStorageBuilderApiV1::default().build(0).unwrap(); for ts in 1..=n { let mutation = Mutation::make_put(key.clone(), b"value".to_vec()); store diff --git a/tests/benches/misc/storage/scan.rs b/tests/benches/misc/storage/scan.rs index f17f61e1195..088ac013545 100644 --- a/tests/benches/misc/storage/scan.rs +++ b/tests/benches/misc/storage/scan.rs @@ -11,7 +11,7 @@ use txn_types::{Key, Mutation}; #[ignore] #[bench] fn bench_tombstone_scan(b: &mut Bencher) { - let store = SyncTestStorageBuilder::default().build().unwrap(); + let store = SyncTestStorageBuilder::default().build(0).unwrap(); let mut ts_generator = 1..; let mut kvs = KvGenerator::new(100, 1000); diff --git a/tests/failpoints/cases/test_gc_metrics.rs b/tests/failpoints/cases/test_gc_metrics.rs index ede14988744..c0f0d990f11 100644 --- a/tests/failpoints/cases/test_gc_metrics.rs +++ b/tests/failpoints/cases/test_gc_metrics.rs @@ -16,7 +16,9 @@ use kvproto::{ use pd_client::FeatureGate; use raft::StateRole; use raftstore::{ - coprocessor::{CoprocessorHost, RegionChangeEvent}, + coprocessor::{ + region_info_accessor::MockRegionInfoProvider, CoprocessorHost, RegionChangeEvent, + }, router::RaftStoreBlackHole, RegionInfoAccessor, }; @@ -128,6 +130,7 @@ fn test_txn_mvcc_filtered() { #[test] fn test_txn_gc_keys_handled() { + let store_id = 1; GC_COMPACTION_FILTER_MVCC_DELETION_MET.reset(); GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED.reset(); @@ -143,8 +146,9 @@ fn test_txn_gc_keys_handled() { tx, 
GcConfig::default(), feature_gate, + Arc::new(MockRegionInfoProvider::new(vec![])), ); - gc_worker.start().unwrap(); + gc_worker.start(store_id).unwrap(); let mut r1 = Region::default(); r1.set_id(1); @@ -152,14 +156,18 @@ fn test_txn_gc_keys_handled() { r1.set_start_key(b"".to_vec()); r1.set_end_key(b"".to_vec()); r1.mut_peers().push(Peer::default()); - r1.mut_peers()[0].set_store_id(1); + r1.mut_peers()[0].set_store_id(store_id); let sp_provider = MockSafePointProvider(200); let mut host = CoprocessorHost::::default(); let ri_provider = RegionInfoAccessor::new(&mut host); let auto_gc_cfg = AutoGcConfig::new(sp_provider, ri_provider, 1); let safe_point = Arc::new(AtomicU64::new(500)); - gc_worker.start_auto_gc(auto_gc_cfg, safe_point).unwrap(); + + let kv_engine = engine.get_rocksdb(); + gc_worker + .start_auto_gc(&kv_engine, auto_gc_cfg, safe_point) + .unwrap(); host.on_region_changed(&r1, RegionChangeEvent::Create, StateRole::Leader); let db = engine.kv_engine().as_inner().clone(); @@ -267,6 +275,7 @@ fn test_raw_mvcc_filtered() { #[test] fn test_raw_gc_keys_handled() { + let store_id = 1; GC_COMPACTION_FILTER_MVCC_DELETION_MET.reset(); GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED.reset(); @@ -285,8 +294,9 @@ fn test_raw_gc_keys_handled() { tx, GcConfig::default(), feature_gate, + Arc::new(MockRegionInfoProvider::new(vec![])), ); - gc_worker.start().unwrap(); + gc_worker.start(store_id).unwrap(); let mut r1 = Region::default(); r1.set_id(1); @@ -294,14 +304,18 @@ fn test_raw_gc_keys_handled() { r1.set_start_key(b"".to_vec()); r1.set_end_key(b"".to_vec()); r1.mut_peers().push(Peer::default()); - r1.mut_peers()[0].set_store_id(1); + r1.mut_peers()[0].set_store_id(store_id); let sp_provider = MockSafePointProvider(200); let mut host = CoprocessorHost::::default(); let ri_provider = RegionInfoAccessor::new(&mut host); - let auto_gc_cfg = AutoGcConfig::new(sp_provider, ri_provider, 1); + let auto_gc_cfg = AutoGcConfig::new(sp_provider, ri_provider, store_id); let 
safe_point = Arc::new(AtomicU64::new(500)); - gc_worker.start_auto_gc(auto_gc_cfg, safe_point).unwrap(); + + let kv_engine = engine.get_rocksdb(); + gc_worker + .start_auto_gc(&kv_engine, auto_gc_cfg, safe_point) + .unwrap(); host.on_region_changed(&r1, RegionChangeEvent::Create, StateRole::Leader); let db = engine.kv_engine().as_inner().clone(); diff --git a/tests/failpoints/cases/test_gc_worker.rs b/tests/failpoints/cases/test_gc_worker.rs index 73031b10283..9f80d942cd8 100644 --- a/tests/failpoints/cases/test_gc_worker.rs +++ b/tests/failpoints/cases/test_gc_worker.rs @@ -438,8 +438,10 @@ fn init_compaction_filter(cluster: &Cluster, store_id: u64) { let sim = cluster.sim.rl(); let gc_worker = sim.get_gc_worker(store_id); + let kv_engine = cluster.get_engine(store_id); gc_worker .start_auto_gc( + &kv_engine, AutoGcConfig::new(MockSafePointProvider, MockRegionInfoProvider, 1), Arc::new(AtomicU64::new(0)), ) diff --git a/tests/integrations/config/dynamic/gc_worker.rs b/tests/integrations/config/dynamic/gc_worker.rs index 3014ebc3ba2..e8b437f941a 100644 --- a/tests/integrations/config/dynamic/gc_worker.rs +++ b/tests/integrations/config/dynamic/gc_worker.rs @@ -1,8 +1,13 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{sync::mpsc::channel, time::Duration}; +use std::{ + sync::{mpsc::channel, Arc}, + time::Duration, +}; -use raftstore::router::RaftStoreBlackHole; +use raftstore::{ + coprocessor::region_info_accessor::MockRegionInfoProvider, router::RaftStoreBlackHole, +}; use tikv::{ config::{ConfigController, Module, TikvConfig}, server::gc_worker::{GcConfig, GcTask, GcWorker}, @@ -34,8 +39,9 @@ fn setup_cfg_controller( tx, cfg.gc.clone(), Default::default(), + Arc::new(MockRegionInfoProvider::new(Vec::new())), ); - gc_worker.start().unwrap(); + gc_worker.start(0).unwrap(); let cfg_controller = ConfigController::new(cfg); cfg_controller.register(Module::Gc, Box::new(gc_worker.get_config_manager())); diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index 80b90d78045..855063bae98 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -10,7 +10,7 @@ use std::{ }; use engine_rocks::RocksSnapshot; -use kvproto::metapb; +use kvproto::{kvrpcpb::Op, metapb}; use more_asserts::assert_le; use pd_client::PdClient; use raft::eraftpb::{ConfChangeType, MessageType}; @@ -828,3 +828,47 @@ fn test_node_local_read_renew_lease() { thread::sleep(request_wait); } } + +#[test] +fn test_stale_read_with_ts0() { + let mut cluster = new_server_cluster(0, 3); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.cfg.resolved_ts.enable = true; + cluster.run(); + + let leader = new_peer(1, 1); + cluster.must_transfer_leader(1, leader.clone()); + let mut leader_client = PeerClient::new(&cluster, 1, leader); + + let mut follower_client2 = PeerClient::new(&cluster, 1, new_peer(2, 2)); + + // Set the `stale_read` flag + leader_client.ctx.set_stale_read(true); + follower_client2.ctx.set_stale_read(true); + + let commit_ts1 = leader_client.must_kv_write( + &pd_client, + vec![new_mutation(Op::Put, &b"key1"[..], &b"value1"[..])], + 
b"key1".to_vec(), + ); + + let commit_ts2 = leader_client.must_kv_write( + &pd_client, + vec![new_mutation(Op::Put, &b"key1"[..], &b"value2"[..])], + b"key1".to_vec(), + ); + + follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), commit_ts1); + follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value2".to_vec(), commit_ts2); + assert!( + follower_client2 + .kv_read(b"key1".to_vec(), 0) + .region_error + .into_option() + .unwrap() + .not_leader + .is_some() + ); + assert!(leader_client.kv_read(b"key1".to_vec(), 0).not_found); +} diff --git a/tests/integrations/server/gc_worker.rs b/tests/integrations/server/gc_worker.rs index 59dc776dcca..36f9eed9ca8 100644 --- a/tests/integrations/server/gc_worker.rs +++ b/tests/integrations/server/gc_worker.rs @@ -271,7 +271,7 @@ fn test_applied_lock_collector() { // `keys::DATA_PREFIX`. This case ensures it's performed correctly. #[test] fn test_gc_bypass_raft() { - let (cluster, leader, ctx) = must_new_cluster_mul(1); + let (cluster, leader, ctx) = must_new_cluster_mul(2); cluster.pd_client.disable_default_operator(); let env = Arc::new(Environment::new(1)); @@ -300,17 +300,25 @@ fn test_gc_bypass_raft() { assert!(engine.kv.get_value_cf(CF_WRITE, &key).unwrap().is_some()); } - let gc_sched = cluster.sim.rl().get_gc_worker(1).scheduler(); - sync_gc(&gc_sched, 0, b"k1".to_vec(), b"k2".to_vec(), 200.into()).unwrap(); - - for &start_ts in &[10, 20, 30] { - let commit_ts = start_ts + 5; - let key = Key::from_raw(b"k1").append_ts(start_ts.into()); - let key = data_key(key.as_encoded()); - assert!(engine.kv.get_value(&key).unwrap().is_none()); - - let key = Key::from_raw(b"k1").append_ts(commit_ts.into()); - let key = data_key(key.as_encoded()); - assert!(engine.kv.get_value_cf(CF_WRITE, &key).unwrap().is_none()); + let node_ids = cluster.get_node_ids(); + for store_id in node_ids { + let gc_sched = cluster.sim.rl().get_gc_worker(store_id).scheduler(); + + let mut region = cluster.get_region(b"a"); + 
region.set_start_key(b"k1".to_vec()); + region.set_end_key(b"k2".to_vec()); + sync_gc(&gc_sched, region, 200.into()).unwrap(); + + let engine = cluster.engines.get(&store_id).unwrap(); + for &start_ts in &[10, 20, 30] { + let commit_ts = start_ts + 5; + let key = Key::from_raw(b"k1").append_ts(start_ts.into()); + let key = data_key(key.as_encoded()); + assert!(engine.kv.get_value(&key).unwrap().is_none()); + + let key = Key::from_raw(b"k1").append_ts(commit_ts.into()); + let key = data_key(key.as_encoded()); + assert!(engine.kv.get_value_cf(CF_WRITE, &key).unwrap().is_none()); + } } } diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index d60edf7bc97..6b2e52b8fee 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -552,7 +552,8 @@ fn test_mvcc_resolve_lock_gc_and_delete() { ts += 1; let gc_safe_ponit = TimeStamp::from(ts); let gc_scheduler = cluster.sim.rl().get_gc_worker(1).scheduler(); - sync_gc(&gc_scheduler, 0, vec![], vec![], gc_safe_ponit).unwrap(); + let region = cluster.get_region(&k); + sync_gc(&gc_scheduler, region, gc_safe_ponit).unwrap(); // the `k` at the old ts should be none. 
let get_version2 = commit_version + 1; diff --git a/tests/integrations/storage/test_raft_storage.rs b/tests/integrations/storage/test_raft_storage.rs index 98e60386884..58488cb91cd 100644 --- a/tests/integrations/storage/test_raft_storage.rs +++ b/tests/integrations/storage/test_raft_storage.rs @@ -294,7 +294,7 @@ fn test_auto_gc() { config.ratio_threshold = 0.9; let storage = SyncTestStorageBuilderApiV1::from_engine(engine.clone()) .gc_config(config) - .build() + .build(*id) .unwrap(); (*id, storage) @@ -312,7 +312,9 @@ fn test_auto_gc() { *id, ); cfg.post_a_round_of_gc = Some(Box::new(move || tx.send(()).unwrap())); - storage.start_auto_gc(cfg); + + let kv_engine = cluster.get_engine(*id); + storage.start_auto_gc(&kv_engine, cfg); } assert_eq!(storages.len(), count); diff --git a/tests/integrations/storage/test_raftkv.rs b/tests/integrations/storage/test_raftkv.rs index 420f9bd7765..20a3e5ebeaf 100644 --- a/tests/integrations/storage/test_raftkv.rs +++ b/tests/integrations/storage/test_raftkv.rs @@ -259,7 +259,7 @@ fn test_read_on_replica_check_memory_locks() { range.set_start_key(encoded_key.as_encoded().to_vec()); let follower_snap_ctx = SnapContext { pb_ctx: &follower_ctx, - start_ts: 100.into(), + start_ts: Some(100.into()), key_ranges: vec![range], ..Default::default() }; diff --git a/tests/integrations/storage/test_region_info_accessor.rs b/tests/integrations/storage/test_region_info_accessor.rs index b42a0d4c15a..2df7238e1a9 100644 --- a/tests/integrations/storage/test_region_info_accessor.rs +++ b/tests/integrations/storage/test_region_info_accessor.rs @@ -176,3 +176,39 @@ fn test_region_collection_get_regions_in_range() { p.stop(); } } + +#[test] +fn test_region_collection_find_region_by_key() { + let mut cluster = new_node_cluster(0, 3); + + let (tx, rx) = channel(); + cluster + .sim + .wl() + .post_create_coprocessor_host(Box::new(move |id, host| { + let p = RegionInfoAccessor::new(host); + tx.send((id, p)).unwrap() + })); + + cluster.run(); + let 
region_info_providers: HashMap<_, _> = rx.try_iter().collect(); + assert_eq!(region_info_providers.len(), 3); + let regions = prepare_cluster(&mut cluster); + + for node_id in cluster.get_node_ids() { + let engine = ®ion_info_providers[&node_id]; + + let region = engine.find_region_by_key(b"").unwrap(); + assert_eq!(region, regions[0]); + + let region = engine.find_region_by_key(b"k2").unwrap(); + assert_eq!(region, regions[1]); + + let region = engine.find_region_by_key(b"k99").unwrap(); + assert_eq!(region, *regions.last().unwrap()); + } + + for (_, p) in region_info_providers { + p.stop(); + } +} diff --git a/tests/integrations/storage/test_storage.rs b/tests/integrations/storage/test_storage.rs index 21c9db6fe42..b0c60ae5aab 100644 --- a/tests/integrations/storage/test_storage.rs +++ b/tests/integrations/storage/test_storage.rs @@ -13,8 +13,12 @@ use std::{ use api_version::{dispatch_api_version, KvFormat}; use engine_traits::{CF_DEFAULT, CF_LOCK}; -use kvproto::kvrpcpb::{ApiVersion, Context, KeyRange, LockInfo}; +use kvproto::{ + kvrpcpb::{ApiVersion, Context, KeyRange, LockInfo}, + metapb, +}; use rand::random; +use test_raftstore::new_peer; use test_storage::*; use tikv::{ coprocessor::checksum_crc64_xor, @@ -680,9 +684,11 @@ fn test_store_resolve_with_illegal_tso() { fn test_txn_store_gc() { let key = "k"; let store = AssertionStorage::default(); - let (_cluster, raft_store) = AssertionStorageApiV1::new_raft_storage_with_store_count(3, key); - store.test_txn_store_gc(key); - raft_store.test_txn_store_gc(key); + let (cluster, raft_store) = AssertionStorageApiV1::new_raft_storage_with_store_count(3, key); + + let region = cluster.get_region(key.as_bytes()); + store.test_txn_store_gc(key, region.clone()); + raft_store.test_txn_store_gc(key, region); } fn test_txn_store_gc_multiple_keys(key_prefix_len: usize, n: usize) { @@ -698,7 +704,11 @@ pub fn test_txn_store_gc_multiple_keys_single_storage(n: usize, prefix: String) store.put_ok(k.as_bytes(), b"v1", 5, 10); 
store.put_ok(k.as_bytes(), b"v2", 15, 20); } - store.gc_ok(30); + + let store_id = 1; + let mut region = metapb::Region::default(); + region.mut_peers().push(new_peer(store_id, 0)); + store.gc_ok(region, 30); for k in &keys { store.get_none(k.as_bytes(), 15); } @@ -714,12 +724,12 @@ pub fn test_txn_store_gc_multiple_keys_cluster_storage(n: usize, prefix: String) } let mut last_region = cluster.get_region(b""); - store.gc_ok_for_cluster(&mut cluster, b"", 30); + store.gc_ok_for_cluster(&mut cluster, b"", last_region.clone(), 30); for k in &keys { // clear data whose commit_ts < 30 let region = cluster.get_region(k.as_bytes()); if last_region != region { - store.gc_ok_for_cluster(&mut cluster, k.as_bytes(), 30); + store.gc_ok_for_cluster(&mut cluster, k.as_bytes(), region.clone(), 30); last_region = region; } } From 5ae75c8faf93b496030aaf7576f3ca7e2df28b19 Mon Sep 17 00:00:00 2001 From: YangKeao Date: Tue, 6 Sep 2022 02:44:55 -0400 Subject: [PATCH 0189/1149] copr: use manually written json path parser (#13317) close tikv/tikv#13316 Signed-off-by: YangKeao --- .../src/codec/mysql/json/path_expr.rs | 557 ++++++++++++++---- 1 file changed, 455 insertions(+), 102 deletions(-) diff --git a/components/tidb_query_datatype/src/codec/mysql/json/path_expr.rs b/components/tidb_query_datatype/src/codec/mysql/json/path_expr.rs index afb9cafff67..a760f748348 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/path_expr.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/path_expr.rs @@ -25,21 +25,13 @@ // select json_extract('{"a": "b", "c": [1, "2"]}', '$.*') -> ["b", [1, "2"]] // ``` -use std::ops::Index; - -use regex::Regex; +use std::{iter::Peekable, str::CharIndices}; use super::json_unquote::unquote_string; -use crate::codec::Result; +use crate::codec::{Error, Result}; pub const PATH_EXPR_ASTERISK: &str = "*"; -// [a-zA-Z_][a-zA-Z0-9_]* matches any identifier; -// "[^"\\]*(\\.[^"\\]*)*" matches any string literal which can carry escaped -// quotes. 
-const PATH_EXPR_LEG_RE_STR: &str = - r#"(\.\s*([a-zA-Z_][a-zA-Z0-9_]*|\*|"[^"\\]*(\\.[^"\\]*)*")|(\[\s*([0-9]+|\*)\s*\])|\*\*)"#; - #[derive(Clone, Debug, PartialEq)] pub enum PathLeg { /// `Key` indicates the path leg with '.key'. @@ -73,82 +65,334 @@ impl PathExpression { } } -/// Parses a JSON path expression. Returns a `PathExpression` -/// object which can be used in `JSON_EXTRACT`, `JSON_SET` and so on. -pub fn parse_json_path_expr(path_expr: &str) -> Result { - // Find the position of first '$'. If any no-blank characters in - // path_expr[0: dollarIndex], return an error. - let dollar_index = match path_expr.find('$') { - Some(i) => i, - None => return Err(box_err!("Invalid JSON path: {}", path_expr)), - }; - if path_expr - .index(0..dollar_index) - .char_indices() - .any(|(_, c)| !c.is_ascii_whitespace()) - { - return Err(box_err!("Invalid JSON path: {}", path_expr)); +/// `box_json_path_err` creates an error from the slice position +/// The position is added with 1, to count from 1 as start +macro_rules! box_json_path_err { + ($e:expr) => {{ + box_err!( + "Invalid JSON path expression. The error is around character position {}.", + ($e) + 1 + ) + }}; +} + +struct PathExpressionTokenizer<'a> { + input: &'a str, + + char_iterator: Peekable>, +} + +struct Position { + start: usize, + end: usize, +} + +/// PathExpressionToken represents a section in path expression and its position +enum PathExpressionToken { + Leg((PathLeg, Position)), + /// Represents the beginning "$" in the expression + Start(Position), +} + +impl<'a> Iterator for PathExpressionTokenizer<'a> { + type Item = Result; + + /// Next will try to parse the next path leg and return + /// If it returns None, it means the input is over. + /// If it returns Some(Err(..)), it means the format is error. + /// If it returns Some(Ok(..)), it represents the next token. 
+ fn next(&mut self) -> Option> { + self.trim_white_spaces(); + // Trim all spaces at first + if self.reached_end() { + return None; + }; + + let (start, ch) = *self.char_iterator.peek().unwrap(); + match ch { + '$' => { + self.char_iterator.next(); + Some(Ok(PathExpressionToken::Start(Position { + start, + end: self.current_index(), + }))) + } + '.' => Some(self.next_key()), + '[' => Some(self.next_index()), + '*' => Some(self.next_double_asterisk()), + _ => Some(Err(box_json_path_err!(self.current_index()))), + } } +} - let expr = path_expr.index(dollar_index + 1..).trim_start(); +impl<'a> PathExpressionTokenizer<'a> { + fn new(input: &'a str) -> PathExpressionTokenizer<'a> { + PathExpressionTokenizer { + input, + char_iterator: input.char_indices().peekable(), + } + } - lazy_static::lazy_static! { - static ref RE: Regex = Regex::new(PATH_EXPR_LEG_RE_STR).unwrap(); + /// Returns the current index on the slice + fn current_index(&mut self) -> usize { + match self.char_iterator.peek() { + Some((start, _)) => *start, + None => self.input.len(), + } } - let mut legs = vec![]; - let mut flags = PathExpressionFlag::default(); - let mut last_end = 0; - for m in RE.find_iter(expr) { - let (start, end) = (m.start(), m.end()); - // Check all characters between two legs are blank. 
- if expr - .index(last_end..start) - .char_indices() - .any(|(_, c)| !c.is_ascii_whitespace()) - { - return Err(box_err!("Invalid JSON path: {}", path_expr)); + + /// `trim_while_spaces` removes following spaces + fn trim_white_spaces(&mut self) { + while self + .char_iterator + .next_if(|(_, ch)| ch.is_whitespace()) + .is_some() + {} + } + + /// Returns whether the input has reached the end + fn reached_end(&mut self) -> bool { + return self.char_iterator.peek().is_none(); + } + + fn next_key(&mut self) -> Result { + let (start, _) = self.char_iterator.next().unwrap(); + + self.trim_white_spaces(); + if self.reached_end() { + return Err(box_json_path_err!(self.current_index())); } - last_end = end; - - let next_char = expr.index(start..).chars().next().unwrap(); - if next_char == '[' { - // The leg is an index of a JSON array. - let leg = expr[start + 1..end].trim(); - let index_str = leg[0..leg.len() - 1].trim(); - let index = if index_str == PATH_EXPR_ASTERISK { - flags |= PATH_EXPRESSION_CONTAINS_ASTERISK; - PATH_EXPR_ARRAY_INDEX_ASTERISK - } else { - box_try!(index_str.parse::()) - }; - legs.push(PathLeg::Index(index)) - } else if next_char == '.' { - // The leg is a key of a JSON object. - let mut key = expr[start + 1..end].trim().to_owned(); - if key == PATH_EXPR_ASTERISK { - flags |= PATH_EXPRESSION_CONTAINS_ASTERISK; - } else if key.starts_with('"') { - // We need to unquote the origin string. 
- key = unquote_string(&key[1..key.len() - 1])?; + + match *self.char_iterator.peek().unwrap() { + (_, '*') => { + self.char_iterator.next().unwrap(); + + Ok(PathExpressionToken::Leg(( + PathLeg::Key(PATH_EXPR_ASTERISK.to_string()), + Position { + start, + end: self.current_index(), + }, + ))) + } + (mut key_start, '"') => { + // Skip this '"' character + key_start += 1; + self.char_iterator.next().unwrap(); + + // Next until the next '"' character + while self.char_iterator.next_if(|(_, ch)| *ch != '"').is_some() {} + + // Now, it's a '"' or the end + if self.char_iterator.peek().is_none() { + return Err(box_json_path_err!(self.current_index())); + } + + // `key_end` is the index of '"' + let key_end = self.current_index(); + self.char_iterator.next().unwrap(); + + let key = unquote_string(unsafe { self.input.get_unchecked(key_start..key_end) })?; + for ch in key.chars() { + // According to JSON standard, a string cannot + // contain any ASCII control characters + if ch.is_control() { + // TODO: add the concrete error location + // after unquote, we lost the map between + // the character and input position. + return Err(box_json_path_err!(key_start)); + } + } + + Ok(PathExpressionToken::Leg(( + PathLeg::Key(key), + Position { + start, + end: self.current_index(), + }, + ))) + } + (key_start, _) => { + // We have to also check the current value + while self + .char_iterator + .next_if(|(_, ch)| { + !(ch.is_whitespace() || *ch == '.' 
|| *ch == '[' || *ch == '*') + }) + .is_some() + {} + + // Now it reaches the end or a whitespace/./[/* + let key_end = self.current_index(); + + // The start character is not available + if key_end == key_start { + return Err(box_json_path_err!(key_start)); + } + + let key = unsafe { self.input.get_unchecked(key_start..key_end) }.to_string(); + + // It's not quoted, we'll have to validate whether it's an available ECMEScript + // identifier + for (i, c) in key.char_indices() { + if i == 0 && c.is_ascii_digit() { + return Err(box_json_path_err!(key_start + i)); + } + if !c.is_ascii_alphanumeric() && c != '_' && c != '$' && c.is_ascii() { + return Err(box_json_path_err!(key_start + i)); + } + } + + Ok(PathExpressionToken::Leg(( + PathLeg::Key(key), + Position { + start, + end: key_end, + }, + ))) } - legs.push(PathLeg::Key(key)) - } else { - // The leg is '**'. - flags |= PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK; - legs.push(PathLeg::DoubleAsterisk); } } - // Check `!expr.is_empty()` here because "$" is a valid path to specify the - // current JSON. 
- if (last_end == 0) && (!expr.is_empty()) { - return Err(box_err!("Invalid JSON path: {}", path_expr)); + + fn next_index(&mut self) -> Result { + let (start, _) = self.char_iterator.next().unwrap(); + + self.trim_white_spaces(); + if self.reached_end() { + return Err(box_json_path_err!(self.current_index())); + } + + return match self.char_iterator.next().unwrap() { + (_, '*') => { + // Then it's a glob array index + self.trim_white_spaces(); + if self.reached_end() { + return Err(box_json_path_err!(self.current_index())); + } + + if self.char_iterator.next_if(|(_, ch)| *ch == ']').is_none() { + return Err(box_json_path_err!(self.current_index())); + } + + Ok(PathExpressionToken::Leg(( + PathLeg::Index(PATH_EXPR_ARRAY_INDEX_ASTERISK), + Position { + start, + end: self.current_index(), + }, + ))) + } + (number_start, '0'..='9') => { + // Then it's a number array index + while self + .char_iterator + .next_if(|(_, ch)| ch.is_ascii_digit()) + .is_some() + {} + let number_end = self.current_index(); + + self.trim_white_spaces(); + // now, it reaches the end of input, or reaches a non-digit character + match self.char_iterator.peek() { + Some((_, ']')) => {} + Some((pos, _)) => { + return Err(box_json_path_err!(pos)); + } + None => { + return Err(box_json_path_err!(self.current_index())); + } + } + self.char_iterator.next().unwrap(); + + let index = self.input[number_start..number_end] + .parse::() + .map_err(|_| -> Error { box_json_path_err!(number_end) })?; + Ok(PathExpressionToken::Leg(( + PathLeg::Index(index), + Position { + start, + end: self.current_index(), + }, + ))) + } + (pos, _) => Err(box_json_path_err!(pos)), + }; } - if !legs.is_empty() { - if let PathLeg::DoubleAsterisk = *legs.last().unwrap() { - // The last leg of a path expression cannot be '**'. 
- return Err(box_err!("Invalid JSON path: {}", path_expr)); + + fn next_double_asterisk(&mut self) -> Result { + let (start, _) = self.char_iterator.next().unwrap(); + + match self.char_iterator.next() { + Some((end, '*')) => { + // Three or more asterisks are not allowed + if let Some((pos, '*')) = self.char_iterator.peek() { + return Err(box_json_path_err!(pos)); + } + + Ok(PathExpressionToken::Leg(( + PathLeg::DoubleAsterisk, + Position { start, end }, + ))) + } + Some((pos, _)) => Err(box_json_path_err!(pos)), + None => Err(box_json_path_err!(self.current_index())), } } +} + +/// Parses a JSON path expression. Returns a `PathExpression` +/// object which can be used in `JSON_EXTRACT`, `JSON_SET` and so on. +pub fn parse_json_path_expr(path_expr: &str) -> Result { + let mut legs = Vec::new(); + let tokenizer = PathExpressionTokenizer::new(path_expr); + let mut flags = PathExpressionFlag::default(); + + let mut started = false; + let mut last_position = Position { start: 0, end: 0 }; + for (index, token) in tokenizer.enumerate() { + let token = token?; + + match token { + PathExpressionToken::Leg((leg, position)) => { + if !started { + return Err(box_json_path_err!(position.start)); + } + + match &leg { + PathLeg::Key(key) => { + if key == PATH_EXPR_ASTERISK { + flags |= PATH_EXPRESSION_CONTAINS_ASTERISK + } + } + PathLeg::Index(PATH_EXPR_ARRAY_INDEX_ASTERISK) => { + flags |= PATH_EXPRESSION_CONTAINS_ASTERISK + } + PathLeg::DoubleAsterisk => flags |= PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, + _ => {} + } + + legs.push(leg.clone()); + last_position = position; + } + PathExpressionToken::Start(position) => { + started = true; + + if index != 0 { + return Err(box_json_path_err!(position.start)); + } + } + } + } + + // There is no available token + if !started { + return Err(box_json_path_err!(path_expr.len())); + } + // The last one cannot be the double asterisk + if !legs.is_empty() && legs.last().unwrap() == &PathLeg::DoubleAsterisk { + return 
Err(box_json_path_err!(last_position.end)); + } + Ok(PathExpression { legs, flags }) } @@ -175,7 +419,7 @@ mod tests { let mut test_cases = vec![ ( "$", - true, + None, Some(PathExpression { legs: vec![], flags: PathExpressionFlag::default(), @@ -183,23 +427,58 @@ mod tests { ), ( "$.a", - true, + None, Some(PathExpression { legs: vec![PathLeg::Key(String::from("a"))], flags: PathExpressionFlag::default(), }), ), + ( + "$ .a. $", + None, + Some(PathExpression { + legs: vec![ + PathLeg::Key(String::from("a")), + PathLeg::Key(String::from("$")), + ], + flags: PathExpressionFlag::default(), + }), + ), ( "$.\"hello world\"", - true, + None, Some(PathExpression { legs: vec![PathLeg::Key(String::from("hello world"))], flags: PathExpressionFlag::default(), }), ), ( - "$[0]", - true, + "$. \"你好 世界\" ", + None, + Some(PathExpression { + legs: vec![PathLeg::Key(String::from("你好 世界"))], + flags: PathExpressionFlag::default(), + }), + ), + ( + "$. ❤️ ", + None, + Some(PathExpression { + legs: vec![PathLeg::Key(String::from("❤️"))], + flags: PathExpressionFlag::default(), + }), + ), + ( + "$. 你好 ", + None, + Some(PathExpression { + legs: vec![PathLeg::Key(String::from("你好"))], + flags: PathExpressionFlag::default(), + }), + ), + ( + "$[ 0 ]", + None, Some(PathExpression { legs: vec![PathLeg::Index(0)], flags: PathExpressionFlag::default(), @@ -207,33 +486,107 @@ mod tests { ), ( "$**.a", - true, + None, Some(PathExpression { legs: vec![PathLeg::DoubleAsterisk, PathLeg::Key(String::from("a"))], flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }), ), + ( + " $ ** . a", + None, + Some(PathExpression { + legs: vec![PathLeg::DoubleAsterisk, PathLeg::Key(String::from("a"))], + flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, + }), + ), + ( + " $ ** . 
$", + None, + Some(PathExpression { + legs: vec![PathLeg::DoubleAsterisk, PathLeg::Key(String::from("$"))], + flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, + }), + ), // invalid path expressions - (".a", false, None), - ("xx$[1]", false, None), - ("$.a xx .b", false, None), - ("$[a]", false, None), - ("$.\"\\u33\"", false, None), - ("$**", false, None), + ( + " $ ** . 5", + Some("Invalid JSON path expression. The error is around character position 13."), + None, + ), + ( + ".a", + Some("Invalid JSON path expression. The error is around character position 1."), + None, + ), + ( + "xx$[1]", + Some("Invalid JSON path expression. The error is around character position 1."), + None, + ), + ( + "$.a xx .b", + Some("Invalid JSON path expression. The error is around character position 5."), + None, + ), + ( + "$[a]", + Some("Invalid JSON path expression. The error is around character position 3."), + None, + ), + ( + "$.\"\\u33\"", + // TODO: pass the position in the unquote unicode error + Some("Invalid unicode, byte len too short"), + None, + ), + ( + "$**", + Some("Invalid JSON path expression. The error is around character position 3."), + None, + ), + ( + "$.\"a\\t\"", + Some("Invalid JSON path expression. The error is around character position 4."), + None, + ), + ( + "$ .a $", + Some("Invalid JSON path expression. The error is around character position 6."), + None, + ), + ( + "$ [ 2147483648 ]", + Some("Invalid JSON path expression. 
The error is around character position 15."), + None, + ), ]; - for (i, (path_expr, no_error, expected)) in test_cases.drain(..).enumerate() { + for (i, (path_expr, error_message, expected)) in test_cases.drain(..).enumerate() { let r = parse_json_path_expr(path_expr); - if no_error { - assert!(r.is_ok(), "#{} expect parse ok but got err {:?}", i, r); - let got = r.unwrap(); - let expected = expected.unwrap(); - assert_eq!( - got, expected, - "#{} expect {:?} but got {:?}", - i, expected, got - ); - } else { - assert!(r.is_err(), "#{} expect error but got {:?}", i, r); + + match error_message { + Some(error_message) => { + assert!(r.is_err(), "#{} expect error but got {:?}", i, r); + + let got = r.err().unwrap().to_string(); + assert!( + got.contains(error_message), + "#{} error message {} should contain {}", + i, + got, + error_message + ) + } + None => { + assert!(r.is_ok(), "#{} expect parse ok but got err {:?}", i, r); + let got = r.unwrap(); + let expected = expected.unwrap(); + assert_eq!( + got, expected, + "#{} expect {:?} but got {:?}", + i, expected, got + ); + } } } } @@ -241,10 +594,10 @@ mod tests { #[test] fn test_parse_json_path_expr_contains_any_asterisk() { let mut test_cases = vec![ - ("$.a[b]", false), + ("$.a[0]", false), ("$.a[*]", true), - ("$.*[b]", true), - ("$**.a[b]", true), + ("$.*[0]", true), + ("$**.a[0]", true), ]; for (i, (path_expr, expected)) in test_cases.drain(..).enumerate() { let r = parse_json_path_expr(path_expr); From b55ef7215572e675fd6e30ad10e97e7024c3c469 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Tue, 6 Sep 2022 15:18:55 +0800 Subject: [PATCH 0190/1149] *: introduce the non-retryable error FlashbackInProgress (#13398) close tikv/tikv#13397 Introduce the non-retryable error `FlashbackInProgress` for the region in the flashback progress to reject any read or write. 
Signed-off-by: JmPotato --- Cargo.lock | 2 +- components/error_code/src/raftstore.rs | 5 +- components/raftstore/src/errors.rs | 4 +- etc/error_code.toml | 160 ++++++++++-------- .../integrations/raftstore/test_flashback.rs | 15 +- tests/integrations/server/kv_service.rs | 8 +- 6 files changed, 107 insertions(+), 87 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0aa7586a608..2f9c09fa164 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2629,7 +2629,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#f95ac338b3312e0a9bd7c33c9647a87a74314567" +source = "git+https://github.com/pingcap/kvproto.git#2e37953b2b435961ad5b4f0e36b32c53f4777b23" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/error_code/src/raftstore.rs b/components/error_code/src/raftstore.rs index 29c4c3c1849..2fd0d168a14 100644 --- a/components/error_code/src/raftstore.rs +++ b/components/error_code/src/raftstore.rs @@ -30,8 +30,7 @@ define_error_codes!( DEADLINE_EXCEEDED => ("DeadlineExceeded", "", ""), PENDING_PREPARE_MERGE => ("PendingPrepareMerge", "", ""), RECOVERY_IN_PROGRESS => ("RecoveryInProgress", "", ""), - // TODO: add FLASHBACK in errorpb - FLASHBACK_IN_PROGRESS => ("RecoveryInProgress", "", ""), + FLASHBACK_IN_PROGRESS => ("FlashbackInProgress", "", ""), SNAP_ABORT => ("SnapAbort", "", ""), SNAP_TOO_MANY => ("SnapTooMany", "", ""), @@ -66,6 +65,8 @@ impl ErrorCodeExt for errorpb::Error { DATA_IS_NOT_READY } else if self.has_recovery_in_progress() { RECOVERY_IN_PROGRESS + } else if self.has_flashback_in_progress() { + FLASHBACK_IN_PROGRESS } else { UNKNOWN } diff --git a/components/raftstore/src/errors.rs b/components/raftstore/src/errors.rs index 1adaef08c3f..878ad6c2825 100644 --- a/components/raftstore/src/errors.rs +++ b/components/raftstore/src/errors.rs @@ -245,9 +245,9 @@ impl From for errorpb::Error { errorpb.set_recovery_in_progress(e); } Error::FlashbackInProgress(region_id) => { - let mut 
e = errorpb::RecoveryInProgress::default(); + let mut e = errorpb::FlashbackInProgress::default(); e.set_region_id(region_id); - errorpb.set_recovery_in_progress(e); + errorpb.set_flashback_in_progress(e); } _ => {} }; diff --git a/etc/error_code.toml b/etc/error_code.toml index 9a42cc3769a..7a6b956449f 100644 --- a/etc/error_code.toml +++ b/etc/error_code.toml @@ -1,11 +1,11 @@ -["KV:Cloud:IO"] +["KV:Cloud:Io"] error = ''' -KV:Cloud:IO +KV:Cloud:Io ''' -["KV:Cloud:SSL"] +["KV:Cloud:Ssl"] error = ''' -KV:Cloud:SSL +KV:Cloud:Ssl ''' ["KV:Cloud:Proto"] @@ -143,9 +143,9 @@ error = ''' KV:Encryption:Rocks ''' -["KV:Encryption:IO"] +["KV:Encryption:Io"] error = ''' -KV:Encryption:IO +KV:Encryption:Io ''' ["KV:Encryption:Crypter"] @@ -193,9 +193,9 @@ error = ''' KV:Engine:Protobuf ''' -["KV:Engine:IO"] +["KV:Engine:Io"] error = ''' -KV:Engine:IO +KV:Engine:Io ''' ["KV:Engine:CfName"] @@ -218,49 +218,54 @@ error = ''' KV:Engine:DataCompacted ''' -["KV:PD:IO"] +["KV:Pd:Io"] error = ''' -KV:PD:IO +KV:Pd:Io ''' -["KV:PD:ClusterBootstraped"] +["KV:Pd:ClusterBootstraped"] error = ''' -KV:PD:ClusterBootstraped +KV:Pd:ClusterBootstraped ''' -["KV:PD:ClusterNotBootstraped"] +["KV:Pd:ClusterNotBootstraped"] error = ''' -KV:PD:ClusterNotBootstraped +KV:Pd:ClusterNotBootstraped ''' -["KV:PD:Imcompatible"] +["KV:Pd:Imcompatible"] error = ''' -KV:PD:Imcompatible +KV:Pd:Imcompatible ''' -["KV:PD:gRPC"] +["KV:Pd:Grpc"] error = ''' -KV:PD:gRPC +KV:Pd:Grpc ''' -["KV:PD:RegionNotFound"] +["KV:Pd:StreamDisconnect"] error = ''' -KV:PD:RegionNotFound +KV:Pd:StreamDisconnect ''' -["KV:PD:StoreTombstone"] +["KV:Pd:RegionNotFound"] error = ''' -KV:PD:StoreTombstone +KV:Pd:RegionNotFound ''' -["KV:PD:GlobalConfigNotFound"] +["KV:Pd:StoreTombstone"] error = ''' -KV:PD:GlobalConfigNotFound +KV:Pd:StoreTombstone ''' -["KV:PD:Unknown"] +["KV:Pd:GlobalConfigNotFound"] error = ''' -KV:PD:Unknown +KV:Pd:GlobalConfigNotFound +''' + +["KV:Pd:Unknown"] +error = ''' +KV:Pd:Unknown ''' ["KV:Raft:Io"] @@ 
-373,9 +378,9 @@ error = ''' KV:Raftstore:Coprocessor ''' -["KV:Raftstore:IO"] +["KV:Raftstore:Io"] error = ''' -KV:Raftstore:IO +KV:Raftstore:Io ''' ["KV:Raftstore:Protobuf"] @@ -428,6 +433,11 @@ error = ''' KV:Raftstore:RecoveryInProgress ''' +["KV:Raftstore:FlashbackInProgress"] +error = ''' +KV:Raftstore:FlashbackInProgress +''' + ["KV:Raftstore:SnapAbort"] error = ''' KV:Raftstore:SnapAbort @@ -443,94 +453,99 @@ error = ''' KV:Raftstore:SnapUnknown ''' -["KV:SSTImporter:Io"] +["KV:SstImporter:Io"] error = ''' -KV:SSTImporter:Io +KV:SstImporter:Io ''' -["KV:SSTImporter:gRPC"] +["KV:SstImporter:Grpc"] error = ''' -KV:SSTImporter:gRPC +KV:SstImporter:Grpc ''' -["KV:SSTImporter:Uuid"] +["KV:SstImporter:Uuid"] error = ''' -KV:SSTImporter:Uuid +KV:SstImporter:Uuid ''' -["KV:SSTImporter:Future"] +["KV:SstImporter:Future"] error = ''' -KV:SSTImporter:Future +KV:SstImporter:Future ''' -["KV:SSTImporter:RocksDb"] +["KV:SstImporter:RocksDb"] error = ''' -KV:SSTImporter:RocksDb +KV:SstImporter:RocksDb ''' -["KV:SSTImporter:ParseIntError"] +["KV:SstImporter:ParseIntError"] error = ''' -KV:SSTImporter:ParseIntError +KV:SstImporter:ParseIntError ''' -["KV:SSTImporter:FileExists"] +["KV:SstImporter:FileExists"] error = ''' -KV:SSTImporter:FileExists +KV:SstImporter:FileExists ''' -["KV:SSTImporter:FileCorrupted"] +["KV:SstImporter:FileCorrupted"] error = ''' -KV:SSTImporter:FileCorrupted +KV:SstImporter:FileCorrupted ''' -["KV:SSTImporter:InvalidSstPath"] +["KV:SstImporter:InvalidSstPath"] error = ''' -KV:SSTImporter:InvalidSstPath +KV:SstImporter:InvalidSstPath ''' -["KV:SSTImporter:InvalidChunk"] +["KV:SstImporter:InvalidChunk"] error = ''' -KV:SSTImporter:InvalidChunk +KV:SstImporter:InvalidChunk ''' -["KV:SSTImporter:Engine"] +["KV:SstImporter:Engine"] error = ''' -KV:SSTImporter:Engine +KV:SstImporter:Engine ''' -["KV:SSTImporter:CannotReadExternalStorage"] +["KV:SstImporter:CannotReadExternalStorage"] error = ''' -KV:SSTImporter:CannotReadExternalStorage 
+KV:SstImporter:CannotReadExternalStorage ''' -["KV:SSTImporter:WrongKeyPrefix"] +["KV:SstImporter:WrongKeyPrefix"] error = ''' -KV:SSTImporter:WrongKeyPrefix +KV:SstImporter:WrongKeyPrefix ''' -["KV:SSTImporter:BadFormat"] +["KV:SstImporter:BadFormat"] error = ''' -KV:SSTImporter:BadFormat +KV:SstImporter:BadFormat ''' -["KV:SSTImporter:FileConflict"] +["KV:SstImporter:FileConflict"] error = ''' -KV:SSTImporter:FileConflict +KV:SstImporter:FileConflict ''' -["KV:SSTImporter:TtlNotEnabled"] +["KV:SstImporter:TtlNotEnabled"] error = ''' -KV:SSTImporter:TtlNotEnabled +KV:SstImporter:TtlNotEnabled ''' -["KV:SSTImporter:TtlLenNotEqualsToPairs"] +["KV:SstImporter:TtlLenNotEqualsToPairs"] error = ''' -KV:SSTImporter:TtlLenNotEqualsToPairs +KV:SstImporter:TtlLenNotEqualsToPairs ''' -["KV:SSTImporter:IncompatibleApiVersion"] +["KV:SstImporter:IncompatibleApiVersion"] error = ''' -KV:SSTImporter:IncompatibleApiVersion +KV:SstImporter:IncompatibleApiVersion +''' + +["KV:SstImporter:InvalidKeyMode"] +error = ''' +KV:SstImporter:InvalidKeyMode ''' ["KV:Storage:Timeout"] @@ -568,14 +583,14 @@ error = ''' KV:Storage:KeyTooLarge ''' -["KV:Storage:InvalidCF"] +["KV:Storage:InvalidCf"] error = ''' -KV:Storage:InvalidCF +KV:Storage:InvalidCf ''' -["KV:Storage:CFDeprecated"] +["KV:Storage:CfDeprecated"] error = ''' -KV:Storage:CFDeprecated +KV:Storage:CfDeprecated ''' ["KV:Storage:TtlNotEnabled"] @@ -593,9 +608,9 @@ error = ''' KV:Storage:Protobuf ''' -["KV:Storage:INVALIDTXNTSO"] +["KV:Storage:InvalidTxnTso"] error = ''' -KV:Storage:INVALIDTXNTSO +KV:Storage:InvalidTxnTso ''' ["KV:Storage:InvalidReqRange"] @@ -708,6 +723,11 @@ error = ''' KV:Storage:AssertionFailed ''' +["KV:Storage:LockIfExistsFailed"] +error = ''' +KV:Storage:LockIfExistsFailed +''' + ["KV:Storage:Unknown"] error = ''' KV:Storage:Unknown diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index e4d0276f9e6..cf91873d385 100644 --- 
a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -50,10 +50,9 @@ fn test_flashback_for_schedule() { .call_command_on_leader(transfer_leader, Duration::from_secs(3)) .unwrap(); let e = resp.get_header().get_error(); - // reuse recovery_in_progress error code. assert_eq!( - e.get_recovery_in_progress(), - &kvproto::errorpb::RecoveryInProgress { + e.get_flashback_in_progress(), + &kvproto::errorpb::FlashbackInProgress { region_id: region.get_id(), ..Default::default() } @@ -94,7 +93,7 @@ fn test_flahsback_for_write() { // write will be blocked let value = vec![1_u8; 8096]; - must_get_error_recovery_in_progress(&mut cluster, ®ion, new_put_cmd(b"k1", &value)); + must_get_error_flashback_in_progress(&mut cluster, ®ion, new_put_cmd(b"k1", &value)); must_cmd_add_flashback_flag( &mut cluster, @@ -123,7 +122,7 @@ fn test_flahsback_for_read() { block_on(cluster.call_and_wait_prepare_flashback(region.get_id(), 1)); // read will be blocked - must_get_error_recovery_in_progress(&mut cluster, ®ion, new_get_cf_cmd("write", b"k1")); + must_get_error_flashback_in_progress(&mut cluster, ®ion, new_get_cf_cmd("write", b"k1")); // verify the read can be executed if add flashback flag in request's // header. 
@@ -263,7 +262,7 @@ fn must_cmd_add_flashback_flag( assert!(!resp.get_header().has_error()); } -fn must_get_error_recovery_in_progress( +fn must_get_error_flashback_in_progress( cluster: &mut Cluster, region: &metapb::Region, cmd: kvproto::raft_cmdpb::Request, @@ -277,8 +276,8 @@ fn must_get_error_recovery_in_progress( Ok(_) => {} Err(e) => { assert_eq!( - e.get_recovery_in_progress(), - &kvproto::errorpb::RecoveryInProgress { + e.get_flashback_in_progress(), + &kvproto::errorpb::FlashbackInProgress { region_id: region.get_id(), ..Default::default() } diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 6b2e52b8fee..70c7f9bda4c 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -696,7 +696,7 @@ fn test_mvcc_flashback_block_rw() { get_req.key = k.clone(); get_req.version = 1; let get_resp = client.kv_get(&get_req).unwrap(); - assert!(get_resp.get_region_error().has_recovery_in_progress()); + assert!(get_resp.get_region_error().has_flashback_in_progress()); assert!(!get_resp.has_error()); assert!(get_resp.value.is_empty()); // Scan @@ -706,7 +706,7 @@ fn test_mvcc_flashback_block_rw() { scan_req.limit = 1; scan_req.version = 1; let scan_resp = client.kv_scan(&scan_req).unwrap(); - assert!(scan_resp.get_region_error().has_recovery_in_progress()); + assert!(scan_resp.get_region_error().has_flashback_in_progress()); assert!(scan_resp.pairs.is_empty()); // Try to write. 
// Prewrite @@ -715,7 +715,7 @@ fn test_mvcc_flashback_block_rw() { mutation.set_key(k.clone()); mutation.set_value(v); let prewrite_resp = try_kv_prewrite(&client, ctx, vec![mutation], k, 1); - assert!(prewrite_resp.get_region_error().has_recovery_in_progress()); + assert!(prewrite_resp.get_region_error().has_flashback_in_progress()); fail::remove("skip_finish_flashback_to_version"); } @@ -741,7 +741,7 @@ fn test_mvcc_flashback_block_scheduling() { transfer_leader_resp .get_header() .get_error() - .has_recovery_in_progress() + .has_flashback_in_progress() ); fail::remove("skip_finish_flashback_to_version"); } From bcaa663c614f044fcfb596c555cac0f152f35d3e Mon Sep 17 00:00:00 2001 From: 3pointer Date: Tue, 6 Sep 2022 15:42:55 +0800 Subject: [PATCH 0191/1149] cloud: add retry on web identity credentials (#13343) close tikv/tikv#13122 Signed-off-by: 3pointer Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/cloud/aws/Cargo.toml | 1 + components/cloud/aws/src/s3.rs | 22 ++--- components/cloud/aws/src/util.rs | 134 +++++++++++++++++++++++++---- components/cloud/src/metrics.rs | 6 ++ components/tikv_util/src/stream.rs | 56 +++++++++++- 6 files changed, 191 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2f9c09fa164..f9dc0e6c418 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -279,6 +279,7 @@ dependencies = [ "tikv_util", "tokio", "url", + "uuid", ] [[package]] diff --git a/components/cloud/aws/Cargo.toml b/components/cloud/aws/Cargo.toml index 299192e9ca3..314e2281425 100644 --- a/components/cloud/aws/Cargo.toml +++ b/components/cloud/aws/Cargo.toml @@ -36,6 +36,7 @@ url = "2.0" thiserror = "1.0" lazy_static = "1.3" prometheus = { version = "0.13", default-features = false, features = ["nightly"] } +uuid = "0.8" [dev-dependencies] futures = "0.3" diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index 991ae154427..3e9c3665f58 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ 
-17,14 +17,10 @@ use rusoto_core::{request::DispatchSignedRequest, ByteStream, RusotoError}; use rusoto_credential::{ProvideAwsCredentials, StaticProvider}; use rusoto_s3::{util::AddressingStyle, *}; use thiserror::Error; -use tikv_util::{ - debug, - stream::{error_stream, retry}, - time::Instant, -}; +use tikv_util::{debug, stream::error_stream, time::Instant}; use tokio::time::{sleep, timeout}; -use crate::util; +use crate::util::{self, retry_and_count}; const CONNECTION_TIMEOUT: Duration = Duration::from_secs(900); pub const STORAGE_VENDOR_NAME_AWS: &str = "aws"; @@ -311,11 +307,11 @@ impl<'client> S3Uploader<'client> { // For short files, execute one put_object to upload the entire thing. let mut data = Vec::with_capacity(est_len as usize); reader.read_to_end(&mut data).await?; - retry(|| self.upload(&data)).await?; + retry_and_count(|| self.upload(&data), "upload_small_file").await?; Ok(()) } else { // Otherwise, use multipart upload to improve robustness. - self.upload_id = retry(|| self.begin()).await?; + self.upload_id = retry_and_count(|| self.begin(), "begin_upload").await?; let upload_res = async { let mut buf = vec![0; self.multi_part_size]; let mut part_number = 1; @@ -324,7 +320,11 @@ impl<'client> S3Uploader<'client> { if data_size == 0 { break; } - let part = retry(|| self.upload_part(part_number, &buf[..data_size])).await?; + let part = retry_and_count( + || self.upload_part(part_number, &buf[..data_size]), + "upload_part", + ) + .await?; self.parts.push(part); part_number += 1; } @@ -333,9 +333,9 @@ impl<'client> S3Uploader<'client> { .await; if upload_res.is_ok() { - retry(|| self.complete()).await?; + retry_and_count(|| self.complete(), "complete_upload").await?; } else { - let _ = retry(|| self.abort()).await; + let _ = retry_and_count(|| self.abort(), "abort_upload").await; } upload_res } diff --git a/components/cloud/aws/src/util.rs b/components/cloud/aws/src/util.rs index c4ff356f462..a2dc1ca8c76 100644 --- a/components/cloud/aws/src/util.rs 
+++ b/components/cloud/aws/src/util.rs @@ -3,6 +3,8 @@ use std::io::{self, Error, ErrorKind}; use async_trait::async_trait; +use cloud::metrics; +use futures::{future::TryFutureExt, Future}; use rusoto_core::{ region::Region, request::{HttpClient, HttpConfig}, @@ -11,10 +13,36 @@ use rusoto_credential::{ AutoRefreshingProvider, AwsCredentials, ChainProvider, CredentialsError, ProvideAwsCredentials, }; use rusoto_sts::WebIdentityProvider; +use tikv_util::{ + stream::{retry_ext, RetryError, RetryExt}, + warn, +}; #[allow(dead_code)] // This will be used soon, please remove the allow. const READ_BUF_SIZE: usize = 1024 * 1024 * 2; +const AWS_WEB_IDENTITY_TOKEN_FILE: &str = "AWS_WEB_IDENTITY_TOKEN_FILE"; +struct CredentialsErrorWrapper(CredentialsError); + +impl From for CredentialsError { + fn from(c: CredentialsErrorWrapper) -> CredentialsError { + c.0 + } +} + +impl std::fmt::Display for CredentialsErrorWrapper { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0.message)?; + Ok(()) + } +} + +impl RetryError for CredentialsErrorWrapper { + fn is_retryable(&self) -> bool { + true + } +} + pub fn new_http_client() -> io::Result { let mut http_config = HttpConfig::new(); // This can greatly improve performance dealing with payloads greater @@ -49,6 +77,22 @@ pub fn get_region(region: &str, endpoint: &str) -> io::Result { } } +pub async fn retry_and_count(action: G, name: &'static str) -> Result +where + G: FnMut() -> F, + F: Future>, + E: RetryError + std::fmt::Display, +{ + let id = uuid::Uuid::new_v4(); + retry_ext( + action, + RetryExt::default().with_fail_hook(move |err: &E| { + warn!("aws request meet error."; "err" => %err, "retry?" 
=> %err.is_retryable(), "context" => %name, "uuid" => %id); + metrics::CLOUD_ERROR_VEC.with_label_values(&["aws", name]).inc(); + }), + ).await +} + pub struct CredentialsProvider(AutoRefreshingProvider); impl CredentialsProvider { @@ -92,21 +136,81 @@ impl Default for DefaultCredentialsProvider { #[async_trait] impl ProvideAwsCredentials for DefaultCredentialsProvider { async fn credentials(&self) -> Result { - // Prefer the web identity provider first for the kubernetes environment. - // Search for both in parallel. - let web_creds = self.web_identity_provider.credentials(); - let def_creds = self.default_provider.credentials(); - let k8s_error = match web_creds.await { - res @ Ok(_) => return res, - Err(e) => e, - }; - let def_error = match def_creds.await { - res @ Ok(_) => return res, - Err(e) => e, + // use web identity provider first for the kubernetes environment. + let cred = if std::env::var(AWS_WEB_IDENTITY_TOKEN_FILE).is_ok() { + // we need invoke assume_role in web identity provider + // this API may failed sometimes. + // according to AWS experience, it's better to retry it with 10 times + // exponential backoff for every error, because we cannot + // distinguish the error type. + retry_and_count( + || { + #[cfg(test)] + fail::fail_point!("cred_err", |_| { + Box::pin(futures::future::err(CredentialsErrorWrapper( + CredentialsError::new("injected error"), + ))) + as std::pin::Pin + Send>> + }); + let res = self + .web_identity_provider + .credentials() + .map_err(|e| CredentialsErrorWrapper(e)); + #[cfg(test)] + return Box::pin(res); + #[cfg(not(test))] + res + }, + "get_cred_over_the_cloud", + ) + .await + .map_err(|e| e.0) + } else { + // Add exponential backoff for every error, because we cannot + // distinguish the error type. 
+ retry_and_count( + || { + self.default_provider + .credentials() + .map_err(|e| CredentialsErrorWrapper(e)) + }, + "get_cred_on_premise", + ) + .await + .map_err(|e| e.0) }; - Err(CredentialsError::new(format_args!( - "Couldn't find AWS credentials in default sources ({}) or k8s environment ({}).", - def_error.message, k8s_error.message, - ))) + + cred.map_err(|e| { + CredentialsError::new(format_args!( + "Couldn't find AWS credentials in sources ({}).", + e.message + )) + }) + } +} + +#[cfg(test)] +mod tests { + #[allow(unused_imports)] + use super::*; + + #[cfg(feature = "failpoints")] + #[tokio::test] + async fn test_default_provider() { + let default_provider = DefaultCredentialsProvider::default(); + std::env::set_var(AWS_WEB_IDENTITY_TOKEN_FILE, "tmp"); + // mock k8s env with web_identitiy_provider + fail::cfg("cred_err", "return").unwrap(); + fail::cfg("retry_count", "return(1)").unwrap(); + let res = default_provider.credentials().await; + assert_eq!(res.is_err(), true); + assert_eq!( + res.err().unwrap().message, + "Couldn't find AWS credentials in sources (injected error)." + ); + fail::remove("cred_err"); + fail::remove("retry_count"); + + std::env::remove_var(AWS_WEB_IDENTITY_TOKEN_FILE); } } diff --git a/components/cloud/src/metrics.rs b/components/cloud/src/metrics.rs index e115abe0853..58e267a56fa 100644 --- a/components/cloud/src/metrics.rs +++ b/components/cloud/src/metrics.rs @@ -10,4 +10,10 @@ lazy_static! 
{ &["cloud", "req"] ) .unwrap(); + pub static ref CLOUD_ERROR_VEC: IntCounterVec = register_int_counter_vec!( + "tikv_cloud_error_count", + "Total number of credentail errors from EKS env", + &["cloud", "error"] + ) + .unwrap(); } diff --git a/components/tikv_util/src/stream.rs b/components/tikv_util/src/stream.rs index b7ba46c45bf..8f892659f68 100644 --- a/components/tikv_util/src/stream.rs +++ b/components/tikv_util/src/stream.rs @@ -96,19 +96,69 @@ pub trait RetryError { /// /// Since rusoto does not have transparent auto-retry /// (), we need to implement this manually. -pub async fn retry(mut action: G) -> Result +pub async fn retry(action: G) -> Result +where + G: FnMut() -> F, + F: Future>, + E: RetryError, +{ + retry_ext(action, RetryExt::default()).await +} + +/// The extra configuration for retry. +pub struct RetryExt { + // NOTE: we can move `MAX_RETRY_DELAY` and `MAX_RETRY_TIMES` + // to here, for making the retry more configurable. + // However those are constant for now and no place for configure them. + on_failure: Option>, +} + +impl RetryExt { + /// Attaches the failure hook to the ext. + pub fn with_fail_hook(mut self, f: F) -> Self + where + F: FnMut(&E) + Send + Sync + 'static, + { + self.on_failure = Some(Box::new(f)); + self + } +} + +// If we use the default derive macro, it would complain that `E` isn't +// `Default` :( +impl Default for RetryExt { + fn default() -> Self { + Self { + on_failure: Default::default(), + } + } +} + +/// Retires a future execution. Comparing to `retry`, this version allows more +/// configurations. 
+pub async fn retry_ext(mut action: G, mut ext: RetryExt) -> Result where G: FnMut() -> F, F: Future>, E: RetryError, { const MAX_RETRY_DELAY: Duration = Duration::from_secs(32); - const MAX_RETRY_TIMES: usize = 4; + const MAX_RETRY_TIMES: usize = 14; + let max_retry_times = (|| { + fail::fail_point!("retry_count", |t| t + .and_then(|v| v.parse::().ok()) + .unwrap_or(MAX_RETRY_TIMES)); + MAX_RETRY_TIMES + })(); + let mut retry_wait_dur = Duration::from_secs(1); let mut final_result = action().await; - for _ in 1..MAX_RETRY_TIMES { + for _ in 1..max_retry_times { if let Err(e) = &final_result { + if let Some(ref mut f) = ext.on_failure { + f(e); + } if e.is_retryable() { let backoff = thread_rng().gen_range(0..1000); sleep(retry_wait_dur + Duration::from_millis(backoff)).await; From cbf85c11fda31c808014786c0d67a436a8cb63ed Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 6 Sep 2022 17:48:55 +0800 Subject: [PATCH 0192/1149] raftstore: optimize region destroy (#13384) close tikv/tikv#12421 Optimize the performance of merging empty regions Signed-off-by: tabokie --- components/engine_panic/src/misc.rs | 4 - components/engine_rocks/src/misc.rs | 82 ++-- components/engine_traits/src/misc.rs | 23 +- components/raftstore/src/store/fsm/store.rs | 10 +- .../raftstore/src/store/worker/region.rs | 349 +++++++++--------- tests/integrations/storage/test_titan.rs | 8 +- 6 files changed, 230 insertions(+), 246 deletions(-) diff --git a/components/engine_panic/src/misc.rs b/components/engine_panic/src/misc.rs index 5a78ea66e5a..82012b84ed6 100644 --- a/components/engine_panic/src/misc.rs +++ b/components/engine_panic/src/misc.rs @@ -34,10 +34,6 @@ impl MiscExt for PanicEngine { panic!() } - fn roughly_cleanup_ranges(&self, ranges: &[(Vec, Vec)]) -> Result<()> { - panic!() - } - fn path(&self) -> &str { panic!() } diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index 7cf5d771486..e7c9ef547d8 100644 --- a/components/engine_rocks/src/misc.rs 
+++ b/components/engine_rocks/src/misc.rs @@ -4,7 +4,6 @@ use engine_traits::{ CfNamesExt, DeleteStrategy, ImportExt, IterOptions, Iterable, Iterator, MiscExt, Mutable, Range, Result, SstWriter, SstWriterBuilder, WriteBatch, WriteBatchExt, ALL_CFS, }; -use rocksdb::Range as RocksRange; use tikv_util::{box_try, keybuilder::KeyBuilder}; use crate::{ @@ -29,17 +28,6 @@ impl RocksEngine { ) -> Result<()> { let mut ranges = ranges.to_owned(); ranges.sort_by(|a, b| a.start_key.cmp(b.start_key)); - let max_end_key = ranges - .iter() - .fold(ranges[0].end_key, |x, y| std::cmp::max(x, y.end_key)); - let start = KeyBuilder::from_slice(ranges[0].start_key, 0, 0); - let end = KeyBuilder::from_slice(max_end_key, 0, 0); - let mut opts = IterOptions::new(Some(start), Some(end), false); - if self.is_titan() { - // Cause DeleteFilesInRange may expose old blob index keys, setting key only for - // Titan to avoid referring to missing blob files. - opts.set_key_only(true); - } let mut writer_wrapper: Option = None; let mut data: Vec> = vec![]; @@ -55,7 +43,17 @@ impl RocksEngine { } last_end_key = Some(r.end_key.to_owned()); - let mut it = self.iterator_opt(cf, opts.clone())?; + let mut opts = IterOptions::new( + Some(KeyBuilder::from_slice(r.start_key, 0, 0)), + Some(KeyBuilder::from_slice(r.end_key, 0, 0)), + false, + ); + if self.is_titan() { + // Cause DeleteFilesInRange may expose old blob index keys, setting key only for + // Titan to avoid referring to missing blob files. 
+ opts.set_key_only(true); + } + let mut it = self.iterator_opt(cf, opts)?; let mut it_valid = it.seek(r.start_key)?; while it_valid { if it.key() >= r.end_key { @@ -225,29 +223,6 @@ impl MiscExt for RocksEngine { Ok(used_size) } - fn roughly_cleanup_ranges(&self, ranges: &[(Vec, Vec)]) -> Result<()> { - let db = self.as_inner(); - let mut delete_ranges = Vec::new(); - for &(ref start, ref end) in ranges { - if start == end { - continue; - } - assert!(start < end); - delete_ranges.push(RocksRange::new(start, end)); - } - if delete_ranges.is_empty() { - return Ok(()); - } - - for cf in db.cf_names() { - let handle = util::get_cf_handle(db, cf)?; - db.delete_files_in_ranges_cf(handle, &delete_ranges, /* include_end */ false) - .map_err(r2e)?; - } - - Ok(()) - } - fn path(&self) -> &str { self.as_inner().path() } @@ -363,13 +338,9 @@ mod tests { } } - fn test_delete_all_in_range( - strategy: DeleteStrategy, - origin_keys: &[Vec], - ranges: &[Range<'_>], - ) { + fn test_delete_ranges(strategy: DeleteStrategy, origin_keys: &[Vec], ranges: &[Range<'_>]) { let path = Builder::new() - .prefix("engine_delete_all_in_range") + .prefix("engine_delete_ranges") .tempdir() .unwrap(); let path_str = path.path().to_str().unwrap(); @@ -399,8 +370,7 @@ mod tests { wb.write().unwrap(); check_data(&db, ALL_CFS, kvs.as_slice()); - // Delete all in ranges. - db.delete_all_in_range(strategy, ranges).unwrap(); + db.delete_ranges_cfs(strategy, ranges).unwrap(); let mut kvs_left: Vec<_> = kvs; for r in ranges { @@ -419,25 +389,25 @@ mod tests { b"k4".to_vec(), ]; // Single range. - test_delete_all_in_range( + test_delete_ranges( DeleteStrategy::DeleteByRange, &data, &[Range::new(b"k1", b"k4")], ); // Two ranges without overlap. - test_delete_all_in_range( + test_delete_ranges( DeleteStrategy::DeleteByRange, &data, &[Range::new(b"k0", b"k1"), Range::new(b"k3", b"k4")], ); // Two ranges with overlap. 
- test_delete_all_in_range( + test_delete_ranges( DeleteStrategy::DeleteByRange, &data, &[Range::new(b"k1", b"k3"), Range::new(b"k2", b"k4")], ); // One range contains the other range. - test_delete_all_in_range( + test_delete_ranges( DeleteStrategy::DeleteByRange, &data, &[Range::new(b"k1", b"k4"), Range::new(b"k2", b"k3")], @@ -454,25 +424,25 @@ mod tests { b"k4".to_vec(), ]; // Single range. - test_delete_all_in_range( + test_delete_ranges( DeleteStrategy::DeleteByKey, &data, &[Range::new(b"k1", b"k4")], ); // Two ranges without overlap. - test_delete_all_in_range( + test_delete_ranges( DeleteStrategy::DeleteByKey, &data, &[Range::new(b"k0", b"k1"), Range::new(b"k3", b"k4")], ); // Two ranges with overlap. - test_delete_all_in_range( + test_delete_ranges( DeleteStrategy::DeleteByKey, &data, &[Range::new(b"k1", b"k3"), Range::new(b"k2", b"k4")], ); // One range contains the other range. - test_delete_all_in_range( + test_delete_ranges( DeleteStrategy::DeleteByKey, &data, &[Range::new(b"k1", b"k4"), Range::new(b"k2", b"k3")], @@ -491,7 +461,7 @@ mod tests { for i in 1000..5000 { data.push(i.to_string().as_bytes().to_vec()); } - test_delete_all_in_range( + test_delete_ranges( DeleteStrategy::DeleteByWriter { sst_path }, &data, &[ @@ -538,9 +508,9 @@ mod tests { } check_data(&db, ALL_CFS, kvs.as_slice()); - db.delete_all_in_range(DeleteStrategy::DeleteFiles, &[Range::new(b"k2", b"k4")]) + db.delete_ranges_cfs(DeleteStrategy::DeleteFiles, &[Range::new(b"k2", b"k4")]) .unwrap(); - db.delete_all_in_range(DeleteStrategy::DeleteBlobs, &[Range::new(b"k2", b"k4")]) + db.delete_ranges_cfs(DeleteStrategy::DeleteBlobs, &[Range::new(b"k2", b"k4")]) .unwrap(); check_data(&db, ALL_CFS, kvs_left.as_slice()); } @@ -585,7 +555,7 @@ mod tests { check_data(&db, &[cf], kvs.as_slice()); // Delete all in ["k2", "k4"). 
- db.delete_all_in_range( + db.delete_ranges_cfs( DeleteStrategy::DeleteByRange, &[Range::new(b"kabcdefg2", b"kabcdefg4")], ) diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index f0ba9d03c39..18991038ee8 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -14,6 +14,15 @@ pub enum DeleteStrategy { /// Delete the SST files that are fullly fit in range. However, the SST /// files that are partially overlapped with the range will not be /// touched. + /// + /// Note: + /// - After this operation, some keys in the range might still exist in + /// the database. + /// - After this operation, some keys in the range might be removed from + /// existing snapshot, so you shouldn't expect to be able to read data + /// from the range using existing snapshots any more. + /// + /// Ref: DeleteFiles, /// Delete the data stored in Titan. DeleteBlobs, @@ -33,7 +42,7 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { fn flush_cf(&self, cf: &str, wait: bool) -> Result<()>; - fn delete_all_in_range(&self, strategy: DeleteStrategy, ranges: &[Range<'_>]) -> Result<()> { + fn delete_ranges_cfs(&self, strategy: DeleteStrategy, ranges: &[Range<'_>]) -> Result<()> { for cf in self.cf_names() { self.delete_ranges_cf(cf, strategy.clone(), ranges)?; } @@ -59,18 +68,6 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { /// * total size (bytes) of all blob files. fn get_engine_used_size(&self) -> Result; - /// Roughly deletes files in multiple ranges. - /// - /// Note: - /// - After this operation, some keys in the range might still exist in - /// the database. - /// - After this operation, some keys in the range might be removed from - /// existing snapshot, so you shouldn't expect to be able to read data - /// from the range using existing snapshots any more. 
- /// - /// Ref: - fn roughly_cleanup_ranges(&self, ranges: &[(Vec, Vec)]) -> Result<()>; - /// The path to the directory on the filesystem where the database is stored fn path(&self) -> &str; diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 5743b0ec3a5..4ee3c5dc091 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -1239,8 +1239,14 @@ impl RaftPollerBuilder { last_start_key = keys::enc_end_key(region); } ranges.push((last_start_key, keys::DATA_MAX_KEY.to_vec())); + let ranges: Vec<_> = ranges + .iter() + .map(|(start, end)| Range::new(start, end)) + .collect(); - self.engines.kv.roughly_cleanup_ranges(&ranges)?; + self.engines + .kv + .delete_ranges_cfs(DeleteStrategy::DeleteFiles, &ranges)?; info!( "cleans up garbage data"; @@ -2851,7 +2857,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } drop(meta); - if let Err(e) = self.ctx.engines.kv.delete_all_in_range( + if let Err(e) = self.ctx.engines.kv.delete_ranges_cfs( DeleteStrategy::DeleteByKey, &[Range::new(&start_key, &end_key)], ) { diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 244ca514924..53b88d6ef16 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -50,10 +50,8 @@ use crate::{ // used to periodically check whether we should delete a stale peer's range in // region runner - #[cfg(test)] pub const STALE_PEER_CHECK_TICK: usize = 1; // 1000 milliseconds - #[cfg(not(test))] pub const STALE_PEER_CHECK_TICK: usize = 10; // 10000 milliseconds @@ -88,7 +86,8 @@ pub enum Task { }, /// Destroy data between [start_key, end_key). /// - /// The deletion may and may not succeed. + /// The actual deletion may be delayed if the engine is overloaded or a + /// reader is still referencing the data. 
Destroy { region_id: u64, start_key: Vec, @@ -133,8 +132,8 @@ struct StalePeerInfo { pub region_id: u64, pub end_key: Vec, // Once the oldest snapshot sequence exceeds this, it ensures that no one is - // reading on this peer anymore. So we can safely call `delete_files_in_range` - // , which may break the consistency of snapshot, of this peer range. + // reading on this peer anymore. So we can safely call `delete_files_in_range`, + // which may break the consistency of snapshot, of this peer range. pub stale_sequence: u64, } @@ -207,21 +206,27 @@ impl PendingDeleteRanges { /// /// Before an insert is called, it must call drain_overlap_ranges to clean /// the overlapping range. - fn insert(&mut self, region_id: u64, start_key: &[u8], end_key: &[u8], stale_sequence: u64) { - if !self.find_overlap_ranges(start_key, end_key).is_empty() { + fn insert( + &mut self, + region_id: u64, + start_key: Vec, + end_key: Vec, + stale_sequence: u64, + ) { + if !self.find_overlap_ranges(&start_key, &end_key).is_empty() { panic!( "[region {}] register deleting data in [{}, {}) failed due to overlap", region_id, - log_wrappers::Value::key(start_key), - log_wrappers::Value::key(end_key), + log_wrappers::Value::key(&start_key), + log_wrappers::Value::key(&end_key), ); } let info = StalePeerInfo { region_id, - end_key: end_key.to_owned(), + end_key, stale_sequence, }; - self.ranges.insert(start_key.to_owned(), info); + self.ranges.insert(start_key, info); } /// Gets all stale ranges info. 
@@ -243,21 +248,13 @@ impl PendingDeleteRanges { } } -#[derive(Clone)] -struct SnapContext -where - EK: KvEngine, -{ +struct SnapGenContext { engine: EK, - batch_size: usize, mgr: SnapManager, - use_delete_range: bool, - pending_delete_ranges: PendingDeleteRanges, - coprocessor_host: CoprocessorHost, router: R, } -impl SnapContext +impl SnapGenContext where EK: KvEngine, R: CasualRouter, @@ -347,6 +344,74 @@ where .generate .observe(start.saturating_elapsed_secs()); } +} + +pub struct Runner +where + EK: KvEngine, + T: PdClient + 'static, +{ + batch_size: usize, + use_delete_range: bool, + clean_stale_tick: usize, + clean_stale_check_interval: Duration, + + tiflash_stores: HashMap, + // we may delay some apply tasks if level 0 files to write stall threshold, + // pending_applies records all delayed apply task, and will check again later + pending_applies: VecDeque>, + // Ranges that have been logically destroyed at a specific sequence number. We can + // assume there will be no reader (engine snapshot) newer than that sequence number. Therefore, + // they can be physically deleted with `DeleteFiles` when we're sure there is no older + // reader as well. + // To protect this assumption, before a new snapshot is applied, the overlapping pending ranges + // must first be removed. + // The sole purpose of maintaining this list is to optimize deletion with `DeleteFiles` + // whenever we can. Errors while processing them can be ignored. 
+ pending_delete_ranges: PendingDeleteRanges, + + engine: EK, + mgr: SnapManager, + coprocessor_host: CoprocessorHost, + router: R, + pd_client: Option>, + pool: ThreadPool, +} + +impl Runner +where + EK: KvEngine, + R: CasualRouter, + T: PdClient + 'static, +{ + pub fn new( + engine: EK, + mgr: SnapManager, + batch_size: usize, + use_delete_range: bool, + snap_generator_pool_size: usize, + coprocessor_host: CoprocessorHost, + router: R, + pd_client: Option>, + ) -> Runner { + Runner { + batch_size, + use_delete_range, + clean_stale_tick: 0, + clean_stale_check_interval: Duration::from_millis(PENDING_APPLY_CHECK_INTERVAL), + tiflash_stores: HashMap::default(), + pending_applies: VecDeque::new(), + pending_delete_ranges: PendingDeleteRanges::default(), + engine, + mgr, + coprocessor_host, + router, + pd_client, + pool: Builder::new(thd_name!("snap-generator")) + .max_thread_count(snap_generator_pool_size) + .build_future_pool(), + } + } fn region_state(&self, region_id: u64) -> Result { let region_key = keys::region_state_key(region_id); @@ -389,18 +454,7 @@ where let start_key = keys::enc_start_key(®ion); let end_key = keys::enc_end_key(®ion); check_abort(&abort)?; - - // clear up origin data. - let overlap_ranges = self - .pending_delete_ranges - .drain_overlap_ranges(&start_key, &end_key); - if !overlap_ranges.is_empty() { - CLEAN_COUNTER_VEC - .with_label_values(&["overlap-with-apply"]) - .inc(); - self.cleanup_overlap_regions(overlap_ranges)?; - } - self.delete_all_in_range(&[Range::new(&start_key, &end_key)])?; + self.clean_overlap_ranges(start_key, end_key)?; check_abort(&abort)?; fail_point!("apply_snap_cleanup_range"); @@ -485,80 +539,77 @@ where let _ = self.router.send(region_id, CasualMessage::SnapshotApplied); } - /// Cleans up the data within the range. 
- fn cleanup_range(&self, ranges: &[Range<'_>]) -> Result<()> { - self.engine - .delete_all_in_range(DeleteStrategy::DeleteFiles, ranges) - .unwrap_or_else(|e| { - error!("failed to delete files in range"; "err" => %e); - }); - self.delete_all_in_range(ranges)?; - self.engine - .delete_all_in_range(DeleteStrategy::DeleteBlobs, ranges) - .unwrap_or_else(|e| { - error!("failed to delete files in range"; "err" => %e); - }); - Ok(()) - } - - /// Gets the overlapping ranges and cleans them up. - fn cleanup_overlap_regions( + /// Tries to clean up files in pending ranges overlapping with the given + /// bounds. These pending ranges will be removed. Returns an updated range + /// that also includes these ranges. Caller must ensure the remaining keys + /// in the returning range will be deleted properly. + fn clean_overlap_ranges_roughly( &mut self, - overlap_ranges: Vec<(u64, Vec, Vec, u64)>, - ) -> Result<()> { + mut start_key: Vec, + mut end_key: Vec, + ) -> (Vec, Vec) { + let overlap_ranges = self + .pending_delete_ranges + .drain_overlap_ranges(&start_key, &end_key); + if overlap_ranges.is_empty() { + return (start_key, end_key); + } + CLEAN_COUNTER_VEC.with_label_values(&["overlap"]).inc(); let oldest_sequence = self .engine .get_oldest_snapshot_sequence_number() .unwrap_or(u64::MAX); - let mut ranges = Vec::with_capacity(overlap_ranges.len()); - let mut df_ranges = Vec::with_capacity(overlap_ranges.len()); - for (region_id, start_key, end_key, stale_sequence) in overlap_ranges.iter() { - // `DeleteFiles` may break current rocksdb snapshots consistency, - // so do not use it unless we can make sure there is no reader of the destroyed - // peer anymore. 
- if *stale_sequence < oldest_sequence { - df_ranges.push(Range::new(start_key, end_key)); - } else { - SNAP_COUNTER_VEC - .with_label_values(&["overlap", "not_delete_files"]) - .inc(); - } - info!("delete data in range because of overlap"; "region_id" => region_id, - "start_key" => log_wrappers::Value::key(start_key), - "end_key" => log_wrappers::Value::key(end_key)); - ranges.push(Range::new(start_key, end_key)); - } + let df_ranges: Vec<_> = overlap_ranges + .iter() + .filter_map(|(region_id, cur_start, cur_end, stale_sequence)| { + info!( + "delete data in range because of overlap"; "region_id" => region_id, + "start_key" => log_wrappers::Value::key(cur_start), + "end_key" => log_wrappers::Value::key(cur_end) + ); + if &start_key > cur_start { + start_key = cur_start.clone(); + } + if &end_key < cur_end { + end_key = cur_end.clone(); + } + if *stale_sequence < oldest_sequence { + Some(Range::new(cur_start, cur_end)) + } else { + SNAP_COUNTER_VEC + .with_label_values(&["overlap", "not_delete_files"]) + .inc(); + None + } + }) + .collect(); self.engine - .delete_all_in_range(DeleteStrategy::DeleteFiles, &df_ranges) + .delete_ranges_cfs(DeleteStrategy::DeleteFiles, &df_ranges) .unwrap_or_else(|e| { error!("failed to delete files in range"; "err" => %e); }); + (start_key, end_key) + } - self.delete_all_in_range(&ranges) + /// Cleans up data in the given range and all pending ranges overlapping + /// with it. + fn clean_overlap_ranges(&mut self, start_key: Vec, end_key: Vec) -> Result<()> { + let (start_key, end_key) = self.clean_overlap_ranges_roughly(start_key, end_key); + self.delete_all_in_range(&[Range::new(&start_key, &end_key)]) } /// Inserts a new pending range, and it will be cleaned up with some delay. 
- fn insert_pending_delete_range(&mut self, region_id: u64, start_key: &[u8], end_key: &[u8]) { - let overlap_ranges = self - .pending_delete_ranges - .drain_overlap_ranges(start_key, end_key); - if !overlap_ranges.is_empty() { - CLEAN_COUNTER_VEC - .with_label_values(&["overlap-with-destroy"]) - .inc(); - if let Err(e) = self.cleanup_overlap_regions(overlap_ranges) { - warn!("cleanup_overlap_ranges failed"; - "region_id" => region_id, - "start_key" => log_wrappers::Value::key(start_key), - "end_key" => log_wrappers::Value::key(end_key), - "err" => %e, - ); - } - } + fn insert_pending_delete_range( + &mut self, + region_id: u64, + start_key: Vec, + end_key: Vec, + ) { + let (start_key, end_key) = self.clean_overlap_ranges_roughly(start_key, end_key); info!("register deleting data in range"; "region_id" => region_id, - "start_key" => log_wrappers::Value::key(start_key), - "end_key" => log_wrappers::Value::key(end_key), + "start_key" => log_wrappers::Value::key(&start_key), + "end_key" => log_wrappers::Value::key(&end_key), ); let seq = self.engine.get_latest_sequence_number(); self.pending_delete_ranges @@ -575,33 +626,43 @@ where .engine .get_oldest_snapshot_sequence_number() .unwrap_or(u64::MAX); - let mut cleanup_ranges: Vec<(u64, Vec, Vec)> = self + let mut region_ranges: Vec<(u64, Vec, Vec)> = self .pending_delete_ranges .stale_ranges(oldest_sequence) .map(|(region_id, s, e)| (region_id, s.to_vec(), e.to_vec())) .collect(); - if cleanup_ranges.is_empty() { + if region_ranges.is_empty() { return; } CLEAN_COUNTER_VEC.with_label_values(&["destroy"]).inc_by(1); - cleanup_ranges.sort_by(|a, b| a.1.cmp(&b.1)); - while cleanup_ranges.len() > CLEANUP_MAX_REGION_COUNT { - cleanup_ranges.pop(); - } - let ranges: Vec> = cleanup_ranges + region_ranges.sort_by(|a, b| a.1.cmp(&b.1)); + region_ranges.truncate(CLEANUP_MAX_REGION_COUNT); + let ranges: Vec<_> = region_ranges .iter() .map(|(region_id, start, end)| { info!("delete data in range because of stale"; "region_id" => 
region_id, - "start_key" => log_wrappers::Value::key(start), - "end_key" => log_wrappers::Value::key(end)); + "start_key" => log_wrappers::Value::key(start), + "end_key" => log_wrappers::Value::key(end)); Range::new(start, end) }) .collect(); - if let Err(e) = self.cleanup_range(&ranges) { + + self.engine + .delete_ranges_cfs(DeleteStrategy::DeleteFiles, &ranges) + .unwrap_or_else(|e| { + error!("failed to delete files in range"; "err" => %e); + }); + if let Err(e) = self.delete_all_in_range(&ranges) { error!("failed to cleanup stale range"; "err" => %e); return; } - for (_, key, _) in cleanup_ranges { + self.engine + .delete_ranges_cfs(DeleteStrategy::DeleteBlobs, &ranges) + .unwrap_or_else(|e| { + error!("failed to delete blobs in range"; "err" => %e); + }); + + for (_, key, _) in region_ranges { assert!( self.pending_delete_ranges.remove(&key).is_some(), "cleanup pending_delete_ranges {} should exist", @@ -682,60 +743,6 @@ where ); Ok(()) } -} - -pub struct Runner -where - EK: KvEngine, - T: PdClient + 'static, -{ - pool: ThreadPool, - ctx: SnapContext, - // we may delay some apply tasks if level 0 files to write stall threshold, - // pending_applies records all delayed apply task, and will check again later - pending_applies: VecDeque>, - clean_stale_tick: usize, - clean_stale_check_interval: Duration, - tiflash_stores: HashMap, - pd_client: Option>, -} - -impl Runner -where - EK: KvEngine, - R: CasualRouter, - T: PdClient + 'static, -{ - pub fn new( - engine: EK, - mgr: SnapManager, - batch_size: usize, - use_delete_range: bool, - snap_generator_pool_size: usize, - coprocessor_host: CoprocessorHost, - router: R, - pd_client: Option>, - ) -> Runner { - Runner { - pool: Builder::new(thd_name!("snap-generator")) - .max_thread_count(snap_generator_pool_size) - .build_future_pool(), - ctx: SnapContext { - engine, - mgr, - batch_size, - use_delete_range, - pending_delete_ranges: PendingDeleteRanges::default(), - coprocessor_host, - router, - }, - pending_applies: 
VecDeque::new(), - clean_stale_tick: 0, - clean_stale_check_interval: Duration::from_millis(PENDING_APPLY_CHECK_INTERVAL), - tiflash_stores: HashMap::default(), - pd_client, - } - } /// Tries to apply pending tasks if there is some. fn handle_pending_applies(&mut self) { @@ -744,7 +751,7 @@ where // should not handle too many applies than the number of files that can be // ingested. check level 0 every time because we can not make sure // how does the number of level 0 files change. - if self.ctx.ingest_maybe_stall() { + if self.ingest_maybe_stall() { break; } if let Some(Task::Apply { @@ -753,7 +760,7 @@ where peer_id, }) = self.pending_applies.pop_front() { - self.ctx.handle_apply(region_id, peer_id, status); + self.handle_apply(region_id, peer_id, status); } } } @@ -781,7 +788,6 @@ where } => { // It is safe for now to handle generating and applying snapshot concurrently, // but it may not when merge is implemented. - let ctx = self.ctx.clone(); let mut allow_multi_files_snapshot = false; // if to_store_id is 0, it means the to_store_id cannot be found if to_store_id != 0 { @@ -806,6 +812,11 @@ where } } + let ctx = SnapGenContext { + engine: self.engine.clone(), + mgr: self.mgr.clone(), + router: self.router.clone(), + }; self.pool.spawn(async move { tikv_alloc::add_thread_memory_accessor(); ctx.handle_gen( @@ -823,8 +834,8 @@ where } task @ Task::Apply { .. } => { fail_point!("on_region_worker_apply", true, |_| {}); - if self.ctx.coprocessor_host.should_pre_apply_snapshot() { - let _ = self.ctx.pre_apply_snapshot(&task); + if self.coprocessor_host.should_pre_apply_snapshot() { + let _ = self.pre_apply_snapshot(&task); } // to makes sure applying snapshots in order. 
self.pending_applies.push_back(task); @@ -842,9 +853,8 @@ where fail_point!("on_region_worker_destroy", true, |_| {}); // try to delay the range deletion because // there might be a coprocessor request related to this range - self.ctx - .insert_pending_delete_range(region_id, &start_key, &end_key); - self.ctx.clean_stale_ranges(); + self.insert_pending_delete_range(region_id, start_key, end_key); + self.clean_stale_ranges(); } } } @@ -864,7 +874,7 @@ where self.handle_pending_applies(); self.clean_stale_tick += 1; if self.clean_stale_tick >= STALE_PEER_CHECK_TICK { - self.ctx.clean_stale_ranges(); + self.clean_stale_ranges(); self.clean_stale_tick = 0; } } @@ -917,7 +927,12 @@ mod tests { e: &str, stale_sequence: u64, ) { - pending_delete_ranges.insert(id, s.as_bytes(), e.as_bytes(), stale_sequence); + pending_delete_ranges.insert( + id, + s.as_bytes().to_owned(), + e.as_bytes().to_owned(), + stale_sequence, + ); } #[test] diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 5b957b88822..25a5bccf32b 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -298,11 +298,11 @@ fn test_delete_files_in_range_for_titan() { // blob4: (b_7, b_value) // `delete_files_in_range` may expose some old keys. - // For Titan it may encounter `missing blob file` in `delete_all_in_range`, + // For Titan it may encounter `missing blob file` in `delete_ranges_cfs`, // so we set key_only for Titan. 
engines .kv - .delete_all_in_range( + .delete_ranges_cfs( DeleteStrategy::DeleteFiles, &[Range::new( &data_key(Key::from_raw(b"a").as_encoded()), @@ -312,7 +312,7 @@ fn test_delete_files_in_range_for_titan() { .unwrap(); engines .kv - .delete_all_in_range( + .delete_ranges_cfs( DeleteStrategy::DeleteByKey, &[Range::new( &data_key(Key::from_raw(b"a").as_encoded()), @@ -322,7 +322,7 @@ fn test_delete_files_in_range_for_titan() { .unwrap(); engines .kv - .delete_all_in_range( + .delete_ranges_cfs( DeleteStrategy::DeleteBlobs, &[Range::new( &data_key(Key::from_raw(b"a").as_encoded()), From 7a33cb611bbc99a216fd25fca7fb8713ac1648c1 Mon Sep 17 00:00:00 2001 From: Potato Date: Thu, 8 Sep 2022 10:32:56 +0800 Subject: [PATCH 0193/1149] storage: add perform_read_local fail_point to stabilize the test (#13427) ref tikv/tikv#12362 This commit adds `perform_read_local` fail_point so we can force the lease read to be triggered, which allows the test `test_read_execution_tracker` to pass stably. Signed-off-by: OneSizeFitQuorum --- components/raftstore/src/store/peer.rs | 2 ++ components/raftstore/src/store/worker/read.rs | 2 ++ tests/failpoints/cases/test_read_execution_tracker.rs | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 53747f082e4..edf88a561ba 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -5477,6 +5477,8 @@ pub trait RequestInspector { fail_point!("perform_read_index", |_| Ok(RequestPolicy::ReadIndex)); + fail_point!("perform_read_local", |_| Ok(RequestPolicy::ReadLocal)); + let flags = WriteBatchFlags::from_bits_check(req.get_header().get_flags()); if flags.contains(WriteBatchFlags::STALE_READ) { return Ok(RequestPolicy::StaleRead); diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 9c5889f876e..5efb750b863 100644 --- a/components/raftstore/src/store/worker/read.rs 
+++ b/components/raftstore/src/store/worker/read.rs @@ -416,6 +416,8 @@ impl ReadDelegate { } pub fn is_in_leader_lease(&self, ts: Timespec) -> bool { + fail_point!("perform_read_local", |_| true); + if let Some(ref lease) = self.leader_lease { let term = lease.term(); if term == self.term { diff --git a/tests/failpoints/cases/test_read_execution_tracker.rs b/tests/failpoints/cases/test_read_execution_tracker.rs index 4357d65af5f..c5ff93a70c1 100644 --- a/tests/failpoints/cases/test_read_execution_tracker.rs +++ b/tests/failpoints/cases/test_read_execution_tracker.rs @@ -59,6 +59,8 @@ fn test_read_execution_tracking() { ); }; + fail::cfg("perform_read_local", "return()").unwrap(); + // should perform lease read let resp = kv_read(&client, ctx.clone(), k1.clone(), 100); @@ -80,6 +82,8 @@ fn test_read_execution_tracking() { lease_read_checker(resp.get_exec_details_v2().get_scan_detail_v2()); + fail::remove("perform_read_local"); + let read_index_checker = |scan_detail: &ScanDetailV2| { assert!( scan_detail.get_read_index_propose_wait_nanos() > 0, From b0a80d497ccf0224c3ff01ded744e93c6d4686cb Mon Sep 17 00:00:00 2001 From: YangKeao Date: Wed, 7 Sep 2022 22:46:57 -0400 Subject: [PATCH 0194/1149] copr: fix wrong json opaque serialization (#13392) close tikv/tikv#13391 Signed-off-by: YangKeao Co-authored-by: Ti Chi Robot --- .../src/codec/mysql/json/serde.rs | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/components/tidb_query_datatype/src/codec/mysql/json/serde.rs b/components/tidb_query_datatype/src/codec/mysql/json/serde.rs index d15f728ed10..6c1f065f8d6 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/serde.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/serde.rs @@ -115,7 +115,11 @@ impl<'a> Serialize for JsonRef<'a> { .get_opaque_type() .map_err(|_| SerError::custom("invalid opaque type code"))?; - let str = format!("base64:type{}:{}", typ, base64::encode(bytes)); + let str = format!( + 
"base64:type{}:{}", + typ.to_u8().unwrap(), + base64::encode(bytes) + ); serializer.serialize_str(&str) } } @@ -227,6 +231,7 @@ impl<'de> Deserialize<'de> for Json { #[cfg(test)] mod tests { use super::*; + use crate::FieldTypeTp; #[test] fn test_from_str_for_object() { @@ -276,4 +281,40 @@ mod tests { resp.unwrap_err(); } } + + #[test] + fn test_to_str() { + let legal_cases = vec![ + ( + Json::from_kv_pairs(vec![( + b"key", + Json::from_str_val("value").unwrap().as_ref(), + )]) + .unwrap(), + r#"{"key": "value"}"#, + ), + ( + Json::from_array(vec![ + Json::from_str_val("d1").unwrap(), + Json::from_str_val("d2").unwrap(), + ]) + .unwrap(), + r#"["d1", "d2"]"#, + ), + (Json::from_i64(-3).unwrap(), r#"-3"#), + (Json::from_i64(3).unwrap(), r#"3"#), + (Json::from_f64(3.0).unwrap(), r#"3.0"#), + (Json::none().unwrap(), r#"null"#), + (Json::from_bool(true).unwrap(), r#"true"#), + (Json::from_bool(false).unwrap(), r#"false"#), + ( + Json::from_opaque(FieldTypeTp::VarString, &[0xAB, 0xCD]).unwrap(), + r#""base64:type253:q80=""#, + ), + ]; + + for (json, json_str) in legal_cases { + assert_eq!(json.to_string(), json_str); + } + } } From a57bb584ff85326c65ed76894d7c4c6b6a8b068e Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 7 Sep 2022 20:00:56 -0700 Subject: [PATCH 0195/1149] components: introduce tirocks module (#13411) ref tikv/tikv#13058 Only make it in the codebase, will not compile it. It will replace engine_rocks once it's finished. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 9 +- Cargo.toml | 12 +++ components/engine_tirocks/Cargo.toml | 9 ++ components/engine_tirocks/src/lib.rs | 12 +++ components/engine_tirocks/src/status.rs | 123 ++++++++++++++++++++++++ 5 files changed, 160 insertions(+), 5 deletions(-) create mode 100644 components/engine_tirocks/Cargo.toml create mode 100644 components/engine_tirocks/src/lib.rs create mode 100644 components/engine_tirocks/src/status.rs diff --git a/Cargo.lock b/Cargo.lock index f9dc0e6c418..aedc4328377 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -753,9 +753,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.69" +version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" dependencies = [ "jobserver", ] @@ -935,9 +935,8 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb6210b637171dfba4cda12e579ac6dc73f5165ad56133e5d72ef3131f320855" +version = "0.1.48" +source = "git+https://github.com/rust-lang/cmake-rs#00e6b220342a8b0ec4548071928ade38fd5f691b" dependencies = [ "cc", ] diff --git a/Cargo.toml b/Cargo.toml index e1dad6c5fa3..531449ab1b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -198,9 +198,14 @@ rusoto_mock = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr rusoto_s3 = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } rusoto_sts = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } +snappy-sys = { git = "https://github.com/busyjay/rust-snappy.git", branch = "static-link" } + # remove this when https://github.com/danburkert/fs2-rs/pull/42 is merged. fs2 = { git = "https://github.com/tabokie/fs2-rs", branch = "tikv" } +# Remove this when a new version is release. 
We need to solve rust-lang/cmake-rs#143. +cmake = { git = "https://github.com/rust-lang/cmake-rs" } + [target.'cfg(target_os = "linux")'.dependencies] procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229b2c1fcc44118bef7eff127128" } # When you modify TiKV cooperatively with kvproto, this will be useful to submit the PR to TiKV and the PR to @@ -236,6 +241,9 @@ members = [ "components/encryption", "components/encryption/export", "components/engine_rocks_helper", +# Only enable tirocks in local development, otherwise it can slow down compilation. +# TODO: always enable tirocks and remove engine_rocks. +# "components/engine_tirocks", "components/error_code", "components/external_storage", "components/external_storage/export", @@ -292,6 +300,10 @@ opt-level = 1 debug = false opt-level = 1 +[profile.dev.package.tirocks-sys] +debug = false +opt-level = 1 + [profile.dev.package.tests] debug = 1 opt-level = 1 diff --git a/components/engine_tirocks/Cargo.toml b/components/engine_tirocks/Cargo.toml new file mode 100644 index 00000000000..31b3122d842 --- /dev/null +++ b/components/engine_tirocks/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "engine_tirocks" +version = "0.1.0" +edition = "2021" + +[dependencies] +engine_traits = { path = "../engine_traits" } +tikv_alloc = { path = "../tikv_alloc" } +tirocks = { git = "https://github.com/busyjay/tirocks.git", branch = "dev" } diff --git a/components/engine_tirocks/src/lib.rs b/components/engine_tirocks/src/lib.rs new file mode 100644 index 00000000000..3257eb9f0ae --- /dev/null +++ b/components/engine_tirocks/src/lib.rs @@ -0,0 +1,12 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! A new implementation of engine_traits using tirocks. +//! +//! When all features of engine_rocks are implemented in this module, +//! engine_rocks will be removed and TiKV will switch to tirocks. 
+ +extern crate tikv_alloc as _; + +mod status; + +pub use status::*; diff --git a/components/engine_tirocks/src/status.rs b/components/engine_tirocks/src/status.rs new file mode 100644 index 00000000000..13ae730562f --- /dev/null +++ b/components/engine_tirocks/src/status.rs @@ -0,0 +1,123 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +pub fn to_engine_trait_status(s: tirocks::Status) -> engine_traits::Status { + let code = match s.code() { + tirocks::Code::kOk => engine_traits::Code::Ok, + tirocks::Code::kNotFound => engine_traits::Code::NotFound, + tirocks::Code::kCorruption => engine_traits::Code::Corruption, + tirocks::Code::kNotSupported => engine_traits::Code::NotSupported, + tirocks::Code::kInvalidArgument => engine_traits::Code::InvalidArgument, + tirocks::Code::kIOError => engine_traits::Code::IoError, + tirocks::Code::kMergeInProgress => engine_traits::Code::MergeInProgress, + tirocks::Code::kIncomplete => engine_traits::Code::Incomplete, + tirocks::Code::kShutdownInProgress => engine_traits::Code::ShutdownInProgress, + tirocks::Code::kTimedOut => engine_traits::Code::TimedOut, + tirocks::Code::kAborted => engine_traits::Code::Aborted, + tirocks::Code::kBusy => engine_traits::Code::Busy, + tirocks::Code::kExpired => engine_traits::Code::Expired, + tirocks::Code::kTryAgain => engine_traits::Code::TryAgain, + tirocks::Code::kCompactionTooLarge => engine_traits::Code::CompactionTooLarge, + tirocks::Code::kColumnFamilyDropped => engine_traits::Code::ColumnFamilyDropped, + tirocks::Code::kMaxCode => unreachable!(), + }; + let sev = match s.severity() { + tirocks::Severity::kNoError => engine_traits::Severity::NoError, + tirocks::Severity::kSoftError => engine_traits::Severity::SoftError, + tirocks::Severity::kHardError => engine_traits::Severity::HardError, + tirocks::Severity::kFatalError => engine_traits::Severity::FatalError, + tirocks::Severity::kUnrecoverableError => engine_traits::Severity::UnrecoverableError, + 
tirocks::Severity::kMaxSeverity => unreachable!(), + }; + let sub_code = match s.sub_code() { + tirocks::SubCode::kNone => engine_traits::SubCode::None, + tirocks::SubCode::kMutexTimeout => engine_traits::SubCode::MutexTimeout, + tirocks::SubCode::kLockTimeout => engine_traits::SubCode::LockTimeout, + tirocks::SubCode::kLockLimit => engine_traits::SubCode::LockLimit, + tirocks::SubCode::kNoSpace => engine_traits::SubCode::NoSpace, + tirocks::SubCode::kDeadlock => engine_traits::SubCode::Deadlock, + tirocks::SubCode::kStaleFile => engine_traits::SubCode::StaleFile, + tirocks::SubCode::kMemoryLimit => engine_traits::SubCode::MemoryLimit, + tirocks::SubCode::kSpaceLimit => engine_traits::SubCode::SpaceLimit, + tirocks::SubCode::kPathNotFound => engine_traits::SubCode::PathNotFound, + tirocks::SubCode::KMergeOperandsInsufficientCapacity => { + engine_traits::SubCode::MergeOperandsInsufficientCapacity + } + tirocks::SubCode::kManualCompactionPaused => engine_traits::SubCode::ManualCompactionPaused, + tirocks::SubCode::kOverwritten => engine_traits::SubCode::Overwritten, + tirocks::SubCode::kTxnNotPrepared => engine_traits::SubCode::TxnNotPrepared, + tirocks::SubCode::kIOFenced => engine_traits::SubCode::IoFenced, + tirocks::SubCode::kMaxSubCode => unreachable!(), + }; + let mut es = match s.state().map(|s| String::from_utf8_lossy(s).into_owned()) { + Some(msg) => engine_traits::Status::with_error(code, msg), + None => engine_traits::Status::with_code(code), + }; + es.set_severity(sev).set_sub_code(sub_code); + es +} + +/// A function that will transform a rocksdb error to engine trait error. +/// +/// r stands for rocksdb, e stands for engine_trait. +pub fn r2e(s: tirocks::Status) -> engine_traits::Error { + engine_traits::Error::Engine(to_engine_trait_status(s)) +} + +/// A function that will transform a engine trait error to rocksdb error. +/// +/// r stands for rocksdb, e stands for engine_trait. 
+pub fn e2r(s: engine_traits::Error) -> tirocks::Status { + let s = match s { + engine_traits::Error::Engine(s) => s, + // Any better options than IOError? + _ => return tirocks::Status::with_error(tirocks::Code::kIOError, format!("{}", s)), + }; + let code = match s.code() { + engine_traits::Code::Ok => tirocks::Code::kOk, + engine_traits::Code::NotFound => tirocks::Code::kNotFound, + engine_traits::Code::Corruption => tirocks::Code::kCorruption, + engine_traits::Code::NotSupported => tirocks::Code::kNotSupported, + engine_traits::Code::InvalidArgument => tirocks::Code::kInvalidArgument, + engine_traits::Code::IoError => tirocks::Code::kIOError, + engine_traits::Code::MergeInProgress => tirocks::Code::kMergeInProgress, + engine_traits::Code::Incomplete => tirocks::Code::kIncomplete, + engine_traits::Code::ShutdownInProgress => tirocks::Code::kShutdownInProgress, + engine_traits::Code::TimedOut => tirocks::Code::kTimedOut, + engine_traits::Code::Aborted => tirocks::Code::kAborted, + engine_traits::Code::Busy => tirocks::Code::kBusy, + engine_traits::Code::Expired => tirocks::Code::kExpired, + engine_traits::Code::TryAgain => tirocks::Code::kTryAgain, + engine_traits::Code::CompactionTooLarge => tirocks::Code::kCompactionTooLarge, + engine_traits::Code::ColumnFamilyDropped => tirocks::Code::kColumnFamilyDropped, + }; + let sev = match s.severity() { + engine_traits::Severity::NoError => tirocks::Severity::kNoError, + engine_traits::Severity::SoftError => tirocks::Severity::kSoftError, + engine_traits::Severity::HardError => tirocks::Severity::kHardError, + engine_traits::Severity::FatalError => tirocks::Severity::kFatalError, + engine_traits::Severity::UnrecoverableError => tirocks::Severity::kUnrecoverableError, + }; + let sub_code = match s.sub_code() { + engine_traits::SubCode::None => tirocks::SubCode::kNone, + engine_traits::SubCode::MutexTimeout => tirocks::SubCode::kMutexTimeout, + engine_traits::SubCode::LockTimeout => tirocks::SubCode::kLockTimeout, + 
engine_traits::SubCode::LockLimit => tirocks::SubCode::kLockLimit, + engine_traits::SubCode::NoSpace => tirocks::SubCode::kNoSpace, + engine_traits::SubCode::Deadlock => tirocks::SubCode::kDeadlock, + engine_traits::SubCode::StaleFile => tirocks::SubCode::kStaleFile, + engine_traits::SubCode::MemoryLimit => tirocks::SubCode::kMemoryLimit, + engine_traits::SubCode::SpaceLimit => tirocks::SubCode::kSpaceLimit, + engine_traits::SubCode::PathNotFound => tirocks::SubCode::kPathNotFound, + engine_traits::SubCode::MergeOperandsInsufficientCapacity => { + tirocks::SubCode::KMergeOperandsInsufficientCapacity + } + engine_traits::SubCode::ManualCompactionPaused => tirocks::SubCode::kManualCompactionPaused, + engine_traits::SubCode::Overwritten => tirocks::SubCode::kOverwritten, + engine_traits::SubCode::TxnNotPrepared => tirocks::SubCode::kTxnNotPrepared, + engine_traits::SubCode::IoFenced => tirocks::SubCode::kIOFenced, + }; + let mut ts = tirocks::Status::with_error(code, s.state()); + ts.set_severity(sev); + ts.set_sub_code(sub_code); + ts +} From 76f4a4e7ca6b15a1cc8e65c54049b816c2a1c45d Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 7 Sep 2022 21:06:56 -0700 Subject: [PATCH 0196/1149] raftstore-v2: support peer create and destroy (#13334) ref tikv/tikv#7475, ref tikv/tikv#12842 Compared to v1, there are few differences: - peer create is forced to go through store fsm, - destroy is fully asynchronous, - there is no wait for log gc as all writes go to raft io worker now. - uninitialized peer is always persisted, so problems like #7475 will not exist. - ranges are allowed to be conflict, it simplifies code a lot. This PR also adds a debug message to verify memory states easily. We still need to make leader to trace and gc removed peers. This will be implemented in next PR. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/batch-system/src/router.rs | 16 + components/engine_rocks/src/raft_engine.rs | 2 + components/engine_traits/src/engine.rs | 4 +- components/raftstore-v2/src/batch/store.rs | 115 +++---- components/raftstore-v2/src/fsm/mod.rs | 2 +- components/raftstore-v2/src/fsm/peer.rs | 14 +- components/raftstore-v2/src/fsm/store.rs | 71 ++++- components/raftstore-v2/src/operation/life.rs | 284 ++++++++++++++++++ components/raftstore-v2/src/operation/mod.rs | 2 + .../src/operation/ready/async_writer.rs | 4 + .../raftstore-v2/src/operation/ready/mod.rs | 93 +++++- components/raftstore-v2/src/raft/peer.rs | 53 ++-- components/raftstore-v2/src/raft/storage.rs | 85 +++++- components/raftstore-v2/src/router/message.rs | 19 +- .../raftstore-v2/tests/integrations/mod.rs | 180 ++++++----- .../tests/integrations/test_life.rs | 194 ++++++++++++ .../raftstore/src/store/async_io/write.rs | 55 ++-- 17 files changed, 993 insertions(+), 200 deletions(-) create mode 100644 components/raftstore-v2/src/operation/life.rs create mode 100644 components/raftstore-v2/tests/integrations/test_life.rs diff --git a/components/batch-system/src/router.rs b/components/batch-system/src/router.rs index 8b0936a9faa..660ab014939 100644 --- a/components/batch-system/src/router.rs +++ b/components/batch-system/src/router.rs @@ -174,6 +174,22 @@ where .store(normals.map.len(), Ordering::Relaxed); } + /// Same as send a message and then register the mailbox. + /// + /// The mailbox will not be registered if the message can't be sent. 
+ pub fn send_and_register( + &self, + addr: u64, + mailbox: BasicMailbox, + msg: N::Message, + ) -> Result<(), (BasicMailbox, N::Message)> { + if let Err(SendError(m)) = mailbox.force_send(msg, &self.normal_scheduler) { + return Err((mailbox, m)); + } + self.register(addr, mailbox); + Ok(()) + } + pub fn register_all(&self, mailboxes: Vec<(u64, BasicMailbox)>) { let mut normals = self.normals.lock().unwrap(); normals.map.reserve(mailboxes.len()); diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index 605ef4c5514..9e70f7158a7 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -252,6 +252,8 @@ impl RaftEngine for RocksEngine { batch: &mut Self::LogBatch, ) -> Result<()> { batch.delete(&keys::raft_state_key(raft_group_id))?; + batch.delete(&keys::region_state_key(raft_group_id))?; + batch.delete(&keys::apply_state_key(raft_group_id))?; if first_index == 0 { let seek_key = keys::raft_log_key(raft_group_id, 0); let prefix = keys::raft_log_prefix(raft_group_id); diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index e59d9104e56..e3e767f0ed2 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -219,7 +219,7 @@ impl OpenOptions { /// A factory trait to create new engine. // It should be named as `EngineFactory` for consistency, but we are about to // rename engine to tablet, so always use tablet for new traits/types. -pub trait TabletFactory: TabletAccessor { +pub trait TabletFactory: TabletAccessor + Send + Sync { /// Open the tablet with id and suffix according to the OpenOptions. 
/// /// The id is likely the region Id, the suffix could be the current raft log @@ -287,7 +287,7 @@ where impl TabletFactory for DummyFactory where - EK: CfOptionsExt + Clone + Send + 'static, + EK: CfOptionsExt + Clone + Send + Sync + 'static, { fn create_shared_db(&self) -> Result { Ok(self.engine.as_ref().unwrap().clone()) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index d30490f50d5..1a2d9b3750e 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -11,10 +11,13 @@ use batch_system::{ BasicMailbox, BatchRouter, BatchSystem, HandleResult, HandlerBuilder, PollHandler, }; use collections::HashMap; -use crossbeam::channel::Sender; +use crossbeam::channel::{Sender, TrySendError}; use engine_traits::{Engines, KvEngine, RaftEngine, TabletFactory}; use futures::{compat::Future01CompatExt, FutureExt}; -use kvproto::{metapb::Store, raft_serverpb::PeerState}; +use kvproto::{ + metapb::Store, + raft_serverpb::{PeerState, RaftMessage}, +}; use raft::INVALID_ID; use raftstore::store::{ fsm::store::PeerTickBatch, local_metrics::RaftMetrics, Config, RaftlogFetchRunner, @@ -28,12 +31,13 @@ use tikv_util::{ time::Instant as TiInstant, timer::SteadyTimer, worker::{Scheduler, Worker}, + Either, }; use super::apply::{create_apply_batch_system, ApplyPollerBuilder, ApplyRouter, ApplySystem}; use crate::{ fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate}, - raft::Peer, + raft::{Peer, Storage}, router::{PeerMsg, PeerTick, StoreMsg}, Error, Result, }; @@ -55,28 +59,9 @@ pub struct StoreContext { /// The precise timer for scheduling tick. 
pub timer: SteadyTimer, pub write_senders: WriteSenders, -} - -impl StoreContext { - fn new( - cfg: Config, - trans: T, - router: StoreRouter, - write_senders: WriteSenders, - logger: Logger, - ) -> Self { - Self { - logger, - trans, - has_ready: false, - raft_metrics: RaftMetrics::new(cfg.waterfall_metrics), - cfg, - router, - tick_batch: vec![PeerTickBatch::default(); PeerTick::VARIANT_COUNT], - timer: SteadyTimer::default(), - write_senders, - } - } + pub engine: ER, + pub tablet_factory: Arc>, + pub log_fetch_scheduler: Scheduler, } /// A [`PollHandler`] that handles updates of [`StoreFsm`]s and [`PeerFsm`]s. @@ -260,19 +245,17 @@ impl StorePollerBuilder { self.engine .for_each_raft_group::(&mut |region_id| { assert_ne!(region_id, INVALID_ID); - let peer = match Peer::new( - &cfg, + let storage = match Storage::new( region_id, self.store_id, - self.tablet_factory.as_ref(), self.engine.clone(), self.log_fetch_scheduler.clone(), &self.logger, )? { - Some(peer) => peer, + Some(p) => p, None => return Ok(()), }; - let pair = PeerFsm::new(&cfg, peer)?; + let pair = PeerFsm::new(&cfg, &*self.tablet_factory, storage)?; let prev = regions.insert(region_id, pair); if let Some((_, p)) = prev { return Err(box_err!( @@ -303,13 +286,21 @@ where type Handler = StorePoller; fn build(&mut self, priority: batch_system::Priority) -> Self::Handler { - let poll_ctx = StoreContext::new( - self.cfg.value().clone(), - self.trans.clone(), - self.router.clone(), - self.write_senders.clone(), - self.logger.clone(), - ); + let cfg = self.cfg.value().clone(); + let poll_ctx = StoreContext { + logger: self.logger.clone(), + trans: self.trans.clone(), + has_ready: false, + raft_metrics: RaftMetrics::new(cfg.waterfall_metrics), + cfg, + router: self.router.clone(), + tick_batch: vec![PeerTickBatch::default(); PeerTick::VARIANT_COUNT], + timer: SteadyTimer::default(), + write_senders: self.write_senders.clone(), + engine: self.engine.clone(), + tablet_factory: self.tablet_factory.clone(), + 
log_fetch_scheduler: self.log_fetch_scheduler.clone(), + }; let cfg_tracker = self.cfg.clone().tracker("raftstore".to_string()); StorePoller::new(poll_ctx, cfg_tracker) } @@ -344,7 +335,7 @@ pub struct StoreSystem { impl StoreSystem { pub fn start( &mut self, - store: Store, + store_id: u64, cfg: Arc>, raft_engine: ER, tablet_factory: Arc>, @@ -355,14 +346,9 @@ impl StoreSystem { T: Transport + 'static, { let mut workers = Workers::default(); - workers.store_writers.spawn( - store.get_id(), - raft_engine.clone(), - None, - router, - &trans, - &cfg, - )?; + workers + .store_writers + .spawn(store_id, raft_engine.clone(), None, router, &trans, &cfg)?; let log_fetch_scheduler = workers.log_fetch_worker.start( "raftlog-fetch-worker", RaftlogFetchRunner::new(router.clone(), raft_engine.clone()), @@ -370,7 +356,7 @@ impl StoreSystem { let mut builder = StorePollerBuilder::new( cfg.clone(), - store.get_id(), + store_id, raft_engine, tablet_factory, trans, @@ -385,7 +371,7 @@ impl StoreSystem { .schedule_all(peers.values().map(|pair| pair.1.peer())); // Choose a different name so we know what version is actually used. rs stands // for raft store. 
- let tag = format!("rs-{}", store.get_id()); + let tag = format!("rs-{}", store_id); self.system.spawn(tag, builder); let mut mailboxes = Vec::with_capacity(peers.len()); @@ -403,7 +389,7 @@ impl StoreSystem { for addr in address { router.force_send(addr, PeerMsg::Start).unwrap(); } - router.send_control(StoreMsg::Start { store }).unwrap(); + router.send_control(StoreMsg::Start).unwrap(); let apply_poller_builder = ApplyPollerBuilder::new(cfg); self.apply_system @@ -436,6 +422,33 @@ impl StoreRouter { pub fn logger(&self) -> &Logger { &self.logger } + + pub fn send_raft_message( + &self, + msg: Box, + ) -> std::result::Result<(), TrySendError>> { + let id = msg.get_region_id(); + let peer_msg = PeerMsg::RaftMessage(msg); + let store_msg = match self.try_send(id, peer_msg) { + Either::Left(Ok(())) => return Ok(()), + Either::Left(Err(TrySendError::Full(PeerMsg::RaftMessage(m)))) => { + return Err(TrySendError::Full(m)); + } + Either::Left(Err(TrySendError::Disconnected(PeerMsg::RaftMessage(m)))) => { + return Err(TrySendError::Disconnected(m)); + } + Either::Right(PeerMsg::RaftMessage(m)) => StoreMsg::RaftMessage(m), + _ => unreachable!(), + }; + match self.send_control(store_msg) { + Ok(()) => Ok(()), + Err(TrySendError::Full(StoreMsg::RaftMessage(m))) => Err(TrySendError::Full(m)), + Err(TrySendError::Disconnected(StoreMsg::RaftMessage(m))) => { + Err(TrySendError::Disconnected(m)) + } + _ => unreachable!(), + } + } } impl Deref for StoreRouter { @@ -457,14 +470,14 @@ impl DerefMut for StoreRouter { /// Creates the batch system for polling raft activities. 
pub fn create_store_batch_system( cfg: &Config, - store: Store, + store_id: u64, logger: Logger, ) -> (StoreRouter, StoreSystem) where EK: KvEngine, ER: RaftEngine, { - let (store_tx, store_fsm) = StoreFsm::new(cfg, store); + let (store_tx, store_fsm) = StoreFsm::new(cfg, store_id, logger.clone()); let (router, system) = batch_system::create_system(&cfg.store_batch_system, store_tx, store_fsm); let (apply_router, apply_system) = create_apply_batch_system(cfg); diff --git a/components/raftstore-v2/src/fsm/mod.rs b/components/raftstore-v2/src/fsm/mod.rs index 8126c8a868a..191f629900a 100644 --- a/components/raftstore-v2/src/fsm/mod.rs +++ b/components/raftstore-v2/src/fsm/mod.rs @@ -11,4 +11,4 @@ mod store; pub use apply::{ApplyFsm, ApplyFsmDelegate}; pub use peer::{PeerFsm, PeerFsmDelegate, SenderFsmPair}; -pub use store::{StoreFsm, StoreFsmDelegate, StoreMeta}; +pub use store::{Store, StoreFsm, StoreFsmDelegate, StoreMeta}; diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 886d8b2323a..5e3c2674fe5 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -6,7 +6,7 @@ use std::borrow::Cow; use batch_system::{BasicMailbox, Fsm}; use crossbeam::channel::TryRecvError; -use engine_traits::{KvEngine, RaftEngine}; +use engine_traits::{KvEngine, RaftEngine, TabletFactory}; use kvproto::metapb; use raftstore::store::{Config, Transport}; use slog::{debug, error, info, trace, Logger}; @@ -18,7 +18,7 @@ use tikv_util::{ use crate::{ batch::StoreContext, - raft::Peer, + raft::{Peer, Storage}, router::{PeerMsg, PeerTick}, Result, }; @@ -36,7 +36,12 @@ pub struct PeerFsm { } impl PeerFsm { - pub fn new(cfg: &Config, peer: Peer) -> Result> { + pub fn new( + cfg: &Config, + tablet_factory: &dyn TabletFactory, + storage: Storage, + ) -> Result> { + let peer = Peer::new(cfg, tablet_factory, storage)?; info!(peer.logger, "create peer"); let (tx, rx) = 
mpsc::loose_bounded(cfg.notify_capacity); let fsm = Box::new(PeerFsm { @@ -200,7 +205,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, pub fn on_msgs(&mut self, peer_msgs_buf: &mut Vec) { for msg in peer_msgs_buf.drain(..) { match msg { - PeerMsg::RaftMessage(_) => unimplemented!(), + PeerMsg::RaftMessage(msg) => self.fsm.peer.on_raft_message(self.store_ctx, msg), PeerMsg::RaftQuery(cmd) => { self.on_receive_command(cmd.send_time); self.on_query(cmd.request, cmd.ch) @@ -224,6 +229,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerMsg::FetchedLogs(fetched_logs) => { self.fsm.peer_mut().on_fetched_logs(fetched_logs) } + PeerMsg::QueryDebugInfo(ch) => self.fsm.peer_mut().on_query_debug_info(ch), } } } diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 61a3f76b138..0f607e5a1de 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -1,14 +1,20 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +use std::time::SystemTime; + use batch_system::Fsm; use collections::HashMap; use crossbeam::channel::TryRecvError; use engine_traits::{KvEngine, RaftEngine}; -use kvproto::metapb::Store; use raftstore::store::{Config, ReadDelegate}; +use slog::{o, Logger}; use tikv_util::mpsc::{self, LooseBoundedSender, Receiver}; -use crate::{batch::StoreContext, router::StoreMsg, tablet::CachedTablet}; +use crate::{ + batch::StoreContext, + router::{StoreMsg, StoreTick}, + tablet::CachedTablet, +}; pub struct StoreMeta where @@ -34,16 +40,49 @@ where } } +pub struct Store { + id: u64, + // Unix time when it's started. 
+ start_time: Option, + logger: Logger, +} + +impl Store { + pub fn new(id: u64, logger: Logger) -> Store { + Store { + id, + start_time: None, + logger: logger.new(o!("store_id" => id)), + } + } + + pub fn store_id(&self) -> u64 { + self.id + } + + pub fn start_time(&self) -> Option { + self.start_time + } + + pub fn logger(&self) -> &Logger { + &self.logger + } +} + pub struct StoreFsm { store: Store, receiver: Receiver, } impl StoreFsm { - pub fn new(cfg: &Config, store: Store) -> (LooseBoundedSender, Box) { + pub fn new( + cfg: &Config, + store_id: u64, + logger: Logger, + ) -> (LooseBoundedSender, Box) { let (tx, rx) = mpsc::loose_bounded(cfg.notify_capacity); let fsm = Box::new(StoreFsm { - store, + store: Store::new(store_id, logger), receiver: rx, }); (tx, fsm) @@ -84,9 +123,29 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { Self { fsm, store_ctx } } - pub fn handle_msgs(&self, store_msg_buf: &mut Vec) { + fn on_start(&mut self) { + if self.fsm.store.start_time.is_some() { + panic!("{:?} unable to start again", self.fsm.store.logger.list(),); + } + + self.fsm.store.start_time = Some( + SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .map_or(0, |d| d.as_secs()), + ); + } + + fn on_tick(&mut self, tick: StoreTick) { + unimplemented!() + } + + pub fn handle_msgs(&mut self, store_msg_buf: &mut Vec) { for msg in store_msg_buf.drain(..) { - // TODO: handle the messages. + match msg { + StoreMsg::Start => self.on_start(), + StoreMsg::Tick(tick) => self.on_tick(tick), + StoreMsg::RaftMessage(msg) => self.fsm.store.on_raft_message(self.store_ctx, msg), + } } } } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs new file mode 100644 index 00000000000..59e9057b846 --- /dev/null +++ b/components/raftstore-v2/src/operation/life.rs @@ -0,0 +1,284 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! 
This module implements the creation and destruction of peer. +//! +//! A peer can only be created by either: +//! - bootstrapping a cluster, it's coverred in crate::bootstrap; +//! - receiving a RaftMessage. +//! +//! In v1, it can also be created by split. In v2, it's required to create by +//! sending a message to store fsm first, and then using split to initialized +//! the peer. + +use std::cmp; + +use batch_system::BasicMailbox; +use crossbeam::channel::TrySendError; +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::{ + metapb::Region, + raft_serverpb::{PeerState, RaftMessage}, +}; +use raftstore::store::{util, ExtraStates, WriteTask}; +use slog::{debug, error, info}; + +use crate::{ + batch::StoreContext, + fsm::{PeerFsm, Store, StoreFsmDelegate}, + raft::{Peer, Storage}, + router::PeerMsg, +}; + +/// When a peer is about to destroy, it becomes `WaitReady` first. If there is +/// no pending asynchronous apply, it becomes `Destroying` and then start +/// destroying asynchronously during handling ready. After the asynchronously +/// destroying is finished, it becomes `Destroyed`. +pub enum DestroyProgress { + /// Alive means destroy is not triggered at all. It's the same as None for + /// `Option`. Not using Option to avoid unwrap everywhere. + None, + /// If the destroy is triggered by message, then the message will be used + /// for creating new peer immediately. 
+ WaitReady(Option>), + Destroying(Option>), + Destroyed, +} + +impl DestroyProgress { + #[inline] + pub fn started(&self) -> bool { + matches!( + self, + DestroyProgress::Destroying(_) | DestroyProgress::Destroyed + ) + } + + #[inline] + pub fn waiting(&self) -> bool { + matches!(self, DestroyProgress::WaitReady(_)) + } + + #[inline] + fn start(&mut self) { + match self { + DestroyProgress::WaitReady(msg) => *self = DestroyProgress::Destroying(msg.take()), + _ => panic!("must wait ready first to start destroying"), + } + } + + #[inline] + fn wait_with(&mut self, triggered_msg: Option>) { + match self { + DestroyProgress::None => *self = DestroyProgress::WaitReady(triggered_msg), + _ => panic!("must be alive to wait"), + } + } + + #[inline] + fn finish(&mut self) -> Option> { + match self { + DestroyProgress::Destroying(msg) => { + let msg = msg.take(); + *self = DestroyProgress::Destroyed; + msg + } + _ => panic!("must be destroying to finish"), + } + } +} + +impl Store { + /// When a message's recipient doesn't exist, it will be redirected to + /// store. Store is responsible for checking if it's neccessary to create + /// a peer to handle the message. + #[inline] + pub fn on_raft_message( + &mut self, + ctx: &mut StoreContext, + msg: Box, + ) where + EK: KvEngine, + ER: RaftEngine, + { + let region_id = msg.get_region_id(); + // The message can be sent when the peer is being created, so try send it first. + let msg = if let Err(TrySendError::Disconnected(PeerMsg::RaftMessage(m))) = + ctx.router.send(region_id, PeerMsg::RaftMessage(msg)) + { + m + } else { + return; + }; + let msg_type = msg.get_message().get_msg_type(); + let from_peer = msg.get_from_peer(); + let to_peer = msg.get_to_peer(); + // Now the peer should not exist. 
+ debug!( + self.logger(), + "handle raft message"; + "from_peer_id" => from_peer.id, + "to_peer_id" => to_peer.id, + "region_id" => region_id, + "msg_type" => %util::MsgType(&msg) + ); + if to_peer.store_id != self.store_id() { + ctx.raft_metrics.message_dropped.mismatch_store_id.inc(); + return; + } + if !msg.has_region_epoch() { + ctx.raft_metrics.message_dropped.mismatch_region_epoch.inc(); + return; + } + // TODO: maybe we need to ack the message to confirm the peer is destroyed. + if msg.get_is_tombstone() || msg.has_merge_target() { + // Target tombstone peer doesn't exist, so ignore it. + ctx.raft_metrics.message_dropped.stale_msg.inc(); + return; + } + let from_epoch = msg.get_region_epoch(); + let local_state = match ctx.engine.get_region_state(region_id) { + Ok(s) => s, + Err(e) => { + error!(self.logger(), "failed to get region state"; "region_id" => region_id, "err" => ?e); + return; + } + }; + if let Some(local_state) = local_state { + // Split will not create peer in v2, so the state must be Tombstone. + if local_state.get_state() != PeerState::Tombstone { + panic!( + "[region {}] {} peer doesn't exist but has valid local state {:?}", + region_id, to_peer.id, local_state + ); + } + // Compared to v1, we rely on leader to confirm destroy actively, so here + // skip handling gc for simplicity. + let local_epoch = local_state.get_region().get_region_epoch(); + // The region in this peer is already destroyed + if util::is_epoch_stale(from_epoch, local_epoch) { + ctx.raft_metrics.message_dropped.region_tombstone_peer.inc(); + return; + } + if let Some(local_peer) = util::find_peer(local_state.get_region(), self.store_id()) { + if to_peer.id <= local_peer.get_id() { + ctx.raft_metrics.message_dropped.region_tombstone_peer.inc(); + return; + } + } + } + + // So the peer must need to be created. We don't need to synchronous with split + // as split won't create peer in v2. 
And we don't check for range + // conflict as v2 depends on tablet, which allows conflict ranges. + let mut region = Region::default(); + region.set_id(region_id); + region.set_region_epoch(from_epoch.clone()); + // Peer list doesn't have to be complete, as it's uninitialized. + region.mut_peers().push(from_peer.clone()); + region.mut_peers().push(to_peer.clone()); + // We don't set the region range here as we allow range conflict. + let (tx, fsm) = match Storage::uninit( + self.store_id(), + region, + ctx.engine.clone(), + ctx.log_fetch_scheduler.clone(), + &ctx.logger, + ) + .and_then(|s| PeerFsm::new(&ctx.cfg, &*ctx.tablet_factory, s)) + { + Ok(p) => p, + res => { + error!(self.logger(), "failed to create peer"; "region_id" => region_id, "peer_id" => to_peer.id, "err" => ?res.err()); + return; + } + }; + let mailbox = BasicMailbox::new(tx, fsm, ctx.router.state_cnt().clone()); + if let Err((p, _)) = ctx + .router + .send_and_register(region_id, mailbox, PeerMsg::Start) + { + panic!( + "[region {}] {} failed to register peer", + region_id, to_peer.id + ); + } + // Only forward valid message. Split may use a message without sender to trigger + // creating a peer. + if from_peer.id != raft::INVALID_ID { + // For now the peer only exists in memory. It will persist its states when + // handling its first readiness. + let _ = ctx.router.send(region_id, PeerMsg::RaftMessage(msg)); + } + } +} + +impl Peer { + /// A peer can be destroyed in three cases: + /// 1. Received a gc message; + /// 2. Received a message whose target peer's ID is larger than this; + /// 3. Applied a conf remove self command. + /// In all cases, the peer will be destroyed asynchronousely in next + /// handle_raft_ready. + /// `triggered_msg` will be sent to store fsm after destroy is finished. + /// Should set the message only when the target peer is supposed to be + /// created afterward. 
+ pub fn mark_for_destroy(&mut self, triggered_msg: Option>) { + if self.serving() { + self.destroy_progress_mut().wait_with(triggered_msg); + self.set_has_ready(); + } + } + + /// In v2, it's possible to destroy the peer without waiting for apply. But + /// we had better wait till all previous entries are applied in case there + /// is a split. It's a waste to use a snapshot to restore a newly split + /// tablet. + #[inline] + pub fn postpond_destroy(&self) -> bool { + let entry_storage = self.storage().entry_storage(); + // TODO: check actual split index instead of commit index. + entry_storage.applied_index() != entry_storage.commit_index() + } + + /// Start the destroy progress. It will write `Tombstone` state + /// asynchronously. + /// + /// After destroy is finished, `finish_destroy` should be called to clean up + /// memory states. + pub fn start_destroy(&mut self, write_task: &mut WriteTask) { + let entry_storage = self.storage().entry_storage(); + if self.postpond_destroy() { + return; + } + let first_index = entry_storage.first_index(); + let last_index = entry_storage.last_index(); + if first_index <= last_index { + write_task.cut_logs = match write_task.cut_logs { + None => Some((first_index, last_index)), + Some((f, l)) => Some((cmp::min(first_index, f), cmp::max(last_index, l))), + }; + } + let mut extra_states = ExtraStates::new(entry_storage.apply_state().clone()); + let mut region_state = self.storage().region_state().clone(); + // Write worker will do the clean up when meeting tombstone state. + region_state.set_state(PeerState::Tombstone); + extra_states.set_region_state(region_state); + extra_states.set_raft_state(entry_storage.raft_state().clone()); + write_task.extra_write.set_v2(extra_states); + self.destroy_progress_mut().start(); + } + + /// Do clean up for destroy. The peer is permanently destroyed when + /// Tombstone state is persisted. This method is only for cleaning up + /// memory states. 
+ pub fn finish_destroy(&mut self, ctx: &mut StoreContext) { + info!(self.logger, "peer destroyed"); + ctx.router.close(self.region_id()); + if let Some(msg) = self.destroy_progress_mut().finish() { + // The message will be dispatched to store fsm, which will create a + // new peer. Ignore error as it's just a best effort. + let _ = ctx.router.send_raft_message(msg); + } + // TODO: close apply mailbox. + } +} diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index c352ffe0cc1..b840194b7e0 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -1,6 +1,8 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +mod life; mod query; mod ready; +pub use life::DestroyProgress; pub use ready::AsyncWriter; diff --git a/components/raftstore-v2/src/operation/ready/async_writer.rs b/components/raftstore-v2/src/operation/ready/async_writer.rs index e0b2a1c4802..3db4426ebf7 100644 --- a/components/raftstore-v2/src/operation/ready/async_writer.rs +++ b/components/raftstore-v2/src/operation/ready/async_writer.rs @@ -157,6 +157,10 @@ impl AsyncWriter { pub fn persisted_number(&self) -> u64 { self.persisted_number } + + pub fn all_ready_persisted(&self) -> bool { + self.unpersisted_readies.is_empty() + } } impl WriteRouterContext for StoreContext diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 1be4b0ee546..aab6cc5d4c5 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -19,18 +19,21 @@ mod async_writer; +use std::cmp; + use engine_traits::{KvEngine, RaftEngine}; use error_code::ErrorCodeExt; use kvproto::raft_serverpb::RaftMessage; use protobuf::Message as _; use raft::{eraftpb, Ready}; -use raftstore::store::{FetchedLogs, Transport, WriteTask}; +use raftstore::store::{util, ExtraStates, FetchedLogs, Transport, 
WriteTask}; use slog::{debug, error, trace, warn}; pub use self::async_writer::AsyncWriter; use crate::{ batch::StoreContext, fsm::{PeerFsm, PeerFsmDelegate}, + operation::DestroyProgress, raft::{Peer, Storage}, router::PeerTick, }; @@ -52,6 +55,66 @@ impl Peer { self.raft_group_mut().tick() } + pub fn on_raft_message( + &mut self, + ctx: &mut StoreContext, + mut msg: Box, + ) { + debug!( + self.logger, + "handle raft message"; + "message_type" => %util::MsgType(&msg), + "from_peer_id" => msg.get_from_peer().get_id(), + "to_peer_id" => msg.get_to_peer().get_id(), + ); + if !self.serving() { + return; + } + if msg.get_to_peer().get_store_id() != self.peer().get_store_id() { + ctx.raft_metrics.message_dropped.mismatch_store_id.inc(); + return; + } + if !msg.has_region_epoch() { + ctx.raft_metrics.message_dropped.mismatch_region_epoch.inc(); + return; + } + if msg.get_is_tombstone() { + self.mark_for_destroy(None); + return; + } + if msg.has_merge_target() { + unimplemented!(); + return; + } + // We don't handle stale message like v1, as we rely on leader to actively + // cleanup stale peers. + let to_peer = msg.get_to_peer(); + // Check if the message is sent to the right peer. + match to_peer.get_id().cmp(&self.peer_id()) { + cmp::Ordering::Equal => (), + cmp::Ordering::Less => { + ctx.raft_metrics.message_dropped.stale_msg.inc(); + return; + } + cmp::Ordering::Greater => { + // We need to create the target peer. + self.mark_for_destroy(Some(msg)); + return; + } + } + if msg.has_extra_msg() { + unimplemented!(); + return; + } + // TODO: drop all msg append when the peer is uninitialized and has conflict + // ranges with other peers. + self.insert_peer_cache(msg.take_from_peer()); + if let Err(e) = self.raft_group_mut().step(msg.take_message()) { + error!(self.logger, "raft step error"; "err" => ?e); + } + self.set_has_ready(); + } + /// Callback for fetching logs asynchronously. 
pub fn on_fetched_logs(&mut self, fetched_logs: FetchedLogs) { let FetchedLogs { context, logs } = fetched_logs; @@ -176,15 +239,17 @@ impl Peer { #[inline] pub fn handle_raft_ready(&mut self, ctx: &mut StoreContext) { let has_ready = self.reset_has_ready(); - if !has_ready { + if !has_ready || self.destroy_progress().started() { return; } ctx.has_ready = true; - if !self.raft_group().has_ready() { + if !self.raft_group().has_ready() && (self.serving() || self.postpond_destroy()) { return; } + // Note even the group has no ready, we can still get an empty ready. + debug!(self.logger, "handle raft ready"); let mut ready = self.raft_group_mut().ready(); @@ -218,6 +283,9 @@ impl Peer { .flat_map(|m| self.build_raft_message(ctx, m)) .collect(); } + if !self.serving() { + self.start_destroy(&mut write_task); + } // Ready number should increase monotonically. assert!(self.async_writer.known_largest_number() < ready.number()); if let Some(task) = self.async_writer.write(ctx, write_task) { @@ -273,8 +341,14 @@ impl Peer { self.storage_mut() .entry_storage_mut() .update_cache_persisted(persisted_index); - // We may need to check if there is persisted committed logs. - self.set_has_ready(); + if !self.destroy_progress().started() { + // We may need to check if there is persisted committed logs. + self.set_has_ready(); + } else if self.async_writer.all_ready_persisted() { + // Destroy ready is the last ready. All readies are persisted means destroy + // is persisted. 
+ self.finish_destroy(ctx); + } } } @@ -287,6 +361,7 @@ impl Storage { write_task: &mut WriteTask, ) { let prev_raft_state = self.entry_storage().raft_state().clone(); + let ever_persisted = self.ever_persisted(); // TODO: handle snapshot @@ -297,8 +372,14 @@ impl Storage { if let Some(hs) = ready.hs() { entry_storage.raft_state_mut().set_hard_state(hs.clone()); } - if prev_raft_state != *entry_storage.raft_state() { + if !ever_persisted || prev_raft_state != *entry_storage.raft_state() { write_task.raft_state = Some(entry_storage.raft_state().clone()); } + if !ever_persisted { + let mut extra_states = ExtraStates::new(self.apply_state().clone()); + extra_states.set_region_state(self.region_state().clone()); + write_task.extra_write.set_v2(extra_states); + self.set_ever_persisted(); + } } } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 6fd7b4b444c..a84dd36f224 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -11,7 +11,7 @@ use tikv_util::{box_err, config::ReadableSize, worker::Scheduler}; use super::storage::Storage; use crate::{ - operation::AsyncWriter, + operation::{AsyncWriter, DestroyProgress}, tablet::{self, CachedTablet}, Result, }; @@ -25,6 +25,7 @@ pub struct Peer { /// messages with unknown peers after recovery. peer_cache: Vec, pub(crate) async_writer: AsyncWriter, + destroy_progress: DestroyProgress, has_ready: bool, pub(crate) logger: Logger, } @@ -35,21 +36,13 @@ impl Peer { /// If peer is destroyed, `None` is returned. pub fn new( cfg: &Config, - region_id: u64, - store_id: u64, tablet_factory: &dyn TabletFactory, - engine: ER, - scheduler: Scheduler, - logger: &Logger, - ) -> Result> { - let s = match Storage::new(region_id, store_id, engine, scheduler, logger)? 
{ - Some(s) => s, - None => return Ok(None), - }; - let logger = s.logger().clone(); + storage: Storage, + ) -> Result { + let logger = storage.logger().clone(); - let applied_index = s.apply_state().get_applied_index(); - let peer_id = s.peer().get_id(); + let applied_index = storage.apply_state().get_applied_index(); + let peer_id = storage.peer().get_id(); let raft_cfg = raft::Config { id: peer_id, @@ -67,7 +60,8 @@ impl Peer { ..Default::default() }; - let tablet_index = s.region_state().get_tablet_index(); + let region_id = storage.region().get_id(); + let tablet_index = storage.region_state().get_tablet_index(); // Another option is always create tablet even if tablet index is 0. But this // can introduce race when gc old tablet and create new peer. let tablet = if tablet_index != 0 { @@ -89,21 +83,25 @@ impl Peer { }; let mut peer = Peer { - raft_group: RawNode::new(&raft_cfg, s, &logger)?, + raft_group: RawNode::new(&raft_cfg, storage, &logger)?, tablet: CachedTablet::new(tablet), - has_ready: false, + peer_cache: vec![], async_writer: AsyncWriter::new(region_id, peer_id), + has_ready: false, + destroy_progress: DestroyProgress::None, logger, - peer_cache: vec![], }; // If this region has only one peer and I am the one, campaign directly. 
let region = peer.region(); - if region.get_peers().len() == 1 && region.get_peers()[0].get_store_id() == store_id { + if region.get_peers().len() == 1 + && region.get_peers()[0] == *peer.peer() + && tablet_index != 0 + { peer.raft_group.campaign()?; } - Ok(Some(peer)) + Ok(peer) } #[inline] @@ -241,4 +239,19 @@ impl Peer { pub fn term(&self) -> u64 { self.raft_group.raft.term } + + #[inline] + pub fn serving(&self) -> bool { + matches!(self.destroy_progress, DestroyProgress::None) + } + + #[inline] + pub fn destroy_progress(&self) -> &DestroyProgress { + &self.destroy_progress + } + + #[inline] + pub fn destroy_progress_mut(&mut self) -> &mut DestroyProgress { + &mut self.destroy_progress + } } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 4f625b751ac..fe0a9b5913e 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -54,6 +54,10 @@ pub struct Storage { entry_storage: EntryStorage, peer: metapb::Peer, region_state: RegionLocalState, + /// Whether states have been persisted before. If a peer is just created + /// by messages, it has not persisted any states; we need to persist them + /// at least once regardless of whether the state changes since creation. + ever_persisted: bool, logger: Logger, } @@ -101,6 +105,30 @@ impl Storage { } impl Storage { + /// Creates a new storage with uninit states. + /// + /// This should only be used for creating a new peer from a raft message. + pub fn uninit( + store_id: u64, + region: Region, + engine: ER, + log_fetch_scheduler: Scheduler, + logger: &Logger, + ) -> Result { + let mut region_state = RegionLocalState::default(); + region_state.set_region(region); + Self::create( + store_id, + region_state, + RaftLocalState::default(), + RaftApplyState::default(), + engine, + log_fetch_scheduler, + false, + logger, + ) + } + /// Creates a new storage. /// /// All metadata should be initialized before calling this method. 
If the @@ -112,7 +140,7 @@ impl Storage { log_fetch_scheduler: Scheduler, logger: &Logger, ) -> Result>> { - let region_state: RegionLocalState = match engine.get_region_state(region_id) { + let region_state = match engine.get_region_state(region_id) { Ok(Some(s)) => s, res => { return Err(box_err!( @@ -127,16 +155,6 @@ impl Storage { return Ok(None); } - let peer = find_peer(region_state.get_region(), store_id); - let peer = match peer { - Some(p) if p.get_id() != INVALID_ID => p, - _ => { - return Err(box_err!("no valid peer found in {:?}", region_state)); - } - }; - - let logger = logger.new(o!("region_id" => region_id, "peer_id" => peer.get_id())); - let raft_state = match engine.get_raft_state(region_id) { Ok(Some(s)) => s, res => { @@ -151,8 +169,38 @@ impl Storage { } }; - let region = region_state.get_region(); + Self::create( + store_id, + region_state, + raft_state, + apply_state, + engine, + log_fetch_scheduler, + true, + logger, + ) + .map(Some) + } + fn create( + store_id: u64, + region_state: RegionLocalState, + raft_state: RaftLocalState, + apply_state: RaftApplyState, + engine: ER, + log_fetch_scheduler: Scheduler, + persisted: bool, + logger: &Logger, + ) -> Result { + let peer = find_peer(region_state.get_region(), store_id); + let peer = match peer { + Some(p) if p.get_id() != INVALID_ID => p, + _ => { + return Err(box_err!("no valid peer found in {:?}", region_state)); + } + }; + let region = region_state.get_region(); + let logger = logger.new(o!("region_id" => region.id, "peer_id" => peer.get_id())); let entry_storage = EntryStorage::new( peer.get_id(), engine, @@ -162,12 +210,13 @@ impl Storage { log_fetch_scheduler, )?; - Ok(Some(Storage { + Ok(Storage { entry_storage, peer: peer.clone(), region_state, + ever_persisted: persisted, logger, - })) + }) } #[inline] @@ -184,6 +233,14 @@ impl Storage { pub fn is_initialized(&self) -> bool { self.region_state.get_tablet_index() != 0 } + + pub fn ever_persisted(&self) -> bool { + self.ever_persisted 
+ } + + pub fn set_ever_persisted(&mut self) { + self.ever_persisted = true; + } } impl raft::Storage for Storage { diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 72e6149d7ad..7be1be95554 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -8,14 +8,15 @@ use kvproto::{ cdcpb::Event, metapb, raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, + raft_serverpb::RaftMessage, }; -use raftstore::store::{ - metrics::RaftEventDurationType, FetchedLogs, InspectedRaftMessage, RegionSnapshot, -}; +use raftstore::store::{metrics::RaftEventDurationType, FetchedLogs, RegionSnapshot}; use tikv_util::time::Instant; use super::{ - response_channel::{CmdResChannel, CmdResSubscriber, QueryResChannel, QueryResSubscriber}, + response_channel::{ + CmdResChannel, CmdResSubscriber, DebugInfoChannel, QueryResChannel, QueryResSubscriber, + }, ApplyRes, }; @@ -116,7 +117,7 @@ pub enum PeerMsg { /// Raft message is the message sent between raft nodes in the same /// raft group. Messages need to be redirected to raftstore if target /// peer doesn't exist. - RaftMessage(InspectedRaftMessage), + RaftMessage(Box), /// Query won't change any state. A typical query is KV read. In most cases, /// it will be processed using lease or read index. 
RaftQuery(RaftRequest), @@ -138,6 +139,7 @@ pub enum PeerMsg { peer_id: u64, ready_number: u64, }, + QueryDebugInfo(DebugInfoChannel), } impl PeerMsg { @@ -175,14 +177,15 @@ impl fmt::Debug for PeerMsg { peer_id, ready_number ), PeerMsg::FetchedLogs(fetched) => write!(fmt, "FetchedLogs {:?}", fetched), + PeerMsg::QueryDebugInfo(_) => write!(fmt, "QueryDebugInfo"), } } } pub enum StoreMsg { - RaftMessage(InspectedRaftMessage), + RaftMessage(Box), Tick(StoreTick), - Start { store: metapb::Store }, + Start, } impl fmt::Debug for StoreMsg { @@ -190,7 +193,7 @@ impl fmt::Debug for StoreMsg { match *self { StoreMsg::RaftMessage(_) => write!(fmt, "Raft Message"), StoreMsg::Tick(tick) => write!(fmt, "StoreTick {:?}", tick), - StoreMsg::Start { ref store } => write!(fmt, "Start store {:?}", store), + StoreMsg::Start => write!(fmt, "Start store"), } } } diff --git a/components/raftstore-v2/tests/integrations/mod.rs b/components/raftstore-v2/tests/integrations/mod.rs index d922020cbcb..5582921ce4d 100644 --- a/components/raftstore-v2/tests/integrations/mod.rs +++ b/components/raftstore-v2/tests/integrations/mod.rs @@ -10,11 +10,13 @@ use std::{ ops::{Deref, DerefMut}, + path::Path, sync::{ atomic::{AtomicUsize, Ordering}, Arc, }, - time::Duration, + thread, + time::{Duration, Instant}, }; use crossbeam::channel::{self, Receiver, Sender}; @@ -31,10 +33,10 @@ use kvproto::{ raft_serverpb::RaftMessage, }; use pd_client::RpcClient; -use raftstore::store::{Config, Transport, RAFT_INIT_LOG_INDEX}; +use raftstore::store::{region_meta::RegionMeta, Config, Transport, RAFT_INIT_LOG_INDEX}; use raftstore_v2::{ create_store_batch_system, - router::{PeerMsg, QueryResult}, + router::{DebugInfoChannel, PeerMsg, QueryResult}, Bootstrap, StoreRouter, StoreSystem, }; use slog::{o, Logger}; @@ -42,6 +44,7 @@ use tempfile::TempDir; use test_pd::mocker::Service; use tikv_util::config::{ReadableDuration, VersionTrack}; +mod test_life; mod test_status; struct TestRouter(StoreRouter); @@ -67,6 
+70,20 @@ impl TestRouter { block_on(sub.result()) } + fn must_query_debug_info(&self, region_id: u64, timeout: Duration) -> Option { + let timer = Instant::now(); + while timer.elapsed() < timeout { + let (ch, sub) = DebugInfoChannel::pair(); + let msg = PeerMsg::QueryDebugInfo(ch); + if self.send(region_id, msg).is_err() { + thread::sleep(Duration::from_millis(10)); + continue; + } + return block_on(sub.result()); + } + None + } + fn command(&self, region_id: u64, req: RaftCmdRequest) -> Option { let (msg, sub) = PeerMsg::raft_command(req); self.send(region_id, msg).unwrap(); @@ -74,113 +91,138 @@ impl TestRouter { } } -struct TestNode { - _pd_server: test_pd::Server, - _pd_client: RpcClient, - _path: TempDir, - store: Store, - raft_engine: Option, - factory: Option>, - system: Option>, - cfg: Option>>, - logger: Logger, +struct RunningState { + raft_engine: RaftTestEngine, + factory: Arc, + system: StoreSystem, + cfg: Arc>, + transport: TestTransport, } -impl TestNode { - fn new() -> TestNode { - let logger = slog_global::borrow_global().new(o!()); - let pd_server = test_pd::Server::new(1); - let pd_client = test_pd::util::new_client(pd_server.bind_addrs(), None); - let path = TempDir::new().unwrap(); - +impl RunningState { + fn new( + pd_client: &RpcClient, + path: &Path, + cfg: Arc>, + transport: TestTransport, + logger: &Logger, + ) -> (TestRouter, Self) { let cf_opts = ALL_CFS .iter() .copied() .map(|cf| (cf, CfOptions::default())) .collect(); let factory = Arc::new(TestTabletFactoryV2::new( - path.path(), + path, DbOptions::default(), cf_opts, )); let raft_engine = - engine_test::raft::new_engine(&format!("{}", path.path().join("raft").display()), None) + engine_test::raft::new_engine(&format!("{}", path.join("raft").display()), None) .unwrap(); - let mut bootstrap = Bootstrap::new(&raft_engine, 0, &pd_client, logger.clone()); + let mut bootstrap = Bootstrap::new(&raft_engine, 0, pd_client, logger.clone()); let store_id = 
bootstrap.bootstrap_store().unwrap(); let mut store = Store::default(); store.set_id(store_id); - let region = bootstrap - .bootstrap_first_region(&store, store_id) - .unwrap() - .unwrap(); - if factory.exists(region.get_id(), RAFT_INIT_LOG_INDEX) { + if let Some(region) = bootstrap.bootstrap_first_region(&store, store_id).unwrap() { + if factory.exists(region.get_id(), RAFT_INIT_LOG_INDEX) { + factory + .destroy_tablet(region.get_id(), RAFT_INIT_LOG_INDEX) + .unwrap(); + } factory - .destroy_tablet(region.get_id(), RAFT_INIT_LOG_INDEX) + .open_tablet( + region.get_id(), + Some(RAFT_INIT_LOG_INDEX), + OpenOptions::default().set_create_new(true), + ) .unwrap(); } - factory - .open_tablet( - region.get_id(), - Some(RAFT_INIT_LOG_INDEX), - OpenOptions::default().set_create_new(true), - ) - .unwrap(); - TestNode { - _pd_server: pd_server, - _pd_client: pd_client, - _path: path, - store, - raft_engine: Some(raft_engine), - factory: Some(factory), - system: None, - cfg: None, - logger, - } - } - - fn start( - &mut self, - cfg: Arc>, - trans: impl Transport + 'static, - ) -> TestRouter { let (router, mut system) = create_store_batch_system::( &cfg.value(), - self.store.clone(), - self.logger.clone(), + store_id, + logger.clone(), ); system .start( - self.store.clone(), + store_id, cfg.clone(), - self.raft_engine.clone().unwrap(), - self.factory.clone().unwrap(), - trans, + raft_engine.clone(), + factory.clone(), + transport.clone(), &router, ) .unwrap(); - self.cfg = Some(cfg); - self.system = Some(system); - TestRouter(router) + + let state = Self { + raft_engine, + factory, + system, + cfg, + transport, + }; + (TestRouter(router), state) + } +} + +impl Drop for RunningState { + fn drop(&mut self) { + self.system.shutdown(); + } +} + +struct TestNode { + _pd_server: test_pd::Server, + pd_client: RpcClient, + path: TempDir, + running_state: Option, + logger: Logger, +} + +impl TestNode { + fn new() -> TestNode { + let logger = slog_global::borrow_global().new(o!()); + let 
pd_server = test_pd::Server::new(1); + let pd_client = test_pd::util::new_client(pd_server.bind_addrs(), None); + let path = TempDir::new().unwrap(); + + TestNode { + _pd_server: pd_server, + pd_client, + path, + running_state: None, + logger, + } + } + + fn start(&mut self, cfg: Arc>, trans: TestTransport) -> TestRouter { + let (router, state) = + RunningState::new(&self.pd_client, self.path.path(), cfg, trans, &self.logger); + self.running_state = Some(state); + router } fn config(&self) -> &Arc> { - self.cfg.as_ref().unwrap() + &self.running_state.as_ref().unwrap().cfg } fn stop(&mut self) { - if let Some(mut system) = self.system.take() { - system.shutdown(); - } + self.running_state.take(); + } + + fn restart(&mut self) -> TestRouter { + let state = self.running_state.as_ref().unwrap(); + let prev_transport = state.transport.clone(); + let cfg = state.cfg.clone(); + self.stop(); + self.start(cfg, prev_transport) } } impl Drop for TestNode { fn drop(&mut self) { self.stop(); - self.raft_engine.take(); - self.factory.take(); } } diff --git a/components/raftstore-v2/tests/integrations/test_life.rs b/components/raftstore-v2/tests/integrations/test_life.rs new file mode 100644 index 00000000000..c03c7fe10c4 --- /dev/null +++ b/components/raftstore-v2/tests/integrations/test_life.rs @@ -0,0 +1,194 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + assert_matches::assert_matches, + thread, + time::{Duration, Instant}, +}; + +use crossbeam::channel::TrySendError; +use engine_traits::{RaftEngine, RaftEngineReadOnly}; +use futures::executor::block_on; +use kvproto::{ + metapb, + raft_cmdpb::{RaftCmdRequest, StatusCmdType}, + raft_serverpb::{PeerState, RaftMessage}, +}; +use raftstore::store::util::new_peer; +use raftstore_v2::router::{DebugInfoChannel, PeerMsg}; + +use crate::TestRouter; + +fn assert_peer_not_exist(region_id: u64, peer_id: u64, router: &TestRouter) { + let timer = Instant::now(); + loop { + let (ch, sub) = DebugInfoChannel::pair(); + let msg = PeerMsg::QueryDebugInfo(ch); + match router.send(region_id, msg) { + Err(TrySendError::Disconnected(_)) => return, + Ok(()) => { + if let Some(m) = block_on(sub.result()) { + if m.raft_status.id != peer_id { + return; + } + } + } + Err(_) => (), + } + if timer.elapsed() < Duration::from_secs(3) { + thread::sleep(Duration::from_millis(10)); + } else { + panic!("peer of {} still exists", region_id); + } + } +} + +// TODO: make raft engine support more suitable way to verify range is empty. +/// Verify all states in raft engine are cleared. +fn assert_tombstone(raft_engine: &impl RaftEngine, region_id: u64, peer: &metapb::Peer) { + let mut buf = vec![]; + raft_engine.get_all_entries_to(region_id, &mut buf).unwrap(); + assert!(buf.is_empty(), "{:?}", buf); + assert_matches!(raft_engine.get_raft_state(region_id), Ok(None)); + assert_matches!(raft_engine.get_apply_state(region_id), Ok(None)); + let region_state = raft_engine.get_region_state(region_id).unwrap().unwrap(); + assert_matches!(region_state.get_state(), PeerState::Tombstone); + assert!( + region_state.get_region().get_peers().contains(peer), + "{:?}", + region_state + ); +} + +/// Test a peer can be created by general raft message and destroyed tombstone +/// message. 
+#[test] +fn test_life_by_message() { + let (mut node, _transport, router) = super::setup_default_cluster(); + let test_region_id = 4; + let test_peer_id = 5; + let test_leader_id = 6; + assert_peer_not_exist(test_region_id, test_peer_id, &router); + + // Build a correct message. + let mut msg = Box::new(RaftMessage::default()); + msg.set_region_id(test_region_id); + msg.set_to_peer(new_peer(1, test_peer_id)); + msg.mut_region_epoch().set_conf_ver(1); + msg.set_from_peer(new_peer(2, test_leader_id)); + let raft_message = msg.mut_message(); + raft_message.set_msg_type(raft::prelude::MessageType::MsgHeartbeat); + raft_message.set_from(6); + raft_message.set_term(5); + + let assert_wrong = |f: &dyn Fn(&mut RaftMessage)| { + let mut wrong_msg = msg.clone(); + f(&mut wrong_msg); + router.send_raft_message(wrong_msg).unwrap(); + assert_peer_not_exist(test_region_id, test_peer_id, &router); + }; + + // Check mismatch store id. + assert_wrong(&|msg| msg.mut_to_peer().set_store_id(4)); + + // Check missing region epoch. + assert_wrong(&|msg| { + msg.take_region_epoch(); + }); + + // Check tombstone. + assert_wrong(&|msg| msg.set_is_tombstone(true)); + + // Correct message will create a peer, but the peer will not be initialized. + router.send_raft_message(msg.clone()).unwrap(); + let timeout = Duration::from_secs(3); + let meta = router + .must_query_debug_info(test_region_id, timeout) + .unwrap(); + assert_eq!(meta.region_state.id, test_region_id); + assert_eq!(meta.raft_status.id, test_peer_id); + assert_eq!(meta.region_state.tablet_index, 0); + // But leader should be set. + assert_eq!(meta.raft_status.soft_state.leader_id, test_leader_id); + + // The peer should survive restart. 
+ let router = node.restart(); + let meta = router + .must_query_debug_info(test_region_id, timeout) + .unwrap(); + assert_eq!(meta.raft_status.id, test_peer_id); + let raft_engine = &node.running_state.as_ref().unwrap().raft_engine; + raft_engine.get_raft_state(test_region_id).unwrap().unwrap(); + raft_engine + .get_apply_state(test_region_id) + .unwrap() + .unwrap(); + + // The peer should be destroyed by tombstone message. + let mut tombstone_msg = msg.clone(); + tombstone_msg.set_is_tombstone(true); + router.send_raft_message(tombstone_msg).unwrap(); + assert_peer_not_exist(test_region_id, test_peer_id, &router); + assert_tombstone(raft_engine, test_region_id, &new_peer(1, test_peer_id)); + + // Restart should not recreate tombstoned peer. + let router = node.restart(); + assert_peer_not_exist(test_region_id, test_peer_id, &router); + let raft_engine = &node.running_state.as_ref().unwrap().raft_engine; + assert_tombstone(raft_engine, test_region_id, &new_peer(1, test_peer_id)); +} + +#[test] +fn test_destroy_by_larger_id() { + let (mut node, _transport, router) = super::setup_default_cluster(); + let test_region_id = 4; + let test_peer_id = 6; + let init_term = 5; + let mut msg = Box::new(RaftMessage::default()); + msg.set_region_id(test_region_id); + msg.set_to_peer(new_peer(1, test_peer_id)); + msg.mut_region_epoch().set_conf_ver(1); + msg.set_from_peer(new_peer(2, 8)); + let raft_message = msg.mut_message(); + raft_message.set_msg_type(raft::prelude::MessageType::MsgHeartbeat); + raft_message.set_from(6); + raft_message.set_term(init_term); + // Create the peer. + router.send_raft_message(msg.clone()).unwrap(); + + let timeout = Duration::from_secs(3); + let meta = router + .must_query_debug_info(test_region_id, timeout) + .unwrap(); + assert_eq!(meta.raft_status.id, test_peer_id); + + // Smaller ID should be ignored. 
+ let mut smaller_id_msg = msg; + smaller_id_msg.set_to_peer(new_peer(1, test_peer_id - 1)); + smaller_id_msg.mut_message().set_term(init_term + 1); + router.send_raft_message(smaller_id_msg.clone()).unwrap(); + let meta = router + .must_query_debug_info(test_region_id, timeout) + .unwrap(); + assert_eq!(meta.raft_status.id, test_peer_id); + assert_eq!(meta.raft_status.hard_state.term, init_term); + + // Larger ID should trigger destroy. + let mut larger_id_msg = smaller_id_msg; + larger_id_msg.set_to_peer(new_peer(1, test_peer_id + 1)); + router.send_raft_message(larger_id_msg).unwrap(); + assert_peer_not_exist(test_region_id, test_peer_id, &router); + let meta = router + .must_query_debug_info(test_region_id, timeout) + .unwrap(); + assert_eq!(meta.raft_status.id, test_peer_id + 1); + assert_eq!(meta.raft_status.hard_state.term, init_term + 1); + + // New peer should survive restart. + let router = node.restart(); + let meta = router + .must_query_debug_info(test_region_id, timeout) + .unwrap(); + assert_eq!(meta.raft_status.id, test_peer_id + 1); + assert_eq!(meta.raft_status.hard_state.term, init_term + 1); +} diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index ea796117e2c..e534a17fad1 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -20,7 +20,9 @@ use engine_traits::{ }; use error_code::ErrorCodeExt; use fail::fail_point; -use kvproto::raft_serverpb::{RaftApplyState, RaftLocalState, RaftMessage, RegionLocalState}; +use kvproto::raft_serverpb::{ + PeerState, RaftApplyState, RaftLocalState, RaftMessage, RegionLocalState, +}; use protobuf::Message; use raft::eraftpb::Entry; use tikv_util::{ @@ -37,6 +39,7 @@ use super::write_router::WriteSenders; use crate::{ store::{ config::Config, + entry_storage::first_index, fsm::RaftRouter, local_metrics::{RaftSendMessageMetrics, StoreWriteMetrics, TimeTracker}, metrics::*, @@ -218,18 
+221,6 @@ where self.ready_number )); } - if let Some(last_index) = self.entries.last().map(|e| e.get_index()) { - if let Some((from, _)) = self.cut_logs { - if from != last_index + 1 { - // Entries are put and deleted in the same writebatch. - return Err(box_err!( - "invalid cut logs, last_index {}, cut_logs {:?}", - last_index, - self.cut_logs - )); - } - } - } Ok(()) } @@ -272,6 +263,8 @@ where pub struct ExtraStates { apply_state: RaftApplyState, region_state: Option, + // Set only when we want to destroy the raft group in the write worker. + raft_state: Option, } impl ExtraStates { @@ -280,6 +273,7 @@ impl ExtraStates { Self { apply_state, region_state: None, + raft_state: None, } } @@ -287,6 +281,11 @@ impl ExtraStates { pub fn set_region_state(&mut self, region_state: RegionLocalState) { self.region_state = Some(region_state); } + + #[inline] + pub fn set_raft_state(&mut self, raft_state: RaftLocalState) { + self.raft_state = Some(raft_state); + } } pub enum ExtraBatchWrite { @@ -331,6 +330,9 @@ impl ExtraBatchWrite { if let Some(region_state) = extra_states.region_state { slot.get_mut().region_state = Some(region_state); } + if let Some(raft_state) = extra_states.raft_state { + slot.get_mut().raft_state = Some(raft_state); + } } collections::HashMapEntry::Vacant(slot) => { slot.insert(extra_states); @@ -452,21 +454,35 @@ where self.state_size + self.raft_wb.persist_size() } - fn before_write_to_db(&mut self, metrics: &StoreWriteMetrics) { + fn before_write_to_db(&mut self, engine: &ER, metrics: &StoreWriteMetrics) { // Put raft state to raft writebatch for (region_id, state) in self.raft_states.drain() { self.raft_wb.put_raft_state(region_id, &state).unwrap(); } if let ExtraBatchWrite::V2(extra_states_map) = &mut self.extra_batch_write { for (region_id, state) in extra_states_map.drain() { - self.raft_wb - .put_apply_state(region_id, &state.apply_state) - .unwrap(); + let mut tombstone = false; if let Some(region_state) = state.region_state { + if 
region_state.get_state() == PeerState::Tombstone { + tombstone = true; + engine + .clean( + region_id, + first_index(&state.apply_state), + state.raft_state.as_ref().unwrap(), + &mut self.raft_wb, + ) + .unwrap(); + } self.raft_wb .put_region_state(region_id, ®ion_state) .unwrap(); } + if !tombstone { + self.raft_wb + .put_apply_state(region_id, &state.apply_state) + .unwrap(); + } } } self.state_size = 0; @@ -656,7 +672,8 @@ where let timer = Instant::now(); - self.batch.before_write_to_db(&self.metrics); + self.batch + .before_write_to_db(&self.raft_engine, &self.metrics); fail_point!("raft_before_save"); @@ -915,7 +932,7 @@ pub fn write_to_db_for_test( { let mut batch = WriteTaskBatch::new(engines.raft.log_batch(RAFT_WB_DEFAULT_SIZE)); batch.add_write_task(task); - batch.before_write_to_db(&StoreWriteMetrics::new(false)); + batch.before_write_to_db(&engines.raft, &StoreWriteMetrics::new(false)); if let ExtraBatchWrite::V1(kv_wb) = &mut batch.extra_batch_write { if !kv_wb.is_empty() { let mut write_opts = WriteOptions::new(); From 0e7bc82824dfbde4d5b01b6848a5e5366512d56c Mon Sep 17 00:00:00 2001 From: ekexium Date: Thu, 8 Sep 2022 16:16:57 +0800 Subject: [PATCH 0197/1149] txn: distinguish different types of write conflicts (#13424) close tikv/tikv#13423 Signed-off-by: ekexium Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- Cargo.toml | 2 +- components/txn_types/src/lib.rs | 8 +++- components/txn_types/src/lock.rs | 3 +- src/server/lock_manager/waiter_manager.rs | 4 +- src/storage/errors.rs | 7 +++- src/storage/mvcc/mod.rs | 11 ++++-- src/storage/mvcc/reader/point_getter.rs | 3 +- src/storage/mvcc/reader/scanner/backward.rs | 3 +- src/storage/mvcc/reader/scanner/forward.rs | 3 +- src/storage/mvcc/txn.rs | 3 ++ .../txn/actions/acquire_pessimistic_lock.rs | 2 + src/storage/txn/actions/prewrite.rs | 37 ++++++++++++++++--- 13 files changed, 69 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aedc4328377..8a0356d6611 100644 --- a/Cargo.lock 
+++ b/Cargo.lock @@ -2629,7 +2629,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#2e37953b2b435961ad5b4f0e36b32c53f4777b23" +source = "git+https://github.com/pingcap/kvproto.git#7c004f4daf21e0677b0ceca50a723377a3968022" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/Cargo.toml b/Cargo.toml index 531449ab1b1..1b622f0d61b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -211,7 +211,7 @@ procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229 # When you modify TiKV cooperatively with kvproto, this will be useful to submit the PR to TiKV and the PR to # kvproto at the same time. # After the PR to kvproto is merged, remember to comment this out and run `cargo update -p kvproto`. -# [patch.'https://github.com/pingcap/kvproto'] +[patch.'https://github.com/pingcap/kvproto'] # kvproto = { git = "https://github.com/your_github_id/kvproto", branch="your_branch" } [workspace] diff --git a/components/txn_types/src/lib.rs b/components/txn_types/src/lib.rs index 2f018c23923..edd89256d2b 100644 --- a/components/txn_types/src/lib.rs +++ b/components/txn_types/src/lib.rs @@ -14,6 +14,7 @@ mod write; use std::io; use error_code::{self, ErrorCode, ErrorCodeExt}; +use kvproto::kvrpcpb; pub use lock::{Lock, LockType, PessimisticLock}; use thiserror::Error; pub use timestamp::{TimeStamp, TsSet, TSO_PHYSICAL_SHIFT_BITS}; @@ -36,9 +37,9 @@ pub enum ErrorInner { #[error("key is locked (backoff or cleanup) {0:?}")] KeyIsLocked(kvproto::kvrpcpb::LockInfo), #[error( - "write conflict, start_ts: {}, conflict_start_ts: {}, conflict_commit_ts: {}, key: {}, primary: {}", + "write conflict, start_ts: {}, conflict_start_ts: {}, conflict_commit_ts: {}, key: {}, primary: {}, reason: {:?}", .start_ts, .conflict_start_ts, .conflict_commit_ts, - log_wrappers::Value::key(.key), log_wrappers::Value::key(.primary) + log_wrappers::Value::key(.key), log_wrappers::Value::key(.primary), .reason )] 
WriteConflict { start_ts: TimeStamp, @@ -46,6 +47,7 @@ pub enum ErrorInner { conflict_commit_ts: TimeStamp, key: Vec, primary: Vec, + reason: kvrpcpb::WriteConflictReason, }, } @@ -63,12 +65,14 @@ impl ErrorInner { conflict_commit_ts, key, primary, + reason, } => Some(ErrorInner::WriteConflict { start_ts: *start_ts, conflict_start_ts: *conflict_start_ts, conflict_commit_ts: *conflict_commit_ts, key: key.to_owned(), primary: primary.to_owned(), + reason: reason.to_owned(), }), } } diff --git a/components/txn_types/src/lock.rs b/components/txn_types/src/lock.rs index 4c784e31318..96c96828bcb 100644 --- a/components/txn_types/src/lock.rs +++ b/components/txn_types/src/lock.rs @@ -3,7 +3,7 @@ use std::{borrow::Cow, mem::size_of}; use byteorder::ReadBytesExt; -use kvproto::kvrpcpb::{IsolationLevel, LockInfo, Op}; +use kvproto::kvrpcpb::{IsolationLevel, LockInfo, Op, WriteConflictReason}; use tikv_util::codec::{ bytes::{self, BytesEncoder}, number::{self, NumberEncoder, MAX_VAR_I64_LEN, MAX_VAR_U64_LEN}, @@ -395,6 +395,7 @@ impl Lock { conflict_commit_ts: Default::default(), key: key.to_raw()?, primary: lock.primary.to_vec(), + reason: WriteConflictReason::RcCheckTs, })) } diff --git a/src/server/lock_manager/waiter_manager.rs b/src/server/lock_manager/waiter_manager.rs index b0e05091267..2ba2b583de9 100644 --- a/src/server/lock_manager/waiter_manager.rs +++ b/src/server/lock_manager/waiter_manager.rs @@ -18,7 +18,7 @@ use futures::{ future::Future, task::{Context, Poll}, }; -use kvproto::deadlock::WaitForEntry; +use kvproto::{deadlock::WaitForEntry, kvrpcpb::WriteConflictReason}; use tikv_util::{ config::ReadableDuration, time::{duration_to_sec, InstantExt}, @@ -247,6 +247,7 @@ impl Waiter { conflict_commit_ts: commit_ts, key, primary, + reason: WriteConflictReason::PessimisticRetry, }); self.pr = ProcessResult::Failed { err: StorageError::from(TxnError::from(mvcc_err)), @@ -822,6 +823,7 @@ pub mod tests { conflict_commit_ts, key, primary, + .. 
}), ))))) => { assert_eq!(start_ts, waiter_ts); diff --git a/src/storage/errors.rs b/src/storage/errors.rs index dae61653f07..faf12f34003 100644 --- a/src/storage/errors.rs +++ b/src/storage/errors.rs @@ -314,7 +314,7 @@ pub fn extract_key_error(err: &Error) -> kvrpcpb::KeyError { conflict_commit_ts, key, primary, - .. + reason, }, ))))) => { let mut write_conflict = kvrpcpb::WriteConflict::default(); @@ -323,6 +323,7 @@ pub fn extract_key_error(err: &Error) -> kvrpcpb::KeyError { write_conflict.set_conflict_commit_ts(conflict_commit_ts.into_inner()); write_conflict.set_key(key.to_owned()); write_conflict.set_primary(primary.to_owned()); + write_conflict.set_reason(reason.to_owned()); key_error.set_conflict(write_conflict); // for compatibility with older versions. key_error.set_retryable(format!("{:?}", err)); @@ -457,6 +458,8 @@ pub fn extract_key_errors(res: Result>>) -> Vec, primary: Vec, + reason: kvrpcpb::WriteConflictReason, }, #[error( @@ -203,12 +204,14 @@ impl ErrorInner { conflict_commit_ts, key, primary, + reason, } => Some(ErrorInner::WriteConflict { start_ts: *start_ts, conflict_start_ts: *conflict_start_ts, conflict_commit_ts: *conflict_commit_ts, key: key.to_owned(), primary: primary.to_owned(), + reason: reason.to_owned(), }), ErrorInner::Deadlock { start_ts, @@ -348,12 +351,14 @@ impl From for ErrorInner { conflict_commit_ts, key, primary, + reason, }) => ErrorInner::WriteConflict { start_ts, conflict_start_ts, conflict_commit_ts, key, primary, + reason, }, } } diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index 2a231b42823..2758460a526 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -4,7 +4,7 @@ use std::borrow::Cow; use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; -use kvproto::kvrpcpb::IsolationLevel; +use kvproto::kvrpcpb::{IsolationLevel, WriteConflictReason}; use txn_types::{Key, Lock, LockType, TimeStamp, TsSet, Value, WriteRef, 
WriteType}; use crate::storage::{ @@ -254,6 +254,7 @@ impl PointGetter { conflict_commit_ts: key_commit_ts, key: cursor_key.into(), primary: vec![], + reason: WriteConflictReason::RcCheckTs, } .into()); } diff --git a/src/storage/mvcc/reader/scanner/backward.rs b/src/storage/mvcc/reader/scanner/backward.rs index 6ade614e848..11ed487cd56 100644 --- a/src/storage/mvcc/reader/scanner/backward.rs +++ b/src/storage/mvcc/reader/scanner/backward.rs @@ -4,7 +4,7 @@ use std::{borrow::Cow, cmp::Ordering}; use engine_traits::CF_DEFAULT; -use kvproto::kvrpcpb::IsolationLevel; +use kvproto::kvrpcpb::{IsolationLevel, WriteConflictReason}; use txn_types::{Key, Lock, TimeStamp, Value, Write, WriteRef, WriteType}; use super::ScannerConfig; @@ -274,6 +274,7 @@ impl BackwardKvScanner { conflict_commit_ts: last_checked_commit_ts, key: current_key.into(), primary: vec![], + reason: WriteConflictReason::RcCheckTs, } .into()); } diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 6bed0289053..aee185e307f 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -4,7 +4,7 @@ use std::{borrow::Cow, cmp::Ordering}; use engine_traits::CF_DEFAULT; -use kvproto::kvrpcpb::{ExtraOp, IsolationLevel}; +use kvproto::kvrpcpb::{ExtraOp, IsolationLevel, WriteConflictReason}; use txn_types::{Key, Lock, LockType, OldValue, TimeStamp, Value, WriteRef, WriteType}; use super::ScannerConfig; @@ -350,6 +350,7 @@ impl> ForwardScanner { conflict_commit_ts: key_commit_ts, key: current_key.into(), primary: vec![], + reason: WriteConflictReason::RcCheckTs, } .into()); } diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index c02d8ef97c8..b456b359b8f 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -205,6 +205,8 @@ pub(crate) fn make_txn_error( key: &Key, start_ts: TimeStamp, ) -> crate::storage::mvcc::ErrorInner { + use kvproto::kvrpcpb::WriteConflictReason; + use 
crate::storage::mvcc::ErrorInner; if let Some(s) = s { match s.to_ascii_lowercase().as_str() { @@ -244,6 +246,7 @@ pub(crate) fn make_txn_error( conflict_commit_ts: TimeStamp::zero(), key: key.to_raw().unwrap(), primary: vec![], + reason: WriteConflictReason::Optimistic, }, "deadlock" => ErrorInner::Deadlock { start_ts, diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index 7e30dcdd37c..699002f0126 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -1,5 +1,6 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. +use kvproto::kvrpcpb::WriteConflictReason; // #[PerformanceCriticalPath] use txn_types::{Key, LockType, OldValue, PessimisticLock, TimeStamp, Value, Write, WriteType}; @@ -173,6 +174,7 @@ pub fn acquire_pessimistic_lock( conflict_commit_ts: commit_ts, key: key.into_raw()?, primary: primary.to_vec(), + reason: WriteConflictReason::PessimisticRetry, } .into()); } diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 7b562af8b43..85c1a6f8ccc 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -5,8 +5,9 @@ use std::cmp; use fail::fail_point; use kvproto::kvrpcpb::{ - Assertion, AssertionLevel, + self, Assertion, AssertionLevel, PrewriteRequestPessimisticAction::{self, *}, + WriteConflictReason, }; use txn_types::{ is_short_value, Key, Mutation, MutationType, OldValue, TimeStamp, Value, Write, WriteType, @@ -361,7 +362,11 @@ impl<'a> PrewriteMutation<'a> { TransactionKind::Optimistic(_) => { if commit_ts > self.txn_props.start_ts { MVCC_CONFLICT_COUNTER.prewrite_write_conflict.inc(); - self.write_conflict_error(&write, commit_ts)?; + self.write_conflict_error( + &write, + commit_ts, + WriteConflictReason::Optimistic, + )?; } } // Note: PessimisticLockNotFound can happen on a non-pessimistically locked key, @@ -370,7 +375,11 
@@ impl<'a> PrewriteMutation<'a> { if let DoConstraintCheck = self.pessimistic_action { if commit_ts > self.txn_props.start_ts { MVCC_CONFLICT_COUNTER.prewrite_write_conflict.inc(); - self.write_conflict_error(&write, commit_ts)?; + self.write_conflict_error( + &write, + commit_ts, + WriteConflictReason::LazyUniquenessCheck, + )?; } } else if commit_ts > for_update_ts { warn!("conflicting write was found, pessimistic lock must be lost for the corresponding row key"; @@ -395,7 +404,11 @@ impl<'a> PrewriteMutation<'a> { { MVCC_CONFLICT_COUNTER.rolled_back.inc(); // TODO: Maybe we need to add a new error for the rolled back case. - self.write_conflict_error(&write, commit_ts)?; + self.write_conflict_error( + &write, + commit_ts, + WriteConflictReason::SelfRolledBack, + )?; } // Should check it when no lock exists, otherwise it can report error when there // is a lock belonging to a committed transaction which deletes the key. @@ -465,13 +478,19 @@ impl<'a> PrewriteMutation<'a> { final_min_commit_ts } - fn write_conflict_error(&self, write: &Write, commit_ts: TimeStamp) -> Result<()> { + fn write_conflict_error( + &self, + write: &Write, + commit_ts: TimeStamp, + reason: kvrpcpb::WriteConflictReason, + ) -> Result<()> { Err(ErrorInner::WriteConflict { start_ts: self.txn_props.start_ts, conflict_start_ts: write.start_ts, conflict_commit_ts: commit_ts, key: self.key.to_raw()?, primary: self.txn_props.primary.to_vec(), + reason, } .into()) } @@ -2075,7 +2094,13 @@ pub mod tests { must_pessimistic_prewrite_insert(&engine, key2, value, key, 3, 3, SkipPessimisticCheck); let err = must_pessimistic_prewrite_insert_err(&engine, key, value, key, 3, 3, DoConstraintCheck); - assert!(matches!(err, Error(box ErrorInner::WriteConflict { .. }))); + assert!(matches!( + err, + Error(box ErrorInner::WriteConflict { + reason: WriteConflictReason::LazyUniquenessCheck, + .. + }) + )); // 2. 
unique constraint fail must_prewrite_put(&engine, key, value, key, 11); From aebdada5f5473295e13dec25da9eade03774ac37 Mon Sep 17 00:00:00 2001 From: 5kbpers Date: Thu, 8 Sep 2022 17:48:55 +0800 Subject: [PATCH 0198/1149] raft_engine: set recover_state (#13272) ref tikv/tikv#12901 Add `put_recover_from_raft_db` and `recover_from_raft_db` to raft engine for checking if store should be recovered from states in raftdb. Signed-off-by: 5kbpers --- components/engine_panic/src/raft_engine.rs | 12 +++++++++++- components/engine_rocks/src/raft_engine.rs | 12 +++++++++++- components/engine_traits/src/raft_engine.rs | 11 ++++++++++- components/keys/src/lib.rs | 1 + components/raft_log_engine/src/engine.rs | 21 ++++++++++++++++++++- 5 files changed, 53 insertions(+), 4 deletions(-) diff --git a/components/engine_panic/src/raft_engine.rs b/components/engine_panic/src/raft_engine.rs index bb501007a76..75e0e68269d 100644 --- a/components/engine_panic/src/raft_engine.rs +++ b/components/engine_panic/src/raft_engine.rs @@ -3,7 +3,9 @@ use engine_traits::{Error, RaftEngine, RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch, Result}; use kvproto::{ metapb::Region, - raft_serverpb::{RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent}, + raft_serverpb::{ + RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent, StoreRecoverState, + }, }; use raft::eraftpb::Entry; @@ -52,6 +54,10 @@ impl RaftEngineReadOnly for PanicEngine { fn get_apply_state(&self, raft_group_id: u64) -> Result> { panic!() } + + fn get_recover_state(&self) -> Result> { + panic!() + } } impl RaftEngineDebug for PanicEngine { @@ -149,6 +155,10 @@ impl RaftEngine for PanicEngine { { panic!() } + + fn put_recover_state(&self, state: &StoreRecoverState) -> Result<()> { + panic!() + } } impl RaftLogBatch for PanicWriteBatch { diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index 9e70f7158a7..b66a56caadf 100644 --- a/components/engine_rocks/src/raft_engine.rs 
+++ b/components/engine_rocks/src/raft_engine.rs @@ -8,7 +8,9 @@ use engine_traits::{ }; use kvproto::{ metapb::Region, - raft_serverpb::{RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent}, + raft_serverpb::{ + RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent, StoreRecoverState, + }, }; use protobuf::Message; use raft::eraftpb::Entry; @@ -151,6 +153,10 @@ impl RaftEngineReadOnly for RocksEngine { let key = keys::apply_state_key(raft_group_id); self.get_msg_cf(CF_DEFAULT, &key) } + + fn get_recover_state(&self) -> Result> { + self.get_msg_cf(CF_DEFAULT, keys::RECOVER_STATE_KEY) + } } impl RaftEngineDebug for RocksEngine { @@ -364,6 +370,10 @@ impl RaftEngine for RocksEngine { Some(e) => Err(e), } } + + fn put_recover_state(&self, state: &StoreRecoverState) -> Result<()> { + self.put_msg(keys::RECOVER_STATE_KEY, state) + } } impl RaftLogBatch for RocksWriteBatchVec { diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index e64bbe18018..b7a3f50699c 100644 --- a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -2,7 +2,9 @@ use kvproto::{ metapb::Region, - raft_serverpb::{RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent}, + raft_serverpb::{ + RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent, StoreRecoverState, + }, }; use raft::eraftpb::Entry; @@ -19,6 +21,7 @@ pub trait RaftEngineReadOnly: Sync + Send + 'static { fn get_raft_state(&self, raft_group_id: u64) -> Result>; fn get_region_state(&self, raft_group_id: u64) -> Result>; fn get_apply_state(&self, raft_group_id: u64) -> Result>; + fn get_recover_state(&self) -> Result>; fn get_entry(&self, raft_group_id: u64, index: u64) -> Result>; @@ -144,6 +147,12 @@ pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send where F: FnMut(u64) -> std::result::Result<(), E>, E: From; + + /// Indicate whether region states should be recovered from raftdb and + /// 
replay raft logs. + /// When kvdb's write-ahead-log is disabled, the sequence number of the last + /// boot time is saved. + fn put_recover_state(&self, state: &StoreRecoverState) -> Result<()>; } pub trait RaftLogBatch: Send { diff --git a/components/keys/src/lib.rs b/components/keys/src/lib.rs index f62ffc6f8ab..304e13f1e66 100644 --- a/components/keys/src/lib.rs +++ b/components/keys/src/lib.rs @@ -33,6 +33,7 @@ pub const DATA_MAX_KEY: &[u8] = &[DATA_PREFIX + 1]; // Following keys are all local keys, so the first byte must be 0x01. pub const STORE_IDENT_KEY: &[u8] = &[LOCAL_PREFIX, 0x01]; pub const PREPARE_BOOTSTRAP_KEY: &[u8] = &[LOCAL_PREFIX, 0x02]; +pub const RECOVER_STATE_KEY: &[u8] = &[LOCAL_PREFIX, 0x03]; // We save two types region data in DB, for raft and other meta data. // When the store starts, we should iterate all region meta data to // construct peer, no need to travel large raft data, so we separate them diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 2cd27d89538..07c7bb47bca 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -16,7 +16,9 @@ use engine_traits::{ use file_system::{IoOp, IoRateLimiter, IoType}; use kvproto::{ metapb::Region, - raft_serverpb::{RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent}, + raft_serverpb::{ + RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent, StoreRecoverState, + }, }; use raft::eraftpb::Entry; use raft_engine::{ @@ -344,6 +346,7 @@ const STORE_IDENT_KEY: &[u8] = &[0x01]; const PREPARE_BOOTSTRAP_REGION_KEY: &[u8] = &[0x02]; const REGION_STATE_KEY: &[u8] = &[0x03]; const APPLY_STATE_KEY: &[u8] = &[0x04]; +const RECOVER_STATE_KEY: &[u8] = &[0x05]; impl RaftLogBatchTrait for RaftLogBatch { fn append(&mut self, raft_group_id: u64, entries: Vec) -> Result<()> { @@ -472,6 +475,12 @@ impl RaftEngineReadOnly for RaftLogEngine { .get_message(raft_group_id, APPLY_STATE_KEY) 
.map_err(transfer_error) } + + fn get_recover_state(&self) -> Result> { + self.0 + .get_message(STORE_STATE_ID, RECOVER_STATE_KEY) + .map_err(transfer_error) + } } impl RaftEngineDebug for RaftLogEngine { @@ -621,6 +630,16 @@ impl RaftEngine for RaftLogEngine { } Ok(()) } + + fn put_recover_state(&self, state: &StoreRecoverState) -> Result<()> { + let mut batch = Self::LogBatch::default(); + batch + .0 + .put_message(STORE_STATE_ID, RECOVER_STATE_KEY.to_vec(), state) + .map_err(transfer_error)?; + self.0.write(&mut batch.0, true).map_err(transfer_error)?; + Ok(()) + } } fn transfer_error(e: RaftEngineError) -> engine_traits::Error { From 079a06914256a0cbc4e6f3f85fd51f38267c06c7 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Thu, 8 Sep 2022 20:28:55 +0800 Subject: [PATCH 0199/1149] raftstore: Implement engine trait can_apply_snapshot (#12924) ref tikv/tikv#12849 Support new engine trait can_apply_snapshot Signed-off-by: CalvinNeo --- components/engine_rocks/src/engine.rs | 4 + components/engine_rocks/src/lib.rs | 2 +- components/engine_traits/src/engine.rs | 8 ++ .../raftstore/src/coprocessor/dispatcher.rs | 13 +- components/raftstore/src/store/config.rs | 17 +++ components/raftstore/src/store/fsm/store.rs | 4 +- .../raftstore/src/store/peer_storage.rs | 18 ++- components/raftstore/src/store/snap.rs | 4 + components/raftstore/src/store/worker/mod.rs | 2 + .../raftstore/src/store/worker/region.rs | 128 ++++++++++++------ components/tikv_util/src/timer.rs | 7 +- tests/integrations/config/mod.rs | 2 + 12 files changed, 143 insertions(+), 66 deletions(-) diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 13ae38b6afb..9e3bba56bad 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -66,6 +66,10 @@ impl RocksEngine { self.shared_block_cache = enable; } + pub fn shared_block_cache(&self) -> bool { + self.shared_block_cache + } + pub fn support_multi_batch_write(&self) -> bool { 
self.support_multi_batch_write } diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index b0e7012bad7..774fe9cb37b 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -68,7 +68,7 @@ mod perf_context_metrics; mod engine_iterator; pub use crate::engine_iterator::*; -mod options; +pub mod options; pub mod util; mod compact_listener; diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index e3e767f0ed2..5ad9a13b86f 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -64,6 +64,14 @@ pub trait KvEngine: /// This only exists as a temporary hack during refactoring. /// It cannot be used forever. fn bad_downcast(&self) -> &T; + + /// Returns false if KvEngine can't apply snapshot for this region now. + /// Some KvEngines need to do some transforms before apply data from + /// snapshot. These procedures can be batched in background if there are + /// more than one incoming snapshots, thus not blocking applying thread. 
+ fn can_apply_snapshot(&self, _is_timeout: bool, _new_batch: bool, _region_id: u64) -> bool { + true + } } /// TabletAccessor is the trait to access all the tablets with provided accessor diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index d2c4e14567a..ed348950050 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -540,14 +540,11 @@ impl CoprocessorHost { snap_key: &crate::store::SnapKey, snap: Option<&crate::store::Snapshot>, ) { - loop_ob!( - region, - &self.registry.apply_snapshot_observers, - post_apply_snapshot, - peer_id, - snap_key, - snap, - ); + let mut ctx = ObserverContext::new(region); + for observer in &self.registry.apply_snapshot_observers { + let observer = observer.observer.inner(); + observer.post_apply_snapshot(&mut ctx, peer_id, snap_key, snap); + } } pub fn new_split_checker_host<'a>( diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 34805e4c9ca..8052a58dea8 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -137,6 +137,17 @@ pub struct Config { #[online_config(skip)] pub snap_apply_batch_size: ReadableSize, + // used to periodically check whether schedule pending applies in region runner + #[doc(hidden)] + #[online_config(skip)] + pub region_worker_tick_interval: ReadableDuration, + + // used to periodically check whether we should delete a stale peer's range in + // region runner + #[doc(hidden)] + #[online_config(skip)] + pub clean_stale_ranges_tick: usize, + // Interval (ms) to check region whether the data is consistent. 
pub consistency_check_interval: ReadableDuration, @@ -335,6 +346,12 @@ impl Default for Config { peer_stale_state_check_interval: ReadableDuration::minutes(5), leader_transfer_max_log_lag: 128, snap_apply_batch_size: ReadableSize::mb(10), + region_worker_tick_interval: if cfg!(feature = "test") { + ReadableDuration::millis(200) + } else { + ReadableDuration::millis(1000) + }, + clean_stale_ranges_tick: if cfg!(feature = "test") { 1 } else { 10 }, lock_cf_compact_interval: ReadableDuration::minutes(10), lock_cf_compact_bytes_threshold: ReadableSize::mb(256), // Disable consistency check by default as it will hurt performance. diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 4ee3c5dc091..930062f2e0c 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -1496,9 +1496,7 @@ impl RaftBatchSystem { let region_runner = RegionRunner::new( engines.kv.clone(), mgr.clone(), - cfg.value().snap_apply_batch_size.0 as usize, - cfg.value().use_delete_range, - cfg.value().snap_generator_pool_size, + cfg.clone(), workers.coprocessor_host.clone(), self.router(), Some(Arc::clone(&pd_client)), diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index c99b7644321..7f4b6778860 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -1142,7 +1142,8 @@ pub mod tests { fsm::apply::compact_raft_log, initial_region, prepare_bootstrap_cluster, worker::{ - FetchedLogs, LogFetchedNotifier, RaftlogFetchRunner, RegionRunner, RegionTask, + make_region_worker_raftstore_cfg, FetchedLogs, LogFetchedNotifier, + RaftlogFetchRunner, RegionRunner, RegionTask, }, }, }; @@ -1553,12 +1554,11 @@ pub mod tests { let (dummy_scheduler, _) = dummy_scheduler(); let mut s = new_storage_from_ents(sched.clone(), dummy_scheduler, &td, &ents); let (router, _) = mpsc::sync_channel(100); + let 
cfg = make_region_worker_raftstore_cfg(true); let runner = RegionRunner::new( s.engines.kv.clone(), mgr, - 0, - true, - 2, + cfg, CoprocessorHost::::default(), router, Option::>::None, @@ -1701,12 +1701,11 @@ pub mod tests { let store = new_store(1, labels); pd_client.add_store(store); let pd_mock = Arc::new(pd_client); + let cfg = make_region_worker_raftstore_cfg(true); let runner = RegionRunner::new( s.engines.kv.clone(), mgr, - 0, - true, - 2, + cfg, CoprocessorHost::::default(), router, Some(pd_mock), @@ -1767,12 +1766,11 @@ pub mod tests { let (dummy_scheduler, _) = dummy_scheduler(); let s1 = new_storage_from_ents(sched.clone(), dummy_scheduler.clone(), &td1, &ents); let (router, _) = mpsc::sync_channel(100); + let cfg = make_region_worker_raftstore_cfg(true); let runner = RegionRunner::new( s1.engines.kv.clone(), mgr, - 0, - true, - 2, + cfg, CoprocessorHost::::default(), router, Option::>::None, diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 8b063e9e1f0..d25fb5f11b8 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1200,6 +1200,10 @@ impl Snapshot { self.hold_tmp_files = false; Ok(()) } + + pub fn cf_files(&self) -> &[CfFile] { + &self.cf_files + } } // To check whether a procedure about apply snapshot aborts or not. 
diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index 4910f3fdd2b..600a7a1ae6c 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -18,6 +18,8 @@ mod split_check; mod split_config; mod split_controller; +#[cfg(test)] +pub use self::region::tests::make_raftstore_cfg as make_region_worker_raftstore_cfg; pub use self::{ check_leader::{Runner as CheckLeaderRunner, Task as CheckLeaderTask}, cleanup::{Runner as CleanupRunner, Task as CleanupTask}, diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 53b88d6ef16..ad17779e42b 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -23,7 +23,9 @@ use kvproto::raft_serverpb::{PeerState, RaftApplyState, RegionLocalState}; use pd_client::PdClient; use raft::eraftpb::Snapshot as RaftSnapshot; use tikv_util::{ - box_err, box_try, defer, error, info, thd_name, + box_err, box_try, + config::VersionTrack, + defer, error, info, thd_name, time::Instant, warn, worker::{Runnable, RunnableWithTimer}, @@ -44,23 +46,10 @@ use crate::{ }, snap::{plain_file_used, Error, Result, SNAPSHOT_CFS}, transport::CasualRouter, - ApplyOptions, CasualMessage, SnapEntry, SnapKey, SnapManager, + ApplyOptions, CasualMessage, Config, SnapEntry, SnapKey, SnapManager, }, }; -// used to periodically check whether we should delete a stale peer's range in -// region runner -#[cfg(test)] -pub const STALE_PEER_CHECK_TICK: usize = 1; // 1000 milliseconds -#[cfg(not(test))] -pub const STALE_PEER_CHECK_TICK: usize = 10; // 10000 milliseconds - -// used to periodically check whether schedule pending applies in region runner -#[cfg(not(test))] -pub const PENDING_APPLY_CHECK_INTERVAL: u64 = 1_000; // 1000 milliseconds -#[cfg(test)] -pub const PENDING_APPLY_CHECK_INTERVAL: u64 = 200; // 200 milliseconds - const 
CLEANUP_MAX_REGION_COUNT: usize = 64; const TIFLASH: &str = "tiflash"; @@ -355,6 +344,7 @@ where use_delete_range: bool, clean_stale_tick: usize, clean_stale_check_interval: Duration, + clean_stale_ranges_tick: usize, tiflash_stores: HashMap, // we may delay some apply tasks if level 0 files to write stall threshold, @@ -387,18 +377,19 @@ where pub fn new( engine: EK, mgr: SnapManager, - batch_size: usize, - use_delete_range: bool, - snap_generator_pool_size: usize, + cfg: Arc>, coprocessor_host: CoprocessorHost, router: R, pd_client: Option>, ) -> Runner { Runner { - batch_size, - use_delete_range, + batch_size: cfg.value().snap_apply_batch_size.0 as usize, + use_delete_range: cfg.value().use_delete_range, clean_stale_tick: 0, - clean_stale_check_interval: Duration::from_millis(PENDING_APPLY_CHECK_INTERVAL), + clean_stale_check_interval: Duration::from_millis( + cfg.value().region_worker_tick_interval.as_millis(), + ), + clean_stale_ranges_tick: cfg.value().clean_stale_ranges_tick, tiflash_stores: HashMap::default(), pending_applies: VecDeque::new(), pending_delete_ranges: PendingDeleteRanges::default(), @@ -408,7 +399,7 @@ where router, pd_client, pool: Builder::new(thd_name!("snap-generator")) - .max_thread_count(snap_generator_pool_size) + .max_thread_count(cfg.value().snap_generator_pool_size) .build_future_pool(), } } @@ -745,8 +736,9 @@ where } /// Tries to apply pending tasks if there is some. - fn handle_pending_applies(&mut self) { + fn handle_pending_applies(&mut self, is_timeout: bool) { fail_point!("apply_pending_snapshot", |_| {}); + let mut new_batch = true; while !self.pending_applies.is_empty() { // should not handle too many applies than the number of files that can be // ingested. 
check level 0 every time because we can not make sure @@ -754,13 +746,24 @@ where if self.ingest_maybe_stall() { break; } - if let Some(Task::Apply { - region_id, - status, - peer_id, - }) = self.pending_applies.pop_front() - { - self.handle_apply(region_id, peer_id, status); + if let Some(Task::Apply { region_id, .. }) = self.pending_applies.front() { + fail_point!("handle_new_pending_applies", |_| {}); + if !self + .engine + .can_apply_snapshot(is_timeout, new_batch, *region_id) + { + // KvEngine can't apply snapshot for other reasons. + break; + } + if let Some(Task::Apply { + region_id, + status, + peer_id, + }) = self.pending_applies.pop_front() + { + new_batch = false; + self.handle_apply(region_id, peer_id, status); + } } } } @@ -839,7 +842,7 @@ where } // to makes sure applying snapshots in order. self.pending_applies.push_back(task); - self.handle_pending_applies(); + self.handle_pending_applies(false); if !self.pending_applies.is_empty() { // delay the apply and retry later SNAP_COUNTER.apply.delay.inc() @@ -871,9 +874,9 @@ where T: PdClient + 'static, { fn on_timeout(&mut self) { - self.handle_pending_applies(); + self.handle_pending_applies(true); self.clean_stale_tick += 1; - if self.clean_stale_tick >= STALE_PEER_CHECK_TICK { + if self.clean_stale_tick >= self.clean_stale_ranges_tick { self.clean_stale_ranges(); self.clean_stale_tick = 0; } @@ -885,7 +888,7 @@ where } #[cfg(test)] -mod tests { +pub(crate) mod tests { use std::{ io, sync::{atomic::AtomicUsize, mpsc, Arc}, @@ -906,7 +909,10 @@ mod tests { use pd_client::RpcClient; use protobuf::Message; use tempfile::Builder; - use tikv_util::worker::{LazyWorker, Worker}; + use tikv_util::{ + config::{ReadableDuration, ReadableSize}, + worker::{LazyWorker, Worker}, + }; use super::*; use crate::{ @@ -920,6 +926,20 @@ mod tests { }, }; + const PENDING_APPLY_CHECK_INTERVAL: u64 = 200; + const STALE_PEER_CHECK_TICK: usize = 1; + + pub fn make_raftstore_cfg(use_delete_range: bool) -> Arc> { + let mut 
store_cfg = Config::default(); + store_cfg.snap_apply_batch_size = ReadableSize(0); + store_cfg.region_worker_tick_interval = + ReadableDuration::millis(PENDING_APPLY_CHECK_INTERVAL); + store_cfg.clean_stale_ranges_tick = STALE_PEER_CHECK_TICK; + store_cfg.use_delete_range = use_delete_range; + store_cfg.snap_generator_pool_size = 2; + Arc::new(VersionTrack::new(store_cfg)) + } + fn insert_range( pending_delete_ranges: &mut PendingDeleteRanges, id: u64, @@ -1015,12 +1035,11 @@ mod tests { let mut worker: LazyWorker> = bg_worker.lazy_build("region-worker"); let sched = worker.scheduler(); let (router, _) = mpsc::sync_channel(11); + let cfg = make_raftstore_cfg(false); let mut runner = RegionRunner::new( engine.kv.clone(), mgr, - 0, - false, - 2, + cfg, CoprocessorHost::::default(), router, Option::>::None, @@ -1123,12 +1142,11 @@ mod tests { let mut worker = bg_worker.lazy_build("snap-manager"); let sched = worker.scheduler(); let (router, receiver) = mpsc::sync_channel(1); + let cfg = make_raftstore_cfg(true); let runner = RegionRunner::new( engine.kv.clone(), mgr, - 0, - true, - 2, + cfg, host, router, Option::>::None, @@ -1237,6 +1255,22 @@ mod tests { } }; + #[allow(dead_code)] + let must_not_finish = |ids: &[u64]| { + for id in ids { + let region_key = keys::region_state_key(*id); + assert_eq!( + engine + .kv + .get_msg_cf::(CF_RAFT, ®ion_key) + .unwrap() + .unwrap() + .get_state(), + PeerState::Applying + ) + } + }; + // snapshot will not ingest cause already write stall gen_and_apply_snap(1); assert_eq!( @@ -1371,6 +1405,18 @@ mod tests { ); thread::sleep(Duration::from_millis(PENDING_APPLY_CHECK_INTERVAL * 2)); assert!(!check_region_exist(6)); + + #[cfg(feature = "failpoints")] + { + engine.kv.compact_files_in_range(None, None, None).unwrap(); + fail::cfg("handle_new_pending_applies", "return").unwrap(); + gen_and_apply_snap(7); + thread::sleep(Duration::from_millis(PENDING_APPLY_CHECK_INTERVAL * 2)); + must_not_finish(&[7]); + 
fail::remove("handle_new_pending_applies"); + thread::sleep(Duration::from_millis(PENDING_APPLY_CHECK_INTERVAL * 2)); + wait_apply_finish(&[7]); + } } #[derive(Clone, Default)] diff --git a/components/tikv_util/src/timer.rs b/components/tikv_util/src/timer.rs index f47cdaf21e9..30445780ac8 100644 --- a/components/tikv_util/src/timer.rs +++ b/components/tikv_util/src/timer.rs @@ -93,14 +93,15 @@ impl Ord for TimeoutTask { } lazy_static! { - pub static ref GLOBAL_TIMER_HANDLE: Handle = start_global_timer(); + pub static ref GLOBAL_TIMER_HANDLE: Handle = start_global_timer("timer"); } -fn start_global_timer() -> Handle { +/// Create a global timer with specific thread name. +pub fn start_global_timer(name: &str) -> Handle { let (tx, rx) = mpsc::channel(); let props = crate::thread_group::current_properties(); Builder::new() - .name(thd_name!("timer")) + .name(thd_name!(name)) .spawn_wrapper(move || { crate::thread_group::set_properties(props); tikv_alloc::add_thread_memory_accessor(); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 247b06834b0..1e87b5f7aa1 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -204,6 +204,8 @@ fn test_serde_custom_tikv_config() { peer_stale_state_check_interval: ReadableDuration::hours(2), leader_transfer_max_log_lag: 123, snap_apply_batch_size: ReadableSize::mb(12), + region_worker_tick_interval: ReadableDuration::millis(1000), + clean_stale_ranges_tick: 10, lock_cf_compact_interval: ReadableDuration::minutes(12), lock_cf_compact_bytes_threshold: ReadableSize::mb(123), consistency_check_interval: ReadableDuration::secs(12), From 3e863071dcd3ff6a56e772f5b97493b48998f432 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Fri, 9 Sep 2022 11:18:56 +0800 Subject: [PATCH 0200/1149] *: move ioload to tikv_util (#13421) ref tikv/tikv#13433 Signed-off-by: Ryan Leung Co-authored-by: Ti Chi Robot --- components/tikv_util/src/quota_limiter.rs | 2 +- .../diagnostics => 
components/tikv_util/src/sys}/ioload.rs | 0 components/tikv_util/src/sys/mod.rs | 1 + src/server/service/diagnostics/mod.rs | 6 ++++-- src/server/service/diagnostics/sys.rs | 4 ++-- 5 files changed, 8 insertions(+), 5 deletions(-) rename {src/server/service/diagnostics => components/tikv_util/src/sys}/ioload.rs (100%) diff --git a/components/tikv_util/src/quota_limiter.rs b/components/tikv_util/src/quota_limiter.rs index 818ec0ea60c..ae2e52d40d9 100644 --- a/components/tikv_util/src/quota_limiter.rs +++ b/components/tikv_util/src/quota_limiter.rs @@ -26,7 +26,7 @@ use super::{ // It's better to use a universal approach. const CPU_LIMITER_REFILL_DURATION: Duration = Duration::from_millis(100); -// Limter can be issued to cpu, write and read bandwidth +// Limiter can be issued to cpu, write and read bandwidth #[derive(Debug)] pub struct LimiterItems { cputime_limiter: Limiter, diff --git a/src/server/service/diagnostics/ioload.rs b/components/tikv_util/src/sys/ioload.rs similarity index 100% rename from src/server/service/diagnostics/ioload.rs rename to components/tikv_util/src/sys/ioload.rs diff --git a/components/tikv_util/src/sys/mod.rs b/components/tikv_util/src/sys/mod.rs index 8dd7aefa77c..d17c821e995 100644 --- a/components/tikv_util/src/sys/mod.rs +++ b/components/tikv_util/src/sys/mod.rs @@ -5,6 +5,7 @@ mod cgroup; pub mod cpu_time; pub mod disk; pub mod inspector; +pub mod ioload; pub mod thread; // re-export some traits for ease of use diff --git a/src/server/service/diagnostics/mod.rs b/src/server/service/diagnostics/mod.rs index 438f618ff19..60df07aa167 100644 --- a/src/server/service/diagnostics/mod.rs +++ b/src/server/service/diagnostics/mod.rs @@ -19,12 +19,14 @@ use kvproto::diagnosticspb::{ Diagnostics, SearchLogRequest, SearchLogRequestTarget, SearchLogResponse, ServerInfoRequest, ServerInfoResponse, ServerInfoType, }; -use tikv_util::{sys::SystemExt, timer::GLOBAL_TIMER_HANDLE}; +use tikv_util::{ + sys::{ioload, SystemExt}, + 
timer::GLOBAL_TIMER_HANDLE, +}; use tokio::runtime::Handle; use crate::server::Error; -mod ioload; mod log; mod sys; diff --git a/src/server/service/diagnostics/sys.rs b/src/server/service/diagnostics/sys.rs index f39da646ad1..e62028e66e6 100644 --- a/src/server/service/diagnostics/sys.rs +++ b/src/server/service/diagnostics/sys.rs @@ -5,11 +5,11 @@ use std::{collections::HashMap, string::ToString}; use kvproto::diagnosticspb::{ServerInfoItem, ServerInfoPair}; use tikv_util::{ config::KIB, - sys::{cpu_time::LinuxStyleCpuTime, SysQuota, *}, + sys::{cpu_time::LinuxStyleCpuTime, ioload, SysQuota, *}, }; use walkdir::WalkDir; -use crate::server::service::diagnostics::{ioload, SYS_INFO}; +use crate::server::service::diagnostics::SYS_INFO; type CpuTimeSnapshot = Option; From 3f5acade42d6fb61ea55577fa4bffb54e16c5dc6 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Fri, 9 Sep 2022 13:58:56 +0800 Subject: [PATCH 0201/1149] storage: skip Rollback when checking newer version for non-pessimisitc keys (#13426) close tikv/tikv#13425, ref pingcap/tidb#35525 Don't treat newer Rollback records as write conflicts for non-pessimistic keys in pessimistic transactions. They can cause false positive errors because they can be written even if the pessimistic lock of the corresponding row key exists. Rollback records are only used to prevent retried prewrite from succeeding. Even if the Rollback record of the current transaction is collapsed by a newer record, it is safe to prewrite this non-pessimistic key because either the primary key is rolled back or it's protected because it's written by CheckSecondaryLocks. 
Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- src/storage/txn/actions/prewrite.rs | 151 ++++++++++++++++++---------- 1 file changed, 99 insertions(+), 52 deletions(-) diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 85c1a6f8ccc..5883fc4b983 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -353,71 +353,78 @@ impl<'a> PrewriteMutation<'a> { &self, reader: &mut SnapshotReader, ) -> Result> { - match reader.seek_write(&self.key, TimeStamp::max())? { - Some((commit_ts, write)) => { - // Abort on writes after our start/for_update timestamp ... - // If exists a commit version whose commit timestamp is larger than current - // start/for_update timestamp, we should abort current prewrite. - match self.txn_props.kind { - TransactionKind::Optimistic(_) => { + let mut seek_ts = TimeStamp::max(); + while let Some((commit_ts, write)) = reader.seek_write(&self.key, seek_ts)? { + // If there's a write record whose commit_ts equals to our start ts, the current + // transaction is ok to continue, unless the record means that the current + // transaction has been rolled back. + if commit_ts == self.txn_props.start_ts + && (write.write_type == WriteType::Rollback || write.has_overlapped_rollback) + { + MVCC_CONFLICT_COUNTER.rolled_back.inc(); + // TODO: Maybe we need to add a new error for the rolled back case. + self.write_conflict_error(&write, commit_ts, WriteConflictReason::SelfRolledBack)?; + } + match self.txn_props.kind { + TransactionKind::Optimistic(_) => { + if commit_ts > self.txn_props.start_ts { + MVCC_CONFLICT_COUNTER.prewrite_write_conflict.inc(); + self.write_conflict_error( + &write, + commit_ts, + WriteConflictReason::Optimistic, + )?; + } + } + // Note: PessimisticLockNotFound can happen on a non-pessimistically locked key, + // if it is a retrying prewrite request. 
+ TransactionKind::Pessimistic(for_update_ts) => { + if let DoConstraintCheck = self.pessimistic_action { + // Do the same as optimistic transactions if constraint checks are needed. if commit_ts > self.txn_props.start_ts { MVCC_CONFLICT_COUNTER.prewrite_write_conflict.inc(); self.write_conflict_error( &write, commit_ts, - WriteConflictReason::Optimistic, + WriteConflictReason::LazyUniquenessCheck, )?; } } - // Note: PessimisticLockNotFound can happen on a non-pessimistically locked key, - // if it is a retrying prewrite request. - TransactionKind::Pessimistic(for_update_ts) => { - if let DoConstraintCheck = self.pessimistic_action { - if commit_ts > self.txn_props.start_ts { - MVCC_CONFLICT_COUNTER.prewrite_write_conflict.inc(); - self.write_conflict_error( - &write, - commit_ts, - WriteConflictReason::LazyUniquenessCheck, - )?; - } - } else if commit_ts > for_update_ts { - warn!("conflicting write was found, pessimistic lock must be lost for the corresponding row key"; - "key" => %self.key, - "start_ts" => self.txn_props.start_ts, - "for_update_ts" => for_update_ts, - "conflicting start_ts" => write.start_ts, - "conflicting commit_ts" => commit_ts); - return Err(ErrorInner::PessimisticLockNotFound { - start_ts: self.txn_props.start_ts, - key: self.key.clone().into_raw()?, - } - .into()); + if commit_ts > for_update_ts { + // Don't treat newer Rollback records as write conflicts. They can cause + // false positive errors because they can be written even if the pessimistic + // lock of the corresponding row key exists. + // Rollback records are only used to prevent retried prewrite from + // succeeding. Even if the Rollback record of the current transaction is + // collapsed by a newer record, it is safe to prewrite this non-pessimistic + // key because either the primary key is rolled back or it's protected + // because it's written by CheckSecondaryLocks. 
+ if write.write_type == WriteType::Rollback { + seek_ts = commit_ts.prev(); + continue; + } + + warn!("conflicting write was found, pessimistic lock must be lost for the corresponding row key"; + "key" => %self.key, + "start_ts" => self.txn_props.start_ts, + "for_update_ts" => for_update_ts, + "conflicting start_ts" => write.start_ts, + "conflicting commit_ts" => commit_ts); + return Err(ErrorInner::PessimisticLockNotFound { + start_ts: self.txn_props.start_ts, + key: self.key.clone().into_raw()?, } + .into()); } } - // If there's a write record whose commit_ts equals to our start ts, the current - // transaction is ok to continue, unless the record means that the current - // transaction has been rolled back. - if commit_ts == self.txn_props.start_ts - && (write.write_type == WriteType::Rollback || write.has_overlapped_rollback) - { - MVCC_CONFLICT_COUNTER.rolled_back.inc(); - // TODO: Maybe we need to add a new error for the rolled back case. - self.write_conflict_error( - &write, - commit_ts, - WriteConflictReason::SelfRolledBack, - )?; - } - // Should check it when no lock exists, otherwise it can report error when there - // is a lock belonging to a committed transaction which deletes the key. - check_data_constraint(reader, self.should_not_exist, &write, commit_ts, &self.key)?; - - Ok(Some((write, commit_ts))) } - None => Ok(None), + // Should check it when no lock exists, otherwise it can report error when there + // is a lock belonging to a committed transaction which deletes the key. + check_data_constraint(reader, self.should_not_exist, &write, commit_ts, &self.key)?; + + return Ok(Some((write, commit_ts))); } + Ok(None) } fn write_lock(self, lock_status: LockStatus, txn: &mut MvccTxn) -> Result { @@ -1554,6 +1561,46 @@ pub mod tests { kvproto::kvrpcpb::AssertionLevel::Off, ); must_locked(&engine, b"k2", 13); + must_rollback(&engine, b"k2", 13, false); + + // Write a Rollback at 50 first. 
A retried prewrite at the same ts should + // report WriteConflict. + must_rollback(&engine, b"k2", 50, false); + let err = must_retry_pessimistic_prewrite_put_err( + &engine, + b"k2", + b"v2", + b"k1", + &None, + 50, + 50, + SkipPessimisticCheck, + 0, + ); + assert!( + matches!(err, Error(box ErrorInner::WriteConflict { .. })), + "{:?}", + err + ); + // But prewriting at 48 can succeed because a newer rollback is allowed. + must_prewrite_put_impl( + &engine, + b"k2", + b"v2", + b"k1", + &None, + 48.into(), + SkipPessimisticCheck, + 100, + 48.into(), + 1, + 49.into(), + TimeStamp::default(), + true, + kvproto::kvrpcpb::Assertion::None, + kvproto::kvrpcpb::AssertionLevel::Off, + ); + must_locked(&engine, b"k2", 48); } #[test] From 4cd28ba026b846792769b2e75857adba19cc564c Mon Sep 17 00:00:00 2001 From: hehechen Date: Fri, 9 Sep 2022 14:10:56 +0800 Subject: [PATCH 0202/1149] hook after update safe ts (#13432) close tikv/tikv#13435 Add hook to observe the update of safe ts to calculate the TiFlash synchronization progress, including leader safe ts from check_leader RPC, and the update of self safe ts when receive check_leader RPC or apply. Signed-off-by: hehechen Co-authored-by: Ti Chi Robot --- .../raftstore/src/coprocessor/dispatcher.rs | 36 +++++++++++- components/raftstore/src/coprocessor/mod.rs | 7 ++- components/raftstore/src/store/fsm/peer.rs | 9 +-- components/raftstore/src/store/peer.rs | 6 +- components/raftstore/src/store/util.rs | 55 +++++++++++++++---- .../src/store/worker/check_leader.rs | 33 ++++++++--- components/server/src/server.rs | 5 +- components/test_raftstore/src/server.rs | 3 +- 8 files changed, 127 insertions(+), 27 deletions(-) diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index ed348950050..3cddc21e8cb 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -133,6 +133,11 @@ macro_rules! 
impl_box_observer_g { impl_box_observer!(BoxAdminObserver, AdminObserver, WrappedAdminObserver); impl_box_observer!(BoxQueryObserver, QueryObserver, WrappedQueryObserver); +impl_box_observer!( + BoxUpdateSafeTsObserver, + UpdateSafeTsObserver, + WrappedUpdateSafeTsObserver +); impl_box_observer!( BoxApplySnapshotObserver, ApplySnapshotObserver, @@ -178,6 +183,7 @@ where cmd_observers: Vec>>, read_index_observers: Vec>, pd_task_observers: Vec>, + update_safe_ts_observers: Vec>, // TODO: add endpoint } @@ -194,6 +200,7 @@ impl Default for Registry { cmd_observers: Default::default(), read_index_observers: Default::default(), pd_task_observers: Default::default(), + update_safe_ts_observers: Default::default(), } } } @@ -259,6 +266,9 @@ impl Registry { pub fn register_read_index_observer(&mut self, priority: u32, rio: BoxReadIndexObserver) { push!(priority, rio, self.read_index_observers); } + pub fn register_update_safe_ts_observer(&mut self, priority: u32, qo: BoxUpdateSafeTsObserver) { + push!(priority, qo, self.update_safe_ts_observers); + } } /// A macro that loops over all observers and returns early when error is found @@ -662,6 +672,16 @@ impl CoprocessorHost { } } + pub fn on_update_safe_ts(&self, region_id: u64, self_safe_ts: u64, leader_safe_ts: u64) { + if self.registry.query_observers.is_empty() { + return; + } + for observer in &self.registry.update_safe_ts_observers { + let observer = observer.observer.inner(); + observer.on_update_safe_ts(region_id, self_safe_ts, leader_safe_ts) + } + } + pub fn shutdown(&self) { for entry in &self.registry.admin_observers { entry.observer.inner().stop(); @@ -690,7 +710,7 @@ mod tests { use tikv_util::box_err; use crate::{ - coprocessor::*, + coprocessor::{dispatcher::BoxUpdateSafeTsObserver, *}, store::{SnapKey, Snapshot}, }; @@ -722,6 +742,7 @@ mod tests { PreApplySnapshot = 20, PostApplySnapshot = 21, ShouldPreApplySnapshot = 22, + OnUpdateSafeTs = 23, } impl Coprocessor for TestCoprocessor {} @@ -938,6 +959,13 @@ 
mod tests { fn on_applied_current_term(&self, _: StateRole, _: &Region) {} } + impl UpdateSafeTsObserver for TestCoprocessor { + fn on_update_safe_ts(&self, _: u64, _: u64, _: u64) { + self.called + .fetch_add(ObserverIndex::OnUpdateSafeTs as usize, Ordering::SeqCst); + } + } + macro_rules! assert_all { ($target:expr, $expect:expr) => {{ for (c, e) in ($target).iter().zip($expect) { @@ -972,6 +1000,8 @@ mod tests { .register_region_change_observer(1, BoxRegionChangeObserver::new(ob.clone())); host.registry .register_cmd_observer(1, BoxCmdObserver::new(ob.clone())); + host.registry + .register_update_safe_ts_observer(1, BoxUpdateSafeTsObserver::new(ob.clone())); let mut index: usize = 0; let region = Region::default(); @@ -1078,6 +1108,10 @@ mod tests { host.should_pre_apply_snapshot(); index += ObserverIndex::ShouldPreApplySnapshot as usize; assert_all!([&ob.called], &[index]); + + host.on_update_safe_ts(1, 1, 1); + index += ObserverIndex::OnUpdateSafeTs as usize; + assert_all!([&ob.called], &[index]); } #[test] diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index cc6bfb91b06..8a309dc4734 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -33,7 +33,7 @@ pub use self::{ dispatcher::{ BoxAdminObserver, BoxApplySnapshotObserver, BoxCmdObserver, BoxConsistencyCheckObserver, BoxPdTaskObserver, BoxQueryObserver, BoxRegionChangeObserver, BoxRoleObserver, - BoxSplitCheckObserver, CoprocessorHost, Registry, + BoxSplitCheckObserver, BoxUpdateSafeTsObserver, CoprocessorHost, Registry, }, error::{Error, Result}, region_info_accessor::{ @@ -531,6 +531,11 @@ pub trait ReadIndexObserver: Coprocessor { fn on_step(&self, _msg: &mut eraftpb::Message, _role: StateRole) {} } +pub trait UpdateSafeTsObserver: Coprocessor { + /// Hook after update self safe_ts and received leader safe_ts. 
+ fn on_update_safe_ts(&self, _: u64, _: u64, _: u64) {} +} + #[cfg(test)] mod tests { use super::*; diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index eb79965d617..1f709c6dce9 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -4304,10 +4304,11 @@ where // After the region commit merged, the region's key range is extended and the // region's `safe_ts` should reset to `min(source_safe_ts, target_safe_ts)` let source_read_progress = meta.region_read_progress.remove(&source.get_id()).unwrap(); - self.fsm - .peer - .read_progress - .merge_safe_ts(source_read_progress.safe_ts(), merge_index); + self.fsm.peer.read_progress.merge_safe_ts( + source_read_progress.safe_ts(), + merge_index, + &self.ctx.coprocessor_host, + ); // If a follower merges into a leader, a more recent read may happen // on the leader of the follower. So max ts should be updated after diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index edf88a561ba..0d7932a6169 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -2422,7 +2422,8 @@ where // Resume `read_progress` self.read_progress.resume(); // Update apply index to `last_applying_idx` - self.read_progress.update_applied(self.last_applying_idx); + self.read_progress + .update_applied(self.last_applying_idx, &ctx.coprocessor_host); } CheckApplyingSnapStatus::Idle => { // FIXME: It's possible that the snapshot applying task is canceled. @@ -3318,7 +3319,8 @@ where } self.pending_reads.gc(); - self.read_progress.update_applied(applied_index); + self.read_progress + .update_applied(applied_index, &ctx.coprocessor_host); // Only leaders need to update applied_term. 
if progress_to_be_updated && self.is_leader() { diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index db62674e6a5..922ba70a2c8 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -14,6 +14,7 @@ use std::{ u64, }; +use engine_traits::KvEngine; use kvproto::{ kvrpcpb::{self, KeyRange, LeaderInfo}, metapb::{self, Peer, PeerRole, Region, RegionEpoch}, @@ -28,9 +29,12 @@ use raft::{ use raft_proto::ConfChangeI; use tikv_util::{box_err, debug, info, time::monotonic_raw_now, Either}; use time::{Duration, Timespec}; +use txn_types::TimeStamp; use super::peer_storage; -use crate::{Error, Result}; +use crate::{coprocessor::CoprocessorHost, Error, Result}; + +const INVALID_TIMESTAMP: u64 = u64::MAX; pub fn find_peer(region: &metapb::Region, store_id: u64) -> Option<&metapb::Peer> { region @@ -929,13 +933,17 @@ impl RegionReadProgressRegistry { // Update `safe_ts` with the provided `LeaderInfo` and return the regions that // have the same `LeaderInfo` - pub fn handle_check_leaders(&self, leaders: Vec) -> Vec { + pub fn handle_check_leaders( + &self, + leaders: Vec, + coprocessor: &CoprocessorHost, + ) -> Vec { let mut regions = Vec::with_capacity(leaders.len()); let registry = self.registry.lock().unwrap(); for leader_info in leaders { let region_id = leader_info.get_region_id(); if let Some(rp) = registry.get(®ion_id) { - if rp.consume_leader_info(leader_info) { + if rp.consume_leader_info(leader_info, coprocessor) { regions.push(region_id); } } @@ -1012,11 +1020,17 @@ impl RegionReadProgress { } } - pub fn update_applied(&self, applied: u64) { + pub fn update_applied(&self, applied: u64, coprocessor: &CoprocessorHost) { let mut core = self.core.lock().unwrap(); if let Some(ts) = core.update_applied(applied) { if !core.pause { self.safe_ts.store(ts, AtomicOrdering::Release); + // No need to update leader safe ts here. 
+ coprocessor.on_update_safe_ts( + core.region_id, + TimeStamp::new(ts).physical(), + INVALID_TIMESTAMP, + ) } } } @@ -1036,18 +1050,34 @@ impl RegionReadProgress { } } - pub fn merge_safe_ts(&self, source_safe_ts: u64, merge_index: u64) { + pub fn merge_safe_ts( + &self, + source_safe_ts: u64, + merge_index: u64, + coprocessor: &CoprocessorHost, + ) { let mut core = self.core.lock().unwrap(); if let Some(ts) = core.merge_safe_ts(source_safe_ts, merge_index) { if !core.pause { self.safe_ts.store(ts, AtomicOrdering::Release); + // After region merge, self safe ts may decrease, so leader safe ts should be + // reset. + coprocessor.on_update_safe_ts( + core.region_id, + TimeStamp::new(ts).physical(), + TimeStamp::new(ts).physical(), + ) } } } // Consume the provided `LeaderInfo` to update `safe_ts` and return whether the // provided `LeaderInfo` is same as ours - pub fn consume_leader_info(&self, mut leader_info: LeaderInfo) -> bool { + pub fn consume_leader_info( + &self, + mut leader_info: LeaderInfo, + coprocessor: &CoprocessorHost, + ) -> bool { let mut core = self.core.lock().unwrap(); if leader_info.has_read_state() { // It is okay to update `safe_ts` without checking the `LeaderInfo`, the @@ -1061,6 +1091,9 @@ impl RegionReadProgress { } } } + let self_phy_ts = TimeStamp::new(self.safe_ts()).physical(); + let leader_phy_ts = TimeStamp::new(rs.get_safe_ts()).physical(); + coprocessor.on_update_safe_ts(leader_info.region_id, self_phy_ts, leader_phy_ts) } // whether the provided `LeaderInfo` is same as ours core.leader_info.leader_term == leader_info.term @@ -1357,6 +1390,7 @@ impl LatencyInspector { mod tests { use std::thread; + use engine_test::kv::KvTestEngine; use kvproto::{ metapb::{self, RegionEpoch}, raft_cmdpb::AdminRequest, @@ -1978,7 +2012,8 @@ mod tests { assert_eq!(rrp.safe_ts(), 10); assert_eq!(pending_items_num(&rrp), 10); - rrp.update_applied(20); + let coprocessor_host = CoprocessorHost::::default(); + rrp.update_applied(20, &coprocessor_host); 
assert_eq!(rrp.safe_ts(), 20); assert_eq!(pending_items_num(&rrp), 0); @@ -1990,7 +2025,7 @@ mod tests { assert!(pending_items_num(&rrp) <= cap); // `applied_index` large than all pending items will clear all pending items - rrp.update_applied(200); + rrp.update_applied(200, &coprocessor_host); assert_eq!(rrp.safe_ts(), 199); assert_eq!(pending_items_num(&rrp), 0); @@ -2004,9 +2039,9 @@ mod tests { rrp.update_safe_ts(301, 600); assert_eq!(pending_items_num(&rrp), 2); // `safe_ts` will update to 500 instead of 300 - rrp.update_applied(300); + rrp.update_applied(300, &coprocessor_host); assert_eq!(rrp.safe_ts(), 500); - rrp.update_applied(301); + rrp.update_applied(301, &coprocessor_host); assert_eq!(rrp.safe_ts(), 600); assert_eq!(pending_items_num(&rrp), 0); diff --git a/components/raftstore/src/store/worker/check_leader.rs b/components/raftstore/src/store/worker/check_leader.rs index 8821bb6118d..696caab7d69 100644 --- a/components/raftstore/src/store/worker/check_leader.rs +++ b/components/raftstore/src/store/worker/check_leader.rs @@ -6,16 +6,24 @@ use std::{ sync::{Arc, Mutex}, }; +use engine_traits::KvEngine; use fail::fail_point; use keys::{data_end_key, data_key, enc_start_key}; use kvproto::kvrpcpb::{KeyRange, LeaderInfo}; use tikv_util::worker::Runnable; -use crate::store::{fsm::store::StoreMeta, util::RegionReadProgressRegistry}; +use crate::{ + coprocessor::CoprocessorHost, + store::{fsm::store::StoreMeta, util::RegionReadProgressRegistry}, +}; -pub struct Runner { +pub struct Runner +where + E: KvEngine, +{ store_meta: Arc>, region_read_progress: RegionReadProgressRegistry, + coprocessor: CoprocessorHost, } pub enum Task { @@ -47,12 +55,16 @@ impl fmt::Display for Task { } } -impl Runner { - pub fn new(store_meta: Arc>) -> Runner { +impl Runner +where + E: KvEngine, +{ + pub fn new(store_meta: Arc>, coprocessor: CoprocessorHost) -> Runner { let region_read_progress = store_meta.lock().unwrap().region_read_progress.clone(); Runner { region_read_progress, 
store_meta, + coprocessor, } } @@ -96,7 +108,10 @@ impl Runner { } } -impl Runnable for Runner { +impl Runnable for Runner +where + E: KvEngine, +{ type Task = Task; fn run(&mut self, task: Task) { match task { @@ -111,7 +126,9 @@ impl Runnable for Runner { self.store_meta.lock().unwrap().store_id == Some(3), |_| {} ); - let regions = self.region_read_progress.handle_check_leaders(leaders); + let regions = self + .region_read_progress + .handle_check_leaders(leaders, &self.coprocessor); cb(regions); } Task::GetStoreTs { key_range, cb } => { @@ -124,6 +141,7 @@ impl Runnable for Runner { #[cfg(test)] mod tests { + use engine_test::kv::KvTestEngine; use keys::enc_end_key; use kvproto::metapb::Region; @@ -155,7 +173,8 @@ mod tests { } let meta = Arc::new(Mutex::new(StoreMeta::new(0))); - let runner = Runner::new(meta.clone()); + let coprocessor_host = CoprocessorHost::::default(); + let runner = Runner::new(meta.clone(), coprocessor_host); assert_eq!(0, runner.get_range_safe_ts(key_range(b"", b""))); add_region(&meta, 1, key_range(b"", b"k1"), 100); assert_eq!(100, runner.get_range_safe_ts(key_range(b"", b""))); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index ca95ddaf310..ba4c515557e 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -837,7 +837,10 @@ where causal_ob.register_to(self.coprocessor_host.as_mut().unwrap()); }; - let check_leader_runner = CheckLeaderRunner::new(engines.store_meta.clone()); + let check_leader_runner = CheckLeaderRunner::new( + engines.store_meta.clone(), + self.coprocessor_host.clone().unwrap(), + ); let check_leader_scheduler = self .background_worker .start("check-leader", check_leader_runner); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 2c1798877d9..f1626b9f2c9 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -390,7 +390,8 @@ impl ServerCluster { let 
(res_tag_factory, collector_reg_handle, rsmeter_cleanup) = self.init_resource_metering(&cfg.resource_metering); - let check_leader_runner = CheckLeaderRunner::new(store_meta.clone()); + let check_leader_runner = + CheckLeaderRunner::new(store_meta.clone(), coprocessor_host.clone()); let check_leader_scheduler = bg_worker.start("check-leader", check_leader_runner); let mut lock_mgr = LockManager::new(&cfg.pessimistic_txn); From cc127a069496dea5f05b7e2a3816c66f9c3c7713 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Fri, 9 Sep 2022 19:50:56 +0800 Subject: [PATCH 0203/1149] engine: update rust-rocksdb (#13393) close tikv/tikv#13095 Update rust-rocksdb Signed-off-by: tabokie --- Cargo.lock | 6 +++--- components/engine_rocks/src/write_batch.rs | 1 + tests/integrations/server/kv_service.rs | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8a0356d6611..4da587d6d4e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2758,7 +2758,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#827a5df22cd59dc708c4c6a87dd8735a2312773d" +source = "git+https://github.com/tikv/rust-rocksdb.git#4c859a208355bc15ceb7dc1f05303f68acfb4791" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2777,7 +2777,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#827a5df22cd59dc708c4c6a87dd8735a2312773d" +source = "git+https://github.com/tikv/rust-rocksdb.git#4c859a208355bc15ceb7dc1f05303f68acfb4791" dependencies = [ "bzip2-sys", "cc", @@ -4619,7 +4619,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#827a5df22cd59dc708c4c6a87dd8735a2312773d" +source = "git+https://github.com/tikv/rust-rocksdb.git#4c859a208355bc15ceb7dc1f05303f68acfb4791" dependencies = [ "libc 0.2.132", "librocksdb_sys", diff --git 
a/components/engine_rocks/src/write_batch.rs b/components/engine_rocks/src/write_batch.rs index f617608119b..6b92a285c76 100644 --- a/components/engine_rocks/src/write_batch.rs +++ b/components/engine_rocks/src/write_batch.rs @@ -105,6 +105,7 @@ impl engine_traits::WriteBatch for RocksWriteBatchVec { self.get_db() .multi_batch_write(self.as_inner(), &opt.into_raw()) .map_err(r2e) + .map(|_| ()) } else { self.get_db() .write_opt(&self.wbs[0], &opt.into_raw()) diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 70c7f9bda4c..6aca801b275 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -2191,7 +2191,9 @@ fn test_commands_write_detail() { assert!(wd.get_commit_log_nanos() > 0); assert!(wd.get_apply_batch_wait_nanos() > 0); assert!(wd.get_apply_log_nanos() > 0); - assert!(wd.get_apply_mutex_lock_nanos() > 0); + // Mutex has been removed from write path. + // Ref https://github.com/facebook/rocksdb/pull/7516 + // assert!(wd.get_apply_mutex_lock_nanos() > 0); assert!(wd.get_apply_write_wal_nanos() > 0); assert!(wd.get_apply_write_memtable_nanos() > 0); }; From 49223a70dc1578559a9effb162f8b20dbeede92f Mon Sep 17 00:00:00 2001 From: 3pointer Date: Tue, 13 Sep 2022 13:22:58 +0800 Subject: [PATCH 0204/1149] cloud: support backup to s3 when object lock enabled. 
(#13350) close tikv/tikv#13442 Signed-off-by: 3pointer Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 ++ components/cloud/aws/Cargo.toml | 2 ++ components/cloud/aws/src/s3.rs | 27 +++++++++++++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 4da587d6d4e..45e589819c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -255,6 +255,7 @@ name = "aws" version = "0.0.1" dependencies = [ "async-trait", + "base64", "bytes", "cloud", "fail", @@ -266,6 +267,7 @@ dependencies = [ "hyper-tls", "kvproto", "lazy_static", + "md5", "prometheus", "rusoto_core", "rusoto_credential", diff --git a/components/cloud/aws/Cargo.toml b/components/cloud/aws/Cargo.toml index 314e2281425..293509709db 100644 --- a/components/cloud/aws/Cargo.toml +++ b/components/cloud/aws/Cargo.toml @@ -37,6 +37,8 @@ thiserror = "1.0" lazy_static = "1.3" prometheus = { version = "0.13", default-features = false, features = ["nightly"] } uuid = "0.8" +md5 = "0.7.0" +base64 = "0.13.0" [dev-dependencies] futures = "0.3" diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index 3e9c3665f58..05f418e4c3a 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -50,6 +50,7 @@ pub struct Config { sse_kms_key_id: Option, storage_class: Option, multi_part_size: usize, + object_lock_enabled: bool, } impl Config { @@ -64,6 +65,7 @@ impl Config { sse_kms_key_id: None, storage_class: None, multi_part_size: MINIMUM_PART_SIZE, + object_lock_enabled: false, } } @@ -96,6 +98,7 @@ impl Config { force_path_style, sse_kms_key_id: StringNonEmpty::opt(attrs.get("sse_kms_key_id").unwrap_or(def).clone()), multi_part_size: MINIMUM_PART_SIZE, + object_lock_enabled: false, }) } @@ -128,6 +131,7 @@ impl Config { force_path_style: input.force_path_style, sse_kms_key_id: StringNonEmpty::opt(input.sse_kms_key_id), multi_part_size: MINIMUM_PART_SIZE, + object_lock_enabled: input.object_lock_enabled, }) } } @@ -232,6 +236,7 @@ struct S3Uploader<'client> { 
sse_kms_key_id: Option, storage_class: Option, multi_part_size: usize, + object_lock_enabled: bool, upload_id: String, parts: Vec, @@ -275,6 +280,13 @@ async fn try_read_exact( } } +fn get_content_md5(object_lock_enabled: bool, content: &[u8]) -> Option { + object_lock_enabled.then(|| { + let digest = md5::compute(content); + base64::encode(digest.0) + }) +} + /// Specifies the minimum size to use multi-part upload. /// AWS S3 requires each part to be at least 5 MiB. const MINIMUM_PART_SIZE: usize = 5 * 1024 * 1024; @@ -292,6 +304,7 @@ impl<'client> S3Uploader<'client> { sse_kms_key_id: config.sse_kms_key_id.as_ref().cloned(), storage_class: config.storage_class.as_ref().cloned(), multi_part_size: config.multi_part_size, + object_lock_enabled: config.object_lock_enabled, upload_id: "".to_owned(), parts: Vec::new(), } @@ -432,6 +445,7 @@ impl<'client> S3Uploader<'client> { upload_id: self.upload_id.clone(), part_number, content_length: Some(data.len() as i64), + content_md5: get_content_md5(self.object_lock_enabled, data), body: Some(data.to_vec().into()), ..Default::default() }) @@ -492,6 +506,7 @@ impl<'client> S3Uploader<'client> { ssekms_key_id: self.sse_kms_key_id.as_ref().map(|s| s.to_string()), storage_class: self.storage_class.as_ref().map(|s| s.to_string()), content_length: Some(data.len() as i64), + content_md5: get_content_md5(self.object_lock_enabled, data), body: Some(data.to_vec().into()), ..Default::default() }) @@ -590,6 +605,18 @@ mod tests { use super::*; + #[test] + fn test_s3_get_content_md5() { + // base64 encode md5sum "helloworld" + let code = "helloworld".to_string(); + let expect = "/F4DjTilcDIIVEHn/nAQsA==".to_string(); + let actual = get_content_md5(true, code.as_bytes()).unwrap(); + assert_eq!(actual, expect); + + let actual = get_content_md5(false, b"xxx"); + assert!(actual.is_none()) + } + #[test] fn test_s3_config() { let bucket_name = StringNonEmpty::required("mybucket".to_string()).unwrap(); From 
63465fabcb255ecb82e431f51a166e699cfc4aa3 Mon Sep 17 00:00:00 2001 From: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Date: Tue, 13 Sep 2022 15:00:58 +0800 Subject: [PATCH 0205/1149] log-backup: merge small files in each flush (#13233) close tikv/tikv#13232 Signed-off-by: Leavrth Co-authored-by: Ti Chi Robot --- Cargo.lock | 37 ++ components/backup-stream/Cargo.toml | 2 + components/backup-stream/src/endpoint.rs | 2 +- components/backup-stream/src/router.rs | 438 ++++++++++++------ components/backup-stream/src/utils.rs | 80 +++- components/backup-stream/tests/mod.rs | 123 +++-- components/cloud/aws/src/s3.rs | 64 +-- components/cloud/azure/src/azblob.rs | 48 +- components/cloud/gcp/src/gcs.rs | 83 ++-- components/cloud/src/blob.rs | 7 + components/external_storage/Cargo.toml | 1 + components/external_storage/export/Cargo.toml | 1 + .../external_storage/export/src/export.rs | 17 +- components/external_storage/src/hdfs.rs | 9 + components/external_storage/src/lib.rs | 21 +- components/external_storage/src/local.rs | 19 +- components/external_storage/src/noop.rs | 4 + components/sst_importer/src/sst_importer.rs | 28 +- 18 files changed, 725 insertions(+), 259 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 45e589819c2..3f64d59eed9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -148,6 +148,21 @@ dependencies = [ "futures-core", ] +[[package]] +name = "async-compression" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "345fd392ab01f746c717b1357165b76f0b67a60192007b234058c9045fdcf695" +dependencies = [ + "futures-core", + "futures-io", + "memchr", + "pin-project-lite", + "tokio", + "zstd", + "zstd-safe", +] + [[package]] name = "async-speed-limit" version = "0.4.0" @@ -445,6 +460,7 @@ dependencies = [ name = "backup-stream" version = "0.1.0" dependencies = [ + "async-compression", "async-trait", "bytes", "chrono", @@ -1653,6 +1669,7 @@ dependencies = [ name = "external_storage" version = "0.0.1" 
dependencies = [ + "async-compression", "async-trait", "bytes", "encryption", @@ -1690,6 +1707,7 @@ dependencies = [ name = "external_storage_export" version = "0.0.1" dependencies = [ + "async-compression", "async-trait", "aws", "azure", @@ -7166,6 +7184,25 @@ dependencies = [ "rand 0.7.3", ] +[[package]] +name = "zstd" +version = "0.11.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "5.0.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" +dependencies = [ + "libc 0.2.125", + "zstd-sys", +] + [[package]] name = "zstd-sys" version = "2.0.1+zstd.1.5.2" diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 6090d929291..8e6e43c8203 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -19,6 +19,7 @@ test = true harness = true [dependencies] +async-compression = { version = "0.3.14", features = ["tokio", "zstd"] } async-trait = { version = "0.1" } bytes = "1" chrono = "0.4" @@ -74,6 +75,7 @@ async-trait = "0.1" engine_panic = { path = "../engine_panic" } grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } hex = "0.4" +protobuf = { version = "2.8", features = ["bytes"] } rand = "0.8.0" tempdir = "0.3" tempfile = "3.0" diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 81374484463..d463964558a 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -615,7 +615,7 @@ where }) .collect::>(); range_router - .register_task(task.clone(), ranges.clone()) + .register_task(task.clone(), ranges.clone(), self.config.file_size_limit.0) .await?; for 
(start_key, end_key) in ranges { diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index f1280103e89..fd63cd1841e 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -14,12 +14,13 @@ use std::{ time::Duration, }; +use async_compression::{tokio::write::ZstdEncoder, Level}; use engine_traits::{CfName, CF_DEFAULT, CF_LOCK, CF_WRITE}; use external_storage::{BackendConfig, UnpinReader}; use external_storage_export::{create_storage, ExternalStorage}; use futures::io::Cursor; use kvproto::{ - brpb::{DataFileInfo, FileType, Metadata, StreamBackupTaskInfo}, + brpb::{DataFileGroup, DataFileInfo, FileType, MetaVersion, Metadata, StreamBackupTaskInfo}, raft_cmdpb::CmdType, }; use openssl::hash::{Hasher, MessageDigest}; @@ -53,17 +54,11 @@ use crate::{ metrics::{HANDLE_KV_HISTOGRAM, SKIP_KV_COUNTER}, subscription_track::TwoPhaseResolver, try_send, - utils::{self, SegmentMap, Slot, SlotMap, StopWatch}, + utils::{self, FilesReader, SegmentMap, SlotMap, StopWatch}, }; const FLUSH_FAILURE_BECOME_FATAL_THRESHOLD: usize = 30; -/// FLUSH_LOG_CONCURRENT_BATCH_COUNT specifies the concurrent count to write to -/// storage. 'Log backup' will produce a large mount of small files during flush -/// interval, and storage could take mistaken if writing all of these files to -/// storage concurrently. 
-const FLUSH_LOG_CONCURRENT_BATCH_COUNT: usize = 128; - #[derive(Clone, Debug)] pub enum TaskSelector { ByName(String), @@ -422,13 +417,20 @@ impl RouterInner { &self, mut task: StreamTask, ranges: Vec<(Vec, Vec)>, + merged_file_size_limit: u64, ) -> Result<()> { let task_name = task.info.take_name(); // register task info let prefix_path = self.prefix.join(&task_name); - let stream_task = - StreamTaskInfo::new(prefix_path, task, self.max_flush_interval, ranges.clone()).await?; + let stream_task = StreamTaskInfo::new( + prefix_path, + task, + self.max_flush_interval, + ranges.clone(), + merged_file_size_limit, + ) + .await?; self.tasks .lock() .await @@ -694,31 +696,34 @@ impl TempFileKey { } } - /// path_to_log_file specifies the path of record log. + /// path_to_log_file specifies the path of record log for v2. /// ```text - /// v1/${date}/${hour}/${store_id}/t00000071/434098800931373064-f0251bd5-1441-499a-8f53-adc0d1057a73.log + /// V1: v1/${date}/${hour}/${store_id}/t00000071/434098800931373064-f0251bd5-1441-499a-8f53-adc0d1057a73.log + /// V2: v1/${date}/${hour}/${store_id}/434098800931373064-f0251bd5-1441-499a-8f53-adc0d1057a73.log /// ``` - fn path_to_log_file(&self, store_id: u64, min_ts: u64, max_ts: u64) -> String { + /// For v2, we merged the small files (partition by table_id) into one file. + fn path_to_log_file(store_id: u64, min_ts: u64, max_ts: u64) -> String { format!( - "v1/{}/{}/{}/t{:08}/{:012}-{}.log", + "v1/{}/{}/{}/{}-{}.log", // We may delete a range of files, so using the max_ts for preventing remove some // records wrong. Self::format_date_time(max_ts, FormatType::Date), Self::format_date_time(max_ts, FormatType::Hour), store_id, - self.table_id, min_ts, uuid::Uuid::new_v4() ) } - /// path_to_schema_file specifies the path of schema log. + /// path_to_schema_file specifies the path of schema log for v2. 
/// ```text - /// v1/${date}/${hour}/${store_id}/schema-meta/434055683656384515-cc3cb7a3-e03b-4434-ab6c-907656fddf67.log + /// V1: v1/${date}/${hour}/${store_id}/schema-meta/434055683656384515-cc3cb7a3-e03b-4434-ab6c-907656fddf67.log + /// V2: v1/${date}/${hour}/${store_id}/schema-meta/434055683656384515-cc3cb7a3-e03b-4434-ab6c-907656fddf67.log /// ``` + /// For v2, we merged the small files (partition by table_id) into one file. fn path_to_schema_file(store_id: u64, min_ts: u64, max_ts: u64) -> String { format!( - "v1/{}/{}/{}/schema-meta/{:012}-{}.log", + "v1/{}/{}/{}/schema-meta/{}-{}.log", Self::format_date_time(max_ts, FormatType::Date), Self::format_date_time(max_ts, FormatType::Hour), store_id, @@ -727,11 +732,11 @@ impl TempFileKey { ) } - fn file_name(&self, store_id: u64, min_ts: TimeStamp, max_ts: TimeStamp) -> String { - if self.is_meta { - Self::path_to_schema_file(store_id, min_ts.into_inner(), max_ts.into_inner()) + fn file_name(store_id: u64, min_ts: u64, max_ts: u64, is_meta: bool) -> String { + if is_meta { + Self::path_to_schema_file(store_id, min_ts, max_ts) } else { - self.path_to_log_file(store_id, min_ts.into_inner(), max_ts.into_inner()) + Self::path_to_log_file(store_id, min_ts, max_ts) } } } @@ -748,7 +753,9 @@ pub struct StreamTaskInfo { /// prefixed keys). files: SlotMap, /// flushing_files contains files pending flush. - flushing_files: RwLock, DataFileInfo)>>, + flushing_files: RwLock>, + /// flushing_meta_files contains meta files pending flush. + flushing_meta_files: RwLock>, /// last_flush_ts represents last time this task flushed to storage. last_flush_time: AtomicPtr, /// flush_interval represents the tick interval of flush, setting by users. @@ -768,6 +775,8 @@ pub struct StreamTaskInfo { flush_fail_count: AtomicUsize, /// global checkpoint ts for this task. global_checkpoint_ts: AtomicU64, + /// The size limit of the merged file for this task. 
+ merged_file_size_limit: u64, } impl Drop for StreamTaskInfo { @@ -776,12 +785,19 @@ impl Drop for StreamTaskInfo { .flushing_files .get_mut() .drain(..) - .map(|(a, b, _)| (a, b)) - .chain(self.files.get_mut().drain()) + .chain(self.flushing_meta_files.get_mut().drain(..)) + .map(|(_, f, _)| f.local_path) + .map(std::fs::remove_file) + .partition(|r| r.is_ok()); + info!("stream task info dropped[1/2], removing flushing_temp files"; "success" => %success.len(), "failure" => %failed.len()); + let (success, failed): (Vec<_>, Vec<_>) = self + .files + .get_mut() + .drain() .map(|(_, f)| f.into_inner().local_path) .map(std::fs::remove_file) .partition(|r| r.is_ok()); - info!("stream task info dropped, removing temp files"; "success" => %success.len(), "failure" => %failed.len()) + info!("stream task info dropped[2/2], removing temp files"; "success" => %success.len(), "failure" => %failed.len()); } } @@ -804,6 +820,7 @@ impl StreamTaskInfo { task: StreamTask, flush_interval: Duration, ranges: Vec<(Vec, Vec)>, + merged_file_size_limit: u64, ) -> Result { tokio::fs::create_dir_all(&temp_dir).await?; let storage = Arc::from(create_storage( @@ -819,12 +836,14 @@ impl StreamTaskInfo { min_resolved_ts: TimeStamp::max(), files: SlotMap::default(), flushing_files: RwLock::default(), + flushing_meta_files: RwLock::default(), last_flush_time: AtomicPtr::new(Box::into_raw(Box::new(Instant::now()))), flush_interval, total_size: AtomicUsize::new(0), flushing: AtomicBool::new(false), flush_fail_count: AtomicUsize::new(0), global_checkpoint_ts: AtomicU64::new(start_ts), + merged_file_size_limit, }) } @@ -882,24 +901,29 @@ impl StreamTaskInfo { /// Flush all template files and generate corresponding metadata. pub async fn generate_metadata(&self, store_id: u64) -> Result { - let w = self.flushing_files.read().await; + let mut w = self.flushing_files.write().await; + let mut wm = self.flushing_meta_files.write().await; // Let's flush all files first... 
- futures::future::join_all(w.iter().map(|(_, f, _)| async move { - let file = &mut f.lock().await.inner; - file.flush().await?; - file.get_ref().sync_all().await?; - Result::Ok(()) - })) + futures::future::join_all( + w.iter_mut() + .chain(wm.iter_mut()) + .map(|(_, f, _)| async move { + let encoder = &mut f.inner; + encoder.shutdown().await?; + let file = encoder.get_mut(); + file.flush().await?; + file.get_ref().sync_all().await?; + Result::Ok(()) + }), + ) .await .into_iter() .map(|r| r.map_err(Error::from)) .fold(Ok(()), Result::and)?; - let mut metadata = MetadataInfo::with_capacity(w.len()); + let mut metadata = MetadataInfo::with_capacity(w.len() + wm.len()); metadata.set_store_id(store_id); - for (_, _, file_meta) in w.iter() { - metadata.push(file_meta.to_owned()) - } + // delay push files until log files are flushed Ok(metadata) } @@ -933,7 +957,7 @@ impl StreamTaskInfo { } /// move need-flushing files to flushing_files. - pub async fn move_to_flushing_files(&self, store_id: u64) -> Result<&Self> { + pub async fn move_to_flushing_files(&self) -> Result<&Self> { // if flushing_files is not empty, which represents this flush is a retry // operation. if !self.flushing_files.read().await.is_empty() { @@ -942,20 +966,25 @@ impl StreamTaskInfo { let mut w = self.files.write().await; let mut fw = self.flushing_files.write().await; + let mut fw_meta = self.flushing_meta_files.write().await; for (k, v) in w.drain() { // we should generate file metadata(calculate sha256) when moving file. // because sha256 calculation is a unsafe move operation. // we cannot re-calculate it in retry. 
// TODO refactor move_to_flushing_files and generate_metadata - let file_meta = v.lock().await.generate_metadata(&k, store_id)?; - fw.push((k, v, file_meta)); + let mut v = v.into_inner(); + let file_meta = v.generate_metadata(&k)?; + if file_meta.is_meta { + fw_meta.push((k, v, file_meta)); + } else { + fw.push((k, v, file_meta)); + } } Ok(self) } pub async fn clear_flushing_files(&self) { - for (_, v, _) in self.flushing_files.write().await.drain(..) { - let data_file = v.lock().await; + for (_, data_file, _) in self.flushing_files.write().await.drain(..) { debug!("removing data file"; "size" => %data_file.file_size, "name" => %data_file.local_path.display()); self.total_size .fetch_sub(data_file.file_size, Ordering::SeqCst); @@ -964,69 +993,161 @@ impl StreamTaskInfo { info!("remove template file"; "err" => ?e); } } + for (_, data_file, _) in self.flushing_meta_files.write().await.drain(..) { + debug!("removing meta data file"; "size" => %data_file.file_size, "name" => %data_file.local_path.display()); + self.total_size + .fetch_sub(data_file.file_size, Ordering::SeqCst); + if let Err(e) = data_file.remove_temp_file().await { + // if remove template failed, just skip it. + info!("remove template file"; "err" => ?e); + } + } } - async fn flush_log_file_to( + async fn merge_and_flush_log_files_to( storage: Arc, - file: &Mutex, + files: &[(TempFileKey, DataFile, DataFileInfo)], + metadata: &mut MetadataInfo, + is_meta: bool, ) -> Result<()> { - let data_file = file.lock().await; + let mut data_files_open = Vec::new(); + let mut data_file_infos = Vec::new(); + let mut merged_file_info = DataFileGroup::new(); + let mut stat_length = 0; + let mut max_ts: Option = None; + let mut min_ts: Option = None; + let mut min_resolved_ts: Option = None; + for (_, data_file, file_info) in files { + let mut file_info_clone = file_info.to_owned(); + // Update offset of file_info(DataFileInfo) + // and push it into merged_file_info(DataFileGroup). 
+ file_info_clone.set_offset(stat_length); + data_files_open.push({ + let file = File::open(data_file.local_path.clone()).await?; + let compress_length = file.metadata().await?.len(); + stat_length += compress_length; + file_info_clone.set_compress_length(compress_length); + file + }); + data_file_infos.push(file_info_clone); + + let rts = file_info.resolved_ts; + min_resolved_ts = min_resolved_ts.map_or(Some(rts), |r| Some(r.min(rts))); + min_ts = min_ts.map_or(Some(file_info.min_ts), |ts| Some(ts.min(file_info.min_ts))); + max_ts = max_ts.map_or(Some(file_info.max_ts), |ts| Some(ts.max(file_info.max_ts))); + } + let min_ts = min_ts.unwrap_or_default(); + let max_ts = max_ts.unwrap_or_default(); + merged_file_info.set_path(TempFileKey::file_name( + metadata.store_id, + min_ts, + max_ts, + is_meta, + )); + merged_file_info.set_data_files_info(data_file_infos.into()); + merged_file_info.set_length(stat_length); + merged_file_info.set_max_ts(max_ts); + merged_file_info.set_min_ts(min_ts); + merged_file_info.set_min_resolved_ts(min_resolved_ts.unwrap_or_default()); + // to do: limiter to storage let limiter = Limiter::builder(std::f64::INFINITY).build(); - let reader = File::open(data_file.local_path.clone()).await?; - let stat = reader.metadata().await?; - let reader = UnpinReader(Box::new(limiter.limit(reader.compat()))); - let filepath = &data_file.storage_path; - let est_len = stat.len(); - let ret = storage.write(filepath, reader, est_len).await; + let files_reader = FilesReader::new(data_files_open); + + let reader = UnpinReader(Box::new(limiter.limit(files_reader.compat()))); + let filepath = &merged_file_info.path; + + let ret = storage.write(filepath, reader, stat_length).await; + match ret { Ok(_) => { debug!( "backup stream flush success"; - "tmp file" => ?data_file.local_path, "storage file" => ?filepath, + "est_len" => ?stat_length, ); } Err(e) => { warn!("backup stream flush failed"; - "file" => ?data_file.local_path, - "est_len" => ?est_len, + "est_len" 
=> ?stat_length, "err" => ?e, ); return Err(Error::Io(e)); } } + + // push merged file into metadata + metadata.push(merged_file_info); Ok(()) } - pub async fn flush_log(&self) -> Result<()> { - // if failed to write storage, we should retry write flushing_files. + pub async fn flush_log(&self, metadata: &mut MetadataInfo) -> Result<()> { let storage = self.storage.clone(); - let files = self.flushing_files.write().await; - - for batch_files in files.chunks(FLUSH_LOG_CONCURRENT_BATCH_COUNT) { - let futs = batch_files - .iter() - .map(|(_, v, _)| Self::flush_log_file_to(storage.clone(), v)); - futures::future::try_join_all(futs).await?; - } + self.merge_log(metadata, storage.clone(), &self.flushing_files, false) + .await?; + self.merge_log(metadata, storage.clone(), &self.flushing_meta_files, true) + .await?; Ok(()) } - pub async fn flush_meta(&self, metadata_info: MetadataInfo) -> Result<()> { - let meta_path = metadata_info.path_to_meta(); - let meta_buff = metadata_info.marshal_to()?; - let buflen = meta_buff.len(); + async fn merge_log( + &self, + metadata: &mut MetadataInfo, + storage: Arc, + files_lock: &RwLock>, + is_meta: bool, + ) -> Result<()> { + let files = files_lock.write().await; + let mut batch_size = 0; + // file[batch_begin_index, i) is a batch + let mut batch_begin_index = 0; + // TODO: upload the merged file concurrently, + // then collect merged_file_infos and push them into `metadata`. 
+ for (i, (_, _, info)) in files.iter().enumerate() { + if batch_size >= self.merged_file_size_limit { + Self::merge_and_flush_log_files_to( + storage.clone(), + &files[batch_begin_index..i], + metadata, + is_meta, + ) + .await?; - self.storage - .write( - &meta_path, - UnpinReader(Box::new(Cursor::new(meta_buff))), - buflen as _, + batch_begin_index = i; + batch_size = 0; + } + + batch_size += info.length; + } + if batch_begin_index < files.len() { + Self::merge_and_flush_log_files_to( + storage.clone(), + &files[batch_begin_index..], + metadata, + is_meta, ) .await?; + } + + Ok(()) + } + + pub async fn flush_meta(&self, metadata_info: MetadataInfo) -> Result<()> { + if !metadata_info.file_groups.is_empty() { + let meta_path = metadata_info.path_to_meta(); + let meta_buff = metadata_info.marshal_to()?; + let buflen = meta_buff.len(); + + self.storage + .write( + &meta_path, + UnpinReader(Box::new(Cursor::new(meta_buff))), + buflen as _, + ) + .await?; + } Ok(()) } @@ -1055,25 +1176,29 @@ impl StreamTaskInfo { // generate meta data and prepare to flush to storage let mut metadata_info = self - .move_to_flushing_files(store_id) + .move_to_flushing_files() .await? .generate_metadata(store_id) .await?; - metadata_info.min_resolved_ts = metadata_info - .min_resolved_ts - .max(Some(resolved_ts_provided.into_inner())); - let rts = metadata_info.min_resolved_ts; crate::metrics::FLUSH_DURATION .with_label_values(&["generate_metadata"]) .observe(sw.lap().as_secs_f64()); // flush log file to storage. - self.flush_log().await?; + self.flush_log(&mut metadata_info).await?; + + // the field `min_resolved_ts` of metadata will be updated + // only after flush is done. 
+ metadata_info.min_resolved_ts = metadata_info + .min_resolved_ts + .max(Some(resolved_ts_provided.into_inner())); + let rts = metadata_info.min_resolved_ts; + // compress length let file_size_vec = metadata_info - .files + .file_groups .iter() - .map(|d| d.length) + .map(|d| (d.length, d.data_files_info.len())) .collect::>(); // flush meta file to storage. self.flush_meta(metadata_info).await?; @@ -1088,10 +1213,11 @@ impl StreamTaskInfo { .observe(sw.lap().as_secs_f64()); file_size_vec .iter() - .for_each(|size| crate::metrics::FLUSH_FILE_SIZE.observe(*size as _)); + .for_each(|(size, _)| crate::metrics::FLUSH_FILE_SIZE.observe(*size as _)); info!("log backup flush done"; - "files" => %file_size_vec.len(), - "total_size" => %file_size_vec.iter().sum::(), + "merged_files" => %file_size_vec.len(), // the number of the merged files + "files" => %file_size_vec.iter().map(|(_, v)| v).sum::(), + "total_size" => %file_size_vec.iter().map(|(v, _)| v).sum::(), // the size of the merged files after compressed "take" => ?begin.saturating_elapsed(), ); Ok(rts) @@ -1152,18 +1278,20 @@ struct DataFile { resolved_ts: TimeStamp, min_begin_ts: Option, sha256: Hasher, - inner: BufWriter, + // TODO: use lz4 with async feature + inner: ZstdEncoder>, start_key: Vec, end_key: Vec, number_of_entries: usize, file_size: usize, local_path: PathBuf, - storage_path: String, } #[derive(Debug)] pub struct MetadataInfo { - pub files: Vec, + // the field files is deprecated in v6.3.0 + // pub files: Vec, + pub file_groups: Vec, pub min_resolved_ts: Option, pub min_ts: Option, pub max_ts: Option, @@ -1173,7 +1301,7 @@ pub struct MetadataInfo { impl MetadataInfo { fn with_capacity(cap: usize) -> Self { Self { - files: Vec::with_capacity(cap), + file_groups: Vec::with_capacity(cap), min_resolved_ts: None, min_ts: None, max_ts: None, @@ -1185,8 +1313,8 @@ impl MetadataInfo { self.store_id = store_id; } - fn push(&mut self, file: DataFileInfo) { - let rts = file.resolved_ts; + fn push(&mut self, 
file: DataFileGroup) { + let rts = file.min_resolved_ts; self.min_resolved_ts = self.min_resolved_ts.map_or(Some(rts), |r| Some(r.min(rts))); self.min_ts = self .min_ts @@ -1194,16 +1322,17 @@ impl MetadataInfo { self.max_ts = self .max_ts .map_or(Some(file.max_ts), |ts| Some(ts.max(file.max_ts))); - self.files.push(file); + self.file_groups.push(file); } fn marshal_to(self) -> Result> { let mut metadata = Metadata::new(); - metadata.set_files(self.files.into()); + metadata.set_file_groups(self.file_groups.into()); metadata.set_store_id(self.store_id as _); metadata.set_resolved_ts(self.min_resolved_ts.unwrap_or_default()); metadata.set_min_ts(self.min_ts.unwrap_or(0)); metadata.set_max_ts(self.max_ts.unwrap_or(0)); + metadata.set_meta_version(MetaVersion::V2); metadata .write_to_bytes() @@ -1212,7 +1341,7 @@ impl MetadataInfo { fn path_to_meta(&self) -> String { format!( - "v1/backupmeta/{:012}-{}.meta", + "v1/backupmeta/{}-{}.meta", self.min_resolved_ts.unwrap_or_default(), uuid::Uuid::new_v4() ) @@ -1225,19 +1354,19 @@ impl DataFile { async fn new(local_path: impl AsRef) -> Result { let sha256 = Hasher::new(MessageDigest::sha256()) .map_err(|err| Error::Other(box_err!("openssl hasher failed to init: {}", err)))?; + let inner = BufWriter::with_capacity(128 * 1024, File::create(local_path.as_ref()).await?); Ok(Self { min_ts: TimeStamp::max(), max_ts: TimeStamp::zero(), resolved_ts: TimeStamp::zero(), min_begin_ts: None, - inner: BufWriter::with_capacity(128 * 1024, File::create(local_path.as_ref()).await?), + inner: ZstdEncoder::with_quality(inner, Level::Fastest), sha256, number_of_entries: 0, file_size: 0, start_key: vec![], end_key: vec![], local_path: local_path.as_ref().to_owned(), - storage_path: String::default(), }) } @@ -1313,15 +1442,11 @@ impl DataFile { } } - /// generage path for log file before flushing to Storage - fn set_storage_path(&mut self, path: String) { - self.storage_path = path; - } - - /// generate the metadata in protocol buffer of the 
file. - fn generate_metadata(&mut self, file_key: &TempFileKey, store_id: u64) -> Result { - self.set_storage_path(file_key.file_name(store_id, self.min_ts, self.max_ts)); - + /// generate the metadata v2 where each file becomes a part of the merged + /// file. + fn generate_metadata(&mut self, file_key: &TempFileKey) -> Result { + // Note: the field `storage_path` is empty!!! It will be stored in the upper + // layer `DataFileGroup`. let mut meta = DataFileInfo::new(); meta.set_sha256( self.sha256 @@ -1329,7 +1454,6 @@ impl DataFile { .map(|bytes| bytes.to_vec()) .map_err(|err| Error::Other(box_err!("openssl hasher failed to init: {}", err)))?, ); - meta.set_path(self.storage_path.clone()); meta.set_number_of_entries(self.number_of_entries as _); meta.set_max_ts(self.max_ts.into_inner() as _); meta.set_min_ts(self.min_ts.into_inner() as _); @@ -1385,7 +1509,7 @@ mod tests { codec::number::NumberEncoder, worker::{dummy_scheduler, ReceiverWrapper}, }; - use tokio::{fs::File, sync::Mutex}; + use tokio::fs::File; use txn_types::{Write, WriteType}; use super::*; @@ -1550,6 +1674,7 @@ mod tests { utils::wrap_key(make_table_key(table_id, b"")), utils::wrap_key(make_table_key(table_id + 1, b"")), )], + 0x100000, ) .await .expect("failed to register task") @@ -1593,43 +1718,56 @@ mod tests { let end_ts = TimeStamp::physical_now(); let files = router.tasks.lock().await.get("dummy").unwrap().clone(); - let meta = files - .move_to_flushing_files(1) + let mut meta = files + .move_to_flushing_files() .await? 
.generate_metadata(1) .await?; - assert_eq!(meta.files.len(), 3, "test file len = {}", meta.files.len()); + assert!( - meta.files.iter().all(|item| { - TimeStamp::new(item.min_ts as _).physical() >= start_ts - && TimeStamp::new(item.max_ts as _).physical() <= end_ts - && item.min_ts <= item.max_ts - }), + meta.file_groups + .iter() + .all(|group| group.data_files_info.iter().all(|item| { + TimeStamp::new(item.min_ts as _).physical() >= start_ts + && TimeStamp::new(item.max_ts as _).physical() <= end_ts + && item.min_ts <= item.max_ts + })), "meta = {:#?}; start ts = {}, end ts = {}", - meta.files, + meta.file_groups, start_ts, end_ts ); // in some case when flush failed to write files to storage. // we may run `generate_metadata` again with same files. - let another_meta = files - .move_to_flushing_files(1) + let mut another_meta = files + .move_to_flushing_files() .await? .generate_metadata(1) .await?; - assert_eq!(meta.files.len(), another_meta.files.len()); - for i in 0..meta.files.len() { - let file1 = meta.files.get(i).unwrap(); - let file2 = another_meta.files.get(i).unwrap(); + files.flush_log(&mut meta).await?; + files.flush_log(&mut another_meta).await?; + // meta updated + let files_num = meta + .file_groups + .iter() + .map(|v| v.data_files_info.len()) + .sum::(); + assert_eq!(files_num, 3, "test file len = {}", files_num); + for i in 0..meta.file_groups.len() { + let file_groups1 = meta.file_groups.get(i).unwrap(); + let file_groups2 = another_meta.file_groups.get(i).unwrap(); // we have to make sure two times sha256 of file must be the same. 
- assert_eq!(file1.sha256, file2.sha256); - assert_eq!(file1.start_key, file2.start_key); - assert_eq!(file1.end_key, file2.end_key); + for j in 0..file_groups1.data_files_info.len() { + let file1 = file_groups1.data_files_info.get(j).unwrap(); + let file2 = file_groups2.data_files_info.get(j).unwrap(); + assert_eq!(file1.sha256, file2.sha256); + assert_eq!(file1.start_key, file2.start_key); + assert_eq!(file1.end_key, file2.end_key); + } } - files.flush_log().await?; files.flush_meta(meta).await?; files.clear_flushing_files().await; @@ -1662,13 +1800,18 @@ mod tests { } assert_eq!(meta_count, 1); - assert_eq!(log_count, 3); + assert_eq!(log_count, 2); // flush twice Ok(()) } - fn mock_build_kv_events(table_id: i64, region_id: u64, resolved_ts: u64) -> ApplyEvents { + fn mock_build_large_kv_events(table_id: i64, region_id: u64, resolved_ts: u64) -> ApplyEvents { let mut events_builder = KvEventsBuilder::new(region_id, resolved_ts); - events_builder.put_table("default", table_id, b"hello", b"world"); + events_builder.put_table( + "default", + table_id, + b"hello", + "world".repeat(1024).as_bytes(), + ); events_builder.finish() } @@ -1682,19 +1825,21 @@ mod tests { info: task_info, is_paused: false, }; + let merged_file_size_limit = 0x10000; let task = StreamTaskInfo::new( tmp_dir.path().to_path_buf(), stream_task, Duration::from_secs(300), vec![(vec![], vec![])], + merged_file_size_limit, ) .await .unwrap(); // on_event - let region_count = FLUSH_LOG_CONCURRENT_BATCH_COUNT + 5; + let region_count = merged_file_size_limit / (4 * 1024); // 2 merged log files for i in 1..=region_count { - let kv_events = mock_build_kv_events(i as _, i as _, i as _); + let kv_events = mock_build_large_kv_events(i as _, i as _, i as _); task.on_events(kv_events).await.unwrap(); } // do_flush @@ -1716,7 +1861,7 @@ mod tests { } } assert_eq!(meta_count, 1); - assert_eq!(log_count, region_count); + assert_eq!(log_count, 2); } struct ErrorStorage { @@ -1779,6 +1924,15 @@ mod tests { fn 
read(&self, name: &str) -> Box { self.inner.read(name) } + + fn read_part( + &self, + name: &str, + off: u64, + len: u64, + ) -> Box { + self.inner.read_part(name, off, len) + } } fn build_kv_event(base: i32, count: i32) -> ApplyEvents { @@ -1845,6 +1999,7 @@ mod tests { is_paused: false, }, vec![], + 0x100000, ) .await .unwrap(); @@ -1871,7 +2026,7 @@ mod tests { router .get_task_info("cleanup_test") .await? - .move_to_flushing_files(1) + .move_to_flushing_files() .await?; write_simple_data(&router).await; let mut w = walkdir::WalkDir::new(&tmp).into_iter(); @@ -2039,6 +2194,7 @@ mod tests { stream_task, Duration::from_secs(300), vec![(vec![], vec![])], + 0x100000, ) .await .unwrap(); @@ -2115,6 +2271,10 @@ mod tests { fn read(&self, name: &str) -> Box { self.s.read(name) } + + fn read_part(&self, name: &str, off: u64, len: u64) -> Box { + self.s.read_part(name, off, len) + } } #[tokio::test] @@ -2126,7 +2286,19 @@ mod tests { f.write_all("test-data".as_bytes()).await?; let data_file = DataFile::new(file_path).await.unwrap(); - let result = StreamTaskInfo::flush_log_file_to(Arc::new(ms), &Mutex::new(data_file)).await; + let info = DataFileInfo::new(); + + let mut meta = MetadataInfo::with_capacity(1); + let kv_event = build_kv_event(1, 1); + let tmp_key = TempFileKey::of(&kv_event.events[0], 1); + let files = vec![(tmp_key, data_file, info)]; + let result = StreamTaskInfo::merge_and_flush_log_files_to( + Arc::new(ms), + &files[0..], + &mut meta, + false, + ) + .await; assert_eq!(result.is_ok(), true); Ok(()) } diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index ac1b3dec168..22163eccf5f 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -1,5 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+use core::pin::Pin; use std::{ borrow::Borrow, collections::{hash_map::RandomState, BTreeMap, HashMap}, @@ -13,7 +14,7 @@ use std::{ use engine_rocks::ReadPerfInstant; use engine_traits::{CfName, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE}; -use futures::{channel::mpsc, executor::block_on, FutureExt, StreamExt}; +use futures::{channel::mpsc, executor::block_on, ready, task::Poll, FutureExt, StreamExt}; use kvproto::raft_cmdpb::{CmdType, Request}; use raft::StateRole; use raftstore::{coprocessor::RegionInfoProvider, RegionInfo}; @@ -28,7 +29,11 @@ use tikv_util::{ worker::Scheduler, Either, }; -use tokio::sync::{oneshot, Mutex, RwLock}; +use tokio::{ + fs::File, + io::AsyncRead, + sync::{oneshot, Mutex, RwLock}, +}; use txn_types::{Key, Lock, LockType}; use crate::{ @@ -589,6 +594,39 @@ pub fn is_overlapping(range: (&[u8], &[u8]), range2: (&[u8], &[u8])) -> bool { } } +pub struct FilesReader { + files: Vec, + index: usize, +} + +impl FilesReader { + pub fn new(files: Vec) -> Self { + FilesReader { files, index: 0 } + } +} + +impl AsyncRead for FilesReader { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> Poll> { + let me = self.get_mut(); + + while me.index < me.files.len() { + let rem = buf.remaining(); + ready!(Pin::new(&mut me.files[me.index]).poll_read(cx, buf))?; + if buf.remaining() == rem { + me.index += 1; + } else { + return Poll::Ready(Ok(())); + } + } + + Poll::Ready(Ok(())) + } +} + #[cfg(test)] mod test { use std::{ @@ -601,6 +639,7 @@ mod test { use engine_traits::WriteOptions; use futures::executor::block_on; + use tokio::io::AsyncWriteExt; use crate::utils::{is_in_range, CallbackWaitGroup, SegmentMap}; @@ -788,4 +827,41 @@ mod test { items_size ); } + + #[tokio::test] + async fn test_files_reader() { + use tempdir::TempDir; + use tokio::{fs::File, io::AsyncReadExt}; + + use super::FilesReader; + + let dir = TempDir::new("test_files").unwrap(); + let files_num = 5; + let mut files_path 
= Vec::new(); + let mut expect_content = String::new(); + for i in 0..files_num { + let path = dir.path().join(format!("f{}", i)); + let mut file = File::create(&path).await.unwrap(); + let content = format!("{i}_{i}_{i}_{i}_{i}\n{i}{i}{i}{i}\n").repeat(10); + file.write_all(content.as_bytes()).await.unwrap(); + file.sync_all().await.unwrap(); + + files_path.push(path); + expect_content.push_str(&content); + } + + let mut files = Vec::new(); + for i in 0..files_num { + let file = File::open(&files_path[i]).await.unwrap(); + files.push(file); + } + + let mut files_reader = FilesReader::new(files); + let mut read_content = String::new(); + files_reader + .read_to_string(&mut read_content) + .await + .unwrap(); + assert_eq!(expect_content, read_content); + } } diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index 4a437421dac..de9b9893567 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -9,6 +9,7 @@ use std::{ time::Duration, }; +use async_compression::futures::write::ZstdDecoder; use backup_stream::{ errors::Result, metadata::{ @@ -19,14 +20,15 @@ use backup_stream::{ router::Router, Endpoint, Task, }; -use futures::{executor::block_on, Future}; +use futures::{executor::block_on, AsyncWriteExt, Future}; use grpcio::ChannelBuilder; use kvproto::{ - brpb::{Local, StorageBackend}, + brpb::{Local, Metadata, StorageBackend}, kvrpcpb::*, tikvpb::*, }; use pd_client::PdClient; +use protobuf::parse_from_bytes; use tempdir::TempDir; use test_raftstore::{new_server_cluster, Cluster, ServerCluster}; use test_util::retry; @@ -361,7 +363,37 @@ impl Suite { } } - fn check_for_write_records<'a>( + fn load_metadata_for_write_records(&self, path: &Path) -> HashMap> { + let mut meta_map: HashMap> = HashMap::new(); + for entry in WalkDir::new(path) { + let entry = entry.unwrap(); + if entry.file_type().is_file() + && entry + .file_name() + .to_str() + .map_or(false, |s| s.ends_with(".meta")) + { 
+ let content = std::fs::read(entry.path()).unwrap(); + let meta = parse_from_bytes::(content.as_ref()).unwrap(); + for g in meta.file_groups.into_iter() { + let path = g.path.split('/').last().unwrap(); + for f in g.data_files_info.into_iter() { + let file_info = meta_map.get_mut(path); + if let Some(v) = file_info { + v.push((f.offset as usize, (f.offset + f.compress_length) as usize)); + } else { + let v = + vec![(f.offset as usize, (f.offset + f.compress_length) as usize)]; + meta_map.insert(String::from(path), v); + } + } + } + } + } + meta_map + } + + async fn check_for_write_records<'a>( &self, path: &Path, key_set: impl std::iter::Iterator, @@ -370,6 +402,7 @@ impl Suite { let n = remain_keys.len(); let mut extra_key = 0; let mut extra_len = 0; + let meta_map = self.load_metadata_for_write_records(path); for entry in WalkDir::new(path) { let entry = entry.unwrap(); println!("checking: {:?}", entry); @@ -379,21 +412,31 @@ impl Suite { .to_str() .map_or(false, |s| s.ends_with(".log")) { - let content = std::fs::read(entry.path()).unwrap(); - let mut iter = EventIterator::new(content); - loop { - if !iter.valid() { - break; - } - iter.next().unwrap(); - if !remain_keys.remove(iter.key()) { - extra_key += 1; - extra_len += iter.key().len() + iter.value().len(); + let buf = std::fs::read(entry.path()).unwrap(); + let file_infos = meta_map.get(entry.file_name().to_str().unwrap()).unwrap(); + for &file_info in file_infos { + let mut decoder = ZstdDecoder::new(Vec::new()); + let pbuf: &[u8] = &buf[file_info.0..file_info.1]; + decoder.write_all(pbuf).await.unwrap(); + decoder.flush().await.unwrap(); + decoder.close().await.unwrap(); + let content = decoder.into_inner(); + + let mut iter = EventIterator::new(content); + loop { + if !iter.valid() { + break; + } + iter.next().unwrap(); + if !remain_keys.remove(iter.key()) { + extra_key += 1; + extra_len += iter.key().len() + iter.value().len(); + } + + let value = iter.value(); + let wf = 
WriteRef::parse(value).unwrap(); + assert_eq!(wf.short_value, Some(b"hello, world" as &[u8])); } - - let value = iter.value(); - let wf = WriteRef::parse(value).unwrap(); - assert_eq!(wf.short_value, Some(b"hello, world" as &[u8])); } } } @@ -671,10 +714,12 @@ mod test { let round2 = suite.write_records(256, 128, 1).await; suite.force_flush_files("test_basic"); suite.wait_for_flush(); - suite.check_for_write_records( - suite.flushed_files.path(), - round1.union(&round2).map(Vec::as_slice), - ); + suite + .check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(Vec::as_slice), + ) + .await; }); suite.cluster.shutdown(); } @@ -691,10 +736,12 @@ mod test { let round2 = suite.write_records(256, 128, 1).await; suite.force_flush_files("test_with_split"); suite.wait_for_flush(); - suite.check_for_write_records( - suite.flushed_files.path(), - round1.union(&round2).map(Vec::as_slice), - ); + suite + .check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(Vec::as_slice), + ) + .await; }); suite.cluster.shutdown(); } @@ -743,10 +790,12 @@ mod test { .into_encoded() }) .collect::>(); - suite.check_for_write_records( - suite.flushed_files.path(), - keys_encoded.iter().map(Vec::as_slice), - ); + suite + .check_for_write_records( + suite.flushed_files.path(), + keys_encoded.iter().map(Vec::as_slice), + ) + .await; }); suite.cluster.shutdown(); } @@ -765,10 +814,10 @@ mod test { let round2 = run_async_test(suite.write_records(256, 128, 1)); suite.force_flush_files("test_leader_down"); suite.wait_for_flush(); - suite.check_for_write_records( + run_async_test(suite.check_for_write_records( suite.flushed_files.path(), round1.union(&round2).map(Vec::as_slice), - ); + )); suite.cluster.shutdown(); } @@ -944,10 +993,10 @@ mod test { let keys2 = run_async_test(suite.write_records(256, 128, 1)); suite.force_flush_files("region_failure"); suite.wait_for_flush(); - suite.check_for_write_records( + 
run_async_test(suite.check_for_write_records( suite.flushed_files.path(), keys.union(&keys2).map(|s| s.as_slice()), - ); + )); } #[test] @@ -969,10 +1018,10 @@ mod test { let keys2 = run_async_test(suite.write_records(256, 128, 1)); suite.force_flush_files("initial_scan_failure"); suite.wait_for_flush(); - suite.check_for_write_records( + run_async_test(suite.check_for_write_records( suite.flushed_files.path(), keys.union(&keys2).map(|s| s.as_slice()), - ); + )); } #[test] @@ -1029,10 +1078,10 @@ mod test { let keys2 = run_async_test(suite.write_records(256, 128, 1)); suite.force_flush_files("fail_to_refresh_region"); suite.wait_for_flush(); - suite.check_for_write_records( + run_async_test(suite.check_for_write_records( suite.flushed_files.path(), keys.union(&keys2).map(|s| s.as_slice()), - ); + )); let leader = suite.cluster.leader_of_region(1).unwrap().store_id; let (tx, rx) = std::sync::mpsc::channel(); suite.endpoints[&leader] diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index 05f418e4c3a..469cac97d6c 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -221,6 +221,37 @@ impl S3Storage { } key.to_owned() } + + fn get_range(&self, name: &str, range: Option) -> Box { + let key = self.maybe_prefix_key(name); + let bucket = self.config.bucket.bucket.clone(); + debug!("read file from s3 storage"; "key" => %key); + let req = GetObjectRequest { + key, + bucket: (*bucket).clone(), + range, + ..Default::default() + }; + Box::new( + self.client + .get_object(req) + .map(move |future| match future { + Ok(out) => out.body.unwrap(), + Err(RusotoError::Service(GetObjectError::NoSuchKey(key))) => { + ByteStream::new(error_stream(io::Error::new( + io::ErrorKind::NotFound, + format!("no key {} at bucket {}", key, *bucket), + ))) + } + Err(e) => ByteStream::new(error_stream(io::Error::new( + io::ErrorKind::Other, + format!("failed to get object {}", e), + ))), + }) + .flatten_stream() + .into_async_read(), + ) + } } 
/// A helper for uploading a large files to S3 storage. @@ -565,33 +596,12 @@ impl BlobStorage for S3Storage { } fn get(&self, name: &str) -> Box { - let key = self.maybe_prefix_key(name); - let bucket = self.config.bucket.bucket.clone(); - debug!("read file from s3 storage"; "key" => %key); - let req = GetObjectRequest { - key, - bucket: (*bucket).clone(), - ..Default::default() - }; - Box::new( - self.client - .get_object(req) - .map(move |future| match future { - Ok(out) => out.body.unwrap(), - Err(RusotoError::Service(GetObjectError::NoSuchKey(key))) => { - ByteStream::new(error_stream(io::Error::new( - io::ErrorKind::NotFound, - format!("no key {} at bucket {}", key, *bucket), - ))) - } - Err(e) => ByteStream::new(error_stream(io::Error::new( - io::ErrorKind::Other, - format!("failed to get object {}", e), - ))), - }) - .flatten_stream() - .into_async_read(), - ) + self.get_range(name, None) + } + + fn get_part(&self, name: &str, off: u64, len: u64) -> Box { + // inclusive, bytes=0-499 -> [0, 499] + self.get_range(name, Some(format!("bytes={}-{}", off, off + len - 1))) } } diff --git a/components/cloud/azure/src/azblob.rs b/components/cloud/azure/src/azblob.rs index 2d7f2566509..5bf02696de7 100644 --- a/components/cloud/azure/src/azblob.rs +++ b/components/cloud/azure/src/azblob.rs @@ -553,6 +553,33 @@ impl AzureStorage { } key.to_owned() } + + fn get_range( + &self, + name: &str, + range: Option>, + ) -> Box { + let name = self.maybe_prefix_key(name); + debug!("read file from Azure storage"; "key" => %name); + let t = async move { + let blob_client = self.client_builder.get_client().await?.as_blob_client(name); + + let builder = if let Some(r) = range { + blob_client.get().range(r) + } else { + blob_client.get() + }; + + builder + .execute() + .await + .map(|res| res.data) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, format!("{}", e))) + }; + let k = stream::once(t); + let t = k.boxed().into_async_read(); + Box::new(t) + } } #[async_trait] @@ 
-576,22 +603,11 @@ impl BlobStorage for AzureStorage { } fn get(&self, name: &str) -> Box { - let name = self.maybe_prefix_key(name); - debug!("read file from Azure storage"; "key" => %name); - let t = async move { - self.client_builder - .get_client() - .await? - .as_blob_client(name) - .get() - .execute() - .await - .map(|res| res.data) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, format!("{}", e))) - }; - let k = stream::once(t); - let t = k.boxed().into_async_read(); - Box::new(t) + self.get_range(name, None) + } + + fn get_part(&self, name: &str, off: u64, len: u64) -> Box { + self.get_range(name, Some(off..off + len)) } } diff --git a/components/cloud/gcp/src/gcs.rs b/components/cloud/gcp/src/gcs.rs index 799d1b02ee9..e8e8ad20ee9 100644 --- a/components/cloud/gcp/src/gcs.rs +++ b/components/cloud/gcp/src/gcs.rs @@ -10,6 +10,7 @@ use futures_util::{ io::{AsyncRead, AsyncReadExt, Cursor}, stream::{StreamExt, TryStreamExt}, }; +use http::HeaderValue; use hyper::{client::HttpConnector, Body, Client, Request, Response, StatusCode}; use hyper_tls::HttpsConnector; pub use kvproto::brpb::{Bucket as InputBucket, CloudDynamic, Gcs as InputConfig}; @@ -345,6 +346,49 @@ impl GcsStorage { { Box::new(error_stream(io::Error::new(kind, e)).into_async_read()) } + + fn get_range(&self, name: &str, range: Option) -> Box { + let bucket = self.config.bucket.bucket.to_string(); + let name = self.maybe_prefix_key(name); + debug!("read file from GCS storage"; "key" => %name); + let oid = match ObjectId::new(bucket, name) { + Ok(oid) => oid, + Err(e) => return GcsStorage::error_to_async_read(io::ErrorKind::InvalidInput, e), + }; + let mut request = match Object::download(&oid, None /* optional */) { + Ok(request) => request.map(|_: io::Empty| Body::empty()), + Err(e) => return GcsStorage::error_to_async_read(io::ErrorKind::Other, e), + }; + if let Some(r) = range { + let header_value = match HeaderValue::from_str(&r) { + Ok(v) => v, + Err(e) => return 
GcsStorage::error_to_async_read(io::ErrorKind::Other, e), + }; + request.headers_mut().insert("Range", header_value); + } + Box::new( + self.make_request(request, tame_gcs::Scopes::ReadOnly) + .and_then(|response| async { + if response.status().is_success() { + Ok(response.into_body().map_err(|e| { + io::Error::new( + io::ErrorKind::Other, + format!("download from GCS error: {}", e), + ) + })) + } else { + Err(status_code_error( + response.status(), + "bucket read".to_string(), + )) + } + }) + .err_into::() + .try_flatten_stream() + .boxed() // this `.boxed()` pin the stream. + .into_async_read(), + ) + } } fn change_host(host: &StringNonEmpty, url: &str) -> Option { @@ -449,39 +493,12 @@ impl BlobStorage for GcsStorage { } fn get(&self, name: &str) -> Box { - let bucket = self.config.bucket.bucket.to_string(); - let name = self.maybe_prefix_key(name); - debug!("read file from GCS storage"; "key" => %name); - let oid = match ObjectId::new(bucket, name) { - Ok(oid) => oid, - Err(e) => return GcsStorage::error_to_async_read(io::ErrorKind::InvalidInput, e), - }; - let request = match Object::download(&oid, None /* optional */) { - Ok(request) => request.map(|_: io::Empty| Body::empty()), - Err(e) => return GcsStorage::error_to_async_read(io::ErrorKind::Other, e), - }; - Box::new( - self.make_request(request, tame_gcs::Scopes::ReadOnly) - .and_then(|response| async { - if response.status().is_success() { - Ok(response.into_body().map_err(|e| { - io::Error::new( - io::ErrorKind::Other, - format!("download from GCS error: {}", e), - ) - })) - } else { - Err(status_code_error( - response.status(), - "bucket read".to_string(), - )) - } - }) - .err_into::() - .try_flatten_stream() - .boxed() // this `.boxed()` pin the stream. 
- .into_async_read(), - ) + self.get_range(name, None) + } + + fn get_part(&self, name: &str, off: u64, len: u64) -> Box { + // inclusive, bytes=0-499 -> [0, 499] + self.get_range(name, Some(format!("bytes={}-{}", off, off + len - 1))) } } diff --git a/components/cloud/src/blob.rs b/components/cloud/src/blob.rs index 2e38097e385..d80d3a47a28 100644 --- a/components/cloud/src/blob.rs +++ b/components/cloud/src/blob.rs @@ -46,6 +46,9 @@ pub trait BlobStorage: 'static + Send + Sync { /// Read all contents of the given path. fn get(&self, name: &str) -> Box; + + /// Read part of contents of the given path. + fn get_part(&self, name: &str, off: u64, len: u64) -> Box; } impl BlobConfig for dyn BlobStorage { @@ -72,6 +75,10 @@ impl BlobStorage for Box { fn get(&self, name: &str) -> Box { (**self).get(name) } + + fn get_part(&self, name: &str, off: u64, len: u64) -> Box { + (**self).get_part(name, off, len) + } } #[derive(Clone, Debug, PartialEq)] diff --git a/components/external_storage/Cargo.toml b/components/external_storage/Cargo.toml index 049f8ab2e43..b74af6ff39d 100644 --- a/components/external_storage/Cargo.toml +++ b/components/external_storage/Cargo.toml @@ -16,6 +16,7 @@ cloud-storage-grpc = [ failpoints = ["fail/failpoints"] [dependencies] +async-compression = { version = "0.3.14", features = ["futures-io", "zstd"] } async-trait = "0.1" bytes = "1.0" encryption = { path = "../encryption" } diff --git a/components/external_storage/export/Cargo.toml b/components/external_storage/export/Cargo.toml index 1f75af2734a..82ff01c2afb 100644 --- a/components/external_storage/export/Cargo.toml +++ b/components/external_storage/export/Cargo.toml @@ -74,6 +74,7 @@ tokio = { version = "1.5", features = ["time", "rt", "net"], optional = true } tokio-util = { version = "0.7", features = ["compat"], optional = true } url = "2.0" async-trait = "0.1" +async-compression = { version = "0.3.14", features = ["futures-io", "zstd"]} [dev-dependencies] matches = "0.1.8" diff --git 
a/components/external_storage/export/src/export.rs b/components/external_storage/export/src/export.rs index 0fb24ef48ce..6ce16334aef 100644 --- a/components/external_storage/export/src/export.rs +++ b/components/external_storage/export/src/export.rs @@ -9,6 +9,7 @@ use std::{ sync::Arc, }; +use async_compression::futures::bufread::ZstdDecoder; use async_trait::async_trait; #[cfg(feature = "cloud-aws")] pub use aws::{Config as S3Config, S3Storage}; @@ -28,6 +29,7 @@ pub use external_storage::{ read_external_storage_into_file, ExternalStorage, LocalStorage, NoopStorage, UnpinReader, }; use futures_io::AsyncRead; +use futures_util::io::BufReader; #[cfg(feature = "cloud-gcp")] pub use gcp::{Config as GcsConfig, GcsStorage}; pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; @@ -324,16 +326,25 @@ impl ExternalStorage for EncryptedExternalStorage { fn read(&self, name: &str) -> Box { self.storage.read(name) } + fn read_part(&self, name: &str, off: u64, len: u64) -> Box { + self.storage.read_part(name, off, len) + } fn restore( &self, storage_name: &str, restore_name: std::path::PathBuf, + compressed_range: Option<(u64, u64)>, expected_length: u64, expected_sha256: Option>, speed_limiter: &Limiter, file_crypter: Option, ) -> io::Result<()> { - let reader = self.read(storage_name); + let reader = if let Some((off, len)) = compressed_range { + let r = self.read_part(storage_name, off, len); + Box::new(ZstdDecoder::new(BufReader::new(r))) + } else { + self.read(storage_name) + }; let file_writer: &mut dyn Write = &mut self.key_manager.create_file_for_write(&restore_name)?; let min_read_speed: usize = 8192; @@ -367,4 +378,8 @@ impl ExternalStorage for BlobStore { fn read(&self, name: &str) -> Box { (**self).get(name) } + + fn read_part(&self, name: &str, off: u64, len: u64) -> Box { + (**self).get_part(name, off, len) + } } diff --git a/components/external_storage/src/hdfs.rs b/components/external_storage/src/hdfs.rs index 175104d06cb..53574633c73 100644 --- 
a/components/external_storage/src/hdfs.rs +++ b/components/external_storage/src/hdfs.rs @@ -134,6 +134,15 @@ impl ExternalStorage for HdfsStorage { fn read(&self, _name: &str) -> Box { unimplemented!("currently only HDFS export is implemented") } + + fn read_part( + &self, + _name: &str, + _off: u64, + _len: u64, + ) -> Box { + unimplemented!("currently only HDFS export is implemented") + } } #[cfg(test)] diff --git a/components/external_storage/src/lib.rs b/components/external_storage/src/lib.rs index afae433e54a..97f0f83ddbc 100644 --- a/components/external_storage/src/lib.rs +++ b/components/external_storage/src/lib.rs @@ -15,10 +15,12 @@ use std::{ time::Duration, }; +use async_compression::futures::bufread::ZstdDecoder; use async_trait::async_trait; use encryption::{from_engine_encryption_method, DecrypterReader, Iv}; use engine_traits::FileEncryptionInfo; use file_system::File; +use futures::io::BufReader; use futures_io::AsyncRead; use futures_util::AsyncReadExt; use openssl::hash::{Hasher, MessageDigest}; @@ -75,17 +77,26 @@ pub trait ExternalStorage: 'static + Send + Sync { /// Read all contents of the given path. fn read(&self, name: &str) -> Box; + /// Read part of contents of the given path. + fn read_part(&self, name: &str, off: u64, len: u64) -> Box; + /// Read from external storage and restore to the given path fn restore( &self, storage_name: &str, restore_name: std::path::PathBuf, + compressed_range: Option<(u64, u64)>, expected_length: u64, expected_sha256: Option>, speed_limiter: &Limiter, file_crypter: Option, ) -> io::Result<()> { - let reader = self.read(storage_name); + let reader: Box = if let Some((off, len)) = compressed_range { + let r = self.read_part(storage_name, off, len); + Box::new(ZstdDecoder::new(BufReader::new(r))) + } else { + self.read(storage_name) + }; let output: &mut dyn Write = &mut File::create(restore_name)?; // the minimum speed of reading data, in bytes/second. 
// if reading speed is slower than this rate, we will stop with @@ -122,6 +133,10 @@ impl ExternalStorage for Arc { fn read(&self, name: &str) -> Box { (**self).read(name) } + + fn read_part(&self, name: &str, off: u64, len: u64) -> Box { + (**self).read_part(name, off, len) + } } #[async_trait] @@ -141,6 +156,10 @@ impl ExternalStorage for Box { fn read(&self, name: &str) -> Box { self.as_ref().read(name) } + + fn read_part(&self, name: &str, off: u64, len: u64) -> Box { + self.as_ref().read_part(name, off, len) + } } /// Wrap the reader with file_crypter. diff --git a/components/external_storage/src/local.rs b/components/external_storage/src/local.rs index 80c22929525..4b22de96a6a 100644 --- a/components/external_storage/src/local.rs +++ b/components/external_storage/src/local.rs @@ -2,7 +2,7 @@ use std::{ fs::File as StdFile, - io, + io::{self, BufReader, Read, Seek}, marker::Unpin, path::{Path, PathBuf}, sync::Arc, @@ -130,6 +130,23 @@ impl ExternalStorage for LocalStorage { Err(e) => Box::new(error_stream(e).into_async_read()) as _, } } + + fn read_part(&self, name: &str, off: u64, len: u64) -> Box { + debug!("read part of file from local storage"; + "name" => %name, "off" => %off, "len" => %len, "base" => %self.base.display()); + + let mut file = match StdFile::open(self.base.join(name)) { + Ok(file) => file, + Err(e) => return Box::new(error_stream(e).into_async_read()) as _, + }; + match file.seek(std::io::SeekFrom::Start(off)) { + Ok(_) => (), + Err(e) => return Box::new(error_stream(e).into_async_read()) as _, + }; + let reader = BufReader::new(file); + let take = reader.take(len); + Box::new(AllowStdIo::new(take)) as _ + } } #[cfg(test)] diff --git a/components/external_storage/src/noop.rs b/components/external_storage/src/noop.rs index cb590ca6e44..42746742624 100644 --- a/components/external_storage/src/noop.rs +++ b/components/external_storage/src/noop.rs @@ -47,6 +47,10 @@ impl ExternalStorage for NoopStorage { fn read(&self, _name: &str) -> Box { 
Box::new(io::empty().compat()) } + + fn read_part(&self, _name: &str, _off: u64, _len: u64) -> Box { + Box::new(io::empty().compat()) + } } #[cfg(test)] diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 806066bd202..405991b1efe 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -225,6 +225,7 @@ impl SstImporter { fn download_file_from_external_storage( &self, + compressed_range: Option<(u64, u64)>, file_length: u64, src_file_name: &str, dst_file: std::path::PathBuf, @@ -265,6 +266,7 @@ impl SstImporter { let result = ext_storage.restore( src_file_name, dst_file.clone(), + compressed_range, file_length, expect_sha256, speed_limiter, @@ -300,8 +302,10 @@ impl SstImporter { backend: &StorageBackend, speed_limiter: &Limiter, ) -> Result { - let name = meta.get_name(); - let path = self.dir.get_import_path(name)?; + let offset = meta.get_offset(); + let src_name = meta.get_name(); + let dst_name = format!("{}_{}", src_name, offset); + let path = self.dir.get_import_path(&dst_name)?; let start = Instant::now(); let sha256 = meta.get_sha256().to_vec(); let expected_sha256 = if !sha256.is_empty() { @@ -313,16 +317,22 @@ impl SstImporter { return Ok(path.save); } - let lock = self.file_locks.entry(name.to_string()).or_default(); + let lock = self.file_locks.entry(dst_name.to_string()).or_default(); if path.save.exists() { return Ok(path.save); } + let length = meta.get_compress_length(); + let compressed_range = if length == 0 { + None + } else { + Some((offset, length)) + }; self.download_file_from_external_storage( - // don't check file length after download file for now. 
+ compressed_range, meta.get_length(), - name, + src_name, path.temp.clone(), backend, expected_sha256, @@ -335,7 +345,7 @@ impl SstImporter { None, speed_limiter, )?; - info!("download file finished {}", name); + info!("download file finished {}, offset {}", src_name, offset); if let Some(p) = path.save.parent() { // we have v1 prefix in file name. @@ -347,10 +357,11 @@ impl SstImporter { } })?; } + file_system::rename(path.temp, path.save.clone())?; drop(lock); - self.file_locks.remove(name); + self.file_locks.remove(&dst_name); IMPORTER_APPLY_DURATION .with_label_values(&["download"]) @@ -494,6 +505,7 @@ impl SstImporter { }); self.download_file_from_external_storage( + None, meta.length, name, path.temp.clone(), @@ -1251,6 +1263,7 @@ mod tests { let path = importer.dir.get_import_path(file_name).unwrap(); importer .download_file_from_external_storage( + None, meta.get_length(), file_name, path.temp.clone(), @@ -1285,6 +1298,7 @@ mod tests { let path = importer.dir.get_import_path(kv_meta.get_name()).unwrap(); importer .download_file_from_external_storage( + None, kv_meta.get_length(), kv_meta.get_name(), path.temp.clone(), From 8c93b91fea7396b9261551297d1bfa037c82e605 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Tue, 13 Sep 2022 15:31:00 +0800 Subject: [PATCH 0206/1149] server: support customized addr/status_addr (#13234) ref tikv/tikv#12849 Support self-defined addr/status_addr Signed-off-by: CalvinNeo Co-authored-by: Ti Chi Robot --- components/server/src/server.rs | 2 +- components/test_raftstore/src/server.rs | 2 +- src/server/node.rs | 53 +++++++++++++++++-------- src/server/status_server/mod.rs | 9 +++-- 4 files changed, 43 insertions(+), 23 deletions(-) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index ba4c515557e..8b49becc8e3 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -728,7 +728,7 @@ where storage_read_pools.handle() }; - let storage = create_raft_storage::<_, _, _, F>( + 
let storage = create_raft_storage::<_, _, _, F, _>( engines.engine.clone(), &self.config.storage, storage_read_pool_handle, diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index f1626b9f2c9..72282f02dc0 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -405,7 +405,7 @@ impl ServerCluster { cfg.quota.max_delay_duration, cfg.quota.enable_auto_tune, )); - let store = create_raft_storage::<_, _, _, F>( + let store = create_raft_storage::<_, _, _, F, _>( engine, &cfg.storage, storage_read_pool.handle(), diff --git a/src/server/node.rs b/src/server/node.rs index d8bee9abfd7..0916ebc8b9c 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -35,9 +35,9 @@ use super::{RaftKv, Result}; use crate::{ import::SstImporter, read_pool::ReadPoolHandle, - server::{lock_manager::LockManager, Config as ServerConfig}, + server::Config as ServerConfig, storage::{ - config::Config as StorageConfig, kv::FlowStatsReporter, + config::Config as StorageConfig, kv::FlowStatsReporter, lock_manager, txn::flow_controller::FlowController, DynamicConfigs as StorageDynamicConfigs, Storage, }, }; @@ -47,11 +47,17 @@ const CHECK_CLUSTER_BOOTSTRAPPED_RETRY_INTERVAL: Duration = Duration::from_secs( /// Creates a new storage engine which is backed by the Raft consensus /// protocol. 
-pub fn create_raft_storage( +pub fn create_raft_storage< + S, + EK, + R: FlowStatsReporter, + F: KvFormat, + LM: lock_manager::LockManager, +>( engine: RaftKv, cfg: &StorageConfig, read_pool: ReadPoolHandle, - lock_mgr: LockManager, + lock_mgr: LM, concurrency_manager: ConcurrencyManager, dynamic_configs: StorageDynamicConfigs, flow_controller: Arc, @@ -59,7 +65,7 @@ pub fn create_raft_storage( resource_tag_factory: ResourceTagFactory, quota_limiter: Arc, feature_gate: FeatureGate, -) -> Result, LockManager, F>> +) -> Result, LM, F>> where S: RaftStoreRouter + LocalReadRouter + 'static, EK: KvEngine, @@ -119,19 +125,27 @@ where Some(s) => s, }; store.set_id(INVALID_ID); - if cfg.advertise_addr.is_empty() { - store.set_address(cfg.addr.clone()); - store.set_peer_address(cfg.addr.clone()); - } else { - store.set_address(cfg.advertise_addr.clone()); - store.set_peer_address(cfg.advertise_addr.clone()); + if store.get_address().is_empty() { + if cfg.advertise_addr.is_empty() { + store.set_address(cfg.addr.clone()); + if store.get_peer_address().is_empty() { + store.set_peer_address(cfg.addr.clone()); + } + } else { + store.set_address(cfg.advertise_addr.clone()); + if store.get_peer_address().is_empty() { + store.set_peer_address(cfg.advertise_addr.clone()); + } + } } - if cfg.advertise_status_addr.is_empty() { - store.set_status_address(cfg.status_addr.clone()); - } else { - store.set_status_address(cfg.advertise_status_addr.clone()) + if store.get_status_address().is_empty() { + if cfg.advertise_status_addr.is_empty() { + store.set_status_address(cfg.status_addr.clone()); + } else { + store.set_status_address(cfg.advertise_status_addr.clone()) + } } - if store.get_version() == "" { + if store.get_version().is_empty() { store.set_version(env!("CARGO_PKG_VERSION").to_string()); } @@ -142,7 +156,7 @@ where }; store.set_start_timestamp(chrono::Local::now().timestamp()); - if store.get_git_hash() == "" { + if store.get_git_hash().is_empty() { store.set_git_hash( 
option_env!("TIKV_BUILD_GIT_HASH") .unwrap_or("Unknown git hash") @@ -251,6 +265,11 @@ where self.store.get_id() } + /// Gets a copy of Store which is registered to Pd. + pub fn store(&self) -> metapb::Store { + self.store.clone() + } + /// Gets the Scheduler of RaftstoreConfigTask, it must be called after /// start. pub fn refresh_config_scheduler(&mut self) -> Scheduler { diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 7c001baec1e..78302550fd5 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -1,5 +1,6 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. +/// Provides profilers for TiKV. mod profile; use std::{ error::Error as StdError, @@ -39,6 +40,10 @@ use openssl::{ x509::X509, }; use pin_project::pin_project; +pub use profile::{ + activate_heap_profile, deactivate_heap_profile, jeprof_heap_profile, list_heap_profiles, + read_file, start_one_cpu_profile, start_one_heap_profile, +}; use prometheus::TEXT_FORMAT; use raftstore::store::{transport::CasualRouter, CasualMessage}; use regex::Regex; @@ -56,10 +61,6 @@ use tokio::{ }; use tokio_openssl::SslStream; -use self::profile::{ - activate_heap_profile, deactivate_heap_profile, jeprof_heap_profile, list_heap_profiles, - read_file, start_one_cpu_profile, start_one_heap_profile, -}; use crate::{ config::{ConfigController, LogLevel}, server::Result, From 2563311bca1083a803e6b360df03e91dc129e0fa Mon Sep 17 00:00:00 2001 From: haojinming Date: Wed, 14 Sep 2022 20:02:59 +0800 Subject: [PATCH 0207/1149] test: Separate TestPdClient from test_raftstore component (#13453) close tikv/tikv#13452 Signed-off-by: haojinming --- Cargo.lock | 27 +++++++- Cargo.toml | 1 + components/causal_ts/Cargo.toml | 2 +- components/causal_ts/benches/tso.rs | 2 +- components/causal_ts/src/observer.rs | 2 +- components/causal_ts/src/tso.rs | 2 +- components/cdc/Cargo.toml | 1 + components/cdc/src/endpoint.rs | 3 +- components/test_pd_client/Cargo.toml | 
23 +++++++ components/test_pd_client/src/lib.rs | 8 +++ .../src/pd.rs | 64 ++++++++++++++++++- components/test_raftstore/Cargo.toml | 1 + components/test_raftstore/src/cluster.rs | 1 + components/test_raftstore/src/lib.rs | 4 +- components/test_raftstore/src/node.rs | 1 + components/test_raftstore/src/server.rs | 1 + components/test_raftstore/src/util.rs | 60 +---------------- tests/Cargo.toml | 1 + tests/failpoints/cases/test_bootstrap.rs | 1 + .../cases/test_replica_stale_read.rs | 1 + tests/failpoints/cases/test_sst_recovery.rs | 1 + .../config/dynamic/pessimistic_txn.rs | 2 +- .../integrations/config/dynamic/raftstore.rs | 2 +- .../integrations/raftstore/test_bootstrap.rs | 1 + .../raftstore/test_conf_change.rs | 1 + 25 files changed, 142 insertions(+), 71 deletions(-) create mode 100644 components/test_pd_client/Cargo.toml create mode 100644 components/test_pd_client/src/lib.rs rename components/{test_raftstore => test_pd_client}/src/pd.rs (96%) diff --git a/Cargo.lock b/Cargo.lock index 3f64d59eed9..3a1ad699087 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -761,7 +761,7 @@ dependencies = [ "serde_derive", "slog", "slog-global", - "test_raftstore", + "test_pd_client", "thiserror", "tikv_alloc", "tikv_util", @@ -813,6 +813,7 @@ dependencies = [ "slog", "slog-global", "tempfile", + "test_pd_client", "test_raftstore", "test_util", "thiserror", @@ -5697,6 +5698,28 @@ dependencies = [ "tikv_util", ] +[[package]] +name = "test_pd_client" +version = "0.0.1" +dependencies = [ + "collections", + "fail", + "futures 0.3.15", + "grpcio", + "keys", + "kvproto", + "log_wrappers", + "pd_client", + "raft", + "raftstore", + "slog", + "slog-global", + "tikv_util", + "tokio", + "tokio-timer", + "txn_types", +] + [[package]] name = "test_raftstore" version = "0.0.1" @@ -5733,6 +5756,7 @@ dependencies = [ "slog", "slog-global", "tempfile", + "test_pd_client", "test_util", "tikv", "tikv_util", @@ -5848,6 +5872,7 @@ dependencies = [ "test_backup", "test_coprocessor", "test_pd", + 
"test_pd_client", "test_raftstore", "test_sst_importer", "test_storage", diff --git a/Cargo.toml b/Cargo.toml index 1b622f0d61b..2ce23dddd3e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -265,6 +265,7 @@ members = [ "components/test_coprocessor", "components/test_coprocessor_plugin/example_plugin", "components/test_pd", + "components/test_pd_client", "components/test_raftstore", "components/test_sst_importer", "components/test_storage", diff --git a/components/causal_ts/Cargo.toml b/components/causal_ts/Cargo.toml index 7505a043a69..335cd2528b6 100644 --- a/components/causal_ts/Cargo.toml +++ b/components/causal_ts/Cargo.toml @@ -35,7 +35,7 @@ txn_types = { path = "../txn_types", default-features = false } [dev-dependencies] criterion = "0.3" -test_raftstore = { path = "../test_raftstore" } +test_pd_client = { path = "../test_pd_client" } [[bench]] name = "tso" diff --git a/components/causal_ts/benches/tso.rs b/components/causal_ts/benches/tso.rs index 86d7ed9b9ea..66d950a52b5 100644 --- a/components/causal_ts/benches/tso.rs +++ b/components/causal_ts/benches/tso.rs @@ -5,7 +5,7 @@ use std::{sync::Arc, time::Duration}; use causal_ts::{BatchTsoProvider, CausalTsProvider, TsoBatchList}; use criterion::*; use futures::executor::block_on; -use test_raftstore::TestPdClient; +use test_pd_client::TestPdClient; use txn_types::TimeStamp; fn bench_batch_tso_list_pop(c: &mut Criterion) { diff --git a/components/causal_ts/src/observer.rs b/components/causal_ts/src/observer.rs index f648d8cba08..c07624e2781 100644 --- a/components/causal_ts/src/observer.rs +++ b/components/causal_ts/src/observer.rs @@ -166,7 +166,7 @@ pub mod tests { metapb::Region, raft_cmdpb::{RaftCmdRequest, Request as RaftRequest}, }; - use test_raftstore::TestPdClient; + use test_pd_client::TestPdClient; use txn_types::{Key, TimeStamp}; use super::*; diff --git a/components/causal_ts/src/tso.rs b/components/causal_ts/src/tso.rs index 3bb0034af8f..86efd73198a 100644 --- a/components/causal_ts/src/tso.rs +++ 
b/components/causal_ts/src/tso.rs @@ -633,7 +633,7 @@ impl CausalTsProvider for SimpleTsoProvider { #[cfg(test)] pub mod tests { - use test_raftstore::TestPdClient; + use test_pd_client::TestPdClient; use super::*; diff --git a/components/cdc/Cargo.toml b/components/cdc/Cargo.toml index 255ef552c73..dbefc7df82c 100644 --- a/components/cdc/Cargo.toml +++ b/components/cdc/Cargo.toml @@ -70,6 +70,7 @@ engine_rocks = { path = "../engine_rocks", default-features = false } engine_traits = { path = "../engine_traits", default-features = false } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } tempfile = "3.0" +test_pd_client = { path = "../test_pd_client" } test_raftstore = { path = "../test_raftstore", default-features = false } test_util = { path = "../test_util", default-features = false } diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 2e0253b23a9..10251f2a257 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -1433,7 +1433,8 @@ mod tests { errors::{DiscardReason, Error as RaftStoreError}, store::{msg::CasualMessage, PeerMsg, ReadDelegate}, }; - use test_raftstore::{MockRaftStoreRouter, TestPdClient}; + use test_pd_client::TestPdClient; + use test_raftstore::MockRaftStoreRouter; use tikv::{ server::DEFAULT_CLUSTER_ID, storage::{kv::Engine, TestEngineBuilder}, diff --git a/components/test_pd_client/Cargo.toml b/components/test_pd_client/Cargo.toml new file mode 100644 index 00000000000..909da59d2ae --- /dev/null +++ b/components/test_pd_client/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "test_pd_client" +version = "0.0.1" +edition = "2018" +publish = false + +[dependencies] +collections = { path = "../collections" } +fail = "0.5" +futures = "0.3" +grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +keys = { path = "../keys", default-features = false } +kvproto = { git = 
"https://github.com/pingcap/kvproto.git" } +log_wrappers = { path = "../log_wrappers" } +pd_client = { path = "../pd_client", default-features = false } +raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raftstore = { path = "../raftstore", default-features = false, features = ["testexport"] } +slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } +slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +tikv_util = { path = "../tikv_util", default-features = false } +tokio = { version = "1.5", features = ["rt-multi-thread"] } +tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } +txn_types = { path = "../txn_types", default-features = false } diff --git a/components/test_pd_client/src/lib.rs b/components/test_pd_client/src/lib.rs new file mode 100644 index 00000000000..9ea837e335e --- /dev/null +++ b/components/test_pd_client/src/lib.rs @@ -0,0 +1,8 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +#[macro_use] +extern crate tikv_util; + +mod pd; + +pub use crate::pd::*; diff --git a/components/test_raftstore/src/pd.rs b/components/test_pd_client/src/pd.rs similarity index 96% rename from components/test_raftstore/src/pd.rs rename to components/test_pd_client/src/pd.rs index 75ea189c312..69cd1a30d03 100644 --- a/components/test_raftstore/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -26,7 +26,10 @@ use futures::{ use keys::{self, data_key, enc_end_key, enc_start_key}; use kvproto::{ metapb::{self, PeerRole}, - pdpb, + pdpb::{ + self, ChangePeer, ChangePeerV2, CheckPolicy, Merge, RegionHeartbeatResponse, SplitRegion, + TransferLeader, + }, replication_modepb::{ DrAutoSyncState, RegionReplicationStatus, ReplicationMode, ReplicationStatus, StoreDrAutoSyncStatus, @@ -37,7 +40,7 @@ use pd_client::{ }; use raft::eraftpb::ConfChangeType; use raftstore::store::{ - util::{check_key_in_region, find_peer, is_learner}, + util::{check_key_in_region, find_peer, is_learner, new_peer}, QueryStats, INIT_EPOCH_CONF_VER, INIT_EPOCH_VER, }; use tikv_util::{ @@ -134,6 +137,10 @@ enum Operator { }, } +pub fn sleep_ms(ms: u64) { + std::thread::sleep(Duration::from_millis(ms)); +} + fn change_peer(change_type: ConfChangeType, peer: metapb::Peer) -> pdpb::ChangePeer { let mut cp = pdpb::ChangePeer::default(); cp.set_change_type(change_type); @@ -141,6 +148,59 @@ fn change_peer(change_type: ConfChangeType, peer: metapb::Peer) -> pdpb::ChangeP cp } +pub fn new_pd_change_peer( + change_type: ConfChangeType, + peer: metapb::Peer, +) -> RegionHeartbeatResponse { + let mut change_peer = ChangePeer::default(); + change_peer.set_change_type(change_type); + change_peer.set_peer(peer); + + let mut resp = RegionHeartbeatResponse::default(); + resp.set_change_peer(change_peer); + resp +} + +pub fn new_pd_change_peer_v2(changes: Vec) -> RegionHeartbeatResponse { + let mut change_peer = ChangePeerV2::default(); + change_peer.set_changes(changes.into()); + + let mut resp = 
RegionHeartbeatResponse::default(); + resp.set_change_peer_v2(change_peer); + resp +} + +pub fn new_split_region(policy: CheckPolicy, keys: Vec>) -> RegionHeartbeatResponse { + let mut split_region = SplitRegion::default(); + split_region.set_policy(policy); + split_region.set_keys(keys.into()); + let mut resp = RegionHeartbeatResponse::default(); + resp.set_split_region(split_region); + resp +} + +pub fn new_pd_transfer_leader( + peer: metapb::Peer, + peers: Vec, +) -> RegionHeartbeatResponse { + let mut transfer_leader = TransferLeader::default(); + transfer_leader.set_peer(peer); + transfer_leader.set_peers(peers.into()); + + let mut resp = RegionHeartbeatResponse::default(); + resp.set_transfer_leader(transfer_leader); + resp +} + +pub fn new_pd_merge_region(target_region: metapb::Region) -> RegionHeartbeatResponse { + let mut merge = Merge::default(); + merge.set_target(target_region); + + let mut resp = RegionHeartbeatResponse::default(); + resp.set_merge(merge); + resp +} + impl Operator { fn make_region_heartbeat_response( &self, diff --git a/components/test_raftstore/Cargo.toml b/components/test_raftstore/Cargo.toml index cd9df2e3c05..8c19c78a0f6 100644 --- a/components/test_raftstore/Cargo.toml +++ b/components/test_raftstore/Cargo.toml @@ -56,6 +56,7 @@ slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debu # better to not use slog-global, but pass in the logger slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } tempfile = "3.0" +test_pd_client = { path = "../test_pd_client" } test_util = { path = "../test_util", default-features = false } tikv = { path = "../../", default-features = false } tikv_util = { path = "../tikv_util", default-features = false } diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 9b5aa1a6646..9a69c7110b4 100644 --- a/components/test_raftstore/src/cluster.rs +++ 
b/components/test_raftstore/src/cluster.rs @@ -46,6 +46,7 @@ use raftstore::{ Error, Result, }; use tempfile::TempDir; +use test_pd_client::TestPdClient; use tikv::server::Result as ServerResult; use tikv_util::{ thread_group::GroupProperties, diff --git a/components/test_raftstore/src/lib.rs b/components/test_raftstore/src/lib.rs index 82695be12ba..8893d8a7ca4 100644 --- a/components/test_raftstore/src/lib.rs +++ b/components/test_raftstore/src/lib.rs @@ -8,13 +8,11 @@ extern crate tikv_util; mod cluster; mod config; mod node; -mod pd; mod router; mod server; mod transport_simulate; mod util; pub use crate::{ - cluster::*, config::Config, node::*, pd::*, router::*, server::*, transport_simulate::*, - util::*, + cluster::*, config::Config, node::*, router::*, server::*, transport_simulate::*, util::*, }; diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 11a5dda87bd..1616504c820 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -32,6 +32,7 @@ use raftstore::{ }; use resource_metering::CollectorRegHandle; use tempfile::TempDir; +use test_pd_client::TestPdClient; use tikv::{ config::{ConfigController, Module}, import::SstImporter, diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 72282f02dc0..6895915d466 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -45,6 +45,7 @@ use raftstore::{ use resource_metering::{CollectorRegHandle, ResourceTagFactory}; use security::SecurityManager; use tempfile::TempDir; +use test_pd_client::TestPdClient; use tikv::{ config::ConfigController, coprocessor, coprocessor_v2, diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 882095c5a7d..117ca6d44df 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -26,10 +26,6 @@ use kvproto::{ 
encryptionpb::EncryptionMethod, kvrpcpb::{PrewriteRequestPessimisticAction::*, *}, metapb::{self, RegionEpoch}, - pdpb::{ - ChangePeer, ChangePeerV2, CheckPolicy, Merge, RegionHeartbeatResponse, SplitRegion, - TransferLeader, - }, raft_cmdpb::{ AdminCmdType, AdminRequest, ChangePeerRequest, ChangePeerV2Request, CmdType, RaftCmdRequest, RaftCmdResponse, Request, StatusCmdType, StatusRequest, @@ -50,11 +46,12 @@ use raftstore::{ use rand::RngCore; use server::server::ConfiguredRaftEngine; use tempfile::TempDir; +use test_pd_client::TestPdClient; use tikv::{config::*, server::KvEngineFactoryBuilder, storage::point_key_range}; use tikv_util::{config::*, escape, time::ThreadReadId, worker::LazyWorker, HandyRwLock}; use txn_types::Key; -use crate::{Cluster, Config, ServerCluster, Simulator, TestPdClient}; +use crate::{Cluster, Config, ServerCluster, Simulator}; pub fn must_get(engine: &RocksEngine, cf: &str, key: &[u8], value: Option<&[u8]>) { for _ in 1..300 { @@ -334,59 +331,6 @@ pub fn is_error_response(resp: &RaftCmdResponse) -> bool { resp.get_header().has_error() } -pub fn new_pd_change_peer( - change_type: ConfChangeType, - peer: metapb::Peer, -) -> RegionHeartbeatResponse { - let mut change_peer = ChangePeer::default(); - change_peer.set_change_type(change_type); - change_peer.set_peer(peer); - - let mut resp = RegionHeartbeatResponse::default(); - resp.set_change_peer(change_peer); - resp -} - -pub fn new_pd_change_peer_v2(changes: Vec) -> RegionHeartbeatResponse { - let mut change_peer = ChangePeerV2::default(); - change_peer.set_changes(changes.into()); - - let mut resp = RegionHeartbeatResponse::default(); - resp.set_change_peer_v2(change_peer); - resp -} - -pub fn new_split_region(policy: CheckPolicy, keys: Vec>) -> RegionHeartbeatResponse { - let mut split_region = SplitRegion::default(); - split_region.set_policy(policy); - split_region.set_keys(keys.into()); - let mut resp = RegionHeartbeatResponse::default(); - resp.set_split_region(split_region); - resp 
-} - -pub fn new_pd_transfer_leader( - peer: metapb::Peer, - peers: Vec, -) -> RegionHeartbeatResponse { - let mut transfer_leader = TransferLeader::default(); - transfer_leader.set_peer(peer); - transfer_leader.set_peers(peers.into()); - - let mut resp = RegionHeartbeatResponse::default(); - resp.set_transfer_leader(transfer_leader); - resp -} - -pub fn new_pd_merge_region(target_region: metapb::Region) -> RegionHeartbeatResponse { - let mut merge = Merge::default(); - merge.set_target(target_region); - - let mut resp = RegionHeartbeatResponse::default(); - resp.set_merge(merge); - resp -} - #[derive(Default)] struct CallbackLeakDetector { called: bool, diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 14bf818aaf0..2cc30338f83 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -144,6 +144,7 @@ sst_importer = { path = "../components/sst_importer", default-features = false } test_backup = { path = "../components/test_backup", default-features = false } test_coprocessor = { path = "../components/test_coprocessor", default-features = false } test_pd = { path = "../components/test_pd", default-features = false } +test_pd_client = { path = "../components/test_pd_client" } test_raftstore = { path = "../components/test_raftstore", default-features = false } test_sst_importer = { path = "../components/test_sst_importer", default-features = false } test_storage = { path = "../components/test_storage", default-features = false } diff --git a/tests/failpoints/cases/test_bootstrap.rs b/tests/failpoints/cases/test_bootstrap.rs index 3923f4e77f2..8dc2eb8b371 100644 --- a/tests/failpoints/cases/test_bootstrap.rs +++ b/tests/failpoints/cases/test_bootstrap.rs @@ -4,6 +4,7 @@ use std::sync::{Arc, RwLock}; use engine_traits::Peekable; use kvproto::{kvrpcpb::ApiVersion, metapb, raft_serverpb}; +use test_pd_client::TestPdClient; use test_raftstore::*; fn test_bootstrap_half_way_failure(fp: &str) { diff --git a/tests/failpoints/cases/test_replica_stale_read.rs 
b/tests/failpoints/cases/test_replica_stale_read.rs index a8aaa030bfc..7748ed73b96 100644 --- a/tests/failpoints/cases/test_replica_stale_read.rs +++ b/tests/failpoints/cases/test_replica_stale_read.rs @@ -5,6 +5,7 @@ use std::{sync::Arc, time::Duration}; use kvproto::{kvrpcpb::Op, metapb::Peer}; use pd_client::PdClient; use raft::eraftpb::MessageType; +use test_pd_client::TestPdClient; use test_raftstore::*; fn prepare_for_stale_read(leader: Peer) -> (Cluster, Arc, PeerClient) { diff --git a/tests/failpoints/cases/test_sst_recovery.rs b/tests/failpoints/cases/test_sst_recovery.rs index f5dadc4205a..a4c1f10b5ae 100644 --- a/tests/failpoints/cases/test_sst_recovery.rs +++ b/tests/failpoints/cases/test_sst_recovery.rs @@ -5,6 +5,7 @@ use std::{fmt::Debug, io::Write, path::Path, sync::Arc, time::Duration}; use engine_rocks::RocksEngine; use engine_rocks_helper::sst_recovery::*; use engine_traits::{CompactExt, Peekable, CF_DEFAULT}; +use test_pd_client::TestPdClient; use test_raftstore::*; const CHECK_DURATION: Duration = Duration::from_millis(50); diff --git a/tests/integrations/config/dynamic/pessimistic_txn.rs b/tests/integrations/config/dynamic/pessimistic_txn.rs index 49bedd38c73..caad8a64f9b 100644 --- a/tests/integrations/config/dynamic/pessimistic_txn.rs +++ b/tests/integrations/config/dynamic/pessimistic_txn.rs @@ -6,7 +6,7 @@ use std::{ }; use security::SecurityManager; -use test_raftstore::TestPdClient; +use test_pd_client::TestPdClient; use tikv::{ config::*, server::{ diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index 35d5fe23e49..55cf75d2b75 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -21,7 +21,7 @@ use raftstore::{ }; use resource_metering::CollectorRegHandle; use tempfile::TempDir; -use test_raftstore::TestPdClient; +use test_pd_client::TestPdClient; use tikv::{ config::{ConfigController, Module, TikvConfig}, 
import::SstImporter, diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 92e4422c57f..cc5b6ca1ee0 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -13,6 +13,7 @@ use raftstore::{ }; use resource_metering::CollectorRegHandle; use tempfile::Builder; +use test_pd_client::{bootstrap_with_first_region, TestPdClient}; use test_raftstore::*; use tikv::{import::SstImporter, server::Node}; use tikv_util::{ diff --git a/tests/integrations/raftstore/test_conf_change.rs b/tests/integrations/raftstore/test_conf_change.rs index b37b207ac11..9f888b828be 100644 --- a/tests/integrations/raftstore/test_conf_change.rs +++ b/tests/integrations/raftstore/test_conf_change.rs @@ -19,6 +19,7 @@ use kvproto::{ use pd_client::PdClient; use raft::eraftpb::{ConfChangeType, MessageType}; use raftstore::{store::util::is_learner, Result}; +use test_pd_client::TestPdClient; use test_raftstore::*; use tikv_util::{config::ReadableDuration, time::Instant, HandyRwLock}; From b6b96382b99c4c0e995e31cf227f1a6c747c577a Mon Sep 17 00:00:00 2001 From: hehechen Date: Wed, 14 Sep 2022 20:44:59 +0800 Subject: [PATCH 0208/1149] resolved_ts: track ingest sst (#13454) ref tikv/tikv#13353 Signed-off-by: hehechen Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/file_system/src/rate_limiter.rs | 4 +- components/raftstore/src/store/util.rs | 8 ++ components/resolved_ts/Cargo.toml | 1 + components/resolved_ts/src/cmd.rs | 42 ++++++++--- components/resolved_ts/src/endpoint.rs | 4 + .../resolved_ts/tests/integrations/mod.rs | 31 +++++++- components/resolved_ts/tests/mod.rs | 75 ++++++++++++++++++- 8 files changed, 151 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a1ad699087..5f1ced440c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4547,6 +4547,7 @@ dependencies = [ "slog-global", "tempfile", "test_raftstore", + "test_sst_importer", "test_util", 
"thiserror", "tikv", diff --git a/components/file_system/src/rate_limiter.rs b/components/file_system/src/rate_limiter.rs index f3ec05a4314..feffb6dcf14 100644 --- a/components/file_system/src/rate_limiter.rs +++ b/components/file_system/src/rate_limiter.rs @@ -569,8 +569,8 @@ mod tests { macro_rules! approximate_eq { ($left:expr, $right:expr) => { - assert!(($left) >= ($right) * 0.85); - assert!(($right) >= ($left) * 0.85); + assert!(($left) >= ($right) * 0.75); + assert!(($right) >= ($left) * 0.75); }; } diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 922ba70a2c8..3d566d41416 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -931,6 +931,14 @@ impl RegionReadProgressRegistry { .map(|rp| rp.safe_ts()) } + pub fn get_tracked_index(&self, region_id: &u64) -> Option { + self.registry + .lock() + .unwrap() + .get(region_id) + .map(|rp| rp.core.lock().unwrap().applied_index) + } + // Update `safe_ts` with the provided `LeaderInfo` and return the regions that // have the same `LeaderInfo` pub fn handle_check_leaders( diff --git a/components/resolved_ts/Cargo.toml b/components/resolved_ts/Cargo.toml index e781fbc1f75..6309440202b 100644 --- a/components/resolved_ts/Cargo.toml +++ b/components/resolved_ts/Cargo.toml @@ -55,6 +55,7 @@ panic_hook = { path = "../panic_hook" } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } tempfile = "3.0" test_raftstore = { path = "../test_raftstore", default-features = false } +test_sst_importer = { path = "../test_sst_importer" } test_util = { path = "../test_util", default-features = false } tikv_kv = { path = "../tikv_kv" } diff --git a/components/resolved_ts/src/cmd.rs b/components/resolved_ts/src/cmd.rs index 0bb22e0a21e..8d3eb3bb48d 100644 --- a/components/resolved_ts/src/cmd.rs +++ b/components/resolved_ts/src/cmd.rs @@ -33,6 +33,7 @@ pub enum ChangeRow { commit_ts: TimeStamp, value: Option, }, + 
IngestSsT, } #[allow(clippy::large_enum_variant)] @@ -58,8 +59,11 @@ impl ChangeLog { let flags = WriteBatchFlags::from_bits_truncate(request.get_header().get_flags()); let is_one_pc = flags.contains(WriteBatchFlags::ONE_PC); - let changes = group_row_changes(request.requests.into()); - let rows = Self::encode_rows(changes, is_one_pc); + let (changes, has_ingest_sst) = group_row_changes(request.requests.into()); + let mut rows = Self::encode_rows(changes, is_one_pc); + if has_ingest_sst { + rows.push(ChangeRow::IngestSsT); + } ChangeLog::Rows { index, rows } } else { ChangeLog::Admin(request.take_admin_request().get_cmd_type()) @@ -190,13 +194,17 @@ struct RowChange { default: Option, } -fn group_row_changes(requests: Vec) -> HashMap { +fn group_row_changes(requests: Vec) -> (HashMap, bool) { let mut changes: HashMap = HashMap::default(); // The changes about default cf was recorded here and need to be matched with a // `write` or a `lock`. let mut unmatched_default = HashMap::default(); + let mut has_ingest_sst = false; for mut req in requests { match req.get_cmd_type() { + CmdType::IngestSst => { + has_ingest_sst = true; + } CmdType::Put => { let mut put = req.take_put(); let key = Key::from_encoded(put.take_key()); @@ -253,7 +261,7 @@ fn group_row_changes(requests: Vec) -> HashMap { row.default = Some(default); } } - changes + (changes, has_ingest_sst) } /// Filter non-lock related data (i.e `default_cf` data), the implement is @@ -274,7 +282,7 @@ pub fn lock_only_filter(mut cmd_batch: CmdBatch) -> Option { CmdType::Delete => req.get_delete().cf.as_str(), _ => "", }; - cf == CF_LOCK || cf == CF_WRITE + cf == CF_LOCK || cf == CF_WRITE || req.get_cmd_type() == CmdType::IngestSst }); cmd.request.set_requests(requests.into()); } @@ -286,7 +294,10 @@ pub fn lock_only_filter(mut cmd_batch: CmdBatch) -> Option { #[cfg(test)] mod tests { use concurrency_manager::ConcurrencyManager; - use kvproto::kvrpcpb::{AssertionLevel, PrewriteRequestPessimisticAction::*}; + use 
kvproto::{ + kvrpcpb::{AssertionLevel, PrewriteRequestPessimisticAction::*}, + raft_cmdpb::{CmdType, Request}, + }; use tikv::storage::{ kv::{MockEngineBuilder, TestEngineBuilder}, lock_manager::DummyLockManager, @@ -307,8 +318,13 @@ mod tests { let rocks_engine = TestEngineBuilder::new().build().unwrap(); let engine = MockEngineBuilder::from_rocks_engine(rocks_engine).build(); - let reqs = vec![Modify::Put("default", Key::from_raw(b"k1"), b"v1".to_vec()).into()]; - assert!(ChangeLog::encode_rows(group_row_changes(reqs), false).is_empty()); + let mut reqs = vec![Modify::Put("default", Key::from_raw(b"k1"), b"v1".to_vec()).into()]; + let mut req = Request::default(); + req.set_cmd_type(CmdType::IngestSst); + reqs.push(req); + let (changes, has_ingest_sst) = group_row_changes(reqs); + assert_eq!(has_ingest_sst, true); + assert!(ChangeLog::encode_rows(changes, false).is_empty()); must_prewrite_put(&engine, b"k1", b"v1", b"k1", 1); must_commit(&engine, b"k1", 1, 2); @@ -327,8 +343,10 @@ mod tests { .take_last_modifies() .into_iter() .flat_map(|m| { - let reqs = m.into_iter().map(Into::into).collect(); - ChangeLog::encode_rows(group_row_changes(reqs), false) + let reqs: Vec = m.into_iter().map(Into::into).collect(); + let (changes, has_ingest_sst) = group_row_changes(reqs); + assert_eq!(has_ingest_sst, false); + ChangeLog::encode_rows(changes, false) }) .collect(); @@ -415,7 +433,9 @@ mod tests { .into_iter() .flat_map(|m| { let reqs = m.into_iter().map(Into::into).collect(); - ChangeLog::encode_rows(group_row_changes(reqs), true) + let (changes, has_ingest_sst) = group_row_changes(reqs); + assert_eq!(has_ingest_sst, false); + ChangeLog::encode_rows(changes, true) }) .last() .unwrap(); diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index f2920e2af69..a4e5f6e3864 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -134,6 +134,7 @@ impl ObserveRegion { }), // One pc command do 
not contains any lock, so just skip it ChangeRow::OnePc { .. } => {} + ChangeRow::IngestSsT => {} }); assert!( *tracked_index < *index, @@ -191,6 +192,9 @@ impl ObserveRegion { .untrack_lock(&key.to_raw().unwrap(), Some(*index)), // One pc command do not contains any lock, so just skip it ChangeRow::OnePc { .. } => {} + ChangeRow::IngestSsT => { + self.resolver.update_tracked_index(*index); + } }); } } diff --git a/components/resolved_ts/tests/integrations/mod.rs b/components/resolved_ts/tests/integrations/mod.rs index 7916d03d8d2..a8acab00625 100644 --- a/components/resolved_ts/tests/integrations/mod.rs +++ b/components/resolved_ts/tests/integrations/mod.rs @@ -5,9 +5,11 @@ mod testsuite; use std::time::Duration; use futures::executor::block_on; -use kvproto::kvrpcpb::*; +use kvproto::{kvrpcpb::*, metapb::RegionEpoch}; use pd_client::PdClient; +use tempfile::Builder; use test_raftstore::sleep_ms; +use test_sst_importer::*; pub use testsuite::*; #[test] @@ -52,6 +54,33 @@ fn test_resolved_ts_basic() { let current_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); suite.must_get_rts_ge(r1.id, current_ts); + // ingest sst + let temp_dir = Builder::new().prefix("test_resolved_ts").tempdir().unwrap(); + let sst_path = temp_dir.path().join("test.sst"); + let sst_range = (0, 100); + + let mut sst_epoch = RegionEpoch::default(); + sst_epoch.set_conf_ver(1); + sst_epoch.set_version(4); + + let (mut meta, data) = gen_sst_file(&sst_path, sst_range); + meta.set_region_id(r1.id); + meta.set_region_epoch(sst_epoch); + + suite.upload_sst(r1.id, &meta, &data).unwrap(); + + let tracked_index_before = suite.region_tracked_index(r1.id); + suite.must_ingest_sst(r1.id, meta); + let mut tracked_index_after = suite.region_tracked_index(r1.id); + for _ in 0..10 { + if tracked_index_after > tracked_index_before { + break; + } + tracked_index_after = suite.region_tracked_index(r1.id); + sleep_ms(200) + } + assert!(tracked_index_after > tracked_index_before); + suite.stop(); } diff 
--git a/components/resolved_ts/tests/mod.rs b/components/resolved_ts/tests/mod.rs index 0e6d8bbc9f8..812f9057e6b 100644 --- a/components/resolved_ts/tests/mod.rs +++ b/components/resolved_ts/tests/mod.rs @@ -5,8 +5,11 @@ use std::{sync::*, time::Duration}; use collections::HashMap; use concurrency_manager::ConcurrencyManager; use engine_rocks::{RocksEngine, RocksSnapshot}; -use grpcio::{ChannelBuilder, ClientUnaryReceiver, Environment}; +use futures::{executor::block_on, stream, SinkExt}; +use grpcio::{ChannelBuilder, ClientUnaryReceiver, Environment, Result, WriteFlags}; use kvproto::{ + import_sstpb::{IngestRequest, SstMeta, UploadRequest, UploadResponse}, + import_sstpb_grpc::ImportSstClient, kvrpcpb::{PrewriteRequestPessimisticAction::*, *}, tikvpb::TikvClient, }; @@ -28,6 +31,7 @@ pub struct TestSuite { pub endpoints: HashMap>>, pub obs: HashMap>, tikv_cli: HashMap, + import_cli: HashMap, concurrency_managers: HashMap, env: Arc, @@ -97,6 +101,7 @@ impl TestSuite { concurrency_managers, env: Arc::new(Environment::new(1)), tikv_cli: HashMap::default(), + import_cli: HashMap::default(), } } @@ -323,6 +328,19 @@ impl TestSuite { }) } + pub fn get_import_client(&mut self, region_id: u64) -> &ImportSstClient { + let leader = self.cluster.leader_of_region(region_id).unwrap(); + let store_id = leader.get_store_id(); + let addr = self.cluster.sim.rl().get_addr(store_id); + let env = self.env.clone(); + self.import_cli + .entry(leader.get_store_id()) + .or_insert_with(|| { + let channel = ChannelBuilder::new(env).connect(&addr); + ImportSstClient::new(channel) + }) + } + pub fn get_txn_concurrency_manager(&self, store_id: u64) -> Option { self.concurrency_managers.get(&store_id).cloned() } @@ -342,6 +360,20 @@ impl TestSuite { ) } + pub fn region_tracked_index(&mut self, region_id: u64) -> u64 { + for _ in 0..50 { + if let Some(leader) = self.cluster.leader_of_region(region_id) { + let meta = self.cluster.store_metas[&leader.store_id].lock().unwrap(); + if let 
Some(tracked_index) = meta.region_read_progress.get_tracked_index(®ion_id) + { + return tracked_index; + } + } + sleep_ms(100) + } + panic!("fail to get region tracked index after 50 trys"); + } + pub fn must_get_rts(&mut self, region_id: u64, rts: TimeStamp) { for _ in 0..50 { if let Some(ts) = self.region_resolved_ts(region_id) { @@ -365,4 +397,45 @@ impl TestSuite { } panic!("fail to get greater ts after 50 trys"); } + + pub fn upload_sst( + &mut self, + region_id: u64, + meta: &SstMeta, + data: &[u8], + ) -> Result { + let import = self.get_import_client(region_id); + let mut r1 = UploadRequest::default(); + r1.set_meta(meta.clone()); + let mut r2 = UploadRequest::default(); + r2.set_data(data.to_vec()); + let reqs: Vec<_> = vec![r1, r2] + .into_iter() + .map(|r| Result::Ok((r, WriteFlags::default()))) + .collect(); + let (mut tx, rx) = import.upload().unwrap(); + let mut stream = stream::iter(reqs); + block_on(async move { + tx.send_all(&mut stream).await?; + tx.close().await?; + rx.await + }) + } + + pub fn must_ingest_sst(&mut self, region_id: u64, meta: SstMeta) { + let mut ingest_request = IngestRequest::default(); + ingest_request.set_context(self.get_context(region_id)); + ingest_request.set_sst(meta); + + let ingest_sst_resp = self + .get_import_client(region_id) + .ingest(&ingest_request) + .unwrap(); + + assert!( + !ingest_sst_resp.has_error(), + "{:?}", + ingest_sst_resp.get_error() + ); + } } From 592d423d76f0762e75179285119c1965c9cd4b76 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Thu, 15 Sep 2022 10:58:59 +0800 Subject: [PATCH 0209/1149] raftstore: Implement coprocessor observer pre_persist (#12957) ref tikv/tikv#12849 Support coprocessor observer pre_commit Signed-off-by: CalvinNeo --- .../raftstore/src/coprocessor/dispatcher.rs | 20 +++++++ components/raftstore/src/coprocessor/mod.rs | 11 ++++ components/raftstore/src/store/fsm/apply.rs | 58 +++++++++++++++++-- components/test_raftstore/src/node.rs | 1 + 
components/test_raftstore/src/server.rs | 1 + 5 files changed, 85 insertions(+), 6 deletions(-) diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index 3cddc21e8cb..df7794c3701 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -635,6 +635,26 @@ impl CoprocessorHost { ); } + /// `pre_persist` is called we we want to persist data or meta for a region. + /// For example, in `finish_for` and `commit`, + /// we will separately call `pre_persist` with is_finished = true/false. + /// By returning false, we reject this persistence. + pub fn pre_persist( + &self, + region: &Region, + is_finished: bool, + cmd: Option<&RaftCmdRequest>, + ) -> bool { + let mut ctx = ObserverContext::new(region); + for observer in &self.registry.region_change_observers { + let observer = observer.observer.inner(); + if !observer.pre_persist(&mut ctx, is_finished, cmd) { + return false; + } + } + true + } + pub fn on_flush_applied_cmd_batch( &self, max_level: ObserveLevel, diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 8a309dc4734..35330701a95 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -310,6 +310,17 @@ pub enum RegionChangeEvent { pub trait RegionChangeObserver: Coprocessor { /// Hook to call when a region changed on this TiKV fn on_region_changed(&self, _: &mut ObserverContext<'_>, _: RegionChangeEvent, _: StateRole) {} + + /// Should be called everytime before we write a WriteBatch into + /// KvEngine. Returns false if we can't commit at this time. 
+ fn pre_persist( + &self, + _: &mut ObserverContext<'_>, + _is_finished: bool, + _cmd: Option<&RaftCmdRequest>, + ) -> bool { + true + } } #[derive(Clone, Debug, Default)] diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 6d1d1881046..e23ba64eb7b 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -584,10 +584,17 @@ where delegate: &mut ApplyDelegate, results: VecDeque>, ) { - if !delegate.pending_remove { - delegate.write_apply_state(self.kv_wb_mut()); + if self.host.pre_persist(&delegate.region, true, None) { + if !delegate.pending_remove { + delegate.write_apply_state(self.kv_wb_mut()); + } + self.commit_opt(delegate, false); + } else { + debug!("do not persist when finish_for"; + "region" => ?delegate.region, + "tag" => &delegate.tag, + ); } - self.commit_opt(delegate, false); self.apply_res.push(ApplyRes { region_id: delegate.region_id(), apply_state: delegate.apply_state.clone(), @@ -1073,8 +1080,9 @@ where } let mut has_unflushed_data = self.last_flush_applied_index != self.apply_state.get_applied_index(); - if has_unflushed_data && should_write_to_engine(&cmd) - || apply_ctx.kv_wb().should_write_to_engine() + if (has_unflushed_data && should_write_to_engine(&cmd) + || apply_ctx.kv_wb().should_write_to_engine()) + && apply_ctx.host.pre_persist(&self.region, false, Some(&cmd)) { apply_ctx.commit(self); if let Some(start) = self.handle_start.as_ref() { @@ -4972,6 +4980,7 @@ mod tests { cmd_sink: Option>>>, filter_compact_log: Arc, filter_consistency_check: Arc, + skip_persist_when_pre_commit: Arc, delay_remove_ssts: Arc, last_delete_sst_count: Arc, last_pending_delete_sst_count: Arc, @@ -5095,6 +5104,17 @@ mod tests { fn on_applied_current_term(&self, _: raft::StateRole, _: &Region) {} } + impl RegionChangeObserver for ApplyObserver { + fn pre_persist( + &self, + _: &mut ObserverContext<'_>, + _is_finished: bool, + _cmd: 
Option<&RaftCmdRequest>, + ) -> bool { + !self.skip_persist_when_pre_commit.load(Ordering::SeqCst) + } + } + #[test] fn test_handle_raft_committed_entries() { let (_path, engine) = create_tmp_engine("test-delegate"); @@ -5716,6 +5736,8 @@ mod tests { let obs = ApplyObserver::default(); host.registry .register_admin_observer(1, BoxAdminObserver::new(obs.clone())); + host.registry + .register_region_change_observer(1, BoxRegionChangeObserver::new(obs.clone())); host.registry .register_query_observer(1, BoxQueryObserver::new(obs.clone())); @@ -5751,6 +5773,8 @@ mod tests { reg.region.mut_region_epoch().set_version(3); router.schedule_task(1, Msg::Registration(reg)); + obs.skip_persist_when_pre_commit + .store(true, Ordering::SeqCst); let mut index_id = 1; let put_entry = EntryBuilder::new(index_id, 1) .put(b"k1", b"v1") @@ -5759,7 +5783,19 @@ mod tests { .epoch(1, 3) .build(); router.schedule_task(1, Msg::apply(apply(peer_id, 1, 1, vec![put_entry], vec![]))); - fetch_apply_res(&rx); + let apply_res = fetch_apply_res(&rx); + + // We don't persist at `finish_for`, since we disabled `pre_persist`. + let state: RaftApplyState = engine + .get_msg_cf(CF_RAFT, &keys::apply_state_key(1)) + .unwrap() + .unwrap_or_default(); + assert_eq!( + apply_res.apply_state.get_applied_index(), + state.get_applied_index() + 1 + ); + obs.skip_persist_when_pre_commit + .store(false, Ordering::SeqCst); // Phase 1: we test if pre_exec will filter execution of commands correctly. index_id += 1; @@ -5781,6 +5817,16 @@ mod tests { assert_eq!(apply_res.exec_res.len(), 0); assert_eq!(apply_res.apply_state.get_truncated_state().get_index(), 0); + // We persist at `finish_for`, since we enabled `pre_persist`. 
+ let state: RaftApplyState = engine + .get_msg_cf(CF_RAFT, &keys::apply_state_key(1)) + .unwrap() + .unwrap_or_default(); + assert_eq!( + apply_res.apply_state.get_applied_index(), + state.get_applied_index() + ); + index_id += 1; // Don't filter CompactLog obs.filter_compact_log.store(false, Ordering::SeqCst); diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 1616504c820..f604ce7dff7 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -270,6 +270,7 @@ impl Simulator for NodeCluster { .max_total_size(cfg.server.snap_max_total_size.0) .encryption_key_manager(key_manager) .max_per_file_size(cfg.raft_store.max_snapshot_file_raw_size.0) + .enable_multi_snapshot_files(true) .build(tmp.path().to_str().unwrap()); (snap_mgr, Some(tmp)) } else { diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 6895915d466..51092007bff 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -455,6 +455,7 @@ impl ServerCluster { .max_total_size(cfg.server.snap_max_total_size.0) .encryption_key_manager(key_manager) .max_per_file_size(cfg.raft_store.max_snapshot_file_raw_size.0) + .enable_multi_snapshot_files(true) .build(tmp_str); self.snap_mgrs.insert(node_id, snap_mgr.clone()); let server_cfg = Arc::new(VersionTrack::new(cfg.server.clone())); From d762bd89a6c25eac1f07f40319f69c9f95db0131 Mon Sep 17 00:00:00 2001 From: YangKeao Date: Thu, 15 Sep 2022 00:54:59 -0400 Subject: [PATCH 0210/1149] copr: implement several mysql time related types json (#13418) close tikv/tikv#13417 Signed-off-by: YangKeao Co-authored-by: Ti Chi Robot --- .../tidb_query_datatype/src/codec/convert.rs | 12 +- .../src/codec/mysql/json/binary.rs | 249 +++++++++++++++- .../src/codec/mysql/json/comparison.rs | 144 +++++++++ .../src/codec/mysql/json/constants.rs | 2 + .../src/codec/mysql/json/jcodec.rs | 4 + 
.../src/codec/mysql/json/json_type.rs | 7 + .../src/codec/mysql/json/json_unquote.rs | 42 +++ .../src/codec/mysql/json/mod.rs | 98 +++++-- .../src/codec/mysql/json/modifier.rs | 11 +- .../src/codec/mysql/json/serde.rs | 18 ++ components/tidb_query_expr/src/impl_cast.rs | 275 ++++++++++++++++-- 11 files changed, 793 insertions(+), 69 deletions(-) diff --git a/components/tidb_query_datatype/src/codec/convert.rs b/components/tidb_query_datatype/src/codec/convert.rs index efd99f5317a..26ae799c4ff 100644 --- a/components/tidb_query_datatype/src/codec/convert.rs +++ b/components/tidb_query_datatype/src/codec/convert.rs @@ -510,14 +510,14 @@ impl<'a> ToInt for JsonRef<'a> { // TiDB: 5 // MySQL: 4 let val = match self.get_type() { - JsonType::Object | JsonType::Array | JsonType::Opaque => Ok(ctx - .handle_truncate_err(Error::truncated_wrong_val("Integer", self.to_string())) - .map(|_| 0)?), JsonType::Literal => Ok(self.get_literal().map_or(0, |x| x as i64)), JsonType::I64 => Ok(self.get_i64()), JsonType::U64 => Ok(self.get_u64() as i64), JsonType::Double => self.get_double().to_int(ctx, tp), JsonType::String => self.get_str_bytes()?.to_int(ctx, tp), + _ => Ok(ctx + .handle_truncate_err(Error::truncated_wrong_val("Integer", self.to_string())) + .map(|_| 0)?), }?; val.to_int(ctx, tp) } @@ -526,14 +526,14 @@ impl<'a> ToInt for JsonRef<'a> { #[inline] fn to_uint(&self, ctx: &mut EvalContext, tp: FieldTypeTp) -> Result { let val = match self.get_type() { - JsonType::Object | JsonType::Array | JsonType::Opaque => Ok(ctx - .handle_truncate_err(Error::truncated_wrong_val("Integer", self.to_string())) - .map(|_| 0)?), JsonType::Literal => Ok(self.get_literal().map_or(0, |x| x as u64)), JsonType::I64 => Ok(self.get_i64() as u64), JsonType::U64 => Ok(self.get_u64()), JsonType::Double => self.get_double().to_uint(ctx, tp), JsonType::String => self.get_str_bytes()?.to_uint(ctx, tp), + _ => Ok(ctx + .handle_truncate_err(Error::truncated_wrong_val("Integer", self.to_string())) + .map(|_| 
0)?), }?; val.to_uint(ctx, tp) } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/binary.rs b/components/tidb_query_datatype/src/codec/mysql/json/binary.rs index 9b8264ee3fb..12f8fbd5129 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/binary.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/binary.rs @@ -88,6 +88,13 @@ impl<'a> JsonRef<'a> { &self.value()[val_offset..val_offset + opaque_bytes_len as usize + len_len + 1], ) } + JsonType::Date | JsonType::Datetime | JsonType::Timestamp => { + JsonRef::new(val_type, &self.value()[val_offset..val_offset + TIME_LEN]) + } + JsonType::Time => JsonRef::new( + val_type, + &self.value()[val_offset..val_offset + DURATION_LEN], + ), _ => { let data_size = NumberCodec::decode_u32_le(&self.value()[val_offset + ELEMENT_COUNT_LEN..]) @@ -122,7 +129,16 @@ impl<'a> JsonRef<'a> { #[cfg(test)] mod tests { - use super::{super::Json, *}; + use std::collections::BTreeMap; + + use super::*; + use crate::{ + codec::{ + data_type::Duration, + mysql::{Json, Time, TimeType}, + }, + expr::EvalContext, + }; #[test] fn test_type() { @@ -143,4 +159,235 @@ mod tests { assert_eq!(json.as_ref().get_type(), tp, "{:?}", json_str); } } + + #[test] + fn test_array_get_elem() { + let mut ctx = EvalContext::default(); + + let time = Time::parse( + &mut ctx, + "1998-06-13 12:13:14", + TimeType::DateTime, + 0, + false, + ) + .unwrap(); + let duration = Duration::parse(&mut ctx, "12:13:14", 0).unwrap(); + let array = vec![ + Json::from_u64(1).unwrap(), + Json::from_str_val("abcdefg").unwrap(), + ]; + let object = BTreeMap::from([ + ("key1".to_string(), Json::from_u64(1).unwrap()), + ("key2".to_string(), Json::from_str_val("abcdefg").unwrap()), + ]); + + let json_array = Json::from_array(vec![ + Json::from_u64(1).unwrap(), + Json::from_time(time).unwrap(), + Json::from_duration(duration).unwrap(), + Json::from_array(array).unwrap(), + Json::from_str_val("abcdefg").unwrap(), + Json::from_bool(false).unwrap(), + 
Json::from_object(object).unwrap(), + ]) + .unwrap(); + let json_array_ref = json_array.as_ref(); + + assert_eq!(json_array_ref.array_get_elem(0).unwrap().get_u64(), 1); + assert_eq!( + json_array_ref + .array_get_elem(1) + .unwrap() + .get_time() + .unwrap(), + time + ); + assert_eq!( + json_array_ref + .array_get_elem(2) + .unwrap() + .get_duration() + .unwrap(), + duration + ); + assert_eq!( + json_array_ref + .array_get_elem(3) + .unwrap() + .array_get_elem(0) + .unwrap() + .get_u64(), + 1 + ); + assert_eq!( + json_array_ref + .array_get_elem(3) + .unwrap() + .array_get_elem(1) + .unwrap() + .get_str() + .unwrap(), + "abcdefg" + ); + assert_eq!( + json_array_ref.array_get_elem(4).unwrap().get_str().unwrap(), + "abcdefg" + ); + assert_eq!( + json_array_ref + .array_get_elem(5) + .unwrap() + .get_literal() + .unwrap(), + false + ); + assert_eq!( + json_array_ref.array_get_elem(6).unwrap().object_get_key(0), + b"key1" + ); + assert_eq!( + json_array_ref.array_get_elem(6).unwrap().object_get_key(1), + b"key2" + ); + assert_eq!( + json_array_ref + .array_get_elem(6) + .unwrap() + .object_get_val(0) + .unwrap() + .get_u64(), + 1 + ); + assert_eq!( + json_array_ref + .array_get_elem(6) + .unwrap() + .object_get_val(1) + .unwrap() + .get_str() + .unwrap(), + "abcdefg" + ); + } + + #[test] + fn test_object_get_val() { + let mut ctx = EvalContext::default(); + + let time = Time::parse( + &mut ctx, + "1998-06-13 12:13:14", + TimeType::DateTime, + 0, + false, + ) + .unwrap(); + let duration = Duration::parse(&mut ctx, "12:13:14", 0).unwrap(); + let array = vec![ + Json::from_u64(1).unwrap(), + Json::from_str_val("abcdefg").unwrap(), + ]; + let object = BTreeMap::from([ + ("key1".to_string(), Json::from_u64(1).unwrap()), + ("key2".to_string(), Json::from_str_val("abcdefg").unwrap()), + ]); + + let json_object = Json::from_object(BTreeMap::from([ + ("0".to_string(), Json::from_u64(1).unwrap()), + ("1".to_string(), Json::from_time(time).unwrap()), + ("2".to_string(), 
Json::from_duration(duration).unwrap()), + ("3".to_string(), Json::from_array(array).unwrap()), + ("4".to_string(), Json::from_str_val("abcdefg").unwrap()), + ("5".to_string(), Json::from_bool(false).unwrap()), + ("6".to_string(), Json::from_object(object).unwrap()), + ])) + .unwrap(); + let json_object_ref = json_object.as_ref(); + + assert_eq!(json_object_ref.object_get_key(0), b"0"); + assert_eq!(json_object_ref.object_get_key(1), b"1"); + assert_eq!(json_object_ref.object_get_key(2), b"2"); + assert_eq!(json_object_ref.object_get_key(3), b"3"); + + assert_eq!(json_object_ref.object_get_val(0).unwrap().get_u64(), 1); + assert_eq!( + json_object_ref + .object_get_val(1) + .unwrap() + .get_time() + .unwrap(), + time + ); + assert_eq!( + json_object_ref + .object_get_val(2) + .unwrap() + .get_duration() + .unwrap(), + duration + ); + assert_eq!( + json_object_ref + .object_get_val(3) + .unwrap() + .array_get_elem(0) + .unwrap() + .get_u64(), + 1 + ); + assert_eq!( + json_object_ref + .object_get_val(3) + .unwrap() + .array_get_elem(1) + .unwrap() + .get_str() + .unwrap(), + "abcdefg" + ); + assert_eq!( + json_object_ref + .object_get_val(4) + .unwrap() + .get_str() + .unwrap(), + "abcdefg" + ); + assert_eq!( + json_object_ref + .object_get_val(5) + .unwrap() + .get_literal() + .unwrap(), + false + ); + assert_eq!( + json_object_ref.object_get_val(6).unwrap().object_get_key(0), + b"key1" + ); + assert_eq!( + json_object_ref.object_get_val(6).unwrap().object_get_key(1), + b"key2" + ); + assert_eq!( + json_object_ref + .object_get_val(6) + .unwrap() + .object_get_val(0) + .unwrap() + .get_u64(), + 1 + ); + assert_eq!( + json_object_ref + .object_get_val(6) + .unwrap() + .object_get_val(1) + .unwrap() + .get_str() + .unwrap(), + "abcdefg" + ); + } } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs b/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs index f948a172ef0..757ccdfc6bf 100644 --- 
a/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs @@ -38,6 +38,10 @@ impl<'a> JsonRef<'a> { JsonType::I64 | JsonType::U64 | JsonType::Double => PRECEDENCE_NUMBER, JsonType::String => PRECEDENCE_STRING, JsonType::Opaque => PRECEDENCE_OPAQUE, + JsonType::Date => PRECEDENCE_DATE, + JsonType::Datetime => PRECEDENCE_DATETIME, + JsonType::Timestamp => PRECEDENCE_DATETIME, + JsonType::Time => PRECEDENCE_TIME, } } @@ -150,6 +154,23 @@ impl<'a> PartialOrd for JsonRef<'a> { return None; } } + JsonType::Date | JsonType::Datetime | JsonType::Timestamp => { + // The jsonTypePrecedences guarantees that the DATE is only comparable with the + // DATE, and the DATETIME and TIMESTAMP will compare with + // each other + if let (Ok(left), Ok(right)) = (self.get_time(), right.get_time()) { + left.partial_cmp(&right) + } else { + return None; + } + } + JsonType::Time => { + if let (Ok(left), Ok(right)) = (self.get_duration(), right.get_duration()) { + left.partial_cmp(&right) + } else { + return None; + } + } }; } @@ -191,6 +212,13 @@ impl PartialOrd for Json { #[cfg(test)] mod tests { use super::*; + use crate::{ + codec::{ + data_type::Duration, + mysql::{Time, TimeType}, + }, + expr::EvalContext, + }; #[test] fn test_cmp_json_numberic_type() { @@ -295,4 +323,120 @@ mod tests { assert_eq!(Json::from_i64(2).unwrap(), Json::from_bool(false).unwrap()); } + + #[test] + fn test_cmp_json_between_json_type() { + let mut ctx = EvalContext::default(); + + let cmp = [ + ( + Json::from_time( + Time::parse( + &mut ctx, + "1998-06-13 12:13:14", + TimeType::DateTime, + 0, + false, + ) + .unwrap(), + ) + .unwrap(), + Json::from_time( + Time::parse( + &mut ctx, + "1998-06-14 13:14:15", + TimeType::DateTime, + 0, + false, + ) + .unwrap(), + ) + .unwrap(), + Ordering::Less, + ), + ( + Json::from_time( + Time::parse( + &mut ctx, + "1998-06-13 12:13:14", + TimeType::DateTime, + 0, + false, + ) + .unwrap(), + ) + 
.unwrap(), + Json::from_time( + Time::parse( + &mut ctx, + "1998-06-12 13:14:15", + TimeType::DateTime, + 0, + false, + ) + .unwrap(), + ) + .unwrap(), + Ordering::Greater, + ), + ( + // DateTime is always greater than Date + Json::from_time( + Time::parse( + &mut ctx, + "1998-06-13 12:13:14", + TimeType::DateTime, + 0, + false, + ) + .unwrap(), + ) + .unwrap(), + Json::from_time( + Time::parse(&mut ctx, "1998-06-14", TimeType::Date, 0, false).unwrap(), + ) + .unwrap(), + Ordering::Greater, + ), + ( + Json::from_duration(Duration::parse(&mut ctx, "12:13:14", 0).unwrap()).unwrap(), + Json::from_duration(Duration::parse(&mut ctx, "12:13:16", 0).unwrap()).unwrap(), + Ordering::Less, + ), + ( + Json::from_duration(Duration::parse(&mut ctx, "12:13:16", 0).unwrap()).unwrap(), + Json::from_duration(Duration::parse(&mut ctx, "12:13:14", 0).unwrap()).unwrap(), + Ordering::Greater, + ), + ( + // Time is always greater than Date + Json::from_duration(Duration::parse(&mut ctx, "12:13:16", 0).unwrap()).unwrap(), + Json::from_time( + Time::parse(&mut ctx, "1998-06-12", TimeType::Date, 0, false).unwrap(), + ) + .unwrap(), + Ordering::Greater, + ), + ( + // Time is always less than DateTime + Json::from_duration(Duration::parse(&mut ctx, "12:13:16", 0).unwrap()).unwrap(), + Json::from_time( + Time::parse( + &mut ctx, + "1998-06-12 11:11:11", + TimeType::DateTime, + 0, + false, + ) + .unwrap(), + ) + .unwrap(), + Ordering::Less, + ), + ]; + + for (l, r, result) in cmp { + assert_eq!(l.cmp(&r), result) + } + } } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/constants.rs b/components/tidb_query_datatype/src/codec/mysql/json/constants.rs index 57927b4b99c..7dec22a6c0b 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/constants.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/constants.rs @@ -11,6 +11,8 @@ pub const LITERAL_LEN: usize = 1; pub const U16_LEN: usize = 2; pub const U32_LEN: usize = 4; pub const NUMBER_LEN: usize = 8; +pub 
const TIME_LEN: usize = NUMBER_LEN; +pub const DURATION_LEN: usize = NUMBER_LEN + U32_LEN; pub const HEADER_LEN: usize = ELEMENT_COUNT_LEN + SIZE_LEN; // element size + data size pub const KEY_OFFSET_LEN: usize = U32_LEN; pub const KEY_LEN_LEN: usize = U16_LEN; diff --git a/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs b/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs index 51ca3ba0da0..867d8ec2c20 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs @@ -260,6 +260,10 @@ pub trait JsonDecoder: NumberDecoder { let (opaque_bytes_len, len_len) = NumberCodec::try_decode_var_u64(&value[1..])?; self.read_bytes(opaque_bytes_len as usize + len_len + 1)? } + JsonType::Date | JsonType::Datetime | JsonType::Timestamp => { + self.read_bytes(TIME_LEN)? + } + JsonType::Time => self.read_bytes(DURATION_LEN)?, }; Ok(Json::new(tp, Vec::from(value))) } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/json_type.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_type.rs index 28c4d275471..70321080ef7 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/json_type.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/json_type.rs @@ -14,6 +14,9 @@ const JSON_TYPE_ARRAY: &[u8] = b"ARRAY"; const JSON_TYPE_BIT: &[u8] = b"BIT"; const JSON_TYPE_BLOB: &[u8] = b"BLOB"; const JSON_TYPE_OPAQUE: &[u8] = b"OPAQUE"; +const JSON_TYPE_DATE: &[u8] = b"DATE"; +const JSON_TYPE_DATETIME: &[u8] = b"DATETIME"; +const JSON_TYPE_TIME: &[u8] = b"TIME"; impl<'a> JsonRef<'a> { /// `json_type` is the implementation for @@ -43,6 +46,10 @@ impl<'a> JsonRef<'a> { Ok(FieldTypeTp::Bit) => JSON_TYPE_BIT, _ => JSON_TYPE_OPAQUE, }, + JsonType::Date => JSON_TYPE_DATE, + JsonType::Datetime => JSON_TYPE_DATETIME, + JsonType::Timestamp => JSON_TYPE_DATETIME, + JsonType::Time => JSON_TYPE_TIME, } } } diff --git 
a/components/tidb_query_datatype/src/codec/mysql/json/json_unquote.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_unquote.rs index 5cfc8bc908d..f95c08cf958 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/json_unquote.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/json_unquote.rs @@ -24,6 +24,16 @@ impl<'a> JsonRef<'a> { let s = self.get_str()?; unquote_string(s) } + JsonType::Date + | JsonType::Datetime + | JsonType::Timestamp + | JsonType::Time + | JsonType::Opaque => { + let s = self.to_string(); + // Remove the quotes of output + assert!(s.len() > 2); + Ok(s[1..s.len() - 1].to_string()) + } _ => Ok(self.to_string()), } } @@ -83,6 +93,13 @@ mod tests { use std::collections::BTreeMap; use super::{super::Json, *}; + use crate::{ + codec::{ + data_type::Duration, + mysql::{Time, TimeType}, + }, + expr::EvalContext, + }; #[test] fn test_decode_escaped_unicode() { @@ -161,4 +178,29 @@ mod tests { ); } } + + #[test] + fn test_json_unquote_time_duration() { + let mut ctx = EvalContext::default(); + + let time = Json::from_time( + Time::parse( + &mut ctx, + "1998-06-13 12:13:14", + TimeType::DateTime, + 0, + false, + ) + .unwrap(), + ) + .unwrap(); + assert_eq!( + time.as_ref().unquote().unwrap(), + "1998-06-13 12:13:14.000000" + ); + + let duration = + Json::from_duration(Duration::parse(&mut ctx, "12:13:14", 0).unwrap()).unwrap(); + assert_eq!(duration.as_ref().unquote().unwrap(), "12:13:14.000000"); + } } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs index c4e3a9ebf5c..f21f789c0d0 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs @@ -75,7 +75,11 @@ mod json_remove; mod json_type; pub mod json_unquote; -use std::{collections::BTreeMap, convert::TryFrom, str}; +use std::{ + collections::BTreeMap, + convert::{TryFrom, TryInto}, + str, +}; use 
codec::number::{NumberCodec, F64_SIZE, I64_SIZE}; use constants::{JSON_LITERAL_FALSE, JSON_LITERAL_NIL, JSON_LITERAL_TRUE}; @@ -91,7 +95,6 @@ use crate::{ codec::{ convert::ConvertTo, data_type::{BytesRef, Decimal, Real}, - mysql, mysql::{Duration, Time, TimeType}, }, expr::EvalContext, @@ -114,6 +117,10 @@ pub enum JsonType { // It's a special value for the compatibility with MySQL. // It will store the raw buffer containing unexpected type (e.g. Binary). Opaque = 0x0d, + Date = 0x0e, + Datetime = 0x0f, + Timestamp = 0x10, + Time = 0x11, } impl TryFrom for JsonType { @@ -225,19 +232,43 @@ impl<'a> JsonRef<'a> { FieldTypeTp::from_u8(val[0]).ok_or(box_err!("invalid opaque type code")) } + pub fn get_time(&self) -> Result

{ + pub fn new() -> Self { + Self { + inner: P::capture(), + _phantom: PhantomData, + } + } + + pub fn delta(&self) -> P { + P::capture() - self.inner + } +} + +impl Default for PerfStatisticsInstant

{ + fn default() -> Self { + Self::new() + } +} + +impl slog::KV for PerfStatisticsInstant

{ + fn serialize( + &self, + record: &::slog::Record<'_>, + serializer: &mut dyn slog::Serializer, + ) -> slog::Result { + slog::KV::serialize(&self.inner, record, serializer) + } +} + +impl PerfContextFields for ReadPerfContext { + fn capture() -> Self { + let perf_context = PerfContext::get(); + ReadPerfContext { + user_key_comparison_count: perf_context.user_key_comparison_count(), + block_cache_hit_count: perf_context.block_cache_hit_count(), + block_read_count: perf_context.block_read_count(), + block_read_byte: perf_context.block_read_byte(), + block_read_time: perf_context.block_read_time(), + block_cache_index_hit_count: perf_context.block_cache_index_hit_count(), + index_block_read_count: perf_context.index_block_read_count(), + block_cache_filter_hit_count: perf_context.block_cache_filter_hit_count(), + filter_block_read_count: perf_context.filter_block_read_count(), + block_checksum_time: perf_context.block_checksum_time(), + block_decompress_time: perf_context.block_decompress_time(), + get_read_bytes: perf_context.get_read_bytes(), + iter_read_bytes: perf_context.iter_read_bytes(), + internal_key_skipped_count: perf_context.internal_key_skipped_count(), + internal_delete_skipped_count: perf_context.internal_delete_skipped_count(), + internal_recent_skipped_count: perf_context.internal_recent_skipped_count(), + get_snapshot_time: perf_context.get_snapshot_time(), + get_from_memtable_time: perf_context.get_from_memtable_time(), + get_from_memtable_count: perf_context.get_from_memtable_count(), + get_post_process_time: perf_context.get_post_process_time(), + get_from_output_files_time: perf_context.get_from_output_files_time(), + seek_on_memtable_time: perf_context.seek_on_memtable_time(), + seek_on_memtable_count: perf_context.seek_on_memtable_count(), + next_on_memtable_count: perf_context.next_on_memtable_count(), + prev_on_memtable_count: perf_context.prev_on_memtable_count(), + seek_child_seek_time: perf_context.seek_child_seek_time(), + 
seek_child_seek_count: perf_context.seek_child_seek_count(), + seek_min_heap_time: perf_context.seek_min_heap_time(), + seek_max_heap_time: perf_context.seek_max_heap_time(), + seek_internal_seek_time: perf_context.seek_internal_seek_time(), + db_mutex_lock_nanos: perf_context.db_mutex_lock_nanos(), + db_condition_wait_nanos: perf_context.db_condition_wait_nanos(), + read_index_block_nanos: perf_context.read_index_block_nanos(), + read_filter_block_nanos: perf_context.read_filter_block_nanos(), + new_table_block_iter_nanos: perf_context.new_table_block_iter_nanos(), + new_table_iterator_nanos: perf_context.new_table_iterator_nanos(), + block_seek_nanos: perf_context.block_seek_nanos(), + find_table_nanos: perf_context.find_table_nanos(), + bloom_memtable_hit_count: perf_context.bloom_memtable_hit_count(), + bloom_memtable_miss_count: perf_context.bloom_memtable_miss_count(), + bloom_sst_hit_count: perf_context.bloom_sst_hit_count(), + bloom_sst_miss_count: perf_context.bloom_sst_miss_count(), + get_cpu_nanos: perf_context.get_cpu_nanos(), + iter_next_cpu_nanos: perf_context.iter_next_cpu_nanos(), + iter_prev_cpu_nanos: perf_context.iter_prev_cpu_nanos(), + iter_seek_cpu_nanos: perf_context.iter_seek_cpu_nanos(), + encrypt_data_nanos: perf_context.encrypt_data_nanos(), + decrypt_data_nanos: perf_context.decrypt_data_nanos(), + } + } +} + +impl PerfContextFields for WritePerfContext { + fn capture() -> Self { + let perf_context = PerfContext::get(); + WritePerfContext { + write_wal_time: perf_context.write_wal_time(), + pre_and_post_process: perf_context.write_pre_and_post_process_time(), + write_memtable_time: perf_context.write_memtable_time(), + write_thread_wait: perf_context.write_thread_wait_nanos(), + db_mutex_lock_nanos: perf_context.db_mutex_lock_nanos(), + write_scheduling_flushes_compactions_time: perf_context + .write_scheduling_flushes_compactions_time(), + db_condition_wait_nanos: perf_context.db_condition_wait_nanos(), + write_delay_time: 
perf_context.write_delay_time(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_field_operations() { + let f1 = ReadPerfContext { + internal_key_skipped_count: 1, + internal_delete_skipped_count: 2, + block_cache_hit_count: 3, + block_read_count: 4, + block_read_byte: 5, + ..Default::default() + }; + let f2 = ReadPerfContext { + internal_key_skipped_count: 2, + internal_delete_skipped_count: 3, + block_cache_hit_count: 5, + block_read_count: 7, + block_read_byte: 11, + ..Default::default() + }; + let f3 = f1 + f2; + assert_eq!(f3.internal_key_skipped_count, 3); + assert_eq!(f3.block_cache_hit_count, 8); + assert_eq!(f3.block_read_byte, 16); + + let mut f3 = f1; + f3 += f2; + assert_eq!(f3.internal_key_skipped_count, 3); + assert_eq!(f3.block_cache_hit_count, 8); + assert_eq!(f3.block_read_byte, 16); + + let f3 = f2 - f1; + assert_eq!(f3.internal_key_skipped_count, 1); + assert_eq!(f3.block_cache_hit_count, 2); + assert_eq!(f3.block_read_byte, 6); + + let mut f3 = f2; + f3 -= f1; + assert_eq!(f3.internal_key_skipped_count, 1); + assert_eq!(f3.block_cache_hit_count, 2); + assert_eq!(f3.block_read_byte, 6); + } + + #[test] + fn test_deref() { + let mut stats = ReadPerfContext { + internal_key_skipped_count: 1, + internal_delete_skipped_count: 2, + block_cache_hit_count: 3, + block_read_count: 4, + block_read_byte: 5, + ..Default::default() + }; + assert_eq!(stats.block_cache_hit_count, 3); + stats.block_cache_hit_count = 6; + assert_eq!(stats.block_cache_hit_count, 6); + } +} diff --git a/components/engine_tirocks/src/write_batch.rs b/components/engine_tirocks/src/write_batch.rs new file mode 100644 index 00000000000..1671e686917 --- /dev/null +++ b/components/engine_tirocks/src/write_batch.rs @@ -0,0 +1,383 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{Result, WriteBatchExt as _}; +use tirocks::{option::WriteOptions, WriteBatch}; + +use crate::{r2e, RocksEngine}; + +const WRITE_BATCH_MAX_BATCH_NUM: usize = 16; +const WRITE_BATCH_MAX_KEY_NUM: usize = 16; + +impl engine_traits::WriteBatchExt for RocksEngine { + type WriteBatch = RocksWriteBatchVec; + + const WRITE_BATCH_MAX_KEYS: usize = 256; + + #[inline] + fn write_batch(&self) -> RocksWriteBatchVec { + self.write_batch_with_cap(1) + } + + #[inline] + fn write_batch_with_cap(&self, cap: usize) -> RocksWriteBatchVec { + RocksWriteBatchVec::with_unit_capacity(self, cap) + } +} + +/// `RocksWriteBatchVec` is for method `MultiBatchWrite` of RocksDB, which +/// splits a large WriteBatch into many smaller ones and then any thread could +/// help to deal with these small WriteBatch when it is calling +/// `MultiBatchCommit` and wait the front writer to finish writing. +/// `MultiBatchWrite` will perform much better than traditional +/// `pipelined_write` when TiKV writes very large data into RocksDB. +/// We will remove this feature when `unordered_write` of RocksDB becomes more +/// stable and becomes compatible with Titan. +pub struct RocksWriteBatchVec { + engine: RocksEngine, + wbs: Vec, + save_points: Vec, + index: usize, +} + +impl RocksWriteBatchVec { + pub fn with_unit_capacity(engine: &RocksEngine, cap: usize) -> RocksWriteBatchVec { + let wb = WriteBatch::with_capacity(cap); + RocksWriteBatchVec { + engine: engine.clone(), + wbs: vec![wb], + save_points: vec![], + index: 0, + } + } + + /// `check_switch_batch` will split a large WriteBatch into many smaller + /// ones. This is to avoid a large WriteBatch blocking write_thread too + /// long. 
+ #[inline(always)] + fn check_switch_batch(&mut self) { + if self.engine.multi_batch_write() + && self.wbs[self.index].count() >= WRITE_BATCH_MAX_KEY_NUM + { + self.index += 1; + if self.index >= self.wbs.len() { + self.wbs.push(WriteBatch::default()); + } + } + } +} + +/// Converts engine_traits options to tirocks write options. +pub fn to_tirocks_opt(opt: &engine_traits::WriteOptions) -> WriteOptions { + let mut r = WriteOptions::default(); + r.set_sync(opt.sync()) + .set_no_slowdown(opt.no_slowdown()) + .set_disable_wal(opt.disable_wal()) + + // TODO: enable it. + .set_memtable_insert_hint_per_batch(false); + r +} + +impl engine_traits::WriteBatch for RocksWriteBatchVec { + fn write_opt(&mut self, opts: &engine_traits::WriteOptions) -> Result { + let opts = to_tirocks_opt(opts); + if self.engine.multi_batch_write() { + self.engine + .as_inner() + .write_multi(&opts, &mut self.wbs[..=self.index]) + .map_err(r2e) + } else { + self.engine + .as_inner() + .write(&opts, &mut self.wbs[0]) + .map_err(r2e) + } + } + + fn data_size(&self) -> usize { + let mut size = 0; + for w in &self.wbs[..=self.index] { + size += w.as_bytes().len(); + } + size + } + + fn count(&self) -> usize { + let mut size = 0; + for w in &self.wbs[..=self.index] { + size += w.count(); + } + size + } + + fn is_empty(&self) -> bool { + self.wbs[0].as_bytes().is_empty() + } + + #[inline] + fn should_write_to_engine(&self) -> bool { + if self.engine.multi_batch_write() { + self.index >= WRITE_BATCH_MAX_BATCH_NUM + } else { + self.wbs[0].count() > RocksEngine::WRITE_BATCH_MAX_KEYS + } + } + + fn clear(&mut self) { + for i in 0..=self.index { + self.wbs[i].clear(); + } + self.save_points.clear(); + // Avoid making the wbs too big at one time, then the memory will be kept + // after reusing + if self.index > WRITE_BATCH_MAX_BATCH_NUM { + self.wbs.shrink_to(WRITE_BATCH_MAX_BATCH_NUM); + } + self.index = 0; + } + + fn set_save_point(&mut self) { + self.wbs[self.index].set_save_point(); + 
self.save_points.push(self.index); + } + + fn pop_save_point(&mut self) -> Result<()> { + if let Some(x) = self.save_points.pop() { + return self.wbs[x].pop_save_point().map_err(r2e); + } + Err(engine_traits::Error::Engine( + engine_traits::Status::with_error( + engine_traits::Code::InvalidArgument, + "no save point", + ), + )) + } + + fn rollback_to_save_point(&mut self) -> Result<()> { + if let Some(x) = self.save_points.pop() { + for i in x + 1..=self.index { + self.wbs[i].clear(); + } + self.index = x; + return self.wbs[x].rollback_to_save_point().map_err(r2e); + } + Err(engine_traits::Error::Engine( + engine_traits::Status::with_error( + engine_traits::Code::InvalidArgument, + "no save point", + ), + )) + } + + fn merge(&mut self, mut other: Self) -> Result<()> { + if !self.engine.multi_batch_write() { + let self_wb = &mut self.wbs[0]; + for wb in &other.wbs[..=other.index] { + self_wb.append(wb).map_err(r2e)?; + } + return Ok(()); + } + let self_wb = &mut self.wbs[self.index]; + let mut other_start = 0; + if self_wb.count() < WRITE_BATCH_MAX_KEY_NUM { + self_wb.append(&other.wbs[0]).map_err(r2e)?; + other_start = 1; + } + // From this point, either of following statements is true: + // - self_wb.count() >= WRITE_BATCH_MAX_KEY_NUM + // - other.index == 0 + if other.index >= other_start { + for wb in other.wbs.drain(other_start..=other.index) { + self.index += 1; + if self.wbs.len() == self.index { + self.wbs.push(wb); + } else { + self.wbs[self.index] = wb; + } + } + } + Ok(()) + } +} + +impl engine_traits::Mutable for RocksWriteBatchVec { + fn put(&mut self, key: &[u8], value: &[u8]) -> Result<()> { + self.check_switch_batch(); + let handle = self.engine.as_inner().default_cf(); + self.wbs[self.index].put(handle, key, value).map_err(r2e) + } + + fn put_cf(&mut self, cf: &str, key: &[u8], value: &[u8]) -> Result<()> { + self.check_switch_batch(); + let handle = self.engine.cf(cf)?; + self.wbs[self.index].put(handle, key, value).map_err(r2e) + } + + fn 
delete(&mut self, key: &[u8]) -> Result<()> { + self.check_switch_batch(); + let handle = self.engine.as_inner().default_cf(); + self.wbs[self.index].delete(handle, key).map_err(r2e) + } + + fn delete_cf(&mut self, cf: &str, key: &[u8]) -> Result<()> { + self.check_switch_batch(); + let handle = self.engine.cf(cf)?; + self.wbs[self.index].delete(handle, key).map_err(r2e) + } + + fn delete_range(&mut self, begin_key: &[u8], end_key: &[u8]) -> Result<()> { + self.check_switch_batch(); + let handle = self.engine.as_inner().default_cf(); + self.wbs[self.index] + .delete_range(handle, begin_key, end_key) + .map_err(r2e) + } + + fn delete_range_cf(&mut self, cf: &str, begin_key: &[u8], end_key: &[u8]) -> Result<()> { + self.check_switch_batch(); + let handle = self.engine.cf(cf)?; + self.wbs[self.index] + .delete_range(handle, begin_key, end_key) + .map_err(r2e) + } +} + +#[cfg(test)] +mod tests { + use std::path::Path; + + use engine_traits::{Mutable, Peekable, WriteBatch, WriteBatchExt, CF_DEFAULT}; + use tempfile::Builder; + + use super::*; + use crate::{ + cf_options::RocksCfOptions, db_options::RocksDbOptions, new_engine_opt, RocksEngine, + }; + + fn new_engine(path: &Path, multi_batch_write: bool) -> RocksEngine { + let mut db_opt = RocksDbOptions::default(); + db_opt + .set_unordered_write(false) + .set_enable_pipelined_write(!multi_batch_write) + .set_multi_batch_write(multi_batch_write); + let engine = new_engine_opt( + &path.join("db"), + db_opt, + vec![(CF_DEFAULT, RocksCfOptions::default())], + ) + .unwrap(); + assert_eq!( + engine.as_inner().db_options().multi_batch_write(), + multi_batch_write + ); + engine + } + + #[test] + fn test_should_write_to_engine_with_pipeline_write_mode() { + let path = Builder::new() + .prefix("test-should-write-to-engine") + .tempdir() + .unwrap(); + let engine = new_engine(path.path(), false); + let mut wb = engine.write_batch(); + for _ in 0..RocksEngine::WRITE_BATCH_MAX_KEYS { + wb.put(b"aaa", b"bbb").unwrap(); + } + 
assert!(!wb.should_write_to_engine()); + wb.put(b"aaa", b"bbb").unwrap(); + assert!(wb.should_write_to_engine()); + wb.write().unwrap(); + + let v = engine.get_value(b"aaa").unwrap(); + + assert!(v.is_some()); + assert_eq!(v.unwrap(), b"bbb"); + let mut wb = RocksWriteBatchVec::with_unit_capacity(&engine, 1024); + for _i in 0..RocksEngine::WRITE_BATCH_MAX_KEYS { + wb.put(b"aaa", b"bbb").unwrap(); + } + assert!(!wb.should_write_to_engine()); + wb.put(b"aaa", b"bbb").unwrap(); + assert!(wb.should_write_to_engine()); + wb.clear(); + assert!(!wb.should_write_to_engine()); + } + + #[test] + fn test_should_write_to_engine_with_multi_batch_write_mode() { + let path = Builder::new() + .prefix("test-should-write-to-engine") + .tempdir() + .unwrap(); + let engine = new_engine(path.path(), true); + let mut wb = engine.write_batch(); + for _ in 0..RocksEngine::WRITE_BATCH_MAX_KEYS { + wb.put(b"aaa", b"bbb").unwrap(); + } + assert!(!wb.should_write_to_engine()); + wb.put(b"aaa", b"bbb").unwrap(); + assert!(wb.should_write_to_engine()); + let mut wb = RocksWriteBatchVec::with_unit_capacity(&engine, 1024); + for _ in 0..WRITE_BATCH_MAX_BATCH_NUM * WRITE_BATCH_MAX_KEY_NUM { + wb.put(b"aaa", b"bbb").unwrap(); + } + assert!(!wb.should_write_to_engine()); + wb.put(b"aaa", b"bbb").unwrap(); + assert!(wb.should_write_to_engine()); + wb.clear(); + assert!(!wb.should_write_to_engine()); + } + + #[test] + fn test_write_batch_merge() { + let path = Builder::new() + .prefix("test-should-write-to-engine") + .tempdir() + .unwrap(); + for multi_batch_write in &[false, true] { + let engine = new_engine(path.path(), *multi_batch_write); + let mut wb = engine.write_batch(); + for _ in 0..RocksEngine::WRITE_BATCH_MAX_KEYS { + wb.put(b"aaa", b"bbb").unwrap(); + } + assert_eq!(wb.count(), RocksEngine::WRITE_BATCH_MAX_KEYS); + + let mut wb2 = engine.write_batch(); + for _ in 0..WRITE_BATCH_MAX_KEY_NUM / 2 { + wb2.put(b"aaa", b"bbb").unwrap(); + } + assert_eq!(wb2.count(), WRITE_BATCH_MAX_KEY_NUM / 
2); + // The only batch should be moved directly. + wb.merge(wb2).unwrap(); + assert_eq!( + wb.count(), + RocksEngine::WRITE_BATCH_MAX_KEYS + WRITE_BATCH_MAX_KEY_NUM / 2 + ); + if *multi_batch_write { + assert_eq!( + wb.wbs.len(), + RocksEngine::WRITE_BATCH_MAX_KEYS / WRITE_BATCH_MAX_KEY_NUM + 1 + ); + } + + let mut wb3 = engine.write_batch(); + for _ in 0..WRITE_BATCH_MAX_KEY_NUM / 2 * 3 { + wb3.put(b"aaa", b"bbb").unwrap(); + } + assert_eq!(wb3.count(), WRITE_BATCH_MAX_KEY_NUM / 2 * 3); + // The half batch should be merged together, and then move the left one. + wb.merge(wb3).unwrap(); + assert_eq!( + wb.count(), + RocksEngine::WRITE_BATCH_MAX_KEYS + WRITE_BATCH_MAX_KEY_NUM * 2 + ); + if *multi_batch_write { + assert_eq!( + wb.wbs.len(), + RocksEngine::WRITE_BATCH_MAX_KEYS / WRITE_BATCH_MAX_KEY_NUM + 2 + ); + } + } + } +} diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index c53e06792da..32a23cd070e 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -656,28 +656,6 @@ lazy_static! { "Pending read index count." 
).unwrap(); - pub static ref APPLY_PERF_CONTEXT_TIME_HISTOGRAM: HistogramVec = - register_histogram_vec!( - "tikv_raftstore_apply_perf_context_time_duration_secs", - "Bucketed histogram of request wait time duration.", - &["type"], - exponential_buckets(0.00001, 2.0, 26).unwrap() - ).unwrap(); - - pub static ref STORE_PERF_CONTEXT_TIME_HISTOGRAM: HistogramVec = - register_histogram_vec!( - "tikv_raftstore_store_perf_context_time_duration_secs", - "Bucketed histogram of request wait time duration.", - &["type"], - exponential_buckets(0.00001, 2.0, 26).unwrap() - ).unwrap(); - - pub static ref APPLY_PERF_CONTEXT_TIME_HISTOGRAM_STATIC: PerfContextTimeDuration= - auto_flush_from!(APPLY_PERF_CONTEXT_TIME_HISTOGRAM, PerfContextTimeDuration); - - pub static ref STORE_PERF_CONTEXT_TIME_HISTOGRAM_STATIC: PerfContextTimeDuration= - auto_flush_from!(STORE_PERF_CONTEXT_TIME_HISTOGRAM, PerfContextTimeDuration); - pub static ref READ_QPS_TOPN: GaugeVec = register_gauge_vec!( "tikv_read_qps_topn", From 585763a39348b12e2b5c54430cc9b3ea916f65e6 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Mon, 26 Sep 2022 14:29:45 +0800 Subject: [PATCH 0244/1149] commands: use ReaderWithStats in flashback_to_version command (#13525) ref tikv/tikv#13303 Use `ReaderWithStats` in `flashback_to_version` command to collect the statistics info as much as possible. 
Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- src/storage/txn/actions/cleanup.rs | 4 +--- src/storage/txn/actions/flashback_to_version.rs | 4 ++-- src/storage/txn/commands/flashback_to_version.rs | 9 ++++++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/storage/txn/actions/cleanup.rs b/src/storage/txn/actions/cleanup.rs index 19cb90f0a22..c72905c8910 100644 --- a/src/storage/txn/actions/cleanup.rs +++ b/src/storage/txn/actions/cleanup.rs @@ -39,14 +39,12 @@ pub fn cleanup( ErrorInner::KeyIsLocked(lock.clone().into_lock_info(key.into_raw()?)).into(), ); } - - let is_pessimistic_txn = !lock.for_update_ts.is_zero(); rollback_lock( txn, reader, key, lock, - is_pessimistic_txn, + lock.is_pessimistic_txn(), !protect_rollback, ) } diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index 8047d5dd304..0b9f0461297 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -75,9 +75,9 @@ pub fn flashback_to_version_read_write( Ok((key_old_writes, has_remain_writes)) } -pub fn flashback_to_version( +pub fn flashback_to_version( txn: &mut MvccTxn, - reader: &mut SnapshotReader, + reader: &mut SnapshotReader, next_lock_key: &mut Option, next_write_key: &mut Option, key_locks: Vec<(Key, Lock)>, diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index 61086020b09..b4255138eeb 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -9,8 +9,8 @@ use crate::storage::{ mvcc::{MvccTxn, SnapshotReader}, txn::{ commands::{ - Command, CommandExt, FlashbackToVersionReadPhase, ResponsePolicy, TypedCommand, - WriteCommand, WriteContext, WriteResult, + Command, CommandExt, FlashbackToVersionReadPhase, ReaderWithStats, ResponsePolicy, + TypedCommand, WriteCommand, WriteContext, WriteResult, }, flashback_to_version, 
latch, Result, }, @@ -62,7 +62,10 @@ impl CommandExt for FlashbackToVersion { impl WriteCommand for FlashbackToVersion { fn process_write(mut self, snapshot: S, context: WriteContext<'_, L>) -> Result { - let mut reader = SnapshotReader::new_with_ctx(self.version, snapshot, &self.ctx); + let mut reader = ReaderWithStats::new( + SnapshotReader::new_with_ctx(self.version, snapshot, &self.ctx), + context.statistics, + ); let mut txn = MvccTxn::new(TimeStamp::zero(), context.concurrency_manager); let mut next_lock_key = self.next_lock_key.take(); From 975c0543238a28c9d8a2aaf63009c429e6c2218d Mon Sep 17 00:00:00 2001 From: Hangjie Mo Date: Mon, 26 Sep 2022 17:29:44 +0800 Subject: [PATCH 0245/1149] metrics: fix grafana expr for `tikv_gc_compaction_filtered` (#13536) close tikv/tikv#13537 fix error grafana expr for `tikv_gc_compaction_filtered ` Signed-off-by: Jason Mo Co-authored-by: Ti Chi Robot --- metrics/grafana/tikv_details.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index b48aa216a93..9d64207c214 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -22555,7 +22555,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_gc_compaction_filtered{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by {key_mode}", + "expr": "sum(rate(tikv_gc_compaction_filtered{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", "format": "time_series", "instant": false, "interval": "", From a68a44e09dd4ef7de59db22b11593e7abd4e94df Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 27 Sep 2022 17:35:44 +0800 Subject: [PATCH 0246/1149] *: strip the RefCell off the LocalReader in ServerRaftStoreRouter (#13542) close tikv/tikv#13546 Signed-off-by: SpadeA-Tang --- 
components/backup-stream/src/event_loader.rs | 10 +- components/backup/src/endpoint.rs | 22 +- components/backup/src/service.rs | 6 +- components/cdc/src/initializer.rs | 32 +- components/cdc/src/old_value.rs | 140 +-- components/raftstore/src/router.rs | 26 +- components/resolved_ts/src/cmd.rs | 18 +- components/test_backup/src/lib.rs | 4 +- components/test_raftstore/src/cluster.rs | 8 +- components/test_raftstore/src/node.rs | 2 +- components/test_raftstore/src/server.rs | 6 +- .../test_raftstore/src/transport_simulate.rs | 4 +- components/tikv_kv/src/btree_engine.rs | 16 +- components/tikv_kv/src/lib.rs | 42 +- components/tikv_kv/src/mock_engine.rs | 2 +- components/tikv_kv/src/rocksdb_engine.rs | 2 +- src/coprocessor/endpoint.rs | 2 +- src/import/duplicate_detect.rs | 6 +- src/server/gc_worker/compaction_filter.rs | 82 +- src/server/gc_worker/gc_worker.rs | 103 +- src/server/raftkv.rs | 10 +- src/storage/kv/test_engine_builder.rs | 26 +- src/storage/metrics.rs | 2 +- src/storage/mod.rs | 24 +- src/storage/mvcc/consistency_check.rs | 24 +- src/storage/mvcc/mod.rs | 65 +- src/storage/mvcc/reader/point_getter.rs | 379 +++---- src/storage/mvcc/reader/reader.rs | 4 +- src/storage/mvcc/reader/scanner/backward.rs | 140 +-- src/storage/mvcc/reader/scanner/forward.rs | 254 ++--- src/storage/mvcc/reader/scanner/mod.rs | 157 +-- src/storage/mvcc/txn.rs | 958 ++++++++++-------- src/storage/raw/raw_mvcc.rs | 2 +- .../txn/actions/acquire_pessimistic_lock.rs | 697 +++++++------ .../txn/actions/check_data_constraint.rs | 2 +- src/storage/txn/actions/cleanup.rs | 68 +- src/storage/txn/actions/commit.rs | 82 +- .../txn/actions/flashback_to_version.rs | 78 +- src/storage/txn/actions/gc.rs | 52 +- src/storage/txn/actions/prewrite.rs | 654 ++++++------ src/storage/txn/actions/tests.rs | 61 +- src/storage/txn/commands/atomic_store.rs | 2 +- .../txn/commands/check_secondary_locks.rs | 43 +- src/storage/txn/commands/check_txn_status.rs | 285 +++--- 
src/storage/txn/commands/compare_and_swap.rs | 12 +- src/storage/txn/commands/mod.rs | 14 +- .../txn/commands/pessimistic_rollback.rs | 76 +- src/storage/txn/commands/prewrite.rs | 413 ++++---- src/storage/txn/commands/rollback.rs | 12 +- src/storage/txn/commands/txn_heart_beat.rs | 52 +- src/storage/txn/scheduler.rs | 9 +- src/storage/txn/store.rs | 2 +- tests/benches/hierarchy/engine/mod.rs | 6 +- tests/benches/hierarchy/mvcc/mod.rs | 22 +- tests/benches/hierarchy/txn/mod.rs | 21 +- tests/benches/misc/raftkv/mod.rs | 6 +- tests/benches/misc/storage/incremental_get.rs | 2 +- tests/failpoints/cases/test_gc_metrics.rs | 32 +- tests/failpoints/cases/test_gc_worker.rs | 18 +- tests/failpoints/cases/test_storage.rs | 2 +- tests/failpoints/cases/test_transaction.rs | 28 +- tests/failpoints/cases/test_ttl.rs | 4 +- tests/integrations/raftstore/test_merge.rs | 4 +- .../raftstore/test_transfer_leader.rs | 4 +- tests/integrations/storage/test_raftkv.rs | 60 +- 65 files changed, 2833 insertions(+), 2568 deletions(-) diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index fc84fab0635..90a330cf446 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -503,14 +503,14 @@ mod tests { #[test] fn test_disk_read() { - let engine = TestEngineBuilder::new().build_without_cache().unwrap(); + let mut engine = TestEngineBuilder::new().build_without_cache().unwrap(); for i in 0..100 { let owned_key = format!("{:06}", i); let key = owned_key.as_bytes(); let owned_value = [i as u8; 512]; let value = owned_value.as_slice(); - must_prewrite_put(&engine, key, value, key, i * 2); - must_commit(&engine, key, i * 2, i * 2 + 1); + must_prewrite_put(&mut engine, key, value, key, i * 2); + must_commit(&mut engine, key, i * 2, i * 2 + 1); } // let compact the memtable to disk so we can see the disk read. 
engine.get_rocksdb().as_inner().compact_range(None, None); @@ -520,8 +520,8 @@ mod tests { r.set_start_key(b"".to_vec()); r.set_end_key(b"".to_vec()); - let snap = - block_on(async { tikv_kv::snapshot(&engine, SnapContext::default()).await }).unwrap(); + let snap = block_on(async { tikv_kv::snapshot(&mut engine, SnapContext::default()).await }) + .unwrap(); let mut loader = EventLoader::load_from(snap, TimeStamp::zero(), TimeStamp::max(), &r).unwrap(); diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index b24c61f4efd..1d4f9bbfdd9 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -290,7 +290,7 @@ impl BackupRange { async fn backup( &self, writer_builder: BackupWriterBuilder, - engine: E, + mut engine: E, concurrency_manager: ConcurrencyManager, backup_ts: TimeStamp, begin_ts: TimeStamp, @@ -507,7 +507,7 @@ impl BackupRange { async fn backup_raw_kv_to_file( &self, - engine: E, + mut engine: E, db: RocksEngine, limiter: &Limiter, file_name: String, @@ -1515,7 +1515,7 @@ pub mod tests { let limiter = Arc::new(IoRateLimiter::new_for_test()); let stats = limiter.statistics().unwrap(); let (tmp, endpoint) = new_endpoint_with_limiter(Some(limiter), ApiVersion::V1, false, None); - let engine = endpoint.engine.clone(); + let mut engine = endpoint.engine.clone(); endpoint .region_info @@ -1531,13 +1531,13 @@ pub mod tests { let commit = alloc_ts(); let key = format!("{}", i); must_prewrite_put( - &engine, + &mut engine, key.as_bytes(), &vec![i; *len], key.as_bytes(), start, ); - must_commit(&engine, key.as_bytes(), start, commit); + must_commit(&mut engine, key.as_bytes(), start, commit); backup_tss.push((alloc_ts(), len)); } } @@ -1851,7 +1851,7 @@ pub mod tests { #[test] fn test_scan_error() { let (tmp, endpoint) = new_endpoint(); - let engine = endpoint.engine.clone(); + let mut engine = endpoint.engine.clone(); endpoint .region_info @@ -1862,7 +1862,7 @@ pub mod tests { let start = alloc_ts(); let 
key = format!("{}", start); must_prewrite_put( - &engine, + &mut engine, key.as_bytes(), key.as_bytes(), key.as_bytes(), @@ -1890,7 +1890,7 @@ pub mod tests { // Commit the perwrite. let commit = alloc_ts(); - must_commit(&engine, key.as_bytes(), start, commit); + must_commit(&mut engine, key.as_bytes(), start, commit); // Test whether it can correctly convert not leader to region error. engine.trigger_not_leader(); @@ -1916,7 +1916,7 @@ pub mod tests { #[test] fn test_cancel() { let (temp, mut endpoint) = new_endpoint(); - let engine = endpoint.engine.clone(); + let mut engine = endpoint.engine.clone(); endpoint .region_info @@ -1927,7 +1927,7 @@ pub mod tests { let start = alloc_ts(); let key = format!("{}", start); must_prewrite_put( - &engine, + &mut engine, key.as_bytes(), key.as_bytes(), key.as_bytes(), @@ -1935,7 +1935,7 @@ pub mod tests { ); // Commit the perwrite. let commit = alloc_ts(); - must_commit(&engine, key.as_bytes(), start, commit); + must_commit(&mut engine, key.as_bytes(), start, commit); let now = alloc_ts(); let mut req = BackupRequest::default(); diff --git a/components/backup/src/service.rs b/components/backup/src/service.rs index 1281f12ad79..dd3355b1e92 100644 --- a/components/backup/src/service.rs +++ b/components/backup/src/service.rs @@ -160,7 +160,7 @@ mod tests { let (_server, client, mut rx) = new_rpc_suite(); let (tmp, endpoint) = new_endpoint(); - let engine = endpoint.engine.clone(); + let mut engine = endpoint.engine.clone(); endpoint.region_info.set_regions(vec![ (b"".to_vec(), b"2".to_vec(), 1), (b"2".to_vec(), b"5".to_vec(), 2), @@ -172,14 +172,14 @@ mod tests { let start = alloc_ts(); let key = format!("{}", i); must_prewrite_put( - &engine, + &mut engine, key.as_bytes(), key.as_bytes(), key.as_bytes(), start, ); let commit = alloc_ts(); - must_commit(&engine, key.as_bytes(), start, commit); + must_commit(&mut engine, key.as_bytes(), start, commit); } let now = alloc_ts(); diff --git a/components/cdc/src/initializer.rs 
b/components/cdc/src/initializer.rs index 6be880af84c..36c1636a7e8 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -652,7 +652,7 @@ mod tests { #[test] fn test_initializer_build_resolver() { - let engine = TestEngineBuilder::new().build_without_cache().unwrap(); + let mut engine = TestEngineBuilder::new().build_without_cache().unwrap(); let mut expected_locks = BTreeMap::>>::new(); @@ -662,7 +662,7 @@ mod tests { let k = &[b'k', i]; total_bytes += k.len(); let ts = TimeStamp::new(i as _); - must_acquire_pessimistic_lock(&engine, k, k, ts, ts); + must_acquire_pessimistic_lock(&mut engine, k, k, ts, ts); } for i in 10..100 { @@ -670,7 +670,7 @@ mod tests { total_bytes += k.len(); total_bytes += v.len(); let ts = TimeStamp::new(i as _); - must_prewrite_put(&engine, k, v, k, ts); + must_prewrite_put(&mut engine, k, v, k, ts); expected_locks .entry(ts) .or_default() @@ -760,7 +760,7 @@ mod tests { // handling `OldValue::SeekWrite` with `OldValueReader`. #[test] fn test_incremental_scanner_with_hint_min_ts() { - let engine = TestEngineBuilder::new().build_without_cache().unwrap(); + let mut engine = TestEngineBuilder::new().build_without_cache().unwrap(); let v_suffix = |suffix: usize| -> Vec { let suffix = suffix.to_string().into_bytes(); @@ -770,7 +770,11 @@ mod tests { v }; - let check_handling_old_value_seek_write = || { + fn check_handling_old_value_seek_write(engine: &mut E, v_suffix: F) + where + E: Engine, + F: Fn(usize) -> Vec, + { // Do incremental scan with different `hint_min_ts` values. 
for checkpoint_ts in [200, 100, 150] { let (mut worker, pool, mut initializer, _rx, mut drain) = mock_initializer( @@ -807,29 +811,29 @@ mod tests { block_on(th).unwrap(); worker.stop(); } - }; + } // Create the initial data with CF_WRITE L0: |zkey_110, zkey1_160| - must_prewrite_put(&engine, b"zkey", &v_suffix(100), b"zkey", 100); - must_commit(&engine, b"zkey", 100, 110); - must_prewrite_put(&engine, b"zzzz", &v_suffix(150), b"zzzz", 150); - must_commit(&engine, b"zzzz", 150, 160); + must_prewrite_put(&mut engine, b"zkey", &v_suffix(100), b"zkey", 100); + must_commit(&mut engine, b"zkey", 100, 110); + must_prewrite_put(&mut engine, b"zzzz", &v_suffix(150), b"zzzz", 150); + must_commit(&mut engine, b"zzzz", 150, 160); engine .kv_engine() .unwrap() .flush_cf(CF_WRITE, true) .unwrap(); - must_prewrite_delete(&engine, b"zkey", b"zkey", 200); - check_handling_old_value_seek_write(); // For TxnEntry::Prewrite. + must_prewrite_delete(&mut engine, b"zkey", b"zkey", 200); + check_handling_old_value_seek_write(&mut engine, v_suffix); // For TxnEntry::Prewrite. // CF_WRITE L0: |zkey_110, zkey1_160|, |zkey_210| - must_commit(&engine, b"zkey", 200, 210); + must_commit(&mut engine, b"zkey", 200, 210); engine .kv_engine() .unwrap() .flush_cf(CF_WRITE, false) .unwrap(); - check_handling_old_value_seek_write(); // For TxnEntry::Commit. + check_handling_old_value_seek_write(&mut engine, v_suffix); // For TxnEntry::Commit. 
} #[test] diff --git a/components/cdc/src/old_value.rs b/components/cdc/src/old_value.rs index 9d60474b952..1149d8ce3e0 100644 --- a/components/cdc/src/old_value.rs +++ b/components/cdc/src/old_value.rs @@ -381,120 +381,120 @@ mod tests { #[test] fn test_old_value_reader() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let kv_engine = engine.get_rocksdb(); let k = b"k"; let key = Key::from_raw(k); - must_prewrite_put(&engine, k, b"v1", k, 1); + must_prewrite_put(&mut engine, k, b"v1", k, 1); must_get_eq(&kv_engine, &key, 2, None); must_get_eq(&kv_engine, &key, 1, None); - must_commit(&engine, k, 1, 1); + must_commit(&mut engine, k, 1, 1); must_get_eq(&kv_engine, &key, 1, Some(b"v1".to_vec())); - must_prewrite_put(&engine, k, b"v2", k, 2); + must_prewrite_put(&mut engine, k, b"v2", k, 2); must_get_eq(&kv_engine, &key, 2, Some(b"v1".to_vec())); - must_rollback(&engine, k, 2, false); + must_rollback(&mut engine, k, 2, false); - must_prewrite_put(&engine, k, b"v3", k, 3); + must_prewrite_put(&mut engine, k, b"v3", k, 3); must_get_eq(&kv_engine, &key, 3, Some(b"v1".to_vec())); - must_commit(&engine, k, 3, 3); + must_commit(&mut engine, k, 3, 3); - must_prewrite_delete(&engine, k, k, 4); + must_prewrite_delete(&mut engine, k, k, 4); must_get_eq(&kv_engine, &key, 4, Some(b"v3".to_vec())); - must_commit(&engine, k, 4, 4); + must_commit(&mut engine, k, 4, 4); - must_prewrite_put(&engine, k, vec![b'v'; 5120].as_slice(), k, 5); + must_prewrite_put(&mut engine, k, vec![b'v'; 5120].as_slice(), k, 5); must_get_eq(&kv_engine, &key, 5, None); - must_commit(&engine, k, 5, 5); + must_commit(&mut engine, k, 5, 5); - must_prewrite_delete(&engine, k, k, 6); + must_prewrite_delete(&mut engine, k, k, 6); must_get_eq(&kv_engine, &key, 6, Some(vec![b'v'; 5120])); - must_rollback(&engine, k, 6, false); + must_rollback(&mut engine, k, 6, false); - must_prewrite_put(&engine, k, b"v4", k, 7); - must_commit(&engine, k, 
7, 9); + must_prewrite_put(&mut engine, k, b"v4", k, 7); + must_commit(&mut engine, k, 7, 9); - must_acquire_pessimistic_lock(&engine, k, k, 8, 10); - must_pessimistic_prewrite_put(&engine, k, b"v5", k, 8, 10, DoPessimisticCheck); + must_acquire_pessimistic_lock(&mut engine, k, k, 8, 10); + must_pessimistic_prewrite_put(&mut engine, k, b"v5", k, 8, 10, DoPessimisticCheck); must_get_eq(&kv_engine, &key, 10, Some(b"v4".to_vec())); - must_commit(&engine, k, 8, 11); + must_commit(&mut engine, k, 8, 11); } #[test] fn test_old_value_reader_check_gc_fence() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let kv_engine = engine.get_rocksdb(); // PUT, Read // `--------------^ - must_prewrite_put(&engine, b"k1", b"v1", b"k1", 10); - must_commit(&engine, b"k1", 10, 20); - must_cleanup_with_gc_fence(&engine, b"k1", 20, 0, 50, true); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 10); + must_commit(&mut engine, b"k1", 10, 20); + must_cleanup_with_gc_fence(&mut engine, b"k1", 20, 0, 50, true); // PUT, Read // `---------^ - must_prewrite_put(&engine, b"k2", b"v2", b"k2", 11); - must_commit(&engine, b"k2", 11, 20); - must_cleanup_with_gc_fence(&engine, b"k2", 20, 0, 40, true); + must_prewrite_put(&mut engine, b"k2", b"v2", b"k2", 11); + must_commit(&mut engine, b"k2", 11, 20); + must_cleanup_with_gc_fence(&mut engine, b"k2", 20, 0, 40, true); // PUT, Read // `-----^ - must_prewrite_put(&engine, b"k3", b"v3", b"k3", 12); - must_commit(&engine, b"k3", 12, 20); - must_cleanup_with_gc_fence(&engine, b"k3", 20, 0, 30, true); + must_prewrite_put(&mut engine, b"k3", b"v3", b"k3", 12); + must_commit(&mut engine, b"k3", 12, 20); + must_cleanup_with_gc_fence(&mut engine, b"k3", 20, 0, 30, true); // PUT, PUT, Read // `-----^ `----^ - must_prewrite_put(&engine, b"k4", b"v4", b"k4", 13); - must_commit(&engine, b"k4", 13, 14); - must_prewrite_put(&engine, b"k4", b"v4x", b"k4", 15); - must_commit(&engine, b"k4", 15, 
20); - must_cleanup_with_gc_fence(&engine, b"k4", 14, 0, 20, false); - must_cleanup_with_gc_fence(&engine, b"k4", 20, 0, 30, true); + must_prewrite_put(&mut engine, b"k4", b"v4", b"k4", 13); + must_commit(&mut engine, b"k4", 13, 14); + must_prewrite_put(&mut engine, b"k4", b"v4x", b"k4", 15); + must_commit(&mut engine, b"k4", 15, 20); + must_cleanup_with_gc_fence(&mut engine, b"k4", 14, 0, 20, false); + must_cleanup_with_gc_fence(&mut engine, b"k4", 20, 0, 30, true); // PUT, DEL, Read // `-----^ `----^ - must_prewrite_put(&engine, b"k5", b"v5", b"k5", 13); - must_commit(&engine, b"k5", 13, 14); - must_prewrite_delete(&engine, b"k5", b"v5", 15); - must_commit(&engine, b"k5", 15, 20); - must_cleanup_with_gc_fence(&engine, b"k5", 14, 0, 20, false); - must_cleanup_with_gc_fence(&engine, b"k5", 20, 0, 30, true); + must_prewrite_put(&mut engine, b"k5", b"v5", b"k5", 13); + must_commit(&mut engine, b"k5", 13, 14); + must_prewrite_delete(&mut engine, b"k5", b"v5", 15); + must_commit(&mut engine, b"k5", 15, 20); + must_cleanup_with_gc_fence(&mut engine, b"k5", 14, 0, 20, false); + must_cleanup_with_gc_fence(&mut engine, b"k5", 20, 0, 30, true); // PUT, LOCK, LOCK, Read // `------------------------^ - must_prewrite_put(&engine, b"k6", b"v6", b"k6", 16); - must_commit(&engine, b"k6", 16, 20); - must_prewrite_lock(&engine, b"k6", b"k6", 25); - must_commit(&engine, b"k6", 25, 26); - must_prewrite_lock(&engine, b"k6", b"k6", 28); - must_commit(&engine, b"k6", 28, 29); - must_cleanup_with_gc_fence(&engine, b"k6", 20, 0, 50, true); + must_prewrite_put(&mut engine, b"k6", b"v6", b"k6", 16); + must_commit(&mut engine, b"k6", 16, 20); + must_prewrite_lock(&mut engine, b"k6", b"k6", 25); + must_commit(&mut engine, b"k6", 25, 26); + must_prewrite_lock(&mut engine, b"k6", b"k6", 28); + must_commit(&mut engine, b"k6", 28, 29); + must_cleanup_with_gc_fence(&mut engine, b"k6", 20, 0, 50, true); // PUT, LOCK, LOCK, Read // `---------^ - must_prewrite_put(&engine, b"k7", b"v7", b"k7", 16); - 
must_commit(&engine, b"k7", 16, 20); - must_prewrite_lock(&engine, b"k7", b"k7", 25); - must_commit(&engine, b"k7", 25, 26); - must_cleanup_with_gc_fence(&engine, b"k7", 20, 0, 27, true); - must_prewrite_lock(&engine, b"k7", b"k7", 28); - must_commit(&engine, b"k7", 28, 29); + must_prewrite_put(&mut engine, b"k7", b"v7", b"k7", 16); + must_commit(&mut engine, b"k7", 16, 20); + must_prewrite_lock(&mut engine, b"k7", b"k7", 25); + must_commit(&mut engine, b"k7", 25, 26); + must_cleanup_with_gc_fence(&mut engine, b"k7", 20, 0, 27, true); + must_prewrite_lock(&mut engine, b"k7", b"k7", 28); + must_commit(&mut engine, b"k7", 28, 29); // PUT, Read // * (GC fence ts is 0) - must_prewrite_put(&engine, b"k8", b"v8", b"k8", 17); - must_commit(&engine, b"k8", 17, 30); - must_cleanup_with_gc_fence(&engine, b"k8", 30, 0, 0, true); + must_prewrite_put(&mut engine, b"k8", b"v8", b"k8", 17); + must_commit(&mut engine, b"k8", 17, 30); + must_cleanup_with_gc_fence(&mut engine, b"k8", 30, 0, 0, true); // PUT, LOCK, Read // `-----------^ - must_prewrite_put(&engine, b"k9", b"v9", b"k9", 18); - must_commit(&engine, b"k9", 18, 20); - must_prewrite_lock(&engine, b"k9", b"k9", 25); - must_commit(&engine, b"k9", 25, 26); - must_cleanup_with_gc_fence(&engine, b"k9", 20, 0, 27, true); + must_prewrite_put(&mut engine, b"k9", b"v9", b"k9", 18); + must_commit(&mut engine, b"k9", 18, 20); + must_prewrite_lock(&mut engine, b"k9", b"k9", 25); + must_commit(&mut engine, b"k9", 25, 26); + must_cleanup_with_gc_fence(&mut engine, b"k9", 20, 0, 27, true); let expected_results = vec![ (b"k1", Some(b"v1")), @@ -515,16 +515,16 @@ mod tests { #[test] fn test_old_value_reuse_cursor() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let kv_engine = engine.get_rocksdb(); let value = || vec![b'v'; 1024]; for i in 0..100 { let key = format!("key-{:0>3}", i).into_bytes(); - must_prewrite_put(&engine, &key, &value(), &key, 100); - 
must_commit(&engine, &key, 100, 101); - must_prewrite_put(&engine, &key, &value(), &key, 200); - must_commit(&engine, &key, 200, 201); + must_prewrite_put(&mut engine, &key, &value(), &key, 100); + must_commit(&mut engine, &key, 100, 101); + must_prewrite_put(&mut engine, &key, &value(), &key, 200); + must_commit(&mut engine, &key, 200, 201); } let snapshot = Arc::new(kv_engine.snapshot()); @@ -586,14 +586,14 @@ mod tests { let mut cfg = DbConfig::default(); cfg.writecf.disable_auto_compactions = true; cfg.writecf.pin_l0_filter_and_index_blocks = false; - let engine = TestEngineBuilder::new().build_with_cfg(&cfg).unwrap(); + let mut engine = TestEngineBuilder::new().build_with_cfg(&cfg).unwrap(); let kv_engine = engine.get_rocksdb(); // Key must start with `z` to pass `TsFilter`'s check. for i in 0..4 { let key = format!("zkey-{:0>3}", i).into_bytes(); - must_prewrite_put(&engine, &key, b"value", &key, 100); - must_commit(&engine, &key, 100, 101); + must_prewrite_put(&mut engine, &key, b"value", &key, 100); + must_commit(&mut engine, &key, 100, 101); kv_engine.flush_cf(CF_WRITE, true).unwrap(); } diff --git a/components/raftstore/src/router.rs b/components/raftstore/src/router.rs index f52687c311f..90cc41f2bd8 100644 --- a/components/raftstore/src/router.rs +++ b/components/raftstore/src/router.rs @@ -1,8 +1,6 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
// #[PerformanceCriticalPath] -use std::cell::RefCell; - use crossbeam::channel::TrySendError; use engine_traits::{KvEngine, RaftEngine, Snapshot}; use kvproto::{raft_cmdpb::RaftCmdRequest, raft_serverpb::RaftMessage}; @@ -115,13 +113,13 @@ where EK: KvEngine, { fn read( - &self, + &mut self, read_id: Option, req: RaftCmdRequest, cb: Callback, ) -> RaftStoreResult<()>; - fn release_snapshot_cache(&self); + fn release_snapshot_cache(&mut self); } #[derive(Clone)] @@ -174,7 +172,7 @@ where { router: RaftRouter, local_reader: - RefCell, EK, CachedReadDelegate, StoreMetaDelegate>>, + LocalReader, EK, CachedReadDelegate, StoreMetaDelegate>, } impl Clone for ServerRaftStoreRouter @@ -194,9 +192,13 @@ impl ServerRaftStoreRouter { /// Creates a new router. pub fn new( router: RaftRouter, - reader: LocalReader, EK, CachedReadDelegate, StoreMetaDelegate>, + local_reader: LocalReader< + RaftRouter, + EK, + CachedReadDelegate, + StoreMetaDelegate, + >, ) -> ServerRaftStoreRouter { - let local_reader = RefCell::new(reader); ServerRaftStoreRouter { router, local_reader, @@ -247,19 +249,17 @@ impl RaftStoreRouter for ServerRaftStoreRouter impl LocalReadRouter for ServerRaftStoreRouter { fn read( - &self, + &mut self, read_id: Option, req: RaftCmdRequest, cb: Callback, ) -> RaftStoreResult<()> { - let mut local_reader = self.local_reader.borrow_mut(); - local_reader.read(read_id, req, cb); + self.local_reader.read(read_id, req, cb); Ok(()) } - fn release_snapshot_cache(&self) { - let mut local_reader = self.local_reader.borrow_mut(); - local_reader.release_snapshot_cache(); + fn release_snapshot_cache(&mut self) { + self.local_reader.release_snapshot_cache(); } } diff --git a/components/resolved_ts/src/cmd.rs b/components/resolved_ts/src/cmd.rs index 8d3eb3bb48d..89d7167cc26 100644 --- a/components/resolved_ts/src/cmd.rs +++ b/components/resolved_ts/src/cmd.rs @@ -316,7 +316,7 @@ mod tests { #[test] fn test_cmd_encode() { let rocks_engine = 
TestEngineBuilder::new().build().unwrap(); - let engine = MockEngineBuilder::from_rocks_engine(rocks_engine).build(); + let mut engine = MockEngineBuilder::from_rocks_engine(rocks_engine).build(); let mut reqs = vec![Modify::Put("default", Key::from_raw(b"k1"), b"v1".to_vec()).into()]; let mut req = Request::default(); @@ -326,17 +326,17 @@ mod tests { assert_eq!(has_ingest_sst, true); assert!(ChangeLog::encode_rows(changes, false).is_empty()); - must_prewrite_put(&engine, b"k1", b"v1", b"k1", 1); - must_commit(&engine, b"k1", 1, 2); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 1); + must_commit(&mut engine, b"k1", 1, 2); - must_prewrite_put(&engine, b"k1", b"v2", b"k1", 3); - must_rollback(&engine, b"k1", 3, false); + must_prewrite_put(&mut engine, b"k1", b"v2", b"k1", 3); + must_rollback(&mut engine, b"k1", 3, false); - must_prewrite_put(&engine, b"k1", &[b'v'; 512], b"k1", 4); - must_commit(&engine, b"k1", 4, 5); + must_prewrite_put(&mut engine, b"k1", &[b'v'; 512], b"k1", 4); + must_commit(&mut engine, b"k1", 4, 5); - must_prewrite_put(&engine, b"k1", b"v3", b"k1", 5); - must_rollback(&engine, b"k1", 5, false); + must_prewrite_put(&mut engine, b"k1", b"v3", b"k1", 5); + must_rollback(&mut engine, b"k1", 5, false); let k1 = Key::from_raw(b"k1"); let rows: Vec<_> = engine diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index 5447e8f2b37..a45a3f52462 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -339,7 +339,7 @@ impl TestSuite { let mut total_kvs = 0; let mut total_bytes = 0; let sim = self.cluster.sim.rl(); - let engine = sim.storages[&self.context.get_peer().get_store_id()].clone(); + let mut engine = sim.storages[&self.context.get_peer().get_store_id()].clone(); let snap_ctx = SnapContext { pb_ctx: &self.context, ..Default::default() @@ -382,7 +382,7 @@ impl TestSuite { let mut total_bytes = 0; let sim = self.cluster.sim.rl(); - let engine = 
sim.storages[&self.context.get_peer().get_store_id()].clone(); + let mut engine = sim.storages[&self.context.get_peer().get_store_id()].clone(); let snap_ctx = SnapContext { pb_ctx: &self.context, ..Default::default() diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 9a69c7110b4..ef0f2246b7d 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -111,7 +111,7 @@ pub trait Simulator { } fn read( - &self, + &mut self, batch_id: Option, request: RaftCmdRequest, timeout: Duration, @@ -124,7 +124,7 @@ pub trait Simulator { } fn async_read( - &self, + &mut self, node_id: u64, batch_id: Option, request: RaftCmdRequest, @@ -415,7 +415,7 @@ impl Cluster { request: RaftCmdRequest, timeout: Duration, ) -> Result { - match self.sim.rl().read(batch_id, request.clone(), timeout) { + match self.sim.wl().read(batch_id, request.clone(), timeout) { Err(e) => { warn!("failed to read {:?}: {:?}", request, e); Err(e) @@ -439,7 +439,7 @@ impl Cluster { } } let ret = if is_read { - self.sim.rl().read(None, request.clone(), timeout) + self.sim.wl().read(None, request.clone(), timeout) } else { self.sim.rl().call_command(request.clone(), timeout) }; diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index f604ce7dff7..d6aa1eaefc8 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -444,7 +444,7 @@ impl Simulator for NodeCluster { } fn async_read( - &self, + &mut self, node_id: u64, batch_id: Option, request: RaftCmdRequest, diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 859477ee5b0..a3a9455fb20 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -708,13 +708,13 @@ impl Simulator for ServerCluster { } fn async_read( - &self, + &mut self, node_id: u64, batch_id: Option, request: RaftCmdRequest, cb: 
Callback, ) { - match self.metas.get(&node_id) { + match self.metas.get_mut(&node_id) { None => { let e: RaftError = box_err!("missing sender for store {}", node_id); let mut resp = RaftCmdResponse::default(); @@ -781,7 +781,7 @@ impl Cluster { ctx.set_peer(leader); ctx.set_region_epoch(epoch); - let storage = self.sim.rl().storages.get(&store_id).unwrap().clone(); + let mut storage = self.sim.rl().storages.get(&store_id).unwrap().clone(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() diff --git a/components/test_raftstore/src/transport_simulate.rs b/components/test_raftstore/src/transport_simulate.rs index 0aa778d01b0..00c12073511 100644 --- a/components/test_raftstore/src/transport_simulate.rs +++ b/components/test_raftstore/src/transport_simulate.rs @@ -251,7 +251,7 @@ impl> RaftStoreRouter for SimulateT impl> LocalReadRouter for SimulateTransport { fn read( - &self, + &mut self, read_id: Option, req: RaftCmdRequest, cb: Callback, @@ -259,7 +259,7 @@ impl> LocalReadRouter for SimulateT self.ch.read(read_id, req, cb) } - fn release_snapshot_cache(&self) { + fn release_snapshot_cache(&mut self) { self.ch.release_snapshot_cache() } } diff --git a/components/tikv_kv/src/btree_engine.rs b/components/tikv_kv/src/btree_engine.rs index b75c5d6851a..473b993bf39 100644 --- a/components/tikv_kv/src/btree_engine.rs +++ b/components/tikv_kv/src/btree_engine.rs @@ -103,7 +103,7 @@ impl Engine for BTreeEngine { /// warning: It returns a fake snapshot whose content will be affected by /// the later modifies! 
fn async_snapshot( - &self, + &mut self, _ctx: SnapContext<'_>, cb: EngineCallback, ) -> EngineResult<()> { @@ -307,25 +307,25 @@ pub mod tests { #[test] fn test_btree_engine() { - let engine = BTreeEngine::new(TEST_ENGINE_CFS); - test_base_curd_options(&engine) + let mut engine = BTreeEngine::new(TEST_ENGINE_CFS); + test_base_curd_options(&mut engine) } #[test] fn test_linear_of_btree_engine() { - let engine = BTreeEngine::default(); - test_linear(&engine); + let mut engine = BTreeEngine::default(); + test_linear(&mut engine); } #[test] fn test_statistic_of_btree_engine() { - let engine = BTreeEngine::default(); - test_cfs_statistics(&engine); + let mut engine = BTreeEngine::default(); + test_cfs_statistics(&mut engine); } #[test] fn test_bounds_of_btree_engine() { - let engine = BTreeEngine::default(); + let mut engine = BTreeEngine::default(); let test_data = vec![ (b"a1".to_vec(), b"v1".to_vec()), (b"a3".to_vec(), b"v3".to_vec()), diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index eec49db506c..77f9a00efcb 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -278,7 +278,7 @@ pub trait Engine: Send + Clone + 'static { /// region_modifies records each region's modifications. fn modify_on_kv_engine(&self, region_modifies: HashMap>) -> Result<()>; - fn async_snapshot(&self, ctx: SnapContext<'_>, cb: Callback) -> Result<()>; + fn async_snapshot(&mut self, ctx: SnapContext<'_>, cb: Callback) -> Result<()>; /// Precheck request which has write with it's context. 
fn precheck_write_with_ctx(&self, _ctx: &Context) -> Result<()> { @@ -308,9 +308,9 @@ pub trait Engine: Send + Clone + 'static { .unwrap_or_else(|| Err(Error::from(ErrorInner::Timeout(timeout)))) } - fn release_snapshot(&self) {} + fn release_snapshot(&mut self) {} - fn snapshot(&self, ctx: SnapContext<'_>) -> Result { + fn snapshot(&mut self, ctx: SnapContext<'_>) -> Result { let timeout = Duration::from_secs(DEFAULT_TIMEOUT_SECS); wait_op!(|cb| self.async_snapshot(ctx, cb), timeout) .unwrap_or_else(|| Err(Error::from(ErrorInner::Timeout(timeout)))) @@ -538,10 +538,10 @@ thread_local! { /// Precondition: `TLS_ENGINE_ANY` is non-null. pub unsafe fn with_tls_engine(f: F) -> R where - F: FnOnce(&E) -> R, + F: FnOnce(&mut E) -> R, { TLS_ENGINE_ANY.with(|e| { - let engine = &*(*e.get() as *const E); + let engine = &mut *(*e.get() as *mut E); f(engine) }) } @@ -583,7 +583,7 @@ pub unsafe fn destroy_tls_engine() { /// Get a snapshot of `engine`. pub fn snapshot( - engine: &E, + engine: &mut E, ctx: SnapContext<'_>, ) -> impl std::future::Future> { let begin = Instant::now(); @@ -697,12 +697,12 @@ pub mod tests { .unwrap(); } - pub fn assert_has(engine: &E, key: &[u8], value: &[u8]) { + pub fn assert_has(engine: &mut E, key: &[u8], value: &[u8]) { let snapshot = engine.snapshot(Default::default()).unwrap(); assert_eq!(snapshot.get(&Key::from_raw(key)).unwrap().unwrap(), value); } - pub fn assert_has_cf(engine: &E, cf: CfName, key: &[u8], value: &[u8]) { + pub fn assert_has_cf(engine: &mut E, cf: CfName, key: &[u8], value: &[u8]) { let snapshot = engine.snapshot(Default::default()).unwrap(); assert_eq!( snapshot.get_cf(cf, &Key::from_raw(key)).unwrap().unwrap(), @@ -710,17 +710,17 @@ pub mod tests { ); } - pub fn assert_none(engine: &E, key: &[u8]) { + pub fn assert_none(engine: &mut E, key: &[u8]) { let snapshot = engine.snapshot(Default::default()).unwrap(); assert_eq!(snapshot.get(&Key::from_raw(key)).unwrap(), None); } - pub fn assert_none_cf(engine: &E, cf: CfName, 
key: &[u8]) { + pub fn assert_none_cf(engine: &mut E, cf: CfName, key: &[u8]) { let snapshot = engine.snapshot(Default::default()).unwrap(); assert_eq!(snapshot.get_cf(cf, &Key::from_raw(key)).unwrap(), None); } - fn assert_seek(engine: &E, key: &[u8], pair: (&[u8], &[u8])) { + fn assert_seek(engine: &mut E, key: &[u8], pair: (&[u8], &[u8])) { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut cursor = Cursor::new( snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), @@ -733,7 +733,7 @@ pub mod tests { assert_eq!(cursor.value(&mut statistics), pair.1); } - fn assert_reverse_seek(engine: &E, key: &[u8], pair: (&[u8], &[u8])) { + fn assert_reverse_seek(engine: &mut E, key: &[u8], pair: (&[u8], &[u8])) { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut cursor = Cursor::new( snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), @@ -778,7 +778,7 @@ pub mod tests { assert_eq!(cursor.value(&mut statistics), pair.1); } - pub fn test_base_curd_options(engine: &E) { + pub fn test_base_curd_options(engine: &mut E) { test_get_put(engine); test_batch(engine); test_empty_seek(engine); @@ -788,7 +788,7 @@ pub mod tests { test_empty_write(engine); } - fn test_get_put(engine: &E) { + fn test_get_put(engine: &mut E) { assert_none(engine, b"x"); must_put(engine, b"x", b"1"); assert_has(engine, b"x", b"1"); @@ -796,7 +796,7 @@ pub mod tests { assert_has(engine, b"x", b"2"); } - fn test_batch(engine: &E) { + fn test_batch(engine: &mut E) { engine .write( &Context::default(), @@ -822,7 +822,7 @@ pub mod tests { assert_none(engine, b"y"); } - fn test_seek(engine: &E) { + fn test_seek(engine: &mut E) { must_put(engine, b"x", b"1"); assert_seek(engine, b"x", (b"x", b"1")); assert_seek(engine, b"a", (b"x", b"1")); @@ -853,7 +853,7 @@ pub mod tests { must_delete(engine, b"z"); } - fn test_near_seek(engine: &E) { + fn test_near_seek(engine: &mut E) { must_put(engine, b"x", b"1"); must_put(engine, b"z", b"2"); let snapshot = 
engine.snapshot(Default::default()).unwrap(); @@ -897,7 +897,7 @@ pub mod tests { } } - fn test_empty_seek(engine: &E) { + fn test_empty_seek(engine: &mut E) { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut cursor = Cursor::new( snapshot.iter(CF_DEFAULT, IterOptions::default()).unwrap(), @@ -1042,7 +1042,7 @@ pub mod tests { } } - pub fn test_linear(engine: &E) { + pub fn test_linear(engine: &mut E) { for i in 50..50 + SEEK_BOUND * 10 { let key = format!("key_{}", i * 2); let value = format!("value_{}", i); @@ -1090,7 +1090,7 @@ pub mod tests { } } - fn test_cf(engine: &E) { + fn test_cf(engine: &mut E) { assert_none_cf(engine, "cf", b"key"); must_put_cf(engine, "cf", b"key", b"value"); assert_has_cf(engine, "cf", b"key", b"value"); @@ -1104,7 +1104,7 @@ pub mod tests { .unwrap_err(); } - pub fn test_cfs_statistics(engine: &E) { + pub fn test_cfs_statistics(engine: &mut E) { must_put(engine, b"foo", b"bar1"); must_put(engine, b"foo2", b"bar2"); must_put(engine, b"foo3", b"bar3"); // deleted diff --git a/components/tikv_kv/src/mock_engine.rs b/components/tikv_kv/src/mock_engine.rs index 3f9d0e1a098..84605a04084 100644 --- a/components/tikv_kv/src/mock_engine.rs +++ b/components/tikv_kv/src/mock_engine.rs @@ -157,7 +157,7 @@ impl Engine for MockEngine { self.base.modify_on_kv_engine(region_modifies) } - fn async_snapshot(&self, ctx: SnapContext<'_>, cb: Callback) -> Result<()> { + fn async_snapshot(&mut self, ctx: SnapContext<'_>, cb: Callback) -> Result<()> { self.base.async_snapshot(ctx, cb) } diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index a1e98326fe2..0ef9b5b274c 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -253,7 +253,7 @@ impl Engine for RocksEngine { Ok(()) } - fn async_snapshot(&self, _: SnapContext<'_>, cb: Callback) -> Result<()> { + fn async_snapshot(&mut self, _: SnapContext<'_>, cb: Callback) -> Result<()> { 
fail_point!("rockskv_async_snapshot", |_| Err(box_err!( "snapshot failed" ))); diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 5f1027e738a..1b7d42a8575 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -350,7 +350,7 @@ impl Endpoint { #[inline] fn async_snapshot( - engine: &E, + engine: &mut E, ctx: &ReqContext, ) -> impl std::future::Future> { let mut snap_ctx = SnapContext { diff --git a/src/import/duplicate_detect.rs b/src/import/duplicate_detect.rs index c5429315938..dbd819efbbf 100644 --- a/src/import/duplicate_detect.rs +++ b/src/import/duplicate_detect.rs @@ -350,7 +350,7 @@ mod tests { #[test] fn test_duplicate_detect() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let mut storage = TestStorageBuilderApiV1::new(DummyLockManager) .build() .unwrap(); let mut data = vec![]; @@ -408,7 +408,7 @@ mod tests { // (108,18) is not repeated with (108,10). #[test] fn test_duplicate_detect_incremental() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let mut storage = TestStorageBuilderApiV1::new(DummyLockManager) .build() .unwrap(); for &start in &[100, 104, 108, 112] { @@ -469,7 +469,7 @@ mod tests { #[test] fn test_duplicate_detect_rollback_and_delete() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let mut storage = TestStorageBuilderApiV1::new(DummyLockManager) .build() .unwrap(); let data = vec![ diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index eef7739f979..bd5896296bb 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -750,7 +750,7 @@ pub mod test_utils { use crate::storage::kv::RocksEngine as StorageRocksEngine; /// Do a global GC with the given safe point. 
- pub fn gc_by_compact(engine: &StorageRocksEngine, _: &[u8], safe_point: u64) { + pub fn gc_by_compact(engine: &mut StorageRocksEngine, _: &[u8], safe_point: u64) { let engine = engine.get_rocksdb(); // Put a new key-value pair to ensure compaction can be triggered correctly. engine.delete_cf("write", b"znot-exists-key").unwrap(); @@ -942,31 +942,31 @@ pub mod tests { // Test compaction filter won't break basic GC rules. #[test] fn test_compaction_filter_basic() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let raw_engine = engine.get_rocksdb(); let value = vec![b'v'; 512]; let mut gc_runner = TestGcRunner::new(0); // GC can't delete keys after the given safe point. - must_prewrite_put(&engine, b"zkey", &value, b"zkey", 100); - must_commit(&engine, b"zkey", 100, 110); + must_prewrite_put(&mut engine, b"zkey", &value, b"zkey", 100); + must_commit(&mut engine, b"zkey", 100, 110); gc_runner.safe_point(50).gc(&raw_engine); - must_get(&engine, b"zkey", 110, &value); + must_get(&mut engine, b"zkey", 110, &value); // GC can't delete keys before the safe ponit if they are latest versions. gc_runner.safe_point(200).gc(&raw_engine); - must_get(&engine, b"zkey", 110, &value); + must_get(&mut engine, b"zkey", 110, &value); - must_prewrite_put(&engine, b"zkey", &value, b"zkey", 120); - must_commit(&engine, b"zkey", 120, 130); + must_prewrite_put(&mut engine, b"zkey", &value, b"zkey", 120); + must_commit(&mut engine, b"zkey", 120, 130); // GC can't delete the latest version before the safe ponit. gc_runner.safe_point(115).gc(&raw_engine); - must_get(&engine, b"zkey", 110, &value); + must_get(&mut engine, b"zkey", 110, &value); // GC a version will also delete the key on default CF. 
gc_runner.safe_point(200).gc(&raw_engine); - must_get_none(&engine, b"zkey", 110); + must_get_none(&mut engine, b"zkey", 110); let default_key = Key::from_encoded_slice(b"zkey").append_ts(100.into()); let default_key = default_key.into_encoded(); assert!(raw_engine.get_value(&default_key).unwrap().is_none()); @@ -976,7 +976,7 @@ pub mod tests { #[test] fn test_compaction_filter_handle_deleting() { let value = vec![b'v'; 512]; - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let raw_engine = engine.get_rocksdb(); let mut gc_runner = TestGcRunner::new(0); @@ -1001,10 +1001,10 @@ pub mod tests { }; // No key switch after the deletion mark. - must_prewrite_put(&engine, b"zkey", &value, b"zkey", 100); - must_commit(&engine, b"zkey", 100, 110); - must_prewrite_delete(&engine, b"zkey", b"zkey", 120); - must_commit(&engine, b"zkey", 120, 130); + must_prewrite_put(&mut engine, b"zkey", &value, b"zkey", 100); + must_commit(&mut engine, b"zkey", 100, 110); + must_prewrite_delete(&mut engine, b"zkey", b"zkey", 120); + must_commit(&mut engine, b"zkey", 120, 130); // No GC task should be emit because the mvcc-deletion mark covers some older // versions. @@ -1022,12 +1022,12 @@ pub mod tests { .unwrap(); // Key switch after the deletion mark. 
- must_prewrite_put(&engine, b"zkey1", &value, b"zkey1", 200); - must_commit(&engine, b"zkey1", 200, 210); - must_prewrite_delete(&engine, b"zkey1", b"zkey1", 220); - must_commit(&engine, b"zkey1", 220, 230); - must_prewrite_put(&engine, b"zkey2", &value, b"zkey2", 220); - must_commit(&engine, b"zkey2", 220, 230); + must_prewrite_put(&mut engine, b"zkey1", &value, b"zkey1", 200); + must_commit(&mut engine, b"zkey1", 200, 210); + must_prewrite_delete(&mut engine, b"zkey1", b"zkey1", 220); + must_commit(&mut engine, b"zkey1", 220, 230); + must_prewrite_put(&mut engine, b"zkey2", &value, b"zkey2", 220); + must_commit(&mut engine, b"zkey2", 220, 230); // No GC task should be emit because the mvcc-deletion mark covers some older // versions. @@ -1045,17 +1045,17 @@ pub mod tests { cfg.writecf.dynamic_level_bytes = false; let dir = tempfile::TempDir::new().unwrap(); let builder = TestEngineBuilder::new().path(dir.path()); - let engine = builder.build_with_cfg(&cfg).unwrap(); + let mut engine = builder.build_with_cfg(&cfg).unwrap(); let raw_engine = engine.get_rocksdb(); let value = vec![b'v'; 512]; let mut gc_runner = TestGcRunner::new(0); for start_ts in &[100, 110, 120, 130] { - must_prewrite_put(&engine, b"zkey", &value, b"zkey", *start_ts); - must_commit(&engine, b"zkey", *start_ts, *start_ts + 5); + must_prewrite_put(&mut engine, b"zkey", &value, b"zkey", *start_ts); + must_commit(&mut engine, b"zkey", *start_ts, *start_ts + 5); } - must_prewrite_delete(&engine, b"zkey", b"zkey", 140); - must_commit(&engine, b"zkey", 140, 145); + must_prewrite_delete(&mut engine, b"zkey", b"zkey", 140); + must_commit(&mut engine, b"zkey", 140, 145); // Can't perform GC because the min timestamp is greater than safe point. 
gc_runner @@ -1072,18 +1072,18 @@ pub mod tests { gc_runner.target_level = Some(6); gc_runner.safe_point(140).gc(&raw_engine); for commit_ts in &[105, 115, 125] { - must_get_none(&engine, b"zkey", commit_ts); + must_get_none(&mut engine, b"zkey", commit_ts); } // Put an extra key to make the memtable overlap with the bottommost one. - must_prewrite_put(&engine, b"zkey1", &value, b"zkey1", 200); - must_commit(&engine, b"zkey1", 200, 205); + must_prewrite_put(&mut engine, b"zkey1", &value, b"zkey1", 200); + must_commit(&mut engine, b"zkey1", 200, 205); for start_ts in &[200, 210, 220, 230] { - must_prewrite_put(&engine, b"zkey", &value, b"zkey", *start_ts); - must_commit(&engine, b"zkey", *start_ts, *start_ts + 5); + must_prewrite_put(&mut engine, b"zkey", &value, b"zkey", *start_ts); + must_commit(&mut engine, b"zkey", *start_ts, *start_ts + 5); } - must_prewrite_delete(&engine, b"zkey", b"zkey", 240); - must_commit(&engine, b"zkey", 240, 245); + must_prewrite_delete(&mut engine, b"zkey", b"zkey", 240); + must_commit(&mut engine, b"zkey", 240, 245); raw_engine.flush_cf(CF_WRITE, true).unwrap(); // At internal levels can't perform GC because the threshold is not reached. 
@@ -1096,7 +1096,7 @@ pub mod tests { .safe_point(300) .gc_on_files(&raw_engine, files, CF_WRITE); for commit_ts in &[205, 215, 225, 235] { - must_get(&engine, b"zkey", commit_ts, &value); + must_get(&mut engine, b"zkey", commit_ts, &value); } } @@ -1114,14 +1114,14 @@ pub mod tests { let dir = tempfile::TempDir::new().unwrap(); let builder = TestEngineBuilder::new().path(dir.path()); - let engine = builder.build_with_cfg(&cfg).unwrap(); + let mut engine = builder.build_with_cfg(&cfg).unwrap(); let raw_engine = engine.get_rocksdb(); let mut gc_runner = TestGcRunner::new(0); // So the construction of SST files will be: // L6: |key_110| - must_prewrite_put(&engine, b"zkey", b"zvalue", b"zkey", 100); - must_commit(&engine, b"zkey", 100, 110); + must_prewrite_put(&mut engine, b"zkey", b"zvalue", b"zkey", 100); + must_commit(&mut engine, b"zkey", 100, 110); gc_runner.target_level = Some(6); gc_runner.safe_point(50).gc(&raw_engine); assert_eq!(rocksdb_level_file_counts(&raw_engine, CF_WRITE)[6], 1); @@ -1129,8 +1129,8 @@ pub mod tests { // So the construction of SST files will be: // L0: |key_130, key_110| // L6: |key_110| - must_prewrite_delete(&engine, b"zkey", b"zkey", 120); - must_commit(&engine, b"zkey", 120, 130); + must_prewrite_delete(&mut engine, b"zkey", b"zkey", 120); + must_commit(&mut engine, b"zkey", 120, 130); let k_110 = Key::from_raw(b"zkey").append_ts(110.into()).into_encoded(); raw_engine.delete_cf(CF_WRITE, &k_110).unwrap(); raw_engine.flush_cf(CF_WRITE, true).unwrap(); @@ -1147,11 +1147,11 @@ pub mod tests { .gc_on_files(&raw_engine, files, CF_WRITE); assert_eq!(rocksdb_level_file_counts(&raw_engine, CF_WRITE)[5], 1); assert_eq!(rocksdb_level_file_counts(&raw_engine, CF_WRITE)[6], 1); - must_get_none(&engine, b"zkey", 200); + must_get_none(&mut engine, b"zkey", 200); // Compact the mvcc deletion mark to L6, the stale version shouldn't be exposed. 
gc_runner.target_level = Some(6); gc_runner.safe_point(200).gc(&raw_engine); - must_get_none(&engine, b"zkey", 200); + must_get_none(&mut engine, b"zkey", 200); } } diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 7b03d0fb5e8..82496068b99 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -530,7 +530,7 @@ where } fn create_reader( - &self, + &mut self, key_count: usize, region: &Region, range_start_key: Key, @@ -871,16 +871,17 @@ where } fn handle_physical_scan_lock( - &self, + &mut self, _: &Context, max_ts: TimeStamp, start_key: &Key, limit: usize, regions_provider: Arc, ) -> Result> { + let store_id = self.store_id; let regions = box_try!(regions_provider.get_regions_in_range(start_key.as_encoded(), &[])) .into_iter() - .filter(move |r| find_peer(r, self.store_id).is_some()); + .filter(move |r| find_peer(r, store_id).is_some()); let mut first_round = true; let mut locks = Vec::new(); @@ -893,7 +894,7 @@ where Key::from_raw(region.get_start_key()) } }; - let snap = self.get_snapshot(self.store_id, ®ion)?; + let snap = self.get_snapshot(store_id, ®ion)?; let mut reader = MvccReader::new(snap, Some(ScanMode::Forward), false); let (locks_this_region, _) = reader .scan_locks(Some(&start_key), None, |l| l.ts <= max_ts, limit) @@ -937,7 +938,7 @@ where } } - fn get_snapshot(&self, store_id: u64, region: &Region) -> Result<::Snap> { + fn get_snapshot(&mut self, store_id: u64, region: &Region) -> Result<::Snap> { let ctx = init_snap_ctx(store_id, region); let snap_ctx = SnapContext { pb_ctx: &ctx, @@ -945,7 +946,7 @@ where }; Ok(block_on(async { - tikv_kv::snapshot(&self.engine, snap_ctx).await + tikv_kv::snapshot(&mut self.engine, snap_ctx).await })?) 
} } @@ -1543,7 +1544,7 @@ pub mod test_gc_worker { } fn async_snapshot( - &self, + &mut self, ctx: SnapContext<'_>, callback: EngineCallback, ) -> EngineResult<()> { @@ -1659,7 +1660,7 @@ pub mod test_gc_worker { } fn async_snapshot( - &self, + &mut self, ctx: SnapContext<'_>, callback: EngineCallback, ) -> EngineResult<()> { @@ -2099,7 +2100,7 @@ mod tests { fn test_gc_keys_with_region_info_provider() { let store_id = 1; let engine = TestEngineBuilder::new().build().unwrap(); - let prefixed_engine = PrefixedEngine(engine.clone()); + let mut prefixed_engine = PrefixedEngine(engine.clone()); let (tx, _rx) = mpsc::channel(); let feature_gate = FeatureGate::default(); @@ -2153,10 +2154,10 @@ mod tests { for i in 0..100 { let k = format!("k{:02}", i).into_bytes(); - must_prewrite_put(&prefixed_engine, &k, b"value", &k, 101); - must_commit(&prefixed_engine, &k, 101, 102); - must_prewrite_delete(&prefixed_engine, &k, &k, 151); - must_commit(&prefixed_engine, &k, 151, 152); + must_prewrite_put(&mut prefixed_engine, &k, b"value", &k, 101); + must_commit(&mut prefixed_engine, &k, 101, 102); + must_prewrite_delete(&mut prefixed_engine, &k, &k, 151); + must_commit(&mut prefixed_engine, &k, 151, 152); } db.flush_cf(cf, true).unwrap(); @@ -2165,7 +2166,7 @@ mod tests { let k = format!("k{:02}", i).into_bytes(); // Stale MVCC-PUTs will be cleaned in write CF's compaction filter. - must_get_none(&prefixed_engine, &k, 150); + must_get_none(&mut prefixed_engine, &k, 150); // However, MVCC-DELETIONs will be kept. 
let mut raw_k = vec![b'z']; @@ -2196,7 +2197,7 @@ mod tests { fn test_gc_keys_statistics() { let store_id = 1; let engine = TestEngineBuilder::new().build().unwrap(); - let prefixed_engine = PrefixedEngine(engine.clone()); + let mut prefixed_engine = PrefixedEngine(engine.clone()); let (tx, _rx) = mpsc::channel(); let cfg = GcConfig::default(); @@ -2228,10 +2229,10 @@ mod tests { let mut keys = vec![]; for i in 0..100 { let k = format!("k{:02}", i).into_bytes(); - must_prewrite_put(&prefixed_engine, &k, b"value", &k, 101); - must_commit(&prefixed_engine, &k, 101, 102); - must_prewrite_delete(&prefixed_engine, &k, &k, 151); - must_commit(&prefixed_engine, &k, 151, 152); + must_prewrite_put(&mut prefixed_engine, &k, b"value", &k, 101); + must_commit(&mut prefixed_engine, &k, 101, 102); + must_prewrite_delete(&mut prefixed_engine, &k, &k, 151); + must_commit(&mut prefixed_engine, &k, 151, 152); keys.push(Key::from_raw(&k)); } db.flush_cf(cf, true).unwrap(); @@ -2259,7 +2260,7 @@ mod tests { let dir = tempfile::TempDir::new().unwrap(); let builder = TestEngineBuilder::new().path(dir.path()); let engine = builder.build_with_cfg(&cfg).unwrap(); - let prefixed_engine = PrefixedEngine(engine); + let mut prefixed_engine = PrefixedEngine(engine); let (tx, _rx) = mpsc::channel(); let cfg = GcConfig::default(); @@ -2343,9 +2344,10 @@ mod tests { assert_eq!(7, runner.mut_stats(GcKeyMode::raw).data.next); assert_eq!(2, runner.mut_stats(GcKeyMode::raw).data.seek); - let snapshot = - block_on(async { tikv_kv::snapshot(&prefixed_engine, SnapContext::default()).await }) - .unwrap(); + let snapshot = block_on(async { + tikv_kv::snapshot(&mut prefixed_engine, SnapContext::default()).await + }) + .unwrap(); test_raws .clone() @@ -2360,7 +2362,7 @@ mod tests { #[test] fn test_gc_keys_scan_range_limit() { let engine = TestEngineBuilder::new().build().unwrap(); - let prefixed_engine = PrefixedEngine(engine.clone()); + let mut prefixed_engine = PrefixedEngine(engine.clone()); let (tx, _rx) 
= mpsc::channel(); let cfg = GcConfig::default(); @@ -2391,10 +2393,10 @@ mod tests { let cf = get_cf_handle(&db, CF_WRITE).unwrap(); // Generate some tombstone for i in 10u64..30 { - must_rollback(&prefixed_engine, b"k2\x00", i, true); + must_rollback(&mut prefixed_engine, b"k2\x00", i, true); } db.flush_cf(cf, true).unwrap(); - must_gc(&prefixed_engine, b"k2\x00", 30); + must_gc(&mut prefixed_engine, b"k2\x00", 30); // Test tombstone counter works assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek_tombstone, 0); @@ -2449,8 +2451,8 @@ mod tests { let versions = (MAX_TXN_WRITE_SIZE - 1) / key_size + 4; for start_ts in (1..versions).map(|x| x as u64 * 2) { let commit_ts = start_ts + 1; - must_prewrite_put(&prefixed_engine, b"k2", b"v2", b"k2", start_ts); - must_commit(&prefixed_engine, b"k2", start_ts, commit_ts); + must_prewrite_put(&mut prefixed_engine, b"k2", b"v2", b"k2", start_ts); + must_commit(&mut prefixed_engine, b"k2", start_ts, commit_ts); } db.flush_cf(cf, true).unwrap(); let safepoint = versions as u64 * 2; @@ -2480,9 +2482,9 @@ mod tests { #[test] fn delete_range_when_worker_is_full() { let store_id = 1; - let engine = PrefixedEngine(TestEngineBuilder::new().build().unwrap()); - must_prewrite_put(&engine, b"key", b"value", b"key", 10); - must_commit(&engine, b"key", 10, 20); + let mut engine = PrefixedEngine(TestEngineBuilder::new().build().unwrap()); + must_prewrite_put(&mut engine, b"key", b"value", b"key", 10); + must_commit(&mut engine, b"key", 10, 20); let db = engine.kv_engine().unwrap().as_inner().clone(); let cf = get_cf_handle(&db, CF_WRITE).unwrap(); db.flush_cf(cf, true).unwrap(); @@ -2546,7 +2548,7 @@ mod tests { // After the worker starts running, the destroy range task should run, // and the key in the range will be deleted. 
rx.recv_timeout(Duration::from_secs(10)).unwrap().unwrap(); - must_get_none(&engine, b"key", 30); + must_get_none(&mut engine, b"key", 30); } #[test] @@ -2655,7 +2657,7 @@ mod tests { region_info.insert(1, r1.clone()); region_info.insert(2, r2.clone()); region_info.insert(3, r3.clone()); - let engine = MultiRocksEngine { + let mut engine = MultiRocksEngine { factory: factory.clone(), region_info, }; @@ -2689,11 +2691,17 @@ mod tests { } let k = format!("k{:02}", i).into_bytes(); let v = format!("value-{:02}", i).into_bytes(); - must_prewrite_put_on_region(&engine, region_id, &k, &v, &k, put_start_ts); - must_commit_on_region(&engine, region_id, &k, put_start_ts, put_start_ts + 1); + must_prewrite_put_on_region(&mut engine, region_id, &k, &v, &k, put_start_ts); + must_commit_on_region(&mut engine, region_id, &k, put_start_ts, put_start_ts + 1); if need_deletion { - must_prewrite_delete_on_region(&engine, region_id, &k, &k, delete_start_ts); - must_commit_on_region(&engine, region_id, &k, delete_start_ts, delete_start_ts + 1); + must_prewrite_delete_on_region(&mut engine, region_id, &k, &k, delete_start_ts); + must_commit_on_region( + &mut engine, + region_id, + &k, + delete_start_ts, + delete_start_ts + 1, + ); } } @@ -2713,7 +2721,7 @@ mod tests { let put_start_ts = 100; let delete_start_ts = 150; - let (factory, engine, _ri_provider, mut gc_runner, regions, _) = + let (factory, mut engine, _ri_provider, mut gc_runner, regions, _) = multi_gc_engine_setup(store_id, put_start_ts, delete_start_ts, true); gc_runner.gc(regions[0].clone(), 200.into()).unwrap(); @@ -2731,7 +2739,7 @@ mod tests { let k = format!("k{:02}", i).into_bytes(); // Stale MVCC-PUTs will be cleaned in write CF's compaction filter. 
- must_get_none_on_region(&engine, region_id, &k, delete_start_ts - 1); + must_get_none_on_region(&mut engine, region_id, &k, delete_start_ts - 1); // MVCC-DELETIONs is cleaned let mut raw_k = vec![b'z']; @@ -2748,7 +2756,7 @@ mod tests { let put_start_ts = 100; let delete_start_ts = 150; - let (factory, engine, ri_provider, mut gc_runner, ..) = + let (factory, mut engine, ri_provider, mut gc_runner, ..) = multi_gc_engine_setup(store_id, put_start_ts, delete_start_ts, true); let mut keys = Vec::new(); @@ -2782,10 +2790,10 @@ mod tests { if i % 2 == 0 { assert!(db.get_cf(cf, &raw_k).unwrap().is_some()); - must_get_on_region(&engine, region_id, &k, delete_start_ts - 1, &val); + must_get_on_region(&mut engine, region_id, &k, delete_start_ts - 1, &val); } else { assert!(db.get_cf(cf, &raw_k).unwrap().is_none()); - must_get_none_on_region(&engine, region_id, &k, delete_start_ts - 1); + must_get_none_on_region(&mut engine, region_id, &k, delete_start_ts - 1); } } } @@ -2817,7 +2825,7 @@ mod tests { let mut region_info = HashMap::default(); region_info.insert(1, r1.clone()); region_info.insert(2, r2.clone()); - let engine = MultiRocksEngine { + let mut engine = MultiRocksEngine { factory, region_info, }; @@ -2915,7 +2923,8 @@ mod tests { pb_ctx: &ctx, ..Default::default() }; - let snapshot = block_on(async { tikv_kv::snapshot(&engine, snap_ctx).await }).unwrap(); + let snapshot = + block_on(async { tikv_kv::snapshot(&mut engine, snap_ctx).await }).unwrap(); test_raws_region .clone() @@ -2938,7 +2947,7 @@ mod tests { ) { let store_id = 1; let put_start_ts = 100; - let (factory, engine, ri_provider, gc_runner, _, _rx) = + let (factory, mut engine, ri_provider, gc_runner, _, _rx) = multi_gc_engine_setup(store_id, put_start_ts, 0, false); let start_key = Key::from_raw(start_key); @@ -2970,10 +2979,10 @@ mod tests { if start_key <= key && key < end_key { regions.insert(region_id); assert!(db.get_cf(cf, &raw_k).unwrap().is_none()); - must_get_none_on_region(&engine, region_id, 
&k, put_start_ts + 10); + must_get_none_on_region(&mut engine, region_id, &k, put_start_ts + 10); } else { assert!(db.get_cf(cf, &raw_k).unwrap().is_some()); - must_get_on_region(&engine, region_id, &k, put_start_ts + 10, &val); + must_get_on_region(&mut engine, region_id, &k, put_start_ts + 10, &val); } } } diff --git a/src/server/raftkv.rs b/src/server/raftkv.rs index 085e8381943..eaa13995650 100644 --- a/src/server/raftkv.rs +++ b/src/server/raftkv.rs @@ -195,7 +195,7 @@ where } fn exec_snapshot( - &self, + &mut self, ctx: SnapContext<'_>, req: Request, cb: Callback>, @@ -428,7 +428,11 @@ where }) } - fn async_snapshot(&self, mut ctx: SnapContext<'_>, cb: Callback) -> kv::Result<()> { + fn async_snapshot( + &mut self, + mut ctx: SnapContext<'_>, + cb: Callback, + ) -> kv::Result<()> { fail_point!("raftkv_async_snapshot_err", |_| Err(box_err!( "injected error for async_snapshot" ))); @@ -481,7 +485,7 @@ where }) } - fn release_snapshot(&self) { + fn release_snapshot(&mut self) { self.router.release_snapshot_cache(); } diff --git a/src/storage/kv/test_engine_builder.rs b/src/storage/kv/test_engine_builder.rs index d42b29cfe47..f0192372e4b 100644 --- a/src/storage/kv/test_engine_builder.rs +++ b/src/storage/kv/test_engine_builder.rs @@ -138,29 +138,29 @@ mod tests { #[test] fn test_rocksdb() { - let engine = TestEngineBuilder::new() + let mut engine = TestEngineBuilder::new() .cfs(TEST_ENGINE_CFS) .build() .unwrap(); - test_base_curd_options(&engine) + test_base_curd_options(&mut engine) } #[test] fn test_rocksdb_linear() { - let engine = TestEngineBuilder::new() + let mut engine = TestEngineBuilder::new() .cfs(TEST_ENGINE_CFS) .build() .unwrap(); - test_linear(&engine); + test_linear(&mut engine); } #[test] fn test_rocksdb_statistic() { - let engine = TestEngineBuilder::new() + let mut engine = TestEngineBuilder::new() .cfs(TEST_ENGINE_CFS) .build() .unwrap(); - test_cfs_statistics(&engine); + test_cfs_statistics(&mut engine); } #[test] @@ -178,27 +178,27 @@ mod 
tests { must_put_cf(&engine, "cf", b"k", b"v1"); } { - let engine = TestEngineBuilder::new() + let mut engine = TestEngineBuilder::new() .path(dir.path()) .cfs(TEST_ENGINE_CFS) .build() .unwrap(); - assert_has_cf(&engine, "cf", b"k", b"v1"); + assert_has_cf(&mut engine, "cf", b"k", b"v1"); } } #[test] fn test_rocksdb_perf_statistics() { - let engine = TestEngineBuilder::new() + let mut engine = TestEngineBuilder::new() .cfs(TEST_ENGINE_CFS) .build() .unwrap(); - test_perf_statistics(&engine); + test_perf_statistics(&mut engine); } #[test] fn test_max_skippable_internal_keys_error() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); must_put(&engine, b"foo", b"bar"); must_delete(&engine, b"foo"); must_put(&engine, b"foo1", b"bar1"); @@ -224,7 +224,7 @@ mod tests { ); } - fn test_perf_statistics(engine: &E) { + fn test_perf_statistics(engine: &mut E) { must_put(engine, b"foo", b"bar1"); must_put(engine, b"foo2", b"bar2"); must_put(engine, b"foo3", b"bar3"); // deleted @@ -268,7 +268,7 @@ mod tests { #[test] fn test_prefix_seek_skip_tombstone() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); engine .put_cf( &Context::default(), diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index 9a5f37011aa..3dd5fc2e10a 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -347,7 +347,7 @@ where tls_cell.with(|c| { let mut c = c.borrow_mut(); let perf_context = c.get_or_insert_with(|| { - with_tls_engine(|engine: &E| { + with_tls_engine(|engine: &mut E| { Box::new(engine.kv_engine().unwrap().get_perf_context( PerfLevel::Uninitialized, PerfContextKind::Storage(cmd.get_str()), diff --git a/src/storage/mod.rs b/src/storage/mod.rs index e9d1f06e524..abdfcd333ac 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -315,7 +315,7 @@ impl Storage { /// Get a snapshot of `engine`. 
fn snapshot( - engine: &E, + engine: &mut E, ctx: SnapContext<'_>, ) -> impl std::future::Future> { kv::snapshot(engine, ctx) @@ -324,11 +324,11 @@ impl Storage { } #[cfg(test)] - pub fn get_snapshot(&self) -> E::Snap { + pub fn get_snapshot(&mut self) -> E::Snap { self.engine.snapshot(Default::default()).unwrap() } - pub fn release_snapshot(&self) { + pub fn release_snapshot(&mut self) { self.engine.release_snapshot(); } @@ -349,7 +349,7 @@ impl Storage { } #[inline] - fn with_tls_engine(f: impl FnOnce(&E) -> R) -> R { + fn with_tls_engine(f: impl FnOnce(&mut E) -> R) -> R { // Safety: the read pools ensure that a TLS engine exists. unsafe { with_tls_engine(f) } } @@ -2971,7 +2971,7 @@ impl Engine for TxnTestEngine { } fn async_snapshot( - &self, + &mut self, ctx: SnapContext<'_>, cb: tikv_kv::Callback, ) -> tikv_kv::Result<()> { @@ -3444,7 +3444,7 @@ mod tests { #[test] fn test_prewrite_blocks_read() { use kvproto::kvrpcpb::ExtraOp; - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let mut storage = TestStorageBuilderApiV1::new(DummyLockManager) .build() .unwrap(); @@ -8682,7 +8682,7 @@ mod tests { // they should not have overlapped ts, which is an expected property. 
#[test] fn test_overlapped_ts_rollback_before_prewrite() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine.clone(), DummyLockManager) .build() @@ -8787,8 +8787,8 @@ mod tests { .unwrap(); rx.recv().unwrap(); - must_unlocked(&engine, k2); - must_written(&engine, k2, 10, 10, WriteType::Rollback); + must_unlocked(&mut engine, k2); + must_written(&mut engine, k2, 10, 10, WriteType::Rollback); // T1 prewrites, start_ts = 1, for_update_ts = 3 storage @@ -9003,7 +9003,7 @@ mod tests { #[test] fn test_resolve_commit_pessimistic_locks() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let mut storage = TestStorageBuilderApiV1::new(DummyLockManager) .build() .unwrap(); let (tx, rx) = channel(); @@ -9083,7 +9083,7 @@ mod tests { // Pessimistically rollback the k2 lock. // Non lite lock resolve on k1 and k2, there should no errors as lock on k2 is // pessimistic type. - must_rollback(&storage.engine, b"k2", 10, false); + must_rollback(&mut storage.engine, b"k2", 10, false); let mut temp_map = HashMap::default(); temp_map.insert(10.into(), 20.into()); storage @@ -9169,7 +9169,7 @@ mod tests { // Unlock the k6 first. // Non lite lock resolve on k5 and k6, error should be reported. 
- must_rollback(&storage.engine, b"k6", 10, true); + must_rollback(&mut storage.engine, b"k6", 10, true); storage .sched_txn_command( commands::ResolveLock::new( diff --git a/src/storage/mvcc/consistency_check.rs b/src/storage/mvcc/consistency_check.rs index fba4f207054..487ae61d5e8 100644 --- a/src/storage/mvcc/consistency_check.rs +++ b/src/storage/mvcc/consistency_check.rs @@ -448,18 +448,18 @@ mod tests { #[test] fn test_mvcc_checksum() { - let engine = TestEngineBuilder::new().build().unwrap(); - must_prewrite_put(&engine, b"zAAAAA", b"value", b"PRIMARY", 100); - must_commit(&engine, b"zAAAAA", 100, 101); - must_prewrite_put(&engine, b"zCCCCC", b"value", b"PRIMARY", 110); - must_commit(&engine, b"zCCCCC", 110, 111); - - must_prewrite_put(&engine, b"zBBBBB", b"value", b"PRIMARY", 200); - must_commit(&engine, b"zBBBBB", 200, 201); - must_prewrite_put(&engine, b"zDDDDD", b"value", b"PRIMARY", 200); - must_rollback(&engine, b"zDDDDD", 200, false); - must_prewrite_put(&engine, b"zFFFFF", b"value", b"PRIMARY", 200); - must_prewrite_delete(&engine, b"zGGGGG", b"PRIMARY", 200); + let mut engine = TestEngineBuilder::new().build().unwrap(); + must_prewrite_put(&mut engine, b"zAAAAA", b"value", b"PRIMARY", 100); + must_commit(&mut engine, b"zAAAAA", 100, 101); + must_prewrite_put(&mut engine, b"zCCCCC", b"value", b"PRIMARY", 110); + must_commit(&mut engine, b"zCCCCC", 110, 111); + + must_prewrite_put(&mut engine, b"zBBBBB", b"value", b"PRIMARY", 200); + must_commit(&mut engine, b"zBBBBB", 200, 201); + must_prewrite_put(&mut engine, b"zDDDDD", b"value", b"PRIMARY", 200); + must_rollback(&mut engine, b"zDDDDD", 200, false); + must_prewrite_put(&mut engine, b"zFFFFF", b"value", b"PRIMARY", 200); + must_prewrite_delete(&mut engine, b"zGGGGG", b"PRIMARY", 200); let mut checksums = Vec::with_capacity(3); for &safe_point in &[150, 160, 100] { diff --git a/src/storage/mvcc/mod.rs b/src/storage/mvcc/mod.rs index 3e6678e760d..6191c2ad46d 100644 --- a/src/storage/mvcc/mod.rs +++ 
b/src/storage/mvcc/mod.rs @@ -439,12 +439,17 @@ pub mod tests { } } - pub fn must_get(engine: &E, key: &[u8], ts: impl Into, expect: &[u8]) { + pub fn must_get( + engine: &mut E, + key: &[u8], + ts: impl Into, + expect: &[u8], + ) { must_get_impl(engine, None, key, ts, expect); } pub fn must_get_on_region( - engine: &E, + engine: &mut E, region_id: u64, key: &[u8], ts: impl Into, @@ -454,7 +459,7 @@ pub mod tests { } fn must_get_impl( - engine: &E, + engine: &mut E, region_id: Option, key: &[u8], ts: impl Into, @@ -478,7 +483,7 @@ pub mod tests { } pub fn must_get_no_lock_check( - engine: &E, + engine: &mut E, key: &[u8], ts: impl Into, expect: &[u8], @@ -514,12 +519,12 @@ pub mod tests { Ok(()) } - pub fn must_get_none(engine: &E, key: &[u8], ts: impl Into) { + pub fn must_get_none(engine: &mut E, key: &[u8], ts: impl Into) { must_get_none_impl(engine, key, ts, None); } pub fn must_get_none_on_region( - engine: &E, + engine: &mut E, region_id: u64, key: &[u8], ts: impl Into, @@ -528,7 +533,7 @@ pub mod tests { } fn must_get_none_impl( - engine: &E, + engine: &mut E, key: &[u8], ts: impl Into, region_id: Option, @@ -549,7 +554,7 @@ pub mod tests { assert!(reader.get(key, ts).unwrap().is_none()); } - pub fn must_get_err(engine: &E, key: &[u8], ts: impl Into) { + pub fn must_get_err(engine: &mut E, key: &[u8], ts: impl Into) { let ts = ts.into(); let ctx = SnapContext::default(); let snapshot = engine.snapshot(ctx).unwrap(); @@ -561,7 +566,11 @@ pub mod tests { reader.get(key, ts).unwrap_err(); } - pub fn must_locked(engine: &E, key: &[u8], start_ts: impl Into) -> Lock { + pub fn must_locked( + engine: &mut E, + key: &[u8], + start_ts: impl Into, + ) -> Lock { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new(snapshot, None, true); let lock = reader.load_lock(&Key::from_raw(key)).unwrap().unwrap(); @@ -571,7 +580,7 @@ pub mod tests { } pub fn must_locked_with_ttl( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl 
Into, ttl: u64, @@ -585,7 +594,7 @@ pub mod tests { } pub fn must_large_txn_locked( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, ttl: u64, @@ -605,14 +614,14 @@ pub mod tests { } } - pub fn must_unlocked(engine: &E, key: &[u8]) { + pub fn must_unlocked(engine: &mut E, key: &[u8]) { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new(snapshot, None, true); assert!(reader.load_lock(&Key::from_raw(key)).unwrap().is_none()); } pub fn must_written( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, commit_ts: impl Into, @@ -628,7 +637,7 @@ pub mod tests { } pub fn must_have_write( - engine: &E, + engine: &mut E, key: &[u8], commit_ts: impl Into, ) -> Write { @@ -639,14 +648,18 @@ pub mod tests { write.to_owned() } - pub fn must_not_have_write(engine: &E, key: &[u8], commit_ts: impl Into) { + pub fn must_not_have_write( + engine: &mut E, + key: &[u8], + commit_ts: impl Into, + ) { let snapshot = engine.snapshot(Default::default()).unwrap(); let k = Key::from_raw(key).append_ts(commit_ts.into()); let v = snapshot.get_cf(CF_WRITE, &k).unwrap(); assert!(v.is_none()); } - pub fn must_seek_write_none(engine: &E, key: &[u8], ts: impl Into) { + pub fn must_seek_write_none(engine: &mut E, key: &[u8], ts: impl Into) { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new(snapshot, None, true); assert!( @@ -658,7 +671,7 @@ pub mod tests { } pub fn must_seek_write( - engine: &E, + engine: &mut E, key: &[u8], ts: impl Into, start_ts: impl Into, @@ -677,7 +690,7 @@ pub mod tests { } pub fn must_get_commit_ts( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, commit_ts: impl Into, @@ -694,7 +707,7 @@ pub mod tests { } pub fn must_get_commit_ts_none( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, ) { @@ -710,7 +723,11 @@ pub mod tests { } } - pub fn must_get_rollback_ts(engine: &E, key: &[u8], start_ts: impl Into) { + pub fn 
must_get_rollback_ts( + engine: &mut E, + key: &[u8], + start_ts: impl Into, + ) { let start_ts = start_ts.into(); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = SnapshotReader::new(start_ts, snapshot, true); @@ -725,7 +742,7 @@ pub mod tests { } pub fn must_get_rollback_ts_none( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, ) { @@ -740,7 +757,7 @@ pub mod tests { } pub fn must_get_rollback_protected( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, protected: bool, @@ -759,7 +776,7 @@ pub mod tests { } pub fn must_get_overlapped_rollback>( - engine: &E, + engine: &mut E, key: &[u8], start_ts: T, overlapped_start_ts: T, @@ -783,7 +800,7 @@ pub mod tests { } pub fn must_scan_keys( - engine: &E, + engine: &mut E, start: Option<&[u8]>, limit: usize, keys: Vec<&[u8]>, diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index 2758460a526..1e26d9bf21b 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -403,12 +403,12 @@ mod tests { }, }; - fn new_point_getter(engine: &E, ts: TimeStamp) -> PointGetter { + fn new_point_getter(engine: &mut E, ts: TimeStamp) -> PointGetter { new_point_getter_with_iso(engine, ts, IsolationLevel::Si) } fn new_point_getter_with_iso( - engine: &E, + engine: &mut E, ts: TimeStamp, iso_level: IsolationLevel, ) -> PointGetter { @@ -429,7 +429,7 @@ mod tests { } fn must_met_newer_ts_data( - engine: &E, + engine: &mut E, getter_ts: impl Into, key: &[u8], value: &[u8], @@ -502,59 +502,59 @@ mod tests { /// PUT zz -> zvzv.... 
(commit at 103) fn new_sample_engine() -> RocksEngine { let suffix = "v".repeat(SHORT_VALUE_MAX_LEN + 1); - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); must_prewrite_put( - &engine, + &mut engine, b"foo1", &format!("foo1{}", suffix).into_bytes(), b"foo1", 2, ); - must_commit(&engine, b"foo1", 2, 3); + must_commit(&mut engine, b"foo1", 2, 3); must_prewrite_put( - &engine, + &mut engine, b"foo2", &format!("foo2{}", suffix).into_bytes(), b"foo2", 4, ); must_prewrite_put( - &engine, + &mut engine, b"bar", &format!("bar{}", suffix).into_bytes(), b"foo2", 4, ); - must_commit(&engine, b"foo2", 4, 5); - must_commit(&engine, b"bar", 4, 5); - must_prewrite_delete(&engine, b"xxx", b"xxx", 6); - must_commit(&engine, b"xxx", 6, 7); + must_commit(&mut engine, b"foo2", 4, 5); + must_commit(&mut engine, b"bar", 4, 5); + must_prewrite_delete(&mut engine, b"xxx", b"xxx", 6); + must_commit(&mut engine, b"xxx", 6, 7); must_prewrite_put( - &engine, + &mut engine, b"box", &format!("box{}", suffix).into_bytes(), b"box", 8, ); - must_prewrite_delete(&engine, b"foo1", b"box", 8); - must_commit(&engine, b"box", 8, 9); - must_commit(&engine, b"foo1", 8, 9); - must_prewrite_lock(&engine, b"bar", b"bar", 10); - must_commit(&engine, b"bar", 10, 11); + must_prewrite_delete(&mut engine, b"foo1", b"box", 8); + must_commit(&mut engine, b"box", 8, 9); + must_commit(&mut engine, b"foo1", 8, 9); + must_prewrite_lock(&mut engine, b"bar", b"bar", 10); + must_commit(&mut engine, b"bar", 10, 11); for i in 20..100 { if i % 2 == 0 { - must_prewrite_lock(&engine, b"foo2", b"foo2", i); - must_commit(&engine, b"foo2", i, i + 1); + must_prewrite_lock(&mut engine, b"foo2", b"foo2", i); + must_commit(&mut engine, b"foo2", i, i + 1); } } must_prewrite_put( - &engine, + &mut engine, b"zz", &format!("zz{}", suffix).into_bytes(), b"zz", 102, ); - must_commit(&engine, b"zz", 102, 103); + must_commit(&mut engine, b"zz", 102, 103); engine } @@ 
-566,35 +566,35 @@ mod tests { /// PUT foo2 -> foo2vv... (start at 4) fn new_sample_engine_2() -> RocksEngine { let suffix = "v".repeat(SHORT_VALUE_MAX_LEN + 1); - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); must_prewrite_put( - &engine, + &mut engine, b"foo1", &format!("foo1{}", suffix).into_bytes(), b"foo1", 2, ); - must_prewrite_put(&engine, b"bar", b"barval", b"foo1", 2); - must_commit(&engine, b"foo1", 2, 3); - must_commit(&engine, b"bar", 2, 3); + must_prewrite_put(&mut engine, b"bar", b"barval", b"foo1", 2); + must_commit(&mut engine, b"foo1", 2, 3); + must_commit(&mut engine, b"bar", 2, 3); must_prewrite_put( - &engine, + &mut engine, b"foo2", &format!("foo2{}", suffix).into_bytes(), b"foo2", 4, ); - must_prewrite_delete(&engine, b"bar", b"foo2", 4); + must_prewrite_delete(&mut engine, b"bar", b"foo2", 4); engine } /// No ts larger than get ts #[test] fn test_basic_1() { - let engine = new_sample_engine(); + let mut engine = new_sample_engine(); - let mut getter = new_point_getter(&engine, 200.into()); + let mut getter = new_point_getter(&mut engine, 200.into()); // Get a deleted key must_get_none(&mut getter, b"foo1"); @@ -661,42 +661,42 @@ mod tests { #[test] fn test_use_prefix_seek() { - let engine = TestEngineBuilder::new().build().unwrap(); - must_prewrite_put(&engine, b"foo1", b"bar1", b"foo1", 10); - must_commit(&engine, b"foo1", 10, 20); + let mut engine = TestEngineBuilder::new().build().unwrap(); + must_prewrite_put(&mut engine, b"foo1", b"bar1", b"foo1", 10); + must_commit(&mut engine, b"foo1", 10, 20); // Mustn't get the next user key even if point getter doesn't compare user key. 
- let mut getter = new_point_getter(&engine, 30.into()); + let mut getter = new_point_getter(&mut engine, 30.into()); must_get_none(&mut getter, b"foo0"); - let mut getter = new_point_getter(&engine, 30.into()); + let mut getter = new_point_getter(&mut engine, 30.into()); must_get_none(&mut getter, b"foo"); must_get_none(&mut getter, b"foo0"); } #[test] fn test_tombstone() { - let engine = TestEngineBuilder::new().build().unwrap(); - - must_prewrite_put(&engine, b"foo", b"bar", b"foo", 10); - must_prewrite_put(&engine, b"foo1", b"bar1", b"foo", 10); - must_prewrite_put(&engine, b"foo2", b"bar2", b"foo", 10); - must_prewrite_put(&engine, b"foo3", b"bar3", b"foo", 10); - must_commit(&engine, b"foo", 10, 20); - must_commit(&engine, b"foo1", 10, 20); - must_commit(&engine, b"foo2", 10, 20); - must_commit(&engine, b"foo3", 10, 20); - must_prewrite_delete(&engine, b"foo1", b"foo1", 30); - must_prewrite_delete(&engine, b"foo2", b"foo1", 30); - must_commit(&engine, b"foo1", 30, 40); - must_commit(&engine, b"foo2", 30, 40); - - must_gc(&engine, b"foo", 50); - must_gc(&engine, b"foo1", 50); - must_gc(&engine, b"foo2", 50); - must_gc(&engine, b"foo3", 50); - - let mut getter = new_point_getter(&engine, TimeStamp::max()); + let mut engine = TestEngineBuilder::new().build().unwrap(); + + must_prewrite_put(&mut engine, b"foo", b"bar", b"foo", 10); + must_prewrite_put(&mut engine, b"foo1", b"bar1", b"foo", 10); + must_prewrite_put(&mut engine, b"foo2", b"bar2", b"foo", 10); + must_prewrite_put(&mut engine, b"foo3", b"bar3", b"foo", 10); + must_commit(&mut engine, b"foo", 10, 20); + must_commit(&mut engine, b"foo1", 10, 20); + must_commit(&mut engine, b"foo2", 10, 20); + must_commit(&mut engine, b"foo3", 10, 20); + must_prewrite_delete(&mut engine, b"foo1", b"foo1", 30); + must_prewrite_delete(&mut engine, b"foo2", b"foo1", 30); + must_commit(&mut engine, b"foo1", 30, 40); + must_commit(&mut engine, b"foo2", 30, 40); + + must_gc(&mut engine, b"foo", 50); + must_gc(&mut engine, 
b"foo1", 50); + must_gc(&mut engine, b"foo2", 50); + must_gc(&mut engine, b"foo3", 50); + + let mut getter = new_point_getter(&mut engine, TimeStamp::max()); let perf_statistics = ReadPerfInstant::new(); must_get_value(&mut getter, b"foo", b"bar"); assert_eq!(perf_statistics.delta().internal_delete_skipped_count, 0); @@ -716,9 +716,9 @@ mod tests { #[test] fn test_with_iter_lower_bound() { - let engine = TestEngineBuilder::new().build().unwrap(); - must_prewrite_put(&engine, b"foo", b"bar", b"foo", 10); - must_commit(&engine, b"foo", 10, 20); + let mut engine = TestEngineBuilder::new().build().unwrap(); + must_prewrite_put(&mut engine, b"foo", b"bar", b"foo", 10); + must_commit(&mut engine, b"foo", 10, 20); let snapshot = engine.snapshot(Default::default()).unwrap(); let write_cursor = CursorBuilder::new(&snapshot, CF_WRITE) @@ -747,9 +747,9 @@ mod tests { /// Some ts larger than get ts #[test] fn test_basic_2() { - let engine = new_sample_engine(); + let mut engine = new_sample_engine(); - let mut getter = new_point_getter(&engine, 5.into()); + let mut getter = new_point_getter(&mut engine, 5.into()); must_get_value(&mut getter, b"bar", b"barv"); let s = getter.take_statistics(); @@ -814,9 +814,9 @@ mod tests { /// All ts larger than get ts #[test] fn test_basic_3() { - let engine = new_sample_engine(); + let mut engine = new_sample_engine(); - let mut getter = new_point_getter(&engine, 2.into()); + let mut getter = new_point_getter(&mut engine, 2.into()); must_get_none(&mut getter, b"foo1"); let s = getter.take_statistics(); @@ -838,9 +838,9 @@ mod tests { /// There are some locks in the Lock CF. 
#[test] fn test_locked() { - let engine = new_sample_engine_2(); + let mut engine = new_sample_engine_2(); - let mut getter = new_point_getter(&engine, 1.into()); + let mut getter = new_point_getter(&mut engine, 1.into()); must_get_none(&mut getter, b"a"); must_get_none(&mut getter, b"bar"); must_get_none(&mut getter, b"foo1"); @@ -849,7 +849,7 @@ mod tests { assert_seek_next_prev(&s.write, 4, 0, 0); assert_eq!(s.processed_size, 0); - let mut getter = new_point_getter(&engine, 3.into()); + let mut getter = new_point_getter(&mut engine, 3.into()); must_get_none(&mut getter, b"a"); must_get_value(&mut getter, b"bar", b"barv"); must_get_value(&mut getter, b"bar", b"barv"); @@ -868,7 +868,7 @@ mod tests { * 2 ); - let mut getter = new_point_getter(&engine, 4.into()); + let mut getter = new_point_getter(&mut engine, 4.into()); must_get_none(&mut getter, b"a"); must_get_err(&mut getter, b"bar"); must_get_err(&mut getter, b"bar"); @@ -887,7 +887,7 @@ mod tests { #[test] fn test_omit_value() { - let engine = new_sample_engine_2(); + let mut engine = new_sample_engine_2(); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -904,46 +904,46 @@ mod tests { #[test] fn test_get_latest_value() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (key, val) = (b"foo", b"bar"); - must_prewrite_put(&engine, key, val, key, 10); - must_commit(&engine, key, 10, 20); + must_prewrite_put(&mut engine, key, val, key, 10); + must_commit(&mut engine, key, 10, 20); - let mut getter = new_point_getter(&engine, TimeStamp::max()); + let mut getter = new_point_getter(&mut engine, TimeStamp::max()); must_get_value(&mut getter, key, val); // Ignore the primary lock if read with max ts. 
- must_prewrite_delete(&engine, key, key, 30); - let mut getter = new_point_getter(&engine, TimeStamp::max()); + must_prewrite_delete(&mut engine, key, key, 30); + let mut getter = new_point_getter(&mut engine, TimeStamp::max()); must_get_value(&mut getter, key, val); - must_rollback(&engine, key, 30, false); + must_rollback(&mut engine, key, 30, false); // Should not ignore the secondary lock even though reading the latest version - must_prewrite_delete(&engine, key, b"bar", 40); - let mut getter = new_point_getter(&engine, TimeStamp::max()); + must_prewrite_delete(&mut engine, key, b"bar", 40); + let mut getter = new_point_getter(&mut engine, TimeStamp::max()); must_get_err(&mut getter, key); - must_rollback(&engine, key, 40, false); + must_rollback(&mut engine, key, 40, false); // Should get the latest committed value if there is a primary lock with a ts // less than the latest Write's commit_ts. // // write.start_ts(10) < primary_lock.start_ts(15) < write.commit_ts(20) - must_acquire_pessimistic_lock(&engine, key, key, 15, 50); - must_pessimistic_prewrite_delete(&engine, key, key, 15, 50, DoPessimisticCheck); - let mut getter = new_point_getter(&engine, TimeStamp::max()); + must_acquire_pessimistic_lock(&mut engine, key, key, 15, 50); + must_pessimistic_prewrite_delete(&mut engine, key, key, 15, 50, DoPessimisticCheck); + let mut getter = new_point_getter(&mut engine, TimeStamp::max()); must_get_value(&mut getter, key, val); } #[test] fn test_get_bypass_locks() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (key, val) = (b"foo", b"bar"); - must_prewrite_put(&engine, key, val, key, 10); - must_commit(&engine, key, 10, 20); + must_prewrite_put(&mut engine, key, val, key, 10); + must_commit(&mut engine, key, 10, 20); - must_prewrite_delete(&engine, key, key, 30); + must_prewrite_delete(&mut engine, key, key, 30); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut 
getter = PointGetterBuilder::new(snapshot, 60.into()) @@ -964,9 +964,10 @@ mod tests { #[test] fn test_get_access_locks() { - let engine = TestEngineBuilder::new().build().unwrap(); - let build_getter = |ts: u64, bypass_locks, access_locks| { - let snapshot = engine.snapshot(Default::default()).unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); + let mut engine_clone = engine.clone(); + let mut build_getter = |ts: u64, bypass_locks, access_locks| { + let snapshot = engine_clone.snapshot(Default::default()).unwrap(); PointGetterBuilder::new(snapshot, ts.into()) .isolation_level(IsolationLevel::Si) .bypass_locks(TsSet::from_u64s(bypass_locks)) @@ -977,42 +978,42 @@ mod tests { // short value let (key, val) = (b"foo", b"bar"); - must_prewrite_put(&engine, key, val, key, 10); + must_prewrite_put(&mut engine, key, val, key, 10); must_get_value(&mut build_getter(20, vec![], vec![10]), key, val); - must_commit(&engine, key, 10, 15); + must_commit(&mut engine, key, 10, 15); must_get_value(&mut build_getter(20, vec![], vec![]), key, val); // load value from default cf. 
let val = "v".repeat(SHORT_VALUE_MAX_LEN + 1); let val = val.as_bytes(); - must_prewrite_put(&engine, key, val, key, 20); + must_prewrite_put(&mut engine, key, val, key, 20); must_get_value(&mut build_getter(30, vec![], vec![20]), key, val); - must_commit(&engine, key, 20, 25); + must_commit(&mut engine, key, 20, 25); must_get_value(&mut build_getter(30, vec![], vec![]), key, val); // delete - must_prewrite_delete(&engine, key, key, 30); + must_prewrite_delete(&mut engine, key, key, 30); must_get_none(&mut build_getter(40, vec![], vec![30]), key); - must_commit(&engine, key, 30, 35); + must_commit(&mut engine, key, 30, 35); must_get_none(&mut build_getter(40, vec![], vec![]), key); // ignore locks not blocking read let (key, val) = (b"foo", b"bar"); // lock's ts > read's ts - must_prewrite_put(&engine, key, val, key, 50); + must_prewrite_put(&mut engine, key, val, key, 50); must_get_none(&mut build_getter(45, vec![], vec![50]), key); - must_commit(&engine, key, 50, 55); + must_commit(&mut engine, key, 50, 55); // LockType::Lock - must_prewrite_lock(&engine, key, key, 60); + must_prewrite_lock(&mut engine, key, key, 60); must_get_value(&mut build_getter(65, vec![], vec![60]), key, val); - must_commit(&engine, key, 60, 65); + must_commit(&mut engine, key, 60, 65); // LockType::Pessimistic - must_acquire_pessimistic_lock(&engine, key, key, 70, 70); + must_acquire_pessimistic_lock(&mut engine, key, key, 70, 70); must_get_value(&mut build_getter(75, vec![], vec![70]), key, val); - must_rollback(&engine, key, 70, false); + must_rollback(&mut engine, key, 70, false); // lock's min_commit_ts > read's ts must_prewrite_put_impl( - &engine, + &mut engine, key, &val[..1], key, @@ -1029,117 +1030,117 @@ mod tests { AssertionLevel::Off, ); must_get_value(&mut build_getter(85, vec![], vec![80]), key, val); - must_rollback(&engine, key, 80, false); + must_rollback(&mut engine, key, 80, false); // read'ts == max && lock is a primary lock. 
- must_prewrite_put(&engine, key, &val[..1], key, 90); + must_prewrite_put(&mut engine, key, &val[..1], key, 90); must_get_value( &mut build_getter(TimeStamp::max().into_inner(), vec![], vec![90]), key, val, ); - must_rollback(&engine, key, 90, false); + must_rollback(&mut engine, key, 90, false); // lock in resolve_keys(it can't happen). - must_prewrite_put(&engine, key, &val[..1], key, 100); + must_prewrite_put(&mut engine, key, &val[..1], key, 100); must_get_value(&mut build_getter(105, vec![100], vec![100]), key, val); - must_rollback(&engine, key, 100, false); + must_rollback(&mut engine, key, 100, false); } #[test] fn test_met_newer_ts_data() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (key, val1) = (b"foo", b"bar1"); - must_prewrite_put(&engine, key, val1, key, 10); - must_commit(&engine, key, 10, 20); + must_prewrite_put(&mut engine, key, val1, key, 10); + must_commit(&mut engine, key, 10, 20); let (key, val2) = (b"foo", b"bar2"); - must_prewrite_put(&engine, key, val2, key, 30); - must_commit(&engine, key, 30, 40); + must_prewrite_put(&mut engine, key, val2, key, 30); + must_commit(&mut engine, key, 30, 40); - must_met_newer_ts_data(&engine, 20, key, val1, true); - must_met_newer_ts_data(&engine, 30, key, val1, true); - must_met_newer_ts_data(&engine, 40, key, val2, false); - must_met_newer_ts_data(&engine, 50, key, val2, false); + must_met_newer_ts_data(&mut engine, 20, key, val1, true); + must_met_newer_ts_data(&mut engine, 30, key, val1, true); + must_met_newer_ts_data(&mut engine, 40, key, val2, false); + must_met_newer_ts_data(&mut engine, 50, key, val2, false); - must_prewrite_lock(&engine, key, key, 60); + must_prewrite_lock(&mut engine, key, key, 60); - must_met_newer_ts_data(&engine, 50, key, val2, true); - must_met_newer_ts_data(&engine, 60, key, val2, true); + must_met_newer_ts_data(&mut engine, 50, key, val2, true); + must_met_newer_ts_data(&mut engine, 60, key, 
val2, true); } #[test] fn test_point_get_check_gc_fence() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // PUT, Read // `--------------^ - must_prewrite_put(&engine, b"k1", b"v1", b"k1", 10); - must_commit(&engine, b"k1", 10, 20); - must_cleanup_with_gc_fence(&engine, b"k1", 20, 0, 50, true); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 10); + must_commit(&mut engine, b"k1", 10, 20); + must_cleanup_with_gc_fence(&mut engine, b"k1", 20, 0, 50, true); // PUT, Read // `---------^ - must_prewrite_put(&engine, b"k2", b"v2", b"k2", 11); - must_commit(&engine, b"k2", 11, 20); - must_cleanup_with_gc_fence(&engine, b"k2", 20, 0, 40, true); + must_prewrite_put(&mut engine, b"k2", b"v2", b"k2", 11); + must_commit(&mut engine, b"k2", 11, 20); + must_cleanup_with_gc_fence(&mut engine, b"k2", 20, 0, 40, true); // PUT, Read // `-----^ - must_prewrite_put(&engine, b"k3", b"v3", b"k3", 12); - must_commit(&engine, b"k3", 12, 20); - must_cleanup_with_gc_fence(&engine, b"k3", 20, 0, 30, true); + must_prewrite_put(&mut engine, b"k3", b"v3", b"k3", 12); + must_commit(&mut engine, b"k3", 12, 20); + must_cleanup_with_gc_fence(&mut engine, b"k3", 20, 0, 30, true); // PUT, PUT, Read // `-----^ `----^ - must_prewrite_put(&engine, b"k4", b"v4", b"k4", 13); - must_commit(&engine, b"k4", 13, 14); - must_prewrite_put(&engine, b"k4", b"v4x", b"k4", 15); - must_commit(&engine, b"k4", 15, 20); - must_cleanup_with_gc_fence(&engine, b"k4", 14, 0, 20, false); - must_cleanup_with_gc_fence(&engine, b"k4", 20, 0, 30, true); + must_prewrite_put(&mut engine, b"k4", b"v4", b"k4", 13); + must_commit(&mut engine, b"k4", 13, 14); + must_prewrite_put(&mut engine, b"k4", b"v4x", b"k4", 15); + must_commit(&mut engine, b"k4", 15, 20); + must_cleanup_with_gc_fence(&mut engine, b"k4", 14, 0, 20, false); + must_cleanup_with_gc_fence(&mut engine, b"k4", 20, 0, 30, true); // PUT, DEL, Read // `-----^ `----^ - must_prewrite_put(&engine, 
b"k5", b"v5", b"k5", 13); - must_commit(&engine, b"k5", 13, 14); - must_prewrite_delete(&engine, b"k5", b"v5", 15); - must_commit(&engine, b"k5", 15, 20); - must_cleanup_with_gc_fence(&engine, b"k5", 14, 0, 20, false); - must_cleanup_with_gc_fence(&engine, b"k5", 20, 0, 30, true); + must_prewrite_put(&mut engine, b"k5", b"v5", b"k5", 13); + must_commit(&mut engine, b"k5", 13, 14); + must_prewrite_delete(&mut engine, b"k5", b"v5", 15); + must_commit(&mut engine, b"k5", 15, 20); + must_cleanup_with_gc_fence(&mut engine, b"k5", 14, 0, 20, false); + must_cleanup_with_gc_fence(&mut engine, b"k5", 20, 0, 30, true); // PUT, LOCK, LOCK, Read // `------------------------^ - must_prewrite_put(&engine, b"k6", b"v6", b"k6", 16); - must_commit(&engine, b"k6", 16, 20); - must_prewrite_lock(&engine, b"k6", b"k6", 25); - must_commit(&engine, b"k6", 25, 26); - must_prewrite_lock(&engine, b"k6", b"k6", 28); - must_commit(&engine, b"k6", 28, 29); - must_cleanup_with_gc_fence(&engine, b"k6", 20, 0, 50, true); + must_prewrite_put(&mut engine, b"k6", b"v6", b"k6", 16); + must_commit(&mut engine, b"k6", 16, 20); + must_prewrite_lock(&mut engine, b"k6", b"k6", 25); + must_commit(&mut engine, b"k6", 25, 26); + must_prewrite_lock(&mut engine, b"k6", b"k6", 28); + must_commit(&mut engine, b"k6", 28, 29); + must_cleanup_with_gc_fence(&mut engine, b"k6", 20, 0, 50, true); // PUT, LOCK, LOCK, Read // `---------^ - must_prewrite_put(&engine, b"k7", b"v7", b"k7", 16); - must_commit(&engine, b"k7", 16, 20); - must_prewrite_lock(&engine, b"k7", b"k7", 25); - must_commit(&engine, b"k7", 25, 26); - must_cleanup_with_gc_fence(&engine, b"k7", 20, 0, 27, true); - must_prewrite_lock(&engine, b"k7", b"k7", 28); - must_commit(&engine, b"k7", 28, 29); + must_prewrite_put(&mut engine, b"k7", b"v7", b"k7", 16); + must_commit(&mut engine, b"k7", 16, 20); + must_prewrite_lock(&mut engine, b"k7", b"k7", 25); + must_commit(&mut engine, b"k7", 25, 26); + must_cleanup_with_gc_fence(&mut engine, b"k7", 20, 0, 27, 
true); + must_prewrite_lock(&mut engine, b"k7", b"k7", 28); + must_commit(&mut engine, b"k7", 28, 29); // PUT, Read // * (GC fence ts is 0) - must_prewrite_put(&engine, b"k8", b"v8", b"k8", 17); - must_commit(&engine, b"k8", 17, 30); - must_cleanup_with_gc_fence(&engine, b"k8", 30, 0, 0, true); + must_prewrite_put(&mut engine, b"k8", b"v8", b"k8", 17); + must_commit(&mut engine, b"k8", 17, 30); + must_cleanup_with_gc_fence(&mut engine, b"k8", 30, 0, 0, true); // PUT, LOCK, Read // `-----------^ - must_prewrite_put(&engine, b"k9", b"v9", b"k9", 18); - must_commit(&engine, b"k9", 18, 20); - must_prewrite_lock(&engine, b"k9", b"k9", 25); - must_commit(&engine, b"k9", 25, 26); - must_cleanup_with_gc_fence(&engine, b"k9", 20, 0, 27, true); + must_prewrite_put(&mut engine, b"k9", b"v9", b"k9", 18); + must_commit(&mut engine, b"k9", 18, 20); + must_prewrite_lock(&mut engine, b"k9", b"k9", 25); + must_commit(&mut engine, b"k9", 25, 26); + must_cleanup_with_gc_fence(&mut engine, b"k9", 20, 0, 27, true); let expected_results = vec![ (b"k1", Some(b"v1")), @@ -1154,12 +1155,12 @@ mod tests { ]; for (k, v) in expected_results.iter().copied() { - let mut single_getter = new_point_getter(&engine, 40.into()); + let mut single_getter = new_point_getter(&mut engine, 40.into()); let value = single_getter.get(&Key::from_raw(k)).unwrap(); assert_eq!(value, v.map(|v| v.to_vec())); } - let mut getter = new_point_getter(&engine, 40.into()); + let mut getter = new_point_getter(&mut engine, 40.into()); for (k, v) in expected_results { let value = getter.get(&Key::from_raw(k)).unwrap(); assert_eq!(value, v.map(|v| v.to_vec())); @@ -1168,68 +1169,68 @@ mod tests { #[test] fn test_point_get_check_rc_ts() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (key0, val0) = (b"k0", b"v0"); - must_prewrite_put(&engine, key0, val0, key0, 1); - must_commit(&engine, key0, 1, 5); + must_prewrite_put(&mut engine, key0, val0, key0, 
1); + must_commit(&mut engine, key0, 1, 5); let (key1, val1) = (b"k1", b"v1"); - must_prewrite_put(&engine, key1, val1, key1, 10); - must_commit(&engine, key1, 10, 20); + must_prewrite_put(&mut engine, key1, val1, key1, 10); + must_commit(&mut engine, key1, 10, 20); let (key2, val2, val22) = (b"k2", b"v2", b"v22"); - must_prewrite_put(&engine, key2, val2, key2, 30); - must_commit(&engine, key2, 30, 40); - must_prewrite_put(&engine, key2, val22, key2, 41); - must_commit(&engine, key2, 41, 42); + must_prewrite_put(&mut engine, key2, val2, key2, 30); + must_commit(&mut engine, key2, 30, 40); + must_prewrite_put(&mut engine, key2, val22, key2, 41); + must_commit(&mut engine, key2, 41, 42); let (key3, val3) = (b"k3", b"v3"); - must_prewrite_put(&engine, key3, val3, key3, 50); + must_prewrite_put(&mut engine, key3, val3, key3, 50); let (key4, val4) = (b"k4", b"val4"); - must_prewrite_put(&engine, key4, val4, key4, 55); - must_commit(&engine, key4, 55, 56); - must_prewrite_lock(&engine, key4, key4, 60); + must_prewrite_put(&mut engine, key4, val4, key4, 55); + must_commit(&mut engine, key4, 55, 56); + must_prewrite_lock(&mut engine, key4, key4, 60); let (key5, val5) = (b"k5", b"val5"); - must_prewrite_put(&engine, key5, val5, key5, 57); - must_commit(&engine, key5, 57, 58); - must_acquire_pessimistic_lock(&engine, key5, key5, 65, 65); + must_prewrite_put(&mut engine, key5, val5, key5, 57); + must_commit(&mut engine, key5, 57, 58); + must_acquire_pessimistic_lock(&mut engine, key5, key5, 65, 65); // No more recent version. let mut getter_with_ts_ok = - new_point_getter_with_iso(&engine, 25.into(), IsolationLevel::RcCheckTs); + new_point_getter_with_iso(&mut engine, 25.into(), IsolationLevel::RcCheckTs); must_get_value(&mut getter_with_ts_ok, key1, val1); // The read_ts is stale error should be reported. 
let mut getter_not_ok = - new_point_getter_with_iso(&engine, 35.into(), IsolationLevel::RcCheckTs); + new_point_getter_with_iso(&mut engine, 35.into(), IsolationLevel::RcCheckTs); must_get_err(&mut getter_not_ok, key2); // Though lock.ts > read_ts error should still be reported. let mut getter_not_ok = - new_point_getter_with_iso(&engine, 45.into(), IsolationLevel::RcCheckTs); + new_point_getter_with_iso(&mut engine, 45.into(), IsolationLevel::RcCheckTs); must_get_err(&mut getter_not_ok, key3); // Error should not be reported if the lock type is rollback or lock. let mut getter_ok = - new_point_getter_with_iso(&engine, 70.into(), IsolationLevel::RcCheckTs); + new_point_getter_with_iso(&mut engine, 70.into(), IsolationLevel::RcCheckTs); must_get_value(&mut getter_ok, key4, val4); let mut getter_ok = - new_point_getter_with_iso(&engine, 70.into(), IsolationLevel::RcCheckTs); + new_point_getter_with_iso(&mut engine, 70.into(), IsolationLevel::RcCheckTs); must_get_value(&mut getter_ok, key5, val5); // Test batch point get. Report error if more recent version is met. let mut batch_getter = - new_point_getter_with_iso(&engine, 35.into(), IsolationLevel::RcCheckTs); + new_point_getter_with_iso(&mut engine, 35.into(), IsolationLevel::RcCheckTs); must_get_value(&mut batch_getter, key0, val0); must_get_value(&mut batch_getter, key1, val1); must_get_err(&mut batch_getter, key2); // Test batch point get. Report error if lock is met though lock.ts > read_ts. let mut batch_getter = - new_point_getter_with_iso(&engine, 45.into(), IsolationLevel::RcCheckTs); + new_point_getter_with_iso(&mut engine, 45.into(), IsolationLevel::RcCheckTs); must_get_value(&mut batch_getter, key0, val0); must_get_value(&mut batch_getter, key1, val1); must_get_value(&mut batch_getter, key2, val22); @@ -1238,7 +1239,7 @@ mod tests { // Test batch point get. Error should not be reported if the lock type is // rollback or lock. 
let mut batch_getter_ok = - new_point_getter_with_iso(&engine, 70.into(), IsolationLevel::RcCheckTs); + new_point_getter_with_iso(&mut engine, 70.into(), IsolationLevel::RcCheckTs); must_get_value(&mut batch_getter_ok, key4, val4); must_get_value(&mut batch_getter_ok, key5, val5); } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 1aff262186c..0f6eb5a390e 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -2428,7 +2428,7 @@ pub mod tests { }, ]; for (i, case) in cases.into_iter().enumerate() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(42.into()); let mut txn = MvccTxn::new(TimeStamp::new(10), cm.clone()); for (write_record, put_ts) in case.written.iter() { @@ -2461,7 +2461,7 @@ pub mod tests { // Must return Oldvalue::None when prev_write_loaded is true and prev_write is // None. - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new(snapshot, None, true); let prev_write_loaded = true; diff --git a/src/storage/mvcc/reader/scanner/backward.rs b/src/storage/mvcc/reader/scanner/backward.rs index 11ed487cd56..ee1780b76b4 100644 --- a/src/storage/mvcc/reader/scanner/backward.rs +++ b/src/storage/mvcc/reader/scanner/backward.rs @@ -506,30 +506,30 @@ mod tests { #[test] fn test_basic() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); // Generate REVERSE_SEEK_BOUND / 2 Put for key [10]. 
let k = &[10_u8]; for ts in 0..REVERSE_SEEK_BOUND / 2 { - must_prewrite_put(&engine, k, &[ts as u8], k, ts); - must_commit(&engine, k, ts, ts); + must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); + must_commit(&mut engine, k, ts, ts); } // Generate REVERSE_SEEK_BOUND + 1 Put for key [9]. let k = &[9_u8]; for ts in 0..=REVERSE_SEEK_BOUND { - must_prewrite_put(&engine, k, &[ts as u8], k, ts); - must_commit(&engine, k, ts, ts); + must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); + must_commit(&mut engine, k, ts, ts); } // Generate REVERSE_SEEK_BOUND / 2 Put and REVERSE_SEEK_BOUND / 2 + 1 Rollback // for key [8]. let k = &[8_u8]; for ts in 0..=REVERSE_SEEK_BOUND { - must_prewrite_put(&engine, k, &[ts as u8], k, ts); + must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); if ts < REVERSE_SEEK_BOUND / 2 { - must_commit(&engine, k, ts, ts); + must_commit(&mut engine, k, ts, ts); } else { let modifies = vec![ // ts is rather small, so it is ok to `as u8` @@ -548,16 +548,16 @@ mod tests { // Rollback for key [7]. let k = &[7_u8]; for ts in 0..REVERSE_SEEK_BOUND / 2 { - must_prewrite_put(&engine, k, &[ts as u8], k, ts); - must_commit(&engine, k, ts, ts); + must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); + must_commit(&mut engine, k, ts, ts); } { let ts = REVERSE_SEEK_BOUND / 2; - must_prewrite_delete(&engine, k, k, ts); - must_commit(&engine, k, ts, ts); + must_prewrite_delete(&mut engine, k, k, ts); + must_commit(&mut engine, k, ts, ts); } for ts in REVERSE_SEEK_BOUND / 2 + 1..=REVERSE_SEEK_BOUND { - must_prewrite_put(&engine, k, &[ts as u8], k, ts); + must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); let modifies = vec![ // ts is rather small, so it is ok to `as u8` Modify::Put( @@ -573,14 +573,14 @@ mod tests { // Generate 1 PUT for key [6]. 
let k = &[6_u8]; for ts in 0..1 { - must_prewrite_put(&engine, k, &[ts as u8], k, ts); - must_commit(&engine, k, ts, ts); + must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); + must_commit(&mut engine, k, ts, ts); } // Generate REVERSE_SEEK_BOUND + 1 Rollback for key [5]. let k = &[5_u8]; for ts in 0..=REVERSE_SEEK_BOUND { - must_prewrite_put(&engine, k, &[ts as u8], k, ts); + must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); let modifies = vec![ // ts is rather small, so it is ok to `as u8` Modify::Put( @@ -597,8 +597,8 @@ mod tests { // with ts = REVERSE_SEEK_BOUND + 1 for key [4]. let k = &[4_u8]; for ts in REVERSE_SEEK_BOUND..REVERSE_SEEK_BOUND + 2 { - must_prewrite_put(&engine, k, &[ts as u8], k, ts); - must_commit(&engine, k, ts, ts); + must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); + must_commit(&mut engine, k, ts, ts); } // Assume REVERSE_SEEK_BOUND == 4, we have keys: @@ -806,7 +806,7 @@ mod tests { /// Case 1. prev out of bound, next_version is None. #[test] fn test_reverse_get_out_of_bound_1() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); // Generate N/2 rollback for [b]. for ts in 0..REVERSE_SEEK_BOUND / 2 { @@ -823,9 +823,9 @@ mod tests { } // Generate 1 put for [c]. - must_prewrite_put(&engine, b"c", b"value", b"c", REVERSE_SEEK_BOUND * 2); + must_prewrite_put(&mut engine, b"c", b"value", b"c", REVERSE_SEEK_BOUND * 2); must_commit( - &engine, + &mut engine, b"c", REVERSE_SEEK_BOUND * 2, REVERSE_SEEK_BOUND * 2, @@ -890,11 +890,11 @@ mod tests { /// Case 2. prev out of bound, next_version is Some. #[test] fn test_reverse_get_out_of_bound_2() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); // Generate 1 put and N/2 rollback for [b]. 
- must_prewrite_put(&engine, b"b", b"value_b", b"b", 0); - must_commit(&engine, b"b", 0, 0); + must_prewrite_put(&mut engine, b"b", b"value_b", b"b", 0); + must_commit(&mut engine, b"b", 0, 0); for ts in 1..=REVERSE_SEEK_BOUND / 2 { let modifies = vec![ // ts is rather small, so it is ok to `as u8` @@ -909,9 +909,9 @@ mod tests { } // Generate 1 put for [c]. - must_prewrite_put(&engine, b"c", b"value_c", b"c", REVERSE_SEEK_BOUND * 2); + must_prewrite_put(&mut engine, b"c", b"value_c", b"c", REVERSE_SEEK_BOUND * 2); must_commit( - &engine, + &mut engine, b"c", REVERSE_SEEK_BOUND * 2, REVERSE_SEEK_BOUND * 2, @@ -983,16 +983,16 @@ mod tests { /// Case 1. prev() out of bound #[test] fn test_move_prev_user_key_out_of_bound_1() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // Generate 1 put for [c]. - must_prewrite_put(&engine, b"c", b"value", b"c", 1); - must_commit(&engine, b"c", 1, 1); + must_prewrite_put(&mut engine, b"c", b"value", b"c", 1); + must_commit(&mut engine, b"c", 1, 1); // Generate N/2 put for [b] . for ts in 1..=SEEK_BOUND / 2 { - must_prewrite_put(&engine, b"b", &[ts as u8], b"b", ts); - must_commit(&engine, b"b", ts, ts); + must_prewrite_put(&mut engine, b"b", &[ts as u8], b"b", ts); + must_commit(&mut engine, b"b", ts, ts); } let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1065,16 +1065,16 @@ mod tests { /// Case 2. seek_for_prev() out of bound #[test] fn test_move_prev_user_key_out_of_bound_2() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // Generate 1 put for [c]. - must_prewrite_put(&engine, b"c", b"value", b"c", 1); - must_commit(&engine, b"c", 1, 1); + must_prewrite_put(&mut engine, b"c", b"value", b"c", 1); + must_commit(&mut engine, b"c", 1, 1); // Generate N+1 put for [b] . 
for ts in 1..SEEK_BOUND + 2 { - must_prewrite_put(&engine, b"b", &[ts as u8], b"b", ts); - must_commit(&engine, b"b", ts, ts); + must_prewrite_put(&mut engine, b"b", &[ts as u8], b"b", ts); + must_commit(&mut engine, b"b", ts, ts); } let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1153,18 +1153,18 @@ mod tests { /// Case 3. a more complicated case #[test] fn test_move_prev_user_key_out_of_bound_3() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // N denotes for SEEK_BOUND, M denotes for REVERSE_SEEK_BOUND // Generate 1 put for [c]. - must_prewrite_put(&engine, b"c", b"value", b"c", 1); - must_commit(&engine, b"c", 1, 1); + must_prewrite_put(&mut engine, b"c", b"value", b"c", 1); + must_commit(&mut engine, b"c", 1, 1); // Generate N+M+1 put for [b] . for ts in 1..SEEK_BOUND + REVERSE_SEEK_BOUND + 2 { - must_prewrite_put(&engine, b"b", &[ts as u8], b"b", ts); - must_commit(&engine, b"b", ts, ts); + must_prewrite_put(&mut engine, b"b", &[ts as u8], b"b", ts); + must_commit(&mut engine, b"b", ts, ts); } let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1246,21 +1246,21 @@ mod tests { /// Range is left open right closed. #[test] fn test_range() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // Generate 1 put for [1], [2] ... [6]. 
for i in 1..7 { // ts = 1: value = [] - must_prewrite_put(&engine, &[i], &[], &[i], 1); - must_commit(&engine, &[i], 1, 1); + must_prewrite_put(&mut engine, &[i], &[], &[i], 1); + must_commit(&mut engine, &[i], 1, 1); // ts = 7: value = [ts] - must_prewrite_put(&engine, &[i], &[i], &[i], 7); - must_commit(&engine, &[i], 7, 7); + must_prewrite_put(&mut engine, &[i], &[i], &[i], 7); + must_commit(&mut engine, &[i], 7, 7); // ts = 14: value = [] - must_prewrite_put(&engine, &[i], &[], &[i], 14); - must_commit(&engine, &[i], 14, 14); + must_prewrite_put(&mut engine, &[i], &[], &[i], 14); + must_commit(&mut engine, &[i], 14, 14); } let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1376,7 +1376,7 @@ mod tests { #[test] fn test_many_tombstones() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // Generate RocksDB tombstones in write cf. let start_ts = 1; @@ -1384,11 +1384,11 @@ mod tests { for i in 0..16 { for y in 0..16 { let pk = &[i as u8, y as u8]; - must_prewrite_put(&engine, pk, b"", pk, start_ts); - must_rollback(&engine, pk, start_ts, false); + must_prewrite_put(&mut engine, pk, b"", pk, start_ts); + must_rollback(&mut engine, pk, start_ts, false); // Generate 254 RocksDB tombstones between [0,0] and [15,15]. 
if !((i == 0 && y == 0) || (i == 15 && y == 15)) { - must_gc(&engine, pk, safe_point); + must_gc(&mut engine, pk, safe_point); } } } @@ -1397,7 +1397,7 @@ mod tests { let start_ts = 3; for i in 0..16 { let pk = &[i as u8]; - must_prewrite_put(&engine, pk, b"", pk, start_ts); + must_prewrite_put(&mut engine, pk, b"", pk, start_ts); } let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1420,9 +1420,9 @@ mod tests { #[test] fn test_backward_scanner_check_gc_fence() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); - let (read_ts, expected_result) = prepare_test_data_for_check_gc_fence(&engine); + let (read_ts, expected_result) = prepare_test_data_for_check_gc_fence(&mut engine); let expected_result: Vec<_> = expected_result .into_iter() .filter_map(|(key, value)| value.map(|v| (key, v))) @@ -1446,34 +1446,34 @@ mod tests { #[test] fn test_rc_read_check_ts() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (key0, val0) = (b"k0", b"v0"); - must_prewrite_put(&engine, key0, val0, key0, 60); + must_prewrite_put(&mut engine, key0, val0, key0, 60); let (key1, val1) = (b"k1", b"v1"); - must_prewrite_put(&engine, key1, val1, key1, 25); - must_commit(&engine, key1, 25, 30); + must_prewrite_put(&mut engine, key1, val1, key1, 25); + must_commit(&mut engine, key1, 25, 30); let (key2, val2, val22) = (b"k2", b"v2", b"v22"); - must_prewrite_put(&engine, key2, val2, key2, 6); - must_commit(&engine, key2, 6, 9); - must_prewrite_put(&engine, key2, val22, key2, 10); - must_commit(&engine, key2, 10, 20); + must_prewrite_put(&mut engine, key2, val2, key2, 6); + must_commit(&mut engine, key2, 6, 9); + must_prewrite_put(&mut engine, key2, val22, key2, 10); + must_commit(&mut engine, key2, 10, 20); let (key3, val3) = (b"k3", b"v3"); - must_prewrite_put(&engine, key3, val3, key3, 5); - must_commit(&engine, key3, 5, 6); + 
must_prewrite_put(&mut engine, key3, val3, key3, 5); + must_commit(&mut engine, key3, 5, 6); let (key4, val4) = (b"k4", b"val4"); - must_prewrite_put(&engine, key4, val4, key4, 3); - must_commit(&engine, key4, 3, 4); - must_prewrite_lock(&engine, key4, key4, 5); + must_prewrite_put(&mut engine, key4, val4, key4, 3); + must_commit(&mut engine, key4, 3, 4); + must_prewrite_lock(&mut engine, key4, key4, 5); let (key5, val5) = (b"k5", b"val5"); - must_prewrite_put(&engine, key5, val5, key5, 1); - must_commit(&engine, key5, 1, 2); - must_acquire_pessimistic_lock(&engine, key5, key5, 3, 3); + must_prewrite_put(&mut engine, key5, val5, key5, 1); + must_commit(&mut engine, key5, 1, 2); + must_acquire_pessimistic_lock(&mut engine, key5, key5, 3, 3); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, 29.into()) diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index aee185e307f..c59c20fbe05 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -1005,7 +1005,7 @@ pub mod test_util { #[allow(clippy::type_complexity)] pub fn prepare_test_data_for_check_gc_fence( - engine: &impl Engine, + engine: &mut impl Engine, ) -> (TimeStamp, Vec<(Vec, Option>)>) { // Generates test data that is consistent after timestamp 40. @@ -1120,12 +1120,12 @@ mod latest_kv_tests { /// goes out of bound. #[test] fn test_get_out_of_bound() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); // Generate 1 put for [a]. - must_prewrite_put(&engine, b"a", b"value", b"a", 7); - must_commit(&engine, b"a", 7, 7); + must_prewrite_put(&mut engine, b"a", b"value", b"a", 7); + must_commit(&mut engine, b"a", 7, 7); // Generate 5 rollback for [b]. for ts in 0..5 { @@ -1189,11 +1189,11 @@ mod latest_kv_tests { /// Case 1. 
next() out of bound #[test] fn test_move_next_user_key_out_of_bound_1() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); // Generate 1 put for [a]. - must_prewrite_put(&engine, b"a", b"a_value", b"a", SEEK_BOUND * 2); - must_commit(&engine, b"a", SEEK_BOUND * 2, SEEK_BOUND * 2); + must_prewrite_put(&mut engine, b"a", b"a_value", b"a", SEEK_BOUND * 2); + must_commit(&mut engine, b"a", SEEK_BOUND * 2, SEEK_BOUND * 2); // Generate SEEK_BOUND / 2 rollback and 1 put for [b] . for ts in 0..SEEK_BOUND / 2 { @@ -1208,8 +1208,8 @@ mod latest_kv_tests { ]; write(&engine, &ctx, modifies); } - must_prewrite_put(&engine, b"b", b"b_value", b"a", SEEK_BOUND / 2); - must_commit(&engine, b"b", SEEK_BOUND / 2, SEEK_BOUND / 2); + must_prewrite_put(&mut engine, b"b", b"b_value", b"a", SEEK_BOUND / 2); + must_commit(&mut engine, b"b", SEEK_BOUND / 2, SEEK_BOUND / 2); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, (SEEK_BOUND * 2).into()) @@ -1271,12 +1271,12 @@ mod latest_kv_tests { /// Case 2. seek() out of bound #[test] fn test_move_next_user_key_out_of_bound_2() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); // Generate 1 put for [a]. - must_prewrite_put(&engine, b"a", b"a_value", b"a", SEEK_BOUND * 2); - must_commit(&engine, b"a", SEEK_BOUND * 2, SEEK_BOUND * 2); + must_prewrite_put(&mut engine, b"a", b"a_value", b"a", SEEK_BOUND * 2); + must_commit(&mut engine, b"a", SEEK_BOUND * 2, SEEK_BOUND * 2); // Generate SEEK_BOUND-1 rollback and 1 put for [b] . 
for ts in 1..SEEK_BOUND { @@ -1291,8 +1291,8 @@ mod latest_kv_tests { ]; write(&engine, &ctx, modifies); } - must_prewrite_put(&engine, b"b", b"b_value", b"a", SEEK_BOUND); - must_commit(&engine, b"b", SEEK_BOUND, SEEK_BOUND); + must_prewrite_put(&mut engine, b"b", b"b_value", b"a", SEEK_BOUND); + must_commit(&mut engine, b"b", SEEK_BOUND, SEEK_BOUND); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, (SEEK_BOUND * 2).into()) @@ -1353,21 +1353,21 @@ mod latest_kv_tests { /// Range is left open right closed. #[test] fn test_range() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // Generate 1 put for [1], [2] ... [6]. for i in 1..7 { // ts = 1: value = [] - must_prewrite_put(&engine, &[i], &[], &[i], 1); - must_commit(&engine, &[i], 1, 1); + must_prewrite_put(&mut engine, &[i], &[], &[i], 1); + must_commit(&mut engine, &[i], 1, 1); // ts = 7: value = [ts] - must_prewrite_put(&engine, &[i], &[i], &[i], 7); - must_commit(&engine, &[i], 7, 7); + must_prewrite_put(&mut engine, &[i], &[i], &[i], 7); + must_commit(&mut engine, &[i], 7, 7); // ts = 14: value = [] - must_prewrite_put(&engine, &[i], &[], &[i], 14); - must_commit(&engine, &[i], 14, 14); + must_prewrite_put(&mut engine, &[i], &[], &[i], 14); + must_commit(&mut engine, &[i], 14, 14); } let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1478,9 +1478,9 @@ mod latest_kv_tests { #[test] fn test_latest_kv_check_gc_fence() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); - let (read_ts, expected_result) = prepare_test_data_for_check_gc_fence(&engine); + let (read_ts, expected_result) = prepare_test_data_for_check_gc_fence(&mut engine); let expected_result: Vec<_> = expected_result .into_iter() .filter_map(|(key, value)| value.map(|v| (key, v))) @@ -1502,38 +1502,38 @@ mod latest_kv_tests { #[test] fn 
test_rc_read_check_ts() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (key0, val0) = (b"k0", b"v0"); - must_prewrite_put(&engine, key0, val0, key0, 1); - must_commit(&engine, key0, 1, 5); + must_prewrite_put(&mut engine, key0, val0, key0, 1); + must_commit(&mut engine, key0, 1, 5); let (key1, val1) = (b"k1", b"v1"); - must_prewrite_put(&engine, key1, val1, key1, 10); - must_commit(&engine, key1, 10, 20); + must_prewrite_put(&mut engine, key1, val1, key1, 10); + must_commit(&mut engine, key1, 10, 20); let (key2, val2, val22) = (b"k2", b"v2", b"v22"); - must_prewrite_put(&engine, key2, val2, key2, 30); - must_commit(&engine, key2, 30, 40); - must_prewrite_put(&engine, key2, val22, key2, 41); - must_commit(&engine, key2, 41, 42); + must_prewrite_put(&mut engine, key2, val2, key2, 30); + must_commit(&mut engine, key2, 30, 40); + must_prewrite_put(&mut engine, key2, val22, key2, 41); + must_commit(&mut engine, key2, 41, 42); let (key3, val3) = (b"k3", b"v3"); - must_prewrite_put(&engine, key3, val3, key3, 50); - must_commit(&engine, key3, 50, 51); + must_prewrite_put(&mut engine, key3, val3, key3, 50); + must_commit(&mut engine, key3, 50, 51); let (key4, val4) = (b"k4", b"val4"); - must_prewrite_put(&engine, key4, val4, key4, 55); - must_commit(&engine, key4, 55, 56); - must_prewrite_lock(&engine, key4, key4, 60); + must_prewrite_put(&mut engine, key4, val4, key4, 55); + must_commit(&mut engine, key4, 55, 56); + must_prewrite_lock(&mut engine, key4, key4, 60); let (key5, val5) = (b"k5", b"val5"); - must_prewrite_put(&engine, key5, val5, key5, 57); - must_commit(&engine, key5, 57, 58); - must_acquire_pessimistic_lock(&engine, key5, key5, 65, 65); + must_prewrite_put(&mut engine, key5, val5, key5, 57); + must_commit(&mut engine, key5, 57, 58); + must_acquire_pessimistic_lock(&mut engine, key5, key5, 65, 65); let (key6, val6) = (b"k6", b"v6"); - must_prewrite_put(&engine, key6, val6, key6, 75); 
+ must_prewrite_put(&mut engine, key6, val6, key6, 75); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, 35.into()) @@ -1607,12 +1607,12 @@ mod latest_entry_tests { /// out of bound. #[test] fn test_get_out_of_bound() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); // Generate 1 put for [a]. - must_prewrite_put(&engine, b"a", b"value", b"a", 7); - must_commit(&engine, b"a", 7, 7); + must_prewrite_put(&mut engine, b"a", b"value", b"a", 7); + must_commit(&mut engine, b"a", 7, 7); // Generate 5 rollback for [b]. for ts in 0..5 { @@ -1678,12 +1678,12 @@ mod latest_entry_tests { /// Case 1. next() out of bound #[test] fn test_move_next_user_key_out_of_bound_1() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); // Generate 1 put for [a]. - must_prewrite_put(&engine, b"a", b"a_value", b"a", SEEK_BOUND * 2); - must_commit(&engine, b"a", SEEK_BOUND * 2, SEEK_BOUND * 2); + must_prewrite_put(&mut engine, b"a", b"a_value", b"a", SEEK_BOUND * 2); + must_commit(&mut engine, b"a", SEEK_BOUND * 2, SEEK_BOUND * 2); // Generate SEEK_BOUND / 2 rollback and 1 put for [b] . for ts in 0..SEEK_BOUND / 2 { @@ -1698,8 +1698,8 @@ mod latest_entry_tests { ]; write(&engine, &ctx, modifies); } - must_prewrite_put(&engine, b"b", b"b_value", b"a", SEEK_BOUND / 2); - must_commit(&engine, b"b", SEEK_BOUND / 2, SEEK_BOUND / 2); + must_prewrite_put(&mut engine, b"b", b"b_value", b"a", SEEK_BOUND / 2); + must_commit(&mut engine, b"b", SEEK_BOUND / 2, SEEK_BOUND / 2); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, (SEEK_BOUND * 2).into()) @@ -1762,12 +1762,12 @@ mod latest_entry_tests { /// Case 2. 
seek() out of bound #[test] fn test_move_next_user_key_out_of_bound_2() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); // Generate 1 put for [a]. - must_prewrite_put(&engine, b"a", b"a_value", b"a", SEEK_BOUND * 2); - must_commit(&engine, b"a", SEEK_BOUND * 2, SEEK_BOUND * 2); + must_prewrite_put(&mut engine, b"a", b"a_value", b"a", SEEK_BOUND * 2); + must_commit(&mut engine, b"a", SEEK_BOUND * 2, SEEK_BOUND * 2); // Generate SEEK_BOUND-1 rollback and 1 put for [b] . for ts in 1..SEEK_BOUND { @@ -1782,8 +1782,8 @@ mod latest_entry_tests { ]; write(&engine, &ctx, modifies); } - must_prewrite_put(&engine, b"b", b"b_value", b"a", SEEK_BOUND); - must_commit(&engine, b"b", SEEK_BOUND, SEEK_BOUND); + must_prewrite_put(&mut engine, b"b", b"b_value", b"a", SEEK_BOUND); + must_commit(&mut engine, b"b", SEEK_BOUND, SEEK_BOUND); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, (SEEK_BOUND * 2).into()) @@ -1846,21 +1846,21 @@ mod latest_entry_tests { /// Range is left open right closed. #[test] fn test_range() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // Generate 1 put for [1], [2] ... [6]. 
for i in 1..7 { // ts = 1: value = [] - must_prewrite_put(&engine, &[i], &[], &[i], 1); - must_commit(&engine, &[i], 1, 1); + must_prewrite_put(&mut engine, &[i], &[], &[i], 1); + must_commit(&mut engine, &[i], 1, 1); // ts = 7: value = [ts] - must_prewrite_put(&engine, &[i], &[i], &[i], 7); - must_commit(&engine, &[i], 7, 7); + must_prewrite_put(&mut engine, &[i], &[i], &[i], 7); + must_commit(&mut engine, &[i], 7, 7); // ts = 14: value = [] - must_prewrite_put(&engine, &[i], &[], &[i], 14); - must_commit(&engine, &[i], 14, 14); + must_prewrite_put(&mut engine, &[i], &[], &[i], 14); + must_commit(&mut engine, &[i], 14, 14); } let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1918,20 +1918,20 @@ mod latest_entry_tests { #[test] fn test_output_delete_and_after_ts() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); // Generate put for [a] at 3. - must_prewrite_put(&engine, b"a", b"a_3", b"a", 3); - must_commit(&engine, b"a", 3, 3); + must_prewrite_put(&mut engine, b"a", b"a_3", b"a", 3); + must_commit(&mut engine, b"a", 3, 3); // Generate put for [a] at 7. - must_prewrite_put(&engine, b"a", b"a_7", b"a", 7); - must_commit(&engine, b"a", 7, 7); + must_prewrite_put(&mut engine, b"a", b"a_7", b"a", 7); + must_commit(&mut engine, b"a", 7, 7); // Generate put for [b] at 1. - must_prewrite_put(&engine, b"b", b"b_1", b"b", 1); - must_commit(&engine, b"b", 1, 1); + must_prewrite_put(&mut engine, b"b", b"b_1", b"b", 1); + must_commit(&mut engine, b"b", 1, 1); // Generate rollbacks for [b] at 2, 3, 4. for ts in 2..5 { @@ -1948,8 +1948,8 @@ mod latest_entry_tests { } // Generate delete for [b] at 10. 
- must_prewrite_delete(&engine, b"b", b"b", 10); - must_commit(&engine, b"b", 10, 10); + must_prewrite_delete(&mut engine, b"b", b"b", 10); + must_commit(&mut engine, b"b", 10, 10); let entry_a_3 = EntryBuilder::default() .key(b"a") @@ -1975,7 +1975,7 @@ mod latest_entry_tests { .commit_ts(10.into()) .build_commit(WriteType::Delete, true); - let check = |ts: u64, after_ts: u64, output_delete, expected: Vec<&TxnEntry>| { + let mut check = |ts: u64, after_ts: u64, output_delete, expected: Vec<&TxnEntry>| { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, ts.into()) .range(None, None) @@ -2003,9 +2003,9 @@ mod latest_entry_tests { #[test] fn test_latest_entry_check_gc_fence() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); - let (read_ts, expected_result) = prepare_test_data_for_check_gc_fence(&engine); + let (read_ts, expected_result) = prepare_test_data_for_check_gc_fence(&mut engine); let expected_result: Vec<_> = expected_result .into_iter() .filter_map(|(key, value)| value.map(|v| (key, v))) @@ -2039,12 +2039,12 @@ mod delta_entry_tests { /// bound. #[test] fn test_get_out_of_bound() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); // Generate 1 put for [a]. - must_prewrite_put(&engine, b"a", b"value", b"a", 7); - must_commit(&engine, b"a", 7, 7); + must_prewrite_put(&mut engine, b"a", b"value", b"a", 7); + must_commit(&mut engine, b"a", 7, 7); // Generate 5 rollback for [b]. for ts in 0..5 { @@ -2110,11 +2110,11 @@ mod delta_entry_tests { /// Case 1. next() out of bound #[test] fn test_move_next_user_key_out_of_bound_1() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); // Generate 1 put for [a]. 
- must_prewrite_put(&engine, b"a", b"a_value", b"a", SEEK_BOUND * 2); - must_commit(&engine, b"a", SEEK_BOUND * 2, SEEK_BOUND * 2); + must_prewrite_put(&mut engine, b"a", b"a_value", b"a", SEEK_BOUND * 2); + must_commit(&mut engine, b"a", SEEK_BOUND * 2, SEEK_BOUND * 2); // Generate SEEK_BOUND / 2 rollback and 1 put for [b] . for ts in 0..SEEK_BOUND / 2 { @@ -2129,8 +2129,8 @@ mod delta_entry_tests { ]; write(&engine, &ctx, modifies); } - must_prewrite_put(&engine, b"b", b"b_value", b"a", SEEK_BOUND / 2); - must_commit(&engine, b"b", SEEK_BOUND / 2, SEEK_BOUND / 2); + must_prewrite_put(&mut engine, b"b", b"b_value", b"a", SEEK_BOUND / 2); + must_commit(&mut engine, b"b", SEEK_BOUND / 2, SEEK_BOUND / 2); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, (SEEK_BOUND * 2).into()) @@ -2193,12 +2193,12 @@ mod delta_entry_tests { /// Case 2. seek() out of bound #[test] fn test_move_next_user_key_out_of_bound_2() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); // Generate 1 put for [a]. - must_prewrite_put(&engine, b"a", b"a_value", b"a", SEEK_BOUND * 2); - must_commit(&engine, b"a", SEEK_BOUND * 2, SEEK_BOUND * 2); + must_prewrite_put(&mut engine, b"a", b"a_value", b"a", SEEK_BOUND * 2); + must_commit(&mut engine, b"a", SEEK_BOUND * 2, SEEK_BOUND * 2); // Generate SEEK_BOUND rollback and 1 put for [b] . 
// It differs from EntryScanner that this will try to fetch multiple versions of @@ -2215,8 +2215,8 @@ mod delta_entry_tests { ]; write(&engine, &ctx, modifies); } - must_prewrite_put(&engine, b"b", b"b_value", b"a", SEEK_BOUND + 1); - must_commit(&engine, b"b", SEEK_BOUND + 1, SEEK_BOUND + 1); + must_prewrite_put(&mut engine, b"b", b"b_value", b"a", SEEK_BOUND + 1); + must_commit(&mut engine, b"b", SEEK_BOUND + 1, SEEK_BOUND + 1); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, (SEEK_BOUND * 2).into()) @@ -2279,21 +2279,21 @@ mod delta_entry_tests { /// Range is left open right closed. #[test] fn test_range() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // Generate 1 put for [1], [2] ... [6]. for i in 1..7 { // ts = 1: value = [] - must_prewrite_put(&engine, &[i], &[], &[i], 1); - must_commit(&engine, &[i], 1, 1); + must_prewrite_put(&mut engine, &[i], &[], &[i], 1); + must_commit(&mut engine, &[i], 1, 1); // ts = 7: value = [ts] - must_prewrite_put(&engine, &[i], &[i], &[i], 7); - must_commit(&engine, &[i], 7, 7); + must_prewrite_put(&mut engine, &[i], &[i], &[i], 7); + must_commit(&mut engine, &[i], 7, 7); // ts = 14: value = [] - must_prewrite_put(&engine, &[i], &[], &[i], 14); - must_commit(&engine, &[i], 14, 14); + must_prewrite_put(&mut engine, &[i], &[], &[i], 14); + must_commit(&mut engine, &[i], 14, 14); } let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -2472,16 +2472,16 @@ mod delta_entry_tests { .collect::>() }; - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); for (key, lock, writes) in &test_data { for (start_ts, commit_ts, write_type, value) in writes { let value = make_value(value); if *write_type != WriteType::Rollback { - must_acquire_pessimistic_lock(&engine, key, key, start_ts, commit_ts - 1); + 
must_acquire_pessimistic_lock(&mut engine, key, key, start_ts, commit_ts - 1); } match write_type { WriteType::Put => must_pessimistic_prewrite_put( - &engine, + &mut engine, key, &value, key, @@ -2490,7 +2490,7 @@ mod delta_entry_tests { DoPessimisticCheck, ), WriteType::Delete => must_pessimistic_prewrite_delete( - &engine, + &mut engine, key, key, start_ts, @@ -2498,17 +2498,17 @@ mod delta_entry_tests { DoPessimisticCheck, ), WriteType::Lock => must_pessimistic_prewrite_lock( - &engine, + &mut engine, key, key, start_ts, commit_ts - 1, DoPessimisticCheck, ), - WriteType::Rollback => must_rollback(&engine, key, start_ts, false), + WriteType::Rollback => must_rollback(&mut engine, key, start_ts, false), } if *write_type != WriteType::Rollback { - must_commit(&engine, key, start_ts, commit_ts); + must_commit(&mut engine, key, start_ts, commit_ts); } } @@ -2520,10 +2520,10 @@ mod delta_entry_tests { .map(|(_, commit_ts, ..)| commit_ts) .unwrap_or(0); let for_update_ts = std::cmp::max(*ts, max_commit_ts + 1); - must_acquire_pessimistic_lock(&engine, key, key, *ts, for_update_ts); + must_acquire_pessimistic_lock(&mut engine, key, key, *ts, for_update_ts); match lock_type { LockType::Put => must_pessimistic_prewrite_put( - &engine, + &mut engine, key, &value, key, @@ -2532,7 +2532,7 @@ mod delta_entry_tests { DoPessimisticCheck, ), LockType::Delete => must_pessimistic_prewrite_delete( - &engine, + &mut engine, key, key, ts, @@ -2540,7 +2540,7 @@ mod delta_entry_tests { DoPessimisticCheck, ), LockType::Lock => must_pessimistic_prewrite_lock( - &engine, + &mut engine, key, key, ts, @@ -2552,7 +2552,7 @@ mod delta_entry_tests { } } - let check = |from_key, to_key, from_ts, to_ts| { + let mut check = |from_key, to_key, from_ts, to_ts| { let expected = expected_entries(from_key, to_key, from_ts, to_ts); let from_key = if from_key.is_empty() { @@ -2604,23 +2604,23 @@ mod delta_entry_tests { #[test] fn test_output_old_value() { - let engine = 
TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); // Generate put for [a] at 1. - must_prewrite_put(&engine, b"a", b"a_1", b"a", 1); - must_commit(&engine, b"a", 1, 1); + must_prewrite_put(&mut engine, b"a", b"a_1", b"a", 1); + must_commit(&mut engine, b"a", 1, 1); // Generate put for [a] at 3. - must_prewrite_put(&engine, b"a", b"a_3", b"a", 3); - must_commit(&engine, b"a", 3, 3); + must_prewrite_put(&mut engine, b"a", b"a_3", b"a", 3); + must_commit(&mut engine, b"a", 3, 3); // Generate delete for [a] at 5. - must_prewrite_delete(&engine, b"a", b"a", 5); + must_prewrite_delete(&mut engine, b"a", b"a", 5); // Generate put for [b] at 2. - must_prewrite_put(&engine, b"b", b"b_2", b"b", 2); - must_commit(&engine, b"b", 2, 2); + must_prewrite_put(&mut engine, b"b", b"b_2", b"b", 2); + must_commit(&mut engine, b"b", 2, 2); // Generate rollbacks for [b] at 6, 7, 8. for ts in 6..9 { @@ -2637,18 +2637,18 @@ mod delta_entry_tests { } // Generate delete for [b] at 10. - must_prewrite_delete(&engine, b"b", b"b", 10); - must_commit(&engine, b"b", 10, 10); + must_prewrite_delete(&mut engine, b"b", b"b", 10); + must_commit(&mut engine, b"b", 10, 10); // Generate put for [b] at 15. 
- must_acquire_pessimistic_lock(&engine, b"b", b"b", 9, 15); - must_pessimistic_prewrite_put(&engine, b"b", b"b_15", b"b", 9, 15, DoPessimisticCheck); + must_acquire_pessimistic_lock(&mut engine, b"b", b"b", 9, 15); + must_pessimistic_prewrite_put(&mut engine, b"b", b"b_15", b"b", 9, 15, DoPessimisticCheck); - must_prewrite_put(&engine, b"c", b"c_4", b"c", 4); - must_commit(&engine, b"c", 4, 6); - must_acquire_pessimistic_lock(&engine, b"c", b"c", 5, 15); - must_pessimistic_prewrite_put(&engine, b"c", b"c_5", b"c", 5, 15, DoPessimisticCheck); - must_cleanup(&engine, b"c", 20, 0); + must_prewrite_put(&mut engine, b"c", b"c_4", b"c", 4); + must_commit(&mut engine, b"c", 4, 6); + must_acquire_pessimistic_lock(&mut engine, b"c", b"c", 5, 15); + must_pessimistic_prewrite_put(&mut engine, b"c", b"c_5", b"c", 5, 15, DoPessimisticCheck); + must_cleanup(&mut engine, b"c", 20, 0); let entry_a_1 = EntryBuilder::default() .key(b"a") @@ -2703,7 +2703,7 @@ mod delta_entry_tests { .old_value(b"c_4") .build_prewrite(LockType::Put, true); - let check = |after_ts: u64, expected: Vec<&TxnEntry>| { + let mut check = |after_ts: u64, expected: Vec<&TxnEntry>| { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, TimeStamp::max()) .range(None, None) @@ -2739,8 +2739,8 @@ mod delta_entry_tests { #[test] fn test_old_value_check_gc_fence() { - let engine = TestEngineBuilder::new().build().unwrap(); - prepare_test_data_for_check_gc_fence(&engine); + let mut engine = TestEngineBuilder::new().build().unwrap(); + prepare_test_data_for_check_gc_fence(&mut engine); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, TimeStamp::max()) @@ -2772,7 +2772,7 @@ mod delta_entry_tests { for i in b'1'..=b'8' { let key = &[b'k', i]; let value = &[b'v', i, b'x', b'x']; - must_prewrite_put(&engine, key, value, b"k1", 55); + must_prewrite_put(&mut engine, key, value, b"k1", 55); } let snapshot = 
engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, TimeStamp::max()) @@ -2808,7 +2808,7 @@ mod delta_entry_tests { // Commit all the locks and check again. for i in b'1'..=b'8' { let key = &[b'k', i]; - must_commit(&engine, key, 55, 56); + must_commit(&mut engine, key, 55, 56); } let snapshot = engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, TimeStamp::max()) diff --git a/src/storage/mvcc/reader/scanner/mod.rs b/src/storage/mvcc/reader/scanner/mod.rs index 1f45390a21e..7b799a3f456 100644 --- a/src/storage/mvcc/reader/scanner/mod.rs +++ b/src/storage/mvcc/reader/scanner/mod.rs @@ -627,18 +627,18 @@ mod tests { const POST_TS: TimeStamp = TimeStamp::new(5); let new_engine = || TestEngineBuilder::new().build().unwrap(); - let add_write_at_ts = |commit_ts, engine, key, value| { + let add_write_at_ts = |commit_ts, engine: &mut _, key, value| { must_prewrite_put(engine, key, value, key, commit_ts); must_commit(engine, key, commit_ts, commit_ts); }; - let add_lock_at_ts = |lock_ts, engine, key| { + let add_lock_at_ts = |lock_ts, engine: &mut _, key| { must_prewrite_put(engine, key, b"lock", key, lock_ts); must_locked(engine, key, lock_ts); }; let test_scanner_result = - move |engine: &RocksEngine, expected_result: Vec<(Vec, Option>)>| { + move |engine: &mut RocksEngine, expected_result: Vec<(Vec, Option>)>| { let snapshot = engine.snapshot(Default::default()).unwrap(); let scanner = ScannerBuilder::new(snapshot, SCAN_TS) @@ -657,68 +657,68 @@ mod tests { }; // Lock after write - let engine = new_engine(); + let mut engine = new_engine(); - add_write_at_ts(POST_TS, &engine, b"a", b"a_value"); - add_lock_at_ts(PREV_TS, &engine, b"b"); + add_write_at_ts(POST_TS, &mut engine, b"a", b"a_value"); + add_lock_at_ts(PREV_TS, &mut engine, b"b"); let expected_result = desc_map(vec![ (b"a".to_vec(), Some(b"a_value".to_vec())), (b"b".to_vec(), None), ]); - test_scanner_result(&engine, 
expected_result); + test_scanner_result(&mut engine, expected_result); // Lock before write for same key - let engine = new_engine(); - add_write_at_ts(PREV_TS, &engine, b"a", b"a_value"); - add_lock_at_ts(POST_TS, &engine, b"a"); + let mut engine = new_engine(); + add_write_at_ts(PREV_TS, &mut engine, b"a", b"a_value"); + add_lock_at_ts(POST_TS, &mut engine, b"a"); let expected_result = vec![(b"a".to_vec(), None)]; - test_scanner_result(&engine, expected_result); + test_scanner_result(&mut engine, expected_result); // Lock before write in different keys - let engine = new_engine(); - add_lock_at_ts(POST_TS, &engine, b"a"); - add_write_at_ts(PREV_TS, &engine, b"b", b"b_value"); + let mut engine = new_engine(); + add_lock_at_ts(POST_TS, &mut engine, b"a"); + add_write_at_ts(PREV_TS, &mut engine, b"b", b"b_value"); let expected_result = desc_map(vec![ (b"a".to_vec(), None), (b"b".to_vec(), Some(b"b_value".to_vec())), ]); - test_scanner_result(&engine, expected_result); + test_scanner_result(&mut engine, expected_result); // Only a lock here - let engine = new_engine(); - add_lock_at_ts(PREV_TS, &engine, b"a"); + let mut engine = new_engine(); + add_lock_at_ts(PREV_TS, &mut engine, b"a"); let expected_result = desc_map(vec![(b"a".to_vec(), None)]); - test_scanner_result(&engine, expected_result); + test_scanner_result(&mut engine, expected_result); // Write Only - let engine = new_engine(); - add_write_at_ts(PREV_TS, &engine, b"a", b"a_value"); + let mut engine = new_engine(); + add_write_at_ts(PREV_TS, &mut engine, b"a", b"a_value"); let expected_result = desc_map(vec![(b"a".to_vec(), Some(b"a_value".to_vec()))]); - test_scanner_result(&engine, expected_result); + test_scanner_result(&mut engine, expected_result); } fn test_scan_with_lock_impl(desc: bool) { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); for i in 0..5 { - must_prewrite_put(&engine, &[i], &[b'v', i], &[i], 1); - 
must_commit(&engine, &[i], 1, 2); - must_prewrite_put(&engine, &[i], &[b'v', i], &[i], 10); - must_commit(&engine, &[i], 10, 100); + must_prewrite_put(&mut engine, &[i], &[b'v', i], &[i], 1); + must_commit(&mut engine, &[i], 1, 2); + must_prewrite_put(&mut engine, &[i], &[b'v', i], &[i], 10); + must_commit(&mut engine, &[i], 10, 100); } - must_acquire_pessimistic_lock(&engine, &[1], &[1], 20, 110); - must_acquire_pessimistic_lock(&engine, &[2], &[2], 50, 110); - must_acquire_pessimistic_lock(&engine, &[3], &[3], 105, 110); - must_prewrite_put(&engine, &[4], b"a", &[4], 105); + must_acquire_pessimistic_lock(&mut engine, &[1], &[1], 20, 110); + must_acquire_pessimistic_lock(&mut engine, &[2], &[2], 50, 110); + must_acquire_pessimistic_lock(&mut engine, &[3], &[3], 105, 110); + must_prewrite_put(&mut engine, &[4], b"a", &[4], 105); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -778,16 +778,16 @@ mod tests { } fn test_scan_bypass_locks_impl(desc: bool) { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); for i in 0..5 { - must_prewrite_put(&engine, &[i], &[b'v', i], &[i], 10); - must_commit(&engine, &[i], 10, 20); + must_prewrite_put(&mut engine, &[i], &[b'v', i], &[i], 10); + must_commit(&mut engine, &[i], 10, 20); } // Locks are: 30, 40, 50, 60, 70 for i in 0..5 { - must_prewrite_put(&engine, &[i], &[b'v', i], &[i], 30 + u64::from(i) * 10); + must_prewrite_put(&mut engine, &[i], &[b'v', i], &[i], 30 + u64::from(i) * 10); } let bypass_locks = TsSet::from_u64s(vec![30, 41, 50]); @@ -821,28 +821,28 @@ mod tests { } fn test_scan_access_locks_impl(desc: bool, delete_bound: bool) { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); for i in 0..=8 { - must_prewrite_put(&engine, &[i], &[b'v', i], &[i], 10); - must_commit(&engine, &[i], 10, 20); + must_prewrite_put(&mut engine, &[i], &[b'v', i], &[i], 10); + 
must_commit(&mut engine, &[i], 10, 20); } if delete_bound { - must_prewrite_delete(&engine, &[0], &[0], 30); // access delete + must_prewrite_delete(&mut engine, &[0], &[0], 30); // access delete } else { - must_prewrite_put(&engine, &[0], &[b'v', 0, 0], &[0], 30); // access put + must_prewrite_put(&mut engine, &[0], &[b'v', 0, 0], &[0], 30); // access put } - must_prewrite_put(&engine, &[1], &[b'v', 1, 1], &[1], 40); // access put - must_prewrite_delete(&engine, &[2], &[2], 50); // access delete - must_prewrite_lock(&engine, &[3], &[3], 60); // access lock(actually ignored) - must_prewrite_put(&engine, &[4], &[b'v', 4, 4], &[4], 70); // locked - must_prewrite_put(&engine, &[5], &[b'v', 5, 5], &[5], 80); // bypass - must_prewrite_put(&engine, &[6], &[b'v', 6, 6], &[6], 100); // locked with larger ts + must_prewrite_put(&mut engine, &[1], &[b'v', 1, 1], &[1], 40); // access put + must_prewrite_delete(&mut engine, &[2], &[2], 50); // access delete + must_prewrite_lock(&mut engine, &[3], &[3], 60); // access lock(actually ignored) + must_prewrite_put(&mut engine, &[4], &[b'v', 4, 4], &[4], 70); // locked + must_prewrite_put(&mut engine, &[5], &[b'v', 5, 5], &[5], 80); // bypass + must_prewrite_put(&mut engine, &[6], &[b'v', 6, 6], &[6], 100); // locked with larger ts if delete_bound { - must_prewrite_delete(&engine, &[8], &[8], 90); // access delete + must_prewrite_delete(&mut engine, &[8], &[8], 90); // access delete } else { - must_prewrite_put(&engine, &[8], &[b'v', 8, 8], &[8], 90); // access put + must_prewrite_put(&mut engine, &[8], &[b'v', 8, 8], &[8], 90); // access put } let bypass_locks = TsSet::from_u64s(vec![80]); @@ -887,7 +887,7 @@ mod tests { } fn must_met_newer_ts_data( - engine: &E, + engine: &mut E, scanner_ts: impl Into, key: &[u8], value: Option<&[u8]>, @@ -922,39 +922,39 @@ mod tests { } fn test_met_newer_ts_data_impl(deep_write_seek: bool, desc: bool) { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = 
TestEngineBuilder::new().build().unwrap(); let (key, val1) = (b"foo", b"bar1"); if deep_write_seek { for i in 0..SEEK_BOUND { - must_prewrite_put(&engine, key, val1, key, i); - must_commit(&engine, key, i, i); + must_prewrite_put(&mut engine, key, val1, key, i); + must_commit(&mut engine, key, i, i); } } - must_prewrite_put(&engine, key, val1, key, 100); - must_commit(&engine, key, 100, 200); + must_prewrite_put(&mut engine, key, val1, key, 100); + must_commit(&mut engine, key, 100, 200); let (key, val2) = (b"foo", b"bar2"); - must_prewrite_put(&engine, key, val2, key, 300); - must_commit(&engine, key, 300, 400); + must_prewrite_put(&mut engine, key, val2, key, 300); + must_commit(&mut engine, key, 300, 400); must_met_newer_ts_data( - &engine, + &mut engine, 100, key, if deep_write_seek { Some(val1) } else { None }, desc, true, ); - must_met_newer_ts_data(&engine, 200, key, Some(val1), desc, true); - must_met_newer_ts_data(&engine, 300, key, Some(val1), desc, true); - must_met_newer_ts_data(&engine, 400, key, Some(val2), desc, false); - must_met_newer_ts_data(&engine, 500, key, Some(val2), desc, false); + must_met_newer_ts_data(&mut engine, 200, key, Some(val1), desc, true); + must_met_newer_ts_data(&mut engine, 300, key, Some(val1), desc, true); + must_met_newer_ts_data(&mut engine, 400, key, Some(val2), desc, false); + must_met_newer_ts_data(&mut engine, 500, key, Some(val2), desc, false); - must_prewrite_lock(&engine, key, key, 600); + must_prewrite_lock(&mut engine, key, key, 600); - must_met_newer_ts_data(&engine, 500, key, Some(val2), desc, true); - must_met_newer_ts_data(&engine, 600, key, Some(val2), desc, true); + must_met_newer_ts_data(&mut engine, 500, key, Some(val2), desc, true); + must_met_newer_ts_data(&mut engine, 600, key, Some(val2), desc, true); } #[test] @@ -967,9 +967,10 @@ mod tests { #[test] fn test_old_value_with_hint_min_ts() { - let engine = TestEngineBuilder::new().build_without_cache().unwrap(); - let create_scanner = |from_ts: u64| { - 
let snap = engine.snapshot(Default::default()).unwrap(); + let mut engine = TestEngineBuilder::new().build_without_cache().unwrap(); + let mut engine_clone = engine.clone(); + let mut create_scanner = |from_ts: u64| { + let snap = engine_clone.snapshot(Default::default()).unwrap(); ScannerBuilder::new(snap, TimeStamp::max()) .fill_cache(false) .hint_min_ts(Some(from_ts.into())) @@ -981,10 +982,10 @@ mod tests { (0..128).for_each(|_| value.extend_from_slice(b"long-val")); // Create the initial data with CF_WRITE L0: |zkey_110, zkey1_160| - must_prewrite_put(&engine, b"zkey", &value, b"zkey", 100); - must_commit(&engine, b"zkey", 100, 110); - must_prewrite_put(&engine, b"zkey1", &value, b"zkey1", 150); - must_commit(&engine, b"zkey1", 150, 160); + must_prewrite_put(&mut engine, b"zkey", &value, b"zkey", 100); + must_commit(&mut engine, b"zkey", 100, 110); + must_prewrite_put(&mut engine, b"zkey1", &value, b"zkey1", 150); + must_commit(&mut engine, b"zkey1", 150, 160); engine .kv_engine() .unwrap() @@ -995,7 +996,7 @@ mod tests { .unwrap() .flush_cf(CF_DEFAULT, true) .unwrap(); - must_prewrite_delete(&engine, b"zkey", b"zkey", 200); + must_prewrite_delete(&mut engine, b"zkey", b"zkey", 200); let tests = vec![ // `zkey_110` is filtered, so no old value and block reads is 0. 
@@ -1018,7 +1019,7 @@ mod tests { } // CF_WRITE L0: |zkey_110, zkey1_160|, |zkey_210| - must_commit(&engine, b"zkey", 200, 210); + must_commit(&mut engine, b"zkey", 200, 210); engine .kv_engine() .unwrap() @@ -1058,7 +1059,7 @@ mod tests { } fn test_rc_scan_skip_lock_impl(desc: bool) { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (key1, val1, val12) = (b"foo1", b"bar1", b"bar12"); let (key2, val2) = (b"foo2", b"bar2"); let mut expected = vec![(key1, val1), (key2, val2)]; @@ -1066,13 +1067,13 @@ mod tests { expected.reverse(); } - must_prewrite_put(&engine, key1, val1, key1, 10); - must_commit(&engine, key1, 10, 20); + must_prewrite_put(&mut engine, key1, val1, key1, 10); + must_commit(&mut engine, key1, 10, 20); - must_prewrite_put(&engine, key2, val2, key2, 30); - must_commit(&engine, key2, 30, 40); + must_prewrite_put(&mut engine, key2, val2, key2, 30); + must_commit(&mut engine, key2, 30, 40); - must_prewrite_put(&engine, key1, val12, key1, 50); + must_prewrite_put(&mut engine, key1, val12, key1, 50); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, 60.into()) diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index b456b359b8f..7171417d060 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -292,66 +292,66 @@ pub(crate) mod tests { }; fn test_mvcc_txn_read_imp(k1: &[u8], k2: &[u8], v: &[u8]) { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); - must_get_none(&engine, k1, 1); + must_get_none(&mut engine, k1, 1); - must_prewrite_put(&engine, k1, v, k1, 2); - must_rollback(&engine, k1, 2, false); + must_prewrite_put(&mut engine, k1, v, k1, 2); + must_rollback(&mut engine, k1, 2, false); // should ignore rollback - must_get_none(&engine, k1, 3); + must_get_none(&mut engine, k1, 3); - must_prewrite_lock(&engine, k1, k1, 3); - 
must_commit(&engine, k1, 3, 4); + must_prewrite_lock(&mut engine, k1, k1, 3); + must_commit(&mut engine, k1, 3, 4); // should ignore read lock - must_get_none(&engine, k1, 5); + must_get_none(&mut engine, k1, 5); - must_prewrite_put(&engine, k1, v, k1, 5); - must_prewrite_put(&engine, k2, v, k1, 5); + must_prewrite_put(&mut engine, k1, v, k1, 5); + must_prewrite_put(&mut engine, k2, v, k1, 5); // should not be affected by later locks - must_get_none(&engine, k1, 4); + must_get_none(&mut engine, k1, 4); // should read pending locks - must_get_err(&engine, k1, 7); + must_get_err(&mut engine, k1, 7); // should ignore the primary lock and get none when reading the latest record - must_get_none(&engine, k1, u64::max_value()); + must_get_none(&mut engine, k1, u64::max_value()); // should read secondary locks even when reading the latest record - must_get_err(&engine, k2, u64::max_value()); + must_get_err(&mut engine, k2, u64::max_value()); - must_commit(&engine, k1, 5, 10); - must_commit(&engine, k2, 5, 10); - must_get_none(&engine, k1, 3); + must_commit(&mut engine, k1, 5, 10); + must_commit(&mut engine, k2, 5, 10); + must_get_none(&mut engine, k1, 3); // should not read with ts < commit_ts - must_get_none(&engine, k1, 7); + must_get_none(&mut engine, k1, 7); // should read with ts > commit_ts - must_get(&engine, k1, 13, v); + must_get(&mut engine, k1, 13, v); // should read the latest record if `ts == u64::max_value()` - must_get(&engine, k1, u64::max_value(), v); + must_get(&mut engine, k1, u64::max_value(), v); - must_prewrite_delete(&engine, k1, k1, 15); + must_prewrite_delete(&mut engine, k1, k1, 15); // should ignore the lock and get previous record when reading the latest record - must_get(&engine, k1, u64::max_value(), v); - must_commit(&engine, k1, 15, 20); - must_get_none(&engine, k1, 3); - must_get_none(&engine, k1, 7); - must_get(&engine, k1, 13, v); - must_get(&engine, k1, 17, v); - must_get_none(&engine, k1, 23); + must_get(&mut engine, k1, 
u64::max_value(), v); + must_commit(&mut engine, k1, 15, 20); + must_get_none(&mut engine, k1, 3); + must_get_none(&mut engine, k1, 7); + must_get(&mut engine, k1, 13, v); + must_get(&mut engine, k1, 17, v); + must_get_none(&mut engine, k1, 23); // intersecting timestamps with pessimistic txn // T1: start_ts = 25, commit_ts = 27 // T2: start_ts = 23, commit_ts = 31 - must_prewrite_put(&engine, k1, v, k1, 25); - must_commit(&engine, k1, 25, 27); - must_acquire_pessimistic_lock(&engine, k1, k1, 23, 29); - must_get(&engine, k1, 30, v); - must_pessimistic_prewrite_delete(&engine, k1, k1, 23, 29, DoPessimisticCheck); - must_get_err(&engine, k1, 30); + must_prewrite_put(&mut engine, k1, v, k1, 25); + must_commit(&mut engine, k1, 25, 27); + must_acquire_pessimistic_lock(&mut engine, k1, k1, 23, 29); + must_get(&mut engine, k1, 30, v); + must_pessimistic_prewrite_delete(&mut engine, k1, k1, 23, 29, DoPessimisticCheck); + must_get_err(&mut engine, k1, 30); // should read the latest record when `ts == u64::max_value()` // even if lock.start_ts(23) < latest write.commit_ts(27) - must_get(&engine, k1, u64::max_value(), v); - must_commit(&engine, k1, 23, 31); - must_get(&engine, k1, 30, v); - must_get_none(&engine, k1, 32); + must_get(&mut engine, k1, u64::max_value(), v); + must_commit(&mut engine, k1, 23, 31); + must_get(&mut engine, k1, 30, v); + must_get_none(&mut engine, k1, 32); } #[test] @@ -363,217 +363,217 @@ pub(crate) mod tests { } fn test_mvcc_txn_prewrite_imp(k: &[u8], v: &[u8]) { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); - must_prewrite_put(&engine, k, v, k, 5); + must_prewrite_put(&mut engine, k, v, k, 5); // Key is locked. - must_locked(&engine, k, 5); + must_locked(&mut engine, k, 5); // Retry prewrite. - must_prewrite_put(&engine, k, v, k, 5); + must_prewrite_put(&mut engine, k, v, k, 5); // Conflict. 
- must_prewrite_lock_err(&engine, k, k, 6); + must_prewrite_lock_err(&mut engine, k, k, 6); - must_commit(&engine, k, 5, 10); - must_written(&engine, k, 5, 10, WriteType::Put); + must_commit(&mut engine, k, 5, 10); + must_written(&mut engine, k, 5, 10, WriteType::Put); // Delayed prewrite request after committing should do nothing. - must_prewrite_put_err(&engine, k, v, k, 5); - must_unlocked(&engine, k); + must_prewrite_put_err(&mut engine, k, v, k, 5); + must_unlocked(&mut engine, k); // Write conflict. - must_prewrite_lock_err(&engine, k, k, 6); - must_unlocked(&engine, k); + must_prewrite_lock_err(&mut engine, k, k, 6); + must_unlocked(&mut engine, k); // Not conflict. - must_prewrite_lock(&engine, k, k, 12); - must_locked(&engine, k, 12); - must_rollback(&engine, k, 12, false); - must_unlocked(&engine, k); - must_written(&engine, k, 12, 12, WriteType::Rollback); + must_prewrite_lock(&mut engine, k, k, 12); + must_locked(&mut engine, k, 12); + must_rollback(&mut engine, k, 12, false); + must_unlocked(&mut engine, k); + must_written(&mut engine, k, 12, 12, WriteType::Rollback); // Cannot retry Prewrite after rollback. - must_prewrite_lock_err(&engine, k, k, 12); + must_prewrite_lock_err(&mut engine, k, k, 12); // Can prewrite after rollback. - must_prewrite_delete(&engine, k, k, 13); - must_rollback(&engine, k, 13, false); - must_unlocked(&engine, k); + must_prewrite_delete(&mut engine, k, k, 13); + must_rollback(&mut engine, k, 13, false); + must_unlocked(&mut engine, k); } #[test] fn test_mvcc_txn_prewrite_insert() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k1, v1, v2, v3) = (b"k1", b"v1", b"v2", b"v3"); - must_prewrite_put(&engine, k1, v1, k1, 1); - must_commit(&engine, k1, 1, 2); + must_prewrite_put(&mut engine, k1, v1, k1, 1); + must_commit(&mut engine, k1, 1, 2); // "k1" already exist, returns AlreadyExist error. 
assert!(matches!( - try_prewrite_insert(&engine, k1, v2, k1, 3), + try_prewrite_insert(&mut engine, k1, v2, k1, 3), Err(Error(box ErrorInner::AlreadyExist { .. })) )); // Delete "k1" - must_prewrite_delete(&engine, k1, k1, 4); + must_prewrite_delete(&mut engine, k1, k1, 4); // There is a lock, returns KeyIsLocked error. assert!(matches!( - try_prewrite_insert(&engine, k1, v2, k1, 6), + try_prewrite_insert(&mut engine, k1, v2, k1, 6), Err(Error(box ErrorInner::KeyIsLocked(_))) )); - must_commit(&engine, k1, 4, 5); + must_commit(&mut engine, k1, 4, 5); // After delete "k1", insert returns ok. - try_prewrite_insert(&engine, k1, v2, k1, 6).unwrap(); - must_commit(&engine, k1, 6, 7); + try_prewrite_insert(&mut engine, k1, v2, k1, 6).unwrap(); + must_commit(&mut engine, k1, 6, 7); // Rollback - must_prewrite_put(&engine, k1, v3, k1, 8); - must_rollback(&engine, k1, 8, false); + must_prewrite_put(&mut engine, k1, v3, k1, 8); + must_rollback(&mut engine, k1, 8, false); assert!(matches!( - try_prewrite_insert(&engine, k1, v3, k1, 9), + try_prewrite_insert(&mut engine, k1, v3, k1, 9), Err(Error(box ErrorInner::AlreadyExist { .. })) )); // Delete "k1" again - must_prewrite_delete(&engine, k1, k1, 10); - must_commit(&engine, k1, 10, 11); + must_prewrite_delete(&mut engine, k1, k1, 10); + must_commit(&mut engine, k1, 10, 11); // Rollback again - must_prewrite_put(&engine, k1, v3, k1, 12); - must_rollback(&engine, k1, 12, false); + must_prewrite_put(&mut engine, k1, v3, k1, 12); + must_rollback(&mut engine, k1, 12, false); // After delete "k1", insert returns ok. 
- try_prewrite_insert(&engine, k1, v2, k1, 13).unwrap(); - must_commit(&engine, k1, 13, 14); + try_prewrite_insert(&mut engine, k1, v2, k1, 13).unwrap(); + must_commit(&mut engine, k1, 13, 14); } #[test] fn test_mvcc_txn_prewrite_check_not_exist() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k1, v1, v2, v3) = (b"k1", b"v1", b"v2", b"v3"); - must_prewrite_put(&engine, k1, v1, k1, 1); - must_commit(&engine, k1, 1, 2); + must_prewrite_put(&mut engine, k1, v1, k1, 1); + must_commit(&mut engine, k1, 1, 2); // "k1" already exist, returns AlreadyExist error. - try_prewrite_check_not_exists(&engine, k1, k1, 3).unwrap_err(); + try_prewrite_check_not_exists(&mut engine, k1, k1, 3).unwrap_err(); // Delete "k1" - must_prewrite_delete(&engine, k1, k1, 4); - must_commit(&engine, k1, 4, 5); + must_prewrite_delete(&mut engine, k1, k1, 4); + must_commit(&mut engine, k1, 4, 5); // After delete "k1", check_not_exists returns ok. 
- try_prewrite_check_not_exists(&engine, k1, k1, 6).unwrap(); + try_prewrite_check_not_exists(&mut engine, k1, k1, 6).unwrap(); - try_prewrite_insert(&engine, k1, v2, k1, 7).unwrap(); - must_commit(&engine, k1, 7, 8); + try_prewrite_insert(&mut engine, k1, v2, k1, 7).unwrap(); + must_commit(&mut engine, k1, 7, 8); // Rollback - must_prewrite_put(&engine, k1, v3, k1, 9); - must_rollback(&engine, k1, 9, false); - try_prewrite_check_not_exists(&engine, k1, k1, 10).unwrap_err(); + must_prewrite_put(&mut engine, k1, v3, k1, 9); + must_rollback(&mut engine, k1, 9, false); + try_prewrite_check_not_exists(&mut engine, k1, k1, 10).unwrap_err(); // Delete "k1" again - must_prewrite_delete(&engine, k1, k1, 11); - must_commit(&engine, k1, 11, 12); + must_prewrite_delete(&mut engine, k1, k1, 11); + must_commit(&mut engine, k1, 11, 12); // Rollback again - must_prewrite_put(&engine, k1, v3, k1, 13); - must_rollback(&engine, k1, 13, false); + must_prewrite_put(&mut engine, k1, v3, k1, 13); + must_rollback(&mut engine, k1, 13, false); // After delete "k1", check_not_exists returns ok. 
- try_prewrite_check_not_exists(&engine, k1, k1, 14).unwrap(); + try_prewrite_check_not_exists(&mut engine, k1, k1, 14).unwrap(); } #[test] fn test_mvcc_txn_pessmistic_prewrite_check_not_exist() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let k = b"k1"; - try_pessimistic_prewrite_check_not_exists(&engine, k, k, 3).unwrap_err(); + try_pessimistic_prewrite_check_not_exists(&mut engine, k, k, 3).unwrap_err(); } #[test] fn test_rollback_lock_optimistic() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k, v) = (b"k1", b"v1"); - must_prewrite_put(&engine, k, v, k, 5); - must_commit(&engine, k, 5, 10); + must_prewrite_put(&mut engine, k, v, k, 5); + must_commit(&mut engine, k, 5, 10); // Lock - must_prewrite_lock(&engine, k, k, 15); - must_locked(&engine, k, 15); + must_prewrite_lock(&mut engine, k, k, 15); + must_locked(&mut engine, k, 15); // Rollback lock - must_rollback(&engine, k, 15, false); + must_rollback(&mut engine, k, 15, false); // Rollbacks of optimistic transactions needn't be protected - must_get_rollback_protected(&engine, k, 15, false); + must_get_rollback_protected(&mut engine, k, 15, false); } #[test] fn test_rollback_lock_pessimistic() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k1, k2, v) = (b"k1", b"k2", b"v1"); - must_acquire_pessimistic_lock(&engine, k1, k1, 5, 5); - must_acquire_pessimistic_lock(&engine, k2, k1, 5, 7); - must_rollback(&engine, k1, 5, false); - must_rollback(&engine, k2, 5, false); + must_acquire_pessimistic_lock(&mut engine, k1, k1, 5, 5); + must_acquire_pessimistic_lock(&mut engine, k2, k1, 5, 7); + must_rollback(&mut engine, k1, 5, false); + must_rollback(&mut engine, k2, 5, false); // The rollback of the primary key should be protected - must_get_rollback_protected(&engine, k1, 5, true); + 
must_get_rollback_protected(&mut engine, k1, 5, true); // The rollback of the secondary key needn't be protected - must_get_rollback_protected(&engine, k2, 5, false); - - must_acquire_pessimistic_lock(&engine, k1, k1, 15, 15); - must_acquire_pessimistic_lock(&engine, k2, k1, 15, 17); - must_pessimistic_prewrite_put(&engine, k1, v, k1, 15, 17, DoPessimisticCheck); - must_pessimistic_prewrite_put(&engine, k2, v, k1, 15, 17, DoPessimisticCheck); - must_rollback(&engine, k1, 15, false); - must_rollback(&engine, k2, 15, false); + must_get_rollback_protected(&mut engine, k2, 5, false); + + must_acquire_pessimistic_lock(&mut engine, k1, k1, 15, 15); + must_acquire_pessimistic_lock(&mut engine, k2, k1, 15, 17); + must_pessimistic_prewrite_put(&mut engine, k1, v, k1, 15, 17, DoPessimisticCheck); + must_pessimistic_prewrite_put(&mut engine, k2, v, k1, 15, 17, DoPessimisticCheck); + must_rollback(&mut engine, k1, 15, false); + must_rollback(&mut engine, k2, 15, false); // The rollback of the primary key should be protected - must_get_rollback_protected(&engine, k1, 15, true); + must_get_rollback_protected(&mut engine, k1, 15, true); // The rollback of the secondary key needn't be protected - must_get_rollback_protected(&engine, k2, 15, false); + must_get_rollback_protected(&mut engine, k2, 15, false); } #[test] fn test_rollback_del() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k, v) = (b"k1", b"v1"); - must_prewrite_put(&engine, k, v, k, 5); - must_commit(&engine, k, 5, 10); + must_prewrite_put(&mut engine, k, v, k, 5); + must_commit(&mut engine, k, 5, 10); // Prewrite delete - must_prewrite_delete(&engine, k, k, 15); - must_locked(&engine, k, 15); + must_prewrite_delete(&mut engine, k, k, 15); + must_locked(&mut engine, k, 15); // Rollback delete - must_rollback(&engine, k, 15, false); + must_rollback(&mut engine, k, 15, false); } #[test] fn test_rollback_overlapped() { - let engine = 
TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k1, v1) = (b"key1", b"v1"); let (k2, v2) = (b"key2", b"v2"); - must_prewrite_put(&engine, k1, v1, k1, 10); - must_prewrite_put(&engine, k2, v2, k2, 11); - must_commit(&engine, k1, 10, 20); - must_commit(&engine, k2, 11, 20); - let w1 = must_written(&engine, k1, 10, 20, WriteType::Put); - let w2 = must_written(&engine, k2, 11, 20, WriteType::Put); + must_prewrite_put(&mut engine, k1, v1, k1, 10); + must_prewrite_put(&mut engine, k2, v2, k2, 11); + must_commit(&mut engine, k1, 10, 20); + must_commit(&mut engine, k2, 11, 20); + let w1 = must_written(&mut engine, k1, 10, 20, WriteType::Put); + let w2 = must_written(&mut engine, k2, 11, 20, WriteType::Put); assert!(!w1.has_overlapped_rollback); assert!(!w2.has_overlapped_rollback); - must_cleanup(&engine, k1, 20, 0); - must_rollback(&engine, k2, 20, false); + must_cleanup(&mut engine, k1, 20, 0); + must_rollback(&mut engine, k2, 20, false); - let w1r = must_written(&engine, k1, 10, 20, WriteType::Put); + let w1r = must_written(&mut engine, k1, 10, 20, WriteType::Put); assert!(w1r.has_overlapped_rollback); // The only difference between w1r and w1 is the overlapped_rollback flag. assert_eq!(w1r.set_overlapped_rollback(false, None), w1); - let w2r = must_written(&engine, k2, 11, 20, WriteType::Put); + let w2r = must_written(&mut engine, k2, 11, 20, WriteType::Put); // Rollback is invoked on secondaries, so the rollback is not protected and // overlapped_rollback won't be set. 
assert_eq!(w2r, w2); @@ -589,7 +589,7 @@ pub(crate) mod tests { #[test] fn test_mvcc_txn_rollback_after_commit() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let k = b"k"; let v = b"v"; @@ -598,49 +598,49 @@ pub(crate) mod tests { let t3 = 20; let t4 = 30; - must_prewrite_put(&engine, k, v, k, t1); + must_prewrite_put(&mut engine, k, v, k, t1); - must_rollback(&engine, k, t2, false); - must_rollback(&engine, k, t2, false); - must_rollback(&engine, k, t4, false); + must_rollback(&mut engine, k, t2, false); + must_rollback(&mut engine, k, t2, false); + must_rollback(&mut engine, k, t4, false); - must_commit(&engine, k, t1, t3); + must_commit(&mut engine, k, t1, t3); // The rollback should be failed since the transaction // was committed before. - must_rollback_err(&engine, k, t1); - must_get(&engine, k, t4, v); + must_rollback_err(&mut engine, k, t1); + must_get(&mut engine, k, t4, v); } fn test_mvcc_txn_rollback_imp(k: &[u8], v: &[u8]) { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); - must_prewrite_put(&engine, k, v, k, 5); - must_rollback(&engine, k, 5, false); + must_prewrite_put(&mut engine, k, v, k, 5); + must_rollback(&mut engine, k, 5, false); // Rollback should be idempotent - must_rollback(&engine, k, 5, false); + must_rollback(&mut engine, k, 5, false); // Lock should be released after rollback - must_unlocked(&engine, k); - must_prewrite_lock(&engine, k, k, 10); - must_rollback(&engine, k, 10, false); + must_unlocked(&mut engine, k); + must_prewrite_lock(&mut engine, k, k, 10); + must_rollback(&mut engine, k, 10, false); // data should be dropped after rollback - must_get_none(&engine, k, 20); + must_get_none(&mut engine, k, 20); // Can't rollback committed transaction. 
- must_prewrite_put(&engine, k, v, k, 25); - must_commit(&engine, k, 25, 30); - must_rollback_err(&engine, k, 25); - must_rollback_err(&engine, k, 25); + must_prewrite_put(&mut engine, k, v, k, 25); + must_commit(&mut engine, k, 25, 30); + must_rollback_err(&mut engine, k, 25); + must_rollback_err(&mut engine, k, 25); // Can't rollback other transaction's lock - must_prewrite_delete(&engine, k, k, 35); - must_rollback(&engine, k, 34, true); - must_rollback(&engine, k, 36, true); - must_written(&engine, k, 34, 34, WriteType::Rollback); - must_written(&engine, k, 36, 36, WriteType::Rollback); - must_locked(&engine, k, 35); - must_commit(&engine, k, 35, 40); - must_get(&engine, k, 39, v); - must_get_none(&engine, k, 41); + must_prewrite_delete(&mut engine, k, k, 35); + must_rollback(&mut engine, k, 34, true); + must_rollback(&mut engine, k, 36, true); + must_written(&mut engine, k, 34, 34, WriteType::Rollback); + must_written(&mut engine, k, 36, 36, WriteType::Rollback); + must_locked(&mut engine, k, 35); + must_commit(&mut engine, k, 35, 40); + must_get(&mut engine, k, 39, v); + must_get_none(&mut engine, k, 41); } #[test] @@ -653,33 +653,40 @@ pub(crate) mod tests { #[test] fn test_mvcc_txn_rollback_before_prewrite() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let key = b"key"; - must_rollback(&engine, key, 5, false); - must_prewrite_lock_err(&engine, key, key, 5); + must_rollback(&mut engine, key, 5, false); + must_prewrite_lock_err(&mut engine, key, key, 5); } fn test_write_imp(k: &[u8], v: &[u8], k2: &[u8]) { - let engine = TestEngineBuilder::new().build().unwrap(); - - must_prewrite_put(&engine, k, v, k, 5); - must_seek_write_none(&engine, k, 5); - - must_commit(&engine, k, 5, 10); - must_seek_write(&engine, k, TimeStamp::max(), 5, 10, WriteType::Put); - must_seek_write_none(&engine, k2, TimeStamp::max()); - must_get_commit_ts(&engine, k, 5, 10); - - must_prewrite_delete(&engine, k, k, 
15); - must_rollback(&engine, k, 15, false); - must_seek_write(&engine, k, TimeStamp::max(), 15, 15, WriteType::Rollback); - must_get_commit_ts(&engine, k, 5, 10); - must_get_commit_ts_none(&engine, k, 15); - - must_prewrite_lock(&engine, k, k, 25); - must_commit(&engine, k, 25, 30); - must_seek_write(&engine, k, TimeStamp::max(), 25, 30, WriteType::Lock); - must_get_commit_ts(&engine, k, 25, 30); + let mut engine = TestEngineBuilder::new().build().unwrap(); + + must_prewrite_put(&mut engine, k, v, k, 5); + must_seek_write_none(&mut engine, k, 5); + + must_commit(&mut engine, k, 5, 10); + must_seek_write(&mut engine, k, TimeStamp::max(), 5, 10, WriteType::Put); + must_seek_write_none(&mut engine, k2, TimeStamp::max()); + must_get_commit_ts(&mut engine, k, 5, 10); + + must_prewrite_delete(&mut engine, k, k, 15); + must_rollback(&mut engine, k, 15, false); + must_seek_write( + &mut engine, + k, + TimeStamp::max(), + 15, + 15, + WriteType::Rollback, + ); + must_get_commit_ts(&mut engine, k, 5, 10); + must_get_commit_ts_none(&mut engine, k, 15); + + must_prewrite_lock(&mut engine, k, k, 25); + must_commit(&mut engine, k, 25, 30); + must_seek_write(&mut engine, k, TimeStamp::max(), 25, 30, WriteType::Lock); + must_get_commit_ts(&mut engine, k, 25, 30); } #[test] @@ -691,21 +698,27 @@ pub(crate) mod tests { } fn test_scan_keys_imp(keys: Vec<&[u8]>, values: Vec<&[u8]>) { - let engine = TestEngineBuilder::new().build().unwrap(); - must_prewrite_put(&engine, keys[0], values[0], keys[0], 1); - must_commit(&engine, keys[0], 1, 10); - must_prewrite_lock(&engine, keys[1], keys[1], 1); - must_commit(&engine, keys[1], 1, 5); - must_prewrite_delete(&engine, keys[2], keys[2], 1); - must_commit(&engine, keys[2], 1, 20); - must_prewrite_put(&engine, keys[3], values[1], keys[3], 1); - must_prewrite_lock(&engine, keys[4], keys[4], 10); - must_prewrite_delete(&engine, keys[5], keys[5], 5); - - must_scan_keys(&engine, None, 100, vec![keys[0], keys[1], keys[2]], None); - 
must_scan_keys(&engine, None, 3, vec![keys[0], keys[1], keys[2]], None); - must_scan_keys(&engine, None, 2, vec![keys[0], keys[1]], Some(keys[1])); - must_scan_keys(&engine, Some(keys[1]), 1, vec![keys[1]], Some(keys[1])); + let mut engine = TestEngineBuilder::new().build().unwrap(); + must_prewrite_put(&mut engine, keys[0], values[0], keys[0], 1); + must_commit(&mut engine, keys[0], 1, 10); + must_prewrite_lock(&mut engine, keys[1], keys[1], 1); + must_commit(&mut engine, keys[1], 1, 5); + must_prewrite_delete(&mut engine, keys[2], keys[2], 1); + must_commit(&mut engine, keys[2], 1, 20); + must_prewrite_put(&mut engine, keys[3], values[1], keys[3], 1); + must_prewrite_lock(&mut engine, keys[4], keys[4], 10); + must_prewrite_delete(&mut engine, keys[5], keys[5], 5); + + must_scan_keys( + &mut engine, + None, + 100, + vec![keys[0], keys[1], keys[2]], + None, + ); + must_scan_keys(&mut engine, None, 3, vec![keys[0], keys[1], keys[2]], None); + must_scan_keys(&mut engine, None, 2, vec![keys[0], keys[1]], Some(keys[1])); + must_scan_keys(&mut engine, Some(keys[1]), 1, vec![keys[1]], Some(keys[1])); } #[test] @@ -746,7 +759,7 @@ pub(crate) mod tests { } fn test_write_size_imp(k: &[u8], v: &[u8], pk: &[u8]) { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); let cm = ConcurrencyManager::new(10.into()); @@ -789,11 +802,11 @@ pub(crate) mod tests { #[test] fn test_skip_constraint_check() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (key, value) = (b"key", b"value"); - must_prewrite_put(&engine, key, value, key, 5); - must_commit(&engine, key, 5, 10); + must_prewrite_put(&mut engine, key, value, key, 5); + must_commit(&mut engine, key, 5, 10); let snapshot = engine.snapshot(Default::default()).unwrap(); let cm = 
ConcurrencyManager::new(10.into()); @@ -825,82 +838,82 @@ pub(crate) mod tests { #[test] fn test_read_commit() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (key, v1, v2) = (b"key", b"v1", b"v2"); - must_prewrite_put(&engine, key, v1, key, 5); - must_commit(&engine, key, 5, 10); - must_prewrite_put(&engine, key, v2, key, 15); - must_get_err(&engine, key, 20); - must_get_no_lock_check(&engine, key, 12, v1); - must_get_no_lock_check(&engine, key, 20, v1); + must_prewrite_put(&mut engine, key, v1, key, 5); + must_commit(&mut engine, key, 5, 10); + must_prewrite_put(&mut engine, key, v2, key, 15); + must_get_err(&mut engine, key, 20); + must_get_no_lock_check(&mut engine, key, 12, v1); + must_get_no_lock_check(&mut engine, key, 20, v1); } #[test] fn test_collapse_prev_rollback() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (key, value) = (b"key", b"value"); // Add a Rollback whose start ts is 1. - must_prewrite_put(&engine, key, value, key, 1); - must_rollback(&engine, key, 1, false); - must_get_rollback_ts(&engine, key, 1); + must_prewrite_put(&mut engine, key, value, key, 1); + must_rollback(&mut engine, key, 1, false); + must_get_rollback_ts(&mut engine, key, 1); // Add a Rollback whose start ts is 2, the previous Rollback whose // start ts is 1 will be collapsed. - must_prewrite_put(&engine, key, value, key, 2); - must_rollback(&engine, key, 2, false); - must_get_none(&engine, key, 2); - must_get_rollback_ts(&engine, key, 2); - must_get_rollback_ts_none(&engine, key, 1); + must_prewrite_put(&mut engine, key, value, key, 2); + must_rollback(&mut engine, key, 2, false); + must_get_none(&mut engine, key, 2); + must_get_rollback_ts(&mut engine, key, 2); + must_get_rollback_ts_none(&mut engine, key, 1); // Rollback arrive before Prewrite, it will collapse the // previous rollback whose start ts is 2. 
- must_rollback(&engine, key, 3, false); - must_get_none(&engine, key, 3); - must_get_rollback_ts(&engine, key, 3); - must_get_rollback_ts_none(&engine, key, 2); + must_rollback(&mut engine, key, 3, false); + must_get_none(&mut engine, key, 3); + must_get_rollback_ts(&mut engine, key, 3); + must_get_rollback_ts_none(&mut engine, key, 2); } #[test] fn test_scan_values_in_default() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); must_prewrite_put( - &engine, + &mut engine, &[2], "v".repeat(SHORT_VALUE_MAX_LEN + 1).as_bytes(), &[2], 3, ); - must_commit(&engine, &[2], 3, 3); + must_commit(&mut engine, &[2], 3, 3); must_prewrite_put( - &engine, + &mut engine, &[3], "a".repeat(SHORT_VALUE_MAX_LEN + 1).as_bytes(), &[3], 3, ); - must_commit(&engine, &[3], 3, 4); + must_commit(&mut engine, &[3], 3, 4); must_prewrite_put( - &engine, + &mut engine, &[3], "b".repeat(SHORT_VALUE_MAX_LEN + 1).as_bytes(), &[3], 5, ); - must_commit(&engine, &[3], 5, 5); + must_commit(&mut engine, &[3], 5, 5); must_prewrite_put( - &engine, + &mut engine, &[6], "x".repeat(SHORT_VALUE_MAX_LEN + 1).as_bytes(), &[6], 3, ); - must_commit(&engine, &[6], 3, 6); + must_commit(&mut engine, &[6], 3, 6); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new(snapshot, Some(ScanMode::Forward), true); @@ -919,31 +932,31 @@ pub(crate) mod tests { #[test] fn test_seek_ts() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); - must_prewrite_put(&engine, &[2], b"vv", &[2], 3); - must_commit(&engine, &[2], 3, 3); + must_prewrite_put(&mut engine, &[2], b"vv", &[2], 3); + must_commit(&mut engine, &[2], 3, 3); must_prewrite_put( - &engine, + &mut engine, &[3], "a".repeat(SHORT_VALUE_MAX_LEN + 1).as_bytes(), &[3], 4, ); - must_commit(&engine, &[3], 4, 4); + must_commit(&mut engine, &[3], 4, 4); must_prewrite_put( - &engine, + &mut engine, 
&[5], "b".repeat(SHORT_VALUE_MAX_LEN + 1).as_bytes(), &[5], 2, ); - must_commit(&engine, &[5], 2, 5); + must_commit(&mut engine, &[5], 2, 5); - must_prewrite_put(&engine, &[6], b"xxx", &[6], 3); - must_commit(&engine, &[6], 3, 6); + must_prewrite_put(&mut engine, &[6], b"xxx", &[6], 3); + must_commit(&mut engine, &[6], 3, 6); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new(snapshot, Some(ScanMode::Forward), true); @@ -956,53 +969,71 @@ pub(crate) mod tests { #[test] fn test_pessimistic_txn_ttl() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k, v) = (b"k", b"v"); // Pessimistic prewrite keeps the larger TTL of the prewrite request and the // original pessimisitic lock. - must_acquire_pessimistic_lock_with_ttl(&engine, k, k, 10, 10, 100); - must_pessimistic_locked(&engine, k, 10, 10); - must_pessimistic_prewrite_put_with_ttl(&engine, k, v, k, 10, 10, DoPessimisticCheck, 110); - must_locked_with_ttl(&engine, k, 10, 110); + must_acquire_pessimistic_lock_with_ttl(&mut engine, k, k, 10, 10, 100); + must_pessimistic_locked(&mut engine, k, 10, 10); + must_pessimistic_prewrite_put_with_ttl( + &mut engine, + k, + v, + k, + 10, + 10, + DoPessimisticCheck, + 110, + ); + must_locked_with_ttl(&mut engine, k, 10, 110); - must_rollback(&engine, k, 10, false); + must_rollback(&mut engine, k, 10, false); // TTL not changed if the pessimistic lock's TTL is larger than that provided in // the prewrite request. 
- must_acquire_pessimistic_lock_with_ttl(&engine, k, k, 20, 20, 100); - must_pessimistic_locked(&engine, k, 20, 20); - must_pessimistic_prewrite_put_with_ttl(&engine, k, v, k, 20, 20, DoPessimisticCheck, 90); - must_locked_with_ttl(&engine, k, 20, 100); + must_acquire_pessimistic_lock_with_ttl(&mut engine, k, k, 20, 20, 100); + must_pessimistic_locked(&mut engine, k, 20, 20); + must_pessimistic_prewrite_put_with_ttl( + &mut engine, + k, + v, + k, + 20, + 20, + DoPessimisticCheck, + 90, + ); + must_locked_with_ttl(&mut engine, k, 20, 100); } #[test] fn test_constraint_check_with_overlapping_txn() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let k = b"k1"; let v = b"v1"; - must_prewrite_put(&engine, k, v, k, 10); - must_commit(&engine, k, 10, 11); - must_acquire_pessimistic_lock(&engine, k, k, 5, 12); - must_pessimistic_prewrite_lock(&engine, k, k, 5, 12, DoPessimisticCheck); - must_commit(&engine, k, 5, 15); + must_prewrite_put(&mut engine, k, v, k, 10); + must_commit(&mut engine, k, 10, 11); + must_acquire_pessimistic_lock(&mut engine, k, k, 5, 12); + must_pessimistic_prewrite_lock(&mut engine, k, k, 5, 12, DoPessimisticCheck); + must_commit(&mut engine, k, 5, 15); // Now in write cf: // start_ts = 10, commit_ts = 11, Put("v1") // start_ts = 5, commit_ts = 15, Lock - must_get(&engine, k, 19, v); - try_prewrite_insert(&engine, k, v, k, 20).unwrap_err(); + must_get(&mut engine, k, 19, v); + try_prewrite_insert(&mut engine, k, v, k, 20).unwrap_err(); } #[test] fn test_lock_info_validation() { use kvproto::kvrpcpb::{LockInfo, Op}; - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let k = b"k"; let v = b"v"; @@ -1022,7 +1053,7 @@ pub(crate) mod tests { expected_lock_info.set_lock_type(Op::Put); // Write an optimistic lock. 
must_prewrite_put_impl( - &engine, + &mut engine, expected_lock_info.get_key(), v, expected_lock_info.get_primary_lock(), @@ -1043,7 +1074,7 @@ pub(crate) mod tests { expected_lock_info.set_lock_for_update_ts(10); // Write a pessimistic lock. must_acquire_pessimistic_lock_impl( - &engine, + &mut engine, expected_lock_info.get_key(), expected_lock_info.get_primary_lock(), expected_lock_info.get_lock_version(), @@ -1058,30 +1089,38 @@ pub(crate) mod tests { } assert_lock_info_eq( - must_prewrite_put_err(&engine, k, v, k, 20), + must_prewrite_put_err(&mut engine, k, v, k, 20), &expected_lock_info, ); assert_lock_info_eq( - must_acquire_pessimistic_lock_err(&engine, k, k, 30, 30), + must_acquire_pessimistic_lock_err(&mut engine, k, k, 30, 30), &expected_lock_info, ); // If the lock is not expired, cleanup will return the lock info. - assert_lock_info_eq(must_cleanup_err(&engine, k, 10, 1), &expected_lock_info); + assert_lock_info_eq(must_cleanup_err(&mut engine, k, 10, 1), &expected_lock_info); expected_lock_info.set_lock_ttl(0); assert_lock_info_eq( - must_pessimistic_prewrite_put_err(&engine, k, v, k, 40, 40, SkipPessimisticCheck), + must_pessimistic_prewrite_put_err( + &mut engine, + k, + v, + k, + 40, + 40, + SkipPessimisticCheck, + ), &expected_lock_info, ); // Delete the lock if *is_optimistic { - must_rollback(&engine, k, expected_lock_info.get_lock_version(), false); + must_rollback(&mut engine, k, expected_lock_info.get_lock_version(), false); } else { pessimistic_rollback::tests::must_success( - &engine, + &mut engine, k, expected_lock_info.get_lock_version(), expected_lock_info.get_lock_for_update_ts(), @@ -1092,20 +1131,20 @@ pub(crate) mod tests { #[test] fn test_non_pessimistic_lock_conflict_with_optimistic_txn() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let k = b"k1"; let v = b"v1"; - must_prewrite_put(&engine, k, v, k, 2); - must_locked(&engine, k, 2); - 
must_pessimistic_prewrite_put_err(&engine, k, v, k, 1, 1, SkipPessimisticCheck); - must_pessimistic_prewrite_put_err(&engine, k, v, k, 3, 3, SkipPessimisticCheck); + must_prewrite_put(&mut engine, k, v, k, 2); + must_locked(&mut engine, k, 2); + must_pessimistic_prewrite_put_err(&mut engine, k, v, k, 1, 1, SkipPessimisticCheck); + must_pessimistic_prewrite_put_err(&mut engine, k, v, k, 3, 3, SkipPessimisticCheck); } #[test] fn test_non_pessimistic_lock_conflict_with_pessismitic_txn() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // k1 is a row key, k2 is the corresponding index key. let (k1, v1) = (b"k1", b"v1"); @@ -1113,27 +1152,35 @@ pub(crate) mod tests { let (k3, v3) = (b"k3", b"v3"); // Commit k3 at 20. - must_prewrite_put(&engine, k3, v3, k3, 1); - must_commit(&engine, k3, 1, 20); + must_prewrite_put(&mut engine, k3, v3, k3, 1); + must_commit(&mut engine, k3, 1, 20); // Txn-10 acquires pessimistic locks on k1 and k3. - must_acquire_pessimistic_lock(&engine, k1, k1, 10, 10); - must_acquire_pessimistic_lock_err(&engine, k3, k1, 10, 10); + must_acquire_pessimistic_lock(&mut engine, k1, k1, 10, 10); + must_acquire_pessimistic_lock_err(&mut engine, k3, k1, 10, 10); // Update for_update_ts to 20 due to write conflict - must_acquire_pessimistic_lock(&engine, k3, k1, 10, 20); - must_pessimistic_prewrite_put(&engine, k1, v1, k1, 10, 20, DoPessimisticCheck); - must_pessimistic_prewrite_put(&engine, k3, v3, k1, 10, 20, DoPessimisticCheck); + must_acquire_pessimistic_lock(&mut engine, k3, k1, 10, 20); + must_pessimistic_prewrite_put(&mut engine, k1, v1, k1, 10, 20, DoPessimisticCheck); + must_pessimistic_prewrite_put(&mut engine, k3, v3, k1, 10, 20, DoPessimisticCheck); // Write a non-pessimistic lock with for_update_ts 20. 
- must_pessimistic_prewrite_put(&engine, k2, v2, k1, 10, 20, SkipPessimisticCheck); + must_pessimistic_prewrite_put(&mut engine, k2, v2, k1, 10, 20, SkipPessimisticCheck); // Roll back the primary key due to timeout, but the non-pessimistic lock is not // rolled back. - must_rollback(&engine, k1, 10, false); + must_rollback(&mut engine, k1, 10, false); // Txn-15 acquires pessimistic locks on k1. - must_acquire_pessimistic_lock(&engine, k1, k1, 15, 15); - must_pessimistic_prewrite_put(&engine, k1, v1, k1, 15, 15, DoPessimisticCheck); + must_acquire_pessimistic_lock(&mut engine, k1, k1, 15, 15); + must_pessimistic_prewrite_put(&mut engine, k1, v1, k1, 15, 15, DoPessimisticCheck); // There is a non-pessimistic lock conflict here. - match must_pessimistic_prewrite_put_err(&engine, k2, v2, k1, 15, 15, SkipPessimisticCheck) { + match must_pessimistic_prewrite_put_err( + &mut engine, + k2, + v2, + k1, + 15, + 15, + SkipPessimisticCheck, + ) { Error(box ErrorInner::KeyIsLocked(info)) => assert_eq!(info.get_lock_ttl(), 0), e => panic!("unexpected error: {}", e), }; @@ -1141,19 +1188,19 @@ pub(crate) mod tests { #[test] fn test_commit_pessimistic_lock() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let k = b"k"; - must_acquire_pessimistic_lock(&engine, k, k, 10, 10); - must_commit_err(&engine, k, 20, 30); - must_commit(&engine, k, 10, 20); - must_seek_write(&engine, k, 30, 10, 20, WriteType::Lock); + must_acquire_pessimistic_lock(&mut engine, k, k, 10, 10); + must_commit_err(&mut engine, k, 20, 30); + must_commit(&mut engine, k, 10, 20); + must_seek_write(&mut engine, k, 30, 10, 20, WriteType::Lock); } #[test] fn test_amend_pessimistic_lock() { fn fail_to_write_pessimistic_lock( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, for_update_ts: impl Into, @@ -1165,35 +1212,35 @@ pub(crate) mod tests { pessimistic_rollback::tests::must_success(engine, key, start_ts, for_update_ts); } - 
let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k, mut v) = (b"k", b"v".to_vec()); // Key not exist; should succeed. - fail_to_write_pessimistic_lock(&engine, k, 10, 10); - must_pessimistic_prewrite_put(&engine, k, &v, k, 10, 10, DoPessimisticCheck); - must_commit(&engine, k, 10, 20); - must_get(&engine, k, 20, &v); + fail_to_write_pessimistic_lock(&mut engine, k, 10, 10); + must_pessimistic_prewrite_put(&mut engine, k, &v, k, 10, 10, DoPessimisticCheck); + must_commit(&mut engine, k, 10, 20); + must_get(&mut engine, k, 20, &v); // for_update_ts(30) >= start_ts(30) > commit_ts(20); should succeed. v.push(0); - fail_to_write_pessimistic_lock(&engine, k, 30, 30); - must_pessimistic_prewrite_put(&engine, k, &v, k, 30, 30, DoPessimisticCheck); - must_commit(&engine, k, 30, 40); - must_get(&engine, k, 40, &v); + fail_to_write_pessimistic_lock(&mut engine, k, 30, 30); + must_pessimistic_prewrite_put(&mut engine, k, &v, k, 30, 30, DoPessimisticCheck); + must_commit(&mut engine, k, 30, 40); + must_get(&mut engine, k, 40, &v); // for_update_ts(40) >= commit_ts(40) > start_ts(35); should fail. - fail_to_write_pessimistic_lock(&engine, k, 35, 40); - must_pessimistic_prewrite_put_err(&engine, k, &v, k, 35, 40, DoPessimisticCheck); + fail_to_write_pessimistic_lock(&mut engine, k, 35, 40); + must_pessimistic_prewrite_put_err(&mut engine, k, &v, k, 35, 40, DoPessimisticCheck); // KeyIsLocked; should fail. - must_acquire_pessimistic_lock(&engine, k, k, 50, 50); - must_pessimistic_prewrite_put_err(&engine, k, &v, k, 60, 60, DoPessimisticCheck); - pessimistic_rollback::tests::must_success(&engine, k, 50, 50); + must_acquire_pessimistic_lock(&mut engine, k, k, 50, 50); + must_pessimistic_prewrite_put_err(&mut engine, k, &v, k, 60, 60, DoPessimisticCheck); + pessimistic_rollback::tests::must_success(&mut engine, k, 50, 50); // The txn has been rolled back; should fail. 
- must_acquire_pessimistic_lock(&engine, k, k, 80, 80); - must_cleanup(&engine, k, 80, TimeStamp::max()); - must_pessimistic_prewrite_put_err(&engine, k, &v, k, 80, 80, DoPessimisticCheck); + must_acquire_pessimistic_lock(&mut engine, k, k, 80, 80); + must_cleanup(&mut engine, k, 80, TimeStamp::max()); + must_pessimistic_prewrite_put_err(&mut engine, k, &v, k, 80, 80, DoPessimisticCheck); } #[test] @@ -1201,12 +1248,13 @@ pub(crate) mod tests { // copy must_prewrite_put_impl, check that the key is written with the correct // secondaries and the right timestamp - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); + let mut engine_clone = engine.clone(); let ctx = Context::default(); let cm = ConcurrencyManager::new(42.into()); - let do_prewrite = || { - let snapshot = engine.snapshot(Default::default()).unwrap(); + let mut do_prewrite = || { + let snapshot = engine_clone.snapshot(Default::default()).unwrap(); let mut txn = MvccTxn::new(TimeStamp::new(2), cm.clone()); let mut reader = SnapshotReader::new(TimeStamp::new(2), snapshot, true); let mutation = Mutation::make_put(Key::from_raw(b"key"), b"value".to_vec()); @@ -1228,7 +1276,7 @@ pub(crate) mod tests { .unwrap(); let modifies = txn.into_modifies(); if !modifies.is_empty() { - engine + engine_clone .write(&ctx, WriteData::from_modifies(modifies)) .unwrap(); } @@ -1257,13 +1305,13 @@ pub(crate) mod tests { #[test] fn test_async_pessimistic_prewrite_primary() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let ctx = Context::default(); let cm = ConcurrencyManager::new(42.into()); - must_acquire_pessimistic_lock(&engine, b"key", b"key", 2, 2); + must_acquire_pessimistic_lock(&mut engine, b"key", b"key", 2, 2); - let do_pessimistic_prewrite = || { + let do_pessimistic_prewrite = |engine: &mut RocksEngine| { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut 
txn = MvccTxn::new(TimeStamp::new(2), cm.clone()); let mut reader = SnapshotReader::new(TimeStamp::new(2), snapshot, true); @@ -1293,7 +1341,7 @@ pub(crate) mod tests { min_commit_ts }; - assert_eq!(do_pessimistic_prewrite(), 43.into()); + assert_eq!(do_pessimistic_prewrite(&mut engine), 43.into()); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new(snapshot, None, true); @@ -1310,17 +1358,27 @@ pub(crate) mod tests { // A duplicate prewrite request should return the min_commit_ts in the primary // key - assert_eq!(do_pessimistic_prewrite(), 43.into()); + assert_eq!(do_pessimistic_prewrite(&mut engine), 43.into()); } #[test] fn test_async_commit_pushed_min_commit_ts() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(42.into()); // Simulate that min_commit_ts is pushed forward larger than latest_ts must_acquire_pessimistic_lock_impl( - &engine, b"key", b"key", 2, false, 20000, 2, false, false, 100, false, + &mut engine, + b"key", + b"key", + 2, + false, + 20000, + 2, + false, + false, + 100, + false, ); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1348,125 +1406,125 @@ pub(crate) mod tests { #[test] fn test_txn_timestamp_overlapping() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k, v) = (b"k1", b"v1"); // Prepare a committed transaction. 
- must_prewrite_put(&engine, k, v, k, 10); - must_locked(&engine, k, 10); - must_commit(&engine, k, 10, 20); - must_unlocked(&engine, k); - must_written(&engine, k, 10, 20, WriteType::Put); + must_prewrite_put(&mut engine, k, v, k, 10); + must_locked(&mut engine, k, 10); + must_commit(&mut engine, k, 10, 20); + must_unlocked(&mut engine, k); + must_written(&mut engine, k, 10, 20, WriteType::Put); // Optimistic transaction allows the start_ts equals to another transaction's // commit_ts on the same key. - must_prewrite_put(&engine, k, v, k, 20); - must_locked(&engine, k, 20); - must_commit(&engine, k, 20, 30); - must_unlocked(&engine, k); + must_prewrite_put(&mut engine, k, v, k, 20); + must_locked(&mut engine, k, 20); + must_commit(&mut engine, k, 20, 30); + must_unlocked(&mut engine, k); // ...but it can be rejected by overlapped rollback flag. - must_cleanup(&engine, k, 30, 0); - let w = must_written(&engine, k, 20, 30, WriteType::Put); + must_cleanup(&mut engine, k, 30, 0); + let w = must_written(&mut engine, k, 20, 30, WriteType::Put); assert!(w.has_overlapped_rollback); - must_unlocked(&engine, k); - must_prewrite_put_err(&engine, k, v, k, 30); - must_unlocked(&engine, k); + must_unlocked(&mut engine, k); + must_prewrite_put_err(&mut engine, k, v, k, 30); + must_unlocked(&mut engine, k); // Prepare a committed transaction. - must_prewrite_put(&engine, k, v, k, 40); - must_locked(&engine, k, 40); - must_commit(&engine, k, 40, 50); - must_unlocked(&engine, k); - must_written(&engine, k, 40, 50, WriteType::Put); + must_prewrite_put(&mut engine, k, v, k, 40); + must_locked(&mut engine, k, 40); + must_commit(&mut engine, k, 40, 50); + must_unlocked(&mut engine, k); + must_written(&mut engine, k, 40, 50, WriteType::Put); // Pessimistic transaction also works in the same case. 
- must_acquire_pessimistic_lock(&engine, k, k, 50, 50); - must_pessimistic_locked(&engine, k, 50, 50); - must_pessimistic_prewrite_put(&engine, k, v, k, 50, 50, DoPessimisticCheck); - must_commit(&engine, k, 50, 60); - must_unlocked(&engine, k); - must_written(&engine, k, 50, 60, WriteType::Put); + must_acquire_pessimistic_lock(&mut engine, k, k, 50, 50); + must_pessimistic_locked(&mut engine, k, 50, 50); + must_pessimistic_prewrite_put(&mut engine, k, v, k, 50, 50, DoPessimisticCheck); + must_commit(&mut engine, k, 50, 60); + must_unlocked(&mut engine, k); + must_written(&mut engine, k, 50, 60, WriteType::Put); // .. and it can also be rejected by overlapped rollback flag. - must_cleanup(&engine, k, 60, 0); - let w = must_written(&engine, k, 50, 60, WriteType::Put); + must_cleanup(&mut engine, k, 60, 0); + let w = must_written(&mut engine, k, 50, 60, WriteType::Put); assert!(w.has_overlapped_rollback); - must_unlocked(&engine, k); - must_acquire_pessimistic_lock_err(&engine, k, k, 60, 60); - must_unlocked(&engine, k); + must_unlocked(&mut engine, k); + must_acquire_pessimistic_lock_err(&mut engine, k, k, 60, 60); + must_unlocked(&mut engine, k); } #[test] fn test_rollback_while_other_transaction_running() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k, v) = (b"k1", b"v1"); - must_prewrite_put_async_commit(&engine, k, v, k, &Some(vec![]), 10, 0); - must_cleanup(&engine, k, 15, 0); - must_commit(&engine, k, 10, 15); - let w = must_written(&engine, k, 10, 15, WriteType::Put); + must_prewrite_put_async_commit(&mut engine, k, v, k, &Some(vec![]), 10, 0); + must_cleanup(&mut engine, k, 15, 0); + must_commit(&mut engine, k, 10, 15); + let w = must_written(&mut engine, k, 10, 15, WriteType::Put); assert!(w.has_overlapped_rollback); // GC fence shouldn't be set in this case. 
assert!(w.gc_fence.is_none()); - must_prewrite_put_async_commit(&engine, k, v, k, &Some(vec![]), 20, 0); - check_txn_status::tests::must_success(&engine, k, 25, 0, 0, true, false, false, |s| { + must_prewrite_put_async_commit(&mut engine, k, v, k, &Some(vec![]), 20, 0); + check_txn_status::tests::must_success(&mut engine, k, 25, 0, 0, true, false, false, |s| { s == TxnStatus::LockNotExist }); - must_commit(&engine, k, 20, 25); - let w = must_written(&engine, k, 20, 25, WriteType::Put); + must_commit(&mut engine, k, 20, 25); + let w = must_written(&mut engine, k, 20, 25, WriteType::Put); assert!(w.has_overlapped_rollback); assert!(w.gc_fence.is_none()); - must_prewrite_put_async_commit(&engine, k, v, k, &Some(vec![]), 30, 0); + must_prewrite_put_async_commit(&mut engine, k, v, k, &Some(vec![]), 30, 0); check_secondary_locks::tests::must_success( - &engine, + &mut engine, k, 35, SecondaryLocksStatus::RolledBack, ); - must_commit(&engine, k, 30, 35); - let w = must_written(&engine, k, 30, 35, WriteType::Put); + must_commit(&mut engine, k, 30, 35); + let w = must_written(&mut engine, k, 30, 35, WriteType::Put); assert!(w.has_overlapped_rollback); assert!(w.gc_fence.is_none()); // Do not commit with overlapped_rollback if the rollback ts doesn't equal to // commit_ts. - must_prewrite_put_async_commit(&engine, k, v, k, &Some(vec![]), 40, 0); - must_cleanup(&engine, k, 44, 0); - must_commit(&engine, k, 40, 45); - let w = must_written(&engine, k, 40, 45, WriteType::Put); + must_prewrite_put_async_commit(&mut engine, k, v, k, &Some(vec![]), 40, 0); + must_cleanup(&mut engine, k, 44, 0); + must_commit(&mut engine, k, 40, 45); + let w = must_written(&mut engine, k, 40, 45, WriteType::Put); assert!(!w.has_overlapped_rollback); // Do not put rollback mark to the lock if the lock is not async commit or if // lock.ts is before start_ts or min_commit_ts. 
- must_prewrite_put(&engine, k, v, k, 50); - must_cleanup(&engine, k, 55, 0); - let l = must_locked(&engine, k, 50); + must_prewrite_put(&mut engine, k, v, k, 50); + must_cleanup(&mut engine, k, 55, 0); + let l = must_locked(&mut engine, k, 50); assert!(l.rollback_ts.is_empty()); - must_commit(&engine, k, 50, 56); + must_commit(&mut engine, k, 50, 56); - must_prewrite_put_async_commit(&engine, k, v, k, &Some(vec![]), 60, 0); - must_cleanup(&engine, k, 59, 0); - let l = must_locked(&engine, k, 60); + must_prewrite_put_async_commit(&mut engine, k, v, k, &Some(vec![]), 60, 0); + must_cleanup(&mut engine, k, 59, 0); + let l = must_locked(&mut engine, k, 60); assert!(l.rollback_ts.is_empty()); - must_commit(&engine, k, 60, 65); + must_commit(&mut engine, k, 60, 65); - must_prewrite_put_async_commit(&engine, k, v, k, &Some(vec![]), 70, 75); - must_cleanup(&engine, k, 74, 0); - must_cleanup(&engine, k, 75, 0); - let l = must_locked(&engine, k, 70); + must_prewrite_put_async_commit(&mut engine, k, v, k, &Some(vec![]), 70, 75); + must_cleanup(&mut engine, k, 74, 0); + must_cleanup(&mut engine, k, 75, 0); + let l = must_locked(&mut engine, k, 70); assert_eq!(l.min_commit_ts, 75.into()); assert_eq!(l.rollback_ts, vec![75.into()]); } #[test] fn test_gc_fence() { - let rollback = |engine: &RocksEngine, k: &[u8], start_ts: u64| { + let rollback = |engine: &mut RocksEngine, k: &[u8], start_ts: u64| { must_cleanup(engine, k, start_ts, 0); }; - let check_status = |engine: &RocksEngine, k: &[u8], start_ts: u64| { + let check_status = |engine: &mut RocksEngine, k: &[u8], start_ts: u64| { check_txn_status::tests::must_success( engine, k, @@ -1479,7 +1537,7 @@ pub(crate) mod tests { |_| true, ); }; - let check_secondary = |engine: &RocksEngine, k: &[u8], start_ts: u64| { + let check_secondary = |engine: &mut RocksEngine, k: &[u8], start_ts: u64| { check_secondary_locks::tests::must_success( engine, k, @@ -1489,115 +1547,115 @@ pub(crate) mod tests { }; for &rollback in &[rollback, 
check_status, check_secondary] { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // Get gc fence without any newer versions. - must_prewrite_put(&engine, b"k1", b"v1", b"k1", 101); - must_commit(&engine, b"k1", 101, 102); - rollback(&engine, b"k1", 102); - must_get_overlapped_rollback(&engine, b"k1", 102, 101, WriteType::Put, Some(0)); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 101); + must_commit(&mut engine, b"k1", 101, 102); + rollback(&mut engine, b"k1", 102); + must_get_overlapped_rollback(&mut engine, b"k1", 102, 101, WriteType::Put, Some(0)); // Get gc fence with a newer put. - must_prewrite_put(&engine, b"k1", b"v1", b"k1", 103); - must_commit(&engine, b"k1", 103, 104); - must_prewrite_put(&engine, b"k1", b"v1", b"k1", 105); - must_commit(&engine, b"k1", 105, 106); - rollback(&engine, b"k1", 104); - must_get_overlapped_rollback(&engine, b"k1", 104, 103, WriteType::Put, Some(106)); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 103); + must_commit(&mut engine, b"k1", 103, 104); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 105); + must_commit(&mut engine, b"k1", 105, 106); + rollback(&mut engine, b"k1", 104); + must_get_overlapped_rollback(&mut engine, b"k1", 104, 103, WriteType::Put, Some(106)); // Get gc fence with a newer delete. 
- must_prewrite_put(&engine, b"k1", b"v1", b"k1", 107); - must_commit(&engine, b"k1", 107, 108); - must_prewrite_delete(&engine, b"k1", b"k1", 109); - must_commit(&engine, b"k1", 109, 110); - rollback(&engine, b"k1", 108); - must_get_overlapped_rollback(&engine, b"k1", 108, 107, WriteType::Put, Some(110)); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 107); + must_commit(&mut engine, b"k1", 107, 108); + must_prewrite_delete(&mut engine, b"k1", b"k1", 109); + must_commit(&mut engine, b"k1", 109, 110); + rollback(&mut engine, b"k1", 108); + must_get_overlapped_rollback(&mut engine, b"k1", 108, 107, WriteType::Put, Some(110)); // Get gc fence with a newer rollback and lock. - must_prewrite_put(&engine, b"k1", b"v1", b"k1", 111); - must_commit(&engine, b"k1", 111, 112); - must_prewrite_put(&engine, b"k1", b"v1", b"k1", 113); - must_rollback(&engine, b"k1", 113, false); - must_prewrite_lock(&engine, b"k1", b"k1", 115); - must_commit(&engine, b"k1", 115, 116); - rollback(&engine, b"k1", 112); - must_get_overlapped_rollback(&engine, b"k1", 112, 111, WriteType::Put, Some(0)); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 111); + must_commit(&mut engine, b"k1", 111, 112); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 113); + must_rollback(&mut engine, b"k1", 113, false); + must_prewrite_lock(&mut engine, b"k1", b"k1", 115); + must_commit(&mut engine, b"k1", 115, 116); + rollback(&mut engine, b"k1", 112); + must_get_overlapped_rollback(&mut engine, b"k1", 112, 111, WriteType::Put, Some(0)); // Get gc fence with a newer put after some rollbacks and locks. 
- must_prewrite_put(&engine, b"k1", b"v1", b"k1", 121); - must_commit(&engine, b"k1", 121, 122); - must_prewrite_put(&engine, b"k1", b"v1", b"k1", 123); - must_rollback(&engine, b"k1", 123, false); - must_prewrite_lock(&engine, b"k1", b"k1", 125); - must_commit(&engine, b"k1", 125, 126); - must_prewrite_put(&engine, b"k1", b"v1", b"k1", 127); - must_commit(&engine, b"k1", 127, 128); - rollback(&engine, b"k1", 122); - must_get_overlapped_rollback(&engine, b"k1", 122, 121, WriteType::Put, Some(128)); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 121); + must_commit(&mut engine, b"k1", 121, 122); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 123); + must_rollback(&mut engine, b"k1", 123, false); + must_prewrite_lock(&mut engine, b"k1", b"k1", 125); + must_commit(&mut engine, b"k1", 125, 126); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 127); + must_commit(&mut engine, b"k1", 127, 128); + rollback(&mut engine, b"k1", 122); + must_get_overlapped_rollback(&mut engine, b"k1", 122, 121, WriteType::Put, Some(128)); // A key's gc fence won't be another MVCC key. 
- must_prewrite_put(&engine, b"k1", b"v1", b"k1", 131); - must_commit(&engine, b"k1", 131, 132); - must_prewrite_put(&engine, b"k0", b"v1", b"k0", 133); - must_commit(&engine, b"k0", 133, 134); - must_prewrite_put(&engine, b"k2", b"v1", b"k2", 133); - must_commit(&engine, b"k2", 133, 134); - rollback(&engine, b"k1", 132); - must_get_overlapped_rollback(&engine, b"k1", 132, 131, WriteType::Put, Some(0)); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 131); + must_commit(&mut engine, b"k1", 131, 132); + must_prewrite_put(&mut engine, b"k0", b"v1", b"k0", 133); + must_commit(&mut engine, b"k0", 133, 134); + must_prewrite_put(&mut engine, b"k2", b"v1", b"k2", 133); + must_commit(&mut engine, b"k2", 133, 134); + rollback(&mut engine, b"k1", 132); + must_get_overlapped_rollback(&mut engine, b"k1", 132, 131, WriteType::Put, Some(0)); } } #[test] fn test_overlapped_ts_commit_before_rollback() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k1, v1) = (b"key1", b"v1"); let (k2, v2) = (b"key2", b"v2"); let key2 = k2.to_vec(); let secondaries = Some(vec![key2]); // T1, start_ts = 10, commit_ts = 20; write k1, k2 - must_prewrite_put_async_commit(&engine, k1, v1, k1, &secondaries, 10, 0); - must_prewrite_put_async_commit(&engine, k2, v2, k1, &secondaries, 10, 0); - must_commit(&engine, k1, 10, 20); - must_commit(&engine, k2, 10, 20); + must_prewrite_put_async_commit(&mut engine, k1, v1, k1, &secondaries, 10, 0); + must_prewrite_put_async_commit(&mut engine, k2, v2, k1, &secondaries, 10, 0); + must_commit(&mut engine, k1, 10, 20); + must_commit(&mut engine, k2, 10, 20); - let w = must_written(&engine, k1, 10, 20, WriteType::Put); + let w = must_written(&mut engine, k1, 10, 20, WriteType::Put); assert!(!w.has_overlapped_rollback); // T2, start_ts = 20 - must_acquire_pessimistic_lock(&engine, k2, k2, 20, 25); - must_pessimistic_prewrite_put(&engine, k2, v2, k2, 20, 25, DoPessimisticCheck); + 
must_acquire_pessimistic_lock(&mut engine, k2, k2, 20, 25); + must_pessimistic_prewrite_put(&mut engine, k2, v2, k2, 20, 25, DoPessimisticCheck); - must_cleanup(&engine, k2, 20, 0); + must_cleanup(&mut engine, k2, 20, 0); - let w = must_written(&engine, k2, 10, 20, WriteType::Put); + let w = must_written(&mut engine, k2, 10, 20, WriteType::Put); assert!(w.has_overlapped_rollback); - must_get(&engine, k2, 30, v2); - must_acquire_pessimistic_lock_err(&engine, k2, k2, 20, 25); + must_get(&mut engine, k2, 30, v2); + must_acquire_pessimistic_lock_err(&mut engine, k2, k2, 20, 25); } #[test] fn test_overlapped_ts_prewrite_before_rollback() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k1, v1) = (b"key1", b"v1"); let (k2, v2) = (b"key2", b"v2"); let key2 = k2.to_vec(); let secondaries = Some(vec![key2]); // T1, start_ts = 10 - must_prewrite_put_async_commit(&engine, k1, v1, k1, &secondaries, 10, 0); - must_prewrite_put_async_commit(&engine, k2, v2, k1, &secondaries, 10, 0); + must_prewrite_put_async_commit(&mut engine, k1, v1, k1, &secondaries, 10, 0); + must_prewrite_put_async_commit(&mut engine, k2, v2, k1, &secondaries, 10, 0); // T2, start_ts = 20 - must_prewrite_put_err(&engine, k2, v2, k2, 20); - must_cleanup(&engine, k2, 20, 0); + must_prewrite_put_err(&mut engine, k2, v2, k2, 20); + must_cleanup(&mut engine, k2, 20, 0); // commit T1 - must_commit(&engine, k1, 10, 20); - must_commit(&engine, k2, 10, 20); + must_commit(&mut engine, k1, 10, 20); + must_commit(&mut engine, k2, 10, 20); - let w = must_written(&engine, k2, 10, 20, WriteType::Put); + let w = must_written(&mut engine, k2, 10, 20, WriteType::Put); assert!(w.has_overlapped_rollback); - must_prewrite_put_err(&engine, k2, v2, k2, 20); + must_prewrite_put_err(&mut engine, k2, v2, k2, 20); } } diff --git a/src/storage/raw/raw_mvcc.rs b/src/storage/raw/raw_mvcc.rs index 59dd5e8f13d..6d86203e8f2 100644 --- 
a/src/storage/raw/raw_mvcc.rs +++ b/src/storage/raw/raw_mvcc.rs @@ -257,7 +257,7 @@ mod tests { fn test_raw_mvcc_snapshot() { // Use `Engine` to be independent to `Storage`. // Do not set "api version" to use `Engine` as a raw RocksDB. - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (tx, rx) = channel(); let ctx = Context::default(); diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index 699002f0126..7c2f41d3e1b 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -293,7 +293,7 @@ pub mod tests { }; pub fn must_succeed_impl( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], start_ts: impl Into, @@ -337,7 +337,7 @@ pub mod tests { } pub fn must_succeed( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], start_ts: impl Into, @@ -347,7 +347,7 @@ pub mod tests { } pub fn must_succeed_return_value( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], start_ts: impl Into, @@ -370,7 +370,7 @@ pub mod tests { } pub fn must_succeed_with_ttl( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], start_ts: impl Into, @@ -396,7 +396,7 @@ pub mod tests { } pub fn must_succeed_for_large_txn( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], start_ts: impl Into, @@ -421,7 +421,7 @@ pub mod tests { } pub fn must_err( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], start_ts: impl Into, @@ -442,7 +442,7 @@ pub mod tests { } pub fn must_err_return_value( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], start_ts: impl Into, @@ -464,7 +464,7 @@ pub mod tests { } fn must_err_impl( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], start_ts: impl Into, @@ -499,7 +499,7 @@ pub mod tests { } pub fn must_pessimistic_locked( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, for_update_ts: impl Into, @@ -514,7 +514,7 @@ pub mod 
tests { #[test] fn test_pessimistic_lock() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let k = b"k1"; let v = b"v1"; @@ -523,221 +523,221 @@ pub mod tests { // important, we should consider whether they are better to be fixed. // Normal - must_succeed(&engine, k, k, 1, 1); - must_pessimistic_locked(&engine, k, 1, 1); - must_pessimistic_prewrite_put(&engine, k, v, k, 1, 1, DoPessimisticCheck); - must_locked(&engine, k, 1); - must_commit(&engine, k, 1, 2); - must_unlocked(&engine, k); + must_succeed(&mut engine, k, k, 1, 1); + must_pessimistic_locked(&mut engine, k, 1, 1); + must_pessimistic_prewrite_put(&mut engine, k, v, k, 1, 1, DoPessimisticCheck); + must_locked(&mut engine, k, 1); + must_commit(&mut engine, k, 1, 2); + must_unlocked(&mut engine, k); // Lock conflict - must_prewrite_put(&engine, k, v, k, 3); - must_err(&engine, k, k, 4, 4); - must_cleanup(&engine, k, 3, 0); - must_unlocked(&engine, k); - must_succeed(&engine, k, k, 5, 5); - must_prewrite_lock_err(&engine, k, k, 6); - must_err(&engine, k, k, 6, 6); - must_cleanup(&engine, k, 5, 0); - must_unlocked(&engine, k); + must_prewrite_put(&mut engine, k, v, k, 3); + must_err(&mut engine, k, k, 4, 4); + must_cleanup(&mut engine, k, 3, 0); + must_unlocked(&mut engine, k); + must_succeed(&mut engine, k, k, 5, 5); + must_prewrite_lock_err(&mut engine, k, k, 6); + must_err(&mut engine, k, k, 6, 6); + must_cleanup(&mut engine, k, 5, 0); + must_unlocked(&mut engine, k); // Data conflict - must_prewrite_put(&engine, k, v, k, 7); - must_commit(&engine, k, 7, 9); - must_unlocked(&engine, k); - must_prewrite_lock_err(&engine, k, k, 8); - must_err(&engine, k, k, 8, 8); - must_succeed(&engine, k, k, 8, 9); - must_pessimistic_prewrite_put(&engine, k, v, k, 8, 8, DoPessimisticCheck); - must_commit(&engine, k, 8, 10); - must_unlocked(&engine, k); + must_prewrite_put(&mut engine, k, v, k, 7); + must_commit(&mut engine, k, 7, 9); + 
must_unlocked(&mut engine, k); + must_prewrite_lock_err(&mut engine, k, k, 8); + must_err(&mut engine, k, k, 8, 8); + must_succeed(&mut engine, k, k, 8, 9); + must_pessimistic_prewrite_put(&mut engine, k, v, k, 8, 8, DoPessimisticCheck); + must_commit(&mut engine, k, 8, 10); + must_unlocked(&mut engine, k); // Rollback - must_succeed(&engine, k, k, 11, 11); - must_pessimistic_locked(&engine, k, 11, 11); - must_cleanup(&engine, k, 11, 0); - must_err(&engine, k, k, 11, 11); - must_pessimistic_prewrite_put_err(&engine, k, v, k, 11, 11, DoPessimisticCheck); - must_prewrite_lock_err(&engine, k, k, 11); - must_unlocked(&engine, k); - - must_succeed(&engine, k, k, 12, 12); - must_pessimistic_prewrite_put(&engine, k, v, k, 12, 12, DoPessimisticCheck); - must_locked(&engine, k, 12); - must_cleanup(&engine, k, 12, 0); - must_err(&engine, k, k, 12, 12); - must_pessimistic_prewrite_put_err(&engine, k, v, k, 12, 12, DoPessimisticCheck); - must_prewrite_lock_err(&engine, k, k, 12); - must_unlocked(&engine, k); + must_succeed(&mut engine, k, k, 11, 11); + must_pessimistic_locked(&mut engine, k, 11, 11); + must_cleanup(&mut engine, k, 11, 0); + must_err(&mut engine, k, k, 11, 11); + must_pessimistic_prewrite_put_err(&mut engine, k, v, k, 11, 11, DoPessimisticCheck); + must_prewrite_lock_err(&mut engine, k, k, 11); + must_unlocked(&mut engine, k); + + must_succeed(&mut engine, k, k, 12, 12); + must_pessimistic_prewrite_put(&mut engine, k, v, k, 12, 12, DoPessimisticCheck); + must_locked(&mut engine, k, 12); + must_cleanup(&mut engine, k, 12, 0); + must_err(&mut engine, k, k, 12, 12); + must_pessimistic_prewrite_put_err(&mut engine, k, v, k, 12, 12, DoPessimisticCheck); + must_prewrite_lock_err(&mut engine, k, k, 12); + must_unlocked(&mut engine, k); // Duplicated - must_succeed(&engine, k, k, 13, 13); - must_pessimistic_locked(&engine, k, 13, 13); - must_succeed(&engine, k, k, 13, 13); - must_pessimistic_locked(&engine, k, 13, 13); - must_pessimistic_prewrite_put(&engine, k, v, k, 
13, 13, DoPessimisticCheck); - must_locked(&engine, k, 13); - must_pessimistic_prewrite_put(&engine, k, v, k, 13, 13, DoPessimisticCheck); - must_locked(&engine, k, 13); - must_commit(&engine, k, 13, 14); - must_unlocked(&engine, k); - must_commit(&engine, k, 13, 14); - must_unlocked(&engine, k); + must_succeed(&mut engine, k, k, 13, 13); + must_pessimistic_locked(&mut engine, k, 13, 13); + must_succeed(&mut engine, k, k, 13, 13); + must_pessimistic_locked(&mut engine, k, 13, 13); + must_pessimistic_prewrite_put(&mut engine, k, v, k, 13, 13, DoPessimisticCheck); + must_locked(&mut engine, k, 13); + must_pessimistic_prewrite_put(&mut engine, k, v, k, 13, 13, DoPessimisticCheck); + must_locked(&mut engine, k, 13); + must_commit(&mut engine, k, 13, 14); + must_unlocked(&mut engine, k); + must_commit(&mut engine, k, 13, 14); + must_unlocked(&mut engine, k); // Pessimistic lock doesn't block reads. - must_succeed(&engine, k, k, 15, 15); - must_pessimistic_locked(&engine, k, 15, 15); - must_get(&engine, k, 16, v); - must_pessimistic_prewrite_delete(&engine, k, k, 15, 15, DoPessimisticCheck); - must_get_err(&engine, k, 16); - must_commit(&engine, k, 15, 17); + must_succeed(&mut engine, k, k, 15, 15); + must_pessimistic_locked(&mut engine, k, 15, 15); + must_get(&mut engine, k, 16, v); + must_pessimistic_prewrite_delete(&mut engine, k, k, 15, 15, DoPessimisticCheck); + must_get_err(&mut engine, k, 16); + must_commit(&mut engine, k, 15, 17); // Rollback - must_succeed(&engine, k, k, 18, 18); - must_rollback(&engine, k, 18, false); - must_unlocked(&engine, k); - must_prewrite_put(&engine, k, v, k, 19); - must_commit(&engine, k, 19, 20); - must_err(&engine, k, k, 18, 21); - must_unlocked(&engine, k); + must_succeed(&mut engine, k, k, 18, 18); + must_rollback(&mut engine, k, 18, false); + must_unlocked(&mut engine, k); + must_prewrite_put(&mut engine, k, v, k, 19); + must_commit(&mut engine, k, 19, 20); + must_err(&mut engine, k, k, 18, 21); + must_unlocked(&mut engine, k); // 
LockTypeNotMatch - must_prewrite_put(&engine, k, v, k, 23); - must_locked(&engine, k, 23); - must_err(&engine, k, k, 23, 23); - must_cleanup(&engine, k, 23, 0); - must_succeed(&engine, k, k, 24, 24); - must_pessimistic_locked(&engine, k, 24, 24); - must_prewrite_put_err(&engine, k, v, k, 24); - must_rollback(&engine, k, 24, false); + must_prewrite_put(&mut engine, k, v, k, 23); + must_locked(&mut engine, k, 23); + must_err(&mut engine, k, k, 23, 23); + must_cleanup(&mut engine, k, 23, 0); + must_succeed(&mut engine, k, k, 24, 24); + must_pessimistic_locked(&mut engine, k, 24, 24); + must_prewrite_put_err(&mut engine, k, v, k, 24); + must_rollback(&mut engine, k, 24, false); // Acquire lock on a prewritten key should fail. - must_succeed(&engine, k, k, 26, 26); - must_pessimistic_locked(&engine, k, 26, 26); - must_pessimistic_prewrite_delete(&engine, k, k, 26, 26, DoPessimisticCheck); - must_locked(&engine, k, 26); - must_err(&engine, k, k, 26, 26); - must_locked(&engine, k, 26); + must_succeed(&mut engine, k, k, 26, 26); + must_pessimistic_locked(&mut engine, k, 26, 26); + must_pessimistic_prewrite_delete(&mut engine, k, k, 26, 26, DoPessimisticCheck); + must_locked(&mut engine, k, 26); + must_err(&mut engine, k, k, 26, 26); + must_locked(&mut engine, k, 26); // Acquire lock on a committed key should fail. - must_commit(&engine, k, 26, 27); - must_unlocked(&engine, k); - must_get_none(&engine, k, 28); - must_err(&engine, k, k, 26, 26); - must_unlocked(&engine, k); - must_get_none(&engine, k, 28); + must_commit(&mut engine, k, 26, 27); + must_unlocked(&mut engine, k); + must_get_none(&mut engine, k, 28); + must_err(&mut engine, k, k, 26, 26); + must_unlocked(&mut engine, k); + must_get_none(&mut engine, k, 28); // Pessimistic prewrite on a committed key should fail. 
- must_pessimistic_prewrite_put_err(&engine, k, v, k, 26, 26, DoPessimisticCheck); - must_unlocked(&engine, k); - must_get_none(&engine, k, 28); + must_pessimistic_prewrite_put_err(&mut engine, k, v, k, 26, 26, DoPessimisticCheck); + must_unlocked(&mut engine, k); + must_get_none(&mut engine, k, 28); // Currently we cannot avoid this. - must_succeed(&engine, k, k, 26, 29); - pessimistic_rollback::tests::must_success(&engine, k, 26, 29); - must_unlocked(&engine, k); + must_succeed(&mut engine, k, k, 26, 29); + pessimistic_rollback::tests::must_success(&mut engine, k, 26, 29); + must_unlocked(&mut engine, k); // Non pessimistic key in pessimistic transaction. - must_pessimistic_prewrite_put(&engine, k, v, k, 30, 30, SkipPessimisticCheck); - must_locked(&engine, k, 30); - must_commit(&engine, k, 30, 31); - must_unlocked(&engine, k); - must_get_commit_ts(&engine, k, 30, 31); + must_pessimistic_prewrite_put(&mut engine, k, v, k, 30, 30, SkipPessimisticCheck); + must_locked(&mut engine, k, 30); + must_commit(&mut engine, k, 30, 31); + must_unlocked(&mut engine, k); + must_get_commit_ts(&mut engine, k, 30, 31); // Rollback collapsed. - must_rollback(&engine, k, 32, false); - must_rollback(&engine, k, 33, false); - must_err(&engine, k, k, 32, 32); + must_rollback(&mut engine, k, 32, false); + must_rollback(&mut engine, k, 33, false); + must_err(&mut engine, k, k, 32, 32); // Currently we cannot avoid this. - must_succeed(&engine, k, k, 32, 34); - pessimistic_rollback::tests::must_success(&engine, k, 32, 34); - must_unlocked(&engine, k); + must_succeed(&mut engine, k, k, 32, 34); + pessimistic_rollback::tests::must_success(&mut engine, k, 32, 34); + must_unlocked(&mut engine, k); // Acquire lock when there is lock with different for_update_ts. 
- must_succeed(&engine, k, k, 35, 36); - must_pessimistic_locked(&engine, k, 35, 36); - must_succeed(&engine, k, k, 35, 35); - must_pessimistic_locked(&engine, k, 35, 36); - must_succeed(&engine, k, k, 35, 37); - must_pessimistic_locked(&engine, k, 35, 37); + must_succeed(&mut engine, k, k, 35, 36); + must_pessimistic_locked(&mut engine, k, 35, 36); + must_succeed(&mut engine, k, k, 35, 35); + must_pessimistic_locked(&mut engine, k, 35, 36); + must_succeed(&mut engine, k, k, 35, 37); + must_pessimistic_locked(&mut engine, k, 35, 37); // Cannot prewrite when there is another transaction's pessimistic lock. - must_pessimistic_prewrite_put_err(&engine, k, v, k, 36, 36, DoPessimisticCheck); - must_pessimistic_prewrite_put_err(&engine, k, v, k, 36, 38, DoPessimisticCheck); - must_pessimistic_locked(&engine, k, 35, 37); + must_pessimistic_prewrite_put_err(&mut engine, k, v, k, 36, 36, DoPessimisticCheck); + must_pessimistic_prewrite_put_err(&mut engine, k, v, k, 36, 38, DoPessimisticCheck); + must_pessimistic_locked(&mut engine, k, 35, 37); // Cannot prewrite when there is another transaction's non-pessimistic lock. - must_pessimistic_prewrite_put(&engine, k, v, k, 35, 37, DoPessimisticCheck); - must_locked(&engine, k, 35); - must_pessimistic_prewrite_put_err(&engine, k, v, k, 36, 38, DoPessimisticCheck); - must_locked(&engine, k, 35); + must_pessimistic_prewrite_put(&mut engine, k, v, k, 35, 37, DoPessimisticCheck); + must_locked(&mut engine, k, 35); + must_pessimistic_prewrite_put_err(&mut engine, k, v, k, 36, 38, DoPessimisticCheck); + must_locked(&mut engine, k, 35); // Commit pessimistic transaction's key but with smaller commit_ts than // for_update_ts. Currently not checked, so in this case it will // actually be successfully committed. 
- must_commit(&engine, k, 35, 36); - must_unlocked(&engine, k); - must_get_commit_ts(&engine, k, 35, 36); + must_commit(&mut engine, k, 35, 36); + must_unlocked(&mut engine, k); + must_get_commit_ts(&mut engine, k, 35, 36); // Prewrite meets pessimistic lock on a non-pessimistic key. // Currently not checked, so prewrite will success. - must_succeed(&engine, k, k, 40, 40); - must_pessimistic_locked(&engine, k, 40, 40); - must_pessimistic_prewrite_put(&engine, k, v, k, 40, 40, SkipPessimisticCheck); - must_locked(&engine, k, 40); - must_commit(&engine, k, 40, 41); - must_unlocked(&engine, k); + must_succeed(&mut engine, k, k, 40, 40); + must_pessimistic_locked(&mut engine, k, 40, 40); + must_pessimistic_prewrite_put(&mut engine, k, v, k, 40, 40, SkipPessimisticCheck); + must_locked(&mut engine, k, 40); + must_commit(&mut engine, k, 40, 41); + must_unlocked(&mut engine, k); // Prewrite with different for_update_ts. // Currently not checked. - must_succeed(&engine, k, k, 42, 45); - must_pessimistic_locked(&engine, k, 42, 45); - must_pessimistic_prewrite_put(&engine, k, v, k, 42, 43, DoPessimisticCheck); - must_locked(&engine, k, 42); - must_commit(&engine, k, 42, 45); - must_unlocked(&engine, k); - - must_succeed(&engine, k, k, 46, 47); - must_pessimistic_locked(&engine, k, 46, 47); - must_pessimistic_prewrite_put(&engine, k, v, k, 46, 48, DoPessimisticCheck); - must_locked(&engine, k, 46); - must_commit(&engine, k, 46, 50); - must_unlocked(&engine, k); + must_succeed(&mut engine, k, k, 42, 45); + must_pessimistic_locked(&mut engine, k, 42, 45); + must_pessimistic_prewrite_put(&mut engine, k, v, k, 42, 43, DoPessimisticCheck); + must_locked(&mut engine, k, 42); + must_commit(&mut engine, k, 42, 45); + must_unlocked(&mut engine, k); + + must_succeed(&mut engine, k, k, 46, 47); + must_pessimistic_locked(&mut engine, k, 46, 47); + must_pessimistic_prewrite_put(&mut engine, k, v, k, 46, 48, DoPessimisticCheck); + must_locked(&mut engine, k, 46); + must_commit(&mut engine, 
k, 46, 50); + must_unlocked(&mut engine, k); // Prewrite on non-pessimistic key meets write with larger commit_ts than // current for_update_ts (non-pessimistic data conflict). // Normally non-pessimistic keys in pessimistic transactions are used when we // are sure that there won't be conflicts. So this case is also not checked, and // prewrite will succeeed. - must_pessimistic_prewrite_put(&engine, k, v, k, 47, 48, SkipPessimisticCheck); - must_locked(&engine, k, 47); - must_cleanup(&engine, k, 47, 0); - must_unlocked(&engine, k); + must_pessimistic_prewrite_put(&mut engine, k, v, k, 47, 48, SkipPessimisticCheck); + must_locked(&mut engine, k, 47); + must_cleanup(&mut engine, k, 47, 0); + must_unlocked(&mut engine, k); // The rollback of the primary key in a pessimistic transaction should be // protected from being collapsed. - must_succeed(&engine, k, k, 49, 60); - must_pessimistic_prewrite_put(&engine, k, v, k, 49, 60, DoPessimisticCheck); - must_locked(&engine, k, 49); - must_cleanup(&engine, k, 49, 0); - must_get_rollback_protected(&engine, k, 49, true); - must_prewrite_put(&engine, k, v, k, 51); - must_rollback(&engine, k, 51, false); - must_err(&engine, k, k, 49, 60); + must_succeed(&mut engine, k, k, 49, 60); + must_pessimistic_prewrite_put(&mut engine, k, v, k, 49, 60, DoPessimisticCheck); + must_locked(&mut engine, k, 49); + must_cleanup(&mut engine, k, 49, 0); + must_get_rollback_protected(&mut engine, k, 49, true); + must_prewrite_put(&mut engine, k, v, k, 51); + must_rollback(&mut engine, k, 51, false); + must_err(&mut engine, k, k, 49, 60); // Overlapped rollback record will be written when the current start_ts equals // to another write records' commit ts. Now there is a commit record with // commit_ts = 50. 
- must_succeed(&engine, k, k, 50, 61); - must_pessimistic_prewrite_put(&engine, k, v, k, 50, 61, DoPessimisticCheck); - must_locked(&engine, k, 50); - must_cleanup(&engine, k, 50, 0); - must_get_overlapped_rollback(&engine, k, 50, 46, WriteType::Put, Some(0)); + must_succeed(&mut engine, k, k, 50, 61); + must_pessimistic_prewrite_put(&mut engine, k, v, k, 50, 61, DoPessimisticCheck); + must_locked(&mut engine, k, 50); + must_cleanup(&mut engine, k, 50, 0); + must_get_overlapped_rollback(&mut engine, k, 50, 46, WriteType::Put, Some(0)); // start_ts and commit_ts interlacing for start_ts in &[140, 150, 160] { let for_update_ts = start_ts + 48; let commit_ts = start_ts + 50; - must_succeed(&engine, k, k, *start_ts, for_update_ts); + must_succeed(&mut engine, k, k, *start_ts, for_update_ts); must_pessimistic_prewrite_put( - &engine, + &mut engine, k, v, k, @@ -745,105 +745,108 @@ pub mod tests { for_update_ts, DoPessimisticCheck, ); - must_commit(&engine, k, *start_ts, commit_ts); - must_get(&engine, k, commit_ts + 1, v); + must_commit(&mut engine, k, *start_ts, commit_ts); + must_get(&mut engine, k, commit_ts + 1, v); } - must_rollback(&engine, k, 170, false); + must_rollback(&mut engine, k, 170, false); // Now the data should be like: (start_ts -> commit_ts) // 140 -> 190 // 150 -> 200 // 160 -> 210 // 170 -> rollback - must_get_commit_ts(&engine, k, 140, 190); - must_get_commit_ts(&engine, k, 150, 200); - must_get_commit_ts(&engine, k, 160, 210); - must_get_rollback_ts(&engine, k, 170); + must_get_commit_ts(&mut engine, k, 140, 190); + must_get_commit_ts(&mut engine, k, 150, 200); + must_get_commit_ts(&mut engine, k, 160, 210); + must_get_rollback_ts(&mut engine, k, 170); } #[test] fn test_pessimistic_lock_return_value() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k, v) = (b"k", b"v"); assert_eq!( - must_succeed_return_value(&engine, k, k, 10, 10, false), + 
must_succeed_return_value(&mut engine, k, k, 10, 10, false), None ); - must_pessimistic_locked(&engine, k, 10, 10); - pessimistic_rollback::tests::must_success(&engine, k, 10, 10); + must_pessimistic_locked(&mut engine, k, 10, 10); + pessimistic_rollback::tests::must_success(&mut engine, k, 10, 10); // Put - must_prewrite_put(&engine, k, v, k, 10); + must_prewrite_put(&mut engine, k, v, k, 10); // KeyIsLocked - match must_err_return_value(&engine, k, k, 20, 20, false) { + match must_err_return_value(&mut engine, k, k, 20, 20, false) { MvccError(box ErrorInner::KeyIsLocked(_)) => (), e => panic!("unexpected error: {}", e), }; - must_commit(&engine, k, 10, 20); + must_commit(&mut engine, k, 10, 20); // WriteConflict - match must_err_return_value(&engine, k, k, 15, 15, false) { + match must_err_return_value(&mut engine, k, k, 15, 15, false) { MvccError(box ErrorInner::WriteConflict { .. }) => (), e => panic!("unexpected error: {}", e), }; assert_eq!( - must_succeed_return_value(&engine, k, k, 25, 25, false), + must_succeed_return_value(&mut engine, k, k, 25, 25, false), Some(v.to_vec()) ); - must_pessimistic_locked(&engine, k, 25, 25); - pessimistic_rollback::tests::must_success(&engine, k, 25, 25); + must_pessimistic_locked(&mut engine, k, 25, 25); + pessimistic_rollback::tests::must_success(&mut engine, k, 25, 25); // Skip Write::Lock - must_prewrite_lock(&engine, k, k, 30); - must_commit(&engine, k, 30, 40); + must_prewrite_lock(&mut engine, k, k, 30); + must_commit(&mut engine, k, 30, 40); assert_eq!( - must_succeed_return_value(&engine, k, k, 45, 45, false), + must_succeed_return_value(&mut engine, k, k, 45, 45, false), Some(v.to_vec()) ); - must_pessimistic_locked(&engine, k, 45, 45); - pessimistic_rollback::tests::must_success(&engine, k, 45, 45); + must_pessimistic_locked(&mut engine, k, 45, 45); + pessimistic_rollback::tests::must_success(&mut engine, k, 45, 45); // Skip Write::Rollback - must_rollback(&engine, k, 50, false); + must_rollback(&mut engine, k, 
50, false); assert_eq!( - must_succeed_return_value(&engine, k, k, 55, 55, false), + must_succeed_return_value(&mut engine, k, k, 55, 55, false), Some(v.to_vec()) ); - must_pessimistic_locked(&engine, k, 55, 55); - pessimistic_rollback::tests::must_success(&engine, k, 55, 55); + must_pessimistic_locked(&mut engine, k, 55, 55); + pessimistic_rollback::tests::must_success(&mut engine, k, 55, 55); // Delete - must_prewrite_delete(&engine, k, k, 60); - must_commit(&engine, k, 60, 70); + must_prewrite_delete(&mut engine, k, k, 60); + must_commit(&mut engine, k, 60, 70); assert_eq!( - must_succeed_return_value(&engine, k, k, 75, 75, false), + must_succeed_return_value(&mut engine, k, k, 75, 75, false), None ); // Duplicated command assert_eq!( - must_succeed_return_value(&engine, k, k, 75, 75, false), + must_succeed_return_value(&mut engine, k, k, 75, 75, false), None ); assert_eq!( - must_succeed_return_value(&engine, k, k, 75, 55, false), + must_succeed_return_value(&mut engine, k, k, 75, 55, false), Some(v.to_vec()) ); - must_pessimistic_locked(&engine, k, 75, 75); - pessimistic_rollback::tests::must_success(&engine, k, 75, 75); + must_pessimistic_locked(&mut engine, k, 75, 75); + pessimistic_rollback::tests::must_success(&mut engine, k, 75, 75); } #[test] fn test_pessimistic_lock_only_if_exists() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k, v) = (b"k", b"v"); // The key doesn't exist, no pessimistic lock is generated - assert_eq!(must_succeed_return_value(&engine, k, k, 10, 10, true), None); - must_unlocked(&engine, k); + assert_eq!( + must_succeed_return_value(&mut engine, k, k, 10, 10, true), + None + ); + must_unlocked(&mut engine, k); match must_err_impl( - &engine, + &mut engine, k, k, 10, @@ -862,133 +865,139 @@ pub mod tests { }; // Put the value, writecf: k_20_put_v - must_prewrite_put(&engine, k, v, k, 10); - must_commit(&engine, k, 10, 20); + must_prewrite_put(&mut engine, 
k, v, k, 10); + must_commit(&mut engine, k, 10, 20); // Pessimistic lock generated assert_eq!( - must_succeed_return_value(&engine, k, k, 25, 25, true), + must_succeed_return_value(&mut engine, k, k, 25, 25, true), Some(v.to_vec()) ); - must_pessimistic_locked(&engine, k, 25, 25); - pessimistic_rollback::tests::must_success(&engine, k, 25, 25); + must_pessimistic_locked(&mut engine, k, 25, 25); + pessimistic_rollback::tests::must_success(&mut engine, k, 25, 25); // Skip Write::Lock, WriteRecord: k_20_put_v k_40_lock - must_prewrite_lock(&engine, k, k, 30); - must_commit(&engine, k, 30, 40); + must_prewrite_lock(&mut engine, k, k, 30); + must_commit(&mut engine, k, 30, 40); assert_eq!( - must_succeed_return_value(&engine, k, k, 45, 45, true), + must_succeed_return_value(&mut engine, k, k, 45, 45, true), Some(v.to_vec()) ); - must_pessimistic_locked(&engine, k, 45, 45); - pessimistic_rollback::tests::must_success(&engine, k, 45, 45); + must_pessimistic_locked(&mut engine, k, 45, 45); + pessimistic_rollback::tests::must_success(&mut engine, k, 45, 45); // Skip Write::Rollback WriteRecord: k_20_put_v k_40_lock k_50_R - must_rollback(&engine, k, 50, false); + must_rollback(&mut engine, k, 50, false); assert_eq!( - must_succeed_return_value(&engine, k, k, 55, 55, true), + must_succeed_return_value(&mut engine, k, k, 55, 55, true), Some(v.to_vec()) ); - must_pessimistic_locked(&engine, k, 55, 55); - pessimistic_rollback::tests::must_success(&engine, k, 55, 55); + must_pessimistic_locked(&mut engine, k, 55, 55); + pessimistic_rollback::tests::must_success(&mut engine, k, 55, 55); // Delete WriteRecord: k_20_put_v k_40_lock k_50_R k_70_delete - must_prewrite_delete(&engine, k, k, 60); - must_commit(&engine, k, 60, 70); - assert_eq!(must_succeed_return_value(&engine, k, k, 75, 75, true), None); - must_unlocked(&engine, k); + must_prewrite_delete(&mut engine, k, k, 60); + must_commit(&mut engine, k, 60, 70); + assert_eq!( + must_succeed_return_value(&mut engine, k, k, 75, 75, 
true), + None + ); + must_unlocked(&mut engine, k); // Duplicated command assert_eq!( - must_succeed_return_value(&engine, k, k, 75, 75, false), + must_succeed_return_value(&mut engine, k, k, 75, 75, false), None ); - must_pessimistic_locked(&engine, k, 75, 75); - assert_eq!(must_succeed_return_value(&engine, k, k, 75, 85, true), None); - must_pessimistic_locked(&engine, k, 75, 85); - pessimistic_rollback::tests::must_success(&engine, k, 75, 85); - must_unlocked(&engine, k); + must_pessimistic_locked(&mut engine, k, 75, 75); + assert_eq!( + must_succeed_return_value(&mut engine, k, k, 75, 85, true), + None + ); + must_pessimistic_locked(&mut engine, k, 75, 85); + pessimistic_rollback::tests::must_success(&mut engine, k, 75, 85); + must_unlocked(&mut engine, k); } #[test] fn test_overwrite_pessimistic_lock() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let k = b"k1"; - must_succeed(&engine, k, k, 1, 2); - must_pessimistic_locked(&engine, k, 1, 2); - must_succeed(&engine, k, k, 1, 1); - must_pessimistic_locked(&engine, k, 1, 2); - must_succeed(&engine, k, k, 1, 3); - must_pessimistic_locked(&engine, k, 1, 3); + must_succeed(&mut engine, k, k, 1, 2); + must_pessimistic_locked(&mut engine, k, 1, 2); + must_succeed(&mut engine, k, k, 1, 1); + must_pessimistic_locked(&mut engine, k, 1, 2); + must_succeed(&mut engine, k, k, 1, 3); + must_pessimistic_locked(&mut engine, k, 1, 3); } #[test] fn test_pessimistic_lock_check_gc_fence() { use pessimistic_rollback::tests::must_success as must_pessimistic_rollback; - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // PUT, Read // `------^ - must_prewrite_put(&engine, b"k1", b"v1", b"k1", 10); - must_commit(&engine, b"k1", 10, 30); - must_cleanup_with_gc_fence(&engine, b"k1", 30, 0, 40, true); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 10); + must_commit(&mut engine, b"k1", 
10, 30); + must_cleanup_with_gc_fence(&mut engine, b"k1", 30, 0, 40, true); // PUT, Read // * (GC fence ts = 0) - must_prewrite_put(&engine, b"k2", b"v2", b"k2", 11); - must_commit(&engine, b"k2", 11, 30); - must_cleanup_with_gc_fence(&engine, b"k2", 30, 0, 0, true); + must_prewrite_put(&mut engine, b"k2", b"v2", b"k2", 11); + must_commit(&mut engine, b"k2", 11, 30); + must_cleanup_with_gc_fence(&mut engine, b"k2", 30, 0, 0, true); // PUT, LOCK, LOCK, Read // `---------^ - must_prewrite_put(&engine, b"k3", b"v3", b"k3", 12); - must_commit(&engine, b"k3", 12, 30); - must_prewrite_lock(&engine, b"k3", b"k3", 37); - must_commit(&engine, b"k3", 37, 38); - must_cleanup_with_gc_fence(&engine, b"k3", 30, 0, 40, true); - must_prewrite_lock(&engine, b"k3", b"k3", 42); - must_commit(&engine, b"k3", 42, 43); + must_prewrite_put(&mut engine, b"k3", b"v3", b"k3", 12); + must_commit(&mut engine, b"k3", 12, 30); + must_prewrite_lock(&mut engine, b"k3", b"k3", 37); + must_commit(&mut engine, b"k3", 37, 38); + must_cleanup_with_gc_fence(&mut engine, b"k3", 30, 0, 40, true); + must_prewrite_lock(&mut engine, b"k3", b"k3", 42); + must_commit(&mut engine, b"k3", 42, 43); // PUT, LOCK, LOCK, Read // * - must_prewrite_put(&engine, b"k4", b"v4", b"k4", 13); - must_commit(&engine, b"k4", 13, 30); - must_prewrite_lock(&engine, b"k4", b"k4", 37); - must_commit(&engine, b"k4", 37, 38); - must_prewrite_lock(&engine, b"k4", b"k4", 42); - must_commit(&engine, b"k4", 42, 43); - must_cleanup_with_gc_fence(&engine, b"k4", 30, 0, 0, true); + must_prewrite_put(&mut engine, b"k4", b"v4", b"k4", 13); + must_commit(&mut engine, b"k4", 13, 30); + must_prewrite_lock(&mut engine, b"k4", b"k4", 37); + must_commit(&mut engine, b"k4", 37, 38); + must_prewrite_lock(&mut engine, b"k4", b"k4", 42); + must_commit(&mut engine, b"k4", 42, 43); + must_cleanup_with_gc_fence(&mut engine, b"k4", 30, 0, 0, true); // PUT, PUT, READ // `-----^ `------^ - must_prewrite_put(&engine, b"k5", b"v5", b"k5", 14); - 
must_commit(&engine, b"k5", 14, 20); - must_prewrite_put(&engine, b"k5", b"v5x", b"k5", 21); - must_commit(&engine, b"k5", 21, 30); - must_cleanup_with_gc_fence(&engine, b"k5", 20, 0, 30, false); - must_cleanup_with_gc_fence(&engine, b"k5", 30, 0, 40, true); + must_prewrite_put(&mut engine, b"k5", b"v5", b"k5", 14); + must_commit(&mut engine, b"k5", 14, 20); + must_prewrite_put(&mut engine, b"k5", b"v5x", b"k5", 21); + must_commit(&mut engine, b"k5", 21, 30); + must_cleanup_with_gc_fence(&mut engine, b"k5", 20, 0, 30, false); + must_cleanup_with_gc_fence(&mut engine, b"k5", 30, 0, 40, true); // PUT, PUT, READ // `-----^ * - must_prewrite_put(&engine, b"k6", b"v6", b"k6", 15); - must_commit(&engine, b"k6", 15, 20); - must_prewrite_put(&engine, b"k6", b"v6x", b"k6", 22); - must_commit(&engine, b"k6", 22, 30); - must_cleanup_with_gc_fence(&engine, b"k6", 20, 0, 30, false); - must_cleanup_with_gc_fence(&engine, b"k6", 30, 0, 0, true); + must_prewrite_put(&mut engine, b"k6", b"v6", b"k6", 15); + must_commit(&mut engine, b"k6", 15, 20); + must_prewrite_put(&mut engine, b"k6", b"v6x", b"k6", 22); + must_commit(&mut engine, b"k6", 22, 30); + must_cleanup_with_gc_fence(&mut engine, b"k6", 20, 0, 30, false); + must_cleanup_with_gc_fence(&mut engine, b"k6", 30, 0, 0, true); // PUT, LOCK, READ // `----------^ // Note that this case is special because usually the `LOCK` is the first write // already got during prewrite/acquire_pessimistic_lock and will continue // searching an older version from the `LOCK` record. 
- must_prewrite_put(&engine, b"k7", b"v7", b"k7", 16); - must_commit(&engine, b"k7", 16, 30); - must_prewrite_lock(&engine, b"k7", b"k7", 37); - must_commit(&engine, b"k7", 37, 38); - must_cleanup_with_gc_fence(&engine, b"k7", 30, 0, 40, true); + must_prewrite_put(&mut engine, b"k7", b"v7", b"k7", 16); + must_commit(&mut engine, b"k7", 16, 30); + must_prewrite_lock(&mut engine, b"k7", b"k7", 37); + must_commit(&mut engine, b"k7", 37, 38); + must_cleanup_with_gc_fence(&mut engine, b"k7", 30, 0, 40, true); let cases = vec![ (b"k1" as &[u8], None), @@ -1004,34 +1013,68 @@ pub mod tests { // Test constraint check with `should_not_exist`. if expected_value.is_none() { assert!( - must_succeed_impl(&engine, key, key, 50, true, 0, 50, false, false, 51, false) - .is_none() + must_succeed_impl( + &mut engine, + key, + key, + 50, + true, + 0, + 50, + false, + false, + 51, + false + ) + .is_none() ); - must_pessimistic_rollback(&engine, key, 50, 51); + must_pessimistic_rollback(&mut engine, key, 50, 51); } else { - must_err_impl(&engine, key, key, 50, true, 50, false, false, 51, false); + must_err_impl(&mut engine, key, key, 50, true, 50, false, false, 51, false); } - must_unlocked(&engine, key); + must_unlocked(&mut engine, key); // Test getting value. - let res = - must_succeed_impl(&engine, key, key, 50, false, 0, 50, true, false, 51, false); + let res = must_succeed_impl( + &mut engine, + key, + key, + 50, + false, + 0, + 50, + true, + false, + 51, + false, + ); assert_eq!(res, expected_value.map(|v| v.to_vec())); - must_pessimistic_rollback(&engine, key, 50, 51); + must_pessimistic_rollback(&mut engine, key, 50, 51); // Test getting value when already locked. 
- must_succeed(&engine, key, key, 50, 51); - let res2 = - must_succeed_impl(&engine, key, key, 50, false, 0, 50, true, false, 51, false); + must_succeed(&mut engine, key, key, 50, 51); + let res2 = must_succeed_impl( + &mut engine, + key, + key, + 50, + false, + 0, + 50, + true, + false, + 51, + false, + ); assert_eq!(res2, expected_value.map(|v| v.to_vec())); - must_pessimistic_rollback(&engine, key, 50, 51); + must_pessimistic_rollback(&mut engine, key, 50, 51); } } #[test] fn test_old_value_put_delete_lock_insert() { - let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); - let start_ts = old_value_put_delete_lock_insert(&engine, b"k1"); + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let start_ts = old_value_put_delete_lock_insert(&mut engine, b"k1"); let key = Key::from_raw(b"k1"); for should_not_exist in &[true, false] { for need_value in &[true, false] { @@ -1067,21 +1110,21 @@ pub mod tests { #[test] fn test_old_value_for_update_ts() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let k = b"k1"; let v1 = b"v1"; // Put v1 @ start ts 1, commit ts 2 - must_succeed(&engine, k, k, 1, 1); - must_pessimistic_prewrite_put(&engine, k, v1, k, 1, 1, DoPessimisticCheck); - must_commit(&engine, k, 1, 2); + must_succeed(&mut engine, k, k, 1, 1); + must_pessimistic_prewrite_put(&mut engine, k, v1, k, 1, 1, DoPessimisticCheck); + must_commit(&mut engine, k, 1, 2); let v2 = b"v2"; // Put v2 @ start ts 10, commit ts 11 - must_succeed(&engine, k, k, 10, 10); - must_pessimistic_prewrite_put(&engine, k, v2, k, 10, 10, DoPessimisticCheck); - must_commit(&engine, k, 10, 11); + must_succeed(&mut engine, k, k, 10, 10); + must_pessimistic_prewrite_put(&mut engine, k, v2, k, 10, 10, DoPessimisticCheck); + must_commit(&mut engine, k, 10, 11); // Lock @ start ts 9, for update ts 12, commit ts 13 let snapshot = engine.snapshot(Default::default()).unwrap(); @@ 
-1204,14 +1247,14 @@ pub mod tests { #[test] fn test_acquire_pessimistic_lock_should_not_exist() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (key, value) = (b"k", b"val"); // T1: start_ts = 3, commit_ts = 5, put key:value - must_succeed(&engine, key, key, 3, 3); - must_pessimistic_prewrite_put(&engine, key, value, key, 3, 3, DoPessimisticCheck); - must_commit(&engine, key, 3, 5); + must_succeed(&mut engine, key, key, 3, 3); + must_pessimistic_prewrite_put(&mut engine, key, value, key, 3, 3, DoPessimisticCheck); + must_commit(&mut engine, key, 3, 5); // T2: start_ts = 15, acquire pessimistic lock on k, with should_not_exist flag // set. @@ -1245,9 +1288,9 @@ pub mod tests { // T3: start_ts = 8, commit_ts = max_ts + 1 = 16, prewrite a DELETE operation on // k - must_succeed(&engine, key, key, 8, 8); - must_pessimistic_prewrite_delete(&engine, key, key, 8, 8, DoPessimisticCheck); - must_commit(&engine, key, 8, cm.max_ts().into_inner() + 1); + must_succeed(&mut engine, key, key, 8, 8); + must_pessimistic_prewrite_delete(&mut engine, key, key, 8, 8, DoPessimisticCheck); + must_commit(&mut engine, key, 8, cm.max_ts().into_inner() + 1); // T1: start_ts = 10, repeatedly acquire pessimistic lock on k, with // should_not_exist flag set @@ -1279,35 +1322,35 @@ pub mod tests { #[test] fn test_check_existence() { use pessimistic_rollback::tests::must_success as must_pessimistic_rollback; - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // k1: Not exists // k2: Exists - must_prewrite_put(&engine, b"k2", b"v2", b"k2", 5); - must_commit(&engine, b"k2", 5, 20); + must_prewrite_put(&mut engine, b"k2", b"v2", b"k2", 5); + must_commit(&mut engine, b"k2", 5, 20); // k3: Delete - must_prewrite_put(&engine, b"k3", b"v3", b"k3", 5); - must_commit(&engine, b"k3", 5, 6); - must_prewrite_delete(&engine, b"k3", b"k3", 7); - 
must_commit(&engine, b"k3", 7, 20); + must_prewrite_put(&mut engine, b"k3", b"v3", b"k3", 5); + must_commit(&mut engine, b"k3", 5, 6); + must_prewrite_delete(&mut engine, b"k3", b"k3", 7); + must_commit(&mut engine, b"k3", 7, 20); // k4: Exist + Lock + Rollback - must_prewrite_put(&engine, b"k4", b"v4", b"k4", 5); - must_commit(&engine, b"k4", 5, 15); - must_prewrite_lock(&engine, b"k4", b"k4", 16); - must_commit(&engine, b"k4", 16, 17); - must_rollback(&engine, b"k4", 20, true); + must_prewrite_put(&mut engine, b"k4", b"v4", b"k4", 5); + must_commit(&mut engine, b"k4", 5, 15); + must_prewrite_lock(&mut engine, b"k4", b"k4", 16); + must_commit(&mut engine, b"k4", 16, 17); + must_rollback(&mut engine, b"k4", 20, true); // k5: GC fence invalid - must_prewrite_put(&engine, b"k5", b"v5", b"k5", 5); - must_commit(&engine, b"k5", 5, 6); + must_prewrite_put(&mut engine, b"k5", b"v5", b"k5", 5); + must_commit(&mut engine, b"k5", 5, 6); // A invalid gc fence is assumed never pointing to a ts greater than GC // safepoint, and a read operation's ts is assumed never less than the // GC safepoint. Therefore since we will read at ts=10 later, we can't // put a version greater than 10 in this case. 
- must_cleanup_with_gc_fence(&engine, b"k5", 6, 0, 8, true); + must_cleanup_with_gc_fence(&mut engine, b"k5", 6, 0, 8, true); for &need_value in &[false, true] { for &need_check_existence in &[false, true] { @@ -1319,7 +1362,7 @@ pub mod tests { ); if repeated_request { for &k in &[b"k1", b"k2", b"k3", b"k4", b"k5"] { - must_succeed(&engine, k, k, start_ts, 30); + must_succeed(&mut engine, k, k, start_ts, 30); } } @@ -1334,7 +1377,7 @@ pub mod tests { }; let value1 = must_succeed_impl( - &engine, + &mut engine, b"k1", b"k1", start_ts, @@ -1347,10 +1390,10 @@ pub mod tests { false, ); assert_eq!(value1, None); - must_pessimistic_rollback(&engine, b"k1", start_ts, 30); + must_pessimistic_rollback(&mut engine, b"k1", start_ts, 30); let value2 = must_succeed_impl( - &engine, + &mut engine, b"k2", b"k2", start_ts, @@ -1363,10 +1406,10 @@ pub mod tests { false, ); assert_eq!(value2, expected_value(Some(b"v2"))); - must_pessimistic_rollback(&engine, b"k2", start_ts, 30); + must_pessimistic_rollback(&mut engine, b"k2", start_ts, 30); let value3 = must_succeed_impl( - &engine, + &mut engine, b"k3", b"k3", start_ts, @@ -1379,10 +1422,10 @@ pub mod tests { false, ); assert_eq!(value3, None); - must_pessimistic_rollback(&engine, b"k3", start_ts, 30); + must_pessimistic_rollback(&mut engine, b"k3", start_ts, 30); let value4 = must_succeed_impl( - &engine, + &mut engine, b"k4", b"k4", start_ts, @@ -1395,10 +1438,10 @@ pub mod tests { false, ); assert_eq!(value4, expected_value(Some(b"v4"))); - must_pessimistic_rollback(&engine, b"k4", start_ts, 30); + must_pessimistic_rollback(&mut engine, b"k4", start_ts, 30); let value5 = must_succeed_impl( - &engine, + &mut engine, b"k5", b"k5", start_ts, @@ -1411,7 +1454,7 @@ pub mod tests { false, ); assert_eq!(value5, None); - must_pessimistic_rollback(&engine, b"k5", start_ts, 30); + must_pessimistic_rollback(&mut engine, b"k5", start_ts, 30); } } } diff --git a/src/storage/txn/actions/check_data_constraint.rs 
b/src/storage/txn/actions/check_data_constraint.rs index 35999ee6cb2..d90a95a24ab 100644 --- a/src/storage/txn/actions/check_data_constraint.rs +++ b/src/storage/txn/actions/check_data_constraint.rs @@ -50,7 +50,7 @@ mod tests { #[test] fn test_check_data_constraint() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(42.into()); let mut txn = MvccTxn::new(TimeStamp::new(2), cm); txn.put_write( diff --git a/src/storage/txn/actions/cleanup.rs b/src/storage/txn/actions/cleanup.rs index c72905c8910..5ed77d4fab3 100644 --- a/src/storage/txn/actions/cleanup.rs +++ b/src/storage/txn/actions/cleanup.rs @@ -105,7 +105,7 @@ pub mod tests { }; pub fn must_succeed( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, current_ts: impl Into, @@ -122,7 +122,7 @@ pub mod tests { } pub fn must_err( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, current_ts: impl Into, @@ -137,7 +137,7 @@ pub mod tests { } pub fn must_cleanup_with_gc_fence( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, current_ts: impl Into, @@ -183,11 +183,11 @@ pub mod tests { #[test] fn test_must_cleanup_with_gc_fence() { // Tests the test util - let engine = TestEngineBuilder::new().build().unwrap(); - must_prewrite_put(&engine, b"k", b"v", b"k", 10); - must_commit(&engine, b"k", 10, 20); - must_cleanup_with_gc_fence(&engine, b"k", 20, 0, 30, true); - let w = must_written(&engine, b"k", 10, 20, WriteType::Put); + let mut engine = TestEngineBuilder::new().build().unwrap(); + must_prewrite_put(&mut engine, b"k", b"v", b"k", 10); + must_commit(&mut engine, b"k", 10, 20); + must_cleanup_with_gc_fence(&mut engine, b"k", 20, 0, 30, true); + let w = must_written(&mut engine, b"k", 10, 20, WriteType::Put); assert!(w.has_overlapped_rollback); assert_eq!(w.gc_fence.unwrap(), 30.into()); } @@ -196,45 +196,53 @@ pub mod tests { fn test_cleanup() { // Cleanup's logic is 
mostly similar to rollback, except the TTL check. Tests // that not related to TTL check should be covered by other test cases. - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // Shorthand for composing ts. let ts = TimeStamp::compose; let (k, v) = (b"k", b"v"); - must_prewrite_put(&engine, k, v, k, ts(10, 0)); - must_locked(&engine, k, ts(10, 0)); - txn_heart_beat::tests::must_success(&engine, k, ts(10, 0), 100, 100); + must_prewrite_put(&mut engine, k, v, k, ts(10, 0)); + must_locked(&mut engine, k, ts(10, 0)); + txn_heart_beat::tests::must_success(&mut engine, k, ts(10, 0), 100, 100); // Check the last txn_heart_beat has set the lock's TTL to 100. - txn_heart_beat::tests::must_success(&engine, k, ts(10, 0), 90, 100); + txn_heart_beat::tests::must_success(&mut engine, k, ts(10, 0), 90, 100); // TTL not expired. Do nothing but returns an error. - must_err(&engine, k, ts(10, 0), ts(20, 0)); - must_locked(&engine, k, ts(10, 0)); + must_err(&mut engine, k, ts(10, 0), ts(20, 0)); + must_locked(&mut engine, k, ts(10, 0)); // Try to cleanup another transaction's lock. Does nothing. - must_succeed(&engine, k, ts(10, 1), ts(120, 0)); + must_succeed(&mut engine, k, ts(10, 1), ts(120, 0)); // If there is no existing lock when cleanup, it may be a pessimistic // transaction, so the rollback should be protected. - must_get_rollback_protected(&engine, k, ts(10, 1), true); - must_locked(&engine, k, ts(10, 0)); + must_get_rollback_protected(&mut engine, k, ts(10, 1), true); + must_locked(&mut engine, k, ts(10, 0)); // TTL expired. The lock should be removed. 
- must_succeed(&engine, k, ts(10, 0), ts(120, 0)); - must_unlocked(&engine, k); + must_succeed(&mut engine, k, ts(10, 0), ts(120, 0)); + must_unlocked(&mut engine, k); // Rollbacks of optimistic transactions needn't be protected - must_get_rollback_protected(&engine, k, ts(10, 0), false); - must_get_rollback_ts(&engine, k, ts(10, 0)); + must_get_rollback_protected(&mut engine, k, ts(10, 0), false); + must_get_rollback_ts(&mut engine, k, ts(10, 0)); // Rollbacks of primary keys in pessimistic transactions should be protected - must_acquire_pessimistic_lock(&engine, k, k, ts(11, 1), ts(12, 1)); - must_succeed(&engine, k, ts(11, 1), ts(120, 0)); - must_get_rollback_protected(&engine, k, ts(11, 1), true); - - must_acquire_pessimistic_lock(&engine, k, k, ts(13, 1), ts(14, 1)); - must_pessimistic_prewrite_put(&engine, k, v, k, ts(13, 1), ts(14, 1), DoPessimisticCheck); - must_succeed(&engine, k, ts(13, 1), ts(120, 0)); - must_get_rollback_protected(&engine, k, ts(13, 1), true); + must_acquire_pessimistic_lock(&mut engine, k, k, ts(11, 1), ts(12, 1)); + must_succeed(&mut engine, k, ts(11, 1), ts(120, 0)); + must_get_rollback_protected(&mut engine, k, ts(11, 1), true); + + must_acquire_pessimistic_lock(&mut engine, k, k, ts(13, 1), ts(14, 1)); + must_pessimistic_prewrite_put( + &mut engine, + k, + v, + k, + ts(13, 1), + ts(14, 1), + DoPessimisticCheck, + ); + must_succeed(&mut engine, k, ts(13, 1), ts(120, 0)); + must_get_rollback_protected(&mut engine, k, ts(13, 1), true); } } diff --git a/src/storage/txn/actions/commit.rs b/src/storage/txn/actions/commit.rs index e0a4257de26..6fd925b536e 100644 --- a/src/storage/txn/actions/commit.rs +++ b/src/storage/txn/actions/commit.rs @@ -128,7 +128,7 @@ pub mod tests { }; pub fn must_succeed( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, commit_ts: impl Into, @@ -137,7 +137,7 @@ pub mod tests { } pub fn must_succeed_on_region( - engine: &E, + engine: &mut E, region_id: u64, key: &[u8], start_ts: impl Into, @@ 
-147,7 +147,7 @@ pub mod tests { } fn must_succeed_impl( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, commit_ts: impl Into, @@ -171,7 +171,7 @@ pub mod tests { } pub fn must_err( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, commit_ts: impl Into, @@ -186,23 +186,23 @@ pub mod tests { #[cfg(test)] fn test_commit_ok_imp(k1: &[u8], v1: &[u8], k2: &[u8], k3: &[u8]) { - let engine = TestEngineBuilder::new().build().unwrap(); - must_prewrite_put(&engine, k1, v1, k1, 10); - must_prewrite_lock(&engine, k2, k1, 10); - must_prewrite_delete(&engine, k3, k1, 10); - must_locked(&engine, k1, 10); - must_locked(&engine, k2, 10); - must_locked(&engine, k3, 10); - must_succeed(&engine, k1, 10, 15); - must_succeed(&engine, k2, 10, 15); - must_succeed(&engine, k3, 10, 15); - must_written(&engine, k1, 10, 15, WriteType::Put); - must_written(&engine, k2, 10, 15, WriteType::Lock); - must_written(&engine, k3, 10, 15, WriteType::Delete); + let mut engine = TestEngineBuilder::new().build().unwrap(); + must_prewrite_put(&mut engine, k1, v1, k1, 10); + must_prewrite_lock(&mut engine, k2, k1, 10); + must_prewrite_delete(&mut engine, k3, k1, 10); + must_locked(&mut engine, k1, 10); + must_locked(&mut engine, k2, 10); + must_locked(&mut engine, k3, 10); + must_succeed(&mut engine, k1, 10, 15); + must_succeed(&mut engine, k2, 10, 15); + must_succeed(&mut engine, k3, 10, 15); + must_written(&mut engine, k1, 10, 15, WriteType::Put); + must_written(&mut engine, k2, 10, 15, WriteType::Lock); + must_written(&mut engine, k3, 10, 15, WriteType::Delete); // commit should be idempotent - must_succeed(&engine, k1, 10, 15); - must_succeed(&engine, k2, 10, 15); - must_succeed(&engine, k3, 10, 15); + must_succeed(&mut engine, k1, 10, 15); + must_succeed(&mut engine, k2, 10, 15); + must_succeed(&mut engine, k3, 10, 15); } #[test] @@ -215,16 +215,16 @@ pub mod tests { #[cfg(test)] fn test_commit_err_imp(k: &[u8], v: &[u8]) { - let engine = 
TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // Not prewrite yet - must_err(&engine, k, 1, 2); - must_prewrite_put(&engine, k, v, k, 5); + must_err(&mut engine, k, 1, 2); + must_prewrite_put(&mut engine, k, v, k, 5); // start_ts not match - must_err(&engine, k, 4, 5); - must_rollback(&engine, k, 5, false); + must_err(&mut engine, k, 4, 5); + must_rollback(&mut engine, k, 5, false); // commit after rollback - must_err(&engine, k, 5, 6); + must_err(&mut engine, k, 5, 6); } #[test] @@ -237,7 +237,7 @@ pub mod tests { #[test] fn test_min_commit_ts() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k, v) = (b"k", b"v"); @@ -253,9 +253,9 @@ pub mod tests { } }; - must_prewrite_put_for_large_txn(&engine, k, v, k, ts(10, 0), 100, 0); + must_prewrite_put_for_large_txn(&mut engine, k, v, k, ts(10, 0), 100, 0); check_txn_status::tests::must_success( - &engine, + &mut engine, k, ts(10, 0), ts(20, 0), @@ -266,13 +266,13 @@ pub mod tests { uncommitted(100, ts(20, 1)), ); // The min_commit_ts should be ts(20, 1) - must_err(&engine, k, ts(10, 0), ts(15, 0)); - must_err(&engine, k, ts(10, 0), ts(20, 0)); - must_succeed(&engine, k, ts(10, 0), ts(20, 1)); + must_err(&mut engine, k, ts(10, 0), ts(15, 0)); + must_err(&mut engine, k, ts(10, 0), ts(20, 0)); + must_succeed(&mut engine, k, ts(10, 0), ts(20, 1)); - must_prewrite_put_for_large_txn(&engine, k, v, k, ts(30, 0), 100, 0); + must_prewrite_put_for_large_txn(&mut engine, k, v, k, ts(30, 0), 100, 0); check_txn_status::tests::must_success( - &engine, + &mut engine, k, ts(30, 0), ts(40, 0), @@ -282,13 +282,13 @@ pub mod tests { false, uncommitted(100, ts(40, 1)), ); - must_succeed(&engine, k, ts(30, 0), ts(50, 0)); + must_succeed(&mut engine, k, ts(30, 0), ts(50, 0)); // If the min_commit_ts of the pessimistic lock is greater than prewrite's, use // it. 
- must_acquire_pessimistic_lock_for_large_txn(&engine, k, k, ts(60, 0), ts(60, 0), 100); + must_acquire_pessimistic_lock_for_large_txn(&mut engine, k, k, ts(60, 0), ts(60, 0), 100); check_txn_status::tests::must_success( - &engine, + &mut engine, k, ts(60, 0), ts(70, 0), @@ -299,7 +299,7 @@ pub mod tests { uncommitted(100, ts(70, 1)), ); must_prewrite_put_impl( - &engine, + &mut engine, k, v, k, @@ -316,8 +316,8 @@ pub mod tests { kvproto::kvrpcpb::AssertionLevel::Off, ); // The min_commit_ts is ts(70, 0) other than ts(60, 1) in prewrite request. - must_large_txn_locked(&engine, k, ts(60, 0), 100, ts(70, 1), false); - must_err(&engine, k, ts(60, 0), ts(65, 0)); - must_succeed(&engine, k, ts(60, 0), ts(80, 0)); + must_large_txn_locked(&mut engine, k, ts(60, 0), 100, ts(70, 1), false); + must_err(&mut engine, k, ts(60, 0), ts(65, 0)); + must_succeed(&mut engine, k, ts(60, 0), ts(80, 0)); } } diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index 0b9f0461297..5fcf0327c37 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -164,7 +164,7 @@ pub mod tests { }; fn must_flashback_write( - engine: &E, + engine: &mut E, key: &[u8], version: impl Into, start_ts: impl Into, @@ -209,91 +209,91 @@ pub mod tests { #[test] fn test_flashback_to_version() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let mut ts = TimeStamp::zero(); let k = b"k"; // Prewrite and commit Put(k -> v1) with stat_ts = 1, commit_ts = 2. let v1 = b"v1"; - must_prewrite_put(&engine, k, v1, k, *ts.incr()); - must_commit(&engine, k, ts, *ts.incr()); - must_get(&engine, k, *ts.incr(), v1); + must_prewrite_put(&mut engine, k, v1, k, *ts.incr()); + must_commit(&mut engine, k, ts, *ts.incr()); + must_get(&mut engine, k, *ts.incr(), v1); // Prewrite and rollback Put(k -> v2) with stat_ts = 4. 
let v2 = b"v2"; - must_prewrite_put(&engine, k, v2, k, *ts.incr()); - must_rollback(&engine, k, ts, false); - must_get(&engine, k, *ts.incr(), v1); + must_prewrite_put(&mut engine, k, v2, k, *ts.incr()); + must_rollback(&mut engine, k, ts, false); + must_get(&mut engine, k, *ts.incr(), v1); // Prewrite and rollback Delete(k) with stat_ts = 6. - must_prewrite_delete(&engine, k, k, *ts.incr()); - must_rollback(&engine, k, ts, false); - must_get(&engine, k, *ts.incr(), v1); + must_prewrite_delete(&mut engine, k, k, *ts.incr()); + must_rollback(&mut engine, k, ts, false); + must_get(&mut engine, k, *ts.incr(), v1); // Prewrite and commit Delete(k) with stat_ts = 8, commit_ts = 9. - must_prewrite_delete(&engine, k, k, *ts.incr()); - must_commit(&engine, k, ts, *ts.incr()); - must_get_none(&engine, k, *ts.incr()); + must_prewrite_delete(&mut engine, k, k, *ts.incr()); + must_commit(&mut engine, k, ts, *ts.incr()); + must_get_none(&mut engine, k, *ts.incr()); // Prewrite and commit Put(k -> v2) with stat_ts = 11, commit_ts = 12. - must_prewrite_put(&engine, k, v2, k, *ts.incr()); - must_commit(&engine, k, ts, *ts.incr()); - must_get(&engine, k, *ts.incr(), v2); + must_prewrite_put(&mut engine, k, v2, k, *ts.incr()); + must_commit(&mut engine, k, ts, *ts.incr()); + must_get(&mut engine, k, *ts.incr(), v2); // Flashback to version 1 with start_ts = 14, commit_ts = 15. assert_eq!( - must_flashback_write(&engine, k, 1, *ts.incr(), *ts.incr()), + must_flashback_write(&mut engine, k, 1, *ts.incr(), *ts.incr()), 1 ); - must_get_none(&engine, k, *ts.incr()); + must_get_none(&mut engine, k, *ts.incr()); // Flashback to version 2 with start_ts = 17, commit_ts = 18. assert_eq!( - must_flashback_write(&engine, k, 2, *ts.incr(), *ts.incr()), + must_flashback_write(&mut engine, k, 2, *ts.incr(), *ts.incr()), 1 ); - must_get(&engine, k, *ts.incr(), v1); + must_get(&mut engine, k, *ts.incr(), v1); // Flashback to version 5 with start_ts = 20, commit_ts = 21. 
assert_eq!( - must_flashback_write(&engine, k, 5, *ts.incr(), *ts.incr()), + must_flashback_write(&mut engine, k, 5, *ts.incr(), *ts.incr()), 1 ); - must_get(&engine, k, *ts.incr(), v1); + must_get(&mut engine, k, *ts.incr(), v1); // Flashback to version 7 with start_ts = 23, commit_ts = 24. assert_eq!( - must_flashback_write(&engine, k, 7, *ts.incr(), *ts.incr()), + must_flashback_write(&mut engine, k, 7, *ts.incr(), *ts.incr()), 1 ); - must_get(&engine, k, *ts.incr(), v1); + must_get(&mut engine, k, *ts.incr(), v1); // Flashback to version 10 with start_ts = 26, commit_ts = 27. assert_eq!( - must_flashback_write(&engine, k, 10, *ts.incr(), *ts.incr()), + must_flashback_write(&mut engine, k, 10, *ts.incr(), *ts.incr()), 1 ); - must_get_none(&engine, k, *ts.incr()); + must_get_none(&mut engine, k, *ts.incr()); // Flashback to version 13 with start_ts = 29, commit_ts = 30. assert_eq!( - must_flashback_write(&engine, k, 13, *ts.incr(), *ts.incr()), + must_flashback_write(&mut engine, k, 13, *ts.incr(), *ts.incr()), 1 ); - must_get(&engine, k, *ts.incr(), v2); + must_get(&mut engine, k, *ts.incr(), v2); // Flashback to version 27 with start_ts = 32, commit_ts = 33. 
assert_eq!( - must_flashback_write(&engine, k, 27, *ts.incr(), *ts.incr()), + must_flashback_write(&mut engine, k, 27, *ts.incr(), *ts.incr()), 1 ); - must_get_none(&engine, k, *ts.incr()); + must_get_none(&mut engine, k, *ts.incr()); } #[test] fn test_flashback_to_version_deleted() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let mut ts = TimeStamp::zero(); let (k, v) = (b"k", b"v"); - must_prewrite_put(&engine, k, v, k, *ts.incr()); - must_commit(&engine, k, ts, *ts.incr()); - must_get(&engine, k, ts, v); - must_prewrite_delete(&engine, k, k, *ts.incr()); - must_commit(&engine, k, ts, *ts.incr()); + must_prewrite_put(&mut engine, k, v, k, *ts.incr()); + must_commit(&mut engine, k, ts, *ts.incr()); + must_get(&mut engine, k, ts, v); + must_prewrite_delete(&mut engine, k, k, *ts.incr()); + must_commit(&mut engine, k, ts, *ts.incr()); // Since the key has been deleted, flashback to version 1 should not do // anything. 
assert_eq!( - must_flashback_write(&engine, k, ts, *ts.incr(), *ts.incr()), + must_flashback_write(&mut engine, k, ts, *ts.incr(), *ts.incr()), 0 ); - must_get_none(&engine, k, ts); + must_get_none(&mut engine, k, ts); } } diff --git a/src/storage/txn/actions/gc.rs b/src/storage/txn/actions/gc.rs index 29264c7df90..8c24baf7d5b 100644 --- a/src/storage/txn/actions/gc.rs +++ b/src/storage/txn/actions/gc.rs @@ -137,7 +137,7 @@ pub mod tests { RocksEngine, TestEngineBuilder, }; - pub fn must_succeed(engine: &E, key: &[u8], safe_point: impl Into) { + pub fn must_succeed(engine: &mut E, key: &[u8], safe_point: impl Into) { let ctx = SnapContext::default(); let snapshot = engine.snapshot(ctx).unwrap(); let cm = ConcurrencyManager::new(1.into()); @@ -150,22 +150,22 @@ pub mod tests { #[cfg(test)] fn test_gc_imp(k: &[u8], v1: &[u8], v2: &[u8], v3: &[u8], v4: &[u8], gc: F) where - F: Fn(&RocksEngine, &[u8], u64), + F: Fn(&mut RocksEngine, &[u8], u64), { - let engine = TestEngineBuilder::new().build().unwrap(); - - must_prewrite_put(&engine, k, v1, k, 5); - must_commit(&engine, k, 5, 10); - must_prewrite_put(&engine, k, v2, k, 15); - must_commit(&engine, k, 15, 20); - must_prewrite_delete(&engine, k, k, 25); - must_commit(&engine, k, 25, 30); - must_prewrite_put(&engine, k, v3, k, 35); - must_commit(&engine, k, 35, 40); - must_prewrite_lock(&engine, k, k, 45); - must_commit(&engine, k, 45, 50); - must_prewrite_put(&engine, k, v4, k, 55); - must_rollback(&engine, k, 55, false); + let mut engine = TestEngineBuilder::new().build().unwrap(); + + must_prewrite_put(&mut engine, k, v1, k, 5); + must_commit(&mut engine, k, 5, 10); + must_prewrite_put(&mut engine, k, v2, k, 15); + must_commit(&mut engine, k, 15, 20); + must_prewrite_delete(&mut engine, k, k, 25); + must_commit(&mut engine, k, 25, 30); + must_prewrite_put(&mut engine, k, v3, k, 35); + must_commit(&mut engine, k, 35, 40); + must_prewrite_lock(&mut engine, k, k, 45); + must_commit(&mut engine, k, 45, 50); + 
must_prewrite_put(&mut engine, k, v4, k, 55); + must_rollback(&mut engine, k, 55, false); // Transactions: // startTS commitTS Command @@ -192,19 +192,19 @@ pub mod tests { // 10 Commit(PUT,5) // 5 x5 - gc(&engine, k, 12); - must_get(&engine, k, 12, v1); + gc(&mut engine, k, 12); + must_get(&mut engine, k, 12, v1); - gc(&engine, k, 22); - must_get(&engine, k, 22, v2); - must_get_none(&engine, k, 12); + gc(&mut engine, k, 22); + must_get(&mut engine, k, 22, v2); + must_get_none(&mut engine, k, 12); - gc(&engine, k, 32); - must_get_none(&engine, k, 22); - must_get_none(&engine, k, 35); + gc(&mut engine, k, 32); + must_get_none(&mut engine, k, 22); + must_get_none(&mut engine, k, 35); - gc(&engine, k, 60); - must_get(&engine, k, 62, v3); + gc(&mut engine, k, 60); + must_get(&mut engine, k, 62, v3); } #[test] diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 5883fc4b983..a8a33799686 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -752,6 +752,8 @@ pub mod tests { #[cfg(test)] use rand::{Rng, SeedableRng}; #[cfg(test)] + use tikv_kv::RocksEngine; + #[cfg(test)] use txn_types::OldValue; use super::*; @@ -805,7 +807,7 @@ pub mod tests { // Insert has a constraint that key should not exist pub fn try_prewrite_insert( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -840,7 +842,7 @@ pub mod tests { } pub fn try_prewrite_check_not_exists( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], ts: impl Into, @@ -865,7 +867,7 @@ pub mod tests { #[test] fn test_async_commit_prewrite_check_max_commit_ts() { - let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(42.into()); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -903,14 +905,14 @@ pub mod tests { let modifies = txn.into_modifies(); assert_eq!(modifies.len(), 2); // the 
mutation that meets CommitTsTooLarge still exists write(&engine, &Default::default(), modifies); - assert!(must_locked(&engine, b"k1", 10).use_async_commit); + assert!(must_locked(&mut engine, b"k1", 10).use_async_commit); // The written lock should not have use_async_commit flag. - assert!(!must_locked(&engine, b"k2", 10).use_async_commit); + assert!(!must_locked(&mut engine, b"k2", 10).use_async_commit); } #[test] fn test_async_commit_prewrite_min_commit_ts() { - let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(41.into()); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1042,7 +1044,7 @@ pub mod tests { #[test] fn test_1pc_check_max_commit_ts() { - let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(42.into()); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1082,12 +1084,12 @@ pub mod tests { assert_eq!(modifies.len(), 2); // the mutation that meets CommitTsTooLarge still exists write(&engine, &Default::default(), modifies); // success 1pc prewrite needs to be transformed to locks - assert!(!must_locked(&engine, b"k1", 10).use_async_commit); - assert!(!must_locked(&engine, b"k2", 10).use_async_commit); + assert!(!must_locked(&mut engine, b"k1", 10).use_async_commit); + assert!(!must_locked(&mut engine, b"k2", 10).use_async_commit); } pub fn try_pessimistic_prewrite_check_not_exists( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], ts: impl Into, @@ -1123,11 +1125,11 @@ pub mod tests { #[test] fn test_async_commit_pessimistic_prewrite_check_max_commit_ts() { - let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(42.into()); - 
must_acquire_pessimistic_lock(&engine, b"k1", b"k1", 10, 10); - must_acquire_pessimistic_lock(&engine, b"k2", b"k1", 10, 10); + must_acquire_pessimistic_lock(&mut engine, b"k1", b"k1", 10, 10); + must_acquire_pessimistic_lock(&mut engine, b"k2", b"k1", 10, 10); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1173,11 +1175,11 @@ pub mod tests { #[test] fn test_1pc_pessimistic_prewrite_check_max_commit_ts() { - let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(42.into()); - must_acquire_pessimistic_lock(&engine, b"k1", b"k1", 10, 10); - must_acquire_pessimistic_lock(&engine, b"k2", b"k1", 10, 10); + must_acquire_pessimistic_lock(&mut engine, b"k1", b"k1", 10, 10); + must_acquire_pessimistic_lock(&mut engine, b"k2", b"k1", 10, 10); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1223,69 +1225,69 @@ pub mod tests { #[test] fn test_prewrite_check_gc_fence() { - let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(1.into()); // PUT, Read // `------^ - must_prewrite_put(&engine, b"k1", b"v1", b"k1", 10); - must_commit(&engine, b"k1", 10, 30); - must_cleanup_with_gc_fence(&engine, b"k1", 30, 0, 40, true); + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 10); + must_commit(&mut engine, b"k1", 10, 30); + must_cleanup_with_gc_fence(&mut engine, b"k1", 30, 0, 40, true); // PUT, Read // * (GC fence ts = 0) - must_prewrite_put(&engine, b"k2", b"v2", b"k2", 11); - must_commit(&engine, b"k2", 11, 30); - must_cleanup_with_gc_fence(&engine, b"k2", 30, 0, 0, true); + must_prewrite_put(&mut engine, b"k2", b"v2", b"k2", 11); + must_commit(&mut engine, b"k2", 11, 30); + must_cleanup_with_gc_fence(&mut engine, b"k2", 30, 0, 0, true); // PUT, LOCK, LOCK, Read // `---------^ - 
must_prewrite_put(&engine, b"k3", b"v3", b"k3", 12); - must_commit(&engine, b"k3", 12, 30); - must_prewrite_lock(&engine, b"k3", b"k3", 37); - must_commit(&engine, b"k3", 37, 38); - must_cleanup_with_gc_fence(&engine, b"k3", 30, 0, 40, true); - must_prewrite_lock(&engine, b"k3", b"k3", 42); - must_commit(&engine, b"k3", 42, 43); + must_prewrite_put(&mut engine, b"k3", b"v3", b"k3", 12); + must_commit(&mut engine, b"k3", 12, 30); + must_prewrite_lock(&mut engine, b"k3", b"k3", 37); + must_commit(&mut engine, b"k3", 37, 38); + must_cleanup_with_gc_fence(&mut engine, b"k3", 30, 0, 40, true); + must_prewrite_lock(&mut engine, b"k3", b"k3", 42); + must_commit(&mut engine, b"k3", 42, 43); // PUT, LOCK, LOCK, Read // * - must_prewrite_put(&engine, b"k4", b"v4", b"k4", 13); - must_commit(&engine, b"k4", 13, 30); - must_prewrite_lock(&engine, b"k4", b"k4", 37); - must_commit(&engine, b"k4", 37, 38); - must_prewrite_lock(&engine, b"k4", b"k4", 42); - must_commit(&engine, b"k4", 42, 43); - must_cleanup_with_gc_fence(&engine, b"k4", 30, 0, 0, true); + must_prewrite_put(&mut engine, b"k4", b"v4", b"k4", 13); + must_commit(&mut engine, b"k4", 13, 30); + must_prewrite_lock(&mut engine, b"k4", b"k4", 37); + must_commit(&mut engine, b"k4", 37, 38); + must_prewrite_lock(&mut engine, b"k4", b"k4", 42); + must_commit(&mut engine, b"k4", 42, 43); + must_cleanup_with_gc_fence(&mut engine, b"k4", 30, 0, 0, true); // PUT, PUT, READ // `-----^ `------^ - must_prewrite_put(&engine, b"k5", b"v5", b"k5", 14); - must_commit(&engine, b"k5", 14, 20); - must_prewrite_put(&engine, b"k5", b"v5x", b"k5", 21); - must_commit(&engine, b"k5", 21, 30); - must_cleanup_with_gc_fence(&engine, b"k5", 20, 0, 30, false); - must_cleanup_with_gc_fence(&engine, b"k5", 30, 0, 40, true); + must_prewrite_put(&mut engine, b"k5", b"v5", b"k5", 14); + must_commit(&mut engine, b"k5", 14, 20); + must_prewrite_put(&mut engine, b"k5", b"v5x", b"k5", 21); + must_commit(&mut engine, b"k5", 21, 30); + 
must_cleanup_with_gc_fence(&mut engine, b"k5", 20, 0, 30, false); + must_cleanup_with_gc_fence(&mut engine, b"k5", 30, 0, 40, true); // PUT, PUT, READ // `-----^ * - must_prewrite_put(&engine, b"k6", b"v6", b"k6", 15); - must_commit(&engine, b"k6", 15, 20); - must_prewrite_put(&engine, b"k6", b"v6x", b"k6", 22); - must_commit(&engine, b"k6", 22, 30); - must_cleanup_with_gc_fence(&engine, b"k6", 20, 0, 30, false); - must_cleanup_with_gc_fence(&engine, b"k6", 30, 0, 0, true); + must_prewrite_put(&mut engine, b"k6", b"v6", b"k6", 15); + must_commit(&mut engine, b"k6", 15, 20); + must_prewrite_put(&mut engine, b"k6", b"v6x", b"k6", 22); + must_commit(&mut engine, b"k6", 22, 30); + must_cleanup_with_gc_fence(&mut engine, b"k6", 20, 0, 30, false); + must_cleanup_with_gc_fence(&mut engine, b"k6", 30, 0, 0, true); // PUT, LOCK, READ // `----------^ // Note that this case is special because usually the `LOCK` is the first write // already got during prewrite/acquire_pessimistic_lock and will continue // searching an older version from the `LOCK` record. - must_prewrite_put(&engine, b"k7", b"v7", b"k7", 16); - must_commit(&engine, b"k7", 16, 30); - must_prewrite_lock(&engine, b"k7", b"k7", 37); - must_commit(&engine, b"k7", 37, 38); - must_cleanup_with_gc_fence(&engine, b"k7", 30, 0, 40, true); + must_prewrite_put(&mut engine, b"k7", b"v7", b"k7", 16); + must_commit(&mut engine, b"k7", 16, 30); + must_prewrite_lock(&mut engine, b"k7", b"k7", 37); + must_commit(&mut engine, b"k7", 37, 38); + must_cleanup_with_gc_fence(&mut engine, b"k7", 30, 0, 40, true); // 1. Check GC fence when doing constraint check with the older version. 
let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -1401,11 +1403,11 @@ pub mod tests { #[test] fn test_resend_prewrite_non_pessimistic_lock() { - let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); - must_acquire_pessimistic_lock(&engine, b"k1", b"k1", 10, 10); + must_acquire_pessimistic_lock(&mut engine, b"k1", b"k1", 10, 10); must_pessimistic_prewrite_put_async_commit( - &engine, + &mut engine, b"k1", b"v1", b"k1", @@ -1416,7 +1418,7 @@ pub mod tests { 15, ); must_pessimistic_prewrite_put_async_commit( - &engine, + &mut engine, b"k2", b"v2", b"k1", @@ -1428,14 +1430,14 @@ pub mod tests { ); // The transaction may be committed by another reader. - must_commit(&engine, b"k1", 10, 20); - must_commit(&engine, b"k2", 10, 20); + must_commit(&mut engine, b"k1", 10, 20); + must_commit(&mut engine, b"k2", 10, 20); // This is a re-sent prewrite. It should report a PessimisticLockNotFound. In // production, the caller will need to check if the current transaction is // already committed before, in order to provide the idempotency. let err = must_retry_pessimistic_prewrite_put_err( - &engine, + &mut engine, b"k2", b"v2", b"k1", @@ -1450,23 +1452,23 @@ pub mod tests { Error(box ErrorInner::PessimisticLockNotFound { .. }) )); // Commit repeatedly, these operations should have no effect. - must_commit(&engine, b"k1", 10, 25); - must_commit(&engine, b"k2", 10, 25); + must_commit(&mut engine, b"k1", 10, 25); + must_commit(&mut engine, b"k2", 10, 25); // Seek from 30, we should read commit_ts = 20 instead of 25. - must_seek_write(&engine, b"k1", 30, 10, 20, WriteType::Put); - must_seek_write(&engine, b"k2", 30, 10, 20, WriteType::Put); + must_seek_write(&mut engine, b"k1", 30, 10, 20, WriteType::Put); + must_seek_write(&mut engine, b"k2", 30, 10, 20, WriteType::Put); // Write another version to the keys. 
- must_prewrite_put(&engine, b"k1", b"v11", b"k1", 35); - must_prewrite_put(&engine, b"k2", b"v22", b"k1", 35); - must_commit(&engine, b"k1", 35, 40); - must_commit(&engine, b"k2", 35, 40); + must_prewrite_put(&mut engine, b"k1", b"v11", b"k1", 35); + must_prewrite_put(&mut engine, b"k2", b"v22", b"k1", 35); + must_commit(&mut engine, b"k1", 35, 40); + must_commit(&mut engine, b"k2", 35, 40); // A retrying non-pessimistic-lock prewrite request should not skip constraint // checks. It reports a PessimisticLockNotFound. let err = must_retry_pessimistic_prewrite_put_err( - &engine, + &mut engine, b"k2", b"v2", b"k1", @@ -1480,10 +1482,10 @@ pub mod tests { err, Error(box ErrorInner::PessimisticLockNotFound { .. }) )); - must_unlocked(&engine, b"k2"); + must_unlocked(&mut engine, b"k2"); let err = must_retry_pessimistic_prewrite_put_err( - &engine, + &mut engine, b"k2", b"v2", b"k1", @@ -1497,13 +1499,13 @@ pub mod tests { err, Error(box ErrorInner::PessimisticLockNotFound { .. }) )); - must_unlocked(&engine, b"k2"); + must_unlocked(&mut engine, b"k2"); // Committing still does nothing. - must_commit(&engine, b"k2", 10, 25); + must_commit(&mut engine, b"k2", 10, 25); // Try a different txn start ts (which haven't been successfully committed // before). let err = must_retry_pessimistic_prewrite_put_err( - &engine, + &mut engine, b"k2", b"v2", b"k1", @@ -1517,11 +1519,11 @@ pub mod tests { err, Error(box ErrorInner::PessimisticLockNotFound { .. }) )); - must_unlocked(&engine, b"k2"); + must_unlocked(&mut engine, b"k2"); // However conflict still won't be checked if there's a non-retry request // arriving. 
must_prewrite_put_impl( - &engine, + &mut engine, b"k2", b"v2", b"k1", @@ -1537,14 +1539,14 @@ pub mod tests { kvproto::kvrpcpb::Assertion::None, kvproto::kvrpcpb::AssertionLevel::Off, ); - must_locked(&engine, b"k2", 12); - must_rollback(&engine, b"k2", 12, false); + must_locked(&mut engine, b"k2", 12); + must_rollback(&mut engine, b"k2", 12, false); // And conflict check is according to the for_update_ts for pessimistic // prewrite. So, it will not report error if for_update_ts is large // enough. must_prewrite_put_impl( - &engine, + &mut engine, b"k2", b"v2", b"k1", @@ -1560,14 +1562,14 @@ pub mod tests { kvproto::kvrpcpb::Assertion::None, kvproto::kvrpcpb::AssertionLevel::Off, ); - must_locked(&engine, b"k2", 13); - must_rollback(&engine, b"k2", 13, false); + must_locked(&mut engine, b"k2", 13); + must_rollback(&mut engine, b"k2", 13, false); // Write a Rollback at 50 first. A retried prewrite at the same ts should // report WriteConflict. - must_rollback(&engine, b"k2", 50, false); + must_rollback(&mut engine, b"k2", 50, false); let err = must_retry_pessimistic_prewrite_put_err( - &engine, + &mut engine, b"k2", b"v2", b"k1", @@ -1584,7 +1586,7 @@ pub mod tests { ); // But prewriting at 48 can succeed because a newer rollback is allowed. 
must_prewrite_put_impl( - &engine, + &mut engine, b"k2", b"v2", b"k1", @@ -1600,28 +1602,28 @@ pub mod tests { kvproto::kvrpcpb::Assertion::None, kvproto::kvrpcpb::AssertionLevel::Off, ); - must_locked(&engine, b"k2", 48); + must_locked(&mut engine, b"k2", 48); } #[test] fn test_old_value_rollback_and_lock() { - let engine_rollback = crate::storage::TestEngineBuilder::new().build().unwrap(); + let mut engine_rollback = crate::storage::TestEngineBuilder::new().build().unwrap(); - must_prewrite_put(&engine_rollback, b"k1", b"v1", b"k1", 10); - must_commit(&engine_rollback, b"k1", 10, 30); + must_prewrite_put(&mut engine_rollback, b"k1", b"v1", b"k1", 10); + must_commit(&mut engine_rollback, b"k1", 10, 30); - must_prewrite_put(&engine_rollback, b"k1", b"v2", b"k1", 40); - must_rollback(&engine_rollback, b"k1", 40, false); + must_prewrite_put(&mut engine_rollback, b"k1", b"v2", b"k1", 40); + must_rollback(&mut engine_rollback, b"k1", 40, false); - let engine_lock = crate::storage::TestEngineBuilder::new().build().unwrap(); + let mut engine_lock = crate::storage::TestEngineBuilder::new().build().unwrap(); - must_prewrite_put(&engine_lock, b"k1", b"v1", b"k1", 10); - must_commit(&engine_lock, b"k1", 10, 30); + must_prewrite_put(&mut engine_lock, b"k1", b"v1", b"k1", 10); + must_commit(&mut engine_lock, b"k1", 10, 30); - must_prewrite_lock(&engine_lock, b"k1", b"k1", 40); - must_commit(&engine_lock, b"k1", 40, 45); + must_prewrite_lock(&mut engine_lock, b"k1", b"k1", 40); + must_commit(&mut engine_lock, b"k1", 40, 45); - for engine in &[engine_rollback, engine_lock] { + for engine in &mut [engine_rollback, engine_lock] { let start_ts = TimeStamp::from(50); let txn_props = TransactionProperties { start_ts, @@ -1660,7 +1662,7 @@ pub mod tests { // Prepares a test case that put, delete and lock a key and returns // a timestamp for testing the case. 
#[cfg(test)] - pub fn old_value_put_delete_lock_insert(engine: &E, key: &[u8]) -> TimeStamp { + pub fn old_value_put_delete_lock_insert(engine: &mut E, key: &[u8]) -> TimeStamp { must_prewrite_put(engine, key, b"v1", key, 10); must_commit(engine, key, 10, 20); @@ -1675,8 +1677,8 @@ pub mod tests { #[test] fn test_old_value_put_delete_lock_insert() { - let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); - let start_ts = old_value_put_delete_lock_insert(&engine, b"k1"); + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let start_ts = old_value_put_delete_lock_insert(&mut engine, b"k1"); let txn_props = TransactionProperties { start_ts, kind: TransactionKind::Optimistic(false), @@ -1729,7 +1731,7 @@ pub mod tests { let mut rg = rand::rngs::StdRng::seed_from_u64(seed); // Generate 1000 random cases; - let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); let cases = 1000; for _ in 0..cases { // At most 12 ops per-case. 
@@ -1753,20 +1755,20 @@ pub mod tests { match op { 0 => { - must_prewrite_put(&engine, key, &[i as u8], key, start_ts); - must_commit(&engine, key, start_ts, commit_ts); + must_prewrite_put(&mut engine, key, &[i as u8], key, start_ts); + must_commit(&mut engine, key, start_ts, commit_ts); } 1 => { - must_prewrite_delete(&engine, key, key, start_ts); - must_commit(&engine, key, start_ts, commit_ts); + must_prewrite_delete(&mut engine, key, key, start_ts); + must_commit(&mut engine, key, start_ts, commit_ts); } 2 => { - must_prewrite_lock(&engine, key, key, start_ts); - must_commit(&engine, key, start_ts, commit_ts); + must_prewrite_lock(&mut engine, key, key, start_ts); + must_commit(&mut engine, key, start_ts, commit_ts); } 3 => { - must_prewrite_put(&engine, key, &[i as u8], key, start_ts); - must_rollback(&engine, key, start_ts, false); + must_prewrite_put(&mut engine, key, &[i as u8], key, start_ts); + must_rollback(&mut engine, key, start_ts, false); } _ => unreachable!(), } @@ -1881,19 +1883,22 @@ pub mod tests { #[test] fn test_prewrite_with_assertion() { - let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); - - let prewrite_put = |key: &'_ _, - value, - ts: u64, - pessimistic_action, - for_update_ts: u64, - assertion, - assertion_level, - expect_success| { + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + + fn prewrite_put( + engine: &mut E, + key: &[u8], + value: &[u8], + ts: u64, + pessimistic_action: PrewriteRequestPessimisticAction, + for_update_ts: u64, + assertion: Assertion, + assertion_level: AssertionLevel, + expect_success: bool, + ) { if expect_success { must_prewrite_put_impl( - &engine, + engine, key, value, key, @@ -1911,7 +1916,7 @@ pub mod tests { ); } else { let err = must_prewrite_put_err_impl( - &engine, + engine, key, value, key, @@ -1926,193 +1931,209 @@ pub mod tests { ); assert!(matches!(err, Error(box ErrorInner::AssertionFailed { .. 
}))); } - }; - - let test = |key_prefix: &[u8], assertion_level, prepare: &dyn for<'a> Fn(&'a [u8])| { - let k1 = [key_prefix, b"k1"].concat(); - let k2 = [key_prefix, b"k2"].concat(); - let k3 = [key_prefix, b"k3"].concat(); - let k4 = [key_prefix, b"k4"].concat(); + } - for k in &[&k1, &k2, &k3, &k4] { - prepare(k.as_slice()); - } + let mut test = + |key_prefix: &[u8], + assertion_level, + prepare: &mut dyn for<'a> FnMut(&mut RocksEngine, &'a [u8])| { + let k1 = [key_prefix, b"k1"].concat(); + let k2 = [key_prefix, b"k2"].concat(); + let k3 = [key_prefix, b"k3"].concat(); + let k4 = [key_prefix, b"k4"].concat(); + + for k in &[&k1, &k2, &k3, &k4] { + prepare(&mut engine, k.as_slice()); + } - // Assertion passes (optimistic). - prewrite_put( - &k1, - b"v1", - 10, - SkipPessimisticCheck, - 0, - Assertion::NotExist, - assertion_level, - true, - ); - must_commit(&engine, &k1, 10, 15); + // Assertion passes (optimistic). + prewrite_put( + &mut engine, + &k1, + b"v1", + 10, + SkipPessimisticCheck, + 0, + Assertion::NotExist, + assertion_level, + true, + ); + must_commit(&mut engine, &k1, 10, 15); - prewrite_put( - &k1, - b"v1", - 20, - SkipPessimisticCheck, - 0, - Assertion::Exist, - assertion_level, - true, - ); - must_commit(&engine, &k1, 20, 25); - - // Assertion passes (pessimistic). - prewrite_put( - &k2, - b"v2", - 10, - DoPessimisticCheck, - 11, - Assertion::NotExist, - assertion_level, - true, - ); - must_commit(&engine, &k2, 10, 15); - - prewrite_put( - &k2, - b"v2", - 20, - DoPessimisticCheck, - 21, - Assertion::Exist, - assertion_level, - true, - ); - must_commit(&engine, &k2, 20, 25); - - // Optimistic transaction assertion fail on fast/strict level. 
- let pass = assertion_level == AssertionLevel::Off; - prewrite_put( - &k1, - b"v1", - 30, - SkipPessimisticCheck, - 0, - Assertion::NotExist, - assertion_level, - pass, - ); - prewrite_put( - &k3, - b"v3", - 30, - SkipPessimisticCheck, - 0, - Assertion::Exist, - assertion_level, - pass, - ); - must_rollback(&engine, &k1, 30, true); - must_rollback(&engine, &k3, 30, true); - - // Pessimistic transaction assertion fail on fast/strict level if assertion - // happens during amending pessimistic lock. - let pass = assertion_level == AssertionLevel::Off; - prewrite_put( - &k2, - b"v2", - 30, - DoPessimisticCheck, - 31, - Assertion::NotExist, - assertion_level, - pass, - ); - prewrite_put( - &k4, - b"v4", - 30, - DoPessimisticCheck, - 31, - Assertion::Exist, - assertion_level, - pass, - ); - must_rollback(&engine, &k2, 30, true); - must_rollback(&engine, &k4, 30, true); - - // Pessimistic transaction fail on strict level no matter what - // `pessimistic_action` is. - let pass = assertion_level != AssertionLevel::Strict; - prewrite_put( - &k1, - b"v1", - 40, - SkipPessimisticCheck, - 41, - Assertion::NotExist, - assertion_level, - pass, - ); - prewrite_put( - &k3, - b"v3", - 40, - SkipPessimisticCheck, - 41, - Assertion::Exist, - assertion_level, - pass, - ); - must_rollback(&engine, &k1, 40, true); - must_rollback(&engine, &k3, 40, true); - - must_acquire_pessimistic_lock(&engine, &k2, &k2, 40, 41); - must_acquire_pessimistic_lock(&engine, &k4, &k4, 40, 41); - prewrite_put( - &k2, - b"v2", - 40, - DoPessimisticCheck, - 41, - Assertion::NotExist, - assertion_level, - pass, - ); - prewrite_put( - &k4, - b"v4", - 40, - DoPessimisticCheck, - 41, - Assertion::Exist, - assertion_level, - pass, - ); - must_rollback(&engine, &k1, 40, true); - must_rollback(&engine, &k3, 40, true); - }; + prewrite_put( + &mut engine, + &k1, + b"v1", + 20, + SkipPessimisticCheck, + 0, + Assertion::Exist, + assertion_level, + true, + ); + must_commit(&mut engine, &k1, 20, 25); + + // Assertion 
passes (pessimistic). + prewrite_put( + &mut engine, + &k2, + b"v2", + 10, + DoPessimisticCheck, + 11, + Assertion::NotExist, + assertion_level, + true, + ); + must_commit(&mut engine, &k2, 10, 15); + + prewrite_put( + &mut engine, + &k2, + b"v2", + 20, + DoPessimisticCheck, + 21, + Assertion::Exist, + assertion_level, + true, + ); + must_commit(&mut engine, &k2, 20, 25); + + // Optimistic transaction assertion fail on fast/strict level. + let pass = assertion_level == AssertionLevel::Off; + prewrite_put( + &mut engine, + &k1, + b"v1", + 30, + SkipPessimisticCheck, + 0, + Assertion::NotExist, + assertion_level, + pass, + ); + prewrite_put( + &mut engine, + &k3, + b"v3", + 30, + SkipPessimisticCheck, + 0, + Assertion::Exist, + assertion_level, + pass, + ); + must_rollback(&mut engine, &k1, 30, true); + must_rollback(&mut engine, &k3, 30, true); + + // Pessimistic transaction assertion fail on fast/strict level if assertion + // happens during amending pessimistic lock. + let pass = assertion_level == AssertionLevel::Off; + prewrite_put( + &mut engine, + &k2, + b"v2", + 30, + DoPessimisticCheck, + 31, + Assertion::NotExist, + assertion_level, + pass, + ); + prewrite_put( + &mut engine, + &k4, + b"v4", + 30, + DoPessimisticCheck, + 31, + Assertion::Exist, + assertion_level, + pass, + ); + must_rollback(&mut engine, &k2, 30, true); + must_rollback(&mut engine, &k4, 30, true); + + // Pessimistic transaction fail on strict level no matter what + // `pessimistic_action` is. 
+ let pass = assertion_level != AssertionLevel::Strict; + prewrite_put( + &mut engine, + &k1, + b"v1", + 40, + SkipPessimisticCheck, + 41, + Assertion::NotExist, + assertion_level, + pass, + ); + prewrite_put( + &mut engine, + &k3, + b"v3", + 40, + SkipPessimisticCheck, + 41, + Assertion::Exist, + assertion_level, + pass, + ); + must_rollback(&mut engine, &k1, 40, true); + must_rollback(&mut engine, &k3, 40, true); + + must_acquire_pessimistic_lock(&mut engine, &k2, &k2, 40, 41); + must_acquire_pessimistic_lock(&mut engine, &k4, &k4, 40, 41); + prewrite_put( + &mut engine, + &k2, + b"v2", + 40, + DoPessimisticCheck, + 41, + Assertion::NotExist, + assertion_level, + pass, + ); + prewrite_put( + &mut engine, + &k4, + b"v4", + 40, + DoPessimisticCheck, + 41, + Assertion::Exist, + assertion_level, + pass, + ); + must_rollback(&mut engine, &k1, 40, true); + must_rollback(&mut engine, &k3, 40, true); + }; - let prepare_rollback = |k: &'_ _| must_rollback(&engine, k, 3, true); - let prepare_lock_record = |k: &'_ _| { - must_prewrite_lock(&engine, k, k, 3); - must_commit(&engine, k, 3, 5); + let mut prepare_rollback = + |engine: &mut RocksEngine, k: &'_ _| must_rollback(engine, k, 3, true); + let mut prepare_lock_record = |engine: &mut RocksEngine, k: &'_ _| { + must_prewrite_lock(engine, k, k, 3); + must_commit(engine, k, 3, 5); }; - let prepare_delete = |k: &'_ _| { - must_prewrite_put(&engine, k, b"deleted-value", k, 3); - must_commit(&engine, k, 3, 5); - must_prewrite_delete(&engine, k, k, 7); - must_commit(&engine, k, 7, 9); + let mut prepare_delete = |engine: &mut RocksEngine, k: &'_ _| { + must_prewrite_put(engine, k, b"deleted-value", k, 3); + must_commit(engine, k, 3, 5); + must_prewrite_delete(engine, k, k, 7); + must_commit(engine, k, 7, 9); }; - let prepare_gc_fence = |k: &'_ _| { - must_prewrite_put(&engine, k, b"deleted-value", k, 3); - must_commit(&engine, k, 3, 5); - must_cleanup_with_gc_fence(&engine, k, 5, 0, 7, true); + let mut prepare_gc_fence = 
|engine: &mut RocksEngine, k: &'_ _| { + must_prewrite_put(engine, k, b"deleted-value", k, 3); + must_commit(engine, k, 3, 5); + must_cleanup_with_gc_fence(engine, k, 5, 0, 7, true); }; // Test multiple cases without recreating the engine. So use a increasing key // prefix to avoid each case interfering each other. let mut key_prefix = b'a'; - let mut test_all_levels = |prepare| { + let mut test_all_levels = |prepare: &mut dyn for<'a> FnMut(&mut RocksEngine, &'a [u8])| { test(&[key_prefix], AssertionLevel::Off, prepare); key_prefix += 1; test(&[key_prefix], AssertionLevel::Fast, prepare); @@ -2121,26 +2142,33 @@ pub mod tests { key_prefix += 1; }; - test_all_levels(&|_| ()); - test_all_levels(&prepare_rollback); - test_all_levels(&prepare_lock_record); - test_all_levels(&prepare_delete); - test_all_levels(&prepare_gc_fence); + test_all_levels(&mut |_, _| ()); + test_all_levels(&mut prepare_rollback); + test_all_levels(&mut prepare_lock_record); + test_all_levels(&mut prepare_delete); + test_all_levels(&mut prepare_gc_fence); } #[test] fn test_deferred_constraint_check() { - let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); let key = b"key"; let key2 = b"key2"; let value = b"value"; // 1. 
write conflict - must_prewrite_put(&engine, key, value, key, 1); - must_commit(&engine, key, 1, 5); - must_pessimistic_prewrite_insert(&engine, key2, value, key, 3, 3, SkipPessimisticCheck); - let err = - must_pessimistic_prewrite_insert_err(&engine, key, value, key, 3, 3, DoConstraintCheck); + must_prewrite_put(&mut engine, key, value, key, 1); + must_commit(&mut engine, key, 1, 5); + must_pessimistic_prewrite_insert(&mut engine, key2, value, key, 3, 3, SkipPessimisticCheck); + let err = must_pessimistic_prewrite_insert_err( + &mut engine, + key, + value, + key, + 3, + 3, + DoConstraintCheck, + ); assert!(matches!( err, Error(box ErrorInner::WriteConflict { @@ -2150,10 +2178,10 @@ pub mod tests { )); // 2. unique constraint fail - must_prewrite_put(&engine, key, value, key, 11); - must_commit(&engine, key, 11, 12); + must_prewrite_put(&mut engine, key, value, key, 11); + must_commit(&mut engine, key, 11, 12); let err = must_pessimistic_prewrite_insert_err( - &engine, + &mut engine, key, value, key, @@ -2164,8 +2192,8 @@ pub mod tests { assert!(matches!(err, Error(box ErrorInner::AlreadyExist { .. }))); // 3. 
success - must_prewrite_delete(&engine, key, key, 21); - must_commit(&engine, key, 21, 22); - must_pessimistic_prewrite_insert(&engine, key, value, key, 23, 23, DoConstraintCheck); + must_prewrite_delete(&mut engine, key, key, 21); + must_commit(&mut engine, key, 21, 22); + must_pessimistic_prewrite_insert(&mut engine, key, value, key, 23, 23, DoConstraintCheck); } } diff --git a/src/storage/txn/actions/tests.rs b/src/storage/txn/actions/tests.rs index 65eafa157ce..fdf060d950d 100644 --- a/src/storage/txn/actions/tests.rs +++ b/src/storage/txn/actions/tests.rs @@ -18,7 +18,7 @@ use crate::storage::{ }; pub fn must_prewrite_put_impl( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -56,7 +56,7 @@ pub fn must_prewrite_put_impl( } pub fn must_prewrite_insert_impl( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -94,7 +94,7 @@ pub fn must_prewrite_insert_impl( } pub fn must_prewrite_put_impl_with_should_not_exist( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -164,7 +164,7 @@ pub fn must_prewrite_put_impl_with_should_not_exist( } pub fn must_prewrite_put( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -190,7 +190,7 @@ pub fn must_prewrite_put( } pub fn must_prewrite_put_on_region( - engine: &E, + engine: &mut E, region_id: u64, key: &[u8], value: &[u8], @@ -219,7 +219,7 @@ pub fn must_prewrite_put_on_region( } pub fn must_pessimistic_prewrite_put( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -247,7 +247,7 @@ pub fn must_pessimistic_prewrite_put( } pub fn must_pessimistic_prewrite_insert( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -275,7 +275,7 @@ pub fn must_pessimistic_prewrite_insert( } pub fn must_pessimistic_prewrite_put_with_ttl( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -304,7 +304,7 @@ pub fn must_pessimistic_prewrite_put_with_ttl( } pub fn must_prewrite_put_for_large_txn( - 
engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -341,7 +341,7 @@ pub fn must_prewrite_put_for_large_txn( } pub fn must_prewrite_put_async_commit( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -370,7 +370,7 @@ pub fn must_prewrite_put_async_commit( } pub fn must_pessimistic_prewrite_put_async_commit( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -426,7 +426,7 @@ fn default_txn_props( } pub fn must_prewrite_put_err_impl( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -457,7 +457,7 @@ pub fn must_prewrite_put_err_impl( } pub fn must_prewrite_insert_err_impl( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -488,7 +488,7 @@ pub fn must_prewrite_insert_err_impl( } pub fn must_prewrite_put_err_impl_with_should_not_exist( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -535,7 +535,7 @@ pub fn must_prewrite_put_err_impl_with_should_not_exist( } pub fn must_prewrite_put_err( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -558,7 +558,7 @@ pub fn must_prewrite_put_err( } pub fn must_pessimistic_prewrite_put_err( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -583,7 +583,7 @@ pub fn must_pessimistic_prewrite_put_err( } pub fn must_pessimistic_prewrite_insert_err( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -608,7 +608,7 @@ pub fn must_pessimistic_prewrite_insert_err( } pub fn must_retry_pessimistic_prewrite_put_err( - engine: &E, + engine: &mut E, key: &[u8], value: &[u8], pk: &[u8], @@ -635,7 +635,7 @@ pub fn must_retry_pessimistic_prewrite_put_err( } fn must_prewrite_delete_impl( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], ts: impl Into, @@ -675,7 +675,7 @@ fn must_prewrite_delete_impl( } pub fn must_prewrite_delete( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], ts: impl Into, @@ -692,7 +692,7 @@ pub fn must_prewrite_delete( } pub fn 
must_prewrite_delete_on_region( - engine: &E, + engine: &mut E, region_id: u64, key: &[u8], pk: &[u8], @@ -710,7 +710,7 @@ pub fn must_prewrite_delete_on_region( } pub fn must_pessimistic_prewrite_delete( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], ts: impl Into, @@ -721,7 +721,7 @@ pub fn must_pessimistic_prewrite_delete( } fn must_prewrite_lock_impl( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], ts: impl Into, @@ -752,12 +752,17 @@ fn must_prewrite_lock_impl( .unwrap(); } -pub fn must_prewrite_lock(engine: &E, key: &[u8], pk: &[u8], ts: impl Into) { +pub fn must_prewrite_lock( + engine: &mut E, + key: &[u8], + pk: &[u8], + ts: impl Into, +) { must_prewrite_lock_impl(engine, key, pk, ts, TimeStamp::zero(), SkipPessimisticCheck); } pub fn must_prewrite_lock_err( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], ts: impl Into, @@ -780,7 +785,7 @@ pub fn must_prewrite_lock_err( } pub fn must_pessimistic_prewrite_lock( - engine: &E, + engine: &mut E, key: &[u8], pk: &[u8], ts: impl Into, @@ -791,7 +796,7 @@ pub fn must_pessimistic_prewrite_lock( } pub fn must_rollback( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, protect_rollback: bool, @@ -813,7 +818,7 @@ pub fn must_rollback( write(engine, &ctx, txn.into_modifies()); } -pub fn must_rollback_err(engine: &E, key: &[u8], start_ts: impl Into) { +pub fn must_rollback_err(engine: &mut E, key: &[u8], start_ts: impl Into) { let snapshot = engine.snapshot(Default::default()).unwrap(); let start_ts = start_ts.into(); let cm = ConcurrencyManager::new(start_ts); diff --git a/src/storage/txn/commands/atomic_store.rs b/src/storage/txn/commands/atomic_store.rs index bdb4bca2110..4b780f5bf2d 100644 --- a/src/storage/txn/commands/atomic_store.rs +++ b/src/storage/txn/commands/atomic_store.rs @@ -81,7 +81,7 @@ mod tests { } fn test_atomic_process_write_impl() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = 
concurrency_manager::ConcurrencyManager::new(1.into()); let raw_keys = vec![b"ra", b"rz"]; let raw_values = vec![b"valuea", b"valuez"]; diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index 7f6f4879a3d..2678effbf7b 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -178,7 +178,7 @@ pub mod tests { }; pub fn must_success( - engine: &E, + engine: &mut E, key: &[u8], lock_ts: impl Into, expect_status: SecondaryLocksStatus, @@ -215,12 +215,13 @@ pub mod tests { #[test] fn test_check_async_commit_secondary_locks() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); + let mut engine_clone = engine.clone(); let ctx = Context::default(); let cm = ConcurrencyManager::new(1.into()); - let check_secondary = |key, ts| { - let snapshot = engine.snapshot(Default::default()).unwrap(); + let mut check_secondary = |key, ts| { + let snapshot = engine_clone.snapshot(Default::default()).unwrap(); let key = Key::from_raw(key); let ts = TimeStamp::new(ts); let command = crate::storage::txn::commands::CheckSecondaryLocks { @@ -242,7 +243,7 @@ pub mod tests { ) .unwrap(); if !result.to_be_write.modifies.is_empty() { - engine.write(&ctx, result.to_be_write).unwrap(); + engine_clone.write(&ctx, result.to_be_write).unwrap(); } if let ProcessResult::SecondaryLocksStatus { status } = result.pr { status @@ -251,11 +252,11 @@ pub mod tests { } }; - must_prewrite_lock(&engine, b"k1", b"key", 1); - must_commit(&engine, b"k1", 1, 3); - must_rollback(&engine, b"k1", 5, false); - must_prewrite_lock(&engine, b"k1", b"key", 7); - must_commit(&engine, b"k1", 7, 9); + must_prewrite_lock(&mut engine, b"k1", b"key", 1); + must_commit(&mut engine, b"k1", 1, 3); + must_rollback(&mut engine, b"k1", 5, false); + must_prewrite_lock(&mut engine, b"k1", b"key", 7); + must_commit(&mut engine, b"k1", 7, 9); // 
Lock CF has no lock // @@ -269,20 +270,20 @@ pub mod tests { check_secondary(b"k1", 7), SecondaryLocksStatus::Committed(9.into()) ); - must_get_commit_ts(&engine, b"k1", 7, 9); + must_get_commit_ts(&mut engine, b"k1", 7, 9); assert_eq!(check_secondary(b"k1", 5), SecondaryLocksStatus::RolledBack); - must_get_rollback_ts(&engine, b"k1", 5); + must_get_rollback_ts(&mut engine, b"k1", 5); assert_eq!( check_secondary(b"k1", 1), SecondaryLocksStatus::Committed(3.into()) ); - must_get_commit_ts(&engine, b"k1", 1, 3); + must_get_commit_ts(&mut engine, b"k1", 1, 3); assert_eq!(check_secondary(b"k1", 6), SecondaryLocksStatus::RolledBack); - must_get_rollback_protected(&engine, b"k1", 6, true); + must_get_rollback_protected(&mut engine, b"k1", 6, true); // ---------------------------- - must_acquire_pessimistic_lock(&engine, b"k1", b"key", 11, 11); + must_acquire_pessimistic_lock(&mut engine, b"k1", b"key", 11, 11); // Lock CF has a pessimistic lock // @@ -294,11 +295,11 @@ pub mod tests { let status = check_secondary(b"k1", 11); assert_eq!(status, SecondaryLocksStatus::RolledBack); - must_get_rollback_protected(&engine, b"k1", 11, true); + must_get_rollback_protected(&mut engine, b"k1", 11, true); // ---------------------------- - must_prewrite_lock(&engine, b"k1", b"key", 13); + must_prewrite_lock(&mut engine, b"k1", b"key", 13); // Lock CF has an optimistic lock // @@ -313,11 +314,11 @@ pub mod tests { SecondaryLocksStatus::Locked(_) => {} res => panic!("unexpected lock status: {:?}", res), } - must_locked(&engine, b"k1", 13); + must_locked(&mut engine, b"k1", 13); // ---------------------------- - must_commit(&engine, b"k1", 13, 15); + must_commit(&mut engine, b"k1", 13, 15); // Lock CF has an optimistic lock // @@ -333,12 +334,12 @@ pub mod tests { SecondaryLocksStatus::RolledBack => {} res => panic!("unexpected lock status: {:?}", res), } - must_get_rollback_protected(&engine, b"k1", 14, true); + must_get_rollback_protected(&mut engine, b"k1", 14, true); match 
check_secondary(b"k1", 15) { SecondaryLocksStatus::RolledBack => {} res => panic!("unexpected lock status: {:?}", res), } - must_get_overlapped_rollback(&engine, b"k1", 15, 13, WriteType::Lock, Some(0)); + must_get_overlapped_rollback(&mut engine, b"k1", 15, 13, WriteType::Lock, Some(0)); } } diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index 24f69e9a237..ef323cf206b 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -166,7 +166,7 @@ pub mod tests { }; pub fn must_success( - engine: &E, + engine: &mut E, primary_key: &[u8], lock_ts: impl Into, caller_start_ts: impl Into, @@ -213,7 +213,7 @@ pub mod tests { } pub fn must_err( - engine: &E, + engine: &mut E, primary_key: &[u8], lock_ts: impl Into, caller_start_ts: impl Into, @@ -285,15 +285,15 @@ pub mod tests { #[test] fn test_check_async_commit_txn_status() { let do_test = |rollback_if_not_exist: bool| { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let r = rollback_if_not_exist; // case 1: primary is prewritten (optimistic) - must_prewrite_put_async_commit(&engine, b"k1", b"v", b"k1", &Some(vec![]), 1, 2); + must_prewrite_put_async_commit(&mut engine, b"k1", b"v", b"k1", &Some(vec![]), 1, 2); // All following check_txn_status should return the unchanged lock information // caller_start_ts == current_ts == 0 must_success( - &engine, + &mut engine, b"k1", 1, 0, @@ -305,7 +305,7 @@ pub mod tests { ); // caller_start_ts != 0 must_success( - &engine, + &mut engine, b"k1", 1, 5, @@ -317,7 +317,7 @@ pub mod tests { ); // current_ts != 0 must_success( - &engine, + &mut engine, b"k1", 1, 0, @@ -329,7 +329,7 @@ pub mod tests { ); // caller_start_ts != 0 && current_ts != 0 must_success( - &engine, + &mut engine, b"k1", 1, 10, @@ -341,7 +341,7 @@ pub mod tests { ); // caller_start_ts == u64::MAX must_success( - &engine, + &mut engine, 
b"k1", 1, TimeStamp::max(), @@ -353,7 +353,7 @@ pub mod tests { ); // current_ts == u64::MAX must_success( - &engine, + &mut engine, b"k1", 1, 12, @@ -365,7 +365,7 @@ pub mod tests { ); // force_sync_commit = true must_success( - &engine, + &mut engine, b"k1", 1, 12, @@ -375,13 +375,13 @@ pub mod tests { false, |s| s == TtlExpire, ); - must_unlocked(&engine, b"k1"); - must_get_rollback_protected(&engine, b"k1", 1, false); + must_unlocked(&mut engine, b"k1"); + must_get_rollback_protected(&mut engine, b"k1", 1, false); // case 2: primary is prewritten (pessimistic) - must_acquire_pessimistic_lock(&engine, b"k2", b"k2", 15, 15); + must_acquire_pessimistic_lock(&mut engine, b"k2", b"k2", 15, 15); must_pessimistic_prewrite_put_async_commit( - &engine, + &mut engine, b"k2", b"v", b"k2", @@ -394,7 +394,7 @@ pub mod tests { // All following check_txn_status should return the unchanged lock information // caller_start_ts == current_ts == 0 must_success( - &engine, + &mut engine, b"k2", 15, 0, @@ -406,7 +406,7 @@ pub mod tests { ); // caller_start_ts != 0 must_success( - &engine, + &mut engine, b"k2", 15, 18, @@ -418,7 +418,7 @@ pub mod tests { ); // current_ts != 0 must_success( - &engine, + &mut engine, b"k2", 15, 0, @@ -430,7 +430,7 @@ pub mod tests { ); // caller_start_ts != 0 && current_ts != 0 must_success( - &engine, + &mut engine, b"k2", 15, 19, @@ -442,7 +442,7 @@ pub mod tests { ); // caller_start_ts == u64::MAX must_success( - &engine, + &mut engine, b"k2", 15, TimeStamp::max(), @@ -454,7 +454,7 @@ pub mod tests { ); // current_ts == u64::MAX must_success( - &engine, + &mut engine, b"k2", 15, 20, @@ -466,7 +466,7 @@ pub mod tests { ); // force_sync_commit = true must_success( - &engine, + &mut engine, b"k2", 15, 20, @@ -476,15 +476,15 @@ pub mod tests { false, |s| s == TtlExpire, ); - must_unlocked(&engine, b"k2"); - must_get_rollback_protected(&engine, b"k2", 15, true); + must_unlocked(&mut engine, b"k2"); + must_get_rollback_protected(&mut engine, b"k2", 15, 
true); // case 3: pessimistic transaction with two keys (large txn), secondary is // prewritten first - must_acquire_pessimistic_lock_for_large_txn(&engine, b"k3", b"k3", 20, 20, 100); - must_acquire_pessimistic_lock_for_large_txn(&engine, b"k4", b"k3", 20, 25, 100); + must_acquire_pessimistic_lock_for_large_txn(&mut engine, b"k3", b"k3", 20, 20, 100); + must_acquire_pessimistic_lock_for_large_txn(&mut engine, b"k4", b"k3", 20, 25, 100); must_pessimistic_prewrite_put_async_commit( - &engine, + &mut engine, b"k4", b"v", b"k3", @@ -497,7 +497,7 @@ pub mod tests { // the client must call check_txn_status with caller_start_ts == current_ts == // 0, should not push must_success( - &engine, + &mut engine, b"k3", 20, 0, @@ -510,10 +510,10 @@ pub mod tests { // case 4: pessimistic transaction with two keys (not large txn), secondary is // prewritten first - must_acquire_pessimistic_lock_with_ttl(&engine, b"k5", b"k5", 30, 30, 100); - must_acquire_pessimistic_lock_with_ttl(&engine, b"k6", b"k5", 30, 35, 100); + must_acquire_pessimistic_lock_with_ttl(&mut engine, b"k5", b"k5", 30, 30, 100); + must_acquire_pessimistic_lock_with_ttl(&mut engine, b"k6", b"k5", 30, 35, 100); must_pessimistic_prewrite_put_async_commit( - &engine, + &mut engine, b"k6", b"v", b"k5", @@ -526,7 +526,7 @@ pub mod tests { // the client must call check_txn_status with caller_start_ts == current_ts == // 0, should not push must_success( - &engine, + &mut engine, b"k5", 30, 0, @@ -543,7 +543,7 @@ pub mod tests { } fn test_check_txn_status_impl(rollback_if_not_exist: bool) { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k, v) = (b"k1", b"v1"); @@ -554,7 +554,7 @@ pub mod tests { // Try to check a not exist thing. if r { must_success( - &engine, + &mut engine, k, ts(3, 0), ts(3, 1), @@ -565,20 +565,29 @@ pub mod tests { |s| s == LockNotExist, ); // A protected rollback record will be written. 
- must_get_rollback_protected(&engine, k, ts(3, 0), true); + must_get_rollback_protected(&mut engine, k, ts(3, 0), true); } else { - must_err(&engine, k, ts(3, 0), ts(3, 1), ts(3, 2), r, false, false); + must_err( + &mut engine, + k, + ts(3, 0), + ts(3, 1), + ts(3, 2), + r, + false, + false, + ); } // Lock the key with TTL=100. - must_prewrite_put_for_large_txn(&engine, k, v, k, ts(5, 0), 100, 0); + must_prewrite_put_for_large_txn(&mut engine, k, v, k, ts(5, 0), 100, 0); // The initial min_commit_ts is start_ts + 1. - must_large_txn_locked(&engine, k, ts(5, 0), 100, ts(5, 1), false); + must_large_txn_locked(&mut engine, k, ts(5, 0), 100, ts(5, 1), false); // CheckTxnStatus with caller_start_ts = 0 and current_ts = 0 should just return // the information of the lock without changing it. must_success( - &engine, + &mut engine, k, ts(5, 0), 0, @@ -591,7 +600,7 @@ pub mod tests { // Update min_commit_ts to current_ts. must_success( - &engine, + &mut engine, k, ts(5, 0), ts(6, 0), @@ -601,12 +610,12 @@ pub mod tests { false, uncommitted(100, ts(7, 0), true), ); - must_large_txn_locked(&engine, k, ts(5, 0), 100, ts(7, 0), false); + must_large_txn_locked(&mut engine, k, ts(5, 0), 100, ts(7, 0), false); // Update min_commit_ts to caller_start_ts + 1 if current_ts < caller_start_ts. // This case should be impossible. But if it happens, we prevents it. must_success( - &engine, + &mut engine, k, ts(5, 0), ts(9, 0), @@ -616,13 +625,13 @@ pub mod tests { false, uncommitted(100, ts(9, 1), true), ); - must_large_txn_locked(&engine, k, ts(5, 0), 100, ts(9, 1), false); + must_large_txn_locked(&mut engine, k, ts(5, 0), 100, ts(9, 1), false); // caller_start_ts < lock.min_commit_ts < current_ts // When caller_start_ts < lock.min_commit_ts, no need to update it, but pushed // should be true. 
must_success( - &engine, + &mut engine, k, ts(5, 0), ts(8, 0), @@ -632,11 +641,11 @@ pub mod tests { false, uncommitted(100, ts(9, 1), true), ); - must_large_txn_locked(&engine, k, ts(5, 0), 100, ts(9, 1), false); + must_large_txn_locked(&mut engine, k, ts(5, 0), 100, ts(9, 1), false); // current_ts < lock.min_commit_ts < caller_start_ts must_success( - &engine, + &mut engine, k, ts(5, 0), ts(11, 0), @@ -646,12 +655,12 @@ pub mod tests { false, uncommitted(100, ts(11, 1), true), ); - must_large_txn_locked(&engine, k, ts(5, 0), 100, ts(11, 1), false); + must_large_txn_locked(&mut engine, k, ts(5, 0), 100, ts(11, 1), false); // For same caller_start_ts and current_ts, update min_commit_ts to // caller_start_ts + 1 must_success( - &engine, + &mut engine, k, ts(5, 0), ts(12, 0), @@ -661,11 +670,11 @@ pub mod tests { false, uncommitted(100, ts(12, 1), true), ); - must_large_txn_locked(&engine, k, ts(5, 0), 100, ts(12, 1), false); + must_large_txn_locked(&mut engine, k, ts(5, 0), 100, ts(12, 1), false); // Logical time is also considered in the comparing must_success( - &engine, + &mut engine, k, ts(5, 0), ts(13, 1), @@ -675,14 +684,14 @@ pub mod tests { false, uncommitted(100, ts(13, 3), true), ); - must_large_txn_locked(&engine, k, ts(5, 0), 100, ts(13, 3), false); + must_large_txn_locked(&mut engine, k, ts(5, 0), 100, ts(13, 3), false); - must_commit(&engine, k, ts(5, 0), ts(15, 0)); - must_unlocked(&engine, k); + must_commit(&mut engine, k, ts(5, 0), ts(15, 0)); + must_unlocked(&mut engine, k); // Check committed key will get the commit ts. must_success( - &engine, + &mut engine, k, ts(5, 0), ts(12, 0), @@ -692,14 +701,14 @@ pub mod tests { false, committed(ts(15, 0)), ); - must_unlocked(&engine, k); + must_unlocked(&mut engine, k); - must_prewrite_put_for_large_txn(&engine, k, v, k, ts(20, 0), 100, 0); + must_prewrite_put_for_large_txn(&mut engine, k, v, k, ts(20, 0), 100, 0); // Check a committed transaction when there is another lock. 
Expect getting the // commit ts. must_success( - &engine, + &mut engine, k, ts(5, 0), ts(12, 0), @@ -714,7 +723,7 @@ pub mod tests { // `rollback_if_not_exist` is set. if r { must_success( - &engine, + &mut engine, k, ts(6, 0), ts(12, 0), @@ -726,7 +735,7 @@ pub mod tests { ); // And a rollback record will be written. must_seek_write( - &engine, + &mut engine, k, ts(6, 0), ts(6, 0), @@ -734,13 +743,22 @@ pub mod tests { WriteType::Rollback, ); } else { - must_err(&engine, k, ts(6, 0), ts(12, 0), ts(12, 0), r, false, false); + must_err( + &mut engine, + k, + ts(6, 0), + ts(12, 0), + ts(12, 0), + r, + false, + false, + ); } // TTL check is based on physical time (in ms). When logical time's difference // is larger than TTL, the lock won't be resolved. must_success( - &engine, + &mut engine, k, ts(20, 0), ts(21, 105), @@ -750,11 +768,11 @@ pub mod tests { false, uncommitted(100, ts(21, 106), true), ); - must_large_txn_locked(&engine, k, ts(20, 0), 100, ts(21, 106), false); + must_large_txn_locked(&mut engine, k, ts(20, 0), 100, ts(21, 106), false); // If physical time's difference exceeds TTL, lock will be resolved. must_success( - &engine, + &mut engine, k, ts(20, 0), ts(121, 0), @@ -764,9 +782,9 @@ pub mod tests { false, |s| s == TtlExpire, ); - must_unlocked(&engine, k); + must_unlocked(&mut engine, k); must_seek_write( - &engine, + &mut engine, k, TimeStamp::max(), ts(20, 0), @@ -775,10 +793,10 @@ pub mod tests { ); // Push the min_commit_ts of pessimistic locks. 
- must_acquire_pessimistic_lock_for_large_txn(&engine, k, k, ts(4, 0), ts(130, 0), 200); - must_large_txn_locked(&engine, k, ts(4, 0), 200, ts(130, 1), true); + must_acquire_pessimistic_lock_for_large_txn(&mut engine, k, k, ts(4, 0), ts(130, 0), 200); + must_large_txn_locked(&mut engine, k, ts(4, 0), 200, ts(130, 1), true); must_success( - &engine, + &mut engine, k, ts(4, 0), ts(135, 0), @@ -788,20 +806,28 @@ pub mod tests { false, uncommitted(200, ts(135, 1), true), ); - must_large_txn_locked(&engine, k, ts(4, 0), 200, ts(135, 1), true); + must_large_txn_locked(&mut engine, k, ts(4, 0), 200, ts(135, 1), true); // Commit the key. - must_pessimistic_prewrite_put(&engine, k, v, k, ts(4, 0), ts(130, 0), DoPessimisticCheck); - must_commit(&engine, k, ts(4, 0), ts(140, 0)); - must_unlocked(&engine, k); - must_get_commit_ts(&engine, k, ts(4, 0), ts(140, 0)); + must_pessimistic_prewrite_put( + &mut engine, + k, + v, + k, + ts(4, 0), + ts(130, 0), + DoPessimisticCheck, + ); + must_commit(&mut engine, k, ts(4, 0), ts(140, 0)); + must_unlocked(&mut engine, k); + must_get_commit_ts(&mut engine, k, ts(4, 0), ts(140, 0)); // Now the transactions are intersecting: // T1: start_ts = 5, commit_ts = 15 // T2: start_ts = 20, rollback // T3: start_ts = 4, commit_ts = 140 must_success( - &engine, + &mut engine, k, ts(4, 0), ts(10, 0), @@ -812,7 +838,7 @@ pub mod tests { committed(ts(140, 0)), ); must_success( - &engine, + &mut engine, k, ts(5, 0), ts(10, 0), @@ -823,7 +849,7 @@ pub mod tests { committed(ts(15, 0)), ); must_success( - &engine, + &mut engine, k, ts(20, 0), ts(10, 0), @@ -835,9 +861,9 @@ pub mod tests { ); // Rollback expired pessimistic lock. 
- must_acquire_pessimistic_lock_for_large_txn(&engine, k, k, ts(150, 0), ts(150, 0), 100); + must_acquire_pessimistic_lock_for_large_txn(&mut engine, k, k, ts(150, 0), ts(150, 0), 100); must_success( - &engine, + &mut engine, k, ts(150, 0), ts(160, 0), @@ -847,9 +873,9 @@ pub mod tests { false, uncommitted(100, ts(160, 1), true), ); - must_large_txn_locked(&engine, k, ts(150, 0), 100, ts(160, 1), true); + must_large_txn_locked(&mut engine, k, ts(150, 0), 100, ts(160, 1), true); must_success( - &engine, + &mut engine, k, ts(150, 0), ts(160, 0), @@ -859,10 +885,10 @@ pub mod tests { false, |s| s == TtlExpire, ); - must_unlocked(&engine, k); + must_unlocked(&mut engine, k); // Rolling back a pessimistic lock should leave Rollback mark. must_seek_write( - &engine, + &mut engine, k, TimeStamp::max(), ts(150, 0), @@ -871,10 +897,10 @@ pub mod tests { ); // Rollback when current_ts is u64::max_value() - must_prewrite_put_for_large_txn(&engine, k, v, k, ts(270, 0), 100, 0); - must_large_txn_locked(&engine, k, ts(270, 0), 100, ts(270, 1), false); + must_prewrite_put_for_large_txn(&mut engine, k, v, k, ts(270, 0), 100, 0); + must_large_txn_locked(&mut engine, k, ts(270, 0), 100, ts(270, 1), false); must_success( - &engine, + &mut engine, k, ts(270, 0), ts(271, 0), @@ -884,9 +910,9 @@ pub mod tests { false, |s| s == TtlExpire, ); - must_unlocked(&engine, k); + must_unlocked(&mut engine, k); must_seek_write( - &engine, + &mut engine, k, TimeStamp::max(), ts(270, 0), @@ -894,10 +920,10 @@ pub mod tests { WriteType::Rollback, ); - must_acquire_pessimistic_lock_for_large_txn(&engine, k, k, ts(280, 0), ts(280, 0), 100); - must_large_txn_locked(&engine, k, ts(280, 0), 100, ts(280, 1), true); + must_acquire_pessimistic_lock_for_large_txn(&mut engine, k, k, ts(280, 0), ts(280, 0), 100); + must_large_txn_locked(&mut engine, k, ts(280, 0), 100, ts(280, 1), true); must_success( - &engine, + &mut engine, k, ts(280, 0), ts(281, 0), @@ -907,9 +933,9 @@ pub mod tests { false, |s| s == 
TtlExpire, ); - must_unlocked(&engine, k); + must_unlocked(&mut engine, k); must_seek_write( - &engine, + &mut engine, k, TimeStamp::max(), ts(280, 0), @@ -918,9 +944,9 @@ pub mod tests { ); // Don't push forward the min_commit_ts if the min_commit_ts of the lock is 0. - must_acquire_pessimistic_lock_with_ttl(&engine, k, k, ts(290, 0), ts(290, 0), 100); + must_acquire_pessimistic_lock_with_ttl(&mut engine, k, k, ts(290, 0), ts(290, 0), 100); must_success( - &engine, + &mut engine, k, ts(290, 0), ts(300, 0), @@ -930,11 +956,11 @@ pub mod tests { false, uncommitted(100, TimeStamp::zero(), false), ); - must_large_txn_locked(&engine, k, ts(290, 0), 100, TimeStamp::zero(), true); - pessimistic_rollback::tests::must_success(&engine, k, ts(290, 0), ts(290, 0)); + must_large_txn_locked(&mut engine, k, ts(290, 0), 100, TimeStamp::zero(), true); + pessimistic_rollback::tests::must_success(&mut engine, k, ts(290, 0), ts(290, 0)); must_prewrite_put_impl( - &engine, + &mut engine, k, v, k, @@ -953,7 +979,7 @@ pub mod tests { kvproto::kvrpcpb::AssertionLevel::Off, ); must_success( - &engine, + &mut engine, k, ts(300, 0), ts(310, 0), @@ -963,15 +989,15 @@ pub mod tests { false, uncommitted(100, TimeStamp::zero(), false), ); - must_large_txn_locked(&engine, k, ts(300, 0), 100, TimeStamp::zero(), false); - must_rollback(&engine, k, ts(300, 0), false); + must_large_txn_locked(&mut engine, k, ts(300, 0), 100, TimeStamp::zero(), false); + must_rollback(&mut engine, k, ts(300, 0), false); - must_prewrite_put_for_large_txn(&engine, k, v, k, ts(310, 0), 100, 0); - must_large_txn_locked(&engine, k, ts(310, 0), 100, ts(310, 1), false); + must_prewrite_put_for_large_txn(&mut engine, k, v, k, ts(310, 0), 100, 0); + must_large_txn_locked(&mut engine, k, ts(310, 0), 100, ts(310, 1), false); // Don't push forward the min_commit_ts if caller_start_ts is max, but pushed // should be true. 
must_success( - &engine, + &mut engine, k, ts(310, 0), TimeStamp::max(), @@ -981,9 +1007,9 @@ pub mod tests { false, uncommitted(100, ts(310, 1), true), ); - must_commit(&engine, k, ts(310, 0), ts(315, 0)); + must_commit(&mut engine, k, ts(310, 0), ts(315, 0)); must_success( - &engine, + &mut engine, k, ts(310, 0), TimeStamp::max(), @@ -1003,7 +1029,7 @@ pub mod tests { #[test] fn test_check_txn_status_resolving_pessimistic_lock() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let k = b"k1"; let v = b"v1"; let ts = TimeStamp::compose; @@ -1012,7 +1038,7 @@ pub mod tests { // Path: there is no commit or rollback record, no rollback record should be // written. must_success( - &engine, + &mut engine, k, ts(3, 0), ts(3, 0), @@ -1022,17 +1048,26 @@ pub mod tests { true, |s| s == LockNotExistDoNothing, ); - must_get_rollback_ts_none(&engine, k, ts(5, 0)); + must_get_rollback_ts_none(&mut engine, k, ts(5, 0)); // Path: there is no commit or rollback record, error should be reported if // rollback_if_not_exist is set to false. - must_err(&engine, k, ts(3, 0), ts(5, 0), ts(5, 0), false, false, true); + must_err( + &mut engine, + k, + ts(3, 0), + ts(5, 0), + ts(5, 0), + false, + false, + true, + ); // Path: the pessimistic primary key lock does exist, and it's not expired yet. - must_acquire_pessimistic_lock_with_ttl(&engine, k, k, ts(10, 0), ts(10, 0), 10); - must_pessimistic_locked(&engine, k, ts(10, 0), ts(10, 0)); + must_acquire_pessimistic_lock_with_ttl(&mut engine, k, k, ts(10, 0), ts(10, 0), 10); + must_pessimistic_locked(&mut engine, k, ts(10, 0), ts(10, 0)); must_success( - &engine, + &mut engine, k, ts(10, 0), ts(11, 0), @@ -1047,7 +1082,7 @@ pub mod tests { // primary lock will be pessimistically rolled back but there will not // be a rollback record. 
must_success( - &engine, + &mut engine, k, ts(10, 0), ts(21, 0), @@ -1057,13 +1092,13 @@ pub mod tests { true, |s| s == PessimisticRollBack, ); - must_unlocked(&engine, k); - must_get_rollback_ts_none(&engine, k, ts(22, 0)); + must_unlocked(&mut engine, k); + must_get_rollback_ts_none(&mut engine, k, ts(22, 0)); // Path: the prewrite primary key lock does exist, and it's not expired yet. // Should return locked status. must_prewrite_put_impl( - &engine, + &mut engine, k, v, k, @@ -1082,7 +1117,7 @@ pub mod tests { kvproto::kvrpcpb::AssertionLevel::Off, ); must_success( - &engine, + &mut engine, k, ts(30, 0), ts(31, 0), @@ -1097,7 +1132,7 @@ pub mod tests { // lock, rollback record should be written and the transaction status is // certain. must_success( - &engine, + &mut engine, k, ts(30, 0), ts(41, 0), @@ -1107,16 +1142,16 @@ pub mod tests { true, |s| s == TtlExpire, ); - must_unlocked(&engine, k); - must_get_rollback_ts(&engine, k, ts(30, 0)); + must_unlocked(&mut engine, k); + must_get_rollback_ts(&mut engine, k, ts(30, 0)); // Path: the resolving_pessimistic_lock is false and the primary key lock is // pessimistic lock, the transaction is in commit phase and the rollback // record should be written. 
- must_acquire_pessimistic_lock_with_ttl(&engine, k, k, ts(50, 0), ts(50, 0), 10); - must_pessimistic_locked(&engine, k, ts(50, 0), ts(50, 0)); + must_acquire_pessimistic_lock_with_ttl(&mut engine, k, k, ts(50, 0), ts(50, 0), 10); + must_pessimistic_locked(&mut engine, k, ts(50, 0), ts(50, 0)); must_success( - &engine, + &mut engine, k, ts(50, 0), ts(61, 0), @@ -1127,7 +1162,7 @@ pub mod tests { false, |s| s == TtlExpire, ); - must_unlocked(&engine, k); - must_get_rollback_ts(&engine, k, ts(50, 0)); + must_unlocked(&mut engine, k); + must_get_rollback_ts(&mut engine, k, ts(50, 0)); } } diff --git a/src/storage/txn/commands/compare_and_swap.rs b/src/storage/txn/commands/compare_and_swap.rs index b3aa7088dc6..34d9114f48a 100644 --- a/src/storage/txn/commands/compare_and_swap.rs +++ b/src/storage/txn/commands/compare_and_swap.rs @@ -125,7 +125,7 @@ mod tests { /// to key. The full test of `RawCompareAndSwap` is in /// `src/storage/mod.rs`. fn test_cas_basic_impl() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); let key = b"rk"; @@ -146,7 +146,7 @@ mod tests { ts, Context::default(), ); - let (prev_val, succeed) = sched_command(&engine, cm.clone(), cmd).unwrap(); + let (prev_val, succeed) = sched_command(&mut engine, cm.clone(), cmd).unwrap(); assert!(prev_val.is_none()); assert!(succeed); @@ -161,7 +161,7 @@ mod tests { ts, Context::default(), ); - let (prev_val, succeed) = sched_command(&engine, cm.clone(), cmd).unwrap(); + let (prev_val, succeed) = sched_command(&mut engine, cm.clone(), cmd).unwrap(); assert_eq!(prev_val, Some(b"v1".to_vec())); assert!(!succeed); @@ -176,13 +176,13 @@ mod tests { ts, Context::default(), ); - let (prev_val, succeed) = sched_command(&engine, cm, cmd).unwrap(); + let (prev_val, succeed) = sched_command(&mut engine, cm, cmd).unwrap(); assert_eq!(prev_val, Some(b"v1".to_vec())); assert!(succeed); } 
pub fn sched_command( - engine: &E, + engine: &mut E, cm: ConcurrencyManager, cmd: TypedCommand<(Option, bool)>, ) -> Result<(Option, bool)> { @@ -218,7 +218,7 @@ mod tests { } fn test_cas_process_write_impl() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); let raw_key = b"rk"; let raw_value = b"valuek"; diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index d06218338da..2f2d123e9bb 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -752,7 +752,7 @@ pub mod test_util { // Some utils for tests that may be used in multiple source code files. pub fn prewrite_command( - engine: &E, + engine: &mut E, cm: ConcurrencyManager, statistics: &mut Statistics, cmd: TypedCommand, @@ -786,7 +786,7 @@ pub mod test_util { } pub fn prewrite( - engine: &E, + engine: &mut E, statistics: &mut Statistics, mutations: Vec, primary: Vec, @@ -806,7 +806,7 @@ pub mod test_util { } pub fn prewrite_with_cm( - engine: &E, + engine: &mut E, cm: ConcurrencyManager, statistics: &mut Statistics, mutations: Vec, @@ -828,7 +828,7 @@ pub mod test_util { } pub fn pessimistic_prewrite( - engine: &E, + engine: &mut E, statistics: &mut Statistics, mutations: Vec<(Mutation, PrewriteRequestPessimisticAction)>, primary: Vec, @@ -850,7 +850,7 @@ pub mod test_util { } pub fn pessimistic_prewrite_with_cm( - engine: &E, + engine: &mut E, cm: ConcurrencyManager, statistics: &mut Statistics, mutations: Vec<(Mutation, PrewriteRequestPessimisticAction)>, @@ -879,7 +879,7 @@ pub mod test_util { } pub fn commit( - engine: &E, + engine: &mut E, statistics: &mut Statistics, keys: Vec, lock_ts: u64, @@ -910,7 +910,7 @@ pub mod test_util { } pub fn rollback( - engine: &E, + engine: &mut E, statistics: &mut Statistics, keys: Vec, start_ts: u64, diff --git a/src/storage/txn/commands/pessimistic_rollback.rs 
b/src/storage/txn/commands/pessimistic_rollback.rs index 010238426ee..837d077153e 100644 --- a/src/storage/txn/commands/pessimistic_rollback.rs +++ b/src/storage/txn/commands/pessimistic_rollback.rs @@ -119,7 +119,7 @@ pub mod tests { }; pub fn must_success( - engine: &E, + engine: &mut E, key: &[u8], start_ts: impl Into, for_update_ts: impl Into, @@ -150,60 +150,60 @@ pub mod tests { #[test] fn test_pessimistic_rollback() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let k = b"k1"; let v = b"v1"; // Normal - must_acquire_pessimistic_lock(&engine, k, k, 1, 1); - must_pessimistic_locked(&engine, k, 1, 1); - must_success(&engine, k, 1, 1); - must_unlocked(&engine, k); - must_get_commit_ts_none(&engine, k, 1); + must_acquire_pessimistic_lock(&mut engine, k, k, 1, 1); + must_pessimistic_locked(&mut engine, k, 1, 1); + must_success(&mut engine, k, 1, 1); + must_unlocked(&mut engine, k); + must_get_commit_ts_none(&mut engine, k, 1); // Pessimistic rollback is idempotent - must_success(&engine, k, 1, 1); - must_unlocked(&engine, k); - must_get_commit_ts_none(&engine, k, 1); + must_success(&mut engine, k, 1, 1); + must_unlocked(&mut engine, k); + must_get_commit_ts_none(&mut engine, k, 1); // Succeed if the lock doesn't exist. 
- must_success(&engine, k, 2, 2); + must_success(&mut engine, k, 2, 2); // Do nothing if meets other transaction's pessimistic lock - must_acquire_pessimistic_lock(&engine, k, k, 2, 3); - must_success(&engine, k, 1, 1); - must_success(&engine, k, 1, 2); - must_success(&engine, k, 1, 3); - must_success(&engine, k, 1, 4); - must_success(&engine, k, 3, 3); - must_success(&engine, k, 4, 4); + must_acquire_pessimistic_lock(&mut engine, k, k, 2, 3); + must_success(&mut engine, k, 1, 1); + must_success(&mut engine, k, 1, 2); + must_success(&mut engine, k, 1, 3); + must_success(&mut engine, k, 1, 4); + must_success(&mut engine, k, 3, 3); + must_success(&mut engine, k, 4, 4); // Succeed if for_update_ts is larger; do nothing if for_update_ts is smaller. - must_pessimistic_locked(&engine, k, 2, 3); - must_success(&engine, k, 2, 2); - must_pessimistic_locked(&engine, k, 2, 3); - must_success(&engine, k, 2, 4); - must_unlocked(&engine, k); + must_pessimistic_locked(&mut engine, k, 2, 3); + must_success(&mut engine, k, 2, 2); + must_pessimistic_locked(&mut engine, k, 2, 3); + must_success(&mut engine, k, 2, 4); + must_unlocked(&mut engine, k); // Do nothing if rollbacks a non-pessimistic lock. 
- must_prewrite_put(&engine, k, v, k, 3); - must_locked(&engine, k, 3); - must_success(&engine, k, 3, 3); - must_locked(&engine, k, 3); + must_prewrite_put(&mut engine, k, v, k, 3); + must_locked(&mut engine, k, 3); + must_success(&mut engine, k, 3, 3); + must_locked(&mut engine, k, 3); // Do nothing if meets other transaction's optimistic lock - must_success(&engine, k, 2, 2); - must_success(&engine, k, 2, 3); - must_success(&engine, k, 2, 4); - must_success(&engine, k, 4, 4); - must_locked(&engine, k, 3); + must_success(&mut engine, k, 2, 2); + must_success(&mut engine, k, 2, 3); + must_success(&mut engine, k, 2, 4); + must_success(&mut engine, k, 4, 4); + must_locked(&mut engine, k, 3); // Do nothing if committed - must_commit(&engine, k, 3, 4); - must_unlocked(&engine, k); - must_get_commit_ts(&engine, k, 3, 4); - must_success(&engine, k, 3, 3); - must_success(&engine, k, 3, 4); - must_success(&engine, k, 3, 5); + must_commit(&mut engine, k, 3, 4); + must_unlocked(&mut engine, k); + must_get_commit_ts(&mut engine, k, 3, 4); + must_success(&mut engine, k, 3, 3); + must_success(&mut engine, k, 3, 4); + must_success(&mut engine, k, 3, 5); } } diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 333d3eb1aca..be47e22e42b 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -918,9 +918,9 @@ mod tests { )); } let mut statistic = Statistics::default(); - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); prewrite( - &engine, + &mut engine, &mut statistic, vec![Mutation::make_put( Key::from_raw(&[pri_key_number]), @@ -933,7 +933,7 @@ mod tests { .unwrap(); assert_eq!(1, statistic.write.seek); let e = prewrite( - &engine, + &mut engine, &mut statistic, mutations.clone(), pri_key.to_vec(), @@ -948,7 +948,7 @@ mod tests { _ => panic!("error type not match"), } commit( - &engine, + &mut engine, &mut statistic, 
vec![Key::from_raw(&[pri_key_number])], 99, @@ -957,7 +957,7 @@ mod tests { .unwrap(); assert_eq!(3, statistic.write.seek); let e = prewrite( - &engine, + &mut engine, &mut statistic, mutations.clone(), pri_key.to_vec(), @@ -973,7 +973,7 @@ mod tests { _ => panic!("error type not match"), } let e = prewrite( - &engine, + &mut engine, &mut statistic, mutations.clone(), pri_key.to_vec(), @@ -997,7 +997,7 @@ mod tests { ) .unwrap(); prewrite( - &engine, + &mut engine, &mut statistic, mutations.clone(), pri_key.to_vec(), @@ -1008,7 +1008,7 @@ mod tests { // All keys are prewritten successful with only one seek operations. assert_eq!(1, statistic.write.seek); let keys: Vec = mutations.iter().map(|m| m.key().clone()).collect(); - commit(&engine, &mut statistic, keys.clone(), 104, 105).unwrap(); + commit(&mut engine, &mut statistic, keys.clone(), 104, 105).unwrap(); let snap = engine.snapshot(Default::default()).unwrap(); for k in keys { let v = snap.get_cf(CF_WRITE, &k.append_ts(105.into())).unwrap(); @@ -1040,11 +1040,11 @@ mod tests { b"100".to_vec(), )); } - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let keys: Vec = mutations.iter().map(|m| m.key().clone()).collect(); let mut statistic = Statistics::default(); prewrite( - &engine, + &mut engine, &mut statistic, mutations.clone(), pri_key.to_vec(), @@ -1053,10 +1053,10 @@ mod tests { ) .unwrap(); // Rollback to make tombstones in lock-cf. - rollback(&engine, &mut statistic, keys, 100).unwrap(); + rollback(&mut engine, &mut statistic, keys, 100).unwrap(); // Gc rollback flags store in write-cf to make sure the next prewrite operation // will skip seek write cf. 
- gc_by_compact(&engine, pri_key, 101); + gc_by_compact(&mut engine, pri_key, 101); set_perf_level(PerfLevel::EnableTimeExceptForMutex); let perf = ReadPerfInstant::new(); let mut statistic = Statistics::default(); @@ -1064,7 +1064,7 @@ mod tests { mutations.pop(); } prewrite( - &engine, + &mut engine, &mut statistic, mutations, pri_key.to_vec(), @@ -1081,7 +1081,7 @@ mod tests { fn test_prewrite_1pc() { use crate::storage::mvcc::tests::{must_get, must_get_commit_ts, must_unlocked}; - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); let key = b"k"; @@ -1090,7 +1090,7 @@ mod tests { let mut statistics = Statistics::default(); prewrite_with_cm( - &engine, + &mut engine, cm.clone(), &mut statistics, mutations, @@ -1099,9 +1099,9 @@ mod tests { Some(15), ) .unwrap(); - must_unlocked(&engine, key); - must_get(&engine, key, 12, value); - must_get_commit_ts(&engine, key, 10, 11); + must_unlocked(&mut engine, key); + must_get(&mut engine, key, 12, value); + must_get_commit_ts(&mut engine, key, 10, 11); cm.update_max_ts(50.into()); @@ -1111,7 +1111,7 @@ mod tests { // Test the idempotency of prewrite when falling back to 2PC. for _ in 0..2 { let res = prewrite_with_cm( - &engine, + &mut engine, cm.clone(), &mut statistics, mutations.clone(), @@ -1122,17 +1122,17 @@ mod tests { .unwrap(); assert!(res.min_commit_ts.is_zero()); assert!(res.one_pc_commit_ts.is_zero()); - must_locked(&engine, key, 20); + must_locked(&mut engine, key, 20); } - must_rollback(&engine, key, 20, false); + must_rollback(&mut engine, key, 20, false); let mutations = vec![ Mutation::make_put(Key::from_raw(key), value.to_vec()), Mutation::make_check_not_exists(Key::from_raw(b"non_exist")), ]; let mut statistics = Statistics::default(); prewrite_with_cm( - &engine, + &mut engine, cm.clone(), &mut statistics, mutations, @@ -1150,7 +1150,7 @@ mod tests { // Lock k2. 
let mut statistics = Statistics::default(); prewrite_with_cm( - &engine, + &mut engine, cm.clone(), &mut statistics, vec![Mutation::make_put(Key::from_raw(k2), v2.to_vec())], @@ -1165,7 +1165,7 @@ mod tests { Mutation::make_put(Key::from_raw(k2), v2.to_vec()), ]; prewrite_with_cm( - &engine, + &mut engine, cm, &mut statistics, mutations, @@ -1174,20 +1174,20 @@ mod tests { Some(70), ) .unwrap_err(); - must_unlocked(&engine, k1); - must_locked(&engine, k2, 50); - must_get_commit_ts_none(&engine, k1, 60); - must_get_commit_ts_none(&engine, k2, 60); + must_unlocked(&mut engine, k1); + must_locked(&mut engine, k2, 50); + must_get_commit_ts_none(&mut engine, k1, 60); + must_get_commit_ts_none(&mut engine, k2, 60); } #[test] fn test_prewrite_pessimsitic_1pc() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); let key = b"k"; let value = b"v"; - must_acquire_pessimistic_lock(&engine, key, key, 10, 10); + must_acquire_pessimistic_lock(&mut engine, key, key, 10, 10); let mutations = vec![( Mutation::make_put(Key::from_raw(key), value.to_vec()), @@ -1195,7 +1195,7 @@ mod tests { )]; let mut statistics = Statistics::default(); pessimistic_prewrite_with_cm( - &engine, + &mut engine, cm.clone(), &mut statistics, mutations, @@ -1206,14 +1206,14 @@ mod tests { ) .unwrap(); - must_unlocked(&engine, key); - must_get(&engine, key, 12, value); - must_get_commit_ts(&engine, key, 10, 11); + must_unlocked(&mut engine, key); + must_get(&mut engine, key, 12, value); + must_get_commit_ts(&mut engine, key, 10, 11); let (k1, v1) = (b"k", b"v"); let (k2, v2) = (b"k2", b"v2"); - must_acquire_pessimistic_lock(&engine, k1, k1, 8, 12); + must_acquire_pessimistic_lock(&mut engine, k1, k1, 8, 12); let mutations = vec![ ( @@ -1227,7 +1227,7 @@ mod tests { ]; statistics = Statistics::default(); pessimistic_prewrite_with_cm( - &engine, + &mut engine, cm.clone(), &mut 
statistics, mutations, @@ -1238,15 +1238,15 @@ mod tests { ) .unwrap(); - must_unlocked(&engine, k1); - must_unlocked(&engine, k2); - must_get(&engine, k1, 16, v1); - must_get(&engine, k2, 16, v2); - must_get_commit_ts(&engine, k1, 8, 13); - must_get_commit_ts(&engine, k2, 8, 13); + must_unlocked(&mut engine, k1); + must_unlocked(&mut engine, k2); + must_get(&mut engine, k1, 16, v1); + must_get(&mut engine, k2, 16, v2); + must_get_commit_ts(&mut engine, k1, 8, 13); + must_get_commit_ts(&mut engine, k2, 8, 13); cm.update_max_ts(50.into()); - must_acquire_pessimistic_lock(&engine, k1, k1, 20, 20); + must_acquire_pessimistic_lock(&mut engine, k1, k1, 20, 20); let mutations = vec![( Mutation::make_put(Key::from_raw(k1), v1.to_vec()), @@ -1254,7 +1254,7 @@ mod tests { )]; statistics = Statistics::default(); let res = pessimistic_prewrite_with_cm( - &engine, + &mut engine, cm.clone(), &mut statistics, mutations, @@ -1266,9 +1266,9 @@ mod tests { .unwrap(); assert!(res.min_commit_ts.is_zero()); assert!(res.one_pc_commit_ts.is_zero()); - must_locked(&engine, k1, 20); + must_locked(&mut engine, k1, 20); - must_rollback(&engine, k1, 20, true); + must_rollback(&mut engine, k1, 20, true); // Test a 1PC request should not be partially written when encounters error on // the halfway. If some of the keys are successfully written as committed state, @@ -1277,7 +1277,7 @@ mod tests { // Lock k2 with a optimistic lock. 
let mut statistics = Statistics::default(); prewrite_with_cm( - &engine, + &mut engine, cm.clone(), &mut statistics, vec![Mutation::make_put(Key::from_raw(k2), v2.to_vec())], @@ -1297,9 +1297,9 @@ mod tests { SkipPessimisticCheck, ), ]; - must_acquire_pessimistic_lock(&engine, k1, k1, 60, 60); + must_acquire_pessimistic_lock(&mut engine, k1, k1, 60, 60); pessimistic_prewrite_with_cm( - &engine, + &mut engine, cm, &mut statistics, mutations, @@ -1309,15 +1309,15 @@ mod tests { Some(70), ) .unwrap_err(); - must_pessimistic_locked(&engine, k1, 60, 60); - must_locked(&engine, k2, 50); - must_get_commit_ts_none(&engine, k1, 60); - must_get_commit_ts_none(&engine, k2, 60); + must_pessimistic_locked(&mut engine, k1, 60, 60); + must_locked(&mut engine, k2, 50); + must_get_commit_ts_none(&mut engine, k1, 60); + must_get_commit_ts_none(&mut engine, k2, 60); } #[test] fn test_prewrite_async_commit() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); let key = b"k"; @@ -1340,10 +1340,10 @@ mod tests { Context::default(), ); - let res = prewrite_command(&engine, cm.clone(), &mut statistics, cmd).unwrap(); + let res = prewrite_command(&mut engine, cm.clone(), &mut statistics, cmd).unwrap(); assert!(!res.min_commit_ts.is_zero()); assert_eq!(res.one_pc_commit_ts, TimeStamp::zero()); - must_locked(&engine, key, 10); + must_locked(&mut engine, key, 10); cm.update_max_ts(50.into()); @@ -1373,23 +1373,23 @@ mod tests { Context::default(), ); - let res = prewrite_command(&engine, cm.clone(), &mut statistics, cmd).unwrap(); + let res = prewrite_command(&mut engine, cm.clone(), &mut statistics, cmd).unwrap(); assert!(res.min_commit_ts.is_zero()); assert!(res.one_pc_commit_ts.is_zero()); - assert!(!must_locked(&engine, k1, 20).use_async_commit); - assert!(!must_locked(&engine, k2, 20).use_async_commit); + assert!(!must_locked(&mut engine, k1, 
20).use_async_commit); + assert!(!must_locked(&mut engine, k2, 20).use_async_commit); } } #[test] fn test_prewrite_pessimsitic_async_commit() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); let key = b"k"; let value = b"v"; - must_acquire_pessimistic_lock(&engine, key, key, 10, 10); + must_acquire_pessimistic_lock(&mut engine, key, key, 10, 10); let mutations = vec![( Mutation::make_put(Key::from_raw(key), value.to_vec()), @@ -1411,18 +1411,18 @@ mod tests { Context::default(), ); - let res = prewrite_command(&engine, cm.clone(), &mut statistics, cmd).unwrap(); + let res = prewrite_command(&mut engine, cm.clone(), &mut statistics, cmd).unwrap(); assert!(!res.min_commit_ts.is_zero()); assert_eq!(res.one_pc_commit_ts, TimeStamp::zero()); - must_locked(&engine, key, 10); + must_locked(&mut engine, key, 10); cm.update_max_ts(50.into()); let (k1, v1) = (b"k1", b"v1"); let (k2, v2) = (b"k2", b"v2"); - must_acquire_pessimistic_lock(&engine, k1, k1, 20, 20); - must_acquire_pessimistic_lock(&engine, k2, k1, 20, 20); + must_acquire_pessimistic_lock(&mut engine, k1, k1, 20, 20); + must_acquire_pessimistic_lock(&mut engine, k2, k1, 20, 20); let mutations = vec![ ( @@ -1451,11 +1451,11 @@ mod tests { Context::default(), ); - let res = prewrite_command(&engine, cm, &mut statistics, cmd).unwrap(); + let res = prewrite_command(&mut engine, cm, &mut statistics, cmd).unwrap(); assert!(res.min_commit_ts.is_zero()); assert!(res.one_pc_commit_ts.is_zero()); - assert!(!must_locked(&engine, k1, 20).use_async_commit); - assert!(!must_locked(&engine, k2, 20).use_async_commit); + assert!(!must_locked(&mut engine, k1, 20).use_async_commit); + assert!(!must_locked(&mut engine, k2, 20).use_async_commit); } #[test] @@ -1675,7 +1675,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: case.async_apply_prewrite, }; - let engine = 
TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let snap = engine.snapshot(Default::default()).unwrap(); let result = cmd.cmd.process_write(snap, context).unwrap(); assert_eq!(result.response_policy, case.expected); @@ -1685,7 +1685,7 @@ mod tests { // this test for prewrite with should_not_exist flag #[test] fn test_prewrite_should_not_exist() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); // concurency_manager.max_tx = 5 let cm = ConcurrencyManager::new(5.into()); let mut statistics = Statistics::default(); @@ -1693,12 +1693,12 @@ mod tests { let (key, value) = (b"k", b"val"); // T1: start_ts = 3, commit_ts = 5, put key:value - must_prewrite_put(&engine, key, value, key, 3); - must_commit(&engine, key, 3, 5); + must_prewrite_put(&mut engine, key, value, key, 3); + must_commit(&mut engine, key, 3, 5); // T2: start_ts = 15, prewrite on k, with should_not_exist flag set. 
let res = prewrite_with_cm( - &engine, + &mut engine, cm.clone(), &mut statistics, vec![Mutation::make_check_not_exists(Key::from_raw(key))], @@ -1718,12 +1718,12 @@ mod tests { // T3: start_ts = 8, commit_ts = max_ts + 1 = 16, prewrite a DELETE operation on // k - must_prewrite_delete(&engine, key, key, 8); - must_commit(&engine, key, 8, cm.max_ts().into_inner() + 1); + must_prewrite_delete(&mut engine, key, key, 8); + must_commit(&mut engine, key, 8, cm.max_ts().into_inner() + 1); // T1: start_ts = 10, repeatedly prewrite on k, with should_not_exist flag set let res = prewrite_with_cm( - &engine, + &mut engine, cm, &mut statistics, vec![Mutation::make_check_not_exists(Key::from_raw(key))], @@ -1742,15 +1742,15 @@ mod tests { #[test] fn test_optimistic_prewrite_committed_transaction() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(1.into()); let mut statistics = Statistics::default(); let key = b"k"; // T1: start_ts = 5, commit_ts = 10, async commit - must_prewrite_put_async_commit(&engine, key, b"v1", key, &Some(vec![]), 5, 10); - must_commit(&engine, key, 5, 10); + must_prewrite_put_async_commit(&mut engine, key, b"v1", key, &Some(vec![]), 5, 10); + must_commit(&mut engine, key, 5, 10); // T2: start_ts = 15, commit_ts = 16, 1PC let cmd = Prewrite::with_1pc( @@ -1759,12 +1759,12 @@ mod tests { 15.into(), TimeStamp::default(), ); - let result = prewrite_command(&engine, cm.clone(), &mut statistics, cmd).unwrap(); + let result = prewrite_command(&mut engine, cm.clone(), &mut statistics, cmd).unwrap(); let one_pc_commit_ts = result.one_pc_commit_ts; // T3 is after T1 and T2 - must_prewrite_put(&engine, key, b"v3", key, 20); - must_commit(&engine, key, 20, 25); + must_prewrite_put(&mut engine, key, b"v3", key, 20); + must_commit(&mut engine, key, 20, 25); // Repeating the T1 prewrite request let cmd = Prewrite::new( @@ -1831,16 +1831,16 @@ mod tests { #[test] 
fn test_pessimistic_prewrite_committed_transaction() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(1.into()); let mut statistics = Statistics::default(); let key = b"k"; // T1: start_ts = 5, commit_ts = 10, async commit - must_acquire_pessimistic_lock(&engine, key, key, 5, 5); + must_acquire_pessimistic_lock(&mut engine, key, key, 5, 5); must_pessimistic_prewrite_put_async_commit( - &engine, + &mut engine, key, b"v1", key, @@ -1850,10 +1850,10 @@ mod tests { DoPessimisticCheck, 10, ); - must_commit(&engine, key, 5, 10); + must_commit(&mut engine, key, 5, 10); // T2: start_ts = 15, commit_ts = 16, 1PC - must_acquire_pessimistic_lock(&engine, key, key, 15, 15); + must_acquire_pessimistic_lock(&mut engine, key, key, 15, 15); let cmd = PrewritePessimistic::with_1pc( vec![( Mutation::make_put(Key::from_raw(key), b"v2".to_vec()), @@ -1864,12 +1864,12 @@ mod tests { 15.into(), TimeStamp::default(), ); - let result = prewrite_command(&engine, cm.clone(), &mut statistics, cmd).unwrap(); + let result = prewrite_command(&mut engine, cm.clone(), &mut statistics, cmd).unwrap(); let one_pc_commit_ts = result.one_pc_commit_ts; // T3 is after T1 and T2 - must_prewrite_put(&engine, key, b"v3", key, 20); - must_commit(&engine, key, 20, 25); + must_prewrite_put(&mut engine, key, b"v3", key, 20); + must_commit(&mut engine, key, 20, 25); // Repeating the T1 prewrite request let cmd = PrewritePessimistic::new( @@ -1943,11 +1943,11 @@ mod tests { #[test] fn test_repeated_pessimistic_prewrite_1pc() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(1.into()); let mut statistics = Statistics::default(); - must_acquire_pessimistic_lock(&engine, b"k2", b"k2", 5, 5); + must_acquire_pessimistic_lock(&mut engine, b"k2", b"k2", 5, 5); // The second key needs a pessimistic lock let 
mutations = vec![ ( @@ -1960,7 +1960,7 @@ mod tests { ), ]; let res = pessimistic_prewrite_with_cm( - &engine, + &mut engine, cm.clone(), &mut statistics, mutations.clone(), @@ -1974,7 +1974,7 @@ mod tests { cm.update_max_ts(commit_ts.next()); // repeate the prewrite let res = pessimistic_prewrite_with_cm( - &engine, + &mut engine, cm, &mut statistics, mutations, @@ -1986,48 +1986,52 @@ mod tests { .unwrap(); // The new commit ts should be same as before. assert_eq!(res.one_pc_commit_ts, commit_ts); - must_seek_write(&engine, b"k1", 100, 5, commit_ts, WriteType::Put); - must_seek_write(&engine, b"k2", 100, 5, commit_ts, WriteType::Put); + must_seek_write(&mut engine, b"k1", 100, 5, commit_ts, WriteType::Put); + must_seek_write(&mut engine, b"k2", 100, 5, commit_ts, WriteType::Put); } #[test] fn test_repeated_prewrite_non_pessimistic_lock() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(1.into()); let mut statistics = Statistics::default(); let cm = &cm; - let mut prewrite_with_retry_flag = - |key: &[u8], - value: &[u8], - pk: &[u8], - secondary_keys, - ts: u64, - pessimistic_action, - is_retry_request| { - let mutation = Mutation::make_put(Key::from_raw(key), value.to_vec()); - let mut ctx = Context::default(); - ctx.set_is_retry_request(is_retry_request); - let cmd = PrewritePessimistic::new( - vec![(mutation, pessimistic_action)], - pk.to_vec(), - ts.into(), - 100, - ts.into(), - 1, - (ts + 1).into(), - 0.into(), - secondary_keys, - false, - AssertionLevel::Off, - ctx, - ); - prewrite_command(&engine, cm.clone(), &mut statistics, cmd) - }; + fn prewrite_with_retry_flag( + key: &[u8], + value: &[u8], + pk: &[u8], + secondary_keys: Option>>, + ts: u64, + pessimistic_action: PrewriteRequestPessimisticAction, + is_retry_request: bool, + engine: &mut E, + cm: &ConcurrencyManager, + statistics: &mut Statistics, + ) -> Result { + let mutation = 
Mutation::make_put(Key::from_raw(key), value.to_vec()); + let mut ctx = Context::default(); + ctx.set_is_retry_request(is_retry_request); + let cmd = PrewritePessimistic::new( + vec![(mutation, pessimistic_action)], + pk.to_vec(), + ts.into(), + 100, + ts.into(), + 1, + (ts + 1).into(), + 0.into(), + secondary_keys, + false, + AssertionLevel::Off, + ctx, + ); + prewrite_command(engine, cm.clone(), statistics, cmd) + } - must_acquire_pessimistic_lock(&engine, b"k1", b"k1", 10, 10); + must_acquire_pessimistic_lock(&mut engine, b"k1", b"k1", 10, 10); must_pessimistic_prewrite_put_async_commit( - &engine, + &mut engine, b"k1", b"v1", b"k1", @@ -2038,7 +2042,7 @@ mod tests { 15, ); must_pessimistic_prewrite_put_async_commit( - &engine, + &mut engine, b"k2", b"v2", b"k1", @@ -2050,8 +2054,8 @@ mod tests { ); // The transaction may be committed by another reader. - must_commit(&engine, b"k1", 10, 20); - must_commit(&engine, b"k2", 10, 20); + must_commit(&mut engine, b"k1", 10, 20); + must_commit(&mut engine, b"k2", 10, 20); // This is a re-sent prewrite. prewrite_with_retry_flag( @@ -2062,21 +2066,24 @@ mod tests { 10, SkipPessimisticCheck, true, + &mut engine, + cm, + &mut statistics, ) .unwrap(); // Commit repeatedly, these operations should have no effect. - must_commit(&engine, b"k1", 10, 25); - must_commit(&engine, b"k2", 10, 25); + must_commit(&mut engine, b"k1", 10, 25); + must_commit(&mut engine, b"k2", 10, 25); // Seek from 30, we should read commit_ts = 20 instead of 25. - must_seek_write(&engine, b"k1", 30, 10, 20, WriteType::Put); - must_seek_write(&engine, b"k2", 30, 10, 20, WriteType::Put); + must_seek_write(&mut engine, b"k1", 30, 10, 20, WriteType::Put); + must_seek_write(&mut engine, b"k2", 30, 10, 20, WriteType::Put); // Write another version to the keys. 
- must_prewrite_put(&engine, b"k1", b"v11", b"k1", 35); - must_prewrite_put(&engine, b"k2", b"v22", b"k1", 35); - must_commit(&engine, b"k1", 35, 40); - must_commit(&engine, b"k2", 35, 40); + must_prewrite_put(&mut engine, b"k1", b"v11", b"k1", 35); + must_prewrite_put(&mut engine, b"k2", b"v22", b"k1", 35); + must_commit(&mut engine, b"k1", 35, 40); + must_commit(&mut engine, b"k2", 35, 40); // A retrying non-pessimistic-lock prewrite request should not skip constraint // checks. Here it should take no effect, even there's already a newer version @@ -2089,37 +2096,72 @@ mod tests { 10, SkipPessimisticCheck, true, + &mut engine, + cm, + &mut statistics, ) .unwrap(); - must_unlocked(&engine, b"k2"); + must_unlocked(&mut engine, b"k2"); - prewrite_with_retry_flag(b"k2", b"v2", b"k1", None, 10, SkipPessimisticCheck, true) - .unwrap(); - must_unlocked(&engine, b"k2"); + prewrite_with_retry_flag( + b"k2", + b"v2", + b"k1", + None, + 10, + SkipPessimisticCheck, + true, + &mut engine, + cm, + &mut statistics, + ) + .unwrap(); + must_unlocked(&mut engine, b"k2"); // Committing still does nothing. - must_commit(&engine, b"k2", 10, 25); + must_commit(&mut engine, b"k2", 10, 25); // Try a different txn start ts (which haven't been successfully committed // before). It should report a PessimisticLockNotFound. - let err = - prewrite_with_retry_flag(b"k2", b"v2", b"k1", None, 11, SkipPessimisticCheck, true) - .unwrap_err(); + let err = prewrite_with_retry_flag( + b"k2", + b"v2", + b"k1", + None, + 11, + SkipPessimisticCheck, + true, + &mut engine, + cm, + &mut statistics, + ) + .unwrap_err(); assert!(matches!( err, Error(box ErrorInner::Mvcc(MvccError( box MvccErrorInner::PessimisticLockNotFound { .. } ))) )); - must_unlocked(&engine, b"k2"); + must_unlocked(&mut engine, b"k2"); // However conflict still won't be checked if there's a non-retry request // arriving. 
- prewrite_with_retry_flag(b"k2", b"v2", b"k1", None, 10, SkipPessimisticCheck, false) - .unwrap(); - must_locked(&engine, b"k2", 10); + prewrite_with_retry_flag( + b"k2", + b"v2", + b"k1", + None, + 10, + SkipPessimisticCheck, + false, + &mut engine, + cm, + &mut statistics, + ) + .unwrap(); + must_locked(&mut engine, b"k2", 10); } #[test] fn test_prewrite_rolledback_transaction() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(1.into()); let mut statistics = Statistics::default(); @@ -2128,10 +2170,10 @@ mod tests { let v2 = b"v2"; // Test the write conflict path. - must_acquire_pessimistic_lock(&engine, k1, v1, 1, 1); - must_rollback(&engine, k1, 1, true); - must_prewrite_put(&engine, k1, v2, k1, 5); - must_commit(&engine, k1, 5, 6); + must_acquire_pessimistic_lock(&mut engine, k1, v1, 1, 1); + must_rollback(&mut engine, k1, 1, true); + must_prewrite_put(&mut engine, k1, v2, k1, 5); + must_commit(&mut engine, k1, 5, 6); let prewrite_cmd = Prewrite::new( vec![Mutation::make_put(Key::from_raw(k1), v1.to_vec())], k1.to_vec(), @@ -2157,9 +2199,9 @@ mod tests { assert!(prewrite_cmd.cmd.process_write(snap, context).is_err()); // Test the pessimistic lock is not found path. 
- must_acquire_pessimistic_lock(&engine, k1, v1, 10, 10); - must_rollback(&engine, k1, 10, true); - must_acquire_pessimistic_lock(&engine, k1, v1, 15, 15); + must_acquire_pessimistic_lock(&mut engine, k1, v1, 10, 10); + must_rollback(&mut engine, k1, 10, true); + must_acquire_pessimistic_lock(&mut engine, k1, v1, 15, 15); let prewrite_cmd = PrewritePessimistic::with_defaults( vec![( Mutation::make_put(Key::from_raw(k1), v1.to_vec()), @@ -2182,7 +2224,7 @@ mod tests { #[test] fn test_assertion_fail_on_conflicting_index_key() { - let engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); // Simulate two transactions that tries to insert the same row with a secondary // index, and the second one canceled the first one (by rolling back its lock). @@ -2192,13 +2234,18 @@ mod tests { let t2_commit_ts = TimeStamp::compose(3, 0); // txn1 acquires lock on the row key. - must_acquire_pessimistic_lock(&engine, b"row", b"row", t1_start_ts, t1_start_ts); + must_acquire_pessimistic_lock(&mut engine, b"row", b"row", t1_start_ts, t1_start_ts); // txn2 rolls it back. 
- let err = - must_acquire_pessimistic_lock_err(&engine, b"row", b"row", t2_start_ts, t2_start_ts); + let err = must_acquire_pessimistic_lock_err( + &mut engine, + b"row", + b"row", + t2_start_ts, + t2_start_ts, + ); assert!(matches!(err, MvccError(box MvccErrorInner::KeyIsLocked(_)))); must_check_txn_status( - &engine, + &mut engine, b"row", t1_start_ts, t2_start_ts, @@ -2209,9 +2256,9 @@ mod tests { |status| status == TxnStatus::PessimisticRollBack, ); // And then txn2 acquire continues and finally commits - must_acquire_pessimistic_lock(&engine, b"row", b"row", t2_start_ts, t2_start_ts); + must_acquire_pessimistic_lock(&mut engine, b"row", b"row", t2_start_ts, t2_start_ts); must_prewrite_put_impl( - &engine, + &mut engine, b"row", b"value", b"row", @@ -2228,7 +2275,7 @@ mod tests { AssertionLevel::Strict, ); must_prewrite_put_impl( - &engine, + &mut engine, b"index", b"value", b"row", @@ -2244,8 +2291,8 @@ mod tests { Assertion::NotExist, AssertionLevel::Strict, ); - must_commit(&engine, b"row", t2_start_ts, t2_commit_ts); - must_commit(&engine, b"index", t2_start_ts, t2_commit_ts); + must_commit(&mut engine, b"row", t2_start_ts, t2_commit_ts); + must_commit(&mut engine, b"index", t2_start_ts, t2_commit_ts); // Txn1 continues. If the two keys are sent in the single prewrite request, the // AssertionFailed error won't be returned since there are other error. 
@@ -2267,7 +2314,7 @@ mod tests { t1_start_ts, t2_start_ts, ); - let err = prewrite_command(&engine, cm.clone(), &mut stat, cmd).unwrap_err(); + let err = prewrite_command(&mut engine, cm.clone(), &mut stat, cmd).unwrap_err(); assert!(matches!( err, Error(box ErrorInner::Mvcc(MvccError( @@ -2290,7 +2337,7 @@ mod tests { t1_start_ts, t2_start_ts, ); - let err = prewrite_command(&engine, cm, &mut stat, cmd).unwrap_err(); + let err = prewrite_command(&mut engine, cm, &mut stat, cmd).unwrap_err(); assert!(matches!( err, Error(box ErrorInner::Mvcc(MvccError( @@ -2301,7 +2348,7 @@ mod tests { // If the two keys are sent in different requests, it would be the client's duty // to ignore the assertion error. let err = must_prewrite_put_err_impl( - &engine, + &mut engine, b"row", b"value", b"row", @@ -2319,7 +2366,7 @@ mod tests { MvccError(box MvccErrorInner::PessimisticLockNotFound { .. }) )); let err = must_prewrite_put_err_impl( - &engine, + &mut engine, b"index", b"value", b"row", @@ -2340,19 +2387,19 @@ mod tests { #[test] fn test_prewrite_committed_encounter_newer_lock() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let mut statistics = Statistics::default(); let k1 = b"k1"; let v1 = b"v1"; let v2 = b"v2"; - must_prewrite_put_async_commit(&engine, k1, v1, k1, &Some(vec![]), 5, 10); + must_prewrite_put_async_commit(&mut engine, k1, v1, k1, &Some(vec![]), 5, 10); // This commit may actually come from a ResolveLock command - must_commit(&engine, k1, 5, 15); + must_commit(&mut engine, k1, 5, 15); // Another transaction prewrites - must_prewrite_put(&engine, k1, v2, k1, 20); + must_prewrite_put(&mut engine, k1, v2, k1, 20); // A retried prewrite of the first transaction should be idempotent. 
let prewrite_cmd = Prewrite::new( @@ -2389,14 +2436,14 @@ mod tests { #[test] fn test_repeated_prewrite_commit_ts_too_large() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = ConcurrencyManager::new(1.into()); let mut statistics = Statistics::default(); // First, prewrite and commit normally. - must_acquire_pessimistic_lock(&engine, b"k1", b"k1", 5, 10); + must_acquire_pessimistic_lock(&mut engine, b"k1", b"k1", 5, 10); must_pessimistic_prewrite_put_async_commit( - &engine, + &mut engine, b"k1", b"v1", b"k1", @@ -2407,7 +2454,7 @@ mod tests { 15, ); must_prewrite_put_impl( - &engine, + &mut engine, b"k2", b"v2", b"k1", @@ -2423,8 +2470,8 @@ mod tests { Assertion::None, AssertionLevel::Off, ); - must_commit(&engine, b"k1", 5, 18); - must_commit(&engine, b"k2", 5, 18); + must_commit(&mut engine, b"k1", 5, 18); + must_commit(&mut engine, b"k2", 5, 18); // Update max_ts to be larger than the max_commit_ts. cm.update_max_ts(50.into()); @@ -2446,9 +2493,9 @@ mod tests { AssertionLevel::Off, Context::default(), ); - let res = prewrite_command(&engine, cm, &mut statistics, cmd).unwrap(); + let res = prewrite_command(&mut engine, cm, &mut statistics, cmd).unwrap(); // It should return the real commit TS as the min_commit_ts in the result. 
assert_eq!(res.min_commit_ts, 18.into(), "{:?}", res); - must_unlocked(&engine, b"k2"); + must_unlocked(&mut engine, b"k2"); } } diff --git a/src/storage/txn/commands/rollback.rs b/src/storage/txn/commands/rollback.rs index 7e93e77dee6..fc3846931f3 100644 --- a/src/storage/txn/commands/rollback.rs +++ b/src/storage/txn/commands/rollback.rs @@ -81,15 +81,15 @@ mod tests { #[test] fn rollback_lock_with_existing_rollback() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k1, k2) = (b"k1", b"k2"); let v = b"v"; - must_acquire_pessimistic_lock(&engine, k1, k1, 10, 10); - must_rollback(&engine, k1, 10, false); - must_rollback(&engine, k2, 10, false); + must_acquire_pessimistic_lock(&mut engine, k1, k1, 10, 10); + must_rollback(&mut engine, k1, 10, false); + must_rollback(&mut engine, k2, 10, false); - must_pessimistic_prewrite_put(&engine, k2, v, k1, 10, 10, SkipPessimisticCheck); - must_rollback(&engine, k2, 10, false); + must_pessimistic_prewrite_put(&mut engine, k2, v, k1, 10, 10, SkipPessimisticCheck); + must_rollback(&mut engine, k2, 10, false); } } diff --git a/src/storage/txn/commands/txn_heart_beat.rs b/src/storage/txn/commands/txn_heart_beat.rs index 2149d5571da..70c13a20c26 100644 --- a/src/storage/txn/commands/txn_heart_beat.rs +++ b/src/storage/txn/commands/txn_heart_beat.rs @@ -113,7 +113,7 @@ pub mod tests { }; pub fn must_success( - engine: &E, + engine: &mut E, primary_key: &[u8], start_ts: impl Into, advise_ttl: u64, @@ -154,7 +154,7 @@ pub mod tests { } pub fn must_err( - engine: &E, + engine: &mut E, primary_key: &[u8], start_ts: impl Into, advise_ttl: u64, @@ -188,50 +188,50 @@ pub mod tests { #[test] fn test_txn_heart_beat() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k, v) = (b"k1", b"v1"); - let test = |ts| { + fn test(ts: u64, k: &[u8], engine: &mut impl Engine) { // Do nothing if 
advise_ttl is less smaller than current TTL. - must_success(&engine, k, ts, 90, 100); + must_success(engine, k, ts, 90, 100); // Return the new TTL if the TTL when the TTL is updated. - must_success(&engine, k, ts, 110, 110); + must_success(engine, k, ts, 110, 110); // The lock's TTL is updated and persisted into the db. - must_success(&engine, k, ts, 90, 110); + must_success(engine, k, ts, 90, 110); // Heart beat another transaction's lock will lead to an error. - must_err(&engine, k, ts - 1, 150); - must_err(&engine, k, ts + 1, 150); + must_err(engine, k, ts - 1, 150); + must_err(engine, k, ts + 1, 150); // The existing lock is not changed. - must_success(&engine, k, ts, 90, 110); - }; + must_success(engine, k, ts, 90, 110); + } // No lock. - must_err(&engine, k, 5, 100); + must_err(&mut engine, k, 5, 100); // Create a lock with TTL=100. // The initial TTL will be set to 0 after calling must_prewrite_put. Update it // first. - must_prewrite_put(&engine, k, v, k, 5); - must_locked(&engine, k, 5); - must_success(&engine, k, 5, 100, 100); + must_prewrite_put(&mut engine, k, v, k, 5); + must_locked(&mut engine, k, 5); + must_success(&mut engine, k, 5, 100, 100); - test(5); + test(5, k, &mut engine); - must_locked(&engine, k, 5); - must_commit(&engine, k, 5, 10); - must_unlocked(&engine, k); + must_locked(&mut engine, k, 5); + must_commit(&mut engine, k, 5, 10); + must_unlocked(&mut engine, k); // No lock. 
- must_err(&engine, k, 5, 100); - must_err(&engine, k, 10, 100); + must_err(&mut engine, k, 5, 100); + must_err(&mut engine, k, 10, 100); - must_acquire_pessimistic_lock(&engine, k, k, 8, 15); - must_pessimistic_locked(&engine, k, 8, 15); - must_success(&engine, k, 8, 100, 100); + must_acquire_pessimistic_lock(&mut engine, k, k, 8, 15); + must_pessimistic_locked(&mut engine, k, 8, 15); + must_success(&mut engine, k, 8, 100, 100); - test(8); + test(8, k, &mut engine); - must_pessimistic_locked(&engine, k, 8, 15); + must_pessimistic_locked(&mut engine, k, 8, 15); } } diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index a7c38e147ee..c3967820b34 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -473,7 +473,7 @@ impl Scheduler { .pool .spawn(async move { match unsafe { - with_tls_engine(|engine: &E| engine.precheck_write_with_ctx(&cmd_ctx)) + with_tls_engine(|engine: &mut E| engine.precheck_write_with_ctx(&cmd_ctx)) } { // Precheck failed, try to return err early. Err(e) => { @@ -577,7 +577,8 @@ impl Scheduler { } // The program is currently in scheduler worker threads. // Safety: `self.inner.worker_pool` should ensure that a TLS engine exists. - match unsafe { with_tls_engine(|engine: &E| kv::snapshot(engine, snap_ctx)) }.await + match unsafe { with_tls_engine(|engine: &mut E| kv::snapshot(engine, snap_ctx)) } + .await { Ok(snapshot) => { SCHED_STAGE_COUNTER_VEC.get(tag).snapshot_ok.inc(); @@ -943,7 +944,7 @@ impl Scheduler { { // Safety: `self.sched_pool` ensures a TLS engine exists. unsafe { - with_tls_engine(|engine: &E| { + with_tls_engine(|engine: &mut E| { // We skip writing the raftstore, but to improve CDC old value hit rate, // we should send the old values to the CDC scheduler. engine.schedule_txn_extra(to_be_write.extra); @@ -1152,7 +1153,7 @@ impl Scheduler { // Safety: `self.sched_pool` ensures a TLS engine exists. 
unsafe { - with_tls_engine(|engine: &E| { + with_tls_engine(|engine: &mut E| { if let Err(e) = engine.async_write_ext(&ctx, to_be_write, engine_cb, proposed_cb, committed_cb) { diff --git a/src/storage/txn/store.rs b/src/storage/txn/store.rs index 7300074bfde..b2f25cff640 100644 --- a/src/storage/txn/store.rs +++ b/src/storage/txn/store.rs @@ -663,7 +663,7 @@ mod tests { impl TestStore { fn new(key_num: u64) -> TestStore { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let keys: Vec = (START_ID..START_ID + key_num) .map(|i| format!("{}{}", KEY_PREFIX, i)) .collect(); diff --git a/tests/benches/hierarchy/engine/mod.rs b/tests/benches/hierarchy/engine/mod.rs index 85e6ce77e33..e089ef013ec 100644 --- a/tests/benches/hierarchy/engine/mod.rs +++ b/tests/benches/hierarchy/engine/mod.rs @@ -40,9 +40,9 @@ fn bench_engine_snapshot>( bencher: &mut Bencher<'_>, config: &BenchConfig, ) { - let engine = config.engine_factory.build(); + let mut engine = config.engine_factory.build(); bencher.iter(|| { - black_box(&engine) + black_box(&mut engine) .snapshot(black_box(Default::default())) .unwrap() }); @@ -53,7 +53,7 @@ fn bench_engine_get>( bencher: &mut Bencher<'_>, config: &BenchConfig, ) { - let engine = config.engine_factory.build(); + let mut engine = config.engine_factory.build(); let test_kvs: Vec = KvGenerator::with_seed( config.key_length, config.value_length, diff --git a/tests/benches/hierarchy/mvcc/mod.rs b/tests/benches/hierarchy/mvcc/mod.rs index f88533171c3..f57946a11cf 100644 --- a/tests/benches/hierarchy/mvcc/mod.rs +++ b/tests/benches/hierarchy/mvcc/mod.rs @@ -14,7 +14,7 @@ use txn_types::{Key, Mutation, TimeStamp}; use super::{BenchConfig, EngineFactory, DEFAULT_ITERATIONS, DEFAULT_KV_GENERATOR_SEED}; fn setup_prewrite( - engine: &E, + engine: &mut E, config: &BenchConfig, start_ts: impl Into, ) -> (E::Snap, Vec) @@ -66,7 +66,7 @@ where } fn mvcc_prewrite>(b: &mut Bencher<'_>, config: 
&BenchConfig) { - let engine = config.engine_factory.build(); + let mut engine = config.engine_factory.build(); let cm = ConcurrencyManager::new(1.into()); b.iter_batched( || { @@ -114,10 +114,10 @@ fn mvcc_prewrite>(b: &mut Bencher<'_>, config: &B } fn mvcc_commit>(b: &mut Bencher<'_>, config: &BenchConfig) { - let engine = config.engine_factory.build(); + let mut engine = config.engine_factory.build(); let cm = ConcurrencyManager::new(1.into()); b.iter_batched( - || setup_prewrite(&engine, config, 1), + || setup_prewrite(&mut engine, config, 1), |(snapshot, keys)| { for key in keys { let mut txn = mvcc::MvccTxn::new(1.into(), cm.clone()); @@ -133,10 +133,10 @@ fn mvcc_rollback_prewrote>( b: &mut Bencher<'_>, config: &BenchConfig, ) { - let engine = config.engine_factory.build(); + let mut engine = config.engine_factory.build(); let cm = ConcurrencyManager::new(1.into()); b.iter_batched( - || setup_prewrite(&engine, config, 1), + || setup_prewrite(&mut engine, config, 1), |(snapshot, keys)| { for key in keys { let mut txn = mvcc::MvccTxn::new(1.into(), cm.clone()); @@ -159,10 +159,10 @@ fn mvcc_rollback_conflict>( b: &mut Bencher<'_>, config: &BenchConfig, ) { - let engine = config.engine_factory.build(); + let mut engine = config.engine_factory.build(); let cm = ConcurrencyManager::new(1.into()); b.iter_batched( - || setup_prewrite(&engine, config, 2), + || setup_prewrite(&mut engine, config, 2), |(snapshot, keys)| { for key in keys { let mut txn = mvcc::MvccTxn::new(1.into(), cm.clone()); @@ -185,7 +185,7 @@ fn mvcc_rollback_non_prewrote>( b: &mut Bencher<'_>, config: &BenchConfig, ) { - let engine = config.engine_factory.build(); + let mut engine = config.engine_factory.build(); let cm = ConcurrencyManager::new(1.into()); b.iter_batched( || { @@ -221,7 +221,7 @@ fn mvcc_reader_load_lock>( b: &mut Bencher<'_>, config: &BenchConfig, ) { - let engine = config.engine_factory.build(); + let mut engine = config.engine_factory.build(); let test_keys: Vec = 
KvGenerator::with_seed( config.key_length, config.value_length, @@ -251,7 +251,7 @@ fn mvcc_reader_seek_write>( b: &mut Bencher<'_>, config: &BenchConfig, ) { - let engine = config.engine_factory.build(); + let mut engine = config.engine_factory.build(); b.iter_batched( || { let snapshot = engine.snapshot(Default::default()).unwrap(); diff --git a/tests/benches/hierarchy/txn/mod.rs b/tests/benches/hierarchy/txn/mod.rs index 840d4ac81fa..0bdb7ae8870 100644 --- a/tests/benches/hierarchy/txn/mod.rs +++ b/tests/benches/hierarchy/txn/mod.rs @@ -14,7 +14,7 @@ use txn_types::{Key, Mutation, TimeStamp}; use super::{BenchConfig, EngineFactory, DEFAULT_ITERATIONS}; fn setup_prewrite( - engine: &E, + engine: &mut E, config: &BenchConfig, start_ts: impl Into, ) -> Vec @@ -61,7 +61,7 @@ where } fn txn_prewrite>(b: &mut Bencher<'_>, config: &BenchConfig) { - let engine = config.engine_factory.build(); + let mut engine = config.engine_factory.build(); let ctx = Context::default(); let cm = ConcurrencyManager::new(1.into()); b.iter_batched( @@ -109,11 +109,12 @@ fn txn_prewrite>(b: &mut Bencher<'_>, config: &Be } fn txn_commit>(b: &mut Bencher<'_>, config: &BenchConfig) { - let engine = config.engine_factory.build(); + let mut engine = config.engine_factory.build(); + let mut engine_clone = engine.clone(); let ctx = Context::default(); let cm = ConcurrencyManager::new(1.into()); b.iter_batched( - || setup_prewrite(&engine, config, 1), + || setup_prewrite(&mut engine_clone, config, 1), |keys| { for key in keys { let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -132,11 +133,12 @@ fn txn_rollback_prewrote>( b: &mut Bencher<'_>, config: &BenchConfig, ) { - let engine = config.engine_factory.build(); + let mut engine = config.engine_factory.build(); + let mut engine_clone = engine.clone(); let ctx = Context::default(); let cm = ConcurrencyManager::new(1.into()); b.iter_batched( - || setup_prewrite(&engine, config, 1), + || setup_prewrite(&mut engine_clone, config, 1), 
|keys| { for key in keys { let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -155,11 +157,12 @@ fn txn_rollback_conflict>( b: &mut Bencher<'_>, config: &BenchConfig, ) { - let engine = config.engine_factory.build(); + let mut engine = config.engine_factory.build(); + let mut engine_clone = engine.clone(); let ctx = Context::default(); let cm = ConcurrencyManager::new(1.into()); b.iter_batched( - || setup_prewrite(&engine, config, 2), + || setup_prewrite(&mut engine_clone, config, 2), |keys| { for key in keys { let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -178,7 +181,7 @@ fn txn_rollback_non_prewrote>( b: &mut Bencher<'_>, config: &BenchConfig, ) { - let engine = config.engine_factory.build(); + let mut engine = config.engine_factory.build(); let ctx = Context::default(); let cm = ConcurrencyManager::new(1.into()); b.iter_batched( diff --git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index 7802a90beac..a949570ebe1 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -119,7 +119,7 @@ impl RaftStoreRouter for SyncBenchRouter { impl LocalReadRouter for SyncBenchRouter { fn read( - &self, + &mut self, _: Option, req: RaftCmdRequest, cb: Callback, @@ -127,7 +127,7 @@ impl LocalReadRouter for SyncBenchRouter { self.send_command(req, cb, RaftCmdExtraOpts::default()) } - fn release_snapshot_cache(&self) {} + fn release_snapshot_cache(&mut self) {} } fn new_engine() -> (TempDir, RocksEngine) { @@ -180,7 +180,7 @@ fn bench_async_snapshot(b: &mut test::Bencher) { region.mut_region_epoch().set_version(2); region.mut_region_epoch().set_conf_ver(5); let (_tmp, db) = new_engine(); - let kv = RaftKv::new( + let mut kv = RaftKv::new( SyncBenchRouter::new(region.clone(), db.clone()), db, Arc::new(RwLock::new(HashSet::default())), diff --git a/tests/benches/misc/storage/incremental_get.rs b/tests/benches/misc/storage/incremental_get.rs index a57bd3c90d5..336f99cd35e 100644 --- 
a/tests/benches/misc/storage/incremental_get.rs +++ b/tests/benches/misc/storage/incremental_get.rs @@ -30,7 +30,7 @@ fn table_lookup_gen_data() -> (SnapshotStore>, Vec) { .unwrap(); store.commit(Context::default(), keys, 1, 2).unwrap(); - let engine = store.get_engine(); + let mut engine = store.get_engine(); let db = engine.get_rocksdb().get_sync_db(); db.compact_range_cf(db.cf_handle("write").unwrap(), None, None); db.compact_range_cf(db.cf_handle("default").unwrap(), None, None); diff --git a/tests/failpoints/cases/test_gc_metrics.rs b/tests/failpoints/cases/test_gc_metrics.rs index f96c03fe9f9..e698031f0bc 100644 --- a/tests/failpoints/cases/test_gc_metrics.rs +++ b/tests/failpoints/cases/test_gc_metrics.rs @@ -53,14 +53,14 @@ fn test_txn_create_compaction_filter() { cfg.writecf.dynamic_level_bytes = false; let dir = tempfile::TempDir::new().unwrap(); let builder = TestEngineBuilder::new().path(dir.path()); - let engine = builder.build_with_cfg(&cfg).unwrap(); + let mut engine = builder.build_with_cfg(&cfg).unwrap(); let raw_engine = engine.get_rocksdb(); let mut gc_runner = TestGcRunner::new(0); let value = vec![b'v'; 512]; - must_prewrite_put(&engine, b"zkey", &value, b"zkey", 100); - must_commit(&engine, b"zkey", 100, 110); + must_prewrite_put(&mut engine, b"zkey", &value, b"zkey", 100); + must_commit(&mut engine, b"zkey", 100, 110); gc_runner .safe_point(TimeStamp::new(1).into_inner()) @@ -87,27 +87,27 @@ fn test_txn_mvcc_filtered() { MVCC_VERSIONS_HISTOGRAM.reset(); GC_COMPACTION_FILTERED.reset(); - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let raw_engine = engine.get_rocksdb(); let value = vec![b'v'; 512]; let mut gc_runner = TestGcRunner::new(0); // GC can't delete keys after the given safe point. 
- must_prewrite_put(&engine, b"zkey", &value, b"zkey", 100); - must_commit(&engine, b"zkey", 100, 110); + must_prewrite_put(&mut engine, b"zkey", &value, b"zkey", 100); + must_commit(&mut engine, b"zkey", 100, 110); gc_runner.safe_point(50).gc(&raw_engine); - must_get(&engine, b"zkey", 110, &value); + must_get(&mut engine, b"zkey", 110, &value); // GC can't delete keys before the safe ponit if they are latest versions. gc_runner.safe_point(200).gc(&raw_engine); - must_get(&engine, b"zkey", 110, &value); + must_get(&mut engine, b"zkey", 110, &value); - must_prewrite_put(&engine, b"zkey", &value, b"zkey", 120); - must_commit(&engine, b"zkey", 120, 130); + must_prewrite_put(&mut engine, b"zkey", &value, b"zkey", 120); + must_commit(&mut engine, b"zkey", 120, 130); // GC can't delete the latest version before the safe ponit. gc_runner.safe_point(115).gc(&raw_engine); - must_get(&engine, b"zkey", 110, &value); + must_get(&mut engine, b"zkey", 110, &value); // GC a version will also delete the key on default CF. 
gc_runner.safe_point(200).gc(&raw_engine); @@ -135,7 +135,7 @@ fn test_txn_gc_keys_handled() { GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED.reset(); let engine = TestEngineBuilder::new().build().unwrap(); - let prefixed_engine = PrefixedEngine(engine.clone()); + let mut prefixed_engine = PrefixedEngine(engine.clone()); let (tx, _rx) = mpsc::channel(); let feature_gate = FeatureGate::default(); @@ -172,10 +172,10 @@ fn test_txn_gc_keys_handled() { for i in 0..3 { let k = format!("k{:02}", i).into_bytes(); - must_prewrite_put(&prefixed_engine, &k, b"value", &k, 101); - must_commit(&prefixed_engine, &k, 101, 102); - must_prewrite_delete(&prefixed_engine, &k, &k, 151); - must_commit(&prefixed_engine, &k, 151, 152); + must_prewrite_put(&mut prefixed_engine, &k, b"value", &k, 101); + must_commit(&mut prefixed_engine, &k, 101, 102); + must_prewrite_delete(&mut prefixed_engine, &k, &k, 151); + must_commit(&mut prefixed_engine, &k, 151, 152); } db.flush_cf(cf, true).unwrap(); diff --git a/tests/failpoints/cases/test_gc_worker.rs b/tests/failpoints/cases/test_gc_worker.rs index 73031b10283..5845d4d4eb7 100644 --- a/tests/failpoints/cases/test_gc_worker.rs +++ b/tests/failpoints/cases/test_gc_worker.rs @@ -315,16 +315,16 @@ fn test_collect_applying_locks() { // correctly. 
#[test] fn test_error_in_compaction_filter() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let raw_engine = engine.get_rocksdb(); let large_value = vec![b'x'; 300]; - must_prewrite_put(&engine, b"zkey", &large_value, b"zkey", 101); - must_commit(&engine, b"zkey", 101, 102); - must_prewrite_put(&engine, b"zkey", &large_value, b"zkey", 103); - must_commit(&engine, b"zkey", 103, 104); - must_prewrite_delete(&engine, b"zkey", b"zkey", 105); - must_commit(&engine, b"zkey", 105, 106); + must_prewrite_put(&mut engine, b"zkey", &large_value, b"zkey", 101); + must_commit(&mut engine, b"zkey", 101, 102); + must_prewrite_put(&mut engine, b"zkey", &large_value, b"zkey", 103); + must_commit(&mut engine, b"zkey", 103, 104); + must_prewrite_delete(&mut engine, b"zkey", b"zkey", 105); + must_commit(&mut engine, b"zkey", 105, 106); let fp = "write_compaction_filter_flush_write_batch"; fail::cfg(fp, "return").unwrap(); @@ -339,8 +339,8 @@ fn test_error_in_compaction_filter() { } // Although versions on default CF is not cleaned, write CF is GCed correctly. 
- must_get_none(&engine, b"zkey", 102); - must_get_none(&engine, b"zkey", 104); + must_get_none(&mut engine, b"zkey", 102); + must_get_none(&mut engine, b"zkey", 104); fail::remove(fp); } diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 933fce2add0..101cf30d446 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -512,7 +512,7 @@ fn test_async_commit_prewrite_with_stale_max_ts() { let mut cluster = new_server_cluster(0, 2); cluster.run(); - let engine = cluster + let mut engine = cluster .sim .read() .unwrap() diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index d3e9e08500f..1a6f2da9b87 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -42,28 +42,28 @@ use txn_types::{Key, Mutation, PessimisticLock, TimeStamp}; #[test] fn test_txn_failpoints() { - let engine = TestEngineBuilder::new().build().unwrap(); + let mut engine = TestEngineBuilder::new().build().unwrap(); let (k, v) = (b"k", b"v"); fail::cfg("prewrite", "return(WriteConflict)").unwrap(); - must_prewrite_put_err(&engine, k, v, k, 10); + must_prewrite_put_err(&mut engine, k, v, k, 10); fail::remove("prewrite"); - must_prewrite_put(&engine, k, v, k, 10); + must_prewrite_put(&mut engine, k, v, k, 10); fail::cfg("commit", "delay(100)").unwrap(); - must_commit(&engine, k, 10, 20); + must_commit(&mut engine, k, 10, 20); fail::remove("commit"); let v1 = b"v1"; let (k2, v2) = (b"k2", b"v2"); - must_acquire_pessimistic_lock(&engine, k, k, 30, 30); + must_acquire_pessimistic_lock(&mut engine, k, k, 30, 30); fail::cfg("pessimistic_prewrite", "return()").unwrap(); - must_pessimistic_prewrite_put_err(&engine, k, v1, k, 30, 30, DoPessimisticCheck); - must_prewrite_put(&engine, k2, v2, k2, 31); + must_pessimistic_prewrite_put_err(&mut engine, k, v1, k, 30, 30, DoPessimisticCheck); + must_prewrite_put(&mut engine, k2, 
v2, k2, 31); fail::remove("pessimistic_prewrite"); - must_pessimistic_prewrite_put(&engine, k, v1, k, 30, 30, DoPessimisticCheck); - must_commit(&engine, k, 30, 40); - must_commit(&engine, k2, 31, 41); - must_get(&engine, k, 50, v1); - must_get(&engine, k2, 50, v2); + must_pessimistic_prewrite_put(&mut engine, k, v1, k, 30, 30, DoPessimisticCheck); + must_commit(&mut engine, k, 30, 40); + must_commit(&mut engine, k2, 31, 41); + must_get(&mut engine, k, 50, v1); + must_get(&mut engine, k2, 50, v2); } #[test] @@ -338,8 +338,8 @@ fn test_max_commit_ts_error() { cm.read_range_check(None, None, |_, _| Err(())).unwrap(); // Two locks should be written, the second one does not async commit. - let l1 = must_locked(&storage.get_engine(), b"k1", 10); - let l2 = must_locked(&storage.get_engine(), b"k2", 10); + let l1 = must_locked(&mut storage.get_engine(), b"k1", 10); + let l2 = must_locked(&mut storage.get_engine(), b"k2", 10); assert!(l1.use_async_commit); assert!(!l2.use_async_commit); } diff --git a/tests/failpoints/cases/test_ttl.rs b/tests/failpoints/cases/test_ttl.rs index 25ffcf6ff4c..12449752285 100644 --- a/tests/failpoints/cases/test_ttl.rs +++ b/tests/failpoints/cases/test_ttl.rs @@ -176,7 +176,7 @@ fn test_ttl_snapshot() { fn test_ttl_snapshot_impl() { fail::cfg("ttl_current_ts", "return(100)").unwrap(); let dir = tempfile::TempDir::new().unwrap(); - let engine = TestEngineBuilder::new() + let mut engine = TestEngineBuilder::new() .path(dir.path()) .api_version(F::TAG) .build() @@ -273,7 +273,7 @@ fn test_ttl_iterator() { fn test_ttl_iterator_impl() { fail::cfg("ttl_current_ts", "return(100)").unwrap(); let dir = tempfile::TempDir::new().unwrap(); - let engine = TestEngineBuilder::new() + let mut engine = TestEngineBuilder::new() .path(dir.path()) .api_version(F::TAG) .build() diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index f44b2f99642..6bc7e2fb7b8 100644 --- a/tests/integrations/raftstore/test_merge.rs 
+++ b/tests/integrations/raftstore/test_merge.rs @@ -1161,7 +1161,7 @@ fn test_sync_max_ts_after_region_merge() { let right = cluster.get_region(b"k3"); let cm = cluster.sim.read().unwrap().get_concurrency_manager(1); - let storage = cluster + let mut storage = cluster .sim .read() .unwrap() @@ -1169,7 +1169,7 @@ fn test_sync_max_ts_after_region_merge() { .get(&1) .unwrap() .clone(); - let wait_for_synced = |cluster: &mut Cluster| { + let mut wait_for_synced = |cluster: &mut Cluster| { let region_id = right.get_id(); let leader = cluster.leader_of_region(region_id).unwrap(); let epoch = cluster.get_region_epoch(region_id); diff --git a/tests/integrations/raftstore/test_transfer_leader.rs b/tests/integrations/raftstore/test_transfer_leader.rs index b360bd3da58..130290e01b8 100644 --- a/tests/integrations/raftstore/test_transfer_leader.rs +++ b/tests/integrations/raftstore/test_transfer_leader.rs @@ -232,7 +232,7 @@ fn test_sync_max_ts_after_leader_transfer() { cluster.run(); let cm = cluster.sim.read().unwrap().get_concurrency_manager(1); - let storage = cluster + let mut storage = cluster .sim .read() .unwrap() @@ -240,7 +240,7 @@ fn test_sync_max_ts_after_leader_transfer() { .get(&1) .unwrap() .clone(); - let wait_for_synced = |cluster: &mut Cluster| { + let mut wait_for_synced = |cluster: &mut Cluster| { let region_id = 1; let leader = cluster.leader_of_region(region_id).unwrap(); let epoch = cluster.get_region_epoch(region_id); diff --git a/tests/integrations/storage/test_raftkv.rs b/tests/integrations/storage/test_raftkv.rs index 20a3e5ebeaf..01993fb89cd 100644 --- a/tests/integrations/storage/test_raftkv.rs +++ b/tests/integrations/storage/test_raftkv.rs @@ -24,7 +24,7 @@ fn test_raftkv() { let region = cluster.get_region(b""); let leader_id = cluster.leader_of_region(region.get_id()).unwrap(); - let storage = cluster.sim.rl().storages[&leader_id.get_id()].clone(); + let mut storage = cluster.sim.rl().storages[&leader_id.get_id()].clone(); let mut ctx = 
Context::default(); ctx.set_region_id(region.get_id()); @@ -35,11 +35,11 @@ fn test_raftkv() { ..Default::default() }; - get_put(snap_ctx.clone(), &storage); - batch(snap_ctx.clone(), &storage); - seek(snap_ctx.clone(), &storage); - near_seek(snap_ctx.clone(), &storage); - cf(snap_ctx, &storage); + get_put(snap_ctx.clone(), &mut storage); + batch(snap_ctx.clone(), &mut storage); + seek(snap_ctx.clone(), &mut storage); + near_seek(snap_ctx.clone(), &mut storage); + cf(snap_ctx, &mut storage); empty_write(&ctx, &storage); wrong_context(&ctx, &storage); // TODO: test multiple node @@ -59,7 +59,7 @@ fn test_read_leader_in_lease() { let region = cluster.get_region(b""); let leader = cluster.leader_of_region(region.get_id()).unwrap(); - let storage = cluster.sim.rl().storages[&leader.get_id()].clone(); + let mut storage = cluster.sim.rl().storages[&leader.get_id()].clone(); let mut ctx = Context::default(); ctx.set_region_id(region.get_id()); @@ -71,14 +71,14 @@ fn test_read_leader_in_lease() { }; // write some data - assert_none(snap_ctx.clone(), &storage, k2); + assert_none(snap_ctx.clone(), &mut storage, k2); must_put(&ctx, &storage, k2, v2); // isolate leader cluster.add_send_filter(IsolationFilterFactory::new(leader.get_store_id())); // leader still in lease, check if can read on leader - assert_eq!(can_read(snap_ctx, &storage, k2, v2), true); + assert_eq!(can_read(snap_ctx, &mut storage, k2, v2), true); } #[test] @@ -95,7 +95,7 @@ fn test_read_index_on_replica() { let region = cluster.get_region(b""); let leader = cluster.leader_of_region(region.get_id()).unwrap(); - let storage = cluster.sim.rl().storages[&leader.get_id()].clone(); + let mut storage = cluster.sim.rl().storages[&leader.get_id()].clone(); let mut ctx = Context::default(); ctx.set_region_id(region.get_id()); @@ -108,7 +108,7 @@ fn test_read_index_on_replica() { // write some data let peers = region.get_peers(); - assert_none(snap_ctx, &storage, k2); + assert_none(snap_ctx, &mut storage, k2); 
must_put(&ctx, &storage, k2, v2); // read on follower @@ -155,7 +155,7 @@ fn test_read_on_replica() { let region = cluster.get_region(b""); let leader = cluster.leader_of_region(region.get_id()).unwrap(); - let leader_storage = cluster.sim.rl().storages[&leader.get_id()].clone(); + let mut leader_storage = cluster.sim.rl().storages[&leader.get_id()].clone(); let mut leader_ctx = Context::default(); leader_ctx.set_region_id(region.get_id()); @@ -168,7 +168,7 @@ fn test_read_on_replica() { // write some data let peers = region.get_peers(); - assert_none(leader_snap_ctx, &leader_storage, k2); + assert_none(leader_snap_ctx, &mut leader_storage, k2); must_put(&leader_ctx, &leader_storage, k2, v2); // read on follower @@ -192,19 +192,19 @@ fn test_read_on_replica() { pb_ctx: &follower_ctx, ..Default::default() }; - let follower_storage = cluster.sim.rl().storages[&follower_id].clone(); - assert_has(follower_snap_ctx.clone(), &follower_storage, k2, v2); + let mut follower_storage = cluster.sim.rl().storages[&follower_id].clone(); + assert_has(follower_snap_ctx.clone(), &mut follower_storage, k2, v2); must_put(&leader_ctx, &leader_storage, k3, v3); - assert_has(follower_snap_ctx.clone(), &follower_storage, k3, v3); + assert_has(follower_snap_ctx.clone(), &mut follower_storage, k3, v3); cluster.stop_node(follower_id); must_put(&leader_ctx, &leader_storage, k4, v4); cluster.run_node(follower_id).unwrap(); - let follower_storage = cluster.sim.rl().storages[&follower_id].clone(); + let mut follower_storage = cluster.sim.rl().storages[&follower_id].clone(); // sleep to ensure the follower has received a heartbeat from the leader thread::sleep(time::Duration::from_millis(300)); - assert_has(follower_snap_ctx, &follower_storage, k4, v4); + assert_has(follower_snap_ctx, &mut follower_storage, k4, v4); } #[test] @@ -263,7 +263,7 @@ fn test_read_on_replica_check_memory_locks() { key_ranges: vec![range], ..Default::default() }; - let follower_storage = 
cluster.sim.rl().storages[&follower_id].clone(); + let mut follower_storage = cluster.sim.rl().storages[&follower_id].clone(); match follower_storage.snapshot(follower_snap_ctx) { Err(Error(box ErrorInner::KeyIsLocked(lock_info))) => { assert_eq!(lock_info, lock.into_lock_info(raw_key.to_vec())) @@ -397,12 +397,12 @@ fn must_delete_cf(ctx: &Context, engine: &E, cf: CfName, key: &[u8]) engine.delete_cf(ctx, cf, Key::from_raw(key)).unwrap(); } -fn assert_has(ctx: SnapContext<'_>, engine: &E, key: &[u8], value: &[u8]) { +fn assert_has(ctx: SnapContext<'_>, engine: &mut E, key: &[u8], value: &[u8]) { let snapshot = engine.snapshot(ctx).unwrap(); assert_eq!(snapshot.get(&Key::from_raw(key)).unwrap().unwrap(), value); } -fn can_read(ctx: SnapContext<'_>, engine: &E, key: &[u8], value: &[u8]) -> bool { +fn can_read(ctx: SnapContext<'_>, engine: &mut E, key: &[u8], value: &[u8]) -> bool { if let Ok(s) = engine.snapshot(ctx) { assert_eq!(s.get(&Key::from_raw(key)).unwrap().unwrap(), value); return true; @@ -412,7 +412,7 @@ fn can_read(ctx: SnapContext<'_>, engine: &E, key: &[u8], value: &[u8 fn assert_has_cf( ctx: SnapContext<'_>, - engine: &E, + engine: &mut E, cf: CfName, key: &[u8], value: &[u8], @@ -424,19 +424,19 @@ fn assert_has_cf( ); } -fn assert_none(ctx: SnapContext<'_>, engine: &E, key: &[u8]) { +fn assert_none(ctx: SnapContext<'_>, engine: &mut E, key: &[u8]) { let snapshot = engine.snapshot(ctx).unwrap(); assert_eq!(snapshot.get(&Key::from_raw(key)).unwrap(), None); } -fn assert_none_cf(ctx: SnapContext<'_>, engine: &E, cf: CfName, key: &[u8]) { +fn assert_none_cf(ctx: SnapContext<'_>, engine: &mut E, cf: CfName, key: &[u8]) { let snapshot = engine.snapshot(ctx).unwrap(); assert_eq!(snapshot.get_cf(cf, &Key::from_raw(key)).unwrap(), None); } fn assert_seek( ctx: SnapContext<'_>, - engine: &E, + engine: &mut E, cf: CfName, key: &[u8], pair: (&[u8], &[u8]), @@ -479,7 +479,7 @@ fn assert_near_reverse_seek(cursor: &mut Cursor, key: &[u8], pai 
assert_eq!(cursor.value(&mut statistics), pair.1); } -fn get_put(ctx: SnapContext<'_>, engine: &E) { +fn get_put(ctx: SnapContext<'_>, engine: &mut E) { assert_none(ctx.clone(), engine, b"x"); must_put(ctx.pb_ctx, engine, b"x", b"1"); assert_has(ctx.clone(), engine, b"x", b"1"); @@ -487,7 +487,7 @@ fn get_put(ctx: SnapContext<'_>, engine: &E) { assert_has(ctx, engine, b"x", b"2"); } -fn batch(ctx: SnapContext<'_>, engine: &E) { +fn batch(ctx: SnapContext<'_>, engine: &mut E) { engine .write( ctx.pb_ctx, @@ -513,7 +513,7 @@ fn batch(ctx: SnapContext<'_>, engine: &E) { assert_none(ctx, engine, b"y"); } -fn seek(ctx: SnapContext<'_>, engine: &E) { +fn seek(ctx: SnapContext<'_>, engine: &mut E) { must_put(ctx.pb_ctx, engine, b"x", b"1"); assert_seek(ctx.clone(), engine, CF_DEFAULT, b"x", (b"x", b"1")); assert_seek(ctx.clone(), engine, CF_DEFAULT, b"a", (b"x", b"1")); @@ -536,7 +536,7 @@ fn seek(ctx: SnapContext<'_>, engine: &E) { must_delete(ctx.pb_ctx, engine, b"z"); } -fn near_seek(ctx: SnapContext<'_>, engine: &E) { +fn near_seek(ctx: SnapContext<'_>, engine: &mut E) { must_put(ctx.pb_ctx, engine, b"x", b"1"); must_put(ctx.pb_ctx, engine, b"z", b"2"); let snapshot = engine.snapshot(ctx.clone()).unwrap(); @@ -562,7 +562,7 @@ fn near_seek(ctx: SnapContext<'_>, engine: &E) { } // TODO: remove following as the code path of cf is the same. -fn cf(ctx: SnapContext<'_>, engine: &E) { +fn cf(ctx: SnapContext<'_>, engine: &mut E) { assert_none_cf(ctx.clone(), engine, "default", b"key"); must_put_cf(ctx.pb_ctx, engine, "default", b"key", b"value"); assert_has_cf(ctx.clone(), engine, "default", b"key", b"value"); From a7e8153fc6f557eef4fda0662c2c024f0fa15a0d Mon Sep 17 00:00:00 2001 From: Lei Zhao Date: Wed, 28 Sep 2022 16:25:44 +0800 Subject: [PATCH 0247/1149] tikv_util: make LruCache sound (#13552) close tikv/tikv#13551 It's error-prone to use reference/`Box` and raw pointer/`NonNull` at the same time. This PR replaces `Box` by `NonNull` and always use it as parameter. 
Signed-off-by: youjiali1995 Co-authored-by: Ti Chi Robot --- components/tikv_util/src/lru.rs | 83 +++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/components/tikv_util/src/lru.rs b/components/tikv_util/src/lru.rs index d8d2385fc34..2488fe7ef36 100644 --- a/components/tikv_util/src/lru.rs +++ b/components/tikv_util/src/lru.rs @@ -20,37 +20,39 @@ struct ValueEntry { } struct Trace { - head: Box>, - tail: Box>, + head: NonNull>, + tail: NonNull>, tick: usize, sample_mask: usize, } #[inline] -unsafe fn suture(leading: &mut Record, following: &mut Record) { - leading.next = NonNull::new_unchecked(following); - following.prev = NonNull::new_unchecked(leading); +unsafe fn suture(mut leading: NonNull>, mut following: NonNull>) { + leading.as_mut().next = following; + following.as_mut().prev = leading; } #[inline] -unsafe fn cut_out(record: &mut Record) { - suture(record.prev.as_mut(), record.next.as_mut()) +unsafe fn cut_out(record: NonNull>) { + suture(record.as_ref().prev, record.as_ref().next) } impl Trace { fn new(sample_mask: usize) -> Trace { unsafe { - let mut head = Box::new(Record { - prev: NonNull::new_unchecked(1usize as _), - next: NonNull::new_unchecked(1usize as _), + let head = Box::leak(Box::new(Record { + prev: NonNull::dangling(), + next: NonNull::dangling(), key: MaybeUninit::uninit(), - }); - let mut tail = Box::new(Record { - prev: NonNull::new_unchecked(1usize as _), - next: NonNull::new_unchecked(1usize as _), + })) + .into(); + let tail = Box::leak(Box::new(Record { + prev: NonNull::dangling(), + next: NonNull::dangling(), key: MaybeUninit::uninit(), - }); - suture(&mut head, &mut tail); + })) + .into(); + suture(head, tail); Trace { head, @@ -69,17 +71,17 @@ impl Trace { } } - fn promote(&mut self, mut record: NonNull>) { + fn promote(&mut self, record: NonNull>) { unsafe { - cut_out(record.as_mut()); - suture(record.as_mut(), self.head.next.as_mut()); - suture(&mut self.head, record.as_mut()); + 
cut_out(record); + suture(record, self.head.as_ref().next); + suture(self.head, record); } } - fn delete(&mut self, mut record: NonNull>) { + fn delete(&mut self, record: NonNull>) { unsafe { - cut_out(record.as_mut()); + cut_out(record); ptr::drop_in_place(Box::from_raw(record.as_ptr()).key.as_mut_ptr()); } @@ -87,24 +89,24 @@ impl Trace { fn create(&mut self, key: K) -> NonNull> { let record = Box::leak(Box::new(Record { - prev: unsafe { NonNull::new_unchecked(&mut *self.head) }, - next: self.head.next, + prev: self.head, + next: unsafe { self.head.as_ref().next }, key: MaybeUninit::new(key), })) .into(); unsafe { - self.head.next.as_mut().prev = record; - self.head.next = record; + self.head.as_mut().next.as_mut().prev = record; + self.head.as_mut().next = record; } record } fn reuse_tail(&mut self, key: K) -> (K, NonNull>) { unsafe { - let mut record = self.tail.prev; - cut_out(record.as_mut()); - suture(record.as_mut(), self.head.next.as_mut()); - suture(&mut self.head, record.as_mut()); + let mut record = self.tail.as_ref().prev; + cut_out(record); + suture(record, self.head.as_ref().next); + suture(self.head, record); let old_key = record.as_mut().key.as_ptr().read(); record.as_mut().key = MaybeUninit::new(key); @@ -113,21 +115,21 @@ impl Trace { } fn clear(&mut self) { - let mut cur = self.head.next; unsafe { - while cur.as_ptr() != &mut *self.tail { - let tmp = cur.as_mut().next; + let mut cur = self.head.as_ref().next; + while cur != self.tail { + let tmp = cur.as_ref().next; ptr::drop_in_place(Box::from_raw(cur.as_ptr()).key.as_mut_ptr()); cur = tmp; } - suture(&mut self.head, &mut self.tail); + suture(self.head, self.tail); } } fn remove_tail(&mut self) -> K { unsafe { - let mut record = self.tail.prev; - cut_out(record.as_mut()); + let record = self.tail.as_ref().prev; + cut_out(record); let r = Box::from_raw(record.as_ptr()); r.key.as_ptr().read() @@ -135,6 +137,15 @@ impl Trace { } } +impl Drop for Trace { + fn drop(&mut self) { + unsafe { + 
drop(Box::from_raw(self.head.as_ptr())); + drop(Box::from_raw(self.tail.as_ptr())); + } + } +} + pub trait SizePolicy { fn current(&self) -> usize; fn on_insert(&mut self, key: &K, value: &V); From c6f4f1cbfd82a349996c3a2fce386879d1537149 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 28 Sep 2022 17:01:45 +0800 Subject: [PATCH 0248/1149] engine: fix performance issue of deleting files in ranges (#13540) close tikv/tikv#13534 Signed-off-by: tabokie --- Cargo.lock | 6 ++-- components/engine_rocks/src/misc.rs | 45 ++++++++++++++++++++--------- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 12f1271156d..bb3b33463cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2793,7 +2793,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#4c859a208355bc15ceb7dc1f05303f68acfb4791" +source = "git+https://github.com/tikv/rust-rocksdb.git#bd07e9e598db63574cf06edaeea3c4687eadff59" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2812,7 +2812,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#4c859a208355bc15ceb7dc1f05303f68acfb4791" +source = "git+https://github.com/tikv/rust-rocksdb.git#bd07e9e598db63574cf06edaeea3c4687eadff59" dependencies = [ "bzip2-sys", "cc", @@ -4655,7 +4655,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#4c859a208355bc15ceb7dc1f05303f68acfb4791" +source = "git+https://github.com/tikv/rust-rocksdb.git#bd07e9e598db63574cf06edaeea3c4687eadff59" dependencies = [ "libc 0.2.132", "librocksdb_sys", diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index e7c9ef547d8..482686ffd1a 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -4,6 +4,7 @@ use engine_traits::{ CfNamesExt, DeleteStrategy, ImportExt, 
IterOptions, Iterable, Iterator, MiscExt, Mutable, Range, Result, SstWriter, SstWriterBuilder, WriteBatch, WriteBatchExt, ALL_CFS, }; +use rocksdb::Range as RocksRange; use tikv_util::{box_try, keybuilder::KeyBuilder}; use crate::{ @@ -150,26 +151,42 @@ impl MiscExt for RocksEngine { match strategy { DeleteStrategy::DeleteFiles => { let handle = util::get_cf_handle(self.as_inner(), cf)?; - for r in ranges { - if r.start_key >= r.end_key { - continue; - } - self.as_inner() - .delete_files_in_range_cf(handle, r.start_key, r.end_key, false) - .map_err(r2e)?; + let rocks_ranges: Vec<_> = ranges + .iter() + .filter_map(|r| { + if r.start_key >= r.end_key { + None + } else { + Some(RocksRange::new(r.start_key, r.end_key)) + } + }) + .collect(); + if rocks_ranges.is_empty() { + return Ok(()); } + self.as_inner() + .delete_files_in_ranges_cf(handle, &rocks_ranges, false) + .map_err(r2e)?; } DeleteStrategy::DeleteBlobs => { let handle = util::get_cf_handle(self.as_inner(), cf)?; if self.is_titan() { - for r in ranges { - if r.start_key >= r.end_key { - continue; - } - self.as_inner() - .delete_blob_files_in_range_cf(handle, r.start_key, r.end_key, false) - .map_err(r2e)?; + let rocks_ranges: Vec<_> = ranges + .iter() + .filter_map(|r| { + if r.start_key >= r.end_key { + None + } else { + Some(RocksRange::new(r.start_key, r.end_key)) + } + }) + .collect(); + if rocks_ranges.is_empty() { + return Ok(()); } + self.as_inner() + .delete_blob_files_in_ranges_cf(handle, &rocks_ranges, false) + .map_err(r2e)?; } } DeleteStrategy::DeleteByRange => { From d96b8d100b6982f79dadd61a449c743889e21a88 Mon Sep 17 00:00:00 2001 From: Hu# Date: Thu, 29 Sep 2022 11:35:45 +0800 Subject: [PATCH 0249/1149] storage: Rollback all locks to prevent from losing the pessimistic lock in flashback (#13521) close tikv/tikv#13493 Rollback all locks to prevent from losing the pessimistic lock in flashback Signed-off-by: husharp --- .../txn/actions/flashback_to_version.rs | 89 +++++++++++++------ 
.../integrations/raftstore/test_flashback.rs | 10 +-- 2 files changed, 67 insertions(+), 32 deletions(-) diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index 5fcf0327c37..e160a4a43b9 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -1,10 +1,10 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use txn_types::{Key, Lock, LockType, TimeStamp, Write, WriteType}; +use txn_types::{Key, Lock, TimeStamp, Write, WriteType}; use crate::storage::{ mvcc::{MvccReader, MvccTxn, SnapshotReader, MAX_TXN_WRITE_SIZE}, - txn::{Error, ErrorInner, Result as TxnResult}, + txn::{actions::check_txn_status::rollback_lock, Error, ErrorInner, Result as TxnResult}, Snapshot, Statistics, }; @@ -85,7 +85,6 @@ pub fn flashback_to_version( start_ts: TimeStamp, commit_ts: TimeStamp, ) -> TxnResult { - let mut rows = 0; // To flashback the `CF_LOCK`, we need to delete all locks records whose // `start_ts` is greater than the specified version, and if it's not a // short-value `LockType::Put`, we need to delete the actual data from @@ -96,14 +95,16 @@ pub fn flashback_to_version( *next_lock_key = Some(key); break; } - txn.unlock_key(key.clone(), lock.is_pessimistic_txn()); - rows += 1; - // If the short value is none and it's a `LockType::Put`, we should delete the - // corresponding key from `CF_DEFAULT` as well. 
- if lock.short_value.is_none() && lock.lock_type == LockType::Put { - txn.delete_value(key, lock.ts); - rows += 1; - } + // To guarantee rollback with start ts of the locks + reader.start_ts = lock.ts; + rollback_lock( + txn, + reader, + key.clone(), + &lock, + lock.is_pessimistic_txn(), + true, + )?; } // To flashback the `CF_WRITE` and `CF_DEFAULT`, we need to write a new MVCC // record for each key in `self.keys` with its old value at `self.version`, @@ -127,7 +128,6 @@ pub fn flashback_to_version( start_ts, reader.load_data(&key, old_write.clone())?, ); - rows += 1; } Write::new(old_write.write_type, start_ts, old_write.short_value) } else { @@ -141,9 +141,8 @@ pub fn flashback_to_version( Write::new(WriteType::Delete, start_ts, None) }; txn.put_write(key.clone(), commit_ts, new_write.as_ref().to_bytes()); - rows += 1; } - Ok(rows) + Ok(txn.modifies.len()) } #[cfg(test)] @@ -156,14 +155,18 @@ pub mod tests { use super::*; use crate::storage::{ mvcc::tests::{must_get, must_get_none, write}, - txn::actions::{ - commit::tests::must_succeed as must_commit, - tests::{must_prewrite_delete, must_prewrite_put, must_rollback}, + txn::{ + actions::{ + acquire_pessimistic_lock::tests::must_pessimistic_locked, + commit::tests::must_succeed as must_commit, + tests::{must_prewrite_delete, must_prewrite_put, must_rollback}, + }, + tests::{must_acquire_pessimistic_lock, must_pessimistic_prewrite_put_err}, }, Engine, TestEngineBuilder, }; - fn must_flashback_write( + fn must_flashback_to_version( engine: &mut E, key: &[u8], version: impl Into, @@ -176,6 +179,10 @@ pub mod tests { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &ctx); let mut statistics = Statistics::default(); + let (key_locks, has_remain_locks) = + flashback_to_version_read_lock(&mut reader, &Some(key.clone()), &None, &mut statistics) + .unwrap(); + assert!(!has_remain_locks); let (key_old_writes, has_remain_writes) = 
flashback_to_version_read_write( &mut reader, 0, @@ -197,7 +204,7 @@ pub mod tests { &mut reader, &mut None, &mut Some(key), - vec![], + key_locks, key_old_writes, start_ts, commit_ts, @@ -236,43 +243,43 @@ pub mod tests { must_get(&mut engine, k, *ts.incr(), v2); // Flashback to version 1 with start_ts = 14, commit_ts = 15. assert_eq!( - must_flashback_write(&mut engine, k, 1, *ts.incr(), *ts.incr()), + must_flashback_to_version(&mut engine, k, 1, *ts.incr(), *ts.incr()), 1 ); must_get_none(&mut engine, k, *ts.incr()); // Flashback to version 2 with start_ts = 17, commit_ts = 18. assert_eq!( - must_flashback_write(&mut engine, k, 2, *ts.incr(), *ts.incr()), + must_flashback_to_version(&mut engine, k, 2, *ts.incr(), *ts.incr()), 1 ); must_get(&mut engine, k, *ts.incr(), v1); // Flashback to version 5 with start_ts = 20, commit_ts = 21. assert_eq!( - must_flashback_write(&mut engine, k, 5, *ts.incr(), *ts.incr()), + must_flashback_to_version(&mut engine, k, 5, *ts.incr(), *ts.incr()), 1 ); must_get(&mut engine, k, *ts.incr(), v1); // Flashback to version 7 with start_ts = 23, commit_ts = 24. assert_eq!( - must_flashback_write(&mut engine, k, 7, *ts.incr(), *ts.incr()), + must_flashback_to_version(&mut engine, k, 7, *ts.incr(), *ts.incr()), 1 ); must_get(&mut engine, k, *ts.incr(), v1); // Flashback to version 10 with start_ts = 26, commit_ts = 27. assert_eq!( - must_flashback_write(&mut engine, k, 10, *ts.incr(), *ts.incr()), + must_flashback_to_version(&mut engine, k, 10, *ts.incr(), *ts.incr()), 1 ); must_get_none(&mut engine, k, *ts.incr()); // Flashback to version 13 with start_ts = 29, commit_ts = 30. assert_eq!( - must_flashback_write(&mut engine, k, 13, *ts.incr(), *ts.incr()), + must_flashback_to_version(&mut engine, k, 13, *ts.incr(), *ts.incr()), 1 ); must_get(&mut engine, k, *ts.incr(), v2); // Flashback to version 27 with start_ts = 32, commit_ts = 33. 
assert_eq!( - must_flashback_write(&mut engine, k, 27, *ts.incr(), *ts.incr()), + must_flashback_to_version(&mut engine, k, 27, *ts.incr(), *ts.incr()), 1 ); must_get_none(&mut engine, k, *ts.incr()); @@ -291,9 +298,37 @@ pub mod tests { // Since the key has been deleted, flashback to version 1 should not do // anything. assert_eq!( - must_flashback_write(&mut engine, k, ts, *ts.incr(), *ts.incr()), + must_flashback_to_version(&mut engine, k, ts, *ts.incr(), *ts.incr()), 0 ); must_get_none(&mut engine, k, ts); } + + #[test] + fn test_flashback_to_version_pessimistic() { + use kvproto::kvrpcpb::PrewriteRequestPessimisticAction::*; + + let mut engine = TestEngineBuilder::new().build().unwrap(); + let k = b"k"; + let (v1, v2, v3) = (b"v1", b"v2", b"v3"); + // Prewrite and commit Put(k -> v1) with stat_ts = 10, commit_ts = 15. + must_prewrite_put(&mut engine, k, v1, k, 10); + must_commit(&mut engine, k, 10, 15); + // Prewrite and commit Put(k -> v2) with stat_ts = 20, commit_ts = 25. + must_prewrite_put(&mut engine, k, v2, k, 20); + must_commit(&mut engine, k, 20, 25); + + must_acquire_pessimistic_lock(&mut engine, k, k, 30, 30); + must_pessimistic_locked(&mut engine, k, 30, 30); + + // Flashback to version 17 with start_ts = 35, commit_ts = 40. + // Distinguish from pessimistic start_ts 30 to make sure rollback ts is by lock + // ts. + assert_eq!(must_flashback_to_version(&mut engine, k, 17, 35, 40), 3); + + // Pessimistic Prewrite Put(k -> v3) with stat_ts = 30 will be error with + // Rollback. 
+ must_pessimistic_prewrite_put_err(&mut engine, k, v3, k, 30, 30, DoPessimisticCheck); + must_get(&mut engine, k, 45, v1); + } } diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index cf91873d385..064edebf88a 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -8,7 +8,7 @@ use test_raftstore::*; use txn_types::WriteBatchFlags; #[test] -fn test_flahsback_for_applied_index() { +fn test_flashback_for_applied_index() { let mut cluster = new_node_cluster(0, 3); cluster.run(); @@ -79,7 +79,7 @@ fn test_flashback_for_schedule() { } #[test] -fn test_flahsback_for_write() { +fn test_flashback_for_write() { let mut cluster = new_node_cluster(0, 3); cluster.run(); @@ -107,7 +107,7 @@ fn test_flahsback_for_write() { } #[test] -fn test_flahsback_for_read() { +fn test_flashback_for_read() { let mut cluster = new_node_cluster(0, 3); cluster.run(); @@ -141,7 +141,7 @@ fn test_flahsback_for_read() { // However, when flashback is enabled, it will make the lease None and prevent // renew lease. #[test] -fn test_flahsback_for_local_read() { +fn test_flashback_for_local_read() { let mut cluster = new_node_cluster(0, 3); let election_timeout = configure_for_lease_read(&mut cluster, Some(50), None); @@ -208,7 +208,7 @@ fn test_flahsback_for_local_read() { } #[test] -fn test_flahsback_for_status_cmd_as_region_detail() { +fn test_flashback_for_status_cmd_as_region_detail() { let mut cluster = new_node_cluster(0, 3); cluster.run(); From 5bc1fa7183886c2b0098c47e80df721037b193aa Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Thu, 29 Sep 2022 13:49:45 +0800 Subject: [PATCH 0250/1149] storage: remove histogram about locking key in prewrite (#13527) close tikv/tikv#13526 It is impossible to get blocked when acquiring a lock from the concurrency manager when generating async-commit timestamp because the latch already ensures there can't be race on the same key. 
So, the histogram recording the duration is useless and is removed in this commit. Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- src/storage/mvcc/metrics.rs | 9 ++------- src/storage/txn/actions/prewrite.rs | 8 +++----- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/src/storage/mvcc/metrics.rs b/src/storage/mvcc/metrics.rs index ddfdc14f5ef..3c4bda63f7e 100644 --- a/src/storage/mvcc/metrics.rs +++ b/src/storage/mvcc/metrics.rs @@ -68,12 +68,6 @@ lazy_static! { exponential_buckets(1.0, 2.0, 30).unwrap() ) .unwrap(); - pub static ref CONCURRENCY_MANAGER_LOCK_DURATION_HISTOGRAM: Histogram = register_histogram!( - "tikv_concurrency_manager_lock_duration", - "Histogram of the duration of lock key in the concurrency manager", - exponential_buckets(1e-7, 2.0, 20).unwrap() // 100ns ~ 100ms - ) - .unwrap(); pub static ref MVCC_CONFLICT_COUNTER: MvccConflictCounterVec = { register_static_int_counter_vec!( MvccConflictCounterVec, @@ -107,6 +101,7 @@ lazy_static! { "tikv_storage_mvcc_prewrite_assertion_perf", "Counter of assertion operations in transactions", &["type"] - ).unwrap() + ) + .unwrap() }; } diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index a8a33799686..40709032d61 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -16,8 +16,8 @@ use txn_types::{ use crate::storage::{ mvcc::{ metrics::{ - CONCURRENCY_MANAGER_LOCK_DURATION_HISTOGRAM, MVCC_CONFLICT_COUNTER, - MVCC_DUPLICATE_CMD_COUNTER_VEC, MVCC_PREWRITE_ASSERTION_PERF_COUNTER_VEC, + MVCC_CONFLICT_COUNTER, MVCC_DUPLICATE_CMD_COUNTER_VEC, + MVCC_PREWRITE_ASSERTION_PERF_COUNTER_VEC, }, Error, ErrorInner, Lock, LockType, MvccTxn, Result, SnapshotReader, }, @@ -646,9 +646,7 @@ fn async_commit_timestamps( ) -> Result { // This operation should not block because the latch makes sure only one thread // is operating on this key. 
- let key_guard = CONCURRENCY_MANAGER_LOCK_DURATION_HISTOGRAM.observe_closure_duration(|| { - ::futures_executor::block_on(txn.concurrency_manager.lock_key(key)) - }); + let key_guard = ::futures_executor::block_on(txn.concurrency_manager.lock_key(key)); let final_min_commit_ts = key_guard.with_lock(|l| { let max_ts = txn.concurrency_manager.max_ts(); From 9321040f2eea6617e486c58b959d40abe527aa82 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Thu, 29 Sep 2022 14:15:45 +0800 Subject: [PATCH 0251/1149] storage: Add new implementation of lock waiting queue (#13486) ref tikv/tikv#13298 Implements a new version of lock waiting queue, which is important for supporting resumable acquire_pessimistic_lock requests. (ref #13298) * Make `storage::lock_manager` an directory * Add new implementation of the lock waiting queue and other related stuff. Signed-off-by: MyonKeminta Co-authored-by: Ti Chi Robot --- Cargo.lock | 9 + Cargo.toml | 3 + src/storage/errors.rs | 31 + src/storage/lock_manager/lock_wait_context.rs | 270 +++++ .../lock_manager/lock_waiting_queue.rs | 977 ++++++++++++++++++ .../{lock_manager.rs => lock_manager/mod.rs} | 15 + src/storage/types.rs | 26 + 7 files changed, 1331 insertions(+) create mode 100644 src/storage/lock_manager/lock_wait_context.rs create mode 100644 src/storage/lock_manager/lock_waiting_queue.rs rename src/storage/{lock_manager.rs => lock_manager/mod.rs} (90%) diff --git a/Cargo.lock b/Cargo.lock index bb3b33463cf..821e15edc18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5561,6 +5561,12 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "sync_wrapper" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8" + [[package]] name = "sysinfo" version = "0.16.4" @@ -6157,6 +6163,7 @@ dependencies = [ "crc32fast", "crc64fast", "crossbeam", + "dashmap", "encryption_export", "engine_panic", 
"engine_rocks", @@ -6228,8 +6235,10 @@ dependencies = [ "serde_json", "slog", "slog-global", + "smallvec", "sst_importer", "strum 0.20.0", + "sync_wrapper", "sysinfo", "tempfile", "test_sst_importer", diff --git a/Cargo.toml b/Cargo.toml index 13479b2a8fb..f51e2ddd303 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,6 +80,7 @@ coprocessor_plugin_api = { path = "components/coprocessor_plugin_api" } crc32fast = "1.2" crc64fast = "0.1" crossbeam = "0.8" +dashmap = "5" encryption_export = { path = "components/encryption/export", default-features = false } engine_panic = { path = "components/engine_panic", default-features = false } engine_rocks = { path = "components/engine_rocks", default-features = false } @@ -146,8 +147,10 @@ serde_ignored = "0.1" serde_json = { version = "1.0", features = ["preserve_order"] } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +smallvec = "1.4" sst_importer = { path = "components/sst_importer", default-features = false } strum = { version = "0.20", features = ["derive"] } +sync_wrapper = "0.1.1" sysinfo = "0.16" tempfile = "3.0" thiserror = "1.0" diff --git a/src/storage/errors.rs b/src/storage/errors.rs index faf12f34003..7ce5d925dfa 100644 --- a/src/storage/errors.rs +++ b/src/storage/errors.rs @@ -2,9 +2,11 @@ //! Types for storage related errors and associated helper methods. use std::{ + convert::TryFrom, error::Error as StdError, fmt::{self, Debug, Display, Formatter}, io::Error as IoError, + sync::Arc, }; use error_code::{self, ErrorCode, ErrorCodeExt}; @@ -456,6 +458,35 @@ pub fn extract_key_errors(res: Result>>) -> Vec); + +impl From for SharedError { + fn from(e: ErrorInner) -> Self { + Self(Arc::new(e)) + } +} + +impl From for SharedError { + fn from(e: Error) -> Self { + Self(Arc::from(e.0)) + } +} + +/// Tries to convert the shared error to owned one. 
It can success only when +/// it's the only reference to the error. +impl TryFrom for Error { + type Error = (); + + fn try_from(e: SharedError) -> std::result::Result { + Arc::try_unwrap(e.0).map(Into::into).map_err(|_| ()) + } +} + #[cfg(test)] mod test { use kvproto::kvrpcpb::WriteConflictReason; diff --git a/src/storage/lock_manager/lock_wait_context.rs b/src/storage/lock_manager/lock_wait_context.rs new file mode 100644 index 00000000000..97ff49f965b --- /dev/null +++ b/src/storage/lock_manager/lock_wait_context.rs @@ -0,0 +1,270 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! Holds the state of a lock-waiting `AcquirePessimisticLock` request. +//! +//! When an `AcquirePessimisticLock` request meets a lock and enters +//! lock-waiting state, it then may be either woken up by popping from the +//! [`LockWaitingQueue`](super::lock_waiting_queue::LockWaitQueues), +//! or cancelled by the +//! [`WaiterManager`](crate::server::lock_manager::WaiterManager) due to +//! timeout. [`LockWaitContext`] is therefore used to share the necessary state +//! of a single `AcquirePessimisticLock` request, and ensuring the internal +//! callback for returning response through RPC is called at most only once. +//! +//! Note: The corresponding implementation in `WaiterManager` is not yet +//! implemented, and this mod is currently not used yet. + +use std::{ + convert::TryInto, + result::Result, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, +}; + +use parking_lot::Mutex; +use txn_types::TimeStamp; + +use crate::storage::{ + errors::SharedError, + lock_manager::{lock_waiting_queue::PessimisticLockKeyCallback, LockManager, LockWaitToken}, + Error as StorageError, PessimisticLockRes, ProcessResult, StorageCallback, +}; + +pub struct LockWaitContextInner { + /// The callback for finishing the current AcquirePessimisticLock request. 
+ /// Usually, requests are accepted from RPC, and in this case calling + /// the callback means returning the response to the client via RPC. + cb: StorageCallback, + + /// The token of the corresponding waiter in `LockManager`. + #[allow(dead_code)] + lock_wait_token: LockWaitToken, +} + +/// The content of the `LockWaitContext` that needs to be shared among all +/// clones. +/// +/// When a AcquirePessimisticLock request meets lock and enters lock waiting +/// state, a `LockWaitContext` will be created, and the +/// `LockWaitContextSharedState` will be shared in these places: +/// * Callbacks created from the `lockWaitContext` and distributed to the lock +/// waiting queue and the `LockManager`. When one of the callbacks is called +/// and the request is going to be finished, they need to take the +/// [`LockWaitContextInner`] to call the callback. +/// * The [`LockWaitEntry`](crate::storage::lock_manager::lock_waiting_queue::LockWaitEntry), for +/// checking whether the request is already finished (cancelled). +pub struct LockWaitContextSharedState { + ctx_inner: Mutex>, + pub finished: AtomicBool, +} + +impl LockWaitContextSharedState { + /// Checks whether the lock-waiting request is already finished. 
+ pub fn is_finished(&self) -> bool { + self.finished.load(Ordering::Acquire) + } +} + +#[derive(Clone)] +pub struct LockWaitContext { + shared_states: Arc, + #[allow(dead_code)] + lock_manager: L, + allow_lock_with_conflict: bool, + + // Fields for logging: + start_ts: TimeStamp, + for_update_ts: TimeStamp, +} + +impl LockWaitContext { + pub fn new( + lock_manager: L, + lock_wait_token: LockWaitToken, + start_ts: TimeStamp, + for_update_ts: TimeStamp, + cb: StorageCallback, + allow_lock_with_conflict: bool, + ) -> Self { + let inner = LockWaitContextInner { + cb, + lock_wait_token, + }; + Self { + shared_states: Arc::new(LockWaitContextSharedState { + ctx_inner: Mutex::new(Some(inner)), + finished: AtomicBool::new(false), + }), + lock_manager, + allow_lock_with_conflict, + start_ts, + for_update_ts, + } + } + + pub fn get_shared_states(&self) -> &Arc { + &self.shared_states + } + + /// Get the callback that should be invoked when finishes executing the + /// scheduler command that issued the lock-waiting. + /// + /// When we support partially finishing a pessimistic lock request (i.e. + /// when acquiring lock multiple keys in one single request, allowing + /// some keys to be locked successfully while the others are blocked or + /// failed), this will be useful for handling the result of the first + /// write batch. But currently, the first write batch of a lock-waiting + /// request is always empty, so the callback is just noop. + pub fn get_callback_for_first_write_batch(&self) -> StorageCallback { + StorageCallback::Boolean(Box::new(|res| { + res.unwrap(); + })) + } + + /// Get the callback that should be called when the request is woken up on a + /// key. + pub fn get_callback_for_blocked_key(&self) -> PessimisticLockKeyCallback { + let ctx = self.clone(); + Box::new(move |res| { + ctx.finish_request(res); + }) + } + + /// Get the callback that's used to cancel a lock-waiting request. 
Usually + /// called by + /// [`WaiterManager`](crate::server::lock_manager::WaiterManager) due to + /// timeout. + pub fn get_callback_for_cancellation(&self) -> impl FnOnce(StorageError) { + let ctx = self.clone(); + move |e| { + ctx.finish_request(Err(e.into())); + } + } + + fn finish_request(&self, result: Result) { + let ctx_inner = if let Some(inner) = self.shared_states.ctx_inner.lock().take() { + inner + } else { + debug!("double invoking of finish_request of LockWaitContext"; + "start_ts" => self.start_ts, + "for_update_ts" => self.for_update_ts + ); + return; + }; + + self.shared_states.finished.store(true, Ordering::Release); + + // TODO: Uncomment this after the corresponding change of `LockManager` is done. + // self.lock_manager + // .remove_lock_wait(ctx_inner.lock_wait_token); + + if !self.allow_lock_with_conflict { + // The result must be an owned error. + let err = result.unwrap_err().try_into().unwrap(); + ctx_inner.cb.execute(ProcessResult::Failed { err }); + return; + } + + // The following code is only valid after implementing the new lock-waiting + // model. + unreachable!(); + } +} + +#[cfg(test)] +mod tests { + use std::{ + sync::mpsc::{channel, Receiver}, + time::Duration, + }; + + use super::*; + use crate::storage::{ + lock_manager::DummyLockManager, + mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, + txn::{Error as TxnError, ErrorInner as TxnErrorInner}, + ErrorInner as StorageErrorInner, Result as StorageResult, + }; + + fn create_storage_cb() -> ( + StorageCallback, + Receiver>>, + ) { + let (tx, rx) = channel(); + let cb = StorageCallback::PessimisticLock(Box::new(move |r| tx.send(r).unwrap())); + (cb, rx) + } + + fn create_test_lock_wait_ctx() -> ( + LockWaitContext, + Receiver>>, + ) { + // TODO: Use `ProxyLockMgr` to check the correctness of the `remove_lock_wait` + // invocation. 
+ let lock_mgr = DummyLockManager {}; + let (cb, rx) = create_storage_cb(); + let ctx = LockWaitContext::new( + lock_mgr, + super::super::LockWaitToken(Some(1)), + 1.into(), + 1.into(), + cb, + false, + ); + (ctx, rx) + } + + #[test] + fn test_lock_wait_context() { + let write_conflict = || { + StorageErrorInner::Txn(TxnError::from(TxnErrorInner::Mvcc(MvccError::from( + MvccErrorInner::WriteConflict { + start_ts: 1.into(), + conflict_start_ts: 2.into(), + conflict_commit_ts: 2.into(), + key: b"k1".to_vec(), + primary: b"k1".to_vec(), + reason: kvproto::kvrpcpb::WriteConflictReason::PessimisticRetry, + }, + )))) + }; + let key_is_locked = || { + StorageErrorInner::Txn(TxnError::from(TxnErrorInner::Mvcc(MvccError::from( + MvccErrorInner::KeyIsLocked(kvproto::kvrpcpb::LockInfo::default()), + )))) + }; + + let (ctx, rx) = create_test_lock_wait_ctx(); + // Nothing happens currently. + (ctx.get_callback_for_first_write_batch()).execute(ProcessResult::Res); + rx.recv_timeout(Duration::from_millis(20)).unwrap_err(); + (ctx.get_callback_for_blocked_key())(Err(SharedError::from(write_conflict()))); + let res = rx.recv().unwrap().unwrap_err(); + assert!(matches!( + &res, + StorageError(box StorageErrorInner::Txn(TxnError( + box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::WriteConflict { .. })) + ))) + )); + // The tx should be dropped. + rx.recv().unwrap_err(); + // Nothing happens if the callback is double-called. + (ctx.get_callback_for_cancellation())(StorageError::from(key_is_locked())); + + let (ctx, rx) = create_test_lock_wait_ctx(); + (ctx.get_callback_for_cancellation())(StorageError::from(key_is_locked())); + let res = rx.recv().unwrap().unwrap_err(); + assert!(matches!( + &res, + StorageError(box StorageErrorInner::Txn(TxnError( + box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::KeyIsLocked(_))) + ))) + )); + // Nothing happens if the callback is double-called. 
+ (ctx.get_callback_for_blocked_key())(Err(SharedError::from(write_conflict()))); + // The tx should be dropped. + rx.recv().unwrap_err(); + } +} diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs new file mode 100644 index 00000000000..a3312a4fdb2 --- /dev/null +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -0,0 +1,977 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! This mod contains the [`LockWaitQueues`] for managing waiting and waking up +//! of `AcquirePessimisticLock` requests in lock-contention scenarios, and other +//! related accessories, including: +//! +//! - [`SharedError`]: A wrapper type to [`crate::storage::Error`] to allow the +//! error being shared in multiple places +//! - Related type aliases +//! - [`LockWaitEntry`]: which is used to represent lock-waiting requests in the +//! queue +//! - [`Box`]: The comparable wrapper of [`LockWaitEntry`] which +//! defines the priority ordering among lock-waiting requests +//! +//! Each key may have its own lock-waiting queue, which is a priority queue that +//! orders the entries with the order defined by +//! [`Box`]. +//! +//! There are be two kinds of `AcquirePessimisticLock` requests: +//! +//! * Requests in legacy mode: indicated by `allow_lock_with_conflict = false`. +//! A legacy request is woken up, it should return a `WriteConflict` +//! immediately to the client to tell the client to retry. Then, the remaining +//! lock-waiting entries should be woken up after delaying for +//! `wake-up-delay-duration` which is a configurable value. +//! * Resumable requests: indicated by `allow_lock_with_conflict = true`. This +//! kind of requests are allowed to lock even if there is write conflict, When +//! it's woken up after waiting for another lock, it can then resume execution +//! and try to acquire the lock again. No delayed waking up is necessary. +//! 
**Note that though the `LockWaitQueues` is designed to accept it, this +//! kind of requests are currently not implemented yet.** +//! +//! ## Details about delayed waking up +//! +//! The delayed waking-up is needed after waking up a request in legacy mode. +//! The reasons are: +//! +//! * The head of the queue (let's denote its belonging transaction by `T1`) is +//! woken up after the current lock being released, then the request will +//! return a `WriteConflict` error immediately, and the key is left unlocked. +//! It's possible that `T1` won't lock the key again. However, the other +//! waiting requests need releasing-lock event to be woken up. In this case, +//! we should not let them wait forever until timeout. +//! * When many transactions are blocked on the same key, and a transaction is +//! granted the lock after another releasing the lock, the transaction that's +//! blocking other transactions is changed. After cancelling the other +//! transactions and let them retry the `AcquirePessimisticLock` request, they +//! will be able to re-detect deadlocks with the latest information. +//! +//! To achieve this, after delaying for `wake-up-delay-duration` since the +//! latest waking-up event on the key, a call to +//! [`LockWaitQueues::delayed_notify_all`] will be made. However, since the +//! [`LockWaitQueues`] do not have its own thread pool, the user may receive a +//! future after calling some of the functions, and the user will be responsible +//! for executing the future in a suitable place. 
+ +use std::{ + collections::BinaryHeap, + future::Future, + pin::Pin, + result::Result, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + time::{Duration, Instant}, +}; + +use dashmap; +use futures_util::compat::Future01CompatExt; +use kvproto::kvrpcpb; +use smallvec::SmallVec; +use sync_wrapper::SyncWrapper; +use tikv_util::{time::InstantExt, timer::GLOBAL_TIMER_HANDLE}; +use txn_types::{Key, TimeStamp}; + +use crate::storage::{ + errors::SharedError, + lock_manager::{lock_wait_context::LockWaitContextSharedState, LockManager, LockWaitToken}, + mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, + txn::Error as TxnError, + types::{PessimisticLockParameters, PessimisticLockRes}, + Error as StorageError, +}; + +pub type CallbackWithSharedError = Box) + Send + 'static>; +pub type PessimisticLockKeyCallback = CallbackWithSharedError; + +/// Represents an `AcquirePessimisticLock` request that's waiting for a lock, +/// and contains the request's parameters. +pub struct LockWaitEntry { + pub key: Key, + pub lock_hash: u64, + // TODO: Use term to filter out stale entries in the queue. + // pub term: Option, + pub parameters: PessimisticLockParameters, + pub lock_wait_token: LockWaitToken, + pub req_states: Option>, + pub legacy_wake_up_index: Option, + pub key_cb: Option>, +} + +impl PartialEq for LockWaitEntry { + fn eq(&self, other: &Self) -> bool { + self.parameters.start_ts == other.parameters.start_ts + } +} + +impl Eq for LockWaitEntry {} + +impl PartialOrd for LockWaitEntry { + fn partial_cmp(&self, other: &Self) -> Option { + // Reverse it since the std BinaryHeap is max heap and we want to pop the + // minimal. + other + .parameters + .start_ts + .partial_cmp(&self.parameters.start_ts) + } +} + +impl Ord for LockWaitEntry { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + // Reverse it since the std BinaryHeap is max heap and we want to pop the + // minimal. 
+ other.parameters.start_ts.cmp(&self.parameters.start_ts) + } +} + +pub struct KeyLockWaitState { + #[allow(dead_code)] + current_lock: kvrpcpb::LockInfo, + + /// The counter of wake up events of legacy pessimistic lock requests + /// (`allow_lock_with_conflict == false`). When an lock wait entry is + /// pushed to the queue, it records the current counter. The purpose + /// is to mark the entries that needs to be woken up after delaying. + /// + /// Here is an example showing how it works (note that requests in + /// the example are all in legacy mode): + /// + /// Let's denote a lock-wait entry by `(start_ts, + /// legacy_wake_up_index)`. Consider there are three requests with + /// start_ts 20, 30, 40 respectively, and they are pushed to the + /// queue when the `KeyLockWaitState::legacy_wake_up_index` is 0. Then the + /// `KeyLockWaitState` is: + /// + /// ```text + /// legacy_wake_up_index: 0, queue: [(20, 0), (30, 0), (40, 0)] + /// ``` + /// + /// Then the lock on the key is released. We pops the first entry in the + /// queue to wake it up, and then schedule a call to + /// [`LockWaitQueues::delayed_notify_all`] after delaying for + /// `wake_up_delay_duration`. The current state becomes: + /// + /// ```text + /// legacy_wake_up_index: 1, queue: [(30, 0), (40, 0)] + /// ```` + /// + /// Here, if some other request arrives, one of them may successfully + /// acquire the lock and others are pushed to the queue. the state + /// becomes: + /// + /// ```text + /// legacy_wake_up_index: 1, queue: [(30, 0), (40, 0), (50, 1), (60, 1)] + /// ``` + /// + /// Then `wake_up_delay_duration` is elapsed since the previous waking up. + /// Here, we expect that the lock wait entries 30 and 40 can be woken + /// up, since they exists when the previous waking up occurs. But we + /// don't want to wake up later-arrived entries (50 and 60) since it + /// introduces useless pessimistic retries to transaction 50 and 60 when + /// they don't need to. 
The solution is, only wake up the entries that + /// has `entry.legacy_wake_up_index < + /// key_lock_wait_state.legacy_wake_up_index`. Therefore, we only wakes up + /// entries 30 and 40 who has `legacy_wake_up_index < 1`, while 50 + /// and 60 will be left untouched. + /// + /// When waking up resumable requests, the mechanism above won't take + /// effect. If a legacy request is woken up and triggered the mechanism, + /// and there is a resumable request in the queue, `delayed_notify_all` + /// will stop at the first resumable request it meets, pop it out, and + /// return it from a [`DelayedNotifyAllFuture`]. See + /// [`LockWaitQueues::pop_for_waking_up`]. + legacy_wake_up_index: usize, + queue: BinaryHeap>, + + /// The start_ts of the most recent waking up event. + last_conflict_start_ts: TimeStamp, + /// The commit_ts of the most recent waking up event. + last_conflict_commit_ts: TimeStamp, + + /// `(id, start_time, delay_duration)` + delayed_notify_all_state: Option<(u64, Instant, Arc)>, +} + +impl KeyLockWaitState { + fn new() -> Self { + Self { + current_lock: kvrpcpb::LockInfo::default(), + legacy_wake_up_index: 0, + queue: BinaryHeap::new(), + last_conflict_start_ts: TimeStamp::zero(), + last_conflict_commit_ts: TimeStamp::zero(), + delayed_notify_all_state: None, + } + } +} + +pub type DelayedNotifyAllFuture = Pin>> + Send>>; + +pub struct LockWaitQueueInner { + queue_map: dashmap::DashMap, + id_allocated: AtomicU64, +} + +#[derive(Clone)] +pub struct LockWaitQueues { + inner: Arc, + #[allow(dead_code)] + lock_mgr: L, +} + +impl LockWaitQueues { + pub fn new(lock_mgr: L) -> Self { + Self { + inner: Arc::new(LockWaitQueueInner { + queue_map: dashmap::DashMap::new(), + id_allocated: AtomicU64::new(1), + }), + lock_mgr, + } + } + + /// Enqueues a lock wait entry. The key is indicated by the `key` field of + /// the `lock_wait_entry`. The caller also needs to provide the + /// information of the current-holding lock. 
+ pub fn push_lock_wait( + &self, + mut lock_wait_entry: Box, + current_lock: kvrpcpb::LockInfo, + ) { + let mut key_state = self + .inner + .queue_map + .entry(lock_wait_entry.key.clone()) + .or_insert_with(|| KeyLockWaitState::new()); + key_state.current_lock = current_lock; + + if lock_wait_entry.legacy_wake_up_index.is_none() { + lock_wait_entry.legacy_wake_up_index = Some(key_state.value().legacy_wake_up_index); + } + key_state.value_mut().queue.push(lock_wait_entry); + } + + /// Dequeues the head of the lock waiting queue of the specified key, + /// assuming the popped entry will be woken up. + /// + /// If it's waking up a legacy request and the queue is not empty, a future + /// will be returned and the caller will be responsible for executing it. + /// The future waits until `wake_up_delay_duration` is elapsed since the + /// most recent waking-up, and then wakes up all lock waiting entries that + /// exists at the time when the latest waking-up happens. The future + /// will return a `LockWaitEntry` if a resumable entry is popped out + /// from the queue while executing, and in this case the caller will be + /// responsible to wake it up. + pub fn pop_for_waking_up( + &self, + key: &Key, + conflicting_start_ts: TimeStamp, + conflicting_commit_ts: TimeStamp, + wake_up_delay_duration_ms: u64, + ) -> Option<(Box, Option)> { + self.pop_for_waking_up_impl( + key, + conflicting_start_ts, + conflicting_commit_ts, + Some(wake_up_delay_duration_ms), + ) + } + + fn pop_for_waking_up_impl( + &self, + key: &Key, + conflicting_start_ts: TimeStamp, + conflicting_commit_ts: TimeStamp, + wake_up_delay_duration_ms: Option, + ) -> Option<(Box, Option)> { + let mut result = None; + + // We don't want other threads insert any more entries between finding the + // queue is empty and removing the queue from the map. Wrap the logic + // within a call to `remove_if_mut` to avoid releasing lock during the + // procedure. 
+ self.inner.queue_map.remove_if_mut(key, |_, v| { + v.last_conflict_start_ts = conflicting_start_ts; + v.last_conflict_commit_ts = conflicting_commit_ts; + + while let Some(lock_wait_entry) = v.queue.pop() { + if lock_wait_entry.req_states.as_ref().unwrap().is_finished() { + // Skip already cancelled entries. + continue; + } + + if !lock_wait_entry.parameters.allow_lock_with_conflict { + // If a pessimistic lock request in legacy mode is woken up, increase the + // counter. + v.legacy_wake_up_index += 1; + let notify_all_future = match wake_up_delay_duration_ms { + Some(delay) if !v.queue.is_empty() => { + self.handle_delayed_wake_up(v, key, delay) + } + _ => None, + }; + result = Some((lock_wait_entry, notify_all_future)); + } else { + result = Some((lock_wait_entry, None)); + } + break; + } + + // Remove the queue if it's emptied. + v.queue.is_empty() + }); + + result + } + + /// Schedule delayed waking up on the specified key. + /// + /// Returns a future if it's needed to spawn a new async task to do the + /// delayed waking up. The caller should be responsible for executing + /// it. + fn handle_delayed_wake_up( + &self, + key_lock_wait_state: &mut KeyLockWaitState, + key: &Key, + wake_up_delay_duration_ms: u64, + ) -> Option { + if let Some((_, start_time, delay_duration)) = + &mut key_lock_wait_state.delayed_notify_all_state + { + // There's already an async task spawned for handling delayed waking up on this + // key. Update its state to extend its delaying duration (until now + // + wake_up_delay_duration). + let new_delay_duration = + (start_time.saturating_elapsed().as_millis() as u64) + wake_up_delay_duration_ms; + delay_duration.store(new_delay_duration, Ordering::Release); + None + } else { + // It's needed to spawn a new async task for performing delayed waking up on + // this key. Return a future to let the caller execute it in a + // proper thread pool. 
+ let notify_id = self.allocate_internal_id(); + let start_time = Instant::now(); + let delay_ms = Arc::new(AtomicU64::new(wake_up_delay_duration_ms)); + + key_lock_wait_state.delayed_notify_all_state = + Some((notify_id, start_time, delay_ms.clone())); + Some(Box::pin(self.clone().async_delayed_notify_all( + key.clone(), + start_time, + delay_ms, + notify_id, + ))) + } + } + + fn allocate_internal_id(&self) -> u64 { + self.inner.id_allocated.fetch_add(1, Ordering::SeqCst) + } + + async fn async_delayed_notify_all( + self, + key: Key, + start_time: Instant, + delay_ms: Arc, + notify_id: u64, + ) -> Option> { + let mut prev_delay_ms = 0; + // The delay duration may be extended by later waking-up events, by updating the + // value of `delay_ms`. So we loop until we find that the elapsed + // duration is larger than `delay_ms`. + loop { + let current_delay_ms = delay_ms.load(Ordering::Acquire); + if current_delay_ms == 0 { + // Cancelled. + return None; + } + + if current_delay_ms <= prev_delay_ms + || (start_time.saturating_elapsed().as_millis() as u64) >= current_delay_ms + { + // Timed out. + break; + } + + let deadline = start_time + Duration::from_millis(current_delay_ms); + + GLOBAL_TIMER_HANDLE.delay(deadline).compat().await.unwrap(); + + prev_delay_ms = current_delay_ms; + } + + self.delayed_notify_all(&key, notify_id) + } + + fn delayed_notify_all(&self, key: &Key, notify_id: u64) -> Option> { + let mut popped_lock_wait_entries = SmallVec::<[_; 4]>::new(); + + let mut woken_up_resumable_entry = None; + let mut conflicting_start_ts = TimeStamp::zero(); + let mut conflicting_commit_ts = TimeStamp::zero(); + + // We don't want other threads insert any more entries between finding the + // queue is empty and removing the queue from the map. Wrap the logic + // within a call to `remove_if_mut` to avoid releasing lock during the + // procedure. 
+ self.inner.queue_map.remove_if_mut(key, |_, v| { + // The KeyLockWaitState of the key might have been removed from the map and then + // recreated. Skip. + if v.delayed_notify_all_state + .as_ref() + .map_or(true, |(id, ..)| *id != notify_id) + { + return false; + } + + // Clear the state which indicates the scheduled `delayed_notify_all` has + // finished. + v.delayed_notify_all_state = None; + + conflicting_start_ts = v.last_conflict_start_ts; + conflicting_commit_ts = v.last_conflict_commit_ts; + + let legacy_wake_up_index = v.legacy_wake_up_index; + + while let Some(front) = v.queue.peek() { + if front.req_states.as_ref().unwrap().is_finished() { + // Skip already cancelled entries. + v.queue.pop(); + continue; + } + if front + .legacy_wake_up_index + .map_or(false, |idx| idx >= legacy_wake_up_index) + { + // This entry is added after the legacy-wakeup that issued the current + // delayed_notify_all operation. Keep it and other remaining items in the queue. + break; + } + let lock_wait_entry = v.queue.pop().unwrap(); + if lock_wait_entry.parameters.allow_lock_with_conflict { + woken_up_resumable_entry = Some(lock_wait_entry); + break; + } + popped_lock_wait_entries.push(lock_wait_entry); + } + + // If the queue is empty, remove it from the map. + v.queue.is_empty() + }); + + // Call callbacks to cancel these entries here. + // TODO: Perhaps we'd better make it concurrent with scheduling the new command + // (if `woken_up_resumable_entry` is some) if there are too many. 
+ for lock_wait_entry in popped_lock_wait_entries { + let lock_wait_entry = *lock_wait_entry; + let cb = lock_wait_entry.key_cb.unwrap().into_inner(); + let e = StorageError::from(TxnError::from(MvccError::from( + MvccErrorInner::WriteConflict { + start_ts: lock_wait_entry.parameters.start_ts, + conflict_start_ts: conflicting_start_ts, + conflict_commit_ts: conflicting_commit_ts, + key: lock_wait_entry.key.into_raw().unwrap(), + primary: lock_wait_entry.parameters.primary, + reason: kvrpcpb::WriteConflictReason::PessimisticRetry, + }, + ))); + cb(Err(e.into())); + } + + // Return the item to be woken up in resumable way. + woken_up_resumable_entry + } +} + +#[cfg(test)] +mod tests { + use std::{ + sync::mpsc::{channel, Receiver, RecvTimeoutError}, + time::Duration, + }; + + use super::*; + use crate::storage::{ + lock_manager::{lock_wait_context::LockWaitContext, DummyLockManager, WaitTimeout}, + txn::ErrorInner as TxnErrorInner, + ErrorInner as StorageErrorInner, StorageCallback, + }; + + struct TestLockWaitEntryHandle { + wake_up_rx: Receiver>, + cancel_cb: Box, + } + + impl TestLockWaitEntryHandle { + fn wait_for_result_timeout( + &self, + timeout: Duration, + ) -> Option> { + match self.wake_up_rx.recv_timeout(timeout) { + Ok(res) => Some(res), + Err(RecvTimeoutError::Timeout) => None, + Err(e) => panic!( + "unexpected error when receiving result of a LockWaitEntry: {:?}", + e + ), + } + } + + fn wait_for_result(self) -> Result { + self.wake_up_rx + .recv_timeout(Duration::from_secs(10)) + .unwrap() + } + + fn cancel(self) { + (self.cancel_cb)(); + } + } + + // Additionally add some helper functions to the LockWaitQueues for simplifying + // test code. 
+ impl LockWaitQueues { + fn make_lock_info_pb(&self, key: &[u8], ts: impl Into) -> kvrpcpb::LockInfo { + let ts = ts.into(); + let mut lock_info = kvrpcpb::LockInfo::default(); + lock_info.set_lock_version(ts.into_inner()); + lock_info.set_lock_for_update_ts(ts.into_inner()); + lock_info.set_key(key.to_owned()); + lock_info.set_primary_lock(key.to_owned()); + lock_info + } + + fn make_mock_lock_wait_entry( + &self, + key: &[u8], + start_ts: impl Into, + lock_info_pb: kvrpcpb::LockInfo, + ) -> (Box, TestLockWaitEntryHandle) { + let start_ts = start_ts.into(); + let token = super::super::LockWaitToken(Some(1)); + let dummy_request_cb = StorageCallback::PessimisticLock(Box::new(|_| ())); + let dummy_ctx = LockWaitContext::new( + self.lock_mgr.clone(), + token, + start_ts, + start_ts, + dummy_request_cb, + false, + ); + + let parameters = PessimisticLockParameters { + pb_ctx: Default::default(), + primary: key.to_owned(), + start_ts, + lock_ttl: 1000, + for_update_ts: start_ts, + wait_timeout: Some(WaitTimeout::Default), + return_values: false, + min_commit_ts: 0.into(), + check_existence: false, + is_first_lock: false, + allow_lock_with_conflict: false, + }; + + let key = Key::from_raw(key); + let lock_hash = key.gen_hash(); + let (tx, rx) = channel(); + let lock_wait_entry = Box::new(LockWaitEntry { + key, + lock_hash, + parameters, + lock_wait_token: token, + req_states: Some(dummy_ctx.get_shared_states().clone()), + legacy_wake_up_index: None, + key_cb: Some(SyncWrapper::new(Box::new(move |res| tx.send(res).unwrap()))), + }); + + let cancel_callback = dummy_ctx.get_callback_for_cancellation(); + let cancel = move || { + cancel_callback(StorageError::from(TxnError::from(MvccError::from( + MvccErrorInner::KeyIsLocked(lock_info_pb), + )))) + }; + + ( + lock_wait_entry, + TestLockWaitEntryHandle { + wake_up_rx: rx, + cancel_cb: Box::new(cancel), + }, + ) + } + + fn mock_lock_wait( + &self, + key: &[u8], + start_ts: impl Into, + encountered_lock_ts: impl Into, + 
resumable: bool, + ) -> TestLockWaitEntryHandle { + let lock_info_pb = self.make_lock_info_pb(key, encountered_lock_ts); + let (mut entry, handle) = + self.make_mock_lock_wait_entry(key, start_ts, lock_info_pb.clone()); + entry.parameters.allow_lock_with_conflict = resumable; + self.push_lock_wait(entry, lock_info_pb); + handle + } + + /// Pop an entry from the queue of the specified key, but do not create + /// the future for delayed wake up. Used in tests that do not + /// care about the delayed wake up. + fn must_pop( + &self, + key: &[u8], + conflicting_start_ts: impl Into, + conflicting_commit_ts: impl Into, + ) -> Box { + let (entry, f) = self + .pop_for_waking_up_impl( + &Key::from_raw(key), + conflicting_start_ts.into(), + conflicting_commit_ts.into(), + None, + ) + .unwrap(); + assert!(f.is_none()); + entry + } + + fn must_pop_none( + &self, + key: &[u8], + conflicting_start_ts: impl Into, + conflicting_commit_ts: impl Into, + ) { + let res = self.pop_for_waking_up_impl( + &Key::from_raw(key), + conflicting_start_ts.into(), + conflicting_commit_ts.into(), + Some(1), + ); + assert!(res.is_none()); + } + + fn must_pop_with_delayed_notify( + &self, + key: &[u8], + conflicting_start_ts: impl Into, + conflicting_commit_ts: impl Into, + ) -> (Box, DelayedNotifyAllFuture) { + let (res, f) = self + .pop_for_waking_up_impl( + &Key::from_raw(key), + conflicting_start_ts.into(), + conflicting_commit_ts.into(), + Some(50), + ) + .unwrap(); + (res, f.unwrap()) + } + + fn must_pop_with_no_delayed_notify( + &self, + key: &[u8], + conflicting_start_ts: impl Into, + conflicting_commit_ts: impl Into, + ) -> Box { + let (res, f) = self + .pop_for_waking_up_impl( + &Key::from_raw(key), + conflicting_start_ts.into(), + conflicting_commit_ts.into(), + Some(50), + ) + .unwrap(); + assert!(f.is_none()); + res + } + + fn must_not_contain_key(&self, key: &[u8]) { + assert!(self.inner.queue_map.get(&Key::from_raw(key)).is_none()); + } + + fn must_have_next_entry(&self, key: &[u8], 
start_ts: impl Into) { + assert_eq!( + self.inner + .queue_map + .get(&Key::from_raw(key)) + .unwrap() + .queue + .peek() + .unwrap() + .parameters + .start_ts, + start_ts.into() + ); + } + + fn get_delayed_notify_id(&self, key: &[u8]) -> Option { + self.inner + .queue_map + .get(&Key::from_raw(key)) + .unwrap() + .delayed_notify_all_state + .as_ref() + .map(|(id, ..)| *id) + } + } + + impl LockWaitEntry { + fn check_key(&self, expected_key: &[u8]) -> &Self { + assert_eq!(self.key, Key::from_raw(expected_key)); + self + } + + fn check_start_ts(&self, expected_start_ts: impl Into) -> &Self { + assert_eq!(self.parameters.start_ts, expected_start_ts.into()); + self + } + } + + fn expect_write_conflict( + err: &StorageErrorInner, + expect_conflict_start_ts: impl Into, + expect_conflict_commit_ts: impl Into, + ) { + match err { + StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( + box MvccErrorInner::WriteConflict { + conflict_start_ts, + conflict_commit_ts, + .. + }, + )))) => { + assert_eq!(*conflict_start_ts, expect_conflict_start_ts.into()); + assert_eq!(*conflict_commit_ts, expect_conflict_commit_ts.into()); + } + e => panic!("unexpected error: {:?}", e), + } + } + + #[test] + fn test_simple_push_pop() { + let queues = LockWaitQueues::new(DummyLockManager {}); + + queues.mock_lock_wait(b"k1", 10, 5, false); + queues.mock_lock_wait(b"k2", 11, 5, false); + + queues + .must_pop(b"k1", 5, 6) + .check_key(b"k1") + .check_start_ts(10); + queues.must_pop_none(b"k1", 5, 6); + queues.must_not_contain_key(b"k1"); + + queues + .must_pop(b"k2", 5, 6) + .check_key(b"k2") + .check_start_ts(11); + queues.must_pop_none(b"k2", 5, 6); + queues.must_not_contain_key(b"k2"); + } + + #[test] + fn test_popping_priority() { + let queues = LockWaitQueues::new(DummyLockManager {}); + + queues.mock_lock_wait(b"k1", 10, 5, false); + queues.mock_lock_wait(b"k1", 20, 5, false); + queues.mock_lock_wait(b"k1", 12, 5, false); + queues.mock_lock_wait(b"k1", 13, 5, false); + // 
Duplication is possible considering network issues and RPC retrying. + queues.mock_lock_wait(b"k1", 12, 5, false); + + // Ordered by start_ts + for &expected_start_ts in &[10u64, 12, 12, 13, 20] { + queues + .must_pop(b"k1", 5, 6) + .check_key(b"k1") + .check_start_ts(expected_start_ts); + } + + queues.must_not_contain_key(b"k1"); + } + + #[test] + fn test_dropping_cancelled_entries() { + let queues = LockWaitQueues::new(DummyLockManager {}); + + let h10 = queues.mock_lock_wait(b"k1", 10, 5, false); + let h11 = queues.mock_lock_wait(b"k1", 11, 5, false); + queues.mock_lock_wait(b"k1", 12, 5, false); + let h13 = queues.mock_lock_wait(b"k1", 13, 5, false); + queues.mock_lock_wait(b"k1", 14, 5, false); + + h10.cancel(); + h11.cancel(); + h13.cancel(); + + for &expected_start_ts in &[12u64, 14] { + queues + .must_pop(b"k1", 5, 6) + .check_start_ts(expected_start_ts); + } + queues.must_not_contain_key(b"k1"); + } + + #[tokio::test] + async fn test_delayed_notify_all() { + let queues = LockWaitQueues::new(DummyLockManager {}); + + queues.mock_lock_wait(b"k1", 8, 5, false); + + let handles1 = vec![ + queues.mock_lock_wait(b"k1", 11, 5, false), + queues.mock_lock_wait(b"k1", 12, 5, false), + queues.mock_lock_wait(b"k1", 13, 5, false), + ]; + + // Current queue: [8, 11, 12, 13] + + let (entry, delay_wake_up_future) = queues.must_pop_with_delayed_notify(b"k1", 5, 6); + entry.check_key(b"k1").check_start_ts(8); + + // Current queue: [11*, 12*, 13*] (Items marked with * means it has + // legacy_wake_up_index less than that in KeyLockWaitState, so it might + // be woken up when calling delayed_notify_all). + + let handles2 = vec![ + queues.mock_lock_wait(b"k1", 14, 5, false), + queues.mock_lock_wait(b"k1", 15, 5, true), + queues.mock_lock_wait(b"k1", 16, 5, false), + ]; + + // Current queue: [11*, 12*, 13*, 14, 15, 16] + + assert!( + handles1[0] + .wait_for_result_timeout(Duration::from_millis(100)) + .is_none() + ); + + // Wakes up transaction 11 to 13, and cancels them. 
+ assert!(delay_wake_up_future.await.is_none()); + assert!(queues.get_delayed_notify_id(b"k1").is_none()); + handles1 + .into_iter() + .for_each(|h| expect_write_conflict(&h.wait_for_result().unwrap_err().0, 5, 6)); + // 14 is not woken up. + assert!( + handles2[0] + .wait_for_result_timeout(Duration::from_millis(100)) + .is_none() + ); + + // Current queue: [14, 15, 16] + + queues.mock_lock_wait(b"k1", 9, 5, false); + // Current queue: [9, 14, 15, 16] + + // 9 will be woken up and delayed wake up should be scheduled. After delaying, + // 14 to 16 should be all woken up later if they are all not resumable. + // However since 15 is resumable, it will only wake up 14 and return 15 + // through the result of the `delay_wake_up_future`. + let (entry, delay_wake_up_future) = queues.must_pop_with_delayed_notify(b"k1", 7, 8); + entry.check_key(b"k1").check_start_ts(9); + + // Current queue: [14*, 15*, 16*] + + queues.mock_lock_wait(b"k1", 17, 5, false); + let handle18 = queues.mock_lock_wait(b"k1", 18, 5, false); + + // Current queue: [14*, 15*, 16*, 17, 18] + + // Wakes up 14, and stops at 15 which is resumable. Then, 15 should be returned + // and the caller should be responsible for waking it up. + let entry15 = delay_wake_up_future.await.unwrap(); + entry15.check_key(b"k1").check_start_ts(15); + + // Current queue: [16*, 17, 18] + + let mut it = handles2.into_iter(); + // Receive 14. + expect_write_conflict(&it.next().unwrap().wait_for_result().unwrap_err().0, 7, 8); + // 15 is not woken up. + assert!( + it.next() + .unwrap() + .wait_for_result_timeout(Duration::from_millis(100)) + .is_none() + ); + // Neither did 16. + let handle16 = it.next().unwrap(); + assert!( + handle16 + .wait_for_result_timeout(Duration::from_millis(100)) + .is_none() + ); + + queues.must_have_next_entry(b"k1", 16); + + // Call delayed_notify_all when the key does not have + // `delayed_notify_all_state`. This case may happen when the key is + // removed and recreated in the map. 
Nothing would happen. + assert!(queues.get_delayed_notify_id(b"k1").is_none()); + assert!( + queues + .delayed_notify_all(&Key::from_raw(b"k1"), 1) + .is_none() + ); + queues.must_have_next_entry(b"k1", 16); + assert!( + handle16 + .wait_for_result_timeout(Duration::from_millis(100)) + .is_none() + ); + + // Current queue: [16*, 17, 18] + + let (entry, delayed_wake_up_future) = queues.must_pop_with_delayed_notify(b"k1", 7, 8); + entry.check_key(b"k1").check_start_ts(16); + queues.must_have_next_entry(b"k1", 17); + let notify_id = queues.get_delayed_notify_id(b"k1").unwrap(); + // Call `delayed_notify_all` with a different ID. Nothing happens. + assert!( + queues + .delayed_notify_all(&Key::from_raw(b"k1"), notify_id - 1) + .is_none() + ); + queues.must_have_next_entry(b"k1", 17); + + // Current queue: [17*, 18*] + + // Don't need to create new future if there already exists one for the key. + let entry = queues.must_pop_with_no_delayed_notify(b"k1", 9, 10); + entry.check_key(b"k1").check_start_ts(17); + queues.must_have_next_entry(b"k1", 18); + + // Current queue: [18*] + + queues.mock_lock_wait(b"k1", 19, 5, false); + // Current queue: [18*, 19] + assert!(delayed_wake_up_future.await.is_none()); + // 18 will be cancelled with ts of the latest wake-up event. + expect_write_conflict(&handle18.wait_for_result().unwrap_err().0, 9, 10); + // Current queue: [19] + + // Don't need to create new future if the queue is cleared. + let entry = queues.must_pop_with_no_delayed_notify(b"k1", 9, 10); + entry.check_key(b"k1").check_start_ts(19); + // Current queue: empty + queues.must_not_contain_key(b"k1"); + + // Calls delayed_notify_all on keys that not exists (maybe deleted due to + // completely waking up). Nothing would happen. 
+ assert!( + queues + .delayed_notify_all(&Key::from_raw(b"k1"), 1) + .is_none() + ); + queues.must_not_contain_key(b"k1"); + } +} diff --git a/src/storage/lock_manager.rs b/src/storage/lock_manager/mod.rs similarity index 90% rename from src/storage/lock_manager.rs rename to src/storage/lock_manager/mod.rs index 79a9d0572f3..235a31c3710 100644 --- a/src/storage/lock_manager.rs +++ b/src/storage/lock_manager/mod.rs @@ -10,6 +10,9 @@ use crate::{ storage::{txn::ProcessResult, types::StorageCallback}, }; +pub mod lock_wait_context; +pub mod lock_waiting_queue; + #[derive(Clone, Copy, PartialEq, Debug, Default)] pub struct Lock { pub ts: TimeStamp, @@ -64,6 +67,18 @@ impl From for WaitTimeout { } } +/// Uniquely identifies a lock-waiting request in a `LockManager`. +/// +/// Not used yet, but necessary for implementing `LockWaitQueues`. +#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] +pub struct LockWaitToken(pub Option); + +impl LockWaitToken { + pub fn is_valid(&self) -> bool { + self.0.is_some() + } +} + /// `LockManager` manages transactions waiting for locks held by other /// transactions. It has responsibility to handle deadlocks between /// transactions. 
diff --git a/src/storage/types.rs b/src/storage/types.rs index 70cd7d2d991..c8303787a41 100644 --- a/src/storage/types.rs +++ b/src/storage/types.rs @@ -8,6 +8,7 @@ use kvproto::kvrpcpb; use txn_types::{Key, Value}; use crate::storage::{ + lock_manager::WaitTimeout, mvcc::{Lock, LockType, TimeStamp, Write, WriteType}, txn::ProcessResult, Callback, Result, @@ -121,6 +122,31 @@ pub struct PrewriteResult { pub one_pc_commit_ts: TimeStamp, } +#[cfg_attr(test, derive(Default))] +pub struct PessimisticLockParameters { + pub pb_ctx: kvrpcpb::Context, + pub primary: Vec, + pub start_ts: TimeStamp, + pub lock_ttl: u64, + pub for_update_ts: TimeStamp, + pub wait_timeout: Option, + pub return_values: bool, + pub min_commit_ts: TimeStamp, + pub check_existence: bool, + pub is_first_lock: bool, + + /// Whether it's allowed for an pessimistic lock request to acquire the lock + /// even there is write conflict (i.e. the latest version's `commit_ts` is + /// greater than the current request's `for_update_ts`. + /// + /// When this is true, it's also inferred that the request is resumable, + /// which means, if such a request encounters a lock of another + /// transaction and it waits for the lock, it can resume executing and + /// continue trying to acquire the lock when it's woken up. 
Also see: + /// [`super::lock_manager::lock_waiting_queue`] + pub allow_lock_with_conflict: bool, +} + #[derive(Clone, Debug, PartialEq)] pub enum PessimisticLockRes { /// The previous value is loaded while handling the `AcquirePessimisticLock` From c48b0cfa8630d949a6e32a14a74fad7fdbc78c02 Mon Sep 17 00:00:00 2001 From: lijie Date: Thu, 29 Sep 2022 18:59:44 +0800 Subject: [PATCH 0252/1149] bump master version to v6.4.0-alpha (#13499) close tikv/tikv#13563 bump master version to v6.4.0-alpha Signed-off-by: lijie Co-authored-by: Yilin Chen --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 821e15edc18..25897f0a8a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6144,7 +6144,7 @@ dependencies = [ [[package]] name = "tikv" -version = "6.2.0-alpha" +version = "6.4.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index f51e2ddd303..545ee9380a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tikv" -version = "6.2.0-alpha" +version = "6.4.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From 1f0b9bf70f2b91f85deb4a298ee74d06d1da0e9d Mon Sep 17 00:00:00 2001 From: Jay Date: Thu, 29 Sep 2022 19:49:45 -0700 Subject: [PATCH 0253/1149] raftstore-v2: add basic apply (#13495) ref tikv/tikv#12842 This PR adds the implementation of apply for v2. In v2, we don't need to batch writes across regions, so there is no need to use batch system. Instead, future pool is used to simplify implementations. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/batch-system/src/lib.rs | 2 +- components/raftstore-v2/Cargo.toml | 1 + components/raftstore-v2/src/batch/apply.rs | 184 --------- components/raftstore-v2/src/batch/mod.rs | 2 - components/raftstore-v2/src/batch/store.rs | 32 +- components/raftstore-v2/src/fsm/apply.rs | 130 ++++--- components/raftstore-v2/src/fsm/mod.rs | 2 +- components/raftstore-v2/src/fsm/peer.rs | 7 +- .../raftstore-v2/src/operation/command/mod.rs | 249 +++++++++++- .../src/operation/command/write/mod.rs | 81 +++- components/raftstore-v2/src/operation/mod.rs | 2 +- .../raftstore-v2/src/operation/query/mod.rs | 15 +- components/raftstore-v2/src/raft/apply.rs | 101 ++++- components/raftstore-v2/src/raft/peer.rs | 25 +- .../src/router/internal_message.rs | 15 +- .../raftstore-v2/tests/failpoints/mod.rs | 4 + .../tests/failpoints/test_basic_write.rs | 105 ++++++ .../tests/integrations/cluster.rs | 356 ++++++++++++++++++ .../raftstore-v2/tests/integrations/mod.rs | 330 +--------------- .../tests/integrations/test_basic_write.rs | 63 +++- .../tests/integrations/test_life.rs | 7 +- .../tests/integrations/test_read.rs | 12 +- .../tests/integrations/test_status.rs | 6 +- components/raftstore/src/store/fsm/apply.rs | 9 +- 25 files changed, 1086 insertions(+), 655 deletions(-) delete mode 100644 components/raftstore-v2/src/batch/apply.rs create mode 100644 components/raftstore-v2/tests/failpoints/test_basic_write.rs create mode 100644 components/raftstore-v2/tests/integrations/cluster.rs diff --git a/Cargo.lock b/Cargo.lock index 25897f0a8a8..2091ea3d4f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4247,6 +4247,7 @@ dependencies = [ "engine_traits", "error_code", "fail", + "file_system", "futures 0.3.15", "keys", "kvproto", diff --git a/components/batch-system/src/lib.rs b/components/batch-system/src/lib.rs index 9ca2953972d..9a307a534ac 100644 --- a/components/batch-system/src/lib.rs +++ 
b/components/batch-system/src/lib.rs @@ -16,7 +16,7 @@ pub use self::{ PollHandler, Poller, PoolState, }, config::Config, - fsm::{Fsm, Priority}, + fsm::{Fsm, FsmScheduler, Priority}, mailbox::{BasicMailbox, Mailbox}, router::Router, }; diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index c7d920e4011..09fa707c408 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -35,6 +35,7 @@ crossbeam = "0.8" engine_traits = { path = "../engine_traits" } error_code = { path = "../error_code" } fail = "0.5" +file_system = { path = "../file_system" } futures = { version = "0.3", features = ["compat"] } keys = { path = "../keys", default-features = false } kvproto = { git = "https://github.com/pingcap/kvproto.git" } diff --git a/components/raftstore-v2/src/batch/apply.rs b/components/raftstore-v2/src/batch/apply.rs deleted file mode 100644 index ebc7696aa64..00000000000 --- a/components/raftstore-v2/src/batch/apply.rs +++ /dev/null @@ -1,184 +0,0 @@ -// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. - -//! This module contains all structs related to apply batch system. -//! -//! After being started, each thread will have its own `ApplyPoller` and poll -//! using `ApplyContext`. For more information, see the documentation of -//! batch-system. 
- -use std::{ - ops::{Deref, DerefMut}, - sync::Arc, -}; - -use batch_system::{ - BasicMailbox, BatchRouter, BatchSystem, HandleResult, HandlerBuilder, PollHandler, -}; -use engine_traits::{KvEngine, RaftEngine}; -use raftstore::store::{ - fsm::{ - apply::{ControlFsm, ControlMsg}, - ApplyNotifier, - }, - util::LatencyInspector, - Config, -}; -use slog::Logger; -use tikv_util::config::{Tracker, VersionTrack}; - -use crate::{ - fsm::{ApplyFsm, ApplyFsmDelegate}, - raft::{Apply, Peer}, - router::ApplyTask, -}; - -pub struct ApplyContext { - cfg: Config, -} - -impl ApplyContext { - pub fn new(cfg: Config) -> Self { - ApplyContext { cfg } - } -} - -pub struct ApplyPoller { - apply_task_buf: Vec, - pending_latency_inspect: Vec, - apply_ctx: ApplyContext, - cfg_tracker: Tracker, -} - -impl ApplyPoller { - pub fn new(apply_ctx: ApplyContext, cfg_tracker: Tracker) -> ApplyPoller { - Self { - apply_task_buf: Vec::new(), - pending_latency_inspect: Vec::new(), - apply_ctx, - cfg_tracker, - } - } - - /// Updates the internal buffer to match the latest configuration. 
- fn apply_buf_capacity(&mut self) { - let new_cap = self.messages_per_tick(); - tikv_util::set_vec_capacity(&mut self.apply_task_buf, new_cap); - } - - #[inline] - fn messages_per_tick(&self) -> usize { - self.apply_ctx.cfg.messages_per_tick - } -} - -impl PollHandler, ControlFsm> for ApplyPoller -where - EK: KvEngine, -{ - fn begin(&mut self, _batch_size: usize, update_cfg: F) - where - for<'a> F: FnOnce(&'a batch_system::Config), - { - let cfg = self.cfg_tracker.any_new().map(|c| c.clone()); - if let Some(cfg) = cfg { - let last_messages_per_tick = self.messages_per_tick(); - self.apply_ctx.cfg = cfg; - if self.apply_ctx.cfg.messages_per_tick != last_messages_per_tick { - self.apply_buf_capacity(); - } - update_cfg(&self.apply_ctx.cfg.apply_batch_system); - } - } - - fn handle_control(&mut self, control: &mut ControlFsm) -> Option { - control.handle_messages(&mut self.pending_latency_inspect); - for inspector in self.pending_latency_inspect.drain(..) { - // TODO: support apply duration. 
- inspector.finish(); - } - Some(0) - } - - fn handle_normal( - &mut self, - normal: &mut impl DerefMut>, - ) -> batch_system::HandleResult { - let received_cnt = normal.recv(&mut self.apply_task_buf); - let handle_result = if received_cnt == self.messages_per_tick() { - HandleResult::KeepProcessing - } else { - HandleResult::stop_at(0, false) - }; - let mut delegate = ApplyFsmDelegate::new(normal, &mut self.apply_ctx); - delegate.handle_msgs(&mut self.apply_task_buf); - handle_result - } - - fn end(&mut self, batch: &mut [Option>>]) { - // TODO: support memory trace - } -} - -pub struct ApplyPollerBuilder { - cfg: Arc>, -} - -impl ApplyPollerBuilder { - pub fn new(cfg: Arc>) -> Self { - Self { cfg } - } -} - -impl HandlerBuilder, ControlFsm> for ApplyPollerBuilder { - type Handler = ApplyPoller; - - fn build(&mut self, priority: batch_system::Priority) -> Self::Handler { - let apply_ctx = ApplyContext::new(self.cfg.value().clone()); - let cfg_tracker = self.cfg.clone().tracker("apply".to_string()); - ApplyPoller::new(apply_ctx, cfg_tracker) - } -} - -/// Batch system for applying logs pipeline. 
-pub struct ApplySystem { - system: BatchSystem, ControlFsm>, -} - -impl Deref for ApplySystem { - type Target = BatchSystem, ControlFsm>; - - fn deref(&self) -> &BatchSystem, ControlFsm> { - &self.system - } -} - -impl DerefMut for ApplySystem { - fn deref_mut(&mut self) -> &mut BatchSystem, ControlFsm> { - &mut self.system - } -} - -impl ApplySystem { - pub fn schedule_all<'a, ER: RaftEngine>(&self, peers: impl Iterator>) { - let mut mailboxes = Vec::with_capacity(peers.size_hint().0); - for peer in peers { - let apply = Apply::new(peer); - let (tx, fsm) = ApplyFsm::new(apply); - mailboxes.push(( - peer.region_id(), - BasicMailbox::new(tx, fsm, self.router().state_cnt().clone()), - )); - } - self.router().register_all(mailboxes); - } -} - -pub type ApplyRouter = BatchRouter, ControlFsm>; - -pub fn create_apply_batch_system(cfg: &Config) -> (ApplyRouter, ApplySystem) { - let (control_tx, control_fsm) = ControlFsm::new(); - let (router, system) = - batch_system::create_system(&cfg.apply_batch_system, control_tx, control_fsm); - let system = ApplySystem { system }; - (router, system) -} diff --git a/components/raftstore-v2/src/batch/mod.rs b/components/raftstore-v2/src/batch/mod.rs index 7e00932d1e1..7daeebaa8f0 100644 --- a/components/raftstore-v2/src/batch/mod.rs +++ b/components/raftstore-v2/src/batch/mod.rs @@ -5,8 +5,6 @@ //! StoreSystem is used for polling raft state machines, ApplySystem is used for //! applying logs. 
-mod apply; mod store; -pub(crate) use apply::ApplyContext; pub use store::{create_store_batch_system, StoreContext, StoreRouter, StoreSystem}; diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 1bb17ff2c85..bd777477bf0 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -14,6 +14,7 @@ use batch_system::{ use collections::HashMap; use crossbeam::channel::{Sender, TrySendError}; use engine_traits::{Engines, KvEngine, RaftEngine, TabletFactory}; +use file_system::{set_io_type, IoType}; use futures::{compat::Future01CompatExt, FutureExt}; use kvproto::{ metapb::Store, @@ -30,14 +31,15 @@ use tikv_util::{ config::{Tracker, VersionTrack}, defer, future::poll_future_notify, + sys::SysQuota, time::Instant as TiInstant, timer::SteadyTimer, worker::{Scheduler, Worker}, + yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, Either, }; use time::Timespec; -use super::apply::{create_apply_batch_system, ApplyPollerBuilder, ApplyRouter, ApplySystem}; use crate::{ fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate, StoreMeta}, raft::{Peer, Storage}, @@ -67,6 +69,7 @@ pub struct StoreContext { pub store_meta: Arc>>, pub engine: ER, pub tablet_factory: Arc>, + pub apply_pool: FuturePool, pub log_fetch_scheduler: Scheduler, } @@ -216,6 +219,7 @@ struct StorePollerBuilder { router: StoreRouter, log_fetch_scheduler: Scheduler, write_senders: WriteSenders, + apply_pool: FuturePool, logger: Logger, store_meta: Arc>>, } @@ -233,6 +237,16 @@ impl StorePollerBuilder { logger: Logger, store_meta: Arc>>, ) -> Self { + let pool_size = cfg.value().apply_batch_system.pool_size; + let max_pool_size = std::cmp::max( + pool_size, + std::cmp::max(4, SysQuota::cpu_cores_quota() as usize), + ); + let apply_pool = YatpPoolBuilder::new(DefaultTicker::default()) + .thread_count(1, pool_size, max_pool_size) + .after_start(move || set_io_type(IoType::ForegroundWrite)) 
+ .name_prefix("apply") + .build_future_pool(); StorePollerBuilder { cfg, store_id, @@ -241,6 +255,7 @@ impl StorePollerBuilder { trans, router, log_fetch_scheduler, + apply_pool, logger, write_senders: store_writers.senders(), store_meta, @@ -310,6 +325,7 @@ where store_meta: self.store_meta.clone(), engine: self.engine.clone(), tablet_factory: self.tablet_factory.clone(), + apply_pool: self.apply_pool.clone(), log_fetch_scheduler: self.log_fetch_scheduler.clone(), }; let cfg_tracker = self.cfg.clone().tracker("raftstore".to_string()); @@ -337,8 +353,6 @@ impl Default for Workers { /// The system used for polling Raft activities. pub struct StoreSystem { system: BatchSystem, StoreFsm>, - apply_router: ApplyRouter, - apply_system: ApplySystem, workers: Option>, logger: Logger, } @@ -380,8 +394,6 @@ impl StoreSystem { ); self.workers = Some(workers); let peers = builder.init()?; - self.apply_system - .schedule_all(peers.values().map(|pair| pair.1.peer())); // Choose a different name so we know what version is actually used. rs stands // for raft store. 
let tag = format!("rs-{}", store_id); @@ -403,10 +415,6 @@ impl StoreSystem { router.force_send(addr, PeerMsg::Start).unwrap(); } router.send_control(StoreMsg::Start).unwrap(); - - let apply_poller_builder = ApplyPollerBuilder::new(cfg); - self.apply_system - .spawn("apply".to_owned(), apply_poller_builder); Ok(()) } @@ -416,7 +424,8 @@ impl StoreSystem { } let mut workers = self.workers.take().unwrap(); - self.apply_system.shutdown(); + // TODO: gracefully shutdown future pool + self.system.shutdown(); workers.store_writers.shutdown(); @@ -493,11 +502,8 @@ where let (store_tx, store_fsm) = StoreFsm::new(cfg, store_id, logger.clone()); let (router, system) = batch_system::create_system(&cfg.store_batch_system, store_tx, store_fsm); - let (apply_router, apply_system) = create_apply_batch_system(cfg); let system = StoreSystem { system, - apply_router, - apply_system, workers: None, logger: logger.clone(), }; diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 21646be4738..b37d0b33518 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -1,74 +1,102 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use batch_system::Fsm; +use std::{ + pin::Pin, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + task::{Context, Poll}, +}; + +use batch_system::{Fsm, FsmScheduler, Mailbox}; use crossbeam::channel::TryRecvError; use engine_traits::KvEngine; -use tikv_util::mpsc::{self, LooseBoundedSender, Receiver}; +use futures::{Future, StreamExt}; +use kvproto::raft_serverpb::RegionLocalState; +use slog::Logger; +use tikv_util::mpsc::future::{self, Receiver, Sender, WakePolicy}; -use crate::{batch::ApplyContext, raft::Apply, router::ApplyTask}; +use crate::{ + raft::Apply, + router::{ApplyRes, ApplyTask, PeerMsg}, + tablet::CachedTablet, +}; -pub struct ApplyFsm { - apply: Apply, - receiver: Receiver, - is_stopped: bool, +/// A trait for reporting apply result. +/// +/// Using a trait to make signiture simpler. +pub trait ApplyResReporter { + fn report(&self, apply_res: ApplyRes); } -impl ApplyFsm { - pub fn new(apply: Apply) -> (LooseBoundedSender, Box) { - let (tx, rx) = mpsc::loose_bounded(usize::MAX); - ( - tx, - Box::new(Self { - apply, - receiver: rx, - is_stopped: false, - }), - ) - } - - /// Fetches messages to `apply_task_buf`. It will stop when the buffer - /// capacity is reached or there is no more pending messages. - /// - /// Returns how many messages are fetched. - pub fn recv(&mut self, apply_task_buf: &mut Vec) -> usize { - let l = apply_task_buf.len(); - for i in l..apply_task_buf.capacity() { - match self.receiver.try_recv() { - Ok(msg) => apply_task_buf.push(msg), - Err(e) => { - if let TryRecvError::Disconnected = e { - self.is_stopped = true; - } - return i - l; - } - } - } - apply_task_buf.capacity() - l +impl, S: FsmScheduler> ApplyResReporter for Mailbox { + fn report(&self, apply_res: ApplyRes) { + // TODO: check shutdown. + self.force_send(PeerMsg::ApplyRes(apply_res)).unwrap(); } } -impl Fsm for ApplyFsm { - type Message = ApplyTask; +/// Schedule task to `ApplyFsm`. 
+pub struct ApplyScheduler { + sender: Sender, +} +impl ApplyScheduler { #[inline] - fn is_stopped(&self) -> bool { - self.is_stopped + pub fn send(&self, task: ApplyTask) { + // TODO: ignore error when shutting down. + self.sender.send(task).unwrap(); } } -pub struct ApplyFsmDelegate<'a, EK: KvEngine> { - fsm: &'a mut ApplyFsm, - apply_ctx: &'a mut ApplyContext, +pub struct ApplyFsm { + apply: Apply, + receiver: Receiver, } -impl<'a, EK: KvEngine> ApplyFsmDelegate<'a, EK> { - pub fn new(fsm: &'a mut ApplyFsm, apply_ctx: &'a mut ApplyContext) -> Self { - Self { fsm, apply_ctx } +impl ApplyFsm { + pub fn new( + region_state: RegionLocalState, + res_reporter: R, + remote_tablet: CachedTablet, + logger: Logger, + ) -> (ApplyScheduler, Self) { + let (tx, rx) = future::unbounded(WakePolicy::Immediately); + let apply = Apply::new(region_state, res_reporter, remote_tablet, logger); + ( + ApplyScheduler { sender: tx }, + Self { + apply, + receiver: rx, + }, + ) } +} - pub fn handle_msgs(&self, apply_task_buf: &mut Vec) { - for task in apply_task_buf.drain(..) { - // TODO: handle the tasks. +impl ApplyFsm { + pub async fn handle_all_tasks(&mut self) { + loop { + let mut task = match self.receiver.next().await { + Some(t) => t, + None => return, + }; + loop { + match task { + // TODO: flush by buffer size. + ApplyTask::CommittedEntries(ce) => self.apply.apply_committed_entries(ce).await, + } + + // TODO: yield after some time. + + // Perhaps spin sometime? 
+ match self.receiver.try_recv() { + Ok(t) => task = t, + Err(TryRecvError::Empty) => break, + Err(TryRecvError::Disconnected) => return, + } + } + self.apply.flush(); } } } diff --git a/components/raftstore-v2/src/fsm/mod.rs b/components/raftstore-v2/src/fsm/mod.rs index 191f629900a..b3d0e0483ba 100644 --- a/components/raftstore-v2/src/fsm/mod.rs +++ b/components/raftstore-v2/src/fsm/mod.rs @@ -9,6 +9,6 @@ mod apply; mod peer; mod store; -pub use apply::{ApplyFsm, ApplyFsmDelegate}; +pub use apply::{ApplyFsm, ApplyResReporter, ApplyScheduler}; pub use peer::{PeerFsm, PeerFsmDelegate, SenderFsmPair}; pub use store::{Store, StoreFsm, StoreFsmDelegate, StoreMeta}; diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 8443ef265a8..cd8775359fc 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -14,8 +14,10 @@ use tikv_util::{ is_zero_duration, mpsc::{self, LooseBoundedSender, Receiver, Sender}, time::{duration_to_sec, Instant}, + yatp_pool::FuturePool, }; +use super::ApplyFsm; use crate::{ batch::StoreContext, raft::{Peer, Storage}, @@ -176,6 +178,9 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, fn on_start(&mut self) { self.schedule_tick(PeerTick::Raft); + if self.fsm.peer.storage().is_initialized() { + self.fsm.peer.schedule_apply_fsm(self.store_ctx); + } } #[inline] @@ -215,7 +220,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, self.on_command(cmd.request, cmd.ch) } PeerMsg::Tick(tick) => self.on_tick(tick), - PeerMsg::ApplyRes(res) => unimplemented!(), + PeerMsg::ApplyRes(res) => self.fsm.peer.on_apply_res(res), PeerMsg::Start => self.on_start(), PeerMsg::Noop => unimplemented!(), PeerMsg::Persisted { diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index fa3c89dce74..bef599d5239 100644 --- 
a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -4,17 +4,39 @@ //! all replicas and executed in the same order. Typical commands include: //! - normal writes like put, delete, etc. //! - admin commands like split, compact, etc. +//! +//! General proceessing is: +//! - Propose a command to the leader via PeerMsg::Command, +//! - The leader batch up commands and replicates them to followers, +//! - Once they are replicated to majority, leader considers it committed and +//! send to another thread for execution via +//! `schedule_apply_committed_entries`, +//! - The apply thread executes the commands in buffer, and write to LSM tree +//! via `flush`, +//! - Applied result are sent back to peer fsm, and update memory state in +//! `on_apply_res`. use std::cmp; -use engine_traits::{KvEngine, RaftEngine}; -use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest}; +use batch_system::{Fsm, FsmScheduler, Mailbox}; +use engine_traits::{KvEngine, RaftEngine, WriteBatch, WriteOptions}; +use kvproto::{ + raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse, RaftRequestHeader}, + raft_serverpb::RegionLocalState, +}; use protobuf::Message; use raft::eraftpb::Entry; use raftstore::{ store::{ - fsm::Proposal, local_metrics::RaftMetrics, metrics::*, msg::ErrorCallback, util, - WriteCallback, + cmd_resp, + fsm::{ + apply::{APPLY_WB_SHRINK_SIZE, DEFAULT_APPLY_WB_SIZE, SHRINK_PENDING_CMD_QUEUE_CAP}, + Proposal, + }, + local_metrics::RaftMetrics, + metrics::*, + msg::ErrorCallback, + util, WriteCallback, }, Error, Result, }; @@ -23,15 +45,33 @@ use tikv_util::{box_err, time::monotonic_raw_now}; use crate::{ batch::StoreContext, - fsm::{PeerFsm, PeerFsmDelegate}, - raft::Peer, - router::CmdResChannel, + fsm::{ApplyFsm, ApplyResReporter, PeerFsmDelegate}, + raft::{Apply, Peer}, + router::{ApplyRes, ApplyTask, CmdResChannel, PeerMsg}, }; mod write; pub use write::{SimpleWriteDecoder, SimpleWriteEncoder}; +use 
self::write::SimpleWrite; + +#[derive(Debug)] +pub struct CommittedEntries { + /// Entries need to be applied. Note some entries may not be included for + /// flow control. + entry_and_proposals: Vec<(Entry, Vec)>, +} + +fn new_response(header: &RaftRequestHeader) -> RaftCmdResponse { + let mut resp = RaftCmdResponse::default(); + if !header.get_uuid().is_empty() { + let uuid = header.get_uuid().to_vec(); + resp.mut_header().set_uuid(uuid); + } + resp +} + impl<'a, EK: KvEngine, ER: RaftEngine, T> PeerFsmDelegate<'a, EK, ER, T> { #[inline] pub fn on_command(&mut self, req: RaftCmdRequest, ch: CmdResChannel) { @@ -48,6 +88,25 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> PeerFsmDelegate<'a, EK, ER, T> { } impl Peer { + /// Schedule an apply fsm to apply logs in the background. + /// + /// Everytime a snapshot is applied or peer is just started, it will + /// schedule a new apply fsm. The old fsm will stopped automatically + /// when the old apply scheduler is dropped. + #[inline] + pub fn schedule_apply_fsm(&mut self, store_ctx: &mut StoreContext) { + let region_state = self.storage().region_state().clone(); + let mailbox = store_ctx.router.mailbox(self.region_id()).unwrap(); + let tablet = self.tablet().clone(); + let logger = self.logger.clone(); + let (apply_scheduler, mut apply_fsm) = ApplyFsm::new(region_state, mailbox, tablet, logger); + store_ctx + .apply_pool + .spawn(async move { apply_fsm.handle_all_tasks().await }) + .unwrap(); + self.set_apply_scheduler(apply_scheduler); + } + #[inline] fn validate_command(&self, req: &RaftCmdRequest, metrics: &mut RaftMetrics) -> Result<()> { if let Err(e) = util::check_store_id(req, self.peer().get_store_id()) { @@ -147,19 +206,171 @@ impl Peer { // on_committed callback if necessary. 
p.cb.notify_committed(); } - entry_and_proposals.push((e, proposal)); + entry_and_proposals.push((e, proposal.map_or_else(Vec::new, |p| p.cb))); } } else { - entry_and_proposals = committed_entries.into_iter().map(|e| (e, None)).collect(); - } - // Note that the `commit_index` and `commit_term` here may be used to - // forward the commit index after being restarted. So it must be less - // than or equal to persisted index. - let commit_index = cmp::min( - self.raft_group().raft.raft_log.committed, - self.raft_group().raft.raft_log.persisted, - ); - let commit_term = self.raft_group().raft.raft_log.term(commit_index).unwrap(); - // TODO: schedule apply task + entry_and_proposals = committed_entries.into_iter().map(|e| (e, vec![])).collect(); + } + // Unlike v1, v2 doesn't need to persist commit index and commit term. The + // point of persist commit index/term of raft apply state is to recover commit + // index when the writes to raft engine is lost but writes to kv engine is + // persisted. But in v2, writes to raft engine must be persisted before + // memtables in kv engine is flushed. + let apply = CommittedEntries { + entry_and_proposals, + }; + self.apply_scheduler() + .send(ApplyTask::CommittedEntries(apply)); + } + + pub fn on_apply_res(&mut self, apply_res: ApplyRes) { + if !self.serving() { + return; + } + // It must just applied a snapshot. + if apply_res.applied_index < self.entry_storage().first_index() { + // TODO: handle admin side effects like split/merge. + return; + } + self.raft_group_mut() + .advance_apply_to(apply_res.applied_index); + let is_leader = self.is_leader(); + let entry_storage = self.entry_storage_mut(); + entry_storage + .apply_state_mut() + .set_applied_index(apply_res.applied_index); + entry_storage.set_applied_term(apply_res.applied_term); + if !is_leader { + entry_storage.compact_entry_cache(apply_res.applied_index + 1); + // TODO: handle read. + } else { + // TODO: handle read. 
+ } + } +} + +impl Apply { + #[inline] + pub async fn apply_committed_entries(&mut self, ce: CommittedEntries) { + fail::fail_point!("APPLY_COMMITTED_ENTRIES"); + for (e, ch) in ce.entry_and_proposals { + if !e.get_data().is_empty() { + let mut set_save_point = false; + if let Some(wb) = self.write_batch_mut() { + wb.set_save_point(); + set_save_point = true; + } + let resp = match self.apply_entry(&e).await { + Ok(resp) => resp, + Err(e) => { + if let Some(wb) = self.write_batch_mut() { + if set_save_point { + wb.rollback_to_save_point().unwrap(); + } else { + wb.clear(); + } + } + cmd_resp::new_error(e) + } + }; + self.callbacks_mut().push((ch, resp)); + } else { + assert!(ch.is_empty()); + } + // Flush may be triggerred in the middle, so always update the index and term. + self.set_apply_progress(e.index, e.term); + } + } + + #[inline] + async fn apply_entry(&mut self, entry: &Entry) -> Result { + match SimpleWriteDecoder::new(entry.get_data()) { + Ok(decoder) => { + util::compare_region_epoch( + decoder.header().get_region_epoch(), + self.region_state().get_region(), + false, + true, + true, + )?; + let res = Ok(new_response(decoder.header())); + for req in decoder { + match req { + SimpleWrite::Put(put) => self.apply_put(put.cf, put.key, put.value)?, + SimpleWrite::Delete(delete) => self.apply_delete(delete.cf, delete.key)?, + SimpleWrite::DeleteRange(dr) => self.apply_delete_range( + dr.cf, + dr.start_key, + dr.end_key, + dr.notify_only, + )?, + } + } + res + } + Err(req) => { + util::check_region_epoch(&req, self.region_state().get_region(), true)?; + if req.has_admin_request() { + // TODO: implement admin request. + } else { + for r in req.get_requests() { + match r.get_cmd_type() { + // These three writes should all use the new codec. Keep them here for + // backward compatibility. 
+ CmdType::Put => { + let put = r.get_put(); + self.apply_put(put.get_cf(), put.get_key(), put.get_value())?; + } + CmdType::Delete => { + let delete = r.get_delete(); + self.apply_delete(delete.get_cf(), delete.get_key())?; + } + CmdType::DeleteRange => { + let dr = r.get_delete_range(); + self.apply_delete_range( + dr.get_cf(), + dr.get_start_key(), + dr.get_end_key(), + dr.get_notify_only(), + )?; + } + _ => unimplemented!(), + } + } + } + Ok(new_response(req.get_header())) + } + } + } + + #[inline] + pub fn flush(&mut self) { + if let Some(wb) = self.write_batch_mut() && !wb.is_empty() { + let mut write_opt = WriteOptions::default(); + write_opt.set_disable_wal(true); + if let Err(e) = wb.write_opt(&write_opt) { + panic!("failed to write data: {:?}", self.logger.list()); + } + if wb.data_size() <= APPLY_WB_SHRINK_SIZE { + wb.clear(); + } else { + self.write_batch_mut().take(); + } + } + let callbacks = self.callbacks_mut(); + for (ch, resp) in callbacks.drain(..) { + ch.set_result(resp); + } + if callbacks.capacity() > SHRINK_PENDING_CMD_QUEUE_CAP { + callbacks.shrink_to(SHRINK_PENDING_CMD_QUEUE_CAP); + } + let mut apply_res = ApplyRes::default(); + let (index, term) = self.apply_progress(); + apply_res.applied_index = index; + apply_res.applied_term = term; + if self.reset_state_changed() { + apply_res.region_state = Some(self.region_state().clone()); + } + self.res_reporter().report(apply_res); } } diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 30dfb3bb753..76692b6af0a 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -1,24 +1,31 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{KvEngine, RaftEngine}; -use kvproto::raft_cmdpb::RaftCmdRequest; +use engine_traits::{KvEngine, Mutable, RaftEngine, CF_DEFAULT}; +use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, Request}; use raftstore::{ store::{ cmd_resp, fsm::{apply, Proposal, MAX_PROPOSAL_SIZE_RATIO}, msg::ErrorCallback, - WriteCallback, + util, WriteCallback, }, Result, }; use tikv_util::Either; -use crate::{batch::StoreContext, raft::Peer, router::CmdResChannel}; +use crate::{ + batch::StoreContext, + raft::{Apply, Peer}, + router::CmdResChannel, +}; mod simple_write; pub use simple_write::{SimpleWriteDecoder, SimpleWriteEncoder}; +pub use self::simple_write::SimpleWrite; +use super::CommittedEntries; + impl Peer { #[inline] pub fn on_write_command( @@ -31,7 +38,7 @@ impl Peer { apply::notify_req_region_removed(self.region_id(), ch); return; } - if let Some(encoder) = self.raw_write_encoder_mut() { + if let Some(encoder) = self.simple_write_encoder_mut() { match encoder.amend(req) { Ok(()) => { encoder.add_response_channel(ch); @@ -55,7 +62,7 @@ impl Peer { Ok(mut encoder) => { encoder.add_response_channel(ch); self.set_has_ready(); - self.raw_write_encoder_mut().replace(encoder); + self.simple_write_encoder_mut().replace(encoder); } Err(req) => { let res = self.propose_command(ctx, req); @@ -83,10 +90,70 @@ impl Peer { } pub fn propose_pending_command(&mut self, ctx: &mut StoreContext) { - if let Some(encoder) = self.raw_write_encoder_mut().take() { + if let Some(encoder) = self.simple_write_encoder_mut().take() { let (data, chs) = encoder.encode(); let res = self.propose(ctx, data); self.post_propose_write(ctx, res, chs); } } } + +impl Apply { + #[inline] + pub fn apply_put(&mut self, cf: &str, key: &[u8], value: &[u8]) -> Result<()> { + util::check_key_in_region(key, self.region_state().get_region())?; + let res = if cf.is_empty() || cf == CF_DEFAULT { + // TODO: use write_vector + self.write_batch_or_default().put(key, value) + } else { + 
self.write_batch_or_default().put_cf(cf, key, value) + }; + res.unwrap_or_else(|e| { + panic!( + "{:?} failed to write ({}, {}) {}: {:?}", + self.logger.list(), + log_wrappers::Value::key(key), + log_wrappers::Value::value(value), + cf, + e + ); + }); + fail::fail_point!("APPLY_PUT", |_| Err(raftstore::Error::Other( + "aborted by failpoint".into() + ))); + Ok(()) + } + + #[inline] + pub fn apply_delete(&mut self, cf: &str, key: &[u8]) -> Result<()> { + util::check_key_in_region(key, self.region_state().get_region())?; + let res = if cf.is_empty() || cf == CF_DEFAULT { + // TODO: use write_vector + self.write_batch_or_default().delete(key) + } else { + self.write_batch_or_default().delete_cf(cf, key) + }; + res.unwrap_or_else(|e| { + panic!( + "{:?} failed to delete {} {}: {:?}", + self.logger.list(), + log_wrappers::Value::key(key), + cf, + e + ); + }); + Ok(()) + } + + #[inline] + pub fn apply_delete_range( + &mut self, + cf: &str, + start_key: &[u8], + end_key: &[u8], + notify_only: bool, + ) -> Result<()> { + /// TODO: reuse the same delete as split/merge. + Ok(()) + } +} diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index fd1a4c79600..ebef0cf0595 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -5,6 +5,6 @@ mod life; mod query; mod ready; -pub use command::{SimpleWriteDecoder, SimpleWriteEncoder}; +pub use command::{CommittedEntries, SimpleWriteDecoder, SimpleWriteEncoder}; pub use life::DestroyProgress; pub use ready::AsyncWriter; diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index dbee230a7a5..14cedc7b212 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -11,6 +11,8 @@ //! Follower's read index and replica read is implemenented replica module. //! 
Leader's read index and lease renew is implemented in lease module. +use std::{cmp, sync::Arc}; + use crossbeam::channel::TrySendError; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ @@ -354,12 +356,23 @@ impl Peer { /// Query internal states for debugging purpose. pub fn on_query_debug_info(&self, ch: DebugInfoChannel) { let entry_storage = self.storage().entry_storage(); - let meta = RegionMeta::new( + let mut meta = RegionMeta::new( self.storage().region_state(), entry_storage.apply_state(), GroupState::Ordered, self.raft_group().status(), ); + // V2 doesn't persist commit index and term, fill them with in-memory values. + meta.raft_apply.commit_index = cmp::min( + self.raft_group().raft.raft_log.committed, + self.raft_group().raft.raft_log.persisted, + ); + meta.raft_apply.commit_term = self + .raft_group() + .raft + .raft_log + .term(meta.raft_apply.commit_index) + .unwrap(); ch.set_result(meta); } } diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 09646965bda..b210890ac40 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -1,23 +1,108 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. use engine_traits::{KvEngine, RaftEngine}; +use kvproto::{raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; +use raftstore::store::fsm::apply::DEFAULT_APPLY_WB_SIZE; use slog::Logger; use super::Peer; -use crate::tablet::CachedTablet; +use crate::{ + fsm::ApplyResReporter, + router::{ApplyRes, CmdResChannel}, + tablet::CachedTablet, +}; /// Apply applies all the committed commands to kv db. 
-pub struct Apply { - tablet: CachedTablet, - logger: Logger, +pub struct Apply { + remote_tablet: CachedTablet, + tablet: EK, + write_batch: Option, + + callbacks: Vec<(Vec, RaftCmdResponse)>, + + applied_index: u64, + applied_term: u64, + + region_state: RegionLocalState, + state_changed: bool, + + res_reporter: R, + pub(crate) logger: Logger, } -impl Apply { +impl Apply { #[inline] - pub fn new(peer: &Peer) -> Self { + pub fn new( + region_state: RegionLocalState, + res_reporter: R, + mut remote_tablet: CachedTablet, + logger: Logger, + ) -> Self { Apply { - tablet: peer.tablet().clone(), - logger: peer.logger.clone(), + tablet: remote_tablet.latest().unwrap().clone(), + remote_tablet, + write_batch: None, + callbacks: vec![], + applied_index: 0, + applied_term: 0, + region_state, + state_changed: false, + res_reporter, + logger, + } + } + + #[inline] + pub fn res_reporter(&self) -> &R { + &self.res_reporter + } + + #[inline] + pub fn callbacks_mut(&mut self) -> &mut Vec<(Vec, RaftCmdResponse)> { + &mut self.callbacks + } + + #[inline] + pub fn write_batch_mut(&mut self) -> &mut Option { + &mut self.write_batch + } + + #[inline] + pub fn write_batch_or_default(&mut self) -> &mut EK::WriteBatch { + if self.write_batch.is_none() { + self.write_batch = Some(self.tablet.write_batch_with_cap(DEFAULT_APPLY_WB_SIZE)); } + self.write_batch.as_mut().unwrap() + } + + #[inline] + pub fn set_apply_progress(&mut self, index: u64, term: u64) { + self.applied_index = index; + self.applied_term = term; + } + + #[inline] + pub fn apply_progress(&self) -> (u64, u64) { + (self.applied_index, self.applied_term) + } + + #[inline] + pub fn region_state(&self) -> &RegionLocalState { + &self.region_state + } + + #[inline] + pub fn reset_state_changed(&mut self) -> bool { + std::mem::take(&mut self.state_changed) + } + + /// Publish the tablet so that it can be used by read worker. + /// + /// Note, during split/merge, lease is expired explicitly and read is + /// forbidden. 
So publishing it immediately is OK. + #[inline] + pub fn publish_tablet(&mut self, tablet: EK) { + self.remote_tablet.set(tablet.clone()); + self.tablet = tablet; } } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 57e14bad02d..8b69a52f623 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -31,9 +31,9 @@ use tikv_util::{ Either, }; -use super::storage::Storage; +use super::{storage::Storage, Apply}; use crate::{ - batch::StoreContext, + fsm::{ApplyFsm, ApplyScheduler}, operation::{AsyncWriter, DestroyProgress, SimpleWriteEncoder}, router::{CmdResChannel, QueryResChannel}, tablet::{self, CachedTablet}, @@ -55,6 +55,7 @@ pub struct Peer { /// than protobuf. raw_write_encoder: Option, proposals: ProposalQueue>, + apply_scheduler: Option, /// Set to true if any side effect needs to be handled. has_ready: bool, @@ -121,15 +122,18 @@ impl Peer { None }; + let tablet = CachedTablet::new(tablet); + let raft_group = RawNode::new(&raft_cfg, storage, &logger)?; let region = raft_group.store().region_state().get_region().clone(); let tag = format!("[region {}] {}", region.get_id(), peer_id); let mut peer = Peer { - tablet: CachedTablet::new(tablet), + tablet, peer_cache: vec![], raw_write_encoder: None, proposals: ProposalQueue::new(region_id, raft_group.raft.id), async_writer: AsyncWriter::new(region_id, peer_id), + apply_scheduler: None, has_ready: false, destroy_progress: DestroyProgress::None, raft_group, @@ -364,12 +368,13 @@ impl Peer { self.entry_storage().applied_term() == self.term() } - pub fn raw_write_encoder_mut(&mut self) -> &mut Option { + #[inline] + pub fn simple_write_encoder_mut(&mut self) -> &mut Option { &mut self.raw_write_encoder } #[inline] - pub fn raw_write_encoder(&self) -> &Option { + pub fn simple_write_encoder(&self) -> &Option { &self.raw_write_encoder } @@ -382,4 +387,14 @@ impl Peer { pub fn proposals_mut(&mut self) -> &mut ProposalQueue> 
{ &mut self.proposals } + + #[inline] + pub fn apply_scheduler(&self) -> &ApplyScheduler { + self.apply_scheduler.as_ref().unwrap() + } + + #[inline] + pub fn set_apply_scheduler(&mut self, apply_scheduler: ApplyScheduler) { + self.apply_scheduler = Some(apply_scheduler); + } } diff --git a/components/raftstore-v2/src/router/internal_message.rs b/components/raftstore-v2/src/router/internal_message.rs index 05653e4fdcc..28a93e897af 100644 --- a/components/raftstore-v2/src/router/internal_message.rs +++ b/components/raftstore-v2/src/router/internal_message.rs @@ -1,6 +1,17 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -pub enum ApplyTask {} +use kvproto::raft_serverpb::RegionLocalState; + +use crate::operation::CommittedEntries; #[derive(Debug)] -pub enum ApplyRes {} +pub enum ApplyTask { + CommittedEntries(CommittedEntries), +} + +#[derive(Debug, Default)] +pub struct ApplyRes { + pub applied_index: u64, + pub applied_term: u64, + pub region_state: Option, +} diff --git a/components/raftstore-v2/tests/failpoints/mod.rs b/components/raftstore-v2/tests/failpoints/mod.rs index 88dfd0a81aa..26403f2f0a3 100644 --- a/components/raftstore-v2/tests/failpoints/mod.rs +++ b/components/raftstore-v2/tests/failpoints/mod.rs @@ -5,4 +5,8 @@ #![feature(custom_test_frameworks)] #![test_runner(test_util::run_failpoint_tests)] +#[allow(dead_code)] +#[path = "../integrations/cluster.rs"] +mod cluster; +mod test_basic_write; mod test_bootstrap; diff --git a/components/raftstore-v2/tests/failpoints/test_basic_write.rs b/components/raftstore-v2/tests/failpoints/test_basic_write.rs new file mode 100644 index 00000000000..5014e0efd3e --- /dev/null +++ b/components/raftstore-v2/tests/failpoints/test_basic_write.rs @@ -0,0 +1,105 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{assert_matches::assert_matches, time::Duration}; + +use engine_traits::{OpenOptions, Peekable, TabletFactory}; +use futures::executor::block_on; +use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, Request}; +use raftstore::store::{INIT_EPOCH_CONF_VER, INIT_EPOCH_VER}; +use raftstore_v2::router::PeerMsg; +use tikv_util::store::new_peer; + +use crate::cluster::Cluster; + +/// Check if write batch is correctly maintained during apply. +#[test] +fn test_write_batch_rollback() { + let cluster = Cluster::default(); + let router = cluster.router(0); + let mut req = RaftCmdRequest::default(); + req.mut_header().set_region_id(2); + let epoch = req.mut_header().mut_region_epoch(); + epoch.set_version(INIT_EPOCH_VER); + epoch.set_conf_ver(INIT_EPOCH_CONF_VER); + req.mut_header().set_peer(new_peer(1, 3)); + let mut put_req = Request::default(); + put_req.set_cmd_type(CmdType::Put); + put_req.mut_put().set_key(b"key".to_vec()); + put_req.mut_put().set_value(b"value".to_vec()); + req.mut_requests().push(put_req.clone()); + + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + // Make several entries to batch in apply thread. + fail::cfg("APPLY_COMMITTED_ENTRIES", "pause").unwrap(); + + let tablet_factory = cluster.node(0).tablet_factory(); + let tablet = tablet_factory + .open_tablet(2, None, OpenOptions::default().set_cache_only(true)) + .unwrap(); + + // Good proposal should be committed. + let (msg, mut sub0) = PeerMsg::raft_command(req.clone()); + router.send(2, msg).unwrap(); + assert!(block_on(sub0.wait_proposed())); + assert!(block_on(sub0.wait_committed())); + + // If the write batch is correctly initialized, next write should not contain + // last result. 
+ req.mut_requests()[0].mut_put().set_key(b"key1".to_vec()); + let (msg, mut sub1) = PeerMsg::raft_command(req.clone()); + router.send(2, msg).unwrap(); + assert!(block_on(sub1.wait_proposed())); + assert!(block_on(sub1.wait_committed())); + + fail::cfg("APPLY_PUT", "1*return()").unwrap(); + // Wake up and sleep in next committed entry. + fail::remove("APPLY_COMMITTED_ENTRIES"); + // First apply will fail due to aborted. If write batch is initialized + // correctly, correct response can be returned. + let resp = block_on(sub0.result()).unwrap(); + assert!( + resp.get_header() + .get_error() + .get_message() + .contains("aborted"), + "{:?}", + resp + ); + let resp = block_on(sub1.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + assert_matches!(tablet.get_value(b"key"), Ok(None)); + assert_eq!(tablet.get_value(b"key1").unwrap().unwrap(), b"value"); + + fail::cfg("APPLY_COMMITTED_ENTRIES", "pause").unwrap(); + + // Trigger error again, so an initialized write batch should be rolled back. + req.mut_requests()[0].mut_put().set_key(b"key2".to_vec()); + let (msg, mut sub0) = PeerMsg::raft_command(req.clone()); + router.send(2, msg).unwrap(); + assert!(block_on(sub0.wait_proposed())); + assert!(block_on(sub0.wait_committed())); + + // If the write batch is correctly rollbacked, next write should not contain + // last result. 
+ req.mut_requests()[0].mut_put().set_key(b"key3".to_vec()); + let (msg, mut sub1) = PeerMsg::raft_command(req.clone()); + router.send(2, msg).unwrap(); + assert!(block_on(sub1.wait_proposed())); + assert!(block_on(sub1.wait_committed())); + + fail::cfg("APPLY_PUT", "1*return()").unwrap(); + fail::remove("APPLY_COMMITTED_ENTRIES"); + let resp = block_on(sub0.result()).unwrap(); + assert!( + resp.get_header() + .get_error() + .get_message() + .contains("aborted"), + "{:?}", + resp + ); + let resp = block_on(sub1.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + assert_matches!(tablet.get_value(b"key2"), Ok(None)); + assert_eq!(tablet.get_value(b"key3").unwrap().unwrap(), b"value"); +} diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs new file mode 100644 index 00000000000..caaa5120325 --- /dev/null +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -0,0 +1,356 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + ops::{Deref, DerefMut}, + path::Path, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, Mutex, + }, + thread, + time::{Duration, Instant}, +}; + +use crossbeam::channel::{self, Receiver, Sender}; +use engine_test::{ + ctor::{CfOptions, DbOptions}, + kv::{KvTestEngine, TestTabletFactoryV2}, + raft::RaftTestEngine, +}; +use engine_traits::{OpenOptions, TabletFactory, ALL_CFS}; +use futures::executor::block_on; +use kvproto::{ + metapb::Store, + raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, + raft_serverpb::RaftMessage, +}; +use pd_client::RpcClient; +use raftstore::store::{region_meta::RegionMeta, Config, Transport, RAFT_INIT_LOG_INDEX}; +use raftstore_v2::{ + create_store_batch_system, + router::{DebugInfoChannel, PeerMsg, QueryResult}, + Bootstrap, StoreMeta, StoreRouter, StoreSystem, +}; +use slog::{o, Logger}; +use tempfile::TempDir; +use test_pd::mocker::Service; +use tikv_util::config::{ReadableDuration, VersionTrack}; + +#[derive(Clone)] +pub struct TestRouter(StoreRouter); + +impl Deref for TestRouter { + type Target = StoreRouter; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for TestRouter { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl TestRouter { + pub fn query(&self, region_id: u64, req: RaftCmdRequest) -> Option { + let (msg, sub) = PeerMsg::raft_query(req); + self.send(region_id, msg).unwrap(); + block_on(sub.result()) + } + + pub fn must_query_debug_info(&self, region_id: u64, timeout: Duration) -> Option { + let timer = Instant::now(); + while timer.elapsed() < timeout { + let (ch, sub) = DebugInfoChannel::pair(); + let msg = PeerMsg::QueryDebugInfo(ch); + let res = self.send(region_id, msg); + if res.is_err() { + thread::sleep(Duration::from_millis(10)); + continue; + } + return block_on(sub.result()); + } + None + } + + pub fn command(&self, region_id: u64, req: RaftCmdRequest) -> Option { + let (msg, sub) = PeerMsg::raft_command(req); + self.send(region_id, 
msg).unwrap(); + block_on(sub.result()) + } + + pub fn wait_applied_to_current_term(&self, region_id: u64, timeout: Duration) { + let mut now = Instant::now(); + let deadline = now + timeout; + let mut res = None; + while now < deadline { + res = self.must_query_debug_info(region_id, deadline - now); + if let Some(info) = &res { + // If term matches and apply to commit index, then it must apply to current + // term. + if info.raft_apply.applied_index == info.raft_apply.commit_index + && info.raft_apply.commit_term == info.raft_status.hard_state.term + { + return; + } + } + thread::sleep(Duration::from_millis(10)); + now = Instant::now(); + } + panic!( + "region {} is not applied to current term, {:?}", + region_id, res + ); + } +} + +pub struct RunningState { + pub raft_engine: RaftTestEngine, + pub factory: Arc, + pub system: StoreSystem, + pub cfg: Arc>, + pub transport: TestTransport, +} + +impl RunningState { + fn new( + pd_client: &RpcClient, + path: &Path, + cfg: Arc>, + transport: TestTransport, + logger: &Logger, + ) -> (TestRouter, Self) { + let cf_opts = ALL_CFS + .iter() + .copied() + .map(|cf| (cf, CfOptions::default())) + .collect(); + let factory = Arc::new(TestTabletFactoryV2::new( + path, + DbOptions::default(), + cf_opts, + )); + let raft_engine = + engine_test::raft::new_engine(&format!("{}", path.join("raft").display()), None) + .unwrap(); + let mut bootstrap = Bootstrap::new(&raft_engine, 0, pd_client, logger.clone()); + let store_id = bootstrap.bootstrap_store().unwrap(); + let mut store = Store::default(); + store.set_id(store_id); + if let Some(region) = bootstrap.bootstrap_first_region(&store, store_id).unwrap() { + if factory.exists(region.get_id(), RAFT_INIT_LOG_INDEX) { + factory + .destroy_tablet(region.get_id(), RAFT_INIT_LOG_INDEX) + .unwrap(); + } + factory + .open_tablet( + region.get_id(), + Some(RAFT_INIT_LOG_INDEX), + OpenOptions::default().set_create_new(true), + ) + .unwrap(); + } + + let (router, mut system) = 
create_store_batch_system::( + &cfg.value(), + store_id, + logger.clone(), + ); + + let store_meta = Arc::new(Mutex::new(StoreMeta::::new())); + system + .start( + store_id, + cfg.clone(), + raft_engine.clone(), + factory.clone(), + transport.clone(), + &router, + store_meta, + ) + .unwrap(); + + let state = Self { + raft_engine, + factory, + system, + cfg, + transport, + }; + (TestRouter(router), state) + } +} + +impl Drop for RunningState { + fn drop(&mut self) { + self.system.shutdown(); + } +} + +pub struct TestNode { + pd_client: RpcClient, + path: TempDir, + running_state: Option, + logger: Logger, +} + +impl TestNode { + fn with_pd(pd_server: &test_pd::Server) -> TestNode { + let logger = slog_global::borrow_global().new(o!()); + let pd_client = test_pd::util::new_client(pd_server.bind_addrs(), None); + let path = TempDir::new().unwrap(); + + TestNode { + pd_client, + path, + running_state: None, + logger, + } + } + + fn start(&mut self, cfg: Arc>, trans: TestTransport) -> TestRouter { + let (router, state) = + RunningState::new(&self.pd_client, self.path.path(), cfg, trans, &self.logger); + self.running_state = Some(state); + router + } + + pub fn tablet_factory(&self) -> &Arc { + &self.running_state().unwrap().factory + } + + fn stop(&mut self) { + self.running_state.take(); + } + + fn restart(&mut self) -> TestRouter { + let state = self.running_state().unwrap(); + let prev_transport = state.transport.clone(); + let cfg = state.cfg.clone(); + self.stop(); + self.start(cfg, prev_transport) + } + + pub fn running_state(&self) -> Option<&RunningState> { + self.running_state.as_ref() + } +} + +impl Drop for TestNode { + fn drop(&mut self) { + self.stop(); + } +} + +#[derive(Clone)] +pub struct TestTransport { + tx: Sender, + flush_cnt: Arc, +} + +pub fn new_test_transport() -> (TestTransport, Receiver) { + let (tx, rx) = channel::unbounded(); + let flush_cnt = Default::default(); + (TestTransport { tx, flush_cnt }, rx) +} + +impl Transport for TestTransport { 
+ fn send(&mut self, msg: RaftMessage) -> raftstore_v2::Result<()> { + let _ = self.tx.send(msg); + Ok(()) + } + + fn set_store_allowlist(&mut self, _stores: Vec) {} + + fn need_flush(&self) -> bool { + !self.tx.is_empty() + } + + fn flush(&mut self) { + self.flush_cnt.fetch_add(1, Ordering::SeqCst); + } +} + +// TODO: remove following when we finally integrate it in tikv-server binary. +pub fn v2_default_config() -> Config { + let mut config = Config::default(); + config.store_io_pool_size = 1; + config +} + +/// Disable all ticks, so test case can schedule manually. +pub fn disable_all_auto_ticks(cfg: &mut Config) { + cfg.raft_base_tick_interval = ReadableDuration::ZERO; + cfg.raft_log_gc_tick_interval = ReadableDuration::ZERO; + cfg.raft_log_compact_sync_interval = ReadableDuration::ZERO; + cfg.raft_engine_purge_interval = ReadableDuration::ZERO; + cfg.split_region_check_tick_interval = ReadableDuration::ZERO; + cfg.region_compact_check_interval = ReadableDuration::ZERO; + cfg.pd_heartbeat_tick_interval = ReadableDuration::ZERO; + cfg.pd_store_heartbeat_tick_interval = ReadableDuration::ZERO; + cfg.snap_mgr_gc_tick_interval = ReadableDuration::ZERO; + cfg.lock_cf_compact_interval = ReadableDuration::ZERO; + cfg.peer_stale_state_check_interval = ReadableDuration::ZERO; + cfg.consistency_check_interval = ReadableDuration::ZERO; + cfg.report_region_flow_interval = ReadableDuration::ZERO; + cfg.check_leader_lease_interval = ReadableDuration::ZERO; + cfg.merge_check_tick_interval = ReadableDuration::ZERO; + cfg.cleanup_import_sst_interval = ReadableDuration::ZERO; + cfg.inspect_interval = ReadableDuration::ZERO; + cfg.report_min_resolved_ts_interval = ReadableDuration::ZERO; + cfg.reactive_memory_lock_tick_interval = ReadableDuration::ZERO; + cfg.report_region_buckets_tick_interval = ReadableDuration::ZERO; + cfg.check_long_uncommitted_interval = ReadableDuration::ZERO; +} + +pub struct Cluster { + pd_server: test_pd::Server, + nodes: Vec, + receivers: Vec>, + 
routers: Vec, +} + +impl Default for Cluster { + fn default() -> Cluster { + Cluster::with_node_count(1) + } +} + +impl Cluster { + pub fn with_node_count(count: usize) -> Self { + let pd_server = test_pd::Server::new(1); + let mut cluster = Cluster { + pd_server, + nodes: vec![], + receivers: vec![], + routers: vec![], + }; + let mut cfg = v2_default_config(); + disable_all_auto_ticks(&mut cfg); + for _ in 1..=count { + let mut node = TestNode::with_pd(&cluster.pd_server); + let (tx, rx) = new_test_transport(); + let router = node.start(Arc::new(VersionTrack::new(cfg.clone())), tx); + cluster.nodes.push(node); + cluster.receivers.push(rx); + cluster.routers.push(router); + } + cluster + } + + pub fn restart(&mut self, offset: usize) { + let router = self.nodes[offset].restart(); + self.routers[offset] = router; + } + + pub fn node(&self, offset: usize) -> &TestNode { + &self.nodes[offset] + } + + pub fn router(&self, offset: usize) -> TestRouter { + self.routers[offset].clone() + } +} diff --git a/components/raftstore-v2/tests/integrations/mod.rs b/components/raftstore-v2/tests/integrations/mod.rs index 5c5fc02b489..db37c7cbf64 100644 --- a/components/raftstore-v2/tests/integrations/mod.rs +++ b/components/raftstore-v2/tests/integrations/mod.rs @@ -4,337 +4,9 @@ #![feature(assert_matches)] #![feature(custom_test_frameworks)] #![test_runner(test_util::run_tests)] -// TODO: remove following when tests can be run. 
-#![allow(dead_code)] -#![allow(unused_imports)] - -use std::{ - ops::{Deref, DerefMut}, - path::Path, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, Mutex, - }, - thread, - time::{Duration, Instant}, -}; - -use crossbeam::channel::{self, Receiver, Sender}; -use engine_test::{ - ctor::{CfOptions, DbOptions}, - kv::{KvTestEngine, TestTabletFactoryV2}, - raft::RaftTestEngine, -}; -use engine_traits::{OpenOptions, TabletFactory, ALL_CFS}; -use futures::executor::block_on; -use kvproto::{ - metapb::Store, - raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, - raft_serverpb::RaftMessage, -}; -use pd_client::RpcClient; -use raftstore::store::{region_meta::RegionMeta, Config, Transport, RAFT_INIT_LOG_INDEX}; -use raftstore_v2::{ - create_store_batch_system, - router::{DebugInfoChannel, PeerMsg, QueryResult}, - Bootstrap, StoreMeta, StoreRouter, StoreSystem, -}; -use slog::{o, Logger}; -use tempfile::TempDir; -use test_pd::mocker::Service; -use tikv_util::config::{ReadableDuration, VersionTrack}; +mod cluster; mod test_basic_write; mod test_life; mod test_read; mod test_status; - -#[derive(Clone)] -struct TestRouter(StoreRouter); - -impl Deref for TestRouter { - type Target = StoreRouter; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl DerefMut for TestRouter { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} - -impl TestRouter { - fn query(&self, region_id: u64, req: RaftCmdRequest) -> Option { - let (msg, sub) = PeerMsg::raft_query(req); - self.send(region_id, msg).unwrap(); - block_on(sub.result()) - } - - fn must_query_debug_info(&self, region_id: u64, timeout: Duration) -> Option { - let timer = Instant::now(); - while timer.elapsed() < timeout { - let (ch, sub) = DebugInfoChannel::pair(); - let msg = PeerMsg::QueryDebugInfo(ch); - if self.send(region_id, msg).is_err() { - thread::sleep(Duration::from_millis(10)); - continue; - } - return block_on(sub.result()); - } - None - } - - fn command(&self, region_id: u64, req: 
RaftCmdRequest) -> Option { - let (msg, sub) = PeerMsg::raft_command(req); - self.send(region_id, msg).unwrap(); - block_on(sub.result()) - } -} - -struct RunningState { - raft_engine: RaftTestEngine, - factory: Arc, - system: StoreSystem, - cfg: Arc>, - transport: TestTransport, -} - -impl RunningState { - fn new( - pd_client: &RpcClient, - path: &Path, - cfg: Arc>, - transport: TestTransport, - logger: &Logger, - ) -> (TestRouter, Self) { - let cf_opts = ALL_CFS - .iter() - .copied() - .map(|cf| (cf, CfOptions::default())) - .collect(); - let factory = Arc::new(TestTabletFactoryV2::new( - path, - DbOptions::default(), - cf_opts, - )); - let raft_engine = - engine_test::raft::new_engine(&format!("{}", path.join("raft").display()), None) - .unwrap(); - let mut bootstrap = Bootstrap::new(&raft_engine, 0, pd_client, logger.clone()); - let store_id = bootstrap.bootstrap_store().unwrap(); - let mut store = Store::default(); - store.set_id(store_id); - if let Some(region) = bootstrap.bootstrap_first_region(&store, store_id).unwrap() { - if factory.exists(region.get_id(), RAFT_INIT_LOG_INDEX) { - factory - .destroy_tablet(region.get_id(), RAFT_INIT_LOG_INDEX) - .unwrap(); - } - factory - .open_tablet( - region.get_id(), - Some(RAFT_INIT_LOG_INDEX), - OpenOptions::default().set_create_new(true), - ) - .unwrap(); - } - - let (router, mut system) = create_store_batch_system::( - &cfg.value(), - store_id, - logger.clone(), - ); - - let store_meta = Arc::new(Mutex::new(StoreMeta::::new())); - system - .start( - store_id, - cfg.clone(), - raft_engine.clone(), - factory.clone(), - transport.clone(), - &router, - store_meta, - ) - .unwrap(); - - let state = Self { - raft_engine, - factory, - system, - cfg, - transport, - }; - (TestRouter(router), state) - } -} - -impl Drop for RunningState { - fn drop(&mut self) { - self.system.shutdown(); - } -} - -struct TestNode { - pd_client: RpcClient, - path: TempDir, - running_state: Option, - logger: Logger, -} - -impl TestNode { - fn 
with_pd(pd_server: &test_pd::Server) -> TestNode { - let logger = slog_global::borrow_global().new(o!()); - let pd_client = test_pd::util::new_client(pd_server.bind_addrs(), None); - let path = TempDir::new().unwrap(); - - TestNode { - pd_client, - path, - running_state: None, - logger, - } - } - - fn start(&mut self, cfg: Arc>, trans: TestTransport) -> TestRouter { - let (router, state) = - RunningState::new(&self.pd_client, self.path.path(), cfg, trans, &self.logger); - self.running_state = Some(state); - router - } - - fn config(&self) -> &Arc> { - &self.running_state.as_ref().unwrap().cfg - } - - fn stop(&mut self) { - self.running_state.take(); - } - - fn restart(&mut self) -> TestRouter { - let state = self.running_state.as_ref().unwrap(); - let prev_transport = state.transport.clone(); - let cfg = state.cfg.clone(); - self.stop(); - self.start(cfg, prev_transport) - } -} - -impl Drop for TestNode { - fn drop(&mut self) { - self.stop(); - } -} - -#[derive(Clone)] -pub struct TestTransport { - tx: Sender, - flush_cnt: Arc, -} - -fn new_test_transport() -> (TestTransport, Receiver) { - let (tx, rx) = channel::unbounded(); - let flush_cnt = Default::default(); - (TestTransport { tx, flush_cnt }, rx) -} - -impl Transport for TestTransport { - fn send(&mut self, msg: RaftMessage) -> raftstore_v2::Result<()> { - let _ = self.tx.send(msg); - Ok(()) - } - - fn set_store_allowlist(&mut self, _stores: Vec) {} - - fn need_flush(&self) -> bool { - !self.tx.is_empty() - } - - fn flush(&mut self) { - self.flush_cnt.fetch_add(1, Ordering::SeqCst); - } -} - -// TODO: remove following when we finally integrate it in tikv-server binary. -fn v2_default_config() -> Config { - let mut config = Config::default(); - config.store_io_pool_size = 1; - config -} - -/// Disable all ticks, so test case can schedule manually. 
-fn disable_all_auto_ticks(cfg: &mut Config) { - cfg.raft_base_tick_interval = ReadableDuration::ZERO; - cfg.raft_log_gc_tick_interval = ReadableDuration::ZERO; - cfg.raft_log_compact_sync_interval = ReadableDuration::ZERO; - cfg.raft_engine_purge_interval = ReadableDuration::ZERO; - cfg.split_region_check_tick_interval = ReadableDuration::ZERO; - cfg.region_compact_check_interval = ReadableDuration::ZERO; - cfg.pd_heartbeat_tick_interval = ReadableDuration::ZERO; - cfg.pd_store_heartbeat_tick_interval = ReadableDuration::ZERO; - cfg.snap_mgr_gc_tick_interval = ReadableDuration::ZERO; - cfg.lock_cf_compact_interval = ReadableDuration::ZERO; - cfg.peer_stale_state_check_interval = ReadableDuration::ZERO; - cfg.consistency_check_interval = ReadableDuration::ZERO; - cfg.report_region_flow_interval = ReadableDuration::ZERO; - cfg.check_leader_lease_interval = ReadableDuration::ZERO; - cfg.merge_check_tick_interval = ReadableDuration::ZERO; - cfg.cleanup_import_sst_interval = ReadableDuration::ZERO; - cfg.inspect_interval = ReadableDuration::ZERO; - cfg.report_min_resolved_ts_interval = ReadableDuration::ZERO; - cfg.reactive_memory_lock_tick_interval = ReadableDuration::ZERO; - cfg.report_region_buckets_tick_interval = ReadableDuration::ZERO; - cfg.check_long_uncommitted_interval = ReadableDuration::ZERO; -} - -struct Cluster { - pd_server: test_pd::Server, - nodes: Vec, - receivers: Vec>, - routers: Vec, -} - -impl Default for Cluster { - fn default() -> Cluster { - Cluster::with_node_count(1) - } -} - -impl Cluster { - fn with_node_count(count: usize) -> Self { - let pd_server = test_pd::Server::new(1); - let mut cluster = Cluster { - pd_server, - nodes: vec![], - receivers: vec![], - routers: vec![], - }; - let mut cfg = v2_default_config(); - disable_all_auto_ticks(&mut cfg); - for _ in 1..=count { - let mut node = TestNode::with_pd(&cluster.pd_server); - let (tx, rx) = new_test_transport(); - let router = node.start(Arc::new(VersionTrack::new(cfg.clone())), tx); - 
cluster.nodes.push(node); - cluster.receivers.push(rx); - cluster.routers.push(router); - } - cluster - } - - fn restart(&mut self, offset: usize) { - let router = self.nodes[offset].restart(); - self.routers[offset] = router; - } - - fn node(&self, offset: usize) -> &TestNode { - &self.nodes[offset] - } - - fn router(&self, offset: usize) -> TestRouter { - self.routers[offset].clone() - } -} diff --git a/components/raftstore-v2/tests/integrations/test_basic_write.rs b/components/raftstore-v2/tests/integrations/test_basic_write.rs index 7627d85c4e1..ce775982686 100644 --- a/components/raftstore-v2/tests/integrations/test_basic_write.rs +++ b/components/raftstore-v2/tests/integrations/test_basic_write.rs @@ -1,5 +1,8 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +use std::{assert_matches::assert_matches, time::Duration}; + +use engine_traits::{OpenOptions, Peekable, TabletFactory}; use futures::executor::block_on; use kvproto::{ raft_cmdpb::{CmdType, RaftCmdRequest, Request}, @@ -9,7 +12,7 @@ use raftstore::store::{INIT_EPOCH_CONF_VER, INIT_EPOCH_VER}; use raftstore_v2::router::PeerMsg; use tikv_util::store::new_peer; -use crate::Cluster; +use crate::cluster::Cluster; /// Test basic write flow. #[test] @@ -28,16 +31,15 @@ fn test_basic_write() { put_req.mut_put().set_value(b"value".to_vec()); req.mut_requests().push(put_req); + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + // Good proposal should be committed. let (msg, mut sub) = PeerMsg::raft_command(req.clone()); router.send(2, msg).unwrap(); - // TODO: check proposed event is triggered. It won't work for now as there is no - // apply yet. - // assert!(block_on(sub.wait_proposed())); - // Epoch checker is not introduced yet, so committed won't be triggerred. - // Instead, it will be cancelled. - assert!(!block_on(sub.wait_committed())); - // TODO: verify it's applied. 
+ assert!(block_on(sub.wait_proposed())); + assert!(block_on(sub.wait_committed())); + let resp = block_on(sub.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); // Store id should be checked. let mut invalid_req = req.clone(); @@ -112,3 +114,48 @@ fn test_basic_write() { let resp = router.command(2, req).unwrap(); assert!(resp.get_header().get_error().has_not_leader(), "{:?}", resp); } + +#[test] +fn test_put_delete() { + let cluster = Cluster::default(); + let router = cluster.router(0); + let mut req = RaftCmdRequest::default(); + req.mut_header().set_region_id(2); + let epoch = req.mut_header().mut_region_epoch(); + epoch.set_version(INIT_EPOCH_VER); + epoch.set_conf_ver(INIT_EPOCH_CONF_VER); + req.mut_header().set_peer(new_peer(1, 3)); + let mut put_req = Request::default(); + put_req.set_cmd_type(CmdType::Put); + put_req.mut_put().set_key(b"key".to_vec()); + put_req.mut_put().set_value(b"value".to_vec()); + req.mut_requests().push(put_req); + + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + + let tablet_factory = cluster.node(0).tablet_factory(); + let tablet = tablet_factory + .open_tablet(2, None, OpenOptions::default().set_cache_only(true)) + .unwrap(); + assert!(tablet.get_value(b"key").unwrap().is_none()); + let (msg, mut sub) = PeerMsg::raft_command(req.clone()); + router.send(2, msg).unwrap(); + assert!(block_on(sub.wait_proposed())); + assert!(block_on(sub.wait_committed())); + let resp = block_on(sub.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + assert_eq!(tablet.get_value(b"key").unwrap().unwrap(), b"value"); + + let mut delete_req = Request::default(); + delete_req.set_cmd_type(CmdType::Delete); + delete_req.mut_delete().set_key(b"key".to_vec()); + req.clear_requests(); + req.mut_requests().push(delete_req); + let (msg, mut sub) = PeerMsg::raft_command(req.clone()); + router.send(2, msg).unwrap(); + assert!(block_on(sub.wait_proposed())); + 
assert!(block_on(sub.wait_committed())); + let resp = block_on(sub.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + assert_matches!(tablet.get_value(b"key"), Ok(None)); +} diff --git a/components/raftstore-v2/tests/integrations/test_life.rs b/components/raftstore-v2/tests/integrations/test_life.rs index aebd0ee70bf..e905e7e4ac2 100644 --- a/components/raftstore-v2/tests/integrations/test_life.rs +++ b/components/raftstore-v2/tests/integrations/test_life.rs @@ -11,13 +11,12 @@ use engine_traits::{RaftEngine, RaftEngineReadOnly}; use futures::executor::block_on; use kvproto::{ metapb, - raft_cmdpb::{RaftCmdRequest, StatusCmdType}, raft_serverpb::{PeerState, RaftMessage}, }; use raftstore_v2::router::{DebugInfoChannel, PeerMsg}; use tikv_util::store::new_peer; -use crate::{Cluster, TestRouter}; +use crate::cluster::{Cluster, TestRouter}; fn assert_peer_not_exist(region_id: u64, peer_id: u64, router: &TestRouter) { let timer = Instant::now(); @@ -119,7 +118,7 @@ fn test_life_by_message() { .must_query_debug_info(test_region_id, timeout) .unwrap(); assert_eq!(meta.raft_status.id, test_peer_id); - let raft_engine = &cluster.node(0).running_state.as_ref().unwrap().raft_engine; + let raft_engine = &cluster.node(0).running_state().unwrap().raft_engine; raft_engine.get_raft_state(test_region_id).unwrap().unwrap(); raft_engine .get_apply_state(test_region_id) @@ -137,7 +136,7 @@ fn test_life_by_message() { cluster.restart(0); let router = cluster.router(0); assert_peer_not_exist(test_region_id, test_peer_id, &router); - let raft_engine = &cluster.node(0).running_state.as_ref().unwrap().raft_engine; + let raft_engine = &cluster.node(0).running_state().unwrap().raft_engine; assert_tombstone(raft_engine, test_region_id, &new_peer(1, test_peer_id)); } diff --git a/components/raftstore-v2/tests/integrations/test_read.rs b/components/raftstore-v2/tests/integrations/test_read.rs index 90a6cf671c6..8e2c3eeb04f 100644 --- 
a/components/raftstore-v2/tests/integrations/test_read.rs +++ b/components/raftstore-v2/tests/integrations/test_read.rs @@ -1,16 +1,10 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::assert_matches::assert_matches; - -use futures::executor::block_on; -use kvproto::{ - kvrpcpb::Context, - raft_cmdpb::{CmdType, GetRequest, RaftCmdRequest, ReadIndexRequest, Request, StatusCmdType}, -}; -use tikv_util::{codec::number::NumberEncoder, store::new_peer}; +use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, ReadIndexRequest, Request, StatusCmdType}; +use tikv_util::store::new_peer; use txn_types::WriteBatchFlags; -use crate::Cluster; +use crate::cluster::Cluster; #[test] fn test_read_index() { diff --git a/components/raftstore-v2/tests/integrations/test_status.rs b/components/raftstore-v2/tests/integrations/test_status.rs index bb7071ab16d..1f7415d9da3 100644 --- a/components/raftstore-v2/tests/integrations/test_status.rs +++ b/components/raftstore-v2/tests/integrations/test_status.rs @@ -1,13 +1,9 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::assert_matches::assert_matches; - -use futures::executor::block_on; use kvproto::raft_cmdpb::{RaftCmdRequest, StatusCmdType}; -use raftstore_v2::router::{PeerMsg, PeerTick, QueryResChannel, QueryResult, RaftRequest}; use tikv_util::store::new_peer; -use crate::Cluster; +use crate::cluster::Cluster; #[test] fn test_status() { diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 510196f9ce1..a84a60183b6 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -97,10 +97,11 @@ use crate::{ Error, Result, }; -const DEFAULT_APPLY_WB_SIZE: usize = 4 * 1024; -const APPLY_WB_SHRINK_SIZE: usize = 1024 * 1024; -const SHRINK_PENDING_CMD_QUEUE_CAP: usize = 64; -const MAX_APPLY_BATCH_SIZE: usize = 64 * 1024 * 1024; +// These consts are shared in both v1 and v2. 
+pub const DEFAULT_APPLY_WB_SIZE: usize = 4 * 1024; +pub const APPLY_WB_SHRINK_SIZE: usize = 1024 * 1024; +pub const SHRINK_PENDING_CMD_QUEUE_CAP: usize = 64; +pub const MAX_APPLY_BATCH_SIZE: usize = 64 * 1024 * 1024; pub struct PendingCmd { pub index: u64, From 6a78b01181cfe8de4346d7505b3a84b32d5ed421 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Fri, 30 Sep 2022 11:41:45 +0800 Subject: [PATCH 0254/1149] *: Refine interfaces for TiFlash when using TiKV as a component (#13487) close tikv/tikv#12849 Export necessary functions for TiFlash when using TiKV as a component Signed-off-by: CalvinNeo Co-authored-by: Ti Chi Robot --- components/encryption/src/manager/mod.rs | 5 +++++ src/config.rs | 3 ++- src/server/service/diagnostics/mod.rs | 3 ++- src/server/service/mod.rs | 2 +- src/storage/config_manager.rs | 18 +++++++++--------- 5 files changed, 19 insertions(+), 12 deletions(-) diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index fb6b2312027..0f78e794629 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -732,6 +732,11 @@ impl DataKeyManager { }; Ok(Some(encrypted_file)) } + + /// Return which method this manager is using. 
+ pub fn encryption_method(&self) -> engine_traits::EncryptionMethod { + crypter::to_engine_encryption_method(self.method) + } } impl Drop for DataKeyManager { diff --git a/src/config.rs b/src/config.rs index 265bcce4071..f4fbf17a38f 100644 --- a/src/config.rs +++ b/src/config.rs @@ -100,7 +100,8 @@ const LOCKCF_MIN_MEM: usize = 256 * MIB as usize; const LOCKCF_MAX_MEM: usize = GIB as usize; const RAFT_MIN_MEM: usize = 256 * MIB as usize; const RAFT_MAX_MEM: usize = 2 * GIB as usize; -const LAST_CONFIG_FILE: &str = "last_tikv.toml"; +/// Configs that actually took effect in the last run +pub const LAST_CONFIG_FILE: &str = "last_tikv.toml"; const TMP_CONFIG_FILE: &str = "tmp_tikv.toml"; const MAX_BLOCK_SIZE: usize = 32 * MIB as usize; diff --git a/src/server/service/diagnostics/mod.rs b/src/server/service/diagnostics/mod.rs index 60df07aa167..abede000858 100644 --- a/src/server/service/diagnostics/mod.rs +++ b/src/server/service/diagnostics/mod.rs @@ -28,7 +28,8 @@ use tokio::runtime::Handle; use crate::server::Error; mod log; -mod sys; +/// Information about the current hardware and operating system. +pub mod sys; lazy_static! 
{ pub static ref SYS_INFO: Mutex = Mutex::new(sysinfo::System::new()); diff --git a/src/server/service/mod.rs b/src/server/service/mod.rs index d80c2f6806c..1576e7db41c 100644 --- a/src/server/service/mod.rs +++ b/src/server/service/mod.rs @@ -2,7 +2,7 @@ mod batch; mod debug; -mod diagnostics; +pub mod diagnostics; mod kv; pub use self::{ diff --git a/src/storage/config_manager.rs b/src/storage/config_manager.rs index 8bc92a7f697..de3b13408f0 100644 --- a/src/storage/config_manager.rs +++ b/src/storage/config_manager.rs @@ -4,7 +4,7 @@ use std::{convert::TryInto, sync::Arc}; -use engine_traits::{CfNamesExt, CfOptionsExt, TabletFactory, CF_DEFAULT}; +use engine_traits::{KvEngine, TabletFactory, CF_DEFAULT}; use file_system::{get_io_rate_limiter, IoPriority, IoType}; use online_config::{ConfigChange, ConfigManager, ConfigValue, Result as CfgResult}; use strum::IntoEnumIterator; @@ -19,20 +19,20 @@ use crate::{ storage::{lock_manager::LockManager, txn::flow_controller::FlowController, TxnScheduler}, }; -pub struct StorageConfigManger { - tablet_factory: Arc + Send + Sync>, +pub struct StorageConfigManger { + tablet_factory: Arc + Send + Sync>, shared_block_cache: bool, ttl_checker_scheduler: Scheduler, flow_controller: Arc, scheduler: TxnScheduler, } -unsafe impl Send for StorageConfigManger {} -unsafe impl Sync for StorageConfigManger {} +unsafe impl Send for StorageConfigManger {} +unsafe impl Sync for StorageConfigManger {} -impl StorageConfigManger { +impl StorageConfigManger { pub fn new( - tablet_factory: Arc + Send + Sync>, + tablet_factory: Arc + Send + Sync>, shared_block_cache: bool, ttl_checker_scheduler: Scheduler, flow_controller: Arc, @@ -48,7 +48,7 @@ impl StorageConfigManger { } } -impl ConfigManager for StorageConfigManger { +impl ConfigManager for StorageConfigManger { fn dispatch(&mut self, mut change: ConfigChange) -> CfgResult<()> { if let Some(ConfigValue::Module(mut block_cache)) = change.remove("block_cache") { if !self.shared_block_cache { @@ 
-74,7 +74,7 @@ impl ConfigManager for StorageConfigManger { let enable: bool = v.into(); let enable_str = if enable { "true" } else { "false" }; self.tablet_factory.for_each_opened_tablet( - &mut |_region_id, _suffix, tablet: &EK::Local| { + &mut |_region_id, _suffix, tablet: &K| { for cf in tablet.cf_names() { tablet .set_options_cf(cf, &[("disable_write_stall", enable_str)]) From e412adfb32747a10339a7937fed019a1295fdea9 Mon Sep 17 00:00:00 2001 From: zzm Date: Fri, 30 Sep 2022 16:17:45 +0800 Subject: [PATCH 0255/1149] storage, raftstore, causal-ts: Get snapshot before raw put(delete) to ensure that causal ts provider flush correctly (#13520) close tikv/tikv#13502, ref tikv/tikv#13550 1. Move causal ts provider's `flush` from coprocessor observer to raftstore pd woker. When implementing asynchronous refresh, `storage` can check whether the causal ts provider has completed the refresh through the flag `max_ts_sync_status`. 2. To check `max_ts_sync_status` in `storage`, we need get snapshot. 
Signed-off-by: zeminzhou Signed-off-by: zzm Co-authored-by: Ping Yu --- Cargo.lock | 2 +- components/backup/src/endpoint.rs | 12 +- components/causal_ts/Cargo.toml | 1 - components/causal_ts/benches/tso.rs | 4 +- components/causal_ts/src/lib.rs | 27 +-- components/causal_ts/src/observer.rs | 105 --------- components/causal_ts/src/tso.rs | 44 ++-- components/cdc/src/endpoint.rs | 7 +- .../cdc/tests/failpoints/test_endpoint.rs | 2 +- components/cdc/tests/mod.rs | 17 +- components/raftstore/Cargo.toml | 1 + components/raftstore/src/store/fsm/store.rs | 5 + components/raftstore/src/store/worker/pd.rs | 27 ++- components/server/src/server.rs | 7 +- components/test_raftstore/src/node.rs | 1 + components/test_raftstore/src/server.rs | 8 +- src/server/node.rs | 4 + src/storage/mod.rs | 217 +++++++++--------- src/storage/txn/commands/atomic_store.rs | 47 ++-- .../txn/commands/check_secondary_locks.rs | 2 + src/storage/txn/commands/check_txn_status.rs | 2 + src/storage/txn/commands/compare_and_swap.rs | 105 +++++---- src/storage/txn/commands/mod.rs | 23 ++ .../txn/commands/pessimistic_rollback.rs | 1 + src/storage/txn/commands/prewrite.rs | 9 + src/storage/txn/commands/txn_heart_beat.rs | 2 + src/storage/txn/scheduler.rs | 78 ++++++- tests/failpoints/cases/test_rawkv.rs | 68 ++++-- tests/failpoints/cases/test_storage.rs | 26 ++- .../integrations/config/dynamic/raftstore.rs | 1 + .../integrations/raftstore/test_bootstrap.rs | 1 + tests/integrations/raftstore/test_merge.rs | 7 +- .../raftstore/test_transfer_leader.rs | 7 +- tests/integrations/server/kv_service.rs | 1 + 34 files changed, 497 insertions(+), 374 deletions(-) delete mode 100644 components/causal_ts/src/observer.rs diff --git a/Cargo.lock b/Cargo.lock index 2091ea3d4f0..14620ebb6d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -758,7 +758,6 @@ dependencies = [ "prometheus", "prometheus-static-metric", "raft", - "raftstore", "serde", "serde_derive", "slog", @@ -4175,6 +4174,7 @@ dependencies = [ "bitflags", 
"byteorder", "bytes", + "causal_ts", "collections", "concurrency_manager", "crc32fast", diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 1d4f9bbfdd9..92131381017 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -15,7 +15,7 @@ use engine_rocks::RocksEngine; use engine_traits::{name_to_cf, raw_ttl::ttl_current_ts, CfName, SstCompressionType}; use external_storage::{BackendConfig, HdfsConfig}; use external_storage_export::{create_storage, ExternalStorage}; -use futures::channel::mpsc::*; +use futures::{channel::mpsc::*, executor::block_on}; use kvproto::{ brpb::*, encryptionpb::EncryptionMethod, @@ -982,7 +982,9 @@ impl Endpoint { if let Err(e) = self .causal_ts_provider .as_ref() - .map_or(Ok(()), |provider| provider.flush()) + .map_or(Ok(TimeStamp::new(0)), |provider| { + block_on(provider.async_flush()) + }) { error!("backup flush causal timestamp failed"; "err" => ?e); let mut response = BackupResponse::default(); @@ -1826,7 +1828,7 @@ pub mod tests { let limiter = Arc::new(IoRateLimiter::new_for_test()); let ts_provider: Arc = Arc::new(causal_ts::tests::TestProvider::default().into()); - let start_ts = ts_provider.get_ts().unwrap(); + let start_ts = block_on(ts_provider.async_get_ts()).unwrap(); let (tmp, endpoint) = new_endpoint_with_limiter( Some(limiter), ApiVersion::V2, @@ -1844,8 +1846,8 @@ pub mod tests { req.set_dst_api_version(ApiVersion::V2); let (task, _) = Task::new(req, tx).unwrap(); endpoint.handle_backup_task(task); - let end_ts = ts_provider.get_ts().unwrap(); - assert_eq!(end_ts.into_inner(), start_ts.next().into_inner() + 100); + let end_ts = block_on(ts_provider.async_get_ts()).unwrap(); + assert_eq!(end_ts.into_inner(), start_ts.next().into_inner() + 101); } #[test] diff --git a/components/causal_ts/Cargo.toml b/components/causal_ts/Cargo.toml index d4a7d95d4ea..beaf5575c80 100644 --- a/components/causal_ts/Cargo.toml +++ b/components/causal_ts/Cargo.toml @@ -24,7 
+24,6 @@ pd_client = { path = "../pd_client", default-features = false } prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } -raftstore = { path = "../raftstore", default-features = false } serde = "1.0" serde_derive = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } diff --git a/components/causal_ts/benches/tso.rs b/components/causal_ts/benches/tso.rs index 66d950a52b5..72d381a4be7 100644 --- a/components/causal_ts/benches/tso.rs +++ b/components/causal_ts/benches/tso.rs @@ -87,7 +87,7 @@ fn bench_batch_tso_provider_get_ts(c: &mut Criterion) { c.bench_function("bench_batch_tso_provider_get_ts", |b| { b.iter(|| { - black_box(provider.get_ts().unwrap()); + black_box(block_on(provider.async_get_ts()).unwrap()); }) }); } @@ -108,7 +108,7 @@ fn bench_batch_tso_provider_flush(c: &mut Criterion) { c.bench_function("bench_batch_tso_provider_flush", |b| { b.iter(|| { - black_box(provider.flush()).unwrap(); + black_box(block_on(provider.async_flush())).unwrap(); }) }); } diff --git a/components/causal_ts/src/lib.rs b/components/causal_ts/src/lib.rs index b32e33540f6..3eb59f35c36 100644 --- a/components/causal_ts/src/lib.rs +++ b/components/causal_ts/src/lib.rs @@ -13,12 +13,11 @@ pub use errors::*; mod tso; pub use tso::*; mod metrics; -pub use metrics::*; -mod observer; use async_trait::async_trait; use enum_dispatch::enum_dispatch; -use futures::executor::block_on; -pub use observer::*; +pub use metrics::*; +#[cfg(any(test, feature = "testexport"))] +use test_pd_client::TestPdClient; use txn_types::TimeStamp; pub use crate::errors::Result; @@ -27,26 +26,18 @@ pub use crate::errors::Result; #[enum_dispatch] pub trait CausalTsProvider: Send + Sync { /// Get a new timestamp. 
- fn get_ts(&self) -> Result { - block_on(self.async_get_ts()) - } - - /// Flush (cached) timestamps to keep causality on some events, such as - /// "leader transfer". - fn flush(&self) -> Result<()> { - block_on(self.async_flush()) - } - async fn async_get_ts(&self) -> Result; - async fn async_flush(&self) -> Result<()>; + /// Flush (cached) timestamps and return first timestamp to keep causality + /// on some events, such as "leader transfer". + async fn async_flush(&self) -> Result; } #[enum_dispatch(CausalTsProvider)] pub enum CausalTsProviderImpl { BatchTsoProvider(BatchTsoProvider), #[cfg(any(test, feature = "testexport"))] - BatchTsoProviderTest(BatchTsoProvider), + BatchTsoProviderTest(BatchTsoProvider), TestProvider(tests::TestProvider), } @@ -81,9 +72,9 @@ pub mod tests { // This is used for unit test. Add 100 from current. // Do not modify this value as several test cases depend on it. - async fn async_flush(&self) -> Result<()> { + async fn async_flush(&self) -> Result { self.ts.fetch_add(100, Ordering::Relaxed); - Ok(()) + self.async_get_ts().await } } } diff --git a/components/causal_ts/src/observer.rs b/components/causal_ts/src/observer.rs deleted file mode 100644 index 4b101c01b14..00000000000 --- a/components/causal_ts/src/observer.rs +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. - -use std::sync::Arc; - -use engine_traits::KvEngine; -use kvproto::metapb::Region; -use raft::StateRole; -use raftstore::coprocessor::{ - BoxRegionChangeObserver, BoxRoleObserver, Coprocessor, CoprocessorHost, ObserverContext, - RegionChangeEvent, RegionChangeObserver, RegionChangeReason, RoleChange, RoleObserver, -}; - -use crate::CausalTsProvider; - -/// CausalObserver appends timestamp for RawKV V2 data, and invoke -/// causal_ts_provider.flush() on specified event, e.g. leader -/// transfer, snapshot apply. -/// Should be used ONLY when API v2 is enabled. 
-pub struct CausalObserver { - causal_ts_provider: Arc, -} - -impl Clone for CausalObserver { - fn clone(&self) -> Self { - Self { - causal_ts_provider: self.causal_ts_provider.clone(), - } - } -} - -// Causal observer's priority should be higher than all other observers, to -// avoid being bypassed. -const CAUSAL_OBSERVER_PRIORITY: u32 = 0; -impl CausalObserver { - pub fn new(causal_ts_provider: Arc) -> Self { - Self { causal_ts_provider } - } - - pub fn register_to(&self, coprocessor_host: &mut CoprocessorHost) { - coprocessor_host - .registry - .register_role_observer(CAUSAL_OBSERVER_PRIORITY, BoxRoleObserver::new(self.clone())); - coprocessor_host.registry.register_region_change_observer( - CAUSAL_OBSERVER_PRIORITY, - BoxRegionChangeObserver::new(self.clone()), - ); - } -} - -const REASON_LEADER_TRANSFER: &str = "leader_transfer"; -const REASON_REGION_MERGE: &str = "region_merge"; - -impl CausalObserver { - fn flush_timestamp(&self, region: &Region, reason: &'static str) { - fail::fail_point!("causal_observer_flush_timestamp", |_| ()); - - if let Err(err) = self.causal_ts_provider.flush() { - warn!("CausalObserver::flush_timestamp error"; "error" => ?err, "region_id" => region.get_id(), "region" => ?region, "reason" => reason); - } else { - debug!("CausalObserver::flush_timestamp succeed"; "region_id" => region.get_id(), "region" => ?region, "reason" => reason); - } - } -} - -impl Coprocessor for CausalObserver {} - -impl RoleObserver for CausalObserver { - /// Observe becoming leader, to flush CausalTsProvider. - fn on_role_change(&self, ctx: &mut ObserverContext<'_>, role_change: &RoleChange) { - // In scenario of frequent leader transfer, the observing of change from - // follower to leader by `on_role_change` would be later than the real role - // change in raft state and adjacent write commands. - // This would lead to the late of flush, and violate causality. See issue - // #12498. So we observe role change to Candidate to fix this issue. 
- // Also note that when there is only one peer, it would become leader directly. - if role_change.state == StateRole::Candidate - || (ctx.region().peers.len() == 1 && role_change.state == StateRole::Leader) - { - self.flush_timestamp(ctx.region(), REASON_LEADER_TRANSFER); - } - } -} - -impl RegionChangeObserver for CausalObserver { - fn on_region_changed( - &self, - ctx: &mut ObserverContext<'_>, - event: RegionChangeEvent, - role: StateRole, - ) { - if role != StateRole::Leader { - return; - } - - // In the scenario of region merge, the target region would merge some entries - // from source region with larger timestamps (when leader of source region is in - // another store with larger TSO batch than the store of target region's - // leader). So we need a flush after commit merge. See issue #12680. - // TODO: do not need flush if leaders of source & target region are in the same - // store. - if let RegionChangeEvent::Update(RegionChangeReason::CommitMerge) = event { - self.flush_timestamp(ctx.region(), REASON_REGION_MERGE); - } - } -} diff --git a/components/causal_ts/src/tso.rs b/components/causal_ts/src/tso.rs index 70aa692dd15..5a9d119f6d5 100644 --- a/components/causal_ts/src/tso.rs +++ b/components/causal_ts/src/tso.rs @@ -31,6 +31,8 @@ use std::{ }; use async_trait::async_trait; +#[cfg(test)] +use futures::executor::block_on; use parking_lot::RwLock; use pd_client::PdClient; use tikv_util::{ @@ -560,6 +562,16 @@ impl BatchTsoProvider { pub fn tso_usage(&self) -> u32 { self.batch_list.usage() } + + #[cfg(test)] + pub fn get_ts(&self) -> Result { + block_on(self.async_get_ts()) + } + + #[cfg(test)] + pub fn flush(&self) -> Result { + block_on(self.async_flush()) + } } const GET_TS_MAX_RETRY: u32 = 3; @@ -609,8 +621,14 @@ impl CausalTsProvider for BatchTsoProvider { Err(Error::TsoBatchUsedUp(last_batch_size)) } - async fn async_flush(&self) -> Result<()> { - self.renew_tso_batch(true, TsoBatchRenewReason::flush).await + async fn async_flush(&self) -> Result 
{ + fail::fail_point!("causal_ts_provider_flush", |_| Err(box_err!( + "async_flush err(failpoints)" + ))); + self.renew_tso_batch(true, TsoBatchRenewReason::flush) + .await?; + // TODO: Return the first tso by renew_tso_batch instead of async_get_ts + self.async_get_ts().await } } @@ -634,8 +652,8 @@ impl CausalTsProvider for SimpleTsoProvider { Ok(ts) } - async fn async_flush(&self) -> Result<()> { - Ok(()) + async fn async_flush(&self) -> Result { + self.async_get_ts().await } } @@ -858,7 +876,7 @@ pub mod tests { let provider = SimpleTsoProvider::new(pd_cli.clone()); pd_cli.set_tso(100.into()); - let ts = provider.get_ts().unwrap(); + let ts = block_on(provider.async_get_ts()).unwrap(); assert_eq!(ts, 101.into(), "ts: {:?}", ts); } @@ -886,12 +904,12 @@ pub mod tests { assert_eq!(provider.tso_remain(), 90); assert_eq!(provider.tso_usage(), 10); - provider.flush().unwrap(); // allocated: [1101, 1200] - assert_eq!(provider.tso_remain(), 100); - assert_eq!(provider.tso_usage(), 0); + assert_eq!(provider.flush().unwrap(), TimeStamp::from(1101)); // allocated: [1101, 1200] + assert_eq!(provider.tso_remain(), 99); + assert_eq!(provider.tso_usage(), 1); // used up pd_cli.trigger_tso_failure(); // make renew fail to verify used-up - for ts in 1101..=1200u64 { + for ts in 1102..=1200u64 { assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } assert_eq!(provider.tso_remain(), 0); @@ -900,8 +918,8 @@ pub mod tests { assert_eq!(provider.tso_remain(), 0); assert_eq!(provider.tso_usage(), 100); - provider.flush().unwrap(); // allocated: [1201, 2200] - for ts in 1201..=1260u64 { + assert_eq!(provider.flush().unwrap(), TimeStamp::from(1201)); // allocated: [1201, 2200] + for ts in 1202..=1260u64 { assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } assert_eq!(provider.tso_remain(), 940); @@ -979,9 +997,9 @@ pub mod tests { pd_cli.trigger_tso_failure(); provider.flush().unwrap_err(); - provider.flush().unwrap(); // allocated: [1301, 3300] + 
assert_eq!(provider.flush().unwrap(), TimeStamp::from(1301)); // allocated: [1301, 3300] pd_cli.trigger_tso_failure(); // make renew fail to verify used-up - for ts in 1301..=3300u64 { + for ts in 1302..=3300u64 { assert_eq!(TimeStamp::from(ts), provider.get_ts().unwrap()) } provider.get_ts().unwrap_err(); diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index fccf0ec0cad..26c0a11371e 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -1025,7 +1025,7 @@ impl, E: KvEngine> Endpoint { // RawKV write requests will get larger TSO after this point. // RawKV CDC's resolved_ts is guaranteed by ConcurrencyManager::global_min_lock_ts, // which lock flying keys's ts in raw put and delete interfaces in `Storage`. - Some(provider) => provider.get_ts().unwrap_or_default(), + Some(provider) => provider.async_get_ts().await.unwrap_or_default(), None => pd_client.get_tso().await.unwrap_or_default(), }; let mut min_ts = min_ts_pd; @@ -1285,6 +1285,7 @@ mod tests { use std::ops::{Deref, DerefMut}; use engine_rocks::RocksEngine; + use futures::executor::block_on; use kvproto::{ cdcpb::{ChangeDataRequestKvApi, Header}, errorpb::Error as ErrorHeader, @@ -1893,7 +1894,7 @@ mod tests { }; let ts_provider: Arc = Arc::new(causal_ts::tests::TestProvider::default().into()); - let start_ts = ts_provider.get_ts().unwrap(); + let start_ts = block_on(ts_provider.async_get_ts()).unwrap(); let mut suite = mock_endpoint_with_ts_provider(&cfg, None, ApiVersion::V2, Some(ts_provider.clone())); suite.run(Task::RegisterMinTsEvent); @@ -1902,7 +1903,7 @@ mod tests { .recv_timeout(Duration::from_millis(1500)) .unwrap() .unwrap(); - let end_ts = ts_provider.get_ts().unwrap(); + let end_ts = block_on(ts_provider.async_get_ts()).unwrap(); assert!(end_ts.into_inner() > start_ts.next().into_inner()); // may trigger more than once. 
} diff --git a/components/cdc/tests/failpoints/test_endpoint.rs b/components/cdc/tests/failpoints/test_endpoint.rs index 19e24926d5b..31c302c3c14 100644 --- a/components/cdc/tests/failpoints/test_endpoint.rs +++ b/components/cdc/tests/failpoints/test_endpoint.rs @@ -493,7 +493,7 @@ fn test_cdc_rawkv_resolved_ts() { let pause_write_fp = "raftkv_async_write"; fail::cfg(pause_write_fp, "pause").unwrap(); - let ts = ts_provider.get_ts().unwrap(); + let ts = block_on(ts_provider.async_get_ts()).unwrap(); let handle = thread::spawn(move || { let _ = client.raw_put(&put_req).unwrap(); }); diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index a14ebd14c80..c14a91de99a 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -7,6 +7,7 @@ use cdc::{recv_timeout, CdcObserver, FeatureGate, MemoryQuota, Task}; use collections::HashMap; use concurrency_manager::ConcurrencyManager; use engine_rocks::RocksEngine; +use futures::executor::block_on; use grpcio::{ ChannelBuilder, ClientDuplexReceiver, ClientDuplexSender, ClientUnaryReceiver, Environment, }; @@ -512,12 +513,14 @@ impl TestSuite { pub fn flush_causal_timestamp_for_region(&mut self, region_id: u64) { let leader = self.cluster.leader_of_region(region_id).unwrap(); - self.cluster - .sim - .rl() - .get_causal_ts_provider(leader.get_store_id()) - .unwrap() - .flush() - .unwrap(); + block_on( + self.cluster + .sim + .rl() + .get_causal_ts_provider(leader.get_store_id()) + .unwrap() + .async_flush(), + ) + .unwrap(); } } diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 3b47ca08ec5..4c41b19c828 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -32,6 +32,7 @@ batch-system = { path = "../batch-system", default-features = false } bitflags = "1.0.1" byteorder = "1.2" bytes = "1.0" +causal_ts = { path = "../causal_ts" } collections = { path = "../collections" } concurrency_manager = { path = "../concurrency_manager", 
default-features = false } crc32fast = "1.2" diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 160a63a394a..d53270c2ef0 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -19,6 +19,7 @@ use batch_system::{ BasicMailbox, BatchRouter, BatchSystem, Config as BatchSystemConfig, Fsm, HandleResult, HandlerBuilder, PollHandler, Priority, }; +use causal_ts::CausalTsProviderImpl; use collections::{HashMap, HashMapEntry, HashSet}; use concurrency_manager::ConcurrencyManager; use crossbeam::channel::{unbounded, TryRecvError, TrySendError}; @@ -1461,6 +1462,7 @@ impl RaftBatchSystem { concurrency_manager: ConcurrencyManager, collector_reg_handle: CollectorRegHandle, health_service: Option, + causal_ts_provider: Option>, // used for rawkv apiv2 ) -> Result<()> { assert!(self.workers.is_none()); // TODO: we can get cluster meta regularly too later. @@ -1599,6 +1601,7 @@ impl RaftBatchSystem { collector_reg_handle, region_read_progress, health_service, + causal_ts_provider, )?; Ok(()) } @@ -1615,6 +1618,7 @@ impl RaftBatchSystem { collector_reg_handle: CollectorRegHandle, region_read_progress: RegionReadProgressRegistry, health_service: Option, + causal_ts_provider: Option>, // used for rawkv apiv2 ) -> Result<()> { let cfg = builder.cfg.value().clone(); let store = builder.store.clone(); @@ -1696,6 +1700,7 @@ impl RaftBatchSystem { region_read_progress, health_service, coprocessor_host, + causal_ts_provider, ); assert!(workers.pd_worker.start_with_timer(pd_runner)); diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 75393d486f9..f3518f4f674 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -14,6 +14,7 @@ use std::{ time::{Duration, Instant}, }; +use causal_ts::{CausalTsProvider, CausalTsProviderImpl}; use collections::{HashMap, HashSet}; use 
concurrency_manager::ConcurrencyManager; use engine_traits::{KvEngine, RaftEngine}; @@ -47,6 +48,7 @@ use tikv_util::{ warn, worker::{Runnable, RunnableWithTimer, ScheduleError, Scheduler}, }; +use txn_types::TimeStamp; use yatp::Remote; use crate::{ @@ -895,6 +897,7 @@ where health_service: Option, curr_health_status: ServingStatus, coprocessor_host: CoprocessorHost, + causal_ts_provider: Option>, // used for rawkv apiv2 } impl Runner @@ -920,6 +923,7 @@ where region_read_progress: RegionReadProgressRegistry, health_service: Option, coprocessor_host: CoprocessorHost, + causal_ts_provider: Option>, // used for rawkv apiv2 ) -> Runner { // Register the region CPU records collector. let mut region_cpu_records_collector = None; @@ -964,6 +968,7 @@ where health_service, curr_health_status: ServingStatus::Serving, coprocessor_host, + causal_ts_provider, } } @@ -1600,10 +1605,30 @@ where ) { let pd_client = self.pd_client.clone(); let concurrency_manager = self.concurrency_manager.clone(); + let causal_ts_provider = self.causal_ts_provider.clone(); + let f = async move { let mut success = false; while txn_ext.max_ts_sync_status.load(Ordering::SeqCst) == initial_status { - match pd_client.get_tso().await { + // On leader transfer / region merge, RawKV API v2 need to invoke + // causal_ts_provider.flush() to renew cached TSO, to ensure that + // the next TSO returned by causal_ts_provider.get_ts() on current + // store must be larger than the store where the leader is on before. + // + // And it won't break correctness of transaction commands, as + // causal_ts_provider.flush() is implemented as pd_client.get_tso() + renew TSO + // cached. 
+ let res: crate::Result = + if let Some(causal_ts_provider) = &causal_ts_provider { + causal_ts_provider + .async_flush() + .await + .map_err(|e| box_err!(e)) + } else { + pd_client.get_tso().await.map_err(Into::into) + }; + + match res { Ok(ts) => { concurrency_manager.update_max_ts(ts); // Set the least significant bit to 1 to mark it as synced. diff --git a/components/server/src/server.rs b/components/server/src/server.rs index bafc61ea077..247bc6ccb58 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -862,12 +862,6 @@ where None }; - // Register causal observer for RawKV API V2 - if let Some(provider) = self.causal_ts_provider.clone() { - let causal_ob = causal_ts::CausalObserver::new(provider); - causal_ob.register_to(self.coprocessor_host.as_mut().unwrap()); - }; - let check_leader_runner = CheckLeaderRunner::new( engines.store_meta.clone(), self.coprocessor_host.clone().unwrap(), @@ -1055,6 +1049,7 @@ where auto_split_controller, self.concurrency_manager.clone(), collector_reg_handle, + self.causal_ts_provider.clone(), ) .unwrap_or_else(|e| fatal!("failed to start node: {}", e)); diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index d6aa1eaefc8..78d98e5a5d3 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -325,6 +325,7 @@ impl Simulator for NodeCluster { AutoSplitController::default(), cm, CollectorRegHandle::new_for_test(), + None, )?; assert!( engines diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index a3a9455fb20..67eb3a22db6 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -381,10 +381,7 @@ impl ServerCluster { .unwrap() .into(), ); - self.causal_ts_providers - .insert(node_id, causal_ts_provider.clone()); - let causal_ob = causal_ts::CausalObserver::new(causal_ts_provider); - causal_ob.register_to(&mut coprocessor_host); + 
self.causal_ts_providers.insert(node_id, causal_ts_provider); } // Start resource metering. @@ -583,6 +580,8 @@ impl ServerCluster { max_unified_read_pool_thread_count, None, ); + + let causal_ts_provider = self.get_causal_ts_provider(node_id); node.start( engines, simulate_trans.clone(), @@ -595,6 +594,7 @@ impl ServerCluster { auto_split_controller, concurrency_manager.clone(), collector_reg_handle, + causal_ts_provider, )?; assert!(node_id == 0 || node_id == node.id()); let node_id = node.id(); diff --git a/src/server/node.rs b/src/server/node.rs index f8c10673e1a..65dd592b490 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -222,6 +222,7 @@ where auto_split_controller: AutoSplitController, concurrency_manager: ConcurrencyManager, collector_reg_handle: CollectorRegHandle, + causal_ts_provider: Option>, // used for rawkv apiv2 ) -> Result<()> where T: Transport + 'static, @@ -258,6 +259,7 @@ where auto_split_controller, concurrency_manager, collector_reg_handle, + causal_ts_provider, )?; Ok(()) @@ -504,6 +506,7 @@ where auto_split_controller: AutoSplitController, concurrency_manager: ConcurrencyManager, collector_reg_handle: CollectorRegHandle, + causal_ts_provider: Option>, // used for rawkv apiv2 ) -> Result<()> where T: Transport + 'static, @@ -536,6 +539,7 @@ where concurrency_manager, collector_reg_handle, self.health_service.clone(), + causal_ts_provider, )?; Ok(()) } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index abdfcd333ac..e2192573dea 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -123,7 +123,7 @@ use crate::{ commands::{RawAtomicStore, RawCompareAndSwap, TypedCommand}, flow_controller::{EngineFlowController, FlowController}, scheduler::Scheduler as TxnScheduler, - Command, + Command, ErrorInner as TxnError, }, types::StorageCallbackType, }, @@ -273,6 +273,7 @@ impl Storage { config, dynamic_switches, flow_controller, + causal_ts_provider.clone(), reporter, resource_tag_factory.clone(), Arc::clone("a_limiter), @@ 
-1847,47 +1848,42 @@ impl Storage { } } - fn get_causal_ts(ts_provider: &Option>) -> Result> { - if let Some(p) = ts_provider { - match p.get_ts() { - Ok(ts) => Ok(Some(ts)), - Err(e) => Err(box_err!("Fail to get ts: {}", e)), + async fn check_causal_ts_flushed(ctx: &mut Context, tag: CommandKind) -> Result<()> { + if F::TAG == ApiVersion::V2 { + let snap_ctx = SnapContext { + pb_ctx: ctx, + ..Default::default() + }; + match Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)).await { + Ok(snapshot) => { + SCHED_STAGE_COUNTER_VEC.get(tag).snapshot_ok.inc(); + if !snapshot.ext().is_max_ts_synced() { + return Err(Error::from(txn::Error::from( + TxnError::MaxTimestampNotSynced { + region_id: ctx.get_region_id(), + start_ts: TimeStamp::zero(), + }, + ))); + } + let term = snapshot.ext().get_term(); + if let Some(term) = term { + ctx.set_term(term.get()); + } + } + Err(err) => { + SCHED_STAGE_COUNTER_VEC.get(tag).snapshot_err.inc(); + info!("get snapshot failed"; "tag" => ?tag, "err" => ?err); + return Err(err); + } } - } else { - Ok(None) - } - } - - async fn get_raw_key_guard( - ts_provider: &Option>, - concurrency_manager: ConcurrencyManager, - ) -> Result> { - // NOTE: the ts cannot be reused as timestamp of data key. - // There is a little chance that CDC will acquired a timestamp for resolved-ts - // just between the Self::get_causal_ts & concurrency_manager.lock_key, - // which violate the constraint that resolve-ts should not be larger - // than timestamp of captured data. - let ts = Self::get_causal_ts(ts_provider)?; - if let Some(ts) = ts { - let raw_key = vec![api_version::api_v2::RAW_KEY_PREFIX]; - // Make keys for locking by RAW_KEY_PREFIX & ts. RAW_KEY_PREFIX to avoid - // conflict with TiDB & TxnKV keys, and ts to avoid collision with - // other raw write requests. 
Ts in lock value to used by CDC which - // get maximum resolved-ts from concurrency_manager.global_min_lock_ts - let encode_key = ApiV2::encode_raw_key(&raw_key, Some(ts)); - let key_guard = concurrency_manager.lock_key(&encode_key).await; - let lock = Lock::new(LockType::Put, raw_key, ts, 0, None, 0.into(), 1, ts); - key_guard.with_lock(|l| *l = Some(lock)); - Ok(Some(key_guard)) - } else { - Ok(None) } + Ok(()) } /// Write a raw key to the storage. pub fn raw_put( &self, - ctx: Context, + mut ctx: Context, cf: String, key: Vec, value: Vec, @@ -1914,11 +1910,16 @@ impl Storage { return callback(Err(Error::from(e))); } let command_duration = tikv_util::time::Instant::now(); - let key_guard = Self::get_raw_key_guard(&provider, concurrency_manager).await; + + if let Err(e) = Self::check_causal_ts_flushed(&mut ctx, CMD).await { + return callback(Err(e)); + } + + let key_guard = get_raw_key_guard(&provider, concurrency_manager).await; if let Err(e) = key_guard { return callback(Err(e)); } - let ts = Self::get_causal_ts(&provider); + let ts = get_causal_ts(&provider).await; if let Err(e) = ts { return callback(Err(e)); } @@ -1989,7 +1990,7 @@ impl Storage { /// Write some keys to the storage in a batch. pub fn raw_batch_put( &self, - ctx: Context, + mut ctx: Context, cf: String, pairs: Vec, ttls: Vec, @@ -2022,11 +2023,15 @@ impl Storage { } let command_duration = tikv_util::time::Instant::now(); - let key_guard = Self::get_raw_key_guard(&provider, concurrency_manager).await; + if let Err(e) = Self::check_causal_ts_flushed(&mut ctx, CMD).await { + return callback(Err(e)); + } + + let key_guard = get_raw_key_guard(&provider, concurrency_manager).await; if let Err(e) = key_guard { return callback(Err(e)); } - let ts = Self::get_causal_ts(&provider); + let ts = get_causal_ts(&provider).await; if let Err(e) = ts { return callback(Err(e)); } @@ -2063,7 +2068,7 @@ impl Storage { /// operations. 
pub fn raw_delete( &self, - ctx: Context, + mut ctx: Context, cf: String, key: Vec, callback: Callback<()>, @@ -2083,11 +2088,15 @@ impl Storage { } let command_duration = tikv_util::time::Instant::now(); - let key_guard = Self::get_raw_key_guard(&provider, concurrency_manager).await; + if let Err(e) = Self::check_causal_ts_flushed(&mut ctx, CMD).await { + return callback(Err(e)); + } + + let key_guard = get_raw_key_guard(&provider, concurrency_manager).await; if let Err(e) = key_guard { return callback(Err(e)); } - let ts = Self::get_causal_ts(&provider); + let ts = get_causal_ts(&provider).await; if let Err(e) = ts { return callback(Err(e)); } @@ -2168,7 +2177,7 @@ impl Storage { /// operations. pub fn raw_batch_delete( &self, - ctx: Context, + mut ctx: Context, cf: String, keys: Vec>, callback: Callback<()>, @@ -2188,11 +2197,15 @@ impl Storage { } let command_duration = tikv_util::time::Instant::now(); - let key_guard = Self::get_raw_key_guard(&provider, concurrency_manager).await; + if let Err(e) = Self::check_causal_ts_flushed(&mut ctx, CMD).await { + return callback(Err(e)); + } + + let key_guard = get_raw_key_guard(&provider, concurrency_manager).await; if let Err(e) = key_guard { return callback(Err(e)); } - let ts = Self::get_causal_ts(&provider); + let ts = get_causal_ts(&provider).await; if let Err(e) = ts { return callback(Err(e)); } @@ -2602,7 +2615,7 @@ impl Storage { previous_value: Option>, value: Vec, ttl: u64, - cb: Callback<(Option, bool)>, + callback: Callback<(Option, bool)>, ) -> Result<()> { const CMD: CommandKind = CommandKind::raw_compare_and_swap; let api_version = self.api_version; @@ -2612,43 +2625,14 @@ impl Storage { if !F::IS_TTL_ENABLED && ttl != 0 { return Err(Error::from(ErrorInner::TtlNotEnabled)); } - let provider = self.causal_ts_provider.clone(); let sched = self.get_scheduler(); - let concurrency_manager = self.get_concurrency_manager(); self.sched_raw_command(CMD, async move { - // Raw atomic cmd has two locks, one is 
concurrency_manager and the other is txn - // latch. Now, concurrency_manager lock key with ts encoded, it aims - // to "lock" resolved-ts to be less than its timestamp, rather than - // to "lock" other concurrent requests. TODO: Merge the two locks - // into one to simplify the process. Same to other raw atomic - // commands. - let key_guard = Self::get_raw_key_guard(&provider, concurrency_manager).await; - if let Err(e) = key_guard { - return cb(Err(e)); - } - let ts = Self::get_causal_ts(&provider); - if let Err(e) = ts { - return cb(Err(e)); - } - // Do NOT encode ts here as RawCompareAndSwap use key to gen lock. let key = F::encode_raw_key_owned(key, None); - let cmd = RawCompareAndSwap::new( - cf, - key, - previous_value, - value, - ttl, - api_version, - ts.unwrap(), - ctx, - ); + let cmd = RawCompareAndSwap::new(cf, key, previous_value, value, ttl, api_version, ctx); Self::sched_raw_atomic_command( sched, cmd, - Box::new(|res| { - cb(res.map_err(Error::from)); - drop(key_guard) - }), + Box::new(|res| callback(res.map_err(Error::from))), ); }) } @@ -2672,28 +2656,14 @@ impl Storage { let cf = Self::rawkv_cf(&cf, self.api_version)?; Self::check_ttl_valid(pairs.len(), &ttls)?; - let provider = self.causal_ts_provider.clone(); let sched = self.get_scheduler(); - let concurrency_manager = self.get_concurrency_manager(); self.sched_raw_command(CMD, async move { - let key_guard = Self::get_raw_key_guard(&provider, concurrency_manager).await; - if let Err(e) = key_guard { - return callback(Err(e)); - } - let ts = Self::get_causal_ts(&provider); - if let Err(e) = ts { - return callback(Err(e)); - } - // Do NOT encode ts here as RawAtomicStore use key to gen lock let modifies = Self::raw_batch_put_requests_to_modifies(cf, pairs, ttls, None); - let cmd = RawAtomicStore::new(cf, modifies, ts.unwrap(), ctx); + let cmd = RawAtomicStore::new(cf, modifies, ctx); Self::sched_raw_atomic_command( sched, cmd, - Box::new(|res| { - callback(res.map_err(Error::from)); - 
drop(key_guard) - }), + Box::new(|res| callback(res.map_err(Error::from))), ); }) } @@ -2706,34 +2676,21 @@ impl Storage { callback: Callback<()>, ) -> Result<()> { const CMD: CommandKind = CommandKind::raw_atomic_store; - Self::check_api_version(self.api_version, ctx.api_version, CMD, &keys)?; + Self::check_api_version(self.api_version, ctx.api_version, CMD, &keys)?; let cf = Self::rawkv_cf(&cf, self.api_version)?; - let provider = self.causal_ts_provider.clone(); let sched = self.get_scheduler(); - let concurrency_manager = self.get_concurrency_manager(); self.sched_raw_command(CMD, async move { - let key_guard = Self::get_raw_key_guard(&provider, concurrency_manager).await; - if let Err(e) = key_guard { - return callback(Err(e)); - } - let ts = Self::get_causal_ts(&provider); - if let Err(e) = ts { - return callback(Err(e)); - } // Do NOT encode ts here as RawAtomicStore use key to gen lock let modifies = keys .into_iter() .map(|k| Self::raw_delete_request_to_modify(cf, k, None)) .collect(); - let cmd = RawAtomicStore::new(cf, modifies, ts.unwrap(), ctx); + let cmd = RawAtomicStore::new(cf, modifies, ctx); Self::sched_raw_atomic_command( sched, cmd, - Box::new(|res| { - callback(res.map_err(Error::from)); - drop(key_guard) - }), + Box::new(|res| callback(res.map_err(Error::from))), ); }) } @@ -2829,6 +2786,45 @@ impl Storage { } } +pub async fn get_raw_key_guard( + ts_provider: &Option>, + concurrency_manager: ConcurrencyManager, +) -> Result> { + // NOTE: the ts cannot be reused as timestamp of data key. + // There is a little chance that CDC will acquired a timestamp for resolved-ts + // just between the get_causal_ts & concurrency_manager.lock_key, + // which violate the constraint that resolve-ts should not be larger + // than timestamp of captured data. + let ts = get_causal_ts(ts_provider).await?; + if let Some(ts) = ts { + let raw_key = vec![api_version::api_v2::RAW_KEY_PREFIX]; + // Make keys for locking by RAW_KEY_PREFIX & ts. 
RAW_KEY_PREFIX to avoid + // conflict with TiDB & TxnKV keys, and ts to avoid collision with + // other raw write requests. Ts in lock value to used by CDC which + // get maximum resolved-ts from concurrency_manager.global_min_lock_ts + let encode_key = ApiV2::encode_raw_key(&raw_key, Some(ts)); + let key_guard = concurrency_manager.lock_key(&encode_key).await; + let lock = Lock::new(LockType::Put, raw_key, ts, 0, None, 0.into(), 1, ts); + key_guard.with_lock(|l| *l = Some(lock)); + Ok(Some(key_guard)) + } else { + Ok(None) + } +} + +pub async fn get_causal_ts( + ts_provider: &Option>, +) -> Result> { + if let Some(p) = ts_provider { + match p.async_get_ts().await { + Ok(ts) => Ok(Some(ts)), + Err(e) => Err(box_err!("Fail to get ts: {}", e)), + } + } else { + Ok(None) + } +} + pub struct DynamicConfigs { pub pipelined_pessimistic_lock: Arc, pub in_memory_pessimistic_lock: Arc, @@ -3465,6 +3461,7 @@ mod tests { extra_op: ExtraOp::Noop, statistics: &mut Statistics::default(), async_apply_prewrite: false, + raw_ext: None, }, ) .unwrap(); diff --git a/src/storage/txn/commands/atomic_store.rs b/src/storage/txn/commands/atomic_store.rs index 4b780f5bf2d..150b065e5db 100644 --- a/src/storage/txn/commands/atomic_store.rs +++ b/src/storage/txn/commands/atomic_store.rs @@ -2,7 +2,6 @@ // #[PerformanceCriticalPath] use engine_traits::CfName; -use txn_types::TimeStamp; use crate::storage::{ kv::{Modify, WriteData}, @@ -26,7 +25,6 @@ command! { /// The set of mutations to apply. 
cf: CfName, mutations: Vec, - data_ts: Option, } } @@ -41,16 +39,18 @@ impl CommandExt for RawAtomicStore { } impl WriteCommand for RawAtomicStore { - fn process_write(self, _: S, _: WriteContext<'_, L>) -> Result { + fn process_write(self, _: S, wctx: WriteContext<'_, L>) -> Result { let rows = self.mutations.len(); - let (mut mutations, ctx) = (self.mutations, self.ctx); - if let Some(ts) = self.data_ts { + let (mut mutations, ctx, raw_ext) = (self.mutations, self.ctx, wctx.raw_ext); + + if let Some(ref raw_ext) = raw_ext { for mutation in &mut mutations { if let Modify::Put(_, ref mut key, _) = mutation { - key.append_ts_inplace(ts); + key.append_ts_inplace(raw_ext.ts); } } }; + let mut to_be_write = WriteData::from_modifies(mutations); to_be_write.set_allowed_on_disk_almost_full(); Ok(WriteResult { @@ -59,7 +59,7 @@ impl WriteCommand for RawAtomicStore { rows, pr: ProcessResult::Res, lock_info: None, - lock_guards: vec![], + lock_guards: raw_ext.into_iter().map(|r| r.key_guard).collect(), response_policy: ResponsePolicy::OnApplied, }) } @@ -67,13 +67,16 @@ impl WriteCommand for RawAtomicStore { #[cfg(test)] mod tests { - use api_version::{test_kv_format_impl, KvFormat, RawValue}; + use api_version::{test_kv_format_impl, ApiV2, KvFormat, RawValue}; use engine_traits::CF_DEFAULT; - use kvproto::kvrpcpb::Context; + use futures::executor::block_on; + use kvproto::kvrpcpb::{ApiVersion, Context}; use tikv_kv::Engine; use super::*; - use crate::storage::{lock_manager::DummyLockManager, Statistics, TestEngineBuilder}; + use crate::storage::{ + lock_manager::DummyLockManager, txn::scheduler::get_raw_ext, Statistics, TestEngineBuilder, + }; #[test] fn test_atomic_process_write() { @@ -85,11 +88,8 @@ mod tests { let cm = concurrency_manager::ConcurrencyManager::new(1.into()); let raw_keys = vec![b"ra", b"rz"]; let raw_values = vec![b"valuea", b"valuez"]; - let encode_ts = if F::TAG == kvproto::kvrpcpb::ApiVersion::V2 { - Some(TimeStamp::from(100)) - } else { - None - }; + 
let ts_provider = super::super::test_util::gen_ts_provider(F::TAG); + let mut modifies = vec![]; for i in 0..raw_keys.len() { let raw_value = RawValue { @@ -103,15 +103,17 @@ mod tests { F::encode_raw_value_owned(raw_value), )); } - let cmd = RawAtomicStore::new(CF_DEFAULT, modifies, encode_ts, Context::default()); + let cmd = RawAtomicStore::new(CF_DEFAULT, modifies, Context::default()); let mut statistic = Statistics::default(); let snap = engine.snapshot(Default::default()).unwrap(); + let raw_ext = block_on(get_raw_ext(ts_provider, cm.clone(), true, &cmd.cmd)).unwrap(); let context = WriteContext { lock_mgr: &DummyLockManager {}, concurrency_manager: cm, extra_op: kvproto::kvrpcpb::ExtraOp::Noop, statistics: &mut statistic, async_apply_prewrite: false, + raw_ext, }; let cmd: Command = cmd.into(); let write_result = cmd.process_write(snap, context).unwrap(); @@ -124,10 +126,19 @@ mod tests { }; modifies_with_ts.push(Modify::Put( CF_DEFAULT, - F::encode_raw_key_owned(raw_keys[i].to_vec(), encode_ts), + F::encode_raw_key_owned(raw_keys[i].to_vec(), Some(101.into())), F::encode_raw_value_owned(raw_value), )); } - assert_eq!(write_result.to_be_write.modifies, modifies_with_ts) + assert_eq!(write_result.to_be_write.modifies, modifies_with_ts); + if F::TAG == ApiVersion::V2 { + assert_eq!(write_result.lock_guards.len(), 1); + let raw_key = vec![api_version::api_v2::RAW_KEY_PREFIX]; + let encoded_key = ApiV2::encode_raw_key(&raw_key, Some(100.into())); + assert_eq!( + write_result.lock_guards.first().unwrap().key(), + &encoded_key + ); + } } } diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index 2678effbf7b..56138a09a50 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -202,6 +202,7 @@ pub mod tests { extra_op: Default::default(), statistics: &mut Default::default(), async_apply_prewrite: false, + raw_ext: None, }, ) .unwrap(); @@ 
-239,6 +240,7 @@ pub mod tests { extra_op: Default::default(), statistics: &mut Default::default(), async_apply_prewrite: false, + raw_ext: None, }, ) .unwrap(); diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index ef323cf206b..73079e00f5d 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -201,6 +201,7 @@ pub mod tests { extra_op: Default::default(), statistics: &mut Default::default(), async_apply_prewrite: false, + raw_ext: None, }, ) .unwrap(); @@ -248,6 +249,7 @@ pub mod tests { extra_op: Default::default(), statistics: &mut Default::default(), async_apply_prewrite: false, + raw_ext: None, }, ) .is_err() diff --git a/src/storage/txn/commands/compare_and_swap.rs b/src/storage/txn/commands/compare_and_swap.rs index 34d9114f48a..4dbd51e70e0 100644 --- a/src/storage/txn/commands/compare_and_swap.rs +++ b/src/storage/txn/commands/compare_and_swap.rs @@ -6,7 +6,7 @@ use engine_traits::{raw_ttl::ttl_to_expire_ts, CfName}; use kvproto::kvrpcpb::ApiVersion; use raw::RawStore; use tikv_kv::Statistics; -use txn_types::{Key, TimeStamp, Value}; +use txn_types::{Key, Value}; use crate::storage::{ kv::{Modify, WriteData}, @@ -37,7 +37,6 @@ command! 
{ value: Value, ttl: u64, api_version: ApiVersion, - data_ts: Option, } } @@ -52,9 +51,16 @@ impl CommandExt for RawCompareAndSwap { } impl WriteCommand for RawCompareAndSwap { - fn process_write(self, snapshot: S, _: WriteContext<'_, L>) -> Result { - let (cf, mut key, value, previous_value, ctx) = - (self.cf, self.key, self.value, self.previous_value, self.ctx); + fn process_write(self, snapshot: S, wctx: WriteContext<'_, L>) -> Result { + let (cf, mut key, value, previous_value, ctx, raw_ext) = ( + self.cf, + self.key, + self.value, + self.previous_value, + self.ctx, + wctx.raw_ext, + ); + let mut data = vec![]; let old_value = RawStore::new(snapshot, self.api_version).raw_get_key_value( cf, @@ -62,7 +68,7 @@ impl WriteCommand for RawCompareAndSwap { &mut Statistics::default(), )?; - let pr = if old_value == previous_value { + let (pr, lock_guards) = if old_value == previous_value { let raw_value = RawValue { user_value: value, expire_ts: ttl_to_expire_ts(self.ttl), @@ -74,20 +80,28 @@ impl WriteCommand for RawCompareAndSwap { ApiVersion::API => API::encode_raw_value_owned(raw_value), } ); - if let Some(ts) = self.data_ts { - key = key.append_ts(ts); + + if let Some(ref raw_ext) = raw_ext { + key = key.append_ts(raw_ext.ts); } + let m = Modify::Put(cf, key, encoded_raw_value); data.push(m); - ProcessResult::RawCompareAndSwapRes { - previous_value: old_value, - succeed: true, - } + ( + ProcessResult::RawCompareAndSwapRes { + previous_value: old_value, + succeed: true, + }, + raw_ext.into_iter().map(|r| r.key_guard).collect(), + ) } else { - ProcessResult::RawCompareAndSwapRes { - previous_value: old_value, - succeed: false, - } + ( + ProcessResult::RawCompareAndSwapRes { + previous_value: old_value, + succeed: false, + }, + vec![], + ) }; fail_point!("txn_commands_compare_and_swap"); let rows = data.len(); @@ -99,7 +113,7 @@ impl WriteCommand for RawCompareAndSwap { rows, pr, lock_info: None, - lock_guards: vec![], + lock_guards, response_policy: 
ResponsePolicy::OnApplied, }) } @@ -107,13 +121,20 @@ impl WriteCommand for RawCompareAndSwap { #[cfg(test)] mod tests { - use api_version::test_kv_format_impl; + use std::sync::Arc; + + use api_version::{test_kv_format_impl, ApiV2}; + use causal_ts::CausalTsProviderImpl; use concurrency_manager::ConcurrencyManager; use engine_traits::CF_DEFAULT; + use futures::executor::block_on; use kvproto::kvrpcpb::Context; use super::*; - use crate::storage::{lock_manager::DummyLockManager, Engine, Statistics, TestEngineBuilder}; + use crate::storage::{ + lock_manager::DummyLockManager, txn::scheduler::get_raw_ext, Engine, Statistics, + TestEngineBuilder, + }; #[test] fn test_cas_basic() { @@ -126,15 +147,11 @@ mod tests { /// `src/storage/mod.rs`. fn test_cas_basic_impl() { let mut engine = TestEngineBuilder::new().build().unwrap(); + let ts_provider = super::super::test_util::gen_ts_provider(F::TAG); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); let key = b"rk"; let encoded_key = F::encode_raw_key(key, None); - let mut ts = if F::TAG == kvproto::kvrpcpb::ApiVersion::V2 { - Some(TimeStamp::from(100)) - } else { - None - }; let cmd = RawCompareAndSwap::new( CF_DEFAULT, @@ -143,14 +160,13 @@ mod tests { b"v1".to_vec(), 0, F::TAG, - ts, Context::default(), ); - let (prev_val, succeed) = sched_command(&mut engine, cm.clone(), cmd).unwrap(); + let (prev_val, succeed) = + sched_command(&mut engine, cm.clone(), cmd, ts_provider.clone()).unwrap(); assert!(prev_val.is_none()); assert!(succeed); - ts = ts.map(|t| t.next()); let cmd = RawCompareAndSwap::new( CF_DEFAULT, encoded_key.clone(), @@ -158,14 +174,13 @@ mod tests { b"v2".to_vec(), 1, F::TAG, - ts, Context::default(), ); - let (prev_val, succeed) = sched_command(&mut engine, cm.clone(), cmd).unwrap(); + let (prev_val, succeed) = + sched_command(&mut engine, cm.clone(), cmd, ts_provider.clone()).unwrap(); assert_eq!(prev_val, Some(b"v1".to_vec())); assert!(!succeed); - ts = ts.map(|t| t.next()); let cmd = 
RawCompareAndSwap::new( CF_DEFAULT, encoded_key, @@ -173,10 +188,9 @@ mod tests { b"v3".to_vec(), 2, F::TAG, - ts, Context::default(), ); - let (prev_val, succeed) = sched_command(&mut engine, cm, cmd).unwrap(); + let (prev_val, succeed) = sched_command(&mut engine, cm, cmd, ts_provider).unwrap(); assert_eq!(prev_val, Some(b"v1".to_vec())); assert!(succeed); } @@ -185,16 +199,20 @@ mod tests { engine: &mut E, cm: ConcurrencyManager, cmd: TypedCommand<(Option, bool)>, + ts_provider: Option>, ) -> Result<(Option, bool)> { let snap = engine.snapshot(Default::default())?; use kvproto::kvrpcpb::ExtraOp; let mut statistic = Statistics::default(); + + let raw_ext = block_on(get_raw_ext(ts_provider, cm.clone(), true, &cmd.cmd)).unwrap(); let context = WriteContext { lock_mgr: &DummyLockManager {}, concurrency_manager: cm, extra_op: ExtraOp::Noop, statistics: &mut statistic, async_apply_prewrite: false, + raw_ext, }; let ret = cmd.cmd.process_write(snap, context)?; match ret.pr { @@ -219,14 +237,11 @@ mod tests { fn test_cas_process_write_impl() { let mut engine = TestEngineBuilder::new().build().unwrap(); + let ts_provider = super::super::test_util::gen_ts_provider(F::TAG); + let cm = concurrency_manager::ConcurrencyManager::new(1.into()); let raw_key = b"rk"; let raw_value = b"valuek"; - let encode_ts = if F::TAG == kvproto::kvrpcpb::ApiVersion::V2 { - Some(TimeStamp::from(100)) - } else { - None - }; let ttl = 30; let encode_value = RawValue { user_value: raw_value.to_vec(), @@ -240,25 +255,35 @@ mod tests { raw_value.to_vec(), ttl, F::TAG, - encode_ts, Context::default(), ); let mut statistic = Statistics::default(); let snap = engine.snapshot(Default::default()).unwrap(); + let raw_ext = block_on(get_raw_ext(ts_provider, cm.clone(), true, &cmd.cmd)).unwrap(); let context = WriteContext { lock_mgr: &DummyLockManager {}, concurrency_manager: cm, extra_op: kvproto::kvrpcpb::ExtraOp::Noop, statistics: &mut statistic, async_apply_prewrite: false, + raw_ext, }; let cmd: 
Command = cmd.into(); let write_result = cmd.process_write(snap, context).unwrap(); let modifies_with_ts = vec![Modify::Put( CF_DEFAULT, - F::encode_raw_key(raw_key, encode_ts), + F::encode_raw_key(raw_key, Some(101.into())), F::encode_raw_value_owned(encode_value), )]; - assert_eq!(write_result.to_be_write.modifies, modifies_with_ts) + assert_eq!(write_result.to_be_write.modifies, modifies_with_ts); + if F::TAG == ApiVersion::V2 { + assert_eq!(write_result.lock_guards.len(), 1); + let raw_key = vec![api_version::api_v2::RAW_KEY_PREFIX]; + let encoded_key = ApiV2::encode_raw_key(&raw_key, Some(100.into())); + assert_eq!( + write_result.lock_guards.first().unwrap().key(), + &encoded_key + ); + } } } diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 2f2d123e9bb..7c2c945d4e2 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -526,12 +526,18 @@ pub trait CommandExt: Display { fn gen_lock(&self) -> latch::Lock; } +pub struct RawExt { + pub ts: TimeStamp, + pub key_guard: KeyHandleGuard, +} + pub struct WriteContext<'a, L: LockManager> { pub lock_mgr: &'a L, pub concurrency_manager: ConcurrencyManager, pub extra_op: ExtraOp, pub statistics: &'a mut Statistics, pub async_apply_prewrite: bool, + pub raw_ext: Option, // use for apiv2 } pub struct ReaderWithStats<'a, S: Snapshot> { @@ -740,6 +746,10 @@ pub trait WriteCommand: CommandExt { #[cfg(test)] pub mod test_util { + use std::sync::Arc; + + use causal_ts::CausalTsProviderImpl; + use kvproto::kvrpcpb::ApiVersion; use txn_types::Mutation; use super::*; @@ -764,6 +774,7 @@ pub mod test_util { extra_op: ExtraOp::Noop, statistics, async_apply_prewrite: false, + raw_ext: None, }; let ret = cmd.cmd.process_write(snap, context)?; let res = match ret.pr { @@ -901,6 +912,7 @@ pub mod test_util { extra_op: ExtraOp::Noop, statistics, async_apply_prewrite: false, + raw_ext: None, }; let ret = cmd.cmd.process_write(snap, context)?; @@ -925,6 +937,7 @@ pub mod 
test_util { extra_op: ExtraOp::Noop, statistics, async_apply_prewrite: false, + raw_ext: None, }; let ret = cmd.cmd.process_write(snap, context)?; @@ -932,4 +945,14 @@ pub mod test_util { engine.write(&ctx, ret.to_be_write).unwrap(); Ok(()) } + + pub fn gen_ts_provider(api_version: ApiVersion) -> Option> { + if api_version == ApiVersion::V2 { + let test_provider: causal_ts::CausalTsProviderImpl = + causal_ts::tests::TestProvider::default().into(); + Some(Arc::new(test_provider)) + } else { + None + } + } } diff --git a/src/storage/txn/commands/pessimistic_rollback.rs b/src/storage/txn/commands/pessimistic_rollback.rs index 837d077153e..f7394cf32aa 100644 --- a/src/storage/txn/commands/pessimistic_rollback.rs +++ b/src/storage/txn/commands/pessimistic_rollback.rs @@ -143,6 +143,7 @@ pub mod tests { extra_op: Default::default(), statistics: &mut Default::default(), async_apply_prewrite: false, + raw_ext: None, }; let result = command.process_write(snapshot, write_context).unwrap(); write(engine, &ctx, result.to_be_write.modifies); diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index be47e22e42b..be57873b68c 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -1505,6 +1505,7 @@ mod tests { extra_op: ExtraOp::Noop, statistics: &mut Statistics::default(), async_apply_prewrite: false, + raw_ext: None, } }; } @@ -1674,6 +1675,7 @@ mod tests { extra_op: ExtraOp::Noop, statistics: &mut statistics, async_apply_prewrite: case.async_apply_prewrite, + raw_ext: None, }; let mut engine = TestEngineBuilder::new().build().unwrap(); let snap = engine.snapshot(Default::default()).unwrap(); @@ -1787,6 +1789,7 @@ mod tests { extra_op: ExtraOp::Noop, statistics: &mut statistics, async_apply_prewrite: false, + raw_ext: None, }; let snap = engine.snapshot(Default::default()).unwrap(); let result = cmd.cmd.process_write(snap, context).unwrap(); @@ -1814,6 +1817,7 @@ mod tests { extra_op: ExtraOp::Noop, 
statistics: &mut statistics, async_apply_prewrite: false, + raw_ext: None, }; let snap = engine.snapshot(Default::default()).unwrap(); let result = cmd.cmd.process_write(snap, context).unwrap(); @@ -1895,6 +1899,7 @@ mod tests { extra_op: ExtraOp::Noop, statistics: &mut statistics, async_apply_prewrite: false, + raw_ext: None, }; let snap = engine.snapshot(Default::default()).unwrap(); let result = cmd.cmd.process_write(snap, context).unwrap(); @@ -1926,6 +1931,7 @@ mod tests { extra_op: ExtraOp::Noop, statistics: &mut statistics, async_apply_prewrite: false, + raw_ext: None, }; let snap = engine.snapshot(Default::default()).unwrap(); let result = cmd.cmd.process_write(snap, context).unwrap(); @@ -2194,6 +2200,7 @@ mod tests { extra_op: ExtraOp::Noop, statistics: &mut statistics, async_apply_prewrite: false, + raw_ext: None, }; let snap = engine.snapshot(Default::default()).unwrap(); assert!(prewrite_cmd.cmd.process_write(snap, context).is_err()); @@ -2217,6 +2224,7 @@ mod tests { extra_op: ExtraOp::Noop, statistics: &mut statistics, async_apply_prewrite: false, + raw_ext: None, }; let snap = engine.snapshot(Default::default()).unwrap(); assert!(prewrite_cmd.cmd.process_write(snap, context).is_err()); @@ -2422,6 +2430,7 @@ mod tests { extra_op: ExtraOp::Noop, statistics: &mut statistics, async_apply_prewrite: false, + raw_ext: None, }; let snap = engine.snapshot(Default::default()).unwrap(); let res = prewrite_cmd.cmd.process_write(snap, context).unwrap(); diff --git a/src/storage/txn/commands/txn_heart_beat.rs b/src/storage/txn/commands/txn_heart_beat.rs index 70c13a20c26..7ec773b99dc 100644 --- a/src/storage/txn/commands/txn_heart_beat.rs +++ b/src/storage/txn/commands/txn_heart_beat.rs @@ -139,6 +139,7 @@ pub mod tests { extra_op: Default::default(), statistics: &mut Default::default(), async_apply_prewrite: false, + raw_ext: None, }, ) .unwrap(); @@ -180,6 +181,7 @@ pub mod tests { extra_op: Default::default(), statistics: &mut Default::default(), 
async_apply_prewrite: false, + raw_ext: None, }, ) .is_err() diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index c3967820b34..b65445b8c24 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -34,6 +34,7 @@ use std::{ u64, }; +use causal_ts::CausalTsProviderImpl; use collections::HashMap; use concurrency_manager::{ConcurrencyManager, KeyHandleGuard}; use crossbeam::utils::CachePadded; @@ -58,7 +59,7 @@ use crate::{ server::lock_manager::waiter_manager, storage::{ config::Config, - get_priority_tag, + get_causal_ts, get_priority_tag, get_raw_key_guard, kv::{ self, with_tls_engine, Engine, ExtCallback, FlowStatsReporter, Result as EngineResult, SnapContext, Statistics, @@ -66,11 +67,13 @@ use crate::{ lock_manager::{self, DiagnosticContext, LockManager, WaitTimeout}, metrics::*, txn::{ - commands::{Command, ResponsePolicy, WriteContext, WriteResult, WriteResultLockInfo}, + commands::{ + Command, RawExt, ResponsePolicy, WriteContext, WriteResult, WriteResultLockInfo, + }, flow_controller::FlowController, latch::{Latches, Lock}, sched_pool::{tls_collect_query, tls_collect_scan_details, SchedPool}, - Error, ProcessResult, + Error, ErrorInner, ProcessResult, }, types::StorageCallback, DynamicConfigs, Error as StorageError, ErrorInner as StorageErrorInner, @@ -205,6 +208,9 @@ struct SchedulerInner { flow_controller: Arc, + // used for apiv2 + causal_ts_provider: Option>, + control_mutex: Arc>, lock_mgr: L, @@ -349,6 +355,7 @@ impl Scheduler { config: &Config, dynamic_configs: DynamicConfigs, flow_controller: Arc, + causal_ts_provider: Option>, reporter: R, resource_tag_factory: ResourceTagFactory, quota_limiter: Arc, @@ -385,6 +392,7 @@ impl Scheduler { in_memory_pessimistic_lock: dynamic_configs.in_memory_pessimistic_lock, enable_async_apply_prewrite: config.enable_async_apply_prewrite, flow_controller, + causal_ts_provider, resource_tag_factory, quota_limiter, feature_gate, @@ -843,16 +851,34 @@ impl Scheduler { let 
pipelined = task.cmd.can_be_pipelined() && pessimistic_lock_mode == PessimisticLockMode::Pipelined; let txn_ext = snapshot.ext().get_txn_ext().cloned(); + let max_ts_synced = snapshot.ext().is_max_ts_synced(); + let causal_ts_provider = self.inner.causal_ts_provider.clone(); + let concurrency_manager = self.inner.concurrency_manager.clone(); + + let raw_ext = get_raw_ext( + causal_ts_provider, + concurrency_manager.clone(), + max_ts_synced, + &task.cmd, + ) + .await; + if let Err(err) = raw_ext { + info!("get_raw_ext failed"; "cid" => cid, "err" => ?err); + scheduler.finish_with_err(cid, err); + return; + } + let raw_ext = raw_ext.unwrap(); let deadline = task.cmd.deadline(); let write_result = { let _guard = sample.observe_cpu(); let context = WriteContext { lock_mgr: &self.inner.lock_mgr, - concurrency_manager: self.inner.concurrency_manager.clone(), + concurrency_manager, extra_op: task.extra_op, statistics, async_apply_prewrite: self.inner.enable_async_apply_prewrite, + raw_ext, }; let begin_instant = Instant::now(); let res = unsafe { @@ -1238,6 +1264,44 @@ impl Scheduler { } } +pub async fn get_raw_ext( + causal_ts_provider: Option>, + concurrency_manager: ConcurrencyManager, + max_ts_synced: bool, + cmd: &Command, +) -> Result, Error> { + if causal_ts_provider.is_some() { + match cmd { + Command::RawCompareAndSwap(_) | Command::RawAtomicStore(_) => { + if !max_ts_synced { + return Err(ErrorInner::MaxTimestampNotSynced { + region_id: cmd.ctx().get_region_id(), + start_ts: TimeStamp::zero(), + } + .into()); + } + let key_guard = get_raw_key_guard(&causal_ts_provider, concurrency_manager) + .await + .map_err(|err: StorageError| { + ErrorInner::Other(box_err!("failed to key guard: {:?}", err)) + })?; + let ts = + get_causal_ts(&causal_ts_provider) + .await + .map_err(|err: StorageError| { + ErrorInner::Other(box_err!("failed to get casual ts: {:?}", err)) + })?; + return Ok(Some(RawExt { + ts: ts.unwrap(), + key_guard: key_guard.unwrap(), + })); + } + _ => {} + 
} + } + Ok(None) +} + #[derive(Debug, PartialEq)] enum PessimisticLockMode { // Return success only if the pessimistic lock is persisted. @@ -1302,6 +1366,7 @@ mod tests { in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), }, Arc::new(FlowController::Singleton(EngineFlowController::empty())), + None, DummyReporter, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), @@ -1448,6 +1513,7 @@ mod tests { in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), }, Arc::new(FlowController::Singleton(EngineFlowController::empty())), + None, DummyReporter, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), @@ -1552,6 +1618,7 @@ mod tests { in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), }, Arc::new(FlowController::Singleton(EngineFlowController::empty())), + None, DummyReporter, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), @@ -1610,6 +1677,7 @@ mod tests { in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), }, Arc::new(FlowController::Singleton(EngineFlowController::empty())), + None, DummyReporter, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), @@ -1676,6 +1744,7 @@ mod tests { in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), }, Arc::new(FlowController::Singleton(EngineFlowController::empty())), + None, DummyReporter, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), @@ -1737,6 +1806,7 @@ mod tests { in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), }, Arc::new(FlowController::Singleton(EngineFlowController::empty())), + None, DummyReporter, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), diff --git a/tests/failpoints/cases/test_rawkv.rs b/tests/failpoints/cases/test_rawkv.rs index c50450c9dc4..fd56bd87992 100644 --- a/tests/failpoints/cases/test_rawkv.rs +++ b/tests/failpoints/cases/test_rawkv.rs @@ -3,6 +3,7 @@ use std::{sync::Arc, time::Duration}; use 
causal_ts::CausalTsProvider; +use futures::executor::block_on; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ kvrpcpb::*, @@ -64,6 +65,25 @@ impl TestSuite { must_raw_put(&client, ctx, key.to_vec(), value.to_vec()) } + pub fn raw_put_err_by_timestamp_not_synced(&mut self, key: &[u8], value: &[u8]) { + let region_id = self.cluster.get_region_id(key); + let client = self.get_client(region_id); + let ctx = self.get_context(region_id); + + let mut put_req = RawPutRequest::default(); + put_req.set_context(ctx); + put_req.key = key.to_vec(); + put_req.value = value.to_vec(); + + let put_resp = client.raw_put(&put_req).unwrap(); + assert!(put_resp.get_region_error().has_max_timestamp_not_synced()); + assert!( + put_resp.get_error().is_empty(), + "{:?}", + put_resp.get_error() + ); + } + pub fn must_raw_get(&mut self, key: &[u8]) -> Option> { let region_id = self.cluster.get_region_id(key); let client = self.get_client(region_id); @@ -72,13 +92,15 @@ impl TestSuite { } pub fn flush_timestamp(&mut self, node_id: u64) { - self.cluster - .sim - .rl() - .get_causal_ts_provider(node_id) - .unwrap() - .flush() - .unwrap(); + block_on( + self.cluster + .sim + .rl() + .get_causal_ts_provider(node_id) + .unwrap() + .async_flush(), + ) + .unwrap(); } pub fn must_merge_region_by_key(&mut self, source_key: &[u8], target_key: &[u8]) { @@ -92,7 +114,7 @@ impl TestSuite { let mut merged; let timer = Instant::now(); loop { - if timer.saturating_elapsed() > Duration::from_secs(5) { + if timer.saturating_elapsed() > Duration::from_secs(10) { panic!("region merge failed"); } merged = self.cluster.get_region(source_key); @@ -119,7 +141,7 @@ impl TestSuite { } } -const FP_CAUSAL_OBSERVER_FLUSH_TIMESTAMP: &str = "causal_observer_flush_timestamp"; +const FP_CAUSAL_TS_PROVIDER_FLUSH: &str = "causal_ts_provider_flush"; /// Verify correctness on leader transfer. // TODO: simulate and test for the scenario of issue #12498. 
@@ -129,9 +151,6 @@ fn test_leader_transfer() { let key1 = b"rk1"; let region = suite.cluster.get_region(key1); - // Disable CausalObserver::flush_timestamp to produce causality issue. - fail::cfg(FP_CAUSAL_OBSERVER_FLUSH_TIMESTAMP, "return").unwrap(); - // Transfer leader and write to store 1. { suite.must_transfer_leader(®ion, 1); @@ -145,15 +164,18 @@ fn test_leader_transfer() { assert_eq!(suite.must_raw_get(key1), Some(b"v4".to_vec())); } + // Disable CausalObserver::flush_timestamp to produce causality issue. + fail::cfg(FP_CAUSAL_TS_PROVIDER_FLUSH, "return").unwrap(); + // Transfer leader and write to store 2. { suite.must_transfer_leader(®ion, 2); suite.must_leader_on_store(key1, 2); // Store 2 has a TSO batch smaller than store 1. - suite.must_raw_put(key1, b"v5"); + suite.raw_put_err_by_timestamp_not_synced(key1, b"v5"); assert_eq!(suite.must_raw_get(key1), Some(b"v4".to_vec())); - suite.must_raw_put(key1, b"v6"); + suite.raw_put_err_by_timestamp_not_synced(key1, b"v6"); assert_eq!(suite.must_raw_get(key1), Some(b"v4".to_vec())); } @@ -161,7 +183,7 @@ fn test_leader_transfer() { suite.must_transfer_leader(®ion, 1); suite.must_leader_on_store(key1, 1); // Enable CausalObserver::flush_timestamp. - fail::cfg(FP_CAUSAL_OBSERVER_FLUSH_TIMESTAMP, "off").unwrap(); + fail::cfg(FP_CAUSAL_TS_PROVIDER_FLUSH, "off").unwrap(); // Transfer leader and write to store 2 again. { suite.must_transfer_leader(®ion, 2); @@ -173,7 +195,7 @@ fn test_leader_transfer() { assert_eq!(suite.must_raw_get(key1), Some(b"v8".to_vec())); } - fail::remove(FP_CAUSAL_OBSERVER_FLUSH_TIMESTAMP); + fail::remove(FP_CAUSAL_TS_PROVIDER_FLUSH); suite.stop(); } @@ -199,9 +221,6 @@ fn test_region_merge() { assert_eq!(region1.get_end_key(), region3.get_start_key()); assert_eq!(region3.get_end_key(), region5.get_start_key()); - // Disable CausalObserver::flush_timestamp to produce causality issue. 
- fail::cfg(FP_CAUSAL_OBSERVER_FLUSH_TIMESTAMP, "return").unwrap(); - // Transfer leaders: region 1 -> store 1, region 3 -> store 2, region 5 -> store // 3. suite.must_transfer_leader(®ion1, 1); @@ -219,20 +238,23 @@ fn test_region_merge() { assert_eq!(suite.must_raw_get(keys[1]), Some(b"v4".to_vec())); } + // Disable CausalObserver::flush_timestamp to produce causality issue. + fail::cfg(FP_CAUSAL_TS_PROVIDER_FLUSH, "return").unwrap(); + // Merge region 1 to 3. { suite.must_merge_region_by_key(keys[1], keys[3]); suite.must_leader_on_store(keys[1], 2); // Write to store 2. Store 2 has a TSO batch smaller than store 1. - suite.must_raw_put(keys[1], b"v5"); + suite.raw_put_err_by_timestamp_not_synced(keys[1], b"v5"); assert_eq!(suite.must_raw_get(keys[1]), Some(b"v4".to_vec())); - suite.must_raw_put(keys[1], b"v6"); + suite.raw_put_err_by_timestamp_not_synced(keys[1], b"v6"); assert_eq!(suite.must_raw_get(keys[1]), Some(b"v4".to_vec())); } // Enable CausalObserver::flush_timestamp. - fail::cfg(FP_CAUSAL_OBSERVER_FLUSH_TIMESTAMP, "off").unwrap(); + fail::cfg(FP_CAUSAL_TS_PROVIDER_FLUSH, "off").unwrap(); // Merge region 3 to 5. 
{ @@ -246,6 +268,6 @@ fn test_region_merge() { assert_eq!(suite.must_raw_get(keys[1]), Some(b"v8".to_vec())); } - fail::remove(FP_CAUSAL_OBSERVER_FLUSH_TIMESTAMP); + fail::remove(FP_CAUSAL_TS_PROVIDER_FLUSH); suite.stop(); } diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 101cf30d446..45f5e16675c 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -10,7 +10,7 @@ use std::{ time::Duration, }; -use api_version::KvFormat; +use api_version::{ApiV1, ApiV2, KvFormat}; use causal_ts::CausalTsProvider; use collections::HashMap; use engine_traits::DummyFactory; @@ -509,7 +509,12 @@ fn test_pipelined_pessimistic_lock() { #[test] fn test_async_commit_prewrite_with_stale_max_ts() { - let mut cluster = new_server_cluster(0, 2); + test_async_commit_prewrite_with_stale_max_ts_impl::(); + test_async_commit_prewrite_with_stale_max_ts_impl::(); +} + +fn test_async_commit_prewrite_with_stale_max_ts_impl() { + let mut cluster = new_server_cluster_with_api_ver(0, 2, F::TAG); cluster.run(); let mut engine = cluster @@ -521,7 +526,7 @@ fn test_async_commit_prewrite_with_stale_max_ts() { .unwrap() .clone(); let storage = - TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine.clone(), DummyLockManager) + TestStorageBuilder::<_, _, F>::from_engine_and_lock_mgr(engine.clone(), DummyLockManager) .build() .unwrap(); @@ -532,6 +537,7 @@ fn test_async_commit_prewrite_with_stale_max_ts() { let mut ctx = Context::default(); ctx.set_region_id(1); + ctx.set_api_version(F::TAG); ctx.set_region_epoch(cluster.get_region_epoch(1)); ctx.set_peer(cluster.leader_of_region(1).unwrap()); @@ -541,15 +547,15 @@ fn test_async_commit_prewrite_with_stale_max_ts() { storage .sched_txn_command( commands::Prewrite::new( - vec![Mutation::make_put(Key::from_raw(b"k1"), b"v".to_vec())], - b"k1".to_vec(), + vec![Mutation::make_put(Key::from_raw(b"xk1"), b"v".to_vec())], + b"xk1".to_vec(), 10.into(), 100, false, 2, 
TimeStamp::default(), TimeStamp::default(), - Some(vec![b"k2".to_vec()]), + Some(vec![b"xk2".to_vec()]), false, AssertionLevel::Off, ctx.clone(), @@ -574,17 +580,17 @@ fn test_async_commit_prewrite_with_stale_max_ts() { .sched_txn_command( commands::PrewritePessimistic::new( vec![( - Mutation::make_put(Key::from_raw(b"k1"), b"v".to_vec()), + Mutation::make_put(Key::from_raw(b"xk1"), b"v".to_vec()), DoPessimisticCheck, )], - b"k1".to_vec(), + b"xk1".to_vec(), 10.into(), 100, 20.into(), 2, TimeStamp::default(), TimeStamp::default(), - Some(vec![b"k2".to_vec()]), + Some(vec![b"xk2".to_vec()]), false, AssertionLevel::Off, ctx.clone(), @@ -1485,7 +1491,7 @@ fn test_raw_put_key_guard() { let node_id = leader.get_id(); let leader_cm = cluster.sim.rl().get_concurrency_manager(node_id); let ts_provider = cluster.sim.rl().get_causal_ts_provider(node_id).unwrap(); - let ts = ts_provider.get_ts().unwrap(); + let ts = block_on(ts_provider.async_get_ts()).unwrap(); let env = Arc::new(Environment::new(1)); let channel = diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index 55cf75d2b75..38fdf5c175c 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -111,6 +111,7 @@ fn start_raftstore( ConcurrencyManager::new(1.into()), CollectorRegHandle::new_for_test(), None, + None, ) .unwrap(); diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index cc5b6ca1ee0..8ede13bd0f4 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -112,6 +112,7 @@ fn test_node_bootstrap_with_prepared_data() { AutoSplitController::default(), ConcurrencyManager::new(1.into()), CollectorRegHandle::new_for_test(), + None, ) .unwrap(); assert!( diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 6bc7e2fb7b8..48adb2eb84c 
100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -2,6 +2,7 @@ use std::{iter::*, sync::*, thread, time::*}; +use api_version::{test_kv_format_impl, KvFormat}; use engine_traits::{Peekable, CF_LOCK, CF_RAFT, CF_WRITE}; use kvproto::{ kvrpcpb::Context, @@ -1145,7 +1146,11 @@ fn test_merge_remove_target_peer_isolated() { #[test] fn test_sync_max_ts_after_region_merge() { - let mut cluster = new_server_cluster(0, 3); + test_kv_format_impl!(test_sync_max_ts_after_region_merge_impl); +} + +fn test_sync_max_ts_after_region_merge_impl() { + let mut cluster = new_server_cluster_with_api_ver(0, 3, F::TAG); configure_for_merge(&mut cluster); cluster.run(); diff --git a/tests/integrations/raftstore/test_transfer_leader.rs b/tests/integrations/raftstore/test_transfer_leader.rs index 130290e01b8..9f2e564341f 100644 --- a/tests/integrations/raftstore/test_transfer_leader.rs +++ b/tests/integrations/raftstore/test_transfer_leader.rs @@ -2,6 +2,7 @@ use std::{sync::Arc, thread, time::Duration}; +use api_version::{test_kv_format_impl, KvFormat}; use engine_traits::CF_LOCK; use kvproto::kvrpcpb::Context; use raft::eraftpb::MessageType; @@ -227,7 +228,11 @@ fn test_server_transfer_leader_during_snapshot() { #[test] fn test_sync_max_ts_after_leader_transfer() { - let mut cluster = new_server_cluster(0, 3); + test_kv_format_impl!(test_sync_max_ts_after_leader_transfer_impl); +} + +fn test_sync_max_ts_after_leader_transfer_impl() { + let mut cluster = new_server_cluster_with_api_ver(0, 3, F::TAG); cluster.cfg.raft_store.raft_heartbeat_ticks = 20; cluster.run(); diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index ee23f2fc179..253d1e0c067 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -1143,6 +1143,7 @@ fn test_double_run_node() { AutoSplitController::default(), ConcurrencyManager::new(1.into()), 
CollectorRegHandle::new_for_test(), + None, ) .unwrap_err(); assert!(format!("{:?}", e).contains("already started"), "{:?}", e); From 47d8c9e483db762cefe8725abac9f5110e97ae63 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Fri, 30 Sep 2022 17:39:45 +0800 Subject: [PATCH 0256/1149] storage/lock_manager: Add metrics to the new lock waiting queue (#13560) ref tikv/tikv#13298 Add metrics to the new lock waiting queue, including: * The number of keys on which there is lock waiting * The number of requests that are waiting in the queue * The histogram of the queue length observed when enqueueing new requests Signed-off-by: MyonKeminta Signed-off-by: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Co-authored-by: Ti Chi Robot Co-authored-by: TonsnakeLin <87681388+TonsnakeLin@users.noreply.github.com> --- .../lock_manager/lock_waiting_queue.rs | 45 +++++++++++++++++-- src/storage/metrics.rs | 24 ++++++++++ 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs index a3312a4fdb2..c1f2e800834 100644 --- a/src/storage/lock_manager/lock_waiting_queue.rs +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -77,6 +77,7 @@ use txn_types::{Key, TimeStamp}; use crate::storage::{ errors::SharedError, lock_manager::{lock_wait_context::LockWaitContextSharedState, LockManager, LockWaitToken}, + metrics::*, mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, txn::Error as TxnError, types::{PessimisticLockParameters, PessimisticLockRes}, @@ -241,17 +242,29 @@ impl LockWaitQueues { mut lock_wait_entry: Box, current_lock: kvrpcpb::LockInfo, ) { + let mut new_key = false; let mut key_state = self .inner .queue_map .entry(lock_wait_entry.key.clone()) - .or_insert_with(|| KeyLockWaitState::new()); + .or_insert_with(|| { + new_key = true; + KeyLockWaitState::new() + }); key_state.current_lock = current_lock; if 
lock_wait_entry.legacy_wake_up_index.is_none() { lock_wait_entry.legacy_wake_up_index = Some(key_state.value().legacy_wake_up_index); } key_state.value_mut().queue.push(lock_wait_entry); + + let len = key_state.value_mut().queue.len(); + drop(key_state); + LOCK_WAIT_QUEUE_ENTRIES_GAUGE_VEC.waiters.inc(); + LOCK_WAIT_QUEUE_LENGTH_HISTOGRAM.observe(len as f64); + if new_key { + LOCK_WAIT_QUEUE_ENTRIES_GAUGE_VEC.keys.inc() + } } /// Dequeues the head of the lock waiting queue of the specified key, @@ -288,16 +301,20 @@ impl LockWaitQueues { wake_up_delay_duration_ms: Option, ) -> Option<(Box, Option)> { let mut result = None; + // For statistics. + let mut removed_waiters = 0; // We don't want other threads insert any more entries between finding the // queue is empty and removing the queue from the map. Wrap the logic // within a call to `remove_if_mut` to avoid releasing lock during the // procedure. - self.inner.queue_map.remove_if_mut(key, |_, v| { + let removed_key = self.inner.queue_map.remove_if_mut(key, |_, v| { v.last_conflict_start_ts = conflicting_start_ts; v.last_conflict_commit_ts = conflicting_commit_ts; while let Some(lock_wait_entry) = v.queue.pop() { + removed_waiters += 1; + if lock_wait_entry.req_states.as_ref().unwrap().is_finished() { // Skip already cancelled entries. continue; @@ -324,6 +341,15 @@ impl LockWaitQueues { v.queue.is_empty() }); + if removed_waiters != 0 { + LOCK_WAIT_QUEUE_ENTRIES_GAUGE_VEC + .waiters + .sub(removed_waiters); + } + if removed_key.is_some() { + LOCK_WAIT_QUEUE_ENTRIES_GAUGE_VEC.keys.dec(); + } + result } @@ -413,11 +439,13 @@ impl LockWaitQueues { let mut conflicting_start_ts = TimeStamp::zero(); let mut conflicting_commit_ts = TimeStamp::zero(); + let mut removed_waiters = 0; + // We don't want other threads insert any more entries between finding the // queue is empty and removing the queue from the map. Wrap the logic // within a call to `remove_if_mut` to avoid releasing lock during the // procedure. 
- self.inner.queue_map.remove_if_mut(key, |_, v| { + let removed_key = self.inner.queue_map.remove_if_mut(key, |_, v| { // The KeyLockWaitState of the key might have been removed from the map and then // recreated. Skip. if v.delayed_notify_all_state @@ -440,6 +468,7 @@ impl LockWaitQueues { if front.req_states.as_ref().unwrap().is_finished() { // Skip already cancelled entries. v.queue.pop(); + removed_waiters += 1; continue; } if front @@ -451,6 +480,7 @@ impl LockWaitQueues { break; } let lock_wait_entry = v.queue.pop().unwrap(); + removed_waiters += 1; if lock_wait_entry.parameters.allow_lock_with_conflict { woken_up_resumable_entry = Some(lock_wait_entry); break; @@ -462,6 +492,15 @@ impl LockWaitQueues { v.queue.is_empty() }); + if removed_waiters != 0 { + LOCK_WAIT_QUEUE_ENTRIES_GAUGE_VEC + .waiters + .sub(removed_waiters); + } + if removed_key.is_some() { + LOCK_WAIT_QUEUE_ENTRIES_GAUGE_VEC.keys.dec(); + } + // Call callbacks to cancel these entries here. // TODO: Perhaps we'd better make it concurrent with scheduling the new command // (if `woken_up_resumable_entry` is some) if there are too many. diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index 3dd5fc2e10a..b74c5b7d51f 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -361,6 +361,15 @@ where }) } +make_static_metric! { + pub struct LockWaitQueueEntriesGauge: IntGauge { + "type" => { + waiters, + keys, + }, + } +} + lazy_static! { pub static ref KV_COMMAND_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( "tikv_storage_command_total", @@ -575,4 +584,19 @@ lazy_static! 
{ .unwrap(); pub static ref IN_MEMORY_PESSIMISTIC_LOCKING_COUNTER_STATIC: InMemoryPessimisticLockingCounter = auto_flush_from!(IN_MEMORY_PESSIMISTIC_LOCKING_COUNTER, InMemoryPessimisticLockingCounter); + + pub static ref LOCK_WAIT_QUEUE_ENTRIES_GAUGE_VEC: LockWaitQueueEntriesGauge = register_static_int_gauge_vec!( + LockWaitQueueEntriesGauge, + "tikv_lock_wait_queue_entries_gauge_vec", + "Statistics of the lock wait queue's state", + &["type"] + ) + .unwrap(); + + pub static ref LOCK_WAIT_QUEUE_LENGTH_HISTOGRAM: Histogram = register_histogram!( + "tikv_lock_wait_queue_length", + "Statistics of length of queues counted when enqueueing", + exponential_buckets(1.0, 2.0, 16).unwrap() + ) + .unwrap(); } From 956610725039835557e7516828b069a44073c36d Mon Sep 17 00:00:00 2001 From: Liqi Geng Date: Thu, 6 Oct 2022 09:57:47 +0800 Subject: [PATCH 0257/1149] copr: fix wrong sql mode constants (#13567) close tikv/tikv#13566 Signed-off-by: gengliqi Co-authored-by: Ti Chi Robot --- components/tidb_query_datatype/src/expr/ctx.rs | 12 ++++++------ components/tidb_query_expr/src/impl_regexp.rs | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/components/tidb_query_datatype/src/expr/ctx.rs b/components/tidb_query_datatype/src/expr/ctx.rs index ffaf63a9774..758f7b13736 100644 --- a/components/tidb_query_datatype/src/expr/ctx.rs +++ b/components/tidb_query_datatype/src/expr/ctx.rs @@ -11,12 +11,12 @@ use crate::codec::mysql::Tz; bitflags! { /// Please refer to SQLMode in `mysql/const.go` in repo `pingcap/parser` for details. 
pub struct SqlMode: u64 { - const STRICT_TRANS_TABLES = 1 << 22; - const STRICT_ALL_TABLES = 1 << 23; - const NO_ZERO_IN_DATE = 1 << 24; - const NO_ZERO_DATE = 1 << 25; - const INVALID_DATES = 1 << 26; - const ERROR_FOR_DIVISION_BY_ZERO = 1 << 27; + const STRICT_TRANS_TABLES = 1 << 21; + const STRICT_ALL_TABLES = 1 << 22; + const NO_ZERO_IN_DATE = 1 << 23; + const NO_ZERO_DATE = 1 << 24; + const INVALID_DATES = 1 << 25; + const ERROR_FOR_DIVISION_BY_ZERO = 1 << 26; } } diff --git a/components/tidb_query_expr/src/impl_regexp.rs b/components/tidb_query_expr/src/impl_regexp.rs index 253b376c2f2..2e5830740ee 100644 --- a/components/tidb_query_expr/src/impl_regexp.rs +++ b/components/tidb_query_expr/src/impl_regexp.rs @@ -88,7 +88,7 @@ fn build_regexp_from_args( b"" }; - build_regexp::(pattern, match_type).map(|reg| Some(reg)) + build_regexp::(pattern, match_type).map(Some) } fn init_regexp_data(expr: &mut Expr) -> Result> { @@ -111,7 +111,7 @@ fn init_regexp_data(expr: &mut Expr) -> Result(pattern, match_type).map(|reg| Some(reg)) + build_regexp::(pattern, match_type).map(Some) } /// Currently, TiDB only supports regular expressions for utf-8 strings. From 1a9446f334c2d29417f656fa8ef4ec6a1dda95f7 Mon Sep 17 00:00:00 2001 From: Jay Date: Sun, 9 Oct 2022 01:05:48 -0700 Subject: [PATCH 0258/1149] engine_tirocks: add properties (#13558) ref tikv/tikv#13058 Significant differences are: - This PR uses codec components instead of tikv_util::codec. - Files are re-arranged to make properties related logic better organized. - An extra allocation is reduced by passing `UserCollectedProperties` directly instead of creating a new HashMap. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/codec/src/error.rs | 3 + components/engine_rocks/src/properties.rs | 6 - components/engine_tirocks/Cargo.toml | 7 + components/engine_tirocks/src/engine.rs | 24 + components/engine_tirocks/src/lib.rs | 7 + .../engine_tirocks/src/properties/mod.rs | 164 ++++ .../engine_tirocks/src/properties/mvcc.rs | 364 ++++++++ .../engine_tirocks/src/properties/range.rs | 803 ++++++++++++++++++ .../engine_tirocks/src/properties/table.rs | 96 +++ .../engine_tirocks/src/properties/ttl.rs | 225 +++++ 10 files changed, 1693 insertions(+), 6 deletions(-) create mode 100644 components/engine_tirocks/src/properties/mod.rs create mode 100644 components/engine_tirocks/src/properties/mvcc.rs create mode 100644 components/engine_tirocks/src/properties/range.rs create mode 100644 components/engine_tirocks/src/properties/table.rs create mode 100644 components/engine_tirocks/src/properties/ttl.rs diff --git a/components/codec/src/error.rs b/components/codec/src/error.rs index b85d8dd078d..09118824c6b 100644 --- a/components/codec/src/error.rs +++ b/components/codec/src/error.rs @@ -13,6 +13,8 @@ pub enum ErrorInner { #[error("Data padding is incorrect")] BadPadding, + #[error("key not found")] + KeyNotFound, } impl ErrorInner { @@ -56,6 +58,7 @@ impl ErrorCodeExt for Error { match self.0.as_ref() { ErrorInner::Io(_) => error_code::codec::IO, ErrorInner::BadPadding => error_code::codec::BAD_PADDING, + ErrorInner::KeyNotFound => error_code::codec::KEY_NOT_FOUND, } } } diff --git a/components/engine_rocks/src/properties.rs b/components/engine_rocks/src/properties.rs index d468fb2d523..a95a9aecf7b 100644 --- a/components/engine_rocks/src/properties.rs +++ b/components/engine_rocks/src/properties.rs @@ -131,12 +131,6 @@ impl<'a> DecodeProperties for UserCollectedPropertiesDecoder<'a> { } } -#[derive(Debug, Clone, PartialEq, Copy)] -pub enum RangeOffsetKind { - Size, - Keys, -} - #[derive(Debug, Default, Clone, Copy)] pub struct 
RangeOffsets { pub size: u64, diff --git a/components/engine_tirocks/Cargo.toml b/components/engine_tirocks/Cargo.toml index 469a659567e..5ffa4428dd2 100644 --- a/components/engine_tirocks/Cargo.toml +++ b/components/engine_tirocks/Cargo.toml @@ -4,9 +4,14 @@ version = "0.1.0" edition = "2021" [dependencies] +api_version = { path = "../api_version" } +codec = { path = "../codec" } +collections = { path = "../collections" } derive_more = "0.99.3" engine_traits = { path = "../engine_traits" } +keys = { path = "../keys" } lazy_static = "1.4.0" +log_wrappers = { path = "../log_wrappers" } prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } @@ -16,7 +21,9 @@ tikv_alloc = { path = "../tikv_alloc" } tikv_util = { path = "../tikv_util" } tirocks = { git = "https://github.com/busyjay/tirocks.git", branch = "dev" } tracker = { path = "../tracker" } +txn_types = { path = "../txn_types" } [dev-dependencies] kvproto = { git = "https://github.com/pingcap/kvproto.git" } +rand = "0.8" tempfile = "3.0" diff --git a/components/engine_tirocks/src/engine.rs b/components/engine_tirocks/src/engine.rs index 87ae0efeb79..c3f99cafcc6 100644 --- a/components/engine_tirocks/src/engine.rs +++ b/components/engine_tirocks/src/engine.rs @@ -87,6 +87,30 @@ impl RocksEngine { pub(crate) fn multi_batch_write(&self) -> bool { self.multi_batch_write } + + #[inline] + pub(crate) fn approximate_memtable_stats( + &self, + cf: &str, + start: &[u8], + end: &[u8], + ) -> Result<(u64, u64)> { + let handle = self.cf(cf)?; + Ok(self + .as_inner() + .approximate_mem_table_stats(handle, start, end)) + } + + // TODO: move this function when MiscExt is implemented. 
+ #[cfg(test)] + pub(crate) fn flush(&self, cf: &str, wait: bool) -> Result<()> { + use tirocks::option::FlushOptions; + + let write_handle = self.cf(cf)?; + self.as_inner() + .flush(FlushOptions::default().set_wait(wait), write_handle) + .map_err(r2e) + } } impl engine_traits::Iterable for RocksEngine { diff --git a/components/engine_tirocks/src/lib.rs b/components/engine_tirocks/src/lib.rs index da56cfabb6c..ecf7035b8c4 100644 --- a/components/engine_tirocks/src/lib.rs +++ b/components/engine_tirocks/src/lib.rs @@ -5,8 +5,13 @@ //! When all features of engine_rocks are implemented in this module, //! engine_rocks will be removed and TiKV will switch to tirocks. +#![cfg_attr(test, feature(test))] + extern crate tikv_alloc as _; +#[cfg(test)] +extern crate test; + mod cf_options; mod db_options; mod db_vector; @@ -14,6 +19,7 @@ mod engine; mod engine_iterator; mod logger; mod perf_context; +mod properties; mod snapshot; mod status; mod util; @@ -23,6 +29,7 @@ pub use engine::*; pub use engine_iterator::*; pub use logger::*; pub use perf_context::*; +pub use properties::*; pub use snapshot::RocksSnapshot; pub use status::*; pub use util::*; diff --git a/components/engine_tirocks/src/properties/mod.rs b/components/engine_tirocks/src/properties/mod.rs new file mode 100644 index 00000000000..967273aae3a --- /dev/null +++ b/components/engine_tirocks/src/properties/mod.rs @@ -0,0 +1,164 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +mod mvcc; +mod range; +mod table; +mod ttl; + +use std::{ + cmp, + collections::BTreeMap, + io::Read, + ops::{Deref, DerefMut}, +}; + +use codec::{ + number::NumberCodec, + prelude::{NumberDecoder, NumberEncoder}, +}; +use collections::HashMap; +use tirocks::properties::table::user::UserCollectedProperties; + +pub use self::{ + mvcc::MvccPropertiesCollectorFactory, + range::{RangeProperties, RangePropertiesCollectorFactory}, + table::{RocksTablePropertiesCollection, RocksUserCollectedProperties}, + ttl::TtlPropertiesCollectorFactory, +}; + +/// A struct to help collect properties. +/// +/// The properties of a file can be collected by ranges. Every range will be +/// referenced by a `PropIndex`. +#[derive(Clone, Debug, Default)] +pub struct PropIndex { + /// The properties calculated from the range. The range starts from + /// `offset` of previous `PropIndex` to this `offset`. How large the range + /// is depends on the implementation. + pub prop: u64, + /// The offset in the file. Offsets are not necessary the size of file. It + /// only makes sense to the implementations. 
+ pub offset: u64, +} + +#[derive(Debug, Default)] +pub struct PropIndexes(BTreeMap, PropIndex>); + +impl Deref for PropIndexes { + type Target = BTreeMap, PropIndex>; + fn deref(&self) -> &BTreeMap, PropIndex> { + &self.0 + } +} + +impl DerefMut for PropIndexes { + fn deref_mut(&mut self) -> &mut BTreeMap, PropIndex> { + &mut self.0 + } +} + +impl PropIndexes { + pub fn new() -> PropIndexes { + PropIndexes(BTreeMap::new()) + } + + pub fn into_map(self) -> BTreeMap, PropIndex> { + self.0 + } + + pub fn add(&mut self, key: Vec, index: PropIndex) { + self.0.insert(key, index); + } + + // Format: | klen | k | v.size | v.offset | + pub fn encode(&self) -> Vec { + let cap = cmp::min((8 * 3 + 24) * self.0.len(), 1024); + let mut buf = Vec::with_capacity(cap); + for (k, v) in &self.0 { + buf.write_u64(k.len() as u64).unwrap(); + buf.extend(k); + buf.write_u64(v.prop).unwrap(); + buf.write_u64(v.offset).unwrap(); + } + buf + } + + pub fn decode(mut buf: &[u8]) -> codec::Result { + let mut res = BTreeMap::new(); + while !buf.is_empty() { + let klen = buf.read_u64()?; + let mut k = vec![0; klen as usize]; + buf.read_exact(&mut k)?; + let v = PropIndex { + prop: buf.read_u64()?, + offset: buf.read_u64()?, + }; + res.insert(k, v); + } + Ok(PropIndexes(res)) + } +} + +trait EncodeProperties { + fn encode(&mut self, name: &str, value: &[u8]); + + #[inline] + fn encode_u64(&mut self, name: &str, value: u64) { + let mut buf = [0; 8]; + NumberCodec::encode_u64(&mut buf, value); + self.encode(name, &buf); + } + + #[inline] + fn encode_indexes(&mut self, name: &str, indexes: &PropIndexes) { + self.encode(name, &indexes.encode()); + } +} + +impl EncodeProperties for UserCollectedProperties { + #[inline] + fn encode(&mut self, name: &str, value: &[u8]) { + self.add(name.as_bytes(), value); + } +} + +impl EncodeProperties for HashMap, Vec> { + #[inline] + fn encode(&mut self, name: &str, value: &[u8]) { + self.insert(name.as_bytes().to_owned(), value.to_owned()); + } +} + +trait 
DecodeProperties { + fn decode(&self, k: &str) -> codec::Result<&[u8]>; + + #[inline] + fn decode_u64(&self, k: &str) -> codec::Result { + let mut buf = self.decode(k)?; + buf.read_u64() + } + + #[inline] + fn decode_indexes(&self, k: &str) -> codec::Result { + let buf = self.decode(k)?; + PropIndexes::decode(buf) + } +} + +impl DecodeProperties for UserCollectedProperties { + #[inline] + fn decode(&self, k: &str) -> codec::Result<&[u8]> { + self.get(k.as_bytes()) + .ok_or_else(|| codec::ErrorInner::KeyNotFound.into()) + } +} + +impl DecodeProperties for HashMap, Vec> { + #[inline] + fn decode(&self, k: &str) -> codec::Result<&[u8]> { + match self.get(k.as_bytes()) { + Some(v) => Ok(v.as_slice()), + None => Err(codec::ErrorInner::KeyNotFound.into()), + } + } +} diff --git a/components/engine_tirocks/src/properties/mvcc.rs b/components/engine_tirocks/src/properties/mvcc.rs new file mode 100644 index 00000000000..1ca170f33d5 --- /dev/null +++ b/components/engine_tirocks/src/properties/mvcc.rs @@ -0,0 +1,364 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{cmp, ffi::CStr}; + +use api_version::{ApiV2, KeyMode, KvFormat}; +use engine_traits::{raw_ttl::ttl_current_ts, MvccProperties}; +use tirocks::properties::table::user::{ + Context, EntryType, SequenceNumber, TablePropertiesCollector, TablePropertiesCollectorFactory, + UserCollectedProperties, +}; +use txn_types::{Key, TimeStamp, Write, WriteType}; + +use super::{DecodeProperties, EncodeProperties, PropIndex, PropIndexes}; +use crate::RocksEngine; + +pub const PROP_NUM_ERRORS: &str = "tikv.num_errors"; +pub const PROP_MIN_TS: &str = "tikv.min_ts"; +pub const PROP_MAX_TS: &str = "tikv.max_ts"; +pub const PROP_NUM_ROWS: &str = "tikv.num_rows"; +pub const PROP_NUM_PUTS: &str = "tikv.num_puts"; +pub const PROP_NUM_DELETES: &str = "tikv.num_deletes"; +pub const PROP_NUM_VERSIONS: &str = "tikv.num_versions"; +pub const PROP_MAX_ROW_VERSIONS: &str = "tikv.max_row_versions"; +pub const PROP_ROWS_INDEX: &str = "tikv.rows_index"; +pub const PROP_ROWS_INDEX_DISTANCE: u64 = 10000; + +/// Can be used for write CF in TiDB & TxnKV scenario, or be used for default CF +/// in RawKV scenario. +pub struct MvccPropertiesCollector { + name: &'static CStr, + props: MvccProperties, + last_row: Vec, + num_errors: u64, + row_versions: u64, + cur_prop_index: PropIndex, + row_prop_indexes: PropIndexes, + key_mode: KeyMode, // Use KeyMode::Txn for both TiDB & TxnKV, KeyMode::Raw for RawKV. + current_ts: u64, +} + +impl MvccPropertiesCollector { + fn new(name: &'static CStr, key_mode: KeyMode) -> MvccPropertiesCollector { + MvccPropertiesCollector { + name, + props: MvccProperties::new(), + last_row: Vec::new(), + num_errors: 0, + row_versions: 0, + cur_prop_index: PropIndex::default(), + row_prop_indexes: PropIndexes::new(), + key_mode, + current_ts: ttl_current_ts(), + } + } + + fn finish(&mut self, properties: &mut impl EncodeProperties) { + // Insert last handle. 
+ if self.cur_prop_index.prop > 0 { + self.row_prop_indexes + .insert(self.last_row.clone(), self.cur_prop_index.clone()); + } + encode_mvcc(&self.props, properties); + properties.encode_u64(PROP_NUM_ERRORS, self.num_errors); + properties.encode_indexes(PROP_ROWS_INDEX, &self.row_prop_indexes); + } +} + +impl TablePropertiesCollector for MvccPropertiesCollector { + fn name(&self) -> &CStr { + self.name + } + + fn add( + &mut self, + key: &[u8], + value: &[u8], + entry_type: EntryType, + _: SequenceNumber, + _: u64, + ) -> tirocks::Result<()> { + // TsFilter filters sst based on max_ts and min_ts during iterating. + // To prevent seeing outdated (GC) records, we should consider + // RocksDB delete entry type. + if entry_type != EntryType::kEntryPut && entry_type != EntryType::kEntryDelete { + return Ok(()); + } + + if !keys::validate_data_key(key) { + self.num_errors += 1; + return Ok(()); + } + + let (k, ts) = match Key::split_on_ts_for(key) { + Ok((k, ts)) => (k, ts), + Err(_) => { + self.num_errors += 1; + return Ok(()); + } + }; + + self.props.min_ts = cmp::min(self.props.min_ts, ts); + self.props.max_ts = cmp::max(self.props.max_ts, ts); + if entry_type == EntryType::kEntryDelete { + // Empty value for delete entry type, skip following properties. 
+ return Ok(()); + } + + self.props.num_versions += 1; + + if k != self.last_row.as_slice() { + self.props.num_rows += 1; + self.row_versions = 1; + self.last_row.clear(); + self.last_row.extend(k); + } else { + self.row_versions += 1; + } + if self.row_versions > self.props.max_row_versions { + self.props.max_row_versions = self.row_versions; + } + + if self.key_mode == KeyMode::Raw { + let decode_raw_value = ApiV2::decode_raw_value(value); + match decode_raw_value { + Ok(raw_value) => { + if raw_value.is_valid(self.current_ts) { + self.props.num_puts += 1; + } else { + self.props.num_deletes += 1; + } + } + Err(_) => { + self.num_errors += 1; + } + } + } else { + let write_type = match Write::parse_type(value) { + Ok(v) => v, + Err(_) => { + self.num_errors += 1; + return Ok(()); + } + }; + + match write_type { + WriteType::Put => self.props.num_puts += 1, + WriteType::Delete => self.props.num_deletes += 1, + _ => {} + } + } + + // Add new row. + if self.row_versions == 1 { + self.cur_prop_index.prop += 1; + self.cur_prop_index.offset += 1; + if self.cur_prop_index.offset == 1 + || self.cur_prop_index.prop >= PROP_ROWS_INDEX_DISTANCE + { + self.row_prop_indexes + .insert(self.last_row.clone(), self.cur_prop_index.clone()); + self.cur_prop_index.prop = 0; + } + } + Ok(()) + } + + fn finish(&mut self, properties: &mut UserCollectedProperties) -> tirocks::Result<()> { + self.finish(properties); + Ok(()) + } +} + +/// Can be used for write CF of TiDB/TxnKV, default CF of RawKV. 
+pub struct MvccPropertiesCollectorFactory { + name: &'static CStr, + key_mode: KeyMode, +} + +impl Default for MvccPropertiesCollectorFactory { + fn default() -> Self { + Self { + name: CStr::from_bytes_with_nul(b"tikv.mvcc-properties-collector\0").unwrap(), + key_mode: KeyMode::Txn, + } + } +} + +impl MvccPropertiesCollectorFactory { + pub fn rawkv() -> Self { + Self { + name: CStr::from_bytes_with_nul(b"tikv.rawkv-mvcc-properties-collector\0").unwrap(), + key_mode: KeyMode::Raw, + } + } +} + +impl TablePropertiesCollectorFactory for MvccPropertiesCollectorFactory { + type Collector = MvccPropertiesCollector; + + fn name(&self) -> &CStr { + self.name + } + + fn create_table_properties_collector(&self, _: Context) -> Self::Collector { + MvccPropertiesCollector::new(self.name, self.key_mode) + } +} + +fn encode_mvcc(mvcc_props: &MvccProperties, props: &mut impl EncodeProperties) { + props.encode_u64(PROP_MIN_TS, mvcc_props.min_ts.into_inner()); + props.encode_u64(PROP_MAX_TS, mvcc_props.max_ts.into_inner()); + props.encode_u64(PROP_NUM_ROWS, mvcc_props.num_rows); + props.encode_u64(PROP_NUM_PUTS, mvcc_props.num_puts); + props.encode_u64(PROP_NUM_DELETES, mvcc_props.num_deletes); + props.encode_u64(PROP_NUM_VERSIONS, mvcc_props.num_versions); + props.encode_u64(PROP_MAX_ROW_VERSIONS, mvcc_props.max_row_versions); +} + +pub(super) fn decode_mvcc(props: &impl DecodeProperties) -> codec::Result { + let mut res = MvccProperties::new(); + res.min_ts = props.decode_u64(PROP_MIN_TS)?.into(); + res.max_ts = props.decode_u64(PROP_MAX_TS)?.into(); + res.num_rows = props.decode_u64(PROP_NUM_ROWS)?; + res.num_puts = props.decode_u64(PROP_NUM_PUTS)?; + res.num_versions = props.decode_u64(PROP_NUM_VERSIONS)?; + // To be compatible with old versions. 
+ res.num_deletes = props + .decode_u64(PROP_NUM_DELETES) + .unwrap_or(res.num_versions - res.num_puts); + res.max_row_versions = props.decode_u64(PROP_MAX_ROW_VERSIONS)?; + Ok(res) +} + +impl engine_traits::MvccPropertiesExt for RocksEngine { + fn get_mvcc_properties_cf( + &self, + cf: &str, + safe_point: TimeStamp, + start_key: &[u8], + end_key: &[u8], + ) -> Option { + let collection = match self.range_properties(cf, start_key, end_key) { + Ok(c) if !c.is_empty() => c, + _ => return None, + }; + let mut props = MvccProperties::new(); + for (_, v) in &*collection { + let mvcc = match decode_mvcc(v.user_collected_properties()) { + Ok(m) => m, + Err(_) => return None, + }; + // Filter out properties after safe_point. + if mvcc.min_ts > safe_point { + continue; + } + props.add(&mvcc); + } + Some(props) + } +} + +#[cfg(test)] +mod tests { + use api_version::RawValue; + use collections::HashMap; + use test::Bencher; + use txn_types::{Key, Write, WriteType}; + + use super::*; + + #[test] + fn test_mvcc_properties() { + let cases = [ + ("ab", 2, WriteType::Put, EntryType::kEntryPut), + ("ab", 1, WriteType::Delete, EntryType::kEntryPut), + ("ab", 1, WriteType::Delete, EntryType::kEntryDelete), + ("cd", 5, WriteType::Delete, EntryType::kEntryPut), + ("cd", 4, WriteType::Put, EntryType::kEntryPut), + ("cd", 3, WriteType::Put, EntryType::kEntryPut), + ("ef", 6, WriteType::Put, EntryType::kEntryPut), + ("ef", 6, WriteType::Put, EntryType::kEntryDelete), + ("gh", 7, WriteType::Delete, EntryType::kEntryPut), + ]; + let mut collector = + MvccPropertiesCollector::new(CStr::from_bytes_with_nul(b"\0").unwrap(), KeyMode::Txn); + for &(key, ts, write_type, entry_type) in &cases { + let ts = ts.into(); + let k = Key::from_raw(key.as_bytes()).append_ts(ts); + let k = keys::data_key(k.as_encoded()); + let v = Write::new(write_type, ts, None).as_ref().to_bytes(); + collector.add(&k, &v, entry_type, 0, 0).unwrap(); + } + let mut result = HashMap::default(); + collector.finish(&mut 
result); + + let props = decode_mvcc(&result).unwrap(); + assert_eq!(props.min_ts, 1.into()); + assert_eq!(props.max_ts, 7.into()); + assert_eq!(props.num_rows, 4); + assert_eq!(props.num_puts, 4); + assert_eq!(props.num_versions, 7); + assert_eq!(props.max_row_versions, 3); + } + + #[test] + fn test_mvcc_properties_rawkv_mode() { + let test_raws = vec![ + (b"r\0a", 1, false, u64::MAX), + (b"r\0a", 5, false, u64::MAX), + (b"r\0a", 7, false, u64::MAX), + (b"r\0b", 1, false, u64::MAX), + (b"r\0b", 1, true, u64::MAX), + (b"r\0c", 1, true, 10), + (b"r\0d", 1, true, 10), + ]; + + let mut collector = + MvccPropertiesCollector::new(CStr::from_bytes_with_nul(b"\0").unwrap(), KeyMode::Raw); + for &(key, ts, is_delete, expire_ts) in &test_raws { + let encode_key = ApiV2::encode_raw_key(key, Some(ts.into())); + let k = keys::data_key(encode_key.as_encoded()); + let v = ApiV2::encode_raw_value(RawValue { + user_value: &[0; 10][..], + expire_ts: Some(expire_ts), + is_delete, + }); + collector.add(&k, &v, EntryType::kEntryPut, 0, 0).unwrap(); + } + + let mut result = HashMap::default(); + collector.finish(&mut result); + + let props = decode_mvcc(&result).unwrap(); + assert_eq!(props.min_ts, 1.into()); + assert_eq!(props.max_ts, 7.into()); + assert_eq!(props.num_rows, 4); + assert_eq!(props.num_deletes, 3); + assert_eq!(props.num_puts, 4); + assert_eq!(props.num_versions, 7); + assert_eq!(props.max_row_versions, 3); + } + + #[bench] + fn bench_mvcc_properties(b: &mut Bencher) { + let ts = 1.into(); + let num_entries = 100; + let mut entries = Vec::new(); + for i in 0..num_entries { + let s = format!("{:032}", i); + let k = Key::from_raw(s.as_bytes()).append_ts(ts); + let k = keys::data_key(k.as_encoded()); + let w = Write::new(WriteType::Put, ts, Some(s.as_bytes().to_owned())); + entries.push((k, w.as_ref().to_bytes())); + } + + let mut collector = + MvccPropertiesCollector::new(CStr::from_bytes_with_nul(b"\0").unwrap(), KeyMode::Txn); + b.iter(|| { + for &(ref k, ref v) in 
&entries { + collector.add(k, v, EntryType::kEntryPut, 0, 0).unwrap(); + } + }); + } +} diff --git a/components/engine_tirocks/src/properties/range.rs b/components/engine_tirocks/src/properties/range.rs new file mode 100644 index 00000000000..59b9e68a6bb --- /dev/null +++ b/components/engine_tirocks/src/properties/range.rs @@ -0,0 +1,803 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ffi::CStr, io::Read, path::Path}; + +use codec::prelude::{NumberDecoder, NumberEncoder}; +use engine_traits::{MvccProperties, Range, Result, CF_DEFAULT, CF_LOCK, CF_WRITE, LARGE_CFS}; +use tikv_util::{box_err, box_try, debug, info}; +use tirocks::{ + properties::table::user::{ + Context, EntryType, SequenceNumber, TablePropertiesCollector, + TablePropertiesCollectorFactory, UserCollectedProperties, + }, + titan::TitanBlobIndex, +}; + +use super::{mvcc::decode_mvcc, DecodeProperties, EncodeProperties, PropIndexes}; +use crate::RocksEngine; + +const PROP_TOTAL_SIZE: &str = "tikv.total_size"; +const PROP_SIZE_INDEX: &str = "tikv.size_index"; +const PROP_RANGE_INDEX: &str = "tikv.range_index"; +pub const DEFAULT_PROP_SIZE_INDEX_DISTANCE: u64 = 4 * 1024 * 1024; +pub const DEFAULT_PROP_KEYS_INDEX_DISTANCE: u64 = 40 * 1024; + +// Deprecated. Only for compatible issue from v2.0 or older version. 
+#[derive(Debug, Default)] +pub struct SizeProperties { + pub total_size: u64, + pub prop_indexes: PropIndexes, +} + +impl SizeProperties { + fn decode(props: &impl DecodeProperties) -> codec::Result { + Ok(SizeProperties { + total_size: props.decode_u64(PROP_TOTAL_SIZE)?, + prop_indexes: props.decode_indexes(PROP_SIZE_INDEX)?, + }) + } +} + +#[derive(Debug, Default, Clone, Copy)] +pub struct RangeOffsets { + pub size: u64, + pub keys: u64, +} + +#[derive(Debug, Default)] +pub struct RangeProperties { + pub offsets: Vec<(Vec, RangeOffsets)>, +} + +impl RangeProperties { + pub fn get(&self, key: &[u8]) -> &RangeOffsets { + let idx = self + .offsets + .binary_search_by_key(&key, |&(ref k, _)| k) + .unwrap(); + &self.offsets[idx].1 + } + + fn encode(&self, props: &mut impl EncodeProperties) { + let mut buf = Vec::with_capacity(1024); + for (k, offsets) in &self.offsets { + buf.write_u64(k.len() as u64).unwrap(); + buf.extend(k); + buf.write_u64(offsets.size).unwrap(); + buf.write_u64(offsets.keys).unwrap(); + } + props.encode(PROP_RANGE_INDEX, &buf); + } + + pub(super) fn decode(props: &impl DecodeProperties) -> codec::Result { + match RangeProperties::decode_from_range_properties(props) { + Ok(res) => return Ok(res), + Err(e) => info!( + "decode to RangeProperties failed with err: {:?}, try to decode to SizeProperties, maybe upgrade from v2.0 or older version?", + e + ), + } + SizeProperties::decode(props).map(|res| res.into()) + } + + fn decode_from_range_properties( + props: &impl DecodeProperties, + ) -> codec::Result { + let mut res = RangeProperties::default(); + let mut buf = props.decode(PROP_RANGE_INDEX)?; + while !buf.is_empty() { + let klen = buf.read_u64()?; + let mut k = vec![0; klen as usize]; + buf.read_exact(&mut k)?; + let offsets = RangeOffsets { + size: buf.read_u64()?, + keys: buf.read_u64()?, + }; + res.offsets.push((k, offsets)); + } + Ok(res) + } + + pub fn get_approximate_size_in_range(&self, start: &[u8], end: &[u8]) -> u64 { + 
self.get_approximate_distance_in_range(start, end).0 + } + + pub fn get_approximate_keys_in_range(&self, start: &[u8], end: &[u8]) -> u64 { + self.get_approximate_distance_in_range(start, end).1 + } + + /// Returns `size` and `keys`. + pub fn get_approximate_distance_in_range(&self, start: &[u8], end: &[u8]) -> (u64, u64) { + assert!(start <= end); + if start == end { + return (0, 0); + } + let start_offset = match self.offsets.binary_search_by_key(&start, |&(ref k, _)| k) { + Ok(idx) => Some(idx), + Err(next_idx) => next_idx.checked_sub(1), + }; + let end_offset = match self.offsets.binary_search_by_key(&end, |&(ref k, _)| k) { + Ok(idx) => Some(idx), + Err(next_idx) => next_idx.checked_sub(1), + }; + let start = start_offset.map_or_else(|| Default::default(), |x| self.offsets[x].1); + let end = end_offset.map_or_else(|| Default::default(), |x| self.offsets[x].1); + assert!(end.size >= start.size && end.keys >= start.keys); + (end.size - start.size, end.keys - start.keys) + } + + // equivalent to range(Excluded(start_key), Excluded(end_key)) + pub fn take_excluded_range( + mut self, + start_key: &[u8], + end_key: &[u8], + ) -> Vec<(Vec, RangeOffsets)> { + let start_offset = match self + .offsets + .binary_search_by_key(&start_key, |&(ref k, _)| k) + { + Ok(idx) => { + if idx == self.offsets.len() - 1 { + return vec![]; + } else { + idx + 1 + } + } + Err(next_idx) => next_idx, + }; + + let end_offset = match self.offsets.binary_search_by_key(&end_key, |&(ref k, _)| k) { + Ok(idx) => { + if idx == 0 { + return vec![]; + } else { + idx - 1 + } + } + Err(next_idx) => { + if next_idx == 0 { + return vec![]; + } else { + next_idx - 1 + } + } + }; + + if start_offset > end_offset { + return vec![]; + } + + self.offsets.drain(start_offset..=end_offset).collect() + } + + pub fn smallest_key(&self) -> Option> { + self.offsets.first().map(|(k, _)| k.to_owned()) + } + + pub fn largest_key(&self) -> Option> { + self.offsets.last().map(|(k, _)| k.to_owned()) + } +} + +impl From 
for RangeProperties { + fn from(p: SizeProperties) -> RangeProperties { + let mut res = RangeProperties::default(); + for (key, size_index) in p.prop_indexes.into_map() { + let range = RangeOffsets { + // For SizeProperties, the offset is accumulation of the size. + size: size_index.offset, + ..Default::default() + }; + res.offsets.push((key, range)); + } + res + } +} + +fn range_properties_collector_name() -> &'static CStr { + CStr::from_bytes_with_nul(b"tikv.range-properties-collector\0").unwrap() +} + +pub struct RangePropertiesCollector { + props: RangeProperties, + last_offsets: RangeOffsets, + last_key: Vec, + cur_offsets: RangeOffsets, + prop_size_index_distance: u64, + prop_keys_index_distance: u64, +} + +impl Default for RangePropertiesCollector { + fn default() -> Self { + RangePropertiesCollector { + props: RangeProperties::default(), + last_offsets: RangeOffsets::default(), + last_key: vec![], + cur_offsets: RangeOffsets::default(), + prop_size_index_distance: DEFAULT_PROP_SIZE_INDEX_DISTANCE, + prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, + } + } +} + +impl RangePropertiesCollector { + pub fn new(prop_size_index_distance: u64, prop_keys_index_distance: u64) -> Self { + RangePropertiesCollector { + prop_size_index_distance, + prop_keys_index_distance, + ..Default::default() + } + } + + #[inline] + fn size_in_last_range(&self) -> u64 { + self.cur_offsets.size - self.last_offsets.size + } + + #[inline] + fn keys_in_last_range(&self) -> u64 { + self.cur_offsets.keys - self.last_offsets.keys + } + + #[inline] + fn insert_new_point(&mut self, key: Vec) { + self.last_offsets = self.cur_offsets; + self.props.offsets.push((key, self.cur_offsets)); + } + + #[inline] + fn finish(&mut self, props: &mut impl EncodeProperties) { + if self.size_in_last_range() > 0 || self.keys_in_last_range() > 0 { + let key = self.last_key.clone(); + self.insert_new_point(key); + } + self.props.encode(props); + } +} + +impl TablePropertiesCollector for 
RangePropertiesCollector { + #[inline] + fn name(&self) -> &CStr { + range_properties_collector_name() + } + + #[inline] + fn add( + &mut self, + key: &[u8], + value: &[u8], + entry_type: EntryType, + _: SequenceNumber, + _: u64, + ) -> tirocks::Result<()> { + // size + let entry_size = match entry_type { + EntryType::kEntryPut => value.len() as u64, + EntryType::kEntryBlobIndex => match TitanBlobIndex::decode(value) { + Ok(index) => index.blob_size + value.len() as u64, + // Perhaps should panic? + Err(_) => return Ok(()), + }, + _ => return Ok(()), + }; + self.cur_offsets.size += entry_size + key.len() as u64; + // keys + self.cur_offsets.keys += 1; + // Add the start key for convenience. + if self.last_key.is_empty() + || self.size_in_last_range() >= self.prop_size_index_distance + || self.keys_in_last_range() >= self.prop_keys_index_distance + { + self.insert_new_point(key.to_owned()); + } + self.last_key.clear(); + self.last_key.extend_from_slice(key); + Ok(()) + } + + #[inline] + fn finish(&mut self, prop: &mut UserCollectedProperties) -> tirocks::Result<()> { + self.finish(prop); + Ok(()) + } +} + +pub struct RangePropertiesCollectorFactory { + pub prop_size_index_distance: u64, + pub prop_keys_index_distance: u64, +} + +impl Default for RangePropertiesCollectorFactory { + #[inline] + fn default() -> Self { + RangePropertiesCollectorFactory { + prop_size_index_distance: DEFAULT_PROP_SIZE_INDEX_DISTANCE, + prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, + } + } +} + +impl TablePropertiesCollectorFactory for RangePropertiesCollectorFactory { + type Collector = RangePropertiesCollector; + + #[inline] + fn name(&self) -> &CStr { + range_properties_collector_name() + } + + #[inline] + fn create_table_properties_collector(&self, _: Context) -> RangePropertiesCollector { + RangePropertiesCollector::new(self.prop_size_index_distance, self.prop_keys_index_distance) + } +} + +fn get_range_entries_and_versions( + engine: &crate::RocksEngine, + cf: &str, + 
start: &[u8], + end: &[u8], +) -> Option<(u64, u64)> { + let collection = match engine.properties_of_tables_in_range(cf, &[(start, end)]) { + Ok(v) => v, + Err(_) => return None, + }; + + if collection.is_empty() { + return None; + } + + // Aggregate total MVCC properties and total number entries. + let mut props = MvccProperties::new(); + let mut num_entries = 0; + for (_, v) in &*collection { + let mvcc = match decode_mvcc(v.user_collected_properties()) { + Ok(v) => v, + Err(_) => return None, + }; + num_entries += v.num_entries(); + props.add(&mvcc); + } + + Some((num_entries, props.num_versions)) +} + +impl engine_traits::RangePropertiesExt for RocksEngine { + fn get_range_approximate_keys(&self, range: Range<'_>, large_threshold: u64) -> Result { + // try to get from RangeProperties first. + match self.get_range_approximate_keys_cf(CF_WRITE, range, large_threshold) { + Ok(v) => { + return Ok(v); + } + Err(e) => debug!( + "failed to get keys from RangeProperties"; + "err" => ?e, + ), + } + + let start = &range.start_key; + let end = &range.end_key; + let (_, keys) = + get_range_entries_and_versions(self, CF_WRITE, start, end).unwrap_or_default(); + Ok(keys) + } + + fn get_range_approximate_keys_cf( + &self, + cfname: &str, + range: Range<'_>, + large_threshold: u64, + ) -> Result { + let start_key = &range.start_key; + let end_key = &range.end_key; + let mut total_keys = 0; + let (mem_keys, _) = + self.approximate_memtable_stats(cfname, range.start_key, range.end_key)?; + total_keys += mem_keys; + + let collection = box_try!(self.range_properties(cfname, start_key, end_key)); + for (_, v) in &*collection { + let props = box_try!(RangeProperties::decode(v.user_collected_properties())); + total_keys += props.get_approximate_keys_in_range(start_key, end_key); + } + + if large_threshold != 0 && total_keys > large_threshold { + let ssts = collection + .into_iter() + .map(|(k, v)| { + let props = RangeProperties::decode(v.user_collected_properties()).unwrap(); + let 
keys = props.get_approximate_keys_in_range(start_key, end_key); + let p = std::str::from_utf8(k).unwrap(); + format!( + "{}:{}", + Path::new(p) + .file_name() + .map(|f| f.to_str().unwrap()) + .unwrap_or(p), + keys + ) + }) + .collect::>() + .join(", "); + info!( + "range contains too many keys"; + "start" => log_wrappers::Value::key(range.start_key), + "end" => log_wrappers::Value::key(range.end_key), + "total_keys" => total_keys, + "memtable" => mem_keys, + "ssts_keys" => ssts, + "cf" => cfname, + ) + } + Ok(total_keys) + } + + fn get_range_approximate_size(&self, range: Range<'_>, large_threshold: u64) -> Result { + let mut size = 0; + for cf in LARGE_CFS { + size += self + .get_range_approximate_size_cf(cf, range, large_threshold) + // CF_LOCK doesn't have RangeProperties until v4.0, so we swallow the error for + // backward compatibility. + .or_else(|e| if cf == &CF_LOCK { Ok(0) } else { Err(e) })?; + } + Ok(size) + } + + fn get_range_approximate_size_cf( + &self, + cf: &str, + range: Range<'_>, + large_threshold: u64, + ) -> Result { + let start_key = &range.start_key; + let end_key = &range.end_key; + let mut total_size = 0; + let (_, mem_size) = self.approximate_memtable_stats(cf, range.start_key, range.end_key)?; + total_size += mem_size; + + let collection = box_try!(self.range_properties(cf, start_key, end_key)); + for (_, v) in &*collection { + let props = box_try!(RangeProperties::decode(v.user_collected_properties())); + total_size += props.get_approximate_size_in_range(start_key, end_key); + } + + if large_threshold != 0 && total_size > large_threshold { + let ssts = collection + .into_iter() + .map(|(k, v)| { + let props = RangeProperties::decode(v.user_collected_properties()).unwrap(); + let size = props.get_approximate_size_in_range(start_key, end_key); + let p = std::str::from_utf8(k).unwrap(); + format!( + "{}:{}", + Path::new(p) + .file_name() + .map(|f| f.to_str().unwrap()) + .unwrap_or(p), + size + ) + }) + .collect::>() + .join(", "); + 
info!( + "range size is too large"; + "start" => log_wrappers::Value::key(range.start_key), + "end" => log_wrappers::Value::key(range.end_key), + "total_size" => total_size, + "memtable" => mem_size, + "ssts_size" => ssts, + "cf" => cf, + ) + } + Ok(total_size) + } + + fn get_range_approximate_split_keys( + &self, + range: Range<'_>, + key_count: usize, + ) -> Result>> { + let get_cf_size = |cf: &str| self.get_range_approximate_size_cf(cf, range, 0); + let cfs = [ + (CF_DEFAULT, box_try!(get_cf_size(CF_DEFAULT))), + (CF_WRITE, box_try!(get_cf_size(CF_WRITE))), + // CF_LOCK doesn't have RangeProperties until v4.0, so we swallow the error for + // backward compatibility. + (CF_LOCK, get_cf_size(CF_LOCK).unwrap_or(0)), + ]; + + let total_size: u64 = cfs.iter().map(|(_, s)| s).sum(); + if total_size == 0 { + return Err(box_err!("all CFs are empty")); + } + + let (cf, _) = cfs.iter().max_by_key(|(_, s)| s).unwrap(); + + self.get_range_approximate_split_keys_cf(cf, range, key_count) + } + + fn get_range_approximate_split_keys_cf( + &self, + cfname: &str, + range: Range<'_>, + key_count: usize, + ) -> Result>> { + let start_key = &range.start_key; + let end_key = &range.end_key; + let collection = box_try!(self.range_properties(cfname, start_key, end_key)); + + let mut keys = vec![]; + for (_, v) in &*collection { + let props = box_try!(RangeProperties::decode(v.user_collected_properties())); + keys.extend( + props + .take_excluded_range(start_key, end_key) + .into_iter() + .map(|(k, _)| k), + ); + } + + if keys.is_empty() { + return Ok(vec![]); + } + + const SAMPLING_THRESHOLD: usize = 20000; + const SAMPLE_RATIO: usize = 1000; + // If there are too many keys, reduce its amount before sorting, or it may take + // too much time to sort the keys. + if keys.len() > SAMPLING_THRESHOLD { + let len = keys.len(); + keys = keys.into_iter().step_by(len / SAMPLE_RATIO).collect(); + } + keys.sort(); + + // If the keys are too few, return them directly. 
+ if keys.len() <= key_count { + return Ok(keys); + } + + // Find `key_count` keys which divides the whole range into `parts` parts + // evenly. + let mut res = Vec::with_capacity(key_count); + let section_len = (keys.len() as f64) / ((key_count + 1) as f64); + for i in 1..=key_count { + res.push(keys[(section_len * (i as f64)) as usize].clone()) + } + res.dedup(); + Ok(res) + } +} + +#[cfg(test)] +mod tests { + use collections::HashMap; + use engine_traits::{SyncMutable, CF_WRITE, LARGE_CFS}; + use rand::Rng; + use tempfile::Builder; + use tirocks::properties::table::user::SysTablePropertiesCollectorFactory; + use txn_types::Key; + + use super::*; + use crate::{ + cf_options::RocksCfOptions, db_options::RocksDbOptions, + properties::mvcc::MvccPropertiesCollectorFactory, + }; + + #[allow(clippy::many_single_char_names)] + #[test] + fn test_range_properties() { + let cases = [ + ("a", 0, 1), + // handle "a": size(size = 1, offset = 1),keys(1,1) + ("b", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 8, 1), + ("c", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 4, 1), + ("d", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 2, 1), + ("e", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 8, 1), + // handle "e": size(size = DISTANCE + 4, offset = DISTANCE + 5),keys(4,5) + ("f", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 4, 1), + ("g", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 2, 1), + ("h", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 8, 1), + ("i", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 4, 1), + // handle "i": size(size = DISTANCE / 8 * 9 + 4, offset = DISTANCE / 8 * 17 + + // 9),keys(4,5) + ("j", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 2, 1), + ("k", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 2, 1), + // handle "k": size(size = DISTANCE + 2, offset = DISTANCE / 8 * 25 + 11),keys(2,11) + ("l", 0, DEFAULT_PROP_KEYS_INDEX_DISTANCE / 2), + ("m", 0, DEFAULT_PROP_KEYS_INDEX_DISTANCE / 2), + // handle "m": keys = DEFAULT_PROP_KEYS_INDEX_DISTANCE,offset = + // 11+DEFAULT_PROP_KEYS_INDEX_DISTANCE + ("n", 1, DEFAULT_PROP_KEYS_INDEX_DISTANCE), + // handle "n": keys = 
DEFAULT_PROP_KEYS_INDEX_DISTANCE, offset = + // 11+2*DEFAULT_PROP_KEYS_INDEX_DISTANCE + ("o", 1, 1), + // handle "o": keys = 1, offset = 12 + 2*DEFAULT_PROP_KEYS_INDEX_DISTANCE + ]; + + let mut collector = RangePropertiesCollector::default(); + for &(k, vlen, count) in &cases { + let v = vec![0; vlen as usize]; + for _ in 0..count { + collector + .add(k.as_bytes(), &v, EntryType::kEntryPut, 0, 0) + .unwrap(); + } + } + for &(k, vlen, _) in &cases { + let v = vec![0; vlen as usize]; + collector + .add(k.as_bytes(), &v, EntryType::kEntryOther, 0, 0) + .unwrap(); + } + let mut result = HashMap::default(); + collector.finish(&mut result); + + let props = RangeProperties::decode(&result).unwrap(); + assert_eq!(props.smallest_key().unwrap(), cases[0].0.as_bytes()); + assert_eq!( + props.largest_key().unwrap(), + cases[cases.len() - 1].0.as_bytes() + ); + assert_eq!( + props.get_approximate_size_in_range(b"", b"k"), + DEFAULT_PROP_SIZE_INDEX_DISTANCE / 8 * 25 + 11 + ); + assert_eq!(props.get_approximate_keys_in_range(b"", b"k"), 11_u64); + + assert_eq!(props.offsets.len(), 7); + let a = props.get(b"a"); + assert_eq!(a.size, 1); + let e = props.get(b"e"); + assert_eq!(e.size, DEFAULT_PROP_SIZE_INDEX_DISTANCE + 5); + let i = props.get(b"i"); + assert_eq!(i.size, DEFAULT_PROP_SIZE_INDEX_DISTANCE / 8 * 17 + 9); + let k = props.get(b"k"); + assert_eq!(k.size, DEFAULT_PROP_SIZE_INDEX_DISTANCE / 8 * 25 + 11); + let m = props.get(b"m"); + assert_eq!(m.keys, 11 + DEFAULT_PROP_KEYS_INDEX_DISTANCE); + let n = props.get(b"n"); + assert_eq!(n.keys, 11 + 2 * DEFAULT_PROP_KEYS_INDEX_DISTANCE); + let o = props.get(b"o"); + assert_eq!(o.keys, 12 + 2 * DEFAULT_PROP_KEYS_INDEX_DISTANCE); + let empty = RangeOffsets::default(); + let cases = [ + (" ", "k", k, &empty, 3), + (" ", " ", &empty, &empty, 0), + ("k", "k", k, k, 0), + ("a", "k", k, a, 2), + ("a", "i", i, a, 1), + ("e", "h", e, e, 0), + ("b", "h", e, a, 1), + ("g", "g", i, i, 0), + ]; + for &(start, end, end_idx, start_idx, count) in 
&cases { + let props = RangeProperties::decode(&result).unwrap(); + let size = end_idx.size - start_idx.size; + assert_eq!( + props.get_approximate_size_in_range(start.as_bytes(), end.as_bytes()), + size + ); + let keys = end_idx.keys - start_idx.keys; + assert_eq!( + props.get_approximate_keys_in_range(start.as_bytes(), end.as_bytes()), + keys + ); + assert_eq!( + props + .take_excluded_range(start.as_bytes(), end.as_bytes()) + .len(), + count + ); + } + } + + #[test] + fn test_range_properties_with_blob_index() { + let cases = [ + ("a", 0), + // handle "a": size(size = 1, offset = 1),keys(1,1) + ("b", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 8), + ("c", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 4), + ("d", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 2), + ("e", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 8), + // handle "e": size(size = DISTANCE + 4, offset = DISTANCE + 5),keys(4,5) + ("f", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 4), + ("g", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 2), + ("h", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 8), + ("i", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 4), + // handle "i": size(size = DISTANCE / 8 * 9 + 4, offset = DISTANCE / 8 * 17 + + // 9),keys(4,5) + ("j", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 2), + ("k", DEFAULT_PROP_SIZE_INDEX_DISTANCE / 2), + // handle "k": size(size = DISTANCE + 2, offset = DISTANCE / 8 * 25 + 11),keys(2,11) + ]; + + let handles = ["a", "e", "i", "k"]; + + let mut rng = rand::thread_rng(); + let mut collector = RangePropertiesCollector::default(); + let mut extra_value_size: u64 = 0; + for &(k, vlen) in &cases { + if handles.contains(&k) || rng.gen_range(0..2) == 0 { + let v = vec![0; vlen as usize - extra_value_size as usize]; + extra_value_size = 0; + collector + .add(k.as_bytes(), &v, EntryType::kEntryPut, 0, 0) + .unwrap(); + } else { + let blob_index = TitanBlobIndex::new(0, vlen - extra_value_size, 0); + let v = blob_index.encode(); + extra_value_size = v.len() as u64; + collector + .add(k.as_bytes(), &v, EntryType::kEntryBlobIndex, 0, 0) + .unwrap(); + } 
+ } + let mut result = HashMap::default(); + collector.finish(&mut result); + + let props = RangeProperties::decode(&result).unwrap(); + assert_eq!(props.smallest_key().unwrap(), cases[0].0.as_bytes()); + assert_eq!( + props.largest_key().unwrap(), + cases[cases.len() - 1].0.as_bytes() + ); + assert_eq!( + props.get_approximate_size_in_range(b"e", b"i"), + DEFAULT_PROP_SIZE_INDEX_DISTANCE / 8 * 9 + 4 + ); + assert_eq!( + props.get_approximate_size_in_range(b"", b"k"), + DEFAULT_PROP_SIZE_INDEX_DISTANCE / 8 * 25 + 11 + ); + } + + #[test] + fn test_get_range_entries_and_versions() { + let path = Builder::new() + .prefix("_test_get_range_entries_and_versions") + .tempdir() + .unwrap(); + let db_opts = RocksDbOptions::default(); + let cfs_opts = LARGE_CFS + .iter() + .map(|cf| { + let mut cf_opts = RocksCfOptions::default(); + cf_opts + .set_level0_file_num_compaction_trigger(10) + .add_table_properties_collector_factory( + &SysTablePropertiesCollectorFactory::new( + MvccPropertiesCollectorFactory::default(), + ), + ); + (*cf, cf_opts) + }) + .collect(); + let db = crate::util::new_engine_opt(path.path(), db_opts, cfs_opts).unwrap(); + + let cases = ["a", "b", "c"]; + for &key in &cases { + let k1 = keys::data_key( + Key::from_raw(key.as_bytes()) + .append_ts(2.into()) + .as_encoded(), + ); + db.put_cf(CF_WRITE, &k1, b"v1").unwrap(); + db.delete_cf(CF_WRITE, &k1).unwrap(); + let key = keys::data_key( + Key::from_raw(key.as_bytes()) + .append_ts(3.into()) + .as_encoded(), + ); + db.put_cf(CF_WRITE, &key, b"v2").unwrap(); + db.flush(CF_WRITE, true).unwrap(); + } + + let start_keys = keys::data_key(&[]); + let end_keys = keys::data_end_key(&[]); + let (entries, versions) = + get_range_entries_and_versions(&db, CF_WRITE, &start_keys, &end_keys).unwrap(); + assert_eq!(entries, (cases.len() * 2) as u64); + assert_eq!(versions, cases.len() as u64); + } +} diff --git a/components/engine_tirocks/src/properties/table.rs b/components/engine_tirocks/src/properties/table.rs new 
file mode 100644 index 00000000000..84998bbeb88 --- /dev/null +++ b/components/engine_tirocks/src/properties/table.rs @@ -0,0 +1,96 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::mem; + +use engine_traits::{Range, Result}; +use tirocks::properties::table::{ + builtin::OwnedTablePropertiesCollection, user::UserCollectedProperties, +}; + +use super::range::RangeProperties; +use crate::{r2e, RocksEngine}; + +#[repr(transparent)] +pub struct RocksUserCollectedProperties(UserCollectedProperties); + +impl RocksUserCollectedProperties { + #[inline] + fn from_rocks(v: &UserCollectedProperties) -> &Self { + unsafe { mem::transmute(v) } + } +} + +impl engine_traits::UserCollectedProperties for RocksUserCollectedProperties { + #[inline] + fn get(&self, index: &[u8]) -> Option<&[u8]> { + self.0.get(index) + } + + #[inline] + fn approximate_size_and_keys(&self, start: &[u8], end: &[u8]) -> Option<(usize, usize)> { + let rp = RangeProperties::decode(&self.0).ok()?; + let x = rp.get_approximate_distance_in_range(start, end); + Some((x.0 as usize, x.1 as usize)) + } +} + +#[repr(transparent)] +pub struct RocksTablePropertiesCollection(OwnedTablePropertiesCollection); + +impl engine_traits::TablePropertiesCollection for RocksTablePropertiesCollection { + type UserCollectedProperties = RocksUserCollectedProperties; + + #[inline] + fn iter_user_collected_properties(&self, mut f: F) + where + F: FnMut(&Self::UserCollectedProperties) -> bool, + { + for (_, props) in &*self.0 { + let props = props.user_collected_properties(); + if !f(RocksUserCollectedProperties::from_rocks(props)) { + break; + } + } + } +} + +impl engine_traits::TablePropertiesExt for RocksEngine { + type TablePropertiesCollection = RocksTablePropertiesCollection; + + fn table_properties_collection( + &self, + cf: &str, + ranges: &[Range<'_>], + ) -> Result { + // FIXME: extra allocation + let ranges: Vec<_> = ranges.iter().map(|r| (r.start_key, r.end_key)).collect(); + let 
collection = self.properties_of_tables_in_range(cf, &ranges)?; + Ok(RocksTablePropertiesCollection(collection)) + } +} + +impl RocksEngine { + #[inline] + pub(crate) fn properties_of_tables_in_range( + &self, + cf: &str, + ranges: &[(&[u8], &[u8])], + ) -> Result { + let handle = self.cf(cf)?; + let mut c = OwnedTablePropertiesCollection::default(); + self.as_inner() + .properties_of_tables_in_range(handle, ranges, &mut c) + .map_err(r2e)?; + Ok(c) + } + + #[inline] + pub fn range_properties( + &self, + cf: &str, + start_key: &[u8], + end_key: &[u8], + ) -> Result { + self.properties_of_tables_in_range(cf, &[(start_key, end_key)]) + } +} diff --git a/components/engine_tirocks/src/properties/ttl.rs b/components/engine_tirocks/src/properties/ttl.rs new file mode 100644 index 00000000000..c4190fe59bd --- /dev/null +++ b/components/engine_tirocks/src/properties/ttl.rs @@ -0,0 +1,225 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ffi::CStr, marker::PhantomData}; + +use api_version::{KeyMode, KvFormat, RawValue}; +use engine_traits::{Result, TtlProperties, TtlPropertiesExt}; +use tikv_util::error; +use tirocks::properties::table::user::{ + Context, EntryType, SequenceNumber, TablePropertiesCollector, TablePropertiesCollectorFactory, + UserCollectedProperties, +}; + +use super::{DecodeProperties, EncodeProperties}; +use crate::RocksEngine; + +const PROP_MAX_EXPIRE_TS: &str = "tikv.max_expire_ts"; +const PROP_MIN_EXPIRE_TS: &str = "tikv.min_expire_ts"; + +fn encode_ttl(ttl_props: &TtlProperties, props: &mut impl EncodeProperties) { + props.encode_u64(PROP_MAX_EXPIRE_TS, ttl_props.max_expire_ts); + props.encode_u64(PROP_MIN_EXPIRE_TS, ttl_props.min_expire_ts); +} + +pub(super) fn decode_ttl(props: &impl DecodeProperties) -> codec::Result { + let res = TtlProperties { + max_expire_ts: props.decode_u64(PROP_MAX_EXPIRE_TS)?, + min_expire_ts: props.decode_u64(PROP_MIN_EXPIRE_TS)?, + }; + Ok(res) +} + +impl TtlPropertiesExt for RocksEngine { 
+ fn get_range_ttl_properties_cf( + &self, + cf: &str, + start_key: &[u8], + end_key: &[u8], + ) -> Result> { + let collection = self.properties_of_tables_in_range(cf, &[(start_key, end_key)])?; + if collection.is_empty() { + return Ok(vec![]); + } + + let mut res = Vec::new(); + for (file_name, v) in &*collection { + let prop = match decode_ttl(v.user_collected_properties()) { + Ok(v) => v, + Err(_) => continue, + }; + res.push((std::str::from_utf8(file_name).unwrap().to_string(), prop)); + } + Ok(res) + } +} + +/// Can only be used for default CF. +pub struct TtlPropertiesCollector { + prop: TtlProperties, + _phantom: PhantomData, +} + +impl TtlPropertiesCollector { + fn finish(&mut self, properties: &mut impl EncodeProperties) { + if self.prop.max_expire_ts == 0 && self.prop.min_expire_ts == 0 { + return; + } + encode_ttl(&self.prop, properties); + } +} + +impl TablePropertiesCollector for TtlPropertiesCollector { + fn name(&self) -> &CStr { + ttl_properties_collector_name() + } + + fn add( + &mut self, + key: &[u8], + value: &[u8], + entry_type: EntryType, + _: SequenceNumber, + _: u64, + ) -> tirocks::Result<()> { + if entry_type != EntryType::kEntryPut { + return Ok(()); + } + // Only consider data keys. + if !key.starts_with(keys::DATA_PREFIX_KEY) { + return Ok(()); + } + // Only consider raw keys. + if F::parse_key_mode(&key[keys::DATA_PREFIX_KEY.len()..]) != KeyMode::Raw { + return Ok(()); + } + + match F::decode_raw_value(value) { + Ok(RawValue { + expire_ts: Some(expire_ts), + .. 
+ }) => { + self.prop.max_expire_ts = std::cmp::max(self.prop.max_expire_ts, expire_ts); + if self.prop.min_expire_ts == 0 { + self.prop.min_expire_ts = expire_ts; + } else { + self.prop.min_expire_ts = std::cmp::min(self.prop.min_expire_ts, expire_ts); + } + } + Err(err) => { + error!( + "failed to get expire ts"; + "key" => log_wrappers::Value::key(key), + "value" => log_wrappers::Value::value(value), + "err" => %err, + ); + } + _ => {} + } + Ok(()) + } + + fn finish(&mut self, properties: &mut UserCollectedProperties) -> tirocks::Result<()> { + self.finish(properties); + Ok(()) + } +} + +fn ttl_properties_collector_name() -> &'static CStr { + CStr::from_bytes_with_nul(b"tikv.ttl-properties-collector\0").unwrap() +} + +#[derive(Default)] +pub struct TtlPropertiesCollectorFactory { + _phantom: PhantomData, +} + +impl TablePropertiesCollectorFactory for TtlPropertiesCollectorFactory { + type Collector = TtlPropertiesCollector; + + fn name(&self) -> &CStr { + ttl_properties_collector_name() + } + + fn create_table_properties_collector(&self, _: Context) -> TtlPropertiesCollector { + TtlPropertiesCollector { + prop: Default::default(), + _phantom: PhantomData, + } + } +} + +#[cfg(test)] +mod tests { + use api_version::test_kv_format_impl; + use collections::HashMap; + use kvproto::kvrpcpb::ApiVersion; + use tikv_util::time::UnixSecs; + + use super::*; + + #[test] + fn test_ttl_properties() { + test_kv_format_impl!(test_ttl_properties_impl); + } + + fn test_ttl_properties_impl() { + let get_properties = |case: &[(&'static str, u64)]| -> codec::Result { + let mut collector = TtlPropertiesCollector:: { + prop: Default::default(), + _phantom: PhantomData, + }; + for &(k, ts) in case { + let v = RawValue { + user_value: &[0; 10][..], + expire_ts: Some(ts), + is_delete: false, + }; + collector + .add( + k.as_bytes(), + &F::encode_raw_value(v), + EntryType::kEntryPut, + 0, + 0, + ) + .unwrap(); + } + for &(k, _) in case { + let v = vec![0; 10]; + collector + 
.add(k.as_bytes(), &v, EntryType::kEntryOther, 0, 0) + .unwrap(); + } + let mut result = HashMap::default(); + collector.finish(&mut result); + decode_ttl(&result) + }; + + let case1 = [ + ("zr\0a", 0), + ("zr\0b", UnixSecs::now().into_inner()), + ("zr\0c", 1), + ("zr\0d", u64::MAX), + ("zr\0e", 0), + ]; + let props = get_properties(&case1).unwrap(); + assert_eq!(props.max_expire_ts, u64::MAX); + match F::TAG { + ApiVersion::V1 => unreachable!(), + ApiVersion::V1ttl => assert_eq!(props.min_expire_ts, 1), + // expire_ts = 0 is no longer a special case in API V2 + ApiVersion::V2 => assert_eq!(props.min_expire_ts, 0), + } + + let case2 = [("zr\0a", 0)]; + get_properties(&case2).unwrap_err(); + + let case3 = []; + get_properties(&case3).unwrap_err(); + + let case4 = [("zr\0a", 1)]; + let props = get_properties(&case4).unwrap(); + assert_eq!(props.max_expire_ts, 1); + assert_eq!(props.min_expire_ts, 1); + } +} From 6d71c2bf9101db808d63eb1a3ae94d1194e208ac Mon Sep 17 00:00:00 2001 From: Ping Yu Date: Tue, 11 Oct 2022 11:39:50 +0800 Subject: [PATCH 0259/1149] rawkv: Fix unstable test-region-merge (#13580) close tikv/tikv#13582 Fix unstable test `test_rawkv::test_region_merge`. 
Signed-off-by: pingyu --- components/causal_ts/src/tso.rs | 3 --- tests/failpoints/cases/test_rawkv.rs | 22 +++++++++++----------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/components/causal_ts/src/tso.rs b/components/causal_ts/src/tso.rs index 5a9d119f6d5..ad9f3ec1fc6 100644 --- a/components/causal_ts/src/tso.rs +++ b/components/causal_ts/src/tso.rs @@ -622,9 +622,6 @@ impl CausalTsProvider for BatchTsoProvider { } async fn async_flush(&self) -> Result { - fail::fail_point!("causal_ts_provider_flush", |_| Err(box_err!( - "async_flush err(failpoints)" - ))); self.renew_tso_batch(true, TsoBatchRenewReason::flush) .await?; // TODO: Return the first tso by renew_tso_batch instead of async_get_ts diff --git a/tests/failpoints/cases/test_rawkv.rs b/tests/failpoints/cases/test_rawkv.rs index fd56bd87992..547b6144c7c 100644 --- a/tests/failpoints/cases/test_rawkv.rs +++ b/tests/failpoints/cases/test_rawkv.rs @@ -141,7 +141,7 @@ impl TestSuite { } } -const FP_CAUSAL_TS_PROVIDER_FLUSH: &str = "causal_ts_provider_flush"; +const FP_GET_TSO: &str = "test_raftstore_get_tso"; /// Verify correctness on leader transfer. // TODO: simulate and test for the scenario of issue #12498. @@ -164,8 +164,8 @@ fn test_leader_transfer() { assert_eq!(suite.must_raw_get(key1), Some(b"v4".to_vec())); } - // Disable CausalObserver::flush_timestamp to produce causality issue. - fail::cfg(FP_CAUSAL_TS_PROVIDER_FLUSH, "return").unwrap(); + // Make causal_ts_provider.async_flush() & handle_update_max_timestamp fail. + fail::cfg(FP_GET_TSO, "return(50)").unwrap(); // Transfer leader and write to store 2. { @@ -182,8 +182,8 @@ fn test_leader_transfer() { // Transfer leader back. suite.must_transfer_leader(®ion, 1); suite.must_leader_on_store(key1, 1); - // Enable CausalObserver::flush_timestamp. - fail::cfg(FP_CAUSAL_TS_PROVIDER_FLUSH, "off").unwrap(); + // Make handle_update_max_timestamp succeed. 
+ fail::cfg(FP_GET_TSO, "off").unwrap(); // Transfer leader and write to store 2 again. { suite.must_transfer_leader(®ion, 2); @@ -195,7 +195,7 @@ fn test_leader_transfer() { assert_eq!(suite.must_raw_get(key1), Some(b"v8".to_vec())); } - fail::remove(FP_CAUSAL_TS_PROVIDER_FLUSH); + fail::remove(FP_GET_TSO); suite.stop(); } @@ -238,8 +238,8 @@ fn test_region_merge() { assert_eq!(suite.must_raw_get(keys[1]), Some(b"v4".to_vec())); } - // Disable CausalObserver::flush_timestamp to produce causality issue. - fail::cfg(FP_CAUSAL_TS_PROVIDER_FLUSH, "return").unwrap(); + // Make causal_ts_provider.async_flush() & handle_update_max_timestamp fail. + fail::cfg(FP_GET_TSO, "return(50)").unwrap(); // Merge region 1 to 3. { @@ -253,8 +253,8 @@ fn test_region_merge() { assert_eq!(suite.must_raw_get(keys[1]), Some(b"v4".to_vec())); } - // Enable CausalObserver::flush_timestamp. - fail::cfg(FP_CAUSAL_TS_PROVIDER_FLUSH, "off").unwrap(); + // Make handle_update_max_timestamp succeed. + fail::cfg(FP_GET_TSO, "off").unwrap(); // Merge region 3 to 5. 
{ @@ -268,6 +268,6 @@ fn test_region_merge() { assert_eq!(suite.must_raw_get(keys[1]), Some(b"v8".to_vec())); } - fail::remove(FP_CAUSAL_TS_PROVIDER_FLUSH); + fail::remove(FP_GET_TSO); suite.stop(); } From 94c8aa5c3c5335ea99281bea7c6c79f61cb201de Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 11 Oct 2022 16:47:50 +0800 Subject: [PATCH 0260/1149] raftstore-v2: implement local read for raftstore-v2 (#13375) ref tikv/tikv#12842 Signed-off-by: SpadeA-Tang Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/batch/store.rs | 32 +- components/raftstore-v2/src/bootstrap.rs | 2 +- components/raftstore-v2/src/fsm/peer.rs | 3 +- components/raftstore-v2/src/fsm/store.rs | 1 - .../src/operation/command/write/mod.rs | 1 - .../operation/command/write/simple_write.rs | 3 - components/raftstore-v2/src/operation/life.rs | 2 +- components/raftstore-v2/src/operation/mod.rs | 2 + .../raftstore-v2/src/operation/query/lease.rs | 27 +- .../raftstore-v2/src/operation/query/local.rs | 641 ++++++++++++++++-- .../raftstore-v2/src/operation/query/mod.rs | 4 +- .../src/operation/query/replica.rs | 14 +- .../src/operation/ready/async_writer.rs | 10 +- .../raftstore-v2/src/operation/ready/mod.rs | 7 +- components/raftstore-v2/src/raft/peer.rs | 60 +- components/raftstore-v2/src/raft/storage.rs | 4 +- components/raftstore-v2/src/router/imp.rs | 75 +- components/raftstore-v2/src/router/message.rs | 10 +- components/raftstore-v2/src/router/mod.rs | 1 + .../src/router/response_channel.rs | 25 +- .../tests/integrations/cluster.rs | 24 +- .../tests/integrations/test_read.rs | 43 ++ components/raftstore/src/router.rs | 14 +- components/raftstore/src/store/mod.rs | 9 +- components/raftstore/src/store/peer.rs | 8 +- components/raftstore/src/store/worker/mod.rs | 6 +- components/raftstore/src/store/worker/read.rs | 512 +++++++------- 27 files changed, 1120 insertions(+), 420 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs 
b/components/raftstore-v2/src/batch/store.rs index bd777477bf0..b387300b40e 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -1,10 +1,8 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. use std::{ - cell::Cell, - mem, ops::{Deref, DerefMut}, - sync::{atomic::AtomicUsize, Arc, Mutex}, + sync::{Arc, Mutex}, time::Duration, }; @@ -23,7 +21,7 @@ use kvproto::{ use raft::INVALID_ID; use raftstore::store::{ fsm::store::PeerTickBatch, local_metrics::RaftMetrics, Config, RaftlogFetchRunner, - RaftlogFetchTask, StoreWriters, Transport, WriteMsg, WriteSenders, + RaftlogFetchTask, StoreWriters, Transport, WriteSenders, }; use slog::Logger; use tikv_util::{ @@ -42,8 +40,8 @@ use time::Timespec; use crate::{ fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate, StoreMeta}, - raft::{Peer, Storage}, - router::{PeerMsg, PeerTick, QueryResChannel, StoreMsg}, + raft::Storage, + router::{PeerMsg, PeerTick, StoreMsg}, Error, Result, }; @@ -390,7 +388,7 @@ impl StoreSystem { log_fetch_scheduler, &mut workers.store_writers, self.logger.clone(), - store_meta, + store_meta.clone(), ); self.workers = Some(workers); let peers = builder.init()?; @@ -401,12 +399,20 @@ impl StoreSystem { let mut mailboxes = Vec::with_capacity(peers.len()); let mut address = Vec::with_capacity(peers.len()); - for (region_id, (tx, fsm)) in peers { - address.push(region_id); - mailboxes.push(( - region_id, - BasicMailbox::new(tx, fsm, router.state_cnt().clone()), - )); + { + let mut meta = store_meta.as_ref().lock().unwrap(); + for (region_id, (tx, fsm)) in peers { + meta.readers + .insert(region_id, fsm.peer().generate_read_delegate()); + meta.tablet_caches + .insert(region_id, fsm.peer().tablet().clone()); + + address.push(region_id); + mailboxes.push(( + region_id, + BasicMailbox::new(tx, fsm, router.state_cnt().clone()), + )); + } } router.register_all(mailboxes); diff --git 
a/components/raftstore-v2/src/bootstrap.rs b/components/raftstore-v2/src/bootstrap.rs index c3e2d2de6f7..6700db4d45f 100644 --- a/components/raftstore-v2/src/bootstrap.rs +++ b/components/raftstore-v2/src/bootstrap.rs @@ -7,7 +7,7 @@ use error_code::ErrorCodeExt; use fail::fail_point; use kvproto::{ metapb::{Region, Store}, - raft_serverpb::{RaftLocalState, RegionLocalState, StoreIdent}, + raft_serverpb::{RaftLocalState, StoreIdent}, }; use pd_client::PdClient; use raft::INVALID_ID; diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index cd8775359fc..389e59f0ee4 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -7,12 +7,11 @@ use std::borrow::Cow; use batch_system::{BasicMailbox, Fsm}; use crossbeam::channel::TryRecvError; use engine_traits::{KvEngine, RaftEngine, TabletFactory}; -use kvproto::metapb; use raftstore::store::{Config, Transport}; use slog::{debug, error, info, trace, Logger}; use tikv_util::{ is_zero_duration, - mpsc::{self, LooseBoundedSender, Receiver, Sender}, + mpsc::{self, LooseBoundedSender, Receiver}, time::{duration_to_sec, Instant}, yatp_pool::FuturePool, }; diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index d55d132679f..3be571bdfbc 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -4,7 +4,6 @@ use std::time::SystemTime; use batch_system::Fsm; use collections::HashMap; -use crossbeam::channel::TryRecvError; use engine_traits::{KvEngine, RaftEngine}; use raftstore::store::{Config, ReadDelegate}; use slog::{o, Logger}; diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 76692b6af0a..798e1b45631 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -11,7 +11,6 @@ use 
raftstore::{ }, Result, }; -use tikv_util::Either; use crate::{ batch::StoreContext, diff --git a/components/raftstore-v2/src/operation/command/write/simple_write.rs b/components/raftstore-v2/src/operation/command/write/simple_write.rs index 82628a40385..46544be1a32 100644 --- a/components/raftstore-v2/src/operation/command/write/simple_write.rs +++ b/components/raftstore-v2/src/operation/command/write/simple_write.rs @@ -1,11 +1,8 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::borrow::Cow; - use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, RaftRequestHeader, Request}; use protobuf::{CodedInputStream, Message, SingularPtrField}; -use tikv_util::Either; use crate::router::CmdResChannel; diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 2cf7594b9a7..678cf6ece4b 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -25,7 +25,7 @@ use tikv_util::store::find_peer; use crate::{ batch::StoreContext, - fsm::{PeerFsm, Store, StoreFsmDelegate}, + fsm::{PeerFsm, Store}, raft::{Peer, Storage}, router::PeerMsg, }; diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index ebef0cf0595..7b31473f784 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -8,3 +8,5 @@ mod ready; pub use command::{CommittedEntries, SimpleWriteDecoder, SimpleWriteEncoder}; pub use life::DestroyProgress; pub use ready::AsyncWriter; + +pub(crate) use self::query::LocalReader; diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index fe25fdab454..00a485c8460 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -3,24 +3,14 @@ use std::sync::{Arc, 
Mutex}; use engine_traits::{KvEngine, RaftEngine}; -use kvproto::{ - kvrpcpb::ExtraOp as TxnExtraOp, - raft_cmdpb::{self, RaftCmdRequest, RaftCmdResponse}, +use kvproto::raft_cmdpb::RaftCmdRequest; +use raftstore::store::{ + can_amend_read, fsm::apply::notify_stale_req, metrics::RAFT_READ_INDEX_PENDING_COUNT, + msg::ReadCallback, propose_read_index, should_renew_lease, util::LeaseState, ReadDelegate, + ReadIndexRequest, ReadProgress, TrackVer, Transport, }; -use raftstore::{ - store::{ - can_amend_read, cmd_resp, - fsm::{apply::notify_stale_req, Proposal}, - metrics::RAFT_READ_INDEX_PENDING_COUNT, - msg::{ErrorCallback, ReadCallback}, - propose_read_index, should_renew_lease, - util::{check_region_epoch, LeaseState}, - ReadDelegate, ReadIndexRequest, ReadProgress, TrackVer, Transport, - }, - Error, -}; -use slog::{debug, error, info, o, Logger}; -use tikv_util::{box_err, time::monotonic_raw_now, Either}; +use slog::debug; +use tikv_util::time::monotonic_raw_now; use time::Timespec; use tracker::GLOBAL_TRACKERS; @@ -28,8 +18,7 @@ use crate::{ batch::StoreContext, fsm::StoreMeta, raft::Peer, - router::{CmdResChannel, QueryResChannel, QueryResult, ReadResponse}, - Result, + router::{QueryResChannel, QueryResult, ReadResponse}, }; impl Peer { diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index a0535643eb3..bdf829dc4f5 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -2,44 +2,251 @@ // #[PerformanceCriticalPath] use std::{ - cell::Cell, - collections::HashMap, - fmt::{self, Display, Formatter}, - marker::PhantomData, ops::Deref, - sync::{ - atomic::{AtomicU64, Ordering}, - Arc, Mutex, - }, - time::Duration, + sync::{atomic, Arc, Mutex}, }; -use crossbeam::atomic::AtomicCell; -use engine_traits::{KvEngine, RaftEngine, Snapshot, TabletFactory}; -use fail::fail_point; +use batch_system::Router; +use 
crossbeam::channel::TrySendError; +use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ - metapb, - raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse, ReadIndexResponse, Request, Response}, + errorpb, + raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse}, }; -use pd_client::BucketMeta; use raftstore::{ + errors::RAFTSTORE_IS_BUSY, store::{ - cmd_resp, - util::{self, LeaseState, RegionReadProgress, RemoteLease}, - ReadDelegate, ReadExecutor, ReadExecutorProvider, ReadProgress, ReadResponse, - RegionSnapshot, RequestInspector, RequestPolicy, + cmd_resp, util::LeaseState, LocalReaderCore, ReadDelegate, ReadExecutor, + ReadExecutorProvider, RegionSnapshot, RequestInspector, RequestPolicy, + TLS_LOCAL_READ_METRICS, }, Error, Result, }; -use slog::{debug, error, info, o, warn, Logger}; +use slog::{debug, Logger}; use tikv_util::{ + box_err, codec::number::decode_u64, - lru::LruCache, - time::{monotonic_raw_now, Instant, ThreadReadId}, + time::{monotonic_raw_now, ThreadReadId}, }; use time::Timespec; +use txn_types::WriteBatchFlags; + +use crate::{ + fsm::StoreMeta, + router::{PeerMsg, QueryResult}, + tablet::CachedTablet, + StoreRouter, +}; + +pub trait MsgRouter: Send { + fn send(&self, addr: u64, msg: PeerMsg) -> std::result::Result<(), TrySendError>; +} + +impl MsgRouter for StoreRouter +where + EK: KvEngine, + ER: RaftEngine, +{ + fn send(&self, addr: u64, msg: PeerMsg) -> std::result::Result<(), TrySendError> { + Router::send(self, addr, msg) + } +} + +#[derive(Clone)] +pub struct LocalReader +where + E: KvEngine, + C: MsgRouter, +{ + local_reader: LocalReaderCore, StoreMetaDelegate>, + router: C, + + logger: Logger, +} + +impl LocalReader +where + E: KvEngine, + C: MsgRouter, +{ + pub fn new(store_meta: Arc>>, router: C, logger: Logger) -> Self { + Self { + local_reader: LocalReaderCore::new(StoreMetaDelegate::new(store_meta)), + router, + logger, + } + } + + pub fn store_meta(&self) -> &Arc>> { + self.local_reader.store_meta() + } + + pub fn 
pre_propose_raft_command( + &mut self, + req: &RaftCmdRequest, + ) -> Result, RequestPolicy)>> { + if let Some(delegate) = self.local_reader.validate_request(req)? { + let mut inspector = SnapRequestInspector { + delegate: &delegate, + logger: &self.logger, + }; + match inspector.inspect(req) { + Ok(RequestPolicy::ReadLocal) => Ok(Some((delegate, RequestPolicy::ReadLocal))), + Ok(RequestPolicy::StaleRead) => Ok(Some((delegate, RequestPolicy::StaleRead))), + // It can not handle other policies. + Ok(_) => Ok(None), + Err(e) => Err(e), + } + } else { + Err(Error::RegionNotFound(req.get_header().get_region_id())) + } + } + + fn try_get_snapshot( + &mut self, + req: RaftCmdRequest, + ) -> std::result::Result>, RaftCmdResponse> { + match self.pre_propose_raft_command(&req) { + Ok(Some((mut delegate, policy))) => match policy { + RequestPolicy::ReadLocal => { + let region = Arc::clone(&delegate.region); + let snap = RegionSnapshot::from_snapshot( + delegate.get_snapshot(None, &mut None), + region, + ); + // Ensures the snapshot is acquired before getting the time + atomic::fence(atomic::Ordering::Release); + let snapshot_ts = monotonic_raw_now(); + + if !delegate.is_in_leader_lease(snapshot_ts) { + return Ok(None); + } + + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().local_executed_requests.inc()); + + // Try renew lease in advance + self.maybe_renew_lease_in_advance(&delegate, &req, snapshot_ts); + Ok(Some(snap)) + } + RequestPolicy::StaleRead => { + let read_ts = decode_u64(&mut req.get_header().get_flag_data()).unwrap(); + delegate.check_stale_read_safe(read_ts)?; + + let region = Arc::clone(&delegate.region); + let snap = RegionSnapshot::from_snapshot( + delegate.get_snapshot(None, &mut None), + region, + ); + + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().local_executed_requests.inc()); + + delegate.check_stale_read_safe(read_ts)?; -use crate::{fsm::StoreMeta, tablet::CachedTablet}; + TLS_LOCAL_READ_METRICS + .with(|m| 
m.borrow_mut().local_executed_stale_read_requests.inc()); + Ok(Some(snap)) + } + _ => unreachable!(), + }, + Ok(None) => Ok(None), + Err(e) => { + let mut response = cmd_resp::new_error(e); + if let Some(delegate) = self + .local_reader + .delegates + .get(&req.get_header().get_region_id()) + { + cmd_resp::bind_term(&mut response, delegate.term); + } + Err(response) + } + } + } + + pub async fn snapshot( + &mut self, + mut req: RaftCmdRequest, + ) -> std::result::Result, RaftCmdResponse> { + let region_id = req.header.get_ref().region_id; + if let Some(snap) = self.try_get_snapshot(req.clone())? { + return Ok(snap); + } + + if let Some(query_res) = self.try_to_renew_lease(region_id, &req).await? { + // If query successful, try again. + if query_res.read().is_some() { + req.mut_header().set_read_quorum(false); + if let Some(snap) = self.try_get_snapshot(req)? { + return Ok(snap); + } + } + } + + let mut err = errorpb::Error::default(); + err.set_message(format!( + "Fail to get snapshot from LocalReader for region {}. 
Maybe due to `not leader` or `not applied to the current term`", + region_id + )); + let mut resp = RaftCmdResponse::default(); + resp.mut_header().set_error(err); + Err(resp) + } + + // try to renew the lease by sending read query where the reading process may + // renew the lease + async fn try_to_renew_lease( + &self, + region_id: u64, + req: &RaftCmdRequest, + ) -> std::result::Result, RaftCmdResponse> { + let (msg, sub) = PeerMsg::raft_query(req.clone()); + let mut err = errorpb::Error::default(); + match MsgRouter::send(&self.router, region_id, msg) { + Ok(()) => return Ok(sub.result().await), + Err(TrySendError::Full(c)) => { + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.channel_full.inc()); + err.set_message(RAFTSTORE_IS_BUSY.to_owned()); + err.mut_server_is_busy() + .set_reason(RAFTSTORE_IS_BUSY.to_owned()); + } + Err(TrySendError::Disconnected(c)) => { + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.no_region.inc()); + err.set_message(format!("region {} is missing", region_id)); + err.mut_region_not_found().set_region_id(region_id); + } + } + + let mut resp = RaftCmdResponse::default(); + resp.mut_header().set_error(err); + Err(resp) + } + + // If the remote lease will be expired in near future send message + // to `raftstore` to renew it + fn maybe_renew_lease_in_advance( + &self, + delegate: &ReadDelegate, + req: &RaftCmdRequest, + ts: Timespec, + ) { + if !delegate.need_renew_lease(ts) { + return; + } + + let region_id = req.header.get_ref().region_id; + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().renew_lease_advance.inc()); + // Send a read query which may renew the lease + let (msg, sub) = PeerMsg::raft_query(req.clone()); + if let Err(e) = MsgRouter::send(&self.router, region_id, msg) { + debug!( + self.logger, + "failed to send query for trying to renew lease"; + "region" => region_id, + "error" => ?e + ) + } + } +} /// CachedReadDelegate is a wrapper the ReadDelegate and CachedTablet. 
/// CachedTablet can fetch the latest tablet of this ReadDelegate's region. The @@ -78,10 +285,12 @@ where } } -impl ReadExecutor for CachedReadDelegate +impl ReadExecutor for CachedReadDelegate where E: KvEngine, { + type Tablet = E; + fn get_tablet(&mut self) -> &E { self.cached_tablet.latest().unwrap() } @@ -112,11 +321,12 @@ where } } -impl ReadExecutorProvider for StoreMetaDelegate +impl ReadExecutorProvider for StoreMetaDelegate where E: KvEngine, { type Executor = CachedReadDelegate; + type StoreMeta = Arc>>; fn store_id(&self) -> Option { self.store_meta.as_ref().lock().unwrap().store_id @@ -140,41 +350,375 @@ where } (meta.readers.len(), None) } + + fn store_meta(&self) -> &Self::StoreMeta { + &self.store_meta + } +} + +struct SnapRequestInspector<'r> { + delegate: &'r ReadDelegate, + logger: &'r Logger, +} + +impl<'r> SnapRequestInspector<'r> { + fn inspect(&mut self, req: &RaftCmdRequest) -> Result { + assert!(!req.has_admin_request()); + if req.get_requests().len() != 1 + || req.get_requests().first().unwrap().get_cmd_type() != CmdType::Snap + { + return Err(box_err!( + "LocalReader can only serve for exactly one Snap request" + )); + } + + let flags = WriteBatchFlags::from_bits_check(req.get_header().get_flags()); + if flags.contains(WriteBatchFlags::STALE_READ) { + return Ok(RequestPolicy::StaleRead); + } + + if req.get_header().get_read_quorum() { + return Ok(RequestPolicy::ReadIndex); + } + + // If applied index's term is differ from current raft's term, leader transfer + // must happened, if read locally, we may read old value. + if !self.has_applied_to_current_term() { + return Ok(RequestPolicy::ReadIndex); + } + + // Local read should be performed, if and only if leader is in lease. + // None for now. + match self.inspect_lease() { + LeaseState::Valid => Ok(RequestPolicy::ReadLocal), + LeaseState::Expired | LeaseState::Suspect => { + // Perform a consistent read to Raft quorum and try to renew the leader lease. 
+ Ok(RequestPolicy::ReadIndex) + } + } + } + + fn has_applied_to_current_term(&mut self) -> bool { + if self.delegate.applied_term == self.delegate.term { + true + } else { + debug!( + self.logger, + "rejected by term check"; + "tag" => &self.delegate.tag, + "applied_term" => self.delegate.applied_term, + "delegate_term" => ?self.delegate.term, + ); + + // only for metric. + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.applied_term.inc()); + false + } + } + + fn inspect_lease(&mut self) -> LeaseState { + // TODO: disable localreader if we did not enable raft's check_quorum. + if self.delegate.leader_lease.is_some() { + // We skip lease check, because it is postponed until `handle_read`. + LeaseState::Valid + } else { + debug!(self.logger, "rejected by leader lease"; "tag" => &self.delegate.tag); + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.no_lease.inc()); + LeaseState::Expired + } + } } #[cfg(test)] mod tests { - use std::{borrow::Borrow, sync::mpsc::*, thread}; + use std::{ + cell::Cell, + sync::mpsc::*, + thread::{self, JoinHandle}, + }; + use crossbeam::{atomic::AtomicCell, channel::TrySendError}; use engine_test::{ ctor::{CfOptions, DbOptions}, - kv::{KvTestEngine, KvTestSnapshot, TestTabletFactoryV2}, + kv::{KvTestEngine, TestTabletFactoryV2}, }; - use engine_traits::{OpenOptions, Peekable, SyncMutable, ALL_CFS, CF_DEFAULT}; - use kvproto::{metapb::Region, raft_cmdpb::*}; + use engine_traits::{OpenOptions, Peekable, SyncMutable, TabletFactory, ALL_CFS}; + use futures::executor::block_on; + use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, raft_cmdpb::*}; use raftstore::store::{ - util::Lease, Callback, CasualMessage, CasualRouter, LocalReader, ProposalRouter, - RaftCommand, + util::Lease, ReadCallback, ReadProgress, RegionReadProgress, TrackVer, TxnExt, + TLS_LOCAL_READ_METRICS, }; - use tempfile::{Builder, TempDir}; + use slog::o; + use tempfile::Builder; use tikv_util::{codec::number::NumberEncoder, 
time::monotonic_raw_now}; use time::Duration; - use txn_types::{Key, Lock, LockType, WriteBatchFlags}; + use txn_types::WriteBatchFlags; use super::*; + use crate::router::{QueryResult, ReadResponse}; + + struct MockRouter { + p_router: SyncSender<(u64, PeerMsg)>, + } + + impl MockRouter { + fn new() -> (MockRouter, Receiver<(u64, PeerMsg)>) { + let (p_ch, p_rx) = sync_channel(1); + (MockRouter { p_router: p_ch }, p_rx) + } + } + + impl MsgRouter for MockRouter { + fn send(&self, addr: u64, cmd: PeerMsg) -> std::result::Result<(), TrySendError> { + self.p_router.send((addr, cmd)).unwrap(); + Ok(()) + } + } + + #[allow(clippy::type_complexity)] + fn new_reader( + store_id: u64, + store_meta: Arc>>, + ) -> ( + LocalReader, + Receiver<(u64, PeerMsg)>, + ) { + let (ch, rx) = MockRouter::new(); + let mut reader = LocalReader::new( + store_meta, + ch, + Logger::root(slog::Discard, o!("key1" => "value1")), + ); + reader.local_reader.store_id = Cell::new(Some(store_id)); + (reader, rx) + } + + fn new_peers(store_id: u64, pr_ids: Vec) -> Vec { + pr_ids + .into_iter() + .map(|id| { + let mut pr = metapb::Peer::default(); + pr.set_store_id(store_id); + pr.set_id(id); + pr + }) + .collect() + } + + #[test] + fn test_read() { + // It mocks that local reader communications with raftstore. 
+ // rx receives msgs like raftstore, then call f() to do something (such as renew + // lease or something), then send the result back to the local reader through ch + fn handle_msg( + f: F, + rx: Receiver<(u64, PeerMsg)>, + ch_tx: SyncSender>, + ) -> JoinHandle<()> { + thread::spawn(move || { + // Msg for query will be sent + let (_, msg) = rx.recv().unwrap(); + + f(); + match msg { + PeerMsg::RaftQuery(query) => ReadCallback::set_result( + query.ch, + QueryResult::Read(ReadResponse { + read_index: 0, + txn_extra_op: Default::default(), + }), + ), + _ => unreachable!(), + } + ch_tx.send(rx).unwrap(); + }) + } + + let store_id = 1; + + // Building a tablet factory + let ops = DbOptions::default(); + let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); + let path = Builder::new() + .prefix("test-local-reader") + .tempdir() + .unwrap(); + let factory = Arc::new(TestTabletFactoryV2::new(path.path(), ops, cf_opts)); + + let store_meta = Arc::new(Mutex::new(StoreMeta::new())); + let (mut reader, mut rx) = new_reader(store_id, store_meta.clone()); - fn new_read_delegate( - region: &Region, - peer_id: u64, - term: u64, - applied_index_term: u64, - ) -> ReadDelegate { - let mut read_delegate_core = ReadDelegate::mock(region.id); - read_delegate_core.peer_id = peer_id; - read_delegate_core.term = term; - read_delegate_core.applied_term = applied_index_term; - read_delegate_core.region = Arc::new(region.clone()); - read_delegate_core + let mut region1 = metapb::Region::default(); + region1.set_id(1); + let prs = new_peers(store_id, vec![1, 2, 3]); + region1.set_peers(prs.clone().into()); + let epoch13 = { + let mut ep = metapb::RegionEpoch::default(); + ep.set_conf_ver(1); + ep.set_version(3); + ep + }; + let leader2 = prs[0].clone(); + region1.set_region_epoch(epoch13.clone()); + let term6 = 6; + let mut lease = Lease::new(Duration::seconds(1), Duration::milliseconds(250)); + let read_progress = Arc::new(RegionReadProgress::new(®ion1, 1, 1, 
"".to_owned())); + + let mut cmd = RaftCmdRequest::default(); + let mut header = RaftRequestHeader::default(); + header.set_region_id(1); + header.set_peer(leader2); + header.set_region_epoch(epoch13); + header.set_term(term6); + cmd.set_header(header); + let mut req = Request::default(); + req.set_cmd_type(CmdType::Snap); + cmd.set_requests(vec![req].into()); + + // The region is not register yet. + let res = block_on(reader.snapshot(cmd.clone())).unwrap_err(); + assert!( + res.header + .as_ref() + .unwrap() + .get_error() + .has_region_not_found() + ); + // No msg will ben sent + rx.try_recv().unwrap_err(); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.no_region.get()), + 1 + ); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), + 1 + ); + assert!(reader.local_reader.delegates.get(&1).is_none()); + + // Register region 1 + lease.renew(monotonic_raw_now()); + let remote = lease.maybe_new_remote_lease(term6).unwrap(); + { + let mut meta = store_meta.as_ref().lock().unwrap(); + + // Create read_delegate with region id 1 + let read_delegate = ReadDelegate { + tag: String::new(), + region: Arc::new(region1.clone()), + peer_id: 1, + term: term6, + applied_term: term6 - 1, + leader_lease: Some(remote), + last_valid_ts: Timespec::new(0, 0), + txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::default())), + txn_ext: Arc::new(TxnExt::default()), + read_progress: read_progress.clone(), + pending_remove: false, + track_ver: TrackVer::new(), + bucket_meta: None, + }; + meta.readers.insert(1, read_delegate); + // create tablet with region_id 1 and prepare some data + let tablet1 = factory + .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) + .unwrap(); + let cache = CachedTablet::new(Some(tablet1)); + meta.tablet_caches.insert(1, cache); + } + + let (ch_tx, ch_rx) = sync_channel(1); + + // Case: Applied term not match + let store_meta_clone = store_meta.clone(); + let handler = 
handle_msg( + move || { + let mut meta = store_meta_clone.lock().unwrap(); + meta.readers + .get_mut(&1) + .unwrap() + .update(ReadProgress::applied_term(term6)); + }, + rx, + ch_tx.clone(), + ); + // The first try will be rejected due to unmatched applied term but after update + // the applied term by the above thread, the snapshot will be acquired by + // retrying. + let snap = block_on(reader.snapshot(cmd.clone())).unwrap(); + assert_eq!(*snap.get_region(), region1); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), + 3 + ); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.applied_term.get()), + 1 + ); + handler.join().unwrap(); + rx = ch_rx.recv().unwrap(); + + // Case: Expire lease to make the local reader lease check fail. + lease.expire_remote_lease(); + let remote = lease.maybe_new_remote_lease(term6).unwrap(); + let handler = handle_msg( + move || { + let mut meta = store_meta.lock().unwrap(); + meta.readers + .get_mut(&1) + .unwrap() + .update(ReadProgress::leader_lease(remote)); + }, + rx, + ch_tx.clone(), + ); + let snap = block_on(reader.snapshot(cmd.clone())).unwrap(); + // Updating lease makes cache miss. + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), + 4 + ); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.lease_expire.get()), + 1 + ); + handler.join().unwrap(); + rx = ch_rx.recv().unwrap(); + + // Case: Read quorum. 
+ let mut cmd_read_quorum = cmd.clone(); + cmd_read_quorum.mut_header().set_read_quorum(true); + let handler = handle_msg(|| {}, rx, ch_tx); + let _ = block_on(reader.snapshot(cmd_read_quorum.clone())).unwrap(); + handler.join().unwrap(); + ch_rx.recv().unwrap(); + + // Case: Stale read + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.safe_ts.get()), + 0 + ); + read_progress.update_safe_ts(1, 1); + assert_eq!(read_progress.safe_ts(), 1); + let data = { + let mut d = [0u8; 8]; + (&mut d[..]).encode_u64(2).unwrap(); + d + }; + cmd.mut_header() + .set_flags(WriteBatchFlags::STALE_READ.bits()); + cmd.mut_header().set_flag_data(data.into()); + let res = block_on(reader.snapshot(cmd.clone())).unwrap_err(); + assert!(res.get_header().get_error().has_data_is_not_ready()); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.safe_ts.get()), + 1 + ); + read_progress.update_safe_ts(1, 2); + assert_eq!(read_progress.safe_ts(), 2); + let snap = block_on(reader.snapshot(cmd.clone())).unwrap(); + assert_eq!(*snap.get_region(), region1); } #[test] @@ -197,7 +741,7 @@ mod tests { let mut meta = store_meta.store_meta.as_ref().lock().unwrap(); // Create read_delegate with region id 1 - let mut read_delegate = ReadDelegate::mock(1); + let read_delegate = ReadDelegate::mock(1); meta.readers.insert(1, read_delegate); // create tablet with region_id 1 and prepare some data @@ -208,9 +752,8 @@ mod tests { let cache = CachedTablet::new(Some(tablet1.clone())); meta.tablet_caches.insert(1, cache); - // Create read_delegate with region id 1 - let mut read_delegate = ReadDelegate::mock(2); - let cache = CachedTablet::new(Some(read_delegate.clone())); + // Create read_delegate with region id 2 + let read_delegate = ReadDelegate::mock(2); meta.readers.insert(2, read_delegate); // create tablet with region_id 1 and prepare some data diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 
14cedc7b212..b592b4819a5 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -25,7 +25,7 @@ use raftstore::{ store::{ cmd_resp, local_metrics::RaftMetrics, metrics::RAFT_READ_INDEX_PENDING_COUNT, msg::ErrorCallback, region_meta::RegionMeta, util, util::LeaseState, GroupState, - ReadCallback, ReadIndexContext, RequestPolicy, Transport, + ReadIndexContext, RequestPolicy, Transport, }, Error, Result, }; @@ -46,6 +46,8 @@ mod lease; mod local; mod replica; +pub(crate) use self::local::LocalReader; + impl<'a, EK: KvEngine, ER: RaftEngine, T: raftstore::store::Transport> PeerFsmDelegate<'a, EK, ER, T> { diff --git a/components/raftstore-v2/src/operation/query/replica.rs b/components/raftstore-v2/src/operation/query/replica.rs index 5a56a23663e..9433cd10c52 100644 --- a/components/raftstore-v2/src/operation/query/replica.rs +++ b/components/raftstore-v2/src/operation/query/replica.rs @@ -1,7 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use engine_traits::{KvEngine, RaftEngine}; -use kvproto::raft_cmdpb::{self, CmdType, RaftCmdRequest, RaftCmdResponse}; +use kvproto::raft_cmdpb::{self, RaftCmdRequest, RaftCmdResponse}; use pd_client::INVALID_ID; use raftstore::{ store::{ @@ -9,22 +9,18 @@ use raftstore::{ fsm::apply::notify_stale_req, metrics::RAFT_READ_INDEX_PENDING_COUNT, msg::{ErrorCallback, ReadCallback}, - propose_read_index, - util::check_region_epoch, - ReadIndexRequest, Transport, + propose_read_index, ReadIndexRequest, Transport, }, Error, }; -use slog::{debug, error, info, o, Logger}; -use tikv_util::{box_err, time::monotonic_raw_now}; -use time::Timespec; +use slog::debug; +use tikv_util::time::monotonic_raw_now; use tracker::GLOBAL_TRACKERS; use crate::{ batch::StoreContext, raft::Peer, - router::{message::RaftRequest, QueryResChannel, QueryResult, ReadResponse}, - Result, + router::{QueryResChannel, QueryResult, ReadResponse}, }; impl Peer { /// read index on follower diff --git a/components/raftstore-v2/src/operation/ready/async_writer.rs b/components/raftstore-v2/src/operation/ready/async_writer.rs index 3db4426ebf7..3ebc1f20da7 100644 --- a/components/raftstore-v2/src/operation/ready/async_writer.rs +++ b/components/raftstore-v2/src/operation/ready/async_writer.rs @@ -1,16 +1,12 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{ - collections::VecDeque, - sync::{atomic::AtomicUsize, Arc}, -}; +use std::collections::VecDeque; -use crossbeam::channel::Sender; use engine_traits::{KvEngine, RaftEngine}; use kvproto::raft_serverpb::RaftMessage; use raftstore::store::{ - local_metrics::RaftMetrics, Config, PersistedNotifier, WriteMsg, WriteRouter, - WriteRouterContext, WriteSenders, WriteTask, + local_metrics::RaftMetrics, Config, PersistedNotifier, WriteRouter, WriteRouterContext, + WriteSenders, WriteTask, }; use slog::{warn, Logger}; diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 2580b4bb79a..e20192394a6 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -32,8 +32,7 @@ use slog::{debug, error, trace, warn}; pub use self::async_writer::AsyncWriter; use crate::{ batch::StoreContext, - fsm::{PeerFsm, PeerFsmDelegate}, - operation::DestroyProgress, + fsm::PeerFsmDelegate, raft::{Peer, Storage}, router::PeerTick, }; @@ -83,7 +82,7 @@ impl Peer { } if msg.has_merge_target() { unimplemented!(); - return; + // return; } // We don't handle stale message like v1, as we rely on leader to actively // cleanup stale peers. @@ -103,7 +102,7 @@ impl Peer { } if msg.has_extra_msg() { unimplemented!(); - return; + // return; } // TODO: drop all msg append when the peer is uninitialized and has conflict // ranges with other peers. 
diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 8b69a52f623..e7ee6e7465a 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -4,39 +4,23 @@ use std::{mem, sync::Arc}; use crossbeam::atomic::AtomicCell; use engine_traits::{KvEngine, OpenOptions, RaftEngine, TabletFactory}; -use fail::fail_point; -use kvproto::{ - metapb, - raft_cmdpb::{self, RaftCmdRequest}, - raft_serverpb::RegionLocalState, -}; -use protobuf::Message; -use raft::{RawNode, StateRole, INVALID_ID}; -use raftstore::{ - store::{ - fsm::Proposal, - metrics::PEER_PROPOSE_LOG_SIZE_HISTOGRAM, - util::{Lease, RegionReadProgress}, - Config, EntryStorage, ProposalQueue, RaftlogFetchTask, ReadIndexQueue, ReadIndexRequest, - Transport, WriteRouter, - }, - Error, -}; -use slog::{debug, error, info, o, warn, Logger}; -use tikv_util::{ - box_err, - config::ReadableSize, - time::{monotonic_raw_now, Instant as TiInstant}, - worker::Scheduler, - Either, +use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb}; +use pd_client::BucketStat; +use raft::{RawNode, StateRole}; +use raftstore::store::{ + util::{Lease, RegionReadProgress}, + Config, EntryStorage, ProposalQueue, ReadDelegate, ReadIndexQueue, TrackVer, TxnExt, }; +use slog::Logger; +use tikv_util::{box_err, config::ReadableSize}; +use time::Timespec; use super::{storage::Storage, Apply}; use crate::{ fsm::{ApplyFsm, ApplyScheduler}, operation::{AsyncWriter, DestroyProgress, SimpleWriteEncoder}, router::{CmdResChannel, QueryResChannel}, - tablet::{self, CachedTablet}, + tablet::CachedTablet, Result, }; @@ -68,6 +52,12 @@ pub struct Peer { pending_reads: ReadIndexQueue, read_progress: Arc, leader_lease: Lease, + + /// region buckets. + region_buckets: Option, + /// Transaction extensions related to this peer. 
+ txn_ext: Arc, + txn_extra_op: Arc>, } impl Peer { @@ -149,6 +139,9 @@ impl Peer { cfg.raft_store_max_leader_lease(), cfg.renew_leader_lease_advance_duration(), ), + region_buckets: None, + txn_ext: Arc::default(), + txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::Noop)), }; // If this region has only one peer and I am the one, campaign directly. @@ -397,4 +390,19 @@ impl Peer { pub fn set_apply_scheduler(&mut self, apply_scheduler: ApplyScheduler) { self.apply_scheduler = Some(apply_scheduler); } + + pub fn generate_read_delegate(&self) -> ReadDelegate { + let peer_id = self.peer().get_id(); + + ReadDelegate::new( + peer_id, + self.term(), + self.region().clone(), + self.storage().entry_storage().applied_term(), + self.txn_extra_op.clone(), + self.txn_ext.clone(), + self.read_progress().clone(), + self.region_buckets.as_ref().map(|b| b.meta.clone()), + ) + } } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 2ada737c620..1615255ab23 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -8,7 +8,7 @@ use kvproto::{ raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState}, }; use raft::{ - eraftpb::{ConfState, Entry, HardState, Snapshot}, + eraftpb::{ConfState, Entry, Snapshot}, GetEntriesContext, RaftState, INVALID_ID, }; use raftstore::store::{ @@ -17,7 +17,7 @@ use raftstore::store::{ use slog::{o, Logger}; use tikv_util::{box_err, store::find_peer, worker::Scheduler}; -use crate::{Error, Result}; +use crate::Result; pub fn write_initial_states(wb: &mut impl RaftLogBatch, region: Region) -> Result<()> { let region_id = region.get_id(); diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 401961dfdb1..78abef13247 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -1,13 +1,84 @@ // Copyright 2022 TiKV Project Authors. 
Licensed under Apache-2.0. +use std::sync::{Arc, Mutex}; + +use crossbeam::channel::TrySendError; use engine_traits::{KvEngine, RaftEngine}; -use raftstore::store::{FetchedLogs, LogFetchedNotifier}; +use kvproto::{ + raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, + raft_serverpb::RaftMessage, +}; +use raftstore::store::{FetchedLogs, LogFetchedNotifier, RegionSnapshot}; +use slog::Logger; use super::PeerMsg; -use crate::batch::StoreRouter; +use crate::{batch::StoreRouter, operation::LocalReader, StoreMeta}; impl LogFetchedNotifier for StoreRouter { fn notify(&self, region_id: u64, fetched: FetchedLogs) { let _ = self.force_send(region_id, PeerMsg::FetchedLogs(fetched)); } } + +/// A router that routes messages to the raftstore +pub struct RaftRouter +where + EK: KvEngine, + ER: RaftEngine, +{ + router: StoreRouter, + local_reader: LocalReader>, +} + +impl Clone for RaftRouter +where + EK: KvEngine, + ER: RaftEngine, +{ + fn clone(&self) -> Self { + RaftRouter { + router: self.router.clone(), + local_reader: self.local_reader.clone(), + } + } +} + +impl RaftRouter { + pub fn new(store_id: u64, router: StoreRouter) -> Self { + let mut store_meta = StoreMeta::new(); + store_meta.store_id = Some(store_id); + let store_meta = Arc::new(Mutex::new(store_meta)); + + let logger = router.logger().clone(); + RaftRouter { + router: router.clone(), + local_reader: LocalReader::new(store_meta, router, logger), + } + } + + pub fn store_router(&self) -> &StoreRouter { + &self.router + } + + pub fn send(&self, addr: u64, msg: PeerMsg) -> Result<(), TrySendError> { + self.router.send(addr, msg) + } + + pub fn store_meta(&self) -> &Arc>> { + self.local_reader.store_meta() + } + + pub fn send_raft_message( + &self, + msg: Box, + ) -> std::result::Result<(), TrySendError>> { + self.router.send_raft_message(msg) + } + + pub async fn get_snapshot( + &mut self, + req: RaftCmdRequest, + ) -> std::result::Result, RaftCmdResponse> { + self.local_reader.snapshot(req).await + } +} diff --git 
a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 7be1be95554..fb323dca9d4 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -3,14 +3,8 @@ // #[PerformanceCriticalPath] use std::fmt; -use engine_traits::{KvEngine, Snapshot}; -use kvproto::{ - cdcpb::Event, - metapb, - raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, - raft_serverpb::RaftMessage, -}; -use raftstore::store::{metrics::RaftEventDurationType, FetchedLogs, RegionSnapshot}; +use kvproto::{raft_cmdpb::RaftCmdRequest, raft_serverpb::RaftMessage}; +use raftstore::store::{metrics::RaftEventDurationType, FetchedLogs}; use tikv_util::time::Instant; use super::{ diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs index 1ee580a12d2..e9e7cf6cfc8 100644 --- a/components/raftstore-v2/src/router/mod.rs +++ b/components/raftstore-v2/src/router/mod.rs @@ -7,6 +7,7 @@ mod response_channel; pub(crate) use self::internal_message::ApplyTask; pub use self::{ + imp::RaftRouter, internal_message::ApplyRes, message::{PeerMsg, PeerTick, RaftRequest, StoreMsg, StoreTick}, response_channel::{ diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index 9478eb52339..55219540c2f 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -16,7 +16,6 @@ use std::{ cell::UnsafeCell, fmt::{self, Debug, Formatter}, future::Future, - mem, pin::Pin, sync::{ atomic::{AtomicU64, Ordering}, @@ -25,18 +24,13 @@ use std::{ task::{Context, Poll}, }; -use engine_traits::Snapshot; use futures::task::AtomicWaker; -use kvproto::{ - kvrpcpb::ExtraOp as TxnExtraOp, - raft_cmdpb::{RaftCmdResponse, Response}, -}; +use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, raft_cmdpb::RaftCmdResponse}; use raftstore::store::{ local_metrics::TimeTracker, 
msg::ErrorCallback, region_meta::RegionMeta, ReadCallback, - RegionSnapshot, WriteCallback, + WriteCallback, }; use smallvec::SmallVec; -use tikv_util::memory::HeapSize; use tracker::TrackerToken; /// A struct allows to watch and notify specific events. @@ -224,7 +218,7 @@ pub struct BaseSubscriber { impl BaseSubscriber { /// Wait for the result. #[inline] - pub async fn result(mut self) -> Option { + pub async fn result(self) -> Option { WaitResult { core: &self.core }.await } } @@ -247,7 +241,7 @@ impl BaseChannel { /// Sets the final result. #[inline] - pub fn set_result(mut self, res: Res) { + pub fn set_result(self, res: Res) { self.core.set_result(res); } } @@ -334,7 +328,7 @@ impl WriteCallback for CmdResChannel { // TODO: support executing hooks inside setting result. #[inline] - fn set_result(mut self, res: RaftCmdResponse) { + fn set_result(self, res: RaftCmdResponse) { self.set_result(res); } } @@ -425,14 +419,13 @@ pub type DebugInfoSubscriber = BaseSubscriber; #[cfg(test)] mod tests { - use engine_test::kv::KvTestSnapshot; use futures::executor::block_on; use super::*; #[test] fn test_cancel() { - let (mut chan, mut sub) = CmdResChannel::pair(); + let (chan, mut sub) = CmdResChannel::pair(); drop(chan); assert!(!block_on(sub.wait_proposed())); assert!(!block_on(sub.wait_committed())); @@ -447,7 +440,7 @@ mod tests { assert!(!block_on(sub.wait_committed())); assert_eq!(block_on(sub.result()), Some(result)); - let (mut chan, mut sub) = QueryResChannel::pair(); + let (chan, sub) = QueryResChannel::pair(); drop(chan); assert!(block_on(sub.result()).is_none()); } @@ -464,12 +457,12 @@ mod tests { assert!(block_on(sub.wait_committed())); assert_eq!(block_on(sub.result()), Some(result.clone())); - let (mut chan, mut sub) = QueryResChannel::pair(); + let (chan, sub) = QueryResChannel::pair(); let resp = QueryResult::Response(result.clone()); chan.set_result(resp.clone()); assert_eq!(block_on(sub.result()).unwrap(), resp); - let (mut chan, mut sub) = 
QueryResChannel::pair(); + let (chan, sub) = QueryResChannel::pair(); let read = QueryResult::Read(ReadResponse { read_index: 0, txn_extra_op: TxnExtraOp::ReadOldValue, diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index caaa5120325..d46ff09f2b1 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -28,8 +28,8 @@ use pd_client::RpcClient; use raftstore::store::{region_meta::RegionMeta, Config, Transport, RAFT_INIT_LOG_INDEX}; use raftstore_v2::{ create_store_batch_system, - router::{DebugInfoChannel, PeerMsg, QueryResult}, - Bootstrap, StoreMeta, StoreRouter, StoreSystem, + router::{DebugInfoChannel, PeerMsg, QueryResult, RaftRouter}, + Bootstrap, StoreMeta, StoreSystem, }; use slog::{o, Logger}; use tempfile::TempDir; @@ -37,10 +37,10 @@ use test_pd::mocker::Service; use tikv_util::config::{ReadableDuration, VersionTrack}; #[derive(Clone)] -pub struct TestRouter(StoreRouter); +pub struct TestRouter(RaftRouter); impl Deref for TestRouter { - type Target = StoreRouter; + type Target = RaftRouter; fn deref(&self) -> &Self::Target { &self.0 @@ -112,6 +112,8 @@ pub struct RunningState { pub system: StoreSystem, pub cfg: Arc>, pub transport: TestTransport, + // We need this to clear the ref counts of CachedTablet when shutdown + store_meta: Arc>>, } impl RunningState { @@ -160,7 +162,9 @@ impl RunningState { logger.clone(), ); - let store_meta = Arc::new(Mutex::new(StoreMeta::::new())); + let router = RaftRouter::new(store_id, router); + let store_meta = router.store_meta().clone(); + system .start( store_id, @@ -168,8 +172,8 @@ impl RunningState { raft_engine.clone(), factory.clone(), transport.clone(), - &router, - store_meta, + router.store_router(), + store_meta.clone(), ) .unwrap(); @@ -179,6 +183,7 @@ impl RunningState { system, cfg, transport, + store_meta, }; (TestRouter(router), state) } @@ -223,7 +228,10 @@ 
impl TestNode { } fn stop(&mut self) { - self.running_state.take(); + if let Some(state) = std::mem::take(&mut self.running_state) { + let mut meta = state.store_meta.lock().unwrap(); + meta.tablet_caches.clear(); + } } fn restart(&mut self) -> TestRouter { diff --git a/components/raftstore-v2/tests/integrations/test_read.rs b/components/raftstore-v2/tests/integrations/test_read.rs index 8e2c3eeb04f..bb7156c6af7 100644 --- a/components/raftstore-v2/tests/integrations/test_read.rs +++ b/components/raftstore-v2/tests/integrations/test_read.rs @@ -1,5 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +use futures::executor::block_on; use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, ReadIndexRequest, Request, StatusCmdType}; use tikv_util::store::new_peer; use txn_types::WriteBatchFlags; @@ -190,3 +191,45 @@ fn test_snap_with_invalid_parameter() { let error_resp = res.response().unwrap(); assert!(error_resp.get_header().has_error()); } + +#[test] +fn test_local_read() { + let cluster = Cluster::default(); + let mut router = cluster.router(0); + std::thread::sleep(std::time::Duration::from_millis(200)); + let region_id = 2; + let mut req = RaftCmdRequest::default(); + req.mut_header().set_peer(new_peer(1, 3)); + req.mut_status_request() + .set_cmd_type(StatusCmdType::RegionDetail); + let res = router.query(region_id, req.clone()).unwrap(); + let status_resp = res.response().unwrap().get_status_response(); + let detail = status_resp.get_region_detail(); + let mut region = detail.get_region().clone(); + + let mut req = RaftCmdRequest::default(); + req.mut_header().set_peer(new_peer(1, 3)); + req.mut_header().set_term(6); + req.mut_header().set_region_id(region_id); + req.mut_header() + .set_region_epoch(region.take_region_epoch()); + let mut request_inner = Request::default(); + request_inner.set_cmd_type(CmdType::Snap); + req.mut_requests().push(request_inner); + + // FIXME: Get snapshot from local reader, but it will fail as the leader has not 
+ // applied in the current term (due to unimplementation of ApplyRes). + let resp = block_on(async { router.get_snapshot(req.clone()).await.unwrap_err() }); + assert!( + resp.get_header() + .get_error() + .get_message() + .contains("Fail to get snapshot ") + ); + + let res = router.query(region_id, req.clone()).unwrap(); + let resp = res.read().unwrap(); + // The read index will be 0 as the retry process in the `get_snapshot` will + // renew the lease. + assert_eq!(resp.read_index, 0); +} diff --git a/components/raftstore/src/router.rs b/components/raftstore/src/router.rs index 90cc41f2bd8..1ded8be3886 100644 --- a/components/raftstore/src/router.rs +++ b/components/raftstore/src/router.rs @@ -11,8 +11,8 @@ use crate::{ store::{ fsm::RaftRouter, transport::{CasualRouter, ProposalRouter, SignificantRouter}, - CachedReadDelegate, Callback, CasualMessage, LocalReader, PeerMsg, RaftCmdExtraOpts, - RaftCommand, SignificantMsg, StoreMetaDelegate, StoreMsg, StoreRouter, + Callback, CasualMessage, LocalReader, PeerMsg, RaftCmdExtraOpts, RaftCommand, + SignificantMsg, StoreMsg, StoreRouter, }, DiscardReason, Error as RaftStoreError, Result as RaftStoreResult, }; @@ -171,8 +171,7 @@ where ER: RaftEngine, { router: RaftRouter, - local_reader: - LocalReader, EK, CachedReadDelegate, StoreMetaDelegate>, + local_reader: LocalReader>, } impl Clone for ServerRaftStoreRouter @@ -192,12 +191,7 @@ impl ServerRaftStoreRouter { /// Creates a new router. 
pub fn new( router: RaftRouter, - local_reader: LocalReader< - RaftRouter, - EK, - CachedReadDelegate, - StoreMetaDelegate, - >, + local_reader: LocalReader>, ) -> ServerRaftStoreRouter { ServerRaftStoreRouter { router, diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index ed722fd2475..a60eb087562 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -77,9 +77,10 @@ pub use self::{ worker::{ AutoSplitController, Bucket, BucketRange, CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, FetchedLogs, FlowStatistics, FlowStatsReporter, KeyEntry, - LocalReadContext, LocalReader, LogFetchedNotifier, PdTask, RaftlogFetchRunner, - RaftlogFetchTask, ReadDelegate, ReadExecutor, ReadExecutorProvider, ReadProgress, - ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, - SplitConfigManager, StoreMetaDelegate, TrackVer, WriteStats, + LocalReadContext, LocalReader, LocalReaderCore, LogFetchedNotifier, PdTask, + RaftlogFetchRunner, RaftlogFetchTask, ReadDelegate, ReadExecutor, ReadExecutorProvider, + ReadProgress, ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, + SplitConfig, SplitConfigManager, StoreMetaDelegate, TrackVer, WriteStats, + TLS_LOCAL_READ_METRICS, }, }; diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 10996fcbae0..aca4db04fd5 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -613,7 +613,7 @@ pub fn can_amend_read( now: Timespec, ) -> bool { match lease_state { - // Here combine the new read request with the previous one even if the lease expired + // Here, combining the new read request with the previous one even if the lease expired // is ok because in this case, the previous read index must be sent out with a valid // lease instead of a suspect lease. 
So there must no pending transfer-leader // proposals before or after the previous read index, and the lease can be renewed @@ -4716,7 +4716,7 @@ where Ok(propose_index) } - fn handle_read>( + fn handle_read>( &self, reader: &mut E, req: RaftCmdRequest, @@ -5609,11 +5609,13 @@ where } } -impl ReadExecutor for PollContext +impl ReadExecutor for PollContext where EK: KvEngine, ER: RaftEngine, { + type Tablet = EK; + fn get_tablet(&mut self) -> &EK { &self.engines.kv } diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index 99adcecc04a..4335369c3cb 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -26,6 +26,7 @@ pub use self::{ cleanup_sst::{Runner as CleanupSstRunner, Task as CleanupSstTask}, compact::{Runner as CompactRunner, Task as CompactTask}, consistency_check::{Runner as ConsistencyCheckRunner, Task as ConsistencyCheckTask}, + metrics::TLS_LOCAL_READ_METRICS, pd::{ new_change_peer_v2_request, FlowStatistics, FlowStatsReporter, HeartbeatTask, Runner as PdRunner, Task as PdTask, @@ -35,8 +36,9 @@ pub use self::{ }, raftlog_gc::{Runner as RaftlogGcRunner, Task as RaftlogGcTask}, read::{ - CachedReadDelegate, LocalReadContext, LocalReader, Progress as ReadProgress, ReadDelegate, - ReadExecutor, ReadExecutorProvider, StoreMetaDelegate, TrackVer, + CachedReadDelegate, LocalReadContext, LocalReader, LocalReaderCore, + Progress as ReadProgress, ReadDelegate, ReadExecutor, ReadExecutorProvider, + StoreMetaDelegate, TrackVer, }, refresh_config::{ BatchComponent as RaftStoreBatchComponent, Runner as RefreshConfigRunner, diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 5efb750b863..5801083f1bc 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -12,12 +12,12 @@ use std::{ }; use crossbeam::{atomic::AtomicCell, channel::TrySendError}; 
-use engine_traits::{KvEngine, RaftEngine, Snapshot}; +use engine_traits::{KvEngine, Peekable, RaftEngine}; use fail::fail_point; use kvproto::{ errorpb, kvrpcpb::ExtraOp as TxnExtraOp, - metapb, + metapb::{self, Region}, raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse, ReadIndexResponse, Request, Response}, }; use pd_client::BucketMeta; @@ -36,20 +36,22 @@ use crate::{ cmd_resp, fsm::store::StoreMeta, util::{self, LeaseState, RegionReadProgress, RemoteLease}, - Callback, CasualMessage, CasualRouter, Peer, ProposalRouter, RaftCommand, ReadResponse, - RegionSnapshot, RequestInspector, RequestPolicy, TxnExt, + Callback, CasualMessage, CasualRouter, Peer, ProposalRouter, RaftCommand, ReadCallback, + ReadResponse, RegionSnapshot, RequestInspector, RequestPolicy, TxnExt, }, Error, Result, }; /// #[RaftstoreCommon] -pub trait ReadExecutor { - fn get_tablet(&mut self) -> &E; +pub trait ReadExecutor { + type Tablet: KvEngine; + + fn get_tablet(&mut self) -> &Self::Tablet; fn get_snapshot( &mut self, ts: Option, - read_context: &mut Option>, - ) -> Arc; + read_context: &mut Option>, + ) -> Arc<::Snapshot>; fn get_value(&mut self, req: &Request, region: &metapb::Region) -> Result { let key = req.get_get().get_key(); @@ -94,8 +96,8 @@ pub trait ReadExecutor { region: &Arc, read_index: Option, mut ts: Option, - mut read_context: Option>, - ) -> ReadResponse { + mut read_context: Option>, + ) -> ReadResponse<::Snapshot> { let requests = msg.get_requests(); let mut response = ReadResponse { response: RaftCmdResponse::default(), @@ -151,28 +153,6 @@ pub trait ReadExecutor { } } -/// #[RaftstoreCommon]: A read only delegate of `Peer`. 
-#[derive(Clone, Debug)] -pub struct ReadDelegate { - pub region: Arc, - pub peer_id: u64, - pub term: u64, - pub applied_term: u64, - pub leader_lease: Option, - pub last_valid_ts: Timespec, - - pub tag: String, - pub bucket_meta: Option>, - pub txn_extra_op: Arc>, - pub txn_ext: Arc, - pub read_progress: Arc, - pub pending_remove: bool, - - // `track_ver` used to keep the local `ReadDelegate` in `LocalReader` - // up-to-date with the global `ReadDelegate` stored at `StoreMeta` - pub track_ver: TrackVer, -} - /// CachedReadDelegate is a wrapper the ReadDelegate and kv_engine. LocalReader /// dispatch local read requests to ReadDeleage according to the region_id where /// ReadDelegate needs kv_engine to read data or fetch snapshot. @@ -224,17 +204,17 @@ impl Drop for ReadDelegate { } /// #[RaftstoreCommon] -pub trait ReadExecutorProvider: Send + Clone + 'static -where - E: KvEngine, -{ - type Executor: ReadExecutor; +pub trait ReadExecutorProvider: Send + Clone + 'static { + type Executor: ReadExecutor; + type StoreMeta; fn store_id(&self) -> Option; /// get the ReadDelegate with region_id and the number of delegates in the /// StoreMeta fn get_executor_and_len(&self, region_id: u64) -> (usize, Option); + + fn store_meta(&self) -> &Self::StoreMeta; } #[derive(Clone)] @@ -258,11 +238,12 @@ where } } -impl ReadExecutorProvider for StoreMetaDelegate +impl ReadExecutorProvider for StoreMetaDelegate where E: KvEngine, { type Executor = CachedReadDelegate; + type StoreMeta = Arc>; fn store_id(&self) -> Option { self.store_meta.as_ref().lock().unwrap().store_id @@ -284,6 +265,10 @@ where } (meta.readers.len(), None) } + + fn store_meta(&self) -> &Self::StoreMeta { + &self.store_meta + } } /// #[RaftstoreCommon] @@ -336,8 +321,30 @@ impl Clone for TrackVer { } } +/// #[RaftstoreCommon]: A read only delegate of `Peer`. 
+#[derive(Clone, Debug)] +pub struct ReadDelegate { + pub region: Arc, + pub peer_id: u64, + pub term: u64, + pub applied_term: u64, + pub leader_lease: Option, + pub last_valid_ts: Timespec, + + pub tag: String, + pub bucket_meta: Option>, + pub txn_extra_op: Arc>, + pub txn_ext: Arc, + pub read_progress: Arc, + pub pending_remove: bool, + + // `track_ver` used to keep the local `ReadDelegate` in `LocalReader` + // up-to-date with the global `ReadDelegate` stored at `StoreMeta` + pub track_ver: TrackVer, +} + impl ReadDelegate { - pub fn from_peer(peer: &Peer) -> ReadDelegate { + pub fn from_peer(peer: &Peer) -> Self { let region = peer.region().clone(); let region_id = region.get_id(); let peer_id = peer.peer.get_id(); @@ -358,6 +365,34 @@ impl ReadDelegate { } } + pub fn new( + peer_id: u64, + term: u64, + region: Region, + applied_term: u64, + txn_extra_op: Arc>, + txn_ext: Arc, + read_progress: Arc, + bucket_meta: Option>, + ) -> Self { + let region_id = region.id; + ReadDelegate { + region: Arc::new(region), + peer_id, + term, + applied_term, + leader_lease: None, + last_valid_ts: Timespec::new(0, 0), + tag: format!("[region {}] {}", region_id, peer_id), + txn_extra_op, + txn_ext, + read_progress, + pending_remove: false, + bucket_meta, + track_ver: TrackVer::new(), + } + } + pub fn fresh_valid_ts(&mut self) { self.last_valid_ts = monotonic_raw_now(); } @@ -389,21 +424,24 @@ impl ReadDelegate { } } + pub fn need_renew_lease(&self, ts: Timespec) -> bool { + self.leader_lease + .as_ref() + .map(|lease| lease.need_renew(ts)) + .unwrap_or(false) + } + // If the remote lease will be expired in near future send message - // to `raftstore` renew it + // to `raftstore` to renew it pub fn maybe_renew_lease_advance( &self, router: &dyn CasualRouter, ts: Timespec, ) { - if !self - .leader_lease - .as_ref() - .map(|lease| lease.need_renew(ts)) - .unwrap_or(false) - { + if !self.need_renew_lease(ts) { return; } + TLS_LOCAL_READ_METRICS.with(|m| 
m.borrow_mut().renew_lease_advance.inc()); let region_id = self.region.get_id(); if let Err(e) = router.send(region_id, CasualMessage::RenewLease) { @@ -437,10 +475,7 @@ impl ReadDelegate { false } - pub fn check_stale_read_safe( - &self, - read_ts: u64, - ) -> std::result::Result<(), ReadResponse> { + pub fn check_stale_read_safe(&self, read_ts: u64) -> std::result::Result<(), RaftCmdResponse> { let safe_ts = self.read_progress.safe_ts(); if safe_ts >= read_ts { return Ok(()); @@ -458,11 +493,7 @@ impl ReadDelegate { safe_ts, }); cmd_resp::bind_term(&mut response, self.term); - Err(ReadResponse { - response, - snapshot: None, - txn_extra_op: TxnExtraOp::Noop, - }) + Err(response) } /// Used in some external tests. @@ -538,107 +569,31 @@ impl Progress { /// #[RaftstoreCommon]: LocalReader is an entry point where local read requests are dipatch to the /// relevant regions by LocalReader so that these requests can be handled by the /// relevant ReadDelegate respectively. -pub struct LocalReader +pub struct LocalReaderCore where - C: ProposalRouter + CasualRouter, - E: KvEngine, - D: ReadExecutor + Deref, - S: ReadExecutorProvider, + D: ReadExecutor + Deref, + S: ReadExecutorProvider, { pub store_id: Cell>, store_meta: S, - kv_engine: E, - // region id -> ReadDelegate - // The use of `Arc` here is a workaround, see the comment at `get_delegate` pub delegates: LruCache, - snap_cache: Box>>, - cache_read_id: ThreadReadId, - // A channel to raftstore. 
- router: C, } -impl ReadExecutor for CachedReadDelegate +impl LocalReaderCore where - E: KvEngine, + D: ReadExecutor + Deref + Clone, + S: ReadExecutorProvider, { - fn get_tablet(&mut self) -> &E { - &self.kv_engine - } - - fn get_snapshot( - &mut self, - create_time: Option, - read_context: &mut Option>, - ) -> Arc { - let ctx = read_context.as_mut().unwrap(); - TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().local_executed_requests.inc()); - if let Some(ts) = create_time { - if ts == *ctx.read_id { - if let Some(snap) = ctx.snap_cache.as_ref().as_ref() { - TLS_LOCAL_READ_METRICS - .with(|m| m.borrow_mut().local_executed_snapshot_cache_hit.inc()); - return snap.clone(); - } - } - let snap = Arc::new(self.kv_engine.snapshot()); - *ctx.read_id = ts; - *ctx.snap_cache = Box::new(Some(snap.clone())); - return snap; - } - Arc::new(self.kv_engine.snapshot()) - } -} - -impl LocalReader -where - C: ProposalRouter + CasualRouter, - E: KvEngine, - D: ReadExecutor + Deref + Clone, - S: ReadExecutorProvider, -{ - pub fn new(kv_engine: E, store_meta: S, router: C) -> Self { - let cache_read_id = ThreadReadId::new(); - LocalReader { + pub fn new(store_meta: S) -> Self { + LocalReaderCore { store_meta, - kv_engine, - router, - snap_cache: Box::new(None), - cache_read_id, store_id: Cell::new(None), delegates: LruCache::with_capacity_and_sample(0, 7), } } - fn redirect(&mut self, mut cmd: RaftCommand) { - debug!("localreader redirects command"; "command" => ?cmd); - let region_id = cmd.request.get_header().get_region_id(); - let mut err = errorpb::Error::default(); - match ProposalRouter::send(&self.router, cmd) { - Ok(()) => return, - Err(TrySendError::Full(c)) => { - TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.channel_full.inc()); - err.set_message(RAFTSTORE_IS_BUSY.to_owned()); - err.mut_server_is_busy() - .set_reason(RAFTSTORE_IS_BUSY.to_owned()); - cmd = c; - } - Err(TrySendError::Disconnected(c)) => { - TLS_LOCAL_READ_METRICS.with(|m| 
m.borrow_mut().reject_reason.no_region.inc()); - err.set_message(format!("region {} is missing", region_id)); - err.mut_region_not_found().set_region_id(region_id); - cmd = c; - } - } - - let mut resp = RaftCmdResponse::default(); - resp.mut_header().set_error(err); - let read_resp = ReadResponse { - response: resp, - snapshot: None, - txn_extra_op: TxnExtraOp::Noop, - }; - - cmd.callback.invoke_read(read_resp); + pub fn store_meta(&self) -> &S::StoreMeta { + self.store_meta.store_meta() } // Ideally `get_delegate` should return `Option<&ReadDelegate>`, but if so the @@ -673,10 +628,7 @@ where rd.filter(|r| !r.pending_remove) } - pub fn pre_propose_raft_command( - &mut self, - req: &RaftCmdRequest, - ) -> Result> { + pub fn validate_request(&mut self, req: &RaftCmdRequest) -> Result> { // Check store id. if self.store_id.get().is_none() { let store_id = self.store_meta.store_id(); @@ -728,17 +680,113 @@ where return Ok(None); } - let mut inspector = Inspector { - delegate: &delegate, - }; - match inspector.inspect(req) { - Ok(RequestPolicy::ReadLocal) => Ok(Some((delegate, RequestPolicy::ReadLocal))), - Ok(RequestPolicy::StaleRead) => Ok(Some((delegate, RequestPolicy::StaleRead))), - // It can not handle other policies. - Ok(_) => Ok(None), - Err(e) => Err(e), + Ok(Some(delegate)) + } +} + +impl Clone for LocalReaderCore +where + D: ReadExecutor + Deref, + S: ReadExecutorProvider, +{ + fn clone(&self) -> Self { + LocalReaderCore { + store_meta: self.store_meta.clone(), + store_id: self.store_id.clone(), + delegates: LruCache::with_capacity_and_sample(0, 7), } } +} + +pub struct LocalReader +where + E: KvEngine, + C: ProposalRouter + CasualRouter, +{ + local_reader: LocalReaderCore, StoreMetaDelegate>, + kv_engine: E, + + snap_cache: Box>>, + cache_read_id: ThreadReadId, + + // A channel to raftstore. 
+ router: C, +} + +impl LocalReader +where + E: KvEngine, + C: ProposalRouter + CasualRouter, +{ + pub fn new(kv_engine: E, store_meta: StoreMetaDelegate, router: C) -> Self { + let cache_read_id = ThreadReadId::new(); + Self { + local_reader: LocalReaderCore::new(store_meta), + kv_engine, + snap_cache: Box::new(None), + cache_read_id, + router, + } + } + + fn local_read_context(&mut self) -> LocalReadContext<'_, E> { + LocalReadContext { + snap_cache: &mut self.snap_cache, + read_id: &mut self.cache_read_id, + } + } + + pub fn pre_propose_raft_command( + &mut self, + req: &RaftCmdRequest, + ) -> Result, RequestPolicy)>> { + if let Some(delegate) = self.local_reader.validate_request(req)? { + let mut inspector = Inspector { + delegate: &delegate, + }; + match inspector.inspect(req) { + Ok(RequestPolicy::ReadLocal) => Ok(Some((delegate, RequestPolicy::ReadLocal))), + Ok(RequestPolicy::StaleRead) => Ok(Some((delegate, RequestPolicy::StaleRead))), + // It can not handle other policies. + Ok(_) => Ok(None), + Err(e) => Err(e), + } + } else { + Ok(None) + } + } + + fn redirect(&mut self, mut cmd: RaftCommand) { + debug!("localreader redirects command"; "command" => ?cmd); + let region_id = cmd.request.get_header().get_region_id(); + let mut err = errorpb::Error::default(); + match ProposalRouter::send(&self.router, cmd) { + Ok(()) => return, + Err(TrySendError::Full(c)) => { + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.channel_full.inc()); + err.set_message(RAFTSTORE_IS_BUSY.to_owned()); + err.mut_server_is_busy() + .set_reason(RAFTSTORE_IS_BUSY.to_owned()); + cmd = c; + } + Err(TrySendError::Disconnected(c)) => { + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.no_region.inc()); + err.set_message(format!("region {} is missing", region_id)); + err.mut_region_not_found().set_region_id(region_id); + cmd = c; + } + } + + let mut resp = RaftCmdResponse::default(); + resp.mut_header().set_error(err); + let read_resp = ReadResponse { + 
response: resp, + snapshot: None, + txn_extra_op: TxnExtraOp::Noop, + }; + + cmd.callback.set_result(read_resp); + } pub fn propose_raft_command( &mut self, @@ -748,7 +796,6 @@ where ) { match self.pre_propose_raft_command(&req) { Ok(Some((mut delegate, policy))) => { - let delegate_ext: LocalReadContext<'_, E>; let mut response = match policy { // Leader can read local if and only if it is in lease. RequestPolicy::ReadLocal => { @@ -769,14 +816,11 @@ where return; } - delegate_ext = LocalReadContext { - snap_cache: &mut self.snap_cache, - read_id: &mut self.cache_read_id, - }; + let read_ctx = self.local_read_context(); let region = Arc::clone(&delegate.region); let response = - delegate.execute(&req, ®ion, None, read_id, Some(delegate_ext)); + delegate.execute(&req, ®ion, None, read_id, Some(read_ctx)); // Try renew lease in advance delegate.maybe_renew_lease_advance(&self.router, snapshot_ts); response @@ -785,24 +829,29 @@ where RequestPolicy::StaleRead => { let read_ts = decode_u64(&mut req.get_header().get_flag_data()).unwrap(); if let Err(resp) = delegate.check_stale_read_safe(read_ts) { - cb.invoke_read(resp); + cb.set_result(ReadResponse { + response: resp, + snapshot: None, + txn_extra_op: TxnExtraOp::Noop, + }); return; } - delegate_ext = LocalReadContext { - snap_cache: &mut self.snap_cache, - read_id: &mut self.cache_read_id, - }; + let read_ctx = self.local_read_context(); let region = Arc::clone(&delegate.region); // Getting the snapshot let response = - delegate.execute(&req, ®ion, None, read_id, Some(delegate_ext)); + delegate.execute(&req, ®ion, None, read_id, Some(read_ctx)); // Double check in case `safe_ts` change after the first check and before // getting snapshot if let Err(resp) = delegate.check_stale_read_safe(read_ts) { - cb.invoke_read(resp); + cb.set_result(ReadResponse { + response: resp, + snapshot: None, + txn_extra_op: TxnExtraOp::Noop, + }); return; } TLS_LOCAL_READ_METRICS @@ -817,16 +866,20 @@ where snap.bucket_meta = 
delegate.bucket_meta.clone(); } response.txn_extra_op = delegate.txn_extra_op.load(); - cb.invoke_read(response); + cb.set_result(response); } // Forward to raftstore. Ok(None) => self.redirect(RaftCommand::new(req, cb)), Err(e) => { let mut response = cmd_resp::new_error(e); - if let Some(delegate) = self.delegates.get(&req.get_header().get_region_id()) { + if let Some(delegate) = self + .local_reader + .delegates + .get(&req.get_header().get_region_id()) + { cmd_resp::bind_term(&mut response, delegate.term); } - cb.invoke_read(ReadResponse { + cb.set_result(ReadResponse { response, snapshot: None, txn_extra_op: TxnExtraOp::Noop, @@ -857,23 +910,53 @@ where } } -impl Clone for LocalReader +impl Clone for LocalReader where - C: ProposalRouter + CasualRouter + Clone, E: KvEngine, - D: ReadExecutor + Deref, - S: ReadExecutorProvider, + C: ProposalRouter + CasualRouter + Clone, { fn clone(&self) -> Self { - LocalReader { - store_meta: self.store_meta.clone(), + Self { + local_reader: self.local_reader.clone(), kv_engine: self.kv_engine.clone(), - router: self.router.clone(), - store_id: self.store_id.clone(), - delegates: LruCache::with_capacity_and_sample(0, 7), snap_cache: self.snap_cache.clone(), cache_read_id: self.cache_read_id.clone(), + router: self.router.clone(), + } + } +} + +impl ReadExecutor for CachedReadDelegate +where + E: KvEngine, +{ + type Tablet = E; + + fn get_tablet(&mut self) -> &E { + &self.kv_engine + } + + fn get_snapshot( + &mut self, + create_time: Option, + read_context: &mut Option>, + ) -> Arc { + let ctx = read_context.as_mut().unwrap(); + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().local_executed_requests.inc()); + if let Some(ts) = create_time { + if ts == *ctx.read_id { + if let Some(snap) = ctx.snap_cache.as_ref().as_ref() { + TLS_LOCAL_READ_METRICS + .with(|m| m.borrow_mut().local_executed_snapshot_cache_hit.inc()); + return snap.clone(); + } + } + let snap = Arc::new(self.kv_engine.snapshot()); + *ctx.read_id = ts; + 
*ctx.snap_cache = Box::new(Some(snap.clone())); + return snap; } + Arc::new(self.kv_engine.snapshot()) } } @@ -976,19 +1059,14 @@ mod tests { store_meta: Arc>, ) -> ( TempDir, - LocalReader< - MockRouter, - KvTestEngine, - CachedReadDelegate, - StoreMetaDelegate, - >, + LocalReader, Receiver>, ) { let path = Builder::new().prefix(path).tempdir().unwrap(); let db = engine_test::kv::new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap(); let (ch, rx, _) = MockRouter::new(); let mut reader = LocalReader::new(db.clone(), StoreMetaDelegate::new(store_meta, db), ch); - reader.store_id = Cell::new(Some(store_id)); + reader.local_reader.store_id = Cell::new(Some(store_id)); (path, reader, rx) } @@ -1005,12 +1083,7 @@ mod tests { } fn must_redirect( - reader: &mut LocalReader< - MockRouter, - KvTestEngine, - CachedReadDelegate, - StoreMetaDelegate, - >, + reader: &mut LocalReader, rx: &Receiver>, cmd: RaftCmdRequest, ) { @@ -1030,12 +1103,7 @@ mod tests { } fn must_not_redirect( - reader: &mut LocalReader< - MockRouter, - KvTestEngine, - CachedReadDelegate, - StoreMetaDelegate, - >, + reader: &mut LocalReader, rx: &Receiver>, task: RaftCommand, ) { @@ -1092,7 +1160,7 @@ mod tests { TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), 1 ); - assert!(reader.delegates.get(&1).is_none()); + assert!(reader.local_reader.delegates.get(&1).is_none()); // Register region 1 lease.renew(monotonic_raw_now()); @@ -1369,16 +1437,16 @@ mod tests { // Remove invalid delegate let reader_clone = store_meta.lock().unwrap().readers.get(&1).unwrap().clone(); - assert!(reader.get_delegate(1).is_some()); + assert!(reader.local_reader.get_delegate(1).is_some()); // dropping the non-source `reader` will not make other readers invalid drop(reader_clone); - assert!(reader.get_delegate(1).is_some()); + assert!(reader.local_reader.get_delegate(1).is_some()); // drop the source `reader` store_meta.lock().unwrap().readers.remove(&1).unwrap(); // the invalid delegate should be 
removed - assert!(reader.get_delegate(1).is_none()); + assert!(reader.local_reader.get_delegate(1).is_none()); } #[test] @@ -1408,7 +1476,7 @@ mod tests { meta.readers.insert(1, read_delegate); } - let d = reader.get_delegate(1).unwrap(); + let d = reader.local_reader.get_delegate(1).unwrap(); assert_eq!(&*d.region, ®ion); assert_eq!(d.term, 1); assert_eq!(d.applied_term, 1); @@ -1423,13 +1491,16 @@ mod tests { .unwrap() .update(Progress::region(region.clone())); } - assert_eq!(&*reader.get_delegate(1).unwrap().region, ®ion); + assert_eq!( + &*reader.local_reader.get_delegate(1).unwrap().region, + ®ion + ); { let mut meta = store_meta.lock().unwrap(); meta.readers.get_mut(&1).unwrap().update(Progress::term(2)); } - assert_eq!(reader.get_delegate(1).unwrap().term, 2); + assert_eq!(reader.local_reader.get_delegate(1).unwrap().term, 2); { let mut meta = store_meta.lock().unwrap(); @@ -1438,7 +1509,7 @@ mod tests { .unwrap() .update(Progress::applied_term(2)); } - assert_eq!(reader.get_delegate(1).unwrap().applied_term, 2); + assert_eq!(reader.local_reader.get_delegate(1).unwrap().applied_term, 2); { let mut lease = Lease::new(Duration::seconds(1), Duration::milliseconds(250)); // 1s is long enough. 
@@ -1447,7 +1518,7 @@ mod tests { let mut meta = store_meta.lock().unwrap(); meta.readers.get_mut(&1).unwrap().update(pg); } - let d = reader.get_delegate(1).unwrap(); + let d = reader.local_reader.get_delegate(1).unwrap(); assert_eq!(d.leader_lease.clone().unwrap().term(), 3); } @@ -1545,14 +1616,11 @@ mod tests { meta.readers.insert(1, read_delegate); } - let mut delegate = reader.get_delegate(region1.id).unwrap(); + let mut delegate = reader.local_reader.get_delegate(region1.id).unwrap(); let read_id = Some(ThreadReadId::new()); { - let mut read_context = Some(LocalReadContext { - snap_cache: &mut reader.snap_cache, - read_id: &mut reader.cache_read_id, - }); + let mut read_context = Some(reader.local_read_context()); for _ in 0..10 { // Different region id should reuse the cache @@ -1568,10 +1636,7 @@ mod tests { let read_id = Some(ThreadReadId::new()); { - let read_context = LocalReadContext { - snap_cache: &mut reader.snap_cache, - read_id: &mut reader.cache_read_id, - }; + let read_context = reader.local_read_context(); let _ = delegate.get_snapshot(read_id.clone(), &mut Some(read_context)); } @@ -1582,10 +1647,7 @@ mod tests { ); { - let read_context = LocalReadContext { - snap_cache: &mut reader.snap_cache, - read_id: &mut reader.cache_read_id, - }; + let read_context = reader.local_read_context(); let _ = delegate.get_snapshot(read_id.clone(), &mut Some(read_context)); // We can hit it again. assert_eq!( @@ -1596,10 +1658,7 @@ mod tests { reader.release_snapshot_cache(); { - let read_context = LocalReadContext { - snap_cache: &mut reader.snap_cache, - read_id: &mut reader.cache_read_id, - }; + let read_context = reader.local_read_context(); let _ = delegate.get_snapshot(read_id.clone(), &mut Some(read_context)); } // After release, we will mss the cache even with the prevsiou read_id. 
@@ -1609,10 +1668,7 @@ mod tests { ); { - let read_context = LocalReadContext { - snap_cache: &mut reader.snap_cache, - read_id: &mut reader.cache_read_id, - }; + let read_context = reader.local_read_context(); let _ = delegate.get_snapshot(read_id, &mut Some(read_context)); } // We can hit it again. From be44dbabf13be6a037e58163cf291d3e6ffa9808 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 12 Oct 2022 01:49:50 +0800 Subject: [PATCH 0261/1149] log-backup: remove checkpoint V2 from codebase (#13197) ref tikv/tikv#13196 Now, all integration test cases uses checkpoint V3. Removed `test_inflight_message` because it is invalid in V3. Signed-off-by: Yu Juncen Co-authored-by: Ti Chi Robot --- .../backup-stream/src/checkpoint_manager.rs | 103 +----------- components/backup-stream/src/endpoint.rs | 62 ++----- .../backup-stream/src/metadata/client.rs | 37 +---- components/backup-stream/src/metadata/keys.rs | 4 + components/backup-stream/src/metadata/test.rs | 76 +-------- .../backup-stream/src/subscription_manager.rs | 5 +- .../backup-stream/src/subscription_track.rs | 111 +------------ components/backup-stream/tests/mod.rs | 152 +++++++----------- src/config.rs | 4 - 9 files changed, 92 insertions(+), 462 deletions(-) diff --git a/components/backup-stream/src/checkpoint_manager.rs b/components/backup-stream/src/checkpoint_manager.rs index 2874d548c5a..4b80eb44a2f 100644 --- a/components/backup-stream/src/checkpoint_manager.rs +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -11,11 +11,9 @@ use tikv_util::{info, worker::Scheduler}; use txn_types::TimeStamp; use crate::{ - errors::{ContextualResultExt, Error, Result}, + errors::{Error, Result}, metadata::{store::MetaStore, Checkpoint, CheckpointProvider, MetadataClient}, - metrics, - subscription_track::SubscriptionTracer, - try_send, RegionCheckpointOperation, Task, + metrics, try_send, RegionCheckpointOperation, Task, }; /// A manager for 
maintaining the last flush ts. @@ -221,119 +219,25 @@ impl FlushObserver for BasicFlushObserver { } } -pub struct CheckpointV2FlushObserver { - resolvers: SubscriptionTracer, - meta_cli: MetadataClient, - - fresh_regions: Vec, - checkpoints: Vec<(Region, TimeStamp)>, - can_advance: Option, - base: O, -} - -impl CheckpointV2FlushObserver { - pub fn new( - meta_cli: MetadataClient, - can_advance: F, - resolvers: SubscriptionTracer, - base: O, - ) -> Self { - Self { - resolvers, - meta_cli, - fresh_regions: vec![], - checkpoints: vec![], - can_advance: Some(can_advance), - base, - } - } -} - -#[async_trait::async_trait] -impl FlushObserver for CheckpointV2FlushObserver -where - S: MetaStore + 'static, - F: FnOnce() -> bool + Send + 'static, - O: FlushObserver, -{ - async fn before(&mut self, _checkpoints: Vec<(Region, TimeStamp)>) { - let fresh_regions = self.resolvers.collect_fresh_subs(); - let removal = self.resolvers.collect_removal_subs(); - let checkpoints = removal - .into_iter() - .map(|sub| (sub.meta, sub.resolver.resolved_ts())) - .collect::>(); - self.checkpoints = checkpoints; - self.fresh_regions = fresh_regions; - } - - async fn after(&mut self, task: &str, rts: u64) -> Result<()> { - if !self.can_advance.take().map(|f| f()).unwrap_or(true) { - let cp_now = self - .meta_cli - .get_local_task_checkpoint(task) - .await - .context(format_args!( - "during checking whether we should skip advancing ts to {}.", - rts - ))?; - // if we need to roll back checkpoint ts, don't prevent it. - if rts >= cp_now.into_inner() { - info!("skipping advance checkpoint."; "rts" => %rts, "old_rts" => %cp_now); - return Ok(()); - } - } - // Optionally upload the region checkpoint. - // Unless in some extreme condition, skipping upload the region checkpoint won't - // lead to data loss. 
- if let Err(err) = self - .meta_cli - .upload_region_checkpoint(task, &self.checkpoints) - .await - { - err.report("failed to upload region checkpoint"); - } - // we can advance the progress at next time. - // return early so we won't be mislead by the metrics. - self.meta_cli - .set_local_task_checkpoint(task, rts) - .await - .context(format_args!("on flushing task {}", task))?; - self.base.after(task, rts).await?; - self.meta_cli - .clear_region_checkpoint(task, &self.fresh_regions) - .await - .context(format_args!("on clearing the checkpoint for task {}", task))?; - Ok(()) - } -} - pub struct CheckpointV3FlushObserver { /// We should modify the rts (the local rts isn't right.) /// This should be a BasicFlushObserver or something likewise. baseline: O, sched: Scheduler, meta_cli: MetadataClient, - subs: SubscriptionTracer, checkpoints: Vec<(Region, TimeStamp)>, global_checkpoint_cache: HashMap, } impl CheckpointV3FlushObserver { - pub fn new( - sched: Scheduler, - meta_cli: MetadataClient, - subs: SubscriptionTracer, - baseline: O, - ) -> Self { + pub fn new(sched: Scheduler, meta_cli: MetadataClient, baseline: O) -> Self { Self { sched, meta_cli, checkpoints: vec![], // We almost always have only one entry. global_checkpoint_cache: HashMap::with_capacity(1), - subs, baseline, } } @@ -369,7 +273,6 @@ where } async fn after(&mut self, task: &str, _rts: u64) -> Result<()> { - self.subs.update_status_for_v3(); let t = Task::RegionCheckpointsOp(RegionCheckpointOperation::Update(std::mem::take( &mut self.checkpoints, ))); diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index d463964558a..22a415ca6bb 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -1,12 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - collections::HashSet, - fmt, - marker::PhantomData, - path::PathBuf, - sync::{atomic::Ordering, Arc}, - time::Duration, + collections::HashSet, fmt, marker::PhantomData, path::PathBuf, sync::Arc, time::Duration, }; use concurrency_manager::ConcurrencyManager; @@ -46,8 +41,8 @@ use super::metrics::HANDLE_EVENT_DURATION_HISTOGRAM; use crate::{ annotate, checkpoint_manager::{ - BasicFlushObserver, CheckpointManager, CheckpointV2FlushObserver, - CheckpointV3FlushObserver, FlushObserver, GetCheckpointResult, RegionIdWithVersion, + BasicFlushObserver, CheckpointManager, CheckpointV3FlushObserver, FlushObserver, + GetCheckpointResult, RegionIdWithVersion, }, errors::{Error, Result}, event_loader::{InitialDataLoader, PendingMemoryQuota}, @@ -92,6 +87,9 @@ pub struct Endpoint { initial_scan_throughput_quota: Limiter, region_operator: RegionSubscriptionManager, failover_time: Option, + // We holds the config before, even it is useless for now, + // however probably it would be useful in the future. 
+ #[allow(dead_code)] config: BackupStreamConfig, checkpoint_mgr: CheckpointManager, } @@ -226,7 +224,7 @@ where let safepoint = meta_cli.global_progress_of_task(&task).await?; pdc.update_service_safe_point( safepoint_name, - TimeStamp::new(safepoint - 1), + TimeStamp::new(safepoint.saturating_sub(1)), safepoint_ttl, ) .await?; @@ -402,23 +400,9 @@ where } } - fn flush_observer(&self) -> Box { + fn flush_observer(&self) -> impl FlushObserver { let basic = BasicFlushObserver::new(self.pd_client.clone(), self.store_id); - if self.config.use_checkpoint_v3 { - Box::new(CheckpointV3FlushObserver::new( - self.scheduler.clone(), - self.meta_client.clone(), - self.subs.clone(), - basic, - )) - } else { - Box::new(CheckpointV2FlushObserver::new( - self.meta_client.clone(), - self.make_flush_guard(), - self.subs.clone(), - basic, - )) - } + CheckpointV3FlushObserver::new(self.scheduler.clone(), self.meta_client.clone(), basic) } /// Convert a batch of events to the cmd batch, and update the resolver @@ -574,7 +558,6 @@ where let cli = self.meta_client.clone(); let init = self.make_initial_loader(); let range_router = self.range_router.clone(); - let use_v3 = self.config.use_checkpoint_v3; info!( "register backup stream task"; @@ -598,9 +581,7 @@ where let task_clone = task.clone(); let run = async move { let task_name = task.info.get_name(); - if !use_v3 { - cli.init_task(&task.info).await?; - } + cli.init_task(&task.info).await?; let ranges = cli.ranges_of_task(task_name).await?; info!( "register backup stream ranges"; @@ -704,29 +685,6 @@ where self.pool.block_on(router.unregister_task(task)) } - /// Make a guard for checking whether we can flush the checkpoint ts. 
- fn make_flush_guard(&self) -> impl FnOnce() -> bool + Send { - let failover = self.failover_time; - let flush_duration = self.config.max_flush_interval; - move || { - if failover - .as_ref() - .map(|failover_t| failover_t.saturating_elapsed() < flush_duration.0 * 2) - .unwrap_or(false) - { - warn!("during failover, skipping advancing resolved ts"; - "failover_time_ago" => ?failover.map(|failover_t| failover_t.saturating_elapsed())); - return false; - } - let in_flight = crate::observer::IN_FLIGHT_START_OBSERVE_MESSAGE.load(Ordering::SeqCst); - if in_flight > 0 { - warn!("inflight leader detected, skipping advancing resolved ts"; "in_flight" => %in_flight); - return false; - } - true - } - } - fn prepare_min_ts(&self) -> future![TimeStamp] { let pd_cli = self.pd_client.clone(); let cm = self.concurrency_manager.clone(); diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index e92addd2992..2ebf553e1cb 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -1,6 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{cmp::Ordering, collections::HashMap, fmt::Debug, path::Path, time::Duration}; +use std::{cmp::Ordering, collections::HashMap, fmt::Debug, path::Path}; use kvproto::{ brpb::{StreamBackupError, StreamBackupTaskInfo}, @@ -13,8 +13,8 @@ use txn_types::TimeStamp; use super::{ keys::{self, KeyValue, MetaKey}, store::{ - CondTransaction, Condition, GetExtra, Keys, KvEvent, KvEventType, MetaStore, PutOption, - Snapshot, Subscription, Transaction, WithRevision, + CondTransaction, Condition, GetExtra, Keys, KvEvent, KvEventType, MetaStore, Snapshot, + Subscription, Transaction, WithRevision, }, }; use crate::{ @@ -671,37 +671,6 @@ impl MetadataClient { .await } - /// upload a region-level checkpoint. 
- pub async fn upload_region_checkpoint( - &self, - task_name: &str, - checkpoints: &[(Region, TimeStamp)], - ) -> Result<()> { - let txn = checkpoints - .iter() - .fold(Transaction::default(), |txn, (region, cp)| { - txn.put_opt( - KeyValue( - MetaKey::next_bakcup_ts_of_region(task_name, region), - (*cp).into_inner().to_be_bytes().to_vec(), - ), - PutOption { - ttl: Duration::from_secs(600), - }, - ) - }); - self.meta_store.txn(txn).await - } - - pub async fn clear_region_checkpoint(&self, task_name: &str, regions: &[Region]) -> Result<()> { - let txn = regions.iter().fold(Transaction::default(), |txn, region| { - txn.delete(Keys::Key(MetaKey::next_bakcup_ts_of_region( - task_name, region, - ))) - }); - self.meta_store.txn(txn).await - } - pub async fn global_checkpoint_of(&self, task: &str) -> Result> { let cps = self.checkpoints_of(task).await?; let mut min_checkpoint = None; diff --git a/components/backup-stream/src/metadata/keys.rs b/components/backup-stream/src/metadata/keys.rs index 32962ec36b0..f7a2c960ec4 100644 --- a/components/backup-stream/src/metadata/keys.rs +++ b/components/backup-stream/src/metadata/keys.rs @@ -162,6 +162,10 @@ impl MetaKey { Self(format!("{}{}/{}/{}", PREFIX, PATH_LAST_ERROR, name, store).into_bytes()) } + pub fn central_global_checkpoint_of(name: &str) -> Self { + Self(format!("{}/checkpoint/{}/central_global", PREFIX, name).into_bytes()) + } + /// return the key that keeps the range [self, self.next()) contains only /// `self`. 
pub fn next(&self) -> Self { diff --git a/components/backup-stream/src/metadata/test.rs b/components/backup-stream/src/metadata/test.rs index b9fb965033a..ec2a30efbf3 100644 --- a/components/backup-stream/src/metadata/test.rs +++ b/components/backup-stream/src/metadata/test.rs @@ -7,21 +7,13 @@ use std::{ iter::FromIterator, }; -use kvproto::{ - brpb::{Noop, StorageBackend}, - metapb::Region, -}; +use kvproto::brpb::{Noop, StorageBackend}; use tokio_stream::StreamExt; -use txn_types::TimeStamp; use super::{keys::MetaKey, MetadataClient, StreamTask}; use crate::{ errors::Result, - metadata::{ - client::{Checkpoint, CheckpointProvider}, - store::SlashEtcStore, - MetadataEvent, - }, + metadata::{store::SlashEtcStore, MetadataEvent}, }; fn test_meta_cli() -> MetadataClient { @@ -99,12 +91,6 @@ fn task_matches(expected: &[StreamTask], real: &[StreamTask]) { ); } -fn fake_region(id: u64) -> Region { - let mut r = Region::new(); - r.set_id(id); - r -} - #[tokio::test] async fn test_watch() -> Result<()> { let cli = test_meta_cli(); @@ -197,61 +183,3 @@ async fn test_init() -> Result<()> { Ok(()) } - -#[tokio::test] -async fn test_region_checkpoint() -> Result<()> { - let cli = test_meta_cli(); - let task = simple_task("simple_2"); - cli.insert_task_with_range(&task, &[]).await?; - - let cps = [ - (fake_region(1), TimeStamp::new(42)), - (fake_region(2), TimeStamp::new(64)), - ]; - cli.upload_region_checkpoint("simple_2", &cps).await?; - cli.set_local_task_checkpoint("simple_2", 50).await?; - - let rcp = cli - .get_region_checkpoint("simple_2", &fake_region(1)) - .await?; - assert_eq!( - rcp, - Checkpoint { - provider: CheckpointProvider::Region { id: 1, version: 0 }, - ts: TimeStamp::new(42) - } - ); - let gcp = cli - .get_region_checkpoint("simple_2", &fake_region(3)) - .await?; - assert_eq!( - gcp, - Checkpoint { - provider: CheckpointProvider::Store(42), - ts: TimeStamp::new(50) - } - ); - cli.clear_region_checkpoint("simple_2", &[fake_region(1)]) - .await?; - let rcp = 
cli - .get_region_checkpoint("simple_2", &fake_region(2)) - .await?; - assert_eq!( - rcp, - Checkpoint { - provider: CheckpointProvider::Region { id: 2, version: 0 }, - ts: TimeStamp::new(64) - } - ); - let gcp = cli - .get_region_checkpoint("simple_2", &fake_region(1)) - .await?; - assert_eq!( - gcp, - Checkpoint { - provider: CheckpointProvider::Store(42), - ts: TimeStamp::new(50) - } - ); - Ok(()) -} diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index 751f41ee587..d47974bcd42 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -403,7 +403,7 @@ where self.subs.deregister_region_if(region, |_, _| true); } ObserveOp::Destroy { ref region } => { - let stopped = self.subs.deregister_region_if(region, |old, new| { + self.subs.deregister_region_if(region, |old, new| { raftstore::store::util::compare_region_epoch( old.meta.get_region_epoch(), new, @@ -414,9 +414,6 @@ where .map_err(|err| warn!("check epoch and stop failed."; "err" => %err)) .is_ok() }); - if stopped { - self.subs.destroy_stopped_region(region.get_id()); - } } ObserveOp::RefreshResolver { ref region } => self.refresh_resolver(region).await, ObserveOp::NotifyFailToStartObserve { diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 4120a71e4ee..50c3c6c1143 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -15,22 +15,10 @@ use crate::{debug, metrics::TRACK_REGION, utils}; #[derive(Clone, Default, Debug)] pub struct SubscriptionTracer(Arc>); -#[derive(Debug, PartialEq, Clone, Copy)] -pub enum SubscriptionState { - /// When it is newly added (maybe after split or leader transfered from - /// other store), without any flush. - Fresh, - /// It has been flushed, and running normally. 
- Normal, - /// It has been moved to other store. - Removal, -} - pub struct RegionSubscription { pub meta: Region, pub(crate) handle: ObserveHandle, pub(crate) resolver: TwoPhaseResolver, - state: SubscriptionState, } impl std::fmt::Debug for RegionSubscription { @@ -43,32 +31,17 @@ impl std::fmt::Debug for RegionSubscription { } impl RegionSubscription { - /// move self out. - fn take(&mut self) -> Self { - Self { - meta: self.meta.clone(), - handle: self.handle.clone(), - resolver: std::mem::replace(&mut self.resolver, TwoPhaseResolver::new(0, None)), - state: self.state, - } - } - pub fn new(region: Region, handle: ObserveHandle, start_ts: Option) -> Self { let resolver = TwoPhaseResolver::new(region.get_id(), start_ts); Self { handle, meta: region, resolver, - state: SubscriptionState::Fresh, } } pub fn stop(&mut self) { - if self.state == SubscriptionState::Removal { - return; - } self.handle.stop_observing(); - self.state = SubscriptionState::Removal; } pub fn is_observing(&self) -> bool { @@ -111,10 +84,7 @@ impl SubscriptionTracer { region.get_id(), RegionSubscription::new(region.clone(), handle, start_ts), ) { - if o.state != SubscriptionState::Removal { - TRACK_REGION.dec(); - warn!("register region which is already registered"; "region_id" => %region.get_id()); - } + TRACK_REGION.dec(); o.stop(); } } @@ -125,7 +95,6 @@ impl SubscriptionTracer { self.0 .iter_mut() // Don't advance the checkpoint ts of removed region. - .filter(|s| s.state != SubscriptionState::Removal) .map(|mut s| (s.meta.clone(), s.resolver.resolve(min_ts))) .collect() } @@ -150,12 +119,6 @@ impl SubscriptionTracer { } } - /// destroy subscription if the subscription is stopped. - pub fn destroy_stopped_region(&self, region_id: u64) { - self.0 - .remove_if(®ion_id, |_, sub| sub.state == SubscriptionState::Removal); - } - /// try to mark a region no longer be tracked by this observer. /// returns whether success (it failed if the region hasn't been observed /// when calling this.) 
@@ -165,27 +128,13 @@ impl SubscriptionTracer { if_cond: impl FnOnce(&RegionSubscription, &Region) -> bool, ) -> bool { let region_id = region.get_id(); - let remove_result = self.0.get_mut(®ion_id); + let remove_result = self.0.remove(®ion_id); match remove_result { - Some(mut o) => { - // If the state is 'removal', we should act as if the region subscription - // has been removed: the callback should not be called because somebody may - // use this method to check whether a key exists: - // ``` - // let mut present = false; - // deregister_region_if(42, |..| { - // present = true; - // }); - // ``` - // At that time, if we call the callback with stale value, the called may get - // false positive. - if o.state == SubscriptionState::Removal { - return false; - } - if if_cond(o.value(), region) { + Some((_, mut v)) => { + if if_cond(&v, region) { TRACK_REGION.dec(); - o.value_mut().stop(); - info!("stop listen stream from store"; "observer" => ?o.value(), "region_id"=> %region_id); + v.stop(); + info!("stop listen stream from store"; "observer" => ?v, "region_id"=> %region_id); return true; } false @@ -224,54 +173,11 @@ impl SubscriptionTracer { false } - /// Remove and collect the subscriptions have been marked as removed. - pub fn collect_removal_subs(&self) -> Vec { - let mut result = vec![]; - self.0.retain(|_k, v| { - if v.state == SubscriptionState::Removal { - result.push(v.take()); - false - } else { - true - } - }); - result - } - - /// Collect the fresh subscriptions, and mark them as Normal. - pub fn collect_fresh_subs(&self) -> Vec { - self.0 - .iter_mut() - .filter_map(|mut s| { - let v = s.value_mut(); - if v.state == SubscriptionState::Fresh { - v.state = SubscriptionState::Normal; - Some(v.meta.clone()) - } else { - None - } - }) - .collect() - } - - /// Remove all "Removal" entries. - /// Set all "Fresh" entries to "Normal". 
- pub fn update_status_for_v3(&self) { - self.0.retain(|_k, v| match v.state { - SubscriptionState::Fresh => { - v.state = SubscriptionState::Normal; - true - } - SubscriptionState::Normal => true, - SubscriptionState::Removal => false, - }) - } - /// check whether the region_id should be observed by this observer. pub fn is_observing(&self, region_id: u64) -> bool { let sub = self.0.get_mut(®ion_id); match sub { - Some(mut sub) if !sub.is_observing() || sub.state == SubscriptionState::Removal => { + Some(mut sub) if !sub.is_observing() => { sub.value_mut().stop(); false } @@ -538,8 +444,5 @@ mod test { (region(4, 8, 1), TimeStamp::new(128)), ] ); - let removal = subs.collect_removal_subs(); - assert_eq!(removal.len(), 1); - assert_eq!(removal[0].meta.get_id(), 5); } } diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index 5b53c040582..6e902fb1e08 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -13,12 +13,13 @@ use async_compression::futures::write::ZstdDecoder; use backup_stream::{ errors::Result, metadata::{ + keys::{KeyValue, MetaKey}, store::{MetaStore, SlashEtcStore}, MetadataClient, StreamTask, }, observer::BackupStreamObserver, router::Router, - Endpoint, Task, + Endpoint, GetCheckpointResult, RegionCheckpointOperation, RegionSet, Task, }; use futures::{executor::block_on, AsyncWriteExt, Future}; use grpcio::ChannelBuilder; @@ -98,7 +99,6 @@ struct ErrorStore { pub struct SuiteBuilder { name: String, nodes: usize, - use_v3: bool, metastore_error: Box Result<()> + Send + Sync>, } @@ -107,16 +107,10 @@ impl SuiteBuilder { Self { name: s.to_owned(), nodes: 4, - use_v3: false, metastore_error: Box::new(|_| Ok(())), } } - pub fn use_v3(mut self) -> Self { - self.use_v3 = true; - self - } - pub fn nodes(mut self, n: usize) -> Self { self.nodes = n; self @@ -134,7 +128,6 @@ impl SuiteBuilder { let Self { name: case, nodes: n, - use_v3, metastore_error, } = self; @@ -162,7 
+155,7 @@ impl SuiteBuilder { } suite.cluster.run(); for id in 1..=(n as u64) { - suite.start_endpoint(id, use_v3); + suite.start_endpoint(id); } // We must wait until the endpoints get ready to watching the metastore, or some // modifies may be lost. Either make Endpoint::with_client wait until watch did @@ -254,7 +247,7 @@ impl Suite { worker } - fn start_endpoint(&mut self, id: u64, use_v3: bool) { + fn start_endpoint(&mut self, id: u64) { let cluster = &mut self.cluster; let worker = self.endpoints.get_mut(&id).unwrap(); let sim = cluster.sim.wl(); @@ -263,7 +256,6 @@ impl Suite { let regions = sim.region_info_accessors.get(&id).unwrap().clone(); let mut cfg = BackupStreamConfig::default(); cfg.enable = true; - cfg.use_checkpoint_v3 = use_v3; cfg.temp_path = format!("/{}/{}", self.temp_files.path().display(), id); let ob = self.obs.get(&id).unwrap().clone(); let endpoint = Endpoint::new( @@ -303,6 +295,44 @@ impl Suite { self.wait_with(move |r| block_on(r.get_task_info(&name)).is_ok()) } + /// This function tries to calculate the global checkpoint from the flush + /// status of nodes. + /// + /// NOTE: this won't check the region consistency for now, the checkpoint + /// may be weaker than expected. + fn global_checkpoint(&self) -> u64 { + let (tx, rx) = std::sync::mpsc::channel(); + self.run(|| { + let tx = tx.clone(); + Task::RegionCheckpointsOp(RegionCheckpointOperation::Get( + RegionSet::Universal, + Box::new(move |rs| rs.into_iter().for_each(|x| tx.send(x).unwrap())), + )) + }); + drop(tx); + + rx.into_iter() + .map(|r| match r { + GetCheckpointResult::Ok { checkpoint, .. } => checkpoint.into_inner(), + GetCheckpointResult::NotFound { .. } + | GetCheckpointResult::EpochNotMatch { .. 
} => { + unreachable!() + } + }) + .min() + .unwrap_or(0) + } + + async fn advance_global_checkpoint(&self, task: &str) -> Result<()> { + let cp = self.global_checkpoint(); + self.meta_store + .set(KeyValue( + MetaKey::central_global_checkpoint_of(task), + cp.to_be_bytes().to_vec(), + )) + .await + } + async fn write_records(&mut self, from: usize, n: usize, for_table: i64) -> HashSet> { let mut inserted = HashSet::default(); for ts in (from..(from + n)).map(|x| x * 2) { @@ -696,8 +726,8 @@ mod test { use std::time::{Duration, Instant}; use backup_stream::{ - errors::Error, metadata::MetadataClient, router::TaskSelector, GetCheckpointResult, - RegionCheckpointOperation, RegionSet, Task, + errors::Error, router::TaskSelector, GetCheckpointResult, RegionCheckpointOperation, + RegionSet, Task, }; use pd_client::PdClient; use tikv_util::{box_err, defer, info, HandyRwLock}; @@ -709,7 +739,7 @@ mod test { #[test] fn basic() { - let mut suite = super::SuiteBuilder::new_named("basic").use_v3().build(); + let mut suite = super::SuiteBuilder::new_named("basic").build(); fail::cfg("try_start_observe", "1*return").unwrap(); run_async_test(async { @@ -732,9 +762,7 @@ mod test { #[test] fn with_split() { - let mut suite = super::SuiteBuilder::new_named("with_split") - .use_v3() - .build(); + let mut suite = super::SuiteBuilder::new_named("with_split").build(); run_async_test(async { let round1 = suite.write_records(0, 128, 1).await; suite.must_split(&make_split_key_at_record(1, 42)); @@ -769,7 +797,7 @@ mod test { /// scanning get the snapshot. 
#[test] fn with_split_txn() { - let mut suite = super::SuiteBuilder::new_named("split_txn").use_v3().build(); + let mut suite = super::SuiteBuilder::new_named("split_txn").build(); run_async_test(async { let start_ts = suite.cluster.pd_client.get_tso().await.unwrap(); let keys = (1..1960).map(|i| make_record_key(1, i)).collect::>(); @@ -809,9 +837,7 @@ mod test { #[test] /// This case tests whether the backup can continue when the leader failes. fn leader_down() { - let mut suite = super::SuiteBuilder::new_named("leader_down") - .use_v3() - .build(); + let mut suite = super::SuiteBuilder::new_named("leader_down").build(); suite.must_register_task(1, "test_leader_down"); suite.sync(); let round1 = run_async_test(suite.write_records(0, 128, 1)); @@ -842,20 +868,11 @@ mod test { suite.write_records(258, 128, 1).await; suite.force_flush_files("test_async_commit"); std::thread::sleep(Duration::from_secs(4)); - let cli = MetadataClient::new(suite.meta_store.clone(), 1); - assert_eq!( - cli.global_progress_of_task("test_async_commit") - .await - .unwrap(), - 256 - ); + assert_eq!(suite.global_checkpoint(), 256); suite.just_commit_a_key(make_record_key(1, 256), TimeStamp::new(256), ts); suite.force_flush_files("test_async_commit"); suite.wait_for_flush(); - let cp = cli - .global_progress_of_task("test_async_commit") - .await - .unwrap(); + let cp = suite.global_checkpoint(); assert!(cp > 256, "it is {:?}", cp); }); suite.cluster.shutdown(); @@ -871,6 +888,7 @@ mod test { run_async_test(suite.write_records(0, 1, 1)); suite.force_flush_files("test_fatal_error"); suite.wait_for_flush(); + run_async_test(suite.advance_global_checkpoint("test_fatal_error")).unwrap(); let (victim, endpoint) = suite.endpoints.iter().next().unwrap(); endpoint .scheduler() @@ -879,24 +897,23 @@ mod test { Box::new(Error::Other(box_err!("everything is alright"))), )) .unwrap(); - let meta_cli = suite.get_meta_cli(); suite.sync(); - let err = 
run_async_test(meta_cli.get_last_error("test_fatal_error", *victim)) - .unwrap() - .unwrap(); + let err = run_async_test( + suite + .get_meta_cli() + .get_last_error("test_fatal_error", *victim), + ) + .unwrap() + .unwrap(); info!("err"; "err" => ?err); assert_eq!(err.error_code, error_code::backup_stream::OTHER.code); assert!(err.error_message.contains("everything is alright")); assert_eq!(err.store_id, *victim); - let paused = run_async_test(meta_cli.check_task_paused("test_fatal_error")).unwrap(); + let paused = + run_async_test(suite.get_meta_cli().check_task_paused("test_fatal_error")).unwrap(); assert!(paused); let safepoints = suite.cluster.pd_client.gc_safepoints.rl(); - let checkpoint = run_async_test( - suite - .get_meta_cli() - .global_progress_of_task("test_fatal_error"), - ) - .unwrap(); + let checkpoint = suite.global_checkpoint(); assert!( safepoints.iter().any(|sp| { @@ -909,55 +926,10 @@ mod test { ); } - #[test] - fn inflight_messages() { - // We should remove the failpoints when paniked or we may get stucked. - defer! {{ - fail::remove("delay_on_start_observe"); - fail::remove("delay_on_flush"); - }} - let mut suite = super::SuiteBuilder::new_named("inflight_message") - .nodes(3) - .build(); - suite.must_register_task(1, "inflight_message"); - run_async_test(suite.write_records(0, 128, 1)); - fail::cfg("delay_on_flush", "pause").unwrap(); - suite.force_flush_files("inflight_message"); - fail::cfg("delay_on_start_observe", "pause").unwrap(); - suite.must_shuffle_leader(1); - // Handling the `StartObserve` message and doing flush are executed - // asynchronously. Make a delay of unblocking flush thread for make sure - // we have handled the `StartObserve`. 
- std::thread::sleep(Duration::from_secs(1)); - fail::cfg("delay_on_flush", "off").unwrap(); - suite.wait_for_flush(); - let checkpoint = run_async_test( - suite - .get_meta_cli() - .global_progress_of_task("inflight_message"), - ); - fail::cfg("delay_on_start_observe", "off").unwrap(); - // The checkpoint should not advance if there are inflight messages. - assert_eq!(checkpoint.unwrap(), 0); - run_async_test(suite.write_records(256, 128, 1)); - suite.force_flush_files("inflight_message"); - suite.wait_for_flush(); - let checkpoint = run_async_test( - suite - .get_meta_cli() - .global_progress_of_task("inflight_message"), - ) - .unwrap(); - // The checkpoint should be advanced as expected when the inflight message has - // been consumed. - assert!(checkpoint > 512, "checkpoint = {}", checkpoint); - } - #[test] fn region_checkpoint_info() { let mut suite = super::SuiteBuilder::new_named("checkpoint_info") .nodes(1) - .use_v3() .build(); suite.must_register_task(1, "checkpoint_info"); suite.must_split(&make_split_key_at_record(1, 42)); @@ -1070,7 +1042,6 @@ mod test { let mut suite = SuiteBuilder::new_named("fail_to_refresh_region") .nodes(1) - .use_v3() .build(); suite.must_register_task(1, "fail_to_refresh_region"); @@ -1131,6 +1102,7 @@ mod test { suite.force_flush_files("pessimistic_lock"); suite.wait_for_flush(); std::thread::sleep(Duration::from_secs(1)); + run_async_test(suite.advance_global_checkpoint("pessimistic_lock")).unwrap(); let checkpoint = run_async_test( suite .get_meta_cli() diff --git a/src/config.rs b/src/config.rs index f4fbf17a38f..68193fe0ba9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2497,9 +2497,6 @@ pub struct BackupStreamConfig { pub initial_scan_pending_memory_quota: ReadableSize, #[online_config(skip)] pub initial_scan_rate_limit: ReadableSize, - #[serde(skip)] - #[online_config(skip)] - pub use_checkpoint_v3: bool, } impl BackupStreamConfig { @@ -2532,7 +2529,6 @@ impl Default for BackupStreamConfig { file_size_limit: 
ReadableSize::mb(256), initial_scan_pending_memory_quota: ReadableSize(quota_size as _), initial_scan_rate_limit: ReadableSize::mb(60), - use_checkpoint_v3: true, } } } From f702db210210f852962f2a96087839b4fab01a04 Mon Sep 17 00:00:00 2001 From: Zwb Date: Wed, 12 Oct 2022 12:59:50 +0800 Subject: [PATCH 0262/1149] trace peers' availability info on leader side (#13209) ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/raftstore/src/store/config.rs | 8 +++ components/raftstore/src/store/fsm/peer.rs | 65 +++++++++++++++++++++ components/raftstore/src/store/fsm/store.rs | 2 + components/raftstore/src/store/msg.rs | 3 + components/raftstore/src/store/peer.rs | 37 ++++++++++++ components/raftstore/src/store/worker/pd.rs | 1 + src/server/service/kv.rs | 1 + tests/integrations/config/mod.rs | 1 + 9 files changed, 119 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 14620ebb6d1..7de5b6975f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2663,7 +2663,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#f6d05706948aa296cff4db060e0962d3720f32eb" +source = "git+https://github.com/pingcap/kvproto.git#43b4391f08e72aa7c86e9a86ab62d084f3633cc0" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 8052a58dea8..a5e84aa8501 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -303,6 +303,12 @@ pub struct Config { pub max_snapshot_file_raw_size: ReadableSize, pub unreachable_backoff: ReadableDuration, + + #[doc(hidden)] + #[serde(skip_serializing)] + #[online_config(hidden)] + // Interval to check peers availability info. 
+ pub check_peers_availability_interval: ReadableDuration, } impl Default for Config { @@ -407,6 +413,8 @@ impl Default for Config { report_region_buckets_tick_interval: ReadableDuration::secs(10), max_snapshot_file_raw_size: ReadableSize::mb(100), unreachable_backoff: ReadableDuration::secs(10), + // TODO: make its value reasonable + check_peers_availability_interval: ReadableDuration::secs(30), } } } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index e9a50d54db2..311258e72ff 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -1217,6 +1217,7 @@ where PeerTick::ReactivateMemoryLock => self.on_reactivate_memory_lock_tick(), PeerTick::ReportBuckets => self.on_report_region_buckets_tick(), PeerTick::CheckLongUncommitted => self.on_check_long_uncommitted_tick(), + PeerTick::CheckPeersAvailability => self.on_check_peers_availability(), } } @@ -2627,6 +2628,42 @@ where self.fsm.hibernate_state.count_vote(from.get_id()); } + fn on_availability_response(&mut self, from: &metapb::Peer, msg: &ExtraMessage) { + if !self.fsm.peer.is_leader() { + return; + } + if !msg.wait_data { + self.fsm + .peer + .wait_data_peers + .retain(|id| *id != from.get_id()); + debug!( + "receive peer ready info"; + "peer_id" => self.fsm.peer.peer.get_id(), + ); + return; + } + self.register_check_peers_availability_tick(); + } + + fn on_availability_request(&mut self, from: &metapb::Peer) { + if self.fsm.peer.is_leader() { + return; + } + let mut resp = ExtraMessage::default(); + resp.set_type(ExtraMessageType::MsgAvailabilityResponse); + resp.wait_data = self.fsm.peer.wait_data; + self.fsm + .peer + .send_extra_message(resp, &mut self.ctx.trans, from); + debug!( + "peer responses availability info to leader"; + "region_id" => self.region().get_id(), + "peer_id" => self.fsm.peer.peer.get_id(), + "leader_id" => from.id, + ); + } + fn on_extra_message(&mut self, mut msg: RaftMessage) { 
match msg.get_extra_msg().get_type() { ExtraMessageType::MsgRegionWakeUp | ExtraMessageType::MsgCheckStalePeer => { @@ -2660,6 +2697,12 @@ where ExtraMessageType::MsgRejectRaftLogCausedByMemoryUsage => { unimplemented!() } + ExtraMessageType::MsgAvailabilityRequest => { + self.on_availability_request(msg.get_from_peer()); + } + ExtraMessageType::MsgAvailabilityResponse => { + self.on_availability_response(msg.get_from_peer(), msg.get_extra_msg()); + } } } @@ -3209,6 +3252,7 @@ where ); } else { self.fsm.peer.transfer_leader(&from); + self.fsm.peer.wait_data_peers.clear(); } } } @@ -3660,6 +3704,7 @@ where .peer .peers_start_pending_time .retain(|&(p, _)| p != peer_id); + self.fsm.peer.wait_data_peers.retain(|id| *id != peer_id); } self.fsm.peer.remove_peer_from_cache(peer_id); // We only care remove itself now. @@ -5858,6 +5903,26 @@ where self.schedule_tick(PeerTick::PdHeartbeat) } + fn register_check_peers_availability_tick(&mut self) { + fail_point!("ignore schedule check peers availability tick", |_| {}); + self.schedule_tick(PeerTick::CheckPeersAvailability) + } + + fn on_check_peers_availability(&mut self) { + for peer_id in self.fsm.peer.wait_data_peers.iter() { + let peer = self.fsm.peer.get_peer_from_cache(*peer_id).unwrap(); + let mut msg = ExtraMessage::default(); + msg.set_type(ExtraMessageType::MsgAvailabilityRequest); + self.fsm + .peer + .send_extra_message(msg, &mut self.ctx.trans, &peer); + debug!( + "check peer availability"; + "target peer id" => *peer_id, + ); + } + } + fn on_check_peer_stale_state_tick(&mut self) { if self.fsm.peer.pending_remove { return; diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index d53270c2ef0..c83309011ac 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -558,6 +558,8 @@ where self.cfg.report_region_buckets_tick_interval.0; self.tick_batch[PeerTick::CheckLongUncommitted as usize].wait_duration = 
self.cfg.check_long_uncommitted_interval.0; + self.tick_batch[PeerTick::CheckPeersAvailability as usize].wait_duration = + self.cfg.check_peers_availability_interval.0; } } diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 991a89e7147..93c691fb241 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -376,6 +376,7 @@ pub enum PeerTick { ReactivateMemoryLock = 8, ReportBuckets = 9, CheckLongUncommitted = 10, + CheckPeersAvailability = 11, } impl PeerTick { @@ -395,6 +396,7 @@ impl PeerTick { PeerTick::ReactivateMemoryLock => "reactivate_memory_lock", PeerTick::ReportBuckets => "report_buckets", PeerTick::CheckLongUncommitted => "check_long_uncommitted", + PeerTick::CheckPeersAvailability => "check_peers_availability", } } @@ -411,6 +413,7 @@ impl PeerTick { PeerTick::ReactivateMemoryLock, PeerTick::ReportBuckets, PeerTick::CheckLongUncommitted, + PeerTick::CheckPeersAvailability, ]; TICKS } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index aca4db04fd5..2d3fea79378 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -888,6 +888,8 @@ where peer_cache: RefCell>, /// Record the last instant of each peer's heartbeat response. pub peer_heartbeats: HashMap, + /// Record the waiting data status of each follower or learner peer. + pub wait_data_peers: Vec, proposals: ProposalQueue>, leader_missing_time: Option, @@ -910,6 +912,13 @@ where /// target peer. /// - all read requests must be rejected. pub pending_remove: bool, + /// Currently it's used to indicate whether the witness -> non-witess + /// convertion operation is complete. 
The meaning of completion is that + /// this peer must contain the applied data, then PD can consider that + /// the conversion operation is complete, and can continue to schedule + /// other operators to prevent the existence of multiple witnesses in + /// the same time period. + pub wait_data: bool, /// Force leader state is only used in online recovery when the majority of /// peers are missing. In this state, it forces one peer to become leader @@ -1112,6 +1121,7 @@ where long_uncommitted_threshold: cfg.long_uncommitted_base_threshold.0, peer_cache: RefCell::new(HashMap::default()), peer_heartbeats: HashMap::default(), + wait_data_peers: Vec::default(), peers_start_pending_time: vec![], down_peer_ids: vec![], size_diff_hint: 0, @@ -1122,6 +1132,7 @@ where compaction_declined_bytes: 0, leader_unreachable: false, pending_remove: false, + wait_data: false, should_wake_up: false, force_leader: None, pending_merge_state: None, @@ -2005,6 +2016,7 @@ where if !self.is_leader() { self.peer_heartbeats.clear(); self.peers_start_pending_time.clear(); + self.wait_data_peers.clear(); return; } @@ -2564,6 +2576,7 @@ where // Update apply index to `last_applying_idx` self.read_progress .update_applied(self.last_applying_idx, &ctx.coprocessor_host); + self.notify_leader_the_peer_is_available(ctx); } CheckApplyingSnapStatus::Idle => { // FIXME: It's possible that the snapshot applying task is canceled. 
@@ -2580,6 +2593,29 @@ where true } + fn notify_leader_the_peer_is_available( + &mut self, + ctx: &mut PollContext, + ) { + if self.wait_data { + self.wait_data = false; + fail_point!("ignore notify leader the peer is available", |_| {}); + let leader_id = self.leader_id(); + let leader = self.get_peer_from_cache(leader_id); + if let Some(leader) = leader { + let mut msg = ExtraMessage::default(); + msg.set_type(ExtraMessageType::MsgAvailabilityResponse); + msg.wait_data = false; + self.send_extra_message(msg, &mut ctx.trans, &leader); + info!( + "notify leader the leader is available"; + "region id" => self.region().get_id(), + "peer id" => self.peer.id + ); + } + } + } + pub fn handle_raft_ready_append( &mut self, ctx: &mut PollContext, @@ -5221,6 +5257,7 @@ where approximate_size: self.approximate_size, approximate_keys: self.approximate_keys, replication_status: self.region_replication_status(), + wait_data_peers: self.wait_data_peers.clone(), }); if let Err(e) = ctx.pd_scheduler.schedule(task) { error!( diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index f3518f4f674..ec06d756fe9 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -120,6 +120,7 @@ pub struct HeartbeatTask { pub approximate_size: Option, pub approximate_keys: Option, pub replication_status: Option, + pub wait_data_peers: Vec, } /// Uses an asynchronous thread to tell PD something. 
diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 35deb7e4107..1beab4f0dc6 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1375,6 +1375,7 @@ fn handle_batch_commands_request< response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::$metric_name, source); })* Some(batch_commands_request::request::Cmd::Import(_)) => unimplemented!(), + Some(batch_commands_request::request::Cmd::PrepareFlashbackToVersion(_)) => unimplemented!(), } } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index d16fe3b39f6..d0eac27e3b1 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -250,6 +250,7 @@ fn test_serde_custom_tikv_config() { long_uncommitted_base_threshold: ReadableDuration::secs(1), max_snapshot_file_raw_size: ReadableSize::gb(10), unreachable_backoff: ReadableDuration::secs(111), + check_peers_availability_interval: ReadableDuration::secs(30), }; value.pd = PdConfig::new(vec!["example.com:443".to_owned()]); let titan_cf_config = TitanCfConfig { From c286b8a0a62115beba99f13ccd3db2529debcd18 Mon Sep 17 00:00:00 2001 From: Hu# Date: Wed, 12 Oct 2022 16:31:52 +0800 Subject: [PATCH 0263/1149] *: Add a Raft admin command to put the region into a locking flashback state. (#13541) ref tikv/tikv#13303, ref tikv/tikv#13519 Add a Raft admin command to put the region into a lock state to prevent any reading, writing, and scheduling and persist the state in the RegionLocalState. 
Signed-off-by: husharp --- Cargo.lock | 2 +- components/raftstore/src/store/fsm/apply.rs | 151 +++++++++- components/raftstore/src/store/fsm/peer.rs | 62 +--- components/raftstore/src/store/msg.rs | 4 +- components/raftstore/src/store/peer.rs | 51 +--- components/raftstore/src/store/util.rs | 24 +- components/test_raftstore/src/cluster.rs | 59 ++-- src/server/metrics.rs | 1 + src/server/service/kv.rs | 159 ++++++++-- .../integrations/raftstore/test_flashback.rs | 275 ++++++++++++++---- 10 files changed, 586 insertions(+), 202 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7de5b6975f3..cee27c1494d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2663,7 +2663,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#43b4391f08e72aa7c86e9a86ab62d084f3633cc0" +source = "git+https://github.com/pingcap/kvproto.git#4c6f1502851ed55b3ed023d180b6b10766446630" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index a84a60183b6..5fb5754b116 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -87,10 +87,9 @@ use crate::{ msg::{Callback, ErrorCallback, PeerMsg, ReadResponse, SignificantMsg}, peer::Peer, peer_storage::{write_initial_apply_state, write_peer_state}, - util, util::{ - admin_cmd_epoch_lookup, check_region_epoch, compare_region_epoch, ChangePeerI, - ConfChangeKind, KeysInfoFormatter, LatencyInspector, + self, admin_cmd_epoch_lookup, check_flashback_state, check_region_epoch, + compare_region_epoch, ChangePeerI, ConfChangeKind, KeysInfoFormatter, LatencyInspector, }, Config, RegionSnapshot, RegionTask, WriteCallback, }, @@ -277,6 +276,9 @@ pub enum ExecResult { TransferLeader { term: u64, }, + SetFlashbackState { + region: Region, + }, } /// The possible returned value when applying logs. 
@@ -1342,6 +1344,12 @@ where "peer_id" => self.id(), "err" => ?e ), + Error::FlashbackInProgress(..) => debug!( + "flashback is in process"; + "region_id" => self.region_id(), + "peer_id" => self.id(), + "err" => ?e + ), _ => error!(?e; "execute raft command"; "region_id" => self.region_id(), @@ -1368,6 +1376,7 @@ where ExecResult::CommitMerge { ref region, .. } => (Some(region.clone()), None), ExecResult::RollbackMerge { ref region, .. } => (Some(region.clone()), None), ExecResult::IngestSst { ref ssts } => (None, Some(ssts.clone())), + ExecResult::SetFlashbackState { region } => (Some(region.clone()), None), _ => (None, None), }, _ => (None, None), @@ -1432,6 +1441,9 @@ where self.region = region.clone(); self.is_merging = false; } + ExecResult::SetFlashbackState { ref region } => { + self.region = region.clone(); + } } } if let Some(epoch) = origin_epoch { @@ -1510,6 +1522,7 @@ where let include_region = req.get_header().get_region_epoch().get_version() >= self.last_merge_version; check_region_epoch(req, &self.region, include_region)?; + check_flashback_state(req, &self.region)?; if req.has_admin_request() { self.exec_admin_cmd(ctx, req) } else { @@ -1548,6 +1561,9 @@ where AdminCmdType::PrepareMerge => self.exec_prepare_merge(ctx, request), AdminCmdType::CommitMerge => self.exec_commit_merge(ctx, request), AdminCmdType::RollbackMerge => self.exec_rollback_merge(ctx, request), + AdminCmdType::PrepareFlashback | AdminCmdType::FinishFlashback => { + self.exec_flashback(ctx, request) + } AdminCmdType::InvalidAdmin => Err(box_err!("unsupported admin command type")), }?; response.set_cmd_type(cmd_type); @@ -2792,6 +2808,41 @@ where )) } + fn exec_flashback( + &self, + ctx: &mut ApplyContext, + req: &AdminRequest, + ) -> Result<(AdminResponse, ApplyResult)> { + let region_id = self.region_id(); + let region_state_key = keys::region_state_key(region_id); + let mut old_state = match ctx + .engine + .get_msg_cf::(CF_RAFT, ®ion_state_key) + { + Ok(Some(s)) => s, + _ => { 
+ return Err(box_err!("failed to get region state of {}", region_id)); + } + }; + let is_in_flashback = req.get_cmd_type() == AdminCmdType::PrepareFlashback; + old_state.mut_region().set_is_in_flashback(is_in_flashback); + let mut region = self.region.clone(); + region.set_is_in_flashback(is_in_flashback); + ctx.kv_wb_mut() + .put_msg_cf(CF_RAFT, &keys::region_state_key(region_id), &old_state) + .unwrap_or_else(|e| { + error!( + "{} failed to change flashback state to {:?} for region {}: {:?}", + self.tag, req, region_id, e + ) + }); + + Ok(( + AdminResponse::default(), + ApplyResult::Res(ExecResult::SetFlashbackState { region }), + )) + } + fn exec_compact_log( &mut self, req: &AdminRequest, @@ -4439,7 +4490,7 @@ mod tests { use engine_panic::PanicEngine; use engine_test::kv::{new_engine, KvTestEngine, KvTestSnapshot}; - use engine_traits::{Peekable as PeekableTrait, WriteBatchExt}; + use engine_traits::{Peekable as PeekableTrait, SyncMutable, WriteBatchExt}; use kvproto::{ kvrpcpb::ApiVersion, metapb::{self, RegionEpoch}, @@ -4454,6 +4505,7 @@ mod tests { store::{new_learner_peer, new_peer}, worker::dummy_scheduler, }; + use txn_types::WriteBatchFlags; use uuid::Uuid; use super::*; @@ -5110,6 +5162,7 @@ mod tests { true } AdminCmdType::BatchSplit => true, + AdminCmdType::PrepareFlashback | AdminCmdType::FinishFlashback => true, _ => false, } } @@ -6516,4 +6569,94 @@ mod tests { }); res.unwrap_err(); } + + #[test] + fn flashback_need_to_be_applied() { + let (_path, engine) = create_tmp_engine("flashback_need_to_be_applied"); + let (_, importer) = create_tmp_importer("flashback_need_to_be_applied"); + let mut host = CoprocessorHost::::default(); + host.registry + .register_query_observer(1, BoxQueryObserver::new(ApplyObserver::default())); + + let (tx, rx) = mpsc::channel(); + let (region_scheduler, _) = dummy_scheduler(); + let sender = Box::new(TestNotifier { tx }); + let cfg = Arc::new(VersionTrack::new(Config::default())); + let (router, mut system) = 
create_apply_batch_system(&cfg.value()); + let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); + let builder = super::Builder:: { + tag: "flashback_need_to_be_applied".to_owned(), + cfg, + sender, + region_scheduler, + coprocessor_host: host, + importer, + engine: engine.clone(), + router: router.clone(), + store_id: 1, + pending_create_peers, + }; + system.spawn("flashback_need_to_be_applied".to_owned(), builder); + + let peer_id = 3; + let mut reg = Registration { + id: peer_id, + ..Default::default() + }; + reg.region.set_id(1); + reg.region.mut_peers().push(new_peer(2, 3)); + reg.region.mut_region_epoch().set_conf_ver(1); + reg.region.mut_region_epoch().set_version(3); + reg.region.set_is_in_flashback(true); + router.schedule_task(1, Msg::Registration(reg)); + + let (capture_tx, capture_rx) = mpsc::channel(); + let mut region_state = RegionLocalState::default(); + region_state.mut_region().set_is_in_flashback(false); + let region_state_key = keys::region_state_key(1); + engine + .put_msg_cf(CF_RAFT, ®ion_state_key, ®ion_state) + .unwrap(); + // Check for not flashback request. + let mut cmd = AdminRequest::default(); + cmd.set_cmd_type(AdminCmdType::TransferLeader); + let mut flashback_req = EntryBuilder::new(1, 1).epoch(1, 3); + flashback_req.req.set_admin_request(cmd.clone()); + router.schedule_task( + 1, + Msg::apply(apply( + peer_id, + 1, + 1, + vec![flashback_req.build()], + vec![cb(1, 1, capture_tx.clone())], + )), + ); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(resp.get_header().get_error().has_flashback_in_progress()); + // Check for flashback request. 
+ cmd.set_cmd_type(AdminCmdType::PrepareFlashback); + region_state.mut_region().set_is_in_flashback(false); + let mut flashback_req = EntryBuilder::new(2, 2).epoch(1, 3); + flashback_req.req.set_admin_request(cmd.clone()); + flashback_req + .req + .mut_header() + .set_flags(WriteBatchFlags::FLASHBACK.bits()); + router.schedule_task( + 1, + Msg::apply(apply( + peer_id, + 1, + 2, + vec![flashback_req.build()], + vec![cb(2, 2, capture_tx)], + )), + ); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + + rx.recv_timeout(Duration::from_millis(500)).unwrap(); + system.shutdown(); + } } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 311258e72ff..d4a31561c63 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -21,7 +21,7 @@ use collections::{HashMap, HashSet}; use engine_traits::{Engines, KvEngine, RaftEngine, SstMetaInfo, WriteBatchExt, CF_LOCK, CF_RAFT}; use error_code::ErrorCodeExt; use fail::fail_point; -use futures::channel::{mpsc::UnboundedSender, oneshot::Sender}; +use futures::channel::mpsc::UnboundedSender; use keys::{self, enc_end_key, enc_start_key}; use kvproto::{ brpb::CheckAdminResponse, @@ -82,11 +82,10 @@ use crate::{ metrics::*, msg::{Callback, ExtCallback, InspectedRaftMessage}, peer::{ - ConsistencyState, FlashbackState, ForceLeaderState, Peer, PersistSnapshotResult, - SnapshotRecoveryState, SnapshotRecoveryWaitApplySyncer, StaleState, - UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, - UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryState, UnsafeRecoveryWaitApplySyncer, - TRANSFER_LEADER_COMMAND_REPLY_CTX, + ConsistencyState, ForceLeaderState, Peer, PersistSnapshotResult, SnapshotRecoveryState, + SnapshotRecoveryWaitApplySyncer, StaleState, UnsafeRecoveryExecutePlanSyncer, + UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, + 
UnsafeRecoveryState, UnsafeRecoveryWaitApplySyncer, TRANSFER_LEADER_COMMAND_REPLY_CTX, }, region_meta::RegionMeta, transport::Transport, @@ -987,38 +986,6 @@ where syncer.report_for_self(self_report); } - // Call msg PrepareFlashback to stop the scheduling and RW tasks. - // Once called, it will wait for the channel's notification in FlashbackState to - // finish. We place a flag in the request, which is checked when the - // pre_propose_raft_command is called. Stopping tasks is done by applying - // the flashback-only command in this way, But for RW local reads which need - // to be considered, we let the leader lease to None to ensure that local reads - // are not executed. - fn on_prepare_flashback(&mut self, ch: Sender) { - info!( - "prepare flashback"; - "region_id" => self.region().get_id(), - "peer_id" => self.fsm.peer.peer_id(), - ); - if self.fsm.peer.flashback_state.is_some() { - ch.send(false).unwrap(); - return; - } - self.fsm.peer.flashback_state = Some(FlashbackState::new(ch)); - // Let the leader lease to None to ensure that local reads are not executed. - self.fsm.peer.leader_lease_mut().expire_remote_lease(); - self.fsm.peer.maybe_finish_flashback_wait_apply(); - } - - fn on_finish_flashback(&mut self) { - info!( - "finish flashback"; - "region_id" => self.region().get_id(), - "peer_id" => self.fsm.peer.peer_id(), - ); - self.fsm.peer.flashback_state.take(); - } - fn on_check_pending_admin(&mut self, ch: UnboundedSender) { if !self.fsm.peer.is_leader() { // no need to check non-leader pending conf change. 
@@ -1464,9 +1431,6 @@ where SignificantMsg::UnsafeRecoveryFillOutReport(syncer) => { self.on_unsafe_recovery_fill_out_report(syncer) } - - SignificantMsg::PrepareFlashback(ch) => self.on_prepare_flashback(ch), - SignificantMsg::FinishFlashback => self.on_finish_flashback(), // for snapshot recovery (safe recovery) SignificantMsg::SnapshotRecoveryWaitApply(syncer) => { self.on_snapshot_recovery_wait_apply(syncer) @@ -2309,10 +2273,6 @@ where if self.fsm.peer.unsafe_recovery_state.is_some() { self.check_unsafe_recovery_state(); } - // TODO: combine recovery state and flashback state as a wait apply queue. - if self.fsm.peer.flashback_state.is_some() { - self.fsm.peer.maybe_finish_flashback_wait_apply(); - } if self.fsm.peer.snapshot_recovery_state.is_some() { self.fsm @@ -4831,6 +4791,9 @@ where } ExecResult::IngestSst { ssts } => self.on_ingest_sst_result(ssts), ExecResult::TransferLeader { term } => self.on_transfer_leader(term), + ExecResult::SetFlashbackState { region } => { + self.on_set_flashback_state(region.get_is_in_flashback()) + } } } @@ -4938,7 +4901,7 @@ where let region_id = self.region_id(); // When in the flashback state, we should not allow any other request to be // proposed. - if self.fsm.peer.flashback_state.is_some() { + if self.fsm.peer.is_in_flashback { self.ctx.raft_metrics.invalid_proposal.flashback.inc(); let flags = WriteBatchFlags::from_bits_truncate(msg.get_header().get_flags()); if !flags.contains(WriteBatchFlags::FLASHBACK) { @@ -6193,6 +6156,13 @@ where self.fsm.has_ready = true; } + fn on_set_flashback_state(&mut self, is_in_flashback: bool) { + // Set flashback memory + self.fsm.peer.is_in_flashback = is_in_flashback; + // Let the leader lease to None to ensure that local reads are not executed. + self.fsm.peer.leader_lease_mut().expire_remote_lease(); + } + /// Verify and store the hash to state. return true means the hash has been /// stored successfully. // TODO: Consider context in the function. 
diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 93c691fb241..6851ebd30d8 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -7,7 +7,7 @@ use std::{borrow::Cow, fmt}; use collections::HashSet; use engine_traits::{CompactedEvent, KvEngine, Snapshot}; -use futures::channel::{mpsc::UnboundedSender, oneshot::Sender}; +use futures::channel::mpsc::UnboundedSender; use kvproto::{ brpb::CheckAdminResponse, import_sstpb::SstMeta, @@ -516,8 +516,6 @@ where UnsafeRecoveryWaitApply(UnsafeRecoveryWaitApplySyncer), UnsafeRecoveryFillOutReport(UnsafeRecoveryFillOutReportSyncer), SnapshotRecoveryWaitApply(SnapshotRecoveryWaitApplySyncer), - PrepareFlashback(Sender), - FinishFlashback, CheckPendingAdmin(UnboundedSender), } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 2d3fea79378..c95dda17c2c 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -24,7 +24,6 @@ use engine_traits::{ }; use error_code::ErrorCodeExt; use fail::fail_point; -use futures::channel::oneshot::Sender; use getset::{Getters, MutGetters}; use kvproto::{ errorpb, @@ -840,32 +839,6 @@ pub enum UnsafeRecoveryState { Destroy(UnsafeRecoveryExecutePlanSyncer), } -// This state is set by the peer fsm when invoke msg PrepareFlashback. Once set, -// it is checked every time this peer applies a new entry or a snapshot, -// if the latest committed index is met, the syncer will be called to notify the -// result. 
-#[derive(Debug)] -pub struct FlashbackState(Option>); - -impl FlashbackState { - pub fn new(ch: Sender) -> Self { - FlashbackState(Some(ch)) - } - - pub fn finish_wait_apply(&mut self) { - if self.0.is_none() { - return; - } - let ch = self.0.take().unwrap(); - match ch.send(true) { - Ok(_) => {} - Err(e) => { - error!("Fail to notify flashback state"; "err" => ?e); - } - } - } -} - #[derive(Getters, MutGetters)] pub struct Peer where @@ -1056,7 +1029,8 @@ where /// lead_transferee if the peer is in a leadership transferring. pub lead_transferee: u64, pub unsafe_recovery_state: Option, - pub flashback_state: Option, + // Used as the memory state for Flashback to reject RW/Schedule before proposing. + pub is_in_flashback: bool, pub snapshot_recovery_state: Option, } @@ -1089,7 +1063,6 @@ where peer.get_id(), tag.clone(), )?; - let applied_index = ps.applied_index(); let raft_cfg = raft::Config { @@ -1192,7 +1165,7 @@ where last_region_buckets: None, lead_transferee: raft::INVALID_ID, unsafe_recovery_state: None, - flashback_state: None, + is_in_flashback: region.get_is_in_flashback(), snapshot_recovery_state: None, }; @@ -2555,10 +2528,6 @@ where debug!("unsafe recovery finishes applying a snapshot"); self.unsafe_recovery_maybe_finish_wait_apply(/* force= */ false); } - if self.flashback_state.is_some() { - debug!("flashback finishes applying a snapshot"); - self.maybe_finish_flashback_wait_apply(); - } if self.snapshot_recovery_state.is_some() { debug!("snapshot recovery finishes applying a snapshot"); self.snapshot_recovery_maybe_finish_wait_apply(false); @@ -3541,7 +3510,7 @@ where self.force_leader.is_some(), ) { None - } else if self.flashback_state.is_some() { + } else if self.is_in_flashback { debug!( "prevents renew lease while in flashback state"; "region_id" => self.region_id, @@ -5131,16 +5100,6 @@ where } } } - - pub fn maybe_finish_flashback_wait_apply(&mut self) { - let finished = - self.raft_group.raft.raft_log.applied == 
self.raft_group.raft.raft_log.last_index(); - if finished { - if let Some(flashback_state) = self.flashback_state.as_mut() { - flashback_state.finish_wait_apply(); - } - } - } } #[derive(Default, Debug)] @@ -5690,6 +5649,8 @@ pub fn get_sync_log_from_request(msg: &RaftCmdRequest) -> bool { | AdminCmdType::PrepareMerge | AdminCmdType::CommitMerge | AdminCmdType::RollbackMerge + | AdminCmdType::PrepareFlashback + | AdminCmdType::FinishFlashback ); } diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 067bb6f727e..42276c79ab6 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -29,7 +29,7 @@ use raft::{ use raft_proto::ConfChangeI; use tikv_util::{box_err, debug, info, store::region, time::monotonic_raw_now, Either}; use time::{Duration, Timespec}; -use txn_types::TimeStamp; +use txn_types::{TimeStamp, WriteBatchFlags}; use super::peer_storage; use crate::{coprocessor::CoprocessorHost, Error, Result}; @@ -192,6 +192,9 @@ pub fn admin_cmd_epoch_lookup(admin_cmp_type: AdminCmdType) -> AdminCmdEpochStat AdminCmdType::RollbackMerge => AdminCmdEpochState::new(true, true, true, false), // Transfer leader AdminCmdType::TransferLeader => AdminCmdEpochState::new(true, true, false, false), + AdminCmdType::PrepareFlashback | AdminCmdType::FinishFlashback => { + AdminCmdEpochState::new(false, false, false, false) + } } } @@ -277,6 +280,25 @@ pub fn compare_region_epoch( Ok(()) } +pub fn check_flashback_state(req: &RaftCmdRequest, region: &metapb::Region) -> Result<()> { + // If admin flashback has not been applied but the region is already in a + // flashback state, the request is rejected + if region.get_is_in_flashback() { + let flags = WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()); + if flags.contains(WriteBatchFlags::FLASHBACK) { + return Ok(()); + } + if req.has_admin_request() + && (req.get_admin_request().get_cmd_type() == AdminCmdType::PrepareFlashback + || 
req.get_admin_request().get_cmd_type() == AdminCmdType::FinishFlashback) + { + return Ok(()); + } + return Err(Error::FlashbackInProgress(region.get_id())); + } + Ok(()) +} + pub fn is_region_epoch_equal( from_epoch: &metapb::RegionEpoch, current_epoch: &metapb::RegionEpoch, diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index ef0f2246b7d..9b1f19bf21a 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -34,6 +34,7 @@ use kvproto::{ use pd_client::{BucketStat, PdClient}; use raft::eraftpb::ConfChangeType; use raftstore::{ + router::RaftStoreRouter, store::{ fsm::{ create_raft_batch_system, @@ -54,6 +55,7 @@ use tikv_util::{ worker::LazyWorker, HandyRwLock, }; +use txn_types::WriteBatchFlags; use super::*; use crate::Config; @@ -1419,26 +1421,49 @@ impl Cluster { .unwrap(); } - pub async fn call_and_wait_prepare_flashback(&mut self, region_id: u64, store_id: u64) { - let router = self.sim.rl().get_router(store_id).unwrap(); - let (tx, rx) = oneshot::channel(); - - router - .significant_send(region_id, SignificantMsg::PrepareFlashback(tx)) - .unwrap(); - - let prepared = rx.await.unwrap(); - if !prepared { - panic!("prepare flashback failed"); - } - } + pub async fn send_flashback_msg( + &mut self, + region_id: u64, + store_id: u64, + cmd_type: AdminCmdType, + epoch: metapb::RegionEpoch, + peer: metapb::Peer, + ) { + let (result_tx, result_rx) = oneshot::channel(); + let cb = Callback::write(Box::new(move |resp| { + if resp.response.get_header().has_error() { + result_tx.send(false).unwrap(); + error!("send flashback msg failed"; "region_id" => region_id); + return; + } + result_tx.send(true).unwrap(); + })); + + let mut admin = AdminRequest::default(); + admin.set_cmd_type(cmd_type); + let mut req = RaftCmdRequest::default(); + req.mut_header().set_region_id(region_id); + req.mut_header().set_region_epoch(epoch); + req.mut_header().set_peer(peer); + 
req.set_admin_request(admin); + req.mut_header() + .set_flags(WriteBatchFlags::FLASHBACK.bits()); - pub fn call_finish_flashback(&mut self, region_id: u64, store_id: u64) { let router = self.sim.rl().get_router(store_id).unwrap(); + if let Err(e) = router.send_command( + req, + cb, + RaftCmdExtraOpts { + deadline: None, + disk_full_opt: kvproto::kvrpcpb::DiskFullOpt::AllowedOnAlmostFull, + }, + ) { + panic!("router send failed, error{}", e); + } - router - .significant_send(region_id, SignificantMsg::FinishFlashback) - .unwrap(); + if !result_rx.await.unwrap() { + panic!("Flashback call msg failed"); + } } pub fn must_split(&mut self, region: &metapb::Region, split_key: &[u8]) { diff --git a/src/server/metrics.rs b/src/server/metrics.rs index a73e79ec59b..3e07a75899f 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -35,6 +35,7 @@ make_auto_flush_static_metric! { kv_resolve_lock, kv_gc, kv_delete_range, + kv_prepare_flashback_to_version, kv_flashback_to_version, raw_get, raw_batch_get, diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 1beab4f0dc6..924236529d9 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -21,7 +21,10 @@ use kvproto::{ errorpb::{Error as RegionError, *}, kvrpcpb::*, mpp::*, - raft_cmdpb::{CmdType, RaftCmdRequest, RaftRequestHeader, Request as RaftRequest}, + raft_cmdpb::{ + AdminCmdType, AdminRequest, CmdType, RaftCmdRequest, RaftRequestHeader, + Request as RaftRequest, + }, raft_serverpb::*, tikvpb::*, }; @@ -32,7 +35,7 @@ use raftstore::{ store::{ memory::{MEMTRACE_APPLYS, MEMTRACE_RAFT_ENTRIES, MEMTRACE_RAFT_MESSAGES}, metrics::RAFT_ENTRIES_CACHES_GAUGE, - Callback, CasualMessage, CheckLeaderTask, RaftCmdExtraOpts, SignificantMsg, + Callback, CasualMessage, CheckLeaderTask, RaftCmdExtraOpts, }, DiscardReason, Error as RaftStoreError, Result as RaftStoreResult, }; @@ -44,8 +47,9 @@ use tikv_util::{ time::{duration_to_ms, duration_to_sec, Instant}, worker::Scheduler, }; +use 
tokio::sync::Mutex; use tracker::{set_tls_tracker_token, RequestInfo, RequestType, Tracker, GLOBAL_TRACKERS}; -use txn_types::{self, Key}; +use txn_types::{self, Key, WriteBatchFlags}; use super::batch::{BatcherBuilder, ReqBatcher}; use crate::{ @@ -401,6 +405,37 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor ); } + fn kv_prepare_flashback_to_version( + &mut self, + ctx: RpcContext<'_>, + mut req: PrepareFlashbackToVersionRequest, + sink: UnarySink, + ) { + let begin_instant = Instant::now(); + + let source = req.mut_context().take_request_source(); + let resp = future_prepare_flashback_to_version(&self.storage, &self.ch, req); + let task = async move { + let resp = resp.await?; + let elapsed = begin_instant.saturating_elapsed(); + sink.success(resp).await?; + GRPC_MSG_HISTOGRAM_STATIC + .kv_prepare_flashback_to_version + .observe(elapsed.as_secs_f64()); + record_request_source_metrics(source, elapsed); + ServerResult::Ok(()) + } + .map_err(|e| { + log_net_error!(e, "kv rpc failed"; + "request" => stringify!($fn_name) + ); + GRPC_MSG_FAIL_COUNTER.kv_prepare_flashback_to_version.inc(); + }) + .map(|_| ()); + + ctx.spawn(task); + } + fn kv_flashback_to_version( &mut self, ctx: RpcContext<'_>, @@ -1375,7 +1410,6 @@ fn handle_batch_commands_request< response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::$metric_name, source); })* Some(batch_commands_request::request::Cmd::Import(_)) => unimplemented!(), - Some(batch_commands_request::request::Cmd::PrepareFlashbackToVersion(_)) => unimplemented!(), } } } @@ -1394,6 +1428,7 @@ fn handle_batch_commands_request< ResolveLock, future_resolve_lock(storage), kv_resolve_lock; Gc, future_gc(), kv_gc; DeleteRange, future_delete_range(storage), kv_delete_range; + PrepareFlashbackToVersion, future_prepare_flashback_to_version(storage, ch), kv_prepare_flashback_to_version; FlashbackToVersion, future_flashback_to_version(storage, ch), kv_flashback_to_version; RawBatchGet, 
future_raw_batch_get(storage), raw_batch_get; RawPut, future_raw_put(storage), raw_put; @@ -1687,6 +1722,27 @@ fn future_delete_range( } } +// Preparing the flashback for a region/key range will "lock" the region so that +// there is no any read, write or schedule operation could be proposed before +// the actual flashback operation. +fn future_prepare_flashback_to_version< + E: Engine, + L: LockManager, + F: KvFormat, + T: RaftStoreRouter + 'static, +>( + // Keep this param to hint the type of E for the compiler. + _storage: &Storage, + _raft_router: &T, + _req: PrepareFlashbackToVersionRequest, +) -> impl Future> { + // TODO: implement this. + async move { unimplemented!() } +} + +// Flashback the region to a specific point with the given `version`, please +// make sure the region is "locked" by `PrepareFlashbackToVersion` first, +// otherwise this request will fail. fn future_flashback_to_version< T: RaftStoreRouter + 'static, E: Engine, @@ -1698,24 +1754,26 @@ fn future_flashback_to_version< req: FlashbackToVersionRequest, ) -> impl Future> { let storage_clone = storage.clone(); - let raft_router_clone = raft_router.clone(); + let raft_router = Mutex::new(raft_router.clone()); async move { - // Send a `SignificantMsg::PrepareFlashback` to prepare the raftstore for the + // Send an `AdminCmdType::PrepareFlashback` to prepare the raftstore for the // later flashback. This will first block all scheduling, read and write - // operations and then wait for the latest Raft log to be applied before - // we start the flashback command. - let region_id = req.get_context().get_region_id(); - let (result_tx, result_rx) = oneshot::channel(); - raft_router_clone - .significant_send(region_id, SignificantMsg::PrepareFlashback(result_tx))?; - if !result_rx.await? 
{ - return Err(Error::Other(box_err!( - "failed to prepare the region {} for flashback", - region_id - ))); - } + // operations, then wait for the latest Raft log to be applied before we start + // the flashback command. Once invoked, we update the persistence state + // in `RegionLocalState` and region's meta, and when that + // admin cmd is applied, the `PrepareFlashback` command will update the memory + // state of the flashback, rejecting all read and write operations at + // propose and applied. We make FlashbackToVersion a two-stage request + // and lock the region in the first stage. + send_flashback_msg::( + &raft_router, + req.get_context(), + AdminCmdType::PrepareFlashback, + ) + .await?; + let (cb, f) = paired_future_callback(); - let res = storage_clone.sched_txn_command(req.into(), cb); + let res = storage_clone.sched_txn_command(req.clone().into(), cb); // Avoid crossing `.await` to bypass the `Send` constraint. drop(storage_clone); let v = match res { @@ -1725,9 +1783,17 @@ fn future_flashback_to_version< fail_point!("skip_finish_flashback_to_version", |_| { Ok(FlashbackToVersionResponse::default()) }); - // Send a `SignificantMsg::FinishFlashback` to notify the raftstore that the - // flashback has been finished. 
- raft_router_clone.significant_send(region_id, SignificantMsg::FinishFlashback)?; + // Send an `AdminCmdType::FinishFlashback` to unset the persistence state + // in `RegionLocalState` and region's meta, and when that + // admin cmd is applied, will update the memory + // state of the flashback + send_flashback_msg::( + &raft_router, + req.get_context(), + AdminCmdType::FinishFlashback, + ) + .await?; + let mut resp = FlashbackToVersionResponse::default(); if let Some(err) = extract_region_error(&v) { resp.set_region_error(err); @@ -2401,6 +2467,55 @@ fn needs_reject_raft_append(reject_messages_on_memory_ratio: f64) -> bool { false } +async fn send_flashback_msg + 'static, E: Engine>( + raft_router: &Mutex, + ctx: &Context, + cmd_type: AdminCmdType, +) -> ServerResult<()> { + let (result_tx, result_rx) = oneshot::channel(); + let cb = Callback::write(Box::new(move |resp| { + if resp.response.get_header().has_error() { + result_tx.send(false).unwrap(); + error!("send flashback msg failed"; "error" => ?resp.response.get_header().get_error()); + return; + } + result_tx.send(true).unwrap(); + })); + let mut admin = AdminRequest::default(); + admin.set_cmd_type(cmd_type); + let mut req = RaftCmdRequest::default(); + req.mut_header().set_region_id(ctx.get_region_id()); + req.mut_header() + .set_region_epoch(ctx.get_region_epoch().clone()); + req.mut_header().set_peer(ctx.get_peer().clone()); + req.set_admin_request(admin); + req.mut_header() + .set_flags(WriteBatchFlags::FLASHBACK.bits()); + // call admin request directly + let raft_router = raft_router.lock().await; + if let Err(e) = raft_router.send_command( + req, + cb, + RaftCmdExtraOpts { + deadline: None, + disk_full_opt: DiskFullOpt::AllowedOnAlmostFull, + }, + ) { + return Err(Error::Other(box_err!( + "flashback router send failed, error {:?}", + e + ))); + } + if !result_rx.await? 
{ + return Err(Error::Other(box_err!( + "send flashback msg {:?} to region {} failed", + cmd_type, + ctx.get_region_id() + ))); + } + Ok(()) +} + #[cfg(test)] mod tests { use std::thread; diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index 064edebf88a..5709cd22804 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -1,33 +1,13 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::time::Duration; +use std::time::{Duration, Instant}; use futures::executor::block_on; use kvproto::metapb; use test_raftstore::*; +use tikv_util::time::InstantExt; use txn_types::WriteBatchFlags; -#[test] -fn test_flashback_for_applied_index() { - let mut cluster = new_node_cluster(0, 3); - cluster.run(); - - // write for cluster. - let value = vec![1_u8; 8096]; - multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); - - // prepare for flashback - let region = cluster.get_region(b"k1"); - block_on(cluster.call_and_wait_prepare_flashback(region.get_id(), 1)); - - let last_index = cluster - .raft_local_state(region.get_id(), 1) - .get_last_index(); - let appied_index = cluster.apply_state(region.get_id(), 1).get_applied_index(); - - assert_eq!(last_index, appied_index); -} - #[test] fn test_flashback_for_schedule() { let mut cluster = new_node_cluster(0, 3); @@ -36,16 +16,21 @@ fn test_flashback_for_schedule() { cluster.must_transfer_leader(1, new_peer(2, 2)); cluster.must_transfer_leader(1, new_peer(1, 1)); - // prepare for flashback + // Prepare for flashback let region = cluster.get_region(b"k1"); - block_on(cluster.call_and_wait_prepare_flashback(region.get_id(), 1)); + block_on(cluster.send_flashback_msg( + region.get_id(), + 1, + kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, + cluster.get_region_epoch(1), + new_peer(1, 1), + )); - // verify the schedule is unabled. + // Verify the schedule is unabled. 
let mut region = cluster.get_region(b"k3"); let admin_req = new_transfer_leader_cmd(new_peer(2, 2)); - let mut transfer_leader = + let transfer_leader = new_admin_request(region.get_id(), ®ion.take_region_epoch(), admin_req); - transfer_leader.mut_header().set_peer(new_peer(1, 1)); let resp = cluster .call_command_on_leader(transfer_leader, Duration::from_secs(3)) .unwrap(); @@ -58,23 +43,17 @@ fn test_flashback_for_schedule() { } ); - // verify the schedule can be executed if add flashback flag in request's + // Verify the schedule can be executed if add flashback flag in request's // header. - let mut region = cluster.get_region(b"k3"); - let admin_req = new_transfer_leader_cmd(new_peer(2, 2)); - let mut transfer_leader = - new_admin_request(region.get_id(), ®ion.take_region_epoch(), admin_req); - transfer_leader.mut_header().set_peer(new_peer(1, 1)); - transfer_leader - .mut_header() - .set_flags(WriteBatchFlags::FLASHBACK.bits()); - let resp = cluster - .call_command_on_leader(transfer_leader, Duration::from_secs(5)) - .unwrap(); - assert!(!resp.get_header().has_error()); - - cluster.call_finish_flashback(region.get_id(), 1); - // transfer leader to (1, 1) + must_transfer_leader(&mut cluster, region.get_id(), new_peer(2, 2)); + block_on(cluster.send_flashback_msg( + region.get_id(), + 2, + kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, + cluster.get_region_epoch(1), + new_peer(2, 2), + )); + // Transfer leader to (1, 1) cluster.must_transfer_leader(1, new_peer(1, 1)); } @@ -82,16 +61,23 @@ fn test_flashback_for_schedule() { fn test_flashback_for_write() { let mut cluster = new_node_cluster(0, 3); cluster.run(); + cluster.must_transfer_leader(1, new_peer(1, 1)); - // write for cluster + // Write for cluster let value = vec![1_u8; 8096]; multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); - // prepare for flashback + // Prepare for flashback let region = cluster.get_region(b"k1"); - 
block_on(cluster.call_and_wait_prepare_flashback(region.get_id(), 1)); + block_on(cluster.send_flashback_msg( + region.get_id(), + 1, + kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, + cluster.get_region_epoch(1), + new_peer(1, 1), + )); - // write will be blocked + // Write will be blocked let value = vec![1_u8; 8096]; must_get_error_flashback_in_progress(&mut cluster, ®ion, new_put_cmd(b"k1", &value)); @@ -101,7 +87,13 @@ fn test_flashback_for_write() { new_put_cmd(b"k1", &value), ); - cluster.call_finish_flashback(region.get_id(), 1); + block_on(cluster.send_flashback_msg( + region.get_id(), + 1, + kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, + cluster.get_region_epoch(1), + new_peer(1, 1), + )); multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); } @@ -110,21 +102,28 @@ fn test_flashback_for_write() { fn test_flashback_for_read() { let mut cluster = new_node_cluster(0, 3); cluster.run(); + cluster.must_transfer_leader(1, new_peer(1, 1)); - // write for cluster + // Write for cluster let value = vec![1_u8; 8096]; multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); // read for cluster multi_do_cmd(&mut cluster, new_get_cf_cmd("write", b"k1")); - // prepare for flashback + // Prepare for flashback let region = cluster.get_region(b"k1"); - block_on(cluster.call_and_wait_prepare_flashback(region.get_id(), 1)); + block_on(cluster.send_flashback_msg( + region.get_id(), + 1, + kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, + cluster.get_region_epoch(1), + new_peer(1, 1), + )); // read will be blocked must_get_error_flashback_in_progress(&mut cluster, ®ion, new_get_cf_cmd("write", b"k1")); - // verify the read can be executed if add flashback flag in request's + // Verify the read can be executed if add flashback flag in request's // header. 
must_cmd_add_flashback_flag( &mut cluster, @@ -132,7 +131,13 @@ fn test_flashback_for_read() { new_get_cf_cmd("write", b"k1"), ); - cluster.call_finish_flashback(region.get_id(), 1); + block_on(cluster.send_flashback_msg( + region.get_id(), + 1, + kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, + cluster.get_region_epoch(1), + new_peer(1, 1), + )); multi_do_cmd(&mut cluster, new_get_cf_cmd("write", b"k1")); } @@ -157,7 +162,7 @@ fn test_flashback_for_local_read() { let region = cluster.get_region(b"k1"); cluster.must_transfer_leader(region.get_id(), peer.clone()); - // check local read before prepare flashback + // Check local read before prepare flashback let state = cluster.raft_local_state(region.get_id(), store_id); let last_index = state.get_last_index(); // Make sure the leader transfer procedure timeouts. @@ -167,8 +172,20 @@ fn test_flashback_for_local_read() { let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index); - // prepare for flashback - block_on(cluster.call_and_wait_prepare_flashback(region.get_id(), store_id)); + // Prepare for flashback + block_on(cluster.send_flashback_msg( + region.get_id(), + store_id, + kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, + cluster.get_region_epoch(1), + new_peer(store_id, store_id), + )); + + // Check the leader does a local read. + let state = cluster.raft_local_state(region.get_id(), store_id); + assert_eq!(state.get_last_index(), last_index + 1); + // Wait for apply_res to set leader lease . 
+ sleep_ms(500); must_error_read_on_peer( &mut cluster, @@ -191,11 +208,20 @@ fn test_flashback_for_local_read() { // Also check read by propose was blocked let state = cluster.raft_local_state(region.get_id(), store_id); - assert_eq!(state.get_last_index(), last_index); + assert_eq!(state.get_last_index(), last_index + 1); + + block_on(cluster.send_flashback_msg( + region.get_id(), + store_id, + kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, + cluster.get_region_epoch(1), + new_peer(store_id, store_id), + )); - cluster.call_finish_flashback(region.get_id(), store_id); + let state = cluster.raft_local_state(region.get_id(), store_id); + assert_eq!(state.get_last_index(), last_index + 2); - // check local read after finish flashback + // Check local read after finish flashback let state = cluster.raft_local_state(region.get_id(), store_id); let last_index = state.get_last_index(); // Make sure the leader transfer procedure timeouts. @@ -212,11 +238,17 @@ fn test_flashback_for_status_cmd_as_region_detail() { let mut cluster = new_node_cluster(0, 3); cluster.run(); + let leader = cluster.leader_of_region(1).unwrap(); let region = cluster.get_region(b"k1"); - block_on(cluster.call_and_wait_prepare_flashback(region.get_id(), 1)); + block_on(cluster.send_flashback_msg( + region.get_id(), + leader.get_store_id(), + kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, + cluster.get_region_epoch(1), + new_peer(leader.get_store_id(), leader.get_store_id()), + )); - let leader = cluster.leader_of_region(1).unwrap(); - let region_detail = cluster.region_detail(1, 1); + let region_detail = cluster.region_detail(region.get_id(), leader.get_store_id()); assert!(region_detail.has_region()); let region = region_detail.get_region(); assert_eq!(region.get_id(), 1); @@ -231,6 +263,123 @@ fn test_flashback_for_status_cmd_as_region_detail() { assert_eq!(region_detail.get_leader(), &leader); } +#[test] +fn test_flashback_for_check_is_in_persist() { + let mut cluster = 
new_node_cluster(0, 3); + cluster.run(); + + let leader_peer = new_peer(2, 2); + cluster.must_transfer_leader(1, leader_peer.clone()); + + let local_state = cluster.region_local_state(1, 2); + assert!(!local_state.get_region().get_is_in_flashback()); + + // Prepare for flashback + block_on(cluster.send_flashback_msg( + 1, + 2, + kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, + cluster.get_region_epoch(1), + leader_peer.clone(), + )); + let local_state = cluster.region_local_state(1, 2); + assert!(local_state.get_region().get_is_in_flashback()); + + block_on(cluster.send_flashback_msg( + 1, + 2, + kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, + cluster.get_region_epoch(1), + leader_peer, + )); + let local_state = cluster.region_local_state(1, 2); + assert!(!local_state.get_region().get_is_in_flashback()); +} + +#[test] +fn test_flashback_for_apply_snapshot() { + let mut cluster = new_node_cluster(0, 5); + cluster.run(); + cluster.must_transfer_leader(1, new_peer(1, 1)); + + // Make node3 isolationed + cluster.add_send_filter(IsolationFilterFactory::new(5)); + + let local_state = cluster.region_local_state(1, 1); + assert!(!local_state.get_region().get_is_in_flashback()); + + // Write for cluster + let value = vec![1_u8; 8096]; + multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); + + // Prepare for flashback + block_on(cluster.send_flashback_msg( + 1, + 1, + kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, + cluster.get_region_epoch(1), + new_peer(1, 1), + )); + let local_state = cluster.region_local_state(1, 1); + assert!(local_state.get_region().get_is_in_flashback()); + + // Add node 3 back. 
+ cluster.clear_send_filters(); + // Wait for snapshot + sleep_ms(500); + + must_transfer_leader(&mut cluster, 1, new_peer(5, 5)); + let local_state = cluster.region_local_state(1, 5); + assert!(local_state.get_region().get_is_in_flashback()); + + block_on(cluster.send_flashback_msg( + 1, + 5, + kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, + cluster.get_region_epoch(1), + new_peer(5, 5), + )); +} + +fn transfer_leader(cluster: &mut Cluster, region_id: u64, leader: metapb::Peer) { + let epoch = cluster.get_region_epoch(region_id); + let admin_req = new_transfer_leader_cmd(leader); + let mut transfer_leader = new_admin_request(region_id, &epoch, admin_req); + transfer_leader + .mut_header() + .set_flags(WriteBatchFlags::FLASHBACK.bits()); + let resp = cluster + .call_command_on_leader(transfer_leader, Duration::from_secs(5)) + .unwrap(); + assert!(!resp.get_header().has_error()); +} + +fn must_transfer_leader( + cluster: &mut Cluster, + region_id: u64, + leader: metapb::Peer, +) { + let timer = Instant::now(); + loop { + cluster.reset_leader_of_region(region_id); + let cur_leader = cluster.leader_of_region(region_id); + if let Some(ref cur_leader) = cur_leader { + if cur_leader.get_id() == leader.get_id() + && cur_leader.get_store_id() == leader.get_store_id() + { + return; + } + } + if timer.saturating_elapsed() > Duration::from_secs(5) { + panic!( + "failed to transfer leader to [{}] {:?}, current leader: {:?}", + region_id, leader, cur_leader + ); + } + transfer_leader(cluster, region_id, leader.clone()); + } +} + fn multi_do_cmd(cluster: &mut Cluster, cmd: kvproto::raft_cmdpb::Request) { for _ in 0..100 { let mut reqs = vec![]; @@ -246,7 +395,7 @@ fn must_cmd_add_flashback_flag( region: &mut metapb::Region, cmd: kvproto::raft_cmdpb::Request, ) { - // verify the read can be executed if add flashback flag in request's + // Verify the read can be executed if add flashback flag in request's // header. 
let mut req = new_request( region.get_id(), From 066a4222da1689e94b7cf78ed3f3131166c9a524 Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 12 Oct 2022 01:49:50 -0700 Subject: [PATCH 0264/1149] channel: early break when fetch nothing (#13516) close tikv/tikv#13394 Otherwise it will waste CPU on loop. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/tikv_util/src/mpsc/future.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/tikv_util/src/mpsc/future.rs b/components/tikv_util/src/mpsc/future.rs index c38dc8c1492..1e9f94c2f2d 100644 --- a/components/tikv_util/src/mpsc/future.rs +++ b/components/tikv_util/src/mpsc/future.rs @@ -213,6 +213,8 @@ where for _ in 1..ctx.max_batch_size { if let Poll::Ready(Some(m)) = ctx.rx.poll_next_unpin(cx) { (ctx.collector)(&mut collector, m); + } else { + break; } } Poll::Ready(Some(collector)) From 2a9888381bdf5384f4b097cd670bb1192496c2b4 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Thu, 13 Oct 2022 12:15:51 +0800 Subject: [PATCH 0265/1149] storage/lock_manager: Avoid stale entries in the new lock waiting queue (#13584) ref tikv/tikv#13298 Avoid stale entries in the new lock waiting queue. This is done by making use of another implementation of the priority queue (instead of the std BinaryHeap) that supports efficiently removing element by key. So that when a lock-waiting request is canceled, the entry can be removed from the queue immediately, instead of waiting for the lazy-cleaning-up. 
Signed-off-by: MyonKeminta Co-authored-by: Ti Chi Robot Co-authored-by: Yilin Chen --- Cargo.lock | 11 ++ Cargo.toml | 2 + src/storage/lock_manager/lock_wait_context.rs | 159 ++++++++------- .../lock_manager/lock_waiting_queue.rs | 183 +++++++++++++----- src/storage/metrics.rs | 2 +- 5 files changed, 232 insertions(+), 125 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cee27c1494d..bf3536544fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2648,6 +2648,15 @@ dependencies = [ "winapi-build", ] +[[package]] +name = "keyed_priority_queue" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d63b6407b66fc81fc539dccf3ddecb669f393c5101b6a2be3976c95099a06e8" +dependencies = [ + "indexmap", +] + [[package]] name = "keys" version = "0.1.0" @@ -6180,6 +6189,7 @@ dependencies = [ "futures-executor", "futures-timer", "futures-util", + "fxhash", "getset", "grpcio", "grpcio-health", @@ -6190,6 +6200,7 @@ dependencies = [ "hyper-tls", "into_other", "itertools", + "keyed_priority_queue", "keys", "kvproto", "lazy_static", diff --git a/Cargo.toml b/Cargo.toml index 545ee9380a7..c38b98631c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -95,6 +95,7 @@ futures = { version = "0.3", features = ["thread-pool", "compat"] } futures-executor = "0.3.1" futures-timer = "3.0" futures-util = { version = "0.3.1", default-features = false, features = ["io", "async-await"] } +fxhash = "0.2.1" getset = "0.1" grpcio = { version = "0.10.3", default-features = false, features = ["openssl-vendored", "protobuf-codec", "nightly"] } grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } @@ -104,6 +105,7 @@ hyper = { version = "0.14", features = ["full"] } hyper-tls = "0.5" into_other = { path = "components/into_other", default-features = false } itertools = "0.10" +keyed_priority_queue = "0.4" keys = { path = "components/keys", default-features = false } kvproto = { git = "https://github.com/pingcap/kvproto.git" } 
lazy_static = "1.3" diff --git a/src/storage/lock_manager/lock_wait_context.rs b/src/storage/lock_manager/lock_wait_context.rs index 97ff49f965b..46ed24fde70 100644 --- a/src/storage/lock_manager/lock_wait_context.rs +++ b/src/storage/lock_manager/lock_wait_context.rs @@ -14,21 +14,17 @@ //! Note: The corresponding implementation in `WaiterManager` is not yet //! implemented, and this mod is currently not used yet. -use std::{ - convert::TryInto, - result::Result, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, - }, -}; +use std::{convert::TryInto, result::Result, sync::Arc}; use parking_lot::Mutex; -use txn_types::TimeStamp; +use txn_types::Key; use crate::storage::{ errors::SharedError, - lock_manager::{lock_waiting_queue::PessimisticLockKeyCallback, LockManager, LockWaitToken}, + lock_manager::{ + lock_waiting_queue::{LockWaitQueues, PessimisticLockKeyCallback}, + LockManager, LockWaitToken, + }, Error as StorageError, PessimisticLockRes, ProcessResult, StorageCallback, }; @@ -37,10 +33,6 @@ pub struct LockWaitContextInner { /// Usually, requests are accepted from RPC, and in this case calling /// the callback means returning the response to the client via RPC. cb: StorageCallback, - - /// The token of the corresponding waiter in `LockManager`. - #[allow(dead_code)] - lock_wait_token: LockWaitToken, } /// The content of the `LockWaitContext` that needs to be shared among all @@ -54,53 +46,41 @@ pub struct LockWaitContextInner { /// and the request is going to be finished, they need to take the /// [`LockWaitContextInner`] to call the callback. /// * The [`LockWaitEntry`](crate::storage::lock_manager::lock_waiting_queue::LockWaitEntry), for -/// checking whether the request is already finished (cancelled). +/// providing information pub struct LockWaitContextSharedState { ctx_inner: Mutex>, - pub finished: AtomicBool, -} -impl LockWaitContextSharedState { - /// Checks whether the lock-waiting request is already finished. 
- pub fn is_finished(&self) -> bool { - self.finished.load(Ordering::Acquire) - } + /// The token to identify the waiter. + lock_wait_token: LockWaitToken, + + /// The key on which lock waiting occurs. + key: Key, } #[derive(Clone)] pub struct LockWaitContext { shared_states: Arc, - #[allow(dead_code)] - lock_manager: L, + lock_wait_queues: LockWaitQueues, allow_lock_with_conflict: bool, - - // Fields for logging: - start_ts: TimeStamp, - for_update_ts: TimeStamp, } impl LockWaitContext { pub fn new( - lock_manager: L, + key: Key, + lock_wait_queues: LockWaitQueues, lock_wait_token: LockWaitToken, - start_ts: TimeStamp, - for_update_ts: TimeStamp, cb: StorageCallback, allow_lock_with_conflict: bool, ) -> Self { - let inner = LockWaitContextInner { - cb, - lock_wait_token, - }; + let inner = LockWaitContextInner { cb }; Self { shared_states: Arc::new(LockWaitContextSharedState { ctx_inner: Mutex::new(Some(inner)), - finished: AtomicBool::new(false), + key, + lock_wait_token, }), - lock_manager, + lock_wait_queues, allow_lock_with_conflict, - start_ts, - for_update_ts, } } @@ -128,7 +108,7 @@ impl LockWaitContext { pub fn get_callback_for_blocked_key(&self) -> PessimisticLockKeyCallback { let ctx = self.clone(); Box::new(move |res| { - ctx.finish_request(res); + ctx.finish_request(res, false); }) } @@ -136,29 +116,38 @@ impl LockWaitContext { /// called by /// [`WaiterManager`](crate::server::lock_manager::WaiterManager) due to /// timeout. + /// + /// This function is assumed to be called when the lock-waiting request is + /// queueing but canceled outside, so it includes an operation to actively + /// remove the entry from the lock waiting queue. 
pub fn get_callback_for_cancellation(&self) -> impl FnOnce(StorageError) { let ctx = self.clone(); move |e| { - ctx.finish_request(Err(e.into())); + ctx.finish_request(Err(e.into()), true); } } - fn finish_request(&self, result: Result) { - let ctx_inner = if let Some(inner) = self.shared_states.ctx_inner.lock().take() { - inner + fn finish_request(&self, result: Result, is_canceling: bool) { + if is_canceling { + let entry = self + .lock_wait_queues + .remove_by_token(&self.shared_states.key, self.shared_states.lock_wait_token); + if entry.is_none() { + // Already popped out from the queue so that it will be woken up normally. Do + // nothing. + return; + } } else { - debug!("double invoking of finish_request of LockWaitContext"; - "start_ts" => self.start_ts, - "for_update_ts" => self.for_update_ts - ); - return; - }; - - self.shared_states.finished.store(true, Ordering::Release); + // TODO: Uncomment this after the corresponding change of + // `LockManager` is done. self.lock_wait_queues. + // get_lock_mgr() .remove_lock_wait(ctx_inner. + // lock_wait_token); + } - // TODO: Uncomment this after the corresponding change of `LockManager` is done. - // self.lock_manager - // .remove_lock_wait(ctx_inner.lock_wait_token); + // When this is executed, the waiter is either woken up from the queue or + // canceled and removed from the queue. There should be no chance to try + // to take the `ctx_inner` more than once. + let ctx_inner = self.shared_states.ctx_inner.lock().take().unwrap(); if !self.allow_lock_with_conflict { // The result must be an owned error. 
@@ -176,15 +165,17 @@ impl LockWaitContext { #[cfg(test)] mod tests { use std::{ + default::Default, sync::mpsc::{channel, Receiver}, time::Duration, }; use super::*; use crate::storage::{ - lock_manager::DummyLockManager, + lock_manager::{lock_waiting_queue::LockWaitEntry, DummyLockManager}, mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, txn::{Error as TxnError, ErrorInner as TxnErrorInner}, + types::PessimisticLockParameters, ErrorInner as StorageErrorInner, Result as StorageResult, }; @@ -197,23 +188,18 @@ mod tests { (cb, rx) } - fn create_test_lock_wait_ctx() -> ( + fn create_test_lock_wait_ctx( + key: &Key, + lock_wait_queues: &LockWaitQueues, + ) -> ( + LockWaitToken, LockWaitContext, Receiver>>, ) { - // TODO: Use `ProxyLockMgr` to check the correctness of the `remove_lock_wait` - // invocation. - let lock_mgr = DummyLockManager {}; let (cb, rx) = create_storage_cb(); - let ctx = LockWaitContext::new( - lock_mgr, - super::super::LockWaitToken(Some(1)), - 1.into(), - 1.into(), - cb, - false, - ); - (ctx, rx) + let token = LockWaitToken(Some(1)); + let ctx = LockWaitContext::new(key.clone(), lock_wait_queues.clone(), token, cb, false); + (token, ctx, rx) } #[test] @@ -236,7 +222,13 @@ mod tests { )))) }; - let (ctx, rx) = create_test_lock_wait_ctx(); + let key = Key::from_raw(b"k"); + + // TODO: Use `ProxyLockMgr` to check the correctness of the `remove_lock_wait` + // invocation. + let lock_wait_queues = LockWaitQueues::new(DummyLockManager {}); + + let (_, ctx, rx) = create_test_lock_wait_ctx(&key, &lock_wait_queues); // Nothing happens currently. (ctx.get_callback_for_first_write_batch()).execute(ProcessResult::Res); rx.recv_timeout(Duration::from_millis(20)).unwrap_err(); @@ -253,8 +245,27 @@ mod tests { // Nothing happens if the callback is double-called. 
(ctx.get_callback_for_cancellation())(StorageError::from(key_is_locked())); - let (ctx, rx) = create_test_lock_wait_ctx(); + let (token, ctx, rx) = create_test_lock_wait_ctx(&key, &lock_wait_queues); + // Add a corresponding entry to the lock waiting queue to test actively removing + // the entry from the queue. + lock_wait_queues.push_lock_wait( + Box::new(LockWaitEntry { + key: key.clone(), + lock_hash: key.gen_hash(), + parameters: PessimisticLockParameters { + start_ts: 1.into(), + for_update_ts: 1.into(), + ..Default::default() + }, + lock_wait_token: token, + legacy_wake_up_index: None, + key_cb: None, + }), + kvproto::kvrpcpb::LockInfo::default(), + ); + lock_wait_queues.must_have_next_entry(b"k", 1); (ctx.get_callback_for_cancellation())(StorageError::from(key_is_locked())); + lock_wait_queues.must_not_contain_key(b"k"); let res = rx.recv().unwrap().unwrap_err(); assert!(matches!( &res, @@ -262,8 +273,10 @@ mod tests { box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::KeyIsLocked(_))) ))) )); - // Nothing happens if the callback is double-called. - (ctx.get_callback_for_blocked_key())(Err(SharedError::from(write_conflict()))); + // Since the cancellation callback can fully execute only when it's successfully + // removed from the lock waiting queues, it's impossible that `finish_request` + // is called again after that. + // The tx should be dropped. rx.recv().unwrap_err(); } diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs index c1f2e800834..3651ce21c1c 100644 --- a/src/storage/lock_manager/lock_waiting_queue.rs +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -55,7 +55,6 @@ //! for executing the future in a suitable place. 
use std::{ - collections::BinaryHeap, future::Future, pin::Pin, result::Result, @@ -68,6 +67,7 @@ use std::{ use dashmap; use futures_util::compat::Future01CompatExt; +use keyed_priority_queue::KeyedPriorityQueue; use kvproto::kvrpcpb; use smallvec::SmallVec; use sync_wrapper::SyncWrapper; @@ -76,7 +76,7 @@ use txn_types::{Key, TimeStamp}; use crate::storage::{ errors::SharedError, - lock_manager::{lock_wait_context::LockWaitContextSharedState, LockManager, LockWaitToken}, + lock_manager::{LockManager, LockWaitToken}, metrics::*, mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, txn::Error as TxnError, @@ -92,11 +92,8 @@ pub type PessimisticLockKeyCallback = CallbackWithSharedError, pub parameters: PessimisticLockParameters, pub lock_wait_token: LockWaitToken, - pub req_states: Option>, pub legacy_wake_up_index: Option, pub key_cb: Option>, } @@ -111,7 +108,7 @@ impl Eq for LockWaitEntry {} impl PartialOrd for LockWaitEntry { fn partial_cmp(&self, other: &Self) -> Option { - // Reverse it since the std BinaryHeap is max heap and we want to pop the + // Reverse it since the priority queue is a max heap and we want to pop the // minimal. other .parameters @@ -122,7 +119,7 @@ impl PartialOrd for LockWaitEntry { impl Ord for LockWaitEntry { fn cmp(&self, other: &Self) -> std::cmp::Ordering { - // Reverse it since the std BinaryHeap is max heap and we want to pop the + // Reverse it since the priority queue is a max heap and we want to pop the // minimal. other.parameters.start_ts.cmp(&self.parameters.start_ts) } @@ -185,7 +182,11 @@ pub struct KeyLockWaitState { /// return it from a [`DelayedNotifyAllFuture`]. See /// [`LockWaitQueues::pop_for_waking_up`]. legacy_wake_up_index: usize, - queue: BinaryHeap>, + queue: KeyedPriorityQueue< + LockWaitToken, + Box, + std::hash::BuildHasherDefault, + >, /// The start_ts of the most recent waking up event. 
last_conflict_start_ts: TimeStamp, @@ -201,7 +202,7 @@ impl KeyLockWaitState { Self { current_lock: kvrpcpb::LockInfo::default(), legacy_wake_up_index: 0, - queue: BinaryHeap::new(), + queue: KeyedPriorityQueue::default(), last_conflict_start_ts: TimeStamp::zero(), last_conflict_commit_ts: TimeStamp::zero(), delayed_notify_all_state: None, @@ -219,7 +220,6 @@ pub struct LockWaitQueueInner { #[derive(Clone)] pub struct LockWaitQueues { inner: Arc, - #[allow(dead_code)] lock_mgr: L, } @@ -256,7 +256,10 @@ impl LockWaitQueues { if lock_wait_entry.legacy_wake_up_index.is_none() { lock_wait_entry.legacy_wake_up_index = Some(key_state.value().legacy_wake_up_index); } - key_state.value_mut().queue.push(lock_wait_entry); + key_state + .value_mut() + .queue + .push(lock_wait_entry.lock_wait_token, lock_wait_entry); let len = key_state.value_mut().queue.len(); drop(key_state); @@ -312,14 +315,9 @@ impl LockWaitQueues { v.last_conflict_start_ts = conflicting_start_ts; v.last_conflict_commit_ts = conflicting_commit_ts; - while let Some(lock_wait_entry) = v.queue.pop() { + if let Some((_, lock_wait_entry)) = v.queue.pop() { removed_waiters += 1; - if lock_wait_entry.req_states.as_ref().unwrap().is_finished() { - // Skip already cancelled entries. - continue; - } - if !lock_wait_entry.parameters.allow_lock_with_conflict { // If a pessimistic lock request in legacy mode is woken up, increase the // counter. @@ -334,7 +332,6 @@ impl LockWaitQueues { } else { result = Some((lock_wait_entry, None)); } - break; } // Remove the queue if it's emptied. @@ -464,13 +461,7 @@ impl LockWaitQueues { let legacy_wake_up_index = v.legacy_wake_up_index; - while let Some(front) = v.queue.peek() { - if front.req_states.as_ref().unwrap().is_finished() { - // Skip already cancelled entries. 
- v.queue.pop(); - removed_waiters += 1; - continue; - } + while let Some((_, front)) = v.queue.peek() { if front .legacy_wake_up_index .map_or(false, |idx| idx >= legacy_wake_up_index) @@ -479,7 +470,7 @@ impl LockWaitQueues { // delayed_notify_all operation. Keep it and other remaining items in the queue. break; } - let lock_wait_entry = v.queue.pop().unwrap(); + let (_, lock_wait_entry) = v.queue.pop().unwrap(); removed_waiters += 1; if lock_wait_entry.parameters.allow_lock_with_conflict { woken_up_resumable_entry = Some(lock_wait_entry); @@ -523,6 +514,63 @@ impl LockWaitQueues { // Return the item to be woken up in resumable way. woken_up_resumable_entry } + + /// Finds a specific LockWaitEntry by key and token, and removes it from the + /// queue. No extra operation will be performed on the removed entry. + /// The caller is responsible for finishing or cancelling the request to + /// let it return the response to the client. + pub fn remove_by_token( + &self, + key: &Key, + lock_wait_token: LockWaitToken, + ) -> Option> { + let mut result = None; + + // We don't want other threads insert any more entries between finding the + // queue is empty and removing the queue from the map. Wrap the logic + // within a call to `remove_if_mut` to avoid releasing lock during the + // procedure. 
+ let removed_key = self.inner.queue_map.remove_if_mut(key, |_, v| { + if let Some(res) = v.queue.remove(&lock_wait_token) { + LOCK_WAIT_QUEUE_ENTRIES_GAUGE_VEC.waiters.dec(); + result = Some(res); + } + v.queue.is_empty() + }); + + if removed_key.is_some() { + LOCK_WAIT_QUEUE_ENTRIES_GAUGE_VEC.keys.dec(); + } + + result + } + + #[allow(dead_code)] + pub(super) fn get_lock_mgr(&self) -> &L { + &self.lock_mgr + } + + #[cfg(test)] + pub fn must_not_contain_key(&self, key: &[u8]) { + assert!(self.inner.queue_map.get(&Key::from_raw(key)).is_none()); + } + + #[cfg(test)] + pub fn must_have_next_entry(&self, key: &[u8], start_ts: impl Into) { + assert_eq!( + self.inner + .queue_map + .get(&Key::from_raw(key)) + .unwrap() + .queue + .peek() + .unwrap() + .1 + .parameters + .start_ts, + start_ts.into() + ); + } } #[cfg(test)] @@ -540,6 +588,7 @@ mod tests { }; struct TestLockWaitEntryHandle { + token: LockWaitToken, wake_up_rx: Receiver>, cancel_cb: Box, } @@ -573,7 +622,7 @@ mod tests { // Additionally add some helper functions to the LockWaitQueues for simplifying // test code. 
impl LockWaitQueues { - fn make_lock_info_pb(&self, key: &[u8], ts: impl Into) -> kvrpcpb::LockInfo { + pub fn make_lock_info_pb(&self, key: &[u8], ts: impl Into) -> kvrpcpb::LockInfo { let ts = ts.into(); let mut lock_info = kvrpcpb::LockInfo::default(); lock_info.set_lock_version(ts.into_inner()); @@ -590,13 +639,12 @@ mod tests { lock_info_pb: kvrpcpb::LockInfo, ) -> (Box, TestLockWaitEntryHandle) { let start_ts = start_ts.into(); - let token = super::super::LockWaitToken(Some(1)); + let token = LockWaitToken(Some(self.allocate_internal_id())); let dummy_request_cb = StorageCallback::PessimisticLock(Box::new(|_| ())); let dummy_ctx = LockWaitContext::new( - self.lock_mgr.clone(), + Key::from_raw(key), + self.clone(), token, - start_ts, - start_ts, dummy_request_cb, false, ); @@ -623,7 +671,6 @@ mod tests { lock_hash, parameters, lock_wait_token: token, - req_states: Some(dummy_ctx.get_shared_states().clone()), legacy_wake_up_index: None, key_cb: Some(SyncWrapper::new(Box::new(move |res| tx.send(res).unwrap()))), }); @@ -638,6 +685,7 @@ mod tests { ( lock_wait_entry, TestLockWaitEntryHandle { + token, wake_up_rx: rx, cancel_cb: Box::new(cancel), }, @@ -730,25 +778,6 @@ mod tests { res } - fn must_not_contain_key(&self, key: &[u8]) { - assert!(self.inner.queue_map.get(&Key::from_raw(key)).is_none()); - } - - fn must_have_next_entry(&self, key: &[u8], start_ts: impl Into) { - assert_eq!( - self.inner - .queue_map - .get(&Key::from_raw(key)) - .unwrap() - .queue - .peek() - .unwrap() - .parameters - .start_ts, - start_ts.into() - ); - } - fn get_delayed_notify_id(&self, key: &[u8]) -> Option { self.inner .queue_map @@ -758,6 +787,13 @@ mod tests { .as_ref() .map(|(id, ..)| *id) } + + fn get_queue_length_of_key(&self, key: &[u8]) -> usize { + self.inner + .queue_map + .get(&Key::from_raw(key)) + .map_or(0, |v| v.queue.len()) + } } impl LockWaitEntry { @@ -836,6 +872,47 @@ mod tests { queues.must_not_contain_key(b"k1"); } + #[test] + fn test_removing_by_token() { + 
let queues = LockWaitQueues::new(DummyLockManager {}); + + queues.mock_lock_wait(b"k1", 10, 5, false); + let token11 = queues.mock_lock_wait(b"k1", 11, 5, false).token; + queues.mock_lock_wait(b"k1", 12, 5, false); + let token13 = queues.mock_lock_wait(b"k1", 13, 5, false).token; + queues.mock_lock_wait(b"k1", 14, 5, false); + assert_eq!(queues.get_queue_length_of_key(b"k1"), 5); + + queues + .remove_by_token(&Key::from_raw(b"k1"), token11) + .unwrap() + .check_key(b"k1") + .check_start_ts(11); + queues + .remove_by_token(&Key::from_raw(b"k1"), token13) + .unwrap() + .check_key(b"k1") + .check_start_ts(13); + assert_eq!(queues.get_queue_length_of_key(b"k1"), 3); + + // Removing not-existing entry takes no effect. + assert!( + queues + .remove_by_token(&Key::from_raw(b"k1"), token11) + .is_none() + ); + assert!( + queues + .remove_by_token(&Key::from_raw(b"k2"), token11) + .is_none() + ); + assert_eq!(queues.get_queue_length_of_key(b"k1"), 3); + + queues.must_pop(b"k1", 5, 6).check_start_ts(10); + queues.must_pop(b"k1", 5, 6).check_start_ts(12); + queues.must_pop(b"k1", 5, 6).check_start_ts(14); + } + #[test] fn test_dropping_cancelled_entries() { let queues = LockWaitQueues::new(DummyLockManager {}); @@ -846,10 +923,14 @@ mod tests { let h13 = queues.mock_lock_wait(b"k1", 13, 5, false); queues.mock_lock_wait(b"k1", 14, 5, false); + assert_eq!(queues.get_queue_length_of_key(b"k1"), 5); + h10.cancel(); h11.cancel(); h13.cancel(); + assert_eq!(queues.get_queue_length_of_key(b"k1"), 2); + for &expected_start_ts in &[12u64, 14] { queues .must_pop(b"k1", 5, 6) diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index b74c5b7d51f..2bbe4b7b762 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -595,7 +595,7 @@ lazy_static! 
{ pub static ref LOCK_WAIT_QUEUE_LENGTH_HISTOGRAM: Histogram = register_histogram!( "tikv_lock_wait_queue_length", - "Statistics of length of queues counted when enqueueing", + "Statistics of length of queues counted when enqueueing", exponential_buckets(1.0, 2.0, 16).unwrap() ) .unwrap(); From efc84fc2e346fffef3f21d5e15696e217068abcd Mon Sep 17 00:00:00 2001 From: lizhenhuan <1916038084@qq.com> Date: Thu, 13 Oct 2022 13:19:50 +0800 Subject: [PATCH 0266/1149] Json contains push tikv (#13469) close tikv/tikv#13468 Signed-off-by: lizhenhuan <1916038084@qq.com> Signed-off-by: 3pointer Signed-off-by: Leavrth Signed-off-by: CalvinNeo Co-authored-by: 3pointer Co-authored-by: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Co-authored-by: Calvin Neo Co-authored-by: Yilin Chen Co-authored-by: Ti Chi Robot --- .../src/codec/mysql/json/binary.rs | 4 +- .../src/codec/mysql/json/json_contains.rs | 106 +++++ .../src/codec/mysql/json/mod.rs | 1 + components/tidb_query_expr/src/impl_json.rs | 383 ++++++++++++++++++ components/tidb_query_expr/src/lib.rs | 1 + 5 files changed, 493 insertions(+), 2 deletions(-) create mode 100644 components/tidb_query_datatype/src/codec/mysql/json/json_contains.rs diff --git a/components/tidb_query_datatype/src/codec/mysql/json/binary.rs b/components/tidb_query_datatype/src/codec/mysql/json/binary.rs index 12f8fbd5129..daeae751fb5 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/binary.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/binary.rs @@ -17,7 +17,7 @@ impl<'a> JsonRef<'a> { /// Return the `i`th key in current Object json /// - /// See `arrayGetElem()` in TiDB `json/binary.go` + /// See `objectGetKey()` in TiDB `types/json_binary.go` pub fn object_get_key(&self, i: usize) -> &'a [u8] { let key_off_start = HEADER_LEN + i * KEY_ENTRY_LEN; let key_off = NumberCodec::decode_u32_le(&self.value()[key_off_start..]) as usize; @@ -28,7 +28,7 @@ impl<'a> JsonRef<'a> { /// Returns the JsonRef of `i`th value in 
current Object json /// - /// See `arrayGetElem()` in TiDB `json/binary.go` + /// See `objectGetVal()` in TiDB `types/json_binary.go` pub fn object_get_val(&self, i: usize) -> Result> { let ele_count = self.get_elem_count(); let val_entry_off = HEADER_LEN + ele_count * KEY_ENTRY_LEN + i * VALUE_ENTRY_LEN; diff --git a/components/tidb_query_datatype/src/codec/mysql/json/json_contains.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_contains.rs new file mode 100644 index 00000000000..46de1af9e0b --- /dev/null +++ b/components/tidb_query_datatype/src/codec/mysql/json/json_contains.rs @@ -0,0 +1,106 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::cmp::Ordering; + +use super::{super::Result, JsonRef, JsonType}; + +impl<'a> JsonRef<'a> { + /// `json_contains` is the implementation for JSON_CONTAINS in mysql + /// + /// See `ContainsBinaryJSON()` in TiDB `types/json_binary_functions.go` + pub fn json_contains(&self, target: JsonRef<'_>) -> Result { + match self.type_code { + JsonType::Object => { + if target.type_code == JsonType::Object { + let elem_count = target.get_elem_count(); + for i in 0..elem_count { + let key = target.object_get_key(i); + let val = target.object_get_val(i)?; + let idx = self.object_search_key(key); + match idx { + None => { + return Ok(false); + } + Some(idx) => { + let exp = self.object_get_val(idx)?; + if !(exp.json_contains(val)?) { + return Ok(false); + } + } + } + } + return Ok(true); + } + } + JsonType::Array => { + if target.type_code == JsonType::Array { + let elem_count = target.get_elem_count(); + for i in 0..elem_count { + if !(self.json_contains(target.array_get_elem(i)?)?) { + return Ok(false); + } + } + return Ok(true); + } + let elem_count = self.get_elem_count(); + for i in 0..elem_count { + if self.array_get_elem(i)?.json_contains(target)? 
{ + return Ok(true); + } + } + } + _ => { + return match self.partial_cmp(&target).unwrap() { + Ordering::Equal => Ok(true), + _ => Ok(false), + }; + } + }; + Ok(false) + } +} + +#[cfg(test)] +mod tests { + use super::super::Json; + #[test] + fn test_json_contains() { + let mut test_cases = vec![ + (r#"{"a":{"a":1},"b":2}"#, r#"{"b":2}"#, true), + (r#"{}"#, r#"{}"#, true), + (r#"{"a":1}"#, r#"{}"#, true), + (r#"{"a":1}"#, r#"1"#, false), + (r#"{"a":[1]}"#, r#"[1]"#, false), + (r#"{"b":2, "c":3}"#, r#"{"c":3}"#, true), + (r#"1"#, r#"1"#, true), + (r#"[1]"#, r#"1"#, true), + (r#"[1,2]"#, r#"[1]"#, true), + (r#"[1,2]"#, r#"[1,3]"#, false), + (r#"[1,2]"#, r#"["1"]"#, false), + (r#"[1,2,[1,3]]"#, r#"[1,3]"#, true), + (r#"[1,2,[1,[5,[3]]]]"#, r#"[1,3]"#, true), + (r#"[1,2,[1,[5,{"a":[2,3]}]]]"#, r#"[1,{"a":[3]}]"#, true), + (r#"[{"a":1}]"#, r#"{"a":1}"#, true), + (r#"[{"a":1,"b":2}]"#, r#"{"a":1}"#, true), + (r#"[{"a":{"a":1},"b":2}]"#, r#"{"a":1}"#, false), + (r#"{"a":{"a":1},"b":2}"#, r#"{"b":3}"#, false), + (r#"[1,2,[1,[5,{"a":[2,3]}]]]"#, r#"[1,{"a":[3]}]"#, true), + (r#"[1,2,[1,[5,{"a":[2,3]}]]]"#, r#"[10,{"a":[3]}]"#, false), + ]; + for (i, (js, value, expected)) in test_cases.drain(..).enumerate() { + let j = js.parse(); + assert!(j.is_ok(), "#{} expect parse ok but got {:?}", i, j); + let j: Json = j.unwrap(); + let value = value.parse(); + assert!(value.is_ok(), "#{} expect parse ok but got {:?}", i, j); + let value: Json = value.unwrap(); + + let got = j.as_ref().json_contains(value.as_ref()).unwrap(); + assert_eq!( + got, expected, + "#{} expect {:?}, but got {:?}", + i, expected, got + ); + } + } +} diff --git a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs index f21f789c0d0..0cd382f6d65 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs @@ -65,6 +65,7 @@ mod modifier; mod path_expr; mod serde; // json 
functions +mod json_contains; mod json_depth; mod json_extract; mod json_keys; diff --git a/components/tidb_query_expr/src/impl_json.rs b/components/tidb_query_expr/src/impl_json.rs index 60f784dc604..1926cc648e0 100644 --- a/components/tidb_query_expr/src/impl_json.rs +++ b/components/tidb_query_expr/src/impl_json.rs @@ -290,6 +290,53 @@ fn json_length(args: &[ScalarValueRef]) -> Result> { }) } +// Args should be like `(Option , Option, +// &[Option])`. or `(Option , Option)` +fn json_contains_validator(expr: &tipb::Expr) -> Result<()> { + assert!(expr.get_children().len() == 2 || expr.get_children().len() == 3); + let children = expr.get_children(); + super::function::validate_expr_return_type(&children[0], EvalType::Json)?; + super::function::validate_expr_return_type(&children[1], EvalType::Json)?; + if expr.get_children().len() == 3 { + super::function::validate_expr_return_type(&children[2], EvalType::Bytes)?; + } + Ok(()) +} + +#[rpn_fn(nullable, raw_varg,min_args= 2, max_args = 3, extra_validator = json_contains_validator)] +#[inline] +fn json_contains(args: &[ScalarValueRef]) -> Result> { + assert!(args.len() == 2 || args.len() == 3); + let j: Option = args[0].as_json(); + let mut j = match j { + None => return Ok(None), + Some(j) => j.to_owned(), + }; + let target: Option = args[1].as_json(); + let target = match target { + None => return Ok(None), + Some(target) => target, + }; + + if args.len() == 3 { + match parse_json_path_list(&args[2..])? { + Some(path_expr_list) => { + if path_expr_list.len() == 1 && path_expr_list[0].contains_any_asterisk() { + return Ok(None); + } + match j.as_ref().extract(&path_expr_list)? { + Some(json) => { + j = json; + } + _ => return Ok(None), + } + } + None => return Ok(None), + }; + } + Ok(Some(j.as_ref().json_contains(target)? 
as i64)) +} + #[rpn_fn(nullable, raw_varg, min_args = 2, extra_validator = json_with_paths_validator)] #[inline] fn json_remove(args: &[ScalarValueRef]) -> Result> { @@ -779,6 +826,342 @@ mod tests { } } + #[test] + fn test_json_contains() { + let cases: Vec<(Vec, Option)> = vec![ + ( + vec![ + Some(Json::from_str(r#"{"a":{"a":1},"b":2}"#).unwrap()).into(), + Some(Json::from_str(r#"2"#).unwrap()).into(), + Some(b"$.b".to_vec()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"{"a":{"a":1},"b":2}"#).unwrap()).into(), + Some(Json::from_str(r#"3"#).unwrap()).into(), + Some(b"$.b".to_vec()).into(), + ], + Some(0), + ), + ( + vec![ + Some(Json::from_str(r#"{"a":{"a":1},"b":2}"#).unwrap()).into(), + Some(Json::from_str(r#"{"b":3}"#).unwrap()).into(), + ], + Some(0), + ), + ( + vec![ + Some(Json::from_str(r#"{"a":{"a":1},"b":2}"#).unwrap()).into(), + Some(Json::from_str(r#"{"b":2}"#).unwrap()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"{"a":{"a":1},"b":2}"#).unwrap()).into(), + Some(Json::from_str(r#"{"a":1}"#).unwrap()).into(), + Some(b"$.a".to_vec()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"[{"optUid": 10, "value": "admin"}]"#).unwrap()).into(), + Some(Json::from_str(r#"10"#).unwrap()).into(), + Some(b"$[0].optUid".to_vec()).into(), + ], + Some(1), + ), + // copy from tidb Tests None arguments + (vec![None::.into(), None::.into()], None), + ( + vec![ + Some(Json::from_str(r#"{"a":1}"#).unwrap()).into(), + None::.into(), + ], + None, + ), + ( + vec![ + None::.into(), + Some(Json::from_str(r#"1"#).unwrap()).into(), + ], + None, + ), + ( + vec![ + None::.into(), + Some(Json::from_str(r#"1"#).unwrap()).into(), + Some(b"$.c".to_vec()).into(), + ], + None, + ), + ( + vec![ + Some(Json::from_str(r#"{"a": [1, 2, {"aa": "xx"}]}"#).unwrap()).into(), + None::.into(), + Some(b"$.a[3]".to_vec()).into(), + ], + None, + ), + ( + vec![ + Some(Json::from_str(r#"{"a": [1, 2, {"aa": "xx"}]}"#).unwrap()).into(), + 
Some(Json::from_str(r#"1"#).unwrap()).into(), + None::.into(), + ], + None, + ), + // Tests with path expression + ( + vec![ + Some(Json::from_str(r#"[1,2,[1,[5,[3]]]]"#).unwrap()).into(), + Some(Json::from_str(r#"[1,3]"#).unwrap()).into(), + Some(b"$[2]".to_vec()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"[1,2,[1,[5,{"a":[2,3]}]]]"#).unwrap()).into(), + Some(Json::from_str(r#"[1,{"a":[3]}]"#).unwrap()).into(), + Some(b"$[2]".to_vec()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"[{"a":1}]"#).unwrap()).into(), + Some(Json::from_str(r#"{"a":1}"#).unwrap()).into(), + Some(b"$".to_vec()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"[{"a":1,"b":2}]"#).unwrap()).into(), + Some(Json::from_str(r#"{"a":1,"b":2}"#).unwrap()).into(), + Some(b"$".to_vec()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"[{"a":{"a":1},"b":2}]"#).unwrap()).into(), + Some(Json::from_str(r#"{"a":1}"#).unwrap()).into(), + Some(b"$.a".to_vec()).into(), + ], + None, + ), + // Tests without path expression + // {[]interface{}{`{}`, `{}`}, 1, nil}, + // {[]interface{}{`{"a":1}`, `{}`}, 1, nil}, + // {[]interface{}{`{"a":1}`, `1`}, 0, nil}, + ( + vec![ + Some(Json::from_str(r#"{}"#).unwrap()).into(), + Some(Json::from_str(r#"{}"#).unwrap()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"{"a":1}"#).unwrap()).into(), + Some(Json::from_str(r#"{}"#).unwrap()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"{"a":1}"#).unwrap()).into(), + Some(Json::from_str(r#"1"#).unwrap()).into(), + ], + Some(0), + ), + // {[]interface{}{`{"a":[1]}`, `[1]`}, 0, nil}, + // {[]interface{}{`{"b":2, "c":3}`, `{"c":3}`}, 1, nil}, + // {[]interface{}{`1`, `1`}, 1, nil}, + // {[]interface{}{`[1]`, `1`}, 1, nil}, + ( + vec![ + Some(Json::from_str(r#"{"a":[1]}"#).unwrap()).into(), + Some(Json::from_str(r#"[1]"#).unwrap()).into(), + ], + Some(0), + ), + ( + vec![ + Some(Json::from_str(r#"{"b":2, 
"c":3}"#).unwrap()).into(), + Some(Json::from_str(r#"{"c":3}"#).unwrap()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"1"#).unwrap()).into(), + Some(Json::from_str(r#"1"#).unwrap()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"[1]"#).unwrap()).into(), + Some(Json::from_str(r#"1"#).unwrap()).into(), + ], + Some(1), + ), + // {[]interface{}{`[1,2]`, `[1]`}, 1, nil}, + // {[]interface{}{`[1,2]`, `[1,3]`}, 0, nil}, + // {[]interface{}{`[1,2]`, `["1"]`}, 0, nil}, + // {[]interface{}{`[1,2,[1,3]]`, `[1,3]`}, 1, nil}, + ( + vec![ + Some(Json::from_str(r#"[1,2]"#).unwrap()).into(), + Some(Json::from_str(r#"[1]"#).unwrap()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"[1,2]"#).unwrap()).into(), + Some(Json::from_str(r#"[1,3]"#).unwrap()).into(), + ], + Some(0), + ), + ( + vec![ + Some(Json::from_str(r#"[1,2]"#).unwrap()).into(), + Some(Json::from_str(r#"["1"]"#).unwrap()).into(), + ], + Some(0), + ), + ( + vec![ + Some(Json::from_str(r#"[1,2,[1,3]]"#).unwrap()).into(), + Some(Json::from_str(r#"[1,3]"#).unwrap()).into(), + ], + Some(1), + ), + // {[]interface{}{`[1,2,[1,3]]`, `[1, 3]`}, 1, nil}, + // {[]interface{}{`[1,2,[1,[5,[3]]]]`, `[1,3]`}, 1, nil}, + // {[]interface{}{`[1,2,[1,[5,{"a":[2,3]}]]]`, `[1,{"a":[3]}]`}, 1, nil}, + ( + vec![ + Some(Json::from_str(r#"[1,2,[1,3]]"#).unwrap()).into(), + Some(Json::from_str(r#"[1, 3]"#).unwrap()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"[1,2,[1,[5,[3]]]]"#).unwrap()).into(), + Some(Json::from_str(r#"[1,3]"#).unwrap()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"[1,2,[1,[5,{"a":[2,3]}]]]"#).unwrap()).into(), + Some(Json::from_str(r#"[1,{"a":[3]}]"#).unwrap()).into(), + ], + Some(1), + ), + // {[]interface{}{`[{"a":1}]`, `{"a":1}`}, 1, nil}, + // {[]interface{}{`[{"a":1,"b":2}]`, `{"a":1}`}, 1, nil}, + // {[]interface{}{`[{"a":{"a":1},"b":2}]`, `{"a":1}`}, 0, nil}, + ( + vec![ + 
Some(Json::from_str(r#"[{"a":1}]"#).unwrap()).into(), + Some(Json::from_str(r#"{"a":1}"#).unwrap()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"[{"a":1,"b":2}]"#).unwrap()).into(), + Some(Json::from_str(r#"{"a":1}"#).unwrap()).into(), + ], + Some(1), + ), + ( + vec![ + Some(Json::from_str(r#"[{"a":{"a":1},"b":2}]"#).unwrap()).into(), + Some(Json::from_str(r#"{"a":1}"#).unwrap()).into(), + ], + Some(0), + ), + // Tests path expression contains any asterisk + // {[]interface{}{`{"a": [1, 2, {"aa": "xx"}]}`, `1`, "$.*"}, nil, + // json.ErrInvalidJSONPathWildcard}, {[]interface{}{`{"a": [1, 2, {"aa": + // "xx"}]}`, `1`, "$[*]"}, nil, json.ErrInvalidJSONPathWildcard}, + // {[]interface{}{`{"a": [1, 2, {"aa": "xx"}]}`, `1`, "$**.a"}, nil, + // json.ErrInvalidJSONPathWildcard}, + ( + vec![ + Some(Json::from_str(r#"{"a": [1, 2, {"aa": "xx"}]}"#).unwrap()).into(), + Some(Json::from_str(r#"1"#).unwrap()).into(), + Some(b"$.*".to_vec()).into(), + ], + None, + ), + ( + vec![ + Some(Json::from_str(r#"{"a": [1, 2, {"aa": "xx"}]}"#).unwrap()).into(), + Some(Json::from_str(r#"1"#).unwrap()).into(), + Some(b"$[*]".to_vec()).into(), + ], + None, + ), + ( + vec![ + Some(Json::from_str(r#"{"a": [1, 2, {"aa": "xx"}]}"#).unwrap()).into(), + Some(Json::from_str(r#"1"#).unwrap()).into(), + Some(b"$**.a".to_vec()).into(), + ], + None, + ), + // Tests path expression does not identify a section of the target document + // {[]interface{}{`{"a": [1, 2, {"aa": "xx"}]}`, `1`, "$.c"}, nil, nil}, + // {[]interface{}{`{"a": [1, 2, {"aa": "xx"}]}`, `1`, "$.a[3]"}, nil, nil}, + // {[]interface{}{`{"a": [1, 2, {"aa": "xx"}]}`, `1`, "$.a[2].b"}, nil, nil}, + ( + vec![ + Some(Json::from_str(r#"{"a": [1, 2, {"aa": "xx"}]}"#).unwrap()).into(), + Some(Json::from_str(r#"1"#).unwrap()).into(), + Some(b"$.c".to_vec()).into(), + ], + None, + ), + ( + vec![ + Some(Json::from_str(r#"{"a": [1, 2, {"aa": "xx"}]}"#).unwrap()).into(), + Some(Json::from_str(r#"1"#).unwrap()).into(), + 
Some(b"$.a[3]".to_vec()).into(), + ], + None, + ), + ( + vec![ + Some(Json::from_str(r#"{"a": [1, 2, {"aa": "xx"}]}"#).unwrap()).into(), + Some(Json::from_str(r#"1"#).unwrap()).into(), + Some(b"$.a[2].b".to_vec()).into(), + ], + None, + ), + ]; + + for (vargs, expected) in cases { + let output = RpnFnScalarEvaluator::new() + .push_params(vargs.clone()) + .evaluate(ScalarFuncSig::JsonContainsSig) + .unwrap(); + assert_eq!(output, expected, "{:?}", vargs); + } + } + #[test] fn test_json_keys() { let cases: Vec<(Vec, Option, bool)> = vec![ diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index ab6e788ae2e..8bb1cc05480 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -590,6 +590,7 @@ fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { ScalarFuncSig::JsonUnquoteSig => json_unquote_fn_meta(), ScalarFuncSig::JsonExtractSig => json_extract_fn_meta(), ScalarFuncSig::JsonLengthSig => json_length_fn_meta(), + ScalarFuncSig::JsonContainsSig => json_contains_fn_meta(), ScalarFuncSig::JsonRemoveSig => json_remove_fn_meta(), ScalarFuncSig::JsonKeysSig => json_keys_fn_meta(), ScalarFuncSig::JsonKeys2ArgsSig => json_keys_fn_meta(), From 11a340c3ec54fdbe1582a4b6eb669f43aa924ba2 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 13 Oct 2022 14:43:51 +0800 Subject: [PATCH 0267/1149] local_reader: solve the race condition when acquiring the snapshot (#13568) close tikv/tikv#13553 Signed-off-by: SpadeA-Tang Co-authored-by: Ti Chi Robot --- .../raftstore-v2/src/operation/query/local.rs | 24 +- .../raftstore-v2/src/operation/query/mod.rs | 4 +- components/raftstore/src/store/peer.rs | 14 +- components/raftstore/src/store/worker/read.rs | 530 ++++++++++++------ tests/failpoints/cases/mod.rs | 1 + tests/failpoints/cases/test_local_read.rs | 81 +++ 6 files changed, 453 insertions(+), 201 deletions(-) create mode 100644 
tests/failpoints/cases/test_local_read.rs diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index bdf829dc4f5..78cc9976dab 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -16,7 +16,7 @@ use kvproto::{ use raftstore::{ errors::RAFTSTORE_IS_BUSY, store::{ - cmd_resp, util::LeaseState, LocalReaderCore, ReadDelegate, ReadExecutor, + cmd_resp, util::LeaseState, LocalReadContext, LocalReaderCore, ReadDelegate, ReadExecutor, ReadExecutorProvider, RegionSnapshot, RequestInspector, RequestPolicy, TLS_LOCAL_READ_METRICS, }, @@ -110,10 +110,7 @@ where Ok(Some((mut delegate, policy))) => match policy { RequestPolicy::ReadLocal => { let region = Arc::clone(&delegate.region); - let snap = RegionSnapshot::from_snapshot( - delegate.get_snapshot(None, &mut None), - region, - ); + let snap = RegionSnapshot::from_snapshot(delegate.get_snapshot(&None), region); // Ensures the snapshot is acquired before getting the time atomic::fence(atomic::Ordering::Release); let snapshot_ts = monotonic_raw_now(); @@ -133,10 +130,7 @@ where delegate.check_stale_read_safe(read_ts)?; let region = Arc::clone(&delegate.region); - let snap = RegionSnapshot::from_snapshot( - delegate.get_snapshot(None, &mut None), - region, - ); + let snap = RegionSnapshot::from_snapshot(delegate.get_snapshot(&None), region); TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().local_executed_requests.inc()); @@ -295,11 +289,7 @@ where self.cached_tablet.latest().unwrap() } - fn get_snapshot( - &mut self, - _: Option, - _: &mut Option>, - ) -> Arc { + fn get_snapshot(&mut self, _: &Option>) -> Arc { Arc::new(self.cached_tablet.latest().unwrap().snapshot()) } } @@ -381,7 +371,7 @@ impl<'r> SnapRequestInspector<'r> { return Ok(RequestPolicy::ReadIndex); } - // If applied index's term is differ from current raft's term, leader transfer + // If applied index's term differs from 
current raft's term, leader transfer // must happened, if read locally, we may read old value. if !self.has_applied_to_current_term() { return Ok(RequestPolicy::ReadIndex); @@ -769,7 +759,7 @@ mod tests { let mut delegate = delegate.unwrap(); let tablet = delegate.get_tablet(); assert_eq!(tablet1.as_inner().path(), tablet.as_inner().path()); - let snapshot = delegate.get_snapshot(None, &mut None); + let snapshot = delegate.get_snapshot(&None); assert_eq!( b"val1".to_vec(), *snapshot.get_value(b"a1").unwrap().unwrap() @@ -779,7 +769,7 @@ mod tests { let mut delegate = delegate.unwrap(); let tablet = delegate.get_tablet(); assert_eq!(tablet2.as_inner().path(), tablet.as_inner().path()); - let snapshot = delegate.get_snapshot(None, &mut None); + let snapshot = delegate.get_snapshot(&None); assert_eq!( b"val2".to_vec(), *snapshot.get_value(b"a2").unwrap().unwrap() diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index b592b4819a5..0b10e0679a5 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -56,8 +56,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: raftstore::store::Transport> return Ok(RequestPolicy::ReadIndex); } - // If applied index's term is differ from current raft's term, leader transfer - // must happened, if read locally, we may read old value. + // If applied index's term differs from current raft's term, leader + // transfer must happened, if read locally, we may read old value. // TODO: to add the block back when apply is implemented. 
// if !self.fsm.peer().has_applied_to_current_term() { // return Ok(RequestPolicy::ReadIndex); diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index c95dda17c2c..37c2fd5a99a 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -60,7 +60,7 @@ use tikv_util::{ codec::number::decode_u64, debug, error, info, sys::disk::DiskUsage, - time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant, InstantExt, ThreadReadId}, + time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant, InstantExt}, warn, worker::Scheduler, Either, @@ -4766,7 +4766,7 @@ where } } - let mut resp = reader.execute(&req, &Arc::new(region), read_index, None, None); + let mut resp = reader.execute(&req, &Arc::new(region), read_index, None); if let Some(snap) = resp.snapshot.as_mut() { snap.txn_ext = Some(self.txn_ext.clone()); snap.bucket_meta = self.region_buckets.as_ref().map(|b| b.meta.clone()); @@ -5558,8 +5558,8 @@ pub trait RequestInspector { return Ok(RequestPolicy::ReadIndex); } - // If applied index's term is differ from current raft's term, leader transfer - // must happened, if read locally, we may read old value. + // If applied index's term differs from current raft's term, leader + // transfer must happened, if read locally, we may read old value. 
if !self.has_applied_to_current_term() { return Ok(RequestPolicy::ReadIndex); } @@ -5616,11 +5616,7 @@ where &self.engines.kv } - fn get_snapshot( - &mut self, - _: Option, - _: &mut Option>, - ) -> Arc { + fn get_snapshot(&mut self, _: &Option>) -> Arc { Arc::new(self.engines.kv.snapshot()) } } diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 5801083f1bc..1f6d7c4bab7 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -6,7 +6,7 @@ use std::{ fmt::{self, Display, Formatter}, ops::Deref, sync::{ - atomic::{AtomicU64, Ordering}, + atomic::{self, AtomicU64, Ordering}, Arc, Mutex, }, }; @@ -47,22 +47,31 @@ pub trait ReadExecutor { type Tablet: KvEngine; fn get_tablet(&mut self) -> &Self::Tablet; + + /// Get the snapshot fo the tablet. + /// + /// If the tablet is not ready, `None` is returned. + /// Currently, only multi-rocksdb version may return `None`. fn get_snapshot( &mut self, - ts: Option, - read_context: &mut Option>, + read_context: &Option>, ) -> Arc<::Snapshot>; - fn get_value(&mut self, req: &Request, region: &metapb::Region) -> Result { + fn get_value( + &mut self, + req: &Request, + region: &metapb::Region, + read_context: &Option>, + ) -> Result { let key = req.get_get().get_key(); // region key range has no data prefix, so we must use origin key to check. 
util::check_key_in_region(key, region)?; - let engine = self.get_tablet(); let mut resp = Response::default(); + let snapshot = self.get_snapshot(read_context); let res = if !req.get_get().get_cf().is_empty() { let cf = req.get_get().get_cf(); - engine + snapshot .get_value_cf(cf, &keys::data_key(key)) .unwrap_or_else(|e| { panic!( @@ -74,14 +83,16 @@ pub trait ReadExecutor { ) }) } else { - engine.get_value(&keys::data_key(key)).unwrap_or_else(|e| { - panic!( - "[region {}] failed to get {}: {:?}", - region.get_id(), - log_wrappers::Value::key(key), - e - ) - }) + snapshot + .get_value(&keys::data_key(key)) + .unwrap_or_else(|e| { + panic!( + "[region {}] failed to get {}: {:?}", + region.get_id(), + log_wrappers::Value::key(key), + e + ) + }) }; if let Some(res) = res { resp.mut_get().set_value(res.to_vec()); @@ -95,8 +106,7 @@ pub trait ReadExecutor { msg: &RaftCmdRequest, region: &Arc, read_index: Option, - mut ts: Option, - mut read_context: Option>, + local_read_ctx: Option>, ) -> ReadResponse<::Snapshot> { let requests = msg.get_requests(); let mut response = ReadResponse { @@ -108,7 +118,7 @@ pub trait ReadExecutor { for req in requests { let cmd_type = req.get_cmd_type(); let mut resp = match cmd_type { - CmdType::Get => match self.get_value(req, region.as_ref()) { + CmdType::Get => match self.get_value(req, region.as_ref(), &local_read_ctx) { Ok(resp) => resp, Err(e) => { error!(?e; @@ -121,7 +131,7 @@ pub trait ReadExecutor { }, CmdType::Snap => { let snapshot = RegionSnapshot::from_snapshot( - self.get_snapshot(ts.take(), &mut read_context), + self.get_snapshot(&local_read_ctx), region.clone(), ); response.snapshot = Some(snapshot); @@ -187,13 +197,61 @@ where } } -/// #[RaftstoreCommon]: LocalReadContext combines some LocalReader's fields for temporary usage. 
pub struct LocalReadContext<'a, E> where E: KvEngine, { - read_id: &'a mut ThreadReadId, - snap_cache: &'a mut Box>>, + read_id: Option, + snap_cache: &'a mut SnapCache, +} + +impl<'a, E> LocalReadContext<'a, E> +where + E: KvEngine, +{ + fn new(snap_cache: &'a mut SnapCache, read_id: Option) -> Self { + Self { + snap_cache, + read_id, + } + } + + /// Update the snapshot in the `snap_cache` if the read_id is None or does + /// not match. + fn maybe_update_snapshot(&mut self, engine: &E, delegate_last_valid_ts: Timespec) -> bool { + // When the read_id is None, it means the `snap_cache` has been cleared + // before and the `cached_read_id` of it is None because only a consecutive + // requests will have the same cache and the cache will be cleared after the + // last request of the batch. + if self.read_id.is_some() { + if self.snap_cache.cached_read_id == self.read_id + && self.read_id.as_ref().unwrap().create_time >= delegate_last_valid_ts + { + // Cache hit + return false; + } + + self.snap_cache.cached_read_id = self.read_id.clone(); + } + + self.snap_cache.snapshot = Some(Arc::new(engine.snapshot())); + + // Ensures the snapshot is acquired before getting the time + atomic::fence(atomic::Ordering::Release); + self.snap_cache.cached_snapshot_ts = monotonic_raw_now(); + + true + } + + // Note: must be called after `maybe_update_snapshot` + fn snapshot_ts(&self) -> Timespec { + self.snap_cache.cached_snapshot_ts + } + + // Note: must be called after `maybe_update_snapshot` + fn snapshot(&self) -> Option> { + self.snap_cache.snapshot.clone() + } } impl Drop for ReadDelegate { @@ -460,6 +518,7 @@ impl ReadDelegate { let term = lease.term(); if term == self.term { if lease.inspect(Some(ts)) == LeaseState::Valid { + fail_point!("after_pass_lease_check"); return true; } else { TLS_LOCAL_READ_METRICS @@ -566,6 +625,46 @@ impl Progress { } } +struct SnapCache +where + E: KvEngine, +{ + cached_read_id: Option, + snapshot: Option>, + cached_snapshot_ts: Timespec, +} + 
+impl SnapCache +where + E: KvEngine, +{ + fn new() -> Self { + SnapCache { + cached_read_id: None, + snapshot: None, + cached_snapshot_ts: Timespec::new(0, 0), + } + } + + fn clear(&mut self) { + self.cached_read_id.take(); + self.snapshot.take(); + } +} + +impl Clone for SnapCache +where + E: KvEngine, +{ + fn clone(&self) -> Self { + Self { + cached_read_id: self.cached_read_id.clone(), + snapshot: self.snapshot.clone(), + cached_snapshot_ts: self.cached_snapshot_ts, + } + } +} + /// #[RaftstoreCommon]: LocalReader is an entry point where local read requests are dipatch to the /// relevant regions by LocalReader so that these requests can be handled by the /// relevant ReadDelegate respectively. @@ -705,10 +804,7 @@ where { local_reader: LocalReaderCore, StoreMetaDelegate>, kv_engine: E, - - snap_cache: Box>>, - cache_read_id: ThreadReadId, - + snap_cache: SnapCache, // A channel to raftstore. router: C, } @@ -719,23 +815,14 @@ where C: ProposalRouter + CasualRouter, { pub fn new(kv_engine: E, store_meta: StoreMetaDelegate, router: C) -> Self { - let cache_read_id = ThreadReadId::new(); Self { local_reader: LocalReaderCore::new(store_meta), kv_engine, - snap_cache: Box::new(None), - cache_read_id, + snap_cache: SnapCache::new(), router, } } - fn local_read_context(&mut self) -> LocalReadContext<'_, E> { - LocalReadContext { - snap_cache: &mut self.snap_cache, - read_id: &mut self.cache_read_id, - } - } - pub fn pre_propose_raft_command( &mut self, req: &RaftCmdRequest, @@ -790,37 +877,34 @@ where pub fn propose_raft_command( &mut self, - mut read_id: Option, + read_id: Option, req: RaftCmdRequest, cb: Callback, ) { match self.pre_propose_raft_command(&req) { Ok(Some((mut delegate, policy))) => { + let snap_updated; + let last_valid_ts = delegate.last_valid_ts; let mut response = match policy { // Leader can read local if and only if it is in lease. 
RequestPolicy::ReadLocal => { - let snapshot_ts = match read_id.as_mut() { - // If this peer became Leader not long ago and just after the cached - // snapshot was created, this snapshot can not see all data of the peer. - Some(id) => { - if id.create_time <= delegate.last_valid_ts { - id.create_time = monotonic_raw_now(); - } - id.create_time - } - None => monotonic_raw_now(), - }; + let mut local_read_ctx = + LocalReadContext::new(&mut self.snap_cache, read_id); + + snap_updated = local_read_ctx + .maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); + + let snapshot_ts = local_read_ctx.snapshot_ts(); if !delegate.is_in_leader_lease(snapshot_ts) { + fail_point!("localreader_before_redirect", |_| {}); // Forward to raftstore. self.redirect(RaftCommand::new(req, cb)); return; } - let read_ctx = self.local_read_context(); - let region = Arc::clone(&delegate.region); - let response = - delegate.execute(&req, ®ion, None, read_id, Some(read_ctx)); + let response = delegate.execute(&req, ®ion, None, Some(local_read_ctx)); + // Try renew lease in advance delegate.maybe_renew_lease_advance(&self.router, snapshot_ts); response @@ -837,12 +921,14 @@ where return; } - let read_ctx = self.local_read_context(); + let mut local_read_ctx = + LocalReadContext::new(&mut self.snap_cache, read_id); + snap_updated = local_read_ctx + .maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); let region = Arc::clone(&delegate.region); // Getting the snapshot - let response = - delegate.execute(&req, ®ion, None, read_id, Some(read_ctx)); + let response = delegate.execute(&req, ®ion, None, Some(local_read_ctx)); // Double check in case `safe_ts` change after the first check and before // getting snapshot @@ -860,6 +946,13 @@ where } _ => unreachable!(), }; + + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().local_executed_requests.inc()); + if !snap_updated { + TLS_LOCAL_READ_METRICS + .with(|m| m.borrow_mut().local_executed_snapshot_cache_hit.inc()); + } + 
cmd_resp::bind_term(&mut response.response, delegate.term); if let Some(snap) = response.snapshot.as_mut() { snap.txn_ext = Some(delegate.txn_ext.clone()); @@ -906,7 +999,7 @@ where } pub fn release_snapshot_cache(&mut self) { - self.snap_cache.as_mut().take(); + self.snap_cache.clear(); } } @@ -920,7 +1013,6 @@ where local_reader: self.local_reader.clone(), kv_engine: self.kv_engine.clone(), snap_cache: self.snap_cache.clone(), - cache_read_id: self.cache_read_id.clone(), router: self.router.clone(), } } @@ -936,27 +1028,8 @@ where &self.kv_engine } - fn get_snapshot( - &mut self, - create_time: Option, - read_context: &mut Option>, - ) -> Arc { - let ctx = read_context.as_mut().unwrap(); - TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().local_executed_requests.inc()); - if let Some(ts) = create_time { - if ts == *ctx.read_id { - if let Some(snap) = ctx.snap_cache.as_ref().as_ref() { - TLS_LOCAL_READ_METRICS - .with(|m| m.borrow_mut().local_executed_snapshot_cache_hit.inc()); - return snap.clone(); - } - } - let snap = Arc::new(self.kv_engine.snapshot()); - *ctx.read_id = ts; - *ctx.snap_cache = Box::new(Some(snap.clone())); - return snap; - } - Arc::new(self.kv_engine.snapshot()) + fn get_snapshot(&mut self, read_context: &Option>) -> Arc { + read_context.as_ref().unwrap().snapshot().unwrap() } } @@ -998,12 +1071,12 @@ impl<'r> RequestInspector for Inspector<'r> { #[cfg(test)] mod tests { - use std::{sync::mpsc::*, thread}; + use std::{ops::Add, sync::mpsc::*, thread}; use crossbeam::channel::TrySendError; use engine_test::kv::{KvTestEngine, KvTestSnapshot}; use engine_traits::{Peekable, SyncMutable, ALL_CFS}; - use kvproto::raft_cmdpb::*; + use kvproto::{metapb::RegionEpoch, raft_cmdpb::*}; use tempfile::{Builder, TempDir}; use tikv_util::{codec::number::NumberEncoder, time::monotonic_raw_now}; use time::Duration; @@ -1107,7 +1180,16 @@ mod tests { rx: &Receiver>, task: RaftCommand, ) { - reader.propose_raft_command(None, task.request, task.callback); + 
must_not_redirect_with_read_id(reader, rx, task, None); + } + + fn must_not_redirect_with_read_id( + reader: &mut LocalReader, + rx: &Receiver>, + task: RaftCommand, + read_id: Option, + ) { + reader.propose_raft_command(read_id, task.request, task.callback); assert_eq!(rx.try_recv().unwrap_err(), TryRecvError::Empty); } @@ -1523,14 +1605,13 @@ mod tests { } #[test] - fn test_read_delegate() { + fn test_read_executor_provider() { let path = Builder::new() .prefix("test-local-reader") .tempdir() .unwrap(); let kv_engine = engine_test::kv::new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap(); - kv_engine.put(b"a1", b"val1").unwrap(); let store_meta = StoreMetaDelegate::new(Arc::new(Mutex::new(StoreMeta::new(0))), kv_engine.clone()); @@ -1546,135 +1627,238 @@ mod tests { meta.readers.insert(2, read_delegate); } - let mut read_id = ThreadReadId::new(); - let mut snap_cache = Box::new(None); - - let read_id_copy = Some(read_id.clone()); - - let mut read_context = Some(LocalReadContext { - read_id: &mut read_id, - snap_cache: &mut snap_cache, - }); - - let (_, delegate) = store_meta.get_executor_and_len(1); + let (len, delegate) = store_meta.get_executor_and_len(1); + assert_eq!(2, len); let mut delegate = delegate.unwrap(); + assert_eq!(1, delegate.region.id); let tablet = delegate.get_tablet(); assert_eq!(kv_engine.as_inner().path(), tablet.as_inner().path()); - let snapshot = delegate.get_snapshot(read_id_copy.clone(), &mut read_context); - assert_eq!( - b"val1".to_vec(), - *snapshot.get_value(b"a1").unwrap().unwrap() - ); - let (_, delegate) = store_meta.get_executor_and_len(2); + let (len, delegate) = store_meta.get_executor_and_len(2); + assert_eq!(2, len); let mut delegate = delegate.unwrap(); + assert_eq!(2, delegate.region.id); let tablet = delegate.get_tablet(); assert_eq!(kv_engine.as_inner().path(), tablet.as_inner().path()); - let snapshot = delegate.get_snapshot(read_id_copy, &mut read_context); - assert_eq!( - b"val1".to_vec(), - 
*snapshot.get_value(b"a1").unwrap().unwrap() - ); - - assert!(snap_cache.as_ref().is_some()); - assert_eq!( - TLS_LOCAL_READ_METRICS.with(|m| m.borrow().local_executed_requests.get()), - 2 - ); - assert_eq!( - TLS_LOCAL_READ_METRICS.with(|m| m.borrow().local_executed_snapshot_cache_hit.get()), - 1 - ); } - #[test] - fn test_snap_cache_hit() { - let store_meta = Arc::new(Mutex::new(StoreMeta::new(0))); - let (_tmp, mut reader, _) = new_reader("test-local-reader", 1, store_meta.clone()); + fn prepare_read_delegate( + store_id: u64, + region_id: u64, + term: u64, + pr_ids: Vec, + region_epoch: RegionEpoch, + store_meta: Arc>, + ) { + let mut region = metapb::Region::default(); + region.set_id(region_id); + let prs = new_peers(store_id, pr_ids); + region.set_peers(prs.clone().into()); - let mut region1 = metapb::Region::default(); - region1.set_id(1); + let leader = prs[0].clone(); + region.set_region_epoch(region_epoch); + let mut lease = Lease::new(Duration::seconds(1), Duration::milliseconds(250)); // 1s is long enough. + let read_progress = Arc::new(RegionReadProgress::new(®ion, 1, 1, "".to_owned())); - // Register region 1 + // Register region + lease.renew(monotonic_raw_now()); + let remote = lease.maybe_new_remote_lease(term).unwrap(); + // But the applied_term is stale. 
{ let mut meta = store_meta.lock().unwrap(); let read_delegate = ReadDelegate { tag: String::new(), - region: Arc::new(region1.clone()), - peer_id: 1, - term: 1, - applied_term: 1, - leader_lease: None, + region: Arc::new(region.clone()), + peer_id: leader.get_id(), + term, + applied_term: term, + leader_lease: Some(remote), last_valid_ts: Timespec::new(0, 0), txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::default())), txn_ext: Arc::new(TxnExt::default()), - read_progress: Arc::new(RegionReadProgress::new(®ion1, 1, 1, "".to_owned())), + read_progress, pending_remove: false, track_ver: TrackVer::new(), bucket_meta: None, }; - meta.readers.insert(1, read_delegate); + meta.readers.insert(region_id, read_delegate); } + } - let mut delegate = reader.local_reader.get_delegate(region1.id).unwrap(); - let read_id = Some(ThreadReadId::new()); + #[test] + fn test_snap_across_regions() { + let store_id = 2; + let store_meta = Arc::new(Mutex::new(StoreMeta::new(0))); + let (_tmp, mut reader, rx) = new_reader("test-local-reader", store_id, store_meta.clone()); - { - let mut read_context = Some(reader.local_read_context()); + let epoch13 = { + let mut ep = metapb::RegionEpoch::default(); + ep.set_conf_ver(1); + ep.set_version(3); + ep + }; + let term6 = 6; - for _ in 0..10 { - // Different region id should reuse the cache - let _ = delegate.get_snapshot(read_id.clone(), &mut read_context); - } - } - // We should hit cache 9 times - assert_eq!( - TLS_LOCAL_READ_METRICS.with(|m| m.borrow().local_executed_snapshot_cache_hit.get()), - 9 + // Register region1 + let pr_ids1 = vec![2, 3, 4]; + let prs1 = new_peers(store_id, pr_ids1.clone()); + prepare_read_delegate( + store_id, + 1, + term6, + pr_ids1, + epoch13.clone(), + store_meta.clone(), + ); + let leader1 = prs1[0].clone(); + + // Register region2 + let pr_ids2 = vec![22, 33, 44]; + let prs2 = new_peers(store_id, pr_ids2.clone()); + prepare_read_delegate(store_id, 2, term6, pr_ids2, epoch13.clone(), store_meta); + let leader2 
= prs2[0].clone(); + + let mut cmd = RaftCmdRequest::default(); + let mut header = RaftRequestHeader::default(); + header.set_region_id(1); + header.set_peer(leader1); + header.set_region_epoch(epoch13.clone()); + header.set_term(term6); + cmd.set_header(header); + let mut req = Request::default(); + req.set_cmd_type(CmdType::Snap); + cmd.set_requests(vec![req].into()); + + let (snap_tx, snap_rx) = channel(); + let task = RaftCommand::::new( + cmd.clone(), + Callback::read(Box::new(move |resp: ReadResponse| { + snap_tx.send(resp.snapshot.unwrap()).unwrap(); + })), ); + // First request will not hit cache let read_id = Some(ThreadReadId::new()); + must_not_redirect_with_read_id(&mut reader, &rx, task, read_id.clone()); + let snap1 = snap_rx.recv().unwrap(); - { - let read_context = reader.local_read_context(); + let mut header = RaftRequestHeader::default(); + header.set_region_id(2); + header.set_peer(leader2); + header.set_region_epoch(epoch13); + header.set_term(term6); + cmd.set_header(header); + let (snap_tx, snap_rx) = channel(); + let task = RaftCommand::::new( + cmd.clone(), + Callback::read(Box::new(move |resp: ReadResponse| { + snap_tx.send(resp.snapshot.unwrap()).unwrap(); + })), + ); + must_not_redirect_with_read_id(&mut reader, &rx, task, read_id); + let snap2 = snap_rx.recv().unwrap(); + assert!(std::ptr::eq(snap1.get_snapshot(), snap2.get_snapshot())); - let _ = delegate.get_snapshot(read_id.clone(), &mut Some(read_context)); - } - // This time, we will miss the cache - assert_eq!( - TLS_LOCAL_READ_METRICS.with(|m| m.borrow().local_executed_snapshot_cache_hit.get()), - 9 + // If we use a new read id, the cache will be miss and a new snapshot will be + // generated + let read_id = Some(ThreadReadId::new()); + let (snap_tx, snap_rx) = channel(); + let task = RaftCommand::::new( + cmd.clone(), + Callback::read(Box::new(move |resp: ReadResponse| { + snap_tx.send(resp.snapshot.unwrap()).unwrap(); + })), ); + must_not_redirect_with_read_id(&mut reader, &rx, 
task, read_id); + let snap2 = snap_rx.recv().unwrap(); + assert!(!std::ptr::eq(snap1.get_snapshot(), snap2.get_snapshot())); + } - { - let read_context = reader.local_read_context(); - let _ = delegate.get_snapshot(read_id.clone(), &mut Some(read_context)); - // We can hit it again. - assert_eq!( - TLS_LOCAL_READ_METRICS.with(|m| m.borrow().local_executed_snapshot_cache_hit.get()), - 10 - ); - } + fn create_engine(path: &str) -> KvTestEngine { + let path = Builder::new().prefix(path).tempdir().unwrap(); + engine_test::kv::new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap() + } - reader.release_snapshot_cache(); - { - let read_context = reader.local_read_context(); - let _ = delegate.get_snapshot(read_id.clone(), &mut Some(read_context)); - } - // After release, we will mss the cache even with the prevsiou read_id. + #[test] + fn test_snap_cache_context() { + let db = create_engine("test_snap_cache_context"); + let mut snap_cache = SnapCache::new(); + let mut read_context = LocalReadContext::new(&mut snap_cache, None); + + // Have not inited the snap cache + assert!(read_context.snapshot().is_none()); + + db.put(b"a1", b"val1").unwrap(); + + let compare_ts = monotonic_raw_now(); + // Case 1: snap_cache_context.read_id is None + assert!(read_context.maybe_update_snapshot(&db, Timespec::new(0, 0))); + assert!(read_context.snapshot_ts() > compare_ts); assert_eq!( - TLS_LOCAL_READ_METRICS.with(|m| m.borrow().local_executed_snapshot_cache_hit.get()), - 10 + read_context + .snapshot() + .unwrap() + .get_value(b"a1") + .unwrap() + .unwrap(), + b"val1" ); - { - let read_context = reader.local_read_context(); - let _ = delegate.get_snapshot(read_id, &mut Some(read_context)); - } - // We can hit it again. 
+ // snap_cache_context is *not* created with read_id, so calling + // `maybe_update_snapshot` again will update the snapshot + let compare_ts = monotonic_raw_now(); + assert!(read_context.maybe_update_snapshot(&db, Timespec::new(0, 0))); + assert!(read_context.snapshot_ts() > compare_ts); + + let read_id = ThreadReadId::new(); + let read_id_clone = read_id.clone(); + let mut read_context = LocalReadContext::new(&mut snap_cache, Some(read_id)); + + let compare_ts = monotonic_raw_now(); + // Case 2: snap_cache_context.read_id is not None but not equals to the + // snap_cache.cached_read_id + assert!(read_context.maybe_update_snapshot(&db, Timespec::new(0, 0))); + assert!(read_context.snapshot_ts() > compare_ts); + let snap_ts = read_context.snapshot_ts(); + assert_eq!( + read_context + .snapshot() + .unwrap() + .get_value(b"a1") + .unwrap() + .unwrap(), + b"val1" + ); + + let db2 = create_engine("test_snap_cache_context2"); + // snap_cache_context is created with read_id, so calling + // `maybe_update_snapshot` again will *not* update the snapshot + // Case 3: snap_cache_context.read_id is not None and equals to the + // snap_cache.cached_read_id + assert!(!read_context.maybe_update_snapshot(&db2, Timespec::new(0, 0))); + assert_eq!(read_context.snapshot_ts(), snap_ts); assert_eq!( - TLS_LOCAL_READ_METRICS.with(|m| m.borrow().local_executed_snapshot_cache_hit.get()), - 11 + read_context + .snapshot() + .unwrap() + .get_value(b"a1") + .unwrap() + .unwrap(), + b"val1" + ); + + // Case 4: delegate.last_valid_ts is larger than create_time of read_id + let mut last_valid_ts = read_id_clone.create_time; + last_valid_ts = last_valid_ts.add(Duration::nanoseconds(1)); + assert!(read_context.maybe_update_snapshot(&db2, last_valid_ts)); + assert!(read_context.snapshot_ts() > snap_ts); + assert!( + read_context + .snapshot() + .unwrap() + .get_value(b"a1") + .unwrap() + .is_none(), ); } } diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index 
3fda1ca0a80..b291e86b88c 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -15,6 +15,7 @@ mod test_gc_worker; mod test_hibernate; mod test_import_service; mod test_kv_service; +mod test_local_read; mod test_memory_usage_limit; mod test_merge; mod test_metrics_overflow; diff --git a/tests/failpoints/cases/test_local_read.rs b/tests/failpoints/cases/test_local_read.rs new file mode 100644 index 00000000000..06365fb36fb --- /dev/null +++ b/tests/failpoints/cases/test_local_read.rs @@ -0,0 +1,81 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{sync::Arc, thread, time::Duration}; + +use grpcio::{ChannelBuilder, Environment}; +use kvproto::{ + kvrpcpb::{Context, RawGetRequest}, + tikvpb_grpc::TikvClient, +}; +use test_raftstore::{ + must_get_equal, must_get_none, must_raw_get, must_raw_put, new_peer, new_server_cluster, +}; +use tikv_util::HandyRwLock; + +// The test mocks the situation that just after passing the lease check, even +// when lease expires, we can read the correct value. +#[test] +fn test_consistency_after_lease_pass() { + let mut cluster = new_server_cluster(0, 3); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.run(); + let leader = new_peer(1, 1); + cluster.must_transfer_leader(1, leader); + + // Create clients. 
+ let env = Arc::new(Environment::new(1)); + let channel = ChannelBuilder::new(Arc::clone(&env)).connect(&cluster.sim.rl().get_addr(1)); + let client = TikvClient::new(channel); + + let region = cluster.get_region(&b"key1"[..]); + let region_id = region.id; + let leader = cluster.leader_of_region(region_id).unwrap(); + + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_peer(leader.clone()); + ctx.set_region_epoch(region.get_region_epoch().clone()); + + must_raw_put(&client, ctx.clone(), b"key1".to_vec(), b"value1".to_vec()); + must_get_equal(&cluster.get_engine(1), b"key1", b"value1"); + + // Ensure the request is executed by the local reader + fail::cfg("localreader_before_redirect", "panic").unwrap(); + + // Lease read works correctly + assert_eq!( + must_raw_get(&client, ctx.clone(), b"key1".to_vec()).unwrap(), + b"value1".to_vec() + ); + + // we pause just after pass the lease check, and then remove the peer. We can + // still read the relevant value as we should have already got the snapshot when + // passing the lease check. + fail::cfg("after_pass_lease_check", "pause").unwrap(); + + let mut get_req = RawGetRequest::default(); + get_req.set_context(ctx); + get_req.key = b"key1".to_vec(); + let mut receiver = client.raw_get_async(&get_req).unwrap(); + + thread::sleep(Duration::from_millis(200)); + + let mut peer = leader.clone(); + cluster.must_transfer_leader(1, new_peer(2, 2)); + pd_client.must_remove_peer(region_id, leader); + peer.id = 1000; + // After we pass the lease check, we should have got the snapshot, so the data + // that the region contains cannot be deleted. + // So we need to add the new peer for this region and stop before applying the + // snapshot so that the old data will be deleted and the snapshot data has not + // been written. 
+ fail::cfg("apply_snap_cleanup_range", "pause").unwrap(); + pd_client.must_add_peer(region_id, peer); + + // Wait for data to be cleaned + must_get_none(&cluster.get_engine(1), b"key1"); + fail::cfg("after_pass_lease_check", "off").unwrap(); + + assert_eq!(b"value1", receiver.receive_sync().unwrap().1.get_value()); +} From b448214b8f2c0a6a9ba2381a1983ce20e6514218 Mon Sep 17 00:00:00 2001 From: Ping Yu Date: Fri, 14 Oct 2022 12:59:51 +0800 Subject: [PATCH 0268/1149] causal-ts: rename `available-interval` to `alloc-ahead-buffer` (#13597) ref tikv/tikv#13596, close tikv/tikv#13596 Rename `causal-ts.available-interval` to `causal-ts.alloc-ahead-buffer` for more clear meaning. Signed-off-by: pingyu --- components/causal_ts/src/config.rs | 23 ++++++++++++----------- components/causal_ts/src/tso.rs | 20 ++++++++++---------- components/server/src/server.rs | 2 +- components/test_raftstore/src/server.rs | 2 +- tests/integrations/config/mod.rs | 2 +- 5 files changed, 25 insertions(+), 24 deletions(-) diff --git a/components/causal_ts/src/config.rs b/components/causal_ts/src/config.rs index 0b08fecc7d6..17994344924 100644 --- a/components/causal_ts/src/config.rs +++ b/components/causal_ts/src/config.rs @@ -28,28 +28,29 @@ pub struct Config { /// interval. The 50ms limitation can not be broken through now (see /// `tso-update-physical-interval`). pub renew_batch_max_size: u32, - /// The available interval of BatchTsoProvider. + /// The size (in duration) of TSO buffer allocated ahead for + /// BatchTsoProvider. /// /// Default is 3s. - /// The longer of the value can provide better "high-availability" against - /// PD failure, but more overhead of `TsoBatchList` & pressure to TSO + /// The longer of the value will help to improve tolerance against PD + /// failure, but more overhead of `TsoBatchList` & pressure to TSO /// service. 
- pub available_interval: ReadableDuration, + pub alloc_ahead_buffer: ReadableDuration, } impl Config { pub fn validate(&self) -> Result<(), Box> { if self.renew_interval.is_zero() { - return Err("causal-ts.renew_interval can't be zero".into()); + return Err("causal-ts.renew-interval can't be zero".into()); } if self.renew_batch_min_size == 0 { - return Err("causal-ts.renew_batch_min_size should be greater than 0".into()); + return Err("causal-ts.renew-batch-min-size should be greater than 0".into()); } if self.renew_batch_max_size == 0 { - return Err("causal-ts.renew_batch_max_size should be greater than 0".into()); + return Err("causal-ts.renew-batch-max-size should be greater than 0".into()); } - if self.available_interval.is_zero() { - return Err("causal-ts.available-interval can't be zero".into()); + if self.alloc_ahead_buffer.is_zero() { + return Err("causal-ts.alloc-ahead-buffer can't be zero".into()); } Ok(()) } @@ -63,8 +64,8 @@ impl Default for Config { ), renew_batch_min_size: crate::tso::DEFAULT_TSO_BATCH_MIN_SIZE, renew_batch_max_size: crate::tso::DEFAULT_TSO_BATCH_MAX_SIZE, - available_interval: ReadableDuration::millis( - crate::tso::DEFAULT_TSO_BATCH_AVAILABLE_INTERVAL_MS, + alloc_ahead_buffer: ReadableDuration::millis( + crate::tso::DEFAULT_TSO_BATCH_ALLOC_AHEAD_BUFFER_MS, ), } } diff --git a/components/causal_ts/src/tso.rs b/components/causal_ts/src/tso.rs index ad9f3ec1fc6..5056cfe2ebd 100644 --- a/components/causal_ts/src/tso.rs +++ b/components/causal_ts/src/tso.rs @@ -2,12 +2,12 @@ //! ## The algorithm to make the TSO cache tolerate failure of TSO service //! -//! 1. The scale of High-Available is specified by config item -//! `causal-ts.available-interval`. +//! 1. The expected total size (in duration) of TSO cache is specified by +//! config item `causal-ts.alloc-ahead-buffer`. //! //! 2. Count usage of TSO on every renew interval. //! -//! 3. Calculate `cache_multiplier` by `causal-ts.available-interval / +//! 3. 
Calculate `cache_multiplier` by `causal-ts.alloc-ahead-buffer / //! causal-ts.renew-interval`. //! //! 4. Then `tso_usage x cache_multiplier` is the expected number of TSO should @@ -67,9 +67,9 @@ pub(crate) const DEFAULT_TSO_BATCH_MAX_SIZE: u32 = 8192; /// of PD. The longer of the value can provide better "High-Availability" /// against PD failure, but more overhead of `TsoBatchList` & pressure to TSO /// service. -pub(crate) const DEFAULT_TSO_BATCH_AVAILABLE_INTERVAL_MS: u64 = 3000; +pub(crate) const DEFAULT_TSO_BATCH_ALLOC_AHEAD_BUFFER_MS: u64 = 3000; /// Just a limitation for safety, in case user specify a too big -/// `available_interval`. +/// `alloc_ahead_buffer`. const MAX_TSO_BATCH_LIST_CAPACITY: u32 = 1024; /// TSO range: [(physical, logical_start), (physical, logical_end)) @@ -326,7 +326,7 @@ impl BatchTsoProvider { Self::new_opt( pd_client, Duration::from_millis(DEFAULT_TSO_BATCH_RENEW_INTERVAL_MS), - Duration::from_millis(DEFAULT_TSO_BATCH_AVAILABLE_INTERVAL_MS), + Duration::from_millis(DEFAULT_TSO_BATCH_ALLOC_AHEAD_BUFFER_MS), DEFAULT_TSO_BATCH_MIN_SIZE, DEFAULT_TSO_BATCH_MAX_SIZE, ) @@ -334,23 +334,23 @@ impl BatchTsoProvider { } #[allow(unused_mut)] - fn calc_cache_multiplier(mut renew_interval: Duration, available_interval: Duration) -> u32 { + fn calc_cache_multiplier(mut renew_interval: Duration, alloc_ahead: Duration) -> u32 { #[cfg(any(test, feature = "testexport"))] if renew_interval.is_zero() { // Should happen in test only. 
renew_interval = Duration::from_millis(DEFAULT_TSO_BATCH_RENEW_INTERVAL_MS); } - available_interval.div_duration_f64(renew_interval).ceil() as u32 + alloc_ahead.div_duration_f64(renew_interval).ceil() as u32 } pub async fn new_opt( pd_client: Arc, renew_interval: Duration, - available_interval: Duration, + alloc_ahead: Duration, batch_min_size: u32, batch_max_size: u32, ) -> Result { - let cache_multiplier = Self::calc_cache_multiplier(renew_interval, available_interval); + let cache_multiplier = Self::calc_cache_multiplier(renew_interval, alloc_ahead); let renew_parameter = RenewParameter { batch_min_size, batch_max_size, diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 247bc6ccb58..2320d1156f4 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -349,7 +349,7 @@ where let tso = block_on(causal_ts::BatchTsoProvider::new_opt( pd_client.clone(), config.causal_ts.renew_interval.0, - config.causal_ts.available_interval.0, + config.causal_ts.alloc_ahead_buffer.0, config.causal_ts.renew_batch_min_size, config.causal_ts.renew_batch_max_size, )); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 67eb3a22db6..4c0bbce3fd1 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -374,7 +374,7 @@ impl ServerCluster { block_on(causal_ts::BatchTsoProvider::new_opt( self.pd_client.clone(), cfg.causal_ts.renew_interval.0, - cfg.causal_ts.available_interval.0, + cfg.causal_ts.alloc_ahead_buffer.0, cfg.causal_ts.renew_batch_min_size, cfg.causal_ts.renew_batch_max_size, )) diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index d0eac27e3b1..e2d5ef06b6e 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -798,7 +798,7 @@ fn test_serde_custom_tikv_config() { renew_interval: ReadableDuration::millis(100), renew_batch_min_size: 100, renew_batch_max_size: 
8192, - available_interval: ReadableDuration::millis(3000), + alloc_ahead_buffer: ReadableDuration::millis(3000), }; let custom = read_file_in_project_dir("integrations/config/test-custom.toml"); From 26b6c3cfccb386af50b1592ae4583dfb0003d7c1 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 14 Oct 2022 18:21:53 +0800 Subject: [PATCH 0269/1149] local_reader: release snapshot properly (#13605) close tikv/tikv#13553 Signed-off-by: SpadeA-Tang Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/worker/read.rs | 153 +++++++++++++++--- 1 file changed, 133 insertions(+), 20 deletions(-) diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 1f6d7c4bab7..fd6c7552f5d 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -203,6 +203,11 @@ where { read_id: Option, snap_cache: &'a mut SnapCache, + + // Used when read_id is not set, duplicated definition to avoid cache invalidation in case + // stale read and local read are mixed in one batch. 
+ snapshot: Option>, + snapshot_ts: Option, } impl<'a, E> LocalReadContext<'a, E> @@ -213,6 +218,8 @@ where Self { snap_cache, read_id, + snapshot: None, + snapshot_ts: None, } } @@ -232,25 +239,40 @@ where } self.snap_cache.cached_read_id = self.read_id.clone(); - } + self.snap_cache.snapshot = Some(Arc::new(engine.snapshot())); - self.snap_cache.snapshot = Some(Arc::new(engine.snapshot())); + // Ensures the snapshot is acquired before getting the time + atomic::fence(atomic::Ordering::Release); + self.snap_cache.cached_snapshot_ts = monotonic_raw_now(); + } else { + // read_id being None means the snapshot acquired will only be used in this + // request + self.snapshot = Some(Arc::new(engine.snapshot())); - // Ensures the snapshot is acquired before getting the time - atomic::fence(atomic::Ordering::Release); - self.snap_cache.cached_snapshot_ts = monotonic_raw_now(); + // Ensures the snapshot is acquired before getting the time + atomic::fence(atomic::Ordering::Release); + self.snapshot_ts = Some(monotonic_raw_now()); + } true } - // Note: must be called after `maybe_update_snapshot` - fn snapshot_ts(&self) -> Timespec { - self.snap_cache.cached_snapshot_ts + fn snapshot_ts(&self) -> Option { + if self.read_id.is_some() { + Some(self.snap_cache.cached_snapshot_ts) + } else { + self.snapshot_ts + } } // Note: must be called after `maybe_update_snapshot` fn snapshot(&self) -> Option> { - self.snap_cache.snapshot.clone() + // read_id being some means we go through cache + if self.read_id.is_some() { + self.snap_cache.snapshot.clone() + } else { + self.snapshot.clone() + } } } @@ -894,7 +916,7 @@ where snap_updated = local_read_ctx .maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); - let snapshot_ts = local_read_ctx.snapshot_ts(); + let snapshot_ts = local_read_ctx.snapshot_ts().unwrap(); if !delegate.is_in_leader_lease(snapshot_ts) { fail_point!("localreader_before_redirect", |_| {}); // Forward to raftstore. 
@@ -921,8 +943,8 @@ where return; } - let mut local_read_ctx = - LocalReadContext::new(&mut self.snap_cache, read_id); + // Stale read does not use cache, so we pass None for read_id + let mut local_read_ctx = LocalReadContext::new(&mut self.snap_cache, None); snap_updated = local_read_ctx .maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); @@ -1075,7 +1097,7 @@ mod tests { use crossbeam::channel::TrySendError; use engine_test::kv::{KvTestEngine, KvTestSnapshot}; - use engine_traits::{Peekable, SyncMutable, ALL_CFS}; + use engine_traits::{MiscExt, Peekable, SyncMutable, ALL_CFS}; use kvproto::{metapb::RegionEpoch, raft_cmdpb::*}; use tempfile::{Builder, TempDir}; use tikv_util::{codec::number::NumberEncoder, time::monotonic_raw_now}; @@ -1785,15 +1807,15 @@ mod tests { let mut snap_cache = SnapCache::new(); let mut read_context = LocalReadContext::new(&mut snap_cache, None); - // Have not inited the snap cache assert!(read_context.snapshot().is_none()); + assert!(read_context.snapshot_ts().is_none()); db.put(b"a1", b"val1").unwrap(); let compare_ts = monotonic_raw_now(); // Case 1: snap_cache_context.read_id is None assert!(read_context.maybe_update_snapshot(&db, Timespec::new(0, 0))); - assert!(read_context.snapshot_ts() > compare_ts); + assert!(read_context.snapshot_ts().unwrap() > compare_ts); assert_eq!( read_context .snapshot() @@ -1808,7 +1830,7 @@ mod tests { // `maybe_update_snapshot` again will update the snapshot let compare_ts = monotonic_raw_now(); assert!(read_context.maybe_update_snapshot(&db, Timespec::new(0, 0))); - assert!(read_context.snapshot_ts() > compare_ts); + assert!(read_context.snapshot_ts().unwrap() > compare_ts); let read_id = ThreadReadId::new(); let read_id_clone = read_id.clone(); @@ -1818,8 +1840,8 @@ mod tests { // Case 2: snap_cache_context.read_id is not None but not equals to the // snap_cache.cached_read_id assert!(read_context.maybe_update_snapshot(&db, Timespec::new(0, 0))); - assert!(read_context.snapshot_ts() > 
compare_ts); - let snap_ts = read_context.snapshot_ts(); + assert!(read_context.snapshot_ts().unwrap() > compare_ts); + let snap_ts = read_context.snapshot_ts().unwrap(); assert_eq!( read_context .snapshot() @@ -1836,7 +1858,7 @@ mod tests { // Case 3: snap_cache_context.read_id is not None and equals to the // snap_cache.cached_read_id assert!(!read_context.maybe_update_snapshot(&db2, Timespec::new(0, 0))); - assert_eq!(read_context.snapshot_ts(), snap_ts); + assert_eq!(read_context.snapshot_ts().unwrap(), snap_ts); assert_eq!( read_context .snapshot() @@ -1851,7 +1873,7 @@ mod tests { let mut last_valid_ts = read_id_clone.create_time; last_valid_ts = last_valid_ts.add(Duration::nanoseconds(1)); assert!(read_context.maybe_update_snapshot(&db2, last_valid_ts)); - assert!(read_context.snapshot_ts() > snap_ts); + assert!(read_context.snapshot_ts().unwrap() > snap_ts); assert!( read_context .snapshot() @@ -1861,4 +1883,95 @@ mod tests { .is_none(), ); } + + #[test] + fn test_snap_release_for_not_using_cache() { + let store_id = 2; + let store_meta = Arc::new(Mutex::new(StoreMeta::new(0))); + let (_tmp, mut reader, rx) = new_reader("test-local-reader", store_id, store_meta.clone()); + reader.kv_engine.put(b"key", b"value").unwrap(); + + let epoch13 = { + let mut ep = metapb::RegionEpoch::default(); + ep.set_conf_ver(1); + ep.set_version(3); + ep + }; + let term6 = 6; + + // Register region1 + let pr_ids1 = vec![2, 3, 4]; + let prs1 = new_peers(store_id, pr_ids1.clone()); + prepare_read_delegate(store_id, 1, term6, pr_ids1, epoch13.clone(), store_meta); + let leader1 = prs1[0].clone(); + + // Local read + let mut cmd = RaftCmdRequest::default(); + let mut header = RaftRequestHeader::default(); + header.set_region_id(1); + header.set_peer(leader1); + header.set_region_epoch(epoch13); + header.set_term(term6); + cmd.set_header(header.clone()); + let mut req = Request::default(); + req.set_cmd_type(CmdType::Snap); + cmd.set_requests(vec![req].into()); + + // using cache 
and release + let read_id = ThreadReadId::new(); + let task = RaftCommand::::new( + cmd.clone(), + Callback::read(Box::new(move |_: ReadResponse| {})), + ); + must_not_redirect_with_read_id(&mut reader, &rx, task, Some(read_id)); + assert!( + reader + .kv_engine + .get_oldest_snapshot_sequence_number() + .is_some() + ); + reader.release_snapshot_cache(); + assert!( + reader + .kv_engine + .get_oldest_snapshot_sequence_number() + .is_none() + ); + + let task = RaftCommand::::new( + cmd.clone(), + Callback::read(Box::new(move |_: ReadResponse| {})), + ); + + // not use cache + must_not_redirect_with_read_id(&mut reader, &rx, task, None); + assert!( + reader + .kv_engine + .get_oldest_snapshot_sequence_number() + .is_none() + ); + + // Stale read + let mut data = [0u8; 8]; + (&mut data[..]).encode_u64(0).unwrap(); + header.set_flags(header.get_flags() | WriteBatchFlags::STALE_READ.bits()); + header.set_flag_data(data.into()); + + cmd.set_header(header); + let task = RaftCommand::::new( + cmd, + Callback::read(Box::new(move |_: ReadResponse| {})), + ); + let read_id = ThreadReadId::new(); + must_not_redirect_with_read_id(&mut reader, &rx, task, Some(read_id)); + // Stale read will not use snap cache + assert!(reader.snap_cache.snapshot.is_none()); + assert!( + reader + .kv_engine + .get_oldest_snapshot_sequence_number() + .is_none() + ); + } } From b37c2f606d48d3a0fdb2227900746b7180b96724 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Mon, 17 Oct 2022 11:39:52 +0800 Subject: [PATCH 0270/1149] *: introduce the two-phase kv_flashback_to_version (#13557) close tikv/tikv#13519, ref tikv/tikv#13519, ref tikv/tikv#13541 Make `FlashbackToVersion` become a two-phase request as described in #13519. 
Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/error_code/src/raftstore.rs | 3 + components/raftstore/src/errors.rs | 9 ++ components/raftstore/src/store/fsm/apply.rs | 8 +- components/raftstore/src/store/fsm/peer.rs | 23 +++-- components/raftstore/src/store/metrics.rs | 3 +- components/raftstore/src/store/util.rs | 40 +++++--- components/test_raftstore/src/cluster.rs | 94 +++++++++++------- components/test_raftstore/src/util.rs | 22 +++++ src/server/service/kv.rs | 60 +++++------ .../txn/actions/flashback_to_version.rs | 35 ++++++- .../integrations/raftstore/test_flashback.rs | 99 +++++++++++++------ tests/integrations/server/kv_service.rs | 62 ++++++------ 13 files changed, 308 insertions(+), 152 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bf3536544fc..97c6209b2d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2672,7 +2672,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#4c6f1502851ed55b3ed023d180b6b10766446630" +source = "git+https://github.com/pingcap/kvproto.git#26e28e6a281abb927f91ef992eb8f93b39698ffa" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/error_code/src/raftstore.rs b/components/error_code/src/raftstore.rs index 2fd0d168a14..1b6a85493cf 100644 --- a/components/error_code/src/raftstore.rs +++ b/components/error_code/src/raftstore.rs @@ -31,6 +31,7 @@ define_error_codes!( PENDING_PREPARE_MERGE => ("PendingPrepareMerge", "", ""), RECOVERY_IN_PROGRESS => ("RecoveryInProgress", "", ""), FLASHBACK_IN_PROGRESS => ("FlashbackInProgress", "", ""), + FLASHBACK_NOT_PREPARED => ("FlashbackNotPrepared", "", ""), SNAP_ABORT => ("SnapAbort", "", ""), SNAP_TOO_MANY => ("SnapTooMany", "", ""), @@ -67,6 +68,8 @@ impl ErrorCodeExt for errorpb::Error { RECOVERY_IN_PROGRESS } else if self.has_flashback_in_progress() { FLASHBACK_IN_PROGRESS + } else if self.has_flashback_not_prepared() { + FLASHBACK_NOT_PREPARED } else { UNKNOWN 
} diff --git a/components/raftstore/src/errors.rs b/components/raftstore/src/errors.rs index 70e342da96a..3c415c65af6 100644 --- a/components/raftstore/src/errors.rs +++ b/components/raftstore/src/errors.rs @@ -61,6 +61,9 @@ pub enum Error { #[error("region {0} is in the flashback progress")] FlashbackInProgress(u64), + #[error("region {0} not prepared the flashback")] + FlashbackNotPrepared(u64), + #[error( "key {} is not in region key range [{}, {}) for region {}", log_wrappers::Value::key(.0), @@ -255,6 +258,11 @@ impl From for errorpb::Error { e.set_region_id(region_id); errorpb.set_flashback_in_progress(e); } + Error::FlashbackNotPrepared(region_id) => { + let mut e = errorpb::FlashbackNotPrepared::default(); + e.set_region_id(region_id); + errorpb.set_flashback_not_prepared(e); + } _ => {} }; @@ -290,6 +298,7 @@ impl ErrorCodeExt for Error { Error::DiskFull(..) => error_code::raftstore::DISK_FULL, Error::RecoveryInProgress(..) => error_code::raftstore::RECOVERY_IN_PROGRESS, Error::FlashbackInProgress(..) => error_code::raftstore::FLASHBACK_IN_PROGRESS, + Error::FlashbackNotPrepared(..) => error_code::raftstore::FLASHBACK_NOT_PREPARED, Error::StaleCommand => error_code::raftstore::STALE_COMMAND, Error::RegionNotInitialized(_) => error_code::raftstore::REGION_NOT_INITIALIZED, Error::KeyNotInRegion(..) => error_code::raftstore::KEY_NOT_IN_REGION, diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 5fb5754b116..dae732797b1 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -1350,6 +1350,12 @@ where "peer_id" => self.id(), "err" => ?e ), + Error::FlashbackNotPrepared(..) 
=> debug!( + "flashback is not prepared"; + "region_id" => self.region_id(), + "peer_id" => self.id(), + "err" => ?e + ), _ => error!(?e; "execute raft command"; "region_id" => self.region_id(), @@ -1522,7 +1528,7 @@ where let include_region = req.get_header().get_region_epoch().get_version() >= self.last_merge_version; check_region_epoch(req, &self.region, include_region)?; - check_flashback_state(req, &self.region)?; + check_flashback_state(self.region.get_is_in_flashback(), req, self.region_id())?; if req.has_admin_request() { self.exec_admin_cmd(ctx, req) } else { diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index d4a31561c63..30877f57263 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -4899,14 +4899,23 @@ where } let region_id = self.region_id(); - // When in the flashback state, we should not allow any other request to be - // proposed. - if self.fsm.peer.is_in_flashback { - self.ctx.raft_metrics.invalid_proposal.flashback.inc(); - let flags = WriteBatchFlags::from_bits_truncate(msg.get_header().get_flags()); - if !flags.contains(WriteBatchFlags::FLASHBACK) { - return Err(Error::FlashbackInProgress(self.region_id())); + if let Err(e) = util::check_flashback_state(self.fsm.peer.is_in_flashback, msg, region_id) { + match e { + Error::FlashbackInProgress(_) => self + .ctx + .raft_metrics + .invalid_proposal + .flashback_in_progress + .inc(), + Error::FlashbackNotPrepared(_) => self + .ctx + .raft_metrics + .invalid_proposal + .flashback_not_prepared + .inc(), + _ => unreachable!(), } + return Err(e); } // Check whether the store has the right peer to handle the request. diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 32a23cd070e..af877e14b46 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -202,7 +202,8 @@ make_static_metric! 
{ region_not_initialized, is_applying_snapshot, force_leader, - flashback, + flashback_in_progress, + flashback_not_prepared } pub label_enum RaftLogGcSkippedReason { diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 42276c79ab6..a21eb7756e2 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -280,21 +280,31 @@ pub fn compare_region_epoch( Ok(()) } -pub fn check_flashback_state(req: &RaftCmdRequest, region: &metapb::Region) -> Result<()> { - // If admin flashback has not been applied but the region is already in a - // flashback state, the request is rejected - if region.get_is_in_flashback() { - let flags = WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()); - if flags.contains(WriteBatchFlags::FLASHBACK) { - return Ok(()); - } - if req.has_admin_request() - && (req.get_admin_request().get_cmd_type() == AdminCmdType::PrepareFlashback - || req.get_admin_request().get_cmd_type() == AdminCmdType::FinishFlashback) - { - return Ok(()); - } - return Err(Error::FlashbackInProgress(region.get_id())); +// Check if the request could be proposed/applied under the current state of the +// flashback. +pub fn check_flashback_state( + is_in_flashback: bool, + req: &RaftCmdRequest, + region_id: u64, +) -> Result<()> { + // The admin flashback cmd could be proposed/applied under any state. + if req.has_admin_request() + && (req.get_admin_request().get_cmd_type() == AdminCmdType::PrepareFlashback + || req.get_admin_request().get_cmd_type() == AdminCmdType::FinishFlashback) + { + return Ok(()); + } + let is_flashback_request = WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()) + .contains(WriteBatchFlags::FLASHBACK); + // If the region is in the flashback state, the only allowed request is the + // flashback request itself. 
+ if is_in_flashback && !is_flashback_request { + return Err(Error::FlashbackInProgress(region_id)); + } + // If the region is not in the flashback state, the flashback request itself + // should be rejected. + if !is_in_flashback && is_flashback_request { + return Err(Error::FlashbackNotPrepared(region_id)); } Ok(()) } diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 9b1f19bf21a..a5ce174c6d2 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1421,7 +1421,7 @@ impl Cluster { .unwrap(); } - pub async fn send_flashback_msg( + pub fn block_send_flashback_msg( &mut self, region_id: u64, store_id: u64, @@ -1429,41 +1429,67 @@ impl Cluster { epoch: metapb::RegionEpoch, peer: metapb::Peer, ) { - let (result_tx, result_rx) = oneshot::channel(); - let cb = Callback::write(Box::new(move |resp| { - if resp.response.get_header().has_error() { - result_tx.send(false).unwrap(); - error!("send flashback msg failed"; "region_id" => region_id); - return; + self.wait_applied_to_current_term(region_id, Duration::from_secs(3)); + block_on(async move { + let (result_tx, result_rx) = oneshot::channel(); + let cb = Callback::write(Box::new(move |resp| { + if resp.response.get_header().has_error() { + result_tx + .send(Some(resp.response.get_header().get_error().clone())) + .unwrap(); + return; + } + result_tx.send(None).unwrap(); + })); + + let mut admin = AdminRequest::default(); + admin.set_cmd_type(cmd_type); + let mut req = RaftCmdRequest::default(); + req.mut_header().set_region_id(region_id); + req.mut_header().set_region_epoch(epoch); + req.mut_header().set_peer(peer); + req.set_admin_request(admin); + req.mut_header() + .set_flags(WriteBatchFlags::FLASHBACK.bits()); + let router = self.sim.rl().get_router(store_id).unwrap(); + if let Err(e) = router.send_command( + req, + cb, + RaftCmdExtraOpts { + deadline: None, + disk_full_opt: 
kvproto::kvrpcpb::DiskFullOpt::AllowedOnAlmostFull, + }, + ) { + panic!( + "router send flashback msg {:?} failed, error: {}", + cmd_type, e + ); } - result_tx.send(true).unwrap(); - })); - - let mut admin = AdminRequest::default(); - admin.set_cmd_type(cmd_type); - let mut req = RaftCmdRequest::default(); - req.mut_header().set_region_id(region_id); - req.mut_header().set_region_epoch(epoch); - req.mut_header().set_peer(peer); - req.set_admin_request(admin); - req.mut_header() - .set_flags(WriteBatchFlags::FLASHBACK.bits()); - - let router = self.sim.rl().get_router(store_id).unwrap(); - if let Err(e) = router.send_command( - req, - cb, - RaftCmdExtraOpts { - deadline: None, - disk_full_opt: kvproto::kvrpcpb::DiskFullOpt::AllowedOnAlmostFull, - }, - ) { - panic!("router send failed, error{}", e); - } - - if !result_rx.await.unwrap() { - panic!("Flashback call msg failed"); + if let Some(e) = result_rx.await.unwrap() { + panic!("call flashback msg {:?} failed, error: {:?}", cmd_type, e); + } + }); + } + + fn wait_applied_to_current_term(&mut self, region_id: u64, timeout: Duration) { + let mut now = Instant::now(); + let deadline = now + timeout; + while now < deadline { + if let Some(leader) = self.leader_of_region(region_id) { + let raft_apply_state = self.apply_state(region_id, leader.get_store_id()); + let raft_local_state = self.raft_local_state(region_id, leader.get_store_id()); + // If term matches and apply to commit index, then it must apply to current + // term. 
+ if raft_apply_state.applied_index == raft_apply_state.commit_index + && raft_apply_state.commit_term == raft_local_state.get_hard_state().get_term() + { + return; + } + } + thread::sleep(Duration::from_millis(10)); + now = Instant::now(); } + panic!("region {} is not applied to current term", region_id,); } pub fn must_split(&mut self, region: &metapb::Region, split_key: &[u8]) { diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 87269ac5e02..3718dbce906 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -1218,6 +1218,28 @@ pub fn must_raw_get(client: &TikvClient, ctx: Context, key: Vec) -> Option FlashbackToVersionResponse { + let mut prepare_req = PrepareFlashbackToVersionRequest::default(); + prepare_req.set_context(ctx.clone()); + client + .kv_prepare_flashback_to_version(&prepare_req) + .unwrap(); + let mut req = FlashbackToVersionRequest::default(); + req.set_context(ctx); + req.set_start_ts(start_ts); + req.set_commit_ts(commit_ts); + req.version = version; + req.start_key = b"a".to_vec(); + req.end_key = b"z".to_vec(); + client.kv_flashback_to_version(&req).unwrap() +} + // A helpful wrapper to make the test logic clear pub struct PeerClient { pub cli: TikvClient, diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 924236529d9..84015ddab57 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1722,8 +1722,8 @@ fn future_delete_range( } } -// Preparing the flashback for a region/key range will "lock" the region so that -// there is no any read, write or schedule operation could be proposed before +// Preparing the flashback for a region will "lock" the region so that +// there is no any read, write or scheduling operation could be proposed before // the actual flashback operation. 
fn future_prepare_flashback_to_version< E: Engine, @@ -1733,11 +1733,24 @@ fn future_prepare_flashback_to_version< >( // Keep this param to hint the type of E for the compiler. _storage: &Storage, - _raft_router: &T, - _req: PrepareFlashbackToVersionRequest, + raft_router: &T, + req: PrepareFlashbackToVersionRequest, ) -> impl Future> { - // TODO: implement this. - async move { unimplemented!() } + let raft_router = Mutex::new(raft_router.clone()); + async move { + // Send an `AdminCmdType::PrepareFlashback` to prepare the raftstore for the + // later flashback. Once invoked, we will update the persistent region meta and + // the memory state of the flashback in Peer FSM to reject all read, write + // and scheduling operations for this region when propose/apply before we + // start the actual data flashback transaction command in the next phase. + send_flashback_msg::( + &raft_router, + req.get_context(), + AdminCmdType::PrepareFlashback, + ) + .await?; + Ok(PrepareFlashbackToVersionResponse::default()) + } } // Flashback the region to a specific point with the given `version`, please @@ -1756,22 +1769,8 @@ fn future_flashback_to_version< let storage_clone = storage.clone(); let raft_router = Mutex::new(raft_router.clone()); async move { - // Send an `AdminCmdType::PrepareFlashback` to prepare the raftstore for the - // later flashback. This will first block all scheduling, read and write - // operations, then wait for the latest Raft log to be applied before we start - // the flashback command. Once invoked, we update the persistence state - // in `RegionLocalState` and region's meta, and when that - // admin cmd is applied, the `PrepareFlashback` command will update the memory - // state of the flashback, rejecting all read and write operations at - // propose and applied. We make FlashbackToVersion a two-stage request - // and lock the region in the first stage. 
- send_flashback_msg::( - &raft_router, - req.get_context(), - AdminCmdType::PrepareFlashback, - ) - .await?; - + // Perform the data flashback transaction command. We will check if the region + // is in the flashback state when proposing the flashback modification. let (cb, f) = paired_future_callback(); let res = storage_clone.sched_txn_command(req.clone().into(), cb); // Avoid crossing `.await` to bypass the `Send` constraint. @@ -1793,7 +1792,6 @@ fn future_flashback_to_version< AdminCmdType::FinishFlashback, ) .await?; - let mut resp = FlashbackToVersionResponse::default(); if let Some(err) = extract_region_error(&v) { resp.set_region_error(err); @@ -2472,11 +2470,15 @@ async fn send_flashback_msg + 'static, E: Engine>( ctx: &Context, cmd_type: AdminCmdType, ) -> ServerResult<()> { + let region_id = ctx.get_region_id(); let (result_tx, result_rx) = oneshot::channel(); let cb = Callback::write(Box::new(move |resp| { if resp.response.get_header().has_error() { result_tx.send(false).unwrap(); - error!("send flashback msg failed"; "error" => ?resp.response.get_header().get_error()); + error!("exec flashback msg failed"; + "region_id" => region_id, + "type" => ?cmd_type, + "error" => ?resp.response.get_header().get_error()); return; } result_tx.send(true).unwrap(); @@ -2484,7 +2486,7 @@ async fn send_flashback_msg + 'static, E: Engine>( let mut admin = AdminRequest::default(); admin.set_cmd_type(cmd_type); let mut req = RaftCmdRequest::default(); - req.mut_header().set_region_id(ctx.get_region_id()); + req.mut_header().set_region_id(region_id); req.mut_header() .set_region_epoch(ctx.get_region_epoch().clone()); req.mut_header().set_peer(ctx.get_peer().clone()); @@ -2502,15 +2504,17 @@ async fn send_flashback_msg + 'static, E: Engine>( }, ) { return Err(Error::Other(box_err!( - "flashback router send failed, error {:?}", + "send flashback msg {:?} failed for region {}, error {:?}", + cmd_type, + region_id, e ))); } if !result_rx.await? 
{ return Err(Error::Other(box_err!( - "send flashback msg {:?} to region {} failed", + "wait flashback msg {:?} result failed for region {} failed", cmd_type, - ctx.get_region_id() + region_id ))); } Ok(()) diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index e160a4a43b9..96f80b9389c 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -10,7 +10,6 @@ use crate::storage::{ pub const FLASHBACK_BATCH_SIZE: usize = 256 + 1 /* To store the next key for multiple batches */; -// TODO: we should resolve all locks before starting a flashback. pub fn flashback_to_version_read_lock( reader: &mut MvccReader, next_lock_key: &Option, @@ -64,12 +63,19 @@ pub fn flashback_to_version_read_write( // Check the latest commit ts to make sure there is no commit change during the // flashback, otherwise, we need to abort the flashback. for (key, commit_ts, old_write) in key_ts_old_writes { - if commit_ts >= flashback_commit_ts { + if commit_ts > flashback_commit_ts { return Err(Error::from(ErrorInner::InvalidTxnTso { start_ts: flashback_start_ts, commit_ts: flashback_commit_ts, })); } + // Since the first flashback preparation phase make sure there will be no writes + // other than flashback after it, so we need to check if there is already a + // successful flashback result, and if so, just finish the flashback ASAP. + if commit_ts == flashback_commit_ts { + key_old_writes.clear(); + return Ok((key_old_writes, false)); + } key_old_writes.push((key, old_write)); } Ok((key_old_writes, has_remain_writes)) @@ -298,7 +304,7 @@ pub mod tests { // Since the key has been deleted, flashback to version 1 should not do // anything. 
assert_eq!( - must_flashback_to_version(&mut engine, k, ts, *ts.incr(), *ts.incr()), + must_flashback_to_version(&mut engine, k, 1, *ts.incr(), *ts.incr()), 0 ); must_get_none(&mut engine, k, ts); @@ -331,4 +337,27 @@ pub mod tests { must_pessimistic_prewrite_put_err(&mut engine, k, v3, k, 30, 30, DoPessimisticCheck); must_get(&mut engine, k, 45, v1); } + + #[test] + fn test_duplicated_flashback_to_version() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + let mut ts = TimeStamp::zero(); + let (k, v) = (b"k", b"v"); + must_prewrite_put(&mut engine, k, v, k, *ts.incr()); + must_commit(&mut engine, k, ts, *ts.incr()); + must_get(&mut engine, k, ts, v); + let start_ts = *ts.incr(); + let commit_ts = *ts.incr(); + assert_eq!( + must_flashback_to_version(&mut engine, k, 1, start_ts, commit_ts), + 1 + ); + must_get_none(&mut engine, k, ts); + // Flashback again with the same `start_ts` and `commit_ts` should not do + // anything. + assert_eq!( + must_flashback_to_version(&mut engine, k, 1, start_ts, commit_ts), + 0 + ); + } } diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index 5709cd22804..be70e176f01 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -2,12 +2,39 @@ use std::time::{Duration, Instant}; -use futures::executor::block_on; -use kvproto::metapb; +use kvproto::{ + metapb, + raft_cmdpb::{CmdType, Request}, +}; use test_raftstore::*; use tikv_util::time::InstantExt; use txn_types::WriteBatchFlags; +#[test] +fn test_flashback_unprepared() { + let mut cluster = new_node_cluster(0, 3); + cluster.run(); + + cluster.must_transfer_leader(1, new_peer(2, 2)); + cluster.must_transfer_leader(1, new_peer(1, 1)); + + let mut region = cluster.get_region(b"k1"); + let mut cmd = Request::default(); + cmd.set_cmd_type(CmdType::Put); + let mut req = new_request( + region.get_id(), + region.take_region_epoch(), + vec![cmd], + false, + ); 
+ let new_leader = cluster.query_leader(1, region.get_id(), Duration::from_secs(1)); + req.mut_header().set_peer(new_leader.unwrap()); + req.mut_header() + .set_flags(WriteBatchFlags::FLASHBACK.bits()); + let resp = cluster.call_command(req, Duration::from_secs(5)).unwrap(); + assert!(resp.get_header().get_error().has_flashback_not_prepared()); +} + #[test] fn test_flashback_for_schedule() { let mut cluster = new_node_cluster(0, 3); @@ -18,15 +45,15 @@ fn test_flashback_for_schedule() { // Prepare for flashback let region = cluster.get_region(b"k1"); - block_on(cluster.send_flashback_msg( + cluster.block_send_flashback_msg( region.get_id(), 1, kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, cluster.get_region_epoch(1), new_peer(1, 1), - )); + ); - // Verify the schedule is unabled. + // Verify the schedule is disabled. let mut region = cluster.get_region(b"k3"); let admin_req = new_transfer_leader_cmd(new_peer(2, 2)); let transfer_leader = @@ -46,13 +73,13 @@ fn test_flashback_for_schedule() { // Verify the schedule can be executed if add flashback flag in request's // header. 
must_transfer_leader(&mut cluster, region.get_id(), new_peer(2, 2)); - block_on(cluster.send_flashback_msg( + cluster.block_send_flashback_msg( region.get_id(), 2, kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, cluster.get_region_epoch(1), new_peer(2, 2), - )); + ); // Transfer leader to (1, 1) cluster.must_transfer_leader(1, new_peer(1, 1)); } @@ -69,13 +96,13 @@ fn test_flashback_for_write() { // Prepare for flashback let region = cluster.get_region(b"k1"); - block_on(cluster.send_flashback_msg( + cluster.block_send_flashback_msg( region.get_id(), 1, kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, cluster.get_region_epoch(1), new_peer(1, 1), - )); + ); // Write will be blocked let value = vec![1_u8; 8096]; @@ -87,13 +114,13 @@ fn test_flashback_for_write() { new_put_cmd(b"k1", &value), ); - block_on(cluster.send_flashback_msg( + cluster.block_send_flashback_msg( region.get_id(), 1, kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, cluster.get_region_epoch(1), new_peer(1, 1), - )); + ); multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); } @@ -112,13 +139,13 @@ fn test_flashback_for_read() { // Prepare for flashback let region = cluster.get_region(b"k1"); - block_on(cluster.send_flashback_msg( + cluster.block_send_flashback_msg( region.get_id(), 1, kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, cluster.get_region_epoch(1), new_peer(1, 1), - )); + ); // read will be blocked must_get_error_flashback_in_progress(&mut cluster, ®ion, new_get_cf_cmd("write", b"k1")); @@ -131,13 +158,13 @@ fn test_flashback_for_read() { new_get_cf_cmd("write", b"k1"), ); - block_on(cluster.send_flashback_msg( + cluster.block_send_flashback_msg( region.get_id(), 1, kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, cluster.get_region_epoch(1), new_peer(1, 1), - )); + ); multi_do_cmd(&mut cluster, new_get_cf_cmd("write", b"k1")); } @@ -173,13 +200,13 @@ fn test_flashback_for_local_read() { assert_eq!(state.get_last_index(), last_index); // Prepare for 
flashback - block_on(cluster.send_flashback_msg( + cluster.block_send_flashback_msg( region.get_id(), store_id, kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, cluster.get_region_epoch(1), new_peer(store_id, store_id), - )); + ); // Check the leader does a local read. let state = cluster.raft_local_state(region.get_id(), store_id); @@ -210,13 +237,13 @@ fn test_flashback_for_local_read() { let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index + 1); - block_on(cluster.send_flashback_msg( + cluster.block_send_flashback_msg( region.get_id(), store_id, kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, cluster.get_region_epoch(1), new_peer(store_id, store_id), - )); + ); let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index + 2); @@ -240,13 +267,13 @@ fn test_flashback_for_status_cmd_as_region_detail() { let leader = cluster.leader_of_region(1).unwrap(); let region = cluster.get_region(b"k1"); - block_on(cluster.send_flashback_msg( + cluster.block_send_flashback_msg( region.get_id(), leader.get_store_id(), kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, cluster.get_region_epoch(1), new_peer(leader.get_store_id(), leader.get_store_id()), - )); + ); let region_detail = cluster.region_detail(region.get_id(), leader.get_store_id()); assert!(region_detail.has_region()); @@ -275,23 +302,23 @@ fn test_flashback_for_check_is_in_persist() { assert!(!local_state.get_region().get_is_in_flashback()); // Prepare for flashback - block_on(cluster.send_flashback_msg( + cluster.block_send_flashback_msg( 1, 2, kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, cluster.get_region_epoch(1), leader_peer.clone(), - )); + ); let local_state = cluster.region_local_state(1, 2); assert!(local_state.get_region().get_is_in_flashback()); - block_on(cluster.send_flashback_msg( + cluster.block_send_flashback_msg( 1, 2, kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, 
cluster.get_region_epoch(1), leader_peer, - )); + ); let local_state = cluster.region_local_state(1, 2); assert!(!local_state.get_region().get_is_in_flashback()); } @@ -302,26 +329,30 @@ fn test_flashback_for_apply_snapshot() { cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); - // Make node3 isolationed + // Make node3 isolated cluster.add_send_filter(IsolationFilterFactory::new(5)); let local_state = cluster.region_local_state(1, 1); assert!(!local_state.get_region().get_is_in_flashback()); + let local_state = cluster.region_local_state(1, 5); + assert!(!local_state.get_region().get_is_in_flashback()); // Write for cluster let value = vec![1_u8; 8096]; multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); // Prepare for flashback - block_on(cluster.send_flashback_msg( + cluster.block_send_flashback_msg( 1, 1, kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, cluster.get_region_epoch(1), new_peer(1, 1), - )); + ); let local_state = cluster.region_local_state(1, 1); assert!(local_state.get_region().get_is_in_flashback()); + let local_state = cluster.region_local_state(1, 5); + assert!(!local_state.get_region().get_is_in_flashback()); // Add node 3 back. 
cluster.clear_send_filters(); @@ -332,13 +363,21 @@ fn test_flashback_for_apply_snapshot() { let local_state = cluster.region_local_state(1, 5); assert!(local_state.get_region().get_is_in_flashback()); - block_on(cluster.send_flashback_msg( + cluster.block_send_flashback_msg( 1, 5, kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, cluster.get_region_epoch(1), new_peer(5, 5), - )); + ); + + // Wait for applying + sleep_ms(500); + + let local_state = cluster.region_local_state(1, 5); + assert!(!local_state.get_region().get_is_in_flashback()); + let local_state = cluster.region_local_state(1, 1); + assert!(!local_state.get_region().get_is_in_flashback()); } fn transfer_leader(cluster: &mut Cluster, region_id: u64, leader: metapb::Peer) { diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 253d1e0c067..f3e3bda8a24 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -658,18 +658,8 @@ fn test_mvcc_flashback() { assert!(get_resp.get_error().has_locked()); assert!(get_resp.value.is_empty()); // Flashback - let mut flashback_to_version_req = FlashbackToVersionRequest::default(); - flashback_to_version_req.set_context(ctx.clone()); - ts += 1; - flashback_to_version_req.set_start_ts(ts); - ts += 1; - flashback_to_version_req.set_commit_ts(ts); - flashback_to_version_req.version = 5; - flashback_to_version_req.start_key = b"a".to_vec(); - flashback_to_version_req.end_key = b"z".to_vec(); - let flashback_resp = client - .kv_flashback_to_version(&flashback_to_version_req) - .unwrap(); + let flashback_resp = must_flashback_to_version(&client, ctx.clone(), 5, ts + 1, ts + 2); + ts += 2; assert!(!flashback_resp.has_region_error()); assert!(flashback_resp.get_error().is_empty()); // Should not meet the lock and can not get the latest data any more. 
@@ -682,16 +672,7 @@ fn test_mvcc_flashback_block_rw() { let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); fail::cfg("skip_finish_flashback_to_version", "return").unwrap(); // Flashback - let mut flashback_to_version_req = FlashbackToVersionRequest::default(); - flashback_to_version_req.set_context(ctx.clone()); - flashback_to_version_req.set_start_ts(1); - flashback_to_version_req.set_commit_ts(2); - flashback_to_version_req.version = 0; - flashback_to_version_req.start_key = b"a".to_vec(); - flashback_to_version_req.end_key = b"z".to_vec(); - let flashback_resp = client - .kv_flashback_to_version(&flashback_to_version_req) - .unwrap(); + let flashback_resp = must_flashback_to_version(&client, ctx.clone(), 0, 1, 2); assert!(!flashback_resp.has_region_error()); assert!(flashback_resp.get_error().is_empty()); // Try to read. @@ -731,16 +712,7 @@ fn test_mvcc_flashback_block_scheduling() { let (mut cluster, client, ctx) = must_new_cluster_and_kv_client(); fail::cfg("skip_finish_flashback_to_version", "return").unwrap(); // Flashback - let mut flashback_to_version_req = FlashbackToVersionRequest::default(); - flashback_to_version_req.set_context(ctx); - flashback_to_version_req.set_start_ts(1); - flashback_to_version_req.set_commit_ts(2); - flashback_to_version_req.version = 0; - flashback_to_version_req.start_key = b"a".to_vec(); - flashback_to_version_req.end_key = b"z".to_vec(); - let flashback_resp = client - .kv_flashback_to_version(&flashback_to_version_req) - .unwrap(); + let flashback_resp = must_flashback_to_version(&client, ctx, 0, 1, 2); assert!(!flashback_resp.has_region_error()); assert!(flashback_resp.get_error().is_empty()); // Try to transfer leader. 
@@ -754,6 +726,32 @@ fn test_mvcc_flashback_block_scheduling() { fail::remove("skip_finish_flashback_to_version"); } +#[test] +fn test_mvcc_flashback_unprepared() { + let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (k, v) = (b"key".to_vec(), b"value".to_vec()); + // Prewrite + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(k.clone()); + mutation.set_value(v.clone()); + must_kv_prewrite(&client, ctx.clone(), vec![mutation], k.clone(), 1); + // Commit + must_kv_commit(&client, ctx.clone(), vec![k.clone()], 1, 2, 2); + must_kv_read_equal(&client, ctx.clone(), k.clone(), v.clone(), 3); + // Try to flashback without preparing first. + let mut req = FlashbackToVersionRequest::default(); + req.set_context(ctx.clone()); + req.set_start_ts(4); + req.set_commit_ts(5); + req.version = 0; + req.start_key = b"a".to_vec(); + req.end_key = b"z".to_vec(); + let resp = client.kv_flashback_to_version(&req).unwrap(); + assert!(resp.get_region_error().has_flashback_not_prepared()); + must_kv_read_equal(&client, ctx, k, v, 6); +} + // raft related RPC is tested as parts of test_snapshot.rs, so skip here. #[test] From 39961b106722d17fe7b52c67d9b623f2168812a0 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Mon, 17 Oct 2022 18:43:53 +0800 Subject: [PATCH 0271/1149] raftstore: move check_flashback_state to after check_region_epoch (#13618) ref tikv/tikv#13303, ref pingcap/tidb#38475 Move `check_flashback_state` to after `check_region_epoch` to make sure the Region Cache on the client-side could be refreshed ASAP. 
Signed-off-by: JmPotato --- components/raftstore/src/store/fsm/peer.rs | 49 ++++++++++++---------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 30877f57263..57f5fe158f5 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -4898,26 +4898,6 @@ where return Ok(Some(resp)); } - let region_id = self.region_id(); - if let Err(e) = util::check_flashback_state(self.fsm.peer.is_in_flashback, msg, region_id) { - match e { - Error::FlashbackInProgress(_) => self - .ctx - .raft_metrics - .invalid_proposal - .flashback_in_progress - .inc(), - Error::FlashbackNotPrepared(_) => self - .ctx - .raft_metrics - .invalid_proposal - .flashback_not_prepared - .inc(), - _ => unreachable!(), - } - return Err(e); - } - // Check whether the store has the right peer to handle the request. let leader_id = self.fsm.peer.leader_id(); let request = msg.get_requests(); @@ -4944,6 +4924,7 @@ where _ => read_only = false, } } + let region_id = self.region_id(); let allow_replica_read = read_only && msg.get_header().get_replica_read(); let flags = WriteBatchFlags::from_bits_check(msg.get_header().get_flags()); let allow_stale_read = read_only && flags.contains(WriteBatchFlags::STALE_READ); @@ -5005,11 +4986,33 @@ where let requested_version = msg.get_header().get_region_epoch().version; self.collect_sibling_region(requested_version, &mut new_regions); self.ctx.raft_metrics.invalid_proposal.epoch_not_match.inc(); - Err(Error::EpochNotMatch(m, new_regions)) + return Err(Error::EpochNotMatch(m, new_regions)); + } + Err(e) => return Err(e), + _ => {} + }; + // Check whether the region is in the flashback state and the request could be + // proposed. 
+ if let Err(e) = util::check_flashback_state(self.fsm.peer.is_in_flashback, msg, region_id) { + match e { + Error::FlashbackInProgress(_) => self + .ctx + .raft_metrics + .invalid_proposal + .flashback_in_progress + .inc(), + Error::FlashbackNotPrepared(_) => self + .ctx + .raft_metrics + .invalid_proposal + .flashback_not_prepared + .inc(), + _ => unreachable!(), } - Err(e) => Err(e), - Ok(()) => Ok(None), + return Err(e); } + + Ok(None) } /// Proposes pending batch raft commands (if any), then proposes the From 571b5a263c7e84c2ab8aeb5feaebc8d50cae48cb Mon Sep 17 00:00:00 2001 From: haojinming Date: Mon, 17 Oct 2022 19:03:53 +0800 Subject: [PATCH 0272/1149] test: Fix incorrect rawkv case test_raw_put_key_guard (#13600) close tikv/tikv#13599 The logic in `test_raw_put_key_guard` is incorrect, fix it. Signed-off-by: haojinming Co-authored-by: Ping Yu Co-authored-by: Ti Chi Robot --- tests/failpoints/cases/test_rawkv.rs | 50 ++++++++++++++++++++++++- tests/failpoints/cases/test_storage.rs | 51 +------------------------- 2 files changed, 50 insertions(+), 51 deletions(-) diff --git a/tests/failpoints/cases/test_rawkv.rs b/tests/failpoints/cases/test_rawkv.rs index 547b6144c7c..274a458958e 100644 --- a/tests/failpoints/cases/test_rawkv.rs +++ b/tests/failpoints/cases/test_rawkv.rs @@ -1,8 +1,8 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{sync::Arc, time::Duration}; +use std::{sync::Arc, thread, time::Duration}; -use causal_ts::CausalTsProvider; +use causal_ts::{CausalTsProvider, CausalTsProviderImpl}; use futures::executor::block_on; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ @@ -103,6 +103,10 @@ impl TestSuite { .unwrap(); } + pub fn get_causal_ts_provider(&mut self, node_id: u64) -> Option> { + self.cluster.sim.rl().get_causal_ts_provider(node_id) + } + pub fn must_merge_region_by_key(&mut self, source_key: &[u8], target_key: &[u8]) { let source = self.cluster.get_region(source_key); let target = self.cluster.get_region(target_key); @@ -271,3 +275,45 @@ fn test_region_merge() { fail::remove(FP_GET_TSO); suite.stop(); } + +// Verify the raw key guard correctness in apiv2 +#[test] +fn test_raw_put_key_guard() { + let mut suite = TestSuite::new(3, ApiVersion::V2); + let pause_write_fp = "raftkv_async_write"; + + let test_key = b"rk3".to_vec(); + let test_value = b"v3".to_vec(); + + let region = suite.cluster.get_region(&test_key); + let region_id = region.get_id(); + let client = suite.get_client(region_id); + let ctx = suite.get_context(region_id); + let node_id = region.get_peers()[0].get_id(); + let leader_cm = suite.cluster.sim.rl().get_concurrency_manager(node_id); + let ts_provider = suite.get_causal_ts_provider(node_id).unwrap(); + let ts = block_on(ts_provider.async_get_ts()).unwrap(); + + let copy_test_key = test_key.clone(); + let copy_test_value = test_value.clone(); + let apply_wait_timeout = 2000; // ms, assume send request and apply can be finished in 2s. + fail::cfg(pause_write_fp, "pause").unwrap(); + let handle = thread::spawn(move || { + must_raw_put(&client, ctx, copy_test_key, copy_test_value); + }); + thread::sleep(Duration::from_millis(apply_wait_timeout)); + + // Before raw_put finish, min_ts should be the ts of "key guard" of the raw_put + // request. 
+ assert_eq!(suite.must_raw_get(&test_key), None); + let min_ts = leader_cm.global_min_lock_ts(); + assert_eq!(min_ts.unwrap(), ts.next()); + + fail::remove(pause_write_fp); + handle.join().unwrap(); + + // After raw_put is finished, "key guard" is released. + assert_eq!(suite.must_raw_get(&test_key), Some(test_value)); + let min_ts = leader_cm.global_min_lock_ts(); + assert!(min_ts.is_none()); +} diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 45f5e16675c..ec38958ad57 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -11,7 +11,6 @@ use std::{ }; use api_version::{ApiV1, ApiV2, KvFormat}; -use causal_ts::CausalTsProvider; use collections::HashMap; use engine_traits::DummyFactory; use errors::{extract_key_error, extract_region_error}; @@ -19,8 +18,8 @@ use futures::executor::block_on; use grpcio::*; use kvproto::{ kvrpcpb::{ - self, ApiVersion, AssertionLevel, BatchRollbackRequest, CommandPri, CommitRequest, Context, - GetRequest, Op, PrewriteRequest, PrewriteRequestPessimisticAction::*, RawPutRequest, + self, AssertionLevel, BatchRollbackRequest, CommandPri, CommitRequest, Context, GetRequest, + Op, PrewriteRequest, PrewriteRequestPessimisticAction::*, RawPutRequest, }, tikvpb::TikvClient, }; @@ -1479,49 +1478,3 @@ fn test_raw_put_deadline() { assert!(!put_resp.has_region_error(), "{:?}", put_resp); must_get_equal(&cluster.get_engine(1), b"k3", b"v3"); } - -#[test] -fn test_raw_put_key_guard() { - let api_version = ApiVersion::V2; - let pause_write_fp = "raftkv_async_write"; - let mut cluster = new_server_cluster_with_api_ver(0, 1, api_version); - cluster.run(); - let region = cluster.get_region(b""); - let leader = region.get_peers()[0].clone(); - let node_id = leader.get_id(); - let leader_cm = cluster.sim.rl().get_concurrency_manager(node_id); - let ts_provider = cluster.sim.rl().get_causal_ts_provider(node_id).unwrap(); - let ts = 
block_on(ts_provider.async_get_ts()).unwrap(); - - let env = Arc::new(Environment::new(1)); - let channel = - ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); - let client = TikvClient::new(channel); - - let mut ctx = Context::default(); - ctx.set_region_id(region.get_id()); - ctx.set_region_epoch(region.get_region_epoch().clone()); - ctx.set_peer(leader); - ctx.set_api_version(api_version); - let mut put_req = RawPutRequest::default(); - put_req.set_context(ctx); - put_req.key = b"rk3".to_vec(); - put_req.value = b"v3".to_vec(); - - fail::cfg(pause_write_fp, "pause").unwrap(); - let handle = thread::spawn(move || { - let _ = client.raw_put(&put_req).unwrap(); - }); - - thread::sleep(Duration::from_millis(100)); - must_get_none(&cluster.get_engine(1), b"rk3"); - let min_ts = leader_cm.global_min_lock_ts(); - assert_eq!(min_ts.unwrap(), ts.next()); - - fail::remove(pause_write_fp); - handle.join().unwrap(); - thread::sleep(Duration::from_millis(100)); - must_get_none(&cluster.get_engine(1), b"rk3"); - let min_ts = leader_cm.global_min_lock_ts(); - assert!(min_ts.is_none()); -} From 13f58a9b05500375b537ab9da58768051fa6fdfa Mon Sep 17 00:00:00 2001 From: JmPotato Date: Tue, 18 Oct 2022 14:27:53 +0800 Subject: [PATCH 0273/1149] tests: refine the flashback raftstore test (#13615) ref tikv/tikv#13303 Refine the flashback raftstore test. 
Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- components/test_raftstore/src/cluster.rs | 16 +- etc/error_code.toml | 5 + .../integrations/raftstore/test_flashback.rs | 259 +++++------------- 3 files changed, 86 insertions(+), 194 deletions(-) diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index a5ce174c6d2..7a932d324f0 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1421,15 +1421,11 @@ impl Cluster { .unwrap(); } - pub fn block_send_flashback_msg( - &mut self, - region_id: u64, - store_id: u64, - cmd_type: AdminCmdType, - epoch: metapb::RegionEpoch, - peer: metapb::Peer, - ) { + pub fn must_send_flashback_msg(&mut self, region_id: u64, cmd_type: AdminCmdType) { self.wait_applied_to_current_term(region_id, Duration::from_secs(3)); + let leader = self.leader_of_region(region_id).unwrap(); + let store_id = leader.get_store_id(); + let region_epoch = self.get_region_epoch(region_id); block_on(async move { let (result_tx, result_rx) = oneshot::channel(); let cb = Callback::write(Box::new(move |resp| { @@ -1446,8 +1442,8 @@ impl Cluster { admin.set_cmd_type(cmd_type); let mut req = RaftCmdRequest::default(); req.mut_header().set_region_id(region_id); - req.mut_header().set_region_epoch(epoch); - req.mut_header().set_peer(peer); + req.mut_header().set_region_epoch(region_epoch); + req.mut_header().set_peer(leader); req.set_admin_request(admin); req.mut_header() .set_flags(WriteBatchFlags::FLASHBACK.bits()); diff --git a/etc/error_code.toml b/etc/error_code.toml index 7a6b956449f..5cdd770f8d2 100644 --- a/etc/error_code.toml +++ b/etc/error_code.toml @@ -438,6 +438,11 @@ error = ''' KV:Raftstore:FlashbackInProgress ''' +["KV:Raftstore:FlashbackNotPrepared"] +error = ''' +KV:Raftstore:FlashbackNotPrepared +''' + ["KV:Raftstore:SnapAbort"] error = ''' KV:Raftstore:SnapAbort diff --git a/tests/integrations/raftstore/test_flashback.rs 
b/tests/integrations/raftstore/test_flashback.rs index be70e176f01..810da9d840f 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -1,13 +1,15 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::time::{Duration, Instant}; +use std::{ + thread::sleep, + time::{Duration, Instant}, +}; use kvproto::{ metapb, - raft_cmdpb::{CmdType, Request}, + raft_cmdpb::{AdminCmdType, CmdType, Request}, }; use test_raftstore::*; -use tikv_util::time::InstantExt; use txn_types::WriteBatchFlags; #[test] @@ -31,7 +33,7 @@ fn test_flashback_unprepared() { req.mut_header().set_peer(new_leader.unwrap()); req.mut_header() .set_flags(WriteBatchFlags::FLASHBACK.bits()); - let resp = cluster.call_command(req, Duration::from_secs(5)).unwrap(); + let resp = cluster.call_command(req, Duration::from_secs(3)).unwrap(); assert!(resp.get_header().get_error().has_flashback_not_prepared()); } @@ -45,13 +47,7 @@ fn test_flashback_for_schedule() { // Prepare for flashback let region = cluster.get_region(b"k1"); - cluster.block_send_flashback_msg( - region.get_id(), - 1, - kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, - cluster.get_region_epoch(1), - new_peer(1, 1), - ); + cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); // Verify the schedule is disabled. let mut region = cluster.get_region(b"k3"); @@ -70,18 +66,9 @@ fn test_flashback_for_schedule() { } ); - // Verify the schedule can be executed if add flashback flag in request's - // header. 
- must_transfer_leader(&mut cluster, region.get_id(), new_peer(2, 2)); - cluster.block_send_flashback_msg( - region.get_id(), - 2, - kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, - cluster.get_region_epoch(1), - new_peer(2, 2), - ); - // Transfer leader to (1, 1) - cluster.must_transfer_leader(1, new_peer(1, 1)); + cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); + // Transfer leader to (2, 2) should succeed. + cluster.must_transfer_leader(1, new_peer(2, 2)); } #[test] @@ -96,31 +83,19 @@ fn test_flashback_for_write() { // Prepare for flashback let region = cluster.get_region(b"k1"); - cluster.block_send_flashback_msg( - region.get_id(), - 1, - kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, - cluster.get_region_epoch(1), - new_peer(1, 1), - ); + cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); // Write will be blocked let value = vec![1_u8; 8096]; must_get_error_flashback_in_progress(&mut cluster, ®ion, new_put_cmd(b"k1", &value)); - - must_cmd_add_flashback_flag( + // Write with flashback flag will succeed + must_do_cmd_with_flashback_flag( &mut cluster, &mut region.clone(), new_put_cmd(b"k1", &value), ); - cluster.block_send_flashback_msg( - region.get_id(), - 1, - kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, - cluster.get_region_epoch(1), - new_peer(1, 1), - ); + cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); } @@ -139,32 +114,20 @@ fn test_flashback_for_read() { // Prepare for flashback let region = cluster.get_region(b"k1"); - cluster.block_send_flashback_msg( - region.get_id(), - 1, - kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, - cluster.get_region_epoch(1), - new_peer(1, 1), - ); + cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); // read will be blocked must_get_error_flashback_in_progress(&mut cluster, ®ion, new_get_cf_cmd("write", 
b"k1")); // Verify the read can be executed if add flashback flag in request's // header. - must_cmd_add_flashback_flag( + must_do_cmd_with_flashback_flag( &mut cluster, &mut region.clone(), new_get_cf_cmd("write", b"k1"), ); - cluster.block_send_flashback_msg( - region.get_id(), - 1, - kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, - cluster.get_region_epoch(1), - new_peer(1, 1), - ); + cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); multi_do_cmd(&mut cluster, new_get_cf_cmd("write", b"k1")); } @@ -180,9 +143,8 @@ fn test_flashback_for_local_read() { // Avoid triggering the log compaction in this test case. cluster.cfg.raft_store.raft_log_gc_threshold = 100; - let node_id = 3u64; - let store_id = 3u64; - let peer = new_peer(store_id, node_id); + let store_id = 3; + let peer = new_peer(store_id, 3); cluster.run(); cluster.must_put(b"k1", b"v1"); @@ -193,25 +155,19 @@ fn test_flashback_for_local_read() { let state = cluster.raft_local_state(region.get_id(), store_id); let last_index = state.get_last_index(); // Make sure the leader transfer procedure timeouts. - std::thread::sleep(election_timeout * 2); + sleep(election_timeout * 2); must_read_on_peer(&mut cluster, peer.clone(), region.clone(), b"k1", b"v1"); // Check the leader does a local read. let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index); // Prepare for flashback - cluster.block_send_flashback_msg( - region.get_id(), - store_id, - kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, - cluster.get_region_epoch(1), - new_peer(store_id, store_id), - ); + cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); // Check the leader does a local read. let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index + 1); - // Wait for apply_res to set leader lease . + // Wait for apply_res to set leader lease. 
sleep_ms(500); must_error_read_on_peer( @@ -224,7 +180,7 @@ fn test_flashback_for_local_read() { // Wait for the leader's lease to expire to ensure that a renew lease interval // has elapsed. - std::thread::sleep(election_timeout * 2); + sleep(election_timeout * 2); must_error_read_on_peer( &mut cluster, peer.clone(), @@ -237,13 +193,7 @@ fn test_flashback_for_local_read() { let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index + 1); - cluster.block_send_flashback_msg( - region.get_id(), - store_id, - kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, - cluster.get_region_epoch(1), - new_peer(store_id, store_id), - ); + cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index + 2); @@ -252,7 +202,7 @@ fn test_flashback_for_local_read() { let state = cluster.raft_local_state(region.get_id(), store_id); let last_index = state.get_last_index(); // Make sure the leader transfer procedure timeouts. - std::thread::sleep(election_timeout * 2); + sleep(election_timeout * 2); must_read_on_peer(&mut cluster, peer, region.clone(), b"k1", b"v1"); // Check the leader does a local read. 
@@ -267,13 +217,7 @@ fn test_flashback_for_status_cmd_as_region_detail() { let leader = cluster.leader_of_region(1).unwrap(); let region = cluster.get_region(b"k1"); - cluster.block_send_flashback_msg( - region.get_id(), - leader.get_store_id(), - kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, - cluster.get_region_epoch(1), - new_peer(leader.get_store_id(), leader.get_store_id()), - ); + cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); let region_detail = cluster.region_detail(region.get_id(), leader.get_store_id()); assert!(region_detail.has_region()); @@ -295,128 +239,75 @@ fn test_flashback_for_check_is_in_persist() { let mut cluster = new_node_cluster(0, 3); cluster.run(); - let leader_peer = new_peer(2, 2); - cluster.must_transfer_leader(1, leader_peer.clone()); - - let local_state = cluster.region_local_state(1, 2); - assert!(!local_state.get_region().get_is_in_flashback()); + cluster.must_transfer_leader(1, new_peer(2, 2)); + must_check_flashback_state(&mut cluster, 1, 2, false); // Prepare for flashback - cluster.block_send_flashback_msg( - 1, - 2, - kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, - cluster.get_region_epoch(1), - leader_peer.clone(), - ); - let local_state = cluster.region_local_state(1, 2); - assert!(local_state.get_region().get_is_in_flashback()); - - cluster.block_send_flashback_msg( - 1, - 2, - kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, - cluster.get_region_epoch(1), - leader_peer, - ); - let local_state = cluster.region_local_state(1, 2); - assert!(!local_state.get_region().get_is_in_flashback()); + cluster.must_send_flashback_msg(1, AdminCmdType::PrepareFlashback); + must_check_flashback_state(&mut cluster, 1, 2, true); + + cluster.must_send_flashback_msg(1, AdminCmdType::FinishFlashback); + must_check_flashback_state(&mut cluster, 1, 2, false); } #[test] fn test_flashback_for_apply_snapshot() { - let mut cluster = new_node_cluster(0, 5); + let mut cluster = new_node_cluster(0, 3); 
+ configure_for_snapshot(&mut cluster); cluster.run(); + + cluster.must_transfer_leader(1, new_peer(3, 3)); cluster.must_transfer_leader(1, new_peer(1, 1)); - // Make node3 isolated - cluster.add_send_filter(IsolationFilterFactory::new(5)); + must_check_flashback_state(&mut cluster, 1, 1, false); + must_check_flashback_state(&mut cluster, 1, 3, false); - let local_state = cluster.region_local_state(1, 1); - assert!(!local_state.get_region().get_is_in_flashback()); - let local_state = cluster.region_local_state(1, 5); - assert!(!local_state.get_region().get_is_in_flashback()); + // Make store 3 isolated. + cluster.add_send_filter(IsolationFilterFactory::new(3)); - // Write for cluster - let value = vec![1_u8; 8096]; - multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); + // Write some data to trigger snapshot. + for i in 100..110 { + let key = format!("k{}", i); + let value = format!("v{}", i); + cluster.must_put_cf("write", key.as_bytes(), value.as_bytes()); + } // Prepare for flashback - cluster.block_send_flashback_msg( - 1, - 1, - kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, - cluster.get_region_epoch(1), - new_peer(1, 1), - ); - let local_state = cluster.region_local_state(1, 1); - assert!(local_state.get_region().get_is_in_flashback()); - let local_state = cluster.region_local_state(1, 5); - assert!(!local_state.get_region().get_is_in_flashback()); + cluster.must_send_flashback_msg(1, AdminCmdType::PrepareFlashback); + must_check_flashback_state(&mut cluster, 1, 1, true); + must_check_flashback_state(&mut cluster, 1, 3, false); - // Add node 3 back. + // Add store 3 back. 
cluster.clear_send_filters(); - // Wait for snapshot - sleep_ms(500); - - must_transfer_leader(&mut cluster, 1, new_peer(5, 5)); - let local_state = cluster.region_local_state(1, 5); - assert!(local_state.get_region().get_is_in_flashback()); - - cluster.block_send_flashback_msg( - 1, - 5, - kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, - cluster.get_region_epoch(1), - new_peer(5, 5), - ); + must_check_flashback_state(&mut cluster, 1, 1, true); + must_check_flashback_state(&mut cluster, 1, 3, true); - // Wait for applying - sleep_ms(500); - - let local_state = cluster.region_local_state(1, 5); - assert!(!local_state.get_region().get_is_in_flashback()); - let local_state = cluster.region_local_state(1, 1); - assert!(!local_state.get_region().get_is_in_flashback()); -} - -fn transfer_leader(cluster: &mut Cluster, region_id: u64, leader: metapb::Peer) { - let epoch = cluster.get_region_epoch(region_id); - let admin_req = new_transfer_leader_cmd(leader); - let mut transfer_leader = new_admin_request(region_id, &epoch, admin_req); - transfer_leader - .mut_header() - .set_flags(WriteBatchFlags::FLASHBACK.bits()); - let resp = cluster - .call_command_on_leader(transfer_leader, Duration::from_secs(5)) - .unwrap(); - assert!(!resp.get_header().has_error()); + cluster.must_send_flashback_msg(1, AdminCmdType::FinishFlashback); + must_check_flashback_state(&mut cluster, 1, 1, false); + must_check_flashback_state(&mut cluster, 1, 3, false); } -fn must_transfer_leader( - cluster: &mut Cluster, +fn must_check_flashback_state( + cluster: &mut Cluster, region_id: u64, - leader: metapb::Peer, + store_id: u64, + is_in_flashback: bool, ) { - let timer = Instant::now(); - loop { - cluster.reset_leader_of_region(region_id); - let cur_leader = cluster.leader_of_region(region_id); - if let Some(ref cur_leader) = cur_leader { - if cur_leader.get_id() == leader.get_id() - && cur_leader.get_store_id() == leader.get_store_id() - { - return; - } - } - if timer.saturating_elapsed() > 
Duration::from_secs(5) { - panic!( - "failed to transfer leader to [{}] {:?}, current leader: {:?}", - region_id, leader, cur_leader - ); + let mut now = Instant::now(); + let timeout = Duration::from_secs(3); + let deadline = now + timeout; + while now < deadline { + let local_state = cluster.region_local_state(region_id, store_id); + if local_state.get_region().get_is_in_flashback() == is_in_flashback { + return; } - transfer_leader(cluster, region_id, leader.clone()); + sleep(Duration::from_millis(10)); + now = Instant::now(); } + panic!( + "region {} on store {} flashback state unmatched, want: {}", + region_id, store_id, is_in_flashback, + ); } fn multi_do_cmd(cluster: &mut Cluster, cmd: kvproto::raft_cmdpb::Request) { @@ -429,7 +320,7 @@ fn multi_do_cmd(cluster: &mut Cluster, cmd: kvproto::raft_cmdpb } } -fn must_cmd_add_flashback_flag( +fn must_do_cmd_with_flashback_flag( cluster: &mut Cluster, region: &mut metapb::Region, cmd: kvproto::raft_cmdpb::Request, @@ -446,7 +337,7 @@ fn must_cmd_add_flashback_flag( req.mut_header().set_peer(new_leader.unwrap()); req.mut_header() .set_flags(WriteBatchFlags::FLASHBACK.bits()); - let resp = cluster.call_command(req, Duration::from_secs(5)).unwrap(); + let resp = cluster.call_command(req, Duration::from_secs(3)).unwrap(); assert!(!resp.get_header().has_error()); } From ed64ed21cfa1c734191549a0db66986dab04f4bd Mon Sep 17 00:00:00 2001 From: hehechen Date: Tue, 18 Oct 2022 16:47:53 +0800 Subject: [PATCH 0274/1149] resolved_ts: track 1PC (#13579) close tikv/tikv#13353 Signed-off-by: hehechen Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/util.rs | 2 +- components/resolved_ts/src/endpoint.rs | 4 +- .../resolved_ts/tests/failpoints/mod.rs | 6 +-- .../resolved_ts/tests/integrations/mod.rs | 25 +++++++++- components/resolved_ts/tests/mod.rs | 5 ++ .../cases/test_replica_stale_read.rs | 49 +++++++++++++++++++ 6 files changed, 84 insertions(+), 7 deletions(-) diff --git a/components/raftstore/src/store/util.rs 
b/components/raftstore/src/store/util.rs index a21eb7756e2..5f2c6615527 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -903,7 +903,7 @@ impl RegionReadProgressRegistry { .lock() .unwrap() .get(region_id) - .map(|rp| rp.core.lock().unwrap().applied_index) + .map(|rp| rp.core.lock().unwrap().read_state.idx) } // NOTICE: this function is an alias of `get_safe_ts` to distinguish the diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 76202240a10..a79ff66e384 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -191,7 +191,9 @@ impl ObserveRegion { .resolver .untrack_lock(&key.to_raw().unwrap(), Some(*index)), // One pc command do not contains any lock, so just skip it - ChangeRow::OnePc { .. } => {} + ChangeRow::OnePc { .. } => { + self.resolver.update_tracked_index(*index); + } ChangeRow::IngestSsT => { self.resolver.update_tracked_index(*index); } diff --git a/components/resolved_ts/tests/failpoints/mod.rs b/components/resolved_ts/tests/failpoints/mod.rs index ab4e88f9d25..808f5ed62ff 100644 --- a/components/resolved_ts/tests/failpoints/mod.rs +++ b/components/resolved_ts/tests/failpoints/mod.rs @@ -22,7 +22,7 @@ fn test_check_leader_timeout() { mutation.set_op(Op::Put); mutation.key = k.to_vec(); mutation.value = v.to_vec(); - suite.must_kv_prewrite(region.id, vec![mutation], k.to_vec(), start_ts); + suite.must_kv_prewrite(region.id, vec![mutation], k.to_vec(), start_ts, false); suite .cluster .must_transfer_leader(region.id, new_peer(1, 1)); @@ -78,7 +78,7 @@ fn test_report_min_resolved_ts() { mutation.set_op(Op::Put); mutation.key = k.to_vec(); mutation.value = v.to_vec(); - suite.must_kv_prewrite(region.id, vec![mutation], k.to_vec(), start_ts); + suite.must_kv_prewrite(region.id, vec![mutation], k.to_vec(), start_ts, false); // Commit let commit_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); @@ -112,7 
+112,7 @@ fn test_report_min_resolved_ts_disable() { mutation.set_op(Op::Put); mutation.key = k.to_vec(); mutation.value = v.to_vec(); - suite.must_kv_prewrite(region.id, vec![mutation], k.to_vec(), start_ts); + suite.must_kv_prewrite(region.id, vec![mutation], k.to_vec(), start_ts, false); // Commit let commit_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); diff --git a/components/resolved_ts/tests/integrations/mod.rs b/components/resolved_ts/tests/integrations/mod.rs index a8acab00625..da28758a5d2 100644 --- a/components/resolved_ts/tests/integrations/mod.rs +++ b/components/resolved_ts/tests/integrations/mod.rs @@ -19,12 +19,12 @@ fn test_resolved_ts_basic() { // Prewrite let (k, v) = (b"k1", b"v"); - let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); let mut mutation = Mutation::default(); mutation.set_op(Op::Put); mutation.key = k.to_vec(); mutation.value = v.to_vec(); - suite.must_kv_prewrite(region.id, vec![mutation], k.to_vec(), start_ts); + suite.must_kv_prewrite(region.id, vec![mutation], k.to_vec(), start_ts, false); // The `resolved-ts` won't be updated due to there is lock on the region, // the `resolved-ts` may not be the `start_ts` of the lock if the `resolved-ts` @@ -81,6 +81,27 @@ fn test_resolved_ts_basic() { } assert!(tracked_index_after > tracked_index_before); + // 1PC + let tracked_index_before = suite.region_tracked_index(r1.id); + + start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let (k, v) = (b"k2", b"v"); + let mut mutation_1pc = Mutation::default(); + mutation_1pc.set_op(Op::Put); + mutation_1pc.key = k.to_vec(); + mutation_1pc.value = v.to_vec(); + suite.must_kv_prewrite(r1.id, vec![mutation_1pc], k.to_vec(), start_ts, true); + + tracked_index_after = suite.region_tracked_index(r1.id); + for _ in 0..10 { + if tracked_index_after > tracked_index_before { + break; + } + tracked_index_after = 
suite.region_tracked_index(r1.id); + sleep_ms(200) + } + assert!(tracked_index_after > tracked_index_before); + suite.stop(); } diff --git a/components/resolved_ts/tests/mod.rs b/components/resolved_ts/tests/mod.rs index cd95b1e911d..376aa216224 100644 --- a/components/resolved_ts/tests/mod.rs +++ b/components/resolved_ts/tests/mod.rs @@ -131,6 +131,7 @@ impl TestSuite { muts: Vec, pk: Vec, ts: TimeStamp, + try_one_pc: bool, ) { let mut prewrite_req = PrewriteRequest::default(); prewrite_req.set_context(self.get_context(region_id)); @@ -138,6 +139,7 @@ impl TestSuite { prewrite_req.primary_lock = pk; prewrite_req.start_version = ts.into_inner(); prewrite_req.lock_ttl = prewrite_req.start_version + 1; + prewrite_req.try_one_pc = try_one_pc; let prewrite_resp = self .get_tikv_client(region_id) .kv_prewrite(&prewrite_req) @@ -152,6 +154,9 @@ impl TestSuite { "{:?}", prewrite_resp.get_errors() ); + if try_one_pc { + assert_ne!(prewrite_resp.get_one_pc_commit_ts(), 0); + } } pub fn must_kv_commit( diff --git a/tests/failpoints/cases/test_replica_stale_read.rs b/tests/failpoints/cases/test_replica_stale_read.rs index 7748ed73b96..3dc7223ae41 100644 --- a/tests/failpoints/cases/test_replica_stale_read.rs +++ b/tests/failpoints/cases/test_replica_stale_read.rs @@ -84,6 +84,55 @@ fn test_stale_read_basic_flow_replicate() { follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value2".to_vec(), get_tso(&pd_client)); } +// Similar to test_stale_read_basic_flow_replicate, but we use 1pc to update. 
+#[test] +fn test_stale_read_1pc_flow_replicate() { + let (mut cluster, pd_client, mut leader_client) = prepare_for_stale_read(new_peer(1, 1)); + let mut follower_client2 = PeerClient::new(&cluster, 1, new_peer(2, 2)); + // Set the `stale_read` flag + leader_client.ctx.set_stale_read(true); + follower_client2.ctx.set_stale_read(true); + + let commit_ts1 = leader_client.must_kv_write( + &pd_client, + vec![new_mutation(Op::Put, &b"key1"[..], &b"value1"[..])], + b"key1".to_vec(), + ); + + // Can read `value1` with the newest ts + follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), get_tso(&pd_client)); + + // Stop replicate data to follower 2 + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(1, 2) + .direction(Direction::Recv) + .msg_type(MessageType::MsgAppend), + )); + // Update `key1` + leader_client.must_kv_prewrite_one_pc( + vec![new_mutation(Op::Put, &b"key1"[..], &b"value2"[..])], + b"key1".to_vec(), + get_tso(&pd_client), + ); + let read_ts = get_tso(&pd_client); + // wait for advance_resolved_ts. 
+ sleep_ms(200); + // Follower 2 can still read `value1`, but can not read `value2` due + // to it don't have enough data + follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), commit_ts1); + let resp1 = follower_client2.kv_read(b"key1".to_vec(), read_ts); + assert!(resp1.get_region_error().has_data_is_not_ready()); + + // Leader have up to date data so it can read `value2` + leader_client.must_kv_read_equal(b"key1".to_vec(), b"value2".to_vec(), get_tso(&pd_client)); + + // clear the `MsgAppend` filter + cluster.clear_send_filters(); + + // Now we can read `value2` with the newest ts + follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value2".to_vec(), get_tso(&pd_client)); +} + // Testing how mvcc locks could effect stale read service #[test] fn test_stale_read_basic_flow_lock() { From a63944c4c6f51bebaee2ce4be99cfc1f7d3947c9 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 18 Oct 2022 22:03:53 +0800 Subject: [PATCH 0275/1149] *: fix build with panic engines (#13585) ref tikv/tikv#13131 None Signed-off-by: tabokie --- Cargo.lock | 8 +- Cargo.toml | 159 ++++++--- Makefile | 2 +- cmd/tikv-ctl/Cargo.toml | 42 +-- cmd/tikv-server/Cargo.toml | 4 +- components/api_version/Cargo.toml | 12 +- components/backup-stream/Cargo.toml | 46 +-- components/backup/Cargo.toml | 48 +-- components/batch-system/Cargo.toml | 10 +- components/causal_ts/Cargo.toml | 20 +- components/cdc/Cargo.toml | 46 +-- components/cloud/Cargo.toml | 4 +- components/cloud/aws/Cargo.toml | 20 +- components/cloud/azure/Cargo.toml | 6 +- components/cloud/gcp/Cargo.toml | 6 +- components/codec/Cargo.toml | 6 +- components/collections/Cargo.toml | 2 +- components/concurrency_manager/Cargo.toml | 6 +- components/encryption/Cargo.toml | 14 +- components/encryption/export/Cargo.toml | 14 +- components/encryption/export/examples/ecli.rs | 2 +- components/engine_panic/Cargo.toml | 10 +- components/engine_rocks/Cargo.toml | 26 +- components/engine_rocks/src/engine.rs | 14 +- 
components/engine_rocks_helper/Cargo.toml | 16 +- components/engine_test/Cargo.toml | 18 +- components/engine_test/src/lib.rs | 20 +- components/engine_tirocks/Cargo.toml | 20 +- components/engine_traits/Cargo.toml | 16 +- components/engine_traits_tests/Cargo.toml | 8 +- components/error_code/Cargo.toml | 4 +- components/external_storage/Cargo.toml | 12 +- components/external_storage/export/Cargo.toml | 28 +- .../external_storage/export/examples/scli.rs | 25 +- components/file_system/Cargo.toml | 8 +- components/into_other/Cargo.toml | 2 +- components/keys/Cargo.toml | 6 +- components/log_wrappers/Cargo.toml | 2 +- components/pd_client/Cargo.toml | 16 +- components/profiler/Cargo.toml | 2 +- components/raft_log_engine/Cargo.toml | 12 +- components/raftstore-v2/Cargo.toml | 30 +- .../raftstore-v2/src/operation/query/local.rs | 6 +- components/raftstore/Cargo.toml | 56 ++-- components/raftstore/src/store/worker/read.rs | 4 +- components/resolved_ts/Cargo.toml | 36 +-- components/resource_metering/Cargo.toml | 8 +- components/security/Cargo.toml | 8 +- components/server/Cargo.toml | 56 ++-- components/server/src/lib.rs | 3 + components/server/src/server.rs | 17 +- components/snap_recovery/Cargo.toml | 26 +- components/sst_importer/Cargo.toml | 30 +- components/test_backup/Cargo.toml | 26 +- components/test_coprocessor/Cargo.toml | 22 +- .../example_plugin/Cargo.toml | 4 +- components/test_pd/Cargo.toml | 10 +- components/test_pd_client/Cargo.toml | 14 +- components/test_raftstore/Cargo.toml | 48 +-- components/test_sst_importer/Cargo.toml | 6 +- components/test_storage/Cargo.toml | 18 +- components/test_util/Cargo.toml | 10 +- components/tidb_query_aggr/Cargo.toml | 14 +- components/tidb_query_common/Cargo.toml | 6 +- components/tidb_query_datatype/Cargo.toml | 14 +- components/tidb_query_executors/Cargo.toml | 20 +- components/tidb_query_expr/Cargo.toml | 20 +- components/tikv_kv/Cargo.toml | 32 +- components/tikv_kv/src/cursor.rs | 20 +- 
components/tikv_util/Cargo.toml | 18 +- components/tipb_helper/Cargo.toml | 4 +- components/tracker/Cargo.toml | 2 +- components/txn_types/Cargo.toml | 14 +- fuzz/fuzzer-afl/Cargo.toml | 2 +- fuzz/fuzzer-honggfuzz/Cargo.toml | 2 +- fuzz/fuzzer-libfuzzer/Cargo.toml | 2 +- fuzz/targets/Cargo.toml | 4 +- scripts/check-bins.py | 2 +- scripts/clippy | 2 +- scripts/clippy-all | 2 +- src/config.rs | 2 +- src/server/engine_factory_v2.rs | 3 +- src/server/gc_worker/gc_worker.rs | 302 ++++++++---------- src/storage/txn/commands/prewrite.rs | 3 + tests/Cargo.toml | 90 +++--- 85 files changed, 915 insertions(+), 845 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 97c6209b2d7..14951b8e253 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1673,7 +1673,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7531096570974c3a9dcf9e4b8e1cede1ec26cf5046219fb3b9d897503b9be59" [[package]] -name = "example_plugin" +name = "example_coprocessor_plugin" version = "0.1.0" dependencies = [ "coprocessor_plugin_api", @@ -4101,7 +4101,7 @@ dependencies = [ [[package]] name = "raft-engine" version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#5f718cfe50a28f7fee0282c0959670de5962eec8" +source = "git+https://github.com/tikv/raft-engine.git#a0d29980f1448565a6d03f911ebb103c4266f1f4" dependencies = [ "byteorder", "crc32fast", @@ -4135,7 +4135,7 @@ dependencies = [ [[package]] name = "raft-engine-ctl" version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#5f718cfe50a28f7fee0282c0959670de5962eec8" +source = "git+https://github.com/tikv/raft-engine.git#a0d29980f1448565a6d03f911ebb103c4266f1f4" dependencies = [ "clap 3.1.6", "env_logger", @@ -6181,7 +6181,7 @@ dependencies = [ "engine_traits", "engine_traits_tests", "error_code", - "example_plugin", + "example_coprocessor_plugin", "fail", "file_system", "flate2", diff --git a/Cargo.toml b/Cargo.toml index c38b98631c8..786b229df3b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,32 +64,32 
@@ name = "tikv" [dependencies] anyhow = "1.0" -api_version = { path = "components/api_version", default-features = false } +api_version = { workspace = true } async-stream = "0.2" async-trait = "0.1" backtrace = "0.3" -batch-system = { path = "components/batch-system", default-features = false } +batch-system = { workspace = true } byteorder = "1.2" -case_macros = { path = "components/case_macros" } -causal_ts = { path = "components/causal_ts" } +case_macros = { workspace = true } +causal_ts = { workspace = true } chrono = "0.4" -codec = { path = "components/codec", default-features = false } -collections = { path = "components/collections" } -concurrency_manager = { path = "components/concurrency_manager", default-features = false } -coprocessor_plugin_api = { path = "components/coprocessor_plugin_api" } +codec = { workspace = true } +collections = { workspace = true } +concurrency_manager = { workspace = true } +coprocessor_plugin_api = { workspace = true } crc32fast = "1.2" crc64fast = "0.1" crossbeam = "0.8" dashmap = "5" -encryption_export = { path = "components/encryption/export", default-features = false } -engine_panic = { path = "components/engine_panic", default-features = false } -engine_rocks = { path = "components/engine_rocks", default-features = false } -engine_test = { path = "components/engine_test", default-features = false } -engine_traits = { path = "components/engine_traits", default-features = false } -engine_traits_tests = { path = "components/engine_traits_tests", default-features = false } -error_code = { path = "components/error_code", default-features = false } +encryption_export = { workspace = true } +engine_panic = { workspace = true } +engine_rocks = { workspace = true } +engine_test = { workspace = true } +engine_traits = { workspace = true } +engine_traits_tests = { workspace = true } +error_code = { workspace = true } fail = "0.5" -file_system = { path = "components/file_system", default-features = false } +file_system = { 
workspace = true } flate2 = { version = "1.0", default-features = false, features = ["zlib"] } futures = { version = "0.3", features = ["thread-pool", "compat"] } futures-executor = "0.3.1" @@ -103,18 +103,18 @@ hex = "0.4" http = "0" hyper = { version = "0.14", features = ["full"] } hyper-tls = "0.5" -into_other = { path = "components/into_other", default-features = false } +into_other = { workspace = true } itertools = "0.10" keyed_priority_queue = "0.4" -keys = { path = "components/keys", default-features = false } +keys = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" libc = "0.2" libloading = "0.7" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } -log_wrappers = { path = "components/log_wrappers" } +log_wrappers = { workspace = true } match-template = "0.0.1" -memory_trace_macros = { path = "components/memory_trace_macros" } +memory_trace_macros = { workspace = true } mime = "0.3.13" more-asserts = "0.2" murmur3 = "0.5.1" @@ -122,11 +122,11 @@ nom = { version = "5.1.0", default-features = false, features = ["std"] } notify = "4" num-traits = "0.2.14" num_cpus = "1" -online_config = { path = "components/online_config" } +online_config = { workspace = true } openssl = "0.10" parking_lot = "0.12" paste = "1.0" -pd_client = { path = "components/pd_client", default-features = false } +pd_client = { workspace = true } pin-project = "1.0" pnet_datalink = "0.23" pprof = { git = "https://github.com/tikv/pprof-rs.git", rev = "3fed55af8fc6cf69dbd954a0321c799c5a111e4e", default-features = false, features = ["flamegraph", "protobuf-codec"] } @@ -134,14 +134,14 @@ prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } -raft_log_engine = { path = "components/raft_log_engine", default-features = false } -raftstore 
= { path = "components/raftstore", default-features = false, features = ["engine_rocks"] } +raft_log_engine = { workspace = true } +raftstore = { workspace = true, features = ["engine_rocks"] } rand = "0.7.3" regex = "1.3" -resource_metering = { path = "components/resource_metering" } +resource_metering = { workspace = true } rev_lines = "0.2.1" seahash = "4.1.0" -security = { path = "components/security", default-features = false } +security = { workspace = true } semver = "0.11" serde = { version = "1.0", features = ["derive"] } serde_derive = "1.0" @@ -150,41 +150,41 @@ serde_json = { version = "1.0", features = ["preserve_order"] } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } smallvec = "1.4" -sst_importer = { path = "components/sst_importer", default-features = false } +sst_importer = { workspace = true } strum = { version = "0.20", features = ["derive"] } sync_wrapper = "0.1.1" sysinfo = "0.16" tempfile = "3.0" thiserror = "1.0" -tidb_query_aggr = { path = "components/tidb_query_aggr", default-features = false } -tidb_query_common = { path = "components/tidb_query_common", default-features = false } -tidb_query_datatype = { path = "components/tidb_query_datatype", default-features = false } -tidb_query_executors = { path = "components/tidb_query_executors", default-features = false } -tidb_query_expr = { path = "components/tidb_query_expr", default-features = false } -tikv_alloc = { path = "components/tikv_alloc" } -tikv_kv = { path = "components/tikv_kv", default-features = false } -tikv_util = { path = "components/tikv_util", default-features = false } +tidb_query_aggr = { workspace = true } +tidb_query_common = { workspace = true } +tidb_query_datatype = { workspace = true } +tidb_query_executors = { workspace = true } +tidb_query_expr = { workspace = true } +tikv_alloc = { 
workspace = true } +tikv_kv = { workspace = true } +tikv_util = { workspace = true } time = "0.1" tipb = { git = "https://github.com/pingcap/tipb.git" } tokio = { version = "1.17", features = ["full"] } tokio-openssl = "0.6" tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } toml = "0.5" -tracker = { path = "components/tracker" } -txn_types = { path = "components/txn_types", default-features = false } +tracker = { workspace = true } +txn_types = { workspace = true } url = "2" uuid = { version = "0.8.1", features = ["serde", "v4"] } walkdir = "2" yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } [dev-dependencies] -api_version = { path = "components/api_version", features = ["testexport"] } -example_plugin = { path = "components/test_coprocessor_plugin/example_plugin" } # should be a binary dependency +api_version = { workspace = true, features = ["testexport"] } +example_coprocessor_plugin = { workspace = true } # should be a binary dependency hyper-openssl = "0.9" -panic_hook = { path = "components/panic_hook" } +panic_hook = { workspace = true } reqwest = { version = "0.11", features = ["blocking"] } -test_sst_importer = { path = "components/test_sst_importer", default-features = false } -test_util = { path = "components/test_util", default-features = false } +test_sst_importer = { workspace = true } +test_util = { workspace = true } tokio = { version = "1.17", features = ["macros", "rt-multi-thread", "time"] } zipf = "6.1.0" @@ -241,7 +241,6 @@ members = [ "components/codec", "components/collections", "components/concurrency_manager", - "components/concurrency_manager", "components/coprocessor_plugin_api", "components/encryption", "components/encryption/export", @@ -259,11 +258,12 @@ members = [ "components/online_config", "components/panic_hook", "components/pd_client", + "components/profiler", "components/raftstore", "components/raftstore-v2", "components/resolved_ts", "components/resource_metering", - 
"components/server", + "components/security", "components/server", "components/snap_recovery", "components/sst_importer", @@ -283,6 +283,7 @@ members = [ "components/tidb_query_executors", "components/tidb_query_expr", "components/tikv_alloc", + "components/tikv_kv", "components/tikv_util", "components/tipb_helper", "components/tracker", @@ -295,6 +296,76 @@ members = [ ] default-members = ["cmd/tikv-server", "cmd/tikv-ctl"] +[workspace.dependencies] +api_version = { path = "components/api_version" } +aws = { path = "components/cloud/aws" } +azure = { path = "components/cloud/azure" } +backup = { path = "components/backup", default-features = false } +backup-stream = { path = "components/backup-stream", default-features = false } +batch-system = { path = "components/batch-system" } +case_macros = { path = "components/case_macros" } +causal_ts = { path = "components/causal_ts" } +cdc = { path = "components/cdc", default-features = false } +cloud = { path = "components/cloud" } +codec = { path = "components/codec" } +collections = { path = "components/collections" } +concurrency_manager = { path = "components/concurrency_manager" } +coprocessor_plugin_api = { path = "components/coprocessor_plugin_api" } +encryption = { path = "components/encryption" } +encryption_export = { path = "components/encryption/export" } +engine_panic = { path = "components/engine_panic" } +engine_rocks = { path = "components/engine_rocks" } +engine_rocks_helper = { path = "components/engine_rocks_helper" } +engine_test = { path = "components/engine_test", default-features = false } +engine_traits = { path = "components/engine_traits" } +engine_traits_tests = { path = "components/engine_traits_tests", default-features = false } +error_code = { path = "components/error_code" } +external_storage = { path = "components/external_storage" } +external_storage_export = { path = "components/external_storage/export" } +file_system = { path = "components/file_system" } +gcp = { path = 
"components/cloud/gcp" } +into_other = { path = "components/into_other" } +keys = { path = "components/keys" } +log_wrappers = { path = "components/log_wrappers" } +memory_trace_macros = { path = "components/memory_trace_macros" } +online_config = { path = "components/online_config" } +panic_hook = { path = "components/panic_hook" } +pd_client = { path = "components/pd_client" } +profiler = { path = "components/profiler" } +raft_log_engine = { path = "components/raft_log_engine" } +raftstore = { path = "components/raftstore", default-features = false } +raftstore_v2 = { path = "components/raftstore-v2", default-features = false } +resolved_ts = { path = "components/resolved_ts" } +resource_metering = { path = "components/resource_metering" } +security = { path = "components/security" } +server = { path = "components/server" } +snap_recovery = { path = "components/snap_recovery" } +sst_importer = { path = "components/sst_importer" } +test_backup = { path = "components/test_backup" } +test_coprocessor = { path = "components/test_coprocessor", default-features = false } +example_coprocessor_plugin = { path = "components/test_coprocessor_plugin/example_plugin" } +test_pd = { path = "components/test_pd" } +test_pd_client = { path = "components/test_pd_client" } +test_raftstore = { path = "components/test_raftstore", default-features = false } +test_sst_importer = { path = "components/test_sst_importer" } +test_storage = { path = "components/test_storage", default-features = false } +test_util = { path = "components/test_util" } +tidb_query_aggr = { path = "components/tidb_query_aggr" } +tidb_query_codegen = { path = "components/tidb_query_codegen" } +tidb_query_common = { path = "components/tidb_query_common" } +tidb_query_datatype = { path = "components/tidb_query_datatype" } +tidb_query_executors = { path = "components/tidb_query_executors" } +tidb_query_expr = { path = "components/tidb_query_expr" } +tikv = { path = ".", default-features = false } +tikv_alloc = { 
path = "components/tikv_alloc" } +tikv_kv = { path = "components/tikv_kv", default-features = false } +tikv_util = { path = "components/tikv_util" } +tipb_helper = { path = "components/tipb_helper" } +tracker = { path = "components/tracker" } +txn_types = { path = "components/txn_types" } +# External libs +grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } + [profile.dev.package.grpcio-sys] debug = false opt-level = 1 diff --git a/Makefile b/Makefile index 3229a307e7f..f60fb16bcb0 100644 --- a/Makefile +++ b/Makefile @@ -334,7 +334,7 @@ pre-format: unset-override format: pre-format @cargo fmt - @cargo sort -w ./Cargo.toml ./*/Cargo.toml components/*/Cargo.toml cmd/*/Cargo.toml >/dev/null + @cargo sort -w >/dev/null doc: @cargo doc --workspace --document-private-items \ diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index 13d8b351e21..3b2d1dd2f75 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -45,49 +45,49 @@ test-engines-panic = [ nortcheck = ["engine_rocks/nortcheck"] [dependencies] -backup = { path = "../../components/backup", default-features = false } -cdc = { path = "../../components/cdc", default-features = false } +backup = { workspace = true } +cdc = { workspace = true } chrono = "0.4" clap = "2.32" -collections = { path = "../../components/collections" } -concurrency_manager = { path = "../../components/concurrency_manager", default-features = false } +collections = { workspace = true } +concurrency_manager = { workspace = true } crossbeam = "0.8" -encryption_export = { path = "../../components/encryption/export", default-features = false } -engine_rocks = { path = "../../components/engine_rocks", default-features = false } -engine_traits = { path = "../../components/engine_traits", default-features = false } -error_code = { path = "../../components/error_code", default-features = false } -file_system = { path = "../../components/file_system", default-features = 
false } +encryption_export = { workspace = true } +engine_rocks = { workspace = true } +engine_traits = { workspace = true } +error_code = { workspace = true } +file_system = { workspace = true } futures = "0.3" gag = "1.0" -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { workspace = true } hex = "0.4" -keys = { path = "../../components/keys", default-features = false } +keys = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } libc = "0.2" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } -log_wrappers = { path = "../../components/log_wrappers" } -pd_client = { path = "../../components/pd_client", default-features = false } +log_wrappers = { workspace = true } +pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raft-engine-ctl = { git = "https://github.com/tikv/raft-engine.git" } -raft_log_engine = { path = "../../components/raft_log_engine", default-features = false } -raftstore = { path = "../../components/raftstore", default-features = false } +raft_log_engine = { workspace = true } +raftstore = { workspace = true } rand = "0.8" regex = "1" -security = { path = "../../components/security", default-features = false } +security = { workspace = true } serde_json = "1.0" -server = { path = "../../components/server" } +server = { workspace = true } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } structopt = "0.3" tempfile = "3.0" -tikv = { path = "../../", default-features = false } -tikv_alloc = { path = "../../components/tikv_alloc" } -tikv_util = { path = 
"../../components/tikv_util", default-features = false } +tikv = { workspace = true } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread", "time"] } toml = "0.5" -txn_types = { path = "../../components/txn_types", default-features = false } +txn_types = { workspace = true } [build-dependencies] cc = "1.0" diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index 9b1aa869037..c5b5cb6403c 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -33,8 +33,8 @@ pprof-fp = ["tikv/pprof-fp"] [dependencies] clap = "2.32" serde_json = { version = "1.0", features = ["preserve_order"] } -server = { path = "../../components/server", default-features = false } -tikv = { path = "../../", default-features = false } +server = { workspace = true } +tikv = { workspace = true } toml = "0.5" [build-dependencies] diff --git a/components/api_version/Cargo.toml b/components/api_version/Cargo.toml index e2d4beaacbf..421c01a1514 100644 --- a/components/api_version/Cargo.toml +++ b/components/api_version/Cargo.toml @@ -9,14 +9,14 @@ testexport = [] [dependencies] bitflags = "1.0.1" -codec = { path = "../codec", default-features = false } -engine_traits = { path = "../engine_traits", default-features = false } +codec = { workspace = true } +engine_traits = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } match-template = "0.0.1" thiserror = "1.0" -tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } -txn_types = { path = "../txn_types", default-features = false } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } +txn_types = { workspace = true } [dev-dependencies] -panic_hook = { path = "../panic_hook" } +panic_hook = { workspace = true } diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 8e6e43c8203..0f3b97461bb 100644 --- 
a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -23,63 +23,63 @@ async-compression = { version = "0.3.14", features = ["tokio", "zstd"] } async-trait = { version = "0.1" } bytes = "1" chrono = "0.4" -concurrency_manager = { path = "../concurrency_manager" } +concurrency_manager = { workspace = true } crossbeam = "0.8" crossbeam-channel = "0.5" dashmap = "5" -engine_rocks = { path = "../engine_rocks", default-features = false } -engine_traits = { path = "../engine_traits", default-features = false } -error_code = { path = "../error_code" } +engine_rocks = { workspace = true } +engine_traits = { workspace = true } +error_code = { workspace = true } # We cannot update the etcd-client to latest version because of the cyclic requirement. # Also we need wait until https://github.com/etcdv3/etcd-client/pull/43/files to be merged. etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "e0321a1990ee561cf042973666c0db61c8d82364", features = ["pub-response-field", "tls"] } -external_storage = { path = "../external_storage", default-features = false } -external_storage_export = { path = "../external_storage/export", default-features = false } +external_storage = { workspace = true } +external_storage_export = { workspace = true } fail = "0.5" -file_system = { path = "../file_system" } +file_system = { workspace = true } futures = "0.3" futures-io = "0.3" -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { workspace = true } hex = "0.4" kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.4" -log_wrappers = { path = "../log_wrappers" } -online_config = { path = "../online_config" } +log_wrappers = { workspace = true } +online_config = { workspace = true } openssl = "0.10" -pd_client = { path = "../pd_client" } +pd_client = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly"] } protobuf = 
{ version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } -raftstore = { path = "../raftstore", default-features = false } +raftstore = { workspace = true } regex = "1" -resolved_ts = { path = "../resolved_ts" } +resolved_ts = { workspace = true } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } thiserror = "1" -tidb_query_datatype = { path = "../tidb_query_datatype", default-features = false } -tikv = { path = "../../", default-features = false } -tikv_alloc = { path = "../tikv_alloc" } -tikv_kv = { path = "../tikv_kv" } -tikv_util = { path = "../tikv_util" } +tidb_query_datatype = { workspace = true } +tikv = { workspace = true } +tikv_alloc = { workspace = true } +tikv_kv = { workspace = true } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread", "macros", "time", "sync"] } tokio-stream = "0.1" tokio-util = { version = "0.7", features = ["compat"] } tonic = "0.5" -txn_types = { path = "../txn_types", default-features = false } +txn_types = { workspace = true } uuid = "0.8" yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } [dev-dependencies] async-trait = "0.1" -engine_panic = { path = "../engine_panic" } -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +engine_panic = { workspace = true } +grpcio = { workspace = true } hex = "0.4" protobuf = { version = "2.8", features = ["bytes"] } rand = "0.8.0" tempdir = "0.3" tempfile = "3.0" -test_raftstore = { path = "../test_raftstore", default-features = false } -test_util = { path = "../test_util", default-features = false } +test_raftstore = { workspace = true } +test_util = { workspace = true } url = "2" walkdir = "2" diff --git a/components/backup/Cargo.toml 
b/components/backup/Cargo.toml index a59f8949b77..17439a0f615 100644 --- a/components/backup/Cargo.toml +++ b/components/backup/Cargo.toml @@ -33,47 +33,47 @@ mem-profiling = ["tikv/mem-profiling"] failpoints = ["tikv/failpoints"] [dependencies] -api_version = { path = "../api_version", default-features = false } +api_version = { workspace = true } async-channel = "1.4" -aws = { path = "../cloud/aws" } -causal_ts = { path = "../causal_ts" } -collections = { path = "../collections" } -concurrency_manager = { path = "../concurrency_manager", default-features = false } +aws = { workspace = true } +causal_ts = { workspace = true } +collections = { workspace = true } +concurrency_manager = { workspace = true } crc64fast = "0.1" -encryption = { path = "../encryption", default-features = false } -engine_rocks = { path = "../engine_rocks", default-features = false } -engine_traits = { path = "../engine_traits", default-features = false } -error_code = { path = "../error_code", default-features = false } -external_storage = { path = "../external_storage", default-features = false } -external_storage_export = { path = "../external_storage/export", default-features = false } -file_system = { path = "../file_system", default-features = false } +encryption = { workspace = true } +engine_rocks = { workspace = true } +engine_traits = { workspace = true } +error_code = { workspace = true } +external_storage = { workspace = true } +external_storage_export = { workspace = true } +file_system = { workspace = true } futures = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { workspace = true } hex = "0.4" -keys = { path = "../keys", default-features = false } +keys = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" -log_wrappers = { path = "../log_wrappers" } -online_config = { 
path = "../online_config" } -pd_client = { path = "../pd_client", default-features = false } +log_wrappers = { workspace = true } +online_config = { workspace = true } +pd_client = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } -raftstore = { path = "../raftstore", default-features = false } -security = { path = "../security", default-features = false } +raftstore = { workspace = true } +security = { workspace = true } serde = "1.0" serde_derive = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } # better to not use slog-global, but pass in the logger slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } thiserror = "1.0" -tidb_query_common = { path = "../tidb_query_common", default-features = false } -tikv = { path = "../../", default-features = false } -tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } +tidb_query_common = { workspace = true } +tikv = { workspace = true } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread"] } tokio-stream = "0.1" -txn_types = { path = "../txn_types", default-features = false } +txn_types = { workspace = true } yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } [dev-dependencies] diff --git a/components/batch-system/Cargo.toml b/components/batch-system/Cargo.toml index 03aabafe3ae..7fe5798f833 100644 --- a/components/batch-system/Cargo.toml +++ b/components/batch-system/Cargo.toml @@ -8,20 +8,20 @@ default = ["test-runner"] test-runner = ["derive_more"] [dependencies] -collections = { path = "../collections" } +collections = { workspace = true } crossbeam = "0.8" derive_more = { version = "0.99", optional = true } fail 
= "0.5" -file_system = { path = "../file_system", default-features = false } +file_system = { workspace = true } lazy_static = "1.3" -online_config = { path = "../online_config" } +online_config = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly"] } serde = { version = "1.0", features = ["derive"] } serde_derive = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } -tikv_alloc = { path = "../tikv_alloc", default-features = false } -tikv_util = { path = "../tikv_util", default-features = false } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } [dev-dependencies] criterion = "0.3" diff --git a/components/causal_ts/Cargo.toml b/components/causal_ts/Cargo.toml index beaf5575c80..d05e9b66ddd 100644 --- a/components/causal_ts/Cargo.toml +++ b/components/causal_ts/Cargo.toml @@ -8,19 +8,19 @@ publish = false testexport = [] [dependencies] -api_version = { path = "../api_version", default-features = false } +api_version = { workspace = true } async-trait = { version = "0.1" } -engine_rocks = { path = "../engine_rocks", default-features = false } -engine_traits = { path = "../engine_traits", default-features = false } +engine_rocks = { workspace = true } +engine_traits = { workspace = true } enum_dispatch = "0.3.8" -error_code = { path = "../error_code", default-features = false } +error_code = { workspace = true } fail = "0.5" futures = { version = "0.3" } kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" -log_wrappers = { path = "../log_wrappers" } +log_wrappers = { workspace = true } parking_lot = "0.12" -pd_client = { path = "../pd_client", default-features = false } +pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" raft 
= { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } @@ -28,12 +28,12 @@ serde = "1.0" serde_derive = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } -test_pd_client = { path = "../test_pd_client" } +test_pd_client = { workspace = true } thiserror = "1.0" -tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } tokio = { version = "1", features = ["sync"] } -txn_types = { path = "../txn_types", default-features = false } +txn_types = { workspace = true } [dev-dependencies] criterion = "0.3" diff --git a/components/cdc/Cargo.toml b/components/cdc/Cargo.toml index dbefc7df82c..27ce81c57b4 100644 --- a/components/cdc/Cargo.toml +++ b/components/cdc/Cargo.toml @@ -28,51 +28,51 @@ mem-profiling = ["tikv/mem-profiling"] failpoints = ["tikv/failpoints"] [dependencies] -api_version = { path = "../api_version" } +api_version = { workspace = true } bitflags = "1.0" -causal_ts = { path = "../causal_ts" } -collections = { path = "../collections" } -concurrency_manager = { path = "../concurrency_manager", default-features = false } +causal_ts = { workspace = true } +collections = { workspace = true } +concurrency_manager = { workspace = true } crossbeam = "0.8" -engine_rocks = { path = "../engine_rocks", default-features = false } -engine_traits = { path = "../engine_traits", default-features = false } +engine_rocks = { workspace = true } +engine_traits = { workspace = true } fail = "0.5" futures = "0.3" futures-timer = "3.0" getset = "0.1" -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } -keys = { path = "../keys" } +grpcio = { workspace = true } +keys = { workspace = true } kvproto = { git = 
"https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" -log_wrappers = { path = "../log_wrappers" } -online_config = { path = "../online_config" } -pd_client = { path = "../pd_client", default-features = false } +log_wrappers = { workspace = true } +online_config = { workspace = true } +pd_client = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly"] } prometheus-static-metric = "0.5" protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } -raftstore = { path = "../raftstore", default-features = false } -resolved_ts = { path = "../resolved_ts", default-features = false } -security = { path = "../security", default-features = false } +raftstore = { workspace = true } +resolved_ts = { workspace = true } +security = { workspace = true } semver = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } thiserror = "1.0" -tikv = { path = "../..", default-features = false } -tikv_kv = { path = "../tikv_kv", default-features = false } -tikv_util = { path = "../tikv_util", default-features = false } +tikv = { workspace = true } +tikv_kv = { workspace = true } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread", "time"] } -txn_types = { path = "../txn_types", default-features = false } +txn_types = { workspace = true } [dev-dependencies] criterion = "0.3" -engine_rocks = { path = "../engine_rocks", default-features = false } -engine_traits = { path = "../engine_traits", default-features = false } +engine_rocks = { workspace = true } +engine_traits = { workspace = true } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } tempfile = "3.0" -test_pd_client = { path = "../test_pd_client" } 
-test_raftstore = { path = "../test_raftstore", default-features = false } -test_util = { path = "../test_util", default-features = false } +test_pd_client = { workspace = true } +test_raftstore = { workspace = true } +test_util = { workspace = true } [[test]] name = "integrations" diff --git a/components/cloud/Cargo.toml b/components/cloud/Cargo.toml index 5752f84e43c..45ae2b40b23 100644 --- a/components/cloud/Cargo.toml +++ b/components/cloud/Cargo.toml @@ -7,7 +7,7 @@ publish = false [dependencies] async-trait = "0.1" derive_more = "0.99.3" -error_code = { path = "../error_code", default-features = false } +error_code = { workspace = true } futures-io = "0.3" kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" @@ -16,7 +16,7 @@ prometheus = { version = "0.13", default-features = false, features = ["nightly" protobuf = { version = "2.8", features = ["bytes"] } rusoto_core = "0.46.0" thiserror = "1.0" -tikv_util = { path = "../tikv_util", default-features = false } +tikv_util = { workspace = true } url = "2.0" [dev-dependencies] diff --git a/components/cloud/aws/Cargo.toml b/components/cloud/aws/Cargo.toml index 293509709db..964048121d6 100644 --- a/components/cloud/aws/Cargo.toml +++ b/components/cloud/aws/Cargo.toml @@ -9,36 +9,36 @@ failpoints = ["fail/failpoints"] [dependencies] async-trait = "0.1" +base64 = "0.13.0" bytes = "1.0" -cloud = { path = "../", default-features = false } +cloud = { workspace = true } fail = "0.5" futures = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } # This is only a dependency to vendor openssl for rusoto. It's not clear exactly # how openssl is built for tikv, but it seems to be controlled by grpcio. This # makes `cargo test -p aws` link correctly. 
-grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { workspace = true } http = "0.2.0" hyper = "0.14" hyper-tls = "0.5" kvproto = { git = "https://github.com/pingcap/kvproto.git" } +lazy_static = "1.3" +md5 = "0.7.0" +prometheus = { version = "0.13", default-features = false, features = ["nightly"] } rusoto_core = "0.46.0" rusoto_credential = "0.46.0" rusoto_kms = { version = "0.46.0", features = ["serialize_structs"] } -rusoto_sts = "0.46.0" rusoto_s3 = { version = "0.46.0", features = ["serialize_structs"] } +rusoto_sts = "0.46.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } +slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +thiserror = "1.0" +tikv_util = { workspace = true } # better to not use slog-global, but pass in the logger tokio = { version = "1.5", features = ["time"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } -tikv_util = { path = "../../tikv_util", default-features = false } url = "2.0" -thiserror = "1.0" -lazy_static = "1.3" -prometheus = { version = "0.13", default-features = false, features = ["nightly"] } uuid = "0.8" -md5 = "0.7.0" -base64 = "0.13.0" [dev-dependencies] futures = "0.3" diff --git a/components/cloud/azure/Cargo.toml b/components/cloud/azure/Cargo.toml index 042898c31d5..3d8b01e893b 100644 --- a/components/cloud/azure/Cargo.toml +++ b/components/cloud/azure/Cargo.toml @@ -6,18 +6,18 @@ publish = false [dependencies] async-trait = "0.1" -azure_core = { version = "0.1.0", git = "https://github.com/Azure/azure-sdk-for-rust"} +azure_core = { version = "0.1.0", git = "https://github.com/Azure/azure-sdk-for-rust" } azure_identity = { version = "0.1.0", git = "https://github.com/Azure/azure-sdk-for-rust" } azure_storage = { version = "0.1.0", git = 
"https://github.com/Azure/azure-sdk-for-rust", default-features = false, features = ["account", "blob"] } base64 = "0.13" chrono = "0.4" -cloud = { path = "../", default-features = false } +cloud = { workspace = true } futures = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } kvproto = { git = "https://github.com/pingcap/kvproto.git" } oauth2 = { version = "4.0.0", default-features = false } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } -tikv_util = { path = "../../tikv_util", default-features = false } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["time"] } url = "2.0" diff --git a/components/cloud/gcp/Cargo.toml b/components/cloud/gcp/Cargo.toml index a9045d6f27c..f184377c0af 100644 --- a/components/cloud/gcp/Cargo.toml +++ b/components/cloud/gcp/Cargo.toml @@ -5,8 +5,9 @@ edition = "2018" publish = false [dependencies] -futures-util = { version = "0.3", default-features = false, features = ["io"] } async-trait = "0.1" +cloud = { workspace = true } +futures-util = { version = "0.3", default-features = false, features = ["io"] } http = "0.2.0" hyper = "0.14" hyper-tls = "0.5" @@ -16,8 +17,7 @@ slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debu slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } tame-gcs = { version = "0.10", features = ["async-multipart"] } tame-oauth = "0.4.7" -cloud = { path = "../", default-features = false } -tikv_util = { path = "../../tikv_util", default-features = false } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["time"] } url = "2.0" diff --git a/components/codec/Cargo.toml b/components/codec/Cargo.toml index 93e91209d66..8b00f077863 100644 --- 
a/components/codec/Cargo.toml +++ b/components/codec/Cargo.toml @@ -6,14 +6,14 @@ publish = false [dependencies] byteorder = "1.2" -error_code = { path = "../error_code", default-features = false } +error_code = { workspace = true } libc = "0.2" static_assertions = { version = "1.0", features = ["nightly"] } thiserror = "1.0" -tikv_alloc = { path = "../tikv_alloc" } +tikv_alloc = { workspace = true } [dev-dependencies] bytes = "1.0" -panic_hook = { path = "../panic_hook" } +panic_hook = { workspace = true } protobuf = "2" rand = "0.8" diff --git a/components/collections/Cargo.toml b/components/collections/Cargo.toml index a94cb0216cf..dca0afbc2c8 100644 --- a/components/collections/Cargo.toml +++ b/components/collections/Cargo.toml @@ -6,4 +6,4 @@ publish = false [dependencies] fxhash = "0.2.1" -tikv_alloc = { path = "../tikv_alloc" } +tikv_alloc = { workspace = true } diff --git a/components/concurrency_manager/Cargo.toml b/components/concurrency_manager/Cargo.toml index b6e382d7f14..2d008cf49f1 100644 --- a/components/concurrency_manager/Cargo.toml +++ b/components/concurrency_manager/Cargo.toml @@ -8,9 +8,9 @@ version = "0.0.1" fail = "0.5" kvproto = { git = "https://github.com/pingcap/kvproto.git" } parking_lot = "0.12" -tikv_util = { path = "../tikv_util", default-features = false } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["macros", "sync", "time"] } -txn_types = { path = "../txn_types", default-features = false } +txn_types = { workspace = true } # FIXME: switch to the crates.io version after crossbeam-skiplist is released [dependencies.crossbeam-skiplist] @@ -22,7 +22,7 @@ package = "crossbeam-skiplist" criterion = "0.3" futures = "0.3" rand = "0.8.3" -tikv_alloc = { path = "../tikv_alloc", features = ["jemalloc"] } +tikv_alloc = { workspace = true, features = ["jemalloc"] } [[bench]] name = "lock_table" diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index 80ad86b3b75..b66ef2aa147 100644 --- 
a/components/encryption/Cargo.toml +++ b/components/encryption/Cargo.toml @@ -14,16 +14,16 @@ bytes = "1.0" crc32fast = "1.2" crossbeam = "0.8" derive_more = "0.99.3" -engine_traits = { path = "../engine_traits", default-features = false } -error_code = { path = "../error_code", default-features = false } +engine_traits = { workspace = true } +error_code = { workspace = true } fail = "0.5" -file_system = { path = "../file_system", default-features = false } +file_system = { workspace = true } futures = "0.3" futures-util = { version = "0.3", default-features = false, features = ["std", "io"] } hex = "0.4.2" kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" -online_config = { path = "../online_config" } +online_config = { workspace = true } openssl = "0.10" prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } @@ -34,12 +34,12 @@ slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debu # better to not use slog-global, but pass in the logger slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } thiserror = "1.0" -tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["time", "rt"] } [dev-dependencies] matches = "0.1.8" tempfile = "3.1" -test_util = { path = "../test_util", default-features = false } +test_util = { workspace = true } toml = "0.5" diff --git a/components/encryption/export/Cargo.toml b/components/encryption/export/Cargo.toml index 2fe0b0cb55a..f76c2b8f03c 100644 --- a/components/encryption/export/Cargo.toml +++ b/components/encryption/export/Cargo.toml @@ -12,20 +12,20 @@ cloud-azure = [] [dependencies] async-trait = "0.1" -aws = { path = "../../cloud/aws", optional = true, default-features = false } 
-cloud = { path = "../../cloud/", default-features = false } +aws = { workspace = true, optional = true } +cloud = { workspace = true } derive_more = "0.99.3" -encryption = { path = "../", default-features = false } -error_code = { path = "../../error_code", default-features = false } -file_system = { path = "../../file_system", default-features = false } +encryption = { workspace = true } +error_code = { workspace = true } +file_system = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } openssl = "0.10" protobuf = { version = "2.8", features = ["bytes"] } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } # better to not use slog-global, but pass in the logger slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } -tikv_util = { path = "../../tikv_util", default-features = false } +tikv_util = { workspace = true } [dev-dependencies] rust-ini = "0.14.0" -structopt = "0.3" \ No newline at end of file +structopt = "0.3" diff --git a/components/encryption/export/examples/ecli.rs b/components/encryption/export/examples/ecli.rs index d9d2bcb8098..ed2247cc77c 100644 --- a/components/encryption/export/examples/ecli.rs +++ b/components/encryption/export/examples/ecli.rs @@ -3,7 +3,7 @@ use std::io::{Read, Write}; pub use cloud::kms::Config as CloudConfig; -#[cfg(feature = "aws")] +#[cfg(feature = "cloud-aws")] use encryption_export::{create_cloud_backend, KmsConfig}; use encryption_export::{Backend, Error, Result}; use file_system::{File, OpenOptions}; diff --git a/components/engine_panic/Cargo.toml b/components/engine_panic/Cargo.toml index b00180c98d2..c5703994c73 100644 --- a/components/engine_panic/Cargo.toml +++ b/components/engine_panic/Cargo.toml @@ -6,11 +6,11 @@ edition = "2018" publish = false [dependencies] -engine_traits = { path = "../engine_traits", default-features = false } +engine_traits = { workspace = true 
} kvproto = { git = "https://github.com/pingcap/kvproto.git" } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } -tikv_alloc = { path = "../tikv_alloc" } +tikv_alloc = { workspace = true } # FIXME: Remove this dep from the engine_traits interface -tikv_util = { path = "../tikv_util", default-features = false } -tracker = { path = "../tracker" } -txn_types = { path = "../txn_types", default-features = false } +tikv_util = { workspace = true } +tracker = { workspace = true } +txn_types = { workspace = true } diff --git a/components/engine_rocks/Cargo.toml b/components/engine_rocks/Cargo.toml index e35438c4fe1..44dd708271d 100644 --- a/components/engine_rocks/Cargo.toml +++ b/components/engine_rocks/Cargo.toml @@ -23,20 +23,20 @@ failpoints = ["fail/failpoints"] nortcheck = [] [dependencies] -api_version = { path = "../api_version", default-features = false } -case_macros = { path = "../case_macros" } -collections = { path = "../collections", default-features = false } +api_version = { workspace = true } +case_macros = { workspace = true } +collections = { workspace = true } derive_more = "0.99.3" -encryption = { path = "../encryption", default-features = false } -engine_traits = { path = "../engine_traits", default-features = false } +encryption = { workspace = true } +engine_traits = { workspace = true } fail = "0.5" -file_system = { path = "../file_system", default-features = false } -keys = { path = "../keys", default-features = false } +file_system = { workspace = true } +keys = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.4.0" -log_wrappers = { path = "../log_wrappers" } +log_wrappers = { workspace = true } num_cpus = "1" -online_config = { path = "../online_config" } +online_config = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" protobuf = "2" @@ -48,11 +48,11 @@ slog = { version = "2.3", features = 
["max_level_trace", "release_max_level_debu slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } slog_derive = "0.2" tempfile = "3.0" -tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } time = "0.1" -tracker = { path = "../tracker" } -txn_types = { path = "../txn_types", default-features = false } +tracker = { workspace = true } +txn_types = { workspace = true } [dependencies.rocksdb] git = "https://github.com/tikv/rust-rocksdb.git" diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 9e3bba56bad..41066c85756 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -1,6 +1,6 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::{any::Any, fs, path::Path, sync::Arc}; +use std::{any::Any, sync::Arc}; use engine_traits::{ IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SyncMutable, TabletAccessor, @@ -50,18 +50,6 @@ impl RocksEngine { self.db.clone() } - pub fn exists(path: &str) -> bool { - let path = Path::new(path); - if !path.exists() || !path.is_dir() { - return false; - } - - // If path is not an empty directory, we say db exists. If path is not an empty - // directory but db has not been created, `DB::list_column_families` fails and - // we can clean up the directory by this indication. 
- fs::read_dir(&path).unwrap().next().is_some() - } - pub fn set_shared_block_cache(&mut self, enable: bool) { self.shared_block_cache = enable; } diff --git a/components/engine_rocks_helper/Cargo.toml b/components/engine_rocks_helper/Cargo.toml index 77133f09cbd..16e79a3b007 100644 --- a/components/engine_rocks_helper/Cargo.toml +++ b/components/engine_rocks_helper/Cargo.toml @@ -8,21 +8,21 @@ publish = false failpoints = ["fail/failpoints"] [dependencies] -engine_rocks = { path = "../engine_rocks", default-features = false } -engine_traits = { path = "../engine_traits" } +engine_rocks = { workspace = true } +engine_traits = { workspace = true } fail = "0.5" futures = "0.3" -keys = { path = "../keys", default-features = false } +keys = { workspace = true } lazy_static = "1.4.0" -pd_client = { path = "../pd_client", default-features = false } +pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } protobuf = "2.8" -raftstore = { path = "../raftstore", default-features = false } +raftstore = { workspace = true } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } -tikv_util = { path = "../tikv_util", default-features = false } +tikv_util = { workspace = true } [dev-dependencies] -engine_test = { path = "../engine_test" } -kvproto = { git = "https://github.com/pingcap/kvproto.git", default-features = false } +engine_test = { workspace = true } +kvproto = { git = "https://github.com/pingcap/kvproto.git" } tempfile = "3.0" diff --git a/components/engine_test/Cargo.toml b/components/engine_test/Cargo.toml index a9bfbfd41d3..16e538acc51 100644 --- a/components/engine_test/Cargo.toml +++ b/components/engine_test/Cargo.toml @@ -24,14 +24,14 @@ test-engines-panic = [ ] [dependencies] -collections = { path = "../collections", default-features = false } -encryption = { 
path = "../encryption", default-features = false } -engine_panic = { path = "../engine_panic", default-features = false } -engine_rocks = { path = "../engine_rocks", default-features = false } -engine_traits = { path = "../engine_traits", default-features = false } -file_system = { path = "../file_system", default-features = false } -raft_log_engine = { path = "../raft_log_engine", default-features = false } +collections = { workspace = true } +encryption = { workspace = true } +engine_panic = { workspace = true } +engine_rocks = { workspace = true } +engine_traits = { workspace = true } +file_system = { workspace = true } +raft_log_engine = { workspace = true } tempfile = "3.0" -tikv_alloc = { path = "../tikv_alloc" } +tikv_alloc = { workspace = true } # FIXME: Remove this dep from the engine_traits interface -tikv_util = { path = "../tikv_util", default-features = false } +tikv_util = { workspace = true } diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index a1b9e156ce1..b2a574422fb 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -91,7 +91,8 @@ pub mod kv { RocksSnapshot as KvTestSnapshot, RocksWriteBatchVec as KvTestWriteBatch, }; use engine_traits::{ - CfOptions, CfOptionsExt, OpenOptions, Result, TabletAccessor, TabletFactory, CF_DEFAULT, + CfOptions, CfOptionsExt, MiscExt, OpenOptions, Result, TabletAccessor, TabletFactory, + CF_DEFAULT, }; use tikv_util::box_err; @@ -160,10 +161,7 @@ pub mod kv { ) -> Result { if let Some(db) = self.root_db.lock().unwrap().as_ref() { if options.create_new() { - return Err(box_err!( - "root tablet {} already exists", - db.as_inner().path() - )); + return Err(box_err!("root tablet {} already exists", db.path())); } return Ok(db.clone()); } @@ -273,11 +271,7 @@ pub mod kv { // Target tablet exist in the cache if options.create_new() { - return Err(box_err!( - "region {} {} already exists", - id, - tablet.as_inner().path() - )); + return 
Err(box_err!("region {} {} already exists", id, tablet.path())); } return Ok(tablet.clone()); } else if !options.cache_only() { @@ -382,11 +376,7 @@ pub mod kv { { let reg = self.registry.lock().unwrap(); if let Some(db) = reg.get(&(id, suffix)) { - return Err(box_err!( - "region {} {} already exists", - id, - db.as_inner().path() - )); + return Err(box_err!("region {} {} already exists", id, db.path())); } } diff --git a/components/engine_tirocks/Cargo.toml b/components/engine_tirocks/Cargo.toml index 5ffa4428dd2..8ecce112579 100644 --- a/components/engine_tirocks/Cargo.toml +++ b/components/engine_tirocks/Cargo.toml @@ -4,24 +4,24 @@ version = "0.1.0" edition = "2021" [dependencies] -api_version = { path = "../api_version" } -codec = { path = "../codec" } -collections = { path = "../collections" } +api_version = { workspace = true } +codec = { workspace = true } +collections = { workspace = true } derive_more = "0.99.3" -engine_traits = { path = "../engine_traits" } -keys = { path = "../keys" } +engine_traits = { workspace = true } +keys = { workspace = true } lazy_static = "1.4.0" -log_wrappers = { path = "../log_wrappers" } +log_wrappers = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } slog_derive = "0.2" -tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util" } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } tirocks = { git = "https://github.com/busyjay/tirocks.git", branch = "dev" } -tracker = { path = "../tracker" } -txn_types = { path = "../txn_types" } +tracker = { workspace = true } +txn_types = { workspace = true } [dev-dependencies] kvproto = { git = "https://github.com/pingcap/kvproto.git" } diff --git 
a/components/engine_traits/Cargo.toml b/components/engine_traits/Cargo.toml index fb4bb69e5bc..c2e9d729868 100644 --- a/components/engine_traits/Cargo.toml +++ b/components/engine_traits/Cargo.toml @@ -8,22 +8,22 @@ publish = false failpoints = ["fail/failpoints"] [dependencies] -case_macros = { path = "../case_macros" } -error_code = { path = "../error_code", default-features = false } +case_macros = { workspace = true } +error_code = { workspace = true } fail = "0.5" -file_system = { path = "../file_system", default-features = false } +file_system = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } -log_wrappers = { path = "../log_wrappers" } +log_wrappers = { workspace = true } protobuf = "2" raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } serde = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } thiserror = "1.0" -tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } -tracker = { path = "../tracker" } -txn_types = { path = "../txn_types", default-features = false } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } +tracker = { workspace = true } +txn_types = { workspace = true } [dev-dependencies] serde_derive = "1.0" diff --git a/components/engine_traits_tests/Cargo.toml b/components/engine_traits_tests/Cargo.toml index a011b1cc281..301a7ee5d76 100644 --- a/components/engine_traits_tests/Cargo.toml +++ b/components/engine_traits_tests/Cargo.toml @@ -25,8 +25,8 @@ test-engines-panic = [ ] [dependencies] -engine_test = { path = "../engine_test", default-features = false } -engine_traits = { path = "../engine_traits", default-features = false } -panic_hook = { path = "../panic_hook" } +engine_test = { workspace = true } +engine_traits = { workspace 
= true } +panic_hook = { workspace = true } tempfile = "3.0" -tikv_alloc = { path = "../tikv_alloc" } +tikv_alloc = { workspace = true } diff --git a/components/error_code/Cargo.toml b/components/error_code/Cargo.toml index 3b7284faa63..484f8d24ad3 100644 --- a/components/error_code/Cargo.toml +++ b/components/error_code/Cargo.toml @@ -13,9 +13,9 @@ name = "error_code_gen" path = "bin.rs" [dependencies] -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } serde = { version = "1.0", features = ["derive"] } -tikv_alloc = { path = "../tikv_alloc" } +tikv_alloc = { workspace = true } diff --git a/components/external_storage/Cargo.toml b/components/external_storage/Cargo.toml index b74af6ff39d..8c92b79583e 100644 --- a/components/external_storage/Cargo.toml +++ b/components/external_storage/Cargo.toml @@ -19,16 +19,16 @@ failpoints = ["fail/failpoints"] async-compression = { version = "0.3.14", features = ["futures-io", "zstd"] } async-trait = "0.1" bytes = "1.0" -encryption = { path = "../encryption" } -engine_traits = { path = "../engine_traits" } +encryption = { workspace = true } +engine_traits = { workspace = true } fail = "0.5" ffi-support = { optional = true, version = "0.4.2" } -file_system = { path = "../file_system" } +file_system = { workspace = true } futures = "0.3" futures-executor = "0.3" futures-io = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } -grpcio = { version = "0.10", optional = true, default-features = false, features = ["openssl-vendored"] } +grpcio = { workspace = true, optional = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" libloading = { optional = true, version = "0.7.0" } @@ -40,8 +40,8 @@ rusoto_core = 
"0.46.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } # better to not use slog-global, but pass in the logger slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } -tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["time", "fs", "process"] } tokio-util = { version = "0.7", features = ["compat"] } url = "2.0" diff --git a/components/external_storage/export/Cargo.toml b/components/external_storage/export/Cargo.toml index 82ff01c2afb..076bdd9d0dd 100644 --- a/components/external_storage/export/Cargo.toml +++ b/components/external_storage/export/Cargo.toml @@ -49,36 +49,36 @@ cloud-storage-grpc = [ ] [dependencies] -aws = { optional = true, path = "../../cloud/aws", default-features = false } -azure = { optional = true, path = "../../cloud/azure", default-features = false } -cloud = { path = "../../cloud", default_features = false } -lazy_static = { optional = true, version = "1.3" } -gcp = { optional = true, path = "../../cloud/gcp", default-features = false } -grpcio = { version = "0.10", optional = true, default-features = false, features = ["openssl-vendored"] } -encryption = { path = "../../encryption", default-features = false } -external_storage = { path = "../", default-features = false } -engine_traits = { path = "../../engine_traits", default-features = false } +async-compression = { version = "0.3.14", features = ["futures-io", "zstd"] } +async-trait = "0.1" +aws = { optional = true, workspace = true } +azure = { optional = true, workspace = true } +cloud = { workspace = true } +encryption = { workspace = true } +engine_traits = { workspace = true } +external_storage = { workspace = true } ffi-support = { optional = true, version = "0.4.2" } -file_system = { optional = true, 
path = "../../file_system" } +file_system = { workspace = true, optional = true } futures = { optional = true, version = "0.3" } futures-executor = { optional = true, version = "0.3" } futures-io = { version = "0.3" } futures-util = { version = "0.3", default-features = false, features = ["io"] } +gcp = { optional = true, workspace = true } +grpcio = { workspace = true, optional = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } +lazy_static = { optional = true, version = "1.3" } libloading = { optional = true, version = "0.7.0" } once_cell = { optional = true, version = "1.3.1" } protobuf = { optional = true, version = "2" } slog-global = { optional = true, version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } -tikv_util = { path = "../../tikv_util" } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["time", "rt", "net"], optional = true } tokio-util = { version = "0.7", features = ["compat"], optional = true } url = "2.0" -async-trait = "0.1" -async-compression = { version = "0.3.14", features = ["futures-io", "zstd"]} [dev-dependencies] -matches = "0.1.8" futures-util = { version = "0.3", default-features = false, features = ["io"] } +matches = "0.1.8" rust-ini = "0.14.0" structopt = "0.3" tempfile = "3.1" diff --git a/components/external_storage/export/examples/scli.rs b/components/external_storage/export/examples/scli.rs index e98e24ab452..0ab54721b29 100644 --- a/components/external_storage/export/examples/scli.rs +++ b/components/external_storage/export/examples/scli.rs @@ -6,9 +6,15 @@ use std::{ path::Path, }; +#[cfg(feature = "cloud-azure")] +use external_storage_export::make_azblob_backend; +#[cfg(feature = "cloud-gcp")] +use external_storage_export::make_gcs_backend; +#[cfg(feature = "cloud-aws")] +use external_storage_export::make_s3_backend; use external_storage_export::{ - create_storage, make_azblob_backend, make_cloud_backend, 
make_gcs_backend, make_hdfs_backend, - make_local_backend, make_noop_backend, make_s3_backend, ExternalStorage, UnpinReader, + create_storage, make_cloud_backend, make_hdfs_backend, make_local_backend, make_noop_backend, + ExternalStorage, UnpinReader, }; use futures_util::io::{copy, AllowStdIo}; use ini::ini::Ini; @@ -144,7 +150,10 @@ fn create_s3_storage(opt: &Opt) -> Result { if let Some(prefix) = &opt.prefix { config.prefix = prefix.to_string(); } - Ok(make_s3_backend(config)) + #[cfg(feature = "cloud-aws")] + return Ok(make_s3_backend(config)); + #[cfg(not(feature = "cloud-aws"))] + return Err(Error::new(ErrorKind::Other, "missing feature")); } fn create_gcs_storage(opt: &Opt) -> Result { @@ -164,7 +173,10 @@ fn create_gcs_storage(opt: &Opt) -> Result { if let Some(prefix) = &opt.prefix { config.prefix = prefix.to_string(); } - Ok(make_gcs_backend(config)) + #[cfg(feature = "cloud-gcp")] + return Ok(make_gcs_backend(config)); + #[cfg(not(feature = "cloud-gcp"))] + return Err(Error::new(ErrorKind::Other, "missing feature")); } fn create_azure_storage(opt: &Opt) -> Result { @@ -200,7 +212,10 @@ fn create_azure_storage(opt: &Opt) -> Result { if let Some(prefix) = &opt.prefix { config.prefix = prefix.to_string(); } - Ok(make_azblob_backend(config)) + #[cfg(feature = "cloud-azure")] + return Ok(make_azblob_backend(config)); + #[cfg(not(feature = "cloud-azure"))] + return Err(Error::new(ErrorKind::Other, "missing feature")); } fn process() -> Result<()> { diff --git a/components/file_system/Cargo.toml b/components/file_system/Cargo.toml index e3924c0fc25..033d31681c1 100644 --- a/components/file_system/Cargo.toml +++ b/components/file_system/Cargo.toml @@ -8,13 +8,13 @@ publish = false bcc-iosnoop = ["bcc"] [dependencies] -collections = { path = "../collections" } +collections = { workspace = true } crc32fast = "1.2" crossbeam-utils = "0.8.0" fs2 = "0.4" lazy_static = "1.3" libc = "0.2" -online_config = { path = "../online_config" } +online_config = { workspace = 
true } openssl = "0.10" parking_lot = "0.12" prometheus = { version = "0.13", features = ["nightly"] } @@ -24,8 +24,8 @@ serde = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } strum = { version = "0.20", features = ["derive"] } -tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["time"] } [dev-dependencies] diff --git a/components/into_other/Cargo.toml b/components/into_other/Cargo.toml index be278cdc764..39989a4bf75 100644 --- a/components/into_other/Cargo.toml +++ b/components/into_other/Cargo.toml @@ -5,6 +5,6 @@ edition = "2018" publish = false [dependencies] -engine_traits = { path = "../engine_traits", default-features = false } +engine_traits = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } diff --git a/components/keys/Cargo.toml b/components/keys/Cargo.toml index a9bd4ddbf18..f8318237b20 100644 --- a/components/keys/Cargo.toml +++ b/components/keys/Cargo.toml @@ -7,9 +7,9 @@ publish = false [dependencies] byteorder = "1.2" kvproto = { git = "https://github.com/pingcap/kvproto.git" } -log_wrappers = { path = "../log_wrappers" } +log_wrappers = { workspace = true } thiserror = "1.0" -tikv_alloc = { path = "../tikv_alloc" } +tikv_alloc = { workspace = true } [dev-dependencies] -panic_hook = { path = "../panic_hook" } +panic_hook = { workspace = true } diff --git a/components/log_wrappers/Cargo.toml b/components/log_wrappers/Cargo.toml index e8e9a3cc52f..4c9e62b6876 100644 --- a/components/log_wrappers/Cargo.toml +++ b/components/log_wrappers/Cargo.toml @@ -9,4 +9,4 @@ hex = "0.4" protobuf = { version = 
"2.8", features = ["bytes"] } slog = "2.3" slog-term = "2.4" -tikv_alloc = { path = "../tikv_alloc" } +tikv_alloc = { workspace = true } diff --git a/components/pd_client/Cargo.toml b/components/pd_client/Cargo.toml index 44f09485705..c2ee9982bcd 100644 --- a/components/pd_client/Cargo.toml +++ b/components/pd_client/Cargo.toml @@ -8,26 +8,26 @@ publish = false failpoints = ["fail/failpoints"] [dependencies] -collections = { path = "../collections" } -error_code = { path = "../error_code", default-features = false } +collections = { workspace = true } +error_code = { workspace = true } fail = "0.5" futures = "0.3" -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } -log_wrappers = { path = "../log_wrappers" } +log_wrappers = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } -security = { path = "../security", default-features = false } +security = { workspace = true } semver = "0.10" serde = "1.0" serde_derive = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } thiserror = "1.0" -tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } tokio = { version = "1", features = ["sync"] } tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } -txn_types = { path = "../txn_types", default-features = false } +txn_types = { workspace = true } yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } diff --git a/components/profiler/Cargo.toml 
b/components/profiler/Cargo.toml index f0879722b1b..b0c456b209f 100644 --- a/components/profiler/Cargo.toml +++ b/components/profiler/Cargo.toml @@ -8,7 +8,7 @@ publish = false profiling = ["lazy_static", "gperftools", "callgrind", "valgrind_request"] [dependencies] -tikv_alloc = { path = "../tikv_alloc" } +tikv_alloc = { workspace = true } [target.'cfg(unix)'.dependencies] lazy_static = { version = "1.3.0", optional = true } diff --git a/components/raft_log_engine/Cargo.toml b/components/raft_log_engine/Cargo.toml index d13e9ea4a0b..2b9d2de73ff 100644 --- a/components/raft_log_engine/Cargo.toml +++ b/components/raft_log_engine/Cargo.toml @@ -5,13 +5,13 @@ publish = false edition = "2018" [dependencies] -encryption = { path = "../encryption" } -engine_traits = { path = "../engine_traits", default-features = false } -file_system = { path = "../file_system" } +encryption = { workspace = true } +engine_traits = { workspace = true } +file_system = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.4.0" num_cpus = "1" -online_config = { path = "../online_config" } +online_config = { workspace = true } protobuf = "2" raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raft-engine = { git = "https://github.com/tikv/raft-engine.git", features = ["swap"] } @@ -19,6 +19,6 @@ serde = "1.0" serde_derive = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } -tikv_util = { path = "../tikv_util", default-features = false } +tikv_util = { workspace = true } time = "0.1" -tracker = { path = "../tracker" } +tracker = { workspace = true } diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 09fa707c408..9adaf0c13e2 100644 --- a/components/raftstore-v2/Cargo.toml +++ 
b/components/raftstore-v2/Cargo.toml @@ -29,35 +29,35 @@ cloud-gcp = ["raftstore/cloud-gcp"] cloud-azure = ["raftstore/cloud-azure"] [dependencies] -batch-system = { path = "../batch-system", default-features = false } -collections = { path = "../collections" } +batch-system = { workspace = true } +collections = { workspace = true } crossbeam = "0.8" -engine_traits = { path = "../engine_traits" } -error_code = { path = "../error_code" } +engine_traits = { workspace = true } +error_code = { workspace = true } fail = "0.5" -file_system = { path = "../file_system" } +file_system = { workspace = true } futures = { version = "0.3", features = ["compat"] } -keys = { path = "../keys", default-features = false } +keys = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } -log_wrappers = { path = "../log_wrappers" } -pd_client = { path = "../pd_client" } +log_wrappers = { workspace = true } +pd_client = { workspace = true } protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raft-proto = { version = "0.7.0" } -raftstore = { path = "../raftstore" } +raftstore = { workspace = true } slog = "2.3" smallvec = "1.4" -tikv_util = { path = "../tikv_util", default-features = false } +tikv_util = { workspace = true } time = "0.1" -tracker = { path = "../tracker" } -txn_types = { path = "../txn_types", default-features = false } +tracker = { workspace = true } +txn_types = { workspace = true } [dev-dependencies] -engine_test = { path = "../engine_test", default-features = false } +engine_test = { workspace = true } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } tempfile = "3.0" -test_pd = { path = "../test_pd" } -test_util = { path = "../test_util" } +test_pd = { workspace = true } +test_util = { workspace = true } [[test]] name = "raftstore-v2-failpoints" diff --git 
a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 78cc9976dab..5986d3e4596 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -432,7 +432,7 @@ mod tests { ctor::{CfOptions, DbOptions}, kv::{KvTestEngine, TestTabletFactoryV2}, }; - use engine_traits::{OpenOptions, Peekable, SyncMutable, TabletFactory, ALL_CFS}; + use engine_traits::{MiscExt, OpenOptions, Peekable, SyncMutable, TabletFactory, ALL_CFS}; use futures::executor::block_on; use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, raft_cmdpb::*}; use raftstore::store::{ @@ -758,7 +758,7 @@ mod tests { let (_, delegate) = store_meta.get_executor_and_len(1); let mut delegate = delegate.unwrap(); let tablet = delegate.get_tablet(); - assert_eq!(tablet1.as_inner().path(), tablet.as_inner().path()); + assert_eq!(tablet1.path(), tablet.path()); let snapshot = delegate.get_snapshot(&None); assert_eq!( b"val1".to_vec(), @@ -768,7 +768,7 @@ mod tests { let (_, delegate) = store_meta.get_executor_and_len(2); let mut delegate = delegate.unwrap(); let tablet = delegate.get_tablet(); - assert_eq!(tablet2.as_inner().path(), tablet.as_inner().path()); + assert_eq!(tablet2.path(), tablet.path()); let snapshot = delegate.get_snapshot(&None); assert_eq!( b"val2".to_vec(), diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 4c41b19c828..54eb07e8161 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -28,73 +28,73 @@ cloud-gcp = ["sst_importer/cloud-gcp"] cloud-azure = ["sst_importer/cloud-azure"] [dependencies] -batch-system = { path = "../batch-system", default-features = false } +batch-system = { workspace = true } bitflags = "1.0.1" byteorder = "1.2" bytes = "1.0" -causal_ts = { path = "../causal_ts" } -collections = { path = "../collections" } -concurrency_manager = { path = "../concurrency_manager", default-features = 
false } +causal_ts = { workspace = true } +collections = { workspace = true } +concurrency_manager = { workspace = true } crc32fast = "1.2" crossbeam = "0.8" derivative = "2" -encryption = { path = "../encryption", default-features = false } -engine_rocks = { path = "../engine_rocks", default-features = false, optional = true } +encryption = { workspace = true } +engine_rocks = { workspace = true, optional = true } # Should be [dev-dependencies] but we need to control the features # https://github.com/rust-lang/cargo/issues/6915 -engine_test = { path = "../engine_test", default-features = false } -engine_traits = { path = "../engine_traits", default-features = false } -error_code = { path = "../error_code", default-features = false } +engine_test = { workspace = true } +engine_traits = { workspace = true } +error_code = { workspace = true } fail = "0.5" -file_system = { path = "../file_system", default-features = false } +file_system = { workspace = true } fs2 = "0.4" futures = "0.3" futures-util = { version = "0.3.1", default-features = false, features = ["io"] } getset = "0.1" grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } -into_other = { path = "../into_other", default-features = false } +into_other = { workspace = true } itertools = "0.10" -keys = { path = "../keys", default-features = false } +keys = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } -log_wrappers = { path = "../log_wrappers" } -memory_trace_macros = { path = "../memory_trace_macros" } -online_config = { path = "../online_config" } +log_wrappers = { workspace = true } +memory_trace_macros = { workspace = true } +online_config = { workspace = true } openssl = "0.10" ordered-float = "2.6" parking_lot = "0.12" -pd_client = { path = "../pd_client", default-features = false } +pd_client = { workspace = true } 
prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raft-proto = { version = "0.7.0", default-features = false } rand = "0.8.3" -resource_metering = { path = "../resource_metering" } +resource_metering = { workspace = true } serde = "1.0" serde_derive = "1.0" serde_with = "1.4" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } smallvec = "1.4" -sst_importer = { path = "../sst_importer", default-features = false } +sst_importer = { workspace = true } tempfile = "3.0" thiserror = "1.0" -tidb_query_datatype = { path = "../tidb_query_datatype", default-features = false } -tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } +tidb_query_datatype = { workspace = true } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } time = "0.1" tokio = { version = "1.5", features = ["sync", "rt-multi-thread"] } -tracker = { path = "../tracker" } -txn_types = { path = "../txn_types", default-features = false } +tracker = { workspace = true } +txn_types = { workspace = true } uuid = { version = "0.8.1", features = ["serde", "v4"] } yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } [dev-dependencies] -encryption_export = { path = "../encryption/export", default-features = false } -engine_panic = { path = "../engine_panic", default-features = false } -engine_rocks = { path = "../engine_rocks", default-features = false } -panic_hook = { path = "../panic_hook" } -test_sst_importer = { path = "../test_sst_importer", default-features = false } +encryption_export = { workspace = true } +engine_panic = { workspace = true } +engine_rocks = { workspace = true } 
+panic_hook = { workspace = true } +test_sst_importer = { workspace = true } diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index fd6c7552f5d..d62f2f6c1db 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -1654,14 +1654,14 @@ mod tests { let mut delegate = delegate.unwrap(); assert_eq!(1, delegate.region.id); let tablet = delegate.get_tablet(); - assert_eq!(kv_engine.as_inner().path(), tablet.as_inner().path()); + assert_eq!(kv_engine.path(), tablet.path()); let (len, delegate) = store_meta.get_executor_and_len(2); assert_eq!(2, len); let mut delegate = delegate.unwrap(); assert_eq!(2, delegate.region.id); let tablet = delegate.get_tablet(); - assert_eq!(kv_engine.as_inner().path(), tablet.as_inner().path()); + assert_eq!(kv_engine.path(), tablet.path()); } fn prepare_read_delegate( diff --git a/components/resolved_ts/Cargo.toml b/components/resolved_ts/Cargo.toml index 6309440202b..d4a7e3d1ca2 100644 --- a/components/resolved_ts/Cargo.toml +++ b/components/resolved_ts/Cargo.toml @@ -23,41 +23,41 @@ test-engines-rocksdb = ["tikv/test-engines-rocksdb"] test-engines-panic = ["tikv/test-engines-panic"] [dependencies] -collections = { path = "../collections" } -concurrency_manager = { path = "../concurrency_manager", default-features = false } +collections = { workspace = true } +concurrency_manager = { workspace = true } crossbeam = "0.8" -engine_traits = { path = "../engine_traits", default-features = false } +engine_traits = { workspace = true } fail = "0.5" futures = "0.3" -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored"] } +grpcio = { workspace = true } hex = "0.4" kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" -log_wrappers = { path = "../log_wrappers" } -online_config = { path = "../online_config" } -pd_client = { path = "../pd_client", default-features = false } 
+log_wrappers = { workspace = true } +online_config = { workspace = true } +pd_client = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } -raftstore = { path = "../raftstore", default-features = false } -security = { path = "../security", default-features = false } +raftstore = { workspace = true } +security = { workspace = true } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } thiserror = "1.0" -tikv = { path = "../../", default-features = false } -tikv_util = { path = "../tikv_util", default-features = false } +tikv = { workspace = true } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread", "time"] } -txn_types = { path = "../txn_types", default-features = false } +txn_types = { workspace = true } [dev-dependencies] -engine_rocks = { path = "../engine_rocks", default-features = false } -panic_hook = { path = "../panic_hook" } +engine_rocks = { workspace = true } +panic_hook = { workspace = true } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } tempfile = "3.0" -test_raftstore = { path = "../test_raftstore", default-features = false } -test_sst_importer = { path = "../test_sst_importer" } -test_util = { path = "../test_util", default-features = false } -tikv_kv = { path = "../tikv_kv" } +test_raftstore = { workspace = true } +test_sst_importer = { workspace = true } +test_util = { workspace = true } +tikv_kv = { workspace = true } [[test]] name = "integrations" diff --git a/components/resource_metering/Cargo.toml b/components/resource_metering/Cargo.toml index 72a0c0dc339..acb2dff89d3 100644 --- 
a/components/resource_metering/Cargo.toml +++ b/components/resource_metering/Cargo.toml @@ -4,15 +4,15 @@ version = "0.0.1" edition = "2018" [dependencies] -collections = { path = "../collections" } +collections = { workspace = true } crossbeam = "0.8" futures = "0.3" -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" libc = "0.2" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } -online_config = { path = "../online_config" } +online_config = { workspace = true } pdqselect = "0.1" pin-project = "1.0" prometheus = { version = "0.13", features = ["nightly"] } @@ -20,7 +20,7 @@ serde = "1.0" serde_derive = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } -tikv_util = { path = "../tikv_util" } +tikv_util = { workspace = true } [target.'cfg(target_os = "linux")'.dependencies] procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229b2c1fcc44118bef7eff127128" } diff --git a/components/security/Cargo.toml b/components/security/Cargo.toml index 8257d04f51f..4599b1df43e 100644 --- a/components/security/Cargo.toml +++ b/components/security/Cargo.toml @@ -8,13 +8,13 @@ publish = false tonic = ["dep:tonic"] [dependencies] -collections = { path = "../collections" } -encryption = { path = "../encryption", default-features = false } -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +collections = { workspace = true } +encryption = { workspace = true } +grpcio = { workspace = true } serde = "1.0" serde_derive = "1.0" serde_json = "1.0" -tikv_util = { path = "../tikv_util", default-features = false } +tikv_util = { workspace 
= true } tonic = { version = "0.5", features = ["tls"], optional = true } [dev-dependencies] diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index 6f54f37ba0b..1f4d98b2847 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -33,54 +33,54 @@ nortcheck = ["engine_rocks/nortcheck"] backup-stream-debug = ["backup-stream/backup-stream-debug"] [dependencies] -api_version = { path = "../api_version" } -backup = { path = "../backup", default-features = false } -backup-stream = { path = "../backup-stream", default-features = false } -causal_ts = { path = "../causal_ts" } -cdc = { path = "../cdc", default-features = false } +api_version = { workspace = true } +backup = { workspace = true } +backup-stream = { workspace = true } +causal_ts = { workspace = true } +cdc = { workspace = true } chrono = "0.4" clap = "2.32" -collections = { path = "../collections" } -concurrency_manager = { path = "../concurrency_manager", default-features = false } +collections = { workspace = true } +concurrency_manager = { workspace = true } crossbeam = "0.8" -encryption = { path = "../encryption", default-features = false } -encryption_export = { path = "../encryption/export", default-features = false } -engine_rocks = { path = "../engine_rocks", default-features = false } -engine_rocks_helper = { path = "../engine_rocks_helper" } -engine_traits = { path = "../engine_traits", default-features = false } -error_code = { path = "../error_code", default-features = false } -file_system = { path = "../file_system", default-features = false } +encryption = { workspace = true } +encryption_export = { workspace = true } +engine_rocks = { workspace = true } +engine_rocks_helper = { workspace = true } +engine_traits = { workspace = true } +error_code = { workspace = true } +file_system = { workspace = true } fs2 = "0.4" futures = "0.3" -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored"] } +grpcio = { workspace = true } 
grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } hex = "0.4" -keys = { path = "../keys", default-features = false } +keys = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } libc = "0.2" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } -log_wrappers = { path = "../log_wrappers" } -pd_client = { path = "../pd_client", default-features = false } +log_wrappers = { workspace = true } +pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } -raft_log_engine = { path = "../raft_log_engine", default-features = false } -raftstore = { path = "../raftstore", default-features = false, features = ["engine_rocks"] } +raft_log_engine = { workspace = true } +raftstore = { workspace = true, features = ["engine_rocks"] } rand = "0.8" -resolved_ts = { path = "../../components/resolved_ts", default-features = false } -resource_metering = { path = "../resource_metering" } -security = { path = "../security", default-features = false, features = ["tonic"] } +resolved_ts = { workspace = true } +resource_metering = { workspace = true } +security = { workspace = true, features = ["tonic"] } serde_json = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } -snap_recovery = { path = "../snap_recovery", default-features = false } +snap_recovery = { workspace = true } tempfile = "3.0" -tikv = { path = "../..", default-features = false } -tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } +tikv = { workspace = true } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } 
tokio = { version = "1.5", features = ["rt-multi-thread"] } toml = "0.5" -txn_types = { path = "../txn_types", default-features = false } +txn_types = { workspace = true } yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } [target.'cfg(unix)'.dependencies] diff --git a/components/server/src/lib.rs b/components/server/src/lib.rs index 8a46f601a75..57793792289 100644 --- a/components/server/src/lib.rs +++ b/components/server/src/lib.rs @@ -1,5 +1,8 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. +#![allow(incomplete_features)] +#![feature(specialization)] + #[macro_use] extern crate tikv_util; diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 2320d1156f4..2295839a806 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1595,10 +1595,23 @@ pub trait ConfiguredRaftEngine: RaftEngine { _: &Option>, _: &Option, ) -> Self; - fn as_rocks_engine(&self) -> Option<&RocksEngine> { + fn as_rocks_engine(&self) -> Option<&RocksEngine>; + fn register_config(&self, _cfg_controller: &mut ConfigController, _share_cache: bool); +} + +impl ConfiguredRaftEngine for T { + default fn build( + _: &TikvConfig, + _: &Arc, + _: &Option>, + _: &Option, + ) -> Self { + unimplemented!() + } + default fn as_rocks_engine(&self) -> Option<&RocksEngine> { None } - fn register_config(&self, _cfg_controller: &mut ConfigController, _share_cache: bool) {} + default fn register_config(&self, _cfg_controller: &mut ConfigController, _share_cache: bool) {} } impl ConfiguredRaftEngine for RocksEngine { diff --git a/components/snap_recovery/Cargo.toml b/components/snap_recovery/Cargo.toml index d82601f577a..1b69d8ba150 100644 --- a/components/snap_recovery/Cargo.toml +++ b/components/snap_recovery/Cargo.toml @@ -7,26 +7,26 @@ publish = false [dependencies] chrono = "0.4" -encryption = { path = "../../components/encryption", default-features = false } -encryption_export = { path = 
"../../components/encryption/export", default-features = false } -engine_rocks = { path = "../../components/engine_rocks", default-features = false } -engine_traits = { path = "../../components/engine_traits", default-features = false } +encryption = { workspace = true } +encryption_export = { workspace = true } +engine_rocks = { workspace = true } +engine_traits = { workspace = true } futures = { version = "0.3", features = ["executor"] } -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } -keys = { path = "../../components/keys", default-features = false } +grpcio = { workspace = true } +keys = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto" } log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } -pd_client = { path = "../pd_client", default-features = false } +pd_client = { workspace = true } protobuf = { version = "2.8", features = ["bytes"] } -raft_log_engine = { path = "../raft_log_engine", default-features = false } -raftstore = { path = "../../components/raftstore", default-features = false } +raft_log_engine = { workspace = true } +raftstore = { workspace = true } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } structopt = "0.3" tempfile = "3.0" thiserror = "1.0" -tikv = { path = "../.." 
} -tikv_alloc = { path = "../../components/tikv_alloc" } -tikv_util = { path = "../../components/tikv_util", default-features = false } +tikv = { workspace = true } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } toml = "0.5" -txn_types = { path = "../../components/txn_types", default-features = false } +txn_types = { workspace = true } diff --git a/components/sst_importer/Cargo.toml b/components/sst_importer/Cargo.toml index 887c9df6655..6b5fbd9127f 100644 --- a/components/sst_importer/Cargo.toml +++ b/components/sst_importer/Cargo.toml @@ -13,22 +13,22 @@ cloud-storage-grpc = ["external_storage_export/cloud-storage-grpc"] cloud-storage-dylib = ["external_storage_export/cloud-storage-dylib"] [dependencies] -api_version = { path = "../api_version", default-features = false } +api_version = { workspace = true } crc32fast = "1.2" dashmap = "5" -encryption = { path = "../encryption", default-features = false } -engine_rocks = { path = "../engine_rocks", default-features = false } -engine_traits = { path = "../engine_traits", default-features = false } -error_code = { path = "../error_code", default-features = false } -external_storage_export = { path = "../external_storage/export", default-features = false } -file_system = { path = "../file_system", default-features = false } +encryption = { workspace = true } +engine_rocks = { workspace = true } +engine_traits = { workspace = true } +error_code = { workspace = true } +external_storage_export = { workspace = true } +file_system = { workspace = true } futures = { version = "0.3", features = ["thread-pool"] } futures-util = { version = "0.3", default-features = false, features = ["io"] } -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } -keys = { path = "../keys", default-features = false } +grpcio = { workspace = true } +keys = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" -log_wrappers 
= { path = "../log_wrappers" } +log_wrappers = { workspace = true } openssl = "0.10" prometheus = { version = "0.13", default-features = false } serde = "1.0" @@ -36,13 +36,13 @@ serde_derive = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } thiserror = "1.0" -tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["time", "rt-multi-thread", "macros"] } -txn_types = { path = "../txn_types", default-features = false } +txn_types = { workspace = true } uuid = { version = "0.8.1", features = ["serde", "v4"] } [dev-dependencies] tempfile = "3.0" -test_sst_importer = { path = "../test_sst_importer", default-features = false } -test_util = { path = "../test_util", default-features = false } +test_sst_importer = { workspace = true } +test_util = { workspace = true } diff --git a/components/test_backup/Cargo.toml b/components/test_backup/Cargo.toml index ea85e329202..902e57d5eed 100644 --- a/components/test_backup/Cargo.toml +++ b/components/test_backup/Cargo.toml @@ -11,24 +11,24 @@ cloud-gcp = ["external_storage_export/cloud-gcp"] cloud-azure = ["external_storage_export/cloud-azure"] [dependencies] -api_version = { path = "../api_version" } -backup = { path = "../backup" } -collections = { path = "../collections" } -concurrency_manager = { path = "../concurrency_manager" } +api_version = { workspace = true } +backup = { workspace = true } +collections = { workspace = true } +concurrency_manager = { workspace = true } crc64fast = "0.1" -engine_traits = { path = "../engine_traits" } -external_storage_export = { path = "../external_storage/export", default-features = false } -file_system = { path = "../file_system", default-features = false 
} +engine_traits = { workspace = true } +external_storage_export = { workspace = true } +file_system = { workspace = true } futures = "0.3" futures-executor = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } protobuf = "2" rand = "0.8" tempfile = "3.0" -test_raftstore = { path = "../test_raftstore" } -tidb_query_common = { path = "../tidb_query_common" } -tikv = { path = "../../", default-features = false } -tikv_util = { path = "../tikv_util", default-features = false } -txn_types = { path = "../txn_types", default-features = false } +test_raftstore = { workspace = true } +tidb_query_common = { workspace = true } +tikv = { workspace = true } +tikv_util = { workspace = true } +txn_types = { workspace = true } diff --git a/components/test_coprocessor/Cargo.toml b/components/test_coprocessor/Cargo.toml index 6a12f16138f..a3bb3f8e476 100644 --- a/components/test_coprocessor/Cargo.toml +++ b/components/test_coprocessor/Cargo.toml @@ -20,18 +20,18 @@ test-engines-panic = [ ] [dependencies] -api_version = { path = "../api_version" } -collections = { path = "../collections" } -concurrency_manager = { path = "../concurrency_manager", default-features = false } -engine_rocks = { path = "../engine_rocks", default-features = false } +api_version = { workspace = true } +collections = { workspace = true } +concurrency_manager = { workspace = true } +engine_rocks = { workspace = true } futures = "0.3" kvproto = { git = "https://github.com/pingcap/kvproto.git" } protobuf = "2" -resource_metering = { path = "../resource_metering" } -test_storage = { path = "../test_storage", default-features = false } -tidb_query_common = { path = "../tidb_query_common", default-features = false } -tidb_query_datatype = { path = "../tidb_query_datatype", 
default-features = false } -tikv = { path = "../../", default-features = false } -tikv_util = { path = "../tikv_util", default-features = false } +resource_metering = { workspace = true } +test_storage = { workspace = true } +tidb_query_common = { workspace = true } +tidb_query_datatype = { workspace = true } +tikv = { workspace = true } +tikv_util = { workspace = true } tipb = { git = "https://github.com/pingcap/tipb.git" } -txn_types = { path = "../txn_types", default-features = false } +txn_types = { workspace = true } diff --git a/components/test_coprocessor_plugin/example_plugin/Cargo.toml b/components/test_coprocessor_plugin/example_plugin/Cargo.toml index cda1f2fa0c7..6bbc8b25012 100644 --- a/components/test_coprocessor_plugin/example_plugin/Cargo.toml +++ b/components/test_coprocessor_plugin/example_plugin/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "example_plugin" +name = "example_coprocessor_plugin" version = "0.1.0" edition = "2018" publish = false @@ -8,4 +8,4 @@ publish = false crate-type = ["dylib"] [dependencies] -coprocessor_plugin_api = { path = "../../coprocessor_plugin_api" } +coprocessor_plugin_api = { workspace = true } diff --git a/components/test_pd/Cargo.toml b/components/test_pd/Cargo.toml index efdc1a5a23c..d9163706895 100644 --- a/components/test_pd/Cargo.toml +++ b/components/test_pd/Cargo.toml @@ -5,13 +5,13 @@ edition = "2018" publish = false [dependencies] -collections = { path = "../collections" } +collections = { workspace = true } fail = "0.5" futures = "0.3" -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } -pd_client = { path = "../pd_client", default-features = false } -security = { path = "../security", default-features = false } +pd_client = { workspace = true } +security = { workspace = true } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] 
} slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } -tikv_util = { path = "../tikv_util", default-features = false } +tikv_util = { workspace = true } diff --git a/components/test_pd_client/Cargo.toml b/components/test_pd_client/Cargo.toml index 8894ce4a8e4..ad2b20de5a0 100644 --- a/components/test_pd_client/Cargo.toml +++ b/components/test_pd_client/Cargo.toml @@ -5,18 +5,18 @@ edition = "2018" publish = false [dependencies] -collections = { path = "../collections" } +collections = { workspace = true } fail = "0.5" futures = "0.3" -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } -keys = { path = "../keys", default-features = false } +grpcio = { workspace = true } +keys = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } -log_wrappers = { path = "../log_wrappers" } -pd_client = { path = "../pd_client", default-features = false } +log_wrappers = { workspace = true } +pd_client = { workspace = true } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } -tikv_util = { path = "../tikv_util", default-features = false } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread"] } tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } -txn_types = { path = "../txn_types", default-features = false } +txn_types = { workspace = true } diff --git a/components/test_raftstore/Cargo.toml b/components/test_raftstore/Cargo.toml index c442ab71137..fb627dccb11 100644 --- a/components/test_raftstore/Cargo.toml +++ b/components/test_raftstore/Cargo.toml @@ -23,43 +23,43 @@ test-engines-panic = 
[ ] [dependencies] -api_version = { path = "../api_version" } +api_version = { workspace = true } backtrace = "0.3" -causal_ts = { path = "../causal_ts", features = ["testexport"] } -collections = { path = "../collections" } -concurrency_manager = { path = "../concurrency_manager", default-features = false } +causal_ts = { workspace = true, features = ["testexport"] } +collections = { workspace = true } +concurrency_manager = { workspace = true } crossbeam = "0.8" -encryption_export = { path = "../encryption/export", default-features = false } -engine_rocks = { path = "../engine_rocks", default-features = false } -engine_rocks_helper = { path = "../engine_rocks_helper" } -engine_test = { path = "../engine_test", default-features = false } -engine_traits = { path = "../engine_traits", default-features = false } +encryption_export = { workspace = true } +engine_rocks = { workspace = true } +engine_rocks_helper = { workspace = true } +engine_test = { workspace = true } +engine_traits = { workspace = true } fail = "0.5" -file_system = { path = "../file_system" } +file_system = { workspace = true } futures = "0.3" -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { workspace = true } grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } -keys = { path = "../keys", default-features = false } +keys = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" -log_wrappers = { path = "../log_wrappers" } -pd_client = { path = "../pd_client", default-features = false } +log_wrappers = { workspace = true } +pd_client = { workspace = true } protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } -raftstore = { path = "../raftstore", default-features = false, features = ["testexport"] } +raftstore = { workspace = true, features = ["testexport"] } 
rand = "0.8" -resolved_ts = { path = "../resolved_ts" } -resource_metering = { path = "../resource_metering" } -security = { path = "../security", default-features = false } -server = { path = "../server" } +resolved_ts = { workspace = true } +resource_metering = { workspace = true } +security = { workspace = true } +server = { workspace = true } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } # better to not use slog-global, but pass in the logger slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } tempfile = "3.0" -test_pd_client = { path = "../test_pd_client" } -test_util = { path = "../test_util", default-features = false } -tikv = { path = "../../", default-features = false } -tikv_util = { path = "../tikv_util", default-features = false } +test_pd_client = { workspace = true } +test_util = { workspace = true } +tikv = { workspace = true } +tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread"] } tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } -txn_types = { path = "../txn_types", default-features = false } +txn_types = { workspace = true } diff --git a/components/test_sst_importer/Cargo.toml b/components/test_sst_importer/Cargo.toml index 71b8a69cf75..b0c3e96ef5a 100644 --- a/components/test_sst_importer/Cargo.toml +++ b/components/test_sst_importer/Cargo.toml @@ -10,8 +10,8 @@ test = false [dependencies] crc32fast = "1.2" -engine_rocks = { path = "../engine_rocks", default-features = false } -engine_traits = { path = "../engine_traits", default-features = false } -keys = { path = "../keys", default-features = false } +engine_rocks = { workspace = true } +engine_traits = { workspace = true } +keys = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } uuid = { version = "0.8.1", features = ["serde", "v4"] } diff --git 
a/components/test_storage/Cargo.toml b/components/test_storage/Cargo.toml index 65aa08cd101..04adc4e6de4 100644 --- a/components/test_storage/Cargo.toml +++ b/components/test_storage/Cargo.toml @@ -21,14 +21,14 @@ test-engines-panic = [ ] [dependencies] -api_version = { path = "../api_version" } -collections = { path = "../collections" } +api_version = { workspace = true } +collections = { workspace = true } futures = "0.3" kvproto = { git = "https://github.com/pingcap/kvproto.git" } -pd_client = { path = "../pd_client", default-features = false } -raftstore = { path = "../raftstore", default-features = false } -test_raftstore = { path = "../test_raftstore", default-features = false } -tikv = { path = "../../", default-features = false } -tikv_util = { path = "../tikv_util", default-features = false } -tracker = { path = "../tracker", default-features = false } -txn_types = { path = "../txn_types", default-features = false } +pd_client = { workspace = true } +raftstore = { workspace = true } +test_raftstore = { workspace = true } +tikv = { workspace = true } +tikv_util = { workspace = true } +tracker = { workspace = true } +txn_types = { workspace = true } diff --git a/components/test_util/Cargo.toml b/components/test_util/Cargo.toml index c5dc5dfd1d2..8aca28b092b 100644 --- a/components/test_util/Cargo.toml +++ b/components/test_util/Cargo.toml @@ -12,16 +12,16 @@ cloud-azure = ["encryption_export/cloud-azure"] [dependencies] backtrace = "0.3" -collections = { path = "../collections" } -encryption_export = { path = "../encryption/export", default-features = false } +collections = { workspace = true } +encryption_export = { workspace = true } fail = "0.5" -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } rand = "0.8" rand_isaac = "0.3" -security = { path = "../security", default-features = false } +security = { 
workspace = true } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } tempfile = "3.0" -tikv_util = { path = "../tikv_util", default-features = false } +tikv_util = { workspace = true } time = "0.1" diff --git a/components/tidb_query_aggr/Cargo.toml b/components/tidb_query_aggr/Cargo.toml index e1642fb6f31..db8d9d64faf 100644 --- a/components/tidb_query_aggr/Cargo.toml +++ b/components/tidb_query_aggr/Cargo.toml @@ -7,13 +7,13 @@ description = "Vector aggr functions of query engine to run TiDB pushed down exe [dependencies] match-template = "0.0.1" -tidb_query_codegen = { path = "../tidb_query_codegen" } -tidb_query_common = { path = "../tidb_query_common", default-features = false } -tidb_query_datatype = { path = "../tidb_query_datatype", default-features = false } -tidb_query_expr = { path = "../tidb_query_expr", default-features = false } -tikv_util = { path = "../tikv_util", default-features = false } +tidb_query_codegen = { workspace = true } +tidb_query_common = { workspace = true } +tidb_query_datatype = { workspace = true } +tidb_query_expr = { workspace = true } +tikv_util = { workspace = true } tipb = { git = "https://github.com/pingcap/tipb.git" } [dev-dependencies] -panic_hook = { path = "../panic_hook" } -tipb_helper = { path = "../tipb_helper", default-features = false } +panic_hook = { workspace = true } +tipb_helper = { workspace = true } diff --git a/components/tidb_query_common/Cargo.toml b/components/tidb_query_common/Cargo.toml index 0efadbd48e9..05133b130e7 100644 --- a/components/tidb_query_common/Cargo.toml +++ b/components/tidb_query_common/Cargo.toml @@ -9,16 +9,16 @@ description = "Common utility of a query engine to run TiDB pushed down executor anyhow = "1.0" async-trait = "0.1" derive_more = "0.99.3" -error_code = { path = "../error_code", default-features = false } 
+error_code = { workspace = true } futures = "0.3" kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" -log_wrappers = { path = "../log_wrappers" } +log_wrappers = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" serde_json = "1.0" thiserror = "1.0" -tikv_util = { path = "../tikv_util", default-features = false } +tikv_util = { workspace = true } time = "0.1" yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } diff --git a/components/tidb_query_datatype/Cargo.toml b/components/tidb_query_datatype/Cargo.toml index 7eb9a296ac2..de8f0b41110 100644 --- a/components/tidb_query_datatype/Cargo.toml +++ b/components/tidb_query_datatype/Cargo.toml @@ -13,14 +13,14 @@ boolinator = "2.4.0" bstr = "0.2.8" chrono = "0.4" chrono-tz = "0.5.1" -codec = { path = "../codec", default-features = false } -collections = { path = "../collections" } +codec = { workspace = true } +collections = { workspace = true } encoding_rs = { git = "https://github.com/xiongjiwei/encoding_rs.git", rev = "68e0bc5a72a37a78228d80cd98047326559cf43c" } -error_code = { path = "../error_code", default-features = false } +error_code = { workspace = true } hex = "0.4" kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" -log_wrappers = { path = "../log_wrappers" } +log_wrappers = { workspace = true } match-template = "0.0.1" nom = { version = "5.1.0", default-features = false, features = ["std"] } num = { version = "0.3", default-features = false } @@ -35,7 +35,7 @@ slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debu slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } static_assertions = { version = "1.0", features = ["nightly"] } thiserror = "1.0" -tidb_query_common = { path = "../tidb_query_common", default-features = false } -tikv_alloc = { path = 
"../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } +tidb_query_common = { workspace = true } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } tipb = { git = "https://github.com/pingcap/tipb.git" } diff --git a/components/tidb_query_executors/Cargo.toml b/components/tidb_query_executors/Cargo.toml index ada01c8aef0..e448340eddf 100644 --- a/components/tidb_query_executors/Cargo.toml +++ b/components/tidb_query_executors/Cargo.toml @@ -7,27 +7,27 @@ description = "A vector query engine to run TiDB pushed down executors" [dependencies] async-trait = "0.1" -codec = { path = "../codec", default-features = false } -collections = { path = "../collections" } +codec = { workspace = true } +collections = { workspace = true } fail = "0.5" futures = { version = "0.3", features = ["compat"] } itertools = "0.10" kvproto = { git = "https://github.com/pingcap/kvproto.git" } -log_wrappers = { path = "../log_wrappers" } +log_wrappers = { workspace = true } match-template = "0.0.1" protobuf = { version = "2.8", features = ["bytes"] } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } smallvec = "1.4" -tidb_query_aggr = { path = "../tidb_query_aggr", default-features = false } -tidb_query_common = { path = "../tidb_query_common", default-features = false } -tidb_query_datatype = { path = "../tidb_query_datatype", default-features = false } -tidb_query_expr = { path = "../tidb_query_expr", default-features = false } -tikv_util = { path = "../tikv_util", default-features = false } +tidb_query_aggr = { workspace = true } +tidb_query_common = { workspace = true } +tidb_query_datatype = { workspace = true } +tidb_query_expr = { workspace = true } +tikv_util = { workspace = true } tipb = { git = "https://github.com/pingcap/tipb.git" } yatp = { git = 
"https://github.com/tikv/yatp.git", branch = "master" } [dev-dependencies] anyhow = "1.0" -tidb_query_codegen = { path = "../tidb_query_codegen", default-features = false } -tipb_helper = { path = "../tipb_helper", default-features = false } +tidb_query_codegen = { workspace = true } +tipb_helper = { workspace = true } diff --git a/components/tidb_query_expr/Cargo.toml b/components/tidb_query_expr/Cargo.toml index a04553b5b6d..1ca4a46b6dd 100644 --- a/components/tidb_query_expr/Cargo.toml +++ b/components/tidb_query_expr/Cargo.toml @@ -9,11 +9,11 @@ description = "Vector expressions of query engine to run TiDB pushed down execut base64 = "0.13" bstr = "0.2.8" byteorder = "1.2" -codec = { path = "../codec", default-features = false } -file_system = { path = "../file_system", default-features = false } +codec = { workspace = true } +file_system = { workspace = true } flate2 = { version = "=1.0.11", default-features = false, features = ["zlib"] } hex = "0.4" -log_wrappers = { path = "../log_wrappers" } +log_wrappers = { workspace = true } match-template = "0.0.1" num = { version = "0.3", default-features = false } num-traits = "0.2" @@ -25,10 +25,10 @@ safemem = { version = "0.3", default-features = false } serde = "1.0" serde_json = "1.0" static_assertions = { version = "1.0", features = ["nightly"] } -tidb_query_codegen = { path = "../tidb_query_codegen" } -tidb_query_common = { path = "../tidb_query_common", default-features = false } -tidb_query_datatype = { path = "../tidb_query_datatype", default-features = false } -tikv_util = { path = "../tikv_util", default-features = false } +tidb_query_codegen = { workspace = true } +tidb_query_common = { workspace = true } +tidb_query_datatype = { workspace = true } +tikv_util = { workspace = true } time = "0.1" tipb = { git = "https://github.com/pingcap/tipb.git" } twoway = "0.2.0" @@ -37,6 +37,6 @@ uuid = { version = "0.8.1", features = ["v4"] } [dev-dependencies] bstr = "0.2.8" chrono = "0.4" -panic_hook = { path = 
"../panic_hook" } -profiler = { path = "../profiler" } -tipb_helper = { path = "../tipb_helper", default-features = false } +panic_hook = { workspace = true } +profiler = { workspace = true } +tipb_helper = { workspace = true } diff --git a/components/tikv_kv/Cargo.toml b/components/tikv_kv/Cargo.toml index 768f67626c2..6ee74371674 100644 --- a/components/tikv_kv/Cargo.toml +++ b/components/tikv_kv/Cargo.toml @@ -26,31 +26,31 @@ test-engines-panic = [ [dependencies] backtrace = "0.3" -collections = { path = "../collections" } -engine_panic = { path = "../engine_panic", default-features = false } -engine_rocks = { path = "../engine_rocks", default-features = false } -engine_test = { path = "../engine_test", default-features = false } -engine_traits = { path = "../engine_traits", default-features = false } -error_code = { path = "../error_code", default-features = false } +collections = { workspace = true } +engine_panic = { workspace = true } +engine_rocks = { workspace = true } +engine_test = { workspace = true } +engine_traits = { workspace = true } +error_code = { workspace = true } fail = "0.5" -file_system = { path = "../file_system" } +file_system = { workspace = true } futures = { version = "0.3", features = ["thread-pool", "compat"] } -into_other = { path = "../into_other", default-features = false } +into_other = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } -log_wrappers = { path = "../log_wrappers" } -pd_client = { path = "../pd_client" } +log_wrappers = { workspace = true } +pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" -raftstore = { path = "../raftstore", default-features = false } +raftstore = { workspace = true } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } 
slog_derive = "0.2" tempfile = "3.0" thiserror = "1.0" -tikv_util = { path = "../tikv_util", default-features = false } -tracker = { path = "../tracker" } -txn_types = { path = "../txn_types", default-features = false } +tikv_util = { workspace = true } +tracker = { workspace = true } +txn_types = { workspace = true } [dev-dependencies] -keys = { path = "../keys", default-features = false } -panic_hook = { path = "../panic_hook" } +keys = { workspace = true } +panic_hook = { workspace = true } diff --git a/components/tikv_kv/src/cursor.rs b/components/tikv_kv/src/cursor.rs index e0eaad4c0ce..2c9a071fbbb 100644 --- a/components/tikv_kv/src/cursor.rs +++ b/components/tikv_kv/src/cursor.rs @@ -579,7 +579,6 @@ mod tests { util::{new_engine_opt, FixedPrefixSliceTransform}, RocksCfOptions, RocksDbOptions, RocksEngine, RocksSnapshot, }; - use engine_test::new_temp_engine; use engine_traits::{IterOptions, SyncMutable, CF_DEFAULT}; use keys::data_key; use kvproto::metapb::{Peer, Region}; @@ -666,11 +665,20 @@ mod tests { #[test] fn test_reverse_iterate() { - let path = Builder::new().prefix("test-cursor").tempdir().unwrap(); - let engines = new_temp_engine(&path); - let (region, test_data) = load_default_dataset(engines.kv.clone()); + let path = Builder::new() + .prefix("test_reverse_iterate") + .tempdir() + .unwrap(); + let cf_opts = RocksCfOptions::default(); + let engine = new_engine_opt( + path.path().to_str().unwrap(), + RocksDbOptions::default(), + vec![(CF_DEFAULT, cf_opts)], + ) + .unwrap(); + let (region, test_data) = load_default_dataset(engine.clone()); - let snap = RegionSnapshot::::from_raw(engines.kv.clone(), region); + let snap = RegionSnapshot::::from_raw(engine.clone(), region); let mut statistics = CfStatistics::default(); let it = snap.iter(CF_DEFAULT, IterOptions::default()).unwrap(); let mut iter = Cursor::new(it, ScanMode::Mixed, false); @@ -725,7 +733,7 @@ mod tests { // test last region let mut region = Region::default(); 
region.mut_peers().push(Peer::default()); - let snap = RegionSnapshot::::from_raw(engines.kv, region); + let snap = RegionSnapshot::::from_raw(engine, region); let it = snap.iter(CF_DEFAULT, IterOptions::default()).unwrap(); let mut iter = Cursor::new(it, ScanMode::Mixed, false); assert!( diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 5b508a4a4d4..5ff65b33df3 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -14,27 +14,27 @@ backtrace = "0.3.9" byteorder = "1.2" bytes = "1.0" chrono = "0.4" -codec = { path = "../codec", default-features = false } -collections = { path = "../collections" } +codec = { workspace = true } +collections = { workspace = true } cpu-time = "1.0.0" crc32fast = "1.2" crossbeam = "0.8" derive_more = "0.99.3" -error_code = { path = "../error_code", default-features = false } +error_code = { workspace = true } fail = "0.5" futures = { version = "0.3", features = ["compat", "thread-pool"] } futures-util = { version = "0.3", default-features = false, features = ["io"] } -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { workspace = true } http = "0.2.0" kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" libc = "0.2" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } -log_wrappers = { path = "../log_wrappers" } +log_wrappers = { workspace = true } nix = "0.24" num-traits = "0.2" num_cpus = "1" -online_config = { path = "../online_config" } +online_config = { workspace = true } openssl = "0.10" pin-project = "1.0" prometheus = { version = "0.13", features = ["nightly"] } @@ -51,12 +51,12 @@ slog-json = "2.3" slog-term = "2.4" sysinfo = "0.16" thiserror = "1.0" -tikv_alloc = { path = "../tikv_alloc" } +tikv_alloc = { workspace = true } time = "0.1" tokio = { version = "1.5", features = ["rt-multi-thread"] } tokio-executor = "0.1" tokio-timer = { 
git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } -tracker = { path = "../tracker" } +tracker = { workspace = true } url = "2" yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } @@ -67,7 +67,7 @@ procfs = { version = "0.12", default-features = false } [dev-dependencies] gag = "1.0" -panic_hook = { path = "../panic_hook" } +panic_hook = { workspace = true } protobuf = "2" regex = "1.0" tempfile = "3.0" diff --git a/components/tipb_helper/Cargo.toml b/components/tipb_helper/Cargo.toml index 1e7f30c4c9f..31d2c290fdc 100644 --- a/components/tipb_helper/Cargo.toml +++ b/components/tipb_helper/Cargo.toml @@ -5,6 +5,6 @@ edition = "2018" publish = false [dependencies] -codec = { path = "../codec", default-features = false } -tidb_query_datatype = { path = "../tidb_query_datatype", default-features = false } +codec = { workspace = true } +tidb_query_datatype = { workspace = true } tipb = { git = "https://github.com/pingcap/tipb.git" } diff --git a/components/tracker/Cargo.toml b/components/tracker/Cargo.toml index f9b97010bd8..b369fab9628 100644 --- a/components/tracker/Cargo.toml +++ b/components/tracker/Cargo.toml @@ -5,7 +5,7 @@ edition = "2018" publish = false [dependencies] -collections = { path = "../../components/collections" } +collections = { workspace = true } crossbeam-utils = "0.8" kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1" diff --git a/components/txn_types/Cargo.toml b/components/txn_types/Cargo.toml index 18127d81254..9ccfe0bb323 100644 --- a/components/txn_types/Cargo.toml +++ b/components/txn_types/Cargo.toml @@ -7,17 +7,17 @@ publish = false [dependencies] bitflags = "1.0.1" byteorder = "1.2" -codec = { path = "../codec", default-features = false } -collections = { path = "../collections" } -error_code = { path = "../error_code", default-features = false } +codec = { workspace = true } +collections = { workspace = true } +error_code = { workspace = true } farmhash = "1.1.5" 
kvproto = { git = "https://github.com/pingcap/kvproto.git" } -log_wrappers = { path = "../log_wrappers" } +log_wrappers = { workspace = true } slog = "2.3" thiserror = "1.0" -tikv_alloc = { path = "../tikv_alloc" } -tikv_util = { path = "../tikv_util", default-features = false } +tikv_alloc = { workspace = true } +tikv_util = { workspace = true } [dev-dependencies] -panic_hook = { path = "../panic_hook" } +panic_hook = { workspace = true } rand = "0.8" diff --git a/fuzz/fuzzer-afl/Cargo.toml b/fuzz/fuzzer-afl/Cargo.toml index 4987508c80b..6c97305a253 100644 --- a/fuzz/fuzzer-afl/Cargo.toml +++ b/fuzz/fuzzer-afl/Cargo.toml @@ -4,7 +4,7 @@ version = "0.0.1" publish = false [dependencies] -fuzz-targets = { path = "../targets", default-features = false } +fuzz-targets = { path = "../targets" } # AFL only works for x86 targets [target.'cfg(all(not(target_os = "windows"), target_arch = "x86_64"))'.dependencies] diff --git a/fuzz/fuzzer-honggfuzz/Cargo.toml b/fuzz/fuzzer-honggfuzz/Cargo.toml index 51b6fa0e975..500f7466af3 100644 --- a/fuzz/fuzzer-honggfuzz/Cargo.toml +++ b/fuzz/fuzzer-honggfuzz/Cargo.toml @@ -4,7 +4,7 @@ version = "0.0.1" publish = false [dependencies] -fuzz-targets = { path = "../targets", default-features = false } +fuzz-targets = { path = "../targets" } [target.'cfg(not(target_os = "windows"))'.dependencies] honggfuzz = "0.5.47" diff --git a/fuzz/fuzzer-libfuzzer/Cargo.toml b/fuzz/fuzzer-libfuzzer/Cargo.toml index 97e27b015d8..db508147afa 100644 --- a/fuzz/fuzzer-libfuzzer/Cargo.toml +++ b/fuzz/fuzzer-libfuzzer/Cargo.toml @@ -4,5 +4,5 @@ version = "0.0.1" publish = false [dependencies] -fuzz-targets = { path = "../targets", default-features = false } +fuzz-targets = { path = "../targets" } libfuzzer-sys = "0.3.1" diff --git a/fuzz/targets/Cargo.toml b/fuzz/targets/Cargo.toml index 35de6e02f58..878ce33aea9 100644 --- a/fuzz/targets/Cargo.toml +++ b/fuzz/targets/Cargo.toml @@ -10,5 +10,5 @@ path = "mod.rs" [dependencies] anyhow = "1.0" byteorder = "1" 
-tidb_query_datatype = { path = "../../components/tidb_query_datatype", default-features = false } -tikv_util = { path = "../../components/tikv_util", default-features = false } +tidb_query_datatype = { workspace = true } +tikv_util = { workspace = true } diff --git a/scripts/check-bins.py b/scripts/check-bins.py index e8c7bf03791..aaa13e6b9de 100644 --- a/scripts/check-bins.py +++ b/scripts/check-bins.py @@ -13,7 +13,7 @@ WHITE_LIST = { "online_config", "online_config_derive", "tidb_query_codegen", "panic_hook", "fuzz", "fuzzer_afl", "fuzzer_honggfuzz", "fuzzer_libfuzzer", - "coprocessor_plugin_api", "example_plugin", "memory_trace_macros", "case_macros", + "coprocessor_plugin_api", "example_coprocessor_plugin", "memory_trace_macros", "case_macros", "tracker" } diff --git a/scripts/clippy b/scripts/clippy index e03ea2bfa8f..c5999ad670c 100755 --- a/scripts/clippy +++ b/scripts/clippy @@ -48,4 +48,4 @@ CLIPPY_LINTS=( cargo clippy --workspace \ --exclude fuzz-targets --exclude fuzzer-honggfuzz --exclude fuzzer-afl --exclude fuzzer-libfuzzer \ - --features "${TIKV_ENABLE_FEATURES}" "$@" -- "${CLIPPY_LINTS[@]}" + --no-default-features --features "${TIKV_ENABLE_FEATURES}" "$@" -- "${CLIPPY_LINTS[@]}" diff --git a/scripts/clippy-all b/scripts/clippy-all index 44b0663e106..e9257cf0c35 100755 --- a/scripts/clippy-all +++ b/scripts/clippy-all @@ -15,7 +15,7 @@ if [[ -n "$SHELL_DEBUG" ]] ; then set -x fi -./scripts/clippy --all-targets +./scripts/clippy --all-targets --features "testexport failpoints" # for pkg in "components/cdc" "components/backup" "cmd" "tests"; do # cd $pkg diff --git a/src/config.rs b/src/config.rs index 68193fe0ba9..9dcf17d17d5 100644 --- a/src/config.rs +++ b/src/config.rs @@ -37,7 +37,7 @@ use engine_rocks::{ DEFAULT_PROP_SIZE_INDEX_DISTANCE, }; use engine_traits::{ - CfOptions as _, CfOptionsExt, DbOptions as _, DbOptionsExt, TabletAccessor, + CfOptions as _, CfOptionsExt, DbOptions as _, DbOptionsExt, MiscExt, TabletAccessor, TabletErrorCollector, 
TitanCfOptions as _, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use file_system::IoRateLimiter; diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index 73331580990..b4a7688ef68 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -8,7 +8,8 @@ use std::{ use collections::HashMap; use engine_rocks::RocksEngine; use engine_traits::{ - CfOptions, CfOptionsExt, OpenOptions, Result, TabletAccessor, TabletFactory, CF_DEFAULT, + CfOptions, CfOptionsExt, MiscExt, OpenOptions, Result, TabletAccessor, TabletFactory, + CF_DEFAULT, }; use crate::server::engine_factory::KvEngineFactory; diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 82496068b99..5b638a01f48 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -1446,12 +1446,10 @@ where #[cfg(any(test, feature = "testexport"))] pub mod test_gc_worker { - use std::sync::Arc; + use std::sync::{Arc, Mutex}; use collections::HashMap; use engine_rocks::{RocksEngine, RocksSnapshot}; - use engine_test::kv::TestTabletFactoryV2; - use engine_traits::{KvEngine, OpenOptions, TabletFactory}; use kvproto::{ kvrpcpb::Context, metapb::{Peer, Region}, @@ -1570,17 +1568,15 @@ pub mod test_gc_worker { } } - #[derive(Clone)] + #[derive(Clone, Default)] pub struct MultiRocksEngine { - // Factory is not a normal way to fetch tablet and is just used in test to ease the test. 
- // Note: at most one tablet is allowed to exist for each region in the cache of factory - pub factory: Arc, + pub engines: Arc>>, pub region_info: HashMap, } impl Engine for MultiRocksEngine { - type Snap = RegionSnapshot; - type Local = RocksEngine; + type Snap = ::Snap; + type Local = ::Local; fn kv_engine(&self) -> Option { None @@ -1590,36 +1586,10 @@ pub mod test_gc_worker { &self, region_modifies: HashMap>, ) -> kv::Result<()> { - for (region_id, mut modifies) in region_modifies { - for modify in &mut modifies { - match modify { - Modify::Delete(_, ref mut key) => { - let bytes = keys::data_key(key.as_encoded()); - *key = Key::from_encoded(bytes); - } - Modify::Put(_, ref mut key, _) => { - let bytes = keys::data_key(key.as_encoded()); - *key = Key::from_encoded(bytes); - } - Modify::PessimisticLock(ref mut key, _) => { - let bytes = keys::data_key(key.as_encoded()); - *key = Key::from_encoded(bytes); - } - Modify::DeleteRange(_, ref mut key1, ref mut key2, _) => { - let bytes = keys::data_key(key1.as_encoded()); - *key1 = Key::from_encoded(bytes); - let bytes = keys::data_end_key(key2.as_encoded()); - *key2 = Key::from_encoded(bytes); - } - } - } - - let tablet = self - .factory - .open_tablet(region_id, None, OpenOptions::default().set_cache_only(true)) - .unwrap(); - - write_modifies(&tablet, modifies)?; + for (region_id, modifies) in region_modifies { + let mut map = HashMap::default(); + map.insert(region_id, modifies); + self.engines.lock().unwrap()[®ion_id].modify_on_kv_engine(map)?; } Ok(()) @@ -1628,35 +1598,10 @@ pub mod test_gc_worker { fn async_write( &self, ctx: &Context, - mut batch: WriteData, + batch: WriteData, callback: EngineCallback<()>, ) -> EngineResult<()> { - batch.modifies.iter_mut().for_each(|modify| match modify { - Modify::Delete(_, ref mut key) => { - *key = Key::from_encoded(keys::data_key(key.as_encoded())); - } - Modify::Put(_, ref mut key, _) => { - *key = Key::from_encoded(keys::data_key(key.as_encoded())); - } - 
Modify::PessimisticLock(ref mut key, _) => { - *key = Key::from_encoded(keys::data_key(key.as_encoded())); - } - Modify::DeleteRange(_, ref mut start_key, ref mut end_key, _) => { - *start_key = Key::from_encoded(keys::data_key(start_key.as_encoded())); - *end_key = Key::from_encoded(keys::data_end_key(end_key.as_encoded())); - } - }); - let tablet = self - .factory - .open_tablet( - ctx.region_id, - None, - OpenOptions::default().set_cache_only(true), - ) - .unwrap(); - - callback(write_modifies(&tablet, batch.modifies)); - Ok(()) + self.engines.lock().unwrap()[&ctx.region_id].async_write(ctx, batch, callback) } fn async_snapshot( @@ -1665,15 +1610,12 @@ pub mod test_gc_worker { callback: EngineCallback, ) -> EngineResult<()> { let region_id = ctx.pb_ctx.region_id; - let tablet = self - .factory - .open_tablet(region_id, None, OpenOptions::default().set_cache_only(true)) - .unwrap(); - callback(Ok(RegionSnapshot::from_snapshot( - Arc::new(tablet.snapshot()), - Arc::new(self.region_info.get(®ion_id).unwrap().clone()), - ))); - Ok(()) + self.engines + .lock() + .unwrap() + .get_mut(®ion_id) + .unwrap() + .async_snapshot(ctx, callback) } } } @@ -1683,6 +1625,7 @@ mod tests { use std::{ collections::{BTreeMap, BTreeSet}, + path::Path, sync::mpsc::{self, channel}, thread, time::Duration, @@ -1690,11 +1633,7 @@ mod tests { use api_version::{ApiV2, KvFormat, RawValue}; use engine_rocks::{util::get_cf_handle, RocksEngine}; - use engine_test::{ - ctor::{CfOptions, DbOptions}, - kv::TestTabletFactoryV2, - }; - use engine_traits::{OpenOptions, TabletFactory, ALL_CFS}; + use engine_traits::Peekable as _; use futures::executor::block_on; use kvproto::{ kvrpcpb::{ApiVersion, Op}, @@ -2620,47 +2559,55 @@ mod tests { // region 2: includes ("k10", "value-10") to ("k19", "value-19") // region 3: includes ("k20", "value-20") to ("k29", "value-29") fn multi_gc_engine_setup( + path: &Path, store_id: u64, put_start_ts: u64, delete_start_ts: u64, need_deletion: bool, ) -> ( - Arc, 
MultiRocksEngine, Arc, GcRunner, Vec, mpsc::Receiver, ) { - // Building a tablet factory - let ops = DbOptions::default(); - let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); - let path = Builder::new().prefix("multi-rocks-gc").tempdir().unwrap(); - let factory = Arc::new(TestTabletFactoryV2::new(path.path(), ops, cf_opts)); + let mut engine = MultiRocksEngine::default(); // Note: as the tablet split is not supported yet, we artificially divide the // region to: 1 ["", "k10"], 2 ["k10", "k20"], 3["k20", "30"] let r1 = init_region(b"", b"k10", 1, Some(store_id)); + engine.region_info.insert(1, r1.clone()); + engine.engines.lock().unwrap().insert( + 1, + PrefixedEngine( + TestEngineBuilder::new() + .path(path.join("1")) + .build() + .unwrap(), + ), + ); let r2 = init_region(b"k10", b"k20", 2, Some(store_id)); + engine.region_info.insert(2, r2.clone()); + engine.engines.lock().unwrap().insert( + 2, + PrefixedEngine( + TestEngineBuilder::new() + .path(path.join("2")) + .build() + .unwrap(), + ), + ); let r3 = init_region(b"k20", b"", 3, Some(store_id)); - let _ = factory - .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap(); - let _ = factory - .open_tablet(2, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap(); - let _ = factory - .open_tablet(3, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap(); - - let mut region_info = HashMap::default(); - region_info.insert(1, r1.clone()); - region_info.insert(2, r2.clone()); - region_info.insert(3, r3.clone()); - let mut engine = MultiRocksEngine { - factory: factory.clone(), - region_info, - }; + engine.region_info.insert(3, r3.clone()); + engine.engines.lock().unwrap().insert( + 3, + PrefixedEngine( + TestEngineBuilder::new() + .path(path.join("3")) + .build() + .unwrap(), + ), + ); let (tx, rx) = mpsc::channel(); let feature_gate = FeatureGate::default(); @@ -2705,36 +2652,29 @@ mod tests { } } - ( - factory, - engine, - ri_provider, - 
gc_runner, - vec![r1, r2, r3], - rx, - ) + (engine, ri_provider, gc_runner, vec![r1, r2, r3], rx) } #[test] fn test_gc_for_multi_rocksdb() { + let dir = Builder::new() + .prefix("test_gc_for_multi_rocksdb") + .tempdir() + .unwrap(); let store_id = 1; let put_start_ts = 100; let delete_start_ts = 150; - let (factory, mut engine, _ri_provider, mut gc_runner, regions, _) = - multi_gc_engine_setup(store_id, put_start_ts, delete_start_ts, true); + let (mut engine, _ri_provider, mut gc_runner, regions, _) = + multi_gc_engine_setup(dir.path(), store_id, put_start_ts, delete_start_ts, true); gc_runner.gc(regions[0].clone(), 200.into()).unwrap(); gc_runner.gc(regions[1].clone(), 200.into()).unwrap(); gc_runner.gc(regions[2].clone(), 200.into()).unwrap(); for region_id in 1..=3 { - let db = factory - .open_tablet(region_id, None, OpenOptions::default().set_cache_only(true)) - .unwrap() - .as_inner() - .clone(); - let cf = get_cf_handle(&db, CF_WRITE).unwrap(); + let region_engine = engine.engines.lock().unwrap()[®ion_id].clone(); + for i in 10 * (region_id - 1)..10 * region_id { let k = format!("k{:02}", i).into_bytes(); @@ -2745,19 +2685,30 @@ mod tests { let mut raw_k = vec![b'z']; let suffix = Key::from_raw(&k).append_ts((delete_start_ts + 1).into()); raw_k.extend_from_slice(suffix.as_encoded()); - assert!(db.get_cf(cf, &raw_k).unwrap().is_none()); + assert!( + region_engine + .kv_engine() + .unwrap() + .get_value_cf(CF_WRITE, &raw_k) + .unwrap() + .is_none() + ); } } } #[test] fn test_gc_keys_for_multi_rocksdb() { + let dir = Builder::new() + .prefix("test_gc_keys_for_multi_rocksdb") + .tempdir() + .unwrap(); let store_id = 1; let put_start_ts = 100; let delete_start_ts = 150; - let (factory, mut engine, ri_provider, mut gc_runner, ..) = - multi_gc_engine_setup(store_id, put_start_ts, delete_start_ts, true); + let (mut engine, ri_provider, mut gc_runner, ..) 
= + multi_gc_engine_setup(dir.path(), store_id, put_start_ts, delete_start_ts, true); let mut keys = Vec::new(); for i in 0..30 { @@ -2774,12 +2725,8 @@ mod tests { .unwrap(); for region_id in 1..=3 { - let db = factory - .open_tablet(region_id, None, OpenOptions::default().set_cache_only(true)) - .unwrap() - .as_inner() - .clone(); - let cf = get_cf_handle(&db, CF_WRITE).unwrap(); + let region_engine = engine.engines.lock().unwrap()[®ion_id].clone(); + for i in 10 * (region_id - 1)..10 * region_id { let k = format!("k{:02}", i).into_bytes(); let val = format!("value-{:02}", i).into_bytes(); @@ -2789,10 +2736,24 @@ mod tests { raw_k.extend_from_slice(suffix.as_encoded()); if i % 2 == 0 { - assert!(db.get_cf(cf, &raw_k).unwrap().is_some()); + assert!( + region_engine + .kv_engine() + .unwrap() + .get_value_cf(CF_WRITE, &raw_k) + .unwrap() + .is_some() + ); must_get_on_region(&mut engine, region_id, &k, delete_start_ts - 1, &val); } else { - assert!(db.get_cf(cf, &raw_k).unwrap().is_none()); + assert!( + region_engine + .kv_engine() + .unwrap() + .get_value_cf(CF_WRITE, &raw_k) + .unwrap() + .is_none() + ); must_get_none_on_region(&mut engine, region_id, &k, delete_start_ts - 1); } } @@ -2801,34 +2762,38 @@ mod tests { #[test] fn test_raw_gc_keys_for_multi_rocksdb() { - let store_id = 1; - // Building a tablet factory - let ops = DbOptions::default(); - let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); - let path = Builder::new() - .prefix("multi-rocks-raw-gc") + let dir = Builder::new() + .prefix("test_raw_gc_keys_for_multi_rocksdb") .tempdir() .unwrap(); - let factory = Arc::new(TestTabletFactoryV2::new(path.path(), ops, cf_opts)); + let store_id = 1; + + let mut engine = MultiRocksEngine::default(); // Note: as the tablet split is not supported yet, we artificially divide the // region to: 1 ["", "k10"], 2 ["k10", ""] let r1 = init_region(b"", b"k10", 1, Some(store_id)); + engine.region_info.insert(1, r1.clone()); + 
engine.engines.lock().unwrap().insert( + 1, + PrefixedEngine( + TestEngineBuilder::new() + .path(dir.path().join("1")) + .build() + .unwrap(), + ), + ); let r2 = init_region(b"k10", b"", 2, Some(store_id)); - let _ = factory - .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap(); - let _ = factory - .open_tablet(2, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap(); - - let mut region_info = HashMap::default(); - region_info.insert(1, r1.clone()); - region_info.insert(2, r2.clone()); - let mut engine = MultiRocksEngine { - factory, - region_info, - }; + engine.region_info.insert(2, r2.clone()); + engine.engines.lock().unwrap().insert( + 2, + PrefixedEngine( + TestEngineBuilder::new() + .path(dir.path().join("2")) + .build() + .unwrap(), + ), + ); let (tx, _rx) = mpsc::channel(); let ri_provider = Arc::new(MockRegionInfoProvider::new(vec![r1, r2])); @@ -2945,10 +2910,14 @@ mod tests { end_key: &[u8], exected_regions: Vec, ) { + let dir = Builder::new() + .prefix("test_destroy_range_for_multi_rocksdb_impl") + .tempdir() + .unwrap(); let store_id = 1; let put_start_ts = 100; - let (factory, mut engine, ri_provider, gc_runner, _, _rx) = - multi_gc_engine_setup(store_id, put_start_ts, 0, false); + let (mut engine, ri_provider, gc_runner, _, _rx) = + multi_gc_engine_setup(dir.path(), store_id, put_start_ts, 0, false); let start_key = Key::from_raw(start_key); let end_key = Key::from_raw(end_key); @@ -2960,12 +2929,7 @@ mod tests { let mut regions = BTreeSet::new(); for region_id in 1..=3 { - let db = factory - .open_tablet(region_id, None, OpenOptions::default().set_cache_only(true)) - .unwrap() - .as_inner() - .clone(); - let cf = get_cf_handle(&db, CF_WRITE).unwrap(); + let region_engine = engine.engines.lock().unwrap()[®ion_id].clone(); for i in 10 * (region_id - 1)..10 * region_id { let k = format!("k{:02}", i).into_bytes(); @@ -2978,10 +2942,24 @@ mod tests { if start_key <= key && key < end_key { 
regions.insert(region_id); - assert!(db.get_cf(cf, &raw_k).unwrap().is_none()); + assert!( + region_engine + .kv_engine() + .unwrap() + .get_value_cf(CF_WRITE, &raw_k) + .unwrap() + .is_none() + ); must_get_none_on_region(&mut engine, region_id, &k, put_start_ts + 10); } else { - assert!(db.get_cf(cf, &raw_k).unwrap().is_some()); + assert!( + region_engine + .kv_engine() + .unwrap() + .get_value_cf(CF_WRITE, &raw_k) + .unwrap() + .is_some() + ); must_get_on_region(&mut engine, region_id, &k, put_start_ts + 10, &val); } } diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index be57873b68c..e8b85d37d66 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -1459,6 +1459,9 @@ mod tests { } #[test] + // FIXME: Either implement storage::kv traits for all engine types, or avoid using raw engines + // in this test. + #[cfg(feature = "test-engine-kv-rocksdb")] fn test_out_of_sync_max_ts() { use engine_test::kv::KvTestEngineIterator; use engine_traits::{IterOptions, ReadOptions}; diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 5c573b6e809..043e3ad2d23 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -68,47 +68,47 @@ sse = ["tikv/sse"] portable = ["tikv/portable"] [dependencies] -api_version = { path = "../components/api_version", default-features = false } +api_version = { workspace = true } async-trait = "0.1" -batch-system = { path = "../components/batch-system", default-features = false } -cdc = { path = "../components/cdc", default-features = false } -collections = { path = "../components/collections" } +batch-system = { workspace = true } +cdc = { workspace = true } +collections = { workspace = true } crc64fast = "0.1" crossbeam = "0.8" -encryption = { path = "../components/encryption", default-features = false } -engine_rocks_helper = { path = "../components/engine_rocks_helper" } -error_code = { path = "../components/error_code", default-features = false } +encryption = { 
workspace = true } +engine_rocks_helper = { workspace = true } +error_code = { workspace = true } fail = "0.5" -file_system = { path = "../components/file_system" } +file_system = { workspace = true } futures = "0.3" -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { workspace = true } grpcio-health = { version = "0.10", default-features = false } kvproto = { git = "https://github.com/pingcap/kvproto.git" } libc = "0.2" -log_wrappers = { path = "../components/log_wrappers" } +log_wrappers = { workspace = true } more-asserts = "0.2" -online_config = { path = "../components/online_config", default-features = false } +online_config = { workspace = true } paste = "1.0" -pd_client = { path = "../components/pd_client", default-features = false } +pd_client = { workspace = true } protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } -raft_log_engine = { path = "../components/raft_log_engine", default-features = false } -raftstore = { path = "../components/raftstore", default-features = false } +raft_log_engine = { workspace = true } +raftstore = { workspace = true } rand = "0.8.3" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } tempfile = "3.0" -tidb_query_aggr = { path = "../components/tidb_query_aggr", default-features = false } -tidb_query_common = { path = "../components/tidb_query_common", default-features = false } -tidb_query_datatype = { path = "../components/tidb_query_datatype", default-features = false } -tidb_query_executors = { path = "../components/tidb_query_executors", default-features = false } -tidb_query_expr = { path = "../components/tidb_query_expr", default-features = false } -tikv = { path = "../", default-features = false } 
-tikv_util = { path = "../components/tikv_util", default-features = false } +tidb_query_aggr = { workspace = true } +tidb_query_common = { workspace = true } +tidb_query_datatype = { workspace = true } +tidb_query_executors = { workspace = true } +tidb_query_expr = { workspace = true } +tikv = { workspace = true } +tikv_util = { workspace = true } time = "0.1" tipb = { git = "https://github.com/pingcap/tipb.git" } toml = "0.5" -txn_types = { path = "../components/txn_types", default-features = false } +txn_types = { workspace = true } uuid = { version = "0.8.1", features = ["serde", "v4"] } [target.'cfg(target_os = "linux")'.dependencies] @@ -119,34 +119,34 @@ arrow = "13.0" byteorder = "1.2" # See https://bheisler.github.io/criterion.rs/book/user_guide/known_limitations.html for the usage # of `real_blackbox` feature. -causal_ts = { path = "../components/causal_ts" } -concurrency_manager = { path = "../components/concurrency_manager", default-features = false } +causal_ts = { workspace = true } +concurrency_manager = { workspace = true } criterion = "0.3" criterion-cpu-time = "0.1" -engine_rocks = { path = "../components/engine_rocks", default-features = false } -engine_test = { path = "../components/engine_test", default-features = false } -engine_traits = { path = "../components/engine_traits", default-features = false } -external_storage_export = { path = "../components/external_storage/export", default-features = false } -file_system = { path = "../components/file_system" } +engine_rocks = { workspace = true } +engine_test = { workspace = true } +engine_traits = { workspace = true } +external_storage_export = { workspace = true } +file_system = { workspace = true } hyper = { version = "0.14", default-features = false, features = ["runtime"] } -keys = { path = "../components/keys", default-features = false } -panic_hook = { path = "../components/panic_hook" } -profiler = { path = "../components/profiler" } +keys = { workspace = true } +panic_hook = { workspace 
= true } +profiler = { workspace = true } rand_xorshift = "0.3" -resource_metering = { path = "../components/resource_metering" } -security = { path = "../components/security", default-features = false } +resource_metering = { workspace = true } +security = { workspace = true } serde_json = "1.0" -sst_importer = { path = "../components/sst_importer", default-features = false } -test_backup = { path = "../components/test_backup", default-features = false } -test_coprocessor = { path = "../components/test_coprocessor", default-features = false } -test_pd = { path = "../components/test_pd", default-features = false } -test_pd_client = { path = "../components/test_pd_client" } -test_raftstore = { path = "../components/test_raftstore", default-features = false } -test_sst_importer = { path = "../components/test_sst_importer", default-features = false } -test_storage = { path = "../components/test_storage", default-features = false } -test_util = { path = "../components/test_util", default-features = false } -tidb_query_datatype = { path = "../components/tidb_query_datatype", default-features = false } -tipb_helper = { path = "../components/tipb_helper", default-features = false } +sst_importer = { workspace = true } +test_backup = { workspace = true } +test_coprocessor = { workspace = true } +test_pd = { workspace = true } +test_pd_client = { workspace = true } +test_raftstore = { workspace = true } +test_sst_importer = { workspace = true } +test_storage = { workspace = true } +test_util = { workspace = true } +tidb_query_datatype = { workspace = true } +tipb_helper = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread"] } [target.'cfg(all(target_os = "linux", target_arch = "x86_64"))'.dev-dependencies] From 08b5a4a6e5197143da1f592476da3203748a01b1 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 19 Oct 2022 14:51:54 +0800 Subject: [PATCH 0276/1149] raftstore-v2: test_read is not stable (#13626) 
close tikv/tikv#13625 Signed-off-by: SpadeA-Tang Co-authored-by: Ti Chi Robot --- .../raftstore-v2/src/operation/query/local.rs | 104 ++++++++++-------- 1 file changed, 61 insertions(+), 43 deletions(-) diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 5986d3e4596..12df1e7926f 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -178,7 +178,8 @@ where let mut err = errorpb::Error::default(); err.set_message(format!( - "Fail to get snapshot from LocalReader for region {}. Maybe due to `not leader` or `not applied to the current term`", + "Fail to get snapshot from LocalReader for region {}. \ + Maybe due to `not leader`, `region not found` or `not applied to the current term`", region_id )); let mut resp = RaftCmdResponse::default(); @@ -496,22 +497,28 @@ mod tests { .collect() } - #[test] - fn test_read() { - // It mocks that local reader communications with raftstore. - // rx receives msgs like raftstore, then call f() to do something (such as renew - // lease or something), then send the result back to the local reader through ch - fn handle_msg( - f: F, - rx: Receiver<(u64, PeerMsg)>, - ch_tx: SyncSender>, - ) -> JoinHandle<()> { - thread::spawn(move || { - // Msg for query will be sent + // It mocks that local reader communications with raftstore. + // mix_rx receives a closure, msg receiver, and sender of the msg receiver + // - closure: do some update such as renew lease or something which we could do + // in real raftstore + // - msg receiver: receives the msg from local reader + // - sender of the msg receiver: send the msg receiver out of the thread so that + // we can use it again. 
+ fn mock_raftstore( + mix_rx: Receiver<( + Box, + Receiver<(u64, PeerMsg)>, + SyncSender>, + )>, + ) -> JoinHandle<()> { + thread::spawn(move || { + while let Ok((f, rx, ch_tx)) = mix_rx.recv() { + // Receives msg from local reader let (_, msg) = rx.recv().unwrap(); - f(); + match msg { + // send the result back to local reader PeerMsg::RaftQuery(query) => ReadCallback::set_result( query.ch, QueryResult::Read(ReadResponse { @@ -522,9 +529,12 @@ mod tests { _ => unreachable!(), } ch_tx.send(rx).unwrap(); - }) - } + } + }) + } + #[test] + fn test_read() { let store_id = 1; // Building a tablet factory @@ -538,6 +548,8 @@ mod tests { let store_meta = Arc::new(Mutex::new(StoreMeta::new())); let (mut reader, mut rx) = new_reader(store_id, store_meta.clone()); + let (mix_tx, mix_rx) = sync_channel(1); + let handler = mock_raftstore(mix_rx); let mut region1 = metapb::Region::default(); region1.set_id(1); @@ -552,7 +564,7 @@ mod tests { let leader2 = prs[0].clone(); region1.set_region_epoch(epoch13.clone()); let term6 = 6; - let mut lease = Lease::new(Duration::seconds(1), Duration::milliseconds(250)); + let mut lease = Lease::new(Duration::seconds(10), Duration::milliseconds(2500)); let read_progress = Arc::new(RegionReadProgress::new(®ion1, 1, 1, "".to_owned())); let mut cmd = RaftCmdRequest::default(); @@ -622,17 +634,20 @@ mod tests { // Case: Applied term not match let store_meta_clone = store_meta.clone(); - let handler = handle_msg( - move || { - let mut meta = store_meta_clone.lock().unwrap(); - meta.readers - .get_mut(&1) - .unwrap() - .update(ReadProgress::applied_term(term6)); - }, - rx, - ch_tx.clone(), - ); + // Send what we want to do to mock raftstore + mix_tx + .send(( + Box::new(move || { + let mut meta = store_meta_clone.lock().unwrap(); + meta.readers + .get_mut(&1) + .unwrap() + .update(ReadProgress::applied_term(term6)); + }), + rx, + ch_tx.clone(), + )) + .unwrap(); // The first try will be rejected due to unmatched applied term but after update // 
the applied term by the above thread, the snapshot will be acquired by // retrying. @@ -646,23 +661,25 @@ mod tests { TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.applied_term.get()), 1 ); - handler.join().unwrap(); rx = ch_rx.recv().unwrap(); // Case: Expire lease to make the local reader lease check fail. lease.expire_remote_lease(); let remote = lease.maybe_new_remote_lease(term6).unwrap(); - let handler = handle_msg( - move || { - let mut meta = store_meta.lock().unwrap(); - meta.readers - .get_mut(&1) - .unwrap() - .update(ReadProgress::leader_lease(remote)); - }, - rx, - ch_tx.clone(), - ); + // Send what we want to do to mock raftstore + mix_tx + .send(( + Box::new(move || { + let mut meta = store_meta.lock().unwrap(); + meta.readers + .get_mut(&1) + .unwrap() + .update(ReadProgress::leader_lease(remote)); + }), + rx, + ch_tx.clone(), + )) + .unwrap(); let snap = block_on(reader.snapshot(cmd.clone())).unwrap(); // Updating lease makes cache miss. assert_eq!( @@ -673,15 +690,13 @@ mod tests { TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.lease_expire.get()), 1 ); - handler.join().unwrap(); rx = ch_rx.recv().unwrap(); // Case: Read quorum. 
let mut cmd_read_quorum = cmd.clone(); cmd_read_quorum.mut_header().set_read_quorum(true); - let handler = handle_msg(|| {}, rx, ch_tx); + mix_tx.send((Box::new(move || {}), rx, ch_tx)).unwrap(); let _ = block_on(reader.snapshot(cmd_read_quorum.clone())).unwrap(); - handler.join().unwrap(); ch_rx.recv().unwrap(); // Case: Stale read @@ -709,6 +724,9 @@ mod tests { assert_eq!(read_progress.safe_ts(), 2); let snap = block_on(reader.snapshot(cmd.clone())).unwrap(); assert_eq!(*snap.get_region(), region1); + + drop(mix_tx); + handler.join().unwrap(); } #[test] From da4877567c0ff11a863d6e39923e8c9819064a57 Mon Sep 17 00:00:00 2001 From: haojinming Date: Thu, 20 Oct 2022 11:53:54 +0800 Subject: [PATCH 0277/1149] test: Add rawkv empty key/value test (#13636) close tikv/tikv#13635 Signed-off-by: haojinming --- src/storage/mod.rs | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index e2192573dea..84d52b6990a 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -5244,7 +5244,13 @@ mod tests { fn test_raw_v2_multi_versions() { // Test update on the same key to verify multi-versions implementation of RawKV // V2. 
- let test_data = vec![Some(b"v1"), Some(b"v2"), None, Some(b"v3")]; + let test_data = vec![ + Some(b"v1".to_vec()), + Some(b"v2".to_vec()), + None, + Some(b"".to_vec()), + Some(b"v3".to_vec()), + ]; let k = b"r\0k".to_vec(); let storage = TestStorageBuilder::<_, _, ApiV2>::new(DummyLockManager) @@ -5256,7 +5262,11 @@ mod tests { ..Default::default() }; - let last_data = test_data.last().unwrap().map(|x| (k.clone(), x.to_vec())); + let last_data = test_data + .last() + .unwrap() + .as_ref() + .map(|x| (k.clone(), x.clone())); for v in test_data { if let Some(v) = v { storage @@ -5264,7 +5274,7 @@ mod tests { ctx.clone(), "".to_string(), k.clone(), - v.to_vec(), + v.clone(), 0, expect_ok_callback(tx.clone(), 0), ) @@ -5272,7 +5282,7 @@ mod tests { rx.recv().unwrap(); expect_value( - v.to_vec(), + v.clone(), block_on(storage.raw_get(ctx.clone(), "".to_string(), k.clone())).unwrap(), ); } else { @@ -5543,12 +5553,19 @@ mod tests { ..Default::default() }; + let empty_key = if F::TAG == ApiVersion::V2 { + b"r".to_vec() + } else { + b"".to_vec() + }; let test_data = vec![ + (empty_key.clone(), b"ff".to_vec(), 10), // empty key (b"r\0a".to_vec(), b"aa".to_vec(), 10), (b"r\0b".to_vec(), b"bb".to_vec(), 20), (b"r\0c".to_vec(), b"cc".to_vec(), 30), (b"r\0d".to_vec(), b"dd".to_vec(), 0), (b"r\0e".to_vec(), b"ee".to_vec(), 40), + (b"r\0g".to_vec(), b"".to_vec(), 50), // empty value ]; let kvpairs = test_data @@ -5601,7 +5618,7 @@ mod tests { block_on(storage.raw_scan( ctx, "".to_string(), - b"r".to_vec(), + empty_key, Some(b"rz".to_vec()), 20, false, From 262c1b840773b468ee7857725c305c16c6d5d047 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Thu, 20 Oct 2022 23:57:54 -0700 Subject: [PATCH 0278/1149] raftstore-v2: add read handling after apply (#13565) ref tikv/tikv#12842 Renew lease if possible after write proposal committed. Respond to read index if any pending reads after apply. 
Signed-off-by: qi.xu Signed-off-by: tonyxuqqi Co-authored-by: qi.xu --- components/raftstore-v2/src/fsm/peer.rs | 2 +- .../raftstore-v2/src/operation/command/mod.rs | 7 +- .../raftstore-v2/src/operation/query/lease.rs | 64 +++++++++++-------- .../raftstore-v2/src/operation/query/mod.rs | 64 +++++++++++++++---- .../raftstore-v2/src/operation/ready/mod.rs | 20 +++++- components/raftstore-v2/src/raft/peer.rs | 27 ++++++++ .../tests/integrations/cluster.rs | 14 +++- .../tests/integrations/test_read.rs | 57 ++++++++++++----- .../src/coprocessor/split_check/size.rs | 16 ++--- components/raftstore/src/store/peer.rs | 2 +- components/raftstore/src/store/util.rs | 10 +++ 11 files changed, 208 insertions(+), 75 deletions(-) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 389e59f0ee4..6b9cccc8b84 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -219,7 +219,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, self.on_command(cmd.request, cmd.ch) } PeerMsg::Tick(tick) => self.on_tick(tick), - PeerMsg::ApplyRes(res) => self.fsm.peer.on_apply_res(res), + PeerMsg::ApplyRes(res) => self.fsm.peer.on_apply_res(self.store_ctx, res), PeerMsg::Start => self.on_start(), PeerMsg::Noop => unimplemented!(), PeerMsg::Persisted { diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index bef599d5239..fcfeb29fbe2 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -223,7 +223,7 @@ impl Peer { .send(ApplyTask::CommittedEntries(apply)); } - pub fn on_apply_res(&mut self, apply_res: ApplyRes) { + pub fn on_apply_res(&mut self, ctx: &mut StoreContext, apply_res: ApplyRes) { if !self.serving() { return; } @@ -235,6 +235,7 @@ impl Peer { self.raft_group_mut() .advance_apply_to(apply_res.applied_index); let is_leader = 
self.is_leader(); + let progress_to_be_updated = self.entry_storage().applied_term() != apply_res.applied_term; let entry_storage = self.entry_storage_mut(); entry_storage .apply_state_mut() @@ -242,10 +243,8 @@ impl Peer { entry_storage.set_applied_term(apply_res.applied_term); if !is_leader { entry_storage.compact_entry_cache(apply_res.applied_index + 1); - // TODO: handle read. - } else { - // TODO: handle read. } + self.handle_read_on_apply(ctx, apply_res, progress_to_be_updated); } } diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index 00a485c8460..1ae4aecd1cc 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -175,38 +175,50 @@ impl Peer { self.maybe_update_read_progress(reader, progress); } if let Some(progress) = read_progress { - let mut meta = store_meta.lock().unwrap(); - // TODO: remove this block of code when snapshot is done; add the logic into - // on_persist_snapshot. 
- let reader = meta.readers.get_mut(&self.region_id()); - if reader.is_none() { - let region = self.region().clone(); - let region_id = region.get_id(); - let peer_id = self.peer_id(); - let delegate = ReadDelegate { - region: Arc::new(region), - peer_id, - term: self.term(), - applied_term: self.entry_storage().applied_term(), - leader_lease: None, - last_valid_ts: Timespec::new(0, 0), - tag: format!("[region {}] {}", region_id, peer_id), - read_progress: self.read_progress().clone(), - pending_remove: false, - bucket_meta: None, - txn_extra_op: Default::default(), - txn_ext: Default::default(), - track_ver: TrackVer::new(), - }; - meta.readers.insert(self.region_id(), delegate); - } + // TODO: remove it + self.add_reader_if_necessary(store_meta); + let mut meta = store_meta.lock().unwrap(); let reader = meta.readers.get_mut(&self.region_id()).unwrap(); self.maybe_update_read_progress(reader, progress); } } - fn maybe_update_read_progress(&self, reader: &mut ReadDelegate, progress: ReadProgress) { + // TODO: remove this block of code when snapshot is done; add the logic into + // on_persist_snapshot. + pub(crate) fn add_reader_if_necessary(&mut self, store_meta: &mut Arc>>) { + let mut meta = store_meta.lock().unwrap(); + // TODO: remove this block of code when snapshot is done; add the logic into + // on_persist_snapshot. 
+ let reader = meta.readers.get_mut(&self.region_id()); + if reader.is_none() { + let region = self.region().clone(); + let region_id = region.get_id(); + let peer_id = self.peer_id(); + let delegate = ReadDelegate { + region: Arc::new(region), + peer_id, + term: self.term(), + applied_term: self.entry_storage().applied_term(), + leader_lease: None, + last_valid_ts: Timespec::new(0, 0), + tag: format!("[region {}] {}", region_id, peer_id), + read_progress: self.read_progress().clone(), + pending_remove: false, + bucket_meta: None, + txn_extra_op: Default::default(), + txn_ext: Default::default(), + track_ver: TrackVer::new(), + }; + meta.readers.insert(self.region_id(), delegate); + } + } + + pub(crate) fn maybe_update_read_progress( + &self, + reader: &mut ReadDelegate, + progress: ReadProgress, + ) { debug!( self.logger, "update read progress"; diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 0b10e0679a5..960e667c7d9 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -18,14 +18,16 @@ use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ errorpb, raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse, StatusCmdType}, + raft_serverpb::RaftApplyState, }; use raft::Ready; use raftstore::{ errors::RAFTSTORE_IS_BUSY, store::{ - cmd_resp, local_metrics::RaftMetrics, metrics::RAFT_READ_INDEX_PENDING_COUNT, - msg::ErrorCallback, region_meta::RegionMeta, util, util::LeaseState, GroupState, - ReadIndexContext, RequestPolicy, Transport, + cmd_resp, fsm::ApplyMetrics, local_metrics::RaftMetrics, + metrics::RAFT_READ_INDEX_PENDING_COUNT, msg::ErrorCallback, region_meta::RegionMeta, util, + util::LeaseState, GroupState, ReadCallback, ReadIndexContext, ReadProgress, RequestPolicy, + Transport, }, Error, Result, }; @@ -38,7 +40,8 @@ use crate::{ fsm::PeerFsmDelegate, raft::Peer, router::{ - message::RaftRequest, DebugInfoChannel, 
PeerMsg, QueryResChannel, QueryResult, ReadResponse, + message::RaftRequest, ApplyRes, DebugInfoChannel, PeerMsg, QueryResChannel, QueryResult, + ReadResponse, }, }; @@ -56,12 +59,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: raftstore::store::Transport> return Ok(RequestPolicy::ReadIndex); } - // If applied index's term differs from current raft's term, leader - // transfer must happened, if read locally, we may read old value. - // TODO: to add the block back when apply is implemented. - // if !self.fsm.peer().has_applied_to_current_term() { - // return Ok(RequestPolicy::ReadIndex); - // } + // If applied index's term is differ from current raft's term, leader transfer + // must happened, if read locally, we may read old value. + if !self.fsm.peer().applied_to_current_term() { + return Ok(RequestPolicy::ReadIndex); + } match self.fsm.peer_mut().inspect_lease() { LeaseState::Valid => Ok(RequestPolicy::ReadLocal), @@ -218,9 +220,10 @@ impl Peer { } } - // TODO: add ready_to_handle_read for splitting and merging - while let Some(mut read) = self.pending_reads_mut().pop_front() { - self.respond_read_index(&mut read, ctx); + if self.ready_to_handle_read() { + while let Some(mut read) = self.pending_reads_mut().pop_front() { + self.respond_read_index(&mut read, ctx); + } } } @@ -377,4 +380,39 @@ impl Peer { .unwrap(); ch.set_result(meta); } + + // the v1's post_apply + // As the logic is mostly for read, rename it to handle_read_after_apply + pub fn handle_read_on_apply( + &mut self, + ctx: &mut StoreContext, + apply_res: ApplyRes, + progress_to_be_updated: bool, + ) { + // TODO: add is_handling_snapshot check + // it could update has_ready + + // TODO: add peer_stat(for PD hotspot scheduling) and deleted_keys_hint + if !self.is_leader() { + self.post_pending_read_index_on_replica(ctx) + } else if self.ready_to_handle_read() { + while let Some(mut read) = self.pending_reads_mut().pop_front() { + self.respond_read_index(&mut read, ctx); + } + } + 
self.pending_reads_mut().gc(); + self.read_progress_mut() + .update_applied_core(apply_res.applied_index); + + // Only leaders need to update applied_term. + if progress_to_be_updated && self.is_leader() { + // TODO: add coprocessor_host hook + let progress = ReadProgress::applied_term(apply_res.applied_term); + // TODO: remove it + self.add_reader_if_necessary(&mut ctx.store_meta); + let mut meta = ctx.store_meta.lock().unwrap(); + let reader = meta.readers.get_mut(&self.region_id()).unwrap(); + self.maybe_update_read_progress(reader, progress); + } + } } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index e20192394a6..3129dcfb832 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -28,6 +28,7 @@ use protobuf::Message as _; use raft::{eraftpb, Ready}; use raftstore::store::{util, ExtraStates, FetchedLogs, Transport, WriteTask}; use slog::{debug, error, trace, warn}; +use tikv_util::time::{duration_to_sec, monotonic_raw_now}; pub use self::async_writer::AsyncWriter; use crate::{ @@ -228,7 +229,24 @@ impl Peer { // TODO: skip handling committed entries if a snapshot is being applied // asynchronously. if self.is_leader() { - // TODO: Update lease + for entry in committed_entries.iter().rev() { + // TODO: handle raft_log_size_hint + let propose_time = self + .proposals() + .find_propose_time(entry.get_term(), entry.get_index()); + if let Some(propose_time) = propose_time { + // We must renew current_time because this value may be created a long time ago. + // If we do not renew it, this time may be smaller than propose_time of a + // command, which was proposed in another thread while this thread receives its + // AppendEntriesResponse and is ready to calculate its commit-log-duration. 
+ ctx.current_time.replace(monotonic_raw_now()); + ctx.raft_metrics.commit_log.observe(duration_to_sec( + (ctx.current_time.unwrap() - propose_time).to_std().unwrap(), + )); + self.maybe_renew_leader_lease(propose_time, &mut ctx.store_meta, None); + break; + } + } } self.schedule_apply_committed_entries(ctx, committed_entries); } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index e7ee6e7465a..650c410cef9 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -187,6 +187,11 @@ impl Peer { &self.read_progress } + #[inline] + pub fn read_progress_mut(&mut self) -> &mut Arc { + &mut self.read_progress + } + #[inline] pub fn leader_lease(&self) -> &Lease { &self.leader_lease @@ -382,6 +387,28 @@ impl Peer { } #[inline] + pub fn proposals(&self) -> &ProposalQueue> { + &self.proposals + } + + #[inline] + pub fn ready_to_handle_read(&self) -> bool { + // TODO: It may cause read index to wait a long time. + + // There may be some values that are not applied by this leader yet but the old + // leader, if applied_term isn't equal to current term. + self.applied_to_current_term() + // There may be stale read if the old leader splits really slow, + // the new region may already elected a new leader while + // the old leader still think it owns the split range. + && !self.is_splitting() + // There may be stale read if a target leader is in another store and + // applied commit merge, written new values, but the sibling peer in + // this store does not apply commit merge, so the leader is not ready + // to read, until the merge is rollbacked. 
+ && !self.is_merging() + } + pub fn apply_scheduler(&self) -> &ApplyScheduler { self.apply_scheduler.as_ref().unwrap() } diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index d46ff09f2b1..554db96acbf 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -323,12 +323,16 @@ pub struct Cluster { impl Default for Cluster { fn default() -> Cluster { - Cluster::with_node_count(1) + Cluster::with_node_count(1, None) } } impl Cluster { - pub fn with_node_count(count: usize) -> Self { + pub fn with_config(config: Config) -> Cluster { + Cluster::with_node_count(1, Some(config)) + } + + pub fn with_node_count(count: usize, config: Option) -> Self { let pd_server = test_pd::Server::new(1); let mut cluster = Cluster { pd_server, @@ -336,7 +340,11 @@ impl Cluster { receivers: vec![], routers: vec![], }; - let mut cfg = v2_default_config(); + let mut cfg = if let Some(config) = config { + config + } else { + v2_default_config() + }; disable_all_auto_ticks(&mut cfg); for _ in 1..=count { let mut node = TestNode::with_pd(&cluster.pd_server); diff --git a/components/raftstore-v2/tests/integrations/test_read.rs b/components/raftstore-v2/tests/integrations/test_read.rs index bb7156c6af7..9f3c5c2c03a 100644 --- a/components/raftstore-v2/tests/integrations/test_read.rs +++ b/components/raftstore-v2/tests/integrations/test_read.rs @@ -2,14 +2,17 @@ use futures::executor::block_on; use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, ReadIndexRequest, Request, StatusCmdType}; -use tikv_util::store::new_peer; +use raftstore_v2::router::PeerMsg; +use tikv_util::{config::ReadableDuration, store::new_peer}; use txn_types::WriteBatchFlags; -use crate::cluster::Cluster; +use crate::cluster::{v2_default_config, Cluster}; #[test] fn test_read_index() { - let cluster = Cluster::default(); + let mut config = v2_default_config(); + 
config.raft_store_max_leader_lease = ReadableDuration::millis(150); + let cluster = Cluster::with_config(config); let router = cluster.router(0); std::thread::sleep(std::time::Duration::from_millis(200)); let region_id = 2; @@ -20,7 +23,7 @@ fn test_read_index() { let res = router.query(region_id, req.clone()).unwrap(); let status_resp = res.response().unwrap().get_status_response(); let detail = status_resp.get_region_detail(); - let mut region = detail.get_region().clone(); + let region = detail.get_region().clone(); let read_index_req = ReadIndexRequest::default(); let mut req = RaftCmdRequest::default(); @@ -28,7 +31,7 @@ fn test_read_index() { req.mut_header().set_term(7); req.mut_header().set_region_id(region_id); req.mut_header() - .set_region_epoch(region.take_region_epoch()); + .set_region_epoch(region.get_region_epoch().clone()); let mut request_inner = Request::default(); request_inner.set_cmd_type(CmdType::Snap); request_inner.set_read_index(read_index_req); @@ -37,7 +40,38 @@ fn test_read_index() { let resp = res.read().unwrap(); assert_eq!(resp.read_index, 6); // single node commited index should be 6. - // TODO: add more test when write is implemented. + let res = router.query(region_id, req.clone()).unwrap(); + let resp = res.read().unwrap(); + // Since it's still with the lease, read index will be skipped. 
+ assert_eq!(resp.read_index, 0); + + std::thread::sleep(std::time::Duration::from_millis(200)); + // the read lease should be expired + let res = router.query(region_id, req.clone()).unwrap(); + let resp = res.read().unwrap(); + assert_eq!(resp.read_index, 6); + + std::thread::sleep(std::time::Duration::from_millis(200)); + let read_req = req.clone(); + // the read lease should be expired and renewed by write + let mut req = RaftCmdRequest::default(); + req.mut_header().set_peer(new_peer(1, 3)); + req.mut_header().set_region_id(region_id); + req.mut_header() + .set_region_epoch(region.get_region_epoch().clone()); + let mut put_req = Request::default(); + put_req.set_cmd_type(CmdType::Put); + put_req.mut_put().set_key(b"key".to_vec()); + put_req.mut_put().set_value(b"value".to_vec()); + req.mut_requests().push(put_req); + + let (msg, sub) = PeerMsg::raft_command(req.clone()); + router.send(region_id, msg).unwrap(); + block_on(sub.result()).unwrap(); + + let res = router.query(region_id, read_req).unwrap(); + let resp = res.read().unwrap(); + assert_eq!(resp.read_index, 0); } #[test] @@ -217,16 +251,7 @@ fn test_local_read() { request_inner.set_cmd_type(CmdType::Snap); req.mut_requests().push(request_inner); - // FIXME: Get snapshot from local reader, but it will fail as the leader has not - // applied in the current term (due to unimplementation of ApplyRes). 
- let resp = block_on(async { router.get_snapshot(req.clone()).await.unwrap_err() }); - assert!( - resp.get_header() - .get_error() - .get_message() - .contains("Fail to get snapshot ") - ); - + block_on(async { router.get_snapshot(req.clone()).await.unwrap() }); let res = router.query(region_id, req.clone()).unwrap(); let resp = res.read().unwrap(); // The read index will be 0 as the retry process in the `get_snapshot` will diff --git a/components/raftstore/src/coprocessor/split_check/size.rs b/components/raftstore/src/coprocessor/split_check/size.rs index 44318a27b60..bdcf817365c 100644 --- a/components/raftstore/src/coprocessor/split_check/size.rs +++ b/components/raftstore/src/coprocessor/split_check/size.rs @@ -615,7 +615,7 @@ pub mod tests { }; let cop_host = CoprocessorHost::new(tx.clone(), cfg); let mut runnable = SplitCheckRunner::new(engine.clone(), tx, cop_host.clone()); - for i in 0..2000 { + for i in 0..1000 { // if not mvcc, kv size is (6+1)*2 = 14, given bucket size is 3000, expect each // bucket has about 210 keys if mvcc, kv size is about 18*2 = 36, expect each // bucket has about 80 keys @@ -646,7 +646,7 @@ pub mod tests { let end = format!("{:04}", 20).into_bytes(); // insert keys into 0000 ~ 0020 with 000000 ~ 002000 - for i in 0..2000 { + for i in 0..1000 { // kv size is (6+1)*2 = 14, given bucket size is 3000, expect each bucket has // about 210 keys if mvcc, kv size is about 18*2 = 36, expect each bucket has // about 80 keys @@ -667,9 +667,9 @@ pub mod tests { assert_eq!(host.policy(), CheckPolicy::Approximate); if !mvcc { - must_generate_buckets_approximate(&rx, Some(BucketRange(start, end)), 150, 450, mvcc); + must_generate_buckets_approximate(&rx, Some(BucketRange(start, end)), 75, 225, mvcc); } else { - must_generate_buckets_approximate(&rx, Some(BucketRange(start, end)), 70, 150, mvcc); + must_generate_buckets_approximate(&rx, Some(BucketRange(start, end)), 35, 85, mvcc); } drop(rx); } @@ -685,16 +685,12 @@ pub mod tests { #[test] fn 
test_generate_bucket_by_approximate() { - for cf in LARGE_CFS { - test_generate_bucket_impl(LARGE_CFS, cf, false); - } + test_generate_bucket_impl(LARGE_CFS, CF_WRITE, false); } #[test] fn test_generate_bucket_mvcc_by_approximate() { - for cf in LARGE_CFS { - test_generate_bucket_impl(LARGE_CFS, cf, true); - } + test_generate_bucket_impl(LARGE_CFS, CF_DEFAULT, true); } #[test] diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 37c2fd5a99a..522b68e2f09 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -153,7 +153,7 @@ impl ProposalQueue { }) } - fn find_propose_time(&self, term: u64, index: u64) -> Option { + pub fn find_propose_time(&self, term: u64, index: u64) -> Option { self.queue .binary_search_by_key(&(term, index), |p: &Proposal<_>| (p.term, p.index)) .ok() diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 5f2c6615527..a4b48e4ba37 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1033,6 +1033,16 @@ impl RegionReadProgress { } } + // TODO: remove it when coprocessor hook is implemented in v2. 
+ pub fn update_applied_core(&self, applied: u64) { + let mut core = self.core.lock().unwrap(); + if let Some(ts) = core.update_applied(applied) { + if !core.pause { + self.safe_ts.store(ts, AtomicOrdering::Release); + } + } + } + pub fn update_safe_ts(&self, apply_index: u64, ts: u64) { if apply_index == 0 || ts == 0 { return; From 92aaf95e0dc83f855cf6d9bfd046c2ea0dfc9f3d Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 21 Oct 2022 16:53:55 +0800 Subject: [PATCH 0279/1149] cdc: observe the slowest resolved ts lag (#13647) close tikv/tikv#13646 cdc: observe the slowest resolved ts lag Signed-off-by: Neil Shen Co-authored-by: Ti Chi Robot --- components/cdc/src/delegate.rs | 2 -- components/cdc/src/endpoint.rs | 8 +++++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index da12c1521d6..de38a7b1fc8 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -423,8 +423,6 @@ impl Delegate { let resolved_ts = resolver.resolve(min_ts); debug!("cdc resolved ts updated"; "region_id" => self.region_id, "resolved_ts" => resolved_ts); - CDC_RESOLVED_TS_GAP_HISTOGRAM - .observe((min_ts.physical() - resolved_ts.physical()) as f64 / 1000f64); Some(resolved_ts) } diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 26c0a11371e..7542bb1bfc8 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -65,7 +65,7 @@ use crate::{ }; const FEATURE_RESOLVED_TS_STORE: Feature = Feature::require(5, 0, 0); -const METRICS_FLUSH_INTERVAL: u64 = 10_000; // 10s +const METRICS_FLUSH_INTERVAL: u64 = 1_000; // 1s // 10 minutes, it's the default gc life time of TiDB // and is long enough for most transactions. 
const WARN_RESOLVED_TS_LAG_THRESHOLD: Duration = Duration::from_secs(600); @@ -1247,6 +1247,12 @@ impl, E: KvEngine> RunnableWithTimer for Endpoin .physical() .saturating_sub(self.min_resolved_ts.physical()) as i64, ); + CDC_RESOLVED_TS_GAP_HISTOGRAM.observe( + self.current_ts + .physical() + .saturating_sub(self.min_resolved_ts.physical()) as f64 + / 1000f64, + ); } self.min_resolved_ts = TimeStamp::max(); self.current_ts = TimeStamp::max(); From 71af2905ad8fdd6e3aade27eb66fac06e77d6cec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 24 Oct 2022 17:03:56 +0800 Subject: [PATCH 0280/1149] log-backup: Fix Initial Scan Racing (#13628) close tikv/tikv#13616 Checking the resolver version by observer ID. Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- components/backup-stream/src/event_loader.rs | 27 ++++++-- .../backup-stream/src/subscription_manager.rs | 5 +- components/backup-stream/tests/mod.rs | 66 +++++++++++++++++-- 3 files changed, 85 insertions(+), 13 deletions(-) diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 90a330cf446..27c05b5b875 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -10,7 +10,7 @@ use engine_traits::{KvEngine, CF_DEFAULT, CF_WRITE}; use futures::executor::block_on; use kvproto::{kvrpcpb::ExtraOp, metapb::Region, raft_cmdpb::CmdType}; use raftstore::{ - coprocessor::RegionInfoProvider, + coprocessor::{ObserveHandle, RegionInfoProvider}, router::RaftStoreRouter, store::{fsm::ChangeObserver, Callback, SignificantMsg}, }; @@ -335,17 +335,19 @@ where Ok(snap) } - pub fn with_resolver( + fn with_resolver( &self, region: &Region, + handle: &ObserveHandle, f: impl FnOnce(&mut TwoPhaseResolver) -> Result, ) -> Result { - Self::with_resolver_by(&self.tracing, region, f) + Self::with_resolver_by(&self.tracing, region, handle, f) } - pub fn 
with_resolver_by( + fn with_resolver_by( tracing: &SubscriptionTracer, region: &Region, + handle: &ObserveHandle, f: impl FnOnce(&mut TwoPhaseResolver) -> Result, ) -> Result { let region_id = region.get_id(); @@ -353,6 +355,8 @@ where .get_subscription_of(region_id) .ok_or_else(|| Error::Other(box_err!("observer for region {} canceled", region_id))) .and_then(|v| { + // NOTE: once we have compared the observer handle, perhaps we can remove this + // check because epoch version changed implies observer handle changed. raftstore::store::util::compare_region_epoch( region.get_region_epoch(), &v.value().meta, @@ -362,6 +366,10 @@ where true, false, )?; + if v.value().handle().id != handle.id { + return Err(box_err!("stale observe handle {:?}, should be {:?}, perhaps new initial scanning starts", + handle.id, v.value().handle().id)); + } Ok(v) }) .map_err(|err| Error::Contextual { @@ -379,6 +387,7 @@ where fn scan_and_async_send( &self, region: &Region, + handle: &ObserveHandle, mut event_loader: EventLoader, join_handles: &mut Vec>, ) -> Result { @@ -401,7 +410,9 @@ where // and we would exit after the first run of loop :( let no_progress = event_loader.entry_batch.is_empty(); let stat = stat?; - self.with_resolver(region, |r| event_loader.emit_entries_to(&mut events, r))?; + self.with_resolver(region, handle, |r| { + event_loader.emit_entries_to(&mut events, r) + })?; if no_progress { metrics::INITIAL_SCAN_DURATION.observe(start.saturating_elapsed_secs()); return Ok(stats.stat); @@ -429,6 +440,8 @@ where pub fn do_initial_scan( &self, region: &Region, + // We are using this handle for checking whether the initial scan is stale. + handle: ObserveHandle, start_ts: TimeStamp, snap: impl Snapshot, ) -> Result { @@ -440,13 +453,13 @@ where // It is ok to sink more data than needed. So scan to +inf TS for convenance. 
let event_loader = EventLoader::load_from(snap, start_ts, TimeStamp::max(), region)?; - let stats = self.scan_and_async_send(region, event_loader, &mut join_handles)?; + let stats = self.scan_and_async_send(region, &handle, event_loader, &mut join_handles)?; Handle::current() .block_on(futures::future::try_join_all(join_handles)) .map_err(|err| annotate!(err, "tokio runtime failed to join consuming threads"))?; - Self::with_resolver_by(&tr, region, |r| { + Self::with_resolver_by(&tr, region, &handle, |r| { r.phase_one_done(); Ok(()) }) diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index d47974bcd42..83181829b43 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -128,12 +128,15 @@ where handle: ObserveHandle, ) -> Result { let region_id = region.get_id(); + let h = handle.clone(); // Note: we have external retry at `ScanCmd::exec_by_with_retry`, should we keep // retrying here? 
let snap = self.observe_over_with_retry(region, move || { ChangeObserver::from_pitr(region_id, handle.clone()) })?; - let stat = self.do_initial_scan(region, start_ts, snap)?; + #[cfg(feature = "failpoints")] + fail::fail_point!("scan_after_get_snapshot"); + let stat = self.do_initial_scan(region, h, start_ts, snap)?; Ok(stat) } diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index 6e902fb1e08..284f1605c30 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -100,6 +100,7 @@ pub struct SuiteBuilder { name: String, nodes: usize, metastore_error: Box Result<()> + Send + Sync>, + cfg: Box, } impl SuiteBuilder { @@ -108,6 +109,9 @@ impl SuiteBuilder { name: s.to_owned(), nodes: 4, metastore_error: Box::new(|_| Ok(())), + cfg: Box::new(|cfg| { + cfg.enable = true; + }), } } @@ -124,11 +128,21 @@ impl SuiteBuilder { self } + pub fn cfg(mut self, f: impl FnOnce(&mut BackupStreamConfig) + 'static) -> Self { + let old_f = self.cfg; + self.cfg = Box::new(move |cfg| { + old_f(cfg); + f(cfg); + }); + self + } + pub fn build(self) -> Suite { let Self { name: case, nodes: n, metastore_error, + cfg: cfg_f, } = self; info!("start test"; "case" => %case, "nodes" => %n); @@ -154,8 +168,10 @@ impl SuiteBuilder { suite.endpoints.insert(id, worker); } suite.cluster.run(); + let mut cfg = BackupStreamConfig::default(); + cfg_f(&mut cfg); for id in 1..=(n as u64) { - suite.start_endpoint(id); + suite.start_endpoint(id, cfg.clone()); } // We must wait until the endpoints get ready to watching the metastore, or some // modifies may be lost. 
Either make Endpoint::with_client wait until watch did @@ -247,17 +263,16 @@ impl Suite { worker } - fn start_endpoint(&mut self, id: u64) { + fn start_endpoint(&mut self, id: u64, mut cfg: BackupStreamConfig) { let cluster = &mut self.cluster; let worker = self.endpoints.get_mut(&id).unwrap(); let sim = cluster.sim.wl(); let raft_router = sim.get_server_router(id); let cm = sim.get_concurrency_manager(id); let regions = sim.region_info_accessors.get(&id).unwrap().clone(); - let mut cfg = BackupStreamConfig::default(); + let ob = self.obs.get(&id).unwrap().clone(); cfg.enable = true; cfg.temp_path = format!("/{}/{}", self.temp_files.path().display(), id); - let ob = self.obs.get(&id).unwrap().clone(); let endpoint = Endpoint::new( id, self.meta_store.clone(), @@ -313,7 +328,10 @@ impl Suite { rx.into_iter() .map(|r| match r { - GetCheckpointResult::Ok { checkpoint, .. } => checkpoint.into_inner(), + GetCheckpointResult::Ok { checkpoint, region } => { + info!("getting checkpoint"; "checkpoint" => %checkpoint, "region" => ?region); + checkpoint.into_inner() + } GetCheckpointResult::NotFound { .. } | GetCheckpointResult::EpochNotMatch { .. 
} => { unreachable!() @@ -834,6 +852,44 @@ mod test { suite.cluster.shutdown(); } + #[test] + fn frequent_initial_scan() { + let mut suite = super::SuiteBuilder::new_named("frequent_initial_scan") + .cfg(|c| c.num_threads = 1) + .build(); + let keys = (1..1024).map(|i| make_record_key(1, i)).collect::>(); + let start_ts = suite.tso(); + suite.must_kv_prewrite( + 1, + keys.clone() + .into_iter() + .map(|k| mutation(k, b"hello, world".to_vec())) + .collect(), + make_record_key(1, 886), + start_ts, + ); + fail::cfg("scan_after_get_snapshot", "pause").unwrap(); + suite.must_register_task(1, "frequent_initial_scan"); + let commit_ts = suite.tso(); + suite.commit_keys(keys, start_ts, commit_ts); + suite.run(|| { + Task::ModifyObserve(backup_stream::ObserveOp::Stop { + region: suite.cluster.get_region(&make_record_key(1, 886)), + }) + }); + suite.run(|| { + Task::ModifyObserve(backup_stream::ObserveOp::Start { + region: suite.cluster.get_region(&make_record_key(1, 886)), + }) + }); + fail::cfg("scan_after_get_snapshot", "off").unwrap(); + suite.force_flush_files("frequent_initial_scan"); + suite.wait_for_flush(); + std::thread::sleep(Duration::from_secs(1)); + let c = suite.global_checkpoint(); + assert!(c > commit_ts.into_inner(), "{} vs {}", c, commit_ts); + } + #[test] /// This case tests whether the backup can continue when the leader failes. 
fn leader_down() { From f0c33168fbdece62837ca40d80d570855d4871d0 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Mon, 24 Oct 2022 18:51:56 +0800 Subject: [PATCH 0281/1149] tikv_util: fix panic when there are multiple cgroup2 mountinfos (#13661) close tikv/tikv#13660 fix panic when there are multiple cgroup2 mountinfos Signed-off-by: tabokie --- components/tikv_util/src/sys/cgroup.rs | 47 +++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/components/tikv_util/src/sys/cgroup.rs b/components/tikv_util/src/sys/cgroup.rs index 59830748382..df15a2dac76 100644 --- a/components/tikv_util/src/sys/cgroup.rs +++ b/components/tikv_util/src/sys/cgroup.rs @@ -255,11 +255,18 @@ fn cgroup_mountinfos_v2() -> HashMap { } fn parse_mountinfos_v2(infos: Vec) -> HashMap { - let mut ret = HashMap::new(); - let mut cg_infos = infos.into_iter().filter(|x| x.fs_type == "cgroup2"); - if let Some(cg_info) = cg_infos.next() { - assert!(cg_infos.next().is_none()); // Only one item for cgroup-2. - ret.insert("".to_string(), (cg_info.root, cg_info.mount_point)); + let mut ret: HashMap = HashMap::new(); + let cg_infos = infos.into_iter().filter(|x| x.fs_type == "cgroup2"); + for info in cg_infos { + // Should only be one item for cgroup-2. + if let Some((root, mount_point)) = ret.insert("".to_string(), (info.root, info.mount_point)) + { + warn!( + "Found multiple cgroup2 mountinfos, dropping {} {}", + root, + mount_point.display() + ); + } } ret } @@ -450,6 +457,36 @@ mod tests { assert_eq!(cgroup_sys.memory_limit_in_bytes(), None); } + #[test] + fn test_conflicting_mountinfo() { + let temp = tempfile::TempDir::new().unwrap(); + let dir = temp.path().to_str().unwrap(); + std::fs::copy("/proc/self/stat", &format!("{}/stat", dir)).unwrap(); + + let mut f = OpenOptions::new() + .create(true) + .write(true) + .open(&format!("{}/mountinfo", dir)) + .unwrap(); + f.write_all(b"1663 1661 0:27 /../../../../../.. 
/sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - cgroup2 cgroup2 rw + 1663 1661 0:27 /../../../../../.. /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - cgroup2 cgroup2 rw").unwrap(); + + let cgroups = parse_proc_cgroup_v2("0::/\n"); + let mount_points = { + let infos = Process::new_with_root(PathBuf::from(dir)) + .and_then(|x| x.mountinfo()) + .unwrap(); + parse_mountinfos_v2(infos) + }; + let cgroup_sys = CGroupSys { + cgroups, + mount_points, + is_v2: true, + }; + + assert_eq!(cgroup_sys.memory_limit_in_bytes(), None); + } + #[test] fn test_cgroup_without_mountinfo() { let temp = tempfile::TempDir::new().unwrap(); From 7692be47d181f0ceb96d904fe7bc485f274e0c1f Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Tue, 25 Oct 2022 10:51:56 +0800 Subject: [PATCH 0282/1149] mvcc: log the mvcc key if default not found error happens (#13659) close tikv/tikv#13655 Log the mvcc key if the default not error happens. Usually, the next step is to locate the key region and do an unsafe recovery, so mvcc key format logging is more convenient. 
Signed-off-by: cfzjywxk Co-authored-by: Ti Chi Robot --- src/storage/mvcc/reader/point_getter.rs | 2 +- src/storage/mvcc/reader/reader.rs | 17 +++++++++++++---- src/storage/mvcc/reader/scanner/mod.rs | 4 ++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index 1e26d9bf21b..012189201c5 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -347,7 +347,7 @@ impl PointGetter { Ok(value) } else { Err(default_not_found_error( - user_key.to_raw()?, + user_key.clone().append_ts(write_start_ts).into_encoded(), "load_data_from_default_cf", )) } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 0f6eb5a390e..6bf712050ac 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -204,7 +204,7 @@ impl MvccReader { self.statistics.data.processed_keys += 1; Ok(val) } - None => Err(default_not_found_error(key.to_raw()?, "get")), + None => Err(default_not_found_error(k.into_encoded(), "get")), } } @@ -2163,7 +2163,10 @@ pub mod tests { }, Case { // write has no short_value, the reader has a cursor, got nothing - expected: Err(default_not_found_error(k.to_vec(), "get")), + expected: Err(default_not_found_error( + Key::from_raw(k).append_ts(TimeStamp::new(3)).into_encoded(), + "get", + )), modifies: vec![Modify::Put( CF_WRITE, Key::from_raw(k).append_ts(TimeStamp::new(1)), @@ -2189,7 +2192,10 @@ pub mod tests { }, Case { // write has no short_value, the reader has no cursor, got nothing - expected: Err(default_not_found_error(k.to_vec(), "get")), + expected: Err(default_not_found_error( + Key::from_raw(k).append_ts(TimeStamp::new(5)).into_encoded(), + "get", + )), modifies: vec![], scan_mode: None, key: Key::from_raw(k), @@ -2248,7 +2254,10 @@ pub mod tests { // some write for `key` at `ts` exists, load data return Err // todo: "some write for `key` at `ts` exists" should 
be checked by `test_get_write` // "load data return Err" is checked by test_load_data - expected: Err(default_not_found_error(k.to_vec(), "get")), + expected: Err(default_not_found_error( + Key::from_raw(k).append_ts(TimeStamp::new(2)).into_encoded(), + "get", + )), modifies: vec![Modify::Put( CF_WRITE, Key::from_raw(k).append_ts(TimeStamp::new(2)), diff --git a/src/storage/mvcc/reader/scanner/mod.rs b/src/storage/mvcc/reader/scanner/mod.rs index 7b799a3f456..664a4fed99e 100644 --- a/src/storage/mvcc/reader/scanner/mod.rs +++ b/src/storage/mvcc/reader/scanner/mod.rs @@ -366,7 +366,7 @@ where || default_cursor.key(&mut statistics.data) != seek_key.as_encoded().as_slice() { return Err(default_not_found_error( - user_key.to_raw()?, + user_key.clone().append_ts(write_start_ts).into_encoded(), "near_load_data_by_write", )); } @@ -391,7 +391,7 @@ where || default_cursor.key(&mut statistics.data) != seek_key.as_encoded().as_slice() { return Err(default_not_found_error( - user_key.to_raw()?, + user_key.clone().append_ts(write_start_ts).into_encoded(), "near_reverse_load_data_by_write", )); } From 5d2e706b258d339479cac0f0906d0dfb282b7c44 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 25 Oct 2022 01:53:56 -0700 Subject: [PATCH 0283/1149] raftstore-v2: support conf change (#13631) ref tikv/tikv#12842 Most workflow is the same as v1 except if apply failed, v2 will not return apply result back to raftstore. This behavior is a legacy behavior that only works for conf change before joint consensus and can be removed now. 
Signed-off-by: Jay Lee --- components/raftstore-v2/Cargo.toml | 5 +- components/raftstore-v2/src/fsm/apply.rs | 5 +- components/raftstore-v2/src/fsm/peer.rs | 2 + .../operation/command/admin/conf_change.rs | 509 ++++++++++++++++++ .../src/operation/command/admin/mod.rs | 87 +++ .../raftstore-v2/src/operation/command/mod.rs | 225 +++++--- .../src/operation/command/write/mod.rs | 3 +- .../operation/command/write/simple_write.rs | 37 +- components/raftstore-v2/src/operation/mod.rs | 2 +- .../raftstore-v2/src/operation/query/mod.rs | 8 +- .../src/operation/ready/async_writer.rs | 32 ++ .../raftstore-v2/src/operation/ready/mod.rs | 11 + components/raftstore-v2/src/raft/apply.rs | 50 +- components/raftstore-v2/src/raft/storage.rs | 15 +- .../src/router/internal_message.rs | 5 +- components/raftstore-v2/src/router/message.rs | 5 + components/raftstore-v2/src/router/mod.rs | 4 + .../src/router/response_channel.rs | 5 + .../tests/failpoints/test_basic_write.rs | 11 +- .../tests/integrations/cluster.rs | 101 +++- .../raftstore-v2/tests/integrations/mod.rs | 1 + .../tests/integrations/test_basic_write.rs | 16 +- .../tests/integrations/test_conf_change.rs | 69 +++ .../tests/integrations/test_read.rs | 97 +--- components/raftstore/src/store/peer.rs | 155 +----- components/raftstore/src/store/region_meta.rs | 32 +- components/raftstore/src/store/util.rs | 119 +++- .../raftstore/test_replication_mode.rs | 2 +- 28 files changed, 1264 insertions(+), 349 deletions(-) create mode 100644 components/raftstore-v2/src/operation/command/admin/conf_change.rs create mode 100644 components/raftstore-v2/src/operation/command/admin/mod.rs create mode 100644 components/raftstore-v2/tests/integrations/test_conf_change.rs diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 9adaf0c13e2..8bb91b40bb9 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -4,7 +4,7 @@ version = "0.1.0" edition = "2021" [features] -default = 
["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] +default = ["testexport", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] failpoints = ["raftstore/failpoints"] testexport = ["raftstore/testexport"] test-engine-kv-rocksdb = [ @@ -62,8 +62,9 @@ test_util = { workspace = true } [[test]] name = "raftstore-v2-failpoints" path = "tests/failpoints/mod.rs" -required-features = ["failpoints"] +required-features = ["failpoints", "testexport"] [[test]] name = "raftstore-v2-integrations" path = "tests/integrations/mod.rs" +required-features = ["testexport"] diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index b37d0b33518..4a1e05b8f75 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -13,7 +13,7 @@ use batch_system::{Fsm, FsmScheduler, Mailbox}; use crossbeam::channel::TryRecvError; use engine_traits::KvEngine; use futures::{Future, StreamExt}; -use kvproto::raft_serverpb::RegionLocalState; +use kvproto::{metapb, raft_serverpb::RegionLocalState}; use slog::Logger; use tikv_util::mpsc::future::{self, Receiver, Sender, WakePolicy}; @@ -57,13 +57,14 @@ pub struct ApplyFsm { impl ApplyFsm { pub fn new( + peer: metapb::Peer, region_state: RegionLocalState, res_reporter: R, remote_tablet: CachedTablet, logger: Logger, ) -> (ApplyScheduler, Self) { let (tx, rx) = future::unbounded(WakePolicy::Immediately); - let apply = Apply::new(region_state, res_reporter, remote_tablet, logger); + let apply = Apply::new(peer, region_state, res_reporter, remote_tablet, logger); ( ApplyScheduler { sender: tx }, Self { diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 6b9cccc8b84..a1beedef968 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -233,6 +233,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, 
self.fsm.peer_mut().on_fetched_logs(fetched_logs) } PeerMsg::QueryDebugInfo(ch) => self.fsm.peer_mut().on_query_debug_info(ch), + #[cfg(feature = "testexport")] + PeerMsg::WaitFlush(ch) => self.fsm.peer_mut().on_wait_flush(ch), } } // TODO: instead of propose pending commands immediately, we should use timeout. diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs new file mode 100644 index 00000000000..03d0690fe25 --- /dev/null +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -0,0 +1,509 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module implements the configuration change command. +//! +//! The command will go through the following steps: +//! - Propose conf change +//! - Apply after conf change is committed +//! - Update raft state using the result of conf change + +use collections::HashSet; +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::{ + metapb::{self, PeerRole}, + raft_cmdpb::{AdminRequest, AdminResponse, ChangePeerRequest, RaftCmdRequest}, + raft_serverpb::{PeerState, RegionLocalState}, +}; +use protobuf::Message; +use raft::prelude::*; +use raft_proto::ConfChangeI; +use raftstore::{ + store::{ + metrics::{PEER_ADMIN_CMD_COUNTER_VEC, PEER_PROPOSE_LOG_SIZE_HISTOGRAM}, + util::{self, ChangePeerI, ConfChangeKind}, + ProposalContext, + }, + Error, Result, +}; +use slog::{error, info, warn}; +use tikv_util::box_err; + +use super::AdminCmdResult; +use crate::{ + batch::StoreContext, + raft::{Apply, Peer}, + router::ApplyRes, +}; + +/// The apply result of conf change. 
+#[derive(Default, Debug)] +pub struct ConfChangeResult { + pub index: u64, + // The proposed ConfChangeV2 or (legacy) ConfChange + // ConfChange (if it is) will convert to ConfChangeV2 + pub conf_change: ConfChangeV2, + // The change peer requests come along with ConfChangeV2 + // or (legacy) ConfChange, for ConfChange, it only contains + // one element + pub changes: Vec, + pub region_state: RegionLocalState, +} + +impl Peer { + #[inline] + pub fn propose_conf_change( + &mut self, + ctx: &mut StoreContext, + mut req: RaftCmdRequest, + ) -> Result { + if self.raft_group().raft.has_pending_conf() { + info!( + self.logger, + "there is a pending conf change, try later"; + ); + return Err(box_err!("there is a pending conf change, try later")); + } + let data = req.write_to_bytes()?; + let admin = req.get_admin_request(); + let leader_role = self.peer().get_role(); + if admin.has_change_peer() { + self.propose_conf_change_imp(ctx, admin.get_change_peer(), data) + } else if admin.has_change_peer_v2() { + self.propose_conf_change_imp(ctx, admin.get_change_peer_v2(), data) + } else { + unreachable!() + } + } + + /// Fails in following cases: + /// + /// 1. A pending conf change has not been applied yet; + /// 2. Removing the leader is not allowed in the configuration; + /// 3. The conf change makes the raft group not healthy; + /// 4. The conf change is dropped by raft group internally. + /// 5. There is a same peer on the same store in history record (TODO). + fn propose_conf_change_imp( + &mut self, + ctx: &mut StoreContext, + change_peer: impl ChangePeerI, + data: Vec, + ) -> Result { + let data_size = data.len(); + let cc = change_peer.to_confchange(data); + let changes = change_peer.get_change_peers(); + + util::check_conf_change( + &ctx.cfg, + self.raft_group(), + self.peer(), + changes.as_ref(), + &cc, + false, + )?; + + // TODO: check if the new peer is already in history record. 
+ + ctx.raft_metrics.propose.conf_change.inc(); + // TODO: use local histogram metrics + PEER_PROPOSE_LOG_SIZE_HISTOGRAM.observe(data_size as f64); + info!( + self.logger, + "propose conf change peer"; + "changes" => ?changes.as_ref(), + "kind" => ?ConfChangeKind::confchange_kind(changes.as_ref().len()), + ); + + let last_index = self.raft_group().raft.raft_log.last_index(); + self.raft_group_mut() + .propose_conf_change(ProposalContext::SYNC_LOG.to_vec(), cc)?; + let proposal_index = self.raft_group().raft.raft_log.last_index(); + if proposal_index == last_index { + // The message is dropped silently, this usually due to leader absence + // or transferring leader. Both cases can be considered as NotLeader error. + return Err(Error::NotLeader(self.region_id(), None)); + } + + Ok(proposal_index) + } + + pub fn on_apply_res_conf_change(&mut self, conf_change: ConfChangeResult) { + // TODO: cancel generating snapshot. + + // Snapshot is applied in memory without waiting for all entries being + // applied. So it's possible conf_change.index < first_index. + if conf_change.index >= self.raft_group().raft.raft_log.first_index() { + match self.raft_group_mut().apply_conf_change(&conf_change.conf_change) { + Ok(_) + // PD could dispatch redundant conf changes. + | Err(raft::Error::NotExists { .. }) | Err(raft::Error::Exists { .. 
}) => (), + _ => unreachable!(), + } + } + + let remove_self = conf_change.region_state.get_state() == PeerState::Tombstone; + self.storage_mut() + .set_region_state(conf_change.region_state); + if self.is_leader() { + info!( + self.logger, + "notify pd with change peer region"; + "region" => ?self.region(), + ); + let demote_self = tikv_util::store::is_learner(self.peer()); + if remove_self || demote_self { + warn!(self.logger, "removing or demoting leader"; "remove" => remove_self, "demote" => demote_self); + let term = self.term(); + self.raft_group_mut() + .raft + .become_follower(term, raft::INVALID_ID); + } else if conf_change.changes.iter().any(|c| { + matches!( + c.get_change_type(), + ConfChangeType::AddNode | ConfChangeType::AddLearnerNode + ) + }) { + // Speed up snapshot instead of waiting another heartbeat. + self.raft_group_mut().ping(); + self.set_has_ready(); + } + } + if remove_self { + self.mark_for_destroy(None); + } + } +} + +impl Apply { + #[inline] + pub fn apply_conf_change( + &mut self, + index: u64, + req: &AdminRequest, + cc: ConfChangeV2, + ) -> Result<(AdminResponse, AdminCmdResult)> { + assert!(req.has_change_peer()); + self.apply_conf_change_imp(index, std::slice::from_ref(req.get_change_peer()), cc, true) + } + + #[inline] + pub fn apply_conf_change_v2( + &mut self, + index: u64, + req: &AdminRequest, + cc: ConfChangeV2, + ) -> Result<(AdminResponse, AdminCmdResult)> { + assert!(req.has_change_peer_v2()); + self.apply_conf_change_imp( + index, + req.get_change_peer_v2().get_change_peers(), + cc, + false, + ) + } + + #[inline] + fn apply_conf_change_imp( + &mut self, + index: u64, + changes: &[ChangePeerRequest], + cc: ConfChangeV2, + legacy: bool, + ) -> Result<(AdminResponse, AdminCmdResult)> { + let region = self.region_state().get_region(); + let peer_id = self.peer().get_id(); + let change_kind = ConfChangeKind::confchange_kind(changes.len()); + info!(self.logger, "exec ConfChangeV2"; "kind" => ?change_kind, "legacy" => legacy, 
"epoch" => ?region.get_region_epoch()); + let mut new_region = region.clone(); + match change_kind { + ConfChangeKind::LeaveJoint => self.apply_leave_joint(&mut new_region), + kind => { + debug_assert!(!legacy || kind == ConfChangeKind::Simple, "{:?}", kind); + debug_assert!( + kind != ConfChangeKind::Simple || changes.len() == 1, + "{:?}", + changes + ); + for cp in changes { + let res = if legacy { + self.apply_single_change_legacy(cp, &mut new_region) + } else { + self.apply_single_change(kind, cp, &mut new_region) + }; + if let Err(e) = res { + error!(self.logger, "failed to apply conf change"; + "changes" => ?changes, + "legacy" => legacy, + "original region" => ?region, "err" => ?e); + } + } + let conf_ver = region.get_region_epoch().get_conf_ver() + changes.len() as u64; + new_region.mut_region_epoch().set_conf_ver(conf_ver); + } + }; + + info!( + self.logger, + "conf change successfully"; + "changes" => ?changes, + "legacy" => legacy, + "original region" => ?region, + "current region" => ?new_region, + ); + let my_id = self.peer().get_id(); + let state = self.region_state_mut(); + state.set_region(new_region.clone()); + let new_peer = new_region + .get_peers() + .iter() + .find(|p| p.get_id() == my_id) + .cloned(); + if new_peer.is_none() { + // A peer will reject any snapshot that doesn't include itself in the + // configuration. So if it disappear from the configuration, it must + // be removed by conf change. 
+ state.set_state(PeerState::Tombstone); + } + let mut resp = AdminResponse::default(); + resp.mut_change_peer().set_region(new_region); + let mut conf_change = ConfChangeResult { + index, + conf_change: cc, + changes: changes.to_vec(), + region_state: state.clone(), + }; + if state.get_state() == PeerState::Tombstone { + self.mark_tombstone(); + } + if let Some(peer) = new_peer { + self.set_peer(peer); + } + Ok((resp, AdminCmdResult::ConfChange(conf_change))) + } + + #[inline] + fn apply_leave_joint(&self, region: &mut metapb::Region) { + let mut change_num = 0; + for peer in region.mut_peers().iter_mut() { + match peer.get_role() { + PeerRole::IncomingVoter => peer.set_role(PeerRole::Voter), + PeerRole::DemotingVoter => peer.set_role(PeerRole::Learner), + _ => continue, + } + change_num += 1; + } + if change_num == 0 { + panic!( + "{:?} can't leave a non-joint config, region: {:?}", + self.logger.list(), + self.region_state() + ); + } + let conf_ver = region.get_region_epoch().get_conf_ver() + change_num; + region.mut_region_epoch().set_conf_ver(conf_ver); + info!(self.logger, "leave joint state successfully"; "region" => ?region); + } + + /// This is used for conf change v1. Use a standalone function to avoid + /// future refactor breaks consistency accidentally. 
+ #[inline] + fn apply_single_change_legacy( + &self, + cp: &ChangePeerRequest, + region: &mut metapb::Region, + ) -> Result<()> { + let peer = cp.get_peer(); + let store_id = peer.get_store_id(); + let change_type = cp.get_change_type(); + + match change_type { + ConfChangeType::AddNode => { + PEER_ADMIN_CMD_COUNTER_VEC + .with_label_values(&["add_peer", "all"]) + .inc(); + + let mut exists = false; + if let Some(p) = tikv_util::store::find_peer_mut(region, store_id) { + exists = true; + if !tikv_util::store::is_learner(p) || p.get_id() != peer.get_id() { + return Err(box_err!( + "can't add duplicated peer {:?} to region {:?}", + peer, + self.region_state() + )); + } else { + p.set_role(PeerRole::Voter); + } + } + if !exists { + // TODO: Do we allow adding peer in same node? + region.mut_peers().push(peer.clone()); + } + + PEER_ADMIN_CMD_COUNTER_VEC + .with_label_values(&["add_peer", "success"]) + .inc(); + } + ConfChangeType::RemoveNode => { + PEER_ADMIN_CMD_COUNTER_VEC + .with_label_values(&["remove_peer", "all"]) + .inc(); + + if let Some(p) = tikv_util::store::remove_peer(region, store_id) { + // Considering `is_learner` flag in `Peer` here is by design. 
+ if &p != peer { + return Err(box_err!( + "remove unmatched peer: expect: {:?}, get {:?}, ignore", + peer, + p + )); + } + } else { + return Err(box_err!( + "remove missing peer {:?} from region {:?}", + peer, + self.region_state() + )); + } + + PEER_ADMIN_CMD_COUNTER_VEC + .with_label_values(&["remove_peer", "success"]) + .inc(); + } + ConfChangeType::AddLearnerNode => { + PEER_ADMIN_CMD_COUNTER_VEC + .with_label_values(&["add_learner", "all"]) + .inc(); + + if tikv_util::store::find_peer(region, store_id).is_some() { + return Err(box_err!( + "can't add duplicated learner {:?} to region {:?}", + peer, + self.region_state() + )); + } + region.mut_peers().push(peer.clone()); + + PEER_ADMIN_CMD_COUNTER_VEC + .with_label_values(&["add_learner", "success"]) + .inc(); + } + } + Ok(()) + } + + #[inline] + fn apply_single_change( + &self, + kind: ConfChangeKind, + cp: &ChangePeerRequest, + region: &mut metapb::Region, + ) -> Result<()> { + let (change_type, peer) = (cp.get_change_type(), cp.get_peer()); + let store_id = peer.get_store_id(); + + let metric = match change_type { + ConfChangeType::AddNode => "add_peer", + ConfChangeType::RemoveNode => "remove_peer", + ConfChangeType::AddLearnerNode => "add_learner", + }; + PEER_ADMIN_CMD_COUNTER_VEC + .with_label_values(&[metric, "all"]) + .inc(); + + if let Some(exist_peer) = tikv_util::store::find_peer(region, store_id) { + let r = exist_peer.get_role(); + if r == PeerRole::IncomingVoter || r == PeerRole::DemotingVoter { + panic!( + "{:?} can't apply confchange because configuration is still in joint state, confchange: {:?}, region: {:?}", + self.logger.list(), + cp, + self.region_state() + ); + } + } + match ( + tikv_util::store::find_peer_mut(region, store_id), + change_type, + ) { + (None, ConfChangeType::AddNode) => { + let mut peer = peer.clone(); + match kind { + ConfChangeKind::Simple => peer.set_role(PeerRole::Voter), + ConfChangeKind::EnterJoint => peer.set_role(PeerRole::IncomingVoter), + _ => unreachable!(), + 
} + region.mut_peers().push(peer); + } + (None, ConfChangeType::AddLearnerNode) => { + let mut peer = peer.clone(); + peer.set_role(PeerRole::Learner); + region.mut_peers().push(peer); + } + (None, ConfChangeType::RemoveNode) => { + return Err(box_err!( + "remove missing peer {:?} from region {:?}", + peer, + self.region_state() + )); + } + // Add node + (Some(exist_peer), ConfChangeType::AddNode) + | (Some(exist_peer), ConfChangeType::AddLearnerNode) => { + let (role, exist_id, incoming_id) = + (exist_peer.get_role(), exist_peer.get_id(), peer.get_id()); + + if exist_id != incoming_id // Add peer with different id to the same store + // The peer is already the requested role + || (role, change_type) == (PeerRole::Voter, ConfChangeType::AddNode) + || (role, change_type) == (PeerRole::Learner, ConfChangeType::AddLearnerNode) + { + return Err(box_err!( + "can't add duplicated peer {:?} to region {:?}, duplicated with exist peer {:?}", + peer, + self.region_state(), + exist_peer + )); + } + match (role, change_type) { + (PeerRole::Voter, ConfChangeType::AddLearnerNode) => match kind { + ConfChangeKind::Simple => exist_peer.set_role(PeerRole::Learner), + ConfChangeKind::EnterJoint => exist_peer.set_role(PeerRole::DemotingVoter), + _ => unreachable!(), + }, + (PeerRole::Learner, ConfChangeType::AddNode) => match kind { + ConfChangeKind::Simple => exist_peer.set_role(PeerRole::Voter), + ConfChangeKind::EnterJoint => exist_peer.set_role(PeerRole::IncomingVoter), + _ => unreachable!(), + }, + _ => unreachable!(), + } + } + // Remove node + (Some(exist_peer), ConfChangeType::RemoveNode) => { + if kind == ConfChangeKind::EnterJoint && exist_peer.get_role() == PeerRole::Voter { + return Err(box_err!( + "can not remove voter {:?} directly from region {:?}", + peer, + self.region_state() + )); + } + match tikv_util::store::remove_peer(region, store_id) { + Some(p) => { + if &p != peer { + return Err(box_err!( + "remove unmatched peer: expect: {:?}, get {:?}, ignore", + peer, + 
p + )); + } + } + None => unreachable!(), + } + } + } + PEER_ADMIN_CMD_COUNTER_VEC + .with_label_values(&[metric, "success"]) + .inc(); + Ok(()) + } +} diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs new file mode 100644 index 00000000000..396e3ede98f --- /dev/null +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -0,0 +1,87 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +mod conf_change; + +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::{ + raft_cmdpb::{AdminRequest, RaftCmdRequest}, + raft_serverpb::PeerState, +}; +use protobuf::Message; +use raft::prelude::ConfChangeV2; +use raftstore::{ + store::{ + self, cmd_resp, + fsm::apply, + msg::ErrorCallback, + util::{ChangePeerI, ConfChangeKind}, + }, + Result, +}; +use slog::info; +use tikv_util::box_err; + +use self::conf_change::ConfChangeResult; +use crate::{ + batch::StoreContext, + raft::{Apply, Peer}, + router::CmdResChannel, +}; + +#[derive(Debug)] +pub enum AdminCmdResult { + ConfChange(ConfChangeResult), +} + +impl Peer { + #[inline] + pub fn on_admin_command( + &mut self, + ctx: &mut StoreContext, + mut req: RaftCmdRequest, + ch: CmdResChannel, + ) { + if !self.serving() { + apply::notify_req_region_removed(self.region_id(), ch); + return; + } + if let Err(e) = self.validate_command(&req, &mut ctx.raft_metrics) { + let resp = cmd_resp::new_error(e); + ch.report_error(resp); + return; + } + + // The admin request is rejected because it may need to update epoch checker + // which introduces an uncertainty and may breaks the correctness of epoch + // checker. 
+ if !self.applied_to_current_term() { + let e = box_err!( + "{:?} peer has not applied to current term, applied_term {}, current_term {}", + self.logger.list(), + self.storage().entry_storage().applied_term(), + self.term() + ); + let resp = cmd_resp::new_error(e); + ch.report_error(resp); + return; + } + // To maintain propose order, we need to make pending proposal first. + self.propose_pending_command(ctx); + let cmd_type = req.get_admin_request().get_cmd_type(); + let res = if apply::is_conf_change_cmd(&req) { + self.propose_conf_change(ctx, req) + } else { + // propose other admin command. + unimplemented!() + }; + if let Err(e) = &res { + info!( + self.logger, + "failed to propose admin command"; + "cmd_type" => ?cmd_type, + "error" => ?e, + ); + } + self.post_propose_write(ctx, res, vec![ch]); + } +} diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index fcfeb29fbe2..fe863a74b8a 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -21,16 +21,19 @@ use std::cmp; use batch_system::{Fsm, FsmScheduler, Mailbox}; use engine_traits::{KvEngine, RaftEngine, WriteBatch, WriteOptions}; use kvproto::{ - raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse, RaftRequestHeader}, + raft_cmdpb::{AdminCmdType, CmdType, RaftCmdRequest, RaftCmdResponse, RaftRequestHeader}, raft_serverpb::RegionLocalState, }; use protobuf::Message; -use raft::eraftpb::Entry; +use raft::eraftpb::{ConfChange, ConfChangeV2, Entry, EntryType}; +use raft_proto::ConfChangeI; use raftstore::{ store::{ cmd_resp, fsm::{ - apply::{APPLY_WB_SHRINK_SIZE, DEFAULT_APPLY_WB_SIZE, SHRINK_PENDING_CMD_QUEUE_CAP}, + apply::{ + self, APPLY_WB_SHRINK_SIZE, DEFAULT_APPLY_WB_SIZE, SHRINK_PENDING_CMD_QUEUE_CAP, + }, Proposal, }, local_metrics::RaftMetrics, @@ -50,12 +53,28 @@ use crate::{ router::{ApplyRes, ApplyTask, CmdResChannel, PeerMsg}, }; +mod admin; mod write; 
+pub use admin::AdminCmdResult; pub use write::{SimpleWriteDecoder, SimpleWriteEncoder}; use self::write::SimpleWrite; +fn parse_at(logger: &slog::Logger, buf: &[u8], index: u64, term: u64) -> M { + let mut m = M::default(); + match m.merge_from_bytes(buf) { + Ok(()) => m, + Err(e) => panic!( + "{:?} data is corrupted at [{}] {}: {:?}", + logger.list(), + term, + index, + e + ), + } +} + #[derive(Debug)] pub struct CommittedEntries { /// Entries need to be applied. Note some entries may not be included for @@ -80,7 +99,9 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> PeerFsmDelegate<'a, EK, ER, T> { .peer_mut() .on_write_command(self.store_ctx, req, ch) } else if req.has_admin_request() { - // self.on_admin_request(req, ch) + self.fsm + .peer_mut() + .on_admin_command(self.store_ctx, req, ch) } else if req.has_status_request() { error!(self.fsm.logger(), "status command should be sent by Query"); } @@ -99,7 +120,8 @@ impl Peer { let mailbox = store_ctx.router.mailbox(self.region_id()).unwrap(); let tablet = self.tablet().clone(); let logger = self.logger.clone(); - let (apply_scheduler, mut apply_fsm) = ApplyFsm::new(region_state, mailbox, tablet, logger); + let (apply_scheduler, mut apply_fsm) = + ApplyFsm::new(self.peer().clone(), region_state, mailbox, tablet, logger); store_ctx .apply_pool .spawn(async move { apply_fsm.handle_all_tasks().await }) @@ -229,9 +251,17 @@ impl Peer { } // It must just applied a snapshot. if apply_res.applied_index < self.entry_storage().first_index() { - // TODO: handle admin side effects like split/merge. + // Ignore admin command side effects, otherwise it may split incomplete + // region. 
return; } + for admin_res in apply_res.admin_result { + match admin_res { + AdminCmdResult::ConfChange(conf_change) => { + self.on_apply_res_conf_change(conf_change) + } + } + } self.raft_group_mut() .advance_apply_to(apply_res.applied_index); let is_leader = self.is_leader(); @@ -244,7 +274,12 @@ impl Peer { if !is_leader { entry_storage.compact_entry_cache(apply_res.applied_index + 1); } - self.handle_read_on_apply(ctx, apply_res, progress_to_be_updated); + self.handle_read_on_apply( + ctx, + apply_res.applied_term, + apply_res.applied_index, + progress_to_be_updated, + ); } } @@ -253,6 +288,10 @@ impl Apply { pub async fn apply_committed_entries(&mut self, ce: CommittedEntries) { fail::fail_point!("APPLY_COMMITTED_ENTRIES"); for (e, ch) in ce.entry_and_proposals { + if self.tombstone() { + apply::notify_req_region_removed(self.region_state().get_region().get_id(), ch); + continue; + } if !e.get_data().is_empty() { let mut set_save_point = false; if let Some(wb) = self.write_batch_mut() { @@ -283,62 +322,130 @@ impl Apply { #[inline] async fn apply_entry(&mut self, entry: &Entry) -> Result { - match SimpleWriteDecoder::new(entry.get_data()) { - Ok(decoder) => { - util::compare_region_epoch( - decoder.header().get_region_epoch(), - self.region_state().get_region(), - false, - true, - true, - )?; - let res = Ok(new_response(decoder.header())); - for req in decoder { - match req { - SimpleWrite::Put(put) => self.apply_put(put.cf, put.key, put.value)?, - SimpleWrite::Delete(delete) => self.apply_delete(delete.cf, delete.key)?, - SimpleWrite::DeleteRange(dr) => self.apply_delete_range( - dr.cf, - dr.start_key, - dr.end_key, - dr.notify_only, - )?, + let mut conf_change = None; + let req = match entry.get_entry_type() { + EntryType::EntryNormal => match SimpleWriteDecoder::new( + &self.logger, + entry.get_data(), + entry.get_index(), + entry.get_term(), + ) { + Ok(decoder) => { + util::compare_region_epoch( + decoder.header().get_region_epoch(), + 
self.region_state().get_region(), + false, + true, + true, + )?; + let res = Ok(new_response(decoder.header())); + for req in decoder { + match req { + SimpleWrite::Put(put) => self.apply_put(put.cf, put.key, put.value)?, + SimpleWrite::Delete(delete) => { + self.apply_delete(delete.cf, delete.key)? + } + SimpleWrite::DeleteRange(dr) => self.apply_delete_range( + dr.cf, + dr.start_key, + dr.end_key, + dr.notify_only, + )?, + } } + return res; } - res + Err(req) => req, + }, + EntryType::EntryConfChange => { + let cc: ConfChange = parse_at( + &self.logger, + entry.get_data(), + entry.get_index(), + entry.get_term(), + ); + let req: RaftCmdRequest = parse_at( + &self.logger, + cc.get_context(), + entry.get_index(), + entry.get_term(), + ); + conf_change = Some(cc.into_v2()); + req } - Err(req) => { - util::check_region_epoch(&req, self.region_state().get_region(), true)?; - if req.has_admin_request() { - // TODO: implement admin request. - } else { - for r in req.get_requests() { - match r.get_cmd_type() { - // These three writes should all use the new codec. Keep them here for - // backward compatibility. 
- CmdType::Put => { - let put = r.get_put(); - self.apply_put(put.get_cf(), put.get_key(), put.get_value())?; - } - CmdType::Delete => { - let delete = r.get_delete(); - self.apply_delete(delete.get_cf(), delete.get_key())?; - } - CmdType::DeleteRange => { - let dr = r.get_delete_range(); - self.apply_delete_range( - dr.get_cf(), - dr.get_start_key(), - dr.get_end_key(), - dr.get_notify_only(), - )?; - } - _ => unimplemented!(), - } + EntryType::EntryConfChangeV2 => { + let cc: ConfChangeV2 = parse_at( + &self.logger, + entry.get_data(), + entry.get_index(), + entry.get_term(), + ); + let req: RaftCmdRequest = parse_at( + &self.logger, + cc.get_context(), + entry.get_index(), + entry.get_term(), + ); + conf_change = Some(cc); + req + } + }; + + util::check_region_epoch(&req, self.region_state().get_region(), true)?; + if req.has_admin_request() { + let admin_req = req.get_admin_request(); + let (admin_resp, admin_result) = match req.get_admin_request().get_cmd_type() { + AdminCmdType::CompactLog => unimplemented!(), + AdminCmdType::Split => unimplemented!(), + AdminCmdType::BatchSplit => unimplemented!(), + AdminCmdType::PrepareMerge => unimplemented!(), + AdminCmdType::CommitMerge => unimplemented!(), + AdminCmdType::RollbackMerge => unimplemented!(), + AdminCmdType::TransferLeader => unreachable!(), + AdminCmdType::ChangePeer => { + self.apply_conf_change(entry.get_index(), admin_req, conf_change.unwrap())? + } + AdminCmdType::ChangePeerV2 => { + self.apply_conf_change_v2(entry.get_index(), admin_req, conf_change.unwrap())? 
+ } + AdminCmdType::ComputeHash => unimplemented!(), + AdminCmdType::VerifyHash => unimplemented!(), + AdminCmdType::PrepareFlashback => unimplemented!(), + AdminCmdType::FinishFlashback => unimplemented!(), + AdminCmdType::InvalidAdmin => { + return Err(box_err!("invalid admin command type")); + } + }; + self.push_admin_result(admin_result); + let mut resp = new_response(req.get_header()); + resp.set_admin_response(admin_resp); + Ok(resp) + } else { + for r in req.get_requests() { + match r.get_cmd_type() { + // These three writes should all use the new codec. Keep them here for + // backward compatibility. + CmdType::Put => { + let put = r.get_put(); + self.apply_put(put.get_cf(), put.get_key(), put.get_value())?; } + CmdType::Delete => { + let delete = r.get_delete(); + self.apply_delete(delete.get_cf(), delete.get_key())?; + } + CmdType::DeleteRange => { + let dr = r.get_delete_range(); + self.apply_delete_range( + dr.get_cf(), + dr.get_start_key(), + dr.get_end_key(), + dr.get_notify_only(), + )?; + } + _ => unimplemented!(), } - Ok(new_response(req.get_header())) } + Ok(new_response(req.get_header())) } } @@ -367,9 +474,7 @@ impl Apply { let (index, term) = self.apply_progress(); apply_res.applied_index = index; apply_res.applied_term = term; - if self.reset_state_changed() { - apply_res.region_state = Some(self.region_state().clone()); - } + apply_res.admin_result = self.take_admin_result(); self.res_reporter().report(apply_res); } } diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 798e1b45631..a760a5acfb2 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -70,7 +70,8 @@ impl Peer { } } - fn post_propose_write( + #[inline] + pub fn post_propose_write( &mut self, ctx: &mut StoreContext, res: Result, diff --git a/components/raftstore-v2/src/operation/command/write/simple_write.rs 
b/components/raftstore-v2/src/operation/command/write/simple_write.rs index 46544be1a32..364e2741868 100644 --- a/components/raftstore-v2/src/operation/command/write/simple_write.rs +++ b/components/raftstore-v2/src/operation/command/write/simple_write.rs @@ -3,8 +3,9 @@ use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, RaftRequestHeader, Request}; use protobuf::{CodedInputStream, Message, SingularPtrField}; +use slog::Logger; -use crate::router::CmdResChannel; +use crate::{operation::command::parse_at, router::CmdResChannel}; // MAGIC number to hint simple write codec is used. If it's a protobuf message, // the first one or several bytes are for field tag, which can't be zero. @@ -135,22 +136,32 @@ pub struct SimpleWriteDecoder<'a> { } impl<'a> SimpleWriteDecoder<'a> { - pub fn new(buf: &'a [u8]) -> Result, RaftCmdRequest> { + pub fn new( + logger: &Logger, + buf: &'a [u8], + index: u64, + term: u64, + ) -> Result, RaftCmdRequest> { match buf.first().cloned() { Some(MAGIC_PREFIX) => { let mut is = CodedInputStream::from_bytes(&buf[1..]); - let header = is.read_message().unwrap(); + let header = match is.read_message() { + Ok(h) => h, + Err(e) => panic!( + "{:?} data corrupted at [{}] {}: {:?}", + logger.list(), + term, + index, + e + ), + }; let read = is.pos(); Ok(SimpleWriteDecoder { header, buf: &buf[1 + read as usize..], }) } - _ => { - let mut req = RaftCmdRequest::new(); - req.merge_from_bytes(buf).unwrap(); - Err(req) - } + _ => Err(parse_at(logger, buf, index, term)), } } @@ -346,6 +357,8 @@ fn decode<'a>(buf: &mut &'a [u8]) -> Option> { #[cfg(test)] mod tests { + use slog::o; + use super::*; #[test] @@ -392,7 +405,8 @@ mod tests { encoder.amend(cmd.clone()).unwrap(); let (bytes, _) = encoder.encode(); - let mut decoder = SimpleWriteDecoder::new(&bytes).unwrap(); + let logger = slog_global::borrow_global().new(o!()); + let mut decoder = SimpleWriteDecoder::new(&logger, &bytes, 0, 0).unwrap(); 
assert_eq!(decoder.header(), cmd.get_header()); let write = decoder.next().unwrap(); let SimpleWrite::Put(put) = write else { panic!("should be put") }; @@ -459,7 +473,8 @@ mod tests { invalid_cmd.mut_requests().push(req); let fallback = SimpleWriteEncoder::new(invalid_cmd.clone(), usize::MAX).unwrap_err(); let bytes = fallback.write_to_bytes().unwrap(); - let decoded = SimpleWriteDecoder::new(&bytes).unwrap_err(); + let logger = slog_global::borrow_global().new(o!()); + let decoded = SimpleWriteDecoder::new(&logger, &bytes, 0, 0).unwrap_err(); assert_eq!(decoded, invalid_cmd); let mut valid_cmd = RaftCmdRequest::default(); @@ -480,7 +495,7 @@ mod tests { encoder.amend(valid_cmd2).unwrap_err(); let (bytes, _) = encoder.encode(); - let mut decoder = SimpleWriteDecoder::new(&bytes).unwrap(); + let mut decoder = SimpleWriteDecoder::new(&logger, &bytes, 0, 0).unwrap(); assert_eq!(decoder.header(), valid_cmd.get_header()); let req = decoder.next().unwrap(); let SimpleWrite::Put(put) = req else { panic!("should be put") }; diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 7b31473f784..1eaeb21ec18 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -5,7 +5,7 @@ mod life; mod query; mod ready; -pub use command::{CommittedEntries, SimpleWriteDecoder, SimpleWriteEncoder}; +pub use command::{AdminCmdResult, CommittedEntries, SimpleWriteDecoder, SimpleWriteEncoder}; pub use life::DestroyProgress; pub use ready::AsyncWriter; diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 960e667c7d9..8b84b0788ce 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -386,7 +386,8 @@ impl Peer { pub fn handle_read_on_apply( &mut self, ctx: &mut StoreContext, - apply_res: ApplyRes, + applied_term: u64, + applied_index: u64, 
progress_to_be_updated: bool, ) { // TODO: add is_handling_snapshot check @@ -401,13 +402,12 @@ impl Peer { } } self.pending_reads_mut().gc(); - self.read_progress_mut() - .update_applied_core(apply_res.applied_index); + self.read_progress_mut().update_applied_core(applied_index); // Only leaders need to update applied_term. if progress_to_be_updated && self.is_leader() { // TODO: add coprocessor_host hook - let progress = ReadProgress::applied_term(apply_res.applied_term); + let progress = ReadProgress::applied_term(applied_term); // TODO: remove it self.add_reader_if_necessary(&mut ctx.store_meta); let mut meta = ctx.store_meta.lock().unwrap(); diff --git a/components/raftstore-v2/src/operation/ready/async_writer.rs b/components/raftstore-v2/src/operation/ready/async_writer.rs index 3ebc1f20da7..d5673d76a40 100644 --- a/components/raftstore-v2/src/operation/ready/async_writer.rs +++ b/components/raftstore-v2/src/operation/ready/async_writer.rs @@ -29,6 +29,8 @@ pub struct AsyncWriter { write_router: WriteRouter, unpersisted_readies: VecDeque, persisted_number: u64, + #[cfg(feature = "testexport")] + flush_subscribers: VecDeque<(u64, crate::router::FlushChannel)>, } impl AsyncWriter { @@ -38,6 +40,8 @@ impl AsyncWriter { write_router, unpersisted_readies: VecDeque::new(), persisted_number: 0, + #[cfg(feature = "testexport")] + flush_subscribers: VecDeque::new(), } } @@ -159,6 +163,34 @@ impl AsyncWriter { } } +#[cfg(feature = "testexport")] +impl AsyncWriter { + pub fn subscirbe_flush(&mut self, ch: crate::router::FlushChannel) { + self.flush_subscribers + .push_back((self.known_largest_number(), ch)); + } + + pub fn notify_flush(&mut self) { + if self.flush_subscribers.is_empty() { + return; + } + if self.all_ready_persisted() { + for (_, ch) in self.flush_subscribers.drain(..) { + ch.set_result(()); + } + } + while let Some((number, ch)) = self.flush_subscribers.pop_front() { + // A channel is registered without ready, so persisted_number should be larger. 
+ if self.persisted_number > number { + ch.set_result(()); + } else { + self.flush_subscribers.push_front((number, ch)); + break; + } + } + } +} + impl WriteRouterContext for StoreContext where EK: KvEngine, diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 3129dcfb832..cfc3d086163 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -261,11 +261,15 @@ impl Peer { pub fn handle_raft_ready(&mut self, ctx: &mut StoreContext) { let has_ready = self.reset_has_ready(); if !has_ready || self.destroy_progress().started() { + #[cfg(feature = "testexport")] + self.async_writer.notify_flush(); return; } ctx.has_ready = true; if !self.raft_group().has_ready() && (self.serving() || self.postpond_destroy()) { + #[cfg(feature = "testexport")] + self.async_writer.notify_flush(); return; } @@ -336,6 +340,8 @@ impl Peer { } ctx.raft_metrics.ready.has_ready_region.inc(); + #[cfg(feature = "testexport")] + self.async_writer.notify_flush(); } /// Called when an asynchronously write finishes. @@ -372,6 +378,11 @@ impl Peer { self.finish_destroy(ctx); } } + + #[cfg(feature = "testexport")] + pub fn on_wait_flush(&mut self, ch: crate::router::FlushChannel) { + self.async_writer.subscirbe_flush(ch); + } } impl Storage { diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index b210890ac40..068e5124c0c 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -1,30 +1,37 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+use std::mem; + use engine_traits::{KvEngine, RaftEngine}; -use kvproto::{raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; +use kvproto::{metapb, raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; use raftstore::store::fsm::apply::DEFAULT_APPLY_WB_SIZE; use slog::Logger; use super::Peer; use crate::{ fsm::ApplyResReporter, + operation::AdminCmdResult, router::{ApplyRes, CmdResChannel}, tablet::CachedTablet, }; /// Apply applies all the committed commands to kv db. pub struct Apply { + peer: metapb::Peer, remote_tablet: CachedTablet, tablet: EK, write_batch: Option, callbacks: Vec<(Vec, RaftCmdResponse)>, + /// A flag indicates whether the peer is destroyed by applying admin + /// command. + tombstone: bool, applied_index: u64, applied_term: u64, + admin_cmd_result: Vec, region_state: RegionLocalState, - state_changed: bool, res_reporter: R, pub(crate) logger: Logger, @@ -33,20 +40,23 @@ pub struct Apply { impl Apply { #[inline] pub fn new( + peer: metapb::Peer, region_state: RegionLocalState, res_reporter: R, mut remote_tablet: CachedTablet, logger: Logger, ) -> Self { Apply { + peer, tablet: remote_tablet.latest().unwrap().clone(), remote_tablet, write_batch: None, callbacks: vec![], + tombstone: false, applied_index: 0, applied_term: 0, + admin_cmd_result: vec![], region_state, - state_changed: false, res_reporter, logger, } @@ -92,8 +102,8 @@ impl Apply { } #[inline] - pub fn reset_state_changed(&mut self) -> bool { - std::mem::take(&mut self.state_changed) + pub fn region_state_mut(&mut self) -> &mut RegionLocalState { + &mut self.region_state } /// Publish the tablet so that it can be used by read worker. 
@@ -105,4 +115,34 @@ impl Apply { self.remote_tablet.set(tablet.clone()); self.tablet = tablet; } + + #[inline] + pub fn peer(&self) -> &metapb::Peer { + &self.peer + } + + #[inline] + pub fn set_peer(&mut self, peer: metapb::Peer) { + self.peer = peer; + } + + #[inline] + pub fn mark_tombstone(&mut self) { + self.tombstone = true; + } + + #[inline] + pub fn tombstone(&self) -> bool { + self.tombstone + } + + #[inline] + pub fn push_admin_result(&mut self, admin_result: AdminCmdResult) { + self.admin_cmd_result.push(admin_result); + } + + #[inline] + pub fn take_admin_result(&mut self) -> Vec { + mem::take(&mut self.admin_cmd_result) + } } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 1615255ab23..b08624b1185 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -240,6 +240,17 @@ impl Storage { pub fn set_ever_persisted(&mut self) { self.ever_persisted = true; } + + #[inline] + pub fn set_region_state(&mut self, state: RegionLocalState) { + self.region_state = state; + for peer in self.region_state.get_region().get_peers() { + if peer.get_id() == self.peer.get_id() { + self.peer = peer.clone(); + break; + } + } + } } impl raft::Storage for Storage { @@ -295,7 +306,9 @@ impl raft::Storage for Storage { } fn snapshot(&self, request_index: u64, to: u64) -> raft::Result { - unimplemented!() + Err(raft::Error::Store( + raft::StorageError::SnapshotTemporarilyUnavailable, + )) } } diff --git a/components/raftstore-v2/src/router/internal_message.rs b/components/raftstore-v2/src/router/internal_message.rs index 28a93e897af..e9893bad968 100644 --- a/components/raftstore-v2/src/router/internal_message.rs +++ b/components/raftstore-v2/src/router/internal_message.rs @@ -1,8 +1,9 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use kvproto::raft_serverpb::RegionLocalState; +use raftstore::store::fsm::ChangePeer; -use crate::operation::CommittedEntries; +use crate::operation::{AdminCmdResult, CommittedEntries}; #[derive(Debug)] pub enum ApplyTask { @@ -13,5 +14,5 @@ pub enum ApplyTask { pub struct ApplyRes { pub applied_index: u64, pub applied_term: u64, - pub region_state: Option, + pub admin_result: Vec, } diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index fb323dca9d4..c607e389135 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -134,6 +134,9 @@ pub enum PeerMsg { ready_number: u64, }, QueryDebugInfo(DebugInfoChannel), + /// A message that used to check if a flush is happened. + #[cfg(feature = "testexport")] + WaitFlush(super::FlushChannel), } impl PeerMsg { @@ -172,6 +175,8 @@ impl fmt::Debug for PeerMsg { ), PeerMsg::FetchedLogs(fetched) => write!(fmt, "FetchedLogs {:?}", fetched), PeerMsg::QueryDebugInfo(_) => write!(fmt, "QueryDebugInfo"), + #[cfg(feature = "testexport")] + PeerMsg::WaitFlush(_) => write!(fmt, "FlushMessages"), } } } diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs index e9e7cf6cfc8..a09b0593b80 100644 --- a/components/raftstore-v2/src/router/mod.rs +++ b/components/raftstore-v2/src/router/mod.rs @@ -6,6 +6,10 @@ pub mod message; mod response_channel; pub(crate) use self::internal_message::ApplyTask; +#[cfg(feature = "testexport")] +pub use self::response_channel::FlushChannel; +#[cfg(feature = "testexport")] +pub use self::response_channel::FlushSubscriber; pub use self::{ imp::RaftRouter, internal_message::ApplyRes, diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index 55219540c2f..d68c414ca5f 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ 
b/components/raftstore-v2/src/router/response_channel.rs @@ -417,6 +417,11 @@ impl fmt::Debug for QueryResChannel { pub type DebugInfoChannel = BaseChannel; pub type DebugInfoSubscriber = BaseSubscriber; +#[cfg(feature = "testexport")] +pub type FlushChannel = BaseChannel<()>; +#[cfg(feature = "testexport")] +pub type FlushSubscriber = BaseSubscriber<()>; + #[cfg(test)] mod tests { use futures::executor::block_on; diff --git a/components/raftstore-v2/tests/failpoints/test_basic_write.rs b/components/raftstore-v2/tests/failpoints/test_basic_write.rs index 5014e0efd3e..4bf4201f67c 100644 --- a/components/raftstore-v2/tests/failpoints/test_basic_write.rs +++ b/components/raftstore-v2/tests/failpoints/test_basic_write.rs @@ -4,10 +4,8 @@ use std::{assert_matches::assert_matches, time::Duration}; use engine_traits::{OpenOptions, Peekable, TabletFactory}; use futures::executor::block_on; -use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, Request}; -use raftstore::store::{INIT_EPOCH_CONF_VER, INIT_EPOCH_VER}; +use kvproto::raft_cmdpb::{CmdType, Request}; use raftstore_v2::router::PeerMsg; -use tikv_util::store::new_peer; use crate::cluster::Cluster; @@ -16,12 +14,7 @@ use crate::cluster::Cluster; fn test_write_batch_rollback() { let cluster = Cluster::default(); let router = cluster.router(0); - let mut req = RaftCmdRequest::default(); - req.mut_header().set_region_id(2); - let epoch = req.mut_header().mut_region_epoch(); - epoch.set_version(INIT_EPOCH_VER); - epoch.set_conf_ver(INIT_EPOCH_CONF_VER); - req.mut_header().set_peer(new_peer(1, 3)); + let mut req = router.new_request_for(2); let mut put_req = Request::default(); put_req.set_cmd_type(CmdType::Put); put_req.mut_put().set_key(b"key".to_vec()); diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 554db96acbf..1d458d7a73e 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ 
b/components/raftstore-v2/tests/integrations/cluster.rs @@ -11,7 +11,8 @@ use std::{ time::{Duration, Instant}, }; -use crossbeam::channel::{self, Receiver, Sender}; +use collections::HashSet; +use crossbeam::channel::{self, Receiver, Sender, TrySendError}; use engine_test::{ ctor::{CfOptions, DbOptions}, kv::{KvTestEngine, TestTabletFactoryV2}, @@ -28,13 +29,16 @@ use pd_client::RpcClient; use raftstore::store::{region_meta::RegionMeta, Config, Transport, RAFT_INIT_LOG_INDEX}; use raftstore_v2::{ create_store_batch_system, - router::{DebugInfoChannel, PeerMsg, QueryResult, RaftRouter}, + router::{DebugInfoChannel, FlushChannel, PeerMsg, QueryResult, RaftRouter}, Bootstrap, StoreMeta, StoreSystem, }; -use slog::{o, Logger}; +use slog::{debug, o, Logger}; use tempfile::TempDir; use test_pd::mocker::Service; -use tikv_util::config::{ReadableDuration, VersionTrack}; +use tikv_util::{ + config::{ReadableDuration, VersionTrack}, + store::new_peer, +}; #[derive(Clone)] pub struct TestRouter(RaftRouter); @@ -81,6 +85,20 @@ impl TestRouter { block_on(sub.result()) } + pub fn wait_flush(&self, region_id: u64, timeout: Duration) -> bool { + let timer = Instant::now(); + while timer.elapsed() < timeout { + let (ch, sub) = FlushChannel::pair(); + let res = self.send(region_id, PeerMsg::WaitFlush(ch)); + match res { + Ok(_) => return block_on(sub.result()).is_some(), + Err(TrySendError::Disconnected(_)) => return false, + Err(TrySendError::Full(_)) => thread::sleep(Duration::from_millis(10)), + } + } + panic!("unable to flush {}", region_id); + } + pub fn wait_applied_to_current_term(&self, region_id: u64, timeout: Duration) { let mut now = Instant::now(); let deadline = now + timeout; @@ -104,9 +122,33 @@ impl TestRouter { region_id, res ); } + + pub fn new_request_for(&self, region_id: u64) -> RaftCmdRequest { + let meta = self + .must_query_debug_info(region_id, Duration::from_secs(1)) + .unwrap(); + let mut req = RaftCmdRequest::default(); + 
req.mut_header().set_region_id(region_id); + let epoch = req.mut_header().mut_region_epoch(); + let epoch_meta = &meta.region_state.epoch; + epoch.set_version(epoch_meta.version); + epoch.set_conf_ver(epoch_meta.conf_ver); + let target_peer = *meta + .region_state + .peers + .iter() + .find(|p| p.id == meta.raft_status.id) + .unwrap(); + let mut peer = new_peer(target_peer.store_id, target_peer.id); + peer.role = target_peer.role.into(); + req.mut_header().set_peer(peer); + req.mut_header().set_term(meta.raft_status.hard_state.term); + req + } } pub struct RunningState { + store_id: u64, pub raft_engine: RaftTestEngine, pub factory: Arc, pub system: StoreSystem, @@ -178,6 +220,7 @@ impl RunningState { .unwrap(); let state = Self { + store_id, raft_engine, factory, system, @@ -203,8 +246,7 @@ pub struct TestNode { } impl TestNode { - fn with_pd(pd_server: &test_pd::Server) -> TestNode { - let logger = slog_global::borrow_global().new(o!()); + fn with_pd(pd_server: &test_pd::Server, logger: Logger) -> TestNode { let pd_client = test_pd::util::new_client(pd_server.bind_addrs(), None); let path = TempDir::new().unwrap(); @@ -245,6 +287,10 @@ impl TestNode { pub fn running_state(&self) -> Option<&RunningState> { self.running_state.as_ref() } + + pub fn id(&self) -> u64 { + self.running_state().unwrap().store_id + } } impl Drop for TestNode { @@ -319,6 +365,7 @@ pub struct Cluster { nodes: Vec, receivers: Vec>, routers: Vec, + logger: Logger, } impl Default for Cluster { @@ -334,11 +381,13 @@ impl Cluster { pub fn with_node_count(count: usize, config: Option) -> Self { let pd_server = test_pd::Server::new(1); + let logger = slog_global::borrow_global().new(o!()); let mut cluster = Cluster { pd_server, nodes: vec![], receivers: vec![], routers: vec![], + logger, }; let mut cfg = if let Some(config) = config { config @@ -347,7 +396,7 @@ impl Cluster { }; disable_all_auto_ticks(&mut cfg); for _ in 1..=count { - let mut node = TestNode::with_pd(&cluster.pd_server); + let mut 
node = TestNode::with_pd(&cluster.pd_server, cluster.logger.clone()); let (tx, rx) = new_test_transport(); let router = node.start(Arc::new(VersionTrack::new(cfg.clone())), tx); cluster.nodes.push(node); @@ -369,4 +418,42 @@ impl Cluster { pub fn router(&self, offset: usize) -> TestRouter { self.routers[offset].clone() } + + /// Send messages and wait for side effects are all handled. + #[allow(clippy::vec_box)] + pub fn dispatch(&self, region_id: u64, mut msgs: Vec>) { + let mut regions = HashSet::default(); + regions.insert(region_id); + loop { + for msg in msgs.drain(..) { + let offset = match self + .nodes + .iter() + .position(|n| n.id() == msg.get_to_peer().get_store_id()) + { + Some(offset) => offset, + None => { + debug!(self.logger, "failed to find node"; "message" => ?msg); + continue; + } + }; + regions.insert(msg.get_region_id()); + if let Err(e) = self.routers[offset].send_raft_message(msg) { + debug!(self.logger, "failed to send raft message"; "err" => ?e); + } + } + for (router, rx) in self.routers.iter().zip(&self.receivers) { + for region_id in ®ions { + router.wait_flush(*region_id, Duration::from_secs(3)); + } + while let Ok(msg) = rx.try_recv() { + msgs.push(Box::new(msg)); + } + } + regions.clear(); + if msgs.is_empty() { + return; + } + } + } } diff --git a/components/raftstore-v2/tests/integrations/mod.rs b/components/raftstore-v2/tests/integrations/mod.rs index db37c7cbf64..50fb5c4e16a 100644 --- a/components/raftstore-v2/tests/integrations/mod.rs +++ b/components/raftstore-v2/tests/integrations/mod.rs @@ -7,6 +7,7 @@ mod cluster; mod test_basic_write; +mod test_conf_change; mod test_life; mod test_read; mod test_status; diff --git a/components/raftstore-v2/tests/integrations/test_basic_write.rs b/components/raftstore-v2/tests/integrations/test_basic_write.rs index ce775982686..7c8bdb369a1 100644 --- a/components/raftstore-v2/tests/integrations/test_basic_write.rs +++ b/components/raftstore-v2/tests/integrations/test_basic_write.rs @@ -5,7 
+5,7 @@ use std::{assert_matches::assert_matches, time::Duration}; use engine_traits::{OpenOptions, Peekable, TabletFactory}; use futures::executor::block_on; use kvproto::{ - raft_cmdpb::{CmdType, RaftCmdRequest, Request}, + raft_cmdpb::{CmdType, Request}, raft_serverpb::RaftMessage, }; use raftstore::store::{INIT_EPOCH_CONF_VER, INIT_EPOCH_VER}; @@ -19,12 +19,7 @@ use crate::cluster::Cluster; fn test_basic_write() { let cluster = Cluster::default(); let router = cluster.router(0); - let mut req = RaftCmdRequest::default(); - req.mut_header().set_region_id(2); - let epoch = req.mut_header().mut_region_epoch(); - epoch.set_version(INIT_EPOCH_VER); - epoch.set_conf_ver(INIT_EPOCH_CONF_VER); - req.mut_header().set_peer(new_peer(1, 3)); + let mut req = router.new_request_for(2); let mut put_req = Request::default(); put_req.set_cmd_type(CmdType::Put); put_req.mut_put().set_key(b"key".to_vec()); @@ -119,12 +114,7 @@ fn test_basic_write() { fn test_put_delete() { let cluster = Cluster::default(); let router = cluster.router(0); - let mut req = RaftCmdRequest::default(); - req.mut_header().set_region_id(2); - let epoch = req.mut_header().mut_region_epoch(); - epoch.set_version(INIT_EPOCH_VER); - epoch.set_conf_ver(INIT_EPOCH_CONF_VER); - req.mut_header().set_peer(new_peer(1, 3)); + let mut req = router.new_request_for(2); let mut put_req = Request::default(); put_req.set_cmd_type(CmdType::Put); put_req.mut_put().set_key(b"key".to_vec()); diff --git a/components/raftstore-v2/tests/integrations/test_conf_change.rs b/components/raftstore-v2/tests/integrations/test_conf_change.rs new file mode 100644 index 00000000000..f9479786a7b --- /dev/null +++ b/components/raftstore-v2/tests/integrations/test_conf_change.rs @@ -0,0 +1,69 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::time::Duration; + +use kvproto::raft_cmdpb::AdminCmdType; +use raft::prelude::ConfChangeType; +use tikv_util::store::new_learner_peer; + +use crate::cluster::Cluster; + +#[test] +fn test_simple_change() { + let cluster = Cluster::with_node_count(2, None); + let router0 = cluster.router(0); + let mut req = router0.new_request_for(2); + let admin_req = req.mut_admin_request(); + admin_req.set_cmd_type(AdminCmdType::ChangePeer); + admin_req + .mut_change_peer() + .set_change_type(ConfChangeType::AddLearnerNode); + let store_id = cluster.node(1).id(); + let new_peer = new_learner_peer(store_id, 10); + admin_req.mut_change_peer().set_peer(new_peer.clone()); + let resp = router0.command(2, req.clone()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + let epoch = req.get_header().get_region_epoch(); + let new_conf_ver = epoch.get_conf_ver() + 1; + let leader_peer = req.get_header().get_peer().clone(); + let meta = router0 + .must_query_debug_info(2, Duration::from_secs(3)) + .unwrap(); + assert_eq!(meta.region_state.epoch.version, epoch.get_version()); + assert_eq!(meta.region_state.epoch.conf_ver, new_conf_ver); + assert_eq!(meta.region_state.peers, vec![leader_peer, new_peer]); + + // So heartbeat will create a learner. 
+ cluster.dispatch(2, vec![]); + let router1 = cluster.router(1); + let meta = router1 + .must_query_debug_info(2, Duration::from_secs(3)) + .unwrap(); + assert_eq!(meta.raft_status.id, 10, "{:?}", meta); + assert_eq!(meta.region_state.epoch.version, epoch.get_version()); + assert_eq!(meta.region_state.epoch.conf_ver, new_conf_ver); + assert_eq!( + meta.raft_status.soft_state.leader_id, + req.get_header().get_peer().get_id() + ); + + req.mut_header() + .mut_region_epoch() + .set_conf_ver(new_conf_ver); + req.mut_admin_request() + .mut_change_peer() + .set_change_type(ConfChangeType::RemoveNode); + let resp = router0.command(2, req.clone()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + let epoch = req.get_header().get_region_epoch(); + let new_conf_ver = epoch.get_conf_ver() + 1; + let leader_peer = req.get_header().get_peer().clone(); + let meta = router0 + .must_query_debug_info(2, Duration::from_secs(3)) + .unwrap(); + assert_eq!(meta.region_state.epoch.version, epoch.get_version()); + assert_eq!(meta.region_state.epoch.conf_ver, new_conf_ver); + assert_eq!(meta.region_state.peers, vec![leader_peer]); + // TODO: check if the peer is removed once life trace is implemented or + // snapshot is implemented. +} diff --git a/components/raftstore-v2/tests/integrations/test_read.rs b/components/raftstore-v2/tests/integrations/test_read.rs index 9f3c5c2c03a..4f49757085f 100644 --- a/components/raftstore-v2/tests/integrations/test_read.rs +++ b/components/raftstore-v2/tests/integrations/test_read.rs @@ -1,7 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use futures::executor::block_on; -use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, ReadIndexRequest, Request, StatusCmdType}; +use kvproto::raft_cmdpb::{CmdType, Request}; use raftstore_v2::router::PeerMsg; use tikv_util::{config::ReadableDuration, store::new_peer}; use txn_types::WriteBatchFlags; @@ -16,25 +16,10 @@ fn test_read_index() { let router = cluster.router(0); std::thread::sleep(std::time::Duration::from_millis(200)); let region_id = 2; - let mut req = RaftCmdRequest::default(); - req.mut_header().set_peer(new_peer(1, 3)); - req.mut_status_request() - .set_cmd_type(StatusCmdType::RegionDetail); - let res = router.query(region_id, req.clone()).unwrap(); - let status_resp = res.response().unwrap().get_status_response(); - let detail = status_resp.get_region_detail(); - let region = detail.get_region().clone(); - - let read_index_req = ReadIndexRequest::default(); - let mut req = RaftCmdRequest::default(); - req.mut_header().set_peer(new_peer(1, 3)); - req.mut_header().set_term(7); - req.mut_header().set_region_id(region_id); - req.mut_header() - .set_region_epoch(region.get_region_epoch().clone()); + let mut req = router.new_request_for(region_id); let mut request_inner = Request::default(); request_inner.set_cmd_type(CmdType::Snap); - request_inner.set_read_index(read_index_req); + request_inner.mut_read_index(); req.mut_requests().push(request_inner); let res = router.query(region_id, req.clone()).unwrap(); let resp = res.read().unwrap(); @@ -54,11 +39,7 @@ fn test_read_index() { std::thread::sleep(std::time::Duration::from_millis(200)); let read_req = req.clone(); // the read lease should be expired and renewed by write - let mut req = RaftCmdRequest::default(); - req.mut_header().set_peer(new_peer(1, 3)); - req.mut_header().set_region_id(region_id); - req.mut_header() - .set_region_epoch(region.get_region_epoch().clone()); + let mut req = router.new_request_for(region_id); let mut put_req = Request::default(); put_req.set_cmd_type(CmdType::Put); 
put_req.mut_put().set_key(b"key".to_vec()); @@ -80,21 +61,7 @@ fn test_snap_without_read_index() { let router = cluster.router(0); std::thread::sleep(std::time::Duration::from_millis(200)); let region_id = 2; - let mut req = RaftCmdRequest::default(); - req.mut_header().set_peer(new_peer(1, 3)); - req.mut_status_request() - .set_cmd_type(StatusCmdType::RegionDetail); - let res = router.query(region_id, req.clone()).unwrap(); - let status_resp = res.response().unwrap().get_status_response(); - let detail = status_resp.get_region_detail(); - let mut region = detail.get_region().clone(); - - let mut req = RaftCmdRequest::default(); - req.mut_header().set_peer(new_peer(1, 3)); - req.mut_header().set_term(6); - req.mut_header().set_region_id(region_id); - req.mut_header() - .set_region_epoch(region.take_region_epoch()); + let mut req = router.new_request_for(region_id); let mut request_inner = Request::default(); request_inner.set_cmd_type(CmdType::Snap); req.mut_requests().push(request_inner); @@ -126,21 +93,7 @@ fn test_query_with_write_cmd() { let router = cluster.router(0); std::thread::sleep(std::time::Duration::from_millis(200)); let region_id = 2; - let mut req = RaftCmdRequest::default(); - req.mut_header().set_peer(new_peer(1, 3)); - req.mut_status_request() - .set_cmd_type(StatusCmdType::RegionDetail); - let res = router.query(region_id, req.clone()).unwrap(); - let status_resp = res.response().unwrap().get_status_response(); - let detail = status_resp.get_region_detail(); - let mut region = detail.get_region().clone(); - - let mut req = RaftCmdRequest::default(); - req.mut_header().set_peer(new_peer(1, 3)); - req.mut_header().set_term(6); - req.mut_header().set_region_id(region_id); - req.mut_header() - .set_region_epoch(region.take_region_epoch()); + let mut req = router.new_request_for(2); for write_cmd in [ CmdType::Prewrite, @@ -157,6 +110,7 @@ fn test_query_with_write_cmd() { assert!(resp.is_none()); let error_resp = res.response().unwrap(); 
assert!(error_resp.get_header().has_error()); + req.clear_requests(); } } @@ -166,21 +120,7 @@ fn test_snap_with_invalid_parameter() { let router = cluster.router(0); std::thread::sleep(std::time::Duration::from_millis(200)); let region_id = 2; - let mut req = RaftCmdRequest::default(); - req.mut_header().set_peer(new_peer(1, 3)); - req.mut_status_request() - .set_cmd_type(StatusCmdType::RegionDetail); - let res = router.query(region_id, req.clone()).unwrap(); - let status_resp = res.response().unwrap().get_status_response(); - let detail = status_resp.get_region_detail(); - let mut region = detail.get_region().clone(); - let mut region_epoch = region.take_region_epoch(); - - let mut req = RaftCmdRequest::default(); - req.mut_header().set_peer(new_peer(1, 3)); - req.mut_header().set_term(6); - req.mut_header().set_region_id(region_id); - req.mut_header().set_region_epoch(region_epoch.clone()); + let mut req = router.new_request_for(region_id); let mut request_inner = Request::default(); request_inner.set_cmd_type(CmdType::Snap); req.mut_requests().push(request_inner); @@ -217,10 +157,11 @@ fn test_snap_with_invalid_parameter() { // run again with invalid region_epoch let mut invalid_req = req.clone(); - region_epoch.set_version(region_epoch.get_version() + 1); + let invalid_ver = req.get_header().get_region_epoch().get_version() + 1; invalid_req .mut_header() - .set_region_epoch(region_epoch.clone()); + .mut_region_epoch() + .set_version(invalid_ver); let res = router.query(region_id, invalid_req).unwrap(); let error_resp = res.response().unwrap(); assert!(error_resp.get_header().has_error()); @@ -232,21 +173,7 @@ fn test_local_read() { let mut router = cluster.router(0); std::thread::sleep(std::time::Duration::from_millis(200)); let region_id = 2; - let mut req = RaftCmdRequest::default(); - req.mut_header().set_peer(new_peer(1, 3)); - req.mut_status_request() - .set_cmd_type(StatusCmdType::RegionDetail); - let res = router.query(region_id, req.clone()).unwrap(); 
- let status_resp = res.response().unwrap().get_status_response(); - let detail = status_resp.get_region_detail(); - let mut region = detail.get_region().clone(); - - let mut req = RaftCmdRequest::default(); - req.mut_header().set_peer(new_peer(1, 3)); - req.mut_header().set_term(6); - req.mut_header().set_region_id(region_id); - req.mut_header() - .set_region_epoch(region.take_region_epoch()); + let mut req = router.new_request_for(region_id); let mut request_inner = Request::default(); request_inner.set_cmd_type(CmdType::Snap); req.mut_requests().push(request_inner); diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 522b68e2f09..f67c3a28800 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -31,9 +31,8 @@ use kvproto::{ metapb::{self, PeerRole}, pdpb::{self, PeerStats}, raft_cmdpb::{ - self, AdminCmdType, AdminResponse, ChangePeerRequest, CmdType, CommitMergeRequest, - PutRequest, RaftCmdRequest, RaftCmdResponse, Request, TransferLeaderRequest, - TransferLeaderResponse, + self, AdminCmdType, AdminResponse, CmdType, CommitMergeRequest, PutRequest, RaftCmdRequest, + RaftCmdResponse, Request, TransferLeaderRequest, TransferLeaderResponse, }, raft_serverpb::{ ExtraMessage, ExtraMessageType, MergeState, PeerState, RaftApplyState, RaftMessage, @@ -47,11 +46,10 @@ use pd_client::{BucketStat, INVALID_ID}; use protobuf::Message; use raft::{ self, - eraftpb::{self, ConfChangeType, Entry, EntryType, MessageType}, - Changer, GetEntriesContext, LightReady, ProgressState, ProgressTracker, RawNode, Ready, - SnapshotStatus, StateRole, INVALID_INDEX, NO_LIMIT, + eraftpb::{self, Entry, EntryType, MessageType}, + GetEntriesContext, LightReady, ProgressState, RawNode, Ready, SnapshotStatus, StateRole, + INVALID_INDEX, NO_LIMIT, }; -use raft_proto::ConfChangeI; use rand::seq::SliceRandom; use smallvec::SmallVec; use tikv_alloc::trace::TraceEvent; @@ -3696,138 +3694,6 @@ where 
self.proposals.push(p); } - // TODO: set higher election priority of voter/incoming voter than demoting - // voter - /// Validate the `ConfChange` requests and check whether it's safe to - /// propose these conf change requests. - /// It's safe iff at least the quorum of the Raft group is still healthy - /// right after all conf change is applied. - /// If 'allow_remove_leader' is false then the peer to be removed should - /// not be the leader. - fn check_conf_change( - &mut self, - ctx: &mut PollContext, - change_peers: &[ChangePeerRequest], - cc: &impl ConfChangeI, - ) -> Result<()> { - // Check whether current joint state can handle this request - let mut after_progress = self.check_joint_state(cc)?; - let current_progress = self.raft_group.status().progress.unwrap().clone(); - let kind = ConfChangeKind::confchange_kind(change_peers.len()); - - if kind == ConfChangeKind::LeaveJoint { - if self.peer.get_role() == PeerRole::DemotingVoter && !self.is_force_leader() { - return Err(box_err!( - "{} ignore leave joint command that demoting leader", - self.tag - )); - } - // Leaving joint state, skip check - return Ok(()); - } - - // Check whether this request is valid - let mut check_dup = HashSet::default(); - let mut only_learner_change = true; - let current_voter = current_progress.conf().voters().ids(); - for cp in change_peers.iter() { - let (change_type, peer) = (cp.get_change_type(), cp.get_peer()); - match (change_type, peer.get_role()) { - (ConfChangeType::RemoveNode, PeerRole::Voter) if kind != ConfChangeKind::Simple => { - return Err(box_err!( - "{} invalid conf change request: {:?}, can not remove voter directly", - self.tag, - cp - )); - } - (ConfChangeType::RemoveNode, _) - | (ConfChangeType::AddNode, PeerRole::Voter) - | (ConfChangeType::AddLearnerNode, PeerRole::Learner) => {} - _ => { - return Err(box_err!( - "{} invalid conf change request: {:?}", - self.tag, - cp - )); - } - } - - if !check_dup.insert(peer.get_id()) { - return Err(box_err!( - "{} 
invalid conf change request, have multiple commands for the same peer {}", - self.tag, - peer.get_id() - )); - } - - if peer.get_id() == self.peer_id() - && (change_type == ConfChangeType::RemoveNode - // In Joint confchange, the leader is allowed to be DemotingVoter - || (kind == ConfChangeKind::Simple - && change_type == ConfChangeType::AddLearnerNode)) - && !ctx.cfg.allow_remove_leader() - { - return Err(box_err!( - "{} ignore remove leader or demote leader", - self.tag - )); - } - - if current_voter.contains(peer.get_id()) || change_type == ConfChangeType::AddNode { - only_learner_change = false; - } - } - - // Multiple changes that only effect learner will not product `IncommingVoter` - // or `DemotingVoter` after apply, but raftstore layer and PD rely on these - // roles to detect joint state - if kind != ConfChangeKind::Simple && only_learner_change { - return Err(box_err!( - "{} invalid conf change request, multiple changes that only effect learner", - self.tag - )); - } - - let promoted_commit_index = after_progress.maximal_committed_index().0; - if current_progress.is_singleton() // It's always safe if there is only one node in the cluster. - || promoted_commit_index >= self.get_store().truncated_index() || self.force_leader.is_some() - { - return Ok(()); - } - - PEER_ADMIN_CMD_COUNTER_VEC - .with_label_values(&["conf_change", "reject_unsafe"]) - .inc(); - - // Waking it up to replicate logs to candidate. 
- self.should_wake_up = true; - Err(box_err!( - "{} unsafe to perform conf change {:?}, before: {:?}, after: {:?}, truncated index {}, promoted commit index {}", - self.tag, - change_peers, - current_progress.conf().to_conf_state(), - after_progress.conf().to_conf_state(), - self.get_store().truncated_index(), - promoted_commit_index - )) - } - - /// Check if current joint state can handle this confchange - fn check_joint_state(&mut self, cc: &impl ConfChangeI) -> Result { - let cc = &cc.as_v2(); - let mut prs = self.raft_group.status().progress.unwrap().clone(); - let mut changer = Changer::new(&prs); - let (cfg, changes) = if cc.leave_joint() { - changer.leave_joint()? - } else if let Some(auto_leave) = cc.enter_joint() { - changer.enter_joint(auto_leave, &cc.changes)? - } else { - changer.simple(&cc.changes)? - }; - prs.apply_conf(cfg, changes, self.raft_group.raft.raft_log.last_index()); - Ok(prs) - } - pub fn transfer_leader(&mut self, peer: &metapb::Peer) { info!( "transfer leader"; @@ -4696,7 +4562,16 @@ where let cc = change_peer.to_confchange(data); let changes = change_peer.get_change_peers(); - self.check_conf_change(ctx, changes.as_ref(), &cc)?; + // Because the group is always woken up when there is log gap, so no need + // to wake it up again when command is aborted by log gap. 
+ util::check_conf_change( + &ctx.cfg, + &self.raft_group, + &self.peer, + changes.as_ref(), + &cc, + self.is_force_leader(), + )?; ctx.raft_metrics.propose.conf_change.inc(); // TODO: use local histogram metrics diff --git a/components/raftstore/src/store/region_meta.rs b/components/raftstore/src/store/region_meta.rs index 9af541cbfd9..0370c7604ec 100644 --- a/components/raftstore/src/store/region_meta.rs +++ b/components/raftstore/src/store/region_meta.rs @@ -2,7 +2,10 @@ use std::collections::HashMap; -use kvproto::{metapb::PeerRole, raft_serverpb}; +use kvproto::{ + metapb::{self, PeerRole}, + raft_serverpb, +}; use raft::{Progress, ProgressState, StateRole, Status}; use serde::{Deserialize, Serialize}; @@ -127,7 +130,7 @@ impl<'a> From> for RaftStatus { } } -#[derive(Debug, Copy, Clone, Serialize, Deserialize)] +#[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize)] pub enum RaftPeerRole { Voter, Learner, @@ -146,6 +149,24 @@ impl From for RaftPeerRole { } } +impl From for PeerRole { + fn from(role: RaftPeerRole) -> Self { + match role { + RaftPeerRole::Voter => PeerRole::Voter, + RaftPeerRole::Learner => PeerRole::Learner, + RaftPeerRole::IncomingVoter => PeerRole::IncomingVoter, + RaftPeerRole::DemotingVoter => PeerRole::DemotingVoter, + } + } +} + +impl PartialEq for RaftPeerRole { + fn eq(&self, other: &PeerRole) -> bool { + let r: RaftPeerRole = (*other).into(); + *self == r + } +} + #[derive(Debug, Copy, Clone, Serialize, Deserialize)] pub struct Epoch { pub conf_ver: u64, @@ -159,6 +180,13 @@ pub struct RegionPeer { pub role: RaftPeerRole, } +impl PartialEq for RegionPeer { + #[inline] + fn eq(&self, other: &metapb::Peer) -> bool { + self.id == other.id && self.store_id == other.store_id && self.role == other.role + } +} + #[derive(Debug, Copy, Clone, Serialize, Deserialize)] pub struct RegionMergeState { pub min_index: u64, diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 
a4b48e4ba37..a49d4707eb3 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -14,6 +14,7 @@ use std::{ u64, }; +use collections::HashSet; use engine_traits::KvEngine; use kvproto::{ kvrpcpb::{self, KeyRange, LeaderInfo}, @@ -24,14 +25,14 @@ use kvproto::{ use protobuf::{self, Message}; use raft::{ eraftpb::{self, ConfChangeType, ConfState, MessageType}, - INVALID_INDEX, + Changer, RawNode, INVALID_INDEX, }; use raft_proto::ConfChangeI; use tikv_util::{box_err, debug, info, store::region, time::monotonic_raw_now, Either}; use time::{Duration, Timespec}; use txn_types::{TimeStamp, WriteBatchFlags}; -use super::peer_storage; +use super::{metrics::PEER_ADMIN_CMD_COUNTER_VEC, peer_storage, Config}; use crate::{coprocessor::CoprocessorHost, Error, Result}; const INVALID_TIMESTAMP: u64 = u64::MAX; @@ -765,7 +766,7 @@ impl< } } -#[derive(PartialEq, Debug)] +#[derive(PartialEq, Debug, Clone, Copy)] pub enum ConfChangeKind { // Only contains one configuration change Simple, @@ -847,6 +848,118 @@ impl<'a> ChangePeerI for &'a ChangePeerV2Request { } } +/// Check if the conf change request is valid. +/// +/// The function will try to keep operation safe. In some edge cases (or +/// tests), we may not care about safety. In this case, `ignore_safety` +/// can be set to true. +/// +/// Make sure the peer can serve read and write when ignore safety, otherwise +/// it may produce stale result or cause unavailability. +pub fn check_conf_change( + cfg: &Config, + node: &RawNode, + leader: &metapb::Peer, + change_peers: &[ChangePeerRequest], + cc: &impl ConfChangeI, + ignore_safety: bool, +) -> Result<()> { + let current_progress = node.status().progress.unwrap().clone(); + let mut after_progress = current_progress.clone(); + let cc_v2 = cc.as_v2(); + let mut changer = Changer::new(&after_progress); + let (conf, changes) = if cc_v2.leave_joint() { + changer.leave_joint()? 
+ } else if let Some(auto_leave) = cc_v2.enter_joint() { + changer.enter_joint(auto_leave, &cc_v2.changes)? + } else { + changer.simple(&cc_v2.changes)? + }; + after_progress.apply_conf(conf, changes, node.raft.raft_log.last_index()); + + // Because the conf change can be applied successfully above, so the current + // raft group state must matches the command. For example, won't call leave + // joint on a non joint state. + let kind = ConfChangeKind::confchange_kind(change_peers.len()); + if kind == ConfChangeKind::LeaveJoint { + if ignore_safety || leader.get_role() != PeerRole::DemotingVoter { + return Ok(()); + } + return Err(box_err!("ignore leave joint command that demoting leader")); + } + + let mut check_dup = HashSet::default(); + let mut only_learner_change = true; + let current_voter = current_progress.conf().voters().ids(); + for cp in change_peers { + let (change_type, peer) = (cp.get_change_type(), cp.get_peer()); + match (change_type, peer.get_role()) { + (ConfChangeType::RemoveNode, PeerRole::Voter) if kind != ConfChangeKind::Simple => { + return Err(box_err!("{:?}: can not remove voter directly", cp)); + } + (ConfChangeType::RemoveNode, _) + | (ConfChangeType::AddNode, PeerRole::Voter) + | (ConfChangeType::AddLearnerNode, PeerRole::Learner) => {} + _ => { + return Err(box_err!("{:?}: op not match role", cp)); + } + } + + if !check_dup.insert(peer.get_id()) { + return Err(box_err!( + "have multiple commands for the same peer {}", + peer.get_id() + )); + } + + if peer.get_id() == leader.get_id() + && (change_type == ConfChangeType::RemoveNode + // In Joint confchange, the leader is allowed to be DemotingVoter + || (kind == ConfChangeKind::Simple + && change_type == ConfChangeType::AddLearnerNode)) + && !cfg.allow_remove_leader() + { + return Err(box_err!("ignore remove leader or demote leader")); + } + + if current_voter.contains(peer.get_id()) || change_type == ConfChangeType::AddNode { + only_learner_change = false; + } + } + + // Multiple changes 
that only effect learner will not product `IncommingVoter` + // or `DemotingVoter` after apply, but raftstore layer and PD rely on these + // roles to detect joint state + if kind != ConfChangeKind::Simple && only_learner_change { + return Err(box_err!("multiple changes that only effect learner")); + } + + if !ignore_safety { + let promoted_commit_index = after_progress.maximal_committed_index().0; + let first_index = node.raft.raft_log.first_index(); + if current_progress.is_singleton() // It's always safe if there is only one node in the cluster. + || promoted_commit_index + 1 >= first_index + { + return Ok(()); + } + + PEER_ADMIN_CMD_COUNTER_VEC + .with_label_values(&["conf_change", "reject_unsafe"]) + .inc(); + + Err(box_err!( + "{:?}: before: {:?}, after: {:?}, first index {}, promoted commit index {}", + change_peers, + current_progress.conf().to_conf_state(), + after_progress.conf().to_conf_state(), + first_index, + promoted_commit_index + )) + } else { + Ok(()) + } +} + pub struct MsgType<'a>(pub &'a RaftMessage); impl Display for MsgType<'_> { diff --git a/tests/integrations/raftstore/test_replication_mode.rs b/tests/integrations/raftstore/test_replication_mode.rs index 3eddc7ce40d..d20249bc53f 100644 --- a/tests/integrations/raftstore/test_replication_mode.rs +++ b/tests/integrations/raftstore/test_replication_mode.rs @@ -189,7 +189,7 @@ fn test_check_conf_change() { res.get_header() .get_error() .get_message() - .contains("unsafe to perform conf change"), + .contains("promoted commit index"), "{:?}", res ); From 2c083a41e1cae9b9222b8cd3f57675c7c37fffdc Mon Sep 17 00:00:00 2001 From: cosven Date: Tue, 25 Oct 2022 17:15:56 +0800 Subject: [PATCH 0284/1149] tests: fix one flaky testcase (#13602) close tikv/tikv#13603 fix one flaky testcase Signed-off-by: cosven Co-authored-by: qupeng --- tests/integrations/raftstore/test_snap_recovery.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integrations/raftstore/test_snap_recovery.rs 
b/tests/integrations/raftstore/test_snap_recovery.rs index 2db42d68e3f..70f9ae8d97c 100644 --- a/tests/integrations/raftstore/test_snap_recovery.rs +++ b/tests/integrations/raftstore/test_snap_recovery.rs @@ -16,6 +16,8 @@ fn test_check_pending_admin() { cluster.run(); + cluster.must_transfer_leader(1, new_peer(1, 1)); + // write a key to let leader stuck. cluster.must_put(b"k", b"v"); must_get_equal(&cluster.get_engine(1), b"k", b"v"); From 7fbbcdcee6f7381a9c048030c8f95fac055c42bb Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 25 Oct 2022 17:29:56 +0800 Subject: [PATCH 0285/1149] cdc, resolved_ts: add leadership resolver (#13657) close tikv/tikv#13656 cdc, resolved_ts: add leadership resolver Signed-off-by: Neil Shen Co-authored-by: Ti Chi Robot --- components/cdc/Cargo.toml | 1 + components/cdc/src/endpoint.rs | 186 +++---- .../cdc/tests/failpoints/test_endpoint.rs | 15 +- components/raftstore/src/store/util.rs | 78 +-- components/resolved_ts/src/advance.rs | 485 ++++++++++-------- components/resolved_ts/src/endpoint.rs | 92 ++-- components/test_raftstore/src/server.rs | 2 + 7 files changed, 465 insertions(+), 394 deletions(-) diff --git a/components/cdc/Cargo.toml b/components/cdc/Cargo.toml index 27ce81c57b4..62ef4cc29f5 100644 --- a/components/cdc/Cargo.toml +++ b/components/cdc/Cargo.toml @@ -26,6 +26,7 @@ portable = ["tikv/portable"] sse = ["tikv/sse"] mem-profiling = ["tikv/mem-profiling"] failpoints = ["tikv/failpoints"] +pprof-fp = ["tikv/pprof-fp"] [dependencies] api_version = { workspace = true } diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 7542bb1bfc8..614e282a5d9 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -24,33 +24,30 @@ use kvproto::{ }, kvrpcpb::ApiVersion, metapb::Region, - tikvpb::TikvClient, }; use online_config::{ConfigChange, OnlineConfig}; use pd_client::{Feature, PdClient}; use raftstore::{ coprocessor::{CmdBatch, ObserveId}, router::RaftStoreRouter, - 
store::{ - fsm::{ChangeObserver, StoreMeta}, - msg::{Callback, SignificantMsg}, - RegionReadProgressRegistry, - }, + store::fsm::{ChangeObserver, StoreMeta}, }; -use resolved_ts::Resolver; +use resolved_ts::{LeadershipResolver, Resolver}; use security::SecurityManager; use tikv::{config::CdcConfig, storage::Statistics}; use tikv_util::{ - debug, error, impl_display_as_debug, info, + debug, defer, error, impl_display_as_debug, info, + mpsc::bounded, + slow_log, sys::thread::ThreadBuildWrapper, - time::Limiter, + time::{Limiter, SlowTimer}, timer::SteadyTimer, warn, worker::{Runnable, RunnableWithTimer, ScheduleError, Scheduler}, }; use tokio::{ runtime::{Builder, Runtime}, - sync::{Mutex, Semaphore}, + sync::Semaphore, }; use txn_types::{TimeStamp, TxnExtra, TxnExtraScheduler}; @@ -155,7 +152,9 @@ pub enum Task { region: Region, resolver: Resolver, }, - RegisterMinTsEvent, + RegisterMinTsEvent { + leader_resolver: LeadershipResolver, + }, // The result of ChangeCmd should be returned from CDC Endpoint to ensure // the downstream switches to Normal after the previous commands was sunk. InitDownstream { @@ -223,7 +222,7 @@ impl fmt::Debug for Task { .field("observe_id", &observe_id) .field("region_id", ®ion.get_id()) .finish(), - Task::RegisterMinTsEvent => de.field("type", &"register_min_ts").finish(), + Task::RegisterMinTsEvent { .. } => de.field("type", &"register_min_ts").finish(), Task::InitDownstream { ref region_id, ref downstream_id, @@ -348,12 +347,6 @@ pub struct Endpoint { old_value_cache: OldValueCache, resolved_region_heap: ResolvedRegionHeap, - // Check leader - // store_id -> client - tikv_clients: Arc>>, - env: Arc, - security_mgr: Arc, - region_read_progress: RegionReadProgressRegistry, causal_ts_provider: Option>, // Metrics and logging. 
@@ -416,10 +409,17 @@ impl, E: KvEngine> Endpoint { let max_scan_batch_size = 1024; let region_read_progress = store_meta.lock().unwrap().region_read_progress.clone(); - let ep = Endpoint { - cluster_id, + let store_resolver_gc_interval = Duration::from_secs(60); + let leader_resolver = LeadershipResolver::new( + store_meta.lock().unwrap().store_id.unwrap(), + pd_client.clone(), env, security_mgr, + region_read_progress, + store_resolver_gc_interval, + ); + let ep = Endpoint { + cluster_id, capture_regions: HashMap::default(), connections: HashMap::default(), scheduler, @@ -447,14 +447,13 @@ impl, E: KvEngine> Endpoint { resolved_region_count: 0, unresolved_region_count: 0, sink_memory_quota, - tikv_clients: Arc::new(Mutex::new(HashMap::default())), - region_read_progress, + // store_resolver, // Log the first resolved ts warning. warn_resolved_ts_repeat_count: WARN_RESOLVED_TS_COUNT_THRESHOLD, current_ts: TimeStamp::zero(), causal_ts_provider, }; - ep.register_min_ts_event(); + ep.register_min_ts_event(leader_resolver); ep } @@ -997,24 +996,21 @@ impl, E: KvEngine> Endpoint { let _ = downstream.sink_event(resolved_ts_event, force_send); } - fn register_min_ts_event(&self) { + fn register_min_ts_event(&self, mut leader_resolver: LeadershipResolver) { let timeout = self.timer.delay(self.config.min_ts_interval.0); let pd_client = self.pd_client.clone(); let scheduler = self.scheduler.clone(); let raft_router = self.raft_router.clone(); - let regions: Vec<(u64, ObserveId)> = self + let regions: Vec = self .capture_regions .iter() - .map(|(region_id, delegate)| (*region_id, delegate.handle.id)) + .map(|(region_id, _)| *region_id) .collect(); let cm: ConcurrencyManager = self.concurrency_manager.clone(); - let env = self.env.clone(); - let security_mgr = self.security_mgr.clone(); - let store_meta = self.store_meta.clone(); - let tikv_clients = self.tikv_clients.clone(); let hibernate_regions_compatible = self.config.hibernate_regions_compatible; - let 
region_read_progress = self.region_read_progress.clone(); let causal_ts_provider = self.causal_ts_provider.clone(); + // We use channel to deliver leader_resolver in async block. + let (leader_resolver_tx, leader_resolver_rx) = bounded(1); let fut = async move { let _ = timeout.compat().await; @@ -1043,37 +1039,37 @@ impl, E: KvEngine> Endpoint { min_ts_min_lock = min_mem_lock_ts; } - match scheduler.schedule(Task::RegisterMinTsEvent) { - Ok(_) | Err(ScheduleError::Stopped(_)) => (), - // Must schedule `RegisterMinTsEvent` event otherwise resolved ts can not - // advance normally. - Err(err) => panic!("failed to regiester min ts event, error: {:?}", err), - } + let slow_timer = SlowTimer::default(); + defer!({ + slow_log!(T slow_timer, "cdc resolve region leadership"); + if let Ok(leader_resolver) = leader_resolver_rx.try_recv() { + match scheduler.schedule(Task::RegisterMinTsEvent { leader_resolver }) { + Ok(_) | Err(ScheduleError::Stopped(_)) => (), + // Must schedule `RegisterMinTsEvent` event otherwise resolved ts can not + // advance normally. + Err(err) => panic!("failed to regiester min ts event, error: {:?}", err), + } + } else { + // During shutdown, tso runtime drops future immediately, + // leader_resolver may be lost when this future drops before + // delivering leader_resolver. + warn!("cdc leader resolver is lost, are we shutdown?"); + } + }); + // Check region peer leadership, make sure they are leaders. 
let gate = pd_client.feature_gate(); - let regions = if hibernate_regions_compatible && gate.can_enable(FEATURE_RESOLVED_TS_STORE) { CDC_RESOLVED_TS_ADVANCE_METHOD.set(1); - let regions = regions - .into_iter() - .map(|(region_id, _)| region_id) - .collect(); - resolved_ts::region_resolved_ts_store( - regions, - store_meta, - region_read_progress, - pd_client, - security_mgr, - env, - tikv_clients, - min_ts, - ) - .await + leader_resolver.resolve(regions, min_ts).await } else { CDC_RESOLVED_TS_ADVANCE_METHOD.set(0); - Self::region_resolved_ts_raft(regions, &scheduler, raft_router, min_ts).await + leader_resolver + .resolve_by_raft(regions, min_ts, raft_router) + .await }; + leader_resolver_tx.send(leader_resolver).unwrap(); if !regions.is_empty() { match scheduler.schedule(Task::MinTs { @@ -1082,7 +1078,7 @@ impl, E: KvEngine> Endpoint { current_ts: min_ts_pd, }) { Ok(_) | Err(ScheduleError::Stopped(_)) => (), - // Must schedule `RegisterMinTsEvent` event otherwise resolved ts can not + // Must schedule `MinTS` event otherwise resolved ts can not // advance normally. Err(err) => panic!("failed to schedule min ts event, error: {:?}", err), } @@ -1098,54 +1094,6 @@ impl, E: KvEngine> Endpoint { self.tso_worker.spawn(fut); } - async fn region_resolved_ts_raft( - regions: Vec<(u64, ObserveId)>, - scheduler: &Scheduler, - raft_router: T, - min_ts: TimeStamp, - ) -> Vec { - // TODO: send a message to raftstore would consume too much cpu time, - // try to handle it outside raftstore. 
- let regions: Vec<_> = regions - .iter() - .copied() - .map(|(region_id, observe_id)| { - let scheduler_clone = scheduler.clone(); - let raft_router_clone = raft_router.clone(); - async move { - let (tx, rx) = tokio::sync::oneshot::channel(); - if let Err(e) = raft_router_clone.significant_send( - region_id, - SignificantMsg::LeaderCallback(Callback::read(Box::new(move |resp| { - let resp = if resp.response.get_header().has_error() { - None - } else { - Some(region_id) - }; - if tx.send(resp).is_err() { - error!("cdc send tso response failed"; "region_id" => region_id); - } - }))), - ) { - warn!("cdc send LeaderCallback failed"; "err" => ?e, "min_ts" => min_ts); - let deregister = Deregister::Delegate { - observe_id, - region_id, - err: Error::request(e.into()), - }; - if let Err(e) = scheduler_clone.schedule(Task::Deregister(deregister)) { - error!("cdc schedule cdc task failed"; "error" => ?e); - } - return None; - } - rx.await.unwrap_or(None) - } - }) - .collect(); - let resps = futures::future::join_all(regions).await; - resps.into_iter().flatten().collect::>() - } - fn on_open_conn(&mut self, conn: Conn) { self.connections.insert(conn.get_id(), conn); } @@ -1180,7 +1128,9 @@ impl, E: KvEngine> Runnable for Endpoint { old_value_cb, } => self.on_multi_batch(multi, old_value_cb), Task::OpenConn { conn } => self.on_open_conn(conn), - Task::RegisterMinTsEvent => self.register_min_ts_event(), + Task::RegisterMinTsEvent { + leader_resolver: store_resolver, + } => self.register_min_ts_event(store_resolver), Task::InitDownstream { region_id, downstream_id, @@ -1320,6 +1270,7 @@ mod tests { raft_router: MockRaftStoreRouter, task_rx: ReceiverWrapper, raft_rxs: HashMap>>, + leader_resolver: Option, } impl TestEndpointSuite { @@ -1384,11 +1335,26 @@ mod tests { ) -> TestEndpointSuite { let (task_sched, task_rx) = dummy_scheduler(); let raft_router = MockRaftStoreRouter::new(); + let mut store_meta = StoreMeta::new(0); + store_meta.store_id = Some(1); + let 
region_read_progress = store_meta.region_read_progress.clone(); + let pd_client = Arc::new(TestPdClient::new(0, true)); + let env = Arc::new(Environment::new(1)); + let security_mgr = Arc::new(SecurityManager::default()); + let store_resolver_gc_interval = Duration::from_secs(60); + let leader_resolver = LeadershipResolver::new( + 1, + pd_client.clone(), + env.clone(), + security_mgr.clone(), + region_read_progress, + store_resolver_gc_interval, + ); let ep = Endpoint::new( DEFAULT_CLUSTER_ID, cfg, api_version, - Arc::new(TestPdClient::new(0, true)), + pd_client, task_sched.clone(), raft_router.clone(), engine.unwrap_or_else(|| { @@ -1399,10 +1365,10 @@ mod tests { .unwrap() }), CdcObserver::new(task_sched), - Arc::new(StdMutex::new(StoreMeta::new(0))), + Arc::new(StdMutex::new(store_meta)), ConcurrencyManager::new(1.into()), - Arc::new(Environment::new(1)), - Arc::new(SecurityManager::default()), + env, + security_mgr, MemoryQuota::new(usize::MAX), causal_ts_provider, ); @@ -1412,6 +1378,7 @@ mod tests { raft_router, task_rx, raft_rxs: HashMap::default(), + leader_resolver: Some(leader_resolver), } } @@ -1903,7 +1870,8 @@ mod tests { let start_ts = block_on(ts_provider.async_get_ts()).unwrap(); let mut suite = mock_endpoint_with_ts_provider(&cfg, None, ApiVersion::V2, Some(ts_provider.clone())); - suite.run(Task::RegisterMinTsEvent); + let leader_resolver = suite.leader_resolver.take().unwrap(); + suite.run(Task::RegisterMinTsEvent { leader_resolver }); suite .task_rx .recv_timeout(Duration::from_millis(1500)) diff --git a/components/cdc/tests/failpoints/test_endpoint.rs b/components/cdc/tests/failpoints/test_endpoint.rs index 31c302c3c14..6e208ccac90 100644 --- a/components/cdc/tests/failpoints/test_endpoint.rs +++ b/components/cdc/tests/failpoints/test_endpoint.rs @@ -501,7 +501,20 @@ fn test_cdc_rawkv_resolved_ts() { sleep_ms(100); let event = receive_event(true).resolved_ts.unwrap(); - assert_eq!(ts.next(), TimeStamp::from(event.ts)); + assert!( + ts.next() >= 
TimeStamp::from(event.ts), + "{} {}", + ts, + TimeStamp::from(event.ts) + ); + // Receive again to make sure resolved ts <= ongoing request's ts. + let event = receive_event(true).resolved_ts.unwrap(); + assert!( + ts.next() >= TimeStamp::from(event.ts), + "{} {}", + ts, + TimeStamp::from(event.ts) + ); fail::remove(pause_write_fp); handle.join().unwrap(); diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index a49d4707eb3..61da5805727 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -9,7 +9,7 @@ use std::{ option::Option, sync::{ atomic::{AtomicBool, AtomicU64, Ordering as AtomicOrdering}, - Arc, Mutex, + Arc, Mutex, MutexGuard, }, u64, }; @@ -1051,7 +1051,7 @@ impl RegionReadProgressRegistry { ) -> Vec { let mut regions = Vec::with_capacity(leaders.len()); let registry = self.registry.lock().unwrap(); - for leader_info in leaders { + for leader_info in &leaders { let region_id = leader_info.get_region_id(); if let Some(rp) = registry.get(®ion_id) { if rp.consume_leader_info(leader_info, coprocessor) { @@ -1062,18 +1062,6 @@ impl RegionReadProgressRegistry { regions } - // Get the `LeaderInfo` of the requested regions - pub fn dump_leader_infos(&self, regions: &[u64]) -> HashMap, LeaderInfo)> { - let registry = self.registry.lock().unwrap(); - let mut info_map = HashMap::with_capacity(regions.len()); - for region_id in regions { - if let Some(rrp) = registry.get(region_id) { - info_map.insert(*region_id, rrp.dump_leader_info()); - } - } - info_map - } - /// Invoke the provided callback with the registry, an internal lock will /// hold while invoking the callback so it is important that *not* try /// to acquiring any lock inside the callback to avoid dead lock @@ -1196,14 +1184,14 @@ impl RegionReadProgress { // provided `LeaderInfo` is same as ours pub fn consume_leader_info( &self, - mut leader_info: LeaderInfo, + leader_info: &LeaderInfo, coprocessor: &CoprocessorHost, 
) -> bool { let mut core = self.core.lock().unwrap(); if leader_info.has_read_state() { // It is okay to update `safe_ts` without checking the `LeaderInfo`, the // `read_state` is guaranteed to be valid when it is published by the leader - let rs = leader_info.take_read_state(); + let rs = leader_info.get_read_state(); let (apply_index, ts) = (rs.get_applied_index(), rs.get_safe_ts()); if apply_index != 0 && ts != 0 && !core.discard { if let Some(ts) = core.update_safe_ts(apply_index, ts) { @@ -1224,23 +1212,11 @@ impl RegionReadProgress { // Dump the `LeaderInfo` and the peer list pub fn dump_leader_info(&self) -> (Vec, LeaderInfo) { - let mut leader_info = LeaderInfo::default(); let core = self.core.lock().unwrap(); - let read_state = { - // Get the latest `read_state` - let ReadState { idx, ts } = core.pending_items.back().unwrap_or(&core.read_state); - let mut rs = kvrpcpb::ReadState::default(); - rs.set_applied_index(*idx); - rs.set_safe_ts(*ts); - rs - }; - let li = &core.leader_info; - leader_info.set_peer_id(li.leader_id); - leader_info.set_term(li.leader_term); - leader_info.set_region_id(core.region_id); - leader_info.set_region_epoch(li.epoch.clone()); - leader_info.set_read_state(read_state); - (li.peers.clone(), leader_info) + ( + core.get_local_leader_info().peers.clone(), + core.get_leader_info(), + ) } pub fn update_leader_info(&self, peer_id: u64, term: u64, region: &Region) { @@ -1286,10 +1262,15 @@ impl RegionReadProgress { pub fn resolved_ts(&self) -> u64 { self.safe_ts() } + + // Dump the `LeaderInfo` and the peer list + pub fn get_core(&self) -> MutexGuard<'_, RegionReadProgressCore> { + self.core.lock().unwrap() + } } #[derive(Debug)] -struct RegionReadProgressCore { +pub struct RegionReadProgressCore { tag: String, region_id: u64, applied_index: u64, @@ -1336,6 +1317,14 @@ impl LocalLeaderInfo { peers: region.get_peers().to_vec(), } } + + pub fn get_peers(&self) -> &[Peer] { + &self.peers + } + + pub fn get_leader_id(&self) -> u64 { + 
self.leader_id + } } impl RegionReadProgressCore { @@ -1449,6 +1438,29 @@ impl RegionReadProgressCore { } self.pending_items.push_back(item); } + + pub fn get_leader_info(&self) -> LeaderInfo { + let mut leader_info = LeaderInfo::default(); + let read_state = { + // Get the latest `read_state` + let ReadState { idx, ts } = self.pending_items.back().unwrap_or(&self.read_state); + let mut rs = kvrpcpb::ReadState::default(); + rs.set_applied_index(*idx); + rs.set_safe_ts(*ts); + rs + }; + let li = &self.leader_info; + leader_info.set_peer_id(li.leader_id); + leader_info.set_term(li.leader_term); + leader_info.set_region_id(self.region_id); + leader_info.set_region_epoch(li.epoch.clone()); + leader_info.set_read_state(read_state); + leader_info + } + + pub fn get_local_leader_info(&self) -> &LocalLeaderInfo { + &self.leader_info + } } /// Represent the duration of all stages of raftstore recorded by one diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index 190c4474711..35426f4861d 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -4,7 +4,7 @@ use std::{ ffi::CString, sync::{ atomic::{AtomicI32, Ordering}, - Arc, Mutex as StdMutex, + Arc, }, time::Duration, }; @@ -14,7 +14,7 @@ use concurrency_manager::ConcurrencyManager; use engine_traits::KvEngine; use fail::fail_point; use futures::{compat::Future01CompatExt, future::select_all, FutureExt, TryFutureExt}; -use grpcio::{ChannelBuilder, Environment, Error as GrpcError, RpcStatusCode}; +use grpcio::{ChannelBuilder, Environment}; use kvproto::{ kvrpcpb::{CheckLeaderRequest, LeaderInfo}, metapb::{Peer, PeerRole}, @@ -22,14 +22,24 @@ use kvproto::{ }; use pd_client::PdClient; use protobuf::Message; -use raftstore::store::{fsm::StoreMeta, util::RegionReadProgressRegistry}; +use raftstore::{ + router::RaftStoreRouter, + store::{ + msg::{Callback, SignificantMsg}, + util::RegionReadProgressRegistry, + }, +}; use 
security::SecurityManager; use tikv_util::{ - info, sys::thread::ThreadBuildWrapper, time::Instant, timer::SteadyTimer, worker::Scheduler, + info, + sys::thread::ThreadBuildWrapper, + time::{Instant, SlowTimer}, + timer::SteadyTimer, + worker::Scheduler, }; use tokio::{ runtime::{Builder, Runtime}, - sync::Mutex, + sync::{Mutex, Notify}, }; use txn_types::TimeStamp; @@ -38,8 +48,6 @@ use crate::{endpoint::Task, metrics::*, util}; const DEFAULT_CHECK_LEADER_TIMEOUT_MILLISECONDS: u64 = 5_000; // 5s pub struct AdvanceTsWorker { - store_meta: Arc>, - region_read_progress: RegionReadProgressRegistry, pd_client: Arc, timer: SteadyTimer, worker: Runtime, @@ -47,21 +55,13 @@ pub struct AdvanceTsWorker { /// The concurrency manager for transactions. It's needed for CDC to check /// locks when calculating resolved_ts. concurrency_manager: ConcurrencyManager, - // store_id -> client - tikv_clients: Arc>>, - env: Arc, - security_mgr: Arc, } impl AdvanceTsWorker { pub fn new( pd_client: Arc, scheduler: Scheduler>, - store_meta: Arc>, - region_read_progress: RegionReadProgressRegistry, concurrency_manager: ConcurrencyManager, - env: Arc, - security_mgr: Arc, ) -> Self { let worker = Builder::new_multi_thread() .thread_name("advance-ts") @@ -72,33 +72,28 @@ impl AdvanceTsWorker { .build() .unwrap(); Self { - env, - security_mgr, scheduler, pd_client, worker, timer: SteadyTimer::default(), - store_meta, - region_read_progress, concurrency_manager, - tikv_clients: Arc::new(Mutex::new(HashMap::default())), } } } impl AdvanceTsWorker { - pub fn advance_ts_for_regions(&self, regions: Vec) { - if regions.is_empty() { - return; - } + // Advance ts asynchronously and register RegisterAdvanceEvent when its done. 
+ pub fn advance_ts_for_regions( + &self, + regions: Vec, + mut leader_resolver: LeadershipResolver, + advance_ts_interval: Duration, + cfg_update_notify: Arc, + ) { + let cm = self.concurrency_manager.clone(); let pd_client = self.pd_client.clone(); let scheduler = self.scheduler.clone(); - let cm: ConcurrencyManager = self.concurrency_manager.clone(); - let env = self.env.clone(); - let security_mgr = self.security_mgr.clone(); - let store_meta = self.store_meta.clone(); - let tikv_clients = self.tikv_clients.clone(); - let region_read_progress = self.region_read_progress.clone(); + let timeout = self.timer.delay(advance_ts_interval); let fut = async move { // Ignore get tso errors since we will retry every `advance_ts_interval`. @@ -115,211 +110,291 @@ impl AdvanceTsWorker { } } - let regions = region_resolved_ts_store( - regions, - store_meta, - region_read_progress, - pd_client, - security_mgr, - env, - tikv_clients, - min_ts, - ) - .await; - + let regions = leader_resolver.resolve(regions, min_ts).await; if !regions.is_empty() { - if let Err(e) = scheduler.schedule(Task::AdvanceResolvedTs { + if let Err(e) = scheduler.schedule(Task::ResolvedTsAdvanced { regions, ts: min_ts, }) { info!("failed to schedule advance event"; "err" => ?e); } } - }; - self.worker.spawn(fut); - } - pub fn register_next_event(&self, advance_ts_interval: Duration, cfg_version: usize) { - let scheduler = self.scheduler.clone(); - let timeout = self.timer.delay(advance_ts_interval); - let fut = async move { - let _ = timeout.compat().await; - if let Err(e) = scheduler.schedule(Task::RegisterAdvanceEvent { cfg_version }) { - info!("failed to schedule register advance event"; "err" => ?e); + futures::select! { + _ = timeout.compat().fuse() => (), + // Skip wait timeout if cfg is updated. + _ = cfg_update_notify.notified().fuse() => (), + }; + // NB: We must schedule the leader resolver even if there is no region, + // otherwise we can not advance resolved ts next time. 
+ if let Err(e) = scheduler.schedule(Task::AdvanceResolvedTs { leader_resolver }) { + error!("failed to schedule register advance event"; "err" => ?e); } }; self.worker.spawn(fut); } } -// Confirms leadership of region peer before trying to advance resolved ts. -// This function broadcasts a special message to all stores, gets the leader id -// of them to confirm whether current peer has a quorum which accepts its -// leadership. -pub async fn region_resolved_ts_store( - regions: Vec, - store_meta: Arc>, - region_read_progress: RegionReadProgressRegistry, +pub struct LeadershipResolver { + tikv_clients: Mutex>, pd_client: Arc, - security_mgr: Arc, env: Arc, - tikv_clients: Arc>>, - min_ts: TimeStamp, -) -> Vec { - PENDING_RTS_COUNT.inc(); - defer!(PENDING_RTS_COUNT.dec()); - fail_point!("before_sync_replica_read_state", |_| regions.clone()); - - let store_id = match store_meta.lock().unwrap().store_id { - Some(id) => id, - None => return vec![], - }; + security_mgr: Arc, + region_read_progress: RegionReadProgressRegistry, + store_id: u64, // store_id -> leaders info, record the request to each stores - let mut store_map: HashMap> = HashMap::default(); + store_map: HashMap>, // region_id -> region, cache the information of regions - let mut region_map: HashMap> = HashMap::default(); + region_map: HashMap>, // region_id -> peers id, record the responses - let mut resp_map: HashMap> = HashMap::default(); - // region_id -> `(Vec, LeaderInfo)` - let info_map = region_read_progress.dump_leader_infos(®ions); - let mut valid_regions = HashSet::default(); - - for (region_id, (peer_list, leader_info)) in info_map { - let leader_id = leader_info.get_peer_id(); - // Check if the leader in this store - if util::find_store_id(&peer_list, leader_id) != Some(store_id) { - continue; + resp_map: HashMap>, + valid_regions: HashSet, + + gc_interval: Duration, + last_gc_time: Instant, +} + +impl LeadershipResolver { + pub fn new( + store_id: u64, + pd_client: Arc, + env: Arc, + 
security_mgr: Arc, + region_read_progress: RegionReadProgressRegistry, + gc_interval: Duration, + ) -> LeadershipResolver { + LeadershipResolver { + tikv_clients: Mutex::default(), + store_id, + pd_client, + env, + security_mgr, + region_read_progress, + + store_map: HashMap::default(), + region_map: HashMap::default(), + resp_map: HashMap::default(), + valid_regions: HashSet::default(), + last_gc_time: Instant::now_coarse(), + gc_interval, } - let mut unvotes = 0; - for peer in &peer_list { - if peer.store_id == store_id && peer.id == leader_id { - resp_map.entry(region_id).or_default().push(store_id); - } else { - // It's still necessary to check leader on learners even if they don't vote - // because performing stale read on learners require it. - store_map - .entry(peer.store_id) - .or_default() - .push(leader_info.clone()); - if peer.get_role() != PeerRole::Learner { - unvotes += 1; - } - } + } + + fn gc(&mut self) { + let now = Instant::now_coarse(); + if now - self.last_gc_time > self.gc_interval { + self.store_map = HashMap::default(); + self.region_map = HashMap::default(); + self.resp_map = HashMap::default(); + self.valid_regions = HashSet::default(); + self.last_gc_time = now; } - // Check `region_has_quorum` here because `store_map` can be empty, - // in which case `region_has_quorum` won't be called any more. 
- if unvotes == 0 && region_has_quorum(&peer_list, &resp_map[®ion_id]) { - valid_regions.insert(region_id); - } else { - region_map.insert(region_id, peer_list); + } + + fn clear(&mut self) { + self.store_map.clear(); + self.region_map.clear(); + self.resp_map.clear(); + self.valid_regions.clear(); + } + + pub async fn resolve_by_raft( + &self, + regions: Vec, + min_ts: TimeStamp, + raft_router: T, + ) -> Vec + where + T: 'static + RaftStoreRouter, + E: KvEngine, + { + let mut reqs = Vec::with_capacity(regions.len()); + for region_id in regions { + let raft_router_clone = raft_router.clone(); + let req = async move { + let (tx, rx) = tokio::sync::oneshot::channel(); + let msg = SignificantMsg::LeaderCallback(Callback::read(Box::new(move |resp| { + let resp = if resp.response.get_header().has_error() { + None + } else { + Some(region_id) + }; + if tx.send(resp).is_err() { + error!("cdc send tso response failed"; "region_id" => region_id); + } + }))); + if let Err(e) = raft_router_clone.significant_send(region_id, msg) { + warn!("cdc send LeaderCallback failed"; "err" => ?e, "min_ts" => min_ts); + return None; + } + rx.await.unwrap_or(None) + }; + reqs.push(req); } + + let resps = futures::future::join_all(reqs).await; + resps.into_iter().flatten().collect::>() } - // Approximate `LeaderInfo` size - let leader_info_size = store_map - .values() - .next() - .map_or(0, |regions| regions[0].compute_size()); - let store_count = store_map.len(); - let mut stores: Vec<_> = store_map - .into_iter() - .map(|(to_store, regions)| { - let tikv_clients = tikv_clients.clone(); - let env = env.clone(); - let pd_client = pd_client.clone(); - let security_mgr = security_mgr.clone(); - let region_num = regions.len() as u32; - CHECK_LEADER_REQ_SIZE_HISTOGRAM.observe((leader_info_size * region_num) as f64); - CHECK_LEADER_REQ_ITEM_COUNT_HISTOGRAM.observe(region_num as f64); - - // Check leadership for `regions` on `to_store`. 
- async move { - PENDING_CHECK_LEADER_REQ_COUNT.inc(); - defer!(PENDING_CHECK_LEADER_REQ_COUNT.dec()); - let client = - get_tikv_client(to_store, pd_client, security_mgr, env, tikv_clients.clone()) - .await - .map_err(|e| { - (to_store, e.retryable(), format!("[get tikv client] {}", e)) - })?; - - let mut req = CheckLeaderRequest::default(); - req.set_regions(regions.into()); - req.set_ts(min_ts.into_inner()); - let start = Instant::now_coarse(); - defer!({ - let elapsed = start.saturating_elapsed(); - slow_log!( - elapsed, - "check leader rpc costs too long, to_store: {}", - to_store - ); - RTS_CHECK_LEADER_DURATION_HISTOGRAM_VEC - .with_label_values(&["rpc"]) - .observe(elapsed.as_secs_f64()); - }); - - let rpc = match client.check_leader_async(&req) { - Ok(rpc) => rpc, - Err(GrpcError::RpcFailure(status)) - if status.code() == RpcStatusCode::UNIMPLEMENTED => - { - // Some stores like TiFlash don't implement it. - return Ok((to_store, vec![])); + + // Confirms leadership of region peer before trying to advance resolved ts. + // This function broadcasts a special message to all stores, gets the leader id + // of them to confirm whether current peer has a quorum which accepts its + // leadership. + pub async fn resolve(&mut self, regions: Vec, min_ts: TimeStamp) -> Vec { + // Clear previous result before resolving. + self.clear(); + // GC when necessary to prevent memory leak. 
+ self.gc(); + + PENDING_RTS_COUNT.inc(); + defer!(PENDING_RTS_COUNT.dec()); + fail_point!("before_sync_replica_read_state", |_| regions.clone()); + + let store_id = self.store_id; + let valid_regions = &mut self.valid_regions; + let region_map = &mut self.region_map; + let resp_map = &mut self.resp_map; + let store_map = &mut self.store_map; + self.region_read_progress.with(|registry| { + for (region_id, read_progress) in registry { + let core = read_progress.get_core(); + let local_leader_info = core.get_local_leader_info(); + let leader_id = local_leader_info.get_leader_id(); + let peer_list = local_leader_info.get_peers(); + // Check if the leader in this store + if util::find_store_id(peer_list, leader_id) != Some(store_id) { + continue; + } + let leader_info = core.get_leader_info(); + + let mut unvotes = 0; + for peer in peer_list { + if peer.store_id == store_id && peer.id == leader_id { + resp_map.entry(*region_id).or_default().push(store_id); + } else { + // It's still necessary to check leader on learners even if they don't vote + // because performing stale read on learners require it. + store_map + .entry(peer.store_id) + .or_default() + .push(leader_info.clone()); + if peer.get_role() != PeerRole::Learner { + unvotes += 1; + } } - Err(e) => return Err((to_store, true, format!("[rpc create failed]{}", e))), - }; - - PENDING_CHECK_LEADER_REQ_SENT_COUNT.inc(); - defer!(PENDING_CHECK_LEADER_REQ_SENT_COUNT.dec()); - let timeout = Duration::from_millis(DEFAULT_CHECK_LEADER_TIMEOUT_MILLISECONDS); - let regions = tokio::time::timeout(timeout, rpc) - .map_err(|e| (to_store, true, format!("[timeout] {}", e))) - .await? - .map_err(|e| (to_store, true, format!("[rpc failed] {}", e)))? - .take_regions(); - Ok((to_store, regions)) + } + // Check `region_has_quorum` here because `store_map` can be empty, + // in which case `region_has_quorum` won't be called any more. 
+ if unvotes == 0 && region_has_quorum(peer_list, &resp_map[region_id]) { + valid_regions.insert(*region_id); + } else { + region_map.insert(*region_id, peer_list.to_vec()); + } } - .boxed() - }) - .collect(); - let start = Instant::now_coarse(); + }); - defer!({ - RTS_CHECK_LEADER_DURATION_HISTOGRAM_VEC - .with_label_values(&["all"]) - .observe(start.saturating_elapsed_secs()); - }); - for _ in 0..store_count { - // Use `select_all` to avoid the process getting blocked when some TiKVs were - // down. - let (res, _, remains) = select_all(stores).await; - stores = remains; - match res { - Ok((to_store, regions)) => regions.into_iter().for_each(|region_id| { - if let Some(r) = region_map.get(®ion_id) { - let resps = resp_map.entry(region_id).or_default(); - resps.push(to_store); - if region_has_quorum(r, resps) { - valid_regions.insert(region_id); + let env = &self.env; + let pd_client = &self.pd_client; + let security_mgr = &self.security_mgr; + let tikv_clients = &self.tikv_clients; + // Approximate `LeaderInfo` size + let leader_info_size = store_map + .values() + .next() + .map_or(0, |regions| regions[0].compute_size()); + let store_count = store_map.len(); + let mut stores: Vec<_> = store_map + .drain() + .map(|(to_store, regions)| { + let env = env.clone(); + let region_num = regions.len() as u32; + CHECK_LEADER_REQ_SIZE_HISTOGRAM.observe((leader_info_size * region_num) as f64); + CHECK_LEADER_REQ_ITEM_COUNT_HISTOGRAM.observe(region_num as f64); + + // Check leadership for `regions` on `to_store`. 
+ async move { + PENDING_CHECK_LEADER_REQ_COUNT.inc(); + defer!(PENDING_CHECK_LEADER_REQ_COUNT.dec()); + let client = + get_tikv_client(to_store, pd_client, security_mgr, env, tikv_clients) + .await + .map_err(|e| { + (to_store, e.retryable(), format!("[get tikv client] {}", e)) + })?; + + let mut req = CheckLeaderRequest::default(); + req.set_regions(regions.into()); + req.set_ts(min_ts.into_inner()); + let slow_timer = SlowTimer::default(); + defer!({ + slow_log!( + T + slow_timer, + "check leader rpc costs too long, to_store: {}", + to_store + ); + let elapsed = slow_timer.saturating_elapsed(); + RTS_CHECK_LEADER_DURATION_HISTOGRAM_VEC + .with_label_values(&["rpc"]) + .observe(elapsed.as_secs_f64()); + }); + + let rpc = client + .check_leader_async(&req) + .map_err(|e| (to_store, true, format!("[rpc create failed]{}", e)))?; + PENDING_CHECK_LEADER_REQ_SENT_COUNT.inc(); + defer!(PENDING_CHECK_LEADER_REQ_SENT_COUNT.dec()); + let timeout = Duration::from_millis(DEFAULT_CHECK_LEADER_TIMEOUT_MILLISECONDS); + let resp = tokio::time::timeout(timeout, rpc) + .map_err(|e| (to_store, true, format!("[timeout] {}", e))) + .await? + .map_err(|e| (to_store, true, format!("[rpc failed] {}", e)))?; + Ok((to_store, resp)) + } + .boxed() + }) + .collect(); + let start = Instant::now_coarse(); + + defer!({ + RTS_CHECK_LEADER_DURATION_HISTOGRAM_VEC + .with_label_values(&["all"]) + .observe(start.saturating_elapsed_secs()); + }); + for _ in 0..store_count { + // Use `select_all` to avoid the process getting blocked when some + // TiKVs were down. 
+ let (res, _, remains) = select_all(stores).await; + stores = remains; + match res { + Ok((to_store, resp)) => { + for region_id in resp.regions { + if let Some(r) = region_map.get(®ion_id) { + let resps = resp_map.entry(region_id).or_default(); + resps.push(to_store); + if region_has_quorum(r, resps) { + valid_regions.insert(region_id); + } + } } } - }), - Err((to_store, reconnect, err)) => { - info!("check leader failed"; "error" => ?err, "to_store" => to_store); - if reconnect { - tikv_clients.lock().await.remove(&to_store); + Err((to_store, reconnect, err)) => { + info!("check leader failed"; "error" => ?err, "to_store" => to_store); + if reconnect { + self.tikv_clients.lock().await.remove(&to_store); + } } } + // Return early if all regions had already got quorum. + if valid_regions.len() == regions.len() { + // break here because all regions have quorum, + // so there is no need waiting for other stores to respond. + break; + } } - // Return early if all regions had already got quorum. - if valid_regions.len() == regions.len() { - // break here because all regions have quorum, - // so there is no need waiting for other stores to respond. 
- break; - } + self.valid_regions.drain().collect() } - valid_regions.into_iter().collect() } fn region_has_quorum(peers: &[Peer], stores: &[u64]) -> bool { @@ -374,10 +449,10 @@ static CONN_ID: AtomicI32 = AtomicI32::new(0); async fn get_tikv_client( store_id: u64, - pd_client: Arc, - security_mgr: Arc, + pd_client: &Arc, + security_mgr: &SecurityManager, env: Arc, - tikv_clients: Arc>>, + tikv_clients: &Mutex>, ) -> pd_client::Result { { let clients = tikv_clients.lock().await; diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index a79ff66e384..480c0ee6896 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -28,11 +28,15 @@ use raftstore::{ }; use security::SecurityManager; use tikv::config::ResolvedTsConfig; -use tikv_util::worker::{Runnable, RunnableWithTimer, Scheduler}; +use tikv_util::{ + warn, + worker::{Runnable, RunnableWithTimer, Scheduler}, +}; +use tokio::sync::Notify; use txn_types::{Key, TimeStamp}; use crate::{ - advance::AdvanceTsWorker, + advance::{AdvanceTsWorker, LeadershipResolver}, cmd::{ChangeLog, ChangeRow}, metrics::*, resolver::Resolver, @@ -263,7 +267,7 @@ impl ObserveRegion { pub struct Endpoint { store_id: Option, cfg: ResolvedTsConfig, - cfg_version: usize, + cfg_update_notify: Arc, store_meta: Arc>, region_read_progress: RegionReadProgressRegistry, regions: HashMap, @@ -295,20 +299,22 @@ where let meta = store_meta.lock().unwrap(); (meta.region_read_progress.clone(), meta.store_id) }; - let advance_worker = AdvanceTsWorker::new( - pd_client, - scheduler.clone(), - store_meta.clone(), - region_read_progress.clone(), - concurrency_manager, + let advance_worker = + AdvanceTsWorker::new(pd_client.clone(), scheduler.clone(), concurrency_manager); + let scanner_pool = ScannerPool::new(cfg.scan_lock_pool_size, raft_router); + let store_resolver_gc_interval = Duration::from_secs(60); + let leader_resolver = LeadershipResolver::new( + store_id.unwrap(), 
+ pd_client.clone(), env, security_mgr, + region_read_progress.clone(), + store_resolver_gc_interval, ); - let scanner_pool = ScannerPool::new(cfg.scan_lock_pool_size, raft_router); let ep = Self { store_id, cfg: cfg.clone(), - cfg_version: 0, + cfg_update_notify: Arc::new(Notify::new()), scheduler, store_meta, region_read_progress, @@ -318,7 +324,7 @@ where regions: HashMap::default(), _phantom: PhantomData::default(), }; - ep.register_advance_event(ep.cfg_version); + ep.handle_advance_resolved_ts(leader_resolver); ep } @@ -490,9 +496,9 @@ where } } - // Try to advance resolved ts. + // Update advanced resolved ts. // Must ensure all regions are leaders at the point of ts. - fn advance_resolved_ts(&mut self, regions: Vec, ts: TimeStamp) { + fn handle_resolved_ts_advanced(&mut self, regions: Vec, ts: TimeStamp) { if regions.is_empty() { return; } @@ -576,36 +582,28 @@ where } } - fn register_advance_event(&self, cfg_version: usize) { - // Ignore advance event that registered with previous `advance_ts_interval` - // config - if self.cfg_version != cfg_version { - return; - } + fn handle_advance_resolved_ts(&self, leader_resolver: LeadershipResolver) { let regions = self.regions.keys().into_iter().copied().collect(); - self.advance_worker.advance_ts_for_regions(regions); - self.advance_worker - .register_next_event(self.cfg.advance_ts_interval.0, self.cfg_version); + self.advance_worker.advance_ts_for_regions( + regions, + leader_resolver, + self.cfg.advance_ts_interval.0, + self.cfg_update_notify.clone(), + ); } fn handle_change_config(&mut self, change: ConfigChange) { let prev = format!("{:?}", self.cfg); - let prev_advance_ts_interval = self.cfg.advance_ts_interval; if let Err(e) = self.cfg.update(change) { - error!("update resolved-ts config unexpectly failed"; "err" => ?e); - return; - } - if self.cfg.advance_ts_interval != prev_advance_ts_interval { - // Increase the `cfg_version` to reject advance event that registered before - self.cfg_version += 1; - // 
Advance `resolved-ts` immediately after `advance_ts_interval` changed - self.register_advance_event(self.cfg_version); + warn!("resolved-ts config fails"; "error" => ?e); + } else { + self.cfg_update_notify.notify_waiters(); + info!( + "resolved-ts config changed"; + "prev" => prev, + "current" => ?self.cfg, + ); } - info!( - "resolved-ts config changed"; - "prev" => prev, - "current" => ?self.cfg, - ); } fn get_or_init_store_id(&mut self) -> Option { @@ -631,10 +629,10 @@ pub enum Task { observe_id: ObserveId, cause: String, }, - RegisterAdvanceEvent { - cfg_version: usize, - }, AdvanceResolvedTs { + leader_resolver: LeadershipResolver, + }, + ResolvedTsAdvanced { regions: Vec, ts: TimeStamp, }, @@ -683,7 +681,7 @@ impl fmt::Debug for Task { .field("observe_id", &observe_id) .field("cause", &cause) .finish(), - Task::AdvanceResolvedTs { + Task::ResolvedTsAdvanced { ref regions, ref ts, } => de @@ -703,9 +701,7 @@ impl fmt::Debug for Task { .field("observe_id", &observe_id) .field("apply_index", &apply_index) .finish(), - Task::RegisterAdvanceEvent { .. } => { - de.field("name", &"register_advance_event").finish() - } + Task::AdvanceResolvedTs { .. 
} => de.field("name", &"advance_resolved_ts").finish(), Task::ChangeConfig { ref change } => de .field("name", &"change_config") .field("change", &change) @@ -740,7 +736,12 @@ where observe_id, cause, } => self.re_register_region(region_id, observe_id, cause), - Task::AdvanceResolvedTs { regions, ts } => self.advance_resolved_ts(regions, ts), + Task::AdvanceResolvedTs { leader_resolver } => { + self.handle_advance_resolved_ts(leader_resolver) + } + Task::ResolvedTsAdvanced { regions, ts } => { + self.handle_resolved_ts_advanced(regions, ts) + } Task::ChangeLog { cmd_batch, snapshot, @@ -751,7 +752,6 @@ where entries, apply_index, } => self.handle_scan_locks(region_id, observe_id, entries, apply_index), - Task::RegisterAdvanceEvent { cfg_version } => self.register_advance_event(cfg_version), Task::ChangeConfig { change } => self.handle_change_config(change), } } diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 4c0bbce3fd1..5ae1b1a13a6 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -350,6 +350,8 @@ impl ServerCluster { let mut rts_worker = LazyWorker::new("resolved-ts"); let rts_ob = resolved_ts::Observer::new(rts_worker.scheduler()); rts_ob.register_to(&mut coprocessor_host); + // resolved ts endpoint needs store id. + store_meta.lock().unwrap().store_id = Some(node_id); // Resolved ts endpoint let rts_endpoint = resolved_ts::Endpoint::new( &cfg.resolved_ts, From 469eab2cfd0ffe1f6ddb6ab261322488f9880bf8 Mon Sep 17 00:00:00 2001 From: cosven Date: Tue, 25 Oct 2022 22:17:56 +0800 Subject: [PATCH 0286/1149] raftstore: warm up entry cache before leadership transfer (#13556) ref tikv/tikv#13060 Warm up the entry cache before becoming leader to avoid QPS spike. 
Signed-off-by: cosven Signed-off-by: cosven Co-authored-by: Xinye Tao --- components/backup-stream/tests/mod.rs | 2 +- components/raftstore/src/store/config.rs | 10 + .../raftstore/src/store/entry_storage.rs | 276 +++++++++++++++++- components/raftstore/src/store/fsm/peer.rs | 55 +++- components/raftstore/src/store/metrics.rs | 19 ++ components/raftstore/src/store/peer.rs | 105 ++++++- components/test_raftstore/src/cluster.rs | 17 ++ .../test_raftstore/src/common-test.toml | 1 + .../failpoints/cases/test_transfer_leader.rs | 260 ++++++++++++++++- tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + .../raftstore/test_transfer_leader.rs | 7 +- 12 files changed, 721 insertions(+), 33 deletions(-) diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index 284f1605c30..2cc6016aeb1 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -722,7 +722,7 @@ impl Suite { let leader = self.cluster.leader_of_region(region_id); for peer in region.get_peers() { if leader.as_ref().map(|p| p.id != peer.id).unwrap_or(true) { - self.cluster.transfer_leader(region_id, peer.clone()); + self.cluster.must_transfer_leader(region_id, peer.clone()); self.cluster.reset_leader_of_region(region_id); return; } diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index a5e84aa8501..4d9cd73d207 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -299,6 +299,10 @@ pub struct Config { #[doc(hidden)] pub long_uncommitted_base_threshold: ReadableDuration, + /// Max duration for the entry cache to be warmed up. + /// Set it to 0 to disable warmup. + pub max_entry_cache_warmup_duration: ReadableDuration, + #[doc(hidden)] pub max_snapshot_file_raw_size: ReadableSize, @@ -401,6 +405,7 @@ impl Default for Config { /// the log commit duration is less than 1s. 
Feel free to adjust /// this config :) long_uncommitted_base_threshold: ReadableDuration::secs(20), + max_entry_cache_warmup_duration: ReadableDuration::secs(1), // They are preserved for compatibility check. region_max_size: ReadableSize(0), @@ -452,6 +457,11 @@ impl Config { self.raft_log_gc_size_limit.unwrap() } + #[inline] + pub fn warmup_entry_cache_enabled(&self) -> bool { + self.max_entry_cache_warmup_duration.0 != Duration::from_secs(0) + } + pub fn region_split_check_diff(&self) -> ReadableSize { self.region_split_check_diff.unwrap() } diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index e5c617ec91b..a0828d12332 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -11,6 +11,7 @@ use std::{ mem, ops::Range, sync::{Arc, Mutex}, + time::Duration, }; use collections::HashMap; @@ -21,9 +22,9 @@ use kvproto::{ raft_serverpb::{RaftApplyState, RaftLocalState}, }; use protobuf::Message; -use raft::{prelude::*, util::limit_size, GetEntriesContext, StorageError}; +use raft::{prelude::*, util::limit_size, GetEntriesContext, StorageError, INVALID_INDEX}; use tikv_alloc::TraceEvent; -use tikv_util::{box_err, debug, info, time::Instant, warn, worker::Scheduler}; +use tikv_util::{box_err, debug, error, info, time::Instant, warn, worker::Scheduler}; use super::{ metrics::*, peer_storage::storage_error, WriteTask, MEMTRACE_ENTRY_CACHE, RAFT_INIT_LOG_INDEX, @@ -35,6 +36,7 @@ const MAX_ASYNC_FETCH_TRY_CNT: usize = 3; const SHRINK_CACHE_CAPACITY: usize = 64; const ENTRY_MEM_SIZE: usize = mem::size_of::(); +pub const MAX_WARMED_UP_CACHE_KEEP_TIME: Duration = Duration::from_secs(10); pub const MAX_INIT_ENTRY_COUNT: usize = 1024; #[inline] @@ -147,6 +149,25 @@ impl EntryCache { } } + /// Push entries to the left of the cache. + /// + /// When cache is not empty, the index of the last entry in entries + /// should be equal to `cache first index - 1`. 
When cache is + /// empty, it should be equal to the store's last index. Otherwise, + /// append new entries may fail due to unexpected hole. + fn prepend(&mut self, entries: Vec) { + let mut mem_size_change = 0; + let old_capacity = self.cache.capacity(); + for e in entries.into_iter().rev() { + mem_size_change += (bytes_capacity(&e.data) + bytes_capacity(&e.context)) as i64; + self.cache.push_front(e); + } + let new_capacity = self.cache.capacity(); + mem_size_change += Self::cache_vec_mem_size_change(new_capacity, old_capacity); + mem_size_change += self.shrink_if_necessary(); + self.flush_mem_size_change(mem_size_change); + } + fn append_impl(&mut self, region_id: u64, peer_id: u64, entries: &[Entry]) -> i64 { let mut mem_size_change = 0; @@ -530,6 +551,76 @@ pub fn init_applied_term( } } +/// When a peer(follower) receives a TransferLeaderMsg, it enters the +/// CacheWarmupState. When the peer becomes leader or it doesn't +/// become leader before a deadline, it exits the state. +#[derive(Clone, Debug)] +pub struct CacheWarmupState { + range: (u64, u64), + is_task_timeout: bool, + is_stale: bool, + started_at: Instant, +} + +impl CacheWarmupState { + pub fn new() -> Self { + CacheWarmupState::new_with_range(INVALID_INDEX, INVALID_INDEX) + } + + pub fn new_with_range(low: u64, high: u64) -> Self { + CacheWarmupState { + range: (low, high), + is_task_timeout: false, + is_stale: false, + started_at: Instant::now(), + } + } + + pub fn range(&self) -> (u64, u64) { + self.range + } + + /// How long has it been in this state. + pub fn elapsed(&self) -> Duration { + self.started_at.saturating_elapsed() + } + + /// Whether the warmup task is already timeout. + pub fn is_task_timeout(&self) -> bool { + self.is_task_timeout + } + + /// Check whether the task is timeout. 
+ pub fn check_task_timeout(&mut self, duration: Duration) -> bool { + if self.is_task_timeout { + return true; + } + if self.elapsed() > duration { + WARM_UP_ENTRY_CACHE_COUNTER.timeout.inc(); + self.is_task_timeout = true; + } + self.is_task_timeout + } + + /// Check whether this state is stale. + pub fn check_stale(&mut self, duration: Duration) -> bool { + fail_point!("entry_cache_warmed_up_state_is_stale", |_| true); + if self.is_stale { + return true; + } + if self.elapsed() > duration { + self.is_stale = true; + } + self.is_stale + } +} + +impl Default for CacheWarmupState { + fn default() -> Self { + Self::new() + } +} + /// A subset of `PeerStorage` that focus on accessing log entries. pub struct EntryStorage { region_id: u64, @@ -543,6 +634,7 @@ pub struct EntryStorage { raftlog_fetch_scheduler: Scheduler, raftlog_fetch_stats: AsyncFetchStats, async_fetch_results: RefCell>, + cache_warmup_state: Option, } impl EntryStorage { @@ -576,6 +668,7 @@ impl EntryStorage { raftlog_fetch_scheduler, raftlog_fetch_stats: AsyncFetchStats::default(), async_fetch_results: RefCell::new(HashMap::default()), + cache_warmup_state: None, }) } @@ -980,8 +1073,129 @@ impl EntryStorage { self.last_term = last_term; } + pub fn entry_cache_warmup_state(&self) -> &Option { + &self.cache_warmup_state + } + + pub fn entry_cache_warmup_state_mut(&mut self) -> &mut Option { + &mut self.cache_warmup_state + } + + pub fn clear_entry_cache_warmup_state(&mut self) { + self.cache_warmup_state = None; + } + + /// Trigger a task to warm up the entry cache. + /// + /// This will ensure the range [low..=last_index] are loaded into + /// cache. Return the high index of the warmup range if a task is + /// successfully triggered. + pub fn async_warm_up_entry_cache(&mut self, low: u64) -> Option { + let high = if let Some(first_index) = self.entry_cache_first_index() { + if low >= first_index { + // Already warmed up. 
+ self.cache_warmup_state = Some(CacheWarmupState::new()); + return None; + } + // Partially warmed up. + first_index + } else { + self.last_index() + 1 + }; + + // Fetch entries [low, high) to trigger an async fetch task in background. + self.cache_warmup_state = Some(CacheWarmupState::new_with_range(low, high)); + match self.entries(low, high, u64::MAX, GetEntriesContext::empty(true)) { + Ok(_) => { + // This should not happen, but it's OK :) + debug_assert!(false, "entries should not have been fetched"); + error!("entries are fetched unexpectedly during warming up"); + None + } + Err(raft::Error::Store(raft::StorageError::LogTemporarilyUnavailable)) => { + WARM_UP_ENTRY_CACHE_COUNTER.started.inc(); + Some(high) + } + Err(e) => { + error!( + "fetching entries met unexpected error during warming up"; + "err" => ?e, + ); + None + } + } + } + + /// Warm up entry cache if the result is valid. + /// + /// Return true when the warmup operation succeed within the timeout. + pub fn maybe_warm_up_entry_cache(&mut self, res: RaftlogFetchResult) -> bool { + let low = res.low; + // Warm up the entry cache if the low and high index are + // exactly the same as the warmup range. + let state = self.entry_cache_warmup_state().as_ref().unwrap(); + let range = state.range(); + let is_task_timeout = state.is_task_timeout(); + + if range.0 != low { + return false; + } + + match res.ents { + Ok(mut entries) => { + let last_entry_index = entries.last().map(|e| e.index); + if let Some(index) = last_entry_index { + // Generally speaking, when the res.low is the same as the warmup + // range start, the fetch result is exactly used for warmup. + // As the low index of each async_fetch task is different. + // There should exist only one exception. A async fetch task + // with same low index is triggered before the warmup task. 
+ if index + 1 >= range.1 { + let is_valid = if let Some(first_index) = self.entry_cache_first_index() { + range.1 == first_index + } else { + range.1 == self.last_index() + 1 + }; + assert!(is_valid, "the warmup range should still be valid"); + entries.truncate((range.1 - range.0) as usize); + self.cache.prepend(entries); + WARM_UP_ENTRY_CACHE_COUNTER.finished.inc(); + fail_point!("on_entry_cache_warmed_up"); + return !is_task_timeout; + } + } + warn!( + "warm up the entry cache failed"; + "region_id" => self.region_id, + "peer_id" => self.peer_id, + "last_entry_index" => last_entry_index.unwrap_or(0), + "expected_high" => range.1, + ); + } + Err(e) => { + warn!( + "warm up the entry cache failed"; + "region_id" => self.region_id, + "peer_id" => self.peer_id, + "err" => ?e, + ); + } + } + false + } + pub fn compact_entry_cache(&mut self, idx: u64) { - self.cache.compact_to(idx); + let mut can_compact = true; + if let Some(state) = self.entry_cache_warmup_state_mut() { + if state.check_stale(MAX_WARMED_UP_CACHE_KEEP_TIME) { + self.clear_entry_cache_warmup_state(); + } else { + can_compact = false; + } + } + if can_compact { + self.cache.compact_to(idx); + } } #[inline] @@ -1091,6 +1305,12 @@ pub mod tests { ); assert_eq!(rx.try_recv().unwrap(), 3); + cache.prepend(vec![new_padded_entry(100, 1, 1)]); + assert_eq!(rx.try_recv().unwrap(), 1); + cache.persisted = 100; + cache.compact_to(101); + assert_eq!(rx.try_recv().unwrap(), -1); + // Test size change for one overlapped entry. cache.append(0, 0, &[new_padded_entry(102, 2, 3)]); assert_eq!(rx.try_recv().unwrap(), 1); @@ -1522,6 +1742,7 @@ pub mod tests { entries = vec![new_entry(6, 6), new_entry(7, 6)]; append_ents(&mut store, &entries); validate_cache(&store, &entries); + store.cache.prepend(vec![new_entry(6, 5)]); // rewrite old entry entries = vec![new_entry(5, 6), new_entry(6, 6)]; @@ -1564,4 +1785,53 @@ pub mod tests { // invalid compaction should be ignored. 
store.compact_entry_cache(6); } + + #[test] + fn test_async_warm_up_entry_cache() { + let ents = vec![new_entry(4, 4), new_entry(5, 5), new_entry(6, 6)]; + + let td = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); + let region_worker = Worker::new("snap-manager").lazy_build("snap-manager"); + let region_scheduler = region_worker.scheduler(); + let (dummy_scheduler, _rx) = dummy_scheduler(); + + let mut store = new_storage_from_ents(region_scheduler, dummy_scheduler, &td, &ents); + store.cache.compact_to(6); + assert_eq!(store.entry_cache_first_index().unwrap(), 6); + + // The return value should be None when it is already warmed up. + assert!(store.async_warm_up_entry_cache(6).is_none()); + + // The high index should be equal to the entry_cache_first_index. + assert_eq!(store.async_warm_up_entry_cache(5).unwrap(), 6); + + store.cache.compact_to(7); // Clean cache. + // The high index should be equal to the last_index + 1. + assert_eq!(store.async_warm_up_entry_cache(5).unwrap(), 7); + } + + #[test] + fn test_warmup_entry_cache() { + let ents = vec![new_entry(4, 4), new_entry(5, 5), new_entry(6, 6)]; + + let td = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); + let region_worker = Worker::new("snap-manager").lazy_build("snap-manager"); + let region_scheduler = region_worker.scheduler(); + let (dummy_scheduler, _rx) = dummy_scheduler(); + let mut store = new_storage_from_ents(region_scheduler, dummy_scheduler, &td, &ents); + store.cache.compact_to(6); + store.cache_warmup_state = Some(CacheWarmupState::new_with_range(5, 6)); + + let res = RaftlogFetchResult { + ents: Ok(ents[1..3].to_vec()), + low: 5, + max_size: u64::MAX, + hit_size_limit: false, + tried_cnt: MAX_ASYNC_FETCH_TRY_CNT, + term: 1, + }; + store.maybe_warm_up_entry_cache(res); + // Cache should be warmed up. 
+ assert_eq!(store.entry_cache_first_index().unwrap(), 5); + } } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 57f5fe158f5..63761321405 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -70,6 +70,7 @@ use crate::{ coprocessor::{RegionChangeEvent, RegionChangeReason}, store::{ cmd_resp::{bind_term, new_error}, + entry_storage::MAX_WARMED_UP_CACHE_KEEP_TIME, fsm::{ apply, store::{PollContext, StoreMeta}, @@ -1801,8 +1802,17 @@ where fn on_raft_log_fetched(&mut self, context: GetEntriesContext, res: Box) { let low = res.low; - // if the peer is not the leader anymore or being destroyed, ignore the result. - if !self.fsm.peer.is_leader() || self.fsm.peer.pending_remove { + // If the peer is not the leader anymore and it's not in entry cache warmup + // state, or it is being destroyed, ignore the result. + if !self.fsm.peer.is_leader() + && self + .fsm + .peer + .get_store() + .entry_cache_warmup_state() + .is_none() + || self.fsm.peer.pending_remove + { self.fsm.peer.mut_store().clean_async_fetch_res(low); return; } @@ -1810,6 +1820,19 @@ where if self.fsm.peer.term() != res.term { // term has changed, the result may be not correct. 
self.fsm.peer.mut_store().clean_async_fetch_res(low); + } else if self + .fsm + .peer + .get_store() + .entry_cache_warmup_state() + .is_some() + { + if self.fsm.peer.mut_store().maybe_warm_up_entry_cache(*res) { + self.fsm.peer.ack_transfer_leader_msg(false); + self.fsm.has_ready = true; + } + self.fsm.peer.mut_store().clean_async_fetch_res(low); + return; } else { self.fsm .peer @@ -3216,10 +3239,13 @@ where } } } - } else { - self.fsm - .peer - .execute_transfer_leader(self.ctx, msg.get_from(), peer_disk_usage, false); + } else if !self + .fsm + .peer + .maybe_reject_transfer_leader_msg(self.ctx, msg, peer_disk_usage) + && self.fsm.peer.pre_ack_transfer_leader_msg(self.ctx, msg) + { + self.fsm.peer.ack_transfer_leader_msg(false); } } @@ -3757,6 +3783,14 @@ where } fn on_ready_compact_log(&mut self, first_index: u64, state: RaftTruncatedState) { + // Since this peer may be warming up the entry cache, log compaction should be + // temporarily skipped. Otherwise, the warmup task may fail. + if let Some(state) = self.fsm.peer.mut_store().entry_cache_warmup_state_mut() { + if !state.check_stale(MAX_WARMED_UP_CACHE_KEEP_TIME) { + return; + } + } + let total_cnt = self.fsm.peer.last_applying_idx - first_index; // the size of current CompactLog command can be ignored. let remain_cnt = self.fsm.peer.last_applying_idx - state.get_index() - 1; @@ -6157,14 +6191,7 @@ where if term != self.fsm.peer.term() { return; } - // As the leader can propose the TransferLeader request successfully, the disk - // of the leader is probably not full. 
- self.fsm.peer.execute_transfer_leader( - self.ctx, - self.fsm.peer.leader_id(), - DiskUsage::Normal, - true, - ); + self.fsm.peer.ack_transfer_leader_msg(true); self.fsm.has_ready = true; } diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index af877e14b46..14d8d7e97cc 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -78,6 +78,11 @@ make_auto_flush_static_metric! { fetch_unused, } + pub label_enum WarmUpEntryCacheType { + started, + timeout, + finished, + } pub label_enum RaftEventDurationType { compact_check, @@ -103,6 +108,11 @@ make_auto_flush_static_metric! { pub struct RaftEntryFetches : LocalIntCounter { "type" => RaftEntryType } + + pub struct WarmUpEntryCacheCounter : LocalIntCounter { + "type" => WarmUpEntryCacheType + } + pub struct SnapCf : LocalHistogram { "type" => CfNames, } @@ -614,6 +624,15 @@ lazy_static! { exponential_buckets(0.0005, 2.0, 21).unwrap() // 500us ~ 8.7m ).unwrap(); + pub static ref WARM_UP_ENTRY_CACHE_COUNTER_VEC: IntCounterVec = + register_int_counter_vec!( + "tikv_raftstore_prefill_entry_cache_total", + "Total number of prefill entry cache.", + &["type"] + ).unwrap(); + pub static ref WARM_UP_ENTRY_CACHE_COUNTER: WarmUpEntryCacheCounter = + auto_flush_from!(WARM_UP_ENTRY_CACHE_COUNTER_VEC, WarmUpEntryCacheCounter); + pub static ref LEADER_MISSING: IntGauge = register_int_gauge!( "tikv_raftstore_leader_missing", diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index f67c3a28800..b06eb5c0c3f 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -1024,7 +1024,7 @@ where /// region buckets. pub region_buckets: Option, pub last_region_buckets: Option, - /// lead_transferee if the peer is in a leadership transferring. + /// lead_transferee if this peer(leader) is in a leadership transferring. 
pub lead_transferee: u64, pub unsafe_recovery_state: Option, // Used as the memory state for Flashback to reject RW/Schedule before proposing. @@ -2249,6 +2249,8 @@ where self.require_updating_max_ts(&ctx.pd_scheduler); // Init the in-memory pessimistic lock table when the peer becomes leader. self.activate_in_memory_pessimistic_locks(); + // Exit entry cache warmup state when the peer becomes leader. + self.mut_store().clear_entry_cache_warmup_state(); if !ctx.store_disk_usages.is_empty() { self.refill_disk_full_peers(ctx); @@ -3721,10 +3723,12 @@ where // Broadcast heartbeat to make sure followers commit the entries immediately. // It's only necessary to ping the target peer, but ping all for simplicity. self.raft_group.ping(); + let mut msg = eraftpb::Message::new(); msg.set_to(peer.get_id()); msg.set_msg_type(eraftpb::MessageType::MsgTransferLeader); msg.set_from(self.peer_id()); + msg.set_index(self.get_store().entry_cache_first_index().unwrap_or(0)); // log term here represents the term of last log. For leader, the term of last // log is always its current term. Not just set term because raft library // forbids setting it for MsgTransferLeader messages. @@ -4384,33 +4388,95 @@ where Ok(Either::Left(propose_index)) } - pub fn execute_transfer_leader( + pub fn maybe_reject_transfer_leader_msg( &mut self, ctx: &mut PollContext, - from: u64, + msg: &eraftpb::Message, peer_disk_usage: DiskUsage, - reply_cmd: bool, // whether it is a reply to a TransferLeader command - ) { + ) -> bool { let pending_snapshot = self.is_handling_snapshot() || self.has_pending_snapshot(); if pending_snapshot - || from != self.leader_id() + || msg.get_from() != self.leader_id() // Transfer leader to node with disk full will lead to write availablity downback. // But if the current leader is disk full, and send such request, we should allow it, // because it may be a read leader balance request. 
|| (!matches!(ctx.self_disk_usage, DiskUsage::Normal) && - matches!(peer_disk_usage,DiskUsage::Normal)) + matches!(peer_disk_usage, DiskUsage::Normal)) { info!( "reject transferring leader"; "region_id" => self.region_id, "peer_id" => self.peer.get_id(), - "from" => from, + "from" => msg.get_from(), "pending_snapshot" => pending_snapshot, "disk_usage" => ?ctx.self_disk_usage, ); - return; + return true; + } + false + } + + /// Before ack the transfer leader message sent by the leader. + /// Currently, it only warms up the entry cache in this stage. + /// + /// This return whether the msg should be acked. When cache is warmed up + /// or the warmup operation is timeout, it is true. + pub fn pre_ack_transfer_leader_msg( + &mut self, + ctx: &mut PollContext, + msg: &eraftpb::Message, + ) -> bool { + if !ctx.cfg.warmup_entry_cache_enabled() { + return true; } + // The start index of warmup range. It is leader's entry_cache_first_index, + // which in general is equal to the lowest matched index. + let mut low = msg.get_index(); + let last_index = self.get_store().last_index(); + let mut should_ack_now = false; + + // Need not to warm up when the index is 0. + // There are two cases where index can be 0: + // 1. During rolling upgrade, old instances may not support warmup. + // 2. The leader's entry cache is empty. + if low == 0 || low > last_index { + // There is little possibility that the warmup_range_start + // is larger than the last index. Check the test case + // `test_when_warmup_range_start_is_larger_than_last_index` + // for details. + should_ack_now = true; + } else { + if low < self.last_compacted_idx { + low = self.last_compacted_idx + }; + // Check if the entry cache is already warmed up. 
+ if let Some(first_index) = self.get_store().entry_cache_first_index() { + if low >= first_index { + fail_point!("entry_cache_already_warmed_up"); + should_ack_now = true; + } + } + } + + if should_ack_now { + return true; + } + + // Check if the warmup operation is timeout if warmup is already started. + if let Some(state) = self.mut_store().entry_cache_warmup_state_mut() { + // If it is timeout, this peer should ack the message so that + // the leadership transfer process can continue. + state.check_task_timeout(ctx.cfg.max_entry_cache_warmup_duration.0) + } else { + self.mut_store().async_warm_up_entry_cache(low).is_none() + } + } + + pub fn ack_transfer_leader_msg( + &mut self, + reply_cmd: bool, // whether it is a reply to a TransferLeader command + ) { let mut msg = eraftpb::Message::new(); msg.set_from(self.peer_id()); msg.set_to(self.leader_id()); @@ -4431,10 +4497,23 @@ where /// /// 1. pre_transfer_leader on leader: /// Leader will send a MsgTransferLeader to follower. - /// 2. execute_transfer_leader on follower - /// If follower passes all necessary checks, it will reply an - /// ACK with type MsgTransferLeader and its promised persistent index. - /// 3. ready_to_transfer_leader on leader: + /// 2. pre_ack_transfer_leader_msg on follower: + /// If follower passes all necessary checks, it will try to warmup + /// the entry cache. + /// 3. ack_transfer_leader_msg on follower: + /// When the entry cache has been warmed up or the operator is timeout, + /// the follower reply an ACK with type MsgTransferLeader and + /// its promised persistent index. + /// + /// Additional steps when there are remaining pessimistic + /// locks to propose (detected in function on_transfer_leader_msg). + /// 1. Leader firstly proposes pessimistic locks and then proposes a + /// TransferLeader command. + /// 2. 
ack_transfer_leader_msg on follower again: + /// The follower applies the TransferLeader command and replies an + /// ACK with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. + /// + /// 4. ready_to_transfer_leader on leader: /// Leader checks if it's appropriate to transfer leadership. If it /// does, it calls raft transfer_leader API to do the remaining work. /// diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 7a932d324f0..c097b22222d 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1139,6 +1139,23 @@ impl Cluster { } } + pub fn wait_applied_index(&mut self, region_id: u64, store_id: u64, index: u64) { + let timer = Instant::now(); + loop { + let applied_index = self.apply_state(region_id, store_id).applied_index; + if applied_index >= index { + return; + } + if timer.saturating_elapsed() >= Duration::from_secs(5) { + panic!( + "[region {}] log is still not applied to {}: {} on store {}", + region_id, index, applied_index, store_id, + ); + } + thread::sleep(Duration::from_millis(10)); + } + } + pub fn wait_tombstone(&self, region_id: u64, peer: metapb::Peer, check_exist: bool) { let timer = Instant::now(); let mut state; diff --git a/components/test_raftstore/src/common-test.toml b/components/test_raftstore/src/common-test.toml index 6b179081def..50e62f67d28 100644 --- a/components/test_raftstore/src/common-test.toml +++ b/components/test_raftstore/src/common-test.toml @@ -65,6 +65,7 @@ raft-store-max-leader-lease = "240ms" allow-remove-leader = true merge-check-tick-interval = "100ms" pd-heartbeat-tick-interval = "20ms" +max-entry-cache-warmup-duration = "0ms" dev-assert = true hibernate-regions = true store-io-pool-size = 0 diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index 556549b8141..cc6b043f0e5 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ 
b/tests/failpoints/cases/test_transfer_leader.rs @@ -6,14 +6,19 @@ use std::{ time::Duration, }; +use crossbeam::channel; use engine_traits::CF_LOCK; use futures::executor::block_on; use grpcio::{ChannelBuilder, Environment}; use kvproto::{kvrpcpb::*, tikvpb::TikvClient}; use pd_client::PdClient; +use raft::eraftpb::MessageType; use test_raftstore::*; use tikv::storage::Snapshot; -use tikv_util::HandyRwLock; +use tikv_util::{ + config::{ReadableDuration, ReadableSize}, + HandyRwLock, +}; use txn_types::{Key, PessimisticLock}; /// When a follower applies log slowly, leader should not transfer leader @@ -332,3 +337,256 @@ fn test_read_lock_after_become_follower() { // PessimisticLockNotFound. assert!(resp.get_region_error().has_stale_command()); } + +/// This function does the following things +/// +/// 0. Transfer the region's(id=1) leader to store 1. +/// 1. Inserted 5 entries and make all stores commit and apply them. +/// 2. Prevent the store 3 from append following logs. +/// 3. Insert another 20 entries. +/// 4. Wait for some time so that part of the entry cache are compacted +/// on the leader(store 1). +fn run_cluster_for_test_warmup_entry_cache(cluster: &mut Cluster) { + // Let the leader compact the entry cache. + cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); + cluster.run(); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + + for i in 1..5u32 { + let k = i.to_string().into_bytes(); + let v = k.clone(); + cluster.must_put(&k, &v); + must_get_equal(&cluster.get_engine(3), &k, &v); + } + + // Let store 3 fall behind. + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(1, 3).direction(Direction::Recv), + )); + + for i in 1..20u32 { + let k = i.to_string().into_bytes(); + let v = k.clone(); + cluster.must_put(&k, &v); + must_get_equal(&cluster.get_engine(2), &k, &v); + } + + // Wait until part of the leader's entry cache is compacted. 
+ sleep_ms(cluster.cfg.raft_store.raft_log_gc_tick_interval.as_millis() * 2); +} + +fn prevent_from_gc_raft_log(cluster: &mut Cluster) { + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100000); + cluster.cfg.raft_store.raft_log_gc_threshold = 1000; + cluster.cfg.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(20)); + cluster.cfg.raft_store.raft_log_reserve_max_ticks = 20; +} + +fn run_cluster_and_warm_up_cache_for_store2() -> Cluster { + let mut cluster = new_node_cluster(0, 3); + cluster.cfg.raft_store.max_entry_cache_warmup_duration = ReadableDuration::secs(1000); + prevent_from_gc_raft_log(&mut cluster); + run_cluster_for_test_warmup_entry_cache(&mut cluster); + + let (sx, rx) = channel::unbounded(); + let recv_filter = Box::new( + RegionPacketFilter::new(1, 1) + .direction(Direction::Recv) + .msg_type(MessageType::MsgTransferLeader) + .set_msg_callback(Arc::new(move |m| { + sx.send(m.get_message().get_from()).unwrap(); + })), + ); + cluster.sim.wl().add_recv_filter(1, recv_filter); + + let (sx2, rx2) = channel::unbounded(); + fail::cfg_callback("on_entry_cache_warmed_up", move || sx2.send(true).unwrap()).unwrap(); + cluster.transfer_leader(1, new_peer(2, 2)); + + // Cache should be warmed up. + assert!(rx2.recv_timeout(Duration::from_millis(500)).unwrap()); + // It should ack the message just after cache is warmed up. + assert_eq!(rx.recv_timeout(Duration::from_millis(500)).unwrap(), 2); + cluster.sim.wl().clear_recv_filters(1); + cluster +} + +/// Leader should carry a correct index in TransferLeaderMsg so that +/// the follower can warm up the entry cache with this index. 
+#[test] +fn test_transfer_leader_msg_index() { + let mut cluster = new_node_cluster(0, 3); + cluster.cfg.raft_store.raft_entry_cache_life_time = ReadableDuration::secs(1000); + prevent_from_gc_raft_log(&mut cluster); + run_cluster_for_test_warmup_entry_cache(&mut cluster); + + let (sx, rx) = channel::unbounded(); + let recv_filter = Box::new( + RegionPacketFilter::new(1, 2) + .direction(Direction::Recv) + .msg_type(MessageType::MsgTransferLeader) + .set_msg_callback(Arc::new(move |m| { + sx.send(m.get_message().get_index()).unwrap(); + })), + ); + cluster.sim.wl().add_recv_filter(2, recv_filter); + + // TransferLeaderMsg.index should be equal to the store3's replicated_index. + cluster.transfer_leader(1, new_peer(2, 2)); + let replicated_index = cluster.raft_local_state(1, 3).last_index; + assert_eq!( + rx.recv_timeout(Duration::from_secs(2)).unwrap(), + replicated_index, + ); +} + +/// The store should ack the transfer leader msg immediately +/// when the warmup range start is larger than it's last index. +#[test] +fn test_when_warmup_range_start_is_larger_than_last_index() { + let mut cluster = new_node_cluster(0, 3); + cluster.cfg.raft_store.raft_entry_cache_life_time = ReadableDuration::secs(1000); + prevent_from_gc_raft_log(&mut cluster); + run_cluster_for_test_warmup_entry_cache(&mut cluster); + cluster.pd_client.disable_default_operator(); + + let s4 = cluster.add_new_engine(); + + // Prevent peer 4 from appending logs, so it's last index should + // be really small. 
+ let recv_filter_s4 = Box::new( + RegionPacketFilter::new(1, s4) + .direction(Direction::Recv) + .msg_type(MessageType::MsgAppend), + ); + cluster.sim.wl().add_recv_filter(s4, recv_filter_s4); + + let (sx, rx) = channel::unbounded(); + let recv_filter_1 = Box::new( + RegionPacketFilter::new(1, 1) + .direction(Direction::Recv) + .msg_type(MessageType::MsgTransferLeader) + .set_msg_callback(Arc::new(move |m| { + sx.send(m.get_message().get_from()).unwrap(); + })), + ); + cluster.sim.wl().add_recv_filter(1, recv_filter_1); + + cluster.pd_client.must_add_peer(1, new_peer(s4, s4)); + cluster.transfer_leader(1, new_peer(s4, s4)); + // Store(s4) should ack the transfer leader msg immediately. + assert_eq!(rx.recv_timeout(Duration::from_millis(500)).unwrap(), s4); +} + +/// When the start index of warmup range is compacted, the follower should +/// still warm up and use the compacted_idx as the start index. +#[test] +fn test_when_warmup_range_start_is_compacted() { + let mut cluster = new_node_cluster(0, 3); + // GC raft log aggressively. + cluster.cfg.raft_store.merge_max_log_gap = 1; + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(5); + cluster.cfg.raft_store.max_entry_cache_warmup_duration = ReadableDuration::secs(1000); + run_cluster_for_test_warmup_entry_cache(&mut cluster); + cluster.pd_client.disable_default_operator(); + + // Case `test_transfer_leader_msg_index` already proves that + // the warmup_range_start is equal to the replicated_index. + let warmup_range_start = cluster.raft_local_state(1, 3).last_index; + cluster.wait_log_truncated(1, 2, warmup_range_start + 10); + let s2_truncated_index = cluster.truncated_state(1, 2).get_index(); + let s2_last_index = cluster.raft_local_state(1, 2).last_index; + assert!(warmup_range_start < s2_truncated_index); + assert!(s2_truncated_index + 5 <= s2_last_index); + + // Cache should be warmed up successfully. 
+ let (sx, rx) = channel::unbounded(); + fail::cfg_callback("on_entry_cache_warmed_up", move || sx.send(true).unwrap()).unwrap(); + cluster.transfer_leader(1, new_peer(2, 2)); + rx.recv_timeout(Duration::from_millis(500)).unwrap(); +} + +/// Transfer leader should work as normal when disable warming up entry cache. +#[test] +fn test_turnoff_warmup_entry_cache() { + let mut cluster = new_node_cluster(0, 3); + prevent_from_gc_raft_log(&mut cluster); + run_cluster_for_test_warmup_entry_cache(&mut cluster); + cluster.cfg.raft_store.max_entry_cache_warmup_duration = ReadableDuration::secs(0); + fail::cfg("worker_async_fetch_raft_log", "pause").unwrap(); + cluster.must_transfer_leader(1, new_peer(2, 2)); +} + +/// When the follower has not warmed up the entry cache and the timeout of +/// warmup is very long, then the leadership transfer can never succeed. +#[test] +fn test_when_warmup_fail_and_its_timeout_is_too_long() { + let mut cluster = new_node_cluster(0, 3); + cluster.cfg.raft_store.max_entry_cache_warmup_duration = ReadableDuration::secs(1000); + prevent_from_gc_raft_log(&mut cluster); + run_cluster_for_test_warmup_entry_cache(&mut cluster); + + fail::cfg("worker_async_fetch_raft_log", "pause").unwrap(); + cluster.transfer_leader(1, new_peer(2, 2)); + // Theoretically, the leader transfer can't succeed unless it sleeps + // max_entry_cache_warmup_duration. + sleep_ms(50); + let leader = cluster.leader_of_region(1).unwrap(); + assert_eq!(leader.get_id(), 1); +} + +/// When the follower has not warmed up the entry cache and the timeout of +/// warmup is pretty short, then the leadership transfer should succeed quickly. 
+#[test] +fn test_when_warmup_fail_and_its_timeout_is_short() { + let mut cluster = new_node_cluster(0, 3); + cluster.cfg.raft_store.max_entry_cache_warmup_duration = ReadableDuration::millis(10); + prevent_from_gc_raft_log(&mut cluster); + run_cluster_for_test_warmup_entry_cache(&mut cluster); + + fail::cfg("worker_async_fetch_raft_log", "pause").unwrap(); + cluster.must_transfer_leader(1, new_peer(2, 2)); +} + +/// The follower should ack the msg when the cache is warmed up. +/// Besides, the cache should be kept for a period of time. +#[test] +fn test_when_warmup_succeed_and_become_leader() { + let mut cluster = run_cluster_and_warm_up_cache_for_store2(); + + // Generally, the cache will be compacted during post_apply. + // However, if the cache is warmed up recently, the cache should be kept. + let applied_index = cluster.apply_state(1, 2).applied_index; + cluster.must_put(b"kk1", b"vv1"); + cluster.wait_applied_index(1, 2, applied_index + 1); + + // It should ack the message when cache is already warmed up. + // It needs not to fetch raft log anymore. + fail::cfg("worker_async_fetch_raft_log", "pause").unwrap(); + cluster.sim.wl().clear_recv_filters(1); + cluster.must_transfer_leader(1, new_peer(2, 2)); +} + +/// The follower should exit warmup state if it does not become leader +/// in a period of time. +#[test] +fn test_when_warmup_succeed_and_not_become_leader() { + let mut cluster = run_cluster_and_warm_up_cache_for_store2(); + + let (sx, rx) = channel::unbounded(); + fail::cfg_callback("worker_async_fetch_raft_log", move || { + sx.send(true).unwrap() + }) + .unwrap(); + fail::cfg("entry_cache_warmed_up_state_is_stale", "return").unwrap(); + + // Since the warmup state is stale, the peer should exit warmup state, + // and the entry cache should be compacted during post_apply. 
+ let applied_index = cluster.apply_state(1, 2).applied_index; + cluster.must_put(b"kk1", b"vv1"); + cluster.wait_applied_index(1, 2, applied_index + 1); + // The peer should warm up cache again when it receives a new TransferLeaderMsg. + cluster.transfer_leader(1, new_peer(2, 2)); + assert!(rx.recv_timeout(Duration::from_millis(500)).unwrap()); +} diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index e2d5ef06b6e..90524079bfa 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -248,6 +248,7 @@ fn test_serde_custom_tikv_config() { report_region_buckets_tick_interval: ReadableDuration::secs(1234), check_long_uncommitted_interval: ReadableDuration::secs(1), long_uncommitted_base_threshold: ReadableDuration::secs(1), + max_entry_cache_warmup_duration: ReadableDuration::secs(2), max_snapshot_file_raw_size: ReadableSize::gb(10), unreachable_backoff: ReadableDuration::secs(111), check_peers_availability_interval: ReadableDuration::secs(30), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 097ebd612cd..17f82f9eb87 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -222,6 +222,7 @@ report-min-resolved-ts-interval = "233ms" report-region-buckets-tick-interval = "1234s" max-snapshot-file-raw-size = "10GB" unreachable-backoff = "111s" +max-entry-cache-warmup-duration = "2s" [coprocessor] split-region-on-table = false diff --git a/tests/integrations/raftstore/test_transfer_leader.rs b/tests/integrations/raftstore/test_transfer_leader.rs index 9f2e564341f..b0fade84d8b 100644 --- a/tests/integrations/raftstore/test_transfer_leader.rs +++ b/tests/integrations/raftstore/test_transfer_leader.rs @@ -19,7 +19,12 @@ fn test_basic_transfer_leader(cluster: &mut Cluster) { cluster.cfg.raft_store.raft_heartbeat_ticks = 20; let reserved_time = Duration::from_millis( 
cluster.cfg.raft_store.raft_base_tick_interval.as_millis() - * cluster.cfg.raft_store.raft_heartbeat_ticks as u64, + * cluster.cfg.raft_store.raft_heartbeat_ticks as u64 + + cluster + .cfg + .raft_store + .max_entry_cache_warmup_duration + .as_millis(), ); cluster.run(); From c74c8ca907632263a0cfcd84b8413e9edc5c19e5 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Wed, 26 Oct 2022 10:13:56 +0800 Subject: [PATCH 0287/1149] raftstore: introduce an observer to control write apply state (#13609) close tikv/tikv#12849 introduce an observer to control write apply state Signed-off-by: CalvinNeo Co-authored-by: Ti Chi Robot --- .../raftstore/src/coprocessor/dispatcher.rs | 39 +++++++++++++++++++ components/raftstore/src/coprocessor/mod.rs | 7 ++++ components/raftstore/src/store/fsm/apply.rs | 16 ++++++-- tests/integrations/raftstore/test_snap.rs | 1 + 4 files changed, 60 insertions(+), 3 deletions(-) diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index df7794c3701..99228aef44c 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -655,6 +655,20 @@ impl CoprocessorHost { true } + /// Should be called everytime before we want to write apply state when + /// applying. Return a bool which indicates whether we can actually do + /// this write. 
+ pub fn pre_write_apply_state(&self, region: &Region) -> bool { + let mut ctx = ObserverContext::new(region); + for observer in &self.registry.region_change_observers { + let observer = observer.observer.inner(); + if !observer.pre_write_apply_state(&mut ctx) { + return false; + } + } + true + } + pub fn on_flush_applied_cmd_batch( &self, max_level: ObserveLevel, @@ -763,6 +777,8 @@ mod tests { PostApplySnapshot = 21, ShouldPreApplySnapshot = 22, OnUpdateSafeTs = 23, + PrePersist = 24, + PreWriteApplyState = 25, } impl Coprocessor for TestCoprocessor {} @@ -911,6 +927,25 @@ mod tests { .fetch_add(ObserverIndex::OnRegionChanged as usize, Ordering::SeqCst); ctx.bypass = self.bypass.load(Ordering::SeqCst); } + + fn pre_persist( + &self, + ctx: &mut ObserverContext<'_>, + _: bool, + _: Option<&RaftCmdRequest>, + ) -> bool { + self.called + .fetch_add(ObserverIndex::PrePersist as usize, Ordering::SeqCst); + ctx.bypass = self.bypass.load(Ordering::SeqCst); + true + } + + fn pre_write_apply_state(&self, ctx: &mut ObserverContext<'_>) -> bool { + self.called + .fetch_add(ObserverIndex::PreWriteApplyState as usize, Ordering::SeqCst); + ctx.bypass = self.bypass.load(Ordering::SeqCst); + true + } } impl ApplySnapshotObserver for TestCoprocessor { @@ -1132,6 +1167,10 @@ mod tests { host.on_update_safe_ts(1, 1, 1); index += ObserverIndex::OnUpdateSafeTs as usize; assert_all!([&ob.called], &[index]); + + host.pre_write_apply_state(®ion); + index += ObserverIndex::PreWriteApplyState as usize; + assert_all!([&ob.called], &[index]); } #[test] diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 35330701a95..7ac783c0d6d 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -321,6 +321,13 @@ pub trait RegionChangeObserver: Coprocessor { ) -> bool { true } + + /// Should be called everytime before we want to write apply state when + /// applying. 
Return a bool which indicates whether we can actually do + /// this write. + fn pre_write_apply_state(&self, _: &mut ObserverContext<'_>) -> bool { + true + } } #[derive(Clone, Debug, Default)] diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index dae732797b1..a9124dc2faf 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -500,7 +500,7 @@ where /// `finish_for`. pub fn commit(&mut self, delegate: &mut ApplyDelegate) { if delegate.last_flush_applied_index < delegate.apply_state.get_applied_index() { - delegate.write_apply_state(self.kv_wb_mut()); + delegate.maybe_write_apply_state(self); } self.commit_opt(delegate, true); } @@ -621,7 +621,7 @@ where ) { if self.host.pre_persist(&delegate.region, true, None) { if !delegate.pending_remove { - delegate.write_apply_state(self.kv_wb_mut()); + delegate.maybe_write_apply_state(self); } self.commit_opt(delegate, false); } else { @@ -1101,6 +1101,13 @@ where }); } + fn maybe_write_apply_state(&self, apply_ctx: &mut ApplyContext) { + let can_write = apply_ctx.host.pre_write_apply_state(&self.region); + if can_write { + self.write_apply_state(apply_ctx.kv_wb_mut()); + } + } + fn handle_raft_entry_normal( &mut self, apply_ctx: &mut ApplyContext, @@ -1285,6 +1292,9 @@ where .applied_batch .push(cmd_cb, cmd, &self.observe_info, self.region_id()); if should_write { + // An observer shall prevent a write_apply_state here by not return true + // when `post_exec`. 
+ self.write_apply_state(apply_ctx.kv_wb_mut()); apply_ctx.commit(self); } exec_result @@ -3741,7 +3751,7 @@ where if apply_ctx.timer.is_none() { apply_ctx.timer = Some(Instant::now_coarse()); } - self.delegate.write_apply_state(apply_ctx.kv_wb_mut()); + self.delegate.maybe_write_apply_state(apply_ctx); fail_point!( "apply_on_handle_snapshot_1_1", self.delegate.id == 1 && self.delegate.region_id() == 1, diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index 2bc05726bfc..8d3212ad4a6 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -514,6 +514,7 @@ fn test_inspected_snapshot() { assert_ne!(stats.fetch(IoType::Replication, IoOp::Write), 0); pd_client.must_remove_peer(1, new_peer(2, 2)); + must_get_none(&cluster.get_engine(2), b"k2"); assert_eq!(stats.fetch(IoType::LoadBalance, IoOp::Read), 0); assert_eq!(stats.fetch(IoType::LoadBalance, IoOp::Write), 0); pd_client.must_add_peer(1, new_peer(2, 2)); From a4dc37b0c07ee1b5bc7c60a5d8360666500ecdb4 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Wed, 26 Oct 2022 11:33:57 +0800 Subject: [PATCH 0288/1149] storage, lock_manager: Use the new lock waiting queue instead of WaiterManager to handle pessimistic lock waking up (#13447) ref tikv/tikv#13298 Updates the write path of acquiring lock and releasing lock to make use of the new `LockWaitQueue`. Some important points are: 1. `WriteResultLockInfo` (returned by `AcquirePessimisticLock::process_write`) carries parameters, which can be used for resuming the request in the future. 2. `WriteResultLockInfo` will be converted into `LockWaitContext` and `LockWaitEntry`, and then send to both `LockManager` and the new `LockWaitQueues`. 3. 
When a storage command releases some locks, will return the released locks to `Scheduler::process_write`, which will then call `on_release_locks` to pop lock waiting entries from the queues and wake up them asynchronously (to avoid increasing too much latency of the current command). 4. The `LockManager` (and its inner module `WaiterManager`) no longer has the responsibility for waking up waiters, but keeps its functionality of handling timeout and performing deadlock detection. Instead, it has a new `remove_lock_wait` method to remove a waiter from it. 5. Waiters in `WaiterManager` can now be uniquely identified by a `LockWaitToken`, and the data structure in `WaiterManager` is therefore changed. Accessing by lock hash and transaction ts is still necessary to handle the result of deadlock detection. Signed-off-by: MyonKeminta Co-authored-by: Ti Chi Robot Co-authored-by: Yilin Chen --- Cargo.lock | 8 +- components/resolved_ts/src/cmd.rs | 5 +- components/test_coprocessor/src/fixture.rs | 4 +- components/test_coprocessor/src/store.rs | 6 +- components/test_storage/src/sync_storage.rs | 10 +- metrics/grafana/tikv_details.json | 9 +- src/config.rs | 6 +- src/coprocessor_v2/raw_storage_impl.rs | 6 +- src/import/duplicate_detect.rs | 8 +- src/server/gc_worker/gc_worker.rs | 16 +- src/server/lock_manager/config.rs | 31 +- src/server/lock_manager/deadlock.rs | 129 +- src/server/lock_manager/metrics.rs | 8 +- src/server/lock_manager/mod.rs | 269 +++-- src/server/lock_manager/waiter_manager.rs | 1044 +++++++---------- src/server/server.rs | 4 +- src/storage/lock_manager/lock_wait_context.rs | 16 +- .../lock_manager/lock_waiting_queue.rs | 24 +- src/storage/lock_manager/mod.rs | 145 ++- src/storage/mod.rs | 561 ++++----- src/storage/mvcc/txn.rs | 24 +- src/storage/txn/actions/check_txn_status.rs | 6 +- src/storage/txn/actions/commit.rs | 2 +- .../txn/commands/acquire_pessimistic_lock.rs | 49 +- src/storage/txn/commands/atomic_store.rs | 9 +- 
.../txn/commands/check_secondary_locks.rs | 13 +- src/storage/txn/commands/check_txn_status.rs | 14 +- src/storage/txn/commands/cleanup.rs | 4 +- src/storage/txn/commands/commit.rs | 4 +- src/storage/txn/commands/compare_and_swap.rs | 11 +- .../txn/commands/flashback_to_version.rs | 5 +- src/storage/txn/commands/mod.rs | 78 +- src/storage/txn/commands/pause.rs | 5 +- .../txn/commands/pessimistic_rollback.rs | 10 +- src/storage/txn/commands/prewrite.rs | 55 +- src/storage/txn/commands/resolve_lock.rs | 13 +- src/storage/txn/commands/resolve_lock_lite.rs | 4 +- src/storage/txn/commands/rollback.rs | 4 +- src/storage/txn/commands/txn_heart_beat.rs | 11 +- src/storage/txn/scheduler.rs | 239 +++- tests/failpoints/cases/test_storage.rs | 43 +- tests/failpoints/cases/test_transaction.rs | 14 +- tests/failpoints/cases/test_ttl.rs | 4 +- .../config/dynamic/pessimistic_txn.rs | 82 +- .../resource_metering/test_suite/mod.rs | 15 +- 45 files changed, 1552 insertions(+), 1475 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 14951b8e253..a6b25808098 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1305,9 +1305,9 @@ dependencies = [ [[package]] name = "dashmap" -version = "5.1.0" +version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0834a35a3fce649144119e18da2a4d8ed12ef3862f47183fd46f625d072d96c" +checksum = "4c8858831f7781322e539ea39e72449c46b059638250c14344fec8d0aa6e539c" dependencies = [ "cfg-if 1.0.0", "num_cpus", @@ -5297,9 +5297,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" +checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1" [[package]] name = "smartstring" diff --git a/components/resolved_ts/src/cmd.rs b/components/resolved_ts/src/cmd.rs index 89d7167cc26..a1468e15bab 100644 --- a/components/resolved_ts/src/cmd.rs 
+++ b/components/resolved_ts/src/cmd.rs @@ -300,10 +300,9 @@ mod tests { }; use tikv::storage::{ kv::{MockEngineBuilder, TestEngineBuilder}, - lock_manager::DummyLockManager, mvcc::{tests::write, Mutation, MvccTxn, SnapshotReader}, txn::{ - commands::one_pc_commit_ts, prewrite, tests::*, CommitKind, TransactionKind, + commands::one_pc_commit, prewrite, tests::*, CommitKind, TransactionKind, TransactionProperties, }, Engine, @@ -426,7 +425,7 @@ mod tests { SkipPessimisticCheck, ) .unwrap(); - one_pc_commit_ts(true, &mut txn, 10.into(), &DummyLockManager); + one_pc_commit(true, &mut txn, 10.into()); write(&engine, &Default::default(), txn.into_modifies()); let one_pc_row = engine .take_last_modifies() diff --git a/components/test_coprocessor/src/fixture.rs b/components/test_coprocessor/src/fixture.rs index 23fc877a996..a53ba4500bc 100644 --- a/components/test_coprocessor/src/fixture.rs +++ b/components/test_coprocessor/src/fixture.rs @@ -12,7 +12,7 @@ use tikv::{ read_pool::ReadPool, server::Config, storage::{ - kv::RocksEngine, lock_manager::DummyLockManager, Engine, TestEngineBuilder, + kv::RocksEngine, lock_manager::MockLockManager, Engine, TestEngineBuilder, TestStorageBuilderApiV1, }, }; @@ -79,7 +79,7 @@ pub fn init_data_with_details( commit: bool, cfg: &Config, ) -> (Store, Endpoint, Arc) { - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .build() .unwrap(); let mut store = Store::from_storage(storage); diff --git a/components/test_coprocessor/src/store.rs b/components/test_coprocessor/src/store.rs index f19b0a113bd..278e210bc98 100644 --- a/components/test_coprocessor/src/store.rs +++ b/components/test_coprocessor/src/store.rs @@ -13,7 +13,7 @@ use tikv::{ server::gc_worker::GcConfig, storage::{ kv::{Engine, RocksEngine}, - lock_manager::DummyLockManager, + lock_manager::MockLockManager, txn::FixtureStore, SnapshotStore, 
StorageApiV1, TestStorageBuilderApiV1, }, @@ -116,7 +116,7 @@ pub struct Store { impl Store { pub fn new() -> Self { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); Self::from_storage(storage) @@ -130,7 +130,7 @@ impl Default for Store { } impl Store { - pub fn from_storage(storage: StorageApiV1) -> Self { + pub fn from_storage(storage: StorageApiV1) -> Self { Self { store: SyncTestStorageApiV1::from_storage(0, storage, GcConfig::default()).unwrap(), current_ts: 1.into(), diff --git a/components/test_storage/src/sync_storage.rs b/components/test_storage/src/sync_storage.rs index faa84944eca..fa53688ea75 100644 --- a/components/test_storage/src/sync_storage.rs +++ b/components/test_storage/src/sync_storage.rs @@ -19,7 +19,7 @@ use raftstore::{ use tikv::{ server::gc_worker::{AutoGcConfig, GcConfig, GcSafePointProvider, GcWorker}, storage::{ - config::Config, kv::RocksEngine, lock_manager::DummyLockManager, test_util::GetConsumer, + config::Config, kv::RocksEngine, lock_manager::MockLockManager, test_util::GetConsumer, txn::commands, Engine, KvGetStatistics, PrewriteResult, Result, Storage, TestEngineBuilder, TestStorageBuilder, TxnStatus, }, @@ -87,7 +87,7 @@ impl SyncTestStorageBuilder { pub fn build(mut self, store_id: u64) -> Result> { let mut builder = TestStorageBuilder::<_, _, F>::from_engine_and_lock_mgr( self.engine.clone(), - DummyLockManager, + MockLockManager::new(), ); if let Some(config) = self.config.take() { builder = builder.config(config); @@ -107,7 +107,7 @@ impl SyncTestStorageBuilder { #[derive(Clone)] pub struct SyncTestStorage { gc_worker: GcWorker, - store: Storage, + store: Storage, } /// SyncTestStorage for Api V1 @@ -117,7 +117,7 @@ pub type SyncTestStorageApiV1 = SyncTestStorage; impl SyncTestStorage { pub fn from_storage( store_id: u64, - storage: Storage, + storage: Storage, config: GcConfig, ) -> Result { let (tx, _rx) = 
std::sync::mpsc::channel(); @@ -145,7 +145,7 @@ impl SyncTestStorage { .unwrap(); } - pub fn get_storage(&self) -> Storage { + pub fn get_storage(&self) -> Storage { self.store.clone() } diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 9d64207c214..45a657cc4bb 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -35349,13 +35349,20 @@ "legendFormat": "{{type}}", "refId": "A", "step": 4 + }, + { + "expr": "sum(max_over_time(tikv_lock_wait_queue_entries_gauge_vec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[15s])) by (type)", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Wait table", + "title": "Lock Waiting Queue", "tooltip": { "msResolution": false, "shared": true, diff --git a/src/config.rs b/src/config.rs index 9dcf17d17d5..c978b1bf90a 100644 --- a/src/config.rs +++ b/src/config.rs @@ -4104,7 +4104,7 @@ mod tests { server::{config::ServerConfigManager, ttl::TtlCheckerTask}, storage::{ config_manager::StorageConfigManger, - lock_manager::DummyLockManager, + lock_manager::MockLockManager, txn::flow_controller::{EngineFlowController, FlowController}, Storage, TestStorageBuilder, }, @@ -4494,7 +4494,7 @@ mod tests { fn new_engines( cfg: TikvConfig, ) -> ( - Storage, + Storage, ConfigController, ReceiverWrapper, Arc, @@ -4513,7 +4513,7 @@ mod tests { ) .unwrap(); let storage = - TestStorageBuilder::<_, _, F>::from_engine_and_lock_mgr(engine, DummyLockManager) + TestStorageBuilder::<_, _, F>::from_engine_and_lock_mgr(engine, MockLockManager::new()) .config(cfg.storage.clone()) .build() .unwrap(); diff --git a/src/coprocessor_v2/raw_storage_impl.rs b/src/coprocessor_v2/raw_storage_impl.rs index fc505c50312..9a57b4c8624 100644 --- a/src/coprocessor_v2/raw_storage_impl.rs +++ b/src/coprocessor_v2/raw_storage_impl.rs @@ -215,11 
+215,11 @@ mod test { use kvproto::kvrpcpb::{ApiVersion, Context}; use super::*; - use crate::storage::{lock_manager::DummyLockManager, TestStorageBuilder}; + use crate::storage::{lock_manager::MockLockManager, TestStorageBuilder}; #[tokio::test] async fn test_storage_api() { - let storage = TestStorageBuilder::<_, _, ApiV2>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, ApiV2>::new(MockLockManager::new()) .build() .unwrap(); let ctx = Context { @@ -255,7 +255,7 @@ mod test { #[tokio::test] async fn test_storage_api_batch() { - let storage = TestStorageBuilder::<_, _, ApiV2>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, ApiV2>::new(MockLockManager::new()) .build() .unwrap(); let ctx = Context { diff --git a/src/import/duplicate_detect.rs b/src/import/duplicate_detect.rs index dbd819efbbf..b1eaecab881 100644 --- a/src/import/duplicate_detect.rs +++ b/src/import/duplicate_detect.rs @@ -239,7 +239,7 @@ mod tests { use super::*; use crate::storage::{ - lock_manager::{DummyLockManager, LockManager}, + lock_manager::{LockManager, MockLockManager}, txn::commands, Storage, TestStorageBuilderApiV1, }; @@ -350,7 +350,7 @@ mod tests { #[test] fn test_duplicate_detect() { - let mut storage = TestStorageBuilderApiV1::new(DummyLockManager) + let mut storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let mut data = vec![]; @@ -408,7 +408,7 @@ mod tests { // (108,18) is not repeated with (108,10). 
#[test] fn test_duplicate_detect_incremental() { - let mut storage = TestStorageBuilderApiV1::new(DummyLockManager) + let mut storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); for &start in &[100, 104, 108, 112] { @@ -469,7 +469,7 @@ mod tests { #[test] fn test_duplicate_detect_rollback_and_delete() { - let mut storage = TestStorageBuilderApiV1::new(DummyLockManager) + let mut storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let data = vec![ diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 5b638a01f48..8e345f0909b 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -1660,7 +1660,7 @@ mod tests { server::gc_worker::{MockSafePointProvider, PrefixedEngine}, storage::{ kv::{metrics::GcKeyMode, Modify, TestEngineBuilder, WriteData}, - lock_manager::DummyLockManager, + lock_manager::MockLockManager, mvcc::{ tests::{must_get_none, must_get_none_on_region, must_get_on_region}, MAX_TXN_WRITE_SIZE, @@ -1738,7 +1738,7 @@ mod tests { /// Assert the data in `storage` is the same as `expected_data`. Keys in /// `expected_data` should be encoded form without ts. 
fn check_data( - storage: &Storage, + storage: &Storage, expected_data: &BTreeMap, Vec>, ) { let scan_res = block_on(storage.scan( @@ -1773,10 +1773,12 @@ mod tests { let store_id = 1; let engine = TestEngineBuilder::new().build().unwrap(); - let storage = - TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine.clone(), DummyLockManager) - .build() - .unwrap(); + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr( + engine.clone(), + MockLockManager::new(), + ) + .build() + .unwrap(); let gate = FeatureGate::default(); gate.set_version("5.0.0").unwrap(); @@ -1960,7 +1962,7 @@ mod tests { let prefixed_engine = PrefixedEngine(engine); let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr( prefixed_engine.clone(), - DummyLockManager, + MockLockManager::new(), ) .build() .unwrap(); diff --git a/src/server/lock_manager/config.rs b/src/server/lock_manager/config.rs index aba08f3d2e7..6464c3cb1cd 100644 --- a/src/server/lock_manager/config.rs +++ b/src/server/lock_manager/config.rs @@ -3,7 +3,7 @@ use std::{ error::Error, sync::{ - atomic::{AtomicBool, Ordering}, + atomic::{AtomicBool, AtomicU64, Ordering}, Arc, }, }; @@ -80,6 +80,7 @@ pub struct LockManagerConfigManager { pub detector_scheduler: DeadlockScheduler, pub pipelined: Arc, pub in_memory: Arc, + pub wake_up_delay_duration_ms: Arc, } impl LockManagerConfigManager { @@ -88,29 +89,35 @@ impl LockManagerConfigManager { detector_scheduler: DeadlockScheduler, pipelined: Arc, in_memory: Arc, + wake_up_delay_duration_ms: Arc, ) -> Self { LockManagerConfigManager { waiter_mgr_scheduler, detector_scheduler, pipelined, in_memory, + wake_up_delay_duration_ms, } } } impl ConfigManager for LockManagerConfigManager { fn dispatch(&mut self, mut change: ConfigChange) -> Result<(), Box> { - match ( - change.remove("wait_for_lock_timeout").map(Into::into), - change.remove("wake_up_delay_duration").map(Into::into), - ) { - (timeout @ Some(_), delay) => { - self.waiter_mgr_scheduler.change_config(timeout, 
delay); - self.detector_scheduler.change_ttl(timeout.unwrap().into()); - } - (None, delay @ Some(_)) => self.waiter_mgr_scheduler.change_config(None, delay), - (None, None) => {} - }; + if let Some(p) = change.remove("wait_for_lock_timeout").map(Into::into) { + self.waiter_mgr_scheduler.change_config(Some(p)); + self.detector_scheduler.change_ttl(p.into()); + } + if let Some(p) = change + .remove("wake_up_delay_duration") + .map(ReadableDuration::from) + { + info!( + "Waiter manager config changed"; + "wake_up_delay_duration" => %p, + ); + self.wake_up_delay_duration_ms + .store(p.as_millis(), Ordering::Relaxed); + } if let Some(p) = change.remove("pipelined").map(Into::into) { self.pipelined.store(p, Ordering::Relaxed); } diff --git a/src/server/lock_manager/deadlock.rs b/src/server/lock_manager/deadlock.rs index 4fee40138c1..9583df80dd6 100644 --- a/src/server/lock_manager/deadlock.rs +++ b/src/server/lock_manager/deadlock.rs @@ -46,7 +46,7 @@ use super::{ }; use crate::{ server::resolve::StoreAddrResolver, - storage::lock_manager::{DiagnosticContext, Lock}, + storage::lock_manager::{DiagnosticContext, KeyLockWaitInfo, LockDigest}, }; /// `Locks` is a set of locks belonging to one transaction. @@ -308,11 +308,11 @@ impl DetectTable { } /// Removes the corresponding wait_for_entry. - fn clean_up_wait_for(&mut self, txn_ts: TimeStamp, lock_ts: TimeStamp, lock_hash: u64) { + fn clean_up_wait_for(&mut self, txn_ts: TimeStamp, lock_digest: LockDigest) { if let Some(wait_for) = self.wait_for_map.get_mut(&txn_ts) { - if let Some(locks) = wait_for.get_mut(&lock_ts) { - if locks.remove(lock_hash) { - wait_for.remove(&lock_ts); + if let Some(locks) = wait_for.get_mut(&lock_digest.ts) { + if locks.remove(lock_digest.hash) { + wait_for.remove(&lock_digest.ts); if wait_for.is_empty() { self.wait_for_map.remove(&txn_ts); } @@ -396,7 +396,7 @@ pub enum Task { Detect { tp: DetectType, txn_ts: TimeStamp, - lock: Lock, + wait_info: Option, // Only valid when `tp == Detect`. 
diag_ctx: DiagnosticContext, }, @@ -424,11 +424,14 @@ impl Display for Task { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { Task::Detect { - tp, txn_ts, lock, .. + tp, + txn_ts, + wait_info, + .. } => write!( f, - "Detect {{ tp: {:?}, txn_ts: {}, lock: {:?} }}", - tp, txn_ts, lock + "Detect {{ tp: {:?}, txn_ts: {}, wait_info: {:?} }}", + tp, txn_ts, wait_info ), Task::DetectRpc { .. } => write!(f, "Detect Rpc"), Task::ChangeRole(role) => write!(f, "ChangeRole {{ role: {:?} }}", role), @@ -459,20 +462,26 @@ impl Scheduler { } } - pub fn detect(&self, txn_ts: TimeStamp, lock: Lock, diag_ctx: DiagnosticContext) { + pub fn detect( + &self, + txn_ts: TimeStamp, + wait_info: KeyLockWaitInfo, + diag_ctx: DiagnosticContext, + ) { + // TODO: Support detect many keys in a batch self.notify_scheduler(Task::Detect { tp: DetectType::Detect, txn_ts, - lock, + wait_info: Some(wait_info), diag_ctx, }); } - pub fn clean_up_wait_for(&self, txn_ts: TimeStamp, lock: Lock) { + pub fn clean_up_wait_for(&self, start_ts: TimeStamp, wait_info: KeyLockWaitInfo) { self.notify_scheduler(Task::Detect { tp: DetectType::CleanUpWaitFor, - txn_ts, - lock, + txn_ts: start_ts, + wait_info: Some(wait_info), diag_ctx: DiagnosticContext::default(), }); } @@ -481,7 +490,7 @@ impl Scheduler { self.notify_scheduler(Task::Detect { tp: DetectType::CleanUp, txn_ts, - lock: Lock::default(), + wait_info: None, diag_ctx: DiagnosticContext::default(), }); } @@ -785,13 +794,14 @@ where let (send, recv) = leader_client.register_detect_handler(Box::new(move |mut resp| { let entry = resp.take_entry(); let txn = entry.txn.into(); - let lock = Lock { + let lock = LockDigest { ts: entry.wait_for_txn.into(), hash: entry.key_hash, }; let mut wait_chain: Vec<_> = resp.take_wait_chain().into(); + let key = entry.get_key().to_vec(); wait_chain.push(entry); - waiter_mgr_scheduler.deadlock(txn, lock, resp.get_deadlock_key_hash(), wait_chain) + waiter_mgr_scheduler.deadlock(txn, key, lock, 
resp.get_deadlock_key_hash(), wait_chain) })); spawn_local(send.map_err(|e| error!("leader client failed"; "err" => ?e))); // No need to log it again. @@ -810,7 +820,7 @@ where &mut self, tp: DetectType, txn_ts: TimeStamp, - lock: Lock, + wait_info: &Option, diag_ctx: DiagnosticContext, ) -> bool { assert!(!self.is_leader() && self.leader_info.is_some()); @@ -826,8 +836,10 @@ where }; let mut entry = WaitForEntry::default(); entry.set_txn(txn_ts.into_inner()); - entry.set_wait_for_txn(lock.ts.into_inner()); - entry.set_key_hash(lock.hash); + if let Some(wait_info) = wait_info.as_ref() { + entry.set_wait_for_txn(wait_info.lock_digest.ts.into_inner()); + entry.set_key_hash(wait_info.lock_digest.hash); + } entry.set_key(diag_ctx.key); entry.set_resource_group_tag(diag_ctx.resource_group_tag); let mut req = DeadlockRequest::default(); @@ -846,32 +858,38 @@ where &self, tp: DetectType, txn_ts: TimeStamp, - lock: Lock, + wait_info: Option, diag_ctx: DiagnosticContext, ) { let detect_table = &mut self.inner.borrow_mut().detect_table; match tp { DetectType::Detect => { + let wait_info = wait_info.unwrap(); if let Some((deadlock_key_hash, mut wait_chain)) = detect_table.detect( txn_ts, - lock.ts, - lock.hash, + wait_info.lock_digest.ts, + wait_info.lock_digest.hash, &diag_ctx.key, &diag_ctx.resource_group_tag, ) { let mut last_entry = WaitForEntry::default(); last_entry.set_txn(txn_ts.into_inner()); - last_entry.set_wait_for_txn(lock.ts.into_inner()); - last_entry.set_key_hash(lock.hash); - last_entry.set_key(diag_ctx.key); + last_entry.set_wait_for_txn(wait_info.lock_digest.ts.into_inner()); + last_entry.set_key_hash(wait_info.lock_digest.hash); + last_entry.set_key(diag_ctx.key.clone()); last_entry.set_resource_group_tag(diag_ctx.resource_group_tag); wait_chain.push(last_entry); - self.waiter_mgr_scheduler - .deadlock(txn_ts, lock, deadlock_key_hash, wait_chain); + self.waiter_mgr_scheduler.deadlock( + txn_ts, + diag_ctx.key.clone(), + wait_info.lock_digest, + 
deadlock_key_hash, + wait_chain, + ); } } DetectType::CleanUpWaitFor => { - detect_table.clean_up_wait_for(txn_ts, lock.ts, lock.hash) + detect_table.clean_up_wait_for(txn_ts, wait_info.unwrap().lock_digest) } DetectType::CleanUp => detect_table.clean_up(txn_ts), } @@ -882,11 +900,11 @@ where &mut self, tp: DetectType, txn_ts: TimeStamp, - lock: Lock, + wait_info: Option, diag_ctx: DiagnosticContext, ) { if self.is_leader() { - self.handle_detect_locally(tp, txn_ts, lock, diag_ctx); + self.handle_detect_locally(tp, txn_ts, wait_info, diag_ctx); } else { for _ in 0..2 { // TODO: If the leader hasn't been elected, it requests Pd for @@ -896,7 +914,7 @@ where if self.leader_client.is_none() && !self.refresh_leader_info() { break; } - if self.send_request_to_leader(tp, txn_ts, lock, diag_ctx.clone()) { + if self.send_request_to_leader(tp, txn_ts, &wait_info, diag_ctx.clone()) { return; } // Because the client is asynchronous, it won't be closed until @@ -906,7 +924,7 @@ where // If a request which causes deadlock is dropped, it leads to the waiter // timeout. TiDB will retry to acquire the lock and detect deadlock // again. - warn!("detect request dropped"; "tp" => ?tp, "txn_ts" => txn_ts, "lock" => ?lock); + warn!("detect request dropped"; "tp" => ?tp, "txn_ts" => txn_ts, "wait_info" => ?wait_info); ERROR_COUNTER_METRICS.dropped.inc(); } } @@ -917,6 +935,7 @@ where stream: RequestStream, sink: DuplexSink, ) { + // TODO: Support batch checking. 
if !self.is_leader() { let status = RpcStatus::with_message( RpcStatusCode::FAILED_PRECONDITION, @@ -963,7 +982,13 @@ where } } DeadlockRequestType::CleanUpWaitFor => { - detect_table.clean_up_wait_for(txn.into(), wait_for_txn.into(), *key_hash); + detect_table.clean_up_wait_for( + txn.into(), + LockDigest { + ts: wait_for_txn.into(), + hash: *key_hash, + }, + ); None } DeadlockRequestType::CleanUp => { @@ -1005,10 +1030,10 @@ where Task::Detect { tp, txn_ts, - lock, + wait_info, diag_ctx, } => { - self.handle_detect(tp, txn_ts, lock, diag_ctx); + self.handle_detect(tp, txn_ts, wait_info, diag_ctx); } Task::DetectRpc { stream, sink } => { self.handle_detect_rpc(stream, sink); @@ -1180,7 +1205,13 @@ pub mod tests { ); // Clean up entries shrinking the map. - detect_table.clean_up_wait_for(3.into(), 1.into(), 1); + detect_table.clean_up_wait_for( + 3.into(), + LockDigest { + ts: 1.into(), + hash: 1, + }, + ); assert_eq!( detect_table .wait_for_map @@ -1192,14 +1223,32 @@ pub mod tests { .len(), 1 ); - detect_table.clean_up_wait_for(3.into(), 1.into(), 2); + detect_table.clean_up_wait_for( + 3.into(), + LockDigest { + ts: 1.into(), + hash: 2, + }, + ); assert_eq!(detect_table.wait_for_map.get(&3.into()).unwrap().len(), 1); - detect_table.clean_up_wait_for(3.into(), 2.into(), 2); + detect_table.clean_up_wait_for( + 3.into(), + LockDigest { + ts: 2.into(), + hash: 2, + }, + ); assert_eq!(detect_table.wait_for_map.contains_key(&3.into()), false); // Clean up non-exist entry detect_table.clean_up(3.into()); - detect_table.clean_up_wait_for(3.into(), 1.into(), 1); + detect_table.clean_up_wait_for( + 3.into(), + LockDigest { + ts: 1.into(), + hash: 1, + }, + ); } #[test] diff --git a/src/server/lock_manager/metrics.rs b/src/server/lock_manager/metrics.rs index f400652966b..d6ff48bcf80 100644 --- a/src/server/lock_manager/metrics.rs +++ b/src/server/lock_manager/metrics.rs @@ -12,6 +12,7 @@ make_auto_flush_static_metric! 
{ detect, clean_up_wait_for, clean_up, + update_wait_for, }, } @@ -60,13 +61,6 @@ lazy_static! { exponential_buckets(0.0001, 2.0, 20).unwrap() // 0.1ms ~ 104s ) .unwrap(); - pub static ref WAIT_TABLE_STATUS_GAUGE: WaitTableStatusGauge = register_static_int_gauge_vec!( - WaitTableStatusGauge, - "tikv_lock_manager_wait_table_status", - "Status of the wait table", - &["type"] - ) - .unwrap(); pub static ref DETECTOR_LEADER_GAUGE: IntGauge = register_int_gauge!( "tikv_lock_manager_detector_leader_heartbeat", "Heartbeat of the leader of the deadlock detector" diff --git a/src/server/lock_manager/mod.rs b/src/server/lock_manager/mod.rs index e437cea2bf1..ae60467124b 100644 --- a/src/server/lock_manager/mod.rs +++ b/src/server/lock_manager/mod.rs @@ -7,19 +7,15 @@ mod metrics; pub mod waiter_manager; use std::{ - collections::hash_map::DefaultHasher, - hash::{Hash, Hasher}, sync::{ - atomic::{AtomicBool, AtomicUsize, Ordering}, + atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, Arc, }, thread::JoinHandle, }; -use collections::HashSet; -use crossbeam::utils::CachePadded; use engine_traits::KvEngine; -use parking_lot::Mutex; +use kvproto::metapb::RegionEpoch; use pd_client::PdClient; use raftstore::coprocessor::CoprocessorHost; use security::SecurityManager; @@ -32,26 +28,22 @@ pub use self::{ waiter_manager::Scheduler as WaiterMgrScheduler, }; use self::{ - deadlock::{Detector, RoleChangeNotifier}, - waiter_manager::WaiterManager, + deadlock::Detector, + waiter_manager::{Waiter, WaiterManager}, }; use crate::{ - server::{resolve::StoreAddrResolver, Error, Result}, + server::{ + lock_manager::deadlock::RoleChangeNotifier, resolve::StoreAddrResolver, Error, Result, + }, storage::{ - lock_manager::{DiagnosticContext, Lock, LockManager as LockManagerTrait, WaitTimeout}, - DynamicConfigs as StorageDynamicConfigs, ProcessResult, StorageCallback, + lock_manager::{ + DiagnosticContext, KeyLockWaitInfo, LockManager as LockManagerTrait, LockWaitToken, + UpdateWaitForEvent, 
WaitTimeout, + }, + DynamicConfigs as StorageDynamicConfigs, Error as StorageError, }, }; -const DETECTED_SLOTS_NUM: usize = 128; - -#[inline] -fn detected_slot_idx(txn_ts: TimeStamp) -> usize { - let mut s = DefaultHasher::new(); - txn_ts.hash(&mut s); - (s.finish() as usize) & (DETECTED_SLOTS_NUM - 1) -} - /// `LockManager` has two components working in two threads: /// * One is the `WaiterManager` which manages transactions waiting for locks. /// * The other one is the `Detector` which detects deadlocks between @@ -65,12 +57,13 @@ pub struct LockManager { waiter_count: Arc, - /// Record transactions which have sent requests to detect deadlock. - detected: Arc<[CachePadded>>]>, + token_allocator: Arc, pipelined: Arc, in_memory: Arc, + + wake_up_delay_duration_ms: Arc, } impl Clone for LockManager { @@ -81,9 +74,10 @@ impl Clone for LockManager { waiter_mgr_scheduler: self.waiter_mgr_scheduler.clone(), detector_scheduler: self.detector_scheduler.clone(), waiter_count: self.waiter_count.clone(), - detected: self.detected.clone(), + token_allocator: self.token_allocator.clone(), pipelined: self.pipelined.clone(), in_memory: self.in_memory.clone(), + wake_up_delay_duration_ms: self.wake_up_delay_duration_ms.clone(), } } } @@ -92,8 +86,6 @@ impl LockManager { pub fn new(cfg: &Config) -> Self { let waiter_mgr_worker = FutureWorker::new("waiter-manager"); let detector_worker = FutureWorker::new("deadlock-detector"); - let mut detected = Vec::with_capacity(DETECTED_SLOTS_NUM); - detected.resize_with(DETECTED_SLOTS_NUM, || Mutex::new(HashSet::default()).into()); Self { waiter_mgr_scheduler: WaiterMgrScheduler::new(waiter_mgr_worker.scheduler()), @@ -101,9 +93,12 @@ impl LockManager { detector_scheduler: DetectorScheduler::new(detector_worker.scheduler()), detector_worker: Some(detector_worker), waiter_count: Arc::new(AtomicUsize::new(0)), - detected: detected.into(), + token_allocator: Arc::new(AtomicU64::new(0)), pipelined: Arc::new(AtomicBool::new(cfg.pipelined)), 
in_memory: Arc::new(AtomicBool::new(cfg.in_memory)), + wake_up_delay_duration_ms: Arc::new(AtomicU64::new( + cfg.wake_up_delay_duration.as_millis(), + )), } } @@ -225,6 +220,7 @@ impl LockManager { self.detector_scheduler.clone(), self.pipelined.clone(), self.in_memory.clone(), + self.wake_up_delay_duration_ms.clone(), ) } @@ -232,35 +228,33 @@ impl LockManager { StorageDynamicConfigs { pipelined_pessimistic_lock: self.pipelined.clone(), in_memory_pessimistic_lock: self.in_memory.clone(), + wake_up_delay_duration_ms: self.wake_up_delay_duration_ms.clone(), } } - - fn add_to_detected(&self, txn_ts: TimeStamp) { - let mut detected = self.detected[detected_slot_idx(txn_ts)].lock(); - detected.insert(txn_ts); - } - - fn remove_from_detected(&self, txn_ts: TimeStamp) -> bool { - let mut detected = self.detected[detected_slot_idx(txn_ts)].lock(); - detected.remove(&txn_ts) - } } impl LockManagerTrait for LockManager { + fn allocate_token(&self) -> LockWaitToken { + LockWaitToken(Some(self.token_allocator.fetch_add(1, Ordering::Relaxed))) + } + fn wait_for( &self, + token: LockWaitToken, + region_id: u64, + region_epoch: RegionEpoch, + term: u64, start_ts: TimeStamp, - cb: StorageCallback, - pr: ProcessResult, - lock: Lock, + wait_info: KeyLockWaitInfo, is_first_lock: bool, timeout: Option, + cancel_callback: Box, diag_ctx: DiagnosticContext, ) { let timeout = match timeout { Some(t) => t, None => { - cb.execute(pr); + Waiter::cancel_no_timeout(wait_info, cancel_callback); return; } }; @@ -268,35 +262,34 @@ impl LockManagerTrait for LockManager { // Increase `waiter_count` here to prevent there is an on-the-fly WaitFor msg // but the waiter_mgr haven't processed it, subsequent WakeUp msgs may be lost. 
self.waiter_count.fetch_add(1, Ordering::SeqCst); - self.waiter_mgr_scheduler - .wait_for(start_ts, cb, pr, lock, timeout, diag_ctx.clone()); + + self.waiter_mgr_scheduler.wait_for( + token, + region_id, + region_epoch, + term, + start_ts, + wait_info.clone(), + timeout, + cancel_callback, + diag_ctx.clone(), + ); // If it is the first lock the transaction tries to lock, it won't cause // deadlock. if !is_first_lock { - self.add_to_detected(start_ts); - self.detector_scheduler.detect(start_ts, lock, diag_ctx); + self.detector_scheduler + .detect(start_ts, wait_info, diag_ctx); } } - fn wake_up( - &self, - lock_ts: TimeStamp, - hashes: Vec, - commit_ts: TimeStamp, - is_pessimistic_txn: bool, - ) { - // If `hashes` is some, there may be some waiters waiting for these locks. - // Try to wake up them. - if !hashes.is_empty() && self.has_waiter() { - self.waiter_mgr_scheduler - .wake_up(lock_ts, hashes, commit_ts); - } - // If a pessimistic transaction is committed or rolled back and it once sent - // requests to detect deadlock, clean up its wait-for entries in the - // deadlock detector. 
- if is_pessimistic_txn && self.remove_from_detected(lock_ts) { - self.detector_scheduler.clean_up(lock_ts); + fn update_wait_for(&self, updated_items: Vec) { + self.waiter_mgr_scheduler.update_wait_for(updated_items); + } + + fn remove_lock_wait(&self, token: LockWaitToken) { + if self.has_waiter() { + self.waiter_mgr_scheduler.remove_lock_wait(token); } } @@ -321,9 +314,11 @@ mod tests { use security::SecurityConfig; use tikv_util::config::ReadableDuration; use tracker::{TrackerToken, INVALID_TRACKER_TOKEN}; + use txn_types::Key; use self::{deadlock::tests::*, metrics::*, waiter_manager::tests::*}; use super::*; + use crate::storage::lock_manager::LockDigest; fn start_lock_manager() -> LockManager { let mut coprocessor_host = CoprocessorHost::::default(); @@ -378,69 +373,88 @@ mod tests { assert!(!lock_mgr.has_waiter()); let (waiter, lock_info, f) = new_test_waiter(10.into(), 20.into(), 20); lock_mgr.wait_for( + lock_mgr.allocate_token(), + 1, + RegionEpoch::default(), + 1, waiter.start_ts, - waiter.cb, - waiter.pr, - waiter.lock, + waiter.wait_info, true, Some(WaitTimeout::Default), + waiter.cancel_callback, DiagnosticContext::default(), ); assert!(lock_mgr.has_waiter()); assert_elapsed( - || expect_key_is_locked(block_on(f).unwrap().unwrap(), lock_info), + || expect_key_is_locked(block_on(f).unwrap(), lock_info), 2500, 3500, ); assert!(!lock_mgr.has_waiter()); - // Wake up + // Removal let (waiter_ts, lock) = ( 10.into(), - Lock { + LockDigest { ts: 20.into(), hash: 20, }, ); - let (waiter, lock_info, f) = new_test_waiter(waiter_ts, lock.ts, lock.hash); + let (waiter, _lock_info, f) = new_test_waiter(waiter_ts, lock.ts, lock.hash); + let token = lock_mgr.allocate_token(); lock_mgr.wait_for( + token, + 1, + RegionEpoch::default(), + 1, waiter.start_ts, - waiter.cb, - waiter.pr, - waiter.lock, + waiter.wait_info, true, Some(WaitTimeout::Default), + waiter.cancel_callback, DiagnosticContext::default(), ); assert!(lock_mgr.has_waiter()); - lock_mgr.wake_up(lock.ts, 
vec![lock.hash], 30.into(), false); + lock_mgr.remove_lock_wait(token); + // The waiter will be directly dropped. + // In normal cases, when `remove_lock_wait` is invoked, the request's callback + // must be called somewhere else. assert_elapsed( - || expect_write_conflict(block_on(f).unwrap(), waiter_ts, lock_info, 30.into()), + || { + block_on(f).unwrap_err(); + }, 0, 500, ); assert!(!lock_mgr.has_waiter()); // Deadlock - let (waiter1, lock_info1, f1) = new_test_waiter(10.into(), 20.into(), 20); + let (waiter1, _lock_info1, f1) = new_test_waiter_with_key(10.into(), 20.into(), b"k1"); + let token1 = lock_mgr.allocate_token(); lock_mgr.wait_for( + token1, + 1, + RegionEpoch::default(), + 1, waiter1.start_ts, - waiter1.cb, - waiter1.pr, - waiter1.lock, + waiter1.wait_info, false, Some(WaitTimeout::Default), + waiter1.cancel_callback, diag_ctx(b"k1", b"tag1", INVALID_TRACKER_TOKEN), ); assert!(lock_mgr.has_waiter()); - let (waiter2, lock_info2, f2) = new_test_waiter(20.into(), 10.into(), 10); + let (waiter2, lock_info2, f2) = new_test_waiter_with_key(20.into(), 10.into(), b"k2"); lock_mgr.wait_for( + lock_mgr.allocate_token(), + 1, + RegionEpoch::default(), + 1, waiter2.start_ts, - waiter2.cb, - waiter2.pr, - waiter2.lock, + waiter2.wait_info, false, Some(WaitTimeout::Default), + waiter2.cancel_callback, diag_ctx(b"k2", b"tag2", INVALID_TRACKER_TOKEN), ); assert!(lock_mgr.has_waiter()); @@ -450,17 +464,19 @@ mod tests { block_on(f2).unwrap(), 20.into(), lock_info2, - 20, + Key::from_raw(b"k1").gen_hash(), &[(10, 20, b"k1", b"tag1"), (20, 10, b"k2", b"tag2")], ) }, 0, 500, ); - // Waiter2 releases its lock. 
- lock_mgr.wake_up(20.into(), vec![20], 20.into(), true); + // Simulating waiter2 releases its lock so that waiter1 is removed + lock_mgr.remove_lock_wait(token1); assert_elapsed( - || expect_write_conflict(block_on(f1).unwrap(), 10.into(), lock_info1, 20.into()), + || { + block_on(f1).unwrap_err(); + }, 0, 500, ); @@ -468,53 +484,80 @@ mod tests { // If it's the first lock, no detect. // If it's not, detect deadlock. + // Note that if txn 30 is writing its first lock, there should never be another + // transaction waiting for txn 30's lock. We added this waiter (40 + // waiting for 30) just for checking whether the lock manager does the + // detection internally. + let (waiter1, _, f1) = new_test_waiter_with_key(40.into(), 30.into(), b"k1"); + let token1 = lock_mgr.allocate_token(); + lock_mgr.wait_for( + token1, + 1, + RegionEpoch::default(), + 1, + waiter1.start_ts, + waiter1.wait_info, + false, + Some(WaitTimeout::Default), + waiter1.cancel_callback, + diag_ctx(b"k1", b"tag1", INVALID_TRACKER_TOKEN), + ); for is_first_lock in &[true, false] { - let (waiter, _, f) = new_test_waiter(30.into(), 40.into(), 40); + let (waiter, lock_info2, f2) = new_test_waiter_with_key(30.into(), 40.into(), b"k2"); + let token2 = lock_mgr.allocate_token(); lock_mgr.wait_for( + token2, + 1, + RegionEpoch::default(), + 1, waiter.start_ts, - waiter.cb, - waiter.pr, - waiter.lock, + waiter.wait_info, *is_first_lock, Some(WaitTimeout::Default), - DiagnosticContext::default(), + waiter.cancel_callback, + diag_ctx(b"k2", b"tag2", INVALID_TRACKER_TOKEN), ); assert!(lock_mgr.has_waiter()); - assert_eq!(lock_mgr.remove_from_detected(30.into()), !is_first_lock); - lock_mgr.wake_up(40.into(), vec![40], 40.into(), false); - block_on(f).unwrap().unwrap_err(); + if *is_first_lock { + lock_mgr.remove_lock_wait(token2); + block_on(f2).unwrap_err(); + } else { + assert_elapsed( + || { + expect_deadlock( + block_on(f2).unwrap(), + 30.into(), + lock_info2, + Key::from_raw(b"k1").gen_hash(), + &[(40, 
30, b"k1", b"tag1"), (30, 40, b"k2", b"tag2")], + ) + }, + 0, + 500, + ); + } } + lock_mgr.remove_lock_wait(token1); + block_on(f1).unwrap_err(); assert!(!lock_mgr.has_waiter()); - // If key_hashes is empty, no wake up. - let prev_wake_up = TASK_COUNTER_METRICS.wake_up.get(); - lock_mgr.wake_up(10.into(), vec![], 10.into(), false); - assert_eq!(TASK_COUNTER_METRICS.wake_up.get(), prev_wake_up); - - // If it's non-pessimistic-txn, no clean up. - let prev_clean_up = TASK_COUNTER_METRICS.clean_up.get(); - lock_mgr.wake_up(10.into(), vec![], 10.into(), false); - assert_eq!(TASK_COUNTER_METRICS.clean_up.get(), prev_clean_up); - - // If the txn doesn't wait for locks, no clean up. - let prev_clean_up = TASK_COUNTER_METRICS.clean_up.get(); - lock_mgr.wake_up(10.into(), vec![], 10.into(), true); - assert_eq!(TASK_COUNTER_METRICS.clean_up.get(), prev_clean_up); - // If timeout is none, no wait for. let (waiter, lock_info, f) = new_test_waiter(10.into(), 20.into(), 20); let prev_wait_for = TASK_COUNTER_METRICS.wait_for.get(); lock_mgr.wait_for( + lock_mgr.allocate_token(), + 1, + RegionEpoch::default(), + 1, waiter.start_ts, - waiter.cb, - waiter.pr, - waiter.lock, + waiter.wait_info, false, None, + waiter.cancel_callback, DiagnosticContext::default(), ); assert_elapsed( - || expect_key_is_locked(block_on(f).unwrap().unwrap(), lock_info), + || expect_key_is_locked(block_on(f).unwrap(), lock_info), 0, 500, ); diff --git a/src/server/lock_manager/waiter_manager.rs b/src/server/lock_manager/waiter_manager.rs index 2ba2b583de9..33164833fba 100644 --- a/src/server/lock_manager/waiter_manager.rs +++ b/src/server/lock_manager/waiter_manager.rs @@ -9,7 +9,7 @@ use std::{ atomic::{AtomicUsize, Ordering}, Arc, }, - time::{Duration, Instant}, + time::Instant, }; use collections::HashMap; @@ -18,7 +18,7 @@ use futures::{ future::Future, task::{Context, Poll}, }; -use kvproto::{deadlock::WaitForEntry, kvrpcpb::WriteConflictReason}; +use kvproto::{deadlock::WaitForEntry, 
metapb::RegionEpoch}; use tikv_util::{ config::ReadableDuration, time::{duration_to_sec, InstantExt}, @@ -27,13 +27,17 @@ use tikv_util::{ }; use tokio::task::spawn_local; use tracker::GLOBAL_TRACKERS; +use txn_types::Key; use super::{config::Config, deadlock::Scheduler as DetectorScheduler, metrics::*}; use crate::storage::{ - lock_manager::{DiagnosticContext, Lock, WaitTimeout}, + lock_manager::{ + DiagnosticContext, KeyLockWaitInfo, LockDigest, LockWaitToken, UpdateWaitForEvent, + WaitTimeout, + }, mvcc::{Error as MvccError, ErrorInner as MvccErrorInner, TimeStamp}, - txn::{Error as TxnError, ErrorInner as TxnErrorInner}, - Error as StorageError, ErrorInner as StorageErrorInner, ProcessResult, StorageCallback, + txn::Error as TxnError, + Error as StorageError, ErrorInner as StorageErrorInner, }; struct DelayInner { @@ -103,21 +107,27 @@ pub type Callback = Box) + Send>; #[allow(clippy::large_enum_variant)] pub enum Task { + SetKeyWakeUpDelayCallback { + cb: Box, + }, WaitFor { + token: LockWaitToken, + region_id: u64, + region_epoch: RegionEpoch, + term: u64, // which txn waits for the lock start_ts: TimeStamp, - cb: StorageCallback, - pr: ProcessResult, - lock: Lock, + wait_info: KeyLockWaitInfo, timeout: WaitTimeout, + cancel_callback: Box, diag_ctx: DiagnosticContext, start_waiting_time: Instant, }, - WakeUp { - // lock info - lock_ts: TimeStamp, - hashes: Vec, - commit_ts: TimeStamp, + RemoveLockWait { + token: LockWaitToken, + }, + UpdateWaitFor { + events: Vec, }, Dump { cb: Callback, @@ -125,16 +135,16 @@ pub enum Task { Deadlock { // Which txn causes deadlock start_ts: TimeStamp, - lock: Lock, + key: Vec, + lock: LockDigest, deadlock_key_hash: u64, wait_chain: Vec, }, ChangeConfig { timeout: Option, - delay: Option, }, #[cfg(any(test, feature = "testexport"))] - Validate(Box), + Validate(Box), } /// Debug for task. 
@@ -148,16 +158,33 @@ impl Debug for Task { impl Display for Task { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { - Task::WaitFor { start_ts, lock, .. } => { - write!(f, "txn:{} waiting for {}:{}", start_ts, lock.ts, lock.hash) + Task::SetKeyWakeUpDelayCallback { .. } => { + write!(f, "setting key wake up delay callback") + } + Task::WaitFor { + token, + start_ts, + wait_info, + .. + } => { + write!( + f, + "txn:{} waiting for {}:{}, token {:?}", + start_ts, wait_info.lock_digest.ts, wait_info.lock_digest.hash, token + ) + } + Task::RemoveLockWait { token } => { + write!(f, "waking up txns waiting for token {:?}", token) + } + Task::UpdateWaitFor { events } => { + write!(f, "updating wait info {:?}", events) } - Task::WakeUp { lock_ts, .. } => write!(f, "waking up txns waiting for {}", lock_ts), Task::Dump { .. } => write!(f, "dump"), Task::Deadlock { start_ts, .. } => write!(f, "txn:{} deadlock", start_ts), - Task::ChangeConfig { timeout, delay } => write!( + Task::ChangeConfig { timeout } => write!( f, - "change config to default_wait_for_lock_timeout: {:?}, wake_up_delay_duration: {:?}", - timeout, delay + "change config to default_wait_for_lock_timeout: {:?}", + timeout ), #[cfg(any(test, feature = "testexport"))] Task::Validate(_) => write!(f, "validate waiter manager config"), @@ -172,15 +199,14 @@ impl Display for Task { /// has a timeout. Transaction will be notified when the lock is released /// or the corresponding waiter times out. pub(crate) struct Waiter { + // These field will be needed for supporting region-level waking up when region errors + // happens. + // region_id: u64, + // region_epoch: RegionEpoch, + // term: u64, pub(crate) start_ts: TimeStamp, - pub(crate) cb: StorageCallback, - /// The result of `Command::AcquirePessimisticLock`. - /// - /// It contains a `KeyIsLocked` error at the beginning. It will be changed - /// to `WriteConflict` error if the lock is released or `Deadlock` error if - /// it causes deadlock. 
- pub(crate) pr: ProcessResult, - pub(crate) lock: Lock, + pub(crate) wait_info: KeyLockWaitInfo, + pub(crate) cancel_callback: Box, pub diag_ctx: DiagnosticContext, delay: Delay, start_waiting_time: Instant, @@ -188,19 +214,20 @@ pub(crate) struct Waiter { impl Waiter { fn new( + _region_id: u64, + _region_epoch: RegionEpoch, + _term: u64, start_ts: TimeStamp, - cb: StorageCallback, - pr: ProcessResult, - lock: Lock, + wait_info: KeyLockWaitInfo, + cancel_callback: Box, deadline: Instant, diag_ctx: DiagnosticContext, start_waiting_time: Instant, ) -> Self { Self { start_ts, - cb, - pr, - lock, + wait_info, + cancel_callback, delay: Delay::new(deadline), diag_ctx, start_waiting_time, @@ -220,13 +247,13 @@ impl Waiter { } } + #[allow(dead_code)] fn reset_timeout(&self, deadline: Instant) { self.delay.reset(deadline); } - /// `Notify` consumes the `Waiter` to notify the corresponding transaction - /// going on. - fn notify(self) { + /// Consumes the `Waiter` to notify the corresponding transaction going on. + fn cancel(self, error: Option) -> KeyLockWaitInfo { let elapsed = self.start_waiting_time.saturating_elapsed(); GLOBAL_TRACKERS.with_tracker(self.diag_ctx.tracker, |tracker| { tracker.metrics.pessimistic_lock_wait_nanos = elapsed.as_nanos() as u64; @@ -234,165 +261,138 @@ impl Waiter { WAITER_LIFETIME_HISTOGRAM.observe(duration_to_sec(elapsed)); // Cancel the delay timer to prevent removing the same `Waiter` earlier. self.delay.cancel(); - self.cb.execute(self.pr); + if let Some(error) = error { + (self.cancel_callback)(error); + } + self.wait_info } - /// Changes the `ProcessResult` to `WriteConflict`. - /// It may be invoked more than once. 
- fn conflict_with(&mut self, lock_ts: TimeStamp, commit_ts: TimeStamp) { - let (key, primary) = self.extract_key_info(); - let mvcc_err = MvccError::from(MvccErrorInner::WriteConflict { - start_ts: self.start_ts, - conflict_start_ts: lock_ts, - conflict_commit_ts: commit_ts, - key, - primary, - reason: WriteConflictReason::PessimisticRetry, - }); - self.pr = ProcessResult::Failed { - err: StorageError::from(TxnError::from(mvcc_err)), - }; + fn cancel_for_finished(self) -> KeyLockWaitInfo { + self.cancel(None) + } + + fn cancel_for_timeout(self, _skip_resolving_lock: bool) -> KeyLockWaitInfo { + let lock_info = self.wait_info.lock_info.clone(); + // lock_info.set_skip_resolving_lock(skip_resolving_lock); + let error = MvccError::from(MvccErrorInner::KeyIsLocked(lock_info)); + self.cancel(Some(StorageError::from(TxnError::from(error)))) } - /// Changes the `ProcessResult` to `Deadlock`. - fn deadlock_with(&mut self, deadlock_key_hash: u64, wait_chain: Vec) { - let (key, _) = self.extract_key_info(); - let mvcc_err = MvccError::from(MvccErrorInner::Deadlock { + pub(super) fn cancel_no_timeout( + wait_info: KeyLockWaitInfo, + cancel_callback: Box, + ) { + let lock_info = wait_info.lock_info; + let error = MvccError::from(MvccErrorInner::KeyIsLocked(lock_info)); + cancel_callback(StorageError::from(TxnError::from(error))) + } + + fn cancel_for_deadlock( + self, + lock_digest: LockDigest, + key: Vec, + deadlock_key_hash: u64, + wait_chain: Vec, + ) -> KeyLockWaitInfo { + let e = MvccError::from(MvccErrorInner::Deadlock { start_ts: self.start_ts, - lock_ts: self.lock.ts, + lock_ts: lock_digest.ts, lock_key: key, deadlock_key_hash, wait_chain, }); - self.pr = ProcessResult::Failed { - err: StorageError::from(TxnError::from(mvcc_err)), - }; - } - - /// Extracts key and primary key from `ProcessResult`. 
- fn extract_key_info(&mut self) -> (Vec, Vec) { - match &mut self.pr { - ProcessResult::PessimisticLockRes { res } => match res { - Err(StorageError(box StorageErrorInner::Txn(TxnError( - box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::KeyIsLocked(info))), - )))) => (info.take_key(), info.take_primary_lock()), - _ => panic!("unexpected mvcc error"), - }, - ProcessResult::Failed { err } => match err { - StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc( - MvccError(box MvccErrorInner::WriteConflict { - ref mut key, - ref mut primary, - .. - }), - )))) => (std::mem::take(key), std::mem::take(primary)), - _ => panic!("unexpected mvcc error"), - }, - _ => panic!("unexpected progress result"), - } + self.cancel(Some(StorageError::from(TxnError::from(e)))) } } -// NOTE: Now we assume `Waiters` is not very long. -// Maybe needs to use `BinaryHeap` or sorted `VecDeque` instead. -type Waiters = Vec; - struct WaitTable { - // Map lock hash to waiters. - wait_table: HashMap, + // Map lock hash and ts to waiters. + // For compatibility. + wait_table: HashMap<(u64, TimeStamp), LockWaitToken>, + waiter_pool: HashMap, waiter_count: Arc, + + wake_up_key_delay_callback: Option>, } impl WaitTable { fn new(waiter_count: Arc) -> Self { Self { wait_table: HashMap::default(), + waiter_pool: HashMap::default(), waiter_count, + wake_up_key_delay_callback: None, } } + fn set_wake_up_key_delay_callback( + &mut self, + cb: Option>, + ) { + self.wake_up_key_delay_callback = cb; + } + #[cfg(test)] fn count(&self) -> usize { - self.wait_table.iter().map(|(_, v)| v.len()).sum() + self.waiter_pool.len() } fn is_empty(&self) -> bool { - self.wait_table.is_empty() + self.waiter_pool.is_empty() } - /// Returns the duplicated `Waiter` if there is. 
- fn add_waiter(&mut self, waiter: Waiter) -> Option { - let waiters = self.wait_table.entry(waiter.lock.hash).or_insert_with(|| { - WAIT_TABLE_STATUS_GAUGE.locks.inc(); - Waiters::default() - }); - let old_idx = waiters.iter().position(|w| w.start_ts == waiter.start_ts); - waiters.push(waiter); - if let Some(old_idx) = old_idx { - let old = waiters.swap_remove(old_idx); - self.waiter_count.fetch_sub(1, Ordering::SeqCst); - Some(old) - } else { - WAIT_TABLE_STATUS_GAUGE.txns.inc(); - None - } - // Here we don't increase waiter_count because it's already updated in - // LockManager::wait_for() + /// Adds a waiter identified by given token. The caller must guarantee that + /// the `token` is unique and doesn't exist in waiter manager currently. + fn add_waiter(&mut self, token: LockWaitToken, waiter: Waiter) { + self.wait_table + .insert((waiter.wait_info.lock_digest.hash, waiter.start_ts), token); + assert!(self.waiter_pool.insert(token, waiter).is_none()); } - /// Removes all waiters waiting for the lock. 
- fn remove(&mut self, lock: Lock) { - self.wait_table.remove(&lock.hash); - WAIT_TABLE_STATUS_GAUGE.locks.dec(); + fn take_waiter(&mut self, token: LockWaitToken) -> Option { + let waiter = self.waiter_pool.remove(&token)?; + self.waiter_count.fetch_sub(1, Ordering::SeqCst); + self.wait_table + .remove(&(waiter.wait_info.lock_digest.hash, waiter.start_ts)); + Some(waiter) } - fn remove_waiter(&mut self, lock: Lock, waiter_ts: TimeStamp) -> Option { - let waiters = self.wait_table.get_mut(&lock.hash)?; - let idx = waiters - .iter() - .position(|waiter| waiter.start_ts == waiter_ts)?; - let waiter = waiters.swap_remove(idx); - self.waiter_count.fetch_sub(1, Ordering::SeqCst); - WAIT_TABLE_STATUS_GAUGE.txns.dec(); - if waiters.is_empty() { - self.remove(lock); + fn update_waiter(&mut self, update_event: &UpdateWaitForEvent) -> Option { + let waiter = self.waiter_pool.get_mut(&update_event.token)?; + + assert_eq!(waiter.wait_info.key, update_event.wait_info.key); + + if waiter.wait_info.lock_digest.ts == update_event.wait_info.lock_digest.ts { + // Unchanged. + return None; } - Some(waiter) + + let result = std::mem::replace(&mut waiter.wait_info, update_event.wait_info.clone()); + waiter.diag_ctx = update_event.diag_ctx.clone(); + + Some(result) } - /// Removes the `Waiter` with the smallest start ts and returns it with - /// remaining waiters. - /// - /// NOTE: Due to the borrow checker, it doesn't remove the entry in the - /// `WaitTable` even if there is no remaining waiter. 
- fn remove_oldest_waiter(&mut self, lock: Lock) -> Option<(Waiter, &mut Waiters)> { - let waiters = self.wait_table.get_mut(&lock.hash)?; - let oldest_idx = waiters - .iter() - .enumerate() - .min_by_key(|(_, w)| w.start_ts) - .unwrap() - .0; - let oldest = waiters.swap_remove(oldest_idx); - self.waiter_count.fetch_sub(1, Ordering::SeqCst); - WAIT_TABLE_STATUS_GAUGE.txns.dec(); - Some((oldest, waiters)) + fn take_waiter_by_lock_digest( + &mut self, + lock: LockDigest, + waiter_ts: TimeStamp, + ) -> Option { + let token = *self.wait_table.get(&(lock.hash, waiter_ts))?; + self.take_waiter(token) } fn to_wait_for_entries(&self) -> Vec { - self.wait_table + self.waiter_pool .iter() - .flat_map(|(_, waiters)| { - waiters.iter().map(|waiter| { - let mut wait_for_entry = WaitForEntry::default(); - wait_for_entry.set_txn(waiter.start_ts.into_inner()); - wait_for_entry.set_wait_for_txn(waiter.lock.ts.into_inner()); - wait_for_entry.set_key_hash(waiter.lock.hash); - wait_for_entry.set_key(waiter.diag_ctx.key.clone()); - wait_for_entry - .set_resource_group_tag(waiter.diag_ctx.resource_group_tag.clone()); - wait_for_entry - }) + .map(|(_, waiter)| { + let mut wait_for_entry = WaitForEntry::default(); + wait_for_entry.set_txn(waiter.start_ts.into_inner()); + wait_for_entry.set_wait_for_txn(waiter.wait_info.lock_digest.ts.into_inner()); + wait_for_entry.set_key_hash(waiter.wait_info.lock_digest.hash); + wait_for_entry.set_key(waiter.wait_info.key.to_raw().unwrap()); + wait_for_entry.set_resource_group_tag(waiter.diag_ctx.resource_group_tag.clone()); + wait_for_entry }) .collect() } @@ -409,8 +409,12 @@ impl Scheduler { fn notify_scheduler(&self, task: Task) -> bool { if let Err(Stopped(task)) = self.0.schedule(task) { error!("failed to send task to waiter_manager"; "task" => %task); - if let Task::WaitFor { cb, pr, .. } = task { - cb.execute(pr); + if let Task::WaitFor { + cancel_callback, .. + } = task + { + // TODO: Pass proper error for the scheduling error. 
+ cancel_callback(StorageError(Box::new(StorageErrorInner::SchedTooBusy))); } return false; } @@ -419,30 +423,43 @@ impl Scheduler { pub fn wait_for( &self, + token: LockWaitToken, + region_id: u64, + region_epoch: RegionEpoch, + term: u64, start_ts: TimeStamp, - cb: StorageCallback, - pr: ProcessResult, - lock: Lock, + wait_info: KeyLockWaitInfo, timeout: WaitTimeout, + cancel_callback: Box, diag_ctx: DiagnosticContext, ) { self.notify_scheduler(Task::WaitFor { + token, + region_id, + region_epoch, + term, start_ts, - cb, - pr, - lock, + wait_info, timeout, + cancel_callback, diag_ctx, start_waiting_time: Instant::now(), }); } - pub fn wake_up(&self, lock_ts: TimeStamp, hashes: Vec, commit_ts: TimeStamp) { - self.notify_scheduler(Task::WakeUp { - lock_ts, - hashes, - commit_ts, - }); + pub fn set_key_wake_up_delay_callback( + &self, + cb: Box, + ) { + self.notify_scheduler(Task::SetKeyWakeUpDelayCallback { cb }); + } + + pub fn remove_lock_wait(&self, token: LockWaitToken) { + self.notify_scheduler(Task::RemoveLockWait { token }); + } + + pub fn update_wait_for(&self, events: Vec) { + self.notify_scheduler(Task::UpdateWaitFor { events }); } pub fn dump_wait_table(&self, cb: Callback) -> bool { @@ -452,43 +469,38 @@ impl Scheduler { pub fn deadlock( &self, txn_ts: TimeStamp, - lock: Lock, + key: Vec, + lock: LockDigest, deadlock_key_hash: u64, wait_chain: Vec, ) { self.notify_scheduler(Task::Deadlock { start_ts: txn_ts, + key, lock, deadlock_key_hash, wait_chain, }); } - pub fn change_config( - &self, - timeout: Option, - delay: Option, - ) { - self.notify_scheduler(Task::ChangeConfig { timeout, delay }); + pub fn change_config(&self, timeout: Option) { + self.notify_scheduler(Task::ChangeConfig { timeout }); } #[cfg(any(test, feature = "testexport"))] - pub fn validate(&self, f: Box) { + pub fn validate(&self, f: Box) { self.notify_scheduler(Task::Validate(f)); } } -/// WaiterManager handles waiting and wake-up of pessimistic lock +/// WaiterManager handles lock 
waiting, cancels waiters when needed (due to +/// timeout or deadlock detected), and provide lock waiting information for +/// diagnosing. pub struct WaiterManager { wait_table: Rc>, detector_scheduler: DetectorScheduler, /// It is the default and maximum timeout of waiter. default_wait_for_lock_timeout: ReadableDuration, - /// If more than one waiters are waiting for the same lock, only the - /// oldest one will be waked up immediately when the lock is released. - /// Others will be waked up after `wake_up_delay_duration` to reduce - /// contention and make the oldest one more likely acquires the lock. - wake_up_delay_duration: ReadableDuration, } unsafe impl Send for WaiterManager {} @@ -499,11 +511,12 @@ impl WaiterManager { detector_scheduler: DetectorScheduler, cfg: &Config, ) -> Self { + let wait_table = WaitTable::new(waiter_count); + Self { - wait_table: Rc::new(RefCell::new(WaitTable::new(waiter_count))), + wait_table: Rc::new(RefCell::new(wait_table)), detector_scheduler, default_wait_for_lock_timeout: cfg.wait_for_lock_timeout, - wake_up_delay_duration: cfg.wake_up_delay_duration, } } @@ -512,51 +525,52 @@ impl WaiterManager { + timeout.into_duration_with_ceiling(self.default_wait_for_lock_timeout.as_millis()) } - fn handle_wait_for(&mut self, waiter: Waiter) { - let (waiter_ts, lock) = (waiter.start_ts, waiter.lock); + fn handle_wait_for(&mut self, token: LockWaitToken, waiter: Waiter) { let wait_table = self.wait_table.clone(); let detector_scheduler = self.detector_scheduler.clone(); // Remove the waiter from wait table when it times out. 
let f = waiter.on_timeout(move || { - if let Some(waiter) = wait_table.borrow_mut().remove_waiter(lock, waiter_ts) { - detector_scheduler.clean_up_wait_for(waiter.start_ts, waiter.lock); - waiter.notify(); + let mut wait_table = wait_table.borrow_mut(); + if let Some(waiter) = wait_table.take_waiter(token) { + let start_ts = waiter.start_ts; + let wait_info = waiter.cancel_for_timeout(false); + detector_scheduler.clean_up_wait_for(start_ts, wait_info); } }); - if let Some(old) = self.wait_table.borrow_mut().add_waiter(waiter) { - old.notify(); - }; + self.wait_table.borrow_mut().add_waiter(token, waiter); spawn_local(f); } - fn handle_wake_up(&mut self, lock_ts: TimeStamp, hashes: Vec, commit_ts: TimeStamp) { + fn handle_remove_lock_wait(&mut self, token: LockWaitToken) { let mut wait_table = self.wait_table.borrow_mut(); if wait_table.is_empty() { return; } - let duration: Duration = self.wake_up_delay_duration.into(); - let new_timeout = Instant::now() + duration; - for hash in hashes { - let lock = Lock { ts: lock_ts, hash }; - if let Some((mut oldest, others)) = wait_table.remove_oldest_waiter(lock) { - // Notify the oldest one immediately. + let waiter = if let Some(w) = wait_table.take_waiter(token) { + w + } else { + return; + }; + let start_ts = waiter.start_ts; + let wait_info = waiter.cancel_for_finished(); + self.detector_scheduler + .clean_up_wait_for(start_ts, wait_info); + } + + fn handle_update_wait_for(&mut self, events: Vec) { + let mut wait_table = self.wait_table.borrow_mut(); + for event in events { + let previous_wait_info = wait_table.update_waiter(&event); + + if event.is_first_lock { + continue; + } + + if let Some(previous_wait_info) = previous_wait_info { self.detector_scheduler - .clean_up_wait_for(oldest.start_ts, oldest.lock); - oldest.conflict_with(lock_ts, commit_ts); - oldest.notify(); - // Others will be waked up after `wake_up_delay_duration`. - // - // NOTE: Actually these waiters are waiting for an unknown transaction. 
- // If there is a deadlock between them, it will be detected after timeout. - if others.is_empty() { - // Remove the empty entry here. - wait_table.remove(lock); - } else { - others.iter_mut().for_each(|waiter| { - waiter.conflict_with(lock_ts, commit_ts); - waiter.reset_timeout(new_timeout); - }); - } + .clean_up_wait_for(event.start_ts, previous_wait_info); + self.detector_scheduler + .detect(event.start_ts, event.wait_info, event.diag_ctx); } } } @@ -568,31 +582,27 @@ impl WaiterManager { fn handle_deadlock( &mut self, waiter_ts: TimeStamp, - lock: Lock, + key: Vec, + lock: LockDigest, deadlock_key_hash: u64, wait_chain: Vec, ) { - if let Some(mut waiter) = self.wait_table.borrow_mut().remove_waiter(lock, waiter_ts) { - waiter.deadlock_with(deadlock_key_hash, wait_chain); - waiter.notify(); + let waiter = self + .wait_table + .borrow_mut() + .take_waiter_by_lock_digest(lock, waiter_ts); + if let Some(waiter) = waiter { + waiter.cancel_for_deadlock(lock, key, deadlock_key_hash, wait_chain); } } - fn handle_config_change( - &mut self, - timeout: Option, - delay: Option, - ) { + fn handle_config_change(&mut self, timeout: Option) { if let Some(timeout) = timeout { self.default_wait_for_lock_timeout = timeout; } - if let Some(delay) = delay { - self.wake_up_delay_duration = delay; - } info!( "Waiter manager config changed"; "default_wait_for_lock_timeout" => self.default_wait_for_lock_timeout.to_string(), - "wake_up_delay_duration" => self.wake_up_delay_duration.to_string() ); } } @@ -600,52 +610,63 @@ impl WaiterManager { impl FutureRunnable for WaiterManager { fn run(&mut self, task: Task) { match task { + Task::SetKeyWakeUpDelayCallback { cb } => { + self.wait_table + .borrow_mut() + .set_wake_up_key_delay_callback(Some(cb)); + } Task::WaitFor { + token, + region_id, + region_epoch, + term, start_ts, - cb, - pr, - lock, + wait_info, timeout, + cancel_callback, diag_ctx, start_waiting_time, } => { let waiter = Waiter::new( + region_id, + region_epoch, + term, 
start_ts, - cb, - pr, - lock, + wait_info, + cancel_callback, self.normalize_deadline(timeout), diag_ctx, start_waiting_time, ); - self.handle_wait_for(waiter); + self.handle_wait_for(token, waiter); TASK_COUNTER_METRICS.wait_for.inc(); } - Task::WakeUp { - lock_ts, - hashes, - commit_ts, - } => { - self.handle_wake_up(lock_ts, hashes, commit_ts); + Task::RemoveLockWait { token } => { + self.handle_remove_lock_wait(token); TASK_COUNTER_METRICS.wake_up.inc(); } + Task::UpdateWaitFor { events } => { + self.handle_update_wait_for(events); + TASK_COUNTER_METRICS.update_wait_for.inc(); + } Task::Dump { cb } => { self.handle_dump(cb); TASK_COUNTER_METRICS.dump.inc(); } Task::Deadlock { start_ts, + key, lock, deadlock_key_hash, wait_chain, } => { - self.handle_deadlock(start_ts, lock, deadlock_key_hash, wait_chain); + self.handle_deadlock(start_ts, key, lock, deadlock_key_hash, wait_chain); } - Task::ChangeConfig { timeout, delay } => self.handle_config_change(timeout, delay), + Task::ChangeConfig { timeout } => self.handle_config_change(timeout), #[cfg(any(test, feature = "testexport"))] Task::Validate(f) => f( self.default_wait_for_lock_timeout, - self.wake_up_delay_duration, + // self.wake_up_delay_duration, ), } } @@ -662,16 +683,20 @@ pub mod tests { config::ReadableDuration, future::paired_future_callback, time::InstantExt, worker::FutureWorker, }; + use txn_types::Key; use super::*; - use crate::storage::PessimisticLockRes; + use crate::storage::txn::ErrorInner as TxnErrorInner; fn dummy_waiter(start_ts: TimeStamp, lock_ts: TimeStamp, hash: u64) -> Waiter { Waiter { start_ts, - cb: StorageCallback::Boolean(Box::new(|_| ())), - pr: ProcessResult::Res, - lock: Lock { ts: lock_ts, hash }, + wait_info: KeyLockWaitInfo { + key: Key::from_raw(b""), + lock_digest: LockDigest { ts: lock_ts, hash }, + lock_info: Default::default(), + }, + cancel_callback: Box::new(|_| ()), diag_ctx: DiagnosticContext::default(), delay: Delay::new(Instant::now()), start_waiting_time: 
Instant::now(), @@ -741,9 +766,7 @@ pub mod tests { pub(crate) type WaiterCtx = ( Waiter, LockInfo, - futures::channel::oneshot::Receiver< - Result, StorageError>, - >, + futures::channel::oneshot::Receiver, ); pub(crate) fn new_test_waiter( @@ -751,29 +774,48 @@ pub mod tests { lock_ts: TimeStamp, lock_hash: u64, ) -> WaiterCtx { - let raw_key = b"foo".to_vec(); + new_test_waiter_impl(waiter_ts, lock_ts, None, Some(lock_hash)) + } + + pub(crate) fn new_test_waiter_with_key( + waiter_ts: TimeStamp, + lock_ts: TimeStamp, + key: &[u8], + ) -> WaiterCtx { + new_test_waiter_impl(waiter_ts, lock_ts, Some(key), None) + } + + fn new_test_waiter_impl( + waiter_ts: TimeStamp, + lock_ts: TimeStamp, + key: Option<&[u8]>, + lock_hash: Option, + ) -> WaiterCtx { + let raw_key = key.unwrap_or(b"foo").to_vec(); + let lock_hash = lock_hash.unwrap_or_else(|| Key::from_raw(&raw_key).gen_hash()); let primary = b"bar".to_vec(); let mut info = LockInfo::default(); - info.set_key(raw_key); + info.set_key(raw_key.clone()); info.set_lock_version(lock_ts.into_inner()); info.set_primary_lock(primary); info.set_lock_ttl(3000); info.set_txn_size(16); - let pr = ProcessResult::PessimisticLockRes { - res: Err(StorageError::from(TxnError::from(MvccError::from( - MvccErrorInner::KeyIsLocked(info.clone()), - )))), - }; - let lock = Lock { + let lock = LockDigest { ts: lock_ts, hash: lock_hash, }; let (cb, f) = paired_future_callback(); let waiter = Waiter::new( + 1, + Default::default(), + 1, waiter_ts, - StorageCallback::PessimisticLock(cb), - pr, - lock, + KeyLockWaitInfo { + key: Key::from_raw(&raw_key), + lock_digest: lock, + lock_info: info.clone(), + }, + cb, Instant::now() + Duration::from_millis(3000), DiagnosticContext::default(), Instant::now(), @@ -781,71 +823,25 @@ pub mod tests { (waiter, info, f) } - #[test] - fn test_waiter_extract_key_info() { - let (mut waiter, mut lock_info, _) = new_test_waiter(10.into(), 20.into(), 20); - assert_eq!( - waiter.extract_key_info(), - 
(lock_info.take_key(), lock_info.take_primary_lock()) - ); - - let (mut waiter, mut lock_info, _) = new_test_waiter(10.into(), 20.into(), 20); - waiter.conflict_with(20.into(), 30.into()); - assert_eq!( - waiter.extract_key_info(), - (lock_info.take_key(), lock_info.take_primary_lock()) - ); - } - - pub(crate) fn expect_key_is_locked( - res: Result, - lock_info: LockInfo, - ) { - match res { - Err(StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc( + pub(crate) fn expect_key_is_locked(error: StorageError, lock_info: LockInfo) { + match error { + StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc( MvccError(box MvccErrorInner::KeyIsLocked(res)), - ))))) => assert_eq!(res, lock_info), + )))) => assert_eq!(res, lock_info), e => panic!("unexpected error: {:?}", e), } } - pub(crate) fn expect_write_conflict( - res: Result, - waiter_ts: TimeStamp, - mut lock_info: LockInfo, - commit_ts: TimeStamp, - ) { - match res { - Err(StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc( - MvccError(box MvccErrorInner::WriteConflict { - start_ts, - conflict_start_ts, - conflict_commit_ts, - key, - primary, - .. 
- }), - ))))) => { - assert_eq!(start_ts, waiter_ts); - assert_eq!(conflict_start_ts, lock_info.get_lock_version().into()); - assert_eq!(conflict_commit_ts, commit_ts); - assert_eq!(key, lock_info.take_key()); - assert_eq!(primary, lock_info.take_primary_lock()); - } - e => panic!("unexpected error: {:?}", e), - } - } - - pub(crate) fn expect_deadlock( - res: Result, + pub(crate) fn expect_deadlock( + error: StorageError, waiter_ts: TimeStamp, mut lock_info: LockInfo, deadlock_hash: u64, expect_wait_chain: &[(u64, u64, &[u8], &[u8])], /* (waiter_ts, wait_for_ts, key, * resource_group_tag) */ ) { - match res { - Err(StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc( + match error { + StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc( MvccError(box MvccErrorInner::Deadlock { start_ts, lock_ts, @@ -853,7 +849,7 @@ pub mod tests { deadlock_key_hash, wait_chain, }), - ))))) => { + )))) => { assert_eq!(start_ts, waiter_ts); assert_eq!(lock_ts, lock_info.get_lock_version().into()); assert_eq!(lock_key, lock_info.take_key()); @@ -900,41 +896,21 @@ pub mod tests { #[test] fn test_waiter_notify() { let (waiter, lock_info, f) = new_test_waiter(10.into(), 20.into(), 20); - waiter.notify(); - expect_key_is_locked(block_on(f).unwrap().unwrap(), lock_info); - - // A waiter can conflict with other transactions more than once. 
- for conflict_times in 1..=3 { - let waiter_ts = TimeStamp::new(10); - let mut lock_ts = TimeStamp::new(20); - let (mut waiter, mut lock_info, f) = new_test_waiter(waiter_ts, lock_ts, 20); - let mut conflict_commit_ts = TimeStamp::new(30); - for _ in 0..conflict_times { - waiter.conflict_with(*lock_ts.incr(), *conflict_commit_ts.incr()); - lock_info.set_lock_version(lock_ts.into_inner()); - } - waiter.notify(); - expect_write_conflict( - block_on(f).unwrap(), - waiter_ts, - lock_info, - conflict_commit_ts, - ); - } + waiter.cancel_for_timeout(false); + expect_key_is_locked(block_on(f).unwrap(), lock_info); // Deadlock let waiter_ts = TimeStamp::new(10); - let (mut waiter, lock_info, f) = new_test_waiter(waiter_ts, 20.into(), 20); - waiter.deadlock_with(111, vec![]); - waiter.notify(); - expect_deadlock(block_on(f).unwrap(), waiter_ts, lock_info, 111, &[]); - - // Conflict then deadlock. - let waiter_ts = TimeStamp::new(10); - let (mut waiter, lock_info, f) = new_test_waiter(waiter_ts, 20.into(), 20); - waiter.conflict_with(20.into(), 30.into()); - waiter.deadlock_with(111, vec![]); - waiter.notify(); + let (waiter, lock_info, f) = new_test_waiter(waiter_ts, 20.into(), 20); + waiter.cancel_for_deadlock( + LockDigest { + ts: 20.into(), + hash: 20, + }, + b"foo".to_vec(), + 111, + vec![], + ); expect_deadlock(block_on(f).unwrap(), waiter_ts, lock_info, 111, &[]); } @@ -953,7 +929,7 @@ pub mod tests { waiter.reset_timeout(Instant::now() + Duration::from_millis(100)); let (tx, rx) = mpsc::sync_channel(1); let f = waiter.on_timeout(move || tx.send(1).unwrap()); - waiter.notify(); + waiter.cancel_for_timeout(false); assert_elapsed(|| block_on(f), 0, 200); rx.try_recv().unwrap_err(); } @@ -963,33 +939,33 @@ pub mod tests { let mut wait_table = WaitTable::new(Arc::new(AtomicUsize::new(0))); let mut waiter_info = Vec::new(); let mut rng = rand::thread_rng(); - for _ in 0..20 { + for i in 0..20 { let waiter_ts = rng.gen::().into(); - let lock = Lock { + let lock = LockDigest 
{ ts: rng.gen::().into(), hash: rng.gen(), }; - // Avoid adding duplicated waiter. - if wait_table - .add_waiter(dummy_waiter(waiter_ts, lock.ts, lock.hash)) - .is_none() - { - waiter_info.push((waiter_ts, lock)); - } + wait_table.add_waiter( + LockWaitToken(Some(i)), + dummy_waiter(waiter_ts, lock.ts, lock.hash), + ); + waiter_info.push((waiter_ts, lock)); } assert_eq!(wait_table.count(), waiter_info.len()); for (waiter_ts, lock) in waiter_info { - let waiter = wait_table.remove_waiter(lock, waiter_ts).unwrap(); + let waiter = wait_table + .take_waiter_by_lock_digest(lock, waiter_ts) + .unwrap(); assert_eq!(waiter.start_ts, waiter_ts); - assert_eq!(waiter.lock, lock); + assert_eq!(waiter.wait_info.lock_digest, lock); } assert_eq!(wait_table.count(), 0); assert!(wait_table.wait_table.is_empty()); assert!( wait_table - .remove_waiter( - Lock { + .take_waiter_by_lock_digest( + LockDigest { ts: TimeStamp::zero(), hash: 0, }, @@ -999,87 +975,53 @@ pub mod tests { ); } - #[test] - fn test_wait_table_add_duplicated_waiter() { - let mut wait_table = WaitTable::new(Arc::new(AtomicUsize::new(0))); - let waiter_ts = 10.into(); - let lock = Lock { - ts: 20.into(), - hash: 20, - }; - assert!( - wait_table - .add_waiter(dummy_waiter(waiter_ts, lock.ts, lock.hash)) - .is_none() - ); - let waiter = wait_table - .add_waiter(dummy_waiter(waiter_ts, lock.ts, lock.hash)) - .unwrap(); - assert_eq!(waiter.start_ts, waiter_ts); - assert_eq!(waiter.lock, lock); - } - - #[test] - fn test_wait_table_remove_oldest_waiter() { - let mut wait_table = WaitTable::new(Arc::new(AtomicUsize::new(0))); - let lock = Lock { - ts: 10.into(), - hash: 10, - }; - let waiter_count = 10; - let mut waiters_ts: Vec = (0..waiter_count).map(TimeStamp::from).collect(); - waiters_ts.shuffle(&mut rand::thread_rng()); - for ts in waiters_ts.iter() { - wait_table.add_waiter(dummy_waiter(*ts, lock.ts, lock.hash)); - } - assert_eq!(wait_table.count(), waiters_ts.len()); - waiters_ts.sort(); - for (i, ts) in 
waiters_ts.into_iter().enumerate() { - let (oldest, others) = wait_table.remove_oldest_waiter(lock).unwrap(); - assert_eq!(oldest.start_ts, ts); - assert_eq!(others.len(), waiter_count as usize - i - 1); - } - // There is no waiter in the wait table but there is an entry in it. - assert_eq!(wait_table.count(), 0); - assert_eq!(wait_table.wait_table.len(), 1); - wait_table.remove(lock); - assert!(wait_table.wait_table.is_empty()); - } - #[test] fn test_wait_table_is_empty() { let waiter_count = Arc::new(AtomicUsize::new(0)); let mut wait_table = WaitTable::new(Arc::clone(&waiter_count)); - let lock = Lock { + let lock = LockDigest { ts: 2.into(), hash: 2, }; - wait_table.add_waiter(dummy_waiter(1.into(), lock.ts, lock.hash)); + wait_table.add_waiter( + LockWaitToken(Some(1)), + dummy_waiter(1.into(), lock.ts, lock.hash), + ); // Increase waiter_count manually and assert the previous value is zero assert_eq!(waiter_count.fetch_add(1, Ordering::SeqCst), 0); - // Adding a duplicated waiter shouldn't increase waiter count. - waiter_count.fetch_add(1, Ordering::SeqCst); - wait_table.add_waiter(dummy_waiter(1.into(), lock.ts, lock.hash)); - assert_eq!(waiter_count.load(Ordering::SeqCst), 1); - // Remove the waiter. - wait_table.remove_waiter(lock, 1.into()).unwrap(); + wait_table + .take_waiter_by_lock_digest(lock, 1.into()) + .unwrap(); assert_eq!(waiter_count.load(Ordering::SeqCst), 0); // Removing a non-existed waiter shouldn't decrease waiter count. 
- assert!(wait_table.remove_waiter(lock, 1.into()).is_none()); + assert!( + wait_table + .take_waiter_by_lock_digest(lock, 1.into()) + .is_none() + ); assert_eq!(waiter_count.load(Ordering::SeqCst), 0); - wait_table.add_waiter(dummy_waiter(1.into(), lock.ts, lock.hash)); - wait_table.add_waiter(dummy_waiter(2.into(), lock.ts, lock.hash)); + wait_table.add_waiter( + LockWaitToken(Some(2)), + dummy_waiter(1.into(), lock.ts, lock.hash), + ); + wait_table.add_waiter( + LockWaitToken(Some(3)), + dummy_waiter(2.into(), lock.ts, lock.hash), + ); waiter_count.fetch_add(2, Ordering::SeqCst); - wait_table.remove_oldest_waiter(lock).unwrap(); + wait_table.take_waiter(LockWaitToken(Some(3))).unwrap(); assert_eq!(waiter_count.load(Ordering::SeqCst), 1); - wait_table.remove_oldest_waiter(lock).unwrap(); + wait_table.take_waiter(LockWaitToken(Some(2))).unwrap(); assert_eq!(waiter_count.load(Ordering::SeqCst), 0); - wait_table.remove(lock); // Removing a non-existed waiter shouldn't decrease waiter count. 
- assert!(wait_table.remove_oldest_waiter(lock).is_none()); + assert!( + wait_table + .take_waiter_by_lock_digest(lock, 1.into()) + .is_none() + ); assert_eq!(waiter_count.load(Ordering::SeqCst), 0); } @@ -1090,7 +1032,10 @@ pub mod tests { for i in 1..5 { for j in 0..i { - wait_table.add_waiter(dummy_waiter((i * 10 + j).into(), i.into(), j)); + wait_table.add_waiter( + LockWaitToken(Some(i * 10 + j)), + dummy_waiter((i * 10 + j).into(), i.into(), j), + ); } } @@ -1135,15 +1080,18 @@ pub mod tests { // Default timeout let (waiter, lock_info, f) = new_test_waiter(10.into(), 20.into(), 20); scheduler.wait_for( + LockWaitToken(Some(1)), + 1, + RegionEpoch::default(), + 1, waiter.start_ts, - waiter.cb, - waiter.pr, - waiter.lock, + waiter.wait_info, WaitTimeout::Millis(1000), + waiter.cancel_callback, DiagnosticContext::default(), ); assert_elapsed( - || expect_key_is_locked(block_on(f).unwrap().unwrap(), lock_info), + || expect_key_is_locked(block_on(f).unwrap(), lock_info), 900, 1200, ); @@ -1151,15 +1099,18 @@ pub mod tests { // Custom timeout let (waiter, lock_info, f) = new_test_waiter(20.into(), 30.into(), 30); scheduler.wait_for( + LockWaitToken(Some(2)), + 1, + RegionEpoch::default(), + 1, waiter.start_ts, - waiter.cb, - waiter.pr, - waiter.lock, + waiter.wait_info, WaitTimeout::Millis(100), + waiter.cancel_callback, DiagnosticContext::default(), ); assert_elapsed( - || expect_key_is_locked(block_on(f).unwrap().unwrap(), lock_info), + || expect_key_is_locked(block_on(f).unwrap(), lock_info), 50, 300, ); @@ -1167,15 +1118,18 @@ pub mod tests { // Timeout can't exceed wait_for_lock_timeout let (waiter, lock_info, f) = new_test_waiter(30.into(), 40.into(), 40); scheduler.wait_for( + LockWaitToken(Some(3)), + 1, + RegionEpoch::default(), + 1, waiter.start_ts, - waiter.cb, - waiter.pr, - waiter.lock, + waiter.wait_info, WaitTimeout::Millis(3000), + waiter.cancel_callback, DiagnosticContext::default(), ); assert_elapsed( - || 
expect_key_is_locked(block_on(f).unwrap().unwrap(), lock_info), + || expect_key_is_locked(block_on(f).unwrap(), lock_info), 900, 1200, ); @@ -1183,160 +1137,29 @@ pub mod tests { worker.stop().unwrap(); } - #[test] - fn test_waiter_manager_wake_up() { - let (wait_for_lock_timeout, wake_up_delay_duration) = (1000, 100); - let (mut worker, scheduler) = - start_waiter_manager(wait_for_lock_timeout, wake_up_delay_duration); - - // Waiters waiting for different locks should be waked up immediately. - let lock_ts = 10.into(); - let lock_hashes = vec![10, 11, 12]; - let waiters_ts = vec![20.into(), 30.into(), 40.into()]; - let mut waiters_info = vec![]; - for (&lock_hash, &waiter_ts) in lock_hashes.iter().zip(waiters_ts.iter()) { - let (waiter, lock_info, f) = new_test_waiter(waiter_ts, lock_ts, lock_hash); - scheduler.wait_for( - waiter.start_ts, - waiter.cb, - waiter.pr, - waiter.lock, - WaitTimeout::Millis(wait_for_lock_timeout), - DiagnosticContext::default(), - ); - waiters_info.push((waiter_ts, lock_info, f)); - } - let commit_ts = 15.into(); - scheduler.wake_up(lock_ts, lock_hashes, commit_ts); - for (waiter_ts, lock_info, f) in waiters_info { - assert_elapsed( - || expect_write_conflict(block_on(f).unwrap(), waiter_ts, lock_info, commit_ts), - 0, - 200, - ); - } - - // Multiple waiters are waiting for one lock. - let mut lock = Lock { - ts: 10.into(), - hash: 10, - }; - let mut waiters_ts: Vec = (20..25).map(TimeStamp::from).collect(); - // Waiters are added in arbitrary order. 
- waiters_ts.shuffle(&mut rand::thread_rng()); - let mut waiters_info = vec![]; - for waiter_ts in waiters_ts { - let (waiter, lock_info, f) = new_test_waiter(waiter_ts, lock.ts, lock.hash); - scheduler.wait_for( - waiter.start_ts, - waiter.cb, - waiter.pr, - waiter.lock, - WaitTimeout::Millis(wait_for_lock_timeout), - DiagnosticContext::default(), - ); - waiters_info.push((waiter_ts, lock_info, f)); - } - waiters_info.sort_by_key(|(ts, ..)| *ts); - let mut commit_ts = 30.into(); - // Each waiter should be waked up immediately in order. - for (waiter_ts, mut lock_info, f) in waiters_info.drain(..waiters_info.len() - 1) { - scheduler.wake_up(lock.ts, vec![lock.hash], commit_ts); - lock_info.set_lock_version(lock.ts.into_inner()); - assert_elapsed( - || expect_write_conflict(block_on(f).unwrap(), waiter_ts, lock_info, commit_ts), - 0, - 200, - ); - // Now the lock is held by the waked up transaction. - lock.ts = waiter_ts; - commit_ts.incr(); - } - // Last waiter isn't waked up by other transactions. It will be waked up after - // wake_up_delay_duration. - let (waiter_ts, mut lock_info, f) = waiters_info.pop().unwrap(); - // It conflicts with the last transaction. - lock_info.set_lock_version(lock.ts.into_inner() - 1); - assert_elapsed( - || { - expect_write_conflict( - block_on(f).unwrap(), - waiter_ts, - lock_info, - *commit_ts.decr(), - ) - }, - wake_up_delay_duration - 50, - wake_up_delay_duration + 200, - ); - - // The max lifetime of waiter is its timeout. - let lock = Lock { - ts: 10.into(), - hash: 10, - }; - let (waiter1, lock_info1, f1) = new_test_waiter(20.into(), lock.ts, lock.hash); - scheduler.wait_for( - waiter1.start_ts, - waiter1.cb, - waiter1.pr, - waiter1.lock, - WaitTimeout::Millis(wait_for_lock_timeout), - DiagnosticContext::default(), - ); - let (waiter2, lock_info2, f2) = new_test_waiter(30.into(), lock.ts, lock.hash); - // Waiter2's timeout is 50ms which is less than wake_up_delay_duration. 
- scheduler.wait_for( - waiter2.start_ts, - waiter2.cb, - waiter2.pr, - waiter2.lock, - WaitTimeout::Millis(50), - DiagnosticContext::default(), - ); - let commit_ts = 15.into(); - let (tx, rx) = mpsc::sync_channel(1); - std::thread::spawn(move || { - // Waiters2's lifetime can't exceed it timeout. - assert_elapsed( - || expect_write_conflict(block_on(f2).unwrap(), 30.into(), lock_info2, 15.into()), - 30, - 100, - ); - tx.send(()).unwrap(); - }); - // It will increase waiter2's timeout to wake_up_delay_duration. - scheduler.wake_up(lock.ts, vec![lock.hash], commit_ts); - assert_elapsed( - || expect_write_conflict(block_on(f1).unwrap(), 20.into(), lock_info1, commit_ts), - 0, - 200, - ); - rx.recv().unwrap(); - - worker.stop().unwrap(); - } - #[test] fn test_waiter_manager_deadlock() { let (mut worker, scheduler) = start_waiter_manager(1000, 100); let (waiter_ts, lock) = ( 10.into(), - Lock { + LockDigest { ts: 20.into(), hash: 20, }, ); let (waiter, lock_info, f) = new_test_waiter(waiter_ts, lock.ts, lock.hash); scheduler.wait_for( + LockWaitToken(Some(1)), + 1, + RegionEpoch::default(), + 1, waiter.start_ts, - waiter.cb, - waiter.pr, - waiter.lock, + waiter.wait_info, WaitTimeout::Millis(1000), + waiter.cancel_callback, DiagnosticContext::default(), ); - scheduler.deadlock(waiter_ts, lock, 30, vec![]); + scheduler.deadlock(waiter_ts, b"foo".to_vec(), lock, 30, vec![]); assert_elapsed( || expect_deadlock(block_on(f).unwrap(), waiter_ts, lock_info, 30, &[]), 0, @@ -1344,67 +1167,4 @@ pub mod tests { ); worker.stop().unwrap(); } - - #[test] - fn test_waiter_manager_with_duplicated_waiters() { - let (mut worker, scheduler) = start_waiter_manager(1000, 100); - let (waiter_ts, lock) = ( - 10.into(), - Lock { - ts: 20.into(), - hash: 20, - }, - ); - let (waiter1, lock_info1, f1) = new_test_waiter(waiter_ts, lock.ts, lock.hash); - scheduler.wait_for( - waiter1.start_ts, - waiter1.cb, - waiter1.pr, - waiter1.lock, - WaitTimeout::Millis(1000), - 
DiagnosticContext::default(), - ); - let (waiter2, lock_info2, f2) = new_test_waiter(waiter_ts, lock.ts, lock.hash); - scheduler.wait_for( - waiter2.start_ts, - waiter2.cb, - waiter2.pr, - waiter2.lock, - WaitTimeout::Millis(1000), - DiagnosticContext::default(), - ); - // Should notify duplicated waiter immediately. - assert_elapsed( - || expect_key_is_locked(block_on(f1).unwrap().unwrap(), lock_info1), - 0, - 200, - ); - // The new waiter will be wake up after timeout. - assert_elapsed( - || expect_key_is_locked(block_on(f2).unwrap().unwrap(), lock_info2), - 900, - 1200, - ); - - worker.stop().unwrap(); - } - - #[bench] - fn bench_wake_up_small_table_against_big_hashes(b: &mut test::Bencher) { - let detect_worker = FutureWorker::new("dummy-deadlock"); - let detector_scheduler = DetectorScheduler::new(detect_worker.scheduler()); - let mut waiter_mgr = WaiterManager::new( - Arc::new(AtomicUsize::new(0)), - detector_scheduler, - &Config::default(), - ); - waiter_mgr - .wait_table - .borrow_mut() - .add_waiter(dummy_waiter(10.into(), 20.into(), 10000)); - let hashes: Vec = (0..1000).collect(); - b.iter(|| { - waiter_mgr.handle_wake_up(20.into(), hashes.clone(), 30.into()); - }); - } } diff --git a/src/server/server.rs b/src/server/server.rs index 23c52793c5f..992b5cf6fa0 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -441,7 +441,7 @@ mod tests { config::CoprReadPoolConfig, coprocessor::{self, readpool_impl}, server::TestRaftStoreRouter, - storage::{lock_manager::DummyLockManager, TestStorageBuilderApiV1}, + storage::{lock_manager::MockLockManager, TestStorageBuilderApiV1}, }; #[derive(Clone)] @@ -490,7 +490,7 @@ mod tests { ..Default::default() }; - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); diff --git a/src/storage/lock_manager/lock_wait_context.rs b/src/storage/lock_manager/lock_wait_context.rs index 46ed24fde70..24a61876f44 100644 --- 
a/src/storage/lock_manager/lock_wait_context.rs +++ b/src/storage/lock_manager/lock_wait_context.rs @@ -10,9 +10,6 @@ //! timeout. [`LockWaitContext`] is therefore used to share the necessary state //! of a single `AcquirePessimisticLock` request, and ensuring the internal //! callback for returning response through RPC is called at most only once. -//! -//! Note: The corresponding implementation in `WaiterManager` is not yet -//! implemented, and this mod is currently not used yet. use std::{convert::TryInto, result::Result, sync::Arc}; @@ -138,10 +135,9 @@ impl LockWaitContext { return; } } else { - // TODO: Uncomment this after the corresponding change of - // `LockManager` is done. self.lock_wait_queues. - // get_lock_mgr() .remove_lock_wait(ctx_inner. - // lock_wait_token); + self.lock_wait_queues + .get_lock_mgr() + .remove_lock_wait(self.shared_states.lock_wait_token); } // When this is executed, the waiter is either woken up from the queue or @@ -172,7 +168,7 @@ mod tests { use super::*; use crate::storage::{ - lock_manager::{lock_waiting_queue::LockWaitEntry, DummyLockManager}, + lock_manager::{lock_waiting_queue::LockWaitEntry, MockLockManager}, mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, txn::{Error as TxnError, ErrorInner as TxnErrorInner}, types::PessimisticLockParameters, @@ -197,7 +193,7 @@ mod tests { Receiver>>, ) { let (cb, rx) = create_storage_cb(); - let token = LockWaitToken(Some(1)); + let token = lock_wait_queues.get_lock_mgr().allocate_token(); let ctx = LockWaitContext::new(key.clone(), lock_wait_queues.clone(), token, cb, false); (token, ctx, rx) } @@ -226,7 +222,7 @@ mod tests { // TODO: Use `ProxyLockMgr` to check the correctness of the `remove_lock_wait` // invocation. - let lock_wait_queues = LockWaitQueues::new(DummyLockManager {}); + let lock_wait_queues = LockWaitQueues::new(MockLockManager::new()); let (_, ctx, rx) = create_test_lock_wait_ctx(&key, &lock_wait_queues); // Nothing happens currently. 
diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs index 3651ce21c1c..16b3787bd7e 100644 --- a/src/storage/lock_manager/lock_waiting_queue.rs +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -212,15 +212,15 @@ impl KeyLockWaitState { pub type DelayedNotifyAllFuture = Pin>> + Send>>; -pub struct LockWaitQueueInner { +pub struct LockWaitQueueInner { queue_map: dashmap::DashMap, id_allocated: AtomicU64, + lock_mgr: L, } #[derive(Clone)] pub struct LockWaitQueues { - inner: Arc, - lock_mgr: L, + inner: Arc>, } impl LockWaitQueues { @@ -229,8 +229,8 @@ impl LockWaitQueues { inner: Arc::new(LockWaitQueueInner { queue_map: dashmap::DashMap::new(), id_allocated: AtomicU64::new(1), + lock_mgr, }), - lock_mgr, } } @@ -547,7 +547,7 @@ impl LockWaitQueues { #[allow(dead_code)] pub(super) fn get_lock_mgr(&self) -> &L { - &self.lock_mgr + &self.inner.lock_mgr } #[cfg(test)] @@ -582,7 +582,7 @@ mod tests { use super::*; use crate::storage::{ - lock_manager::{lock_wait_context::LockWaitContext, DummyLockManager, WaitTimeout}, + lock_manager::{lock_wait_context::LockWaitContext, MockLockManager, WaitTimeout}, txn::ErrorInner as TxnErrorInner, ErrorInner as StorageErrorInner, StorageCallback, }; @@ -639,7 +639,7 @@ mod tests { lock_info_pb: kvrpcpb::LockInfo, ) -> (Box, TestLockWaitEntryHandle) { let start_ts = start_ts.into(); - let token = LockWaitToken(Some(self.allocate_internal_id())); + let token = self.inner.lock_mgr.allocate_token(); let dummy_request_cb = StorageCallback::PessimisticLock(Box::new(|_| ())); let dummy_ctx = LockWaitContext::new( Key::from_raw(key), @@ -830,7 +830,7 @@ mod tests { #[test] fn test_simple_push_pop() { - let queues = LockWaitQueues::new(DummyLockManager {}); + let queues = LockWaitQueues::new(MockLockManager::new()); queues.mock_lock_wait(b"k1", 10, 5, false); queues.mock_lock_wait(b"k2", 11, 5, false); @@ -852,7 +852,7 @@ mod tests { #[test] fn test_popping_priority() { - let 
queues = LockWaitQueues::new(DummyLockManager {}); + let queues = LockWaitQueues::new(MockLockManager::new()); queues.mock_lock_wait(b"k1", 10, 5, false); queues.mock_lock_wait(b"k1", 20, 5, false); @@ -874,7 +874,7 @@ mod tests { #[test] fn test_removing_by_token() { - let queues = LockWaitQueues::new(DummyLockManager {}); + let queues = LockWaitQueues::new(MockLockManager::new()); queues.mock_lock_wait(b"k1", 10, 5, false); let token11 = queues.mock_lock_wait(b"k1", 11, 5, false).token; @@ -915,7 +915,7 @@ mod tests { #[test] fn test_dropping_cancelled_entries() { - let queues = LockWaitQueues::new(DummyLockManager {}); + let queues = LockWaitQueues::new(MockLockManager::new()); let h10 = queues.mock_lock_wait(b"k1", 10, 5, false); let h11 = queues.mock_lock_wait(b"k1", 11, 5, false); @@ -941,7 +941,7 @@ mod tests { #[tokio::test] async fn test_delayed_notify_all() { - let queues = LockWaitQueues::new(DummyLockManager {}); + let queues = LockWaitQueues::new(MockLockManager::new()); queues.mock_lock_wait(b"k1", 8, 5, false); diff --git a/src/storage/lock_manager/mod.rs b/src/storage/lock_manager/mod.rs index 235a31c3710..3ba9c7f7905 100644 --- a/src/storage/lock_manager/mod.rs +++ b/src/storage/lock_manager/mod.rs @@ -1,20 +1,34 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::time::Duration; +use std::{ + fmt::{Debug, Formatter}, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + time::Duration, +}; +use collections::HashMap; +use kvproto::{kvrpcpb::LockInfo, metapb::RegionEpoch}; +use parking_lot::Mutex; use tracker::TrackerToken; -use txn_types::TimeStamp; +use txn_types::{Key, TimeStamp}; use crate::{ server::lock_manager::{waiter_manager, waiter_manager::Callback}, - storage::{txn::ProcessResult, types::StorageCallback}, + storage::{ + mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, + txn::Error as TxnError, + Error as StorageError, + }, }; pub mod lock_wait_context; pub mod lock_waiting_queue; #[derive(Clone, Copy, PartialEq, Debug, Default)] -pub struct Lock { +pub struct LockDigest { pub ts: TimeStamp, pub hash: u64, } @@ -32,6 +46,16 @@ pub struct DiagnosticContext { pub tracker: TrackerToken, } +impl Debug for DiagnosticContext { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("DiagnosticContext") + .field("key", &log_wrappers::Value::key(&self.key)) + // TODO: Perhaps the resource group tag don't need to be a secret + .field("resource_group_tag", &log_wrappers::Value::key(&self.resource_group_tag)) + .finish() + } +} + /// Time to wait for lock released when encountering locks. #[derive(Clone, Copy, PartialEq, Debug)] pub enum WaitTimeout { @@ -67,9 +91,14 @@ impl From for WaitTimeout { } } +#[derive(Debug, Clone)] +pub struct KeyLockWaitInfo { + pub key: Key, + pub lock_digest: LockDigest, + pub lock_info: LockInfo, +} + /// Uniquely identifies a lock-waiting request in a `LockManager`. -/// -/// Not used yet, but necessary for implementing `LockWaitQueues`. 
#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] pub struct LockWaitToken(pub Option); @@ -79,10 +108,27 @@ impl LockWaitToken { } } +#[derive(Debug)] +pub struct UpdateWaitForEvent { + pub token: LockWaitToken, + pub start_ts: TimeStamp, + pub is_first_lock: bool, + pub wait_info: KeyLockWaitInfo, + pub diag_ctx: DiagnosticContext, +} + /// `LockManager` manages transactions waiting for locks held by other /// transactions. It has responsibility to handle deadlocks between /// transactions. -pub trait LockManager: Clone + Send + 'static { +pub trait LockManager: Clone + Send + Sync + 'static { + /// Allocates a token for identifying a specific lock-waiting relationship. + /// Use this to allocate a token before invoking `wait_for`. + /// + /// Since some information required by `wait_for` need to be initialized by + /// the token, allocating token is therefore separated to a single + /// function instead of internally allocated in `wait_for`. + fn allocate_token(&self) -> LockWaitToken; + /// Transaction with `start_ts` waits for `lock` released. /// /// If the lock is released or waiting times out or deadlock occurs, the @@ -93,24 +139,22 @@ pub trait LockManager: Clone + Send + 'static { /// in deadlock. fn wait_for( &self, + token: LockWaitToken, + region_id: u64, + region_epoch: RegionEpoch, + term: u64, start_ts: TimeStamp, - cb: StorageCallback, - pr: ProcessResult, - lock: Lock, + wait_info: KeyLockWaitInfo, is_first_lock: bool, timeout: Option, + cancel_callback: Box, diag_ctx: DiagnosticContext, ); - /// The locks with `lock_ts` and `hashes` are released, tries to wake up - /// transactions. - fn wake_up( - &self, - lock_ts: TimeStamp, - hashes: Vec, - commit_ts: TimeStamp, - is_pessimistic_txn: bool, - ); + fn update_wait_for(&self, updated_items: Vec); + + /// Remove a waiter specified by token. + fn remove_lock_wait(&self, token: LockWaitToken); /// Returns true if there are waiters in the `LockManager`. 
/// @@ -124,31 +168,66 @@ pub trait LockManager: Clone + Send + 'static { // For test #[derive(Clone)] -pub struct DummyLockManager; +pub struct MockLockManager { + allocated_token: Arc, + waiters: + Arc)>>>, +} + +impl MockLockManager { + pub fn new() -> Self { + Self { + allocated_token: Arc::new(AtomicU64::new(1)), + waiters: Arc::new(Mutex::new(HashMap::default())), + } + } +} + +// Make the linter happy. +impl Default for MockLockManager { + fn default() -> Self { + Self::new() + } +} + +impl LockManager for MockLockManager { + fn allocate_token(&self) -> LockWaitToken { + LockWaitToken(Some(self.allocated_token.fetch_add(1, Ordering::Relaxed))) + } -impl LockManager for DummyLockManager { fn wait_for( &self, + token: LockWaitToken, + _region_id: u64, + _region_epoch: RegionEpoch, + _term: u64, _start_ts: TimeStamp, - _cb: StorageCallback, - _pr: ProcessResult, - _lock: Lock, + wait_info: KeyLockWaitInfo, _is_first_lock: bool, - _wait_timeout: Option, + _timeout: Option, + cancel_callback: Box, _diag_ctx: DiagnosticContext, ) { + self.waiters + .lock() + .insert(token, (wait_info, cancel_callback)); } - fn wake_up( - &self, - _lock_ts: TimeStamp, - _hashes: Vec, - _commit_ts: TimeStamp, - _is_pessimistic_txn: bool, - ) { - } + fn update_wait_for(&self, _updated_items: Vec) {} + + fn remove_lock_wait(&self, _token: LockWaitToken) {} fn dump_wait_for_entries(&self, cb: Callback) { cb(vec![]) } } + +impl MockLockManager { + pub fn simulate_timeout_all(&self) { + let mut map = self.waiters.lock(); + for (_, (wait_info, cancel_callback)) in map.drain() { + let error = MvccError::from(MvccErrorInner::KeyIsLocked(wait_info.lock_info)); + cancel_callback(StorageError::from(TxnError::from(error))); + } + } +} diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 84d52b6990a..33d1c4ddf97 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -65,7 +65,7 @@ use std::{ iter, marker::PhantomData, sync::{ - atomic::{self, AtomicBool}, + atomic::{self, AtomicBool, 
AtomicU64}, Arc, }, }; @@ -116,7 +116,7 @@ use crate::{ storage::{ config::Config, kv::{with_tls_engine, Modify, WriteData}, - lock_manager::{DummyLockManager, LockManager}, + lock_manager::{LockManager, MockLockManager}, metrics::{CommandKind, *}, mvcc::{MvccReader, PointGetterBuilder}, txn::{ @@ -2828,6 +2828,7 @@ pub async fn get_causal_ts( pub struct DynamicConfigs { pub pipelined_pessimistic_lock: Arc, pub in_memory_pessimistic_lock: Arc, + pub wake_up_delay_duration_ms: Arc, } fn get_priority_tag(priority: CommandPri) -> CommandPriority { @@ -2923,6 +2924,7 @@ pub struct TestStorageBuilder { config: Config, pipelined_pessimistic_lock: Arc, in_memory_pessimistic_lock: Arc, + wake_up_delay_duration_ms: Arc, lock_mgr: L, resource_tag_factory: ResourceTagFactory, _phantom: PhantomData, @@ -2932,9 +2934,9 @@ pub struct TestStorageBuilder { /// To be convenience for test cases unrelated to RawKV. pub type TestStorageBuilderApiV1 = TestStorageBuilder; -impl TestStorageBuilder { +impl TestStorageBuilder { /// Build `Storage`. - pub fn new(lock_mgr: DummyLockManager) -> Self { + pub fn new(lock_mgr: MockLockManager) -> Self { let engine = TestEngineBuilder::new() .api_version(F::TAG) .build() @@ -3055,6 +3057,8 @@ impl TestStorageBuilder { config, pipelined_pessimistic_lock: Arc::new(AtomicBool::new(false)), in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), + // Make it very large to avoid tests being affected by the delayed-waking-up behavior. 
+ wake_up_delay_duration_ms: Arc::new(AtomicU64::new(100000)), lock_mgr, resource_tag_factory: ResourceTagFactory::new_for_test(), _phantom: PhantomData, @@ -3119,6 +3123,7 @@ impl TestStorageBuilder { DynamicConfigs { pipelined_pessimistic_lock: self.pipelined_pessimistic_lock, in_memory_pessimistic_lock: self.in_memory_pessimistic_lock, + wake_up_delay_duration_ms: self.wake_up_delay_duration_ms, }, Arc::new(FlowController::Singleton(EngineFlowController::empty())), DummyReporter, @@ -3148,6 +3153,7 @@ impl TestStorageBuilder { DynamicConfigs { pipelined_pessimistic_lock: self.pipelined_pessimistic_lock, in_memory_pessimistic_lock: self.in_memory_pessimistic_lock, + wake_up_delay_duration_ms: self.wake_up_delay_duration_ms, }, Arc::new(FlowController::Singleton(EngineFlowController::empty())), DummyReporter, @@ -3179,7 +3185,7 @@ pub mod test_util { }; use super::*; - use crate::storage::txn::commands; + use crate::storage::{lock_manager::WaitTimeout, txn::commands}; pub fn expect_none(x: Option) { assert_eq!(x, None); @@ -3283,7 +3289,7 @@ pub mod test_util { 3000, false, for_update_ts, - None, + Some(WaitTimeout::Default), return_values, for_update_ts.next(), OldValues::default(), @@ -3405,9 +3411,11 @@ mod tests { use error_code::ErrorCodeExt; use errors::extract_key_error; use futures::executor::block_on; - use kvproto::kvrpcpb::{ - Assertion, AssertionLevel, CommandPri, Op, PrewriteRequestPessimisticAction::*, + use kvproto::{ + kvrpcpb::{Assertion, AssertionLevel, CommandPri, Op, PrewriteRequestPessimisticAction::*}, + metapb::RegionEpoch, }; + use parking_lot::Mutex; use tikv_util::config::ReadableSize; use tracker::INVALID_TRACKER_TOKEN; use txn_types::{Mutation, PessimisticLock, WriteType, SHORT_VALUE_MAX_LEN}; @@ -3426,8 +3434,11 @@ mod tests { kv::{ Error as KvError, ErrorInner as EngineErrorInner, ExpectedWrite, MockEngineBuilder, }, - lock_manager::{DiagnosticContext, Lock, WaitTimeout}, - mvcc::{Error as MvccError, ErrorInner as MvccErrorInner, 
LockType}, + lock_manager::{ + DiagnosticContext, KeyLockWaitInfo, LockDigest, LockWaitToken, UpdateWaitForEvent, + WaitTimeout, + }, + mvcc::LockType, txn::{ commands, commands::{AcquirePessimisticLock, Prewrite}, @@ -3440,7 +3451,7 @@ mod tests { #[test] fn test_prewrite_blocks_read() { use kvproto::kvrpcpb::ExtraOp; - let mut storage = TestStorageBuilderApiV1::new(DummyLockManager) + let mut storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); @@ -3456,7 +3467,7 @@ mod tests { .process_write( snapshot, commands::WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager: storage.concurrency_manager.clone(), extra_op: ExtraOp::Noop, statistics: &mut Statistics::default(), @@ -3478,7 +3489,7 @@ mod tests { #[test] fn test_get_put() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -3539,9 +3550,10 @@ mod tests { .cfs([CF_DEFAULT, "foo"]) .build() .unwrap(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) - .build() - .unwrap(); + let storage = + TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) + .build() + .unwrap(); let (tx, rx) = channel(); storage .sched_txn_command( @@ -3628,7 +3640,7 @@ mod tests { #[test] fn test_scan() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -3966,9 +3978,10 @@ mod tests { ) } .unwrap(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) - .build() - .unwrap(); + let storage = + TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) + .build() + .unwrap(); let (tx, rx) = channel(); storage .sched_txn_command( @@ -4195,7 +4208,7 @@ mod tests { #[test] 
fn test_batch_get() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -4270,7 +4283,7 @@ mod tests { #[test] fn test_batch_get_command() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -4358,7 +4371,7 @@ mod tests { #[test] fn test_txn() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -4444,7 +4457,7 @@ mod tests { scheduler_pending_write_threshold: ReadableSize(1), ..Default::default() }; - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .config(config) .build() .unwrap(); @@ -4487,7 +4500,7 @@ mod tests { #[test] fn test_cleanup() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let cm = storage.concurrency_manager.clone(); @@ -4525,7 +4538,7 @@ mod tests { #[test] fn test_cleanup_check_ttl() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -4583,7 +4596,7 @@ mod tests { #[test] fn test_flashback_to_version() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let mut ts = TimeStamp::zero(); @@ -4714,7 +4727,7 @@ mod tests { #[test] fn test_flashback_to_version_lock() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ 
-4818,7 +4831,7 @@ mod tests { #[test] fn test_flashback_to_version_in_multi_batch() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -4915,7 +4928,7 @@ mod tests { #[test] fn test_high_priority_get_put() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -4972,7 +4985,7 @@ mod tests { scheduler_worker_pool_size: 1, ..Default::default() }; - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .config(config) .build() .unwrap(); @@ -5026,7 +5039,7 @@ mod tests { #[test] fn test_delete_range() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -5132,7 +5145,7 @@ mod tests { } fn test_raw_get_put_impl() { - let storage = TestStorageBuilder::<_, _, F>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, F>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -5186,7 +5199,7 @@ mod tests { } fn test_raw_checksum_impl() { - let storage = TestStorageBuilder::<_, _, F>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, F>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -5253,7 +5266,7 @@ mod tests { ]; let k = b"r\0k".to_vec(); - let storage = TestStorageBuilder::<_, _, ApiV2>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, ApiV2>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -5323,7 +5336,7 @@ mod tests { } fn test_raw_delete_impl() { - let storage = TestStorageBuilder::<_, _, F>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, 
F>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -5417,7 +5430,7 @@ mod tests { } fn test_raw_delete_range_impl() { - let storage = TestStorageBuilder::<_, _, F>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, F>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -5530,7 +5543,7 @@ mod tests { fn run_raw_batch_put( for_cas: bool, - storage: &Storage, + storage: &Storage, ctx: Context, kvpairs: Vec, ttls: Vec, @@ -5544,7 +5557,7 @@ mod tests { } fn test_raw_batch_put_impl(for_cas: bool) { - let storage = TestStorageBuilder::<_, _, F>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, F>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -5634,7 +5647,7 @@ mod tests { } fn test_raw_batch_get_impl() { - let storage = TestStorageBuilder::<_, _, F>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, F>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -5681,7 +5694,7 @@ mod tests { } fn test_raw_batch_get_command_impl() { - let storage = TestStorageBuilder::<_, _, F>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, F>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -5745,7 +5758,7 @@ mod tests { fn run_raw_batch_delete( for_cas: bool, - storage: &Storage, + storage: &Storage, ctx: Context, keys: Vec>, cb: Callback<()>, @@ -5758,7 +5771,7 @@ mod tests { } fn test_raw_batch_delete_impl(for_cas: bool) { - let storage = TestStorageBuilder::<_, _, F>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, F>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -5872,7 +5885,7 @@ mod tests { (None, None) }; - let storage = TestStorageBuilder::<_, _, F>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, F>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -6174,7 +6187,7 @@ mod tests { ]); // 
TODO: refactor to use `Api` parameter. assert_eq!( - >::check_key_ranges(&ranges, false,), + >::check_key_ranges(&ranges, false,), true ); @@ -6184,7 +6197,7 @@ mod tests { (b"c".to_vec(), vec![]), ]); assert_eq!( - >::check_key_ranges(&ranges, false,), + >::check_key_ranges(&ranges, false,), true ); @@ -6194,7 +6207,7 @@ mod tests { (b"c3".to_vec(), b"c".to_vec()), ]); assert_eq!( - >::check_key_ranges(&ranges, false,), + >::check_key_ranges(&ranges, false,), false ); @@ -6206,7 +6219,7 @@ mod tests { (b"a".to_vec(), vec![]), ]); assert_eq!( - >::check_key_ranges(&ranges, false,), + >::check_key_ranges(&ranges, false,), false ); @@ -6216,7 +6229,7 @@ mod tests { (b"c3".to_vec(), b"c".to_vec()), ]); assert_eq!( - >::check_key_ranges(&ranges, true,), + >::check_key_ranges(&ranges, true,), true ); @@ -6226,7 +6239,7 @@ mod tests { (b"a3".to_vec(), vec![]), ]); assert_eq!( - >::check_key_ranges(&ranges, true,), + >::check_key_ranges(&ranges, true,), true ); @@ -6236,7 +6249,7 @@ mod tests { (b"c".to_vec(), b"c3".to_vec()), ]); assert_eq!( - >::check_key_ranges(&ranges, true,), + >::check_key_ranges(&ranges, true,), false ); @@ -6246,7 +6259,7 @@ mod tests { (b"c3".to_vec(), vec![]), ]); assert_eq!( - >::check_key_ranges(&ranges, true,), + >::check_key_ranges(&ranges, true,), false ); } @@ -6271,7 +6284,7 @@ mod tests { .collect() }; - let storage = TestStorageBuilder::<_, _, F>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, F>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -6512,7 +6525,7 @@ mod tests { } fn test_raw_get_key_ttl_impl() { - let storage = TestStorageBuilder::<_, _, F>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, F>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -6571,7 +6584,7 @@ mod tests { } fn test_raw_compare_and_swap_impl() { - let storage = TestStorageBuilder::<_, _, F>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, 
F>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -6757,7 +6770,7 @@ mod tests { #[test] fn test_scan_lock() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -7058,7 +7071,7 @@ mod tests { fn test_resolve_lock_impl() { use crate::storage::txn::RESOLVE_LOCK_BATCH_SIZE; - let storage = TestStorageBuilder::<_, _, F>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, F>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -7169,7 +7182,7 @@ mod tests { #[test] fn test_resolve_lock_lite() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -7277,7 +7290,7 @@ mod tests { #[test] fn test_txn_heart_beat() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -7364,7 +7377,7 @@ mod tests { #[test] fn test_check_txn_status() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let cm = storage.concurrency_manager.clone(); @@ -7571,7 +7584,7 @@ mod tests { #[test] fn test_check_secondary_locks() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let cm = storage.concurrency_manager.clone(); @@ -7689,7 +7702,8 @@ mod tests { } fn test_pessimistic_lock_impl(pipelined_pessimistic_lock: bool) { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let lock_mgr = MockLockManager::new(); + let storage = TestStorageBuilderApiV1::new(lock_mgr.clone()) .pipelined_pessimistic_lock(pipelined_pessimistic_lock) .build() 
.unwrap(); @@ -7781,8 +7795,11 @@ mod tests { }), ) .unwrap(); - // The DummyLockManager consumes the Msg::WaitForLock. + // The request enters lock waiting state. rx.recv_timeout(Duration::from_millis(100)).unwrap_err(); + lock_mgr.simulate_timeout_all(); + // The lock-waiting request is cancelled. + rx.recv().unwrap(); } // Needn't update max_ts when failing to read value @@ -7904,20 +7921,19 @@ mod tests { #[allow(clippy::large_enum_variant)] pub enum Msg { WaitFor { + token: LockWaitToken, + region_id: u64, + region_epoch: RegionEpoch, + term: u64, start_ts: TimeStamp, - cb: StorageCallback, - pr: ProcessResult, - lock: Lock, + wait_info: KeyLockWaitInfo, is_first_lock: bool, timeout: Option, + cancel_callback: Box, diag_ctx: DiagnosticContext, }, - - WakeUp { - lock_ts: TimeStamp, - hashes: Vec, - commit_ts: TimeStamp, - is_pessimistic_txn: bool, + RemoveLockWait { + token: LockWaitToken, }, } @@ -7925,62 +7941,58 @@ mod tests { // It's used to check whether we send right messages to lock manager. 
#[derive(Clone)] pub struct ProxyLockMgr { - tx: Sender, + tx: Arc>>, has_waiter: Arc, } impl ProxyLockMgr { pub fn new(tx: Sender) -> Self { Self { - tx, + tx: Arc::new(Mutex::new(tx)), has_waiter: Arc::new(AtomicBool::new(false)), } } - - pub fn set_has_waiter(&mut self, has_waiter: bool) { - self.has_waiter.store(has_waiter, Ordering::Relaxed); - } } impl LockManager for ProxyLockMgr { + fn allocate_token(&self) -> LockWaitToken { + LockWaitToken(Some(1)) + } + fn wait_for( &self, + token: LockWaitToken, + region_id: u64, + region_epoch: RegionEpoch, + term: u64, start_ts: TimeStamp, - cb: StorageCallback, - pr: ProcessResult, - lock: Lock, + wait_info: KeyLockWaitInfo, is_first_lock: bool, timeout: Option, + cancel_callback: Box, diag_ctx: DiagnosticContext, ) { self.tx + .lock() .send(Msg::WaitFor { + token, + region_id, + region_epoch, + term, start_ts, - cb, - pr, - lock, + wait_info, is_first_lock, timeout, + cancel_callback, diag_ctx, }) .unwrap(); } - fn wake_up( - &self, - lock_ts: TimeStamp, - hashes: Vec, - commit_ts: TimeStamp, - is_pessimistic_txn: bool, - ) { - self.tx - .send(Msg::WakeUp { - lock_ts, - hashes, - commit_ts, - is_pessimistic_txn, - }) - .unwrap(); + fn update_wait_for(&self, _updated_items: Vec) {} + + fn remove_lock_wait(&self, token: LockWaitToken) { + self.tx.lock().send(Msg::RemoveLockWait { token }).unwrap(); } fn has_waiter(&self) -> bool { @@ -8049,77 +8061,114 @@ mod tests { match msg { Msg::WaitFor { start_ts, - pr, - lock, + wait_info, is_first_lock, timeout, .. 
} => { assert_eq!(start_ts, TimeStamp::new(20)); assert_eq!( - lock, - Lock { + wait_info.lock_digest, + LockDigest { ts: 10.into(), hash: Key::from_raw(&k).gen_hash(), } ); assert_eq!(is_first_lock, true); assert_eq!(timeout, Some(WaitTimeout::Millis(100))); - match pr { - ProcessResult::PessimisticLockRes { res } => match res { - Err(Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc( - MvccError(box MvccErrorInner::KeyIsLocked(info)), - ))))) => { - assert_eq!(info.get_key(), k.as_slice()); - assert_eq!(info.get_primary_lock(), k.as_slice()); - assert_eq!(info.get_lock_version(), 10); - } - _ => panic!("unexpected error"), - }, - _ => panic!("unexpected process result"), - }; } _ => panic!("unexpected msg"), } } - // Test whether `Storage` sends right wake-up msgs to `LockManager` + // Test whether `Storage` correctly wakes up lock-waiting requests #[test] - fn validate_wake_up_msg() { - fn assert_wake_up_msg_eq( - msg: Msg, - expected_lock_ts: TimeStamp, - expected_hashes: Vec, - expected_commit_ts: TimeStamp, - expected_is_pessimistic_txn: bool, - ) { - match msg { - Msg::WakeUp { - lock_ts, - hashes, - commit_ts, - is_pessimistic_txn, - } => { - assert_eq!(lock_ts, expected_lock_ts); - assert_eq!(hashes, expected_hashes); - assert_eq!(commit_ts, expected_commit_ts); - assert_eq!(is_pessimistic_txn, expected_is_pessimistic_txn); + fn test_wake_up() { + struct BlockedLockRequestHandle { + remaining: usize, + rx: std::sync::mpsc::Receiver, + } + + impl BlockedLockRequestHandle { + fn assert_blocked(&mut self) { + while self.remaining > 0 { + match self.rx.recv_timeout(Duration::from_millis(50)) { + Ok(_) => self.remaining -= 1, + Err(std::sync::mpsc::RecvTimeoutError::Timeout) => return, + Err(e) => panic!("unexpected error: {:?}", e), + } + } + panic!("pessimistic lock requests expected to be blocked finished unexpectedly") + } + + fn assert_woken_up(mut self) { + while self.remaining > 0 { + match self.rx.recv_timeout(Duration::from_millis(200)) { + 
Ok(_) => self.remaining -= 1, + Err(e) => panic!("unexpected error: {:?}", e), + } } - _ => panic!("unexpected msg"), } } - let (msg_tx, msg_rx) = channel(); - let mut lock_mgr = ProxyLockMgr::new(msg_tx); - lock_mgr.set_has_waiter(true); let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr( TestEngineBuilder::new().build().unwrap(), - lock_mgr, + MockLockManager::new(), ) .build() .unwrap(); + let lock_blocked = |keys: &[Key], + lock_ts: u64, + expected_conflicting_start_ts: u64, + expected_conflicting_commit_ts: u64| { + let (tx, rx) = channel(); + for k in keys { + storage + .sched_txn_command( + commands::AcquirePessimisticLock::new( + vec![(k.clone(), false)], + k.to_raw().unwrap(), + lock_ts.into(), + 3000, + false, + lock_ts.into(), + Some(WaitTimeout::Millis(5000)), + false, + (lock_ts + 1).into(), + OldValues::default(), + false, + false, + Context::default(), + ), + expect_fail_callback(tx.clone(), 6, move |e| match e { + Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc( + mvcc::Error(box mvcc::ErrorInner::WriteConflict { + conflict_start_ts, + conflict_commit_ts, + .. + }), + )))) => { + assert_eq!(conflict_start_ts, expected_conflicting_start_ts.into()); + assert_eq!( + conflict_commit_ts, + expected_conflicting_commit_ts.into() + ); + } + e => panic!("unexpected error chain: {:?}", e), + }), + ) + .unwrap(); + } + let mut h = BlockedLockRequestHandle { + remaining: keys.len(), + rx, + }; + h.assert_blocked(); + h + }; + let (tx, rx) = channel(); let prewrite_locks = |keys: &[Key], ts: TimeStamp| { storage @@ -8157,28 +8206,19 @@ mod tests { Key::from_raw(b"b"), Key::from_raw(b"c"), ]; - let key_hashes: Vec = keys.iter().map(|k| k.gen_hash()).collect(); // Commit prewrite_locks(&keys, 10.into()); - // If locks don't exsit, hashes of released locks should be empty. 
- for empty_hashes in &[false, true] { - storage - .sched_txn_command( - commands::Commit::new(keys.clone(), 10.into(), 20.into(), Context::default()), - expect_ok_callback(tx.clone(), 0), - ) - .unwrap(); - rx.recv().unwrap(); + let h = lock_blocked(&keys, 15, 10, 20); + storage + .sched_txn_command( + commands::Commit::new(keys.clone(), 10.into(), 20.into(), Context::default()), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); - let msg = msg_rx.recv().unwrap(); - let hashes = if *empty_hashes { - Vec::new() - } else { - key_hashes.clone() - }; - assert_wake_up_msg_eq(msg, 10.into(), hashes, 20.into(), false); - } + h.assert_woken_up(); // Cleanup for pessimistic in &[false, true] { @@ -8189,28 +8229,21 @@ mod tests { } else { prewrite_locks(&keys[..1], ts); } - for empty_hashes in &[false, true] { - storage - .sched_txn_command( - commands::Cleanup::new( - keys[0].clone(), - ts, - TimeStamp::max(), - Context::default(), - ), - expect_ok_callback(tx.clone(), 0), - ) - .unwrap(); - rx.recv().unwrap(); + let h = lock_blocked(&keys[..1], 35, ts.into_inner(), 0); + storage + .sched_txn_command( + commands::Cleanup::new( + keys[0].clone(), + ts, + TimeStamp::max(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); - let msg = msg_rx.recv().unwrap(); - let (hashes, pessimistic) = if *empty_hashes { - (Vec::new(), false) - } else { - (key_hashes[..1].to_vec(), *pessimistic) - }; - assert_wake_up_msg_eq(msg, ts, hashes, 0.into(), pessimistic); - } + h.assert_woken_up(); } // Rollback @@ -8222,50 +8255,36 @@ mod tests { } else { prewrite_locks(&keys, ts); } - for empty_hashes in &[false, true] { - storage - .sched_txn_command( - commands::Rollback::new(keys.clone(), ts, Context::default()), - expect_ok_callback(tx.clone(), 0), - ) - .unwrap(); - rx.recv().unwrap(); - - let msg = msg_rx.recv().unwrap(); - let (hashes, pessimistic) = if *empty_hashes { - (Vec::new(), false) - } else { - 
(key_hashes.clone(), *pessimistic) - }; - assert_wake_up_msg_eq(msg, ts, hashes, 0.into(), pessimistic); - } - } - - // PessimisticRollback - acquire_pessimistic_locks(&keys, 50.into()); - for empty_hashes in &[false, true] { + let h = lock_blocked(&keys, 45, ts.into_inner(), 0); storage .sched_txn_command( - commands::PessimisticRollback::new( - keys.clone(), - 50.into(), - 50.into(), - Context::default(), - ), + commands::Rollback::new(keys.clone(), ts, Context::default()), expect_ok_callback(tx.clone(), 0), ) .unwrap(); rx.recv().unwrap(); - let msg = msg_rx.recv().unwrap(); - let (hashes, pessimistic) = if *empty_hashes { - (Vec::new(), false) - } else { - (key_hashes.clone(), true) - }; - assert_wake_up_msg_eq(msg, 50.into(), hashes, 0.into(), pessimistic); + h.assert_woken_up(); } + // PessimisticRollback + acquire_pessimistic_locks(&keys, 50.into()); + let h = lock_blocked(&keys, 55, 50, 0); + storage + .sched_txn_command( + commands::PessimisticRollback::new( + keys.clone(), + 50.into(), + 50.into(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + + h.assert_woken_up(); + // ResolveLockLite for commit in &[false, true] { let mut start_ts = TimeStamp::new(60); @@ -8276,28 +8295,21 @@ mod tests { TimeStamp::zero() }; prewrite_locks(&keys, start_ts); - for empty_hashes in &[false, true] { - storage - .sched_txn_command( - commands::ResolveLockLite::new( - start_ts, - commit_ts, - keys.clone(), - Context::default(), - ), - expect_ok_callback(tx.clone(), 0), - ) - .unwrap(); - rx.recv().unwrap(); + let h = lock_blocked(&keys, 65, start_ts.into_inner(), commit_ts.into_inner()); + storage + .sched_txn_command( + commands::ResolveLockLite::new( + start_ts, + commit_ts, + keys.clone(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); - let msg = msg_rx.recv().unwrap(); - let hashes = if *empty_hashes { - Vec::new() - } else { - key_hashes.clone() - }; - 
assert_wake_up_msg_eq(msg, start_ts, hashes, commit_ts, false); - } + h.assert_woken_up(); } // ResolveLock @@ -8310,10 +8322,10 @@ mod tests { Key::from_raw(b"e"), Key::from_raw(b"f"), ]; - let committed_key_hashes: Vec = committed_keys.iter().map(|k| k.gen_hash()).collect(); - // Commit start_ts=75 prewrite_locks(&committed_keys, 75.into()); txn_status.insert(TimeStamp::new(75), TimeStamp::new(76)); + let h_rolled_back = lock_blocked(&keys, 76, 70, 0); + let h_committed = lock_blocked(&committed_keys, 76, 75, 76); storage .sched_txn_command( commands::ResolveLockReadPhase::new(txn_status, None, Context::default()), @@ -8321,20 +8333,8 @@ mod tests { ) .unwrap(); rx.recv().unwrap(); - - let mut msg1 = msg_rx.recv().unwrap(); - let mut msg2 = msg_rx.recv().unwrap(); - match msg1 { - Msg::WakeUp { lock_ts, .. } => { - if lock_ts != TimeStamp::new(70) { - // Let msg1 be the msg of rolled back transaction. - std::mem::swap(&mut msg1, &mut msg2); - } - assert_wake_up_msg_eq(msg1, 70.into(), key_hashes, 0.into(), true); - assert_wake_up_msg_eq(msg2, 75.into(), committed_key_hashes, 76.into(), false); - } - _ => panic!("unexpect msg"), - } + h_rolled_back.assert_woken_up(); + h_committed.assert_woken_up(); // CheckTxnStatus let key = Key::from_raw(b"k"); @@ -8352,6 +8352,8 @@ mod tests { .unwrap(); rx.recv().unwrap(); + let mut h = lock_blocked(&[key.clone()], 105, start_ts.into_inner(), 0); + // Not expire storage .sched_txn_command( @@ -8385,14 +8387,14 @@ mod tests { ) .unwrap(); rx.recv().unwrap(); - // No msg - assert!(msg_rx.try_recv().is_err()); + // Not woken up + h.assert_blocked(); // Expired storage .sched_txn_command( commands::CheckTxnStatus::new( - key.clone(), + key, start_ts, TimeStamp::compose(110, 0), TimeStamp::compose(201, 0), @@ -8405,18 +8407,12 @@ mod tests { ) .unwrap(); rx.recv().unwrap(); - assert_wake_up_msg_eq( - msg_rx.recv().unwrap(), - start_ts, - vec![key.gen_hash()], - 0.into(), - false, - ); + h.assert_woken_up(); } #[test] fn 
test_check_memory_locks() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let cm = storage.get_concurrency_manager(); @@ -8516,7 +8512,7 @@ mod tests { #[test] fn test_read_access_locks() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); @@ -8601,7 +8597,7 @@ mod tests { #[test] fn test_async_commit_prewrite() { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let cm = storage.concurrency_manager.clone(); @@ -8697,10 +8693,12 @@ mod tests { #[test] fn test_overlapped_ts_rollback_before_prewrite() { let mut engine = TestEngineBuilder::new().build().unwrap(); - let storage = - TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine.clone(), DummyLockManager) - .build() - .unwrap(); + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr( + engine.clone(), + MockLockManager::new(), + ) + .build() + .unwrap(); let (k1, v1) = (b"key1", b"v1"); let (k2, v2) = (b"key2", b"v2"); @@ -8874,8 +8872,10 @@ mod tests { builder = builder.add_expected_write(expected_write) } let engine = builder.build(); - let mut builder = - TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager); + let mut builder = TestStorageBuilderApiV1::from_engine_and_lock_mgr( + engine, + MockLockManager::new(), + ); builder.config.enable_async_apply_prewrite = true; if self.pipelined_pessimistic_lock { builder @@ -9017,7 +9017,7 @@ mod tests { #[test] fn test_resolve_commit_pessimistic_locks() { - let mut storage = TestStorageBuilderApiV1::new(DummyLockManager) + let mut storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); @@ -9354,7 +9354,7 @@ mod tests { test_data.into_iter().enumerate() { // TODO: refactor to 
use `Api` parameter. - let res = StorageApiV1::::check_api_version( + let res = StorageApiV1::::check_api_version( storage_api_version, req_api_version, cmd, @@ -9410,7 +9410,7 @@ mod tests { range: &[(Option<&[u8]>, Option<&[u8]>)], err| { // TODO: refactor to use `Api` parameter. - let res = StorageApiV1::::check_api_version_ranges( + let res = StorageApiV1::::check_api_version_ranges( storage_api_version, req_api_version, cmd, @@ -9575,7 +9575,8 @@ mod tests { #[test] fn test_write_in_memory_pessimistic_locks() { let txn_ext = Arc::new(TxnExt::default()); - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let lock_mgr = MockLockManager::new(); + let storage = TestStorageBuilderApiV1::new(lock_mgr.clone()) .pipelined_pessimistic_lock(true) .in_memory_pessimistic_lock(true) .build_for_txn(txn_ext.clone()) @@ -9632,9 +9633,11 @@ mod tests { }), ) .unwrap(); - // DummyLockManager just drops the callback, so it will fail to receive - // anything. - rx.recv().unwrap_err(); + // The request enters lock waiting state. + rx.recv_timeout(Duration::from_millis(100)).unwrap_err(); + lock_mgr.simulate_timeout_all(); + // The lock-waiting request is cancelled. + rx.recv().unwrap().unwrap_err(); let (tx, rx) = channel(); storage @@ -9672,7 +9675,7 @@ mod tests { #[test] fn test_disable_in_memory_pessimistic_locks() { let txn_ext = Arc::new(TxnExt::default()); - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .pipelined_pessimistic_lock(true) .in_memory_pessimistic_lock(false) .build_for_txn(txn_ext.clone()) diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index 7171417d060..a73f8b99027 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -37,16 +37,19 @@ impl GcInfo { /// waiting for locks. #[derive(Debug, PartialEq)] pub struct ReleasedLock { - /// The hash value of the lock. 
- pub hash: u64, + pub start_ts: TimeStamp, + pub commit_ts: TimeStamp, + pub key: Key, /// Whether it is a pessimistic lock. pub pessimistic: bool, } impl ReleasedLock { - fn new(key: &Key, pessimistic: bool) -> Self { + pub fn new(start_ts: TimeStamp, commit_ts: TimeStamp, key: Key, pessimistic: bool) -> Self { Self { - hash: key.gen_hash(), + start_ts, + commit_ts, + key, pessimistic, } } @@ -114,8 +117,17 @@ impl MvccTxn { self.modifies.push(Modify::PessimisticLock(key, lock)) } - pub(crate) fn unlock_key(&mut self, key: Key, pessimistic: bool) -> Option { - let released = ReleasedLock::new(&key, pessimistic); + /// Append a modify that unlocks the key. If the lock is removed due to + /// committing, a non-zero `commit_ts` needs to be provided; otherwise if + /// the lock is removed due to rolling back, `commit_ts` must be set to + /// zero. + pub(crate) fn unlock_key( + &mut self, + key: Key, + pessimistic: bool, + commit_ts: TimeStamp, + ) -> Option { + let released = ReleasedLock::new(self.start_ts, commit_ts, key.clone(), pessimistic); let write = Modify::Delete(CF_LOCK, key); self.write_size += write.size(); self.modifies.push(write); diff --git a/src/storage/txn/actions/check_txn_status.rs b/src/storage/txn/actions/check_txn_status.rs index f80e61f93ad..4c900e5a438 100644 --- a/src/storage/txn/actions/check_txn_status.rs +++ b/src/storage/txn/actions/check_txn_status.rs @@ -45,7 +45,7 @@ pub fn check_txn_status_lock_exists( // If the resolving and primary key lock are both pessimistic locks, just unlock // the primary pessimistic lock and do not write rollback records. 
return if resolving_pessimistic_lock && lock.lock_type == LockType::Pessimistic { - let released = txn.unlock_key(primary_key, is_pessimistic_txn); + let released = txn.unlock_key(primary_key, is_pessimistic_txn, TimeStamp::zero()); MVCC_CHECK_TXN_STATUS_COUNTER_VEC.pessimistic_rollback.inc(); Ok((TxnStatus::PessimisticRollBack, released)) } else { @@ -157,7 +157,7 @@ pub fn rollback_lock( TxnCommitRecord::SingleRecord { write, .. } if write.write_type != WriteType::Rollback => { panic!("txn record found but not expected: {:?}", txn) } - _ => return Ok(txn.unlock_key(key, is_pessimistic_txn)), + _ => return Ok(txn.unlock_key(key, is_pessimistic_txn, TimeStamp::zero())), }; // If prewrite type is DEL or LOCK or PESSIMISTIC, it is no need to delete @@ -176,7 +176,7 @@ pub fn rollback_lock( collapse_prev_rollback(txn, reader, &key)?; } - Ok(txn.unlock_key(key, is_pessimistic_txn)) + Ok(txn.unlock_key(key, is_pessimistic_txn, TimeStamp::zero())) } pub fn collapse_prev_rollback( diff --git a/src/storage/txn/actions/commit.rs b/src/storage/txn/actions/commit.rs index 6fd925b536e..eb798090ba2 100644 --- a/src/storage/txn/actions/commit.rs +++ b/src/storage/txn/actions/commit.rs @@ -101,7 +101,7 @@ pub fn commit( } txn.put_write(key.clone(), commit_ts, write.as_ref().to_bytes()); - Ok(txn.unlock_key(key, lock.is_pessimistic_txn())) + Ok(txn.unlock_key(key, lock.is_pessimistic_txn(), commit_ts)) } pub mod tests { diff --git a/src/storage/txn/commands/acquire_pessimistic_lock.rs b/src/storage/txn/commands/acquire_pessimistic_lock.rs index 949b347f251..359f0abacd8 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock.rs @@ -11,11 +11,12 @@ use crate::storage::{ txn::{ acquire_pessimistic_lock, commands::{ - Command, CommandExt, ReaderWithStats, ResponsePolicy, TypedCommand, WriteCommand, - WriteContext, WriteResult, WriteResultLockInfo, + Command, CommandExt, ReaderWithStats, ReleasedLocks, ResponsePolicy, 
TypedCommand, + WriteCommand, WriteContext, WriteResult, WriteResultLockInfo, }, Error, ErrorInner, Result, }, + types::PessimisticLockParameters, Error as StorageError, ErrorInner as StorageErrorInner, PessimisticLockRes, ProcessResult, Result as StorageResult, Snapshot, }; @@ -155,12 +156,21 @@ impl WriteCommand for AcquirePessimisticLock let write_data = WriteData::new(txn.into_modifies(), extra); (pr, write_data, rows, ctx, None) } else { + let request_parameters = PessimisticLockParameters { + pb_ctx: ctx.clone(), + primary: self.primary.clone(), + start_ts: self.start_ts, + lock_ttl: self.lock_ttl, + for_update_ts: self.for_update_ts, + wait_timeout: self.wait_timeout, + return_values: self.return_values, + min_commit_ts: self.min_commit_ts, + check_existence: self.check_existence, + is_first_lock: self.is_first_lock, + allow_lock_with_conflict: false, + }; let lock_info_pb = extract_lock_info_from_result(&res); - let lock_info = WriteResultLockInfo::from_lock_info_pb( - lock_info_pb, - self.is_first_lock, - self.wait_timeout, - ); + let lock_info = WriteResultLockInfo::new(lock_info_pb.clone(), request_parameters); let pr = ProcessResult::PessimisticLockRes { res }; // Wait for lock released (pr, WriteData::default(), 0, ctx, Some(lock_info)) @@ -171,6 +181,7 @@ impl WriteCommand for AcquirePessimisticLock rows, pr, lock_info, + released_locks: ReleasedLocks::new(), lock_guards: vec![], response_policy: ResponsePolicy::OnProposed, }) @@ -194,17 +205,21 @@ mod tests { info.set_lock_version(ts); info.set_lock_ttl(100); let case = StorageError::from(StorageErrorInner::Txn(Error::from(ErrorInner::Mvcc( - MvccError::from(MvccErrorInner::KeyIsLocked(info)), + MvccError::from(MvccErrorInner::KeyIsLocked(info.clone())), )))); - let lock_info = WriteResultLockInfo::from_lock_info_pb( - extract_lock_info_from_result::<()>(&Err(case)), - is_first_lock, - wait_timeout, + let lock_info = WriteResultLockInfo::new( + extract_lock_info_from_result::<()>(&Err(case)).clone(), 
+ PessimisticLockParameters { + is_first_lock, + wait_timeout, + ..Default::default() + }, ); - assert_eq!(lock_info.lock.ts, ts.into()); - assert_eq!(lock_info.lock.hash, key.gen_hash()); - assert_eq!(lock_info.key, raw_key); - assert_eq!(lock_info.is_first_lock, is_first_lock); - assert_eq!(lock_info.wait_timeout, wait_timeout); + assert_eq!(lock_info.lock_digest.ts, ts.into()); + assert_eq!(lock_info.lock_digest.hash, key.gen_hash()); + assert_eq!(lock_info.key.into_raw().unwrap(), raw_key); + assert_eq!(lock_info.parameters.is_first_lock, is_first_lock); + assert_eq!(lock_info.parameters.wait_timeout, wait_timeout); + assert_eq!(lock_info.lock_info_pb, info); } } diff --git a/src/storage/txn/commands/atomic_store.rs b/src/storage/txn/commands/atomic_store.rs index 150b065e5db..b935d991eea 100644 --- a/src/storage/txn/commands/atomic_store.rs +++ b/src/storage/txn/commands/atomic_store.rs @@ -8,8 +8,8 @@ use crate::storage::{ lock_manager::LockManager, txn::{ commands::{ - Command, CommandExt, ResponsePolicy, TypedCommand, WriteCommand, WriteContext, - WriteResult, + Command, CommandExt, ReleasedLocks, ResponsePolicy, TypedCommand, WriteCommand, + WriteContext, WriteResult, }, Result, }, @@ -59,6 +59,7 @@ impl WriteCommand for RawAtomicStore { rows, pr: ProcessResult::Res, lock_info: None, + released_locks: ReleasedLocks::new(), lock_guards: raw_ext.into_iter().map(|r| r.key_guard).collect(), response_policy: ResponsePolicy::OnApplied, }) @@ -75,7 +76,7 @@ mod tests { use super::*; use crate::storage::{ - lock_manager::DummyLockManager, txn::scheduler::get_raw_ext, Statistics, TestEngineBuilder, + lock_manager::MockLockManager, txn::scheduler::get_raw_ext, Statistics, TestEngineBuilder, }; #[test] @@ -108,7 +109,7 @@ mod tests { let snap = engine.snapshot(Default::default()).unwrap(); let raw_ext = block_on(get_raw_ext(ts_provider, cm.clone(), true, &cmd.cmd)).unwrap(); let context = WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: 
&MockLockManager::new(), concurrency_manager: cm, extra_op: kvproto::kvrpcpb::ExtraOp::Noop, statistics: &mut statistic, diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index 56138a09a50..1a4b547b6d7 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -65,7 +65,7 @@ impl WriteCommand for CheckSecondaryLocks { SnapshotReader::new_with_ctx(self.start_ts, snapshot, &self.ctx), context.statistics, ); - let mut released_locks = ReleasedLocks::new(self.start_ts, TimeStamp::zero()); + let mut released_locks = ReleasedLocks::new(); let mut result = SecondaryLocksStatus::Locked(Vec::new()); for key in self.keys { @@ -76,7 +76,7 @@ impl WriteCommand for CheckSecondaryLocks { // The lock exists, the lock information is returned. Some(lock) if lock.ts == self.start_ts => { if lock.lock_type == LockType::Pessimistic { - released_lock = txn.unlock_key(key.clone(), true); + released_lock = txn.unlock_key(key.clone(), true, TimeStamp::zero()); let overlapped_write = reader.get_txn_commit_record(&key)?.unwrap_none(); (SecondaryLockStatus::RolledBack, true, overlapped_write) } else { @@ -142,8 +142,6 @@ impl WriteCommand for CheckSecondaryLocks { let mut rows = 0; if let SecondaryLocksStatus::RolledBack = &result { - // Lock is only released when result is `RolledBack`. - released_locks.wake_up(context.lock_mgr); // One row is mutated only when a secondary lock is rolled back. 
rows = 1; } @@ -156,6 +154,7 @@ impl WriteCommand for CheckSecondaryLocks { rows, pr, lock_info: None, + released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) @@ -171,7 +170,7 @@ pub mod tests { use super::*; use crate::storage::{ kv::TestEngineBuilder, - lock_manager::DummyLockManager, + lock_manager::MockLockManager, mvcc::tests::*, txn::{commands::WriteCommand, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*}, Engine, @@ -197,7 +196,7 @@ pub mod tests { .process_write( snapshot, WriteContext { - lock_mgr: &DummyLockManager, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm, extra_op: Default::default(), statistics: &mut Default::default(), @@ -235,7 +234,7 @@ pub mod tests { .process_write( snapshot, WriteContext { - lock_mgr: &DummyLockManager, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm.clone(), extra_op: Default::default(), statistics: &mut Default::default(), diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index 73079e00f5d..58f7f557448 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -121,13 +121,8 @@ impl WriteCommand for CheckTxnStatus { ), }; - let mut released_locks = ReleasedLocks::new(self.lock_ts, TimeStamp::zero()); + let mut released_locks = ReleasedLocks::new(); released_locks.push(released); - // The lock is released here only when the `check_txn_status` returns - // `TtlExpire`. 
- if let TxnStatus::TtlExpire = txn_status { - released_locks.wake_up(context.lock_mgr); - } let pr = ProcessResult::TxnStatus { txn_status }; let mut write_data = WriteData::from_modifies(txn.into_modifies()); @@ -138,6 +133,7 @@ impl WriteCommand for CheckTxnStatus { rows: 1, pr, lock_info: None, + released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) @@ -154,7 +150,7 @@ pub mod tests { use super::{TxnStatus::*, *}; use crate::storage::{ kv::Engine, - lock_manager::DummyLockManager, + lock_manager::MockLockManager, mvcc::tests::*, txn::{ commands::{pessimistic_rollback, WriteCommand, WriteContext}, @@ -196,7 +192,7 @@ pub mod tests { .process_write( snapshot, WriteContext { - lock_mgr: &DummyLockManager, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm, extra_op: Default::default(), statistics: &mut Default::default(), @@ -244,7 +240,7 @@ pub mod tests { .process_write( snapshot, WriteContext { - lock_mgr: &DummyLockManager, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm, extra_op: Default::default(), statistics: &mut Default::default(), diff --git a/src/storage/txn/commands/cleanup.rs b/src/storage/txn/commands/cleanup.rs index c810c749bd6..0b82432e3cd 100644 --- a/src/storage/txn/commands/cleanup.rs +++ b/src/storage/txn/commands/cleanup.rs @@ -56,7 +56,7 @@ impl WriteCommand for Cleanup { context.statistics, ); - let mut released_locks = ReleasedLocks::new(self.start_ts, TimeStamp::zero()); + let mut released_locks = ReleasedLocks::new(); // The rollback must be protected, see more on // [issue #7364](https://github.com/tikv/tikv/issues/7364) released_locks.push(cleanup( @@ -66,7 +66,6 @@ impl WriteCommand for Cleanup { self.current_ts, true, )?); - released_locks.wake_up(context.lock_mgr); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); @@ -76,6 +75,7 @@ impl WriteCommand for Cleanup { rows: 1, pr: ProcessResult::Res, lock_info: None, + 
released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/commit.rs b/src/storage/txn/commands/commit.rs index f89d4fc09af..86e1f541306 100644 --- a/src/storage/txn/commands/commit.rs +++ b/src/storage/txn/commands/commit.rs @@ -59,11 +59,10 @@ impl WriteCommand for Commit { let rows = self.keys.len(); // Pessimistic txn needs key_hashes to wake up waiters - let mut released_locks = ReleasedLocks::new(self.lock_ts, self.commit_ts); + let mut released_locks = ReleasedLocks::new(); for k in self.keys { released_locks.push(commit(&mut txn, &mut reader, k, self.commit_ts)?); } - released_locks.wake_up(context.lock_mgr); let pr = ProcessResult::TxnStatus { txn_status: TxnStatus::committed(self.commit_ts), @@ -76,6 +75,7 @@ impl WriteCommand for Commit { rows, pr, lock_info: None, + released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/compare_and_swap.rs b/src/storage/txn/commands/compare_and_swap.rs index 4dbd51e70e0..2fff0620b27 100644 --- a/src/storage/txn/commands/compare_and_swap.rs +++ b/src/storage/txn/commands/compare_and_swap.rs @@ -14,8 +14,8 @@ use crate::storage::{ raw, txn::{ commands::{ - Command, CommandExt, ResponsePolicy, TypedCommand, WriteCommand, WriteContext, - WriteResult, + Command, CommandExt, ReleasedLocks, ResponsePolicy, TypedCommand, WriteCommand, + WriteContext, WriteResult, }, Result, }, @@ -113,6 +113,7 @@ impl WriteCommand for RawCompareAndSwap { rows, pr, lock_info: None, + released_locks: ReleasedLocks::new(), lock_guards, response_policy: ResponsePolicy::OnApplied, }) @@ -132,7 +133,7 @@ mod tests { use super::*; use crate::storage::{ - lock_manager::DummyLockManager, txn::scheduler::get_raw_ext, Engine, Statistics, + lock_manager::MockLockManager, txn::scheduler::get_raw_ext, Engine, Statistics, TestEngineBuilder, }; @@ -207,7 +208,7 @@ mod tests { let raw_ext = block_on(get_raw_ext(ts_provider, 
cm.clone(), true, &cmd.cmd)).unwrap(); let context = WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm, extra_op: ExtraOp::Noop, statistics: &mut statistic, @@ -261,7 +262,7 @@ mod tests { let snap = engine.snapshot(Default::default()).unwrap(); let raw_ext = block_on(get_raw_ext(ts_provider, cm.clone(), true, &cmd.cmd)).unwrap(); let context = WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm, extra_op: kvproto::kvrpcpb::ExtraOp::Noop, statistics: &mut statistic, diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index b4255138eeb..9b198724e3b 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -9,8 +9,8 @@ use crate::storage::{ mvcc::{MvccTxn, SnapshotReader}, txn::{ commands::{ - Command, CommandExt, FlashbackToVersionReadPhase, ReaderWithStats, ResponsePolicy, - TypedCommand, WriteCommand, WriteContext, WriteResult, + Command, CommandExt, FlashbackToVersionReadPhase, ReaderWithStats, ReleasedLocks, + ResponsePolicy, TypedCommand, WriteCommand, WriteContext, WriteResult, }, flashback_to_version, latch, Result, }, @@ -104,6 +104,7 @@ impl WriteCommand for FlashbackToVersion { } }, lock_info: None, + released_locks: ReleasedLocks::new(), lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 7c2c945d4e2..f5331087ac1 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -46,7 +46,7 @@ pub use mvcc_by_key::MvccByKey; pub use mvcc_by_start_ts::MvccByStartTs; pub use pause::Pause; pub use pessimistic_rollback::PessimisticRollback; -pub use prewrite::{one_pc_commit_ts, Prewrite, PrewritePessimistic}; +pub use prewrite::{one_pc_commit, Prewrite, PrewritePessimistic}; pub use 
resolve_lock::{ResolveLock, RESOLVE_LOCK_BATCH_SIZE}; pub use resolve_lock_lite::ResolveLockLite; pub use resolve_lock_readphase::ResolveLockReadPhase; @@ -63,8 +63,8 @@ use crate::storage::{ mvcc::{Lock as MvccLock, MvccReader, ReleasedLock, SnapshotReader}, txn::{latch, ProcessResult, Result}, types::{ - MvccInfo, PessimisticLockRes, PrewriteResult, SecondaryLocksStatus, StorageCallbackType, - TxnStatus, + MvccInfo, PessimisticLockParameters, PessimisticLockRes, PrewriteResult, + SecondaryLocksStatus, StorageCallbackType, TxnStatus, }, Result as StorageResult, Snapshot, Statistics, }; @@ -365,14 +365,6 @@ impl From for TypedCommand<()> { } } -#[derive(Default)] -pub(super) struct ReleasedLocks { - start_ts: TimeStamp, - commit_ts: TimeStamp, - hashes: Vec, - pessimistic: bool, -} - /// Represents for a scheduler command, when should the response sent to the /// client. For most cases, the response should be sent after the result being /// successfully applied to the storage (if needed). 
But in some special cases, @@ -398,62 +390,58 @@ pub struct WriteResult { pub rows: usize, pub pr: ProcessResult, pub lock_info: Option, + pub released_locks: ReleasedLocks, pub lock_guards: Vec, pub response_policy: ResponsePolicy, } pub struct WriteResultLockInfo { - pub lock: lock_manager::Lock, - pub key: Vec, - pub is_first_lock: bool, - pub wait_timeout: Option, + pub lock_digest: lock_manager::LockDigest, + pub key: Key, + pub lock_info_pb: LockInfo, + pub parameters: PessimisticLockParameters, } impl WriteResultLockInfo { - pub fn from_lock_info_pb( - lock_info: &LockInfo, - is_first_lock: bool, - wait_timeout: Option, - ) -> Self { - let lock = lock_manager::Lock { - ts: lock_info.get_lock_version().into(), - hash: Key::from_raw(lock_info.get_key()).gen_hash(), + pub fn new(lock_info_pb: LockInfo, parameters: PessimisticLockParameters) -> Self { + let lock = lock_manager::LockDigest { + ts: lock_info_pb.get_lock_version().into(), + hash: Key::from_raw(lock_info_pb.get_key()).gen_hash(), }; - let key = lock_info.get_key().to_owned(); + let key = Key::from_raw(lock_info_pb.get_key()); Self { - lock, + lock_digest: lock, key, - is_first_lock, - wait_timeout, + lock_info_pb, + parameters, } } } +#[derive(Default)] +pub struct ReleasedLocks(Vec); + impl ReleasedLocks { - pub fn new(start_ts: TimeStamp, commit_ts: TimeStamp) -> Self { - Self { - start_ts, - commit_ts, - ..Default::default() - } + pub fn new() -> Self { + Self::default() } pub fn push(&mut self, lock: Option) { if let Some(lock) = lock { - self.hashes.push(lock.hash); - if !self.pessimistic { - self.pessimistic = lock.pessimistic; - } + self.0.push(lock); } } pub fn is_empty(&self) -> bool { - self.hashes.is_empty() + self.0.is_empty() + } + + pub fn clear(&mut self) { + self.0.clear() } - // Wake up pessimistic transactions that waiting for these locks. 
- pub fn wake_up(self, lock_mgr: &L) { - lock_mgr.wake_up(self.start_ts, self.hashes, self.commit_ts, self.pessimistic); + pub fn into_iter(self) -> impl Iterator { + self.0.into_iter() } } @@ -756,7 +744,7 @@ pub mod test_util { use crate::storage::{ mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, txn::{Error, ErrorInner, Result}, - DummyLockManager, Engine, + Engine, MockLockManager, }; // Some utils for tests that may be used in multiple source code files. @@ -769,7 +757,7 @@ pub mod test_util { ) -> Result { let snap = engine.snapshot(Default::default())?; let context = WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm, extra_op: ExtraOp::Noop, statistics, @@ -907,7 +895,7 @@ pub mod test_util { ); let context = WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager, extra_op: ExtraOp::Noop, statistics, @@ -932,7 +920,7 @@ pub mod test_util { let concurrency_manager = ConcurrencyManager::new(start_ts.into()); let cmd = Rollback::new(keys, TimeStamp::from(start_ts), ctx); let context = WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager, extra_op: ExtraOp::Noop, statistics, diff --git a/src/storage/txn/commands/pause.rs b/src/storage/txn/commands/pause.rs index 684804f990d..05bbb508bdc 100644 --- a/src/storage/txn/commands/pause.rs +++ b/src/storage/txn/commands/pause.rs @@ -10,8 +10,8 @@ use crate::storage::{ lock_manager::LockManager, txn::{ commands::{ - Command, CommandExt, ResponsePolicy, TypedCommand, WriteCommand, WriteContext, - WriteResult, + Command, CommandExt, ReleasedLocks, ResponsePolicy, TypedCommand, WriteCommand, + WriteContext, WriteResult, }, Result, }, @@ -49,6 +49,7 @@ impl WriteCommand for Pause { rows: 0, pr: ProcessResult::Res, lock_info: None, + released_locks: ReleasedLocks::new(), lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git 
a/src/storage/txn/commands/pessimistic_rollback.rs b/src/storage/txn/commands/pessimistic_rollback.rs index f7394cf32aa..b575787208a 100644 --- a/src/storage/txn/commands/pessimistic_rollback.rs +++ b/src/storage/txn/commands/pessimistic_rollback.rs @@ -58,7 +58,7 @@ impl WriteCommand for PessimisticRollback { let keys = mem::take(&mut self.keys); let rows = keys.len(); - let mut released_locks = ReleasedLocks::new(self.start_ts, TimeStamp::zero()); + let mut released_locks = ReleasedLocks::new(); for key in keys { fail_point!("pessimistic_rollback", |err| Err( crate::storage::mvcc::Error::from(crate::storage::mvcc::txn::make_txn_error( @@ -73,7 +73,7 @@ impl WriteCommand for PessimisticRollback { && lock.ts == self.start_ts && lock.for_update_ts <= self.for_update_ts { - Ok(txn.unlock_key(key, true)) + Ok(txn.unlock_key(key, true, TimeStamp::zero())) } else { Ok(None) } @@ -82,7 +82,6 @@ impl WriteCommand for PessimisticRollback { }; released_locks.push(released_lock?); } - released_locks.wake_up(context.lock_mgr); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); @@ -92,6 +91,7 @@ impl WriteCommand for PessimisticRollback { rows, pr: ProcessResult::MultiRes { results: vec![] }, lock_info: None, + released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) @@ -108,7 +108,7 @@ pub mod tests { use super::*; use crate::storage::{ kv::Engine, - lock_manager::DummyLockManager, + lock_manager::MockLockManager, mvcc::tests::*, txn::{ commands::{WriteCommand, WriteContext}, @@ -136,7 +136,7 @@ pub mod tests { for_update_ts, deadline: Deadline::from_now(DEFAULT_EXECUTION_DURATION_LIMIT), }; - let lock_mgr = DummyLockManager; + let lock_mgr = MockLockManager::new(); let write_context = WriteContext { lock_mgr: &lock_mgr, concurrency_manager: cm, diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index e8b85d37d66..2b0915a5fdc 100644 --- 
a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -461,7 +461,6 @@ impl Prewriter { final_min_commit_ts, rows, context.async_apply_prewrite, - context.lock_mgr, )) } @@ -645,7 +644,6 @@ impl Prewriter { final_min_commit_ts: TimeStamp, rows: usize, async_apply_prewrite: bool, - lock_manager: &impl LockManager, ) -> WriteResult { let async_commit_ts = if self.secondary_keys.is_some() { final_min_commit_ts @@ -654,16 +652,14 @@ impl Prewriter { }; let mut result = if locks.is_empty() { + let (one_pc_commit_ts, released_locks) = + one_pc_commit(self.try_one_pc, &mut txn, final_min_commit_ts); + let pr = ProcessResult::PrewriteResult { result: PrewriteResult { locks: vec![], min_commit_ts: async_commit_ts, - one_pc_commit_ts: one_pc_commit_ts( - self.try_one_pc, - &mut txn, - final_min_commit_ts, - lock_manager, - ), + one_pc_commit_ts, }, }; let extra = TxnExtra { @@ -685,6 +681,7 @@ impl Prewriter { rows, pr, lock_info: None, + released_locks, lock_guards, response_policy: ResponsePolicy::OnApplied, } @@ -703,6 +700,7 @@ impl Prewriter { rows, pr, lock_info: None, + released_locks: ReleasedLocks::new(), lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, } @@ -822,31 +820,28 @@ impl MutationLock for (Mutation, PrewriteRequestPessimisticAction) { } } -/// Compute the commit ts of a 1pc transaction. -pub fn one_pc_commit_ts( +/// Commits a 1pc transaction if possible, returns the commit ts and released +/// locks on success. +pub fn one_pc_commit( try_one_pc: bool, txn: &mut MvccTxn, final_min_commit_ts: TimeStamp, - lock_manager: &impl LockManager, -) -> TimeStamp { +) -> (TimeStamp, ReleasedLocks) { if try_one_pc { assert_ne!(final_min_commit_ts, TimeStamp::zero()); // All keys can be successfully locked and `try_one_pc` is set. Try to directly // commit them. 
let released_locks = handle_1pc_locks(txn, final_min_commit_ts); - if !released_locks.is_empty() { - released_locks.wake_up(lock_manager); - } - final_min_commit_ts + (final_min_commit_ts, released_locks) } else { assert!(txn.locks_for_1pc.is_empty()); - TimeStamp::zero() + (TimeStamp::zero(), ReleasedLocks::new()) } } /// Commit and delete all 1pc locks in txn. fn handle_1pc_locks(txn: &mut MvccTxn, commit_ts: TimeStamp) -> ReleasedLocks { - let mut released_locks = ReleasedLocks::new(txn.start_ts, commit_ts); + let mut released_locks = ReleasedLocks::new(); for (key, lock, delete_pessimistic_lock) in std::mem::take(&mut txn.locks_for_1pc) { let write = Write::new( @@ -858,7 +853,7 @@ fn handle_1pc_locks(txn: &mut MvccTxn, commit_ts: TimeStamp) -> ReleasedLocks { // records. txn.put_write(key.clone(), commit_ts, write.as_ref().to_bytes()); if delete_pessimistic_lock { - released_locks.push(txn.unlock_key(key, true)); + released_locks.push(txn.unlock_key(key, true, commit_ts)); } } @@ -905,7 +900,7 @@ mod tests { Error, ErrorInner, }, types::TxnStatus, - DummyLockManager, Engine, Snapshot, Statistics, TestEngineBuilder, + Engine, MockLockManager, Snapshot, Statistics, TestEngineBuilder, }; fn inner_test_prewrite_skip_constraint_check(pri_key_number: u8, write_num: usize) { @@ -1467,7 +1462,7 @@ mod tests { use engine_traits::{IterOptions, ReadOptions}; use kvproto::kvrpcpb::ExtraOp; - use crate::storage::{kv::Result, CfName, ConcurrencyManager, DummyLockManager, Value}; + use crate::storage::{kv::Result, CfName, ConcurrencyManager, MockLockManager, Value}; #[derive(Clone)] struct MockSnapshot; @@ -1503,7 +1498,7 @@ mod tests { macro_rules! 
context { () => { WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager: ConcurrencyManager::new(10.into()), extra_op: ExtraOp::Noop, statistics: &mut Statistics::default(), @@ -1673,7 +1668,7 @@ mod tests { ) }; let context = WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm.clone(), extra_op: ExtraOp::Noop, statistics: &mut statistics, @@ -1787,7 +1782,7 @@ mod tests { Context::default(), ); let context = WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm.clone(), extra_op: ExtraOp::Noop, statistics: &mut statistics, @@ -1815,7 +1810,7 @@ mod tests { TimeStamp::default(), ); let context = WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm, extra_op: ExtraOp::Noop, statistics: &mut statistics, @@ -1897,7 +1892,7 @@ mod tests { Context::default(), ); let context = WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm.clone(), extra_op: ExtraOp::Noop, statistics: &mut statistics, @@ -1929,7 +1924,7 @@ mod tests { TimeStamp::default(), ); let context = WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm, extra_op: ExtraOp::Noop, statistics: &mut statistics, @@ -2198,7 +2193,7 @@ mod tests { Context::default(), ); let context = WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm.clone(), extra_op: ExtraOp::Noop, statistics: &mut statistics, @@ -2222,7 +2217,7 @@ mod tests { 10.into(), ); let context = WriteContext { - lock_mgr: &DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm, extra_op: ExtraOp::Noop, statistics: &mut statistics, @@ -2428,7 +2423,7 @@ mod tests { Context::default(), ); let context = WriteContext { - lock_mgr: 
&DummyLockManager {}, + lock_mgr: &MockLockManager::new(), concurrency_manager: ConcurrencyManager::new(20.into()), extra_op: ExtraOp::Noop, statistics: &mut statistics, diff --git a/src/storage/txn/commands/resolve_lock.rs b/src/storage/txn/commands/resolve_lock.rs index 1d2bfbf49d8..b89e91593f9 100644 --- a/src/storage/txn/commands/resolve_lock.rs +++ b/src/storage/txn/commands/resolve_lock.rs @@ -82,8 +82,7 @@ impl WriteCommand for ResolveLock { let mut scan_key = self.scan_key.take(); let rows = key_locks.len(); - // Map txn's start_ts to ReleasedLocks - let mut released_locks = HashMap::default(); + let mut released_locks = ReleasedLocks::new(); for (current_key, current_lock) in key_locks { txn.start_ts = current_lock.ts; reader.start_ts = current_lock.ts; @@ -118,20 +117,13 @@ impl WriteCommand for ResolveLock { commit_ts, })); }; - released_locks - .entry(current_lock.ts) - .or_insert_with(|| ReleasedLocks::new(current_lock.ts, commit_ts)) - .push(released); + released_locks.push(released); if txn.write_size() >= MAX_TXN_WRITE_SIZE { scan_key = Some(current_key); break; } } - let lock_mgr = context.lock_mgr; - released_locks - .into_iter() - .for_each(|(_, released_locks)| released_locks.wake_up(lock_mgr)); let pr = if scan_key.is_none() { ProcessResult::Res @@ -154,6 +146,7 @@ impl WriteCommand for ResolveLock { rows, pr, lock_info: None, + released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/resolve_lock_lite.rs b/src/storage/txn/commands/resolve_lock_lite.rs index 5a0f636d2f6..a31211c564e 100644 --- a/src/storage/txn/commands/resolve_lock_lite.rs +++ b/src/storage/txn/commands/resolve_lock_lite.rs @@ -54,7 +54,7 @@ impl WriteCommand for ResolveLockLite { let rows = self.resolve_keys.len(); // ti-client guarantees the size of resolve_keys will not too large, so no // necessary to control the write_size as ResolveLock. 
- let mut released_locks = ReleasedLocks::new(self.start_ts, self.commit_ts); + let mut released_locks = ReleasedLocks::new(); for key in self.resolve_keys { released_locks.push(if !self.commit_ts.is_zero() { commit(&mut txn, &mut reader, key, self.commit_ts)? @@ -62,7 +62,6 @@ impl WriteCommand for ResolveLockLite { cleanup(&mut txn, &mut reader, key, TimeStamp::zero(), false)? }); } - released_locks.wake_up(context.lock_mgr); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); @@ -72,6 +71,7 @@ impl WriteCommand for ResolveLockLite { rows, pr: ProcessResult::Res, lock_info: None, + released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/rollback.rs b/src/storage/txn/commands/rollback.rs index fc3846931f3..479f29cb276 100644 --- a/src/storage/txn/commands/rollback.rs +++ b/src/storage/txn/commands/rollback.rs @@ -50,14 +50,13 @@ impl WriteCommand for Rollback { ); let rows = self.keys.len(); - let mut released_locks = ReleasedLocks::new(self.start_ts, TimeStamp::zero()); + let mut released_locks = ReleasedLocks::new(); for k in self.keys { // Rollback is called only if the transaction is known to fail. Under the // circumstances, the rollback record needn't be protected. 
let released_lock = cleanup(&mut txn, &mut reader, k, TimeStamp::zero(), false)?; released_locks.push(released_lock); } - released_locks.wake_up(context.lock_mgr); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); @@ -67,6 +66,7 @@ impl WriteCommand for Rollback { rows, pr: ProcessResult::Res, lock_info: None, + released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/txn_heart_beat.rs b/src/storage/txn/commands/txn_heart_beat.rs index 7ec773b99dc..9bfbda5c748 100644 --- a/src/storage/txn/commands/txn_heart_beat.rs +++ b/src/storage/txn/commands/txn_heart_beat.rs @@ -9,8 +9,8 @@ use crate::storage::{ mvcc::{Error as MvccError, ErrorInner as MvccErrorInner, MvccTxn, SnapshotReader}, txn::{ commands::{ - Command, CommandExt, ReaderWithStats, ResponsePolicy, TypedCommand, WriteCommand, - WriteContext, WriteResult, + Command, CommandExt, ReaderWithStats, ReleasedLocks, ResponsePolicy, TypedCommand, + WriteCommand, WriteContext, WriteResult, }, Result, }, @@ -91,6 +91,7 @@ impl WriteCommand for TxnHeartBeat { rows: 1, pr, lock_info: None, + released_locks: ReleasedLocks::new(), lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) @@ -106,7 +107,7 @@ pub mod tests { use super::*; use crate::storage::{ kv::TestEngineBuilder, - lock_manager::DummyLockManager, + lock_manager::MockLockManager, mvcc::tests::*, txn::{commands::WriteCommand, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*}, Engine, @@ -134,7 +135,7 @@ pub mod tests { .process_write( snapshot, WriteContext { - lock_mgr: &DummyLockManager, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm, extra_op: Default::default(), statistics: &mut Default::default(), @@ -176,7 +177,7 @@ pub mod tests { .process_write( snapshot, WriteContext { - lock_mgr: &DummyLockManager, + lock_mgr: &MockLockManager::new(), concurrency_manager: cm, extra_op: Default::default(), 
statistics: &mut Default::default(), diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index b65445b8c24..4ccc868f30d 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -41,7 +41,7 @@ use crossbeam::utils::CachePadded; use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; use futures::compat::Future01CompatExt; use kvproto::{ - kvrpcpb::{CommandPri, Context, DiskFullOpt, ExtraOp}, + kvrpcpb::{self, CommandPri, Context, DiskFullOpt, ExtraOp}, pdpb::QueryKind, }; use parking_lot::{Mutex, MutexGuard, RwLockWriteGuard}; @@ -64,11 +64,18 @@ use crate::{ self, with_tls_engine, Engine, ExtCallback, FlowStatsReporter, Result as EngineResult, SnapContext, Statistics, }, - lock_manager::{self, DiagnosticContext, LockManager, WaitTimeout}, + lock_manager::{ + self, + lock_wait_context::LockWaitContext, + lock_waiting_queue::{DelayedNotifyAllFuture, LockWaitEntry, LockWaitQueues}, + DiagnosticContext, LockManager, LockWaitToken, + }, metrics::*, + mvcc::{Error as MvccError, ErrorInner as MvccErrorInner, ReleasedLock}, txn::{ commands::{ - Command, RawExt, ResponsePolicy, WriteContext, WriteResult, WriteResultLockInfo, + Command, RawExt, ReleasedLocks, ResponsePolicy, WriteContext, WriteResult, + WriteResultLockInfo, }, flow_controller::FlowController, latch::{Latches, Lock}, @@ -223,8 +230,12 @@ struct SchedulerInner { enable_async_apply_prewrite: bool, + pessimistic_lock_wake_up_delay_duration_ms: Arc, + resource_tag_factory: ResourceTagFactory, + lock_wait_queues: LockWaitQueues, + quota_limiter: Arc, feature_gate: FeatureGate, } @@ -367,6 +378,8 @@ impl Scheduler { task_slots.push(Mutex::new(Default::default()).into()); } + let lock_wait_queues = LockWaitQueues::new(lock_mgr.clone()); + let inner = Arc::new(SchedulerInner { task_slots, id_alloc: AtomicU64::new(0).into(), @@ -391,9 +404,11 @@ impl Scheduler { pipelined_pessimistic_lock: dynamic_configs.pipelined_pessimistic_lock, in_memory_pessimistic_lock: 
dynamic_configs.in_memory_pessimistic_lock, enable_async_apply_prewrite: config.enable_async_apply_prewrite, + pessimistic_lock_wake_up_delay_duration_ms: dynamic_configs.wake_up_delay_duration_ms, flow_controller, causal_ts_provider, resource_tag_factory, + lock_wait_queues, quota_limiter, feature_gate, }); @@ -722,27 +737,128 @@ impl Scheduler { /// Event handler for the request of waiting for lock fn on_wait_for_lock( &self, + ctx: &Context, cid: u64, - start_ts: TimeStamp, - pr: ProcessResult, - lock: lock_manager::Lock, - is_first_lock: bool, - wait_timeout: Option, - diag_ctx: DiagnosticContext, + lock_info: WriteResultLockInfo, + tracker: TrackerToken, ) { - debug!("command waits for lock released"; "cid" => cid); - let tctx = self.inner.dequeue_task_context(cid); - SCHED_STAGE_COUNTER_VEC.get(tctx.tag).lock_wait.inc(); + let key = lock_info.key.clone(); + let lock_digest = lock_info.lock_digest; + let start_ts = lock_info.parameters.start_ts; + let is_first_lock = lock_info.parameters.is_first_lock; + let wait_timeout = lock_info.parameters.wait_timeout; + + let diag_ctx = DiagnosticContext { + key: lock_info.key.to_raw().unwrap(), + resource_group_tag: ctx.get_resource_group_tag().into(), + tracker, + }; + let wait_token = self.inner.lock_mgr.allocate_token(); + + let (lock_req_ctx, lock_wait_entry, lock_info_pb) = + self.make_lock_waiting(cid, wait_token, lock_info); + + // The entry must be pushed to the lock waiting queue before sending to + // `lock_mgr`. When the request is canceled in anywhere outside the lock + // waiting queue (including `lock_mgr`), it first tries to remove the + // entry from the lock waiting queue. If the entry doesn't exist + // in the queue, it will be regarded as already popped out from the queue and + // therefore will woken up, thus the canceling operation will be + // skipped. So pushing the entry to the queue must be done before any + // possible cancellation. 
+ self.inner + .lock_wait_queues + .push_lock_wait(lock_wait_entry, lock_info_pb.clone()); + + let wait_info = lock_manager::KeyLockWaitInfo { + key, + lock_digest, + lock_info: lock_info_pb, + }; self.inner.lock_mgr.wait_for( + wait_token, + ctx.get_region_id(), + ctx.get_region_epoch().clone(), + ctx.get_term(), start_ts, - tctx.cb.unwrap(), - pr, - lock, + wait_info, is_first_lock, wait_timeout, + Box::new(lock_req_ctx.get_callback_for_cancellation()), diag_ctx, ); - self.release_lock(&tctx.lock, cid); + } + + fn on_release_locks(&self, released_locks: ReleasedLocks) { + let mut legacy_wake_up_list = vec![]; + let mut delay_wake_up_futures = vec![]; + let wake_up_delay_duration_ms = self + .inner + .pessimistic_lock_wake_up_delay_duration_ms + .load(Ordering::Relaxed); + + released_locks.into_iter().for_each(|released_lock| { + let (lock_wait_entry, delay_wake_up_future) = + match self.inner.lock_wait_queues.pop_for_waking_up( + &released_lock.key, + released_lock.start_ts, + released_lock.commit_ts, + wake_up_delay_duration_ms, + ) { + Some(e) => e, + None => return, + }; + + // TODO: Currently there are only legacy requests. When resumable requests are + // supported, do not put them to the `legacy_wake_up_list`. 
+ legacy_wake_up_list.push((lock_wait_entry, released_lock)); + if let Some(f) = delay_wake_up_future { + delay_wake_up_futures.push(f); + } + }); + + self.wake_up_legacy_pessimistic_locks(legacy_wake_up_list, delay_wake_up_futures); + } + + fn wake_up_legacy_pessimistic_locks( + &self, + legacy_wake_up_list: Vec<(Box, ReleasedLock)>, + delayed_wake_up_futures: Vec, + ) { + let self1 = self.clone(); + self.get_sched_pool(CommandPri::High) + .pool + .spawn(async move { + for (lock_info, released_lock) in legacy_wake_up_list { + let cb = lock_info.key_cb.unwrap().into_inner(); + let e = StorageError::from(Error::from(MvccError::from( + MvccErrorInner::WriteConflict { + start_ts: lock_info.parameters.start_ts, + conflict_start_ts: released_lock.start_ts, + conflict_commit_ts: released_lock.commit_ts, + key: released_lock.key.into_raw().unwrap(), + primary: lock_info.parameters.primary, + reason: kvrpcpb::WriteConflictReason::PessimisticRetry, + }, + ))); + cb(Err(e.into())); + } + + for f in delayed_wake_up_futures { + self1 + .get_sched_pool(CommandPri::High) + .pool + .spawn(async move { + let res = f.await; + // It returns only None currently. + // TODO: Handle not-none case when supporting resumable pessimistic lock + // requests. + assert!(res.is_none()); + }) + .unwrap(); + } + }) + .unwrap(); } fn early_response( @@ -842,7 +958,6 @@ impl Scheduler { let tag = task.cmd.tag(); let cid = task.cid; let priority = task.cmd.priority(); - let ts = task.cmd.ts(); let tracker = task.tracker; let scheduler = self.clone(); let quota_limiter = self.inner.quota_limiter.clone(); @@ -916,6 +1031,7 @@ impl Scheduler { rows, pr, lock_info, + released_locks, lock_guards, response_policy, } = match deadline @@ -938,23 +1054,27 @@ impl Scheduler { let region_id = ctx.get_region_id(); SCHED_STAGE_COUNTER_VEC.get(tag).write.inc(); + let mut pr = Some(pr); + + // TODO: Lock wait handling here. 
if let Some(lock_info) = lock_info { - let WriteResultLockInfo { - lock, - key, - is_first_lock, - wait_timeout, - } = lock_info; - let diag_ctx = DiagnosticContext { - key, - resource_group_tag: ctx.get_resource_group_tag().into(), - tracker, - }; - scheduler.on_wait_for_lock(cid, ts, pr, lock, is_first_lock, wait_timeout, diag_ctx); - return; + // Only handle lock waiting if `wait_timeout` is set. Otherwise it indicates + // that it's a lock-no-wait request and we need to report error + // immediately. + if lock_info.parameters.wait_timeout.is_some() { + assert_eq!(to_be_write.size(), 0); + pr = Some(ProcessResult::Res); + // allow_lock_with_conflict is not supported yet in this version. + assert!(!lock_info.parameters.allow_lock_with_conflict); + + scheduler.on_wait_for_lock(&ctx, cid, lock_info, tracker); + } + } + + if !released_locks.is_empty() { + scheduler.on_release_locks(released_locks); } - let mut pr = Some(pr); if to_be_write.modifies.is_empty() { scheduler.on_write_finished(cid, pr, Ok(()), lock_guards, false, false, tag); return; @@ -1262,6 +1382,39 @@ impl Scheduler { PessimisticLockMode::Sync } } + + fn make_lock_waiting( + &self, + cid: u64, + lock_wait_token: LockWaitToken, + lock_info: WriteResultLockInfo, + ) -> (LockWaitContext, Box, kvrpcpb::LockInfo) { + let mut slot = self.inner.get_task_slot(cid); + let task_ctx = slot.get_mut(&cid).unwrap(); + let cb = task_ctx.cb.take().unwrap(); + + let ctx = LockWaitContext::new( + lock_info.key.clone(), + self.inner.lock_wait_queues.clone(), + lock_wait_token, + cb, + lock_info.parameters.allow_lock_with_conflict, + ); + let first_batch_cb = ctx.get_callback_for_first_write_batch(); + task_ctx.cb = Some(first_batch_cb); + drop(slot); + + let lock_wait_entry = Box::new(LockWaitEntry { + key: lock_info.key, + lock_hash: lock_info.lock_digest.hash, + parameters: lock_info.parameters, + lock_wait_token, + legacy_wake_up_index: None, + key_cb: Some(ctx.get_callback_for_blocked_key().into()), + }); + + 
(ctx, lock_wait_entry, lock_info.lock_info_pb) + } } pub async fn get_raw_ext( @@ -1320,12 +1473,12 @@ mod tests { use kvproto::kvrpcpb::{BatchRollbackRequest, CheckTxnStatusRequest, Context}; use raftstore::store::{ReadStats, WriteStats}; use tikv_util::{config::ReadableSize, future::paired_future_callback}; - use txn_types::{Key, OldValues}; + use txn_types::{Key, OldValues, TimeStamp}; use super::*; use crate::storage::{ kv::{Error as KvError, ErrorInner as KvErrorInner}, - lock_manager::DummyLockManager, + lock_manager::{MockLockManager, WaitTimeout}, mvcc::{self, Mutation}, test_util::latest_feature_gate, txn::{ @@ -1346,7 +1499,7 @@ mod tests { } // TODO(cosven): use this in the following test cases to reduce duplicate code. - fn new_test_scheduler() -> (Scheduler, RocksEngine) { + fn new_test_scheduler() -> (Scheduler, RocksEngine) { let engine = TestEngineBuilder::new().build().unwrap(); let config = Config { scheduler_concurrency: 1024, @@ -1358,12 +1511,13 @@ mod tests { ( Scheduler::new( engine.clone(), - DummyLockManager, + MockLockManager::new(), ConcurrencyManager::new(1.into()), &config, DynamicConfigs { pipelined_pessimistic_lock: Arc::new(AtomicBool::new(true)), in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), + wake_up_delay_duration_ms: Arc::new(AtomicU64::new(0)), }, Arc::new(FlowController::Singleton(EngineFlowController::empty())), None, @@ -1505,12 +1659,13 @@ mod tests { }; let scheduler = Scheduler::new( engine, - DummyLockManager, + MockLockManager::new(), ConcurrencyManager::new(1.into()), &config, DynamicConfigs { pipelined_pessimistic_lock: Arc::new(AtomicBool::new(true)), in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), + wake_up_delay_duration_ms: Arc::new(AtomicU64::new(0)), }, Arc::new(FlowController::Singleton(EngineFlowController::empty())), None, @@ -1610,12 +1765,13 @@ mod tests { }; let scheduler = Scheduler::new( engine, - DummyLockManager, + MockLockManager::new(), ConcurrencyManager::new(1.into()), 
&config, DynamicConfigs { pipelined_pessimistic_lock: Arc::new(AtomicBool::new(true)), in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), + wake_up_delay_duration_ms: Arc::new(AtomicU64::new(0)), }, Arc::new(FlowController::Singleton(EngineFlowController::empty())), None, @@ -1669,12 +1825,13 @@ mod tests { }; let scheduler = Scheduler::new( engine, - DummyLockManager, + MockLockManager::new(), ConcurrencyManager::new(1.into()), &config, DynamicConfigs { pipelined_pessimistic_lock: Arc::new(AtomicBool::new(true)), in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), + wake_up_delay_duration_ms: Arc::new(AtomicU64::new(0)), }, Arc::new(FlowController::Singleton(EngineFlowController::empty())), None, @@ -1736,12 +1893,13 @@ mod tests { }; let scheduler = Scheduler::new( engine, - DummyLockManager, + MockLockManager::new(), ConcurrencyManager::new(1.into()), &config, DynamicConfigs { pipelined_pessimistic_lock: Arc::new(AtomicBool::new(true)), in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), + wake_up_delay_duration_ms: Arc::new(AtomicU64::new(0)), }, Arc::new(FlowController::Singleton(EngineFlowController::empty())), None, @@ -1798,12 +1956,13 @@ mod tests { let scheduler = Scheduler::new( engine, - DummyLockManager, + MockLockManager::new(), ConcurrencyManager::new(1.into()), &config, DynamicConfigs { pipelined_pessimistic_lock: Arc::new(AtomicBool::new(false)), in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), + wake_up_delay_duration_ms: Arc::new(AtomicU64::new(0)), }, Arc::new(FlowController::Singleton(EngineFlowController::empty())), None, diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index ec38958ad57..e0f68b721b5 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -30,7 +30,7 @@ use tikv::{ self, config_manager::StorageConfigManger, kv::{Error as KvError, ErrorInner as KvErrorInner, SnapContext, SnapshotExt}, - 
lock_manager::DummyLockManager, + lock_manager::MockLockManager, mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, test_util::*, txn::{ @@ -53,9 +53,10 @@ fn test_scheduler_leader_change_twice() { let peers = region0.get_peers(); cluster.must_transfer_leader(region0.get_id(), peers[0].clone()); let engine0 = cluster.sim.rl().storages[&peers[0].get_id()].clone(); - let storage0 = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine0, DummyLockManager) - .build() - .unwrap(); + let storage0 = + TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine0, MockLockManager::new()) + .build() + .unwrap(); let mut ctx0 = Context::default(); ctx0.set_region_id(region0.get_id()); @@ -247,7 +248,7 @@ fn test_scale_scheduler_pool() { .get(&1) .unwrap() .clone(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .config(cluster.cfg.tikv.storage.clone()) .build() .unwrap(); @@ -344,7 +345,7 @@ fn test_pipelined_pessimistic_lock() { let before_pipelined_write_finish_fp = "before_pipelined_write_finish"; { - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .pipelined_pessimistic_lock(false) .build() .unwrap(); @@ -371,7 +372,7 @@ fn test_pipelined_pessimistic_lock() { fail::remove(rockskv_write_modifies_fp); } - let storage = TestStorageBuilderApiV1::new(DummyLockManager) + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) .pipelined_pessimistic_lock(true) .build() .unwrap(); @@ -524,10 +525,12 @@ fn test_async_commit_prewrite_with_stale_max_ts_impl() { .get(&1) .unwrap() .clone(); - let storage = - TestStorageBuilder::<_, _, F>::from_engine_and_lock_mgr(engine.clone(), DummyLockManager) - .build() - .unwrap(); + let storage = TestStorageBuilder::<_, _, F>::from_engine_and_lock_mgr( + engine.clone(), + MockLockManager::new(), + ) + 
.build() + .unwrap(); // Fail to get timestamp from PD at first fail::cfg("test_raftstore_get_tso", "pause").unwrap(); @@ -641,7 +644,7 @@ fn expect_locked(err: tikv::storage::Error, key: &[u8], lock_ts: TimeStamp) { } fn test_async_apply_prewrite_impl( - storage: &Storage, + storage: &Storage, ctx: Context, key: &[u8], value: &[u8], @@ -825,7 +828,7 @@ fn test_async_apply_prewrite() { .get(&1) .unwrap() .clone(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .async_apply_prewrite(true) .build() .unwrap(); @@ -923,7 +926,7 @@ fn test_async_apply_prewrite_fallback() { .get(&1) .unwrap() .clone(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .async_apply_prewrite(true) .build() .unwrap(); @@ -985,7 +988,7 @@ fn test_async_apply_prewrite_fallback() { } fn test_async_apply_prewrite_1pc_impl( - storage: &Storage, + storage: &Storage, ctx: Context, key: &[u8], value: &[u8], @@ -1112,7 +1115,7 @@ fn test_async_apply_prewrite_1pc() { .get(&1) .unwrap() .clone(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .async_apply_prewrite(true) .build() .unwrap(); @@ -1139,7 +1142,7 @@ fn test_atomic_cas_lock_by_latch() { .get(&1) .unwrap() .clone(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .build() .unwrap(); @@ -1227,7 +1230,7 @@ fn test_before_async_write_deadline() { .get(&1) .unwrap() .clone(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + let storage = 
TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .build() .unwrap(); @@ -1259,7 +1262,7 @@ fn test_before_propose_deadline() { cluster.run(); let engine = cluster.sim.read().unwrap().storages[&1].clone(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .build() .unwrap(); @@ -1292,7 +1295,7 @@ fn test_resolve_lock_deadline() { cluster.run(); let engine = cluster.sim.read().unwrap().storages[&1].clone(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .build() .unwrap(); diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 1a6f2da9b87..e42a44047a4 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -30,7 +30,7 @@ use test_raftstore::new_server_cluster; use tikv::storage::{ self, kv::SnapshotExt, - lock_manager::DummyLockManager, + lock_manager::MockLockManager, txn::tests::{ must_acquire_pessimistic_lock, must_commit, must_pessimistic_prewrite_put, must_pessimistic_prewrite_put_err, must_prewrite_put, must_prewrite_put_err, @@ -69,7 +69,7 @@ fn test_txn_failpoints() { #[test] fn test_atomic_getting_max_ts_and_storing_memory_lock() { let engine = TestEngineBuilder::new().build().unwrap(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .build() .unwrap(); @@ -120,7 +120,7 @@ fn test_atomic_getting_max_ts_and_storing_memory_lock() { #[test] fn test_snapshot_must_be_later_than_updating_max_ts() { let engine = TestEngineBuilder::new().build().unwrap(); - let storage = 
TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .build() .unwrap(); @@ -163,7 +163,7 @@ fn test_snapshot_must_be_later_than_updating_max_ts() { #[test] fn test_update_max_ts_before_scan_memory_locks() { let engine = TestEngineBuilder::new().build().unwrap(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .build() .unwrap(); @@ -217,7 +217,7 @@ macro_rules! lock_release_test { fn $test_name() { let engine = TestEngineBuilder::new().build().unwrap(); let storage = - TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .build() .unwrap(); @@ -294,7 +294,7 @@ lock_release_test!( #[test] fn test_max_commit_ts_error() { let engine = TestEngineBuilder::new().build().unwrap(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .build() .unwrap(); let cm = storage.get_concurrency_manager(); @@ -347,7 +347,7 @@ fn test_max_commit_ts_error() { #[test] fn test_exceed_max_commit_ts_in_the_middle_of_prewrite() { let engine = TestEngineBuilder::new().build().unwrap(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .build() .unwrap(); let cm = storage.get_concurrency_manager(); diff --git a/tests/failpoints/cases/test_ttl.rs b/tests/failpoints/cases/test_ttl.rs index 12449752285..026a21136ab 100644 --- a/tests/failpoints/cases/test_ttl.rs +++ b/tests/failpoints/cases/test_ttl.rs @@ -12,7 +12,7 @@ use tikv::{ 
server::ttl::check_ttl_and_compact_files, storage::{ kv::{SnapContext, TestEngineBuilder}, - lock_manager::DummyLockManager, + lock_manager::MockLockManager, raw::encoded::RawEncodeSnapshot, test_util::{expect_ok_callback, expect_value}, Engine, Iterator, Snapshot, Statistics, TestStorageBuilder, @@ -394,7 +394,7 @@ fn test_stoarge_raw_batch_put_ttl() { fn test_stoarge_raw_batch_put_ttl_impl() { fail::cfg("ttl_current_ts", "return(100)").unwrap(); - let storage = TestStorageBuilder::<_, _, F>::new(DummyLockManager) + let storage = TestStorageBuilder::<_, _, F>::new(MockLockManager::new()) .build() .unwrap(); let (tx, rx) = channel(); diff --git a/tests/integrations/config/dynamic/pessimistic_txn.rs b/tests/integrations/config/dynamic/pessimistic_txn.rs index caad8a64f9b..7af5455a199 100644 --- a/tests/integrations/config/dynamic/pessimistic_txn.rs +++ b/tests/integrations/config/dynamic/pessimistic_txn.rs @@ -69,11 +69,11 @@ fn setup( fn validate_waiter(router: &WaiterMgrScheduler, f: F) where - F: FnOnce(ReadableDuration, ReadableDuration) + Send + 'static, + F: FnOnce(ReadableDuration) + Send + 'static, { let (tx, rx) = mpsc::channel(); - router.validate(Box::new(move |v1, v2| { - f(v1, v2); + router.validate(Box::new(move |v1| { + f(v1); tx.send(()).unwrap(); })); rx.recv_timeout(Duration::from_secs(3)).unwrap(); @@ -107,30 +107,10 @@ fn test_lock_manager_cfg_update() { cfg_controller .update_config("raftstore.raft-log-gc-threshold", "2000") .unwrap(); - validate_waiter( - &waiter, - move |timeout: ReadableDuration, delay: ReadableDuration| { - assert_eq!(timeout.as_millis(), DEFAULT_TIMEOUT); - assert_eq!(delay.as_millis(), DEFAULT_DELAY); - }, - ); - validate_dead_lock(&deadlock, move |ttl: u64| { - assert_eq!(ttl, DEFAULT_TIMEOUT); + validate_waiter(&waiter, move |timeout: ReadableDuration| { + assert_eq!(timeout.as_millis(), DEFAULT_TIMEOUT); }); - - // only update wake_up_delay_duration - cfg_controller - 
.update_config("pessimistic-txn.wake-up-delay-duration", "500ms") - .unwrap(); - validate_waiter( - &waiter, - move |timeout: ReadableDuration, delay: ReadableDuration| { - assert_eq!(timeout.as_millis(), DEFAULT_TIMEOUT); - assert_eq!(delay.as_millis(), 500); - }, - ); validate_dead_lock(&deadlock, move |ttl: u64| { - // dead lock ttl should not change assert_eq!(ttl, DEFAULT_TIMEOUT); }); @@ -138,38 +118,11 @@ fn test_lock_manager_cfg_update() { cfg_controller .update_config("pessimistic-txn.wait-for-lock-timeout", "4000ms") .unwrap(); - validate_waiter( - &waiter, - move |timeout: ReadableDuration, delay: ReadableDuration| { - assert_eq!(timeout.as_millis(), 4000); - // wake_up_delay_duration should be the same as last update - assert_eq!(delay.as_millis(), 500); - }, - ); - validate_dead_lock(&deadlock, move |ttl: u64| { - assert_eq!(ttl, 4000); + validate_waiter(&waiter, move |timeout: ReadableDuration| { + assert_eq!(timeout.as_millis(), 4000); }); - - // update both config - let mut m = std::collections::HashMap::new(); - m.insert( - "pessimistic-txn.wait-for-lock-timeout".to_owned(), - "4321ms".to_owned(), - ); - m.insert( - "pessimistic-txn.wake-up-delay-duration".to_owned(), - "123ms".to_owned(), - ); - cfg_controller.update(m).unwrap(); - validate_waiter( - &waiter, - move |timeout: ReadableDuration, delay: ReadableDuration| { - assert_eq!(timeout.as_millis(), 4321); - assert_eq!(delay.as_millis(), 123); - }, - ); validate_dead_lock(&deadlock, move |ttl: u64| { - assert_eq!(ttl, 4321); + assert_eq!(ttl, 4000); }); // update pipelined @@ -206,5 +159,24 @@ fn test_lock_manager_cfg_update() { .load(Ordering::SeqCst) ); + // update wake-up-delay-duration + assert_eq!( + lock_mgr + .get_storage_dynamic_configs() + .wake_up_delay_duration_ms + .load(Ordering::SeqCst), + DEFAULT_DELAY + ); + cfg_controller + .update_config("pessimistic-txn.wake-up-delay-duration", "500ms") + .unwrap(); + assert_eq!( + lock_mgr + .get_storage_dynamic_configs() + 
.wake_up_delay_duration_ms + .load(Ordering::SeqCst), + 500 + ); + lock_mgr.stop(); } diff --git a/tests/integrations/resource_metering/test_suite/mod.rs b/tests/integrations/resource_metering/test_suite/mod.rs index 667c86d230a..7dc6eceb0d5 100644 --- a/tests/integrations/resource_metering/test_suite/mod.rs +++ b/tests/integrations/resource_metering/test_suite/mod.rs @@ -21,7 +21,7 @@ use test_util::alloc_port; use tikv::{ config::{ConfigController, TikvConfig}, storage::{ - lock_manager::DummyLockManager, RocksEngine, StorageApiV1, TestEngineBuilder, + lock_manager::MockLockManager, RocksEngine, StorageApiV1, TestEngineBuilder, TestStorageBuilderApiV1, }, }; @@ -32,7 +32,7 @@ pub struct TestSuite { pubsub_server_port: u16, receiver_server: Option, - storage: StorageApiV1, + storage: StorageApiV1, cfg_controller: ConfigController, resource_tag_factory: ResourceTagFactory, @@ -84,10 +84,11 @@ impl TestSuite { ); let engine = TestEngineBuilder::new().build().unwrap(); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, DummyLockManager) - .set_resource_tag_factory(resource_tag_factory.clone()) - .build() - .unwrap(); + let storage = + TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) + .set_resource_tag_factory(resource_tag_factory.clone()) + .build() + .unwrap(); let (tx, rx) = unbounded(); @@ -118,7 +119,7 @@ impl TestSuite { } } - pub fn get_storage(&self) -> StorageApiV1 { + pub fn get_storage(&self) -> StorageApiV1 { self.storage.clone() } From 0f5058ebcc489bc28f997b93765862d79312b5c0 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Thu, 27 Oct 2022 14:59:57 +0800 Subject: [PATCH 0289/1149] raftstore: make sure PrepareFlashback will get the latest region meta (#13664) close tikv/tikv#13643 * Use `self.region` while executing `PrepareFlashback` command to ensure it gets the latest region meta. * Check the epoch before executing the `PrepareFlashback`. 
Signed-off-by: JmPotato --- components/raftstore/src/store/fsm/apply.rs | 39 +++-- components/raftstore/src/store/metrics.rs | 4 +- components/raftstore/src/store/util.rs | 6 +- components/test_raftstore/src/cluster.rs | 81 +++++----- .../integrations/raftstore/test_flashback.rs | 146 +++++++++++++++--- 5 files changed, 199 insertions(+), 77 deletions(-) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index a9124dc2faf..d3eb7f86461 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -1392,7 +1392,7 @@ where ExecResult::CommitMerge { ref region, .. } => (Some(region.clone()), None), ExecResult::RollbackMerge { ref region, .. } => (Some(region.clone()), None), ExecResult::IngestSst { ref ssts } => (None, Some(ssts.clone())), - ExecResult::SetFlashbackState { region } => (Some(region.clone()), None), + ExecResult::SetFlashbackState { ref region } => (Some(region.clone()), None), _ => (None, None), }, _ => (None, None), @@ -2829,30 +2829,27 @@ where ctx: &mut ApplyContext, req: &AdminRequest, ) -> Result<(AdminResponse, ApplyResult)> { - let region_id = self.region_id(); - let region_state_key = keys::region_state_key(region_id); - let mut old_state = match ctx - .engine - .get_msg_cf::(CF_RAFT, ®ion_state_key) - { - Ok(Some(s)) => s, - _ => { - return Err(box_err!("failed to get region state of {}", region_id)); - } - }; let is_in_flashback = req.get_cmd_type() == AdminCmdType::PrepareFlashback; - old_state.mut_region().set_is_in_flashback(is_in_flashback); + // Modify the region meta in memory. let mut region = self.region.clone(); region.set_is_in_flashback(is_in_flashback); - ctx.kv_wb_mut() - .put_msg_cf(CF_RAFT, &keys::region_state_key(region_id), &old_state) - .unwrap_or_else(|e| { - error!( - "{} failed to change flashback state to {:?} for region {}: {:?}", - self.tag, req, region_id, e - ) - }); + // Modify the `RegionLocalState` persisted in disk. 
+ write_peer_state(ctx.kv_wb_mut(), ®ion, PeerState::Normal, None).unwrap_or_else(|e| { + panic!( + "{} failed to change the flashback state to {} for region {:?}: {:?}", + self.tag, is_in_flashback, region, e + ) + }); + match req.get_cmd_type() { + AdminCmdType::PrepareFlashback => { + PEER_ADMIN_CMD_COUNTER.prepare_flashback.success.inc(); + } + AdminCmdType::FinishFlashback => { + PEER_ADMIN_CMD_COUNTER.finish_flashback.success.inc(); + } + _ => unreachable!(), + } Ok(( AdminResponse::default(), ApplyResult::Res(ExecResult::SetFlashbackState { region }), diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 14d8d7e97cc..7ab47cc90c6 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -33,7 +33,9 @@ make_auto_flush_static_metric! { commit_merge, rollback_merge, compact, - transfer_leader + transfer_leader, + prepare_flashback, + finish_flashback } pub label_enum AdminCmdStatus { diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 61da5805727..9f49730e1d0 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -193,8 +193,12 @@ pub fn admin_cmd_epoch_lookup(admin_cmp_type: AdminCmdType) -> AdminCmdEpochStat AdminCmdType::RollbackMerge => AdminCmdEpochState::new(true, true, true, false), // Transfer leader AdminCmdType::TransferLeader => AdminCmdEpochState::new(true, true, false, false), + // PrepareFlashback could be committed successfully before a split being applied, so we need + // to check the epoch to make sure it's sent to a correct key range. + // NOTICE: FinishFlashback will never meet the epoch not match error since any scheduling + // before it's forbidden. 
AdminCmdType::PrepareFlashback | AdminCmdType::FinishFlashback => { - AdminCmdEpochState::new(false, false, false, false) + AdminCmdEpochState::new(true, true, false, false) } } } diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index c097b22222d..c4ac98180a6 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -22,7 +22,7 @@ use file_system::IoRateLimiter; use futures::{self, channel::oneshot, executor::block_on}; use kvproto::{ errorpb::Error as PbError, - kvrpcpb::{ApiVersion, Context}, + kvrpcpb::{ApiVersion, Context, DiskFullOpt}, metapb::{self, Buckets, PeerRole, RegionEpoch, StoreLabel}, pdpb::{self, CheckPolicy, StoreReport}, raft_cmdpb::*, @@ -1438,14 +1438,47 @@ impl Cluster { .unwrap(); } - pub fn must_send_flashback_msg(&mut self, region_id: u64, cmd_type: AdminCmdType) { - self.wait_applied_to_current_term(region_id, Duration::from_secs(3)); + pub fn must_send_flashback_msg( + &mut self, + region_id: u64, + cmd_type: AdminCmdType, + cb: Callback, + ) { let leader = self.leader_of_region(region_id).unwrap(); let store_id = leader.get_store_id(); let region_epoch = self.get_region_epoch(region_id); - block_on(async move { - let (result_tx, result_rx) = oneshot::channel(); - let cb = Callback::write(Box::new(move |resp| { + let mut admin = AdminRequest::default(); + admin.set_cmd_type(cmd_type); + let mut req = RaftCmdRequest::default(); + req.mut_header().set_region_id(region_id); + req.mut_header().set_region_epoch(region_epoch); + req.mut_header().set_peer(leader); + req.set_admin_request(admin); + req.mut_header() + .set_flags(WriteBatchFlags::FLASHBACK.bits()); + let router = self.sim.rl().get_router(store_id).unwrap(); + if let Err(e) = router.send_command( + req, + cb, + RaftCmdExtraOpts { + deadline: None, + disk_full_opt: DiskFullOpt::AllowedOnAlmostFull, + }, + ) { + panic!( + "router send flashback msg {:?} failed, error: {}", + cmd_type, e + ); + 
} + } + + pub fn must_send_wait_flashback_msg(&mut self, region_id: u64, cmd_type: AdminCmdType) { + self.wait_applied_to_current_term(region_id, Duration::from_secs(3)); + let (result_tx, result_rx) = oneshot::channel(); + self.must_send_flashback_msg( + region_id, + cmd_type, + Callback::write(Box::new(move |resp| { if resp.response.get_header().has_error() { result_tx .send(Some(resp.response.get_header().get_error().clone())) @@ -1453,38 +1486,14 @@ impl Cluster { return; } result_tx.send(None).unwrap(); - })); - - let mut admin = AdminRequest::default(); - admin.set_cmd_type(cmd_type); - let mut req = RaftCmdRequest::default(); - req.mut_header().set_region_id(region_id); - req.mut_header().set_region_epoch(region_epoch); - req.mut_header().set_peer(leader); - req.set_admin_request(admin); - req.mut_header() - .set_flags(WriteBatchFlags::FLASHBACK.bits()); - let router = self.sim.rl().get_router(store_id).unwrap(); - if let Err(e) = router.send_command( - req, - cb, - RaftCmdExtraOpts { - deadline: None, - disk_full_opt: kvproto::kvrpcpb::DiskFullOpt::AllowedOnAlmostFull, - }, - ) { - panic!( - "router send flashback msg {:?} failed, error: {}", - cmd_type, e - ); - } - if let Some(e) = result_rx.await.unwrap() { - panic!("call flashback msg {:?} failed, error: {:?}", cmd_type, e); - } - }); + })), + ); + if let Some(e) = block_on(result_rx).unwrap() { + panic!("call flashback msg {:?} failed, error: {:?}", cmd_type, e); + } } - fn wait_applied_to_current_term(&mut self, region_id: u64, timeout: Duration) { + pub fn wait_applied_to_current_term(&mut self, region_id: u64, timeout: Duration) { let mut now = Instant::now(); let deadline = now + timeout; while now < deadline { diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index 810da9d840f..5227e7ea6bc 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -5,13 +5,123 @@ use std::{ 
time::{Duration, Instant}, }; +use futures::{channel::oneshot, executor::block_on}; use kvproto::{ + errorpb::FlashbackInProgress, metapb, raft_cmdpb::{AdminCmdType, CmdType, Request}, }; +use raftstore::store::Callback; use test_raftstore::*; use txn_types::WriteBatchFlags; +#[test] +fn test_prepare_flashback_after_split() { + let mut cluster = new_node_cluster(0, 3); + cluster.run(); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + + let old_region = cluster.get_region(b"a"); + cluster.wait_applied_to_current_term(old_region.get_id(), Duration::from_secs(3)); + // Pause the apply to make sure the split cmd and prepare flashback cmd are in + // the same batch. + let on_handle_apply_fp = "on_handle_apply"; + fail::cfg(on_handle_apply_fp, "pause").unwrap(); + // Send the split msg. + cluster.split_region( + &old_region, + b"b", + Callback::write(Box::new(|resp| { + if resp.response.get_header().has_error() { + panic!("split failed: {:?}", resp.response.get_header().get_error()); + } + })), + ); + // Make sure the admin split cmd is ready. + sleep(Duration::from_millis(100)); + // Send the prepare flashback msg. + let (result_tx, result_rx) = oneshot::channel(); + cluster.must_send_flashback_msg( + old_region.get_id(), + AdminCmdType::PrepareFlashback, + Callback::write(Box::new(move |resp| { + if resp.response.get_header().has_error() { + result_tx + .send(Some(resp.response.get_header().get_error().clone())) + .unwrap(); + return; + } + result_tx.send(None).unwrap(); + })), + ); + // Remove the pause to make these two commands are in the same batch to apply. + fail::remove(on_handle_apply_fp); + let prepare_flashback_err = block_on(result_rx).unwrap().unwrap(); + assert!( + prepare_flashback_err.has_epoch_not_match(), + "prepare flashback should fail with epoch not match, but got {:?}", + prepare_flashback_err + ); + // Check the region meta. 
+ let left_region = cluster.get_region(b"a"); + let right_region = cluster.get_region(b"b"); + assert!(left_region.get_id() != old_region.get_id()); + assert!(left_region.get_end_key() == right_region.get_start_key()); + assert!( + left_region.get_region_epoch().get_version() + == right_region.get_region_epoch().get_version() + ); + must_check_flashback_state(&mut cluster, left_region.get_id(), 1, false); + must_check_flashback_state(&mut cluster, right_region.get_id(), 1, false); +} + +#[test] +fn test_prepare_flashback_after_conf_change() { + let mut cluster = new_node_cluster(0, 3); + // Disable default max peer count check. + cluster.pd_client.disable_default_operator(); + + let region_id = cluster.run_conf_change(); + cluster.wait_applied_to_current_term(region_id, Duration::from_secs(3)); + // Pause the apply to make sure the conf change cmd and prepare flashback cmd + // are in the same batch. + let on_handle_apply_fp = "on_handle_apply"; + fail::cfg(on_handle_apply_fp, "pause").unwrap(); + // Send the conf change msg. + cluster.async_add_peer(region_id, new_peer(2, 2)).unwrap(); + // Make sure the conf change cmd is ready. + sleep(Duration::from_millis(100)); + // Send the prepare flashback msg. + let (result_tx, result_rx) = oneshot::channel(); + cluster.must_send_flashback_msg( + region_id, + AdminCmdType::PrepareFlashback, + Callback::write(Box::new(move |resp| { + if resp.response.get_header().has_error() { + result_tx + .send(Some(resp.response.get_header().get_error().clone())) + .unwrap(); + return; + } + result_tx.send(None).unwrap(); + })), + ); + // Remove the pause to make these two commands are in the same batch to apply. + fail::remove(on_handle_apply_fp); + let prepare_flashback_err = block_on(result_rx).unwrap().unwrap(); + assert!( + prepare_flashback_err.has_epoch_not_match(), + "prepare flashback should fail with epoch not match, but got {:?}", + prepare_flashback_err + ); + // Check the region meta. 
+ let region = cluster.get_region(b"a"); + assert!(region.get_id() == region_id); + assert!(region.get_peers().len() == 2); + must_check_flashback_state(&mut cluster, region_id, 1, false); +} + #[test] fn test_flashback_unprepared() { let mut cluster = new_node_cluster(0, 3); @@ -47,7 +157,7 @@ fn test_flashback_for_schedule() { // Prepare for flashback let region = cluster.get_region(b"k1"); - cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); + cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); // Verify the schedule is disabled. let mut region = cluster.get_region(b"k3"); @@ -60,13 +170,13 @@ fn test_flashback_for_schedule() { let e = resp.get_header().get_error(); assert_eq!( e.get_flashback_in_progress(), - &kvproto::errorpb::FlashbackInProgress { + &FlashbackInProgress { region_id: region.get_id(), ..Default::default() } ); - cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); + cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); // Transfer leader to (2, 2) should succeed. 
cluster.must_transfer_leader(1, new_peer(2, 2)); } @@ -83,7 +193,7 @@ fn test_flashback_for_write() { // Prepare for flashback let region = cluster.get_region(b"k1"); - cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); + cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); // Write will be blocked let value = vec![1_u8; 8096]; @@ -95,7 +205,7 @@ fn test_flashback_for_write() { new_put_cmd(b"k1", &value), ); - cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); + cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); } @@ -114,7 +224,7 @@ fn test_flashback_for_read() { // Prepare for flashback let region = cluster.get_region(b"k1"); - cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); + cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); // read will be blocked must_get_error_flashback_in_progress(&mut cluster, ®ion, new_get_cf_cmd("write", b"k1")); @@ -127,7 +237,7 @@ fn test_flashback_for_read() { new_get_cf_cmd("write", b"k1"), ); - cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); + cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); multi_do_cmd(&mut cluster, new_get_cf_cmd("write", b"k1")); } @@ -162,7 +272,7 @@ fn test_flashback_for_local_read() { assert_eq!(state.get_last_index(), last_index); // Prepare for flashback - cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); + cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); // Check the leader does a local read. 
let state = cluster.raft_local_state(region.get_id(), store_id); @@ -193,7 +303,7 @@ fn test_flashback_for_local_read() { let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index + 1); - cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); + cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index + 2); @@ -217,7 +327,7 @@ fn test_flashback_for_status_cmd_as_region_detail() { let leader = cluster.leader_of_region(1).unwrap(); let region = cluster.get_region(b"k1"); - cluster.must_send_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); + cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); let region_detail = cluster.region_detail(region.get_id(), leader.get_store_id()); assert!(region_detail.has_region()); @@ -243,10 +353,10 @@ fn test_flashback_for_check_is_in_persist() { must_check_flashback_state(&mut cluster, 1, 2, false); // Prepare for flashback - cluster.must_send_flashback_msg(1, AdminCmdType::PrepareFlashback); + cluster.must_send_wait_flashback_msg(1, AdminCmdType::PrepareFlashback); must_check_flashback_state(&mut cluster, 1, 2, true); - cluster.must_send_flashback_msg(1, AdminCmdType::FinishFlashback); + cluster.must_send_wait_flashback_msg(1, AdminCmdType::FinishFlashback); must_check_flashback_state(&mut cluster, 1, 2, false); } @@ -273,7 +383,7 @@ fn test_flashback_for_apply_snapshot() { } // Prepare for flashback - cluster.must_send_flashback_msg(1, AdminCmdType::PrepareFlashback); + cluster.must_send_wait_flashback_msg(1, AdminCmdType::PrepareFlashback); must_check_flashback_state(&mut cluster, 1, 1, true); must_check_flashback_state(&mut cluster, 1, 3, false); @@ -282,7 +392,7 @@ fn test_flashback_for_apply_snapshot() { must_check_flashback_state(&mut cluster, 1, 1, true); 
must_check_flashback_state(&mut cluster, 1, 3, true); - cluster.must_send_flashback_msg(1, AdminCmdType::FinishFlashback); + cluster.must_send_wait_flashback_msg(1, AdminCmdType::FinishFlashback); must_check_flashback_state(&mut cluster, 1, 1, false); must_check_flashback_state(&mut cluster, 1, 3, false); } @@ -310,7 +420,7 @@ fn must_check_flashback_state( ); } -fn multi_do_cmd(cluster: &mut Cluster, cmd: kvproto::raft_cmdpb::Request) { +fn multi_do_cmd(cluster: &mut Cluster, cmd: Request) { for _ in 0..100 { let mut reqs = vec![]; for _ in 0..100 { @@ -323,7 +433,7 @@ fn multi_do_cmd(cluster: &mut Cluster, cmd: kvproto::raft_cmdpb fn must_do_cmd_with_flashback_flag( cluster: &mut Cluster, region: &mut metapb::Region, - cmd: kvproto::raft_cmdpb::Request, + cmd: Request, ) { // Verify the read can be executed if add flashback flag in request's // header. @@ -344,7 +454,7 @@ fn must_do_cmd_with_flashback_flag( fn must_get_error_flashback_in_progress( cluster: &mut Cluster, region: &metapb::Region, - cmd: kvproto::raft_cmdpb::Request, + cmd: Request, ) { for _ in 0..100 { let mut reqs = vec![]; @@ -356,7 +466,7 @@ fn must_get_error_flashback_in_progress( Err(e) => { assert_eq!( e.get_flashback_in_progress(), - &kvproto::errorpb::FlashbackInProgress { + &FlashbackInProgress { region_id: region.get_id(), ..Default::default() } From b7880cdd1447e92bca7e6e68f0ab41e2fefcce45 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 28 Oct 2022 14:33:58 +0800 Subject: [PATCH 0290/1149] cdc, resolved_ts: reduce allocation in leadership resolver (#13666) close tikv/tikv#13665 cdc, resolved_ts: reduce allocation in leadership resolver Signed-off-by: Neil Shen Co-authored-by: Ti Chi Robot --- components/cdc/Cargo.toml | 1 - components/cdc/src/endpoint.rs | 38 +++-- components/raftstore/src/store/util.rs | 37 +++-- components/resolved_ts/src/advance.rs | 214 ++++++++++++++----------- components/resolved_ts/src/endpoint.rs | 3 +- components/resolved_ts/src/lib.rs | 1 - 
components/resolved_ts/src/util.rs | 12 -- 7 files changed, 181 insertions(+), 125 deletions(-) delete mode 100644 components/resolved_ts/src/util.rs diff --git a/components/cdc/Cargo.toml b/components/cdc/Cargo.toml index 62ef4cc29f5..27ce81c57b4 100644 --- a/components/cdc/Cargo.toml +++ b/components/cdc/Cargo.toml @@ -26,7 +26,6 @@ portable = ["tikv/portable"] sse = ["tikv/sse"] mem-profiling = ["tikv/mem-profiling"] failpoints = ["tikv/failpoints"] -pprof-fp = ["tikv/pprof-fp"] [dependencies] api_version = { workspace = true } diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 614e282a5d9..8aa6aad3c29 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -40,7 +40,7 @@ use tikv_util::{ mpsc::bounded, slow_log, sys::thread::ThreadBuildWrapper, - time::{Limiter, SlowTimer}, + time::{Instant, Limiter, SlowTimer}, timer::SteadyTimer, warn, worker::{Runnable, RunnableWithTimer, ScheduleError, Scheduler}, @@ -154,6 +154,8 @@ pub enum Task { }, RegisterMinTsEvent { leader_resolver: LeadershipResolver, + // The time at which the event actually occurred. + event_time: Instant, }, // The result of ChangeCmd should be returned from CDC Endpoint to ensure // the downstream switches to Normal after the previous commands was sunk. @@ -222,7 +224,9 @@ impl fmt::Debug for Task { .field("observe_id", &observe_id) .field("region_id", ®ion.get_id()) .finish(), - Task::RegisterMinTsEvent { .. } => de.field("type", &"register_min_ts").finish(), + Task::RegisterMinTsEvent { ref event_time, .. } => { + de.field("event_time", &event_time).finish() + } Task::InitDownstream { ref region_id, ref downstream_id, @@ -447,13 +451,12 @@ impl, E: KvEngine> Endpoint { resolved_region_count: 0, unresolved_region_count: 0, sink_memory_quota, - // store_resolver, // Log the first resolved ts warning. 
warn_resolved_ts_repeat_count: WARN_RESOLVED_TS_COUNT_THRESHOLD, current_ts: TimeStamp::zero(), causal_ts_provider, }; - ep.register_min_ts_event(leader_resolver); + ep.register_min_ts_event(leader_resolver, Instant::now()); ep } @@ -996,8 +999,16 @@ impl, E: KvEngine> Endpoint { let _ = downstream.sink_event(resolved_ts_event, force_send); } - fn register_min_ts_event(&self, mut leader_resolver: LeadershipResolver) { - let timeout = self.timer.delay(self.config.min_ts_interval.0); + fn register_min_ts_event(&self, mut leader_resolver: LeadershipResolver, event_time: Instant) { + // Try to keep advance resolved ts every `min_ts_interval`, thus + // the actual wait interval = `min_ts_interval` - the last register min_ts event + // time. + let interval = self + .config + .min_ts_interval + .0 + .checked_sub(event_time.saturating_elapsed()); + let timeout = self.timer.delay(interval.unwrap_or_default()); let pd_client = self.pd_client.clone(); let scheduler = self.scheduler.clone(); let raft_router = self.raft_router.clone(); @@ -1043,7 +1054,10 @@ impl, E: KvEngine> Endpoint { defer!({ slow_log!(T slow_timer, "cdc resolve region leadership"); if let Ok(leader_resolver) = leader_resolver_rx.try_recv() { - match scheduler.schedule(Task::RegisterMinTsEvent { leader_resolver }) { + match scheduler.schedule(Task::RegisterMinTsEvent { + leader_resolver, + event_time: Instant::now(), + }) { Ok(_) | Err(ScheduleError::Stopped(_)) => (), // Must schedule `RegisterMinTsEvent` event otherwise resolved ts can not // advance normally. 
@@ -1129,8 +1143,9 @@ impl, E: KvEngine> Runnable for Endpoint { } => self.on_multi_batch(multi, old_value_cb), Task::OpenConn { conn } => self.on_open_conn(conn), Task::RegisterMinTsEvent { - leader_resolver: store_resolver, - } => self.register_min_ts_event(store_resolver), + leader_resolver, + event_time, + } => self.register_min_ts_event(leader_resolver, event_time), Task::InitDownstream { region_id, downstream_id, @@ -1871,7 +1886,10 @@ mod tests { let mut suite = mock_endpoint_with_ts_provider(&cfg, None, ApiVersion::V2, Some(ts_provider.clone())); let leader_resolver = suite.leader_resolver.take().unwrap(); - suite.run(Task::RegisterMinTsEvent { leader_resolver }); + suite.run(Task::RegisterMinTsEvent { + leader_resolver, + event_time: Instant::now(), + }); suite .task_rx .recv_timeout(Duration::from_millis(1500)) diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 9f49730e1d0..1e571296e1a 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1215,11 +1215,11 @@ impl RegionReadProgress { } // Dump the `LeaderInfo` and the peer list - pub fn dump_leader_info(&self) -> (Vec, LeaderInfo) { + pub fn dump_leader_info(&self) -> (LeaderInfo, Option) { let core = self.core.lock().unwrap(); ( - core.get_local_leader_info().peers.clone(), core.get_leader_info(), + core.get_local_leader_info().leader_store_id, ) } @@ -1231,6 +1231,8 @@ impl RegionReadProgress { core.leader_info.epoch = region.get_region_epoch().clone(); core.leader_info.peers = region.get_peers().to_vec(); } + core.leader_info.leader_store_id = + find_store_id(&core.leader_info.peers, core.leader_info.leader_id) } /// Reset `safe_ts` to 0 and stop updating it @@ -1308,6 +1310,7 @@ pub struct ReadState { pub struct LocalLeaderInfo { leader_id: u64, leader_term: u64, + leader_store_id: Option, epoch: RegionEpoch, peers: Vec, } @@ -1317,6 +1320,7 @@ impl LocalLeaderInfo { LocalLeaderInfo { leader_id: 
raft::INVALID_ID, leader_term: 0, + leader_store_id: None, epoch: region.get_region_epoch().clone(), peers: region.get_peers().to_vec(), } @@ -1329,6 +1333,19 @@ impl LocalLeaderInfo { pub fn get_leader_id(&self) -> u64 { self.leader_id } + + pub fn get_leader_store_id(&self) -> Option { + self.leader_store_id + } +} + +fn find_store_id(peer_list: &[Peer], peer_id: u64) -> Option { + for peer in peer_list { + if peer.id == peer_id { + return Some(peer.store_id); + } + } + None } impl RegionReadProgressCore { @@ -1444,7 +1461,6 @@ impl RegionReadProgressCore { } pub fn get_leader_info(&self) -> LeaderInfo { - let mut leader_info = LeaderInfo::default(); let read_state = { // Get the latest `read_state` let ReadState { idx, ts } = self.pending_items.back().unwrap_or(&self.read_state); @@ -1454,12 +1470,15 @@ impl RegionReadProgressCore { rs }; let li = &self.leader_info; - leader_info.set_peer_id(li.leader_id); - leader_info.set_term(li.leader_term); - leader_info.set_region_id(self.region_id); - leader_info.set_region_epoch(li.epoch.clone()); - leader_info.set_read_state(read_state); - leader_info + LeaderInfo { + peer_id: li.leader_id, + region_id: self.region_id, + term: li.leader_term, + region_epoch: protobuf::SingularPtrField::some(li.epoch.clone()), + read_state: protobuf::SingularPtrField::some(read_state), + unknown_fields: protobuf::UnknownFields::default(), + cached_size: protobuf::CachedSize::default(), + } } pub fn get_local_leader_info(&self) -> &LocalLeaderInfo { diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index 35426f4861d..496c5c8fab8 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -14,9 +14,9 @@ use concurrency_manager::ConcurrencyManager; use engine_traits::KvEngine; use fail::fail_point; use futures::{compat::Future01CompatExt, future::select_all, FutureExt, TryFutureExt}; -use grpcio::{ChannelBuilder, Environment}; +use grpcio::{ChannelBuilder, 
Environment, Error as GrpcError, RpcStatusCode}; use kvproto::{ - kvrpcpb::{CheckLeaderRequest, LeaderInfo}, + kvrpcpb::{CheckLeaderRequest, CheckLeaderResponse}, metapb::{Peer, PeerRole}, tikvpb::TikvClient, }; @@ -43,7 +43,7 @@ use tokio::{ }; use txn_types::TimeStamp; -use crate::{endpoint::Task, metrics::*, util}; +use crate::{endpoint::Task, metrics::*}; const DEFAULT_CHECK_LEADER_TIMEOUT_MILLISECONDS: u64 = 5_000; // 5s @@ -143,11 +143,11 @@ pub struct LeadershipResolver { region_read_progress: RegionReadProgressRegistry, store_id: u64, - // store_id -> leaders info, record the request to each stores - store_map: HashMap>, - // region_id -> region, cache the information of regions + // store_id -> check leader request, record the request to each stores. + store_req_map: HashMap, + // region_id -> region, cache the information of regions. region_map: HashMap>, - // region_id -> peers id, record the responses + // region_id -> peers id, record the responses. resp_map: HashMap>, valid_regions: HashSet, @@ -172,7 +172,7 @@ impl LeadershipResolver { security_mgr, region_read_progress, - store_map: HashMap::default(), + store_req_map: HashMap::default(), region_map: HashMap::default(), resp_map: HashMap::default(), valid_regions: HashSet::default(), @@ -184,7 +184,7 @@ impl LeadershipResolver { fn gc(&mut self) { let now = Instant::now_coarse(); if now - self.last_gc_time > self.gc_interval { - self.store_map = HashMap::default(); + self.store_req_map = HashMap::default(); self.region_map = HashMap::default(); self.resp_map = HashMap::default(); self.valid_regions = HashSet::default(); @@ -193,9 +193,16 @@ impl LeadershipResolver { } fn clear(&mut self) { - self.store_map.clear(); - self.region_map.clear(); - self.resp_map.clear(); + for v in self.store_req_map.values_mut() { + v.regions.clear(); + v.ts = 0; + } + for v in self.region_map.values_mut() { + v.clear(); + } + for v in self.resp_map.values_mut() { + v.clear(); + } self.valid_regions.clear(); } @@ -241,7 
+248,7 @@ impl LeadershipResolver { // This function broadcasts a special message to all stores, gets the leader id // of them to confirm whether current peer has a quorum which accepts its // leadership. - pub async fn resolve(&mut self, regions: Vec, min_ts: TimeStamp) -> Vec { + pub async fn resolve(&mut self, _regions: Vec, min_ts: TimeStamp) -> Vec { // Clear previous result before resolving. self.clear(); // GC when necessary to prevent memory leak. @@ -249,21 +256,22 @@ impl LeadershipResolver { PENDING_RTS_COUNT.inc(); defer!(PENDING_RTS_COUNT.dec()); - fail_point!("before_sync_replica_read_state", |_| regions.clone()); + fail_point!("before_sync_replica_read_state", |_| _regions.clone()); let store_id = self.store_id; let valid_regions = &mut self.valid_regions; let region_map = &mut self.region_map; let resp_map = &mut self.resp_map; - let store_map = &mut self.store_map; + let store_req_map = &mut self.store_req_map; self.region_read_progress.with(|registry| { for (region_id, read_progress) in registry { let core = read_progress.get_core(); let local_leader_info = core.get_local_leader_info(); let leader_id = local_leader_info.get_leader_id(); + let leader_store_id = local_leader_info.get_leader_store_id(); let peer_list = local_leader_info.get_peers(); // Check if the leader in this store - if util::find_store_id(peer_list, leader_id) != Some(store_id) { + if leader_store_id != Some(store_id) { continue; } let leader_info = core.get_leader_info(); @@ -271,13 +279,21 @@ impl LeadershipResolver { let mut unvotes = 0; for peer in peer_list { if peer.store_id == store_id && peer.id == leader_id { - resp_map.entry(*region_id).or_default().push(store_id); + resp_map + .entry(*region_id) + .or_insert_with(|| Vec::with_capacity(peer_list.len())) + .push(store_id); } else { // It's still necessary to check leader on learners even if they don't vote // because performing stale read on learners require it. 
- store_map + store_req_map .entry(peer.store_id) - .or_default() + .or_insert_with(|| { + let mut req = CheckLeaderRequest::default(); + req.regions = Vec::with_capacity(registry.len()).into(); + req + }) + .regions .push(leader_info.clone()); if peer.get_role() != PeerRole::Learner { unvotes += 1; @@ -289,7 +305,10 @@ impl LeadershipResolver { if unvotes == 0 && region_has_quorum(peer_list, &resp_map[region_id]) { valid_regions.insert(*region_id); } else { - region_map.insert(*region_id, peer_list.to_vec()); + region_map + .entry(*region_id) + .or_insert_with(|| Vec::with_capacity(peer_list.len())) + .extend_from_slice(peer_list); } } }); @@ -299,62 +318,69 @@ impl LeadershipResolver { let security_mgr = &self.security_mgr; let tikv_clients = &self.tikv_clients; // Approximate `LeaderInfo` size - let leader_info_size = store_map + let leader_info_size = store_req_map .values() - .next() - .map_or(0, |regions| regions[0].compute_size()); - let store_count = store_map.len(); - let mut stores: Vec<_> = store_map - .drain() - .map(|(to_store, regions)| { - let env = env.clone(); - let region_num = regions.len() as u32; - CHECK_LEADER_REQ_SIZE_HISTOGRAM.observe((leader_info_size * region_num) as f64); - CHECK_LEADER_REQ_ITEM_COUNT_HISTOGRAM.observe(region_num as f64); - - // Check leadership for `regions` on `to_store`. 
- async move { - PENDING_CHECK_LEADER_REQ_COUNT.inc(); - defer!(PENDING_CHECK_LEADER_REQ_COUNT.dec()); - let client = - get_tikv_client(to_store, pd_client, security_mgr, env, tikv_clients) - .await - .map_err(|e| { - (to_store, e.retryable(), format!("[get tikv client] {}", e)) - })?; - - let mut req = CheckLeaderRequest::default(); - req.set_regions(regions.into()); - req.set_ts(min_ts.into_inner()); - let slow_timer = SlowTimer::default(); - defer!({ - slow_log!( - T - slow_timer, - "check leader rpc costs too long, to_store: {}", - to_store - ); - let elapsed = slow_timer.saturating_elapsed(); - RTS_CHECK_LEADER_DURATION_HISTOGRAM_VEC - .with_label_values(&["rpc"]) - .observe(elapsed.as_secs_f64()); - }); - - let rpc = client - .check_leader_async(&req) - .map_err(|e| (to_store, true, format!("[rpc create failed]{}", e)))?; - PENDING_CHECK_LEADER_REQ_SENT_COUNT.inc(); - defer!(PENDING_CHECK_LEADER_REQ_SENT_COUNT.dec()); - let timeout = Duration::from_millis(DEFAULT_CHECK_LEADER_TIMEOUT_MILLISECONDS); - let resp = tokio::time::timeout(timeout, rpc) - .map_err(|e| (to_store, true, format!("[timeout] {}", e))) - .await? - .map_err(|e| (to_store, true, format!("[rpc failed] {}", e)))?; - Ok((to_store, resp)) - } - .boxed() - }) - .collect(); + .find(|req| !req.regions.is_empty()) + .map_or(0, |req| req.regions[0].compute_size()); + let store_count = store_req_map.len(); + let mut check_leader_rpcs = Vec::with_capacity(store_req_map.len()); + for (store_id, req) in store_req_map { + if req.regions.is_empty() { + continue; + } + let env = env.clone(); + let to_store = *store_id; + let region_num = req.regions.len() as u32; + CHECK_LEADER_REQ_SIZE_HISTOGRAM.observe((leader_info_size * region_num) as f64); + CHECK_LEADER_REQ_ITEM_COUNT_HISTOGRAM.observe(region_num as f64); + + // Check leadership for `regions` on `to_store`. 
+ let rpc = async move { + PENDING_CHECK_LEADER_REQ_COUNT.inc(); + defer!(PENDING_CHECK_LEADER_REQ_COUNT.dec()); + let client = get_tikv_client(to_store, pd_client, security_mgr, env, tikv_clients) + .await + .map_err(|e| (to_store, e.retryable(), format!("[get tikv client] {}", e)))?; + + // Set min_ts in the request. + req.set_ts(min_ts.into_inner()); + let slow_timer = SlowTimer::default(); + defer!({ + slow_log!( + T + slow_timer, + "check leader rpc costs too long, to_store: {}", + to_store + ); + let elapsed = slow_timer.saturating_elapsed(); + RTS_CHECK_LEADER_DURATION_HISTOGRAM_VEC + .with_label_values(&["rpc"]) + .observe(elapsed.as_secs_f64()); + }); + + let rpc = match client.check_leader_async(req) { + Ok(rpc) => rpc, + Err(GrpcError::RpcFailure(status)) + if status.code() == RpcStatusCode::UNIMPLEMENTED => + { + // Some stores like TiFlash don't implement it. + return Ok((to_store, CheckLeaderResponse::default())); + } + Err(e) => return Err((to_store, true, format!("[rpc create failed]{}", e))), + }; + + PENDING_CHECK_LEADER_REQ_SENT_COUNT.inc(); + defer!(PENDING_CHECK_LEADER_REQ_SENT_COUNT.dec()); + let timeout = Duration::from_millis(DEFAULT_CHECK_LEADER_TIMEOUT_MILLISECONDS); + let resp = tokio::time::timeout(timeout, rpc) + .map_err(|e| (to_store, true, format!("[timeout] {}", e))) + .await? + .map_err(|e| (to_store, true, format!("[rpc failed] {}", e)))?; + Ok((to_store, resp)) + } + .boxed(); + check_leader_rpcs.push(rpc); + } let start = Instant::now_coarse(); defer!({ @@ -362,21 +388,19 @@ impl LeadershipResolver { .with_label_values(&["all"]) .observe(start.saturating_elapsed_secs()); }); - for _ in 0..store_count { + let rpc_count = check_leader_rpcs.len(); + for _ in 0..rpc_count { // Use `select_all` to avoid the process getting blocked when some // TiKVs were down. 
- let (res, _, remains) = select_all(stores).await; - stores = remains; + let (res, _, remains) = select_all(check_leader_rpcs).await; + check_leader_rpcs = remains; match res { Ok((to_store, resp)) => { for region_id in resp.regions { - if let Some(r) = region_map.get(®ion_id) { - let resps = resp_map.entry(region_id).or_default(); - resps.push(to_store); - if region_has_quorum(r, resps) { - valid_regions.insert(region_id); - } - } + resp_map + .entry(region_id) + .or_insert_with(|| Vec::with_capacity(store_count)) + .push(to_store); } } Err((to_store, reconnect, err)) => { @@ -386,11 +410,21 @@ impl LeadershipResolver { } } } - // Return early if all regions had already got quorum. - if valid_regions.len() == regions.len() { - // break here because all regions have quorum, - // so there is no need waiting for other stores to respond. - break; + } + for (region_id, prs) in region_map { + if prs.is_empty() { + // The peer had the leadership before, but now it's no longer + // the case. Skip checking the region. + continue; + } + if let Some(resp) = resp_map.get(region_id) { + if resp.is_empty() { + // No response, maybe the peer lost leadership. 
+ continue; + } + if region_has_quorum(prs, resp) { + valid_regions.insert(*region_id); + } } } self.valid_regions.drain().collect() diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 480c0ee6896..4f957e8266d 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -788,8 +788,7 @@ where let (mut oldest_leader_ts, mut oldest_leader_region) = (u64::MAX, 0); self.region_read_progress.with(|registry| { for (region_id, read_progress) in registry { - let (peers, leader_info) = read_progress.dump_leader_info(); - let leader_store_id = crate::util::find_store_id(&peers, leader_info.peer_id); + let (leader_info, leader_store_id) = read_progress.dump_leader_info(); let ts = leader_info.get_read_state().get_safe_ts(); if ts == 0 { zero_ts_count += 1; diff --git a/components/resolved_ts/src/lib.rs b/components/resolved_ts/src/lib.rs index 5ad2941dde2..5d4e233808d 100644 --- a/components/resolved_ts/src/lib.rs +++ b/components/resolved_ts/src/lib.rs @@ -37,4 +37,3 @@ mod scanner; pub use scanner::*; mod metrics; pub use metrics::*; -mod util; diff --git a/components/resolved_ts/src/util.rs b/components/resolved_ts/src/util.rs deleted file mode 100644 index 11bc1c547a0..00000000000 --- a/components/resolved_ts/src/util.rs +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. - -use kvproto::metapb::Peer; - -pub fn find_store_id(peer_list: &[Peer], peer_id: u64) -> Option { - for peer in peer_list { - if peer.id == peer_id { - return Some(peer.store_id); - } - } - None -} From d03290473d8116d4c676c42fd6780b198d8ce7f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Fri, 28 Oct 2022 15:19:58 +0800 Subject: [PATCH 0291/1149] log-backup: added `Debug` output of error to report log (#13686) close tikv/tikv#13685 `report` would now print the `Debug` version of error. 
Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- components/backup-stream/src/errors.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/backup-stream/src/errors.rs b/components/backup-stream/src/errors.rs index 493cf28babc..b34e7126360 100644 --- a/components/backup-stream/src/errors.rs +++ b/components/backup-stream/src/errors.rs @@ -132,14 +132,14 @@ macro_rules! annotate { impl Error { pub fn report(&self, context: impl Display) { - warn!("backup stream meet error"; "context" => %context, "err" => %self); + warn!("backup stream meet error"; "context" => %context, "err" => %self, "verbose_err" => ?self); metrics::STREAM_ERROR .with_label_values(&[self.kind()]) .inc() } pub fn report_fatal(&self) { - error!(%self; "backup stream meet fatal error"); + error!(%self; "backup stream meet fatal error"; "verbose" => ?self, ); metrics::STREAM_FATAL_ERROR .with_label_values(&[self.kind()]) .inc() From fe61a03438d91bb1739e8e95db66c9927254d62f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Fri, 28 Oct 2022 17:05:58 +0800 Subject: [PATCH 0292/1149] log-backup: fix slow flush in GCP (#13674) close tikv/tikv#13688, close pingcap/tidb#38642 Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/cloud/gcp/Cargo.toml | 2 + components/cloud/gcp/src/gcs.rs | 157 ++++++++++++++++++++++++++------ components/cloud/gcp/src/lib.rs | 19 ++++ 4 files changed, 152 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a6b25808098..49e6184ab5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2149,6 +2149,7 @@ dependencies = [ "hyper-tls", "kvproto", "matches", + "pin-project", "slog", "slog-global", "tame-gcs", diff --git a/components/cloud/gcp/Cargo.toml b/components/cloud/gcp/Cargo.toml index f184377c0af..f0446fa096d 100644 --- a/components/cloud/gcp/Cargo.toml +++ b/components/cloud/gcp/Cargo.toml @@ -23,3 +23,5 @@ url = "2.0" 
[dev-dependencies] matches = "0.1.8" +pin-project = "1" +tokio = { version = "1.5", features = ["rt"] } diff --git a/components/cloud/gcp/src/gcs.rs b/components/cloud/gcp/src/gcs.rs index e8e8ad20ee9..01f69a6d245 100644 --- a/components/cloud/gcp/src/gcs.rs +++ b/components/cloud/gcp/src/gcs.rs @@ -2,12 +2,13 @@ use std::{convert::TryInto, fmt::Display, io, sync::Arc}; use async_trait::async_trait; -use cloud::blob::{ - none_to_empty, BlobConfig, BlobStorage, BucketConf, PutResource, StringNonEmpty, +use cloud::{ + blob::{none_to_empty, BlobConfig, BlobStorage, BucketConf, PutResource, StringNonEmpty}, + metrics, }; use futures_util::{ future::TryFutureExt, - io::{AsyncRead, AsyncReadExt, Cursor}, + io::{self as async_io, AsyncRead, Cursor}, stream::{StreamExt, TryStreamExt}, }; use http::HeaderValue; @@ -20,7 +21,12 @@ use tame_gcs::{ types::{BucketName, ObjectId}, }; use tame_oauth::gcp::{ServiceAccountAccess, ServiceAccountInfo, TokenOrRequest}; -use tikv_util::stream::{error_stream, retry, AsyncReadAsSyncStreamOfBytes, RetryError}; +use tikv_util::{ + stream::{error_stream, AsyncReadAsSyncStreamOfBytes, RetryError}, + time::Instant, +}; + +use crate::utils::retry; const GOOGLE_APIS: &str = "https://www.googleapis.com"; const HARDCODED_ENDPOINTS_SUFFIX: &[&str] = &["upload/storage/v1/", "storage/v1/"]; @@ -156,6 +162,7 @@ impl ResultExt for Result { } } +#[derive(Debug)] enum RequestError { Hyper(hyper::Error, String), OAuth(tame_oauth::Error, String), @@ -433,6 +440,14 @@ fn parse_predefined_acl(acl: &str) -> Result, &str> { })) } +/// Like AsyncReadExt::read_to_end, but only try to initialize the buffer once. +/// Check https://github.com/rust-lang/futures-rs/issues/2658 for the reason we cannot +/// directly use it. 
+async fn read_to_end(r: R, v: &mut Vec) -> std::io::Result { + let mut c = Cursor::new(v); + async_io::copy(r, &mut c).await +} + const STORAGE_NAME: &str = "gcs"; #[async_trait] @@ -441,12 +456,7 @@ impl BlobStorage for GcsStorage { Box::new(self.config.clone()) as Box } - async fn put( - &self, - name: &str, - mut reader: PutResource, - content_length: u64, - ) -> io::Result<()> { + async fn put(&self, name: &str, reader: PutResource, content_length: u64) -> io::Result<()> { if content_length == 0 { // It is probably better to just write the empty file // However, currently going forward results in a body write aborted error @@ -470,25 +480,36 @@ impl BlobStorage for GcsStorage { // FIXME: Switch to upload() API so we don't need to read the entire data into // memory in order to retry. + let begin = Instant::now_coarse(); let mut data = Vec::with_capacity(content_length as usize); - reader.read_to_end(&mut data).await?; - retry(|| async { - let data = Cursor::new(data.clone()); - let req = Object::insert_multipart( - &bucket, - data, - content_length, - &metadata, - Some(InsertObjectOptional { - predefined_acl: self.config.predefined_acl, - ..Default::default() - }), - ) - .map_err(RequestError::Gcs)? - .map(|reader| Body::wrap_stream(AsyncReadAsSyncStreamOfBytes::new(reader))); - self.make_request(req, tame_gcs::Scopes::ReadWrite).await - }) + read_to_end(reader, &mut data).await?; + metrics::CLOUD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["gcp", "read_local"]) + .observe(begin.saturating_elapsed_secs()); + let begin = Instant::now_coarse(); + retry( + || async { + let data = Cursor::new(data.clone()); + let req = Object::insert_multipart( + &bucket, + data, + content_length, + &metadata, + Some(InsertObjectOptional { + predefined_acl: self.config.predefined_acl, + ..Default::default() + }), + ) + .map_err(RequestError::Gcs)? 
+ .map(|reader| Body::wrap_stream(AsyncReadAsSyncStreamOfBytes::new(reader))); + self.make_request(req, tame_gcs::Scopes::ReadWrite).await + }, + "insert_multipart", + ) .await?; + metrics::CLOUD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["gcp", "insert_multipart"]) + .observe(begin.saturating_elapsed_secs()); Ok::<_, io::Error>(()) } @@ -504,6 +525,10 @@ impl BlobStorage for GcsStorage { #[cfg(test)] mod tests { + extern crate test; + use std::task::Poll; + + use futures_util::AsyncReadExt; use matches::assert_matches; use super::*; @@ -605,6 +630,84 @@ mod tests { assert_eq!(c1.bucket.prefix, c2.bucket.prefix); } + enum ThrottleReadState { + Spawning, + Emitting, + } + /// ThrottleRead throttles a `Read` -- make it emits 2 chars for each + /// `read` call. This is copy & paste from the implmentation from s3.rs. + #[pin_project::pin_project] + struct ThrottleRead { + #[pin] + inner: R, + state: ThrottleReadState, + } + impl AsyncRead for ThrottleRead { + fn poll_read( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + buf: &mut [u8], + ) -> Poll> { + let this = self.project(); + match this.state { + ThrottleReadState::Spawning => { + *this.state = ThrottleReadState::Emitting; + cx.waker().wake_by_ref(); + Poll::Pending + } + ThrottleReadState::Emitting => { + *this.state = ThrottleReadState::Spawning; + this.inner.poll_read(cx, &mut buf[..2]) + } + } + } + } + impl ThrottleRead { + fn new(r: R) -> Self { + Self { + inner: r, + state: ThrottleReadState::Spawning, + } + } + } + + const BENCH_READ_SIZE: usize = 128 * 1024; + + // 255,120,895 ns/iter (+/- 73,332,249) (futures-util 0.3.15) + #[bench] + fn bench_read_to_end(b: &mut test::Bencher) { + let mut v = [0; BENCH_READ_SIZE]; + let mut dst = Vec::with_capacity(BENCH_READ_SIZE); + let rt = tokio::runtime::Builder::new_current_thread() + .build() + .unwrap(); + + b.iter(|| { + let mut r = ThrottleRead::new(Cursor::new(&mut v)); + dst.clear(); + + rt.block_on(r.read_to_end(&mut dst)).unwrap(); 
+ assert_eq!(dst.len(), BENCH_READ_SIZE) + }) + } + + // 5,850,042 ns/iter (+/- 3,787,438) + #[bench] + fn bench_manual_read_to_end(b: &mut test::Bencher) { + let mut v = [0; BENCH_READ_SIZE]; + let mut dst = Vec::with_capacity(BENCH_READ_SIZE); + let rt = tokio::runtime::Builder::new_current_thread() + .build() + .unwrap(); + b.iter(|| { + let r = ThrottleRead::new(Cursor::new(&mut v)); + dst.clear(); + + rt.block_on(read_to_end(r, &mut dst)).unwrap(); + assert_eq!(dst.len(), BENCH_READ_SIZE) + }) + } + fn cloud_dynamic_from_input(mut gcs: InputConfig) -> CloudDynamic { let mut bucket = InputBucket::default(); if !gcs.endpoint.is_empty() { diff --git a/components/cloud/gcp/src/lib.rs b/components/cloud/gcp/src/lib.rs index 4652bbf5b74..9ad97793988 100644 --- a/components/cloud/gcp/src/lib.rs +++ b/components/cloud/gcp/src/lib.rs @@ -1,7 +1,26 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. +#![feature(test)] #[macro_use] extern crate slog_global; mod gcs; pub use gcs::{Config, GcsStorage}; + +pub mod utils { + use std::future::Future; + + use cloud::metrics; + use tikv_util::stream::{retry_ext, RetryError, RetryExt}; + pub async fn retry(action: G, name: &'static str) -> Result + where + G: FnMut() -> F, + F: Future>, + E: RetryError + std::fmt::Debug, + { + retry_ext(action, RetryExt::default().with_fail_hook(move |err: &E| { + warn!("gcp request meet error."; "err" => ?err, "retry?" 
=> %err.is_retryable(), "context" => %name); + metrics::CLOUD_ERROR_VEC.with_label_values(&["gcp", name]).inc(); + })).await + } +} From dea7b050fbca2266a5e81883bb396b9652fba646 Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 28 Oct 2022 18:41:58 +0800 Subject: [PATCH 0293/1149] readpool: update yatp and add new metrics for unified-read-pool (#13489) ref tikv/tikv#13313 Signed-off-by: glorv Co-authored-by: Ti Chi Robot --- Cargo.lock | 33 ++- Cargo.toml | 4 + components/server/src/server.rs | 3 + metrics/grafana/tikv_details.json | 360 ++++++++++++++++++++++++++++++ 4 files changed, 394 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 49e6184ab5b..bc757a3ecdf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1149,13 +1149,12 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +version = "0.8.2" +source = "git+https://github.com/crossbeam-rs/crossbeam?rev=41ed3d948720f26149b2ebeaf58fe8a193134056#41ed3d948720f26149b2ebeaf58fe8a193134056" dependencies = [ "cfg-if 1.0.0", - "crossbeam-epoch 0.9.8", - "crossbeam-utils 0.8.8", + "crossbeam-epoch 0.9.10", + "crossbeam-utils 0.8.11", ] [[package]] @@ -1184,6 +1183,19 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "crossbeam-epoch" +version = "0.9.10" +source = "git+https://github.com/crossbeam-rs/crossbeam?rev=41ed3d948720f26149b2ebeaf58fe8a193134056#41ed3d948720f26149b2ebeaf58fe8a193134056" +dependencies = [ + "autocfg", + "cfg-if 1.0.0", + "crossbeam-utils 0.8.11", + "memoffset", + "once_cell", + "scopeguard", +] + [[package]] name = "crossbeam-queue" version = "0.3.5" @@ -1236,6 +1248,15 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "crossbeam-utils" +version = "0.8.11" +source = "git+https://github.com/crossbeam-rs/crossbeam?rev=41ed3d948720f26149b2ebeaf58fe8a193134056#41ed3d948720f26149b2ebeaf58fe8a193134056" 
+dependencies = [ + "cfg-if 1.0.0", + "once_cell", +] + [[package]] name = "crypto-mac" version = "0.10.0" @@ -7251,7 +7272,7 @@ checksum = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5" [[package]] name = "yatp" version = "0.0.1" -source = "git+https://github.com/tikv/yatp.git?branch=master#2f5f6e47ba6fce8d55e7a57b7ee39a93bc0e8194" +source = "git+https://github.com/tikv/yatp.git?branch=master#39cb495953d40a7e846363c06090755c2eac65fa" dependencies = [ "crossbeam-deque", "dashmap", diff --git a/Cargo.toml b/Cargo.toml index 786b229df3b..d95dd1c67c1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -211,6 +211,10 @@ fs2 = { git = "https://github.com/tabokie/fs2-rs", branch = "tikv" } # Remove this when a new version is release. We need to solve rust-lang/cmake-rs#143. cmake = { git = "https://github.com/rust-lang/cmake-rs" } +# TODO: remove this after crossbeam-deque is updated to the next release version. +# This is a workaround for cargo can't resolving the this patch in yatp. 
+crossbeam-deque = { git = "https://github.com/crossbeam-rs/crossbeam", rev = "41ed3d948720f26149b2ebeaf58fe8a193134056" } + [target.'cfg(target_os = "linux")'.dependencies] procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229b2c1fcc44118bef7eff127128" } # When you modify TiKV cooperatively with kvproto, this will be useful to submit the PR to TiKV and the PR to diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 2295839a806..82973946d96 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -564,6 +564,9 @@ where yatp::metrics::set_namespace(Some("tikv")); prometheus::register(Box::new(yatp::metrics::MULTILEVEL_LEVEL0_CHANCE.clone())).unwrap(); prometheus::register(Box::new(yatp::metrics::MULTILEVEL_LEVEL_ELAPSED.clone())).unwrap(); + prometheus::register(Box::new(yatp::metrics::TASK_EXEC_DURATION.clone())).unwrap(); + prometheus::register(Box::new(yatp::metrics::TASK_POLL_DURATION.clone())).unwrap(); + prometheus::register(Box::new(yatp::metrics::TASK_EXEC_TIMES.clone())).unwrap(); } fn init_encryption(&mut self) { diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 45a657cc4bb..471bf4bea2e 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -17938,6 +17938,366 @@ "yBucketBound": "auto", "yBucketNumber": null, "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Unified read pool task execution time during one schedule.", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 34 + }, + "id": 4199, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + 
"pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.50, sum(rate(tikv_yatp_task_poll_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "50%", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(tikv_yatp_task_poll_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_yatp_task_poll_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "refId": "C" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.999, sum(rate(tikv_yatp_task_poll_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "999%", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Duration of One Time Slice", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + 
"logBase": 2, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Unified read pool task total execution duration.", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 34 + }, + "id": 4202, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.50, sum(rate(tikv_yatp_task_exec_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "50%", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(tikv_yatp_task_exec_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_yatp_task_exec_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + 
"legendFormat": "99%", + "refId": "C" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.999, sum(rate(tikv_yatp_task_exec_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "999%", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Task Execute Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 2, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Task schedule number of times.", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 4204, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.50, sum(rate(tikv_yatp_task_execute_times_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "format": "time_series", + 
"interval": "", + "intervalFactor": 1, + "legendFormat": "50%", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(tikv_yatp_task_execute_times_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_yatp_task_execute_times_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "refId": "C" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.999, sum(rate(tikv_yatp_task_execute_times_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "999%", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Task Schedule Times", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 2, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "title": "Unified Read Pool", From 53470735e263343a0a53acc596fe1e82fe1c5b65 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 31 Oct 2022 16:59:59 
+0800 Subject: [PATCH 0294/1149] Raftstore-v2: tablet factory supports temporary split path (#13678) close tikv/tikv#13679 Signed-off-by: SpadeA-Tang Co-authored-by: Xinye Tao --- components/engine_test/src/lib.rs | 91 ++++++++--------- components/engine_traits/src/engine.rs | 15 ++- src/server/engine_factory.rs | 2 +- src/server/engine_factory_v2.rs | 133 +++++++++++++++---------- 4 files changed, 142 insertions(+), 99 deletions(-) diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index b2a574422fb..b460e97d4ce 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -189,8 +189,8 @@ pub mod kv { } #[inline] - fn tablet_path(&self, id: u64, suffix: u64) -> PathBuf { - Path::new(&self.root_path).join(format!("tablets/{}_{}", id, suffix)) + fn tablet_path_with_prefix(&self, _prefix: &str, _id: u64, _suffix: u64) -> PathBuf { + self.root_path.join("db") } #[inline] @@ -226,7 +226,8 @@ pub mod kv { #[derive(Clone)] pub struct TestTabletFactoryV2 { inner: TestTabletFactory, - registry: Arc>>, + // region_id -> (tablet, tablet_suffix) + registry: Arc>>, } impl TestTabletFactoryV2 { @@ -242,17 +243,6 @@ pub mod kv { } } - // Extract tablet id and tablet suffix from the path. 
- fn get_id_and_suffix_from_path(path: &Path) -> (u64, u64) { - let (mut tablet_id, mut tablet_suffix) = (0, 1); - if let Some(s) = path.file_name().map(|s| s.to_string_lossy()) { - let mut split = s.split('_'); - tablet_id = split.next().and_then(|s| s.parse().ok()).unwrap_or(0); - tablet_suffix = split.next().and_then(|s| s.parse().ok()).unwrap_or(1); - } - (tablet_id, tablet_suffix) - } - impl TabletFactory for TestTabletFactoryV2 { /// See the comment above the same name method in KvEngineFactoryV2 fn open_tablet( @@ -261,33 +251,34 @@ pub mod kv { suffix: Option, mut options: OpenOptions, ) -> Result { + if options.create_new() && suffix.is_none() { + return Err(box_err!( + "suffix should be provided when creating new tablet" + )); + } + if options.create_new() || options.create() { options = options.set_cache_only(false); } let mut reg = self.registry.lock().unwrap(); if let Some(suffix) = suffix { - if let Some(tablet) = reg.get(&(id, suffix)) { + if let Some((cached_tablet, cached_suffix)) = reg.get(&id) && *cached_suffix == suffix { // Target tablet exist in the cache - if options.create_new() { - return Err(box_err!("region {} {} already exists", id, tablet.path())); + return Err(box_err!("region {} {} already exists", id, cached_tablet.path())); } - return Ok(tablet.clone()); + return Ok(cached_tablet.clone()); } else if !options.cache_only() { let tablet_path = self.tablet_path(id, suffix); let tablet = self.open_tablet_raw(&tablet_path, id, suffix, options.clone())?; if !options.skip_cache() { - reg.insert((id, suffix), tablet.clone()); + reg.insert(id, (tablet.clone(), suffix)); } return Ok(tablet); } - } else if options.cache_only() { - // This branch reads an arbitrary tablet with region id `id` - - if let Some(k) = reg.keys().find(|k| k.0 == id) { - return Ok(reg.get(k).unwrap().clone()); - } + } else if let Some((tablet, _)) = reg.get(&id) { + return Ok(tablet.clone()); } Err(box_err!( @@ -343,17 +334,24 @@ pub mod kv { } #[inline] - fn 
tablet_path(&self, id: u64, suffix: u64) -> PathBuf { + fn tablet_path_with_prefix(&self, prefix: &str, id: u64, suffix: u64) -> PathBuf { self.inner .root_path - .join(format!("tablets/{}_{}", id, suffix)) + .join(format!("tablets/{}{}_{}", prefix, id, suffix)) } #[inline] fn mark_tombstone(&self, region_id: u64, suffix: u64) { let path = self.tablet_path(region_id, suffix).join(TOMBSTONE_MARK); - std::fs::File::create(&path).unwrap(); - self.registry.lock().unwrap().remove(&(region_id, suffix)); + // When the full directory path does not exsit, create will return error and in + // this case, we just ignore it. + let _ = std::fs::File::create(&path); + { + let mut reg = self.registry.lock().unwrap(); + if let Some((cached_tablet, cached_suffix)) = reg.remove(®ion_id) && cached_suffix != suffix { + reg.insert(region_id, (cached_tablet, cached_suffix)); + } + } } #[inline] @@ -364,37 +362,40 @@ pub mod kv { } #[inline] - fn destroy_tablet(&self, id: u64, suffix: u64) -> engine_traits::Result<()> { - let path = self.tablet_path(id, suffix); - self.registry.lock().unwrap().remove(&(id, suffix)); + fn destroy_tablet(&self, region_id: u64, suffix: u64) -> engine_traits::Result<()> { + let path = self.tablet_path(region_id, suffix); + { + let mut reg = self.registry.lock().unwrap(); + if let Some((cached_tablet, cached_suffix)) = reg.remove(®ion_id) && cached_suffix != suffix { + reg.insert(region_id, (cached_tablet, cached_suffix)); + } + } let _ = std::fs::remove_dir_all(path); Ok(()) } #[inline] - fn load_tablet(&self, path: &Path, id: u64, suffix: u64) -> Result { + fn load_tablet(&self, path: &Path, region_id: u64, suffix: u64) -> Result { { let reg = self.registry.lock().unwrap(); - if let Some(db) = reg.get(&(id, suffix)) { - return Err(box_err!("region {} {} already exists", id, db.path())); + if let Some((db, db_suffix)) = reg.get(®ion_id) && *db_suffix == suffix { + return Err(box_err!("region {} {} already exists", region_id, db.path())); } } - let db_path = 
self.tablet_path(id, suffix); + let db_path = self.tablet_path(region_id, suffix); std::fs::rename(path, &db_path)?; - let new_engine = - self.open_tablet(id, Some(suffix), OpenOptions::default().set_create(true)); - if new_engine.is_ok() { - let (old_id, old_suffix) = get_id_and_suffix_from_path(path); - self.registry.lock().unwrap().remove(&(old_id, old_suffix)); - } - new_engine + self.open_tablet( + region_id, + Some(suffix), + OpenOptions::default().set_create(true), + ) } fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()> { let reg = self.registry.lock().unwrap(); // pick up any tablet and set the shared block cache capacity - if let Some(((_id, _suffix), tablet)) = (*reg).iter().next() { + if let Some((_id, (tablet, _suffix))) = (*reg).iter().next() { let opt = tablet.get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap opt.set_block_cache_capacity(capacity)?; } @@ -406,7 +407,7 @@ pub mod kv { #[inline] fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &KvTestEngine)) { let reg = self.registry.lock().unwrap(); - for ((id, suffix), tablet) in &*reg { + for (id, (tablet, suffix)) in &*reg { f(*id, *suffix, tablet) } } diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index 5ad9a13b86f..34c8d67c3d3 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -224,6 +224,9 @@ impl OpenOptions { } } +pub const SPLIT_PREFIX: &str = "split_"; +pub const MERGE_PREFIX: &str = "merge_"; + /// A factory trait to create new engine. // It should be named as `EngineFactory` for consistency, but we are about to // rename engine to tablet, so always use tablet for new traits/types. 
@@ -261,7 +264,15 @@ pub trait TabletFactory: TabletAccessor + Send + Sync { fn exists_raw(&self, path: &Path) -> bool; /// Get the tablet path by id and suffix - fn tablet_path(&self, id: u64, suffix: u64) -> PathBuf; + fn tablet_path(&self, id: u64, suffix: u64) -> PathBuf { + self.tablet_path_with_prefix("", id, suffix) + } + + /// Get the tablet path by id and suffix + /// + /// Used in special situations + /// Ex: split/merge. + fn tablet_path_with_prefix(&self, prefix: &str, id: u64, suffix: u64) -> PathBuf; /// Tablets root path fn tablets_path(&self) -> PathBuf; @@ -323,7 +334,7 @@ where true } - fn tablet_path(&self, _id: u64, _suffix: u64) -> PathBuf { + fn tablet_path_with_prefix(&self, _prefix: &str, _id: u64, _suffix: u64) -> PathBuf { PathBuf::from(&self.root_path) } diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index d8492dae5ce..7e8a1457500 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -272,7 +272,7 @@ impl TabletFactory for KvEngineFactory { false } - fn tablet_path(&self, _id: u64, _suffix: u64) -> PathBuf { + fn tablet_path_with_prefix(&self, _prefix: &str, _id: u64, _suffix: u64) -> PathBuf { self.kv_engine_path() } diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index b4a7688ef68..323f414c05c 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -19,7 +19,8 @@ const TOMBSTONE_MARK: &str = "TOMBSTONE_TABLET"; #[derive(Clone)] pub struct KvEngineFactoryV2 { inner: KvEngineFactory, - registry: Arc>>, + // region_id -> (tablet, tablet_suffix) + registry: Arc>>, } impl KvEngineFactoryV2 { @@ -31,23 +32,11 @@ impl KvEngineFactoryV2 { } } -// Extract tablet id and tablet suffix from the path. 
-fn get_id_and_suffix_from_path(path: &Path) -> (u64, u64) { - let (mut tablet_id, mut tablet_suffix) = (0, 1); - if let Some(s) = path.file_name().map(|s| s.to_string_lossy()) { - let mut split = s.split('_'); - tablet_id = split.next().and_then(|s| s.parse().ok()).unwrap_or(0); - tablet_suffix = split.next().and_then(|s| s.parse().ok()).unwrap_or(1); - } - (tablet_id, tablet_suffix) -} - impl TabletFactory for KvEngineFactoryV2 { /// open a tablet according to the OpenOptions. /// /// If options.cache_only is true, only open the relevant tablet from - /// `registry`, and if suffix is None, return an arbitrary tablet with the - /// target region id if there are any. + /// `registry`. /// /// If options.create_new is true, create a tablet by id and suffix. If the /// tablet exists, it will fail. @@ -55,6 +44,8 @@ impl TabletFactory for KvEngineFactoryV2 { /// If options.create is true, open the tablet with id and suffix if it /// exists or create it otherwise. /// + /// If options.skip_cache is true, cache will not be updated. 
+ /// /// Note: options.cache_only and options.create and/or options.create_new /// cannot be true simultaneously fn open_tablet( @@ -63,39 +54,39 @@ impl TabletFactory for KvEngineFactoryV2 { suffix: Option, mut options: OpenOptions, ) -> Result { + if options.create_new() && suffix.is_none() { + return Err(box_err!( + "suffix should be provided when creating new tablet" + )); + } + if options.create() || options.create_new() { options = options.set_cache_only(false); } let mut reg = self.registry.lock().unwrap(); if let Some(suffix) = suffix { - if let Some(tablet) = reg.get(&(id, suffix)) { + if let Some((cached_tablet, cached_suffix)) = reg.get(&id) && *cached_suffix == suffix { // Target tablet exist in the cache - if options.create_new() { return Err(box_err!( "region {} {} already exists", id, - tablet.as_inner().path() + cached_tablet.as_inner().path() )); } - return Ok(tablet.clone()); + return Ok(cached_tablet.clone()); } else if !options.cache_only() { let tablet_path = self.tablet_path(id, suffix); let tablet = self.open_tablet_raw(&tablet_path, id, suffix, options.clone())?; if !options.skip_cache() { debug!("Insert a tablet"; "key" => ?(id, suffix)); - reg.insert((id, suffix), tablet.clone()); + reg.insert(id, (tablet.clone(), suffix)); } return Ok(tablet); } - } else if options.cache_only() { - // This branch reads an arbitrary tablet with region id `id` - - if let Some(k) = reg.keys().find(|k| k.0 == id) { - debug!("choose a random tablet"; "key" => ?k); - return Ok(reg.get(k).unwrap().clone()); - } + } else if let Some((tablet, _)) = reg.get(&id) { + return Ok(tablet.clone()); } Err(box_err!( @@ -154,18 +145,25 @@ impl TabletFactory for KvEngineFactoryV2 { } #[inline] - fn tablet_path(&self, id: u64, suffix: u64) -> PathBuf { + fn tablet_path_with_prefix(&self, prefix: &str, id: u64, suffix: u64) -> PathBuf { self.inner .store_path() - .join(format!("tablets/{}_{}", id, suffix)) + .join(format!("tablets/{}{}_{}", prefix, id, suffix)) } #[inline] fn 
mark_tombstone(&self, region_id: u64, suffix: u64) { let path = self.tablet_path(region_id, suffix).join(TOMBSTONE_MARK); - std::fs::File::create(&path).unwrap(); + // When the full directory path does not exsit, create will return error and in + // this case, we just ignore it. + let _ = std::fs::File::create(&path); debug!("tombstone tablet"; "region_id" => region_id, "suffix" => suffix); - self.registry.lock().unwrap().remove(&(region_id, suffix)); + { + let mut reg = self.registry.lock().unwrap(); + if let Some((cached_tablet, cached_suffix)) = reg.remove(®ion_id) && cached_suffix != suffix { + reg.insert(region_id, (cached_tablet, cached_suffix)); + } + } } #[inline] @@ -176,42 +174,45 @@ impl TabletFactory for KvEngineFactoryV2 { } #[inline] - fn destroy_tablet(&self, id: u64, suffix: u64) -> engine_traits::Result<()> { - let path = self.tablet_path(id, suffix); - self.registry.lock().unwrap().remove(&(id, suffix)); + fn destroy_tablet(&self, region_id: u64, suffix: u64) -> engine_traits::Result<()> { + let path = self.tablet_path(region_id, suffix); + { + let mut reg = self.registry.lock().unwrap(); + if let Some((cached_tablet, cached_suffix)) = reg.remove(®ion_id) && cached_suffix != suffix { + reg.insert(region_id, (cached_tablet, cached_suffix)); + } + } self.inner.destroy_tablet(&path)?; - self.inner.on_tablet_destroy(id, suffix); + self.inner.on_tablet_destroy(region_id, suffix); Ok(()) } #[inline] - fn load_tablet(&self, path: &Path, id: u64, suffix: u64) -> Result { + fn load_tablet(&self, path: &Path, region_id: u64, suffix: u64) -> Result { { let reg = self.registry.lock().unwrap(); - if let Some(db) = reg.get(&(id, suffix)) { + if let Some((db, db_suffix)) = reg.get(®ion_id) && *db_suffix == suffix { return Err(box_err!( "region {} {} already exists", - id, + region_id, db.as_inner().path() )); } } - let db_path = self.tablet_path(id, suffix); + let db_path = self.tablet_path(region_id, suffix); std::fs::rename(path, &db_path)?; - let new_engine = 
- self.open_tablet(id, Some(suffix), OpenOptions::default().set_create(true)); - if new_engine.is_ok() { - let (old_id, old_suffix) = get_id_and_suffix_from_path(path); - self.registry.lock().unwrap().remove(&(old_id, old_suffix)); - } - new_engine + self.open_tablet( + region_id, + Some(suffix), + OpenOptions::default().set_create(true), + ) } fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()> { let reg = self.registry.lock().unwrap(); // pick up any tablet and set the shared block cache capacity - if let Some(((_id, _suffix), tablet)) = (*reg).iter().next() { + if let Some((_id, (tablet, _suffix))) = (*reg).iter().next() { let opt = tablet.get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap opt.set_block_cache_capacity(capacity)?; } @@ -223,7 +224,7 @@ impl TabletAccessor for KvEngineFactoryV2 { #[inline] fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &RocksEngine)) { let reg = self.registry.lock().unwrap(); - for ((id, suffix), tablet) in &*reg { + for (id, (tablet, suffix)) in &*reg { f(*id, *suffix, tablet) } } @@ -236,7 +237,7 @@ impl TabletAccessor for KvEngineFactoryV2 { #[cfg(test)] mod tests { - use engine_traits::{OpenOptions, TabletFactory, CF_WRITE}; + use engine_traits::{OpenOptions, TabletFactory, CF_WRITE, SPLIT_PREFIX}; use super::*; use crate::{config::TikvConfig, server::KvEngineFactoryBuilder}; @@ -373,6 +374,11 @@ mod tests { .unwrap(); assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); + // Only both region id and suffix match can get the tablet from the cache. 
+ factory + .open_tablet(1, Some(20), OpenOptions::default().set_cache_only(true)) + .unwrap_err(); + let tablet_path = factory.tablet_path(1, 10); let result = factory.open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)); result.unwrap_err(); @@ -400,14 +406,39 @@ mod tests { .open_tablet(1, Some(20), OpenOptions::default().set_cache_only(true)) .unwrap(); + factory + .open_tablet(1, Some(30), OpenOptions::default().set_create_new(true)) + .unwrap(); + // After open a tablet with the same id but higher suffix, we cannot get the old + // one from cache. + factory + .open_tablet(1, Some(20), OpenOptions::default().set_cache_only(true)) + .unwrap_err(); + // Destroy/mark tombstone the old tablet will not unregister the new tablet in + // the cache factory.mark_tombstone(1, 20); - assert!(factory.is_tombstoned(1, 20)); + factory + .open_tablet(1, Some(30), OpenOptions::default().set_cache_only(true)) + .unwrap(); factory.destroy_tablet(1, 20).unwrap(); + factory + .open_tablet(1, Some(30), OpenOptions::default().set_cache_only(true)) + .unwrap(); - let result = factory.open_tablet(1, Some(20), OpenOptions::default()); + factory.mark_tombstone(1, 30); + assert!(factory.is_tombstoned(1, 30)); + factory.destroy_tablet(1, 30).unwrap(); + + let result = factory.open_tablet(1, Some(30), OpenOptions::default()); result.unwrap_err(); assert!(!factory.is_single_engine()); + + assert!( + factory + .tablet_path_with_prefix(SPLIT_PREFIX, 1, 10) + .ends_with("split_1_10") + ); } #[test] @@ -428,7 +459,7 @@ mod tests { .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) .unwrap(); drop(tablet); - let tablet = factory.registry.lock().unwrap().remove(&(1, 10)).unwrap(); + let (tablet, _) = factory.registry.lock().unwrap().remove(&1).unwrap(); drop(tablet); factory .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) From e0885803a17dfcd26964e74029ce5af9a32cd797 Mon Sep 17 00:00:00 2001 From: Spade A 
<71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 31 Oct 2022 17:31:59 +0800 Subject: [PATCH 0295/1149] *: prepare for raftstore-v2's split (#13693) ref tikv/tikv#12842 Signed-off-by: SpadeA-Tang Co-authored-by: Xinye Tao --- components/engine_panic/src/checkpoint.rs | 29 +++++++ components/engine_panic/src/lib.rs | 1 + components/engine_rocks/src/checkpoint.rs | 55 +++++++++++++ components/engine_rocks/src/lib.rs | 2 + components/engine_traits/src/checkpoint.rs | 20 +++++ components/engine_traits/src/engine.rs | 1 + components/engine_traits/src/lib.rs | 2 + components/raftstore/src/store/fsm/apply.rs | 85 +++++++++++++-------- 8 files changed, 163 insertions(+), 32 deletions(-) create mode 100644 components/engine_panic/src/checkpoint.rs create mode 100644 components/engine_rocks/src/checkpoint.rs create mode 100644 components/engine_traits/src/checkpoint.rs diff --git a/components/engine_panic/src/checkpoint.rs b/components/engine_panic/src/checkpoint.rs new file mode 100644 index 00000000000..6743810eb90 --- /dev/null +++ b/components/engine_panic/src/checkpoint.rs @@ -0,0 +1,29 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use core::panic; +use std::path::Path; + +use engine_traits::{Checkpointable, Checkpointer, Result}; + +use crate::PanicEngine; + +pub struct PanicCheckpointer {} + +impl Checkpointable for PanicEngine { + type Checkpointer = PanicCheckpointer; + + fn new_checkpointer(&self) -> Result { + panic!() + } +} + +impl Checkpointer for PanicCheckpointer { + fn create_at( + &mut self, + db_out_dir: &Path, + titan_out_dir: Option<&Path>, + log_size_for_flush: u64, + ) -> Result<()> { + panic!() + } +} diff --git a/components/engine_panic/src/lib.rs b/components/engine_panic/src/lib.rs index 0573c936135..70c7f00ece8 100644 --- a/components/engine_panic/src/lib.rs +++ b/components/engine_panic/src/lib.rs @@ -46,5 +46,6 @@ pub mod flow_control_factors; pub use crate::flow_control_factors::*; pub mod table_properties; pub use crate::table_properties::*; +pub mod checkpoint; mod raft_engine; diff --git a/components/engine_rocks/src/checkpoint.rs b/components/engine_rocks/src/checkpoint.rs new file mode 100644 index 00000000000..8b82043a392 --- /dev/null +++ b/components/engine_rocks/src/checkpoint.rs @@ -0,0 +1,55 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::path::Path; + +use engine_traits::{Checkpointable, Checkpointer, Result}; + +use crate::{r2e, RocksEngine}; + +impl Checkpointable for RocksEngine { + type Checkpointer = RocksEngineCheckpointer; + + fn new_checkpointer(&self) -> Result { + match self.as_inner().new_checkpointer() { + Ok(pointer) => Ok(RocksEngineCheckpointer(pointer)), + Err(e) => Err(r2e(e)), + } + } +} + +pub struct RocksEngineCheckpointer(rocksdb::Checkpointer); + +impl Checkpointer for RocksEngineCheckpointer { + fn create_at( + &mut self, + db_out_dir: &Path, + titan_out_dir: Option<&Path>, + log_size_for_flush: u64, + ) -> Result<()> { + self.0 + .create_at(db_out_dir, titan_out_dir, log_size_for_flush) + .map_err(|e| r2e(e)) + } +} + +#[cfg(test)] +mod tests { + use engine_traits::{Checkpointable, Checkpointer, Peekable, SyncMutable, ALL_CFS}; + use tempfile::tempdir; + + use crate::util::new_engine; + + #[test] + fn test_checkpoint() { + let dir = tempdir().unwrap(); + let path = dir.path().join("origin"); + let engine = new_engine(path.as_path().to_str().unwrap(), ALL_CFS).unwrap(); + engine.put(b"key", b"value").unwrap(); + + let mut check_pointer = engine.new_checkpointer().unwrap(); + let path2 = dir.path().join("checkpoint"); + check_pointer.create_at(path2.as_path(), None, 0).unwrap(); + let engine2 = new_engine(path2.as_path().to_str().unwrap(), ALL_CFS).unwrap(); + assert_eq!(engine2.get_value(b"key").unwrap().unwrap(), b"value"); + } +} diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index 774fe9cb37b..c1e23dac4a6 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -28,6 +28,8 @@ mod cf_names; pub use crate::cf_names::*; mod cf_options; pub use crate::cf_options::*; +mod checkpoint; +pub use crate::checkpoint::*; mod compact; pub use crate::compact::*; mod db_options; diff --git a/components/engine_traits/src/checkpoint.rs b/components/engine_traits/src/checkpoint.rs new file mode 100644 index 
00000000000..6ea3556938f --- /dev/null +++ b/components/engine_traits/src/checkpoint.rs @@ -0,0 +1,20 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::path::Path; + +use crate::Result; + +pub trait Checkpointable { + type Checkpointer: Checkpointer; + + fn new_checkpointer(&self) -> Result; +} + +pub trait Checkpointer { + fn create_at( + &mut self, + db_out_dir: &Path, + titan_out_dir: Option<&Path>, + log_size_for_flush: u64, + ) -> Result<()>; +} diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index 34c8d67c3d3..55ab5d63caa 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -40,6 +40,7 @@ pub trait KvEngine: + Clone + Debug + Unpin + + Checkpointable + 'static { /// A consistent read-only snapshot of the database diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 47fe16b4768..29351636694 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -303,6 +303,8 @@ mod flow_control_factors; pub use crate::flow_control_factors::*; mod table_properties; pub use crate::table_properties::*; +mod checkpoint; +pub use crate::checkpoint::*; // These modules contain more general traits, some of which may be implemented // by multiple types. 
diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index d3eb7f86461..c8fee703e63 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -38,8 +38,8 @@ use kvproto::{ kvrpcpb::ExtraOp as TxnExtraOp, metapb::{PeerRole, Region, RegionEpoch}, raft_cmdpb::{ - AdminCmdType, AdminRequest, AdminResponse, ChangePeerRequest, CmdType, CommitMergeRequest, - RaftCmdRequest, RaftCmdResponse, Request, + AdminCmdType, AdminRequest, AdminResponse, BatchSplitRequest, ChangePeerRequest, CmdType, + CommitMergeRequest, RaftCmdRequest, RaftCmdResponse, Request, }, raft_serverpb::{MergeState, PeerState, RaftApplyState, RaftTruncatedState, RegionLocalState}, }; @@ -1899,6 +1899,42 @@ mod confchange_cmd_metric { } } +// Validate the request and the split keys +pub fn extract_split_keys( + split_reqs: &BatchSplitRequest, + region_to_split: &Region, +) -> Result>> { + if split_reqs.get_requests().is_empty() { + return Err(box_err!("missing split requests")); + } + let mut keys: VecDeque> = VecDeque::with_capacity(split_reqs.get_requests().len() + 1); + for req in split_reqs.get_requests() { + let split_key = req.get_split_key(); + if split_key.is_empty() { + return Err(box_err!("missing split key")); + } + if split_key + <= keys + .back() + .map_or_else(|| region_to_split.get_start_key(), Vec::as_slice) + { + return Err(box_err!("invalid split request: {:?}", split_reqs)); + } + if req.get_new_peer_ids().len() != region_to_split.get_peers().len() { + return Err(box_err!( + "invalid new peer id count, need {:?}, but got {:?}", + region_to_split.get_peers(), + req.get_new_peer_ids() + )); + } + keys.push_back(split_key.to_vec()); + } + + util::check_key_in_region_exclusive(keys.back().unwrap(), region_to_split)?; + + Ok(keys) +} + // Admin commands related. 
impl ApplyDelegate where @@ -2368,37 +2404,8 @@ where PEER_ADMIN_CMD_COUNTER.batch_split.all.inc(); let split_reqs = req.get_splits(); - let right_derive = split_reqs.get_right_derive(); - if split_reqs.get_requests().is_empty() { - return Err(box_err!("missing split requests")); - } + let mut keys = extract_split_keys(split_reqs, &self.region)?; let mut derived = self.region.clone(); - let new_region_cnt = split_reqs.get_requests().len(); - let mut regions = Vec::with_capacity(new_region_cnt + 1); - let mut keys: VecDeque> = VecDeque::with_capacity(new_region_cnt + 1); - for req in split_reqs.get_requests() { - let split_key = req.get_split_key(); - if split_key.is_empty() { - return Err(box_err!("missing split key")); - } - if split_key - <= keys - .back() - .map_or_else(|| derived.get_start_key(), Vec::as_slice) - { - return Err(box_err!("invalid split request: {:?}", split_reqs)); - } - if req.get_new_peer_ids().len() != derived.get_peers().len() { - return Err(box_err!( - "invalid new peer id count, need {:?}, but got {:?}", - derived.get_peers(), - req.get_new_peer_ids() - )); - } - keys.push_back(split_key.to_vec()); - } - - util::check_key_in_region(keys.back().unwrap(), &self.region)?; info!( "split region"; @@ -2407,8 +2414,13 @@ where "region" => ?derived, "keys" => %KeysInfoFormatter(keys.iter()), ); + + let new_region_cnt = split_reqs.get_requests().len(); let new_version = derived.get_region_epoch().get_version() + new_region_cnt as u64; derived.mut_region_epoch().set_version(new_version); + + let right_derive = split_reqs.get_right_derive(); + let mut regions = Vec::with_capacity(new_region_cnt + 1); // Note that the split requests only contain ids for new regions, so we need // to handle new regions and old region separately. 
if right_derive { @@ -2423,6 +2435,7 @@ where regions.push(derived.clone()); } + // Init split regions' meta info let mut new_split_regions: HashMap = HashMap::default(); for req in split_reqs.get_requests() { let mut new_region = Region::default(); @@ -2453,6 +2466,11 @@ where regions.push(derived.clone()); } + // Generally, a peer is created in pending_create_peers when it is + // created by raft_message (or by split here) and removed from + // pending_create_peers when it has applied the snapshot. So, if the + // peer of the split region is already created by raft_message in + // pending_create_peers ,we decide to replace it. let mut replace_regions = HashSet::default(); { let mut pending_create_peers = ctx.pending_create_peers.lock().unwrap(); @@ -2498,6 +2516,9 @@ where self.tag, region_id, new_split_peer.peer_id, state ) } + // If the peer's state is already persisted, add some info in + // new_split_peer.result so that we will skip this region in later + // executions. already_exist_regions.push((*region_id, new_split_peer.peer_id)); new_split_peer.result = Some(format!("state {:?} exist in kv engine", state)); } From d9fe2ffd78f4db5ec2141c98e019d14ba980f121 Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Tue, 1 Nov 2022 12:23:59 +0800 Subject: [PATCH 0296/1149] log-backup: set `checkpoint-ts -1` as sevice-safe-point when checkpoint advances (#13559) close tikv/tikv#13532 Signed-off-by: joccau Co-authored-by: Ti Chi Robot --- .../backup-stream/src/checkpoint_manager.rs | 61 +++++++++++++++++-- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/components/backup-stream/src/checkpoint_manager.rs b/components/backup-stream/src/checkpoint_manager.rs index 4b80eb44a2f..3a13acd2f4c 100644 --- a/components/backup-stream/src/checkpoint_manager.rs +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -199,7 +199,7 @@ impl FlushObserver for BasicFlushObserver { .pd_cli .update_service_safe_point( 
format!("backup-stream-{}-{}", task, self.store_id), - TimeStamp::new(rts), + TimeStamp::new(rts - 1), // Add a service safe point for 30 mins (6x the default flush interval). // It would probably be safe. Duration::from_secs(1800), @@ -299,12 +299,19 @@ where #[cfg(test)] mod tests { - use std::assert_matches; - + use std::{ + assert_matches, + collections::HashMap, + sync::{Arc, RwLock}, + time::Duration, + }; + + use futures::future::ok; use kvproto::metapb::*; + use pd_client::{PdClient, PdFuture}; use txn_types::TimeStamp; - use super::RegionIdWithVersion; + use super::{BasicFlushObserver, FlushObserver, RegionIdWithVersion}; use crate::GetCheckpointResult; fn region(id: u64, version: u64, conf_version: u64) -> Region { @@ -342,4 +349,50 @@ mod tests { let r = mgr.get_from_region(RegionIdWithVersion::new(1, 33)); assert_matches::assert_matches!(r, GetCheckpointResult::Ok{checkpoint, ..} if checkpoint.into_inner() == 24); } + + struct MockPdClient { + safepoint: RwLock>, + } + + impl PdClient for MockPdClient { + fn update_service_safe_point( + &self, + name: String, + safepoint: TimeStamp, + _ttl: Duration, + ) -> PdFuture<()> { + // let _ = self.safepoint.insert(name, safepoint); + self.safepoint.write().unwrap().insert(name, safepoint); + + Box::pin(ok(())) + } + } + + impl MockPdClient { + fn new() -> Self { + Self { + safepoint: RwLock::new(HashMap::default()), + } + } + + fn get_service_safe_point(&self, name: String) -> Option { + self.safepoint.read().unwrap().get(&name).copied() + } + } + + #[tokio::test] + async fn test_after() { + let store_id = 1; + let pd_cli = Arc::new(MockPdClient::new()); + let mut flush_observer = BasicFlushObserver::new(pd_cli.clone(), store_id); + let task = String::from("test"); + let rts = 12345; + + let r = flush_observer.after(&task, rts).await; + assert_eq!(r.is_ok(), true); + + let serivce_id = format!("backup-stream-{}-{}", task, store_id); + let r = pd_cli.get_service_safe_point(serivce_id).unwrap(); + 
assert_eq!(r.into_inner(), rts - 1); + } } From 6cf4100bd71a0bde491e4a05acca0587ae0b8232 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Tue, 1 Nov 2022 17:04:00 +0800 Subject: [PATCH 0297/1149] coprocessor_v2: fix incorrect shared library name (#13707) ref tikv/tikv#13585, close tikv/tikv#13708 This commit fixes test `registry_unload_plugin`. The test has been failing since the example crate renamed its name in #13585. Signed-off-by: Yilin Chen --- src/coprocessor_v2/plugin_registry.rs | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/coprocessor_v2/plugin_registry.rs b/src/coprocessor_v2/plugin_registry.rs index c02a652fc88..cbcba39995d 100644 --- a/src/coprocessor_v2/plugin_registry.rs +++ b/src/coprocessor_v2/plugin_registry.rs @@ -481,7 +481,7 @@ mod tests { fn initialize_library() -> PathBuf { let mut path = std::env::current_exe().unwrap(); - path.set_file_name(pkgname_to_libname("example-plugin")); + path.set_file_name(pkgname_to_libname("example-coprocessor-plugin")); path } @@ -491,7 +491,7 @@ mod tests { let loaded_plugin = unsafe { LoadedPlugin::new(&library_path).unwrap() }; - assert_eq!(loaded_plugin.name(), "example_plugin"); + assert_eq!(loaded_plugin.name(), "example_coprocessor_plugin"); assert_eq!(loaded_plugin.version(), &Version::parse("0.1.0").unwrap()); } @@ -504,10 +504,15 @@ mod tests { let plugin = registry.get_plugin(&plugin_name).unwrap(); - assert_eq!(plugin.name(), "example_plugin"); - assert_eq!(registry.loaded_plugin_names(), vec!["example_plugin"]); + assert_eq!(plugin.name(), "example_coprocessor_plugin"); assert_eq!( - registry.get_path_for_plugin("example_plugin").unwrap(), + registry.loaded_plugin_names(), + vec!["example_coprocessor_plugin"] + ); + assert_eq!( + registry + .get_path_for_plugin("example_coprocessor_plugin") + .unwrap(), library_path.as_os_str() ); } @@ -519,7 +524,7 @@ mod tests { let library_path_2 = library_path .parent() .unwrap() - 
.join(pkgname_to_libname("example-plugin-2")); + .join(pkgname_to_libname("example-coprocessor-plugin-2")); let registry = PluginRegistry::new(); let plugin_name = registry.load_plugin(&library_path).unwrap(); @@ -558,9 +563,10 @@ mod tests { let original_library_path = initialize_library(); let coprocessor_dir = std::env::temp_dir().join("coprocessors"); - let library_path = coprocessor_dir.join(pkgname_to_libname("example-plugin")); - let library_path_2 = coprocessor_dir.join(pkgname_to_libname("example-plugin-2")); - let plugin_name = "example_plugin"; + let library_path = coprocessor_dir.join(pkgname_to_libname("example-coprocessor-plugin")); + let library_path_2 = + coprocessor_dir.join(pkgname_to_libname("example-coprocessor-plugin-2")); + let plugin_name = "example_coprocessor_plugin"; // Make the coprocessor directory is empty. std::fs::create_dir_all(&coprocessor_dir).unwrap(); From 986bfde546aef4fb2acb0d18fe41c4cec6b5500e Mon Sep 17 00:00:00 2001 From: zhangguangchao <1614439+zgcbj@users.noreply.github.com> Date: Tue, 1 Nov 2022 17:24:00 +0800 Subject: [PATCH 0298/1149] raftstore fix typo (#13681) close tikv/tikv#13682 Signed-off-by: zhangguangchao <1614439+zgcbj@users.noreply.github.com> Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/peer_storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 7f4b6778860..081149a6889 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -434,7 +434,7 @@ where } /// Gets a snapshot. Returns `SnapshotTemporarilyUnavailable` if there is no - /// unavailable snapshot. + /// available snapshot. 
pub fn snapshot(&self, request_index: u64, to: u64) -> raft::Result { let mut snap_state = self.snap_state.borrow_mut(); let mut tried_cnt = self.snap_tried_cnt.borrow_mut(); From 26830eb6a371654035fca7ae7a723c45cbe30764 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Tue, 1 Nov 2022 17:42:00 +0800 Subject: [PATCH 0299/1149] scheduler: Optimize CPU usage of waking up (#13697) close tikv/tikv#13692, ref tikv/tikv#13692 Optimize CPU usage of waking up. 1. When popped entries are empty in `on_release_locks`, do not do anything in the high priority pool. This seems to be the reason of the performance regression mentioned in #13692 . 2. Skip waking up if the `LockWaitQueues` is empty. Signed-off-by: MyonKeminta Co-authored-by: Ti Chi Robot --- .../lock_manager/lock_waiting_queue.rs | 74 +++++++++++++++++-- src/storage/txn/scheduler.rs | 24 +++++- 2 files changed, 89 insertions(+), 9 deletions(-) diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs index 16b3787bd7e..da8f2e2d289 100644 --- a/src/storage/lock_manager/lock_waiting_queue.rs +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -59,7 +59,7 @@ use std::{ pin::Pin, result::Result, sync::{ - atomic::{AtomicU64, Ordering}, + atomic::{AtomicU64, AtomicUsize, Ordering}, Arc, }, time::{Duration, Instant}, @@ -215,6 +215,7 @@ pub type DelayedNotifyAllFuture = Pin { queue_map: dashmap::DashMap, id_allocated: AtomicU64, + entries_count: AtomicUsize, lock_mgr: L, } @@ -229,6 +230,7 @@ impl LockWaitQueues { inner: Arc::new(LockWaitQueueInner { queue_map: dashmap::DashMap::new(), id_allocated: AtomicU64::new(1), + entries_count: AtomicUsize::new(0), lock_mgr, }), } @@ -256,10 +258,12 @@ impl LockWaitQueues { if lock_wait_entry.legacy_wake_up_index.is_none() { lock_wait_entry.legacy_wake_up_index = Some(key_state.value().legacy_wake_up_index); } + key_state .value_mut() .queue .push(lock_wait_entry.lock_wait_token, 
lock_wait_entry); + self.inner.entries_count.fetch_add(1, Ordering::SeqCst); let len = key_state.value_mut().queue.len(); drop(key_state); @@ -305,7 +309,7 @@ impl LockWaitQueues { ) -> Option<(Box, Option)> { let mut result = None; // For statistics. - let mut removed_waiters = 0; + let mut removed_waiters = 0usize; // We don't want other threads insert any more entries between finding the // queue is empty and removing the queue from the map. Wrap the logic @@ -334,6 +338,10 @@ impl LockWaitQueues { } } + self.inner + .entries_count + .fetch_sub(removed_waiters, Ordering::SeqCst); + // Remove the queue if it's emptied. v.queue.is_empty() }); @@ -341,7 +349,7 @@ impl LockWaitQueues { if removed_waiters != 0 { LOCK_WAIT_QUEUE_ENTRIES_GAUGE_VEC .waiters - .sub(removed_waiters); + .sub(removed_waiters as i64); } if removed_key.is_some() { LOCK_WAIT_QUEUE_ENTRIES_GAUGE_VEC.keys.dec(); @@ -436,7 +444,7 @@ impl LockWaitQueues { let mut conflicting_start_ts = TimeStamp::zero(); let mut conflicting_commit_ts = TimeStamp::zero(); - let mut removed_waiters = 0; + let mut removed_waiters = 0usize; // We don't want other threads insert any more entries between finding the // queue is empty and removing the queue from the map. Wrap the logic @@ -479,6 +487,10 @@ impl LockWaitQueues { popped_lock_wait_entries.push(lock_wait_entry); } + self.inner + .entries_count + .fetch_sub(removed_waiters, Ordering::SeqCst); + // If the queue is empty, remove it from the map. v.queue.is_empty() }); @@ -486,7 +498,7 @@ impl LockWaitQueues { if removed_waiters != 0 { LOCK_WAIT_QUEUE_ENTRIES_GAUGE_VEC .waiters - .sub(removed_waiters); + .sub(removed_waiters as i64); } if removed_key.is_some() { LOCK_WAIT_QUEUE_ENTRIES_GAUGE_VEC.keys.dec(); @@ -532,6 +544,7 @@ impl LockWaitQueues { // procedure. 
let removed_key = self.inner.queue_map.remove_if_mut(key, |_, v| { if let Some(res) = v.queue.remove(&lock_wait_token) { + self.inner.entries_count.fetch_sub(1, Ordering::SeqCst); LOCK_WAIT_QUEUE_ENTRIES_GAUGE_VEC.waiters.dec(); result = Some(res); } @@ -545,6 +558,20 @@ impl LockWaitQueues { result } + /// Gets the count of entries currently waiting in queues. + /// + /// Mind that the contents of the queues may be changed concurrently. + pub fn entry_count(&self) -> usize { + self.inner.entries_count.load(Ordering::SeqCst) + } + + /// Checks whether there's nothing at all waiting in queue. + /// + /// Mind that the contents of the queues may be changed concurrently. + pub fn is_empty(&self) -> bool { + self.entry_count() == 0 + } + #[allow(dead_code)] pub(super) fn get_lock_mgr(&self) -> &L { &self.inner.lock_mgr @@ -831,9 +858,13 @@ mod tests { #[test] fn test_simple_push_pop() { let queues = LockWaitQueues::new(MockLockManager::new()); + assert_eq!(queues.entry_count(), 0); + assert_eq!(queues.is_empty(), true); queues.mock_lock_wait(b"k1", 10, 5, false); queues.mock_lock_wait(b"k2", 11, 5, false); + assert_eq!(queues.entry_count(), 2); + assert_eq!(queues.is_empty(), false); queues .must_pop(b"k1", 5, 6) @@ -841,6 +872,8 @@ mod tests { .check_start_ts(10); queues.must_pop_none(b"k1", 5, 6); queues.must_not_contain_key(b"k1"); + assert_eq!(queues.entry_count(), 1); + assert_eq!(queues.is_empty(), false); queues .must_pop(b"k2", 5, 6) @@ -848,11 +881,14 @@ mod tests { .check_start_ts(11); queues.must_pop_none(b"k2", 5, 6); queues.must_not_contain_key(b"k2"); + assert_eq!(queues.entry_count(), 0); + assert_eq!(queues.is_empty(), true); } #[test] fn test_popping_priority() { let queues = LockWaitQueues::new(MockLockManager::new()); + assert_eq!(queues.entry_count(), 0); queues.mock_lock_wait(b"k1", 10, 5, false); queues.mock_lock_wait(b"k1", 20, 5, false); @@ -860,6 +896,7 @@ mod tests { queues.mock_lock_wait(b"k1", 13, 5, false); // Duplication is possible 
considering network issues and RPC retrying. queues.mock_lock_wait(b"k1", 12, 5, false); + assert_eq!(queues.entry_count(), 5); // Ordered by start_ts for &expected_start_ts in &[10u64, 12, 12, 13, 20] { @@ -870,11 +907,13 @@ mod tests { } queues.must_not_contain_key(b"k1"); + assert_eq!(queues.entry_count(), 0); } #[test] fn test_removing_by_token() { let queues = LockWaitQueues::new(MockLockManager::new()); + assert_eq!(queues.entry_count(), 0); queues.mock_lock_wait(b"k1", 10, 5, false); let token11 = queues.mock_lock_wait(b"k1", 11, 5, false).token; @@ -882,6 +921,7 @@ mod tests { let token13 = queues.mock_lock_wait(b"k1", 13, 5, false).token; queues.mock_lock_wait(b"k1", 14, 5, false); assert_eq!(queues.get_queue_length_of_key(b"k1"), 5); + assert_eq!(queues.entry_count(), 5); queues .remove_by_token(&Key::from_raw(b"k1"), token11) @@ -894,6 +934,7 @@ mod tests { .check_key(b"k1") .check_start_ts(13); assert_eq!(queues.get_queue_length_of_key(b"k1"), 3); + assert_eq!(queues.entry_count(), 3); // Removing not-existing entry takes no effect. 
assert!( @@ -907,15 +948,19 @@ mod tests { .is_none() ); assert_eq!(queues.get_queue_length_of_key(b"k1"), 3); + assert_eq!(queues.entry_count(), 3); queues.must_pop(b"k1", 5, 6).check_start_ts(10); queues.must_pop(b"k1", 5, 6).check_start_ts(12); queues.must_pop(b"k1", 5, 6).check_start_ts(14); + queues.must_not_contain_key(b"k1"); + assert_eq!(queues.entry_count(), 0); } #[test] fn test_dropping_cancelled_entries() { let queues = LockWaitQueues::new(MockLockManager::new()); + assert_eq!(queues.entry_count(), 0); let h10 = queues.mock_lock_wait(b"k1", 10, 5, false); let h11 = queues.mock_lock_wait(b"k1", 11, 5, false); @@ -924,12 +969,14 @@ mod tests { queues.mock_lock_wait(b"k1", 14, 5, false); assert_eq!(queues.get_queue_length_of_key(b"k1"), 5); + assert_eq!(queues.entry_count(), 5); h10.cancel(); h11.cancel(); h13.cancel(); assert_eq!(queues.get_queue_length_of_key(b"k1"), 2); + assert_eq!(queues.entry_count(), 2); for &expected_start_ts in &[12u64, 14] { queues @@ -937,11 +984,13 @@ mod tests { .check_start_ts(expected_start_ts); } queues.must_not_contain_key(b"k1"); + assert_eq!(queues.entry_count(), 0); } #[tokio::test] async fn test_delayed_notify_all() { let queues = LockWaitQueues::new(MockLockManager::new()); + assert_eq!(queues.entry_count(), 0); queues.mock_lock_wait(b"k1", 8, 5, false); @@ -952,6 +1001,7 @@ mod tests { ]; // Current queue: [8, 11, 12, 13] + assert_eq!(queues.entry_count(), 4); let (entry, delay_wake_up_future) = queues.must_pop_with_delayed_notify(b"k1", 5, 6); entry.check_key(b"k1").check_start_ts(8); @@ -959,6 +1009,7 @@ mod tests { // Current queue: [11*, 12*, 13*] (Items marked with * means it has // legacy_wake_up_index less than that in KeyLockWaitState, so it might // be woken up when calling delayed_notify_all). 
+ assert_eq!(queues.entry_count(), 3); let handles2 = vec![ queues.mock_lock_wait(b"k1", 14, 5, false), @@ -967,6 +1018,7 @@ mod tests { ]; // Current queue: [11*, 12*, 13*, 14, 15, 16] + assert_eq!(queues.entry_count(), 6); assert!( handles1[0] @@ -988,9 +1040,11 @@ mod tests { ); // Current queue: [14, 15, 16] + assert_eq!(queues.entry_count(), 3); queues.mock_lock_wait(b"k1", 9, 5, false); // Current queue: [9, 14, 15, 16] + assert_eq!(queues.entry_count(), 4); // 9 will be woken up and delayed wake up should be scheduled. After delaying, // 14 to 16 should be all woken up later if they are all not resumable. @@ -1000,11 +1054,13 @@ mod tests { entry.check_key(b"k1").check_start_ts(9); // Current queue: [14*, 15*, 16*] + assert_eq!(queues.entry_count(), 3); queues.mock_lock_wait(b"k1", 17, 5, false); let handle18 = queues.mock_lock_wait(b"k1", 18, 5, false); // Current queue: [14*, 15*, 16*, 17, 18] + assert_eq!(queues.entry_count(), 5); // Wakes up 14, and stops at 15 which is resumable. Then, 15 should be returned // and the caller should be responsible for waking it up. @@ -1012,6 +1068,7 @@ mod tests { entry15.check_key(b"k1").check_start_ts(15); // Current queue: [16*, 17, 18] + assert_eq!(queues.entry_count(), 3); let mut it = handles2.into_iter(); // Receive 14. @@ -1050,6 +1107,7 @@ mod tests { ); // Current queue: [16*, 17, 18] + assert_eq!(queues.entry_count(), 3); let (entry, delayed_wake_up_future) = queues.must_pop_with_delayed_notify(b"k1", 7, 8); entry.check_key(b"k1").check_start_ts(16); @@ -1064,6 +1122,7 @@ mod tests { queues.must_have_next_entry(b"k1", 17); // Current queue: [17*, 18*] + assert_eq!(queues.entry_count(), 2); // Don't need to create new future if there already exists one for the key. 
let entry = queues.must_pop_with_no_delayed_notify(b"k1", 9, 10); @@ -1071,18 +1130,22 @@ mod tests { queues.must_have_next_entry(b"k1", 18); // Current queue: [18*] + assert_eq!(queues.entry_count(), 1); queues.mock_lock_wait(b"k1", 19, 5, false); // Current queue: [18*, 19] + assert_eq!(queues.entry_count(), 2); assert!(delayed_wake_up_future.await.is_none()); // 18 will be cancelled with ts of the latest wake-up event. expect_write_conflict(&handle18.wait_for_result().unwrap_err().0, 9, 10); // Current queue: [19] + assert_eq!(queues.entry_count(), 1); // Don't need to create new future if the queue is cleared. let entry = queues.must_pop_with_no_delayed_notify(b"k1", 9, 10); entry.check_key(b"k1").check_start_ts(19); // Current queue: empty + assert_eq!(queues.entry_count(), 0); queues.must_not_contain_key(b"k1"); // Calls delayed_notify_all on keys that not exists (maybe deleted due to @@ -1093,5 +1156,6 @@ mod tests { .is_none() ); queues.must_not_contain_key(b"k1"); + assert_eq!(queues.entry_count(), 0); } } diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 4ccc868f30d..917c9fbaffc 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -48,6 +48,7 @@ use parking_lot::{Mutex, MutexGuard, RwLockWriteGuard}; use pd_client::{Feature, FeatureGate}; use raftstore::store::TxnExt; use resource_metering::{FutureExt, ResourceTagFactory}; +use smallvec::SmallVec; use tikv_kv::{Modify, Snapshot, SnapshotExt, WriteData}; use tikv_util::{ deadline::Deadline, quota_limiter::QuotaLimiter, time::Instant, timer::GLOBAL_TIMER_HANDLE, @@ -790,8 +791,17 @@ impl Scheduler { } fn on_release_locks(&self, released_locks: ReleasedLocks) { - let mut legacy_wake_up_list = vec![]; - let mut delay_wake_up_futures = vec![]; + // This function is always called when holding the latch of the involved keys. 
+ // So if we found the lock waiting queues are empty, there's no chance + // that other threads/commands adds new lock-wait entries to the keys + // concurrently. Therefore it's safe to skip waking up when we found the + // lock waiting queues are empty. + if self.inner.lock_wait_queues.is_empty() { + return; + } + + let mut legacy_wake_up_list = SmallVec::<[_; 4]>::new(); + let mut delay_wake_up_futures = SmallVec::<[_; 4]>::new(); let wake_up_delay_duration_ms = self .inner .pessimistic_lock_wake_up_delay_duration_ms @@ -817,13 +827,19 @@ impl Scheduler { } }); + if legacy_wake_up_list.is_empty() && delay_wake_up_futures.is_empty() { + return; + } + self.wake_up_legacy_pessimistic_locks(legacy_wake_up_list, delay_wake_up_futures); } fn wake_up_legacy_pessimistic_locks( &self, - legacy_wake_up_list: Vec<(Box, ReleasedLock)>, - delayed_wake_up_futures: Vec, + legacy_wake_up_list: impl IntoIterator, ReleasedLock)> + + Send + + 'static, + delayed_wake_up_futures: impl IntoIterator + Send + 'static, ) { let self1 = self.clone(); self.get_sched_pool(CommandPri::High) From de4cd130d0fba0ce47505205b41801f2d7cefa39 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Tue, 1 Nov 2022 17:58:00 +0800 Subject: [PATCH 0300/1149] raftstore-v2: add snapshot basic logic in peer storage (#13555) ref tikv/tikv#12842 this commit is part of support snapshot in raftstore v2 - add snapshot basic implementation in peer storage Signed-off-by: nolouch Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/batch/store.rs | 30 +- components/raftstore-v2/src/fsm/apply.rs | 17 +- components/raftstore-v2/src/fsm/peer.rs | 9 +- .../raftstore-v2/src/operation/command/mod.rs | 13 +- components/raftstore-v2/src/operation/life.rs | 2 +- components/raftstore-v2/src/operation/mod.rs | 2 +- .../raftstore-v2/src/operation/ready/mod.rs | 26 +- .../src/operation/ready/snapshot.rs | 286 ++++++++++++++++++ components/raftstore-v2/src/raft/apply.rs | 16 +- components/raftstore-v2/src/raft/peer.rs | 16 +- 
components/raftstore-v2/src/raft/storage.rs | 219 ++++++++++++-- components/raftstore-v2/src/router/imp.rs | 13 +- .../src/router/internal_message.rs | 4 +- components/raftstore-v2/src/router/message.rs | 8 +- .../raftstore/src/store/async_io/mod.rs | 1 + .../raftlog_fetch.rs => async_io/read.rs} | 71 +++-- .../raftstore/src/store/entry_storage.rs | 26 +- components/raftstore/src/store/fsm/peer.rs | 9 +- components/raftstore/src/store/fsm/store.rs | 13 +- components/raftstore/src/store/metrics.rs | 1 + components/raftstore/src/store/mod.rs | 6 +- components/raftstore/src/store/msg.rs | 4 +- components/raftstore/src/store/peer.rs | 8 +- .../raftstore/src/store/peer_storage.rs | 35 ++- components/raftstore/src/store/transport.rs | 12 +- components/raftstore/src/store/worker/mod.rs | 4 - 26 files changed, 695 insertions(+), 156 deletions(-) create mode 100644 components/raftstore-v2/src/operation/ready/snapshot.rs rename components/raftstore/src/store/{worker/raftlog_fetch.rs => async_io/read.rs} (58%) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index b387300b40e..76d4fd16bea 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -20,8 +20,8 @@ use kvproto::{ }; use raft::INVALID_ID; use raftstore::store::{ - fsm::store::PeerTickBatch, local_metrics::RaftMetrics, Config, RaftlogFetchRunner, - RaftlogFetchTask, StoreWriters, Transport, WriteSenders, + fsm::store::PeerTickBatch, local_metrics::RaftMetrics, Config, ReadRunner, ReadTask, + StoreWriters, Transport, WriteSenders, }; use slog::Logger; use tikv_util::{ @@ -68,7 +68,7 @@ pub struct StoreContext { pub engine: ER, pub tablet_factory: Arc>, pub apply_pool: FuturePool, - pub log_fetch_scheduler: Scheduler, + pub read_scheduler: Scheduler>, } /// A [`PollHandler`] that handles updates of [`StoreFsm`]s and [`PeerFsm`]s. 
@@ -215,7 +215,7 @@ struct StorePollerBuilder { tablet_factory: Arc>, trans: T, router: StoreRouter, - log_fetch_scheduler: Scheduler, + read_scheduler: Scheduler>, write_senders: WriteSenders, apply_pool: FuturePool, logger: Logger, @@ -230,7 +230,7 @@ impl StorePollerBuilder { tablet_factory: Arc>, trans: T, router: StoreRouter, - log_fetch_scheduler: Scheduler, + read_scheduler: Scheduler>, store_writers: &mut StoreWriters, logger: Logger, store_meta: Arc>>, @@ -252,7 +252,7 @@ impl StorePollerBuilder { tablet_factory, trans, router, - log_fetch_scheduler, + read_scheduler, apply_pool, logger, write_senders: store_writers.senders(), @@ -271,7 +271,7 @@ impl StorePollerBuilder { region_id, self.store_id, self.engine.clone(), - self.log_fetch_scheduler.clone(), + self.read_scheduler.clone(), &self.logger, )? { Some(p) => p, @@ -324,7 +324,7 @@ where engine: self.engine.clone(), tablet_factory: self.tablet_factory.clone(), apply_pool: self.apply_pool.clone(), - log_fetch_scheduler: self.log_fetch_scheduler.clone(), + read_scheduler: self.read_scheduler.clone(), }; let cfg_tracker = self.cfg.clone().tracker("raftstore".to_string()); StorePoller::new(poll_ctx, cfg_tracker) @@ -335,14 +335,14 @@ where /// raftstore. 
struct Workers { /// Worker for fetching raft logs asynchronously - log_fetch_worker: Worker, + async_read_worker: Worker, store_writers: StoreWriters, } impl Default for Workers { fn default() -> Self { Self { - log_fetch_worker: Worker::new("raftlog-fetch-worker"), + async_read_worker: Worker::new("async-read-worker"), store_writers: StoreWriters::default(), } } @@ -373,9 +373,9 @@ impl StoreSystem { workers .store_writers .spawn(store_id, raft_engine.clone(), None, router, &trans, &cfg)?; - let log_fetch_scheduler = workers.log_fetch_worker.start( - "raftlog-fetch-worker", - RaftlogFetchRunner::new(router.clone(), raft_engine.clone()), + let read_scheduler = workers.async_read_worker.start( + "async-read-worker", + ReadRunner::new(router.clone(), raft_engine.clone()), ); let mut builder = StorePollerBuilder::new( @@ -385,7 +385,7 @@ impl StoreSystem { tablet_factory, trans, router.clone(), - log_fetch_scheduler, + read_scheduler, &mut workers.store_writers, self.logger.clone(), store_meta.clone(), @@ -435,7 +435,7 @@ impl StoreSystem { self.system.shutdown(); workers.store_writers.shutdown(); - workers.log_fetch_worker.stop(); + workers.async_read_worker.stop(); } } diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 4a1e05b8f75..c4eb03f350d 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -14,8 +14,12 @@ use crossbeam::channel::TryRecvError; use engine_traits::KvEngine; use futures::{Future, StreamExt}; use kvproto::{metapb, raft_serverpb::RegionLocalState}; +use raftstore::store::ReadTask; use slog::Logger; -use tikv_util::mpsc::future::{self, Receiver, Sender, WakePolicy}; +use tikv_util::{ + mpsc::future::{self, Receiver, Sender, WakePolicy}, + worker::Scheduler, +}; use crate::{ raft::Apply, @@ -61,10 +65,18 @@ impl ApplyFsm { region_state: RegionLocalState, res_reporter: R, remote_tablet: CachedTablet, + read_scheduler: Scheduler>, logger: Logger, ) -> 
(ApplyScheduler, Self) { let (tx, rx) = future::unbounded(WakePolicy::Immediately); - let apply = Apply::new(peer, region_state, res_reporter, remote_tablet, logger); + let apply = Apply::new( + peer, + region_state, + res_reporter, + remote_tablet, + read_scheduler, + logger, + ); ( ApplyScheduler { sender: tx }, Self { @@ -86,6 +98,7 @@ impl ApplyFsm { match task { // TODO: flush by buffer size. ApplyTask::CommittedEntries(ce) => self.apply.apply_committed_entries(ce).await, + ApplyTask::Snapshot(snap_task) => self.apply.schedule_gen_snapshot(snap_task), } // TODO: yield after some time. diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index a1beedef968..7083a9e529c 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -40,7 +40,7 @@ impl PeerFsm { pub fn new( cfg: &Config, tablet_factory: &dyn TabletFactory, - storage: Storage, + storage: Storage, ) -> Result> { let peer = Peer::new(cfg, tablet_factory, storage)?; info!(peer.logger, "create peer"); @@ -229,8 +229,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, .fsm .peer_mut() .on_persisted(self.store_ctx, peer_id, ready_number), - PeerMsg::FetchedLogs(fetched_logs) => { - self.fsm.peer_mut().on_fetched_logs(fetched_logs) + PeerMsg::LogsFetched(fetched_logs) => { + self.fsm.peer_mut().on_logs_fetched(fetched_logs) + } + PeerMsg::SnapshotGenerated(snap_res) => { + self.fsm.peer_mut().on_snapshot_generated(snap_res) } PeerMsg::QueryDebugInfo(ch) => self.fsm.peer_mut().on_query_debug_info(ch), #[cfg(feature = "testexport")] diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index fe863a74b8a..21122e5559f 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -49,6 +49,7 @@ use tikv_util::{box_err, time::monotonic_raw_now}; use crate::{ 
batch::StoreContext, fsm::{ApplyFsm, ApplyResReporter, PeerFsmDelegate}, + operation::GenSnapTask, raft::{Apply, Peer}, router::{ApplyRes, ApplyTask, CmdResChannel, PeerMsg}, }; @@ -120,8 +121,16 @@ impl Peer { let mailbox = store_ctx.router.mailbox(self.region_id()).unwrap(); let tablet = self.tablet().clone(); let logger = self.logger.clone(); - let (apply_scheduler, mut apply_fsm) = - ApplyFsm::new(self.peer().clone(), region_state, mailbox, tablet, logger); + let read_scheduler = self.storage().read_scheduler(); + let (apply_scheduler, mut apply_fsm) = ApplyFsm::new( + self.peer().clone(), + region_state, + mailbox, + tablet, + read_scheduler, + logger, + ); + store_ctx .apply_pool .spawn(async move { apply_fsm.handle_all_tasks().await }) diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 678cf6ece4b..7be70a9afe7 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -182,7 +182,7 @@ impl Store { self.store_id(), region, ctx.engine.clone(), - ctx.log_fetch_scheduler.clone(), + ctx.read_scheduler.clone(), &ctx.logger, ) .and_then(|s| PeerFsm::new(&ctx.cfg, &*ctx.tablet_factory, s)) diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 1eaeb21ec18..5b19db91b71 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -7,6 +7,6 @@ mod ready; pub use command::{AdminCmdResult, CommittedEntries, SimpleWriteDecoder, SimpleWriteEncoder}; pub use life::DestroyProgress; -pub use ready::AsyncWriter; +pub use ready::{AsyncWriter, GenSnapTask, SnapState}; pub(crate) use self::query::LocalReader; diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index cfc3d086163..62cb42ef253 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ 
b/components/raftstore-v2/src/operation/ready/mod.rs @@ -18,6 +18,7 @@ //! There two steps can be processed concurrently. mod async_writer; +mod snapshot; use std::cmp; @@ -30,12 +31,15 @@ use raftstore::store::{util, ExtraStates, FetchedLogs, Transport, WriteTask}; use slog::{debug, error, trace, warn}; use tikv_util::time::{duration_to_sec, monotonic_raw_now}; -pub use self::async_writer::AsyncWriter; +pub use self::{ + async_writer::AsyncWriter, + snapshot::{GenSnapTask, SnapState}, +}; use crate::{ batch::StoreContext, fsm::PeerFsmDelegate, raft::{Peer, Storage}, - router::PeerTick, + router::{ApplyTask, PeerTick}, }; impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { /// Raft relies on periodic ticks to keep the state machine sync with other @@ -115,7 +119,7 @@ impl Peer { } /// Callback for fetching logs asynchronously. - pub fn on_fetched_logs(&mut self, fetched_logs: FetchedLogs) { + pub fn on_logs_fetched(&mut self, fetched_logs: FetchedLogs) { let FetchedLogs { context, logs } = fetched_logs; let low = logs.low; if !self.is_leader() { @@ -298,6 +302,14 @@ impl Peer { self.handle_raft_committed_entries(ctx, ready.take_committed_entries()); } + // Check whether there is a pending generate snapshot task, the task + // needs to be sent to the apply system. + // Always sending snapshot task after apply task, so it gets latest + // snapshot. + if let Some(gen_task) = self.storage_mut().take_gen_snap_task() { + self.apply_scheduler().send(ApplyTask::Snapshot(gen_task)); + } + let ready_number = ready.number(); let mut write_task = WriteTask::new(self.region_id(), self.peer_id(), ready_number); self.storage_mut() @@ -385,14 +397,10 @@ impl Peer { } } -impl Storage { +impl Storage { /// Apply the ready to the storage. If there is any states need to be /// persisted, it will be written to `write_task`. 
- fn handle_raft_ready( - &mut self, - ready: &mut Ready, - write_task: &mut WriteTask, - ) { + fn handle_raft_ready(&mut self, ready: &mut Ready, write_task: &mut WriteTask) { let prev_raft_state = self.entry_storage().raft_state().clone(); let ever_persisted = self.ever_persisted(); diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs new file mode 100644 index 00000000000..6f4b63630a9 --- /dev/null +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -0,0 +1,286 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +//! This module contains snapshot relative processing logic. +//! +//! # Snapshot State +//! +//! generator and apply snapshot works asynchronously. the snap_sate indicates +//! the curren snapshot state. +//! +//! # Process Overview +//! +//! generate snapshot: +//! - Raft call `snapshot` interface to acquire a snapshot, then storage setup +//! the gen_snap_task. +//! - handle ready will send the gen_snap_task to the apply work +//! - apply worker schedule a gen tablet snapshot task to async read worker with +//! region state and apply state. +//! - async read worker generates the tablet snapshot and sends the result to +//! peer fsm, then Raft will get the snapshot. 
+ +use std::{ + borrow::BorrowMut, + fmt::{self, Debug}, + mem, + sync::{ + atomic::{AtomicBool, AtomicU64, Ordering}, + mpsc, Arc, + }, +}; + +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::raft_serverpb::{RaftSnapshotData, RegionLocalState}; +use protobuf::Message; +use raft::eraftpb::Snapshot; +use raftstore::store::{metrics::STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER, ReadTask}; +use slog::{error, info}; +use tikv_util::{box_try, worker::Scheduler}; + +use crate::{ + fsm::ApplyResReporter, + raft::{Apply, Peer, Storage}, + router::{ApplyTask, PeerTick}, + Result, +}; + +#[derive(Debug)] +pub enum SnapState { + Relax, + Generating { + canceled: Arc, + index: Arc, + }, + Generated(Box), +} + +impl PartialEq for SnapState { + fn eq(&self, other: &SnapState) -> bool { + match (self, other) { + (&SnapState::Relax, &SnapState::Relax) + | (&SnapState::Generating { .. }, &SnapState::Generating { .. }) => true, + (&SnapState::Generated(ref snap1), &SnapState::Generated(ref snap2)) => { + *snap1 == *snap2 + } + _ => false, + } + } +} + +pub struct GenSnapTask { + region_id: u64, + // Fill it when you are going to generate the snapshot. + // index used to check if the gen task should be canceled. + index: Arc, + // Set it to true to cancel the task if necessary. 
+ canceled: Arc, + // indicates whether the snapshot is triggered due to load balance + for_balance: bool, +} + +impl GenSnapTask { + pub fn new(region_id: u64, index: Arc, canceled: Arc) -> GenSnapTask { + GenSnapTask { + region_id, + index, + canceled, + for_balance: false, + } + } + + pub fn set_for_balance(&mut self) { + self.for_balance = true; + } +} + +impl Debug for GenSnapTask { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("GenSnapTask") + .field("region_id", &self.region_id) + .finish() + } +} + +impl Peer { + pub fn on_snapshot_generated(&mut self, snapshot: Box) { + if self.storage_mut().on_snapshot_generated(snapshot) { + self.raft_group_mut().ping(); + self.set_has_ready(); + } + } +} + +impl Apply { + /// Handle snapshot. + /// + /// Will schedule a task to read worker and then generate a snapshot + /// asynchronously. + pub fn schedule_gen_snapshot(&mut self, snap_task: GenSnapTask) { + // Flush before do snapshot. + if snap_task.canceled.load(Ordering::SeqCst) { + return; + } + self.flush(); + + // Send generate snapshot task to region worker. + let (last_applied_index, last_applied_term) = self.apply_progress(); + snap_task.index.store(last_applied_index, Ordering::SeqCst); + let gen_tablet_sanp_task = ReadTask::GenTabletSnapshot { + region_id: snap_task.region_id, + tablet: self.tablet().clone(), + region_state: self.region_state().clone(), + last_applied_term, + last_applied_index, + for_balance: snap_task.for_balance, + canceled: snap_task.canceled.clone(), + }; + if let Err(e) = self.read_scheduler().schedule(gen_tablet_sanp_task) { + error!( + self.logger, + "schedule snapshot failed"; + "error" => ?e, + ); + snap_task.canceled.store(true, Ordering::SeqCst); + } + } +} + +impl Storage { + /// Gets a snapshot. Returns `SnapshotTemporarilyUnavailable` if there is no + /// unavailable snapshot. 
+ pub fn snapshot(&self, request_index: u64, to: u64) -> raft::Result { + let mut snap_state = self.snap_state_mut(); + match *snap_state { + SnapState::Generating { ref canceled, .. } => { + if canceled.load(Ordering::SeqCst) { + self.cancel_generating_snap(None); + } else { + return Err(raft::Error::Store( + raft::StorageError::SnapshotTemporarilyUnavailable, + )); + } + } + SnapState::Generated(ref s) => { + let SnapState::Generated(snap) = mem::replace(&mut *snap_state, SnapState::Relax) else { unreachable!() }; + if self.validate_snap(&snap, request_index) { + return Ok(*snap); + } + } + _ => {} + } + + if SnapState::Relax != *snap_state { + panic!( + "{:?} unexpected state: {:?}", + self.logger().list(), + *snap_state + ); + } + + info!( + self.logger(), + "requesting snapshot"; + "request_index" => request_index, + "request_peer" => to, + ); + let canceled = Arc::new(AtomicBool::new(false)); + let index = Arc::new(AtomicU64::new(0)); + *snap_state = SnapState::Generating { + canceled: canceled.clone(), + index: index.clone(), + }; + + let task = GenSnapTask::new(self.region().get_id(), index, canceled); + let mut gen_snap_task = self.gen_snap_task_mut(); + assert!(gen_snap_task.is_none()); + *gen_snap_task = Box::new(Some(task)); + Err(raft::Error::Store( + raft::StorageError::SnapshotTemporarilyUnavailable, + )) + } + + /// Validate the snapshot. Returns true if it's valid. + fn validate_snap(&self, snap: &Snapshot, request_index: u64) -> bool { + let idx = snap.get_metadata().get_index(); + // TODO(nolouch): check tuncated index + if idx < request_index { + // stale snapshot, should generate again. 
+ info!( + self.logger(), + "snapshot is stale, generate again"; + "snap_index" => idx, + "request_index" => request_index, + ); + STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER.stale.inc(); + return false; + } + + let mut snap_data = RaftSnapshotData::default(); + if let Err(e) = snap_data.merge_from_bytes(snap.get_data()) { + error!( + self.logger(), + "failed to decode snapshot, it may be corrupted"; + "err" => ?e, + ); + STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER.decode.inc(); + return false; + } + let snap_epoch = snap_data.get_region().get_region_epoch(); + let latest_epoch = self.region().get_region_epoch(); + if snap_epoch.get_conf_ver() < latest_epoch.get_conf_ver() { + info!( + self.logger(), + "snapshot epoch is stale"; + "snap_epoch" => ?snap_epoch, + "latest_epoch" => ?latest_epoch, + ); + STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER.epoch.inc(); + return false; + } + + true + } + + /// Cancel generating snapshot. + pub fn cancel_generating_snap(&self, compact_to: Option) { + let mut snap_state = self.snap_state_mut(); + let SnapState::Generating { + ref canceled, + ref index, + } = *snap_state else { return }; + + if let Some(idx) = compact_to { + let snap_index = index.load(Ordering::SeqCst); + if snap_index == 0 || idx <= snap_index + 1 { + return; + } + } + canceled.store(true, Ordering::SeqCst); + *snap_state = SnapState::Relax; + self.gen_snap_task_mut().take(); + info!( + self.logger(), + "snapshot is canceled"; + "compact_to" => compact_to, + ); + STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER.cancel.inc(); + } + + /// Try to switch snap state to generated. only `Generating` can switch to + /// `Generated`. + /// TODO: make the snap state more clearer, the snapshot must be consumed. 
+ pub fn on_snapshot_generated(&self, snap: Box) -> bool { + let mut snap_state = self.snap_state_mut(); + let SnapState::Generating { + ref canceled, + ref index, + } = *snap_state else { return false }; + + if snap.get_metadata().get_index() < index.load(Ordering::SeqCst) { + return false; + } + // Should changed `SnapState::Generated` to `SnapState::Relax` when the + // snap is consumed or canceled. Such as leader changed, the state of generated + // should be reset. + *snap_state = SnapState::Generated(snap); + true + } +} diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 068e5124c0c..ff29b3ba029 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -4,8 +4,9 @@ use std::mem; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{metapb, raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; -use raftstore::store::fsm::apply::DEFAULT_APPLY_WB_SIZE; +use raftstore::store::{fsm::apply::DEFAULT_APPLY_WB_SIZE, ReadTask}; use slog::Logger; +use tikv_util::worker::Scheduler; use super::Peer; use crate::{ @@ -34,6 +35,7 @@ pub struct Apply { region_state: RegionLocalState, res_reporter: R, + read_scheduler: Scheduler>, pub(crate) logger: Logger, } @@ -44,6 +46,7 @@ impl Apply { region_state: RegionLocalState, res_reporter: R, mut remote_tablet: CachedTablet, + read_scheduler: Scheduler>, logger: Logger, ) -> Self { Apply { @@ -57,6 +60,7 @@ impl Apply { applied_term: 0, admin_cmd_result: vec![], region_state, + read_scheduler, res_reporter, logger, } @@ -96,6 +100,11 @@ impl Apply { (self.applied_index, self.applied_term) } + #[inline] + pub fn read_scheduler(&self) -> &Scheduler> { + &self.read_scheduler + } + #[inline] pub fn region_state(&self) -> &RegionLocalState { &self.region_state @@ -116,6 +125,11 @@ impl Apply { self.tablet = tablet; } + #[inline] + pub fn tablet(&self) -> &EK { + &self.tablet + } + #[inline] pub fn peer(&self) -> 
&metapb::Peer { &self.peer diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 650c410cef9..8619b8cf2d4 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -28,7 +28,7 @@ const REGION_READ_PROGRESS_CAP: usize = 128; /// A peer that delegates commands between state machine and raft. pub struct Peer { - raft_group: RawNode>, + raft_group: RawNode>, tablet: CachedTablet, /// We use a cache for looking up peers. Not all peers exist in region's /// peer list, for example, an isolated peer may need to send/receive @@ -67,7 +67,7 @@ impl Peer { pub fn new( cfg: &Config, tablet_factory: &dyn TabletFactory, - storage: Storage, + storage: Storage, ) -> Result { let logger = storage.logger().clone(); @@ -178,7 +178,7 @@ impl Peer { } #[inline] - pub fn storage(&self) -> &Storage { + pub fn storage(&self) -> &Storage { self.raft_group.store() } @@ -203,7 +203,7 @@ impl Peer { } #[inline] - pub fn storage_mut(&mut self) -> &mut Storage { + pub fn storage_mut(&mut self) -> &mut Storage { self.raft_group.mut_store() } @@ -218,12 +218,12 @@ impl Peer { } #[inline] - pub fn entry_storage(&self) -> &EntryStorage { + pub fn entry_storage(&self) -> &EntryStorage { self.raft_group.store().entry_storage() } #[inline] - pub fn entry_storage_mut(&mut self) -> &mut EntryStorage { + pub fn entry_storage_mut(&mut self) -> &mut EntryStorage { self.raft_group.mut_store().entry_storage_mut() } @@ -238,12 +238,12 @@ impl Peer { } #[inline] - pub fn raft_group(&self) -> &RawNode> { + pub fn raft_group(&self) -> &RawNode> { &self.raft_group } #[inline] - pub fn raft_group_mut(&mut self) -> &mut RawNode> { + pub fn raft_group_mut(&mut self) -> &mut RawNode> { &mut self.raft_group } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index b08624b1185..19a52d4c5a2 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ 
b/components/raftstore-v2/src/raft/storage.rs @@ -1,8 +1,12 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::fmt::{self, Debug, Formatter}; +use std::{ + cell::{RefCell, RefMut}, + fmt::{self, Debug, Formatter}, + sync::{mpsc::Receiver, Arc}, +}; -use engine_traits::{RaftEngine, RaftLogBatch}; +use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use kvproto::{ metapb::{self, Region}, raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState}, @@ -11,13 +15,14 @@ use raft::{ eraftpb::{ConfState, Entry, Snapshot}, GetEntriesContext, RaftState, INVALID_ID, }; -use raftstore::store::{ - util, EntryStorage, RaftlogFetchTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, -}; -use slog::{o, Logger}; +use raftstore::store::{util, EntryStorage, ReadTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM}; +use slog::{info, o, Logger}; use tikv_util::{box_err, store::find_peer, worker::Scheduler}; -use crate::Result; +use crate::{ + operation::{GenSnapTask, SnapState}, + Result, +}; pub fn write_initial_states(wb: &mut impl RaftLogBatch, region: Region) -> Result<()> { let region_id = region.get_id(); @@ -49,8 +54,8 @@ pub fn write_initial_states(wb: &mut impl RaftLogBatch, region: Region) -> Resul /// A storage for raft. /// /// It's similar to `PeerStorage` in v1. -pub struct Storage { - entry_storage: EntryStorage, +pub struct Storage { + entry_storage: EntryStorage, peer: metapb::Peer, region_state: RegionLocalState, /// Whether states has been persisted before. If a peer is just created by @@ -58,9 +63,13 @@ pub struct Storage { /// at least once dispite whether the state changes since create. ever_persisted: bool, logger: Logger, + + /// Snapshot part. 
+ snap_state: RefCell, + gen_snap_task: RefCell>>, } -impl Debug for Storage { +impl Debug for Storage { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!( f, @@ -71,14 +80,14 @@ impl Debug for Storage { } } -impl Storage { +impl Storage { #[inline] - pub fn entry_storage(&self) -> &EntryStorage { + pub fn entry_storage(&self) -> &EntryStorage { &self.entry_storage } #[inline] - pub fn entry_storage_mut(&mut self) -> &mut EntryStorage { + pub fn entry_storage_mut(&mut self) -> &mut EntryStorage { &mut self.entry_storage } @@ -101,9 +110,19 @@ impl Storage { pub fn logger(&self) -> &Logger { &self.logger } + + #[inline] + pub fn snap_state_mut(&self) -> RefMut<'_, SnapState> { + self.snap_state.borrow_mut() + } + + #[inline] + pub fn gen_snap_task_mut(&self) -> RefMut<'_, Box>> { + self.gen_snap_task.borrow_mut() + } } -impl Storage { +impl Storage { /// Creates a new storage with uninit states. /// /// This should only be used for creating new peer from raft message. @@ -111,7 +130,7 @@ impl Storage { store_id: u64, region: Region, engine: ER, - log_fetch_scheduler: Scheduler, + read_scheduler: Scheduler>, logger: &Logger, ) -> Result { let mut region_state = RegionLocalState::default(); @@ -122,7 +141,7 @@ impl Storage { RaftLocalState::default(), RaftApplyState::default(), engine, - log_fetch_scheduler, + read_scheduler, false, logger, ) @@ -136,9 +155,9 @@ impl Storage { region_id: u64, store_id: u64, engine: ER, - log_fetch_scheduler: Scheduler, + read_scheduler: Scheduler>, logger: &Logger, - ) -> Result>> { + ) -> Result>> { let region_state = match engine.get_region_state(region_id) { Ok(Some(s)) => s, res => { @@ -174,7 +193,7 @@ impl Storage { raft_state, apply_state, engine, - log_fetch_scheduler, + read_scheduler, true, logger, ) @@ -187,7 +206,7 @@ impl Storage { raft_state: RaftLocalState, apply_state: RaftApplyState, engine: ER, - log_fetch_scheduler: Scheduler, + read_scheduler: Scheduler>, persisted: bool, logger: &Logger, ) -> Result { 
@@ -206,7 +225,7 @@ impl Storage { raft_state, apply_state, region, - log_fetch_scheduler, + read_scheduler, )?; Ok(Storage { @@ -215,6 +234,8 @@ impl Storage { region_state, ever_persisted: persisted, logger, + snap_state: RefCell::new(SnapState::Relax), + gen_snap_task: RefCell::new(Box::new(None)), }) } @@ -223,6 +244,11 @@ impl Storage { self.entry_storage.raft_state() } + #[inline] + pub fn read_scheduler(&self) -> Scheduler> { + self.entry_storage.read_scheduler() + } + #[inline] pub fn apply_state(&self) -> &RaftApplyState { self.entry_storage.apply_state() @@ -241,6 +267,19 @@ impl Storage { self.ever_persisted = true; } + #[inline] + pub fn take_gen_snap_task(&mut self) -> Option { + self.gen_snap_task.get_mut().take() + } + + #[inline] + pub fn tablet_index(&self) -> u64 { + match self.region_state.get_state() { + PeerState::Tombstone | PeerState::Applying => 0, + _ => self.region_state.get_tablet_index(), + } + } + #[inline] pub fn set_region_state(&mut self, state: RegionLocalState) { self.region_state = state; @@ -253,7 +292,7 @@ impl Storage { } } -impl raft::Storage for Storage { +impl raft::Storage for Storage { fn initial_state(&self) -> raft::Result { let hard_state = self.raft_state().get_hard_state().clone(); // We will persist hard state no matter if it's initialized or not in @@ -306,24 +345,68 @@ impl raft::Storage for Storage { } fn snapshot(&self, request_index: u64, to: u64) -> raft::Result { - Err(raft::Error::Store( - raft::StorageError::SnapshotTemporarilyUnavailable, - )) + self.snapshot(request_index, to) } } #[cfg(test)] mod tests { - use engine_traits::{RaftEngine, RaftEngineReadOnly, RaftLogBatch}; + use std::{ + sync::mpsc::{sync_channel, SyncSender}, + time::Duration, + }; + + use engine_test::{ + ctor::{CfOptions, DbOptions}, + kv::{KvTestEngine, TestTabletFactoryV2}, + raft::RaftTestEngine, + }; + use engine_traits::{ + KvEngine, OpenOptions, RaftEngine, RaftEngineReadOnly, RaftLogBatch, TabletFactory, ALL_CFS, + }; use 
kvproto::{ metapb::{Peer, Region}, raft_serverpb::PeerState, }; - use raftstore::store::{RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM}; + use raft::{eraftpb::Snapshot as RaftSnapshot, Error as RaftError, StorageError}; + use raftstore::store::{ + AsyncReadNotifier, FetchedLogs, ReadRunner, ReadTask, RAFT_INIT_LOG_INDEX, + RAFT_INIT_LOG_TERM, + }; + use slog::o; use tempfile::TempDir; + use tikv_util::worker::{Runnable, Worker}; - #[test] - fn test_write_initial_states() { + use super::*; + use crate::{fsm::ApplyResReporter, raft::Apply, router::ApplyRes, tablet::CachedTablet}; + + #[derive(Clone)] + pub struct TestRouter { + ch: SyncSender>, + } + + impl TestRouter { + pub fn new() -> (Self, Receiver>) { + let (tx, rx) = sync_channel(1); + (Self { ch: tx }, rx) + } + } + + impl AsyncReadNotifier for TestRouter { + fn notify_logs_fetched(&self, _region_id: u64, _fetched_logs: FetchedLogs) { + unreachable!(); + } + + fn notify_snapshot_generated(&self, _region_id: u64, snapshot: Box) { + self.ch.send(snapshot).unwrap(); + } + } + + impl ApplyResReporter for TestRouter { + fn report(&self, _res: ApplyRes) {} + } + + fn new_region() -> Region { let mut region = Region::default(); region.set_id(4); let mut p = Peer::default(); @@ -332,12 +415,17 @@ mod tests { region.mut_peers().push(p); region.mut_region_epoch().set_version(2); region.mut_region_epoch().set_conf_ver(4); + region + } + #[test] + fn test_write_initial_states() { + let region = new_region(); let path = TempDir::new().unwrap(); let engine = engine_test::new_temp_engine(&path); let raft_engine = &engine.raft; let mut wb = raft_engine.log_batch(10); - super::write_initial_states(&mut wb, region.clone()).unwrap(); + write_initial_states(&mut wb, region.clone()).unwrap(); assert!(!wb.is_empty()); raft_engine.consume(&mut wb, true).unwrap(); @@ -358,4 +446,75 @@ mod tests { assert_eq!(ts.get_index(), RAFT_INIT_LOG_INDEX); assert_eq!(ts.get_term(), RAFT_INIT_LOG_TERM); } + + #[test] + fn 
test_storage_create_snapshot() { + let region = new_region(); + let path = TempDir::new().unwrap(); + let raft_engine = + engine_test::raft::new_engine(&format!("{}", path.path().join("raft").display()), None) + .unwrap(); + let mut wb = raft_engine.log_batch(10); + write_initial_states(&mut wb, region.clone()).unwrap(); + assert!(!wb.is_empty()); + raft_engine.consume(&mut wb, true).unwrap(); + // building a tablet factory + let ops = DbOptions::default(); + let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); + let factory = Arc::new(TestTabletFactoryV2::new( + path.path().join("tablet").as_path(), + ops, + cf_opts, + )); + // create tablet with region_id 1 + let tablet = factory + .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) + .unwrap(); + // setup read runner worker and peer storage + let mut worker = Worker::new("test-read-worker").lazy_build("test-read-worker"); + let sched = worker.scheduler(); + let logger = slog_global::borrow_global().new(o!()); + let mut s = Storage::new(4, 6, raft_engine.clone(), sched.clone(), &logger.clone()) + .unwrap() + .unwrap(); + let (router, rx) = TestRouter::new(); + worker.start(ReadRunner::new(router.clone(), raft_engine)); + // setup peer applyer + let mut apply = Apply::new( + region.get_peers()[0].clone(), + RegionLocalState::default(), + router, + CachedTablet::new(Some(tablet)), + sched, + logger, + ); + + // test get snapshot + let snap = s.snapshot(0, 0); + let unavailable = RaftError::Store(StorageError::SnapshotTemporarilyUnavailable); + assert_eq!(snap.unwrap_err(), unavailable); + let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); + apply.schedule_gen_snapshot(gen_task); + let res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); + s.on_snapshot_generated(res); + let snap = match *s.snap_state.borrow() { + SnapState::Generated(ref snap) => *snap.clone(), + ref s => panic!("unexpected state: {:?}", s), + }; + assert_eq!(snap.get_metadata().get_index(), 0); 
+ assert_eq!(snap.get_metadata().get_term(), 0); + assert!(snap.get_data().is_empty()); + + // test cancel snapshot + let snap = s.snapshot(0, 0); + let unavailable = RaftError::Store(StorageError::SnapshotTemporarilyUnavailable); + assert_eq!(snap.unwrap_err(), unavailable); + let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); + apply.schedule_gen_snapshot(gen_task); + let res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); + s.cancel_generating_snap(None); + assert_eq!(*s.snap_state.borrow(), SnapState::Relax); + + // TODO: add test get twice snapshot and cancel once + } } diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 78abef13247..7c02ee10243 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -8,15 +8,20 @@ use kvproto::{ raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, raft_serverpb::RaftMessage, }; -use raftstore::store::{FetchedLogs, LogFetchedNotifier, RegionSnapshot}; +use raft::eraftpb::Snapshot as RaftSnapshot; +use raftstore::store::{AsyncReadNotifier, FetchedLogs, RegionSnapshot}; use slog::Logger; use super::PeerMsg; use crate::{batch::StoreRouter, operation::LocalReader, StoreMeta}; -impl LogFetchedNotifier for StoreRouter { - fn notify(&self, region_id: u64, fetched: FetchedLogs) { - let _ = self.force_send(region_id, PeerMsg::FetchedLogs(fetched)); +impl AsyncReadNotifier for StoreRouter { + fn notify_logs_fetched(&self, region_id: u64, fetched_logs: FetchedLogs) { + let _ = self.force_send(region_id, PeerMsg::LogsFetched(fetched_logs)); + } + + fn notify_snapshot_generated(&self, region_id: u64, snapshot: Box) { + let _ = self.force_send(region_id, PeerMsg::SnapshotGenerated(snapshot)); } } diff --git a/components/raftstore-v2/src/router/internal_message.rs b/components/raftstore-v2/src/router/internal_message.rs index e9893bad968..1507d404297 100644 --- a/components/raftstore-v2/src/router/internal_message.rs +++ 
b/components/raftstore-v2/src/router/internal_message.rs @@ -1,13 +1,13 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use kvproto::raft_serverpb::RegionLocalState; use raftstore::store::fsm::ChangePeer; -use crate::operation::{AdminCmdResult, CommittedEntries}; +use crate::operation::{AdminCmdResult, CommittedEntries, GenSnapTask}; #[derive(Debug)] pub enum ApplyTask { CommittedEntries(CommittedEntries), + Snapshot(GenSnapTask), } #[derive(Debug, Default)] diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index c607e389135..64af4d41d71 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -3,7 +3,9 @@ // #[PerformanceCriticalPath] use std::fmt; +use engine_traits::Snapshot; use kvproto::{raft_cmdpb::RaftCmdRequest, raft_serverpb::RaftMessage}; +use raft::eraftpb::Snapshot as RaftSnapshot; use raftstore::store::{metrics::RaftEventDurationType, FetchedLogs}; use tikv_util::time::Instant; @@ -123,7 +125,8 @@ pub enum PeerMsg { Tick(PeerTick), /// Result of applying committed entries. The message can't be lost. ApplyRes(ApplyRes), - FetchedLogs(FetchedLogs), + LogsFetched(FetchedLogs), + SnapshotGenerated(Box), /// Start the FSM. Start, /// A message only used to notify a peer. 
@@ -173,7 +176,8 @@ impl fmt::Debug for PeerMsg { "Persisted peer_id {}, ready_number {}", peer_id, ready_number ), - PeerMsg::FetchedLogs(fetched) => write!(fmt, "FetchedLogs {:?}", fetched), + PeerMsg::LogsFetched(fetched) => write!(fmt, "LogsFetched {:?}", fetched), + PeerMsg::SnapshotGenerated(_) => write!(fmt, "SnapshotGenerated"), PeerMsg::QueryDebugInfo(_) => write!(fmt, "QueryDebugInfo"), #[cfg(feature = "testexport")] PeerMsg::WaitFlush(_) => write!(fmt, "FlushMessages"), diff --git a/components/raftstore/src/store/async_io/mod.rs b/components/raftstore/src/store/async_io/mod.rs index c9b2fad532f..56cc2d576e1 100644 --- a/components/raftstore/src/store/async_io/mod.rs +++ b/components/raftstore/src/store/async_io/mod.rs @@ -1,4 +1,5 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. +pub mod read; pub mod write; pub mod write_router; diff --git a/components/raftstore/src/store/worker/raftlog_fetch.rs b/components/raftstore/src/store/async_io/read.rs similarity index 58% rename from components/raftstore/src/store/worker/raftlog_fetch.rs rename to components/raftstore/src/store/async_io/read.rs index b3de87f7715..30ce2102040 100644 --- a/components/raftstore/src/store/worker/raftlog_fetch.rs +++ b/components/raftstore/src/store/async_io/read.rs @@ -1,16 +1,21 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::fmt; +use std::{ + fmt, + marker::PhantomData, + sync::{atomic::AtomicBool, Arc}, +}; -use engine_traits::RaftEngine; +use engine_traits::{KvEngine, RaftEngine}; use fail::fail_point; -use raft::GetEntriesContext; +use kvproto::raft_serverpb::RegionLocalState; +use raft::{eraftpb::Snapshot as RaftSnapshot, GetEntriesContext}; use tikv_util::worker::Runnable; use crate::store::{RaftlogFetchResult, MAX_INIT_ENTRY_COUNT}; -pub enum Task { - PeerStorage { +pub enum ReadTask { + FetchLogs { region_id: u64, context: GetEntriesContext, low: u64, @@ -19,13 +24,23 @@ pub enum Task { tried_cnt: usize, term: u64, }, - // More to support, suck as fetch entries ayschronously when apply and schedule merge + + // GenTabletSnapshot is used to generate tablet snapshot. + GenTabletSnapshot { + region_id: u64, + tablet: EK, + region_state: RegionLocalState, + last_applied_term: u64, + last_applied_index: u64, + canceled: Arc, + for_balance: bool, + }, } -impl fmt::Display for Task { +impl fmt::Display for ReadTask { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Task::PeerStorage { + ReadTask::FetchLogs { region_id, context, low, @@ -38,6 +53,9 @@ impl fmt::Display for Task { "Fetch Raft Logs [region: {}, low: {}, high: {}, max_size: {}] for sending with context {:?}, tried: {}, term: {}", region_id, low, high, max_size, context, tried_cnt, term, ), + ReadTask::GenTabletSnapshot { region_id, .. } => { + write!(f, "Snapshot gen for {}", region_id) + } } } } @@ -49,38 +67,42 @@ pub struct FetchedLogs { } /// A router for receiving fetched result. 
-pub trait LogFetchedNotifier: Send { - fn notify(&self, region_id: u64, fetched: FetchedLogs); +pub trait AsyncReadNotifier: Send { + fn notify_logs_fetched(&self, region_id: u64, fetched: FetchedLogs); + fn notify_snapshot_generated(&self, region_id: u64, snapshot: Box); } -pub struct Runner +pub struct ReadRunner where + EK: KvEngine, ER: RaftEngine, - N: LogFetchedNotifier, + N: AsyncReadNotifier, { notifier: N, raft_engine: ER, + _phantom: PhantomData, } -impl Runner { - pub fn new(notifier: N, raft_engine: ER) -> Runner { - Runner { +impl ReadRunner { + pub fn new(notifier: N, raft_engine: ER) -> ReadRunner { + ReadRunner { notifier, raft_engine, + _phantom: PhantomData, } } } -impl Runnable for Runner +impl Runnable for ReadRunner where + EK: KvEngine, ER: RaftEngine, - N: LogFetchedNotifier, + N: AsyncReadNotifier, { - type Task = Task; - - fn run(&mut self, task: Task) { + type Task = ReadTask; + fn run(&mut self, task: ReadTask) { match task { - Task::PeerStorage { + ReadTask::FetchLogs { region_id, low, high, @@ -104,7 +126,7 @@ where .map(|c| (*c as u64) != high - low) .unwrap_or(false); fail_point!("worker_async_fetch_raft_log"); - self.notifier.notify( + self.notifier.notify_logs_fetched( region_id, FetchedLogs { context, @@ -119,6 +141,11 @@ where }, ); } + ReadTask::GenTabletSnapshot { region_id, .. 
} => { + // TODO: implement generate tablet snapshot for raftstore v2 + self.notifier + .notify_snapshot_generated(region_id, Box::new(RaftSnapshot::default())); + } } } } diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index a0828d12332..fcc3d535aa2 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -30,7 +30,7 @@ use super::{ metrics::*, peer_storage::storage_error, WriteTask, MEMTRACE_ENTRY_CACHE, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }; -use crate::{bytes_capacity, store::worker::RaftlogFetchTask, Result}; +use crate::{bytes_capacity, store::ReadTask, Result}; const MAX_ASYNC_FETCH_TRY_CNT: usize = 3; const SHRINK_CACHE_CAPACITY: usize = 64; @@ -622,7 +622,7 @@ impl Default for CacheWarmupState { } /// A subset of `PeerStorage` that focus on accessing log entries. -pub struct EntryStorage { +pub struct EntryStorage { region_id: u64, peer_id: u64, raft_engine: ER, @@ -631,20 +631,20 @@ pub struct EntryStorage { apply_state: RaftApplyState, last_term: u64, applied_term: u64, - raftlog_fetch_scheduler: Scheduler, + read_scheduler: Scheduler>, raftlog_fetch_stats: AsyncFetchStats, async_fetch_results: RefCell>, cache_warmup_state: Option, } -impl EntryStorage { +impl EntryStorage { pub fn new( peer_id: u64, raft_engine: ER, mut raft_state: RaftLocalState, apply_state: RaftApplyState, region: &metapb::Region, - raftlog_fetch_scheduler: Scheduler, + read_scheduler: Scheduler>, ) -> Result { if let Err(e) = validate_states(region.id, &raft_engine, &mut raft_state, &apply_state) { return Err(box_err!( @@ -665,7 +665,7 @@ impl EntryStorage { apply_state, last_term, applied_term, - raftlog_fetch_scheduler, + read_scheduler, raftlog_fetch_stats: AsyncFetchStats::default(), async_fetch_results: RefCell::new(HashMap::default()), cache_warmup_state: None, @@ -862,8 +862,8 @@ impl EntryStorage { self.async_fetch_results .borrow_mut() 
.insert(low, RaftlogFetchState::Fetching(Instant::now_coarse())); - self.raftlog_fetch_scheduler - .schedule(RaftlogFetchTask::PeerStorage { + self.read_scheduler + .schedule(ReadTask::FetchLogs { region_id, context, low, @@ -1046,7 +1046,7 @@ impl EntryStorage { // Append the given entries to the raft log using previous last index or // self.last_index. - pub fn append(&mut self, entries: Vec, task: &mut WriteTask) { + pub fn append(&mut self, entries: Vec, task: &mut WriteTask) { if entries.is_empty() { return; } @@ -1242,13 +1242,17 @@ impl EntryStorage { pub fn clear(&mut self) { self.cache = EntryCache::default(); } + + pub fn read_scheduler(&self) -> Scheduler> { + self.read_scheduler.clone() + } } #[cfg(test)] pub mod tests { use std::sync::mpsc; - use engine_test::raft::RaftTestEngine; + use engine_test::{kv::KvTestEngine, raft::RaftTestEngine}; use engine_traits::RaftEngineReadOnly; use protobuf::Message; use raft::{GetEntriesContext, StorageError}; @@ -1273,7 +1277,7 @@ pub mod tests { } } - pub fn validate_cache(store: &EntryStorage, exp_ents: &[Entry]) { + pub fn validate_cache(store: &EntryStorage, exp_ents: &[Entry]) { assert_eq!(store.cache.cache, exp_ents); for e in exp_ents { let entry = store diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 63761321405..a800832ba82 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -94,11 +94,10 @@ use crate::{ util::{KeysInfoFormatter, LeaseState}, worker::{ new_change_peer_v2_request, Bucket, BucketRange, CleanupTask, ConsistencyCheckTask, - GcSnapshotTask, RaftlogFetchTask, RaftlogGcTask, ReadDelegate, ReadProgress, - RegionTask, SplitCheckTask, + GcSnapshotTask, RaftlogGcTask, ReadDelegate, ReadProgress, RegionTask, SplitCheckTask, }, CasualMessage, Config, LocksStatus, MergeResultKind, PdTask, PeerMsg, PeerTick, - ProposalContext, RaftCmdExtraOpts, RaftCommand, RaftlogFetchResult, ReadCallback, + 
ProposalContext, RaftCmdExtraOpts, RaftCommand, RaftlogFetchResult, ReadCallback, ReadTask, SignificantMsg, SnapKey, StoreMsg, WriteCallback, }, Error, Result, @@ -245,7 +244,7 @@ where store_id: u64, cfg: &Config, region_scheduler: Scheduler>, - raftlog_fetch_scheduler: Scheduler, + raftlog_fetch_scheduler: Scheduler>, engines: Engines, region: &metapb::Region, ) -> Result> { @@ -304,7 +303,7 @@ where store_id: u64, cfg: &Config, region_scheduler: Scheduler>, - raftlog_fetch_scheduler: Scheduler, + raftlog_fetch_scheduler: Scheduler>, engines: Engines, region_id: u64, peer: metapb::Peer, diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index c83309011ac..1179a535c7d 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -72,6 +72,7 @@ use crate::{ }, store::{ async_io::{ + read::{ReadRunner, ReadTask}, write::{StoreWriters, Worker as WriteWorker, WriteMsg}, write_router::WriteSenders, }, @@ -95,9 +96,9 @@ use crate::{ worker::{ AutoSplitController, CleanupRunner, CleanupSstRunner, CleanupSstTask, CleanupTask, CompactRunner, CompactTask, ConsistencyCheckRunner, ConsistencyCheckTask, - GcSnapshotRunner, GcSnapshotTask, PdRunner, RaftlogFetchRunner, RaftlogFetchTask, - RaftlogGcRunner, RaftlogGcTask, ReadDelegate, RefreshConfigRunner, RefreshConfigTask, - RegionRunner, RegionTask, SplitCheckTask, + GcSnapshotRunner, GcSnapshotTask, PdRunner, RaftlogGcRunner, RaftlogGcTask, + ReadDelegate, RefreshConfigRunner, RefreshConfigTask, RegionRunner, RegionTask, + SplitCheckTask, }, Callback, CasualMessage, GlobalReplicationState, InspectedRaftMessage, MergeResultKind, PdTask, PeerMsg, PeerTick, RaftCommand, SignificantMsg, SnapManager, StoreMsg, StoreTick, @@ -474,7 +475,7 @@ where // handle Compact, CleanupSst task pub cleanup_scheduler: Scheduler, pub raftlog_gc_scheduler: Scheduler, - pub raftlog_fetch_scheduler: Scheduler, + pub raftlog_fetch_scheduler: 
Scheduler>, pub region_scheduler: Scheduler>, pub apply_router: ApplyRouter, pub router: RaftRouter, @@ -1081,7 +1082,7 @@ pub struct RaftPollerBuilder { split_check_scheduler: Scheduler, cleanup_scheduler: Scheduler, raftlog_gc_scheduler: Scheduler, - raftlog_fetch_scheduler: Scheduler, + raftlog_fetch_scheduler: Scheduler>, pub region_scheduler: Scheduler>, apply_router: ApplyRouter, pub router: RaftRouter, @@ -1531,7 +1532,7 @@ impl RaftBatchSystem { let raftlog_fetch_scheduler = workers.raftlog_fetch_worker.start( "raftlog-fetch-worker", - RaftlogFetchRunner::new(self.router.clone(), engines.raft.clone()), + ReadRunner::new(self.router.clone(), engines.raft.clone()), ); let compact_runner = CompactRunner::new(engines.kv.clone()); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 7ab47cc90c6..ea8ccc3219f 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -48,6 +48,7 @@ make_auto_flush_static_metric! 
{ stale, decode, epoch, + cancel, } pub label_enum RegionHashType { diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index a60eb087562..2078ccabafc 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -30,6 +30,7 @@ mod worker; pub use self::msg::PeerInternalStat; pub use self::{ async_io::{ + read::{AsyncReadNotifier, FetchedLogs, ReadRunner, ReadTask}, write::{ ExtraStates, PersistedNotifier, StoreWriters, Worker as WriteWorker, WriteMsg, WriteTask, @@ -76,9 +77,8 @@ pub use self::{ util::{RegionReadProgress, RegionReadProgressRegistry}, worker::{ AutoSplitController, Bucket, BucketRange, CachedReadDelegate, CheckLeaderRunner, - CheckLeaderTask, FetchedLogs, FlowStatistics, FlowStatsReporter, KeyEntry, - LocalReadContext, LocalReader, LocalReaderCore, LogFetchedNotifier, PdTask, - RaftlogFetchRunner, RaftlogFetchTask, ReadDelegate, ReadExecutor, ReadExecutorProvider, + CheckLeaderTask, FlowStatistics, FlowStatsReporter, KeyEntry, LocalReadContext, + LocalReader, LocalReaderCore, PdTask, ReadDelegate, ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, StoreMetaDelegate, TrackVer, WriteStats, TLS_LOCAL_READ_METRICS, diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 6851ebd30d8..b86700af8e6 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -26,9 +26,7 @@ use smallvec::{smallvec, SmallVec}; use tikv_util::{deadline::Deadline, escape, memory::HeapSize, time::Instant}; use tracker::{get_tls_tracker_token, TrackerToken, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}; -use super::{ - local_metrics::TimeTracker, region_meta::RegionMeta, worker::FetchedLogs, RegionSnapshot, -}; +use super::{local_metrics::TimeTracker, region_meta::RegionMeta, FetchedLogs, RegionSnapshot}; use crate::store::{ 
fsm::apply::{CatchUpLogs, ChangeObserver, TaskRes as ApplyTaskRes}, metrics::RaftEventDurationType, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index b06eb5c0c3f..b9cf76889b4 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -85,7 +85,7 @@ use crate::{ coprocessor::{CoprocessorHost, RegionChangeEvent, RegionChangeReason, RoleChange}, errors::RAFTSTORE_IS_BUSY, store::{ - async_io::{write::WriteMsg, write_router::WriteRouter}, + async_io::{read::ReadTask, write::WriteMsg, write_router::WriteRouter}, fsm::{ apply::{self, CatchUpLogs}, store::{PollContext, RaftRouter}, @@ -97,8 +97,8 @@ use crate::{ txn_ext::LocksStatus, util::{admin_cmd_epoch_lookup, RegionReadProgress}, worker::{ - HeartbeatTask, RaftlogFetchTask, RaftlogGcTask, ReadDelegate, ReadExecutor, - ReadProgress, RegionTask, SplitCheckTask, + HeartbeatTask, RaftlogGcTask, ReadDelegate, ReadExecutor, ReadProgress, RegionTask, + SplitCheckTask, }, Callback, Config, GlobalReplicationState, PdTask, ReadCallback, ReadIndexContext, ReadResponse, TxnExt, WriteCallback, RAFT_INIT_LOG_INDEX, @@ -1041,7 +1041,7 @@ where store_id: u64, cfg: &Config, region_scheduler: Scheduler>, - raftlog_fetch_scheduler: Scheduler, + raftlog_fetch_scheduler: Scheduler>, engines: Engines, region: &metapb::Region, peer: metapb::Peer, diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 081149a6889..a53ca1e9258 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -36,8 +36,11 @@ use tikv_util::{ use super::{metrics::*, worker::RegionTask, SnapEntry, SnapKey, SnapManager}; use crate::{ store::{ - async_io::write::WriteTask, entry_storage::EntryStorage, fsm::GenSnapTask, - peer::PersistSnapshotResult, util, worker::RaftlogFetchTask, + async_io::{read::ReadTask, write::WriteTask}, + entry_storage::EntryStorage, + 
fsm::GenSnapTask, + peer::PersistSnapshotResult, + util, }, Error, Result, }; @@ -218,13 +221,13 @@ where region_scheduler: Scheduler>, snap_tried_cnt: RefCell, - entry_storage: EntryStorage, + entry_storage: EntryStorage, pub tag: String, } impl Deref for PeerStorage { - type Target = EntryStorage; + type Target = EntryStorage; #[inline] fn deref(&self) -> &Self::Target { @@ -286,7 +289,7 @@ where engines: Engines, region: &metapb::Region, region_scheduler: Scheduler>, - raftlog_fetch_scheduler: Scheduler, + raftlog_fetch_scheduler: Scheduler>, peer_id: u64, tag: String, ) -> Result> { @@ -1136,21 +1139,19 @@ pub mod tests { use crate::{ coprocessor::CoprocessorHost, store::{ - async_io::write::write_to_db_for_test, + async_io::{read::ReadRunner, write::write_to_db_for_test}, bootstrap_store, entry_storage::tests::validate_cache, fsm::apply::compact_raft_log, initial_region, prepare_bootstrap_cluster, - worker::{ - make_region_worker_raftstore_cfg, FetchedLogs, LogFetchedNotifier, - RaftlogFetchRunner, RegionRunner, RegionTask, - }, + worker::{make_region_worker_raftstore_cfg, RegionRunner, RegionTask}, + AsyncReadNotifier, FetchedLogs, }, }; fn new_storage( region_scheduler: Scheduler>, - raftlog_fetch_scheduler: Scheduler, + raftlog_fetch_scheduler: Scheduler>, path: &TempDir, ) -> PeerStorage { let kv_db = engine_test::kv::new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap(); @@ -1183,7 +1184,7 @@ pub mod tests { pub fn new_storage_from_ents( region_scheduler: Scheduler>, - raftlog_fetch_scheduler: Scheduler, + raftlog_fetch_scheduler: Scheduler>, path: &TempDir, ents: &[Entry], ) -> PeerStorage { @@ -1378,10 +1379,14 @@ pub mod tests { } } - impl LogFetchedNotifier for TestRouter { - fn notify(&self, _region_id: u64, fetched_logs: FetchedLogs) { + impl AsyncReadNotifier for TestRouter { + fn notify_logs_fetched(&self, _region_id: u64, fetched_logs: FetchedLogs) { self.ch.send(fetched_logs).unwrap(); } + + fn notify_snapshot_generated(&self, _region_id: 
u64, _snapshot: Box) { + unreachable!(); + } } #[test] @@ -1455,7 +1460,7 @@ pub mod tests { let raftlog_fetch_scheduler = raftlog_fetch_worker.scheduler(); let mut store = new_storage_from_ents(region_scheduler, raftlog_fetch_scheduler, &td, &ents); - raftlog_fetch_worker.start(RaftlogFetchRunner::new(router, store.engines.raft.clone())); + raftlog_fetch_worker.start(ReadRunner::new(router, store.engines.raft.clone())); store.compact_entry_cache(5); let mut e = store.entries(lo, hi, maxsize, GetEntriesContext::empty(true)); if e == Err(raft::Error::Store( diff --git a/components/raftstore/src/store/transport.rs b/components/raftstore/src/store/transport.rs index 19b825ac20c..d2bbe921eea 100644 --- a/components/raftstore/src/store/transport.rs +++ b/components/raftstore/src/store/transport.rs @@ -6,9 +6,10 @@ use std::sync::mpsc; use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{KvEngine, RaftEngine, Snapshot}; use kvproto::raft_serverpb::RaftMessage; +use raft::eraftpb::Snapshot as RaftSnapshot; use tikv_util::{error, warn}; -use super::worker::{FetchedLogs, LogFetchedNotifier}; +use super::{AsyncReadNotifier, FetchedLogs}; use crate::{ store::{CasualMessage, PeerMsg, RaftCommand, RaftRouter, SignificantMsg, StoreMsg}, DiscardReason, Error, Result, @@ -173,10 +174,15 @@ where } } -impl LogFetchedNotifier for RaftRouter { +impl AsyncReadNotifier for RaftRouter { #[inline] - fn notify(&self, region_id: u64, fetched: FetchedLogs) { + fn notify_logs_fetched(&self, region_id: u64, fetched: FetchedLogs) { // Ignore region not found as it may be removed. 
let _ = self.significant_send(region_id, SignificantMsg::RaftlogFetched(fetched)); } + + #[inline] + fn notify_snapshot_generated(&self, _region_id: u64, _snapshot: Box) { + unreachable!() + } } diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index 4335369c3cb..cd7680ebc4a 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -8,7 +8,6 @@ mod compact; mod consistency_check; mod metrics; mod pd; -mod raftlog_fetch; mod raftlog_gc; mod read; mod refresh_config; @@ -31,9 +30,6 @@ pub use self::{ new_change_peer_v2_request, FlowStatistics, FlowStatsReporter, HeartbeatTask, Runner as PdRunner, Task as PdTask, }, - raftlog_fetch::{ - FetchedLogs, LogFetchedNotifier, Runner as RaftlogFetchRunner, Task as RaftlogFetchTask, - }, raftlog_gc::{Runner as RaftlogGcRunner, Task as RaftlogGcTask}, read::{ CachedReadDelegate, LocalReadContext, LocalReader, LocalReaderCore, From dd4299c6956f5f3472330e6fc8cc8fc16c4bd791 Mon Sep 17 00:00:00 2001 From: goldwind-ting <63939636+goldwind-ting@users.noreply.github.com> Date: Tue, 1 Nov 2022 18:20:00 +0800 Subject: [PATCH 0301/1149] fix typo (#13699) close tikv/tikv#13706 fix-typo: replace `threahold` with `threshold `. Signed-off-by: goldwind-ting <63939636+goldwind-ting@users.noreply.github.com> Co-authored-by: Yilin Chen Co-authored-by: Ti Chi Robot --- src/read_pool.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/read_pool.rs b/src/read_pool.rs index deb7336975c..4d9f7fd9264 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -32,10 +32,10 @@ use crate::{ // the duration to check auto-scale unified-thread-pool's thread const READ_POOL_THREAD_CHECK_DURATION: Duration = Duration::from_secs(10); // consider scale out read pool size if the average thread cpu usage is higher -// than this threahold. +// than this threshold. 
const READ_POOL_THREAD_HIGH_THRESHOLD: f64 = 0.8; // consider scale in read pool size if the average thread cpu usage is lower -// than this threahold. +// than this threshold. const READ_POOL_THREAD_LOW_THRESHOLD: f64 = 0.7; // avg running tasks per-thread that indicates read-pool is busy const RUNNING_TASKS_PER_THREAD_THRESHOLD: i64 = 3; From 497ae1b0a1f05dacdbe2f59d5b92ee99172e3a49 Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 1 Nov 2022 19:24:00 +0800 Subject: [PATCH 0302/1149] raft_client: Report store unreachable once until being connected again (#13677) close tikv/tikv#13676 Avoid reporting store unreachable again and again as broadcasting is time-consuming and blocks raftstore. Only send store unreachable when the store is ever connected. Signed-off-by: Connor1996 Co-authored-by: Xinye Tao --- components/batch-system/src/metrics.rs | 7 + components/batch-system/src/router.rs | 11 +- components/raftstore/src/store/fsm/peer.rs | 24 ++- components/raftstore/src/store/fsm/store.rs | 14 +- .../raftstore/src/store/local_metrics.rs | 6 + components/raftstore/src/store/metrics.rs | 39 +++-- .../test_raftstore/src/common-test.toml | 3 +- metrics/grafana/tikv_details.json | 95 ++++++++++- src/server/config.rs | 20 ++- src/server/raft_client.rs | 158 +++++++++++------- src/server/server.rs | 41 +++-- tests/integrations/config/mod.rs | 3 +- tests/integrations/config/test-custom.toml | 1 + tests/integrations/server/raft_client.rs | 62 ++++++- 14 files changed, 362 insertions(+), 122 deletions(-) diff --git a/components/batch-system/src/metrics.rs b/components/batch-system/src/metrics.rs index 9edcd656bf4..a4728f32ad7 100644 --- a/components/batch-system/src/metrics.rs +++ b/components/batch-system/src/metrics.rs @@ -10,4 +10,11 @@ lazy_static! 
{ &["type"] ) .unwrap(); + + pub static ref BROADCAST_NORMAL_DURATION: Histogram = + register_histogram!( + "tikv_broadcast_normal_duration_seconds", + "Duration of broadcasting normals.", + exponential_buckets(0.001, 1.59, 20).unwrap() // max 10s + ).unwrap(); } diff --git a/components/batch-system/src/router.rs b/components/batch-system/src/router.rs index 660ab014939..d96e65e1e99 100644 --- a/components/batch-system/src/router.rs +++ b/components/batch-system/src/router.rs @@ -12,12 +12,17 @@ use std::{ use collections::HashMap; use crossbeam::channel::{SendError, TrySendError}; -use tikv_util::{debug, info, lru::LruCache, Either}; +use tikv_util::{ + debug, info, + lru::LruCache, + time::{duration_to_sec, Instant}, + Either, +}; use crate::{ fsm::{Fsm, FsmScheduler, FsmState}, mailbox::{BasicMailbox, Mailbox}, - metrics::CHANNEL_FULL_COUNTER_VEC, + metrics::*, }; /// A struct that traces the approximate memory usage of router. @@ -306,10 +311,12 @@ where /// Try to notify all normal FSMs a message. pub fn broadcast_normal(&self, mut msg_gen: impl FnMut() -> N::Message) { + let timer = Instant::now_coarse(); let mailboxes = self.normals.lock().unwrap(); for mailbox in mailboxes.map.values() { let _ = mailbox.force_send(msg_gen(), &self.normal_scheduler); } + BROADCAST_NORMAL_DURATION.observe(duration_to_sec(timer.saturating_elapsed()) as f64); } /// Try to notify all FSMs that the cluster is being shutdown. 
diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index a800832ba82..b7f7b005137 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -55,7 +55,7 @@ use tikv_util::{ mpsc::{self, LooseBoundedSender, Receiver}, store::{find_peer, is_learner, region_on_same_stores}, sys::{disk::DiskUsage, memory_usage_reaches_high_water}, - time::{monotonic_raw_now, Instant as TiInstant}, + time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant}, trace, warn, worker::{ScheduleError, Scheduler}, Either, @@ -605,6 +605,8 @@ where } pub fn handle_msgs(&mut self, msgs: &mut Vec>) { + let timer = TiInstant::now_coarse(); + let count = msgs.len(); for m in msgs.drain(..) { match m { PeerMsg::RaftMessage(msg) => { @@ -687,6 +689,12 @@ where } } self.on_loop_finished(); + self.ctx.raft_metrics.peer_msg_len.observe(count as f64); + self.ctx + .raft_metrics + .event_time + .peer_msg + .observe(duration_to_sec(timer.saturating_elapsed()) as f64); } #[inline] @@ -1382,7 +1390,7 @@ where SignificantMsg::CatchUpLogs(catch_up_logs) => { self.on_catch_up_logs_for_merge(catch_up_logs); } - SignificantMsg::StoreResolved { group_id, .. 
} => { + SignificantMsg::StoreResolved { group_id, store_id } => { let state = self.ctx.global_replication_state.lock().unwrap(); if state.status().get_mode() != ReplicationMode::DrAutoSync { return; @@ -1391,11 +1399,13 @@ where return; } drop(state); - self.fsm - .peer - .raft_group - .raft - .assign_commit_groups(&[(self.fsm.peer_id(), group_id)]); + if let Some(peer_id) = find_peer(self.region(), store_id).map(|p| p.get_id()) { + self.fsm + .peer + .raft_group + .raft + .assign_commit_groups(&[(peer_id, group_id)]); + } } SignificantMsg::CaptureChange { cmd, diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 1179a535c7d..2bb2ea636e1 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -685,7 +685,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> StoreFsmDelegate<'a, EK, ER, T> { fn on_tick(&mut self, tick: StoreTick) { - let t = TiInstant::now_coarse(); + let timer = TiInstant::now_coarse(); match tick { StoreTick::PdStoreHeartbeat => self.on_pd_store_heartbeat_tick(), StoreTick::SnapGc => self.on_snap_mgr_gc(), @@ -694,8 +694,10 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> StoreTick::ConsistencyCheck => self.on_consistency_check_tick(), StoreTick::CleanupImportSst => self.on_cleanup_import_sst_tick(), } - let elapsed = t.saturating_elapsed(); - RAFT_EVENT_DURATION + let elapsed = timer.saturating_elapsed(); + self.ctx + .raft_metrics + .event_time .get(tick.tag()) .observe(duration_to_sec(elapsed) as f64); slow_log!( @@ -707,6 +709,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> } fn handle_msgs(&mut self, msgs: &mut Vec>) { + let timer = TiInstant::now_coarse(); for m in msgs.drain(..) 
{ match m { StoreMsg::Tick(tick) => self.on_tick(tick), @@ -757,6 +760,11 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> StoreMsg::GcSnapshotFinish => self.register_snap_mgr_gc_tick(), } } + self.ctx + .raft_metrics + .event_time + .store_msg + .observe(duration_to_sec(timer.saturating_elapsed()) as f64); } fn start(&mut self, store: metapb::Store) { diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index aa33ae49fea..1648bd345ca 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -82,6 +82,8 @@ pub struct RaftMetrics { pub store_time: LocalHistogram, pub propose_wait_time: LocalHistogram, pub process_ready: LocalHistogram, + pub event_time: RaftEventDurationVec, + pub peer_msg_len: LocalHistogram, pub commit_log: LocalHistogram, pub write_block_wait: LocalHistogram, @@ -117,6 +119,8 @@ impl RaftMetrics { process_ready: PEER_RAFT_PROCESS_DURATION .with_label_values(&["ready"]) .local(), + event_time: RaftEventDurationVec::from(&RAFT_EVENT_DURATION_VEC), + peer_msg_len: PEER_MSG_LEN.local(), commit_log: PEER_COMMIT_LOG_HISTOGRAM.local(), write_block_wait: STORE_WRITE_MSG_BLOCK_WAIT_DURATION_HISTOGRAM.local(), waterfall_metrics, @@ -149,6 +153,8 @@ impl RaftMetrics { self.store_time.flush(); self.propose_wait_time.flush(); self.process_ready.flush(); + self.event_time.flush(); + self.peer_msg_len.flush(); self.commit_log.flush(); self.write_block_wait.flush(); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index ea8ccc3219f..2fe6fce580e 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -87,16 +87,6 @@ make_auto_flush_static_metric! 
{ finished, } - pub label_enum RaftEventDurationType { - compact_check, - pd_store_heartbeat, - snap_gc, - compact_lock_cf, - consistency_check, - cleanup_import_sst, - raft_engine_purge, - } - pub label_enum CompactionGuardAction { init, init_failure, @@ -104,10 +94,6 @@ make_auto_flush_static_metric! { skip_partition, } - pub struct RaftEventDuration : LocalHistogram { - "type" => RaftEventDurationType - } - pub struct RaftEntryFetches : LocalIntCounter { "type" => RaftEntryType } @@ -219,6 +205,18 @@ make_static_metric! { flashback_not_prepared } + pub label_enum RaftEventDurationType { + compact_check, + pd_store_heartbeat, + snap_gc, + compact_lock_cf, + consistency_check, + cleanup_import_sst, + raft_engine_purge, + peer_msg, + store_msg, + } + pub label_enum RaftLogGcSkippedReason { reserve_log, compact_idx_too_small, @@ -280,6 +278,10 @@ make_static_metric! { "type" => RaftInvalidProposal } + pub struct RaftEventDurationVec : LocalHistogram { + "type" => RaftEventDurationType + } + pub struct RaftLogGcSkippedCounterVec: LocalIntCounter { "reason" => RaftLogGcSkippedReason, } @@ -663,8 +665,13 @@ lazy_static! { &["type"], exponential_buckets(0.001, 1.59, 20).unwrap() // max 10s ).unwrap(); - pub static ref RAFT_EVENT_DURATION: RaftEventDuration = - auto_flush_from!(RAFT_EVENT_DURATION_VEC, RaftEventDuration); + + pub static ref PEER_MSG_LEN: Histogram = + register_histogram!( + "tikv_raftstore_peer_msg_len", + "Length of peer msg.", + exponential_buckets(1.0, 2.0, 20).unwrap() // max 1000s + ).unwrap(); pub static ref RAFT_READ_INDEX_PENDING_DURATION: Histogram = register_histogram!( diff --git a/components/test_raftstore/src/common-test.toml b/components/test_raftstore/src/common-test.toml index 50e62f67d28..a121a6c1e0e 100644 --- a/components/test_raftstore/src/common-test.toml +++ b/components/test_raftstore/src/common-test.toml @@ -24,7 +24,8 @@ grpc-raft-conn-num = 1 # Disable stats concurrency. 
procinfo performs too bad without optimization, # disable it to save CPU for real tests. stats-concurrency = 0 -raft-client-backoff-step = "5ms" +raft-client-max-backoff = "100ms" +raft-client-initial-reconnect-backoff = "100ms" [server.labels] diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 471bf4bea2e..ccac776b508 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -14426,7 +14426,7 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "The time consumed by raftstore events (P99).99", + "description": "The max time consumed by raftstore events", "editable": true, "error": false, "fieldConfig": { @@ -14466,7 +14466,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -14476,12 +14476,25 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_event_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "exemplar": true, + "expr": "histogram_quantile(1.0, sum(rate(tikv_raftstore_event_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", "format": "time_series", + "interval": "", "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "C", "step": 4 + }, + { + "exemplar": true, + "expr": "histogram_quantile(1.0, sum(rate(tikv_broadcast_normal_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "broadcast_normal", + "refId": "A", + "step": 4 } ], "thresholds": [ @@ -14496,7 +14509,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "0.99 Duration of raft store 
events", + "title": "Max duration of raft store events", "tooltip": { "msResolution": false, "shared": true, @@ -14557,7 +14570,7 @@ "h": 8, "w": 12, "x": 0, - "y": 20 + "y": 21 }, "heatmap": {}, "hideZeroBuckets": true, @@ -14603,6 +14616,78 @@ "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The length of peer msgs for each round handling", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 21 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763572958, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(delta(tikv_raftstore_peer_msg_len_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "C", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Peer msg length distribution", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "none", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null } ], "repeat": null, diff --git a/src/server/config.rs b/src/server/config.rs index 1959b77df00..ae5c70abe1d 100644 --- a/src/server/config.rs +++ b/src/server/config.rs @@ -90,9 +90,18 @@ pub struct Config { // When merge raft messages into a batch 
message, leave a buffer. #[online_config(skip)] pub raft_client_grpc_send_msg_buffer: usize, - #[online_config(skip)] pub raft_client_queue_size: usize, + // Test only + #[doc(hidden)] + #[serde(skip_serializing)] + #[online_config(skip)] + pub raft_client_max_backoff: ReadableDuration, + // Test only + #[doc(hidden)] + #[serde(skip_serializing)] + #[online_config(skip)] + pub raft_client_initial_reconnect_backoff: ReadableDuration, pub raft_msg_max_batch_size: usize, @@ -156,12 +165,6 @@ pub struct Config { #[online_config(skip)] pub forward_max_connections_per_address: usize, - // Test only. - #[doc(hidden)] - #[serde(skip_serializing)] - #[online_config(skip)] - pub raft_client_backoff_step: ReadableDuration, - #[doc(hidden)] #[online_config(skip)] /// When TiKV memory usage reaches `memory_usage_high_water` it will try to @@ -218,6 +221,8 @@ impl Default for Config { max_grpc_send_msg_len: DEFAULT_MAX_GRPC_SEND_MSG_LEN, raft_client_grpc_send_msg_buffer: 512 * 1024, raft_client_queue_size: 8192, + raft_client_max_backoff: ReadableDuration::secs(5), + raft_client_initial_reconnect_backoff: ReadableDuration::secs(1), raft_msg_max_batch_size: 128, grpc_compression_type: GrpcCompressionType::None, grpc_gzip_compression_level: DEFAULT_GRPC_GZIP_COMPRESSION_LEVEL, @@ -254,7 +259,6 @@ impl Default for Config { heavy_load_threshold: 75, heavy_load_wait_duration: None, enable_request_batch: true, - raft_client_backoff_step: ReadableDuration::secs(1), reject_messages_on_memory_ratio: 0.2, background_thread_count, end_point_slow_log_threshold: ReadableDuration::secs(1), diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index bc0e8a59303..7b29976f218 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -1,7 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - cmp, collections::VecDeque, ffi::CString, marker::{PhantomData, Unpin}, @@ -27,8 +26,8 @@ use futures::{ }; use futures_timer::Delay; use grpcio::{ - ChannelBuilder, ClientCStreamReceiver, ClientCStreamSender, Environment, RpcStatusCode, - WriteFlags, + Channel, ChannelBuilder, ClientCStreamReceiver, ClientCStreamSender, Environment, + RpcStatusCode, WriteFlags, }; use kvproto::{ raft_serverpb::{Done, RaftMessage}, @@ -550,10 +549,18 @@ where } } +#[derive(PartialEq)] +enum RaftCallRes { + // the call is not supported, probably due to visiting to older version TiKV + Fallback, + // the connection is aborted or closed + Disconnected, +} + struct RaftCall { sender: AsyncRaftSender, receiver: ClientCStreamReceiver, - lifetime: Option>, + lifetime: Option>, store_id: u64, } @@ -563,29 +570,31 @@ where B: Buffer + Unpin, E: KvEngine, { - fn clean_up(&mut self, sink_err: Option, recv_err: Option) { - error!("connection aborted"; "store_id" => self.store_id, "sink_error" => ?sink_err, "receiver_err" => ?recv_err, "addr" => %self.sender.addr); + async fn poll(&mut self) { + let res = futures::join!(&mut self.sender, &mut self.receiver); + if let (Ok(()), Ok(Done { .. })) = res { + info!("connection close"; "store_id" => self.store_id, "addr" => %self.sender.addr); + if let Some(tx) = self.lifetime.take() { + let _ = tx.send(RaftCallRes::Disconnected); + } + return; + } + let (sink_err, recv_err) = (res.0.err(), res.1.err()); + error!("connection aborted"; "store_id" => self.store_id, "sink_error" => ?sink_err, "receiver_err" => ?recv_err, "addr" => %self.sender.addr); if let Some(tx) = self.lifetime.take() { let should_fallback = [sink_err, recv_err] .iter() .any(|e| e.as_ref().map_or(false, grpc_error_is_unimplemented)); - if should_fallback { - // Asks backend to fallback. 
- let _ = tx.send(()); - return; - } - } - self.sender.router.broadcast_unreachable(self.store_id); - } - async fn poll(&mut self) { - let res = futures::join!(&mut self.sender, &mut self.receiver); - if let (Ok(()), Ok(Done { .. })) = res { - info!("connection close"; "store_id" => self.store_id, "addr" => %self.sender.addr); - return; + let res = if should_fallback { + // Asks backend to fallback. + RaftCallRes::Fallback + } else { + RaftCallRes::Disconnected + }; + let _ = tx.send(res); } - self.clean_up(res.0.err(), res.1.err()); } } @@ -686,7 +695,7 @@ where .inc_by(len as u64); } - fn connect(&self, addr: &str) -> TikvClient { + fn connect(&self, addr: &str) -> Channel { info!("server: new connection with tikv endpoint"; "addr" => addr, "store_id" => self.store_id); let cfg = self.builder.cfg.value(); @@ -697,16 +706,17 @@ where .default_compression_algorithm(cfg.grpc_compression_algorithm()) .default_gzip_compression_level(cfg.grpc_gzip_compression_level) .default_grpc_min_message_size_to_compress(cfg.grpc_min_message_size_to_compress) + .max_reconnect_backoff(cfg.raft_client_max_backoff.0) + .initial_reconnect_backoff(cfg.raft_client_initial_reconnect_backoff.0) // hack: so it's different args, grpc will always create a new connection. 
.raw_cfg_int( CString::new("random id").unwrap(), CONN_ID.fetch_add(1, Ordering::SeqCst), ); - let channel = self.builder.security_mgr.connect(cb, addr); - TikvClient::new(channel) + self.builder.security_mgr.connect(cb, addr) } - fn batch_call(&self, client: &TikvClient, addr: String) -> oneshot::Receiver<()> { + fn batch_call(&self, client: &TikvClient, addr: String) -> oneshot::Receiver { let (batch_sink, batch_stream) = client.batch_raft().unwrap(); let (tx, rx) = oneshot::channel(); let mut call = RaftCall { @@ -731,7 +741,7 @@ where rx } - fn call(&self, client: &TikvClient, addr: String) -> oneshot::Receiver<()> { + fn call(&self, client: &TikvClient, addr: String) -> oneshot::Receiver { let (sink, stream) = client.raft().unwrap(); let (tx, rx) = oneshot::channel(); let mut call = RaftCall { @@ -756,22 +766,23 @@ where } } -async fn maybe_backoff(backoff: Duration, last_wake_time: &mut Instant, retry_times: &mut u32) { - if *retry_times == 0 { - return; - } - let timeout = backoff * cmp::min(*retry_times, 5); +async fn maybe_backoff(backoff: Duration, last_wake_time: &mut Option) { let now = Instant::now(); - if *last_wake_time + timeout < now { - // We have spent long enough time in last retry, no need to backoff again. - *last_wake_time = now; - *retry_times = 0; + if let Some(last) = *last_wake_time { + if last + backoff < now { + // We have spent long enough time in last retry, no need to backoff again. + *last_wake_time = Some(now); + return; + } + } else { + *last_wake_time = Some(now); return; } - if let Err(e) = GLOBAL_TIMER_HANDLE.delay(now + timeout).compat().await { + + if let Err(e) = GLOBAL_TIMER_HANDLE.delay(now + backoff).compat().await { error_unknown!(?e; "failed to backoff"); } - *last_wake_time = Instant::now(); + *last_wake_time = Some(Instant::now()); } /// A future that drives the life cycle of a connection. 
@@ -793,12 +804,12 @@ async fn start( R: RaftStoreRouter + Unpin + Send + 'static, E: KvEngine, { - let mut last_wake_time = Instant::now(); - let mut retry_times = 0; - let backoff_duration = back_end.builder.cfg.value().raft_client_backoff_step.0; + let mut last_wake_time = None; + let mut first_time = true; + let backoff_duration = back_end.builder.cfg.value().raft_client_max_backoff.0; + let mut addr_channel = None; loop { - maybe_backoff(backoff_duration, &mut last_wake_time, &mut retry_times).await; - retry_times += 1; + maybe_backoff(backoff_duration, &mut last_wake_time).await; let f = back_end.resolve(); let addr = match f.await { Ok(addr) => { @@ -822,36 +833,65 @@ async fn start( continue; } }; - let client = back_end.connect(&addr); + + // reuse channel if the address is the same. + if addr_channel + .as_ref() + .map_or(true, |(_, prev_addr)| prev_addr != &addr) + { + addr_channel = Some((back_end.connect(&addr), addr.clone())); + } + let channel = addr_channel.as_ref().unwrap().0.clone(); + + debug!("connecting to store"; "store_id" => back_end.store_id, "addr" => %addr); + if !channel.wait_for_connected(backoff_duration).await { + error!("wait connect timeout"; "store_id" => back_end.store_id, "addr" => addr); + + // Clears pending messages to avoid consuming high memory when one node is + // shutdown. + back_end.clear_pending_message("unreachable"); + + // broadcast is time consuming operation which would blocks raftstore, so report + // unreachable only once until being connected again. + if first_time { + first_time = false; + back_end + .builder + .router + .broadcast_unreachable(back_end.store_id); + } + continue; + } else { + debug!("connection established"; "store_id" => back_end.store_id, "addr" => %addr); + } + + let client = TikvClient::new(channel); let f = back_end.batch_call(&client, addr.clone()); - let mut res = f.await; - if res == Ok(()) { - // If the call is setup successfully, it will never finish. 
Returning `Ok(())` - // means the batch_call is not supported, we are probably connect to - // an old version of TiKV. So we need to fallback to use legacy API. + let mut res = f.await; // block here until the stream call is closed or aborted. + if res == Ok(RaftCallRes::Fallback) { + // If the call is setup successfully, it will never finish. Returning + // `UnImplemented` means the batch_call is not supported, we are probably + // connect to an old version of TiKV. So we need to fallback to use + // legacy API. let f = back_end.call(&client, addr.clone()); res = f.await; } match res { - Ok(()) => { + Ok(RaftCallRes::Fallback) => { error!("connection fail"; "store_id" => back_end.store_id, "addr" => addr, "err" => "require fallback even with legacy API"); } - Err(_) => { + // Err(_) should be tx is dropped + Ok(RaftCallRes::Disconnected) | Err(_) => { error!("connection abort"; "store_id" => back_end.store_id, "addr" => addr); - if retry_times > 1 { - // Clears pending messages to avoid consuming high memory when one node is - // shutdown. - back_end.clear_pending_message("unreachable"); - } else { - // At least report failure in metrics. 
- REPORT_FAILURE_MSG_COUNTER - .with_label_values(&["unreachable", &back_end.store_id.to_string()]) - .inc_by(1); - } + REPORT_FAILURE_MSG_COUNTER + .with_label_values(&["unreachable", &back_end.store_id.to_string()]) + .inc_by(1); back_end .builder .router .broadcast_unreachable(back_end.store_id); + addr_channel = None; + first_time = false; } } } diff --git a/src/server/server.rs b/src/server/server.rs index 992b5cf6fa0..a4d82f1e347 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -340,7 +340,6 @@ pub mod test_router { use std::sync::mpsc::*; use engine_rocks::{RocksEngine, RocksSnapshot}; - use engine_traits::{KvEngine, Snapshot}; use kvproto::raft_serverpb::RaftMessage; use raftstore::{store::*, Result as RaftStoreResult}; @@ -348,13 +347,13 @@ pub mod test_router { #[derive(Clone)] pub struct TestRaftStoreRouter { - tx: Sender, + tx: Sender, StoreMsg>>, significant_msg_sender: Sender>, } impl TestRaftStoreRouter { pub fn new( - tx: Sender, + tx: Sender, StoreMsg>>, significant_msg_sender: Sender>, ) -> TestRaftStoreRouter { TestRaftStoreRouter { @@ -365,25 +364,26 @@ pub mod test_router { } impl StoreRouter for TestRaftStoreRouter { - fn send(&self, _: StoreMsg) -> RaftStoreResult<()> { - let _ = self.tx.send(1); + fn send(&self, msg: StoreMsg) -> RaftStoreResult<()> { + let _ = self.tx.send(Either::Right(msg)); Ok(()) } } - impl ProposalRouter for TestRaftStoreRouter { + impl ProposalRouter for TestRaftStoreRouter { fn send( &self, - _: RaftCommand, - ) -> std::result::Result<(), crossbeam::channel::TrySendError>> { - let _ = self.tx.send(1); + cmd: RaftCommand, + ) -> std::result::Result<(), crossbeam::channel::TrySendError>> + { + let _ = self.tx.send(Either::Left(PeerMsg::RaftCommand(cmd))); Ok(()) } } - impl CasualRouter for TestRaftStoreRouter { - fn send(&self, _: u64, _: CasualMessage) -> RaftStoreResult<()> { - let _ = self.tx.send(1); + impl CasualRouter for TestRaftStoreRouter { + fn send(&self, _: u64, msg: CasualMessage) -> 
RaftStoreResult<()> { + let _ = self.tx.send(Either::Left(PeerMsg::CasualMessage(msg))); Ok(()) } } @@ -400,13 +400,18 @@ pub mod test_router { } impl RaftStoreRouter for TestRaftStoreRouter { - fn send_raft_msg(&self, _: RaftMessage) -> RaftStoreResult<()> { - let _ = self.tx.send(1); + fn send_raft_msg(&self, msg: RaftMessage) -> RaftStoreResult<()> { + let _ = self + .tx + .send(Either::Left(PeerMsg::RaftMessage(InspectedRaftMessage { + heap_size: 0, + msg, + }))); Ok(()) } - fn broadcast_normal(&self, _: impl FnMut() -> PeerMsg) { - let _ = self.tx.send(1); + fn broadcast_normal(&self, mut f: impl FnMut() -> PeerMsg) { + let _ = self.tx.send(Either::Left(f())); } } } @@ -427,7 +432,7 @@ mod tests { }; use resource_metering::ResourceTagFactory; use security::SecurityConfig; - use tikv_util::quota_limiter::QuotaLimiter; + use tikv_util::{config::ReadableDuration, quota_limiter::QuotaLimiter}; use tokio::runtime::Builder as TokioBuilder; use super::{ @@ -487,6 +492,8 @@ mod tests { let mock_store_id = 5; let cfg = Config { addr: "127.0.0.1:0".to_owned(), + raft_client_max_backoff: ReadableDuration::millis(100), + raft_client_initial_reconnect_backoff: ReadableDuration::millis(100), ..Default::default() }; diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 90524079bfa..9bb2f7b88da 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -88,6 +88,8 @@ fn test_serde_custom_tikv_config() { max_grpc_send_msg_len: 6 * (1 << 20), raft_client_grpc_send_msg_buffer: 1234 * 1024, raft_client_queue_size: 1234, + raft_client_max_backoff: ReadableDuration::secs(5), + raft_client_initial_reconnect_backoff: ReadableDuration::secs(1), raft_msg_max_batch_size: 123, concurrent_send_snap_limit: 4, concurrent_recv_snap_limit: 4, @@ -117,7 +119,6 @@ fn test_serde_custom_tikv_config() { heavy_load_wait_duration: Some(ReadableDuration::millis(2)), enable_request_batch: false, background_thread_count: 999, - 
raft_client_backoff_step: ReadableDuration::secs(1), end_point_slow_log_threshold: ReadableDuration::secs(1), forward_max_connections_per_address: 5, reject_messages_on_memory_ratio: 0.8, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 17f82f9eb87..9c1837c1fbd 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -54,6 +54,7 @@ status-thread-pool-size = 1 max-grpc-send-msg-len = 6291456 raft-client-grpc-send-msg-buffer = 1263616 raft-client-queue-size = 1234 +raft-client-max-backoff = "5s" raft-msg-max-batch-size = 123 grpc-compression-type = "gzip" grpc-concurrency = 123 diff --git a/tests/integrations/server/raft_client.rs b/tests/integrations/server/raft_client.rs index 7ee38a72c87..edf4d0f1c65 100644 --- a/tests/integrations/server/raft_client.rs +++ b/tests/integrations/server/raft_client.rs @@ -23,14 +23,16 @@ use raft::eraftpb::Entry; use raftstore::{ errors::DiscardReason, router::{RaftStoreBlackHole, RaftStoreRouter}, + store::StoreMsg, }; use tikv::server::{ self, load_statistics::ThreadLoadPool, resolve, resolve::Callback, Config, ConnectionBuilder, RaftClient, StoreAddrResolver, TestRaftStoreRouter, }; use tikv_util::{ - config::VersionTrack, + config::{ReadableDuration, VersionTrack}, worker::{Builder as WorkerBuilder, LazyWorker}, + Either, }; use super::*; @@ -59,7 +61,10 @@ where T: StoreAddrResolver + 'static, { let env = Arc::new(Environment::new(2)); - let cfg = Arc::new(VersionTrack::new(Config::default())); + let mut config = Config::default(); + config.raft_client_max_backoff = ReadableDuration::millis(100); + config.raft_client_initial_reconnect_backoff = ReadableDuration::millis(100); + let cfg = Arc::new(VersionTrack::new(config)); let security_mgr = Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()); let worker = LazyWorker::new("test-raftclient"); let loads = Arc::new(ThreadLoadPool::with_threshold(1000)); @@ 
-194,7 +199,6 @@ fn test_raft_client_reconnect() { raft_client.send(RaftMessage::default()).unwrap(); } raft_client.flush(); - rx.recv_timeout(Duration::from_secs(3)).unwrap(); // `send` should success after the mock server restarted. let service = MockKvForRaft::new(Arc::clone(&msg_count), batch_msg_count, true); @@ -207,6 +211,58 @@ fn test_raft_client_reconnect() { drop(mock_server); } +#[test] +// Test raft_client reports store unreachable only once until being connected +// again +fn test_raft_client_report_unreachable() { + let msg_count = Arc::new(AtomicUsize::new(0)); + let batch_msg_count = Arc::new(AtomicUsize::new(0)); + let service = MockKvForRaft::new(Arc::clone(&msg_count), Arc::clone(&batch_msg_count), true); + let (mut mock_server, port) = create_mock_server(service, 60100, 60200).unwrap(); + + let (tx, rx) = mpsc::channel(); + let (significant_msg_sender, _significant_msg_receiver) = mpsc::channel(); + let router = TestRaftStoreRouter::new(tx, significant_msg_sender); + let mut raft_client = get_raft_client(router, StaticResolver::new(port)); + + // server is disconnected + mock_server.shutdown(); + drop(mock_server); + + raft_client.send(RaftMessage::default()).unwrap(); + let msg = rx.recv_timeout(Duration::from_millis(200)).unwrap(); + if let Either::Right(StoreMsg::StoreUnreachable { store_id }) = msg { + assert_eq!(store_id, 0); + } else { + panic!("expect StoreUnreachable"); + } + // no more unreachable message is sent until it's connected again. + rx.recv_timeout(Duration::from_millis(200)).unwrap_err(); + + // restart the mock server. 
+ let service = MockKvForRaft::new(Arc::clone(&msg_count), batch_msg_count, true); + let mut mock_server = create_mock_server_on(service, port); + + // make sure the connection is connected, otherwise the following sent messages + // may be dropped + std::thread::sleep(Duration::from_millis(200)); + (0..50).for_each(|_| raft_client.send(RaftMessage::default()).unwrap()); + raft_client.flush(); + check_msg_count(500, &msg_count, 50); + + // server is disconnected + mock_server.take().unwrap().shutdown(); + + let msg = rx.recv_timeout(Duration::from_millis(200)).unwrap(); + if let Either::Right(StoreMsg::StoreUnreachable { store_id }) = msg { + assert_eq!(store_id, 0); + } else { + panic!("expect StoreUnreachable"); + } + // no more unreachable message is sent until it's connected again. + rx.recv_timeout(Duration::from_millis(200)).unwrap_err(); +} + #[test] fn test_batch_size_limit() { let msg_count = Arc::new(AtomicUsize::new(0)); From e1ca10e4735d7e3cce29c1b1a9be895d31b051cc Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Wed, 2 Nov 2022 11:24:00 +0800 Subject: [PATCH 0303/1149] txn_types: add info about last change to Lock and Write (#13698) ref tikv/tikv#13694 This commit adds support for serializing and parsing the infomation about the last change stored in the lock and write CF. 
Signed-off-by: Yilin Chen --- Cargo.lock | 2 +- Cargo.toml | 2 +- components/pd_client/src/util.rs | 1 + components/raftstore/src/store/txn_ext.rs | 8 +- components/tikv_kv/src/lib.rs | 4 + components/txn_types/src/lock.rs | 91 +++++++++++++++++-- components/txn_types/src/write.rs | 51 +++++++++++ src/storage/mod.rs | 2 + .../txn/actions/acquire_pessimistic_lock.rs | 5 + src/storage/types.rs | 8 ++ tests/failpoints/cases/test_merge.rs | 8 ++ tests/failpoints/cases/test_split_region.rs | 4 + tests/failpoints/cases/test_transaction.rs | 2 + .../failpoints/cases/test_transfer_leader.rs | 6 ++ tests/integrations/raftstore/test_merge.rs | 6 ++ tests/integrations/raftstore/test_multi.rs | 2 + .../raftstore/test_split_region.rs | 4 + .../raftstore/test_transfer_leader.rs | 4 + 18 files changed, 196 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bc757a3ecdf..f1152b2002e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2694,7 +2694,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#26e28e6a281abb927f91ef992eb8f93b39698ffa" +source = "git+https://github.com/pingcap/kvproto.git#65d0ae8fa853c1e41b43f329afbf60616bdd4d18" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/Cargo.toml b/Cargo.toml index d95dd1c67c1..756f36a0c50 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -221,7 +221,7 @@ procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229 # kvproto at the same time. # After the PR to kvproto is merged, remember to comment this out and run `cargo update -p kvproto`. 
[patch.'https://github.com/pingcap/kvproto'] -# kvproto = { git = "https://github.com/your_github_id/kvproto", branch="your_branch" } +# kvproto = { git = "https://github.com/your_github_id/kvproto", branch = "your_branch" } [workspace] # See https://github.com/rust-lang/rfcs/blob/master/text/2957-cargo-features2.md diff --git a/components/pd_client/src/util.rs b/components/pd_client/src/util.rs index 2aa74176627..da77783c167 100644 --- a/components/pd_client/src/util.rs +++ b/components/pd_client/src/util.rs @@ -854,6 +854,7 @@ pub fn check_resp_header(header: &ResponseHeader) -> Result<()> { ErrorType::Ok => Ok(()), ErrorType::DuplicatedEntry | ErrorType::EntryNotFound => Err(box_err!(err.get_message())), ErrorType::Unknown => Err(box_err!(err.get_message())), + ErrorType::InvalidValue => Err(box_err!(err.get_message())), } } diff --git a/components/raftstore/src/store/txn_ext.rs b/components/raftstore/src/store/txn_ext.rs index 1270ae104c9..ccc4027e9d1 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -322,8 +322,10 @@ mod tests { primary: primary.to_vec().into_boxed_slice(), start_ts: 100.into(), ttl: 3000, - for_update_ts: 100.into(), - min_commit_ts: Default::default(), + for_update_ts: 110.into(), + min_commit_ts: 110.into(), + last_change_ts: 105.into(), + versions_to_last_change: 2, } } @@ -424,6 +426,8 @@ mod tests { ttl: 1000, for_update_ts: 10.into(), min_commit_ts: 20.into(), + last_change_ts: 5.into(), + versions_to_last_change: 2, }, deleted, ), diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 77f9a00efcb..9d4eb4a8370 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -1183,6 +1183,8 @@ mod unit_tests { ttl: 200, for_update_ts: 101.into(), min_commit_ts: 102.into(), + last_change_ts: 80.into(), + versions_to_last_change: 2, }, ), Modify::DeleteRange( @@ -1225,6 +1227,8 @@ mod unit_tests { ttl: 200, for_update_ts: 101.into(), 
min_commit_ts: 102.into(), + last_change_ts: 80.into(), + versions_to_last_change: 2, } .into_lock() .to_bytes(), diff --git a/components/txn_types/src/lock.rs b/components/txn_types/src/lock.rs index 96c96828bcb..3e666c29e40 100644 --- a/components/txn_types/src/lock.rs +++ b/components/txn_types/src/lock.rs @@ -33,6 +33,7 @@ const TXN_SIZE_PREFIX: u8 = b't'; const MIN_COMMIT_TS_PREFIX: u8 = b'c'; const ASYNC_COMMIT_PREFIX: u8 = b'a'; const ROLLBACK_TS_PREFIX: u8 = b'r'; +const LAST_CHANGE_PREFIX: u8 = b'l'; impl LockType { pub fn from_mutation(mutation: &Mutation) -> Option { @@ -85,6 +86,12 @@ pub struct Lock { // while committing is relatively expensive. So the solution is putting the ts of the rollback // to the lock. pub rollback_ts: Vec, + + /// The commit TS of the latest PUT/DELETE record + pub last_change_ts: TimeStamp, + /// The number of versions that need skipping from the latest version to + /// find the latest PUT/DELETE record + pub versions_to_last_change: u64, } impl std::fmt::Debug for Lock { @@ -108,6 +115,8 @@ impl std::fmt::Debug for Lock { .field("use_async_commit", &self.use_async_commit) .field("secondaries", &secondary_keys) .field("rollback_ts", &self.rollback_ts) + .field("last_change_ts", &self.last_change_ts) + .field("versions_to_last_change", &self.versions_to_last_change) .finish() } } @@ -135,6 +144,8 @@ impl Lock { use_async_commit: false, secondaries: Vec::default(), rollback_ts: Vec::default(), + last_change_ts: TimeStamp::zero(), + versions_to_last_change: 0, } } @@ -151,6 +162,17 @@ impl Lock { self } + #[must_use] + pub fn set_last_change( + mut self, + last_change_ts: TimeStamp, + versions_to_last_change: u64, + ) -> Self { + self.last_change_ts = last_change_ts; + self.versions_to_last_change = versions_to_last_change; + self + } + pub fn to_bytes(&self) -> Vec { let mut b = Vec::with_capacity(self.pre_allocate_size()); b.push(self.lock_type.to_u8()); @@ -188,6 +210,11 @@ impl Lock { b.encode_u64(ts.into_inner()).unwrap(); 
} } + if !self.last_change_ts.is_zero() { + b.push(LAST_CHANGE_PREFIX); + b.encode_u64(self.last_change_ts.into_inner()).unwrap(); + b.encode_var_u64(self.versions_to_last_change).unwrap(); + } b } @@ -217,6 +244,9 @@ impl Lock { if !self.rollback_ts.is_empty() { size += 1 + MAX_VAR_U64_LEN + size_of::() * self.rollback_ts.len(); } + if !self.last_change_ts.is_zero() { + size += 1 + size_of::() + MAX_VAR_U64_LEN; + } size } @@ -253,6 +283,8 @@ impl Lock { let mut use_async_commit = false; let mut secondaries = Vec::new(); let mut rollback_ts = Vec::new(); + let mut last_change_ts = TimeStamp::zero(); + let mut versions_to_last_change = 0; while !b.is_empty() { match b.read_u8()? { SHORT_VALUE_PREFIX => { @@ -286,6 +318,10 @@ impl Lock { rollback_ts.push(number::decode_u64(&mut b)?.into()); } } + LAST_CHANGE_PREFIX => { + last_change_ts = number::decode_u64(&mut b)?.into(); + versions_to_last_change = number::decode_var_u64(&mut b)?; + } _ => { // To support forward compatibility, all fields should be serialized in order // and stop parsing if meets an unknown byte. @@ -302,7 +338,8 @@ impl Lock { for_update_ts, txn_size, min_commit_ts, - ); + ) + .set_last_change(last_change_ts, versions_to_last_change); if use_async_commit { lock = lock.use_async_commit(secondaries); } @@ -328,6 +365,7 @@ impl Lock { info.set_use_async_commit(self.use_async_commit); info.set_min_commit_ts(self.min_commit_ts.into_inner()); info.set_secondaries(self.secondaries.into()); + // The client does not care about last_change_ts and versions_to_last_version. info } @@ -434,6 +472,9 @@ pub struct PessimisticLock { pub ttl: u64, pub for_update_ts: TimeStamp, pub min_commit_ts: TimeStamp, + + pub last_change_ts: TimeStamp, + pub versions_to_last_change: u64, } impl PessimisticLock { @@ -448,6 +489,7 @@ impl PessimisticLock { 0, self.min_commit_ts, ) + .set_last_change(self.last_change_ts, self.versions_to_last_change) } // Same with `to_lock` but does not copy the primary key. 
@@ -462,6 +504,7 @@ impl PessimisticLock { 0, self.min_commit_ts, ) + .set_last_change(self.last_change_ts, self.versions_to_last_change) } pub fn memory_size(&self) -> usize { @@ -477,6 +520,8 @@ impl std::fmt::Debug for PessimisticLock { .field("ttl", &self.ttl) .field("for_update_ts", &self.for_update_ts) .field("min_commit_ts", &self.min_commit_ts) + .field("last_change_ts", &self.last_change_ts) + .field("versions_to_last_change", &self.versions_to_last_change) .finish() } } @@ -687,6 +732,17 @@ mod tests { 555.into(), ) .with_rollback_ts(vec![12.into(), 24.into(), 13.into()]), + Lock::new( + LockType::Lock, + b"pk".to_vec(), + 1.into(), + 10, + None, + 6.into(), + 16, + 8.into(), + ) + .set_last_change(4.into(), 2), ]; for (i, lock) in locks.drain(..).enumerate() { let v = lock.to_bytes(); @@ -931,7 +987,8 @@ mod tests { b"secondary_kkkkk2".to_vec(), b"secondary_k3k3k3k3k3k3".to_vec(), b"secondary_k4".to_vec(), - ]); + ]) + .set_last_change(80.into(), 4); assert_eq!( format!("{:?}", lock), @@ -939,7 +996,8 @@ mod tests { short_value: 73686F72745F76616C7565, for_update_ts: TimeStamp(101), txn_size: 10, \ min_commit_ts: TimeStamp(127), use_async_commit: true, \ secondaries: [7365636F6E646172795F6B31, 7365636F6E646172795F6B6B6B6B6B32, \ - 7365636F6E646172795F6B336B336B336B336B336B33, 7365636F6E646172795F6B34], rollback_ts: [] }" + 7365636F6E646172795F6B336B336B336B336B336B33, 7365636F6E646172795F6B34], rollback_ts: [], \ + last_change_ts: TimeStamp(80), versions_to_last_change: 4 }" ); log_wrappers::set_redact_info_log(true); let redact_result = format!("{:?}", lock); @@ -948,7 +1006,8 @@ mod tests { redact_result, "Lock { lock_type: Put, primary_key: ?, start_ts: TimeStamp(100), ttl: 3, \ short_value: ?, for_update_ts: TimeStamp(101), txn_size: 10, min_commit_ts: TimeStamp(127), \ - use_async_commit: true, secondaries: [?, ?, ?, ?], rollback_ts: [] }" + use_async_commit: true, secondaries: [?, ?, ?, ?], rollback_ts: [], \ + last_change_ts: TimeStamp(80), 
versions_to_last_change: 4 }" ); lock.short_value = None; @@ -957,7 +1016,8 @@ mod tests { format!("{:?}", lock), "Lock { lock_type: Put, primary_key: 706B, start_ts: TimeStamp(100), ttl: 3, short_value: , \ for_update_ts: TimeStamp(101), txn_size: 10, min_commit_ts: TimeStamp(127), \ - use_async_commit: true, secondaries: [], rollback_ts: [] }" + use_async_commit: true, secondaries: [], rollback_ts: [], last_change_ts: TimeStamp(80), \ + versions_to_last_change: 4 }" ); log_wrappers::set_redact_info_log(true); let redact_result = format!("{:?}", lock); @@ -966,7 +1026,8 @@ mod tests { redact_result, "Lock { lock_type: Put, primary_key: ?, start_ts: TimeStamp(100), ttl: 3, short_value: ?, \ for_update_ts: TimeStamp(101), txn_size: 10, min_commit_ts: TimeStamp(127), \ - use_async_commit: true, secondaries: [], rollback_ts: [] }" + use_async_commit: true, secondaries: [], rollback_ts: [], last_change_ts: TimeStamp(80), \ + versions_to_last_change: 4 }" ); } @@ -978,6 +1039,8 @@ mod tests { ttl: 1000, for_update_ts: 10.into(), min_commit_ts: 20.into(), + last_change_ts: 8.into(), + versions_to_last_change: 2, }; let expected_lock = Lock { lock_type: LockType::Pessimistic, @@ -991,6 +1054,8 @@ mod tests { use_async_commit: false, secondaries: vec![], rollback_ts: vec![], + last_change_ts: 8.into(), + versions_to_last_change: 2, }; assert_eq!(pessimistic_lock.to_lock(), expected_lock); assert_eq!(pessimistic_lock.into_lock(), expected_lock); @@ -1004,11 +1069,14 @@ mod tests { ttl: 1000, for_update_ts: 10.into(), min_commit_ts: 20.into(), + last_change_ts: 8.into(), + versions_to_last_change: 2, }; assert_eq!( format!("{:?}", pessimistic_lock), "PessimisticLock { primary_key: 7072696D617279, start_ts: TimeStamp(5), ttl: 1000, \ - for_update_ts: TimeStamp(10), min_commit_ts: TimeStamp(20) }" + for_update_ts: TimeStamp(10), min_commit_ts: TimeStamp(20), last_change_ts: TimeStamp(8), \ + versions_to_last_change: 2 }" ); log_wrappers::set_redact_info_log(true); let 
redact_result = format!("{:?}", pessimistic_lock); @@ -1016,7 +1084,8 @@ mod tests { assert_eq!( redact_result, "PessimisticLock { primary_key: ?, start_ts: TimeStamp(5), ttl: 1000, \ - for_update_ts: TimeStamp(10), min_commit_ts: TimeStamp(20) }" + for_update_ts: TimeStamp(10), min_commit_ts: TimeStamp(20), last_change_ts: TimeStamp(8), \ + versions_to_last_change: 2 }" ); } @@ -1028,8 +1097,10 @@ mod tests { ttl: 1000, for_update_ts: 10.into(), min_commit_ts: 20.into(), + last_change_ts: 8.into(), + versions_to_last_change: 2, }; - // 7 bytes for primary key, 16 bytes for Box<[u8]>, and 4 8-byte integers. - assert_eq!(lock.memory_size(), 7 + 16 + 4 * 8); + // 7 bytes for primary key, 16 bytes for Box<[u8]>, and 6 8-byte integers. + assert_eq!(lock.memory_size(), 7 + 16 + 6 * 8); } } diff --git a/components/txn_types/src/write.rs b/components/txn_types/src/write.rs index 755207ed3f3..411295de9ee 100644 --- a/components/txn_types/src/write.rs +++ b/components/txn_types/src/write.rs @@ -28,6 +28,7 @@ const FLAG_ROLLBACK: u8 = b'R'; const FLAG_OVERLAPPED_ROLLBACK: u8 = b'R'; const GC_FENCE_PREFIX: u8 = b'F'; +const LAST_CHANGE_PREFIX: u8 = b'l'; /// The short value for rollback records which are protected from being /// collapsed. 
@@ -150,6 +151,12 @@ pub struct Write { /// * `Some(ts)`: A commit record that has been rewritten due to overlapping /// rollback, and it's next version's `commit_ts` is `ts` pub gc_fence: Option, + + /// The commit TS of the latest PUT/DELETE record + pub last_change_ts: TimeStamp, + /// The number of versions that need skipping from this record + /// to find the latest PUT/DELETE record + pub versions_to_last_change: u64, } impl std::fmt::Debug for Write { @@ -169,6 +176,8 @@ impl std::fmt::Debug for Write { ) .field("has_overlapped_rollback", &self.has_overlapped_rollback) .field("gc_fence", &self.gc_fence) + .field("last_change_ts", &self.last_change_ts) + .field("versions_to_last_change", &self.versions_to_last_change) .finish() } } @@ -183,6 +192,8 @@ impl Write { short_value, has_overlapped_rollback: false, gc_fence: None, + last_change_ts: TimeStamp::zero(), + versions_to_last_change: 0, } } @@ -200,6 +211,8 @@ impl Write { short_value, has_overlapped_rollback: false, gc_fence: None, + last_change_ts: TimeStamp::zero(), + versions_to_last_change: 0, } } @@ -215,6 +228,17 @@ impl Write { self } + #[must_use] + pub fn set_last_change( + mut self, + last_change_ts: TimeStamp, + versions_to_last_change: u64, + ) -> Self { + self.last_change_ts = last_change_ts; + self.versions_to_last_change = versions_to_last_change; + self + } + #[inline] pub fn parse_type(mut b: &[u8]) -> Result { let write_type_bytes = b @@ -231,6 +255,8 @@ impl Write { short_value: self.short_value.as_deref(), has_overlapped_rollback: self.has_overlapped_rollback, gc_fence: self.gc_fence, + last_change_ts: self.last_change_ts, + versions_to_last_change: self.versions_to_last_change, } } } @@ -255,6 +281,13 @@ pub struct WriteRef<'a> { /// /// See [`Write::gc_fence`] for more detail. pub gc_fence: Option, + + /// The commit TS of the last PUT/DELETE record before this write record. + /// It only exists if this is a LOCK/ROLLBACK record. 
+ pub last_change_ts: TimeStamp, + /// The number of versions that need skipping from this record + /// to find the latest PUT/DELETE record + pub versions_to_last_change: u64, } impl WriteRef<'_> { @@ -272,6 +305,8 @@ impl WriteRef<'_> { let mut short_value = None; let mut has_overlapped_rollback = false; let mut gc_fence = None; + let mut last_change_ts = TimeStamp::zero(); + let mut versions_to_last_change = 0; while !b.is_empty() { match b @@ -296,6 +331,10 @@ impl WriteRef<'_> { has_overlapped_rollback = true; } GC_FENCE_PREFIX => gc_fence = Some(number::decode_u64(&mut b)?.into()), + LAST_CHANGE_PREFIX => { + last_change_ts = number::decode_u64(&mut b)?.into(); + versions_to_last_change = number::decode_var_u64(&mut b)?; + } _ => { // To support forward compatibility, all fields should be serialized in order // and stop parsing if meets an unknown byte. @@ -310,6 +349,8 @@ impl WriteRef<'_> { short_value, has_overlapped_rollback, gc_fence, + last_change_ts, + versions_to_last_change, }) } @@ -329,6 +370,11 @@ impl WriteRef<'_> { b.push(GC_FENCE_PREFIX); b.encode_u64(ts.into_inner()).unwrap(); } + if !self.last_change_ts.is_zero() { + b.push(LAST_CHANGE_PREFIX); + b.encode_u64(self.last_change_ts.into_inner()).unwrap(); + b.encode_var_u64(self.versions_to_last_change).unwrap(); + } b } @@ -341,6 +387,9 @@ impl WriteRef<'_> { if self.gc_fence.is_some() { size += 1 + size_of::(); } + if !self.last_change_ts.is_zero() { + size += 1 + size_of::() + MAX_VAR_U64_LEN; + } size } @@ -389,6 +438,7 @@ impl WriteRef<'_> { self.short_value.map(|v| v.to_owned()), ) .set_overlapped_rollback(self.has_overlapped_rollback, self.gc_fence) + .set_last_change(self.last_change_ts, self.versions_to_last_change) } } @@ -447,6 +497,7 @@ mod tests { .set_overlapped_rollback(true, Some(2345678.into())), Write::new(WriteType::Put, 456.into(), Some(b"short_value".to_vec())) .set_overlapped_rollback(true, Some(421397468076048385.into())), + Write::new(WriteType::Lock, 456.into(), 
None).set_last_change(345.into(), 11), ]; for (i, write) in writes.drain(..).enumerate() { let v = write.as_ref().to_bytes(); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 33d1c4ddf97..3ce45689c49 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -9610,6 +9610,8 @@ mod tests { ttl: 3000, for_update_ts: 10.into(), min_commit_ts: 11.into(), + last_change_ts: TimeStamp::zero(), + versions_to_last_change: 0, }, false ) diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index 7c2f41d3e1b..e77e8b7ff59 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -142,6 +142,8 @@ pub fn acquire_pessimistic_lock( ttl: lock_ttl, for_update_ts, min_commit_ts, + last_change_ts: lock.last_change_ts, + versions_to_last_change: lock.versions_to_last_change, }; txn.put_pessimistic_lock(key, lock); } else { @@ -256,6 +258,9 @@ pub fn acquire_pessimistic_lock( ttl: lock_ttl, for_update_ts, min_commit_ts, + // TODO: calculate the two fields below from the latest write record + last_change_ts: TimeStamp::zero(), + versions_to_last_change: 0, }; // When lock_only_if_exists is false, always accquire pessimitic lock, otherwise diff --git a/src/storage/types.rs b/src/storage/types.rs index c8303787a41..07219435800 100644 --- a/src/storage/types.rs +++ b/src/storage/types.rs @@ -52,6 +52,10 @@ impl MvccInfo { write_info.set_start_ts(write.start_ts.into_inner()); write_info.set_commit_ts(commit_ts.into_inner()); write_info.set_short_value(write.short_value.unwrap_or_default()); + if !write.last_change_ts.is_zero() { + write_info.set_last_change_ts(write.last_change_ts.into_inner()); + write_info.set_versions_to_last_change(write.versions_to_last_change); + } write_info }) .collect() @@ -70,6 +74,10 @@ impl MvccInfo { lock_info.set_start_ts(lock.ts.into_inner()); lock_info.set_primary(lock.primary); 
lock_info.set_short_value(lock.short_value.unwrap_or_default()); + if !lock.last_change_ts.is_zero() { + lock_info.set_last_change_ts(lock.last_change_ts.into_inner()); + lock_info.set_versions_to_last_change(lock.versions_to_last_change); + } mvcc_info.set_lock(lock_info); } let vv = extract_2pc_values(self.values); diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index c602fc6e4f7..fa4f6e9cb42 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -1346,6 +1346,8 @@ fn test_merge_with_concurrent_pessimistic_locking() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), + last_change_ts: 15.into(), + versions_to_last_change: 3, }, )]) .unwrap(); @@ -1433,6 +1435,8 @@ fn test_merge_pessimistic_locks_with_concurrent_prewrite() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), + last_change_ts: 15.into(), + versions_to_last_change: 3, }; txn_ext .pessimistic_locks @@ -1512,6 +1516,8 @@ fn test_retry_pending_prepare_merge_fail() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), + last_change_ts: 15.into(), + versions_to_last_change: 3, }; txn_ext .pessimistic_locks @@ -1586,6 +1592,8 @@ fn test_merge_pessimistic_locks_propose_fail() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), + last_change_ts: 15.into(), + versions_to_last_change: 3, }; txn_ext .pessimistic_locks diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 9ed57b94091..416116c833b 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -943,6 +943,8 @@ fn test_split_pessimistic_locks_with_concurrent_prewrite() { ttl: 3000, for_update_ts: (commit_ts + 10).into(), min_commit_ts: (commit_ts + 10).into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, }; let lock_c = PessimisticLock { primary: b"c".to_vec().into_boxed_slice(), @@ -950,6 +952,8 @@ fn 
test_split_pessimistic_locks_with_concurrent_prewrite() { ttl: 3000, for_update_ts: (commit_ts + 10).into(), min_commit_ts: (commit_ts + 10).into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, }; { let mut locks = txn_ext.pessimistic_locks.write(); diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index e42a44047a4..564b5f393ec 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -566,6 +566,8 @@ fn test_concurrent_write_after_transfer_leader_invalidates_locks() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, }; txn_ext .pessimistic_locks diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index cc6b043f0e5..ed4a8501188 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -134,6 +134,8 @@ fn test_delete_lock_proposed_after_proposing_locks_impl(transfer_msg_count: usiz ttl: 1000, for_update_ts: 10.into(), min_commit_ts: 20.into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, }, )]) .unwrap(); @@ -211,6 +213,8 @@ fn test_delete_lock_proposed_before_proposing_locks() { ttl: 1000, for_update_ts: 10.into(), min_commit_ts: 20.into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, }, )]) .unwrap(); @@ -293,6 +297,8 @@ fn test_read_lock_after_become_follower() { ttl: 1000, for_update_ts, min_commit_ts: for_update_ts, + last_change_ts: start_ts.prev(), + versions_to_last_change: 1, }, )]) .unwrap(); diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 48adb2eb84c..c72ba5ac595 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -1298,6 +1298,8 @@ fn test_propose_in_memory_pessimistic_locks() { ttl: 3000, for_update_ts: 20.into(), 
min_commit_ts: 30.into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, }; txn_ext .pessimistic_locks @@ -1314,6 +1316,8 @@ fn test_propose_in_memory_pessimistic_locks() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, }; txn_ext .pessimistic_locks @@ -1421,6 +1425,8 @@ fn test_merge_pessimistic_locks_repeated_merge() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, }; txn_ext .pessimistic_locks diff --git a/tests/integrations/raftstore/test_multi.rs b/tests/integrations/raftstore/test_multi.rs index 2cda3b8a0b8..ef368bbe0cb 100644 --- a/tests/integrations/raftstore/test_multi.rs +++ b/tests/integrations/raftstore/test_multi.rs @@ -833,6 +833,8 @@ fn test_leader_drop_with_pessimistic_lock() { ttl: 1000, for_update_ts: 10.into(), min_commit_ts: 10.into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, }, )]) .unwrap(); diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 6ac72f668db..10771c57863 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -963,6 +963,8 @@ fn test_split_with_in_memory_pessimistic_locks() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, }; let lock_c = PessimisticLock { primary: b"c".to_vec().into_boxed_slice(), @@ -970,6 +972,8 @@ fn test_split_with_in_memory_pessimistic_locks() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, }; { let mut locks = txn_ext.pessimistic_locks.write(); diff --git a/tests/integrations/raftstore/test_transfer_leader.rs b/tests/integrations/raftstore/test_transfer_leader.rs index b0fade84d8b..b4f8c33d54d 100644 --- 
a/tests/integrations/raftstore/test_transfer_leader.rs +++ b/tests/integrations/raftstore/test_transfer_leader.rs @@ -304,6 +304,8 @@ fn test_propose_in_memory_pessimistic_locks() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, }; // Write a pessimistic lock to the in-memory pessimistic lock table. { @@ -344,6 +346,8 @@ fn test_memory_pessimistic_locks_status_after_transfer_leader_failure() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, }; // Write a pessimistic lock to the in-memory pessimistic lock table. txn_ext From de73806c165f31d728b323af8cd5c707500478b3 Mon Sep 17 00:00:00 2001 From: glorv Date: Wed, 2 Nov 2022 14:15:59 +0800 Subject: [PATCH 0304/1149] raftstore: restrict the total write size of each apply round (#13594) ref tikv/tikv#13313 Signed-off-by: glorv Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/config.rs | 12 ++ components/raftstore/src/store/fsm/apply.rs | 106 ++++++++++++++++-- .../integrations/config/dynamic/raftstore.rs | 2 + tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 5 files changed, 115 insertions(+), 7 deletions(-) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 4d9cd73d207..cbd83d0b85d 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -218,6 +218,14 @@ pub struct Config { pub dev_assert: bool, #[online_config(hidden)] pub apply_yield_duration: ReadableDuration, + /// yield the fsm when apply flushed data size exceeds this threshold. + /// the yield is check after commit, so the actual handled messages can be + /// bigger than the configed value. 
+ // NOTE: the default value is much smaller than the default max raft batch msg size(0.2 + // * raft_entry_max_size), this is intentional because in the common case, a raft entry + // is unlikely to exceed this threshold, but in case when raftstore is the bottleneck, + // we still allow big raft batch for better throughput. + pub apply_yield_write_size: ReadableSize, #[serde(with = "perf_level_serde")] #[online_config(skip)] @@ -386,6 +394,7 @@ impl Default for Config { hibernate_regions: true, dev_assert: false, apply_yield_duration: ReadableDuration::millis(500), + apply_yield_write_size: ReadableSize::kb(32), perf_level: PerfLevel::Uninitialized, evict_cache_on_memory_ratio: 0.0, cmd_batch: true, @@ -898,6 +907,9 @@ impl Config { CONFIG_RAFTSTORE_GAUGE .with_label_values(&["local_read_batch_size"]) .set(self.local_read_batch_size as f64); + CONFIG_RAFTSTORE_GAUGE + .with_label_values(&["apply_yield_write_size"]) + .set(self.apply_yield_write_size.0 as f64); CONFIG_RAFTSTORE_GAUGE .with_label_values(&["apply_max_batch_size"]) .set(self.apply_batch_system.max_batch_size() as f64); diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index c8fee703e63..a5da7b9c9f1 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -378,6 +378,7 @@ where perf_context: EK::PerfContext, yield_duration: Duration, + yield_msg_size: u64, store_id: u64, /// region_id -> (peer_id, is_splitting) @@ -467,6 +468,7 @@ where use_delete_range: cfg.use_delete_range, perf_context: engine.get_perf_context(cfg.perf_level, PerfContextKind::RaftstoreApply), yield_duration: cfg.apply_yield_duration.0, + yield_msg_size: cfg.apply_yield_write_size.0, delete_ssts: vec![], pending_delete_ssts: vec![], store_id, @@ -635,7 +637,7 @@ where apply_state: delegate.apply_state.clone(), write_seqno: mem::take(&mut delegate.unfinished_write_seqno), exec_res: results, - metrics: delegate.metrics.clone(), + 
metrics: mem::take(&mut delegate.metrics), applied_term: delegate.applied_term, bucket_stat: delegate.buckets.clone().map(Box::new), }); @@ -1136,10 +1138,14 @@ where && apply_ctx.host.pre_persist(&self.region, false, Some(&cmd)) { apply_ctx.commit(self); - if let Some(start) = self.handle_start.as_ref() { - if start.saturating_elapsed() >= apply_ctx.yield_duration { - return ApplyResult::Yield; - } + if self.metrics.written_bytes >= apply_ctx.yield_msg_size + || self + .handle_start + .as_ref() + .map_or(Duration::ZERO, Instant::saturating_elapsed) + >= apply_ctx.yield_duration + { + return ApplyResult::Yield; } has_unflushed_data = false; } @@ -3576,7 +3582,6 @@ where RAFT_ENTRIES_CACHES_GAUGE.sub(dangle_size as i64); } - self.delegate.metrics = ApplyMetrics::default(); self.delegate.term = apply.term; if let Some(meta) = apply.bucket_meta.clone() { let buckets = self @@ -4096,6 +4101,7 @@ where } _ => {} } + self.apply_ctx.yield_msg_size = incoming.apply_yield_write_size.0; update_cfg(&incoming.apply_batch_system); } } @@ -4535,7 +4541,7 @@ mod tests { use tempfile::{Builder, TempDir}; use test_sst_importer::*; use tikv_util::{ - config::VersionTrack, + config::{ReadableSize, VersionTrack}, store::{new_learner_peer, new_peer}, worker::dummy_scheduler, }; @@ -5596,6 +5602,92 @@ mod tests { system.shutdown(); } + #[test] + fn test_apply_yield_with_msg_size() { + let (_path, engine) = create_tmp_engine("test-apply-yield"); + let (_import_dir, importer) = create_tmp_importer("test-apply-yield"); + let obs = ApplyObserver::default(); + let mut host = CoprocessorHost::::default(); + host.registry + .register_query_observer(1, BoxQueryObserver::new(obs)); + + let (tx, rx) = mpsc::channel(); + let (region_scheduler, _) = dummy_scheduler(); + let sender = Box::new(TestNotifier { tx }); + let cfg = Arc::new(VersionTrack::new(Config::default())); + let (router, mut system) = create_apply_batch_system(&cfg.value()); + let pending_create_peers = 
Arc::new(Mutex::new(HashMap::default())); + let builder = super::Builder:: { + tag: "test-store".to_owned(), + cfg: cfg.clone(), + sender, + region_scheduler, + coprocessor_host: host, + importer, + engine, + router: router.clone(), + store_id: 1, + pending_create_peers, + }; + system.spawn("test-handle-raft".to_owned(), builder); + + let peer_id = 3; + let mut reg = Registration { + id: peer_id, + ..Default::default() + }; + reg.region.set_id(1); + reg.region.mut_peers().push(new_peer(2, 3)); + reg.region.set_end_key(b"k5".to_vec()); + reg.region.mut_region_epoch().set_conf_ver(1); + reg.region.mut_region_epoch().set_version(3); + router.schedule_task(1, Msg::Registration(reg)); + + let schedule_apply = |idx: u64, count: usize, size: usize| { + let mut entries = Vec::with_capacity(count); + for i in 0..count { + let put_entry = EntryBuilder::new(idx + i as u64, 3) + .put(format!("k{:03}", i).as_ref(), &vec![0; size - 4]) + .epoch(1, 3) + .build(); + entries.push(put_entry); + } + router.schedule_task(1, Msg::apply(apply(peer_id, 1, 3, entries, vec![]))); + }; + + fn approximate_eq(a: u64, b: u64, delta: u64) { + assert!( + a >= b - delta && a <= b + delta, + "left: {}, right: {}, delta: {}", + a, + b, + delta + ); + } + + // schedule a batch with 512 keys and 64k total size will trigger 2 flush and + // yield. 
+ schedule_apply(1, 512, 128); + let apply_res = fetch_apply_res(&rx); + approximate_eq(apply_res.metrics.written_bytes, 32768, 2048); + approximate_eq(apply_res.metrics.written_keys, 256, 15); + // the second part, note that resume apply not clean up the metrics + let apply_res = fetch_apply_res(&rx); + approximate_eq(apply_res.metrics.written_bytes, 32768, 2048); + approximate_eq(apply_res.metrics.written_keys, 256, 15); + + // update apply yeild size to 64kb + _ = cfg.update(|c| { + c.apply_yield_write_size = ReadableSize::kb(64); + Ok::<(), ()>(()) + }); + // only trigger one time of + schedule_apply(513, 512, 128); + let apply_res = fetch_apply_res(&rx); + approximate_eq(apply_res.metrics.written_bytes, 65536, 4096); + approximate_eq(apply_res.metrics.written_keys, 512, 20); + } + #[test] fn test_handle_ingest_sst() { let (_path, engine) = create_tmp_engine("test-ingest"); diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index 38fdf5c175c..03bc7ba46c1 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -162,6 +162,7 @@ fn test_update_raftstore_config() { ("raftstore.apply-max-batch-size", "1234"), ("raftstore.store-max-batch-size", "4321"), ("raftstore.raft-entry-max-size", "32MiB"), + ("raftstore.apply-yield-write-size", "10KiB"), ]); cfg_controller.update(change).unwrap(); @@ -169,6 +170,7 @@ fn test_update_raftstore_config() { // config should be updated let mut raft_store = config.raft_store; raft_store.messages_per_tick = 12345; + raft_store.apply_yield_write_size = ReadableSize::kb(10); raft_store.raft_log_gc_threshold = 54321; raft_store.apply_batch_system.max_batch_size = Some(1234); raft_store.store_batch_system.max_batch_size = Some(4321); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 9bb2f7b88da..a61b66e1436 100644 --- a/tests/integrations/config/mod.rs +++ 
b/tests/integrations/config/mod.rs @@ -231,6 +231,7 @@ fn test_serde_custom_tikv_config() { hibernate_regions: false, dev_assert: true, apply_yield_duration: ReadableDuration::millis(333), + apply_yield_write_size: ReadableSize(12345), perf_level: PerfLevel::Disable, evict_cache_on_memory_ratio: 0.8, cmd_batch: false, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 9c1837c1fbd..a041b696158 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -190,6 +190,7 @@ merge-check-tick-interval = "11s" use-delete-range = true cleanup-import-sst-interval = "12m" local-read-batch-size = 33 +apply-yield-write-size = "12345B" apply-max-batch-size = 22 apply-pool-size = 4 apply-reschedule-duration = "3s" From 9f707fd941819f0bd22b2b03ea02f3e98d84a024 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Wed, 2 Nov 2022 15:44:00 +0800 Subject: [PATCH 0305/1149] storage: calculate last_change_ts in acquire_pessimistic_lock (#13717) ref tikv/tikv#13694 Information about the last change is calculated and stored in the pessimistic lock. Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- components/txn_types/src/write.rs | 19 +++ src/storage/mvcc/reader/scanner/forward.rs | 34 +++-- .../txn/actions/acquire_pessimistic_lock.rs | 118 +++++++++++++++++- 3 files changed, 159 insertions(+), 12 deletions(-) diff --git a/components/txn_types/src/write.rs b/components/txn_types/src/write.rs index 411295de9ee..0c0994640d2 100644 --- a/components/txn_types/src/write.rs +++ b/components/txn_types/src/write.rs @@ -259,6 +259,25 @@ impl Write { versions_to_last_change: self.versions_to_last_change, } } + + /// Returns the new `last_change_ts` and `versions_to_last_change` according + /// to this write record. 
+ pub fn next_last_change_info(&self, commit_ts: TimeStamp) -> (TimeStamp, u64) { + match self.write_type { + WriteType::Put | WriteType::Delete => (commit_ts, 1), + WriteType::Lock | WriteType::Rollback => { + // If `last_change_ts` is zero, do not set `last_change_ts` to indicate we don't + // know where is the last change. + // This should not happen if data is written in new version TiKV. If we hope to + // support data from old TiKV, consider iterating to the last change to find it. + if !self.last_change_ts.is_zero() { + (self.last_change_ts, self.versions_to_last_change + 1) + } else { + (TimeStamp::zero(), 0) + } + } + } + } } #[derive(PartialEq, Clone)] diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index c59c20fbe05..5d9d1b9bb83 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -886,6 +886,8 @@ pub mod test_util { pub commit_ts: TimeStamp, pub for_update_ts: TimeStamp, pub old_value: OldValue, + pub last_change_ts: TimeStamp, + pub versions_to_last_change: u64, } impl Default for EntryBuilder { @@ -898,6 +900,8 @@ pub mod test_util { commit_ts: 0.into(), for_update_ts: 0.into(), old_value: OldValue::None, + last_change_ts: TimeStamp::zero(), + versions_to_last_change: 0, } } } @@ -931,6 +935,15 @@ pub mod test_util { self.old_value = OldValue::value(old_value.to_owned()); self } + pub fn last_change( + &mut self, + last_change_ts: TimeStamp, + versions_to_last_change: u64, + ) -> &mut Self { + self.last_change_ts = last_change_ts; + self.versions_to_last_change = versions_to_last_change; + self + } pub fn build_commit(&self, wt: WriteType, is_short_value: bool) -> TxnEntry { let write_key = Key::from_raw(&self.key).append_ts(self.commit_ts); let (key, value, short) = if is_short_value { @@ -949,7 +962,8 @@ pub mod test_util { None, ) }; - let write_value = Write::new(wt, self.start_ts, short); + let write_value = Write::new(wt, self.start_ts, 
short) + .set_last_change(self.last_change_ts, self.versions_to_last_change); TxnEntry::Commit { default: (key, value), write: (write_key.into_encoded(), write_value.as_ref().to_bytes()), @@ -984,7 +998,8 @@ pub mod test_util { self.for_update_ts, 0, 0.into(), - ); + ) + .set_last_change(self.last_change_ts, self.versions_to_last_change); TxnEntry::Prewrite { default: (key, value), lock: (lock_key.into_encoded(), lock_value.to_bytes()), @@ -2426,11 +2441,15 @@ mod delta_entry_tests { let mut entries_of_key = vec![]; if let Some((ts, lock_type, value)) = lock { - let max_commit_ts = writes - .last() - .cloned() - .map(|(_, commit_ts, ..)| commit_ts) - .unwrap_or(0); + let last_write = writes.last(); + let max_commit_ts = + last_write.map(|(_, commit_ts, ..)| *commit_ts).unwrap_or(0); + let (mut last_change_ts, mut versions_to_last_change) = (0,0); + // TODO: Remove `*lock_type == LockType::Pessimistic` after calculating last_change_ts for prewrite. + if *lock_type == LockType::Pessimistic && + let Some((_, commit_ts, WriteType::Put | WriteType::Delete, _)) = last_write { + (last_change_ts, versions_to_last_change) = (*commit_ts, 1); + } let for_update_ts = std::cmp::max(*ts, max_commit_ts + 1); if *ts <= to_ts { @@ -2441,6 +2460,7 @@ mod delta_entry_tests { .for_update_ts(for_update_ts.into()) .primary(key) .value(&value) + .last_change(last_change_ts.into(), versions_to_last_change) .build_prewrite(*lock_type, is_short_value(&value)); entries_of_key.push(entry); } diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index e77e8b7ff59..656b75bfbde 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -156,6 +156,8 @@ pub fn acquire_pessimistic_lock( // Following seek_write read the previous write. 
let (prev_write_loaded, mut prev_write) = (true, None); + let mut last_change_ts = TimeStamp::zero(); + let mut versions_to_last_change = 0; if let Some((commit_ts, write)) = reader.seek_write(&key, TimeStamp::max())? { // Find a previous write. if need_old_value { @@ -216,6 +218,8 @@ pub fn acquire_pessimistic_lock( // Check data constraint when acquiring pessimistic lock. check_data_constraint(reader, should_not_exist, &write, commit_ts, &key)?; + (last_change_ts, versions_to_last_change) = write.next_last_change_info(commit_ts); + if need_value || need_check_existence { val = match write.write_type { // If it's a valid Write, no need to read again. @@ -258,9 +262,8 @@ pub fn acquire_pessimistic_lock( ttl: lock_ttl, for_update_ts, min_commit_ts, - // TODO: calculate the two fields below from the latest write record - last_change_ts: TimeStamp::zero(), - versions_to_last_change: 0, + last_change_ts, + versions_to_last_change, }; // When lock_only_if_exists is false, always accquire pessimitic lock, otherwise @@ -278,7 +281,7 @@ pub mod tests { use kvproto::kvrpcpb::Context; #[cfg(test)] use kvproto::kvrpcpb::PrewriteRequestPessimisticAction::*; - use txn_types::TimeStamp; + use txn_types::{Lock, TimeStamp}; use super::*; use crate::storage::{ @@ -508,13 +511,14 @@ pub mod tests { key: &[u8], start_ts: impl Into, for_update_ts: impl Into, - ) { + ) -> Lock { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new(snapshot, None, true); let lock = reader.load_lock(&Key::from_raw(key)).unwrap().unwrap(); assert_eq!(lock.ts, start_ts.into()); assert_eq!(lock.for_update_ts, for_update_ts.into()); assert_eq!(lock.lock_type, LockType::Pessimistic); + lock } #[test] @@ -1465,4 +1469,108 @@ pub mod tests { } } } + + #[test] + fn test_calculate_last_change_ts() { + use engine_traits::CF_WRITE; + + let mut engine = TestEngineBuilder::new().build().unwrap(); + let key = b"k"; + + // Latest version is a PUT + let write = 
Write::new(WriteType::Put, 15.into(), Some(b"value".to_vec())); + engine + .put_cf( + Default::default(), + CF_WRITE, + Key::from_raw(key).append_ts(20.into()), + write.as_ref().to_bytes(), + ) + .unwrap(); + must_succeed(&mut engine, key, key, 10, 30); + let lock = must_pessimistic_locked(&mut engine, key, 10, 30); + assert_eq!(lock.last_change_ts, 20.into()); + assert_eq!(lock.versions_to_last_change, 1); + pessimistic_rollback::tests::must_success(&mut engine, key, 10, 30); + + // Latest version is a DELETE + let write = Write::new(WriteType::Delete, 40.into(), None); + engine + .put_cf( + Default::default(), + CF_WRITE, + Key::from_raw(key).append_ts(50.into()), + write.as_ref().to_bytes(), + ) + .unwrap(); + must_succeed(&mut engine, key, key, 60, 70); + let lock = must_pessimistic_locked(&mut engine, key, 60, 70); + assert_eq!(lock.last_change_ts, 50.into()); + assert_eq!(lock.versions_to_last_change, 1); + pessimistic_rollback::tests::must_success(&mut engine, key, 60, 70); + + // Latest version is a LOCK without last_change_ts + let write = Write::new(WriteType::Lock, 70.into(), None); + engine + .put_cf( + Default::default(), + CF_WRITE, + Key::from_raw(key).append_ts(75.into()), + write.as_ref().to_bytes(), + ) + .unwrap(); + must_succeed(&mut engine, key, key, 80, 80); + let lock = must_pessimistic_locked(&mut engine, key, 80, 80); + assert!(lock.last_change_ts.is_zero()); + assert_eq!(lock.versions_to_last_change, 0); + pessimistic_rollback::tests::must_success(&mut engine, key, 80, 80); + + // Latest version is a ROLLBACK without last_change_ts + let write = Write::new(WriteType::Lock, 90.into(), None); + engine + .put_cf( + Default::default(), + CF_WRITE, + Key::from_raw(key).append_ts(90.into()), + write.as_ref().to_bytes(), + ) + .unwrap(); + must_succeed(&mut engine, key, key, 95, 95); + let lock = must_pessimistic_locked(&mut engine, key, 95, 95); + assert!(lock.last_change_ts.is_zero()); + assert_eq!(lock.versions_to_last_change, 0); + 
pessimistic_rollback::tests::must_success(&mut engine, key, 95, 95); + + // Latest version is a LOCK with last_change_ts + let write = Write::new(WriteType::Lock, 100.into(), None).set_last_change(40.into(), 4); + engine + .put_cf( + Default::default(), + CF_WRITE, + Key::from_raw(key).append_ts(110.into()), + write.as_ref().to_bytes(), + ) + .unwrap(); + must_succeed(&mut engine, key, key, 120, 130); + let lock = must_pessimistic_locked(&mut engine, key, 120, 130); + assert_eq!(lock.last_change_ts, 40.into()); + assert_eq!(lock.versions_to_last_change, 5); + pessimistic_rollback::tests::must_success(&mut engine, key, 120, 130); + + // Latest version is a ROLLBACK with last_change_ts + let write = Write::new(WriteType::Rollback, 120.into(), None).set_last_change(40.into(), 5); + engine + .put_cf( + Default::default(), + CF_WRITE, + Key::from_raw(key).append_ts(120.into()), + write.as_ref().to_bytes(), + ) + .unwrap(); + must_succeed(&mut engine, key, key, 140, 140); + let lock = must_pessimistic_locked(&mut engine, key, 140, 140); + assert_eq!(lock.last_change_ts, 40.into()); + assert_eq!(lock.versions_to_last_change, 6); + pessimistic_rollback::tests::must_success(&mut engine, key, 140, 140); + } } From 97ab36eb7147cde02c1654595f99104155ac0c21 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Wed, 2 Nov 2022 17:06:00 +0800 Subject: [PATCH 0306/1149] txn: Add lock-with-conflict support to acquire_pessimistic_lock (#13680) ref tikv/tikv#13298 Add `lock_with_conflict` support to `acquire_pessimistic_lock`, and it's currently always disabled. Side changes: Updated the type for holding the results of `acquire_pessimistic_lock` (removed `PessimisticLockRes` and added new `PessimisticLockResults` and `PessimisticLockKeyResult`), and other necessary changes to adapt the new type. 
Signed-off-by: MyonKeminta --- src/server/service/kv.rs | 2 +- src/storage/errors.rs | 2 +- src/storage/lock_manager/lock_wait_context.rs | 15 +- .../lock_manager/lock_waiting_queue.rs | 10 +- src/storage/mod.rs | 102 +++++- src/storage/mvcc/reader/reader.rs | 1 + .../txn/actions/acquire_pessimistic_lock.rs | 309 +++++++++++++++--- .../txn/commands/acquire_pessimistic_lock.rs | 37 +-- src/storage/txn/commands/mod.rs | 4 +- src/storage/txn/mod.rs | 4 +- src/storage/types.rs | 195 +++++++++-- tests/failpoints/cases/test_storage.rs | 13 +- 12 files changed, 559 insertions(+), 135 deletions(-) diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 84015ddab57..8ac91031c33 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -2240,7 +2240,7 @@ txn_command_future!(future_prewrite, PrewriteRequest, PrewriteResponse, (v, resp txn_command_future!(future_acquire_pessimistic_lock, PessimisticLockRequest, PessimisticLockResponse, (v, resp, tracker) {{ match v { Ok(Ok(res)) => { - let (values, not_founds) = res.into_values_and_not_founds(); + let (values, not_founds) = res.into_legacy_values_and_not_founds(); resp.set_values(values.into()); resp.set_not_founds(not_founds); }, diff --git a/src/storage/errors.rs b/src/storage/errors.rs index 7ce5d925dfa..b5498e807f0 100644 --- a/src/storage/errors.rs +++ b/src/storage/errors.rs @@ -461,7 +461,7 @@ pub fn extract_key_errors(res: Result>>) -> Vec); diff --git a/src/storage/lock_manager/lock_wait_context.rs b/src/storage/lock_manager/lock_wait_context.rs index 24a61876f44..7749ee983cb 100644 --- a/src/storage/lock_manager/lock_wait_context.rs +++ b/src/storage/lock_manager/lock_wait_context.rs @@ -22,7 +22,8 @@ use crate::storage::{ lock_waiting_queue::{LockWaitQueues, PessimisticLockKeyCallback}, LockManager, LockWaitToken, }, - Error as StorageError, PessimisticLockRes, ProcessResult, StorageCallback, + types::PessimisticLockKeyResult, + Error as StorageError, ProcessResult, StorageCallback, }; 
pub struct LockWaitContextInner { @@ -124,7 +125,11 @@ impl LockWaitContext { } } - fn finish_request(&self, result: Result, is_canceling: bool) { + fn finish_request( + &self, + result: Result, + is_canceling: bool, + ) { if is_canceling { let entry = self .lock_wait_queues @@ -171,13 +176,13 @@ mod tests { lock_manager::{lock_waiting_queue::LockWaitEntry, MockLockManager}, mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, txn::{Error as TxnError, ErrorInner as TxnErrorInner}, - types::PessimisticLockParameters, + types::{PessimisticLockParameters, PessimisticLockResults}, ErrorInner as StorageErrorInner, Result as StorageResult, }; fn create_storage_cb() -> ( StorageCallback, - Receiver>>, + Receiver>>, ) { let (tx, rx) = channel(); let cb = StorageCallback::PessimisticLock(Box::new(move |r| tx.send(r).unwrap())); @@ -190,7 +195,7 @@ mod tests { ) -> ( LockWaitToken, LockWaitContext, - Receiver>>, + Receiver>>, ) { let (cb, rx) = create_storage_cb(); let token = lock_wait_queues.get_lock_mgr().allocate_token(); diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs index da8f2e2d289..d3fb58b2a94 100644 --- a/src/storage/lock_manager/lock_waiting_queue.rs +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -80,12 +80,12 @@ use crate::storage::{ metrics::*, mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, txn::Error as TxnError, - types::{PessimisticLockParameters, PessimisticLockRes}, + types::{PessimisticLockKeyResult, PessimisticLockParameters}, Error as StorageError, }; pub type CallbackWithSharedError = Box) + Send + 'static>; -pub type PessimisticLockKeyCallback = CallbackWithSharedError; +pub type PessimisticLockKeyCallback = CallbackWithSharedError; /// Represents an `AcquirePessimisticLock` request that's waiting for a lock, /// and contains the request's parameters. 
@@ -616,7 +616,7 @@ mod tests { struct TestLockWaitEntryHandle { token: LockWaitToken, - wake_up_rx: Receiver>, + wake_up_rx: Receiver>, cancel_cb: Box, } @@ -624,7 +624,7 @@ mod tests { fn wait_for_result_timeout( &self, timeout: Duration, - ) -> Option> { + ) -> Option> { match self.wake_up_rx.recv_timeout(timeout) { Ok(res) => Some(res), Err(RecvTimeoutError::Timeout) => None, @@ -635,7 +635,7 @@ mod tests { } } - fn wait_for_result(self) -> Result { + fn wait_for_result(self) -> Result { self.wake_up_rx .recv_timeout(Duration::from_secs(10)) .unwrap() diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 3ce45689c49..16043a348ce 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -107,7 +107,10 @@ pub use self::{ raw::RawStore, read_pool::{build_read_pool, build_read_pool_for_test}, txn::{Latches, Lock as LatchLock, ProcessResult, Scanner, SnapshotStore, Store}, - types::{PessimisticLockRes, PrewriteResult, SecondaryLocksStatus, StorageCallback, TxnStatus}, + types::{ + PessimisticLockKeyResult, PessimisticLockResults, PrewriteResult, SecondaryLocksStatus, + StorageCallback, TxnStatus, + }, }; use self::{kv::SnapContext, test_util::latest_feature_gate}; use crate::{ @@ -3185,7 +3188,11 @@ pub mod test_util { }; use super::*; - use crate::storage::{lock_manager::WaitTimeout, txn::commands}; + use crate::storage::{ + lock_manager::WaitTimeout, + txn::commands, + types::{PessimisticLockKeyResult, PessimisticLockResults}, + }; pub fn expect_none(x: Option) { assert_eq!(x, None); @@ -3253,10 +3260,52 @@ pub mod test_util { pub fn expect_pessimistic_lock_res_callback( done: Sender, - pessimistic_lock_res: PessimisticLockRes, - ) -> Callback> { - Box::new(move |res: Result>| { - assert_eq!(res.unwrap().unwrap(), pessimistic_lock_res); + pessimistic_lock_res: PessimisticLockResults, + ) -> Callback> { + fn key_res_matches_ignoring_error_content( + lhs: &PessimisticLockKeyResult, + rhs: &PessimisticLockKeyResult, + ) -> bool { + match (lhs, rhs) { + 
(PessimisticLockKeyResult::Empty, PessimisticLockKeyResult::Empty) => true, + (PessimisticLockKeyResult::Value(l), PessimisticLockKeyResult::Value(r)) => l == r, + ( + PessimisticLockKeyResult::Existence(l), + PessimisticLockKeyResult::Existence(r), + ) => l == r, + ( + PessimisticLockKeyResult::LockedWithConflict { + value: value1, + conflict_ts: ts1, + }, + PessimisticLockKeyResult::LockedWithConflict { + value: value2, + conflict_ts: ts2, + }, + ) => value1 == value2 && ts1 == ts2, + (PessimisticLockKeyResult::Waiting, PessimisticLockKeyResult::Waiting) => true, + (PessimisticLockKeyResult::Failed(_), PessimisticLockKeyResult::Failed(_)) => false, + _ => false, + } + } + + Box::new(move |res: Result>| { + let res = res.unwrap().unwrap(); + assert_eq!( + res.0.len(), + pessimistic_lock_res.0.len(), + "pessimistic lock result length not match, expected: {:?}, got: {:?}", + pessimistic_lock_res, + res + ); + for (expected, got) in pessimistic_lock_res.0.iter().zip(res.0.iter()) { + assert!( + key_res_matches_ignoring_error_content(expected, got), + "pessimistic lock result not match, expected: {:?}, got: {:?}", + pessimistic_lock_res, + res + ); + } done.send(0).unwrap(); }) } @@ -3271,7 +3320,7 @@ pub mod test_util { }) } - type PessimisticLockCommand = TypedCommand>; + type PessimisticLockCommand = TypedCommand>; pub fn new_acquire_pessimistic_lock_command( keys: Vec<(Key, bool)>, @@ -3445,6 +3494,7 @@ mod tests { tests::must_rollback, Error as TxnError, ErrorInner as TxnErrorInner, }, + types::{PessimisticLockKeyResult, PessimisticLockResults}, }, }; @@ -7712,16 +7762,33 @@ mod tests { let (key, val) = (Key::from_raw(b"key"), b"val".to_vec()); let (key2, val2) = (Key::from_raw(b"key2"), b"val2".to_vec()); + let results_values = |res: Vec>| { + PessimisticLockResults( + res.into_iter() + .map(|v| PessimisticLockKeyResult::Value(v)) + .collect::>(), + ) + }; + let results_existence = |res: Vec| { + PessimisticLockResults( + res.into_iter() + .map(|v| 
PessimisticLockKeyResult::Existence(v)) + .collect::>(), + ) + }; + let results_empty = + |len| PessimisticLockResults(vec![PessimisticLockKeyResult::Empty; len]); + // Key not exist for &(return_values, check_existence) in &[(false, false), (false, true), (true, false), (true, true)] { let pessimistic_lock_res = if return_values { - PessimisticLockRes::Values(vec![None]) + results_values(vec![None]) } else if check_existence { - PessimisticLockRes::Existence(vec![false]) + results_existence(vec![false]) } else { - PessimisticLockRes::Empty + results_empty(1) }; storage @@ -7769,7 +7836,7 @@ mod tests { false, false, ), - expect_pessimistic_lock_res_callback(tx.clone(), PessimisticLockRes::Empty), + expect_pessimistic_lock_res_callback(tx.clone(), results_empty(1)), ) .unwrap(); rx.recv().unwrap(); @@ -7802,8 +7869,8 @@ mod tests { rx.recv().unwrap(); } - // Needn't update max_ts when failing to read value - assert_eq!(cm.max_ts(), 10.into()); + // Always update max_ts when trying to read. + assert_eq!(cm.max_ts(), 20.into()); // Put key and key2. 
storage @@ -7872,19 +7939,18 @@ mod tests { rx.recv().unwrap(); } - // Needn't update max_ts when failing to read value - assert_eq!(cm.max_ts(), 10.into()); + assert_eq!(cm.max_ts(), 20.into()); // Return multiple values for &(return_values, check_existence) in &[(false, false), (false, true), (true, false), (true, true)] { let pessimistic_lock_res = if return_values { - PessimisticLockRes::Values(vec![Some(val.clone()), Some(val2.clone()), None]) + results_values(vec![Some(val.clone()), Some(val2.clone()), None]) } else if check_existence { - PessimisticLockRes::Existence(vec![true, true, false]) + results_existence(vec![true, true, false]) } else { - PessimisticLockRes::Empty + results_empty(3) }; storage .sched_txn_command( diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 6bf712050ac..321cc21427f 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -924,6 +924,7 @@ pub mod tests { TimeStamp::zero(), true, false, + false, ) .unwrap(); self.write(txn.into_modifies()); diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index 656b75bfbde..9f645e389be 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -10,18 +10,26 @@ use crate::storage::{ ErrorInner, MvccTxn, Result as MvccResult, SnapshotReader, }, txn::actions::check_data_constraint::check_data_constraint, + types::PessimisticLockKeyResult, Snapshot, }; /// Acquires pessimistic lock on a single key. Optionally reads the previous /// value by the way. /// -/// When `need_value` is set, the first return value will be the previous value -/// of the key (possibly `None`). When `need_value` is not set but -/// `need_check_existence` is set, the first return value will be an empty value -/// (`Some(vec![])`) if the key exists before or `None` if not. 
If neither -/// `need_value` nor `need_check_existence` is set, the first return value is -/// always `None`. +/// When `need_value` is set, the first return value will be +/// `PessimisticLockKeyResult::Value`. When `need_value` is not set but +/// `need_check_existence` is set, the first return value will be +/// `PessimisticLockKeyResult::Existence`. If neither `need_value` nor +/// `need_check_existence` is set, the first return value will be +/// `PessimisticLockKeyResult::Empty`. +/// +/// If `allow_lock_with_conflict` is set, and the lock is acquired successfully +/// ignoring a write conflict, the first return value will be +/// `PessimisticLockKeyResult::LockedWithConflict` no matter how `need_value` +/// and `need_check_existence` are set, and the `for_update_ts` in +/// the actually-written lock will be equal to the `commit_ts` of the latest +/// Write record found on the key. /// /// The second return value will also contains the previous value of the key if /// `need_old_value` is set, or `OldValue::Unspecified` otherwise. 
@@ -32,13 +40,14 @@ pub fn acquire_pessimistic_lock( primary: &[u8], should_not_exist: bool, lock_ttl: u64, - for_update_ts: TimeStamp, + mut for_update_ts: TimeStamp, need_value: bool, need_check_existence: bool, min_commit_ts: TimeStamp, need_old_value: bool, lock_only_if_exists: bool, -) -> MvccResult<(Option, OldValue)> { + allow_lock_with_conflict: bool, +) -> MvccResult<(PessimisticLockKeyResult, OldValue)> { fail_point!("acquire_pessimistic_lock", |err| Err( crate::storage::mvcc::txn::make_txn_error(err, &key, reader.start_ts).into() )); @@ -54,9 +63,10 @@ pub fn acquire_pessimistic_lock( } .into()); } - // Update max_ts for Insert operation to guarantee linearizability and snapshot - // isolation - if should_not_exist { + // If any of `should_not_exist`, `need_value`, `need_check_existence` is set, + // it infers a read to the value, in which case max_ts need to be updated to + // guarantee the linearizability and snapshot isolation. + if should_not_exist || need_value || need_check_existence { txn.concurrency_manager.update_max_ts(for_update_ts); } @@ -64,7 +74,7 @@ pub fn acquire_pessimistic_lock( // `need_check_existence` and `need_old_value` are both set, we also load // the value even if `need_value` is false, so that it avoids // `load_old_value` doing repeated work. - let need_load_value = need_value || (need_check_existence && need_old_value); + let mut need_load_value = need_value || (need_check_existence && need_old_value); fn load_old_value( need_old_value: bool, @@ -90,19 +100,6 @@ pub fn acquire_pessimistic_lock( } } - /// Returns proper result according to the loaded value (if any) the - /// specified settings. - #[inline] - fn ret_val(need_value: bool, need_check_existence: bool, val: Option) -> Option { - if need_value { - val - } else if need_check_existence { - val.map(|_| vec![]) - } else { - None - } - } - let mut val = None; if let Some(lock) = reader.load_lock(&key)? 
{ if lock.ts != reader.start_ts { @@ -116,6 +113,32 @@ pub fn acquire_pessimistic_lock( } .into()); } + + let locked_with_conflict_ts = + if allow_lock_with_conflict && for_update_ts < lock.for_update_ts { + // If the key is already locked by the same transaction with larger + // for_update_ts, and the current request has + // `allow_lock_with_conflict` set, we must consider + // these possibilities: + // * If a previous request successfully locked the key with conflict, but the + // response is lost due to some errors such as RPC failures. In this case, we + // return like the current request's result is locked_with_conflict, for + // idempotency concern. + // * The key is locked by a newer request with larger for_update_ts, and the + // current request is stale. We can't distinguish this case with the above + // one, but we don't need to handle this case since no one would need the + // current request's result anymore. + + // Load value if locked_with_conflict, so that when the client (TiDB) need to + // read the value during statement retry, it will be possible to read the value + // from cache instead of RPC. + need_load_value = true; + for_update_ts = lock.for_update_ts; + Some(lock.for_update_ts) + } else { + None + }; + if need_load_value { val = reader.get(&key, for_update_ts)?; } else if need_check_existence { @@ -151,9 +174,19 @@ pub fn acquire_pessimistic_lock( .acquire_pessimistic_lock .inc(); } - return Ok((ret_val(need_value, need_check_existence, val), old_value)); + return Ok(( + PessimisticLockKeyResult::new_success( + need_value, + need_check_existence, + locked_with_conflict_ts, + val, + ), + old_value, + )); } + let mut locked_with_conflict_ts = None; + // Following seek_write read the previous write. 
let (prev_write_loaded, mut prev_write) = (true, None); let mut last_change_ts = TimeStamp::zero(); @@ -172,15 +205,22 @@ pub fn acquire_pessimistic_lock( MVCC_CONFLICT_COUNTER .acquire_pessimistic_lock_conflict .inc(); - return Err(ErrorInner::WriteConflict { - start_ts: reader.start_ts, - conflict_start_ts: write.start_ts, - conflict_commit_ts: commit_ts, - key: key.into_raw()?, - primary: primary.to_vec(), - reason: WriteConflictReason::PessimisticRetry, + if allow_lock_with_conflict { + // TODO: New metrics. + locked_with_conflict_ts = Some(commit_ts); + for_update_ts = commit_ts; + need_load_value = true; + } else { + return Err(ErrorInner::WriteConflict { + start_ts: reader.start_ts, + conflict_start_ts: write.start_ts, + conflict_commit_ts: commit_ts, + key: key.into_raw()?, + primary: primary.to_vec(), + reason: WriteConflictReason::PessimisticRetry, + } + .into()); } - .into()); } // Handle rollback. @@ -215,12 +255,19 @@ pub fn acquire_pessimistic_lock( } } - // Check data constraint when acquiring pessimistic lock. - check_data_constraint(reader, should_not_exist, &write, commit_ts, &key)?; + // Check data constraint when acquiring pessimistic lock. But in case we are + // going to lock it with write conflict, we do not check it since the + // statement will then retry. + if locked_with_conflict_ts.is_none() { + check_data_constraint(reader, should_not_exist, &write, commit_ts, &key)?; + } (last_change_ts, versions_to_last_change) = write.next_last_change_info(commit_ts); - if need_value || need_check_existence { + // Load value if locked_with_conflict, so that when the client (TiDB) need to + // read the value during statement retry, it will be possible to read the value + // from cache instead of RPC. + if need_value || need_check_existence || locked_with_conflict_ts.is_some() { val = match write.write_type { // If it's a valid Write, no need to read again. 
WriteType::Put @@ -266,14 +313,22 @@ pub fn acquire_pessimistic_lock( versions_to_last_change, }; - // When lock_only_if_exists is false, always accquire pessimitic lock, otherwise + // When lock_only_if_exists is false, always acquire pessimistic lock, otherwise // do it when val exists if !lock_only_if_exists || val.is_some() { txn.put_pessimistic_lock(key, lock); } // TODO don't we need to commit the modifies in txn? - Ok((ret_val(need_value, need_check_existence, val), old_value)) + Ok(( + PessimisticLockKeyResult::new_success( + need_value, + need_check_existence, + locked_with_conflict_ts, + val, + ), + old_value, + )) } pub mod tests { @@ -300,6 +355,70 @@ pub mod tests { TestEngineBuilder, }; + #[cfg(test)] + pub fn acquire_pessimistic_lock_allow_lock_with_conflict( + engine: &mut E, + key: &[u8], + pk: &[u8], + start_ts: impl Into, + for_update_ts: impl Into, + need_value: bool, + need_check_existence: bool, + ) -> MvccResult { + let ctx = Context::default(); + let snapshot = engine.snapshot(Default::default()).unwrap(); + let cm = ConcurrencyManager::new(0.into()); + let start_ts = start_ts.into(); + let mut txn = MvccTxn::new(start_ts, cm); + let mut reader = SnapshotReader::new(start_ts, snapshot, true); + let res = acquire_pessimistic_lock( + &mut txn, + &mut reader, + Key::from_raw(key), + pk, + false, + 1, + for_update_ts.into(), + need_value, + need_check_existence, + 0.into(), + false, + false, + true, + ); + if res.is_ok() { + let modifies = txn.into_modifies(); + if !modifies.is_empty() { + engine + .write(&ctx, WriteData::from_modifies(modifies)) + .unwrap(); + } + } + res.map(|r| r.0) + } + + #[cfg(test)] + pub fn must_succeed_allow_lock_with_conflict( + engine: &mut E, + key: &[u8], + pk: &[u8], + start_ts: impl Into, + for_update_ts: impl Into, + need_value: bool, + need_check_existence: bool, + ) -> PessimisticLockKeyResult { + acquire_pessimistic_lock_allow_lock_with_conflict( + engine, + key, + pk, + start_ts, + for_update_ts, + 
need_value, + need_check_existence, + ) + .unwrap() + } + pub fn must_succeed_impl( engine: &mut E, key: &[u8], @@ -333,6 +452,7 @@ pub mod tests { min_commit_ts, false, lock_only_if_exists, + false, ) .unwrap(); let modifies = txn.into_modifies(); @@ -341,7 +461,19 @@ pub mod tests { .write(&ctx, WriteData::from_modifies(modifies)) .unwrap(); } - res.0 + // TODO: Adapt to new interface + match res.0 { + PessimisticLockKeyResult::Value(v) => v, + PessimisticLockKeyResult::Existence(e) => { + if e { + Some(vec![]) + } else { + None + } + } + PessimisticLockKeyResult::Empty => None, + res => panic!("unexpected result: {:?}", res), + } } pub fn must_succeed( @@ -502,6 +634,7 @@ pub mod tests { min_commit_ts, false, lock_only_if_exists, + false, ) .unwrap_err() } @@ -1109,6 +1242,7 @@ pub mod tests { min_commit_ts, need_old_value, false, + false, ) .unwrap(); assert_eq!(old_value, OldValue::None); @@ -1160,6 +1294,7 @@ pub mod tests { min_commit_ts, need_old_value, false, + false, ) .unwrap(); assert_eq!( @@ -1194,6 +1329,7 @@ pub mod tests { min_commit_ts, true, false, + false, ) .unwrap(); assert_eq!( @@ -1237,6 +1373,7 @@ pub mod tests { min_commit_ts, need_old_value, false, + false, )?; Ok(old_value) }); @@ -1290,6 +1427,7 @@ pub mod tests { min_commit_ts, need_old_value, false, + false, ) .unwrap_err(); @@ -1324,6 +1462,7 @@ pub mod tests { min_commit_ts, need_old_value, false, + false, ) .unwrap_err(); } @@ -1573,4 +1712,94 @@ pub mod tests { assert_eq!(lock.versions_to_last_change, 6); pessimistic_rollback::tests::must_success(&mut engine, key, 140, 140); } + + #[test] + fn test_lock_with_conflict() { + use pessimistic_rollback::tests::must_success as must_pessimistic_rollback; + + let mut engine = TestEngineBuilder::new().build().unwrap(); + + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 10); + must_commit(&mut engine, b"k1", 10, 20); + + // Normal cases. 
+ must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 30, false, false) + .assert_empty(); + must_pessimistic_rollback(&mut engine, b"k1", 10, 30); + must_unlocked(&mut engine, b"k1"); + + must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 30, false, true) + .assert_existence(true); + must_pessimistic_rollback(&mut engine, b"k1", 10, 30); + must_unlocked(&mut engine, b"k1"); + + must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 30, true, false) + .assert_value(Some(b"v1")); + must_pessimistic_rollback(&mut engine, b"k1", 10, 30); + must_unlocked(&mut engine, b"k1"); + + must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 30, true, true) + .assert_value(Some(b"v1")); + must_pessimistic_rollback(&mut engine, b"k1", 10, 30); + must_unlocked(&mut engine, b"k1"); + + // Conflicting cases. + for &(need_value, need_check_existence) in + &[(false, false), (false, true), (true, false), (true, true)] + { + must_succeed_allow_lock_with_conflict( + &mut engine, + b"k1", + b"k1", + 10, + 15, + need_value, + need_check_existence, + ) + .assert_locked_with_conflict(Some(b"v1"), 20); + must_pessimistic_locked(&mut engine, b"k1", 10, 20); + must_pessimistic_rollback(&mut engine, b"k1", 10, 20); + must_unlocked(&mut engine, b"k1"); + } + + // Idempotency + must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 50, false, false) + .assert_empty(); + must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 40, false, false) + .assert_locked_with_conflict(Some(b"v1"), 50); + must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 15, false, false) + .assert_locked_with_conflict(Some(b"v1"), 50); + must_pessimistic_locked(&mut engine, b"k1", 10, 50); + must_pessimistic_rollback(&mut engine, b"k1", 10, 50); + must_unlocked(&mut engine, b"k1"); + + // Lock waiting. 
+ must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 50, false, false) + .assert_empty(); + let err = acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k1", + b"k1", + 11, + 55, + false, + false, + ) + .unwrap_err(); + assert!(matches!(err, MvccError(box ErrorInner::KeyIsLocked(_)))); + let err = acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k1", + b"k1", + 9, + 9, + false, + false, + ) + .unwrap_err(); + assert!(matches!(err, MvccError(box ErrorInner::KeyIsLocked(_)))); + must_pessimistic_locked(&mut engine, b"k1", 10, 50); + must_pessimistic_rollback(&mut engine, b"k1", 10, 50); + must_unlocked(&mut engine, b"k1"); + } } diff --git a/src/storage/txn/commands/acquire_pessimistic_lock.rs b/src/storage/txn/commands/acquire_pessimistic_lock.rs index 359f0abacd8..69a5179ab84 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock.rs @@ -16,9 +16,9 @@ use crate::storage::{ }, Error, ErrorInner, Result, }, - types::PessimisticLockParameters, - Error as StorageError, ErrorInner as StorageErrorInner, PessimisticLockRes, ProcessResult, - Result as StorageResult, Snapshot, + types::{PessimisticLockParameters, PessimisticLockResults}, + Error as StorageError, ErrorInner as StorageErrorInner, ProcessResult, Result as StorageResult, + Snapshot, }; command! { @@ -26,7 +26,7 @@ command! { /// /// This can be rolled back with a [`PessimisticRollback`](Command::PessimisticRollback) command. 
AcquirePessimisticLock: - cmd_ty => StorageResult, + cmd_ty => StorageResult, display => "kv::command::acquirepessimisticlock keys({:?}) @ {} {} {} {:?} {} {} {} | {:?}", (keys, start_ts, lock_ttl, for_update_ts, wait_timeout, min_commit_ts, check_existence, lock_only_if_exists, ctx), content => { @@ -88,16 +88,7 @@ impl WriteCommand for AcquirePessimisticLock ); let rows = keys.len(); - let mut res = if self.return_values { - Ok(PessimisticLockRes::Values(vec![])) - } else if self.check_existence { - // If return_value is set, the existence status is implicitly included in the - // result. So check_existence only need to be explicitly handled if - // `return_values` is not set. - Ok(PessimisticLockRes::Existence(vec![])) - } else { - Ok(PessimisticLockRes::Empty) - }; + let mut res = Ok(PessimisticLockResults::with_capacity(rows)); let need_old_value = context.extra_op == ExtraOp::ReadOldValue; for (k, should_not_exist) in keys { match acquire_pessimistic_lock( @@ -113,11 +104,10 @@ impl WriteCommand for AcquirePessimisticLock self.min_commit_ts, need_old_value, self.lock_only_if_exists, + false, ) { - Ok((val, old_value)) => { - if self.return_values || self.check_existence { - res.as_mut().unwrap().push(val); - } + Ok((key_res, old_value)) => { + res.as_mut().unwrap().push(key_res); if old_value.resolved() { let key = k.append_ts(txn.start_ts); // MutationType is unknown in AcquirePessimisticLock stage. 
@@ -133,17 +123,6 @@ impl WriteCommand for AcquirePessimisticLock } } - // Some values are read, update max_ts - match &res { - Ok(PessimisticLockRes::Values(values)) if !values.is_empty() => { - txn.concurrency_manager.update_max_ts(self.for_update_ts); - } - Ok(PessimisticLockRes::Existence(values)) if !values.is_empty() => { - txn.concurrency_manager.update_max_ts(self.for_update_ts); - } - _ => (), - } - // no conflict let (pr, to_be_write, rows, ctx, lock_info) = if res.is_ok() { let pr = ProcessResult::PessimisticLockRes { res }; diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index f5331087ac1..4213eeb6b68 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -63,7 +63,7 @@ use crate::storage::{ mvcc::{Lock as MvccLock, MvccReader, ReleasedLock, SnapshotReader}, txn::{latch, ProcessResult, Result}, types::{ - MvccInfo, PessimisticLockParameters, PessimisticLockRes, PrewriteResult, + MvccInfo, PessimisticLockParameters, PessimisticLockResults, PrewriteResult, SecondaryLocksStatus, StorageCallbackType, TxnStatus, }, Result as StorageResult, Snapshot, Statistics, @@ -193,7 +193,7 @@ impl From for TypedCommand { } } -impl From for TypedCommand> { +impl From for TypedCommand> { fn from(mut req: PessimisticLockRequest) -> Self { let keys = req .take_mutations() diff --git a/src/storage/txn/mod.rs b/src/storage/txn/mod.rs index 5b71d60e3bf..1af3c9d63e6 100644 --- a/src/storage/txn/mod.rs +++ b/src/storage/txn/mod.rs @@ -40,7 +40,7 @@ pub use self::{ }; use crate::storage::{ mvcc::Error as MvccError, - types::{MvccInfo, PessimisticLockRes, PrewriteResult, SecondaryLocksStatus, TxnStatus}, + types::{MvccInfo, PessimisticLockResults, PrewriteResult, SecondaryLocksStatus, TxnStatus}, Error as StorageError, Result as StorageResult, }; @@ -73,7 +73,7 @@ pub enum ProcessResult { err: StorageError, }, PessimisticLockRes { - res: StorageResult, + res: StorageResult, }, SecondaryLocksStatus { status: 
SecondaryLocksStatus, diff --git a/src/storage/types.rs b/src/storage/types.rs index 07219435800..6ad4c8e26ef 100644 --- a/src/storage/types.rs +++ b/src/storage/types.rs @@ -8,6 +8,7 @@ use kvproto::kvrpcpb; use txn_types::{Key, Value}; use crate::storage::{ + errors::SharedError, lock_manager::WaitTimeout, mvcc::{Lock, LockType, TimeStamp, Write, WriteType}, txn::ProcessResult, @@ -155,42 +156,180 @@ pub struct PessimisticLockParameters { pub allow_lock_with_conflict: bool, } -#[derive(Clone, Debug, PartialEq)] -pub enum PessimisticLockRes { - /// The previous value is loaded while handling the `AcquirePessimisticLock` - /// command. The i-th item is the value of the i-th key in the - /// `AcquirePessimisticLock` command. - Values(Vec>), - /// Checked whether the key exists while handling the - /// `AcquirePessimisticLock` command. The i-th item is true if the i-th key - /// in the `AcquirePessimisticLock` command exists. - Existence(Vec), +/// Represents the result of pessimistic lock on a single key. +#[derive(Debug, Clone)] +pub enum PessimisticLockKeyResult { + /// The lock is acquired successfully, returning no additional information. Empty, + /// The lock is acquired successfully, and the previous value is read and + /// returned. + Value(Option), + /// The lock is acquired successfully, and also checked if the key exists + /// previously. + Existence(bool), + /// There is a write conflict, but the lock is acquired ignoring the write + /// conflict. + LockedWithConflict { + /// The previous value of the key. + value: Option, + /// The `commit_ts` of the latest Write record found on this key. This + /// is also the actual `for_update_ts` written to the lock. + conflict_ts: TimeStamp, + }, + /// The key is already locked and lock-waiting is needed. + Waiting, + /// Failed to acquire the lock due to some error. 
+ Failed(SharedError), } -impl PessimisticLockRes { - pub fn push(&mut self, value: Option) { +impl PessimisticLockKeyResult { + pub fn new_success( + need_value: bool, + need_check_existence: bool, + locked_with_conflict_ts: Option, + value: Option, + ) -> Self { + if let Some(conflict_ts) = locked_with_conflict_ts { + Self::LockedWithConflict { value, conflict_ts } + } else if need_value { + Self::Value(value) + } else if need_check_existence { + Self::Existence(value.is_some()) + } else { + Self::Empty + } + } + + pub fn unwrap_value(self) -> Option { match self { - PessimisticLockRes::Values(v) => v.push(value), - PessimisticLockRes::Existence(v) => v.push(value.is_some()), - _ => panic!("unexpected PessimisticLockRes"), + Self::Value(v) => v, + x => panic!( + "pessimistic lock key result expected to be a value, got {:?}", + x + ), } } - pub fn into_values_and_not_founds(self) -> (Vec, Vec) { + pub fn unwrap_existence(self) -> bool { match self { - PessimisticLockRes::Values(vals) => vals - .into_iter() - .map(|v| { - let is_not_found = v.is_none(); - (v.unwrap_or_default(), is_not_found) - }) - .unzip(), - PessimisticLockRes::Existence(mut vals) => { - vals.iter_mut().for_each(|x| *x = !*x); - (vec![], vals) + Self::Existence(e) => e, + x => panic!( + "pessimistic lock key result expected to be existence, got {:?}", + x + ), + } + } + + pub fn assert_empty(&self) { + assert!(matches!(self, Self::Empty)); + } + + #[cfg(test)] + pub fn assert_value(&self, expected_value: Option<&[u8]>) { + match self { + Self::Value(v) if v.as_ref().map(|v| v.as_slice()) == expected_value => (), + x => panic!( + "pessimistic lock key result not match, expected Value({:?}), got {:?}", + expected_value, x + ), + } + } + + #[cfg(test)] + pub fn assert_existence(&self, expected_existence: bool) { + match self { + Self::Existence(e) if *e == expected_existence => (), + x => panic!( + "pessimistic lock key result not match, expected Existence({:?}), got {:?}", + expected_existence, x 
+ ), + } + } + + #[cfg(test)] + pub fn assert_locked_with_conflict( + &self, + expected_value: Option<&[u8]>, + expected_conflict_ts: impl Into, + ) { + let expected_conflict_ts = expected_conflict_ts.into(); + match self { + Self::LockedWithConflict { value, conflict_ts } + if value.as_ref().map(|v| v.as_slice()) == expected_value + && *conflict_ts == expected_conflict_ts => {} + x => panic!( + "pessimistic lock key result not match, expected LockedWithConflict{{ value: {:?}, conflict_ts: {} }}, got {:?}", + expected_value, expected_conflict_ts, x + ), + } + } + + #[cfg(test)] + pub fn assert_waiting(&self) { + assert!(matches!(self, Self::Waiting)); + } + + #[cfg(test)] + pub fn unwrap_err(&self) -> SharedError { + match self { + Self::Failed(e) => e.clone(), + x => panic!( + "pessimistic lock key result not match expected Failed, got {:?}", + x, + ), + } + } +} + +#[derive(Clone, Debug, Default)] +pub struct PessimisticLockResults(pub Vec); + +impl PessimisticLockResults { + pub fn new() -> Self { + Self(vec![]) + } + + pub fn with_capacity(capacity: usize) -> Self { + Self(Vec::with_capacity(capacity)) + } + + pub fn push(&mut self, key_res: PessimisticLockKeyResult) { + self.0.push(key_res); + } + + pub fn into_legacy_values_and_not_founds(self) -> (Vec, Vec) { + if self.0.is_empty() { + return (vec![], vec![]); + } + + match &self.0[0] { + PessimisticLockKeyResult::Empty => { + self.0.into_iter().for_each(|res| res.assert_empty()); + (vec![], vec![]) + } + PessimisticLockKeyResult::Existence(_) => { + let not_founds = self.0.into_iter().map(|x| !x.unwrap_existence()).collect(); + (vec![], not_founds) + } + PessimisticLockKeyResult::Value(_) => { + let mut not_founds = Vec::with_capacity(self.0.len()); + let mut values = Vec::with_capacity(self.0.len()); + self.0.into_iter().for_each(|x| { + let v = x.unwrap_value(); + match v { + Some(v) => { + not_founds.push(false); + values.push(v); + } + None => { + not_founds.push(true); + values.push(vec![]); + } + } + 
}); + (values, not_founds) } - PessimisticLockRes::Empty => (vec![], vec![]), + _ => unreachable!(), } } } @@ -246,7 +385,7 @@ storage_callback! { Locks(Vec) ProcessResult::Locks { locks } => locks, TxnStatus(TxnStatus) ProcessResult::TxnStatus { txn_status } => txn_status, Prewrite(PrewriteResult) ProcessResult::PrewriteResult { result } => result, - PessimisticLock(Result) ProcessResult::PessimisticLockRes { res } => res, + PessimisticLock(Result) ProcessResult::PessimisticLockRes { res } => res, SecondaryLocksStatus(SecondaryLocksStatus) ProcessResult::SecondaryLocksStatus { status } => status, RawCompareAndSwap((Option, bool)) ProcessResult::RawCompareAndSwapRes { previous_value, succeed } => (previous_value, succeed), } diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index e0f68b721b5..43f1b504f25 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -389,7 +389,7 @@ fn test_pipelined_pessimistic_lock() { new_acquire_pessimistic_lock_command(vec![(key.clone(), false)], 10, 10, true, false), expect_pessimistic_lock_res_callback( tx.clone(), - PessimisticLockRes::Values(vec![None]), + PessimisticLockResults(vec![PessimisticLockKeyResult::Value(None)]), ), ) .unwrap(); @@ -452,7 +452,9 @@ fn test_pipelined_pessimistic_lock() { ), expect_pessimistic_lock_res_callback( tx.clone(), - PessimisticLockRes::Values(vec![Some(val.clone())]), + PessimisticLockResults(vec![PessimisticLockKeyResult::Value(Some( + val.clone(), + ))]), ), ) .unwrap(); @@ -475,7 +477,7 @@ fn test_pipelined_pessimistic_lock() { new_acquire_pessimistic_lock_command(vec![(key.clone(), false)], 50, 50, true, false), expect_pessimistic_lock_res_callback( tx.clone(), - PessimisticLockRes::Values(vec![Some(val.clone())]), + PessimisticLockResults(vec![PessimisticLockKeyResult::Value(Some(val.clone()))]), ), ) .unwrap(); @@ -498,7 +500,10 @@ fn test_pipelined_pessimistic_lock() { ), 
expect_pessimistic_lock_res_callback( tx, - PessimisticLockRes::Values(vec![Some(val), None]), + PessimisticLockResults(vec![ + PessimisticLockKeyResult::Value(Some(val)), + PessimisticLockKeyResult::Value(None), + ]), ), ) .unwrap(); From 7fd31d3fc529cddd190b38276ab050bc390c9b6b Mon Sep 17 00:00:00 2001 From: YangKeao Date: Thu, 3 Nov 2022 00:08:00 -0400 Subject: [PATCH 0307/1149] copr, json: support json path range selection (#13645) close tikv/tikv#13644 Signed-off-by: YangKeao Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/tidb_query_datatype/Cargo.toml | 2 +- .../src/codec/mysql/json/binary.rs | 22 +- .../src/codec/mysql/json/json_extract.rs | 319 +++++++-- .../src/codec/mysql/json/json_modify.rs | 2 +- .../src/codec/mysql/json/json_remove.rs | 7 +- .../src/codec/mysql/json/modifier.rs | 21 +- .../src/codec/mysql/json/path_expr.rs | 671 +++++++++--------- 8 files changed, 638 insertions(+), 408 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f1152b2002e..25a68864586 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6089,7 +6089,7 @@ dependencies = [ "lazy_static", "log_wrappers", "match-template", - "nom 5.1.0", + "nom 7.1.0", "num 0.3.0", "num-derive", "num-traits", diff --git a/components/tidb_query_datatype/Cargo.toml b/components/tidb_query_datatype/Cargo.toml index de8f0b41110..af7e7e08b9d 100644 --- a/components/tidb_query_datatype/Cargo.toml +++ b/components/tidb_query_datatype/Cargo.toml @@ -22,7 +22,7 @@ kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" log_wrappers = { workspace = true } match-template = "0.0.1" -nom = { version = "5.1.0", default-features = false, features = ["std"] } +nom = { version = "7.1.0", default-features = false, features = ["std"] } num = { version = "0.3", default-features = false } num-derive = "0.3" num-traits = "0.2" diff --git a/components/tidb_query_datatype/src/codec/mysql/json/binary.rs b/components/tidb_query_datatype/src/codec/mysql/json/binary.rs index 
daeae751fb5..734ec1d4115 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/binary.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/binary.rs @@ -5,9 +5,29 @@ use std::convert::TryInto; use codec::number::NumberCodec; use super::{constants::*, JsonRef, JsonType, ERR_CONVERT_FAILED}; -use crate::codec::Result; +use crate::codec::{mysql::json::path_expr::ArrayIndex, Result}; impl<'a> JsonRef<'a> { + /// Gets the index from the ArrayIndex + /// + /// If the idx is greater than the count and is from right, it will return + /// `None` + /// + /// See `jsonPathArrayIndex.getIndexFromStart()` in TiDB + /// `types/json_path_expr.go` + pub fn array_get_index(&self, idx: ArrayIndex) -> Option { + match idx { + ArrayIndex::Left(idx) => Some(idx as usize), + ArrayIndex::Right(idx) => { + if self.get_elem_count() < 1 + (idx as usize) { + None + } else { + Some(self.get_elem_count() - 1 - (idx as usize)) + } + } + } + } + /// Gets the ith element in JsonRef /// /// See `arrayGetElem()` in TiDB `json/binary.go` diff --git a/components/tidb_query_datatype/src/codec/mysql/json/json_extract.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_extract.rs index d40451fc9b5..7e619e74c32 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/json_extract.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/json_extract.rs @@ -4,9 +4,10 @@ use collections::HashSet; use super::{ super::Result, - path_expr::{PathExpression, PathLeg, PATH_EXPR_ARRAY_INDEX_ASTERISK, PATH_EXPR_ASTERISK}, + path_expr::{PathExpression, PathLeg}, Json, JsonRef, JsonType, }; +use crate::codec::mysql::json::path_expr::{ArrayIndex, ArraySelection, KeySelection}; impl<'a> JsonRef<'a> { /// `extract` receives several path expressions as arguments, matches them @@ -21,8 +22,11 @@ impl<'a> JsonRef<'a> { let mut elem_list = Vec::with_capacity(path_expr_list.len()); for path_expr in path_expr_list { could_return_multiple_matches |= path_expr.contains_any_asterisk(); + 
could_return_multiple_matches |= path_expr.contains_any_range(); + elem_list.append(&mut extract_json(*self, &path_expr.legs)?) } + if elem_list.is_empty() { Ok(None) } else if could_return_multiple_matches { @@ -79,43 +83,86 @@ pub fn extract_json<'a>(j: JsonRef<'a>, path_legs: &[PathLeg]) -> Result match j.get_type() { + match current_leg { + PathLeg::ArraySelection(selection) => match j.get_type() { JsonType::Array => { let elem_count = j.get_elem_count(); - if i == PATH_EXPR_ARRAY_INDEX_ASTERISK { - for k in 0..elem_count { - append_if_ref_unique( - &mut ret, - &extract_json(j.array_get_elem(k)?, sub_path_legs)?, - ) + match selection { + ArraySelection::Asterisk => { + for k in 0..elem_count { + append_if_ref_unique( + &mut ret, + &extract_json(j.array_get_elem(k)?, sub_path_legs)?, + ) + } + } + ArraySelection::Index(index) => { + if let Some(index) = j.array_get_index(*index) { + if index < elem_count { + append_if_ref_unique( + &mut ret, + &extract_json(j.array_get_elem(index)?, sub_path_legs)?, + ) + } + } + } + ArraySelection::Range(start, end) => { + if let (Some(start), Some(mut end)) = + (j.array_get_index(*start), j.array_get_index(*end)) + { + if end >= elem_count { + end = elem_count - 1 + } + if start <= end { + for i in start..=end { + append_if_ref_unique( + &mut ret, + &extract_json(j.array_get_elem(i)?, sub_path_legs)?, + ) + } + } + } } - } else if (i as usize) < elem_count { - append_if_ref_unique( - &mut ret, - &extract_json(j.array_get_elem(i as usize)?, sub_path_legs)?, - ) } } _ => { - if i as usize == 0 { - append_if_ref_unique(&mut ret, &extract_json(j, sub_path_legs)?) + // If the current object is not an array, still append them if the selection + // includes 0. But for asterisk, it still returns NULL. + // + // as the element is not array, don't use `array_get_index` + match selection { + ArraySelection::Index(ArrayIndex::Left(0)) => { + append_if_ref_unique(&mut ret, &extract_json(j, sub_path_legs)?) 
+ } + ArraySelection::Range( + ArrayIndex::Left(0), + ArrayIndex::Right(0) | ArrayIndex::Left(_), + ) => { + // for [0 to Non-negative Number] and [0 to last], it extracts itself + append_if_ref_unique(&mut ret, &extract_json(j, sub_path_legs)?) + } + _ => {} } } }, - PathLeg::Key(ref key) => { + PathLeg::Key(key) => { if j.get_type() == JsonType::Object { - if key == PATH_EXPR_ASTERISK { - let elem_count = j.get_elem_count(); - for i in 0..elem_count { - append_if_ref_unique( - &mut ret, - &extract_json(j.object_get_val(i)?, sub_path_legs)?, - ) + match key { + KeySelection::Asterisk => { + let elem_count = j.get_elem_count(); + for i in 0..elem_count { + append_if_ref_unique( + &mut ret, + &extract_json(j.object_get_val(i)?, sub_path_legs)?, + ) + } + } + KeySelection::Key(key) => { + if let Some(idx) = j.object_search_key(key.as_bytes()) { + let val = j.object_get_val(idx)?; + append_if_ref_unique(&mut ret, &extract_json(val, sub_path_legs)?) + } } - } else if let Some(idx) = j.object_search_key(key.as_bytes()) { - let val = j.object_get_val(idx)?; - append_if_ref_unique(&mut ret, &extract_json(val, sub_path_legs)?) 
} } } @@ -154,10 +201,15 @@ mod tests { use super::{ super::path_expr::{ PathExpressionFlag, PATH_EXPRESSION_CONTAINS_ASTERISK, - PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, PATH_EXPR_ARRAY_INDEX_ASTERISK, + PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }, *, }; + use crate::codec::mysql::json::path_expr::{ArrayIndex, PATH_EXPRESSION_CONTAINS_RANGE}; + + fn select_from_left(index: usize) -> PathLeg { + PathLeg::ArraySelection(ArraySelection::Index(ArrayIndex::Left(index as u32))) + } #[test] fn test_json_extract() { @@ -168,7 +220,7 @@ mod tests { ( "[true, 2017]", vec![PathExpression { - legs: vec![PathLeg::Index(0)], + legs: vec![select_from_left(0)], flags: PathExpressionFlag::default(), }], Some("true"), @@ -176,7 +228,7 @@ mod tests { ( "[true, 2017]", vec![PathExpression { - legs: vec![PathLeg::Index(PATH_EXPR_ARRAY_INDEX_ASTERISK)], + legs: vec![PathLeg::ArraySelection(ArraySelection::Asterisk)], flags: PATH_EXPRESSION_CONTAINS_ASTERISK, }], Some("[true, 2017]"), @@ -184,7 +236,7 @@ mod tests { ( "[true, 2107]", vec![PathExpression { - legs: vec![PathLeg::Index(2)], + legs: vec![select_from_left(2)], flags: PathExpressionFlag::default(), }], None, @@ -192,7 +244,7 @@ mod tests { ( "6.18", vec![PathExpression { - legs: vec![PathLeg::Index(0)], + legs: vec![select_from_left(0)], flags: PathExpressionFlag::default(), }], Some("6.18"), @@ -200,7 +252,7 @@ mod tests { ( "6.18", vec![PathExpression { - legs: vec![PathLeg::Index(PATH_EXPR_ARRAY_INDEX_ASTERISK)], + legs: vec![PathLeg::ArraySelection(ArraySelection::Asterisk)], flags: PathExpressionFlag::default(), }], None, @@ -208,7 +260,7 @@ mod tests { ( "true", vec![PathExpression { - legs: vec![PathLeg::Index(0)], + legs: vec![select_from_left(0)], flags: PathExpressionFlag::default(), }], Some("true"), @@ -216,7 +268,7 @@ mod tests { ( "true", vec![PathExpression { - legs: vec![PathLeg::Index(PATH_EXPR_ARRAY_INDEX_ASTERISK)], + legs: vec![PathLeg::ArraySelection(ArraySelection::Asterisk)], flags: 
PathExpressionFlag::default(), }], None, @@ -224,7 +276,7 @@ mod tests { ( "6", vec![PathExpression { - legs: vec![PathLeg::Index(0)], + legs: vec![select_from_left(0)], flags: PathExpressionFlag::default(), }], Some("6"), @@ -232,7 +284,7 @@ mod tests { ( "6", vec![PathExpression { - legs: vec![PathLeg::Index(PATH_EXPR_ARRAY_INDEX_ASTERISK)], + legs: vec![PathLeg::ArraySelection(ArraySelection::Asterisk)], flags: PathExpressionFlag::default(), }], None, @@ -240,7 +292,7 @@ mod tests { ( "-6", vec![PathExpression { - legs: vec![PathLeg::Index(0)], + legs: vec![select_from_left(0)], flags: PathExpressionFlag::default(), }], Some("-6"), @@ -248,7 +300,7 @@ mod tests { ( "-6", vec![PathExpression { - legs: vec![PathLeg::Index(PATH_EXPR_ARRAY_INDEX_ASTERISK)], + legs: vec![PathLeg::ArraySelection(ArraySelection::Asterisk)], flags: PathExpressionFlag::default(), }], None, @@ -256,7 +308,7 @@ mod tests { ( r#"{"a": [1, 2, {"aa": "xx"}]}"#, vec![PathExpression { - legs: vec![PathLeg::Index(PATH_EXPR_ARRAY_INDEX_ASTERISK)], + legs: vec![PathLeg::ArraySelection(ArraySelection::Asterisk)], flags: PathExpressionFlag::default(), }], None, @@ -264,7 +316,7 @@ mod tests { ( r#"{"a": [1, 2, {"aa": "xx"}]}"#, vec![PathExpression { - legs: vec![PathLeg::Index(0)], + legs: vec![select_from_left(0)], flags: PathExpressionFlag::default(), }], Some(r#"{"a": [1, 2, {"aa": "xx"}]}"#), @@ -273,7 +325,7 @@ mod tests { ( r#"{"a": "a1", "b": 20.08, "c": false}"#, vec![PathExpression { - legs: vec![PathLeg::Key(String::from("c"))], + legs: vec![PathLeg::Key(KeySelection::Key(String::from("c")))], flags: PathExpressionFlag::default(), }], Some("false"), @@ -281,7 +333,7 @@ mod tests { ( r#"{"a": "a1", "b": 20.08, "c": false}"#, vec![PathExpression { - legs: vec![PathLeg::Key(String::from(PATH_EXPR_ASTERISK))], + legs: vec![PathLeg::Key(KeySelection::Asterisk)], flags: PATH_EXPRESSION_CONTAINS_ASTERISK, }], Some(r#"["a1", 20.08, false]"#), @@ -289,7 +341,7 @@ mod tests { ( r#"{"a": "a1", "b": 
20.08, "c": false}"#, vec![PathExpression { - legs: vec![PathLeg::Key(String::from("d"))], + legs: vec![PathLeg::Key(KeySelection::Key(String::from("d")))], flags: PathExpressionFlag::default(), }], None, @@ -298,7 +350,10 @@ mod tests { ( "21", vec![PathExpression { - legs: vec![PathLeg::DoubleAsterisk, PathLeg::Key(String::from("c"))], + legs: vec![ + PathLeg::DoubleAsterisk, + PathLeg::Key(KeySelection::Key(String::from("c"))), + ], flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }], None, @@ -306,7 +361,10 @@ mod tests { ( r#"{"g": {"a": "a1", "b": 20.08, "c": false}}"#, vec![PathExpression { - legs: vec![PathLeg::DoubleAsterisk, PathLeg::Key(String::from("c"))], + legs: vec![ + PathLeg::DoubleAsterisk, + PathLeg::Key(KeySelection::Key(String::from("c"))), + ], flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }], Some("[false]"), @@ -314,7 +372,10 @@ mod tests { ( r#"[{"a": "a1", "b": 20.08, "c": false}, true]"#, vec![PathExpression { - legs: vec![PathLeg::DoubleAsterisk, PathLeg::Key(String::from("c"))], + legs: vec![ + PathLeg::DoubleAsterisk, + PathLeg::Key(KeySelection::Key(String::from("c"))), + ], flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }], Some("[false]"), @@ -322,7 +383,7 @@ mod tests { ( r#"[[0, 1], [2, 3], [4, [5, 6]]]"#, vec![PathExpression { - legs: vec![PathLeg::DoubleAsterisk, PathLeg::Index(0)], + legs: vec![PathLeg::DoubleAsterisk, select_from_left(0)], flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }], Some("[[0, 1], 0, 1, 2, 3, 4, 5, 6]"), @@ -331,11 +392,11 @@ mod tests { r#"[[0, 1], [2, 3], [4, [5, 6]]]"#, vec![ PathExpression { - legs: vec![PathLeg::DoubleAsterisk, PathLeg::Index(0)], + legs: vec![PathLeg::DoubleAsterisk, select_from_left(0)], flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }, PathExpression { - legs: vec![PathLeg::DoubleAsterisk, PathLeg::Index(0)], + legs: vec![PathLeg::DoubleAsterisk, select_from_left(0)], flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }, ], @@ -344,7 +405,7 @@ mod tests { ( "[1]", 
vec![PathExpression { - legs: vec![PathLeg::DoubleAsterisk, PathLeg::Index(0)], + legs: vec![PathLeg::DoubleAsterisk, select_from_left(0)], flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }], Some("[1]"), @@ -352,7 +413,10 @@ mod tests { ( r#"{"a": 1}"#, vec![PathExpression { - legs: vec![PathLeg::Key(String::from("a")), PathLeg::Index(0)], + legs: vec![ + PathLeg::Key(KeySelection::Key(String::from("a"))), + select_from_left(0), + ], flags: PathExpressionFlag::default(), }], Some("1"), @@ -360,7 +424,7 @@ mod tests { ( r#"{"a": 1}"#, vec![PathExpression { - legs: vec![PathLeg::DoubleAsterisk, PathLeg::Index(0)], + legs: vec![PathLeg::DoubleAsterisk, select_from_left(0)], flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }], Some(r#"[{"a": 1}, 1]"#), @@ -369,10 +433,10 @@ mod tests { r#"{"a": 1}"#, vec![PathExpression { legs: vec![ - PathLeg::Index(0), - PathLeg::Index(0), - PathLeg::Index(0), - PathLeg::Key(String::from("a")), + select_from_left(0), + select_from_left(0), + select_from_left(0), + PathLeg::Key(KeySelection::Key(String::from("a"))), ], flags: PathExpressionFlag::default(), }], @@ -383,8 +447,8 @@ mod tests { vec![PathExpression { legs: vec![ PathLeg::DoubleAsterisk, - PathLeg::Key(String::from("a")), - PathLeg::Key(String::from("*")), + PathLeg::Key(KeySelection::Key(String::from("a"))), + PathLeg::Key(KeySelection::Asterisk), ], flags: PATH_EXPRESSION_CONTAINS_ASTERISK | PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, @@ -395,11 +459,17 @@ mod tests { r#"[{"a": [3,4]}, {"b": 2 }]"#, vec![ PathExpression { - legs: vec![PathLeg::Index(0), PathLeg::Key(String::from("a"))], + legs: vec![ + select_from_left(0), + PathLeg::Key(KeySelection::Key(String::from("a"))), + ], flags: PathExpressionFlag::default(), }, PathExpression { - legs: vec![PathLeg::Index(1), PathLeg::Key(String::from("a"))], + legs: vec![ + select_from_left(1), + PathLeg::Key(KeySelection::Key(String::from("a"))), + ], flags: PathExpressionFlag::default(), }, ], @@ -408,11 +478,136 @@ mod 
tests { ( r#"[{"a": [1,1,1,1]}]"#, vec![PathExpression { - legs: vec![PathLeg::Index(0), PathLeg::Key(String::from("a"))], + legs: vec![ + select_from_left(0), + PathLeg::Key(KeySelection::Key(String::from("a"))), + ], flags: PathExpressionFlag::default(), }], Some("[1, 1, 1, 1]"), ), + ( + r#"[1,2,3,4]"#, + vec![PathExpression { + legs: vec![PathLeg::ArraySelection(ArraySelection::Range( + ArrayIndex::Left(1), + ArrayIndex::Left(2), + ))], + flags: PATH_EXPRESSION_CONTAINS_RANGE, + }], + Some("[2,3]"), + ), + ( + r#"[{"a": [1,2,3,4]}]"#, + vec![PathExpression { + legs: vec![ + select_from_left(0), + PathLeg::Key(KeySelection::Key(String::from("a"))), + PathLeg::ArraySelection(ArraySelection::Index(ArrayIndex::Right(0))), + ], + flags: PathExpressionFlag::default(), + }], + Some("4"), + ), + ( + r#"[{"a": [1,2,3,4]}]"#, + vec![PathExpression { + legs: vec![ + select_from_left(0), + PathLeg::Key(KeySelection::Key(String::from("a"))), + PathLeg::ArraySelection(ArraySelection::Index(ArrayIndex::Right(1))), + ], + flags: PathExpressionFlag::default(), + }], + Some("3"), + ), + ( + r#"[{"a": [1,2,3,4]}]"#, + vec![PathExpression { + legs: vec![ + select_from_left(0), + PathLeg::Key(KeySelection::Key(String::from("a"))), + PathLeg::ArraySelection(ArraySelection::Index(ArrayIndex::Right(100))), + ], + flags: PathExpressionFlag::default(), + }], + None, + ), + ( + r#"[{"a": [1,2,3,4]}]"#, + vec![PathExpression { + legs: vec![ + select_from_left(0), + PathLeg::Key(KeySelection::Key(String::from("a"))), + PathLeg::ArraySelection(ArraySelection::Range( + ArrayIndex::Left(1), + ArrayIndex::Right(0), + )), + ], + flags: PATH_EXPRESSION_CONTAINS_RANGE, + }], + Some("[2,3,4]"), + ), + ( + r#"[{"a": [1,2,3,4]}]"#, + vec![PathExpression { + legs: vec![ + select_from_left(0), + PathLeg::Key(KeySelection::Key(String::from("a"))), + PathLeg::ArraySelection(ArraySelection::Range( + ArrayIndex::Left(1), + ArrayIndex::Right(100), + )), + ], + flags: PATH_EXPRESSION_CONTAINS_RANGE, + }], + 
None, + ), + ( + r#"[{"a": [1,2,3,4]}]"#, + vec![PathExpression { + legs: vec![ + select_from_left(0), + PathLeg::Key(KeySelection::Key(String::from("a"))), + PathLeg::ArraySelection(ArraySelection::Range( + ArrayIndex::Left(1), + ArrayIndex::Left(100), + )), + ], + flags: PATH_EXPRESSION_CONTAINS_RANGE, + }], + Some("[2,3,4]"), + ), + ( + r#"[{"a": [1,2,3,4]}]"#, + vec![PathExpression { + legs: vec![ + select_from_left(0), + PathLeg::Key(KeySelection::Key(String::from("a"))), + PathLeg::ArraySelection(ArraySelection::Range( + ArrayIndex::Left(0), + ArrayIndex::Right(0), + )), + ], + flags: PATH_EXPRESSION_CONTAINS_RANGE, + }], + Some("[1,2,3,4]"), + ), + ( + r#"[{"a": [1,2,3,4]}]"#, + vec![PathExpression { + legs: vec![ + select_from_left(0), + PathLeg::Key(KeySelection::Key(String::from("a"))), + PathLeg::ArraySelection(ArraySelection::Range( + ArrayIndex::Left(0), + ArrayIndex::Left(2), + )), + ], + flags: PATH_EXPRESSION_CONTAINS_RANGE, + }], + Some("[1,2,3]"), + ), ]; for (i, (js, exprs, expected)) in test_cases.drain(..).enumerate() { let j = js.parse(); diff --git a/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs index e8c709e9571..b359158d06b 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs @@ -33,7 +33,7 @@ impl<'a> JsonRef<'a> { )); } for expr in path_expr_list { - if expr.contains_any_asterisk() { + if expr.contains_any_asterisk() || expr.contains_any_range() { return Err(box_err!( "Invalid path expression: expected no asterisk, found {:?}", expr diff --git a/components/tidb_query_datatype/src/codec/mysql/json/json_remove.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_remove.rs index a350df91b06..bcb6fd01716 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/json_remove.rs +++ 
b/components/tidb_query_datatype/src/codec/mysql/json/json_remove.rs @@ -7,10 +7,9 @@ impl<'a> JsonRef<'a> { /// All path expressions cannot contain * or ** wildcard. /// If any error occurs, the input won't be changed. pub fn remove(&self, path_expr_list: &[PathExpression]) -> Result { - if path_expr_list - .iter() - .any(|expr| expr.legs.is_empty() || expr.contains_any_asterisk()) - { + if path_expr_list.iter().any(|expr| { + expr.legs.is_empty() || expr.contains_any_asterisk() || expr.contains_any_range() + }) { return Err(box_err!("Invalid path expression")); } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs b/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs index 0836eae9d5b..58fe8fbbbcb 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/modifier.rs @@ -11,6 +11,7 @@ use super::{ path_expr::{PathExpression, PathLeg}, Json, JsonRef, JsonType, }; +use crate::codec::mysql::json::path_expr::{ArraySelection, KeySelection}; /// A helper struct that derives a new JSON by combining and manipulating /// the encoded bytes directly. 
Only used by `json_replace`, `json_set`, @@ -88,7 +89,7 @@ impl<'a> BinaryModifier<'a> { } let parent_node = &result[0]; match last_leg { - PathLeg::Index(_) => { + PathLeg::ArraySelection(ArraySelection::Index(_)) => { // Record the parent node value offset, as it's actually relative to `old` self.to_be_modified_ptr = parent_node.as_ptr(); match parent_node.get_type() { @@ -109,7 +110,7 @@ impl<'a> BinaryModifier<'a> { } } } - PathLeg::Key(insert_key) => { + PathLeg::Key(KeySelection::Key(insert_key)) => { // Ignore constant if parent_node.get_type() != JsonType::Object { return Ok(()); @@ -168,21 +169,23 @@ impl<'a> BinaryModifier<'a> { } let parent_node = &result[0]; match last_leg { - PathLeg::Index(remove_idx) => { + PathLeg::ArraySelection(ArraySelection::Index(remove_idx)) => { if parent_node.get_type() == JsonType::Array { self.to_be_modified_ptr = parent_node.as_ptr(); let elems_count = parent_node.get_elem_count(); let mut elems = Vec::with_capacity(elems_count - 1); - let remove_idx = *remove_idx as usize; - for i in 0..elems_count { - if i != remove_idx { - elems.push(parent_node.array_get_elem(i)?); + if let Some(remove_idx) = parent_node.array_get_index(*remove_idx) { + for i in 0..elems_count { + if i != remove_idx { + elems.push(parent_node.array_get_elem(i)?); + } } + + self.new_value = Some(Json::from_ref_array(elems)?); } - self.new_value = Some(Json::from_ref_array(elems)?); } } - PathLeg::Key(remove_key) => { + PathLeg::Key(KeySelection::Key(remove_key)) => { // Ignore constant if parent_node.get_type() == JsonType::Object { self.to_be_modified_ptr = parent_node.as_ptr(); diff --git a/components/tidb_query_datatype/src/codec/mysql/json/path_expr.rs b/components/tidb_query_datatype/src/codec/mysql/json/path_expr.rs index a760f748348..fb707887885 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/path_expr.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/path_expr.rs @@ -25,375 +25,286 @@ // select json_extract('{"a": 
"b", "c": [1, "2"]}', '$.*') -> ["b", [1, "2"]] // ``` -use std::{iter::Peekable, str::CharIndices}; +use nom::{ + branch::alt, + bytes::complete::tag, + character::{ + complete, + complete::{char, none_of, satisfy, space0, space1}, + }, + combinator::{map, map_opt}, + multi::{many0, many1}, + sequence::{delimited, pair, tuple}, + IResult, +}; use super::json_unquote::unquote_string; -use crate::codec::{Error, Result}; +use crate::codec::Result; -pub const PATH_EXPR_ASTERISK: &str = "*"; - -#[derive(Clone, Debug, PartialEq)] -pub enum PathLeg { - /// `Key` indicates the path leg with '.key'. - Key(String), - /// `Index` indicates the path leg with form 'number'. - Index(i32), - /// `DoubleAsterisk` indicates the path leg with form '**'. - DoubleAsterisk, +fn lift_error_to_failure(err: nom::Err) -> nom::Err { + if let nom::Err::Error(err) = err { + nom::Err::Failure(err) + } else { + err + } } -// ArrayIndexAsterisk is for parsing '*' into a number. -// we need this number represent "all". 
-pub const PATH_EXPR_ARRAY_INDEX_ASTERISK: i32 = -1; - -pub type PathExpressionFlag = u8; - -pub const PATH_EXPRESSION_CONTAINS_ASTERISK: PathExpressionFlag = 0x01; -pub const PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK: PathExpressionFlag = 0x02; - -#[derive(Clone, Default, Debug, PartialEq)] -pub struct PathExpression { - pub legs: Vec, - pub flags: PathExpressionFlag, +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum ArrayIndex { + Left(u32), // `Left` represents an array index start from left + Right(u32), // `Right` represents an array index start from right } -impl PathExpression { - pub fn contains_any_asterisk(&self) -> bool { - (self.flags - & (PATH_EXPRESSION_CONTAINS_ASTERISK | PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK)) - != 0 - } +fn array_index_left(input: &str) -> IResult<&str, ArrayIndex> { + let (input, index) = complete::u32(input)?; + Ok((input, ArrayIndex::Left(index))) } -/// `box_json_path_err` creates an error from the slice position -/// The position is added with 1, to count from 1 as start -macro_rules! box_json_path_err { - ($e:expr) => {{ - box_err!( - "Invalid JSON path expression. 
The error is around character position {}.", - ($e) + 1 - ) - }}; +fn array_index_last(input: &str) -> IResult<&str, ArrayIndex> { + let (input, _) = tag("last")(input)?; + + Ok((input, ArrayIndex::Right(0))) } -struct PathExpressionTokenizer<'a> { - input: &'a str, +fn array_index_right(input: &str) -> IResult<&str, ArrayIndex> { + let (input, _) = tag("last")(input)?; + let (input, _) = space0(input)?; + let (input, _) = char('-')(input)?; + let (input, _) = space0(input)?; - char_iterator: Peekable>, + let (input, index) = complete::u32(input)?; + Ok((input, ArrayIndex::Right(index))) } -struct Position { - start: usize, - end: usize, +fn array_index(input: &str) -> IResult<&str, ArraySelection> { + map( + alt((array_index_left, array_index_right, array_index_last)), + |index| ArraySelection::Index(index), + )(input) } -/// PathExpressionToken represents a section in path expression and its position -enum PathExpressionToken { - Leg((PathLeg, Position)), - /// Represents the beginning "$" in the expression - Start(Position), +fn array_asterisk(input: &str) -> IResult<&str, ArraySelection> { + map(char('*'), |_| ArraySelection::Asterisk)(input) } -impl<'a> Iterator for PathExpressionTokenizer<'a> { - type Item = Result; - - /// Next will try to parse the next path leg and return - /// If it returns None, it means the input is over. - /// If it returns Some(Err(..)), it means the format is error. - /// If it returns Some(Ok(..)), it represents the next token. 
- fn next(&mut self) -> Option> { - self.trim_white_spaces(); - // Trim all spaces at first - if self.reached_end() { - return None; - }; - - let (start, ch) = *self.char_iterator.peek().unwrap(); - match ch { - '$' => { - self.char_iterator.next(); - Some(Ok(PathExpressionToken::Start(Position { - start, - end: self.current_index(), - }))) +fn array_range(input: &str) -> IResult<&str, ArraySelection> { + let (input, start) = array_index(input)?; + let (input, _) = space1(input)?; + let (input, _) = tag("to")(input)?; + let (before_last_index, _) = space1(input)?; + let (input, end) = array_index(before_last_index)?; + + match (start, end) { + (ArraySelection::Index(start), ArraySelection::Index(end)) => { + // specially check the position + let allowed = match (start, end) { + (ArrayIndex::Left(start), ArrayIndex::Left(end)) => start <= end, + (ArrayIndex::Right(start), ArrayIndex::Right(end)) => start >= end, + (..) => true, + }; + if !allowed { + // TODO: use a customized error kind, as the ErrorKind::Verify is designed + // to be used in `verify` combinator + return Err(nom::Err::Failure(nom::error::make_error( + before_last_index, + nom::error::ErrorKind::Verify, + ))); } - '.' 
=> Some(self.next_key()), - '[' => Some(self.next_index()), - '*' => Some(self.next_double_asterisk()), - _ => Some(Err(box_json_path_err!(self.current_index()))), + Ok((input, ArraySelection::Range(start, end))) } + _ => unreachable!(), } } -impl<'a> PathExpressionTokenizer<'a> { - fn new(input: &'a str) -> PathExpressionTokenizer<'a> { - PathExpressionTokenizer { - input, - char_iterator: input.char_indices().peekable(), - } - } - - /// Returns the current index on the slice - fn current_index(&mut self) -> usize { - match self.char_iterator.peek() { - Some((start, _)) => *start, - None => self.input.len(), - } - } - - /// `trim_while_spaces` removes following spaces - fn trim_white_spaces(&mut self) { - while self - .char_iterator - .next_if(|(_, ch)| ch.is_whitespace()) - .is_some() - {} - } - - /// Returns whether the input has reached the end - fn reached_end(&mut self) -> bool { - return self.char_iterator.peek().is_none(); - } - - fn next_key(&mut self) -> Result { - let (start, _) = self.char_iterator.next().unwrap(); +#[derive(Clone, Debug, PartialEq)] +pub enum ArraySelection { + Asterisk, // `Asterisk` select all element from array. + Index(ArrayIndex), // `Index` select one element from array. + Range(ArrayIndex, ArrayIndex), // `Range` selects a closed-interval from array. 
+} - self.trim_white_spaces(); - if self.reached_end() { - return Err(box_json_path_err!(self.current_index())); - } +fn path_leg_array_selection(input: &str) -> IResult<&str, PathLeg> { + let (input, _) = char('[')(input)?; + let (input, _) = space0(input)?; + let (input, leg) = map( + alt((array_asterisk, array_range, array_index)), + |array_selection| PathLeg::ArraySelection(array_selection), + )(input) + .map_err(lift_error_to_failure)?; + let (input, _) = space0(input)?; + let (input, _) = char(']')(input).map_err(lift_error_to_failure)?; + + Ok((input, leg)) +} - match *self.char_iterator.peek().unwrap() { - (_, '*') => { - self.char_iterator.next().unwrap(); - - Ok(PathExpressionToken::Leg(( - PathLeg::Key(PATH_EXPR_ASTERISK.to_string()), - Position { - start, - end: self.current_index(), - }, - ))) - } - (mut key_start, '"') => { - // Skip this '"' character - key_start += 1; - self.char_iterator.next().unwrap(); +#[derive(Clone, Debug, PartialEq)] +pub enum KeySelection { + Asterisk, + Key(String), +} - // Next until the next '"' character - while self.char_iterator.next_if(|(_, ch)| *ch != '"').is_some() {} +fn key_selection_asterisk(input: &str) -> IResult<&str, KeySelection> { + map(char('*'), |_| KeySelection::Asterisk)(input) +} - // Now, it's a '"' or the end - if self.char_iterator.peek().is_none() { - return Err(box_json_path_err!(self.current_index())); +fn key_selection_key(input: &str) -> IResult<&str, KeySelection> { + let key_with_quote = map_opt( + delimited(char('"'), many1(none_of("\"")), char('"')), + |key: Vec<_>| { + let key: String = key.into_iter().collect(); + let key = unquote_string(&key).ok()?; + for ch in key.chars() { + if ch.is_control() { + return None; } + } + Some(KeySelection::Key(key)) + }, + ); + + let take_key_until_end = many1(satisfy(|ch| { + !(ch.is_whitespace() || ch == '.' 
|| ch == '[' || ch == '*') + })); + let key_without_quote = map_opt(take_key_until_end, |key: Vec<_>| { + for (i, c) in key.iter().enumerate() { + if i == 0 && c.is_ascii_digit() { + return None; + } + if !c.is_ascii_alphanumeric() && *c != '_' && *c != '$' && c.is_ascii() { + return None; + } + } - // `key_end` is the index of '"' - let key_end = self.current_index(); - self.char_iterator.next().unwrap(); - - let key = unquote_string(unsafe { self.input.get_unchecked(key_start..key_end) })?; - for ch in key.chars() { - // According to JSON standard, a string cannot - // contain any ASCII control characters - if ch.is_control() { - // TODO: add the concrete error location - // after unquote, we lost the map between - // the character and input position. - return Err(box_json_path_err!(key_start)); - } - } + Some(KeySelection::Key(key.into_iter().collect())) + }); - Ok(PathExpressionToken::Leg(( - PathLeg::Key(key), - Position { - start, - end: self.current_index(), - }, - ))) - } - (key_start, _) => { - // We have to also check the current value - while self - .char_iterator - .next_if(|(_, ch)| { - !(ch.is_whitespace() || *ch == '.' 
|| *ch == '[' || *ch == '*') - }) - .is_some() - {} - - // Now it reaches the end or a whitespace/./[/* - let key_end = self.current_index(); - - // The start character is not available - if key_end == key_start { - return Err(box_json_path_err!(key_start)); - } + alt((key_with_quote, key_without_quote))(input) +} - let key = unsafe { self.input.get_unchecked(key_start..key_end) }.to_string(); - - // It's not quoted, we'll have to validate whether it's an available ECMEScript - // identifier - for (i, c) in key.char_indices() { - if i == 0 && c.is_ascii_digit() { - return Err(box_json_path_err!(key_start + i)); - } - if !c.is_ascii_alphanumeric() && c != '_' && c != '$' && c.is_ascii() { - return Err(box_json_path_err!(key_start + i)); - } - } +fn path_leg_key(input: &str) -> IResult<&str, PathLeg> { + let (input, _) = char('.')(input)?; + let (input, _) = space0(input)?; - Ok(PathExpressionToken::Leg(( - PathLeg::Key(key), - Position { - start, - end: key_end, - }, - ))) - } - } - } + map( + alt((key_selection_key, key_selection_asterisk)), + |key_selection| PathLeg::Key(key_selection), + )(input) + .map_err(lift_error_to_failure) +} - fn next_index(&mut self) -> Result { - let (start, _) = self.char_iterator.next().unwrap(); +fn path_leg_double_asterisk(input: &str) -> IResult<&str, PathLeg> { + map(pair(char('*'), char('*')), |_| PathLeg::DoubleAsterisk)(input) +} - self.trim_white_spaces(); - if self.reached_end() { - return Err(box_json_path_err!(self.current_index())); - } +#[derive(Clone, Debug, PartialEq)] +pub enum PathLeg { + /// `Key` indicates the path leg with '.key'. + Key(KeySelection), + /// `ArraySelection` indicates the path leg with form '[...]'. + ArraySelection(ArraySelection), + /// `DoubleAsterisk` indicates the path leg with form '**'. 
+ DoubleAsterisk, +} - return match self.char_iterator.next().unwrap() { - (_, '*') => { - // Then it's a glob array index - self.trim_white_spaces(); - if self.reached_end() { - return Err(box_json_path_err!(self.current_index())); - } +pub type PathExpressionFlag = u8; - if self.char_iterator.next_if(|(_, ch)| *ch == ']').is_none() { - return Err(box_json_path_err!(self.current_index())); - } +pub const PATH_EXPRESSION_CONTAINS_ASTERISK: PathExpressionFlag = 0x01; +pub const PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK: PathExpressionFlag = 0x02; +pub const PATH_EXPRESSION_CONTAINS_RANGE: PathExpressionFlag = 0x04; - Ok(PathExpressionToken::Leg(( - PathLeg::Index(PATH_EXPR_ARRAY_INDEX_ASTERISK), - Position { - start, - end: self.current_index(), - }, - ))) +fn path_expression(input: &str) -> IResult<&str, PathExpression> { + let mut flags = PathExpressionFlag::default(); + let (input, (_, _, legs)) = tuple(( + space0, + char('$'), + many0(delimited( + space0, + alt(( + path_leg_key, + path_leg_array_selection, + path_leg_double_asterisk, + )), + space0, + )), + ))(input)?; + + for leg in legs.iter() { + match leg { + PathLeg::DoubleAsterisk => flags |= PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, + PathLeg::Key(KeySelection::Asterisk) => flags |= PATH_EXPRESSION_CONTAINS_ASTERISK, + PathLeg::ArraySelection(ArraySelection::Asterisk) => { + flags |= PATH_EXPRESSION_CONTAINS_ASTERISK } - (number_start, '0'..='9') => { - // Then it's a number array index - while self - .char_iterator - .next_if(|(_, ch)| ch.is_ascii_digit()) - .is_some() - {} - let number_end = self.current_index(); - - self.trim_white_spaces(); - // now, it reaches the end of input, or reaches a non-digit character - match self.char_iterator.peek() { - Some((_, ']')) => {} - Some((pos, _)) => { - return Err(box_json_path_err!(pos)); - } - None => { - return Err(box_json_path_err!(self.current_index())); - } - } - self.char_iterator.next().unwrap(); - - let index = self.input[number_start..number_end] - 
.parse::() - .map_err(|_| -> Error { box_json_path_err!(number_end) })?; - Ok(PathExpressionToken::Leg(( - PathLeg::Index(index), - Position { - start, - end: self.current_index(), - }, - ))) + PathLeg::ArraySelection(ArraySelection::Range(..)) => { + flags |= PATH_EXPRESSION_CONTAINS_RANGE } - (pos, _) => Err(box_json_path_err!(pos)), - }; + _ => {} + } } - fn next_double_asterisk(&mut self) -> Result { - let (start, _) = self.char_iterator.next().unwrap(); + Ok((input, PathExpression { legs, flags })) +} - match self.char_iterator.next() { - Some((end, '*')) => { - // Three or more asterisks are not allowed - if let Some((pos, '*')) = self.char_iterator.peek() { - return Err(box_json_path_err!(pos)); - } +#[derive(Clone, Default, Debug, PartialEq)] +pub struct PathExpression { + pub legs: Vec, + pub flags: PathExpressionFlag, +} - Ok(PathExpressionToken::Leg(( - PathLeg::DoubleAsterisk, - Position { start, end }, - ))) - } - Some((pos, _)) => Err(box_json_path_err!(pos)), - None => Err(box_json_path_err!(self.current_index())), - } +impl PathExpression { + pub fn contains_any_asterisk(&self) -> bool { + (self.flags + & (PATH_EXPRESSION_CONTAINS_ASTERISK | PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK)) + != 0 + } + + pub fn contains_any_range(&self) -> bool { + (self.flags & PATH_EXPRESSION_CONTAINS_RANGE) != 0 } } +/// `box_json_path_err` creates an error from the slice position +/// The position is added with 1, to count from 1 as start +macro_rules! box_json_path_err { + ($e:expr) => {{ + box_err!( + "Invalid JSON path expression. The error is around character position {}.", + ($e) + 1 + ) + }}; +} + /// Parses a JSON path expression. Returns a `PathExpression` /// object which can be used in `JSON_EXTRACT`, `JSON_SET` and so on. 
-pub fn parse_json_path_expr(path_expr: &str) -> Result { - let mut legs = Vec::new(); - let tokenizer = PathExpressionTokenizer::new(path_expr); - let mut flags = PathExpressionFlag::default(); - - let mut started = false; - let mut last_position = Position { start: 0, end: 0 }; - for (index, token) in tokenizer.enumerate() { - let token = token?; - - match token { - PathExpressionToken::Leg((leg, position)) => { - if !started { - return Err(box_json_path_err!(position.start)); +/// +/// See `parseJSONPathExpr` in TiDB `types/json_path_expr.go`. +pub fn parse_json_path_expr(path_expr_input: &str) -> Result { + let (left_input, path_expr) = match path_expression(path_expr_input) { + Ok(ret) => ret, + Err(err) => { + let input = match err { + nom::Err::Error(err) => err.input, + nom::Err::Failure(err) => err.input, + _ => { + unreachable!() } + }; - match &leg { - PathLeg::Key(key) => { - if key == PATH_EXPR_ASTERISK { - flags |= PATH_EXPRESSION_CONTAINS_ASTERISK - } - } - PathLeg::Index(PATH_EXPR_ARRAY_INDEX_ASTERISK) => { - flags |= PATH_EXPRESSION_CONTAINS_ASTERISK - } - PathLeg::DoubleAsterisk => flags |= PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, - _ => {} - } - - legs.push(leg.clone()); - last_position = position; - } - PathExpressionToken::Start(position) => { - started = true; - - if index != 0 { - return Err(box_json_path_err!(position.start)); - } - } + return Err(box_json_path_err!(path_expr_input.len() - input.len())); } - } + }; - // There is no available token - if !started { - return Err(box_json_path_err!(path_expr.len())); + // Some extra input is left + if !left_input.is_empty() { + return Err(box_json_path_err!(path_expr_input.len() - left_input.len())); } + // The last one cannot be the double asterisk - if !legs.is_empty() && legs.last().unwrap() == &PathLeg::DoubleAsterisk { - return Err(box_json_path_err!(last_position.end)); + if !path_expr.legs.is_empty() && path_expr.legs.last().unwrap() == &PathLeg::DoubleAsterisk { + return 
Err(box_json_path_err!(path_expr_input.len() - 1)); } - Ok(PathExpression { legs, flags }) + Ok(path_expr) } #[cfg(test)] @@ -429,7 +340,7 @@ mod tests { "$.a", None, Some(PathExpression { - legs: vec![PathLeg::Key(String::from("a"))], + legs: vec![PathLeg::Key(KeySelection::Key(String::from("a")))], flags: PathExpressionFlag::default(), }), ), @@ -438,8 +349,8 @@ mod tests { None, Some(PathExpression { legs: vec![ - PathLeg::Key(String::from("a")), - PathLeg::Key(String::from("$")), + PathLeg::Key(KeySelection::Key(String::from("a"))), + PathLeg::Key(KeySelection::Key(String::from("$"))), ], flags: PathExpressionFlag::default(), }), @@ -448,7 +359,7 @@ mod tests { "$.\"hello world\"", None, Some(PathExpression { - legs: vec![PathLeg::Key(String::from("hello world"))], + legs: vec![PathLeg::Key(KeySelection::Key(String::from("hello world")))], flags: PathExpressionFlag::default(), }), ), @@ -456,7 +367,7 @@ mod tests { "$. \"你好 世界\" ", None, Some(PathExpression { - legs: vec![PathLeg::Key(String::from("你好 世界"))], + legs: vec![PathLeg::Key(KeySelection::Key(String::from("你好 世界")))], flags: PathExpressionFlag::default(), }), ), @@ -464,7 +375,7 @@ mod tests { "$. ❤️ ", None, Some(PathExpression { - legs: vec![PathLeg::Key(String::from("❤️"))], + legs: vec![PathLeg::Key(KeySelection::Key(String::from("❤️")))], flags: PathExpressionFlag::default(), }), ), @@ -472,7 +383,7 @@ mod tests { "$. 
你好 ", None, Some(PathExpression { - legs: vec![PathLeg::Key(String::from("你好"))], + legs: vec![PathLeg::Key(KeySelection::Key(String::from("你好")))], flags: PathExpressionFlag::default(), }), ), @@ -480,7 +391,9 @@ mod tests { "$[ 0 ]", None, Some(PathExpression { - legs: vec![PathLeg::Index(0)], + legs: vec![PathLeg::ArraySelection(ArraySelection::Index( + ArrayIndex::Left(0), + ))], flags: PathExpressionFlag::default(), }), ), @@ -488,7 +401,10 @@ mod tests { "$**.a", None, Some(PathExpression { - legs: vec![PathLeg::DoubleAsterisk, PathLeg::Key(String::from("a"))], + legs: vec![ + PathLeg::DoubleAsterisk, + PathLeg::Key(KeySelection::Key(String::from("a"))), + ], flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }), ), @@ -496,7 +412,10 @@ mod tests { " $ ** . a", None, Some(PathExpression { - legs: vec![PathLeg::DoubleAsterisk, PathLeg::Key(String::from("a"))], + legs: vec![ + PathLeg::DoubleAsterisk, + PathLeg::Key(KeySelection::Key(String::from("a"))), + ], flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }), ), @@ -504,7 +423,69 @@ mod tests { " $ ** . 
$", None, Some(PathExpression { - legs: vec![PathLeg::DoubleAsterisk, PathLeg::Key(String::from("$"))], + legs: vec![ + PathLeg::DoubleAsterisk, + PathLeg::Key(KeySelection::Key(String::from("$"))), + ], + flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, + }), + ), + ( + " $ [ 1 to 10 ]", + None, + Some(PathExpression { + legs: vec![PathLeg::ArraySelection(ArraySelection::Range( + ArrayIndex::Left(1), + ArrayIndex::Left(10), + ))], + flags: PATH_EXPRESSION_CONTAINS_RANGE, + }), + ), + ( + " $ [ 1 to last - 10 ]", + None, + Some(PathExpression { + legs: vec![PathLeg::ArraySelection(ArraySelection::Range( + ArrayIndex::Left(1), + ArrayIndex::Right(10), + ))], + flags: PATH_EXPRESSION_CONTAINS_RANGE, + }), + ), + ( + " $ [ 1 to last-10 ]", + None, + Some(PathExpression { + legs: vec![PathLeg::ArraySelection(ArraySelection::Range( + ArrayIndex::Left(1), + ArrayIndex::Right(10), + ))], + flags: PATH_EXPRESSION_CONTAINS_RANGE, + }), + ), + ( + " $ ** [ 1 to last ]", + None, + Some(PathExpression { + legs: vec![ + PathLeg::DoubleAsterisk, + PathLeg::ArraySelection(ArraySelection::Range( + ArrayIndex::Left(1), + ArrayIndex::Right(0), + )), + ], + flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK + | PATH_EXPRESSION_CONTAINS_RANGE, + }), + ), + ( + " $ ** [ last ]", + None, + Some(PathExpression { + legs: vec![ + PathLeg::DoubleAsterisk, + PathLeg::ArraySelection(ArraySelection::Index(ArrayIndex::Right(0))), + ], flags: PATH_EXPRESSION_CONTAINS_DOUBLE_ASTERISK, }), ), @@ -536,8 +517,7 @@ mod tests { ), ( "$.\"\\u33\"", - // TODO: pass the position in the unquote unicode error - Some("Invalid unicode, byte len too short"), + Some("Invalid JSON path expression. The error is around character position 3."), None, ), ( @@ -547,7 +527,7 @@ mod tests { ), ( "$.\"a\\t\"", - Some("Invalid JSON path expression. The error is around character position 4."), + Some("Invalid JSON path expression. 
The error is around character position 3."), None, ), ( @@ -556,8 +536,23 @@ mod tests { None, ), ( - "$ [ 2147483648 ]", - Some("Invalid JSON path expression. The error is around character position 15."), + "$ [ 4294967296 ]", + Some("Invalid JSON path expression. The error is around character position 5."), + None, + ), + ( + "$ [ 1to2 ]", + Some("Invalid JSON path expression. The error is around character position 6."), + None, + ), + ( + "$ [ 2 to 1 ]", + Some("Invalid JSON path expression. The error is around character position 10."), + None, + ), + ( + "$ [ last - 10 to last - 20 ]", + Some("Invalid JSON path expression. The error is around character position 18."), None, ), ]; @@ -607,4 +602,22 @@ mod tests { assert_eq!(b, expected, "#{} expect {:?} but got {:?}", i, expected, b); } } + + #[test] + fn test_parse_json_path_expr_contains_any_range() { + let mut test_cases = vec![ + ("$.a[0]", false), + ("$.a[*]", false), + ("$**.a[0]", false), + ("$.a[1 to 2]", true), + ("$.a[1 to last - 2]", true), + ]; + for (i, (path_expr, expected)) in test_cases.drain(..).enumerate() { + let r = parse_json_path_expr(path_expr); + assert!(r.is_ok(), "#{} expect parse ok but got err {:?}", i, r); + let e = r.unwrap(); + let b = e.contains_any_range(); + assert_eq!(b, expected, "#{} expect {:?} but got {:?}", i, expected, b); + } + } } From e57dc6d7d6dc70d6b212c57772fc7da9f2c9f007 Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 3 Nov 2022 14:18:01 +0800 Subject: [PATCH 0308/1149] apply: avoid unnecessary clone (#13727) close tikv/tikv#13726 avoid unnecessary clone Signed-off-by: Connor1996 Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/fsm/apply.rs | 33 ++++++++++----------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index a5da7b9c9f1..6fce91114a7 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ 
b/components/raftstore/src/store/fsm/apply.rs @@ -1263,7 +1263,7 @@ where apply_ctx: &mut ApplyContext, index: u64, term: u64, - cmd: RaftCmdRequest, + req: RaftCmdRequest, ) -> ApplyResult { if index == 0 { panic!( @@ -1273,11 +1273,10 @@ where } // Set sync log hint if the cmd requires so. - apply_ctx.sync_log_hint |= should_sync_log(&cmd); + apply_ctx.sync_log_hint |= should_sync_log(&req); - apply_ctx.host.pre_apply(&self.region, &cmd); - let (mut resp, exec_result, should_write) = - self.apply_raft_cmd(apply_ctx, index, term, &cmd); + apply_ctx.host.pre_apply(&self.region, &req); + let (mut cmd, exec_result, should_write) = self.apply_raft_cmd(apply_ctx, index, term, req); if let ApplyResult::WaitMergeSource(_) = exec_result { return exec_result; } @@ -1291,9 +1290,8 @@ where // TODO: if we have exec_result, maybe we should return this callback too. Outer // store will call it after handing exec result. - cmd_resp::bind_term(&mut resp, self.term); - let cmd_cb = self.find_pending(index, term, is_conf_change_cmd(&cmd)); - let cmd = Cmd::new(index, term, cmd, resp); + cmd_resp::bind_term(&mut cmd.response, self.term); + let cmd_cb = self.find_pending(index, term, is_conf_change_cmd(&cmd.request)); apply_ctx .applied_batch .push(cmd_cb, cmd, &self.observe_info, self.region_id()); @@ -1321,8 +1319,8 @@ where ctx: &mut ApplyContext, index: u64, term: u64, - req: &RaftCmdRequest, - ) -> (RaftCmdResponse, ApplyResult, bool) { + req: RaftCmdRequest, + ) -> (Cmd, ApplyResult, bool) { // if pending remove, apply should be aborted already. assert!(!self.pending_remove); @@ -1330,7 +1328,7 @@ where // E.g. `RaftApplyState` must not be changed. let mut origin_epoch = None; - let (resp, exec_result) = if ctx.host.pre_exec(&self.region, req, index, term) { + let (resp, exec_result) = if ctx.host.pre_exec(&self.region, &req, index, term) { // One of the observers want to filter execution of the command. 
let mut resp = RaftCmdResponse::default(); if !req.get_header().get_uuid().is_empty() { @@ -1342,7 +1340,7 @@ where ctx.exec_log_index = index; ctx.exec_log_term = term; ctx.kv_wb_mut().set_save_point(); - let (resp, exec_result) = match self.exec_raft_cmd(ctx, req) { + let (resp, exec_result) = match self.exec_raft_cmd(ctx, &req) { Ok(a) => { ctx.kv_wb_mut().pop_save_point().unwrap(); if req.has_admin_request() { @@ -1383,14 +1381,15 @@ where }; (resp, exec_result) }; + + let cmd = Cmd::new(index, term, req, resp); if let ApplyResult::WaitMergeSource(_) = exec_result { - return (resp, exec_result, false); + return (cmd, exec_result, false); } self.apply_state.set_applied_index(index); self.applied_term = term; - let cmd = Cmd::new(index, term, req.clone(), resp.clone()); let (modified_region, mut pending_handle_ssts) = match exec_result { ApplyResult::Res(ref e) => match e { ExecResult::SplitRegion { ref derived, .. } => (Some(derived.clone()), None), @@ -1469,7 +1468,7 @@ where } } if let Some(epoch) = origin_epoch { - let cmd_type = req.get_admin_request().get_cmd_type(); + let cmd_type = cmd.request.get_admin_request().get_cmd_type(); let epoch_state = admin_cmd_epoch_lookup(cmd_type); // The change-epoch behavior **MUST BE** equal to the settings in // `admin_cmd_epoch_lookup` @@ -1481,7 +1480,7 @@ where panic!( "{} apply admin cmd {:?} but epoch change is not expected, epoch state {:?}, before {:?}, after {:?}", self.tag, - req, + cmd.request, epoch_state, epoch, self.region.get_region_epoch() @@ -1489,7 +1488,7 @@ where } } - (resp, exec_result, should_write) + (cmd, exec_result, should_write) } fn destroy(&mut self, apply_ctx: &mut ApplyContext) { From e1ba8a278fe370b4ffb0ca38e6789f37cabcda05 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Thu, 3 Nov 2022 17:42:00 +0800 Subject: [PATCH 0309/1149] txn: set last_change_ts only on 6.5+ versions (#13728) ref tikv/tikv#13694 Some old versions or components (TiKV < 5.0, TiFlash) cannot handle unknown fields in 
Lock and Write. To avoid causing unexpected problems, we add a feature gate to the new field. We are not going to release this feature in TiKV 6.4, so I directly set the minimal version to 6.5. Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- src/storage/mvcc/reader/scanner/forward.rs | 10 +++++++ .../txn/actions/acquire_pessimistic_lock.rs | 26 ++++++++++++++++--- src/storage/txn/sched_pool.rs | 17 +++++++++++- src/storage/txn/scheduler.rs | 3 +++ 4 files changed, 52 insertions(+), 4 deletions(-) diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 5d9d1b9bb83..709dc5803d1 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -2366,6 +2366,16 @@ mod delta_entry_tests { #[test] fn test_mess() { + use pd_client::FeatureGate; + + use crate::storage::txn::sched_pool::set_tls_feature_gate; + + // Set version to 6.5.0 to enable last_change_ts. + // TODO: Remove this after TiKV version reaches 6.5 + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.5.0").unwrap(); + set_tls_feature_gate(feature_gate); + // TODO: non-pessimistic lock should be returned enven if its ts < from_ts. 
// (key, lock, [commit1, commit2, ...]) // Values ends with 'L' will be made larger than `SHORT_VALUE_MAX_LEN` so it diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index 9f645e389be..db4c2485d09 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -9,7 +9,10 @@ use crate::storage::{ metrics::{MVCC_CONFLICT_COUNTER, MVCC_DUPLICATE_CMD_COUNTER_VEC}, ErrorInner, MvccTxn, Result as MvccResult, SnapshotReader, }, - txn::actions::check_data_constraint::check_data_constraint, + txn::{ + actions::check_data_constraint::check_data_constraint, sched_pool::tls_can_enable, + scheduler::LAST_CHANGE_TS, + }, types::PessimisticLockKeyResult, Snapshot, }; @@ -262,7 +265,9 @@ pub fn acquire_pessimistic_lock( check_data_constraint(reader, should_not_exist, &write, commit_ts, &key)?; } - (last_change_ts, versions_to_last_change) = write.next_last_change_info(commit_ts); + if tls_can_enable(LAST_CHANGE_TS) { + (last_change_ts, versions_to_last_change) = write.next_last_change_info(commit_ts); + } // Load value if locked_with_conflict, so that when the client (TiDB) need to // read the value during statement retry, it will be possible to read the value @@ -1612,11 +1617,19 @@ pub mod tests { #[test] fn test_calculate_last_change_ts() { use engine_traits::CF_WRITE; + use pd_client::FeatureGate; + + use crate::storage::txn::sched_pool::set_tls_feature_gate; let mut engine = TestEngineBuilder::new().build().unwrap(); let key = b"k"; - // Latest version is a PUT + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.4.0").unwrap(); + set_tls_feature_gate(feature_gate.clone()); + + // Latest version is a PUT, but last_change_ts is enabled with cluster version + // higher than 6.5.0. 
let write = Write::new(WriteType::Put, 15.into(), Some(b"value".to_vec())); engine .put_cf( @@ -1628,6 +1641,13 @@ pub mod tests { .unwrap(); must_succeed(&mut engine, key, key, 10, 30); let lock = must_pessimistic_locked(&mut engine, key, 10, 30); + assert_eq!(lock.last_change_ts, TimeStamp::zero()); + assert_eq!(lock.versions_to_last_change, 0); + pessimistic_rollback::tests::must_success(&mut engine, key, 10, 30); + // Set cluster version to 6.5.0, last_change_ts should work now. + feature_gate.set_version("6.5.0").unwrap(); + must_succeed(&mut engine, key, key, 10, 30); + let lock = must_pessimistic_locked(&mut engine, key, 10, 30); assert_eq!(lock.last_change_ts, 20.into()); assert_eq!(lock.versions_to_last_change, 1); pessimistic_rollback::tests::must_success(&mut engine, key, 10, 30); diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index 78a891b650e..c7c69b5bbf4 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -9,6 +9,7 @@ use std::{ use collections::HashMap; use file_system::{set_io_type, IoType}; use kvproto::pdpb::QueryKind; +use pd_client::{Feature, FeatureGate}; use prometheus::local::*; use raftstore::store::WriteStats; use tikv_util::{ @@ -19,6 +20,7 @@ use tikv_util::{ use crate::storage::{ kv::{destroy_tls_engine, set_tls_engine, Engine, FlowStatsReporter, Statistics}, metrics::*, + test_util::latest_feature_gate, }; pub struct SchedLocalMetrics { @@ -28,13 +30,15 @@ pub struct SchedLocalMetrics { } thread_local! 
{ - static TLS_SCHED_METRICS: RefCell = RefCell::new( + static TLS_SCHED_METRICS: RefCell = RefCell::new( SchedLocalMetrics { local_scan_details: HashMap::default(), command_keyread_histogram_vec: KV_COMMAND_KEYREAD_HISTOGRAM_VEC.local(), local_write_stats:WriteStats::default(), } ); + + static TLS_FEATURE_GATE: RefCell = RefCell::new(latest_feature_gate()); } #[derive(Clone)] @@ -58,6 +62,7 @@ impl SchedPool { engine: E, pool_size: usize, reporter: R, + feature_gate: FeatureGate, name_prefix: &str, ) -> Self { let engine = Arc::new(Mutex::new(engine)); @@ -75,6 +80,7 @@ impl SchedPool { .after_start(move || { set_tls_engine(engine.lock().unwrap().clone()); set_io_type(IoType::ForegroundWrite); + TLS_FEATURE_GATE.with(|c| *c.borrow_mut() = feature_gate.clone()); }) .before_stop(move || unsafe { // Safety: we ensure the `set_` and `destroy_` calls use the same engine type. @@ -134,3 +140,12 @@ pub fn tls_collect_keyread_histogram_vec(cmd: &str, count: f64) { .observe(count); }); } + +pub fn tls_can_enable(feature: Feature) -> bool { + TLS_FEATURE_GATE.with(|feature_gate| feature_gate.borrow().can_enable(feature)) +} + +#[cfg(test)] +pub fn set_tls_feature_gate(feature_gate: FeatureGate) { + TLS_FEATURE_GATE.with(|f| *f.borrow_mut() = feature_gate); +} diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 917c9fbaffc..9966e14812e 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -95,6 +95,7 @@ const TASKS_SLOTS_NUM: usize = 1 << 12; // 4096 slots. pub const DEFAULT_EXECUTION_DURATION_LIMIT: Duration = Duration::from_secs(24 * 60 * 60); const IN_MEMORY_PESSIMISTIC_LOCK: Feature = Feature::require(6, 0, 0); +pub const LAST_CHANGE_TS: Feature = Feature::require(6, 5, 0); /// Task is a running command. 
pub(super) struct Task { @@ -391,12 +392,14 @@ impl Scheduler { engine.clone(), config.scheduler_worker_pool_size, reporter.clone(), + feature_gate.clone(), "sched-worker-pool", ), high_priority_pool: SchedPool::new( engine, std::cmp::max(1, config.scheduler_worker_pool_size / 2), reporter, + feature_gate.clone(), "sched-high-pri-pool", ), control_mutex: Arc::new(tokio::sync::Mutex::new(false)), From 133769217ef897dbed04478de71d2c345973d867 Mon Sep 17 00:00:00 2001 From: Yexiang Zhang Date: Thu, 3 Nov 2022 23:46:00 +0800 Subject: [PATCH 0310/1149] *: upgrade pprof-rs to v0.11 (#13733) close tikv/tikv#13732, ref tikv/tikv#13732 Signed-off-by: mornyx Co-authored-by: Ti Chi Robot --- Cargo.lock | 49 ++++++++++++++++++++++++++++--------------------- Cargo.toml | 2 +- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 25a68864586..d49c13ae18c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -296,7 +296,7 @@ dependencies = [ "tikv_util", "tokio", "url", - "uuid", + "uuid 0.8.2", ] [[package]] @@ -344,7 +344,7 @@ dependencies = [ "serde_json", "thiserror", "url", - "uuid", + "uuid 0.8.2", ] [[package]] @@ -389,7 +389,7 @@ dependencies = [ "serde_json", "thiserror", "url", - "uuid", + "uuid 0.8.2", ] [[package]] @@ -512,7 +512,7 @@ dependencies = [ "tonic", "txn_types", "url", - "uuid", + "uuid 0.8.2", "walkdir", "yatp", ] @@ -1337,11 +1337,11 @@ dependencies = [ [[package]] name = "debugid" -version = "0.7.2" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91cf5a8c2f2097e2a32627123508635d47ce10563d999ec1a95addf08b502ba" +checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" dependencies = [ - "uuid", + "uuid 1.2.1", ] [[package]] @@ -3859,8 +3859,9 @@ dependencies = [ [[package]] name = "pprof" -version = "0.9.1" -source = "git+https://github.com/tikv/pprof-rs.git?rev=3fed55af8fc6cf69dbd954a0321c799c5a111e4e#3fed55af8fc6cf69dbd954a0321c799c5a111e4e" +version = 
"0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e20150f965e0e4c925982b9356da71c84bcd56cb66ef4e894825837cbcf6613e" dependencies = [ "backtrace", "cfg-if 1.0.0", @@ -4263,7 +4264,7 @@ dependencies = [ "tokio", "tracker", "txn_types", - "uuid", + "uuid 0.8.2", "yatp", ] @@ -5442,7 +5443,7 @@ dependencies = [ "tikv_util", "tokio", "txn_types", - "uuid", + "uuid 0.8.2", ] [[package]] @@ -5562,21 +5563,21 @@ checksum = "343f3f510c2915908f155e94f17220b19ccfacf2a64a2a5d8004f2c3e311e7fd" [[package]] name = "symbolic-common" -version = "8.0.0" +version = "10.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0caab39ce6f074031b8fd3dd297bfda70a2d1f33c6e7cc1b737ac401f856448d" +checksum = "ac457d054f793cedfde6f32d21d692b8351cfec9084fefd0470c0373f6d799bc" dependencies = [ "debugid", - "memmap", + "memmap2", "stable_deref_trait", - "uuid", + "uuid 1.2.1", ] [[package]] name = "symbolic-demangle" -version = "8.0.0" +version = "10.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b77ecb5460a87faa37ed53521eed8f073c8339b7a5788c1f93efc09ce74e1b68" +checksum = "48808b846eef84e0ac06365dc620f028ae632355e5dcffc007bf1b2bf5eab17b" dependencies = [ "rustc-demangle", "symbolic-common", @@ -5857,7 +5858,7 @@ dependencies = [ "engine_traits", "keys", "kvproto", - "uuid", + "uuid 0.8.2", ] [[package]] @@ -5974,7 +5975,7 @@ dependencies = [ "tokio", "toml", "txn_types", - "uuid", + "uuid 0.8.2", ] [[package]] @@ -6171,7 +6172,7 @@ dependencies = [ "tipb", "tipb_helper", "twoway", - "uuid", + "uuid 0.8.2", ] [[package]] @@ -6295,7 +6296,7 @@ dependencies = [ "tracker", "txn_types", "url", - "uuid", + "uuid 0.8.2", "walkdir", "yatp", "zipf", @@ -6965,6 +6966,12 @@ dependencies = [ "serde", ] +[[package]] +name = "uuid" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "feb41e78f93363bb2df8b0e86a2ca30eed7806ea16ea0c790d757cf93f79be83" + [[package]] 
name = "valgrind_request" version = "1.1.0" diff --git a/Cargo.toml b/Cargo.toml index 756f36a0c50..4ccf0a2ad93 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -129,7 +129,7 @@ paste = "1.0" pd_client = { workspace = true } pin-project = "1.0" pnet_datalink = "0.23" -pprof = { git = "https://github.com/tikv/pprof-rs.git", rev = "3fed55af8fc6cf69dbd954a0321c799c5a111e4e", default-features = false, features = ["flamegraph", "protobuf-codec"] } +pprof = { version = "0.11", default-features = false, features = ["flamegraph", "protobuf-codec"] } prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" protobuf = { version = "2.8", features = ["bytes"] } From be76c441d714e85a381f3af8eb15caa40b7fb007 Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 4 Nov 2022 10:00:01 +0800 Subject: [PATCH 0311/1149] Optimize slow-score mechanism to reduce the time-cost on recovery when IO hung. (#13654) close tikv/tikv#13648 1. Make the reporting of Slow-Score more timely. Optimize the reporting strategy of slow-score. Making it more timely, even if store-heartbeat is delayed because of IO delay in one TiKV node. 2. Awaken hibernated regions on healthy nodes timely when IO hang. If one TiKV node is hung by abnormal IO, we can awaken related hibernated regions in time, to trigger self-revoting in these raft-groups for recovery. 
Signed-off-by: Lucasliang --- components/raftstore/src/store/fsm/peer.rs | 12 +- components/raftstore/src/store/fsm/store.rs | 50 +++++++- components/raftstore/src/store/msg.rs | 5 + components/raftstore/src/store/worker/pd.rs | 121 ++++++++++++++++++-- components/tikv_util/src/store/mod.rs | 74 +++++++++++- components/tikv_util/src/store/peer.rs | 34 ------ components/tikv_util/src/store/region.rs | 15 +++ 7 files changed, 261 insertions(+), 50 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index b7f7b005137..69215ecaf70 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2660,7 +2660,17 @@ where match msg.get_extra_msg().get_type() { ExtraMessageType::MsgRegionWakeUp | ExtraMessageType::MsgCheckStalePeer => { if self.fsm.hibernate_state.group_state() == GroupState::Idle { - self.reset_raft_tick(GroupState::Ordered); + if msg.get_extra_msg().forcely_awaken { + // Forcely awaken this region by manually setting this GroupState + // into Chaos to trigger a new voting in this RaftGroup. 
+ self.reset_raft_tick(if !self.fsm.peer.is_leader() { + GroupState::Chaos + } else { + GroupState::Ordered + }); + } else { + self.reset_raft_tick(GroupState::Ordered); + } } if msg.get_extra_msg().get_type() == ExtraMessageType::MsgRegionWakeUp && self.fsm.peer.is_leader() diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 2bb2ea636e1..0f172b6c70f 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -36,7 +36,7 @@ use kvproto::{ metapb::{self, Region, RegionEpoch}, pdpb::{self, QueryStats, StoreStats}, raft_cmdpb::{AdminCmdType, AdminRequest}, - raft_serverpb::{ExtraMessageType, PeerState, RaftMessage, RegionLocalState}, + raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, RaftMessage, RegionLocalState}, replication_modepb::{ReplicationMode, ReplicationStatus}, }; use pd_client::{Feature, FeatureGate, PdClient}; @@ -53,7 +53,7 @@ use tikv_util::{ info, is_zero_duration, mpsc::{self, LooseBoundedSender, Receiver}, slow_log, - store::find_peer, + store::{find_peer, region_on_stores}, sys as sys_util, sys::disk::{get_disk_status, DiskUsage}, time::{duration_to_sec, Instant as TiInstant}, @@ -758,6 +758,9 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> drop(syncer); } StoreMsg::GcSnapshotFinish => self.register_snap_mgr_gc_tick(), + StoreMsg::AwakenRegions { abnormal_stores } => { + self.on_wake_up_regions(abnormal_stores); + } } } self.ctx @@ -2447,11 +2450,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER ); stats.set_query_stats(query_stats); - let store_info = StoreInfo { + let store_info = Some(StoreInfo { kv_engine: self.ctx.engines.kv.clone(), raft_engine: self.ctx.engines.raft.clone(), capacity: self.ctx.cfg.capacity.0, - }; + }); let task = PdTask::StoreHeartbeat { stats, @@ -2534,6 +2537,45 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER 
self.register_compact_lock_cf_tick(); } + fn on_wake_up_regions(&self, abnormal_stores: Vec) { + info!("try to wake up all hibernated regions in this store"; + "to_all" => abnormal_stores.is_empty()); + let meta = self.ctx.store_meta.lock().unwrap(); + for region_id in meta.regions.keys() { + let region = &meta.regions[region_id]; + // Check whether the current region is not found on abnormal stores. If so, + // this region is not the target to be awaken. + if !region_on_stores(region, &abnormal_stores) { + continue; + } + let peer = { + match find_peer(region, self.ctx.store_id()) { + None => continue, + Some(p) => p.clone(), + } + }; + { + // Send MsgRegionWakeUp to Peer for awakening hibernated regions. + let mut message = RaftMessage::default(); + message.set_region_id(*region_id); + message.set_from_peer(peer.clone()); + message.set_to_peer(peer); + message.set_region_epoch(region.get_region_epoch().clone()); + let mut msg = ExtraMessage::default(); + msg.set_type(ExtraMessageType::MsgRegionWakeUp); + msg.forcely_awaken = true; + message.set_extra_msg(msg); + if let Err(e) = self.ctx.router.send_raft_message(message) { + error!( + "send awaken region message failed"; + "region_id" => region_id, + "err" => ?e + ); + } + } + } + } + fn register_pd_store_heartbeat_tick(&self) { self.ctx.schedule_store_tick( StoreTick::PdStoreHeartbeat, diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index b86700af8e6..262f9fd64c5 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -845,6 +845,10 @@ where }, GcSnapshotFinish, + + AwakenRegions { + abnormal_stores: Vec, + }, } impl fmt::Debug for StoreMsg @@ -878,6 +882,7 @@ where write!(fmt, "UnsafeRecoveryCreatePeer") } StoreMsg::GcSnapshotFinish => write!(fmt, "GcSnapshotFinish"), + StoreMsg::AwakenRegions { .. 
} => write!(fmt, "AwakenRegions"), } } } diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index ec06d756fe9..fdfa1b44c85 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -53,6 +53,7 @@ use yatp::Remote; use crate::{ coprocessor::CoprocessorHost, + router::RaftStoreRouter, store::{ cmd_resp::new_error, metrics::*, @@ -151,7 +152,7 @@ where Heartbeat(HeartbeatTask), StoreHeartbeat { stats: pdpb::StoreStats, - store_info: StoreInfo, + store_info: Option>, report: Option, dr_autosync_status: Option, }, @@ -204,6 +205,9 @@ pub struct StoreStat { pub engine_last_total_bytes_read: u64, pub engine_last_total_keys_read: u64, pub engine_last_query_num: QueryStats, + pub engine_last_capacity_size: u64, + pub engine_last_used_size: u64, + pub engine_last_available_size: u64, pub last_report_ts: UnixSecs, pub region_bytes_read: LocalHistogram, @@ -229,6 +233,9 @@ impl Default for StoreStat { engine_total_keys_read: 0, engine_last_total_bytes_read: 0, engine_last_total_keys_read: 0, + engine_last_capacity_size: 0, + engine_last_used_size: 0, + engine_last_available_size: 0, engine_total_query_num: QueryStats::default(), engine_last_query_num: QueryStats::default(), @@ -733,6 +740,9 @@ fn hotspot_query_num_report_threshold() -> u64 { HOTSPOT_QUERY_RATE_THRESHOLD * 10 } +/// Max limitation of delayed store_heartbeat. +const STORE_HEARTBEAT_DELAY_LIMIT: u64 = 5 * 60; + // Slow score is a value that represents the speed of a store and ranges in [1, // 100]. It is maintained in the AIMD way. 
// If there are some inspecting requests timeout during a round, by default the @@ -829,6 +839,10 @@ impl SlowScore { self.last_update_time = Instant::now(); self.value } + + fn should_force_report_slow_store(&self) -> bool { + self.value >= OrderedFloat(100.0) && (self.last_tick_id % self.round_ticks == 0) + } } // RegionCpuMeteringCollector is used to collect the region-related CPU info. @@ -883,6 +897,7 @@ where // calls Runner's run() on Task received. scheduler: Scheduler>, stats_monitor: StatsMonitor, + store_heartbeat_interval: Duration, collector_reg_handle: CollectorRegHandle, region_cpu_records_collector: Option, @@ -958,6 +973,7 @@ where store_stat: StoreStat::default(), start_ts: UnixSecs::now(), scheduler, + store_heartbeat_interval, stats_monitor, collector_reg_handle, region_cpu_records_collector, @@ -1176,7 +1192,7 @@ where fn handle_store_heartbeat( &mut self, mut stats: pdpb::StoreStats, - store_info: StoreInfo, + store_info: Option>, store_report: Option, dr_autosync_status: Option, ) { @@ -1207,13 +1223,27 @@ where } stats = collect_report_read_peer_stats(HOTSPOT_REPORT_CAPACITY, report_peers, stats); - let (capacity, used_size, available) = match collect_engine_size( - &self.coprocessor_host, - Some(&store_info), - self.snap_mgr.get_total_snap_size().unwrap(), - ) { - Some((capacity, used_size, available)) => (capacity, used_size, available), - None => return, + let (capacity, used_size, available) = if store_info.is_some() { + match collect_engine_size( + &self.coprocessor_host, + store_info.as_ref(), + self.snap_mgr.get_total_snap_size().unwrap(), + ) { + Some((capacity, used_size, available)) => { + // Update last reported infos on engine_size. 
+ self.store_stat.engine_last_capacity_size = capacity; + self.store_stat.engine_last_used_size = used_size; + self.store_stat.engine_last_available_size = available; + (capacity, used_size, available) + } + None => return, + } + } else { + ( + self.store_stat.engine_last_capacity_size, + self.store_stat.engine_last_used_size, + self.store_stat.engine_last_available_size, + ) }; stats.set_capacity(capacity); @@ -1251,7 +1281,14 @@ where self.store_stat .engine_last_query_num .fill_query_stats(&self.store_stat.engine_total_query_num); - self.store_stat.last_report_ts = UnixSecs::now(); + self.store_stat.last_report_ts = if store_info.is_some() { + UnixSecs::now() + } else { + // If `store_info` is None, the given Task::StoreHeartbeat should be a fake + // heartbeat to PD, we won't update the last_report_ts to avoid incorrectly + // marking current TiKV node in normal state. + self.store_stat.last_report_ts + }; self.store_stat.region_bytes_written.flush(); self.store_stat.region_keys_written.flush(); self.store_stat.region_bytes_read.flush(); @@ -1338,6 +1375,14 @@ where } } } + // Forcely awaken all hibernated regions if there existed slow stores in this + // cluster. + if let Some(awaken_regions) = resp.awaken_regions.take() { + info!("forcely awaken hibernated regions in this store"); + let _ = router.send_store_msg(StoreMsg::AwakenRegions { + abnormal_stores: awaken_regions.get_abnormal_stores().to_vec(), + }); + } } Err(e) => { error!("store heartbeat failed"; "err" => ?e); @@ -1786,6 +1831,55 @@ where health_service.set_serving_status("", status); } } + + /// Force to send a special heartbeat to pd when current store is hung on + /// some special circumstances, i.e. disk busy, handler busy and others. 
+ fn handle_fake_store_heartbeat(&mut self) { + let mut stats = pdpb::StoreStats::default(); + stats.set_store_id(self.store_id); + stats.set_region_count(self.region_peers.len() as u32); + + let snap_stats = self.snap_mgr.stats(); + stats.set_sending_snap_count(snap_stats.sending_count as u32); + stats.set_receiving_snap_count(snap_stats.receiving_count as u32); + STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC + .with_label_values(&["sending"]) + .set(snap_stats.sending_count as i64); + STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC + .with_label_values(&["receiving"]) + .set(snap_stats.receiving_count as i64); + + stats.set_start_time(self.start_ts.into_inner() as u32); + + // This calling means that the current node cannot report heartbeat in normaly + // scheduler. That is, the current node must in `busy` state. + stats.set_is_busy(true); + + // We do not need to report store_info, so we just set `None` here. + let task = Task::StoreHeartbeat { + stats, + store_info: None, + report: None, + dr_autosync_status: None, + }; + if let Err(e) = self.scheduler.schedule(task) { + error!("force report store heartbeat failed"; + "store_id" => self.store_id, + "err" => ?e + ); + } else { + warn!("scheduling store_heartbeat timeout, force report store slow score to pd."; + "store_id" => self.store_id, + ); + } + } + + fn is_store_heartbeat_delayed(&self) -> bool { + let now = UnixSecs::now(); + let interval_second = now.into_inner() - self.store_stat.last_report_ts.into_inner(); + (interval_second >= self.store_heartbeat_interval.as_secs()) + && (interval_second <= STORE_HEARTBEAT_DELAY_LIMIT) + } } fn calculate_region_cpu_records( @@ -2065,6 +2159,13 @@ where } if !self.slow_score.last_tick_finished { self.slow_score.record_timeout(); + // If the last slow_score already reached abnormal state and was delayed for + // reporting by `store-heartbeat` to PD, we should report it here manually as + // a FAKE `store-heartbeat`. 
+ if self.slow_score.should_force_report_slow_store() && self.is_store_heartbeat_delayed() + { + self.handle_fake_store_heartbeat(); + } } let scheduler = self.scheduler.clone(); let id = self.slow_score.last_tick_id + 1; diff --git a/components/tikv_util/src/store/mod.rs b/components/tikv_util/src/store/mod.rs index 81afff2975a..f4bfea93519 100644 --- a/components/tikv_util/src/store/mod.rs +++ b/components/tikv_util/src/store/mod.rs @@ -9,6 +9,78 @@ pub use self::{ query_stats::{is_read_query, QueryStats}, region::{ check_key_in_region, check_key_in_region_exclusive, check_key_in_region_inclusive, - region_on_same_stores, + region_on_same_stores, region_on_stores, }, }; + +#[cfg(test)] +mod tests { + use kvproto::metapb::Region; + + use super::*; + + #[test] + fn test_on_same_store() { + let cases = vec![ + (vec![2, 3, 4], vec![], vec![1, 2, 3], vec![], false), + (vec![2, 3, 1], vec![], vec![1, 2, 3], vec![], true), + (vec![2, 3, 4], vec![], vec![1, 2], vec![], false), + (vec![1, 2, 3], vec![], vec![1, 2, 3], vec![], true), + (vec![1, 3], vec![2, 4], vec![1, 2], vec![3, 4], false), + (vec![1, 3], vec![2, 4], vec![1, 3], vec![], false), + (vec![1, 3], vec![2, 4], vec![], vec![2, 4], false), + (vec![1, 3], vec![2, 4], vec![3, 1], vec![4, 2], true), + ]; + + for (s1, s2, s3, s4, exp) in cases { + let mut r1 = Region::default(); + for (store_id, peer_id) in s1.into_iter().zip(0..) { + r1.mut_peers().push(new_peer(store_id, peer_id)); + } + for (store_id, peer_id) in s2.into_iter().zip(0..) { + r1.mut_peers().push(new_learner_peer(store_id, peer_id)); + } + + let mut r2 = Region::default(); + for (store_id, peer_id) in s3.into_iter().zip(10..) { + r2.mut_peers().push(new_peer(store_id, peer_id)); + } + for (store_id, peer_id) in s4.into_iter().zip(10..) 
{ + r2.mut_peers().push(new_learner_peer(store_id, peer_id)); + } + let res = region_on_same_stores(&r1, &r2); + assert_eq!(res, exp, "{:?} vs {:?}", r1, r2); + } + } + + #[test] + fn test_check_region_on_store() { + let cases = vec![ + (vec![1, 2, 3], vec![], vec![], true), + (vec![2, 3, 1], vec![], vec![1], true), + (vec![1, 3, 2], vec![], vec![2, 3], true), + (vec![3, 2, 1], vec![], vec![4], false), + (vec![1, 2, 3], vec![], vec![2, 4], true), + (vec![1, 3], vec![2, 4], vec![2], true), + (vec![1, 3], vec![2, 4], vec![2, 3], true), + (vec![1, 3], vec![2], vec![4], false), + ]; + + for (s1, s2, target_stores, exp) in cases { + let mut region = Region::default(); + for (store_id, peer_id) in s1.into_iter().zip(0..) { + region.mut_peers().push(new_peer(store_id, peer_id)); + } + for (store_id, peer_id) in s2.into_iter().zip(0..) { + region.mut_peers().push(new_learner_peer(store_id, peer_id)); + } + + let res = region_on_stores(®ion, &target_stores); + assert_eq!( + res, exp, + "region {:?} exists on {:?}", + region, target_stores + ); + } + } +} diff --git a/components/tikv_util/src/store/peer.rs b/components/tikv_util/src/store/peer.rs index 1a9184134f0..59844bc957a 100644 --- a/components/tikv_util/src/store/peer.rs +++ b/components/tikv_util/src/store/peer.rs @@ -63,38 +63,4 @@ mod tests { assert!(remove_peer(&mut region, 1).is_none()); assert!(find_peer(®ion, 1).is_none()); } - - #[test] - fn test_on_same_store() { - let cases = vec![ - (vec![2, 3, 4], vec![], vec![1, 2, 3], vec![], false), - (vec![2, 3, 1], vec![], vec![1, 2, 3], vec![], true), - (vec![2, 3, 4], vec![], vec![1, 2], vec![], false), - (vec![1, 2, 3], vec![], vec![1, 2, 3], vec![], true), - (vec![1, 3], vec![2, 4], vec![1, 2], vec![3, 4], false), - (vec![1, 3], vec![2, 4], vec![1, 3], vec![], false), - (vec![1, 3], vec![2, 4], vec![], vec![2, 4], false), - (vec![1, 3], vec![2, 4], vec![3, 1], vec![4, 2], true), - ]; - - for (s1, s2, s3, s4, exp) in cases { - let mut r1 = Region::default(); - for 
(store_id, peer_id) in s1.into_iter().zip(0..) { - r1.mut_peers().push(new_peer(store_id, peer_id)); - } - for (store_id, peer_id) in s2.into_iter().zip(0..) { - r1.mut_peers().push(new_learner_peer(store_id, peer_id)); - } - - let mut r2 = Region::default(); - for (store_id, peer_id) in s3.into_iter().zip(10..) { - r2.mut_peers().push(new_peer(store_id, peer_id)); - } - for (store_id, peer_id) in s4.into_iter().zip(10..) { - r2.mut_peers().push(new_learner_peer(store_id, peer_id)); - } - let res = super::super::region_on_same_stores(&r1, &r2); - assert_eq!(res, exp, "{:?} vs {:?}", r1, r2); - } - } } diff --git a/components/tikv_util/src/store/region.rs b/components/tikv_util/src/store/region.rs index 17c3209e7d4..580d940ebeb 100644 --- a/components/tikv_util/src/store/region.rs +++ b/components/tikv_util/src/store/region.rs @@ -38,6 +38,21 @@ pub fn region_on_same_stores(lhs: &Region, rhs: &Region) -> bool { }) } +/// Check if the given region exists on stores, by checking whether any one of +/// the peers belonging to this region exist on the given stores. +pub fn region_on_stores(region: &Region, store_ids: &Vec) -> bool { + if store_ids.is_empty() { + return true; + } + // If one of peers in this region exists on any on in `store_ids`, it shows that + // the region exists on the given stores. + region.get_peers().iter().any(|p| { + store_ids + .iter() + .any(|store_id| *store_id == p.get_store_id()) + }) +} + #[cfg(test)] mod tests { use super::*; From c627407cbe265cab4965b4c8b5f324cee7e844bd Mon Sep 17 00:00:00 2001 From: Hu# Date: Fri, 4 Nov 2022 10:16:01 +0800 Subject: [PATCH 0312/1149] *: resolve when flashback meets error in the first batch (#13695) close tikv/tikv#13672, close tikv/tikv#13704, close tikv/tikv#13723 resolve when flashback meets error in the first batch. 
Signed-off-by: husharp Signed-off-by: JmPotato Co-authored-by: JmPotato Co-authored-by: Ti Chi Robot --- components/test_raftstore/src/util.rs | 35 ++- src/server/service/kv.rs | 28 +-- src/storage/mod.rs | 73 +++---- src/storage/mvcc/reader/reader.rs | 2 +- .../txn/actions/flashback_to_version.rs | 199 ++++++++++-------- .../txn/commands/flashback_to_version.rs | 109 ++++++---- .../flashback_to_version_read_phase.rs | 182 ++++++++++------ src/storage/txn/commands/mod.rs | 11 +- src/storage/txn/mod.rs | 4 +- tests/integrations/server/kv_service.rs | 133 ++++++++++-- 10 files changed, 517 insertions(+), 259 deletions(-) diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 3718dbce906..1e35dc0cf13 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -818,6 +818,35 @@ pub fn must_kv_read_equal(client: &TikvClient, ctx: Context, key: Vec, val: assert_eq!(get_resp.take_value(), val); } +// TODO: replace the redundant code +pub fn complete_data_commit(client: &TikvClient, ctx: &Context, ts: u64, k: Vec, v: Vec) { + // Prewrite + let prewrite_start_version = ts + 1; + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(k.clone()); + mutation.set_value(v.clone()); + must_kv_prewrite( + client, + ctx.clone(), + vec![mutation], + k.clone(), + prewrite_start_version, + ); + // Commit + let commit_version = ts + 2; + must_kv_commit( + client, + ctx.clone(), + vec![k.clone()], + prewrite_start_version, + commit_version, + commit_version, + ); + // Get + must_kv_read_equal(client, ctx.clone(), k, v, ts + 3); +} + pub fn kv_read(client: &TikvClient, ctx: Context, key: Vec, ts: u64) -> GetResponse { let mut get_req = GetRequest::default(); get_req.set_context(ctx); @@ -1224,7 +1253,7 @@ pub fn must_flashback_to_version( version: u64, start_ts: u64, commit_ts: u64, -) -> FlashbackToVersionResponse { +) { let mut prepare_req = 
PrepareFlashbackToVersionRequest::default(); prepare_req.set_context(ctx.clone()); client @@ -1237,7 +1266,9 @@ pub fn must_flashback_to_version( req.version = version; req.start_key = b"a".to_vec(); req.end_key = b"z".to_vec(); - client.kv_flashback_to_version(&req).unwrap() + let resp = client.kv_flashback_to_version(&req).unwrap(); + assert!(!resp.has_region_error()); + assert!(resp.get_error().is_empty()); } // A helpful wrapper to make the test logic clear diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 8ac91031c33..7fc5bb77f31 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1779,24 +1779,26 @@ fn future_flashback_to_version< Err(e) => Err(e), Ok(_) => f.await?, }; - fail_point!("skip_finish_flashback_to_version", |_| { - Ok(FlashbackToVersionResponse::default()) - }); - // Send an `AdminCmdType::FinishFlashback` to unset the persistence state - // in `RegionLocalState` and region's meta, and when that - // admin cmd is applied, will update the memory - // state of the flashback - send_flashback_msg::( - &raft_router, - req.get_context(), - AdminCmdType::FinishFlashback, - ) - .await?; let mut resp = FlashbackToVersionResponse::default(); if let Some(err) = extract_region_error(&v) { resp.set_region_error(err); } else if let Err(e) = v { resp.set_error(format!("{}", e)); + } else { + // Only finish flashback when Flashback executed successfully. 
+ fail_point!("skip_finish_flashback_to_version", |_| { + Ok(FlashbackToVersionResponse::default()) + }); + // Send an `AdminCmdType::FinishFlashback` to unset the persistence state + // in `RegionLocalState` and region's meta, and when that + // admin cmd is applied, will update the memory + // state of the flashback + send_flashback_msg::( + &raft_router, + req.get_context(), + AdminCmdType::FinishFlashback, + ) + .await?; } Ok(resp) } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 16043a348ce..1c2688dd8a8 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3472,7 +3472,7 @@ mod tests { use super::{ mvcc::tests::{must_unlocked, must_written}, test_util::*, - txn::FLASHBACK_BATCH_SIZE, + txn::{commands::new_flashback_to_version_read_phase_cmd, FLASHBACK_BATCH_SIZE}, *, }; use crate::{ @@ -4745,13 +4745,12 @@ mod tests { let version = write.2; storage .sched_txn_command( - commands::FlashbackToVersionReadPhase::new( + new_flashback_to_version_read_phase_cmd( start_ts, commit_ts, version, - None, - Some(key.clone()), - Some(key.clone()), + key.clone(), + Key::from_raw(b"z"), Context::default(), ), expect_ok_callback(tx.clone(), 2), @@ -4836,13 +4835,12 @@ mod tests { let commit_ts = *ts.incr(); storage .sched_txn_command( - commands::FlashbackToVersionReadPhase::new( + new_flashback_to_version_read_phase_cmd( start_ts, commit_ts, 2.into(), - None, - Some(Key::from_raw(b"k")), - Some(Key::from_raw(b"k")), + Key::from_raw(b"k"), + Key::from_raw(b"z"), Context::default(), ), expect_ok_callback(tx.clone(), 3), @@ -4859,13 +4857,12 @@ mod tests { let commit_ts = *ts.incr(); storage .sched_txn_command( - commands::FlashbackToVersionReadPhase::new( + new_flashback_to_version_read_phase_cmd( start_ts, commit_ts, 1.into(), - None, - Some(Key::from_raw(b"k")), - Some(Key::from_raw(b"k")), + Key::from_raw(b"k"), + Key::from_raw(b"z"), Context::default(), ), expect_ok_callback(tx, 4), @@ -4950,29 +4947,33 @@ mod tests { .0, ); } - // Flashback all records. 
- storage - .sched_txn_command( - commands::FlashbackToVersionReadPhase::new( - *ts.incr(), - *ts.incr(), - TimeStamp::zero(), - None, - Some(Key::from_raw(b"k")), - Some(Key::from_raw(b"k")), - Context::default(), - ), - expect_ok_callback(tx, 2), - ) - .unwrap(); - rx.recv().unwrap(); - for i in 1..=FLASHBACK_BATCH_SIZE * 4 { - let key = Key::from_raw(format!("k{}", i).as_bytes()); - expect_none( - block_on(storage.get(Context::default(), key, *ts.incr())) - .unwrap() - .0, - ); + // Flashback all records multiple times to make sure the flashback operation is + // idempotent. + let flashback_start_ts = *ts.incr(); + let flashback_commit_ts = *ts.incr(); + for _ in 0..10 { + storage + .sched_txn_command( + new_flashback_to_version_read_phase_cmd( + flashback_start_ts, + flashback_commit_ts, + TimeStamp::zero(), + Key::from_raw(b"k"), + Key::from_raw(b"z"), + Context::default(), + ), + expect_ok_callback(tx.clone(), 2), + ) + .unwrap(); + rx.recv().unwrap(); + for i in 1..=FLASHBACK_BATCH_SIZE * 4 { + let key = Key::from_raw(format!("k{}", i).as_bytes()); + expect_none( + block_on(storage.get(Context::default(), key, *ts.incr())) + .unwrap() + .0, + ); + } } } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 321cc21427f..d4767f3bb1a 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -2006,7 +2006,7 @@ pub mod tests { ], expect_is_remain: true, }, - // k1 and k2 have old version writes at version 8. + // k1 and k2 have old version writes at version 3. 
Case { start_key: None, end_key: None, diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index 96f80b9389c..5a86a6caa7d 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -12,16 +12,13 @@ pub const FLASHBACK_BATCH_SIZE: usize = 256 + 1 /* To store the next key for mul pub fn flashback_to_version_read_lock( reader: &mut MvccReader, - next_lock_key: &Option, - end_key: &Option, + next_lock_key: Key, + end_key: &Key, statistics: &mut Statistics, ) -> TxnResult<(Vec<(Key, Lock)>, bool)> { - if next_lock_key.is_none() { - return Ok((vec![], false)); - } let key_locks_result = reader.scan_locks( - next_lock_key.as_ref(), - end_key.as_ref(), + Some(&next_lock_key), + Some(end_key), // To flashback `CF_LOCK`, we need to delete all locks. |_| true, FLASHBACK_BATCH_SIZE, @@ -32,74 +29,82 @@ pub fn flashback_to_version_read_lock( pub fn flashback_to_version_read_write( reader: &mut MvccReader, - key_locks_len: usize, - next_write_key: &Option, - end_key: &Option, + next_write_key: Key, + end_key: &Key, flashback_version: TimeStamp, flashback_start_ts: TimeStamp, flashback_commit_ts: TimeStamp, statistics: &mut Statistics, -) -> TxnResult<(Vec<(Key, Option)>, bool)> { - if next_write_key.is_none() { - return Ok((vec![], false)); - } else if key_locks_len >= FLASHBACK_BATCH_SIZE { - // The batch is full, we need to read the writes in the next batch later. - return Ok((vec![], true)); - } +) -> TxnResult)>> { // To flashback the data, we need to get all the latest keys first by scanning // every unique key in `CF_WRITE` and to get its corresponding old MVCC write // record if exists. - let (key_ts_old_writes, has_remain_writes) = reader.scan_writes( - next_write_key.as_ref(), - end_key.as_ref(), - Some(flashback_version), - // No need to find an old version for the key if its latest `commit_ts` is smaller - // than or equal to the version. 
- |key| key.decode_ts().unwrap_or(TimeStamp::zero()) > flashback_version, - FLASHBACK_BATCH_SIZE - key_locks_len, - )?; - statistics.add(&reader.statistics); - let mut key_old_writes = Vec::with_capacity(FLASHBACK_BATCH_SIZE - key_locks_len); - // Check the latest commit ts to make sure there is no commit change during the - // flashback, otherwise, we need to abort the flashback. - for (key, commit_ts, old_write) in key_ts_old_writes { - if commit_ts > flashback_commit_ts { - return Err(Error::from(ErrorInner::InvalidTxnTso { - start_ts: flashback_start_ts, - commit_ts: flashback_commit_ts, - })); + let mut key_old_writes = Vec::with_capacity(FLASHBACK_BATCH_SIZE); + let mut has_remain_writes = true; + let mut next_write_key = next_write_key; + // Try to read as many writes as possible in one batch. + while key_old_writes.len() < FLASHBACK_BATCH_SIZE && has_remain_writes { + let key_ts_old_writes; + (key_ts_old_writes, has_remain_writes) = reader.scan_writes( + Some(&next_write_key), + Some(end_key), + Some(flashback_version), + // No need to find an old version for the key if its latest `commit_ts` is smaller + // than or equal to the version. + |key| key.decode_ts().unwrap_or(TimeStamp::zero()) > flashback_version, + FLASHBACK_BATCH_SIZE - key_old_writes.len(), + )?; + statistics.add(&reader.statistics); + // If `has_remain_writes` is true, it means that the batch is full and we may + // need to read another round, so we have to update the `next_write_key` here. + if has_remain_writes { + next_write_key = key_ts_old_writes + .last() + .map(|(key, ..)| key.clone()) + .unwrap(); } - // Since the first flashback preparation phase make sure there will be no writes - // other than flashback after it, so we need to check if there is already a - // successful flashback result, and if so, just finish the flashback ASAP. 
- if commit_ts == flashback_commit_ts { - key_old_writes.clear(); - return Ok((key_old_writes, false)); + // Check the latest commit ts to make sure there is no commit change during the + // flashback, otherwise, we need to abort the flashback. + for (key, commit_ts, old_write) in key_ts_old_writes.into_iter() { + if commit_ts > flashback_commit_ts { + return Err(Error::from(ErrorInner::InvalidTxnTso { + start_ts: flashback_start_ts, + commit_ts: flashback_commit_ts, + })); + } + // Although the first flashback preparation phase makes sure there will be no + // writes other than flashback after it, we CAN NOT return directly here. + // Suppose the second phase procedure contains two batches to flashback. After + // the first batch is committed, if the region is down, the client will retry + // the flashback from the very first beginning, because the data in the + // first batch has been written the flashbacked data with the same + // `commit_ts`, So we need to skip it to ensure the following data will + // be flashbacked continuously. + // And some large key modifications will exceed the max txn size limit + // through the execution, the write will forcibly finish the batch of data. + // So it may happen that part of the keys in a batch may be flashbacked. + if commit_ts == flashback_commit_ts { + continue; + } + key_old_writes.push((key, old_write)); } - key_old_writes.push((key, old_write)); } - Ok((key_old_writes, has_remain_writes)) + Ok(key_old_writes) } -pub fn flashback_to_version( +// To flashback the `CF_LOCK`, we need to delete all locks records whose +// `start_ts` is greater than the specified version, and if it's not a +// short-value `LockType::Put`, we need to delete the actual data from +// `CF_DEFAULT` as well. +// TODO: `resolved_ts` should be taken into account. 
+pub fn flashback_to_version_lock( txn: &mut MvccTxn, reader: &mut SnapshotReader, - next_lock_key: &mut Option, - next_write_key: &mut Option, key_locks: Vec<(Key, Lock)>, - key_old_writes: Vec<(Key, Option)>, - start_ts: TimeStamp, - commit_ts: TimeStamp, -) -> TxnResult { - // To flashback the `CF_LOCK`, we need to delete all locks records whose - // `start_ts` is greater than the specified version, and if it's not a - // short-value `LockType::Put`, we need to delete the actual data from - // `CF_DEFAULT` as well. - // TODO: `resolved_ts` should be taken into account. +) -> TxnResult> { for (key, lock) in key_locks { if txn.write_size() >= MAX_TXN_WRITE_SIZE { - *next_lock_key = Some(key); - break; + return Ok(Some(key)); } // To guarantee rollback with start ts of the locks reader.start_ts = lock.ts; @@ -112,18 +117,37 @@ pub fn flashback_to_version( true, )?; } - // To flashback the `CF_WRITE` and `CF_DEFAULT`, we need to write a new MVCC - // record for each key in `self.keys` with its old value at `self.version`, - // specifically, the flashback will have the following behavior: - // - If a key doesn't exist at `self.version`, it will be put a - // `WriteType::Delete`. - // - If a key exists at `self.version`, it will be put the exact same record - // in `CF_WRITE` and `CF_DEFAULT` if needed with `self.commit_ts` and - // `self.start_ts`. + Ok(None) +} + +// To flashback the `CF_WRITE` and `CF_DEFAULT`, we need to write a new MVCC +// record for each key in `self.keys` with its old value at `self.version`, +// specifically, the flashback will have the following behavior: +// - If a key doesn't exist at `self.version`, it will be put a +// `WriteType::Delete`. +// - If a key exists at `self.version`, it will be put the exact same record +// in `CF_WRITE` and `CF_DEFAULT` if needed with `self.commit_ts` and +// `self.start_ts`. 
+pub fn flashback_to_version_write( + txn: &mut MvccTxn, + reader: &mut SnapshotReader, + key_old_writes: Vec<(Key, Option)>, + start_ts: TimeStamp, + commit_ts: TimeStamp, +) -> TxnResult> { for (key, old_write) in key_old_writes { + #[cfg(feature = "failpoints")] + { + let should_skip = || { + fail::fail_point!("flashback_skip_1_key_in_write", |_| true); + false + }; + if should_skip() { + continue; + } + } if txn.write_size() >= MAX_TXN_WRITE_SIZE { - *next_write_key = Some(key); - break; + return Ok(Some(key.clone())); } let new_write = if let Some(old_write) = old_write { // If it's not a short value and it's a `WriteType::Put`, we should put the old @@ -135,7 +159,11 @@ pub fn flashback_to_version( reader.load_data(&key, old_write.clone())?, ); } - Write::new(old_write.write_type, start_ts, old_write.short_value) + Write::new( + old_write.write_type, + start_ts, + old_write.short_value.clone(), + ) } else { // If the old write doesn't exist, we should put a `WriteType::Delete` record to // delete the current key when needed. @@ -148,7 +176,7 @@ pub fn flashback_to_version( }; txn.put_write(key.clone(), commit_ts, new_write.as_ref().to_bytes()); } - Ok(txn.modifies.len()) + Ok(None) } #[cfg(test)] @@ -179,43 +207,48 @@ pub mod tests { start_ts: impl Into, commit_ts: impl Into, ) -> usize { + let next_key = Key::from_raw(keys::next_key(key).as_slice()); let key = Key::from_raw(key); let (version, start_ts, commit_ts) = (version.into(), start_ts.into(), commit_ts.into()); let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &ctx); let mut statistics = Statistics::default(); + // Flashback the locks. 
let (key_locks, has_remain_locks) = - flashback_to_version_read_lock(&mut reader, &Some(key.clone()), &None, &mut statistics) + flashback_to_version_read_lock(&mut reader, key.clone(), &next_key, &mut statistics) .unwrap(); assert!(!has_remain_locks); - let (key_old_writes, has_remain_writes) = flashback_to_version_read_write( + let cm = ConcurrencyManager::new(TimeStamp::zero()); + let mut txn = MvccTxn::new(start_ts, cm.clone()); + let snapshot = engine.snapshot(Default::default()).unwrap(); + let mut snap_reader = SnapshotReader::new_with_ctx(version, snapshot, &ctx); + flashback_to_version_lock(&mut txn, &mut snap_reader, key_locks).unwrap(); + let mut rows = txn.modifies.len(); + write(engine, &ctx, txn.into_modifies()); + // Flashback the writes. + let key_old_writes = flashback_to_version_read_write( &mut reader, - 0, - &Some(key.clone()), - &None, + key, + &next_key, version, start_ts, commit_ts, &mut statistics, ) .unwrap(); - assert!(!has_remain_writes); - let cm = ConcurrencyManager::new(TimeStamp::zero()); let mut txn = MvccTxn::new(start_ts, cm); let snapshot = engine.snapshot(Default::default()).unwrap(); - let mut reader = SnapshotReader::new_with_ctx(version, snapshot, &ctx); - let rows = flashback_to_version( + let mut snap_reader = SnapshotReader::new_with_ctx(version, snapshot, &ctx); + flashback_to_version_write( &mut txn, - &mut reader, - &mut None, - &mut Some(key), - key_locks, + &mut snap_reader, key_old_writes, start_ts, commit_ts, ) .unwrap(); + rows += txn.modifies.len(); write(engine, &ctx, txn.into_modifies()); rows } diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index 9b198724e3b..f20fd957ed7 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -1,18 +1,22 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
// #[PerformanceCriticalPath] -use txn_types::{Key, Lock, TimeStamp, Write}; +use std::mem; + +use txn_types::{Key, TimeStamp}; use crate::storage::{ kv::WriteData, lock_manager::LockManager, mvcc::{MvccTxn, SnapshotReader}, txn::{ + actions::flashback_to_version::{flashback_to_version_lock, flashback_to_version_write}, commands::{ - Command, CommandExt, FlashbackToVersionReadPhase, ReaderWithStats, ReleasedLocks, - ResponsePolicy, TypedCommand, WriteCommand, WriteContext, WriteResult, + Command, CommandExt, FlashbackToVersionReadPhase, FlashbackToVersionState, + ReaderWithStats, ReleasedLocks, ResponsePolicy, TypedCommand, WriteCommand, + WriteContext, WriteResult, }, - flashback_to_version, latch, Result, + latch, Result, }, ProcessResult, Snapshot, }; @@ -25,11 +29,9 @@ command! { start_ts: TimeStamp, commit_ts: TimeStamp, version: TimeStamp, - end_key: Option, - next_lock_key: Option, - next_write_key: Option, - key_locks: Vec<(Key, Lock)>, - key_old_writes: Vec<(Key, Option)>, + start_key: Key, + end_key: Key, + state: FlashbackToVersionState, } } @@ -39,24 +41,27 @@ impl CommandExt for FlashbackToVersion { request_type!(KvFlashbackToVersion); fn gen_lock(&self) -> latch::Lock { - latch::Lock::new( - self.key_locks - .iter() - .map(|(key, _)| key) - .chain(self.key_old_writes.iter().map(|(key, _)| key)), - ) + match &self.state { + FlashbackToVersionState::ScanLock { key_locks, .. } => { + latch::Lock::new(key_locks.iter().map(|(key, _)| key)) + } + FlashbackToVersionState::ScanWrite { key_old_writes, .. } => { + latch::Lock::new(key_old_writes.iter().map(|(key, _)| key)) + } + } } fn write_bytes(&self) -> usize { - self.key_locks - .iter() - .map(|(key, _)| key.as_encoded().len()) - .chain( - self.key_old_writes - .iter() - .map(|(key, _)| key.as_encoded().len()), - ) - .sum() + match &self.state { + FlashbackToVersionState::ScanLock { key_locks, .. 
} => key_locks + .iter() + .map(|(key, _)| key.as_encoded().len()) + .sum(), + FlashbackToVersionState::ScanWrite { key_old_writes, .. } => key_old_writes + .iter() + .map(|(key, _)| key.as_encoded().len()) + .sum(), + } } } @@ -67,42 +72,58 @@ impl WriteCommand for FlashbackToVersion { context.statistics, ); let mut txn = MvccTxn::new(TimeStamp::zero(), context.concurrency_manager); - - let mut next_lock_key = self.next_lock_key.take(); - let mut next_write_key = self.next_write_key.take(); - let rows = flashback_to_version( - &mut txn, - &mut reader, - &mut next_lock_key, - &mut next_write_key, - self.key_locks, - self.key_old_writes, - self.start_ts, - self.commit_ts, - )?; + // The state must be `ScanLock` or `ScanWrite` here. + match self.state { + FlashbackToVersionState::ScanLock { + ref mut next_lock_key, + ref mut key_locks, + } => { + if let Some(new_next_lock_key) = + flashback_to_version_lock(&mut txn, &mut reader, mem::take(key_locks))? + { + *next_lock_key = new_next_lock_key; + } + } + FlashbackToVersionState::ScanWrite { + ref mut next_write_key, + ref mut key_old_writes, + } => { + if let Some(new_next_write_key) = flashback_to_version_write( + &mut txn, + &mut reader, + mem::take(key_old_writes), + self.start_ts, + self.commit_ts, + )? 
{ + *next_write_key = new_next_write_key; + } + } + } + let rows = txn.modifies.len(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.extra.for_flashback = true; Ok(WriteResult { ctx: self.ctx.clone(), to_be_write: write_data, rows, - pr: if next_lock_key.is_none() && next_write_key.is_none() { - ProcessResult::Res - } else { + pr: (move || { + fail_point!("flashback_failed_after_first_batch", |_| { + ProcessResult::Res + }); let next_cmd = FlashbackToVersionReadPhase { - ctx: self.ctx.clone(), + ctx: self.ctx, deadline: self.deadline, start_ts: self.start_ts, commit_ts: self.commit_ts, version: self.version, + start_key: self.start_key, end_key: self.end_key, - next_lock_key, - next_write_key, + state: self.state, }; ProcessResult::NextCommand { cmd: Command::FlashbackToVersionReadPhase(next_cmd), } - }, + })(), lock_info: None, released_locks: ReleasedLocks::new(), lock_guards: vec![], diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs index 47348c8e188..d74c6f8d708 100644 --- a/src/storage/txn/commands/flashback_to_version_read_phase.rs +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -1,7 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
// #[PerformanceCriticalPath] -use txn_types::{Key, TimeStamp}; +use txn_types::{Key, Lock, TimeStamp, Write}; use crate::storage::{ mvcc::MvccReader, @@ -13,9 +13,43 @@ use crate::storage::{ sched_pool::tls_collect_keyread_histogram_vec, Error, ErrorInner, Result, }, - ScanMode, Snapshot, Statistics, + Context, ScanMode, Snapshot, Statistics, }; +#[derive(Debug)] +pub enum FlashbackToVersionState { + ScanLock { + next_lock_key: Key, + key_locks: Vec<(Key, Lock)>, + }, + ScanWrite { + next_write_key: Key, + key_old_writes: Vec<(Key, Option)>, + }, +} + +pub fn new_flashback_to_version_read_phase_cmd( + start_ts: TimeStamp, + commit_ts: TimeStamp, + version: TimeStamp, + start_key: Key, + end_key: Key, + ctx: Context, +) -> TypedCommand<()> { + FlashbackToVersionReadPhase::new( + start_ts, + commit_ts, + version, + start_key.clone(), + end_key, + FlashbackToVersionState::ScanLock { + next_lock_key: start_key, + key_locks: Vec::new(), + }, + ctx, + ) +} + command! { FlashbackToVersionReadPhase: cmd_ty => (), @@ -24,9 +58,9 @@ command! { start_ts: TimeStamp, commit_ts: TimeStamp, version: TimeStamp, - end_key: Option, - next_lock_key: Option, - next_write_key: Option, + start_key: Key, + end_key: Key, + state: FlashbackToVersionState, } } @@ -58,62 +92,90 @@ impl ReadCommand for FlashbackToVersionReadPhase { commit_ts: self.commit_ts, })); } + let tag = self.tag().get_str(); + let mut read_again = false; let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &self.ctx); - // Scan the locks. - let (key_locks, has_remain_locks) = flashback_to_version_read_lock( - &mut reader, - &self.next_lock_key, - &self.end_key, - statistics, - )?; - // Scan the writes. 
- let (mut key_old_writes, has_remain_writes) = flashback_to_version_read_write( - &mut reader, - key_locks.len(), - &self.next_write_key, - &self.end_key, - self.version, - self.start_ts, - self.commit_ts, - statistics, - )?; - tls_collect_keyread_histogram_vec( - self.tag().get_str(), - (key_locks.len() + key_old_writes.len()) as f64, - ); - - if key_locks.is_empty() && key_old_writes.is_empty() { - Ok(ProcessResult::Res) - } else { - let next_lock_key = if has_remain_locks { - key_locks.last().map(|(key, _)| key.clone()) - } else { - None - }; - let next_write_key = if has_remain_writes && !key_old_writes.is_empty() { - key_old_writes.pop().map(|(key, _)| key) - } else if has_remain_writes && key_old_writes.is_empty() { - // We haven't read any write yet, so we need to read the writes in the next - // batch later. - self.next_write_key + // Separate the lock and write flashback to prevent from putting two writes for + // the same key in a single batch to make the TiCDC panic. + let next_state = match self.state { + FlashbackToVersionState::ScanLock { next_lock_key, .. } => { + let (mut key_locks, has_remain_locks) = flashback_to_version_read_lock( + &mut reader, + next_lock_key, + &self.end_key, + statistics, + )?; + if key_locks.is_empty() && !has_remain_locks { + // No more locks to flashback, continue to scan the writes. + read_again = true; + FlashbackToVersionState::ScanWrite { + next_write_key: self.start_key.clone(), + key_old_writes: Vec::new(), + } + } else { + assert!(!key_locks.is_empty()); + tls_collect_keyread_histogram_vec(tag, key_locks.len() as f64); + FlashbackToVersionState::ScanLock { + // DO NOT pop the last key as the next key when it's the only key to prevent + // from making flashback fall into an dead loop. 
+ next_lock_key: if key_locks.len() > 1 { + key_locks.pop().map(|(key, _)| key).unwrap() + } else { + key_locks.last().map(|(key, _)| key.clone()).unwrap() + }, + key_locks, + } + } + } + FlashbackToVersionState::ScanWrite { next_write_key, .. } => { + let mut key_old_writes = flashback_to_version_read_write( + &mut reader, + next_write_key, + &self.end_key, + self.version, + self.start_ts, + self.commit_ts, + statistics, + )?; + if key_old_writes.is_empty() { + // No more writes to flashback, just return. + return Ok(ProcessResult::Res); + } + tls_collect_keyread_histogram_vec(tag, key_old_writes.len() as f64); + FlashbackToVersionState::ScanWrite { + next_write_key: if key_old_writes.len() > 1 { + key_old_writes.pop().map(|(key, _)| key).unwrap() + } else { + key_old_writes.last().map(|(key, _)| key.clone()).unwrap() + }, + key_old_writes, + } + } + }; + Ok(ProcessResult::NextCommand { + cmd: if read_again { + Command::FlashbackToVersionReadPhase(FlashbackToVersionReadPhase { + ctx: self.ctx, + deadline: self.deadline, + start_ts: self.start_ts, + commit_ts: self.commit_ts, + version: self.version, + start_key: self.start_key, + end_key: self.end_key, + state: next_state, + }) } else { - None - }; - let next_cmd = FlashbackToVersion { - ctx: self.ctx, - deadline: self.deadline, - start_ts: self.start_ts, - commit_ts: self.commit_ts, - version: self.version, - end_key: self.end_key, - key_locks, - key_old_writes, - next_lock_key, - next_write_key, - }; - Ok(ProcessResult::NextCommand { - cmd: Command::FlashbackToVersion(next_cmd), - }) - } + Command::FlashbackToVersion(FlashbackToVersion { + ctx: self.ctx, + deadline: self.deadline, + start_ts: self.start_ts, + commit_ts: self.commit_ts, + version: self.version, + start_key: self.start_key, + end_key: self.end_key, + state: next_state, + }) + }, + }) } } diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 4213eeb6b68..fc044a9fa78 100644 --- a/src/storage/txn/commands/mod.rs +++ 
b/src/storage/txn/commands/mod.rs @@ -40,7 +40,9 @@ pub use commit::Commit; pub use compare_and_swap::RawCompareAndSwap; use concurrency_manager::{ConcurrencyManager, KeyHandleGuard}; pub use flashback_to_version::FlashbackToVersion; -pub use flashback_to_version_read_phase::FlashbackToVersionReadPhase; +pub use flashback_to_version_read_phase::{ + new_flashback_to_version_read_phase_cmd, FlashbackToVersionReadPhase, FlashbackToVersionState, +}; use kvproto::kvrpcpb::*; pub use mvcc_by_key::MvccByKey; pub use mvcc_by_start_ts::MvccByStartTs; @@ -353,13 +355,12 @@ impl From for TypedCommand> { impl From for TypedCommand<()> { fn from(mut req: FlashbackToVersionRequest) -> Self { - FlashbackToVersionReadPhase::new( + new_flashback_to_version_read_phase_cmd( req.get_start_ts().into(), req.get_commit_ts().into(), req.get_version().into(), - Some(Key::from_raw(req.get_end_key())), - Some(Key::from_raw(req.get_start_key())), - Some(Key::from_raw(req.get_start_key())), + Key::from_raw(req.get_start_key()), + Key::from_raw(req.get_end_key()), req.take_context(), ) } diff --git a/src/storage/txn/mod.rs b/src/storage/txn/mod.rs index 1af3c9d63e6..615ab98cb8c 100644 --- a/src/storage/txn/mod.rs +++ b/src/storage/txn/mod.rs @@ -24,8 +24,8 @@ pub use self::{ cleanup::cleanup, commit::commit, flashback_to_version::{ - flashback_to_version, flashback_to_version_read_lock, flashback_to_version_read_write, - FLASHBACK_BATCH_SIZE, + flashback_to_version_lock, flashback_to_version_read_lock, + flashback_to_version_read_write, flashback_to_version_write, FLASHBACK_BATCH_SIZE, }, gc::gc, prewrite::{prewrite, CommitKind, TransactionKind, TransactionProperties}, diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index f3e3bda8a24..cfbe6ff504e 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -1,6 +1,7 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ + char::from_u32, path::Path, sync::*, thread, @@ -42,6 +43,7 @@ use tikv::{ gc_worker::sync_gc, service::{batch_commands_request, batch_commands_response}, }, + storage::txn::FLASHBACK_BATCH_SIZE, }; use tikv_util::{ config::ReadableSize, @@ -597,13 +599,123 @@ fn test_mvcc_resolve_lock_gc_and_delete() { assert!(del_resp.error.is_empty()); } +#[test] +#[cfg(feature = "failpoints")] +fn test_mvcc_flashback_failed_after_first_batch() { + let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let mut ts = 0; + for i in 0..FLASHBACK_BATCH_SIZE * 2 { + // Meet the constraints of the alphabetical order for test + let k = format!("key@{}", from_u32(i as u32).unwrap()).into_bytes(); + complete_data_commit(&client, &ctx, ts, k.clone(), b"value@0".to_vec()); + } + ts += 3; + let check_ts = ts; + for i in 0..FLASHBACK_BATCH_SIZE * 2 { + let k = format!("key@{}", from_u32(i as u32).unwrap()).into_bytes(); + complete_data_commit(&client, &ctx, ts, k.clone(), b"value@1".to_vec()); + } + ts += 3; + // Flashback + fail::cfg("flashback_failed_after_first_batch", "return").unwrap(); + fail::cfg("flashback_skip_1_key_in_write", "1*return").unwrap(); + must_flashback_to_version(&client, ctx.clone(), check_ts, ts + 1, ts + 2); + fail::remove("flashback_skip_1_key_in_write"); + fail::remove("flashback_failed_after_first_batch"); + // skip for key@0 + must_kv_read_equal( + &client, + ctx.clone(), + format!("key@{}", from_u32(0_u32).unwrap()) + .as_bytes() + .to_vec(), + b"value@1".to_vec(), + ts + 2, + ); + // The first batch of writes are flashbacked. + must_kv_read_equal( + &client, + ctx.clone(), + format!("key@{}", from_u32(1_u32).unwrap()) + .as_bytes() + .to_vec(), + b"value@0".to_vec(), + ts + 2, + ); + // Subsequent batches of writes are not flashbacked. 
+ must_kv_read_equal( + &client, + ctx.clone(), + format!("key@{}", from_u32(FLASHBACK_BATCH_SIZE as u32 - 1).unwrap()) + .as_bytes() + .to_vec(), + b"value@1".to_vec(), + ts + 2, + ); + // Flashback batch 2. + fail::cfg("flashback_failed_after_first_batch", "return").unwrap(); + must_flashback_to_version(&client, ctx.clone(), check_ts, ts + 1, ts + 2); + fail::remove("flashback_failed_after_first_batch"); + // key@0 must be flahsbacked in the second batch firstly. + must_kv_read_equal( + &client, + ctx.clone(), + format!("key@{}", from_u32(0_u32).unwrap()) + .as_bytes() + .to_vec(), + b"value@0".to_vec(), + ts + 2, + ); + must_kv_read_equal( + &client, + ctx.clone(), + format!("key@{}", from_u32(FLASHBACK_BATCH_SIZE as u32 - 1).unwrap()) + .as_bytes() + .to_vec(), + b"value@0".to_vec(), + ts + 2, + ); + // 2 * (FLASHBACK_BATCH_SIZE - 1) - 1 keys are flashbacked. + must_kv_read_equal( + &client, + ctx.clone(), + format!( + "key@{}", + from_u32(2 * FLASHBACK_BATCH_SIZE as u32 - 3).unwrap() + ) + .as_bytes() + .to_vec(), + b"value@1".to_vec(), + ts + 2, + ); + // Flashback needs to be continued. + must_flashback_to_version(&client, ctx.clone(), check_ts, ts + 1, ts + 2); + // Flashback again to check if any error occurs :) + must_flashback_to_version(&client, ctx.clone(), check_ts, ts + 1, ts + 2); + ts += 2; + // Subsequent batches of writes are flashbacked. + must_kv_read_equal( + &client, + ctx, + format!( + "key@{}", + from_u32(2 * FLASHBACK_BATCH_SIZE as u32 - 3).unwrap() + ) + .as_bytes() + .to_vec(), + b"value@0".to_vec(), + ts, + ); +} + #[test] fn test_mvcc_flashback() { let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); let mut ts = 0; - let k = b"key".to_vec(); - for i in 0..10 { + // Need to write many batches. 
+ for i in 0..2000 { let v = format!("value@{}", i).into_bytes(); + let k = format!("key@{}", i % 1000).into_bytes(); // Prewrite ts += 1; let prewrite_start_version = ts; @@ -634,6 +746,7 @@ fn test_mvcc_flashback() { must_kv_read_equal(&client, ctx.clone(), k.clone(), v.clone(), ts) } // Prewrite to leave a lock. + let k = b"key@1".to_vec(); ts += 1; let prewrite_start_version = ts; let mut mutation = Mutation::default(); @@ -651,19 +764,17 @@ fn test_mvcc_flashback() { let get_version = ts; let mut get_req = GetRequest::default(); get_req.set_context(ctx.clone()); - get_req.key = k.clone(); + get_req.key = k; get_req.version = get_version; let get_resp = client.kv_get(&get_req).unwrap(); assert!(!get_resp.has_region_error()); assert!(get_resp.get_error().has_locked()); assert!(get_resp.value.is_empty()); // Flashback - let flashback_resp = must_flashback_to_version(&client, ctx.clone(), 5, ts + 1, ts + 2); + must_flashback_to_version(&client, ctx.clone(), 5, ts + 1, ts + 2); ts += 2; - assert!(!flashback_resp.has_region_error()); - assert!(flashback_resp.get_error().is_empty()); // Should not meet the lock and can not get the latest data any more. - must_kv_read_equal(&client, ctx, k, b"value@1".to_vec(), ts); + must_kv_read_equal(&client, ctx, b"key@1".to_vec(), b"value@1".to_vec(), ts); } #[test] @@ -672,9 +783,7 @@ fn test_mvcc_flashback_block_rw() { let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); fail::cfg("skip_finish_flashback_to_version", "return").unwrap(); // Flashback - let flashback_resp = must_flashback_to_version(&client, ctx.clone(), 0, 1, 2); - assert!(!flashback_resp.has_region_error()); - assert!(flashback_resp.get_error().is_empty()); + must_flashback_to_version(&client, ctx.clone(), 0, 1, 2); // Try to read. 
let (k, v) = (b"key".to_vec(), b"value".to_vec()); // Get @@ -712,9 +821,7 @@ fn test_mvcc_flashback_block_scheduling() { let (mut cluster, client, ctx) = must_new_cluster_and_kv_client(); fail::cfg("skip_finish_flashback_to_version", "return").unwrap(); // Flashback - let flashback_resp = must_flashback_to_version(&client, ctx, 0, 1, 2); - assert!(!flashback_resp.has_region_error()); - assert!(flashback_resp.get_error().is_empty()); + must_flashback_to_version(&client, ctx, 0, 1, 2); // Try to transfer leader. let transfer_leader_resp = cluster.try_transfer_leader(1, new_peer(2, 2)); assert!( From 16d1e2a2c3e84634b7da13f3ee640ca6e5c08adc Mon Sep 17 00:00:00 2001 From: Yasuo Honda Date: Fri, 4 Nov 2022 12:42:01 +0900 Subject: [PATCH 0313/1149] *: Fix link to TiDB Release Notes (#13718) close tikv/tikv#13720 Signed-off-by: Yasuo Honda --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb19c34a583..26fd52f2bd5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # TiKV Change Log All notable changes to this project are documented in this file. -See also [TiDB Changelog](https://github.com/pingcap/tidb/blob/master/CHANGELOG.md) and [PD Changelog](https://github.com/pingcap/pd/blob/master/CHANGELOG.md). +See also [TiDB Release Notes](https://github.com/pingcap/docs/blob/master/releases/release-notes.md) and [PD Changelog](https://github.com/pingcap/pd/blob/master/CHANGELOG.md). 
## [5.3.0] - 2021-11-29 From 64fe6ce808ba1d3847ed02591e9dea3e022276db Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 4 Nov 2022 17:24:01 +0800 Subject: [PATCH 0314/1149] raftstore-v2: partially support tablet split (#13689) ref tikv/tikv#12842 Signed-off-by: SpadeA-Tang Co-authored-by: Xinye Tao --- components/raftstore-v2/src/fsm/apply.rs | 4 +- components/raftstore-v2/src/lib.rs | 1 + .../src/operation/command/admin/mod.rs | 13 +- .../src/operation/command/admin/split.rs | 623 ++++++++++++++++++ .../raftstore-v2/src/operation/command/mod.rs | 29 +- .../src/operation/ready/snapshot.rs | 1 + components/raftstore-v2/src/raft/apply.rs | 14 +- components/raftstore-v2/src/raft/storage.rs | 1 + components/raftstore/src/store/fsm/apply.rs | 148 ++++- components/raftstore/src/store/util.rs | 22 +- 10 files changed, 813 insertions(+), 43 deletions(-) create mode 100644 components/raftstore-v2/src/operation/command/admin/split.rs diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index c4eb03f350d..b8faf589760 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -11,7 +11,7 @@ use std::{ use batch_system::{Fsm, FsmScheduler, Mailbox}; use crossbeam::channel::TryRecvError; -use engine_traits::KvEngine; +use engine_traits::{KvEngine, TabletFactory}; use futures::{Future, StreamExt}; use kvproto::{metapb, raft_serverpb::RegionLocalState}; use raftstore::store::ReadTask; @@ -65,6 +65,7 @@ impl ApplyFsm { region_state: RegionLocalState, res_reporter: R, remote_tablet: CachedTablet, + tablet_factory: Arc>, read_scheduler: Scheduler>, logger: Logger, ) -> (ApplyScheduler, Self) { @@ -74,6 +75,7 @@ impl ApplyFsm { region_state, res_reporter, remote_tablet, + tablet_factory, read_scheduler, logger, ); diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 0c1a460298d..2f30ee9873d 100644 --- 
a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -23,6 +23,7 @@ #![allow(unused)] #![feature(let_else)] +#![feature(array_windows)] mod batch; mod bootstrap; diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 396e3ede98f..afaefeb9b7e 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -1,10 +1,11 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. mod conf_change; +mod split; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ - raft_cmdpb::{AdminRequest, RaftCmdRequest}, + raft_cmdpb::{AdminCmdType, AdminRequest, RaftCmdRequest}, raft_serverpb::PeerState, }; use protobuf::Message; @@ -19,6 +20,7 @@ use raftstore::{ Result, }; use slog::info; +pub use split::SplitResult; use tikv_util::box_err; use self::conf_change::ConfChangeResult; @@ -30,6 +32,7 @@ use crate::{ #[derive(Debug)] pub enum AdminCmdResult { + SplitRegion(SplitResult), ConfChange(ConfChangeResult), } @@ -72,7 +75,13 @@ impl Peer { self.propose_conf_change(ctx, req) } else { // propose other admin command. - unimplemented!() + match cmd_type { + AdminCmdType::Split => Err(box_err!( + "Split is deprecated. Please use BatchSplit instead." + )), + AdminCmdType::BatchSplit => self.propose_split(ctx, req), + _ => unimplemented!(), + } }; if let Err(e) = &res { info!( diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs new file mode 100644 index 00000000000..c0d8998c4ad --- /dev/null +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -0,0 +1,623 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module contains batch split related processing logic. +//! +//! Process Overview +//! +//! Propose: +//! 
- Nothing special except for validating batch split requests (ex: split keys +//! are in ascending order). +//! +//! Execution: +//! - exec_batch_split: Create and initialize metapb::region for split regions +//! and derived regions. Then, create checkpoints of the current talbet for +//! split regions and derived region to make tablet physical isolated. Update +//! the parent region's region state without persistency. Send the new regions +//! (including derived region) back to raftstore. +//! +//! Result apply: +//! - todo +//! +//! Split peer creation and initlization: +//! - todo +//! +//! Split finish: +//! - todo + +use std::collections::VecDeque; + +use engine_traits::{ + Checkpointer, KvEngine, OpenOptions, RaftEngine, TabletFactory, CF_DEFAULT, SPLIT_PREFIX, +}; +use kvproto::{ + metapb::Region, + raft_cmdpb::{AdminRequest, AdminResponse, RaftCmdRequest, SplitRequest}, + raft_serverpb::RegionLocalState, +}; +use protobuf::Message; +use raftstore::{ + coprocessor::split_observer::{is_valid_split_key, strip_timestamp_if_exists}, + store::{ + fsm::apply::validate_batch_split, + metrics::PEER_ADMIN_CMD_COUNTER, + util::{self, KeysInfoFormatter}, + PeerStat, ProposalContext, RAFT_INIT_LOG_INDEX, + }, + Result, +}; +use slog::{info, warn, Logger}; +use tikv_util::box_err; + +use crate::{ + batch::StoreContext, + fsm::ApplyResReporter, + operation::AdminCmdResult, + raft::{Apply, Peer}, + router::ApplyRes, +}; + +#[derive(Debug)] +pub struct SplitResult { + pub regions: Vec, + // The index of the derived region in `regions` + pub derived_index: usize, + pub tablet_index: u64, +} + +impl Peer { + pub fn propose_split( + &mut self, + store_ctx: &mut StoreContext, + mut req: RaftCmdRequest, + ) -> Result { + validate_batch_split(req.mut_admin_request(), self.region())?; + let mut proposal_ctx = ProposalContext::empty(); + proposal_ctx.insert(ProposalContext::SYNC_LOG); + proposal_ctx.insert(ProposalContext::SPLIT); + + let data = req.write_to_bytes().unwrap(); + 
self.propose_with_ctx(store_ctx, data, proposal_ctx.to_vec()) + } +} + +impl Apply { + pub fn apply_split( + &mut self, + req: &AdminRequest, + log_index: u64, + ) -> Result<(AdminResponse, AdminCmdResult)> { + info!( + self.logger, + "split is deprecated, redirect to use batch split"; + ); + let split = req.get_split().to_owned(); + let mut admin_req = AdminRequest::default(); + admin_req + .mut_splits() + .set_right_derive(split.get_right_derive()); + admin_req.mut_splits().mut_requests().push(split); + // This method is executed only when there are unapplied entries after being + // restarted. So there will be no callback, it's OK to return a response + // that does not matched with its request. + self.apply_batch_split(req, log_index) + } + + pub fn apply_batch_split( + &mut self, + req: &AdminRequest, + log_index: u64, + ) -> Result<(AdminResponse, AdminCmdResult)> { + PEER_ADMIN_CMD_COUNTER.batch_split.all.inc(); + + let region = self.region_state().get_region(); + let region_id = region.get_id(); + validate_batch_split(req, self.region_state().get_region())?; + + let mut boundaries: Vec<&[u8]> = Vec::default(); + boundaries.push(self.region_state().get_region().get_start_key()); + for req in req.get_splits().get_requests() { + boundaries.push(req.get_split_key()); + } + boundaries.push(self.region_state().get_region().get_end_key()); + + info!( + self.logger, + "split region"; + "region" => ?region, + "boundaries" => %KeysInfoFormatter(boundaries.iter()), + ); + + let split_reqs = req.get_splits(); + let new_region_cnt = split_reqs.get_requests().len(); + let new_version = region.get_region_epoch().get_version() + new_region_cnt as u64; + + let mut derived_req = SplitRequest::default(); + derived_req.new_region_id = region.id; + let derived_req = &[derived_req]; + + let right_derive = split_reqs.get_right_derive(); + let reqs = if right_derive { + split_reqs.get_requests().iter().chain(derived_req) + } else { + 
derived_req.iter().chain(split_reqs.get_requests()) + }; + + let regions: Vec<_> = boundaries + .array_windows::<2>() + .zip(reqs) + .map(|([start_key, end_key], req)| { + let mut new_region = Region::default(); + new_region.set_id(req.get_new_region_id()); + new_region.set_region_epoch(region.get_region_epoch().to_owned()); + new_region.mut_region_epoch().set_version(new_version); + new_region.set_start_key(start_key.to_vec()); + new_region.set_end_key(end_key.to_vec()); + new_region.set_peers(region.get_peers().to_vec().into()); + // If the `req` is the `derived_req`, the peers are already set correctly and + // the following loop will not be executed due to the empty `new_peer_ids` in + // the `derived_req` + for (peer, peer_id) in new_region + .mut_peers() + .iter_mut() + .zip(req.get_new_peer_ids()) + { + peer.set_id(*peer_id); + } + new_region + }) + .collect(); + + let derived_index = if right_derive { regions.len() - 1 } else { 0 }; + + // We will create checkpoint of the current tablet for both derived region and + // split regions. Before the creation, we should flush the writes and remove the + // write batch + self.flush(); + + // todo(SpadeA): Here: we use a temporary solution that we use checkpoint API to + // clone new tablets. It may cause large jitter as we need to flush the + // memtable. And more what is more important is that after removing WAL, the API + // will never flush. + // We will freeze the memtable rather than flush it in the following PR. 
+ let tablet = self.tablet().clone(); + let mut checkpointer = tablet.new_checkpointer().unwrap_or_else(|e| { + panic!( + "{:?} fails to create checkpoint object: {:?}", + self.logger.list(), + e + ) + }); + + for new_region in ®ions { + let new_region_id = new_region.id; + if new_region_id == region_id { + continue; + } + + let split_temp_path = self.tablet_factory().tablet_path_with_prefix( + SPLIT_PREFIX, + new_region_id, + RAFT_INIT_LOG_INDEX, + ); + checkpointer + .create_at(&split_temp_path, None, 0) + .unwrap_or_else(|e| { + panic!( + "{:?} fails to create checkpoint with path {:?}: {:?}", + self.logger.list(), + split_temp_path, + e + ) + }); + } + + let derived_path = self.tablet_factory().tablet_path(region_id, log_index); + checkpointer + .create_at(&derived_path, None, 0) + .unwrap_or_else(|e| { + panic!( + "{:?} fails to create checkpoint with path {:?}: {:?}", + self.logger.list(), + derived_path, + e + ) + }); + let tablet = self + .tablet_factory() + .open_tablet(region_id, Some(log_index), OpenOptions::default()) + .unwrap(); + // Remove the old write batch. 
+ self.write_batch_mut().take(); + self.publish_tablet(tablet); + + self.region_state_mut() + .set_region(regions[derived_index].clone()); + self.region_state_mut().set_tablet_index(log_index); + + let mut resp = AdminResponse::default(); + resp.mut_splits().set_regions(regions.clone().into()); + PEER_ADMIN_CMD_COUNTER.batch_split.success.inc(); + + Ok(( + resp, + AdminCmdResult::SplitRegion(SplitResult { + regions, + derived_index, + tablet_index: log_index, + }), + )) + } +} + +#[cfg(test)] +mod test { + use std::sync::{ + mpsc::{channel, Receiver, Sender}, + Arc, + }; + + use collections::HashMap; + use engine_test::{ + ctor::{CfOptions, DbOptions}, + kv::TestTabletFactoryV2, + raft, + }; + use engine_traits::{CfOptionsExt, Peekable, WriteBatch, ALL_CFS}; + use futures::channel::mpsc::unbounded; + use kvproto::{ + metapb::RegionEpoch, + raft_cmdpb::{AdminCmdType, BatchSplitRequest, PutRequest, RaftCmdResponse, SplitRequest}, + raft_serverpb::{PeerState, RaftApplyState, RegionLocalState}, + }; + use raftstore::store::{cmd_resp::new_error, Config, ReadRunner}; + use slog::o; + use tempfile::TempDir; + use tikv_util::{ + codec::bytes::encode_bytes, + config::VersionTrack, + store::{new_learner_peer, new_peer}, + worker::{dummy_future_scheduler, dummy_scheduler, FutureScheduler, Scheduler, Worker}, + }; + + use super::*; + use crate::{ + fsm::{ApplyFsm, ApplyResReporter}, + raft::Apply, + tablet::CachedTablet, + }; + + struct MockReporter { + sender: Sender, + } + + impl MockReporter { + fn new() -> (Self, Receiver) { + let (tx, rx) = channel(); + (MockReporter { sender: tx }, rx) + } + } + + impl ApplyResReporter for MockReporter { + fn report(&self, apply_res: ApplyRes) { + let _ = self.sender.send(apply_res); + } + } + + fn new_split_req(key: &[u8], id: u64, children: Vec) -> SplitRequest { + let mut req = SplitRequest::default(); + req.set_split_key(key.to_vec()); + req.set_new_region_id(id); + req.set_new_peer_ids(children); + req + } + + fn assert_split( + 
apply: &mut Apply, + factory: &Arc, + parent_id: u64, + right_derived: bool, + new_region_ids: Vec, + split_keys: Vec>, + children_peers: Vec>, + log_index: u64, + region_boundries: Vec<(Vec, Vec)>, + expected_region_epoch: RegionEpoch, + expected_derived_index: usize, + ) { + let mut splits = BatchSplitRequest::default(); + splits.set_right_derive(right_derived); + + for ((new_region_id, children), split_key) in new_region_ids + .into_iter() + .zip(children_peers.clone()) + .zip(split_keys) + { + splits + .mut_requests() + .push(new_split_req(&split_key, new_region_id, children)); + } + + let mut req = AdminRequest::default(); + req.set_splits(splits); + + // Exec batch split + let (resp, apply_res) = apply.apply_batch_split(&req, log_index).unwrap(); + + let regions = resp.get_splits().get_regions(); + assert!(regions.len() == region_boundries.len()); + + let mut child_idx = 0; + for (i, region) in regions.iter().enumerate() { + assert_eq!(region.get_start_key().to_vec(), region_boundries[i].0); + assert_eq!(region.get_end_key().to_vec(), region_boundries[i].1); + assert_eq!(*region.get_region_epoch(), expected_region_epoch); + + if region.id == parent_id { + let state = apply.region_state(); + assert_eq!(state.tablet_index, log_index); + assert_eq!(state.get_region(), region); + let tablet_path = factory.tablet_path(region.id, log_index); + assert!(factory.exists_raw(&tablet_path)); + + match apply_res { + AdminCmdResult::SplitRegion(SplitResult { + derived_index, + tablet_index, + .. + }) => { + assert_eq!(expected_derived_index, derived_index); + assert_eq!(tablet_index, log_index); + } + _ => panic!(), + } + } else { + assert_eq! 
{ + region.get_peers().iter().map(|peer| peer.id).collect::>(), + children_peers[child_idx] + } + child_idx += 1; + + let tablet_path = + factory.tablet_path_with_prefix(SPLIT_PREFIX, region.id, RAFT_INIT_LOG_INDEX); + assert!(factory.exists_raw(&tablet_path)); + } + } + } + + #[test] + fn test_split() { + let store_id = 2; + + let mut region = Region::default(); + region.set_id(1); + region.set_end_key(b"k10".to_vec()); + region.mut_region_epoch().set_version(3); + let peers = vec![new_peer(2, 3), new_peer(4, 5), new_learner_peer(6, 7)]; + region.set_peers(peers.into()); + + let logger = slog_global::borrow_global().new(o!()); + let path = TempDir::new().unwrap(); + let cf_opts = ALL_CFS + .iter() + .copied() + .map(|cf| (cf, CfOptions::default())) + .collect(); + let factory = Arc::new(TestTabletFactoryV2::new( + path.path(), + DbOptions::default(), + cf_opts, + )); + + let tablet = factory + .open_tablet( + region.id, + Some(5), + OpenOptions::default().set_create_new(true), + ) + .unwrap(); + + let mut region_state = RegionLocalState::default(); + region_state.set_state(PeerState::Normal); + region_state.set_region(region.clone()); + region_state.set_tablet_index(5); + + let (read_scheduler, rx) = dummy_scheduler(); + let (reporter, _) = MockReporter::new(); + let mut apply = Apply::new( + region + .get_peers() + .iter() + .find(|p| p.store_id == store_id) + .unwrap() + .clone(), + region_state, + reporter, + CachedTablet::new(Some(tablet)), + factory.clone(), + read_scheduler, + logger.clone(), + ); + + let mut splits = BatchSplitRequest::default(); + splits.set_right_derive(true); + splits.mut_requests().push(new_split_req(b"k1", 1, vec![])); + let mut req = AdminRequest::default(); + req.set_splits(splits.clone()); + let err = apply.apply_batch_split(&req, 0).unwrap_err(); + // 3 followers are required. 
+ assert!(err.to_string().contains("invalid new peer id count")); + + splits.mut_requests().clear(); + req.set_splits(splits.clone()); + let err = apply.apply_batch_split(&req, 0).unwrap_err(); + // Empty requests should be rejected. + assert!(err.to_string().contains("missing split requests")); + + splits + .mut_requests() + .push(new_split_req(b"k11", 1, vec![11, 12, 13])); + req.set_splits(splits.clone()); + let resp = new_error(apply.apply_batch_split(&req, 0).unwrap_err()); + // Out of range keys should be rejected. + assert!( + resp.get_header().get_error().has_key_not_in_region(), + "{:?}", + resp + ); + + splits.mut_requests().clear(); + splits + .mut_requests() + .push(new_split_req(b"", 1, vec![11, 12, 13])); + req.set_splits(splits.clone()); + let err = apply.apply_batch_split(&req, 0).unwrap_err(); + // Empty key will not in any region exclusively. + assert!(err.to_string().contains("missing split key"), "{:?}", err); + + splits.mut_requests().clear(); + splits + .mut_requests() + .push(new_split_req(b"k2", 1, vec![11, 12, 13])); + splits + .mut_requests() + .push(new_split_req(b"k1", 1, vec![11, 12, 13])); + req.set_splits(splits.clone()); + let err = apply.apply_batch_split(&req, 0).unwrap_err(); + // keys should be in ascend order. + assert!( + err.to_string().contains("invalid split request"), + "{:?}", + err + ); + + splits.mut_requests().clear(); + splits + .mut_requests() + .push(new_split_req(b"k1", 1, vec![11, 12, 13])); + splits + .mut_requests() + .push(new_split_req(b"k2", 1, vec![11, 12])); + req.set_splits(splits.clone()); + let err = apply.apply_batch_split(&req, 0).unwrap_err(); + // All requests should be checked. 
+ assert!(err.to_string().contains("id count"), "{:?}", err); + + let cases = vec![ + // region 1["", "k10"] + // After split: region 1 ["", "k09"], + // region 10 ["k09", "k10"] + ( + 1, + false, + vec![10], + vec![b"k09".to_vec()], + vec![vec![11, 12, 13]], + 10, + vec![ + (b"".to_vec(), b"k09".to_vec()), + (b"k09".to_vec(), b"k10".to_vec()), + ], + 4, + 0, + ), + // region 1 ["", "k09"] + // After split: region 20 ["", "k01"], + // region 1 ["k01", "k09"] + ( + 1, + true, + vec![20], + vec![b"k01".to_vec()], + vec![vec![21, 22, 23]], + 20, + vec![ + (b"".to_vec(), b"k01".to_vec()), + (b"k01".to_vec(), b"k09".to_vec()), + ], + 5, + 1, + ), + // region 1 ["k01", "k09"] + // After split: region 30 ["k01", "k02"], + // region 40 ["k02", "k03"], + // region 1 ["k03", "k09"] + ( + 1, + true, + vec![30, 40], + vec![b"k02".to_vec(), b"k03".to_vec()], + vec![vec![31, 32, 33], vec![41, 42, 43]], + 30, + vec![ + (b"k01".to_vec(), b"k02".to_vec()), + (b"k02".to_vec(), b"k03".to_vec()), + (b"k03".to_vec(), b"k09".to_vec()), + ], + 7, + 2, + ), + // region 1 ["k03", "k09"] + // After split: region 1 ["k03", "k07"], + // region 50 ["k07", "k08"], + // region 60 ["k08", "k09"] + ( + 1, + false, + vec![50, 60], + vec![b"k07".to_vec(), b"k08".to_vec()], + vec![vec![51, 52, 53], vec![61, 62, 63]], + 40, + vec![ + (b"k03".to_vec(), b"k07".to_vec()), + (b"k07".to_vec(), b"k08".to_vec()), + (b"k08".to_vec(), b"k09".to_vec()), + ], + 9, + 0, + ), + ]; + + for ( + parent_id, + right_derive, + new_region_ids, + split_keys, + children_peers, + log_index, + region_boundries, + version, + expected_derived_index, + ) in cases + { + let mut expected_epoch = RegionEpoch::new(); + expected_epoch.set_version(version); + + assert_split( + &mut apply, + &factory, + parent_id, + right_derive, + new_region_ids, + split_keys, + children_peers, + log_index, + region_boundries, + expected_epoch, + expected_derived_index, + ); + } + + // Split will create checkpoint tablet, so if there are some writes 
before + // split, they should be flushed immediately. + apply.apply_put(CF_DEFAULT, b"k04", b"v4").unwrap(); + assert!(!apply.write_batch_mut().as_ref().unwrap().is_empty()); + splits.mut_requests().clear(); + splits + .mut_requests() + .push(new_split_req(b"k05", 70, vec![71, 72, 73])); + req.set_splits(splits); + apply.apply_batch_split(&req, 50).unwrap(); + assert!(apply.write_batch_mut().is_none()); + assert_eq!(apply.tablet().get_value(b"k04").unwrap().unwrap(), b"v4"); + } +} diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 21122e5559f..d39788ac611 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -127,6 +127,7 @@ impl Peer { region_state, mailbox, tablet, + store_ctx.tablet_factory.clone(), read_scheduler, logger, ); @@ -182,17 +183,31 @@ impl Peer { } #[inline] - fn propose(&mut self, ctx: &mut StoreContext, data: Vec) -> Result { - ctx.raft_metrics.propose.normal.inc(); + fn propose( + &mut self, + store_ctx: &mut StoreContext, + data: Vec, + ) -> Result { + self.propose_with_ctx(store_ctx, data, vec![]) + } + + #[inline] + fn propose_with_ctx( + &mut self, + store_ctx: &mut StoreContext, + data: Vec, + proposal_ctx: Vec, + ) -> Result { + store_ctx.raft_metrics.propose.normal.inc(); PEER_PROPOSE_LOG_SIZE_HISTOGRAM.observe(data.len() as f64); - if data.len() as u64 > ctx.cfg.raft_entry_max_size.0 { + if data.len() as u64 > store_ctx.cfg.raft_entry_max_size.0 { return Err(Error::RaftEntryTooLarge { region_id: self.region_id(), entry_size: data.len() as u64, }); } let last_index = self.raft_group().raft.raft_log.last_index(); - self.raft_group_mut().propose(vec![], data)?; + self.raft_group_mut().propose(proposal_ctx, data)?; if self.raft_group().raft.raft_log.last_index() == last_index { // The message is dropped silently, this usually due to leader absence // or transferring leader. 
Both cases can be considered as NotLeader error. @@ -269,6 +284,7 @@ impl Peer { AdminCmdResult::ConfChange(conf_change) => { self.on_apply_res_conf_change(conf_change) } + AdminCmdResult::SplitRegion(_) => unimplemented!(), } } self.raft_group_mut() @@ -405,8 +421,8 @@ impl Apply { let admin_req = req.get_admin_request(); let (admin_resp, admin_result) = match req.get_admin_request().get_cmd_type() { AdminCmdType::CompactLog => unimplemented!(), - AdminCmdType::Split => unimplemented!(), - AdminCmdType::BatchSplit => unimplemented!(), + AdminCmdType::Split => self.apply_split(admin_req, entry.index)?, + AdminCmdType::BatchSplit => self.apply_batch_split(admin_req, entry.index)?, AdminCmdType::PrepareMerge => unimplemented!(), AdminCmdType::CommitMerge => unimplemented!(), AdminCmdType::RollbackMerge => unimplemented!(), @@ -425,6 +441,7 @@ impl Apply { return Err(box_err!("invalid admin command type")); } }; + self.push_admin_result(admin_result); let mut resp = new_response(req.get_header()); resp.set_admin_response(admin_resp); diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 6f4b63630a9..e0f4e5653de 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -1,4 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + //! This module contains snapshot relative processing logic. //! //! # Snapshot State diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index ff29b3ba029..06101da8d83 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -1,8 +1,8 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::mem; +use std::{mem, sync::Arc}; -use engine_traits::{KvEngine, RaftEngine}; +use engine_traits::{KvEngine, TabletFactory}; use kvproto::{metapb, raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; use raftstore::store::{fsm::apply::DEFAULT_APPLY_WB_SIZE, ReadTask}; use slog::Logger; @@ -19,10 +19,13 @@ use crate::{ /// Apply applies all the committed commands to kv db. pub struct Apply { peer: metapb::Peer, + /// publish the update of the tablet remote_tablet: CachedTablet, tablet: EK, write_batch: Option, + tablet_factory: Arc>, + callbacks: Vec<(Vec, RaftCmdResponse)>, /// A flag indicates whether the peer is destroyed by applying admin @@ -46,6 +49,7 @@ impl Apply { region_state: RegionLocalState, res_reporter: R, mut remote_tablet: CachedTablet, + tablet_factory: Arc>, read_scheduler: Scheduler>, logger: Logger, ) -> Self { @@ -60,12 +64,18 @@ impl Apply { applied_term: 0, admin_cmd_result: vec![], region_state, + tablet_factory, read_scheduler, res_reporter, logger, } } + #[inline] + pub fn tablet_factory(&self) -> &Arc> { + &self.tablet_factory + } + #[inline] pub fn res_reporter(&self) -> &R { &self.res_reporter diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 19a52d4c5a2..aa642f5967f 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -485,6 +485,7 @@ mod tests { RegionLocalState::default(), router, CachedTablet::new(Some(tablet)), + factory, sched, logger, ); diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 6fce91114a7..f5702092622 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -38,8 +38,8 @@ use kvproto::{ kvrpcpb::ExtraOp as TxnExtraOp, metapb::{PeerRole, Region, RegionEpoch}, raft_cmdpb::{ - AdminCmdType, AdminRequest, AdminResponse, BatchSplitRequest, ChangePeerRequest, CmdType, - 
CommitMergeRequest, RaftCmdRequest, RaftCmdResponse, Request, + AdminCmdType, AdminRequest, AdminResponse, ChangePeerRequest, CmdType, CommitMergeRequest, + RaftCmdRequest, RaftCmdResponse, Request, SplitRequest, }, raft_serverpb::{MergeState, PeerState, RaftApplyState, RaftTruncatedState, RegionLocalState}, }; @@ -1904,40 +1904,37 @@ mod confchange_cmd_metric { } } -// Validate the request and the split keys -pub fn extract_split_keys( - split_reqs: &BatchSplitRequest, - region_to_split: &Region, -) -> Result>> { - if split_reqs.get_requests().is_empty() { +pub fn validate_batch_split(req: &AdminRequest, region: &Region) -> Result<()> { + if req.get_splits().get_requests().is_empty() { return Err(box_err!("missing split requests")); } - let mut keys: VecDeque> = VecDeque::with_capacity(split_reqs.get_requests().len() + 1); - for req in split_reqs.get_requests() { + + let split_reqs: &[SplitRequest] = req.get_splits().get_requests(); + let mut last_key = region.get_start_key(); + for req in split_reqs { let split_key = req.get_split_key(); if split_key.is_empty() { return Err(box_err!("missing split key")); } - if split_key - <= keys - .back() - .map_or_else(|| region_to_split.get_start_key(), Vec::as_slice) - { + + if split_key <= last_key { return Err(box_err!("invalid split request: {:?}", split_reqs)); } - if req.get_new_peer_ids().len() != region_to_split.get_peers().len() { + + if req.get_new_peer_ids().len() != region.get_peers().len() { return Err(box_err!( "invalid new peer id count, need {:?}, but got {:?}", - region_to_split.get_peers(), + region.get_peers(), req.get_new_peer_ids() )); } - keys.push_back(split_key.to_vec()); + + last_key = req.get_split_key(); } - util::check_key_in_region_exclusive(keys.back().unwrap(), region_to_split)?; + util::check_key_in_region_exclusive(last_key, region)?; - Ok(keys) + Ok(()) } // Admin commands related. 
@@ -2408,9 +2405,15 @@ where PEER_ADMIN_CMD_COUNTER.batch_split.all.inc(); - let split_reqs = req.get_splits(); - let mut keys = extract_split_keys(split_reqs, &self.region)?; let mut derived = self.region.clone(); + validate_batch_split(req, &derived)?; + + let split_reqs = req.get_splits(); + let mut keys: VecDeque<_> = split_reqs + .get_requests() + .iter() + .map(|req| req.get_split_key().to_vec()) + .collect(); info!( "split region"; @@ -6560,12 +6563,13 @@ mod tests { resp ); + splits.mut_requests().clear(); splits .mut_requests() .push(new_split_req(b"", 8, vec![9, 10, 11])); let resp = exec_split(&router, splits.clone()); - // Empty key should be rejected. - assert!(error_msg(&resp).contains("missing"), "{:?}", resp); + // Empty key will not in any region exclusively. + assert!(error_msg(&resp).contains("missing split key"), "{:?}", resp); splits.mut_requests().clear(); splits @@ -6784,4 +6788,100 @@ mod tests { rx.recv_timeout(Duration::from_millis(500)).unwrap(); system.shutdown(); } + + fn new_batch_split_request(keys: Vec>) -> AdminRequest { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::BatchSplit); + for key in keys { + let mut split_req = SplitRequest::default(); + split_req.set_split_key(key); + split_req.set_new_peer_ids(vec![1]); + req.mut_splits().mut_requests().push(split_req); + } + req + } + + #[test] + fn test_validate_batch_split() { + let mut region = Region::default(); + region.set_start_key(b"k05".to_vec()); + region.set_end_key(b"k10".to_vec()); + region.set_peers(vec![new_peer(1, 2)].into()); + + let missing_error = "missing split requests"; + let invalid_error = "invalid split request"; + let not_in_region_error = "not in region"; + let empty_error = "missing split key"; + let peer_id_error = "invalid new peer id count"; + + // case: split is deprecated + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::Split); + let mut split_req = SplitRequest::default(); + 
split_req.set_split_key(b"k06".to_vec()); + req.set_split(split_req); + assert!( + validate_batch_split(&req, ®ion) + .unwrap_err() + .to_string() + .contains(missing_error) + ); + + // case: missing peer ids + let mut req = new_batch_split_request(vec![b"k07".to_vec()]); + req.mut_splits() + .mut_requests() + .get_mut(0) + .unwrap() + .new_peer_ids + .clear(); + assert!( + validate_batch_split(&req, ®ion) + .unwrap_err() + .to_string() + .contains(peer_id_error) + ); + + let fail_cases = vec![ + // case: default admin request should be rejected + (vec![], missing_error), + // case: empty split key + (vec![vec![]], empty_error), + // case: out of order split keys + ( + vec![b"k07".to_vec(), b"k08".to_vec(), b"k06".to_vec()], + invalid_error, + ), + // case: split keys are not in region range + ( + vec![b"k04".to_vec(), b"k07".to_vec(), b"k08".to_vec()], + invalid_error, + ), + // case: split keys are not in region range + ( + vec![b"k06".to_vec(), b"k07".to_vec(), b"k11".to_vec()], + not_in_region_error, + ), + // case: duplicated split keys + (vec![b"k06".to_vec(), b"k06".to_vec()], invalid_error), + ]; + + for (split_keys, fail_str) in fail_cases { + let req = if split_keys.is_empty() { + AdminRequest::default() + } else { + new_batch_split_request(split_keys) + }; + assert!( + validate_batch_split(&req, ®ion) + .unwrap_err() + .to_string() + .contains(fail_str) + ); + } + + // case: pass the validation + let req = new_batch_split_request(vec![b"k06".to_vec(), b"k07".to_vec(), b"k08".to_vec()]); + validate_batch_split(&req, ®ion).unwrap(); + } } diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 1e571296e1a..2980f9931a5 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -742,29 +742,35 @@ pub fn conf_state_from_region(region: &metapb::Region) -> ConfState { pub struct KeysInfoFormatter< 'a, - I: std::iter::DoubleEndedIterator> - + std::iter::ExactSizeIterator> + T: 'a 
+ AsRef<[u8]>, + I: std::iter::DoubleEndedIterator + + std::iter::ExactSizeIterator + Clone, >(pub I); impl< 'a, - I: std::iter::DoubleEndedIterator> - + std::iter::ExactSizeIterator> + T: 'a + AsRef<[u8]>, + I: std::iter::DoubleEndedIterator + + std::iter::ExactSizeIterator + Clone, -> fmt::Display for KeysInfoFormatter<'a, I> +> fmt::Display for KeysInfoFormatter<'a, T, I> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut it = self.0.clone(); match it.len() { 0 => write!(f, "(no key)"), - 1 => write!(f, "key {}", log_wrappers::Value::key(it.next().unwrap())), + 1 => write!( + f, + "key {}", + log_wrappers::Value::key(it.next().unwrap().as_ref()) + ), _ => write!( f, "{} keys range from {} to {}", it.len(), - log_wrappers::Value::key(it.next().unwrap()), - log_wrappers::Value::key(it.next_back().unwrap()) + log_wrappers::Value::key(it.next().unwrap().as_ref()), + log_wrappers::Value::key(it.next_back().unwrap().as_ref()) ), } } From dff77165fb3f906ac74891fdf0cd1333ac02bf9c Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Mon, 7 Nov 2022 12:11:49 +0800 Subject: [PATCH 0315/1149] cmd: fix raft engine ctl (#13108) ref tikv/tikv#11119 None Signed-off-by: tabokie --- cmd/tikv-ctl/src/cmd.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cmd/tikv-ctl/src/cmd.rs b/cmd/tikv-ctl/src/cmd.rs index 2fec7ea9cef..eed2d7e8283 100644 --- a/cmd/tikv-ctl/src/cmd.rs +++ b/cmd/tikv-ctl/src/cmd.rs @@ -558,7 +558,11 @@ pub enum Cmd { version: u64, }, /// Control for Raft Engine - RaftEngineCtl { args: Vec }, + /// Usage: tikv-ctl raft-engine-ctl -- --help + RaftEngineCtl { + #[structopt(last = true)] + args: Vec, + }, #[structopt(external_subcommand)] External(Vec), } From 9e2305c2355d73e65fe9d5aa697b2454168c8f10 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Mon, 7 Nov 2022 15:09:50 +0800 Subject: [PATCH 0316/1149] storage: fix the flashback dead loop caused by a deleted key (#13744) close tikv/tikv#13743 The original thought is that when a 
key's latest MVCC record type is DELETE and the corresponding flashback operation is also DELETE, we skip it to avoid duplicated writing. However, this will cause the flashback to fall into a dead loop since the key doesn't have the written record with the flashback `commit_ts` and the flashback will always try to write it forever. Signed-off-by: JmPotato --- src/storage/mod.rs | 81 +++++++++++++++++++ .../txn/actions/flashback_to_version.rs | 14 +--- .../flashback_to_version_read_phase.rs | 2 +- 3 files changed, 86 insertions(+), 11 deletions(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 1c2688dd8a8..8b835bcfafd 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -4977,6 +4977,87 @@ mod tests { } } + #[test] + fn test_flashback_to_version_deleted_key() { + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) + .build() + .unwrap(); + let (tx, rx) = channel(); + let mut ts = TimeStamp::zero(); + let (k, v) = (Key::from_raw(b"k"), b"v".to_vec()); + // Write a key. + storage + .sched_txn_command( + commands::Prewrite::with_defaults( + vec![Mutation::make_put(k.clone(), v.clone())], + k.as_encoded().to_vec(), + *ts.incr(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + storage + .sched_txn_command( + commands::Commit::new(vec![k.clone()], ts, *ts.incr(), Context::default()), + expect_value_callback(tx.clone(), 1, TxnStatus::committed(ts)), + ) + .unwrap(); + rx.recv().unwrap(); + expect_value( + v, + block_on(storage.get(Context::default(), k.clone(), ts)) + .unwrap() + .0, + ); + // Delete the key. 
+ storage + .sched_txn_command( + commands::Prewrite::with_defaults( + vec![Mutation::make_delete(k.clone())], + k.as_encoded().to_vec(), + *ts.incr(), + ), + expect_ok_callback(tx.clone(), 2), + ) + .unwrap(); + rx.recv().unwrap(); + storage + .sched_txn_command( + commands::Commit::new(vec![k.clone()], ts, *ts.incr(), Context::default()), + expect_value_callback(tx.clone(), 3, TxnStatus::committed(ts)), + ) + .unwrap(); + rx.recv().unwrap(); + expect_none( + block_on(storage.get(Context::default(), Key::from_raw(b"k"), ts)) + .unwrap() + .0, + ); + // Flashback the key. + let flashback_start_ts = *ts.incr(); + let flashback_commit_ts = *ts.incr(); + storage + .sched_txn_command( + new_flashback_to_version_read_phase_cmd( + flashback_start_ts, + flashback_commit_ts, + 1.into(), + Key::from_raw(b"k"), + Key::from_raw(b"z"), + Context::default(), + ), + expect_ok_callback(tx, 4), + ) + .unwrap(); + rx.recv().unwrap(); + expect_none( + block_on(storage.get(Context::default(), k, flashback_commit_ts)) + .unwrap() + .0, + ); + } + #[test] fn test_high_priority_get_put() { let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index 5a86a6caa7d..02095d4b46d 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -126,8 +126,7 @@ pub fn flashback_to_version_lock( // - If a key doesn't exist at `self.version`, it will be put a // `WriteType::Delete`. // - If a key exists at `self.version`, it will be put the exact same record -// in `CF_WRITE` and `CF_DEFAULT` if needed with `self.commit_ts` and -// `self.start_ts`. +// in `CF_WRITE` and `CF_DEFAULT` with `self.commit_ts` and `self.start_ts`. 
pub fn flashback_to_version_write( txn: &mut MvccTxn, reader: &mut SnapshotReader, @@ -167,11 +166,6 @@ pub fn flashback_to_version_write( } else { // If the old write doesn't exist, we should put a `WriteType::Delete` record to // delete the current key when needed. - if let Some((_, latest_write)) = reader.seek_write(&key, commit_ts)? { - if latest_write.write_type == WriteType::Delete { - continue; - } - } Write::new(WriteType::Delete, start_ts, None) }; txn.put_write(key.clone(), commit_ts, new_write.as_ref().to_bytes()); @@ -334,11 +328,11 @@ pub mod tests { must_get(&mut engine, k, ts, v); must_prewrite_delete(&mut engine, k, k, *ts.incr()); must_commit(&mut engine, k, ts, *ts.incr()); - // Since the key has been deleted, flashback to version 1 should not do - // anything. + // Though the key has been deleted, flashback to version 1 still needs to write + // a new `WriteType::Delete` with the flashback `commit_ts`. assert_eq!( must_flashback_to_version(&mut engine, k, 1, *ts.incr(), *ts.incr()), - 0 + 1 ); must_get_none(&mut engine, k, ts); } diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs index d74c6f8d708..cfc6856da9c 100644 --- a/src/storage/txn/commands/flashback_to_version_read_phase.rs +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -117,7 +117,7 @@ impl ReadCommand for FlashbackToVersionReadPhase { tls_collect_keyread_histogram_vec(tag, key_locks.len() as f64); FlashbackToVersionState::ScanLock { // DO NOT pop the last key as the next key when it's the only key to prevent - // from making flashback fall into an dead loop. + // from making flashback fall into a dead loop. 
next_lock_key: if key_locks.len() > 1 { key_locks.pop().map(|(key, _)| key).unwrap() } else { From 9d6332398427cc5563731be7810454b2f20c2fd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Mon, 7 Nov 2022 09:31:50 +0100 Subject: [PATCH 0317/1149] tidb_query_datatype: Fix Geometry FieldType support (#13652) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#13651 Signed-off-by: Daniël van Eeden --- components/tidb_query_datatype/src/codec/row/v2/compat_v1.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/components/tidb_query_datatype/src/codec/row/v2/compat_v1.rs b/components/tidb_query_datatype/src/codec/row/v2/compat_v1.rs index 79c08ec5404..8d0e34dfdf7 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/compat_v1.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/compat_v1.rs @@ -73,6 +73,7 @@ pub trait V1CompatibleEncoder: DatumFlagAndPayloadEncoder { FieldTypeTp::VarChar | FieldTypeTp::VarString | FieldTypeTp::String + | FieldTypeTp::Geometry | FieldTypeTp::TinyBlob | FieldTypeTp::MediumBlob | FieldTypeTp::LongBlob From 7598dd26168b317f50ddc528600b7d85e97f9011 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Tue, 8 Nov 2022 11:43:50 +0800 Subject: [PATCH 0318/1149] storage: calculate last_change_ts in prewrite (#13721) ref tikv/tikv#13694 This commit implements the different cases when last_change_ts is calculated in prewrite: 1. Inherit from the pessimistic lock 2. Calculate it when checking the new version 3. 
Amend the pessimistic lock Signed-off-by: Yilin Chen --- src/storage/mvcc/reader/scanner/forward.rs | 13 +- src/storage/txn/actions/prewrite.rs | 295 ++++++++++++++++++++- src/storage/txn/commands/prewrite.rs | 194 +++++++++++++- src/storage/txn/store.rs | 23 +- 4 files changed, 508 insertions(+), 17 deletions(-) diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 709dc5803d1..03f44deed7c 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -2454,12 +2454,6 @@ mod delta_entry_tests { let last_write = writes.last(); let max_commit_ts = last_write.map(|(_, commit_ts, ..)| *commit_ts).unwrap_or(0); - let (mut last_change_ts, mut versions_to_last_change) = (0,0); - // TODO: Remove `*lock_type == LockType::Pessimistic` after calculating last_change_ts for prewrite. - if *lock_type == LockType::Pessimistic && - let Some((_, commit_ts, WriteType::Put | WriteType::Delete, _)) = last_write { - (last_change_ts, versions_to_last_change) = (*commit_ts, 1); - } let for_update_ts = std::cmp::max(*ts, max_commit_ts + 1); if *ts <= to_ts { @@ -2470,7 +2464,6 @@ mod delta_entry_tests { .for_update_ts(for_update_ts.into()) .primary(key) .value(&value) - .last_change(last_change_ts.into(), versions_to_last_change) .build_prewrite(*lock_type, is_short_value(&value)); entries_of_key.push(entry); } @@ -2610,10 +2603,12 @@ mod delta_entry_tests { // Do assertions one by one so that if it fails it won't print too long panic // message. for i in 0..std::cmp::max(actual.len(), expected.len()) { + // We don't care about last_change_ts here. Use a trick to ignore them. 
+ let actual_erased = actual[i].erasing_last_change_ts(); assert_eq!( - actual[i], expected[i], + actual_erased, expected[i], "item {} not match: expected {:?}, but got {:?}", - i, &expected[i], &actual[i] + i, &expected[i], &actual_erased ); } }; diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 40709032d61..8abaf1428e4 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -21,7 +21,10 @@ use crate::storage::{ }, Error, ErrorInner, Lock, LockType, MvccTxn, Result, SnapshotReader, }, - txn::{actions::check_data_constraint::check_data_constraint, LockInfo}, + txn::{ + actions::check_data_constraint::check_data_constraint, sched_pool::tls_can_enable, + scheduler::LAST_CHANGE_TS, LockInfo, + }, Snapshot, }; @@ -62,7 +65,7 @@ pub fn prewrite( let lock_status = match reader.load_lock(&mutation.key)? { Some(lock) => mutation.check_lock(lock, pessimistic_action)?, None if matches!(pessimistic_action, DoPessimisticCheck) => { - amend_pessimistic_lock(&mutation, reader)?; + amend_pessimistic_lock(&mut mutation, reader)?; lock_amended = true; LockStatus::None } @@ -236,6 +239,8 @@ struct PrewriteMutation<'a> { lock_type: Option, lock_ttl: u64, + last_change_ts: TimeStamp, + versions_to_last_change: u64, should_not_exist: bool, should_not_write: bool, @@ -273,6 +278,8 @@ impl<'a> PrewriteMutation<'a> { lock_type, lock_ttl: txn_props.lock_ttl, + last_change_ts: TimeStamp::zero(), + versions_to_last_change: 0, should_not_exist, should_not_write, @@ -320,6 +327,9 @@ impl<'a> PrewriteMutation<'a> { return Err(ErrorInner::KeyIsLocked(self.lock_info(lock)?).into()); } + self.last_change_ts = lock.last_change_ts; + self.versions_to_last_change = lock.versions_to_last_change; + if lock.lock_type == LockType::Pessimistic { // TODO: remove it in future if !self.txn_props.is_pessimistic() { @@ -350,7 +360,7 @@ impl<'a> PrewriteMutation<'a> { } fn check_for_newer_version( - &self, + &mut self, reader: &mut 
SnapshotReader, ) -> Result> { let mut seek_ts = TimeStamp::max(); @@ -365,6 +375,10 @@ impl<'a> PrewriteMutation<'a> { // TODO: Maybe we need to add a new error for the rolled back case. self.write_conflict_error(&write, commit_ts, WriteConflictReason::SelfRolledBack)?; } + if seek_ts == TimeStamp::max() { + (self.last_change_ts, self.versions_to_last_change) = + write.next_last_change_info(commit_ts); + } match self.txn_props.kind { TransactionKind::Optimistic(_) => { if commit_ts > self.txn_props.start_ts { @@ -440,6 +454,11 @@ impl<'a> PrewriteMutation<'a> { self.txn_props.txn_size, self.min_commit_ts, ); + // Only Lock needs to record `last_change_ts` in its write record, Put or Delete + // records themselves are effective changes. + if tls_can_enable(LAST_CHANGE_TS) && self.lock_type == Some(LockType::Lock) { + lock = lock.set_last_change(self.last_change_ts, self.versions_to_last_change); + } if let Some(value) = self.value { if is_short_value(&value) { @@ -503,7 +522,7 @@ impl<'a> PrewriteMutation<'a> { } fn check_assertion( - &self, + &mut self, reader: &mut SnapshotReader, write: &Option<(Write, TimeStamp)>, write_loaded: bool, @@ -694,11 +713,11 @@ fn async_commit_timestamps( // If the data is not changed after acquiring the lock, we can still prewrite // the key. fn amend_pessimistic_lock( - mutation: &PrewriteMutation<'_>, + mutation: &mut PrewriteMutation<'_>, reader: &mut SnapshotReader, ) -> Result<()> { let write = reader.seek_write(&mutation.key, TimeStamp::max())?; - if let Some((commit_ts, _)) = write.as_ref() { + if let Some((commit_ts, write)) = write.as_ref() { // The invariants of pessimistic locks are: // 1. lock's for_update_ts >= key's latest commit_ts // 2. 
lock's for_update_ts >= txn's start_ts @@ -727,6 +746,8 @@ fn amend_pessimistic_lock( } .into()); } + (mutation.last_change_ts, mutation.versions_to_last_change) = + write.next_last_change_info(*commit_ts); } // Used pipelined pessimistic lock acquiring in this txn but failed // Luckily no other txn modified this lock, amend it by treat it as optimistic @@ -2194,4 +2215,266 @@ pub mod tests { must_commit(&mut engine, key, 21, 22); must_pessimistic_prewrite_insert(&mut engine, key, value, key, 23, 23, DoConstraintCheck); } + + #[cfg(test)] + fn test_calculate_last_change_ts_from_latest_write_impl( + prewrite_func: impl Fn(&mut RocksEngine, LockType, /* start_ts */ u64), + ) { + use engine_traits::CF_WRITE; + use pd_client::FeatureGate; + + use crate::storage::txn::sched_pool::set_tls_feature_gate; + + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let key = b"k"; + + // Latest change ts should not be enabled on TiKV 6.4 + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.4.0").unwrap(); + set_tls_feature_gate(feature_gate); + let write = Write::new(WriteType::Put, 5.into(), Some(b"value".to_vec())); + engine + .put_cf( + Default::default(), + CF_WRITE, + Key::from_raw(key).append_ts(8.into()), + write.as_ref().to_bytes(), + ) + .unwrap(); + prewrite_func(&mut engine, LockType::Lock, 10); + let lock = must_locked(&mut engine, key, 10); + assert_eq!(lock.last_change_ts, TimeStamp::zero()); + assert_eq!(lock.versions_to_last_change, 0); + must_rollback(&mut engine, key, 10, false); + + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.5.0").unwrap(); + set_tls_feature_gate(feature_gate); + + // Latest version is a PUT. But as we are prewriting a PUT, no need to record + // `last_change_ts`. 
+ let write = Write::new(WriteType::Put, 15.into(), Some(b"value".to_vec())); + engine + .put_cf( + Default::default(), + CF_WRITE, + Key::from_raw(key).append_ts(20.into()), + write.as_ref().to_bytes(), + ) + .unwrap(); + prewrite_func(&mut engine, LockType::Put, 25); + let lock = must_locked(&mut engine, key, 25); + assert_eq!(lock.last_change_ts, TimeStamp::zero()); + assert_eq!(lock.versions_to_last_change, 0); + must_rollback(&mut engine, key, 25, false); + + // Latest version is a PUT + let write = Write::new(WriteType::Put, 30.into(), Some(b"value".to_vec())); + engine + .put_cf( + Default::default(), + CF_WRITE, + Key::from_raw(key).append_ts(35.into()), + write.as_ref().to_bytes(), + ) + .unwrap(); + prewrite_func(&mut engine, LockType::Lock, 40); + let lock = must_locked(&mut engine, key, 40); + assert_eq!(lock.last_change_ts, 35.into()); + assert_eq!(lock.versions_to_last_change, 1); + must_rollback(&mut engine, key, 40, false); + + // Latest version is a DELETE + let write = Write::new(WriteType::Delete, 45.into(), None); + engine + .put_cf( + Default::default(), + CF_WRITE, + Key::from_raw(key).append_ts(50.into()), + write.as_ref().to_bytes(), + ) + .unwrap(); + prewrite_func(&mut engine, LockType::Lock, 55); + let lock = must_locked(&mut engine, key, 55); + assert_eq!(lock.last_change_ts, 50.into()); + assert_eq!(lock.versions_to_last_change, 1); + must_rollback(&mut engine, key, 55, false); + + // Latest version is a LOCK without last_change_ts. Set the last_change_ts of + // the new record to zero. 
+ let write = Write::new(WriteType::Lock, 60.into(), None); + engine + .put_cf( + Default::default(), + CF_WRITE, + Key::from_raw(key).append_ts(65.into()), + write.as_ref().to_bytes(), + ) + .unwrap(); + prewrite_func(&mut engine, LockType::Lock, 70); + let lock = must_locked(&mut engine, key, 70); + assert!(lock.last_change_ts.is_zero()); + assert_eq!(lock.versions_to_last_change, 0); + must_rollback(&mut engine, key, 70, false); + + // Latest version is a ROLLBACK without last_change_ts. Set the last_change_ts + // of the new record to zero. + let write = Write::new(WriteType::Rollback, 75.into(), None); + engine + .put_cf( + Default::default(), + CF_WRITE, + Key::from_raw(key).append_ts(80.into()), + write.as_ref().to_bytes(), + ) + .unwrap(); + prewrite_func(&mut engine, LockType::Lock, 85); + let lock = must_locked(&mut engine, key, 85); + assert!(lock.last_change_ts.is_zero()); + assert_eq!(lock.versions_to_last_change, 0); + must_rollback(&mut engine, key, 85, false); + + // Latest version is a LOCK with last_change_ts + let write = Write::new(WriteType::Lock, 90.into(), None).set_last_change(20.into(), 6); + engine + .put_cf( + Default::default(), + CF_WRITE, + Key::from_raw(key).append_ts(95.into()), + write.as_ref().to_bytes(), + ) + .unwrap(); + prewrite_func(&mut engine, LockType::Lock, 100); + let lock = must_locked(&mut engine, key, 100); + assert_eq!(lock.last_change_ts, 20.into()); + assert_eq!(lock.versions_to_last_change, 7); + must_rollback(&mut engine, key, 100, false); + + // Latest version is a LOCK with last_change_ts + let write = Write::new(WriteType::Lock, 105.into(), None).set_last_change(20.into(), 8); + engine + .put_cf( + Default::default(), + CF_WRITE, + Key::from_raw(key).append_ts(110.into()), + write.as_ref().to_bytes(), + ) + .unwrap(); + prewrite_func(&mut engine, LockType::Lock, 120); + let lock = must_locked(&mut engine, key, 120); + assert_eq!(lock.last_change_ts, 20.into()); + assert_eq!(lock.versions_to_last_change, 9); + 
must_rollback(&mut engine, key, 120, false); + } + + #[test] + fn test_optimistic_txn_calculate_last_change_ts() { + test_calculate_last_change_ts_from_latest_write_impl(|engine, tp, start_ts| match tp { + LockType::Put => must_prewrite_put(engine, b"k", b"value", b"k", start_ts), + LockType::Delete => must_prewrite_delete(engine, b"k", b"k", start_ts), + LockType::Lock => must_prewrite_lock(engine, b"k", b"k", start_ts), + _ => unreachable!(), + }); + } + + #[test] + fn test_pessimistic_amend_txn_calculate_last_change_ts() { + test_calculate_last_change_ts_from_latest_write_impl(|engine, tp, start_ts| match tp { + LockType::Put => must_pessimistic_prewrite_put( + engine, + b"k", + b"value", + b"k", + start_ts, + start_ts, + DoPessimisticCheck, + ), + LockType::Delete => must_pessimistic_prewrite_delete( + engine, + b"k", + b"k", + start_ts, + start_ts, + DoPessimisticCheck, + ), + LockType::Lock => must_pessimistic_prewrite_lock( + engine, + b"k", + b"k", + start_ts, + start_ts, + DoPessimisticCheck, + ), + _ => unreachable!(), + }); + } + + #[test] + fn test_inherit_last_change_ts_from_pessimistic_lock() { + use engine_traits::CF_LOCK; + use pd_client::FeatureGate; + + use crate::storage::txn::sched_pool::set_tls_feature_gate; + + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.5.0").unwrap(); + set_tls_feature_gate(feature_gate); + + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let key = b"k"; + let put_lock = + |engine: &mut RocksEngine, ts: u64, last_change_ts: u64, versions_to_last_change| { + let lock = Lock::new( + LockType::Pessimistic, + key.to_vec(), + ts.into(), + 100, + None, + ts.into(), + 5, + ts.into(), + ) + .set_last_change(last_change_ts.into(), versions_to_last_change); + engine + .put_cf( + Default::default(), + CF_LOCK, + Key::from_raw(key), + lock.to_bytes(), + ) + .unwrap(); + }; + + // Prewrite LOCK from pessimistic lock without `last_change_ts` + put_lock(&mut engine, 10, 0, 0); + 
must_pessimistic_prewrite_lock(&mut engine, key, key, 10, 10, DoPessimisticCheck); + let lock = must_locked(&mut engine, key, 10); + assert_eq!(lock.last_change_ts, TimeStamp::zero()); + assert_eq!(lock.versions_to_last_change, 0); + must_rollback(&mut engine, key, 10, false); + + // Prewrite LOCK from pessimistic lock with `last_change_ts` + put_lock(&mut engine, 20, 15, 3); + must_pessimistic_prewrite_lock(&mut engine, key, key, 20, 20, DoPessimisticCheck); + let lock = must_locked(&mut engine, key, 20); + assert_eq!(lock.last_change_ts, 15.into()); + assert_eq!(lock.versions_to_last_change, 3); + must_rollback(&mut engine, key, 20, false); + + // Prewrite PUT from pessimistic lock with `last_change_ts` + put_lock(&mut engine, 30, 15, 5); + must_pessimistic_prewrite_put(&mut engine, key, b"value", key, 30, 30, DoPessimisticCheck); + let lock = must_locked(&mut engine, key, 30); + assert_eq!(lock.last_change_ts, TimeStamp::zero()); + assert_eq!(lock.versions_to_last_change, 0); + must_rollback(&mut engine, key, 30, false); + + // Prewrite DELETE from pessimistic lock with `last_change_ts` + put_lock(&mut engine, 40, 15, 5); + must_pessimistic_prewrite_delete(&mut engine, key, key, 40, 30, DoPessimisticCheck); + let lock = must_locked(&mut engine, key, 40); + assert_eq!(lock.last_change_ts, TimeStamp::zero()); + assert_eq!(lock.versions_to_last_change, 0); + must_rollback(&mut engine, key, 40, false); + } } diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 2b0915a5fdc..06f9cd1f818 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -848,7 +848,8 @@ fn handle_1pc_locks(txn: &mut MvccTxn, commit_ts: TimeStamp) -> ReleasedLocks { WriteType::from_lock_type(lock.lock_type).unwrap(), txn.start_ts, lock.short_value, - ); + ) + .set_last_change(lock.last_change_ts, lock.versions_to_last_change); // Transactions committed with 1PC should be impossible to overwrite rollback // records. 
txn.put_write(key.clone(), commit_ts, write.as_ref().to_bytes()); @@ -2505,4 +2506,195 @@ mod tests { assert_eq!(res.min_commit_ts, 18.into(), "{:?}", res); must_unlocked(&mut engine, b"k2"); } + + #[test] + fn test_1pc_calculate_last_change_ts() { + use pd_client::FeatureGate; + + use crate::storage::txn::sched_pool::set_tls_feature_gate; + + let mut engine = TestEngineBuilder::new().build().unwrap(); + let cm = concurrency_manager::ConcurrencyManager::new(1.into()); + + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.5.0").unwrap(); + set_tls_feature_gate(feature_gate); + + let key = b"k"; + let value = b"v"; + must_prewrite_put(&mut engine, key, value, key, 10); + must_commit(&mut engine, key, 10, 20); + + // 1PC write a new LOCK + let mutations = vec![Mutation::make_lock(Key::from_raw(key))]; + let mut statistics = Statistics::default(); + let res = prewrite_with_cm( + &mut engine, + cm.clone(), + &mut statistics, + mutations.clone(), + key.to_vec(), + 30, + Some(40), + ) + .unwrap(); + must_unlocked(&mut engine, key); + let write = must_written(&mut engine, key, 30, res.one_pc_commit_ts, WriteType::Lock); + assert_eq!(write.last_change_ts, 20.into()); + assert_eq!(write.versions_to_last_change, 1); + + // 1PC write another LOCK + let res = prewrite_with_cm( + &mut engine, + cm.clone(), + &mut statistics, + mutations, + key.to_vec(), + 50, + Some(60), + ) + .unwrap(); + must_unlocked(&mut engine, key); + let write = must_written(&mut engine, key, 50, res.one_pc_commit_ts, WriteType::Lock); + assert_eq!(write.last_change_ts, 20.into()); + assert_eq!(write.versions_to_last_change, 2); + + // 1PC write a PUT + let mutations = vec![Mutation::make_put(Key::from_raw(key), b"v2".to_vec())]; + let res = prewrite_with_cm( + &mut engine, + cm.clone(), + &mut statistics, + mutations, + key.to_vec(), + 70, + Some(80), + ) + .unwrap(); + must_unlocked(&mut engine, key); + let write = must_written(&mut engine, key, 70, res.one_pc_commit_ts, 
WriteType::Put); + assert_eq!(write.last_change_ts, TimeStamp::zero()); + assert_eq!(write.versions_to_last_change, 0); + + // TiKV 6.4 should not have last_change_ts. + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.4.0").unwrap(); + set_tls_feature_gate(feature_gate); + let mutations = vec![Mutation::make_lock(Key::from_raw(key))]; + let res = prewrite_with_cm( + &mut engine, + cm, + &mut statistics, + mutations, + key.to_vec(), + 80, + Some(90), + ) + .unwrap(); + must_unlocked(&mut engine, key); + let write = must_written(&mut engine, key, 80, res.one_pc_commit_ts, WriteType::Lock); + assert_eq!(write.last_change_ts, TimeStamp::zero()); + assert_eq!(write.versions_to_last_change, 0); + } + + #[test] + fn test_pessimistic_1pc_calculate_last_change_ts() { + use pd_client::FeatureGate; + + use crate::storage::txn::sched_pool::set_tls_feature_gate; + + let mut engine = TestEngineBuilder::new().build().unwrap(); + let cm = concurrency_manager::ConcurrencyManager::new(1.into()); + + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.5.0").unwrap(); + set_tls_feature_gate(feature_gate); + + let key = b"k"; + let value = b"v"; + must_prewrite_put(&mut engine, key, value, key, 10); + must_commit(&mut engine, key, 10, 20); + + // Pessimistic 1PC write a new LOCK + must_acquire_pessimistic_lock(&mut engine, key, key, 30, 30); + let mutations = vec![(Mutation::make_lock(Key::from_raw(key)), DoPessimisticCheck)]; + let mut statistics = Statistics::default(); + let res = pessimistic_prewrite_with_cm( + &mut engine, + cm.clone(), + &mut statistics, + mutations.clone(), + key.to_vec(), + 30, + 30, + Some(40), + ) + .unwrap(); + must_unlocked(&mut engine, key); + let write = must_written(&mut engine, key, 30, res.one_pc_commit_ts, WriteType::Lock); + assert_eq!(write.last_change_ts, 20.into()); + assert_eq!(write.versions_to_last_change, 1); + + // Pessimistic 1PC write another LOCK + must_acquire_pessimistic_lock(&mut engine, 
key, key, 50, 50); + let res = pessimistic_prewrite_with_cm( + &mut engine, + cm.clone(), + &mut statistics, + mutations, + key.to_vec(), + 50, + 50, + Some(60), + ) + .unwrap(); + must_unlocked(&mut engine, key); + let write = must_written(&mut engine, key, 50, res.one_pc_commit_ts, WriteType::Lock); + assert_eq!(write.last_change_ts, 20.into()); + assert_eq!(write.versions_to_last_change, 2); + + // Pessimistic 1PC write a PUT + must_acquire_pessimistic_lock(&mut engine, key, key, 70, 70); + let mutations = vec![( + Mutation::make_put(Key::from_raw(key), b"v2".to_vec()), + DoPessimisticCheck, + )]; + let res = pessimistic_prewrite_with_cm( + &mut engine, + cm.clone(), + &mut statistics, + mutations, + key.to_vec(), + 70, + 70, + Some(80), + ) + .unwrap(); + must_unlocked(&mut engine, key); + let write = must_written(&mut engine, key, 70, res.one_pc_commit_ts, WriteType::Put); + assert_eq!(write.last_change_ts, TimeStamp::zero()); + assert_eq!(write.versions_to_last_change, 0); + + // TiKV 6.4 should not have last_change_ts. + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.4.0").unwrap(); + set_tls_feature_gate(feature_gate); + must_acquire_pessimistic_lock(&mut engine, key, key, 80, 80); + let mutations = vec![(Mutation::make_lock(Key::from_raw(key)), DoPessimisticCheck)]; + let res = pessimistic_prewrite_with_cm( + &mut engine, + cm, + &mut statistics, + mutations, + key.to_vec(), + 80, + 80, + Some(90), + ) + .unwrap(); + must_unlocked(&mut engine, key); + let write = must_written(&mut engine, key, 80, res.one_pc_commit_ts, WriteType::Lock); + assert_eq!(write.last_change_ts, TimeStamp::zero()); + assert_eq!(write.versions_to_last_change, 0); + } } diff --git a/src/storage/txn/store.rs b/src/storage/txn/store.rs index b2f25cff640..9a38979c71b 100644 --- a/src/storage/txn/store.rs +++ b/src/storage/txn/store.rs @@ -1,7 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. 
use kvproto::kvrpcpb::IsolationLevel; -use txn_types::{Key, KvPair, OldValue, TimeStamp, TsSet, Value, WriteRef}; +use txn_types::{Key, KvPair, Lock, OldValue, TimeStamp, TsSet, Value, WriteRef}; use super::{Error, ErrorInner, Result}; use crate::storage::{ @@ -159,6 +159,27 @@ impl TxnEntry { } => old_value, } } + + pub fn erasing_last_change_ts(&self) -> TxnEntry { + let mut e = self.clone(); + match &mut e { + TxnEntry::Prewrite { + lock: (_, value), .. + } => { + let l = Lock::parse(value).unwrap(); + *value = l.set_last_change(TimeStamp::zero(), 0).to_bytes(); + } + TxnEntry::Commit { + write: (_, value), .. + } => { + let mut w = WriteRef::parse(value).unwrap(); + w.last_change_ts = TimeStamp::zero(); + w.versions_to_last_change = 0; + *value = w.to_bytes(); + } + } + e + } } impl TxnEntry { From 8eaa805dc440dc6c056547c5ebb3989c56dde0c3 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Tue, 8 Nov 2022 20:27:50 +0800 Subject: [PATCH 0319/1149] backup: udpate rusoto to support backup to ap-southeast-3 (#13750) close tikv/tikv#13751 Signed-off-by: 3pointer --- Cargo.lock | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d49c13ae18c..a1b238d0148 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4697,7 +4697,7 @@ dependencies = [ [[package]] name = "rusoto_core" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#5fcf2d1c36b93d0146cc49f257dd850e01b6e4db" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" dependencies = [ "async-trait", "base64", @@ -4721,7 +4721,7 @@ dependencies = [ [[package]] name = "rusoto_credential" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#5fcf2d1c36b93d0146cc49f257dd850e01b6e4db" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" dependencies = [ "async-trait", 
"chrono", @@ -4738,7 +4738,7 @@ dependencies = [ [[package]] name = "rusoto_kms" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#5fcf2d1c36b93d0146cc49f257dd850e01b6e4db" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" dependencies = [ "async-trait", "bytes", @@ -4751,7 +4751,7 @@ dependencies = [ [[package]] name = "rusoto_mock" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#5fcf2d1c36b93d0146cc49f257dd850e01b6e4db" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" dependencies = [ "async-trait", "chrono", @@ -4765,7 +4765,7 @@ dependencies = [ [[package]] name = "rusoto_s3" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#5fcf2d1c36b93d0146cc49f257dd850e01b6e4db" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" dependencies = [ "async-trait", "bytes", @@ -4779,7 +4779,7 @@ dependencies = [ [[package]] name = "rusoto_signature" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#5fcf2d1c36b93d0146cc49f257dd850e01b6e4db" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" dependencies = [ "base64", "bytes", @@ -4804,7 +4804,7 @@ dependencies = [ [[package]] name = "rusoto_sts" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#5fcf2d1c36b93d0146cc49f257dd850e01b6e4db" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" dependencies = [ "async-trait", "bytes", From 2a61f0777da4dfb635316d612f663932d0874b0c Mon Sep 17 00:00:00 2001 From: guoxiang1996 Date: Wed, 9 Nov 2022 13:11:51 +0800 Subject: [PATCH 0320/1149] 
debugger: fix compacting raftdb with tikv-ctl (#13742) close tikv/tikv#13515 Signed-off-by: kuiper Co-authored-by: Xinye Tao --- src/server/debug.rs | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/src/server/debug.rs b/src/server/debug.rs index 7f85aabcf50..6ee676ad1c4 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -120,6 +120,10 @@ impl From for debugpb::BottommostLevelCompaction { } } +trait InnerRocksEngineExtractor { + fn get_db_from_type(&self, db: DbType) -> Result<&RocksEngine>; +} + #[derive(Clone)] pub struct Debugger { engines: Engines, @@ -127,6 +131,26 @@ pub struct Debugger { cfg_controller: ConfigController, } +impl InnerRocksEngineExtractor for Debugger { + default fn get_db_from_type(&self, db: DbType) -> Result<&RocksEngine> { + match db { + DbType::Kv => Ok(&self.engines.kv), + DbType::Raft => Err(box_err!("Get raft db is not allowed")), + _ => Err(box_err!("invalid DB type")), + } + } +} + +impl InnerRocksEngineExtractor for Debugger { + fn get_db_from_type(&self, db: DbType) -> Result<&RocksEngine> { + match db { + DbType::Kv => Ok(&self.engines.kv), + DbType::Raft => Ok(&self.engines.raft), + _ => Err(box_err!("invalid DB type")), + } + } +} + impl Debugger { pub fn new( engines: Engines, @@ -163,14 +187,6 @@ impl Debugger { Ok(regions) } - fn get_db_from_type(&self, db: DbType) -> Result<&RocksEngine> { - match db { - DbType::Kv => Ok(&self.engines.kv), - DbType::Raft => Err(box_err!("Get raft db is not allowed")), - _ => Err(box_err!("invalid DB type")), - } - } - pub fn get(&self, db: DbType, cf: &str, key: &[u8]) -> Result> { validate_db_and_cf(db, cf)?; let db = self.get_db_from_type(db)?; @@ -2272,4 +2288,14 @@ mod tests { .get_api_version() ) } + + #[test] + fn test_compact() { + let debugger = new_debugger(); + let compact = |db, cf| debugger.compact(db, cf, &[0], &[0xFF], 1, Some("skip").into()); + compact(DbType::Kv, CF_DEFAULT).unwrap(); + 
compact(DbType::Kv, CF_LOCK).unwrap(); + compact(DbType::Kv, CF_WRITE).unwrap(); + compact(DbType::Raft, CF_DEFAULT).unwrap(); + } } From 8831c7d6032e46337c7c9e2970fa0942d6787403 Mon Sep 17 00:00:00 2001 From: Connor Date: Wed, 9 Nov 2022 16:21:51 +0800 Subject: [PATCH 0321/1149] raftstore: Introduce witness peer (#12972) ref tikv/tikv#12876 Introduce witness peer Signed-off-by: Connor1996 --- Cargo.toml | 4 +- .../operation/command/admin/conf_change.rs | 1 + .../raftstore-v2/src/operation/command/mod.rs | 5 +- .../raftstore-v2/src/operation/query/local.rs | 2 +- components/raftstore-v2/src/raft/peer.rs | 4 +- .../raftstore/src/store/entry_storage.rs | 2 +- components/raftstore/src/store/fsm/apply.rs | 248 +++++--- components/raftstore/src/store/fsm/peer.rs | 88 ++- .../raftstore/src/store/local_metrics.rs | 3 + components/raftstore/src/store/metrics.rs | 2 + components/raftstore/src/store/peer.rs | 50 +- .../raftstore/src/store/peer_storage.rs | 205 +++++-- components/raftstore/src/store/util.rs | 83 ++- .../src/store/worker/check_leader.rs | 2 +- .../raftstore/src/store/worker/metrics.rs | 1 + components/raftstore/src/store/worker/read.rs | 26 +- components/test_pd_client/src/pd.rs | 4 +- components/test_raftstore/src/util.rs | 1 - components/tikv_util/src/store/mod.rs | 5 +- components/tikv_util/src/store/peer.rs | 13 + components/tikv_util/src/store/region.rs | 8 +- src/server/raft_client.rs | 17 +- tests/integrations/raftstore/mod.rs | 1 + .../raftstore/test_unsafe_recovery.rs | 1 - tests/integrations/raftstore/test_witness.rs | 537 ++++++++++++++++++ 25 files changed, 1121 insertions(+), 192 deletions(-) create mode 100644 tests/integrations/raftstore/test_witness.rs diff --git a/Cargo.toml b/Cargo.toml index 4ccf0a2ad93..a408e4a84ca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -220,8 +220,8 @@ procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229 # When you modify TiKV cooperatively with kvproto, this will be useful to submit the PR 
to TiKV and the PR to # kvproto at the same time. # After the PR to kvproto is merged, remember to comment this out and run `cargo update -p kvproto`. -[patch.'https://github.com/pingcap/kvproto'] -# kvproto = { git = "https://github.com/your_github_id/kvproto", branch = "your_branch" } +# [patch.'https://github.com/pingcap/kvproto'] +# kvproto = { git = "https://github.com/your_github_id/kvproto", branch="your_branch" } [workspace] # See https://github.com/rust-lang/rfcs/blob/master/text/2957-cargo-features2.md diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 03d0690fe25..8b4b7fe293f 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -95,6 +95,7 @@ impl Peer { util::check_conf_change( &ctx.cfg, self.raft_group(), + self.region(), self.peer(), changes.as_ref(), &cc, diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index d39788ac611..2d89c3494d3 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -199,7 +199,10 @@ impl Peer { proposal_ctx: Vec, ) -> Result { store_ctx.raft_metrics.propose.normal.inc(); - PEER_PROPOSE_LOG_SIZE_HISTOGRAM.observe(data.len() as f64); + store_ctx + .raft_metrics + .propose_log_size + .observe(data.len() as f64); if data.len() as u64 > store_ctx.cfg.raft_entry_max_size.0 { return Err(Error::RaftEntryTooLarge { region_id: self.region_id(), diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 12df1e7926f..0736bc13fd8 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -565,7 +565,7 @@ mod tests { region1.set_region_epoch(epoch13.clone()); 
let term6 = 6; let mut lease = Lease::new(Duration::seconds(10), Duration::milliseconds(2500)); - let read_progress = Arc::new(RegionReadProgress::new(®ion1, 1, 1, "".to_owned())); + let read_progress = Arc::new(RegionReadProgress::new(®ion1, 1, 1, 1)); let mut cmd = RaftCmdRequest::default(); let mut header = RaftRequestHeader::default(); diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 8619b8cf2d4..eb98851b3bb 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -128,12 +128,12 @@ impl Peer { destroy_progress: DestroyProgress::None, raft_group, logger, - pending_reads: ReadIndexQueue::new(tag.clone()), + pending_reads: ReadIndexQueue::new(tag), read_progress: Arc::new(RegionReadProgress::new( ®ion, applied_index, REGION_READ_PROGRESS_CAP, - tag, + peer_id, )), leader_lease: Lease::new( cfg.raft_store_max_leader_lease(), diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index fcc3d535aa2..705e2a776fa 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -1004,7 +1004,7 @@ impl EntryStorage { } #[inline] - pub fn set_applied_state(&mut self, apply_state: RaftApplyState) { + pub fn set_apply_state(&mut self, apply_state: RaftApplyState) { self.apply_state = apply_state; } diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index f5702092622..8cb7f58baca 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -9,6 +9,7 @@ use std::{ cmp::{Ord, Ordering as CmpOrdering}, collections::VecDeque, fmt::{self, Debug, Formatter}, + io::BufRead, mem, ops::{Deref, DerefMut, Range as StdRange}, sync::{ @@ -36,7 +37,7 @@ use fail::fail_point; use kvproto::{ import_sstpb::SstMeta, kvrpcpb::ExtraOp as TxnExtraOp, - metapb::{PeerRole, Region, 
RegionEpoch}, + metapb::{self, PeerRole, Region, RegionEpoch}, raft_cmdpb::{ AdminCmdType, AdminRequest, AdminResponse, ChangePeerRequest, CmdType, CommitMergeRequest, RaftCmdRequest, RaftCmdResponse, Request, SplitRequest, @@ -45,6 +46,7 @@ use kvproto::{ }; use pd_client::{new_bucket_stats, BucketMeta, BucketStat}; use prometheus::local::LocalHistogram; +use protobuf::{wire_format::WireType, CodedInputStream}; use raft::eraftpb::{ ConfChange, ConfChangeType, ConfChangeV2, Entry, EntryType, Snapshot as RaftSnapshot, }; @@ -59,7 +61,7 @@ use tikv_util::{ memory::HeapSize, mpsc::{loose_bounded, LooseBoundedSender, Receiver}, safe_panic, slow_log, - store::{find_peer, find_peer_mut, is_learner, remove_peer}, + store::{find_peer, find_peer_by_id, find_peer_mut, is_learner, remove_peer}, time::{duration_to_sec, Instant}, warn, worker::Scheduler, @@ -816,6 +818,43 @@ fn should_sync_log(cmd: &RaftCmdRequest) -> bool { false } +fn can_witness_skip(entry: &Entry) -> bool { + // need to handle ConfChange entry type + if entry.get_entry_type() != EntryType::EntryNormal { + return false; + } + + // HACK: check admin request field in serialized data from `RaftCmdRequest` + // without deserializing all. It's done by checking the existence of the + // field number of `admin_request`. + // See the encoding in `write_to_with_cached_sizes()` of `RaftCmdRequest` in + // `raft_cmdpb.rs` for reference. 
+ let mut is = CodedInputStream::from_bytes(entry.get_data()); + if is.eof().unwrap() { + return true; + } + let (mut field_number, wire_type) = is.read_tag_unpack().unwrap(); + // Header field is of number 1 + if field_number == 1 { + if wire_type != WireType::WireTypeLengthDelimited { + panic!("unexpected wire type"); + } + let len = is.read_raw_varint32().unwrap(); + // skip parsing the content of `Header` + is.consume(len as usize); + // read next field number + (field_number, _) = is.read_tag_unpack().unwrap(); + } + + // `Requests` field is of number 2 and `AdminRequest` field is of number 3. + // - If the next field is 2, there must be no admin request as in one + // `RaftCmdRequest`, either requests or admin_request is filled. + // - If the next field is 3, it's exactly an admin request. + // - If the next field is others, neither requests nor admin_request is filled, + // so there is no admin request. + field_number != 3 +} + /// A struct that stores the state related to Merge. /// /// When executing a `CommitMerge`, the source peer may have not applied @@ -895,12 +934,12 @@ pub struct ApplyDelegate where EK: KvEngine, { - /// The ID of the peer. - id: u64, /// The term of the Region. term: u64, /// The Region information of the peer. region: Region, + /// The Peer information. + peer: metapb::Peer, /// Peer_tag, "[region region_id] peer_id". 
tag: String, @@ -973,8 +1012,8 @@ where { fn from_registration(reg: Registration) -> ApplyDelegate { ApplyDelegate { - id: reg.id, tag: format!("[region {}] {}", reg.region.get_id(), reg.id), + peer: find_peer_by_id(®.region, reg.id).unwrap().clone(), region: reg.region, pending_remove: false, last_flush_applied_index: reg.apply_state.get_applied_index(), @@ -1006,7 +1045,7 @@ where } pub fn id(&self) -> u64 { - self.id + self.peer.get_id() } /// Handles all the committed_entries, namely, applies the committed @@ -1126,58 +1165,60 @@ where let data = entry.get_data(); if !data.is_empty() { - let cmd = util::parse_data_at(data, index, &self.tag); - - if apply_ctx.yield_high_latency_operation && has_high_latency_operation(&cmd) { - self.priority = Priority::Low; - } - let mut has_unflushed_data = - self.last_flush_applied_index != self.apply_state.get_applied_index(); - if (has_unflushed_data && should_write_to_engine(&cmd) - || apply_ctx.kv_wb().should_write_to_engine()) - && apply_ctx.host.pre_persist(&self.region, false, Some(&cmd)) - { - apply_ctx.commit(self); - if self.metrics.written_bytes >= apply_ctx.yield_msg_size - || self - .handle_start - .as_ref() - .map_or(Duration::ZERO, Instant::saturating_elapsed) - >= apply_ctx.yield_duration + if !self.peer.is_witness || !can_witness_skip(entry) { + let cmd = util::parse_data_at(data, index, &self.tag); + if apply_ctx.yield_high_latency_operation && has_high_latency_operation(&cmd) { + self.priority = Priority::Low; + } + let mut has_unflushed_data = + self.last_flush_applied_index != self.apply_state.get_applied_index(); + if (has_unflushed_data && should_write_to_engine(&cmd) + || apply_ctx.kv_wb().should_write_to_engine()) + && apply_ctx.host.pre_persist(&self.region, false, Some(&cmd)) { + apply_ctx.commit(self); + if self.metrics.written_bytes >= apply_ctx.yield_msg_size + || self + .handle_start + .as_ref() + .map_or(Duration::ZERO, Instant::saturating_elapsed) + >= apply_ctx.yield_duration + { + return 
ApplyResult::Yield; + } + has_unflushed_data = false; + } + if self.priority != apply_ctx.priority { + if has_unflushed_data { + apply_ctx.commit(self); + } return ApplyResult::Yield; } - has_unflushed_data = false; + + return self.process_raft_cmd(apply_ctx, index, term, cmd); } - if self.priority != apply_ctx.priority { - if has_unflushed_data { - apply_ctx.commit(self); + } else { + // we should observe empty cmd, aka leader change, + // read index during confchange, or other situations. + apply_ctx.host.on_empty_cmd(&self.region, index, term); + + // 1. When a peer become leader, it will send an empty entry. + // 2. When a leader tries to read index during transferring leader, + // it will also propose an empty entry. But that entry will not contain + // any associated callback. So no need to clear callback. + while let Some(mut cmd) = self.pending_cmds.pop_normal(u64::MAX, term - 1) { + if let Some(cb) = cmd.cb.take() { + apply_ctx + .applied_batch + .push_cb(cb, cmd_resp::err_resp(Error::StaleCommand, term)); } - return ApplyResult::Yield; } - - return self.process_raft_cmd(apply_ctx, index, term, cmd); } - // we should observe empty cmd, aka leader change, - // read index during confchange, or other situations. - apply_ctx.host.on_empty_cmd(&self.region, index, term); - self.apply_state.set_applied_index(index); self.applied_term = term; assert!(term > 0); - // 1. When a peer become leader, it will send an empty entry. - // 2. When a leader tries to read index during transferring leader, - // it will also propose an empty entry. But that entry will not contain - // any associated callback. So no need to clear callback. 
- while let Some(mut cmd) = self.pending_cmds.pop_normal(u64::MAX, term - 1) { - if let Some(cb) = cmd.cb.take() { - apply_ctx - .applied_batch - .push_cb(cb, cmd_resp::err_resp(Error::StaleCommand, term)); - } - } ApplyResult::None } @@ -1438,6 +1479,9 @@ where match *exec_result { ExecResult::ChangePeer(ref cp) => { self.region = cp.region.clone(); + if let Some(p) = find_peer_by_id(&self.region, self.id()) { + self.peer = p.clone(); + } } ExecResult::ComputeHash { .. } | ExecResult::VerifyHash { .. } @@ -1494,11 +1538,12 @@ where fn destroy(&mut self, apply_ctx: &mut ApplyContext) { self.stopped = true; apply_ctx.router.close(self.region_id()); + let id = self.id(); for cmd in self.pending_cmds.normals.drain(..) { - notify_region_removed(self.region.get_id(), self.id, cmd); + notify_region_removed(self.region.get_id(), id, cmd); } if let Some(cmd) = self.pending_cmds.conf_change.take() { - notify_region_removed(self.region.get_id(), self.id, cmd); + notify_region_removed(self.region.get_id(), id, cmd); } self.yield_state = None; @@ -1578,7 +1623,6 @@ where AdminCmdType::TransferLeader => self.exec_transfer_leader(request, ctx.exec_log_term), AdminCmdType::ComputeHash => self.exec_compute_hash(ctx, request), AdminCmdType::VerifyHash => self.exec_verify_hash(ctx, request), - // TODO: is it backward compatible to add new cmd_type? 
AdminCmdType::PrepareMerge => self.exec_prepare_merge(ctx, request), AdminCmdType::CommitMerge => self.exec_commit_merge(ctx, request), AdminCmdType::RollbackMerge => self.exec_rollback_merge(ctx, request), @@ -1884,23 +1928,22 @@ where mod confchange_cmd_metric { use super::*; - fn write_metric(cct: ConfChangeType, kind: &str) { - let metric = match cct { - ConfChangeType::AddNode => "add_peer", - ConfChangeType::RemoveNode => "remove_peer", - ConfChangeType::AddLearnerNode => "add_learner", - }; - PEER_ADMIN_CMD_COUNTER_VEC - .with_label_values(&[metric, kind]) - .inc(); - } - pub fn inc_all(cct: ConfChangeType) { - write_metric(cct, "all") + let metrics = match cct { + ConfChangeType::AddNode => &PEER_ADMIN_CMD_COUNTER.add_peer, + ConfChangeType::RemoveNode => &PEER_ADMIN_CMD_COUNTER.remove_peer, + ConfChangeType::AddLearnerNode => &PEER_ADMIN_CMD_COUNTER.add_learner, + }; + metrics.all.inc(); } pub fn inc_success(cct: ConfChangeType) { - write_metric(cct, "success") + let metrics = match cct { + ConfChangeType::AddNode => &PEER_ADMIN_CMD_COUNTER.add_peer, + ConfChangeType::RemoveNode => &PEER_ADMIN_CMD_COUNTER.remove_peer, + ConfChangeType::AddLearnerNode => &PEER_ADMIN_CMD_COUNTER.add_learner, + }; + metrics.success.inc(); } } @@ -1942,6 +1985,8 @@ impl ApplyDelegate where EK: KvEngine, { + // Legacy code for compatibility. All new conf changes are dispatched by + // ChangePeerV2 now. 
fn exec_change_peer( &mut self, ctx: &mut ApplyContext, @@ -1956,12 +2001,12 @@ where fail_point!( "apply_on_conf_change_1_3_1", - (self.id == 1 || self.id == 3) && self.region_id() == 1, + (self.id() == 1 || self.id() == 3) && self.region_id() == 1, |_| panic!("should not use return") ); fail_point!( "apply_on_conf_change_3_1", - self.id == 3 && self.region_id() == 1, + self.id() == 3 && self.region_id() == 1, |_| panic!("should not use return") ); fail_point!( @@ -1986,7 +2031,7 @@ where let add_ndoe_fp = || { fail_point!( "apply_on_add_node_1_2", - self.id == 2 && self.region_id() == 1, + self.id() == 2 && self.region_id() == 1, |_| {} ) }; @@ -2053,7 +2098,7 @@ where p )); } - if self.id == peer.get_id() { + if self.id() == peer.get_id() { // Remove ourself, we will destroy all region data later. // So we need not to apply following logs. self.stopped = true; @@ -2246,6 +2291,7 @@ where // The peer is already the requested role || (role, change_type) == (PeerRole::Voter, ConfChangeType::AddNode) || (role, change_type) == (PeerRole::Learner, ConfChangeType::AddLearnerNode) + || exist_peer.get_is_witness() != peer.get_is_witness() { error!( "can't add duplicated peer"; @@ -2253,7 +2299,7 @@ where "peer_id" => self.id(), "peer" => ?peer, "exist peer" => ?exist_peer, - "confchnage type" => ?change_type, + "confchange type" => ?change_type, "region" => ?&self.region ); return Err(box_err!( @@ -2307,7 +2353,7 @@ where "region_id" => self.region_id(), "peer_id" => self.id(), "expect_peer" => ?peer, - "get_peeer" => ?p + "get_peer" => ?p ); return Err(box_err!( "remove unmatched peer: expect: {:?}, get {:?}, ignore", @@ -2315,7 +2361,7 @@ where p )); } - if self.id == peer.get_id() { + if self.id() == peer.get_id() { // Remove ourself, we will destroy all region data later. // So we need not to apply following logs. 
self.stopped = true; @@ -2399,7 +2445,7 @@ where fail_point!("apply_before_split"); fail_point!( "apply_before_split_1_3", - self.id == 3 && self.region_id() == 1, + self.id() == 3 && self.region_id() == 1, |_| { unreachable!() } ); @@ -2582,7 +2628,7 @@ where fail_point!( "apply_after_split_1_3", - self.id == 3 && self.region_id() == 1, + self.id() == 3 && self.region_id() == 1, |_| { unreachable!() } ); @@ -2686,7 +2732,7 @@ where let apply_before_commit_merge = || { fail_point!( "apply_before_commit_merge_except_1_4", - self.region_id() == 1 && self.id != 4, + self.region_id() == 1 && self.id() != 4, |_| {} ); }; @@ -2958,7 +3004,7 @@ where let peer = req.get_transfer_leader().get_peer(); // Only execute TransferLeader if the expected new leader is self. - if peer.get_id() == self.id { + if peer.get_id() == self.id() { Ok((resp, ApplyResult::Res(ExecResult::TransferLeader { term }))) } else { Ok((resp, ApplyResult::None)) @@ -3534,7 +3580,7 @@ where "peer_id" => self.delegate.id(), "term" => reg.term ); - assert_eq!(self.delegate.id, reg.id); + assert_eq!(self.delegate.id(), reg.id); self.delegate.term = reg.term; self.delegate.clear_all_commands_as_stale(); self.delegate = ApplyDelegate::from_registration(reg); @@ -3681,7 +3727,7 @@ where PeerMsg::ApplyRes { res: TaskRes::Destroy { region_id: self.delegate.region_id(), - peer_id: self.delegate.id, + peer_id: self.delegate.id(), merge_from_snapshot: d.merge_from_snapshot, }, }, @@ -3762,6 +3808,10 @@ where if self.delegate.pending_remove || self.delegate.stopped { return; } + if self.delegate.peer.is_witness { + // witness shouldn't generate snapshot. 
+ return; + } let applied_index = self.delegate.apply_state.get_applied_index(); let need_sync = apply_ctx .apply_res @@ -3779,7 +3829,7 @@ where self.delegate.maybe_write_apply_state(apply_ctx); fail_point!( "apply_on_handle_snapshot_1_1", - self.delegate.id == 1 && self.delegate.region_id() == 1, + self.delegate.id() == 1 && self.delegate.region_id() == 1, |_| unimplemented!() ); @@ -3805,7 +3855,7 @@ where .fetch_sub(1, Ordering::SeqCst); fail_point!( "apply_on_handle_snapshot_finish_1_1", - self.delegate.id == 1 && self.delegate.region_id() == 1, + self.delegate.id() == 1 && self.delegate.region_id() == 1, |_| unimplemented!() ); } @@ -4530,6 +4580,7 @@ mod tests { time::*, }; + use bytes::Bytes; use engine_panic::PanicEngine; use engine_test::kv::{new_engine, KvTestEngine, KvTestSnapshot}; use engine_traits::{Peekable as PeekableTrait, SyncMutable, WriteBatchExt}; @@ -4539,6 +4590,7 @@ mod tests { raft_cmdpb::*, }; use protobuf::Message; + use raft::eraftpb::{ConfChange, ConfChangeV2}; use sst_importer::Config as ImportConfig; use tempfile::{Builder, TempDir}; use test_sst_importer::*; @@ -4644,6 +4696,42 @@ mod tests { } } + #[test] + fn test_can_witness_skip() { + let mut entry = Entry::new(); + let mut req = RaftCmdRequest::default(); + entry.set_entry_type(EntryType::EntryNormal); + let data = req.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(can_witness_skip(&entry)); + + req.mut_admin_request() + .set_cmd_type(AdminCmdType::CompactLog); + let data = req.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(!can_witness_skip(&entry)); + + let mut req = RaftCmdRequest::default(); + let mut request = Request::default(); + request.set_cmd_type(CmdType::Put); + req.set_requests(vec![request].into()); + let data = req.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(can_witness_skip(&entry)); + + entry.set_entry_type(EntryType::EntryConfChange); + let 
conf_change = ConfChange::new(); + let data = conf_change.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(!can_witness_skip(&entry)); + + entry.set_entry_type(EntryType::EntryConfChangeV2); + let conf_change_v2 = ConfChangeV2::new(); + let data = conf_change_v2.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(!can_witness_skip(&entry)); + } + #[test] fn test_should_sync_log() { // Admin command @@ -4841,10 +4929,14 @@ mod tests { ..Default::default() }; reg.region.set_id(2); + let mut peer = metapb::Peer::default(); + peer.set_id(1); + reg.region.mut_peers().push(peer.clone()); reg.apply_state.set_applied_index(3); router.schedule_task(2, Msg::Registration(reg.dup())); validate(&router, 2, move |delegate| { - assert_eq!(delegate.id, 1); + assert_eq!(delegate.id(), 1); + assert_eq!(delegate.peer, peer); assert_eq!(delegate.tag, "[region 2] 1"); assert_eq!(delegate.region, reg.region); assert!(!delegate.pending_remove); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 69215ecaf70..8c7ef17cfa6 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2489,12 +2489,14 @@ where // TODO: spin off the I/O code (delete_snapshot) let regions_to_destroy = match self.check_snapshot(&msg)? { Either::Left(key) => { - // If the snapshot file is not used again, then it's OK to - // delete them here. If the snapshot file will be reused when - // receiving, then it will fail to pass the check again, so - // missing snapshot files should not be noticed. - let s = self.ctx.snap_mgr.get_snapshot_for_applying(&key)?; - self.ctx.snap_mgr.delete_snapshot(&key, s.as_ref(), false); + if let Some(key) = key { + // If the snapshot file is not used again, then it's OK to + // delete them here. 
If the snapshot file will be reused when + // receiving, then it will fail to pass the check again, so + // missing snapshot files should not be noticed. + let s = self.ctx.snap_mgr.get_snapshot_for_applying(&key)?; + self.ctx.snap_mgr.delete_snapshot(&key, s.as_ref(), false); + } return Ok(()); } Either::Right(v) => v, @@ -2956,16 +2958,55 @@ where // Returns `Vec<(u64, bool)>` indicated (source_region_id, merge_to_this_peer) // if the `msg` doesn't contain a snapshot or this snapshot doesn't conflict // with any other snapshots or regions. Otherwise a `SnapKey` is returned. - fn check_snapshot(&mut self, msg: &RaftMessage) -> Result>> { + fn check_snapshot( + &mut self, + msg: &RaftMessage, + ) -> Result, Vec<(u64, bool)>>> { if !msg.get_message().has_snapshot() { return Ok(Either::Right(vec![])); } let region_id = msg.get_region_id(); let snap = msg.get_message().get_snapshot(); - let key = SnapKey::from_region_snap(region_id, snap); let mut snap_data = RaftSnapshotData::default(); snap_data.merge_from_bytes(snap.get_data())?; + + let key = if !snap_data.get_meta().get_for_witness() { + // Check if snapshot file exists. + // No need to get snapshot for witness, as witness's empty snapshot bypass + // snapshot manager. + let key = SnapKey::from_region_snap(region_id, snap); + self.ctx.snap_mgr.get_snapshot_for_applying(&key)?; + Some(key) + } else { + None + }; + + // If the index of snapshot is not newer than peer's apply index, it + // is possibly because there is witness -> non-witness switch, and the peer + // requests snapshot from leader but leader doesn't applies the switch yet. + // In that case, the snapshot is a witness snapshot whereas non-witness snapshot + // is expected. 
+ if snap.get_metadata().get_index() < self.fsm.peer.get_store().applied_index() + && snap_data.get_meta().get_for_witness() != self.fsm.peer.is_witness() + { + info!( + "mismatch witness snapshot"; + "region_id" => region_id, + "peer_id" => self.fsm.peer_id(), + "for_witness" => snap_data.get_meta().get_for_witness(), + "is_witness" => self.fsm.peer.is_witness(), + "index" => snap.get_metadata().get_index(), + "applied_index" => self.fsm.peer.get_store().applied_index(), + ); + self.ctx + .raft_metrics + .message_dropped + .mismatch_witness_snapshot + .inc(); + return Ok(Either::Left(key)); + } + let snap_region = snap_data.take_region(); let peer_id = msg.get_to_peer().get_id(); let snap_enc_start_key = enc_start_key(&snap_region); @@ -3116,9 +3157,6 @@ where return Ok(Either::Left(key)); } - // Check if snapshot file exists. - self.ctx.snap_mgr.get_snapshot_for_applying(&key)?; - // WARNING: The checking code must be above this line. // Now all checking passed. @@ -4955,6 +4993,16 @@ where let leader_id = self.fsm.peer.leader_id(); let request = msg.get_requests(); + // peer_id must be the same as peer's. + if let Err(e) = util::check_peer_id(msg, self.fsm.peer.peer_id()) { + self.ctx + .raft_metrics + .invalid_proposal + .mismatch_peer_id + .inc(); + return Err(e); + } + if self.fsm.peer.force_leader.is_some() { self.ctx.raft_metrics.invalid_proposal.force_leader.inc(); // in force leader state, forbid requests to make the recovery progress less @@ -4992,15 +5040,17 @@ where self.register_raft_base_tick(); return Err(Error::NotLeader(region_id, leader)); } - // peer_id must be the same as peer's. 
- if let Err(e) = util::check_peer_id(msg, self.fsm.peer.peer_id()) { - self.ctx - .raft_metrics - .invalid_proposal - .mismatch_peer_id - .inc(); - return Err(e); + + // Forbid requests when it's a witness unless it's transfer leader + if self.fsm.peer.is_witness() + && !(msg.has_admin_request() + && msg.get_admin_request().get_cmd_type() == AdminCmdType::TransferLeader) + { + self.ctx.raft_metrics.invalid_proposal.witness.inc(); + // TODO: use a dedicated error type + return Err(Error::RecoveryInProgress(self.region_id())); } + // check whether the peer is initialized. if !self.fsm.peer.is_initialized() { self.ctx diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index 1648bd345ca..5cfbb645612 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -86,6 +86,7 @@ pub struct RaftMetrics { pub peer_msg_len: LocalHistogram, pub commit_log: LocalHistogram, pub write_block_wait: LocalHistogram, + pub propose_log_size: LocalHistogram, // waterfall metrics pub waterfall_metrics: bool, @@ -123,6 +124,7 @@ impl RaftMetrics { peer_msg_len: PEER_MSG_LEN.local(), commit_log: PEER_COMMIT_LOG_HISTOGRAM.local(), write_block_wait: STORE_WRITE_MSG_BLOCK_WAIT_DURATION_HISTOGRAM.local(), + propose_log_size: PEER_PROPOSE_LOG_SIZE_HISTOGRAM.local(), waterfall_metrics, wf_batch_wait: STORE_WF_BATCH_WAIT_DURATION_HISTOGRAM.local(), wf_send_to_queue: STORE_WF_SEND_TO_QUEUE_DURATION_HISTOGRAM.local(), @@ -157,6 +159,7 @@ impl RaftMetrics { self.peer_msg_len.flush(); self.commit_log.flush(); self.write_block_wait.flush(); + self.propose_log_size.flush(); if self.waterfall_metrics { self.wf_batch_wait.flush(); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 2fe6fce580e..b0f44c30c0f 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -169,6 +169,7 @@ 
make_static_metric! { pub label_enum RaftDroppedMessage { mismatch_store_id, mismatch_region_epoch, + mismatch_witness_snapshot, stale_msg, region_overlap, region_no_peer, @@ -201,6 +202,7 @@ make_static_metric! { region_not_initialized, is_applying_snapshot, force_leader, + witness, flashback_in_progress, flashback_not_prepared } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index b9cf76889b4..ff55597b30e 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -84,6 +84,7 @@ use super::{ use crate::{ coprocessor::{CoprocessorHost, RegionChangeEvent, RegionChangeReason, RoleChange}, errors::RAFTSTORE_IS_BUSY, + router::RaftStoreRouter, store::{ async_io::{read::ReadTask, write::WriteMsg, write_router::WriteRouter}, fsm::{ @@ -93,7 +94,8 @@ use crate::{ }, hibernate_state::GroupState, memory::{needs_evict_entry_cache, MEMTRACE_RAFT_ENTRIES}, - msg::{ErrorCallback, PeerMsg, RaftCommand, SignificantMsg, StoreMsg}, + msg::{CasualMessage, ErrorCallback, PeerMsg, RaftCommand, SignificantMsg, StoreMsg}, + peer_storage::HandleSnapshotResult, txn_ext::LocksStatus, util::{admin_cmd_epoch_lookup, RegionReadProgress}, worker::{ @@ -466,6 +468,7 @@ pub struct PersistSnapshotResult { pub prev_region: metapb::Region, pub region: metapb::Region, pub destroy_regions: Vec, + pub for_witness: bool, } #[derive(Debug)] @@ -1076,6 +1079,7 @@ where skip_bcast_commit: true, pre_vote: cfg.prevote, max_committed_size_per_ready: MAX_COMMITTED_SIZE_PER_READY, + // TODO: if peer.is_witness { 0 } else { 1 }, ..Default::default() }; @@ -1150,7 +1154,7 @@ where region, applied_index, REGION_READ_PROGRESS_CAP, - tag.clone(), + peer_id, )), memtrace_raft_entries: 0, write_router: WriteRouter::new(tag), @@ -1684,6 +1688,11 @@ where self.raft_group.raft.state == StateRole::Leader } + #[inline] + pub fn is_witness(&self) -> bool { + self.peer.is_witness + } + #[inline] pub fn get_role(&self) -> StateRole { 
self.raft_group.raft.state @@ -2013,7 +2022,6 @@ where if p.get_id() == self.peer.get_id() { continue; } - // TODO if let Some(instant) = self.peer_heartbeats.get(&p.get_id()) { let elapsed = instant.saturating_elapsed(); if elapsed >= max_duration { @@ -2856,13 +2864,20 @@ where } } - if let HandleReadyResult::Snapshot { + if let HandleReadyResult::Snapshot(box HandleSnapshotResult { msgs, snap_region, destroy_regions, last_first_index, - } = res + for_witness, + }) = res { + if for_witness { + // inform next round to check apply status + ctx.router + .send_casual_msg(snap_region.get_id(), CasualMessage::SnapshotApplied) + .unwrap(); + } // When applying snapshot, there is no log applied and not compacted yet. self.raft_log_size_hint = 0; @@ -2874,6 +2889,7 @@ where prev_region: self.region().clone(), region: snap_region, destroy_regions, + for_witness, }), }); if self.last_compacted_idx == 0 && last_first_index >= RAFT_INIT_LOG_INDEX { @@ -2977,6 +2993,7 @@ where } else { vec![] }; + // Note that the `commit_index` and `commit_term` here may be used to // forward the commit index. So it must be less than or equal to persist // index. 
@@ -2985,6 +3002,7 @@ where self.raft_group.raft.raft_log.persisted, ); let commit_term = self.get_store().term(commit_index).unwrap(); + let mut apply = Apply::new( self.peer_id(), self.region_id, @@ -3094,6 +3112,9 @@ where "after" => ?peer, ); self.peer = peer; + // TODO: set priority for witness + // self.raft_group + // .set_priority(if self.peer.is_witness { 0 } else { 1 }); }; self.activate(ctx); @@ -3443,7 +3464,7 @@ where } let progress_to_be_updated = self.mut_store().applied_term() != applied_term; - self.mut_store().set_applied_state(apply_state); + self.mut_store().set_apply_state(apply_state); self.mut_store().set_applied_term(applied_term); self.peer_stat.written_keys += apply_metrics.written_keys; @@ -4331,9 +4352,10 @@ where }; let data = req.write_to_bytes()?; - - // TODO: use local histogram metrics - PEER_PROPOSE_LOG_SIZE_HISTOGRAM.observe(data.len() as f64); + poll_ctx + .raft_metrics + .propose_log_size + .observe(data.len() as f64); if data.len() as u64 > poll_ctx.cfg.raft_entry_max_size.0 { error!( @@ -4394,6 +4416,11 @@ where msg: &eraftpb::Message, peer_disk_usage: DiskUsage, ) -> bool { + if self.is_witness() { + // shouldn't transfer leader to witness peer + return true; + } + let pending_snapshot = self.is_handling_snapshot() || self.has_pending_snapshot(); if pending_snapshot || msg.get_from() != self.leader_id() @@ -4646,6 +4673,7 @@ where util::check_conf_change( &ctx.cfg, &self.raft_group, + self.region(), &self.peer, changes.as_ref(), &cc, @@ -4653,8 +4681,7 @@ where )?; ctx.raft_metrics.propose.conf_change.inc(); - // TODO: use local histogram metrics - PEER_PROPOSE_LOG_SIZE_HISTOGRAM.observe(data_size as f64); + ctx.raft_metrics.propose_log_size.observe(data_size as f64); info!( "propose conf change peer"; "region_id" => self.region_id, @@ -5011,6 +5038,7 @@ where Some(ForceLeaderState::ForceLeader { .. 
}) ) } + pub fn unsafe_recovery_maybe_finish_wait_apply(&mut self, force: bool) { if let Some(UnsafeRecoveryState::WaitApply { target_index, .. }) = &self.unsafe_recovery_state diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index a53ca1e9258..56b80c94dcc 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -30,7 +30,8 @@ use raft::{ Error as RaftError, GetEntriesContext, RaftState, Ready, Storage, StorageError, }; use tikv_util::{ - box_err, box_try, debug, defer, error, info, time::Instant, warn, worker::Scheduler, + box_err, box_try, debug, defer, error, info, store::find_peer_by_id, time::Instant, warn, + worker::Scheduler, }; use super::{metrics::*, worker::RegionTask, SnapEntry, SnapKey, SnapManager}; @@ -113,17 +114,21 @@ impl From for RaftError { } } +#[derive(PartialEq, Debug)] +pub struct HandleSnapshotResult { + pub msgs: Vec, + pub snap_region: metapb::Region, + /// The regions whose range are overlapped with this region + pub destroy_regions: Vec, + /// The first index before applying the snapshot. + pub last_first_index: u64, + pub for_witness: bool, +} + #[derive(PartialEq, Debug)] pub enum HandleReadyResult { SendIoTask, - Snapshot { - msgs: Vec, - snap_region: metapb::Region, - /// The regions whose range are overlapped with this region - destroy_regions: Vec, - /// The first index before applying the snapshot. - last_first_index: u64, - }, + Snapshot(Box), // use boxing to reduce total size of the enum NoIoTask, } @@ -214,6 +219,7 @@ where pub engines: Engines, peer_id: u64, + peer: Option, // when uninitialized the peer info is unknown. 
region: metapb::Region, snap_state: RefCell, @@ -314,6 +320,7 @@ where Ok(PeerStorage { engines, peer_id, + peer: find_peer_by_id(region, peer_id).cloned(), region: region.clone(), snap_state: RefCell::new(SnapState::Relax), gen_snap_task: RefCell::new(None), @@ -354,6 +361,7 @@ where #[inline] pub fn set_region(&mut self, region: metapb::Region) { + self.peer = find_peer_by_id(®ion, self.peer_id).cloned(); self.region = region; } @@ -439,16 +447,31 @@ where /// Gets a snapshot. Returns `SnapshotTemporarilyUnavailable` if there is no /// available snapshot. pub fn snapshot(&self, request_index: u64, to: u64) -> raft::Result { + if self.peer.as_ref().unwrap().is_witness { + // witness could be the leader for a while, do not generate snapshot now + return Err(raft::Error::Store( + raft::StorageError::SnapshotTemporarilyUnavailable, + )); + } + + if find_peer_by_id(&self.region, to).map_or(false, |p| p.is_witness) { + // generate an empty snapshot for witness directly + return Ok(util::new_empty_snapshot( + self.region.clone(), + self.applied_index(), + self.applied_term(), + true, // for witness + )); + } + let mut snap_state = self.snap_state.borrow_mut(); let mut tried_cnt = self.snap_tried_cnt.borrow_mut(); let mut tried = false; let mut last_canceled = false; if let SnapState::Generating { - ref canceled, - ref receiver, - .. - } = *snap_state + canceled, receiver, .. 
+ } = &*snap_state { tried = true; last_canceled = canceled.load(Ordering::SeqCst); @@ -551,7 +574,7 @@ where snap: &Snapshot, task: &mut WriteTask, destroy_regions: &[metapb::Region], - ) -> Result { + ) -> Result<(metapb::Region, bool)> { info!( "begin to apply snapshot"; "region_id" => self.region.get_id(), @@ -561,8 +584,9 @@ where let mut snap_data = RaftSnapshotData::default(); snap_data.merge_from_bytes(snap.get_data())?; - let region_id = self.get_region_id(); + let for_witness = snap_data.get_meta().get_for_witness(); + let region_id = self.get_region_id(); let region = snap_data.take_region(); if region.get_id() != region_id { return Err(box_err!( @@ -597,24 +621,32 @@ where for r in destroy_regions { write_peer_state(kv_wb, r, PeerState::Tombstone, None)?; } - write_peer_state(kv_wb, ®ion, PeerState::Applying, None)?; - let last_index = snap.get_metadata().get_index(); + // Witness snapshot is applied atomically as no async applying operation to + // region worker, so no need to set the peer state to `Applying` + let state = if for_witness { + PeerState::Normal + } else { + PeerState::Applying + }; + write_peer_state(kv_wb, ®ion, state, None)?; + + let snap_index = snap.get_metadata().get_index(); + let snap_term = snap.get_metadata().get_term(); - self.raft_state_mut().set_last_index(last_index); - self.set_last_term(snap.get_metadata().get_term()); - self.apply_state_mut().set_applied_index(last_index); - let last_term = self.last_term(); - self.set_applied_term(last_term); + self.raft_state_mut().set_last_index(snap_index); + self.set_last_term(snap_term); + self.apply_state_mut().set_applied_index(snap_index); + self.set_applied_term(snap_term); // The snapshot only contains log which index > applied index, so // here the truncate state's (index, term) is in snapshot metadata. 
self.apply_state_mut() .mut_truncated_state() - .set_index(last_index); + .set_index(snap_index); self.apply_state_mut() .mut_truncated_state() - .set_term(snap.get_metadata().get_term()); + .set_term(snap_term); // `region` will be updated after persisting. // Although there is an interval that other metadata are updated while `region` @@ -634,7 +666,7 @@ where "state" => ?self.apply_state(), ); - Ok(region) + Ok((region, for_witness)) } /// Delete all meta belong to the region. Results are stored in `wb`. @@ -856,20 +888,23 @@ where let mut write_task = WriteTask::new(region_id, self.peer_id, ready.number()); - let mut res = HandleReadyResult::SendIoTask; - if !ready.snapshot().is_empty() { + let mut res = if ready.snapshot().is_empty() { + HandleReadyResult::SendIoTask + } else { fail_point!("raft_before_apply_snap"); let last_first_index = self.first_index().unwrap(); - let snap_region = + let (snap_region, for_witness) = self.apply_snapshot(ready.snapshot(), &mut write_task, &destroy_regions)?; - res = HandleReadyResult::Snapshot { + let res = HandleReadyResult::Snapshot(Box::new(HandleSnapshotResult { msgs: ready.take_persisted_messages(), snap_region, destroy_regions, last_first_index, - }; + for_witness, + })); fail_point!("raft_after_apply_snap"); + res }; if !ready.entries().is_empty() { @@ -930,7 +965,7 @@ where // - After `PrepareMerge` log is committed, the source region leader's lease // will be suspected immediately which makes the local reader not serve read // request. - // - No read request can be responsed in peer fsm during merging. These + // - No read request can be responded in peer fsm during merging. These // conditions are used to prevent reading **stale** data in the past. At // present, they are also used to prevent reading **corrupt** data. 
for r in &res.destroy_regions { @@ -942,7 +977,14 @@ where } } - self.schedule_applying_snapshot(); + if !res.for_witness { + self.schedule_applying_snapshot(); + } else { + // Bypass apply snapshot process for witness as the snapshot is empty, so mark + // status as finished directly here + let status = Arc::new(AtomicUsize::new(JOB_STATUS_FINISHED)); + self.set_snap_state(SnapState::Applying(Arc::clone(&status))); + } // The `region` is updated after persisting in order to stay consistent with the // one in `StoreMeta::regions` (will be updated soon). @@ -1133,7 +1175,10 @@ pub mod tests { Error as RaftError, GetEntriesContext, StorageError, }; use tempfile::{Builder, TempDir}; - use tikv_util::worker::{dummy_scheduler, LazyWorker, Scheduler, Worker}; + use tikv_util::{ + store::{new_peer, new_witness_peer}, + worker::{dummy_scheduler, LazyWorker, Scheduler, Worker}, + }; use super::*; use crate::{ @@ -1569,7 +1614,7 @@ pub mod tests { Option::>::None, ); worker.start_with_timer(runner); - let snap = s.snapshot(0, 0); + let snap = s.snapshot(0, 1); let unavailable = RaftError::Store(StorageError::SnapshotTemporarilyUnavailable); assert_eq!(snap.unwrap_err(), unavailable); assert_eq!(*s.snap_tried_cnt.borrow(), 1); @@ -1593,11 +1638,11 @@ pub mod tests { let (tx, rx) = channel(); s.set_snap_state(gen_snap_for_test(rx)); // Empty channel should cause snapshot call to wait. - assert_eq!(s.snapshot(0, 0).unwrap_err(), unavailable); + assert_eq!(s.snapshot(0, 1).unwrap_err(), unavailable); assert_eq!(*s.snap_tried_cnt.borrow(), 1); tx.send(snap.clone()).unwrap(); - assert_eq!(s.snapshot(0, 0), Ok(snap.clone())); + assert_eq!(s.snapshot(0, 1), Ok(snap.clone())); assert_eq!(*s.snap_tried_cnt.borrow(), 0); let (tx, rx) = channel(); @@ -1638,7 +1683,7 @@ pub mod tests { s.set_snap_state(gen_snap_for_test(rx)); *s.snap_tried_cnt.borrow_mut() = 1; // stale snapshot should be abandoned, snapshot index < truncated index. 
- assert_eq!(s.snapshot(0, 0).unwrap_err(), unavailable); + assert_eq!(s.snapshot(0, 1).unwrap_err(), unavailable); assert_eq!(*s.snap_tried_cnt.borrow(), 1); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); @@ -1655,7 +1700,7 @@ pub mod tests { ref s => panic!("unexpected state {:?}", s), } // Disconnected channel should trigger another try. - assert_eq!(s.snapshot(0, 0).unwrap_err(), unavailable); + assert_eq!(s.snapshot(0, 1).unwrap_err(), unavailable); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); generate_and_schedule_snapshot(gen_task, &s.engines, &sched).unwrap_err(); assert_eq!(*s.snap_tried_cnt.borrow(), 2); @@ -1670,13 +1715,13 @@ pub mod tests { } // Scheduled job failed should trigger . - assert_eq!(s.snapshot(0, 0).unwrap_err(), unavailable); + assert_eq!(s.snapshot(0, 1).unwrap_err(), unavailable); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); generate_and_schedule_snapshot(gen_task, &s.engines, &sched).unwrap_err(); } // When retry too many times, it should report a different error. 
- match s.snapshot(0, 0) { + match s.snapshot(0, 1) { Err(RaftError::Store(StorageError::Other(_))) => {} res => panic!("unexpected res: {:?}", res), } @@ -1752,6 +1797,80 @@ pub mod tests { test_storage_create_snapshot_for_role("tikv", 5); } + #[test] + fn test_storage_create_snapshot_for_witness() { + let ents = vec![new_entry(3, 3), new_entry(4, 4), new_entry(5, 5)]; + let mut cs = ConfState::default(); + cs.set_voters(vec![1, 2, 3]); + + let td = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); + let snap_dir = Builder::new().prefix("snap_dir").tempdir().unwrap(); + let mgr = SnapManager::new(snap_dir.path().to_str().unwrap()); + let mut worker = Worker::new("region-worker").lazy_build("region-worker"); + let sched = worker.scheduler(); + let (dummy_scheduler, _) = dummy_scheduler(); + let mut s = new_storage_from_ents(sched.clone(), dummy_scheduler, &td, &ents); + let cfg = make_region_worker_raftstore_cfg(true); + let (router, _) = mpsc::sync_channel(100); + let runner = RegionRunner::new( + s.engines.kv.clone(), + mgr, + cfg, + CoprocessorHost::::default(), + router, + Option::>::None, + ); + worker.start_with_timer(runner); + + let mut r = s.region().clone(); + r.mut_peers().push(new_peer(2, 2)); + r.mut_peers().push(new_witness_peer(3, 3)); + + let mut kv_wb = s.engines.kv.write_batch(); + write_peer_state(&mut kv_wb, &r, PeerState::Normal, None).unwrap(); + kv_wb.write().unwrap(); + s.set_region(r); + + let wait_snapshot = |snap: raft::Result| -> Snapshot { + if let Ok(s) = snap { + return s; + } + let unavailable = RaftError::Store(StorageError::SnapshotTemporarilyUnavailable); + assert_eq!(snap.unwrap_err(), unavailable); + assert_eq!(*s.snap_tried_cnt.borrow(), 1); + let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); + generate_and_schedule_snapshot(gen_task, &s.engines, &sched).unwrap(); + let snap = match *s.snap_state.borrow() { + SnapState::Generating { ref receiver, .. 
} => { + receiver.recv_timeout(Duration::from_secs(3)).unwrap() + } + ref s => panic!("unexpected state: {:?}", s), + }; + snap + }; + + // generate snapshot for peer + let snap = wait_snapshot(s.snapshot(0, 2)); + assert_eq!(snap.get_metadata().get_index(), 5); + assert_eq!(snap.get_metadata().get_term(), 5); + assert!(!snap.get_data().is_empty()); + + // generate snapshot for witness peer + let snap = wait_snapshot(s.snapshot(0, 3)); + assert_eq!(snap.get_metadata().get_index(), 5); + assert_eq!(snap.get_metadata().get_term(), 5); + assert!(!snap.get_data().is_empty()); + + let mut data = RaftSnapshotData::default(); + protobuf::Message::merge_from_bytes(&mut data, snap.get_data()).unwrap(); + assert_eq!(data.get_region().get_id(), 1); + assert_eq!(data.get_region().get_peers().len(), 3); + let files = data.get_meta().get_cf_files(); + for file in files { + assert_eq!(file.get_size(), 0); + } + } + #[test] fn test_storage_apply_snapshot() { let ents = vec![ @@ -1781,7 +1900,7 @@ pub mod tests { Option::>::None, ); worker.start(runner); - s1.snapshot(0, 0).unwrap_err(); + s1.snapshot(0, 1).unwrap_err(); let gen_task = s1.gen_snap_task.borrow_mut().take().unwrap(); generate_and_schedule_snapshot(gen_task, &s1.engines, &sched).unwrap(); @@ -1799,7 +1918,7 @@ pub mod tests { let mut s2 = new_storage(sched.clone(), dummy_scheduler.clone(), &td2); assert_eq!(s2.first_index(), Ok(s2.applied_index() + 1)); let mut write_task = WriteTask::new(s2.get_region_id(), s2.peer_id, 1); - let snap_region = s2.apply_snapshot(&snap1, &mut write_task, &[]).unwrap(); + let (snap_region, _) = s2.apply_snapshot(&snap1, &mut write_task, &[]).unwrap(); let mut snap_data = RaftSnapshotData::default(); snap_data.merge_from_bytes(snap1.get_data()).unwrap(); assert_eq!(snap_region, snap_data.take_region(),); @@ -1816,7 +1935,7 @@ pub mod tests { let mut s3 = new_storage_from_ents(sched, dummy_scheduler, &td3, ents); validate_cache(&s3, &ents[1..]); let mut write_task = 
WriteTask::new(s3.get_region_id(), s3.peer_id, 1); - let snap_region = s3.apply_snapshot(&snap1, &mut write_task, &[]).unwrap(); + let (snap_region, _) = s3.apply_snapshot(&snap1, &mut write_task, &[]).unwrap(); let mut snap_data = RaftSnapshotData::default(); snap_data.merge_from_bytes(snap1.get_data()).unwrap(); assert_eq!(snap_region, snap_data.take_region(),); diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 2980f9931a5..b2180a8420d 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -20,20 +20,25 @@ use kvproto::{ kvrpcpb::{self, KeyRange, LeaderInfo}, metapb::{self, Peer, PeerRole, Region, RegionEpoch}, raft_cmdpb::{AdminCmdType, ChangePeerRequest, ChangePeerV2Request, RaftCmdRequest}, - raft_serverpb::RaftMessage, + raft_serverpb::{RaftMessage, RaftSnapshotData}, }; use protobuf::{self, Message}; use raft::{ - eraftpb::{self, ConfChangeType, ConfState, MessageType}, + eraftpb::{self, ConfChangeType, ConfState, MessageType, Snapshot}, Changer, RawNode, INVALID_INDEX, }; use raft_proto::ConfChangeI; -use tikv_util::{box_err, debug, info, store::region, time::monotonic_raw_now, Either}; +use tikv_util::{ + box_err, debug, info, + store::{find_peer_by_id, region}, + time::monotonic_raw_now, + Either, +}; use time::{Duration, Timespec}; use txn_types::{TimeStamp, WriteBatchFlags}; use super::{metrics::PEER_ADMIN_CMD_COUNTER_VEC, peer_storage, Config}; -use crate::{coprocessor::CoprocessorHost, Error, Result}; +use crate::{coprocessor::CoprocessorHost, store::snap::SNAPSHOT_VERSION, Error, Result}; const INVALID_TIMESTAMP: u64 = u64::MAX; @@ -125,6 +130,27 @@ pub fn is_initial_msg(msg: &eraftpb::Message) -> bool { || (msg_type == MessageType::MsgHeartbeat && msg.get_commit() == INVALID_INDEX) } +pub fn new_empty_snapshot( + region: Region, + applied_index: u64, + applied_term: u64, + for_witness: bool, +) -> Snapshot { + let mut snapshot = Snapshot::default(); + 
snapshot.mut_metadata().set_index(applied_index); + snapshot.mut_metadata().set_term(applied_term); + snapshot + .mut_metadata() + .set_conf_state(conf_state_from_region(®ion)); + let mut snap_data = RaftSnapshotData::default(); + snap_data.set_region(region); + snap_data.set_file_size(0); + snap_data.set_version(SNAPSHOT_VERSION); + snap_data.mut_meta().set_for_witness(for_witness); + snapshot.set_data(snap_data.write_to_bytes().unwrap().into()); + snapshot +} + const STR_CONF_CHANGE_ADD_NODE: &str = "AddNode"; const STR_CONF_CHANGE_REMOVE_NODE: &str = "RemoveNode"; const STR_CONF_CHANGE_ADDLEARNER_NODE: &str = "AddLearner"; @@ -869,6 +895,7 @@ impl<'a> ChangePeerI for &'a ChangePeerV2Request { pub fn check_conf_change( cfg: &Config, node: &RawNode, + region: &metapb::Region, leader: &metapb::Peer, change_peers: &[ChangePeerRequest], cc: &impl ConfChangeI, @@ -915,6 +942,18 @@ pub fn check_conf_change( } } + if region + .get_peers() + .iter() + .find(|p| p.get_id() == peer.get_id()) + .map_or(false, |p| p.get_is_witness() != peer.get_is_witness()) + { + return Err(box_err!( + "invalid conf change request: {:?}, can not switch witness in conf change", + cp + )); + } + if !check_dup.insert(peer.get_id()) { return Err(box_err!( "have multiple commands for the same peer {}", @@ -1122,9 +1161,19 @@ pub struct RegionReadProgress { } impl RegionReadProgress { - pub fn new(region: &Region, applied_index: u64, cap: usize, tag: String) -> RegionReadProgress { + pub fn new( + region: &Region, + applied_index: u64, + cap: usize, + peer_id: u64, + ) -> RegionReadProgress { RegionReadProgress { - core: Mutex::new(RegionReadProgressCore::new(region, applied_index, cap, tag)), + core: Mutex::new(RegionReadProgressCore::new( + region, + applied_index, + cap, + peer_id, + )), safe_ts: AtomicU64::from(0), } } @@ -1283,7 +1332,7 @@ impl RegionReadProgress { #[derive(Debug)] pub struct RegionReadProgressCore { - tag: String, + peer_id: u64, region_id: u64, applied_index: u64, // A 
wrapper of `(apply_index, safe_ts)` item, where the `read_state.ts` is the peer's current @@ -1355,17 +1404,24 @@ fn find_store_id(peer_list: &[Peer], peer_id: u64) -> Option { } impl RegionReadProgressCore { - fn new(region: &Region, applied_index: u64, cap: usize, tag: String) -> RegionReadProgressCore { + fn new( + region: &Region, + applied_index: u64, + cap: usize, + peer_id: u64, + ) -> RegionReadProgressCore { + // forbids stale read for witness + let is_witness = find_peer_by_id(region, peer_id).map_or(false, |p| p.is_witness); RegionReadProgressCore { - tag, + peer_id, region_id: region.get_id(), applied_index, read_state: ReadState::default(), leader_info: LocalLeaderInfo::new(region), pending_items: VecDeque::with_capacity(cap), last_merge_index: 0, - pause: false, - discard: false, + pause: is_witness, + discard: is_witness, } } @@ -1380,10 +1436,11 @@ impl RegionReadProgressCore { self.read_state.ts = cmp::min(source_safe_ts, target_safe_ts); info!( "reset safe_ts due to merge"; - "tag" => &self.tag, "source_safe_ts" => source_safe_ts, "target_safe_ts" => target_safe_ts, "safe_ts" => self.read_state.ts, + "region_id" => self.region_id, + "peer_id" => self.peer_id, ); if self.read_state.ts != target_safe_ts { Some(self.read_state.ts) @@ -2096,7 +2153,7 @@ mod tests { } let cap = 10; - let rrp = RegionReadProgress::new(&Default::default(), 10, cap, "".to_owned()); + let rrp = RegionReadProgress::new(&Default::default(), 10, cap, 1); for i in 1..=20 { rrp.update_safe_ts(i, i); } diff --git a/components/raftstore/src/store/worker/check_leader.rs b/components/raftstore/src/store/worker/check_leader.rs index 696caab7d69..ab83752d8c3 100644 --- a/components/raftstore/src/store/worker/check_leader.rs +++ b/components/raftstore/src/store/worker/check_leader.rs @@ -157,7 +157,7 @@ mod tests { region.set_start_key(kr.get_start_key().to_vec()); region.set_end_key(kr.get_end_key().to_vec()); region.set_peers(vec![kvproto::metapb::Peer::default()].into()); - let rrp 
= RegionReadProgress::new(®ion, 1, 1, "".to_owned()); + let rrp = RegionReadProgress::new(®ion, 1, 1, 1); rrp.update_safe_ts(1, safe_ts); assert_eq!(rrp.safe_ts(), safe_ts); meta.region_ranges.insert(enc_end_key(®ion), id); diff --git a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index 7a680e4d7a6..0d396eae575 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -56,6 +56,7 @@ make_static_metric! { channel_full, cache_miss, safe_ts, + witness, } pub struct LocalReadRejectCounter : LocalIntCounter { diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index d62f2f6c1db..2c92923fc4e 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -25,6 +25,7 @@ use tikv_util::{ codec::number::decode_u64, debug, error, lru::LruCache, + store::find_peer_by_id, time::{monotonic_raw_now, ThreadReadId}, }; use time::Timespec; @@ -563,9 +564,11 @@ impl ReadDelegate { } debug!( "reject stale read by safe ts"; - "tag" => &self.tag, - "safe ts" => safe_ts, - "read ts" => read_ts + "safe_ts" => safe_ts, + "read_ts" => read_ts, + + "region_id" => self.region.get_id(), + "peer_id" => self.peer_id, ); TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.safe_ts.inc()); let mut response = cmd_resp::new_error(Error::DataIsNotReady { @@ -581,7 +584,7 @@ impl ReadDelegate { pub fn mock(region_id: u64) -> Self { let mut region: metapb::Region = Default::default(); region.set_id(region_id); - let read_progress = Arc::new(RegionReadProgress::new(®ion, 0, 0, "mock".to_owned())); + let read_progress = Arc::new(RegionReadProgress::new(®ion, 0, 0, 1)); ReadDelegate { region: Arc::new(region), peer_id: 1, @@ -782,6 +785,15 @@ where return Err(e); } + // Check witness + if find_peer_by_id(&delegate.region, delegate.peer_id) + .unwrap() + .is_witness + { + 
TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.witness.inc()); + return Err(Error::RecoveryInProgress(region_id)); + } + // Check term. if let Err(e) = util::check_term(req, delegate.term) { debug!( @@ -1241,7 +1253,7 @@ mod tests { region1.set_region_epoch(epoch13.clone()); let term6 = 6; let mut lease = Lease::new(Duration::seconds(1), Duration::milliseconds(250)); // 1s is long enough. - let read_progress = Arc::new(RegionReadProgress::new(®ion1, 1, 1, "".to_owned())); + let read_progress = Arc::new(RegionReadProgress::new(®ion1, 1, 1, 1)); let mut cmd = RaftCmdRequest::default(); let mut header = RaftRequestHeader::default(); @@ -1573,7 +1585,7 @@ mod tests { txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::default())), txn_ext: Arc::new(TxnExt::default()), track_ver: TrackVer::new(), - read_progress: Arc::new(RegionReadProgress::new(®ion, 0, 0, "".to_owned())), + read_progress: Arc::new(RegionReadProgress::new(®ion, 0, 0, 1)), pending_remove: false, bucket_meta: None, }; @@ -1680,7 +1692,7 @@ mod tests { let leader = prs[0].clone(); region.set_region_epoch(region_epoch); let mut lease = Lease::new(Duration::seconds(1), Duration::milliseconds(250)); // 1s is long enough. - let read_progress = Arc::new(RegionReadProgress::new(®ion, 1, 1, "".to_owned())); + let read_progress = Arc::new(RegionReadProgress::new(®ion, 1, 1, 1)); // Register region lease.renew(monotonic_raw_now()); diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index f23bc7e3b12..513d08643a7 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -215,13 +215,13 @@ impl Operator { } else { ConfChangeType::AddNode }; - new_pd_change_peer(conf_change_type, peer.clone()) + new_pd_change_peer_v2(vec![change_peer(conf_change_type, peer.clone())]) } else { pdpb::RegionHeartbeatResponse::default() } } Operator::RemovePeer { ref peer, .. 
} => { - new_pd_change_peer(ConfChangeType::RemoveNode, peer.clone()) + new_pd_change_peer_v2(vec![change_peer(ConfChangeType::RemoveNode, peer.clone())]) } Operator::TransferLeader { ref peer, diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 1e35dc0cf13..eb8ab3fe885 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -301,7 +301,6 @@ pub fn new_transfer_leader_cmd(peer: metapb::Peer) -> AdminRequest { cmd } -#[allow(dead_code)] pub fn new_prepare_merge(target_region: metapb::Region) -> AdminRequest { let mut cmd = AdminRequest::default(); cmd.set_cmd_type(AdminCmdType::PrepareMerge); diff --git a/components/tikv_util/src/store/mod.rs b/components/tikv_util/src/store/mod.rs index f4bfea93519..9a36961c202 100644 --- a/components/tikv_util/src/store/mod.rs +++ b/components/tikv_util/src/store/mod.rs @@ -5,7 +5,10 @@ pub mod query_stats; pub mod region; pub use self::{ - peer::{find_peer, find_peer_mut, is_learner, new_learner_peer, new_peer, remove_peer}, + peer::{ + find_peer, find_peer_by_id, find_peer_mut, is_learner, new_learner_peer, new_peer, + new_witness_peer, remove_peer, + }, query_stats::{is_read_query, QueryStats}, region::{ check_key_in_region, check_key_in_region_exclusive, check_key_in_region_inclusive, diff --git a/components/tikv_util/src/store/peer.rs b/components/tikv_util/src/store/peer.rs index 59844bc957a..bbc96bb786f 100644 --- a/components/tikv_util/src/store/peer.rs +++ b/components/tikv_util/src/store/peer.rs @@ -16,6 +16,10 @@ pub fn find_peer_mut(region: &mut Region, store_id: u64) -> Option<&mut Peer> { .find(|p| p.get_store_id() == store_id) } +pub fn find_peer_by_id(region: &Region, peer_id: u64) -> Option<&Peer> { + region.get_peers().iter().find(|&p| p.get_id() == peer_id) +} + pub fn remove_peer(region: &mut Region, store_id: u64) -> Option { region .get_peers() @@ -45,6 +49,15 @@ pub fn is_learner(peer: &Peer) -> bool { peer.get_role() == 
PeerRole::Learner } +pub fn new_witness_peer(store_id: u64, peer_id: u64) -> Peer { + let mut peer = Peer::default(); + peer.set_store_id(store_id); + peer.set_id(peer_id); + peer.set_role(PeerRole::Voter); + peer.set_is_witness(true); + peer +} + #[cfg(test)] mod tests { use super::*; diff --git a/components/tikv_util/src/store/region.rs b/components/tikv_util/src/store/region.rs index 580d940ebeb..58af4e9fdfa 100644 --- a/components/tikv_util/src/store/region.rs +++ b/components/tikv_util/src/store/region.rs @@ -32,9 +32,11 @@ pub fn region_on_same_stores(lhs: &Region, rhs: &Region) -> bool { // Because every store can only have one replica for the same region, // so just one round check is enough. lhs.get_peers().iter().all(|lp| { - rhs.get_peers() - .iter() - .any(|rp| rp.get_store_id() == lp.get_store_id() && rp.get_role() == lp.get_role()) + rhs.get_peers().iter().any(|rp| { + rp.get_store_id() == lp.get_store_id() + && rp.get_role() == lp.get_role() + && rp.get_is_witness() == lp.get_is_witness() + }) }) } diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index 7b29976f218..0230174fb42 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -30,9 +30,10 @@ use grpcio::{ RpcStatusCode, WriteFlags, }; use kvproto::{ - raft_serverpb::{Done, RaftMessage}, + raft_serverpb::{Done, RaftMessage, RaftSnapshotData}, tikvpb::{BatchRaftMessage, TikvClient}, }; +use protobuf::Message; use raft::SnapshotStatus; use raftstore::{errors::DiscardReason, router::RaftStoreRouter}; use security::SecurityManager; @@ -483,11 +484,17 @@ where None => return, }; if msg.get_message().has_snapshot() { - self.send_snapshot_sock(msg); - continue; - } else { - self.buffer.push(msg); + let mut snapshot = RaftSnapshotData::default(); + snapshot + .merge_from_bytes(msg.get_message().get_snapshot().get_data()) + .unwrap(); + // Witness's snapshot must be empty, no need to send snapshot files + if !snapshot.get_meta().get_for_witness() { + 
self.send_snapshot_sock(msg); + continue; + } } + self.buffer.push(msg); } } } diff --git a/tests/integrations/raftstore/mod.rs b/tests/integrations/raftstore/mod.rs index 9d648c06c8c..08657f7e75a 100644 --- a/tests/integrations/raftstore/mod.rs +++ b/tests/integrations/raftstore/mod.rs @@ -32,3 +32,4 @@ mod test_transfer_leader; mod test_transport; mod test_unsafe_recovery; mod test_update_region_size; +mod test_witness; diff --git a/tests/integrations/raftstore/test_unsafe_recovery.rs b/tests/integrations/raftstore/test_unsafe_recovery.rs index 505bd3bd0e4..a2c2ea75c64 100644 --- a/tests/integrations/raftstore/test_unsafe_recovery.rs +++ b/tests/integrations/raftstore/test_unsafe_recovery.rs @@ -677,7 +677,6 @@ fn test_force_leader_on_hibernated_leader() { // previous follower. #[test] fn test_force_leader_on_hibernated_follower() { - test_util::init_log_for_test(); let mut cluster = new_node_cluster(0, 5); cluster.pd_client.disable_default_operator(); diff --git a/tests/integrations/raftstore/test_witness.rs b/tests/integrations/raftstore/test_witness.rs new file mode 100644 index 00000000000..8e36510753e --- /dev/null +++ b/tests/integrations/raftstore/test_witness.rs @@ -0,0 +1,537 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{iter::FromIterator, sync::Arc, time::Duration}; + +use futures::executor::block_on; +use kvproto::{metapb, raft_cmdpb::ChangePeerRequest, raft_serverpb::PeerState}; +use pd_client::PdClient; +use raft::eraftpb::ConfChangeType; +use test_raftstore::*; +use tikv_util::store::find_peer; + +fn become_witness(cluster: &Cluster, region_id: u64, peer: &mut metapb::Peer) { + peer.set_role(metapb::PeerRole::Learner); + cluster.pd_client.must_add_peer(region_id, peer.clone()); + cluster.pd_client.must_remove_peer(region_id, peer.clone()); + peer.set_is_witness(true); + peer.set_id(peer.get_id() + 10); + cluster.pd_client.must_add_peer(region_id, peer.clone()); + peer.set_role(metapb::PeerRole::Voter); + cluster.pd_client.must_add_peer(region_id, peer.clone()); +} + +fn become_non_witness(cluster: &Cluster, region_id: u64, peer: &mut metapb::Peer) { + peer.set_role(metapb::PeerRole::Learner); + cluster.pd_client.must_add_peer(region_id, peer.clone()); + cluster.pd_client.must_remove_peer(region_id, peer.clone()); + peer.set_is_witness(false); + peer.set_id(peer.get_id() + 10); + cluster.pd_client.must_add_peer(region_id, peer.clone()); + peer.set_role(metapb::PeerRole::Voter); + cluster.pd_client.must_add_peer(region_id, peer.clone()); +} + +// Test the case that region split or merge with witness peer +#[test] +fn test_witness_split_merge() { + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + // nonwitness -> witness + let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + become_witness(&cluster, region.get_id(), &mut peer_on_store3); + + let before = cluster + .apply_state(region.get_id(), nodes[2]) + .get_applied_index(); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k2", 
b"v2"); + cluster.must_split(®ion, b"k2"); + must_get_none(&cluster.get_engine(3), b"k1"); + must_get_none(&cluster.get_engine(3), b"k2"); + // applied index of witness is updated + let after = cluster + .apply_state(region.get_id(), nodes[2]) + .get_applied_index(); + assert!(after - before >= 3); + + // the newly split peer should be witness as well + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k2"); + assert_ne!(left.get_id(), right.get_id()); + assert!(find_peer(&left, nodes[2]).unwrap().is_witness); + assert!(find_peer(&right, nodes[2]).unwrap().is_witness); + + // merge + pd_client.must_merge(left.get_id(), right.get_id()); + let after_merge = cluster.get_region(b"k1"); + assert!(find_peer(&after_merge, nodes[2]).unwrap().is_witness); + must_get_none(&cluster.get_engine(3), b"k1"); + must_get_none(&cluster.get_engine(3), b"k2"); + // epoch of witness is updated + assert_eq!( + cluster + .region_local_state(after_merge.get_id(), nodes[2]) + .get_region() + .get_region_epoch(), + after_merge.get_region_epoch() + ); + + // split again + cluster.must_split(&after_merge, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k2"); + assert!(find_peer(&left, nodes[2]).unwrap().is_witness); + assert!(find_peer(&right, nodes[2]).unwrap().is_witness); + + // can't merge with different witness location + let mut peer_on_store3 = find_peer(&left, nodes[2]).unwrap().clone(); + become_non_witness(&cluster, left.get_id(), &mut peer_on_store3); + let left = cluster.get_region(b"k1"); + let req = new_admin_request( + left.get_id(), + left.get_region_epoch(), + new_prepare_merge(right), + ); + let resp = cluster + .call_command_on_leader(req, Duration::from_millis(100)) + .unwrap(); + assert!( + resp.get_header() + .get_error() + .get_message() + .contains("peers doesn't match") + ); +} + +// Test flow of witness conf change +#[test] +fn test_witness_conf_change() { + let mut cluster = new_server_cluster(0, 3); + 
cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.must_put(b"k1", b"v1"); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1.clone()); + + // can't switch witness by conf change + let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + let mut peer = peer_on_store3.clone(); + peer.set_is_witness(true); + let mut cp = ChangePeerRequest::default(); + cp.set_change_type(ConfChangeType::AddLearnerNode); + cp.set_peer(peer); + let req = new_admin_request( + region.get_id(), + region.get_region_epoch(), + new_change_peer_v2_request(vec![cp]), + ); + let resp = cluster + .call_command_on_leader(req, Duration::from_millis(100)) + .unwrap(); + assert!(resp.get_header().has_error()); + + // add a new witness peer + cluster + .pd_client + .must_remove_peer(region.get_id(), peer_on_store3.clone()); + peer_on_store3.set_is_witness(true); + let applied_index = cluster.apply_state(1, 2).applied_index; + cluster + .pd_client + .must_add_peer(region.get_id(), peer_on_store3.clone()); + must_get_none(&cluster.get_engine(3), b"k1"); + let region = cluster.get_region(b"k1"); + cluster.wait_applied_index(region.get_id(), nodes[2], applied_index + 1); + assert_eq!( + cluster + .region_local_state(region.get_id(), nodes[2]) + .get_region(), + ®ion + ); + + // remove a witness peer + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster + .pd_client + .must_remove_peer(region.get_id(), peer_on_store3); + + assert_eq!( + cluster + .region_local_state(region.get_id(), nodes[2]) + .get_state(), + PeerState::Tombstone + ); +} + +// #[test] +// // Test flow of switch witness +// fn test_witness_switch_witness() { +// let mut cluster = new_server_cluster(0, 3); +// 
cluster.run(); +// let nodes = Vec::from_iter(cluster.get_node_ids()); +// assert_eq!(nodes.len(), 3); + +// let pd_client = Arc::clone(&cluster.pd_client); +// pd_client.disable_default_operator(); + +// cluster.must_put(b"k1", b"v1"); + +// let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); +// let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap(); +// cluster.must_transfer_leader(region.get_id(), peer_on_store1.clone()); + +// // nonwitness -> witness +// let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); +// become_witness(&cluster, region.get_id(), &mut peer_on_store3); + +// std::thread::sleep(Duration::from_millis(100)); +// must_get_none(&cluster.get_engine(3), b"k1"); + +// // witness -> nonwitness +// peer_on_store3.set_role(metapb::PeerRole::Learner); +// cluster +// .pd_client +// .must_add_peer(region.get_id(), peer_on_store3.clone()); +// cluster +// .pd_client +// .must_remove_peer(region.get_id(), peer_on_store3.clone()); +// peer_on_store3.set_is_witness(false); +// cluster +// .pd_client +// .must_add_peer(region.get_id(), peer_on_store3.clone()); +// std::thread::sleep(Duration::from_millis(100)); +// must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); +// } + +// TODO: add back when switch witness is supported +// // Test the case that leader is forbidden to become witness +// #[test] +// fn test_witness_leader() { +// let mut cluster = new_server_cluster(0, 3); +// cluster.run(); +// let nodes = Vec::from_iter(cluster.get_node_ids()); +// assert_eq!(nodes.len(), 3); + +// let pd_client = Arc::clone(&cluster.pd_client); +// pd_client.disable_default_operator(); + +// cluster.must_put(b"k1", b"v1"); + +// let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); +// let mut peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); +// cluster.must_transfer_leader(region.get_id(), peer_on_store1.clone()); + +// // can't make leader to witness +// peer_on_store1.set_is_witness(true); +// 
cluster +// .pd_client +// .add_peer(region.get_id(), peer_on_store1.clone()); + +// std::thread::sleep(Duration::from_millis(100)); +// assert_eq!( +// cluster.leader_of_region(region.get_id()).unwrap().store_id, +// 1 +// ); +// // leader changes to witness failed, so still can get the value +// must_get_equal(&cluster.get_engine(nodes[0]), b"k1", b"v1"); + +// let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); +// // can't transfer leader to witness +// cluster.transfer_leader(region.get_id(), &mut peer_on_store3); +// assert_eq!( +// cluster.leader_of_region(region.get_id()).unwrap().store_id, +// nodes[0], +// ); +// } + +// TODO: add back when election priority is supported +// // Test the case that witness can't be elected as leader based on election +// // priority when there is no log gap +// #[test] +// fn test_witness_election_priority() { +// let mut cluster = new_server_cluster(0, 3); +// cluster.run(); +// let nodes = Vec::from_iter(cluster.get_node_ids()); +// assert_eq!(nodes.len(), 3); + +// let pd_client = Arc::clone(&cluster.pd_client); +// pd_client.disable_default_operator(); + +// let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); +// // nonwitness -> witness +// let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); +// become_witness(&cluster, region.get_id(), &mut peer_on_store3); +// cluster.must_put(b"k0", b"v0"); + +// // make sure logs are replicated to the witness +// std::thread::sleep(Duration::from_millis(100)); + +// for i in 1..10 { +// let node = +// cluster.leader_of_region(region.get_id()).unwrap().store_id; cluster. 
+// stop_node(node); let (k, v) = (format!("k{}", i), format!("v{}", i)); +// let key = k.as_bytes(); +// let value = v.as_bytes(); +// cluster.must_put(key, value); +// // the witness can't be elected as the leader when there is no log +// gap assert_ne!( +// cluster.leader_of_region(region.get_id()).unwrap().store_id, +// nodes[2], +// ); +// cluster.run_node(node).unwrap(); +// } +// } + +// TODO: add back when raft log gc logic is updated for witness +// // Test the case that truncated index won't advance when there is a witness +// even // if the gap gap exceeds the gc count limit +// #[test] +// fn test_witness_raftlog_gc_lagged_follower() { +// let mut cluster = new_server_cluster(0, 3); +// cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); +// cluster.run(); +// let nodes = Vec::from_iter(cluster.get_node_ids()); +// assert_eq!(nodes.len(), 3); + +// let pd_client = Arc::clone(&cluster.pd_client); +// pd_client.disable_default_operator(); + +// cluster.must_put(b"k0", b"v0"); + +// let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); +// let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); +// cluster.must_transfer_leader(region.get_id(), peer_on_store1); +// // nonwitness -> witness +// let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); +// become_witness(&cluster, region.get_id(), &mut peer_on_store3); + +// // make sure raft log gc is triggered +// std::thread::sleep(Duration::from_millis(200)); +// let mut before_states = HashMap::default(); +// for (&id, engines) in &cluster.engines { +// let mut state: RaftApplyState = get_raft_msg_or_default(engines, +// &keys::apply_state_key(1)); before_states.insert(id, +// state.take_truncated_state()); } + +// // one follower is down +// cluster.stop_node(nodes[1]); + +// // write some data to make log gap exceeds the gc limit +// for i in 1..1000 { +// let (k, v) = (format!("k{}", i), format!("v{}", i)); +// let key = k.as_bytes(); +// let value = 
v.as_bytes(); +// cluster.must_put(key, value); +// } + +// // the truncated index is not advanced +// for (&id, engines) in &cluster.engines { +// let state: RaftApplyState = get_raft_msg_or_default(engines, +// &keys::apply_state_key(1)); assert!(state.get_truncated_state(). +// get_index() - before_states[&id].get_index() < 10); } + +// // the follower is back online +// cluster.run_node(nodes[1]).unwrap(); +// cluster.must_put(b"k00", b"v00"); +// must_get_equal(&cluster.get_engine(nodes[1]), b"k00", b"v00"); +// // make sure raft log gc is triggered +// std::thread::sleep(Duration::from_millis(300)); + +// // the truncated index is advanced now, as all the peers has replicated +// for (&id, engines) in &cluster.engines { +// let state: RaftApplyState = get_raft_msg_or_default(engines, +// &keys::apply_state_key(1)); assert_ge!( +// state.get_truncated_state().get_index() - +// before_states[&id].get_index(), 900 +// ); +// } +// } + +// TODO: add back when raft log gc logic is updated for witness +// // Test the case that truncated index is advance when there is a lagged +// witness #[test] +// fn test_witness_raftlog_gc_lagged_witness() { +// let mut cluster = new_server_cluster(0, 3); +// cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); +// cluster.run(); +// let nodes = Vec::from_iter(cluster.get_node_ids()); +// assert_eq!(nodes.len(), 3); + +// let pd_client = Arc::clone(&cluster.pd_client); +// pd_client.disable_default_operator(); + +// let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); +// let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); +// cluster.must_transfer_leader(region.get_id(), peer_on_store1); +// // nonwitness -> witness +// let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); +// become_witness(&cluster, region.get_id(), &mut peer_on_store3); +// cluster.must_put(b"k0", b"v0"); + +// // make sure raft log gc is triggered +// std::thread::sleep(Duration::from_millis(200)); +// 
let mut before_states = HashMap::default(); +// for (&id, engines) in &cluster.engines { +// let mut state: RaftApplyState = get_raft_msg_or_default(engines, +// &keys::apply_state_key(1)); before_states.insert(id, +// state.take_truncated_state()); } + +// // the witness is down +// cluster.stop_node(nodes[2]); + +// // write some data to make log gap exceeds the gc limit +// for i in 1..1000 { +// let (k, v) = (format!("k{}", i), format!("v{}", i)); +// let key = k.as_bytes(); +// let value = v.as_bytes(); +// cluster.must_put(key, value); +// } + +// // the witness is back online +// cluster.run_node(nodes[2]).unwrap(); + +// cluster.must_put(b"k00", b"v00"); +// std::thread::sleep(Duration::from_millis(200)); + +// // the truncated index is advanced +// for (&id, engines) in &cluster.engines { +// let state: RaftApplyState = get_raft_msg_or_default(engines, +// &keys::apply_state_key(1)); println!("{} {}", id, +// state.get_truncated_state().get_index()); assert_ge!( +// state.get_truncated_state().get_index() - +// before_states[&id].get_index(), 900 +// ); +// } +// } + +// Test the case replica read can't be performed on witness peer. 
+#[test] +fn test_witness_replica_read() { + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.must_put(b"k0", b"v0"); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1); + // nonwitness -> witness + let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + become_witness(&cluster, region.get_id(), &mut peer_on_store3); + + let mut request = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![new_get_cmd(b"k0")], + false, + ); + request.mut_header().set_peer(peer_on_store3); + request.mut_header().set_replica_read(true); + + let resp = cluster + .call_command_on_node(nodes[2], request, Duration::from_millis(100)) + .unwrap(); + assert_eq!( + resp.get_header().get_error().get_recovery_in_progress(), + &kvproto::errorpb::RecoveryInProgress { + region_id: region.get_id(), + ..Default::default() + } + ); +} + +fn must_get_error_recovery_in_progress( + cluster: &mut Cluster, + region: &metapb::Region, + cmd: kvproto::raft_cmdpb::Request, +) { + let req = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![cmd], + true, + ); + let resp = cluster + .call_command_on_leader(req, Duration::from_millis(100)) + .unwrap(); + assert_eq!( + resp.get_header().get_error().get_recovery_in_progress(), + &kvproto::errorpb::RecoveryInProgress { + region_id: region.get_id(), + ..Default::default() + }, + "{:?}", + resp + ); +} + +// Test the case that witness replicate logs to lagging behind follower when +// leader is down +#[test] +fn test_witness_leader_down() { + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + let nodes = 
Vec::from_iter(cluster.get_node_ids()); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.must_put(b"k0", b"v0"); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1); + + let mut peer_on_store2 = find_peer(®ion, nodes[1]).unwrap().clone(); + // nonwitness -> witness + become_witness(&cluster, region.get_id(), &mut peer_on_store2); + + // the other follower is isolated + cluster.add_send_filter(IsolationFilterFactory::new(3)); + for i in 1..10 { + cluster.must_put(format!("k{}", i).as_bytes(), format!("v{}", i).as_bytes()); + } + // the leader is down + cluster.stop_node(1); + + // witness would help to replicate the logs + cluster.clear_send_filters(); + + // forbid writes + let put = new_put_cmd(b"k3", b"v3"); + must_get_error_recovery_in_progress(&mut cluster, ®ion, put); + // forbid reads + let get = new_get_cmd(b"k1"); + must_get_error_recovery_in_progress(&mut cluster, ®ion, get); + // forbid read index + let read_index = new_read_index_cmd(); + must_get_error_recovery_in_progress(&mut cluster, ®ion, read_index); + + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store3); + cluster.must_put(b"k1", b"v1"); + assert_eq!( + cluster.leader_of_region(region.get_id()).unwrap().store_id, + nodes[2], + ); + assert_eq!(cluster.must_get(b"k9"), Some(b"v9".to_vec())); +} From f2e89a4e80e2b99d7ac2abe74d38d4f6eac9ceb6 Mon Sep 17 00:00:00 2001 From: lijie Date: Wed, 9 Nov 2022 17:51:51 +0800 Subject: [PATCH 0322/1149] bump version to 6.5.0-alpha (#13740) close tikv/tikv#13763 Signed-off-by: lijie Co-authored-by: Yilin Chen Co-authored-by: Lifu Wu --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
a1b238d0148..9aa43209906 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6177,7 +6177,7 @@ dependencies = [ [[package]] name = "tikv" -version = "6.4.0-alpha" +version = "6.5.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index a408e4a84ca..f75a4a6511f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tikv" -version = "6.4.0-alpha" +version = "6.5.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From e87d16d0b17e507e4fea0923c8438ddfd0296a0e Mon Sep 17 00:00:00 2001 From: haojinming Date: Thu, 10 Nov 2022 10:37:53 +0800 Subject: [PATCH 0323/1149] cdc: remove deprecated config (#13762) close tikv/tikv#13761 raw-min-ts-outlier-threshold is marked as deprecated from v6.4. It should be removed from v6.5. https://docs.pingcap.com/zh/tidb/dev/tikv-configuration-file#raw-min-ts-outlier-threshold-%E4%BB%8E-v620-%E7%89%88%E6%9C%AC%E5%BC%80%E5%A7%8B%E5%BC%95%E5%85%A5 Signed-off-by: haojinming Co-authored-by: Ti Chi Robot --- src/config.rs | 13 ------------- tests/integrations/config/mod.rs | 1 - 2 files changed, 14 deletions(-) diff --git a/src/config.rs b/src/config.rs index c978b1bf90a..e9c4c2bb85b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2566,9 +2566,6 @@ pub struct CdcConfig { // Deprecated! preserved for compatibility check. #[online_config(skip)] #[doc(hidden)] - pub raw_min_ts_outlier_threshold: ReadableDuration, - #[online_config(skip)] - #[doc(hidden)] #[serde(skip_serializing)] pub old_value_cache_size: usize, } @@ -2591,8 +2588,6 @@ impl Default for CdcConfig { sink_memory_quota: ReadableSize::mb(512), // 512MB memory for old value cache. old_value_cache_memory_quota: ReadableSize::mb(512), - // Trigger raw region outlier judgement if resolved_ts's lag is over 60s. - raw_min_ts_outlier_threshold: ReadableDuration::secs(60), // Deprecated! preserved for compatibility check. 
old_value_cache_size: 0, } @@ -2634,14 +2629,6 @@ impl CdcConfig { ); self.incremental_scan_ts_filter_ratio = default_cfg.incremental_scan_ts_filter_ratio; } - if self.raw_min_ts_outlier_threshold.is_zero() { - warn!( - "cdc.raw_min_ts_outlier_threshold should be larger than 0, - change it to {}", - default_cfg.raw_min_ts_outlier_threshold - ); - self.raw_min_ts_outlier_threshold = default_cfg.raw_min_ts_outlier_threshold; - } Ok(()) } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index a61b66e1436..5cb8c837fb1 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -790,7 +790,6 @@ fn test_serde_custom_tikv_config() { tso_worker_threads: 2, old_value_cache_memory_quota: ReadableSize::mb(14), sink_memory_quota: ReadableSize::mb(7), - raw_min_ts_outlier_threshold: ReadableDuration::secs(60), }; value.resolved_ts = ResolvedTsConfig { enable: true, From 0339d5188d6b9e35bc7250e1a2f3cdb008f238da Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Thu, 10 Nov 2022 15:35:53 +0800 Subject: [PATCH 0324/1149] storage: calculate last_change_ts in rollback (#13749) ref tikv/tikv#13694 This commit supports calculating last_change_ts when writing a new Rollback record. To get the correct last_change_ts, we always call seek_write to find the last write record before start_ts. 
Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- src/storage/txn/actions/check_txn_status.rs | 41 +++++- .../txn/commands/check_secondary_locks.rs | 129 +++++++++++++++++- src/storage/txn/commands/check_txn_status.rs | 111 ++++++++++++++- 3 files changed, 274 insertions(+), 7 deletions(-) diff --git a/src/storage/txn/actions/check_txn_status.rs b/src/storage/txn/actions/check_txn_status.rs index 4c900e5a438..126c34ade92 100644 --- a/src/storage/txn/actions/check_txn_status.rs +++ b/src/storage/txn/actions/check_txn_status.rs @@ -8,6 +8,7 @@ use crate::storage::{ metrics::MVCC_CHECK_TXN_STATUS_COUNTER_VEC, reader::OverlappedWrite, ErrorInner, LockType, MvccTxn, ReleasedLock, Result, SnapshotReader, TxnCommitRecord, }, + txn::{sched_pool::tls_can_enable, scheduler::LAST_CHANGE_TS}, Snapshot, TxnStatus, }; @@ -134,7 +135,8 @@ pub fn check_txn_status_missing_lock( // Insert a Rollback to Write CF in case that a stale prewrite // command is received after a cleanup command. - if let Some(write) = action.construct_write(ts, overlapped_write) { + if let Some(mut write) = action.construct_write(ts, overlapped_write) { + update_last_change_for_rollback(reader, &mut write, &primary_key, ts)?; txn.put_write(primary_key, ts, write.as_ref().to_bytes()); } MVCC_CHECK_TXN_STATUS_COUNTER_VEC.rollback.inc(); @@ -168,7 +170,8 @@ pub fn rollback_lock( // Only the primary key of a pessimistic transaction needs to be protected. let protected: bool = is_pessimistic_txn && key.is_encoded_from(&lock.primary); - if let Some(write) = make_rollback(reader.start_ts, protected, overlapped_write) { + if let Some(mut write) = make_rollback(reader.start_ts, protected, overlapped_write) { + update_last_change_for_rollback(reader, &mut write, &key, lock.ts)?; txn.put_write(key.clone(), reader.start_ts, write.as_ref().to_bytes()); } @@ -192,6 +195,40 @@ pub fn collapse_prev_rollback( Ok(()) } +/// Updates the last_change_ts of a new Rollback record. 
+/// +/// When writing a new Rollback record, we don't always know about the +/// information about the last change. So, we will call `seek_write` again to +/// calculate the last_change_ts. +/// +/// The `seek_write` here is usually cheap because this functions is typically +/// called after `get_txn_commit_record` and `get_txn_commit_record` should have +/// moved the cursor around the record we want. +pub fn update_last_change_for_rollback( + reader: &mut SnapshotReader, + write: &mut Write, + key: &Key, + ts: TimeStamp, +) -> Result<()> { + // Also update the last_change_ts if we are writing an overlapped rollback to a + // LOCK record. Actually, because overlapped rollbacks are rare, it does not + // solve the inaccuracy caused by inserted rollback (and we don't intend it + // because it's uncommon). Just do it when it happens. + if tls_can_enable(LAST_CHANGE_TS) + && (write.write_type == WriteType::Lock || write.write_type == WriteType::Rollback) + { + if let Some((commit_ts, w)) = reader.seek_write(key, ts)? { + // Even with collapsed rollback, the deleted rollbacks will become tombstones + // which we probably need to skip them one by one. That's why we always use + // `next_last_change_info` here to calculate and count them in + // `versions_to_last_change`. + (write.last_change_ts, write.versions_to_last_change) = + w.next_last_change_info(commit_ts); + } + } + Ok(()) +} + /// Generate the Write record that should be written that means to perform a /// specified rollback operation. 
pub fn make_rollback( diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index 1a4b547b6d7..bd494e91edc 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -8,7 +8,9 @@ use crate::storage::{ lock_manager::LockManager, mvcc::{LockType, MvccTxn, SnapshotReader, TimeStamp, TxnCommitRecord}, txn::{ - actions::check_txn_status::{collapse_prev_rollback, make_rollback}, + actions::check_txn_status::{ + collapse_prev_rollback, make_rollback, update_last_change_for_rollback, + }, commands::{ Command, CommandExt, ReaderWithStats, ReleasedLocks, ResponsePolicy, TypedCommand, WriteCommand, WriteContext, WriteResult, @@ -119,7 +121,10 @@ impl WriteCommand for CheckSecondaryLocks { } // We must protect this rollback in case this rollback is collapsed and a stale // acquire_pessimistic_lock and prewrite succeed again. - if let Some(write) = make_rollback(self.start_ts, true, rollback_overlapped_write) { + if let Some(mut write) = + make_rollback(self.start_ts, true, rollback_overlapped_write) + { + update_last_change_for_rollback(&mut reader, &mut write, &key, self.start_ts)?; txn.put_write(key.clone(), self.start_ts, write.as_ref().to_bytes()); collapse_prev_rollback(&mut txn, &mut reader, &key)?; } @@ -165,14 +170,20 @@ impl WriteCommand for CheckSecondaryLocks { pub mod tests { use concurrency_manager::ConcurrencyManager; use kvproto::kvrpcpb::Context; + use tikv_kv::Statistics; use tikv_util::deadline::Deadline; + use txn_types::Mutation; use super::*; use crate::storage::{ kv::TestEngineBuilder, lock_manager::MockLockManager, mvcc::tests::*, - txn::{commands::WriteCommand, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*}, + txn::{ + commands::{test_util::prewrite_with_cm, WriteCommand}, + scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, + tests::*, + }, Engine, }; @@ -343,4 +354,116 @@ pub mod tests { } must_get_overlapped_rollback(&mut 
engine, b"k1", 15, 13, WriteType::Lock, Some(0)); } + + // The main logic is almost identical to + // test_rollback_calculate_last_change_info of check_txn_status. But the small + // differences about handling lock CF make it difficult to reuse code. + #[test] + fn test_rollback_calculate_last_change_info() { + use pd_client::FeatureGate; + + use crate::storage::txn::sched_pool::set_tls_feature_gate; + + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let cm = ConcurrencyManager::new(1.into()); + let k = b"k"; + let mut statistics = Statistics::default(); + + must_prewrite_put(&mut engine, k, b"v1", k, 5); + must_commit(&mut engine, k, 5, 6); + must_prewrite_put(&mut engine, k, b"v2", k, 7); + must_commit(&mut engine, k, 7, 8); + must_prewrite_put(&mut engine, k, b"v3", k, 30); + must_commit(&mut engine, k, 30, 35); + + // TiKV 6.4 should not write last_change_ts. + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.4.0").unwrap(); + set_tls_feature_gate(feature_gate); + must_success(&mut engine, k, 40, SecondaryLocksStatus::RolledBack); + let rollback = must_written(&mut engine, k, 40, 40, WriteType::Rollback); + assert!(rollback.last_change_ts.is_zero()); + assert_eq!(rollback.versions_to_last_change, 0); + + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.5.0").unwrap(); + set_tls_feature_gate(feature_gate); + + must_prewrite_put(&mut engine, k, b"v4", k, 45); + must_commit(&mut engine, k, 45, 50); + + // Rollback when there is no lock; prev writes: + // - 50: PUT + must_success(&mut engine, k, 55, SecondaryLocksStatus::RolledBack); + let rollback = must_written(&mut engine, k, 55, 55, WriteType::Rollback); + assert_eq!(rollback.last_change_ts, 50.into()); + assert_eq!(rollback.versions_to_last_change, 1); + + // Write a LOCK; prev writes: + // - 55: ROLLBACK + // - 50: PUT + let res = prewrite_with_cm( + &mut engine, + cm, + &mut statistics, + 
vec![Mutation::make_lock(Key::from_raw(k))], + k.to_vec(), + 60, + Some(70), + ) + .unwrap(); + assert!(!res.one_pc_commit_ts.is_zero()); + let lock_commit_ts = res.one_pc_commit_ts; + let lock = must_written(&mut engine, k, 60, res.one_pc_commit_ts, WriteType::Lock); + assert_eq!(lock.last_change_ts, 50.into()); + assert_eq!(lock.versions_to_last_change, 2); + + // Write another ROLLBACK by rolling back a pessimistic lock; prev writes: + // - 61: LOCK + // - 55: ROLLBACK + // - 50: PUT + must_acquire_pessimistic_lock(&mut engine, k, b"pk", 70, 75); + must_success(&mut engine, k, 70, SecondaryLocksStatus::RolledBack); + let rollback = must_written(&mut engine, k, 70, 70, WriteType::Rollback); + assert_eq!(rollback.last_change_ts, 50.into()); + assert_eq!(rollback.versions_to_last_change, 3); + + // last_change_ts should point to the latest record before start_ts; prev + // writes: + // - 8: PUT + must_acquire_pessimistic_lock(&mut engine, k, k, 10, 75); + must_success(&mut engine, k, 10, SecondaryLocksStatus::RolledBack); + must_unlocked(&mut engine, k); + let rollback = must_written(&mut engine, k, 10, 10, WriteType::Rollback); + assert_eq!(rollback.last_change_ts, 8.into()); + assert_eq!(rollback.versions_to_last_change, 1); + + // Overlapped rollback should not update the last_change_ts of PUT; prev writes: + // - 8: PUT <- rollback overlaps + // - 6: PUT + must_success(&mut engine, k, 8, SecondaryLocksStatus::RolledBack); + let put = must_written(&mut engine, k, 7, 8, WriteType::Put); + assert!(put.last_change_ts.is_zero()); + assert_eq!(put.versions_to_last_change, 0); + assert!(put.has_overlapped_rollback); + + // Overlapped rollback can update the last_change_ts of LOCK; writes: + // - 61: PUT <- rollback overlaps + // - 57: ROLLBACK (inserted later) + // - 55: ROLLBACK + // - 50: PUT + must_rollback(&mut engine, k, 57, true); + let rollback = must_written(&mut engine, k, 57, 57, WriteType::Rollback); + assert_eq!(rollback.last_change_ts, 50.into()); + 
assert_eq!(rollback.versions_to_last_change, 2); + must_success( + &mut engine, + k, + lock_commit_ts, + SecondaryLocksStatus::RolledBack, + ); + let lock = must_written(&mut engine, k, 60, lock_commit_ts, WriteType::Lock); + assert_eq!(lock.last_change_ts, 50.into()); + assert_eq!(lock.versions_to_last_change, 3); + } } diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index 58f7f557448..b74e7d5cb7c 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -144,8 +144,9 @@ impl WriteCommand for CheckTxnStatus { pub mod tests { use concurrency_manager::ConcurrencyManager; use kvproto::kvrpcpb::{Context, PrewriteRequestPessimisticAction::*}; + use tikv_kv::Statistics; use tikv_util::deadline::Deadline; - use txn_types::{Key, WriteType}; + use txn_types::{Key, Mutation, WriteType}; use super::{TxnStatus::*, *}; use crate::storage::{ @@ -153,7 +154,9 @@ pub mod tests { lock_manager::MockLockManager, mvcc::tests::*, txn::{ - commands::{pessimistic_rollback, WriteCommand, WriteContext}, + commands::{ + pessimistic_rollback, test_util::prewrite_with_cm, WriteCommand, WriteContext, + }, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*, }, @@ -1163,4 +1166,108 @@ pub mod tests { must_unlocked(&mut engine, k); must_get_rollback_ts(&mut engine, k, ts(50, 0)); } + + #[test] + fn test_rollback_calculate_last_change_info() { + use pd_client::FeatureGate; + + use crate::storage::txn::sched_pool::set_tls_feature_gate; + + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let cm = ConcurrencyManager::new(1.into()); + let k = b"k"; + let mut statistics = Statistics::default(); + + must_prewrite_put(&mut engine, k, b"v1", k, 5); + must_commit(&mut engine, k, 5, 6); + must_prewrite_put(&mut engine, k, b"v2", k, 7); + must_commit(&mut engine, k, 7, 8); + must_prewrite_put(&mut engine, k, b"v3", k, 30); + must_commit(&mut engine, k, 30, 35); + + 
// TiKV 6.4 should not write last_change_ts. + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.4.0").unwrap(); + set_tls_feature_gate(feature_gate); + must_rollback(&mut engine, k, 40, true); + let rollback = must_written(&mut engine, k, 40, 40, WriteType::Rollback); + assert!(rollback.last_change_ts.is_zero()); + assert_eq!(rollback.versions_to_last_change, 0); + + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.5.0").unwrap(); + set_tls_feature_gate(feature_gate); + + must_prewrite_put(&mut engine, k, b"v4", k, 45); + must_commit(&mut engine, k, 45, 50); + + // Rollback when there is no lock; prev writes: + // - 50: PUT + must_rollback(&mut engine, k, 55, true); + let rollback = must_written(&mut engine, k, 55, 55, WriteType::Rollback); + assert_eq!(rollback.last_change_ts, 50.into()); + assert_eq!(rollback.versions_to_last_change, 1); + + // Write a LOCK; prev writes: + // - 55: ROLLBACK + // - 50: PUT + let res = prewrite_with_cm( + &mut engine, + cm, + &mut statistics, + vec![Mutation::make_lock(Key::from_raw(k))], + k.to_vec(), + 60, + Some(70), + ) + .unwrap(); + assert!(!res.one_pc_commit_ts.is_zero()); + let lock_commit_ts = res.one_pc_commit_ts; + let lock = must_written(&mut engine, k, 60, res.one_pc_commit_ts, WriteType::Lock); + assert_eq!(lock.last_change_ts, 50.into()); + assert_eq!(lock.versions_to_last_change, 2); + + // Write another ROLLBACK; prev writes: + // - 61: LOCK + // - 55: ROLLBACK + // - 50: PUT + must_rollback(&mut engine, k, 70, true); + let rollback = must_written(&mut engine, k, 70, 70, WriteType::Rollback); + assert_eq!(rollback.last_change_ts, 50.into()); + assert_eq!(rollback.versions_to_last_change, 3); + + // last_change_ts should point to the latest record before start_ts; prev + // writes: + // - 8: PUT + must_acquire_pessimistic_lock(&mut engine, k, k, 10, 75); + must_pessimistic_prewrite_put(&mut engine, k, b"v5", k, 10, 75, DoPessimisticCheck); + must_rollback(&mut engine, 
k, 10, true); + must_unlocked(&mut engine, k); + let rollback = must_written(&mut engine, k, 10, 10, WriteType::Rollback); + assert_eq!(rollback.last_change_ts, 8.into()); + assert_eq!(rollback.versions_to_last_change, 1); + + // Overlapped rollback should not update the last_change_ts of PUT; prev writes: + // - 8: PUT <- rollback overlaps + // - 6: PUT + must_rollback(&mut engine, k, 8, true); + let put = must_written(&mut engine, k, 7, 8, WriteType::Put); + assert!(put.last_change_ts.is_zero()); + assert_eq!(put.versions_to_last_change, 0); + assert!(put.has_overlapped_rollback); + + // Overlapped rollback can update the last_change_ts of LOCK; writes: + // - 61: PUT <- rollback overlaps + // - 57: ROLLBACK (inserted later) + // - 55: ROLLBACK + // - 50: PUT + must_rollback(&mut engine, k, 57, true); + let rollback = must_written(&mut engine, k, 57, 57, WriteType::Rollback); + assert_eq!(rollback.last_change_ts, 50.into()); + assert_eq!(rollback.versions_to_last_change, 2); + must_rollback(&mut engine, k, lock_commit_ts, true); + let lock = must_written(&mut engine, k, 60, lock_commit_ts, WriteType::Lock); + assert_eq!(lock.last_change_ts, 50.into()); + assert_eq!(lock.versions_to_last_change, 3); + } } From fe997db4db8a5a096f8a45c0db3eb3c2e5879262 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Thu, 10 Nov 2022 15:51:52 +0800 Subject: [PATCH 0325/1149] txn: save last change info in write records (#13759) ref tikv/tikv#13694 In this commit, the commit action will save the last_change_ts and versions_to_last_change in the lock to the write record. It is unncessary to check the write type because it is checked during prewrite. So, among the committable locks, only those of Lock type will have last_change_ts and versions_to_last_change. 
Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- src/storage/mvcc/reader/reader.rs | 3 +- src/storage/mvcc/reader/scanner/forward.rs | 10 ------- src/storage/txn/actions/commit.rs | 32 +++++++++++++++++++++- src/storage/txn/actions/prewrite.rs | 7 ----- src/storage/txn/commands/prewrite.rs | 8 ------ 5 files changed, 33 insertions(+), 27 deletions(-) diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index d4767f3bb1a..8e35e00936e 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -1395,7 +1395,8 @@ pub mod tests { let (commit_ts, write) = reader.seek_write(&k, 20.into()).unwrap().unwrap(); assert_eq!(commit_ts, 20.into()); - assert_eq!(write, Write::new(WriteType::Lock, 10.into(), None)); + assert_eq!(write.write_type, WriteType::Lock); + assert_eq!(write.start_ts, 10.into()); assert_eq!(reader.statistics.write.seek, 1); assert_eq!(reader.statistics.write.next, 1); diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 03f44deed7c..32898f1bfe7 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -2366,16 +2366,6 @@ mod delta_entry_tests { #[test] fn test_mess() { - use pd_client::FeatureGate; - - use crate::storage::txn::sched_pool::set_tls_feature_gate; - - // Set version to 6.5.0 to enable last_change_ts. - // TODO: Remove this after TiKV version reaches 6.5 - let feature_gate = FeatureGate::default(); - feature_gate.set_version("6.5.0").unwrap(); - set_tls_feature_gate(feature_gate); - // TODO: non-pessimistic lock should be returned enven if its ts < from_ts. 
// (key, lock, [commit1, commit2, ...]) // Values ends with 'L' will be made larger than `SHORT_VALUE_MAX_LEN` so it diff --git a/src/storage/txn/actions/commit.rs b/src/storage/txn/actions/commit.rs index eb798090ba2..2ba4f527d0e 100644 --- a/src/storage/txn/actions/commit.rs +++ b/src/storage/txn/actions/commit.rs @@ -91,7 +91,8 @@ pub fn commit( WriteType::from_lock_type(lock.lock_type).unwrap(), reader.start_ts, lock.short_value.take(), - ); + ) + .set_last_change(lock.last_change_ts, lock.versions_to_last_change); for ts in &lock.rollback_ts { if *ts == commit_ts { @@ -320,4 +321,33 @@ pub mod tests { must_err(&mut engine, k, ts(60, 0), ts(65, 0)); must_succeed(&mut engine, k, ts(60, 0), ts(80, 0)); } + + #[test] + fn test_inherit_last_change_info_from_lock() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + + let k = b"k"; + must_prewrite_put(&mut engine, k, b"v1", k, 5); + must_succeed(&mut engine, k, 5, 10); + + // WriteType is Lock + must_prewrite_lock(&mut engine, k, k, 15); + let lock = must_locked(&mut engine, k, 15); + assert_eq!(lock.last_change_ts, 10.into()); + assert_eq!(lock.versions_to_last_change, 1); + must_succeed(&mut engine, k, 15, 20); + let write = must_written(&mut engine, k, 15, 20, WriteType::Lock); + assert_eq!(write.last_change_ts, 10.into()); + assert_eq!(write.versions_to_last_change, 1); + + // WriteType is Put + must_prewrite_put(&mut engine, k, b"v2", k, 25); + let lock = must_locked(&mut engine, k, 25); + assert!(lock.last_change_ts.is_zero()); + assert_eq!(lock.versions_to_last_change, 0); + must_succeed(&mut engine, k, 25, 30); + let write = must_written(&mut engine, k, 25, 30, WriteType::Put); + assert!(write.last_change_ts.is_zero()); + assert_eq!(write.versions_to_last_change, 0); + } } diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 8abaf1428e4..4c13a9d244b 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -2412,13 
+2412,6 @@ pub mod tests { #[test] fn test_inherit_last_change_ts_from_pessimistic_lock() { use engine_traits::CF_LOCK; - use pd_client::FeatureGate; - - use crate::storage::txn::sched_pool::set_tls_feature_gate; - - let feature_gate = FeatureGate::default(); - feature_gate.set_version("6.5.0").unwrap(); - set_tls_feature_gate(feature_gate); let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); let key = b"k"; diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 06f9cd1f818..6b54a1f88db 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -2516,10 +2516,6 @@ mod tests { let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); - let feature_gate = FeatureGate::default(); - feature_gate.set_version("6.5.0").unwrap(); - set_tls_feature_gate(feature_gate); - let key = b"k"; let value = b"v"; must_prewrite_put(&mut engine, key, value, key, 10); @@ -2606,10 +2602,6 @@ mod tests { let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); - let feature_gate = FeatureGate::default(); - feature_gate.set_version("6.5.0").unwrap(); - set_tls_feature_gate(feature_gate); - let key = b"k"; let value = b"v"; must_prewrite_put(&mut engine, key, value, key, 10); From 60ded8941095c4ae307979cedb4ef28f7e9fcf40 Mon Sep 17 00:00:00 2001 From: qupeng Date: Fri, 11 Nov 2022 12:57:53 +0800 Subject: [PATCH 0326/1149] cdc: set min-ts-interval to 200ms to reduce latency (#12839) close tikv/tikv#12840 cdc: set min-ts-interval to 200ms to reduce latency Signed-off-by: qupeng Co-authored-by: Neil Shen --- components/cdc/src/endpoint.rs | 7 +++++-- src/config.rs | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 8aa6aad3c29..4086c8623b5 100644 --- 
a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -1399,7 +1399,10 @@ mod tests { #[test] fn test_api_version_check() { - let cfg = CdcConfig::default(); + let mut cfg = CdcConfig::default(); + // To make the case more stable. + cfg.min_ts_interval = ReadableDuration(Duration::from_secs(1)); + let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); suite.add_region(1, 100); let quota = crate::channel::MemoryQuota::new(usize::MAX); @@ -1539,7 +1542,7 @@ mod tests { } let diff = cfg.diff(&updated_cfg); ep.run(Task::ChangeConfig(diff)); - assert_eq!(ep.config.min_ts_interval, ReadableDuration::secs(1)); + assert_eq!(ep.config.min_ts_interval, ReadableDuration::millis(200)); assert_eq!(ep.config.hibernate_regions_compatible, true); { diff --git a/src/config.rs b/src/config.rs index e9c4c2bb85b..c33c8e8b63c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2573,7 +2573,7 @@ pub struct CdcConfig { impl Default for CdcConfig { fn default() -> Self { Self { - min_ts_interval: ReadableDuration::secs(1), + min_ts_interval: ReadableDuration::millis(200), hibernate_regions_compatible: true, // 4 threads for incremental scan. incremental_scan_threads: 4, From d23bea3fa69ae0060e94e9066ec883769f49bf87 Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 11 Nov 2022 14:55:53 +0800 Subject: [PATCH 0327/1149] raftstore-v2: add proposal conflict checker (#13737) close tikv/tikv#12842 The implementation is a bit simplified and adjusted for v2, including: - Not require term for every methods, because when leader's term is changed, it must become follower. In this case, the checker won't be accessed until it becomes leader again. So pass term in every methods is useless. - Use the checker to detect if it's splitting and merging. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/fsm/peer.rs | 2 +- .../src/operation/command/admin/mod.rs | 29 +- .../src/operation/command/admin/split.rs | 8 +- .../src/operation/command/control.rs | 428 ++++++++++++++++++ .../raftstore-v2/src/operation/command/mod.rs | 23 +- .../src/operation/command/write/mod.rs | 61 ++- .../operation/command/write/simple_write.rs | 31 +- components/raftstore-v2/src/operation/mod.rs | 4 +- .../raftstore-v2/src/operation/query/lease.rs | 8 +- .../raftstore-v2/src/operation/query/mod.rs | 22 +- .../raftstore-v2/src/operation/ready/mod.rs | 107 ++++- components/raftstore-v2/src/raft/peer.rs | 56 +-- .../src/router/response_channel.rs | 7 + .../raftstore-v2/tests/integrations/mod.rs | 2 + .../tests/integrations/test_read.rs | 10 +- 15 files changed, 698 insertions(+), 100 deletions(-) create mode 100644 components/raftstore-v2/src/operation/command/control.rs diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 7083a9e529c..b74f8b46b1c 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -241,6 +241,6 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, } } // TODO: instead of propose pending commands immediately, we should use timeout. - self.fsm.peer.propose_pending_command(self.store_ctx); + self.fsm.peer.propose_pending_writes(self.store_ctx); } } diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index afaefeb9b7e..c3fe2cceded 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -68,9 +68,13 @@ impl Peer { ch.report_error(resp); return; } - // To maintain propose order, we need to make pending proposal first. 
- self.propose_pending_command(ctx); let cmd_type = req.get_admin_request().get_cmd_type(); + if let Some(conflict) = self.proposal_control_mut().check_conflict(Some(cmd_type)) { + conflict.delay_channel(ch); + return; + } + // To maintain propose order, we need to make pending proposal first. + self.propose_pending_writes(ctx); let res = if apply::is_conf_change_cmd(&req) { self.propose_conf_change(ctx, req) } else { @@ -83,14 +87,19 @@ impl Peer { _ => unimplemented!(), } }; - if let Err(e) = &res { - info!( - self.logger, - "failed to propose admin command"; - "cmd_type" => ?cmd_type, - "error" => ?e, - ); + match &res { + Ok(index) => self + .proposal_control_mut() + .record_proposed_admin(cmd_type, *index), + Err(e) => { + info!( + self.logger, + "failed to propose admin command"; + "cmd_type" => ?cmd_type, + "error" => ?e, + ); + } } - self.post_propose_write(ctx, res, vec![ch]); + self.post_propose_command(ctx, res, vec![ch], true); } } diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index c0d8998c4ad..198819cfd7b 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -71,12 +71,10 @@ impl Peer { mut req: RaftCmdRequest, ) -> Result { validate_batch_split(req.mut_admin_request(), self.region())?; - let mut proposal_ctx = ProposalContext::empty(); - proposal_ctx.insert(ProposalContext::SYNC_LOG); - proposal_ctx.insert(ProposalContext::SPLIT); - + // We rely on ConflictChecker to detect conflicts, so no need to set proposal + // context. 
let data = req.write_to_bytes().unwrap(); - self.propose_with_ctx(store_ctx, data, proposal_ctx.to_vec()) + self.propose(store_ctx, data) } } diff --git a/components/raftstore-v2/src/operation/command/control.rs b/components/raftstore-v2/src/operation/command/control.rs new file mode 100644 index 00000000000..5fb25b4e20d --- /dev/null +++ b/components/raftstore-v2/src/operation/command/control.rs @@ -0,0 +1,428 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{collections::LinkedList, mem, num::NonZeroU64}; + +use kvproto::{ + metapb, + raft_cmdpb::{AdminCmdType, RaftCmdRequest}, +}; +use raftstore::{ + store::{ + cmd_resp, + fsm::apply, + msg::ErrorCallback, + util::{ + admin_cmd_epoch_lookup, AdminCmdEpochState, NORMAL_REQ_CHECK_CONF_VER, + NORMAL_REQ_CHECK_VER, + }, + }, + Error, +}; + +use crate::router::CmdResChannel; + +#[derive(Debug)] +pub struct ProposedAdminCmd { + cmd_type: AdminCmdType, + committed: bool, + epoch_state: AdminCmdEpochState, + index: u64, + /// Callbacks of commands that are conflict with on going admin command. + /// + /// Callbacks are delayed to avoid making client retry with arbitrary + /// backoff. + delayed_chs: Vec, +} + +impl ProposedAdminCmd { + fn new( + cmd_type: AdminCmdType, + epoch_state: AdminCmdEpochState, + index: u64, + ) -> ProposedAdminCmd { + ProposedAdminCmd { + cmd_type, + committed: false, + epoch_state, + index, + delayed_chs: Vec::new(), + } + } + + pub fn cmd_type(&self) -> AdminCmdType { + self.cmd_type + } + + /// Delay responding to channel until the command is applied so client won't + /// retry with arbitrary timeout. + pub fn delay_channel(&mut self, ch: CmdResChannel) { + self.delayed_chs.push(ch); + } + + /// Same as `delay_channel`, but accepts a batch. 
+ pub fn delay_channels(&mut self, chs: Vec) { + if self.delayed_chs.is_empty() { + self.delayed_chs = chs; + } else { + self.delayed_chs.extend(chs); + } + } +} + +/// `ProposalControl` is a rewrite of `CmdEpochChecker` from v1. +/// +/// Admin command may change the epoch of a region. If a proposal is proposed +/// after the admin command is proposed but before the command is applied, the +/// proposal is probably to fail because of epoch not match. `ProposalControl` +/// aims to detect the failure early. With `ProposalControl`, users can assume +/// once a command is proposed, it's likely to succeed in the end. +/// +/// Compared to `CmdEpochChecker`, `ProposalControl` also traces the whole +/// lifetime of prepare merge. +pub struct ProposalControl { + // Use `LinkedList` to reduce memory footprint. In most cases, the list + // should be empty or 1 element. And access speed is not a concern. + proposed_admin_cmd: LinkedList, + pending_merge_index: u64, + term: u64, +} + +impl ProposalControl { + pub fn new(term: u64) -> ProposalControl { + ProposalControl { + proposed_admin_cmd: LinkedList::new(), + pending_merge_index: 0, + term, + } + } + + /// Clears all queued conflict callbacks if term changed. + /// + /// If term is changed, leader is probably changed. Clear all callbacks to + /// notify clients to retry with new leader. + #[inline] + pub fn maybe_update_term(&mut self, term: u64) { + match term.cmp(&self.term) { + std::cmp::Ordering::Equal => (), + std::cmp::Ordering::Greater => { + for cmd in mem::take(&mut self.proposed_admin_cmd) { + for cb in cmd.delayed_chs { + apply::notify_stale_req(term, cb); + } + } + self.term = term; + } + std::cmp::Ordering::Less => { + panic!("term should not decrease, old {}, new {}", self.term, term) + } + } + } + + /// Check if a proposal is conflict with proposed admin commands in current + /// term. If the proposal is an admin command, then its type should be + /// passed, otherwise just provide `None`. 
+ /// + /// Returns None if passing the epoch check, otherwise returns the last + /// conflict conflict proposal meta. + pub fn check_conflict( + &mut self, + cmd_type: Option, + ) -> Option<&mut ProposedAdminCmd> { + let (check_ver, check_conf_ver) = match cmd_type { + None => (NORMAL_REQ_CHECK_VER, NORMAL_REQ_CHECK_CONF_VER), + Some(ty) => { + let epoch_state = admin_cmd_epoch_lookup(ty); + (epoch_state.check_ver, epoch_state.check_conf_ver) + } + }; + self.proposed_admin_cmd.iter_mut().rev().find(|cmd| { + (check_ver && cmd.epoch_state.change_ver) + || (check_conf_ver && cmd.epoch_state.change_conf_ver) + }) + } + + /// Record an admin proposal. + /// + /// Further requests that is conflict with the admin proposal will be + /// rejected in `check_proposal_conflict`. + pub fn record_proposed_admin(&mut self, cmd_type: AdminCmdType, index: u64) { + let epoch_state = admin_cmd_epoch_lookup(cmd_type); + if !epoch_state.change_conf_ver && !epoch_state.change_ver { + return; + } + + let conflict_cmd = self.proposed_admin_cmd.iter_mut().rev().find(|cmd| { + (epoch_state.check_ver && cmd.epoch_state.change_ver) + || (epoch_state.check_conf_ver && cmd.epoch_state.change_conf_ver) + }); + assert!(conflict_cmd.is_none(), "{:?}", conflict_cmd); + + if let Some(cmd) = self.proposed_admin_cmd.back() { + assert!(cmd.index < index, "{:?} {}", cmd, index); + } + self.proposed_admin_cmd + .push_back(ProposedAdminCmd::new(cmd_type, epoch_state, index)); + } + + /// Commit the admin commands. 
+ #[inline] + pub fn commit_to(&mut self, index: u64, mut on_commit: impl FnMut(&ProposedAdminCmd)) { + if self.proposed_admin_cmd.is_empty() { + return; + } + + for cmd in &mut self.proposed_admin_cmd { + if cmd.committed { + continue; + } + if cmd.index <= index { + cmd.committed = true; + on_commit(cmd); + continue; + } + return; + } + } + + pub fn advance_apply(&mut self, index: u64, term: u64, region: &metapb::Region) { + while !self.proposed_admin_cmd.is_empty() { + let cmd = self.proposed_admin_cmd.front_mut().unwrap(); + if cmd.index <= index { + for ch in cmd.delayed_chs.drain(..) { + let mut resp = cmd_resp::new_error(Error::EpochNotMatch( + format!( + "current epoch of region {} is {:?}", + region.get_id(), + region.get_region_epoch(), + ), + vec![region.to_owned()], + )); + cmd_resp::bind_term(&mut resp, term); + ch.report_error(resp); + } + } else { + break; + } + self.proposed_admin_cmd.pop_front(); + } + } + + #[inline] + pub fn enter_prepare_merge(&mut self, prepare_merge_index: u64) { + self.pending_merge_index = prepare_merge_index; + } + + #[inline] + pub fn leave_prepare_merge(&mut self, prepare_merge_index: u64) { + if self.pending_merge_index != 0 { + assert_eq!(self.pending_merge_index, prepare_merge_index); + self.pending_merge_index = 0; + } + } + + /// Check if there is an on-going split command on current term. + /// + /// The answer is reliable only when the peer is leader. + #[inline] + pub fn is_splitting(&self) -> bool { + if self.proposed_admin_cmd.is_empty() { + return false; + } + // Split is deprecated in v2, only needs to check `BatchSplit`. + self.proposed_admin_cmd + .iter() + .any(|c| c.cmd_type == AdminCmdType::BatchSplit && c.committed) + } + + /// Check if there the current peer is waiting for being merged. + /// + /// The answer is reliable only when the peer is leader or `PrepareMerge` is + /// applied. 
+ #[inline] + pub fn is_merging(&self) -> bool { + if self.proposed_admin_cmd.is_empty() { + return self.pending_merge_index != 0; + } + self.proposed_admin_cmd + .iter() + .any(|c| c.cmd_type == AdminCmdType::PrepareMerge && c.committed) + } +} + +impl Drop for ProposalControl { + fn drop(&mut self) { + for state in mem::take(&mut self.proposed_admin_cmd) { + for ch in state.delayed_chs { + apply::notify_stale_req(self.term, ch); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn new_admin_request(cmd_type: AdminCmdType) -> RaftCmdRequest { + let mut request = RaftCmdRequest::default(); + request.mut_admin_request().set_cmd_type(cmd_type); + request + } + + #[test] + fn test_proposal_control() { + let region = metapb::Region::default(); + + let mut control = ProposalControl::new(10); + assert_eq!(control.term, 10); + assert!( + control + .check_conflict(Some(AdminCmdType::BatchSplit)) + .is_none() + ); + control.record_proposed_admin(AdminCmdType::BatchSplit, 5); + assert_eq!(control.proposed_admin_cmd.len(), 1); + + // Both conflict with the split admin cmd + let conflict = control.check_conflict(None).unwrap(); + assert_eq!(conflict.index, 5); + assert_eq!(conflict.cmd_type, AdminCmdType::BatchSplit); + let conflict = control + .check_conflict(Some(AdminCmdType::PrepareMerge)) + .unwrap(); + assert_eq!(conflict.index, 5); + + assert!( + control + .check_conflict(Some(AdminCmdType::ChangePeerV2)) + .is_none() + ); + control.record_proposed_admin(AdminCmdType::ChangePeerV2, 6); + assert_eq!(control.proposed_admin_cmd.len(), 2); + + assert!(!control.is_splitting()); + assert!(!control.is_merging()); + + // Conflict with the change peer admin cmd + let conflict = control + .check_conflict(Some(AdminCmdType::ChangePeerV2)) + .unwrap(); + assert_eq!(conflict.index, 6); + // Conflict with the split admin cmd + let conflict = control.check_conflict(None).unwrap(); + assert_eq!(conflict.index, 5); + // Conflict with the change peer admin cmd + let 
conflict = control + .check_conflict(Some(AdminCmdType::PrepareMerge)) + .unwrap(); + assert_eq!(conflict.index, 6); + + let mut commit_split = false; + control.commit_to(4, |c| commit_split = c.cmd_type == AdminCmdType::BatchSplit); + assert!(!commit_split); + assert!(!control.is_splitting()); + control.commit_to(5, |c| commit_split = c.cmd_type == AdminCmdType::BatchSplit); + assert!(commit_split); + assert!(control.is_splitting()); + + control.advance_apply(4, 10, ®ion); + // Have no effect on `proposed_admin_cmd` + assert_eq!(control.proposed_admin_cmd.len(), 2); + assert!(control.is_splitting()); + + control.advance_apply(5, 10, ®ion); + // Left one change peer admin cmd + assert_eq!(control.proposed_admin_cmd.len(), 1); + assert!(!control.is_splitting()); + + assert!(control.check_conflict(None).is_none()); + let conflict = control + .check_conflict(Some(AdminCmdType::BatchSplit)) + .unwrap(); + assert_eq!(conflict.index, 6); + + // Change term to 11 + control.maybe_update_term(11); + assert!( + control + .check_conflict(Some(AdminCmdType::BatchSplit)) + .is_none() + ); + assert_eq!(control.term, 11); + // Should be empty + assert_eq!(control.proposed_admin_cmd.len(), 0); + + // Test attaching multiple callbacks. + control.record_proposed_admin(AdminCmdType::BatchSplit, 7); + let mut subs = vec![]; + for _ in 0..3 { + let conflict = control.check_conflict(None).unwrap(); + let (ch, sub) = CmdResChannel::pair(); + conflict.delay_channel(ch); + subs.push(sub); + } + // Delayed channel should not be notified immediately. + for sub in &subs { + assert!(!sub.has_result()); + } + control.advance_apply(7, 12, ®ion); + for sub in subs { + assert!(sub.has_result()); + let res = futures::executor::block_on(sub.result()).unwrap(); + assert!( + res.get_header().get_error().has_epoch_not_match(), + "{:?}", + res + ); + } + + // Should invoke callbacks when term is increased. 
+ control.record_proposed_admin(AdminCmdType::BatchSplit, 8); + let (ch, sub) = CmdResChannel::pair(); + control.check_conflict(None).unwrap().delay_channel(ch); + control.maybe_update_term(13); + assert!(control.check_conflict(None).is_none()); + let res = futures::executor::block_on(sub.result()).unwrap(); + assert!( + res.get_header().get_error().has_stale_command(), + "{:?}", + res + ); + + // Should invoke callbacks when it's dropped. + control.record_proposed_admin(AdminCmdType::BatchSplit, 9); + let (ch, sub) = CmdResChannel::pair(); + control.check_conflict(None).unwrap().delay_channel(ch); + drop(control); + let res = futures::executor::block_on(sub.result()).unwrap(); + assert!( + res.get_header().get_error().has_stale_command(), + "{:?}", + res + ); + } + + #[test] + fn test_proposal_control_merge() { + let region = metapb::Region::default(); + + let mut control = ProposalControl::new(5); + assert!(!control.is_merging()); + control.record_proposed_admin(AdminCmdType::PrepareMerge, 5); + assert!(!control.is_merging()); + control.commit_to(5, |_| ()); + assert!(control.is_merging()); + control.advance_apply(5, 5, ®ion); + assert!(!control.is_merging()); + + control.record_proposed_admin(AdminCmdType::PrepareMerge, 6); + assert!(!control.is_merging()); + control.commit_to(6, |_| ()); + assert!(control.is_merging()); + control.enter_prepare_merge(6); + control.advance_apply(6, 5, ®ion); + assert!(control.is_merging()); + control.leave_prepare_merge(6); + assert!(!control.is_merging()); + } +} diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 2d89c3494d3..75575e9a19f 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -55,9 +55,11 @@ use crate::{ }; mod admin; +mod control; mod write; pub use admin::AdminCmdResult; +pub use control::ProposalControl; pub use write::{SimpleWriteDecoder, SimpleWriteEncoder}; use 
self::write::SimpleWrite; @@ -220,18 +222,28 @@ impl Peer { } #[inline] - fn enqueue_pending_proposal( + pub fn post_propose_command( &mut self, ctx: &mut StoreContext, - mut proposal: Proposal>, + res: Result, + ch: Vec, + call_proposed_on_success: bool, ) { - let applied_to_current_term = self.applied_to_current_term(); - if applied_to_current_term { + let idx = match res { + Ok(i) => i, + Err(e) => { + ch.report_error(cmd_resp::err_resp(e, self.term())); + return; + } + }; + let mut proposal = Proposal::new(idx, self.term(), ch); + if call_proposed_on_success { proposal.cb.notify_proposed(); } - proposal.must_pass_epoch_check = applied_to_current_term; + proposal.must_pass_epoch_check = self.applied_to_current_term(); proposal.propose_time = Some(*ctx.current_time.get_or_insert_with(monotonic_raw_now)); self.proposals_mut().push(proposal); + self.set_has_ready(); } #[inline] @@ -292,6 +304,7 @@ impl Peer { } self.raft_group_mut() .advance_apply_to(apply_res.applied_index); + self.proposal_control_advance_apply(apply_res.applied_index); let is_leader = self.is_leader(); let progress_to_be_updated = self.entry_storage().applied_term() != apply_res.applied_term; let entry_storage = self.entry_storage_mut(); diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index a760a5acfb2..59c5679f95f 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -7,9 +7,10 @@ use raftstore::{ cmd_resp, fsm::{apply, Proposal, MAX_PROPOSAL_SIZE_RATIO}, msg::ErrorCallback, - util, WriteCallback, + util::{self, NORMAL_REQ_CHECK_CONF_VER, NORMAL_REQ_CHECK_VER}, + WriteCallback, }, - Result, + Error, Result, }; use crate::{ @@ -53,10 +54,17 @@ impl Peer { return; } // To maintain propose order, we need to make pending proposal first. 
- self.propose_pending_command(ctx); + self.propose_pending_writes(ctx); + if let Some(conflict) = self.proposal_control_mut().check_conflict(None) { + conflict.delay_channel(ch); + return; + } + // ProposalControl is reliable only when applied to current term. + let call_proposed_on_success = self.applied_to_current_term(); match SimpleWriteEncoder::new( req, (ctx.cfg.raft_entry_max_size.0 as f64 * MAX_PROPOSAL_SIZE_RATIO) as usize, + call_proposed_on_success, ) { Ok(mut encoder) => { encoder.add_response_channel(ch); @@ -65,35 +73,38 @@ impl Peer { } Err(req) => { let res = self.propose_command(ctx, req); - self.post_propose_write(ctx, res, vec![ch]); + self.post_propose_command(ctx, res, vec![ch], call_proposed_on_success); } } } - #[inline] - pub fn post_propose_write( - &mut self, - ctx: &mut StoreContext, - res: Result, - ch: Vec, - ) { - let idx = match res { - Ok(i) => i, - Err(e) => { - ch.report_error(cmd_resp::err_resp(e, self.term())); - return; - } - }; - let p = Proposal::new(idx, self.term(), ch); - self.enqueue_pending_proposal(ctx, p); - self.set_has_ready(); - } - - pub fn propose_pending_command(&mut self, ctx: &mut StoreContext) { + pub fn propose_pending_writes(&mut self, ctx: &mut StoreContext) { if let Some(encoder) = self.simple_write_encoder_mut().take() { + let call_proposed_on_success = if encoder.notify_proposed() { + // The request has pass conflict check and called all proposed callbacks. + false + } else { + // Epoch may have changed since last check. + let from_epoch = encoder.header().get_region_epoch(); + let res = util::compare_region_epoch( + from_epoch, + self.region(), + NORMAL_REQ_CHECK_CONF_VER, + NORMAL_REQ_CHECK_VER, + true, + ); + if let Err(mut e) = res { + // TODO: query sibling regions. + ctx.raft_metrics.invalid_proposal.epoch_not_match.inc(); + encoder.encode().1.report_error(cmd_resp::new_error(e)); + return; + } + // Only when it applies to current term, the epoch check can be reliable. 
+ self.applied_to_current_term() + }; let (data, chs) = encoder.encode(); let res = self.propose(ctx, data); - self.post_propose_write(ctx, res, chs); + self.post_propose_command(ctx, res, chs, call_proposed_on_success); } } } diff --git a/components/raftstore-v2/src/operation/command/write/simple_write.rs b/components/raftstore-v2/src/operation/command/write/simple_write.rs index 364e2741868..ca9e7d39366 100644 --- a/components/raftstore-v2/src/operation/command/write/simple_write.rs +++ b/components/raftstore-v2/src/operation/command/write/simple_write.rs @@ -3,6 +3,7 @@ use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, RaftRequestHeader, Request}; use protobuf::{CodedInputStream, Message, SingularPtrField}; +use raftstore::store::WriteCallback; use slog::Logger; use crate::{operation::command::parse_at, router::CmdResChannel}; @@ -21,12 +22,18 @@ pub struct SimpleWriteEncoder { buf: Vec, channels: Vec, size_limit: usize, + notify_proposed: bool, } impl SimpleWriteEncoder { + /// Create an encoder. + /// + /// If `notify_proposed` is true, channels will be called `notify_proposed` + /// when it's appended. 
pub fn new( mut req: RaftCmdRequest, size_limit: usize, + notify_proposed: bool, ) -> Result { if !Self::allow_request(&req) { return Err(req); @@ -46,6 +53,7 @@ impl SimpleWriteEncoder { buf, channels: vec![], size_limit, + notify_proposed, }) } @@ -96,9 +104,24 @@ impl SimpleWriteEncoder { } #[inline] - pub fn add_response_channel(&mut self, ch: CmdResChannel) { + pub fn add_response_channel(&mut self, mut ch: CmdResChannel) { + if self.notify_proposed { + ch.notify_proposed(); + } self.channels.push(ch); } + + #[inline] + pub fn notify_proposed(&self) -> bool { + self.notify_proposed + } + + #[inline] + pub fn header(&self) -> &RaftRequestHeader { + self.header + .as_ref() + .unwrap_or_else(|| RaftRequestHeader::default_instance()) + } } #[derive(Debug)] @@ -382,7 +405,7 @@ mod tests { delete_req.set_key(delete_key.clone()); cmd.mut_requests().push(req); - let mut encoder = SimpleWriteEncoder::new(cmd.clone(), usize::MAX).unwrap(); + let mut encoder = SimpleWriteEncoder::new(cmd.clone(), usize::MAX, false).unwrap(); cmd.clear_requests(); req = Request::default(); @@ -471,7 +494,7 @@ mod tests { let mut req = Request::default(); req.set_cmd_type(CmdType::Invalid); invalid_cmd.mut_requests().push(req); - let fallback = SimpleWriteEncoder::new(invalid_cmd.clone(), usize::MAX).unwrap_err(); + let fallback = SimpleWriteEncoder::new(invalid_cmd.clone(), usize::MAX, false).unwrap_err(); let bytes = fallback.write_to_bytes().unwrap(); let logger = slog_global::borrow_global().new(o!()); let decoded = SimpleWriteDecoder::new(&logger, &bytes, 0, 0).unwrap_err(); @@ -486,7 +509,7 @@ mod tests { put_req.set_key(b"key".to_vec()); put_req.set_value(b"".to_vec()); valid_cmd.mut_requests().push(req); - let mut encoder = SimpleWriteEncoder::new(valid_cmd.clone(), usize::MAX).unwrap(); + let mut encoder = SimpleWriteEncoder::new(valid_cmd.clone(), usize::MAX, false).unwrap(); // Only simple write command can be batched. 
encoder.amend(invalid_cmd.clone()).unwrap_err(); let mut valid_cmd2 = valid_cmd.clone(); diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 5b19db91b71..a110f4bf330 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -5,7 +5,9 @@ mod life; mod query; mod ready; -pub use command::{AdminCmdResult, CommittedEntries, SimpleWriteDecoder, SimpleWriteEncoder}; +pub use command::{ + AdminCmdResult, CommittedEntries, ProposalControl, SimpleWriteDecoder, SimpleWriteEncoder, +}; pub use life::DestroyProgress; pub use ready::{AsyncWriter, GenSnapTask, SnapState}; diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index 1ae4aecd1cc..114080bcdbb 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -151,14 +151,14 @@ impl Peer { pub(crate) fn maybe_renew_leader_lease( &mut self, ts: Timespec, - store_meta: &mut Arc>>, + store_meta: &Mutex>, progress: Option, ) { // A nonleader peer should never has leader lease. let read_progress = if !should_renew_lease( self.is_leader(), - self.is_splitting(), - self.is_merging(), + self.proposal_control().is_splitting(), + self.proposal_control().is_merging(), self.has_force_leader(), ) { None @@ -186,7 +186,7 @@ impl Peer { // TODO: remove this block of code when snapshot is done; add the logic into // on_persist_snapshot. - pub(crate) fn add_reader_if_necessary(&mut self, store_meta: &mut Arc>>) { + pub(crate) fn add_reader_if_necessary(&mut self, store_meta: &Mutex>) { let mut meta = store_meta.lock().unwrap(); // TODO: remove this block of code when snapshot is done; add the logic into // on_persist_snapshot. 
diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 8b84b0788ce..77ca7b90074 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -216,7 +216,7 @@ impl Peer { self.pending_reads_mut().advance_leader_reads(states); if let Some(propose_time) = self.pending_reads().last_ready().map(|r| r.propose_time) { if !self.leader_lease_mut().is_suspect() { - self.maybe_renew_leader_lease(propose_time, &mut ctx.store_meta, None); + self.maybe_renew_leader_lease(propose_time, &ctx.store_meta, None); } } @@ -288,6 +288,24 @@ impl Peer { && !self.has_pending_merge_state() } + #[inline] + pub fn ready_to_handle_read(&self) -> bool { + // TODO: It may cause read index to wait a long time. + + // There may be some values that are not applied by this leader yet but the old + // leader, if applied_term isn't equal to current term. + self.applied_to_current_term() + // There may be stale read if the old leader splits really slow, + // the new region may already elected a new leader while + // the old leader still think it owns the split range. + && !self.proposal_control().is_splitting() + // There may be stale read if a target leader is in another store and + // applied commit merge, written new values, but the sibling peer in + // this store does not apply commit merge, so the leader is not ready + // to read, until the merge is rollbacked. 
+ && !self.proposal_control().is_merging() + } + fn send_read_command( &self, ctx: &mut StoreContext, @@ -409,7 +427,7 @@ impl Peer { // TODO: add coprocessor_host hook let progress = ReadProgress::applied_term(applied_term); // TODO: remove it - self.add_reader_if_necessary(&mut ctx.store_meta); + self.add_reader_if_necessary(&ctx.store_meta); let mut meta = ctx.store_meta.lock().unwrap(); let reader = meta.readers.get_mut(&self.region_id()).unwrap(); self.maybe_update_read_progress(reader, progress); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 62cb42ef253..6f6866b9671 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -24,10 +24,10 @@ use std::cmp; use engine_traits::{KvEngine, RaftEngine}; use error_code::ErrorCodeExt; -use kvproto::raft_serverpb::RaftMessage; +use kvproto::{raft_cmdpb::AdminCmdType, raft_serverpb::RaftMessage}; use protobuf::Message as _; -use raft::{eraftpb, Ready}; -use raftstore::store::{util, ExtraStates, FetchedLogs, Transport, WriteTask}; +use raft::{eraftpb, Ready, StateRole}; +use raftstore::store::{util, ExtraStates, FetchedLogs, ReadProgress, Transport, WriteTask}; use slog::{debug, error, trace, warn}; use tikv_util::time::{duration_to_sec, monotonic_raw_now}; @@ -247,7 +247,7 @@ impl Peer { ctx.raft_metrics.commit_log.observe(duration_to_sec( (ctx.current_time.unwrap() - propose_time).to_std().unwrap(), )); - self.maybe_renew_leader_lease(propose_time, &mut ctx.store_meta, None); + self.maybe_renew_leader_lease(propose_time, &ctx.store_meta, None); break; } } @@ -288,6 +288,22 @@ impl Peer { |entry| entry.index == self.raft_group().raft.raft_log.last_index() )); + self.on_role_changed(ctx, &ready); + + if let Some(hs) = ready.hs() { + let prev_commit_index = self.entry_storage().commit_index(); + assert!( + hs.get_commit() >= prev_commit_index, + "{:?} {:?} {}", + self.logger.list(), 
+ hs, + prev_commit_index + ); + if self.is_leader() && hs.get_commit() > prev_commit_index { + self.on_leader_commit_index_changed(hs.get_commit()); + } + } + if !ready.messages().is_empty() { debug_assert!(self.is_leader()); for msg in ready.take_messages() { @@ -395,6 +411,89 @@ impl Peer { pub fn on_wait_flush(&mut self, ch: crate::router::FlushChannel) { self.async_writer.subscirbe_flush(ch); } + + pub fn on_role_changed(&mut self, ctx: &mut StoreContext, ready: &Ready) { + // Update leader lease when the Raft state changes. + if let Some(ss) = ready.ss() { + let term = self.term(); + match ss.raft_state { + StateRole::Leader => { + // The local read can only be performed after a new leader has applied + // the first empty entry on its term. After that the lease expiring time + // should be updated to + // send_to_quorum_ts + max_lease + // as the comments in `Lease` explain. + // It is recommended to update the lease expiring time right after + // this peer becomes leader because it's more convenient to do it here and + // it has no impact on the correctness. + let progress_term = ReadProgress::term(term); + self.maybe_renew_leader_lease( + monotonic_raw_now(), + &ctx.store_meta, + Some(progress_term), + ); + debug!( + self.logger, + "becomes leader with lease"; + "lease" => ?self.leader_lease(), + ); + // If the predecessor reads index during transferring leader and receives + // quorum's heartbeat response after that, it may wait for applying to + // current term to apply the read. So broadcast eagerly to avoid unexpected + // latency. + self.raft_group_mut().skip_bcast_commit(false); + + // Exit entry cache warmup state when the peer becomes leader. 
+ self.entry_storage_mut().clear_entry_cache_warmup_state(); + } + StateRole::Follower => { + self.leader_lease_mut().expire(); + self.storage_mut().cancel_generating_snap(None); + } + _ => {} + } + self.proposal_control_mut().maybe_update_term(term); + } + } + + /// If leader commits new admin commands, it may break lease assumption. So + /// we need to cancel lease whenever necessary. + /// + /// Note this method should be called before sending out any messages. + fn on_leader_commit_index_changed(&mut self, commit_index: u64) { + let mut committed_prepare_merge = false; + self.proposal_control_mut().commit_to(commit_index, |cmd| { + committed_prepare_merge |= cmd.cmd_type() == AdminCmdType::PrepareMerge + }); + // There are two types of operations that will change the ownership of a range: + // split and merge. + // + // - For split, after the split command is committed, it's + // possible that the same range is govened by different region on different + // nodes due to different apply progress. But because only the peers on the + // same node as old leader will campaign despite election timeout, so there + // will be no modification to the overlapped range until either the original + // leader apply the split command or an election timeout is passed since split + // is committed. We already forbid renewing lease after committing split, and + // original leader will update the reader delegate with latest epoch after + // applying split before the split peer starts campaign, so here the only thing + // we need to do is marking split is committed (which is done by `commit_to` + // above). It's correct to allow local read during split. + // + // - For merge, after the prepare merge command is committed, the target peers + // may apply commit merge at any time, so we need to forbid any type of read + // to avoid missing the modifications from target peers. 
+ if committed_prepare_merge { + // After prepare_merge is committed and the leader broadcasts commit + // index to followers, the leader can not know when the target region + // merges majority of this region, also it can not know when the target + // region writes new values. + // To prevent unsafe local read, we suspect its leader lease. + self.leader_lease_mut().suspect(monotonic_raw_now()); + // Stop updating `safe_ts` + self.read_progress_mut().discard(); + } + } } impl Storage { diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index eb98851b3bb..dd53f47e152 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -18,7 +18,7 @@ use time::Timespec; use super::{storage::Storage, Apply}; use crate::{ fsm::{ApplyFsm, ApplyScheduler}, - operation::{AsyncWriter, DestroyProgress, SimpleWriteEncoder}, + operation::{AsyncWriter, DestroyProgress, ProposalControl, SimpleWriteEncoder}, router::{CmdResChannel, QueryResChannel}, tablet::CachedTablet, Result, @@ -58,6 +58,9 @@ pub struct Peer { /// Transaction extensions related to this peer. txn_ext: Arc, txn_extra_op: Arc>, + + /// Check whether this proposal can be proposed based on its epoch. + proposal_control: ProposalControl, } impl Peer { @@ -142,6 +145,7 @@ impl Peer { region_buckets: None, txn_ext: Arc::default(), txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::Noop)), + proposal_control: ProposalControl::new(0), }; // If this region has only one peer and I am the one, campaign directly. 
@@ -153,6 +157,8 @@ impl Peer { peer.raft_group.campaign()?; peer.set_has_ready(); } + let term = peer.term(); + peer.proposal_control.maybe_update_term(term); Ok(peer) } @@ -323,18 +329,6 @@ impl Peer { self.raft_group.raft.term } - #[inline] - // TODO - pub fn is_splitting(&self) -> bool { - false - } - - #[inline] - // TODO - pub fn is_merging(&self) -> bool { - false - } - #[inline] // TODO pub fn has_force_leader(&self) -> bool { @@ -391,24 +385,6 @@ impl Peer { &self.proposals } - #[inline] - pub fn ready_to_handle_read(&self) -> bool { - // TODO: It may cause read index to wait a long time. - - // There may be some values that are not applied by this leader yet but the old - // leader, if applied_term isn't equal to current term. - self.applied_to_current_term() - // There may be stale read if the old leader splits really slow, - // the new region may already elected a new leader while - // the old leader still think it owns the split range. - && !self.is_splitting() - // There may be stale read if a target leader is in another store and - // applied commit merge, written new values, but the sibling peer in - // this store does not apply commit merge, so the leader is not ready - // to read, until the merge is rollbacked. 
- && !self.is_merging() - } - pub fn apply_scheduler(&self) -> &ApplyScheduler { self.apply_scheduler.as_ref().unwrap() } @@ -432,4 +408,22 @@ impl Peer { self.region_buckets.as_ref().map(|b| b.meta.clone()), ) } + + #[inline] + pub fn proposal_control_mut(&mut self) -> &mut ProposalControl { + &mut self.proposal_control + } + + #[inline] + pub fn proposal_control(&self) -> &ProposalControl { + &self.proposal_control + } + + #[inline] + pub fn proposal_control_advance_apply(&mut self, apply_index: u64) { + let region = self.raft_group.store().region(); + let term = self.term(); + self.proposal_control + .advance_apply(apply_index, term, region); + } } diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index d68c414ca5f..b6da3c804f0 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -221,6 +221,13 @@ impl BaseSubscriber { pub async fn result(self) -> Option { WaitResult { core: &self.core }.await } + + /// Test if the result is ready without any polling. + #[inline] + pub fn has_result(&self) -> bool { + let e = self.core.event.load(Ordering::Relaxed); + check_bit(e, fired_bit_of(PAYLOAD_EVENT)).is_some() + } } unsafe impl Send for BaseSubscriber {} diff --git a/components/raftstore-v2/tests/integrations/mod.rs b/components/raftstore-v2/tests/integrations/mod.rs index 50fb5c4e16a..740e64f7e29 100644 --- a/components/raftstore-v2/tests/integrations/mod.rs +++ b/components/raftstore-v2/tests/integrations/mod.rs @@ -5,6 +5,8 @@ #![feature(custom_test_frameworks)] #![test_runner(test_util::run_tests)] +// TODO: test conflict control in integration tests after split is supported. 
+ mod cluster; mod test_basic_write; mod test_conf_change; diff --git a/components/raftstore-v2/tests/integrations/test_read.rs b/components/raftstore-v2/tests/integrations/test_read.rs index 4f49757085f..2155a4775c6 100644 --- a/components/raftstore-v2/tests/integrations/test_read.rs +++ b/components/raftstore-v2/tests/integrations/test_read.rs @@ -67,14 +67,8 @@ fn test_snap_without_read_index() { req.mut_requests().push(request_inner); let res = router.query(region_id, req.clone()).unwrap(); let resp = res.read().unwrap(); - // single node commited index should be 6. - assert_eq!(resp.read_index, 6); - - // run again, this time we expect the lease is not expired and the read index - // should be 0. - let res = router.query(region_id, req.clone()).unwrap(); - let resp = res.read().unwrap(); - // the request can be processed locally, read index should be 0. + // When it becomes leader, it will get a lease automatically because of empty + // entry. assert_eq!(resp.read_index, 0); // run with header read_quorum From 9c4f0d08fc47ab9e733b4464ffc75f44cff9ce50 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Fri, 11 Nov 2022 18:53:53 +0800 Subject: [PATCH 0328/1149] raftstore-v2: make generate snapshot works with checkpoint (#13736) ref tikv/tikv#12842 raftstore-v2: make generate snapshot works with checkpoint Signed-off-by: nolouch Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/batch/store.rs | 14 ++- .../src/operation/ready/snapshot.rs | 41 ++++++- components/raftstore-v2/src/raft/storage.rs | 50 ++++++-- components/raftstore-v2/src/router/imp.rs | 4 +- components/raftstore-v2/src/router/message.rs | 4 +- .../tests/integrations/cluster.rs | 7 +- .../raftstore/src/store/async_io/read.rs | 114 ++++++++++++++++-- components/raftstore/src/store/mod.rs | 16 +-- .../raftstore/src/store/peer_storage.rs | 4 +- components/raftstore/src/store/snap.rs | 75 ++++++++++++ components/raftstore/src/store/transport.rs | 5 +- components/raftstore/src/store/worker/mod.rs | 3 
+- 12 files changed, 279 insertions(+), 58 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 76d4fd16bea..0dbd0ed4b64 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -2,6 +2,7 @@ use std::{ ops::{Deref, DerefMut}, + path::Path, sync::{Arc, Mutex}, time::Duration, }; @@ -21,7 +22,7 @@ use kvproto::{ use raft::INVALID_ID; use raftstore::store::{ fsm::store::PeerTickBatch, local_metrics::RaftMetrics, Config, ReadRunner, ReadTask, - StoreWriters, Transport, WriteSenders, + StoreWriters, TabletSnapManager, Transport, WriteSenders, }; use slog::Logger; use tikv_util::{ @@ -365,6 +366,7 @@ impl StoreSystem { trans: T, router: &StoreRouter, store_meta: Arc>>, + snap_mgr: TabletSnapManager, ) -> Result<()> where T: Transport + 'static, @@ -373,10 +375,12 @@ impl StoreSystem { workers .store_writers .spawn(store_id, raft_engine.clone(), None, router, &trans, &cfg)?; - let read_scheduler = workers.async_read_worker.start( - "async-read-worker", - ReadRunner::new(router.clone(), raft_engine.clone()), - ); + + let mut read_runner = ReadRunner::new(router.clone(), raft_engine.clone()); + read_runner.set_snap_mgr(snap_mgr); + let read_scheduler = workers + .async_read_worker + .start("async-read-worker", read_runner); let mut builder = StorePollerBuilder::new( cfg.clone(), diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index e0f4e5653de..ad836ed2455 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -32,8 +32,8 @@ use engine_traits::{KvEngine, RaftEngine}; use kvproto::raft_serverpb::{RaftSnapshotData, RegionLocalState}; use protobuf::Message; use raft::eraftpb::Snapshot; -use raftstore::store::{metrics::STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER, ReadTask}; -use slog::{error, info}; +use 
raftstore::store::{metrics::STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER, GenSnapRes, ReadTask}; +use slog::{error, info, warn}; use tikv_util::{box_try, worker::Scheduler}; use crate::{ @@ -68,6 +68,8 @@ impl PartialEq for SnapState { pub struct GenSnapTask { region_id: u64, + // The snapshot will be sent to the peer. + to_peer: u64, // Fill it when you are going to generate the snapshot. // index used to check if the gen task should be canceled. index: Arc, @@ -78,9 +80,15 @@ pub struct GenSnapTask { } impl GenSnapTask { - pub fn new(region_id: u64, index: Arc, canceled: Arc) -> GenSnapTask { + pub fn new( + region_id: u64, + to_peer: u64, + index: Arc, + canceled: Arc, + ) -> GenSnapTask { GenSnapTask { region_id, + to_peer, index, canceled, for_balance: false, @@ -101,7 +109,7 @@ impl Debug for GenSnapTask { } impl Peer { - pub fn on_snapshot_generated(&mut self, snapshot: Box) { + pub fn on_snapshot_generated(&mut self, snapshot: GenSnapRes) { if self.storage_mut().on_snapshot_generated(snapshot) { self.raft_group_mut().ping(); self.set_has_ready(); @@ -115,6 +123,15 @@ impl Apply { /// Will schedule a task to read worker and then generate a snapshot /// asynchronously. pub fn schedule_gen_snapshot(&mut self, snap_task: GenSnapTask) { + // Do not generate, the peer is removed. + if self.tombstone() { + snap_task.canceled.store(true, Ordering::SeqCst); + error!( + self.logger, + "cancel generating snapshot because it's already destroyed"; + ); + return; + } // Flush before do snapshot. 
if snap_task.canceled.load(Ordering::SeqCst) { return; @@ -126,6 +143,7 @@ impl Apply { snap_task.index.store(last_applied_index, Ordering::SeqCst); let gen_tablet_sanp_task = ReadTask::GenTabletSnapshot { region_id: snap_task.region_id, + to_peer: snap_task.to_peer, tablet: self.tablet().clone(), region_state: self.region_state().clone(), last_applied_term, @@ -189,7 +207,7 @@ impl Storage { index: index.clone(), }; - let task = GenSnapTask::new(self.region().get_id(), index, canceled); + let task = GenSnapTask::new(self.region().get_id(), to, index, canceled); let mut gen_snap_task = self.gen_snap_task_mut(); assert!(gen_snap_task.is_none()); *gen_snap_task = Box::new(Some(task)); @@ -268,7 +286,12 @@ impl Storage { /// Try to switch snap state to generated. only `Generating` can switch to /// `Generated`. /// TODO: make the snap state more clearer, the snapshot must be consumed. - pub fn on_snapshot_generated(&self, snap: Box) -> bool { + pub fn on_snapshot_generated(&self, res: GenSnapRes) -> bool { + if res.is_none() { + self.cancel_generating_snap(None); + return false; + } + let snap = res.unwrap(); let mut snap_state = self.snap_state_mut(); let SnapState::Generating { ref canceled, @@ -276,6 +299,12 @@ impl Storage { } = *snap_state else { return false }; if snap.get_metadata().get_index() < index.load(Ordering::SeqCst) { + warn!( + self.logger(), + "snapshot is staled, skip"; + "snap index" => snap.get_metadata().get_index(), + "required index" => index.load(Ordering::SeqCst), + ); return false; } // Should changed `SnapState::Generated` to `SnapState::Relax` when the diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index aa642f5967f..8abeeeef73d 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -370,8 +370,8 @@ mod tests { }; use raft::{eraftpb::Snapshot as RaftSnapshot, Error as RaftError, StorageError}; use raftstore::store::{ - 
AsyncReadNotifier, FetchedLogs, ReadRunner, ReadTask, RAFT_INIT_LOG_INDEX, - RAFT_INIT_LOG_TERM, + AsyncReadNotifier, FetchedLogs, GenSnapRes, ReadRunner, ReadTask, TabletSnapKey, + TabletSnapManager, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }; use slog::o; use tempfile::TempDir; @@ -382,11 +382,11 @@ mod tests { #[derive(Clone)] pub struct TestRouter { - ch: SyncSender>, + ch: SyncSender, } impl TestRouter { - pub fn new() -> (Self, Receiver>) { + pub fn new() -> (Self, Receiver) { let (tx, rx) = sync_channel(1); (Self { ch: tx }, rx) } @@ -397,8 +397,8 @@ mod tests { unreachable!(); } - fn notify_snapshot_generated(&self, _region_id: u64, snapshot: Box) { - self.ch.send(snapshot).unwrap(); + fn notify_snapshot_generated(&self, _region_id: u64, res: GenSnapRes) { + self.ch.send(res).unwrap(); } } @@ -458,6 +458,8 @@ mod tests { write_initial_states(&mut wb, region.clone()).unwrap(); assert!(!wb.is_empty()); raft_engine.consume(&mut wb, true).unwrap(); + let mgr = TabletSnapManager::new(path.path().join("snap_dir").to_str().unwrap()); + mgr.init().unwrap(); // building a tablet factory let ops = DbOptions::default(); let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); @@ -478,7 +480,9 @@ mod tests { .unwrap() .unwrap(); let (router, rx) = TestRouter::new(); - worker.start(ReadRunner::new(router.clone(), raft_engine)); + let mut read_runner = ReadRunner::new(router.clone(), raft_engine); + read_runner.set_snap_mgr(mgr.clone()); + worker.start(read_runner); // setup peer applyer let mut apply = Apply::new( region.get_peers()[0].clone(), @@ -490,8 +494,8 @@ mod tests { logger, ); - // test get snapshot - let snap = s.snapshot(0, 0); + // Test get snapshot + let snap = s.snapshot(0, 7); let unavailable = RaftError::Store(StorageError::SnapshotTemporarilyUnavailable); assert_eq!(snap.unwrap_err(), unavailable); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); @@ -504,11 +508,13 @@ mod tests { }; 
assert_eq!(snap.get_metadata().get_index(), 0); assert_eq!(snap.get_metadata().get_term(), 0); - assert!(snap.get_data().is_empty()); + assert_eq!(snap.get_data().is_empty(), false); + let snap_key = TabletSnapKey::from_region_snap(4, 7, &snap); + let checkpointer_path = mgr.get_tablet_checkpointer_path(&snap_key); + assert!(checkpointer_path.exists()); - // test cancel snapshot + // Test cancel snapshot let snap = s.snapshot(0, 0); - let unavailable = RaftError::Store(StorageError::SnapshotTemporarilyUnavailable); assert_eq!(snap.unwrap_err(), unavailable); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); apply.schedule_gen_snapshot(gen_task); @@ -516,6 +522,24 @@ mod tests { s.cancel_generating_snap(None); assert_eq!(*s.snap_state.borrow(), SnapState::Relax); - // TODO: add test get twice snapshot and cancel once + // Test get twice snapshot and cancel once. + // get snapshot a + let snap = s.snapshot(0, 0); + assert_eq!(snap.unwrap_err(), unavailable); + let gen_task_a = s.gen_snap_task.borrow_mut().take().unwrap(); + apply.set_apply_progress(1, 5); + apply.schedule_gen_snapshot(gen_task_a); + let res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); + s.cancel_generating_snap(None); + // cancel get snapshot a, try get snaphsot b + let snap = s.snapshot(0, 0); + assert_eq!(snap.unwrap_err(), unavailable); + let gen_task_b = s.gen_snap_task.borrow_mut().take().unwrap(); + apply.set_apply_progress(10, 5); + apply.schedule_gen_snapshot(gen_task_b); + // on snapshot a and b + assert_eq!(s.on_snapshot_generated(res), false); + let res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); + assert_eq!(s.on_snapshot_generated(res), true); } } diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 7c02ee10243..8cb65e40a3c 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -9,7 +9,7 @@ use kvproto::{ raft_serverpb::RaftMessage, }; use 
raft::eraftpb::Snapshot as RaftSnapshot; -use raftstore::store::{AsyncReadNotifier, FetchedLogs, RegionSnapshot}; +use raftstore::store::{AsyncReadNotifier, FetchedLogs, GenSnapRes, RegionSnapshot}; use slog::Logger; use super::PeerMsg; @@ -20,7 +20,7 @@ impl AsyncReadNotifier for StoreRouter { let _ = self.force_send(region_id, PeerMsg::LogsFetched(fetched_logs)); } - fn notify_snapshot_generated(&self, region_id: u64, snapshot: Box) { + fn notify_snapshot_generated(&self, region_id: u64, snapshot: GenSnapRes) { let _ = self.force_send(region_id, PeerMsg::SnapshotGenerated(snapshot)); } } diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 64af4d41d71..cda9e971c66 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -6,7 +6,7 @@ use std::fmt; use engine_traits::Snapshot; use kvproto::{raft_cmdpb::RaftCmdRequest, raft_serverpb::RaftMessage}; use raft::eraftpb::Snapshot as RaftSnapshot; -use raftstore::store::{metrics::RaftEventDurationType, FetchedLogs}; +use raftstore::store::{metrics::RaftEventDurationType, FetchedLogs, GenSnapRes}; use tikv_util::time::Instant; use super::{ @@ -126,7 +126,7 @@ pub enum PeerMsg { /// Result of applying committed entries. The message can't be lost. ApplyRes(ApplyRes), LogsFetched(FetchedLogs), - SnapshotGenerated(Box), + SnapshotGenerated(GenSnapRes), /// Start the FSM. Start, /// A message only used to notify a peer. 
diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 1d458d7a73e..6ac567907af 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -26,7 +26,9 @@ use kvproto::{ raft_serverpb::RaftMessage, }; use pd_client::RpcClient; -use raftstore::store::{region_meta::RegionMeta, Config, Transport, RAFT_INIT_LOG_INDEX}; +use raftstore::store::{ + region_meta::RegionMeta, Config, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX, +}; use raftstore_v2::{ create_store_batch_system, router::{DebugInfoChannel, FlushChannel, PeerMsg, QueryResult, RaftRouter}, @@ -206,7 +208,7 @@ impl RunningState { let router = RaftRouter::new(store_id, router); let store_meta = router.store_meta().clone(); - + let snap_mgr = TabletSnapManager::new(path.join("tablets_snap").to_str().unwrap()); system .start( store_id, @@ -216,6 +218,7 @@ impl RunningState { transport.clone(), router.store_router(), store_meta.clone(), + snap_mgr, ) .unwrap(); diff --git a/components/raftstore/src/store/async_io/read.rs b/components/raftstore/src/store/async_io/read.rs index 30ce2102040..2da4869d24b 100644 --- a/components/raftstore/src/store/async_io/read.rs +++ b/components/raftstore/src/store/async_io/read.rs @@ -3,16 +3,25 @@ use std::{ fmt, marker::PhantomData, - sync::{atomic::AtomicBool, Arc}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, }; -use engine_traits::{KvEngine, RaftEngine}; +use engine_traits::{Checkpointer, KvEngine, RaftEngine}; use fail::fail_point; -use kvproto::raft_serverpb::RegionLocalState; -use raft::{eraftpb::Snapshot as RaftSnapshot, GetEntriesContext}; -use tikv_util::worker::Runnable; +use file_system::{IoType, WithIoType}; +use kvproto::raft_serverpb::{PeerState, RaftSnapshotData, RegionLocalState}; +use protobuf::Message; +use raft::{eraftpb::Snapshot, GetEntriesContext}; +use tikv_util::{error, info, time::Instant, 
worker::Runnable}; -use crate::store::{RaftlogFetchResult, MAX_INIT_ENTRY_COUNT}; +use crate::store::{ + util, + worker::metrics::{SNAP_COUNTER, SNAP_HISTOGRAM}, + RaftlogFetchResult, TabletSnapKey, TabletSnapManager, MAX_INIT_ENTRY_COUNT, +}; pub enum ReadTask { FetchLogs { @@ -28,6 +37,7 @@ pub enum ReadTask { // GenTabletSnapshot is used to generate tablet snapshot. GenTabletSnapshot { region_id: u64, + to_peer: u64, tablet: EK, region_state: RegionLocalState, last_applied_term: u64, @@ -53,8 +63,10 @@ impl fmt::Display for ReadTask { "Fetch Raft Logs [region: {}, low: {}, high: {}, max_size: {}] for sending with context {:?}, tried: {}, term: {}", region_id, low, high, max_size, context, tried_cnt, term, ), - ReadTask::GenTabletSnapshot { region_id, .. } => { - write!(f, "Snapshot gen for {}", region_id) + ReadTask::GenTabletSnapshot { + region_id, to_peer, .. + } => { + write!(f, "Snapshot gen for {}, to peer {}", region_id, to_peer) } } } @@ -66,10 +78,12 @@ pub struct FetchedLogs { pub logs: Box, } +pub type GenSnapRes = Option>; + /// A router for receiving fetched result. 
pub trait AsyncReadNotifier: Send { fn notify_logs_fetched(&self, region_id: u64, fetched: FetchedLogs); - fn notify_snapshot_generated(&self, region_id: u64, snapshot: Box); + fn notify_snapshot_generated(&self, region_id: u64, res: Option>); } pub struct ReadRunner @@ -80,6 +94,7 @@ where { notifier: N, raft_engine: ER, + sanp_mgr: Option, _phantom: PhantomData, } @@ -88,9 +103,34 @@ impl ReadRunner { ReadRunner { notifier, raft_engine, + sanp_mgr: None, _phantom: PhantomData, } } + + #[inline] + pub fn set_snap_mgr(&mut self, mgr: TabletSnapManager) { + self.sanp_mgr = Some(mgr); + } + + #[inline] + fn snap_mgr(&self) -> &TabletSnapManager { + self.sanp_mgr.as_ref().unwrap() + } + + fn generate_snap(&self, snap_key: &TabletSnapKey, tablet: EK) -> crate::Result<()> { + let checkpointer_path = self.snap_mgr().get_tablet_checkpointer_path(snap_key); + if checkpointer_path.as_path().exists() { + // Remove the old checkpoint directly. + std::fs::remove_dir_all(checkpointer_path.as_path())?; + } + // Here not checkpoint to a temporary directory first, the temporary directory + // logic already implemented in rocksdb. + let mut checkpointer = tablet.new_checkpointer()?; + + checkpointer.create_at(checkpointer_path.as_path(), None, 0)?; + Ok(()) + } } impl Runnable for ReadRunner @@ -141,10 +181,58 @@ where }, ); } - ReadTask::GenTabletSnapshot { region_id, .. 
} => { - // TODO: implement generate tablet snapshot for raftstore v2 - self.notifier - .notify_snapshot_generated(region_id, Box::new(RaftSnapshot::default())); + + ReadTask::GenTabletSnapshot { + region_id, + to_peer, + tablet, + region_state, + last_applied_term, + last_applied_index, + canceled, + for_balance, + } => { + SNAP_COUNTER.generate.start.inc(); + if canceled.load(Ordering::Relaxed) { + info!("generate snap is canceled"; "region_id" => region_id); + SNAP_COUNTER.generate.abort.inc(); + return; + } + let start = Instant::now(); + let _io_type_guard = WithIoType::new(if for_balance { + IoType::LoadBalance + } else { + IoType::Replication + }); + // the state should already checked in apply workers. + assert_ne!(region_state.get_state(), PeerState::Tombstone); + let mut snapshot = Snapshot::default(); + // Set snapshot metadata. + snapshot.mut_metadata().set_term(last_applied_term); + snapshot.mut_metadata().set_index(last_applied_index); + let conf_state = util::conf_state_from_region(region_state.get_region()); + snapshot.mut_metadata().set_conf_state(conf_state); + // Set snapshot data. + let mut snap_data = RaftSnapshotData::default(); + snap_data.set_region(region_state.get_region().clone()); + snap_data.mut_meta().set_for_balance(for_balance); + snapshot.set_data(snap_data.write_to_bytes().unwrap().into()); + + // create checkpointer. 
+ let snap_key = TabletSnapKey::from_region_snap(region_id, to_peer, &snapshot); + let mut res = None; + if let Err(e) = self.generate_snap(&snap_key, tablet) { + error!("failed to create checkpointer"; "region_id" => region_id, "error" => %e); + SNAP_COUNTER.generate.fail.inc(); + } else { + SNAP_COUNTER.generate.success.inc(); + SNAP_HISTOGRAM + .generate + .observe(start.saturating_elapsed_secs()); + res = Some(Box::new(snapshot)) + } + + self.notifier.notify_snapshot_generated(region_id, res); } } } diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 2078ccabafc..5d7455b2d1c 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -30,7 +30,7 @@ mod worker; pub use self::msg::PeerInternalStat; pub use self::{ async_io::{ - read::{AsyncReadNotifier, FetchedLogs, ReadRunner, ReadTask}, + read::{AsyncReadNotifier, FetchedLogs, GenSnapRes, ReadRunner, ReadTask}, write::{ ExtraStates, PersistedNotifier, StoreWriters, Worker as WriteWorker, WriteMsg, WriteTask, @@ -70,17 +70,17 @@ pub use self::{ check_abort, copy_snapshot, snap_io::{apply_sst_cf_file, build_sst_cf_file_list}, ApplyOptions, CfFile, Error as SnapError, SnapEntry, SnapKey, SnapManager, - SnapManagerBuilder, Snapshot, SnapshotStatistics, + SnapManagerBuilder, Snapshot, SnapshotStatistics, TabletSnapKey, TabletSnapManager, }, transport::{CasualRouter, ProposalRouter, SignificantRouter, StoreRouter, Transport}, txn_ext::{LocksStatus, PeerPessimisticLocks, PessimisticLockPair, TxnExt}, util::{RegionReadProgress, RegionReadProgressRegistry}, worker::{ - AutoSplitController, Bucket, BucketRange, CachedReadDelegate, CheckLeaderRunner, - CheckLeaderTask, FlowStatistics, FlowStatsReporter, KeyEntry, LocalReadContext, - LocalReader, LocalReaderCore, PdTask, ReadDelegate, ReadExecutor, ReadExecutorProvider, - ReadProgress, ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, - SplitConfig, 
SplitConfigManager, StoreMetaDelegate, TrackVer, WriteStats, - TLS_LOCAL_READ_METRICS, + metrics::TLS_LOCAL_READ_METRICS, AutoSplitController, Bucket, BucketRange, + CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, FlowStatistics, FlowStatsReporter, + KeyEntry, LocalReadContext, LocalReader, LocalReaderCore, PdTask, ReadDelegate, + ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, + SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, StoreMetaDelegate, + TrackVer, WriteStats, }, }; diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 56b80c94dcc..0d10b1f36cf 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -1190,7 +1190,7 @@ pub mod tests { fsm::apply::compact_raft_log, initial_region, prepare_bootstrap_cluster, worker::{make_region_worker_raftstore_cfg, RegionRunner, RegionTask}, - AsyncReadNotifier, FetchedLogs, + AsyncReadNotifier, FetchedLogs, GenSnapRes, }, }; @@ -1429,7 +1429,7 @@ pub mod tests { self.ch.send(fetched_logs).unwrap(); } - fn notify_snapshot_generated(&self, _region_id: u64, _snapshot: Box) { + fn notify_snapshot_generated(&self, _region_id: u64, _res: GenSnapRes) { unreachable!(); } } diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index eda0ffaa9cb..9995582f13c 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1886,6 +1886,81 @@ impl SnapManagerBuilder { } } +#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub struct TabletSnapKey { + pub region_id: u64, + pub to_peer: u64, + pub term: u64, + pub idx: u64, +} + +impl TabletSnapKey { + #[inline] + pub fn new(region_id: u64, to_peer: u64, term: u64, idx: u64) -> TabletSnapKey { + TabletSnapKey { + region_id, + to_peer, + term, + idx, + } + } + + pub fn from_region_snap(region_id: u64, to_peer: 
u64, snap: &RaftSnapshot) -> TabletSnapKey { + let index = snap.get_metadata().get_index(); + let term = snap.get_metadata().get_term(); + TabletSnapKey::new(region_id, to_peer, term, index) + } +} + +impl Display for TabletSnapKey { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "{}_{}_{}_{}", + self.region_id, self.to_peer, self.term, self.idx + ) + } +} + +/// `TabletSnapManager` manager tablet snapshot and shared between raftstore v2. +/// It's similar `SnapManager`, but simpler in tablet version. +/// +/// TODO: +/// - add Limiter to control send/recv speed +/// - clean up expired tablet checkpointer +#[derive(Clone)] +pub struct TabletSnapManager { + // directory to store snapfile. + base: String, +} + +impl TabletSnapManager { + pub fn new>(path: T) -> Self { + Self { base: path.into() } + } + + pub fn init(&self) -> io::Result<()> { + // Initialize the directory if it doesn't exist. + let path = Path::new(&self.base); + if !path.exists() { + file_system::create_dir_all(path)?; + return Ok(()); + } + if !path.is_dir() { + return Err(io::Error::new( + ErrorKind::Other, + format!("{} should be a directory", path.display()), + )); + } + Ok(()) + } + + pub fn get_tablet_checkpointer_path(&self, key: &TabletSnapKey) -> PathBuf { + let prefix = format!("{}_{}", SNAP_GEN_PREFIX, key); + PathBuf::from(&self.base).join(&prefix) + } +} + #[cfg(test)] pub mod tests { use std::{ diff --git a/components/raftstore/src/store/transport.rs b/components/raftstore/src/store/transport.rs index d2bbe921eea..7f10e7cd249 100644 --- a/components/raftstore/src/store/transport.rs +++ b/components/raftstore/src/store/transport.rs @@ -6,10 +6,9 @@ use std::sync::mpsc; use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{KvEngine, RaftEngine, Snapshot}; use kvproto::raft_serverpb::RaftMessage; -use raft::eraftpb::Snapshot as RaftSnapshot; use tikv_util::{error, warn}; -use super::{AsyncReadNotifier, FetchedLogs}; +use 
super::{AsyncReadNotifier, FetchedLogs, GenSnapRes}; use crate::{ store::{CasualMessage, PeerMsg, RaftCommand, RaftRouter, SignificantMsg, StoreMsg}, DiscardReason, Error, Result, @@ -182,7 +181,7 @@ impl AsyncReadNotifier for RaftRouter { } #[inline] - fn notify_snapshot_generated(&self, _region_id: u64, _snapshot: Box) { + fn notify_snapshot_generated(&self, _region_id: u64, _snapshot: GenSnapRes) { unreachable!() } } diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index cd7680ebc4a..e021651ba3d 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -6,7 +6,7 @@ mod cleanup_snapshot; mod cleanup_sst; mod compact; mod consistency_check; -mod metrics; +pub mod metrics; mod pd; mod raftlog_gc; mod read; @@ -25,7 +25,6 @@ pub use self::{ cleanup_sst::{Runner as CleanupSstRunner, Task as CleanupSstTask}, compact::{Runner as CompactRunner, Task as CompactTask}, consistency_check::{Runner as ConsistencyCheckRunner, Task as ConsistencyCheckTask}, - metrics::TLS_LOCAL_READ_METRICS, pd::{ new_change_peer_v2_request, FlowStatistics, FlowStatsReporter, HeartbeatTask, Runner as PdRunner, Task as PdTask, From 6b61f4a1e17e236db53d8ef3efc6338fa5ef159e Mon Sep 17 00:00:00 2001 From: Mike <842725815@qq.com> Date: Mon, 14 Nov 2022 13:19:54 +0800 Subject: [PATCH 0329/1149] Add curl command into docker image (#13765) ref pingcap/tidb-operator#4764, ref tikv/tikv#13781 Add curl command into docker image. 
Signed-off-by: mikechengwei <842725815@qq.com> --- Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile b/Dockerfile index eca69ce3b8d..c4ad36dc6e7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -105,6 +105,9 @@ FROM pingcap/alpine-glibc COPY --from=builder /tikv/target/release/tikv-server /tikv-server COPY --from=builder /tikv/target/release/tikv-ctl /tikv-ctl +RUN apk add --no-cache \ + curl + EXPOSE 20160 20180 ENTRYPOINT ["/tikv-server"] From 23dba4fe66bf8abed8cb5c61a33655fd5edad902 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 14 Nov 2022 14:41:54 +0800 Subject: [PATCH 0330/1149] raftstore-v2: support tablet split (#13709) ref tikv/tikv#12842, ref tikv/tikv#13689 Signed-off-by: SpadeA-Tang --- components/batch-system/src/router.rs | 8 +- components/raftstore-v2/src/batch/store.rs | 8 +- components/raftstore-v2/src/fsm/peer.rs | 1 + components/raftstore-v2/src/fsm/store.rs | 11 +- .../src/operation/command/admin/mod.rs | 7 +- .../src/operation/command/admin/split.rs | 240 ++++++++++++++++-- .../raftstore-v2/src/operation/command/mod.rs | 12 +- components/raftstore-v2/src/operation/life.rs | 51 +++- components/raftstore-v2/src/operation/mod.rs | 2 +- components/raftstore-v2/src/raft/peer.rs | 140 ++++++++-- components/raftstore-v2/src/raft/storage.rs | 43 ++++ components/raftstore-v2/src/router/message.rs | 8 + .../tests/integrations/cluster.rs | 31 ++- .../raftstore-v2/tests/integrations/mod.rs | 1 + .../tests/integrations/test_split.rs | 183 +++++++++++++ components/raftstore/src/store/config.rs | 18 ++ 16 files changed, 704 insertions(+), 60 deletions(-) create mode 100644 components/raftstore-v2/tests/integrations/test_split.rs diff --git a/components/batch-system/src/router.rs b/components/batch-system/src/router.rs index d96e65e1e99..b863f1535f0 100644 --- a/components/batch-system/src/router.rs +++ b/components/batch-system/src/router.rs @@ -294,7 +294,7 @@ where } } - /// Force 
sending message to control FSM. + /// Sending message to control FSM. #[inline] pub fn send_control(&self, msg: C::Message) -> Result<(), TrySendError> { match self.control_box.try_send(msg, &self.control_scheduler) { @@ -309,6 +309,12 @@ where } } + /// Force sending message to control FSM. + #[inline] + pub fn force_send_control(&self, msg: C::Message) -> Result<(), SendError> { + self.control_box.force_send(msg, &self.control_scheduler) + } + /// Try to notify all normal FSMs a message. pub fn broadcast_normal(&self, mut msg_gen: impl FnMut() -> N::Message) { let timer = Instant::now_coarse(); diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 0dbd0ed4b64..605bbb95131 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -265,6 +265,7 @@ impl StorePollerBuilder { fn init(&self) -> Result>> { let mut regions = HashMap::default(); let cfg = self.cfg.value(); + let mut meta = self.store_meta.lock().unwrap(); self.engine .for_each_raft_group::(&mut |region_id| { assert_ne!(region_id, INVALID_ID); @@ -278,8 +279,11 @@ impl StorePollerBuilder { Some(p) => p, None => return Ok(()), }; - let pair = PeerFsm::new(&cfg, &*self.tablet_factory, storage)?; - let prev = regions.insert(region_id, pair); + let (sender, peer_fsm) = PeerFsm::new(&cfg, &*self.tablet_factory, storage)?; + meta.region_read_progress + .insert(region_id, peer_fsm.as_ref().peer().read_progress().clone()); + + let prev = regions.insert(region_id, (sender, peer_fsm)); if let Some((_, p)) = prev { return Err(box_err!( "duplicate region {:?} vs {:?}", diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index b74f8b46b1c..5abdcf31f0f 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -220,6 +220,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, } PeerMsg::Tick(tick) => 
self.on_tick(tick), PeerMsg::ApplyRes(res) => self.fsm.peer.on_apply_res(self.store_ctx, res), + PeerMsg::SplitInit(msg) => self.fsm.peer.on_split_init(self.store_ctx, msg), PeerMsg::Start => self.on_start(), PeerMsg::Noop => unimplemented!(), PeerMsg::Persisted { diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 3be571bdfbc..0d390d5b51d 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -5,12 +5,17 @@ use std::time::SystemTime; use batch_system::Fsm; use collections::HashMap; use engine_traits::{KvEngine, RaftEngine}; -use raftstore::store::{Config, ReadDelegate}; +use kvproto::{metapb::Region, raft_serverpb::RaftMessage}; +use raftstore::{ + coprocessor::RegionChangeReason, + store::{Config, ReadDelegate, RegionReadProgressRegistry}, +}; use slog::{o, Logger}; use tikv_util::mpsc::{self, LooseBoundedSender, Receiver}; use crate::{ batch::StoreContext, + raft::Peer, router::{StoreMsg, StoreTick}, tablet::CachedTablet, }; @@ -24,6 +29,8 @@ where pub readers: HashMap, /// region_id -> tablet cache pub tablet_caches: HashMap>, + /// region_id -> `RegionReadProgress` + pub region_read_progress: RegionReadProgressRegistry, } impl StoreMeta @@ -35,6 +42,7 @@ where store_id: None, readers: HashMap::default(), tablet_caches: HashMap::default(), + region_read_progress: RegionReadProgressRegistry::new(), } } } @@ -149,6 +157,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { StoreMsg::Start => self.on_start(), StoreMsg::Tick(tick) => self.on_tick(tick), StoreMsg::RaftMessage(msg) => self.fsm.store.on_raft_message(self.store_ctx, msg), + StoreMsg::SplitInit(msg) => self.fsm.store.on_split_init(self.store_ctx, msg), } } } diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index c3fe2cceded..eb6560d239e 100644 --- 
a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -4,10 +4,7 @@ mod conf_change; mod split; use engine_traits::{KvEngine, RaftEngine}; -use kvproto::{ - raft_cmdpb::{AdminCmdType, AdminRequest, RaftCmdRequest}, - raft_serverpb::PeerState, -}; +use kvproto::raft_cmdpb::{AdminCmdType, AdminRequest, RaftCmdRequest}; use protobuf::Message; use raft::prelude::ConfChangeV2; use raftstore::{ @@ -20,7 +17,7 @@ use raftstore::{ Result, }; use slog::info; -pub use split::SplitResult; +pub use split::{SplitInit, SplitResult}; use tikv_util::box_err; use self::conf_change::ConfChangeResult; diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 198819cfd7b..2e43e69b44c 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -8,52 +8,58 @@ //! - Nothing special except for validating batch split requests (ex: split keys //! are in ascending order). //! -//! Execution: -//! - exec_batch_split: Create and initialize metapb::region for split regions +//! Apply: +//! - apply_batch_split: Create and initialize metapb::region for split regions //! and derived regions. Then, create checkpoints of the current talbet for //! split regions and derived region to make tablet physical isolated. Update //! the parent region's region state without persistency. Send the new regions //! (including derived region) back to raftstore. //! -//! Result apply: -//! - todo +//! On Apply Result: +//! - on_ready_split_region: Update the relevant in memory meta info of the +//! parent peer, then send to the store the relevant info needed to create and +//! initialize the split regions. //! //! Split peer creation and initlization: -//! - todo -//! -//! Split finish: -//! - todo +//! 
- on_split_init: In normal cases, the uninitialized split region will be +//! created by the store, and here init it using the data sent from the parent +//! peer. use std::collections::VecDeque; +use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{ - Checkpointer, KvEngine, OpenOptions, RaftEngine, TabletFactory, CF_DEFAULT, SPLIT_PREFIX, + Checkpointer, DeleteStrategy, KvEngine, OpenOptions, RaftEngine, RaftLogBatch, Range, + CF_DEFAULT, SPLIT_PREFIX, }; +use fail::fail_point; +use keys::enc_end_key; use kvproto::{ - metapb::Region, + metapb::{self, Region, RegionEpoch}, raft_cmdpb::{AdminRequest, AdminResponse, RaftCmdRequest, SplitRequest}, raft_serverpb::RegionLocalState, }; use protobuf::Message; +use raft::RawNode; use raftstore::{ - coprocessor::split_observer::{is_valid_split_key, strip_timestamp_if_exists}, + coprocessor::RegionChangeReason, store::{ fsm::apply::validate_batch_split, metrics::PEER_ADMIN_CMD_COUNTER, util::{self, KeysInfoFormatter}, - PeerStat, ProposalContext, RAFT_INIT_LOG_INDEX, + PeerPessimisticLocks, PeerStat, ProposalContext, RAFT_INIT_LOG_INDEX, }, Result, }; -use slog::{info, warn, Logger}; +use slog::{error, info, warn, Logger}; use tikv_util::box_err; use crate::{ batch::StoreContext, - fsm::ApplyResReporter, + fsm::{ApplyResReporter, PeerFsmDelegate}, operation::AdminCmdResult, - raft::{Apply, Peer}, - router::ApplyRes, + raft::{write_initial_states, Apply, Peer, Storage}, + router::{ApplyRes, PeerMsg, StoreMsg}, }; #[derive(Debug)] @@ -63,14 +69,23 @@ pub struct SplitResult { pub derived_index: usize, pub tablet_index: u64, } +pub struct SplitInit { + /// Split region + pub region: metapb::Region, + pub check_split: bool, + pub parent_is_leader: bool, + + /// In-memory pessimistic locks that should be inherited from parent region + pub locks: PeerPessimisticLocks, +} impl Peer { pub fn propose_split( &mut self, store_ctx: &mut StoreContext, - mut req: RaftCmdRequest, + req: RaftCmdRequest, ) -> Result { - 
validate_batch_split(req.mut_admin_request(), self.region())?; + validate_batch_split(req.get_admin_request(), self.region())?; // We rely on ConflictChecker to detect conflicts, so no need to set proposal // context. let data = req.write_to_bytes().unwrap(); @@ -247,6 +262,187 @@ impl Apply { } } +impl Peer { + pub fn on_ready_split_region( + &mut self, + store_ctx: &mut StoreContext, + derived_index: usize, + tablet_index: u64, + regions: Vec, + ) { + fail_point!("on_split", self.peer().get_store_id() == 3, |_| {}); + + let derived = ®ions[derived_index]; + let derived_epoch = derived.get_region_epoch().clone(); + let region_id = derived.get_id(); + + // Group in-memory pessimistic locks in the original region into new regions. + // The locks of new regions will be put into the corresponding new regions + // later. And the locks belonging to the old region will stay in the original + // map. + let region_locks = { + let mut pessimistic_locks = self.txn_ext().pessimistic_locks.write(); + info!(self.logger, "moving {} locks to new regions", pessimistic_locks.len();); + // Update the version so the concurrent reader will fail due to EpochNotMatch + // instead of PessimisticLockNotFound. + pessimistic_locks.version = derived_epoch.get_version(); + pessimistic_locks.group_by_regions(®ions, derived) + }; + fail_point!("on_split_invalidate_locks"); + + // Roughly estimate the size and keys for new regions. 
+ let new_region_count = regions.len() as u64; + { + let mut meta = store_ctx.store_meta.lock().unwrap(); + let reader = meta.readers.get_mut(&derived.get_id()).unwrap(); + self.set_region( + reader, + derived.clone(), + RegionChangeReason::Split, + tablet_index, + ); + } + + self.post_split(); + + let last_region_id = regions.last().unwrap().get_id(); + for (new_region, locks) in regions.into_iter().zip(region_locks) { + let new_region_id = new_region.get_id(); + if new_region_id == region_id { + continue; + } + + let split_init = PeerMsg::SplitInit(Box::new(SplitInit { + region: new_region, + parent_is_leader: self.is_leader(), + check_split: last_region_id == new_region_id, + locks, + })); + + // First, send init msg to peer directly. Returning error means the peer is not + // existed in which case we should redirect it to the store. + match store_ctx.router.force_send(new_region_id, split_init) { + Ok(_) => {} + Err(SendError(PeerMsg::SplitInit(msg))) => { + store_ctx + .router + .force_send_control(StoreMsg::SplitInit(msg)) + .unwrap_or_else(|e| { + panic!( + "{:?} fails to send split peer intialization msg to store : {:?}", + self.logger.list(), + e + ) + }); + } + _ => unreachable!(), + } + } + } + + pub fn on_split_init( + &mut self, + store_ctx: &mut StoreContext, + split_init: Box, + ) { + let region_id = split_init.region.id; + let replace = split_init.region.get_region_epoch().get_version() + > self + .storage() + .region_state() + .get_region() + .get_region_epoch() + .get_version(); + + if !self.storage().is_initialized() || replace { + let split_temp_path = store_ctx.tablet_factory.tablet_path_with_prefix( + SPLIT_PREFIX, + region_id, + RAFT_INIT_LOG_INDEX, + ); + + let tablet = store_ctx + .tablet_factory + .load_tablet(&split_temp_path, region_id, RAFT_INIT_LOG_INDEX) + .unwrap_or_else(|e| { + panic!( + "{:?} fails to load tablet {:?} :{:?}", + self.logger.list(), + split_temp_path, + e + ) + }); + + self.tablet_mut().set(tablet); + + let storage = 
Storage::with_split( + self.peer().get_store_id(), + &split_init.region, + store_ctx.engine.clone(), + store_ctx.read_scheduler.clone(), + &store_ctx.logger, + ) + .unwrap_or_else(|e| panic!("fail to create storage: {:?}", e)) + .unwrap(); + + let applied_index = storage.apply_state().get_applied_index(); + let peer_id = storage.peer().get_id(); + let raft_cfg = store_ctx.cfg.new_raft_config(peer_id, applied_index); + + let mut raft_group = RawNode::new(&raft_cfg, storage, &self.logger).unwrap(); + // If this region has only one peer and I am the one, campaign directly. + if split_init.region.get_peers().len() == 1 { + raft_group.campaign().unwrap(); + self.set_has_ready(); + } + self.set_raft_group(raft_group); + } else { + // todo: when reaching here (peer is initalized before and cannot be replaced), + // it is much complexer. + return; + } + + { + let mut meta = store_ctx.store_meta.lock().unwrap(); + + info!( + self.logger, + "init split region"; + "region" => ?split_init.region, + ); + + // todo: GlobalReplicationState + + for p in split_init.region.get_peers() { + self.insert_peer_cache(p.clone()); + } + + if split_init.parent_is_leader { + if self.maybe_campaign() { + self.set_has_ready(); + } + + *self.txn_ext().pessimistic_locks.write() = split_init.locks; + // The new peer is likely to become leader, send a heartbeat immediately to + // reduce client query miss. 
+ self.heartbeat_pd(store_ctx); + } + + meta.tablet_caches.insert(region_id, self.tablet().clone()); + meta.readers + .insert(region_id, self.generate_read_delegate()); + meta.region_read_progress + .insert(region_id, self.read_progress().clone()); + } + + if split_init.check_split { + // todo: check if the last region needs to split again + } + + self.schedule_apply_fsm(store_ctx); + } +} + #[cfg(test)] mod test { use std::sync::{ @@ -260,7 +456,7 @@ mod test { kv::TestTabletFactoryV2, raft, }; - use engine_traits::{CfOptionsExt, Peekable, WriteBatch, ALL_CFS}; + use engine_traits::{CfOptionsExt, Peekable, TabletFactory, WriteBatch, ALL_CFS}; use futures::channel::mpsc::unbounded; use kvproto::{ metapb::RegionEpoch, @@ -419,7 +615,7 @@ mod test { region_state.set_region(region.clone()); region_state.set_tablet_index(5); - let (read_scheduler, rx) = dummy_scheduler(); + let (read_scheduler, _rx) = dummy_scheduler(); let (reporter, _) = MockReporter::new(); let mut apply = Apply::new( region @@ -608,7 +804,9 @@ mod test { // Split will create checkpoint tablet, so if there are some writes before // split, they should be flushed immediately. 
apply.apply_put(CF_DEFAULT, b"k04", b"v4").unwrap(); - assert!(!apply.write_batch_mut().as_ref().unwrap().is_empty()); + assert!(!WriteBatch::is_empty( + apply.write_batch_mut().as_ref().unwrap() + )); splits.mut_requests().clear(); splits .mut_requests() diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 75575e9a19f..81365a162ec 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -39,7 +39,8 @@ use raftstore::{ local_metrics::RaftMetrics, metrics::*, msg::ErrorCallback, - util, WriteCallback, + util::{self, admin_cmd_epoch_lookup}, + WriteCallback, }, Error, Result, }; @@ -58,7 +59,7 @@ mod admin; mod control; mod write; -pub use admin::AdminCmdResult; +pub use admin::{AdminCmdResult, SplitInit, SplitResult}; pub use control::ProposalControl; pub use write::{SimpleWriteDecoder, SimpleWriteEncoder}; @@ -294,14 +295,21 @@ impl Peer { // region. 
return; } + for admin_res in apply_res.admin_result { match admin_res { AdminCmdResult::ConfChange(conf_change) => { self.on_apply_res_conf_change(conf_change) } + AdminCmdResult::SplitRegion(SplitResult { + regions, + derived_index, + tablet_index, + }) => self.on_ready_split_region(ctx, derived_index, tablet_index, regions), AdminCmdResult::SplitRegion(_) => unimplemented!(), } } + self.raft_group_mut() .advance_apply_to(apply_res.applied_index); self.proposal_control_advance_apply(apply_res.applied_index); diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 7be70a9afe7..12c7d4ec544 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -13,16 +13,17 @@ use std::cmp; use batch_system::BasicMailbox; -use crossbeam::channel::TrySendError; +use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ metapb::Region, raft_serverpb::{PeerState, RaftMessage}, }; use raftstore::store::{util, ExtraStates, WriteTask}; -use slog::{debug, error, info}; +use slog::{debug, error, info, warn}; use tikv_util::store::find_peer; +use super::command::SplitInit; use crate::{ batch::StoreContext, fsm::{PeerFsm, Store}, @@ -89,6 +90,44 @@ impl DestroyProgress { } impl Store { + /// The method is called during split. + /// The creation process is: + /// 1. create an uninitialized peer if not existed before + /// 2. 
initialize the peer by the information sent from parent peer + #[inline] + pub fn on_split_init( + &mut self, + ctx: &mut StoreContext, + msg: Box, + ) where + EK: KvEngine, + ER: RaftEngine, + { + let region_id = msg.region.id; + let mut raft_msg = Box::new(RaftMessage::default()); + raft_msg.set_region_id(region_id); + raft_msg.set_region_epoch(msg.region.get_region_epoch().clone()); + raft_msg.set_to_peer( + msg.region + .get_peers() + .iter() + .find(|p| p.get_store_id() == self.store_id()) + .unwrap() + .clone(), + ); + + // It will create the peer if it does not exist + self.on_raft_message(ctx, raft_msg); + + if let Err(SendError(m)) = ctx.router.force_send(region_id, PeerMsg::SplitInit(msg)) { + warn!( + self.logger(), + "Split peer is destroyed before sending the intialization msg"; + "split init msg" => ?m, + ) + } + } + /// When a message's recipient doesn't exist, it will be redirected to /// store. Store is responsible for checking if it's neccessary to create /// a peer to handle the message. @@ -174,8 +213,14 @@ impl Store { let mut region = Region::default(); region.set_id(region_id); region.set_region_epoch(from_epoch.clone()); + // Peer list doesn't have to be complete, as it's uninitialized. - region.mut_peers().push(from_peer.clone()); + // + // If the id of the from_peer is INVALID_ID, this msg must be sent from parent + // peer in the split execution in which case we do not add it into the region. + if from_peer.id != raft::INVALID_ID { + region.mut_peers().push(from_peer.clone()); + } region.mut_peers().push(to_peer.clone()); // We don't set the region range here as we allow range conflict. 
let (tx, fsm) = match Storage::uninit( diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index a110f4bf330..84835231398 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -11,4 +11,4 @@ pub use command::{ pub use life::DestroyProgress; pub use ready::{AsyncWriter, GenSnapTask, SnapState}; -pub(crate) use self::query::LocalReader; +pub(crate) use self::{command::SplitInit, query::LocalReader}; diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index dd53f47e152..6ebb3ed2056 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -2,21 +2,35 @@ use std::{mem, sync::Arc}; +use collections::HashMap; use crossbeam::atomic::AtomicCell; use engine_traits::{KvEngine, OpenOptions, RaftEngine, TabletFactory}; -use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb}; +use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; use raft::{RawNode, StateRole}; -use raftstore::store::{ - util::{Lease, RegionReadProgress}, - Config, EntryStorage, ProposalQueue, ReadDelegate, ReadIndexQueue, TrackVer, TxnExt, +use raftstore::{ + coprocessor::{CoprocessorHost, RegionChangeEvent, RegionChangeReason}, + store::{ + fsm::Proposal, + util::{Lease, RegionReadProgress}, + Config, EntryStorage, PeerStat, ProposalQueue, ReadDelegate, ReadIndexQueue, ReadProgress, + TxnExt, + }, + Error, +}; +use slog::{debug, error, info, o, warn, Logger}; +use tikv_util::{ + box_err, + config::ReadableSize, + time::{monotonic_raw_now, Instant as TiInstant}, + worker::Scheduler, + Either, }; -use slog::Logger; -use tikv_util::{box_err, config::ReadableSize}; use time::Timespec; use super::{storage::Storage, Apply}; use crate::{ + batch::StoreContext, fsm::{ApplyFsm, ApplyScheduler}, operation::{AsyncWriter, DestroyProgress, ProposalControl, 
SimpleWriteEncoder}, router::{CmdResChannel, QueryResChannel}, @@ -55,6 +69,8 @@ pub struct Peer { /// region buckets. region_buckets: Option, + last_region_buckets: Option, + /// Transaction extensions related to this peer. txn_ext: Arc, txn_extra_op: Arc>, @@ -76,22 +92,7 @@ impl Peer { let applied_index = storage.apply_state().get_applied_index(); let peer_id = storage.peer().get_id(); - - let raft_cfg = raft::Config { - id: peer_id, - election_tick: cfg.raft_election_timeout_ticks, - heartbeat_tick: cfg.raft_heartbeat_ticks, - min_election_tick: cfg.raft_min_election_timeout_ticks, - max_election_tick: cfg.raft_max_election_timeout_ticks, - max_size_per_msg: cfg.raft_max_size_per_msg.0, - max_inflight_msgs: cfg.raft_max_inflight_msgs, - applied: applied_index, - check_quorum: true, - skip_bcast_commit: true, - pre_vote: cfg.prevote, - max_committed_size_per_ready: ReadableSize::mb(16).0, - ..Default::default() - }; + let raft_cfg = cfg.new_raft_config(peer_id, applied_index); let region_id = storage.region().get_id(); let tablet_index = storage.region_state().get_tablet_index(); @@ -143,6 +144,7 @@ impl Peer { cfg.renew_leader_lease_advance_duration(), ), region_buckets: None, + last_region_buckets: None, txn_ext: Arc::default(), txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::Noop)), proposal_control: ProposalControl::new(0), @@ -173,6 +175,63 @@ impl Peer { self.region().get_id() } + /// Set the region of a peer. + /// + /// This will update the region of the peer, caller must ensure the region + /// has been preserved in a durable device. + pub fn set_region( + &mut self, + // host: &CoprocessorHost, + reader: &mut ReadDelegate, + region: metapb::Region, + reason: RegionChangeReason, + tablet_index: u64, + ) { + if self.region().get_region_epoch().get_version() < region.get_region_epoch().get_version() + { + // Epoch version changed, disable read on the local reader for this region. 
+ self.leader_lease.expire_remote_lease(); + } + + let mut region_state = RegionLocalState::default(); + region_state.set_region(region.clone()); + region_state.set_tablet_index(tablet_index); + region_state.set_state(self.storage().region_state().get_state()); + self.storage_mut().set_region_state(region_state); + + let progress = ReadProgress::region(region); + // Always update read delegate's region to avoid stale region info after a + // follower becoming a leader. + self.maybe_update_read_progress(reader, progress); + + if self.is_leader() { + // Unlike v1, we should renew remote lease if it's leader. This is because v2 + // only provides read in local reader which requires passing the lease check. If + // lease check fails, it sends query to raftstore to make it renew the remote + // lease. However, raftstore will answer immediately if the `bound` in + // `leader_lease` is valid, so the remote lease will not be updated. + if let Some(progress) = self + .leader_lease + .maybe_new_remote_lease(self.term()) + .map(ReadProgress::leader_lease) + { + self.maybe_update_read_progress(reader, progress); + } + } + + // Update leader info + self.read_progress + .update_leader_info(self.leader_id(), self.term(), self.region()); + + { + let mut pessimistic_locks = self.txn_ext.pessimistic_locks.write(); + pessimistic_locks.term = self.term(); + pessimistic_locks.version = self.region().get_region_epoch().get_version(); + } + + // todo: CoprocessorHost + } + #[inline] pub fn peer(&self) -> &metapb::Peer { self.raft_group.store().peer() @@ -253,6 +312,11 @@ impl Peer { &mut self.raft_group } + #[inline] + pub fn set_raft_group(&mut self, raft_group: RawNode>) { + self.raft_group = raft_group; + } + /// Mark the peer has a ready so it will be checked at the end of every /// processing round. 
#[inline] @@ -394,6 +458,38 @@ impl Peer { self.apply_scheduler = Some(apply_scheduler); } + #[inline] + pub fn post_split(&mut self) { + self.reset_region_buckets(); + } + + pub fn reset_region_buckets(&mut self) { + if self.region_buckets.is_some() { + self.last_region_buckets = self.region_buckets.take(); + } + } + + pub fn maybe_campaign(&mut self) -> bool { + if self.region().get_peers().len() <= 1 { + // The peer campaigned when it was created, no need to do it again. + return false; + } + + // If last peer is the leader of the region before split, it's intuitional for + // it to become the leader of new split region. + let _ = self.raft_group.campaign(); + true + } + + #[inline] + pub fn txn_ext(&self) -> &Arc { + &self.txn_ext + } + + pub fn heartbeat_pd(&self, store_ctx: &StoreContext) { + // todo + } + pub fn generate_read_delegate(&self) -> ReadDelegate { let peer_id = self.peer().get_id(); diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 8abeeeef73d..01285cc5a46 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -200,6 +200,49 @@ impl Storage { .map(Some) } + /// Creates a new storage for split peer. + /// + /// Except for region local state which uses the `region` provided with the + /// inital tablet index, all uses the inital states. 
+ pub fn with_split( + store_id: u64, + region: &metapb::Region, + engine: ER, + read_scheduler: Scheduler>, + logger: &Logger, + ) -> Result>> { + let mut region_state = RegionLocalState::default(); + region_state.set_region(region.clone()); + region_state.set_state(PeerState::Normal); + region_state.set_tablet_index(RAFT_INIT_LOG_INDEX); + + let mut apply_state = RaftApplyState::default(); + apply_state.set_applied_index(RAFT_INIT_LOG_INDEX); + apply_state + .mut_truncated_state() + .set_index(RAFT_INIT_LOG_INDEX); + apply_state + .mut_truncated_state() + .set_term(RAFT_INIT_LOG_TERM); + + let mut raft_state = RaftLocalState::default(); + raft_state.set_last_index(RAFT_INIT_LOG_INDEX); + raft_state.mut_hard_state().set_term(RAFT_INIT_LOG_TERM); + raft_state.mut_hard_state().set_commit(RAFT_INIT_LOG_INDEX); + + Self::create( + store_id, + region_state, + raft_state, + apply_state, + engine, + read_scheduler, + true, + logger, + ) + .map(Some) + } + fn create( store_id: u64, region_state: RegionLocalState, diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index cda9e971c66..a4681d8a873 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -15,6 +15,7 @@ use super::{ }, ApplyRes, }; +use crate::operation::SplitInit; #[derive(Debug, Clone, Copy, PartialEq, Hash)] #[repr(u8)] @@ -129,6 +130,8 @@ pub enum PeerMsg { SnapshotGenerated(GenSnapRes), /// Start the FSM. Start, + /// Messages from peer to peer in the same store + SplitInit(Box), /// A message only used to notify a peer. Noop, /// A message that indicates an asynchronous write has finished. 
@@ -167,6 +170,9 @@ impl fmt::Debug for PeerMsg { }, PeerMsg::ApplyRes(res) => write!(fmt, "ApplyRes {:?}", res), PeerMsg::Start => write!(fmt, "Startup"), + PeerMsg::SplitInit(_) => { + write!(fmt, "Split initialization") + } PeerMsg::Noop => write!(fmt, "Noop"), PeerMsg::Persisted { peer_id, @@ -187,6 +193,7 @@ impl fmt::Debug for PeerMsg { pub enum StoreMsg { RaftMessage(Box), + SplitInit(Box), Tick(StoreTick), Start, } @@ -195,6 +202,7 @@ impl fmt::Debug for StoreMsg { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { StoreMsg::RaftMessage(_) => write!(fmt, "Raft Message"), + StoreMsg::SplitInit(_) => write!(fmt, "Split initialization"), StoreMsg::Tick(tick) => write!(fmt, "StoreTick {:?}", tick), StoreMsg::Start => write!(fmt, "Start store"), } diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 6ac567907af..d99c982fc97 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -21,13 +21,14 @@ use engine_test::{ use engine_traits::{OpenOptions, TabletFactory, ALL_CFS}; use futures::executor::block_on; use kvproto::{ - metapb::Store, + metapb::{self, RegionEpoch, Store}, raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, raft_serverpb::RaftMessage, }; use pd_client::RpcClient; use raftstore::store::{ - region_meta::RegionMeta, Config, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX, + region_meta::{RegionLocalState, RegionMeta}, + Config, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX, }; use raftstore_v2::{ create_store_batch_system, @@ -147,6 +148,32 @@ impl TestRouter { req.mut_header().set_term(meta.raft_status.hard_state.term); req } + + pub fn region_detail(&self, region_id: u64) -> metapb::Region { + let RegionLocalState { + id, + start_key, + end_key, + epoch, + peers, + .. 
+ } = self + .must_query_debug_info(region_id, Duration::from_secs(1)) + .unwrap() + .region_state; + let mut region = metapb::Region::default(); + region.set_id(id); + region.set_start_key(start_key); + region.set_end_key(end_key); + let mut region_epoch = RegionEpoch::default(); + region_epoch.set_conf_ver(epoch.conf_ver); + region_epoch.set_version(epoch.version); + region.set_region_epoch(region_epoch); + for peer in peers { + region.mut_peers().push(new_peer(peer.store_id, peer.id)); + } + region + } } pub struct RunningState { diff --git a/components/raftstore-v2/tests/integrations/mod.rs b/components/raftstore-v2/tests/integrations/mod.rs index 740e64f7e29..4fb9ebcc323 100644 --- a/components/raftstore-v2/tests/integrations/mod.rs +++ b/components/raftstore-v2/tests/integrations/mod.rs @@ -12,4 +12,5 @@ mod test_basic_write; mod test_conf_change; mod test_life; mod test_read; +mod test_split; mod test_status; diff --git a/components/raftstore-v2/tests/integrations/test_split.rs b/components/raftstore-v2/tests/integrations/test_split.rs new file mode 100644 index 00000000000..97487a5d0c2 --- /dev/null +++ b/components/raftstore-v2/tests/integrations/test_split.rs @@ -0,0 +1,183 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{thread, time::Duration}; + +use futures::executor::block_on; +use kvproto::{ + metapb, pdpb, + raft_cmdpb::{ + AdminCmdType, AdminRequest, CmdType, RaftCmdRequest, RaftCmdResponse, Request, SplitRequest, + }, +}; +use raftstore_v2::router::PeerMsg; +use tikv_util::store::new_peer; + +use crate::cluster::{Cluster, TestRouter}; + +fn new_batch_split_region_request( + split_keys: Vec>, + ids: Vec, + right_derive: bool, +) -> AdminRequest { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::BatchSplit); + req.mut_splits().set_right_derive(right_derive); + let mut requests = Vec::with_capacity(ids.len()); + for (mut id, key) in ids.into_iter().zip(split_keys) { + let mut split = SplitRequest::default(); + split.set_split_key(key); + split.set_new_region_id(id.get_new_region_id()); + split.set_new_peer_ids(id.take_new_peer_ids()); + requests.push(split); + } + req.mut_splits().set_requests(requests.into()); + req +} + +fn must_split(region_id: u64, req: RaftCmdRequest, router: &mut TestRouter) { + let (msg, sub) = PeerMsg::raft_command(req); + router.send(region_id, msg).unwrap(); + block_on(sub.result()).unwrap(); + + // todo: when persistent implementation is ready, we can use tablet index of + // the parent to check whether the split is done. Now, just sleep a second. 
+ thread::sleep(Duration::from_secs(1)); +} + +fn put(router: &mut TestRouter, region_id: u64, key: &[u8]) -> RaftCmdResponse { + let mut req = router.new_request_for(region_id); + + let mut put_req = Request::default(); + put_req.set_cmd_type(CmdType::Put); + put_req.mut_put().set_key(key.to_vec()); + put_req.mut_put().set_value(b"v1".to_vec()); + req.mut_requests().push(put_req); + + let (msg, mut sub) = PeerMsg::raft_command(req.clone()); + router.send(region_id, msg).unwrap(); + assert!(block_on(sub.wait_proposed())); + assert!(block_on(sub.wait_committed())); + block_on(sub.result()).unwrap() +} + +// Split the region according to the parameters +// return the updated original region +fn split_region( + router: &mut TestRouter, + region: metapb::Region, + peer: metapb::Peer, + split_region_id: u64, + split_peer: metapb::Peer, + left_key: &[u8], + right_key: &[u8], + split_key: &[u8], + right_derive: bool, +) -> (metapb::Region, metapb::Region) { + let region_id = region.id; + let mut req = RaftCmdRequest::default(); + req.mut_header().set_region_id(region_id); + req.mut_header() + .set_region_epoch(region.get_region_epoch().clone()); + req.mut_header().set_peer(peer); + + let mut split_id = pdpb::SplitId::new(); + split_id.new_region_id = split_region_id; + split_id.new_peer_ids = vec![split_peer.id]; + let admin_req = + new_batch_split_region_request(vec![split_key.to_vec()], vec![split_id], right_derive); + req.mut_requests().clear(); + req.set_admin_request(admin_req); + + must_split(region_id, req, router); + + let (left, right) = if !right_derive { + ( + router.region_detail(region_id), + router.region_detail(split_region_id), + ) + } else { + ( + router.region_detail(split_region_id), + router.region_detail(region_id), + ) + }; + + // The end key of left region is `split_key` + // So writing `right_key` will fail + let resp = put(router, left.id, right_key); + assert!(resp.get_header().has_error(), "{:?}", resp); + // But `left_key` should succeed + let 
resp = put(router, left.id, left_key); + assert!(!resp.get_header().has_error(), "{:?}", resp); + + // Mirror of above case + let resp = put(router, right.id, left_key); + assert!(resp.get_header().has_error(), "{:?}", resp); + let resp = put(router, right.id, right_key); + assert!(!resp.get_header().has_error(), "{:?}", resp); + + assert_eq!(left.get_end_key(), split_key); + assert_eq!(right.get_start_key(), split_key); + assert_eq!(region.get_start_key(), left.get_start_key()); + assert_eq!(region.get_end_key(), right.get_end_key()); + + (left, right) +} + +#[test] +fn test_split() { + let cluster = Cluster::default(); + let store_id = cluster.node(0).id(); + let mut router = cluster.router(0); + // let factory = cluster.node(0).tablet_factory(); + + let region_id = 2; + let peer = new_peer(store_id, 3); + let region = router.region_detail(region_id); + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + + // Region 2 ["", ""] peer(1, 3) + // -> Region 2 ["", "k22"] peer(1, 3) + // Region 1000 ["k22", ""] peer(1, 10) + let (left, right) = split_region( + &mut router, + region, + peer.clone(), + 1000, + new_peer(store_id, 10), + b"k11", + b"k33", + b"k22", + false, + ); + + // Region 2 ["", "k22"] peer(1, 3) + // -> Region 2 ["", "k11"] peer(1, 3) + // Region 1001 ["k11", "k22"] peer(1, 11) + let _ = split_region( + &mut router, + left, + peer, + 1001, + new_peer(store_id, 11), + b"k00", + b"k11", + b"k11", + false, + ); + + // Region 1000 ["k22", ""] peer(1, 10) + // -> Region 1000 ["k22", "k33"] peer(1, 10) + // Region 1002 ["k33", ""] peer(1, 12) + let _ = split_region( + &mut router, + right, + new_peer(store_id, 10), + 1002, + new_peer(store_id, 12), + b"k22", + b"k33", + b"k33", + false, + ); +} diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index cbd83d0b85d..70cf6b67d1f 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -438,6 +438,24 @@ 
impl Config { Config::default() } + pub fn new_raft_config(&self, peer_id: u64, applied_index: u64) -> raft::Config { + raft::Config { + id: peer_id, + election_tick: self.raft_election_timeout_ticks, + heartbeat_tick: self.raft_heartbeat_ticks, + min_election_tick: self.raft_min_election_timeout_ticks, + max_election_tick: self.raft_max_election_timeout_ticks, + max_size_per_msg: self.raft_max_size_per_msg.0, + max_inflight_msgs: self.raft_max_inflight_msgs, + applied: applied_index, + check_quorum: true, + skip_bcast_commit: true, + pre_vote: self.prevote, + max_committed_size_per_ready: ReadableSize::mb(16).0, + ..Default::default() + } + } + pub fn raft_store_max_leader_lease(&self) -> TimeDuration { TimeDuration::from_std(self.raft_store_max_leader_lease.0).unwrap() } From a80ab9880d6c25fe9b962e582846360dd229783b Mon Sep 17 00:00:00 2001 From: YangKeao Date: Mon, 14 Nov 2022 21:57:55 -0500 Subject: [PATCH 0331/1149] copr: fix _ pattern in like behavior for old collation (#13785) close tikv/tikv#13769 Signed-off-by: YangKeao Co-authored-by: Ti Chi Robot --- .../src/codec/collation/charset.rs | 8 + .../src/codec/collation/mod.rs | 28 +++ components/tidb_query_expr/src/impl_like.rs | 185 +++++++++++++++--- components/tidb_query_expr/src/lib.rs | 47 ++++- 4 files changed, 233 insertions(+), 35 deletions(-) diff --git a/components/tidb_query_datatype/src/codec/collation/charset.rs b/components/tidb_query_datatype/src/codec/collation/charset.rs index 482e19cb999..9ea76f16b92 100644 --- a/components/tidb_query_datatype/src/codec/collation/charset.rs +++ b/components/tidb_query_datatype/src/codec/collation/charset.rs @@ -22,6 +22,10 @@ impl Charset for CharsetBinary { Some((data[0], 1)) } } + + fn charset() -> crate::Charset { + crate::Charset::Binary + } } pub struct CharsetUtf8mb4; @@ -48,6 +52,10 @@ impl Charset for CharsetUtf8mb4 { }) } } + + fn charset() -> crate::Charset { + crate::Charset::Utf8Mb4 + } } // gbk character data actually stored with utf8mb4 
character encoding. diff --git a/components/tidb_query_datatype/src/codec/collation/mod.rs b/components/tidb_query_datatype/src/codec/collation/mod.rs index cdc21cbe35a..9fbef4f1ee2 100644 --- a/components/tidb_query_datatype/src/codec/collation/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/mod.rs @@ -41,6 +41,32 @@ macro_rules! match_template_collator { }} } +#[macro_export] +macro_rules! match_template_multiple_collators { + ((), (), $($tail:tt)*) => { + $($tail)* + }; + (($first:tt), ($match_exprs:tt), $($tail:tt)*) => { + match_template_multiple_collators! { + ($first,), ($match_exprs,), $($tail)* + } + }; + (($first:tt, $($t:tt)*), ($first_match_expr:tt, $($match_exprs:tt)*), $($tail:tt)*) => {{ + #[allow(unused_imports)] + use $crate::codec::collation::collator::*; + + match_template_collator! { + $first, match $first_match_expr { + Collation::$first => { + match_template_multiple_collators! { + ($($t)*), ($($match_exprs)*), $($tail)* + } + } + } + } + }}; +} + #[macro_export] macro_rules! match_template_charset { ($t:tt, $($tail:tt)*) => {{ @@ -67,6 +93,8 @@ pub trait Charset { fn validate(bstr: &[u8]) -> Result<()>; fn decode_one(data: &[u8]) -> Option<(Self::Char, usize)>; + + fn charset() -> crate::Charset; } pub trait Collator: 'static + std::marker::Send + std::marker::Sync + std::fmt::Debug { diff --git a/components/tidb_query_expr/src/impl_like.rs b/components/tidb_query_expr/src/impl_like.rs index 39dce827650..2fe99017fe0 100644 --- a/components/tidb_query_expr/src/impl_like.rs +++ b/components/tidb_query_expr/src/impl_like.rs @@ -6,17 +6,21 @@ use tidb_query_datatype::codec::{collation::*, data_type::*}; #[rpn_fn] #[inline] -pub fn like(target: BytesRef, pattern: BytesRef, escape: &i64) -> Result> { +pub fn like( + target: BytesRef, + pattern: BytesRef, + escape: &i64, +) -> Result> { let escape = *escape as u32; // current search positions in pattern and target. let (mut px, mut tx) = (0, 0); // positions for backtrace. 
let (mut next_px, mut next_tx) = (0, 0); while px < pattern.len() || tx < target.len() { - if let Some((c, mut poff)) = C::Charset::decode_one(&pattern[px..]) { + if let Some((c, mut poff)) = CS::decode_one(&pattern[px..]) { let code: u32 = c.into(); if code == '_' as u32 { - if let Some((_, toff)) = C::Charset::decode_one(&target[tx..]) { + if let Some((_, toff)) = CS::decode_one(&target[tx..]) { px += poff; tx += toff; continue; @@ -26,7 +30,7 @@ pub fn like(target: BytesRef, pattern: BytesRef, escape: &i64) -> R next_px = px; px += poff; next_tx = tx; - next_tx += if let Some((_, toff)) = C::Charset::decode_one(&target[tx..]) { + next_tx += if let Some((_, toff)) = CS::decode_one(&target[tx..]) { toff } else { 1 @@ -35,13 +39,13 @@ pub fn like(target: BytesRef, pattern: BytesRef, escape: &i64) -> R } else { if code == escape && px + poff < pattern.len() { px += poff; - poff = if let Some((_, off)) = C::Charset::decode_one(&pattern[px..]) { + poff = if let Some((_, off)) = CS::decode_one(&pattern[px..]) { off } else { break; } } - if let Some((_, toff)) = C::Charset::decode_one(&target[tx..]) { + if let Some((_, toff)) = CS::decode_one(&target[tx..]) { if let Ok(std::cmp::Ordering::Equal) = C::sort_compare(&target[tx..tx + toff], &pattern[px..px + poff]) { @@ -154,20 +158,6 @@ mod tests { Collation::Binary, Some(0), ), - ( - r#"夏威夷吉他"#, - r#"_____"#, - '\\', - Collation::Binary, - Some(0), - ), - ( - r#"🐶🍐🍳➕🥜🎗🐜"#, - r#"_______"#, - '\\', - Collation::Utf8Mb4Bin, - Some(1), - ), ( r#"IpHONE"#, r#"iPhone"#, @@ -182,14 +172,6 @@ mod tests { Collation::Utf8Mb4GeneralCi, Some(1), ), - (r#"🕺_"#, r#"🕺🕺🕺_"#, '🕺', Collation::Binary, Some(0)), - ( - r#"🕺_"#, - r#"🕺🕺🕺_"#, - '🕺', - Collation::Utf8Mb4GeneralCi, - Some(1), - ), (r#"baab"#, r#"b_%b"#, '\\', Collation::Utf8Mb4Bin, Some(1)), (r#"baab"#, r#"b%_b"#, '\\', Collation::Utf8Mb4Bin, Some(1)), (r#"bab"#, r#"b_%b"#, '\\', Collation::Utf8Mb4Bin, Some(1)), @@ -238,4 +220,151 @@ mod tests { ); } } + + #[test] + fn 
test_like_wide_character() { + let cases = vec![ + ( + r#"夏威夷吉他"#, + r#"_____"#, + '\\', + Collation::Binary, + Collation::Binary, + Collation::Binary, + Some(0), + ), + ( + r#"🐶🍐🍳➕🥜🎗🐜"#, + r#"_______"#, + '\\', + Collation::Utf8Mb4Bin, + Collation::Utf8Mb4Bin, + Collation::Utf8Mb4Bin, + Some(1), + ), + ( + r#"🕺_"#, + r#"🕺🕺🕺_"#, + '🕺', + Collation::Binary, + Collation::Binary, + Collation::Binary, + Some(0), + ), + ( + r#"🕺_"#, + r#"🕺🕺🕺_"#, + '🕺', + Collation::Utf8Mb4GeneralCi, + Collation::Utf8Mb4GeneralCi, + Collation::Utf8Mb4GeneralCi, + Some(1), + ), + // When the new collation framework is not enabled, the collation + // will always be binary Some related tests are added here + ( + r#"夏威夷吉他"#, + r#"_____"#, + '\\', + Collation::Binary, + Collation::Utf8Mb4Bin, + Collation::Utf8Mb4Bin, + Some(1), + ), + ( + r#"🐶🍐🍳➕🥜🎗🐜"#, + r#"_______"#, + '\\', + Collation::Binary, + Collation::Utf8Mb4Bin, + Collation::Utf8Mb4Bin, + Some(1), + ), + ( + r#"🕺_"#, + r#"🕺🕺🕺_"#, + '🕺', + Collation::Binary, + Collation::Binary, + Collation::Binary, + Some(0), + ), + ( + r#"🕺_"#, + r#"🕺🕺🕺_"#, + '🕺', + Collation::Binary, + Collation::Utf8Mb4Bin, + Collation::Utf8Mb4Bin, + Some(1), + ), + // Will not match, because '_' matches only one byte. + ( + r#"测试"#, + r#"测_"#, + '\\', + Collation::Binary, + Collation::Utf8Mb4Bin, + Collation::Binary, + Some(0), + ), + // Both of them should be decoded with binary charset, so that we'll + // compare byte with byte, but not comparing a long character with a + // byte. + ( + r#"测试"#, + r#"测%"#, + '\\', + Collation::Binary, + Collation::Utf8Mb4Bin, + Collation::Binary, + Some(1), + ), + // This can happen when the new collation is not enabled, and TiDB + // doesn't push down the collation information. Using binary + // comparing order is fine, but we'll need to decode strings with + // their own charset (so '_' could match single character, rather + // than single byte). 
+ ( + r#"测试"#, + r#"测_"#, + '\\', + Collation::Binary, + Collation::Utf8Mb4Bin, + Collation::Utf8Mb4Bin, + Some(1), + ), + ]; + for (target, pattern, escape, collation, target_collation, pattern_collation, expected) in + cases + { + let output = RpnFnScalarEvaluator::new() + .return_field_type( + FieldTypeBuilder::new() + .tp(FieldTypeTp::LongLong) + .collation(collation) + .build(), + ) + .push_param_with_field_type( + target.to_owned().into_bytes(), + FieldTypeBuilder::new() + .tp(FieldTypeTp::String) + .collation(target_collation), + ) + .push_param_with_field_type( + pattern.to_owned().into_bytes(), + FieldTypeBuilder::new() + .tp(FieldTypeTp::String) + .collation(pattern_collation), + ) + .push_param(escape as i64) + .evaluate(ScalarFuncSig::LikeSig) + .unwrap(); + assert_eq!( + output, expected, + "target={}, pattern={}, escape={}", + target, pattern, escape + ); + } + } } diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index 8bb1cc05480..5a25fe343d1 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -44,8 +44,12 @@ pub mod impl_time; use tidb_query_common::Result; use tidb_query_datatype::{ - codec::data_type::*, match_template_charset, match_template_collator, Charset, Collation, - FieldTypeAccessor, FieldTypeFlag, + codec::{ + collation::{Charset as _, Collator}, + data_type::*, + }, + match_template_charset, match_template_collator, match_template_multiple_collators, Charset, + Collation, FieldTypeAccessor, FieldTypeFlag, }; use tipb::{Expr, FieldType, ScalarFuncSig}; @@ -91,10 +95,39 @@ fn map_compare_in_string_sig(ret_field_type: &FieldType) -> Result { }) } -fn map_like_sig(ret_field_type: &FieldType) -> Result { - Ok(match_template_collator! { - TT, match ret_field_type.as_accessor().collation().map_err(tidb_query_datatype::codec::Error::from)? 
{ - Collation::TT => like_fn_meta::() +fn map_like_sig(ret_field_type: &FieldType, children: &[Expr]) -> Result { + let ret_collation = ret_field_type + .as_accessor() + .collation() + .map_err(tidb_query_datatype::codec::Error::from)?; + let target_collation = children[0] + .get_field_type() + .as_accessor() + .collation() + .map_err(tidb_query_datatype::codec::Error::from)?; + let pattern_collation = children[1] + .get_field_type() + .as_accessor() + .collation() + .map_err(tidb_query_datatype::codec::Error::from)?; + + // If the target charset is the same with pattern charset, and is Utf8mb4, + // use their charset to decode bytes. If not, use the charset pushed down in + // the ret_field type to decode the bytes. + // + // This behavior is for the compatibility and correctness: The TiDB doesn't + // push down the collation information when the new collation framework is + // not enabled, and always use the binary collation. However, the `_` + // pattern considers not only the order of strings, but also the number of + // characters. Some characters more than 1 bytes cannot be matched by `_` if + // the new collation framework is not enabled. + Ok(match_template_multiple_collators! 
{ + (TT, TC, PC), (ret_collation, target_collation, pattern_collation), { + if ::Charset::charset() == ::Charset::charset() { + like_fn_meta::::Charset>() + } else { + like_fn_meta::::Charset>() + } } }) } @@ -596,7 +629,7 @@ fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { ScalarFuncSig::JsonKeys2ArgsSig => json_keys_fn_meta(), ScalarFuncSig::JsonQuoteSig => json_quote_fn_meta(), // impl_like - ScalarFuncSig::LikeSig => map_like_sig(ft)?, + ScalarFuncSig::LikeSig => map_like_sig(ft, children)?, // impl_regexp ScalarFuncSig::RegexpSig => map_regexp_like_sig(ft)?, ScalarFuncSig::RegexpUtf8Sig => map_regexp_like_sig(ft)?, From 68da60482057b2ee85c81afd731a73fa47f152ac Mon Sep 17 00:00:00 2001 From: lizhenhuan <1916038084@qq.com> Date: Tue, 15 Nov 2022 11:57:54 +0800 Subject: [PATCH 0332/1149] Push Json_valid to tikv (#13572) close tikv/tikv#13571 Signed-off-by: lizhenhuan <1916038084@qq.com> Co-authored-by: Ti Chi Robot --- components/tidb_query_expr/src/impl_json.rs | 57 +++++++++++++++++++++ components/tidb_query_expr/src/lib.rs | 3 ++ 2 files changed, 60 insertions(+) diff --git a/components/tidb_query_expr/src/impl_json.rs b/components/tidb_query_expr/src/impl_json.rs index 1926cc648e0..0c905b7458c 100644 --- a/components/tidb_query_expr/src/impl_json.rs +++ b/components/tidb_query_expr/src/impl_json.rs @@ -204,6 +204,31 @@ fn quote(bytes: BytesRef) -> Result> { Ok(Some(result)) } +#[rpn_fn(nullable, raw_varg, min_args = 1, max_args = 1)] +#[inline] +fn json_valid(args: &[ScalarValueRef]) -> Result> { + assert_eq!(args.len(), 1); + let received_et = args[0].eval_type(); + let r = match args[0].to_owned().is_none() { + true => None, + _ => match received_et { + EvalType::Json => args[0].as_json().and(Some(1)), + EvalType::Bytes => match args[0].as_bytes() { + Some(p) => { + let tmp_str = + std::str::from_utf8(p).map_err(tidb_query_datatype::codec::Error::from)?; + let json: serde_json::error::Result = serde_json::from_str(tmp_str); + Some(json.is_ok() as Int) 
+ } + _ => Some(0), + }, + _ => Some(0), + }, + }; + + Ok(r) +} + #[rpn_fn] #[inline] fn json_unquote(arg: BytesRef) -> Result> { @@ -826,6 +851,38 @@ mod tests { } } + #[test] + fn test_json_valid() { + let cases: Vec<(Vec, Option)> = vec![ + ( + vec![Some(Json::from_str(r#"{"a":1}"#).unwrap()).into()], + Some(1), + ), + (vec![Some(b"hello".to_vec()).into()], Some(0)), + (vec![Some(b"\"hello\"".to_vec()).into()], Some(1)), + (vec![Some(b"null".to_vec()).into()], Some(1)), + (vec![Some(Json::from_str(r#"{}"#).unwrap()).into()], Some(1)), + (vec![Some(Json::from_str(r#"[]"#).unwrap()).into()], Some(1)), + (vec![Some(b"2".to_vec()).into()], Some(1)), + (vec![Some(b"2.5".to_vec()).into()], Some(1)), + (vec![Some(b"2019-8-19".to_vec()).into()], Some(0)), + (vec![Some(b"\"2019-8-19\"".to_vec()).into()], Some(1)), + (vec![Some(2).into()], Some(0)), + (vec![Some(2.5).into()], Some(0)), + (vec![None::.into()], None), + (vec![None::.into()], None), + (vec![None::.into()], None), + ]; + + for (vargs, expected) in cases { + let output = RpnFnScalarEvaluator::new() + .push_params(vargs.clone()) + .evaluate(ScalarFuncSig::JsonValidJsonSig) + .unwrap(); + assert_eq!(output, expected, "{:?}", vargs); + } + } + #[test] fn test_json_contains() { let cases: Vec<(Vec, Option)> = vec![ diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index 5a25fe343d1..43b0602ebbb 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -628,6 +628,9 @@ fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { ScalarFuncSig::JsonKeysSig => json_keys_fn_meta(), ScalarFuncSig::JsonKeys2ArgsSig => json_keys_fn_meta(), ScalarFuncSig::JsonQuoteSig => json_quote_fn_meta(), + ScalarFuncSig::JsonValidJsonSig => json_valid_fn_meta(), + ScalarFuncSig::JsonValidStringSig => json_valid_fn_meta(), + ScalarFuncSig::JsonValidOthersSig => json_valid_fn_meta(), // impl_like ScalarFuncSig::LikeSig => map_like_sig(ft, children)?, // 
impl_regexp From 4407cb1b02474e7aebea0c5feb45aee01ecf42d5 Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Tue, 15 Nov 2022 13:35:55 +0800 Subject: [PATCH 0333/1149] txn: add a new field `txn_source` in write and lock (#13777) ref tikv/tikv#13779 This PR is the starting preparation of BDR, and only adds the ability to parse and serialize `txn_source` Signed-off-by: xiongjiwei Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- .../raftstore-v2/src/operation/command/mod.rs | 1 + components/raftstore/src/store/fsm/apply.rs | 1 + components/raftstore/src/store/peer.rs | 1 + components/raftstore/src/store/util.rs | 1 + components/resolved_ts/src/cmd.rs | 1 + components/txn_types/src/lock.rs | 52 ++++++++++++++++--- components/txn_types/src/write.rs | 33 ++++++++++++ src/storage/mvcc/mod.rs | 10 ++++ src/storage/mvcc/reader/reader.rs | 1 + src/storage/mvcc/txn.rs | 1 + src/storage/txn/actions/commit.rs | 20 ++++++- src/storage/txn/actions/prewrite.rs | 15 +++++- src/storage/txn/actions/tests.rs | 37 +++++++++++++ src/storage/txn/commands/prewrite.rs | 40 +++++++++++++- src/storage/txn/store.rs | 1 + tests/benches/hierarchy/mvcc/mod.rs | 2 + tests/benches/hierarchy/txn/mod.rs | 2 + 18 files changed, 210 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9aa43209906..2622ed983f5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2694,7 +2694,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#65d0ae8fa853c1e41b43f329afbf60616bdd4d18" +source = "git+https://github.com/pingcap/kvproto.git#29a30c4ef9c52aafb1b1da73dd9df60857068114" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 81365a162ec..5d308986229 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -461,6 +461,7 @@ impl Apply { 
AdminCmdType::VerifyHash => unimplemented!(), AdminCmdType::PrepareFlashback => unimplemented!(), AdminCmdType::FinishFlashback => unimplemented!(), + AdminCmdType::BatchSwitchWitness => unimplemented!(), AdminCmdType::InvalidAdmin => { return Err(box_err!("invalid admin command type")); } diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 8cb7f58baca..45eadb0b89f 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -1629,6 +1629,7 @@ where AdminCmdType::PrepareFlashback | AdminCmdType::FinishFlashback => { self.exec_flashback(ctx, request) } + AdminCmdType::BatchSwitchWitness => Err(box_err!("unsupported admin command type")), AdminCmdType::InvalidAdmin => Err(box_err!("unsupported admin command type")), }?; response.set_cmd_type(cmd_type); diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index ff55597b30e..9614161739a 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -5726,6 +5726,7 @@ mod tests { AdminCmdType::TransferLeader, AdminCmdType::ComputeHash, AdminCmdType::VerifyHash, + AdminCmdType::BatchSwitchWitness, ]; for tp in AdminCmdType::values() { let mut msg = RaftCmdRequest::default(); diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index b2180a8420d..5f78065d32b 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -226,6 +226,7 @@ pub fn admin_cmd_epoch_lookup(admin_cmp_type: AdminCmdType) -> AdminCmdEpochStat AdminCmdType::PrepareFlashback | AdminCmdType::FinishFlashback => { AdminCmdEpochState::new(true, true, false, false) } + AdminCmdType::BatchSwitchWitness => unimplemented!(), } } diff --git a/components/resolved_ts/src/cmd.rs b/components/resolved_ts/src/cmd.rs index a1468e15bab..d3bda563a4f 100644 --- a/components/resolved_ts/src/cmd.rs +++ 
b/components/resolved_ts/src/cmd.rs @@ -419,6 +419,7 @@ mod tests { need_old_value: false, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, }, Mutation::make_put(k1.clone(), b"v4".to_vec()), &None, diff --git a/components/txn_types/src/lock.rs b/components/txn_types/src/lock.rs index 3e666c29e40..28df70677a5 100644 --- a/components/txn_types/src/lock.rs +++ b/components/txn_types/src/lock.rs @@ -34,6 +34,7 @@ const MIN_COMMIT_TS_PREFIX: u8 = b'c'; const ASYNC_COMMIT_PREFIX: u8 = b'a'; const ROLLBACK_TS_PREFIX: u8 = b'r'; const LAST_CHANGE_PREFIX: u8 = b'l'; +const TXN_SOURCE_PREFIX: u8 = b's'; impl LockType { pub fn from_mutation(mutation: &Mutation) -> Option { @@ -92,6 +93,10 @@ pub struct Lock { /// The number of versions that need skipping from the latest version to /// find the latest PUT/DELETE record pub versions_to_last_change: u64, + /// The source of this txn. It is used by ticdc, if the value is 0 ticdc + /// will sync the kv change event to downstream, if it is not 0, ticdc + /// may ignore this change event. 
+ pub txn_source: u8, } impl std::fmt::Debug for Lock { @@ -117,6 +122,7 @@ impl std::fmt::Debug for Lock { .field("rollback_ts", &self.rollback_ts) .field("last_change_ts", &self.last_change_ts) .field("versions_to_last_change", &self.versions_to_last_change) + .field("txn_source", &self.txn_source) .finish() } } @@ -146,6 +152,7 @@ impl Lock { rollback_ts: Vec::default(), last_change_ts: TimeStamp::zero(), versions_to_last_change: 0, + txn_source: 0, } } @@ -173,6 +180,13 @@ impl Lock { self } + #[inline] + #[must_use] + pub fn set_txn_source(mut self, source: u8) -> Self { + self.txn_source = source; + self + } + pub fn to_bytes(&self) -> Vec { let mut b = Vec::with_capacity(self.pre_allocate_size()); b.push(self.lock_type.to_u8()); @@ -215,6 +229,10 @@ impl Lock { b.encode_u64(self.last_change_ts.into_inner()).unwrap(); b.encode_var_u64(self.versions_to_last_change).unwrap(); } + if self.txn_source != 0 { + b.push(TXN_SOURCE_PREFIX); + b.push(self.txn_source); + } b } @@ -247,6 +265,9 @@ impl Lock { if !self.last_change_ts.is_zero() { size += 1 + size_of::() + MAX_VAR_U64_LEN; } + if self.txn_source != 0 { + size += 2; + } size } @@ -285,6 +306,7 @@ impl Lock { let mut rollback_ts = Vec::new(); let mut last_change_ts = TimeStamp::zero(); let mut versions_to_last_change = 0; + let mut txn_source = 0; while !b.is_empty() { match b.read_u8()? { SHORT_VALUE_PREFIX => { @@ -322,6 +344,9 @@ impl Lock { last_change_ts = number::decode_u64(&mut b)?.into(); versions_to_last_change = number::decode_var_u64(&mut b)?; } + TXN_SOURCE_PREFIX => { + txn_source = b.read_u8()?; + } _ => { // To support forward compatibility, all fields should be serialized in order // and stop parsing if meets an unknown byte. 
@@ -339,7 +364,8 @@ impl Lock { txn_size, min_commit_ts, ) - .set_last_change(last_change_ts, versions_to_last_change); + .set_last_change(last_change_ts, versions_to_last_change) + .set_txn_source(txn_source); if use_async_commit { lock = lock.use_async_commit(secondaries); } @@ -365,7 +391,8 @@ impl Lock { info.set_use_async_commit(self.use_async_commit); info.set_min_commit_ts(self.min_commit_ts.into_inner()); info.set_secondaries(self.secondaries.into()); - // The client does not care about last_change_ts and versions_to_last_version. + // The client does not care about last_change_ts, versions_to_last_version and + // txn_source. info } @@ -743,6 +770,18 @@ mod tests { 8.into(), ) .set_last_change(4.into(), 2), + Lock::new( + LockType::Lock, + b"pk".to_vec(), + 1.into(), + 10, + None, + 6.into(), + 16, + 8.into(), + ) + .set_last_change(4.into(), 2) + .set_txn_source(1), ]; for (i, lock) in locks.drain(..).enumerate() { let v = lock.to_bytes(); @@ -997,7 +1036,7 @@ mod tests { min_commit_ts: TimeStamp(127), use_async_commit: true, \ secondaries: [7365636F6E646172795F6B31, 7365636F6E646172795F6B6B6B6B6B32, \ 7365636F6E646172795F6B336B336B336B336B336B33, 7365636F6E646172795F6B34], rollback_ts: [], \ - last_change_ts: TimeStamp(80), versions_to_last_change: 4 }" + last_change_ts: TimeStamp(80), versions_to_last_change: 4, txn_source: 0 }" ); log_wrappers::set_redact_info_log(true); let redact_result = format!("{:?}", lock); @@ -1007,7 +1046,7 @@ mod tests { "Lock { lock_type: Put, primary_key: ?, start_ts: TimeStamp(100), ttl: 3, \ short_value: ?, for_update_ts: TimeStamp(101), txn_size: 10, min_commit_ts: TimeStamp(127), \ use_async_commit: true, secondaries: [?, ?, ?, ?], rollback_ts: [], \ - last_change_ts: TimeStamp(80), versions_to_last_change: 4 }" + last_change_ts: TimeStamp(80), versions_to_last_change: 4, txn_source: 0 }" ); lock.short_value = None; @@ -1017,7 +1056,7 @@ mod tests { "Lock { lock_type: Put, primary_key: 706B, start_ts: TimeStamp(100), 
ttl: 3, short_value: , \ for_update_ts: TimeStamp(101), txn_size: 10, min_commit_ts: TimeStamp(127), \ use_async_commit: true, secondaries: [], rollback_ts: [], last_change_ts: TimeStamp(80), \ - versions_to_last_change: 4 }" + versions_to_last_change: 4, txn_source: 0 }" ); log_wrappers::set_redact_info_log(true); let redact_result = format!("{:?}", lock); @@ -1027,7 +1066,7 @@ mod tests { "Lock { lock_type: Put, primary_key: ?, start_ts: TimeStamp(100), ttl: 3, short_value: ?, \ for_update_ts: TimeStamp(101), txn_size: 10, min_commit_ts: TimeStamp(127), \ use_async_commit: true, secondaries: [], rollback_ts: [], last_change_ts: TimeStamp(80), \ - versions_to_last_change: 4 }" + versions_to_last_change: 4, txn_source: 0 }" ); } @@ -1056,6 +1095,7 @@ mod tests { rollback_ts: vec![], last_change_ts: 8.into(), versions_to_last_change: 2, + txn_source: 0, }; assert_eq!(pessimistic_lock.to_lock(), expected_lock); assert_eq!(pessimistic_lock.into_lock(), expected_lock); diff --git a/components/txn_types/src/write.rs b/components/txn_types/src/write.rs index 0c0994640d2..6c46688defa 100644 --- a/components/txn_types/src/write.rs +++ b/components/txn_types/src/write.rs @@ -30,6 +30,8 @@ const FLAG_OVERLAPPED_ROLLBACK: u8 = b'R'; const GC_FENCE_PREFIX: u8 = b'F'; const LAST_CHANGE_PREFIX: u8 = b'l'; +const TXN_SOURCE_PREFIX: u8 = b'S'; + /// The short value for rollback records which are protected from being /// collapsed. const PROTECTED_ROLLBACK_SHORT_VALUE: &[u8] = b"p"; @@ -157,6 +159,8 @@ pub struct Write { /// The number of versions that need skipping from this record /// to find the latest PUT/DELETE record pub versions_to_last_change: u64, + /// The source of this txn. 
+ pub txn_source: u8, } impl std::fmt::Debug for Write { @@ -178,6 +182,7 @@ impl std::fmt::Debug for Write { .field("gc_fence", &self.gc_fence) .field("last_change_ts", &self.last_change_ts) .field("versions_to_last_change", &self.versions_to_last_change) + .field("txn_source", &self.txn_source) .finish() } } @@ -194,6 +199,7 @@ impl Write { gc_fence: None, last_change_ts: TimeStamp::zero(), versions_to_last_change: 0, + txn_source: 0, } } @@ -213,6 +219,7 @@ impl Write { gc_fence: None, last_change_ts: TimeStamp::zero(), versions_to_last_change: 0, + txn_source: 0, } } @@ -239,6 +246,13 @@ impl Write { self } + #[inline] + #[must_use] + pub fn set_txn_source(mut self, source: u8) -> Self { + self.txn_source = source; + self + } + #[inline] pub fn parse_type(mut b: &[u8]) -> Result { let write_type_bytes = b @@ -257,6 +271,7 @@ impl Write { gc_fence: self.gc_fence, last_change_ts: self.last_change_ts, versions_to_last_change: self.versions_to_last_change, + txn_source: self.txn_source, } } @@ -307,6 +322,8 @@ pub struct WriteRef<'a> { /// The number of versions that need skipping from this record /// to find the latest PUT/DELETE record pub versions_to_last_change: u64, + /// The source of this txn. + pub txn_source: u8, } impl WriteRef<'_> { @@ -326,6 +343,7 @@ impl WriteRef<'_> { let mut gc_fence = None; let mut last_change_ts = TimeStamp::zero(); let mut versions_to_last_change = 0; + let mut txn_source = 0; while !b.is_empty() { match b @@ -354,6 +372,11 @@ impl WriteRef<'_> { last_change_ts = number::decode_u64(&mut b)?.into(); versions_to_last_change = number::decode_var_u64(&mut b)?; } + TXN_SOURCE_PREFIX => { + txn_source = b + .read_u8() + .map_err(|_| Error::from(ErrorInner::BadFormatWrite))? + } _ => { // To support forward compatibility, all fields should be serialized in order // and stop parsing if meets an unknown byte. 
@@ -370,6 +393,7 @@ impl WriteRef<'_> { gc_fence, last_change_ts, versions_to_last_change, + txn_source, }) } @@ -394,6 +418,10 @@ impl WriteRef<'_> { b.encode_u64(self.last_change_ts.into_inner()).unwrap(); b.encode_var_u64(self.versions_to_last_change).unwrap(); } + if self.txn_source != 0 { + b.push(TXN_SOURCE_PREFIX); + b.push(self.txn_source); + } b } @@ -409,6 +437,9 @@ impl WriteRef<'_> { if !self.last_change_ts.is_zero() { size += 1 + size_of::() + MAX_VAR_U64_LEN; } + if self.txn_source != 0 { + size += 2; + } size } @@ -458,6 +489,7 @@ impl WriteRef<'_> { ) .set_overlapped_rollback(self.has_overlapped_rollback, self.gc_fence) .set_last_change(self.last_change_ts, self.versions_to_last_change) + .set_txn_source(self.txn_source) } } @@ -517,6 +549,7 @@ mod tests { Write::new(WriteType::Put, 456.into(), Some(b"short_value".to_vec())) .set_overlapped_rollback(true, Some(421397468076048385.into())), Write::new(WriteType::Lock, 456.into(), None).set_last_change(345.into(), 11), + Write::new(WriteType::Lock, 456.into(), None).set_txn_source(1), ]; for (i, write) in writes.drain(..).enumerate() { let v = write.as_ref().to_bytes(); diff --git a/src/storage/mvcc/mod.rs b/src/storage/mvcc/mod.rs index 6191c2ad46d..997cde71020 100644 --- a/src/storage/mvcc/mod.rs +++ b/src/storage/mvcc/mod.rs @@ -706,6 +706,16 @@ pub mod tests { assert_eq!(ts, commit_ts.into()); } + pub fn must_get_txn_source(engine: &mut E, key: &[u8], ts: u64, txn_source: u8) { + let snapshot = engine.snapshot(Default::default()).unwrap(); + let mut reader = SnapshotReader::new(TimeStamp::from(ts), snapshot, true); + let write = reader + .get_write(&Key::from_raw(key), TimeStamp::from(ts)) + .unwrap() + .unwrap(); + assert_eq!(write.txn_source, txn_source); + } + pub fn must_get_commit_ts_none( engine: &mut E, key: &[u8], diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 8e35e00936e..2fe95c2c1dd 100644 --- a/src/storage/mvcc/reader/reader.rs +++ 
b/src/storage/mvcc/reader/reader.rs @@ -852,6 +852,7 @@ pub mod tests { need_old_value: false, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, } } diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index a73f8b99027..66aa769d462 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -767,6 +767,7 @@ pub(crate) mod tests { need_old_value: false, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, } } diff --git a/src/storage/txn/actions/commit.rs b/src/storage/txn/actions/commit.rs index 2ba4f527d0e..1b8018e2aad 100644 --- a/src/storage/txn/actions/commit.rs +++ b/src/storage/txn/actions/commit.rs @@ -92,7 +92,8 @@ pub fn commit( reader.start_ts, lock.short_value.take(), ) - .set_last_change(lock.last_change_ts, lock.versions_to_last_change); + .set_last_change(lock.last_change_ts, lock.versions_to_last_change) + .set_txn_source(lock.txn_source); for ts in &lock.rollback_ts { if *ts == commit_ts { @@ -117,7 +118,8 @@ pub mod tests { #[cfg(test)] use crate::storage::txn::tests::{ must_acquire_pessimistic_lock_for_large_txn, must_prewrite_delete, must_prewrite_lock, - must_prewrite_put, must_prewrite_put_for_large_txn, must_prewrite_put_impl, must_rollback, + must_prewrite_put, must_prewrite_put_for_large_txn, must_prewrite_put_impl, + must_prewrite_put_with_txn_soucre, must_rollback, }; #[cfg(test)] use crate::storage::{ @@ -350,4 +352,18 @@ pub mod tests { assert!(write.last_change_ts.is_zero()); assert_eq!(write.versions_to_last_change, 0); } + + #[test] + fn test_2pc_with_txn_source() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + + let k = b"k"; + // WriteType is Put + must_prewrite_put_with_txn_soucre(&mut engine, k, b"v2", k, 25, 1); + let lock = must_locked(&mut engine, k, 25); + assert_eq!(lock.txn_source, 1); + must_succeed(&mut engine, k, 25, 30); + let write = must_written(&mut engine, k, 25, 30, WriteType::Put); + assert_eq!(write.txn_source, 1); + } 
} diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 4c13a9d244b..48caa3795af 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -172,6 +172,7 @@ pub struct TransactionProperties<'a> { pub need_old_value: bool, pub is_retry_request: bool, pub assertion_level: AssertionLevel, + pub txn_source: u8, } impl<'a> TransactionProperties<'a> { @@ -453,7 +454,8 @@ impl<'a> PrewriteMutation<'a> { self.txn_props.for_update_ts(), self.txn_props.txn_size, self.min_commit_ts, - ); + ) + .set_txn_source(self.txn_props.txn_source); // Only Lock needs to record `last_change_ts` in its write record, Put or Delete // records themselves are effective changes. if tls_can_enable(LAST_CHANGE_TS) && self.lock_type == Some(LockType::Lock) { @@ -795,6 +797,7 @@ pub mod tests { need_old_value: false, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, } } @@ -821,6 +824,7 @@ pub mod tests { need_old_value: true, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, } } @@ -1133,6 +1137,7 @@ pub mod tests { need_old_value: true, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, }, Mutation::make_check_not_exists(Key::from_raw(key)), &None, @@ -1165,6 +1170,7 @@ pub mod tests { need_old_value: true, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, }; // calculated commit_ts = 43 ≤ 50, ok let (_, old_value) = prewrite( @@ -1215,6 +1221,7 @@ pub mod tests { need_old_value: true, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, }; // calculated commit_ts = 43 ≤ 50, ok let (_, old_value) = prewrite( @@ -1324,6 +1331,7 @@ pub mod tests { need_old_value: true, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, }; let cases = vec![ @@ -1384,6 +1392,7 @@ pub mod tests { need_old_value: true, is_retry_request: false, assertion_level: 
AssertionLevel::Off, + txn_source: 0, }; let cases: Vec<_> = vec![ @@ -1655,6 +1664,7 @@ pub mod tests { need_old_value: true, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, }; let snapshot = engine.snapshot(Default::default()).unwrap(); let cm = ConcurrencyManager::new(start_ts); @@ -1709,6 +1719,7 @@ pub mod tests { need_old_value: true, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, }; let snapshot = engine.snapshot(Default::default()).unwrap(); let cm = ConcurrencyManager::new(start_ts); @@ -1850,6 +1861,7 @@ pub mod tests { need_old_value: true, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, }; let (_, old_value) = prewrite( &mut txn, @@ -1886,6 +1898,7 @@ pub mod tests { need_old_value: true, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, }; let (_, old_value) = prewrite( &mut txn, diff --git a/src/storage/txn/actions/tests.rs b/src/storage/txn/actions/tests.rs index fdf060d950d..79d31a08c9c 100644 --- a/src/storage/txn/actions/tests.rs +++ b/src/storage/txn/actions/tests.rs @@ -52,6 +52,7 @@ pub fn must_prewrite_put_impl( assertion_level, false, None, + 0, ); } @@ -90,6 +91,7 @@ pub fn must_prewrite_insert_impl( assertion_level, true, None, + 0, ); } @@ -111,8 +113,10 @@ pub fn must_prewrite_put_impl_with_should_not_exist( assertion_level: AssertionLevel, should_not_exist: bool, region_id: Option, + txn_source: u32, ) { let mut ctx = Context::default(); + ctx.set_txn_source(txn_source); if let Some(region_id) = region_id { ctx.region_id = region_id; } @@ -154,6 +158,7 @@ pub fn must_prewrite_put_impl_with_should_not_exist( need_old_value: false, is_retry_request, assertion_level, + txn_source: txn_source as u8, }, mutation, secondary_keys, @@ -215,6 +220,37 @@ pub fn must_prewrite_put_on_region( AssertionLevel::Off, false, Some(region_id), + 0, + ); +} + +pub fn must_prewrite_put_with_txn_soucre( + engine: &mut E, + key: &[u8], 
+ value: &[u8], + pk: &[u8], + ts: impl Into, + txn_source: u32, +) { + must_prewrite_put_impl_with_should_not_exist( + engine, + key, + value, + pk, + &None, + ts.into(), + SkipPessimisticCheck, + 0, + TimeStamp::default(), + 0, + TimeStamp::default(), + TimeStamp::default(), + false, + Assertion::None, + AssertionLevel::Off, + false, + None, + txn_source, ); } @@ -422,6 +458,7 @@ fn default_txn_props( need_old_value: false, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, } } diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 6b54a1f88db..542c60819b5 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -508,6 +508,7 @@ impl Prewriter { need_old_value: extra_op == ExtraOp::ReadOldValue, is_retry_request: self.ctx.is_retry_request, assertion_level: self.assertion_level, + txn_source: self.ctx.get_txn_source() as u8, }; let async_commit_pk = self @@ -849,7 +850,8 @@ fn handle_1pc_locks(txn: &mut MvccTxn, commit_ts: TimeStamp) -> ReleasedLocks { txn.start_ts, lock.short_value, ) - .set_last_change(lock.last_change_ts, lock.versions_to_last_change); + .set_last_change(lock.last_change_ts, lock.versions_to_last_change) + .set_txn_source(lock.txn_source); // Transactions committed with 1PC should be impossible to overwrite rollback // records. 
txn.put_write(key.clone(), commit_ts, write.as_ref().to_bytes()); @@ -1073,6 +1075,42 @@ mod tests { assert_eq!(d.internal_delete_skipped_count, 0); } + #[test] + fn test_prewrite_1pc_with_txn_source() { + use crate::storage::mvcc::tests::{must_get, must_get_commit_ts, must_unlocked}; + + let mut engine = TestEngineBuilder::new().build().unwrap(); + let cm = concurrency_manager::ConcurrencyManager::new(1.into()); + + let key = b"k"; + let value = b"v"; + let mutations = vec![Mutation::make_put(Key::from_raw(key), value.to_vec())]; + + let mut statistics = Statistics::default(); + let mut ctx = Context::default(); + ctx.set_txn_source(1); + let cmd = Prewrite::new( + mutations, + key.to_vec(), + TimeStamp::from(10), + 0, + false, + 0, + TimeStamp::default(), + TimeStamp::from(15), + None, + true, + AssertionLevel::Off, + ctx, + ); + prewrite_command(&mut engine, cm, &mut statistics, cmd).unwrap(); + + must_unlocked(&mut engine, key); + must_get(&mut engine, key, 12, value); + must_get_commit_ts(&mut engine, key, 10, 11); + must_get_txn_source(&mut engine, key, 11, 1); + } + #[test] fn test_prewrite_1pc() { use crate::storage::mvcc::tests::{must_get, must_get_commit_ts, must_unlocked}; diff --git a/src/storage/txn/store.rs b/src/storage/txn/store.rs index 9a38979c71b..46879d38e9f 100644 --- a/src/storage/txn/store.rs +++ b/src/storage/txn/store.rs @@ -726,6 +726,7 @@ mod tests { need_old_value: false, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, }, Mutation::make_put(Key::from_raw(key), key.to_vec()), &None, diff --git a/tests/benches/hierarchy/mvcc/mod.rs b/tests/benches/hierarchy/mvcc/mod.rs index f57946a11cf..20740b4cb16 100644 --- a/tests/benches/hierarchy/mvcc/mod.rs +++ b/tests/benches/hierarchy/mvcc/mod.rs @@ -47,6 +47,7 @@ where need_old_value: false, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, }; prewrite( &mut txn, @@ -97,6 +98,7 @@ fn mvcc_prewrite>(b: &mut Bencher<'_>, config: &B 
need_old_value: false, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, }; prewrite( &mut txn, diff --git a/tests/benches/hierarchy/txn/mod.rs b/tests/benches/hierarchy/txn/mod.rs index 0bdb7ae8870..404266e2c6f 100644 --- a/tests/benches/hierarchy/txn/mod.rs +++ b/tests/benches/hierarchy/txn/mod.rs @@ -43,6 +43,7 @@ where need_old_value: false, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, }; prewrite( &mut txn, @@ -90,6 +91,7 @@ fn txn_prewrite>(b: &mut Bencher<'_>, config: &Be need_old_value: false, is_retry_request: false, assertion_level: AssertionLevel::Off, + txn_source: 0, }; prewrite( &mut txn, From 6b240c9e6ebbb4a35bc0309ffbeb9a9b293a6aa8 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 15 Nov 2022 17:19:56 +0800 Subject: [PATCH 0334/1149] *: update toolchain (#13797) ref tikv/tikv#12842 Latest Rust stabilizes GAT, which is required for async func in trait without allocation. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- cmd/tikv-ctl/src/executor.rs | 2 +- cmd/tikv-ctl/src/main.rs | 6 +- .../backup-stream/src/metadata/client.rs | 2 +- components/backup/src/endpoint.rs | 2 +- components/backup/src/writer.rs | 4 +- components/batch-system/src/router.rs | 2 +- components/causal_ts/benches/tso.rs | 6 +- components/causal_ts/src/lib.rs | 1 - components/causal_ts/src/tso.rs | 2 +- components/cdc/src/endpoint.rs | 6 +- components/cdc/src/old_value.rs | 6 +- components/codec/src/byte.rs | 2 +- .../concurrency_manager/benches/lock_table.rs | 1 - .../concurrency_manager/src/lock_table.rs | 13 +-- components/encryption/src/crypter.rs | 2 +- .../encryption/src/encrypted_file/mod.rs | 4 +- components/encryption/src/manager/mod.rs | 10 +-- components/engine_panic/src/lib.rs | 1 - components/engine_rocks/src/lib.rs | 1 - components/engine_rocks/src/util.rs | 2 +- components/engine_test/src/lib.rs | 6 +- components/engine_traits/src/lib.rs | 1 - components/error_code/bin.rs | 2 +- 
.../external_storage/export/src/export.rs | 6 +- components/external_storage/src/hdfs.rs | 2 +- components/file_system/src/io_stats/proc.rs | 2 +- components/file_system/src/lib.rs | 8 +- components/raft_log_engine/src/engine.rs | 2 +- components/raft_log_engine/src/lib.rs | 1 - components/raftstore-v2/src/fsm/peer.rs | 2 +- components/raftstore-v2/src/lib.rs | 2 +- components/raftstore-v2/src/operation/life.rs | 2 +- .../tests/integrations/test_basic_write.rs | 2 +- .../tests/integrations/test_life.rs | 4 +- components/raftstore/src/lib.rs | 2 +- components/raftstore/src/store/config.rs | 10 +-- components/raftstore/src/store/fsm/apply.rs | 6 +- components/raftstore/src/store/fsm/peer.rs | 2 +- components/raftstore/src/store/fsm/store.rs | 4 +- components/raftstore/src/store/msg.rs | 1 + components/raftstore/src/store/snap.rs | 24 +++--- components/raftstore/src/store/snap/io.rs | 2 +- .../src/store/worker/split_controller.rs | 2 +- .../resolved_ts/tests/integrations/mod.rs | 2 +- .../resource_metering/src/recorder/mod.rs | 4 +- components/security/src/lib.rs | 2 +- components/server/src/server.rs | 6 +- components/server/src/signal_handler.rs | 2 +- components/sst_importer/src/import_file.rs | 10 +-- components/sst_importer/src/sst_importer.rs | 2 +- components/test_backup/src/lib.rs | 2 +- .../test_raftstore/src/transport_simulate.rs | 2 +- components/test_util/src/runner.rs | 4 +- .../tidb_query_aggr/src/impl_max_min.rs | 6 +- .../tidb_query_codegen/src/rpn_function.rs | 2 +- .../tidb_query_datatype/src/codec/convert.rs | 14 +-- .../src/codec/mysql/decimal.rs | 16 ++-- .../src/codec/mysql/duration.rs | 4 +- .../src/codec/mysql/json/binary.rs | 2 +- .../src/codec/mysql/time/extension.rs | 2 +- .../tidb_query_datatype/src/codec/overflow.rs | 6 +- .../tidb_query_datatype/src/codec/table.rs | 2 +- .../tidb_query_datatype/src/expr/ctx.rs | 4 +- .../src/simple_aggr_executor.rs | 2 +- .../src/top_n_executor.rs | 4 +- .../tidb_query_expr/src/impl_arithmetic.rs | 34 
++++---- components/tidb_query_expr/src/impl_cast.rs | 16 ++-- .../tidb_query_expr/src/impl_compare.rs | 8 +- components/tidb_query_expr/src/impl_math.rs | 4 +- components/tidb_query_expr/src/impl_op.rs | 4 +- components/tidb_query_expr/src/impl_time.rs | 8 +- components/tikv_kv/src/lib.rs | 1 - components/tikv_util/src/buffer_vec.rs | 86 ++++++++++--------- components/tikv_util/src/codec/bytes.rs | 2 +- components/tikv_util/src/config.rs | 28 +++--- components/tikv_util/src/lib.rs | 2 +- components/tikv_util/src/logger/file_log.rs | 2 +- components/tikv_util/src/sys/cgroup.rs | 30 +++---- components/tikv_util/src/sys/inspector.rs | 2 +- components/tikv_util/src/sys/thread.rs | 4 +- components/txn_types/src/types.rs | 2 +- fuzz/cli.rs | 18 ++-- rust-toolchain | 2 +- scripts/clippy | 6 +- src/config.rs | 34 +++----- src/coprocessor/endpoint.rs | 43 ++++------ src/coprocessor/statistics/analyze.rs | 4 +- src/coprocessor/statistics/histogram.rs | 2 +- src/coprocessor_v2/plugin_registry.rs | 10 +-- src/lib.rs | 3 +- src/read_pool.rs | 5 +- src/server/debug.rs | 2 +- src/server/engine_factory_v2.rs | 4 +- src/server/gc_worker/gc_manager.rs | 2 +- src/server/gc_worker/gc_worker.rs | 2 +- src/server/lock_manager/waiter_manager.rs | 4 +- src/server/node.rs | 2 +- src/server/service/diagnostics/log.rs | 8 +- src/server/service/diagnostics/sys.rs | 2 +- src/server/status_server/profile.rs | 4 +- src/storage/config.rs | 2 +- src/storage/txn/scheduler.rs | 2 +- .../misc/coprocessor/codec/mysql/json/mod.rs | 2 +- tests/failpoints/cases/test_snap.rs | 10 +-- tests/integrations/backup/mod.rs | 2 +- .../integrations/config/dynamic/raftstore.rs | 2 +- .../integrations/config/test_config_client.rs | 2 +- tests/integrations/coprocessor/test_select.rs | 2 +- 108 files changed, 330 insertions(+), 354 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index aa2f604b547..80915dbc564 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ 
b/cmd/tikv-ctl/src/executor.rs @@ -866,7 +866,7 @@ impl DebugExecutor for Debugger { self.region_size(region, cfs) .unwrap_or_else(|e| perror_and_exit("Debugger::region_size", e)) .into_iter() - .map(|(cf, size)| (cf.to_owned(), size as usize)) + .map(|(cf, size)| (cf.to_owned(), size)) .collect() } diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index ce39c121300..be5069397e4 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -68,7 +68,7 @@ fn main() { cfg }, |path| { - let s = fs::read_to_string(&path).unwrap(); + let s = fs::read_to_string(path).unwrap(); toml::from_str(&s).unwrap() }, ); @@ -169,7 +169,7 @@ fn main() { .unwrap(); let iv = Iv::from_slice(&file_info.iv).unwrap(); - let f = File::open(&infile).unwrap(); + let f = File::open(infile).unwrap(); let mut reader = DecrypterReader::new(f, mthd, &file_info.key, iv).unwrap(); io::copy(&mut reader, &mut outf).unwrap(); @@ -333,7 +333,7 @@ fn main() { let to_data_dir = to_data_dir.as_deref(); let to_host = to_host.as_deref(); let to_config = to_config.map_or_else(TikvConfig::default, |path| { - let s = fs::read_to_string(&path).unwrap(); + let s = fs::read_to_string(path).unwrap(); toml::from_str(&s).unwrap() }); debug_executor.diff_region(region, to_host, to_data_dir, &to_config, mgr); diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index 2ebf553e1cb..b7f1fcb2025 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -546,7 +546,7 @@ impl MetadataClient { )) .await?; - let mut result = Vec::with_capacity(all.len() as usize + 1); + let mut result = Vec::with_capacity(all.len() + 1); if !prev.kvs.is_empty() { let kv = &mut prev.kvs[0]; if kv.value() > start_key.as_slice() { diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 92131381017..db6ff331d7f 100644 --- a/components/backup/src/endpoint.rs +++ 
b/components/backup/src/endpoint.rs @@ -1279,7 +1279,7 @@ pub mod tests { let temp = TempDir::new().unwrap(); let rocks = TestEngineBuilder::new() .path(temp.path()) - .cfs(&[ + .cfs([ engine_traits::CF_DEFAULT, engine_traits::CF_LOCK, engine_traits::CF_WRITE, diff --git a/components/backup/src/writer.rs b/components/backup/src/writer.rs index 4e0750bd7d8..7a853fe485f 100644 --- a/components/backup/src/writer.rs +++ b/components/backup/src/writer.rs @@ -443,7 +443,7 @@ mod tests { let temp = TempDir::new().unwrap(); let rocks = TestEngineBuilder::new() .path(temp.path()) - .cfs(&[engine_traits::CF_DEFAULT, engine_traits::CF_WRITE]) + .cfs([engine_traits::CF_DEFAULT, engine_traits::CF_WRITE]) .build() .unwrap(); let db = rocks.get_rocksdb(); @@ -480,7 +480,7 @@ mod tests { let temp = TempDir::new().unwrap(); let rocks = TestEngineBuilder::new() .path(temp.path()) - .cfs(&[ + .cfs([ engine_traits::CF_DEFAULT, engine_traits::CF_LOCK, engine_traits::CF_WRITE, diff --git a/components/batch-system/src/router.rs b/components/batch-system/src/router.rs index b863f1535f0..bfcb93c9d6b 100644 --- a/components/batch-system/src/router.rs +++ b/components/batch-system/src/router.rs @@ -322,7 +322,7 @@ where for mailbox in mailboxes.map.values() { let _ = mailbox.force_send(msg_gen(), &self.normal_scheduler); } - BROADCAST_NORMAL_DURATION.observe(duration_to_sec(timer.saturating_elapsed()) as f64); + BROADCAST_NORMAL_DURATION.observe(duration_to_sec(timer.saturating_elapsed())); } /// Try to notify all FSMs that the cluster is being shutdown. 
diff --git a/components/causal_ts/benches/tso.rs b/components/causal_ts/benches/tso.rs index 72d381a4be7..f7e1980d15f 100644 --- a/components/causal_ts/benches/tso.rs +++ b/components/causal_ts/benches/tso.rs @@ -19,11 +19,7 @@ fn bench_batch_tso_list_pop(c: &mut Criterion) { batch_list.flush(); for i in 0..CAPACITY { batch_list - .push( - batch_size as u32, - TimeStamp::compose(i as u64, batch_size), - false, - ) + .push(batch_size as u32, TimeStamp::compose(i, batch_size), false) .unwrap(); } }, diff --git a/components/causal_ts/src/lib.rs b/components/causal_ts/src/lib.rs index 3eb59f35c36..ab57fbf734f 100644 --- a/components/causal_ts/src/lib.rs +++ b/components/causal_ts/src/lib.rs @@ -1,6 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -#![feature(map_first_last)] // For `BTreeMap::pop_first`. #![feature(div_duration)] #[macro_use] diff --git a/components/causal_ts/src/tso.rs b/components/causal_ts/src/tso.rs index 5056cfe2ebd..e63c3c2c3ba 100644 --- a/components/causal_ts/src/tso.rs +++ b/components/causal_ts/src/tso.rs @@ -712,7 +712,7 @@ pub mod tests { for (i, (remain, usage, need_flush, expected)) in cases.into_iter().enumerate() { let batch_list = Arc::new(TsoBatchList { inner: Default::default(), - tso_remain: AtomicI32::new(remain as i32), + tso_remain: AtomicI32::new(remain), tso_usage: AtomicU32::new(usage), capacity: cache_multiplier, }); diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 4086c8623b5..4b6bbad6d35 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -1012,11 +1012,7 @@ impl, E: KvEngine> Endpoint { let pd_client = self.pd_client.clone(); let scheduler = self.scheduler.clone(); let raft_router = self.raft_router.clone(); - let regions: Vec = self - .capture_regions - .iter() - .map(|(region_id, _)| *region_id) - .collect(); + let regions: Vec = self.capture_regions.keys().copied().collect(); let cm: ConcurrencyManager = 
self.concurrency_manager.clone(); let hibernate_regions_compatible = self.config.hibernate_regions_compatible; let causal_ts_provider = self.causal_ts_provider.clone(); diff --git a/components/cdc/src/old_value.rs b/components/cdc/src/old_value.rs index 1149d8ce3e0..37e2781b766 100644 --- a/components/cdc/src/old_value.rs +++ b/components/cdc/src/old_value.rs @@ -341,8 +341,8 @@ mod tests { old_value_cache.cache.insert(key, value.clone()); } - assert_eq!(old_value_cache.cache.size(), size * cases as usize); - assert_eq!(old_value_cache.cache.len(), cases as usize); + assert_eq!(old_value_cache.cache.size(), size * cases); + assert_eq!(old_value_cache.cache.len(), cases); assert_eq!(old_value_cache.capacity(), capacity as usize); // Reduces capacity. @@ -360,7 +360,7 @@ mod tests { assert_eq!(old_value_cache.cache.size(), size * remaining_count); assert_eq!(old_value_cache.cache.len(), remaining_count); - assert_eq!(old_value_cache.capacity(), new_capacity as usize); + assert_eq!(old_value_cache.capacity(), new_capacity); for i in dropped_count..cases { let key = Key::from_raw(&i.to_be_bytes()); assert_eq!(old_value_cache.cache.get(&key).is_some(), true); diff --git a/components/codec/src/byte.rs b/components/codec/src/byte.rs index aa7baba9e75..8b5fd928edf 100644 --- a/components/codec/src/byte.rs +++ b/components/codec/src/byte.rs @@ -759,7 +759,7 @@ mod tests { for (exp, encoded) in cases { let mut path = env::temp_dir(); path.push("read-compact-codec-file"); - fs::write(&path, &encoded).unwrap(); + fs::write(&path, encoded).unwrap(); let f = File::open(&path).unwrap(); let mut rdr = BufReader::new(f); let decoded = rdr.read_compact_bytes().unwrap(); diff --git a/components/concurrency_manager/benches/lock_table.rs b/components/concurrency_manager/benches/lock_table.rs index f2d4a9b92c9..52c9bea960a 100644 --- a/components/concurrency_manager/benches/lock_table.rs +++ b/components/concurrency_manager/benches/lock_table.rs @@ -1,7 +1,6 @@ // Copyright 2021 TiKV 
Project Authors. Licensed under Apache-2.0. #![feature(test)] -#![feature(bench_black_box)] use std::{borrow::Cow, hint::black_box, mem::forget}; diff --git a/components/concurrency_manager/src/lock_table.rs b/components/concurrency_manager/src/lock_table.rs index bf7a224aa28..ad013a863a1 100644 --- a/components/concurrency_manager/src/lock_table.rs +++ b/components/concurrency_manager/src/lock_table.rs @@ -158,9 +158,9 @@ mod test { assert_eq!(counter.load(Ordering::SeqCst), 100); } - fn ts_check(lock: &Lock, ts: u64) -> Result<(), Lock> { + fn ts_check(lock: &Lock, ts: u64) -> Result<(), Box> { if lock.ts.into_inner() < ts { - Err(lock.clone()) + Err(Box::new(lock.clone())) } else { Ok(()) } @@ -193,7 +193,10 @@ mod test { lock_table.check_key(&key_k, |l| ts_check(l, 5)).unwrap(); // lock does not pass check_fn - assert_eq!(lock_table.check_key(&key_k, |l| ts_check(l, 20)), Err(lock)); + assert_eq!( + lock_table.check_key(&key_k, |l| ts_check(l, 20)), + Err(Box::new(lock)) + ); } #[tokio::test] @@ -247,13 +250,13 @@ mod test { // first lock does not pass check_fn assert_eq!( lock_table.check_range(Some(&Key::from_raw(b"a")), None, |_, l| ts_check(l, 25)), - Err(lock_k) + Err(Box::new(lock_k)) ); // first lock passes check_fn but the second does not assert_eq!( lock_table.check_range(None, None, |_, l| ts_check(l, 15)), - Err(lock_l) + Err(Box::new(lock_l)) ); } diff --git a/components/encryption/src/crypter.rs b/components/encryption/src/crypter.rs index 13286e416c9..7379b8a32a3 100644 --- a/components/encryption/src/crypter.rs +++ b/components/encryption/src/crypter.rs @@ -275,7 +275,7 @@ mod tests { let crypter = AesGcmCrypter::new(&key, iv); let (ciphertext, gcm_tag) = crypter.encrypt(&pt).unwrap(); assert_eq!(ciphertext, ct, "{}", hex::encode(&ciphertext)); - assert_eq!(gcm_tag.0.to_vec(), tag, "{}", hex::encode(&gcm_tag.0)); + assert_eq!(gcm_tag.0.to_vec(), tag, "{}", hex::encode(gcm_tag.0)); let plaintext = crypter.decrypt(&ct, gcm_tag).unwrap(); 
assert_eq!(plaintext, pt, "{}", hex::encode(&plaintext)); diff --git a/components/encryption/src/encrypted_file/mod.rs b/components/encryption/src/encrypted_file/mod.rs index 57b5527b7bf..9c76b857c70 100644 --- a/components/encryption/src/encrypted_file/mod.rs +++ b/components/encryption/src/encrypted_file/mod.rs @@ -64,7 +64,7 @@ impl<'a> EncryptedFile<'a> { let start = Instant::now(); // Write to a tmp file. // TODO what if a tmp file already exists? - let origin_path = self.base.join(&self.name); + let origin_path = self.base.join(self.name); let mut tmp_path = origin_path.clone(); tmp_path.set_extension(format!("{}.{}", thread_rng().next_u64(), TMP_FILE_SUFFIX)); let mut tmp_file = OpenOptions::new() @@ -92,7 +92,7 @@ impl<'a> EncryptedFile<'a> { // Replace old file with the tmp file aomticlly. rename(tmp_path, origin_path)?; - let base_dir = File::open(&self.base)?; + let base_dir = File::open(self.base)?; base_dir.sync_all()?; ENCRYPT_DECRPTION_FILE_HISTOGRAM diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index 0f78e794629..0f3233d7819 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -815,7 +815,7 @@ mod tests { } fn new_mock_backend() -> Box { - Box::new(MockBackend::default()) + Box::::default() } fn new_key_manager_def( @@ -829,7 +829,7 @@ mod tests { } match DataKeyManager::new_previous_loaded( master_backend, - Box::new(MockBackend::default()), + Box::::default(), args, ) { Ok(None) => panic!("expected encryption"), @@ -932,7 +932,7 @@ mod tests { let manager = new_key_manager( &tmp_dir, Some(EncryptionMethod::Aes256Ctr), - Box::new(PlaintextBackend::default()), + Box::::default(), new_mock_backend() as Box, ); manager.err().unwrap(); @@ -1301,13 +1301,13 @@ mod tests { encrypt_fail: false, ..MockBackend::default() }); - let previous = Box::new(PlaintextBackend::default()) as Box; + let previous = Box::::default() as Box; let result = 
new_key_manager(&tmp_dir, None, wrong_key, previous); // When the master key is invalid, the key manager left a empty file dict and // return errors. assert!(result.is_err()); - let previous = Box::new(PlaintextBackend::default()) as Box; + let previous = Box::::default() as Box; new_key_manager(&tmp_dir, None, right_key, previous).unwrap(); } diff --git a/components/engine_panic/src/lib.rs b/components/engine_panic/src/lib.rs index 70c7f00ece8..93555f5ba5f 100644 --- a/components/engine_panic/src/lib.rs +++ b/components/engine_panic/src/lib.rs @@ -9,7 +9,6 @@ //! with your engine's own name; then fill in the implementations; remove //! the allow(unused) attribute; -#![feature(generic_associated_types)] #![allow(unused)] mod cf_names; diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index c1e23dac4a6..b6f3e36146c 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -16,7 +16,6 @@ //! Please read the engine_trait crate docs before hacking. #![cfg_attr(test, feature(test))] -#![feature(generic_associated_types)] #[allow(unused_extern_crates)] extern crate tikv_alloc; diff --git a/components/engine_rocks/src/util.rs b/components/engine_rocks/src/util.rs index f749f78851c..778e16c1a67 100644 --- a/components/engine_rocks/src/util.rs +++ b/components/engine_rocks/src/util.rs @@ -150,7 +150,7 @@ pub fn db_exist(path: &str) -> bool { // If path is not an empty directory but db has not been created, // `DB::list_column_families` fails and we can clean up the directory by // this indication. - fs::read_dir(&path).unwrap().next().is_some() + fs::read_dir(path).unwrap().next().is_some() } /// Returns a Vec of cf which is in `a' but not in `b'. diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index b460e97d4ce..ae834457757 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -55,6 +55,8 @@ //! 
storage engines, and that it be extracted into its own crate for use in //! TiKV, once the full requirements are better understood. +#![feature(let_chains)] + /// Types and constructors for the "raft" engine pub mod raft { #[cfg(feature = "test-engine-raft-panic")] @@ -345,7 +347,7 @@ pub mod kv { let path = self.tablet_path(region_id, suffix).join(TOMBSTONE_MARK); // When the full directory path does not exsit, create will return error and in // this case, we just ignore it. - let _ = std::fs::File::create(&path); + let _ = std::fs::File::create(path); { let mut reg = self.registry.lock().unwrap(); if let Some((cached_tablet, cached_suffix)) = reg.remove(®ion_id) && cached_suffix != suffix { @@ -384,7 +386,7 @@ pub mod kv { } let db_path = self.tablet_path(region_id, suffix); - std::fs::rename(path, &db_path)?; + std::fs::rename(path, db_path)?; self.open_tablet( region_id, Some(suffix), diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 29351636694..b9cf8847751 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -251,7 +251,6 @@ #![cfg_attr(test, feature(test))] #![feature(min_specialization)] #![feature(assert_matches)] -#![feature(generic_associated_types)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/error_code/bin.rs b/components/error_code/bin.rs index ba6a21ac6fa..8f1ad087355 100644 --- a/components/error_code/bin.rs +++ b/components/error_code/bin.rs @@ -18,7 +18,7 @@ fn main() { storage::ALL_ERROR_CODES.iter(), ]; let path = Path::new("./etc/error_code.toml"); - let mut f = fs::File::create(&path).unwrap(); + let mut f = fs::File::create(path).unwrap(); err_codes .into_iter() .flatten() diff --git a/components/external_storage/export/src/export.rs b/components/external_storage/export/src/export.rs index 3cba0eaad8b..a36f3eba11e 100644 --- a/components/external_storage/export/src/export.rs +++ b/components/external_storage/export/src/export.rs @@ 
-186,7 +186,9 @@ fn create_backend_inner( Backend::Hdfs(hdfs) => { Box::new(HdfsStorage::new(&hdfs.remote, backend_config.hdfs_config)?) } - Backend::Noop(_) => Box::new(NoopStorage::default()) as Box, + Backend::Noop(_) => { + Box::::default() as Box + } #[cfg(feature = "cloud-aws")] Backend::S3(config) => { let mut s = S3Storage::from_input(config.clone())?; @@ -355,7 +357,7 @@ impl ExternalStorage for EncryptedExternalStorage { compression_reader_dispatcher(compression_type, inner)? }; let file_writer: &mut dyn Write = - &mut self.key_manager.create_file_for_write(&restore_name)?; + &mut self.key_manager.create_file_for_write(restore_name)?; let min_read_speed: usize = 8192; let mut input = encrypt_wrap_reader(file_crypter, reader)?; diff --git a/components/external_storage/src/hdfs.rs b/components/external_storage/src/hdfs.rs index 53574633c73..a9fa65dcdcf 100644 --- a/components/external_storage/src/hdfs.rs +++ b/components/external_storage/src/hdfs.rs @@ -101,7 +101,7 @@ impl ExternalStorage for HdfsStorage { } cmd_with_args.extend([&cmd_path, "dfs", "-put", "-", path]); info!("calling hdfs"; "cmd" => ?cmd_with_args); - let mut hdfs_cmd = Command::new(&cmd_with_args[0]) + let mut hdfs_cmd = Command::new(cmd_with_args[0]) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .stderr(Stdio::piped()) diff --git a/components/file_system/src/io_stats/proc.rs b/components/file_system/src/io_stats/proc.rs index 60c8cac9c36..51c74ae56a8 100644 --- a/components/file_system/src/io_stats/proc.rs +++ b/components/file_system/src/io_stats/proc.rs @@ -225,7 +225,7 @@ mod tests { .write(true) .create(true) .custom_flags(O_DIRECT) - .open(&file_path) + .open(file_path) .unwrap(); let w = vec![A512::default(); 8]; let base_local_bytes = id.fetch_io_bytes().unwrap(); diff --git a/components/file_system/src/lib.rs b/components/file_system/src/lib.rs index 36acbc65a91..058b2a3a5f9 100644 --- a/components/file_system/src/lib.rs +++ b/components/file_system/src/lib.rs @@ -426,7 +426,7 @@ 
pub fn reserve_space_for_recover>(data_dir: P, file_size: u64) -> delete_file_if_exist(&path)?; } fn do_reserve(dir: &Path, path: &Path, file_size: u64) -> io::Result<()> { - let f = File::create(&path)?; + let f = File::create(path)?; f.allocate(file_size)?; f.sync_all()?; sync_dir(dir) @@ -483,7 +483,7 @@ mod tests { // Ensure it works for non-existent file. let non_existent_file = dir_path.join("non_existent_file"); - get_file_size(&non_existent_file).unwrap_err(); + get_file_size(non_existent_file).unwrap_err(); } #[test] @@ -504,7 +504,7 @@ mod tests { assert_eq!(file_exists(&existent_file), true); let non_existent_file = dir_path.join("non_existent_file"); - assert_eq!(file_exists(&non_existent_file), false); + assert_eq!(file_exists(non_existent_file), false); } #[test] @@ -525,7 +525,7 @@ mod tests { assert_eq!(file_exists(&existent_file), false); let non_existent_file = dir_path.join("non_existent_file"); - delete_file_if_exist(&non_existent_file).unwrap(); + delete_file_if_exist(non_existent_file).unwrap(); } fn gen_rand_file>(path: P, size: usize) -> u32 { diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 1da553cb22e..35cacf620fc 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -311,7 +311,7 @@ impl RaftLogEngine { if !path.exists() || !path.is_dir() { return false; } - fs::read_dir(&path).unwrap().next().is_some() + fs::read_dir(path).unwrap().next().is_some() } pub fn raft_groups(&self) -> Vec { diff --git a/components/raft_log_engine/src/lib.rs b/components/raft_log_engine/src/lib.rs index 6156771afa8..8eda4e5ae24 100644 --- a/components/raft_log_engine/src/lib.rs +++ b/components/raft_log_engine/src/lib.rs @@ -16,7 +16,6 @@ //! Please read the engine_trait crate docs before hacking. 
#![cfg_attr(test, feature(test))] -#![feature(generic_associated_types)] #[macro_use] extern crate tikv_util; diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 5abdcf31f0f..6fac2d88db0 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -187,7 +187,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, self.store_ctx .raft_metrics .propose_wait_time - .observe(duration_to_sec(send_time.saturating_elapsed()) as f64); + .observe(duration_to_sec(send_time.saturating_elapsed())); } fn on_tick(&mut self, tick: PeerTick) { diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 2f30ee9873d..15dd6b4afc1 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -22,7 +22,7 @@ // using a standalone modules. #![allow(unused)] -#![feature(let_else)] +#![feature(let_chains)] #![feature(array_windows)] mod batch; diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 12c7d4ec544..58628637159 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -104,7 +104,7 @@ impl Store { ER: RaftEngine, { let region_id = msg.region.id; - let mut raft_msg = Box::new(RaftMessage::default()); + let mut raft_msg = Box::::default(); raft_msg.set_region_id(region_id); raft_msg.set_region_epoch(msg.region.get_region_epoch().clone()); raft_msg.set_to_peer( diff --git a/components/raftstore-v2/tests/integrations/test_basic_write.rs b/components/raftstore-v2/tests/integrations/test_basic_write.rs index 7c8bdb369a1..fc23e46e12f 100644 --- a/components/raftstore-v2/tests/integrations/test_basic_write.rs +++ b/components/raftstore-v2/tests/integrations/test_basic_write.rs @@ -96,7 +96,7 @@ fn test_basic_write() { ); // Make it step down and follower should reject write. 
- let mut msg = Box::new(RaftMessage::default()); + let mut msg = Box::::default(); msg.set_region_id(2); msg.set_to_peer(new_peer(1, 3)); msg.mut_region_epoch().set_conf_ver(INIT_EPOCH_CONF_VER); diff --git a/components/raftstore-v2/tests/integrations/test_life.rs b/components/raftstore-v2/tests/integrations/test_life.rs index e905e7e4ac2..ed0ebcc9b8a 100644 --- a/components/raftstore-v2/tests/integrations/test_life.rs +++ b/components/raftstore-v2/tests/integrations/test_life.rs @@ -71,7 +71,7 @@ fn test_life_by_message() { assert_peer_not_exist(test_region_id, test_peer_id, &router); // Build a correct message. - let mut msg = Box::new(RaftMessage::default()); + let mut msg = Box::::default(); msg.set_region_id(test_region_id); msg.set_to_peer(new_peer(1, test_peer_id)); msg.mut_region_epoch().set_conf_ver(1); @@ -147,7 +147,7 @@ fn test_destroy_by_larger_id() { let test_region_id = 4; let test_peer_id = 6; let init_term = 5; - let mut msg = Box::new(RaftMessage::default()); + let mut msg = Box::::default(); msg.set_region_id(test_region_id); msg.set_to_peer(new_peer(1, test_peer_id)); msg.mut_region_epoch().set_conf_ver(1); diff --git a/components/raftstore/src/lib.rs b/components/raftstore/src/lib.rs index 7b968af3c6a..e56678edec2 100644 --- a/components/raftstore/src/lib.rs +++ b/components/raftstore/src/lib.rs @@ -6,7 +6,7 @@ #![feature(min_specialization)] #![feature(box_patterns)] #![feature(hash_drain_filter)] -#![feature(let_else)] +#![feature(let_chains)] #![recursion_limit = "256"] #[cfg(test)] diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 70cf6b67d1f..454cf61a4c8 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -575,7 +575,7 @@ impl Config { let election_timeout = self.raft_base_tick_interval.as_millis() * self.raft_election_timeout_ticks as u64; - let lease = self.raft_store_max_leader_lease.as_millis() as u64; + let lease = 
self.raft_store_max_leader_lease.as_millis(); if election_timeout < lease { return Err(box_err!( "election timeout {} ms is less than lease {} ms", @@ -584,7 +584,7 @@ impl Config { )); } - let tick = self.raft_base_tick_interval.as_millis() as u64; + let tick = self.raft_base_tick_interval.as_millis(); if lease > election_timeout - tick { return Err(box_err!( "lease {} ms should not be greater than election timeout {} ms - 1 tick({} ms)", @@ -598,7 +598,7 @@ impl Config { return Err(box_err!("raftstore.merge-check-tick-interval can't be 0.")); } - let stale_state_check = self.peer_stale_state_check_interval.as_millis() as u64; + let stale_state_check = self.peer_stale_state_check_interval.as_millis(); if stale_state_check < election_timeout * 2 { return Err(box_err!( "peer stale state check interval {} ms is less than election timeout x 2 {} ms", @@ -613,7 +613,7 @@ impl Config { )); } - let abnormal_leader_missing = self.abnormal_leader_missing_duration.as_millis() as u64; + let abnormal_leader_missing = self.abnormal_leader_missing_duration.as_millis(); if abnormal_leader_missing < stale_state_check { return Err(box_err!( "abnormal leader missing {} ms is less than peer stale state check interval {} ms", @@ -622,7 +622,7 @@ impl Config { )); } - let max_leader_missing = self.max_leader_missing_duration.as_millis() as u64; + let max_leader_missing = self.max_leader_missing_duration.as_millis(); if max_leader_missing < abnormal_leader_missing { return Err(box_err!( "max leader missing {} ms is less than abnormal leader missing {} ms", diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 45eadb0b89f..bd582d1c24a 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -691,7 +691,7 @@ where } let elapsed = t.saturating_elapsed(); - STORE_APPLY_LOG_HISTOGRAM.observe(duration_to_sec(elapsed) as f64); + 
STORE_APPLY_LOG_HISTOGRAM.observe(duration_to_sec(elapsed)); for mut inspector in std::mem::take(&mut self.pending_latency_inspect) { inspector.record_apply_process(elapsed); inspector.finish(); @@ -5861,7 +5861,7 @@ mod tests { } } let sst_path = import_dir.path().join("test.sst"); - let (mut meta, data) = gen_sst_file_with_kvs(&sst_path, &kvs); + let (mut meta, data) = gen_sst_file_with_kvs(sst_path, &kvs); meta.set_region_id(1); meta.mut_region_epoch().set_conf_ver(1); meta.mut_region_epoch().set_version(3); @@ -5892,7 +5892,7 @@ mod tests { } } let sst_path = import_dir.path().join("test2.sst"); - let (mut meta, data) = gen_sst_file_with_kvs(&sst_path, &kvs); + let (mut meta, data) = gen_sst_file_with_kvs(sst_path, &kvs); meta.set_region_id(1); meta.mut_region_epoch().set_conf_ver(1); meta.mut_region_epoch().set_version(3); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 8c7ef17cfa6..63bb878838c 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -694,7 +694,7 @@ where .raft_metrics .event_time .peer_msg - .observe(duration_to_sec(timer.saturating_elapsed()) as f64); + .observe(duration_to_sec(timer.saturating_elapsed())); } #[inline] diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 0f172b6c70f..28c0db02eee 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -699,7 +699,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> .raft_metrics .event_time .get(tick.tag()) - .observe(duration_to_sec(elapsed) as f64); + .observe(duration_to_sec(elapsed)); slow_log!( elapsed, "[store {}] handle timeout {:?}", @@ -767,7 +767,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> .raft_metrics .event_time .store_msg - .observe(duration_to_sec(timer.saturating_elapsed()) as f64); + 
.observe(duration_to_sec(timer.saturating_elapsed())); } fn start(&mut self, store: metapb::Store) { diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 262f9fd64c5..a4c6c435741 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -719,6 +719,7 @@ pub struct InspectedRaftMessage { } /// Message that can be sent to a peer. +#[allow(clippy::large_enum_variant)] pub enum PeerMsg { /// Raft message is the message sent between raft nodes in the same /// raft group. Messages need to be redirected to raftstore if target diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 9995582f13c..8ca5b26d02b 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -557,7 +557,7 @@ impl Snapshot { for (i, file_path) in file_paths.iter().enumerate() { if cf_file.size[i] > 0 { let path = Path::new(file_path); - let file = File::open(&path)?; + let file = File::open(path)?; cf_file .file_for_sending .push(Box::new(file) as Box); @@ -600,7 +600,7 @@ impl Snapshot { let f = OpenOptions::new() .write(true) .create_new(true) - .open(&file_path)?; + .open(file_path)?; cf_file.file_for_recving.push(CfFileForRecving { file: f, encrypter: None, @@ -788,7 +788,7 @@ impl Snapshot { if !for_send && !plain_file_used(cf_file.cf) { sst_importer::prepare_sst_for_ingestion( file_path, - &Path::new(&clone_file_paths[i]), + Path::new(&clone_file_paths[i]), self.mgr.encryption_key_manager.as_deref(), )?; } @@ -972,7 +972,7 @@ impl Snapshot { } else { // delete snapshot files according to meta file for clone_file_path in clone_file_paths { - delete_file_if_exist(&clone_file_path).unwrap(); + delete_file_if_exist(clone_file_path).unwrap(); } } @@ -983,7 +983,7 @@ impl Snapshot { try_delete_snapshot_files!(cf_file, gen_tmp_file_name); } else { for tmp_file_path in tmp_file_paths { - delete_file_if_exist(&tmp_file_path).unwrap(); + 
delete_file_if_exist(tmp_file_path).unwrap(); } } } @@ -994,7 +994,7 @@ impl Snapshot { try_delete_snapshot_files!(cf_file); } else { for file_path in &file_paths { - delete_file_if_exist(&file_path).unwrap(); + delete_file_if_exist(file_path).unwrap(); } if let Some(ref mgr) = self.mgr.encryption_key_manager { for file_path in &file_paths { @@ -1047,7 +1047,7 @@ impl Snapshot { snap_data.set_version(SNAPSHOT_VERSION); snap_data.set_meta(self.meta_file.meta.as_ref().unwrap().clone()); - SNAPSHOT_BUILD_TIME_HISTOGRAM.observe(duration_to_sec(t.saturating_elapsed()) as f64); + SNAPSHOT_BUILD_TIME_HISTOGRAM.observe(duration_to_sec(t.saturating_elapsed())); SNAPSHOT_KV_COUNT_HISTOGRAM.observe(total_count as f64); SNAPSHOT_SIZE_HISTOGRAM.observe(total_size as f64); info!( @@ -1115,7 +1115,7 @@ impl Snapshot { || (cf_file .file_paths() .iter() - .all(|file_path| file_exists(&Path::new(file_path)))) + .all(|file_path| file_exists(Path::new(file_path)))) }) && file_exists(&self.meta_file.path) } @@ -1184,7 +1184,7 @@ impl Snapshot { let tmp_paths = cf_file.tmp_file_paths(); let paths = cf_file.file_paths(); for (i, tmp_path) in tmp_paths.iter().enumerate() { - file_system::rename(&tmp_path, &paths[i])?; + file_system::rename(tmp_path, &paths[i])?; } } sync_dir(&self.dir_path)?; @@ -1488,7 +1488,7 @@ impl SnapManager { "{}_{}{}{}", DEL_RANGE_PREFIX, sst_id, SST_FILE_SUFFIX, TMP_FILE_SUFFIX ); - let path = PathBuf::from(&self.core.base).join(&filename); + let path = PathBuf::from(&self.core.base).join(filename); path.to_str().unwrap().to_string() } @@ -1802,7 +1802,7 @@ impl SnapManagerCore { } r?; } else { - file_system::rename(&tmp_file_path, &file_paths[i])?; + file_system::rename(tmp_file_path, &file_paths[i])?; } let file = Path::new(&file_paths[i]); let (checksum, size) = calc_checksum_and_size(file, mgr)?; @@ -1957,7 +1957,7 @@ impl TabletSnapManager { pub fn get_tablet_checkpointer_path(&self, key: &TabletSnapKey) -> PathBuf { let prefix = format!("{}_{}", 
SNAP_GEN_PREFIX, key); - PathBuf::from(&self.base).join(&prefix) + PathBuf::from(&self.base).join(prefix) } } diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 31bf3156c58..3cdee1e40f1 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -152,7 +152,7 @@ where Ok(new_sst_writer) => { let old_writer = sst_writer.replace(new_sst_writer); box_try!(old_writer.finish()); - box_try!(File::open(&prev_path).and_then(|f| f.sync_all())); + box_try!(File::open(prev_path).and_then(|f| f.sync_all())); } Err(e) => { let io_error = io::Error::new(io::ErrorKind::Other, e); diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index a211a8f0a60..7e00daa2764 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -361,7 +361,7 @@ impl RegionInfo { if n == 0 || self.key_ranges.len() < self.sample_num { self.key_ranges.push(key_range); } else { - let j = rand::thread_rng().gen_range(0..n) as usize; + let j = rand::thread_rng().gen_range(0..n); if j < self.sample_num { self.key_ranges[j] = key_range; } diff --git a/components/resolved_ts/tests/integrations/mod.rs b/components/resolved_ts/tests/integrations/mod.rs index da28758a5d2..7802108b92b 100644 --- a/components/resolved_ts/tests/integrations/mod.rs +++ b/components/resolved_ts/tests/integrations/mod.rs @@ -63,7 +63,7 @@ fn test_resolved_ts_basic() { sst_epoch.set_conf_ver(1); sst_epoch.set_version(4); - let (mut meta, data) = gen_sst_file(&sst_path, sst_range); + let (mut meta, data) = gen_sst_file(sst_path, sst_range); meta.set_region_id(r1.id); meta.set_region_epoch(sst_epoch); diff --git a/components/resource_metering/src/recorder/mod.rs b/components/resource_metering/src/recorder/mod.rs index 9ed6acfb74f..f0b2e88ee4e 100644 --- 
a/components/resource_metering/src/recorder/mod.rs +++ b/components/resource_metering/src/recorder/mod.rs @@ -303,8 +303,8 @@ pub fn init_recorder( ) { let recorder = RecorderBuilder::default() .precision_ms(precision_ms) - .add_sub_recorder(Box::new(CpuRecorder::default())) - .add_sub_recorder(Box::new(SummaryRecorder::default())) + .add_sub_recorder(Box::::default()) + .add_sub_recorder(Box::::default()) .build(); let mut recorder_worker = WorkerBuilder::new("resource-metering-recorder") .pending_capacity(256) diff --git a/components/security/src/lib.rs b/components/security/src/lib.rs index c0be3ba276b..cc87469426c 100644 --- a/components/security/src/lib.rs +++ b/components/security/src/lib.rs @@ -317,7 +317,7 @@ mod tests { .iter() .enumerate() { - fs::write(f, &[id as u8]).unwrap(); + fs::write(f, [id as u8]).unwrap(); } let mut c = cfg.clone(); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 82973946d96..e4f4dc83049 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -473,7 +473,7 @@ where let cur_port = cur_addr.port(); let lock_dir = get_lock_dir(); - let search_base = env::temp_dir().join(&lock_dir); + let search_base = env::temp_dir().join(lock_dir); file_system::create_dir_all(&search_base) .unwrap_or_else(|_| panic!("create {} failed", search_base.display())); @@ -542,7 +542,7 @@ where disk::set_disk_reserved_space(reserve_space); let path = Path::new(&self.config.storage.data_dir).join(file_system::SPACE_PLACEHOLDER_FILE); - if let Err(e) = file_system::remove_file(&path) { + if let Err(e) = file_system::remove_file(path) { warn!("failed to remove space holder on starting: {}", e); } @@ -1480,7 +1480,7 @@ where .join(Path::new(file_system::SPACE_PLACEHOLDER_FILE)); let placeholder_size: u64 = - file_system::get_file_size(&placeholer_file_path).unwrap_or(0); + file_system::get_file_size(placeholer_file_path).unwrap_or(0); let used_size = snap_size + kv_size + raft_size + 
placeholder_size; let capacity = if config_disk_capacity == 0 || disk_cap < config_disk_capacity { diff --git a/components/server/src/signal_handler.rs b/components/server/src/signal_handler.rs index 88c2ddac9f4..a92845b843d 100644 --- a/components/server/src/signal_handler.rs +++ b/components/server/src/signal_handler.rs @@ -13,7 +13,7 @@ mod imp { #[allow(dead_code)] pub fn wait_for_signal(engines: Option>) { - let mut signals = Signals::new(&[SIGTERM, SIGINT, SIGHUP, SIGUSR1, SIGUSR2]).unwrap(); + let mut signals = Signals::new([SIGTERM, SIGINT, SIGHUP, SIGUSR1, SIGUSR2]).unwrap(); for signal in &mut signals { match signal { SIGTERM | SIGINT | SIGHUP => { diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index c4a0498a9a6..f766729a066 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -247,9 +247,9 @@ impl ImportDir { /// Make an import path base on the basic path and the file name. 
pub fn get_import_path(&self, file_name: &str) -> Result { - let save_path = self.root_dir.join(&file_name); - let temp_path = self.temp_dir.join(&file_name); - let clone_path = self.clone_dir.join(&file_name); + let save_path = self.root_dir.join(file_name); + let temp_path = self.temp_dir.join(file_name); + let clone_path = self.clone_dir.join(file_name); Ok(ImportPath { save: save_path, temp: temp_path, @@ -276,7 +276,7 @@ impl ImportDir { pub fn delete_file(&self, path: &Path, key_manager: Option<&DataKeyManager>) -> Result<()> { if path.exists() { - file_system::remove_file(&path)?; + file_system::remove_file(path)?; if let Some(manager) = key_manager { manager.delete_file(path.to_str().unwrap())?; } @@ -515,7 +515,7 @@ mod test { meta.get_region_epoch().get_version(), SST_SUFFIX, )); - let new_meta = path_to_sst_meta(&path).unwrap(); + let new_meta = path_to_sst_meta(path).unwrap(); assert_eq!(meta, new_meta); } } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 947d7e98e0c..abd616c5bc9 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -877,7 +877,7 @@ mod tests { for (i, &range) in cases.iter().enumerate() { let path = temp_dir.path().join(format!("{}.sst", i)); - let (meta, data) = gen_sst_file(&path, range); + let (meta, data) = gen_sst_file(path, range); let mut f = dir.create(&meta, key_manager.clone()).unwrap(); f.append(&data).unwrap(); diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index a45a3f52462..e990924c638 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -256,7 +256,7 @@ impl TestSuite { let mut batch = Vec::with_capacity(1024); let mut keys = Vec::with_capacity(1024); // Write 50 times to include more different ts. 
- let batch_size = cmp::min(cmp::max(key_count / 50, 1), 1024); + let batch_size = (key_count / 50).clamp(1, 1024); for _ in 0..versions { let mut j = 0; while j < key_count { diff --git a/components/test_raftstore/src/transport_simulate.rs b/components/test_raftstore/src/transport_simulate.rs index 00c12073511..06ff550aa64 100644 --- a/components/test_raftstore/src/transport_simulate.rs +++ b/components/test_raftstore/src/transport_simulate.rs @@ -273,7 +273,7 @@ pub struct DefaultFilterFactory(PhantomData); impl FilterFactory for DefaultFilterFactory { fn generate(&self, _: u64) -> Vec> { - vec![Box::new(F::default())] + vec![Box::::default()] } } diff --git a/components/test_util/src/runner.rs b/components/test_util/src/runner.rs index d05f7e98879..ee2b6548c23 100644 --- a/components/test_util/src/runner.rs +++ b/components/test_util/src/runner.rs @@ -61,11 +61,11 @@ pub fn run_test_with_hook(cases: &[&TestDescAndFn], hook: impl TestHook + Send + let f = match case.testfn { TestFn::StaticTestFn(f) => TestFn::DynTestFn(Box::new(move || { let _watcher = CaseLifeWatcher::new(name.clone(), hook.clone()); - f(); + f() })), TestFn::StaticBenchFn(f) => TestFn::DynBenchFn(Box::new(move |b| { let _watcher = CaseLifeWatcher::new(name.clone(), hook.clone()); - f(b); + f(b) })), ref f => panic!("unexpected testfn {:?}", f), }; diff --git a/components/tidb_query_aggr/src/impl_max_min.rs b/components/tidb_query_aggr/src/impl_max_min.rs index f4046c35440..c18710b3645 100644 --- a/components/tidb_query_aggr/src/impl_max_min.rs +++ b/components/tidb_query_aggr/src/impl_max_min.rs @@ -514,10 +514,10 @@ where self.extremum = value.copied() } } else { - let v1 = self.extremum.map(|x| x as i64); - let v2 = value.map(|x| *x as i64); + let v1: Option = self.extremum; + let v2: Option = value.copied(); if v1.cmp(&v2) == E::ORD { - self.extremum = value.copied() + self.extremum = v2; } } } diff --git a/components/tidb_query_codegen/src/rpn_function.rs 
b/components/tidb_query_codegen/src/rpn_function.rs index 864fce9afd8..dfdede3a3b3 100644 --- a/components/tidb_query_codegen/src/rpn_function.rs +++ b/components/tidb_query_codegen/src/rpn_function.rs @@ -385,7 +385,7 @@ impl parse::Parse for RpnFnAttr { )); } - if !is_varg && !is_raw_varg && (min_args != None || max_args != None) { + if !is_varg && !is_raw_varg && (min_args.is_some() || max_args.is_some()) { return Err(Error::new_spanned( config_items, "`min_args` or `max_args` is only available when `varg` or `raw_varg` presents", diff --git a/components/tidb_query_datatype/src/codec/convert.rs b/components/tidb_query_datatype/src/codec/convert.rs index 26ae799c4ff..418841547ca 100644 --- a/components/tidb_query_datatype/src/codec/convert.rs +++ b/components/tidb_query_datatype/src/codec/convert.rs @@ -186,7 +186,7 @@ pub fn integer_signed_lower_bound(tp: FieldTypeTp) -> i64 { /// `truncate_binary` truncates a buffer to the specified length. #[inline] pub fn truncate_binary(s: &mut Vec, flen: isize) { - if flen != crate::UNSPECIFIED_LENGTH as isize && s.len() > flen as usize { + if flen != crate::UNSPECIFIED_LENGTH && s.len() > flen as usize { s.truncate(flen as usize); } } @@ -431,7 +431,7 @@ impl ToInt for Decimal { fn to_int(&self, ctx: &mut EvalContext, tp: FieldTypeTp) -> Result { let dec = round_decimal_with_ctx(ctx, *self)?; let val = dec.as_i64(); - let err = Error::truncated_wrong_val("DECIMAL", &dec); + let err = Error::truncated_wrong_val("DECIMAL", dec); let r = val.into_result_with_overflow_err(ctx, err)?; r.to_int(ctx, tp) } @@ -440,7 +440,7 @@ impl ToInt for Decimal { fn to_uint(&self, ctx: &mut EvalContext, tp: FieldTypeTp) -> Result { let dec = round_decimal_with_ctx(ctx, *self)?; let val = dec.as_u64(); - let err = Error::truncated_wrong_val("DECIMAL", &dec); + let err = Error::truncated_wrong_val("DECIMAL", dec); let r = val.into_result_with_overflow_err(ctx, err)?; r.to_uint(ctx, tp) } @@ -639,7 +639,7 @@ pub fn produce_dec_with_specified_tp( 
// select (cast 111 as decimal(1)) causes a warning in MySQL. ctx.handle_overflow_err(Error::overflow( "Decimal", - &format!("({}, {})", flen, decimal), + format!("({}, {})", flen, decimal), ))?; dec = max_or_min_dec(dec.is_negative(), flen as u8, decimal as u8) } else if frac != decimal { @@ -648,7 +648,7 @@ pub fn produce_dec_with_specified_tp( .round(decimal as i8, RoundMode::HalfEven) .into_result_with_overflow_err( ctx, - Error::overflow("Decimal", &format!("({}, {})", flen, decimal)), + Error::overflow("Decimal", format!("({}, {})", flen, decimal)), )?; if !rounded.is_zero() && frac > decimal && rounded != old { if ctx.cfg.flag.contains(Flag::IN_INSERT_STMT) @@ -811,7 +811,7 @@ impl ConvertTo for &[u8] { .map_err(|err| -> Error { box_err!("Parse '{}' to float err: {:?}", vs, err) })?; // The `parse` will return Ok(inf) if the float string literal out of range if val.is_infinite() { - ctx.handle_truncate_err(Error::truncated_wrong_val("DOUBLE", &vs))?; + ctx.handle_truncate_err(Error::truncated_wrong_val("DOUBLE", vs))?; if val.is_sign_negative() { return Ok(f64::MIN); } else { @@ -1036,7 +1036,7 @@ fn exp_float_str_to_int_str<'a>( // And the intCnt may contain the len of `+/-`, // so here we use 21 here as the early detection. 
ctx.warnings - .append_warning(Error::overflow("BIGINT", &valid_float)); + .append_warning(Error::overflow("BIGINT", valid_float)); return Cow::Borrowed(valid_float); } if int_cnt <= 0 { diff --git a/components/tidb_query_datatype/src/codec/mysql/decimal.rs b/components/tidb_query_datatype/src/codec/mysql/decimal.rs index 135a3cd2ce7..143ec6c7760 100644 --- a/components/tidb_query_datatype/src/codec/mysql/decimal.rs +++ b/components/tidb_query_datatype/src/codec/mysql/decimal.rs @@ -373,11 +373,11 @@ fn do_sub<'a>(mut lhs: &'a Decimal, mut rhs: &'a Decimal) -> Res { } let mut carry = 0; let mut res = res.map(|_| Decimal::new(int_cnt, frac_cnt, negative)); - let mut l_idx = l_start + l_int_word_cnt as usize + l_frac_word_cnt as usize; - let mut r_idx = r_start + r_int_word_cnt as usize + r_frac_word_cnt as usize; + let mut l_idx = l_start + l_int_word_cnt + l_frac_word_cnt as usize; + let mut r_idx = r_start + r_int_word_cnt + r_frac_word_cnt as usize; // adjust `l_idx` and `r_idx` to the same position of digits after the point. if l_frac_word_cnt > r_frac_word_cnt { - let l_stop = l_start + l_int_word_cnt as usize + r_frac_word_cnt as usize; + let l_stop = l_start + l_int_word_cnt + r_frac_word_cnt as usize; if l_frac_word_cnt < frac_word_to { // It happens only when suffix 0 exist(3.10000000000-2.00). idx_to -= (frac_word_to - l_frac_word_cnt) as usize; @@ -388,7 +388,7 @@ fn do_sub<'a>(mut lhs: &'a Decimal, mut rhs: &'a Decimal) -> Res { res.word_buf[idx_to] = lhs.word_buf[l_idx]; } } else { - let r_stop = r_start + r_int_word_cnt as usize + l_frac_word_cnt as usize; + let r_stop = r_start + r_int_word_cnt + l_frac_word_cnt as usize; if frac_word_to > r_frac_word_cnt { // It happens only when suffix 0 exist(3.00-2.00000000000). 
idx_to -= (frac_word_to - r_frac_word_cnt) as usize; @@ -802,7 +802,7 @@ fn do_mul(lhs: &Decimal, rhs: &Decimal) -> Res { word_cnt!(lhs.int_cnt + rhs.int_cnt) as usize, l_frac_word_cnt + r_frac_word_cnt, ); - let (mut old_int_word_to, mut old_frac_word_to) = (int_word_to as i32, frac_word_to as i32); + let (mut old_int_word_to, mut old_frac_word_to) = (int_word_to as i32, frac_word_to); let res = fix_word_cnt_err(int_word_to as u8, frac_word_to as u8, WORD_BUF_LEN); let (int_word_to, frac_word_to) = (res.0 as usize, res.1 as usize); let negative = lhs.negative != rhs.negative; @@ -1623,7 +1623,7 @@ impl Decimal { let mut inner_idx = 0; let mut word_idx = int_word_cnt as usize; let mut word = 0; - for c in bs[int_idx - int_cnt as usize..int_idx].iter().rev() { + for c in bs[int_idx - int_cnt..int_idx].iter().rev() { word += u32::from(c - b'0') * TEN_POW[inner_idx]; inner_idx += 1; if inner_idx == DIGITS_PER_WORD as usize { @@ -1642,7 +1642,7 @@ impl Decimal { word_idx = int_word_cnt as usize; word = 0; inner_idx = 0; - for &c in bs.iter().skip(int_idx + 1).take(frac_cnt as usize) { + for &c in bs.iter().skip(int_idx + 1).take(frac_cnt) { word = u32::from(c - b'0') + word * 10; inner_idx += 1; if inner_idx == DIGITS_PER_WORD as usize { @@ -2389,7 +2389,7 @@ impl Hash for Decimal { while idx < stop && self.word_buf[idx] == 0 { idx += 1; } - let start = idx as usize; + let start = idx; let int_word_cnt = stop - idx; int_word_cnt.hash(state); diff --git a/components/tidb_query_datatype/src/codec/mysql/duration.rs b/components/tidb_query_datatype/src/codec/mysql/duration.rs index 520c985f4b5..7279f788146 100644 --- a/components/tidb_query_datatype/src/codec/mysql/duration.rs +++ b/components/tidb_query_datatype/src/codec/mysql/duration.rs @@ -1070,7 +1070,7 @@ mod tests { #[test] fn test_checked_add_and_sub_duration() { /// `MAX_TIME_IN_SECS` is the maximum for mysql time type. 
- const MAX_TIME_IN_SECS: i64 = MAX_HOUR_PART as i64 * SECS_PER_HOUR as i64 + const MAX_TIME_IN_SECS: i64 = MAX_HOUR_PART as i64 * SECS_PER_HOUR + MAX_MINUTE_PART as i64 * SECS_PER_MINUTE + MAX_SECOND_PART as i64; @@ -1110,7 +1110,7 @@ mod tests { // UNSPECIFIED_FSP ( 8385959, - UNSPECIFIED_FSP as i8, + UNSPECIFIED_FSP, Ok(Duration::parse(&mut EvalContext::default(), "838:59:59", 0).unwrap()), false, ), diff --git a/components/tidb_query_datatype/src/codec/mysql/json/binary.rs b/components/tidb_query_datatype/src/codec/mysql/json/binary.rs index 734ec1d4115..c965247b8da 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/binary.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/binary.rs @@ -82,7 +82,7 @@ impl<'a> JsonRef<'a> { pub fn val_entry_get(&self, val_entry_off: usize) -> Result> { let val_type: JsonType = self.value()[val_entry_off].try_into()?; let val_offset = - NumberCodec::decode_u32_le(&self.value()[val_entry_off + TYPE_LEN as usize..]) as usize; + NumberCodec::decode_u32_le(&self.value()[val_entry_off + TYPE_LEN..]) as usize; Ok(match val_type { JsonType::Literal => { let offset = val_entry_off + TYPE_LEN; diff --git a/components/tidb_query_datatype/src/codec/mysql/time/extension.rs b/components/tidb_query_datatype/src/codec/mysql/time/extension.rs index 7cc233e92d1..9289625ad84 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/extension.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/extension.rs @@ -95,7 +95,7 @@ impl DateTimeExtension for Time { } if week_year && days >= 52 * 7 { - weekday = (weekday + calc_days_in_year(year as i32)) % 7; + weekday = (weekday + calc_days_in_year(year)) % 7; if (!first_weekday && weekday < 4) || (first_weekday && weekday == 0) { year += 1; return (year, 1); diff --git a/components/tidb_query_datatype/src/codec/overflow.rs b/components/tidb_query_datatype/src/codec/overflow.rs index b1329e989c7..4a81b23a995 100644 --- 
a/components/tidb_query_datatype/src/codec/overflow.rs +++ b/components/tidb_query_datatype/src/codec/overflow.rs @@ -13,7 +13,7 @@ pub fn div_i64(a: i64, b: i64) -> Result { match a.overflowing_div(b) { (_res, true) => Err(Error::overflow( "UNSIGNED BIGINT", - &format!("({} / {})", a, b), + format!("({} / {})", a, b), )), (res, false) => Ok(res), } @@ -31,7 +31,7 @@ pub fn div_u64_with_i64(a: u64, b: i64) -> Result { if a != 0 && (b.overflowing_neg().0 as u64) <= a { Err(Error::overflow( "UNSIGNED BIGINT", - &format!("({} / {})", a, b), + format!("({} / {})", a, b), )) } else { Ok(0) @@ -53,7 +53,7 @@ pub fn div_i64_with_u64(a: i64, b: u64) -> Result { if a.overflowing_neg().0 as u64 >= b { Err(Error::overflow( "UNSIGNED BIGINT", - &format!("({} / {})", a, b), + format!("({} / {})", a, b), )) } else { Ok(0) diff --git a/components/tidb_query_datatype/src/codec/table.rs b/components/tidb_query_datatype/src/codec/table.rs index 0c995487b3d..00f6c22347b 100644 --- a/components/tidb_query_datatype/src/codec/table.rs +++ b/components/tidb_query_datatype/src/codec/table.rs @@ -647,7 +647,7 @@ mod tests { let mut ctx = EvalContext::default(); let col_ids: Vec<_> = row.iter().map(|(&id, _)| id).collect(); - let col_values: Vec<_> = row.iter().map(|(_, v)| v.clone()).collect(); + let col_values: Vec<_> = row.values().cloned().collect(); let mut col_encoded: HashMap<_, _> = row .iter() .map(|(k, v)| { diff --git a/components/tidb_query_datatype/src/expr/ctx.rs b/components/tidb_query_datatype/src/expr/ctx.rs index 758f7b13736..c17cb7af922 100644 --- a/components/tidb_query_datatype/src/expr/ctx.rs +++ b/components/tidb_query_datatype/src/expr/ctx.rs @@ -143,7 +143,7 @@ impl EvalConfig { self.tz = tz; Ok(self) } - None => Err(Error::invalid_timezone(&format!("offset {}s", offset_sec))), + None => Err(Error::invalid_timezone(format!("offset {}s", offset_sec))), } } @@ -300,7 +300,7 @@ impl EvalContext { } let orig_str = String::from_utf8_lossy(bytes); self.warnings - 
.append_warning(Error::truncated_wrong_val("INTEGER", &orig_str)); + .append_warning(Error::truncated_wrong_val("INTEGER", orig_str)); if negative { Ok(i64::MIN) } else { diff --git a/components/tidb_query_executors/src/simple_aggr_executor.rs b/components/tidb_query_executors/src/simple_aggr_executor.rs index 75790428187..b6717a40fb5 100644 --- a/components/tidb_query_executors/src/simple_aggr_executor.rs +++ b/components/tidb_query_executors/src/simple_aggr_executor.rs @@ -207,7 +207,7 @@ impl AggregationExecutorImpl for SimpleAggregationImpl #[inline] fn groups_len(&self) -> usize { - if self.has_input_rows { 1 } else { 0 } + self.has_input_rows as usize } #[inline] diff --git a/components/tidb_query_executors/src/top_n_executor.rs b/components/tidb_query_executors/src/top_n_executor.rs index 06dc1ce956b..6ef8c6b2224 100644 --- a/components/tidb_query_executors/src/top_n_executor.rs +++ b/components/tidb_query_executors/src/top_n_executor.rs @@ -98,7 +98,7 @@ impl BatchTopNExecutor { Self { heap: BinaryHeap::new(), - eval_columns_buffer_unsafe: Box::new(Vec::new()), + eval_columns_buffer_unsafe: Box::>::default(), order_exprs: order_exprs.into_boxed_slice(), order_exprs_field_type: order_exprs_field_type.into_boxed_slice(), order_is_desc: order_is_desc.into_boxed_slice(), @@ -127,7 +127,7 @@ impl BatchTopNExecutor { Self { heap: BinaryHeap::new(), - eval_columns_buffer_unsafe: Box::new(Vec::new()), + eval_columns_buffer_unsafe: Box::>::default(), order_exprs: order_exprs.into_boxed_slice(), order_exprs_field_type: order_exprs_field_type.into_boxed_slice(), order_is_desc: order_is_desc.into_boxed_slice(), diff --git a/components/tidb_query_expr/src/impl_arithmetic.rs b/components/tidb_query_expr/src/impl_arithmetic.rs index 01776c1ad7a..2f48fec4693 100644 --- a/components/tidb_query_expr/src/impl_arithmetic.rs +++ b/components/tidb_query_expr/src/impl_arithmetic.rs @@ -44,7 +44,7 @@ impl ArithmeticOp for IntIntPlus { fn calc(lhs: &Int, rhs: &Int) -> Result> { 
lhs.checked_add(*rhs) - .ok_or_else(|| Error::overflow("BIGINT", &format!("({} + {})", lhs, rhs)).into()) + .ok_or_else(|| Error::overflow("BIGINT", format!("({} + {})", lhs, rhs)).into()) .map(Some) } } @@ -61,10 +61,8 @@ impl ArithmeticOp for IntUintPlus { } else { (*rhs as u64).checked_sub(lhs.overflowing_neg().0 as u64) }; - res.ok_or_else(|| { - Error::overflow("BIGINT UNSIGNED", &format!("({} + {})", lhs, rhs)).into() - }) - .map(|v| Some(v as i64)) + res.ok_or_else(|| Error::overflow("BIGINT UNSIGNED", format!("({} + {})", lhs, rhs)).into()) + .map(|v| Some(v as i64)) } } @@ -89,7 +87,7 @@ impl ArithmeticOp for UintUintPlus { (*lhs as u64) .checked_add(*rhs as u64) .ok_or_else(|| { - Error::overflow("BIGINT UNSIGNED", &format!("({} + {})", lhs, rhs)).into() + Error::overflow("BIGINT UNSIGNED", format!("({} + {})", lhs, rhs)).into() }) .map(|v| Some(v as i64)) } @@ -104,7 +102,7 @@ impl ArithmeticOp for RealPlus { fn calc(lhs: &Real, rhs: &Real) -> Result> { let res = *lhs + *rhs; if !res.is_finite() { - return Err(Error::overflow("DOUBLE", &format!("({} + {})", lhs, rhs)).into()); + return Err(Error::overflow("DOUBLE", format!("({} + {})", lhs, rhs)).into()); } Ok(Some(res)) } @@ -130,7 +128,7 @@ impl ArithmeticOp for IntIntMinus { fn calc(lhs: &Int, rhs: &Int) -> Result> { lhs.checked_sub(*rhs) - .ok_or_else(|| Error::overflow("BIGINT", &format!("({} - {})", lhs, rhs)).into()) + .ok_or_else(|| Error::overflow("BIGINT", format!("({} - {})", lhs, rhs)).into()) .map(Some) } } @@ -145,10 +143,10 @@ impl ArithmeticOp for IntUintMinus { if *lhs >= 0 { (*lhs as u64) .checked_sub(*rhs as u64) - .ok_or_else(|| Error::overflow("BIGINT", &format!("({} - {})", lhs, rhs)).into()) + .ok_or_else(|| Error::overflow("BIGINT", format!("({} - {})", lhs, rhs)).into()) .map(|v| Some(v as i64)) } else { - Err(Error::overflow("BIGINT", &format!("({} - {})", lhs, rhs)).into()) + Err(Error::overflow("BIGINT", format!("({} - {})", lhs, rhs)).into()) } } } @@ -165,7 +163,7 @@ impl 
ArithmeticOp for UintIntMinus { } else { (*lhs as u64).checked_add(rhs.overflowing_neg().0 as u64) }; - res.ok_or_else(|| Error::overflow("BIGINT", &format!("({} - {})", lhs, rhs)).into()) + res.ok_or_else(|| Error::overflow("BIGINT", format!("({} - {})", lhs, rhs)).into()) .map(|v| Some(v as i64)) } } @@ -180,7 +178,7 @@ impl ArithmeticOp for UintUintMinus { (*lhs as u64) .checked_sub(*rhs as u64) .ok_or_else(|| { - Error::overflow("BIGINT UNSIGNED", &format!("({} - {})", lhs, rhs)).into() + Error::overflow("BIGINT UNSIGNED", format!("({} - {})", lhs, rhs)).into() }) .map(|v| Some(v as i64)) } @@ -195,7 +193,7 @@ impl ArithmeticOp for RealMinus { fn calc(lhs: &Real, rhs: &Real) -> Result> { let res = *lhs - *rhs; if !res.is_finite() { - return Err(Error::overflow("DOUBLE", &format!("({} - {})", lhs, rhs)).into()); + return Err(Error::overflow("DOUBLE", format!("({} - {})", lhs, rhs)).into()); } Ok(Some(res)) } @@ -332,7 +330,7 @@ impl ArithmeticOp for RealMultiply { fn calc(lhs: &Real, rhs: &Real) -> Result> { let res = *lhs * *rhs; if res.is_infinite() { - Err(Error::overflow("REAL", &format!("({} * {})", lhs, rhs)).into()) + Err(Error::overflow("REAL", format!("({} * {})", lhs, rhs)).into()) } else { Ok(Some(res)) } @@ -346,7 +344,7 @@ impl ArithmeticOp for IntIntMultiply { type T = Int; fn calc(lhs: &Int, rhs: &Int) -> Result> { lhs.checked_mul(*rhs) - .ok_or_else(|| Error::overflow("BIGINT", &format!("({} * {})", lhs, rhs)).into()) + .ok_or_else(|| Error::overflow("BIGINT", format!("({} * {})", lhs, rhs)).into()) .map(Some) } } @@ -362,7 +360,7 @@ impl ArithmeticOp for IntUintMultiply { } else { None } - .ok_or_else(|| Error::overflow("BIGINT UNSIGNED", &format!("({} * {})", lhs, rhs)).into()) + .ok_or_else(|| Error::overflow("BIGINT UNSIGNED", format!("({} * {})", lhs, rhs)).into()) .map(Some) } } @@ -386,7 +384,7 @@ impl ArithmeticOp for UintUintMultiply { (*lhs as u64) .checked_mul(*rhs as u64) .ok_or_else(|| { - Error::overflow("BIGINT UNSIGNED", 
&format!("({} * {})", lhs, rhs)).into() + Error::overflow("BIGINT UNSIGNED", format!("({} * {})", lhs, rhs)).into() }) .map(|v| Some(v as i64)) } @@ -500,7 +498,7 @@ impl ArithmeticOpWithCtx for RealDivide { } else { let result = *lhs / *rhs; if result.is_infinite() { - ctx.handle_overflow_err(Error::overflow("DOUBLE", &format!("{} / {}", lhs, rhs))) + ctx.handle_overflow_err(Error::overflow("DOUBLE", format!("{} / {}", lhs, rhs))) .map(|_| None)? } else { Some(result) diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index 81a08b95e94..76e90f79c5b 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -242,7 +242,7 @@ pub fn get_cast_fn_rpn_node( func_meta, args_len: 1, field_type: to_field_type, - metadata: Box::new(tipb::InUnionMetadata::default()), + metadata: Box::::default(), }) } @@ -373,7 +373,7 @@ fn cast_string_as_int( ctx.warnings .append_warning(Error::cast_neg_int_as_unsigned()); } - Ok(Some(x as i64)) + Ok(Some(x)) } Err(err) => match *err.kind() { IntErrorKind::PosOverflow | IntErrorKind::NegOverflow => { @@ -3118,7 +3118,7 @@ mod tests { (Json::from_bool(false).unwrap(), 0, false, false), (Json::none().unwrap(), 0, false, false), ( - Json::from_f64(((1u64 << 63) + (1u64 << 62)) as u64 as f64).unwrap(), + Json::from_f64(((1u64 << 63) + (1u64 << 62)) as f64).unwrap(), i64::MAX, true, false, @@ -4341,7 +4341,7 @@ mod tests { test_as_string_helper( ref_cs, |ctx, extra, val| { - let val = val.map(|x| *x as i64); + let val = val.copied(); cast_year_as_string(ctx, extra, &val.unwrap()) }, "cast_year_as_string", @@ -5026,10 +5026,8 @@ mod tests { let expect = match res_type { ResType::Zero => Decimal::zero(), ResType::Same => base_res, - ResType::TruncateToMax => max_decimal(res_flen as u8, res_decimal as u8), - ResType::TruncateToMin => { - max_or_min_dec(true, res_flen as u8, res_decimal as u8) - } + ResType::TruncateToMax => max_decimal(res_flen, 
res_decimal), + ResType::TruncateToMin => max_or_min_dec(true, res_flen, res_decimal), ResType::Round => { let r = base_res .round(res_decimal as i8, RoundMode::HalfEven) @@ -6697,7 +6695,7 @@ mod tests { Json::from_f64(i64::MAX as u64 as f64).unwrap(), Json::from_f64(i64::MIN as u64 as f64).unwrap(), Json::from_f64(i64::MIN as f64).unwrap(), - Json::from_f64(((1u64 << 63) + (1u64 << 62)) as u64 as f64).unwrap(), + Json::from_f64(((1u64 << 63) + (1u64 << 62)) as f64).unwrap(), Json::from_f64(-((1u64 << 63) as f64 + (1u64 << 62) as f64)).unwrap(), Json::from_f64(f64::from(f32::MIN)).unwrap(), Json::from_f64(f64::from(f32::MAX)).unwrap(), diff --git a/components/tidb_query_expr/src/impl_compare.rs b/components/tidb_query_expr/src/impl_compare.rs index a8dbf96d1cb..3eae996f249 100644 --- a/components/tidb_query_expr/src/impl_compare.rs +++ b/components/tidb_query_expr/src/impl_compare.rs @@ -361,7 +361,7 @@ pub fn greatest_cmp_string_as_time( Ok(t) => greatest = max(greatest, Some(t)), Err(_) => { return ctx - .handle_invalid_time_error(Error::invalid_time_format(&s)) + .handle_invalid_time_error(Error::invalid_time_format(s)) .map(|_| Ok(None))?; } } @@ -398,7 +398,7 @@ pub fn least_cmp_string_as_time( Ok(t) => least = min(least, Some(t)), Err(_) => { return ctx - .handle_invalid_time_error(Error::invalid_time_format(&s)) + .handle_invalid_time_error(Error::invalid_time_format(s)) .map(|_| Ok(None))?; } } @@ -434,7 +434,7 @@ pub fn greatest_cmp_string_as_date( Ok(t) => greatest = max(greatest, Some(t)), Err(_) => { return ctx - .handle_invalid_time_error(Error::invalid_time_format(&s)) + .handle_invalid_time_error(Error::invalid_time_format(s)) .map(|_| Ok(None))?; } } @@ -471,7 +471,7 @@ pub fn least_cmp_string_as_date( Ok(t) => least = min(least, Some(t)), Err(_) => { return ctx - .handle_invalid_time_error(Error::invalid_time_format(&s)) + .handle_invalid_time_error(Error::invalid_time_format(s)) .map(|_| Ok(None))?; } } diff --git 
a/components/tidb_query_expr/src/impl_math.rs b/components/tidb_query_expr/src/impl_math.rs index abd190d077a..beeeef288b4 100644 --- a/components/tidb_query_expr/src/impl_math.rs +++ b/components/tidb_query_expr/src/impl_math.rs @@ -226,7 +226,7 @@ impl Floor for FloorIntToInt { #[inline] fn abs_int(arg: &Int) -> Result> { match arg.checked_abs() { - None => Err(Error::overflow("BIGINT", &format!("abs({})", *arg)).into()), + None => Err(Error::overflow("BIGINT", format!("abs({})", *arg)).into()), Some(arg_abs) => Ok(Some(arg_abs)), } } @@ -288,7 +288,7 @@ fn radians(arg: &Real) -> Result> { pub fn exp(arg: &Real) -> Result> { let ret = arg.exp(); if ret.is_infinite() { - Err(Error::overflow("DOUBLE", &format!("exp({})", arg)).into()) + Err(Error::overflow("DOUBLE", format!("exp({})", arg)).into()) } else { Ok(Real::new(ret).ok()) } diff --git a/components/tidb_query_expr/src/impl_op.rs b/components/tidb_query_expr/src/impl_op.rs index 9081f623b8e..5289f427e93 100644 --- a/components/tidb_query_expr/src/impl_op.rs +++ b/components/tidb_query_expr/src/impl_op.rs @@ -64,7 +64,7 @@ pub fn unary_minus_uint(arg: Option<&Int>) -> Result> { Some(val) => { let uval = *val as u64; match uval.cmp(&(i64::MAX as u64 + 1)) { - Greater => Err(Error::overflow("BIGINT", &format!("-{}", uval)).into()), + Greater => Err(Error::overflow("BIGINT", format!("-{}", uval)).into()), Equal => Ok(Some(i64::MIN)), Less => Ok(Some(-*val)), } @@ -79,7 +79,7 @@ pub fn unary_minus_int(arg: Option<&Int>) -> Result> { match arg { Some(val) => { if *val == i64::MIN { - Err(Error::overflow("BIGINT", &format!("-{}", *val)).into()) + Err(Error::overflow("BIGINT", format!("-{}", *val)).into()) } else { Ok(Some(-*val)) } diff --git a/components/tidb_query_expr/src/impl_time.rs b/components/tidb_query_expr/src/impl_time.rs index 0f55e21bab5..aca40b658d6 100644 --- a/components/tidb_query_expr/src/impl_time.rs +++ b/components/tidb_query_expr/src/impl_time.rs @@ -256,7 +256,7 @@ pub fn 
add_string_and_duration( return match arg0.checked_add(*arg1) { Some(result) => Ok(writer.write(Some(duration_to_string(result).into_bytes()))), None => ctx - .handle_overflow_err(Error::overflow("DURATION", &format!("{} + {}", arg0, arg1))) + .handle_overflow_err(Error::overflow("DURATION", format!("{} + {}", arg0, arg1))) .map(|_| Ok(writer.write(None)))?, }; }; @@ -264,7 +264,7 @@ pub fn add_string_and_duration( return match arg0.checked_add(ctx, *arg1) { Some(result) => Ok(writer.write(Some(datetime_to_string(result).into_bytes()))), None => ctx - .handle_overflow_err(Error::overflow("DATETIME", &format!("{} + {}", arg0, arg1))) + .handle_overflow_err(Error::overflow("DATETIME", format!("{} + {}", arg0, arg1))) .map(|_| Ok(writer.write(None)))?, }; }; @@ -286,7 +286,7 @@ pub fn sub_string_and_duration( return match arg0.checked_sub(*arg1) { Some(result) => Ok(writer.write(Some(duration_to_string(result).into_bytes()))), None => ctx - .handle_overflow_err(Error::overflow("DURATION", &format!("{} - {}", arg0, arg1))) + .handle_overflow_err(Error::overflow("DURATION", format!("{} - {}", arg0, arg1))) .map(|_| Ok(writer.write(None)))?, }; }; @@ -294,7 +294,7 @@ pub fn sub_string_and_duration( return match arg0.checked_sub(ctx, *arg1) { Some(result) => Ok(writer.write(Some(datetime_to_string(result).into_bytes()))), None => ctx - .handle_overflow_err(Error::overflow("DATETIME", &format!("{} - {}", arg0, arg1))) + .handle_overflow_err(Error::overflow("DATETIME", format!("{} - {}", arg0, arg1))) .map(|_| Ok(writer.write(None)))?, }; }; diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 9d4eb4a8370..32f15786f79 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -6,7 +6,6 @@ //! [`RocksEngine`](RocksEngine) are used for testing only. 
#![feature(min_specialization)] -#![feature(generic_associated_types)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/tikv_util/src/buffer_vec.rs b/components/tikv_util/src/buffer_vec.rs index d2247c011ec..78196577366 100644 --- a/components/tikv_util/src/buffer_vec.rs +++ b/components/tikv_util/src/buffer_vec.rs @@ -429,7 +429,7 @@ mod tests { assert_eq!(format!("{:?}", v), "[]"); assert!(v.is_empty()); - v.push(&[0xAA, 0x0, 0xB]); + v.push([0xAA, 0x0, 0xB]); assert_eq!(v.len(), 1); assert_eq!(v.total_len(), 3); assert!(!v.is_empty()); @@ -475,7 +475,7 @@ mod tests { assert!(v.is_empty()); assert_eq!(format!("{:?}", v), "[]"); - v.push(&[0xCA, 0xB]); + v.push([0xCA, 0xB]); assert_eq!(v.len(), 1); assert_eq!(v.total_len(), 2); assert!(!v.is_empty()); @@ -488,8 +488,8 @@ mod tests { assert!(v.is_empty()); assert_eq!(format!("{:?}", v), "[]"); - v.push(&[0xCA, 0xB]); - v.push(&[]); + v.push([0xCA, 0xB]); + v.push([]); assert_eq!(v.len(), 2); assert_eq!(v.total_len(), 2); assert!(!v.is_empty()); @@ -503,8 +503,8 @@ mod tests { assert_eq!(v[0], [0xCA, 0xB]); assert_eq!(format!("{:?}", v), "[CA0B]"); - v.push(&[]); - v.push(&[]); + v.push([]); + v.push([]); assert_eq!(v.len(), 3); assert_eq!(v.total_len(), 2); assert!(!v.is_empty()); @@ -513,7 +513,7 @@ mod tests { assert!(v[2].is_empty()); assert_eq!(format!("{:?}", v), "[CA0B, null, null]"); - v.push(&[0xC]); + v.push([0xC]); assert_eq!(v.len(), 4); assert_eq!(v.total_len(), 3); assert!(!v.is_empty()); @@ -540,7 +540,7 @@ mod tests { assert!(v[1].is_empty()); assert_eq!(format!("{:?}", v), "[null, null]"); - v.push(&[0xAC, 0xBB, 0x00]); + v.push([0xAC, 0xBB, 0x00]); assert_eq!(v.len(), 3); assert_eq!(v.total_len(), 3); assert!(!v.is_empty()); @@ -561,7 +561,7 @@ mod tests { assert_eq!(v[1], [0xAC, 0xBB, 0x00]); assert_eq!(format!("{:?}", v), "[null, ACBB00]"); - v.push(&[]); + v.push([]); assert_eq!(v.len(), 3); assert_eq!(v.total_len(), 3); assert!(!v.is_empty()); @@ -590,12 +590,12 @@ mod tests 
{ assert!(v.is_empty()); assert_eq!(format!("{:?}", v), "[]"); - v.push(&[0xA]); - v.push(&[0xB]); - v.push(&[0xC]); - v.push(&[0xD, 0xE]); - v.push(&[]); - v.push(&[]); + v.push([0xA]); + v.push([0xB]); + v.push([0xC]); + v.push([0xD, 0xE]); + v.push([]); + v.push([]); assert_eq!(v.len(), 6); assert_eq!(v.total_len(), 5); assert!(!v.is_empty()); @@ -630,14 +630,14 @@ mod tests { #[test] fn test_copy_from() { let mut v1 = BufferVec::new(); - v1.push(&[]); - v1.push(&[0xAA, 0xBB, 0x0C]); - v1.push(&[]); - v1.push(&[0x00]); + v1.push([]); + v1.push([0xAA, 0xBB, 0x0C]); + v1.push([]); + v1.push([0x00]); let mut v2 = BufferVec::new(); - v2.push(&[]); - v2.push(&[]); + v2.push([]); + v2.push([]); let mut v3 = v1.clone(); v3.copy_from(&v2); @@ -650,8 +650,8 @@ mod tests { assert_eq!(v3.total_len(), 3); assert_eq!(format!("{:?}", v3), "[null, AABB0C, null]"); - v3.push(&[]); - v3.push(&[0x00]); + v3.push([]); + v3.push([0x00]); assert_eq!(v3.len(), 5); assert_eq!(v3.total_len(), 4); assert_eq!(format!("{:?}", v3), "[null, AABB0C, null, null, 00]"); @@ -681,12 +681,12 @@ mod tests { assert_eq!(format!("{:?}", v3), "[]"); let mut v1 = BufferVec::new(); - v1.push(&[]); - v1.push(&[0xAA, 0xBB, 0x0C]); + v1.push([]); + v1.push([0xAA, 0xBB, 0x0C]); let mut v2 = BufferVec::new(); - v2.push(&[0x0C, 0x00]); - v2.push(&[]); + v2.push([0x0C, 0x00]); + v2.push([]); let mut v3 = v2.clone(); v3.copy_n_from(&v1, 0); @@ -694,7 +694,7 @@ mod tests { assert_eq!(v3.total_len(), 2); assert_eq!(format!("{:?}", v3), "[0C00, null]"); - v3.push(&[0xAA]); + v3.push([0xAA]); assert_eq!(v3.len(), 3); assert_eq!(v3.total_len(), 3); assert_eq!(format!("{:?}", v3), "[0C00, null, AA]"); @@ -705,16 +705,18 @@ mod tests { assert_eq!(v3.total_len(), 2); assert_eq!(format!("{:?}", v3), "[0C00, null, null]"); - v3.push(&[0xAA]); + v3.push([0xAA]); assert_eq!(v3.len(), 4); assert_eq!(v3.total_len(), 3); assert_eq!(format!("{:?}", v3), "[0C00, null, null, AA]"); - v3.extend(&[0xAA, 0xAB, 0xCC]); + 
v3.extend([0xAA, 0xAB, 0xCC]); assert_eq!(v3.len(), 5); assert_eq!(v3.total_len(), 6); assert_eq!(format!("{:?}", v3), "[0C00, null, null, AA, AAABCC]"); + // False positive: https://github.com/rust-lang/rust-clippy/issues/9111 + #[allow(clippy::needless_borrow)] v3.extend(&[]); assert_eq!(v3.len(), 6); assert_eq!(v3.total_len(), 6); @@ -761,7 +763,7 @@ mod tests { v.retain_by_array(&[]); assert_eq!(format!("{:?}", v), "[]"); - v.push(&[]); + v.push([]); assert_eq!(format!("{:?}", v), "[null]"); v.retain_by_array(&[true]); @@ -770,8 +772,8 @@ mod tests { v.retain_by_array(&[false]); assert_eq!(format!("{:?}", v), "[]"); - v.push(&[0xAA, 0x00]); - v.push(&[]); + v.push([0xAA, 0x00]); + v.push([]); assert_eq!(format!("{:?}", v), "[AA00, null]"); let mut v2 = v.clone(); @@ -790,8 +792,8 @@ mod tests { v2.retain_by_array(&[false, false]); assert_eq!(format!("{:?}", v2), "[]"); - v.push(&[]); - v.push(&[0xBB, 0x00, 0xA0]); + v.push([]); + v.push([0xBB, 0x00, 0xA0]); assert_eq!(format!("{:?}", v), "[AA00, null, null, BB00A0]"); let mut v2 = v.clone(); @@ -812,7 +814,7 @@ mod tests { v2.retain_by_array(&[false, false, true, true]); assert_eq!(format!("{:?}", v2), "[null, BB00A0]"); - v2.push(&[]); + v2.push([]); assert_eq!(format!("{:?}", v2), "[null, BB00A0, null]"); let mut v2 = v.clone(); @@ -841,12 +843,12 @@ mod tests { #[test] fn test_iter() { let mut v = BufferVec::new(); - v.push(&[]); - v.push(&[0xAA, 0xBB, 0x0C]); - v.push(&[]); - v.push(&[]); - v.push(&[0x00]); - v.push(&[]); + v.push([]); + v.push([0xAA, 0xBB, 0x0C]); + v.push([]); + v.push([]); + v.push([0x00]); + v.push([]); let mut it = v.iter(); assert_eq!(it.count(), 6); diff --git a/components/tikv_util/src/codec/bytes.rs b/components/tikv_util/src/codec/bytes.rs index df23090c9c7..b382f64739c 100644 --- a/components/tikv_util/src/codec/bytes.rs +++ b/components/tikv_util/src/codec/bytes.rs @@ -513,7 +513,7 @@ mod tests { desc ); let mut longer_encoded = encoded.clone(); - longer_encoded.extend(&[0, 0, 
0, 0, 0, 0, 0, 0, 0xFF]); + longer_encoded.extend([0, 0, 0, 0, 0, 0, 0, 0, 0xFF]); assert!( !is_encoded_from(&longer_encoded, &raw, desc), "Encoded: {:?}, Raw: {:?}, desc: {}", diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index e11a4799bc0..c55cebea0ff 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -384,8 +384,8 @@ impl FromStr for ReadableDuration { if dur.is_sign_negative() { return Err("duration should be positive.".to_owned()); } - let secs = dur as u64 / SECOND as u64; - let micros = (dur as u64 % SECOND as u64) as u32 * 1_000; + let secs = dur as u64 / SECOND; + let micros = (dur as u64 % SECOND) as u32 * 1_000; Ok(ReadableDuration(Duration::new(secs, micros))) } } @@ -814,7 +814,7 @@ mod check_data_dir { } let ent = &*ent; let cur_dir = CStr::from_ptr(ent.mnt_dir).to_str().unwrap(); - if path.starts_with(&cur_dir) && cur_dir.len() >= fs.mnt_dir.len() { + if path.starts_with(cur_dir) && cur_dir.len() >= fs.mnt_dir.len() { fs.tp = CStr::from_ptr(ent.mnt_type).to_str().unwrap().to_owned(); fs.opts = CStr::from_ptr(ent.mnt_opts).to_str().unwrap().to_owned(); fs.fsname = CStr::from_ptr(ent.mnt_fsname).to_str().unwrap().to_owned(); @@ -844,7 +844,7 @@ mod check_data_dir { let block_dir = "/sys/block"; let mut device_dir = format!("{}/{}", block_dir, dev); if !Path::new(&device_dir).exists() { - let dir = fs::read_dir(&block_dir).map_err(|e| { + let dir = fs::read_dir(block_dir).map_err(|e| { ConfigError::FileSystem(format!( "{}: read block dir {:?} failed: {:?}", op, block_dir, e @@ -1554,7 +1554,7 @@ impl RaftDataStateMachine { fs::remove_dir_all(&trash).unwrap(); } else { info!("Removing file"; "path" => %path.display()); - fs::remove_file(&path).unwrap(); + fs::remove_file(path).unwrap(); Self::sync_dir(path.parent().unwrap()); } } @@ -1571,11 +1571,11 @@ impl RaftDataStateMachine { if !path.exists() || !path.is_dir() { return false; } - 
fs::read_dir(&path).unwrap().next().is_some() + fs::read_dir(path).unwrap().next().is_some() } fn sync_dir(dir: &Path) { - fs::File::open(&dir).and_then(|d| d.sync_all()).unwrap(); + fs::File::open(dir).and_then(|d| d.sync_all()).unwrap(); } } @@ -1789,8 +1789,8 @@ mod tests { ensure_dir_exist(&format!("{}", tmp_dir.to_path_buf().join("dir").display())).unwrap(); let nodes: &[&str] = if cfg!(target_os = "linux") { std::os::unix::fs::symlink( - &tmp_dir.to_path_buf().join("dir"), - &tmp_dir.to_path_buf().join("symlink"), + tmp_dir.to_path_buf().join("dir"), + tmp_dir.to_path_buf().join("symlink"), ) .unwrap(); &["non_existing", "dir", "symlink"] @@ -2116,10 +2116,10 @@ yyy = 100 let source_file = source.join("file"); let target_file = target.join("file"); if !target.exists() { - fs::create_dir_all(&target).unwrap(); + fs::create_dir_all(target).unwrap(); check(); } - fs::copy(&source_file, &target_file).unwrap(); + fs::copy(source_file, target_file).unwrap(); check(); state.after_dump_data_with_check(&check); } @@ -2130,14 +2130,14 @@ yyy = 100 if dst.exists() { fs::remove_dir_all(dst)?; } - fs::create_dir_all(&dst)?; + fs::create_dir_all(dst)?; for entry in fs::read_dir(src)? { let entry = entry?; let ty = entry.file_type()?; if ty.is_dir() { copy_dir(&entry.path(), &dst.join(entry.file_name()))?; } else { - fs::copy(entry.path(), &dst.join(entry.file_name()))?; + fs::copy(entry.path(), dst.join(entry.file_name()))?; } } Ok(()) @@ -2151,7 +2151,7 @@ yyy = 100 fs::create_dir_all(&target).unwrap(); // Write some data into source. 
let source_file = source.join("file"); - File::create(&source_file).unwrap(); + File::create(source_file).unwrap(); let backup = dir.path().join("backup"); diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index 98c73e80c6a..9421c0e174b 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -91,7 +91,7 @@ pub fn panic_mark_file_path>(data_dir: P) -> PathBuf { pub fn create_panic_mark_file>(data_dir: P) { let file = panic_mark_file_path(data_dir); - File::create(&file).unwrap(); + File::create(file).unwrap(); } // Copied from file_system to avoid cyclic dependency diff --git a/components/tikv_util/src/logger/file_log.rs b/components/tikv_util/src/logger/file_log.rs index 5b575638c19..fa7b7c67fca 100644 --- a/components/tikv_util/src/logger/file_log.rs +++ b/components/tikv_util/src/logger/file_log.rs @@ -134,7 +134,7 @@ impl Write for RotatingFileLogger { self.file.flush()?; let new_path = (self.rename)(&self.path)?; - fs::rename(&self.path, &new_path)?; + fs::rename(&self.path, new_path)?; self.file = open_log_file(&self.path)?; // Updates all rotators' states. 
diff --git a/components/tikv_util/src/sys/cgroup.rs b/components/tikv_util/src/sys/cgroup.rs index df15a2dac76..2cd420e5d51 100644 --- a/components/tikv_util/src/sys/cgroup.rs +++ b/components/tikv_util/src/sys/cgroup.rs @@ -94,7 +94,7 @@ impl CGroupSys { } else { format!("{}/memory.limit_in_bytes", path.to_str().unwrap()) }; - return read_to_string(&path) + return read_to_string(path) .map(|x| parse_memory_max(x.trim())) .ok() .flatten(); @@ -112,7 +112,7 @@ impl CGroupSys { if let Some((root, mount_point)) = self.mount_points.get(component) { if let Some(path) = build_path(group, root, mount_point) { let path = format!("{}/cpuset.cpus", path.to_str().unwrap()); - if let Ok(s) = read_to_string(&path) { + if let Ok(s) = read_to_string(path) { return parse_cpu_cores(s.trim()); } } @@ -131,14 +131,14 @@ impl CGroupSys { if let Some(path) = build_path(group, root, mount_point) { if self.is_v2 { let path = format!("{}/cpu.max", path.to_str().unwrap()); - if let Ok(buffer) = read_to_string(&path) { + if let Ok(buffer) = read_to_string(path) { return parse_cpu_quota_v2(buffer.trim()); } } else { let path1 = format!("{}/cpu.cfs_quota_us", path.to_str().unwrap()); let path2 = format!("{}/cpu.cfs_period_us", path.to_str().unwrap()); if let (Ok(buffer1), Ok(buffer2)) = - (read_to_string(&path1), read_to_string(&path2)) + (read_to_string(path1), read_to_string(path2)) { return parse_cpu_quota_v1(buffer1.trim(), buffer2.trim()); } @@ -356,7 +356,7 @@ fn parse_cpu_quota_v1(line1: &str, line2: &str) -> Option { if max > 0.0 { if let Ok(period) = line2.parse::() { if period > 0.0 { - return Some(max as f64 / period as f64); + return Some(max / period); } } } else { @@ -385,11 +385,11 @@ mod tests { fn test_parse_mountinfos_without_cgroup() { let temp = tempfile::TempDir::new().unwrap(); let dir = temp.path().to_str().unwrap(); - std::fs::copy("/proc/self/stat", &format!("{}/stat", dir)).unwrap(); + std::fs::copy("/proc/self/stat", format!("{}/stat", dir)).unwrap(); let mut f = 
OpenOptions::new() .create(true) .write(true) - .open(&format!("{}/mountinfo", dir)) + .open(format!("{}/mountinfo", dir)) .unwrap(); f.write_all(b"").unwrap(); @@ -402,12 +402,12 @@ mod tests { fn test_cpuset_cpu_cpuacct() { let temp = tempfile::TempDir::new().unwrap(); let dir = temp.path().to_str().unwrap(); - std::fs::copy("/proc/self/stat", &format!("{}/stat", dir)).unwrap(); + std::fs::copy("/proc/self/stat", format!("{}/stat", dir)).unwrap(); let mut f = OpenOptions::new() .create(true) .write(true) - .open(&format!("{}/mountinfo", dir)) + .open(format!("{}/mountinfo", dir)) .unwrap(); f.write_all(b"30 26 0:27 / /sys/fs/cgroup/cpuset,cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,cpuset,cpu,cpuacct\n").unwrap(); @@ -432,12 +432,12 @@ mod tests { fn test_mountinfo_with_relative_path() { let temp = tempfile::TempDir::new().unwrap(); let dir = temp.path().to_str().unwrap(); - std::fs::copy("/proc/self/stat", &format!("{}/stat", dir)).unwrap(); + std::fs::copy("/proc/self/stat", format!("{}/stat", dir)).unwrap(); let mut f = OpenOptions::new() .create(true) .write(true) - .open(&format!("{}/mountinfo", dir)) + .open(format!("{}/mountinfo", dir)) .unwrap(); f.write_all(b"1663 1661 0:27 /../../../../../.. /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - cgroup2 cgroup2 rw\n").unwrap(); @@ -461,12 +461,12 @@ mod tests { fn test_conflicting_mountinfo() { let temp = tempfile::TempDir::new().unwrap(); let dir = temp.path().to_str().unwrap(); - std::fs::copy("/proc/self/stat", &format!("{}/stat", dir)).unwrap(); + std::fs::copy("/proc/self/stat", format!("{}/stat", dir)).unwrap(); let mut f = OpenOptions::new() .create(true) .write(true) - .open(&format!("{}/mountinfo", dir)) + .open(format!("{}/mountinfo", dir)) .unwrap(); f.write_all(b"1663 1661 0:27 /../../../../../.. /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - cgroup2 cgroup2 rw 1663 1661 0:27 /../../../../../.. 
/sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - cgroup2 cgroup2 rw").unwrap(); @@ -491,12 +491,12 @@ mod tests { fn test_cgroup_without_mountinfo() { let temp = tempfile::TempDir::new().unwrap(); let dir = temp.path().to_str().unwrap(); - std::fs::copy("/proc/self/stat", &format!("{}/stat", dir)).unwrap(); + std::fs::copy("/proc/self/stat", format!("{}/stat", dir)).unwrap(); let mut f = OpenOptions::new() .create(true) .write(true) - .open(&format!("{}/mountinfo", dir)) + .open(format!("{}/mountinfo", dir)) .unwrap(); f.write_all(b"1663 1661 0:27 /../../../../../.. /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw\n").unwrap(); diff --git a/components/tikv_util/src/sys/inspector.rs b/components/tikv_util/src/sys/inspector.rs index 7b49b647706..d2ff80c6416 100644 --- a/components/tikv_util/src/sys/inspector.rs +++ b/components/tikv_util/src/sys/inspector.rs @@ -90,7 +90,7 @@ mod linux { fn disk_stat(dev: &Self::DiskID) -> Result, String> { let path = "/proc/diskstats"; - let lines = read_to_string(&path).map_err(|e| format!("open({}): {}", path, e))?; + let lines = read_to_string(path).map_err(|e| format!("open({}): {}", path, e))?; for line in lines.split('\n').map(|x| x.trim()) { let stat = procfs::DiskStat::from_line(line) .map_err(|e| format!("parse disk stat: {}", e))?; diff --git a/components/tikv_util/src/sys/thread.rs b/components/tikv_util/src/sys/thread.rs index 00a6e47b409..60c420661d0 100644 --- a/components/tikv_util/src/sys/thread.rs +++ b/components/tikv_util/src/sys/thread.rs @@ -121,7 +121,7 @@ mod imp { // Unsafe due to FFI. 
unsafe { let tid = libc::syscall(libc::SYS_gettid); - if libc::setpriority(libc::PRIO_PROCESS as u32, tid as u32, pri) != 0 { + if libc::setpriority(libc::PRIO_PROCESS, tid as u32, pri) != 0 { let e = Error::last_os_error(); return Err(e); } @@ -134,7 +134,7 @@ mod imp { unsafe { let tid = libc::syscall(libc::SYS_gettid); clear_errno(); - let ret = libc::getpriority(libc::PRIO_PROCESS as u32, tid as u32); + let ret = libc::getpriority(libc::PRIO_PROCESS, tid as u32); if ret == -1 { let e = Error::last_os_error(); if let Some(errno) = e.raw_os_error() { diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 5c9abf0d305..01133a71924 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -681,7 +681,7 @@ mod tests { let shorter_encoded = Key::from_encoded_slice(&encoded.0[..encoded_len - 9]); assert!(!shorter_encoded.is_encoded_from(&raw)); let mut longer_encoded = encoded.as_encoded().clone(); - longer_encoded.extend(&[0, 0, 0, 0, 0, 0, 0, 0, 0xFF]); + longer_encoded.extend([0, 0, 0, 0, 0, 0, 0, 0, 0xFF]); let longer_encoded = Key::from_encoded(longer_encoded); assert!(!longer_encoded.is_encoded_from(&raw)); diff --git a/fuzz/cli.rs b/fuzz/cli.rs index 96972d94565..201e659d8ba 100644 --- a/fuzz/cli.rs +++ b/fuzz/cli.rs @@ -31,7 +31,7 @@ lazy_static! 
{ static ref FUZZ_ROOT: PathBuf = WORKSPACE_ROOT.join("fuzz"); static ref FUZZ_TARGETS: Vec = { let source = FUZZ_ROOT.join("targets/mod.rs"); - let targets_rs = fs::read_to_string(&source).unwrap(); + let targets_rs = fs::read_to_string(source).unwrap(); let match_fuzz_fs = regex::Regex::new(r"pub fn fuzz_(\w+)\(").unwrap(); let target_names = match_fuzz_fs .captures_iter(&targets_rs) @@ -110,7 +110,7 @@ fn write_fuzz_target_source_file(fuzzer: Fuzzer, target: &str) -> Result<()> { template_file_path.display() ))?; - let target_file_path = fuzzer.directory().join(&format!("src/bin/{}.rs", target)); + let target_file_path = fuzzer.directory().join(format!("src/bin/{}.rs", target)); let mut file = fs::OpenOptions::new() .write(true) .create(true) @@ -159,7 +159,7 @@ fn get_seed_dir(target: &str) -> PathBuf { /// Create corpus dir for fuzz target fn create_corpus_dir(base: impl AsRef, target: &str) -> Result { let base = base.as_ref(); - let corpus_dir = base.join(&format!("corpus-{}", target)); + let corpus_dir = base.join(format!("corpus-{}", target)); fs::create_dir_all(&corpus_dir).context(format!( "unable to create corpus dir for {}{}", base.display(), @@ -192,13 +192,13 @@ fn run_afl(target: &str) -> Result<()> { let corpus_dir = create_corpus_dir(fuzzer.directory(), target)?; pre_check( - Command::new("cargo").args(&["afl", "--version"]), + Command::new("cargo").args(["afl", "--version"]), "cargo install afl", )?; // 1. cargo afl build (in fuzzer-afl directory) let fuzzer_build = Command::new("cargo") - .args(&["afl", "build", "--bin", target]) + .args(["afl", "build", "--bin", target]) .current_dir(fuzzer.directory()) .spawn() .context(format!("Failed to build {}", fuzzer))? 
@@ -218,7 +218,7 @@ fn run_afl(target: &str) -> Result<()> { // ``` let instrumented_bin = WORKSPACE_ROOT.join("target/debug").join(target); let fuzzer_bin = Command::new("cargo") - .args(&["afl", "fuzz"]) + .args(["afl", "fuzz"]) .arg("-i") .arg(&seed_dir) .arg("-o") @@ -244,7 +244,7 @@ fn run_afl(target: &str) -> Result<()> { /// Run one target fuzz test using Honggfuzz fn run_honggfuzz(target: &str) -> Result<()> { pre_check( - Command::new("cargo").args(&["hfuzz", "version"]), + Command::new("cargo").args(["hfuzz", "version"]), "cargo install honggfuzz --version 0.5.45", )?; @@ -262,7 +262,7 @@ fn run_honggfuzz(target: &str) -> Result<()> { ); let fuzzer_bin = Command::new("cargo") - .args(&["hfuzz", "run", target]) + .args(["hfuzz", "run", target]) .env("RUSTFLAGS", &rust_flags) .env("HFUZZ_RUN_ARGS", &hfuzz_args) .current_dir(fuzzer.directory()) @@ -321,7 +321,7 @@ fn run_libfuzzer(target: &str) -> Result<()> { asan_options.push_str(" detect_odr_violation=0"); let fuzzer_bin = Command::new("cargo") - .args(&["run", "--target", target_platform, "--bin", target, "--"]) + .args(["run", "--target", target_platform, "--bin", target, "--"]) .arg(&corpus_dir) .arg(&seed_dir) .env("RUSTFLAGS", &rust_flags) diff --git a/rust-toolchain b/rust-toolchain index 2181086f8d2..4e5f9a4d82b 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly-2022-07-31 +nightly-2022-11-15 diff --git a/scripts/clippy b/scripts/clippy index c5999ad670c..7685cddfeeb 100755 --- a/scripts/clippy +++ b/scripts/clippy @@ -19,11 +19,15 @@ fi # - Enables `significant_drop_in_scrutinee` after # https://github.com/rust-lang/rust-clippy/issues/8963 is fixed. # - `derive_partial_eq_without_eq` has compilation overhead. +# - Blocking issue for enabling `result_large_err` is the protobuf messages. +# - Blocking issue for clippy::large_enum_variant is the raftstore peer message. 
CLIPPY_LINTS=( -A clippy::module_inception \ + -A clippy::result_large_err \ + -A clippy::large_enum_variant \ -A clippy::should_implement_trait \ -A clippy::too_many_arguments \ - -A clippy::blacklisted_name \ + -A clippy::disallowed_names \ -A clippy::redundant_closure \ -A clippy::field_reassign_with_default \ -A clippy::wrong_self_convention \ diff --git a/src/config.rs b/src/config.rs index c33c8e8b63c..e9eca154d6e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -113,12 +113,7 @@ fn memory_limit_for_cf(is_raft_db: bool, cf: &str, total_mem: u64) -> ReadableSi (false, CF_WRITE) => (0.15, 0, usize::MAX), _ => unreachable!(), }; - let mut size = (total_mem as f64 * ratio) as usize; - if size < min { - size = min; - } else if size > max { - size = max; - } + let size = ((total_mem as f64 * ratio) as usize).clamp(min, max); ReadableSize::mb(size as u64 / MIB) } @@ -182,13 +177,13 @@ impl Default for TitanCfConfig { impl TitanCfConfig { fn build_opts(&self) -> RocksTitanDbOptions { let mut opts = RocksTitanDbOptions::new(); - opts.set_min_blob_size(self.min_blob_size.0 as u64); + opts.set_min_blob_size(self.min_blob_size.0); opts.set_blob_file_compression(self.blob_file_compression.into()); opts.set_blob_cache(self.blob_cache_size.0 as usize, -1, false, 0.0); - opts.set_min_gc_batch_size(self.min_gc_batch_size.0 as u64); - opts.set_max_gc_batch_size(self.max_gc_batch_size.0 as u64); + opts.set_min_gc_batch_size(self.min_gc_batch_size.0); + opts.set_max_gc_batch_size(self.max_gc_batch_size.0); opts.set_discardable_ratio(self.discardable_ratio); - opts.set_merge_small_file_threshold(self.merge_small_file_threshold.0 as u64); + opts.set_merge_small_file_threshold(self.merge_small_file_threshold.0); opts.set_blob_run_mode(self.blob_run_mode.into()); opts.set_level_merge(self.level_merge); opts.set_range_merge(self.range_merge); @@ -254,10 +249,7 @@ fn get_background_job_limits_impl( ); // Cap max_sub_compactions to allow at least two compactions. 
let max_compactions = max_background_jobs - max_background_flushes; - let max_sub_compactions: u32 = cmp::max( - 1, - cmp::min(defaults.max_sub_compactions, (max_compactions - 1) as u32), - ); + let max_sub_compactions: u32 = (max_compactions - 1).clamp(1, defaults.max_sub_compactions); // Maximum background GC threads for Titan let max_titan_background_gc = cmp::min(defaults.max_titan_background_gc, cpu_num); @@ -1123,7 +1115,7 @@ impl Default for DbConfig { rate_limiter_auto_tuned: true, bytes_per_sync: ReadableSize::mb(1), wal_bytes_per_sync: ReadableSize::kb(512), - max_sub_compactions: bg_job_limits.max_sub_compactions as u32, + max_sub_compactions: bg_job_limits.max_sub_compactions, writable_file_max_buffer_size: ReadableSize::mb(1), use_direct_io_for_flush_and_compaction: false, enable_pipelined_write: false, @@ -1179,8 +1171,8 @@ impl DbConfig { } } - opts.set_bytes_per_sync(self.bytes_per_sync.0 as u64); - opts.set_wal_bytes_per_sync(self.wal_bytes_per_sync.0 as u64); + opts.set_bytes_per_sync(self.bytes_per_sync.0); + opts.set_wal_bytes_per_sync(self.wal_bytes_per_sync.0); opts.set_max_subcompactions(self.max_sub_compactions); opts.set_writable_file_max_buffer_size(self.writable_file_max_buffer_size.0 as i32); opts.set_use_direct_io_for_flush_and_compaction( @@ -1434,7 +1426,7 @@ impl Default for RaftDbConfig { info_log_keep_log_file_num: 10, info_log_dir: "".to_owned(), info_log_level: RocksLogLevel::Info, - max_sub_compactions: bg_job_limits.max_sub_compactions as u32, + max_sub_compactions: bg_job_limits.max_sub_compactions, writable_file_max_buffer_size: ReadableSize::mb(1), use_direct_io_for_flush_and_compaction: false, enable_pipelined_write: true, @@ -1481,8 +1473,8 @@ impl RaftDbConfig { opts.enable_unordered_write(self.enable_unordered_write); opts.allow_concurrent_memtable_write(self.allow_concurrent_memtable_write); opts.add_event_listener(RocksEventListener::new("raft", None)); - opts.set_bytes_per_sync(self.bytes_per_sync.0 as u64); - 
opts.set_wal_bytes_per_sync(self.wal_bytes_per_sync.0 as u64); + opts.set_bytes_per_sync(self.bytes_per_sync.0); + opts.set_wal_bytes_per_sync(self.wal_bytes_per_sync.0); // TODO maybe create a new env for raft engine if self.titan.enabled { opts.set_titandb_options(&self.titan.build_opts()); @@ -3633,7 +3625,7 @@ pub fn persist_config(config: &TikvConfig) -> Result<(), String> { } // Create parent directory if missing. - if let Err(e) = fs::create_dir_all(&store_path) { + if let Err(e) = fs::create_dir_all(store_path) { return Err(format!( "create parent directory '{}' failed: {}", store_path.to_str().unwrap(), diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 1b7d42a8575..5123534db88 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -1332,7 +1332,7 @@ mod tests { let config = Config { end_point_request_max_handle_duration: ReadableDuration::millis( - (PAYLOAD_SMALL + PAYLOAD_LARGE) as u64 * 2, + (PAYLOAD_SMALL + PAYLOAD_LARGE) * 2, ), ..Default::default() }; @@ -1357,23 +1357,22 @@ mod tests { // Request 1: Unary, success response. let handler_builder = Box::new(|_, _: &_| { - Ok(UnaryFixture::new_with_duration( - Ok(coppb::Response::default()), - PAYLOAD_SMALL as u64, + Ok( + UnaryFixture::new_with_duration(Ok(coppb::Response::default()), PAYLOAD_SMALL) + .into_boxed(), ) - .into_boxed()) }); let resp_future_1 = copr.handle_unary_request(req_with_exec_detail.clone(), handler_builder); let sender = tx.clone(); thread::spawn(move || sender.send(vec![block_on(resp_future_1).unwrap()]).unwrap()); // Sleep a while to make sure that thread is spawn and snapshot is taken. - thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS as u64)); + thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS)); // Request 2: Unary, error response. 
let handler_builder = Box::new(|_, _: &_| { Ok( - UnaryFixture::new_with_duration(Err(box_err!("foo")), PAYLOAD_LARGE as u64) + UnaryFixture::new_with_duration(Err(box_err!("foo")), PAYLOAD_LARGE) .into_boxed(), ) }); @@ -1381,7 +1380,7 @@ mod tests { copr.handle_unary_request(req_with_exec_detail.clone(), handler_builder); let sender = tx.clone(); thread::spawn(move || sender.send(vec![block_on(resp_future_2).unwrap()]).unwrap()); - thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS as u64)); + thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS)); // Response 1 let resp = &rx.recv().unwrap()[0]; @@ -1447,7 +1446,7 @@ mod tests { let handler_builder = Box::new(|_, _: &_| { Ok(UnaryFixture::new_with_duration_yieldable( Ok(coppb::Response::default()), - PAYLOAD_SMALL as u64, + PAYLOAD_SMALL, ) .into_boxed()) }); @@ -1456,21 +1455,20 @@ mod tests { let sender = tx.clone(); thread::spawn(move || sender.send(vec![block_on(resp_future_1).unwrap()]).unwrap()); // Sleep a while to make sure that thread is spawn and snapshot is taken. - thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS as u64)); + thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS)); // Request 2: Unary, error response. let handler_builder = Box::new(|_, _: &_| { - Ok(UnaryFixture::new_with_duration_yieldable( - Err(box_err!("foo")), - PAYLOAD_LARGE as u64, + Ok( + UnaryFixture::new_with_duration_yieldable(Err(box_err!("foo")), PAYLOAD_LARGE) + .into_boxed(), ) - .into_boxed()) }); let resp_future_2 = copr.handle_unary_request(req_with_exec_detail.clone(), handler_builder); let sender = tx.clone(); thread::spawn(move || sender.send(vec![block_on(resp_future_2).unwrap()]).unwrap()); - thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS as u64)); + thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS)); // Response 1 // @@ -1524,18 +1522,17 @@ mod tests { // Request 1: Unary, success response. 
let handler_builder = Box::new(|_, _: &_| { - Ok(UnaryFixture::new_with_duration( - Ok(coppb::Response::default()), - PAYLOAD_LARGE as u64, + Ok( + UnaryFixture::new_with_duration(Ok(coppb::Response::default()), PAYLOAD_LARGE) + .into_boxed(), ) - .into_boxed()) }); let resp_future_1 = copr.handle_unary_request(req_with_exec_detail.clone(), handler_builder); let sender = tx.clone(); thread::spawn(move || sender.send(vec![block_on(resp_future_1).unwrap()]).unwrap()); // Sleep a while to make sure that thread is spawn and snapshot is taken. - thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS as u64)); + thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS)); // Request 2: Stream. let handler_builder = Box::new(|_, _: &_| { @@ -1545,11 +1542,7 @@ mod tests { Err(box_err!("foo")), Ok(coppb::Response::default()), ], - vec![ - PAYLOAD_SMALL as u64, - PAYLOAD_LARGE as u64, - PAYLOAD_SMALL as u64, - ], + vec![PAYLOAD_SMALL, PAYLOAD_LARGE, PAYLOAD_SMALL], ) .into_boxed()) }); diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index ade8a007383..383f6161a1b 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -843,7 +843,7 @@ impl SampleBuilder { .map_or_else(|| 0_usize, |req| req.get_top_n_size() as usize), common_handle_col_ids: common_handle_ids, columns_info, - analyze_common_handle: common_handle_req != None, + analyze_common_handle: common_handle_req.is_some(), }) } @@ -1116,7 +1116,7 @@ impl AnalyzeSamplingResult { impl Default for AnalyzeSamplingResult { fn default() -> Self { - AnalyzeSamplingResult::new(Box::new(ReservoirRowSampleCollector::default())) + AnalyzeSamplingResult::new(Box::::default()) } } diff --git a/src/coprocessor/statistics/histogram.rs b/src/coprocessor/statistics/histogram.rs index 8797c38a721..b7a70600e39 100644 --- a/src/coprocessor/statistics/histogram.rs +++ b/src/coprocessor/statistics/histogram.rs @@ -29,7 +29,7 @@ impl Bucket { upper_bound, 
lower_bound, repeats, - ndv: if with_ndv { 1 } else { 0 }, + ndv: with_ndv as u64, } } diff --git a/src/coprocessor_v2/plugin_registry.rs b/src/coprocessor_v2/plugin_registry.rs index cbcba39995d..6262fe6bae9 100644 --- a/src/coprocessor_v2/plugin_registry.rs +++ b/src/coprocessor_v2/plugin_registry.rs @@ -130,7 +130,7 @@ impl PluginRegistry { // Simple helper functions for loading/unloading plugins. let maybe_load = |file: &PathBuf| { let mut hot_reload_registry = hot_reload_registry.write().unwrap(); - if is_library_file(&file) { + if is_library_file(file) { // Ignore errors. hot_reload_registry.load_plugin(file).ok(); } @@ -243,7 +243,7 @@ impl PluginRegistry { let dir_name = dir_name.into(); let mut loaded_plugins = Vec::new(); - for entry in std::fs::read_dir(&dir_name)? { + for entry in std::fs::read_dir(dir_name)? { if let Ok(file) = entry.map(|f| f.path()) { if is_library_file(&file) { // Ignore errors. @@ -489,7 +489,7 @@ mod tests { fn load_plugin() { let library_path = initialize_library(); - let loaded_plugin = unsafe { LoadedPlugin::new(&library_path).unwrap() }; + let loaded_plugin = unsafe { LoadedPlugin::new(library_path).unwrap() }; assert_eq!(loaded_plugin.name(), "example_coprocessor_plugin"); assert_eq!(loaded_plugin.version(), &Version::parse("0.1.0").unwrap()); @@ -548,7 +548,7 @@ mod tests { let registry = PluginRegistry::new(); - let plugin_name = registry.load_plugin(&library_path).unwrap(); + let plugin_name = registry.load_plugin(library_path).unwrap(); assert!(registry.get_plugin(&plugin_name).is_some()); @@ -576,7 +576,7 @@ mod tests { registry.start_hot_reloading(&coprocessor_dir).unwrap(); // trigger loading - std::fs::copy(&original_library_path, &library_path).unwrap(); + std::fs::copy(original_library_path, &library_path).unwrap(); // fs watcher detects changes in every 3 seconds, therefore, wait 4 seconds so // as to make sure the watcher is triggered. 
std::thread::sleep(Duration::from_secs(4)); diff --git a/src/lib.rs b/src/lib.rs index a961abc7d38..f4fcd1cd97c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,8 +25,7 @@ #![feature(box_patterns)] #![feature(drain_filter)] #![feature(deadline_api)] -#![feature(generic_associated_types)] -#![feature(let_else)] +#![feature(let_chains)] #[macro_use(fail_point)] extern crate fail; diff --git a/src/read_pool.rs b/src/read_pool.rs index 4d9f7fd9264..5212c4ae594 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -192,10 +192,7 @@ impl ReadPoolHandle { match self { ReadPoolHandle::FuturePools { read_pool_normal, .. - } => { - read_pool_normal.get_running_task_count() as usize - / read_pool_normal.get_pool_size() - } + } => read_pool_normal.get_running_task_count() / read_pool_normal.get_pool_size(), ReadPoolHandle::Yatp { running_tasks, pool_size, diff --git a/src/server/debug.rs b/src/server/debug.rs index 6ee676ad1c4..48435f72163 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -884,7 +884,7 @@ impl Debugger { res.push(("region.end_key".to_owned(), hex::encode(®ion.end_key))); res.push(( "region.middle_key_by_approximate_size".to_owned(), - hex::encode(&middle_key), + hex::encode(middle_key), )); Ok(res) diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index 323f414c05c..f370a08e280 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -156,7 +156,7 @@ impl TabletFactory for KvEngineFactoryV2 { let path = self.tablet_path(region_id, suffix).join(TOMBSTONE_MARK); // When the full directory path does not exsit, create will return error and in // this case, we just ignore it. 
- let _ = std::fs::File::create(&path); + let _ = std::fs::File::create(path); debug!("tombstone tablet"; "region_id" => region_id, "suffix" => suffix); { let mut reg = self.registry.lock().unwrap(); @@ -201,7 +201,7 @@ impl TabletFactory for KvEngineFactoryV2 { } let db_path = self.tablet_path(region_id, suffix); - std::fs::rename(path, &db_path)?; + std::fs::rename(path, db_path)?; self.open_tablet( region_id, Some(suffix), diff --git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index b80c17e5ff9..01e37727f11 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -197,7 +197,7 @@ fn set_status_metrics(state: GcManagerState) { ] { AUTO_GC_STATUS_GAUGE_VEC .with_label_values(&[s.tag()]) - .set(if state == *s { 1 } else { 0 }); + .set((state == *s) as i64); } } diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 8e345f0909b..81de11cbae9 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -492,7 +492,7 @@ where "versions" => gc_info.found_versions, ); } - if gc_info.deleted_versions as usize >= GC_LOG_DELETED_VERSION_THRESHOLD { + if gc_info.deleted_versions >= GC_LOG_DELETED_VERSION_THRESHOLD { debug!( "GC deleted plenty versions for a key"; "key" => %key, diff --git a/src/server/lock_manager/waiter_manager.rs b/src/server/lock_manager/waiter_manager.rs index 33164833fba..8cce7bc5da6 100644 --- a/src/server/lock_manager/waiter_manager.rs +++ b/src/server/lock_manager/waiter_manager.rs @@ -384,8 +384,8 @@ impl WaitTable { fn to_wait_for_entries(&self) -> Vec { self.waiter_pool - .iter() - .map(|(_, waiter)| { + .values() + .map(|waiter| { let mut wait_for_entry = WaitForEntry::default(); wait_for_entry.set_txn(waiter.start_ts.into_inner()); wait_for_entry.set_wait_for_txn(waiter.wait_info.lock_digest.ts.into_inner()); diff --git a/src/server/node.rs b/src/server/node.rs index 65dd592b490..0b654921f59 100644 --- 
a/src/server/node.rs +++ b/src/server/node.rs @@ -357,7 +357,7 @@ where because found data key that is not written by TiDB: {:?}", ident.api_version, self.api_version, - log_wrappers::hex_encode_upper(&unexpected_data_key) + log_wrappers::hex_encode_upper(unexpected_data_key) )); } } diff --git a/src/server/service/diagnostics/log.rs b/src/server/service/diagnostics/log.rs index 6f06bf17b30..8e77d65233e 100644 --- a/src/server/service/diagnostics/log.rs +++ b/src/server/service/diagnostics/log.rs @@ -559,7 +559,7 @@ Some invalid logs 2: Welcome to TiKV .unwrap(); let log_file2 = dir.path().join("tikv.2019-08-23T18-10-00.387.log"); - let mut file = File::create(&log_file2).unwrap(); + let mut file = File::create(log_file2).unwrap(); write!( file, r#"[2019/08/23 18:10:01.387 +08:00] [INFO] [foo.rs:100] [some message] [key=val] @@ -736,7 +736,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# // this file is ignored because its filename is not expected let log_file2 = dir.path().join("tikv.log.2"); - let mut file = File::create(&log_file2).unwrap(); + let mut file = File::create(log_file2).unwrap(); write!( file, r#"[2019/08/23 18:10:01.387 +08:00] [INFO] [foo.rs:100] [some message] [key=val] @@ -749,7 +749,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# .unwrap(); let log_file3 = dir.path().join("tikv.2019-08-23T18-11-02.123.log"); - let mut file = File::create(&log_file3).unwrap(); + let mut file = File::create(log_file3).unwrap(); write!( file, r#"[2019/08/23 18:11:53.387 +08:00] [INFO] [foo.rs:100] [some message] [key=val] @@ -766,7 +766,7 @@ Some invalid logs 2: Welcome to TiKV - test-filter"# // this file is ignored because its filename is not expected let log_file4 = dir.path().join("tikv.T.log"); - let mut file = File::create(&log_file4).unwrap(); + let mut file = File::create(log_file4).unwrap(); write!( file, r#"[2019/08/23 18:10:01.387 +08:00] [INFO] [foo.rs:100] [some message] [key=val] diff --git a/src/server/service/diagnostics/sys.rs 
b/src/server/service/diagnostics/sys.rs index e62028e66e6..17ed9a78b3f 100644 --- a/src/server/service/diagnostics/sys.rs +++ b/src/server/service/diagnostics/sys.rs @@ -201,7 +201,7 @@ fn nic_load_info(prev_nic: HashMap, collector: &mut Vec, collector: &mut Vec) { let current = ioload::IoLoad::snapshot(); - let rate = |cur, prev| (cur - prev) as f64; + let rate = |cur, prev| (cur - prev); for (name, cur) in current.into_iter() { let prev = match prev_io.get(&name) { Some(p) => p, diff --git a/src/server/status_server/profile.rs b/src/server/status_server/profile.rs index 3419c7df0c8..b3d91d3bea6 100644 --- a/src/server/status_server/profile.rs +++ b/src/server/status_server/profile.rs @@ -234,7 +234,7 @@ pub fn read_file(path: &str) -> Result, String> { pub fn jeprof_heap_profile(path: &str) -> Result, String> { info!("using jeprof to process {}", path); let output = Command::new("./jeprof") - .args(&["--show_bytes", "./bin/tikv-server", path, "--svg"]) + .args(["--show_bytes", "./bin/tikv-server", path, "--svg"]) .output() .map_err(|e| format!("jeprof: {}", e))?; if !output.status.success() { @@ -250,7 +250,7 @@ pub fn list_heap_profiles() -> Result, String> { None => return Ok(vec![]), }; - let dir = std::fs::read_dir(&path).map_err(|e| format!("read dir fail: {}", e))?; + let dir = std::fs::read_dir(path).map_err(|e| format!("read dir fail: {}", e))?; let mut profiles = Vec::new(); for item in dir { let item = match item { diff --git a/src/storage/config.rs b/src/storage/config.rs index 7f2e6820201..685272dbeee 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -78,7 +78,7 @@ impl Default for Config { scheduler_worker_pool_size: if cpu_num >= 16.0 { 8 } else { - std::cmp::max(1, std::cmp::min(4, cpu_num as usize)) + cpu_num.clamp(1., 4.) 
as usize }, scheduler_pending_write_threshold: ReadableSize::mb(DEFAULT_SCHED_PENDING_WRITE_MB), reserve_space: ReadableSize::gb(DEFAULT_RESERVED_SPACE_GB), diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 9966e14812e..3f5e48e8017 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -1658,7 +1658,7 @@ mod tests { assert!(latches.acquire(&mut lock, id)); } let unlocked = latches.release(&lock, id); - if id as u64 == max_id { + if id == max_id { assert!(unlocked.is_empty()); } else { assert_eq!(unlocked, vec![id + 1]); diff --git a/tests/benches/misc/coprocessor/codec/mysql/json/mod.rs b/tests/benches/misc/coprocessor/codec/mysql/json/mod.rs index 2fcc3915125..7796be6c53b 100644 --- a/tests/benches/misc/coprocessor/codec/mysql/json/mod.rs +++ b/tests/benches/misc/coprocessor/codec/mysql/json/mod.rs @@ -18,7 +18,7 @@ fn download_and_extract_file(url: &str) -> io::Result { .stderr(Stdio::null()) .spawn()?; let mut tar_child = Command::new("tar") - .args(&["xzf", "-", "--to-stdout"]) + .args(["xzf", "-", "--to-stdout"]) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .stderr(Stdio::null()) diff --git a/tests/failpoints/cases/test_snap.rs b/tests/failpoints/cases/test_snap.rs index 93acfffc258..dde25bff636 100644 --- a/tests/failpoints/cases/test_snap.rs +++ b/tests/failpoints/cases/test_snap.rs @@ -163,7 +163,7 @@ fn assert_snapshot(snap_dir: &str, region_id: u64, exist: bool) { let region_id = format!("{}", region_id); let timer = Instant::now(); loop { - for p in fs::read_dir(&snap_dir).unwrap() { + for p in fs::read_dir(snap_dir).unwrap() { let name = p.unwrap().file_name().into_string().unwrap(); let mut parts = name.split('_'); parts.next(); @@ -354,12 +354,12 @@ fn test_shutdown_when_snap_gc() { pd_client.must_add_peer(r1, new_learner_peer(2, 2)); // Snapshot directory on store 2 shouldn't be empty. 
- let snap_dir = cluster.get_snap_dir(2); + let snap_dir = &cluster.get_snap_dir(2); for i in 0..=100 { if i == 100 { panic!("store 2 snap dir must not be empty"); } - let dir = fs::read_dir(&snap_dir).unwrap(); + let dir = fs::read_dir(snap_dir).unwrap(); if dir.count() > 0 { break; } @@ -377,7 +377,7 @@ fn test_shutdown_when_snap_gc() { cluster.stop_node(2); let snap_dir = cluster.get_snap_dir(2); - let dir = fs::read_dir(&snap_dir).unwrap(); + let dir = fs::read_dir(snap_dir).unwrap(); if dir.count() == 0 { panic!("store 2 snap dir must not be empty"); } @@ -591,7 +591,7 @@ fn test_snapshot_gc_after_failed() { let mut snap_file_path = PathBuf::from(&snap_dir); snap_file_path.push(&f); let snap_file_path = snap_file_path.as_path(); - let mut file = match File::create(&snap_file_path) { + let mut file = match File::create(snap_file_path) { Err(why) => panic!("couldn't create {:?}: {}", snap_file_path, why), Ok(file) => file, }; diff --git a/tests/integrations/backup/mod.rs b/tests/integrations/backup/mod.rs index ff07d8a712a..f432fd72246 100644 --- a/tests/integrations/backup/mod.rs +++ b/tests/integrations/backup/mod.rs @@ -499,7 +499,7 @@ fn test_invalid_external_storage() { // Set backup directory read-only. TiKV fails to backup. 
let tmp = Builder::new().tempdir().unwrap(); - let f = File::open(&tmp.path()).unwrap(); + let f = File::open(tmp.path()).unwrap(); let mut perms = f.metadata().unwrap().permissions(); perms.set_readonly(true); f.set_permissions(perms.clone()).unwrap(); diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index 03bc7ba46c1..70e70b3cbe6 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -76,7 +76,7 @@ fn start_raftstore( .as_path() .display() .to_string(); - Arc::new(SstImporter::new(&cfg.import, &p, None, cfg.storage.api_version()).unwrap()) + Arc::new(SstImporter::new(&cfg.import, p, None, cfg.storage.api_version()).unwrap()) }; let snap_mgr = { let p = dir diff --git a/tests/integrations/config/test_config_client.rs b/tests/integrations/config/test_config_client.rs index 6faa68f3932..b56987fa1dc 100644 --- a/tests/integrations/config/test_config_client.rs +++ b/tests/integrations/config/test_config_client.rs @@ -149,7 +149,7 @@ blob-run-mode = "normal" cfg_controller.update(change).unwrap(); let res = { let mut buf = Vec::new(); - let mut f = File::open(&cfg_controller.get_current().cfg_path).unwrap(); + let mut f = File::open(cfg_controller.get_current().cfg_path).unwrap(); f.read_to_end(&mut buf).unwrap(); buf }; diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index 952516daf35..96ceb1c5c8c 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -764,7 +764,7 @@ fn test_order_by_pk_with_select_from_index() { let name_datum = name.map(|s| s.as_bytes()).into(); let expected_encoded = datum::encode_value( &mut EvalContext::default(), - &[name_datum, (cnt as i64).into(), (id as i64).into()], + &[name_datum, cnt.into(), id.into()], ) .unwrap(); let result_encoded = datum::encode_value(&mut EvalContext::default(), &row).unwrap(); 
From d58343d03890c3970178bffc0e9fafdd3a0d7df0 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 15 Nov 2022 17:35:55 +0800 Subject: [PATCH 0335/1149] cdc: run CheckLeader in a dedicate thread (#13799) close tikv/tikv#13774 cdc: run CheckLeader in a dedicate thread Signed-off-by: Neil Shen Co-authored-by: Ti Chi Robot --- .../cdc/tests/failpoints/test_endpoint.rs | 8 ++++- components/cdc/tests/mod.rs | 36 +++++++++++++++++-- components/server/src/server.rs | 8 ++++- 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/components/cdc/tests/failpoints/test_endpoint.rs b/components/cdc/tests/failpoints/test_endpoint.rs index 6e208ccac90..3fdd6048971 100644 --- a/components/cdc/tests/failpoints/test_endpoint.rs +++ b/components/cdc/tests/failpoints/test_endpoint.rs @@ -8,7 +8,7 @@ use std::{ use api_version::{test_kv_format_impl, KvFormat}; use causal_ts::CausalTsProvider; -use cdc::{recv_timeout, OldValueCache, Task, Validate}; +use cdc::{recv_timeout, Delegate, OldValueCache, Task, Validate}; use futures::{executor::block_on, sink::SinkExt}; use grpcio::{ChannelBuilder, Environment, WriteFlags}; use kvproto::{cdcpb::*, kvrpcpb::*, tikvpb_grpc::TikvClient}; @@ -58,6 +58,12 @@ fn test_cdc_double_scan_deregister_impl() { new_event_feed(suite.get_region_cdc_client(1)); block_on(req_tx_1.send((req, WriteFlags::default()))).unwrap(); + // wait for the second connection register to the delegate. + suite.must_wait_delegate_condition( + 1, + Arc::new(|d: Option<&Delegate>| d.unwrap().downstreams().len() == 2), + ); + // close connection block_on(req_tx.close()).unwrap(); event_feed_wrap.replace(None); diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index c14a91de99a..9e6621ffbdf 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -1,9 +1,12 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{sync::*, time::Duration}; +use std::{ + sync::*, + time::{Duration, Instant}, +}; use causal_ts::CausalTsProvider; -use cdc::{recv_timeout, CdcObserver, FeatureGate, MemoryQuota, Task}; +use cdc::{recv_timeout, CdcObserver, Delegate, FeatureGate, MemoryQuota, Task, Validate}; use collections::HashMap; use concurrency_manager::ConcurrencyManager; use engine_rocks::RocksEngine; @@ -523,4 +526,33 @@ impl TestSuite { ) .unwrap(); } + + pub fn must_wait_delegate_condition( + &self, + region_id: u64, + cond: Arc) -> bool + Sync + Send>, + ) { + let scheduler = self.endpoints[®ion_id].scheduler(); + let start = Instant::now(); + loop { + sleep_ms(100); + let (tx, rx) = mpsc::sync_channel(1); + let c = cond.clone(); + let checker = move |d: Option<&Delegate>| { + tx.send(c(d)).unwrap(); + }; + scheduler + .schedule(Task::Validate(Validate::Region( + region_id, + Box::new(checker), + ))) + .unwrap(); + if rx.recv().unwrap() { + return; + } + if start.elapsed() > Duration::from_secs(5) { + panic!("wait delegate timeout"); + } + } + } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index e4f4dc83049..aa3a67591e2 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -232,6 +232,7 @@ struct TikvServer { concurrency_manager: ConcurrencyManager, env: Arc, background_worker: Worker, + check_leader_worker: Worker, sst_worker: Option>>, quota_limiter: Arc, causal_ts_provider: Option>, // used for rawkv apiv2 @@ -360,6 +361,10 @@ where info!("Causal timestamp provider startup."); } + // Run check leader in a dedicate thread, because it is time sensitive + // and crucial to TiCDC replication lag. 
+ let check_leader_worker = WorkerBuilder::new("check_leader").thread_count(1).create(); + TikvServer { config, cfg_controller: Some(cfg_controller), @@ -381,6 +386,7 @@ where concurrency_manager, env, background_worker, + check_leader_worker, flow_info_sender: None, flow_info_receiver: None, sst_worker: None, @@ -870,7 +876,7 @@ where self.coprocessor_host.clone().unwrap(), ); let check_leader_scheduler = self - .background_worker + .check_leader_worker .start("check-leader", check_leader_runner); let server_config = Arc::new(VersionTrack::new(self.config.server.clone())); From fca5a9ef2d80a88282de57f75e5243bd2cd14486 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Wed, 16 Nov 2022 09:45:55 +0800 Subject: [PATCH 0336/1149] storage: avoid repeating unnecessary checks on flashback keys (#13801) ref tikv/tikv#13800 - A tiny refactor to `flashback_to_version_read_write` to reduce the unnecessary checks on flashback keys. - Check the flashback state while validating the local read request to not let a flashback read phase request bypass. Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- .../raftstore/src/store/worker/metrics.rs | 2 + components/raftstore/src/store/worker/read.rs | 17 ++++ src/storage/mvcc/reader/reader.rs | 90 +++++++------------ .../txn/actions/flashback_to_version.rs | 71 ++++----------- .../flashback_to_version_read_phase.rs | 1 - .../integrations/raftstore/test_flashback.rs | 16 +++- 6 files changed, 86 insertions(+), 111 deletions(-) diff --git a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index 0d396eae575..5861e27a508 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -57,6 +57,8 @@ make_static_metric! 
{ cache_miss, safe_ts, witness, + flashback_not_prepared, + flashback_in_progress, } pub struct LocalReadRejectCounter : LocalIntCounter { diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 2c92923fc4e..0766a52a387 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -813,6 +813,23 @@ where return Ok(None); } + // Check whether the region is in the flashback state and the local read could + // be performed. + let is_in_flashback = delegate.region.is_in_flashback; + if let Err(e) = util::check_flashback_state(is_in_flashback, req, region_id) { + TLS_LOCAL_READ_METRICS.with(|m| match e { + Error::FlashbackNotPrepared(_) => { + m.borrow_mut().reject_reason.flashback_not_prepared.inc() + } + Error::FlashbackInProgress(_) => { + m.borrow_mut().reject_reason.flashback_in_progress.inc() + } + _ => unreachable!(), + }); + debug!("rejected by flashback state"; "is_in_flashback" => is_in_flashback, "tag" => &delegate.tag); + return Ok(None); + } + Ok(Some(delegate)) } } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 2fe95c2c1dd..61975aa666c 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -536,9 +536,8 @@ impl MvccReader { /// specified, it will scan the latest version for each key, if the key /// does not exist or is not visible at that point, an `Option::None` will /// be placed. The return type is: - /// * `(Vec<(key, commit_ts, Option)>, has_remain)`. + /// * `(Vec<(key, Option)>, has_remain)`. /// - `key` is the encoded key without commit ts. - /// - `commit_ts` is the latest commit ts of the key. /// - `write` is the PUT/DELETE write record at the given version. /// - `has_remain` indicates whether there MAY be remaining writes that /// can be scanned. 
@@ -554,9 +553,9 @@ impl MvccReader { version: Option, filter: F, limit: usize, - ) -> Result<(Vec<(Key, TimeStamp, Option)>, bool)> + ) -> Result<(Vec<(Key, Option)>, bool)> where - F: Fn(&Key) -> bool, + F: Fn(&Key /* user key */, TimeStamp /* latest `commit_ts` */) -> bool, { self.create_write_cursor()?; let cursor = self.write_cursor.as_mut().unwrap(); @@ -581,14 +580,17 @@ impl MvccReader { } } let commit_ts = key.decode_ts()?; - let user_key = key.clone().truncate_ts()?; - // To make sure we only check each unique key once and `filter(&key)` returns + let user_key = key.truncate_ts()?; + // To make sure we only check each unique user key once and the filter returns // true. - if (cur_key.is_some() && cur_key.clone().unwrap() == user_key) || !filter(&key) { + let is_same_user_key = cur_key.as_ref() == Some(&user_key); + if !is_same_user_key { + cur_key = Some(user_key.clone()); + } + if is_same_user_key || !filter(&user_key, commit_ts) { cursor.next(&mut self.statistics.write); continue; } - cur_key = Some(user_key.clone()); let mut write = None; let version_key = user_key.clone().append_ts(version); @@ -623,7 +625,7 @@ impl MvccReader { } } } - key_writes.push((user_key, commit_ts, write)); + key_writes.push((user_key, write)); if limit > 0 && key_writes.len() == limit { has_remain = true; break; @@ -1824,7 +1826,7 @@ pub mod tests { end_key: Option, version: Option, limit: usize, - expect_res: Vec<(Key, TimeStamp, Option)>, + expect_res: Vec<(Key, Option)>, expect_is_remain: bool, } @@ -1838,7 +1840,6 @@ pub mod tests { expect_res: vec![ ( Key::from_raw(b"k0"), - 1000.into(), Some(Write::new( WriteType::Put, 999.into(), @@ -1847,17 +1848,14 @@ pub mod tests { ), ( Key::from_raw(b"k1"), - 4.into(), Some(Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec()))), ), ( Key::from_raw(b"k2"), - 4.into(), Some(Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec()))), ), ( Key::from_raw(b"k3"), - 9.into(), Some(Write::new(WriteType::Put, 8.into(), 
Some(b"v3@8".to_vec()))), ), ], @@ -1870,20 +1868,17 @@ pub mod tests { version: Some(9), limit: 4, expect_res: vec![ - (Key::from_raw(b"k0"), 1000.into(), None), + (Key::from_raw(b"k0"), None), ( Key::from_raw(b"k1"), - 4.into(), Some(Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec()))), ), ( Key::from_raw(b"k2"), - 4.into(), Some(Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec()))), ), ( Key::from_raw(b"k3"), - 9.into(), Some(Write::new(WriteType::Put, 8.into(), Some(b"v3@8".to_vec()))), ), ], @@ -1896,20 +1891,17 @@ pub mod tests { version: Some(8), limit: 4, expect_res: vec![ - (Key::from_raw(b"k0"), 1000.into(), None), + (Key::from_raw(b"k0"), None), ( Key::from_raw(b"k1"), - 4.into(), Some(Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec()))), ), ( Key::from_raw(b"k2"), - 4.into(), Some(Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec()))), ), ( Key::from_raw(b"k3"), - 9.into(), Some(Write::new(WriteType::Put, 5.into(), Some(b"v3@5".to_vec()))), ), ], @@ -1921,20 +1913,17 @@ pub mod tests { version: Some(7), limit: 4, expect_res: vec![ - (Key::from_raw(b"k0"), 1000.into(), None), + (Key::from_raw(b"k0"), None), ( Key::from_raw(b"k1"), - 4.into(), Some(Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec()))), ), ( Key::from_raw(b"k2"), - 4.into(), Some(Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec()))), ), ( Key::from_raw(b"k3"), - 9.into(), Some(Write::new(WriteType::Put, 5.into(), Some(b"v3@5".to_vec()))), ), ], @@ -1946,20 +1935,17 @@ pub mod tests { version: Some(6), limit: 4, expect_res: vec![ - (Key::from_raw(b"k0"), 1000.into(), None), + (Key::from_raw(b"k0"), None), ( Key::from_raw(b"k1"), - 4.into(), Some(Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec()))), ), ( Key::from_raw(b"k2"), - 4.into(), Some(Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec()))), ), ( Key::from_raw(b"k3"), - 9.into(), Some(Write::new(WriteType::Put, 5.into(), Some(b"v3@5".to_vec()))), ), ], @@ -1972,18 
+1958,16 @@ pub mod tests { version: Some(5), limit: 4, expect_res: vec![ - (Key::from_raw(b"k0"), 1000.into(), None), + (Key::from_raw(b"k0"), None), ( Key::from_raw(b"k1"), - 4.into(), Some(Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec()))), ), ( Key::from_raw(b"k2"), - 4.into(), Some(Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec()))), ), - (Key::from_raw(b"k3"), 9.into(), None), + (Key::from_raw(b"k3"), None), ], expect_is_remain: true, }, @@ -1993,18 +1977,16 @@ pub mod tests { version: Some(4), limit: 4, expect_res: vec![ - (Key::from_raw(b"k0"), 1000.into(), None), + (Key::from_raw(b"k0"), None), ( Key::from_raw(b"k1"), - 4.into(), Some(Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec()))), ), ( Key::from_raw(b"k2"), - 4.into(), Some(Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec()))), ), - (Key::from_raw(b"k3"), 9.into(), None), + (Key::from_raw(b"k3"), None), ], expect_is_remain: true, }, @@ -2015,18 +1997,16 @@ pub mod tests { version: Some(3), limit: 4, expect_res: vec![ - (Key::from_raw(b"k0"), 1000.into(), None), + (Key::from_raw(b"k0"), None), ( Key::from_raw(b"k1"), - 4.into(), Some(Write::new(WriteType::Put, 1.into(), Some(b"v1@1".to_vec()))), ), ( Key::from_raw(b"k2"), - 4.into(), Some(Write::new(WriteType::Put, 1.into(), Some(b"v2@1".to_vec()))), ), - (Key::from_raw(b"k3"), 9.into(), None), + (Key::from_raw(b"k3"), None), ], expect_is_remain: true, }, @@ -2036,18 +2016,16 @@ pub mod tests { version: Some(2), limit: 4, expect_res: vec![ - (Key::from_raw(b"k0"), 1000.into(), None), + (Key::from_raw(b"k0"), None), ( Key::from_raw(b"k1"), - 4.into(), Some(Write::new(WriteType::Put, 1.into(), Some(b"v1@1".to_vec()))), ), ( Key::from_raw(b"k2"), - 4.into(), Some(Write::new(WriteType::Put, 1.into(), Some(b"v2@1".to_vec()))), ), - (Key::from_raw(b"k3"), 9.into(), None), + (Key::from_raw(b"k3"), None), ], expect_is_remain: true, }, @@ -2058,10 +2036,10 @@ pub mod tests { version: Some(1), limit: 4, expect_res: vec![ - 
(Key::from_raw(b"k0"), 1000.into(), None), - (Key::from_raw(b"k1"), 4.into(), None), - (Key::from_raw(b"k2"), 4.into(), None), - (Key::from_raw(b"k3"), 9.into(), None), + (Key::from_raw(b"k0"), None), + (Key::from_raw(b"k1"), None), + (Key::from_raw(b"k2"), None), + (Key::from_raw(b"k3"), None), ], expect_is_remain: true, }, @@ -2071,7 +2049,7 @@ pub mod tests { end_key: None, version: Some(0), limit: 1, - expect_res: vec![(Key::from_raw(b"k0"), 1000.into(), None)], + expect_res: vec![(Key::from_raw(b"k0"), None)], expect_is_remain: true, }, Case { @@ -2080,10 +2058,10 @@ pub mod tests { version: Some(0), limit: 5, expect_res: vec![ - (Key::from_raw(b"k0"), 1000.into(), None), - (Key::from_raw(b"k1"), 4.into(), None), - (Key::from_raw(b"k2"), 4.into(), None), - (Key::from_raw(b"k3"), 9.into(), None), + (Key::from_raw(b"k0"), None), + (Key::from_raw(b"k1"), None), + (Key::from_raw(b"k2"), None), + (Key::from_raw(b"k3"), None), ], expect_is_remain: false, }, @@ -2097,7 +2075,7 @@ pub mod tests { case.start_key.as_ref(), case.end_key.as_ref(), case.version.map(Into::into), - |_| true, + |_, _| true, case.limit, ) .unwrap(); diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index 02095d4b46d..98e2e433632 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -4,7 +4,7 @@ use txn_types::{Key, Lock, TimeStamp, Write, WriteType}; use crate::storage::{ mvcc::{MvccReader, MvccTxn, SnapshotReader, MAX_TXN_WRITE_SIZE}, - txn::{actions::check_txn_status::rollback_lock, Error, ErrorInner, Result as TxnResult}, + txn::{actions::check_txn_status::rollback_lock, Result as TxnResult}, Snapshot, Statistics, }; @@ -32,63 +32,29 @@ pub fn flashback_to_version_read_write( next_write_key: Key, end_key: &Key, flashback_version: TimeStamp, - flashback_start_ts: TimeStamp, flashback_commit_ts: TimeStamp, statistics: &mut Statistics, ) -> TxnResult)>> { // To flashback 
the data, we need to get all the latest keys first by scanning // every unique key in `CF_WRITE` and to get its corresponding old MVCC write // record if exists. - let mut key_old_writes = Vec::with_capacity(FLASHBACK_BATCH_SIZE); - let mut has_remain_writes = true; - let mut next_write_key = next_write_key; - // Try to read as many writes as possible in one batch. - while key_old_writes.len() < FLASHBACK_BATCH_SIZE && has_remain_writes { - let key_ts_old_writes; - (key_ts_old_writes, has_remain_writes) = reader.scan_writes( - Some(&next_write_key), - Some(end_key), - Some(flashback_version), - // No need to find an old version for the key if its latest `commit_ts` is smaller - // than or equal to the version. - |key| key.decode_ts().unwrap_or(TimeStamp::zero()) > flashback_version, - FLASHBACK_BATCH_SIZE - key_old_writes.len(), - )?; - statistics.add(&reader.statistics); - // If `has_remain_writes` is true, it means that the batch is full and we may - // need to read another round, so we have to update the `next_write_key` here. - if has_remain_writes { - next_write_key = key_ts_old_writes - .last() - .map(|(key, ..)| key.clone()) - .unwrap(); - } - // Check the latest commit ts to make sure there is no commit change during the - // flashback, otherwise, we need to abort the flashback. - for (key, commit_ts, old_write) in key_ts_old_writes.into_iter() { - if commit_ts > flashback_commit_ts { - return Err(Error::from(ErrorInner::InvalidTxnTso { - start_ts: flashback_start_ts, - commit_ts: flashback_commit_ts, - })); - } - // Although the first flashback preparation phase makes sure there will be no - // writes other than flashback after it, we CAN NOT return directly here. - // Suppose the second phase procedure contains two batches to flashback. 
After - // the first batch is committed, if the region is down, the client will retry - // the flashback from the very first beginning, because the data in the - // first batch has been written the flashbacked data with the same - // `commit_ts`, So we need to skip it to ensure the following data will - // be flashbacked continuously. - // And some large key modifications will exceed the max txn size limit - // through the execution, the write will forcibly finish the batch of data. - // So it may happen that part of the keys in a batch may be flashbacked. - if commit_ts == flashback_commit_ts { - continue; - } - key_old_writes.push((key, old_write)); - } - } + let result = reader.scan_writes( + Some(&next_write_key), + Some(end_key), + Some(flashback_version), + |_, latest_commit_ts| { + // There is no any other write could happen after the flashback begins. + assert!(latest_commit_ts <= flashback_commit_ts); + // - No need to find an old version for the key if its latest `commit_ts` is + // smaller than or equal to the flashback version. + // - No need to flashback a key twice if its latest `commit_ts` is equal to the + // flashback `commit_ts`. 
+ latest_commit_ts > flashback_version && latest_commit_ts < flashback_commit_ts + }, + FLASHBACK_BATCH_SIZE, + ); + statistics.add(&reader.statistics); + let (key_old_writes, _) = result?; Ok(key_old_writes) } @@ -226,7 +192,6 @@ pub mod tests { key, &next_key, version, - start_ts, commit_ts, &mut statistics, ) diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs index cfc6856da9c..b1a83a49ff8 100644 --- a/src/storage/txn/commands/flashback_to_version_read_phase.rs +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -133,7 +133,6 @@ impl ReadCommand for FlashbackToVersionReadPhase { next_write_key, &self.end_key, self.version, - self.start_ts, self.commit_ts, statistics, )?; diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index 5227e7ea6bc..7fff4dad606 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -258,7 +258,7 @@ fn test_flashback_for_local_read() { cluster.run(); cluster.must_put(b"k1", b"v1"); - let region = cluster.get_region(b"k1"); + let mut region = cluster.get_region(b"k1"); cluster.must_transfer_leader(region.get_id(), peer.clone()); // Check local read before prepare flashback @@ -318,6 +318,20 @@ fn test_flashback_for_local_read() { // Check the leader does a local read. let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index); + + // A local read with flashback flag will also be blocked. 
+ let mut req = new_request( + region.get_id(), + region.take_region_epoch(), + vec![new_get_cmd(b"k1")], + false, + ); + let new_leader = cluster.query_leader(1, region.get_id(), Duration::from_secs(1)); + req.mut_header().set_peer(new_leader.unwrap()); + req.mut_header() + .set_flags(WriteBatchFlags::FLASHBACK.bits()); + let resp = cluster.call_command(req, Duration::from_secs(3)).unwrap(); + assert!(resp.get_header().get_error().has_flashback_not_prepared()); } #[test] From 2704588c6aaa1a269bb91499229e358d23cc636b Mon Sep 17 00:00:00 2001 From: Jarvis Date: Wed, 16 Nov 2022 14:51:55 +0800 Subject: [PATCH 0337/1149] Protect Raft Engine Disk Usage (#13633) close tikv/tikv#13642 Signed-off-by: Jarvis Zheng Signed-off-by: Jarvis Co-authored-by: Xinye Tao --- Cargo.lock | 14 +- components/engine_panic/src/raft_engine.rs | 4 + components/engine_rocks/src/raft_engine.rs | 4 + components/engine_traits/src/raft_engine.rs | 4 + components/raft_log_engine/src/engine.rs | 8 ++ components/server/src/server.rs | 139 ++++++++++++++++---- components/tikv_util/Cargo.toml | 1 + components/tikv_util/src/sys/disk.rs | 9 ++ components/tikv_util/src/sys/mod.rs | 62 +++++++++ etc/config-template.toml | 6 + src/storage/config.rs | 4 + tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 13 files changed, 227 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2622ed983f5..1722d0385e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3120,6 +3120,15 @@ dependencies = [ "tempdir", ] +[[package]] +name = "mnt" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1587ebb20a5b04738f16cffa7e2526f1b8496b84f92920facd518362ff1559eb" +dependencies = [ + "libc 0.2.132", +] + [[package]] name = "more-asserts" version = "0.2.1" @@ -4124,7 +4133,7 @@ dependencies = [ [[package]] name = "raft-engine" version = "0.3.0" -source = 
"git+https://github.com/tikv/raft-engine.git#a0d29980f1448565a6d03f911ebb103c4266f1f4" +source = "git+https://github.com/tikv/raft-engine.git#82f6da7b8dff1856483e8e72a59dda903fb2499b" dependencies = [ "byteorder", "crc32fast", @@ -4158,7 +4167,7 @@ dependencies = [ [[package]] name = "raft-engine-ctl" version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#a0d29980f1448565a6d03f911ebb103c4266f1f4" +source = "git+https://github.com/tikv/raft-engine.git#82f6da7b8dff1856483e8e72a59dda903fb2499b" dependencies = [ "clap 3.1.6", "env_logger", @@ -6475,6 +6484,7 @@ dependencies = [ "libc 0.2.132", "log", "log_wrappers", + "mnt", "nix 0.24.1", "num-traits", "num_cpus", diff --git a/components/engine_panic/src/raft_engine.rs b/components/engine_panic/src/raft_engine.rs index 75e0e68269d..ad05e66c6fa 100644 --- a/components/engine_panic/src/raft_engine.rs +++ b/components/engine_panic/src/raft_engine.rs @@ -144,6 +144,10 @@ impl RaftEngine for PanicEngine { panic!() } + fn get_engine_path(&self) -> &str { + panic!() + } + fn put_store_ident(&self, ident: &StoreIdent) -> Result<()> { panic!() } diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index b66a56caadf..da15b1708b8 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -339,6 +339,10 @@ impl RaftEngine for RocksEngine { Ok(used_size) } + fn get_engine_path(&self) -> &str { + self.as_inner().path() + } + fn put_store_ident(&self, ident: &StoreIdent) -> Result<()> { self.put_msg(keys::STORE_IDENT_KEY, ident) } diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index b7a3f50699c..7df681c96d5 100644 --- a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -68,6 +68,7 @@ pub struct RaftLogGcTask { pub to: u64, } +// TODO: Refactor common methods between Kv and Raft engine into a shared trait. 
pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send + 'static { type LogBatch: RaftLogBatch; @@ -140,6 +141,9 @@ pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send fn get_engine_size(&self) -> Result; + /// The path to the directory on the filesystem where the raft log is stored + fn get_engine_path(&self) -> &str; + /// Visit all available raft groups. /// /// If any error is returned, the iteration will stop. diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 35cacf620fc..a376adc25b7 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -305,6 +305,10 @@ impl RaftLogEngine { ))) } + pub fn path(&self) -> &str { + self.0.path() + } + /// If path is not an empty directory, we say db exists. pub fn exists(path: &str) -> bool { let path = Path::new(path); @@ -615,6 +619,10 @@ impl RaftEngine for RaftLogEngine { Ok(self.0.get_used_size() as u64) } + fn get_engine_path(&self) -> &str { + self.path() + } + fn for_each_raft_group(&self, f: &mut F) -> std::result::Result<(), E> where F: FnMut(u64) -> std::result::Result<(), E>, diff --git a/components/server/src/server.rs b/components/server/src/server.rs index aa3a67591e2..a5fb3fefaf9 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -116,7 +116,10 @@ use tikv_util::{ math::MovingAvgU32, metrics::INSTANCE_BACKEND_CPU_QUOTA, quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, - sys::{cpu_time::ProcessStat, disk, register_memory_usage_high_water, SysQuota}, + sys::{ + cpu_time::ProcessStat, disk, path_in_diff_mount_point, register_memory_usage_high_water, + SysQuota, + }, thread_group::GroupProperties, time::{Instant, Monitor}, worker::{Builder as WorkerBuilder, LazyWorker, Scheduler, Worker}, @@ -533,36 +536,66 @@ where // enough space to do compaction and region migration when TiKV recover. 
// This file is created in data_dir rather than db_path, because we must not // increase store size of db_path. + fn calculate_reserved_space(capacity: u64, reserved_size_from_config: u64) -> u64 { + let mut reserved_size = reserved_size_from_config; + if reserved_size_from_config != 0 { + reserved_size = + cmp::max((capacity as f64 * 0.05) as u64, reserved_size_from_config); + } + reserved_size + } + fn reserve_physical_space(data_dir: &String, available: u64, reserved_size: u64) { + let path = Path::new(data_dir).join(file_system::SPACE_PLACEHOLDER_FILE); + if let Err(e) = file_system::remove_file(path) { + warn!("failed to remove space holder on starting: {}", e); + } + + // place holder file size is 20% of total reserved space. + if available > reserved_size { + file_system::reserve_space_for_recover(data_dir, reserved_size / 5) + .map_err(|e| panic!("Failed to reserve space for recovery: {}.", e)) + .unwrap(); + } else { + warn!("no enough disk space left to create the place holder file"); + } + } + let disk_stats = fs2::statvfs(&self.config.storage.data_dir).unwrap(); let mut capacity = disk_stats.total_space(); if self.config.raft_store.capacity.0 > 0 { capacity = cmp::min(capacity, self.config.raft_store.capacity.0); } - let mut reserve_space = self.config.storage.reserve_space.0; - if self.config.storage.reserve_space.0 != 0 { - reserve_space = cmp::max( - (capacity as f64 * 0.05) as u64, - self.config.storage.reserve_space.0, - ); - } - disk::set_disk_reserved_space(reserve_space); - let path = - Path::new(&self.config.storage.data_dir).join(file_system::SPACE_PLACEHOLDER_FILE); - if let Err(e) = file_system::remove_file(path) { - warn!("failed to remove space holder on starting: {}", e); - } + // reserve space for kv engine + let kv_reserved_size = + calculate_reserved_space(capacity, self.config.storage.reserve_space.0); + disk::set_disk_reserved_space(kv_reserved_size); + reserve_physical_space( + &self.config.storage.data_dir, + 
disk_stats.available_space(), + kv_reserved_size, + ); - let available = disk_stats.available_space(); - // place holder file size is 20% of total reserved space. - if available > reserve_space { - file_system::reserve_space_for_recover( - &self.config.storage.data_dir, - reserve_space / 5, - ) - .map_err(|e| panic!("Failed to reserve space for recovery: {}.", e)) - .unwrap(); + let raft_data_dir = if self.config.raft_engine.enable { + self.config.raft_engine.config().dir } else { - warn!("no enough disk space left to create the place holder file"); + self.config.raft_store.raftdb_path.clone() + }; + + let separated_raft_mount_path = + path_in_diff_mount_point(&self.config.storage.data_dir, &raft_data_dir); + if separated_raft_mount_path { + let raft_disk_stats = fs2::statvfs(&raft_data_dir).unwrap(); + // reserve space for raft engine if raft engine is deployed separately + let raft_reserved_size = calculate_reserved_space( + raft_disk_stats.total_space(), + self.config.storage.reserve_raft_space.0, + ); + disk::set_raft_disk_reserved_space(raft_reserved_size); + reserve_physical_space( + &raft_data_dir, + raft_disk_stats.available_space(), + raft_reserved_size, + ); } } @@ -1448,13 +1481,28 @@ where let store_path = self.store_path.clone(); let snap_mgr = self.snap_mgr.clone().unwrap(); let reserve_space = disk::get_disk_reserved_space(); - if reserve_space == 0 { + let reserve_raft_space = disk::get_raft_disk_reserved_space(); + if reserve_space == 0 && reserve_raft_space == 0 { info!("disk space checker not enabled"); return; } + let raft_path = engines.raft.get_engine_path().to_string(); + let separated_raft_mount_path = + path_in_diff_mount_point(raft_path.as_str(), engines.kv.path()); + let raft_almost_full_threshold = reserve_raft_space; + let raft_already_full_threshold = reserve_raft_space / 2; let almost_full_threshold = reserve_space; let already_full_threshold = reserve_space / 2; + fn calculate_disk_usage(a: disk::DiskUsage, b: disk::DiskUsage) -> 
disk::DiskUsage { + match (a, b) { + (disk::DiskUsage::AlreadyFull, _) => disk::DiskUsage::AlreadyFull, + (_, disk::DiskUsage::AlreadyFull) => disk::DiskUsage::AlreadyFull, + (disk::DiskUsage::AlmostFull, _) => disk::DiskUsage::AlmostFull, + (_, disk::DiskUsage::AlmostFull) => disk::DiskUsage::AlmostFull, + (disk::DiskUsage::Normal, disk::DiskUsage::Normal) => disk::DiskUsage::Normal, + } + } self.background_worker .spawn_interval_task(DEFAULT_STORAGE_STATS_INTERVAL, move || { let disk_stats = match fs2::statvfs(&store_path) { @@ -1481,6 +1529,33 @@ where .get_engine_size() .expect("get raft engine size"); + let mut raft_disk_status = disk::DiskUsage::Normal; + if separated_raft_mount_path && reserve_raft_space != 0 { + let raft_disk_stats = match fs2::statvfs(&raft_path) { + Err(e) => { + error!( + "get disk stat for raft engine failed"; + "raft engine path" => raft_path.clone(), + "err" => ?e + ); + return; + } + Ok(stats) => stats, + }; + let raft_disk_cap = raft_disk_stats.total_space(); + let mut raft_disk_available = + raft_disk_cap.checked_sub(raft_size).unwrap_or_default(); + raft_disk_available = cmp::min(raft_disk_available, raft_disk_stats.available_space()); + raft_disk_status = if raft_disk_available <= raft_already_full_threshold + { + disk::DiskUsage::AlreadyFull + } else if raft_disk_available <= raft_almost_full_threshold + { + disk::DiskUsage::AlmostFull + } else { + disk::DiskUsage::Normal + }; + } let placeholer_file_path = PathBuf::from_str(&data_dir) .unwrap() .join(Path::new(file_system::SPACE_PLACEHOLDER_FILE)); @@ -1488,7 +1563,11 @@ where let placeholder_size: u64 = file_system::get_file_size(placeholer_file_path).unwrap_or(0); - let used_size = snap_size + kv_size + raft_size + placeholder_size; + let used_size = if !separated_raft_mount_path { + snap_size + kv_size + raft_size + placeholder_size + } else { + snap_size + kv_size + placeholder_size + }; let capacity = if config_disk_capacity == 0 || disk_cap < config_disk_capacity { 
disk_cap } else { @@ -1499,18 +1578,22 @@ where available = cmp::min(available, disk_stats.available_space()); let prev_disk_status = disk::get_disk_status(0); //0 no need care about failpoint. - let cur_disk_status = if available <= already_full_threshold { + let cur_kv_disk_status = if available <= already_full_threshold { disk::DiskUsage::AlreadyFull } else if available <= almost_full_threshold { disk::DiskUsage::AlmostFull } else { disk::DiskUsage::Normal }; + let cur_disk_status = calculate_disk_usage(raft_disk_status, cur_kv_disk_status); if prev_disk_status != cur_disk_status { warn!( - "disk usage {:?}->{:?}, available={},snap={},kv={},raft={},capacity={}", + "disk usage {:?}->{:?} (raft engine usage: {:?}, kv engine usage: {:?}), seperated raft mount={}, kv available={}, snap={}, kv={}, raft={}, capacity={}", prev_disk_status, cur_disk_status, + raft_disk_status, + cur_kv_disk_status, + separated_raft_mount_path, available, snap_size, kv_size, diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 5ff65b33df3..36faa552804 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -31,6 +31,7 @@ lazy_static = "1.3" libc = "0.2" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } log_wrappers = { workspace = true } +mnt = "0.3.1" nix = "0.24" num-traits = "0.2" num_cpus = "1" diff --git a/components/tikv_util/src/sys/disk.rs b/components/tikv_util/src/sys/disk.rs index 3f2a60855ff..c8fe87a56b0 100644 --- a/components/tikv_util/src/sys/disk.rs +++ b/components/tikv_util/src/sys/disk.rs @@ -10,6 +10,7 @@ pub use kvproto::disk_usage::DiskUsage; // Percent is not configurable, But if you want to change, please make sure // the percent in both the init fs and store monitor are keep the same. 
static DISK_RESERVED_SPACE: AtomicU64 = AtomicU64::new(0); +static RAFT_DISK_RESERVED_SPACE: AtomicU64 = AtomicU64::new(0); static DISK_STATUS: AtomicI32 = AtomicI32::new(0); pub fn set_disk_reserved_space(v: u64) { @@ -20,6 +21,14 @@ pub fn get_disk_reserved_space() -> u64 { DISK_RESERVED_SPACE.load(Ordering::Acquire) } +pub fn set_raft_disk_reserved_space(v: u64) { + RAFT_DISK_RESERVED_SPACE.store(v, Ordering::Release) +} + +pub fn get_raft_disk_reserved_space() -> u64 { + RAFT_DISK_RESERVED_SPACE.load(Ordering::Acquire) +} + pub fn set_disk_status(status: DiskUsage) { let v = match status { DiskUsage::Normal => 0, diff --git a/components/tikv_util/src/sys/mod.rs b/components/tikv_util/src/sys/mod.rs index d17c821e995..dcc137f095c 100644 --- a/components/tikv_util/src/sys/mod.rs +++ b/components/tikv_util/src/sys/mod.rs @@ -9,11 +9,15 @@ pub mod ioload; pub mod thread; // re-export some traits for ease of use +#[cfg(target_os = "linux")] +use std::path::PathBuf; use std::sync::atomic::{AtomicU64, Ordering}; use fail::fail_point; #[cfg(target_os = "linux")] use lazy_static::lazy_static; +#[cfg(target_os = "linux")] +use mnt::get_mount; use sysinfo::RefreshKind; pub use sysinfo::{DiskExt, NetworkExt, ProcessExt, ProcessorExt, SystemExt}; @@ -156,3 +160,61 @@ pub fn cache_size(level: usize) -> Option { pub fn cache_line_size(level: usize) -> Option { read_size_in_cache(level, "coherency_line_size") } + +#[cfg(target_os = "linux")] +pub fn path_in_diff_mount_point(path1: &str, path2: &str) -> bool { + if path1.is_empty() || path2.is_empty() { + return false; + } + let path1 = PathBuf::from(path1); + let path2 = PathBuf::from(path2); + match (get_mount(&path1), get_mount(&path2)) { + (Err(e1), _) => { + warn!("Get mount point error for path {}, {}", path1.display(), e1); + false + } + (_, Err(e2)) => { + warn!("Get mount point error for path {}, {}", path2.display(), e2); + false + } + (Ok(None), _) => { + warn!("No mount point for {}", path1.display()); + false + } + 
(_, Ok(None)) => { + warn!("No mount point for {}", path2.display()); + false + } + (Ok(Some(mount1)), Ok(Some(mount2))) => mount1 != mount2, + } +} + +#[cfg(not(target_os = "linux"))] +pub fn path_in_diff_mount_point(_path1: &str, _path2: &str) -> bool { + return false; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(target_os = "linux")] + #[test] + fn test_path_in_diff_mount_point() { + let (empty_path1, path2) = ("", "/"); + let result = path_in_diff_mount_point(empty_path1, path2); + assert_eq!(result, false); + + let (no_mount_point_path, path2) = ("no_mount_point_path_w943nn", "/"); + let result = path_in_diff_mount_point(no_mount_point_path, path2); + assert_eq!(result, false); + + let (not_existed_path, path2) = ("/non_existed_path_eu2yndh", "/"); + let result = path_in_diff_mount_point(not_existed_path, path2); + assert_eq!(result, false); + + let (normal_path1, normal_path2) = ("/", "/"); + let result = path_in_diff_mount_point(normal_path1, normal_path2); + assert_eq!(result, false); + } +} diff --git a/etc/config-template.toml b/etc/config-template.toml index 92b6454ba29..a2b3ab13b00 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -253,6 +253,12 @@ ## Set it to 0 will cause no space is reserved at all. It's generally used for tests. # reserve-space = "5GB" +## Reserve some space for raft disk if raft disk is separated deployed with kv disk. +## `max(reserve-raft-space, raft disk capacity * 5%)` will be reserved exactly. +## +## Set it to 0 will cause no space is reserved at all. It's generally used for tests. +# reserve-raft-space = "1GB" + ## The maximum recovery time after rocksdb detects restorable background errors. When the data belonging ## to the data range is damaged, it will be reported to PD through heartbeat, and PD will add `remove-peer` ## operator to remove this damaged peer. 
When the damaged peer still exists in the current store, the diff --git a/src/storage/config.rs b/src/storage/config.rs index 685272dbeee..313f86ba048 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -29,6 +29,7 @@ const MAX_SCHED_CONCURRENCY: usize = 2 * 1024 * 1024; const DEFAULT_SCHED_PENDING_WRITE_MB: u64 = 100; const DEFAULT_RESERVED_SPACE_GB: u64 = 5; +const DEFAULT_RESERVED_RAFT_SPACE_GB: u64 = 1; #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, OnlineConfig)] #[serde(default)] @@ -50,6 +51,8 @@ pub struct Config { // Reserve disk space to make tikv would have enough space to compact when disk is full. pub reserve_space: ReadableSize, #[online_config(skip)] + pub reserve_raft_space: ReadableSize, + #[online_config(skip)] pub enable_async_apply_prewrite: bool, #[online_config(skip)] pub api_version: u8, @@ -82,6 +85,7 @@ impl Default for Config { }, scheduler_pending_write_threshold: ReadableSize::mb(DEFAULT_SCHED_PENDING_WRITE_MB), reserve_space: ReadableSize::gb(DEFAULT_RESERVED_SPACE_GB), + reserve_raft_space: ReadableSize::gb(DEFAULT_RESERVED_RAFT_SPACE_GB), enable_async_apply_prewrite: false, api_version: 1, enable_ttl: false, diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 5cb8c837fb1..93c07f2f411 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -671,6 +671,7 @@ fn test_serde_custom_tikv_config() { scheduler_worker_pool_size: 1, scheduler_pending_write_threshold: ReadableSize::kb(123), reserve_space: ReadableSize::gb(10), + reserve_raft_space: ReadableSize::gb(2), enable_async_apply_prewrite: true, api_version: 1, enable_ttl: true, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index a041b696158..e5c896238bc 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -97,6 +97,7 @@ scheduler-worker-pool-size = 1 scheduler-pending-write-threshold = 
"123KB" enable-async-apply-prewrite = true reserve-space = "10GB" +reserve-raft-space = "2GB" enable-ttl = true ttl-check-poll-interval = "0s" From 616b4402192b4d092f8d6727f5fe95f133e85bca Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Wed, 16 Nov 2022 17:13:56 +0800 Subject: [PATCH 0338/1149] txn: Add batch-resumed mode for acquire_pessimistic_lock storage command (#13687) ref tikv/tikv#13298 Add batch-resumed mode for acquire_pessimistic_lock storage command. Now the storage command `AcquirePessimisticLock` contains an enum to determine whether it's executing a normal request or it's a batch of requests resumed after waiting for another lock. Signed-off-by: MyonKeminta Co-authored-by: Ti Chi Robot --- components/txn_types/src/lib.rs | 4 +- components/txn_types/src/types.rs | 13 + src/storage/lock_manager/lock_wait_context.rs | 1 + .../lock_manager/lock_waiting_queue.rs | 5 + src/storage/metrics.rs | 1 + src/storage/mod.rs | 30 +- src/storage/mvcc/txn.rs | 4 + .../txn/commands/acquire_pessimistic_lock.rs | 178 ++++---- .../acquire_pessimistic_lock_resumed.rs | 414 ++++++++++++++++++ src/storage/txn/commands/atomic_store.rs | 2 +- .../txn/commands/check_secondary_locks.rs | 2 +- src/storage/txn/commands/check_txn_status.rs | 2 +- src/storage/txn/commands/cleanup.rs | 2 +- src/storage/txn/commands/commit.rs | 2 +- src/storage/txn/commands/compare_and_swap.rs | 2 +- .../txn/commands/flashback_to_version.rs | 2 +- src/storage/txn/commands/mod.rs | 34 +- src/storage/txn/commands/pause.rs | 2 +- .../txn/commands/pessimistic_rollback.rs | 2 +- src/storage/txn/commands/prewrite.rs | 21 +- src/storage/txn/commands/resolve_lock.rs | 2 +- src/storage/txn/commands/resolve_lock_lite.rs | 2 +- src/storage/txn/commands/rollback.rs | 2 +- src/storage/txn/commands/txn_heart_beat.rs | 2 +- src/storage/txn/latch.rs | 15 +- src/storage/txn/mod.rs | 1 + src/storage/txn/scheduler.rs | 11 +- src/storage/types.rs | 2 + 
tests/failpoints/cases/test_storage.rs | 6 +- 29 files changed, 621 insertions(+), 145 deletions(-) create mode 100644 src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs diff --git a/components/txn_types/src/lib.rs b/components/txn_types/src/lib.rs index edd89256d2b..a1a759b21b9 100644 --- a/components/txn_types/src/lib.rs +++ b/components/txn_types/src/lib.rs @@ -19,8 +19,8 @@ pub use lock::{Lock, LockType, PessimisticLock}; use thiserror::Error; pub use timestamp::{TimeStamp, TsSet, TSO_PHYSICAL_SHIFT_BITS}; pub use types::{ - is_short_value, Key, KvPair, Mutation, MutationType, OldValue, OldValues, TxnExtra, - TxnExtraScheduler, Value, WriteBatchFlags, SHORT_VALUE_MAX_LEN, + insert_old_value_if_resolved, is_short_value, Key, KvPair, Mutation, MutationType, OldValue, + OldValues, TxnExtra, TxnExtraScheduler, Value, WriteBatchFlags, SHORT_VALUE_MAX_LEN, }; pub use write::{Write, WriteRef, WriteType}; diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 01133a71924..6a2c953afc1 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -512,6 +512,19 @@ impl OldValue { // MutationType is the type of mutation of the current write. pub type OldValues = HashMap)>; +pub fn insert_old_value_if_resolved( + old_values: &mut OldValues, + key: Key, + start_ts: TimeStamp, + old_value: OldValue, + mutation_type: Option, +) { + if old_value.resolved() { + let key = key.append_ts(start_ts); + old_values.insert(key, (old_value, mutation_type)); + } +} + // Extra data fields filled by kvrpcpb::ExtraOp. 
#[derive(Default, Debug, Clone)] pub struct TxnExtra { diff --git a/src/storage/lock_manager/lock_wait_context.rs b/src/storage/lock_manager/lock_wait_context.rs index 7749ee983cb..1d53bdc38ea 100644 --- a/src/storage/lock_manager/lock_wait_context.rs +++ b/src/storage/lock_manager/lock_wait_context.rs @@ -258,6 +258,7 @@ mod tests { for_update_ts: 1.into(), ..Default::default() }, + should_not_exist: false, lock_wait_token: token, legacy_wake_up_index: None, key_cb: None, diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs index d3fb58b2a94..4069bab5643 100644 --- a/src/storage/lock_manager/lock_waiting_queue.rs +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -93,6 +93,9 @@ pub struct LockWaitEntry { pub key: Key, pub lock_hash: u64, pub parameters: PessimisticLockParameters, + // `parameters` provides parameter for a request, but `should_not_exist` is specified key-wise. + // Put it in a separated field. + pub should_not_exist: bool, pub lock_wait_token: LockWaitToken, pub legacy_wake_up_index: Option, pub key_cb: Option>, @@ -687,6 +690,7 @@ mod tests { min_commit_ts: 0.into(), check_existence: false, is_first_lock: false, + lock_only_if_exists: false, allow_lock_with_conflict: false, }; @@ -697,6 +701,7 @@ mod tests { key, lock_hash, parameters, + should_not_exist: false, lock_wait_token: token, legacy_wake_up_index: None, key_cb: Some(SyncWrapper::new(Box::new(move |res| tx.send(res).unwrap()))), diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index 2bbe4b7b762..e84a7dfb4e9 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -126,6 +126,7 @@ make_auto_flush_static_metric! 
{ batch_get_command, prewrite, acquire_pessimistic_lock, + acquire_pessimistic_lock_resumed, commit, cleanup, rollback, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 8b835bcfafd..2032ffd86ae 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -96,7 +96,7 @@ use tikv_util::{ use tracker::{ clear_tls_tracker_token, set_tls_tracker_token, with_tls_tracker, TrackedFuture, TrackerToken, }; -use txn_types::{Key, KvPair, Lock, LockType, OldValues, TimeStamp, TsSet, Value}; +use txn_types::{Key, KvPair, Lock, LockType, TimeStamp, TsSet, Value}; pub use self::{ errors::{get_error_kind_from_header, get_tag_from_header, Error, ErrorHeaderKind, ErrorInner}, @@ -1416,7 +1416,7 @@ impl Storage { callback: Callback, ) -> Result<()> { use crate::storage::txn::commands::{ - AcquirePessimisticLock, Prewrite, PrewritePessimistic, + AcquirePessimisticLock, AcquirePessimisticLockResumed, Prewrite, PrewritePessimistic, }; let cmd: Command = cmd.into(); @@ -1452,6 +1452,18 @@ impl Storage { )?; check_key_size!(keys, self.max_key_size, callback); } + Command::AcquirePessimisticLockResumed(AcquirePessimisticLockResumed { + items, .. 
+ }) => { + let keys = items.iter().map(|item| item.key.as_encoded()); + Self::check_api_version( + self.api_version, + cmd.ctx().api_version, + CommandKind::acquire_pessimistic_lock_resumed, + keys.clone(), + )?; + check_key_size!(keys, self.max_key_size, callback); + } _ => {} } with_tls_tracker(|tracker| { @@ -3341,9 +3353,9 @@ pub mod test_util { Some(WaitTimeout::Default), return_values, for_update_ts.next(), - OldValues::default(), check_existence, false, + false, Context::default(), ) } @@ -8193,7 +8205,7 @@ mod tests { Some(WaitTimeout::Millis(100)), false, 21.into(), - OldValues::default(), + false, false, false, Context::default(), @@ -8285,7 +8297,7 @@ mod tests { Some(WaitTimeout::Millis(5000)), false, (lock_ts + 1).into(), - OldValues::default(), + false, false, false, Context::default(), @@ -8870,7 +8882,7 @@ mod tests { None, false, 0.into(), - OldValues::default(), + false, false, false, Default::default(), @@ -8893,7 +8905,7 @@ mod tests { None, false, 0.into(), - OldValues::default(), + false, false, false, Default::default(), @@ -9123,7 +9135,7 @@ mod tests { None, false, TimeStamp::new(12), - OldValues::default(), + false, false, false, Context::default(), @@ -9149,7 +9161,7 @@ mod tests { None, false, TimeStamp::new(12), - OldValues::default(), + false, false, false, Context::default(), diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index 66aa769d462..4cc0ab57ffb 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -103,6 +103,10 @@ impl MvccTxn { self.write_size } + pub fn is_empty(&self) -> bool { + self.modifies.len() == 0 && self.locks_for_1pc.len() == 0 + } + pub(crate) fn put_lock(&mut self, key: Key, lock: &Lock) { let write = Modify::Put(CF_LOCK, key, lock.to_bytes()); self.write_size += write.size(); diff --git a/src/storage/txn/commands/acquire_pessimistic_lock.rs b/src/storage/txn/commands/acquire_pessimistic_lock.rs index 69a5179ab84..6bd147cf02e 100644 --- 
a/src/storage/txn/commands/acquire_pessimistic_lock.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock.rs @@ -1,8 +1,9 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. // #[PerformanceCriticalPath] -use kvproto::kvrpcpb::{ExtraOp, LockInfo}; -use txn_types::{Key, OldValues, TimeStamp, TxnExtra}; +use kvproto::kvrpcpb::ExtraOp; +use tikv_kv::Modify; +use txn_types::{insert_old_value_if_resolved, Key, OldValues, TimeStamp, TxnExtra}; use crate::storage::{ kv::WriteData, @@ -17,7 +18,7 @@ use crate::storage::{ Error, ErrorInner, Result, }, types::{PessimisticLockParameters, PessimisticLockResults}, - Error as StorageError, ErrorInner as StorageErrorInner, ProcessResult, Result as StorageResult, + Error as StorageError, PessimisticLockKeyResult, ProcessResult, Result as StorageResult, Snapshot, }; @@ -46,9 +47,9 @@ command! { /// later read in the same transaction. return_values: bool, min_commit_ts: TimeStamp, - old_values: OldValues, check_existence: bool, lock_only_if_exists: bool, + allow_lock_with_conflict: bool, } } @@ -69,17 +70,15 @@ impl CommandExt for AcquirePessimisticLock { gen_lock!(keys: multiple(|x| &x.0)); } -fn extract_lock_info_from_result(res: &StorageResult) -> &LockInfo { - match res { - Err(StorageError(box StorageErrorInner::Txn(Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::KeyIsLocked(info), - )))))) => info, - _ => panic!("unexpected mvcc error"), - } -} - impl WriteCommand for AcquirePessimisticLock { - fn process_write(mut self, snapshot: S, context: WriteContext<'_, L>) -> Result { + fn process_write(self, snapshot: S, context: WriteContext<'_, L>) -> Result { + if self.allow_lock_with_conflict && self.keys.len() > 1 { + // Currently multiple keys with `allow_lock_with_conflict` set is not supported. 
+ return Err(Error::from(ErrorInner::Other(box_err!( + "multiple keys in a single request with allowed_lock_with_conflict set is not allowed" + )))); + } + let (start_ts, ctx, keys) = (self.start_ts, self.ctx, self.keys); let mut txn = MvccTxn::new(start_ts, context.concurrency_manager); let mut reader = ReaderWithStats::new( @@ -87,9 +86,11 @@ impl WriteCommand for AcquirePessimisticLock context.statistics, ); - let rows = keys.len(); - let mut res = Ok(PessimisticLockResults::with_capacity(rows)); + let total_keys = keys.len(); + let mut res = PessimisticLockResults::with_capacity(total_keys); + let mut encountered_locks = vec![]; let need_old_value = context.extra_op == ExtraOp::ReadOldValue; + let mut old_values = OldValues::default(); for (k, should_not_exist) in keys { match acquire_pessimistic_lock( &mut txn, @@ -104,62 +105,79 @@ impl WriteCommand for AcquirePessimisticLock self.min_commit_ts, need_old_value, self.lock_only_if_exists, - false, + self.allow_lock_with_conflict, ) { Ok((key_res, old_value)) => { - res.as_mut().unwrap().push(key_res); - if old_value.resolved() { - let key = k.append_ts(txn.start_ts); - // MutationType is unknown in AcquirePessimisticLock stage. - let mutation_type = None; - self.old_values.insert(key, (old_value, mutation_type)); - } + res.push(key_res); + // MutationType is unknown in AcquirePessimisticLock stage. + insert_old_value_if_resolved(&mut old_values, k, txn.start_ts, old_value, None); } - Err(e @ MvccError(box MvccErrorInner::KeyIsLocked { .. 
})) => { - res = Err(e).map_err(Error::from).map_err(StorageError::from); + Err(MvccError(box MvccErrorInner::KeyIsLocked(lock_info))) => { + let request_parameters = PessimisticLockParameters { + pb_ctx: ctx.clone(), + primary: self.primary.clone(), + start_ts, + lock_ttl: self.lock_ttl, + for_update_ts: self.for_update_ts, + wait_timeout: self.wait_timeout, + return_values: self.return_values, + min_commit_ts: self.min_commit_ts, + check_existence: self.check_existence, + is_first_lock: self.is_first_lock, + lock_only_if_exists: self.lock_only_if_exists, + allow_lock_with_conflict: self.allow_lock_with_conflict, + }; + let lock_info = WriteResultLockInfo::new( + lock_info, + request_parameters, + k, + should_not_exist, + ); + encountered_locks.push(lock_info); + // Do not lock previously succeeded keys. + txn.clear(); + res.0.clear(); + res.push(PessimisticLockKeyResult::Waiting); break; } Err(e) => return Err(Error::from(e)), } } - // no conflict - let (pr, to_be_write, rows, ctx, lock_info) = if res.is_ok() { - let pr = ProcessResult::PessimisticLockRes { res }; - let extra = TxnExtra { - old_values: self.old_values, - // One pc status is unkown AcquirePessimisticLock stage. 
- one_pc: false, - for_flashback: false, - }; - let write_data = WriteData::new(txn.into_modifies(), extra); - (pr, write_data, rows, ctx, None) - } else { - let request_parameters = PessimisticLockParameters { - pb_ctx: ctx.clone(), - primary: self.primary.clone(), - start_ts: self.start_ts, - lock_ttl: self.lock_ttl, - for_update_ts: self.for_update_ts, - wait_timeout: self.wait_timeout, - return_values: self.return_values, - min_commit_ts: self.min_commit_ts, - check_existence: self.check_existence, - is_first_lock: self.is_first_lock, - allow_lock_with_conflict: false, - }; - let lock_info_pb = extract_lock_info_from_result(&res); - let lock_info = WriteResultLockInfo::new(lock_info_pb.clone(), request_parameters); - let pr = ProcessResult::PessimisticLockRes { res }; - // Wait for lock released - (pr, WriteData::default(), 0, ctx, Some(lock_info)) - }; + let modifies = txn.into_modifies(); + + let mut res = Ok(res); + + // If encountered lock and `wait_timeout` is `None` (which means no wait), + // return error directly here. + if !encountered_locks.is_empty() && self.wait_timeout.is_none() { + // Mind the difference of the protocols of legacy requests and resumable + // requests. For resumable requests (allow_lock_with_conflict == + // true), key errors are considered key by key instead of for the + // whole request. 
+ let lock_info = encountered_locks.drain(..).next().unwrap().lock_info_pb; + let err = StorageError::from(Error::from(MvccError::from( + MvccErrorInner::KeyIsLocked(lock_info), + ))); + if self.allow_lock_with_conflict { + res.as_mut().unwrap().0[0] = PessimisticLockKeyResult::Failed(err.into()) + } else { + res = Err(err) + } + } + + let rows = if res.is_ok() { total_keys } else { 0 }; + + let pr = ProcessResult::PessimisticLockRes { res }; + + let to_be_write = make_write_data(modifies, old_values); + Ok(WriteResult { ctx, to_be_write, rows, pr, - lock_info, + lock_info: encountered_locks, released_locks: ReleasedLocks::new(), lock_guards: vec![], response_policy: ResponsePolicy::OnProposed, @@ -167,38 +185,16 @@ impl WriteCommand for AcquirePessimisticLock } } -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_gen_lock_info_from_result() { - let raw_key = b"key".to_vec(); - let key = Key::from_raw(&raw_key); - let ts = 100; - let is_first_lock = true; - let wait_timeout = WaitTimeout::from_encoded(200); - - let mut info = LockInfo::default(); - info.set_key(raw_key.clone()); - info.set_lock_version(ts); - info.set_lock_ttl(100); - let case = StorageError::from(StorageErrorInner::Txn(Error::from(ErrorInner::Mvcc( - MvccError::from(MvccErrorInner::KeyIsLocked(info.clone())), - )))); - let lock_info = WriteResultLockInfo::new( - extract_lock_info_from_result::<()>(&Err(case)).clone(), - PessimisticLockParameters { - is_first_lock, - wait_timeout, - ..Default::default() - }, - ); - assert_eq!(lock_info.lock_digest.ts, ts.into()); - assert_eq!(lock_info.lock_digest.hash, key.gen_hash()); - assert_eq!(lock_info.key.into_raw().unwrap(), raw_key); - assert_eq!(lock_info.parameters.is_first_lock, is_first_lock); - assert_eq!(lock_info.parameters.wait_timeout, wait_timeout); - assert_eq!(lock_info.lock_info_pb, info); +pub(super) fn make_write_data(modifies: Vec, old_values: OldValues) -> WriteData { + if !modifies.is_empty() { + let extra = TxnExtra { + 
old_values, + // One pc status is unknown in AcquirePessimisticLock stage. + one_pc: false, + for_flashback: false, + }; + WriteData::new(modifies, extra) + } else { + WriteData::default() } } diff --git a/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs b/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs new file mode 100644 index 00000000000..3a35fe6d1a7 --- /dev/null +++ b/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs @@ -0,0 +1,414 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +// #[PerformanceCriticalPath] +use kvproto::kvrpcpb::ExtraOp; +use txn_types::{insert_old_value_if_resolved, Key, OldValues}; + +use crate::storage::{ + lock_manager::{lock_waiting_queue::LockWaitEntry, LockManager, LockWaitToken}, + mvcc::{Error as MvccError, ErrorInner as MvccErrorInner, MvccTxn, SnapshotReader}, + txn::{ + acquire_pessimistic_lock, + commands::{ + acquire_pessimistic_lock::make_write_data, Command, CommandExt, ReleasedLocks, + ResponsePolicy, TypedCommand, WriteCommand, WriteContext, WriteResult, + WriteResultLockInfo, + }, + Error, Result, + }, + types::{PessimisticLockParameters, PessimisticLockResults}, + Error as StorageError, PessimisticLockKeyResult, ProcessResult, Result as StorageResult, + Snapshot, +}; + +#[derive(Debug)] +pub struct ResumedPessimisticLockItem { + pub key: Key, + pub should_not_exist: bool, + pub params: PessimisticLockParameters, + pub lock_wait_token: LockWaitToken, +} + +command! { + /// Acquire a Pessimistic lock on the keys. + /// + /// This can be rolled back with a [`PessimisticRollback`](Command::PessimisticRollback) command. 
+ AcquirePessimisticLockResumed: + cmd_ty => StorageResult, + display => "kv::command::acquirepessimisticlockresumed {:?}", + (items), + content => { + items: Vec, + } +} + +impl CommandExt for AcquirePessimisticLockResumed { + ctx!(); + tag!(acquire_pessimistic_lock_resumed); + request_type!(KvPessimisticLock); + + property!(can_be_pipelined); + + fn write_bytes(&self) -> usize { + self.items + .iter() + .map(|item| item.key.as_encoded().len()) + .sum() + } + + gen_lock!(items: multiple(|x| &x.key)); +} + +impl WriteCommand for AcquirePessimisticLockResumed { + fn process_write(self, snapshot: S, context: WriteContext<'_, L>) -> Result { + let mut modifies = vec![]; + let mut txn = None; + let mut reader: Option> = None; + + let total_keys = self.items.len(); + let mut res = PessimisticLockResults::with_capacity(total_keys); + let mut encountered_locks = vec![]; + let need_old_value = context.extra_op == ExtraOp::ReadOldValue; + let mut old_values = OldValues::default(); + + let mut new_locked_keys = Vec::with_capacity(total_keys); + + for item in self.items.into_iter() { + let ResumedPessimisticLockItem { + key, + should_not_exist, + params, + lock_wait_token, + } = item; + + // TODO: Refine the code for rebuilding txn state. + if txn + .as_ref() + .map_or(true, |t: &MvccTxn| t.start_ts != params.start_ts) + { + if let Some(prev_txn) = txn.replace(MvccTxn::new( + params.start_ts, + context.concurrency_manager.clone(), + )) { + modifies.extend(prev_txn.into_modifies()); + } + // TODO: Is it possible to reuse the same reader but change the start_ts stored + // in it? 
+ if let Some(mut prev_reader) = reader.replace(SnapshotReader::new_with_ctx( + params.start_ts, + snapshot.clone(), + &self.ctx, + )) { + context.statistics.add(&prev_reader.take_statistics()); + } + } + let txn = txn.as_mut().unwrap(); + let reader = reader.as_mut().unwrap(); + + match acquire_pessimistic_lock( + txn, + reader, + key.clone(), + ¶ms.primary, + should_not_exist, + params.lock_ttl, + params.for_update_ts, + params.return_values, + params.check_existence, + params.min_commit_ts, + need_old_value, + params.lock_only_if_exists, + true, + ) { + Ok((key_res, old_value)) => { + res.push(key_res); + new_locked_keys.push((params.start_ts, key.clone())); + + insert_old_value_if_resolved( + &mut old_values, + key, + params.start_ts, + old_value, + None, + ); + } + Err(MvccError(box MvccErrorInner::KeyIsLocked(lock_info))) => { + let mut lock_info = + WriteResultLockInfo::new(lock_info, params, key, should_not_exist); + lock_info.lock_wait_token = lock_wait_token; + res.push(PessimisticLockKeyResult::Waiting); + encountered_locks.push(lock_info); + } + Err(e) => { + res.push(PessimisticLockKeyResult::Failed( + StorageError::from(Error::from(e)).into(), + )); + } + }; + } + + if let Some(txn) = txn { + if !txn.is_empty() { + modifies.extend(txn.into_modifies()); + } + } + if let Some(mut reader) = reader { + context.statistics.add(&reader.take_statistics()); + } + + let pr = ProcessResult::PessimisticLockRes { res: Ok(res) }; + let to_be_write = make_write_data(modifies, old_values); + + Ok(WriteResult { + ctx: self.ctx, + to_be_write, + rows: total_keys, + pr, + lock_info: encountered_locks, + released_locks: ReleasedLocks::new(), + lock_guards: vec![], + response_policy: ResponsePolicy::OnProposed, + }) + } +} + +impl AcquirePessimisticLockResumed { + pub fn from_lock_wait_entries( + lock_wait_entries: impl IntoIterator>, + ) -> TypedCommand> { + let items: Vec<_> = lock_wait_entries + .into_iter() + .map(|item| { + assert!(item.key_cb.is_none()); + 
ResumedPessimisticLockItem { + key: item.key, + should_not_exist: item.should_not_exist, + params: item.parameters, + lock_wait_token: item.lock_wait_token, + } + }) + .collect(); + + assert!(!items.is_empty()); + let ctx = items[0].params.pb_ctx.clone(); + // TODO: May it cause problem by using the first one as the pb_ctx of the + // Command? + Self::new(items, ctx) + } +} + +#[cfg(test)] +mod tests { + use concurrency_manager::ConcurrencyManager; + use kvproto::kvrpcpb::Context; + use rand::random; + use tikv_kv::Engine; + use txn_types::TimeStamp; + + use super::*; + use crate::storage::{ + lock_manager::{MockLockManager, WaitTimeout}, + mvcc::tests::{must_locked, write}, + txn::{ + commands::pessimistic_rollback::tests::must_success as must_pessimistic_rollback, + tests::{must_commit, must_pessimistic_locked, must_prewrite_put, must_rollback}, + }, + TestEngineBuilder, + }; + + #[allow(clippy::vec_box)] + fn must_success( + engine: &mut E, + lock_wait_entries: Vec>, + ) -> PessimisticLockResults { + let ctx = Context::default(); + let snapshot = engine.snapshot(Default::default()).unwrap(); + let cm = ConcurrencyManager::new(TimeStamp::zero()); + + let items_info: Vec<_> = lock_wait_entries + .iter() + .map(|item| { + ( + item.lock_wait_token, + item.key.clone(), + item.parameters.clone(), + item.should_not_exist, + ) + }) + .collect(); + + let command = AcquirePessimisticLockResumed::from_lock_wait_entries(lock_wait_entries).cmd; + let result = command + .process_write( + snapshot, + WriteContext { + lock_mgr: &MockLockManager::new(), + concurrency_manager: cm, + extra_op: Default::default(), + statistics: &mut Default::default(), + async_apply_prewrite: false, + raw_ext: None, + }, + ) + .unwrap(); + let res = if let ProcessResult::PessimisticLockRes { res } = result.pr { + res.unwrap() + } else { + panic!("unexpected process result: {:?}", result.pr); + }; + + // Check correctness of returned lock info. 
+ let mut lock_info_index = 0; + for (i, res) in res.0.iter().enumerate() { + if let PessimisticLockKeyResult::Waiting = res { + let (token, key, params, should_not_exist) = &items_info[i]; + let lock_info: &WriteResultLockInfo = &result.lock_info[lock_info_index]; + lock_info_index += 1; + + assert_eq!(lock_info.lock_wait_token, *token); + assert_eq!(&lock_info.key, key); + assert_eq!(&lock_info.parameters, params); + assert_eq!(lock_info.should_not_exist, *should_not_exist); + } + } + assert_eq!(lock_info_index, result.lock_info.len()); + + write(engine, &ctx, result.to_be_write.modifies); + res + } + + fn make_lock_waiting( + key: &[u8], + start_ts: impl Into, + for_update_ts: impl Into, + return_values: bool, + check_existence: bool, + ) -> Box { + let start_ts = start_ts.into(); + let for_update_ts = for_update_ts.into(); + assert!(for_update_ts >= start_ts); + let parameters = PessimisticLockParameters { + pb_ctx: Context::default(), + primary: key.to_vec(), + start_ts, + lock_ttl: 1000, + for_update_ts, + wait_timeout: Some(WaitTimeout::Millis(1000)), + return_values, + min_commit_ts: for_update_ts.next(), + check_existence, + is_first_lock: false, + lock_only_if_exists: false, + allow_lock_with_conflict: true, + }; + + let key = Key::from_raw(key); + let lock_hash = key.gen_hash(); + let entry = LockWaitEntry { + key, + lock_hash, + parameters, + should_not_exist: false, + lock_wait_token: LockWaitToken(Some(random())), + legacy_wake_up_index: Some(0), + key_cb: None, + }; + + Box::new(entry) + } + + #[test] + fn test_acquire_pessimistic_lock_resumed() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + + let res = must_success( + &mut engine, + vec![make_lock_waiting(b"k1", 10, 15, false, false)], + ); + assert_eq!(res.0.len(), 1); + res.0[0].assert_empty(); + must_pessimistic_locked(&mut engine, b"k1", 10, 15); + must_pessimistic_rollback(&mut engine, b"k1", 10, 15); + + let res = must_success( + &mut engine, + vec![ + 
make_lock_waiting(b"k1", 20, 25, false, false), + make_lock_waiting(b"k2", 20, 25, false, false), + make_lock_waiting(b"k3", 21, 26, false, false), + ], + ); + assert_eq!(res.0.len(), 3); + res.0.iter().for_each(|x| x.assert_empty()); + must_pessimistic_locked(&mut engine, b"k1", 20, 25); + must_pessimistic_locked(&mut engine, b"k2", 20, 25); + must_pessimistic_locked(&mut engine, b"k3", 21, 26); + + must_pessimistic_rollback(&mut engine, b"k1", 20, 25); + must_pessimistic_rollback(&mut engine, b"k2", 20, 25); + must_pessimistic_rollback(&mut engine, b"k3", 21, 26); + + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 30); + must_commit(&mut engine, b"k1", 30, 35); + must_prewrite_put(&mut engine, b"k2", b"v2", b"k1", 30); + must_prewrite_put(&mut engine, b"k3", b"v3", b"k3", 28); + must_commit(&mut engine, b"k3", 28, 29); + let res = must_success( + &mut engine, + vec![ + make_lock_waiting(b"k1", 31, 31, false, false), + make_lock_waiting(b"k2", 32, 32, false, false), + make_lock_waiting(b"k3", 33, 33, true, false), + make_lock_waiting(b"k4", 34, 34, false, true), + make_lock_waiting(b"k5", 35, 35, false, false), + ], + ); + assert_eq!(res.0.len(), 5); + res.0[0].assert_locked_with_conflict(Some(b"v1"), 35); + res.0[1].assert_waiting(); + res.0[2].assert_value(Some(b"v3")); + res.0[3].assert_existence(false); + res.0[4].assert_empty(); + must_pessimistic_locked(&mut engine, b"k1", 31, 35); + must_locked(&mut engine, b"k2", 30); + must_pessimistic_locked(&mut engine, b"k3", 33, 33); + must_pessimistic_locked(&mut engine, b"k4", 34, 34); + must_pessimistic_locked(&mut engine, b"k5", 35, 35); + + must_pessimistic_rollback(&mut engine, b"k1", 31, 35); + must_pessimistic_rollback(&mut engine, b"k3", 33, 33); + must_pessimistic_rollback(&mut engine, b"k4", 34, 34); + must_pessimistic_rollback(&mut engine, b"k5", 35, 35); + + must_prewrite_put(&mut engine, b"k4", b"v4", b"k4", 40); + must_prewrite_put(&mut engine, b"k6", b"v6", b"k4", 40); + let res = must_success( + 
&mut engine, + vec![ + make_lock_waiting(b"k1", 41, 41, false, false), + make_lock_waiting(b"k2", 41, 41, false, false), + make_lock_waiting(b"k3", 42, 42, false, false), + make_lock_waiting(b"k4", 42, 42, false, false), + make_lock_waiting(b"k5", 43, 43, false, false), + make_lock_waiting(b"k6", 43, 43, false, false), + ], + ); + assert_eq!(res.0.len(), 6); + for &i in &[0, 2, 4] { + res.0[i].assert_empty(); + } + for &i in &[1, 3, 5] { + res.0[i].assert_waiting(); + } + must_pessimistic_locked(&mut engine, b"k1", 41, 41); + must_pessimistic_locked(&mut engine, b"k3", 42, 42); + must_pessimistic_locked(&mut engine, b"k5", 43, 43); + + must_pessimistic_rollback(&mut engine, b"k1", 41, 41); + must_rollback(&mut engine, b"k2", 30, false); + must_pessimistic_rollback(&mut engine, b"k3", 43, 43); + must_rollback(&mut engine, b"k2", 40, false); + must_pessimistic_rollback(&mut engine, b"k5", 45, 45); + must_rollback(&mut engine, b"k2", 40, false); + } +} diff --git a/src/storage/txn/commands/atomic_store.rs b/src/storage/txn/commands/atomic_store.rs index b935d991eea..1df5c5b2cf8 100644 --- a/src/storage/txn/commands/atomic_store.rs +++ b/src/storage/txn/commands/atomic_store.rs @@ -58,7 +58,7 @@ impl WriteCommand for RawAtomicStore { to_be_write, rows, pr: ProcessResult::Res, - lock_info: None, + lock_info: vec![], released_locks: ReleasedLocks::new(), lock_guards: raw_ext.into_iter().map(|r| r.key_guard).collect(), response_policy: ResponsePolicy::OnApplied, diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index bd494e91edc..71adda7a274 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -158,7 +158,7 @@ impl WriteCommand for CheckSecondaryLocks { to_be_write: write_data, rows, pr, - lock_info: None, + lock_info: vec![], released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, diff --git 
a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index b74e7d5cb7c..a118769a5db 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -132,7 +132,7 @@ impl WriteCommand for CheckTxnStatus { to_be_write: write_data, rows: 1, pr, - lock_info: None, + lock_info: vec![], released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, diff --git a/src/storage/txn/commands/cleanup.rs b/src/storage/txn/commands/cleanup.rs index 0b82432e3cd..a6c529420d3 100644 --- a/src/storage/txn/commands/cleanup.rs +++ b/src/storage/txn/commands/cleanup.rs @@ -74,7 +74,7 @@ impl WriteCommand for Cleanup { to_be_write: write_data, rows: 1, pr: ProcessResult::Res, - lock_info: None, + lock_info: vec![], released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, diff --git a/src/storage/txn/commands/commit.rs b/src/storage/txn/commands/commit.rs index 86e1f541306..910b7832ed1 100644 --- a/src/storage/txn/commands/commit.rs +++ b/src/storage/txn/commands/commit.rs @@ -74,7 +74,7 @@ impl WriteCommand for Commit { to_be_write: write_data, rows, pr, - lock_info: None, + lock_info: vec![], released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, diff --git a/src/storage/txn/commands/compare_and_swap.rs b/src/storage/txn/commands/compare_and_swap.rs index 2fff0620b27..943fc6f69d1 100644 --- a/src/storage/txn/commands/compare_and_swap.rs +++ b/src/storage/txn/commands/compare_and_swap.rs @@ -112,7 +112,7 @@ impl WriteCommand for RawCompareAndSwap { to_be_write, rows, pr, - lock_info: None, + lock_info: vec![], released_locks: ReleasedLocks::new(), lock_guards, response_policy: ResponsePolicy::OnApplied, diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index f20fd957ed7..dabb6acfcc5 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ 
b/src/storage/txn/commands/flashback_to_version.rs @@ -124,7 +124,7 @@ impl WriteCommand for FlashbackToVersion { cmd: Command::FlashbackToVersionReadPhase(next_cmd), } })(), - lock_info: None, + lock_info: vec![], released_locks: ReleasedLocks::new(), lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index fc044a9fa78..2de3687d18d 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -5,6 +5,7 @@ #[macro_use] mod macros; pub(crate) mod acquire_pessimistic_lock; +pub(crate) mod acquire_pessimistic_lock_resumed; pub(crate) mod atomic_store; pub(crate) mod check_secondary_locks; pub(crate) mod check_txn_status; @@ -32,6 +33,7 @@ use std::{ }; pub use acquire_pessimistic_lock::AcquirePessimisticLock; +pub use acquire_pessimistic_lock_resumed::AcquirePessimisticLockResumed; pub use atomic_store::RawAtomicStore; pub use check_secondary_locks::CheckSecondaryLocks; pub use check_txn_status::CheckTxnStatus; @@ -56,11 +58,11 @@ pub use rollback::Rollback; use tikv_util::deadline::Deadline; use tracker::RequestType; pub use txn_heart_beat::TxnHeartBeat; -use txn_types::{Key, OldValues, TimeStamp, Value, Write}; +use txn_types::{Key, TimeStamp, Value, Write}; use crate::storage::{ kv::WriteData, - lock_manager::{self, LockManager, WaitTimeout}, + lock_manager::{self, LockManager, LockWaitToken, WaitTimeout}, metrics, mvcc::{Lock as MvccLock, MvccReader, ReleasedLock, SnapshotReader}, txn::{latch, ProcessResult, Result}, @@ -83,6 +85,7 @@ pub enum Command { Prewrite(Prewrite), PrewritePessimistic(PrewritePessimistic), AcquirePessimisticLock(AcquirePessimisticLock), + AcquirePessimisticLockResumed(AcquirePessimisticLockResumed), Commit(Commit), Cleanup(Cleanup), Rollback(Rollback), @@ -219,9 +222,9 @@ impl From for TypedCommand, + pub lock_info: Vec, pub released_locks: ReleasedLocks, pub lock_guards: Vec, pub response_policy: ResponsePolicy, @@ -399,22 
+402,36 @@ pub struct WriteResult { pub struct WriteResultLockInfo { pub lock_digest: lock_manager::LockDigest, pub key: Key, + pub should_not_exist: bool, pub lock_info_pb: LockInfo, pub parameters: PessimisticLockParameters, + pub hash_for_latch: u64, + /// If a request is woken up after waiting for some lock, and it encounters + /// another lock again after resuming, this field will carry the token + /// that was already allocated before. + pub lock_wait_token: LockWaitToken, } impl WriteResultLockInfo { - pub fn new(lock_info_pb: LockInfo, parameters: PessimisticLockParameters) -> Self { + pub fn new( + lock_info_pb: LockInfo, + parameters: PessimisticLockParameters, + key: Key, + should_not_exist: bool, + ) -> Self { let lock = lock_manager::LockDigest { ts: lock_info_pb.get_lock_version().into(), - hash: Key::from_raw(lock_info_pb.get_key()).gen_hash(), + hash: key.gen_hash(), }; - let key = Key::from_raw(lock_info_pb.get_key()); + let hash_for_latch = latch::Lock::hash(&key); Self { lock_digest: lock, key, + should_not_exist, lock_info_pb, parameters, + hash_for_latch, + lock_wait_token: LockWaitToken(None), } } } @@ -568,6 +585,7 @@ impl Command { Command::Prewrite(t) => t, Command::PrewritePessimistic(t) => t, Command::AcquirePessimisticLock(t) => t, + Command::AcquirePessimisticLockResumed(t) => t, Command::Commit(t) => t, Command::Cleanup(t) => t, Command::Rollback(t) => t, @@ -593,6 +611,7 @@ impl Command { Command::Prewrite(t) => t, Command::PrewritePessimistic(t) => t, Command::AcquirePessimisticLock(t) => t, + Command::AcquirePessimisticLockResumed(t) => t, Command::Commit(t) => t, Command::Cleanup(t) => t, Command::Rollback(t) => t, @@ -636,6 +655,7 @@ impl Command { Command::Prewrite(t) => t.process_write(snapshot, context), Command::PrewritePessimistic(t) => t.process_write(snapshot, context), Command::AcquirePessimisticLock(t) => t.process_write(snapshot, context), + Command::AcquirePessimisticLockResumed(t) => t.process_write(snapshot, context), 
Command::Commit(t) => t.process_write(snapshot, context), Command::Cleanup(t) => t.process_write(snapshot, context), Command::Rollback(t) => t.process_write(snapshot, context), diff --git a/src/storage/txn/commands/pause.rs b/src/storage/txn/commands/pause.rs index 05bbb508bdc..3dc7d06d5ef 100644 --- a/src/storage/txn/commands/pause.rs +++ b/src/storage/txn/commands/pause.rs @@ -48,7 +48,7 @@ impl WriteCommand for Pause { to_be_write: WriteData::default(), rows: 0, pr: ProcessResult::Res, - lock_info: None, + lock_info: vec![], released_locks: ReleasedLocks::new(), lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, diff --git a/src/storage/txn/commands/pessimistic_rollback.rs b/src/storage/txn/commands/pessimistic_rollback.rs index b575787208a..c35c362f19e 100644 --- a/src/storage/txn/commands/pessimistic_rollback.rs +++ b/src/storage/txn/commands/pessimistic_rollback.rs @@ -90,7 +90,7 @@ impl WriteCommand for PessimisticRollback { to_be_write: write_data, rows, pr: ProcessResult::MultiRes { results: vec![] }, - lock_info: None, + lock_info: vec![], released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 542c60819b5..2cd908412c3 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -14,7 +14,10 @@ use kvproto::kvrpcpb::{ PrewriteRequestPessimisticAction::{self, *}, }; use tikv_kv::SnapshotExt; -use txn_types::{Key, Mutation, OldValue, OldValues, TimeStamp, TxnExtra, Write, WriteType}; +use txn_types::{ + insert_old_value_if_resolved, Key, Mutation, OldValue, OldValues, TimeStamp, TxnExtra, Write, + WriteType, +}; use super::ReaderWithStats; use crate::storage::{ @@ -569,11 +572,13 @@ impl Prewriter { if need_min_commit_ts && final_min_commit_ts < ts { final_min_commit_ts = ts; } - if old_value.resolved() { - let key = key.append_ts(txn.start_ts); - self.old_values - .insert(key, (old_value, 
Some(mutation_type))); - } + insert_old_value_if_resolved( + &mut self.old_values, + key, + txn.start_ts, + old_value, + Some(mutation_type), + ); } Ok((..)) => { // If it needs min_commit_ts but min_commit_ts is zero, the lock @@ -681,7 +686,7 @@ impl Prewriter { to_be_write, rows, pr, - lock_info: None, + lock_info: vec![], released_locks, lock_guards, response_policy: ResponsePolicy::OnApplied, @@ -700,7 +705,7 @@ impl Prewriter { to_be_write: WriteData::default(), rows, pr, - lock_info: None, + lock_info: vec![], released_locks: ReleasedLocks::new(), lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, diff --git a/src/storage/txn/commands/resolve_lock.rs b/src/storage/txn/commands/resolve_lock.rs index b89e91593f9..463275b2e1f 100644 --- a/src/storage/txn/commands/resolve_lock.rs +++ b/src/storage/txn/commands/resolve_lock.rs @@ -145,7 +145,7 @@ impl WriteCommand for ResolveLock { to_be_write: write_data, rows, pr, - lock_info: None, + lock_info: vec![], released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, diff --git a/src/storage/txn/commands/resolve_lock_lite.rs b/src/storage/txn/commands/resolve_lock_lite.rs index a31211c564e..d336d88a9ca 100644 --- a/src/storage/txn/commands/resolve_lock_lite.rs +++ b/src/storage/txn/commands/resolve_lock_lite.rs @@ -70,7 +70,7 @@ impl WriteCommand for ResolveLockLite { to_be_write: write_data, rows, pr: ProcessResult::Res, - lock_info: None, + lock_info: vec![], released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, diff --git a/src/storage/txn/commands/rollback.rs b/src/storage/txn/commands/rollback.rs index 479f29cb276..52c05ae34c7 100644 --- a/src/storage/txn/commands/rollback.rs +++ b/src/storage/txn/commands/rollback.rs @@ -65,7 +65,7 @@ impl WriteCommand for Rollback { to_be_write: write_data, rows, pr: ProcessResult::Res, - lock_info: None, + lock_info: vec![], released_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, diff 
--git a/src/storage/txn/commands/txn_heart_beat.rs b/src/storage/txn/commands/txn_heart_beat.rs index 9bfbda5c748..f965b863494 100644 --- a/src/storage/txn/commands/txn_heart_beat.rs +++ b/src/storage/txn/commands/txn_heart_beat.rs @@ -90,7 +90,7 @@ impl WriteCommand for TxnHeartBeat { to_be_write: write_data, rows: 1, pr, - lock_info: None, + lock_info: vec![], released_locks: ReleasedLocks::new(), lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, diff --git a/src/storage/txn/latch.rs b/src/storage/txn/latch.rs index 86d16858bd3..12cc51207bb 100644 --- a/src/storage/txn/latch.rs +++ b/src/storage/txn/latch.rs @@ -116,14 +116,7 @@ impl Lock { I: IntoIterator, { // prevent from deadlock, so we sort and deduplicate the index - let mut required_hashes: Vec = keys - .into_iter() - .map(|key| { - let mut s = DefaultHasher::new(); - key.hash(&mut s); - s.finish() - }) - .collect(); + let mut required_hashes: Vec = keys.into_iter().map(|key| Self::hash(key)).collect(); required_hashes.sort_unstable(); required_hashes.dedup(); Lock { @@ -132,6 +125,12 @@ impl Lock { } } + pub fn hash(key: &K) -> u64 { + let mut s = DefaultHasher::new(); + key.hash(&mut s); + s.finish() + } + /// Returns true if all the required latches have be acquired, false /// otherwise. pub fn acquired(&self) -> bool { diff --git a/src/storage/txn/mod.rs b/src/storage/txn/mod.rs index 615ab98cb8c..86ceda2bdf1 100644 --- a/src/storage/txn/mod.rs +++ b/src/storage/txn/mod.rs @@ -45,6 +45,7 @@ use crate::storage::{ }; /// Process result of a command. +#[allow(clippy::large_enum_variant)] #[derive(Debug)] pub enum ProcessResult { Res, diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 3f5e48e8017..24ef7466e63 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -1075,8 +1075,10 @@ impl Scheduler { let mut pr = Some(pr); - // TODO: Lock wait handling here. 
- if let Some(lock_info) = lock_info { + if !lock_info.is_empty() { + assert_eq!(lock_info.len(), 1); + let lock_info = lock_info.into_iter().next().unwrap(); + // Only handle lock waiting if `wait_timeout` is set. Otherwise it indicates // that it's a lock-no-wait request and we need to report error // immediately. @@ -1427,6 +1429,7 @@ impl Scheduler { key: lock_info.key, lock_hash: lock_info.lock_digest.hash, parameters: lock_info.parameters, + should_not_exist: lock_info.should_not_exist, lock_wait_token, legacy_wake_up_index: None, key_cb: Some(ctx.get_callback_for_blocked_key().into()), @@ -1492,7 +1495,7 @@ mod tests { use kvproto::kvrpcpb::{BatchRollbackRequest, CheckTxnStatusRequest, Context}; use raftstore::store::{ReadStats, WriteStats}; use tikv_util::{config::ReadableSize, future::paired_future_callback}; - use txn_types::{Key, OldValues, TimeStamp}; + use txn_types::{Key, TimeStamp}; use super::*; use crate::storage::{ @@ -1575,7 +1578,7 @@ mod tests { Some(WaitTimeout::Default), false, TimeStamp::default(), - OldValues::default(), + false, false, false, Context::default(), diff --git a/src/storage/types.rs b/src/storage/types.rs index 6ad4c8e26ef..63bab09eb5c 100644 --- a/src/storage/types.rs +++ b/src/storage/types.rs @@ -131,6 +131,7 @@ pub struct PrewriteResult { pub one_pc_commit_ts: TimeStamp, } +#[derive(Clone, Debug, PartialEq)] #[cfg_attr(test, derive(Default))] pub struct PessimisticLockParameters { pub pb_ctx: kvrpcpb::Context, @@ -143,6 +144,7 @@ pub struct PessimisticLockParameters { pub min_commit_ts: TimeStamp, pub check_existence: bool, pub is_first_lock: bool, + pub lock_only_if_exists: bool, /// Whether it's allowed for an pessimistic lock request to acquire the lock /// even there is write conflict (i.e. 
the latest version's `commit_ts` is diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 43f1b504f25..dd8f49bbde3 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -42,7 +42,7 @@ use tikv::{ }, }; use tikv_util::{future::paired_future_callback, worker::dummy_scheduler, HandyRwLock}; -use txn_types::{Key, Mutation, OldValues, TimeStamp}; +use txn_types::{Key, Mutation, TimeStamp}; #[test] fn test_scheduler_leader_change_twice() { @@ -679,7 +679,7 @@ fn test_async_apply_prewrite_impl( None, false, 0.into(), - OldValues::default(), + false, false, false, ctx.clone(), @@ -1018,7 +1018,7 @@ fn test_async_apply_prewrite_1pc_impl( None, false, 0.into(), - OldValues::default(), + false, false, false, ctx.clone(), From 65ad2a52e6d64c5aef324877668cb554629b25b4 Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 17 Nov 2022 12:07:55 +0800 Subject: [PATCH 0339/1149] read: fix panic on witness check (#13767) close tikv/tikv#13764 After the remove peer conf-change is applied and before the peer is destroyed. There is a chance that local reader may not find the peer from region info. So this PR considers this case and fixes panic on witness check. 
Signed-off-by: Connor1996 Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/fsm/peer.rs | 5 ++ components/raftstore/src/store/worker/read.rs | 15 ++-- tests/failpoints/cases/mod.rs | 1 + tests/failpoints/cases/test_witness.rs | 71 +++++++++++++++++++ tests/integrations/raftstore/test_witness.rs | 2 +- 5 files changed, 84 insertions(+), 10 deletions(-) create mode 100644 tests/failpoints/cases/test_witness.rs diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 63bb878838c..b4c7d1fb097 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -3703,6 +3703,11 @@ where self.update_region(cp.region); fail_point!("change_peer_after_update_region"); + fail_point!( + "change_peer_after_update_region_store_3", + self.store_id() == 3, + |_| panic!("should not use return") + ); let now = Instant::now(); let (mut remove_self, mut need_ping) = (false, false); diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 0766a52a387..08e56aa7481 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -785,15 +785,6 @@ where return Err(e); } - // Check witness - if find_peer_by_id(&delegate.region, delegate.peer_id) - .unwrap() - .is_witness - { - TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.witness.inc()); - return Err(Error::RecoveryInProgress(region_id)); - } - // Check term. if let Err(e) = util::check_term(req, delegate.term) { debug!( @@ -813,6 +804,12 @@ where return Ok(None); } + // Check witness + if find_peer_by_id(&delegate.region, delegate.peer_id).map_or(true, |p| p.is_witness) { + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.witness.inc()); + return Err(Error::RecoveryInProgress(region_id)); + } + // Check whether the region is in the flashback state and the local read could // be performed. 
let is_in_flashback = delegate.region.is_in_flashback; diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index b291e86b88c..24a05f2ab9f 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -38,3 +38,4 @@ mod test_transaction; mod test_transfer_leader; mod test_ttl; mod test_unsafe_recovery; +mod test_witness; diff --git a/tests/failpoints/cases/test_witness.rs b/tests/failpoints/cases/test_witness.rs new file mode 100644 index 00000000000..cee75ff44b9 --- /dev/null +++ b/tests/failpoints/cases/test_witness.rs @@ -0,0 +1,71 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{iter::FromIterator, sync::Arc, time::Duration}; + +use futures::executor::block_on; +use kvproto::metapb; +use pd_client::PdClient; +use test_raftstore::*; +use tikv_util::store::find_peer; + +fn become_witness(cluster: &Cluster, region_id: u64, peer: &mut metapb::Peer) { + peer.set_role(metapb::PeerRole::Learner); + cluster.pd_client.must_add_peer(region_id, peer.clone()); + cluster.pd_client.must_remove_peer(region_id, peer.clone()); + peer.set_is_witness(true); + peer.set_id(peer.get_id() + 10); + cluster.pd_client.must_add_peer(region_id, peer.clone()); + peer.set_role(metapb::PeerRole::Voter); + cluster.pd_client.must_add_peer(region_id, peer.clone()); +} + +// Test the case local reader works well with witness peer. 
+#[test] +fn test_witness_update_region_in_local_reader() { + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1); + // nonwitness -> witness + let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + become_witness(&cluster, region.get_id(), &mut peer_on_store3); + + cluster.must_put(b"k0", b"v0"); + + // update region but the peer is not destroyed yet + fail::cfg("change_peer_after_update_region_store_3", "pause").unwrap(); + + cluster + .pd_client + .must_remove_peer(region.get_id(), peer_on_store3.clone()); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let mut request = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![new_get_cmd(b"k0")], + false, + ); + request.mut_header().set_peer(peer_on_store3); + request.mut_header().set_replica_read(true); + + let resp = cluster + .read(None, request.clone(), Duration::from_millis(100)) + .unwrap(); + assert_eq!( + resp.get_header().get_error().get_recovery_in_progress(), + &kvproto::errorpb::RecoveryInProgress { + region_id: region.get_id(), + ..Default::default() + } + ); + + fail::remove("change_peer_after_update_region_store_3"); +} diff --git a/tests/integrations/raftstore/test_witness.rs b/tests/integrations/raftstore/test_witness.rs index 8e36510753e..a2518cc64ae 100644 --- a/tests/integrations/raftstore/test_witness.rs +++ b/tests/integrations/raftstore/test_witness.rs @@ -448,7 +448,7 @@ fn test_witness_replica_read() { request.mut_header().set_replica_read(true); let resp = cluster - .call_command_on_node(nodes[2], request, Duration::from_millis(100)) + 
.read(None, request, Duration::from_millis(100)) .unwrap(); assert_eq!( resp.get_header().get_error().get_recovery_in_progress(), From 7dfb42ec36b8b7022125c6ded68183ae3dc64063 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Thu, 17 Nov 2022 16:15:56 +0800 Subject: [PATCH 0340/1149] *: update crossbeam-channel to avoid spin at sending side (#13807) close tikv/tikv#13815 According to https://github.com/crossbeam-rs/crossbeam/pull/835, spinning at the sending side is probably a bad idea because of large critical section and it's fixed in the recent version. This commit updates crossbeam-channel. It will reduce CPU usage a bit and improve performance. Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1722d0385e8..abb420d2264 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1139,9 +1139,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.1" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" dependencies = [ "cfg-if 1.0.0", "crossbeam-utils 0.8.8", From cc7345a3b82a42c9fee1f917afd8a8b729032717 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Thu, 17 Nov 2022 16:59:56 +0800 Subject: [PATCH 0341/1149] tests: refine the raftstore flashback tests (#13808) ref tikv/tikv#13303 Refine the raftstore flashback tests. 
Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- .../integrations/raftstore/test_flashback.rs | 258 ++++++++---------- 1 file changed, 107 insertions(+), 151 deletions(-) diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index 7fff4dad606..89a61223fa2 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -9,17 +9,19 @@ use futures::{channel::oneshot, executor::block_on}; use kvproto::{ errorpb::FlashbackInProgress, metapb, - raft_cmdpb::{AdminCmdType, CmdType, Request}, + raft_cmdpb::{AdminCmdType, RaftCmdResponse, Request}, }; use raftstore::store::Callback; use test_raftstore::*; use txn_types::WriteBatchFlags; +const TEST_KEY: &[u8] = b"k1"; +const TEST_VALUE: &[u8] = b"v1"; + #[test] fn test_prepare_flashback_after_split() { let mut cluster = new_node_cluster(0, 3); cluster.run(); - cluster.must_transfer_leader(1, new_peer(1, 1)); let old_region = cluster.get_region(b"a"); @@ -126,56 +128,42 @@ fn test_prepare_flashback_after_conf_change() { fn test_flashback_unprepared() { let mut cluster = new_node_cluster(0, 3); cluster.run(); - - cluster.must_transfer_leader(1, new_peer(2, 2)); cluster.must_transfer_leader(1, new_peer(1, 1)); - let mut region = cluster.get_region(b"k1"); - let mut cmd = Request::default(); - cmd.set_cmd_type(CmdType::Put); - let mut req = new_request( - region.get_id(), - region.take_region_epoch(), - vec![cmd], - false, + let mut region = cluster.get_region(TEST_KEY); + must_get_flashback_not_prepared_error( + &mut cluster, + &mut region, + new_put_cmd(TEST_KEY, TEST_VALUE), ); - let new_leader = cluster.query_leader(1, region.get_id(), Duration::from_secs(1)); - req.mut_header().set_peer(new_leader.unwrap()); - req.mut_header() - .set_flags(WriteBatchFlags::FLASHBACK.bits()); - let resp = cluster.call_command(req, Duration::from_secs(3)).unwrap(); - 
assert!(resp.get_header().get_error().has_flashback_not_prepared()); } #[test] fn test_flashback_for_schedule() { let mut cluster = new_node_cluster(0, 3); cluster.run(); - cluster.must_transfer_leader(1, new_peer(2, 2)); cluster.must_transfer_leader(1, new_peer(1, 1)); - // Prepare for flashback - let region = cluster.get_region(b"k1"); + // Prepare flashback. + let region = cluster.get_region(TEST_KEY); cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); - - // Verify the schedule is disabled. - let mut region = cluster.get_region(b"k3"); + // Make sure the schedule is disabled. + let mut region = cluster.get_region(TEST_KEY); let admin_req = new_transfer_leader_cmd(new_peer(2, 2)); let transfer_leader = new_admin_request(region.get_id(), ®ion.take_region_epoch(), admin_req); let resp = cluster .call_command_on_leader(transfer_leader, Duration::from_secs(3)) .unwrap(); - let e = resp.get_header().get_error(); assert_eq!( - e.get_flashback_in_progress(), + resp.get_header().get_error().get_flashback_in_progress(), &FlashbackInProgress { region_id: region.get_id(), ..Default::default() } ); - + // Finish flashback. cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); // Transfer leader to (2, 2) should succeed. cluster.must_transfer_leader(1, new_peer(2, 2)); @@ -187,27 +175,33 @@ fn test_flashback_for_write() { cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); - // Write for cluster - let value = vec![1_u8; 8096]; - multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); - - // Prepare for flashback - let region = cluster.get_region(b"k1"); + // Write without flashback flag. + let mut region = cluster.get_region(TEST_KEY); + must_request_without_flashback_flag( + &mut cluster, + &mut region.clone(), + new_put_cmd(TEST_KEY, TEST_VALUE), + ); + // Prepare flashback. 
cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); - // Write will be blocked - let value = vec![1_u8; 8096]; - must_get_error_flashback_in_progress(&mut cluster, ®ion, new_put_cmd(b"k1", &value)); - // Write with flashback flag will succeed - must_do_cmd_with_flashback_flag( + must_get_flashback_in_progress_error( &mut cluster, &mut region.clone(), - new_put_cmd(b"k1", &value), + new_put_cmd(TEST_KEY, TEST_VALUE), + ); + // Write with flashback flag will succeed. + must_request_with_flashback_flag( + &mut cluster, + &mut region.clone(), + new_put_cmd(TEST_KEY, TEST_VALUE), ); - cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); - - multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); + must_request_without_flashback_flag( + &mut cluster, + &mut region, + new_put_cmd(TEST_KEY, TEST_VALUE), + ); } #[test] @@ -216,30 +210,18 @@ fn test_flashback_for_read() { cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); - // Write for cluster - let value = vec![1_u8; 8096]; - multi_do_cmd(&mut cluster, new_put_cf_cmd("write", b"k1", &value)); - // read for cluster - multi_do_cmd(&mut cluster, new_get_cf_cmd("write", b"k1")); - - // Prepare for flashback - let region = cluster.get_region(b"k1"); + // Read without flashback flag. + let mut region = cluster.get_region(TEST_KEY); + must_request_without_flashback_flag(&mut cluster, &mut region.clone(), new_get_cmd(TEST_KEY)); + // Prepare flashback. cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); - - // read will be blocked - must_get_error_flashback_in_progress(&mut cluster, ®ion, new_get_cf_cmd("write", b"k1")); - - // Verify the read can be executed if add flashback flag in request's - // header. - must_do_cmd_with_flashback_flag( - &mut cluster, - &mut region.clone(), - new_get_cf_cmd("write", b"k1"), - ); - + // Read will be blocked. 
+ must_get_flashback_in_progress_error(&mut cluster, &mut region.clone(), new_get_cmd(TEST_KEY)); + // Read with flashback flag will succeed. + must_request_with_flashback_flag(&mut cluster, &mut region.clone(), new_get_cmd(TEST_KEY)); + // Finish flashback. cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); - - multi_do_cmd(&mut cluster, new_get_cf_cmd("write", b"k1")); + must_request_without_flashback_flag(&mut cluster, &mut region, new_get_cmd(TEST_KEY)); } // LocalReader will attempt to renew the lease. @@ -249,62 +231,44 @@ fn test_flashback_for_read() { fn test_flashback_for_local_read() { let mut cluster = new_node_cluster(0, 3); let election_timeout = configure_for_lease_read(&mut cluster, Some(50), None); - // Avoid triggering the log compaction in this test case. cluster.cfg.raft_store.raft_log_gc_threshold = 100; - + cluster.run(); + cluster.must_put(TEST_KEY, TEST_VALUE); + let mut region = cluster.get_region(TEST_KEY); let store_id = 3; let peer = new_peer(store_id, 3); - cluster.run(); - - cluster.must_put(b"k1", b"v1"); - let mut region = cluster.get_region(b"k1"); - cluster.must_transfer_leader(region.get_id(), peer.clone()); + cluster.must_transfer_leader(region.get_id(), peer); // Check local read before prepare flashback let state = cluster.raft_local_state(region.get_id(), store_id); let last_index = state.get_last_index(); // Make sure the leader transfer procedure timeouts. sleep(election_timeout * 2); - must_read_on_peer(&mut cluster, peer.clone(), region.clone(), b"k1", b"v1"); + must_request_without_flashback_flag(&mut cluster, &mut region.clone(), new_get_cmd(TEST_KEY)); // Check the leader does a local read. let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index); - // Prepare for flashback + // Prepare flashback. cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); - // Check the leader does a local read. 
let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index + 1); // Wait for apply_res to set leader lease. sleep_ms(500); - - must_error_read_on_peer( - &mut cluster, - peer.clone(), - region.clone(), - b"k1", - Duration::from_secs(1), - ); - + // Read should fail. + must_get_flashback_in_progress_error(&mut cluster, &mut region.clone(), new_get_cmd(TEST_KEY)); // Wait for the leader's lease to expire to ensure that a renew lease interval // has elapsed. sleep(election_timeout * 2); - must_error_read_on_peer( - &mut cluster, - peer.clone(), - region.clone(), - b"k1", - Duration::from_secs(1), - ); - + // Read should fail. + must_get_flashback_in_progress_error(&mut cluster, &mut region.clone(), new_get_cmd(TEST_KEY)); // Also check read by propose was blocked let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index + 1); - + // Finish flashback. cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); - let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index + 2); @@ -313,25 +277,12 @@ fn test_flashback_for_local_read() { let last_index = state.get_last_index(); // Make sure the leader transfer procedure timeouts. sleep(election_timeout * 2); - must_read_on_peer(&mut cluster, peer, region.clone(), b"k1", b"v1"); - + must_request_without_flashback_flag(&mut cluster, &mut region.clone(), new_get_cmd(TEST_KEY)); // Check the leader does a local read. let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index); - // A local read with flashback flag will also be blocked. 
- let mut req = new_request( - region.get_id(), - region.take_region_epoch(), - vec![new_get_cmd(b"k1")], - false, - ); - let new_leader = cluster.query_leader(1, region.get_id(), Duration::from_secs(1)); - req.mut_header().set_peer(new_leader.unwrap()); - req.mut_header() - .set_flags(WriteBatchFlags::FLASHBACK.bits()); - let resp = cluster.call_command(req, Duration::from_secs(3)).unwrap(); - assert!(resp.get_header().get_error().has_flashback_not_prepared()); + must_get_flashback_not_prepared_error(&mut cluster, &mut region, new_get_cmd(TEST_KEY)); } #[test] @@ -340,7 +291,7 @@ fn test_flashback_for_status_cmd_as_region_detail() { cluster.run(); let leader = cluster.leader_of_region(1).unwrap(); - let region = cluster.get_region(b"k1"); + let region = cluster.get_region(TEST_KEY); cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); let region_detail = cluster.region_detail(region.get_id(), leader.get_store_id()); @@ -434,58 +385,63 @@ fn must_check_flashback_state( ); } -fn multi_do_cmd(cluster: &mut Cluster, cmd: Request) { - for _ in 0..100 { - let mut reqs = vec![]; - for _ in 0..100 { - reqs.push(cmd.clone()); - } - cluster.batch_put(b"k1", reqs).unwrap(); - } -} - -fn must_do_cmd_with_flashback_flag( +fn request( cluster: &mut Cluster, region: &mut metapb::Region, - cmd: Request, -) { - // Verify the read can be executed if add flashback flag in request's - // header. 
- let mut req = new_request( + req: Request, + with_flashback_flag: bool, +) -> RaftCmdResponse { + let mut cmd_req = new_request( region.get_id(), region.take_region_epoch(), - vec![cmd], + vec![req], false, ); let new_leader = cluster.query_leader(1, region.get_id(), Duration::from_secs(1)); - req.mut_header().set_peer(new_leader.unwrap()); - req.mut_header() - .set_flags(WriteBatchFlags::FLASHBACK.bits()); - let resp = cluster.call_command(req, Duration::from_secs(3)).unwrap(); + let header = cmd_req.mut_header(); + header.set_peer(new_leader.unwrap()); + if with_flashback_flag { + header.set_flags(WriteBatchFlags::FLASHBACK.bits()); + } + cluster + .call_command(cmd_req, Duration::from_secs(3)) + .unwrap() +} + +// Make sure the request could be executed with flashback flag. +fn must_request_with_flashback_flag( + cluster: &mut Cluster, + region: &mut metapb::Region, + req: Request, +) { + let resp = request(cluster, region, req, true); assert!(!resp.get_header().has_error()); } -fn must_get_error_flashback_in_progress( +fn must_get_flashback_not_prepared_error( cluster: &mut Cluster, - region: &metapb::Region, - cmd: Request, + region: &mut metapb::Region, + req: Request, ) { - for _ in 0..100 { - let mut reqs = vec![]; - for _ in 0..100 { - reqs.push(cmd.clone()); - } - match cluster.batch_put(b"k1", reqs) { - Ok(_) => {} - Err(e) => { - assert_eq!( - e.get_flashback_in_progress(), - &FlashbackInProgress { - region_id: region.get_id(), - ..Default::default() - } - ); - } - } - } + let resp = request(cluster, region, req, true); + assert!(resp.get_header().get_error().has_flashback_not_prepared()); +} + +// Make sure the request could be executed without flashback flag. 
+fn must_request_without_flashback_flag( + cluster: &mut Cluster, + region: &mut metapb::Region, + req: Request, +) { + let resp = request(cluster, region, req, false); + assert!(!resp.get_header().has_error()); +} + +fn must_get_flashback_in_progress_error( + cluster: &mut Cluster, + region: &mut metapb::Region, + req: Request, +) { + let resp = request(cluster, region, req, false); + assert!(resp.get_header().get_error().has_flashback_in_progress()); } From ec42962f9f26261580e5b7bd8ed62347921dba73 Mon Sep 17 00:00:00 2001 From: Hu# Date: Thu, 17 Nov 2022 17:57:56 +0800 Subject: [PATCH 0342/1149] cmd: support tikv-ctl to get the regions info within a given key range (#13768) ref tikv/tikv#13760 Provide a way to get the regions info within a given key range just like which in [pd-ctl](https://docs.pingcap.com/zh/tidb/dev/pd-control#region-key---formatrawencodehex-key) Also support limit for tikv-ctl raft region Signed-off-by: husharp Co-authored-by: Xinye Tao --- cmd/tikv-ctl/src/cmd.rs | 15 ++++++++-- cmd/tikv-ctl/src/executor.rs | 27 +++++++++++++++-- cmd/tikv-ctl/src/main.rs | 13 +++++++- cmd/tikv-ctl/src/util.rs | 58 ++++++++++++++++++++++++++++++++++++ 4 files changed, 107 insertions(+), 6 deletions(-) diff --git a/cmd/tikv-ctl/src/cmd.rs b/cmd/tikv-ctl/src/cmd.rs index eed2d7e8283..657d296109c 100644 --- a/cmd/tikv-ctl/src/cmd.rs +++ b/cmd/tikv-ctl/src/cmd.rs @@ -592,7 +592,6 @@ pub enum RaftCmd { #[structopt( short = "r", aliases = &["region"], - required_unless = "all-regions", conflicts_with = "all-regions", use_delimiter = true, require_delimiter = true, @@ -604,10 +603,22 @@ pub enum RaftCmd { // `regions` must be None when `all_regions` is present, // so we left `all_regions` unused. 
#[allow(dead_code)] - #[structopt(long, required_unless = "regions", conflicts_with = "regions")] + #[structopt(long, conflicts_with = "regions")] /// Print info for all regions all_regions: bool, + #[structopt(long, default_value = "")] + /// hex start key + start: String, + + #[structopt(long, default_value = "")] + /// hex end key + end: String, + + #[structopt(long, default_value = "16")] + /// Limit the number of keys to scan + limit: usize, + #[structopt(long)] /// Skip tombstone regions skip_tombstone: bool, diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 80915dbc564..b2d25a32d5b 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -23,7 +23,7 @@ use pd_client::{Config as PdConfig, PdClient, RpcClient}; use protobuf::Message; use raft::eraftpb::{ConfChange, ConfChangeV2, Entry, EntryType}; use raft_log_engine::RaftLogEngine; -use raftstore::store::INIT_EPOCH_CONF_VER; +use raftstore::store::{util::build_key_range, INIT_EPOCH_CONF_VER}; use security::SecurityManager; use serde_json::json; use tikv::{ @@ -151,17 +151,38 @@ pub trait DebugExecutor { println!("total region size: {}", convert_gbmb(total_size as u64)); } - fn dump_region_info(&self, region_ids: Option>, skip_tombstone: bool) { + fn dump_region_info( + &self, + region_ids: Option>, + start_key: &[u8], + end_key: &[u8], + limit: usize, + skip_tombstone: bool, + ) { let region_ids = region_ids.unwrap_or_else(|| self.get_all_regions_in_store()); let mut region_objects = serde_json::map::Map::new(); for region_id in region_ids { + if limit > 0 && region_objects.len() >= limit { + break; + } let r = self.get_region_info(region_id); if skip_tombstone { let region_state = r.region_local_state.as_ref(); if region_state.map_or(false, |s| s.get_state() == PeerState::Tombstone) { - return; + continue; } } + let region = r + .region_local_state + .as_ref() + .map(|s| s.get_region().clone()) + .unwrap(); + if !check_intersect_of_range( + 
&build_key_range(region.get_start_key(), region.get_end_key(), false), + &build_key_range(start_key, end_key, false), + ) { + continue; + } let region_object = json!({ "region_id": region_id, "region_local_state": r.region_local_state.map(|s| { diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index be5069397e4..72078d07f62 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -272,9 +272,20 @@ fn main() { RaftCmd::Region { regions, skip_tombstone, + start, + end, + limit, .. } => { - debug_executor.dump_region_info(regions, skip_tombstone); + let start_key = from_hex(&start).unwrap(); + let end_key = from_hex(&end).unwrap(); + debug_executor.dump_region_info( + regions, + &start_key, + &end_key, + limit, + skip_tombstone, + ); } }, Cmd::Size { region, cf } => { diff --git a/cmd/tikv-ctl/src/util.rs b/cmd/tikv-ctl/src/util.rs index d7e83511d3e..0e67c905e8d 100644 --- a/cmd/tikv-ctl/src/util.rs +++ b/cmd/tikv-ctl/src/util.rs @@ -2,6 +2,7 @@ use std::{borrow::ToOwned, error::Error, str, str::FromStr, u64}; +use kvproto::kvrpcpb::KeyRange; use server::setup::initial_logger; use tikv::config::TikvConfig; @@ -62,8 +63,27 @@ pub fn perror_and_exit(prefix: &str, e: E) -> ! { tikv_util::logger::exit_process_gracefully(-1); } +// Check if region's `key_range` intersects with `key_range_limit`. 
+pub fn check_intersect_of_range(key_range: &KeyRange, key_range_limit: &KeyRange) -> bool { + if !key_range.get_end_key().is_empty() + && !key_range_limit.get_start_key().is_empty() + && key_range.get_end_key() <= key_range_limit.get_start_key() + { + return false; + } + if !key_range_limit.get_end_key().is_empty() + && !key_range.get_start_key().is_empty() + && key_range_limit.get_end_key() < key_range.get_start_key() + { + return false; + } + true +} + #[cfg(test)] mod tests { + use raftstore::store::util::build_key_range; + use super::*; #[test] @@ -73,4 +93,42 @@ mod tests { assert_eq!(from_hex("0x74").unwrap(), result); assert_eq!(from_hex("0X74").unwrap(), result); } + + #[test] + fn test_included_region_in_range() { + // To avoid unfolding the code when `make format` is called + fn range(start: &[u8], end: &[u8]) -> KeyRange { + build_key_range(start, end, false) + } + let mut region = range(&[0x02], &[0x05]); + // region absolutely in range + assert!(check_intersect_of_range(®ion, &range(&[0x02], &[0x05]))); + assert!(check_intersect_of_range(®ion, &range(&[0x01], &[]))); + assert!(check_intersect_of_range(®ion, &range(&[0x02], &[]))); + assert!(check_intersect_of_range(®ion, &range(&[], &[]))); + assert!(check_intersect_of_range(®ion, &range(&[0x02], &[0x06]))); + assert!(check_intersect_of_range(®ion, &range(&[0x01], &[0x05]))); + assert!(check_intersect_of_range(®ion, &range(&[], &[0x05]))); + // region intersects with range + assert!(check_intersect_of_range(®ion, &range(&[0x04], &[0x05]))); + assert!(check_intersect_of_range(®ion, &range(&[0x04], &[]))); + assert!(check_intersect_of_range(®ion, &range(&[0x01], &[0x03]))); + assert!(check_intersect_of_range(®ion, &range(&[], &[0x03]))); + assert!(check_intersect_of_range(®ion, &range(&[], &[0x02]))); // region is left-closed and right-open interval + // range absolutely in region also need to return true + assert!(check_intersect_of_range(®ion, &range(&[0x03], &[0x04]))); + // region not intersects 
with range + assert!(!check_intersect_of_range(®ion, &range(&[0x05], &[]))); // region is left-closed and right-open interval + assert!(!check_intersect_of_range(®ion, &range(&[0x06], &[]))); + assert!(!check_intersect_of_range(®ion, &range(&[], &[0x01]))); + // check last region + region = range(&[0x02], &[]); + assert!(check_intersect_of_range(®ion, &range(&[0x02], &[0x05]))); + assert!(check_intersect_of_range(®ion, &range(&[0x02], &[]))); + assert!(check_intersect_of_range(®ion, &range(&[0x01], &[0x05]))); + assert!(check_intersect_of_range(®ion, &range(&[], &[0x05]))); + assert!(check_intersect_of_range(®ion, &range(&[], &[0x02]))); + assert!(check_intersect_of_range(®ion, &range(&[], &[]))); + assert!(!check_intersect_of_range(®ion, &range(&[], &[0x01]))); + } } From 3179b12df572e17f87e0d3c8689ddbfffe468018 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Fri, 18 Nov 2022 14:01:56 +0800 Subject: [PATCH 0343/1149] mvcc: skip accumulated locks by a second get (#13784) ref tikv/tikv#13694 Prewrite and acquire_pessimistic_lock use get_write to get the latest PUT or DELETE record. Point get is implemented by PointGetter. In these cases, with last_change_ts and versions_to_last_change, now we use an additional get operation to find the record directly if versions_to_last_change reaches SEEK_BOUND. I think no additional metrics are needed because the count of get operation of the write CF represents represents it. 
Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- src/storage/mvcc/reader/point_getter.rs | 35 ++++++++++-- src/storage/mvcc/reader/reader.rs | 74 ++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 8 deletions(-) diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index 012189201c5..651762aa88e 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -5,6 +5,7 @@ use std::borrow::Cow; use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; use kvproto::kvrpcpb::{IsolationLevel, WriteConflictReason}; +use tikv_kv::SEEK_BOUND; use txn_types::{Key, Lock, LockType, TimeStamp, TsSet, Value, WriteRef, WriteType}; use crate::storage::{ @@ -281,10 +282,9 @@ impl PointGetter { return Ok(None); } + let mut write = WriteRef::parse(self.write_cursor.value(&mut self.statistics.write))?; + let mut owned_value: Vec; // To work around lifetime problem loop { - // No need to compare user key because it uses prefix seek. - let write = WriteRef::parse(self.write_cursor.value(&mut self.statistics.write))?; - if !write.check_gc_fence_as_latest_version(self.ts) { return Ok(None); } @@ -315,13 +315,35 @@ impl PointGetter { return Ok(None); } WriteType::Lock | WriteType::Rollback => { - // Continue iterate next `write`. + if write.versions_to_last_change < SEEK_BOUND || write.last_change_ts.is_zero() + { + // Continue iterate next `write`. + } else { + let commit_ts = write.last_change_ts; + let key_with_ts = user_key.clone().append_ts(commit_ts); + match self.snapshot.get_cf(CF_WRITE, &key_with_ts)? 
{ + Some(v) => owned_value = v, + None => return Ok(None), + } + self.statistics.write.get += 1; + write = WriteRef::parse(&owned_value)?; + assert!( + write.write_type == WriteType::Put + || write.write_type == WriteType::Delete, + "Write record pointed by last_change_ts {} should be Put or Delete, but got {:?}", + commit_ts, + write.write_type, + ); + continue; + } } } if !self.write_cursor.next(&mut self.statistics.write) { return Ok(None); } + // No need to compare user key because it uses prefix seek. + write = WriteRef::parse(self.write_cursor.value(&mut self.statistics.write))?; } } @@ -611,7 +633,7 @@ mod tests { must_get_value(&mut getter, b"foo2", b"foo2v"); let s = getter.take_statistics(); // We have to check every version - assert_seek_next_prev(&s.write, 1, 40, 0); + assert_seek_next_prev(&s.write, 1, 0, 0); assert_eq!( s.processed_size, Key::from_raw(b"foo2").len() @@ -621,7 +643,8 @@ mod tests { // Get again must_get_value(&mut getter, b"foo2", b"foo2v"); let s = getter.take_statistics(); - assert_seek_next_prev(&s.write, 1, 40, 0); + assert_seek_next_prev(&s.write, 1, 0, 0); + assert_eq!(s.write.get, 1); assert_eq!( s.processed_size, Key::from_raw(b"foo2").len() diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 61975aa666c..c8ca1a5f671 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -6,7 +6,7 @@ use kvproto::{ errorpb::{self, EpochNotMatch, StaleCommand}, kvrpcpb::Context, }; -use tikv_kv::SnapshotExt; +use tikv_kv::{SnapshotExt, SEEK_BOUND}; use txn_types::{Key, Lock, OldValue, TimeStamp, Value, Write, WriteRef, WriteType}; use crate::storage::{ @@ -382,7 +382,31 @@ impl MvccReader { WriteType::Delete => { return Ok(None); } - WriteType::Lock | WriteType::Rollback => ts = commit_ts.prev(), + WriteType::Lock | WriteType::Rollback => { + if write.versions_to_last_change < SEEK_BOUND + || write.last_change_ts.is_zero() + { + ts = commit_ts.prev(); + } else { + let 
commit_ts = write.last_change_ts; + let key_with_ts = key.clone().append_ts(commit_ts); + let Some(value) = self + .snapshot + .get_cf(CF_WRITE, &key_with_ts)? else { + return Ok(None); + }; + self.statistics.write.get += 1; + let write = WriteRef::parse(&value)?.to_owned(); + assert!( + write.write_type == WriteType::Put + || write.write_type == WriteType::Delete, + "Write record pointed by last_change_ts {} should be Put or Delete, but got {:?}", + commit_ts, + write.write_type, + ); + return Ok(Some((write, commit_ts))); + } + } } } None => return Ok(None), @@ -2499,4 +2523,50 @@ pub mod tests { assert_eq!(reader.statistics.write.seek_tombstone, *tombstones); } } + + #[test] + fn test_get_write_second_get() { + let path = tempfile::Builder::new() + .prefix("_test_storage_mvcc_reader_get_write_second_get") + .tempdir() + .unwrap(); + let path = path.path().to_str().unwrap(); + let region = make_region(1, vec![], vec![]); + let db = open_db(path, true); + let mut engine = RegionEngine::new(&db, ®ion); + + let (k, v) = (b"k", b"v"); + let m = Mutation::make_put(Key::from_raw(k), v.to_vec()); + engine.prewrite(m, k, 1); + engine.commit(k, 1, 2); + + // Write enough ROLLBACK/LOCK recrods + engine.rollback(k, 5); + for start_ts in (6..30).into_iter().step_by(2) { + engine.lock(k, start_ts, start_ts + 1); + } + + let snap = RegionSnapshot::::from_raw(db, region); + let mut reader = MvccReader::new(snap, None, false); + + let key = Key::from_raw(k); + // Get write record whose commit_ts = 2 + let w2 = reader + .get_write(&key, TimeStamp::new(2), None) + .unwrap() + .unwrap(); + + // Clear statistics first + reader.statistics = Statistics::default(); + let (write, commit_ts) = reader + .get_write_with_commit_ts(&key, 40.into(), None) + .unwrap() + .unwrap(); + assert_eq!(commit_ts, 2.into()); + assert_eq!(write, w2); + // versions_to_last_change should be large enough to trigger a second get + // instead of calling a series of next, so the count of next should be 0 
instead + assert_eq!(reader.statistics.write.next, 0); + assert_eq!(reader.statistics.write.get, 1); + } } From be2aec1ee49b6b68a074a036db059eb0084080c8 Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 18 Nov 2022 14:41:55 +0800 Subject: [PATCH 0344/1149] test: fix flaky witness test (#13822) close tikv/tikv#13816 fix flaky witness test Signed-off-by: Connor1996 Co-authored-by: Xinye Tao --- components/test_raftstore/src/cluster.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index c4ac98180a6..f9088ff4e3b 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1202,7 +1202,7 @@ impl Cluster { self.get_engine(store_id) .get_msg_cf::(engine_traits::CF_RAFT, &key) .unwrap() - .unwrap() + .unwrap_or_default() } pub fn get_raft_local_state(&self, region_id: u64, store_id: u64) -> Option { From 5a1f11048714054232fd12bde01f11a1729e498d Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Fri, 18 Nov 2022 17:39:56 +0800 Subject: [PATCH 0345/1149] txn: make `txn_source` be u64 type (#13817) ref tikv/tikv#13779 Signed-off-by: xiongjiwei Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/txn_types/src/lock.rs | 14 +++++++++----- components/txn_types/src/write.rs | 14 ++++++-------- src/storage/mvcc/mod.rs | 2 +- src/storage/txn/actions/commit.rs | 22 ++++++++++++---------- src/storage/txn/actions/prewrite.rs | 2 +- src/storage/txn/actions/tests.rs | 6 +++--- src/storage/txn/commands/prewrite.rs | 2 +- 8 files changed, 34 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index abb420d2264..7425528342d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2694,7 +2694,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#29a30c4ef9c52aafb1b1da73dd9df60857068114" +source = 
"git+https://github.com/pingcap/kvproto.git#51120697d051df163ec8aa313ee1916a68b07984" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/txn_types/src/lock.rs b/components/txn_types/src/lock.rs index 28df70677a5..040487388f9 100644 --- a/components/txn_types/src/lock.rs +++ b/components/txn_types/src/lock.rs @@ -96,7 +96,11 @@ pub struct Lock { /// The source of this txn. It is used by ticdc, if the value is 0 ticdc /// will sync the kv change event to downstream, if it is not 0, ticdc /// may ignore this change event. - pub txn_source: u8, + /// + /// We use `u64` to reserve more space for future use. For now, the upper + /// application is limited to setting this value under `0x80`, + /// so there will no more cost to change it to `u64`. + pub txn_source: u64, } impl std::fmt::Debug for Lock { @@ -182,7 +186,7 @@ impl Lock { #[inline] #[must_use] - pub fn set_txn_source(mut self, source: u8) -> Self { + pub fn set_txn_source(mut self, source: u64) -> Self { self.txn_source = source; self } @@ -231,7 +235,7 @@ impl Lock { } if self.txn_source != 0 { b.push(TXN_SOURCE_PREFIX); - b.push(self.txn_source); + b.encode_var_u64(self.txn_source).unwrap(); } b } @@ -266,7 +270,7 @@ impl Lock { size += 1 + size_of::() + MAX_VAR_U64_LEN; } if self.txn_source != 0 { - size += 2; + size += 1 + MAX_VAR_U64_LEN; } size } @@ -345,7 +349,7 @@ impl Lock { versions_to_last_change = number::decode_var_u64(&mut b)?; } TXN_SOURCE_PREFIX => { - txn_source = b.read_u8()?; + txn_source = number::decode_var_u64(&mut b)?; } _ => { // To support forward compatibility, all fields should be serialized in order diff --git a/components/txn_types/src/write.rs b/components/txn_types/src/write.rs index 6c46688defa..52777e5e4b2 100644 --- a/components/txn_types/src/write.rs +++ b/components/txn_types/src/write.rs @@ -160,7 +160,7 @@ pub struct Write { /// to find the latest PUT/DELETE record pub versions_to_last_change: u64, /// The source of this txn. 
- pub txn_source: u8, + pub txn_source: u64, } impl std::fmt::Debug for Write { @@ -248,7 +248,7 @@ impl Write { #[inline] #[must_use] - pub fn set_txn_source(mut self, source: u8) -> Self { + pub fn set_txn_source(mut self, source: u64) -> Self { self.txn_source = source; self } @@ -323,7 +323,7 @@ pub struct WriteRef<'a> { /// to find the latest PUT/DELETE record pub versions_to_last_change: u64, /// The source of this txn. - pub txn_source: u8, + pub txn_source: u64, } impl WriteRef<'_> { @@ -373,9 +373,7 @@ impl WriteRef<'_> { versions_to_last_change = number::decode_var_u64(&mut b)?; } TXN_SOURCE_PREFIX => { - txn_source = b - .read_u8() - .map_err(|_| Error::from(ErrorInner::BadFormatWrite))? + txn_source = number::decode_var_u64(&mut b)?; } _ => { // To support forward compatibility, all fields should be serialized in order @@ -420,7 +418,7 @@ impl WriteRef<'_> { } if self.txn_source != 0 { b.push(TXN_SOURCE_PREFIX); - b.push(self.txn_source); + b.encode_var_u64(self.txn_source).unwrap(); } b } @@ -438,7 +436,7 @@ impl WriteRef<'_> { size += 1 + size_of::() + MAX_VAR_U64_LEN; } if self.txn_source != 0 { - size += 2; + size += 1 + MAX_VAR_U64_LEN; } size } diff --git a/src/storage/mvcc/mod.rs b/src/storage/mvcc/mod.rs index 997cde71020..3dca7a219f9 100644 --- a/src/storage/mvcc/mod.rs +++ b/src/storage/mvcc/mod.rs @@ -706,7 +706,7 @@ pub mod tests { assert_eq!(ts, commit_ts.into()); } - pub fn must_get_txn_source(engine: &mut E, key: &[u8], ts: u64, txn_source: u8) { + pub fn must_get_txn_source(engine: &mut E, key: &[u8], ts: u64, txn_source: u64) { let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = SnapshotReader::new(TimeStamp::from(ts), snapshot, true); let write = reader diff --git a/src/storage/txn/actions/commit.rs b/src/storage/txn/actions/commit.rs index 1b8018e2aad..bfb1d39f768 100644 --- a/src/storage/txn/actions/commit.rs +++ b/src/storage/txn/actions/commit.rs @@ -355,15 +355,17 @@ pub mod tests { #[test] fn 
test_2pc_with_txn_source() { - let mut engine = TestEngineBuilder::new().build().unwrap(); - - let k = b"k"; - // WriteType is Put - must_prewrite_put_with_txn_soucre(&mut engine, k, b"v2", k, 25, 1); - let lock = must_locked(&mut engine, k, 25); - assert_eq!(lock.txn_source, 1); - must_succeed(&mut engine, k, 25, 30); - let write = must_written(&mut engine, k, 25, 30, WriteType::Put); - assert_eq!(write.txn_source, 1); + for source in [0x1, 0x85] { + let mut engine = TestEngineBuilder::new().build().unwrap(); + + let k = b"k"; + // WriteType is Put + must_prewrite_put_with_txn_soucre(&mut engine, k, b"v2", k, 25, source); + let lock = must_locked(&mut engine, k, 25); + assert_eq!(lock.txn_source, source); + must_succeed(&mut engine, k, 25, 30); + let write = must_written(&mut engine, k, 25, 30, WriteType::Put); + assert_eq!(write.txn_source, source); + } } } diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 48caa3795af..46c9774dd52 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -172,7 +172,7 @@ pub struct TransactionProperties<'a> { pub need_old_value: bool, pub is_retry_request: bool, pub assertion_level: AssertionLevel, - pub txn_source: u8, + pub txn_source: u64, } impl<'a> TransactionProperties<'a> { diff --git a/src/storage/txn/actions/tests.rs b/src/storage/txn/actions/tests.rs index 79d31a08c9c..e6872ef493f 100644 --- a/src/storage/txn/actions/tests.rs +++ b/src/storage/txn/actions/tests.rs @@ -113,7 +113,7 @@ pub fn must_prewrite_put_impl_with_should_not_exist( assertion_level: AssertionLevel, should_not_exist: bool, region_id: Option, - txn_source: u32, + txn_source: u64, ) { let mut ctx = Context::default(); ctx.set_txn_source(txn_source); @@ -158,7 +158,7 @@ pub fn must_prewrite_put_impl_with_should_not_exist( need_old_value: false, is_retry_request, assertion_level, - txn_source: txn_source as u8, + txn_source, }, mutation, secondary_keys, @@ -230,7 +230,7 @@ pub 
fn must_prewrite_put_with_txn_soucre( value: &[u8], pk: &[u8], ts: impl Into, - txn_source: u32, + txn_source: u64, ) { must_prewrite_put_impl_with_should_not_exist( engine, diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 2cd908412c3..cd24f54d13b 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -511,7 +511,7 @@ impl Prewriter { need_old_value: extra_op == ExtraOp::ReadOldValue, is_retry_request: self.ctx.is_retry_request, assertion_level: self.assertion_level, - txn_source: self.ctx.get_txn_source() as u8, + txn_source: self.ctx.get_txn_source(), }; let async_commit_pk = self From 07266ff2f99f47ca8be0fab0c6c6e84a29f270ea Mon Sep 17 00:00:00 2001 From: JmPotato Date: Mon, 21 Nov 2022 12:19:57 +0800 Subject: [PATCH 0346/1149] resolved_ts: remove the unused CDC sinker (#13795) close tikv/tikv#13794 Since CDC works independently of the `resolved_ts` component and we don't have any plan to integrate them, so the sinker code in the `resolved_ts` component could be removed to simplify the code. 
Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- components/resolved_ts/src/advance.rs | 10 +-- components/resolved_ts/src/endpoint.rs | 112 +++++++++--------------- components/resolved_ts/src/lib.rs | 2 - components/resolved_ts/src/observer.rs | 24 +++-- components/resolved_ts/src/sinker.rs | 45 ---------- components/resolved_ts/tests/mod.rs | 6 +- components/server/src/server.rs | 2 - components/test_raftstore/src/server.rs | 3 +- 8 files changed, 58 insertions(+), 146 deletions(-) delete mode 100644 components/resolved_ts/src/sinker.rs diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index 496c5c8fab8..a78e903bc72 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -47,20 +47,20 @@ use crate::{endpoint::Task, metrics::*}; const DEFAULT_CHECK_LEADER_TIMEOUT_MILLISECONDS: u64 = 5_000; // 5s -pub struct AdvanceTsWorker { +pub struct AdvanceTsWorker { pd_client: Arc, timer: SteadyTimer, worker: Runtime, - scheduler: Scheduler>, + scheduler: Scheduler, /// The concurrency manager for transactions. It's needed for CDC to check /// locks when calculating resolved_ts. concurrency_manager: ConcurrencyManager, } -impl AdvanceTsWorker { +impl AdvanceTsWorker { pub fn new( pd_client: Arc, - scheduler: Scheduler>, + scheduler: Scheduler, concurrency_manager: ConcurrencyManager, ) -> Self { let worker = Builder::new_multi_thread() @@ -81,7 +81,7 @@ impl AdvanceTsWorker { } } -impl AdvanceTsWorker { +impl AdvanceTsWorker { // Advance ts asynchronously and register RegisterAdvanceEvent when its done. 
pub fn advance_ts_for_regions( &self, diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 4f957e8266d..def3d512d3a 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -12,7 +12,7 @@ use std::{ }; use concurrency_manager::ConcurrencyManager; -use engine_traits::{KvEngine, Snapshot}; +use engine_traits::KvEngine; use grpcio::Environment; use kvproto::{metapb::Region, raft_cmdpb::AdminCmdType}; use online_config::{self, ConfigChange, ConfigManager, OnlineConfig}; @@ -23,7 +23,6 @@ use raftstore::{ store::{ fsm::StoreMeta, util::{self, RegionReadProgress, RegionReadProgressRegistry}, - RegionSnapshot, }, }; use security::SecurityManager; @@ -41,7 +40,6 @@ use crate::{ metrics::*, resolver::Resolver, scanner::{ScanEntry, ScanMode, ScanTask, ScannerPool}, - sinker::{CmdSinker, SinkCmd}, }; enum ResolverStatus { @@ -264,7 +262,7 @@ impl ObserveRegion { } } -pub struct Endpoint { +pub struct Endpoint { store_id: Option, cfg: ResolvedTsConfig, cfg_update_notify: Arc, @@ -272,28 +270,25 @@ pub struct Endpoint { region_read_progress: RegionReadProgressRegistry, regions: HashMap, scanner_pool: ScannerPool, - scheduler: Scheduler>, - sinker: C, - advance_worker: AdvanceTsWorker, + scheduler: Scheduler, + advance_worker: AdvanceTsWorker, _phantom: PhantomData<(T, E)>, } -impl Endpoint +impl Endpoint where T: 'static + RaftStoreRouter, E: KvEngine, - C: CmdSinker, { pub fn new( cfg: &ResolvedTsConfig, - scheduler: Scheduler>, + scheduler: Scheduler, raft_router: T, store_meta: Arc>, pd_client: Arc, concurrency_manager: ConcurrencyManager, env: Arc, security_mgr: Arc, - sinker: C, ) -> Self { let (region_read_progress, store_id) = { let meta = store_meta.lock().unwrap(); @@ -320,7 +315,6 @@ where region_read_progress, advance_worker, scanner_pool, - sinker, regions: HashMap::default(), _phantom: PhantomData::default(), }; @@ -502,64 +496,42 @@ where if regions.is_empty() { return; } - - 
let mut min_ts = TimeStamp::max(); for region_id in regions.iter() { if let Some(observe_region) = self.regions.get_mut(region_id) { if let ResolverStatus::Ready = observe_region.resolver_status { - let resolved_ts = observe_region.resolver.resolve(ts); - if resolved_ts < min_ts { - min_ts = resolved_ts; - } + let _ = observe_region.resolver.resolve(ts); } } } - self.sinker.sink_resolved_ts(regions, ts); } // Tracking or untracking locks with incoming commands that corresponding // observe id is valid. #[allow(clippy::drop_ref)] - fn handle_change_log( - &mut self, - cmd_batch: Vec, - snapshot: Option>, - ) { + fn handle_change_log(&mut self, cmd_batch: Vec) { let size = cmd_batch.iter().map(|b| b.size()).sum::(); RTS_CHANNEL_PENDING_CMD_BYTES.sub(size as i64); - let logs = cmd_batch - .into_iter() - .filter_map(|batch| { - if !batch.is_empty() { - if let Some(observe_region) = self.regions.get_mut(&batch.region_id) { - let observe_id = batch.rts_id; - let region_id = observe_region.meta.id; - if observe_region.handle.id == observe_id { - let logs = ChangeLog::encode_change_log(region_id, batch); - if let Err(e) = observe_region.track_change_log(&logs) { - drop(observe_region); - self.re_register_region(region_id, observe_id, e) - } - return Some(SinkCmd { - region_id, - observe_id, - logs, - }); - } else { - debug!("resolved ts CmdBatch discarded"; - "region_id" => batch.region_id, - "observe_id" => ?batch.rts_id, - "current" => ?observe_region.handle.id, - ); - } + for batch in cmd_batch { + if batch.is_empty() { + continue; + } + if let Some(observe_region) = self.regions.get_mut(&batch.region_id) { + let observe_id = batch.rts_id; + let region_id = observe_region.meta.id; + if observe_region.handle.id == observe_id { + let logs = ChangeLog::encode_change_log(region_id, batch); + if let Err(e) = observe_region.track_change_log(&logs) { + drop(observe_region); + self.re_register_region(region_id, observe_id, e); } + } else { + debug!("resolved ts CmdBatch 
discarded"; + "region_id" => batch.region_id, + "observe_id" => ?batch.rts_id, + "current" => ?observe_region.handle.id, + ); } - None - }) - .collect(); - match snapshot { - Some(snap) => self.sinker.sink_cmd_with_old_value(logs, snap), - None => self.sinker.sink_cmd(logs), + } } } @@ -615,7 +587,7 @@ where } } -pub enum Task { +pub enum Task { RegionUpdated(Region), RegionDestroyed(Region), RegisterRegion { @@ -638,7 +610,6 @@ pub enum Task { }, ChangeLog { cmd_batch: Vec, - snapshot: Option>, }, ScanLocks { region_id: u64, @@ -651,7 +622,7 @@ pub enum Task { }, } -impl fmt::Debug for Task { +impl fmt::Debug for Task { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut de = f.debug_struct("ResolvedTsTask"); match self { @@ -710,21 +681,20 @@ impl fmt::Debug for Task { } } -impl fmt::Display for Task { +impl fmt::Display for Task { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{:?}", self) } } -impl Runnable for Endpoint +impl Runnable for Endpoint where T: 'static + RaftStoreRouter, E: KvEngine, - C: CmdSinker, { - type Task = Task; + type Task = Task; - fn run(&mut self, task: Task) { + fn run(&mut self, task: Task) { debug!("run resolved-ts task"; "task" => ?task); match task { Task::RegionDestroyed(region) => self.region_destroyed(region), @@ -742,10 +712,7 @@ where Task::ResolvedTsAdvanced { regions, ts } => { self.handle_resolved_ts_advanced(regions, ts) } - Task::ChangeLog { - cmd_batch, - snapshot, - } => self.handle_change_log(cmd_batch, snapshot), + Task::ChangeLog { cmd_batch } => self.handle_change_log(cmd_batch), Task::ScanLocks { region_id, observe_id, @@ -757,15 +724,15 @@ where } } -pub struct ResolvedTsConfigManager(Scheduler>); +pub struct ResolvedTsConfigManager(Scheduler); -impl ResolvedTsConfigManager { - pub fn new(scheduler: Scheduler>) -> ResolvedTsConfigManager { +impl ResolvedTsConfigManager { + pub fn new(scheduler: Scheduler) -> ResolvedTsConfigManager { ResolvedTsConfigManager(scheduler) } } 
-impl ConfigManager for ResolvedTsConfigManager { +impl ConfigManager for ResolvedTsConfigManager { fn dispatch(&mut self, change: ConfigChange) -> online_config::Result<()> { if let Err(e) = self.0.schedule(Task::ChangeConfig { change }) { error!("failed to schedule ChangeConfig task"; "err" => ?e); @@ -776,11 +743,10 @@ impl ConfigManager for ResolvedTsConfigManager { const METRICS_FLUSH_INTERVAL: u64 = 10_000; // 10s -impl RunnableWithTimer for Endpoint +impl RunnableWithTimer for Endpoint where T: 'static + RaftStoreRouter, E: KvEngine, - C: CmdSinker, { fn on_timeout(&mut self) { let store_id = self.get_or_init_store_id(); diff --git a/components/resolved_ts/src/lib.rs b/components/resolved_ts/src/lib.rs index 5d4e233808d..eef1211a580 100644 --- a/components/resolved_ts/src/lib.rs +++ b/components/resolved_ts/src/lib.rs @@ -27,8 +27,6 @@ mod observer; pub use observer::*; mod advance; pub use advance::*; -mod sinker; -pub use sinker::*; mod endpoint; pub use endpoint::*; mod errors; diff --git a/components/resolved_ts/src/observer.rs b/components/resolved_ts/src/observer.rs index 9ff7b976ad4..7421beaad85 100644 --- a/components/resolved_ts/src/observer.rs +++ b/components/resolved_ts/src/observer.rs @@ -8,16 +8,16 @@ use tikv_util::worker::Scheduler; use crate::{cmd::lock_only_filter, endpoint::Task, metrics::RTS_CHANNEL_PENDING_CMD_BYTES}; -pub struct Observer { - scheduler: Scheduler>, +pub struct Observer { + scheduler: Scheduler, } -impl Observer { - pub fn new(scheduler: Scheduler>) -> Self { +impl Observer { + pub fn new(scheduler: Scheduler) -> Self { Observer { scheduler } } - pub fn register_to(&self, coprocessor_host: &mut CoprocessorHost) { + pub fn register_to(&self, coprocessor_host: &mut CoprocessorHost) { // The `resolved-ts` cmd observer will `mem::take` the `Vec`, use a // low priority to let it be the last observer and avoid affecting other // observers @@ -33,7 +33,7 @@ impl Observer { } } -impl Clone for Observer { +impl Clone for Observer 
{ fn clone(&self) -> Self { Self { scheduler: self.scheduler.clone(), @@ -41,9 +41,9 @@ impl Clone for Observer { } } -impl Coprocessor for Observer {} +impl Coprocessor for Observer {} -impl CmdObserver for Observer { +impl CmdObserver for Observer { fn on_flush_applied_cmd_batch( &self, max_level: ObserveLevel, @@ -64,7 +64,6 @@ impl CmdObserver for Observer { RTS_CHANNEL_PENDING_CMD_BYTES.add(size as i64); if let Err(e) = self.scheduler.schedule(Task::ChangeLog { cmd_batch: cmd_batches, - snapshot: None, }) { info!("failed to schedule change log event"; "err" => ?e); } @@ -82,7 +81,7 @@ impl CmdObserver for Observer { } } -impl RoleObserver for Observer { +impl RoleObserver for Observer { fn on_role_change(&self, ctx: &mut ObserverContext<'_>, role_change: &RoleChange) { // Stop to advance resolved ts after peer steps down to follower or candidate. // Do not need to check observe id because we expect all role change events are @@ -97,7 +96,7 @@ impl RoleObserver for Observer { } } -impl RegionChangeObserver for Observer { +impl RegionChangeObserver for Observer { fn on_region_changed( &self, ctx: &mut ObserverContext<'_>, @@ -139,7 +138,6 @@ impl RegionChangeObserver for Observer { mod test { use std::time::Duration; - use engine_rocks::RocksSnapshot; use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; use kvproto::raft_cmdpb::*; use tikv::storage::kv::TestEngineBuilder; @@ -156,7 +154,7 @@ mod test { cmd } - fn expect_recv(rx: &mut ReceiverWrapper>, data: Vec) { + fn expect_recv(rx: &mut ReceiverWrapper, data: Vec) { if data.is_empty() { match rx.recv_timeout(Duration::from_millis(10)) { Err(std::sync::mpsc::RecvTimeoutError::Timeout) => return, diff --git a/components/resolved_ts/src/sinker.rs b/components/resolved_ts/src/sinker.rs deleted file mode 100644 index 383e5f7acc7..00000000000 --- a/components/resolved_ts/src/sinker.rs +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
- -use std::marker::PhantomData; - -use engine_traits::Snapshot; -use raftstore::{coprocessor::ObserveId, store::RegionSnapshot}; -use txn_types::TimeStamp; - -use crate::cmd::ChangeLog; - -pub struct SinkCmd { - pub region_id: u64, - pub observe_id: ObserveId, - pub logs: Vec, -} - -pub trait CmdSinker: Send { - fn sink_cmd(&mut self, sink_cmd: Vec); - - fn sink_cmd_with_old_value(&mut self, sink_cmd: Vec, snapshot: RegionSnapshot); - - fn sink_resolved_ts(&mut self, regions: Vec, ts: TimeStamp); -} - -pub struct DummySinker(PhantomData); - -impl DummySinker { - pub fn new() -> Self { - Self(PhantomData::default()) - } -} - -impl Default for DummySinker { - fn default() -> Self { - Self::new() - } -} - -impl CmdSinker for DummySinker { - fn sink_cmd(&mut self, _sink_cmd: Vec) {} - - fn sink_cmd_with_old_value(&mut self, _sink_cmd: Vec, _snapshot: RegionSnapshot) {} - - fn sink_resolved_ts(&mut self, _regions: Vec, _ts: TimeStamp) {} -} diff --git a/components/resolved_ts/tests/mod.rs b/components/resolved_ts/tests/mod.rs index 376aa216224..e8d2a6429ba 100644 --- a/components/resolved_ts/tests/mod.rs +++ b/components/resolved_ts/tests/mod.rs @@ -4,7 +4,6 @@ use std::{sync::*, time::Duration}; use collections::HashMap; use concurrency_manager::ConcurrencyManager; -use engine_rocks::{RocksEngine, RocksSnapshot}; use futures::{executor::block_on, stream, SinkExt}; use grpcio::{ChannelBuilder, ClientUnaryReceiver, Environment, Result, WriteFlags}; use kvproto::{ @@ -28,8 +27,8 @@ pub fn init() { pub struct TestSuite { pub cluster: Cluster, - pub endpoints: HashMap>>, - pub obs: HashMap>, + pub endpoints: HashMap>, + pub obs: HashMap, tikv_cli: HashMap, import_cli: HashMap, concurrency_managers: HashMap, @@ -88,7 +87,6 @@ impl TestSuite { cm.clone(), env, sim.security_mgr.clone(), - resolved_ts::DummySinker::new(), ); concurrency_managers.insert(*id, cm); worker.start(rts_endpoint); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 
a5fb3fefaf9..80d44b114b9 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1158,8 +1158,6 @@ where self.concurrency_manager.clone(), server.env(), self.security_mgr.clone(), - // TODO: replace to the cdc sinker - resolved_ts::DummySinker::new(), ); rts_worker.start_with_timer(rts_endpoint); self.to_stop.push(rts_worker); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 5ae1b1a13a6..42cefe60496 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -131,7 +131,7 @@ struct ServerMeta { raw_router: RaftRouter, raw_apply_router: ApplyRouter, gc_worker: GcWorker, SimulateStoreTransport>, - rts_worker: Option>>, + rts_worker: Option>, rsmeter_cleanup: Box, } @@ -362,7 +362,6 @@ impl ServerCluster { concurrency_manager.clone(), self.env.clone(), self.security_mgr.clone(), - resolved_ts::DummySinker::new(), ); // Start the worker rts_worker.start(rts_endpoint); From b0075db291323727a0c643ac75d1a91c1d2a61fe Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Mon, 21 Nov 2022 22:35:58 +0800 Subject: [PATCH 0347/1149] raftstore-v2: support send/recv tablet snapshot (#13776) ref tikv/tikv#12842 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Xinye Tao --- components/raftstore-v2/src/raft/storage.rs | 2 +- .../raftstore/src/store/async_io/read.rs | 4 +- components/raftstore/src/store/snap.rs | 28 +- src/server/errors.rs | 5 +- src/server/mod.rs | 1 + src/server/snap.rs | 4 +- src/server/tablet_snap.rs | 557 ++++++++++++++++++ 7 files changed, 594 insertions(+), 7 deletions(-) create mode 100644 src/server/tablet_snap.rs diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 01285cc5a46..d2abb6818d8 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -553,7 +553,7 @@ mod tests { 
assert_eq!(snap.get_metadata().get_term(), 0); assert_eq!(snap.get_data().is_empty(), false); let snap_key = TabletSnapKey::from_region_snap(4, 7, &snap); - let checkpointer_path = mgr.get_tablet_checkpointer_path(&snap_key); + let checkpointer_path = mgr.get_final_path_for_gen(&snap_key); assert!(checkpointer_path.exists()); // Test cancel snapshot diff --git a/components/raftstore/src/store/async_io/read.rs b/components/raftstore/src/store/async_io/read.rs index 2da4869d24b..9e0215ca9c1 100644 --- a/components/raftstore/src/store/async_io/read.rs +++ b/components/raftstore/src/store/async_io/read.rs @@ -18,6 +18,7 @@ use raft::{eraftpb::Snapshot, GetEntriesContext}; use tikv_util::{error, info, time::Instant, worker::Runnable}; use crate::store::{ + snap::TABLET_SNAPSHOT_VERSION, util, worker::metrics::{SNAP_COUNTER, SNAP_HISTOGRAM}, RaftlogFetchResult, TabletSnapKey, TabletSnapManager, MAX_INIT_ENTRY_COUNT, @@ -119,7 +120,7 @@ impl ReadRunner { } fn generate_snap(&self, snap_key: &TabletSnapKey, tablet: EK) -> crate::Result<()> { - let checkpointer_path = self.snap_mgr().get_tablet_checkpointer_path(snap_key); + let checkpointer_path = self.snap_mgr().get_final_path_for_gen(snap_key); if checkpointer_path.as_path().exists() { // Remove the old checkpoint directly. std::fs::remove_dir_all(checkpointer_path.as_path())?; @@ -215,6 +216,7 @@ where // Set snapshot data. 
let mut snap_data = RaftSnapshotData::default(); snap_data.set_region(region_state.get_region().clone()); + snap_data.set_version(TABLET_SNAPSHOT_VERSION); snap_data.mut_meta().set_for_balance(for_balance); snapshot.set_data(snap_data.write_to_bytes().unwrap().into()); diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 8ca5b26d02b..a9f50d61edb 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -4,6 +4,7 @@ use std::{ cmp::{self, Ordering as CmpOrdering, Reverse}, error::Error as StdError, fmt::{self, Display, Formatter}, + fs, io::{self, ErrorKind, Read, Write}, path::{Path, PathBuf}, result, str, @@ -56,6 +57,7 @@ pub const SNAPSHOT_CFS_ENUM_PAIR: &[(CfNames, CfName)] = &[ (CfNames::write, CF_WRITE), ]; pub const SNAPSHOT_VERSION: u64 = 2; +pub const TABLET_SNAPSHOT_VERSION: u64 = 3; pub const IO_LIMITER_CHUNK_SIZE: usize = 4 * 1024; /// Name prefix for the self-generated snapshot file. @@ -1926,7 +1928,6 @@ impl Display for TabletSnapKey { /// It's similar `SnapManager`, but simpler in tablet version. 
/// /// TODO: -/// - add Limiter to control send/recv speed /// - clean up expired tablet checkpointer #[derive(Clone)] pub struct TabletSnapManager { @@ -1955,10 +1956,33 @@ impl TabletSnapManager { Ok(()) } - pub fn get_tablet_checkpointer_path(&self, key: &TabletSnapKey) -> PathBuf { + pub fn get_final_path_for_gen(&self, key: &TabletSnapKey) -> PathBuf { let prefix = format!("{}_{}", SNAP_GEN_PREFIX, key); PathBuf::from(&self.base).join(prefix) } + + pub fn get_final_path_for_recv(&self, key: &TabletSnapKey) -> PathBuf { + let prefix = format!("{}_{}", SNAP_REV_PREFIX, key); + PathBuf::from(&self.base).join(prefix) + } + pub fn get_tmp_path_for_recv(&self, key: &TabletSnapKey) -> PathBuf { + let prefix = format!("{}_{}{}", SNAP_REV_PREFIX, key, TMP_FILE_SUFFIX); + PathBuf::from(&self.base).join(prefix) + } + + pub fn delete_snapshot(&self, key: &TabletSnapKey) -> bool { + let path = self.get_final_path_for_gen(key); + if path.exists() && let Err(e) = fs::remove_dir_all(path.as_path()) { + error!( + "delete snapshot failed"; + "path" => %path.display(), + "err" => ?e, + ); + false + } else { + true + } + } } #[cfg(test)] diff --git a/src/server/errors.rs b/src/server/errors.rs index c7a41947f79..5936f365120 100644 --- a/src/server/errors.rs +++ b/src/server/errors.rs @@ -3,7 +3,7 @@ use std::{error::Error as StdError, io::Error as IoError, net::AddrParseError, result}; use engine_traits::Error as EngineTraitError; -use futures::channel::oneshot::Canceled; +use futures::channel::{mpsc::SendError, oneshot::Canceled}; use grpcio::Error as GrpcError; use hyper::Error as HttpError; use openssl::error::ErrorStack as OpenSslError; @@ -66,6 +66,9 @@ pub enum Error { #[error("{0:?}")] OpenSsl(#[from] OpenSslError), + + #[error("{0:?}")] + StreamDisconnect(#[from] SendError), } pub type Result = result::Result; diff --git a/src/server/mod.rs b/src/server/mod.rs index af1aa289de7..d926ca40b2a 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -20,6 +20,7 @@ pub mod 
server; pub mod service; pub mod snap; pub mod status_server; +pub mod tablet_snap; pub mod transport; pub mod ttl; diff --git a/src/server/snap.rs b/src/server/snap.rs index 49c38cb645b..0200c779383 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -47,7 +47,7 @@ use crate::tikv_util::sys::thread::ThreadBuildWrapper; pub type Callback = Box) + Send>; -const DEFAULT_POOL_SIZE: usize = 4; +pub const DEFAULT_POOL_SIZE: usize = 4; /// A task for either receiving Snapshot or sending Snapshot pub enum Task { @@ -83,7 +83,7 @@ struct SnapChunk { remain_bytes: usize, } -const SNAP_CHUNK_LEN: usize = 1024 * 1024; +pub const SNAP_CHUNK_LEN: usize = 1024 * 1024; impl Stream for SnapChunk { type Item = Result<(SnapshotChunk, WriteFlags)>; diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs new file mode 100644 index 00000000000..cbda159a83e --- /dev/null +++ b/src/server/tablet_snap.rs @@ -0,0 +1,557 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + convert::{TryFrom, TryInto}, + fs::{self, File}, + io::{Read, Write}, + marker::PhantomData, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + time::Duration, +}; + +use engine_traits::KvEngine; +use file_system::{IoType, WithIoType}; +use futures::{ + future::{Future, TryFutureExt}, + sink::{Sink, SinkExt}, + stream::{Stream, StreamExt, TryStreamExt}, +}; +use grpcio::{ + self, ChannelBuilder, ClientStreamingSink, Environment, RequestStream, RpcStatus, + RpcStatusCode, WriteFlags, +}; +use kvproto::{ + raft_serverpb::{Done, RaftMessage, RaftSnapshotData, SnapshotChunk}, + tikvpb::TikvClient, +}; +use protobuf::Message; +use raftstore::{ + router::RaftStoreRouter, + store::snap::{TabletSnapKey, TabletSnapManager}, +}; +use security::SecurityManager; +use tikv_util::{ + config::{Tracker, VersionTrack}, + time::Instant, + worker::Runnable, +}; +use tokio::runtime::{Builder as RuntimeBuilder, Runtime}; + +use super::{ + metrics::*, + snap::{Task, 
DEFAULT_POOL_SIZE, SNAP_CHUNK_LEN}, + Config, Error, Result, +}; +use crate::tikv_util::{sys::thread::ThreadBuildWrapper, time::Limiter}; + +struct RecvTabletSnapContext { + key: TabletSnapKey, + raft_msg: RaftMessage, + io_type: IoType, + start: Instant, + chunk_size: usize, +} + +impl RecvTabletSnapContext { + fn new(mut head: SnapshotChunk) -> Result { + if !head.has_message() { + return Err(box_err!("no raft message in the first chunk")); + } + + let chunk_size = match head.take_data().try_into() { + Ok(buff) => usize::from_ne_bytes(buff), + Err(_) => return Err(box_err!("failed to get chunk size")), + }; + let meta = head.take_message(); + let key = TabletSnapKey::from_region_snap( + meta.get_region_id(), + meta.get_to_peer().get_id(), + meta.get_message().get_snapshot(), + ); + let io_type = io_type_from_raft_message(&meta)?; + + Ok(RecvTabletSnapContext { + key, + raft_msg: meta, + io_type, + start: Instant::now(), + chunk_size, + }) + } + + fn finish>(self, raft_router: R) -> Result<()> { + let key = self.key; + if let Err(e) = raft_router.send_raft_msg(self.raft_msg) { + return Err(box_err!("{} failed to send snapshot to raft: {}", key, e)); + } + info!("saving all snapshot files"; "snap_key" => %key, "takes" => ?self.start.saturating_elapsed()); + Ok(()) + } +} + +fn io_type_from_raft_message(msg: &RaftMessage) -> Result { + let snapshot = msg.get_message().get_snapshot(); + let data = snapshot.get_data(); + let mut snapshot_data = RaftSnapshotData::default(); + snapshot_data.merge_from_bytes(data)?; + let snapshot_meta = snapshot_data.get_meta(); + if snapshot_meta.get_for_balance() { + Ok(IoType::LoadBalance) + } else { + Ok(IoType::Replication) + } +} + +async fn send_snap_files( + mgr: &TabletSnapManager, + mut sender: impl Sink<(SnapshotChunk, WriteFlags), Error = Error> + Unpin, + msg: RaftMessage, + key: TabletSnapKey, + limiter: Limiter, +) -> Result { + let path = mgr.get_final_path_for_gen(&key); + info!("begin to send snapshot file";"snap_key" 
=> %key); + let files = fs::read_dir(&path)? + .map(|f| Ok(f?.path())) + .filter(|f| f.is_ok() && f.as_ref().unwrap().is_file()) + .collect::>>()?; + let io_type = io_type_from_raft_message(&msg)?; + let _with_io_type = WithIoType::new(io_type); + let mut total_sent = msg.compute_size() as u64; + let mut chunk = SnapshotChunk::default(); + chunk.set_message(msg); + chunk.set_data(usize::to_ne_bytes(SNAP_CHUNK_LEN).to_vec()); + sender + .feed((chunk, WriteFlags::default().buffer_hint(true))) + .await?; + for path in files { + let name = path.file_name().unwrap().to_str().unwrap(); + let mut buffer = Vec::with_capacity(SNAP_CHUNK_LEN); + buffer.push(name.len() as u8); + buffer.extend_from_slice(name.as_bytes()); + let mut f = File::open(&path)?; + let mut off = buffer.len(); + loop { + unsafe { + buffer.set_len(SNAP_CHUNK_LEN); + } + // it should break if readed len is zero or the buffer is full. + while off < SNAP_CHUNK_LEN { + let readed = f.read(&mut buffer[off..])?; + if readed == 0 { + unsafe { + buffer.set_len(off); + } + break; + } + off += readed; + } + limiter.consume(off); + total_sent += off as u64; + let mut chunk = SnapshotChunk::default(); + chunk.set_data(buffer); + sender + .feed((chunk, WriteFlags::default().buffer_hint(true))) + .await?; + // It should switch the next file if the read buffer len is less than the + // SNAP_CHUNK_LEN. + if off < SNAP_CHUNK_LEN { + break; + } + buffer = Vec::with_capacity(SNAP_CHUNK_LEN); + off = 0 + } + } + info!("sent all snap file finish"; "snap_key" => %key); + sender.close().await?; + Ok(total_sent) +} + +/// Send the snapshot to specified address. +/// +/// It will first send the normal raft snapshot message and then send the +/// snapshot file. 
+pub fn send_snap( + env: Arc, + mgr: TabletSnapManager, + security_mgr: Arc, + cfg: &Config, + addr: &str, + msg: RaftMessage, + limiter: Limiter, +) -> Result>> { + assert!(msg.get_message().has_snapshot()); + let timer = Instant::now(); + let send_timer = SEND_SNAP_HISTOGRAM.start_coarse_timer(); + let key = TabletSnapKey::from_region_snap( + msg.get_region_id(), + msg.get_to_peer().get_id(), + msg.get_message().get_snapshot(), + ); + + let cb = ChannelBuilder::new(env) + .stream_initial_window_size(cfg.grpc_stream_initial_window_size.0 as i32) + .keepalive_time(cfg.grpc_keepalive_time.0) + .keepalive_timeout(cfg.grpc_keepalive_timeout.0) + .default_compression_algorithm(cfg.grpc_compression_algorithm()) + .default_gzip_compression_level(cfg.grpc_gzip_compression_level) + .default_grpc_min_message_size_to_compress(cfg.grpc_min_message_size_to_compress); + + let channel = security_mgr.connect(cb, addr); + let client = TikvClient::new(channel); + let (sink, receiver) = client.snapshot()?; + let send_task = async move { + let sink = sink.sink_map_err(Error::from); + let total_size = send_snap_files(&mgr, sink, msg, key.clone(), limiter).await?; + let recv_result = receiver.map_err(Error::from).await; + send_timer.observe_duration(); + drop(client); + match recv_result { + Ok(_) => { + mgr.delete_snapshot(&key); + Ok(SendStat { + key, + total_size, + elapsed: timer.saturating_elapsed(), + }) + } + Err(e) => Err(e), + } + }; + Ok(send_task) +} + +async fn recv_snap_files( + snap_mgr: TabletSnapManager, + mut stream: impl Stream> + Unpin, + limit: Limiter, +) -> Result { + let head = stream + .next() + .await + .transpose()? 
+ .ok_or_else(|| Error::Other("empty gRPC stream".into()))?; + let context = RecvTabletSnapContext::new(head)?; + let chunk_size = context.chunk_size; + let path = snap_mgr.get_tmp_path_for_recv(&context.key); + info!("begin to receive tablet snapshot files"; "file" => %path.display()); + fs::create_dir_all(&path)?; + let _with_io_type = WithIoType::new(context.io_type); + loop { + let mut chunk = match stream.next().await { + Some(Ok(mut c)) if !c.has_message() => c.take_data(), + Some(_) => { + return Err(box_err!("duplicated metadata")); + } + None => break, + }; + // the format of chunk: + // |--name_len--|--name--|--content--| + let len = chunk[0] as usize; + let file_name = box_try!(std::str::from_utf8(&chunk[1..len + 1])); + let p = path.join(file_name); + let mut f = File::create(&p)?; + let mut size = chunk.len() - len - 1; + f.write_all(&chunk[len + 1..])?; + // It should switch next file if the chunk size is less than the SNAP_CHUNK_LEN. + while chunk.len() >= chunk_size { + chunk = match stream.next().await { + Some(Ok(mut c)) if !c.has_message() => c.take_data(), + Some(_) => return Err(box_err!("duplicated metadata")), + None => return Err(box_err!("missing chunk")), + }; + f.write_all(&chunk[..])?; + limit.consume(chunk.len()); + size += chunk.len(); + } + debug!("received snap file"; "file" => %p.display(), "size" => size); + SNAP_LIMIT_TRANSPORT_BYTES_COUNTER_STATIC + .recv + .inc_by(size as u64); + f.sync_data()?; + } + info!("received all tablet snapshot file"; "snap_key" => %context.key); + let final_path = snap_mgr.get_final_path_for_recv(&context.key); + fs::rename(&path, final_path)?; + Ok(context) +} + +fn recv_snap + 'static>( + stream: RequestStream, + sink: ClientStreamingSink, + snap_mgr: TabletSnapManager, + raft_router: R, + limit: Limiter, +) -> impl Future> { + let recv_task = async move { + let stream = stream.map_err(Error::from); + let context = recv_snap_files(snap_mgr, stream, limit).await?; + context.finish(raft_router) + }; + 
async move { + match recv_task.await { + Ok(()) => sink.success(Done::default()).await.map_err(Error::from), + Err(e) => { + let status = RpcStatus::with_message(RpcStatusCode::UNKNOWN, format!("{:?}", e)); + sink.fail(status).await.map_err(Error::from) + } + } + } +} + +pub struct TabletRunner +where + E: KvEngine, + R: RaftStoreRouter + 'static, +{ + env: Arc, + snap_mgr: TabletSnapManager, + security_mgr: Arc, + pool: Runtime, + raft_router: R, + cfg_tracker: Tracker, + cfg: Config, + sending_count: Arc, + recving_count: Arc, + engine: PhantomData, + limiter: Limiter, +} + +impl TabletRunner +where + E: KvEngine, + R: RaftStoreRouter + 'static, +{ + pub fn new( + env: Arc, + snap_mgr: TabletSnapManager, + r: R, + security_mgr: Arc, + cfg: Arc>, + ) -> TabletRunner { + let config = cfg.value().clone(); + let cfg_tracker = cfg.tracker("tablet-sender".to_owned()); + let limit = i64::try_from(config.snap_max_write_bytes_per_sec.0) + .unwrap_or_else(|_| panic!("snap_max_write_bytes_per_sec > i64::max_value")); + let limiter = Limiter::new(if limit > 0 { + limit as f64 + } else { + f64::INFINITY + }); + + let snap_worker = TabletRunner { + env, + snap_mgr, + pool: RuntimeBuilder::new_multi_thread() + .thread_name(thd_name!("tablet-snap-sender")) + .worker_threads(DEFAULT_POOL_SIZE) + .after_start_wrapper(tikv_alloc::add_thread_memory_accessor) + .before_stop_wrapper(tikv_alloc::remove_thread_memory_accessor) + .build() + .unwrap(), + raft_router: r, + security_mgr, + cfg_tracker, + cfg: config, + sending_count: Arc::new(AtomicUsize::new(0)), + recving_count: Arc::new(AtomicUsize::new(0)), + engine: PhantomData, + limiter, + }; + snap_worker + } + + fn refresh_cfg(&mut self) { + if let Some(incoming) = self.cfg_tracker.any_new() { + let limit = if incoming.snap_max_write_bytes_per_sec.0 > 0 { + incoming.snap_max_write_bytes_per_sec.0 as f64 + } else { + f64::INFINITY + }; + self.limiter.set_speed_limit(limit); + info!("refresh snapshot manager config"; + 
"speed_limit"=> limit); + self.cfg = incoming.clone(); + } + } +} + +pub struct SendStat { + key: TabletSnapKey, + total_size: u64, + elapsed: Duration, +} + +impl Runnable for TabletRunner +where + E: KvEngine, + R: RaftStoreRouter + 'static, +{ + type Task = Task; + + fn run(&mut self, task: Task) { + match task { + Task::Recv { stream, sink } => { + let task_num = self.recving_count.load(Ordering::SeqCst); + if task_num >= self.cfg.concurrent_recv_snap_limit { + warn!("too many recving snapshot tasks, ignore"); + let status = RpcStatus::with_message( + RpcStatusCode::RESOURCE_EXHAUSTED, + format!( + "the number of received snapshot tasks {} exceeded the limitation {}", + task_num, self.cfg.concurrent_recv_snap_limit + ), + ); + self.pool.spawn(sink.fail(status)); + return; + } + SNAP_TASK_COUNTER_STATIC.recv.inc(); + + let snap_mgr = self.snap_mgr.clone(); + let raft_router = self.raft_router.clone(); + let recving_count = self.recving_count.clone(); + recving_count.fetch_add(1, Ordering::SeqCst); + let limit = self.limiter.clone(); + let task = async move { + let result = recv_snap(stream, sink, snap_mgr, raft_router, limit).await; + recving_count.fetch_sub(1, Ordering::SeqCst); + if let Err(e) = result { + error!("failed to recv snapshot"; "err" => %e); + } + }; + self.pool.spawn(task); + } + Task::Send { addr, msg, cb } => { + let region_id = msg.get_region_id(); + if self.sending_count.load(Ordering::SeqCst) >= self.cfg.concurrent_send_snap_limit + { + warn!( + "too many sending snapshot tasks, drop Send Snap[to: {}, snap: {:?}]", + addr, msg + ); + cb(Err(Error::Other("Too many sending snapshot tasks".into()))); + return; + } + SNAP_TASK_COUNTER_STATIC.send.inc(); + + let env = Arc::clone(&self.env); + let mgr = self.snap_mgr.clone(); + let security_mgr = Arc::clone(&self.security_mgr); + let sending_count = Arc::clone(&self.sending_count); + sending_count.fetch_add(1, Ordering::SeqCst); + let limit = self.limiter.clone(); + let send_task = + send_snap(env, 
mgr, security_mgr, &self.cfg.clone(), &addr, msg, limit); + let task = async move { + let res = match send_task { + Err(e) => Err(e), + Ok(f) => f.await, + }; + match res { + Ok(stat) => { + info!( + "sent snapshot"; + "region_id" => region_id, + "snap_key" => %stat.key, + "size" => stat.total_size, + "duration" => ?stat.elapsed + ); + cb(Ok(())); + } + Err(e) => { + error!("failed to send snap"; "to_addr" => addr, "region_id" => region_id, "err" => ?e); + cb(Err(e)); + } + }; + sending_count.fetch_sub(1, Ordering::SeqCst); + }; + + self.pool.spawn(task); + } + Task::RefreshConfigEvent => { + self.refresh_cfg(); + } + Task::Validate(f) => { + f(&self.cfg); + } + } + } +} + +#[cfg(test)] +mod tests { + use std::{ + fs::{create_dir_all, File}, + io::Write, + }; + + use futures::{ + channel::mpsc::{self}, + executor::block_on, + sink::SinkExt, + }; + use futures_util::StreamExt; + use grpcio::WriteFlags; + use kvproto::raft_serverpb::{RaftMessage, SnapshotChunk}; + use raftstore::store::snap::{TabletSnapKey, TabletSnapManager}; + use tempfile::TempDir; + use tikv_util::{store::new_peer, time::Limiter}; + + use super::{super::Error, recv_snap_files, send_snap_files, SNAP_CHUNK_LEN}; + + #[test] + fn test_send_tablet() { + let limiter = Limiter::new(f64::INFINITY); + let snap_key = TabletSnapKey::new(1, 1, 1, 1); + let mut msg = RaftMessage::default(); + msg.set_region_id(1); + msg.set_to_peer(new_peer(1, 1)); + msg.mut_message().mut_snapshot().mut_metadata().set_index(1); + msg.mut_message().mut_snapshot().mut_metadata().set_term(1); + let send_path = TempDir::new().unwrap(); + let send_snap_mgr = + TabletSnapManager::new(send_path.path().join("snap_dir").to_str().unwrap()); + let snap_path = send_snap_mgr.get_final_path_for_gen(&snap_key); + create_dir_all(snap_path.as_path()).unwrap(); + // send file should skip directory + create_dir_all(snap_path.join("dir")).unwrap(); + for i in 0..2 { + let mut f = File::create(snap_path.join(i.to_string())).unwrap(); + let count 
= SNAP_CHUNK_LEN - 2; + let mut data = std::iter::repeat("a".as_bytes()) + .take(count) + .collect::>(); + for buffer in data.iter_mut() { + f.write_all(buffer).unwrap(); + } + f.sync_data().unwrap(); + } + + let recv_path = TempDir::new().unwrap(); + let recv_snap_manager = + TabletSnapManager::new(recv_path.path().join("snap_dir").to_str().unwrap()); + let (tx, rx) = mpsc::unbounded(); + let sink = tx.sink_map_err(Error::from); + block_on(send_snap_files( + &send_snap_mgr, + sink, + msg, + snap_key.clone(), + limiter.clone(), + )) + .unwrap(); + + let stream = rx.map(|x: (SnapshotChunk, WriteFlags)| Ok(x.0)); + let final_path = recv_snap_manager.get_final_path_for_recv(&snap_key); + let r = block_on(recv_snap_files(recv_snap_manager, stream, limiter)).unwrap(); + assert_eq!(r.key, snap_key); + std::thread::sleep(std::time::Duration::from_secs(1)); + let dir = std::fs::read_dir(final_path).unwrap(); + assert_eq!(2, dir.count()); + send_snap_mgr.delete_snapshot(&snap_key); + assert!(!snap_path.exists()); + } +} From e41dabb7064a3f9ec2b99a59af7a6fa792b325d9 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 22 Nov 2022 11:31:58 +0800 Subject: [PATCH 0348/1149] *: make flashback not aware of raftstore router (#13828) ref tikv/tikv#13827 flashback is a transaction concept, better make it only interact with the storage layer instead of raftstore directly. This PR also converts raftstore errors to region errors for flashback, so it will can make client retry more reliable. 
Signed-off-by: Jay Lee --- components/tikv_kv/src/lib.rs | 14 ++- components/tikv_util/src/sys/mod.rs | 5 +- src/server/raftkv.rs | 86 ++++++++++++--- src/server/service/kv.rs | 162 ++++++---------------------- 4 files changed, 118 insertions(+), 149 deletions(-) diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 32f15786f79..3e15b399796 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -35,7 +35,7 @@ use engine_traits::{ CF_DEFAULT, CF_LOCK, }; use error_code::{self, ErrorCode, ErrorCodeExt}; -use futures::prelude::*; +use futures::{future::BoxFuture, prelude::*}; use into_other::IntoOther; use kvproto::{ errorpb::Error as ErrorHeader, @@ -347,6 +347,18 @@ pub trait Engine: Send + Clone + 'static { // Some engines have a `TxnExtraScheduler`. This method is to send the extra // to the scheduler. fn schedule_txn_extra(&self, _txn_extra: TxnExtra) {} + + /// Mark the start of flashback. + // It's an infrequent API, use trait object for simplicity. + fn start_flashback(&self, _ctx: &Context) -> BoxFuture<'static, Result<()>> { + Box::pin(futures::future::ready(Ok(()))) + } + + /// Mark the end of flashback. + // It's an infrequent API, use trait object for simplicity. 
+ fn end_flashback(&self, _ctx: &Context) -> BoxFuture<'static, Result<()>> { + Box::pin(futures::future::ready(Ok(()))) + } } /// A Snapshot is a consistent view of the underlying engine at a given point in diff --git a/components/tikv_util/src/sys/mod.rs b/components/tikv_util/src/sys/mod.rs index dcc137f095c..8b5e846592f 100644 --- a/components/tikv_util/src/sys/mod.rs +++ b/components/tikv_util/src/sys/mod.rs @@ -191,14 +191,13 @@ pub fn path_in_diff_mount_point(path1: &str, path2: &str) -> bool { #[cfg(not(target_os = "linux"))] pub fn path_in_diff_mount_point(_path1: &str, _path2: &str) -> bool { - return false; + false } -#[cfg(test)] +#[cfg(all(test, target_os = "linux"))] mod tests { use super::*; - #[cfg(target_os = "linux")] #[test] fn test_path_in_diff_mount_point() { let (empty_path1, path2) = ("", "/"); diff --git a/src/server/raftkv.rs b/src/server/raftkv.rs index eaa13995650..8bef31eaebd 100644 --- a/src/server/raftkv.rs +++ b/src/server/raftkv.rs @@ -15,10 +15,14 @@ use std::{ use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; use engine_traits::{CfName, KvEngine, MvccProperties, Snapshot}; +use futures::future::BoxFuture; use kvproto::{ errorpb, kvrpcpb::{Context, IsolationLevel}, - raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse, RaftRequestHeader, Request, Response}, + raft_cmdpb::{ + AdminCmdType, CmdType, RaftCmdRequest, RaftCmdResponse, RaftRequestHeader, Request, + Response, + }, }; use raft::{ eraftpb::{self, MessageType}, @@ -37,7 +41,7 @@ use raftstore::{ }; use thiserror::Error; use tikv_kv::write_modifies; -use tikv_util::{codec::number::NumberEncoder, time::Instant}; +use tikv_util::{codec::number::NumberEncoder, future::paired_future_callback, time::Instant}; use txn_types::{Key, TimeStamp, TxnExtra, TxnExtraScheduler, WriteBatchFlags}; use super::metrics::*; @@ -149,6 +153,49 @@ where } } +#[inline] +pub fn new_request_header(ctx: &Context) -> RaftRequestHeader { + let mut header = 
RaftRequestHeader::default(); + header.set_region_id(ctx.get_region_id()); + header.set_peer(ctx.get_peer().clone()); + header.set_region_epoch(ctx.get_region_epoch().clone()); + if ctx.get_term() != 0 { + header.set_term(ctx.get_term()); + } + header.set_sync_log(ctx.get_sync_log()); + header.set_replica_read(ctx.get_replica_read()); + header +} + +#[inline] +pub fn new_flashback_req(ctx: &Context, ty: AdminCmdType) -> RaftCmdRequest { + let header = new_request_header(ctx); + let mut req = RaftCmdRequest::default(); + req.set_header(header); + req.mut_header() + .set_flags(WriteBatchFlags::FLASHBACK.bits()); + req.mut_admin_request().set_cmd_type(ty); + req +} + +fn exec_admin>( + router: &S, + req: RaftCmdRequest, +) -> BoxFuture<'static, kv::Result<()>> { + let (cb, f) = paired_future_callback(); + let res = router.send_command( + req, + raftstore::store::Callback::write(cb), + RaftCmdExtraOpts::default(), + ); + Box::pin(async move { + res?; + let mut resp = box_try!(f.await); + check_raft_cmd_response(&mut resp.response)?; + Ok(()) + }) +} + /// `RaftKv` is a storage engine base on `RaftStore`. 
#[derive(Clone)] pub struct RaftKv @@ -181,26 +228,13 @@ where self.txn_extra_scheduler = Some(txn_extra_scheduler); } - fn new_request_header(&self, ctx: &Context) -> RaftRequestHeader { - let mut header = RaftRequestHeader::default(); - header.set_region_id(ctx.get_region_id()); - header.set_peer(ctx.get_peer().clone()); - header.set_region_epoch(ctx.get_region_epoch().clone()); - if ctx.get_term() != 0 { - header.set_term(ctx.get_term()); - } - header.set_sync_log(ctx.get_sync_log()); - header.set_replica_read(ctx.get_replica_read()); - header - } - fn exec_snapshot( &mut self, ctx: SnapContext<'_>, req: Request, cb: Callback>, ) -> Result<()> { - let mut header = self.new_request_header(ctx.pb_ctx); + let mut header = new_request_header(ctx.pb_ctx); let mut flags = 0; if ctx.pb_ctx.get_stale_read() && ctx.start_ts.map_or(true, |ts| !ts.is_zero()) { let mut data = [0u8; 8]; @@ -257,7 +291,7 @@ where let reqs: Vec = batch.modifies.into_iter().map(Into::into).collect(); let txn_extra = batch.extra; - let mut header = self.new_request_header(ctx); + let mut header = new_request_header(ctx); let mut flags = 0; if txn_extra.one_pc { flags |= WriteBatchFlags::ONE_PC.bits(); @@ -509,6 +543,24 @@ where } } } + + fn start_flashback(&self, ctx: &Context) -> BoxFuture<'static, kv::Result<()>> { + // Send an `AdminCmdType::PrepareFlashback` to prepare the raftstore for the + // later flashback. Once invoked, we will update the persistent region meta and + // the memory state of the flashback in Peer FSM to reject all read, write + // and scheduling operations for this region when propose/apply before we + // start the actual data flashback transaction command in the next phase. 
+ let req = new_flashback_req(ctx, AdminCmdType::PrepareFlashback); + exec_admin(&self.router, req) + } + + fn end_flashback(&self, ctx: &Context) -> BoxFuture<'static, kv::Result<()>> { + // Send an `AdminCmdType::FinishFlashback` to unset the persistence state + // in `RegionLocalState` and region's meta, and when that admin cmd is applied, + // will update the memory state of the flashback + let req = new_flashback_req(ctx, AdminCmdType::FinishFlashback); + exec_admin(&self.router, req) + } } #[derive(Clone)] diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 7fc5bb77f31..54b0dc6782b 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -6,7 +6,6 @@ use std::{mem, sync::Arc}; use api_version::KvFormat; use fail::fail_point; use futures::{ - channel::oneshot, compat::Future01CompatExt, future::{self, Future, FutureExt, TryFutureExt}, sink::SinkExt, @@ -21,10 +20,7 @@ use kvproto::{ errorpb::{Error as RegionError, *}, kvrpcpb::*, mpp::*, - raft_cmdpb::{ - AdminCmdType, AdminRequest, CmdType, RaftCmdRequest, RaftRequestHeader, - Request as RaftRequest, - }, + raft_cmdpb::{CmdType, RaftCmdRequest, RaftRequestHeader, Request as RaftRequest}, raft_serverpb::*, tikvpb::*, }; @@ -47,9 +43,8 @@ use tikv_util::{ time::{duration_to_ms, duration_to_sec, Instant}, worker::Scheduler, }; -use tokio::sync::Mutex; use tracker::{set_tls_tracker_token, RequestInfo, RequestType, Tracker, GLOBAL_TRACKERS}; -use txn_types::{self, Key, WriteBatchFlags}; +use txn_types::{self, Key}; use super::batch::{BatcherBuilder, ReqBatcher}; use crate::{ @@ -60,6 +55,7 @@ use crate::{ Error, Proxy, Result as ServerResult, }, storage::{ + self, errors::{ extract_committed, extract_key_error, extract_key_errors, extract_kv_pairs, extract_region_error, map_kv_pairs, @@ -414,7 +410,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor let begin_instant = Instant::now(); let source = req.mut_context().take_request_source(); - let resp = 
future_prepare_flashback_to_version(&self.storage, &self.ch, req); + let resp = future_prepare_flashback_to_version(&self.storage, req); let task = async move { let resp = resp.await?; let elapsed = begin_instant.saturating_elapsed(); @@ -445,7 +441,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor let begin_instant = Instant::now(); let source = req.mut_context().take_request_source(); - let resp = future_flashback_to_version(&self.storage, &self.ch, req); + let resp = future_flashback_to_version(&self.storage, req); let task = async move { let resp = resp.await?; let elapsed = begin_instant.saturating_elapsed(); @@ -1093,7 +1089,6 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor let copr_v2 = self.copr_v2.clone(); let pool_size = storage.get_normal_pool_size(); let batch_builder = BatcherBuilder::new(self.enable_req_batch, pool_size); - let ch = self.ch.clone(); let request_handler = stream.try_for_each(move |mut req| { let request_ids = req.take_request_ids(); let requests: Vec<_> = req.take_requests().into(); @@ -1110,7 +1105,6 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor id, req, &tx, - &ch, ); if let Some(batch) = batcher.as_mut() { batch.maybe_commit(&storage, &tx); @@ -1311,12 +1305,7 @@ fn response_batch_commands_request( poll_future_notify(task); } -fn handle_batch_commands_request< - T: RaftStoreRouter + 'static, - E: Engine, - L: LockManager, - F: KvFormat, ->( +fn handle_batch_commands_request( batcher: &mut Option, storage: &Storage, copr: &Endpoint, @@ -1325,7 +1314,6 @@ fn handle_batch_commands_request< id: u64, req: batch_commands_request::Request, tx: &Sender, - ch: &T, ) { // To simplify code and make the logic more clear. macro_rules! 
oneof { @@ -1428,8 +1416,8 @@ fn handle_batch_commands_request< ResolveLock, future_resolve_lock(storage), kv_resolve_lock; Gc, future_gc(), kv_gc; DeleteRange, future_delete_range(storage), kv_delete_range; - PrepareFlashbackToVersion, future_prepare_flashback_to_version(storage, ch), kv_prepare_flashback_to_version; - FlashbackToVersion, future_flashback_to_version(storage, ch), kv_flashback_to_version; + PrepareFlashbackToVersion, future_prepare_flashback_to_version(storage), kv_prepare_flashback_to_version; + FlashbackToVersion, future_flashback_to_version(storage), kv_flashback_to_version; RawBatchGet, future_raw_batch_get(storage), raw_batch_get; RawPut, future_raw_put(storage), raw_put; RawBatchPut, future_raw_batch_put(storage), raw_batch_put; @@ -1725,80 +1713,53 @@ fn future_delete_range( // Preparing the flashback for a region will "lock" the region so that // there is no any read, write or scheduling operation could be proposed before // the actual flashback operation. -fn future_prepare_flashback_to_version< - E: Engine, - L: LockManager, - F: KvFormat, - T: RaftStoreRouter + 'static, ->( +fn future_prepare_flashback_to_version( // Keep this param to hint the type of E for the compiler. - _storage: &Storage, - raft_router: &T, + storage: &Storage, req: PrepareFlashbackToVersionRequest, ) -> impl Future> { - let raft_router = Mutex::new(raft_router.clone()); + let f = storage.get_engine().start_flashback(req.get_context()); async move { - // Send an `AdminCmdType::PrepareFlashback` to prepare the raftstore for the - // later flashback. Once invoked, we will update the persistent region meta and - // the memory state of the flashback in Peer FSM to reject all read, write - // and scheduling operations for this region when propose/apply before we - // start the actual data flashback transaction command in the next phase. 
- send_flashback_msg::( - &raft_router, - req.get_context(), - AdminCmdType::PrepareFlashback, - ) - .await?; - Ok(PrepareFlashbackToVersionResponse::default()) + let res = f.await.map_err(storage::Error::from); + let mut resp = PrepareFlashbackToVersionResponse::default(); + if let Some(e) = extract_region_error(&res) { + resp.set_region_error(e); + } else if let Err(e) = res { + resp.set_error(format!("{}", e)); + } + Ok(resp) } } // Flashback the region to a specific point with the given `version`, please // make sure the region is "locked" by `PrepareFlashbackToVersion` first, // otherwise this request will fail. -fn future_flashback_to_version< - T: RaftStoreRouter + 'static, - E: Engine, - L: LockManager, - F: KvFormat, ->( +fn future_flashback_to_version( storage: &Storage, - raft_router: &T, req: FlashbackToVersionRequest, ) -> impl Future> { - let storage_clone = storage.clone(); - let raft_router = Mutex::new(raft_router.clone()); + let storage = storage.clone(); async move { // Perform the data flashback transaction command. We will check if the region // is in the flashback state when proposing the flashback modification. let (cb, f) = paired_future_callback(); - let res = storage_clone.sched_txn_command(req.clone().into(), cb); - // Avoid crossing `.await` to bypass the `Send` constraint. - drop(storage_clone); - let v = match res { - Err(e) => Err(e), - Ok(_) => f.await?, - }; - let mut resp = FlashbackToVersionResponse::default(); - if let Some(err) = extract_region_error(&v) { - resp.set_region_error(err); - } else if let Err(e) = v { - resp.set_error(format!("{}", e)); - } else { + let mut res = storage.sched_txn_command(req.clone().into(), cb); + if matches!(res, Ok(())) { + res = f.await.unwrap_or_else(|e| Err(box_err!(e))); + } + if matches!(res, Ok(())) { // Only finish flashback when Flashback executed successfully. 
fail_point!("skip_finish_flashback_to_version", |_| { Ok(FlashbackToVersionResponse::default()) }); - // Send an `AdminCmdType::FinishFlashback` to unset the persistence state - // in `RegionLocalState` and region's meta, and when that - // admin cmd is applied, will update the memory - // state of the flashback - send_flashback_msg::( - &raft_router, - req.get_context(), - AdminCmdType::FinishFlashback, - ) - .await?; + let f = storage.get_engine().end_flashback(req.get_context()); + res = f.await.map_err(storage::Error::from); + } + let mut resp = FlashbackToVersionResponse::default(); + if let Some(err) = extract_region_error(&res) { + resp.set_region_error(err); + } else if let Err(e) = res { + resp.set_error(format!("{}", e)); } Ok(resp) } @@ -2467,61 +2428,6 @@ fn needs_reject_raft_append(reject_messages_on_memory_ratio: f64) -> bool { false } -async fn send_flashback_msg + 'static, E: Engine>( - raft_router: &Mutex, - ctx: &Context, - cmd_type: AdminCmdType, -) -> ServerResult<()> { - let region_id = ctx.get_region_id(); - let (result_tx, result_rx) = oneshot::channel(); - let cb = Callback::write(Box::new(move |resp| { - if resp.response.get_header().has_error() { - result_tx.send(false).unwrap(); - error!("exec flashback msg failed"; - "region_id" => region_id, - "type" => ?cmd_type, - "error" => ?resp.response.get_header().get_error()); - return; - } - result_tx.send(true).unwrap(); - })); - let mut admin = AdminRequest::default(); - admin.set_cmd_type(cmd_type); - let mut req = RaftCmdRequest::default(); - req.mut_header().set_region_id(region_id); - req.mut_header() - .set_region_epoch(ctx.get_region_epoch().clone()); - req.mut_header().set_peer(ctx.get_peer().clone()); - req.set_admin_request(admin); - req.mut_header() - .set_flags(WriteBatchFlags::FLASHBACK.bits()); - // call admin request directly - let raft_router = raft_router.lock().await; - if let Err(e) = raft_router.send_command( - req, - cb, - RaftCmdExtraOpts { - deadline: None, - 
disk_full_opt: DiskFullOpt::AllowedOnAlmostFull, - }, - ) { - return Err(Error::Other(box_err!( - "send flashback msg {:?} failed for region {}, error {:?}", - cmd_type, - region_id, - e - ))); - } - if !result_rx.await? { - return Err(Error::Other(box_err!( - "wait flashback msg {:?} result failed for region {} failed", - cmd_type, - region_id - ))); - } - Ok(()) -} - #[cfg(test)] mod tests { use std::thread; From 8465f44a55b457edb583e1141e0a84b91d1dea17 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Tue, 22 Nov 2022 17:01:57 +0800 Subject: [PATCH 0349/1149] mvcc: skip accumulated locks using seek in forward scanner (#13819) ref tikv/tikv#13694 This commit adds support of skipping versions with a seek to the LatestKvPolicy of the forward scanner. Now we don't add this to other policies because they are usually used for large range of scanning. So, the influence of accumulated locks of certain keys should be amortized. Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- src/storage/mvcc/reader/scanner/forward.rs | 86 +++++++++++++++++++++- 1 file changed, 83 insertions(+), 3 deletions(-) diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 32898f1bfe7..12300187739 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -472,12 +472,19 @@ impl ScanPolicy for LatestKvPolicy { } WriteType::Delete => break None, WriteType::Lock | WriteType::Rollback => { - // Continue iterate next `write`. + if write.versions_to_last_change < SEEK_BOUND || write.last_change_ts.is_zero() + { + // Continue iterate next `write`. + cursors.write.next(&mut statistics.write); + } else { + // Seek to the expected version directly. + let commit_ts = write.last_change_ts; + let key_with_ts = current_user_key.clone().append_ts(commit_ts); + cursors.write.seek(&key_with_ts, &mut statistics.write)?; + } } } - cursors.write.next(&mut statistics.write); - if !cursors.write.valid()? 
{ // Key space ended. Needn't move write cursor to next key. return Ok(HandleRes::Skip(current_user_key)); @@ -1601,6 +1608,79 @@ mod latest_kv_tests { ); scanner.next().unwrap_err(); } + + #[test] + fn test_skip_versions_by_seek() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + + must_prewrite_put(&mut engine, b"k1", b"v11", b"k1", 1); + must_commit(&mut engine, b"k1", 1, 5); + must_prewrite_put(&mut engine, b"k1", b"v12", b"k1", 6); + must_commit(&mut engine, b"k1", 6, 8); + must_prewrite_put(&mut engine, b"k2", b"v21", b"k2", 2); + must_commit(&mut engine, b"k2", 2, 6); + must_prewrite_put(&mut engine, b"k3", b"v31", b"k3", 3); + must_commit(&mut engine, b"k3", 3, 7); + + for start_ts in (10..30).into_iter().step_by(2) { + must_prewrite_lock(&mut engine, b"k1", b"k1", start_ts); + must_commit(&mut engine, b"k1", start_ts, start_ts + 1); + must_rollback(&mut engine, b"k3", start_ts + 1, true); + } + + must_prewrite_put(&mut engine, b"k1", b"v13", b"k1", 40); + must_commit(&mut engine, b"k1", 40, 45); + must_prewrite_put(&mut engine, b"k2", b"v22", b"k2", 41); + must_commit(&mut engine, b"k2", 41, 46); + must_prewrite_put(&mut engine, b"k3", b"v32", b"k3", 42); + must_commit(&mut engine, b"k3", 42, 47); + + // KEY | COMMIT_TS | TYPE | VALUE + // ----|-----------|----------|------- + // k1 | 45 | PUT | v13 + // k1 | 29 | LOCK | + // k1 | 27 | LOCK | + // k1 | ... | LOCK | + // k1 | 11 | LOCK | + // k1 | 8 | PUT | v12 + // k1 | 5 | PUT | v1 + // k2 | 46 | PUT | v22 + // k2 | 6 | PUT | v21 + // k3 | 47 | PUT | v32 + // k3 | 29 | ROLLBACK | + // k3 | 27 | ROLLBACK | + // k3 | ... 
| ROLLBACK | + // k3 | 11 | ROLLBACK | + // k3 | 7 | PUT | v31 + + let snapshot = engine.snapshot(Default::default()).unwrap(); + let mut scanner = ScannerBuilder::new(snapshot, 35.into()) + .range(None, None) + .build() + .unwrap(); + + assert_eq!( + scanner.next().unwrap(), + Some((Key::from_raw(b"k1"), b"v12".to_vec())) + ); + let stats = scanner.take_statistics(); + assert_eq!(stats.write.next, 3); // skip k1@45, k1@8, k1@5 + assert_eq!(stats.write.seek, 2); // seek beginning and k1@8 + + assert_eq!( + scanner.next().unwrap(), + Some((Key::from_raw(b"k2"), b"v21".to_vec())) + ); + scanner.take_statistics(); + + assert_eq!( + scanner.next().unwrap(), + Some((Key::from_raw(b"k3"), b"v31".to_vec())) + ); + let stats = scanner.take_statistics(); + assert_le!(stats.write.next, 2); // skip k2@6, k3@47 + assert_eq!(stats.write.seek, 1); // seek k3@7 + } } #[cfg(test)] From d9ce7d7e3cb03f0e0645bbda5291873ce632030f Mon Sep 17 00:00:00 2001 From: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Date: Tue, 22 Nov 2022 18:19:58 +0800 Subject: [PATCH 0350/1149] backup: backup sub-ranges in one request (#13702) close tikv/tikv#13701 Signed-off-by: Leavrth Signed-off-by: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Co-authored-by: Neil Shen --- components/backup/src/endpoint.rs | 291 +++++++++++++++++++++++++++--- 1 file changed, 262 insertions(+), 29 deletions(-) diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index db6ff331d7f..b880da7a3dc 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -19,7 +19,7 @@ use futures::{channel::mpsc::*, executor::block_on}; use kvproto::{ brpb::*, encryptionpb::EncryptionMethod, - kvrpcpb::{ApiVersion, Context, IsolationLevel}, + kvrpcpb::{ApiVersion, Context, IsolationLevel, KeyRange}, metapb::*, }; use online_config::OnlineConfig; @@ -59,6 +59,7 @@ const BACKUP_BATCH_LIMIT: usize = 1024; struct Request { start_key: Vec, end_key: Vec, + sub_ranges: 
Vec, start_ts: TimeStamp, end_ts: TimeStamp, limiter: Limiter, @@ -119,6 +120,7 @@ impl Task { request: Request { start_key: req.get_start_key().to_owned(), end_key: req.get_end_key().to_owned(), + sub_ranges: req.get_sub_ranges().to_owned(), start_ts: req.get_start_version().into(), end_ts: req.get_end_version().into(), backend: req.get_storage_backend().clone(), @@ -676,6 +678,8 @@ pub struct Endpoint { /// The progress of a backup task pub struct Progress { store_id: u64, + ranges: Vec<(Option, Option)>, + next_index: usize, next_start: Option, end_key: Option, region_info: R, @@ -685,7 +689,7 @@ pub struct Progress { } impl Progress { - fn new( + fn new_with_range( store_id: u64, next_start: Option, end_key: Option, @@ -693,14 +697,41 @@ impl Progress { codec: KeyValueCodec, cf: CfName, ) -> Self { - Progress { + let ranges = vec![(next_start, end_key)]; + Self::new_with_ranges(store_id, ranges, region_info, codec, cf) + } + + fn new_with_ranges( + store_id: u64, + ranges: Vec<(Option, Option)>, + region_info: R, + codec: KeyValueCodec, + cf: CfName, + ) -> Self { + let mut prs = Progress { store_id, - next_start, - end_key, + ranges, + next_index: 0, + next_start: None, + end_key: None, region_info, finished: false, codec, cf, + }; + prs.try_next(); + prs + } + + /// try the next range. If all the ranges are consumed, + /// set self.finish true. 
+ fn try_next(&mut self) { + if self.ranges.len() > self.next_index { + (self.next_start, self.end_key) = self.ranges[self.next_index].clone(); + + self.next_index += 1; + } else { + self.finished = true; } } @@ -770,11 +801,12 @@ impl Progress { // region, we need to set the `finished` flag here in case // we run with `next_start` set to None if b.region.get_end_key().is_empty() || b.end_key == self.end_key { - self.finished = true; + self.try_next(); + } else { + self.next_start = b.end_key.clone(); } - self.next_start = b.end_key.clone(); } else { - self.finished = true; + self.try_next(); } branges } @@ -958,6 +990,39 @@ impl Endpoint { }); } + fn get_progress_by_req( + &self, + request: &Request, + codec: KeyValueCodec, + ) -> Arc>> { + if request.sub_ranges.is_empty() { + let start_key = codec.encode_backup_key(request.start_key.clone()); + let end_key = codec.encode_backup_key(request.end_key.clone()); + Arc::new(Mutex::new(Progress::new_with_range( + self.store_id, + start_key, + end_key, + self.region_info.clone(), + codec, + request.cf, + ))) + } else { + let mut ranges = Vec::with_capacity(request.sub_ranges.len()); + for k in &request.sub_ranges { + let start_key = codec.encode_backup_key(k.start_key.clone()); + let end_key = codec.encode_backup_key(k.end_key.clone()); + ranges.push((start_key, end_key)); + } + Arc::new(Mutex::new(Progress::new_with_ranges( + self.store_id, + ranges, + self.region_info.clone(), + codec, + request.cf, + ))) + } + } + pub fn handle_backup_task(&self, task: Task) { let Task { request, resp } = task; let codec = KeyValueCodec::new(request.is_raw_kv, self.api_version, request.dst_api_ver); @@ -996,17 +1061,9 @@ impl Endpoint { return; } } - let start_key = codec.encode_backup_key(request.start_key.clone()); - let end_key = codec.encode_backup_key(request.end_key.clone()); - let prs = Arc::new(Mutex::new(Progress::new( - self.store_id, - start_key, - end_key, - self.region_info.clone(), - codec, - request.cf, - ))); + let prs 
= self.get_progress_by_req(&request, codec); + let backend = match create_storage(&request.backend, self.get_config()) { Ok(backend) => backend, Err(err) => { @@ -1384,17 +1441,9 @@ pub mod tests { // Test seek backup range. let test_seek_backup_range = |start_key: &[u8], end_key: &[u8], expect: Vec<(&[u8], &[u8])>| { - let start_key = if start_key.is_empty() { - None - } else { - Some(Key::from_raw(start_key)) - }; - let end_key = if end_key.is_empty() { - None - } else { - Some(Key::from_raw(end_key)) - }; - let mut prs = Progress::new( + let start_key = (!start_key.is_empty()).then_some(Key::from_raw(start_key)); + let end_key = (!end_key.is_empty()).then_some(Key::from_raw(end_key)); + let mut prs = Progress::new_with_range( endpoint.store_id, start_key, end_key, @@ -1446,6 +1495,7 @@ pub mod tests { request: Request { start_key: start_key.to_vec(), end_key: end_key.to_vec(), + sub_ranges: Vec::new(), start_ts: 1.into(), end_ts: 1.into(), backend, @@ -1512,6 +1562,189 @@ pub mod tests { } } + #[test] + fn test_seek_ranges() { + let (_tmp, endpoint) = new_endpoint(); + + endpoint.region_info.set_regions(vec![ + (b"".to_vec(), b"1".to_vec(), 1), + (b"1".to_vec(), b"2".to_vec(), 2), + (b"3".to_vec(), b"4".to_vec(), 3), + (b"7".to_vec(), b"9".to_vec(), 4), + (b"9".to_vec(), b"".to_vec(), 5), + ]); + // Test seek backup range. 
+ let test_seek_backup_ranges = + |sub_ranges: Vec<(&[u8], &[u8])>, expect: Vec<(&[u8], &[u8])>| { + let mut ranges = Vec::with_capacity(sub_ranges.len()); + for &(start_key, end_key) in &sub_ranges { + let start_key = (!start_key.is_empty()).then_some(Key::from_raw(start_key)); + let end_key = (!end_key.is_empty()).then_some(Key::from_raw(end_key)); + ranges.push((start_key, end_key)); + } + let mut prs = Progress::new_with_ranges( + endpoint.store_id, + ranges, + endpoint.region_info.clone(), + KeyValueCodec::new(false, ApiVersion::V1, ApiVersion::V1), + engine_traits::CF_DEFAULT, + ); + + let mut ranges = Vec::with_capacity(expect.len()); + while ranges.len() != expect.len() { + let n = (rand::random::() % 3) + 1; + let mut r = prs.forward(n); + // The returned backup ranges should <= n + assert!(r.len() <= n); + + if r.is_empty() { + // if return a empty vec then the progress is finished + assert_eq!( + ranges.len(), + expect.len(), + "got {:?}, expect {:?}", + ranges, + expect + ); + } + ranges.append(&mut r); + } + + for (a, b) in ranges.into_iter().zip(expect) { + assert_eq!( + a.start_key.map_or_else(Vec::new, |k| k.into_raw().unwrap()), + b.0 + ); + assert_eq!( + a.end_key.map_or_else(Vec::new, |k| k.into_raw().unwrap()), + b.1 + ); + } + }; + + // Test whether responses contain correct range. 
+ #[allow(clippy::blocks_in_if_conditions)] + let test_handle_backup_task_ranges = + |sub_ranges: Vec<(&[u8], &[u8])>, expect: Vec<(&[u8], &[u8])>| { + let tmp = TempDir::new().unwrap(); + let backend = make_local_backend(tmp.path()); + let (tx, rx) = unbounded(); + + let mut ranges = Vec::with_capacity(sub_ranges.len()); + for &(start_key, end_key) in &sub_ranges { + let key_range = KeyRange { + start_key: start_key.to_vec(), + end_key: end_key.to_vec(), + ..Default::default() + }; + ranges.push(key_range); + } + let task = Task { + request: Request { + start_key: b"1".to_vec(), + end_key: b"2".to_vec(), + sub_ranges: ranges, + start_ts: 1.into(), + end_ts: 1.into(), + backend, + limiter: Limiter::new(f64::INFINITY), + cancel: Arc::default(), + is_raw_kv: false, + dst_api_ver: ApiVersion::V1, + cf: engine_traits::CF_DEFAULT, + compression_type: CompressionType::Unknown, + compression_level: 0, + cipher: CipherInfo::default(), + }, + resp: tx, + }; + endpoint.handle_backup_task(task); + let resps: Vec<_> = block_on(rx.collect()); + for a in &resps { + assert!( + expect + .iter() + .any(|b| { a.get_start_key() == b.0 && a.get_end_key() == b.1 }), + "{:?} {:?}", + resps, + expect + ); + } + assert_eq!(resps.len(), expect.len()); + }; + + // Backup range from case.0 to case.1, + // the case.2 is the expected results. 
+ type Case<'a> = (Vec<(&'a [u8], &'a [u8])>, Vec<(&'a [u8], &'a [u8])>); + + let case: Vec> = vec![ + ( + vec![(b"", b"1"), (b"1", b"2")], + vec![(b"", b"1"), (b"1", b"2")], + ), + ( + vec![(b"", b"2"), (b"3", b"4")], + vec![(b"", b"1"), (b"1", b"2"), (b"3", b"4")], + ), + ( + vec![(b"7", b"8"), (b"8", b"9")], + vec![(b"7", b"8"), (b"8", b"9")], + ), + ( + vec![(b"8", b"9"), (b"6", b"8")], + vec![(b"8", b"9"), (b"7", b"8")], + ), + ( + vec![(b"8", b"85"), (b"88", b"89"), (b"7", b"8")], + vec![(b"8", b"85"), (b"88", b"89"), (b"7", b"8")], + ), + ( + vec![(b"8", b"85"), (b"", b"35"), (b"88", b"89"), (b"7", b"8")], + vec![ + (b"8", b"85"), + (b"", b"1"), + (b"1", b"2"), + (b"3", b"35"), + (b"88", b"89"), + (b"7", b"8"), + ], + ), + (vec![(b"", b"1")], vec![(b"", b"1")]), + (vec![(b"", b"2")], vec![(b"", b"1"), (b"1", b"2")]), + (vec![(b"1", b"2")], vec![(b"1", b"2")]), + (vec![(b"1", b"3")], vec![(b"1", b"2")]), + (vec![(b"1", b"4")], vec![(b"1", b"2"), (b"3", b"4")]), + (vec![(b"4", b"5")], vec![]), + (vec![(b"4", b"6")], vec![]), + (vec![(b"4", b"6"), (b"6", b"7")], vec![]), + (vec![(b"2", b"3"), (b"4", b"6"), (b"6", b"7")], vec![]), + (vec![(b"2", b"7")], vec![(b"3", b"4")]), + (vec![(b"7", b"8")], vec![(b"7", b"8")]), + ( + vec![(b"3", b"")], + vec![(b"3", b"4"), (b"7", b"9"), (b"9", b"")], + ), + (vec![(b"5", b"")], vec![(b"7", b"9"), (b"9", b"")]), + (vec![(b"7", b"")], vec![(b"7", b"9"), (b"9", b"")]), + (vec![(b"8", b"91")], vec![(b"8", b"9"), (b"9", b"91")]), + (vec![(b"8", b"")], vec![(b"8", b"9"), (b"9", b"")]), + ( + vec![(b"", b"")], + vec![ + (b"", b"1"), + (b"1", b"2"), + (b"3", b"4"), + (b"7", b"9"), + (b"9", b""), + ], + ), + ]; + for (ranges, expect_ranges) in case { + test_seek_backup_ranges(ranges.clone(), expect_ranges.clone()); + test_handle_backup_task_ranges(ranges, expect_ranges); + } + } + #[test] fn test_handle_backup_task() { let limiter = Arc::new(IoRateLimiter::new_for_test()); From 1c915f34a3387d91625fb92902df87d705a32afe Mon Sep 17 
00:00:00 2001 From: Jay Date: Tue, 22 Nov 2022 22:15:58 +0800 Subject: [PATCH 0351/1149] *: remove legacy code (#13832) ref tikv/tikv#13827 green gc is developed for gc without waking hibernated peers. But since dynamic regions, it's not compatible with physical isolation. And it's never used in production due to correctness concern. Read index RPC is used by tiflash in the past. But now tiflash is using internal message forward instead of explicit RPC. And I'm not aware of any other project is utilizing the API. Signed-off-by: Jay Lee --- components/server/src/server.rs | 6 - components/test_raftstore/src/server.rs | 3 - components/test_raftstore/src/util.rs | 51 - .../gc_worker/applied_lock_collector.rs | 894 ------------------ src/server/gc_worker/gc_manager.rs | 1 - src/server/gc_worker/gc_worker.rs | 262 +---- src/server/gc_worker/mod.rs | 1 - src/server/metrics.rs | 2 - src/server/service/kv.rs | 263 +----- tests/failpoints/cases/test_gc_worker.rs | 284 ------ tests/failpoints/cases/test_kv_service.rs | 89 -- tests/integrations/server/gc_worker.rs | 258 +---- tests/integrations/server/kv_service.rs | 113 +-- 13 files changed, 9 insertions(+), 2218 deletions(-) delete mode 100644 src/server/gc_worker/applied_lock_collector.rs diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 80d44b114b9..625db3e951f 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1106,12 +1106,6 @@ where gc_worker .start(node.id()) .unwrap_or_else(|e| fatal!("failed to start gc worker: {}", e)); - gc_worker - .start_observe_lock_apply( - self.coprocessor_host.as_mut().unwrap(), - self.concurrency_manager.clone(), - ) - .unwrap_or_else(|e| fatal!("gc worker failed to observe lock apply: {}", e)); if let Err(e) = gc_worker.start_auto_gc(auto_gc_config, safe_point) { fatal!("failed to start auto_gc on storage, error: {}", e); } diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs 
index 42cefe60496..1b532932b30 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -341,9 +341,6 @@ impl ServerCluster { Arc::new(region_info_accessor.clone()), ); gc_worker.start(node_id).unwrap(); - gc_worker - .start_observe_lock_apply(&mut coprocessor_host, concurrency_manager.clone()) - .unwrap(); let rts_worker = if cfg.resolved_ts.enable { // Resolved ts worker diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index eb8ab3fe885..06c2da432c0 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -1083,57 +1083,6 @@ pub fn must_check_txn_status( resp } -pub fn must_physical_scan_lock( - client: &TikvClient, - ctx: Context, - max_ts: u64, - start_key: &[u8], - limit: usize, -) -> Vec { - let mut req = PhysicalScanLockRequest::default(); - req.set_context(ctx); - req.set_max_ts(max_ts); - req.set_start_key(start_key.to_owned()); - req.set_limit(limit as _); - let mut resp = client.physical_scan_lock(&req).unwrap(); - resp.take_locks().into() -} - -pub fn register_lock_observer(client: &TikvClient, max_ts: u64) -> RegisterLockObserverResponse { - let mut req = RegisterLockObserverRequest::default(); - req.set_max_ts(max_ts); - client.register_lock_observer(&req).unwrap() -} - -pub fn must_register_lock_observer(client: &TikvClient, max_ts: u64) { - let resp = register_lock_observer(client, max_ts); - assert!(resp.get_error().is_empty(), "{:?}", resp.get_error()); -} - -pub fn check_lock_observer(client: &TikvClient, max_ts: u64) -> CheckLockObserverResponse { - let mut req = CheckLockObserverRequest::default(); - req.set_max_ts(max_ts); - client.check_lock_observer(&req).unwrap() -} - -pub fn must_check_lock_observer(client: &TikvClient, max_ts: u64, clean: bool) -> Vec { - let mut resp = check_lock_observer(client, max_ts); - assert!(resp.get_error().is_empty(), "{:?}", resp.get_error()); - assert_eq!(resp.get_is_clean(), clean); 
- resp.take_locks().into() -} - -pub fn remove_lock_observer(client: &TikvClient, max_ts: u64) -> RemoveLockObserverResponse { - let mut req = RemoveLockObserverRequest::default(); - req.set_max_ts(max_ts); - client.remove_lock_observer(&req).unwrap() -} - -pub fn must_remove_lock_observer(client: &TikvClient, max_ts: u64) { - let resp = remove_lock_observer(client, max_ts); - assert!(resp.get_error().is_empty(), "{:?}", resp.get_error()); -} - pub fn get_tso(pd_client: &TestPdClient) -> u64 { block_on(pd_client.get_tso()).unwrap().into_inner() } diff --git a/src/server/gc_worker/applied_lock_collector.rs b/src/server/gc_worker/applied_lock_collector.rs deleted file mode 100644 index 9d0e16f4286..00000000000 --- a/src/server/gc_worker/applied_lock_collector.rs +++ /dev/null @@ -1,894 +0,0 @@ -// Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. - -use std::{ - cmp::Ordering::*, - fmt::{self, Debug, Display}, - sync::{ - atomic::{AtomicBool, AtomicU64, Ordering}, - Arc, Mutex, - }, -}; - -use concurrency_manager::ConcurrencyManager; -use engine_traits::{CfName, KvEngine, CF_LOCK}; -use keys::origin_key; -use kvproto::{kvrpcpb::LockInfo, raft_cmdpb::CmdType}; -use raftstore::coprocessor::{ - ApplySnapshotObserver, BoxApplySnapshotObserver, BoxQueryObserver, Cmd, Coprocessor, - CoprocessorHost, ObserverContext, QueryObserver, -}; -use tikv_util::worker::{Builder as WorkerBuilder, Runnable, ScheduleError, Scheduler, Worker}; -use txn_types::Key; - -// TODO: Use new error type for GcWorker instead of storage::Error. -use super::{Error, ErrorInner, Result}; -use crate::storage::{ - mvcc::{ErrorInner as MvccErrorInner, Lock, TimeStamp}, - txn::Error as TxnError, -}; - -const MAX_COLLECT_SIZE: usize = 1024; - -/// The state of the observer. Shared between all clones. 
-#[derive(Default)] -struct LockObserverState { - max_ts: AtomicU64, - - /// `is_clean` is true, only it's sure that all applying of stale locks - /// (locks with start_ts <= specified max_ts) are monitored and collected. - /// If there are too many stale locks or any error happens, `is_clean` - /// must be set to `false`. - is_clean: AtomicBool, -} - -impl LockObserverState { - fn load_max_ts(&self) -> TimeStamp { - self.max_ts.load(Ordering::Acquire).into() - } - - fn store_max_ts(&self, max_ts: TimeStamp) { - self.max_ts.store(max_ts.into_inner(), Ordering::Release) - } - - fn is_clean(&self) -> bool { - self.is_clean.load(Ordering::Acquire) - } - - fn mark_clean(&self) { - self.is_clean.store(true, Ordering::Release); - } - - fn mark_dirty(&self) { - self.is_clean.store(false, Ordering::Release); - } -} - -pub type Callback = Box) + Send>; - -enum LockCollectorTask { - // Messages from observer - ObservedLocks(Vec<(Key, Lock)>), - - // Messages from client - StartCollecting { - max_ts: TimeStamp, - callback: Callback<()>, - }, - GetCollectedLocks { - max_ts: TimeStamp, - callback: Callback<(Vec, bool)>, - }, - StopCollecting { - max_ts: TimeStamp, - callback: Callback<()>, - }, -} - -impl Debug for LockCollectorTask { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - LockCollectorTask::ObservedLocks(locks) => f - .debug_struct("ObservedLocks") - .field("locks", locks) - .finish(), - LockCollectorTask::StartCollecting { max_ts, .. } => f - .debug_struct("StartCollecting") - .field("max_ts", max_ts) - .finish(), - LockCollectorTask::GetCollectedLocks { max_ts, .. } => f - .debug_struct("GetCollectedLocks") - .field("max_ts", max_ts) - .finish(), - LockCollectorTask::StopCollecting { max_ts, .. 
} => f - .debug_struct("StopCollecting") - .field("max_ts", max_ts) - .finish(), - } - } -} - -impl Display for LockCollectorTask { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - Debug::fmt(&self, f) - } -} - -/// `LockObserver` observes apply events and apply snapshot events. If it -/// happens in CF_LOCK, it checks the `start_ts`s of the locks being written. If -/// a lock's `start_ts` <= specified `max_ts` in the `state`, it will send the -/// lock to through the `sender`, so the receiver can collect it. -#[derive(Clone)] -struct LockObserver { - state: Arc, - sender: Scheduler, -} - -impl LockObserver { - pub fn new(state: Arc, sender: Scheduler) -> Self { - Self { state, sender } - } - - pub fn register(self, coprocessor_host: &mut CoprocessorHost) { - coprocessor_host - .registry - .register_apply_snapshot_observer(1, BoxApplySnapshotObserver::new(self.clone())); - coprocessor_host - .registry - .register_query_observer(1, BoxQueryObserver::new(self)); - } - - fn send(&self, locks: Vec<(Key, Lock)>) { - #[cfg(feature = "failpoints")] - let injected_full = (|| { - fail_point!("lock_observer_send_full", |_| { - info!("[failpoint] injected lock observer channel full"; "locks" => ?locks); - true - }); - false - })(); - #[cfg(not(feature = "failpoints"))] - let injected_full = false; - - let res = if injected_full { - Err(ScheduleError::Full(LockCollectorTask::ObservedLocks(locks))) - } else { - self.sender - .schedule(LockCollectorTask::ObservedLocks(locks)) - }; - - match res { - Ok(()) => (), - Err(ScheduleError::Stopped(_)) => { - error!("lock observer failed to send locks because collector is stopped"); - } - Err(ScheduleError::Full(_)) => { - fail_point!("lock_observer_before_mark_dirty_on_full"); - self.state.mark_dirty(); - warn!("cannot collect all applied lock because channel is full"); - } - } - } -} - -impl Coprocessor for LockObserver {} - -impl QueryObserver for LockObserver { - fn post_apply_query(&self, _: &mut ObserverContext<'_>, 
cmd: &Cmd) { - fail_point!("notify_lock_observer_query"); - let max_ts = self.state.load_max_ts(); - if max_ts.is_zero() { - return; - } - - if !self.state.is_clean() { - return; - } - - let mut locks = vec![]; - // For each put in CF_LOCK, collect it if its ts <= max_ts. - for req in cmd.request.get_requests() { - if req.get_cmd_type() != CmdType::Put { - continue; - } - let put_request = req.get_put(); - if put_request.get_cf() != CF_LOCK { - continue; - } - - let lock = match Lock::parse(put_request.get_value()) { - Ok(l) => l, - Err(e) => { - error!(?e; - "cannot parse lock"; - "value" => log_wrappers::Value::value(put_request.get_value()), - ); - self.state.mark_dirty(); - return; - } - }; - - if lock.ts <= max_ts { - let key = Key::from_encoded_slice(put_request.get_key()); - locks.push((key, lock)); - } - } - if !locks.is_empty() { - self.send(locks); - } - } -} - -impl ApplySnapshotObserver for LockObserver { - fn apply_plain_kvs( - &self, - _: &mut ObserverContext<'_>, - cf: CfName, - kv_pairs: &[(Vec, Vec)], - ) { - fail_point!("notify_lock_observer_snapshot"); - if cf != CF_LOCK { - return; - } - - let max_ts = self.state.load_max_ts(); - if max_ts.is_zero() { - return; - } - - if !self.state.is_clean() { - return; - } - - let locks: Result> = kv_pairs - .iter() - .map(|(key, value)| { - Lock::parse(value) - .map(|lock| (key, lock)) - .map_err(|e| ErrorInner::Txn(TxnError::from_mvcc(e)).into()) - }) - .filter(|result| result.is_err() || result.as_ref().unwrap().1.ts <= max_ts) - .map(|result| { - // `apply_plain_keys` will be invoked with the data_key in RocksDB layer. So we - // need to remove the `z` prefix. 
- result.map(|(key, lock)| (Key::from_encoded_slice(origin_key(key)), lock)) - }) - .collect(); - - match locks { - Err(e) => { - error!(?e; "cannot parse lock"); - self.state.mark_dirty() - } - Ok(l) => self.send(l), - } - } - - fn apply_sst(&self, _: &mut ObserverContext<'_>, cf: CfName, _path: &str) { - if cf == CF_LOCK { - error!("cannot collect all applied lock: snapshot of lock cf applied from sst file"); - self.state.mark_dirty(); - } - } -} - -struct LockCollectorRunner { - observer_state: Arc, - - collected_locks: Vec<(Key, Lock)>, -} - -impl LockCollectorRunner { - pub fn new(observer_state: Arc) -> Self { - Self { - observer_state, - collected_locks: vec![], - } - } - - fn handle_observed_locks(&mut self, mut locks: Vec<(Key, Lock)>) { - if self.collected_locks.len() >= MAX_COLLECT_SIZE { - return; - } - - if locks.len() + self.collected_locks.len() >= MAX_COLLECT_SIZE { - self.observer_state.mark_dirty(); - info!("lock collector marked dirty because received too many locks"); - locks.truncate(MAX_COLLECT_SIZE - self.collected_locks.len()); - } - self.collected_locks.extend(locks); - } - - fn start_collecting(&mut self, max_ts: TimeStamp) -> Result<()> { - let curr_max_ts = self.observer_state.load_max_ts(); - match max_ts.cmp(&curr_max_ts) { - Less => Err(box_err!( - "collecting locks with a greater max_ts: {}", - curr_max_ts - )), - Equal => { - // Stale request. Ignore it. - Ok(()) - } - Greater => { - info!("start collecting locks"; "max_ts" => max_ts); - self.collected_locks.clear(); - // TODO: `is_clean` may be unexpectedly set to false here, if any error happens - // on a previous observing. It need to be solved, although it's very unlikely to - // happen and doesn't affect correctness of data. 
- self.observer_state.mark_clean(); - self.observer_state.store_max_ts(max_ts); - Ok(()) - } - } - } - - fn get_collected_locks(&mut self, max_ts: TimeStamp) -> Result<(Vec, bool)> { - let curr_max_ts = self.observer_state.load_max_ts(); - if curr_max_ts != max_ts { - warn!( - "trying to fetch collected locks but now collecting with another max_ts"; - "req_max_ts" => max_ts, - "current_max_ts" => curr_max_ts, - ); - return Err(box_err!( - "trying to fetch collected locks but now collecting with another max_ts" - )); - } - - let locks: Result<_> = self - .collected_locks - .iter() - .map(|(k, l)| { - k.to_raw() - .map(|raw_key| l.clone().into_lock_info(raw_key)) - .map_err(|e| Error::from(TxnError::from_mvcc(e))) - }) - .collect(); - - Ok((locks?, self.observer_state.is_clean())) - } - - fn stop_collecting(&mut self, max_ts: TimeStamp) -> Result<()> { - let res = self.observer_state.max_ts.compare_exchange( - max_ts.into_inner(), - 0, - Ordering::SeqCst, - Ordering::SeqCst, - ); - if res.is_ok() { - self.collected_locks.clear(); - info!("stop collecting locks"; "max_ts" => max_ts); - Ok(()) - } else { - warn!( - "trying to stop collecting locks, but now collecting with a different max_ts"; - "stopping_max_ts" => max_ts, - "current_max_ts" => TimeStamp::new(res.unwrap_err()), - ); - Err(box_err!("collecting locks with another max_ts")) - } - } -} - -impl Runnable for LockCollectorRunner { - type Task = LockCollectorTask; - - fn run(&mut self, task: LockCollectorTask) { - match task { - LockCollectorTask::ObservedLocks(locks) => self.handle_observed_locks(locks), - LockCollectorTask::StartCollecting { max_ts, callback } => { - callback(self.start_collecting(max_ts)) - } - LockCollectorTask::GetCollectedLocks { max_ts, callback } => { - callback(self.get_collected_locks(max_ts)) - } - LockCollectorTask::StopCollecting { max_ts, callback } => { - callback(self.stop_collecting(max_ts)) - } - } - } -} - -pub struct AppliedLockCollector { - worker: Mutex, - scheduler: 
Scheduler, - concurrency_manager: ConcurrencyManager, -} - -impl AppliedLockCollector { - pub fn new( - coprocessor_host: &mut CoprocessorHost, - concurrency_manager: ConcurrencyManager, - ) -> Result { - let worker = Mutex::new(WorkerBuilder::new("lock-collector").create()); - - let state = Arc::new(LockObserverState::default()); - let runner = LockCollectorRunner::new(Arc::clone(&state)); - let scheduler = worker.lock().unwrap().start("lock-collector", runner); - let observer = LockObserver::new(state, scheduler.clone()); - - observer.register(coprocessor_host); - - // Start the worker - - Ok(Self { - worker, - scheduler, - concurrency_manager, - }) - } - - pub fn stop(&self) { - self.worker.lock().unwrap().stop(); - } - - /// Starts collecting applied locks whose `start_ts` <= `max_ts`. Only one - /// `max_ts` is valid at one time. - pub fn start_collecting(&self, max_ts: TimeStamp, callback: Callback<()>) -> Result<()> { - // Before starting collecting, check the concurrency manager to avoid later - // prewrite requests uses a min_commit_ts less than the safepoint. - // `max_ts` here is the safepoint of the current round of GC. - // Ths is similar to that we update max_ts and check memory lock when handling - // other transactional read requests. However this is done at start_collecting - // instead of physical_scan_locks. The reason is that, to fully scan a TiKV - // store, it might needs more than one physical_scan_lock requests. However - // memory lock needs to be checked before scanning the locks, and we can't know - // the `end_key` of the scan range at that time. As a result, each - // physical_scan_lock request will cause scanning memory lock from the start_key - // to the very-end of the TiKV node, which is a waste. But since we always start - // collecting applied locks before physical scan lock, so a better idea is to - // check the memory lock before physical_scan_lock. 
- self.concurrency_manager.update_max_ts(max_ts); - self.concurrency_manager - .read_range_check(None, None, |key, lock| { - // `Lock::check_ts_conflict` can't be used here, because LockType::Lock - // can't be ignored in this case. - if lock.ts <= max_ts { - Err(TxnError::from_mvcc(MvccErrorInner::KeyIsLocked( - lock.clone().into_lock_info(key.to_raw()?), - ))) - } else { - Ok(()) - } - })?; - self.scheduler - .schedule(LockCollectorTask::StartCollecting { max_ts, callback }) - .map_err(|e| box_err!("failed to schedule task: {:?}", e)) - } - - /// Get the collected locks after `start_collecting`. Only valid when - /// `max_ts` matches the `max_ts` provided to `start_collecting`. - /// Collects at most `MAX_COLLECT_SIZE` locks. If there are (even - /// potentially) more locks than `MAX_COLLECT_SIZE` or any error happens, - /// the flag `is_clean` will be unset, which represents - /// `AppliedLockCollector` cannot collect all locks. - pub fn get_collected_locks( - &self, - max_ts: TimeStamp, - callback: Callback<(Vec, bool)>, - ) -> Result<()> { - self.scheduler - .schedule(LockCollectorTask::GetCollectedLocks { max_ts, callback }) - .map_err(|e| box_err!("failed to schedule task: {:?}", e)) - } - - /// Stop collecting locks. Only valid when `max_ts` matches the `max_ts` - /// provided to `start_collecting`. 
- pub fn stop_collecting(&self, max_ts: TimeStamp, callback: Callback<()>) -> Result<()> { - self.scheduler - .schedule(LockCollectorTask::StopCollecting { max_ts, callback }) - .map_err(|e| box_err!("failed to schedule task: {:?}", e)) - } -} - -impl Drop for AppliedLockCollector { - fn drop(&mut self) { - self.stop(); - } -} - -#[cfg(test)] -mod tests { - use std::sync::mpsc::channel; - - use engine_test::kv::KvTestEngine; - use engine_traits::CF_DEFAULT; - use futures::executor::block_on; - use kvproto::{ - kvrpcpb::Op, - metapb::Region, - raft_cmdpb::{PutRequest, RaftCmdRequest, RaftCmdResponse, Request as RaftRequest}, - }; - use txn_types::LockType; - - use super::*; - - fn lock_info_to_kv(mut lock_info: LockInfo) -> (Vec, Vec) { - let key = Key::from_raw(lock_info.get_key()).into_encoded(); - let lock = Lock::new( - match lock_info.get_lock_type() { - Op::Put => LockType::Put, - Op::Del => LockType::Delete, - Op::Lock => LockType::Lock, - Op::PessimisticLock => LockType::Pessimistic, - _ => unreachable!(), - }, - lock_info.take_primary_lock(), - lock_info.get_lock_version().into(), - lock_info.get_lock_ttl(), - None, - 0.into(), - lock_info.get_txn_size(), - 0.into(), - ); - let value = lock.to_bytes(); - (key, value) - } - - fn make_apply_request( - key: Vec, - value: Vec, - cf: &str, - cmd_type: CmdType, - ) -> RaftRequest { - let mut put_req = PutRequest::default(); - put_req.set_cf(cf.to_owned()); - put_req.set_key(key); - put_req.set_value(value); - - let mut req = RaftRequest::default(); - req.set_cmd_type(cmd_type); - req.set_put(put_req); - req - } - - fn make_raft_cmd(requests: Vec) -> Cmd { - let mut req = RaftCmdRequest::default(); - req.set_requests(requests.into()); - Cmd::new(0, 0, req, RaftCmdResponse::default()) - } - - fn new_test_collector() -> (AppliedLockCollector, CoprocessorHost) { - let mut coprocessor_host = CoprocessorHost::default(); - let collector = - AppliedLockCollector::new(&mut coprocessor_host, 
ConcurrencyManager::new(1.into())) - .unwrap(); - (collector, coprocessor_host) - } - - fn start_collecting(c: &AppliedLockCollector, max_ts: u64) -> Result<()> { - let (tx, rx) = channel(); - c.start_collecting(max_ts.into(), Box::new(move |r| tx.send(r).unwrap())) - .and_then(move |()| rx.recv().unwrap()) - } - - fn get_collected_locks(c: &AppliedLockCollector, max_ts: u64) -> Result<(Vec, bool)> { - let (tx, rx) = channel(); - c.get_collected_locks(max_ts.into(), Box::new(move |r| tx.send(r).unwrap())) - .unwrap(); - rx.recv().unwrap() - } - - fn stop_collecting(c: &AppliedLockCollector, max_ts: u64) -> Result<()> { - let (tx, rx) = channel(); - c.stop_collecting(max_ts.into(), Box::new(move |r| tx.send(r).unwrap())) - .unwrap(); - rx.recv().unwrap() - } - - #[test] - fn test_start_stop() { - let (c, _) = new_test_collector(); - // Not started. - get_collected_locks(&c, 1).unwrap_err(); - stop_collecting(&c, 1).unwrap_err(); - - // Started. - start_collecting(&c, 2).unwrap(); - assert_eq!(c.concurrency_manager.max_ts(), 2.into()); - get_collected_locks(&c, 2).unwrap(); - stop_collecting(&c, 2).unwrap(); - // Stopped. - get_collected_locks(&c, 2).unwrap_err(); - stop_collecting(&c, 2).unwrap_err(); - - // When start_collecting is invoked with a larger ts, the later one will - // ovewrite the previous one. - start_collecting(&c, 3).unwrap(); - assert_eq!(c.concurrency_manager.max_ts(), 3.into()); - get_collected_locks(&c, 3).unwrap(); - get_collected_locks(&c, 4).unwrap_err(); - start_collecting(&c, 4).unwrap(); - assert_eq!(c.concurrency_manager.max_ts(), 4.into()); - get_collected_locks(&c, 3).unwrap_err(); - get_collected_locks(&c, 4).unwrap(); - // Do not allow aborting previous observing with a smaller max_ts. - start_collecting(&c, 3).unwrap_err(); - get_collected_locks(&c, 3).unwrap_err(); - get_collected_locks(&c, 4).unwrap(); - // Do not allow stoping observing with a different max_ts. 
- stop_collecting(&c, 3).unwrap_err(); - stop_collecting(&c, 5).unwrap_err(); - stop_collecting(&c, 4).unwrap(); - } - - #[test] - fn test_check_memlock_on_start() { - let (c, _) = new_test_collector(); - let cm = c.concurrency_manager.clone(); - - let mem_lock = |k: &[u8], ts: u64, lock_type| { - let key = Key::from_raw(k); - let guard = block_on(cm.lock_key(&key)); - guard.with_lock(|lock| { - *lock = Some(txn_types::Lock::new( - lock_type, - k.to_vec(), - ts.into(), - 100, - None, - 0.into(), - 1, - 20.into(), - )); - }); - guard - }; - - let guard = mem_lock(b"a", 100, LockType::Put); - start_collecting(&c, 90).unwrap(); - stop_collecting(&c, 90).unwrap(); - start_collecting(&c, 100).unwrap_err(); - // Use get_collected_locks to check it's not collecting. - get_collected_locks(&c, 100).unwrap_err(); - start_collecting(&c, 110).unwrap_err(); - get_collected_locks(&c, 110).unwrap_err(); - drop(guard); - - let guard = mem_lock(b"b", 100, LockType::Lock); - start_collecting(&c, 90).unwrap(); - stop_collecting(&c, 90).unwrap(); - start_collecting(&c, 100).unwrap_err(); - get_collected_locks(&c, 100).unwrap_err(); - start_collecting(&c, 110).unwrap_err(); - get_collected_locks(&c, 110).unwrap_err(); - drop(guard); - - start_collecting(&c, 200).unwrap(); - stop_collecting(&c, 200).unwrap(); - } - - #[test] - fn test_apply() { - let locks: Vec<_> = vec![ - (b"k0", 10), - (b"k1", 110), - (b"k5", 100), - (b"k2", 101), - (b"k3", 90), - (b"k2", 99), - ] - .into_iter() - .map(|(k, ts)| { - let mut lock_info = LockInfo::default(); - lock_info.set_key(k.to_vec()); - lock_info.set_primary_lock(k.to_vec()); - lock_info.set_lock_type(Op::Put); - lock_info.set_lock_version(ts); - lock_info - }) - .collect(); - let lock_kvs: Vec<_> = locks - .iter() - .map(|lock| lock_info_to_kv(lock.clone())) - .collect(); - - let (c, coprocessor_host) = new_test_collector(); - let mut expected_result = vec![]; - - start_collecting(&c, 100).unwrap(); - assert_eq!(get_collected_locks(&c, 
100).unwrap(), (vec![], true)); - - // Only puts in lock cf will be monitered. - let req = vec![ - make_apply_request( - lock_kvs[0].0.clone(), - lock_kvs[0].1.clone(), - CF_LOCK, - CmdType::Put, - ), - make_apply_request(b"1".to_vec(), b"1".to_vec(), CF_DEFAULT, CmdType::Put), - make_apply_request(b"2".to_vec(), b"2".to_vec(), CF_LOCK, CmdType::Delete), - ]; - coprocessor_host.post_apply(&Region::default(), &make_raft_cmd(req)); - expected_result.push(locks[0].clone()); - assert_eq!( - get_collected_locks(&c, 100).unwrap(), - (expected_result.clone(), true) - ); - - // When start collecting with the same max_ts again, shouldn't clean up the - // observer state. - start_collecting(&c, 100).unwrap(); - assert_eq!( - get_collected_locks(&c, 100).unwrap(), - (expected_result.clone(), true) - ); - - // Only locks with ts <= 100 will be collected. - let req: Vec<_> = lock_kvs - .iter() - .map(|(k, v)| make_apply_request(k.clone(), v.clone(), CF_LOCK, CmdType::Put)) - .collect(); - expected_result.extend( - locks - .iter() - .filter(|l| l.get_lock_version() <= 100) - .cloned(), - ); - coprocessor_host.post_apply(&Region::default(), &make_raft_cmd(req.clone())); - assert_eq!( - get_collected_locks(&c, 100).unwrap(), - (expected_result, true) - ); - - // When start_collecting is double-invoked again with larger ts, the previous - // results are dropped. 
- start_collecting(&c, 110).unwrap(); - assert_eq!(get_collected_locks(&c, 110).unwrap(), (vec![], true)); - coprocessor_host.post_apply(&Region::default(), &make_raft_cmd(req)); - assert_eq!(get_collected_locks(&c, 110).unwrap(), (locks, true)); - } - - #[test] - fn test_apply_snapshot() { - let locks: Vec<_> = vec![ - (b"k0", 10), - (b"k1", 110), - (b"k5", 100), - (b"k2", 101), - (b"k3", 90), - (b"k2", 99), - ] - .into_iter() - .map(|(k, ts)| { - let mut lock_info = LockInfo::default(); - lock_info.set_key(k.to_vec()); - lock_info.set_primary_lock(k.to_vec()); - lock_info.set_lock_type(Op::Put); - lock_info.set_lock_version(ts); - lock_info - }) - .collect(); - let lock_kvs: Vec<_> = locks - .iter() - .map(|lock| lock_info_to_kv(lock.clone())) - .map(|(k, v)| (keys::data_key(&k), v)) - .collect(); - - let (c, coprocessor_host) = new_test_collector(); - start_collecting(&c, 100).unwrap(); - - // Apply plain file to other CFs. Nothing happens. - coprocessor_host.post_apply_plain_kvs_from_snapshot( - &Region::default(), - CF_DEFAULT, - &lock_kvs, - ); - assert_eq!(get_collected_locks(&c, 100).unwrap(), (vec![], true)); - - // Apply plain file to lock cf. Locks with ts before 100 will be collected. - let expected_locks: Vec<_> = locks - .iter() - .filter(|l| l.get_lock_version() <= 100) - .cloned() - .collect(); - coprocessor_host.post_apply_plain_kvs_from_snapshot(&Region::default(), CF_LOCK, &lock_kvs); - assert_eq!( - get_collected_locks(&c, 100).unwrap(), - (expected_locks.clone(), true) - ); - // Fetch result twice gets the same result. - assert_eq!( - get_collected_locks(&c, 100).unwrap(), - (expected_locks.clone(), true) - ); - - // When stale start_collecting request arrives, the previous collected results - // shouldn't be dropped. 
- start_collecting(&c, 100).unwrap(); - assert_eq!( - get_collected_locks(&c, 100).unwrap(), - (expected_locks.clone(), true) - ); - start_collecting(&c, 90).unwrap_err(); - assert_eq!( - get_collected_locks(&c, 100).unwrap(), - (expected_locks, true) - ); - - // When start_collecting is double-invoked again with larger ts, the previous - // results are dropped. - start_collecting(&c, 110).unwrap(); - assert_eq!(get_collected_locks(&c, 110).unwrap(), (vec![], true)); - coprocessor_host.post_apply_plain_kvs_from_snapshot(&Region::default(), CF_LOCK, &lock_kvs); - assert_eq!(get_collected_locks(&c, 110).unwrap(), (locks.clone(), true)); - - // Apply SST file to other cfs. Nothing happens. - coprocessor_host.post_apply_sst_from_snapshot(&Region::default(), CF_DEFAULT, ""); - assert_eq!(get_collected_locks(&c, 110).unwrap(), (locks.clone(), true)); - - // Apply SST file to lock cf is not supported. This will cause error and - // therefore `is_clean` will be set to false. - coprocessor_host.post_apply_sst_from_snapshot(&Region::default(), CF_LOCK, ""); - assert_eq!(get_collected_locks(&c, 110).unwrap(), (locks, false)); - } - - #[test] - fn test_not_clean() { - let (c, coprocessor_host) = new_test_collector(); - start_collecting(&c, 1).unwrap(); - // When error happens, `is_clean` should be set to false. - // The value is not a valid lock. - let (k, v) = (Key::from_raw(b"k1").into_encoded(), b"v1".to_vec()); - let req = make_apply_request(k.clone(), v.clone(), CF_LOCK, CmdType::Put); - coprocessor_host.post_apply(&Region::default(), &make_raft_cmd(vec![req])); - assert_eq!(get_collected_locks(&c, 1).unwrap(), (vec![], false)); - - // `is_clean` should be reset after invoking `start_collecting`. 
- start_collecting(&c, 2).unwrap(); - assert_eq!(get_collected_locks(&c, 2).unwrap(), (vec![], true)); - coprocessor_host.post_apply_plain_kvs_from_snapshot( - &Region::default(), - CF_LOCK, - &[(keys::data_key(&k), v)], - ); - assert_eq!(get_collected_locks(&c, 2).unwrap(), (vec![], false)); - - start_collecting(&c, 3).unwrap(); - assert_eq!(get_collected_locks(&c, 3).unwrap(), (vec![], true)); - - // If there are too many locks, `is_clean` should be set to false. - let mut lock = LockInfo::default(); - lock.set_key(b"k2".to_vec()); - lock.set_primary_lock(b"k2".to_vec()); - lock.set_lock_type(Op::Put); - lock.set_lock_version(1); - - let batch_generate_locks = |count| { - let (k, v) = lock_info_to_kv(lock.clone()); - let req = make_apply_request(k, v, CF_LOCK, CmdType::Put); - let raft_cmd = make_raft_cmd(vec![req; count]); - coprocessor_host.post_apply(&Region::default(), &raft_cmd); - }; - - batch_generate_locks(MAX_COLLECT_SIZE - 1); - let (locks, is_clean) = get_collected_locks(&c, 3).unwrap(); - assert_eq!(locks.len(), MAX_COLLECT_SIZE - 1); - assert!(is_clean); - - batch_generate_locks(1); - let (locks, is_clean) = get_collected_locks(&c, 3).unwrap(); - assert_eq!(locks.len(), MAX_COLLECT_SIZE); - assert!(!is_clean); - - batch_generate_locks(1); - // If there are more locks, they will be dropped. 
- let (locks, is_clean) = get_collected_locks(&c, 3).unwrap(); - assert_eq!(locks.len(), MAX_COLLECT_SIZE); - assert!(!is_clean); - - start_collecting(&c, 4).unwrap(); - assert_eq!(get_collected_locks(&c, 4).unwrap(), (vec![], true)); - - batch_generate_locks(MAX_COLLECT_SIZE - 5); - let (locks, is_clean) = get_collected_locks(&c, 4).unwrap(); - assert_eq!(locks.len(), MAX_COLLECT_SIZE - 5); - assert!(is_clean); - - batch_generate_locks(10); - let (locks, is_clean) = get_collected_locks(&c, 4).unwrap(); - assert_eq!(locks.len(), MAX_COLLECT_SIZE); - assert!(!is_clean); - } -} diff --git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index 01e37727f11..4f528d8c356 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -653,7 +653,6 @@ mod tests { } => callback, GcTask::GcKeys { .. } => unreachable!(), GcTask::RawGcKeys { .. } => unreachable!(), - GcTask::PhysicalScanLock { .. } => unreachable!(), GcTask::OrphanVersions { .. } => unreachable!(), GcTask::Validate(_) => unreachable!(), }; diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 81de11cbae9..9e3f79654bc 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -22,16 +22,9 @@ use engine_traits::{ }; use file_system::{IoType, WithIoType}; use futures::executor::block_on; -use kvproto::{ - kvrpcpb::{Context, LockInfo}, - metapb::Region, -}; +use kvproto::{kvrpcpb::Context, metapb::Region}; use pd_client::{FeatureGate, PdClient}; -use raftstore::{ - coprocessor::{CoprocessorHost, RegionInfoProvider}, - router::RaftStoreRouter, - store::msg::StoreMsg, -}; +use raftstore::{coprocessor::RegionInfoProvider, router::RaftStoreRouter, store::msg::StoreMsg}; use tikv_kv::{CfStatistics, CursorBuilder, Modify, SnapContext}; use tikv_util::{ config::{Tracker, VersionTrack}, @@ -43,7 +36,6 @@ use tikv_util::{ use txn_types::{Key, TimeStamp}; use super::{ - 
applied_lock_collector::{AppliedLockCollector, Callback as LockCollectorCallback}, check_need_gc, compaction_filter::{ CompactionFilterInitializer, GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED, @@ -115,14 +107,6 @@ where callback: Callback<()>, region_info_provider: Arc, }, - PhysicalScanLock { - ctx: Context, - max_ts: TimeStamp, - start_key: Key, - limit: usize, - callback: Callback>, - region_info_provider: Arc, - }, /// If GC in compaction filter is enabled, versions on default CF will be /// handled with `DB::delete` in write CF's compaction filter. However if /// the compaction filter finds the DB is stalled, it will send the task @@ -149,7 +133,6 @@ where GcTask::GcKeys { .. } => GcCommandKind::gc_keys, GcTask::RawGcKeys { .. } => GcCommandKind::raw_gc_keys, GcTask::UnsafeDestroyRange { .. } => GcCommandKind::unsafe_destroy_range, - GcTask::PhysicalScanLock { .. } => GcCommandKind::physical_scan_lock, GcTask::OrphanVersions { .. } => GcCommandKind::orphan_versions, #[cfg(any(test, feature = "testexport"))] GcTask::Validate(_) => GcCommandKind::validate_config, @@ -179,10 +162,6 @@ where .field("start_key", &format!("{}", start_key)) .field("end_key", &format!("{}", end_key)) .finish(), - GcTask::PhysicalScanLock { max_ts, .. 
} => f - .debug_struct("PhysicalScanLock") - .field("max_ts", max_ts) - .finish(), GcTask::OrphanVersions { id, wb } => f .debug_struct("OrphanVersions") .field("id", id) @@ -870,47 +849,6 @@ where Ok(()) } - fn handle_physical_scan_lock( - &mut self, - _: &Context, - max_ts: TimeStamp, - start_key: &Key, - limit: usize, - regions_provider: Arc, - ) -> Result> { - let store_id = self.store_id; - let regions = box_try!(regions_provider.get_regions_in_range(start_key.as_encoded(), &[])) - .into_iter() - .filter(move |r| find_peer(r, store_id).is_some()); - - let mut first_round = true; - let mut locks = Vec::new(); - for region in regions { - let start_key = { - if first_round { - first_round = false; - start_key.clone() - } else { - Key::from_raw(region.get_start_key()) - } - }; - let snap = self.get_snapshot(store_id, ®ion)?; - let mut reader = MvccReader::new(snap, Some(ScanMode::Forward), false); - let (locks_this_region, _) = reader - .scan_locks(Some(&start_key), None, |l| l.ts <= max_ts, limit) - .map_err(TxnError::from_mvcc)?; - - locks.extend(locks_this_region); - } - - let mut lock_infos = Vec::with_capacity(locks.len()); - for (key, lock) in locks { - let raw_key = key.into_raw().map_err(TxnError::from_mvcc)?; - lock_infos.push(lock.into_lock_info(raw_key)); - } - Ok(lock_infos) - } - fn update_statistics_metrics(&mut self, key_mode: GcKeyMode) { if let Some(mut_stats) = self.stats_map.get_mut(&key_mode) { let stats = mem::take(mut_stats); @@ -1064,31 +1002,6 @@ where end_key ); } - GcTask::PhysicalScanLock { - ctx, - max_ts, - start_key, - limit, - callback, - region_info_provider, - } => { - let res = self.handle_physical_scan_lock( - &ctx, - max_ts, - &start_key, - limit, - region_info_provider, - ); - update_metrics(res.is_err()); - callback(res); - slow_log!( - T timer, - "PhysicalScanLock start_key {:?}, max_ts {}, limit {}", - start_key, - max_ts, - limit, - ); - } GcTask::OrphanVersions { mut wb, id } => { info!("handling GcTask::OrphanVersions"; 
"id" => id); let mut wopts = WriteOptions::default(); @@ -1121,9 +1034,6 @@ fn handle_gc_task_schedule_error(e: ScheduleError>) -> Res GcTask::Gc { callback, .. } | GcTask::UnsafeDestroyRange { callback, .. } => { callback(Err(Error::from(ErrorInner::GcWorkerTooBusy))) } - GcTask::PhysicalScanLock { callback, .. } => { - callback(Err(Error::from(ErrorInner::GcWorkerTooBusy))) - } // Attention: If you are adding a new GcTask, do not forget to call the callback if it has a // callback. GcTask::GcKeys { .. } | GcTask::RawGcKeys { .. } | GcTask::OrphanVersions { .. } => {} @@ -1184,8 +1094,6 @@ where worker: Arc>>>, worker_scheduler: Scheduler>, - applied_lock_collector: Option>, - gc_manager_handle: Arc>>, feature_gate: FeatureGate, } @@ -1207,7 +1115,6 @@ where refs: self.refs.clone(), worker: self.worker.clone(), worker_scheduler: self.worker_scheduler.clone(), - applied_lock_collector: self.applied_lock_collector.clone(), gc_manager_handle: self.gc_manager_handle.clone(), feature_gate: self.feature_gate.clone(), region_info_provider: self.region_info_provider.clone(), @@ -1259,7 +1166,6 @@ where refs: Arc::new(AtomicUsize::new(1)), worker: Arc::new(Mutex::new(worker)), worker_scheduler, - applied_lock_collector: None, gc_manager_handle: Arc::new(Mutex::new(None)), feature_gate, region_info_provider, @@ -1314,20 +1220,6 @@ where Ok(()) } - pub fn start_observe_lock_apply( - &mut self, - coprocessor_host: &mut CoprocessorHost, - concurrency_manager: ConcurrencyManager, - ) -> Result<()> { - assert!(self.applied_lock_collector.is_none()); - let collector = Arc::new(AppliedLockCollector::new( - coprocessor_host, - concurrency_manager, - )?); - self.applied_lock_collector = Some(collector); - Ok(()) - } - pub fn stop(&self) -> Result<()> { // Stop GcManager. 
if let Some(h) = self.gc_manager_handle.lock().unwrap().take() { @@ -1387,61 +1279,6 @@ where pub fn get_config_manager(&self) -> GcWorkerConfigManager { self.config_manager.clone() } - - pub fn physical_scan_lock( - &self, - ctx: Context, - max_ts: TimeStamp, - start_key: Key, - limit: usize, - callback: Callback>, - ) -> Result<()> { - GC_COMMAND_COUNTER_VEC_STATIC.physical_scan_lock.inc(); - - self.worker_scheduler - .schedule(GcTask::PhysicalScanLock { - ctx, - max_ts, - start_key, - limit, - callback, - region_info_provider: self.region_info_provider.clone(), - }) - .or_else(handle_gc_task_schedule_error) - } - - pub fn start_collecting( - &self, - max_ts: TimeStamp, - callback: LockCollectorCallback<()>, - ) -> Result<()> { - self.applied_lock_collector - .as_ref() - .ok_or_else(|| box_err!("applied_lock_collector not supported")) - .and_then(move |c| c.start_collecting(max_ts, callback)) - } - - pub fn get_collected_locks( - &self, - max_ts: TimeStamp, - callback: LockCollectorCallback<(Vec, bool)>, - ) -> Result<()> { - self.applied_lock_collector - .as_ref() - .ok_or_else(|| box_err!("applied_lock_collector not supported")) - .and_then(move |c| c.get_collected_locks(max_ts, callback)) - } - - pub fn stop_collecting( - &self, - max_ts: TimeStamp, - callback: LockCollectorCallback<()>, - ) -> Result<()> { - self.applied_lock_collector - .as_ref() - .ok_or_else(|| box_err!("applied_lock_collector not supported")) - .and_then(move |c| c.stop_collecting(max_ts, callback)) - } } #[cfg(any(test, feature = "testexport"))] @@ -1626,7 +1463,7 @@ mod tests { use std::{ collections::{BTreeMap, BTreeSet}, path::Path, - sync::mpsc::{self, channel}, + sync::mpsc, thread, time::Duration, }; @@ -1635,23 +1472,18 @@ mod tests { use engine_rocks::{util::get_cf_handle, RocksEngine}; use engine_traits::Peekable as _; use futures::executor::block_on; - use kvproto::{ - kvrpcpb::{ApiVersion, Op}, - metapb::Peer, - }; + use kvproto::{kvrpcpb::ApiVersion, metapb::Peer}; use 
raft::StateRole; use raftstore::{ coprocessor::{ region_info_accessor::{MockRegionInfoProvider, RegionInfoAccessor}, - RegionChangeEvent, + CoprocessorHost, RegionChangeEvent, }, router::RaftStoreBlackHole, }; use tempfile::Builder; use tikv_kv::Snapshot; - use tikv_util::{ - codec::number::NumberEncoder, future::paired_future_callback, store::new_peer, - }; + use tikv_util::store::new_peer; use txn_types::Mutation; use super::{test_gc_worker::MultiRocksEngine, *}; @@ -1955,88 +1787,6 @@ mod tests { .unwrap(); } - #[test] - fn test_physical_scan_lock() { - let store_id = 1; - let engine = TestEngineBuilder::new().build().unwrap(); - let prefixed_engine = PrefixedEngine(engine); - let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr( - prefixed_engine.clone(), - MockLockManager::new(), - ) - .build() - .unwrap(); - let (tx, _rx) = mpsc::channel(); - let mut region = Region::default(); - region.mut_peers().push(new_peer(store_id, 0)); - let mut gc_worker = GcWorker::new( - prefixed_engine, - RaftStoreBlackHole, - tx, - GcConfig::default(), - FeatureGate::default(), - Arc::new(MockRegionInfoProvider::new(vec![region])), - ); - gc_worker.start(store_id).unwrap(); - - let physical_scan_lock = |max_ts: u64, start_key, limit| { - let (cb, f) = paired_future_callback(); - gc_worker - .physical_scan_lock(Context::default(), max_ts.into(), start_key, limit, cb) - .unwrap(); - block_on(f).unwrap() - }; - - let mut expected_lock_info = Vec::new(); - - // Put locks into the storage. - for i in 0..50 { - let mut k = vec![]; - k.encode_u64(i).unwrap(); - let v = k.clone(); - - let mutation = Mutation::make_put(Key::from_raw(&k), v); - - let lock_ts = 10 + i % 3; - - // Collect all locks with ts <= 11 to check the result of physical_scan_lock. 
- if lock_ts <= 11 { - let mut info = LockInfo::default(); - info.set_primary_lock(k.clone()); - info.set_lock_version(lock_ts); - info.set_key(k.clone()); - info.set_lock_type(Op::Put); - expected_lock_info.push(info) - } - - let (tx, rx) = channel(); - storage - .sched_txn_command( - commands::Prewrite::with_defaults(vec![mutation], k, lock_ts.into()), - Box::new(move |res| tx.send(res).unwrap()), - ) - .unwrap(); - rx.recv() - .unwrap() - .unwrap() - .locks - .into_iter() - .for_each(|r| r.unwrap()); - } - - let res = physical_scan_lock(11, Key::from_raw(b""), 50).unwrap(); - assert_eq!(res, expected_lock_info); - - let res = physical_scan_lock(11, Key::from_raw(b""), 5).unwrap(); - assert_eq!(res[..], expected_lock_info[..5]); - - let mut start_key = vec![]; - start_key.encode_u64(4).unwrap(); - let res = physical_scan_lock(11, Key::from_raw(&start_key), 6).unwrap(); - // expected_locks[3] is the key 4. - assert_eq!(res[..], expected_lock_info[3..9]); - } - #[test] fn test_gc_keys_with_region_info_provider() { let store_id = 1; diff --git a/src/server/gc_worker/mod.rs b/src/server/gc_worker/mod.rs index 5b43b9b4be3..a5b8837cd2e 100644 --- a/src/server/gc_worker/mod.rs +++ b/src/server/gc_worker/mod.rs @@ -1,6 +1,5 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. -mod applied_lock_collector; pub mod compaction_filter; mod config; mod gc_manager; diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 3e07a75899f..23f8256835b 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -51,7 +51,6 @@ make_auto_flush_static_metric! { raw_compare_and_swap, raw_checksum, unsafe_destroy_range, - physical_scan_lock, register_lock_observer, check_lock_observer, remove_lock_observer, @@ -71,7 +70,6 @@ make_auto_flush_static_metric! 
{ gc_keys, raw_gc_keys, unsafe_destroy_range, - physical_scan_lock, validate_config, orphan_versions, } diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 54b0dc6782b..fa2235b51e7 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -20,7 +20,6 @@ use kvproto::{ errorpb::{Error as RegionError, *}, kvrpcpb::*, mpp::*, - raft_cmdpb::{CmdType, RaftCmdRequest, RaftRequestHeader, Request as RaftRequest}, raft_serverpb::*, tikvpb::*, }; @@ -31,7 +30,7 @@ use raftstore::{ store::{ memory::{MEMTRACE_APPLYS, MEMTRACE_RAFT_ENTRIES, MEMTRACE_RAFT_MESSAGES}, metrics::RAFT_ENTRIES_CACHES_GAUGE, - Callback, CasualMessage, CheckLeaderTask, RaftCmdExtraOpts, + Callback, CasualMessage, CheckLeaderTask, }, DiscardReason, Error as RaftStoreError, Result as RaftStoreResult, }; @@ -519,169 +518,6 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor ctx.spawn(task); } - fn register_lock_observer( - &mut self, - ctx: RpcContext<'_>, - req: RegisterLockObserverRequest, - sink: UnarySink, - ) { - let begin_instant = Instant::now(); - - let (cb, f) = paired_future_callback(); - let res = self.gc_worker.start_collecting(req.get_max_ts().into(), cb); - - let task = async move { - // Here except for the receiving error of `futures::channel::oneshot`, - // other errors will be returned as the successful response of rpc. 
- let res = match res { - Err(e) => Err(e), - Ok(_) => f.await?, - }; - let mut resp = RegisterLockObserverResponse::default(); - if let Err(e) = res { - resp.set_error(format!("{}", e)); - } - sink.success(resp).await?; - GRPC_MSG_HISTOGRAM_STATIC - .register_lock_observer - .observe(duration_to_sec(begin_instant.saturating_elapsed())); - ServerResult::Ok(()) - } - .map_err(|e| { - log_net_error!(e, "kv rpc failed"; - "request" => "register_lock_observer" - ); - GRPC_MSG_FAIL_COUNTER.register_lock_observer.inc(); - }) - .map(|_| ()); - - ctx.spawn(task); - } - - fn check_lock_observer( - &mut self, - ctx: RpcContext<'_>, - req: CheckLockObserverRequest, - sink: UnarySink, - ) { - let begin_instant = Instant::now(); - - let (cb, f) = paired_future_callback(); - let res = self - .gc_worker - .get_collected_locks(req.get_max_ts().into(), cb); - - let task = async move { - let res = match res { - Err(e) => Err(e), - Ok(_) => f.await?, - }; - let mut resp = CheckLockObserverResponse::default(); - match res { - Ok((locks, is_clean)) => { - resp.set_is_clean(is_clean); - resp.set_locks(locks.into()); - } - Err(e) => resp.set_error(format!("{}", e)), - } - sink.success(resp).await?; - GRPC_MSG_HISTOGRAM_STATIC - .check_lock_observer - .observe(duration_to_sec(begin_instant.saturating_elapsed())); - ServerResult::Ok(()) - } - .map_err(|e| { - log_net_error!(e, "kv rpc failed"; - "request" => "check_lock_observer" - ); - GRPC_MSG_FAIL_COUNTER.check_lock_observer.inc(); - }) - .map(|_| ()); - - ctx.spawn(task); - } - - fn remove_lock_observer( - &mut self, - ctx: RpcContext<'_>, - req: RemoveLockObserverRequest, - sink: UnarySink, - ) { - let begin_instant = Instant::now(); - - let (cb, f) = paired_future_callback(); - let res = self.gc_worker.stop_collecting(req.get_max_ts().into(), cb); - - let task = async move { - let res = match res { - Err(e) => Err(e), - Ok(_) => f.await?, - }; - let mut resp = RemoveLockObserverResponse::default(); - if let Err(e) = res { - 
resp.set_error(format!("{}", e)); - } - sink.success(resp).await?; - GRPC_MSG_HISTOGRAM_STATIC - .remove_lock_observer - .observe(duration_to_sec(begin_instant.saturating_elapsed())); - ServerResult::Ok(()) - } - .map_err(|e| { - log_net_error!(e, "kv rpc failed"; - "request" => "remove_lock_observer" - ); - GRPC_MSG_FAIL_COUNTER.remove_lock_observer.inc(); - }) - .map(|_| ()); - - ctx.spawn(task); - } - - fn physical_scan_lock( - &mut self, - ctx: RpcContext<'_>, - mut req: PhysicalScanLockRequest, - sink: UnarySink, - ) { - let begin_instant = Instant::now(); - - let (cb, f) = paired_future_callback(); - let res = self.gc_worker.physical_scan_lock( - req.take_context(), - req.get_max_ts().into(), - Key::from_raw(req.get_start_key()), - req.get_limit() as _, - cb, - ); - - let task = async move { - let res = match res { - Err(e) => Err(e), - Ok(_) => f.await?, - }; - let mut resp = PhysicalScanLockResponse::default(); - match res { - Ok(locks) => resp.set_locks(locks.into()), - Err(e) => resp.set_error(format!("{}", e)), - } - sink.success(resp).await?; - GRPC_MSG_HISTOGRAM_STATIC - .physical_scan_lock - .observe(duration_to_sec(begin_instant.saturating_elapsed())); - ServerResult::Ok(()) - } - .map_err(|e| { - log_net_error!(e, "kv rpc failed"; - "request" => "physical_scan_lock" - ); - GRPC_MSG_FAIL_COUNTER.physical_scan_lock.inc(); - }) - .map(|_| ()); - - ctx.spawn(task); - } - fn unsafe_destroy_range( &mut self, ctx: RpcContext<'_>, @@ -976,103 +812,6 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor ctx.spawn(task); } - fn read_index( - &mut self, - ctx: RpcContext<'_>, - req: ReadIndexRequest, - sink: UnarySink, - ) { - forward_unary!(self.proxy, read_index, ctx, req, sink); - let begin_instant = Instant::now(); - - let region_id = req.get_context().get_region_id(); - let mut cmd = RaftCmdRequest::default(); - let mut header = RaftRequestHeader::default(); - let mut inner_req = RaftRequest::default(); - inner_req.set_cmd_type(CmdType::ReadIndex); - 
inner_req.mut_read_index().set_start_ts(req.get_start_ts()); - for r in req.get_ranges() { - let mut range = kvproto::kvrpcpb::KeyRange::default(); - range.set_start_key(Key::from_raw(r.get_start_key()).into_encoded()); - range.set_end_key(Key::from_raw(r.get_end_key()).into_encoded()); - inner_req.mut_read_index().mut_key_ranges().push(range); - } - header.set_region_id(req.get_context().get_region_id()); - header.set_peer(req.get_context().get_peer().clone()); - header.set_region_epoch(req.get_context().get_region_epoch().clone()); - if req.get_context().get_term() != 0 { - header.set_term(req.get_context().get_term()); - } - header.set_sync_log(req.get_context().get_sync_log()); - header.set_read_quorum(true); - cmd.set_header(header); - cmd.set_requests(vec![inner_req].into()); - - let (cb, f) = paired_future_callback(); - - // We must deal with all requests which acquire read-quorum in raftstore-thread, - // so just send it as an command. - if let Err(e) = self - .ch - .send_command(cmd, Callback::read(cb), RaftCmdExtraOpts::default()) - { - // Retrun region error instead a gRPC error. 
- let mut resp = ReadIndexResponse::default(); - resp.set_region_error(raftstore_error_to_region_error(e, region_id)); - ctx.spawn( - async move { - sink.success(resp).await?; - ServerResult::Ok(()) - } - .map_err(|_| ()) - .map(|_| ()), - ); - return; - } - - let task = async move { - let mut res = f.await?; - let mut resp = ReadIndexResponse::default(); - if res.response.get_header().has_error() { - resp.set_region_error(res.response.mut_header().take_error()); - } else { - let mut raft_resps = res.response.take_responses(); - if raft_resps.len() != 1 { - error!( - "invalid read index response"; - "region_id" => region_id, - "response" => ?raft_resps - ); - resp.mut_region_error().set_message(format!( - "Internal Error: invalid response: {:?}", - raft_resps - )); - } else { - let mut read_index_resp = raft_resps[0].take_read_index(); - if read_index_resp.has_locked() { - resp.set_locked(read_index_resp.take_locked()); - } else { - resp.set_read_index(read_index_resp.get_read_index()); - } - } - } - sink.success(resp).await?; - GRPC_MSG_HISTOGRAM_STATIC - .read_index - .observe(begin_instant.saturating_elapsed_secs()); - ServerResult::Ok(()) - } - .map_err(|e| { - log_net_error!(e, "kv rpc failed"; - "request" => "read_index" - ); - GRPC_MSG_FAIL_COUNTER.read_index.inc(); - }) - .map(|_| ()); - - ctx.spawn(task); - } - fn batch_commands( &mut self, ctx: RpcContext<'_>, diff --git a/tests/failpoints/cases/test_gc_worker.rs b/tests/failpoints/cases/test_gc_worker.rs index 5845d4d4eb7..3dbb7ffc7b0 100644 --- a/tests/failpoints/cases/test_gc_worker.rs +++ b/tests/failpoints/cases/test_gc_worker.rs @@ -6,7 +6,6 @@ use std::{ time::Duration, }; -use collections::HashMap; use engine_traits::{Peekable, WriteBatch}; use grpcio::{ChannelBuilder, Environment}; use keys::data_key; @@ -28,289 +27,6 @@ use tikv::{ use tikv_util::HandyRwLock; use txn_types::{Key, TimeStamp}; -// In theory, raft can propose conf change as long as there is no pending one. 
-// Replicas don't apply logs synchronously, so it's possible the old leader is -// removed before the new leader applies all logs. -// In the current implementation, the new leader rejects conf change until it -// applies all logs. It guarantees the correctness of green GC. This test is to -// prevent breaking it in the future. -#[test] -fn test_collect_lock_from_stale_leader() { - let mut cluster = new_server_cluster(0, 2); - cluster.pd_client.disable_default_operator(); - let region_id = cluster.run_conf_change(); - let leader = cluster.leader_of_region(region_id).unwrap(); - - // Create clients. - let env = Arc::new(Environment::new(1)); - let mut clients = HashMap::default(); - for node_id in cluster.get_node_ids() { - let channel = - ChannelBuilder::new(Arc::clone(&env)).connect(&cluster.sim.rl().get_addr(node_id)); - let client = TikvClient::new(channel); - clients.insert(node_id, client); - } - - // Start transferring the region to store 2. - let new_peer = new_peer(2, 1003); - cluster.pd_client.must_add_peer(region_id, new_peer.clone()); - - // Create the ctx of the first region. - let leader_client = clients.get(&leader.get_store_id()).unwrap(); - let mut ctx = Context::default(); - ctx.set_region_id(region_id); - ctx.set_peer(leader.clone()); - ctx.set_region_epoch(cluster.get_region_epoch(region_id)); - - // Pause the new peer applying so that when it becomes the leader, it doesn't - // apply all logs. - let new_leader_apply_fp = "on_handle_apply_1003"; - fail::cfg(new_leader_apply_fp, "pause").unwrap(); - must_kv_prewrite( - leader_client, - ctx, - vec![new_mutation(Op::Put, b"k1", b"v")], - b"k1".to_vec(), - 10, - ); - - // Leader election only considers the progress of appending logs, so it can - // succeed. - cluster.must_transfer_leader(region_id, new_peer.clone()); - // It shouldn't succeed in the current implementation. 
- cluster.pd_client.remove_peer(region_id, leader.clone()); - std::thread::sleep(Duration::from_secs(1)); - cluster.pd_client.must_have_peer(region_id, leader); - - // Must scan the lock from the old leader. - let locks = must_physical_scan_lock(leader_client, Context::default(), 100, b"", 10); - assert_eq!(locks.len(), 1); - assert_eq!(locks[0].get_key(), b"k1"); - - // Can't scan the lock from the new leader. - let leader_client = clients.get(&new_peer.get_store_id()).unwrap(); - must_register_lock_observer(leader_client, 100); - let locks = must_check_lock_observer(leader_client, 100, true); - assert!(locks.is_empty()); - let locks = must_physical_scan_lock(leader_client, Context::default(), 100, b"", 10); - assert!(locks.is_empty()); - - fail::remove(new_leader_apply_fp); -} - -#[test] -fn test_observer_send_error() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); - - let max_ts = 100; - must_register_lock_observer(&client, max_ts); - must_kv_prewrite( - &client, - ctx.clone(), - vec![new_mutation(Op::Put, b"k1", b"v")], - b"k1".to_vec(), - 10, - ); - assert_eq!(must_check_lock_observer(&client, max_ts, true).len(), 1); - - let observer_send_fp = "lock_observer_send_full"; - fail::cfg(observer_send_fp, "return").unwrap(); - must_kv_prewrite( - &client, - ctx, - vec![new_mutation(Op::Put, b"k2", b"v")], - b"k1".to_vec(), - 10, - ); - let resp = check_lock_observer(&client, max_ts); - assert!(resp.get_error().is_empty(), "{:?}", resp.get_error()); - // Should mark dirty if fails to send locks. 
- assert!(!resp.get_is_clean()); -} - -#[test] -fn test_notify_observer_after_apply() { - fn retry_until(mut f: impl FnMut() -> bool) { - for _ in 0..100 { - sleep_ms(10); - if f() { - break; - } - } - } - - let (mut cluster, client, ctx) = must_new_cluster_and_kv_client(); - cluster.pd_client.disable_default_operator(); - let post_apply_query_fp = "notify_lock_observer_query"; - let apply_plain_kvs_fp = "notify_lock_observer_snapshot"; - - // Write a lock and pause before notifying the lock observer. - let max_ts = 100; - must_register_lock_observer(&client, max_ts); - fail::cfg(post_apply_query_fp, "pause").unwrap(); - let key = b"k"; - let (client_clone, ctx_clone) = (client.clone(), ctx.clone()); - let handle = std::thread::spawn(move || { - must_kv_prewrite( - &client_clone, - ctx_clone, - vec![new_mutation(Op::Put, key, b"v")], - key.to_vec(), - 10, - ); - }); - // We can use physical_scan_lock to get the lock because we notify the lock - // observer after writing data to the rocskdb. - let mut locks = vec![]; - retry_until(|| { - assert!(must_check_lock_observer(&client, max_ts, true).is_empty()); - locks.extend(must_physical_scan_lock( - &client, - ctx.clone(), - max_ts, - b"", - 100, - )); - !locks.is_empty() - }); - assert_eq!(locks.len(), 1); - assert_eq!(locks[0].get_key(), key); - assert!(must_check_lock_observer(&client, max_ts, true).is_empty()); - fail::remove(post_apply_query_fp); - handle.join().unwrap(); - assert_eq!(must_check_lock_observer(&client, max_ts, true).len(), 1); - - // Add a new store. - let store_id = cluster.add_new_engine(); - let channel = ChannelBuilder::new(Arc::new(Environment::new(1))) - .connect(&cluster.sim.rl().get_addr(store_id)); - let replica_client = TikvClient::new(channel); - - // Add a new peer and pause before notifying the lock observer. 
- must_register_lock_observer(&replica_client, max_ts); - fail::cfg(apply_plain_kvs_fp, "pause").unwrap(); - cluster - .pd_client - .must_add_peer(ctx.get_region_id(), new_peer(store_id, store_id)); - // We can use physical_scan_lock to get the lock because we notify the lock - // observer after writing data to the rocksdb. - let mut locks = vec![]; - retry_until(|| { - assert!(must_check_lock_observer(&replica_client, max_ts, true).is_empty()); - locks.extend(must_physical_scan_lock( - &replica_client, - ctx.clone(), - max_ts, - b"", - 100, - )); - !locks.is_empty() - }); - assert_eq!(locks.len(), 1); - assert_eq!(locks[0].get_key(), key); - assert!(must_check_lock_observer(&replica_client, max_ts, true).is_empty()); - fail::remove(apply_plain_kvs_fp); - retry_until(|| !must_check_lock_observer(&replica_client, max_ts, true).is_empty()); - assert_eq!( - must_check_lock_observer(&replica_client, max_ts, true).len(), - 1 - ); -} - -// It may cause locks missing during green GC if the raftstore notifies the lock -// observer before writing data to the rocksdb: -// - Store-1 transfers a region to store-2 and store-2 is applying logs. -// - GC worker registers lock observer on store-2 after calling lock observer's -// callback and before finishing applying which means the lock won't be -// observed. -// - GC worker scans locks on each store independently. It's possible GC worker -// has scanned all locks on store-2 and hasn't scanned locks on store-1. -// - Store-2 applies all logs and removes the peer on store-1. -// - GC worker can't scan the lock on store-1 because the peer has been -// destroyed. -// - GC worker can't get the lock from store-2 because it can't observe the lock -// and has scanned it. -#[test] -fn test_collect_applying_locks() { - let mut cluster = new_server_cluster(0, 2); - cluster.pd_client.disable_default_operator(); - let region_id = cluster.run_conf_change(); - let leader = cluster.leader_of_region(region_id).unwrap(); - - // Create clients. 
- let env = Arc::new(Environment::new(1)); - let mut clients = HashMap::default(); - for node_id in cluster.get_node_ids() { - let channel = - ChannelBuilder::new(Arc::clone(&env)).connect(&cluster.sim.rl().get_addr(node_id)); - let client = TikvClient::new(channel); - clients.insert(node_id, client); - } - - // Start transferring the region to store 2. - let new_peer = new_peer(2, 1003); - cluster.pd_client.must_add_peer(region_id, new_peer.clone()); - - // Create the ctx of the first region. - let store_1_client = clients.get(&leader.get_store_id()).unwrap(); - let mut ctx = Context::default(); - ctx.set_region_id(region_id); - ctx.set_peer(leader.clone()); - ctx.set_region_epoch(cluster.get_region_epoch(region_id)); - - // Pause store-2 after calling observer callbacks and before writing to the - // rocksdb. - let new_leader_apply_fp = "post_handle_apply_1003"; - fail::cfg(new_leader_apply_fp, "pause").unwrap(); - - // Write 1 lock. - must_kv_prewrite( - store_1_client, - ctx, - vec![new_mutation(Op::Put, b"k1", b"v")], - b"k1".to_vec(), - 10, - ); - // Wait for store-2 applying. - std::thread::sleep(Duration::from_secs(3)); - - // Starting the process of green GC at safe point 20: - // 1. Register lock observers on all stores. - // 2. Scan locks physically on each store independently. - // 3. Get locks from all observers. - let safe_point = 20; - - // Register lock observers. - clients.iter().for_each(|(_, c)| { - must_register_lock_observer(c, safe_point); - }); - - // Finish scanning locks on store-2 and find nothing. - let store_2_client = clients.get(&new_peer.get_store_id()).unwrap(); - let locks = must_physical_scan_lock(store_2_client, Context::default(), safe_point, b"", 1); - assert!(locks.is_empty(), "{:?}", locks); - - // Transfer the region from store-1 to store-2. 
- fail::remove(new_leader_apply_fp); - cluster.must_transfer_leader(region_id, new_peer); - cluster.pd_client.must_remove_peer(region_id, leader); - // Wait for store-1 desroying the region. - std::thread::sleep(Duration::from_secs(3)); - - // Scan locks on store-1 after the region has been destroyed. - let locks = must_physical_scan_lock(store_1_client, Context::default(), safe_point, b"", 1); - assert!(locks.is_empty(), "{:?}", locks); - - // Check lock observers. - let mut locks = vec![]; - clients.iter().for_each(|(_, c)| { - locks.extend(must_check_lock_observer(c, safe_point, true)); - }); - // Must observe the applying lock even through we can't use scan to get it. - assert_eq!(locks.len(), 1); - assert_eq!(locks[0].get_key(), b"k1"); -} - // Test write CF's compaction filter can call `orphan_versions_handler` // correctly. #[test] diff --git a/tests/failpoints/cases/test_kv_service.rs b/tests/failpoints/cases/test_kv_service.rs index 1f7e35b5691..b81673af0e2 100644 --- a/tests/failpoints/cases/test_kv_service.rs +++ b/tests/failpoints/cases/test_kv_service.rs @@ -42,95 +42,6 @@ fn test_kv_scan_memory_lock() { fail::remove("raftkv_async_snapshot_err"); } -#[test] -fn test_scan_lock_push_async_commit() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); - - for (use_green_gc, ts) in &[(false, 100), (true, 200)] { - // We will perform a async commit transaction with start_ts == `ts`. - // First, try pushing max_ts to `ts + 10`. 
- if *use_green_gc { - let mut req = RegisterLockObserverRequest::default(); - req.set_max_ts(ts + 10); - let resp = client.register_lock_observer(&req).unwrap(); - assert_eq!(resp.error.len(), 0); - } else { - let mut req = ScanLockRequest::default(); - req.set_context(ctx.clone()); - req.set_max_version(ts + 10); - let resp = client.kv_scan_lock(&req).unwrap(); - assert!(!resp.has_region_error()); - assert!(!resp.has_error()); - } - - let k1 = b"k1"; - let v1 = b"v1"; - - // The following code simulates another case: prewrite is locking the memlock, - // and then another scan lock operation request meets the memlock. - - fail::cfg("before-set-lock-in-memory", "pause").unwrap(); - let client1 = client.clone(); - let ctx1 = ctx.clone(); - let handle1 = std::thread::spawn(move || { - let mut prewrite = PrewriteRequest::default(); - prewrite.set_context(ctx1); - let mut mutation = Mutation::default(); - mutation.set_op(Op::Put); - mutation.set_key(k1.to_vec()); - mutation.set_value(v1.to_vec()); - prewrite.set_mutations(vec![mutation].into()); - prewrite.set_primary_lock(k1.to_vec()); - prewrite.set_start_version(*ts); - prewrite.set_lock_ttl(1000); - prewrite.set_use_async_commit(true); - - let resp = client1.kv_prewrite(&prewrite).unwrap(); - assert!(!resp.has_region_error()); - assert_eq!(resp.get_errors(), &[]); - // min_commit_ts should be the last scan_lock ts + 1. 
- assert_eq!(resp.min_commit_ts, ts + 11); - }); - - // Wait for the prewrite acquires the memlock - std::thread::sleep(Duration::from_millis(200)); - - let client1 = client.clone(); - let ctx1 = ctx.clone(); - let handle2 = std::thread::spawn(move || { - if *use_green_gc { - let mut req = RegisterLockObserverRequest::default(); - req.set_max_ts(ts + 20); - let resp = client1.register_lock_observer(&req).unwrap(); - assert!(!resp.error.is_empty()); - } else { - let mut req = ScanLockRequest::default(); - req.set_context(ctx1); - req.set_max_version(ts + 20); - let resp = client1.kv_scan_lock(&req).unwrap(); - assert!(!resp.has_region_error()); - assert!(resp.has_error()); - } - }); - - fail::remove("before-set-lock-in-memory"); - - handle1.join().unwrap(); - handle2.join().unwrap(); - - // Commit the key so that next turn of test will work. - let mut req = CommitRequest::default(); - req.set_context(ctx.clone()); - req.set_start_version(*ts); - req.set_commit_version(ts + 11); - req.set_keys(vec![k1.to_vec()].into()); - let resp = client.kv_commit(&req).unwrap(); - assert!(!resp.has_region_error()); - assert!(!resp.has_error()); - assert_eq!(resp.commit_version, ts + 11); - } -} - #[test] fn test_snapshot_not_block_grpc() { let (cluster, leader, ctx) = must_new_cluster_mul(1); diff --git a/tests/integrations/server/gc_worker.rs b/tests/integrations/server/gc_worker.rs index 36f9eed9ca8..cfadde84405 100644 --- a/tests/integrations/server/gc_worker.rs +++ b/tests/integrations/server/gc_worker.rs @@ -2,271 +2,15 @@ use std::sync::Arc; -use collections::HashMap; use engine_traits::{Peekable, CF_WRITE}; use grpcio::{ChannelBuilder, Environment}; use keys::data_key; -use kvproto::{kvrpcpb::*, metapb, tikvpb::TikvClient}; +use kvproto::{kvrpcpb::*, tikvpb::TikvClient}; use test_raftstore::*; use tikv::server::gc_worker::sync_gc; use tikv_util::HandyRwLock; use txn_types::Key; -#[test] -fn test_physical_scan_lock() { - let (_cluster, client, ctx) = 
must_new_cluster_and_kv_client(); - - // Generate kvs like k10, v10, ts=10; k11, v11, ts=11; ... - let kv: Vec<_> = (10..20) - .map(|i| (i, vec![b'k', i as u8], vec![b'v', i as u8])) - .collect(); - - for (ts, k, v) in &kv { - let mut mutation = Mutation::default(); - mutation.set_op(Op::Put); - mutation.set_key(k.clone()); - mutation.set_value(v.clone()); - must_kv_prewrite(&client, ctx.clone(), vec![mutation], k.clone(), *ts); - } - - let all_locks: Vec<_> = kv - .into_iter() - .map(|(ts, k, _)| { - // Create a LockInfo that matches the prewrite request in `must_kv_prewrite`. - let mut lock_info = LockInfo::default(); - lock_info.set_primary_lock(k.clone()); - lock_info.set_lock_version(ts); - lock_info.set_key(k); - lock_info.set_lock_ttl(3000); - lock_info.set_lock_type(Op::Put); - lock_info.set_min_commit_ts(ts + 1); - lock_info - }) - .collect(); - - let check_result = |got_locks: &[_], expected_locks: &[_]| { - for i in 0..std::cmp::max(got_locks.len(), expected_locks.len()) { - assert_eq!(got_locks[i], expected_locks[i], "lock {} mismatch", i); - } - }; - - check_result( - &must_physical_scan_lock(&client, ctx.clone(), 30, b"", 100), - &all_locks, - ); - check_result( - &must_physical_scan_lock(&client, ctx.clone(), 15, b"", 100), - &all_locks[0..=5], - ); - check_result( - &must_physical_scan_lock(&client, ctx.clone(), 10, b"", 100), - &all_locks[0..1], - ); - check_result( - &must_physical_scan_lock(&client, ctx.clone(), 9, b"", 100), - &[], - ); - check_result( - &must_physical_scan_lock(&client, ctx, 30, &[b'k', 13], 5), - &all_locks[3..8], - ); -} - -#[test] -fn test_applied_lock_collector() { - let mut cluster = new_server_cluster(0, 3); - cluster.pd_client.disable_default_operator(); - cluster.run(); - - // Create all stores' clients. 
- let env = Arc::new(Environment::new(1)); - let mut clients = HashMap::default(); - for node_id in cluster.get_node_ids() { - let channel = - ChannelBuilder::new(Arc::clone(&env)).connect(&cluster.sim.rl().get_addr(node_id)); - let client = TikvClient::new(channel); - clients.insert(node_id, client); - } - - // Create the ctx of the first region. - let region = cluster.get_region(b""); - let region_id = region.get_id(); - let leader_peer = cluster.leader_of_region(region_id).unwrap(); - let leader_store_id = leader_peer.get_store_id(); - let leader_client = clients.get(&leader_store_id).unwrap(); - let mut ctx = Context::default(); - ctx.set_region_id(region_id); - ctx.set_peer(leader_peer); - ctx.set_region_epoch(cluster.get_region_epoch(region_id)); - - // It's used to make sure all stores applies all logs. - let wait_for_apply = |cluster: &mut Cluster<_>, region: &metapb::Region| { - let cluster = &mut *cluster; - region.get_peers().iter().for_each(|p| { - let mut retry_times = 1; - loop { - let resp = - async_read_on_peer(cluster, p.clone(), region.clone(), b"key", true, true) - .recv() - .unwrap(); - if !resp.get_header().has_error() { - return; - } - if retry_times >= 50 { - panic!("failed to read on {:?}: {:?}", p, resp); - } - retry_times += 1; - sleep_ms(20); - } - }); - }; - - let check_lock = |lock: &LockInfo, k: &[u8], pk: &[u8], ts| { - assert_eq!(lock.get_key(), k); - assert_eq!(lock.get_primary_lock(), pk); - assert_eq!(lock.get_lock_version(), ts); - }; - - // Register lock observer at safe point 10000. - let mut safe_point = 10000; - clients.iter().for_each(|(_, c)| { - // Should report error when checking non-existent observer. - assert!(!check_lock_observer(c, safe_point).get_error().is_empty()); - must_register_lock_observer(c, safe_point); - assert!(must_check_lock_observer(c, safe_point, true).is_empty()); - }); - - // Lock observer should only collect values in lock CF. 
- let key = b"key0"; - must_kv_prewrite( - leader_client, - ctx.clone(), - vec![new_mutation(Op::Put, key, &b"v".repeat(1024))], - key.to_vec(), - 1, - ); - must_kv_commit(leader_client, ctx.clone(), vec![key.to_vec()], 1, 2, 2); - wait_for_apply(&mut cluster, ®ion); - clients.iter().for_each(|(_, c)| { - let locks = must_check_lock_observer(c, safe_point, true); - assert_eq!(locks.len(), 1); - check_lock(&locks[0], key, key, 1); - }); - - // Lock observer shouldn't collect locks after the safe point. - must_kv_prewrite( - leader_client, - ctx.clone(), - vec![new_mutation(Op::Put, key, b"v")], - key.to_vec(), - safe_point + 1, - ); - wait_for_apply(&mut cluster, ®ion); - clients.iter().for_each(|(_, c)| { - let locks = must_check_lock_observer(c, safe_point, true); - assert_eq!(locks.len(), 1); - check_lock(&locks[0], key, key, 1); - }); - - // Write 999 locks whose timestamp is less than the safe point. - let mutations = (1..1000) - .map(|i| new_mutation(Op::Put, format!("key{}", i).as_bytes(), b"v")) - .collect(); - must_kv_prewrite(leader_client, ctx.clone(), mutations, b"key1".to_vec(), 10); - wait_for_apply(&mut cluster, ®ion); - clients.iter().for_each(|(_, c)| { - let locks = must_check_lock_observer(c, safe_point, true); - // Plus the first lock. - assert_eq!(locks.len(), 1000); - }); - - // Add a new store and register lock observer. - let store_id = cluster.add_new_engine(); - let channel = - ChannelBuilder::new(Arc::clone(&env)).connect(&cluster.sim.rl().get_addr(store_id)); - let client = TikvClient::new(channel); - must_register_lock_observer(&client, safe_point); - - // Add a new peer. Lock observer should collect all locks from snapshot. 
- let peer = new_peer(store_id, store_id); - cluster.pd_client.must_add_peer(region_id, peer.clone()); - cluster.pd_client.must_none_pending_peer(peer); - wait_for_apply(&mut cluster, ®ion); - let locks = must_check_lock_observer(&client, safe_point, true); - assert_eq!(locks.len(), 999); - - // Should be dirty when collects too many locks. - let mutations = (1000..1100) - .map(|i| new_mutation(Op::Put, format!("key{}", i).as_bytes(), b"v")) - .collect(); - must_kv_prewrite( - leader_client, - ctx.clone(), - mutations, - b"key1000".to_vec(), - 100, - ); - wait_for_apply(&mut cluster, ®ion); - clients.insert(store_id, client); - clients.iter().for_each(|(_, c)| { - let resp = check_lock_observer(c, safe_point); - assert!(resp.get_error().is_empty(), "{:?}", resp.get_error()); - assert!(!resp.get_is_clean()); - // MAX_COLLECT_SIZE is 1024. - assert_eq!(resp.get_locks().len(), 1024); - }); - - // Reregister and check. It shouldn't clean up state. - clients.iter().for_each(|(_, c)| { - must_register_lock_observer(c, safe_point); - let resp = check_lock_observer(c, safe_point); - assert!(resp.get_error().is_empty(), "{:?}", resp.get_error()); - assert!(!resp.get_is_clean()); - // MAX_COLLECT_SIZE is 1024. - assert_eq!(resp.get_locks().len(), 1024); - }); - - // Register lock observer at a later safe point. Lock observer should reset its - // state. - safe_point += 1; - clients.iter().for_each(|(_, c)| { - must_register_lock_observer(c, safe_point); - assert!(must_check_lock_observer(c, safe_point, true).is_empty()); - // Can't register observer with smaller max_ts. 
- assert!( - !register_lock_observer(c, safe_point - 1) - .get_error() - .is_empty() - ); - assert!(must_check_lock_observer(c, safe_point, true).is_empty()); - }); - let leader_client = clients.get(&leader_store_id).unwrap(); - must_kv_prewrite( - leader_client, - ctx, - vec![new_mutation(Op::Put, b"key1100", b"v")], - b"key1100".to_vec(), - safe_point, - ); - wait_for_apply(&mut cluster, ®ion); - clients.iter().for_each(|(_, c)| { - // Should collect locks according to the new max ts. - let locks = must_check_lock_observer(c, safe_point, true); - assert_eq!(locks.len(), 1, "{:?}", locks); - // Shouldn't remove it with a wrong max ts. - assert!( - !remove_lock_observer(c, safe_point - 1) - .get_error() - .is_empty() - ); - let locks = must_check_lock_observer(c, safe_point, true); - assert_eq!(locks.len(), 1, "{:?}", locks); - // Remove lock observers. - must_remove_lock_observer(c, safe_point); - assert!(!check_lock_observer(c, safe_point).get_error().is_empty()); - }); -} - // Since v5.0 GC bypasses Raft, which means GC scans/deletes records with // `keys::DATA_PREFIX`. This case ensures it's performed correctly. 
#[test] diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index cfbe6ff504e..7e3f718dac9 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -20,7 +20,7 @@ use grpcio_health::{proto::HealthCheckRequest, *}; use kvproto::{ coprocessor::*, debugpb, - kvrpcpb::{self, PrewriteRequestPessimisticAction::*, *}, + kvrpcpb::{PrewriteRequestPessimisticAction::*, *}, metapb, raft_serverpb, raft_serverpb::*, tikvpb::*, @@ -936,32 +936,6 @@ fn test_split_region_impl(is_raw_kv: bool) { ); } -#[test] -fn test_read_index() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); - - // Read index - let mut req = ReadIndexRequest::default(); - req.set_context(ctx.clone()); - let mut resp = client.read_index(&req).unwrap(); - let last_index = resp.get_read_index(); - assert_eq!(last_index > 0, true); - - // Raw put - let (k, v) = (b"key".to_vec(), b"value".to_vec()); - let mut put_req = RawPutRequest::default(); - put_req.set_context(ctx); - put_req.key = k; - put_req.value = v; - let put_resp = client.raw_put(&put_req).unwrap(); - assert!(!put_resp.has_region_error()); - assert!(put_resp.error.is_empty()); - - // Read index again - resp = client.read_index(&req).unwrap(); - assert_eq!(last_index + 1, resp.get_read_index()); -} - #[test] fn test_debug_get() { let (cluster, debug_client, store_id) = must_new_cluster_and_debug_client(); @@ -1457,90 +1431,6 @@ fn test_async_commit_check_txn_status() { assert_ne!(resp.get_action(), Action::MinCommitTsPushed); } -#[test] -fn test_read_index_check_memory_locks() { - let mut cluster = new_server_cluster(0, 3); - cluster.cfg.raft_store.hibernate_regions = false; - cluster.run(); - - // make sure leader has been elected. 
- assert_eq!(cluster.must_get(b"k"), None); - - let region = cluster.get_region(b""); - let leader = cluster.leader_of_region(region.get_id()).unwrap(); - let leader_cm = cluster.sim.rl().get_concurrency_manager(leader.get_id()); - - let keys: Vec<_> = vec![b"k", b"l"] - .into_iter() - .map(|k| Key::from_raw(k)) - .collect(); - let guards = block_on(leader_cm.lock_keys(keys.iter())); - let lock = Lock::new( - LockType::Put, - b"k".to_vec(), - 1.into(), - 20000, - None, - 1.into(), - 1, - 2.into(), - ); - guards[0].with_lock(|l| *l = Some(lock.clone())); - - // read on follower - let mut follower_peer = None; - let peers = region.get_peers(); - for p in peers { - if p.get_id() != leader.get_id() { - follower_peer = Some(p.clone()); - break; - } - } - let follower_peer = follower_peer.unwrap(); - let addr = cluster.sim.rl().get_addr(follower_peer.get_store_id()); - - let env = Arc::new(Environment::new(1)); - let channel = ChannelBuilder::new(env).connect(&addr); - let client = TikvClient::new(channel); - - let mut ctx = Context::default(); - ctx.set_region_id(region.get_id()); - ctx.set_region_epoch(region.get_region_epoch().clone()); - ctx.set_peer(follower_peer); - - let read_index = |ranges: &[(&[u8], &[u8])]| { - let mut req = ReadIndexRequest::default(); - let start_ts = block_on(cluster.pd_client.get_tso()).unwrap(); - req.set_context(ctx.clone()); - req.set_start_ts(start_ts.into_inner()); - for &(start_key, end_key) in ranges { - let mut range = kvrpcpb::KeyRange::default(); - range.set_start_key(start_key.to_vec()); - range.set_end_key(end_key.to_vec()); - req.mut_ranges().push(range); - } - let resp = client.read_index(&req).unwrap(); - (resp, start_ts) - }; - - // wait a while until the node updates its own max ts - thread::sleep(Duration::from_millis(300)); - - let (resp, start_ts) = read_index(&[(b"l", b"yz")]); - assert!(!resp.has_locked()); - assert_eq!(leader_cm.max_ts(), start_ts); - - let (resp, start_ts) = read_index(&[(b"a", b"b"), (b"j", 
b"k0")]); - assert_eq!(resp.get_locked(), &lock.into_lock_info(b"k".to_vec())); - assert_eq!(leader_cm.max_ts(), start_ts); - - drop(guards); - - let (resp, start_ts) = read_index(&[(b"a", b"z")]); - assert!(!resp.has_locked()); - assert_eq!(leader_cm.max_ts(), start_ts); -} - #[test] fn test_prewrite_check_max_commit_ts() { let mut cluster = new_server_cluster(0, 1); @@ -1882,7 +1772,6 @@ fn test_tikv_forwarding() { req.set_split_key(b"k1".to_vec()); req }); - test_func_init!(client, ctx, call_opt, read_index, ReadIndexRequest); // Test if duplex can be redirect correctly. let cases = vec![ From 71980d382426b77d135c5d1c9576d363c298dc2d Mon Sep 17 00:00:00 2001 From: Hu# Date: Wed, 23 Nov 2022 08:37:57 +0800 Subject: [PATCH 0352/1149] *: remove redundant code for prewrite and commit. (#13747) ref tikv/tikv#13303 remove redundant code for prewrite and commit. Signed-off-by: husharp Co-authored-by: Ti Chi Robot --- components/test_raftstore/src/util.rs | 16 ++- .../resource_metering/test_read_keys.rs | 26 +---- tests/integrations/server/kv_service.rs | 102 ++---------------- 3 files changed, 21 insertions(+), 123 deletions(-) diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 06c2da432c0..14661344316 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -817,10 +817,15 @@ pub fn must_kv_read_equal(client: &TikvClient, ctx: Context, key: Vec, val: assert_eq!(get_resp.take_value(), val); } -// TODO: replace the redundant code -pub fn complete_data_commit(client: &TikvClient, ctx: &Context, ts: u64, k: Vec, v: Vec) { +pub fn write_and_read_key( + client: &TikvClient, + ctx: &Context, + ts: &mut u64, + k: Vec, + v: Vec, +) { // Prewrite - let prewrite_start_version = ts + 1; + let prewrite_start_version = *ts + 1; let mut mutation = Mutation::default(); mutation.set_op(Op::Put); mutation.set_key(k.clone()); @@ -833,7 +838,7 @@ pub fn complete_data_commit(client: &TikvClient, ctx: 
&Context, ts: u64, k: Vec< prewrite_start_version, ); // Commit - let commit_version = ts + 2; + let commit_version = *ts + 2; must_kv_commit( client, ctx.clone(), @@ -843,7 +848,8 @@ pub fn complete_data_commit(client: &TikvClient, ctx: &Context, ts: u64, k: Vec< commit_version, ); // Get - must_kv_read_equal(client, ctx.clone(), k, v, ts + 3); + *ts += 3; + must_kv_read_equal(client, ctx.clone(), k, v, *ts); } pub fn kv_read(client: &TikvClient, ctx: Context, key: Vec, ts: u64) -> GetResponse { diff --git a/tests/integrations/resource_metering/test_read_keys.rs b/tests/integrations/resource_metering/test_read_keys.rs index 87ad50024ad..35ef0e2ba88 100644 --- a/tests/integrations/resource_metering/test_read_keys.rs +++ b/tests/integrations/resource_metering/test_read_keys.rs @@ -50,31 +50,7 @@ pub fn test_read_keys() { let (k, v) = (n.clone(), n); // Prewrite. - ts += 1; - let prewrite_start_version = ts; - let mut mutation = Mutation::default(); - mutation.set_op(Op::Put); - mutation.set_key(k.clone()); - mutation.set_value(v.clone()); - must_kv_prewrite( - &client, - ctx.clone(), - vec![mutation], - k.clone(), - prewrite_start_version, - ); - - // Commit. 
- ts += 1; - let commit_version = ts; - must_kv_commit( - &client, - ctx.clone(), - vec![k.clone()], - prewrite_start_version, - commit_version, - commit_version, - ); + write_and_read_key(&client, &ctx, &mut ts, k.clone(), v.clone()); } // PointGet diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 7e3f718dac9..f6db3386007 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -276,33 +276,7 @@ fn test_mvcc_basic() { let (k, v) = (b"key".to_vec(), b"value".to_vec()); let mut ts = 0; - - // Prewrite - ts += 1; - let prewrite_start_version = ts; - let mut mutation = Mutation::default(); - mutation.set_op(Op::Put); - mutation.set_key(k.clone()); - mutation.set_value(v.clone()); - must_kv_prewrite( - &client, - ctx.clone(), - vec![mutation], - k.clone(), - prewrite_start_version, - ); - - // Commit - ts += 1; - let commit_version = ts; - must_kv_commit( - &client, - ctx.clone(), - vec![k.clone()], - prewrite_start_version, - commit_version, - commit_version, - ); + write_and_read_key(&client, &ctx, &mut ts, k.clone(), v.clone()); // Get ts += 1; @@ -365,33 +339,7 @@ fn test_mvcc_rollback_and_cleanup() { let (k, v) = (b"key".to_vec(), b"value".to_vec()); let mut ts = 0; - - // Prewrite - ts += 1; - let prewrite_start_version = ts; - let mut mutation = Mutation::default(); - mutation.set_op(Op::Put); - mutation.set_key(k.clone()); - mutation.set_value(v); - must_kv_prewrite( - &client, - ctx.clone(), - vec![mutation], - k.clone(), - prewrite_start_version, - ); - - // Commit - ts += 1; - let commit_version = ts; - must_kv_commit( - &client, - ctx.clone(), - vec![k.clone()], - prewrite_start_version, - commit_version, - commit_version, - ); + write_and_read_key(&client, &ctx, &mut ts, k.clone(), v); // Prewrite puts some locks. 
ts += 1; @@ -607,13 +555,15 @@ fn test_mvcc_flashback_failed_after_first_batch() { for i in 0..FLASHBACK_BATCH_SIZE * 2 { // Meet the constraints of the alphabetical order for test let k = format!("key@{}", from_u32(i as u32).unwrap()).into_bytes(); - complete_data_commit(&client, &ctx, ts, k.clone(), b"value@0".to_vec()); + write_and_read_key(&client, &ctx, &mut ts, k.clone(), b"value@0".to_vec()); + ts -= 3; } ts += 3; let check_ts = ts; for i in 0..FLASHBACK_BATCH_SIZE * 2 { let k = format!("key@{}", from_u32(i as u32).unwrap()).into_bytes(); - complete_data_commit(&client, &ctx, ts, k.clone(), b"value@1".to_vec()); + write_and_read_key(&client, &ctx, &mut ts, k.clone(), b"value@1".to_vec()); + ts -= 3; } ts += 3; // Flashback @@ -716,34 +666,7 @@ fn test_mvcc_flashback() { for i in 0..2000 { let v = format!("value@{}", i).into_bytes(); let k = format!("key@{}", i % 1000).into_bytes(); - // Prewrite - ts += 1; - let prewrite_start_version = ts; - let mut mutation = Mutation::default(); - mutation.set_op(Op::Put); - mutation.set_key(k.clone()); - mutation.set_value(v.clone()); - must_kv_prewrite( - &client, - ctx.clone(), - vec![mutation], - k.clone(), - prewrite_start_version, - ); - // Commit - ts += 1; - let commit_version = ts; - must_kv_commit( - &client, - ctx.clone(), - vec![k.clone()], - prewrite_start_version, - commit_version, - commit_version, - ); - // Get - ts += 1; - must_kv_read_equal(&client, ctx.clone(), k.clone(), v.clone(), ts) + write_and_read_key(&client, &ctx, &mut ts, k.clone(), v.clone()); } // Prewrite to leave a lock. 
let k = b"key@1".to_vec(); @@ -837,15 +760,8 @@ fn test_mvcc_flashback_block_scheduling() { fn test_mvcc_flashback_unprepared() { let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); let (k, v) = (b"key".to_vec(), b"value".to_vec()); - // Prewrite - let mut mutation = Mutation::default(); - mutation.set_op(Op::Put); - mutation.set_key(k.clone()); - mutation.set_value(v.clone()); - must_kv_prewrite(&client, ctx.clone(), vec![mutation], k.clone(), 1); - // Commit - must_kv_commit(&client, ctx.clone(), vec![k.clone()], 1, 2, 2); - must_kv_read_equal(&client, ctx.clone(), k.clone(), v.clone(), 3); + let mut ts = 0; + write_and_read_key(&client, &ctx, &mut ts, k.clone(), v.clone()); // Try to flashback without preparing first. let mut req = FlashbackToVersionRequest::default(); req.set_context(ctx.clone()); From fd197f08314ce31860f439d2f84f7d77058745c7 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 23 Nov 2022 10:27:58 +0800 Subject: [PATCH 0353/1149] server: wait for reset-to-version task completion (#13830) close tikv/tikv#13829 Fix a segfault when executing reset-to-version with tikv-ctl Signed-off-by: tabokie --- src/server/reset_to_version.rs | 82 ++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/src/server/reset_to_version.rs b/src/server/reset_to_version.rs index e1faccd9b3f..1ea98acc1c8 100644 --- a/src/server/reset_to_version.rs +++ b/src/server/reset_to_version.rs @@ -85,10 +85,7 @@ impl ResetToVersionWorker { fn next_write(&mut self) -> Result, Write)>> { if self.write_iter.valid().unwrap() { - let mut state = self - .state - .lock() - .expect("failed to lock ResetToVersionWorker::state"); + let mut state = self.state.lock().unwrap(); debug_assert!(matches!( *state, ResetToVersionState::RemovingWrite { scanned: _ } @@ -149,16 +146,14 @@ impl ResetToVersionWorker { let mut has_more = true; for _ in 0..batch_size { if self.lock_iter.valid().unwrap() { - let mut state = self - .state - .lock() - 
.expect("failed to lock ResetToVersionWorker::state"); - debug_assert!(matches!( - *state, - ResetToVersionState::RemovingLock { scanned: _ } - )); - *state.scanned() += 1; - drop(state); + { + let mut state = self.state.lock().unwrap(); + debug_assert!(matches!( + *state, + ResetToVersionState::RemovingLock { scanned: _ } + )); + *state.scanned() += 1; + } box_try!(wb.delete_cf(CF_LOCK, self.lock_iter.key())); self.lock_iter.next().unwrap(); @@ -197,6 +192,12 @@ impl Clone for ResetToVersionManager { } } +impl Drop for ResetToVersionManager { + fn drop(&mut self) { + self.wait(); + } +} + #[allow(dead_code)] impl ResetToVersionManager { pub fn new(engine: RocksEngine) -> Self { @@ -221,30 +222,32 @@ impl ResetToVersionManager { let mut worker = ResetToVersionWorker::new(write_iter, lock_iter, ts, self.state.clone()); let mut wb = self.engine.write_batch(); let props = tikv_util::thread_group::current_properties(); - if self.worker_handle.borrow().is_some() { - warn!("A reset-to-version process is already in progress! 
Wait until it finish first."); - self.wait(); - } - *self.worker_handle.borrow_mut() = Some(std::thread::Builder::new() - .name("reset_to_version".to_string()) - .spawn_wrapper(move || { - tikv_util::thread_group::set_properties(props); - tikv_alloc::add_thread_memory_accessor(); + self.wait(); - while worker.process_next_batch(BATCH_SIZE, &mut wb).expect("reset_to_version failed when removing invalid writes") { - } - *worker.state.lock() - .expect("failed to lock `ResetToVersionWorker::state` in `ResetToVersionWorker::process_next_batch`") - = ResetToVersionState::RemovingLock { scanned: 0 }; - while worker.process_next_batch_lock(BATCH_SIZE, &mut wb).expect("reset_to_version failed when removing invalid locks") { - } - *worker.state.lock() - .expect("failed to lock `ResetToVersionWorker::state` in `ResetToVersionWorker::process_next_batch_lock`") - = ResetToVersionState::Done; - info!("Reset to version done!"); - tikv_alloc::remove_thread_memory_accessor(); - }) - .expect("failed to spawn reset_to_version thread")); + *self.worker_handle.borrow_mut() = Some( + std::thread::Builder::new() + .name("reset_to_version".to_string()) + .spawn_wrapper(move || { + tikv_util::thread_group::set_properties(props); + tikv_alloc::add_thread_memory_accessor(); + + while worker + .process_next_batch(BATCH_SIZE, &mut wb) + .expect("process_next_batch") + {} + *worker.state.lock().unwrap() = + ResetToVersionState::RemovingLock { scanned: 0 }; + while worker + .process_next_batch_lock(BATCH_SIZE, &mut wb) + .expect("process_next_batch_lock") + {} + *worker.state.lock().unwrap() = ResetToVersionState::Done; + info!("Reset to version done!"); + + tikv_alloc::remove_thread_memory_accessor(); + }) + .expect("failed to spawn reset_to_version thread"), + ); } /// Current process state. @@ -257,7 +260,10 @@ impl ResetToVersionManager { /// Wait until the process finished. 
pub fn wait(&self) { - self.worker_handle.take().unwrap().join().unwrap(); + if let Some(handle) = self.worker_handle.take() { + info!("Wait for the reset-to-version task to complete."); + handle.join().unwrap(); + } } } From 101032b97ea1ceaeef7f01459e1523fcd3c56509 Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Wed, 23 Nov 2022 10:53:58 +0800 Subject: [PATCH 0354/1149] support restoring a batch of KV files (#13786) ref tikv/tikv#13788 Signed-off-by: joccau Signed-off-by: Zak Zhao <57036248+joccau@users.noreply.github.com> Co-authored-by: Ti Chi Robot --- .../external_storage/export/src/export.rs | 9 +- components/sst_importer/src/metrics.rs | 5 + src/import/sst_service.rs | 313 +++++++++++++----- 3 files changed, 239 insertions(+), 88 deletions(-) diff --git a/components/external_storage/export/src/export.rs b/components/external_storage/export/src/export.rs index a36f3eba11e..ea02ebe2c6f 100644 --- a/components/external_storage/export/src/export.rs +++ b/components/external_storage/export/src/export.rs @@ -22,13 +22,10 @@ use encryption::DataKeyManager; use external_storage::dylib_client; #[cfg(feature = "cloud-storage-grpc")] use external_storage::grpc_client; -use external_storage::{ - compression_reader_dispatcher, encrypt_wrap_reader, record_storage_create, BackendConfig, - HdfsStorage, -}; pub use external_storage::{ - read_external_storage_into_file, ExternalStorage, LocalStorage, NoopStorage, RestoreConfig, - UnpinReader, + compression_reader_dispatcher, encrypt_wrap_reader, read_external_storage_into_file, + record_storage_create, BackendConfig, ExternalStorage, HdfsStorage, LocalStorage, NoopStorage, + RestoreConfig, UnpinReader, }; use futures_io::AsyncRead; #[cfg(feature = "cloud-gcp")] diff --git a/components/sst_importer/src/metrics.rs b/components/sst_importer/src/metrics.rs index 08f095078d5..cd14f6feb56 100644 --- a/components/sst_importer/src/metrics.rs +++ b/components/sst_importer/src/metrics.rs @@ 
-56,6 +56,11 @@ lazy_static! { "tikv_import_download_bytes", "Bucketed histogram of importer download bytes", exponential_buckets(1024.0, 2.0, 20).unwrap() + ).unwrap(); + pub static ref IMPORTER_APPLY_BYTES: Histogram = register_histogram!( + "tikv_import_apply_bytes", + "Bucketed histogram of importer apply bytes", + exponential_buckets(1024.0, 2.0, 20).unwrap() ) .unwrap(); pub static ref IMPORTER_INGEST_DURATION: HistogramVec = register_histogram_vec!( diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index fff9c79cec2..61d181b5c2f 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -443,38 +443,82 @@ where sst_importer::metrics::IMPORTER_APPLY_DURATION .with_label_values(&["queue"]) .observe(start.saturating_elapsed().as_secs_f64()); - + let mut start_apply = Instant::now(); let mut futs = vec![]; let mut apply_resp = ApplyResponse::default(); let context = req.take_context(); - let meta = req.get_meta(); + let mut rules = req.take_rewrite_rules(); + let mut metas = req.take_metas(); + // For compatibility with old requests. 
+ if req.has_meta() { + metas.push(req.take_meta()); + rules.push(req.take_rewrite_rule()); + } let result = (|| -> Result<()> { - let temp_file = - importer.do_download_kv_file(meta, req.get_storage_backend(), &limiter)?; - let mut reqs = RequestCollector::from_cf(meta.get_cf()); let mut cmd_reqs = vec![]; - let mut build_req_fn = build_apply_request( - raft_size.0, - &mut reqs, - cmd_reqs.as_mut(), - meta.get_is_delete(), - meta.get_cf(), - context.clone(), - ); - let range = importer.do_apply_kv_file( - meta.get_start_key(), - meta.get_end_key(), - meta.get_restore_ts(), - temp_file, - req.get_rewrite_rule(), - &mut build_req_fn, - )?; - drop(build_req_fn); - if !reqs.is_empty() { - let cmd = make_request(&mut reqs, context); + let mut reqs_default = RequestCollector::from_cf(CF_DEFAULT); + let mut reqs_write = RequestCollector::from_cf(CF_WRITE); + let mut req_default_size = 0_u64; + let mut req_write_size = 0_u64; + let mut range: Option = None; + + for (i, meta) in metas.iter().enumerate() { + let (reqs, req_size) = if meta.get_cf() == CF_DEFAULT { + (&mut reqs_default, &mut req_default_size) + } else { + (&mut reqs_write, &mut req_write_size) + }; + + let mut build_req_fn = build_apply_request( + req_size, + raft_size.0, + reqs, + cmd_reqs.as_mut(), + meta.get_is_delete(), + meta.get_cf(), + context.clone(), + ); + + let temp_file = + importer.do_download_kv_file(meta, req.get_storage_backend(), &limiter)?; + let r: Option = importer.do_apply_kv_file( + meta.get_start_key(), + meta.get_end_key(), + meta.get_restore_ts(), + temp_file, + &rules[i], + &mut build_req_fn, + )?; + + if let Some(mut r) = r { + range = match range { + Some(mut v) => { + let s = v.take_start().min(r.take_start()); + let e = v.take_end().max(r.take_end()); + Some(Range { + start: s, + end: e, + ..Default::default() + }) + } + None => Some(r), + }; + } + } + + if !reqs_default.is_empty() { + let cmd = make_request(&mut reqs_default, context.clone()); cmd_reqs.push(cmd); + 
IMPORTER_APPLY_BYTES.observe(req_default_size as _); } + if !reqs_write.is_empty() { + let cmd = make_request(&mut reqs_write, context); + cmd_reqs.push(cmd); + IMPORTER_APPLY_BYTES.observe(req_write_size as _); + } + + start_apply = Instant::now(); for cmd in cmd_reqs { let (cb, future) = paired_future_callback(); match router.send_command(cmd, Callback::write(cb), RaftCmdExtraOpts::default()) @@ -507,19 +551,21 @@ where if r.response.get_header().has_error() { let mut import_err = kvproto::import_sstpb::Error::default(); let err = r.response.get_header().get_error(); - import_err - .set_message("failed to complete raft command".to_string()); + import_err.set_message("failed to complete raft command".to_string()); // FIXME: if there are many errors, we may lose some of them here. - import_err - .set_store_error(err.clone()); - warn!("failed to apply the file to the store"; "error" => ?err, "file" => %meta.get_name()); + import_err.set_store_error(err.clone()); + warn!("failed to apply the file to the store"; "error" => ?err); resp.set_error(import_err); } } } resp })); + // Records how long the apply task waits to be scheduled. + sst_importer::metrics::IMPORTER_APPLY_DURATION + .with_label_values(&["apply"]) + .observe(start_apply.saturating_elapsed().as_secs_f64()); sst_importer::metrics::IMPORTER_APPLY_DURATION .with_label_values(&["finish"]) .observe(start.saturating_elapsed().as_secs_f64()); @@ -861,9 +907,9 @@ enum RequestCollector { /// This is used for write CF because resolved ts observer hates duplicated /// key in the same request. RetainLastTs(HashMap, (Request, u64)>), - /// Collector favor that simple collect all items. - /// This is used for default CF. - KeepAll(Vec), + /// Collector favor that simple collect all items, and it do not contains + /// duplicated key-value. This is used for default CF. 
+ KeepAll(HashMap, Request>), } impl RequestCollector { @@ -879,9 +925,9 @@ impl RequestCollector { } fn accept(&mut self, req: Request) { + let k = key_from_request(&req); match self { RequestCollector::RetainLastTs(ref mut reqs) => { - let k = key_from_request(&req); let (encoded_key, ts) = match Key::split_on_ts_for(k) { Ok(k) => k, Err(err) => { @@ -897,7 +943,9 @@ impl RequestCollector { reqs.insert(encoded_key.to_owned(), (req, ts.into_inner())); } } - RequestCollector::KeepAll(ref mut a) => a.push(req), + RequestCollector::KeepAll(ref mut reqs) => { + reqs.insert(k.to_owned(), req); + } } } @@ -906,7 +954,7 @@ impl RequestCollector { RequestCollector::RetainLastTs(ref mut reqs) => { reqs.drain().map(|(_, (req, _))| req).collect() } - RequestCollector::KeepAll(ref mut reqs) => std::mem::take(reqs), + RequestCollector::KeepAll(ref mut reqs) => reqs.drain().map(|(_, req)| req).collect(), } } @@ -956,6 +1004,7 @@ fn make_request(reqs: &mut RequestCollector, context: Context) -> RaftCmdRequest // in https://github.com/tikv/tikv/blob/a401f78bc86f7e6ea6a55ad9f453ae31be835b55/components/resolved_ts/src/cmd.rs#L204 // will panic if found duplicated entry during Vec. fn build_apply_request<'a, 'b>( + req_size: &'a mut u64, raft_size: u64, reqs: &'a mut RequestCollector, cmd_reqs: &'a mut Vec, @@ -966,51 +1015,41 @@ fn build_apply_request<'a, 'b>( where 'a: 'b, { - let mut req_size = 0_u64; - // use callback to collect kv data. - if is_delete { - Box::new(move |k: Vec, _v: Vec| { - let mut req = Request::default(); - let mut del = DeleteRequest::default(); + Box::new(move |k: Vec, v: Vec| { + let mut req = Request::default(); + if is_delete { + let mut del = DeleteRequest::default(); del.set_key(k); del.set_cf(cf.to_string()); req.set_cmd_type(CmdType::Delete); req.set_delete(del); - req_size += req.compute_size() as u64; - reqs.accept(req); - // When the request size get grow to half of the max request size, - // build the request and add it to a batch. 
- if req_size > raft_size / 2 { - req_size = 0; - let cmd = make_request(reqs, context.clone()); - cmd_reqs.push(cmd); - } - }) - } else { - Box::new(move |k: Vec, v: Vec| { + } else { if cf == CF_WRITE && !write_needs_restore(&v) { return; } - let mut req = Request::default(); let mut put = PutRequest::default(); - put.set_key(k); put.set_value(v); put.set_cf(cf.to_string()); req.set_cmd_type(CmdType::Put); req.set_put(put); - req_size += req.compute_size() as u64; - reqs.accept(req); - if req_size > raft_size / 2 { - req_size = 0; - let cmd = make_request(reqs, context.clone()); - cmd_reqs.push(cmd); - } - }) - } + } + + // When the request size get grow to max request size, + // build the request and add it to a batch. + if *req_size + req.compute_size() as u64 > raft_size * 7 / 8 { + IMPORTER_APPLY_BYTES.observe(*req_size as _); + *req_size = 0; + let cmd = make_request(reqs, context.clone()); + cmd_reqs.push(cmd); + } + + *req_size += req.compute_size() as u64; + reqs.accept(req); + }) } fn write_needs_restore(write: &[u8]) -> bool { @@ -1063,23 +1102,42 @@ mod test { fn default_req(key: &[u8], val: &[u8], start_ts: u64) -> Request { let (k, v) = default(key, val, start_ts); - req(k, v, CF_DEFAULT) + req(k, v, CF_DEFAULT, CmdType::Put) } fn write_req(key: &[u8], ty: WriteType, commit_ts: u64, start_ts: u64) -> Request { let (k, v) = write(key, ty, commit_ts, start_ts); - req(k, v, CF_WRITE) + let cmd_type = if ty == WriteType::Delete { + CmdType::Delete + } else { + CmdType::Put + }; + + req(k, v, CF_WRITE, cmd_type) } - fn req(k: Vec, v: Vec, cf: &str) -> Request { + fn req(k: Vec, v: Vec, cf: &str, cmd_type: CmdType) -> Request { let mut req = Request::default(); - let mut put = PutRequest::default(); + req.set_cmd_type(cmd_type); + + match cmd_type { + CmdType::Put => { + let mut put = PutRequest::default(); + put.set_key(k); + put.set_value(v); + put.set_cf(cf.to_string()); - put.set_key(k); - put.set_value(v); - put.set_cf(cf.to_string()); - 
req.set_cmd_type(CmdType::Put); - req.set_put(put); + req.set_put(put) + } + CmdType::Delete => { + let mut del = DeleteRequest::default(); + del.set_cf(cf.to_string()); + del.set_key(k); + + req.set_delete(del); + } + _ => panic!("invalid input cmd_type"), + } req } @@ -1088,26 +1146,36 @@ mod test { #[derive(Debug)] struct Case { cf: &'static str, + is_delete: bool, mutations: Vec<(Vec, Vec)>, expected_reqs: Vec, } fn run_case(c: &Case) { - let mut v = vec![]; - let mut coll = RequestCollector::from_cf(c.cf); - let mut builder = - build_apply_request(1024, &mut coll, &mut v, false, c.cf, Context::new()); + let mut cmds = vec![]; + let mut reqs = RequestCollector::from_cf(c.cf); + let mut req_size = 0_u64; + + let mut builder = build_apply_request( + &mut req_size, + 1024, + &mut reqs, + &mut cmds, + c.is_delete, + c.cf, + Context::new(), + ); for (k, v) in c.mutations.clone() { builder(k, v); } drop(builder); - if !coll.is_empty() { - let cmd = make_request(&mut coll, Context::new()); - v.push(cmd); + if !reqs.is_empty() { + let cmd = make_request(&mut reqs, Context::new()); + cmds.push(cmd); } - let mut req1: HashMap<_, _> = v + let mut req1: HashMap<_, _> = cmds .into_iter() .flat_map(|mut x| x.take_requests().into_iter()) .map(|req| { @@ -1126,12 +1194,13 @@ mod test { let cases = vec![ Case { cf: CF_WRITE, + is_delete: false, mutations: vec![ write(b"foo", Lock, 42, 41), write(b"foo", Put, 40, 39), write(b"bar", Put, 38, 37), write(b"baz", Put, 34, 31), - write(b"bar", Delete, 28, 17), + write(b"bar", Put, 28, 17), ], expected_reqs: vec![ write_req(b"foo", Put, 40, 39), @@ -1139,8 +1208,24 @@ mod test { write_req(b"baz", Put, 34, 31), ], }, + Case { + cf: CF_WRITE, + is_delete: true, + mutations: vec![ + write(b"foo", Delete, 40, 39), + write(b"bar", Delete, 38, 37), + write(b"baz", Delete, 34, 31), + write(b"bar", Delete, 28, 17), + ], + expected_reqs: vec![ + write_req(b"foo", Delete, 40, 39), + write_req(b"bar", Delete, 38, 37), + write_req(b"baz", Delete, 
34, 31), + ], + }, Case { cf: CF_DEFAULT, + is_delete: false, mutations: vec![ default(b"aria", b"The planet where flowers bloom.", 123), default( @@ -1149,6 +1234,7 @@ mod test { 178, ), default(b"beyond", b"Calling your name.", 278), + default(b"beyond", b"Calling your name.", 278), ], expected_reqs: vec![ default_req(b"aria", b"The planet where flowers bloom.", 123), @@ -1166,4 +1252,67 @@ mod test { run_case(&case); } } + + #[test] + fn test_request_collector_with_write_cf() { + let mut request_collector = RequestCollector::from_cf(CF_WRITE); + assert_eq!(request_collector.is_empty(), true); + let reqs = vec![ + write_req(b"foo", WriteType::Put, 40, 39), + write_req(b"aar", WriteType::Put, 38, 37), + write_req(b"foo", WriteType::Put, 34, 31), + write_req(b"zzz", WriteType::Put, 41, 40), + ]; + let reqs_result = vec![ + write_req(b"aar", WriteType::Put, 38, 37), + write_req(b"foo", WriteType::Put, 40, 39), + write_req(b"zzz", WriteType::Put, 41, 40), + ]; + + for req in reqs { + request_collector.accept(req); + } + assert_eq!(request_collector.is_empty(), false); + let mut reqs = request_collector.drain(); + reqs.sort_by(|r1, r2| { + let k1 = key_from_request(r1); + let k2 = key_from_request(r2); + k1.cmp(k2) + }); + assert_eq!(reqs, reqs_result); + assert_eq!(request_collector.is_empty(), true); + } + + #[test] + fn test_request_collector_with_default_cf() { + let mut request_collector = RequestCollector::from_cf(CF_DEFAULT); + assert_eq!(request_collector.is_empty(), true); + let reqs = vec![ + default_req(b"foo", b"", 39), + default_req(b"zzz", b"", 40), + default_req(b"foo", b"", 37), + default_req(b"foo", b"", 39), + ]; + let reqs_result = vec![ + default_req(b"foo", b"", 37), + default_req(b"foo", b"", 39), + default_req(b"zzz", b"", 40), + ]; + + for req in reqs { + request_collector.accept(req); + } + assert_eq!(request_collector.is_empty(), false); + let mut reqs = request_collector.drain(); + reqs.sort_by(|r1, r2| { + let k1 = key_from_request(r1); + 
let (k1, ts1) = Key::split_on_ts_for(k1).unwrap(); + let k2 = key_from_request(r2); + let (k2, ts2) = Key::split_on_ts_for(k2).unwrap(); + + k1.cmp(k2).then(ts1.cmp(&ts2)) + }); + assert_eq!(reqs, reqs_result); + assert_eq!(request_collector.is_empty(), true); + } } From 8ce818be228796492eb843048d2996b75a4d714d Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Wed, 23 Nov 2022 13:41:57 +0800 Subject: [PATCH 0355/1149] txn: revert "calculate last_change_ts in rollback" (#13834) ref tikv/tikv#13694, ref tikv/tikv#13749 This commit reverts #13749 (calculate last_change_ts in rollback). If we calculate last_change_ts, consider the following case: Key k has a write record with commit_ts = 5. 1. Prewrite k, start_ts = 10 2. Rollback k, start_ts = 30, last_commit_ts = 5. 3. Commit k, start_ts = 10, commit_ts = 20 Then, read with ts = 40, it will get an incorrect last_commit_ts from the rollback record. There is no easy way to handle the rollback case. I choose to give up calculating it. Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- src/storage/mvcc/reader/reader.rs | 1 - src/storage/mvcc/reader/scanner/forward.rs | 11 +- src/storage/txn/actions/check_txn_status.rs | 41 +----- .../txn/commands/check_secondary_locks.rs | 129 +----------------- src/storage/txn/commands/check_txn_status.rs | 108 ++------------- 5 files changed, 23 insertions(+), 267 deletions(-) diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index c8ca1a5f671..a6aae85761f 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -2541,7 +2541,6 @@ pub mod tests { engine.commit(k, 1, 2); // Write enough ROLLBACK/LOCK recrods - engine.rollback(k, 5); for start_ts in (6..30).into_iter().step_by(2) { engine.lock(k, start_ts, start_ts + 1); } diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 12300187739..6672842fab9 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ 
b/src/storage/mvcc/reader/scanner/forward.rs @@ -1625,7 +1625,8 @@ mod latest_kv_tests { for start_ts in (10..30).into_iter().step_by(2) { must_prewrite_lock(&mut engine, b"k1", b"k1", start_ts); must_commit(&mut engine, b"k1", start_ts, start_ts + 1); - must_rollback(&mut engine, b"k3", start_ts + 1, true); + must_prewrite_lock(&mut engine, b"k3", b"k1", start_ts); + must_commit(&mut engine, b"k3", start_ts, start_ts + 1); } must_prewrite_put(&mut engine, b"k1", b"v13", b"k1", 40); @@ -1647,10 +1648,10 @@ mod latest_kv_tests { // k2 | 46 | PUT | v22 // k2 | 6 | PUT | v21 // k3 | 47 | PUT | v32 - // k3 | 29 | ROLLBACK | - // k3 | 27 | ROLLBACK | - // k3 | ... | ROLLBACK | - // k3 | 11 | ROLLBACK | + // k3 | 29 | LOCK | + // k3 | 27 | LOCK | + // k3 | ... | LOCK | + // k3 | 11 | LOCK | // k3 | 7 | PUT | v31 let snapshot = engine.snapshot(Default::default()).unwrap(); diff --git a/src/storage/txn/actions/check_txn_status.rs b/src/storage/txn/actions/check_txn_status.rs index 126c34ade92..4c900e5a438 100644 --- a/src/storage/txn/actions/check_txn_status.rs +++ b/src/storage/txn/actions/check_txn_status.rs @@ -8,7 +8,6 @@ use crate::storage::{ metrics::MVCC_CHECK_TXN_STATUS_COUNTER_VEC, reader::OverlappedWrite, ErrorInner, LockType, MvccTxn, ReleasedLock, Result, SnapshotReader, TxnCommitRecord, }, - txn::{sched_pool::tls_can_enable, scheduler::LAST_CHANGE_TS}, Snapshot, TxnStatus, }; @@ -135,8 +134,7 @@ pub fn check_txn_status_missing_lock( // Insert a Rollback to Write CF in case that a stale prewrite // command is received after a cleanup command. 
- if let Some(mut write) = action.construct_write(ts, overlapped_write) { - update_last_change_for_rollback(reader, &mut write, &primary_key, ts)?; + if let Some(write) = action.construct_write(ts, overlapped_write) { txn.put_write(primary_key, ts, write.as_ref().to_bytes()); } MVCC_CHECK_TXN_STATUS_COUNTER_VEC.rollback.inc(); @@ -170,8 +168,7 @@ pub fn rollback_lock( // Only the primary key of a pessimistic transaction needs to be protected. let protected: bool = is_pessimistic_txn && key.is_encoded_from(&lock.primary); - if let Some(mut write) = make_rollback(reader.start_ts, protected, overlapped_write) { - update_last_change_for_rollback(reader, &mut write, &key, lock.ts)?; + if let Some(write) = make_rollback(reader.start_ts, protected, overlapped_write) { txn.put_write(key.clone(), reader.start_ts, write.as_ref().to_bytes()); } @@ -195,40 +192,6 @@ pub fn collapse_prev_rollback( Ok(()) } -/// Updates the last_change_ts of a new Rollback record. -/// -/// When writing a new Rollback record, we don't always know about the -/// information about the last change. So, we will call `seek_write` again to -/// calculate the last_change_ts. -/// -/// The `seek_write` here is usually cheap because this functions is typically -/// called after `get_txn_commit_record` and `get_txn_commit_record` should have -/// moved the cursor around the record we want. -pub fn update_last_change_for_rollback( - reader: &mut SnapshotReader, - write: &mut Write, - key: &Key, - ts: TimeStamp, -) -> Result<()> { - // Also update the last_change_ts if we are writing an overlapped rollback to a - // LOCK record. Actually, because overlapped rollbacks are rare, it does not - // solve the inaccuracy caused by inserted rollback (and we don't intend it - // because it's uncommon). Just do it when it happens. - if tls_can_enable(LAST_CHANGE_TS) - && (write.write_type == WriteType::Lock || write.write_type == WriteType::Rollback) - { - if let Some((commit_ts, w)) = reader.seek_write(key, ts)? 
{ - // Even with collapsed rollback, the deleted rollbacks will become tombstones - // which we probably need to skip them one by one. That's why we always use - // `next_last_change_info` here to calculate and count them in - // `versions_to_last_change`. - (write.last_change_ts, write.versions_to_last_change) = - w.next_last_change_info(commit_ts); - } - } - Ok(()) -} - /// Generate the Write record that should be written that means to perform a /// specified rollback operation. pub fn make_rollback( diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index 71adda7a274..4802535c054 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -8,9 +8,7 @@ use crate::storage::{ lock_manager::LockManager, mvcc::{LockType, MvccTxn, SnapshotReader, TimeStamp, TxnCommitRecord}, txn::{ - actions::check_txn_status::{ - collapse_prev_rollback, make_rollback, update_last_change_for_rollback, - }, + actions::check_txn_status::{collapse_prev_rollback, make_rollback}, commands::{ Command, CommandExt, ReaderWithStats, ReleasedLocks, ResponsePolicy, TypedCommand, WriteCommand, WriteContext, WriteResult, @@ -121,10 +119,7 @@ impl WriteCommand for CheckSecondaryLocks { } // We must protect this rollback in case this rollback is collapsed and a stale // acquire_pessimistic_lock and prewrite succeed again. 
- if let Some(mut write) = - make_rollback(self.start_ts, true, rollback_overlapped_write) - { - update_last_change_for_rollback(&mut reader, &mut write, &key, self.start_ts)?; + if let Some(write) = make_rollback(self.start_ts, true, rollback_overlapped_write) { txn.put_write(key.clone(), self.start_ts, write.as_ref().to_bytes()); collapse_prev_rollback(&mut txn, &mut reader, &key)?; } @@ -170,20 +165,14 @@ impl WriteCommand for CheckSecondaryLocks { pub mod tests { use concurrency_manager::ConcurrencyManager; use kvproto::kvrpcpb::Context; - use tikv_kv::Statistics; use tikv_util::deadline::Deadline; - use txn_types::Mutation; use super::*; use crate::storage::{ kv::TestEngineBuilder, lock_manager::MockLockManager, mvcc::tests::*, - txn::{ - commands::{test_util::prewrite_with_cm, WriteCommand}, - scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, - tests::*, - }, + txn::{commands::WriteCommand, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*}, Engine, }; @@ -354,116 +343,4 @@ pub mod tests { } must_get_overlapped_rollback(&mut engine, b"k1", 15, 13, WriteType::Lock, Some(0)); } - - // The main logic is almost identical to - // test_rollback_calculate_last_change_info of check_txn_status. But the small - // differences about handling lock CF make it difficult to reuse code. - #[test] - fn test_rollback_calculate_last_change_info() { - use pd_client::FeatureGate; - - use crate::storage::txn::sched_pool::set_tls_feature_gate; - - let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); - let cm = ConcurrencyManager::new(1.into()); - let k = b"k"; - let mut statistics = Statistics::default(); - - must_prewrite_put(&mut engine, k, b"v1", k, 5); - must_commit(&mut engine, k, 5, 6); - must_prewrite_put(&mut engine, k, b"v2", k, 7); - must_commit(&mut engine, k, 7, 8); - must_prewrite_put(&mut engine, k, b"v3", k, 30); - must_commit(&mut engine, k, 30, 35); - - // TiKV 6.4 should not write last_change_ts. 
- let feature_gate = FeatureGate::default(); - feature_gate.set_version("6.4.0").unwrap(); - set_tls_feature_gate(feature_gate); - must_success(&mut engine, k, 40, SecondaryLocksStatus::RolledBack); - let rollback = must_written(&mut engine, k, 40, 40, WriteType::Rollback); - assert!(rollback.last_change_ts.is_zero()); - assert_eq!(rollback.versions_to_last_change, 0); - - let feature_gate = FeatureGate::default(); - feature_gate.set_version("6.5.0").unwrap(); - set_tls_feature_gate(feature_gate); - - must_prewrite_put(&mut engine, k, b"v4", k, 45); - must_commit(&mut engine, k, 45, 50); - - // Rollback when there is no lock; prev writes: - // - 50: PUT - must_success(&mut engine, k, 55, SecondaryLocksStatus::RolledBack); - let rollback = must_written(&mut engine, k, 55, 55, WriteType::Rollback); - assert_eq!(rollback.last_change_ts, 50.into()); - assert_eq!(rollback.versions_to_last_change, 1); - - // Write a LOCK; prev writes: - // - 55: ROLLBACK - // - 50: PUT - let res = prewrite_with_cm( - &mut engine, - cm, - &mut statistics, - vec![Mutation::make_lock(Key::from_raw(k))], - k.to_vec(), - 60, - Some(70), - ) - .unwrap(); - assert!(!res.one_pc_commit_ts.is_zero()); - let lock_commit_ts = res.one_pc_commit_ts; - let lock = must_written(&mut engine, k, 60, res.one_pc_commit_ts, WriteType::Lock); - assert_eq!(lock.last_change_ts, 50.into()); - assert_eq!(lock.versions_to_last_change, 2); - - // Write another ROLLBACK by rolling back a pessimistic lock; prev writes: - // - 61: LOCK - // - 55: ROLLBACK - // - 50: PUT - must_acquire_pessimistic_lock(&mut engine, k, b"pk", 70, 75); - must_success(&mut engine, k, 70, SecondaryLocksStatus::RolledBack); - let rollback = must_written(&mut engine, k, 70, 70, WriteType::Rollback); - assert_eq!(rollback.last_change_ts, 50.into()); - assert_eq!(rollback.versions_to_last_change, 3); - - // last_change_ts should point to the latest record before start_ts; prev - // writes: - // - 8: PUT - must_acquire_pessimistic_lock(&mut 
engine, k, k, 10, 75); - must_success(&mut engine, k, 10, SecondaryLocksStatus::RolledBack); - must_unlocked(&mut engine, k); - let rollback = must_written(&mut engine, k, 10, 10, WriteType::Rollback); - assert_eq!(rollback.last_change_ts, 8.into()); - assert_eq!(rollback.versions_to_last_change, 1); - - // Overlapped rollback should not update the last_change_ts of PUT; prev writes: - // - 8: PUT <- rollback overlaps - // - 6: PUT - must_success(&mut engine, k, 8, SecondaryLocksStatus::RolledBack); - let put = must_written(&mut engine, k, 7, 8, WriteType::Put); - assert!(put.last_change_ts.is_zero()); - assert_eq!(put.versions_to_last_change, 0); - assert!(put.has_overlapped_rollback); - - // Overlapped rollback can update the last_change_ts of LOCK; writes: - // - 61: PUT <- rollback overlaps - // - 57: ROLLBACK (inserted later) - // - 55: ROLLBACK - // - 50: PUT - must_rollback(&mut engine, k, 57, true); - let rollback = must_written(&mut engine, k, 57, 57, WriteType::Rollback); - assert_eq!(rollback.last_change_ts, 50.into()); - assert_eq!(rollback.versions_to_last_change, 2); - must_success( - &mut engine, - k, - lock_commit_ts, - SecondaryLocksStatus::RolledBack, - ); - let lock = must_written(&mut engine, k, 60, lock_commit_ts, WriteType::Lock); - assert_eq!(lock.last_change_ts, 50.into()); - assert_eq!(lock.versions_to_last_change, 3); - } } diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index a118769a5db..34948109f4b 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -144,9 +144,8 @@ impl WriteCommand for CheckTxnStatus { pub mod tests { use concurrency_manager::ConcurrencyManager; use kvproto::kvrpcpb::{Context, PrewriteRequestPessimisticAction::*}; - use tikv_kv::Statistics; use tikv_util::deadline::Deadline; - use txn_types::{Key, Mutation, WriteType}; + use txn_types::{Key, WriteType}; use super::{TxnStatus::*, *}; use 
crate::storage::{ @@ -154,9 +153,7 @@ pub mod tests { lock_manager::MockLockManager, mvcc::tests::*, txn::{ - commands::{ - pessimistic_rollback, test_util::prewrite_with_cm, WriteCommand, WriteContext, - }, + commands::{pessimistic_rollback, WriteCommand, WriteContext}, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*, }, @@ -1169,105 +1166,24 @@ pub mod tests { #[test] fn test_rollback_calculate_last_change_info() { - use pd_client::FeatureGate; - - use crate::storage::txn::sched_pool::set_tls_feature_gate; - let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); - let cm = ConcurrencyManager::new(1.into()); let k = b"k"; - let mut statistics = Statistics::default(); + + // Below is a case explaining why we don't calculate last_change_ts for + // rollback. must_prewrite_put(&mut engine, k, b"v1", k, 5); must_commit(&mut engine, k, 5, 6); + must_prewrite_put(&mut engine, k, b"v2", k, 7); + // When we calculate last_change_ts here, we will get 6. + must_rollback(&mut engine, k, 10, true); + // But we can still commit with ts 8, then the last_change_ts of the rollback + // will be incorrect. must_commit(&mut engine, k, 7, 8); - must_prewrite_put(&mut engine, k, b"v3", k, 30); - must_commit(&mut engine, k, 30, 35); - - // TiKV 6.4 should not write last_change_ts. 
- let feature_gate = FeatureGate::default(); - feature_gate.set_version("6.4.0").unwrap(); - set_tls_feature_gate(feature_gate); - must_rollback(&mut engine, k, 40, true); - let rollback = must_written(&mut engine, k, 40, 40, WriteType::Rollback); - assert!(rollback.last_change_ts.is_zero()); - assert_eq!(rollback.versions_to_last_change, 0); - - let feature_gate = FeatureGate::default(); - feature_gate.set_version("6.5.0").unwrap(); - set_tls_feature_gate(feature_gate); - must_prewrite_put(&mut engine, k, b"v4", k, 45); - must_commit(&mut engine, k, 45, 50); - - // Rollback when there is no lock; prev writes: - // - 50: PUT - must_rollback(&mut engine, k, 55, true); - let rollback = must_written(&mut engine, k, 55, 55, WriteType::Rollback); - assert_eq!(rollback.last_change_ts, 50.into()); - assert_eq!(rollback.versions_to_last_change, 1); - - // Write a LOCK; prev writes: - // - 55: ROLLBACK - // - 50: PUT - let res = prewrite_with_cm( - &mut engine, - cm, - &mut statistics, - vec![Mutation::make_lock(Key::from_raw(k))], - k.to_vec(), - 60, - Some(70), - ) - .unwrap(); - assert!(!res.one_pc_commit_ts.is_zero()); - let lock_commit_ts = res.one_pc_commit_ts; - let lock = must_written(&mut engine, k, 60, res.one_pc_commit_ts, WriteType::Lock); - assert_eq!(lock.last_change_ts, 50.into()); - assert_eq!(lock.versions_to_last_change, 2); - - // Write another ROLLBACK; prev writes: - // - 61: LOCK - // - 55: ROLLBACK - // - 50: PUT - must_rollback(&mut engine, k, 70, true); - let rollback = must_written(&mut engine, k, 70, 70, WriteType::Rollback); - assert_eq!(rollback.last_change_ts, 50.into()); - assert_eq!(rollback.versions_to_last_change, 3); - - // last_change_ts should point to the latest record before start_ts; prev - // writes: - // - 8: PUT - must_acquire_pessimistic_lock(&mut engine, k, k, 10, 75); - must_pessimistic_prewrite_put(&mut engine, k, b"v5", k, 10, 75, DoPessimisticCheck); - must_rollback(&mut engine, k, 10, true); - must_unlocked(&mut engine, k); 
let rollback = must_written(&mut engine, k, 10, 10, WriteType::Rollback); - assert_eq!(rollback.last_change_ts, 8.into()); - assert_eq!(rollback.versions_to_last_change, 1); - - // Overlapped rollback should not update the last_change_ts of PUT; prev writes: - // - 8: PUT <- rollback overlaps - // - 6: PUT - must_rollback(&mut engine, k, 8, true); - let put = must_written(&mut engine, k, 7, 8, WriteType::Put); - assert!(put.last_change_ts.is_zero()); - assert_eq!(put.versions_to_last_change, 0); - assert!(put.has_overlapped_rollback); - - // Overlapped rollback can update the last_change_ts of LOCK; writes: - // - 61: PUT <- rollback overlaps - // - 57: ROLLBACK (inserted later) - // - 55: ROLLBACK - // - 50: PUT - must_rollback(&mut engine, k, 57, true); - let rollback = must_written(&mut engine, k, 57, 57, WriteType::Rollback); - assert_eq!(rollback.last_change_ts, 50.into()); - assert_eq!(rollback.versions_to_last_change, 2); - must_rollback(&mut engine, k, lock_commit_ts, true); - let lock = must_written(&mut engine, k, 60, lock_commit_ts, WriteType::Lock); - assert_eq!(lock.last_change_ts, 50.into()); - assert_eq!(lock.versions_to_last_change, 3); + assert!(rollback.last_change_ts.is_zero()); + assert_eq!(rollback.versions_to_last_change, 0); } } From 3ab299cff9273ac53e9f57b751c3f45e116c5958 Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 23 Nov 2022 14:07:58 +0800 Subject: [PATCH 0356/1149] tikv_kv: make async_snapshot return future (#13836) ref tikv/tikv#13827 so we can reduce allocation and keep compatible with both v1 and v2. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/tikv_kv/src/btree_engine.rs | 11 +- components/tikv_kv/src/lib.rs | 43 +++---- components/tikv_kv/src/mock_engine.rs | 5 +- components/tikv_kv/src/rocksdb_engine.rs | 36 +++--- src/lib.rs | 1 + src/server/gc_worker/gc_worker.rs | 36 +++--- src/server/raftkv.rs | 126 +++++++++++---------- src/storage/mod.rs | 18 ++- tests/benches/misc/raftkv/mod.rs | 10 +- tests/failpoints/cases/test_coprocessor.rs | 4 +- 10 files changed, 144 insertions(+), 146 deletions(-) diff --git a/components/tikv_kv/src/btree_engine.rs b/components/tikv_kv/src/btree_engine.rs index 473b993bf39..45ce6a6ffe8 100644 --- a/components/tikv_kv/src/btree_engine.rs +++ b/components/tikv_kv/src/btree_engine.rs @@ -14,6 +14,7 @@ use std::{ use collections::HashMap; use engine_panic::PanicEngine; use engine_traits::{CfName, IterOptions, ReadOptions, CF_DEFAULT, CF_LOCK, CF_WRITE}; +use futures::Future; use kvproto::kvrpcpb::Context; use txn_types::{Key, Value}; @@ -100,15 +101,11 @@ impl Engine for BTreeEngine { Ok(()) } + type SnapshotRes = impl Future> + Send; /// warning: It returns a fake snapshot whose content will be affected by /// the later modifies! - fn async_snapshot( - &mut self, - _ctx: SnapContext<'_>, - cb: EngineCallback, - ) -> EngineResult<()> { - cb(Ok(BTreeEngineSnapshot::new(self))); - Ok(()) + fn async_snapshot(&mut self, _ctx: SnapContext<'_>) -> Self::SnapshotRes { + futures::future::ready(Ok(BTreeEngineSnapshot::new(self))) } } diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 3e15b399796..b5f19832419 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -6,6 +6,7 @@ //! [`RocksEngine`](RocksEngine) are used for testing only. 
#![feature(min_specialization)] +#![feature(type_alias_impl_trait)] #[macro_use(fail_point)] extern crate fail; @@ -35,7 +36,7 @@ use engine_traits::{ CF_DEFAULT, CF_LOCK, }; use error_code::{self, ErrorCode, ErrorCodeExt}; -use futures::{future::BoxFuture, prelude::*}; +use futures::{compat::Future01CompatExt, future::BoxFuture, prelude::*}; use into_other::IntoOther; use kvproto::{ errorpb::Error as ErrorHeader, @@ -45,7 +46,7 @@ use kvproto::{ use pd_client::BucketMeta; use raftstore::store::{PessimisticLockPair, TxnExt}; use thiserror::Error; -use tikv_util::{deadline::Deadline, escape, time::ThreadReadId}; +use tikv_util::{deadline::Deadline, escape, time::ThreadReadId, timer::GLOBAL_TIMER_HANDLE}; use tracker::with_tls_tracker; use txn_types::{Key, PessimisticLock, TimeStamp, TxnExtra, Value}; @@ -61,7 +62,7 @@ pub use self::{ }; pub const SEEK_BOUND: u64 = 8; -const DEFAULT_TIMEOUT_SECS: u64 = 5; +const DEFAULT_TIMEOUT: Duration = Duration::from_secs(5); pub type Callback = Box) + Send>; pub type ExtCallback = Box; @@ -277,7 +278,8 @@ pub trait Engine: Send + Clone + 'static { /// region_modifies records each region's modifications. fn modify_on_kv_engine(&self, region_modifies: HashMap>) -> Result<()>; - fn async_snapshot(&mut self, ctx: SnapContext<'_>, cb: Callback) -> Result<()>; + type SnapshotRes: Future> + Send + 'static; + fn async_snapshot(&mut self, ctx: SnapContext<'_>) -> Self::SnapshotRes; /// Precheck request which has write with it's context. 
fn precheck_write_with_ctx(&self, _ctx: &Context) -> Result<()> { @@ -302,17 +304,21 @@ pub trait Engine: Send + Clone + 'static { } fn write(&self, ctx: &Context, batch: WriteData) -> Result<()> { - let timeout = Duration::from_secs(DEFAULT_TIMEOUT_SECS); - wait_op!(|cb| self.async_write(ctx, batch, cb), timeout) - .unwrap_or_else(|| Err(Error::from(ErrorInner::Timeout(timeout)))) + wait_op!(|cb| self.async_write(ctx, batch, cb), DEFAULT_TIMEOUT) + .unwrap_or_else(|| Err(Error::from(ErrorInner::Timeout(DEFAULT_TIMEOUT)))) } fn release_snapshot(&mut self) {} fn snapshot(&mut self, ctx: SnapContext<'_>) -> Result { - let timeout = Duration::from_secs(DEFAULT_TIMEOUT_SECS); - wait_op!(|cb| self.async_snapshot(ctx, cb), timeout) - .unwrap_or_else(|| Err(Error::from(ErrorInner::Timeout(timeout)))) + let deadline = Instant::now() + DEFAULT_TIMEOUT; + let timeout = GLOBAL_TIMER_HANDLE.delay(deadline).compat(); + futures::executor::block_on(async move { + futures::select! { + res = self.async_snapshot(ctx).fuse() => res, + _ = timeout.fuse() => Err(Error::from(ErrorInner::Timeout(DEFAULT_TIMEOUT))), + } + }) } fn put(&self, ctx: &Context, key: Key, value: Value) -> Result<()> { @@ -598,15 +604,10 @@ pub fn snapshot( ctx: SnapContext<'_>, ) -> impl std::future::Future> { let begin = Instant::now(); - let (callback, future) = - tikv_util::future::paired_must_called_future_callback(drop_snapshot_callback::); - let val = engine.async_snapshot(ctx, callback); + let val = engine.async_snapshot(ctx); // make engine not cross yield point async move { - val?; // propagate error - let result = future - .map_err(|cancel| Error::from(ErrorInner::Other(box_err!(cancel)))) - .await?; + let result = val.await; with_tls_tracker(|tracker| { tracker.metrics.get_snapshot_nanos += begin.elapsed().as_nanos() as u64; }); @@ -615,14 +616,6 @@ pub fn snapshot( } } -pub fn drop_snapshot_callback() -> Result { - let bt = backtrace::Backtrace::new(); - warn!("async snapshot callback is dropped"; 
"backtrace" => ?bt); - let mut err = ErrorHeader::default(); - err.set_message("async snapshot callback is dropped".to_string()); - Err(Error::from(ErrorInner::Request(err))) -} - /// Write modifications into a `BaseRocksEngine` instance. pub fn write_modifies(kv_engine: &impl LocalEngine, modifies: Vec) -> Result<()> { fail_point!("rockskv_write_modifies", |_| Err(box_err!("write failed"))); diff --git a/components/tikv_kv/src/mock_engine.rs b/components/tikv_kv/src/mock_engine.rs index 84605a04084..376c2d1fb1f 100644 --- a/components/tikv_kv/src/mock_engine.rs +++ b/components/tikv_kv/src/mock_engine.rs @@ -157,8 +157,9 @@ impl Engine for MockEngine { self.base.modify_on_kv_engine(region_modifies) } - fn async_snapshot(&mut self, ctx: SnapContext<'_>, cb: Callback) -> Result<()> { - self.base.async_snapshot(ctx, cb) + type SnapshotRes = ::SnapshotRes; + fn async_snapshot(&mut self, ctx: SnapContext<'_>) -> Self::SnapshotRes { + self.base.async_snapshot(ctx) } fn async_write(&self, ctx: &Context, batch: WriteData, write_cb: Callback<()>) -> Result<()> { diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 0ef9b5b274c..8b0dd28646a 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -18,6 +18,7 @@ use engine_traits::{ CfName, Engines, IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, }; use file_system::IoRateLimiter; +use futures::{channel::oneshot, Future}; use kvproto::{kvrpcpb::Context, metapb, raft_cmdpb}; use raftstore::coprocessor::CoprocessorHost; use tempfile::{Builder, TempDir}; @@ -34,7 +35,7 @@ const TEMP_DIR: &str = ""; enum Task { Write(Vec, Callback<()>), - Snapshot(Callback>), + Snapshot(oneshot::Sender>), Pause(Duration), } @@ -56,7 +57,9 @@ impl Runnable for Runner { fn run(&mut self, t: Task) { match t { Task::Write(modifies, cb) => cb(write_modifies(&self.0.kv, modifies)), - Task::Snapshot(cb) => 
cb(Ok(Arc::new(self.0.kv.snapshot()))), + Task::Snapshot(sender) => { + let _ = sender.send(Arc::new(self.0.kv.snapshot())); + } Task::Pause(dur) => std::thread::sleep(dur), } } @@ -253,18 +256,23 @@ impl Engine for RocksEngine { Ok(()) } - fn async_snapshot(&mut self, _: SnapContext<'_>, cb: Callback) -> Result<()> { - fail_point!("rockskv_async_snapshot", |_| Err(box_err!( - "snapshot failed" - ))); - fail_point!("rockskv_async_snapshot_not_leader", |_| { - Err(self.not_leader_error()) - }); - if self.not_leader.load(Ordering::SeqCst) { - return Err(self.not_leader_error()); - } - box_try!(self.sched.schedule(Task::Snapshot(cb))); - Ok(()) + type SnapshotRes = impl Future> + Send; + fn async_snapshot(&mut self, _: SnapContext<'_>) -> Self::SnapshotRes { + let res = (|| { + fail_point!("rockskv_async_snapshot", |_| Err(box_err!( + "snapshot failed" + ))); + if self.not_leader.load(Ordering::SeqCst) { + return Err(self.not_leader_error()); + } + let (tx, rx) = oneshot::channel(); + if self.sched.schedule(Task::Snapshot(tx)).is_err() { + return Err(box_err!("failed to schedule snapshot")); + } + Ok(rx) + })(); + + async move { Ok(res?.await.unwrap()) } } } diff --git a/src/lib.rs b/src/lib.rs index f4fcd1cd97c..43d5db81458 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,6 +26,7 @@ #![feature(drain_filter)] #![feature(deadline_api)] #![feature(let_chains)] +#![feature(type_alias_impl_trait)] #[macro_use(fail_point)] extern crate fail; diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 9e3f79654bc..9c3c289ecf7 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -1287,6 +1287,7 @@ pub mod test_gc_worker { use collections::HashMap; use engine_rocks::{RocksEngine, RocksSnapshot}; + use futures::Future; use kvproto::{ kvrpcpb::Context, metapb::{Peer, Region}, @@ -1378,22 +1379,16 @@ pub mod test_gc_worker { self.0.async_write(ctx, batch, callback) } - fn async_snapshot( - &mut self, - ctx: 
SnapContext<'_>, - callback: EngineCallback, - ) -> EngineResult<()> { - self.0.async_snapshot( - ctx, - Box::new(move |r| { - callback(r.map(|snap| { - let mut region = Region::default(); - // Add a peer to pass initialized check. - region.mut_peers().push(Peer::default()); - RegionSnapshot::from_snapshot(snap, Arc::new(region)) - })) - }), - ) + type SnapshotRes = impl Future> + Send; + fn async_snapshot(&mut self, ctx: SnapContext<'_>) -> Self::SnapshotRes { + let f = self.0.async_snapshot(ctx); + async move { + let snap = f.await?; + let mut region = Region::default(); + // Add a peer to pass initialized check. + region.mut_peers().push(Peer::default()); + Ok(RegionSnapshot::from_snapshot(snap, Arc::new(region))) + } } } @@ -1441,18 +1436,15 @@ pub mod test_gc_worker { self.engines.lock().unwrap()[&ctx.region_id].async_write(ctx, batch, callback) } - fn async_snapshot( - &mut self, - ctx: SnapContext<'_>, - callback: EngineCallback, - ) -> EngineResult<()> { + type SnapshotRes = impl Future> + Send; + fn async_snapshot(&mut self, ctx: SnapContext<'_>) -> Self::SnapshotRes { let region_id = ctx.pb_ctx.region_id; self.engines .lock() .unwrap() .get_mut(®ion_id) .unwrap() - .async_snapshot(ctx, callback) + .async_snapshot(ctx) } } } diff --git a/src/server/raftkv.rs b/src/server/raftkv.rs index 8bef31eaebd..6dc84f951ee 100644 --- a/src/server/raftkv.rs +++ b/src/server/raftkv.rs @@ -15,7 +15,7 @@ use std::{ use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; use engine_traits::{CfName, KvEngine, MvccProperties, Snapshot}; -use futures::future::BoxFuture; +use futures::{future::BoxFuture, Future}; use kvproto::{ errorpb, kvrpcpb::{Context, IsolationLevel}, @@ -41,7 +41,11 @@ use raftstore::{ }; use thiserror::Error; use tikv_kv::write_modifies; -use tikv_util::{codec::number::NumberEncoder, future::paired_future_callback, time::Instant}; +use tikv_util::{ + codec::number::NumberEncoder, + future::{paired_future_callback, 
paired_must_called_future_callback}, + time::Instant, +}; use txn_types::{Key, TimeStamp, TxnExtra, TxnExtraScheduler, WriteBatchFlags}; use super::metrics::*; @@ -196,6 +200,14 @@ fn exec_admin>( }) } +pub fn drop_snapshot_callback() -> kv::Result { + let bt = backtrace::Backtrace::new(); + warn!("async snapshot callback is dropped"; "backtrace" => ?bt); + let mut err = errorpb::Error::default(); + err.set_message("async snapshot callback is dropped".to_string()); + Err(kv::Error::from(kv::ErrorInner::Request(err))) +} + /// `RaftKv` is a storage engine base on `RaftStore`. #[derive(Clone)] pub struct RaftKv @@ -228,41 +240,6 @@ where self.txn_extra_scheduler = Some(txn_extra_scheduler); } - fn exec_snapshot( - &mut self, - ctx: SnapContext<'_>, - req: Request, - cb: Callback>, - ) -> Result<()> { - let mut header = new_request_header(ctx.pb_ctx); - let mut flags = 0; - if ctx.pb_ctx.get_stale_read() && ctx.start_ts.map_or(true, |ts| !ts.is_zero()) { - let mut data = [0u8; 8]; - (&mut data[..]) - .encode_u64(ctx.start_ts.unwrap_or_default().into_inner()) - .unwrap(); - flags |= WriteBatchFlags::STALE_READ.bits(); - header.set_flag_data(data.into()); - } - if ctx.for_flashback { - flags |= WriteBatchFlags::FLASHBACK.bits(); - } - header.set_flags(flags); - - let mut cmd = RaftCmdRequest::default(); - cmd.set_header(header); - cmd.set_requests(vec![req].into()); - self.router - .read( - ctx.read_id, - cmd, - StoreCallback::read(Box::new(move |resp| { - cb(on_read_result(resp).map_err(Error::into)); - })), - ) - .map_err(From::from) - } - fn exec_write_requests( &self, ctx: &Context, @@ -462,14 +439,14 @@ where }) } - fn async_snapshot( - &mut self, - mut ctx: SnapContext<'_>, - cb: Callback, - ) -> kv::Result<()> { - fail_point!("raftkv_async_snapshot_err", |_| Err(box_err!( - "injected error for async_snapshot" - ))); + type SnapshotRes = impl Future> + Send; + fn async_snapshot(&mut self, mut ctx: SnapContext<'_>) -> Self::SnapshotRes { + let mut res: 
kv::Result<()> = (|| { + fail_point!("raftkv_async_snapshot_err", |_| { + Err(box_err!("injected error for async_snapshot")) + }); + Ok(()) + })(); let mut req = Request::default(); req.set_cmd_type(CmdType::Snap); @@ -481,10 +458,46 @@ where } ASYNC_REQUESTS_COUNTER_VEC.snapshot.all.inc(); let begin_instant = Instant::now_coarse(); - self.exec_snapshot( - ctx, - req, - Box::new(move |res| match res { + let (cb, f) = paired_must_called_future_callback(drop_snapshot_callback); + + let mut header = new_request_header(ctx.pb_ctx); + let mut flags = 0; + if ctx.pb_ctx.get_stale_read() && ctx.start_ts.map_or(true, |ts| !ts.is_zero()) { + let mut data = [0u8; 8]; + (&mut data[..]) + .encode_u64(ctx.start_ts.unwrap_or_default().into_inner()) + .unwrap(); + flags |= WriteBatchFlags::STALE_READ.bits(); + header.set_flag_data(data.into()); + } + if ctx.for_flashback { + flags |= WriteBatchFlags::FLASHBACK.bits(); + } + header.set_flags(flags); + + let mut cmd = RaftCmdRequest::default(); + cmd.set_header(header); + cmd.set_requests(vec![req].into()); + if res.is_ok() { + res = self + .router + .read( + ctx.read_id, + cmd, + StoreCallback::read(Box::new(move |resp| { + cb(on_read_result(resp).map_err(Error::into)); + })), + ) + .map_err(kv::Error::from); + } + async move { + // It's impossible to return cancel because the callback will be invoked if it's + // destroyed. 
+ let res = match res { + Ok(()) => f.await.unwrap(), + Err(e) => Err(e), + }; + match res { Ok(CmdRes::Resp(mut r)) => { let e = if r .get(0) @@ -496,27 +509,22 @@ where } else { invalid_resp_type(CmdType::Snap, r[0].get_cmd_type()).into() }; - cb(Err(e)) + Err(e) } Ok(CmdRes::Snap(s)) => { ASYNC_REQUESTS_DURATIONS_VEC .snapshot .observe(begin_instant.saturating_elapsed_secs()); ASYNC_REQUESTS_COUNTER_VEC.snapshot.success.inc(); - cb(Ok(s)) + Ok(s) } Err(e) => { let status_kind = get_status_kind_from_engine_error(&e); ASYNC_REQUESTS_COUNTER_VEC.snapshot.get(status_kind).inc(); - cb(Err(e)) + Err(e) } - }), - ) - .map_err(|e| { - let status_kind = get_status_kind_from_error(&e); - ASYNC_REQUESTS_COUNTER_VEC.snapshot.get(status_kind).inc(); - e.into() - }) + } + } } fn release_snapshot(&mut self) { diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 2032ffd86ae..55d8575101c 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -2983,18 +2983,14 @@ impl Engine for TxnTestEngine { self.engine.modify_on_kv_engine(region_modifies) } - fn async_snapshot( - &mut self, - ctx: SnapContext<'_>, - cb: tikv_kv::Callback, - ) -> tikv_kv::Result<()> { + type SnapshotRes = impl Future> + Send; + fn async_snapshot(&mut self, ctx: SnapContext<'_>) -> Self::SnapshotRes { let txn_ext = self.txn_ext.clone(); - self.engine.async_snapshot( - ctx, - Box::new(move |snapshot| { - cb(snapshot.map(|snapshot| TxnTestSnapshot { snapshot, txn_ext })) - }), - ) + let f = self.engine.async_snapshot(ctx); + async move { + let snapshot = f.await?; + Ok(TxnTestSnapshot { snapshot, txn_ext }) + } } fn async_write( diff --git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index a949570ebe1..bc4786ae73e 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -6,6 +6,7 @@ use collections::HashSet; use crossbeam::channel::TrySendError; use engine_rocks::{RocksEngine, RocksSnapshot}; use engine_traits::{KvEngine, ALL_CFS, CF_DEFAULT}; 
+use futures::future::FutureExt; use kvproto::{ kvrpcpb::{Context, ExtraOp as TxnExtraOp}, metapb::Region, @@ -191,14 +192,15 @@ fn bench_async_snapshot(b: &mut test::Bencher) { ctx.set_region_epoch(region.get_region_epoch().clone()); ctx.set_peer(leader); b.iter(|| { - let on_finished: EngineCallback> = Box::new(move |results| { - let _ = test::black_box(results); - }); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() }; - kv.async_snapshot(snap_ctx, on_finished).unwrap(); + let f = kv.async_snapshot(snap_ctx); + let res = f.map(|res| { + let _ = test::black_box(res); + }); + let _ = test::black_box(res); }); } diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index 481e533a879..c515b8d66cb 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -138,10 +138,10 @@ fn test_snapshot_failed() { #[test] fn test_snapshot_failed_2() { let product = ProductTable::new(); - let (_, endpoint) = init_with_data(&product, &[]); + let (store, endpoint) = init_with_data(&product, &[]); let req = DagSelect::from(&product).build(); - fail::cfg("rockskv_async_snapshot_not_leader", "return()").unwrap(); + store.get_engine().trigger_not_leader(); let resp = handle_request(&endpoint, req); assert!(resp.get_region_error().has_not_leader()); From 970f5623672ae13c411092f9f208a0674dfd9ce8 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Wed, 23 Nov 2022 20:23:58 +0800 Subject: [PATCH 0357/1149] mvcc: fix get_write may return Delete when skipping versions (#13840) close tikv/tikv#13839 The mvcc get_write methods should return None if the found record is a Delete. This constraint is broken after the introduction of skipping locks, causing panic when reading values. Now, it's changed to use the same logic in the loop (just like the PointGetter and the ForwardScanner) to avoid the problem. 
Signed-off-by: Yilin Chen --- src/storage/mvcc/reader/reader.rs | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index a6aae85761f..dd6bff6a157 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -367,8 +367,9 @@ impl MvccReader { mut ts: TimeStamp, gc_fence_limit: Option, ) -> Result> { + let mut seek_res = self.seek_write(key, ts)?; loop { - match self.seek_write(key, ts)? { + match seek_res { Some((commit_ts, write)) => { if let Some(limit) = gc_fence_limit { if !write.as_ref().check_gc_fence_as_latest_version(limit) { @@ -404,13 +405,15 @@ impl MvccReader { commit_ts, write.write_type, ); - return Ok(Some((write, commit_ts))); + seek_res = Some((commit_ts, write)); + continue; } } } } None => return Ok(None), } + seek_res = self.seek_write(key, ts)?; } } @@ -2540,11 +2543,20 @@ pub mod tests { engine.prewrite(m, k, 1); engine.commit(k, 1, 2); - // Write enough ROLLBACK/LOCK recrods + // Write enough LOCK recrods for start_ts in (6..30).into_iter().step_by(2) { engine.lock(k, start_ts, start_ts + 1); } + let m = Mutation::make_delete(Key::from_raw(k)); + engine.prewrite(m, k, 45); + engine.commit(k, 45, 46); + + // Write enough LOCK recrods + for start_ts in (50..80).into_iter().step_by(2) { + engine.lock(k, start_ts, start_ts + 1); + } + let snap = RegionSnapshot::::from_raw(db, region); let mut reader = MvccReader::new(snap, None, false); @@ -2567,5 +2579,17 @@ pub mod tests { // instead of calling a series of next, so the count of next should be 0 instead assert_eq!(reader.statistics.write.next, 0); assert_eq!(reader.statistics.write.get, 1); + + // Clear statistics first + reader.statistics = Statistics::default(); + let res = reader + .get_write_with_commit_ts(&key, 80.into(), None) + .unwrap(); + // If the type is Delete, get_write_with_commit_ts should return None. 
+ assert!(res.is_none()); + // versions_to_last_change should be large enough to trigger a second get + // instead of calling a series of next, so the count of next should be 0 instead + assert_eq!(reader.statistics.write.next, 0); + assert_eq!(reader.statistics.write.get, 1); } } From 213e5020c96404f0a5d56fc45f37c22359c7a1f9 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 24 Nov 2022 11:27:59 +0800 Subject: [PATCH 0358/1149] raftstore-v2: pd worker (#13773) ref tikv/tikv#12842 Signed-off-by: tabokie Co-authored-by: SpadeA-Tang --- Cargo.lock | 6 + components/raftstore-v2/Cargo.toml | 6 + components/raftstore-v2/src/batch/store.rs | 52 ++- components/raftstore-v2/src/fsm/peer.rs | 2 +- components/raftstore-v2/src/fsm/store.rs | 42 ++- components/raftstore-v2/src/lib.rs | 2 + .../operation/command/admin/conf_change.rs | 40 ++- .../src/operation/command/admin/split.rs | 23 +- .../raftstore-v2/src/operation/command/mod.rs | 3 +- components/raftstore-v2/src/operation/life.rs | 4 +- components/raftstore-v2/src/operation/mod.rs | 1 + components/raftstore-v2/src/operation/pd.rs | 230 ++++++++++++ .../raftstore-v2/src/operation/ready/mod.rs | 18 +- components/raftstore-v2/src/raft/peer.rs | 92 ++++- components/raftstore-v2/src/worker/mod.rs | 5 + components/raftstore-v2/src/worker/pd/mod.rs | 327 ++++++++++++++++++ .../src/worker/pd/region_heartbeat.rs | 256 ++++++++++++++ .../raftstore-v2/src/worker/pd/split.rs | 99 ++++++ .../src/worker/pd/store_heartbeat.rs | 293 ++++++++++++++++ .../src/worker/pd/update_max_timestamp.rs | 114 ++++++ .../tests/integrations/cluster.rs | 30 +- .../raftstore-v2/tests/integrations/mod.rs | 1 + .../tests/integrations/test_pd_heartbeat.rs | 60 ++++ .../tests/integrations/test_split.rs | 2 +- components/test_pd/src/mocker/service.rs | 27 +- 25 files changed, 1682 insertions(+), 53 deletions(-) create mode 100644 components/raftstore-v2/src/operation/pd.rs create mode 100644 components/raftstore-v2/src/worker/mod.rs create mode 100644 
components/raftstore-v2/src/worker/pd/mod.rs create mode 100644 components/raftstore-v2/src/worker/pd/region_heartbeat.rs create mode 100644 components/raftstore-v2/src/worker/pd/split.rs create mode 100644 components/raftstore-v2/src/worker/pd/store_heartbeat.rs create mode 100644 components/raftstore-v2/src/worker/pd/update_max_timestamp.rs create mode 100644 components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs diff --git a/Cargo.lock b/Cargo.lock index 7425528342d..487d2712249 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4282,22 +4282,27 @@ name = "raftstore-v2" version = "0.1.0" dependencies = [ "batch-system", + "causal_ts", "collections", + "concurrency_manager", "crossbeam", "engine_test", "engine_traits", "error_code", "fail", "file_system", + "fs2", "futures 0.3.15", "keys", "kvproto", "log_wrappers", "pd_client", + "prometheus", "protobuf", "raft", "raft-proto", "raftstore", + "resource_metering", "slog", "slog-global", "smallvec", @@ -4308,6 +4313,7 @@ dependencies = [ "time", "tracker", "txn_types", + "yatp", ] [[package]] diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 8bb91b40bb9..1679732ccda 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -30,27 +30,33 @@ cloud-azure = ["raftstore/cloud-azure"] [dependencies] batch-system = { workspace = true } +causal_ts = { workspace = true } collections = { workspace = true } +concurrency_manager = { workspace = true } crossbeam = "0.8" engine_traits = { workspace = true } error_code = { workspace = true } fail = "0.5" file_system = { workspace = true } +fs2 = "0.4" futures = { version = "0.3", features = ["compat"] } keys = { workspace = true } kvproto = { git = "https://github.com/pingcap/kvproto.git" } log_wrappers = { workspace = true } pd_client = { workspace = true } +prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", 
default-features = false, features = ["protobuf-codec"] } raft-proto = { version = "0.7.0" } raftstore = { workspace = true } +resource_metering = { workspace = true } slog = "2.3" smallvec = "1.4" tikv_util = { workspace = true } time = "0.1" tracker = { workspace = true } txn_types = { workspace = true } +yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } [dev-dependencies] engine_test = { workspace = true } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 605bbb95131..1eea2017571 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -3,14 +3,19 @@ use std::{ ops::{Deref, DerefMut}, path::Path, - sync::{Arc, Mutex}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, Mutex, + }, time::Duration, }; use batch_system::{ BasicMailbox, BatchRouter, BatchSystem, HandleResult, HandlerBuilder, PollHandler, }; +use causal_ts::CausalTsProviderImpl; use collections::HashMap; +use concurrency_manager::ConcurrencyManager; use crossbeam::channel::{Sender, TrySendError}; use engine_traits::{Engines, KvEngine, RaftEngine, TabletFactory}; use file_system::{set_io_type, IoType}; @@ -19,6 +24,7 @@ use kvproto::{ metapb::Store, raft_serverpb::{PeerState, RaftMessage}, }; +use pd_client::PdClient; use raft::INVALID_ID; use raftstore::store::{ fsm::store::PeerTickBatch, local_metrics::RaftMetrics, Config, ReadRunner, ReadTask, @@ -43,6 +49,7 @@ use crate::{ fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate, StoreMeta}, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, + worker::{PdRunner, PdTask}, Error, Result, }; @@ -70,6 +77,7 @@ pub struct StoreContext { pub tablet_factory: Arc>, pub apply_pool: FuturePool, pub read_scheduler: Scheduler>, + pub pd_scheduler: Scheduler, } /// A [`PollHandler`] that handles updates of [`StoreFsm`]s and [`PeerFsm`]s. 
@@ -217,6 +225,7 @@ struct StorePollerBuilder { trans: T, router: StoreRouter, read_scheduler: Scheduler>, + pd_scheduler: Scheduler, write_senders: WriteSenders, apply_pool: FuturePool, logger: Logger, @@ -232,6 +241,7 @@ impl StorePollerBuilder { trans: T, router: StoreRouter, read_scheduler: Scheduler>, + pd_scheduler: Scheduler, store_writers: &mut StoreWriters, logger: Logger, store_meta: Arc>>, @@ -254,6 +264,7 @@ impl StorePollerBuilder { trans, router, read_scheduler, + pd_scheduler, apply_pool, logger, write_senders: store_writers.senders(), @@ -330,6 +341,7 @@ where tablet_factory: self.tablet_factory.clone(), apply_pool: self.apply_pool.clone(), read_scheduler: self.read_scheduler.clone(), + pd_scheduler: self.pd_scheduler.clone(), }; let cfg_tracker = self.cfg.clone().tracker("raftstore".to_string()); StorePoller::new(poll_ctx, cfg_tracker) @@ -341,6 +353,7 @@ where struct Workers { /// Worker for fetching raft logs asynchronously async_read_worker: Worker, + pd_worker: Worker, store_writers: StoreWriters, } @@ -348,6 +361,7 @@ impl Default for Workers { fn default() -> Self { Self { async_read_worker: Worker::new("async-read-worker"), + pd_worker: Worker::new("pd-worker"), store_writers: StoreWriters::default(), } } @@ -358,23 +372,36 @@ pub struct StoreSystem { system: BatchSystem, StoreFsm>, workers: Option>, logger: Logger, + shutdown: Arc, } impl StoreSystem { - pub fn start( + pub fn start( &mut self, store_id: u64, cfg: Arc>, raft_engine: ER, tablet_factory: Arc>, trans: T, + pd_client: Arc, router: &StoreRouter, store_meta: Arc>>, snap_mgr: TabletSnapManager, + concurrency_manager: ConcurrencyManager, + causal_ts_provider: Option>, // used for rawkv apiv2 ) -> Result<()> where T: Transport + 'static, + C: PdClient + 'static, { + let sync_router = Mutex::new(router.clone()); + pd_client.handle_reconnect(move || { + sync_router + .lock() + .unwrap() + .broadcast_normal(|| PeerMsg::Tick(PeerTick::PdHeartbeat)); + }); + let mut workers = 
Workers::default(); workers .store_writers @@ -386,6 +413,22 @@ impl StoreSystem { .async_read_worker .start("async-read-worker", read_runner); + let pd_scheduler = workers.pd_worker.start( + "pd-worker", + PdRunner::new( + store_id, + pd_client, + raft_engine.clone(), + tablet_factory.clone(), + router.clone(), + workers.pd_worker.remote(), + concurrency_manager, + causal_ts_provider, + self.logger.clone(), + self.shutdown.clone(), + ), + ); + let mut builder = StorePollerBuilder::new( cfg.clone(), store_id, @@ -394,6 +437,7 @@ impl StoreSystem { trans, router.clone(), read_scheduler, + pd_scheduler, &mut workers.store_writers, self.logger.clone(), store_meta.clone(), @@ -433,6 +477,8 @@ impl StoreSystem { } pub fn shutdown(&mut self) { + self.shutdown.store(true, Ordering::Relaxed); + if self.workers.is_none() { return; } @@ -444,6 +490,7 @@ impl StoreSystem { workers.store_writers.shutdown(); workers.async_read_worker.stop(); + workers.pd_worker.stop(); } } @@ -520,6 +567,7 @@ where system, workers: None, logger: logger.clone(), + shutdown: Arc::new(AtomicBool::new(false)), }; (StoreRouter { router, logger }, system) } diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 6fac2d88db0..cd93463a524 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -193,9 +193,9 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, fn on_tick(&mut self, tick: PeerTick) { match tick { PeerTick::Raft => self.on_raft_tick(), + PeerTick::PdHeartbeat => self.on_pd_heartbeat(), PeerTick::RaftLogGc => unimplemented!(), PeerTick::SplitRegionCheck => unimplemented!(), - PeerTick::PdHeartbeat => unimplemented!(), PeerTick::CheckMerge => unimplemented!(), PeerTick::CheckPeerStaleState => unimplemented!(), PeerTick::EntryCacheEvict => unimplemented!(), diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 
0d390d5b51d..546ec95a604 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -1,17 +1,22 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::time::SystemTime; +use std::time::{Duration, SystemTime}; use batch_system::Fsm; use collections::HashMap; use engine_traits::{KvEngine, RaftEngine}; +use futures::{compat::Future01CompatExt, FutureExt}; use kvproto::{metapb::Region, raft_serverpb::RaftMessage}; use raftstore::{ coprocessor::RegionChangeReason, store::{Config, ReadDelegate, RegionReadProgressRegistry}, }; -use slog::{o, Logger}; -use tikv_util::mpsc::{self, LooseBoundedSender, Receiver}; +use slog::{info, o, Logger}; +use tikv_util::{ + future::poll_future_notify, + is_zero_duration, + mpsc::{self, LooseBoundedSender, Receiver}, +}; use crate::{ batch::StoreContext, @@ -82,7 +87,7 @@ impl Store { } pub struct StoreFsm { - store: Store, + pub store: Store, receiver: Receiver, } @@ -126,8 +131,8 @@ impl Fsm for StoreFsm { } pub struct StoreFsmDelegate<'a, EK: KvEngine, ER: RaftEngine, T> { - fsm: &'a mut StoreFsm, - store_ctx: &'a mut StoreContext, + pub fsm: &'a mut StoreFsm, + pub store_ctx: &'a mut StoreContext, } impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { @@ -145,10 +150,33 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { .duration_since(SystemTime::UNIX_EPOCH) .map_or(0, |d| d.as_secs()), ); + + self.on_pd_store_heartbeat(); + } + + pub fn schedule_tick(&mut self, tick: StoreTick, timeout: Duration) { + if !is_zero_duration(&timeout) { + let mb = self.store_ctx.router.control_mailbox(); + let logger = self.fsm.store.logger().clone(); + let delay = self.store_ctx.timer.delay(timeout).compat().map(move |_| { + if let Err(e) = mb.force_send(StoreMsg::Tick(tick)) { + info!( + logger, + "failed to schedule store tick, are we shutting down?"; + "tick" => ?tick, + "err" => ?e + ); + } + }); + poll_future_notify(delay); + } 
} fn on_tick(&mut self, tick: StoreTick) { - unimplemented!() + match tick { + StoreTick::PdStoreHeartbeat => self.on_pd_store_heartbeat(), + _ => unimplemented!(), + } } pub fn handle_msgs(&mut self, store_msg_buf: &mut Vec) { diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 15dd6b4afc1..7dea9d55901 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -24,6 +24,7 @@ #![allow(unused)] #![feature(let_chains)] #![feature(array_windows)] +#![feature(div_duration)] mod batch; mod bootstrap; @@ -32,6 +33,7 @@ mod operation; mod raft; pub mod router; mod tablet; +mod worker; pub(crate) use batch::StoreContext; pub use batch::{create_store_batch_system, StoreRouter, StoreSystem}; diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 8b4b7fe293f..69e318c3a2e 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -7,6 +7,8 @@ //! - Apply after conf change is committed //! - Update raft state using the result of conf change +use std::time::Instant; + use collections::HashSet; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ @@ -39,12 +41,12 @@ use crate::{ #[derive(Default, Debug)] pub struct ConfChangeResult { pub index: u64, - // The proposed ConfChangeV2 or (legacy) ConfChange - // ConfChange (if it is) will convert to ConfChangeV2 + // The proposed ConfChangeV2 or (legacy) ConfChange. + // ConfChange (if it is) will be converted to ConfChangeV2. pub conf_change: ConfChangeV2, // The change peer requests come along with ConfChangeV2 - // or (legacy) ConfChange, for ConfChange, it only contains - // one element + // or (legacy) ConfChange. For ConfChange, it only contains + // one element. 
pub changes: Vec, pub region_state: RegionLocalState, } @@ -127,7 +129,11 @@ impl Peer { Ok(proposal_index) } - pub fn on_apply_res_conf_change(&mut self, conf_change: ConfChangeResult) { + pub fn on_apply_res_conf_change( + &mut self, + ctx: &mut StoreContext, + conf_change: ConfChangeResult, + ) { // TODO: cancel generating snapshot. // Snapshot is applied in memory without waiting for all entries being @@ -150,6 +156,7 @@ impl Peer { "notify pd with change peer region"; "region" => ?self.region(), ); + self.region_heartbeat_pd(ctx); let demote_self = tikv_util::store::is_learner(self.peer()); if remove_self || demote_self { warn!(self.logger, "removing or demoting leader"; "remove" => remove_self, "demote" => demote_self); @@ -157,12 +164,23 @@ impl Peer { self.raft_group_mut() .raft .become_follower(term, raft::INVALID_ID); - } else if conf_change.changes.iter().any(|c| { - matches!( - c.get_change_type(), - ConfChangeType::AddNode | ConfChangeType::AddLearnerNode - ) - }) { + } + let mut has_new_peer = None; + for c in conf_change.changes { + let peer_id = c.get_peer().get_id(); + match c.get_change_type() { + ConfChangeType::AddNode | ConfChangeType::AddLearnerNode => { + if has_new_peer.is_none() { + has_new_peer = Some(Instant::now()); + } + self.add_peer_heartbeat(peer_id, has_new_peer.unwrap()); + } + ConfChangeType::RemoveNode => { + self.remove_peer_heartbeat(peer_id); + } + } + } + if has_new_peer.is_some() { // Speed up snapshot instead of waiting another heartbeat. 
self.raft_group_mut().ping(); self.set_has_ready(); diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 2e43e69b44c..2782b436439 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -305,6 +305,21 @@ impl Peer { self.post_split(); + if self.is_leader() { + self.region_heartbeat_pd(store_ctx); + // Notify pd immediately to let it update the region meta. + info!( + self.logger, + "notify pd with split"; + "region_id" => self.region_id(), + "peer_id" => self.peer_id(), + "split_count" => regions.len(), + ); + // Now pd only uses ReportBatchSplit for history operation show, + // so we send it independently here. + self.report_batch_split_pd(store_ctx, regions.to_vec()); + } + let last_region_id = regions.last().unwrap().get_id(); for (new_region, locks) in regions.into_iter().zip(region_locks) { let new_region_id = new_region.get_id(); @@ -397,7 +412,7 @@ impl Peer { } self.set_raft_group(raft_group); } else { - // todo: when reaching here (peer is initalized before and cannot be replaced), + // TODO: when reaching here (peer is initalized before and cannot be replaced), // it is much complexer. return; } @@ -411,7 +426,7 @@ impl Peer { "region" => ?split_init.region, ); - // todo: GlobalReplicationState + // TODO: GlobalReplicationState for p in split_init.region.get_peers() { self.insert_peer_cache(p.clone()); @@ -425,7 +440,7 @@ impl Peer { *self.txn_ext().pessimistic_locks.write() = split_init.locks; // The new peer is likely to become leader, send a heartbeat immediately to // reduce client query miss. 
- self.heartbeat_pd(store_ctx); + self.region_heartbeat_pd(store_ctx); } meta.tablet_caches.insert(region_id, self.tablet().clone()); @@ -436,7 +451,7 @@ impl Peer { } if split_init.check_split { - // todo: check if the last region needs to split again + // TODO: check if the last region needs to split again } self.schedule_apply_fsm(store_ctx); diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 5d308986229..7e69a3f1c7c 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -299,14 +299,13 @@ impl Peer { for admin_res in apply_res.admin_result { match admin_res { AdminCmdResult::ConfChange(conf_change) => { - self.on_apply_res_conf_change(conf_change) + self.on_apply_res_conf_change(ctx, conf_change) } AdminCmdResult::SplitRegion(SplitResult { regions, derived_index, tablet_index, }) => self.on_ready_split_region(ctx, derived_index, tablet_index, regions), - AdminCmdResult::SplitRegion(_) => unimplemented!(), } } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 58628637159..60884f63b03 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -280,7 +280,7 @@ impl Peer { /// are split. It's a waste to use snapshot to restore newly split /// tablet. #[inline] - pub fn postpond_destroy(&self) -> bool { + pub fn postponed_destroy(&self) -> bool { let entry_storage = self.storage().entry_storage(); // TODO: check actual split index instead of commit index. entry_storage.applied_index() != entry_storage.commit_index() @@ -293,7 +293,7 @@ impl Peer { /// memory states. 
pub fn start_destroy(&mut self, write_task: &mut WriteTask) { let entry_storage = self.storage().entry_storage(); - if self.postpond_destroy() { + if self.postponed_destroy() { return; } let first_index = entry_storage.first_index(); diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 84835231398..7df897f2b26 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -2,6 +2,7 @@ mod command; mod life; +mod pd; mod query; mod ready; diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs new file mode 100644 index 00000000000..659fab00754 --- /dev/null +++ b/components/raftstore-v2/src/operation/pd.rs @@ -0,0 +1,230 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module implements the interactions with pd. + +use std::cmp; + +use engine_traits::{KvEngine, RaftEngine}; +use fail::fail_point; +use kvproto::{metapb, pdpb}; +use raftstore::store::Transport; +use slog::error; +use tikv_util::time::InstantExt; + +use crate::{ + batch::StoreContext, + fsm::{PeerFsmDelegate, Store, StoreFsmDelegate}, + raft::Peer, + router::{PeerTick, StoreTick}, + worker::{PdRegionHeartbeatTask, PdTask}, +}; + +impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { + #[inline] + pub fn on_pd_store_heartbeat(&mut self) { + self.fsm.store.store_heartbeat_pd(self.store_ctx); + self.schedule_tick( + StoreTick::PdStoreHeartbeat, + self.store_ctx.cfg.pd_store_heartbeat_tick_interval.0, + ); + } +} + +impl Store { + pub fn store_heartbeat_pd(&self, ctx: &StoreContext) + where + EK: KvEngine, + ER: RaftEngine, + { + let mut stats = pdpb::StoreStats::default(); + + stats.set_store_id(self.store_id()); + { + let meta = ctx.store_meta.lock().unwrap(); + stats.set_region_count(meta.tablet_caches.len() as u32); + } + + stats.set_sending_snap_count(0); + stats.set_receiving_snap_count(0); + 
+ stats.set_start_time(self.start_time().unwrap() as u32); + + stats.set_bytes_written(0); + stats.set_keys_written(0); + stats.set_is_busy(false); + + // stats.set_query_stats(query_stats); + + let task = PdTask::StoreHeartbeat { stats }; + if let Err(e) = ctx.pd_scheduler.schedule(task) { + error!(self.logger(), "notify pd failed"; + "store_id" => self.store_id(), + "err" => ?e + ); + } + } +} + +impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { + #[inline] + pub fn on_pd_heartbeat(&mut self) { + self.fsm.peer_mut().update_peer_statistics(); + if self.fsm.peer().is_leader() { + self.fsm.peer_mut().region_heartbeat_pd(self.store_ctx); + } + // TODO: hibernate region + self.schedule_tick(PeerTick::PdHeartbeat); + } +} + +impl Peer { + #[inline] + pub fn region_heartbeat_pd(&self, ctx: &StoreContext) { + let task = PdTask::RegionHeartbeat(PdRegionHeartbeatTask { + term: self.term(), + region: self.region().clone(), + down_peers: self.collect_down_peers(ctx.cfg.max_peer_down_duration.0), + peer: self.peer().clone(), + pending_peers: self.collect_pending_peers(ctx), + written_bytes: self.self_stat().written_bytes, + written_keys: self.self_stat().written_keys, + approximate_size: None, + approximate_keys: None, + wait_data_peers: Vec::new(), + }); + if let Err(e) = ctx.pd_scheduler.schedule(task) { + error!( + self.logger, + "failed to notify pd"; + "region_id" => self.region_id(), + "peer_id" => self.peer_id(), + "err" => ?e, + ); + return; + } + fail_point!("schedule_check_split"); + } + + /// Collects all pending peers and update `peers_start_pending_time`. 
+ fn collect_pending_peers(&self, ctx: &StoreContext) -> Vec { + let mut pending_peers = Vec::with_capacity(self.region().get_peers().len()); + let status = self.raft_group().status(); + let truncated_idx = self + .storage() + .apply_state() + .get_truncated_state() + .get_index(); + + if status.progress.is_none() { + return pending_peers; + } + + // TODO: update `peers_start_pending_time`. + + let progresses = status.progress.unwrap().iter(); + for (&id, progress) in progresses { + if id == self.peer_id() { + continue; + } + // The `matched` is 0 only in these two cases: + // 1. Current leader hasn't communicated with this peer. + // 2. This peer does not exist yet(maybe it is created but not initialized) + // + // The correctness of region merge depends on the fact that all target peers + // must exist during merging. (PD rely on `pending_peers` to check whether all + // target peers exist) + // + // So if the `matched` is 0, it must be a pending peer. + // It can be ensured because `truncated_index` must be greater than + // `RAFT_INIT_LOG_INDEX`(5). 
+ if progress.matched < truncated_idx { + if let Some(p) = self.peer_from_cache(id) { + pending_peers.push(p); + } else { + if ctx.cfg.dev_assert { + panic!( + "{:?} failed to get peer {} from cache", + self.logger.list(), + id + ); + } + error!( + self.logger, + "failed to get peer from cache"; + "region_id" => self.region_id(), + "peer_id" => self.peer_id(), + "get_peer_id" => id, + ); + } + } + } + pending_peers + } + + #[inline] + pub fn destroy_peer_pd(&self, ctx: &StoreContext) { + let task = PdTask::DestroyPeer { + region_id: self.region_id(), + }; + if let Err(e) = ctx.pd_scheduler.schedule(task) { + error!( + self.logger, + "failed to notify pd with DestroyPeer"; + "region_id" => self.region_id(), + "peer_id" => self.peer_id(), + "err" => %e, + ); + } + } + + #[inline] + pub fn ask_batch_split_pd(&self, ctx: &StoreContext, split_keys: Vec>) { + let task = PdTask::AskBatchSplit { + region: self.region().clone(), + split_keys, + peer: self.peer().clone(), + right_derive: ctx.cfg.right_derive_when_split, + }; + if let Err(e) = ctx.pd_scheduler.schedule(task) { + error!( + self.logger, + "failed to notify pd with AskBatchSplit"; + "region_id" => self.region_id(), + "peer_id" => self.peer_id(), + "err" => %e, + ); + } + } + + #[inline] + pub fn report_batch_split_pd( + &self, + ctx: &StoreContext, + regions: Vec, + ) { + let task = PdTask::ReportBatchSplit { regions }; + if let Err(e) = ctx.pd_scheduler.schedule(task) { + error!( + self.logger, + "failed to notify pd with ReportBatchSplit"; + "err" => %e, + ); + } + } + + #[inline] + pub fn update_max_timestamp_pd(&self, ctx: &StoreContext, initial_status: u64) { + let task = PdTask::UpdateMaxTimestamp { + region_id: self.region_id(), + initial_status, + txn_ext: self.txn_ext().clone(), + }; + if let Err(e) = ctx.pd_scheduler.schedule(task) { + error!( + self.logger, + "failed to notify pd with UpdateMaxTimestamp"; + "err" => %e, + ); + } + } +} diff --git a/components/raftstore-v2/src/operation/ready/mod.rs 
b/components/raftstore-v2/src/operation/ready/mod.rs index 6f6866b9671..9e639f233cc 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -20,13 +20,13 @@ mod async_writer; mod snapshot; -use std::cmp; +use std::{cmp, time::Instant}; use engine_traits::{KvEngine, RaftEngine}; use error_code::ErrorCodeExt; use kvproto::{raft_cmdpb::AdminCmdType, raft_serverpb::RaftMessage}; use protobuf::Message as _; -use raft::{eraftpb, Ready, StateRole}; +use raft::{eraftpb, Ready, StateRole, INVALID_ID}; use raftstore::store::{util, ExtraStates, FetchedLogs, ReadProgress, Transport, WriteTask}; use slog::{debug, error, trace, warn}; use tikv_util::time::{duration_to_sec, monotonic_raw_now}; @@ -41,6 +41,7 @@ use crate::{ raft::{Peer, Storage}, router::{ApplyTask, PeerTick}, }; + impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { /// Raft relies on periodic ticks to keep the state machine sync with other /// peers. @@ -111,7 +112,11 @@ impl Peer { } // TODO: drop all msg append when the peer is uninitialized and has conflict // ranges with other peers. - self.insert_peer_cache(msg.take_from_peer()); + let from_peer = msg.take_from_peer(); + if self.is_leader() && from_peer.get_id() != INVALID_ID { + self.add_peer_heartbeat(from_peer.get_id(), Instant::now()); + } + self.insert_peer_cache(from_peer); if let Err(e) = self.raft_group_mut().step(msg.take_message()) { error!(self.logger, "raft step error"; "err" => ?e); } @@ -271,7 +276,7 @@ impl Peer { } ctx.has_ready = true; - if !self.raft_group().has_ready() && (self.serving() || self.postpond_destroy()) { + if !self.raft_group().has_ready() && (self.serving() || self.postponed_destroy()) { #[cfg(feature = "testexport")] self.async_writer.notify_flush(); return; @@ -443,8 +448,13 @@ impl Peer { // latency. self.raft_group_mut().skip_bcast_commit(false); + // A more recent read may happen on the old leader. 
So max ts should + // be updated after a peer becomes leader. + self.require_updating_max_ts(ctx); // Exit entry cache warmup state when the peer becomes leader. self.entry_storage_mut().clear_entry_cache_warmup_state(); + + self.region_heartbeat_pd(ctx); } StateRole::Follower => { self.leader_lease_mut().expire(); diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 6ebb3ed2056..a9730a036e7 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -1,11 +1,15 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{mem, sync::Arc}; +use std::{ + mem, + sync::{atomic::Ordering, Arc}, + time::{Duration, Instant}, +}; use collections::HashMap; use crossbeam::atomic::AtomicCell; use engine_traits::{KvEngine, OpenOptions, RaftEngine, TabletFactory}; -use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, raft_serverpb::RegionLocalState}; +use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, pdpb, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; use raft::{RawNode, StateRole}; use raftstore::{ @@ -35,6 +39,7 @@ use crate::{ operation::{AsyncWriter, DestroyProgress, ProposalControl, SimpleWriteEncoder}, router::{CmdResChannel, QueryResChannel}, tablet::CachedTablet, + worker::PdTask, Result, }; @@ -44,10 +49,16 @@ const REGION_READ_PROGRESS_CAP: usize = 128; pub struct Peer { raft_group: RawNode>, tablet: CachedTablet, + + /// Statistics for self. + self_stat: PeerStat, + /// We use a cache for looking up peers. Not all peers exist in region's /// peer list, for example, an isolated peer may need to send/receive /// messages with unknown peers after recovery. peer_cache: Vec, + /// Statistics for other peers, only maintained when self is the leader. + peer_heartbeats: HashMap, /// Encoder for batching proposals and encoding them in a more efficient way /// than protobuf. 
@@ -123,7 +134,9 @@ impl Peer { let tag = format!("[region {}] {}", region.get_id(), peer_id); let mut peer = Peer { tablet, + self_stat: PeerStat::default(), peer_cache: vec![], + peer_heartbeats: HashMap::default(), raw_write_encoder: None, proposals: ProposalQueue::new(region_id, raft_group.raft.id), async_writer: AsyncWriter::new(region_id, peer_id), @@ -229,7 +242,7 @@ impl Peer { pessimistic_locks.version = self.region().get_region_epoch().get_version(); } - // todo: CoprocessorHost + // TODO: CoprocessorHost } #[inline] @@ -317,6 +330,11 @@ impl Peer { self.raft_group = raft_group; } + #[inline] + pub fn self_stat(&self) -> &PeerStat { + &self.self_stat + } + /// Mark the peer has a ready so it will be checked at the end of every /// processing round. #[inline] @@ -364,6 +382,57 @@ impl Peer { .cloned() } + #[inline] + pub fn update_peer_statistics(&mut self) { + if !self.is_leader() { + self.peer_heartbeats.clear(); + return; + } + + if self.peer_heartbeats.len() == self.region().get_peers().len() { + return; + } + + // Insert heartbeats in case that some peers never response heartbeats. 
+ let region = self.raft_group.store().region(); + for peer in region.get_peers() { + self.peer_heartbeats + .entry(peer.get_id()) + .or_insert_with(Instant::now); + } + } + + #[inline] + pub fn add_peer_heartbeat(&mut self, peer_id: u64, now: Instant) { + self.peer_heartbeats.insert(peer_id, now); + } + + #[inline] + pub fn remove_peer_heartbeat(&mut self, peer_id: u64) { + self.peer_heartbeats.remove(&peer_id); + } + + pub fn collect_down_peers(&self, max_duration: Duration) -> Vec { + let mut down_peers = Vec::new(); + let now = Instant::now(); + for p in self.region().get_peers() { + if p.get_id() == self.peer_id() { + continue; + } + if let Some(instant) = self.peer_heartbeats.get(&p.get_id()) { + let elapsed = instant.saturating_duration_since(now); + if elapsed >= max_duration { + let mut stats = pdpb::PeerStats::default(); + stats.set_peer(p.clone()); + stats.set_down_seconds(elapsed.as_secs()); + down_peers.push(stats); + } + } + } + // TODO: `refill_disk_full_peers` + down_peers + } + #[inline] pub fn is_leader(&self) -> bool { self.raft_group.raft.state == StateRole::Leader @@ -486,10 +555,6 @@ impl Peer { &self.txn_ext } - pub fn heartbeat_pd(&self, store_ctx: &StoreContext) { - // todo - } - pub fn generate_read_delegate(&self) -> ReadDelegate { let peer_id = self.peer().get_id(); @@ -522,4 +587,17 @@ impl Peer { self.proposal_control .advance_apply(apply_index, term, region); } + + // TODO: find a better place to put all txn related stuff. 
+ pub fn require_updating_max_ts(&self, ctx: &StoreContext) { + let epoch = self.region().get_region_epoch(); + let term_low_bits = self.term() & ((1 << 32) - 1); // 32 bits + let version_lot_bits = epoch.get_version() & ((1 << 31) - 1); // 31 bits + let initial_status = (term_low_bits << 32) | (version_lot_bits << 1); + self.txn_ext + .max_ts_sync_status + .store(initial_status, Ordering::SeqCst); + + self.update_max_timestamp_pd(ctx, initial_status); + } } diff --git a/components/raftstore-v2/src/worker/mod.rs b/components/raftstore-v2/src/worker/mod.rs new file mode 100644 index 00000000000..ad8249d22a4 --- /dev/null +++ b/components/raftstore-v2/src/worker/mod.rs @@ -0,0 +1,5 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +mod pd; + +pub use pd::{RegionHeartbeatTask as PdRegionHeartbeatTask, Runner as PdRunner, Task as PdTask}; diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs new file mode 100644 index 00000000000..132678e21f2 --- /dev/null +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -0,0 +1,327 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + fmt::{self, Display, Formatter}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, +}; + +use causal_ts::CausalTsProviderImpl; +use collections::HashMap; +use concurrency_manager::ConcurrencyManager; +use engine_traits::{KvEngine, RaftEngine, TabletFactory}; +use kvproto::{metapb, pdpb}; +use pd_client::PdClient; +use raftstore::store::{util::KeysInfoFormatter, TxnExt}; +use slog::{error, info, Logger}; +use tikv_util::{time::UnixSecs, worker::Runnable}; +use yatp::{task::future::TaskCell, Remote}; + +use crate::{batch::StoreRouter, router::PeerMsg}; + +mod region_heartbeat; +mod split; +mod store_heartbeat; +mod update_max_timestamp; + +pub use region_heartbeat::RegionHeartbeatTask; + +pub enum Task { + RegionHeartbeat(RegionHeartbeatTask), + StoreHeartbeat { + stats: pdpb::StoreStats, + // TODO: StoreReport, StoreDrAutoSyncStatus + }, + DestroyPeer { + region_id: u64, + }, + AskBatchSplit { + region: metapb::Region, + split_keys: Vec>, + peer: metapb::Peer, + right_derive: bool, + }, + ReportBatchSplit { + regions: Vec, + }, + UpdateMaxTimestamp { + region_id: u64, + initial_status: u64, + txn_ext: Arc, + }, +} + +impl Display for Task { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match *self { + Task::RegionHeartbeat(ref hb_task) => write!( + f, + "region heartbeat for region {:?}, leader {}", + hb_task.region, + hb_task.peer.get_id(), + ), + Task::StoreHeartbeat { ref stats, .. } => { + write!(f, "store heartbeat stats: {:?}", stats) + } + Task::DestroyPeer { ref region_id } => { + write!(f, "destroy peer of region {}", region_id) + } + Task::AskBatchSplit { + ref region, + ref split_keys, + .. + } => write!( + f, + "ask split region {} with {}", + region.get_id(), + KeysInfoFormatter(split_keys.iter()) + ), + Task::ReportBatchSplit { ref regions } => write!(f, "report split {:?}", regions), + Task::UpdateMaxTimestamp { region_id, .. 
} => write!( + f, + "update the max timestamp for region {} in the concurrency manager", + region_id + ), + } + } +} + +pub struct Runner +where + EK: KvEngine, + ER: RaftEngine, + T: PdClient + 'static, +{ + store_id: u64, + pd_client: Arc, + raft_engine: ER, + tablet_factory: Arc>, + router: StoreRouter, + + remote: Remote, + + region_peers: HashMap, + + // For store_heartbeat. + start_ts: UnixSecs, + store_stat: store_heartbeat::StoreStat, + + // For region_heartbeat. + region_cpu_records: HashMap, + is_hb_receiver_scheduled: bool, + + // For update_max_timestamp. + concurrency_manager: ConcurrencyManager, + causal_ts_provider: Option>, + + logger: Logger, + shutdown: Arc, +} + +impl Runner +where + EK: KvEngine, + ER: RaftEngine, + T: PdClient + 'static, +{ + pub fn new( + store_id: u64, + pd_client: Arc, + raft_engine: ER, + tablet_factory: Arc>, + router: StoreRouter, + remote: Remote, + concurrency_manager: ConcurrencyManager, + causal_ts_provider: Option>, // used for rawkv apiv2 + logger: Logger, + shutdown: Arc, + ) -> Self { + Self { + store_id, + pd_client, + raft_engine, + tablet_factory, + router, + remote, + region_peers: HashMap::default(), + start_ts: UnixSecs::zero(), + store_stat: store_heartbeat::StoreStat::default(), + region_cpu_records: HashMap::default(), + is_hb_receiver_scheduled: false, + concurrency_manager, + causal_ts_provider, + logger, + shutdown, + } + } +} + +impl Runnable for Runner +where + EK: KvEngine, + ER: RaftEngine, + T: PdClient + 'static, +{ + type Task = Task; + + fn run(&mut self, task: Task) { + self.maybe_schedule_heartbeat_receiver(); + match task { + Task::RegionHeartbeat(task) => self.handle_region_heartbeat(task), + Task::StoreHeartbeat { stats } => self.handle_store_heartbeat(stats), + Task::DestroyPeer { region_id } => self.handle_destroy_peer(region_id), + Task::AskBatchSplit { + region, + split_keys, + peer, + right_derive, + } => self.handle_ask_batch_split(region, split_keys, peer, right_derive), + 
Task::ReportBatchSplit { regions } => self.handle_report_batch_split(regions), + Task::UpdateMaxTimestamp { + region_id, + initial_status, + txn_ext, + } => self.handle_update_max_timestamp(region_id, initial_status, txn_ext), + } + } +} + +impl Runner +where + EK: KvEngine, + ER: RaftEngine, + T: PdClient + 'static, +{ + fn handle_destroy_peer(&mut self, region_id: u64) { + match self.region_peers.remove(®ion_id) { + None => {} + Some(_) => { + info!(self.logger, "remove peer statistic record in pd"; "region_id" => region_id) + } + } + } +} + +pub mod requests { + use kvproto::raft_cmdpb::{ + AdminCmdType, AdminRequest, ChangePeerRequest, ChangePeerV2Request, RaftCmdRequest, + SplitRequest, + }; + use raft::eraftpb::ConfChangeType; + + use super::*; + + pub fn send_admin_request( + logger: &Logger, + router: &StoreRouter, + region_id: u64, + epoch: metapb::RegionEpoch, + peer: metapb::Peer, + request: AdminRequest, + ) where + EK: KvEngine, + ER: RaftEngine, + { + let cmd_type = request.get_cmd_type(); + + let mut req = RaftCmdRequest::default(); + req.mut_header().set_region_id(region_id); + req.mut_header().set_region_epoch(epoch); + req.mut_header().set_peer(peer); + req.set_admin_request(request); + + let (msg, _) = PeerMsg::raft_command(req); + if let Err(e) = router.send(region_id, msg) { + error!( + logger, + "send request failed"; + "region_id" => region_id, "cmd_type" => ?cmd_type, "err" => ?e, + ); + } + } + + pub fn new_change_peer_request( + change_type: ConfChangeType, + peer: metapb::Peer, + ) -> AdminRequest { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::ChangePeer); + req.mut_change_peer().set_change_type(change_type); + req.mut_change_peer().set_peer(peer); + req + } + + pub fn new_change_peer_v2_request(changes: Vec) -> AdminRequest { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::ChangePeerV2); + let change_peer_reqs = changes + .into_iter() + .map(|mut c| { + let mut cp = 
ChangePeerRequest::default(); + cp.set_change_type(c.get_change_type()); + cp.set_peer(c.take_peer()); + cp + }) + .collect(); + let mut cp = ChangePeerV2Request::default(); + cp.set_changes(change_peer_reqs); + req.set_change_peer_v2(cp); + req + } + + pub fn new_split_region_request( + split_key: Vec, + new_region_id: u64, + peer_ids: Vec, + right_derive: bool, + ) -> AdminRequest { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::Split); + req.mut_split().set_split_key(split_key); + req.mut_split().set_new_region_id(new_region_id); + req.mut_split().set_new_peer_ids(peer_ids); + req.mut_split().set_right_derive(right_derive); + req + } + + pub fn new_batch_split_region_request( + split_keys: Vec>, + ids: Vec, + right_derive: bool, + ) -> AdminRequest { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::BatchSplit); + req.mut_splits().set_right_derive(right_derive); + let mut requests = Vec::with_capacity(ids.len()); + for (mut id, key) in ids.into_iter().zip(split_keys) { + let mut split = SplitRequest::default(); + split.set_split_key(key); + split.set_new_region_id(id.get_new_region_id()); + split.set_new_peer_ids(id.take_new_peer_ids()); + requests.push(split); + } + req.mut_splits().set_requests(requests.into()); + req + } + + pub fn new_transfer_leader_request( + peer: metapb::Peer, + peers: Vec, + ) -> AdminRequest { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::TransferLeader); + req.mut_transfer_leader().set_peer(peer); + req.mut_transfer_leader().set_peers(peers.into()); + req + } + + pub fn new_merge_request(merge: pdpb::Merge) -> AdminRequest { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::PrepareMerge); + req.mut_prepare_merge() + .set_target(merge.get_target().to_owned()); + req + } +} diff --git a/components/raftstore-v2/src/worker/pd/region_heartbeat.rs b/components/raftstore-v2/src/worker/pd/region_heartbeat.rs new file mode 100644 index 
00000000000..ad0293d0b6d --- /dev/null +++ b/components/raftstore-v2/src/worker/pd/region_heartbeat.rs @@ -0,0 +1,256 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::time::Duration; + +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::{ + metapb, pdpb, + raft_cmdpb::{ + AdminCmdType, AdminRequest, ChangePeerRequest, ChangePeerV2Request, RaftCmdRequest, + SplitRequest, + }, + raft_serverpb::RaftMessage, + replication_modepb::{RegionReplicationStatus, StoreDrAutoSyncStatus}, +}; +use pd_client::{metrics::PD_HEARTBEAT_COUNTER_VEC, PdClient, RegionStat}; +use raft::eraftpb::ConfChangeType; +use slog::{debug, error, info}; +use tikv_util::{store::QueryStats, time::UnixSecs}; + +use super::{requests::*, Runner}; + +pub struct RegionHeartbeatTask { + pub term: u64, + pub region: metapb::Region, + pub peer: metapb::Peer, + pub down_peers: Vec, + pub pending_peers: Vec, + pub written_bytes: u64, + pub written_keys: u64, + pub approximate_size: Option, + pub approximate_keys: Option, + pub wait_data_peers: Vec, + // TODO: RegionReplicationStatus +} + +#[derive(Default)] +pub struct PeerStat { + pub read_bytes: u64, + pub read_keys: u64, + pub query_stats: QueryStats, + // last_region_report_attributes records the state of the last region heartbeat + pub last_region_report_read_bytes: u64, + pub last_region_report_read_keys: u64, + pub last_region_report_query_stats: QueryStats, + pub last_region_report_written_bytes: u64, + pub last_region_report_written_keys: u64, + pub last_region_report_ts: UnixSecs, + // last_store_report_attributes records the state of the last store heartbeat + pub last_store_report_read_bytes: u64, + pub last_store_report_read_keys: u64, + pub last_store_report_query_stats: QueryStats, + pub approximate_keys: u64, + pub approximate_size: u64, +} + +impl Runner +where + EK: KvEngine, + ER: RaftEngine, + T: PdClient + 'static, +{ + pub fn handle_region_heartbeat(&mut self, task: RegionHeartbeatTask) { + // HACK! 
In order to keep the compatible of protos, we use 0 to identify + // the size uninitialized regions, and use 1 to identify the empty regions. + // + // See tikv/tikv#11114 for details. + let approximate_size = match task.approximate_size { + Some(0) => 1, + Some(v) => v, + None => 0, // size uninitialized + }; + let approximate_keys = task.approximate_keys.unwrap_or_default(); + let region_id = task.region.get_id(); + + let peer_stat = self + .region_peers + .entry(region_id) + .or_insert_with(PeerStat::default); + peer_stat.approximate_size = approximate_size; + peer_stat.approximate_keys = approximate_keys; + + let read_bytes_delta = peer_stat.read_bytes - peer_stat.last_region_report_read_bytes; + let read_keys_delta = peer_stat.read_keys - peer_stat.last_region_report_read_keys; + let written_bytes_delta = task.written_bytes - peer_stat.last_region_report_written_bytes; + let written_keys_delta = task.written_keys - peer_stat.last_region_report_written_keys; + let query_stats = peer_stat + .query_stats + .sub_query_stats(&peer_stat.last_region_report_query_stats); + let mut last_report_ts = peer_stat.last_region_report_ts; + if last_report_ts.is_zero() { + last_report_ts = self.start_ts; + } + peer_stat.last_region_report_written_bytes = task.written_bytes; + peer_stat.last_region_report_written_keys = task.written_keys; + peer_stat.last_region_report_read_bytes = peer_stat.read_bytes; + peer_stat.last_region_report_read_keys = peer_stat.read_keys; + peer_stat.last_region_report_query_stats = peer_stat.query_stats.clone(); + let unix_secs_now = UnixSecs::now(); + peer_stat.last_region_report_ts = unix_secs_now; + + // Calculate the CPU usage since the last region heartbeat. + let cpu_usage = { + // Take out the region CPU record. 
+ let cpu_time_duration = Duration::from_millis( + self.region_cpu_records.remove(®ion_id).unwrap_or(0) as u64, + ); + let interval_second = unix_secs_now.into_inner() - last_report_ts.into_inner(); + // Keep consistent with the calculation of cpu_usages in a store heartbeat. + // See components/tikv_util/src/metrics/threads_linux.rs for more details. + if interval_second > 0 { + ((cpu_time_duration.as_secs_f64() * 100.0) / interval_second as f64) as u64 + } else { + 0 + } + }; + + let region_stat = RegionStat { + down_peers: task.down_peers, + pending_peers: task.pending_peers, + written_bytes: written_bytes_delta, + written_keys: written_keys_delta, + read_bytes: read_bytes_delta, + read_keys: read_keys_delta, + query_stats: query_stats.0, + approximate_size, + approximate_keys, + last_report_ts, + cpu_usage, + }; + self.store_stat + .region_bytes_written + .observe(region_stat.written_bytes as f64); + self.store_stat + .region_keys_written + .observe(region_stat.written_keys as f64); + self.store_stat + .region_bytes_read + .observe(region_stat.read_bytes as f64); + self.store_stat + .region_keys_read + .observe(region_stat.read_keys as f64); + + let resp = self.pd_client.region_heartbeat( + task.term, + task.region.clone(), + task.peer, + region_stat, + None, + ); + let logger = self.logger.clone(); + let f = async move { + if let Err(e) = resp.await { + debug!( + logger, + "failed to send heartbeat"; + "region_id" => task.region.get_id(), + "err" => ?e + ); + } + }; + self.remote.spawn(f); + } + + pub fn maybe_schedule_heartbeat_receiver(&mut self) { + if self.is_hb_receiver_scheduled { + return; + } + let router = self.router.clone(); + let store_id = self.store_id; + let logger = self.logger.clone(); + + let fut = + self.pd_client + .handle_region_heartbeat_response(self.store_id, move |mut resp| { + let region_id = resp.get_region_id(); + let epoch = resp.take_region_epoch(); + let peer = resp.take_target_peer(); + + if resp.has_change_peer() { + 
PD_HEARTBEAT_COUNTER_VEC + .with_label_values(&["change peer"]) + .inc(); + + let mut change_peer = resp.take_change_peer(); + info!( + logger, + "try to change peer"; + "region_id" => region_id, + "change_type" => ?change_peer.get_change_type(), + "peer" => ?change_peer.get_peer() + ); + let req = new_change_peer_request( + change_peer.get_change_type(), + change_peer.take_peer(), + ); + send_admin_request(&logger, &router, region_id, epoch, peer, req); + } else if resp.has_change_peer_v2() { + PD_HEARTBEAT_COUNTER_VEC + .with_label_values(&["change peer"]) + .inc(); + + let mut change_peer_v2 = resp.take_change_peer_v2(); + info!( + logger, + "try to change peer"; + "region_id" => region_id, + "changes" => ?change_peer_v2.get_changes(), + ); + let req = new_change_peer_v2_request(change_peer_v2.take_changes().into()); + send_admin_request(&logger, &router, region_id, epoch, peer, req); + } else if resp.has_transfer_leader() { + PD_HEARTBEAT_COUNTER_VEC + .with_label_values(&["transfer leader"]) + .inc(); + + let mut transfer_leader = resp.take_transfer_leader(); + info!( + logger, + "try to transfer leader"; + "region_id" => region_id, + "from_peer" => ?peer, + "to_peer" => ?transfer_leader.get_peer(), + "to_peers" => ?transfer_leader.get_peers(), + ); + let req = new_transfer_leader_request( + transfer_leader.take_peer(), + transfer_leader.take_peers().into(), + ); + send_admin_request(&logger, &router, region_id, epoch, peer, req); + } else if resp.has_split_region() { + // TODO + info!(logger, "pd asks for split but ignored"); + } else if resp.has_merge() { + // TODO + info!(logger, "pd asks for merge but ignored"); + } else { + PD_HEARTBEAT_COUNTER_VEC.with_label_values(&["noop"]).inc(); + } + }); + let logger = self.logger.clone(); + let f = async move { + match fut.await { + Ok(_) => { + info!( + logger, + "region heartbeat response handler exit"; + "store_id" => store_id, + ); + } + Err(e) => panic!("unexpected error: {:?}", e), + } + }; + 
self.remote.spawn(f); + self.is_hb_receiver_scheduled = true; + } +} diff --git a/components/raftstore-v2/src/worker/pd/split.rs b/components/raftstore-v2/src/worker/pd/split.rs new file mode 100644 index 00000000000..3cb85f6698c --- /dev/null +++ b/components/raftstore-v2/src/worker/pd/split.rs @@ -0,0 +1,99 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::{ + metapb, pdpb, + raft_cmdpb::{AdminCmdType, AdminRequest, SplitRequest}, +}; +use pd_client::PdClient; +use slog::{info, warn}; + +use super::{requests::*, Runner}; + +fn new_batch_split_region_request( + split_keys: Vec>, + ids: Vec, + right_derive: bool, +) -> AdminRequest { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::BatchSplit); + req.mut_splits().set_right_derive(right_derive); + let mut requests = Vec::with_capacity(ids.len()); + for (mut id, key) in ids.into_iter().zip(split_keys) { + let mut split = SplitRequest::default(); + split.set_split_key(key); + split.set_new_region_id(id.get_new_region_id()); + split.set_new_peer_ids(id.take_new_peer_ids()); + requests.push(split); + } + req.mut_splits().set_requests(requests.into()); + req +} + +impl Runner +where + EK: KvEngine, + ER: RaftEngine, + T: PdClient + 'static, +{ + pub fn handle_ask_batch_split( + &mut self, + mut region: metapb::Region, + split_keys: Vec>, + peer: metapb::Peer, + right_derive: bool, + ) { + if split_keys.is_empty() { + info!(self.logger, "empty split key, skip ask batch split"; + "region_id" => region.get_id()); + return; + } + let resp = self + .pd_client + .ask_batch_split(region.clone(), split_keys.len()); + let router = self.router.clone(); + let logger = self.logger.clone(); + let f = async move { + match resp.await { + Ok(mut resp) => { + info!( + logger, + "try to batch split region"; + "region_id" => region.get_id(), + "new_region_ids" => ?resp.get_ids(), + "region" => ?region, + ); + + let req = 
new_batch_split_region_request( + split_keys, + resp.take_ids().into(), + right_derive, + ); + let region_id = region.get_id(); + let epoch = region.take_region_epoch(); + send_admin_request(&logger, &router, region_id, epoch, peer, req); + } + Err(e) => { + warn!( + logger, + "ask batch split failed"; + "region_id" => region.get_id(), + "err" => ?e, + ); + } + } + }; + self.remote.spawn(f); + } + + pub fn handle_report_batch_split(&mut self, regions: Vec) { + let resp = self.pd_client.report_batch_split(regions); + let logger = self.logger.clone(); + let f = async move { + if let Err(e) = resp.await { + warn!(logger, "report split failed"; "err" => ?e); + } + }; + self.remote.spawn(f); + } +} diff --git a/components/raftstore-v2/src/worker/pd/store_heartbeat.rs b/components/raftstore-v2/src/worker/pd/store_heartbeat.rs new file mode 100644 index 00000000000..1caa96a5225 --- /dev/null +++ b/components/raftstore-v2/src/worker/pd/store_heartbeat.rs @@ -0,0 +1,293 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::cmp; + +use collections::HashMap; +use engine_traits::{KvEngine, RaftEngine}; +use fail::fail_point; +use kvproto::pdpb; +use pd_client::{ + metrics::{ + REGION_READ_BYTES_HISTOGRAM, REGION_READ_KEYS_HISTOGRAM, REGION_WRITTEN_BYTES_HISTOGRAM, + REGION_WRITTEN_KEYS_HISTOGRAM, STORE_SIZE_GAUGE_VEC, + }, + PdClient, +}; +use prometheus::local::LocalHistogram; +use slog::{error, warn}; +use tikv_util::{metrics::RecordPairVec, store::QueryStats, time::UnixSecs, topn::TopN}; + +use super::Runner; + +const HOTSPOT_REPORT_CAPACITY: usize = 1000; + +fn hotspot_key_report_threshold() -> u64 { + const HOTSPOT_KEY_RATE_THRESHOLD: u64 = 128; + fail_point!("mock_hotspot_threshold", |_| { 0 }); + HOTSPOT_KEY_RATE_THRESHOLD * 10 +} + +fn hotspot_byte_report_threshold() -> u64 { + const HOTSPOT_BYTE_RATE_THRESHOLD: u64 = 8 * 1024; + fail_point!("mock_hotspot_threshold", |_| { 0 }); + HOTSPOT_BYTE_RATE_THRESHOLD * 10 +} + +fn hotspot_query_num_report_threshold() -> u64 { + const HOTSPOT_QUERY_RATE_THRESHOLD: u64 = 128; + fail_point!("mock_hotspot_threshold", |_| { 0 }); + HOTSPOT_QUERY_RATE_THRESHOLD * 10 +} + +pub struct StoreStat { + pub engine_total_bytes_read: u64, + pub engine_total_keys_read: u64, + pub engine_total_query_num: QueryStats, + pub engine_last_total_bytes_read: u64, + pub engine_last_total_keys_read: u64, + pub engine_last_query_num: QueryStats, + pub last_report_ts: UnixSecs, + + pub region_bytes_read: LocalHistogram, + pub region_keys_read: LocalHistogram, + pub region_bytes_written: LocalHistogram, + pub region_keys_written: LocalHistogram, + + pub store_cpu_usages: RecordPairVec, + pub store_read_io_rates: RecordPairVec, + pub store_write_io_rates: RecordPairVec, +} + +impl Default for StoreStat { + fn default() -> StoreStat { + StoreStat { + region_bytes_read: REGION_READ_BYTES_HISTOGRAM.local(), + region_keys_read: REGION_READ_KEYS_HISTOGRAM.local(), + region_bytes_written: REGION_WRITTEN_BYTES_HISTOGRAM.local(), + region_keys_written: 
REGION_WRITTEN_KEYS_HISTOGRAM.local(), + + last_report_ts: UnixSecs::zero(), + engine_total_bytes_read: 0, + engine_total_keys_read: 0, + engine_last_total_bytes_read: 0, + engine_last_total_keys_read: 0, + engine_total_query_num: QueryStats::default(), + engine_last_query_num: QueryStats::default(), + + store_cpu_usages: RecordPairVec::default(), + store_read_io_rates: RecordPairVec::default(), + store_write_io_rates: RecordPairVec::default(), + } + } +} + +#[derive(Default, Clone)] +struct PeerCmpReadStat { + pub region_id: u64, + pub report_stat: u64, +} + +impl Ord for PeerCmpReadStat { + fn cmp(&self, other: &Self) -> cmp::Ordering { + self.report_stat.cmp(&other.report_stat) + } +} + +impl Eq for PeerCmpReadStat {} + +impl PartialEq for PeerCmpReadStat { + fn eq(&self, other: &Self) -> bool { + self.report_stat == other.report_stat + } +} + +impl PartialOrd for PeerCmpReadStat { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.report_stat.cmp(&other.report_stat)) + } +} + +fn collect_report_read_peer_stats( + capacity: usize, + mut report_read_stats: HashMap, + mut stats: pdpb::StoreStats, +) -> pdpb::StoreStats { + if report_read_stats.len() < capacity * 3 { + for (_, read_stat) in report_read_stats { + stats.peer_stats.push(read_stat); + } + return stats; + } + let mut keys_topn_report = TopN::new(capacity); + let mut bytes_topn_report = TopN::new(capacity); + let mut stats_topn_report = TopN::new(capacity); + for read_stat in report_read_stats.values() { + let mut cmp_stat = PeerCmpReadStat::default(); + cmp_stat.region_id = read_stat.region_id; + let mut key_cmp_stat = cmp_stat.clone(); + key_cmp_stat.report_stat = read_stat.read_keys; + keys_topn_report.push(key_cmp_stat); + let mut byte_cmp_stat = cmp_stat.clone(); + byte_cmp_stat.report_stat = read_stat.read_bytes; + bytes_topn_report.push(byte_cmp_stat); + let mut query_cmp_stat = cmp_stat.clone(); + query_cmp_stat.report_stat = get_read_query_num(read_stat.get_query_stats()); + 
stats_topn_report.push(query_cmp_stat); + } + + for x in keys_topn_report { + if let Some(report_stat) = report_read_stats.remove(&x.region_id) { + stats.peer_stats.push(report_stat); + } + } + + for x in bytes_topn_report { + if let Some(report_stat) = report_read_stats.remove(&x.region_id) { + stats.peer_stats.push(report_stat); + } + } + + for x in stats_topn_report { + if let Some(report_stat) = report_read_stats.remove(&x.region_id) { + stats.peer_stats.push(report_stat); + } + } + stats +} + +fn get_read_query_num(stat: &pdpb::QueryStats) -> u64 { + stat.get_get() + stat.get_coprocessor() + stat.get_scan() +} + +impl Runner +where + EK: KvEngine, + ER: RaftEngine, + T: PdClient + 'static, +{ + pub fn handle_store_heartbeat(&mut self, mut stats: pdpb::StoreStats) { + let mut report_peers = HashMap::default(); + for (region_id, region_peer) in &mut self.region_peers { + let read_bytes = region_peer.read_bytes - region_peer.last_store_report_read_bytes; + let read_keys = region_peer.read_keys - region_peer.last_store_report_read_keys; + let query_stats = region_peer + .query_stats + .sub_query_stats(®ion_peer.last_store_report_query_stats); + region_peer.last_store_report_read_bytes = region_peer.read_bytes; + region_peer.last_store_report_read_keys = region_peer.read_keys; + region_peer + .last_store_report_query_stats + .fill_query_stats(®ion_peer.query_stats); + if read_bytes < hotspot_byte_report_threshold() + && read_keys < hotspot_key_report_threshold() + && query_stats.get_read_query_num() < hotspot_query_num_report_threshold() + { + continue; + } + let mut read_stat = pdpb::PeerStat::default(); + read_stat.set_region_id(*region_id); + read_stat.set_read_keys(read_keys); + read_stat.set_read_bytes(read_bytes); + read_stat.set_query_stats(query_stats.0); + report_peers.insert(*region_id, read_stat); + } + + stats = collect_report_read_peer_stats(HOTSPOT_REPORT_CAPACITY, report_peers, stats); + let (capacity, used_size, available) = 
self.collect_engine_size().unwrap_or_default(); + if available == 0 { + warn!(self.logger, "no available space"); + } + + stats.set_capacity(capacity); + stats.set_used_size(used_size); + stats.set_available(available); + stats.set_bytes_read( + self.store_stat.engine_total_bytes_read - self.store_stat.engine_last_total_bytes_read, + ); + stats.set_keys_read( + self.store_stat.engine_total_keys_read - self.store_stat.engine_last_total_keys_read, + ); + + self.store_stat + .engine_total_query_num + .add_query_stats(stats.get_query_stats()); // add write query stat + let res = self + .store_stat + .engine_total_query_num + .sub_query_stats(&self.store_stat.engine_last_query_num); + stats.set_query_stats(res.0); + + stats.set_cpu_usages(self.store_stat.store_cpu_usages.clone().into()); + stats.set_read_io_rates(self.store_stat.store_read_io_rates.clone().into()); + stats.set_write_io_rates(self.store_stat.store_write_io_rates.clone().into()); + + let mut interval = pdpb::TimeInterval::default(); + interval.set_start_timestamp(self.store_stat.last_report_ts.into_inner()); + stats.set_interval(interval); + self.store_stat.engine_last_total_bytes_read = self.store_stat.engine_total_bytes_read; + self.store_stat.engine_last_total_keys_read = self.store_stat.engine_total_keys_read; + self.store_stat + .engine_last_query_num + .fill_query_stats(&self.store_stat.engine_total_query_num); + self.store_stat.last_report_ts = UnixSecs::now(); + self.store_stat.region_bytes_written.flush(); + self.store_stat.region_keys_written.flush(); + self.store_stat.region_bytes_read.flush(); + self.store_stat.region_keys_read.flush(); + + STORE_SIZE_GAUGE_VEC + .with_label_values(&["capacity"]) + .set(capacity as i64); + STORE_SIZE_GAUGE_VEC + .with_label_values(&["available"]) + .set(available as i64); + STORE_SIZE_GAUGE_VEC + .with_label_values(&["used"]) + .set(used_size as i64); + + // TODO: slow score + + let router = self.router.clone(); + let resp = 
self.pd_client.store_heartbeat(stats, None, None); + let logger = self.logger.clone(); + let f = async move { + if let Err(e) = resp.await { + error!(logger, "store heartbeat failed"; "err" => ?e); + } + }; + self.remote.spawn(f); + } + + /// Returns (capacity, used, available). + fn collect_engine_size(&self) -> Option<(u64, u64, u64)> { + let disk_stats = match fs2::statvfs(self.tablet_factory.tablets_path()) { + Err(e) => { + error!( + self.logger, + "get disk stat for rocksdb failed"; + "engine_path" => self.tablet_factory.tablets_path().display(), + "err" => ?e + ); + return None; + } + Ok(stats) => stats, + }; + let disk_cap = disk_stats.total_space(); + // TODO: custom capacity. + let capacity = disk_cap; + // TODO: accurate snapshot size and kv engines size. + let snap_size = 0; + let kv_size = 0; + let used_size = snap_size + + kv_size + + self + .raft_engine + .get_engine_size() + .expect("raft engine used size"); + let mut available = capacity.checked_sub(used_size).unwrap_or_default(); + // We only care about rocksdb SST file size, so we should check disk available + // here. + available = cmp::min(available, disk_stats.available_space()); + Some((capacity, used_size, available)) + } +} diff --git a/components/raftstore-v2/src/worker/pd/update_max_timestamp.rs b/components/raftstore-v2/src/worker/pd/update_max_timestamp.rs new file mode 100644 index 00000000000..cbfecb8171d --- /dev/null +++ b/components/raftstore-v2/src/worker/pd/update_max_timestamp.rs @@ -0,0 +1,114 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + sync::{atomic::Ordering, Arc}, + time::{Duration, Instant}, +}; + +use causal_ts::CausalTsProvider; +use engine_traits::{KvEngine, RaftEngine}; +use fail::fail_point; +use futures::{compat::Future01CompatExt, FutureExt}; +use pd_client::PdClient; +use raftstore::{store::TxnExt, Result}; +use slog::{info, warn}; +use tikv_util::{box_err, timer::GLOBAL_TIMER_HANDLE}; +use txn_types::TimeStamp; + +use super::Runner; + +impl Runner +where + EK: KvEngine, + ER: RaftEngine, + T: PdClient + 'static, +{ + pub fn handle_update_max_timestamp( + &mut self, + region_id: u64, + initial_status: u64, + txn_ext: Arc, + ) { + let pd_client = self.pd_client.clone(); + let concurrency_manager = self.concurrency_manager.clone(); + let causal_ts_provider = self.causal_ts_provider.clone(); + let logger = self.logger.clone(); + let shutdown = self.shutdown.clone(); + + let f = async move { + let mut success = false; + while txn_ext.max_ts_sync_status.load(Ordering::SeqCst) == initial_status + && !shutdown.load(Ordering::Relaxed) + { + // On leader transfer / region merge, RawKV API v2 need to + // invoke causal_ts_provider.flush() to renew + // cached TSO, to ensure that the next TSO + // returned by causal_ts_provider.get_ts() on current + // store must be larger than the store where the leader is on + // before. + // + // And it won't break correctness of transaction commands, as + // causal_ts_provider.flush() is implemented as + // pd_client.get_tso() + renew TSO cached. 
+ let res: Result = if let Some(causal_ts_provider) = &causal_ts_provider { + causal_ts_provider + .async_flush() + .await + .map_err(|e| box_err!(e)) + } else { + pd_client.get_tso().await.map_err(Into::into) + }; + + match res { + Ok(ts) => { + concurrency_manager.update_max_ts(ts); + success = txn_ext + .max_ts_sync_status + .compare_exchange( + initial_status, + initial_status | 1, + Ordering::SeqCst, + Ordering::SeqCst, + ) + .is_ok(); + break; + } + Err(e) => { + warn!( + logger, + "failed to update max timestamp for region {}: {:?}", region_id, e + ); + } + } + } + + if success { + info!(logger, "succeed to update max timestamp"; "region_id" => region_id); + } else { + info!( + logger, + "updating max timestamp is stale"; + "region_id" => region_id, + "initial_status" => initial_status, + ); + } + }; + + #[cfg(feature = "failpoints")] + let delay = (|| { + fail_point!("delay_update_max_ts", |_| true); + false + })(); + #[cfg(not(feature = "failpoints"))] + let delay = false; + + if delay { + info!(self.logger, "[failpoint] delay update max ts for 1s"; "region_id" => region_id); + let deadline = Instant::now() + Duration::from_secs(1); + self.remote + .spawn(GLOBAL_TIMER_HANDLE.delay(deadline).compat().then(|_| f)); + } else { + self.remote.spawn(f); + } + } +} diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index d99c982fc97..ef1f7411ac9 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -11,7 +11,9 @@ use std::{ time::{Duration, Instant}, }; +use causal_ts::CausalTsProviderImpl; use collections::HashSet; +use concurrency_manager::ConcurrencyManager; use crossbeam::channel::{self, Receiver, Sender, TrySendError}; use engine_test::{ ctor::{CfOptions, DbOptions}, @@ -189,10 +191,12 @@ pub struct RunningState { impl RunningState { fn new( - pd_client: &RpcClient, + pd_client: &Arc, path: &Path, cfg: Arc>, 
transport: TestTransport, + concurrency_manager: ConcurrencyManager, + causal_ts_provider: Option>, logger: &Logger, ) -> (TestRouter, Self) { let cf_opts = ALL_CFS @@ -208,7 +212,7 @@ impl RunningState { let raft_engine = engine_test::raft::new_engine(&format!("{}", path.join("raft").display()), None) .unwrap(); - let mut bootstrap = Bootstrap::new(&raft_engine, 0, pd_client, logger.clone()); + let mut bootstrap = Bootstrap::new(&raft_engine, 0, pd_client.as_ref(), logger.clone()); let store_id = bootstrap.bootstrap_store().unwrap(); let mut store = Store::default(); store.set_id(store_id); @@ -243,9 +247,12 @@ impl RunningState { raft_engine.clone(), factory.clone(), transport.clone(), + pd_client.clone(), router.store_router(), store_meta.clone(), snap_mgr, + concurrency_manager, + causal_ts_provider, ) .unwrap(); @@ -269,7 +276,7 @@ impl Drop for RunningState { } pub struct TestNode { - pd_client: RpcClient, + pd_client: Arc, path: TempDir, running_state: Option, logger: Logger, @@ -277,7 +284,7 @@ pub struct TestNode { impl TestNode { fn with_pd(pd_server: &test_pd::Server, logger: Logger) -> TestNode { - let pd_client = test_pd::util::new_client(pd_server.bind_addrs(), None); + let pd_client = Arc::new(test_pd::util::new_client(pd_server.bind_addrs(), None)); let path = TempDir::new().unwrap(); TestNode { @@ -289,8 +296,15 @@ impl TestNode { } fn start(&mut self, cfg: Arc>, trans: TestTransport) -> TestRouter { - let (router, state) = - RunningState::new(&self.pd_client, self.path.path(), cfg, trans, &self.logger); + let (router, state) = RunningState::new( + &self.pd_client, + self.path.path(), + cfg, + trans, + ConcurrencyManager::new(1.into()), + None, + &self.logger, + ); self.running_state = Some(state); router } @@ -299,6 +313,10 @@ impl TestNode { &self.running_state().unwrap().factory } + pub fn pd_client(&self) -> &Arc { + &self.pd_client + } + fn stop(&mut self) { if let Some(state) = std::mem::take(&mut self.running_state) { let mut meta = 
state.store_meta.lock().unwrap(); diff --git a/components/raftstore-v2/tests/integrations/mod.rs b/components/raftstore-v2/tests/integrations/mod.rs index 4fb9ebcc323..52c8ba5e1f8 100644 --- a/components/raftstore-v2/tests/integrations/mod.rs +++ b/components/raftstore-v2/tests/integrations/mod.rs @@ -11,6 +11,7 @@ mod cluster; mod test_basic_write; mod test_conf_change; mod test_life; +mod test_pd_heartbeat; mod test_read; mod test_split; mod test_status; diff --git a/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs b/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs new file mode 100644 index 00000000000..c22ef4908bf --- /dev/null +++ b/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs @@ -0,0 +1,60 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use futures::executor::block_on; +use kvproto::raft_cmdpb::{RaftCmdRequest, StatusCmdType}; +use pd_client::PdClient; +use tikv_util::store::new_peer; + +use crate::cluster::Cluster; + +#[test] +fn test_region_heartbeat() { + let region_id = 2; + let cluster = Cluster::with_node_count(1, None); + let router = cluster.router(0); + + // When there is only one peer, it should campaign immediately. 
+ let mut req = RaftCmdRequest::default(); + req.mut_header().set_peer(new_peer(1, 3)); + req.mut_status_request() + .set_cmd_type(StatusCmdType::RegionLeader); + let res = router.query(region_id, req.clone()).unwrap(); + let status_resp = res.response().unwrap().get_status_response(); + assert_eq!( + *status_resp.get_region_leader().get_leader(), + new_peer(1, 3) + ); + + for _ in 0..5 { + let resp = block_on( + cluster + .node(0) + .pd_client() + .get_region_leader_by_id(region_id), + ) + .unwrap(); + if let Some((region, peer)) = resp { + assert_eq!(region.get_id(), region_id); + assert_eq!(peer.get_id(), 3); + assert_eq!(peer.get_store_id(), 1); + return; + } + std::thread::sleep(std::time::Duration::from_millis(50)); + } + panic!("failed to get region leader"); +} + +#[test] +fn test_store_heartbeat() { + let cluster = Cluster::with_node_count(1, None); + let store_id = cluster.node(0).id(); + for _ in 0..5 { + let stats = block_on(cluster.node(0).pd_client().get_store_stats_async(store_id)).unwrap(); + if stats.get_start_time() > 0 { + assert_ne!(stats.get_capacity(), 0); + return; + } + std::thread::sleep(std::time::Duration::from_millis(50)); + } + panic!("failed to get store stats"); +} diff --git a/components/raftstore-v2/tests/integrations/test_split.rs b/components/raftstore-v2/tests/integrations/test_split.rs index 97487a5d0c2..336a9c9d038 100644 --- a/components/raftstore-v2/tests/integrations/test_split.rs +++ b/components/raftstore-v2/tests/integrations/test_split.rs @@ -39,7 +39,7 @@ fn must_split(region_id: u64, req: RaftCmdRequest, router: &mut TestRouter) { router.send(region_id, msg).unwrap(); block_on(sub.result()).unwrap(); - // todo: when persistent implementation is ready, we can use tablet index of + // TODO: when persistent implementation is ready, we can use tablet index of // the parent to check whether the split is done. Now, just sleep a second. 
thread::sleep(Duration::from_secs(1)); } diff --git a/components/test_pd/src/mocker/service.rs b/components/test_pd/src/mocker/service.rs index 2ff5c178c67..45dd6e5661d 100644 --- a/components/test_pd/src/mocker/service.rs +++ b/components/test_pd/src/mocker/service.rs @@ -19,7 +19,7 @@ pub struct Service { id_allocator: AtomicUsize, members_resp: Mutex>, is_bootstrapped: AtomicBool, - stores: Mutex>, + stores: Mutex>, regions: Mutex>, leaders: Mutex>, feature_gate: Mutex, @@ -47,7 +47,10 @@ impl Service { /// Add an arbitrary store. pub fn add_store(&self, store: Store) { let store_id = store.get_id(); - self.stores.lock().unwrap().insert(store_id, store); + self.stores + .lock() + .unwrap() + .insert(store_id, (store, StoreStats::new())); } pub fn set_cluster_version(&self, version: String) { @@ -107,7 +110,7 @@ impl PdMocker for Service { self.stores .lock() .unwrap() - .insert(store.get_id(), store.clone()); + .insert(store.get_id(), (store.clone(), StoreStats::new())); self.regions .lock() .unwrap() @@ -138,9 +141,10 @@ impl PdMocker for Service { let mut resp = GetStoreResponse::default(); let stores = self.stores.lock().unwrap(); match stores.get(&req.get_store_id()) { - Some(store) => { + Some((store, stats)) => { resp.set_header(Service::header()); resp.set_store(store.clone()); + resp.set_stats(stats.clone()); Some(Ok(resp)) } None => { @@ -160,7 +164,7 @@ impl PdMocker for Service { resp.set_header(Service::header()); let exclude_tombstone = req.get_exclude_tombstone_stores(); let stores = self.stores.lock().unwrap(); - for store in stores.values() { + for (store, _) in stores.values() { if exclude_tombstone && store.get_state() == StoreState::Tombstone { continue; } @@ -244,11 +248,22 @@ impl PdMocker for Service { Some(Ok(resp)) } - fn store_heartbeat(&self, _: &StoreHeartbeatRequest) -> Option> { + fn store_heartbeat( + &self, + req: &StoreHeartbeatRequest, + ) -> Option> { let mut resp = StoreHeartbeatResponse::default(); let header = 
Service::header(); resp.set_header(header); resp.set_cluster_version(self.feature_gate.lock().unwrap().to_owned()); + if let Some((_, stats)) = self + .stores + .lock() + .unwrap() + .get_mut(&req.get_stats().get_store_id()) + { + *stats = req.get_stats().clone(); + } Some(Ok(resp)) } From dc347f98ae70e8f829a55d4df1f002b4ad17e72d Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Thu, 24 Nov 2022 12:43:58 +0800 Subject: [PATCH 0359/1149] scheduler: Support keeping some latches for another command when releasing latches. (#13833) ref tikv/tikv#13298, ref tikv/tikv#13826 Makes latches support keeping some of the latch slots when releasing, and these kept latch slots can be derived by another scheduler command. This is necessary for supporting resuming pessimistic lock requests when releasing pessimistic locks. It provides a mechanism to avoid the latch from being acquired by other later-arriving requests at the time between the lock-releasing's end and the resumed pessimistic lock command's beginning. Signed-off-by: MyonKeminta Co-authored-by: Ti Chi Robot --- src/storage/txn/latch.rs | 230 +++++++++++++++++++++++++++++++++-- src/storage/txn/scheduler.rs | 4 +- 2 files changed, 224 insertions(+), 10 deletions(-) diff --git a/src/storage/txn/latch.rs b/src/storage/txn/latch.rs index 12cc51207bb..a662d9bab79 100644 --- a/src/storage/txn/latch.rs +++ b/src/storage/txn/latch.rs @@ -79,6 +79,11 @@ impl Latch { self.waiting.push_back(Some((key_hash, cid))); } + /// Pushes the cid to the front of the queue. Be careful when using it. + fn push_preemptive(&mut self, key_hash: u64, cid: u64) { + self.waiting.push_front(Some((key_hash, cid))); + } + /// For some hot keys, the waiting list maybe very long, so we should shrink /// the waiting VecDeque after pop. fn maybe_shrink(&mut self) { @@ -137,6 +142,12 @@ impl Lock { self.required_hashes.len() == self.owned_count } + /// Force set the state of the `Lock` to be already-acquired. 
Be careful + /// when using it. + pub fn force_assume_acquired(&mut self) { + self.owned_count = self.required_hashes.len(); + } + pub fn is_write_lock(&self) -> bool { !self.required_hashes.is_empty() } @@ -196,19 +207,62 @@ impl Latches { /// Releases all latches owned by the `lock` of command with ID `who`, /// returns the wakeup list. /// + /// Optionally, this function can release partial of the given `Lock` while + /// leaving the renaming unlocked, so that some of the latches can be + /// used in another command. This can be done by passing the cid of the + /// command who will use the kept latch slots later, and the `Lock` that + /// need to be kept via the parameter `keep_latches_for_next_cmd`. Note + /// that the lock in it is assumed to be a subset of the parameter + /// `lock` which is going to be released. + /// /// Preconditions: the caller must ensure the command is at the front of the /// latches. - pub fn release(&self, lock: &Lock, who: u64) -> Vec { + pub fn release( + &self, + lock: &Lock, + who: u64, + keep_latches_for_next_cmd: Option<(u64, &Lock)>, + ) -> Vec { + // Used to + let dummy_vec = vec![]; + let (keep_latches_for_cid, mut keep_latches_it) = match keep_latches_for_next_cmd { + Some((cid, lock)) => (Some(cid), lock.required_hashes.iter().peekable()), + None => (None, dummy_vec.iter().peekable()), + }; + + // `keep_latches_it` must be sorted and deduped since it's retrieved from a + // `Lock` object. 
+ let mut wakeup_list: Vec = vec![]; for &key_hash in &lock.required_hashes[..lock.owned_count] { let mut latch = self.lock_latch(key_hash); let (v, front) = latch.pop_front(key_hash).unwrap(); assert_eq!(front, who); assert_eq!(v, key_hash); - if let Some(wakeup) = latch.get_first_req_by_hash(key_hash) { - wakeup_list.push(wakeup); + + let keep_for_next_cmd = if let Some(&&next_keep_hash) = keep_latches_it.peek() { + assert!(next_keep_hash >= key_hash); + if next_keep_hash == key_hash { + keep_latches_it.next(); + true + } else { + false + } + } else { + false + }; + + if !keep_for_next_cmd { + if let Some(wakeup) = latch.get_first_req_by_hash(key_hash) { + wakeup_list.push(wakeup); + } + } else { + latch.push_preemptive(key_hash, keep_latches_for_cid.unwrap()); } } + + assert!(keep_latches_it.next().is_none()); + wakeup_list } @@ -220,6 +274,8 @@ impl Latches { #[cfg(test)] mod tests { + use std::iter::once; + use super::*; #[test] @@ -242,7 +298,7 @@ mod tests { assert_eq!(acquired_b, false); // a release lock, and get wakeup list - let wakeup = latches.release(&lock_a, cid_a); + let wakeup = latches.release(&lock_a, cid_a, None); assert_eq!(wakeup[0], cid_b); // b acquire lock success @@ -277,7 +333,7 @@ mod tests { assert_eq!(acquired_c, false); // a release lock, and get wakeup list - let wakeup = latches.release(&lock_a, cid_a); + let wakeup = latches.release(&lock_a, cid_a, None); assert_eq!(wakeup[0], cid_c); // c acquire lock failed again, cause b occupied slot 4 @@ -285,7 +341,7 @@ mod tests { assert_eq!(acquired_c, false); // b release lock, and get wakeup list - let wakeup = latches.release(&lock_b, cid_b); + let wakeup = latches.release(&lock_b, cid_b, None); assert_eq!(wakeup[0], cid_c); // finally c acquire lock success @@ -326,7 +382,7 @@ mod tests { assert_eq!(acquired_d, false); // a release lock, and get wakeup list - let wakeup = latches.release(&lock_a, cid_a); + let wakeup = latches.release(&lock_a, cid_a, None); assert_eq!(wakeup[0], cid_c); 
// c acquire lock success @@ -334,11 +390,169 @@ mod tests { assert_eq!(acquired_c, true); // b release lock, and get wakeup list - let wakeup = latches.release(&lock_b, cid_b); + let wakeup = latches.release(&lock_b, cid_b, None); assert_eq!(wakeup[0], cid_d); // finally d acquire lock success acquired_d = latches.acquire(&mut lock_d, cid_d); assert_eq!(acquired_d, true); } + + fn check_latch_holder(latches: &Latches, key: &[u8], expected_holder_cid: Option) { + let hash = Lock::hash(&key); + let actual_holder = latches.lock_latch(hash).get_first_req_by_hash(hash); + assert_eq!(actual_holder, expected_holder_cid); + } + + fn is_latches_empty(latches: &Latches) -> bool { + for i in 0..(latches.size as u64) { + if !latches.lock_latch(i).waiting.iter().all(|x| x.is_none()) { + return false; + } + } + true + } + + fn test_partially_releasing_impl(size: usize) { + let latches = Latches::new(size); + + // Single key. + let key = b"k1"; + let mut lock = Lock::new(once(key)); + assert!(latches.acquire(&mut lock, 1)); + assert!(!is_latches_empty(&latches)); + let mut lock2 = Lock::new(once(key)); + let wakeup = latches.release(&lock, 1, Some((2, &lock2))); + assert!(wakeup.is_empty()); + check_latch_holder(&latches, key, Some(2)); + lock2.force_assume_acquired(); + let wakeup = latches.release(&lock2, 2, None); + assert!(wakeup.is_empty()); + assert!(is_latches_empty(&latches)); + + // Single key with queueing commands. 
+ let mut lock = Lock::new(once(key)); + let mut queueing_lock = Lock::new(once(key)); + assert!(latches.acquire(&mut lock, 3)); + assert!(!latches.acquire(&mut queueing_lock, 4)); + let mut lock2 = Lock::new(once(key)); + let wakeup = latches.release(&lock, 3, Some((5, &lock2))); + assert!(wakeup.is_empty()); + check_latch_holder(&latches, key, Some(5)); + lock2.force_assume_acquired(); + let wakeup = latches.release(&lock2, 5, None); + assert_eq!(wakeup, vec![4u64]); + assert!(latches.acquire(&mut queueing_lock, 4)); + let wakeup = latches.release(&queueing_lock, 4, None); + assert!(wakeup.is_empty()); + assert!(is_latches_empty(&latches)); + + // Multi keys, keep all. + let keys = vec![b"k1", b"k2", b"k3", b"k4"]; + let mut lock = Lock::new(keys.iter()); + assert!(latches.acquire(&mut lock, 11)); + let mut lock2 = Lock::new(keys.iter()); + let wakeup = latches.release(&lock, 11, Some((12, &lock2))); + assert!(wakeup.is_empty()); + for &key in &keys { + check_latch_holder(&latches, key, Some(12)); + } + assert!(!is_latches_empty(&latches)); + lock2.force_assume_acquired(); + let wakeup = latches.release(&lock2, 12, None); + assert!(wakeup.is_empty()); + assert!(is_latches_empty(&latches)); + + // Multi keys, keep all, with queueing command. + let mut lock = Lock::new(keys.iter()); + assert!(latches.acquire(&mut lock, 11)); + let mut queueing_locks: Vec<_> = keys.iter().map(|k| Lock::new(once(k))).collect(); + for (cid, lock) in (12..16).zip(queueing_locks.iter_mut()) { + assert!(!latches.acquire(lock, cid)); + } + let mut lock2 = Lock::new(keys.iter()); + let wakeup = latches.release(&lock, 11, Some((17, &lock2))); + assert!(wakeup.is_empty()); + for &key in &keys { + check_latch_holder(&latches, key, Some(17)); + } + assert!(!is_latches_empty(&latches)); + lock2.force_assume_acquired(); + let mut wakeup = latches.release(&lock2, 17, None); + wakeup.sort_unstable(); + // Wake up queueing commands. 
+ assert_eq!(wakeup, vec![12u64, 13, 14, 15]); + for (cid, mut lock) in (12..16).zip(queueing_locks) { + assert!(latches.acquire(&mut lock, cid)); + let wakeup = latches.release(&lock, cid, None); + assert!(wakeup.is_empty()); + } + assert!(is_latches_empty(&latches)); + + // 4 keys, keep 2 of them. + for (i1, &k1) in keys[0..3].iter().enumerate() { + for &k2 in keys[i1 + 1..4].iter() { + let mut lock = Lock::new(keys.iter()); + assert!(latches.acquire(&mut lock, 21)); + let mut lock2 = Lock::new(vec![k1, k2]); + let wakeup = latches.release(&lock, 21, Some((22, &lock2))); + assert!(wakeup.is_empty()); + check_latch_holder(&latches, k1, Some(22)); + check_latch_holder(&latches, k2, Some(22)); + lock2.force_assume_acquired(); + let wakeup = latches.release(&lock2, 22, None); + assert!(wakeup.is_empty()); + assert!(is_latches_empty(&latches)); + } + } + + // 4 keys keep 2 of them, with queueing commands. + for (i1, &k1) in keys[0..3].iter().enumerate() { + for (i2, &k2) in keys[i1 + 1..4].iter().enumerate() { + let mut lock = Lock::new(keys.iter()); + assert!(latches.acquire(&mut lock, 21)); + + let mut queueing_locks: Vec<_> = keys.iter().map(|k| Lock::new(once(k))).collect(); + for (cid, lock) in (22..26).zip(queueing_locks.iter_mut()) { + assert!(!latches.acquire(lock, cid)); + } + + let mut lock2 = Lock::new(vec![k1, k2]); + let mut wakeup = latches.release(&lock, 21, Some((27, &lock2))); + assert_eq!(wakeup.len(), 2); + + // The latch of k1 and k2 is preempted, and queueing locks on the other two keys + // will be woken up. 
+ let preempted_cids = vec![(i1 + 22) as u64, (i1 + 1 + i2 + 22) as u64]; + let expected_wakeup_cids: Vec<_> = (22..26u64) + .filter(|x| !preempted_cids.contains(x)) + .collect(); + wakeup.sort_unstable(); + assert_eq!(wakeup, expected_wakeup_cids); + + check_latch_holder(&latches, k1, Some(27)); + check_latch_holder(&latches, k2, Some(27)); + + lock2.force_assume_acquired(); + let mut wakeup = latches.release(&lock2, 27, None); + wakeup.sort_unstable(); + assert_eq!(wakeup, preempted_cids); + + for (cid, mut lock) in (22..26).zip(queueing_locks) { + assert!(latches.acquire(&mut lock, cid)); + let wakeup = latches.release(&lock, cid, None); + assert!(wakeup.is_empty()); + } + + assert!(is_latches_empty(&latches)); + } + } + } + + #[test] + fn test_partially_releasing() { + test_partially_releasing_impl(256); + test_partially_releasing_impl(4); + test_partially_releasing_impl(2); + } } diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 24ef7466e63..bc1598d65fa 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -449,7 +449,7 @@ impl Scheduler { /// Releases all the latches held by a command. 
fn release_lock(&self, lock: &Lock, cid: u64) { - let wakeup_list = self.inner.latches.release(lock, cid); + let wakeup_list = self.inner.latches.release(lock, cid, None); for wcid in wakeup_list { self.try_to_wake_up(wcid); } @@ -1660,7 +1660,7 @@ mod tests { if id != 0 { assert!(latches.acquire(&mut lock, id)); } - let unlocked = latches.release(&lock, id); + let unlocked = latches.release(&lock, id, None); if id == max_id { assert!(unlocked.is_empty()); } else { From f7ba20232fa232329e55eac00898042bf0560721 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Fri, 25 Nov 2022 10:21:59 +0800 Subject: [PATCH 0360/1149] reader: fix the invisible write record flashback bug (#13845) close tikv/tikv#13844 Fix the bug that when a key's last write record is `WriteType::Lock` or `WriteType::Rollback`, the `scan_writes` will return the invisible write record as the result, which will affect the flashback correctness. Signed-off-by: JmPotato --- src/storage/mvcc/reader/reader.rs | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index dd6bff6a157..c7cb9194068 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -638,13 +638,18 @@ impl MvccReader { break; } WriteType::Lock | WriteType::Rollback => { - // We should find the latest visible version after it. + // Only return the PUT/DELETE write record. + write = None; + // Reach the end. + if !cursor.valid()? { + break; + } + // Try to find the latest visible version before it. let key = Key::from_encoded_slice(cursor.key(&mut self.statistics.write)); // Could not find the visible version, current cursor is on the next - // key, so we set both `write` and `cur_key` to `None`. + // key, so we set `cur_key` to `None`. if key.truncate_ts()? 
!= user_key { - write = None; cur_key = None; break; } @@ -1836,6 +1841,13 @@ pub mod tests { 8, ); engine.commit(b"k3", 8, 9); + // Prewrite and rollback k4. + engine.prewrite( + Mutation::make_put(Key::from_raw(b"k4"), b"v4@1".to_vec()), + b"k4", + 10, + ); + engine.rollback(b"k4", 10); // Current MVCC keys in `CF_WRITE` should be: // PUT k0 -> v0@999 @@ -1847,6 +1859,7 @@ pub mod tests { // PUT k3 -> v3@8 // ROLLBACK k3 -> v3@7 // PUT k3 -> v3@5 + // ROLLBACK k4 -> v4@1 struct Case { start_key: Option, @@ -2083,15 +2096,25 @@ pub mod tests { start_key: None, end_key: None, version: Some(0), - limit: 5, + limit: 6, expect_res: vec![ (Key::from_raw(b"k0"), None), (Key::from_raw(b"k1"), None), (Key::from_raw(b"k2"), None), (Key::from_raw(b"k3"), None), + (Key::from_raw(b"k4"), None), ], expect_is_remain: false, }, + // Test the invisible record. + Case { + start_key: Some(Key::from_raw(b"k4")), + end_key: None, + version: Some(10), + limit: 1, + expect_res: vec![(Key::from_raw(b"k4"), None)], + expect_is_remain: true, + }, ]; for (idx, case) in cases.iter().enumerate() { From 998cb30d4d6087a40454a5787b56f0a151ae24e0 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Fri, 25 Nov 2022 14:47:59 +0800 Subject: [PATCH 0361/1149] raftstore-v2: support apply snapshot (#13734) ref tikv/tikv#12842 aftstore-v2: support apply snapshot This PR implementation apply snapshot relative changes. 
- add snap worker for multi-rocksdb Signed-off-by: bufferflies <1045931706@qq.com> --- components/engine_test/src/lib.rs | 7 +- components/raftstore-v2/src/batch/store.rs | 12 ++- .../src/operation/ready/async_writer.rs | 11 ++- .../raftstore-v2/src/operation/ready/mod.rs | 44 +++++++-- .../src/operation/ready/snapshot.rs | 97 ++++++++++++++++++- components/raftstore-v2/src/raft/storage.rs | 66 ++++++++++++- .../tests/integrations/cluster.rs | 47 +++++++-- .../tests/integrations/test_conf_change.rs | 40 +++++++- .../raftstore/src/store/async_io/read.rs | 2 +- .../raftstore/src/store/async_io/write.rs | 21 +++- .../raftstore/src/store/entry_storage.rs | 10 ++ components/raftstore/src/store/snap.rs | 9 +- src/server/tablet_snap.rs | 10 +- 13 files changed, 331 insertions(+), 45 deletions(-) diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index ae834457757..77bd2d3be7c 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -128,12 +128,17 @@ pub mod kv { db_opt: DbOptions, cf_opts: Vec<(&'static str, KvTestCfOptions)>, ) -> Self { - Self { + let factory = Self { root_path: root_path.to_path_buf(), db_opt, cf_opts, root_db: Arc::new(Mutex::default()), + }; + let tablet_path = factory.tablets_path(); + if !tablet_path.exists() { + std::fs::create_dir_all(tablet_path).unwrap(); } + factory } fn create_tablet(&self, tablet_path: &Path) -> Result { diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 1eea2017571..199e8cafbd8 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -77,6 +77,7 @@ pub struct StoreContext { pub tablet_factory: Arc>, pub apply_pool: FuturePool, pub read_scheduler: Scheduler>, + pub snap_mgr: TabletSnapManager, pub pd_scheduler: Scheduler, } @@ -230,6 +231,7 @@ struct StorePollerBuilder { apply_pool: FuturePool, logger: Logger, store_meta: Arc>>, + snap_mgr: 
TabletSnapManager, } impl StorePollerBuilder { @@ -245,6 +247,7 @@ impl StorePollerBuilder { store_writers: &mut StoreWriters, logger: Logger, store_meta: Arc>>, + snap_mgr: TabletSnapManager, ) -> Self { let pool_size = cfg.value().apply_batch_system.pool_size; let max_pool_size = std::cmp::max( @@ -269,6 +272,7 @@ impl StorePollerBuilder { logger, write_senders: store_writers.senders(), store_meta, + snap_mgr, } } @@ -341,6 +345,7 @@ where tablet_factory: self.tablet_factory.clone(), apply_pool: self.apply_pool.clone(), read_scheduler: self.read_scheduler.clone(), + snap_mgr: self.snap_mgr.clone(), pd_scheduler: self.pd_scheduler.clone(), }; let cfg_tracker = self.cfg.clone().tracker("raftstore".to_string()); @@ -408,7 +413,7 @@ impl StoreSystem { .spawn(store_id, raft_engine.clone(), None, router, &trans, &cfg)?; let mut read_runner = ReadRunner::new(router.clone(), raft_engine.clone()); - read_runner.set_snap_mgr(snap_mgr); + read_runner.set_snap_mgr(snap_mgr.clone()); let read_scheduler = workers .async_read_worker .start("async-read-worker", read_runner); @@ -441,6 +446,7 @@ impl StoreSystem { &mut workers.store_writers, self.logger.clone(), store_meta.clone(), + snap_mgr, ); self.workers = Some(workers); let peers = builder.init()?; @@ -512,7 +518,7 @@ impl StoreRouter { ) -> std::result::Result<(), TrySendError>> { let id = msg.get_region_id(); let peer_msg = PeerMsg::RaftMessage(msg); - let store_msg = match self.try_send(id, peer_msg) { + let store_msg = match self.router.try_send(id, peer_msg) { Either::Left(Ok(())) => return Ok(()), Either::Left(Err(TrySendError::Full(PeerMsg::RaftMessage(m)))) => { return Err(TrySendError::Full(m)); @@ -523,7 +529,7 @@ impl StoreRouter { Either::Right(PeerMsg::RaftMessage(m)) => StoreMsg::RaftMessage(m), _ => unreachable!(), }; - match self.send_control(store_msg) { + match self.router.send_control(store_msg) { Ok(()) => Ok(()), Err(TrySendError::Full(StoreMsg::RaftMessage(m))) => Err(TrySendError::Full(m)), 
Err(TrySendError::Disconnected(StoreMsg::RaftMessage(m))) => { diff --git a/components/raftstore-v2/src/operation/ready/async_writer.rs b/components/raftstore-v2/src/operation/ready/async_writer.rs index d5673d76a40..a7bce44fe05 100644 --- a/components/raftstore-v2/src/operation/ready/async_writer.rs +++ b/components/raftstore-v2/src/operation/ready/async_writer.rs @@ -22,6 +22,7 @@ struct UnpersistedReady { /// Max number of following ready whose data to be persisted is empty. max_empty_number: u64, raft_msgs: Vec>, + has_snapshot: bool, } /// A writer that handles asynchronous writes. @@ -70,6 +71,7 @@ impl AsyncWriter { fn send(&mut self, ctx: &mut impl WriteRouterContext, task: WriteTask) { let ready_number = task.ready_number(); + let has_snapshot = task.has_snapshot; self.write_router.send_write_msg( ctx, self.unpersisted_readies.back().map(|r| r.number), @@ -79,6 +81,7 @@ impl AsyncWriter { number: ready_number, max_empty_number: ready_number, raft_msgs: vec![], + has_snapshot, }); } @@ -108,9 +111,9 @@ impl AsyncWriter { ctx: &mut impl WriteRouterContext, ready_number: u64, logger: &Logger, - ) -> Vec> { + ) -> (Vec>, bool) { if self.persisted_number >= ready_number { - return vec![]; + return (vec![], false); } let last_unpersisted = self.unpersisted_readies.back(); @@ -124,11 +127,13 @@ impl AsyncWriter { } let mut raft_messages = vec![]; + let mut has_snapshot = false; // There must be a match in `self.unpersisted_readies`. 
loop { let Some(v) = self.unpersisted_readies.pop_front() else { panic!("{:?} ready number not found {}", logger.list(), ready_number); }; + has_snapshot |= v.has_snapshot; if v.number > ready_number { panic!( "{:?} ready number not matched {:?} vs {}", @@ -151,7 +156,7 @@ impl AsyncWriter { self.write_router .check_new_persisted(ctx, self.persisted_number); - raft_messages + (raft_messages, has_snapshot) } pub fn persisted_number(&self) -> u64 { diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 9e639f233cc..1c8c9d80338 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -22,9 +22,12 @@ mod snapshot; use std::{cmp, time::Instant}; -use engine_traits::{KvEngine, RaftEngine}; +use engine_traits::{KvEngine, MiscExt, OpenOptions, RaftEngine, TabletFactory}; use error_code::ErrorCodeExt; -use kvproto::{raft_cmdpb::AdminCmdType, raft_serverpb::RaftMessage}; +use kvproto::{ + raft_cmdpb::AdminCmdType, + raft_serverpb::{PeerState, RaftMessage, RaftSnapshotData}, +}; use protobuf::Message as _; use raft::{eraftpb, Ready, StateRole, INVALID_ID}; use raftstore::store::{util, ExtraStates, FetchedLogs, ReadProgress, Transport, WriteTask}; @@ -40,6 +43,7 @@ use crate::{ fsm::PeerFsmDelegate, raft::{Peer, Storage}, router::{ApplyTask, PeerTick}, + Result, }; impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { @@ -334,7 +338,7 @@ impl Peer { let ready_number = ready.number(); let mut write_task = WriteTask::new(self.region_id(), self.peer_id(), ready_number); self.storage_mut() - .handle_raft_ready(&mut ready, &mut write_task); + .handle_raft_ready(ctx, &mut ready, &mut write_task); if !ready.persisted_messages().is_empty() { write_task.messages = ready .take_persisted_messages() @@ -388,17 +392,27 @@ impl Peer { error!(self.logger, "peer id not matched"; "persisted_peer_id" => peer_id, "persisted_number" 
=> ready_number); return; } - let persisted_message = self - .async_writer - .on_persisted(ctx, ready_number, &self.logger); + let (persisted_message, has_snapshot) = + self.async_writer + .on_persisted(ctx, ready_number, &self.logger); for msgs in persisted_message { for msg in msgs { self.send_raft_message(ctx, msg); } } + let persisted_number = self.async_writer.persisted_number(); self.raft_group_mut().on_persist_ready(persisted_number); let persisted_index = self.raft_group().raft.raft_log.persisted; + /// The apply snapshot process order would be: + /// - Get the snapshot from the ready + /// - Wait for async writer to load this tablet + /// In this step, the snapshot has loaded finish, but some apply state + /// need to update. + if has_snapshot { + self.on_applied_snapshot(ctx); + } + self.storage_mut() .entry_storage_mut() .update_cache_persisted(persisted_index); @@ -509,11 +523,25 @@ impl Peer { impl Storage { /// Apply the ready to the storage. If there is any states need to be /// persisted, it will be written to `write_task`. 
- fn handle_raft_ready(&mut self, ready: &mut Ready, write_task: &mut WriteTask) { + fn handle_raft_ready( + &mut self, + ctx: &mut StoreContext, + ready: &mut Ready, + write_task: &mut WriteTask, + ) { let prev_raft_state = self.entry_storage().raft_state().clone(); let ever_persisted = self.ever_persisted(); - // TODO: handle snapshot + if !ready.snapshot().is_empty() { + if let Err(e) = self.apply_snapshot( + ready.snapshot(), + write_task, + ctx.snap_mgr.clone(), + ctx.tablet_factory.clone(), + ) { + error!(self.logger(),"failed to apply snapshot";"error" => ?e) + } + } let entry_storage = self.entry_storage_mut(); if !ready.entries().is_empty() { diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index ad836ed2455..32e8a3f8ff8 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -28,19 +28,22 @@ use std::{ }, }; -use engine_traits::{KvEngine, RaftEngine}; -use kvproto::raft_serverpb::{RaftSnapshotData, RegionLocalState}; +use engine_traits::{KvEngine, OpenOptions, RaftEngine, TabletFactory}; +use kvproto::raft_serverpb::{PeerState, RaftSnapshotData, RegionLocalState}; use protobuf::Message; use raft::eraftpb::Snapshot; -use raftstore::store::{metrics::STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER, GenSnapRes, ReadTask}; +use raftstore::store::{ + metrics::STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER, GenSnapRes, ReadTask, TabletSnapKey, + TabletSnapManager, Transport, WriteTask, +}; use slog::{error, info, warn}; -use tikv_util::{box_try, worker::Scheduler}; +use tikv_util::{box_err, box_try, worker::Scheduler}; use crate::{ fsm::ApplyResReporter, raft::{Apply, Peer, Storage}, router::{ApplyTask, PeerTick}, - Result, + Result, StoreContext, }; #[derive(Debug)] @@ -115,6 +118,25 @@ impl Peer { self.set_has_ready(); } } + + pub fn on_applied_snapshot(&mut self, ctx: &mut StoreContext) { + let persisted_index = 
self.raft_group().raft.raft_log.persisted; + let first_index = self.storage().entry_storage().first_index(); + if first_index == persisted_index + 1 { + let region_id = self.region_id(); + let tablet = ctx + .tablet_factory + .open_tablet(region_id, Some(persisted_index), OpenOptions::default()) + .unwrap(); + self.tablet_mut().set(tablet); + self.schedule_apply_fsm(ctx); + self.storage_mut().on_applied_snapshot(); + self.raft_group_mut().advance_apply_to(persisted_index); + self.read_progress_mut() + .update_applied_core(persisted_index); + info!(self.logger, "apply tablet snapshot completely"); + } + } } impl Apply { @@ -313,4 +335,69 @@ impl Storage { *snap_state = SnapState::Generated(snap); true } + + pub fn on_applied_snapshot(&mut self) { + let mut entry = self.entry_storage_mut(); + let term = entry.truncated_term(); + let index = entry.truncated_index(); + entry.set_applied_term(term); + entry.apply_state_mut().set_applied_index(index); + self.region_state_mut().set_tablet_index(index); + } + + pub fn apply_snapshot( + &mut self, + snap: &Snapshot, + task: &mut WriteTask, + snap_mgr: TabletSnapManager, + tablet_factory: Arc>, + ) -> Result<()> { + let region_id = self.region().get_id(); + let peer_id = self.peer().get_id(); + info!( + self.logger(), + "begin to apply snapshot"; + ); + + let mut snap_data = RaftSnapshotData::default(); + snap_data.merge_from_bytes(snap.get_data())?; + let region = snap_data.take_region(); + if region.get_id() != region_id { + return Err(box_err!( + "mismatch region id {}!={}", + region_id, + region.get_id() + )); + } + + let last_index = snap.get_metadata().get_index(); + let last_term = snap.get_metadata().get_term(); + self.region_state_mut().set_state(PeerState::Normal); + self.region_state_mut().set_region(region); + self.entry_storage_mut() + .raft_state_mut() + .set_last_index(last_index); + self.entry_storage_mut().set_truncated_index(last_index); + self.entry_storage_mut().set_truncated_term(last_term); + 
self.entry_storage_mut().set_last_term(last_term); + + let key = TabletSnapKey::new(region_id, peer_id, last_term, last_index); + let mut path = snap_mgr.final_recv_path(&key); + let logger = self.logger().clone(); + // The snapshot require no additional processing such as ingest them to DB, but + // it should load it into the factory after it persisted. + let hook = move || { + if let Err(e) = tablet_factory.load_tablet(path.as_path(), region_id, last_index) { + panic!( + "{:?} failed to load tablet, path: {}, {:?}", + logger.list(), + path.display(), + e + ); + } + }; + task.persisted_cb = (Some(Box::new(hook))); + task.has_snapshot = true; + Ok(()) + } } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index d2abb6818d8..b3ad56af4fd 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -15,7 +15,9 @@ use raft::{ eraftpb::{ConfState, Entry, Snapshot}, GetEntriesContext, RaftState, INVALID_ID, }; -use raftstore::store::{util, EntryStorage, ReadTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM}; +use raftstore::store::{ + util, EntryStorage, ReadTask, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, +}; use slog::{info, o, Logger}; use tikv_util::{box_err, store::find_peer, worker::Scheduler}; @@ -282,6 +284,11 @@ impl Storage { }) } + #[inline] + pub fn region_state_mut(&mut self) -> &mut RegionLocalState { + &mut self.region_state + } + #[inline] pub fn raft_state(&self) -> &RaftLocalState { self.entry_storage.raft_state() @@ -413,8 +420,8 @@ mod tests { }; use raft::{eraftpb::Snapshot as RaftSnapshot, Error as RaftError, StorageError}; use raftstore::store::{ - AsyncReadNotifier, FetchedLogs, GenSnapRes, ReadRunner, ReadTask, TabletSnapKey, - TabletSnapManager, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, + util::new_empty_snapshot, AsyncReadNotifier, FetchedLogs, GenSnapRes, ReadRunner, ReadTask, + TabletSnapKey, TabletSnapManager, RAFT_INIT_LOG_INDEX, 
RAFT_INIT_LOG_TERM, }; use slog::o; use tempfile::TempDir; @@ -490,6 +497,57 @@ mod tests { assert_eq!(ts.get_term(), RAFT_INIT_LOG_TERM); } + #[test] + fn test_apply_snapshot() { + let region = new_region(); + let path = TempDir::new().unwrap(); + let mgr = TabletSnapManager::new(path.path().join("snap_dir").to_str().unwrap()); + mgr.init().unwrap(); + let raft_engine = + engine_test::raft::new_engine(&format!("{}", path.path().join("raft").display()), None) + .unwrap(); + let mut wb = raft_engine.log_batch(10); + write_initial_states(&mut wb, region.clone()).unwrap(); + assert!(!wb.is_empty()); + raft_engine.consume(&mut wb, true).unwrap(); + // building a tablet factory + let ops = DbOptions::default(); + let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); + let factory = Arc::new(TestTabletFactoryV2::new( + path.path().join("tablet").as_path(), + ops, + cf_opts, + )); + let mut worker = Worker::new("test-read-worker").lazy_build("test-read-worker"); + let sched = worker.scheduler(); + let logger = slog_global::borrow_global().new(o!()); + let mut s = Storage::new(4, 6, raft_engine.clone(), sched, &logger.clone()) + .unwrap() + .unwrap(); + + let snapshot = new_empty_snapshot(region.clone(), 10, 1, false); + let mut task = WriteTask::new(region.get_id(), 5, 0); + s.apply_snapshot(&snapshot, &mut task, mgr, factory) + .unwrap(); + + // It can be set before load tablet. + assert_eq!(PeerState::Normal, s.region_state().get_state()); + assert_eq!(10, s.entry_storage().truncated_index()); + assert_eq!(1, s.entry_storage().truncated_term()); + assert_eq!(1, s.entry_storage().last_term()); + assert_eq!(10, s.entry_storage().raft_state().last_index); + // This index can't be set before load tablet. 
+ assert_ne!(10, s.entry_storage().applied_index()); + assert_ne!(1, s.entry_storage().applied_term()); + assert_ne!(10, s.region_state().get_tablet_index()); + assert!(task.persisted_cb.is_some()); + + s.on_applied_snapshot(); + assert_eq!(10, s.entry_storage().applied_index()); + assert_eq!(1, s.entry_storage().applied_term()); + assert_eq!(10, s.region_state().get_tablet_index()); + } + #[test] fn test_storage_create_snapshot() { let region = new_region(); @@ -553,7 +611,7 @@ mod tests { assert_eq!(snap.get_metadata().get_term(), 0); assert_eq!(snap.get_data().is_empty(), false); let snap_key = TabletSnapKey::from_region_snap(4, 7, &snap); - let checkpointer_path = mgr.get_final_path_for_gen(&snap_key); + let checkpointer_path = mgr.tablet_gen_path(&snap_key); assert!(checkpointer_path.exists()); // Test cancel snapshot diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index ef1f7411ac9..24184233117 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -28,9 +28,10 @@ use kvproto::{ raft_serverpb::RaftMessage, }; use pd_client::RpcClient; +use raft::eraftpb::MessageType; use raftstore::store::{ region_meta::{RegionLocalState, RegionMeta}, - Config, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX, + Config, TabletSnapKey, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX, }; use raftstore_v2::{ create_store_batch_system, @@ -198,7 +199,7 @@ impl RunningState { concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, logger: &Logger, - ) -> (TestRouter, Self) { + ) -> (TestRouter, TabletSnapManager, Self) { let cf_opts = ALL_CFS .iter() .copied() @@ -240,6 +241,7 @@ impl RunningState { let router = RaftRouter::new(store_id, router); let store_meta = router.store_meta().clone(); let snap_mgr = TabletSnapManager::new(path.join("tablets_snap").to_str().unwrap()); + snap_mgr.init().unwrap(); system .start( 
store_id, @@ -250,7 +252,7 @@ impl RunningState { pd_client.clone(), router.store_router(), store_meta.clone(), - snap_mgr, + snap_mgr.clone(), concurrency_manager, causal_ts_provider, ) @@ -265,7 +267,7 @@ impl RunningState { transport, store_meta, }; - (TestRouter(router), state) + (TestRouter(router), snap_mgr, state) } } @@ -280,23 +282,24 @@ pub struct TestNode { path: TempDir, running_state: Option, logger: Logger, + snap_mgr: Option, } impl TestNode { fn with_pd(pd_server: &test_pd::Server, logger: Logger) -> TestNode { let pd_client = Arc::new(test_pd::util::new_client(pd_server.bind_addrs(), None)); let path = TempDir::new().unwrap(); - TestNode { pd_client, path, running_state: None, logger, + snap_mgr: None, } } fn start(&mut self, cfg: Arc>, trans: TestTransport) -> TestRouter { - let (router, state) = RunningState::new( + let (router, snap_mgr, state) = RunningState::new( &self.pd_client, self.path.path(), cfg, @@ -306,6 +309,7 @@ impl TestNode { &self.logger, ); self.running_state = Some(state); + self.snap_mgr = Some(snap_mgr); router } @@ -336,6 +340,10 @@ impl TestNode { self.running_state.as_ref() } + pub fn snap_mgr(&self) -> Option<&TabletSnapManager> { + self.snap_mgr.as_ref() + } + pub fn id(&self) -> u64 { self.running_state().unwrap().store_id } @@ -485,6 +493,33 @@ impl Cluster { continue; } }; + // Simulate already received the snapshot. 
+ if msg.get_message().get_msg_type() == MessageType::MsgSnapshot { + let from_offset = match self + .nodes + .iter() + .position(|n| n.id() == msg.get_from_peer().get_store_id()) + { + Some(offset) => offset, + None => { + debug!(self.logger, "failed to find snapshot source node"; "message" => ?msg); + continue; + } + }; + let key = TabletSnapKey::new( + region_id, + msg.get_to_peer().get_id(), + msg.get_message().get_snapshot().get_metadata().get_term(), + msg.get_message().get_snapshot().get_metadata().get_index(), + ); + let from_snap_mgr = self.node(from_offset).snap_mgr().unwrap(); + let to_snap_mgr = self.node(offset).snap_mgr().unwrap(); + let gen_path = from_snap_mgr.tablet_gen_path(&key); + let recv_path = to_snap_mgr.final_recv_path(&key); + assert!(gen_path.exists()); + std::fs::rename(gen_path, recv_path.clone()).unwrap(); + assert!(recv_path.exists()); + } regions.insert(msg.get_region_id()); if let Err(e) = self.routers[offset].send_raft_message(msg) { debug!(self.logger, "failed to send raft message"; "err" => ?e); diff --git a/components/raftstore-v2/tests/integrations/test_conf_change.rs b/components/raftstore-v2/tests/integrations/test_conf_change.rs index f9479786a7b..558962f8ef6 100644 --- a/components/raftstore-v2/tests/integrations/test_conf_change.rs +++ b/components/raftstore-v2/tests/integrations/test_conf_change.rs @@ -1,9 +1,11 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::time::Duration; +use std::{self, time::Duration}; -use kvproto::raft_cmdpb::AdminCmdType; +use engine_traits::{OpenOptions, Peekable, TabletFactory}; +use kvproto::raft_cmdpb::{AdminCmdType, CmdType, Request}; use raft::prelude::ConfChangeType; +use raftstore_v2::router::{PeerMsg, PeerTick}; use tikv_util::store::new_learner_peer; use crate::cluster::Cluster; @@ -11,6 +13,7 @@ use crate::cluster::Cluster; #[test] fn test_simple_change() { let cluster = Cluster::with_node_count(2, None); + let region_id = 2; let router0 = cluster.router(0); let mut req = router0.new_request_for(2); let admin_req = req.mut_admin_request(); @@ -29,6 +32,7 @@ fn test_simple_change() { let meta = router0 .must_query_debug_info(2, Duration::from_secs(3)) .unwrap(); + let match_index = meta.raft_apply.applied_index; assert_eq!(meta.region_state.epoch.version, epoch.get_version()); assert_eq!(meta.region_state.epoch.conf_ver, new_conf_ver); assert_eq!(meta.region_state.peers, vec![leader_peer, new_peer]); @@ -46,6 +50,38 @@ fn test_simple_change() { meta.raft_status.soft_state.leader_id, req.get_header().get_peer().get_id() ); + // Trigger the raft tick to replica the log to the learner and execute the + // snapshot task. 
+ router0 + .send(region_id, PeerMsg::Tick(PeerTick::Raft)) + .unwrap(); + cluster.dispatch(region_id, vec![]); + + // write one kv after snapshot + let (key, val) = (b"key", b"value"); + let mut write_req = router0.new_request_for(region_id); + let mut put_req = Request::default(); + put_req.set_cmd_type(CmdType::Put); + put_req.mut_put().set_key(key.to_vec()); + put_req.mut_put().set_value(val.to_vec()); + write_req.mut_requests().push(put_req); + let (msg, _) = PeerMsg::raft_command(write_req.clone()); + router0.send(region_id, msg).unwrap(); + std::thread::sleep(Duration::from_millis(1000)); + cluster.dispatch(region_id, vec![]); + + let meta = router1 + .must_query_debug_info(region_id, Duration::from_secs(3)) + .unwrap(); + // the learner truncated index muse be equal the leader applied index and can + // read the new written kv. + assert_eq!(match_index, meta.raft_apply.truncated_state.index); + assert!(meta.raft_apply.applied_index >= match_index); + let tablet_factory = cluster.node(1).tablet_factory(); + let tablet = tablet_factory + .open_tablet(region_id, None, OpenOptions::default().set_cache_only(true)) + .unwrap(); + assert_eq!(tablet.get_value(key).unwrap().unwrap(), val); req.mut_header() .mut_region_epoch() diff --git a/components/raftstore/src/store/async_io/read.rs b/components/raftstore/src/store/async_io/read.rs index 9e0215ca9c1..5dc01b40ef3 100644 --- a/components/raftstore/src/store/async_io/read.rs +++ b/components/raftstore/src/store/async_io/read.rs @@ -120,7 +120,7 @@ impl ReadRunner { } fn generate_snap(&self, snap_key: &TabletSnapKey, tablet: EK) -> crate::Result<()> { - let checkpointer_path = self.snap_mgr().get_final_path_for_gen(snap_key); + let checkpointer_path = self.snap_mgr().tablet_gen_path(snap_key); if checkpointer_path.as_path().exists() { // Remove the old checkpoint directly. 
std::fs::remove_dir_all(checkpointer_path.as_path())?; diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index e534a17fad1..354a796c99c 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -169,12 +169,15 @@ where ready_number: u64, pub send_time: Instant, pub raft_wb: Option, + // called after writing to kvdb and raftdb. + pub persisted_cb: Option>, pub entries: Vec, pub cut_logs: Option<(u64, u64)>, pub raft_state: Option, pub extra_write: ExtraWrite, pub messages: Vec, pub trackers: Vec, + pub has_snapshot: bool, } impl WriteTask @@ -195,6 +198,8 @@ where extra_write: ExtraWrite::None, messages: vec![], trackers: vec![], + persisted_cb: None, + has_snapshot: false, } } @@ -361,6 +366,7 @@ where pub extra_batch_write: ExtraBatchWrite, pub state_size: usize, pub tasks: Vec>, + pub persisted_cbs: Vec>, // region_id -> (peer_id, ready_number) pub readies: HashMap, } @@ -377,6 +383,7 @@ where extra_batch_write: ExtraBatchWrite::None, state_size: 0, tasks: vec![], + persisted_cbs: vec![], readies: HashMap::default(), } } @@ -430,7 +437,9 @@ where ); } } - + if let Some(v) = task.persisted_cb.take() { + self.persisted_cbs.push(v); + }; self.tasks.push(task); } @@ -511,6 +520,12 @@ where } } + fn after_write_all(&mut self) { + for hook in mem::take(&mut self.persisted_cbs) { + hook(); + } + } + fn after_write_to_raft_db(&mut self, metrics: &StoreWriteMetrics) { if metrics.waterfall_metrics { let now = std::time::Instant::now(); @@ -706,10 +721,8 @@ where write_kv_time = duration_to_sec(now.saturating_elapsed()); STORE_WRITE_KVDB_DURATION_HISTOGRAM.observe(write_kv_time); } - self.batch.after_write_to_kv_db(&self.metrics); } - fail_point!("raft_between_save"); let mut write_raft_time = 0f64; @@ -746,6 +759,8 @@ where self.batch.after_write_to_raft_db(&self.metrics); + self.batch.after_write_all(); + fail_point!("raft_before_follower_send"); 
let mut now = Instant::now(); diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index 705e2a776fa..c6278c890f7 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -958,6 +958,16 @@ impl EntryStorage { } } + #[inline] + pub fn set_truncated_index(&mut self, index: u64) { + self.apply_state.mut_truncated_state().set_index(index) + } + + #[inline] + pub fn set_truncated_term(&mut self, term: u64) { + self.apply_state.mut_truncated_state().set_term(term) + } + #[inline] pub fn first_index(&self) -> u64 { first_index(&self.apply_state) diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index a9f50d61edb..19b9622657d 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1956,22 +1956,23 @@ impl TabletSnapManager { Ok(()) } - pub fn get_final_path_for_gen(&self, key: &TabletSnapKey) -> PathBuf { + pub fn tablet_gen_path(&self, key: &TabletSnapKey) -> PathBuf { let prefix = format!("{}_{}", SNAP_GEN_PREFIX, key); PathBuf::from(&self.base).join(prefix) } - pub fn get_final_path_for_recv(&self, key: &TabletSnapKey) -> PathBuf { + pub fn final_recv_path(&self, key: &TabletSnapKey) -> PathBuf { let prefix = format!("{}_{}", SNAP_REV_PREFIX, key); PathBuf::from(&self.base).join(prefix) } - pub fn get_tmp_path_for_recv(&self, key: &TabletSnapKey) -> PathBuf { + + pub fn tmp_recv_path(&self, key: &TabletSnapKey) -> PathBuf { let prefix = format!("{}_{}{}", SNAP_REV_PREFIX, key, TMP_FILE_SUFFIX); PathBuf::from(&self.base).join(prefix) } pub fn delete_snapshot(&self, key: &TabletSnapKey) -> bool { - let path = self.get_final_path_for_gen(key); + let path = self.tablet_gen_path(key); if path.exists() && let Err(e) = fs::remove_dir_all(path.as_path()) { error!( "delete snapshot failed"; diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index 
cbda159a83e..837ec294fce 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -112,7 +112,7 @@ async fn send_snap_files( key: TabletSnapKey, limiter: Limiter, ) -> Result { - let path = mgr.get_final_path_for_gen(&key); + let path = mgr.tablet_gen_path(&key); info!("begin to send snapshot file";"snap_key" => %key); let files = fs::read_dir(&path)? .map(|f| Ok(f?.path())) @@ -236,7 +236,7 @@ async fn recv_snap_files( .ok_or_else(|| Error::Other("empty gRPC stream".into()))?; let context = RecvTabletSnapContext::new(head)?; let chunk_size = context.chunk_size; - let path = snap_mgr.get_tmp_path_for_recv(&context.key); + let path = snap_mgr.tmp_recv_path(&context.key); info!("begin to receive tablet snapshot files"; "file" => %path.display()); fs::create_dir_all(&path)?; let _with_io_type = WithIoType::new(context.io_type); @@ -274,7 +274,7 @@ async fn recv_snap_files( f.sync_data()?; } info!("received all tablet snapshot file"; "snap_key" => %context.key); - let final_path = snap_mgr.get_final_path_for_recv(&context.key); + let final_path = snap_mgr.final_recv_path(&context.key); fs::rename(&path, final_path)?; Ok(context) } @@ -514,7 +514,7 @@ mod tests { let send_path = TempDir::new().unwrap(); let send_snap_mgr = TabletSnapManager::new(send_path.path().join("snap_dir").to_str().unwrap()); - let snap_path = send_snap_mgr.get_final_path_for_gen(&snap_key); + let snap_path = send_snap_mgr.tablet_gen_path(&snap_key); create_dir_all(snap_path.as_path()).unwrap(); // send file should skip directory create_dir_all(snap_path.join("dir")).unwrap(); @@ -545,7 +545,7 @@ mod tests { .unwrap(); let stream = rx.map(|x: (SnapshotChunk, WriteFlags)| Ok(x.0)); - let final_path = recv_snap_manager.get_final_path_for_recv(&snap_key); + let final_path = recv_snap_manager.final_recv_path(&snap_key); let r = block_on(recv_snap_files(recv_snap_manager, stream, limiter)).unwrap(); assert_eq!(r.key, snap_key); std::thread::sleep(std::time::Duration::from_secs(1)); 
From 531f7a00f75793ab246f89fb7ee597e097d58494 Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Fri, 25 Nov 2022 17:25:59 +0800 Subject: [PATCH 0362/1149] storage: skip accumulated locks on non-existing keys (#13820) ref tikv/tikv#13694 For non-existing keys, we will record last_change_ts as 0 and set a non-zero versions_to_last_change. So, if we encounter such a write record when reading, we can quickly know this key does not exist. Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- components/txn_types/src/lock.rs | 10 +++-- components/txn_types/src/write.rs | 15 ++++--- src/storage/mod.rs | 2 +- src/storage/mvcc/reader/point_getter.rs | 27 +++++++++++- src/storage/mvcc/reader/reader.rs | 42 +++++++++++++++++-- src/storage/mvcc/reader/scanner/forward.rs | 24 +++++++---- .../txn/actions/acquire_pessimistic_lock.rs | 20 ++++++--- src/storage/txn/actions/prewrite.rs | 17 ++++++++ 8 files changed, 128 insertions(+), 29 deletions(-) diff --git a/components/txn_types/src/lock.rs b/components/txn_types/src/lock.rs index 040487388f9..c8e37823bc4 100644 --- a/components/txn_types/src/lock.rs +++ b/components/txn_types/src/lock.rs @@ -91,7 +91,9 @@ pub struct Lock { /// The commit TS of the latest PUT/DELETE record pub last_change_ts: TimeStamp, /// The number of versions that need skipping from the latest version to - /// find the latest PUT/DELETE record + /// find the latest PUT/DELETE record. + /// If versions_to_last_change > 0 but last_change_ts == 0, the key does not + /// have a PUT/DELETE record. pub versions_to_last_change: u64, /// The source of this txn. 
It is used by ticdc, if the value is 0 ticdc /// will sync the kv change event to downstream, if it is not 0, ticdc @@ -228,7 +230,7 @@ impl Lock { b.encode_u64(ts.into_inner()).unwrap(); } } - if !self.last_change_ts.is_zero() { + if !self.last_change_ts.is_zero() || self.versions_to_last_change != 0 { b.push(LAST_CHANGE_PREFIX); b.encode_u64(self.last_change_ts.into_inner()).unwrap(); b.encode_var_u64(self.versions_to_last_change).unwrap(); @@ -266,7 +268,7 @@ impl Lock { if !self.rollback_ts.is_empty() { size += 1 + MAX_VAR_U64_LEN + size_of::() * self.rollback_ts.len(); } - if !self.last_change_ts.is_zero() { + if !self.last_change_ts.is_zero() || self.versions_to_last_change != 0 { size += 1 + size_of::() + MAX_VAR_U64_LEN; } if self.txn_source != 0 { @@ -773,7 +775,7 @@ mod tests { 16, 8.into(), ) - .set_last_change(4.into(), 2), + .set_last_change(0.into(), 2), Lock::new( LockType::Lock, b"pk".to_vec(), diff --git a/components/txn_types/src/write.rs b/components/txn_types/src/write.rs index 52777e5e4b2..1a20518e423 100644 --- a/components/txn_types/src/write.rs +++ b/components/txn_types/src/write.rs @@ -281,11 +281,11 @@ impl Write { match self.write_type { WriteType::Put | WriteType::Delete => (commit_ts, 1), WriteType::Lock | WriteType::Rollback => { - // If `last_change_ts` is zero, do not set `last_change_ts` to indicate we don't - // know where is the last change. + // If neither `last_change_ts` nor `versions_to_last_change` exists, do not + // set `last_change_ts` to indicate we don't know where is the last change. // This should not happen if data is written in new version TiKV. If we hope to // support data from old TiKV, consider iterating to the last change to find it. 
- if !self.last_change_ts.is_zero() { + if !self.last_change_ts.is_zero() || self.versions_to_last_change != 0 { (self.last_change_ts, self.versions_to_last_change + 1) } else { (TimeStamp::zero(), 0) @@ -320,7 +320,9 @@ pub struct WriteRef<'a> { /// It only exists if this is a LOCK/ROLLBACK record. pub last_change_ts: TimeStamp, /// The number of versions that need skipping from this record - /// to find the latest PUT/DELETE record + /// to find the latest PUT/DELETE record. + /// If versions_to_last_change > 0 but last_change_ts == 0, the key does not + /// have a PUT/DELETE record before this write record. pub versions_to_last_change: u64, /// The source of this txn. pub txn_source: u64, @@ -411,7 +413,7 @@ impl WriteRef<'_> { b.push(GC_FENCE_PREFIX); b.encode_u64(ts.into_inner()).unwrap(); } - if !self.last_change_ts.is_zero() { + if !self.last_change_ts.is_zero() || self.versions_to_last_change != 0 { b.push(LAST_CHANGE_PREFIX); b.encode_u64(self.last_change_ts.into_inner()).unwrap(); b.encode_var_u64(self.versions_to_last_change).unwrap(); @@ -432,7 +434,7 @@ impl WriteRef<'_> { if self.gc_fence.is_some() { size += 1 + size_of::(); } - if !self.last_change_ts.is_zero() { + if !self.last_change_ts.is_zero() || self.versions_to_last_change != 0 { size += 1 + size_of::() + MAX_VAR_U64_LEN; } if self.txn_source != 0 { @@ -547,6 +549,7 @@ mod tests { Write::new(WriteType::Put, 456.into(), Some(b"short_value".to_vec())) .set_overlapped_rollback(true, Some(421397468076048385.into())), Write::new(WriteType::Lock, 456.into(), None).set_last_change(345.into(), 11), + Write::new(WriteType::Lock, 456.into(), None).set_last_change(0.into(), 11), Write::new(WriteType::Lock, 456.into(), None).set_txn_source(1), ]; for (i, write) in writes.drain(..).enumerate() { diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 55d8575101c..3e55d81f5d2 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -9767,7 +9767,7 @@ mod tests { for_update_ts: 10.into(), 
min_commit_ts: 11.into(), last_change_ts: TimeStamp::zero(), - versions_to_last_change: 0, + versions_to_last_change: 1, }, false ) diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index 651762aa88e..2f215986ca9 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -315,8 +315,10 @@ impl PointGetter { return Ok(None); } WriteType::Lock | WriteType::Rollback => { - if write.versions_to_last_change < SEEK_BOUND || write.last_change_ts.is_zero() - { + if write.versions_to_last_change > 0 && write.last_change_ts.is_zero() { + return Ok(None); + } + if write.versions_to_last_change < SEEK_BOUND { // Continue iterate next `write`. } else { let commit_ts = write.last_change_ts; @@ -1266,4 +1268,25 @@ mod tests { must_get_value(&mut batch_getter_ok, key4, val4); must_get_value(&mut batch_getter_ok, key5, val5); } + + #[test] + fn test_point_get_non_exist_skip_lock() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + let k = b"k"; + + // Write enough LOCK recrods + for start_ts in (1..30).into_iter().step_by(2) { + must_prewrite_lock(&mut engine, k, k, start_ts); + must_commit(&mut engine, k, start_ts, start_ts + 1); + } + + let mut getter = new_point_getter(&mut engine, 40.into()); + must_get_none(&mut getter, k); + let s = getter.take_statistics(); + // We can know the key doesn't exist without skipping all these locks according + // to last_change_ts and versions_to_last_change. 
+ assert_eq!(s.write.seek, 1); + assert_eq!(s.write.next, 0); + assert_eq!(s.write.get, 0); + } } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index c7cb9194068..752a8f0d00a 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -384,9 +384,10 @@ impl MvccReader { return Ok(None); } WriteType::Lock | WriteType::Rollback => { - if write.versions_to_last_change < SEEK_BOUND - || write.last_change_ts.is_zero() - { + if write.versions_to_last_change > 0 && write.last_change_ts.is_zero() { + return Ok(None); + } + if write.versions_to_last_change < SEEK_BOUND { ts = commit_ts.prev(); } else { let commit_ts = write.last_change_ts; @@ -1679,6 +1680,10 @@ pub mod tests { for_update_ts, 0, TimeStamp::zero(), + ) + .set_last_change( + TimeStamp::zero(), + (lock_type == LockType::Lock || lock_type == LockType::Pessimistic) as u64, ), ) }) @@ -2615,4 +2620,35 @@ pub mod tests { assert_eq!(reader.statistics.write.next, 0); assert_eq!(reader.statistics.write.get, 1); } + + #[test] + fn test_get_write_not_exist_skip_lock() { + let path = tempfile::Builder::new() + .prefix("_test_storage_mvcc_reader_get_write_not_exist_skip_lock") + .tempdir() + .unwrap(); + let path = path.path().to_str().unwrap(); + let region = make_region(1, vec![], vec![]); + let db = open_db(path, true); + let mut engine = RegionEngine::new(&db, ®ion); + let k = b"k"; + + // Write enough LOCK recrods + for start_ts in (6..30).into_iter().step_by(2) { + engine.lock(k, start_ts, start_ts + 1); + } + + let snap = RegionSnapshot::::from_raw(db, region); + let mut reader = MvccReader::new(snap, None, false); + + let res = reader + .get_write_with_commit_ts(&Key::from_raw(k), 40.into(), None) + .unwrap(); + // We can know the key doesn't exist without skipping all these locks according + // to last_change_ts and versions_to_last_change. 
+ assert!(res.is_none()); + assert_eq!(reader.statistics.write.seek, 1); + assert_eq!(reader.statistics.write.next, 0); + assert_eq!(reader.statistics.write.get, 0); + } } diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 6672842fab9..8828033c8a1 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -472,8 +472,10 @@ impl ScanPolicy for LatestKvPolicy { } WriteType::Delete => break None, WriteType::Lock | WriteType::Rollback => { - if write.versions_to_last_change < SEEK_BOUND || write.last_change_ts.is_zero() - { + if write.versions_to_last_change > 0 && write.last_change_ts.is_zero() { + break None; + } + if write.versions_to_last_change < SEEK_BOUND { // Continue iterate next `write`. cursors.write.next(&mut statistics.write); } else { @@ -1619,14 +1621,16 @@ mod latest_kv_tests { must_commit(&mut engine, b"k1", 6, 8); must_prewrite_put(&mut engine, b"k2", b"v21", b"k2", 2); must_commit(&mut engine, b"k2", 2, 6); - must_prewrite_put(&mut engine, b"k3", b"v31", b"k3", 3); - must_commit(&mut engine, b"k3", 3, 7); + must_prewrite_put(&mut engine, b"k4", b"v41", b"k4", 3); + must_commit(&mut engine, b"k4", 3, 7); for start_ts in (10..30).into_iter().step_by(2) { must_prewrite_lock(&mut engine, b"k1", b"k1", start_ts); must_commit(&mut engine, b"k1", start_ts, start_ts + 1); must_prewrite_lock(&mut engine, b"k3", b"k1", start_ts); must_commit(&mut engine, b"k3", start_ts, start_ts + 1); + must_prewrite_lock(&mut engine, b"k4", b"k1", start_ts); + must_commit(&mut engine, b"k4", start_ts, start_ts + 1); } must_prewrite_put(&mut engine, b"k1", b"v13", b"k1", 40); @@ -1652,7 +1656,11 @@ mod latest_kv_tests { // k3 | 27 | LOCK | // k3 | ... | LOCK | // k3 | 11 | LOCK | - // k3 | 7 | PUT | v31 + // k4 | 29 | LOCK | + // k4 | 27 | LOCK | + // k4 | ... 
| LOCK | + // k4 | 11 | LOCK | + // k4 | 7 | PUT | v41 let snapshot = engine.snapshot(Default::default()).unwrap(); let mut scanner = ScannerBuilder::new(snapshot, 35.into()) @@ -1676,11 +1684,11 @@ mod latest_kv_tests { assert_eq!( scanner.next().unwrap(), - Some((Key::from_raw(b"k3"), b"v31".to_vec())) + Some((Key::from_raw(b"k4"), b"v41".to_vec())) ); let stats = scanner.take_statistics(); - assert_le!(stats.write.next, 2); // skip k2@6, k3@47 - assert_eq!(stats.write.seek, 1); // seek k3@7 + assert_le!(stats.write.next, 1 + SEEK_BOUND as usize); // skip k2@6, near_seek to k4 (8 times next) + assert_eq!(stats.write.seek, 2); // seek k4, k4@7 } } diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index db4c2485d09..8e7c4d95118 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -192,8 +192,7 @@ pub fn acquire_pessimistic_lock( // Following seek_write read the previous write. let (prev_write_loaded, mut prev_write) = (true, None); - let mut last_change_ts = TimeStamp::zero(); - let mut versions_to_last_change = 0; + let (mut last_change_ts, mut versions_to_last_change); if let Some((commit_ts, write)) = reader.seek_write(&key, TimeStamp::max())? { // Find a previous write. 
if need_old_value { @@ -265,9 +264,7 @@ pub fn acquire_pessimistic_lock( check_data_constraint(reader, should_not_exist, &write, commit_ts, &key)?; } - if tls_can_enable(LAST_CHANGE_TS) { - (last_change_ts, versions_to_last_change) = write.next_last_change_info(commit_ts); - } + (last_change_ts, versions_to_last_change) = write.next_last_change_info(commit_ts); // Load value if locked_with_conflict, so that when the client (TiDB) need to // read the value during statement retry, it will be possible to read the value @@ -296,6 +293,13 @@ pub fn acquire_pessimistic_lock( } }; } + } else { + // last_change_ts == 0 && versions_to_last_change > 0 means the key actually + // does not exist. + (last_change_ts, versions_to_last_change) = (TimeStamp::zero(), 1); + } + if !tls_can_enable(LAST_CHANGE_TS) { + (last_change_ts, versions_to_last_change) = (TimeStamp::zero(), 0); } let old_value = load_old_value( @@ -1731,6 +1735,12 @@ pub mod tests { assert_eq!(lock.last_change_ts, 40.into()); assert_eq!(lock.versions_to_last_change, 6); pessimistic_rollback::tests::must_success(&mut engine, key, 140, 140); + + // Lock on a key with no write record + must_succeed(&mut engine, b"k2", b"k2", 150, 150); + let lock = must_pessimistic_locked(&mut engine, b"k2", 150, 150); + assert!(lock.last_change_ts.is_zero()); + assert_eq!(lock.versions_to_last_change, 1); } #[test] diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 46c9774dd52..f2de9df0004 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -439,6 +439,12 @@ impl<'a> PrewriteMutation<'a> { return Ok(Some((write, commit_ts))); } + // If seek_ts is max and it goes here, there is no write record for this key. + if seek_ts == TimeStamp::max() { + // last_change_ts == 0 && versions_to_last_change > 0 means the key actually + // does not exist. 
+ (self.last_change_ts, self.versions_to_last_change) = (TimeStamp::zero(), 1); + } Ok(None) } @@ -750,6 +756,10 @@ fn amend_pessimistic_lock( } (mutation.last_change_ts, mutation.versions_to_last_change) = write.next_last_change_info(*commit_ts); + } else { + // last_change_ts == 0 && versions_to_last_change > 0 means the key actually + // does not exist. + (mutation.last_change_ts, mutation.versions_to_last_change) = (TimeStamp::zero(), 1); } // Used pipelined pessimistic lock acquiring in this txn but failed // Luckily no other txn modified this lock, amend it by treat it as optimistic @@ -2241,6 +2251,13 @@ pub mod tests { let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); let key = b"k"; + // Latest version does not exist + prewrite_func(&mut engine, LockType::Lock, 2); + let lock = must_locked(&mut engine, key, 2); + assert!(lock.last_change_ts.is_zero()); + assert_eq!(lock.versions_to_last_change, 1); + must_rollback(&mut engine, key, 2, false); + // Latest change ts should not be enabled on TiKV 6.4 let feature_gate = FeatureGate::default(); feature_gate.set_version("6.4.0").unwrap(); From c26a7cd6e1be2ff6fc25f138a788db0b6baf0b54 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Mon, 28 Nov 2022 13:46:00 +0800 Subject: [PATCH 0363/1149] storage: introduce hint_min_ts to speed up the flashback progress (#13842) ref tikv/tikv#13800 Introduce `hint_min_ts` during the flashback progress to only flashback those keys that have version changed as much as possible. 
Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- components/cdc/src/old_value.rs | 4 +- components/tikv_kv/src/cursor.rs | 24 +- components/tikv_kv/src/lib.rs | 1 + src/storage/mvcc/reader/reader.rs | 344 +++--------------- src/storage/mvcc/reader/scanner/mod.rs | 6 +- .../txn/actions/flashback_to_version.rs | 59 +-- .../txn/commands/flashback_to_version.rs | 16 +- .../flashback_to_version_read_phase.rs | 20 +- 8 files changed, 128 insertions(+), 346 deletions(-) diff --git a/components/cdc/src/old_value.rs b/components/cdc/src/old_value.rs index 37e2781b766..d91266c92c2 100644 --- a/components/cdc/src/old_value.rs +++ b/components/cdc/src/old_value.rs @@ -1,6 +1,6 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -use std::ops::Deref; +use std::ops::{Bound, Deref}; use engine_traits::{ReadOptions, CF_DEFAULT, CF_WRITE}; use getset::CopyGetters; @@ -261,7 +261,7 @@ fn new_write_cursor_on_key(snapshot: &S, key: &Key) -> Cursor .range(Some(key.clone()), upper) // Use bloom filter to speed up seeking on a given prefix. .prefix_seek(true) - .hint_max_ts(Some(ts)) + .hint_max_ts(Some(Bound::Included(ts))) .build() .unwrap() } diff --git a/components/tikv_kv/src/cursor.rs b/components/tikv_kv/src/cursor.rs index 2c9a071fbbb..576aa5cfa76 100644 --- a/components/tikv_kv/src/cursor.rs +++ b/components/tikv_kv/src/cursor.rs @@ -432,10 +432,10 @@ pub struct CursorBuilder<'a, S: Snapshot> { prefix_seek: bool, upper_bound: Option, lower_bound: Option, - // hint for we will only scan data with commit ts >= hint_min_ts - hint_min_ts: Option, - // hint for we will only scan data with commit ts <= hint_max_ts - hint_max_ts: Option, + // hint for we will only scan data with commit_ts >/>= hint_min_ts + hint_min_ts: Option>, + // hint for we will only scan data with commit_ts >, key_only: bool, max_skippable_internal_keys: u64, } @@ -506,8 +506,8 @@ impl<'a, S: 'a + Snapshot> CursorBuilder<'a, S> { /// Default is empty. 
#[inline] #[must_use] - pub fn hint_min_ts(mut self, min_ts: Option) -> Self { - self.hint_min_ts = min_ts; + pub fn hint_min_ts(mut self, ts_bound: Option>) -> Self { + self.hint_min_ts = ts_bound; self } @@ -516,8 +516,8 @@ impl<'a, S: 'a + Snapshot> CursorBuilder<'a, S> { /// Default is empty. #[inline] #[must_use] - pub fn hint_max_ts(mut self, max_ts: Option) -> Self { - self.hint_max_ts = max_ts; + pub fn hint_max_ts(mut self, ts_bound: Option>) -> Self { + self.hint_max_ts = ts_bound; self } @@ -550,11 +550,11 @@ impl<'a, S: 'a + Snapshot> CursorBuilder<'a, S> { None }; let mut iter_opt = IterOptions::new(l_bound, u_bound, self.fill_cache); - if let Some(ts) = self.hint_min_ts { - iter_opt.set_hint_min_ts(Bound::Included(ts.into_inner())); + if let Some(ts_bound) = self.hint_min_ts { + iter_opt.set_hint_min_ts(ts_bound.map(TimeStamp::into_inner)); } - if let Some(ts) = self.hint_max_ts { - iter_opt.set_hint_max_ts(Bound::Included(ts.into_inner())); + if let Some(ts_bound) = self.hint_max_ts { + iter_opt.set_hint_max_ts(ts_bound.map(TimeStamp::into_inner)); } iter_opt.set_key_only(self.key_only); iter_opt.set_max_skippable_internal_keys(self.max_skippable_internal_keys); diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index b5f19832419..ac452fead37 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -5,6 +5,7 @@ //! [`Server`](crate::server::Server). The [`BTreeEngine`](kv::BTreeEngine) and //! [`RocksEngine`](RocksEngine) are used for testing only. +#![feature(bound_map)] #![feature(min_specialization)] #![feature(type_alias_impl_trait)] diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 752a8f0d00a..8e92ffd6be2 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -1,6 +1,8 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
// #[PerformanceCriticalPath] +use std::ops::Bound; + use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; use kvproto::{ errorpb::{self, EpochNotMatch, StaleCommand}, @@ -127,6 +129,8 @@ pub struct MvccReader { lower_bound: Option, upper_bound: Option, + hint_min_ts: Option>, + /// None means following operations are performed on a single user key, /// i.e., different versions of the same key. It can use prefix seek to /// speed up reads from the write-cf. @@ -154,6 +158,7 @@ impl MvccReader { write_cursor: None, lower_bound: None, upper_bound: None, + hint_min_ts: None, scan_mode, current_key: None, fill_cache, @@ -171,6 +176,7 @@ impl MvccReader { write_cursor: None, lower_bound: None, upper_bound: None, + hint_min_ts: None, scan_mode, current_key: None, fill_cache: !ctx.get_not_fill_cache(), @@ -471,6 +477,8 @@ impl MvccReader { .prefix_seek(self.scan_mode.is_none()) .scan_mode(self.get_scan_mode(true)) .range(self.lower_bound.clone(), self.upper_bound.clone()) + // `hint_min_ts` filters data by the `commit_ts`. + .hint_min_ts(self.hint_min_ts) .build()?; self.write_cursor = Some(cursor); } @@ -559,29 +567,23 @@ impl MvccReader { Ok((locks, has_remain)) } - /// Scan the writes to get all the latest keys with their corresponding - /// PUT/DELETE write records at the given version, if the version is not - /// specified, it will scan the latest version for each key, if the key - /// does not exist or is not visible at that point, an `Option::None` will - /// be placed. The return type is: - /// * `(Vec<(key, Option)>, has_remain)`. - /// - `key` is the encoded key without commit ts. - /// - `write` is the PUT/DELETE write record at the given version. - /// - `has_remain` indicates whether there MAY be remaining writes that + /// Scan the writes to get all the latest user keys. The return type is: + /// * `(Vec, has_remain)`. + /// - `key` is the encoded user key without `commit_ts`. 
+ /// - `has_remain` indicates whether there MAY be remaining user keys that /// can be scanned. /// /// This function is mainly used by /// `txn::commands::FlashbackToVersionReadPhase` /// and `txn::commands::FlashbackToVersion` to achieve the MVCC /// overwriting. - pub fn scan_writes( + pub fn scan_latest_user_keys( &mut self, start: Option<&Key>, end: Option<&Key>, - version: Option, filter: F, limit: usize, - ) -> Result<(Vec<(Key, Option)>, bool)> + ) -> Result<(Vec, bool)> where F: Fn(&Key /* user key */, TimeStamp /* latest `commit_ts` */) -> bool, { @@ -594,10 +596,8 @@ impl MvccReader { if !ok { return Ok((vec![], false)); } - // Use the latest version as the default value if the version is not given. - let version = version.unwrap_or_else(TimeStamp::max); - let mut cur_key = None; - let mut key_writes = Vec::with_capacity(limit); + let mut cur_user_key = None; + let mut keys = Vec::with_capacity(limit); let mut has_remain = false; while cursor.valid()? { let key = Key::from_encoded_slice(cursor.key(&mut self.statistics.write)); @@ -611,62 +611,28 @@ impl MvccReader { let user_key = key.truncate_ts()?; // To make sure we only check each unique user key once and the filter returns // true. - let is_same_user_key = cur_key.as_ref() == Some(&user_key); + let is_same_user_key = cur_user_key.as_ref() == Some(&user_key); if !is_same_user_key { - cur_key = Some(user_key.clone()); + cur_user_key = Some(user_key.clone()); } if is_same_user_key || !filter(&user_key, commit_ts) { cursor.next(&mut self.statistics.write); continue; } - - let mut write = None; - let version_key = user_key.clone().append_ts(version); - // Try to seek to the key with the specified version. - if cursor.near_seek(&version_key, &mut self.statistics.write)? - && Key::is_user_key_eq( - cursor.key(&mut self.statistics.write), - user_key.as_encoded(), - ) - { - while cursor.valid()? 
{ - write = - Some(WriteRef::parse(cursor.value(&mut self.statistics.write))?.to_owned()); - // Move to the next key. - cursor.next(&mut self.statistics.write); - match write.as_ref().unwrap().write_type { - WriteType::Put | WriteType::Delete => { - break; - } - WriteType::Lock | WriteType::Rollback => { - // Only return the PUT/DELETE write record. - write = None; - // Reach the end. - if !cursor.valid()? { - break; - } - // Try to find the latest visible version before it. - let key = - Key::from_encoded_slice(cursor.key(&mut self.statistics.write)); - // Could not find the visible version, current cursor is on the next - // key, so we set `cur_key` to `None`. - if key.truncate_ts()? != user_key { - cur_key = None; - break; - } - } - } - } - } - key_writes.push((user_key, write)); - if limit > 0 && key_writes.len() == limit { + keys.push(user_key.clone()); + if limit > 0 && keys.len() == limit { has_remain = true; break; } + // Seek once to skip all the writes of the same user key. + cursor.near_seek( + &user_key.append_ts(TimeStamp::zero()), + &mut self.statistics.write, + )?; } - self.statistics.write.processed_keys += key_writes.len(); - resource_metering::record_read_keys(key_writes.len() as u32); - Ok((key_writes, has_remain)) + self.statistics.write.processed_keys += keys.len(); + resource_metering::record_read_keys(keys.len() as u32); + Ok((keys, has_remain)) } pub fn scan_keys( @@ -778,6 +744,10 @@ impl MvccReader { self.lower_bound = lower; self.upper_bound = upper; } + + pub fn set_hint_min_ts(&mut self, ts_bound: Option>) { + self.hint_min_ts = ts_bound; + } } #[cfg(test)] @@ -1767,9 +1737,9 @@ pub mod tests { } #[test] - fn test_scan_writes() { + fn test_scan_latest_user_keys() { let path = tempfile::Builder::new() - .prefix("_test_storage_mvcc_reader_scan_writes") + .prefix("_test_storage_mvcc_reader_scan_latest_user_keys") .tempdir() .unwrap(); let path = path.path().to_str().unwrap(); @@ -1869,267 +1839,65 @@ pub mod tests { struct Case { 
start_key: Option, end_key: Option, - version: Option, limit: usize, - expect_res: Vec<(Key, Option)>, + expect_res: Vec, expect_is_remain: bool, } let cases = vec![ - // Get all latest writes with the unspecified version. - Case { - start_key: None, - end_key: None, - version: None, - limit: 4, - expect_res: vec![ - ( - Key::from_raw(b"k0"), - Some(Write::new( - WriteType::Put, - 999.into(), - Some(b"v0@999".to_vec()), - )), - ), - ( - Key::from_raw(b"k1"), - Some(Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec()))), - ), - ( - Key::from_raw(b"k2"), - Some(Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec()))), - ), - ( - Key::from_raw(b"k3"), - Some(Write::new(WriteType::Put, 8.into(), Some(b"v3@8".to_vec()))), - ), - ], - expect_is_remain: true, - }, - // k0 is invisible at version 9. - Case { - start_key: None, - end_key: None, - version: Some(9), - limit: 4, - expect_res: vec![ - (Key::from_raw(b"k0"), None), - ( - Key::from_raw(b"k1"), - Some(Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec()))), - ), - ( - Key::from_raw(b"k2"), - Some(Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec()))), - ), - ( - Key::from_raw(b"k3"), - Some(Write::new(WriteType::Put, 8.into(), Some(b"v3@8".to_vec()))), - ), - ], - expect_is_remain: true, - }, - // k3 has an old version write at version 8. 
- Case { - start_key: None, - end_key: None, - version: Some(8), - limit: 4, - expect_res: vec![ - (Key::from_raw(b"k0"), None), - ( - Key::from_raw(b"k1"), - Some(Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec()))), - ), - ( - Key::from_raw(b"k2"), - Some(Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec()))), - ), - ( - Key::from_raw(b"k3"), - Some(Write::new(WriteType::Put, 5.into(), Some(b"v3@5".to_vec()))), - ), - ], - expect_is_remain: true, - }, - Case { - start_key: None, - end_key: None, - version: Some(7), - limit: 4, - expect_res: vec![ - (Key::from_raw(b"k0"), None), - ( - Key::from_raw(b"k1"), - Some(Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec()))), - ), - ( - Key::from_raw(b"k2"), - Some(Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec()))), - ), - ( - Key::from_raw(b"k3"), - Some(Write::new(WriteType::Put, 5.into(), Some(b"v3@5".to_vec()))), - ), - ], - expect_is_remain: true, - }, - Case { - start_key: None, - end_key: None, - version: Some(6), - limit: 4, - expect_res: vec![ - (Key::from_raw(b"k0"), None), - ( - Key::from_raw(b"k1"), - Some(Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec()))), - ), - ( - Key::from_raw(b"k2"), - Some(Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec()))), - ), - ( - Key::from_raw(b"k3"), - Some(Write::new(WriteType::Put, 5.into(), Some(b"v3@5".to_vec()))), - ), - ], - expect_is_remain: true, - }, - // k3 doesn't exist at version 5. - Case { - start_key: None, - end_key: None, - version: Some(5), - limit: 4, - expect_res: vec![ - (Key::from_raw(b"k0"), None), - ( - Key::from_raw(b"k1"), - Some(Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec()))), - ), - ( - Key::from_raw(b"k2"), - Some(Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec()))), - ), - (Key::from_raw(b"k3"), None), - ], - expect_is_remain: true, - }, + // Test the limit. 
Case { start_key: None, end_key: None, - version: Some(4), - limit: 4, - expect_res: vec![ - (Key::from_raw(b"k0"), None), - ( - Key::from_raw(b"k1"), - Some(Write::new(WriteType::Put, 3.into(), Some(b"v1@3".to_vec()))), - ), - ( - Key::from_raw(b"k2"), - Some(Write::new(WriteType::Put, 3.into(), Some(b"v2@3".to_vec()))), - ), - (Key::from_raw(b"k3"), None), - ], + limit: 1, + expect_res: vec![Key::from_raw(b"k0")], expect_is_remain: true, }, - // k1 and k2 have old version writes at version 3. Case { start_key: None, end_key: None, - version: Some(3), - limit: 4, + limit: 6, expect_res: vec![ - (Key::from_raw(b"k0"), None), - ( - Key::from_raw(b"k1"), - Some(Write::new(WriteType::Put, 1.into(), Some(b"v1@1".to_vec()))), - ), - ( - Key::from_raw(b"k2"), - Some(Write::new(WriteType::Put, 1.into(), Some(b"v2@1".to_vec()))), - ), - (Key::from_raw(b"k3"), None), + Key::from_raw(b"k0"), + Key::from_raw(b"k1"), + Key::from_raw(b"k2"), + Key::from_raw(b"k3"), + Key::from_raw(b"k4"), ], - expect_is_remain: true, + expect_is_remain: false, }, + // Test the start/end key. Case { - start_key: None, + start_key: Some(Key::from_raw(b"k2")), end_key: None, - version: Some(2), limit: 4, expect_res: vec![ - (Key::from_raw(b"k0"), None), - ( - Key::from_raw(b"k1"), - Some(Write::new(WriteType::Put, 1.into(), Some(b"v1@1".to_vec()))), - ), - ( - Key::from_raw(b"k2"), - Some(Write::new(WriteType::Put, 1.into(), Some(b"v2@1".to_vec()))), - ), - (Key::from_raw(b"k3"), None), + Key::from_raw(b"k2"), + Key::from_raw(b"k3"), + Key::from_raw(b"k4"), ], - expect_is_remain: true, + expect_is_remain: false, }, - // All keys don't exist at version 1. Case { start_key: None, - end_key: None, - version: Some(1), + end_key: Some(Key::from_raw(b"k3")), limit: 4, expect_res: vec![ - (Key::from_raw(b"k0"), None), - (Key::from_raw(b"k1"), None), - (Key::from_raw(b"k2"), None), - (Key::from_raw(b"k3"), None), - ], - expect_is_remain: true, - }, - // Test the limit. 
- Case { - start_key: None, - end_key: None, - version: Some(0), - limit: 1, - expect_res: vec![(Key::from_raw(b"k0"), None)], - expect_is_remain: true, - }, - Case { - start_key: None, - end_key: None, - version: Some(0), - limit: 6, - expect_res: vec![ - (Key::from_raw(b"k0"), None), - (Key::from_raw(b"k1"), None), - (Key::from_raw(b"k2"), None), - (Key::from_raw(b"k3"), None), - (Key::from_raw(b"k4"), None), + Key::from_raw(b"k0"), + Key::from_raw(b"k1"), + Key::from_raw(b"k2"), ], expect_is_remain: false, }, - // Test the invisible record. - Case { - start_key: Some(Key::from_raw(b"k4")), - end_key: None, - version: Some(10), - limit: 1, - expect_res: vec![(Key::from_raw(b"k4"), None)], - expect_is_remain: true, - }, ]; for (idx, case) in cases.iter().enumerate() { let snap = RegionSnapshot::::from_raw(db.clone(), region.clone()); let mut reader = MvccReader::new(snap, Some(ScanMode::Forward), false); let res = reader - .scan_writes( + .scan_latest_user_keys( case.start_key.as_ref(), case.end_key.as_ref(), - case.version.map(Into::into), |_, _| true, case.limit, ) diff --git a/src/storage/mvcc/reader/scanner/mod.rs b/src/storage/mvcc/reader/scanner/mod.rs index 664a4fed99e..5b87cca7f7a 100644 --- a/src/storage/mvcc/reader/scanner/mod.rs +++ b/src/storage/mvcc/reader/scanner/mod.rs @@ -4,6 +4,8 @@ mod backward; mod forward; +use std::ops::Bound; + use engine_traits::{CfName, CF_DEFAULT, CF_LOCK, CF_WRITE}; use kvproto::kvrpcpb::{ExtraOp, IsolationLevel}; use txn_types::{ @@ -330,8 +332,8 @@ impl ScannerConfig { .range(lower, upper) .fill_cache(self.fill_cache) .scan_mode(scan_mode) - .hint_min_ts(hint_min_ts) - .hint_max_ts(hint_max_ts) + .hint_min_ts(hint_min_ts.map(|ts| Bound::Included(ts))) + .hint_max_ts(hint_max_ts.map(|ts| Bound::Included(ts))) .build()?; Ok(cursor) } diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index 98e2e433632..71f50715a20 100644 --- 
a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -1,5 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +use std::ops::Bound; + use txn_types::{Key, Lock, TimeStamp, Write, WriteType}; use crate::storage::{ @@ -34,14 +36,17 @@ pub fn flashback_to_version_read_write( flashback_version: TimeStamp, flashback_commit_ts: TimeStamp, statistics: &mut Statistics, -) -> TxnResult)>> { - // To flashback the data, we need to get all the latest keys first by scanning - // every unique key in `CF_WRITE` and to get its corresponding old MVCC write - // record if exists. - let result = reader.scan_writes( +) -> TxnResult> { + // Filter out the SST that does not have a newer version than + // `flashback_version` in `CF_WRITE`, i.e, whose latest `commit_ts` <= + // `flashback_version`. By doing this, we can only flashback those keys that + // have version changed since `flashback_version` as much as possible. + reader.set_hint_min_ts(Some(Bound::Excluded(flashback_version))); + // To flashback the data, we need to get all the latest visible keys first by + // scanning every unique key in `CF_WRITE`. + let keys_result = reader.scan_latest_user_keys( Some(&next_write_key), Some(end_key), - Some(flashback_version), |_, latest_commit_ts| { // There is no any other write could happen after the flashback begins. 
assert!(latest_commit_ts <= flashback_commit_ts); @@ -54,8 +59,8 @@ pub fn flashback_to_version_read_write( FLASHBACK_BATCH_SIZE, ); statistics.add(&reader.statistics); - let (key_old_writes, _) = result?; - Ok(key_old_writes) + let (keys, _) = keys_result?; + Ok(keys) } // To flashback the `CF_LOCK`, we need to delete all locks records whose @@ -87,20 +92,22 @@ pub fn flashback_to_version_lock( } // To flashback the `CF_WRITE` and `CF_DEFAULT`, we need to write a new MVCC -// record for each key in `self.keys` with its old value at `self.version`, +// record for each key in keys with its old value at `flashback_version`, // specifically, the flashback will have the following behavior: -// - If a key doesn't exist at `self.version`, it will be put a -// `WriteType::Delete`. -// - If a key exists at `self.version`, it will be put the exact same record -// in `CF_WRITE` and `CF_DEFAULT` with `self.commit_ts` and `self.start_ts`. +// - If a key doesn't exist or isn't invisible at `flashback_version`, it will +// be put a `WriteType::Delete`. +// - If a key exists and is visible at `flashback_version`, it will be put the +// exact same record in `CF_WRITE` and `CF_DEFAULT` with `self.commit_ts` +// and `self.start_ts`. 
pub fn flashback_to_version_write( txn: &mut MvccTxn, reader: &mut SnapshotReader, - key_old_writes: Vec<(Key, Option)>, - start_ts: TimeStamp, - commit_ts: TimeStamp, + keys: Vec, + flashback_version: TimeStamp, + flashback_start_ts: TimeStamp, + flashback_commit_ts: TimeStamp, ) -> TxnResult> { - for (key, old_write) in key_old_writes { + for key in keys { #[cfg(feature = "failpoints")] { let should_skip = || { @@ -114,27 +121,32 @@ pub fn flashback_to_version_write( if txn.write_size() >= MAX_TXN_WRITE_SIZE { return Ok(Some(key.clone())); } + let old_write = reader.get_write(&key, flashback_version)?; let new_write = if let Some(old_write) = old_write { // If it's not a short value and it's a `WriteType::Put`, we should put the old // value in `CF_DEFAULT` with `self.start_ts` as well. if old_write.short_value.is_none() && old_write.write_type == WriteType::Put { txn.put_value( key.clone(), - start_ts, + flashback_start_ts, reader.load_data(&key, old_write.clone())?, ); } Write::new( old_write.write_type, - start_ts, + flashback_start_ts, old_write.short_value.clone(), ) } else { // If the old write doesn't exist, we should put a `WriteType::Delete` record to // delete the current key when needed. - Write::new(WriteType::Delete, start_ts, None) + Write::new(WriteType::Delete, flashback_start_ts, None) }; - txn.put_write(key.clone(), commit_ts, new_write.as_ref().to_bytes()); + txn.put_write( + key.clone(), + flashback_commit_ts, + new_write.as_ref().to_bytes(), + ); } Ok(None) } @@ -187,7 +199,7 @@ pub mod tests { let mut rows = txn.modifies.len(); write(engine, &ctx, txn.into_modifies()); // Flashback the writes. 
- let key_old_writes = flashback_to_version_read_write( + let keys = flashback_to_version_read_write( &mut reader, key, &next_key, @@ -202,7 +214,8 @@ pub mod tests { flashback_to_version_write( &mut txn, &mut snap_reader, - key_old_writes, + keys, + version, start_ts, commit_ts, ) diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index dabb6acfcc5..d53a3a5c3be 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -45,9 +45,7 @@ impl CommandExt for FlashbackToVersion { FlashbackToVersionState::ScanLock { key_locks, .. } => { latch::Lock::new(key_locks.iter().map(|(key, _)| key)) } - FlashbackToVersionState::ScanWrite { key_old_writes, .. } => { - latch::Lock::new(key_old_writes.iter().map(|(key, _)| key)) - } + FlashbackToVersionState::ScanWrite { keys, .. } => latch::Lock::new(keys.iter()), } } @@ -57,10 +55,9 @@ impl CommandExt for FlashbackToVersion { .iter() .map(|(key, _)| key.as_encoded().len()) .sum(), - FlashbackToVersionState::ScanWrite { key_old_writes, .. } => key_old_writes - .iter() - .map(|(key, _)| key.as_encoded().len()) - .sum(), + FlashbackToVersionState::ScanWrite { keys, .. } => { + keys.iter().map(|key| key.as_encoded().len()).sum() + } } } } @@ -86,12 +83,13 @@ impl WriteCommand for FlashbackToVersion { } FlashbackToVersionState::ScanWrite { ref mut next_write_key, - ref mut key_old_writes, + ref mut keys, } => { if let Some(new_next_write_key) = flashback_to_version_write( &mut txn, &mut reader, - mem::take(key_old_writes), + mem::take(keys), + self.version, self.start_ts, self.commit_ts, )? 
{ diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs index b1a83a49ff8..b41506c320b 100644 --- a/src/storage/txn/commands/flashback_to_version_read_phase.rs +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -1,7 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. // #[PerformanceCriticalPath] -use txn_types::{Key, Lock, TimeStamp, Write}; +use txn_types::{Key, Lock, TimeStamp}; use crate::storage::{ mvcc::MvccReader, @@ -24,7 +24,7 @@ pub enum FlashbackToVersionState { }, ScanWrite { next_write_key: Key, - key_old_writes: Vec<(Key, Option)>, + keys: Vec, }, } @@ -110,7 +110,7 @@ impl ReadCommand for FlashbackToVersionReadPhase { read_again = true; FlashbackToVersionState::ScanWrite { next_write_key: self.start_key.clone(), - key_old_writes: Vec::new(), + keys: Vec::new(), } } else { assert!(!key_locks.is_empty()); @@ -128,7 +128,7 @@ impl ReadCommand for FlashbackToVersionReadPhase { } } FlashbackToVersionState::ScanWrite { next_write_key, .. } => { - let mut key_old_writes = flashback_to_version_read_write( + let mut keys = flashback_to_version_read_write( &mut reader, next_write_key, &self.end_key, @@ -136,18 +136,18 @@ impl ReadCommand for FlashbackToVersionReadPhase { self.commit_ts, statistics, )?; - if key_old_writes.is_empty() { + if keys.is_empty() { // No more writes to flashback, just return. 
return Ok(ProcessResult::Res); } - tls_collect_keyread_histogram_vec(tag, key_old_writes.len() as f64); + tls_collect_keyread_histogram_vec(tag, keys.len() as f64); FlashbackToVersionState::ScanWrite { - next_write_key: if key_old_writes.len() > 1 { - key_old_writes.pop().map(|(key, _)| key).unwrap() + next_write_key: if keys.len() > 1 { + keys.pop().unwrap() } else { - key_old_writes.last().map(|(key, _)| key.clone()).unwrap() + keys.last().unwrap().clone() }, - key_old_writes, + keys, } } }; From bfe29655a2a1634c268a7fb3986892be7dc1f305 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Mon, 28 Nov 2022 17:54:00 +0800 Subject: [PATCH 0364/1149] scheduler: Support resuming after meeting lock for AcquirePessimisticLock requests (#13826) ref tikv/tikv#13298 Update scheduler's logic to support resuming a woken-up AcquirePessimisticLock request. It also includes changes to Latches to support deriving latches after a command released some locks (instead of releasing the latches and acquire them again). 
Signed-off-by: MyonKeminta Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/test_raftstore/src/util.rs | 45 +- src/server/service/kv.rs | 58 +- src/storage/errors.rs | 47 +- src/storage/lock_manager/lock_wait_context.rs | 20 +- .../lock_manager/lock_waiting_queue.rs | 4 +- src/storage/mod.rs | 628 +++++++++++++++++- src/storage/txn/commands/mod.rs | 7 +- src/storage/txn/scheduler.rs | 381 +++++++++-- src/storage/types.rs | 42 ++ tests/integrations/server/kv_service.rs | 226 ++++++- tests/integrations/server/lock_manager.rs | 5 +- 12 files changed, 1358 insertions(+), 107 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 487d2712249..14c12716ee2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2694,7 +2694,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#51120697d051df163ec8aa313ee1916a68b07984" +source = "git+https://github.com/pingcap/kvproto.git#fdbd9fa2b8f402420c9f7bc8fe47b0e41412ad55" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 14661344316..e4b185b9509 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -1022,6 +1022,39 @@ pub fn kv_pessimistic_lock( kv_pessimistic_lock_with_ttl(client, ctx, keys, ts, for_update_ts, return_values, 20) } +pub fn kv_pessimistic_lock_resumable( + client: &TikvClient, + ctx: Context, + keys: Vec>, + ts: u64, + for_update_ts: u64, + wait_timeout: Option, + return_values: bool, + check_existence: bool, +) -> PessimisticLockResponse { + let mut req = PessimisticLockRequest::default(); + req.set_context(ctx); + let primary = keys[0].clone(); + let mut mutations = vec![]; + for key in keys { + let mut mutation = Mutation::default(); + mutation.set_op(Op::PessimisticLock); + mutation.set_key(key); + mutations.push(mutation); + } + req.set_mutations(mutations.into()); + req.primary_lock = primary; + 
req.start_version = ts; + req.for_update_ts = for_update_ts; + req.lock_ttl = 20; + req.is_first_lock = false; + req.wait_timeout = wait_timeout.unwrap_or(-1); + req.set_wake_up_mode(PessimisticLockWakeUpMode::WakeUpModeForceLock); + req.return_values = return_values; + req.check_existence = check_existence; + client.kv_pessimistic_lock(&req).unwrap() +} + pub fn kv_pessimistic_lock_with_ttl( client: &TikvClient, ctx: Context, @@ -1057,12 +1090,18 @@ pub fn must_kv_pessimistic_lock(client: &TikvClient, ctx: Context, key: Vec, assert!(resp.errors.is_empty(), "{:?}", resp.get_errors()); } -pub fn must_kv_pessimistic_rollback(client: &TikvClient, ctx: Context, key: Vec, ts: u64) { +pub fn must_kv_pessimistic_rollback( + client: &TikvClient, + ctx: Context, + key: Vec, + ts: u64, + for_update_ts: u64, +) { let mut req = PessimisticRollbackRequest::default(); req.set_context(ctx); req.set_keys(vec![key].into_iter().collect()); req.start_version = ts; - req.for_update_ts = ts; + req.for_update_ts = for_update_ts; let resp = client.kv_pessimistic_rollback(&req).unwrap(); assert!(!resp.has_region_error(), "{:?}", resp.get_region_error()); assert!(resp.errors.is_empty(), "{:?}", resp.get_errors()); @@ -1306,7 +1345,7 @@ impl PeerClient { } pub fn must_kv_pessimistic_rollback(&self, key: Vec, ts: u64) { - must_kv_pessimistic_rollback(&self.cli, self.ctx.clone(), key, ts) + must_kv_pessimistic_rollback(&self.cli, self.ctx.clone(), key, ts, ts) } } diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index fa2235b51e7..7a61a313eca 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -57,7 +57,7 @@ use crate::{ self, errors::{ extract_committed, extract_key_error, extract_key_errors, extract_kv_pairs, - extract_region_error, map_kv_pairs, + extract_region_error, extract_region_error_from_error, map_kv_pairs, }, kv::Engine, lock_manager::LockManager, @@ -1887,12 +1887,12 @@ fn future_raw_coprocessor( } macro_rules! 
txn_command_future { - ($fn_name: ident, $req_ty: ident, $resp_ty: ident, ($req: ident) $prelude: stmt; ($v: ident, $resp: ident, $tracker: ident) { $else_branch: expr }) => { + ($fn_name: ident, $req_ty: ident, $resp_ty: ident, ($req: ident) {$($prelude: stmt)*}; ($v: ident, $resp: ident, $tracker: ident) { $else_branch: expr }) => { fn $fn_name( storage: &Storage, $req: $req_ty, ) -> impl Future> { - $prelude + $($prelude)* let $tracker = GLOBAL_TRACKERS.insert(Tracker::new(RequestInfo::new( $req.get_context(), RequestType::Unknown, @@ -1939,22 +1939,42 @@ txn_command_future!(future_prewrite, PrewriteRequest, PrewriteResponse, (v, resp } resp.set_errors(extract_key_errors(v.map(|v| v.locks)).into()); }}); -txn_command_future!(future_acquire_pessimistic_lock, PessimisticLockRequest, PessimisticLockResponse, (v, resp, tracker) {{ - match v { - Ok(Ok(res)) => { - let (values, not_founds) = res.into_legacy_values_and_not_founds(); - resp.set_values(values.into()); - resp.set_not_founds(not_founds); - }, - Err(e) | Ok(Err(e)) => { - resp.set_errors(vec![extract_key_error(&e)].into()) - }, - } - GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { - tracker.write_scan_detail(resp.mut_exec_details_v2().mut_scan_detail_v2()); - tracker.write_write_detail(resp.mut_exec_details_v2().mut_write_detail()); - }); -}}); +txn_command_future!(future_acquire_pessimistic_lock, PessimisticLockRequest, PessimisticLockResponse, + (req) { + let mode = req.get_wake_up_mode() + }; + (v, resp, tracker) {{ + match v { + Ok(Ok(res)) => { + match mode { + PessimisticLockWakeUpMode::WakeUpModeForceLock => { + let (res, error) = res.into_pb(); + resp.set_results(res.into()); + if let Some(e) = error { + if let Some(region_error) = extract_region_error_from_error(&e.0) { + resp.set_region_error(region_error); + } else { + resp.set_errors(vec![extract_key_error(&e.0)].into()); + } + } + } + PessimisticLockWakeUpMode::WakeUpModeNormal => { + let (values, not_founds) = 
res.into_legacy_values_and_not_founds(); + resp.set_values(values.into()); + resp.set_not_founds(not_founds); + } + } + }, + Err(e) | Ok(Err(e)) => { + resp.set_errors(vec![extract_key_error(&e)].into()) + }, + } + GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { + tracker.write_scan_detail(resp.mut_exec_details_v2().mut_scan_detail_v2()); + tracker.write_write_detail(resp.mut_exec_details_v2().mut_write_detail()); + }); + }} +); txn_command_future!(future_pessimistic_rollback, PessimisticRollbackRequest, PessimisticRollbackResponse, (v, resp) { resp.set_errors(extract_key_errors(v).into()) }); diff --git a/src/storage/errors.rs b/src/storage/errors.rs index b5498e807f0..2b41cf23ea2 100644 --- a/src/storage/errors.rs +++ b/src/storage/errors.rs @@ -238,45 +238,45 @@ pub fn get_tag_from_header(header: &errorpb::Error) -> &'static str { get_error_kind_from_header(header).get_str() } -pub fn extract_region_error(res: &Result) -> Option { - match *res { +pub fn extract_region_error_from_error(e: &Error) -> Option { + match e { // TODO: use `Error::cause` instead. - Err(Error(box ErrorInner::Kv(KvError(box KvErrorInner::Request(ref e))))) - | Err(Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::Engine(KvError( + Error(box ErrorInner::Kv(KvError(box KvErrorInner::Request(ref e)))) + | Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::Engine(KvError( box KvErrorInner::Request(ref e), - )))))) - | Err(Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( + ))))) + | Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( box MvccErrorInner::Kv(KvError(box KvErrorInner::Request(ref e))), - )))))) => Some(e.to_owned()), - Err(Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::MaxTimestampNotSynced { + ))))) => Some(e.to_owned()), + Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::MaxTimestampNotSynced { .. 
- })))) => { + }))) => { let mut err = errorpb::Error::default(); err.set_max_timestamp_not_synced(Default::default()); Some(err) } - Err(Error(box ErrorInner::SchedTooBusy)) => { + Error(box ErrorInner::SchedTooBusy) => { let mut err = errorpb::Error::default(); let mut server_is_busy_err = errorpb::ServerIsBusy::default(); server_is_busy_err.set_reason(SCHEDULER_IS_BUSY.to_owned()); err.set_server_is_busy(server_is_busy_err); Some(err) } - Err(Error(box ErrorInner::GcWorkerTooBusy)) => { + Error(box ErrorInner::GcWorkerTooBusy) => { let mut err = errorpb::Error::default(); let mut server_is_busy_err = errorpb::ServerIsBusy::default(); server_is_busy_err.set_reason(GC_WORKER_IS_BUSY.to_owned()); err.set_server_is_busy(server_is_busy_err); Some(err) } - Err(Error(box ErrorInner::Closed)) => { + Error(box ErrorInner::Closed) => { // TiKV is closing, return an RegionError to tell the client that this region is // unavailable temporarily, the client should retry the request in other TiKVs. 
let mut err = errorpb::Error::default(); err.set_message("TiKV is Closing".to_string()); Some(err) } - Err(Error(box ErrorInner::DeadlineExceeded)) => { + Error(box ErrorInner::DeadlineExceeded) => { let mut err = errorpb::Error::default(); let mut server_is_busy_err = errorpb::ServerIsBusy::default(); server_is_busy_err.set_reason(DEADLINE_EXCEEDED.to_owned()); @@ -287,6 +287,13 @@ pub fn extract_region_error(res: &Result) -> Option { } } +pub fn extract_region_error(res: &Result) -> Option { + match res { + Ok(_) => None, + Err(e) => extract_region_error_from_error(e), + } +} + pub fn extract_committed(err: &Error) -> Option { match *err { Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( @@ -463,17 +470,23 @@ pub fn extract_key_errors(res: Result>>) -> Vec); +pub struct SharedError(pub Arc); + +impl SharedError { + pub fn inner(&self) -> &ErrorInner { + &self.0.0 + } +} impl From for SharedError { fn from(e: ErrorInner) -> Self { - Self(Arc::new(e)) + Self(Arc::new(Error::from(e))) } } impl From for SharedError { fn from(e: Error) -> Self { - Self(Arc::from(e.0)) + Self(Arc::new(e)) } } @@ -483,7 +496,7 @@ impl TryFrom for Error { type Error = (); fn try_from(e: SharedError) -> std::result::Result { - Arc::try_unwrap(e.0).map(Into::into).map_err(|_| ()) + Arc::try_unwrap(e.0).map_err(|_| ()) } } diff --git a/src/storage/lock_manager/lock_wait_context.rs b/src/storage/lock_manager/lock_wait_context.rs index 1d53bdc38ea..b8aaa7f1927 100644 --- a/src/storage/lock_manager/lock_wait_context.rs +++ b/src/storage/lock_manager/lock_wait_context.rs @@ -23,7 +23,7 @@ use crate::storage::{ LockManager, LockWaitToken, }, types::PessimisticLockKeyResult, - Error as StorageError, ProcessResult, StorageCallback, + Error as StorageError, PessimisticLockResults, ProcessResult, StorageCallback, }; pub struct LockWaitContextInner { @@ -157,9 +157,19 @@ impl LockWaitContext { return; } - // The following code is only valid after implementing the new lock-waiting 
- // model. - unreachable!(); + let key_res = match result { + Ok(key_res) => { + assert!(!matches!(key_res, PessimisticLockKeyResult::Waiting)); + key_res + } + Err(e) => PessimisticLockKeyResult::Failed(e), + }; + + let mut res = PessimisticLockResults::with_capacity(1); + res.push(key_res); + let pr = ProcessResult::PessimisticLockRes { res: Ok(res) }; + + ctx_inner.cb.execute(pr); } } @@ -176,7 +186,7 @@ mod tests { lock_manager::{lock_waiting_queue::LockWaitEntry, MockLockManager}, mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, txn::{Error as TxnError, ErrorInner as TxnErrorInner}, - types::{PessimisticLockParameters, PessimisticLockResults}, + types::PessimisticLockParameters, ErrorInner as StorageErrorInner, Result as StorageResult, }; diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs index 4069bab5643..90a2c369cca 100644 --- a/src/storage/lock_manager/lock_waiting_queue.rs +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -841,11 +841,11 @@ mod tests { } fn expect_write_conflict( - err: &StorageErrorInner, + err: &StorageError, expect_conflict_start_ts: impl Into, expect_conflict_commit_ts: impl Into, ) { - match err { + match &*err.0 { StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( box MvccErrorInner::WriteConflict { conflict_start_ts, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 3e55d81f5d2..32d033e7497 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3266,6 +3266,17 @@ pub mod test_util { }) } + pub fn expect_value_with_checker_callback( + done: Sender, + id: i32, + check: impl FnOnce(T) + Send + 'static, + ) -> Callback { + Box::new(move |x: Result| { + check(x.unwrap()); + done.send(id).unwrap(); + }) + } + pub fn expect_pessimistic_lock_res_callback( done: Sender, pessimistic_lock_res: PessimisticLockResults, @@ -3330,6 +3341,40 @@ pub mod test_util { type PessimisticLockCommand = TypedCommand>; + impl PessimisticLockCommand { + 
pub fn allow_lock_with_conflict(mut self, v: bool) -> Self { + if let Command::AcquirePessimisticLock(commands::AcquirePessimisticLock { + allow_lock_with_conflict, + .. + }) = &mut self.cmd + { + *allow_lock_with_conflict = v; + } else { + panic!( + "expects AcquirePessimisticLock command, got: {:?}", + self.cmd + ); + } + self + } + + pub fn lock_wait_timeout(mut self, timeout: Option) -> Self { + if let Command::AcquirePessimisticLock(commands::AcquirePessimisticLock { + wait_timeout, + .. + }) = &mut self.cmd + { + *wait_timeout = timeout; + } else { + panic!( + "expects AcquirePessimisticLock command, got: {:?}", + self.cmd + ); + } + self + } + } + pub fn new_acquire_pessimistic_lock_command( keys: Vec<(Key, bool)>, start_ts: impl Into, @@ -3337,7 +3382,27 @@ pub mod test_util { return_values: bool, check_existence: bool, ) -> PessimisticLockCommand { - let primary = keys[0].0.clone().to_raw().unwrap(); + new_acquire_pessimistic_lock_command_with_pk( + keys, + None, + start_ts, + for_update_ts, + return_values, + check_existence, + ) + } + + pub fn new_acquire_pessimistic_lock_command_with_pk( + keys: Vec<(Key, bool)>, + pk: Option<&[u8]>, + start_ts: impl Into, + for_update_ts: impl Into, + return_values: bool, + check_existence: bool, + ) -> PessimisticLockCommand { + let primary = pk + .map(|k| k.to_vec()) + .unwrap_or_else(|| keys[0].0.clone().to_raw().unwrap()); let for_update_ts: TimeStamp = for_update_ts.into(); commands::AcquirePessimisticLock::new( keys, @@ -8074,6 +8139,567 @@ mod tests { test_pessimistic_lock_impl(true); } + fn must_have_locks( + storage: &Storage, + ts: u64, + start_key: &[u8], + end_key: &[u8], + expected_locks: &[( + // key + &[u8], + Op, + // start_ts + u64, + // for_update_ts + u64, + )], + ) { + let locks = block_on(storage.scan_lock( + Context::default(), + ts.into(), + Some(Key::from_raw(start_key)), + Some(Key::from_raw(end_key)), + 100, + )) + .unwrap(); + assert_eq!( + locks.len(), + expected_locks.len(), + "lock count 
not match, expected: {:?}; got: {:?}", + expected_locks, + locks + ); + for (lock_info, (expected_key, expected_op, expected_start_ts, expected_for_update_ts)) in + locks.into_iter().zip(expected_locks.iter()) + { + assert_eq!(lock_info.get_key(), *expected_key); + assert_eq!(lock_info.get_lock_type(), *expected_op); + assert_eq!(lock_info.get_lock_version(), *expected_start_ts); + assert_eq!(lock_info.get_lock_for_update_ts(), *expected_for_update_ts); + } + } + + fn test_pessimistic_lock_resumable_impl( + pipelined_pessimistic_lock: bool, + in_memory_lock: bool, + ) { + type Res = PessimisticLockKeyResult; + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) + .pipelined_pessimistic_lock(pipelined_pessimistic_lock) + .in_memory_pessimistic_lock(in_memory_lock) + .build() + .unwrap(); + let (tx, rx) = channel(); + + let results_empty = + |len| PessimisticLockResults(vec![PessimisticLockKeyResult::Empty; len]); + + for case_num in 0..4 { + let key = |i| vec![b'k', case_num, i]; + // Put key "k1". + storage + .sched_txn_command( + commands::Prewrite::new( + vec![Mutation::make_put(Key::from_raw(&key(1)), b"v1".to_vec())], + key(1), + 10.into(), + 3000, + false, + 1, + TimeStamp::zero(), + TimeStamp::default(), + None, + false, + AssertionLevel::Off, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + storage + .sched_txn_command( + commands::Commit::new( + vec![Key::from_raw(&key(1))], + 10.into(), + 20.into(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + + // Put key "k2". 
+ storage + .sched_txn_command( + commands::Prewrite::new( + vec![Mutation::make_put(Key::from_raw(&key(2)), b"v2".to_vec())], + key(2), + 30.into(), + 3000, + false, + 1, + TimeStamp::zero(), + TimeStamp::default(), + None, + false, + AssertionLevel::Off, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + storage + .sched_txn_command( + commands::Commit::new( + vec![Key::from_raw(&key(2))], + 30.into(), + 40.into(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + + // Lock "k3", and we will pessimistic-rollback it. + storage + .sched_txn_command( + new_acquire_pessimistic_lock_command( + vec![(Key::from_raw(&key(3)), false)], + 20, + 20, + false, + false, + ), + expect_pessimistic_lock_res_callback(tx.clone(), results_empty(1)), + ) + .unwrap(); + rx.recv().unwrap(); + + // Prewrite "k4", and we will commit it + storage + .sched_txn_command( + commands::Prewrite::new( + vec![Mutation::make_put(Key::from_raw(&key(4)), b"v4".to_vec())], + key(4), + 30.into(), + 3000, + false, + 1, + TimeStamp::zero(), + TimeStamp::default(), + None, + false, + AssertionLevel::Off, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + + // Prewrite "k5", and we will roll it back + storage + .sched_txn_command( + commands::Prewrite::new( + vec![Mutation::make_put(Key::from_raw(&key(5)), b"v5".to_vec())], + key(5), + 30.into(), + 3000, + false, + 1, + TimeStamp::zero(), + TimeStamp::default(), + None, + false, + AssertionLevel::Off, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + + // Prewrite "k6", and it won't cause conflict after committing. 
+ storage + .sched_txn_command( + commands::Prewrite::new( + vec![Mutation::make_put(Key::from_raw(&key(6)), b"v6".to_vec())], + key(6), + 10.into(), + 3000, + false, + 1, + TimeStamp::zero(), + TimeStamp::default(), + None, + false, + AssertionLevel::Off, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + } + + for &(case_num, return_values, check_existence) in &[ + (0, false, false), + (1, false, true), + (2, true, false), + (3, true, true), + ] { + let key = |i| vec![b'k', case_num, i]; + let expected_results = if return_values { + vec![ + Res::Value(Some(b"v1".to_vec())), + Res::LockedWithConflict { + value: Some(b"v2".to_vec()), + conflict_ts: 40.into(), + }, + Res::Value(None), + Res::LockedWithConflict { + value: Some(b"v4".to_vec()), + conflict_ts: 40.into(), + }, + Res::LockedWithConflict { + value: None, + conflict_ts: 30.into(), + }, + Res::Value(Some(b"v6".to_vec())), + ] + } else if check_existence { + vec![ + Res::Existence(true), + Res::LockedWithConflict { + value: Some(b"v2".to_vec()), + conflict_ts: 40.into(), + }, + Res::Existence(false), + Res::LockedWithConflict { + value: Some(b"v4".to_vec()), + conflict_ts: 40.into(), + }, + Res::LockedWithConflict { + value: None, + conflict_ts: 30.into(), + }, + Res::Existence(true), + ] + } else { + vec![ + Res::Empty, + Res::LockedWithConflict { + value: Some(b"v2".to_vec()), + conflict_ts: 40.into(), + }, + Res::Empty, + Res::LockedWithConflict { + value: Some(b"v4".to_vec()), + conflict_ts: 40.into(), + }, + Res::LockedWithConflict { + value: None, + conflict_ts: 30.into(), + }, + Res::Empty, + ] + }; + + // k1 & k2 + for (i, k) in &[(0, key(1)), (1, key(2))] { + let i = *i; + storage + .sched_txn_command( + new_acquire_pessimistic_lock_command( + vec![(Key::from_raw(k), false)], + 25, + 25, + return_values, + check_existence, + ) + .allow_lock_with_conflict(true), + expect_pessimistic_lock_res_callback( + tx.clone(), + 
PessimisticLockResults(vec![expected_results[i].clone()]), + ), + ) + .unwrap(); + rx.recv().unwrap(); + } + + // k3 + // Report KeyIsLocked if no wait + storage + .sched_txn_command( + new_acquire_pessimistic_lock_command( + vec![(Key::from_raw(&key(3)), false)], + 25, + 25, + return_values, + check_existence, + ) + .allow_lock_with_conflict(true) + .lock_wait_timeout(None), + expect_value_with_checker_callback( + tx.clone(), + 0, + |res: Result| { + let e = res.unwrap().0[0].unwrap_err(); + match e.inner() { + ErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc( + mvcc::Error(box mvcc::ErrorInner::KeyIsLocked(..)), + ))) => (), + e => panic!("unexpected error chain: {:?}", e), + } + }, + ), + ) + .unwrap(); + rx.recv().unwrap(); + + // Lock wait + let (tx1, rx1) = channel(); + // k3 + storage + .sched_txn_command( + new_acquire_pessimistic_lock_command( + vec![(Key::from_raw(&key(3)), false)], + 25, + 25, + return_values, + check_existence, + ) + .allow_lock_with_conflict(true) + .lock_wait_timeout(Some(WaitTimeout::Default)), + expect_pessimistic_lock_res_callback( + tx1.clone(), + PessimisticLockResults(vec![expected_results[2].clone()]), + ), + ) + .unwrap(); + rx1.recv_timeout(Duration::from_millis(100)).unwrap_err(); + + delete_pessimistic_lock(&storage, Key::from_raw(&key(3)), 20, 20); + rx1.recv().unwrap(); + + // k4 + storage + .sched_txn_command( + new_acquire_pessimistic_lock_command( + vec![(Key::from_raw(&key(4)), false)], + 25, + 25, + return_values, + check_existence, + ) + .allow_lock_with_conflict(true) + .lock_wait_timeout(Some(WaitTimeout::Default)), + expect_pessimistic_lock_res_callback( + tx1.clone(), + PessimisticLockResults(vec![expected_results[3].clone()]), + ), + ) + .unwrap(); + rx1.recv_timeout(Duration::from_millis(100)).unwrap_err(); + storage + .sched_txn_command( + commands::Commit::new( + vec![Key::from_raw(&key(4))], + 30.into(), + 40.into(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + 
rx.recv().unwrap(); + rx1.recv().unwrap(); + + // k5 + storage + .sched_txn_command( + new_acquire_pessimistic_lock_command( + vec![(Key::from_raw(&key(5)), false)], + 25, + 25, + return_values, + check_existence, + ) + .allow_lock_with_conflict(true) + .lock_wait_timeout(Some(WaitTimeout::Default)), + expect_pessimistic_lock_res_callback( + tx1.clone(), + PessimisticLockResults(vec![expected_results[4].clone()]), + ), + ) + .unwrap(); + rx1.recv_timeout(Duration::from_millis(100)).unwrap_err(); + storage + .sched_txn_command( + commands::Rollback::new( + vec![Key::from_raw(&key(5))], + 30.into(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + rx1.recv().unwrap(); + + // k6 + storage + .sched_txn_command( + new_acquire_pessimistic_lock_command( + vec![(Key::from_raw(&key(6)), false)], + 25, + 25, + return_values, + check_existence, + ) + .allow_lock_with_conflict(true) + .lock_wait_timeout(Some(WaitTimeout::Default)), + expect_pessimistic_lock_res_callback( + tx1.clone(), + PessimisticLockResults(vec![expected_results[5].clone()]), + ), + ) + .unwrap(); + rx1.recv_timeout(Duration::from_millis(100)).unwrap_err(); + storage + .sched_txn_command( + commands::Commit::new( + vec![Key::from_raw(&key(6))], + 10.into(), + 20.into(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + rx1.recv().unwrap(); + + must_have_locks( + &storage, + 50, + &key(0), + &key(10), + &[ + (&key(1), Op::PessimisticLock, 25, 25), + (&key(2), Op::PessimisticLock, 25, 40), + (&key(3), Op::PessimisticLock, 25, 25), + (&key(4), Op::PessimisticLock, 25, 40), + (&key(5), Op::PessimisticLock, 25, 30), + (&key(6), Op::PessimisticLock, 25, 25), + ], + ); + + // Test idempotency + for i in 0..6usize { + storage + .sched_txn_command( + new_acquire_pessimistic_lock_command( + vec![(Key::from_raw(&key(i as u8 + 1)), false)], + 25, + 25, + return_values, + check_existence, + ) + 
.allow_lock_with_conflict(true) + .lock_wait_timeout(Some(WaitTimeout::Default)), + expect_pessimistic_lock_res_callback( + tx1.clone(), + PessimisticLockResults(vec![expected_results[i].clone()]), + ), + ) + .unwrap(); + rx1.recv().unwrap(); + } + } + + // Check the channel is clear to avoid misusing in the above test code. + tx.send(100).unwrap(); + assert_eq!(rx.recv().unwrap(), 100); + + // Test request queueing. + storage + .sched_txn_command( + new_acquire_pessimistic_lock_command( + vec![(Key::from_raw(b"k21"), false)], + 10, + 10, + false, + false, + ) + .allow_lock_with_conflict(true) + .lock_wait_timeout(Some(WaitTimeout::Default)), + expect_pessimistic_lock_res_callback(tx, results_empty(1)), + ) + .unwrap(); + rx.recv().unwrap(); + + let channels: Vec<_> = (0..4).map(|_| channel()).collect(); + let start_ts = &[20, 50, 30, 40]; + for i in 0..4 { + storage + .sched_txn_command( + new_acquire_pessimistic_lock_command( + vec![(Key::from_raw(b"k21"), false)], + start_ts[i], + start_ts[i], + false, + false, + ) + .allow_lock_with_conflict(true) + .lock_wait_timeout(Some(WaitTimeout::Default)), + expect_pessimistic_lock_res_callback(channels[i].0.clone(), results_empty(1)), + ) + .unwrap(); + channels[i] + .1 + .recv_timeout(Duration::from_millis(100)) + .unwrap_err(); + } + + delete_pessimistic_lock(&storage, Key::from_raw(b"k21"), 10, 10); + channels[0].1.recv().unwrap(); + channels[2] + .1 + .recv_timeout(Duration::from_millis(100)) + .unwrap_err(); + + delete_pessimistic_lock(&storage, Key::from_raw(b"k21"), 20, 20); + channels[2].1.recv().unwrap(); + channels[3] + .1 + .recv_timeout(Duration::from_millis(100)) + .unwrap_err(); + + delete_pessimistic_lock(&storage, Key::from_raw(b"k21"), 30, 30); + channels[3].1.recv().unwrap(); + channels[1] + .1 + .recv_timeout(Duration::from_millis(100)) + .unwrap_err(); + + delete_pessimistic_lock(&storage, Key::from_raw(b"k21"), 40, 40); + channels[1].1.recv().unwrap(); + } + + #[test] + fn 
test_pessimistic_lock_resumable() { + for &pipelined_pessimistic_lock in &[false, true] { + for &in_memory_lock in &[false, true] { + test_pessimistic_lock_resumable_impl(pipelined_pessimistic_lock, in_memory_lock); + } + } + } + #[allow(clippy::large_enum_variant)] pub enum Msg { WaitFor { diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 2de3687d18d..c09ca934fa0 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -212,6 +212,11 @@ impl From for TypedCommand false, + PessimisticLockWakeUpMode::WakeUpModeForceLock => true, + }; + AcquirePessimisticLock::new( keys, req.take_primary_lock(), @@ -224,7 +229,7 @@ impl From for TypedCommand = SmallVec<[T; 4]>; + /// Task is a running command. pub(super) struct Task { pub(super) cid: u64, @@ -135,8 +143,9 @@ struct TaskContext { task: Option, lock: Lock, - cb: Option, + cb: Option, pr: Option, + woken_up_resumable_lock_requests: SVec>, // The one who sets `owned` from false to true is allowed to take // `cb` and `pr` safely. owned: AtomicBool, @@ -150,9 +159,11 @@ struct TaskContext { } impl TaskContext { - fn new(task: Task, cb: StorageCallback) -> TaskContext { + fn new(task: Task, cb: SchedulerTaskCallback, prepared_latches: Option) -> TaskContext { let tag = task.cmd.tag(); - let lock = task.cmd.gen_lock(); + let lock = prepared_latches.unwrap_or_else(|| task.cmd.gen_lock()); + // The initial locks should be either all acquired or all not acquired. + assert!(lock.owned_count == 0 || lock.owned_count == lock.required_hashes.len()); // Write command should acquire write lock. 
if !task.cmd.readonly() && !lock.is_write_lock() { panic!("write lock is expected for command {}", task.cmd); @@ -168,6 +179,7 @@ impl TaskContext { lock, cb: Some(cb), pr: None, + woken_up_resumable_lock_requests: smallvec![], owned: AtomicBool::new(false), write_bytes, tag, @@ -194,6 +206,42 @@ impl TaskContext { } } +pub enum SchedulerTaskCallback { + NormalRequestCallback(StorageCallback), + LockKeyCallbacks(Vec>), +} + +impl SchedulerTaskCallback { + fn execute(self, pr: ProcessResult) { + match self { + Self::NormalRequestCallback(cb) => cb.execute(pr), + Self::LockKeyCallbacks(cbs) => match pr { + ProcessResult::Failed { err } + | ProcessResult::PessimisticLockRes { res: Err(err) } => { + let err = SharedError::from(err); + for cb in cbs { + cb(Err(err.clone())); + } + } + ProcessResult::PessimisticLockRes { res: Ok(v) } => { + assert_eq!(v.0.len(), cbs.len()); + for (res, cb) in v.0.into_iter().zip(cbs) { + cb(Ok(res)) + } + } + _ => unreachable!(), + }, + } + } + + fn unwrap_normal_request_callback(self) -> StorageCallback { + match self { + Self::NormalRequestCallback(cb) => cb, + _ => panic!(""), + } + } +} + struct SchedulerInner { // slot_id -> { cid -> `TaskContext` } in the slot. task_slots: Vec>>>, @@ -260,8 +308,13 @@ impl SchedulerInner { self.task_slots[id_index(cid)].lock() } - fn new_task_context(&self, task: Task, callback: StorageCallback) -> TaskContext { - let tctx = TaskContext::new(task, callback); + fn new_task_context( + &self, + task: Task, + callback: SchedulerTaskCallback, + prepared_latches: Option, + ) -> TaskContext { + let tctx = TaskContext::new(task, callback, prepared_latches); let running_write_bytes = self .running_write_bytes .fetch_add(tctx.write_bytes, Ordering::AcqRel) as i64; @@ -287,13 +340,16 @@ impl SchedulerInner { /// If the task is been processing, it should be owned. /// If it has been finished, then it is not in the slot. /// In both cases, cb should be None. Otherwise, cb should be some. 
- fn try_own_and_take_cb(&self, cid: u64) -> Option { + fn try_own_and_take_cb(&self, cid: u64) -> Option { self.get_task_slot(cid) .get_mut(&cid) .and_then(|tctx| if tctx.try_own() { tctx.cb.take() } else { None }) } - fn take_task_cb_and_pr(&self, cid: u64) -> (Option, Option) { + fn take_task_cb_and_pr( + &self, + cid: u64, + ) -> (Option, Option) { self.get_task_slot(cid) .get_mut(&cid) .map(|tctx| (tctx.cb.take(), tctx.pr.take())) @@ -304,6 +360,20 @@ impl SchedulerInner { self.get_task_slot(cid).get_mut(&cid).unwrap().pr = Some(pr); } + fn store_lock_changes( + &self, + cid: u64, + woken_up_resumable_lock_requests: SVec>, + ) { + self.get_task_slot(cid) + .get_mut(&cid) + .map(move |tctx| { + assert!(tctx.woken_up_resumable_lock_requests.is_empty()); + tctx.woken_up_resumable_lock_requests = woken_up_resumable_lock_requests; + }) + .unwrap(); + } + fn too_busy(&self, region_id: u64) -> bool { fail_point!("txn_scheduler_busy", |_| true); self.running_write_bytes.load(Ordering::Acquire) >= self.sched_pending_write_threshold @@ -444,21 +514,41 @@ impl Scheduler { }); return; } - self.schedule_command(cmd, callback); + self.schedule_command( + None, + cmd, + SchedulerTaskCallback::NormalRequestCallback(callback), + None, + ); } /// Releases all the latches held by a command. 
- fn release_lock(&self, lock: &Lock, cid: u64) { - let wakeup_list = self.inner.latches.release(lock, cid, None); + fn release_latches( + &self, + lock: Lock, + cid: u64, + keep_latches_for_next_cmd: Option<(u64, &Lock)>, + ) { + let wakeup_list = self + .inner + .latches + .release(&lock, cid, keep_latches_for_next_cmd); for wcid in wakeup_list { self.try_to_wake_up(wcid); } } - fn schedule_command(&self, cmd: Command, callback: StorageCallback) { - let cid = self.inner.gen_id(); + fn schedule_command( + &self, + specified_cid: Option, + cmd: Command, + callback: SchedulerTaskCallback, + prepared_latches: Option, + ) { + let cid = specified_cid.unwrap_or_else(|| self.inner.gen_id()); let tracker = get_tls_tracker_token(); debug!("received new command"; "cid" => cid, "cmd" => ?cmd, "tracker" => ?tracker); + let tag = cmd.tag(); let priority_tag = get_priority_tag(cmd.priority()); SCHED_STAGE_COUNTER_VEC.get(tag).new.inc(); @@ -469,7 +559,7 @@ impl Scheduler { let mut task_slot = self.inner.get_task_slot(cid); let tctx = task_slot.entry(cid).or_insert_with(|| { self.inner - .new_task_context(Task::new(cid, tracker, cmd), callback) + .new_task_context(Task::new(cid, tracker, cmd), callback, prepared_latches) }); if self.inner.latches.acquire(&mut tctx.lock, cid) { @@ -567,6 +657,28 @@ impl Scheduler { } } + fn schedule_awakened_pessimistic_locks( + &self, + cid: u64, + mut awakened_entries: SVec>, + latches: Lock, + ) { + let key_callbacks: Vec<_> = awakened_entries + .iter_mut() + .map(|i| i.key_cb.take().unwrap().into_inner()) + .collect(); + + let cmd = commands::AcquirePessimisticLockResumed::from_lock_wait_entries(awakened_entries); + + // TODO: Make flow control take effect on this thing. 
+ self.schedule_command( + Some(cid), + cmd.into(), + SchedulerTaskCallback::LockKeyCallbacks(key_callbacks), + Some(latches), + ); + } + // pub for test pub fn get_sched_pool(&self, priority: CommandPri) -> &SchedPool { if priority == CommandPri::High { @@ -662,7 +774,10 @@ impl Scheduler { cb.execute(pr); } - self.release_lock(&tctx.lock, cid); + if !tctx.woken_up_resumable_lock_requests.is_empty() { + self.put_back_lock_wait_entries(tctx.woken_up_resumable_lock_requests); + } + self.release_latches(tctx.lock, cid, None); } /// Event handler for the success of read. @@ -676,12 +791,12 @@ impl Scheduler { let tctx = self.inner.dequeue_task_context(cid); if let ProcessResult::NextCommand { cmd } = pr { SCHED_STAGE_COUNTER_VEC.get(tag).next_cmd.inc(); - self.schedule_command(cmd, tctx.cb.unwrap()); + self.schedule_command(None, cmd, tctx.cb.unwrap(), None); } else { tctx.cb.unwrap().execute(pr); } - self.release_lock(&tctx.lock, cid); + self.release_latches(tctx.lock, cid, None); } /// Event handler for the success of write. @@ -715,19 +830,25 @@ impl Scheduler { drop(lock_guards); let tctx = self.inner.dequeue_task_context(cid); + let mut do_wake_up = !tctx.woken_up_resumable_lock_requests.is_empty(); // If pipelined pessimistic lock or async apply prewrite takes effect, it's not // guaranteed that the proposed or committed callback is surely invoked, which // takes and invokes `tctx.cb(tctx.pr)`. 
if let Some(cb) = tctx.cb { let pr = match result { Ok(()) => pr.or(tctx.pr).unwrap(), - Err(e) => ProcessResult::Failed { - err: StorageError::from(e), - }, + Err(e) => { + if !Self::is_undetermined_error(&e) { + do_wake_up = false; + } + ProcessResult::Failed { + err: StorageError::from(e), + } + } }; if let ProcessResult::NextCommand { cmd } = pr { SCHED_STAGE_COUNTER_VEC.get(tag).next_cmd.inc(); - self.schedule_command(cmd, cb); + self.schedule_command(None, cmd, cb, None); } else { cb.execute(pr); } @@ -735,7 +856,34 @@ impl Scheduler { assert!(pipelined || async_apply_prewrite); } - self.release_lock(&tctx.lock, cid); + // TODO: Update lock wait relationships after acquiring some locks. + + if do_wake_up { + let woken_up_resumable_lock_requests = tctx.woken_up_resumable_lock_requests; + let next_cid = self.inner.gen_id(); + let mut next_latches = + Self::gen_latches_for_lock_wait_entries(woken_up_resumable_lock_requests.iter()); + + self.release_latches(tctx.lock, cid, Some((next_cid, &next_latches))); + + next_latches.force_assume_acquired(); + self.schedule_awakened_pessimistic_locks( + next_cid, + woken_up_resumable_lock_requests, + next_latches, + ); + } else { + if !tctx.woken_up_resumable_lock_requests.is_empty() { + self.put_back_lock_wait_entries(tctx.woken_up_resumable_lock_requests); + } + self.release_latches(tctx.lock, cid, None); + } + } + + fn gen_latches_for_lock_wait_entries<'a>( + entries: impl IntoIterator>, + ) -> Lock { + Lock::new(entries.into_iter().map(|entry| &entry.key)) } /// Event handler for the request of waiting for lock @@ -793,18 +941,19 @@ impl Scheduler { ); } - fn on_release_locks(&self, released_locks: ReleasedLocks) { + fn on_release_locks(&self, released_locks: ReleasedLocks) -> SVec> { // This function is always called when holding the latch of the involved keys. // So if we found the lock waiting queues are empty, there's no chance // that other threads/commands adds new lock-wait entries to the keys // concurrently. 
Therefore it's safe to skip waking up when we found the // lock waiting queues are empty. if self.inner.lock_wait_queues.is_empty() { - return; + return smallvec![]; } - let mut legacy_wake_up_list = SmallVec::<[_; 4]>::new(); - let mut delay_wake_up_futures = SmallVec::<[_; 4]>::new(); + let mut legacy_wake_up_list = SVec::new(); + let mut delay_wake_up_futures = SVec::new(); + let mut resumable_wake_up_list = SVec::new(); let wake_up_delay_duration_ms = self .inner .pessimistic_lock_wake_up_delay_duration_ms @@ -822,19 +971,21 @@ impl Scheduler { None => return, }; - // TODO: Currently there are only legacy requests. When resumable requests are - // supported, do not put them to the `legacy_wake_up_list`. - legacy_wake_up_list.push((lock_wait_entry, released_lock)); + if lock_wait_entry.parameters.allow_lock_with_conflict { + resumable_wake_up_list.push(lock_wait_entry); + } else { + legacy_wake_up_list.push((lock_wait_entry, released_lock)); + } if let Some(f) = delay_wake_up_future { delay_wake_up_futures.push(f); } }); - if legacy_wake_up_list.is_empty() && delay_wake_up_futures.is_empty() { - return; + if !legacy_wake_up_list.is_empty() || !delay_wake_up_futures.is_empty() { + self.wake_up_legacy_pessimistic_locks(legacy_wake_up_list, delay_wake_up_futures); } - self.wake_up_legacy_pessimistic_locks(legacy_wake_up_list, delay_wake_up_futures); + resumable_wake_up_list } fn wake_up_legacy_pessimistic_locks( @@ -880,9 +1031,15 @@ impl Scheduler { .unwrap(); } + fn is_undetermined_error(_e: &tikv_kv::Error) -> bool { + // TODO: If there's some cases that `engine.async_write` returns error but it's + // still possible that the data is successfully written, return true. 
+ false + } + fn early_response( cid: u64, - cb: StorageCallback, + cb: SchedulerTaskCallback, pr: ProcessResult, tag: CommandKind, stage: CommandStageKind, @@ -1076,24 +1233,50 @@ impl Scheduler { let mut pr = Some(pr); if !lock_info.is_empty() { - assert_eq!(lock_info.len(), 1); - let lock_info = lock_info.into_iter().next().unwrap(); - - // Only handle lock waiting if `wait_timeout` is set. Otherwise it indicates - // that it's a lock-no-wait request and we need to report error - // immediately. - if lock_info.parameters.wait_timeout.is_some() { - assert_eq!(to_be_write.size(), 0); - pr = Some(ProcessResult::Res); - // allow_lock_with_conflict is not supported yet in this version. - assert!(!lock_info.parameters.allow_lock_with_conflict); - - scheduler.on_wait_for_lock(&ctx, cid, lock_info, tracker); + if tag == CommandKind::acquire_pessimistic_lock { + assert_eq!(lock_info.len(), 1); + let lock_info = lock_info.into_iter().next().unwrap(); + + // Only handle lock waiting if `wait_timeout` is set. Otherwise it indicates + // that it's a lock-no-wait request and we need to report error + // immediately. + if lock_info.parameters.wait_timeout.is_some() { + assert_eq!(to_be_write.size(), 0); + pr = Some(ProcessResult::Res); + + scheduler.on_wait_for_lock(&ctx, cid, lock_info, tracker); + } else { + // For requests with `allow_lock_with_conflict`, key errors are set key-wise. + // TODO: It's better to return this error from + // `commands::AcquirePessimisticLocks::process_write`. + if lock_info.parameters.allow_lock_with_conflict { + pr = Some(ProcessResult::PessimisticLockRes { + res: Err(StorageError::from(Error::from(MvccError::from( + MvccErrorInner::KeyIsLocked(lock_info.lock_info_pb), + )))), + }); + } + } + } else if tag == CommandKind::acquire_pessimistic_lock_resumed { + // Some requests meets lock again after waiting and resuming. 
+ scheduler.on_wait_for_lock_after_resuming(cid, pr.as_mut().unwrap(), lock_info); + } else { + // WriteResult returning lock info is only expected to exist for pessimistic + // lock requests. + unreachable!(); } } - if !released_locks.is_empty() { - scheduler.on_release_locks(released_locks); + let woken_up_resumable_entries = if !released_locks.is_empty() { + scheduler.on_release_locks(released_locks) + } else { + smallvec![] + }; + + if !woken_up_resumable_entries.is_empty() { + scheduler + .inner + .store_lock_changes(cid, woken_up_resumable_entries); } if to_be_write.modifies.is_empty() { @@ -1101,7 +1284,8 @@ impl Scheduler { return; } - if tag == CommandKind::acquire_pessimistic_lock + if (tag == CommandKind::acquire_pessimistic_lock + || tag == CommandKind::acquire_pessimistic_lock_resumed) && pessimistic_lock_mode == PessimisticLockMode::InMemory && self.try_write_in_memory_pessimistic_locks( txn_ext.as_deref(), @@ -1418,11 +1602,11 @@ impl Scheduler { lock_info.key.clone(), self.inner.lock_wait_queues.clone(), lock_wait_token, - cb, + cb.unwrap_normal_request_callback(), lock_info.parameters.allow_lock_with_conflict, ); let first_batch_cb = ctx.get_callback_for_first_write_batch(); - task_ctx.cb = Some(first_batch_cb); + task_ctx.cb = Some(SchedulerTaskCallback::NormalRequestCallback(first_batch_cb)); drop(slot); let lock_wait_entry = Box::new(LockWaitEntry { @@ -1437,6 +1621,95 @@ impl Scheduler { (ctx, lock_wait_entry, lock_info.lock_info_pb) } + + fn make_lock_waiting_after_resuming( + &self, + lock_info: WriteResultLockInfo, + cb: PessimisticLockKeyCallback, + ) -> Box { + Box::new(LockWaitEntry { + key: lock_info.key, + lock_hash: lock_info.lock_digest.hash, + parameters: lock_info.parameters, + should_not_exist: lock_info.should_not_exist, + lock_wait_token: lock_info.lock_wait_token, + legacy_wake_up_index: None, + key_cb: Some(cb.into()), + }) + } + + fn on_wait_for_lock_after_resuming( + &self, + cid: u64, + pr: &mut ProcessResult, + lock_info: 
Vec, + ) { + if lock_info.is_empty() { + return; + } + + // TODO: Update lock wait relationship. + + let results = match pr { + ProcessResult::PessimisticLockRes { + res: Ok(PessimisticLockResults(res)), + } => res, + _ => unreachable!(), + }; + + let mut slot = self.inner.get_task_slot(cid); + let task_ctx = slot.get_mut(&cid).unwrap(); + let cbs = match task_ctx.cb { + Some(SchedulerTaskCallback::LockKeyCallbacks(ref mut v)) => v, + _ => unreachable!(), + }; + assert_eq!(results.len(), cbs.len()); + + let finished_len = results.len() - lock_info.len(); + + let original_results = std::mem::replace(results, Vec::with_capacity(finished_len)); + let original_cbs = std::mem::replace(cbs, Vec::with_capacity(finished_len)); + let mut lock_wait_entries = SmallVec::<[_; 10]>::with_capacity(lock_info.len()); + let mut lock_info_it = lock_info.into_iter(); + + for (result, cb) in original_results.into_iter().zip(original_cbs) { + if let PessimisticLockKeyResult::Waiting = &result { + let lock_info = lock_info_it.next().unwrap(); + let lock_info_pb = lock_info.lock_info_pb.clone(); + let entry = self.make_lock_waiting_after_resuming(lock_info, cb); + lock_wait_entries.push((entry, lock_info_pb)); + } else { + results.push(result); + cbs.push(cb); + } + } + + assert!(lock_info_it.next().is_none()); + assert_eq!(results.len(), cbs.len()); + + // Release the mutex in the latch slot. + drop(slot); + + // Add to the lock waiting queue. + // TODO: the request may be canceled from lock manager at this time. If so, it + // should not be added to the queue. + for (entry, lock_info_pb) in lock_wait_entries { + self.inner + .lock_wait_queues + .push_lock_wait(entry, lock_info_pb); + } + } + + fn put_back_lock_wait_entries(&self, entries: impl IntoIterator>) { + for entry in entries.into_iter() { + // TODO: Do not pass `default` as the lock info. 
Here we need another method + // `put_back_lock_wait`, which doesn't require updating lock info and + // additionally checks if the lock wait entry is already canceled. + self.inner + .lock_wait_queues + .push_lock_wait(entry, Default::default()); + } + } } pub async fn get_raw_ext( @@ -1717,7 +1990,7 @@ mod tests { block_on(f).unwrap(), Err(StorageError(box StorageErrorInner::DeadlineExceeded)) )); - scheduler.release_lock(&lock, cid); + scheduler.release_latches(lock, cid, None); // A new request should not be blocked. let mut req = BatchRollbackRequest::default(); @@ -1952,7 +2225,7 @@ mod tests { // When releasing the lock, the queuing tasks should be all waken up without // stack overflow. - scheduler.release_lock(&lock, cid); + scheduler.release_latches(lock, cid, None); // A new request should not be blocked. let mut req = BatchRollbackRequest::default(); diff --git a/src/storage/types.rs b/src/storage/types.rs index 63bab09eb5c..c7da00c9ace 100644 --- a/src/storage/types.rs +++ b/src/storage/types.rs @@ -299,6 +299,48 @@ impl PessimisticLockResults { self.0.push(key_res); } + pub fn into_pb(self) -> (Vec, Option) { + let mut error = None; + let res = self + .0 + .into_iter() + .map(|res| { + let mut res_pb = kvrpcpb::PessimisticLockKeyResult::default(); + match res { + PessimisticLockKeyResult::Empty => { + res_pb.set_type(kvrpcpb::PessimisticLockKeyResultType::LockResultNormal) + } + PessimisticLockKeyResult::Value(v) => { + res_pb.set_type(kvrpcpb::PessimisticLockKeyResultType::LockResultNormal); + res_pb.set_existence(v.is_some()); + res_pb.set_value(v.unwrap_or_default()); + } + PessimisticLockKeyResult::Existence(e) => { + res_pb.set_type(kvrpcpb::PessimisticLockKeyResultType::LockResultNormal); + res_pb.set_existence(e); + } + PessimisticLockKeyResult::LockedWithConflict { value, conflict_ts } => { + res_pb.set_type( + kvrpcpb::PessimisticLockKeyResultType::LockResultLockedWithConflict, + ); + res_pb.set_existence(value.is_some()); + 
res_pb.set_value(value.unwrap_or_default()); + res_pb.set_locked_with_conflict_ts(conflict_ts.into_inner()); + } + PessimisticLockKeyResult::Waiting => unreachable!(), + PessimisticLockKeyResult::Failed(e) => { + if error.is_none() { + error = Some(e) + } + res_pb.set_type(kvrpcpb::PessimisticLockKeyResultType::LockResultFailed); + } + } + res_pb + }) + .collect(); + (res, error) + } + pub fn into_legacy_values_and_not_founds(self) -> (Vec, Vec) { if self.0.is_empty() { return (vec![], vec![]); diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index f6db3386007..effe9698f30 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -1198,7 +1198,229 @@ fn test_pessimistic_lock() { assert_eq!(resp.get_values().to_vec(), vec![v.clone(), vec![]]); assert_eq!(resp.get_not_founds().to_vec(), vec![false, true]); } - must_kv_pessimistic_rollback(&client, ctx.clone(), k.clone(), 40); + must_kv_pessimistic_rollback(&client, ctx.clone(), k.clone(), 40, 40); + } +} + +#[test] +fn test_pessimistic_lock_resumable() { + let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + + // Resumable pessimistic lock request with multi-key is not supported yet. 
+ let resp = kv_pessimistic_lock_resumable( + &client, + ctx.clone(), + vec![b"k1".to_vec(), b"k2".to_vec()], + 1, + 1, + None, + false, + false, + ); + assert_eq!(resp.get_results(), &[]); + assert_ne!(resp.get_errors().len(), 0); + + let (k, v) = (b"key".to_vec(), b"value".to_vec()); + + // Prewrite + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(k.clone()); + mutation.set_value(v.clone()); + must_kv_prewrite(&client, ctx.clone(), vec![mutation.clone()], k.clone(), 5); + + // No wait + let start_time = Instant::now(); + let resp = kv_pessimistic_lock_resumable( + &client, + ctx.clone(), + vec![k.clone()], + 8, + 8, + None, + false, + false, + ); + assert!(!resp.has_region_error(), "{:?}", resp.get_region_error()); + assert!(start_time.elapsed() < Duration::from_millis(200)); + assert_eq!(resp.errors.len(), 1); + assert!(resp.errors[0].has_locked()); + assert_eq!(resp.get_results().len(), 1); + assert_eq!( + resp.get_results()[0].get_type(), + PessimisticLockKeyResultType::LockResultFailed + ); + + // Wait Timeout + let resp = kv_pessimistic_lock_resumable( + &client, + ctx.clone(), + vec![k.clone()], + 8, + 8, + Some(1), + false, + false, + ); + assert!(!resp.has_region_error(), "{:?}", resp.get_region_error()); + assert_eq!(resp.errors.len(), 1); + assert!(resp.errors[0].has_locked()); + assert_eq!(resp.get_results().len(), 1); + assert_eq!( + resp.get_results()[0].get_type(), + PessimisticLockKeyResultType::LockResultFailed + ); + + must_kv_commit(&client, ctx.clone(), vec![k.clone()], 5, 9, 9); + + let mut curr_ts = 10; + + for &(return_values, check_existence) in + &[(false, false), (false, true), (true, false), (true, true)] + { + let prewrite_start_ts = curr_ts; + let commit_ts = curr_ts + 5; + let test_lock_ts = curr_ts + 10; + curr_ts += 20; + + // Prewrite + must_kv_prewrite( + &client, + ctx.clone(), + vec![mutation.clone()], + k.clone(), + prewrite_start_ts, + ); + + let (tx, rx) = std::sync::mpsc::channel(); + 
let handle = { + let client = client.clone(); + let k = k.clone(); + let ctx = ctx.clone(); + thread::spawn(move || { + let res = kv_pessimistic_lock_resumable( + &client, + ctx, + vec![k], + test_lock_ts, + test_lock_ts, + Some(1000), + return_values, + check_existence, + ); + tx.send(()).unwrap(); + res + }) + }; + // Blocked for lock waiting. + rx.recv_timeout(Duration::from_millis(100)).unwrap_err(); + + must_kv_commit( + &client, + ctx.clone(), + vec![k.clone()], + prewrite_start_ts, + commit_ts, + commit_ts, + ); + rx.recv_timeout(Duration::from_millis(1000)).unwrap(); + let resp = handle.join().unwrap(); + assert!(!resp.has_region_error(), "{:?}", resp.get_region_error()); + assert_eq!(resp.errors.len(), 0); + assert_eq!(resp.get_results().len(), 1); + let res = &resp.get_results()[0]; + if return_values { + assert_eq!( + res.get_type(), + PessimisticLockKeyResultType::LockResultNormal + ); + assert_eq!(res.get_value(), b"value"); + assert_eq!(res.get_existence(), true); + assert_eq!(res.get_locked_with_conflict_ts(), 0); + } else if check_existence { + assert_eq!( + res.get_type(), + PessimisticLockKeyResultType::LockResultNormal + ); + assert_eq!(res.get_value(), b""); + assert_eq!(res.get_existence(), true); + assert_eq!(res.get_locked_with_conflict_ts(), 0); + } else { + assert_eq!( + res.get_type(), + PessimisticLockKeyResultType::LockResultNormal + ); + assert_eq!(res.get_value(), b""); + assert_eq!(res.get_existence(), false); + assert_eq!(res.get_locked_with_conflict_ts(), 0); + } + + must_kv_pessimistic_rollback(&client, ctx.clone(), k.clone(), test_lock_ts, test_lock_ts); + } + + for &(return_values, check_existence) in + &[(false, false), (false, true), (true, false), (true, true)] + { + let test_lock_ts = curr_ts; + let prewrite_start_ts = curr_ts + 10; + let commit_ts = curr_ts + 11; + curr_ts += 20; + // Prewrite + must_kv_prewrite( + &client, + ctx.clone(), + vec![mutation.clone()], + k.clone(), + prewrite_start_ts, + ); + + let (tx, rx) = 
std::sync::mpsc::channel(); + let handle = { + let client = client.clone(); + let k = k.clone(); + let ctx = ctx.clone(); + thread::spawn(move || { + let res = kv_pessimistic_lock_resumable( + &client, + ctx, + vec![k], + test_lock_ts, + test_lock_ts, + Some(1000), + return_values, + check_existence, + ); + tx.send(()).unwrap(); + res + }) + }; + // Blocked for lock waiting. + rx.recv_timeout(Duration::from_millis(100)).unwrap_err(); + must_kv_commit( + &client, + ctx.clone(), + vec![k.clone()], + prewrite_start_ts, + commit_ts, + commit_ts, + ); + rx.recv_timeout(Duration::from_millis(1000)).unwrap(); + let resp = handle.join().unwrap(); + assert!(!resp.has_region_error(), "{:?}", resp.get_region_error()); + assert_eq!(resp.errors.len(), 0); + assert_eq!(resp.get_results().len(), 1); + assert_eq!( + resp.get_results()[0].get_type(), + PessimisticLockKeyResultType::LockResultLockedWithConflict + ); + assert_eq!(resp.get_results()[0].get_value(), v); + assert_eq!(resp.get_results()[0].get_existence(), true); + assert_eq!( + resp.get_results()[0].get_locked_with_conflict_ts(), + commit_ts + ); + + must_kv_pessimistic_rollback(&client, ctx.clone(), k.clone(), test_lock_ts, commit_ts); } } @@ -1816,7 +2038,7 @@ fn test_get_lock_wait_info_api() { entries[0].resource_group_tag, b"resource_group_tag2".to_vec() ); - must_kv_pessimistic_rollback(&client, ctx, b"a".to_vec(), 20); + must_kv_pessimistic_rollback(&client, ctx, b"a".to_vec(), 20, 20); handle.join().unwrap(); } diff --git a/tests/integrations/server/lock_manager.rs b/tests/integrations/server/lock_manager.rs index d796d9c1f66..43032dd8cc3 100644 --- a/tests/integrations/server/lock_manager.rs +++ b/tests/integrations/server/lock_manager.rs @@ -42,8 +42,9 @@ fn deadlock(client: &TikvClient, ctx: Context, key1: &[u8], ts: u64) -> bool { handle.join().unwrap(); // Clean up - must_kv_pessimistic_rollback(client, ctx.clone(), key1.clone(), ts); - must_kv_pessimistic_rollback(client, ctx, key2.clone(), ts + 1); + + 
must_kv_pessimistic_rollback(client, ctx.clone(), key1.clone(), ts, ts); + must_kv_pessimistic_rollback(client, ctx, key2.clone(), ts + 1, ts + 1); assert_eq!(resp.errors.len(), 1); if resp.errors[0].has_deadlock() { From 31ca8b9bc127b820dfabc6ee259514be819a3eb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 29 Nov 2022 13:09:59 +0800 Subject: [PATCH 0365/1149] tikv_utils: make retry returns a `Send` future even the result type is `!Sync` (#13753) close tikv/tikv#13811 This PR have modified the `retry_ext` function in the `tikv_utils::stream` package, making the `final_result` not live across await points any more. So even the result type is `!Sync`, `retry` returns a `Send` future now. Signed-off-by: hillium --- components/tikv_util/src/stream.rs | 86 ++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 15 deletions(-) diff --git a/components/tikv_util/src/stream.rs b/components/tikv_util/src/stream.rs index 8f892659f68..fb29d1c91f0 100644 --- a/components/tikv_util/src/stream.rs +++ b/components/tikv_util/src/stream.rs @@ -152,24 +152,28 @@ where })(); let mut retry_wait_dur = Duration::from_secs(1); - - let mut final_result = action().await; - for _ in 1..max_retry_times { - if let Err(e) = &final_result { - if let Some(ref mut f) = ext.on_failure { - f(e); - } - if e.is_retryable() { - let backoff = thread_rng().gen_range(0..1000); - sleep(retry_wait_dur + Duration::from_millis(backoff)).await; - retry_wait_dur = MAX_RETRY_DELAY.min(retry_wait_dur * 2); - final_result = action().await; - continue; + let mut retry_time = 0; + loop { + match action().await { + Ok(r) => return Ok(r), + Err(e) => { + if let Some(ref mut f) = ext.on_failure { + f(&e); + } + if !e.is_retryable() { + return Err(e); + } + retry_time += 1; + if retry_time > max_retry_times { + return Err(e); + } } } - break; + + let backoff = thread_rng().gen_range(0..1000); + sleep(retry_wait_dur + 
Duration::from_millis(backoff)).await; + retry_wait_dur = MAX_RETRY_DELAY.min(retry_wait_dur * 2); } - final_result } // Return an error if the future does not finish by the timeout @@ -206,3 +210,55 @@ impl RetryError for HttpDispatchError { true } } + +#[cfg(test)] +mod tests { + use std::{cell::RefCell, pin::Pin}; + + use futures::{Future, FutureExt}; + use rusoto_core::HttpDispatchError; + + use super::RetryError; + use crate::stream::retry; + + #[derive(Debug)] + struct TriviallyRetry; + + impl RetryError for TriviallyRetry { + fn is_retryable(&self) -> bool { + true + } + } + + fn assert_send(_t: T) {} + + #[test] + fn test_retry_is_send_even_return_type_not_sync() { + struct BangSync(Option>); + let fut = retry(|| futures::future::ok::<_, HttpDispatchError>(BangSync(None))); + assert_send(fut) + } + + fn gen_action_fail_for( + n_times: usize, + ) -> impl FnMut() -> Pin>>> { + let mut n = 0; + move || { + if n < n_times { + n += 1; + futures::future::err(TriviallyRetry).boxed() + } else { + futures::future::ok(()).boxed() + } + } + } + + #[tokio::test] + async fn test_failure() { + fail::cfg("retry_count", "return(2)").unwrap(); + let r = retry(gen_action_fail_for(3)).await; + assert!(r.is_err(), "{:?}", r); + let r = retry(gen_action_fail_for(1)).await; + assert!(r.is_ok(), "{:?}", r); + } +} From 2f4374ee4e15b2c8054c06e5a50bdaeeea475472 Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Tue, 29 Nov 2022 13:51:59 +0800 Subject: [PATCH 0366/1149] filter out invalid k-v events when applying kv-file on PiTR (#13852) close tikv/tikv#13853, ref pingcap/tidb#39398 Signed-off-by: joccau Co-authored-by: Ti Chi Robot --- src/import/sst_service.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 61d181b5c2f..a0d2ab5f4ee 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -1017,8 +1017,13 @@ where { // use callback to collect 
kv data. Box::new(move |k: Vec, v: Vec| { - let mut req = Request::default(); + // Need to skip the empty key/value that could break the transaction or cause + // data corruption. see details at https://github.com/pingcap/tiflow/issues/5468. + if k.is_empty() || v.is_empty() { + return; + } + let mut req = Request::default(); if is_delete { let mut del = DeleteRequest::default(); del.set_key(k); @@ -1201,6 +1206,7 @@ mod test { write(b"bar", Put, 38, 37), write(b"baz", Put, 34, 31), write(b"bar", Put, 28, 17), + (Vec::default(), Vec::default()), ], expected_reqs: vec![ write_req(b"foo", Put, 40, 39), @@ -1235,6 +1241,7 @@ mod test { ), default(b"beyond", b"Calling your name.", 278), default(b"beyond", b"Calling your name.", 278), + default(b"PingCap", b"", 300), ], expected_reqs: vec![ default_req(b"aria", b"The planet where flowers bloom.", 123), From f8a397657072539501ca636e2f2dd4f3a85693f4 Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Tue, 29 Nov 2022 18:32:00 +0800 Subject: [PATCH 0367/1149] cdc: filter out the event cause by cdc write (#13796) ref tikv/tikv#13779 filter out the event cause by cdc write Signed-off-by: xiongjiwei Co-authored-by: Ti Chi Robot --- components/cdc/src/delegate.rs | 80 ++++++++++- components/cdc/src/endpoint.rs | 20 +++ components/cdc/src/initializer.rs | 78 ++++++++++- components/cdc/src/service.rs | 10 +- components/cdc/tests/integrations/test_cdc.rs | 126 ++++++++++++++++++ components/cdc/tests/mod.rs | 30 ++++- 6 files changed, 332 insertions(+), 12 deletions(-) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index de38a7b1fc8..120806588dc 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -129,6 +129,7 @@ pub struct Downstream { sink: Option, state: Arc>, kv_api: ChangeDataRequestKvApi, + filter_loop: bool, } impl Downstream { @@ -142,6 +143,7 @@ impl Downstream { req_id: u64, conn_id: ConnId, kv_api: ChangeDataRequestKvApi, + filter_loop: bool, ) -> Downstream { 
Downstream { id: DownstreamId::new(), @@ -152,6 +154,7 @@ impl Downstream { sink: None, state: Arc::new(AtomicCell::new(DownstreamState::default())), kv_api, + filter_loop, } } @@ -203,6 +206,10 @@ impl Downstream { self.id } + pub fn get_filter_loop(&self) -> bool { + self.filter_loop + } + pub fn get_state(&self) -> Arc> { self.state.clone() } @@ -471,6 +478,7 @@ impl Delegate { region_id: u64, request_id: u64, entries: Vec>, + filter_loop: bool, ) -> Result> { let entries_len = entries.len(); let mut rows = vec![Vec::with_capacity(entries_len)]; @@ -527,6 +535,10 @@ impl Delegate { row_size = 0; } } + // if the `txn_source` is not 0 and we should filter it out, skip this event. + if row.txn_source != 0 && filter_loop { + continue; + } if current_rows_size + row_size >= CDC_EVENT_MAX_BYTES { rows.push(Vec::with_capacity(entries_len)); current_rows_size = 0; @@ -620,6 +632,48 @@ impl Delegate { if entries.is_empty() { return Ok(()); } + + let downstreams = self.downstreams(); + assert!( + !downstreams.is_empty(), + "region {} miss downstream", + self.region_id + ); + + let mut need_filter = false; + for ds in downstreams { + if ds.filter_loop { + need_filter = true; + break; + } + } + + // collect the change event cause by user write, which is `txn_source` = 0. + // for changefeed which only need the user write, send the `filtered`, or else, + // send them all. 
+ let filtered = if need_filter { + let filtered = entries + .iter() + .filter(|x| x.txn_source == 0) + .cloned() + .collect::>(); + if filtered.is_empty() { + None + } else { + Some(Event { + region_id: self.region_id, + index, + event: Some(Event_oneof_event::Entries(EventEntries { + entries: filtered.into(), + ..Default::default() + })), + ..Default::default() + }) + } + } else { + None + }; + let event_entries = EventEntries { entries: entries.into(), ..Default::default() @@ -630,6 +684,7 @@ impl Delegate { event: Some(Event_oneof_event::Entries(event_entries)), ..Default::default() }; + let send = move |downstream: &Downstream| { // No ready downstream or a downstream that does not match the kv_api type, will // be ignored. There will be one region that contains both Txn & Raw entries. @@ -637,7 +692,15 @@ impl Delegate { if !downstream.state.load().ready_for_change_events() || downstream.kv_api != kv_api { return Ok(()); } - let event = change_data_event.clone(); + if downstream.filter_loop && filtered.is_none() { + return Ok(()); + } + + let event = if downstream.filter_loop { + filtered.clone().unwrap() + } else { + change_data_event.clone() + }; // Do not force send for real time change data events. let force_send = false; downstream.sink_event(event, force_send) @@ -918,6 +981,7 @@ fn decode_write( } }; let commit_ts = if write.write_type == WriteType::Rollback { + assert_eq!(write.txn_source, 0); 0 } else { key.decode_ts().unwrap().into_inner() @@ -926,6 +990,8 @@ fn decode_write( row.commit_ts = commit_ts; row.key = key.truncate_ts().unwrap().into_raw().unwrap(); row.op_type = op_type as _; + // used for filter out the event. see `txn_source` field for more detail. 
+ row.txn_source = write.txn_source; set_event_row_type(row, r_type); if let Some(value) = write.short_value { row.value = value; @@ -952,6 +1018,8 @@ fn decode_lock(key: Vec, lock: Lock, row: &mut EventRow, has_value: &mut boo row.start_ts = lock.ts.into_inner(); row.key = key.into_raw().unwrap(); row.op_type = op_type as _; + // used for filter out the event. see `txn_source` field for more detail. + row.txn_source = lock.txn_source; set_event_row_type(row, EventLogType::Prewrite); if let Some(value) = lock.short_value { row.value = value; @@ -1021,6 +1089,7 @@ mod tests { request_id, ConnId::new(), ChangeDataRequestKvApi::TiDb, + false, ); downstream.set_sink(sink); let mut delegate = Delegate::new(region_id, Default::default()); @@ -1138,7 +1207,14 @@ mod tests { let mut epoch = RegionEpoch::default(); epoch.set_conf_ver(region_version); epoch.set_version(region_version); - Downstream::new(peer, epoch, id, ConnId::new(), ChangeDataRequestKvApi::TiDb) + Downstream::new( + peer, + epoch, + id, + ConnId::new(), + ChangeDataRequestKvApi::TiDb, + false, + ) }; // Create a new delegate. diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 4b6bbad6d35..6d64754d042 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -620,6 +620,7 @@ impl, E: KvEngine> Endpoint { let api_version = self.api_version; let downstream_id = downstream.get_id(); let downstream_state = downstream.get_state(); + let filter_loop = downstream.get_filter_loop(); // Register must follow OpenConn, so the connection must be available. 
let conn = self.connections.get_mut(&conn_id).unwrap(); @@ -746,6 +747,7 @@ impl, E: KvEngine> Endpoint { build_resolver: is_new_delegate, ts_filter_ratio: self.config.incremental_scan_ts_filter_ratio, kv_api, + filter_loop, }; let raft_router = self.raft_router.clone(); @@ -1423,6 +1425,7 @@ mod tests { 1, conn_id, ChangeDataRequestKvApi::RawKv, + false, ); req.set_kv_api(ChangeDataRequestKvApi::RawKv); suite.run(Task::Register { @@ -1458,6 +1461,7 @@ mod tests { 2, conn_id, ChangeDataRequestKvApi::TxnKv, + false, ); req.set_kv_api(ChangeDataRequestKvApi::TxnKv); suite.run(Task::Register { @@ -1494,6 +1498,7 @@ mod tests { 3, conn_id, ChangeDataRequestKvApi::TxnKv, + false, ); req.set_kv_api(ChangeDataRequestKvApi::TxnKv); suite.run(Task::Register { @@ -1672,6 +1677,7 @@ mod tests { 0, conn_id, ChangeDataRequestKvApi::TiDb, + false, ); suite.run(Task::Register { request: req, @@ -1718,6 +1724,7 @@ mod tests { 1, conn_id, ChangeDataRequestKvApi::TiDb, + false, ); // Enable batch resolved ts in the test. let version = FeatureGate::batch_resolved_ts(); @@ -1740,6 +1747,7 @@ mod tests { 2, conn_id, ChangeDataRequestKvApi::TiDb, + false, ); suite.run(Task::Register { request: req.clone(), @@ -1776,6 +1784,7 @@ mod tests { 3, conn_id, ChangeDataRequestKvApi::TiDb, + false, ); suite.run(Task::Register { request: req, @@ -1820,6 +1829,7 @@ mod tests { 1, conn_id, ChangeDataRequestKvApi::TiDb, + false, ); suite.add_local_reader(100); suite.run(Task::Register { @@ -1851,6 +1861,7 @@ mod tests { 1, conn_id, ChangeDataRequestKvApi::TiDb, + false, ); suite.run(Task::Register { request: req, @@ -1926,6 +1937,7 @@ mod tests { 0, conn_id, ChangeDataRequestKvApi::TiDb, + false, ); downstream.get_state().store(DownstreamState::Normal); // Enable batch resolved ts in the test. 
@@ -1962,6 +1974,7 @@ mod tests { 0, conn_id, ChangeDataRequestKvApi::TiDb, + false, ); downstream.get_state().store(DownstreamState::Normal); suite.add_region(2, 100); @@ -2007,6 +2020,7 @@ mod tests { 3, conn_id, ChangeDataRequestKvApi::TiDb, + false, ); downstream.get_state().store(DownstreamState::Normal); suite.add_region(3, 100); @@ -2077,6 +2091,7 @@ mod tests { 0, conn_id, ChangeDataRequestKvApi::TiDb, + false, ); let downstream_id = downstream.get_id(); suite.run(Task::Register { @@ -2119,6 +2134,7 @@ mod tests { 0, conn_id, ChangeDataRequestKvApi::TiDb, + false, ); let new_downstream_id = downstream.get_id(); suite.run(Task::Register { @@ -2170,6 +2186,7 @@ mod tests { 0, conn_id, ChangeDataRequestKvApi::TiDb, + false, ); suite.run(Task::Register { request: req, @@ -2224,6 +2241,7 @@ mod tests { 0, conn_id, ChangeDataRequestKvApi::TiDb, + false, ); downstream.get_state().store(DownstreamState::Normal); suite.run(Task::Register { @@ -2341,6 +2359,7 @@ mod tests { 0, conn_id_a, ChangeDataRequestKvApi::TiDb, + false, ); suite.run(Task::Register { request: req.clone(), @@ -2364,6 +2383,7 @@ mod tests { 0, conn_id_b, ChangeDataRequestKvApi::TiDb, + false, ); suite.run(Task::Register { request: req.clone(), diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 36c1636a7e8..38c8603900e 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -96,6 +96,8 @@ pub(crate) struct Initializer { pub(crate) ts_filter_ratio: f64, pub(crate) kv_api: ChangeDataRequestKvApi, + + pub(crate) filter_loop: bool, } impl Initializer { @@ -425,8 +427,12 @@ impl Initializer { async fn sink_scan_events(&mut self, entries: Vec>, done: bool) -> Result<()> { let mut barrier = None; - let mut events = - Delegate::convert_to_grpc_events(self.region_id, self.request_id, entries)?; + let mut events = Delegate::convert_to_grpc_events( + self.region_id, + self.request_id, + entries, + self.filter_loop, + )?; if done { let 
(cb, fut) = tikv_util::future::paired_future_callback(); events.push(CdcEvent::Barrier(Some(cb))); @@ -558,13 +564,17 @@ mod tests { use engine_rocks::RocksEngine; use engine_traits::{MiscExt, CF_WRITE}; use futures::{executor::block_on, StreamExt}; - use kvproto::{cdcpb::Event_oneof_event, errorpb::Error as ErrorHeader}; + use kvproto::{ + cdcpb::{EventLogType, Event_oneof_event}, + errorpb::Error as ErrorHeader, + }; use raftstore::{coprocessor::ObserveHandle, store::RegionSnapshot}; use test_raftstore::MockRaftStoreRouter; use tikv::storage::{ kv::Engine, txn::tests::{ must_acquire_pessimistic_lock, must_commit, must_prewrite_delete, must_prewrite_put, + must_prewrite_put_with_txn_soucre, }, TestEngineBuilder, }; @@ -601,6 +611,7 @@ mod tests { buffer: usize, engine: Option, kv_api: ChangeDataRequestKvApi, + filter_loop: bool, ) -> ( LazyWorker, Runtime, @@ -645,6 +656,7 @@ mod tests { build_resolver: true, ts_filter_ratio: 1.0, // always enable it. kv_api, + filter_loop, }; (receiver_worker, pool, initializer, rx, drain) @@ -686,6 +698,7 @@ mod tests { buffer, engine.kv_engine(), ChangeDataRequestKvApi::TiDb, + false, ); let check_result = || loop { let task = rx.recv().unwrap(); @@ -754,6 +767,53 @@ mod tests { worker.stop(); } + #[test] + fn test_initializer_filter_loop() { + let mut engine = TestEngineBuilder::new().build_without_cache().unwrap(); + + let mut total_bytes = 0; + + for i in 10..100 { + let (k, v) = (&[b'k', i], &[b'v', i]); + total_bytes += k.len(); + total_bytes += v.len(); + let ts = TimeStamp::new(i as _); + must_prewrite_put_with_txn_soucre(&mut engine, k, v, k, ts, 1); + } + + let snap = engine.snapshot(Default::default()).unwrap(); + // Buffer must be large enough to unblock async incremental scan. 
+ let buffer = 1000; + let (mut worker, pool, mut initializer, _rx, mut drain) = mock_initializer( + total_bytes, + buffer, + engine.kv_engine(), + ChangeDataRequestKvApi::TiDb, + true, + ); + let th = pool.spawn(async move { + initializer + .async_incremental_scan(snap, Region::default()) + .await + .unwrap(); + }); + let mut drain = drain.drain(); + while let Some((event, _)) = block_on(drain.next()) { + let event = match event { + CdcEvent::Event(x) if x.event.is_some() => x.event.unwrap(), + _ => continue, + }; + let entries = match event { + Event_oneof_event::Entries(mut x) => x.take_entries().into_vec(), + _ => continue, + }; + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].get_type(), EventLogType::Initialized); + } + block_on(th).unwrap(); + worker.stop(); + } + // Test `hint_min_ts` works fine with `ExtraOp::ReadOldValue`. // Whether `DeltaScanner` emits correct old values or not is already tested by // another case `test_old_value_with_hint_min_ts`, so here we only care about @@ -782,6 +842,7 @@ mod tests { 1000, engine.kv_engine(), ChangeDataRequestKvApi::TiDb, + false, ); initializer.checkpoint_ts = checkpoint_ts.into(); let mut drain = drain.drain(); @@ -840,8 +901,13 @@ mod tests { fn test_initializer_deregister_downstream() { let total_bytes = 1; let buffer = 1; - let (mut worker, _pool, mut initializer, rx, _drain) = - mock_initializer(total_bytes, buffer, None, ChangeDataRequestKvApi::TiDb); + let (mut worker, _pool, mut initializer, rx, _drain) = mock_initializer( + total_bytes, + buffer, + None, + ChangeDataRequestKvApi::TiDb, + false, + ); // Errors reported by region should deregister region. 
initializer.build_resolver = false; @@ -891,7 +957,7 @@ mod tests { let total_bytes = 1; let buffer = 1; let (mut worker, pool, mut initializer, _rx, _drain) = - mock_initializer(total_bytes, buffer, None, kv_api); + mock_initializer(total_bytes, buffer, None, kv_api, false); let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); let raft_router = MockRaftStoreRouter::new(); diff --git a/components/cdc/src/service.rs b/components/cdc/src/service.rs index e7bec568f67..f9665283c45 100644 --- a/components/cdc/src/service.rs +++ b/components/cdc/src/service.rs @@ -240,8 +240,14 @@ impl ChangeData for Service { semver::Version::new(0, 0, 0) } }; - let downstream = - Downstream::new(peer.clone(), region_epoch, req_id, conn_id, req_kvapi); + let downstream = Downstream::new( + peer.clone(), + region_epoch, + req_id, + conn_id, + req_kvapi, + request.filter_loop, + ); let ret = scheduler .schedule(Task::Register { request, diff --git a/components/cdc/tests/integrations/test_cdc.rs b/components/cdc/tests/integrations/test_cdc.rs index 3be68c5905c..f2f09622a52 100644 --- a/components/cdc/tests/integrations/test_cdc.rs +++ b/components/cdc/tests/integrations/test_cdc.rs @@ -2359,3 +2359,129 @@ fn test_prewrite_without_value() { let event = receive_event(false); assert_eq!(event.get_events()[0].get_entries().entries[0].commit_ts, 14); } + +#[test] +fn test_filter_loop() { + test_kv_format_impl!(test_filter_loop_impl); +} + +fn test_filter_loop_impl() { + let mut suite = TestSuite::new(1, F::TAG); + let mut req = suite.new_changedata_request(1); + req.set_extra_op(ExtraOp::ReadOldValue); + req.set_filter_loop(true); + let (mut req_tx, event_feed_wrap, receive_event) = + new_event_feed(suite.get_region_cdc_client(1)); + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + let mut events = receive_event(false).events.to_vec(); + match events.remove(0).event.unwrap() { + Event_oneof_event::Entries(mut es) => { + let row = &es.take_entries().to_vec()[0]; + 
assert_eq!(row.get_type(), EventLogType::Initialized); + } + other => panic!("unknown event {:?}", other), + } + + // Insert value, simulate INSERT INTO. + let mut m1 = Mutation::default(); + let k1 = b"xk1".to_vec(); + m1.set_op(Op::Insert); + m1.key = k1.clone(); + m1.value = b"v1".to_vec(); + suite.must_kv_prewrite_with_source(1, vec![m1], k1.clone(), 10.into(), 1); + let mut m2 = Mutation::default(); + let k2 = b"xk2".to_vec(); + m2.set_op(Op::Insert); + m2.key = k2.clone(); + m2.value = b"v2".to_vec(); + suite.must_kv_prewrite_with_source(1, vec![m2], k2.clone(), 12.into(), 0); + let mut events = receive_event(false).events.to_vec(); + match events.remove(0).event.unwrap() { + Event_oneof_event::Entries(mut es) => { + let events = es.take_entries().to_vec(); + assert_eq!(events.len(), 1); + let row = &events[0]; + assert_eq!(row.get_value(), b"v2"); + assert_eq!(row.get_old_value(), b""); + assert_eq!(row.get_type(), EventLogType::Prewrite); + assert_eq!(row.get_start_ts(), 12); + } + other => panic!("unknown event {:?}", other), + } + suite.must_kv_commit_with_source(1, vec![k1], 10.into(), 15.into(), 1); + suite.must_kv_commit_with_source(1, vec![k2], 12.into(), 17.into(), 0); + let mut events = receive_event(false).events.to_vec(); + match events.remove(0).event.unwrap() { + Event_oneof_event::Entries(mut es) => { + let events = es.take_entries().to_vec(); + assert_eq!(events.len(), 1); + let row = &events[0]; + assert_eq!(row.get_type(), EventLogType::Commit); + assert_eq!(row.get_commit_ts(), 17); + } + other => panic!("unknown event {:?}", other), + } + + // Rollback + let mut m3 = Mutation::default(); + let k3 = b"xk3".to_vec(); + m3.set_op(Op::Put); + m3.key = k3.clone(); + m3.value = b"v3".to_vec(); + suite.must_kv_prewrite_with_source(1, vec![m3], k3.clone(), 30.into(), 1); + suite.must_kv_rollback(1, vec![k3], 30.into()); + let mut events = receive_event(false).events.to_vec(); + match events.remove(0).event.unwrap() { + 
Event_oneof_event::Entries(mut es) => { + let events = es.take_entries().to_vec(); + assert_eq!(events.len(), 1); + let row = &events[0]; + assert_eq!(row.get_type(), EventLogType::Rollback); + assert_eq!(row.get_commit_ts(), 0); + } + other => panic!("unknown event {:?}", other), + } + + // Update value + let k1 = b"xk1".to_vec(); + let mut m4 = Mutation::default(); + m4.set_op(Op::Put); + m4.key = k1.clone(); + m4.value = vec![b'3'; 5120]; + suite.must_kv_prewrite_with_source(1, vec![m4], k1.clone(), 40.into(), 1); + suite.must_kv_commit_with_source(1, vec![k1], 40.into(), 42.into(), 1); + let k2 = b"xk2".to_vec(); + let mut m5 = Mutation::default(); + m5.set_op(Op::Put); + m5.key = k2.clone(); + m5.value = vec![b'4'; 5121]; + suite.must_kv_prewrite(1, vec![m5], k2.clone(), 44.into()); + suite.must_kv_commit(1, vec![k2.clone()], 44.into(), 46.into()); + let mut events = receive_event(false).events.to_vec(); + if events.len() == 1 { + events.extend(receive_event(false).events.into_iter()); + } + match events.remove(0).event.unwrap() { + Event_oneof_event::Entries(mut es) => { + let events = es.take_entries().to_vec(); + assert_eq!(events.len(), 1); + assert_eq!(events[0].get_type(), EventLogType::Prewrite); + assert_eq!(events[0].get_start_ts(), 44); + assert_eq!(events[0].get_key(), k2.as_slice()); + } + other => panic!("unknown event {:?}", other), + } + match events.remove(0).event.unwrap() { + Event_oneof_event::Entries(mut es) => { + let events = es.take_entries().to_vec(); + assert_eq!(events.len(), 1); + assert_eq!(events[0].get_type(), EventLogType::Commit); + assert_eq!(events[0].get_commit_ts(), 46); + assert_eq!(events[0].get_key(), k2.as_slice()); + } + other => panic!("unknown event {:?}", other), + } + + event_feed_wrap.replace(None); + suite.stop(); +} diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index 9e6621ffbdf..feb994f8bb1 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -269,9 +269,22 @@ 
impl TestSuite { muts: Vec, pk: Vec, ts: TimeStamp, + ) { + self.must_kv_prewrite_with_source(region_id, muts, pk, ts, 0); + } + + pub fn must_kv_prewrite_with_source( + &mut self, + region_id: u64, + muts: Vec, + pk: Vec, + ts: TimeStamp, + txn_source: u64, ) { let mut prewrite_req = PrewriteRequest::default(); - prewrite_req.set_context(self.get_context(region_id)); + let mut context = self.get_context(region_id); + context.set_txn_source(txn_source); + prewrite_req.set_context(context); prewrite_req.set_mutations(muts.into_iter().collect()); prewrite_req.primary_lock = pk; prewrite_req.start_version = ts.into_inner(); @@ -314,9 +327,22 @@ impl TestSuite { keys: Vec>, start_ts: TimeStamp, commit_ts: TimeStamp, + ) { + self.must_kv_commit_with_source(region_id, keys, start_ts, commit_ts, 0); + } + + pub fn must_kv_commit_with_source( + &mut self, + region_id: u64, + keys: Vec>, + start_ts: TimeStamp, + commit_ts: TimeStamp, + txn_source: u64, ) { let mut commit_req = CommitRequest::default(); - commit_req.set_context(self.get_context(region_id)); + let mut context = self.get_context(region_id); + context.set_txn_source(txn_source); + commit_req.set_context(context); commit_req.start_version = start_ts.into_inner(); commit_req.set_keys(keys.into_iter().collect()); commit_req.commit_version = commit_ts.into_inner(); From d23618e25c9ad435de81a4d98c657fdea59a49b6 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Tue, 29 Nov 2022 21:00:00 +0800 Subject: [PATCH 0368/1149] cop: support batch coprocessor processing in tikv (#13850) close tikv/tikv#13849 Support batch coprocessor task processing in tikv. All the tasks would be passed to tikv in a single RPC request, they would be executed concurrently and all the results would be sent back in one response. More tests and investigations are needed together with the coprocessor client changes. 
Signed-off-by: cfzjywxk --- Cargo.lock | 6 +- src/coprocessor/endpoint.rs | 131 ++++++++- tests/integrations/coprocessor/test_select.rs | 252 +++++++++++++++++- 3 files changed, 372 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 14c12716ee2..7a036117bfb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2251,9 +2251,9 @@ dependencies = [ [[package]] name = "grpcio" -version = "0.10.3" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9bcdd3694fa08158334501af37bdf5b4f00b1865b602d917e3cd74ecf80cd0a" +checksum = "1f2506de56197d01821c2d1d21082d2dcfd6c82d7a1d6e04d33f37aab6130632" dependencies = [ "futures-executor", "futures-util", @@ -2694,7 +2694,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#fdbd9fa2b8f402420c9f7bc8fe47b0e41412ad55" +source = "git+https://github.com/pingcap/kvproto.git#e53d558bc6d7d8b7bb2d283cdf6dda52a2615632" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 5123534db88..51927cd6b56 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -1,6 +1,8 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{borrow::Cow, future::Future, marker::PhantomData, sync::Arc, time::Duration}; +use std::{ + borrow::Cow, future::Future, iter::FromIterator, marker::PhantomData, sync::Arc, time::Duration, +}; use ::tracker::{ set_tls_tracker_token, with_tls_tracker, RequestInfo, RequestType, GLOBAL_TRACKERS, @@ -485,7 +487,7 @@ impl Endpoint { #[inline] pub fn parse_and_handle_unary_request( &self, - req: coppb::Request, + mut req: coppb::Request, peer: Option, ) -> impl Future> { let tracker = GLOBAL_TRACKERS.insert(::tracker::Tracker::new(RequestInfo::new( @@ -493,23 +495,27 @@ impl Endpoint { RequestType::Unknown, req.start_ts, ))); + let result_of_batch = self.process_batch_tasks(&mut req, &peer); set_tls_tracker_token(tracker); let result_of_future = self .parse_request_and_check_memory_locks(req, peer, false) .map(|(handler_builder, req_ctx)| self.handle_unary_request(req_ctx, handler_builder)); - async move { let res = match result_of_future { - Err(e) => make_error_response(e).into(), + Err(e) => { + let mut res = make_error_response(e); + let batch_res = result_of_batch.await; + res.set_batch_responses(batch_res.into()); + res.into() + } Ok(handle_fut) => { - let mut response = handle_fut - .await - .unwrap_or_else(|e| make_error_response(e).into()); - let scan_detail_v2 = response.mut_exec_details_v2().mut_scan_detail_v2(); + let (handle_res, batch_res) = futures::join!(handle_fut, result_of_batch); + let mut res = handle_res.unwrap_or_else(|e| make_error_response(e).into()); + res.set_batch_responses(batch_res.into()); GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { - tracker.write_scan_detail(scan_detail_v2); + tracker.write_scan_detail(res.mut_exec_details_v2().mut_scan_detail_v2()); }); - response + res } }; GLOBAL_TRACKERS.remove(tracker); @@ -517,6 +523,75 @@ impl Endpoint { } } + // process_batch_tasks process the input batched coprocessor tasks if any, + // prepare all the requests and schedule them into the read pool, then + // collect all the 
responses and convert them into the `StoreBatchResponse` + // type. + pub fn process_batch_tasks( + &self, + req: &mut coppb::Request, + peer: &Option, + ) -> impl Future> { + let mut batch_futs = Vec::with_capacity(req.tasks.len()); + let batch_reqs: Vec<(coppb::Request, u64)> = req + .take_tasks() + .iter_mut() + .map(|task| { + let mut new_req = req.clone(); + new_req.ranges = task.take_ranges(); + let new_context = new_req.mut_context(); + new_context.set_region_id(task.get_region_id()); + new_context.set_region_epoch(task.take_region_epoch()); + new_context.set_peer(task.take_peer()); + (new_req, task.get_task_id()) + }) + .collect(); + for (cur_req, task_id) in batch_reqs.into_iter() { + let request_info = RequestInfo::new( + cur_req.get_context(), + RequestType::Unknown, + cur_req.start_ts, + ); + let mut response = coppb::StoreBatchTaskResponse::new(); + response.set_task_id(task_id); + match self.parse_request_and_check_memory_locks(cur_req, peer.clone(), false) { + Ok((handler_builder, req_ctx)) => { + let cur_tracker = GLOBAL_TRACKERS.insert(::tracker::Tracker::new(request_info)); + set_tls_tracker_token(cur_tracker); + let fut = self.handle_unary_request(req_ctx, handler_builder); + let fut = async move { + let res = fut.await; + match res { + Ok(mut resp) => { + response.set_data(resp.take_data()); + response.set_region_error(resp.take_region_error()); + response.set_locked(resp.take_locked()); + response.set_other_error(resp.take_other_error()); + GLOBAL_TRACKERS.with_tracker(cur_tracker, |tracker| { + tracker.write_scan_detail( + response.mut_exec_details_v2().mut_scan_detail_v2(), + ); + }); + } + Err(e) => { + make_error_batch_response(&mut response, e); + } + } + GLOBAL_TRACKERS.remove(cur_tracker); + response + }; + + batch_futs.push(future::Either::Left(fut)); + } + Err(e) => batch_futs.push(future::Either::Right(async move { + make_error_batch_response(&mut response, e); + response + })), + } + } + 
stream::FuturesOrdered::from_iter(batch_futs).collect() + } + /// The real implementation of handling a stream request. /// /// It first retrieves a snapshot, then builds the `RequestHandler` over the @@ -654,6 +729,42 @@ impl Endpoint { } } +fn make_error_batch_response(batch_resp: &mut coppb::StoreBatchTaskResponse, e: Error) { + warn!( + "batch cop task error-response"; + "err" => %e + ); + let tag; + match e { + Error::Region(e) => { + tag = storage::get_tag_from_header(&e); + batch_resp.set_region_error(e); + } + Error::Locked(info) => { + tag = "meet_lock"; + batch_resp.set_locked(info); + } + Error::DeadlineExceeded => { + tag = "deadline_exceeded"; + batch_resp.set_other_error(e.to_string()); + } + Error::MaxPendingTasksExceeded => { + tag = "max_pending_tasks_exceeded"; + let mut server_is_busy_err = errorpb::ServerIsBusy::default(); + server_is_busy_err.set_reason(e.to_string()); + let mut errorpb = errorpb::Error::default(); + errorpb.set_message(e.to_string()); + errorpb.set_server_is_busy(server_is_busy_err); + batch_resp.set_region_error(errorpb); + } + Error::Other(_) => { + tag = "other"; + batch_resp.set_other_error(e.to_string()); + } + }; + COPR_REQ_ERROR.with_label_values(&[tag]).inc(); +} + fn make_error_response(e: Error) -> coppb::Response { warn!( "error-response"; diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index 96ceb1c5c8c..c802b697872 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -2,13 +2,16 @@ use std::{cmp, thread, time::Duration}; +use engine_traits::CF_LOCK; use kvproto::{ - coprocessor::{Request, Response}, - kvrpcpb::{Context, IsolationLevel}, + coprocessor::{Request, Response, StoreBatchTask}, + errorpb, + kvrpcpb::{Context, IsolationLevel, LockInfo}, }; -use protobuf::Message; +use protobuf::{Message, SingularPtrField}; use raftstore::store::Bucket; use test_coprocessor::*; +use 
test_raftstore::{Cluster, ServerCluster}; use test_storage::*; use tidb_query_datatype::{ codec::{datum, Datum}, @@ -24,7 +27,7 @@ use tipb::{ AnalyzeColumnsReq, AnalyzeReq, AnalyzeType, ChecksumRequest, Chunk, Expr, ExprType, ScalarFuncSig, SelectResponse, }; -use txn_types::TimeStamp; +use txn_types::{Key, Lock, LockType, TimeStamp}; const FLAG_IGNORE_TRUNCATE: u64 = 1; const FLAG_TRUNCATE_AS_WARNING: u64 = 1 << 1; @@ -2006,3 +2009,244 @@ fn test_buckets() { wait_refresh_buckets(0); } + +#[test] +fn test_batch_request() { + let data = vec![ + (1, Some("name:0"), 2), + (2, Some("name:4"), 3), + (4, Some("name:3"), 1), + (5, Some("name:1"), 4), + (9, Some("name:8"), 7), + (10, Some("name:6"), 8), + ]; + + let product = ProductTable::new(); + let (mut cluster, raft_engine, ctx) = new_raft_engine(1, ""); + let (_, endpoint, _) = + init_data_with_engine_and_commit(ctx.clone(), raft_engine, &product, &data, true); + + // Split the region into [1, 2], [4, 5], [9, 10]. + let region = + cluster.get_region(Key::from_raw(&product.get_record_range(1, 1).start).as_encoded()); + let split_key = Key::from_raw(&product.get_record_range(3, 3).start); + cluster.must_split(®ion, split_key.as_encoded()); + let second_region = + cluster.get_region(Key::from_raw(&product.get_record_range(4, 4).start).as_encoded()); + let second_split_key = Key::from_raw(&product.get_record_range(8, 8).start); + cluster.must_split(&second_region, second_split_key.as_encoded()); + + struct HandleRange { + start: i64, + end: i64, + } + + enum QueryResult { + Valid(Vec<(i64, Option<&'static str>, i64)>), + ErrRegion, + ErrLocked, + ErrOther, + } + + // Each case has four fields: + // 1. The input scan handle range. + // 2. The expected output results. + // 3. Should the coprocessor request contain invalid region epoch. + // 4. Should the scanned key be locked. + let cases = vec![ + // Basic valid case. 
+ ( + vec![ + HandleRange { start: 1, end: 2 }, + HandleRange { start: 3, end: 5 }, + ], + vec![ + QueryResult::Valid(vec![(1_i64, Some("name:0"), 2_i64), (2, Some("name:4"), 3)]), + QueryResult::Valid(vec![(4, Some("name:3"), 1), (5, Some("name:1"), 4)]), + ], + false, + false, + ), + // Original task is valid, batch tasks are not all valid. + ( + vec![ + HandleRange { start: 1, end: 2 }, + HandleRange { start: 4, end: 6 }, + HandleRange { start: 9, end: 11 }, + HandleRange { start: 1, end: 3 }, // Input range [1, 4) crosses two region ranges. + HandleRange { start: 4, end: 8 }, // Input range [4, 9] crosses two region ranges. + ], + vec![ + QueryResult::Valid(vec![(1, Some("name:0"), 2), (2, Some("name:4"), 3)]), + QueryResult::Valid(vec![(4, Some("name:3"), 1), (5, Some("name:1"), 4)]), + QueryResult::Valid(vec![(9, Some("name:8"), 7), (10, Some("name:6"), 8)]), + QueryResult::ErrOther, + QueryResult::ErrOther, + ], + false, + false, + ), + // Original task is invalid, batch tasks are not all valid. + ( + vec![HandleRange { start: 1, end: 3 }], + vec![QueryResult::ErrOther], + false, + false, + ), + // Invalid epoch case. + ( + vec![ + HandleRange { start: 1, end: 3 }, + HandleRange { start: 4, end: 6 }, + ], + vec![QueryResult::ErrRegion, QueryResult::ErrRegion], + true, + false, + ), + // Locked error case. 
+ ( + vec![ + HandleRange { start: 1, end: 2 }, + HandleRange { start: 4, end: 6 }, + ], + vec![QueryResult::ErrLocked, QueryResult::ErrLocked], + false, + true, + ), + ]; + let prepare_req = + |cluster: &mut Cluster, ranges: &Vec| -> Request { + let original_range = ranges.get(0).unwrap(); + let key_range = product.get_record_range(original_range.start, original_range.end); + let region_key = Key::from_raw(&key_range.start); + let mut req = DagSelect::from(&product) + .key_ranges(vec![key_range]) + .build_with(ctx.clone(), &[0]); + let mut new_ctx = Context::default(); + let new_region = cluster.get_region(region_key.as_encoded()); + let leader = cluster.leader_of_region(new_region.get_id()).unwrap(); + new_ctx.set_region_id(new_region.get_id()); + new_ctx.set_region_epoch(new_region.get_region_epoch().clone()); + new_ctx.set_peer(leader); + req.set_context(new_ctx); + req.set_start_ts(100); + + let batch_handle_ranges = &ranges.as_slice()[1..]; + for handle_range in batch_handle_ranges.iter() { + let range_start_key = Key::from_raw( + &product + .get_record_range(handle_range.start, handle_range.end) + .start, + ); + let batch_region = cluster.get_region(range_start_key.as_encoded()); + let batch_leader = cluster.leader_of_region(batch_region.get_id()).unwrap(); + let batch_key_ranges = + vec![product.get_record_range(handle_range.start, handle_range.end)]; + let mut store_batch_task = StoreBatchTask::new(); + store_batch_task.set_region_id(batch_region.get_id()); + store_batch_task.set_region_epoch(batch_region.get_region_epoch().clone()); + store_batch_task.set_peer(batch_leader); + store_batch_task.set_ranges(batch_key_ranges.into()); + req.tasks.push(store_batch_task); + } + req + }; + let verify_response = |result: &QueryResult, + data: &[u8], + region_err: &SingularPtrField, + locked: &SingularPtrField, + other_err: &String| { + match result { + QueryResult::Valid(res) => { + let expected_len = res.len(); + let mut sel_resp = SelectResponse::default(); + 
sel_resp.merge_from_bytes(data).unwrap(); + let mut row_count = 0; + let spliter = DagChunkSpliter::new(sel_resp.take_chunks().into(), 3); + for (row, (id, name, cnt)) in spliter.zip(res) { + let name_datum = name.map(|s| s.as_bytes()).into(); + let expected_encoded = datum::encode_value( + &mut EvalContext::default(), + &[Datum::I64(*id), name_datum, Datum::I64(*cnt)], + ) + .unwrap(); + let result_encoded = + datum::encode_value(&mut EvalContext::default(), &row).unwrap(); + assert_eq!(result_encoded, &*expected_encoded); + row_count += 1; + } + assert_eq!(row_count, expected_len); + } + QueryResult::ErrRegion => { + assert!(region_err.is_some()); + } + QueryResult::ErrLocked => { + assert!(locked.is_some()); + } + QueryResult::ErrOther => { + assert!(!other_err.is_empty()) + } + } + }; + + for (ranges, results, invalid_epoch, key_is_locked) in cases.iter() { + let mut req = prepare_req(&mut cluster, ranges); + if *invalid_epoch { + req.context + .as_mut() + .unwrap() + .region_epoch + .as_mut() + .unwrap() + .version -= 1; + for batch_task in req.tasks.iter_mut() { + batch_task.region_epoch.as_mut().unwrap().version -= 1; + } + } else if *key_is_locked { + for range in ranges.iter() { + let lock_key = + Key::from_raw(&product.get_record_range(range.start, range.start).start); + let lock = Lock::new( + LockType::Put, + lock_key.as_encoded().clone(), + 10.into(), + 10, + None, + TimeStamp::zero(), + 1, + TimeStamp::zero(), + ); + cluster.must_put_cf(CF_LOCK, lock_key.as_encoded(), lock.to_bytes().as_slice()); + } + } + let mut resp = handle_request(&endpoint, req); + let batch_results = resp.take_batch_responses().to_vec(); + for (i, result) in results.iter().enumerate() { + if i == 0 { + verify_response( + result, + resp.get_data(), + &resp.region_error, + &resp.locked, + &resp.other_error, + ); + } else { + let batch_resp = batch_results.get(i - 1).unwrap(); + verify_response( + result, + batch_resp.get_data(), + &batch_resp.region_error, + &batch_resp.locked, + 
&batch_resp.other_error, + ); + }; + } + if *key_is_locked { + for range in ranges.iter() { + let lock_key = + Key::from_raw(&product.get_record_range(range.start, range.start).start); + cluster.must_delete_cf(CF_LOCK, lock_key.as_encoded()); + } + } + } +} From c1aceb003b9da06b75b70a1e545d52b994ab67dc Mon Sep 17 00:00:00 2001 From: JmPotato Date: Tue, 29 Nov 2022 21:58:00 +0800 Subject: [PATCH 0369/1149] server, storage: make flashback compatible with resolved_ts (#13823) ref tikv/tikv#13787 - Prewrite and commit `self.start_key` independently to prevent `resolved_ts` from advancing during the flashback process. - Roll back all keys before prewriting `self.start_key` during the preparing flashback. - Add a test case for CDC compatibility. Signed-off-by: JmPotato --- components/cdc/tests/integrations/test_cdc.rs | 115 ++++++++- components/cdc/tests/mod.rs | 48 ++++ components/raftstore/src/store/fsm/peer.rs | 9 +- components/test_raftstore/src/util.rs | 10 +- src/server/service/kv.rs | 16 +- src/storage/mod.rs | 157 ++++++------ src/storage/mvcc/reader/reader.rs | 46 ++-- .../txn/actions/flashback_to_version.rs | 233 +++++++++++++++--- .../txn/commands/flashback_to_version.rs | 83 +++++-- .../flashback_to_version_read_phase.rs | 165 +++++++------ src/storage/txn/commands/mod.rs | 17 +- src/storage/txn/mod.rs | 4 +- 12 files changed, 672 insertions(+), 231 deletions(-) diff --git a/components/cdc/tests/integrations/test_cdc.rs b/components/cdc/tests/integrations/test_cdc.rs index f2f09622a52..b9c285406d4 100644 --- a/components/cdc/tests/integrations/test_cdc.rs +++ b/components/cdc/tests/integrations/test_cdc.rs @@ -12,7 +12,7 @@ use pd_client::PdClient; use raft::eraftpb::MessageType; use test_raftstore::*; use tikv::server::DEFAULT_CLUSTER_ID; -use tikv_util::HandyRwLock; +use tikv_util::{config::ReadableDuration, HandyRwLock}; use txn_types::{Key, Lock, LockType}; use crate::{new_event_feed, TestSuite, TestSuiteBuilder}; @@ -2485,3 +2485,116 @@ fn 
test_filter_loop_impl() { event_feed_wrap.replace(None); suite.stop(); } + +#[test] +fn test_flashback() { + let mut cluster = new_server_cluster(0, 1); + cluster.cfg.resolved_ts.advance_ts_interval = ReadableDuration::millis(50); + let mut suite = TestSuiteBuilder::new().cluster(cluster).build(); + + let key = Key::from_raw(b"a"); + let region = suite.cluster.get_region(key.as_encoded()); + let region_id = region.get_id(); + let req = suite.new_changedata_request(region_id); + let (mut req_tx, _, receive_event) = new_event_feed(suite.get_region_cdc_client(region_id)); + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + let event = receive_event(false); + event.events.into_iter().for_each(|e| { + match e.event.unwrap() { + // Even if there is no write, + // it should always outputs an Initialized event. + Event_oneof_event::Entries(es) => { + assert!(es.entries.len() == 1, "{:?}", es); + let e = &es.entries[0]; + assert_eq!(e.get_type(), EventLogType::Initialized, "{:?}", es); + } + other => panic!("unknown event {:?}", other), + } + }); + // Sleep a while to make sure the stream is registered. + sleep_ms(1000); + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + for i in 0..2 { + let (k, v) = ( + format!("key{}", i).as_bytes().to_vec(), + format!("value{}", i).as_bytes().to_vec(), + ); + // Prewrite + let start_ts1 = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.clone(); + mutation.value = v; + suite.must_kv_prewrite(1, vec![mutation], k.clone(), start_ts1); + // Commit + let commit_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + suite.must_kv_commit(1, vec![k.clone()], start_ts1, commit_ts); + } + let (start_key, end_key) = (b"key0".to_vec(), b"key2".to_vec()); + // Prepare flashback. 
+ let flashback_start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + suite.must_kv_prepare_flashback(region_id, &start_key, flashback_start_ts); + // resolved ts should not be advanced anymore. + let mut counter = 0; + let mut last_resolved_ts = 0; + loop { + let event = receive_event(true); + if let Some(resolved_ts) = event.resolved_ts.as_ref() { + if resolved_ts.ts == last_resolved_ts { + counter += 1; + } + last_resolved_ts = resolved_ts.ts; + } + if counter > 20 { + break; + } + sleep_ms(50); + } + // Flashback. + let flashback_commit_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + suite.must_kv_flashback( + region_id, + &start_key, + &end_key, + flashback_start_ts, + flashback_commit_ts, + start_ts, + ); + // Check the flashback event. + let mut resolved_ts = 0; + let mut event_counter = 0; + loop { + let mut cde = receive_event(true); + if cde.get_resolved_ts().get_ts() > resolved_ts { + resolved_ts = cde.get_resolved_ts().get_ts(); + } + let events = cde.mut_events(); + if !events.is_empty() { + assert_eq!(events.len(), 1); + match events.pop().unwrap().event.unwrap() { + Event_oneof_event::Entries(entries) => { + assert_eq!(entries.entries.len(), 1); + event_counter += 1; + let e = &entries.entries[0]; + assert!(e.commit_ts > resolved_ts); + assert_eq!(e.get_op_type(), EventRowOpType::Delete); + match e.get_type() { + EventLogType::Committed => { + // First entry should be a 1PC flashback. + assert_eq!(e.get_key(), b"key1"); + assert_eq!(event_counter, 1); + } + EventLogType::Commit => { + // Second entry should be a 2PC commit. 
+ assert_eq!(e.get_key(), b"key0"); + assert_eq!(event_counter, 2); + break; + } + _ => panic!("unknown event type {:?}", e.get_type()), + } + } + other => panic!("unknown event {:?}", other), + } + } + } +} diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index feb994f8bb1..87619deb92b 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -581,4 +581,52 @@ impl TestSuite { } } } + + pub fn must_kv_prepare_flashback( + &mut self, + region_id: u64, + start_key: &[u8], + start_ts: TimeStamp, + ) { + let mut prepare_flashback_req = PrepareFlashbackToVersionRequest::default(); + prepare_flashback_req.set_context(self.get_context(region_id)); + prepare_flashback_req.set_start_key(start_key.to_vec()); + prepare_flashback_req.set_start_ts(start_ts.into_inner()); + let prepare_flashback_resp = self + .get_tikv_client(region_id) + .kv_prepare_flashback_to_version(&prepare_flashback_req) + .unwrap(); + assert!( + !prepare_flashback_resp.has_region_error(), + "{:?}", + prepare_flashback_resp.get_region_error() + ); + } + + pub fn must_kv_flashback( + &mut self, + region_id: u64, + start_key: &[u8], + end_key: &[u8], + start_ts: TimeStamp, + commit_ts: TimeStamp, + version: TimeStamp, + ) { + let mut flashback_req = FlashbackToVersionRequest::default(); + flashback_req.set_context(self.get_context(region_id)); + flashback_req.set_start_key(start_key.to_vec()); + flashback_req.set_end_key(end_key.to_vec()); + flashback_req.set_start_ts(start_ts.into_inner()); + flashback_req.set_commit_ts(commit_ts.into_inner()); + flashback_req.set_version(version.into_inner()); + let flashback_resp = self + .get_tikv_client(region_id) + .kv_flashback_to_version(&flashback_req) + .unwrap(); + assert!( + !flashback_resp.has_region_error(), + "{:?}", + flashback_resp.get_region_error() + ); + } } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index b4c7d1fb097..f6498222d27 100644 --- 
a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -1330,8 +1330,15 @@ where ) { fail_point!("raft_on_capture_change"); let region_id = self.region_id(); - let msg = + let mut msg = new_read_index_request(region_id, region_epoch.clone(), self.fsm.peer.peer.clone()); + // Allow to capture change even is in flashback state. + // TODO: add a test case for this kind of situation. + if self.fsm.peer.is_in_flashback { + let mut flags = WriteBatchFlags::from_bits_check(msg.get_header().get_flags()); + flags.insert(WriteBatchFlags::FLASHBACK); + msg.mut_header().set_flags(flags.bits()); + } let apply_router = self.ctx.apply_router.clone(); self.propose_raft_command_internal( msg, diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index e4b185b9509..64bdca19025 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -1249,6 +1249,10 @@ pub fn must_flashback_to_version( ) { let mut prepare_req = PrepareFlashbackToVersionRequest::default(); prepare_req.set_context(ctx.clone()); + prepare_req.set_start_ts(start_ts); + prepare_req.set_version(version); + prepare_req.set_start_key(b"a".to_vec()); + prepare_req.set_end_key(b"z".to_vec()); client .kv_prepare_flashback_to_version(&prepare_req) .unwrap(); @@ -1256,9 +1260,9 @@ pub fn must_flashback_to_version( req.set_context(ctx); req.set_start_ts(start_ts); req.set_commit_ts(commit_ts); - req.version = version; - req.start_key = b"a".to_vec(); - req.end_key = b"z".to_vec(); + req.set_version(version); + req.set_start_key(b"a".to_vec()); + req.set_end_key(b"z".to_vec()); let resp = client.kv_flashback_to_version(&req).unwrap(); assert!(!resp.has_region_error()); assert!(resp.get_error().is_empty()); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 7a61a313eca..7c40ab659eb 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1452,14 +1452,26 @@ fn 
future_delete_range( // Preparing the flashback for a region will "lock" the region so that // there is no any read, write or scheduling operation could be proposed before // the actual flashback operation. +// NOTICE: the caller needs to make sure the version we want to flashback won't +// be between any transactions that have not been fully committed. fn future_prepare_flashback_to_version( // Keep this param to hint the type of E for the compiler. storage: &Storage, req: PrepareFlashbackToVersionRequest, ) -> impl Future> { - let f = storage.get_engine().start_flashback(req.get_context()); + let storage = storage.clone(); async move { - let res = f.await.map_err(storage::Error::from); + let f = storage.get_engine().start_flashback(req.get_context()); + let mut res = f.await.map_err(storage::Error::from); + if matches!(res, Ok(())) { + // After the region is put into the flashback state, we need to do a special + // prewrite to prevent `resolved_ts` from advancing. + let (cb, f) = paired_future_callback(); + res = storage.sched_txn_command(req.clone().into(), cb); + if matches!(res, Ok(())) { + res = f.await.unwrap_or_else(|e| Err(box_err!(e))); + } + } let mut resp = PrepareFlashbackToVersionResponse::default(); if let Some(e) = extract_region_error(&res) { resp.set_region_error(e); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 32d033e7497..b87ab8c4a6d 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3545,7 +3545,10 @@ mod tests { use super::{ mvcc::tests::{must_unlocked, must_written}, test_util::*, - txn::{commands::new_flashback_to_version_read_phase_cmd, FLASHBACK_BATCH_SIZE}, + txn::{ + commands::{new_flashback_rollback_lock_cmd, new_flashback_write_cmd}, + FLASHBACK_BATCH_SIZE, + }, *, }; use crate::{ @@ -4816,20 +4819,14 @@ mod tests { let (key, value) = write.0.clone().into_key_value(); // The version we want to flashback to. 
let version = write.2; - storage - .sched_txn_command( - new_flashback_to_version_read_phase_cmd( - start_ts, - commit_ts, - version, - key.clone(), - Key::from_raw(b"z"), - Context::default(), - ), - expect_ok_callback(tx.clone(), 2), - ) - .unwrap(); - rx.recv().unwrap(); + run_flashback_to_version( + &storage, + start_ts, + commit_ts, + version, + key.clone(), + Key::from_raw(b"z"), + ); if let Mutation::Put(..) = write.0 { expect_value( value.unwrap(), @@ -4847,6 +4844,44 @@ mod tests { } } + fn run_flashback_to_version( + storage: &Storage, + start_ts: TimeStamp, + commit_ts: TimeStamp, + version: TimeStamp, + start_key: Key, + end_key: Key, + ) { + let (tx, rx) = channel(); + storage + .sched_txn_command( + new_flashback_rollback_lock_cmd( + start_ts, + version, + start_key.clone(), + end_key.clone(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + storage + .sched_txn_command( + new_flashback_write_cmd( + start_ts, + commit_ts, + version, + start_key, + end_key, + Context::default(), + ), + expect_ok_callback(tx, 1), + ) + .unwrap(); + rx.recv().unwrap(); + } + #[test] fn test_flashback_to_version_lock() { let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) @@ -4890,7 +4925,7 @@ mod tests { b"k".to_vec(), *ts.incr(), ), - expect_ok_callback(tx.clone(), 2), + expect_ok_callback(tx, 2), ) .unwrap(); rx.recv().unwrap(); @@ -4906,20 +4941,14 @@ mod tests { let start_ts = *ts.incr(); let commit_ts = *ts.incr(); - storage - .sched_txn_command( - new_flashback_to_version_read_phase_cmd( - start_ts, - commit_ts, - 2.into(), - Key::from_raw(b"k"), - Key::from_raw(b"z"), - Context::default(), - ), - expect_ok_callback(tx.clone(), 3), - ) - .unwrap(); - rx.recv().unwrap(); + run_flashback_to_version( + &storage, + start_ts, + commit_ts, + 2.into(), + Key::from_raw(b"k"), + Key::from_raw(b"z"), + ); expect_value( b"v@1".to_vec(), block_on(storage.get(Context::default(), Key::from_raw(b"k"), 
commit_ts)) @@ -4928,20 +4957,14 @@ mod tests { ); let start_ts = *ts.incr(); let commit_ts = *ts.incr(); - storage - .sched_txn_command( - new_flashback_to_version_read_phase_cmd( - start_ts, - commit_ts, - 1.into(), - Key::from_raw(b"k"), - Key::from_raw(b"z"), - Context::default(), - ), - expect_ok_callback(tx, 4), - ) - .unwrap(); - rx.recv().unwrap(); + run_flashback_to_version( + &storage, + start_ts, + commit_ts, + 1.into(), + Key::from_raw(b"k"), + Key::from_raw(b"z"), + ); expect_none( block_on(storage.get(Context::default(), Key::from_raw(b"k"), commit_ts)) .unwrap() @@ -5025,20 +5048,14 @@ mod tests { let flashback_start_ts = *ts.incr(); let flashback_commit_ts = *ts.incr(); for _ in 0..10 { - storage - .sched_txn_command( - new_flashback_to_version_read_phase_cmd( - flashback_start_ts, - flashback_commit_ts, - TimeStamp::zero(), - Key::from_raw(b"k"), - Key::from_raw(b"z"), - Context::default(), - ), - expect_ok_callback(tx.clone(), 2), - ) - .unwrap(); - rx.recv().unwrap(); + run_flashback_to_version( + &storage, + flashback_start_ts, + flashback_commit_ts, + TimeStamp::zero(), + Key::from_raw(b"k"), + Key::from_raw(b"z"), + ); for i in 1..=FLASHBACK_BATCH_SIZE * 4 { let key = Key::from_raw(format!("k{}", i).as_bytes()); expect_none( @@ -5098,7 +5115,7 @@ mod tests { storage .sched_txn_command( commands::Commit::new(vec![k.clone()], ts, *ts.incr(), Context::default()), - expect_value_callback(tx.clone(), 3, TxnStatus::committed(ts)), + expect_value_callback(tx, 3, TxnStatus::committed(ts)), ) .unwrap(); rx.recv().unwrap(); @@ -5110,20 +5127,14 @@ mod tests { // Flashback the key. 
let flashback_start_ts = *ts.incr(); let flashback_commit_ts = *ts.incr(); - storage - .sched_txn_command( - new_flashback_to_version_read_phase_cmd( - flashback_start_ts, - flashback_commit_ts, - 1.into(), - Key::from_raw(b"k"), - Key::from_raw(b"z"), - Context::default(), - ), - expect_ok_callback(tx, 4), - ) - .unwrap(); - rx.recv().unwrap(); + run_flashback_to_version( + &storage, + flashback_start_ts, + flashback_commit_ts, + 1.into(), + Key::from_raw(b"k"), + Key::from_raw(b"z"), + ); expect_none( block_on(storage.get(Context::default(), k, flashback_commit_ts)) .unwrap() diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 8e92ffd6be2..0ada3a12d5d 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -185,17 +185,12 @@ impl MvccReader { } } - /// load the value associated with `key` and pointed by `write` - fn load_data(&mut self, key: &Key, write: Write) -> Result { - assert_eq!(write.write_type, WriteType::Put); - if let Some(val) = write.short_value { - return Ok(val); - } + /// get the value of a user key with the given `start_ts`. + pub fn get_value(&mut self, key: &Key, start_ts: TimeStamp) -> Result> { if self.scan_mode.is_some() { self.create_data_cursor()?; } - - let k = key.clone().append_ts(write.start_ts); + let k = key.clone().append_ts(start_ts); let val = if let Some(ref mut cursor) = self.data_cursor { cursor .get(&k, &mut self.statistics.data)? @@ -204,13 +199,25 @@ impl MvccReader { self.statistics.data.get += 1; self.snapshot.get(&k)? 
}; + if val.is_some() { + self.statistics.data.processed_keys += 1; + } + Ok(val) + } - match val { - Some(val) => { - self.statistics.data.processed_keys += 1; - Ok(val) - } - None => Err(default_not_found_error(k.into_encoded(), "get")), + /// load the value associated with `key` and pointed by `write` + fn load_data(&mut self, key: &Key, write: Write) -> Result { + assert_eq!(write.write_type, WriteType::Put); + if let Some(val) = write.short_value { + return Ok(val); + } + let start_ts = write.start_ts; + match self.get_value(key, start_ts)? { + Some(val) => Ok(val), + None => Err(default_not_found_error( + key.clone().append_ts(start_ts).into_encoded(), + "get", + )), } } @@ -2011,8 +2018,17 @@ pub mod tests { engine.write(case.modifies); let snap = RegionSnapshot::::from_raw(db.clone(), region.clone()); let mut reader = MvccReader::new(snap, case.scan_mode, false); - let result = reader.load_data(&case.key, case.write); + let result = reader.load_data(&case.key, case.write.clone()); assert_eq!(format!("{:?}", result), format!("{:?}", case.expected)); + if let Ok(expected) = case.expected { + if expected == long_value.to_vec() { + let result = reader + .get_value(&case.key, case.write.start_ts) + .unwrap() + .unwrap(); + assert_eq!(format!("{:?}", result), format!("{:?}", expected)); + } + } } } diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index 71f50715a20..e719ca24a26 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -2,7 +2,7 @@ use std::ops::Bound; -use txn_types::{Key, Lock, TimeStamp, Write, WriteType}; +use txn_types::{Key, Lock, LockType, TimeStamp, Write, WriteType}; use crate::storage::{ mvcc::{MvccReader, MvccTxn, SnapshotReader, MAX_TXN_WRITE_SIZE}, @@ -12,26 +12,27 @@ use crate::storage::{ pub const FLASHBACK_BATCH_SIZE: usize = 256 + 1 /* To store the next key for multiple batches */; -pub fn 
flashback_to_version_read_lock( - reader: &mut MvccReader, +pub fn flashback_to_version_read_lock( + reader: &mut MvccReader, next_lock_key: Key, end_key: &Key, statistics: &mut Statistics, -) -> TxnResult<(Vec<(Key, Lock)>, bool)> { - let key_locks_result = reader.scan_locks( +) -> TxnResult> { + let result = reader.scan_locks( Some(&next_lock_key), Some(end_key), - // To flashback `CF_LOCK`, we need to delete all locks. |_| true, FLASHBACK_BATCH_SIZE, ); statistics.add(&reader.statistics); - Ok(key_locks_result?) + let (key_locks, _) = result?; + Ok(key_locks) } -pub fn flashback_to_version_read_write( - reader: &mut MvccReader, +pub fn flashback_to_version_read_write( + reader: &mut MvccReader, next_write_key: Key, + start_key: &Key, end_key: &Key, flashback_version: TimeStamp, flashback_commit_ts: TimeStamp, @@ -47,14 +48,17 @@ pub fn flashback_to_version_read_write( let keys_result = reader.scan_latest_user_keys( Some(&next_write_key), Some(end_key), - |_, latest_commit_ts| { + |key, latest_commit_ts| { // There is no any other write could happen after the flashback begins. assert!(latest_commit_ts <= flashback_commit_ts); + // - Skip the `start_key`. // - No need to find an old version for the key if its latest `commit_ts` is // smaller than or equal to the flashback version. // - No need to flashback a key twice if its latest `commit_ts` is equal to the // flashback `commit_ts`. - latest_commit_ts > flashback_version && latest_commit_ts < flashback_commit_ts + key != start_key + && latest_commit_ts > flashback_version + && latest_commit_ts < flashback_commit_ts }, FLASHBACK_BATCH_SIZE, ); @@ -63,12 +67,9 @@ pub fn flashback_to_version_read_write( Ok(keys) } -// To flashback the `CF_LOCK`, we need to delete all locks records whose -// `start_ts` is greater than the specified version, and if it's not a -// short-value `LockType::Put`, we need to delete the actual data from -// `CF_DEFAULT` as well. -// TODO: `resolved_ts` should be taken into account. 
-pub fn flashback_to_version_lock( +// At the very first beginning of flashback, we need to rollback all locks in +// `CF_LOCK`. +pub fn rollback_locks( txn: &mut MvccTxn, reader: &mut SnapshotReader, key_locks: Vec<(Key, Lock)>, @@ -123,9 +124,9 @@ pub fn flashback_to_version_write( } let old_write = reader.get_write(&key, flashback_version)?; let new_write = if let Some(old_write) = old_write { - // If it's not a short value and it's a `WriteType::Put`, we should put the old + // If it's a `WriteType::Put` without the short value, we should put the old // value in `CF_DEFAULT` with `self.start_ts` as well. - if old_write.short_value.is_none() && old_write.write_type == WriteType::Put { + if old_write.write_type == WriteType::Put && old_write.short_value.is_none() { txn.put_value( key.clone(), flashback_start_ts, @@ -142,21 +143,94 @@ pub fn flashback_to_version_write( // delete the current key when needed. Write::new(WriteType::Delete, flashback_start_ts, None) }; + txn.put_write(key, flashback_commit_ts, new_write.as_ref().to_bytes()); + } + Ok(None) +} + +// Prewrite the `key_to_lock`, namely the `self.start_key`, to do a special 2PC +// transaction. +pub fn prewrite_flashback_key( + txn: &mut MvccTxn, + reader: &mut SnapshotReader, + key_to_lock: &Key, + flashback_version: TimeStamp, + flashback_start_ts: TimeStamp, +) -> TxnResult<()> { + let old_write = reader.get_write(key_to_lock, flashback_version)?; + // Flashback the value in `CF_DEFAULT` as well if the old write is a + // `WriteType::Put` without the short value. + if let Some(old_write) = old_write.as_ref() { + if old_write.write_type == WriteType::Put + && old_write.short_value.is_none() + // If the value with `flashback_start_ts` already exists, we don't need to write again. 
+ && reader.reader.get_value(key_to_lock, flashback_start_ts)?.is_none() + { + txn.put_value( + key_to_lock.clone(), + flashback_start_ts, + reader.load_data(key_to_lock, old_write.clone())?, + ); + } + } + txn.put_lock( + key_to_lock.clone(), + &Lock::new( + old_write.as_ref().map_or(LockType::Delete, |write| { + if write.write_type == WriteType::Delete { + LockType::Delete + } else { + LockType::Put + } + }), + key_to_lock.as_encoded().to_vec(), + flashback_start_ts, + 0, + old_write.and_then(|write| write.short_value), + TimeStamp::zero(), + 1, + TimeStamp::zero(), + ), + ); + Ok(()) +} + +pub fn commit_flashback_key( + txn: &mut MvccTxn, + reader: &mut SnapshotReader, + key_to_commit: &Key, + flashback_start_ts: TimeStamp, + flashback_commit_ts: TimeStamp, +) -> TxnResult<()> { + if let Some(mut lock) = reader.load_lock(key_to_commit)? { txn.put_write( - key.clone(), + key_to_commit.clone(), + flashback_commit_ts, + Write::new( + WriteType::from_lock_type(lock.lock_type).unwrap(), + flashback_start_ts, + lock.short_value.take(), + ) + .set_last_change(lock.last_change_ts, lock.versions_to_last_change) + .set_txn_source(lock.txn_source) + .as_ref() + .to_bytes(), + ); + txn.unlock_key( + key_to_commit.clone(), + lock.is_pessimistic_txn(), flashback_commit_ts, - new_write.as_ref().to_bytes(), ); } - Ok(None) + Ok(()) } #[cfg(test)] pub mod tests { use concurrency_manager::ConcurrencyManager; - use kvproto::kvrpcpb::Context; + use kvproto::kvrpcpb::{Context, PrewriteRequestPessimisticAction::DoPessimisticCheck}; use tikv_kv::ScanMode; - use txn_types::TimeStamp; + use txn_types::{TimeStamp, SHORT_VALUE_MAX_LEN}; use super::*; use crate::storage::{ @@ -172,42 +246,82 @@ pub mod tests { Engine, TestEngineBuilder, }; - fn must_flashback_to_version( + fn must_rollback_lock( engine: &mut E, key: &[u8], version: impl Into, start_ts: impl Into, - commit_ts: impl Into, ) -> usize { let next_key = Key::from_raw(keys::next_key(key).as_slice()); let key = Key::from_raw(key); 
- let (version, start_ts, commit_ts) = (version.into(), start_ts.into(), commit_ts.into()); + let (version, start_ts) = (version.into(), start_ts.into()); let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &ctx); let mut statistics = Statistics::default(); - // Flashback the locks. - let (key_locks, has_remain_locks) = - flashback_to_version_read_lock(&mut reader, key.clone(), &next_key, &mut statistics) - .unwrap(); - assert!(!has_remain_locks); + let key_locks = + flashback_to_version_read_lock(&mut reader, key, &next_key, &mut statistics).unwrap(); let cm = ConcurrencyManager::new(TimeStamp::zero()); - let mut txn = MvccTxn::new(start_ts, cm.clone()); + let mut txn = MvccTxn::new(start_ts, cm); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut snap_reader = SnapshotReader::new_with_ctx(version, snapshot, &ctx); - flashback_to_version_lock(&mut txn, &mut snap_reader, key_locks).unwrap(); - let mut rows = txn.modifies.len(); + rollback_locks(&mut txn, &mut snap_reader, key_locks).unwrap(); + let rows = txn.modifies.len(); + write(engine, &ctx, txn.into_modifies()); + rows + } + + fn must_prewrite_flashback_key( + engine: &mut E, + key: &[u8], + version: impl Into, + start_ts: impl Into, + ) -> usize { + let (version, start_ts) = (version.into(), start_ts.into()); + let cm = ConcurrencyManager::new(TimeStamp::zero()); + let mut txn = MvccTxn::new(start_ts, cm); + let snapshot = engine.snapshot(Default::default()).unwrap(); + let ctx = Context::default(); + let mut snap_reader = SnapshotReader::new_with_ctx(version, snapshot, &ctx); + prewrite_flashback_key( + &mut txn, + &mut snap_reader, + &Key::from_raw(key), + version, + start_ts, + ) + .unwrap(); + let rows = txn.modifies.len(); write(engine, &ctx, txn.into_modifies()); + rows + } + + fn must_flashback_to_version( + engine: &mut E, + key: &[u8], + version: impl Into, + 
start_ts: impl Into, + commit_ts: impl Into, + ) -> usize { + let next_key = Key::from_raw(keys::next_key(key).as_slice()); + let key = Key::from_raw(key); + let (version, start_ts, commit_ts) = (version.into(), start_ts.into(), commit_ts.into()); + let ctx = Context::default(); + let snapshot = engine.snapshot(Default::default()).unwrap(); + let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &ctx); + let mut statistics = Statistics::default(); // Flashback the writes. let keys = flashback_to_version_read_write( &mut reader, key, + &Key::from_raw(b""), &next_key, version, commit_ts, &mut statistics, ) .unwrap(); + let cm = ConcurrencyManager::new(TimeStamp::zero()); let mut txn = MvccTxn::new(start_ts, cm); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut snap_reader = SnapshotReader::new_with_ctx(version, snapshot, &ctx); @@ -220,7 +334,7 @@ pub mod tests { commit_ts, ) .unwrap(); - rows += txn.modifies.len(); + let rows = txn.modifies.len(); write(engine, &ctx, txn.into_modifies()); rows } @@ -317,8 +431,6 @@ pub mod tests { #[test] fn test_flashback_to_version_pessimistic() { - use kvproto::kvrpcpb::PrewriteRequestPessimisticAction::*; - let mut engine = TestEngineBuilder::new().build().unwrap(); let k = b"k"; let (v1, v2, v3) = (b"v1", b"v2", b"v3"); @@ -335,7 +447,8 @@ pub mod tests { // Flashback to version 17 with start_ts = 35, commit_ts = 40. // Distinguish from pessimistic start_ts 30 to make sure rollback ts is by lock // ts. - assert_eq!(must_flashback_to_version(&mut engine, k, 17, 35, 40), 3); + assert_eq!(must_rollback_lock(&mut engine, k, 17, 35), 2); + assert_eq!(must_flashback_to_version(&mut engine, k, 17, 35, 40), 1); // Pessimistic Prewrite Put(k -> v3) with stat_ts = 30 will be error with // Rollback. 
@@ -365,4 +478,46 @@ pub mod tests { 0 ); } + + #[test] + fn test_duplicated_prewrite_flashback_key() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + let mut ts = TimeStamp::zero(); + let (k, v) = (b"k", [u8::MAX; SHORT_VALUE_MAX_LEN + 1]); + must_prewrite_put(&mut engine, k, &v, k, *ts.incr()); + must_commit(&mut engine, k, ts, *ts.incr()); + must_get(&mut engine, k, ts, &v); + + let flashback_start_ts = *ts.incr(); + // Rollback nothing. + assert_eq!( + must_rollback_lock(&mut engine, k, ts, flashback_start_ts), + 0 + ); + // Lock and write the value of `k`. + assert_eq!( + must_prewrite_flashback_key(&mut engine, k, 2, flashback_start_ts), + 2 + ); + // Unlock `k`, put rollback record and delete the value of `k`. + assert_eq!( + must_rollback_lock(&mut engine, k, ts, flashback_start_ts), + 3 + ); + // Lock and write the value of `k`. + assert_eq!( + must_prewrite_flashback_key(&mut engine, k, 2, flashback_start_ts), + 2 + ); + // Only unlock `k` since there is an overlapped rollback record. + assert_eq!( + must_rollback_lock(&mut engine, k, ts, flashback_start_ts), + 1 + ); + // Only lock `k` since the value of `k` has already existed. 
+ assert_eq!( + must_prewrite_flashback_key(&mut engine, k, 2, flashback_start_ts), + 1 + ); + } } diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index d53a3a5c3be..a1936cee647 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -10,7 +10,10 @@ use crate::storage::{ lock_manager::LockManager, mvcc::{MvccTxn, SnapshotReader}, txn::{ - actions::flashback_to_version::{flashback_to_version_lock, flashback_to_version_write}, + actions::flashback_to_version::{ + commit_flashback_key, flashback_to_version_write, prewrite_flashback_key, + rollback_locks, + }, commands::{ Command, CommandExt, FlashbackToVersionReadPhase, FlashbackToVersionState, ReaderWithStats, ReleasedLocks, ResponsePolicy, TypedCommand, WriteCommand, @@ -42,22 +45,26 @@ impl CommandExt for FlashbackToVersion { fn gen_lock(&self) -> latch::Lock { match &self.state { - FlashbackToVersionState::ScanLock { key_locks, .. } => { + FlashbackToVersionState::RollbackLock { key_locks, .. } => { latch::Lock::new(key_locks.iter().map(|(key, _)| key)) } - FlashbackToVersionState::ScanWrite { keys, .. } => latch::Lock::new(keys.iter()), + FlashbackToVersionState::Prewrite { key_to_lock } => latch::Lock::new([key_to_lock]), + FlashbackToVersionState::FlashbackWrite { keys, .. } => latch::Lock::new(keys.iter()), + FlashbackToVersionState::Commit { key_to_commit } => latch::Lock::new([key_to_commit]), } } fn write_bytes(&self) -> usize { match &self.state { - FlashbackToVersionState::ScanLock { key_locks, .. } => key_locks + FlashbackToVersionState::RollbackLock { key_locks, .. } => key_locks .iter() .map(|(key, _)| key.as_encoded().len()) .sum(), - FlashbackToVersionState::ScanWrite { keys, .. } => { + FlashbackToVersionState::Prewrite { key_to_lock } => key_to_lock.as_encoded().len(), + FlashbackToVersionState::FlashbackWrite { keys, .. 
} => { keys.iter().map(|key| key.as_encoded().len()).sum() } + FlashbackToVersionState::Commit { key_to_commit } => key_to_commit.as_encoded().len(), } } } @@ -69,19 +76,26 @@ impl WriteCommand for FlashbackToVersion { context.statistics, ); let mut txn = MvccTxn::new(TimeStamp::zero(), context.concurrency_manager); - // The state must be `ScanLock` or `ScanWrite` here. match self.state { - FlashbackToVersionState::ScanLock { + FlashbackToVersionState::RollbackLock { ref mut next_lock_key, ref mut key_locks, } => { if let Some(new_next_lock_key) = - flashback_to_version_lock(&mut txn, &mut reader, mem::take(key_locks))? + rollback_locks(&mut txn, &mut reader, mem::take(key_locks))? { *next_lock_key = new_next_lock_key; } } - FlashbackToVersionState::ScanWrite { + // TODO: add some test cases for the special prewrite key. + FlashbackToVersionState::Prewrite { ref key_to_lock } => prewrite_flashback_key( + &mut txn, + &mut reader, + key_to_lock, + self.version, + self.start_ts, + )?, + FlashbackToVersionState::FlashbackWrite { ref mut next_write_key, ref mut keys, } => { @@ -96,30 +110,53 @@ impl WriteCommand for FlashbackToVersion { *next_write_key = new_next_write_key; } } + FlashbackToVersionState::Commit { ref key_to_commit } => commit_flashback_key( + &mut txn, + &mut reader, + key_to_commit, + self.start_ts, + self.commit_ts, + )?, } let rows = txn.modifies.len(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); + // To let the flashback modification could be proposed and applied successfully. write_data.extra.for_flashback = true; + // To let the CDC treat the flashback modification as an 1PC transaction. + if matches!(self.state, FlashbackToVersionState::FlashbackWrite { .. 
}) { + write_data.extra.one_pc = true; + } Ok(WriteResult { ctx: self.ctx.clone(), to_be_write: write_data, rows, pr: (move || { - fail_point!("flashback_failed_after_first_batch", |_| { - ProcessResult::Res - }); - let next_cmd = FlashbackToVersionReadPhase { - ctx: self.ctx, - deadline: self.deadline, - start_ts: self.start_ts, - commit_ts: self.commit_ts, - version: self.version, - start_key: self.start_key, - end_key: self.end_key, - state: self.state, - }; + if matches!( + self.state, + FlashbackToVersionState::Prewrite { .. } + | FlashbackToVersionState::Commit { .. } + ) { + return ProcessResult::Res; + } + + #[cfg(feature = "failpoints")] + if matches!(self.state, FlashbackToVersionState::FlashbackWrite { .. }) { + fail_point!("flashback_failed_after_first_batch", |_| { + ProcessResult::Res + }); + } + ProcessResult::NextCommand { - cmd: Command::FlashbackToVersionReadPhase(next_cmd), + cmd: Command::FlashbackToVersionReadPhase(FlashbackToVersionReadPhase { + ctx: self.ctx, + deadline: self.deadline, + start_ts: self.start_ts, + commit_ts: self.commit_ts, + version: self.version, + start_key: self.start_key, + end_key: self.end_key, + state: self.state, + }), } })(), lock_info: vec![], diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs index b41506c320b..d27225a9bf7 100644 --- a/src/storage/txn/commands/flashback_to_version_read_phase.rs +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -18,19 +18,24 @@ use crate::storage::{ #[derive(Debug)] pub enum FlashbackToVersionState { - ScanLock { + RollbackLock { next_lock_key: Key, key_locks: Vec<(Key, Lock)>, }, - ScanWrite { + Prewrite { + key_to_lock: Key, + }, + FlashbackWrite { next_write_key: Key, keys: Vec, }, + Commit { + key_to_commit: Key, + }, } -pub fn new_flashback_to_version_read_phase_cmd( +pub fn new_flashback_rollback_lock_cmd( start_ts: TimeStamp, - commit_ts: TimeStamp, version: TimeStamp, 
start_key: Key, end_key: Key, @@ -38,11 +43,11 @@ pub fn new_flashback_to_version_read_phase_cmd( ) -> TypedCommand<()> { FlashbackToVersionReadPhase::new( start_ts, - commit_ts, + TimeStamp::zero(), version, start_key.clone(), end_key, - FlashbackToVersionState::ScanLock { + FlashbackToVersionState::RollbackLock { next_lock_key: start_key, key_locks: Vec::new(), }, @@ -50,6 +55,28 @@ pub fn new_flashback_to_version_read_phase_cmd( ) } +pub fn new_flashback_write_cmd( + start_ts: TimeStamp, + commit_ts: TimeStamp, + version: TimeStamp, + start_key: Key, + end_key: Key, + ctx: Context, +) -> TypedCommand<()> { + FlashbackToVersionReadPhase::new( + start_ts, + commit_ts, + version, + start_key.clone(), + end_key, + FlashbackToVersionState::FlashbackWrite { + next_write_key: start_key, + keys: Vec::new(), + }, + ctx, + ) +} + command! { FlashbackToVersionReadPhase: cmd_ty => (), @@ -76,48 +103,41 @@ impl CommandExt for FlashbackToVersionReadPhase { } } -/// FlashbackToVersion contains two phases: -/// 1. Read phase: -/// - Scan all locks to delete them all later. -/// - Scan all the latest writes to flashback them all later. -/// 2. Write phase: -/// - Delete all locks we scanned at the read phase. -/// - Write the old MVCC version writes for the keys we scanned at the read -/// phase. +/// The whole flashback progress contains four phases: +/// 1. [PrepareFlashback] RollbackLock phase: +/// - Scan all locks. +/// - Rollback all these locks. +/// 2. [PrepareFlashback] Prewrite phase: +/// - Prewrite the `self.start_key` specifically to prevent the +/// `resolved_ts` from advancing. +/// 3. [FinishFlashback] FlashbackWrite phase: +/// - Scan all the latest writes and their corresponding values at +/// `self.version`. +/// - Write the old MVCC version writes again for all these keys with +/// `self.commit_ts` excluding the `self.start_key`. +/// 4. 
[FinishFlashback] Commit phase: +/// - Commit the `self.start_key` we write at the second phase to finish the +/// flashback. impl ReadCommand for FlashbackToVersionReadPhase { fn process_read(self, snapshot: S, statistics: &mut Statistics) -> Result { - if self.commit_ts <= self.start_ts { - return Err(Error::from(ErrorInner::InvalidTxnTso { - start_ts: self.start_ts, - commit_ts: self.commit_ts, - })); - } let tag = self.tag().get_str(); - let mut read_again = false; let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &self.ctx); - // Separate the lock and write flashback to prevent from putting two writes for - // the same key in a single batch to make the TiCDC panic. let next_state = match self.state { - FlashbackToVersionState::ScanLock { next_lock_key, .. } => { - let (mut key_locks, has_remain_locks) = flashback_to_version_read_lock( + FlashbackToVersionState::RollbackLock { next_lock_key, .. } => { + let mut key_locks = flashback_to_version_read_lock( &mut reader, next_lock_key, &self.end_key, statistics, )?; - if key_locks.is_empty() && !has_remain_locks { - // No more locks to flashback, continue to scan the writes. - read_again = true; - FlashbackToVersionState::ScanWrite { - next_write_key: self.start_key.clone(), - keys: Vec::new(), + if key_locks.is_empty() { + // No more locks to rollback, continue to the prewrite phase. + FlashbackToVersionState::Prewrite { + key_to_lock: self.start_key.clone(), } } else { - assert!(!key_locks.is_empty()); tls_collect_keyread_histogram_vec(tag, key_locks.len() as f64); - FlashbackToVersionState::ScanLock { - // DO NOT pop the last key as the next key when it's the only key to prevent - // from making flashback fall into a dead loop. 
+ FlashbackToVersionState::RollbackLock { next_lock_key: if key_locks.len() > 1 { key_locks.pop().map(|(key, _)| key).unwrap() } else { @@ -127,54 +147,59 @@ impl ReadCommand for FlashbackToVersionReadPhase { } } } - FlashbackToVersionState::ScanWrite { next_write_key, .. } => { + FlashbackToVersionState::FlashbackWrite { next_write_key, .. } => { + if self.commit_ts <= self.start_ts { + return Err(Error::from(ErrorInner::InvalidTxnTso { + start_ts: self.start_ts, + commit_ts: self.commit_ts, + })); + } + // If the key is not locked, it means that the key has been committed before and + // we are in a retry. + if next_write_key == self.start_key && reader.load_lock(&next_write_key)?.is_none() + { + return Ok(ProcessResult::Res); + } let mut keys = flashback_to_version_read_write( &mut reader, next_write_key, + &self.start_key, &self.end_key, self.version, self.commit_ts, statistics, )?; if keys.is_empty() { - // No more writes to flashback, just return. - return Ok(ProcessResult::Res); - } - tls_collect_keyread_histogram_vec(tag, keys.len() as f64); - FlashbackToVersionState::ScanWrite { - next_write_key: if keys.len() > 1 { - keys.pop().unwrap() - } else { - keys.last().unwrap().clone() - }, - keys, + FlashbackToVersionState::Commit { + key_to_commit: self.start_key.clone(), + } + } else { + tls_collect_keyread_histogram_vec(tag, keys.len() as f64); + FlashbackToVersionState::FlashbackWrite { + // DO NOT pop the last key as the next key when it's the only key to prevent + // from making flashback fall into a dead loop. 
+ next_write_key: if keys.len() > 1 { + keys.pop().unwrap() + } else { + keys.last().unwrap().clone() + }, + keys, + } } } + _ => unreachable!(), }; Ok(ProcessResult::NextCommand { - cmd: if read_again { - Command::FlashbackToVersionReadPhase(FlashbackToVersionReadPhase { - ctx: self.ctx, - deadline: self.deadline, - start_ts: self.start_ts, - commit_ts: self.commit_ts, - version: self.version, - start_key: self.start_key, - end_key: self.end_key, - state: next_state, - }) - } else { - Command::FlashbackToVersion(FlashbackToVersion { - ctx: self.ctx, - deadline: self.deadline, - start_ts: self.start_ts, - commit_ts: self.commit_ts, - version: self.version, - start_key: self.start_key, - end_key: self.end_key, - state: next_state, - }) - }, + cmd: Command::FlashbackToVersion(FlashbackToVersion { + ctx: self.ctx, + deadline: self.deadline, + start_ts: self.start_ts, + commit_ts: self.commit_ts, + version: self.version, + start_key: self.start_key, + end_key: self.end_key, + state: next_state, + }), }) } } diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index c09ca934fa0..4d3f32fa9cd 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -43,7 +43,8 @@ pub use compare_and_swap::RawCompareAndSwap; use concurrency_manager::{ConcurrencyManager, KeyHandleGuard}; pub use flashback_to_version::FlashbackToVersion; pub use flashback_to_version_read_phase::{ - new_flashback_to_version_read_phase_cmd, FlashbackToVersionReadPhase, FlashbackToVersionState, + new_flashback_rollback_lock_cmd, new_flashback_write_cmd, FlashbackToVersionReadPhase, + FlashbackToVersionState, }; use kvproto::kvrpcpb::*; pub use mvcc_by_key::MvccByKey; @@ -361,9 +362,21 @@ impl From for TypedCommand> { } } +impl From for TypedCommand<()> { + fn from(mut req: PrepareFlashbackToVersionRequest) -> Self { + new_flashback_rollback_lock_cmd( + req.get_start_ts().into(), + req.get_version().into(), + Key::from_raw(req.get_start_key()), + 
Key::from_raw(req.get_end_key()), + req.take_context(), + ) + } +} + impl From for TypedCommand<()> { fn from(mut req: FlashbackToVersionRequest) -> Self { - new_flashback_to_version_read_phase_cmd( + new_flashback_write_cmd( req.get_start_ts().into(), req.get_commit_ts().into(), req.get_version().into(), diff --git a/src/storage/txn/mod.rs b/src/storage/txn/mod.rs index 86ceda2bdf1..f6884b0efb8 100644 --- a/src/storage/txn/mod.rs +++ b/src/storage/txn/mod.rs @@ -24,8 +24,8 @@ pub use self::{ cleanup::cleanup, commit::commit, flashback_to_version::{ - flashback_to_version_lock, flashback_to_version_read_lock, - flashback_to_version_read_write, flashback_to_version_write, FLASHBACK_BATCH_SIZE, + flashback_to_version_read_lock, flashback_to_version_read_write, + flashback_to_version_write, rollback_locks, FLASHBACK_BATCH_SIZE, }, gc::gc, prewrite::{prewrite, CommitKind, TransactionKind, TransactionProperties}, From 916d5137b475da88452fde3e20b08126d5cdf3cf Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Tue, 29 Nov 2022 22:50:00 +0800 Subject: [PATCH 0370/1149] cop: set the error field properly for the batch cop task response (#13857) close tikv/tikv#13856 Fill the batch task response error field properly, the error field should be None if no error happens. 
Signed-off-by: cfzjywxk --- src/coprocessor/endpoint.rs | 8 ++++++-- tests/integrations/coprocessor/test_select.rs | 9 +++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 51927cd6b56..1fefb2a55ae 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -564,8 +564,12 @@ impl Endpoint { match res { Ok(mut resp) => { response.set_data(resp.take_data()); - response.set_region_error(resp.take_region_error()); - response.set_locked(resp.take_locked()); + if let Some(err) = resp.region_error.take() { + response.set_region_error(err); + } + if let Some(lock_info) = resp.locked.take() { + response.set_locked(lock_info); + } response.set_other_error(resp.take_other_error()); GLOBAL_TRACKERS.with_tracker(cur_tracker, |tracker| { tracker.write_scan_detail( diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index c802b697872..ad195f62774 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -2176,14 +2176,23 @@ fn test_batch_request() { row_count += 1; } assert_eq!(row_count, expected_len); + assert!(region_err.is_none()); + assert!(locked.is_none()); + assert!(other_err.is_empty()); } QueryResult::ErrRegion => { assert!(region_err.is_some()); + assert!(locked.is_none()); + assert!(other_err.is_empty()); } QueryResult::ErrLocked => { + assert!(region_err.is_none()); assert!(locked.is_some()); + assert!(other_err.is_empty()); } QueryResult::ErrOther => { + assert!(region_err.is_none()); + assert!(locked.is_none()); assert!(!other_err.is_empty()) } } From 05aed39fb8d693bde91cff2ef94c7251ed513f56 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Wed, 30 Nov 2022 11:20:00 +0800 Subject: [PATCH 0371/1149] cop: disable the coprocessor cache path for batched task processing (#13859) ref tikv/tikv#13858 Disable the coprocessor cache path for the batched task processing, 
the derived fields from the original task could not be used by the batched tasks. Signed-off-by: cfzjywxk --- src/coprocessor/endpoint.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 1fefb2a55ae..3274700d812 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -538,6 +538,9 @@ impl Endpoint { .iter_mut() .map(|task| { let mut new_req = req.clone(); + // Disable the coprocessor cache path for the batched tasks, the + // coprocessor cache related fields are not passed in the "task" by now. + new_req.is_cache_enabled = false; new_req.ranges = task.take_ranges(); let new_context = new_req.mut_context(); new_context.set_region_id(task.get_region_id()); From 2e18d0da5f19d1231ad0b48a38d6c85cf4ac32db Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 30 Nov 2022 14:50:00 +0800 Subject: [PATCH 0372/1149] tikv_kv: make async_write return stream (#13854) ref tikv/tikv#13827 This PR abstracts write interface with Stream trait so that we can keep compatible with both v1 and v2. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/tikv_kv/src/btree_engine.rs | 23 +- components/tikv_kv/src/lib.rs | 100 ++++++-- components/tikv_kv/src/mock_engine.rs | 23 +- components/tikv_kv/src/rocksdb_engine.rs | 72 ++++-- components/tikv_util/src/future.rs | 20 ++ components/tikv_util/src/mpsc/future.rs | 74 +++++- src/server/gc_worker/gc_worker.rs | 24 +- src/server/raftkv.rs | 310 ++++++++++++++--------- src/storage/mod.rs | 95 +++---- src/storage/raw/raw_mvcc.rs | 22 +- src/storage/txn/scheduler.rs | 182 ++++++------- tests/Cargo.toml | 1 + tests/benches/hierarchy/mvcc/mod.rs | 2 +- tests/benches/misc/raftkv/mod.rs | 15 +- 15 files changed, 591 insertions(+), 373 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a036117bfb..f1d02f06af9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5983,6 +5983,7 @@ dependencies = [ "tidb_query_executors", "tidb_query_expr", "tikv", + "tikv_kv", "tikv_util", "time", "tipb", diff --git a/components/tikv_kv/src/btree_engine.rs b/components/tikv_kv/src/btree_engine.rs index 45ce6a6ffe8..35f666896f3 100644 --- a/components/tikv_kv/src/btree_engine.rs +++ b/components/tikv_kv/src/btree_engine.rs @@ -14,14 +14,14 @@ use std::{ use collections::HashMap; use engine_panic::PanicEngine; use engine_traits::{CfName, IterOptions, ReadOptions, CF_DEFAULT, CF_LOCK, CF_WRITE}; -use futures::Future; +use futures::{future, stream, Future, Stream}; use kvproto::kvrpcpb::Context; use txn_types::{Key, Value}; use super::SnapContext; use crate::{ - Callback as EngineCallback, DummySnapshotExt, Engine, Error as EngineError, - ErrorInner as EngineErrorInner, Iterator, Modify, Result as EngineResult, Snapshot, WriteData, + DummySnapshotExt, Engine, Error as EngineError, ErrorInner as EngineErrorInner, Iterator, + Modify, OnAppliedCb, Result as EngineResult, Snapshot, WriteData, WriteEvent, }; type RwLockTree = RwLock>; @@ -87,18 +87,21 @@ impl Engine for BTreeEngine { unimplemented!(); } + type WriteRes = 
impl Stream + Send; fn async_write( &self, _ctx: &Context, batch: WriteData, - cb: EngineCallback<()>, - ) -> EngineResult<()> { - if batch.modifies.is_empty() { - return Err(EngineError::from(EngineErrorInner::EmptyRequest)); - } - cb(write_modifies(self, batch.modifies)); + _subscribed: u8, + _on_applied: Option, + ) -> Self::WriteRes { + let res = if batch.modifies.is_empty() { + Err(EngineError::from(EngineErrorInner::EmptyRequest)) + } else { + write_modifies(self, batch.modifies) + }; - Ok(()) + stream::once(future::ready(WriteEvent::Finished(res))) } type SnapshotRes = impl Future> + Send; diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index ac452fead37..07cae3ace65 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -66,6 +66,7 @@ pub const SEEK_BOUND: u64 = 8; const DEFAULT_TIMEOUT: Duration = Duration::from_secs(5); pub type Callback = Box) + Send>; +pub type OnAppliedCb = Box) + Send>; pub type ExtCallback = Box; pub type Result = result::Result; @@ -154,7 +155,7 @@ impl From for raft_cmdpb::Request { // For test purpose only. // It's used to simulate observer actions in `rocksdb_engine`. See -// `RocksEngine::async_write_ext()`. +// `RocksEngine::async_write()`. impl From for Modify { fn from(mut req: raft_cmdpb::Request) -> Modify { let name_to_cf = |name: &str| -> Option { @@ -249,6 +250,37 @@ impl WriteData { } } +/// Events that can subscribed from the `WriteSubscriber`. +pub enum WriteEvent { + Proposed, + Committed, + /// The write is either aborted or applied. 
+ Finished(Result<()>), +} + +impl WriteEvent { + pub const EVENT_PROPOSED: u8 = 1; + pub const EVENT_COMMITTED: u8 = 1 << 1; + pub const ALL_EVENTS: u8 = Self::EVENT_PROPOSED | Self::EVENT_COMMITTED; + pub const BASIC_EVENT: u8 = 0; + + #[inline] + pub fn event_capacity(subscribed: u8) -> usize { + 1 + Self::subscribed_proposed(subscribed) as usize + + Self::subscribed_committed(subscribed) as usize + } + + #[inline] + pub fn subscribed_proposed(ev: u8) -> bool { + ev & Self::EVENT_PROPOSED != 0 + } + + #[inline] + pub fn subscribed_committed(ev: u8) -> bool { + ev & Self::EVENT_COMMITTED != 0 + } +} + #[derive(Debug, Clone, Default)] pub struct SnapContext<'a> { pub pb_ctx: &'a Context, @@ -280,6 +312,10 @@ pub trait Engine: Send + Clone + 'static { fn modify_on_kv_engine(&self, region_modifies: HashMap>) -> Result<()>; type SnapshotRes: Future> + Send + 'static; + /// Get a snapshot asynchronously. + /// + /// Note the snapshot is queried immediately no matter whether the returned + /// future is polled or not. fn async_snapshot(&mut self, ctx: SnapContext<'_>) -> Self::SnapshotRes; /// Precheck request which has write with it's context. @@ -287,26 +323,42 @@ pub trait Engine: Send + Clone + 'static { Ok(()) } - fn async_write(&self, ctx: &Context, batch: WriteData, write_cb: Callback<()>) -> Result<()>; - - /// Writes data to the engine asynchronously with some extensions. + type WriteRes: Stream + Unpin + Send + 'static; + /// Writes data to the engine asynchronously. + /// + /// You can subscribe special events like `EVENT_PROPOSED` and + /// `EVENT_COMMITTED`. /// - /// When the write request is proposed successfully, the `proposed_cb` is - /// invoked. When the write request is finished, the `write_cb` is invoked. - fn async_write_ext( + /// `on_applied` is called right in the processing thread before being + /// fed to the stream. + /// + /// Note the write is started no matter whether the returned stream is + /// polled or not. 
+ fn async_write( &self, ctx: &Context, batch: WriteData, - write_cb: Callback<()>, - _proposed_cb: Option, - _committed_cb: Option, - ) -> Result<()> { - self.async_write(ctx, batch, write_cb) - } + subscribed: u8, + on_applied: Option, + ) -> Self::WriteRes; fn write(&self, ctx: &Context, batch: WriteData) -> Result<()> { - wait_op!(|cb| self.async_write(ctx, batch, cb), DEFAULT_TIMEOUT) - .unwrap_or_else(|| Err(Error::from(ErrorInner::Timeout(DEFAULT_TIMEOUT)))) + let f = write(self, ctx, batch, None); + let timeout = GLOBAL_TIMER_HANDLE + .delay(Instant::now() + DEFAULT_TIMEOUT) + .compat(); + + futures::executor::block_on(async move { + futures::select! { + res = f.fuse() => { + if let Some(res) = res { + return res; + } + }, + _ = timeout.fuse() => (), + }; + Err(Error::from(ErrorInner::Timeout(DEFAULT_TIMEOUT))) + }) } fn release_snapshot(&mut self) {} @@ -617,6 +669,24 @@ pub fn snapshot( } } +pub fn write( + engine: &E, + ctx: &Context, + batch: WriteData, + on_applied: Option, +) -> impl std::future::Future>> { + let mut res = engine.async_write(ctx, batch, WriteEvent::BASIC_EVENT, on_applied); + async move { + loop { + match res.next().await { + Some(WriteEvent::Finished(res)) => return Some(res), + Some(_) => (), + None => return None, + } + } + } +} + /// Write modifications into a `BaseRocksEngine` instance. 
pub fn write_modifies(kv_engine: &impl LocalEngine, modifies: Vec) -> Result<()> { fail_point!("rockskv_write_modifies", |_| Err(box_err!("write failed"))); diff --git a/components/tikv_kv/src/mock_engine.rs b/components/tikv_kv/src/mock_engine.rs index 376c2d1fb1f..f3d89940f4e 100644 --- a/components/tikv_kv/src/mock_engine.rs +++ b/components/tikv_kv/src/mock_engine.rs @@ -9,7 +9,7 @@ use collections::HashMap; use kvproto::kvrpcpb::Context; use super::Result; -use crate::{Callback, Engine, ExtCallback, Modify, RocksEngine, SnapContext, WriteData}; +use crate::{Engine, Modify, OnAppliedCb, RocksEngine, SnapContext, WriteData, WriteEvent}; /// A mock engine is a simple wrapper around RocksEngine /// but with the ability to assert the modifies, @@ -162,31 +162,26 @@ impl Engine for MockEngine { self.base.async_snapshot(ctx) } - fn async_write(&self, ctx: &Context, batch: WriteData, write_cb: Callback<()>) -> Result<()> { - self.async_write_ext(ctx, batch, write_cb, None, None) - } - - fn async_write_ext( + type WriteRes = ::WriteRes; + fn async_write( &self, ctx: &Context, batch: WriteData, - write_cb: Callback<()>, - proposed_cb: Option, - committed_cb: Option, - ) -> Result<()> { + subscribed: u8, + on_applied: Option, + ) -> Self::WriteRes { if let Some(expected_modifies) = self.expected_modifies.as_ref() { let mut expected_writes = expected_modifies.0.lock().unwrap(); check_expected_write( &mut expected_writes, &batch.modifies, - proposed_cb.is_some(), - committed_cb.is_some(), + WriteEvent::subscribed_proposed(subscribed), + WriteEvent::subscribed_committed(subscribed), ); } let mut last_modifies = self.last_modifies.lock().unwrap(); last_modifies.push(batch.modifies.clone()); - self.base - .async_write_ext(ctx, batch, write_cb, proposed_cb, committed_cb) + self.base.async_write(ctx, batch, subscribed, on_applied) } } diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 8b0dd28646a..565ea0accaa 100644 --- 
a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -2,10 +2,12 @@ use std::{ fmt::{self, Debug, Display, Formatter}, + pin::Pin, sync::{ atomic::{AtomicBool, Ordering}, Arc, Mutex, }, + task::Poll, time::Duration, }; @@ -18,7 +20,10 @@ use engine_traits::{ CfName, Engines, IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, }; use file_system::IoRateLimiter; -use futures::{channel::oneshot, Future}; +use futures::{ + channel::{mpsc, oneshot}, + stream, Future, Stream, +}; use kvproto::{kvrpcpb::Context, metapb, raft_cmdpb}; use raftstore::coprocessor::CoprocessorHost; use tempfile::{Builder, TempDir}; @@ -26,9 +31,10 @@ use tikv_util::worker::{Runnable, Scheduler, Worker}; use txn_types::{Key, Value}; use super::{ - write_modifies, Callback, DummySnapshotExt, Engine, Error, ErrorInner, ExtCallback, + write_modifies, Callback, DummySnapshotExt, Engine, Error, ErrorInner, Iterator as EngineIterator, Modify, Result, SnapContext, Snapshot, WriteData, }; +use crate::{OnAppliedCb, WriteEvent}; // Duplicated in test_engine_builder const TEMP_DIR: &str = ""; @@ -226,34 +232,48 @@ impl Engine for RocksEngine { Ok(()) } - fn async_write(&self, ctx: &Context, batch: WriteData, cb: Callback<()>) -> Result<()> { - self.async_write_ext(ctx, batch, cb, None, None) - } - - fn async_write_ext( + type WriteRes = impl Stream + Send + 'static; + fn async_write( &self, - _: &Context, + _ctx: &Context, batch: WriteData, - cb: Callback<()>, - proposed_cb: Option, - committed_cb: Option, - ) -> Result<()> { - fail_point!("rockskv_async_write", |_| Err(box_err!("write failed"))); - - if batch.modifies.is_empty() { - return Err(Error::from(ErrorInner::EmptyRequest)); - } + subscribed: u8, + on_applied: Option, + ) -> Self::WriteRes { + let (mut tx, mut rx) = mpsc::channel::(WriteEvent::event_capacity(subscribed)); + let res = (move || { + fail_point!("rockskv_async_write", |_| Err(box_err!("write failed"))); + + if 
batch.modifies.is_empty() { + return Err(Error::from(ErrorInner::EmptyRequest)); + } - let batch = self.pre_propose(batch)?; + let batch = self.pre_propose(batch)?; - if let Some(cb) = proposed_cb { - cb(); - } - if let Some(cb) = committed_cb { - cb(); - } - box_try!(self.sched.schedule(Task::Write(batch.modifies, cb))); - Ok(()) + if WriteEvent::subscribed_proposed(subscribed) { + let _ = tx.try_send(WriteEvent::Proposed); + } + if WriteEvent::subscribed_committed(subscribed) { + let _ = tx.try_send(WriteEvent::Committed); + } + let cb = Box::new(move |mut res| { + if let Some(cb) = on_applied { + cb(&mut res); + } + let _ = tx.try_send(WriteEvent::Finished(res)); + }); + box_try!(self.sched.schedule(Task::Write(batch.modifies, cb))); + Ok(()) + })(); + let mut res = Some(res); + stream::poll_fn(move |cx| { + if res.as_ref().map_or(false, |r| r.is_err()) { + return Poll::Ready(res.take().map(WriteEvent::Finished)); + } + // If it's none, it means an error is returned, it should not be polled again. + assert!(res.is_some()); + Pin::new(&mut rx).poll_next(cx) + }) } type SnapshotRes = impl Future> + Send; diff --git a/components/tikv_util/src/future.rs b/components/tikv_util/src/future.rs index 5f4c5b43817..7b22bebb482 100644 --- a/components/tikv_util/src/future.rs +++ b/components/tikv_util/src/future.rs @@ -197,6 +197,18 @@ impl ArcWake for PollAtWake { } } +/// Poll the future immediately. If the future is ready, returns the result. +/// Otherwise just ignore the future. +#[inline] +pub fn try_poll(f: impl Future) -> Option { + futures::executor::block_on(async move { + futures::select_biased! { + res = f.fuse() => Some(res), + _ = futures::future::ready(()).fuse() => None, + } + }) +} + #[cfg(test)] mod tests { use std::sync::atomic::AtomicUsize; @@ -232,4 +244,12 @@ mod tests { // 3. 
future gets ready, ignore NOTIFIED assert_eq!(poll_times.load(Ordering::SeqCst), 2); } + + #[test] + fn test_try_poll() { + let f = futures::future::ready(1); + assert_eq!(try_poll(f), Some(1)); + let f = futures::future::pending::<()>(); + assert_eq!(try_poll(f), None); + } } diff --git a/components/tikv_util/src/mpsc/future.rs b/components/tikv_util/src/mpsc/future.rs index 1e9f94c2f2d..00598f5295d 100644 --- a/components/tikv_util/src/mpsc/future.rs +++ b/components/tikv_util/src/mpsc/future.rs @@ -10,10 +10,49 @@ use std::{ use crossbeam::{ channel::{SendError, TryRecvError}, - queue::SegQueue, + queue::{ArrayQueue, SegQueue}, }; use futures::{task::AtomicWaker, Stream, StreamExt}; +enum QueueType { + Unbounded(SegQueue), + Bounded(ArrayQueue), +} + +impl QueueType { + fn len(&self) -> usize { + match self { + QueueType::Unbounded(q) => q.len(), + QueueType::Bounded(q) => q.len(), + } + } + + fn bounded(cap: usize) -> QueueType { + QueueType::Bounded(ArrayQueue::new(cap)) + } + + fn unbounded() -> QueueType { + QueueType::Unbounded(SegQueue::new()) + } + + fn push_back(&self, t: T) -> Result<(), SendError> { + match self { + QueueType::Unbounded(q) => { + q.push(t); + Ok(()) + } + QueueType::Bounded(q) => q.push(t).map_err(SendError), + } + } + + fn pop_front(&self) -> Option { + match self { + QueueType::Unbounded(q) => q.pop(), + QueueType::Bounded(q) => q.pop(), + } + } +} + #[derive(Clone, Copy)] pub enum WakePolicy { Immediately, @@ -21,7 +60,7 @@ pub enum WakePolicy { } struct Queue { - queue: SegQueue, + queue: QueueType, waker: AtomicWaker, liveness: AtomicUsize, policy: WakePolicy, @@ -62,9 +101,9 @@ impl Sender { pub fn send_with(&self, t: T, policy: WakePolicy) -> Result<(), SendError> { let queue = unsafe { &*self.queue }; if queue.liveness.load(Ordering::Acquire) & RECEIVER_COUNT_BASE != 0 { - queue.queue.push(t); + let res = queue.queue.push_back(t); queue.wake(policy); - return Ok(()); + return res; } Err(SendError(t)) } @@ -110,12 +149,12 @@ 
impl Stream for Receiver { #[inline] fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { let queue = unsafe { &*self.queue }; - if let Some(t) = queue.queue.pop() { + if let Some(t) = queue.queue.pop_front() { return Poll::Ready(Some(t)); } queue.waker.register(cx.waker()); // In case the message is pushed right before registering waker. - if let Some(t) = queue.queue.pop() { + if let Some(t) = queue.queue.pop_front() { return Poll::Ready(Some(t)); } if queue.liveness.load(Ordering::Acquire) & !RECEIVER_COUNT_BASE != 0 { @@ -129,7 +168,7 @@ impl Receiver { #[inline] pub fn try_recv(&mut self) -> Result { let queue = unsafe { &*self.queue }; - if let Some(t) = queue.queue.pop() { + if let Some(t) = queue.queue.pop_front() { return Ok(t); } if queue.liveness.load(Ordering::Acquire) & !RECEIVER_COUNT_BASE != 0 { @@ -156,9 +195,19 @@ impl Drop for Receiver { unsafe impl Send for Receiver {} +#[inline] pub fn unbounded(policy: WakePolicy) -> (Sender, Receiver) { + with_queue(QueueType::unbounded(), policy) +} + +#[inline] +pub fn bounded(cap: usize, policy: WakePolicy) -> (Sender, Receiver) { + with_queue(QueueType::bounded(cap), policy) +} + +fn with_queue(queue: QueueType, policy: WakePolicy) -> (Sender, Receiver) { let queue = Box::into_raw(Box::new(Queue { - queue: SegQueue::new(), + queue, waker: AtomicWaker::new(), liveness: AtomicUsize::new(SENDER_COUNT_BASE | RECEIVER_COUNT_BASE), policy, @@ -430,4 +479,13 @@ mod tests { drop(tx1); assert!(dropped.load(Ordering::SeqCst)); } + + #[test] + fn test_bounded() { + let (tx, mut rx) = super::bounded(1, WakePolicy::Immediately); + tx.send(1).unwrap(); + tx.send(2).unwrap_err(); + assert_eq!(rx.try_recv().unwrap(), 1); + rx.try_recv().unwrap_err(); + } } diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 9c3c289ecf7..0a162a58230 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -1293,16 +1293,13 @@ pub mod test_gc_worker { 
metapb::{Peer, Region}, }; use raftstore::store::RegionSnapshot; - use tikv_kv::write_modifies; + use tikv_kv::{write_modifies, OnAppliedCb}; use txn_types::{Key, TimeStamp}; use crate::{ server::gc_worker::{GcSafePointProvider, Result as GcWorkerResult}, storage::{ - kv::{ - self, Callback as EngineCallback, Modify, Result as EngineResult, SnapContext, - WriteData, - }, + kv::{self, Modify, Result as EngineResult, SnapContext, WriteData}, Engine, }, }; @@ -1355,12 +1352,14 @@ pub mod test_gc_worker { write_modifies(&self.kv_engine().unwrap(), modifies) } + type WriteRes = ::WriteRes; fn async_write( &self, ctx: &Context, mut batch: WriteData, - callback: EngineCallback<()>, - ) -> EngineResult<()> { + subscribed: u8, + on_applied: Option, + ) -> Self::WriteRes { batch.modifies.iter_mut().for_each(|modify| match modify { Modify::Delete(_, ref mut key) => { *key = Key::from_encoded(keys::data_key(key.as_encoded())); @@ -1376,7 +1375,7 @@ pub mod test_gc_worker { *end_key = Key::from_encoded(keys::data_end_key(end_key.as_encoded())); } }); - self.0.async_write(ctx, batch, callback) + self.0.async_write(ctx, batch, subscribed, on_applied) } type SnapshotRes = impl Future> + Send; @@ -1427,13 +1426,16 @@ pub mod test_gc_worker { Ok(()) } + type WriteRes = ::WriteRes; fn async_write( &self, ctx: &Context, batch: WriteData, - callback: EngineCallback<()>, - ) -> EngineResult<()> { - self.engines.lock().unwrap()[&ctx.region_id].async_write(ctx, batch, callback) + subscribed: u8, + on_applied: Option, + ) -> Self::WriteRes { + self.engines.lock().unwrap()[&ctx.region_id] + .async_write(ctx, batch, subscribed, on_applied) } type SnapshotRes = impl Future> + Send; diff --git a/src/server/raftkv.rs b/src/server/raftkv.rs index 6dc84f951ee..b6890262007 100644 --- a/src/server/raftkv.rs +++ b/src/server/raftkv.rs @@ -3,19 +3,25 @@ // #[PerformanceCriticalPath] use std::{ borrow::Cow, + cell::UnsafeCell, fmt::{self, Debug, Display, Formatter}, io::Error as IoError, mem, 
num::NonZeroU64, + pin::Pin, result, - sync::{Arc, RwLock}, + sync::{ + atomic::{AtomicU8, Ordering}, + Arc, RwLock, + }, + task::Poll, time::Duration, }; use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; use engine_traits::{CfName, KvEngine, MvccProperties, Snapshot}; -use futures::{future::BoxFuture, Future}; +use futures::{future::BoxFuture, task::AtomicWaker, Future, Stream, StreamExt}; use kvproto::{ errorpb, kvrpcpb::{Context, IsolationLevel}, @@ -35,12 +41,12 @@ use raftstore::{ errors::Error as RaftServerError, router::{LocalReadRouter, RaftStoreRouter}, store::{ - Callback as StoreCallback, RaftCmdExtraOpts, ReadIndexContext, ReadResponse, + self, Callback as StoreCallback, RaftCmdExtraOpts, ReadIndexContext, ReadResponse, RegionSnapshot, WriteResponse, }, }; use thiserror::Error; -use tikv_kv::write_modifies; +use tikv_kv::{write_modifies, OnAppliedCb, WriteEvent}; use tikv_util::{ codec::number::NumberEncoder, future::{paired_future_callback, paired_must_called_future_callback}, @@ -51,10 +57,7 @@ use txn_types::{Key, TimeStamp, TxnExtra, TxnExtraScheduler, WriteBatchFlags}; use super::metrics::*; use crate::storage::{ self, kv, - kv::{ - Callback, Engine, Error as KvError, ErrorInner as KvErrorInner, ExtCallback, Modify, - SnapContext, WriteData, - }, + kv::{Engine, Error as KvError, ErrorInner as KvErrorInner, Modify, SnapContext, WriteData}, }; #[derive(Debug, Error)] @@ -78,19 +81,6 @@ pub enum Error { Timeout(Duration), } -fn get_status_kind_from_error(e: &Error) -> RequestStatusKind { - match *e { - Error::RequestFailed(ref header) => { - RequestStatusKind::from(storage::get_error_kind_from_header(header)) - } - Error::Io(_) => RequestStatusKind::err_io, - Error::Server(_) => RequestStatusKind::err_server, - Error::InvalidResponse(_) => RequestStatusKind::err_invalid_resp, - Error::InvalidRequest(_) => RequestStatusKind::err_invalid_req, - Error::Timeout(_) => RequestStatusKind::err_timeout, - } -} - fn 
get_status_kind_from_engine_error(e: &kv::Error) -> RequestStatusKind { match *e { KvError(box KvErrorInner::Request(ref header)) => { @@ -208,6 +198,95 @@ pub fn drop_snapshot_callback() -> kv::Result { Err(kv::Error::from(kv::ErrorInner::Request(err))) } +struct WriteResCore { + ev: AtomicU8, + result: UnsafeCell>>, + wake: AtomicWaker, +} + +struct WriteResSub { + notified_ev: u8, + core: Arc, +} + +unsafe impl Send for WriteResSub {} + +impl Stream for WriteResSub { + type Item = WriteEvent; + + #[inline] + fn poll_next( + mut self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + let mut s = self.as_mut(); + let mut cur_ev = s.core.ev.load(Ordering::Acquire); + if cur_ev == s.notified_ev { + s.core.wake.register(cx.waker()); + cur_ev = s.core.ev.load(Ordering::Acquire); + if cur_ev == s.notified_ev { + return Poll::Pending; + } + } + s.notified_ev = cur_ev; + match cur_ev { + WriteEvent::EVENT_PROPOSED => Poll::Ready(Some(WriteEvent::Proposed)), + WriteEvent::EVENT_COMMITTED => Poll::Ready(Some(WriteEvent::Committed)), + u8::MAX => { + let result = unsafe { (*s.core.result.get()).take().unwrap() }; + Poll::Ready(Some(WriteEvent::Finished(result))) + } + e => panic!("unexpected event {}", e), + } + } +} + +#[derive(Clone)] +struct WriteResFeed { + core: Arc, +} + +unsafe impl Send for WriteResFeed {} + +impl WriteResFeed { + fn pair() -> (Self, WriteResSub) { + let core = Arc::new(WriteResCore { + ev: AtomicU8::new(0), + result: UnsafeCell::new(None), + wake: AtomicWaker::new(), + }); + ( + Self { core: core.clone() }, + WriteResSub { + notified_ev: 0, + core, + }, + ) + } + + fn notify_proposed(&self) { + self.core + .ev + .store(WriteEvent::EVENT_PROPOSED, Ordering::Release); + self.core.wake.wake(); + } + + fn notify_committed(&self) { + self.core + .ev + .store(WriteEvent::EVENT_COMMITTED, Ordering::Release); + self.core.wake.wake(); + } + + fn notify(&self, result: kv::Result<()>) { + unsafe { + (*self.core.result.get()) = Some(result); 
+ } + self.core.ev.store(u8::MAX, Ordering::Release); + self.core.wake.wake(); + } +} + /// `RaftKv` is a storage engine base on `RaftStore`. #[derive(Clone)] pub struct RaftKv @@ -239,66 +318,6 @@ where pub fn set_txn_extra_scheduler(&mut self, txn_extra_scheduler: Arc) { self.txn_extra_scheduler = Some(txn_extra_scheduler); } - - fn exec_write_requests( - &self, - ctx: &Context, - batch: WriteData, - write_cb: Callback>, - proposed_cb: Option, - committed_cb: Option, - ) -> Result<()> { - #[cfg(feature = "failpoints")] - { - // If rid is some, only the specified region reports error. - // If rid is None, all regions report error. - let raftkv_early_error_report_fp = || -> Result<()> { - fail_point!("raftkv_early_error_report", |rid| { - let region_id = ctx.get_region_id(); - rid.and_then(|rid| { - let rid: u64 = rid.parse().unwrap(); - if rid == region_id { None } else { Some(()) } - }) - .ok_or_else(|| RaftServerError::RegionNotFound(region_id).into()) - }); - Ok(()) - }; - raftkv_early_error_report_fp()?; - } - - let reqs: Vec = batch.modifies.into_iter().map(Into::into).collect(); - let txn_extra = batch.extra; - let mut header = new_request_header(ctx); - let mut flags = 0; - if txn_extra.one_pc { - flags |= WriteBatchFlags::ONE_PC.bits(); - } - if txn_extra.for_flashback { - flags |= WriteBatchFlags::FLASHBACK.bits(); - } - header.set_flags(flags); - - let mut cmd = RaftCmdRequest::default(); - cmd.set_header(header); - cmd.set_requests(reqs.into()); - - self.schedule_txn_extra(txn_extra); - - let cb = StoreCallback::write_ext( - Box::new(move |resp| { - write_cb(on_write_result(resp).map_err(Error::into)); - }), - proposed_cb, - committed_cb, - ); - let extra_opts = RaftCmdExtraOpts { - deadline: batch.deadline, - disk_full_opt: batch.disk_full_opt, - }; - self.router.send_command(cmd, cb, extra_opts)?; - - Ok(()) - } } fn invalid_resp_type(exp: CmdType, act: CmdType) -> Error { @@ -383,59 +402,116 @@ where } } + type WriteRes = impl Stream + Send + Unpin; 
fn async_write( &self, ctx: &Context, batch: WriteData, - write_cb: Callback<()>, - ) -> kv::Result<()> { - self.async_write_ext(ctx, batch, write_cb, None, None) - } - - fn async_write_ext( - &self, - ctx: &Context, - batch: WriteData, - write_cb: Callback<()>, - proposed_cb: Option, - committed_cb: Option, - ) -> kv::Result<()> { - fail_point!("raftkv_async_write"); - if batch.modifies.is_empty() { - return Err(KvError::from(KvErrorInner::EmptyRequest)); - } + subscribed: u8, + on_applied: Option, + ) -> Self::WriteRes { + let mut res = (|| { + fail_point!("raftkv_async_write"); + if batch.modifies.is_empty() { + return Err(KvError::from(KvErrorInner::EmptyRequest)); + } + Ok(()) + })(); ASYNC_REQUESTS_COUNTER_VEC.write.all.inc(); let begin_instant = Instant::now_coarse(); - self.exec_write_requests( - ctx, - batch, - Box::new(move |res| match res { + if res.is_ok() { + // If rid is some, only the specified region reports error. + // If rid is None, all regions report error. + res = (|| { + fail_point!("raftkv_early_error_report", |rid| { + let region_id = ctx.get_region_id(); + rid.and_then(|rid| { + let rid: u64 = rid.parse().unwrap(); + if rid == region_id { None } else { Some(()) } + }) + .ok_or_else(|| RaftServerError::RegionNotFound(region_id).into()) + }); + Ok(()) + })(); + } + + let reqs: Vec = batch.modifies.into_iter().map(Into::into).collect(); + let txn_extra = batch.extra; + let mut header = new_request_header(ctx); + let mut flags = 0; + if txn_extra.one_pc { + flags |= WriteBatchFlags::ONE_PC.bits(); + } + if txn_extra.for_flashback { + flags |= WriteBatchFlags::FLASHBACK.bits(); + } + header.set_flags(flags); + + let mut cmd = RaftCmdRequest::default(); + cmd.set_header(header); + cmd.set_requests(reqs.into()); + + self.schedule_txn_extra(txn_extra); + + let (tx, rx) = WriteResFeed::pair(); + let proposed_cb = if !WriteEvent::subscribed_proposed(subscribed) { + None + } else { + let tx = tx.clone(); + Some(Box::new(move || tx.notify_proposed()) 
as store::ExtCallback) + }; + let committed_cb = if !WriteEvent::subscribed_committed(subscribed) { + None + } else { + let tx = tx.clone(); + Some(Box::new(move || tx.notify_committed()) as store::ExtCallback) + }; + let applied_tx = tx.clone(); + let applied_cb = Box::new(move |resp: WriteResponse| { + let mut res = match on_write_result::(resp) { Ok(CmdRes::Resp(_)) => { + fail_point!("raftkv_async_write_finish"); + Ok(()) + } + Ok(CmdRes::Snap(_)) => Err(box_err!("unexpect snapshot, should mutate instead.")), + Err(e) => Err(kv::Error::from(e)), + }; + if let Some(cb) = on_applied { + cb(&mut res); + } + applied_tx.notify(res); + }); + + let cb = StoreCallback::write_ext(applied_cb, proposed_cb, committed_cb); + let extra_opts = RaftCmdExtraOpts { + deadline: batch.deadline, + disk_full_opt: batch.disk_full_opt, + }; + if res.is_ok() { + res = self + .router + .send_command(cmd, cb, extra_opts) + .map_err(kv::Error::from); + } + if res.is_err() { + tx.notify(res); + } + rx.inspect(move |ev| { + let WriteEvent::Finished(res) = ev else { return }; + match res { + Ok(()) => { ASYNC_REQUESTS_COUNTER_VEC.write.success.inc(); ASYNC_REQUESTS_DURATIONS_VEC .write .observe(begin_instant.saturating_elapsed_secs()); - fail_point!("raftkv_async_write_finish"); - write_cb(Ok(())) - } - Ok(CmdRes::Snap(_)) => { - write_cb(Err(box_err!("unexpect snapshot, should mutate instead."))) } Err(e) => { - let status_kind = get_status_kind_from_engine_error(&e); + let status_kind = get_status_kind_from_engine_error(e); ASYNC_REQUESTS_COUNTER_VEC.write.get(status_kind).inc(); - write_cb(Err(e)) } - }), - proposed_cb, - committed_cb, - ) - .map_err(|e| { - let status_kind = get_status_kind_from_error(&e); - ASYNC_REQUESTS_COUNTER_VEC.write.get(status_kind).inc(); - e.into() + } }) } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index b87ab8c4a6d..32cd7c11000 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -64,6 +64,7 @@ use std::{ borrow::Cow, iter, 
marker::PhantomData, + mem, sync::{ atomic::{self, AtomicBool, AtomicU64}, Arc, @@ -87,9 +88,10 @@ use pd_client::FeatureGate; use raftstore::store::{util::build_key_range, ReadStats, TxnExt, WriteStats}; use rand::prelude::*; use resource_metering::{FutureExt, ResourceTagFactory}; -use tikv_kv::SnapshotExt; +use tikv_kv::{OnAppliedCb, SnapshotExt}; use tikv_util::{ deadline::Deadline, + future::try_poll, quota_limiter::QuotaLimiter, time::{duration_to_ms, Instant, ThreadReadId}, }; @@ -1548,11 +1550,18 @@ impl Storage { let mut batch = WriteData::from_modifies(modifies); batch.set_allowed_on_disk_almost_full(); - self.engine.async_write( + let res = kv::write( + &self.engine, &ctx, batch, - Box::new(|res| callback(res.map_err(Error::from))), - )?; + Some(Box::new(|res| { + callback(mem::replace(res, Ok(())).map_err(Error::from)) + })), + ); + // TODO: perhaps change delete_range API to return future. + if let Some(Some(Err(e))) = try_poll(res) { + return Err(Error::from(e)); + } KV_COMMAND_COUNTER_VEC_STATIC.delete_range.inc(); Ok(()) } @@ -1951,14 +1960,12 @@ impl Storage { let mut batch = WriteData::from_modifies(vec![m]); batch.set_allowed_on_disk_almost_full(); - let (cb, f) = tikv_util::future::paired_future_callback(); - let async_ret = - engine.async_write(&ctx, batch, Box::new(|res| cb(res.map_err(Error::from)))); - let v: Result<()> = match async_ret { - Err(e) => Err(Error::from(e)), - Ok(_) => f.await.unwrap(), - }; - callback(v); + let res = kv::write(&engine, &ctx, batch, None); + callback( + res.await + .unwrap_or_else(|| Err(box_err!("stale command"))) + .map_err(Error::from), + ); KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); SCHED_STAGE_COUNTER_VEC.get(CMD).write_finish.inc(); SCHED_HISTOGRAM_VEC_STATIC @@ -2054,14 +2061,12 @@ impl Storage { let modifies = Self::raw_batch_put_requests_to_modifies(cf, pairs, ttls, ts.unwrap()); let mut batch = WriteData::from_modifies(modifies); batch.set_allowed_on_disk_almost_full(); - let (cb, f) = 
tikv_util::future::paired_future_callback(); - let async_ret = - engine.async_write(&ctx, batch, Box::new(|res| cb(res.map_err(Error::from)))); - let v: Result<()> = match async_ret { - Err(e) => Err(Error::from(e)), - Ok(_) => f.await.unwrap(), - }; - callback(v); + let res = kv::write(&engine, &ctx, batch, None); + callback( + res.await + .unwrap_or_else(|| Err(box_err!("stale command"))) + .map_err(Error::from), + ); KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); SCHED_STAGE_COUNTER_VEC.get(CMD).write_finish.inc(); SCHED_HISTOGRAM_VEC_STATIC @@ -2118,14 +2123,12 @@ impl Storage { let m = Self::raw_delete_request_to_modify(cf, key, ts.unwrap()); let mut batch = WriteData::from_modifies(vec![m]); batch.set_allowed_on_disk_almost_full(); - let (cb, f) = tikv_util::future::paired_future_callback(); - let async_ret = - engine.async_write(&ctx, batch, Box::new(|res| cb(res.map_err(Error::from)))); - let v: Result<()> = match async_ret { - Err(e) => Err(Error::from(e)), - Ok(_) => f.await.unwrap(), - }; - callback(v); + let res = kv::write(&engine, &ctx, batch, None); + callback( + res.await + .unwrap_or_else(|| Err(box_err!("stale command"))) + .map_err(Error::from), + ); KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); SCHED_STAGE_COUNTER_VEC.get(CMD).write_finish.inc(); SCHED_HISTOGRAM_VEC_STATIC @@ -2171,14 +2174,12 @@ impl Storage { batch.set_allowed_on_disk_almost_full(); // TODO: special notification channel for API V2. 
- let (cb, f) = tikv_util::future::paired_future_callback(); - let async_ret = - engine.async_write(&ctx, batch, Box::new(|res| cb(res.map_err(Error::from)))); - let v: Result<()> = match async_ret { - Err(e) => Err(Error::from(e)), - Ok(_) => f.await.unwrap(), - }; - callback(v); + let res = kv::write(&engine, &ctx, batch, None); + callback( + res.await + .unwrap_or_else(|| Err(box_err!("stale command"))) + .map_err(Error::from), + ); KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); SCHED_STAGE_COUNTER_VEC.get(CMD).write_finish.inc(); SCHED_HISTOGRAM_VEC_STATIC @@ -2231,14 +2232,12 @@ impl Storage { .collect(); let mut batch = WriteData::from_modifies(modifies); batch.set_allowed_on_disk_almost_full(); - let (cb, f) = tikv_util::future::paired_future_callback(); - let async_ret = - engine.async_write(&ctx, batch, Box::new(|res| cb(res.map_err(Error::from)))); - let v: Result<()> = match async_ret { - Err(e) => Err(Error::from(e)), - Ok(_) => f.await.unwrap(), - }; - callback(v); + let res = kv::write(&engine, &ctx, batch, None); + callback( + res.await + .unwrap_or_else(|| Err(box_err!("stale command"))) + .map_err(Error::from), + ); KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); SCHED_STAGE_COUNTER_VEC.get(CMD).write_finish.inc(); SCHED_HISTOGRAM_VEC_STATIC @@ -2993,13 +2992,15 @@ impl Engine for TxnTestEngine { } } + type WriteRes = E::WriteRes; fn async_write( &self, ctx: &Context, batch: WriteData, - write_cb: tikv_kv::Callback<()>, - ) -> tikv_kv::Result<()> { - self.engine.async_write(ctx, batch, write_cb) + subscribed: u8, + on_applied: Option, + ) -> Self::WriteRes { + self.engine.async_write(ctx, batch, subscribed, on_applied) } } diff --git a/src/storage/raw/raw_mvcc.rs b/src/storage/raw/raw_mvcc.rs index 6d86203e8f2..8c4ad5da08b 100644 --- a/src/storage/raw/raw_mvcc.rs +++ b/src/storage/raw/raw_mvcc.rs @@ -232,11 +232,7 @@ impl Iterator for RawMvccIterator { #[cfg(test)] mod tests { - use std::{ - fmt::Debug, - iter::Iterator as StdIterator, - 
sync::mpsc::{channel, Sender}, - }; + use std::iter::Iterator as StdIterator; use api_version::{ApiV2, KvFormat, RawValue}; use engine_traits::{raw_ttl::ttl_to_expire_ts, CF_DEFAULT}; @@ -244,21 +240,13 @@ mod tests { use tikv_kv::{Engine, Iterator as EngineIterator, Modify, WriteData}; use super::*; - use crate::storage::{raw::encoded::RawEncodeSnapshot, TestEngineBuilder}; - - fn expect_ok_callback(done: Sender, id: i32) -> tikv_kv::Callback { - Box::new(move |x: tikv_kv::Result| { - x.unwrap(); - done.send(id).unwrap(); - }) - } + use crate::storage::{kv, raw::encoded::RawEncodeSnapshot, TestEngineBuilder}; #[test] fn test_raw_mvcc_snapshot() { // Use `Engine` to be independent to `Storage`. // Do not set "api version" to use `Engine` as a raw RocksDB. let mut engine = TestEngineBuilder::new().build().unwrap(); - let (tx, rx) = channel(); let ctx = Context::default(); // TODO: Consider another way other than hard coding, to generate keys' prefix @@ -291,10 +279,8 @@ mod tests { ApiV2::encode_raw_value_owned(raw_value), ); let batch = WriteData::from_modifies(vec![m]); - engine - .async_write(&ctx, batch, expect_ok_callback(tx.clone(), 0)) - .unwrap(); - rx.recv().unwrap(); + let res = futures::executor::block_on(kv::write(&engine, &ctx, batch, None)).unwrap(); + res.unwrap(); } // snapshot diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 6fd9d150478..4657decf66f 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -39,7 +39,7 @@ use collections::HashMap; use concurrency_manager::{ConcurrencyManager, KeyHandleGuard}; use crossbeam::utils::CachePadded; use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; -use futures::compat::Future01CompatExt; +use futures::{compat::Future01CompatExt, StreamExt}; use kvproto::{ kvrpcpb::{self, CommandPri, Context, DiskFullOpt, ExtraOp}, pdpb::QueryKind, @@ -49,7 +49,7 @@ use pd_client::{Feature, FeatureGate}; use raftstore::store::TxnExt; use resource_metering::{FutureExt, 
ResourceTagFactory}; use smallvec::{smallvec, SmallVec}; -use tikv_kv::{Modify, Snapshot, SnapshotExt, WriteData}; +use tikv_kv::{Modify, Snapshot, SnapshotExt, WriteData, WriteEvent}; use tikv_util::{ deadline::Deadline, quota_limiter::QuotaLimiter, time::Instant, timer::GLOBAL_TIMER_HANDLE, }; @@ -63,8 +63,8 @@ use crate::{ errors::SharedError, get_causal_ts, get_priority_tag, get_raw_key_guard, kv::{ - self, with_tls_engine, Engine, ExtCallback, FlowStatsReporter, Result as EngineResult, - SnapContext, Statistics, + self, with_tls_engine, Engine, FlowStatsReporter, Result as EngineResult, SnapContext, + Statistics, }, lock_manager::{ self, @@ -346,18 +346,11 @@ impl SchedulerInner { .and_then(|tctx| if tctx.try_own() { tctx.cb.take() } else { None }) } - fn take_task_cb_and_pr( - &self, - cid: u64, - ) -> (Option, Option) { + fn take_task_cb(&self, cid: u64) -> Option { self.get_task_slot(cid) .get_mut(&cid) - .map(|tctx| (tctx.cb.take(), tctx.pr.take())) - .unwrap_or((None, None)) - } - - fn store_pr(&self, cid: u64, pr: ProcessResult) { - self.get_task_slot(cid).get_mut(&cid).unwrap().pr = Some(pr); + .map(|tctx| tctx.cb.take()) + .unwrap_or(None) } fn store_lock_changes( @@ -1133,7 +1126,6 @@ impl Scheduler { let write_bytes = task.cmd.write_bytes(); let tag = task.cmd.tag(); let cid = task.cid; - let priority = task.cmd.priority(); let tracker = task.tracker; let scheduler = self.clone(); let quota_limiter = self.inner.quota_limiter.clone(); @@ -1313,65 +1305,16 @@ impl Scheduler { to_be_write.deadline = Some(deadline); let sched = scheduler.clone(); - let sched_pool = scheduler.get_sched_pool(priority).pool.clone(); - - let (proposed_cb, committed_cb): (Option, Option) = - match response_policy { - ResponsePolicy::OnApplied => (None, None), - ResponsePolicy::OnCommitted => { - self.inner.store_pr(cid, pr.take().unwrap()); - let sched = scheduler.clone(); - // Currently, the only case that response is returned after finishing - // commit is async applying 
prewrites for async commit transactions. - // The committed callback is not guaranteed to be invoked. So store - // the `pr` to the tctx instead of capturing it to the closure. - let committed_cb = Box::new(move || { - fail_point!("before_async_apply_prewrite_finish", |_| {}); - let (cb, pr) = sched.inner.take_task_cb_and_pr(cid); - Self::early_response( - cid, - cb.unwrap(), - pr.unwrap(), - tag, - CommandStageKind::async_apply_prewrite, - ); - }); - is_async_apply_prewrite = true; - (None, Some(committed_cb)) - } - ResponsePolicy::OnProposed => { - if pipelined { - // The normal write process is respond to clients and release - // latches after async write finished. If pipelined pessimistic - // locking is enabled, the process becomes parallel and there are - // two msgs for one command: - // 1. Msg::PipelinedWrite: respond to clients - // 2. Msg::WriteFinished: deque context and release latches - // The proposed callback is not guaranteed to be invoked. So store - // the `pr` to the tctx instead of capturing it to the closure. - self.inner.store_pr(cid, pr.take().unwrap()); - let sched = scheduler.clone(); - // Currently, the only case that response is returned after finishing - // proposed phase is pipelined pessimistic lock. - // TODO: Unify the code structure of pipelined pessimistic lock and - // async apply prewrite. 
- let proposed_cb = Box::new(move || { - fail_point!("before_pipelined_write_finish", |_| {}); - let (cb, pr) = sched.inner.take_task_cb_and_pr(cid); - Self::early_response( - cid, - cb.unwrap(), - pr.unwrap(), - tag, - CommandStageKind::pipelined_write, - ); - }); - (Some(proposed_cb), None) - } else { - (None, None) - } - } - }; + + let mut subscribed = WriteEvent::BASIC_EVENT; + match response_policy { + ResponsePolicy::OnCommitted => { + subscribed |= WriteEvent::EVENT_COMMITTED; + is_async_apply_prewrite = true; + } + ResponsePolicy::OnProposed if pipelined => subscribed |= WriteEvent::EVENT_PROPOSED, + _ => (), + } if self.inner.flow_controller.enabled() { if self.inner.flow_controller.is_unlimited(region_id) { @@ -1447,15 +1390,11 @@ impl Scheduler { // transfer leader command must be later than this write command because this // write command has been sent to the raftstore. Then, we don't need to worry // this request will fail due to the voluntary leader transfer. - let _downgraded_guard = pessimistic_locks_guard.and_then(|guard| { + let downgraded_guard = pessimistic_locks_guard.and_then(|guard| { (!removed_pessimistic_locks.is_empty()).then(|| RwLockWriteGuard::downgrade(guard)) }); - - // The callback to receive async results of write prepare from the storage - // engine. - let engine_cb = Box::new(move |result: EngineResult<()>| { - let ok = result.is_ok(); - if ok && !removed_pessimistic_locks.is_empty() { + let on_applied = Box::new(move |res: &mut kv::Result<()>| { + if res.is_ok() && !removed_pessimistic_locks.is_empty() { // Removing pessimistic locks when it succeeds to apply. This should be done in // the apply thread, to make sure it happens before other admin commands are // executed. 
@@ -1472,15 +1411,69 @@ impl Scheduler { } } } + }); - sched_pool - .spawn(async move { + let mut res = unsafe { + with_tls_engine(|e: &mut E| { + e.async_write(&ctx, to_be_write, subscribed, Some(on_applied)) + }) + }; + drop(downgraded_guard); + + while let Some(ev) = res.next().await { + match ev { + WriteEvent::Committed => { + let early_return = (|| { + fail_point!("before_async_apply_prewrite_finish", |_| false); + true + })(); + if WriteEvent::subscribed_committed(subscribed) && early_return { + // Currently, the only case that response is returned after finishing + // commit is async applying prewrites for async commit transactions. + let cb = scheduler.inner.take_task_cb(cid); + Self::early_response( + cid, + cb.unwrap(), + pr.take().unwrap(), + tag, + CommandStageKind::async_apply_prewrite, + ); + } + } + WriteEvent::Proposed => { + let early_return = (|| { + fail_point!("before_pipelined_write_finish", |_| false); + true + })(); + if WriteEvent::subscribed_proposed(subscribed) && early_return { + // The normal write process is respond to clients and release + // latches after async write finished. If pipelined pessimistic + // locking is enabled, the process becomes parallel and there are + // two msgs for one command: + // 1. Msg::PipelinedWrite: respond to clients + // 2. Msg::WriteFinished: deque context and release latches + // Currently, the only case that response is returned after finishing + // proposed phase is pipelined pessimistic lock. + // TODO: Unify the code structure of pipelined pessimistic lock and + // async apply prewrite. 
+ let cb = scheduler.inner.take_task_cb(cid); + Self::early_response( + cid, + cb.unwrap(), + pr.take().unwrap(), + tag, + CommandStageKind::pipelined_write, + ); + } + } + WriteEvent::Finished(res) => { fail_point!("scheduler_async_write_finish"); + let ok = res.is_ok(); sched.on_write_finished( cid, pr, - result, + res, lock_guards, pipelined, is_async_apply_prewrite, @@ -1498,23 +1491,14 @@ impl Scheduler { sched.inner.flow_controller.unconsume(region_id, write_size); } } - }) - .unwrap() - }); - - // Safety: `self.sched_pool` ensures a TLS engine exists. - unsafe { - with_tls_engine(|engine: &mut E| { - if let Err(e) = - engine.async_write_ext(&ctx, to_be_write, engine_cb, proposed_cb, committed_cb) - { - SCHED_STAGE_COUNTER_VEC.get(tag).async_write_err.inc(); - - info!("engine async_write failed"; "cid" => cid, "err" => ?e); - scheduler.finish_with_err(cid, e); + return; } - }) + } } + // If it's not finished while the channel is closed, it means the write + // is undeterministic. in this case, we don't know whether the + // request is finished or not, so we should not release latch as + // it may break correctness. 
} /// Returns whether it succeeds to write pessimistic locks to the in-memory diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 043e3ad2d23..5f29d44a53d 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -146,6 +146,7 @@ test_sst_importer = { workspace = true } test_storage = { workspace = true } test_util = { workspace = true } tidb_query_datatype = { workspace = true } +tikv_kv = { workspace = true } tipb_helper = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread"] } diff --git a/tests/benches/hierarchy/mvcc/mod.rs b/tests/benches/hierarchy/mvcc/mod.rs index 20740b4cb16..7a79b984aaf 100644 --- a/tests/benches/hierarchy/mvcc/mod.rs +++ b/tests/benches/hierarchy/mvcc/mod.rs @@ -60,7 +60,7 @@ where .unwrap(); } let write_data = WriteData::from_modifies(txn.into_modifies()); - let _ = engine.async_write(&ctx, write_data, Box::new(move |_| {})); + let _ = tikv_kv::write(engine, &ctx, write_data, None); let keys: Vec = kvs.iter().map(|(k, _)| Key::from_raw(k)).collect(); let snapshot = engine.snapshot(Default::default()).unwrap(); (snapshot, keys) diff --git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index bc4786ae73e..d567edd5add 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -226,17 +226,18 @@ fn bench_async_write(b: &mut test::Bencher) { ctx.set_region_epoch(region.get_region_epoch().clone()); ctx.set_peer(leader); b.iter(|| { - let on_finished: EngineCallback<()> = Box::new(|_| { - test::black_box(()); - }); - kv.async_write( + let f = tikv_kv::write( + &kv, &ctx, WriteData::from_modifies(vec![Modify::Delete( CF_DEFAULT, Key::from_encoded(b"fooo".to_vec()), )]), - on_finished, - ) - .unwrap(); + None, + ); + let res = f.map(|res| { + let _ = test::black_box(res); + }); + let _ = test::black_box(res); }); } From fbaaab32100292a54909b69649d15ee0e75fe58e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: 
Wed, 30 Nov 2022 17:50:00 +0800 Subject: [PATCH 0373/1149] log-backup: implement subscribe flushing (#13810) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#13824 Implements the new gRPC interface "SubscribeFlush". Signed-off-by: hillium Signed-off-by: Yu Juncen Signed-off-by: 山岚 <36239017+YuJuncen@users.noreply.github.com> Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- .../backup-stream/src/checkpoint_manager.rs | 164 +++++++++++++++++- components/backup-stream/src/endpoint.rs | 22 ++- components/backup-stream/src/errors.rs | 20 +++ components/backup-stream/src/service.rs | 12 ++ components/backup-stream/tests/mod.rs | 118 ++++++++++++- components/error_code/src/backup_stream.rs | 7 +- 6 files changed, 328 insertions(+), 15 deletions(-) diff --git a/components/backup-stream/src/checkpoint_manager.rs b/components/backup-stream/src/checkpoint_manager.rs index 3a13acd2f4c..e9f930e8563 100644 --- a/components/backup-stream/src/checkpoint_manager.rs +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -2,16 +2,25 @@ use std::{collections::HashMap, sync::Arc, time::Duration}; +use futures::{ + channel::mpsc::{self as async_mpsc, Receiver, Sender}, + SinkExt, StreamExt, +}; +use grpcio::{RpcStatus, RpcStatusCode, ServerStreamingSink, WriteFlags}; use kvproto::{ errorpb::{Error as PbError, *}, + logbackuppb::{FlushEvent, SubscribeFlushEventResponse}, metapb::Region, }; use pd_client::PdClient; -use tikv_util::{info, worker::Scheduler}; +use tikv_util::{box_err, defer, info, warn, worker::Scheduler}; use txn_types::TimeStamp; +use uuid::Uuid; use crate::{ - errors::{Error, Result}, + annotate, + errors::{Error, ReportableResult, Result}, + future, metadata::{store::MetaStore, Checkpoint, CheckpointProvider, MetadataClient}, metrics, try_send, RegionCheckpointOperation, Task, }; @@ -20,11 +29,85 @@ use crate::{ /// This information is provided for the `advancer` in checkpoint V3, /// which involved a 
central node (typically TiDB) for collecting all regions' /// checkpoint then advancing the global checkpoint. -#[derive(Debug, Default)] +#[derive(Default)] pub struct CheckpointManager { items: HashMap, + manager_handle: Option>, +} + +impl std::fmt::Debug for CheckpointManager { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CheckpointManager") + .field("items", &self.items) + .finish() + } +} + +enum SubscriptionOp { + Add(Subscription), + Emit(Box<[FlushEvent]>), +} + +struct SubscriptionManager { + subscribers: HashMap, + input: Receiver, } +impl SubscriptionManager { + pub async fn main_loop(mut self) { + info!("subscription manager started!"); + defer! { info!("subscription manager exit.") } + while let Some(msg) = self.input.next().await { + match msg { + SubscriptionOp::Add(sub) => { + self.subscribers.insert(Uuid::new_v4(), sub); + } + SubscriptionOp::Emit(events) => { + let mut canceled = vec![]; + for (id, sub) in &mut self.subscribers { + let send_all = async { + for es in events.chunks(1024) { + let mut resp = SubscribeFlushEventResponse::new(); + resp.set_events(es.to_vec().into()); + sub.feed((resp, WriteFlags::default())).await?; + } + sub.flush().await + }; + + match send_all.await { + Err(grpcio::Error::RemoteStopped) => { + canceled.push(*id); + } + Err(err) => { + Error::from(err).report("sending subscription"); + } + _ => {} + } + } + + for c in canceled { + match self.subscribers.remove(&c) { + Some(mut sub) => { + info!("client is gone, removing subscription"; "id" => %c); + sub.close().await.report_if_err(format_args!( + "during removing subscription {}", + c + )) + } + None => { + warn!("BUG: the subscriber has been removed before we are going to remove it."; "id" => %c); + } + } + } + } + } + } + } +} + +// Note: can we make it more generic...? +pub type Subscription = ServerStreamingSink; + /// The result of getting a checkpoint. 
/// The possibility of failed to getting checkpoint is pretty high: /// because there is a gap between region leader change and flushing. @@ -76,8 +159,81 @@ impl CheckpointManager { self.items.clear(); } + pub fn spawn_subscription_mgr(&mut self) -> future![()] { + let (tx, rx) = async_mpsc::channel(1024); + let sub = SubscriptionManager { + subscribers: Default::default(), + input: rx, + }; + self.manager_handle = Some(tx); + sub.main_loop() + } + + pub fn update_region_checkpoints(&mut self, region_and_checkpoint: Vec<(Region, TimeStamp)>) { + for (region, checkpoint) in ®ion_and_checkpoint { + self.do_update(region, *checkpoint); + } + + self.notify(region_and_checkpoint.into_iter()); + } + /// update a region checkpoint in need. + #[cfg(test)] pub fn update_region_checkpoint(&mut self, region: &Region, checkpoint: TimeStamp) { + self.do_update(region, checkpoint); + self.notify(std::iter::once((region.clone(), checkpoint))); + } + + pub fn add_subscriber(&mut self, sub: Subscription) -> future![Result<()>] { + let mgr = self.manager_handle.as_ref().cloned(); + + // NOTE: we cannot send the real error into the client directly because once + // we send the subscription into the sink, we cannot fetch it again :( + async move { + let mgr = mgr.ok_or(Error::Other(box_err!("subscription manager not get ready"))); + let mut mgr = match mgr { + Ok(mgr) => mgr, + Err(err) => { + sub.fail(RpcStatus::with_message( + RpcStatusCode::UNAVAILABLE, + "subscription manager not get ready.".to_owned(), + )) + .await + .map_err(|err| { + annotate!(err, "failed to send request to subscriber manager") + })?; + return Err(err); + } + }; + mgr.send(SubscriptionOp::Add(sub)) + .await + .map_err(|err| annotate!(err, "failed to send request to subscriber manager"))?; + Ok(()) + } + } + + fn notify(&mut self, items: impl Iterator) { + if let Some(mgr) = self.manager_handle.as_mut() { + let r = items + .map(|(r, ts)| { + let mut f = FlushEvent::new(); + f.set_checkpoint(ts.into_inner()); 
+ f.set_start_key(r.start_key); + f.set_end_key(r.end_key); + f + }) + .collect::>(); + let event_size = r.len(); + let res = mgr.try_send(SubscriptionOp::Emit(r)); + // Note: perhaps don't batch in the channel but batch in the receiver side? + // If so, we can control the memory usage better. + if let Err(err) = res { + warn!("the channel is full, dropping some events."; "length" => %event_size, "err" => %err); + } + } + } + + fn do_update(&mut self, region: &Region, checkpoint: TimeStamp) { let e = self.items.entry(region.get_id()); e.and_modify(|old_cp| { if old_cp.checkpoint < checkpoint @@ -199,7 +355,7 @@ impl FlushObserver for BasicFlushObserver { .pd_cli .update_service_safe_point( format!("backup-stream-{}-{}", task, self.store_id), - TimeStamp::new(rts - 1), + TimeStamp::new(rts.saturating_sub(1)), // Add a service safe point for 30 mins (6x the default flush interval). // It would probably be safe. Duration::from_secs(1800), diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 22a415ca6bb..2ebeee2ea66 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -42,7 +42,7 @@ use crate::{ annotate, checkpoint_manager::{ BasicFlushObserver, CheckpointManager, CheckpointV3FlushObserver, FlushObserver, - GetCheckpointResult, RegionIdWithVersion, + GetCheckpointResult, RegionIdWithVersion, Subscription, }, errors::{Error, Result}, event_loader::{InitialDataLoader, PendingMemoryQuota}, @@ -165,6 +165,8 @@ where ((config.num_threads + 1) / 2).max(1), ); pool.spawn(op_loop); + let mut checkpoint_mgr = CheckpointManager::default(); + pool.spawn(checkpoint_mgr.spawn_subscription_mgr()); Endpoint { meta_client, range_router, @@ -183,7 +185,7 @@ where region_operator, failover_time: None, config, - checkpoint_mgr: Default::default(), + checkpoint_mgr, } } } @@ -887,11 +889,7 @@ where // Let's clear all stale checkpoints first. // Or they may slow down the global checkpoint. 
self.checkpoint_mgr.clear(); - for (region, checkpoint) in u { - debug!("setting region checkpoint"; "region" => %region.get_id(), "ts" => %checkpoint); - self.checkpoint_mgr - .update_region_checkpoint(®ion, checkpoint) - } + self.checkpoint_mgr.update_region_checkpoints(u); } RegionCheckpointOperation::Get(g, cb) => { let _guard = self.pool.handle().enter(); @@ -911,6 +909,14 @@ where .collect()), } } + RegionCheckpointOperation::Subscribe(sub) => { + let fut = self.checkpoint_mgr.add_subscriber(sub); + self.pool.spawn(async move { + if let Err(err) = fut.await { + err.report("adding subscription"); + } + }); + } } } @@ -957,6 +963,7 @@ pub enum RegionSet { pub enum RegionCheckpointOperation { Update(Vec<(Region, TimeStamp)>), Get(RegionSet, Box) + Send>), + Subscribe(Subscription), } impl fmt::Debug for RegionCheckpointOperation { @@ -964,6 +971,7 @@ impl fmt::Debug for RegionCheckpointOperation { match self { Self::Update(arg0) => f.debug_tuple("Update").field(arg0).finish(), Self::Get(arg0, _) => f.debug_tuple("Get").field(arg0).finish(), + Self::Subscribe(_) => f.debug_tuple("Subscription").finish(), } } } diff --git a/components/backup-stream/src/errors.rs b/components/backup-stream/src/errors.rs index b34e7126360..a3f76e0255f 100644 --- a/components/backup-stream/src/errors.rs +++ b/components/backup-stream/src/errors.rs @@ -6,6 +6,7 @@ use std::{ use error_code::ErrorCodeExt; use etcd_client::Error as EtcdError; +use grpcio::Error as GrpcError; use kvproto::{errorpb::Error as StoreError, metapb::*}; use pd_client::Error as PdError; use protobuf::ProtobufError; @@ -18,6 +19,8 @@ use crate::{endpoint::Task, metrics}; #[derive(ThisError, Debug)] pub enum Error { + #[error("gRPC meet error {0}")] + Grpc(#[from] GrpcError), #[error("Etcd meet error {0}")] Etcd(#[from] EtcdError), #[error("Protobuf meet error {0}")] @@ -66,6 +69,7 @@ impl ErrorCodeExt for Error { Error::Other(_) => OTHER, Error::RaftStore(_) => RAFTSTORE, Error::ObserveCanceled(..) 
=> OBSERVE_CANCELED, + Error::Grpc(_) => GRPC, } } } @@ -115,6 +119,22 @@ where } } +pub trait ReportableResult { + fn report_if_err(self, context: impl ToString); +} + +impl ReportableResult for StdResult<(), E> +where + Error: From, +{ + #[inline(always)] + fn report_if_err(self, context: impl ToString) { + if let Err(err) = self { + Error::from(err).report(context.to_string()) + } + } +} + /// Like `errors.Annotate` in Go. /// Wrap an unknown error with [`Error::Other`]. #[macro_export(crate)] diff --git a/components/backup-stream/src/service.rs b/components/backup-stream/src/service.rs index 47a149973b2..9d312a984d1 100644 --- a/components/backup-stream/src/service.rs +++ b/components/backup-stream/src/service.rs @@ -89,4 +89,16 @@ impl LogBackup for Service { )); try_send!(self.endpoint, t); } + + fn subscribe_flush_event( + &mut self, + _ctx: grpcio::RpcContext<'_>, + _req: kvproto::logbackuppb::SubscribeFlushEventRequest, + sink: grpcio::ServerStreamingSink, + ) { + try_send!( + self.endpoint, + Task::RegionCheckpointsOp(RegionCheckpointOperation::Subscribe(sink)) + ); + } } diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index 2cc6016aeb1..57932acae0d 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -19,13 +19,15 @@ use backup_stream::{ }, observer::BackupStreamObserver, router::Router, - Endpoint, GetCheckpointResult, RegionCheckpointOperation, RegionSet, Task, + Endpoint, GetCheckpointResult, RegionCheckpointOperation, RegionSet, Service, Task, }; -use futures::{executor::block_on, AsyncWriteExt, Future}; -use grpcio::ChannelBuilder; +use futures::{executor::block_on, AsyncWriteExt, Future, Stream, StreamExt, TryStreamExt}; +use grpcio::{ChannelBuilder, Server, ServerBuilder}; use kvproto::{ brpb::{CompressionType, Local, Metadata, StorageBackend}, kvrpcpb::*, + logbackuppb::{SubscribeFlushEventRequest, SubscribeFlushEventResponse}, + 
logbackuppb_grpc::{create_log_backup, LogBackupClient}, tikvpb::*, }; use pd_client::PdClient; @@ -156,6 +158,8 @@ impl SuiteBuilder { }, obs: Default::default(), tikv_cli: Default::default(), + log_backup_cli: Default::default(), + servers: Default::default(), env: Arc::new(grpcio::Environment::new(1)), cluster, @@ -172,6 +176,8 @@ impl SuiteBuilder { cfg_f(&mut cfg); for id in 1..=(n as u64) { suite.start_endpoint(id, cfg.clone()); + let cli = suite.start_log_backup_client_on(id); + suite.log_backup_cli.insert(id, cli); } // We must wait until the endpoints get ready to watching the metastore, or some // modifies may be lost. Either make Endpoint::with_client wait until watch did @@ -222,8 +228,11 @@ pub struct Suite { meta_store: ErrorStore, cluster: Cluster, tikv_cli: HashMap, + log_backup_cli: HashMap, obs: HashMap, env: Arc, + // The place to make services live as long as suite. + servers: Vec, temp_files: TempDir, flushed_files: TempDir, @@ -263,6 +272,51 @@ impl Suite { worker } + /// create a subscription stream. this has simply asserted no error, because + /// in theory observing flushing should not emit error. change that if + /// needed. 
+ fn flush_stream(&self) -> impl Stream { + let streams = self + .log_backup_cli + .iter() + .map(|(id, cli)| { + let stream = cli + .subscribe_flush_event(&{ + let mut r = SubscribeFlushEventRequest::default(); + r.set_client_id(format!("test-{}", id)); + r + }) + .unwrap_or_else(|err| panic!("failed to subscribe on {} because {}", id, err)); + let id = *id; + stream.map_ok(move |x| (id, x)).map(move |x| { + x.unwrap_or_else(move |err| panic!("failed to rec from {} because {}", id, err)) + }) + }) + .collect::>(); + + futures::stream::select_all(streams) + } + + fn start_log_backup_client_on(&mut self, id: u64) -> LogBackupClient { + let endpoint = self + .endpoints + .get(&id) + .expect("must register endpoint first"); + + let serv = Service::new(endpoint.scheduler()); + let builder = + ServerBuilder::new(self.env.clone()).register_service(create_log_backup(serv)); + let mut server = builder.bind("127.0.0.1", 0).build().unwrap(); + server.start(); + let (_, port) = server.bind_addrs().next().unwrap(); + let addr = format!("127.0.0.1:{}", port); + let channel = ChannelBuilder::new(self.env.clone()).connect(&addr); + println!("connecting channel to {} for store {}", addr, id); + let client = LogBackupClient::new(channel); + self.servers.push(server); + client + } + fn start_endpoint(&mut self, id: u64, mut cfg: BackupStreamConfig) { let cluster = &mut self.cluster; let worker = self.endpoints.get_mut(&id).unwrap(); @@ -747,8 +801,10 @@ mod test { errors::Error, router::TaskSelector, GetCheckpointResult, RegionCheckpointOperation, RegionSet, Task, }; + use futures::{Stream, StreamExt}; use pd_client::PdClient; use tikv_util::{box_err, defer, info, HandyRwLock}; + use tokio::time::timeout; use txn_types::{Key, TimeStamp}; use crate::{ @@ -1174,4 +1230,60 @@ mod test { checkpoint ); } + + async fn collect_current(mut s: impl Stream + Unpin, goal: usize) -> Vec { + let mut r = vec![]; + while let Ok(Some(x)) = timeout(Duration::from_secs(10), s.next()).await { + 
r.push(x); + if r.len() >= goal { + return r; + } + } + r + } + + #[test] + fn subscribe_flushing() { + let mut suite = super::SuiteBuilder::new_named("sub_flush").build(); + let stream = suite.flush_stream(); + for i in 1..10 { + let split_key = make_split_key_at_record(1, i * 20); + suite.must_split(&split_key); + suite.must_shuffle_leader(suite.cluster.get_region_id(&split_key)); + } + + let round1 = run_async_test(suite.write_records(0, 128, 1)); + suite.must_register_task(1, "sub_flush"); + let round2 = run_async_test(suite.write_records(256, 128, 1)); + suite.sync(); + suite.force_flush_files("sub_flush"); + + let mut items = run_async_test(async { + collect_current( + stream.flat_map(|(_, r)| futures::stream::iter(r.events.into_iter())), + 10, + ) + .await + }); + + items.sort_by(|x, y| x.start_key.cmp(&y.start_key)); + + println!("{:?}", items); + assert_eq!(items.len(), 10); + + assert_eq!(items.first().unwrap().start_key, Vec::::default()); + for w in items.windows(2) { + let a = &w[0]; + let b = &w[1]; + assert!(a.checkpoint > 512); + assert!(b.checkpoint > 512); + assert_eq!(a.end_key, b.start_key); + } + assert_eq!(items.last().unwrap().end_key, Vec::::default()); + + run_async_test(suite.check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(|x| x.as_slice()), + )); + } } diff --git a/components/error_code/src/backup_stream.rs b/components/error_code/src/backup_stream.rs index 9448169cc05..a4b28b0e9ee 100644 --- a/components/error_code/src/backup_stream.rs +++ b/components/error_code/src/backup_stream.rs @@ -41,12 +41,17 @@ define_error_codes! { ), RAFTREQ => ("RaftReq", "Error happened when sending raft command.", - "This is an internal error, please ask the community for help." + "This is an internal error, most of them are happen while initial scanning and can be simply retried." ), RAFTSTORE => ("RaftStore", "Error happened reported from raft store.", "This is an internal error, please ask the community for help." 
), + GRPC => ("gRPC", + "Error happened during executing gRPC", + "This error is often relative to the network, please check the network connection and network config, say, TLS config." + ), + OTHER => ("Unknown", "Some random error happens.", "This is an generic error, please check the error message for further information." From 372ea1af320e8ba4e63dce989ddf6331e37142ac Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Wed, 30 Nov 2022 21:48:02 +0800 Subject: [PATCH 0374/1149] pitr: support skipping download kv files when pitr (#13802) close tikv/tikv#13788, close pingcap/tidb#39102 Signed-off-by: joccau Signed-off-by: Zak Zhao <57036248+joccau@users.noreply.github.com> Co-authored-by: Ti Chi Robot Co-authored-by: 3pointer --- components/backup-stream/tests/mod.rs | 2 +- components/error_code/src/sst_importer.rs | 3 +- .../external_storage/export/src/export.rs | 6 +- components/external_storage/src/lib.rs | 85 ++ components/sst_importer/src/config.rs | 10 + components/sst_importer/src/errors.rs | 20 +- components/sst_importer/src/sst_importer.rs | 782 ++++++++++++++++-- .../tikv_util/src/codec/stream_event.rs | 12 +- src/import/sst_service.rs | 31 +- tests/integrations/config/mod.rs | 1 + 10 files changed, 848 insertions(+), 104 deletions(-) diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index 57932acae0d..7256cd62c03 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -530,7 +530,7 @@ impl Suite { decoder.close().await.unwrap(); let content = decoder.into_inner(); - let mut iter = EventIterator::new(content); + let mut iter = EventIterator::new(&content); loop { if !iter.valid() { break; diff --git a/components/error_code/src/sst_importer.rs b/components/error_code/src/sst_importer.rs index 2eb6177458b..001f4f146f6 100644 --- a/components/error_code/src/sst_importer.rs +++ b/components/error_code/src/sst_importer.rs @@ -21,5 +21,6 @@ 
define_error_codes!( TTL_NOT_ENABLED => ("TtlNotEnabled", "", ""), TTL_LEN_NOT_EQUALS_TO_PAIRS => ("TtlLenNotEqualsToPairs", "", ""), INCOMPATIBLE_API_VERSION => ("IncompatibleApiVersion", "", ""), - INVALID_KEY_MODE => ("InvalidKeyMode", "", "") + INVALID_KEY_MODE => ("InvalidKeyMode", "", ""), + RESOURCE_NOT_ENOUTH => ("ResourceNotEnough", "", "") ); diff --git a/components/external_storage/export/src/export.rs b/components/external_storage/export/src/export.rs index ea02ebe2c6f..10363bf92b2 100644 --- a/components/external_storage/export/src/export.rs +++ b/components/external_storage/export/src/export.rs @@ -23,9 +23,9 @@ use external_storage::dylib_client; #[cfg(feature = "cloud-storage-grpc")] use external_storage::grpc_client; pub use external_storage::{ - compression_reader_dispatcher, encrypt_wrap_reader, read_external_storage_into_file, - record_storage_create, BackendConfig, ExternalStorage, HdfsStorage, LocalStorage, NoopStorage, - RestoreConfig, UnpinReader, + compression_reader_dispatcher, encrypt_wrap_reader, read_external_storage_info_buff, + read_external_storage_into_file, record_storage_create, BackendConfig, ExternalStorage, + HdfsStorage, LocalStorage, NoopStorage, RestoreConfig, UnpinReader, MIN_READ_SPEED, }; use futures_io::AsyncRead; #[cfg(feature = "cloud-gcp")] diff --git a/components/external_storage/src/lib.rs b/components/external_storage/src/lib.rs index 6bcbcfc839f..e1c57608197 100644 --- a/components/external_storage/src/lib.rs +++ b/components/external_storage/src/lib.rs @@ -296,3 +296,88 @@ pub async fn read_external_storage_into_file( Ok(()) } + +pub const MIN_READ_SPEED: usize = 8192; + +pub async fn read_external_storage_info_buff( + reader: &mut (dyn AsyncRead + Unpin), + speed_limiter: &Limiter, + expected_length: u64, + expected_sha256: Option>, + min_read_speed: usize, +) -> io::Result> { + // the minimum speed of reading data, in bytes/second. 
+ // if reading speed is slower than this rate, we will stop with + // a "TimedOut" error. + // (at 8 KB/s for a 2 MB buffer, this means we timeout after 4m16s.) + let read_speed = if min_read_speed > 0 { + min_read_speed + } else { + MIN_READ_SPEED + }; + let dur = Duration::from_secs((READ_BUF_SIZE / read_speed) as u64); + let mut output = Vec::new(); + let mut buffer = vec![0u8; READ_BUF_SIZE]; + + loop { + // separate the speed limiting from actual reading so it won't + // affect the timeout calculation. + let bytes_read = timeout(dur, reader.read(&mut buffer)) + .await + .map_err(|_| io::ErrorKind::TimedOut)??; + if bytes_read == 0 { + break; + } + + speed_limiter.consume(bytes_read).await; + output.append(&mut buffer[..bytes_read].to_vec()); + } + + // check length of file + if expected_length > 0 && output.len() != expected_length as usize { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "length not match, downloaded size {}, expected {}", + output.len(), + expected_length + ), + )); + } + // check sha256 of file + if let Some(sha256) = expected_sha256 { + let mut hasher = Hasher::new(MessageDigest::sha256()).map_err(|err| { + io::Error::new( + io::ErrorKind::Other, + format!("openssl hasher failed to init: {}", err), + ) + })?; + hasher.update(&output).map_err(|err| { + io::Error::new( + io::ErrorKind::Other, + format!("openssl hasher udpate failed: {}", err), + ) + })?; + + let cal_sha256 = hasher.finish().map_or_else( + |err| { + Err(io::Error::new( + io::ErrorKind::Other, + format!("openssl hasher finish failed: {}", err), + )) + }, + |bytes| Ok(bytes.to_vec()), + )?; + if !sha256.eq(&cal_sha256) { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "sha256 not match, expect: {:?}, calculate: {:?}", + sha256, cal_sha256, + ), + )); + } + } + + Ok(output) +} diff --git a/components/sst_importer/src/config.rs b/components/sst_importer/src/config.rs index ef74a40fd01..ac789e2f4ae 100644 --- 
a/components/sst_importer/src/config.rs +++ b/components/sst_importer/src/config.rs @@ -14,6 +14,8 @@ pub struct Config { /// /// Default is 10m. pub import_mode_timeout: ReadableDuration, + /// the ratio of system memory used for import. + pub memory_use_ratio: f64, } impl Default for Config { @@ -22,6 +24,7 @@ impl Default for Config { num_threads: 8, stream_channel_window: 128, import_mode_timeout: ReadableDuration::minutes(10), + memory_use_ratio: 0.3, } } } @@ -43,6 +46,13 @@ impl Config { ); self.stream_channel_window = default_cfg.stream_channel_window; } + if self.memory_use_ratio > 0.5 || self.memory_use_ratio < 0.0 { + warn!( + "import.mem_ratio should belong to [0.0, 0.5], change it to {}", + default_cfg.memory_use_ratio, + ); + self.memory_use_ratio = default_cfg.memory_use_ratio; + } Ok(()) } } diff --git a/components/sst_importer/src/errors.rs b/components/sst_importer/src/errors.rs index 51aabcbec01..7ff940fff12 100644 --- a/components/sst_importer/src/errors.rs +++ b/components/sst_importer/src/errors.rs @@ -8,7 +8,7 @@ use encryption::Error as EncryptionError; use error_code::{self, ErrorCode, ErrorCodeExt}; use futures::channel::oneshot::Canceled; use grpcio::Error as GrpcError; -use kvproto::{import_sstpb, kvrpcpb::ApiVersion}; +use kvproto::{errorpb, import_sstpb, kvrpcpb::ApiVersion}; use tikv_util::codec::Error as CodecError; use uuid::Error as UuidError; @@ -122,6 +122,9 @@ pub enum Error { storage_api_version: ApiVersion, key: String, }, + + #[error("resource is not enough {0}")] + ResourceNotEnough(String), } impl Error { @@ -149,7 +152,19 @@ pub type Result = result::Result; impl From for import_sstpb::Error { fn from(e: Error) -> import_sstpb::Error { let mut err = import_sstpb::Error::default(); - err.set_message(format!("{}", e)); + match e { + Error::ResourceNotEnough(ref msg) => { + let mut import_err = errorpb::Error::default(); + import_err.set_message(msg.clone()); + import_err.set_server_is_busy(errorpb::ServerIsBusy::default()); 
+ err.set_store_error(import_err); + err.set_message(format!("{}", e)); + } + _ => { + err.set_message(format!("{}", e)); + } + } + err } } @@ -181,6 +196,7 @@ impl ErrorCodeExt for Error { Error::TtlLenNotEqualsToPairs => error_code::sst_importer::TTL_LEN_NOT_EQUALS_TO_PAIRS, Error::IncompatibleApiVersion => error_code::sst_importer::INCOMPATIBLE_API_VERSION, Error::InvalidKeyMode { .. } => error_code::sst_importer::INVALID_KEY_MODE, + Error::ResourceNotEnough(_) => error_code::sst_importer::RESOURCE_NOT_ENOUTH, } } } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index abd616c5bc9..c024bca8e6d 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -4,10 +4,14 @@ use std::{ borrow::Cow, collections::HashMap, fs::File, - io::{self, prelude::*, BufReader}, + io::{self, BufReader, Read}, ops::Bound, path::{Path, PathBuf}, - sync::Arc, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + time::Duration, }; use dashmap::DashMap; @@ -18,6 +22,7 @@ use engine_traits::{ IterOptions, Iterator, KvEngine, RefIterable, SstCompressionType, SstExt, SstMetaInfo, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, CF_WRITE, }; +use external_storage_export::{compression_reader_dispatcher, encrypt_wrap_reader, RestoreConfig}; use file_system::{get_io_rate_limiter, OpenOptions}; use futures::executor::ThreadPool; use kvproto::{ @@ -26,7 +31,10 @@ use kvproto::{ kvrpcpb::ApiVersion, }; use tikv_util::{ - codec::stream_event::{EventIterator, Iterator as EIterator}, + codec::stream_event::{EventEncoder, EventIterator, Iterator as EIterator}, + config::ReadableSize, + stream::block_on_external_io, + sys::SysQuota, time::{Instant, Limiter}, }; use txn_types::{Key, TimeStamp, WriteRef}; @@ -39,6 +47,32 @@ use crate::{ Config, Error, Result, }; +#[derive(Clone, PartialEq, Debug)] +pub enum CacheKvFile { + Mem(Arc>), + Fs(Arc), +} + +impl CacheKvFile { + // get the ref count of 
item. + pub fn ref_count(&self) -> usize { + match self { + CacheKvFile::Mem(buff) => Arc::strong_count(buff), + CacheKvFile::Fs(path) => Arc::strong_count(path), + } + } + + // check the item is expired. + pub fn is_expired(&self, start: &Instant) -> bool { + match self { + // The expired duration for memeory is 60s. + CacheKvFile::Mem(_) => start.saturating_elapsed() >= Duration::from_secs(60), + // The expired duration for local file is 10min. + CacheKvFile::Fs(_) => start.saturating_elapsed() >= Duration::from_secs(600), + } + } +} + /// SstImporter manages SST files that are waiting for ingesting. pub struct SstImporter { dir: ImportDir, @@ -47,7 +81,9 @@ pub struct SstImporter { // TODO: lift api_version as a type parameter. api_version: ApiVersion, compression_types: HashMap, - file_locks: Arc>, + file_locks: Arc>, + mem_use: AtomicU64, + mem_limit: ReadableSize, } impl SstImporter { @@ -58,6 +94,10 @@ impl SstImporter { api_version: ApiVersion, ) -> Result { let switcher = ImportModeSwitcher::new(cfg); + + let memory_limit = (SysQuota::memory_limit_in_bytes() as f64) * cfg.memory_use_ratio; + info!("sst importer memory limit when apply"; "size" => ?memory_limit); + Ok(SstImporter { dir: ImportDir::new(root)?, key_manager, @@ -65,6 +105,8 @@ impl SstImporter { api_version, compression_types: HashMap::with_capacity(2), file_locks: Arc::new(DashMap::default()), + mem_use: AtomicU64::new(0), + mem_limit: ReadableSize(memory_limit as u64), }) } @@ -292,12 +334,249 @@ impl SstImporter { Ok(()) } + pub fn shrink_by_tick(&self) -> usize { + let mut shrink_buff_size: usize = 0; + let mut retain_buff_size: usize = 0; + let mut shrink_files: Vec = Vec::default(); + let mut retain_file_count = 0_usize; + + self.file_locks.retain(|_, (c, start)| { + let mut need_retain = true; + match c { + CacheKvFile::Mem(buff) => { + let buflen = buff.len(); + // The term of recycle memeory is 60s. 
+ if c.ref_count() == 1 && c.is_expired(start) { + need_retain = false; + shrink_buff_size += buflen; + } else { + retain_buff_size += buflen; + } + } + CacheKvFile::Fs(path) => { + let p = path.to_path_buf(); + // The term of recycle file is 10min. + if c.ref_count() == 1 && c.is_expired(start) { + need_retain = false; + shrink_files.push(p); + } else { + retain_file_count += 1; + } + } + } + + need_retain + }); + + if self.import_support_download() { + let shrink_file_count = shrink_files.len(); + info!("shrink space by tick"; "shrink files count" => shrink_file_count, "retain files count" => retain_file_count); + + for f in shrink_files { + if let Err(e) = file_system::remove_file(&f) { + info!("failed to remove file"; "filename" => ?f, "error" => ?e); + } + } + shrink_file_count + } else { + info!("shrink cache by tick"; "shrink size" => shrink_buff_size, "retain size" => retain_buff_size); + self.dec_mem(shrink_buff_size as _); + shrink_buff_size + } + } + + // If mem_limit is 0, which represent download kv-file when import. + // Or read kv-file into buffer directly. + pub fn import_support_download(&self) -> bool { + self.mem_limit == ReadableSize(0) + } + + fn inc_mem_and_check(&self, meta: &KvMeta) -> bool { + let size = meta.get_length(); + let old = self.mem_use.fetch_add(size, Ordering::SeqCst); + + // If the memory is limited, roll backup the mem_use and return false. 
+ if old + size > self.mem_limit.0 { + self.mem_use.fetch_sub(size, Ordering::SeqCst); + false + } else { + true + } + } + + fn dec_mem(&self, size: u64) { + self.mem_use.fetch_sub(size, Ordering::SeqCst); + } + + pub fn do_read_kv_file( + &self, + meta: &KvMeta, + rewrite_rule: &RewriteRule, + ext_storage: Arc, + speed_limiter: &Limiter, + ) -> Result { + let start = Instant::now(); + let dst_name = format!("{}_{}", meta.get_name(), meta.get_range_offset()); + + let mut lock = self + .file_locks + .entry(dst_name) + .or_insert((CacheKvFile::Mem(Arc::default()), Instant::now())); + + if let CacheKvFile::Mem(buff) = &lock.0 { + if !buff.is_empty() { + lock.1 = Instant::now(); + return Ok(lock.0.clone()); + } + } + + if !self.inc_mem_and_check(meta) { + return Err(Error::ResourceNotEnough(String::from("memory is limited"))); + } + + let expected_sha256 = { + let sha256 = meta.get_sha256().to_vec(); + if !sha256.is_empty() { + Some(sha256) + } else { + None + } + }; + let file_length = meta.get_length(); + let range = { + let range_length = meta.get_range_length(); + if range_length == 0 { + None + } else { + Some((meta.get_range_offset(), range_length)) + } + }; + let restore_config = external_storage_export::RestoreConfig { + range, + compression_type: Some(meta.get_compression_type()), + expected_sha256, + file_crypter: None, + }; + + let buff = self.read_kv_files_from_external_storage( + file_length, + meta.get_name(), + ext_storage, + speed_limiter, + restore_config, + )?; + + IMPORTER_DOWNLOAD_BYTES.observe(file_length as _); + IMPORTER_APPLY_DURATION + .with_label_values(&["download"]) + .observe(start.saturating_elapsed().as_secs_f64()); + + let rewrite_buff = self.rewrite_kv_file(buff, rewrite_rule)?; + *lock = (CacheKvFile::Mem(Arc::new(rewrite_buff)), Instant::now()); + Ok(lock.0.clone()) + } + + pub fn create_external_storage( + &self, + backend: &StorageBackend, + support_kms: bool, + ) -> Result> { + let ext_storage = 
external_storage_export::create_storage(backend, Default::default())?; + // kv-files needn't are decrypted with KMS when download currently because these + // files are not encrypted when log-backup. It is different from + // sst-files because sst-files is encrypted when saved with rocksdb env + // with KMS. to do: support KMS when log-backup and restore point. + let ext_storage = match (support_kms, self.key_manager.clone()) { + (true, Some(key_manager)) => { + Box::new(external_storage_export::EncryptedExternalStorage { + key_manager, + storage: ext_storage, + }) + } + _ => ext_storage, + }; + Ok(ext_storage) + } + + fn read_kv_files_from_external_storage( + &self, + file_length: u64, + file_name: &str, + ext_storage: Arc, + speed_limiter: &Limiter, + restore_config: RestoreConfig, + ) -> Result> { + let RestoreConfig { + range, + compression_type, + expected_sha256, + file_crypter, + } = restore_config; + + let mut reader = { + let inner = if let Some((off, len)) = range { + ext_storage.read_part(file_name, off, len) + } else { + ext_storage.read(file_name) + }; + + let inner = compression_reader_dispatcher(compression_type, inner)?; + encrypt_wrap_reader(file_crypter, inner)? + }; + + let r = block_on_external_io(external_storage_export::read_external_storage_info_buff( + &mut reader, + speed_limiter, + file_length, + expected_sha256, + external_storage_export::MIN_READ_SPEED, + )); + let url = ext_storage.url()?.to_string(); + let buff = r.map_err(|e| Error::CannotReadExternalStorage { + url: url.to_string(), + name: file_name.to_string(), + err: e, + local_path: PathBuf::default(), + })?; + + Ok(buff) + } + + pub fn read_from_kv_file( + &self, + meta: &KvMeta, + rewrite_rule: &RewriteRule, + ext_storage: Arc, + backend: &StorageBackend, + speed_limiter: &Limiter, + ) -> Result>> { + let c = if self.import_support_download() { + self.do_download_kv_file(meta, backend, speed_limiter)? 
+ } else { + self.do_read_kv_file(meta, rewrite_rule, ext_storage, speed_limiter)? + }; + match c { + // If cache memroy, it has been rewrite, return buffer directly. + CacheKvFile::Mem(buff) => Ok(buff), + // If cache file name, it need to read and rewrite. + CacheKvFile::Fs(path) => { + let file = File::open(path.as_ref())?; + let mut reader = BufReader::new(file); + let mut buffer = Vec::new(); + reader.read_to_end(&mut buffer)?; + + let rewrite_buff = self.rewrite_kv_file(buffer, rewrite_rule)?; + Ok(Arc::new(rewrite_buff)) + } + } + } + pub fn do_download_kv_file( &self, meta: &KvMeta, backend: &StorageBackend, speed_limiter: &Limiter, - ) -> Result { + ) -> Result { let offset = meta.get_range_offset(); let src_name = meta.get_name(); let dst_name = format!("{}_{}", src_name, offset); @@ -309,14 +588,15 @@ impl SstImporter { } else { None }; - if path.save.exists() { - return Ok(path.save); - } - let lock = self.file_locks.entry(dst_name.to_string()).or_default(); + let mut lock = self + .file_locks + .entry(dst_name) + .or_insert((CacheKvFile::Fs(Arc::new(path.save.clone())), Instant::now())); if path.save.exists() { - return Ok(path.save); + lock.1 = Instant::now(); + return Ok(lock.0.clone()); } let range_length = meta.get_range_length(); @@ -336,16 +616,17 @@ impl SstImporter { src_name, path.temp.clone(), backend, - // kv-files needn't are decrypted with KMS when download currently because these files - // are not encrypted when log-backup. It is different from sst-files - // because sst-files is encrypted when saved with rocksdb env with KMS. - // to do: support KMS when log-backup and restore point. false, // don't support encrypt for now. speed_limiter, restore_config, )?; - info!("download file finished {}, offset {}", src_name, offset); + info!( + "download file finished {}, offset {}, length {}", + src_name, + offset, + meta.get_length() + ); if let Some(p) = path.save.parent() { // we have v1 prefix in file name. 
@@ -358,89 +639,100 @@ impl SstImporter { })?; } - file_system::rename(path.temp, path.save.clone())?; - - drop(lock); - self.file_locks.remove(&dst_name); - + file_system::rename(path.temp, path.save)?; IMPORTER_APPLY_DURATION .with_label_values(&["download"]) .observe(start.saturating_elapsed().as_secs_f64()); - Ok(path.save) + lock.1 = Instant::now(); + Ok(lock.0.clone()) } - pub fn do_apply_kv_file>( + pub fn rewrite_kv_file( &self, - start_key: &[u8], - end_key: &[u8], - restore_ts: u64, - file_path: P, + file_buff: Vec, rewrite_rule: &RewriteRule, - build_fn: &mut dyn FnMut(Vec, Vec), - ) -> Result> { - // iterator file and performs rewrites and apply. - let file = File::open(&file_path)?; - let mut reader = BufReader::new(file); - let mut buffer = Vec::new(); - reader.read_to_end(&mut buffer)?; - - let mut event_iter = EventIterator::new(buffer); - + ) -> Result> { let old_prefix = rewrite_rule.get_old_key_prefix(); let new_prefix = rewrite_rule.get_new_key_prefix(); - - let perform_rewrite = old_prefix != new_prefix; + // if old_prefix equals new_prefix, do not need rewrite. + if old_prefix == new_prefix { + return Ok(file_buff); + } // perform iteration and key rewrite. 
+ let mut new_buff = Vec::with_capacity(file_buff.len()); + let mut event_iter = EventIterator::new(file_buff.as_slice()); let mut key = new_prefix.to_vec(); let new_prefix_data_key_len = key.len(); + + let start = Instant::now(); + loop { + if !event_iter.valid() { + break; + } + event_iter.next()?; + + // perform rewrite + let old_key = event_iter.key(); + if !old_key.starts_with(old_prefix) { + return Err(Error::WrongKeyPrefix { + what: "Key in file", + key: old_key.to_vec(), + prefix: old_prefix.to_vec(), + }); + } + key.truncate(new_prefix_data_key_len); + key.extend_from_slice(&old_key[old_prefix.len()..]); + let value = event_iter.value(); + + let encoded = EventEncoder::encode_event(&key, value); + for slice in encoded { + new_buff.append(&mut slice.as_ref().to_owned()); + } + } + + IMPORTER_APPLY_DURATION + .with_label_values(&["rewrite"]) + .observe(start.saturating_elapsed().as_secs_f64()); + Ok(new_buff) + } + + pub fn do_apply_kv_file( + &self, + start_key: &[u8], + end_key: &[u8], + start_ts: u64, + restore_ts: u64, + file_buff: Arc>, + build_fn: &mut dyn FnMut(Vec, Vec), + ) -> Result> { + let mut event_iter = EventIterator::new(file_buff.as_slice()); let mut smallest_key = None; let mut largest_key = None; - let mut total_key = 0; let mut ts_not_expected = 0; let mut not_in_range = 0; - let start = Instant::now(); + loop { if !event_iter.valid() { break; } total_key += 1; event_iter.next()?; - INPORTER_APPLY_COUNT.with_label_values(&["key_meet"]).inc(); - let ts = Key::decode_ts_from(event_iter.key())?; - if ts > TimeStamp::new(restore_ts) { + + let key = event_iter.key().to_vec(); + let value = event_iter.value().to_vec(); + let ts = Key::decode_ts_from(&key)?; + if ts < TimeStamp::new(start_ts) || ts > TimeStamp::new(restore_ts) { // we assume the keys in file are sorted by ts. // so if we met the key not satisfy the ts. // we can easily filter the remain keys. 
ts_not_expected += 1; continue; } - if perform_rewrite { - let old_key = event_iter.key(); - - if !old_key.starts_with(old_prefix) { - return Err(Error::WrongKeyPrefix { - what: "Key in file", - key: old_key.to_vec(), - prefix: old_prefix.to_vec(), - }); - } - key.truncate(new_prefix_data_key_len); - key.extend_from_slice(&old_key[old_prefix.len()..]); - - debug!( - "perform rewrite new key: {:?}, new key prefix: {:?}, old key prefix: {:?}", - log_wrappers::Value::key(&key), - log_wrappers::Value::key(new_prefix), - log_wrappers::Value::key(old_prefix), - ); - } else { - key = event_iter.key().to_vec(); - } if check_key_in_range(&key, 0, start_key, end_key).is_err() { // key not in range, we can simply skip this key here. // the client make sure the correct region will download and apply the same @@ -451,28 +743,21 @@ impl SstImporter { not_in_range += 1; continue; } - let value = event_iter.value().to_vec(); - build_fn(key.clone(), value); - let iter_key = key.clone(); - smallest_key = smallest_key.map_or_else( - || Some(iter_key.clone()), - |v: Vec| Some(v.min(iter_key.clone())), - ); - - largest_key = largest_key.map_or_else( - || Some(iter_key.clone()), - |v: Vec| Some(v.max(iter_key.clone())), - ); + build_fn(key.clone(), value); + smallest_key = smallest_key + .map_or_else(|| Some(key.clone()), |v: Vec| Some(v.min(key.clone()))); + largest_key = largest_key + .map_or_else(|| Some(key.clone()), |v: Vec| Some(v.max(key.clone()))); } - info!("build download request file done"; "total keys" => %total_key, + if total_key != not_in_range { + info!("build download request file done"; "total keys" => %total_key, "ts filtered keys" => %ts_not_expected, - "range filtered keys" => %not_in_range, - "file" => %file_path.as_ref().display()); + "range filtered keys" => %not_in_range); + } - let label = if perform_rewrite { "rewrite" } else { "normal" }; IMPORTER_APPLY_DURATION - .with_label_values(&[label]) + .with_label_values(&["normal"]) 
.observe(start.saturating_elapsed().as_secs_f64()); match (smallest_key, largest_key) { @@ -809,12 +1094,17 @@ fn is_after_end_bound>(value: &[u8], bound: &Bound) -> bool { #[cfg(test)] mod tests { - use std::io::{self, BufWriter}; + use std::{ + io::{self, BufWriter, Write}, + ops::Sub, + usize, + }; use engine_traits::{ collect, EncryptionMethod, Error as TraitError, ExternalSstFileInfo, Iterable, Iterator, RefIterable, SstReader, SstWriter, CF_DEFAULT, DATA_CFS, }; + use external_storage_export::read_external_storage_info_buff; use file_system::File; use openssl::hash::{Hasher, MessageDigest}; use tempfile::Builder; @@ -1035,7 +1325,8 @@ mod tests { }) } - fn create_sample_external_kv_file() -> Result<(tempfile::TempDir, StorageBackend, KvMeta)> { + fn create_sample_external_kv_file() + -> Result<(tempfile::TempDir, StorageBackend, KvMeta, Vec)> { let ext_dir = tempfile::tempdir()?; let file_name = "v1/t000001/abc.log"; let file_path = ext_dir.path().join(file_name); @@ -1047,6 +1338,7 @@ mod tests { (b"t1_r01".to_vec(), b"tidb".to_vec()), (b"t1_r02".to_vec(), b"tikv".to_vec()), (b"t1_r03".to_vec(), b"pingcap".to_vec()), + (b"t1_r04".to_vec(), b"test for PITR".to_vec()), ]; let mut sha256 = Hasher::new(MessageDigest::sha256()).unwrap(); @@ -1067,7 +1359,7 @@ mod tests { kv_meta.set_sha256(sha256.finish().unwrap().to_vec()); let backend = external_storage_export::make_local_backend(ext_dir.path()); - Ok((ext_dir, backend, kv_meta)) + Ok((ext_dir, backend, kv_meta, buff.buffer().to_vec())) } fn create_sample_external_rawkv_sst_file( @@ -1245,6 +1537,249 @@ mod tests { assert_eq!(err.kind(), io::ErrorKind::TimedOut); } + #[test] + fn test_read_external_storage_info_buff() { + let data = &b"input some data, used to test read buff"[..]; + let mut reader = data; + let len = reader.len() as _; + let sha_256 = { + let mut hasher = Hasher::new(MessageDigest::sha256()).unwrap(); + hasher.update(data).unwrap(); + hasher.finish().unwrap().to_vec() + }; + + // test 
successfully. + let output = block_on_external_io(read_external_storage_info_buff( + &mut reader, + &Limiter::new(f64::INFINITY), + len, + Some(sha_256.clone()), + 0, + )) + .unwrap(); + assert_eq!(&output, data); + + // test without expected_sha245. + reader = data; + let output = block_on_external_io(read_external_storage_info_buff( + &mut reader, + &Limiter::new(f64::INFINITY), + len, + None, + 0, + )) + .unwrap(); + assert_eq!(&output, data); + + // test with wrong expectd_len. + reader = data; + let err = block_on_external_io(read_external_storage_info_buff( + &mut reader, + &Limiter::new(f64::INFINITY), + len + 1, + Some(sha_256.clone()), + 0, + )) + .unwrap_err(); + assert!(err.to_string().contains("length not match")); + + // test with wrong expected_sha256. + reader = data; + let err = block_on_external_io(read_external_storage_info_buff( + &mut reader, + &Limiter::new(f64::INFINITY), + len, + Some(sha_256[..sha_256.len() - 1].to_vec()), + 0, + )) + .unwrap_err(); + assert!(err.to_string().contains("sha256 not match")); + } + + #[test] + fn test_read_external_storage_info_buff_timed_out() { + use futures_util::stream::{pending, TryStreamExt}; + + let mut input = pending::>().into_async_read(); + let err = block_on_external_io(read_external_storage_info_buff( + &mut input, + &Limiter::new(f64::INFINITY), + 0, + None, + usize::MAX, + )) + .unwrap_err(); + assert_eq!(err.kind(), io::ErrorKind::TimedOut); + } + + #[test] + fn test_do_read_kv_file() { + // create a sample kv file. + let (_temp_dir, backend, kv_meta, buff) = create_sample_external_kv_file().unwrap(); + + // create importer object. 
+ let import_dir = tempfile::tempdir().unwrap(); + let (_, key_manager) = new_key_manager_for_test(); + let importer = SstImporter::new( + &Config::default(), + import_dir, + Some(key_manager), + ApiVersion::V1, + ) + .unwrap(); + let ext_storage = { + let inner = importer.create_external_storage(&backend, false).unwrap(); + Arc::new(inner) + }; + + // test do_read_kv_file() + let rewrite_rule = &new_rewrite_rule(b"", b"", 12345); + let output = importer + .do_read_kv_file( + &kv_meta, + rewrite_rule, + ext_storage, + &Limiter::new(f64::INFINITY), + ) + .unwrap(); + + assert_eq!(CacheKvFile::Mem(Arc::new(buff.clone())), output); + + // Do not shrint nothing. + let shrink_size = importer.shrink_by_tick(); + assert_eq!(shrink_size, 0); + assert_eq!(importer.file_locks.len(), 1); + + // drop the refcnt + drop(output); + let shrink_size = importer.shrink_by_tick(); + assert_eq!(shrink_size, 0); + assert_eq!(importer.file_locks.len(), 1); + + // set expired instance in Dashmap + for mut kv in importer.file_locks.iter_mut() { + kv.1 = Instant::now().sub(Duration::from_secs(61)); + } + let shrink_size = importer.shrink_by_tick(); + assert_eq!(shrink_size, buff.len()); + assert!(importer.file_locks.is_empty()); + } + + #[test] + fn test_read_kv_files_from_external_storage() { + // create a sample kv file. + let (_temp_dir, backend, kv_meta, buff) = create_sample_external_kv_file().unwrap(); + + // create importer object. + let import_dir = tempfile::tempdir().unwrap(); + let (_, key_manager) = new_key_manager_for_test(); + let importer = SstImporter::new( + &Config::default(), + import_dir, + Some(key_manager), + ApiVersion::V1, + ) + .unwrap(); + let ext_storage = { + let inner = importer.create_external_storage(&backend, false).unwrap(); + Arc::new(inner) + }; + + // test read all of the file. 
+ let restore_config = external_storage_export::RestoreConfig { + expected_sha256: Some(kv_meta.get_sha256().to_vec()), + ..Default::default() + }; + + let output = importer + .read_kv_files_from_external_storage( + kv_meta.get_length(), + kv_meta.get_name(), + ext_storage.clone(), + &Limiter::new(f64::INFINITY), + restore_config, + ) + .unwrap(); + assert_eq!( + buff, + output, + "we are testing addition with {} and {}", + buff.len(), + output.len() + ); + + // test read range of the file. + let (offset, len) = (5, 16); + let restore_config = external_storage_export::RestoreConfig { + range: Some((offset, len)), + ..Default::default() + }; + + let output = importer + .read_kv_files_from_external_storage( + len, + kv_meta.get_name(), + ext_storage, + &Limiter::new(f64::INFINITY), + restore_config, + ) + .unwrap(); + assert_eq!(&buff[offset as _..(offset + len) as _], &output[..]); + } + + #[test] + fn test_do_download_kv_file() { + // create a sample kv file. + let (_temp_dir, backend, kv_meta, buff) = create_sample_external_kv_file().unwrap(); + + // create importer object. + let import_dir = tempfile::tempdir().unwrap(); + let (_, key_manager) = new_key_manager_for_test(); + let cfg = Config { + memory_use_ratio: 0.0, + ..Default::default() + }; + let importer = + SstImporter::new(&cfg, import_dir, Some(key_manager), ApiVersion::V1).unwrap(); + let rewrite_rule = &new_rewrite_rule(b"", b"", 12345); + let ext_storage = { + let inner = importer.create_external_storage(&backend, false).unwrap(); + Arc::new(inner) + }; + let path = importer + .dir + .get_import_path( + format!("{}_{}", kv_meta.get_name(), kv_meta.get_range_offset()).as_str(), + ) + .unwrap(); + + // test do_download_kv_file(). 
+ assert!(importer.import_support_download()); + let output = importer + .read_from_kv_file( + &kv_meta, + rewrite_rule, + ext_storage, + &backend, + &Limiter::new(f64::INFINITY), + ) + .unwrap(); + assert_eq!(*output, buff); + check_file_exists(&path.save, None); + + // test shrink nothing. + let shrint_files_cnt = importer.shrink_by_tick(); + assert_eq!(shrint_files_cnt, 0); + + // set expired instance in Dashmap. + for mut kv in importer.file_locks.iter_mut() { + kv.1 = Instant::now().sub(Duration::from_secs(601)); + } + let shrint_files_cnt = importer.shrink_by_tick(); + assert_eq!(shrint_files_cnt, 1); + check_file_not_exists(&path.save, None); + } + #[test] fn test_download_file_from_external_storage_for_sst() { // creates a sample SST file. @@ -1285,7 +1820,7 @@ mod tests { #[test] fn test_download_file_from_external_storage_for_kv() { - let (_temp_dir, backend, kv_meta) = create_sample_external_kv_file().unwrap(); + let (_temp_dir, backend, kv_meta, _) = create_sample_external_kv_file().unwrap(); let (_, key_manager) = new_key_manager_for_test(); let import_dir = tempfile::tempdir().unwrap(); @@ -2081,4 +2616,79 @@ mod tests { assert_eq!(sst_reader.compression_name(), expected_compression_name); } } + + #[test] + fn test_import_support_download() { + let import_dir = tempfile::tempdir().unwrap(); + let importer = + SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1).unwrap(); + assert_eq!(importer.import_support_download(), false); + + let import_dir = tempfile::tempdir().unwrap(); + let importer = SstImporter::new( + &Config { + memory_use_ratio: 0.0, + ..Default::default() + }, + import_dir, + None, + ApiVersion::V1, + ) + .unwrap(); + assert_eq!(importer.import_support_download(), true); + } + + #[test] + fn test_inc_mem_and_check() { + // create importer object. 
+ let import_dir = tempfile::tempdir().unwrap(); + let importer = + SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1).unwrap(); + assert_eq!(importer.mem_use.load(Ordering::SeqCst), 0); + + // test inc_mem_and_check() and dec_mem() successfully. + let meta = KvMeta { + length: 100, + ..Default::default() + }; + let check = importer.inc_mem_and_check(&meta); + assert!(check); + assert_eq!(importer.mem_use.load(Ordering::SeqCst), meta.get_length()); + + importer.dec_mem(meta.get_length()); + assert_eq!(importer.mem_use.load(Ordering::SeqCst), 0); + + // test inc_mem_and_check() failed. + let meta = KvMeta { + length: u64::MAX, + ..Default::default() + }; + let check = importer.inc_mem_and_check(&meta); + assert!(!check); + } + + #[test] + fn test_dashmap_lock() { + let import_dir = tempfile::tempdir().unwrap(); + let importer = + SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1).unwrap(); + + let key = "file1"; + let value = (CacheKvFile::Mem(Arc::default()), Instant::now()); + let lock = importer.file_locks.entry(key.to_string()).or_insert(value); + + // test locked by try_entry() + let lock2 = importer.file_locks.try_entry(key.to_string()); + assert!(lock2.is_none()); + let lock2 = importer.file_locks.try_get(key); + assert!(lock2.is_locked()); + + // test unlocked by entry() + drop(lock); + let v = importer.file_locks.get(key).unwrap(); + assert_eq!(v.0.ref_count(), 1); + + let _buff = v.0.clone(); + assert_eq!(v.0.ref_count(), 2); + } } diff --git a/components/tikv_util/src/codec/stream_event.rs b/components/tikv_util/src/codec/stream_event.rs index b44d239197b..5b00cad6372 100644 --- a/components/tikv_util/src/codec/stream_event.rs +++ b/components/tikv_util/src/codec/stream_event.rs @@ -16,8 +16,8 @@ pub trait Iterator { fn value(&self) -> &[u8]; } -pub struct EventIterator { - buf: Vec, +pub struct EventIterator<'a> { + buf: &'a [u8], offset: usize, key_offset: usize, value_offset: usize, @@ -25,8 +25,8 @@ pub struct 
EventIterator { value_len: usize, } -impl EventIterator { - pub fn new(buf: Vec) -> EventIterator { +impl EventIterator<'_> { + pub fn new(buf: &[u8]) -> EventIterator<'_> { EventIterator { buf, offset: 0, @@ -44,7 +44,7 @@ impl EventIterator { } } -impl Iterator for EventIterator { +impl Iterator for EventIterator<'_> { fn next(&mut self) -> Result<()> { if self.valid() { self.key_len = self.get_size() as usize; @@ -141,7 +141,7 @@ mod tests { vals.push(val); } - let mut iter = EventIterator::new(event); + let mut iter = EventIterator::new(&event); let mut index = 0_usize; loop { diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index a0d2ab5f4ee..b28f745267e 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -5,6 +5,7 @@ use std::{ future::Future, path::PathBuf, sync::{Arc, Mutex}, + time::Duration, }; use collections::HashSet; @@ -39,6 +40,7 @@ use tikv_util::{ sys::thread::ThreadBuildWrapper, time::{Instant, Limiter}, }; +use tokio::time::sleep; use txn_types::{Key, WriteRef, WriteType}; use super::make_rpc_error; @@ -82,7 +84,7 @@ where ) -> ImportSstService { let props = tikv_util::thread_group::current_properties(); let threads = ThreadPoolBuilder::new() - .pool_size(cfg.num_threads) + .pool_size(cfg.num_threads + 1) .name_prefix("sst-importer") .after_start_wrapper(move || { tikv_util::thread_group::set_properties(props.clone()); @@ -93,6 +95,8 @@ where .create() .unwrap(); importer.start_switch_mode_check(&threads, engine.clone()); + threads.spawn_ok(Self::tick(importer.clone())); + ImportSstService { cfg, engine, @@ -105,6 +109,13 @@ where } } + async fn tick(importer: Arc) { + loop { + sleep(Duration::from_secs(10)).await; + importer.shrink_by_tick(); + } + } + fn acquire_lock(task_slots: &Arc>>, meta: &SstMeta) -> Result { let mut slots = task_slots.lock().unwrap(); let p = sst_meta_to_path(meta)?; @@ -462,6 +473,11 @@ where let mut req_default_size = 0_u64; let mut req_write_size = 0_u64; let mut range: Option = 
None; + let ext_storage = { + let inner = + importer.create_external_storage(req.get_storage_backend(), false)?; + Arc::from(inner) + }; for (i, meta) in metas.iter().enumerate() { let (reqs, req_size) = if meta.get_cf() == CF_DEFAULT { @@ -480,14 +496,19 @@ where context.clone(), ); - let temp_file = - importer.do_download_kv_file(meta, req.get_storage_backend(), &limiter)?; + let buff = importer.read_from_kv_file( + meta, + &rules[i], + Arc::clone(&ext_storage), + req.get_storage_backend(), + &limiter, + )?; let r: Option = importer.do_apply_kv_file( meta.get_start_key(), meta.get_end_key(), + meta.get_start_ts(), meta.get_restore_ts(), - temp_file, - &rules[i], + buff, &mut build_req_fn, )?; diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 93c07f2f411..ff01788c370 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -765,6 +765,7 @@ fn test_serde_custom_tikv_config() { num_threads: 123, stream_channel_window: 123, import_mode_timeout: ReadableDuration::secs(1453), + memory_use_ratio: 0.3, }; value.panic_when_unexpected_key_or_data = true; value.gc = GcConfig { From e52eb4d59c796a1f585c9d26660463d0e95a9d61 Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Thu, 1 Dec 2022 11:06:01 +0800 Subject: [PATCH 0375/1149] fix the issue that TiKV starts failed. 
(#13863) close tikv/tikv#13862 Signed-off-by: joccau --- src/import/sst_service.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index b28f745267e..2bf0226136f 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -5,6 +5,7 @@ use std::{ future::Future, path::PathBuf, sync::{Arc, Mutex}, + thread::sleep, time::Duration, }; @@ -40,7 +41,6 @@ use tikv_util::{ sys::thread::ThreadBuildWrapper, time::{Instant, Limiter}, }; -use tokio::time::sleep; use txn_types::{Key, WriteRef, WriteType}; use super::make_rpc_error; @@ -95,7 +95,8 @@ where .create() .unwrap(); importer.start_switch_mode_check(&threads, engine.clone()); - threads.spawn_ok(Self::tick(importer.clone())); + let importer_clone = importer.clone(); + threads.spawn_ok(async { Self::tick(importer_clone) }); ImportSstService { cfg, @@ -109,9 +110,9 @@ where } } - async fn tick(importer: Arc) { + fn tick(importer: Arc) { loop { - sleep(Duration::from_secs(10)).await; + sleep(Duration::from_secs(10)); importer.shrink_by_tick(); } } From 0f1d45a8e6ff420ee76b08b43226b801c7d033a8 Mon Sep 17 00:00:00 2001 From: Jay Date: Thu, 1 Dec 2022 13:06:01 +0800 Subject: [PATCH 0376/1149] tikv_kv: introduce raft extension (#13864) ref tikv/tikv#13827 So anything related to raft will call raft extension instead of router. This makes it easier to introduce new raftstore implementations. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/raftstore/src/store/region_meta.rs | 20 +- components/server/src/server.rs | 37 ++-- components/test_raftstore/src/server.rs | 28 +-- components/test_storage/src/sync_storage.rs | 8 +- components/tikv_kv/Cargo.toml | 1 + components/tikv_kv/src/lib.rs | 14 ++ components/tikv_kv/src/mock_engine.rs | 5 + components/tikv_kv/src/raft_extension.rs | 69 +++++++ components/tikv_kv/src/rocksdb_engine.rs | 32 +++- src/server/gc_worker/gc_worker.rs | 85 ++------- src/server/raft_client.rs | 109 ++++------- src/server/{raftkv.rs => raftkv/mod.rs} | 28 ++- src/server/raftkv/raft_extension.rs | 177 ++++++++++++++++++ src/server/resolve.rs | 44 ++--- src/server/server.rs | 53 +++--- src/server/service/debug.rs | 117 ++---------- src/server/service/kv.rs | 146 +++++---------- src/server/snap.rs | 38 +--- src/server/tablet_snap.rs | 38 +--- src/server/transport.rs | 37 ++-- tests/failpoints/cases/test_gc_metrics.rs | 3 - .../integrations/config/dynamic/gc_worker.rs | 10 +- tests/integrations/config/dynamic/snap.rs | 3 +- tests/integrations/server/raft_client.rs | 38 ++-- 25 files changed, 578 insertions(+), 563 deletions(-) create mode 100644 components/tikv_kv/src/raft_extension.rs rename src/server/{raftkv.rs => raftkv/mod.rs} (96%) create mode 100644 src/server/raftkv/raft_extension.rs diff --git a/Cargo.lock b/Cargo.lock index f1d02f06af9..1ccf961796e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6453,6 +6453,7 @@ dependencies = [ "pd_client", "prometheus", "prometheus-static-metric", + "raft", "raftstore", "slog", "slog-global", diff --git a/components/raftstore/src/store/region_meta.rs b/components/raftstore/src/store/region_meta.rs index 0370c7604ec..7de687e9dbb 100644 --- a/components/raftstore/src/store/region_meta.rs +++ b/components/raftstore/src/store/region_meta.rs @@ -60,7 +60,7 @@ pub struct RaftHardState { pub commit: u64, } -#[derive(Debug, Copy, Clone, Serialize, Deserialize)] 
+#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq)] pub enum RaftStateRole { Follower, Candidate, @@ -178,12 +178,27 @@ pub struct RegionPeer { pub id: u64, pub store_id: u64, pub role: RaftPeerRole, + pub is_witness: bool, } impl PartialEq for RegionPeer { #[inline] fn eq(&self, other: &metapb::Peer) -> bool { - self.id == other.id && self.store_id == other.store_id && self.role == other.role + // May not be sufficent, but always correct. + let s: metapb::Peer = (*self).into(); + s == *other + } +} + +impl From for metapb::Peer { + fn from(p: RegionPeer) -> Self { + metapb::Peer { + id: p.id, + store_id: p.store_id, + role: p.role.into(), + is_witness: p.is_witness, + ..Default::default() + } } } @@ -247,6 +262,7 @@ impl RegionMeta { id: peer.get_id(), store_id: peer.get_store_id(), role: peer.get_role().into(), + is_witness: peer.is_witness, }); } let merge_state = if local_state.has_merge_state() { diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 625db3e951f..3ce38d0c79e 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -76,8 +76,8 @@ use raftstore::{ RaftBatchSystem, RaftRouter, StoreMeta, MULTI_FILES_SNAPSHOT_FEATURE, PENDING_MSG_CAP, }, memory::MEMTRACE_ROOT as MEMTRACE_RAFTSTORE, - AutoSplitController, CheckLeaderRunner, GlobalReplicationState, LocalReader, SnapManager, - SnapManagerBuilder, SplitCheckRunner, SplitConfigManager, StoreMetaDelegate, + AutoSplitController, CheckLeaderRunner, LocalReader, SnapManager, SnapManagerBuilder, + SplitCheckRunner, SplitConfigManager, StoreMetaDelegate, }, RaftRouterCompactedEventSender, }; @@ -221,8 +221,7 @@ struct TikvServer { flow_info_sender: Option>, flow_info_receiver: Option>, system: Option>, - resolver: resolve::PdStoreAddrResolver, - state: Arc>, + resolver: Option, store_path: PathBuf, snap_mgr: Option, // Will be filled in `init_servers`. 
encryption_key_manager: Option>, @@ -260,8 +259,7 @@ struct Servers { backup_stream_scheduler: Option>, } -type LocalServer = - Server, resolve::PdStoreAddrResolver, LocalRaftKv>; +type LocalServer = Server>; type LocalRaftKv = RaftKv>; impl TikvServer @@ -323,8 +321,6 @@ where let background_worker = WorkerBuilder::new("background") .thread_count(thread_count) .create(); - let (resolver, state) = - resolve::new_resolver(Arc::clone(&pd_client), &background_worker, router.clone()); let mut coprocessor_host = Some(CoprocessorHost::new( router.clone(), @@ -375,8 +371,7 @@ where pd_client, router, system: Some(system), - resolver, - state, + resolver: None, store_path, snap_mgr: None, encryption_key_manager: None, @@ -655,14 +650,10 @@ where fn init_gc_worker( &mut self, - ) -> GcWorker< - RaftKv>, - RaftRouter, - > { + ) -> GcWorker>> { let engines = self.engines.as_ref().unwrap(); let gc_worker = GcWorker::new( engines.engine.clone(), - self.router.clone(), self.flow_info_sender.take().unwrap(), self.config.gc.clone(), self.pd_client.feature_gate().clone(), @@ -823,6 +814,13 @@ where )), ); + let (resolver, state) = resolve::new_resolver( + self.pd_client.clone(), + &self.background_worker, + storage.get_engine().raft_extension().clone(), + ); + self.resolver = Some(resolver); + ReplicaReadLockChecker::new(self.concurrency_manager.clone()) .register(self.coprocessor_host.as_mut().unwrap()); @@ -930,7 +928,7 @@ where raft_store.clone(), self.config.storage.api_version(), self.pd_client.clone(), - self.state.clone(), + state, self.background_worker.clone(), Some(health_service.clone()), None, @@ -953,8 +951,7 @@ where Arc::clone(&self.quota_limiter), ), coprocessor_v2::Endpoint::new(&self.config.coprocessor_v2), - self.router.clone(), - self.resolver.clone(), + self.resolver.clone().unwrap(), snap_mgr.clone(), gc_worker.clone(), check_leader_scheduler, @@ -1203,7 +1200,7 @@ where let debug_service = DebugService::new( engines.engines.clone(), 
servers.server.get_debug_thread_pool().clone(), - self.router.clone(), + engines.engine.raft_extension().clone(), self.cfg_controller.as_ref().unwrap().clone(), ); if servers @@ -1242,7 +1239,7 @@ where .start( servers.node.id(), self.pd_client.clone(), - self.resolver.clone(), + self.resolver.clone().unwrap(), self.security_mgr.clone(), &self.config.pessimistic_txn, ) diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 1b532932b30..ea9868afdbd 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -33,7 +33,7 @@ use pd_client::PdClient; use raftstore::{ coprocessor::{CoprocessorHost, RegionInfoAccessor}, errors::Error as RaftError, - router::{LocalReadRouter, RaftStoreBlackHole, RaftStoreRouter, ServerRaftStoreRouter}, + router::{LocalReadRouter, RaftStoreRouter, ServerRaftStoreRouter}, store::{ fsm::{store::StoreMeta, ApplyRouter, RaftBatchSystem, RaftRouter}, msg::RaftCmdExtraOpts, @@ -64,7 +64,7 @@ use tikv::{ }, storage::{ self, - kv::SnapContext, + kv::{FakeExtension, SnapContext}, txn::flow_controller::{EngineFlowController, FlowController}, Engine, }, @@ -84,10 +84,11 @@ use super::*; use crate::Config; type SimulateStoreTransport = SimulateTransport>; -type SimulateServerTransport = - SimulateTransport>; pub type SimulateEngine = RaftKv; +type SimulateRaftExtension = ::RaftExtension; +type SimulateServerTransport = + SimulateTransport>; #[derive(Default, Clone)] pub struct AddressMap { @@ -125,12 +126,12 @@ impl StoreAddrResolver for AddressMap { struct ServerMeta { node: Node, - server: Server, + server: Server, sim_router: SimulateStoreTransport, sim_trans: SimulateServerTransport, raw_router: RaftRouter, raw_apply_router: ApplyRouter, - gc_worker: GcWorker, SimulateStoreTransport>, + gc_worker: GcWorker>, rts_worker: Option>, rsmeter_cleanup: Box, } @@ -152,7 +153,7 @@ pub struct ServerCluster { snap_paths: HashMap, snap_mgrs: HashMap, pd_client: Arc, - 
raft_client: RaftClient, + raft_client: RaftClient, concurrency_managers: HashMap, env: Arc, pub causal_ts_providers: HashMap>, @@ -176,7 +177,7 @@ impl ServerCluster { Arc::default(), security_mgr.clone(), map.clone(), - RaftStoreBlackHole, + FakeExtension, worker.scheduler(), Arc::new(ThreadLoadPool::with_threshold(usize::MAX)), ); @@ -218,7 +219,7 @@ impl ServerCluster { pub fn get_gc_worker( &self, node_id: u64, - ) -> &GcWorker, SimulateStoreTransport> { + ) -> &GcWorker> { &self.metas.get(&node_id).unwrap().gc_worker } @@ -334,7 +335,6 @@ impl ServerCluster { let (tx, _rx) = std::sync::mpsc::channel(); let mut gc_worker = GcWorker::new( engine.clone(), - sim_router.clone(), tx, cfg.gc.clone(), Default::default(), @@ -353,7 +353,7 @@ impl ServerCluster { let rts_endpoint = resolved_ts::Endpoint::new( &cfg.resolved_ts, rts_worker.scheduler(), - raft_router.clone(), + raft_router, store_meta.clone(), self.pd_client.clone(), concurrency_manager.clone(), @@ -401,6 +401,7 @@ impl ServerCluster { cfg.quota.max_delay_duration, cfg.quota.enable_auto_tune, )); + let extension = engine.raft_extension().clone(); let store = create_raft_storage::<_, _, _, F, _>( engine, &cfg.storage, @@ -445,7 +446,7 @@ impl ServerCluster { // Create pd client, snapshot manager, server. 
let (resolver, state) = - resolve::new_resolver(Arc::clone(&self.pd_client), &bg_worker, router.clone()); + resolve::new_resolver(Arc::clone(&self.pd_client), &bg_worker, extension.clone()); let snap_mgr = SnapManagerBuilder::default() .max_write_bytes_per_sec(cfg.server.snap_max_write_bytes_per_sec.0 as i64) .max_total_size(cfg.server.snap_max_total_size.0) @@ -483,7 +484,7 @@ impl ServerCluster { let debug_service = DebugService::new( engines.clone(), debug_thread_handle, - raft_router, + extension, ConfigController::default(), ); @@ -520,7 +521,6 @@ impl ServerCluster { store.clone(), copr.clone(), copr_v2.clone(), - sim_router.clone(), resolver.clone(), snap_mgr.clone(), gc_worker.clone(), diff --git a/components/test_storage/src/sync_storage.rs b/components/test_storage/src/sync_storage.rs index fa53688ea75..3d6e1e139e5 100644 --- a/components/test_storage/src/sync_storage.rs +++ b/components/test_storage/src/sync_storage.rs @@ -12,10 +12,7 @@ use kvproto::{ kvrpcpb::{ChecksumAlgorithm, Context, GetRequest, KeyRange, LockInfo, RawGetRequest}, metapb, }; -use raftstore::{ - coprocessor::{region_info_accessor::MockRegionInfoProvider, RegionInfoProvider}, - router::RaftStoreBlackHole, -}; +use raftstore::coprocessor::{region_info_accessor::MockRegionInfoProvider, RegionInfoProvider}; use tikv::{ server::gc_worker::{AutoGcConfig, GcConfig, GcSafePointProvider, GcWorker}, storage::{ @@ -106,7 +103,7 @@ impl SyncTestStorageBuilder { /// Only used for test purpose. 
#[derive(Clone)] pub struct SyncTestStorage { - gc_worker: GcWorker, + gc_worker: GcWorker, store: Storage, } @@ -123,7 +120,6 @@ impl SyncTestStorage { let (tx, _rx) = std::sync::mpsc::channel(); let mut gc_worker = GcWorker::new( storage.get_engine(), - RaftStoreBlackHole, tx, config, Default::default(), diff --git a/components/tikv_kv/Cargo.toml b/components/tikv_kv/Cargo.toml index 6ee74371674..8aa64d0def6 100644 --- a/components/tikv_kv/Cargo.toml +++ b/components/tikv_kv/Cargo.toml @@ -41,6 +41,7 @@ log_wrappers = { workspace = true } pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" +raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raftstore = { workspace = true } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 07cae3ace65..f78b2243331 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -8,6 +8,7 @@ #![feature(bound_map)] #![feature(min_specialization)] #![feature(type_alias_impl_trait)] +#![feature(associated_type_defaults)] #[macro_use(fail_point)] extern crate fail; @@ -18,6 +19,7 @@ mod btree_engine; mod cursor; pub mod metrics; mod mock_engine; +mod raft_extension; mod raftstore_impls; mod rocksdb_engine; mod stats; @@ -55,6 +57,7 @@ pub use self::{ btree_engine::{BTreeEngine, BTreeEngineIterator, BTreeEngineSnapshot}, cursor::{Cursor, CursorBuilder}, mock_engine::{ExpectedWrite, MockEngineBuilder}, + raft_extension::{FakeExtension, RaftExtension}, rocksdb_engine::{RocksEngine, RocksSnapshot}, stats::{ CfStatistics, FlowStatistics, FlowStatsReporter, StageLatencyStats, Statistics, @@ -306,6 +309,12 @@ pub trait Engine: Send + Clone + 'static { /// Currently, only 
multi-rocksdb version will return `None`. fn kv_engine(&self) -> Option; + type RaftExtension: raft_extension::RaftExtension = FakeExtension; + /// Get the underlying raft extension. + fn raft_extension(&self) -> &Self::RaftExtension { + unimplemented!() + } + /// Write modifications into internal local engine directly. /// /// region_modifies records each region's modifications. @@ -418,6 +427,11 @@ pub trait Engine: Send + Clone + 'static { fn end_flashback(&self, _ctx: &Context) -> BoxFuture<'static, Result<()>> { Box::pin(futures::future::ready(Ok(()))) } + + /// Application may operate on local engine directly, the method is to hint + /// the engine there is probably a notable difference in range, so + /// engine may update its statistics. + fn hint_change_in_range(&self, _start_key: Vec, _end_key: Vec) {} } /// A Snapshot is a consistent view of the underlying engine at a given point in diff --git a/components/tikv_kv/src/mock_engine.rs b/components/tikv_kv/src/mock_engine.rs index f3d89940f4e..dc812e84d93 100644 --- a/components/tikv_kv/src/mock_engine.rs +++ b/components/tikv_kv/src/mock_engine.rs @@ -153,6 +153,11 @@ impl Engine for MockEngine { self.base.kv_engine() } + type RaftExtension = ::RaftExtension; + fn raft_extension(&self) -> &Self::RaftExtension { + self.base.raft_extension() + } + fn modify_on_kv_engine(&self, region_modifies: HashMap>) -> Result<()> { self.base.modify_on_kv_engine(region_modifies) } diff --git a/components/tikv_kv/src/raft_extension.rs b/components/tikv_kv/src/raft_extension.rs new file mode 100644 index 00000000000..26c9e687ef6 --- /dev/null +++ b/components/tikv_kv/src/raft_extension.rs @@ -0,0 +1,69 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! TiKV uses raft under the hook to provide consistency between replicas. +//! Though technically, `Engine` trait should hide the details of raft, but in +//! some cases it's unavoidable to access raft interface somehow. This module +//! 
supports the access pattern via extension. + +use futures::future::BoxFuture; +use kvproto::{ + metapb::{Region, RegionEpoch}, + raft_serverpb::RaftMessage, +}; +use raft::SnapshotStatus; +use raftstore::store::region_meta::RegionMeta; + +use crate::Result; + +/// An interface to provide direct access to raftstore layer. +pub trait RaftExtension: Clone + Send { + /// Feed the message to the raft group. + /// + /// If it's a `key_message` is true, it will log a warning if the message + /// failed to send. + fn feed(&self, _msg: RaftMessage, _key_message: bool) {} + + /// Retport the message is rejected by the remote peer. + fn report_reject_message(&self, _region_id: u64, _from_peer_id: u64) {} + + /// Report the target peer is unreachable. + fn report_peer_unreachable(&self, _region_id: u64, _to_peer_id: u64) {} + + /// Report the target store is unreachable. + fn report_store_unreachable(&self, _store_id: u64) {} + + /// Report the status of snapshot. + fn report_snapshot_status(&self, _region_id: u64, _to_peer_id: u64, _status: SnapshotStatus) {} + + /// Report the address of a store is resolved. + fn report_resolved(&self, _store_id: u64, _group_id: u64) {} + + /// Split the region with the given keys. + /// + /// Use `BoxFuture` for simplicity as it's not performance critical path. + fn split( + &self, + _region_id: u64, + _region_epoch: RegionEpoch, + _split_keys: Vec>, + _source: String, + ) -> BoxFuture<'static, Result>> { + Box::pin(async move { Err(box_err!("raft split is not supported")) }) + } + + /// Get the region meta of the given region. + fn query_region(&self, _region_id: u64) -> BoxFuture<'static, Result> { + Box::pin(async move { Err(box_err!("query region is not supported")) }) + } + + /// Ask the raft group to do a consistency check. 
+ fn check_consistency(&self, _region_id: u64) -> BoxFuture<'static, Result<()>> { + Box::pin(async move { Err(box_err!("consistency check is not supported")) }) + } +} + +/// An extension that does nothing or panic on all operations. +#[derive(Clone)] +pub struct FakeExtension; + +impl RaftExtension for FakeExtension {} diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 565ea0accaa..26e2c735254 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -34,7 +34,7 @@ use super::{ write_modifies, Callback, DummySnapshotExt, Engine, Error, ErrorInner, Iterator as EngineIterator, Modify, Result, SnapContext, Snapshot, WriteData, }; -use crate::{OnAppliedCb, WriteEvent}; +use crate::{FakeExtension, OnAppliedCb, RaftExtension, WriteEvent}; // Duplicated in test_engine_builder const TEMP_DIR: &str = ""; @@ -87,12 +87,26 @@ impl Drop for RocksEngineCore { /// /// This is intended for **testing use only**. 
#[derive(Clone)] -pub struct RocksEngine { +pub struct RocksEngine { core: Arc>, sched: Scheduler, engines: Engines, not_leader: Arc, coprocessor: CoprocessorHost, + ext: RE, +} + +impl RocksEngine { + pub fn with_raft_extension(self, ext: NRE) -> RocksEngine { + RocksEngine { + core: self.core, + sched: self.sched, + engines: self.engines, + not_leader: self.not_leader, + coprocessor: self.coprocessor, + ext, + } + } } impl RocksEngine { @@ -132,9 +146,12 @@ impl RocksEngine { not_leader: Arc::new(AtomicBool::new(false)), engines, coprocessor: CoprocessorHost::default(), + ext: FakeExtension, }) } +} +impl RocksEngine { pub fn trigger_not_leader(&self) { self.not_leader.store(true, Ordering::SeqCst); } @@ -196,13 +213,13 @@ impl RocksEngine { } } -impl Display for RocksEngine { +impl Display for RocksEngine { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "RocksDB") } } -impl Debug for RocksEngine { +impl Debug for RocksEngine { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!( f, @@ -212,7 +229,7 @@ impl Debug for RocksEngine { } } -impl Engine for RocksEngine { +impl Engine for RocksEngine { type Snap = Arc; type Local = BaseRocksEngine; @@ -220,6 +237,11 @@ impl Engine for RocksEngine { Some(self.engines.kv.clone()) } + type RaftExtension = RE; + fn raft_extension(&self) -> &Self::RaftExtension { + &self.ext + } + fn modify_on_kv_engine(&self, region_modifies: HashMap>) -> Result<()> { let modifies = region_modifies.into_values().flatten().collect(); write_modifies(&self.engines.kv, modifies) diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 0a162a58230..1ccac8860c6 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -24,7 +24,7 @@ use file_system::{IoType, WithIoType}; use futures::executor::block_on; use kvproto::{kvrpcpb::Context, metapb::Region}; use pd_client::{FeatureGate, PdClient}; -use raftstore::{coprocessor::RegionInfoProvider, 
router::RaftStoreRouter, store::msg::StoreMsg}; +use raftstore::coprocessor::RegionInfoProvider; use tikv_kv::{CfStatistics, CursorBuilder, Modify, SnapContext}; use tikv_util::{ config::{Tracker, VersionTrack}, @@ -174,15 +174,10 @@ where } /// Used to perform GC operations on the engine. -pub struct GcRunner -where - E: Engine, - RR: RaftStoreRouter, -{ +pub struct GcRunner { store_id: u64, engine: E, - raft_store_router: RR, flow_info_sender: Sender, /// Used to limit the write flow of GC. @@ -283,15 +278,10 @@ fn init_snap_ctx(store_id: u64, region: &Region) -> Context { ctx } -impl GcRunner -where - E: Engine, - RR: RaftStoreRouter, -{ +impl GcRunner { pub fn new( store_id: u64, engine: E, - raft_store_router: RR, flow_info_sender: Sender, cfg_tracker: Tracker, cfg: GcConfig, @@ -304,7 +294,6 @@ where Self { store_id, engine, - raft_store_router, flow_info_sender, limiter, cfg, @@ -797,15 +786,10 @@ where .send(FlowInfo::AfterUnsafeDestroyRange(ctx.region_id)) .unwrap(); - self.raft_store_router - .send_store_msg(StoreMsg::ClearRegionSizeInRange { - start_key: start_key.as_encoded().to_vec(), - end_key: end_key.as_encoded().to_vec(), - }) - .unwrap_or_else(|e| { - // Warn and ignore it. - warn!("unsafe destroy range: failed sending ClearRegionSizeInRange"; "err" => ?e); - }); + self.engine.hint_change_in_range( + start_key.as_encoded().to_vec(), + end_key.as_encoded().to_vec(), + ); } else { let cfs = &[CF_LOCK, CF_DEFAULT, CF_WRITE]; let keys = vec![start_key.clone(), end_key.clone()]; @@ -889,11 +873,7 @@ where } } -impl Runnable for GcRunner -where - E: Engine, - RR: RaftStoreRouter, -{ +impl Runnable for GcRunner { type Task = GcTask; #[inline] @@ -1072,16 +1052,12 @@ pub fn sync_gc( } /// Used to schedule GC operations. -pub struct GcWorker +pub struct GcWorker where E: Engine, - RR: RaftStoreRouter + 'static, { engine: E, - /// `raft_store_router` is useful to signal raftstore clean region size - /// informations. 
- raft_store_router: RR, /// Used to signal unsafe destroy range is executed. flow_info_sender: Option>, region_info_provider: Arc, @@ -1098,18 +1074,13 @@ where feature_gate: FeatureGate, } -impl Clone for GcWorker -where - E: Engine, - RR: RaftStoreRouter, -{ +impl Clone for GcWorker { #[inline] fn clone(&self) -> Self { self.refs.fetch_add(1, Ordering::SeqCst); Self { engine: self.engine.clone(), - raft_store_router: self.raft_store_router.clone(), flow_info_sender: self.flow_info_sender.clone(), config_manager: self.config_manager.clone(), refs: self.refs.clone(), @@ -1122,11 +1093,7 @@ where } } -impl Drop for GcWorker -where - E: Engine, - RR: RaftStoreRouter + 'static, -{ +impl Drop for GcWorker { #[inline] fn drop(&mut self) { let refs = self.refs.fetch_sub(1, Ordering::SeqCst); @@ -1142,25 +1109,19 @@ where } } -impl GcWorker -where - E: Engine, - RR: RaftStoreRouter, -{ +impl GcWorker { pub fn new( engine: E, - raft_store_router: RR, flow_info_sender: Sender, cfg: GcConfig, feature_gate: FeatureGate, region_info_provider: Arc, - ) -> GcWorker { + ) -> Self { let worker_builder = WorkerBuilder::new("gc-worker").pending_capacity(GC_MAX_PENDING_TASKS); let worker = worker_builder.create().lazy_build("gc-worker"); let worker_scheduler = worker.scheduler(); GcWorker { engine, - raft_store_router, flow_info_sender: Some(flow_info_sender), config_manager: GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg))), refs: Arc::new(AtomicUsize::new(1)), @@ -1211,7 +1172,6 @@ where let runner = GcRunner::new( store_id, self.engine.clone(), - self.raft_store_router.clone(), self.flow_info_sender.take().unwrap(), self.config_manager.0.clone().tracker("gc-woker".to_owned()), self.config_manager.value().clone(), @@ -1468,12 +1428,9 @@ mod tests { use futures::executor::block_on; use kvproto::{kvrpcpb::ApiVersion, metapb::Peer}; use raft::StateRole; - use raftstore::{ - coprocessor::{ - region_info_accessor::{MockRegionInfoProvider, RegionInfoAccessor}, - CoprocessorHost, 
RegionChangeEvent, - }, - router::RaftStoreBlackHole, + use raftstore::coprocessor::{ + region_info_accessor::{MockRegionInfoProvider, RegionInfoAccessor}, + CoprocessorHost, RegionChangeEvent, }; use tempfile::Builder; use tikv_kv::Snapshot; @@ -1620,7 +1577,6 @@ mod tests { let mut gc_worker = GcWorker::new( engine, - RaftStoreBlackHole, tx, GcConfig::default(), gate, @@ -1797,7 +1753,6 @@ mod tests { let mut gc_worker = GcWorker::new( prefixed_engine.clone(), - RaftStoreBlackHole, tx, GcConfig::default(), feature_gate, @@ -1889,7 +1844,6 @@ mod tests { let mut runner = GcRunner::new( store_id, prefixed_engine.clone(), - RaftStoreBlackHole, tx, GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) .0 @@ -1952,7 +1906,6 @@ mod tests { let mut runner = GcRunner::new( store_id, prefixed_engine.clone(), - RaftStoreBlackHole, tx, GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) .0 @@ -2054,7 +2007,6 @@ mod tests { let mut runner = GcRunner::new( 1, prefixed_engine.clone(), - RaftStoreBlackHole, tx, GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) .0 @@ -2183,7 +2135,6 @@ mod tests { let mut gc_worker = GcWorker::new( engine.clone(), - RaftStoreBlackHole, tx, GcConfig::default(), gate, @@ -2313,7 +2264,7 @@ mod tests { ) -> ( MultiRocksEngine, Arc, - GcRunner, + GcRunner, Vec, mpsc::Receiver, ) { @@ -2369,7 +2320,6 @@ mod tests { let gc_runner = GcRunner::new( store_id, engine.clone(), - RaftStoreBlackHole, tx, GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) .0 @@ -2548,7 +2498,6 @@ mod tests { let mut gc_runner = GcRunner::new( store_id, engine.clone(), - RaftStoreBlackHole, tx, GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) .0 diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index 0230174fb42..fa12600bb98 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -3,7 +3,7 @@ use std::{ collections::VecDeque, ffi::CString, - marker::{PhantomData, Unpin}, + 
marker::Unpin, mem, pin::Pin, result, @@ -16,7 +16,6 @@ use std::{ use collections::{HashMap, HashSet}; use crossbeam::queue::ArrayQueue; -use engine_traits::KvEngine; use futures::{ channel::oneshot, compat::Future01CompatExt, @@ -35,8 +34,9 @@ use kvproto::{ }; use protobuf::Message; use raft::SnapshotStatus; -use raftstore::{errors::DiscardReason, router::RaftStoreRouter}; +use raftstore::errors::DiscardReason; use security::SecurityManager; +use tikv_kv::RaftExtension; use tikv_util::{ config::{Tracker, VersionTrack}, lru::LruCache, @@ -346,18 +346,16 @@ impl Buffer for MessageBuffer { } /// Reporter reports whether a snapshot is sent successfully. -struct SnapshotReporter { - raft_router: T, - engine: PhantomData, +struct SnapshotReporter { + raft_router: R, region_id: u64, to_peer_id: u64, to_store_id: u64, } -impl SnapshotReporter +impl SnapshotReporter where - T: RaftStoreRouter + 'static, - E: KvEngine, + R: RaftExtension + 'static, { pub fn report(&self, status: SnapshotStatus) { debug!( @@ -374,43 +372,21 @@ where .inc(); } - if let Err(e) = - self.raft_router - .report_snapshot_status(self.region_id, self.to_peer_id, status) - { - error!(?e; - "report snapshot to peer failes"; - "to_peer_id" => self.to_peer_id, - "to_store_id" => self.to_store_id, - "region_id" => self.region_id, - ); - } + self.raft_router + .report_snapshot_status(self.region_id, self.to_peer_id, status); } } -fn report_unreachable(router: &R, msg: &RaftMessage) -where - R: RaftStoreRouter, - E: KvEngine, -{ +fn report_unreachable(router: &impl RaftExtension, msg: &RaftMessage) { let to_peer = msg.get_to_peer(); if msg.get_message().has_snapshot() { let store = to_peer.store_id.to_string(); REPORT_FAILURE_MSG_COUNTER .with_label_values(&["snapshot", &*store]) .inc(); - let res = router.report_snapshot_status(msg.region_id, to_peer.id, SnapshotStatus::Failure); - if let Err(e) = res { - error!( - ?e; - "reporting snapshot to peer fails"; - "to_peer_id" => to_peer.id, - "to_store_id" => 
to_peer.store_id, - "region_id" => msg.region_id, - ); - } + router.report_snapshot_status(msg.region_id, to_peer.id, SnapshotStatus::Failure); } - let _ = router.report_unreachable(msg.region_id, to_peer.id); + router.report_peer_unreachable(msg.region_id, to_peer.id); } fn grpc_error_is_unimplemented(e: &grpcio::Error) -> bool { @@ -422,7 +398,7 @@ fn grpc_error_is_unimplemented(e: &grpcio::Error) -> bool { } /// Struct tracks the lifetime of a `raft` or `batch_raft` RPC. -struct AsyncRaftSender { +struct AsyncRaftSender { sender: ClientCStreamSender, queue: Arc, buffer: B, @@ -430,23 +406,20 @@ struct AsyncRaftSender { snap_scheduler: Scheduler, addr: String, flush_timeout: Option, - _engine: PhantomData, } -impl AsyncRaftSender +impl AsyncRaftSender where - R: RaftStoreRouter + 'static, + R: RaftExtension + 'static, B: Buffer, - E: KvEngine, { - fn new_snapshot_reporter(&self, msg: &RaftMessage) -> SnapshotReporter { + fn new_snapshot_reporter(&self, msg: &RaftMessage) -> SnapshotReporter { let region_id = msg.get_region_id(); let to_peer_id = msg.get_to_peer().get_id(); let to_store_id = msg.get_to_peer().get_store_id(); SnapshotReporter { raft_router: self.router.clone(), - engine: PhantomData, region_id, to_peer_id, to_store_id, @@ -499,11 +472,10 @@ where } } -impl Future for AsyncRaftSender +impl Future for AsyncRaftSender where - R: RaftStoreRouter + Unpin + 'static, + R: RaftExtension + Unpin + 'static, B: Buffer + Unpin, - E: KvEngine, { type Output = grpcio::Result<()>; @@ -564,18 +536,17 @@ enum RaftCallRes { Disconnected, } -struct RaftCall { - sender: AsyncRaftSender, +struct RaftCall { + sender: AsyncRaftSender, receiver: ClientCStreamReceiver, lifetime: Option>, store_id: u64, } -impl RaftCall +impl RaftCall where - R: RaftStoreRouter + Unpin + 'static, + R: RaftExtension + Unpin + 'static, B: Buffer + Unpin, - E: KvEngine, { async fn poll(&mut self) { let res = futures::join!(&mut self.sender, &mut self.receiver); @@ -640,18 +611,16 @@ impl 
ConnectionBuilder { /// StreamBackEnd watches lifetime of a connection and handles reconnecting, /// spawn new RPC. -struct StreamBackEnd { +struct StreamBackEnd { store_id: u64, queue: Arc, builder: ConnectionBuilder, - engine: PhantomData, } -impl StreamBackEnd +impl StreamBackEnd where S: StoreAddrResolver, - R: RaftStoreRouter + Unpin + 'static, - E: KvEngine, + R: RaftExtension + Unpin + 'static, { fn resolve(&self) -> impl Future> { let (tx, rx) = oneshot::channel(); @@ -735,7 +704,6 @@ where snap_scheduler: self.builder.snap_scheduler.clone(), addr, flush_timeout: None, - _engine: PhantomData::, }, receiver: batch_stream, lifetime: Some(tx), @@ -760,7 +728,6 @@ where snap_scheduler: self.builder.snap_scheduler.clone(), addr, flush_timeout: None, - _engine: PhantomData::, }, receiver: stream, lifetime: Some(tx), @@ -802,14 +769,13 @@ async fn maybe_backoff(backoff: Duration, last_wake_time: &mut Option) /// 4. fallback to legacy API if incompatible /// /// Every failure during the process should trigger retry automatically. 
-async fn start( - back_end: StreamBackEnd, +async fn start( + back_end: StreamBackEnd, conn_id: usize, pool: Arc>, ) where S: StoreAddrResolver + Send, - R: RaftStoreRouter + Unpin + Send + 'static, - E: KvEngine, + R: RaftExtension + Unpin + Send + 'static, { let mut last_wake_time = None; let mut first_time = true; @@ -865,7 +831,7 @@ async fn start( back_end .builder .router - .broadcast_unreachable(back_end.store_id); + .report_store_unreachable(back_end.store_id); } continue; } else { @@ -896,7 +862,7 @@ async fn start( back_end .builder .router - .broadcast_unreachable(back_end.store_id); + .report_store_unreachable(back_end.store_id); addr_channel = None; first_time = false; } @@ -955,24 +921,22 @@ struct CachedQueue { /// } /// raft_client.flush(); /// ``` -pub struct RaftClient { +pub struct RaftClient { pool: Arc>, cache: LruCache<(u64, usize), CachedQueue>, need_flush: Vec<(u64, usize)>, full_stores: Vec<(u64, usize)>, future_pool: Arc>, builder: ConnectionBuilder, - engine: PhantomData, last_hash: (u64, u64), } -impl RaftClient +impl RaftClient where S: StoreAddrResolver + Send + 'static, - R: RaftStoreRouter + Unpin + Send + 'static, - E: KvEngine, + R: RaftExtension + Unpin + Send + 'static, { - pub fn new(builder: ConnectionBuilder) -> RaftClient { + pub fn new(builder: ConnectionBuilder) -> Self { let future_pool = Arc::new( yatp::Builder::new(thd_name!("raft-stream")) .max_thread_count(1) @@ -985,7 +949,6 @@ where full_stores: vec![], future_pool, builder, - engine: PhantomData::, last_hash: (0, 0), } } @@ -1018,7 +981,6 @@ where store_id, queue: queue.clone(), builder: self.builder.clone(), - engine: PhantomData::, }; self.future_pool .spawn(start(back_end, conn_id, self.pool.clone())); @@ -1170,7 +1132,7 @@ where } } -impl Clone for RaftClient +impl Clone for RaftClient where S: Clone, R: Clone, @@ -1183,7 +1145,6 @@ where full_stores: vec![], future_pool: self.future_pool.clone(), builder: self.builder.clone(), - engine: PhantomData::, 
last_hash: (0, 0), } } diff --git a/src/server/raftkv.rs b/src/server/raftkv/mod.rs similarity index 96% rename from src/server/raftkv.rs rename to src/server/raftkv/mod.rs index b6890262007..6c7169d043c 100644 --- a/src/server/raftkv.rs +++ b/src/server/raftkv/mod.rs @@ -1,5 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. +mod raft_extension; + // #[PerformanceCriticalPath] use std::{ borrow::Cow, @@ -34,6 +36,7 @@ use raft::{ eraftpb::{self, MessageType}, StateRole, }; +pub use raft_extension::RaftRouterWrap; use raftstore::{ coprocessor::{ dispatcher::BoxReadIndexObserver, Coprocessor, CoprocessorHost, ReadIndexObserver, @@ -42,7 +45,7 @@ use raftstore::{ router::{LocalReadRouter, RaftStoreRouter}, store::{ self, Callback as StoreCallback, RaftCmdExtraOpts, ReadIndexContext, ReadResponse, - RegionSnapshot, WriteResponse, + RegionSnapshot, StoreMsg, WriteResponse, }, }; use thiserror::Error; @@ -294,7 +297,7 @@ where E: KvEngine, S: RaftStoreRouter + LocalReadRouter + 'static, { - router: S, + router: RaftRouterWrap, engine: E, txn_extra_scheduler: Option>, region_leaders: Arc>>, @@ -308,7 +311,7 @@ where /// Create a RaftKv using specified configuration. pub fn new(router: S, engine: E, region_leaders: Arc>>) -> RaftKv { RaftKv { - router, + router: RaftRouterWrap::new(router), engine, txn_extra_scheduler: None, region_leaders, @@ -359,6 +362,12 @@ where Some(self.engine.clone()) } + type RaftExtension = RaftRouterWrap; + #[inline] + fn raft_extension(&self) -> &Self::RaftExtension { + &self.router + } + fn modify_on_kv_engine( &self, mut region_modifies: HashMap>, @@ -635,7 +644,7 @@ where // and scheduling operations for this region when propose/apply before we // start the actual data flashback transaction command in the next phase. 
let req = new_flashback_req(ctx, AdminCmdType::PrepareFlashback); - exec_admin(&self.router, req) + exec_admin(&*self.router, req) } fn end_flashback(&self, ctx: &Context) -> BoxFuture<'static, kv::Result<()>> { @@ -643,7 +652,16 @@ where // in `RegionLocalState` and region's meta, and when that admin cmd is applied, // will update the memory state of the flashback let req = new_flashback_req(ctx, AdminCmdType::FinishFlashback); - exec_admin(&self.router, req) + exec_admin(&*self.router, req) + } + + fn hint_change_in_range(&self, start_key: Vec, end_key: Vec) { + self.router + .send_store_msg(StoreMsg::ClearRegionSizeInRange { start_key, end_key }) + .unwrap_or_else(|e| { + // Warn and ignore it. + warn!("unsafe destroy range: failed sending ClearRegionSizeInRange"; "err" => ?e); + }); } } diff --git a/src/server/raftkv/raft_extension.rs b/src/server/raftkv/raft_extension.rs new file mode 100644 index 00000000000..d3178842489 --- /dev/null +++ b/src/server/raftkv/raft_extension.rs @@ -0,0 +1,177 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + marker::PhantomData, + ops::{Deref, DerefMut}, +}; + +use futures::future::BoxFuture; +use kvproto::{ + metapb::{Region, RegionEpoch}, + raft_cmdpb::{AdminCmdType, RaftCmdRequest}, + raft_serverpb::RaftMessage, +}; +use raft::SnapshotStatus; +use raftstore::{ + router::RaftStoreRouter, + store::{ + region_meta::{RaftStateRole, RegionMeta}, + CasualMessage, + }, +}; +use tikv_util::future::paired_future_callback; + +use crate::storage::kv; + +#[derive(Clone)] +pub struct RaftRouterWrap { + router: S, + _phantom: PhantomData, +} + +impl RaftRouterWrap { + pub fn new(router: S) -> Self { + Self { + router, + _phantom: PhantomData, + } + } +} + +impl Deref for RaftRouterWrap { + type Target = S; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.router + } +} + +impl DerefMut for RaftRouterWrap { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.router + } +} + +impl tikv_kv::RaftExtension for RaftRouterWrap +where + S: RaftStoreRouter + 'static, + E: engine_traits::KvEngine, +{ + #[inline] + fn feed(&self, msg: RaftMessage, key_message: bool) { + let region_id = msg.get_region_id(); + let msg_ty = msg.get_message().get_msg_type(); + // Channel full and region not found are ignored unless it's a key message. 
+ if let Err(e) = self.router.send_raft_msg(msg) && key_message { + error!("failed to send raft message"; "region_id" => region_id, "msg_ty" => ?msg_ty, "err" => ?e); + } + } + + #[inline] + fn report_reject_message(&self, region_id: u64, from_peer_id: u64) { + let m = CasualMessage::RejectRaftAppend { + peer_id: from_peer_id, + }; + let _ = self.router.send_casual_msg(region_id, m); + } + + #[inline] + fn report_peer_unreachable(&self, region_id: u64, to_peer_id: u64) { + let _ = self.router.report_unreachable(region_id, to_peer_id); + } + + #[inline] + fn report_store_unreachable(&self, store_id: u64) { + self.router.broadcast_unreachable(store_id); + } + + #[inline] + fn report_snapshot_status(&self, region_id: u64, to_peer_id: u64, status: SnapshotStatus) { + if let Err(e) = self + .router + .report_snapshot_status(region_id, to_peer_id, status) + { + error!(?e; + "report snapshot to peer failes"; + "to_peer_id" => to_peer_id, + "status" => ?status, + "region_id" => region_id, + ); + } + } + + #[inline] + fn report_resolved(&self, store_id: u64, group_id: u64) { + self.router.report_resolved(store_id, group_id); + } + + #[inline] + fn split( + &self, + region_id: u64, + region_epoch: RegionEpoch, + split_keys: Vec>, + source: String, + ) -> BoxFuture<'static, kv::Result>> { + let (cb, rx) = paired_future_callback(); + let req = CasualMessage::SplitRegion { + region_epoch, + split_keys, + callback: raftstore::store::Callback::write(cb), + source: source.into(), + }; + let res = self.router.send_casual_msg(region_id, req); + Box::pin(async move { + res?; + let mut admin_resp = box_try!(rx.await); + super::check_raft_cmd_response(&mut admin_resp.response)?; + let regions = admin_resp + .response + .mut_admin_response() + .mut_splits() + .take_regions(); + Ok(regions.into()) + }) + } + + /// Get the region meta of the given region. 
+ #[inline] + fn query_region(&self, region_id: u64) -> BoxFuture<'static, kv::Result> { + let (cb, rx) = paired_future_callback(); + let res = self + .router + .send_casual_msg(region_id, CasualMessage::AccessPeer(cb)); + Box::pin(async move { + res?; + Ok(box_try!(rx.await)) + }) + } + + /// Ask the raft group to do a consistency check. + fn check_consistency(&self, region_id: u64) -> BoxFuture<'static, kv::Result<()>> { + let region = self.query_region(region_id); + let router = self.router.clone(); + Box::pin(async move { + let meta: RegionMeta = region.await?; + let leader_id = meta.raft_status.soft_state.leader_id; + let mut leader = None; + for peer in meta.region_state.peers { + if peer.id == leader_id { + leader = Some(peer.into()); + } + } + if meta.raft_status.soft_state.raft_state != RaftStateRole::Leader { + return Err(raftstore::Error::NotLeader(region_id, leader).into()); + } + let mut req = RaftCmdRequest::default(); + req.mut_header().set_region_id(region_id); + req.mut_header().set_peer(leader.unwrap()); + req.mut_admin_request() + .set_cmd_type(AdminCmdType::ComputeHash); + let f = super::exec_admin(&router, req); + f.await + }) + } +} diff --git a/src/server/resolve.rs b/src/server/resolve.rs index acf60ae783f..c831ff28d17 100644 --- a/src/server/resolve.rs +++ b/src/server/resolve.rs @@ -2,15 +2,14 @@ use std::{ fmt::{self, Display, Formatter}, - marker::PhantomData, sync::{Arc, Mutex}, }; use collections::HashMap; -use engine_traits::KvEngine; use kvproto::replication_modepb::ReplicationMode; use pd_client::{take_peer_address, PdClient}; -use raftstore::{router::RaftStoreRouter, store::GlobalReplicationState}; +use raftstore::store::GlobalReplicationState; +use tikv_kv::RaftExtension; use tikv_util::{ time::Instant, worker::{Runnable, Scheduler, Worker}, @@ -52,24 +51,21 @@ struct StoreAddr { } /// A runner for resolving store addresses. 
-struct Runner +struct Runner where T: PdClient, - RR: RaftStoreRouter, - E: KvEngine, + R: RaftExtension, { pd_client: Arc, store_addrs: HashMap, state: Arc>, - router: RR, - engine: PhantomData, + router: R, } -impl Runner +impl Runner where T: PdClient, - RR: RaftStoreRouter, - E: KvEngine, + R: RaftExtension, { fn resolve(&mut self, store_id: u64) -> Result { if let Some(s) = self.store_addrs.get(&store_id) { @@ -128,11 +124,10 @@ where } } -impl Runnable for Runner +impl Runnable for Runner where T: PdClient, - RR: RaftStoreRouter, - E: KvEngine, + R: RaftExtension, { type Task = Task; fn run(&mut self, task: Task) { @@ -157,15 +152,14 @@ impl PdStoreAddrResolver { } /// Creates a new `PdStoreAddrResolver`. -pub fn new_resolver( +pub fn new_resolver( pd_client: Arc, worker: &Worker, - router: RR, + router: R, ) -> (PdStoreAddrResolver, Arc>) where T: PdClient + 'static, - RR: RaftStoreRouter, - E: KvEngine, + R: RaftExtension + 'static, { let state = Arc::new(Mutex::new(GlobalReplicationState::default())); let runner = Runner { @@ -173,7 +167,6 @@ where store_addrs: HashMap::default(), state: state.clone(), router, - engine: PhantomData, }; let scheduler = worker.start("addr-resolver", runner); let resolver = PdStoreAddrResolver::new(scheduler); @@ -190,16 +183,12 @@ impl StoreAddrResolver for PdStoreAddrResolver { #[cfg(test)] mod tests { - use std::{ - marker::PhantomData, net::SocketAddr, ops::Sub, str::FromStr, sync::Arc, thread, - time::Duration, - }; + use std::{net::SocketAddr, ops::Sub, str::FromStr, sync::Arc, thread, time::Duration}; use collections::HashMap; - use engine_test::kv::KvTestEngine; use kvproto::metapb; use pd_client::{PdClient, Result}; - use raftstore::router::RaftStoreBlackHole; + use tikv_kv::FakeExtension; use super::*; @@ -236,7 +225,7 @@ mod tests { store } - fn new_runner(store: metapb::Store) -> Runner { + fn new_runner(store: metapb::Store) -> Runner { let client = MockPdClient { start: Instant::now(), store, @@ -245,8 +234,7 
@@ mod tests { pd_client: Arc::new(client), store_addrs: HashMap::default(), state: Default::default(), - router: RaftStoreBlackHole, - engine: PhantomData, + router: FakeExtension, } } diff --git a/src/server/server.rs b/src/server/server.rs index a4d82f1e347..1921483e37b 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -13,10 +13,7 @@ use futures::{compat::Stream01CompatExt, stream::StreamExt}; use grpcio::{ChannelBuilder, Environment, ResourceQuota, Server as GrpcServer, ServerBuilder}; use grpcio_health::{create_health, HealthService, ServingStatus}; use kvproto::tikvpb::*; -use raftstore::{ - router::RaftStoreRouter, - store::{CheckLeaderTask, SnapManager}, -}; +use raftstore::store::{CheckLeaderTask, SnapManager}; use security::SecurityManager; use tikv_util::{ config::VersionTrack, @@ -58,8 +55,7 @@ pub const STATS_THREAD_PREFIX: &str = "transport-stats"; /// /// It hosts various internal components, including gRPC, the raftstore router /// and a snapshot worker. -pub struct Server + 'static, S: StoreAddrResolver + 'static, E: Engine> -{ +pub struct Server { env: Arc, /// A GrpcServer builder or a GrpcServer. /// @@ -68,8 +64,8 @@ pub struct Server + 'static, S: StoreAddrResolver + grpc_mem_quota: ResourceQuota, local_addr: SocketAddr, // Transport. - trans: ServerTransport, - raft_router: T, + trans: ServerTransport, + raft_router: E::RaftExtension, // For sending/receiving snapshots. 
snap_mgr: SnapManager, snap_worker: LazyWorker, @@ -83,8 +79,11 @@ pub struct Server + 'static, S: StoreAddrResolver + timer: Handle, } -impl + Unpin, S: StoreAddrResolver + 'static, E: Engine> - Server +impl Server +where + S: StoreAddrResolver + 'static, + E: Engine, + E::RaftExtension: Unpin, { #[allow(clippy::too_many_arguments)] pub fn new( @@ -94,10 +93,9 @@ impl + Unpin, S: StoreAddrResolver + 'static, E: En storage: Storage, copr: Endpoint, copr_v2: coprocessor_v2::Endpoint, - raft_router: T, resolver: S, snap_mgr: SnapManager, - gc_worker: GcWorker, + gc_worker: GcWorker, check_leader_scheduler: Scheduler, env: Arc, yatp_read_pool: Option, @@ -124,6 +122,7 @@ impl + Unpin, S: StoreAddrResolver + 'static, E: En let snap_worker = Worker::new("snap-handler"); let lazy_worker = snap_worker.lazy_build("snap-handler"); + let raft_ext = storage.get_engine().raft_extension().clone(); let proxy = Proxy::new(security_mgr.clone(), &env, Arc::new(cfg.value().clone())); let kv_service = KvService::new( @@ -132,7 +131,6 @@ impl + Unpin, S: StoreAddrResolver + 'static, E: En gc_worker, copr, copr_v2, - raft_router.clone(), lazy_worker.scheduler(), check_leader_scheduler, Arc::clone(&grpc_thread_load), @@ -170,7 +168,7 @@ impl + Unpin, S: StoreAddrResolver + 'static, E: En Arc::clone(cfg), security_mgr.clone(), resolver, - raft_router.clone(), + raft_ext.clone(), lazy_worker.scheduler(), grpc_thread_load.clone(), ); @@ -185,7 +183,7 @@ impl + Unpin, S: StoreAddrResolver + 'static, E: En grpc_mem_quota: mem_quota, local_addr: addr, trans, - raft_router, + raft_router: raft_ext, snap_mgr, snap_worker: lazy_worker, stats_pool, @@ -207,7 +205,7 @@ impl + Unpin, S: StoreAddrResolver + 'static, E: En self.snap_worker.scheduler() } - pub fn transport(&self) -> ServerTransport { + pub fn transport(&self) -> ServerTransport { self.trans.clone() } @@ -341,7 +339,7 @@ pub mod test_router { use engine_rocks::{RocksEngine, RocksSnapshot}; use kvproto::raft_serverpb::RaftMessage; - use 
raftstore::{store::*, Result as RaftStoreResult}; + use raftstore::{router::RaftStoreRouter, store::*, Result as RaftStoreResult}; use super::*; @@ -428,6 +426,7 @@ mod tests { use kvproto::raft_serverpb::RaftMessage; use raftstore::{ coprocessor::region_info_accessor::MockRegionInfoProvider, + router::RaftStoreRouter, store::{transport::Transport, *}, }; use resource_metering::ResourceTagFactory; @@ -445,8 +444,8 @@ mod tests { use crate::{ config::CoprReadPoolConfig, coprocessor::{self, readpool_impl}, - server::TestRaftStoreRouter, - storage::{lock_manager::MockLockManager, TestStorageBuilderApiV1}, + server::{raftkv::RaftRouterWrap, TestRaftStoreRouter}, + storage::{lock_manager::MockLockManager, TestEngineBuilder, TestStorageBuilderApiV1}, }; #[derive(Clone)] @@ -497,13 +496,19 @@ mod tests { ..Default::default() }; - let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) - .build() - .unwrap(); - let (tx, rx) = mpsc::channel(); let (significant_msg_sender, significant_msg_receiver) = mpsc::channel(); let router = TestRaftStoreRouter::new(tx, significant_msg_sender); + let engine = TestEngineBuilder::new() + .build() + .unwrap() + .with_raft_extension(RaftRouterWrap::new(router.clone())); + + let storage = + TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) + .build() + .unwrap(); + let env = Arc::new( EnvBuilder::new() .cq_count(1) @@ -514,7 +519,6 @@ mod tests { let (tx, _rx) = mpsc::channel(); let mut gc_worker = GcWorker::new( storage.get_engine(), - router.clone(), tx, Default::default(), Default::default(), @@ -556,7 +560,6 @@ mod tests { storage, copr, copr_v2, - router.clone(), MockResolver { quick_fail: Arc::clone(&quick_fail), addr: Arc::clone(&addr), diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index 30cc8342959..ae0d53bacda 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -1,9 +1,8 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
use engine_rocks::RocksEngine; -use engine_traits::{Engines, KvEngine, MiscExt, RaftEngine}; +use engine_traits::{Engines, MiscExt, RaftEngine}; use futures::{ - channel::oneshot, future::{Future, FutureExt, TryFutureExt}, sink::SinkExt, stream::{self, TryStreamExt}, @@ -12,17 +11,8 @@ use grpcio::{ Error as GrpcError, RpcContext, RpcStatus, RpcStatusCode, ServerStreamingSink, UnarySink, WriteFlags, }; -use kvproto::{ - debugpb::{self, *}, - raft_cmdpb::{ - AdminCmdType, AdminRequest, RaftCmdRequest, RaftRequestHeader, RegionDetailResponse, - StatusCmdType, StatusRequest, - }, -}; -use raftstore::{ - router::RaftStoreRouter, - store::msg::{Callback, RaftCmdExtraOpts}, -}; +use kvproto::debugpb::{self, *}; +use tikv_kv::RaftExtension; use tikv_util::metrics; use tokio::runtime::Handle; @@ -53,28 +43,26 @@ fn error_to_grpc_error(tag: &'static str, e: Error) -> GrpcError { /// Service handles the RPC messages for the `Debug` service. #[derive(Clone)] -pub struct Service> { +pub struct Service { pool: Handle, debugger: Debugger, raft_router: T, - _phantom: std::marker::PhantomData, } -impl> Service { - /// Constructs a new `Service` with `Engines`, a `RaftStoreRouter` and a +impl Service { + /// Constructs a new `Service` with `Engines`, a `RaftExtension` and a /// `GcWorker`. 
pub fn new( engines: Engines, pool: Handle, raft_router: T, cfg_controller: ConfigController, - ) -> Service { + ) -> Self { let debugger = Debugger::new(engines, cfg_controller); Service { pool, debugger, raft_router, - _phantom: Default::default(), } } @@ -99,9 +87,7 @@ impl> Service { } } -impl + 'static> debugpb::Debug - for Service -{ +impl debugpb::Debug for Service { fn get(&mut self, ctx: RpcContext<'_>, mut req: GetRequest, sink: UnarySink) { const TAG: &str = "debug_get"; @@ -386,18 +372,14 @@ impl + 'static> debugpb::De sink: UnarySink, ) { let region_id = req.get_region_id(); - let debugger = self.debugger.clone(); - let router1 = self.raft_router.clone(); - let router2 = self.raft_router.clone(); - - let consistency_check_task = async move { - let store_id = debugger.get_store_ident()?.store_id; - let detail = region_detail(router2, region_id, store_id).await?; - consistency_check(router1, detail).await + let f = self.raft_router.check_consistency(region_id); + let task = async move { + box_try!(f.await); + Ok(()) }; let f = self .pool - .spawn(consistency_check_task) + .spawn(task) .map(|res| res.unwrap()) .map_ok(|_| RegionConsistencyCheckResponse::default()); self.handle_response(ctx, sink, f, "check_region_consistency"); @@ -537,79 +519,6 @@ impl + 'static> debugpb::De } } -fn region_detail>( - raft_router: T, - region_id: u64, - store_id: u64, -) -> impl Future> { - let mut header = RaftRequestHeader::default(); - header.set_region_id(region_id); - header.mut_peer().set_store_id(store_id); - let mut status_request = StatusRequest::default(); - status_request.set_cmd_type(StatusCmdType::RegionDetail); - let mut raft_cmd = RaftCmdRequest::default(); - raft_cmd.set_header(header); - raft_cmd.set_status_request(status_request); - - let (tx, rx) = oneshot::channel(); - let cb = Callback::read(Box::new(|resp| tx.send(resp).unwrap())); - - async move { - raft_router - .send_command(raft_cmd, cb, RaftCmdExtraOpts::default()) - .map_err(|e| 
Error::Other(Box::new(e)))?; - - let mut r = rx.map_err(|e| Error::Other(Box::new(e))).await?; - - if r.response.get_header().has_error() { - let e = r.response.get_header().get_error(); - warn!("region_detail got error"; "err" => ?e); - return Err(Error::Other(e.message.clone().into())); - } - - let detail = r.response.take_status_response().take_region_detail(); - debug!("region_detail got region detail"; "detail" => ?detail); - let leader_store_id = detail.get_leader().get_store_id(); - if leader_store_id != store_id { - let msg = format!("Leader is on store {}", leader_store_id); - return Err(Error::Other(msg.into())); - } - Ok(detail) - } -} - -fn consistency_check>( - raft_router: T, - mut detail: RegionDetailResponse, -) -> impl Future> { - let mut header = RaftRequestHeader::default(); - header.set_region_id(detail.get_region().get_id()); - header.set_peer(detail.take_leader()); - let mut admin_request = AdminRequest::default(); - admin_request.set_cmd_type(AdminCmdType::ComputeHash); - let mut raft_cmd = RaftCmdRequest::default(); - raft_cmd.set_header(header); - raft_cmd.set_admin_request(admin_request); - - let (tx, rx) = oneshot::channel(); - let cb = Callback::read(Box::new(|resp| tx.send(resp).unwrap())); - - async move { - raft_router - .send_command(raft_cmd, cb, RaftCmdExtraOpts::default()) - .map_err(|e| Error::Other(Box::new(e)))?; - - let r = rx.map_err(|e| Error::Other(Box::new(e))).await?; - - if r.response.get_header().has_error() { - let e = r.response.get_header().get_error(); - warn!("consistency-check got error"; "err" => ?e); - return Err(Error::Other(e.message.clone().into())); - } - Ok(()) - } -} - mod region_size_response { pub type Entry = kvproto::debugpb::RegionSizeResponseEntry; } diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 7c40ab659eb..db50dfe459e 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -15,26 +15,19 @@ use grpcio::{ ClientStreamingSink, DuplexSink, Error as GrpcError, 
RequestStream, Result as GrpcResult, RpcContext, RpcStatus, RpcStatusCode, ServerStreamingSink, UnarySink, WriteFlags, }; -use kvproto::{ - coprocessor::*, - errorpb::{Error as RegionError, *}, - kvrpcpb::*, - mpp::*, - raft_serverpb::*, - tikvpb::*, -}; +use kvproto::{coprocessor::*, kvrpcpb::*, mpp::*, raft_serverpb::*, tikvpb::*}; use protobuf::RepeatedField; use raft::eraftpb::MessageType; use raftstore::{ - router::RaftStoreRouter, store::{ memory::{MEMTRACE_APPLYS, MEMTRACE_RAFT_ENTRIES, MEMTRACE_RAFT_MESSAGES}, metrics::RAFT_ENTRIES_CACHES_GAUGE, - Callback, CasualMessage, CheckLeaderTask, + CheckLeaderTask, }, - DiscardReason, Error as RaftStoreError, Result as RaftStoreResult, + Error as RaftStoreError, Result as RaftStoreResult, }; use tikv_alloc::trace::MemoryTraceGuard; +use tikv_kv::RaftExtension; use tikv_util::{ future::{paired_future_callback, poll_future_notify}, mpsc::future::{unbounded, BatchReceiver, Sender, WakePolicy}, @@ -69,18 +62,16 @@ const GRPC_MSG_MAX_BATCH_SIZE: usize = 128; const GRPC_MSG_NOTIFY_SIZE: usize = 8; /// Service handles the RPC messages for the `Tikv` service. -pub struct Service + 'static, E: Engine, L: LockManager, F: KvFormat> { +pub struct Service { store_id: u64, /// Used to handle requests related to GC. - gc_worker: GcWorker, + gc_worker: GcWorker, // For handling KV requests. storage: Storage, // For handling coprocessor requests. copr: Endpoint, // For handling corprocessor v2 requests. copr_v2: coprocessor_v2::Endpoint, - // For handling raft messages. - ch: T, // For handling snapshot. snap_scheduler: Scheduler, // For handling `CheckLeader` request. 
@@ -96,13 +87,7 @@ pub struct Service + 'static, E: Engine, L: LockMan reject_messages_on_memory_ratio: f64, } -impl< - T: RaftStoreRouter + Clone + 'static, - E: Engine + Clone, - L: LockManager + Clone, - F: KvFormat, -> Clone for Service -{ +impl Clone for Service { fn clone(&self) -> Self { Service { store_id: self.store_id, @@ -110,7 +95,6 @@ impl< storage: self.storage.clone(), copr: self.copr.clone(), copr_v2: self.copr_v2.clone(), - ch: self.ch.clone(), snap_scheduler: self.snap_scheduler.clone(), check_leader_scheduler: self.check_leader_scheduler.clone(), enable_req_batch: self.enable_req_batch, @@ -121,17 +105,14 @@ impl< } } -impl + 'static, E: Engine, L: LockManager, F: KvFormat> - Service -{ +impl Service { /// Constructs a new `Service` which provides the `Tikv` service. pub fn new( store_id: u64, storage: Storage, - gc_worker: GcWorker, + gc_worker: GcWorker, copr: Endpoint, copr_v2: coprocessor_v2::Endpoint, - ch: T, snap_scheduler: Scheduler, check_leader_scheduler: Scheduler, grpc_thread_load: Arc, @@ -145,7 +126,6 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor storage, copr, copr_v2, - ch, snap_scheduler, check_leader_scheduler, enable_req_batch, @@ -157,7 +137,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor fn handle_raft_message( store_id: u64, - ch: &T, + ch: &E::RaftExtension, msg: RaftMessage, reject: bool, ) -> RaftStoreResult<()> { @@ -172,13 +152,11 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor RAFT_APPEND_REJECTS.inc(); let id = msg.get_region_id(); let peer_id = msg.get_message().get_from(); - let m = CasualMessage::RejectRaftAppend { peer_id }; - let _ = ch.send_casual_msg(id, m); + ch.report_reject_message(id, peer_id); return Ok(()); } - // `send_raft_msg` may return `RaftStoreError::RegionNotFound` or - // `RaftStoreError::Transport(DiscardReason::Full)` - ch.send_raft_msg(msg) + ch.feed(msg, false); + Ok(()) } } @@ -228,9 +206,7 @@ macro_rules! 
set_total_time { }; } -impl + 'static, E: Engine, L: LockManager, F: KvFormat> Tikv - for Service -{ +impl Tikv for Service { handle_request!(kv_get, future_get, GetRequest, GetResponse, has_time_detail); handle_request!(kv_scan, future_scan, ScanRequest, ScanResponse); handle_request!( @@ -614,7 +590,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor sink: ClientStreamingSink, ) { let store_id = self.store_id; - let ch = self.ch.clone(); + let ch = self.storage.get_engine().raft_extension().clone(); let reject_messages_on_memory_ratio = self.reject_messages_on_memory_ratio; let res = async move { @@ -657,7 +633,7 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor ) { info!("batch_raft RPC is called, new gRPC stream established"); let store_id = self.store_id; - let ch = self.ch.clone(); + let ch = self.storage.get_engine().raft_extension().clone(); let reject_messages_on_memory_ratio = self.reject_messages_on_memory_ratio; let res = async move { @@ -726,7 +702,6 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor let begin_instant = Instant::now(); let region_id = req.get_context().get_region_id(); - let (cb, f) = paired_future_callback(); let mut split_keys = if req.is_raw_kv { if !req.get_split_key().is_empty() { vec![F::encode_raw_key_owned(req.take_split_key(), None).into_encoded()] @@ -747,52 +722,45 @@ impl + 'static, E: Engine, L: LockManager, F: KvFor } }; split_keys.sort(); - let req = CasualMessage::SplitRegion { - region_epoch: req.take_context().take_region_epoch(), + let engine = self.storage.get_engine(); + let f = engine.raft_extension().split( + region_id, + req.take_context().take_region_epoch(), split_keys, - callback: Callback::write(cb), - source: ctx.peer().into(), - }; - - if let Err(e) = self.ch.send_casual_msg(region_id, req) { - // Retrun region error instead a gRPC error. 
- let mut resp = SplitRegionResponse::default(); - resp.set_region_error(raftstore_error_to_region_error(e, region_id)); - ctx.spawn( - async move { - sink.success(resp).await?; - ServerResult::Ok(()) - } - .map_err(|_| ()) - .map(|_| ()), - ); - return; - } + ctx.peer(), + ); let task = async move { - let mut res = f.await?; + let res = f.await; let mut resp = SplitRegionResponse::default(); - if res.response.get_header().has_error() { - resp.set_region_error(res.response.mut_header().take_error()); - } else { - let admin_resp = res.response.mut_admin_response(); - let regions: Vec<_> = admin_resp.mut_splits().take_regions().into(); - if regions.len() < 2 { - error!( - "invalid split response"; - "region_id" => region_id, - "resp" => ?admin_resp - ); - resp.mut_region_error().set_message(format!( - "Internal Error: invalid response: {:?}", - admin_resp - )); - } else { - if regions.len() == 2 { - resp.set_left(regions[0].clone()); - resp.set_right(regions[1].clone()); + match res { + Ok(regions) => { + if regions.len() < 2 { + error!( + "invalid split response"; + "region_id" => region_id, + "resp" => ?regions + ); + resp.mut_region_error().set_message(format!( + "Internal Error: invalid response: {:?}", + regions + )); + } else { + if regions.len() == 2 { + resp.set_left(regions[0].clone()); + resp.set_right(regions[1].clone()); + } + resp.set_regions(regions.into()); + } + } + Err(e) => { + let err: crate::storage::Result<()> = Err(e.into()); + if let Some(err) = extract_region_error(&err) { + resp.set_region_error(err) + } else { + resp.mut_region_error() + .set_message(format!("failed to split: {:?}", err)); } - resp.set_regions(regions.into()); } } sink.success(resp).await?; @@ -2159,20 +2127,6 @@ fn collect_batch_resp(v: &mut MeasuredBatchResponse, mut e: MeasuredSingleRespon v.measures.push(e.measure); } -fn raftstore_error_to_region_error(e: RaftStoreError, region_id: u64) -> RegionError { - if let RaftStoreError::Transport(DiscardReason::Disconnected) = e 
{ - // `From::from(RaftStoreError) -> RegionError` treats `Disconnected` as `Other`. - let mut region_error = RegionError::default(); - let region_not_found = RegionNotFound { - region_id, - ..Default::default() - }; - region_error.set_region_not_found(region_not_found); - return region_error; - } - e.into() -} - fn needs_reject_raft_append(reject_messages_on_memory_ratio: f64) -> bool { fail_point!("needs_reject_raft_append", |_| true); if reject_messages_on_memory_ratio < f64::EPSILON { diff --git a/src/server/snap.rs b/src/server/snap.rs index 0200c779383..8fe737c2e60 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -3,7 +3,6 @@ use std::{ fmt::{self, Display, Formatter}, io::{Read, Write}, - marker::PhantomData, pin::Pin, sync::{ atomic::{AtomicUsize, Ordering}, @@ -12,7 +11,6 @@ use std::{ time::Duration, }; -use engine_traits::KvEngine; use file_system::{IoType, WithIoType}; use futures::{ future::{Future, TryFutureExt}, @@ -29,11 +27,9 @@ use kvproto::{ tikvpb::TikvClient, }; use protobuf::Message; -use raftstore::{ - router::RaftStoreRouter, - store::{SnapEntry, SnapKey, SnapManager, Snapshot}, -}; +use raftstore::store::{SnapEntry, SnapKey, SnapManager, Snapshot}; use security::SecurityManager; +use tikv_kv::RaftExtension; use tikv_util::{ config::{Tracker, VersionTrack}, time::Instant, @@ -260,7 +256,7 @@ impl RecvSnapContext { }) } - fn finish>(self, raft_router: R) -> Result<()> { + fn finish(self, raft_router: R) -> Result<()> { let _with_io_type = WithIoType::new(self.io_type); let key = self.key; if let Some(mut file) = self.file { @@ -271,15 +267,13 @@ impl RecvSnapContext { return Err(e); } } - if let Err(e) = raft_router.send_raft_msg(self.raft_msg) { - return Err(box_err!("{} failed to send snapshot to raft: {}", key, e)); - } + raft_router.feed(self.raft_msg, true); info!("saving all snapshot files"; "snap_key" => %key, "takes" => ?self.start.saturating_elapsed()); Ok(()) } } -fn recv_snap + 'static>( +fn recv_snap( stream: 
RequestStream, sink: ClientStreamingSink, snap_mgr: SnapManager, @@ -331,11 +325,7 @@ fn recv_snap + 'static>( } } -pub struct Runner -where - E: KvEngine, - R: RaftStoreRouter + 'static, -{ +pub struct Runner { env: Arc, snap_mgr: SnapManager, pool: Runtime, @@ -345,21 +335,16 @@ where cfg: Config, sending_count: Arc, recving_count: Arc, - engine: PhantomData, } -impl Runner -where - E: KvEngine, - R: RaftStoreRouter + 'static, -{ +impl Runner { pub fn new( env: Arc, snap_mgr: SnapManager, r: R, security_mgr: Arc, cfg: Arc>, - ) -> Runner { + ) -> Self { let cfg_tracker = cfg.clone().tracker("snap-sender".to_owned()); let snap_worker = Runner { env, @@ -377,7 +362,6 @@ where cfg: cfg.value().clone(), sending_count: Arc::new(AtomicUsize::new(0)), recving_count: Arc::new(AtomicUsize::new(0)), - engine: PhantomData, }; snap_worker } @@ -404,11 +388,7 @@ where } } -impl Runnable for Runner -where - E: KvEngine, - R: RaftStoreRouter + 'static, -{ +impl Runnable for Runner { type Task = Task; fn run(&mut self, task: Task) { diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index 837ec294fce..5dd83deb092 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -4,7 +4,6 @@ use std::{ convert::{TryFrom, TryInto}, fs::{self, File}, io::{Read, Write}, - marker::PhantomData, sync::{ atomic::{AtomicUsize, Ordering}, Arc, @@ -12,7 +11,6 @@ use std::{ time::Duration, }; -use engine_traits::KvEngine; use file_system::{IoType, WithIoType}; use futures::{ future::{Future, TryFutureExt}, @@ -28,11 +26,9 @@ use kvproto::{ tikvpb::TikvClient, }; use protobuf::Message; -use raftstore::{ - router::RaftStoreRouter, - store::snap::{TabletSnapKey, TabletSnapManager}, -}; +use raftstore::store::snap::{TabletSnapKey, TabletSnapManager}; use security::SecurityManager; +use tikv_kv::RaftExtension; use tikv_util::{ config::{Tracker, VersionTrack}, time::Instant, @@ -82,11 +78,9 @@ impl RecvTabletSnapContext { }) } - fn finish>(self, raft_router: R) -> 
Result<()> { + fn finish(self, raft_router: R) -> Result<()> { let key = self.key; - if let Err(e) = raft_router.send_raft_msg(self.raft_msg) { - return Err(box_err!("{} failed to send snapshot to raft: {}", key, e)); - } + raft_router.feed(self.raft_msg, true); info!("saving all snapshot files"; "snap_key" => %key, "takes" => ?self.start.saturating_elapsed()); Ok(()) } @@ -279,7 +273,7 @@ async fn recv_snap_files( Ok(context) } -fn recv_snap + 'static>( +fn recv_snap( stream: RequestStream, sink: ClientStreamingSink, snap_mgr: TabletSnapManager, @@ -302,11 +296,7 @@ fn recv_snap + 'static>( } } -pub struct TabletRunner -where - E: KvEngine, - R: RaftStoreRouter + 'static, -{ +pub struct TabletRunner { env: Arc, snap_mgr: TabletSnapManager, security_mgr: Arc, @@ -316,22 +306,17 @@ where cfg: Config, sending_count: Arc, recving_count: Arc, - engine: PhantomData, limiter: Limiter, } -impl TabletRunner -where - E: KvEngine, - R: RaftStoreRouter + 'static, -{ +impl TabletRunner { pub fn new( env: Arc, snap_mgr: TabletSnapManager, r: R, security_mgr: Arc, cfg: Arc>, - ) -> TabletRunner { + ) -> Self { let config = cfg.value().clone(); let cfg_tracker = cfg.tracker("tablet-sender".to_owned()); let limit = i64::try_from(config.snap_max_write_bytes_per_sec.0) @@ -358,7 +343,6 @@ where cfg: config, sending_count: Arc::new(AtomicUsize::new(0)), recving_count: Arc::new(AtomicUsize::new(0)), - engine: PhantomData, limiter, }; snap_worker @@ -385,11 +369,7 @@ pub struct SendStat { elapsed: Duration, } -impl Runnable for TabletRunner -where - E: KvEngine, - R: RaftStoreRouter + 'static, -{ +impl Runnable for TabletRunner { type Task = Task; fn run(&mut self, task: Task) { diff --git a/src/server/transport.rs b/src/server/transport.rs index e52bead3934..1303eff81f5 100644 --- a/src/server/transport.rs +++ b/src/server/transport.rs @@ -1,56 +1,45 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::marker::PhantomData; - -use engine_traits::KvEngine; use kvproto::raft_serverpb::RaftMessage; -use raftstore::{router::RaftStoreRouter, store::Transport, Result as RaftStoreResult}; +use raftstore::{store::Transport, Result as RaftStoreResult}; +use tikv_kv::RaftExtension; use crate::server::{raft_client::RaftClient, resolve::StoreAddrResolver}; -pub struct ServerTransport +pub struct ServerTransport where - T: RaftStoreRouter + 'static, + T: RaftExtension + 'static, S: StoreAddrResolver + 'static, - E: KvEngine, { - raft_client: RaftClient, - engine: PhantomData, + raft_client: RaftClient, } -impl Clone for ServerTransport +impl Clone for ServerTransport where - T: RaftStoreRouter + 'static, + T: RaftExtension + 'static, S: StoreAddrResolver + 'static, - E: KvEngine, { fn clone(&self) -> Self { ServerTransport { raft_client: self.raft_client.clone(), - engine: PhantomData, } } } -impl ServerTransport +impl ServerTransport where - E: KvEngine, - T: RaftStoreRouter + 'static, + T: RaftExtension + 'static, S: StoreAddrResolver + 'static, { - pub fn new(raft_client: RaftClient) -> ServerTransport { - ServerTransport { - raft_client, - engine: PhantomData, - } + pub fn new(raft_client: RaftClient) -> Self { + ServerTransport { raft_client } } } -impl Transport for ServerTransport +impl Transport for ServerTransport where - T: RaftStoreRouter + Unpin + 'static, + T: RaftExtension + Unpin + 'static, S: StoreAddrResolver + Unpin + 'static, - E: KvEngine, { fn send(&mut self, msg: RaftMessage) -> RaftStoreResult<()> { match self.raft_client.send(msg) { diff --git a/tests/failpoints/cases/test_gc_metrics.rs b/tests/failpoints/cases/test_gc_metrics.rs index e698031f0bc..348b81aaea7 100644 --- a/tests/failpoints/cases/test_gc_metrics.rs +++ b/tests/failpoints/cases/test_gc_metrics.rs @@ -19,7 +19,6 @@ use raftstore::{ coprocessor::{ region_info_accessor::MockRegionInfoProvider, CoprocessorHost, RegionChangeEvent, }, - router::RaftStoreBlackHole, RegionInfoAccessor, 
}; use tikv::{ @@ -142,7 +141,6 @@ fn test_txn_gc_keys_handled() { feature_gate.set_version("5.0.0").unwrap(); let mut gc_worker = GcWorker::new( prefixed_engine.clone(), - RaftStoreBlackHole, tx, GcConfig::default(), feature_gate, @@ -286,7 +284,6 @@ fn test_raw_gc_keys_handled() { let feature_gate = FeatureGate::default(); let mut gc_worker = GcWorker::new( prefixed_engine, - RaftStoreBlackHole, tx, GcConfig::default(), feature_gate, diff --git a/tests/integrations/config/dynamic/gc_worker.rs b/tests/integrations/config/dynamic/gc_worker.rs index e8b437f941a..623833c3b27 100644 --- a/tests/integrations/config/dynamic/gc_worker.rs +++ b/tests/integrations/config/dynamic/gc_worker.rs @@ -5,9 +5,7 @@ use std::{ time::Duration, }; -use raftstore::{ - coprocessor::region_info_accessor::MockRegionInfoProvider, router::RaftStoreBlackHole, -}; +use raftstore::coprocessor::region_info_accessor::MockRegionInfoProvider; use tikv::{ config::{ConfigController, Module, TikvConfig}, server::gc_worker::{GcConfig, GcTask, GcWorker}, @@ -27,15 +25,11 @@ fn test_gc_config_validate() { fn setup_cfg_controller( cfg: TikvConfig, -) -> ( - GcWorker, - ConfigController, -) { +) -> (GcWorker, ConfigController) { let engine = TestEngineBuilder::new().build().unwrap(); let (tx, _rx) = std::sync::mpsc::channel(); let mut gc_worker = GcWorker::new( engine, - RaftStoreBlackHole, tx, cfg.gc.clone(), Default::default(), diff --git a/tests/integrations/config/dynamic/snap.rs b/tests/integrations/config/dynamic/snap.rs index 5b9ef72b4c3..1a82ec8005e 100644 --- a/tests/integrations/config/dynamic/snap.rs +++ b/tests/integrations/config/dynamic/snap.rs @@ -15,6 +15,7 @@ use tikv::{ config::{ConfigController, TikvConfig}, server::{ config::{Config as ServerConfig, ServerConfigManager}, + raftkv::RaftRouterWrap, snap::{Runner as SnapHandler, Task as SnapTask}, }, }; @@ -60,7 +61,7 @@ fn start_server( let snap_runner = SnapHandler::new( Arc::clone(&env), snap_mgr.clone(), - raft_router, + 
RaftRouterWrap::new(raft_router), security_mgr, Arc::clone(&server_config), ); diff --git a/tests/integrations/server/raft_client.rs b/tests/integrations/server/raft_client.rs index edf4d0f1c65..fa7a86f12c4 100644 --- a/tests/integrations/server/raft_client.rs +++ b/tests/integrations/server/raft_client.rs @@ -9,7 +9,6 @@ use std::{ time::Duration, }; -use engine_rocks::RocksEngine; use futures::{FutureExt, StreamExt, TryStreamExt}; use grpcio::{ ClientStreamingSink, Environment, RequestStream, RpcContext, RpcStatus, RpcStatusCode, Server, @@ -20,15 +19,12 @@ use kvproto::{ tikvpb::BatchRaftMessage, }; use raft::eraftpb::Entry; -use raftstore::{ - errors::DiscardReason, - router::{RaftStoreBlackHole, RaftStoreRouter}, - store::StoreMsg, -}; +use raftstore::{errors::DiscardReason, store::StoreMsg}; use tikv::server::{ - self, load_statistics::ThreadLoadPool, resolve, resolve::Callback, Config, ConnectionBuilder, - RaftClient, StoreAddrResolver, TestRaftStoreRouter, + self, load_statistics::ThreadLoadPool, raftkv::RaftRouterWrap, resolve, resolve::Callback, + Config, ConnectionBuilder, RaftClient, StoreAddrResolver, TestRaftStoreRouter, }; +use tikv_kv::{FakeExtension, RaftExtension}; use tikv_util::{ config::{ReadableDuration, VersionTrack}, worker::{Builder as WorkerBuilder, LazyWorker}, @@ -55,9 +51,9 @@ impl StoreAddrResolver for StaticResolver { } } -fn get_raft_client(router: R, resolver: T) -> RaftClient +fn get_raft_client(router: R, resolver: T) -> RaftClient where - R: RaftStoreRouter + Unpin + 'static, + R: RaftExtension + Unpin + 'static, T: StoreAddrResolver + 'static, { let env = Arc::new(Environment::new(2)); @@ -80,10 +76,8 @@ where RaftClient::new(builder) } -fn get_raft_client_by_port( - port: u16, -) -> RaftClient { - get_raft_client(RaftStoreBlackHole, StaticResolver::new(port)) +fn get_raft_client_by_port(port: u16) -> RaftClient { + get_raft_client(FakeExtension, StaticResolver::new(port)) } #[derive(Clone)] @@ -183,7 +177,8 @@ fn 
test_raft_client_reconnect() { let (tx, rx) = mpsc::channel(); let (significant_msg_sender, _significant_msg_receiver) = mpsc::channel(); let router = TestRaftStoreRouter::new(tx, significant_msg_sender); - let mut raft_client = get_raft_client(router, StaticResolver::new(port)); + let wrap = RaftRouterWrap::new(router); + let mut raft_client = get_raft_client(wrap, StaticResolver::new(port)); (0..50).for_each(|_| raft_client.send(RaftMessage::default()).unwrap()); raft_client.flush(); @@ -223,7 +218,8 @@ fn test_raft_client_report_unreachable() { let (tx, rx) = mpsc::channel(); let (significant_msg_sender, _significant_msg_receiver) = mpsc::channel(); let router = TestRaftStoreRouter::new(tx, significant_msg_sender); - let mut raft_client = get_raft_client(router, StaticResolver::new(port)); + let wrap = RaftRouterWrap::new(router); + let mut raft_client = get_raft_client(wrap, StaticResolver::new(port)); // server is disconnected mock_server.shutdown(); @@ -386,15 +382,14 @@ fn test_tombstone_block_list() { let bg_worker = WorkerBuilder::new(thd_name!("background")) .thread_count(2) .create(); - let resolver = - resolve::new_resolver::<_, _, RocksEngine>(pd_client, &bg_worker, RaftStoreBlackHole).0; + let resolver = resolve::new_resolver(pd_client, &bg_worker, FakeExtension).0; let msg_count = Arc::new(AtomicUsize::new(0)); let batch_msg_count = Arc::new(AtomicUsize::new(0)); let service = MockKvForRaft::new(Arc::clone(&msg_count), Arc::clone(&batch_msg_count), true); let (_mock_server, port) = create_mock_server(service, 60200, 60300).unwrap(); - let mut raft_client = get_raft_client(RaftStoreBlackHole, resolver); + let mut raft_client = get_raft_client(FakeExtension, resolver); let mut store1 = metapb::Store::default(); store1.set_id(1); @@ -443,9 +438,8 @@ fn test_store_allowlist() { let bg_worker = WorkerBuilder::new(thd_name!("background")) .thread_count(2) .create(); - let resolver = - resolve::new_resolver::<_, _, RocksEngine>(pd_client, &bg_worker, 
RaftStoreBlackHole).0; - let mut raft_client = get_raft_client(RaftStoreBlackHole, resolver); + let resolver = resolve::new_resolver(pd_client, &bg_worker, FakeExtension).0; + let mut raft_client = get_raft_client(FakeExtension, resolver); let msg_count1 = Arc::new(AtomicUsize::new(0)); let batch_msg_count1 = Arc::new(AtomicUsize::new(0)); From 4df0ad4856b4acd65c3a0c5e1dd8f9a71443dfab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 1 Dec 2022 15:18:01 +0800 Subject: [PATCH 0377/1149] log-backup: use `openssl` to replace `rustls` implement in backup-stream (#13851) ref tikv/tikv#13867 Signed-off-by: hillium Signed-off-by: Yu Juncen Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- Cargo.lock | 466 ++++++++++-------- components/backup-stream/Cargo.toml | 8 +- .../backup-stream/src/checkpoint_manager.rs | 2 +- .../src/metadata/store/lazy_etcd.rs | 28 +- components/cloud/aws/Cargo.toml | 2 +- components/security/Cargo.toml | 4 - components/security/src/lib.rs | 41 +- components/server/Cargo.toml | 2 +- components/server/src/server.rs | 8 +- 9 files changed, 334 insertions(+), 227 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1ccf961796e..a553d16f822 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -130,7 +130,7 @@ dependencies = [ "lexical-core", "multiversion", "num 0.4.0", - "rand 0.8.3", + "rand 0.8.5", "regex", "serde", "serde_derive", @@ -231,9 +231,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.22" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8df72488e87761e772f14ae0c2480396810e51b2c2ade912f97f0f7e5b95e3c" +checksum = "1e805d94e6b5001b651426cf4cd446b1ab5f319d27bab5c644f61de0a804360c" dependencies = [ "proc-macro2", "quote", @@ -299,6 +299,51 @@ dependencies = [ "uuid 0.8.2", ] +[[package]] +name = "axum" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"acee9fd5073ab6b045a275b3e709c163dd36c90685219cb21804a147b58dba43" +dependencies = [ + "async-trait", + "axum-core", + "bitflags", + "bytes", + "futures-util", + "http", + "http-body", + "hyper", + "itoa 1.0.1", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "serde", + "sync_wrapper", + "tokio", + "tower", + "tower-http", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e5939e02c56fecd5c017c37df4238c0a839fa76b7f97acdd7efb804fd181cc" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http", + "http-body", + "mime", + "tower-layer", + "tower-service", +] + [[package]] name = "azure" version = "0.0.1" @@ -336,7 +381,7 @@ dependencies = [ "http", "log", "oauth2", - "rand 0.8.3", + "rand 0.8.5", "reqwest", "rustc_version 0.4.0", "serde", @@ -438,7 +483,7 @@ dependencies = [ "prometheus", "raft", "raftstore", - "rand 0.8.3", + "rand 0.8.5", "security", "serde", "serde_derive", @@ -481,6 +526,7 @@ dependencies = [ "futures-io", "grpcio", "hex 0.4.2", + "indexmap", "kvproto", "lazy_static", "log_wrappers", @@ -491,9 +537,10 @@ dependencies = [ "protobuf", "raft", "raftstore", - "rand 0.8.3", + "rand 0.8.5", "regex", "resolved_ts", + "security", "slog", "slog-global", "tempdir", @@ -508,7 +555,7 @@ dependencies = [ "tikv_util", "tokio", "tokio-stream", - "tokio-util 0.7.2", + "tokio-util", "tonic", "txn_types", "url", @@ -971,7 +1018,7 @@ dependencies = [ "libc 0.2.132", "panic_hook", "protobuf", - "rand 0.8.3", + "rand 0.8.5", "static_assertions", "thiserror", "tikv_alloc", @@ -995,7 +1042,7 @@ dependencies = [ "futures 0.3.15", "kvproto", "parking_lot 0.12.0", - "rand 0.8.3", + "rand 0.8.5", "tikv_alloc", "tikv_util", "tokio", @@ -1455,7 +1502,7 @@ dependencies = [ "openssl", "prometheus", "protobuf", - "rand 0.8.3", + "rand 0.8.5", "serde", "serde_derive", "slog", @@ -1525,7 +1572,7 @@ dependencies 
= [ "prometheus-static-metric", "protobuf", "raft", - "rand 0.8.3", + "rand 0.8.5", "regex", "rocksdb", "serde", @@ -1674,15 +1721,19 @@ dependencies = [ [[package]] name = "etcd-client" -version = "0.7.2" -source = "git+https://github.com/pingcap/etcd-client?rev=e0321a1990ee561cf042973666c0db61c8d82364#e0321a1990ee561cf042973666c0db61c8d82364" +version = "0.10.2" +source = "git+https://github.com/pingcap/etcd-client?rev=14a6f8731f1890d5fd2f6e16a9f0d0a306b0599e#14a6f8731f1890d5fd2f6e16a9f0d0a306b0599e" dependencies = [ "http", + "hyper", + "hyper-openssl", + "openssl", "prost", "tokio", "tokio-stream", "tonic", "tonic-build", + "tower", "tower-service", "visible", ] @@ -1724,7 +1775,7 @@ dependencies = [ "openssl", "prometheus", "protobuf", - "rand 0.8.3", + "rand 0.8.5", "rusoto_core", "rust-ini", "slog", @@ -1734,7 +1785,7 @@ dependencies = [ "tikv_alloc", "tikv_util", "tokio", - "tokio-util 0.7.2", + "tokio-util", "url", ] @@ -1775,7 +1826,7 @@ dependencies = [ "tempfile", "tikv_util", "tokio", - "tokio-util 0.7.2", + "tokio-util", "url", ] @@ -1787,7 +1838,7 @@ checksum = "ec3245a0ca564e7f3c797d20d833a6870f57a728ac967d5225b3ffdef4465011" dependencies = [ "lazy_static", "log", - "rand 0.8.3", + "rand 0.8.5", ] [[package]] @@ -1823,7 +1874,7 @@ dependencies = [ "parking_lot 0.12.0", "prometheus", "prometheus-static-metric", - "rand 0.8.3", + "rand 0.8.5", "serde", "slog", "slog-global", @@ -1872,9 +1923,9 @@ dependencies = [ [[package]] name = "fixedbitset" -version = "0.2.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flatbuffers" @@ -2304,9 +2355,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.3" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"825343c4eef0b63f541f8903f395dc5beb362a979b5799a84062527ef1e37726" +checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4" dependencies = [ "bytes", "fnv", @@ -2317,7 +2368,7 @@ dependencies = [ "indexmap", "slab", "tokio", - "tokio-util 0.6.6", + "tokio-util", "tracing", ] @@ -2401,31 +2452,37 @@ dependencies = [ [[package]] name = "http" -version = "0.2.4" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "527e8c9ac747e28542699a951517aa9a6945af506cd1f2e1b53a576c17b6cc11" +checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" dependencies = [ "bytes", "fnv", - "itoa 0.4.4", + "itoa 1.0.1", ] [[package]] name = "http-body" -version = "0.4.2" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60daa14be0e0786db0f03a9e57cb404c9d756eed2b6c62b9ea98ec5743ec75a9" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" dependencies = [ "bytes", "http", "pin-project-lite", ] +[[package]] +name = "http-range-header" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bfe8eed0a9285ef776bb792479ea3834e8b94e13d615c2f66d03dd50a435a29" + [[package]] name = "httparse" -version = "1.4.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a87b616e37e93c22fb19bcd386f02f3af5ea98a25670ad0fce773de23c5e68" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" [[package]] name = "httpdate" @@ -2441,9 +2498,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.11" +version = "0.14.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b61cf2d1aebcf6e6352c97b81dc2244ca29194be1b276f5d8ad5c6330fffb11" +checksum = "034711faac9d2166cb1baf1a2fb0b60b1f277f8492fd72176c17f3515e1abd3c" dependencies = [ "bytes", 
"futures-channel", @@ -2454,7 +2511,7 @@ dependencies = [ "http-body", "httparse", "httpdate", - "itoa 0.4.4", + "itoa 1.0.1", "pin-project-lite", "socket2", "tokio", @@ -2943,6 +3000,12 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" +[[package]] +name = "matchit" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73cbba799671b762df5a175adf59ce145165747bb891505c43d09aefbbf38beb" + [[package]] name = "md-5" version = "0.9.1" @@ -3016,9 +3079,9 @@ dependencies = [ [[package]] name = "mime" -version = "0.3.14" +version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd1d63acd1b78403cc0c325605908475dd9b9a3acbf65ed8bcab97e27014afcf" +checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" [[package]] name = "minimal-lexical" @@ -3058,7 +3121,7 @@ dependencies = [ "kernel32-sys", "libc 0.2.132", "log", - "miow 0.2.2", + "miow", "net2", "slab", "winapi 0.2.8", @@ -3066,15 +3129,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.0" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba272f85fa0b41fc91872be579b3bbe0f56b792aa361a380eb669469f68dafb2" +checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" dependencies = [ "libc 0.2.132", "log", - "miow 0.3.7", - "ntapi", - "winapi 0.3.9", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys 0.42.0", ] [[package]] @@ -3101,15 +3163,6 @@ dependencies = [ "ws2_32-sys", ] -[[package]] -name = "miow" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21" -dependencies = [ - "winapi 0.3.9", -] - [[package]] name = "mmap" version = "0.1.1" @@ -3445,7 +3498,7 @@ dependencies = [ "chrono", "getrandom 0.2.3", "http", - "rand 
0.8.3", + "rand 0.8.5", "reqwest", "serde", "serde_json", @@ -3466,9 +3519,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.10.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" +checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" [[package]] name = "online_config" @@ -3633,7 +3686,7 @@ dependencies = [ "libc 0.2.132", "redox_syscall 0.2.11", "smallvec", - "windows-sys", + "windows-sys 0.32.0", ] [[package]] @@ -3724,9 +3777,9 @@ dependencies = [ [[package]] name = "petgraph" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "467d164a6de56270bd7c4d070df81d07beace25012d5103ced4e9ff08d6afdb7" +checksum = "4a13a2fa9d0b63e5f22328828741e523766fff0ee9e779316902290dff3f824f" dependencies = [ "fixedbitset", "indexmap", @@ -3758,7 +3811,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d43f3220d96e0080cc9ea234978ccd80d904eafb17be31bb0f76daaea6493082" dependencies = [ "phf_shared", - "rand 0.8.3", + "rand 0.8.5", ] [[package]] @@ -3792,9 +3845,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.6" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc0e1f259c92177c30a4c9d177246edd0a3568b25756a977d0632cf8fa37e905" +checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" [[package]] name = "pin-utils" @@ -3895,6 +3948,16 @@ version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" +[[package]] +name = "prettyplease" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c142c0e46b57171fe0c528bee8c5b7569e80f0c17e377cd0e30ea57dbc11bb51" +dependencies = [ + "proc-macro2", + "syn", +] + 
[[package]] name = "proc-macro-error" version = "1.0.4" @@ -3933,11 +3996,11 @@ checksum = "369a6ed065f249a159e06c45752c780bda2fb53c995718f9e484d08daa9eb42e" [[package]] name = "proc-macro2" -version = "1.0.36" +version = "1.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029" +checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" dependencies = [ - "unicode-xid", + "unicode-ident", ] [[package]] @@ -4006,9 +4069,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.8.0" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de5e2533f59d08fcf364fd374ebda0692a70bd6d7e66ef97f306f45c6c5d8020" +checksum = "a0841812012b2d4a6145fae9a6af1534873c32aa67fff26bd09f8fa42c83f95a" dependencies = [ "bytes", "prost-derive", @@ -4016,27 +4079,31 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.8.0" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "355f634b43cdd80724ee7848f95770e7e70eefa6dcf14fea676216573b8fd603" +checksum = "1d8b442418ea0822409d9e7d047cbf1e7e9e1760b172bf9982cf29d517c93511" dependencies = [ "bytes", - "heck 0.3.1", + "heck 0.4.0", "itertools", + "lazy_static", "log", "multimap", "petgraph", + "prettyplease", "prost", "prost-types", + "regex", + "syn", "tempfile", "which", ] [[package]] name = "prost-derive" -version = "0.8.0" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "600d2f334aa05acb02a755e217ef1ab6dea4d51b58b7846588b747edec04efba" +checksum = "164ae68b6587001ca506d3bf7f1000bfa248d0e1217b618108fba4ec1d0cc306" dependencies = [ "anyhow", "itertools", @@ -4047,9 +4114,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.8.0" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"603bbd6394701d13f3f25aada59c7de9d35a6a5887cfc156181234a44002771b" +checksum = "747761bc3dc48f9a34553bf65605cf6cb6288ba219f3450b4275dbd81539551a" dependencies = [ "bytes", "prost", @@ -4125,7 +4192,7 @@ dependencies = [ "getset", "protobuf", "raft-proto", - "rand 0.8.3", + "rand 0.8.5", "slog", "thiserror", ] @@ -4254,7 +4321,7 @@ dependencies = [ "protobuf", "raft", "raft-proto", - "rand 0.8.3", + "rand 0.8.5", "resource_metering", "serde", "serde_derive", @@ -4339,19 +4406,18 @@ dependencies = [ "libc 0.2.132", "rand_chacha 0.2.1", "rand_core 0.5.1", - "rand_hc 0.2.0", + "rand_hc", ] [[package]] name = "rand" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc 0.2.132", "rand_chacha 0.3.0", "rand_core 0.6.2", - "rand_hc 0.3.0", ] [[package]] @@ -4416,15 +4482,6 @@ dependencies = [ "rand_core 0.5.1", ] -[[package]] -name = "rand_hc" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" -dependencies = [ - "rand_core 0.6.2", -] - [[package]] name = "rand_isaac" version = "0.3.0" @@ -4636,7 +4693,7 @@ dependencies = [ "pin-project", "procinfo", "prometheus", - "rand 0.8.3", + "rand 0.8.5", "serde", "serde_derive", "slog", @@ -4881,19 +4938,6 @@ dependencies = [ "semver 1.0.4", ] -[[package]] -name = "rustls" -version = "0.19.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35edb675feee39aec9c99fa5ff985081995a06d594114ae14cbe797ad7b7a6d7" -dependencies = [ - "base64", - "log", - "ring", - "sct", - "webpki", -] - [[package]] name = "rustversion" version = "1.0.4" @@ -4937,16 +4981,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "sct" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3042af939fca8c3453b7af0f1c66e533a15a86169e39de2657310ade8f98d3c" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "seahash" version = "4.1.0" @@ -4965,7 +4999,6 @@ dependencies = [ "serde_json", "tempfile", "tikv_util", - "tonic", ] [[package]] @@ -5193,7 +5226,7 @@ dependencies = [ "raft", "raft_log_engine", "raftstore", - "rand 0.8.3", + "rand 0.8.5", "resolved_ts", "resource_metering", "security", @@ -5410,9 +5443,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.4.4" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0" +checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" dependencies = [ "libc 0.2.132", "winapi 0.3.9", @@ -5600,13 +5633,13 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.86" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b" +checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" dependencies = [ "proc-macro2", "quote", - "unicode-xid", + "unicode-ident", ] [[package]] @@ -5708,7 +5741,7 @@ checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" dependencies = [ "cfg-if 1.0.0", "libc 0.2.132", - "rand 0.8.3", + "rand 0.8.5", "redox_syscall 0.2.11", "remove_dir_all", "winapi 0.3.9", @@ -5752,7 +5785,7 @@ dependencies = [ "grpcio", "kvproto", "protobuf", - "rand 0.8.3", + "rand 0.8.5", "tempfile", "test_raftstore", "tidb_query_common", @@ -5847,7 +5880,7 @@ dependencies = [ "protobuf", "raft", "raftstore", - "rand 0.8.3", + "rand 0.8.5", "resolved_ts", "resource_metering", "security", @@ -5903,7 +5936,7 @@ dependencies = [ 
"fail", "grpcio", "kvproto", - "rand 0.8.3", + "rand 0.8.5", "rand_isaac", "security", "slog", @@ -5960,7 +5993,7 @@ dependencies = [ "raft", "raft_log_engine", "raftstore", - "rand 0.8.3", + "rand 0.8.5", "rand_xorshift", "resource_metering", "security", @@ -6174,7 +6207,7 @@ dependencies = [ "panic_hook", "profiler", "protobuf", - "rand 0.8.3", + "rand 0.8.5", "regex", "safemem", "serde", @@ -6351,7 +6384,7 @@ dependencies = [ "raft-engine-ctl", "raft_log_engine", "raftstore", - "rand 0.8.3", + "rand 0.8.5", "regex", "security", "serde_json", @@ -6506,7 +6539,7 @@ dependencies = [ "prometheus", "prometheus-static-metric", "protobuf", - "rand 0.8.3", + "rand 0.8.5", "regex", "rusoto_core", "serde", @@ -6574,16 +6607,16 @@ dependencies = [ [[package]] name = "tokio" -version = "1.17.0" +version = "1.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af73ac49756f3f7c01172e34a23e5d0216f6c32333757c2c61feb2bbff5a5ee" +checksum = "a9e03c497dc955702ba729190dc4aac6f2a0ce97f913e5b1b5912fc5039d9099" dependencies = [ + "autocfg", "bytes", "libc 0.2.132", "memchr", - "mio 0.8.0", + "mio 0.8.5", "num_cpus", - "once_cell", "parking_lot 0.12.0", "pin-project-lite", "signal-hook-registry", @@ -6645,22 +6678,11 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-rustls" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc6844de72e57df1980054b38be3a9f4702aba4858be64dd700181a8a6d0e1b6" -dependencies = [ - "rustls", - "tokio", - "webpki", -] - [[package]] name = "tokio-stream" -version = "0.1.8" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50145484efff8818b5ccd256697f36863f587da82cf8b409c53adf1e840798e3" +checksum = "d660770404473ccd7bc9f8b28494a811bc18542b915c0855c51e8f419d5223ce" dependencies = [ "futures-core", "pin-project-lite", @@ -6678,20 +6700,6 @@ dependencies = [ "tokio-executor", ] -[[package]] -name = "tokio-util" -version = 
"0.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "940a12c99365c31ea8dd9ba04ec1be183ffe4920102bb7122c2f515437601e8e" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "log", - "pin-project-lite", - "tokio", -] - [[package]] name = "tokio-util" version = "0.7.2" @@ -6704,6 +6712,7 @@ dependencies = [ "futures-sink", "pin-project-lite", "tokio", + "tracing", ] [[package]] @@ -6717,12 +6726,13 @@ dependencies = [ [[package]] name = "tonic" -version = "0.5.2" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "796c5e1cd49905e65dd8e700d4cb1dffcbfdb4fc9d017de08c1a537afd83627c" +checksum = "55b9af819e54b8f33d453655bef9b9acc171568fb49523078d0cc4e7484200ec" dependencies = [ "async-stream 0.3.3", "async-trait", + "axum", "base64", "bytes", "futures-core", @@ -6737,9 +6747,8 @@ dependencies = [ "prost", "prost-derive", "tokio", - "tokio-rustls", "tokio-stream", - "tokio-util 0.6.6", + "tokio-util", "tower", "tower-layer", "tower-service", @@ -6749,10 +6758,11 @@ dependencies = [ [[package]] name = "tonic-build" -version = "0.5.2" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12b52d07035516c2b74337d2ac7746075e7dcae7643816c1b12c5ff8a7484c08" +checksum = "48c6fd7c2581e36d63388a9e04c350c21beb7a8b059580b2e93993c526899ddc" dependencies = [ + "prettyplease", "proc-macro2", "prost-build", "quote", @@ -6761,24 +6771,43 @@ dependencies = [ [[package]] name = "tower" -version = "0.4.8" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f60422bc7fefa2f3ec70359b8ff1caff59d785877eb70595904605bcc412470f" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", "indexmap", "pin-project", - "rand 0.8.3", + "pin-project-lite", + "rand 0.8.5", "slab", "tokio", - "tokio-stream", - "tokio-util 0.6.6", + "tokio-util", "tower-layer", 
"tower-service", "tracing", ] +[[package]] +name = "tower-http" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c530c8675c1dbf98facee631536fa116b5fb6382d7dd6dc1b118d970eafe3ba" +dependencies = [ + "bitflags", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + "http-range-header", + "pin-project-lite", + "tower", + "tower-layer", + "tower-service", +] + [[package]] name = "tower-layer" version = "0.3.1" @@ -6787,9 +6816,9 @@ checksum = "343bc9466d3fe6b0f960ef45960509f84480bf4fd96f92901afe7ff3df9d3a62" [[package]] name = "tower-service" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" @@ -6883,7 +6912,7 @@ dependencies = [ "kvproto", "log_wrappers", "panic_hook", - "rand 0.8.3", + "rand 0.8.5", "slog", "thiserror", "tikv_alloc", @@ -6917,6 +6946,12 @@ dependencies = [ "matches", ] +[[package]] +name = "unicode-ident" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" + [[package]] name = "unicode-normalization" version = "0.1.12" @@ -6938,12 +6973,6 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7007dbd421b92cc6e28410fe7362e2e0a2503394908f417b68ec8d1c364c4e20" -[[package]] -name = "unicode-xid" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" - [[package]] name = "untrusted" version = "0.7.1" @@ -7063,6 +7092,12 @@ version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + [[package]] name = "wasm-bindgen" version = "0.2.79" @@ -7141,16 +7176,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "webpki" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab146130f5f790d45f82aeeb09e55a256573373ec64409fc19a6fb82fb1032ae" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "which" version = "4.2.4" @@ -7211,43 +7236,100 @@ version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3df6e476185f92a12c072be4a189a0210dcdcf512a1891d6dff9edb874deadc6" dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", + "windows_aarch64_msvc 0.32.0", + "windows_i686_gnu 0.32.0", + "windows_i686_msvc 0.32.0", + "windows_x86_64_gnu 0.32.0", + "windows_x86_64_msvc 0.32.0", ] +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc 0.42.0", + "windows_i686_gnu 0.42.0", + "windows_i686_msvc 0.42.0", + "windows_x86_64_gnu 0.42.0", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc 0.42.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" + [[package]] name = "windows_aarch64_msvc" version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d8e92753b1c443191654ec532f14c199742964a061be25d77d7a96f09db20bf5" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" + [[package]] name = "windows_i686_gnu" version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a711c68811799e017b6038e0922cb27a5e2f43a2ddb609fe0b6f3eeda9de615" +[[package]] +name = "windows_i686_gnu" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" + [[package]] name = "windows_i686_msvc" version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "146c11bb1a02615db74680b32a68e2d61f553cc24c4eb5b4ca10311740e44172" +[[package]] +name = "windows_i686_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" + [[package]] name = "windows_x86_64_gnu" version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c912b12f7454c6620635bbff3450962753834be2a594819bd5e945af18ec64bc" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" + [[package]] name = "windows_x86_64_msvc" version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "504a2476202769977a040c6364301a3f65d0cc9e3fb08600b2bda150a0488316" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" + [[package]] name = "winreg" version = "0.7.0" @@ -7306,7 +7388,7 @@ dependencies = [ "num_cpus", "parking_lot_core 0.9.1", "prometheus", - "rand 0.8.3", + "rand 0.8.5", ] [[package]] diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 0f3b97461bb..e5863f44c4d 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -32,7 +32,7 @@ engine_traits = { workspace = true } error_code = { workspace = true } # We cannot update the etcd-client to latest version because of the cyclic requirement. # Also we need wait until https://github.com/etcdv3/etcd-client/pull/43/files to be merged. -etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "e0321a1990ee561cf042973666c0db61c8d82364", features = ["pub-response-field", "tls"] } +etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "14a6f8731f1890d5fd2f6e16a9f0d0a306b0599e", features = ["pub-response-field", "tls-openssl-vendored"] } external_storage = { workspace = true } external_storage_export = { workspace = true } fail = "0.5" @@ -42,6 +42,9 @@ futures-io = "0.3" grpcio = { workspace = true } hex = "0.4" + +# Fixing ahash cyclic dep: https://github.com/tkaitchuck/ahash/issues/95 +indexmap = "=1.6.2" kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.4" log_wrappers = { workspace = true } @@ -54,6 +57,7 @@ raft = { version = "0.7.0", default-features = false, features = ["protobuf-code raftstore = { workspace = true } regex = "1" resolved_ts = { workspace = true } +security = { path = "../security" } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } thiserror = "1" @@ -65,7 +69,7 @@ 
tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread", "macros", "time", "sync"] } tokio-stream = "0.1" tokio-util = { version = "0.7", features = ["compat"] } -tonic = "0.5" +tonic = "0.8" txn_types = { workspace = true } uuid = "0.8" yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } diff --git a/components/backup-stream/src/checkpoint_manager.rs b/components/backup-stream/src/checkpoint_manager.rs index e9f930e8563..f34211ef7a5 100644 --- a/components/backup-stream/src/checkpoint_manager.rs +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -329,7 +329,7 @@ pub trait FlushObserver: Send + 'static { /// Note the new resolved ts cannot be greater than the old resolved ts. async fn rewrite_resolved_ts( &mut self, - #[allow(unused_variables)] task: &str, + #[allow(unused_variables)] _task: &str, ) -> Option { None } diff --git a/components/backup-stream/src/metadata/store/lazy_etcd.rs b/components/backup-stream/src/metadata/store/lazy_etcd.rs index 8cd6b87ec71..6fc3a5332ea 100644 --- a/components/backup-stream/src/metadata/store/lazy_etcd.rs +++ b/components/backup-stream/src/metadata/store/lazy_etcd.rs @@ -2,9 +2,9 @@ use std::{sync::Arc, time::Duration}; -use etcd_client::{ConnectOptions, Error as EtcdError, TlsOptions}; +use etcd_client::{ConnectOptions, Error as EtcdError, OpenSslClientConfig}; use futures::Future; -use tikv_util::stream::RetryError; +use tikv_util::stream::{RetryError, RetryExt}; use tokio::sync::OnceCell; use super::{etcd::EtcdSnapshot, EtcdStore, MetaStore}; @@ -15,8 +15,9 @@ const RPC_TIMEOUT: Duration = Duration::from_secs(30); #[derive(Clone)] pub struct LazyEtcdClient(Arc); +#[derive(Debug)] pub struct ConnectionConfig { - pub tls: Option, + pub tls: Option, pub keep_alive_interval: Duration, pub keep_alive_timeout: Duration, } @@ -26,12 +27,16 @@ impl ConnectionConfig { fn to_connection_options(&self) -> ConnectOptions { let mut opts = ConnectOptions::new(); if let 
Some(tls) = &self.tls { - opts = opts.with_tls(tls.clone()) + opts = opts.with_openssl_tls( + OpenSslClientConfig::default() + .ca_cert_pem(&tls.ca) + .client_cert_pem_and_key(&tls.client_cert, &tls.client_key.0), + ) } opts = opts .with_keep_alive(self.keep_alive_interval, self.keep_alive_timeout) - .with_timeout(RPC_TIMEOUT) - .keep_alive_while_idle(false); + .with_keep_alive_while_idle(false) + .with_timeout(RPC_TIMEOUT); opts } @@ -68,7 +73,9 @@ fn etcd_error_is_retryable(etcd_err: &EtcdError) -> bool { EtcdError::InvalidArgs(_) | EtcdError::InvalidUri(_) | EtcdError::Utf8Error(_) - | EtcdError::InvalidHeaderValue(_) => false, + | EtcdError::InvalidHeaderValue(_) + | EtcdError::EndpointError(_) + | EtcdError::OpenSsl(_) => false, EtcdError::TransportError(_) | EtcdError::IoError(_) | EtcdError::WatchError(_) @@ -84,6 +91,7 @@ fn etcd_error_is_retryable(etcd_err: &EtcdError) -> bool { } } +#[derive(Debug)] struct RetryableEtcdError(EtcdError); impl RetryError for RetryableEtcdError { @@ -103,7 +111,11 @@ where F: Future>, { use futures::TryFutureExt; - let r = tikv_util::stream::retry(move || action().err_into::()).await; + let r = tikv_util::stream::retry_ext( + move || action().err_into::(), + RetryExt::default().with_fail_hook(|err| println!("meet error {:?}", err)), + ) + .await; r.map_err(|err| err.0.into()) } diff --git a/components/cloud/aws/Cargo.toml b/components/cloud/aws/Cargo.toml index 964048121d6..e539c67f571 100644 --- a/components/cloud/aws/Cargo.toml +++ b/components/cloud/aws/Cargo.toml @@ -38,7 +38,7 @@ tikv_util = { workspace = true } # better to not use slog-global, but pass in the logger tokio = { version = "1.5", features = ["time"] } url = "2.0" -uuid = "0.8" +uuid = { version = "0.8", features = ["v4"] } [dev-dependencies] futures = "0.3" diff --git a/components/security/Cargo.toml b/components/security/Cargo.toml index 4599b1df43e..a9cdd620d12 100644 --- a/components/security/Cargo.toml +++ b/components/security/Cargo.toml @@ -4,9 +4,6 
@@ version = "0.0.1" edition = "2018" publish = false -[features] -tonic = ["dep:tonic"] - [dependencies] collections = { workspace = true } encryption = { workspace = true } @@ -15,7 +12,6 @@ serde = "1.0" serde_derive = "1.0" serde_json = "1.0" tikv_util = { workspace = true } -tonic = { version = "0.5", features = ["tls"], optional = true } [dev-dependencies] tempfile = "3.0" diff --git a/components/security/src/lib.rs b/components/security/src/lib.rs index cc87469426c..52f438236fd 100644 --- a/components/security/src/lib.rs +++ b/components/security/src/lib.rs @@ -18,8 +18,6 @@ use grpcio::{ RpcContext, RpcStatus, RpcStatusCode, ServerBuilder, ServerChecker, ServerCredentialsBuilder, ServerCredentialsFetcher, }; -#[cfg(feature = "tonic")] -use tonic::transport::{channel::ClientTlsConfig, Certificate, Identity}; #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Default)] #[serde(default)] @@ -70,6 +68,23 @@ fn load_key(tag: &str, path: &str) -> Result, Box> { type CertResult = Result<(Vec, Vec, Vec), Box>; +type Pem = Box<[u8]>; + +pub struct Secret(pub Pem); + +impl std::fmt::Debug for Secret { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("Secret").finish() + } +} + +#[derive(Debug)] +pub struct ClientSuite { + pub ca: Pem, + pub client_cert: Pem, + pub client_key: Secret, +} + impl SecurityConfig { /// Validates ca, cert and private key. pub fn validate(&self) -> Result<(), Box> { @@ -124,21 +139,13 @@ impl SecurityManager { }) } - #[cfg(feature = "tonic")] - /// Make a tonic tls config via the config. 
- pub fn tonic_tls_config(&self) -> Option { - let (ca, cert, key) = self.cfg.load_certs().unwrap_or_default(); - if ca.is_empty() && cert.is_empty() && key.is_empty() { - return None; - } - let mut cfg = ClientTlsConfig::new(); - if !ca.is_empty() { - cfg = cfg.ca_certificate(Certificate::from_pem(ca)); - } - if !cert.is_empty() && !key.is_empty() { - cfg = cfg.identity(Identity::from_pem(cert, key)); - } - Some(cfg) + pub fn client_suite(&self) -> Result> { + let (ca, cert, key) = self.cfg.load_certs()?; + Ok(ClientSuite { + ca: ca.into_boxed_slice(), + client_cert: cert.into_boxed_slice(), + client_key: Secret(key.into_boxed_slice()), + }) } pub fn connect(&self, mut cb: ChannelBuilder, addr: &str) -> Channel { diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index 1f4d98b2847..7a40340b64e 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -69,7 +69,7 @@ raftstore = { workspace = true, features = ["engine_rocks"] } rand = "0.8" resolved_ts = { workspace = true } resource_metering = { workspace = true } -security = { workspace = true, features = ["tonic"] } +security = { workspace = true } serde_json = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 3ce38d0c79e..b52abc960d8 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -990,7 +990,13 @@ where ConnectionConfig { keep_alive_interval: self.config.server.grpc_keepalive_time.0, keep_alive_timeout: self.config.server.grpc_keepalive_timeout.0, - tls: self.security_mgr.tonic_tls_config(), + tls: self + .security_mgr + .client_suite() + .map_err(|err| { + warn!("Failed to load client TLS suite, ignoring TLS config."; "err" => %err); + }) + .ok(), }, ); let 
backup_stream_endpoint = backup_stream::Endpoint::new( From 6bccbf89dd579ddd7df79f10b77441efb4e39bab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 1 Dec 2022 16:50:01 +0800 Subject: [PATCH 0378/1149] import: cache storage when possible (#13783) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#13798 - Make the import process asynchronous. - Added caching if client requires. Signed-off-by: hillium Signed-off-by: Yu Juncen Signed-off-by: 山岚 <36239017+YuJuncen@users.noreply.github.com> Signed-off-by: hillium --- Cargo.lock | 9 +- components/backup-stream/src/router.rs | 18 +- components/cloud/aws/src/s3.rs | 6 +- components/cloud/azure/src/azblob.rs | 6 +- components/cloud/gcp/src/gcs.rs | 8 +- components/cloud/src/blob.rs | 10 +- .../external_storage/export/src/dylib.rs | 2 +- .../external_storage/export/src/export.rs | 41 ++-- .../external_storage/src/dylib_client.rs | 2 +- .../external_storage/src/grpc_client.rs | 2 +- components/external_storage/src/hdfs.rs | 11 +- components/external_storage/src/lib.rs | 91 ++++++-- components/external_storage/src/local.rs | 6 +- components/external_storage/src/noop.rs | 9 +- components/sst_importer/Cargo.toml | 1 + .../sst_importer/src/caching/cache_map.rs | 211 ++++++++++++++++++ components/sst_importer/src/caching/mod.rs | 4 + .../sst_importer/src/caching/storage_cache.rs | 58 +++++ components/sst_importer/src/import_mode.rs | 27 ++- components/sst_importer/src/lib.rs | 1 + components/sst_importer/src/metrics.rs | 5 + components/sst_importer/src/sst_importer.rs | 189 ++++++++++++---- components/sst_importer/src/util.rs | 8 + src/import/sst_service.rs | 75 ++++--- 24 files changed, 604 insertions(+), 196 deletions(-) create mode 100644 components/sst_importer/src/caching/cache_map.rs create mode 100644 components/sst_importer/src/caching/mod.rs create mode 100644 
components/sst_importer/src/caching/storage_cache.rs diff --git a/Cargo.lock b/Cargo.lock index a553d16f822..2b237c8c25f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1373,9 +1373,9 @@ dependencies = [ [[package]] name = "dashmap" -version = "5.2.0" +version = "5.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8858831f7781322e539ea39e72449c46b059638250c14344fec8d0aa6e539c" +checksum = "c0834a35a3fce649144119e18da2a4d8ed12ef3862f47183fd46f625d072d96c" dependencies = [ "cfg-if 1.0.0", "num_cpus", @@ -5368,9 +5368,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.9.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" [[package]] name = "smartstring" @@ -5479,6 +5479,7 @@ dependencies = [ "log_wrappers", "openssl", "prometheus", + "rand 0.8.3", "serde", "serde_derive", "slog", diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 56bd00bba87..ead124c103a 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -1506,11 +1506,10 @@ struct TaskRange { #[cfg(test)] mod tests { - use std::{ffi::OsStr, marker::Unpin, time::Duration}; + use std::{ffi::OsStr, time::Duration}; - use external_storage::NoopStorage; + use external_storage::{ExternalData, NoopStorage}; use futures::AsyncReadExt; - use futures_io::AsyncRead; use kvproto::brpb::{Local, Noop, StorageBackend, StreamBackupTaskInfo}; use tikv_util::{ codec::number::NumberEncoder, @@ -1929,16 +1928,11 @@ mod tests { self.inner.write(name, reader, content_length).await } - fn read(&self, name: &str) -> Box { + fn read(&self, name: &str) -> ExternalData<'_> { self.inner.read(name) } - fn read_part( - &self, - name: &str, - off: u64, - len: u64, - ) -> Box { + fn read_part(&self, name: &str, off: 
u64, len: u64) -> ExternalData<'_> { self.inner.read_part(name, off, len) } } @@ -2277,11 +2271,11 @@ mod tests { } } - fn read(&self, name: &str) -> Box { + fn read(&self, name: &str) -> external_storage::ExternalData<'_> { self.s.read(name) } - fn read_part(&self, name: &str, off: u64, len: u64) -> Box { + fn read_part(&self, name: &str, off: u64, len: u64) -> external_storage::ExternalData<'_> { self.s.read_part(name, off, len) } } diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index 469cac97d6c..a7ea47ec9d2 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -222,7 +222,7 @@ impl S3Storage { key.to_owned() } - fn get_range(&self, name: &str, range: Option) -> Box { + fn get_range(&self, name: &str, range: Option) -> cloud::blob::BlobStream<'_> { let key = self.maybe_prefix_key(name); let bucket = self.config.bucket.bucket.clone(); debug!("read file from s3 storage"; "key" => %key); @@ -595,11 +595,11 @@ impl BlobStorage for S3Storage { }) } - fn get(&self, name: &str) -> Box { + fn get(&self, name: &str) -> cloud::blob::BlobStream<'_> { self.get_range(name, None) } - fn get_part(&self, name: &str, off: u64, len: u64) -> Box { + fn get_part(&self, name: &str, off: u64, len: u64) -> cloud::blob::BlobStream<'_> { // inclusive, bytes=0-499 -> [0, 499] self.get_range(name, Some(format!("bytes={}-{}", off, off + len - 1))) } diff --git a/components/cloud/azure/src/azblob.rs b/components/cloud/azure/src/azblob.rs index 5bf02696de7..12b6149fad5 100644 --- a/components/cloud/azure/src/azblob.rs +++ b/components/cloud/azure/src/azblob.rs @@ -558,7 +558,7 @@ impl AzureStorage { &self, name: &str, range: Option>, - ) -> Box { + ) -> cloud::blob::BlobStream<'_> { let name = self.maybe_prefix_key(name); debug!("read file from Azure storage"; "key" => %name); let t = async move { @@ -602,11 +602,11 @@ impl BlobStorage for AzureStorage { uploader.run(&mut reader, content_length).await } - fn get(&self, name: &str) 
-> Box { + fn get(&self, name: &str) -> cloud::blob::BlobStream<'_> { self.get_range(name, None) } - fn get_part(&self, name: &str, off: u64, len: u64) -> Box { + fn get_part(&self, name: &str, off: u64, len: u64) -> cloud::blob::BlobStream<'_> { self.get_range(name, Some(off..off + len)) } } diff --git a/components/cloud/gcp/src/gcs.rs b/components/cloud/gcp/src/gcs.rs index 01f69a6d245..61e432c9431 100644 --- a/components/cloud/gcp/src/gcs.rs +++ b/components/cloud/gcp/src/gcs.rs @@ -347,14 +347,14 @@ impl GcsStorage { Ok(res) } - fn error_to_async_read(kind: io::ErrorKind, e: E) -> Box + fn error_to_async_read(kind: io::ErrorKind, e: E) -> cloud::blob::BlobStream<'static> where E: Into>, { Box::new(error_stream(io::Error::new(kind, e)).into_async_read()) } - fn get_range(&self, name: &str, range: Option) -> Box { + fn get_range(&self, name: &str, range: Option) -> cloud::blob::BlobStream<'_> { let bucket = self.config.bucket.bucket.to_string(); let name = self.maybe_prefix_key(name); debug!("read file from GCS storage"; "key" => %name); @@ -513,11 +513,11 @@ impl BlobStorage for GcsStorage { Ok::<_, io::Error>(()) } - fn get(&self, name: &str) -> Box { + fn get(&self, name: &str) -> cloud::blob::BlobStream<'_> { self.get_range(name, None) } - fn get_part(&self, name: &str, off: u64, len: u64) -> Box { + fn get_part(&self, name: &str, off: u64, len: u64) -> cloud::blob::BlobStream<'_> { // inclusive, bytes=0-499 -> [0, 499] self.get_range(name, Some(format!("bytes={}-{}", off, off + len - 1))) } diff --git a/components/cloud/src/blob.rs b/components/cloud/src/blob.rs index d80d3a47a28..84ca77042d7 100644 --- a/components/cloud/src/blob.rs +++ b/components/cloud/src/blob.rs @@ -19,6 +19,8 @@ pub trait BlobConfig: 'static + Send + Sync { /// wrappers exists. 
pub struct PutResource(pub Box); +pub type BlobStream<'a> = Box; + impl AsyncRead for PutResource { fn poll_read( self: Pin<&mut Self>, @@ -45,10 +47,10 @@ pub trait BlobStorage: 'static + Send + Sync { async fn put(&self, name: &str, reader: PutResource, content_length: u64) -> io::Result<()>; /// Read all contents of the given path. - fn get(&self, name: &str) -> Box; + fn get(&self, name: &str) -> BlobStream<'_>; /// Read part of contents of the given path. - fn get_part(&self, name: &str, off: u64, len: u64) -> Box; + fn get_part(&self, name: &str, off: u64, len: u64) -> BlobStream<'_>; } impl BlobConfig for dyn BlobStorage { @@ -72,11 +74,11 @@ impl BlobStorage for Box { fut.await } - fn get(&self, name: &str) -> Box { + fn get(&self, name: &str) -> BlobStream<'_> { (**self).get(name) } - fn get_part(&self, name: &str, off: u64, len: u64) -> Box { + fn get_part(&self, name: &str, off: u64, len: u64) -> BlobStream<'_> { (**self).get_part(name, off, len) } } diff --git a/components/external_storage/export/src/dylib.rs b/components/external_storage/export/src/dylib.rs index a02f5f2fade..308973de95e 100644 --- a/components/external_storage/export/src/dylib.rs +++ b/components/external_storage/export/src/dylib.rs @@ -188,7 +188,7 @@ pub mod staticlib { .map_err(anyhow_to_io_log_error) } - fn read(&self, _name: &str) -> Box { + fn read(&self, _name: &str) -> crate::ExternalData<'_> { unimplemented!("use restore instead of read") } diff --git a/components/external_storage/export/src/export.rs b/components/external_storage/export/src/export.rs index 10363bf92b2..ad31dc363ae 100644 --- a/components/external_storage/export/src/export.rs +++ b/components/external_storage/export/src/export.rs @@ -3,11 +3,7 @@ //! To use External storage with protobufs as an application, import this //! module. external_storage contains the actual library code //! 
Cloud provider backends are under components/cloud -use std::{ - io::{self, Write}, - path::Path, - sync::Arc, -}; +use std::{io, path::Path, sync::Arc}; use async_trait::async_trait; #[cfg(feature = "cloud-aws")] @@ -24,22 +20,19 @@ use external_storage::dylib_client; use external_storage::grpc_client; pub use external_storage::{ compression_reader_dispatcher, encrypt_wrap_reader, read_external_storage_info_buff, - read_external_storage_into_file, record_storage_create, BackendConfig, ExternalStorage, - HdfsStorage, LocalStorage, NoopStorage, RestoreConfig, UnpinReader, MIN_READ_SPEED, + read_external_storage_into_file, record_storage_create, BackendConfig, ExternalData, + ExternalStorage, HdfsStorage, LocalStorage, NoopStorage, RestoreConfig, UnpinReader, + MIN_READ_SPEED, }; -use futures_io::AsyncRead; #[cfg(feature = "cloud-gcp")] pub use gcp::{Config as GcsConfig, GcsStorage}; pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; #[cfg(any(feature = "cloud-gcp", feature = "cloud-aws", feature = "cloud-azure"))] use kvproto::brpb::{AzureBlobStorage, Gcs, S3}; use kvproto::brpb::{CloudDynamic, Noop, StorageBackend}; +use tikv_util::time::{Instant, Limiter}; #[cfg(feature = "cloud-storage-dylib")] use tikv_util::warn; -use tikv_util::{ - stream::block_on_external_io, - time::{Instant, Limiter}, -}; #[cfg(feature = "cloud-storage-dylib")] use crate::dylib; @@ -307,13 +300,13 @@ impl std::ops::Deref for BlobStore { } } -pub struct EncryptedExternalStorage { +pub struct EncryptedExternalStorage { pub key_manager: Arc, - pub storage: Box, + pub storage: S, } #[async_trait] -impl ExternalStorage for EncryptedExternalStorage { +impl ExternalStorage for EncryptedExternalStorage { fn name(&self) -> &'static str { self.storage.name() } @@ -323,13 +316,13 @@ impl ExternalStorage for EncryptedExternalStorage { async fn write(&self, name: &str, reader: UnpinReader, content_length: u64) -> io::Result<()> { self.storage.write(name, reader, content_length).await } - 
fn read(&self, name: &str) -> Box { + fn read(&self, name: &str) -> ExternalData<'_> { self.storage.read(name) } - fn read_part(&self, name: &str, off: u64, len: u64) -> Box { + fn read_part(&self, name: &str, off: u64, len: u64) -> ExternalData<'_> { self.storage.read_part(name, off, len) } - fn restore( + async fn restore( &self, storage_name: &str, restore_name: std::path::PathBuf, @@ -353,19 +346,19 @@ impl ExternalStorage for EncryptedExternalStorage { compression_reader_dispatcher(compression_type, inner)? }; - let file_writer: &mut dyn Write = - &mut self.key_manager.create_file_for_write(restore_name)?; + let file_writer = self.key_manager.create_file_for_write(&restore_name)?; let min_read_speed: usize = 8192; let mut input = encrypt_wrap_reader(file_crypter, reader)?; - block_on_external_io(read_external_storage_into_file( + read_external_storage_into_file( &mut input, file_writer, speed_limiter, expected_length, expected_sha256, min_read_speed, - )) + ) + .await } } @@ -383,11 +376,11 @@ impl ExternalStorage for BlobStore { .await } - fn read(&self, name: &str) -> Box { + fn read(&self, name: &str) -> ExternalData<'_> { (**self).get(name) } - fn read_part(&self, name: &str, off: u64, len: u64) -> Box { + fn read_part(&self, name: &str, off: u64, len: u64) -> ExternalData<'_> { (**self).get_part(name, off, len) } } diff --git a/components/external_storage/src/dylib_client.rs b/components/external_storage/src/dylib_client.rs index 6d6dc35cf8a..9e2748c2011 100644 --- a/components/external_storage/src/dylib_client.rs +++ b/components/external_storage/src/dylib_client.rs @@ -92,7 +92,7 @@ impl ExternalStorage for ExternalStorageClient { .map_err(anyhow_to_io_log_error) } - fn read(&self, _name: &str) -> Box { + fn read(&self, _name: &str) -> crate::ExternalData<'_> { unimplemented!("use restore instead of read") } diff --git a/components/external_storage/src/grpc_client.rs b/components/external_storage/src/grpc_client.rs index 3d715dfcd47..e836d8fb58a 100644 
--- a/components/external_storage/src/grpc_client.rs +++ b/components/external_storage/src/grpc_client.rs @@ -95,7 +95,7 @@ impl ExternalStorage for ExternalStorageClient { .map_err(anyhow_to_io_log_error) } - fn read(&self, _name: &str) -> Box { + fn read(&self, _name: &str) -> crate::ExternalData<'_> { unimplemented!("use restore instead of read") } diff --git a/components/external_storage/src/hdfs.rs b/components/external_storage/src/hdfs.rs index a9fa65dcdcf..17556490320 100644 --- a/components/external_storage/src/hdfs.rs +++ b/components/external_storage/src/hdfs.rs @@ -7,7 +7,7 @@ use tokio::{io as async_io, process::Command}; use tokio_util::compat::FuturesAsyncReadCompatExt; use url::Url; -use crate::{ExternalStorage, UnpinReader}; +use crate::{ExternalData, ExternalStorage, UnpinReader}; /// Convert `hdfs:///path` to `/path` fn try_convert_to_path(url: &Url) -> &str { @@ -131,16 +131,11 @@ impl ExternalStorage for HdfsStorage { } } - fn read(&self, _name: &str) -> Box { + fn read(&self, _name: &str) -> ExternalData<'_> { unimplemented!("currently only HDFS export is implemented") } - fn read_part( - &self, - _name: &str, - _off: u64, - _len: u64, - ) -> Box { + fn read_part(&self, _name: &str, _off: u64, _len: u64) -> ExternalData<'_> { unimplemented!("currently only HDFS export is implemented") } } diff --git a/components/external_storage/src/lib.rs b/components/external_storage/src/lib.rs index e1c57608197..c344f09968b 100644 --- a/components/external_storage/src/lib.rs +++ b/components/external_storage/src/lib.rs @@ -26,7 +26,7 @@ use futures_util::AsyncReadExt; use kvproto::brpb::CompressionType; use openssl::hash::{Hasher, MessageDigest}; use tikv_util::{ - stream::{block_on_external_io, READ_BUF_SIZE}, + stream::READ_BUF_SIZE, time::{Instant, Limiter}, }; use tokio::time::timeout; @@ -58,6 +58,8 @@ pub fn record_storage_create(start: Instant, storage: &dyn ExternalStorage) { /// signature of write.) 
see https://github.com/rust-lang/rust/issues/63033 pub struct UnpinReader(pub Box); +pub type ExternalData<'a> = Box; + #[derive(Debug, Default)] pub struct BackendConfig { pub s3_multi_part_size: usize, @@ -73,10 +75,10 @@ pub struct RestoreConfig { } /// a reader dispatcher for different compression type. -pub fn compression_reader_dispatcher<'a>( +pub fn compression_reader_dispatcher( compression_type: Option, - inner: Box, -) -> io::Result> { + inner: ExternalData<'_>, +) -> io::Result> { match compression_type { Some(c) => match c { // The log files generated from TiKV v6.2.0 use the default value (0). @@ -107,13 +109,13 @@ pub trait ExternalStorage: 'static + Send + Sync { async fn write(&self, name: &str, reader: UnpinReader, content_length: u64) -> io::Result<()>; /// Read all contents of the given path. - fn read(&self, name: &str) -> Box; + fn read(&self, name: &str) -> ExternalData<'_>; /// Read part of contents of the given path. - fn read_part(&self, name: &str, off: u64, len: u64) -> Box; + fn read_part(&self, name: &str, off: u64, len: u64) -> ExternalData<'_>; /// Read from external storage and restore to the given path - fn restore( + async fn restore( &self, storage_name: &str, restore_name: std::path::PathBuf, @@ -137,22 +139,23 @@ pub trait ExternalStorage: 'static + Send + Sync { compression_reader_dispatcher(compression_type, inner)? }; - let output: &mut dyn Write = &mut File::create(restore_name)?; + let output = File::create(restore_name)?; // the minimum speed of reading data, in bytes/second. // if reading speed is slower than this rate, we will stop with // a "TimedOut" error. // (at 8 KB/s for a 2 MB buffer, this means we timeout after 4m16s.) 
let min_read_speed: usize = 8192; - let mut input = encrypt_wrap_reader(file_crypter, reader)?; + let input = encrypt_wrap_reader(file_crypter, reader)?; - block_on_external_io(read_external_storage_into_file( - &mut input, + read_external_storage_into_file( + input, output, speed_limiter, expected_length, expected_sha256, min_read_speed, - )) + ) + .await } } @@ -170,13 +173,32 @@ impl ExternalStorage for Arc { (**self).write(name, reader, content_length).await } - fn read(&self, name: &str) -> Box { + fn read(&self, name: &str) -> ExternalData<'_> { (**self).read(name) } - fn read_part(&self, name: &str, off: u64, len: u64) -> Box { + fn read_part(&self, name: &str, off: u64, len: u64) -> ExternalData<'_> { (**self).read_part(name, off, len) } + + async fn restore( + &self, + storage_name: &str, + restore_name: std::path::PathBuf, + expected_length: u64, + speed_limiter: &Limiter, + restore_config: RestoreConfig, + ) -> io::Result<()> { + self.as_ref() + .restore( + storage_name, + restore_name, + expected_length, + speed_limiter, + restore_config, + ) + .await + } } #[async_trait] @@ -193,21 +215,40 @@ impl ExternalStorage for Box { self.as_ref().write(name, reader, content_length).await } - fn read(&self, name: &str) -> Box { + fn read(&self, name: &str) -> ExternalData<'_> { self.as_ref().read(name) } - fn read_part(&self, name: &str, off: u64, len: u64) -> Box { + fn read_part(&self, name: &str, off: u64, len: u64) -> ExternalData<'_> { self.as_ref().read_part(name, off, len) } + + async fn restore( + &self, + storage_name: &str, + restore_name: std::path::PathBuf, + expected_length: u64, + speed_limiter: &Limiter, + restore_config: RestoreConfig, + ) -> io::Result<()> { + self.as_ref() + .restore( + storage_name, + restore_name, + expected_length, + speed_limiter, + restore_config, + ) + .await + } } /// Wrap the reader with file_crypter. /// Return the reader directly if file_crypter is None. 
-pub fn encrypt_wrap_reader<'a>( +pub fn encrypt_wrap_reader( file_crypter: Option, - reader: Box, -) -> io::Result> { + reader: ExternalData<'_>, +) -> io::Result> { let input = match file_crypter { Some(x) => Box::new(DecrypterReader::new( reader, @@ -221,14 +262,18 @@ pub fn encrypt_wrap_reader<'a>( Ok(input) } -pub async fn read_external_storage_into_file( - input: &mut (dyn AsyncRead + Unpin), - output: &mut dyn Write, +pub async fn read_external_storage_into_file( + mut input: In, + mut output: Out, speed_limiter: &Limiter, expected_length: u64, expected_sha256: Option>, min_read_speed: usize, -) -> io::Result<()> { +) -> io::Result<()> +where + In: AsyncRead + Unpin, + Out: Write, +{ let dur = Duration::from_secs((READ_BUF_SIZE / min_read_speed) as u64); // do the I/O copy from external_storage to the local file. diff --git a/components/external_storage/src/local.rs b/components/external_storage/src/local.rs index 4b22de96a6a..0bf6be65107 100644 --- a/components/external_storage/src/local.rs +++ b/components/external_storage/src/local.rs @@ -3,14 +3,12 @@ use std::{ fs::File as StdFile, io::{self, BufReader, Read, Seek}, - marker::Unpin, path::{Path, PathBuf}, sync::Arc, }; use async_trait::async_trait; use futures::io::AllowStdIo; -use futures_io::AsyncRead; use futures_util::stream::TryStreamExt; use rand::Rng; use tikv_util::stream::error_stream; @@ -119,7 +117,7 @@ impl ExternalStorage for LocalStorage { self.base_dir.sync_all().await } - fn read(&self, name: &str) -> Box { + fn read(&self, name: &str) -> crate::ExternalData<'_> { debug!("read file from local storage"; "name" => %name, "base" => %self.base.display()); // We used std i/o here for removing the requirement of tokio reactor when @@ -131,7 +129,7 @@ impl ExternalStorage for LocalStorage { } } - fn read_part(&self, name: &str, off: u64, len: u64) -> Box { + fn read_part(&self, name: &str, off: u64, len: u64) -> crate::ExternalData<'_> { debug!("read part of file from local storage"; "name" => 
%name, "off" => %off, "len" => %len, "base" => %self.base.display()); diff --git a/components/external_storage/src/noop.rs b/components/external_storage/src/noop.rs index 42746742624..50e9c43c7bc 100644 --- a/components/external_storage/src/noop.rs +++ b/components/external_storage/src/noop.rs @@ -1,14 +1,11 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::marker::Unpin; - use async_trait::async_trait; -use futures_io::AsyncRead; use tokio::io; use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; use super::ExternalStorage; -use crate::UnpinReader; +use crate::{ExternalData, UnpinReader}; /// A storage saves files into void. /// It is mainly for test use. @@ -44,11 +41,11 @@ impl ExternalStorage for NoopStorage { Ok(()) } - fn read(&self, _name: &str) -> Box { + fn read(&self, _name: &str) -> ExternalData<'_> { Box::new(io::empty().compat()) } - fn read_part(&self, _name: &str, _off: u64, _len: u64) -> Box { + fn read_part(&self, _name: &str, _off: u64, _len: u64) -> ExternalData<'_> { Box::new(io::empty().compat()) } } diff --git a/components/sst_importer/Cargo.toml b/components/sst_importer/Cargo.toml index 6b5fbd9127f..0bba773418b 100644 --- a/components/sst_importer/Cargo.toml +++ b/components/sst_importer/Cargo.toml @@ -31,6 +31,7 @@ lazy_static = "1.3" log_wrappers = { workspace = true } openssl = "0.10" prometheus = { version = "0.13", default-features = false } +rand = "0.8" serde = "1.0" serde_derive = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } diff --git a/components/sst_importer/src/caching/cache_map.rs b/components/sst_importer/src/caching/cache_map.rs new file mode 100644 index 00000000000..e88e5c3545d --- /dev/null +++ b/components/sst_importer/src/caching/cache_map.rs @@ -0,0 +1,211 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + time::Duration, +}; + +use dashmap::{mapref::entry::Entry, DashMap}; +use futures::Future; + +use crate::metrics::EXT_STORAGE_CACHE_COUNT; + +#[derive(Clone, Default)] +pub struct CacheMap(Arc>); + +impl CacheMap { + #[cfg(test)] + pub fn with_inner(inner: CacheMapInner) -> Self { + Self(Arc::new(inner)) + } +} + +pub trait ShareOwned { + type Shared: 'static; + + fn share_owned(&self) -> Self::Shared; +} + +impl ShareOwned for T { + type Shared = T; + + fn share_owned(&self) -> Self::Shared { + *self + } +} + +pub trait MakeCache: 'static { + type Cached: std::fmt::Debug + ShareOwned + Send + Sync + 'static; + type Error; + + fn make_cache(&self) -> std::result::Result; +} + +#[derive(Debug)] +pub struct CacheMapInner { + cached: DashMap>, + now: AtomicUsize, + + gc_threshold: usize, +} + +impl Default for CacheMapInner { + fn default() -> Self { + Self { + cached: DashMap::default(), + now: Default::default(), + gc_threshold: 20, + } + } +} + +impl CacheMapInner { + #[cfg(test)] + pub fn with_gc_threshold(n: usize) -> Self { + Self { + gc_threshold: n, + ..Self::default() + } + } +} + +#[derive(Debug)] +struct Cached { + resource: R, + last_used: usize, +} + +impl Cached { + fn new(resource: R) -> Self { + Self { + resource, + last_used: 0, + } + } + + fn resource_owned(&mut self, now: usize) -> ::Shared { + self.last_used = now; + self.resource.share_owned() + } +} + +impl CacheMapInner { + fn now(&self) -> usize { + self.now.load(Ordering::SeqCst) + } + + fn tick(&self) { + let now = self.now.fetch_add(1usize, Ordering::SeqCst); + self.cached.retain(|name, cache| { + let need_hold = now.saturating_sub(cache.last_used) < self.gc_threshold; + if !need_hold { + info!("Removing cache due to expired."; "name" => %name, "entry" => ?cache); + } + need_hold + }); + } +} + +impl CacheMap { + pub fn gc_loop(&self) -> impl Future + Send + 'static { + let this = Arc::downgrade(&self.0); + async move { 
+ loop { + tokio::time::sleep(Duration::from_secs(30)).await; + match this.upgrade() { + Some(inner) => inner.tick(), + None => return, + } + } + } + } + + pub fn cached_or_create( + &self, + cache_key: &str, + backend: &M, + ) -> std::result::Result<::Shared, M::Error> { + let s = self.0.cached.get_mut(cache_key); + match s { + Some(mut s) => { + EXT_STORAGE_CACHE_COUNT.with_label_values(&["hit"]).inc(); + Ok(s.value_mut().resource_owned(self.0.now())) + } + None => { + drop(s); + let e = self.0.cached.entry(cache_key.to_owned()); + match e { + Entry::Occupied(mut v) => { + EXT_STORAGE_CACHE_COUNT.with_label_values(&["hit"]).inc(); + Ok(v.get_mut().resource_owned(self.0.now())) + } + Entry::Vacant(v) => { + EXT_STORAGE_CACHE_COUNT.with_label_values(&["miss"]).inc(); + let pool = backend.make_cache()?; + info!("Insert storage cache."; "name" => %cache_key, "cached" => ?pool); + let shared = pool.share_owned(); + v.insert(Cached::new(pool)); + Ok(shared) + } + } + } + } + } +} + +#[cfg(test)] +mod tests { + use std::{ + convert::Infallible, + sync::atomic::{AtomicBool, Ordering}, + }; + + use super::{CacheMap, CacheMapInner, MakeCache}; + + #[derive(Default)] + struct CacheChecker(AtomicBool); + + impl MakeCache for CacheChecker { + type Cached = (); + type Error = Infallible; + + fn make_cache(&self) -> std::result::Result { + self.0.store(true, Ordering::SeqCst); + Ok(()) + } + } + + impl CacheChecker { + fn made_cache(&self) -> bool { + self.0.load(Ordering::SeqCst) + } + } + + #[test] + fn test_basic() { + let cached = CacheMapInner::with_gc_threshold(1); + let cached = CacheMap::with_inner(cached); + + let check_cache = |key, should_make_cache: bool| { + let c = CacheChecker::default(); + cached.cached_or_create(key, &c).unwrap(); + assert_eq!(c.made_cache(), should_make_cache); + }; + + check_cache("hello", true); + check_cache("hello", false); + check_cache("world", true); + + cached.0.tick(); + check_cache("hello", false); + + cached.0.tick(); + 
check_cache("world", true); + + cached.0.tick(); + check_cache("hello", true); + } +} diff --git a/components/sst_importer/src/caching/mod.rs b/components/sst_importer/src/caching/mod.rs new file mode 100644 index 00000000000..9e55717c601 --- /dev/null +++ b/components/sst_importer/src/caching/mod.rs @@ -0,0 +1,4 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +pub mod cache_map; +pub mod storage_cache; diff --git a/components/sst_importer/src/caching/storage_cache.rs b/components/sst_importer/src/caching/storage_cache.rs new file mode 100644 index 00000000000..23732545b92 --- /dev/null +++ b/components/sst_importer/src/caching/storage_cache.rs @@ -0,0 +1,58 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::sync::Arc; + +use external_storage_export::ExternalStorage; +use kvproto::brpb::StorageBackend; + +use super::cache_map::{MakeCache, ShareOwned}; +use crate::{Error, Result}; + +impl ShareOwned for StoragePool { + type Shared = Arc; + + fn share_owned(&self) -> Self::Shared { + self.get() + } +} + +impl MakeCache for StorageBackend { + type Cached = StoragePool; + type Error = Error; + + fn make_cache(&self) -> Result { + StoragePool::create(self, 16) + } +} + +pub struct StoragePool(Box<[Arc]>); + +impl StoragePool { + fn create(backend: &StorageBackend, size: usize) -> Result { + let mut r = Vec::with_capacity(size); + for _ in 0..size { + let s = external_storage_export::create_storage(backend, Default::default())?; + r.push(Arc::from(s)); + } + Ok(Self(r.into_boxed_slice())) + } + + fn get(&self) -> Arc { + use rand::Rng; + let idx = rand::thread_rng().gen_range(0..self.0.len()); + Arc::clone(&self.0[idx]) + } +} + +impl std::fmt::Debug for StoragePool { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let url = self + .get() + .url() + .map(|u| u.to_string()) + .unwrap_or_else(|_| "".to_owned()); + f.debug_tuple("StoragePool") + .field(&format_args!("{}", url)) + .finish() + } 
+} diff --git a/components/sst_importer/src/import_mode.rs b/components/sst_importer/src/import_mode.rs index 0e793e2bc2b..5f5b5d1060e 100644 --- a/components/sst_importer/src/import_mode.rs +++ b/components/sst_importer/src/import_mode.rs @@ -9,10 +9,10 @@ use std::{ }; use engine_traits::{CfOptions, DbOptions, KvEngine}; -use futures::executor::ThreadPool; use futures_util::compat::Future01CompatExt; use kvproto::import_sstpb::*; use tikv_util::timer::GLOBAL_TIMER_HANDLE; +use tokio::runtime::Handle; use super::{Config, Result}; @@ -88,7 +88,7 @@ impl ImportModeSwitcher { ImportModeSwitcher { inner, is_import } } - pub fn start(&self, executor: &ThreadPool, db: E) { + pub fn start(&self, executor: &Handle, db: E) { // spawn a background future to put TiKV back into normal mode after timeout let inner = self.inner.clone(); let switcher = Arc::downgrade(&inner); @@ -117,7 +117,7 @@ impl ImportModeSwitcher { } } }; - executor.spawn_ok(timer_loop); + executor.spawn(timer_loop); } pub fn enter_normal_mode(&self, db: &E, mf: RocksDbMetricsFn) -> Result { @@ -243,7 +243,6 @@ mod tests { use std::thread; use engine_traits::{KvEngine, CF_DEFAULT}; - use futures::executor::ThreadPoolBuilder; use tempfile::Builder; use test_sst_importer::{new_test_engine, new_test_engine_with_options}; use tikv_util::config::ReadableDuration; @@ -306,14 +305,13 @@ mod tests { fn mf(_cf: &str, _name: &str, _v: f64) {} let cfg = Config::default(); - let threads = ThreadPoolBuilder::new() - .pool_size(cfg.num_threads) - .name_prefix("sst-importer") - .create() + let threads = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() .unwrap(); let switcher = ImportModeSwitcher::new(&cfg); - switcher.start(&threads, db.clone()); + switcher.start(threads.handle(), db.clone()); check_import_options(&db, &normal_db_options, &normal_cf_options); assert!(switcher.enter_import_mode(&db, mf).unwrap()); check_import_options(&db, &import_db_options, &import_cf_options); @@ -344,19 +342,20 
@@ mod tests { import_mode_timeout: ReadableDuration::millis(300), ..Config::default() }; - let threads = ThreadPoolBuilder::new() - .pool_size(cfg.num_threads) - .name_prefix("sst-importer") - .create() + + let threads = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() .unwrap(); let switcher = ImportModeSwitcher::new(&cfg); - switcher.start(&threads, db.clone()); + switcher.start(threads.handle(), db.clone()); check_import_options(&db, &normal_db_options, &normal_cf_options); switcher.enter_import_mode(&db, mf).unwrap(); check_import_options(&db, &import_db_options, &import_cf_options); thread::sleep(Duration::from_secs(1)); + threads.block_on(tokio::task::yield_now()); check_import_options(&db, &normal_db_options, &normal_cf_options); } diff --git a/components/sst_importer/src/lib.rs b/components/sst_importer/src/lib.rs index ec0222d416a..4d25201253a 100644 --- a/components/sst_importer/src/lib.rs +++ b/components/sst_importer/src/lib.rs @@ -19,6 +19,7 @@ mod sst_writer; mod util; #[macro_use] pub mod import_mode; +mod caching; pub mod metrics; pub mod sst_importer; diff --git a/components/sst_importer/src/metrics.rs b/components/sst_importer/src/metrics.rs index cd14f6feb56..e7eeefd3e82 100644 --- a/components/sst_importer/src/metrics.rs +++ b/components/sst_importer/src/metrics.rs @@ -101,4 +101,9 @@ lazy_static! 
{ "Bucketed histogram of importer apply count", &["type"] ).unwrap(); + pub static ref EXT_STORAGE_CACHE_COUNT: IntCounterVec = register_int_counter_vec!( + "tikv_import_storage_cache", + "The operations over storage cache", + &["operation"] + ).unwrap(); } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index c024bca8e6d..3e06eb76899 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -22,9 +22,10 @@ use engine_traits::{ IterOptions, Iterator, KvEngine, RefIterable, SstCompressionType, SstExt, SstMetaInfo, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, CF_WRITE, }; -use external_storage_export::{compression_reader_dispatcher, encrypt_wrap_reader, RestoreConfig}; +use external_storage_export::{ + compression_reader_dispatcher, encrypt_wrap_reader, ExternalStorage, RestoreConfig, +}; use file_system::{get_io_rate_limiter, OpenOptions}; -use futures::executor::ThreadPool; use kvproto::{ brpb::{CipherInfo, StorageBackend}, import_sstpb::*, @@ -37,16 +38,31 @@ use tikv_util::{ sys::SysQuota, time::{Instant, Limiter}, }; +use tokio::runtime::{Handle, Runtime}; use txn_types::{Key, TimeStamp, WriteRef}; use crate::{ + caching::cache_map::CacheMap, import_file::{ImportDir, ImportFile}, import_mode::{ImportModeSwitcher, RocksDbMetricsFn}, metrics::*, sst_writer::{RawSstWriter, TxnSstWriter}, - Config, Error, Result, + util, Config, Error, Result, }; +#[derive(Default, Debug, Clone)] +pub struct DownloadExt<'a> { + cache_key: Option<&'a str>, +} + +impl<'a> DownloadExt<'a> { + pub fn cache_key(self, key: &'a str) -> Self { + Self { + cache_key: Some(key), + } + } +} + #[derive(Clone, PartialEq, Debug)] pub enum CacheKvFile { Mem(Arc>), @@ -81,6 +97,9 @@ pub struct SstImporter { // TODO: lift api_version as a type parameter. 
api_version: ApiVersion, compression_types: HashMap, + + cached_storage: CacheMap, + download_rt: Runtime, file_locks: Arc>, mem_use: AtomicU64, mem_limit: ReadableSize, @@ -94,6 +113,11 @@ impl SstImporter { api_version: ApiVersion, ) -> Result { let switcher = ImportModeSwitcher::new(cfg); + let cached_storage = CacheMap::default(); + let download_rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()?; + download_rt.spawn(cached_storage.gc_loop()); let memory_limit = (SysQuota::memory_limit_in_bytes() as f64) * cfg.memory_use_ratio; info!("sst importer memory limit when apply"; "size" => ?memory_limit); @@ -105,6 +129,8 @@ impl SstImporter { api_version, compression_types: HashMap::with_capacity(2), file_locks: Arc::new(DashMap::default()), + cached_storage, + download_rt, mem_use: AtomicU64::new(0), mem_limit: ReadableSize(memory_limit as u64), }) @@ -122,7 +148,7 @@ impl SstImporter { } } - pub fn start_switch_mode_check(&self, executor: &ThreadPool, db: E) { + pub fn start_switch_mode_check(&self, executor: &Handle, db: E) { self.switcher.start(executor, db); } @@ -216,7 +242,7 @@ impl SstImporter { // // This method returns the *inclusive* key range (`[start, end]`) of SST // file created, or returns None if the SST is empty. 
- pub fn download( + pub async fn download_ext( &self, meta: &SstMeta, backend: &StorageBackend, @@ -225,6 +251,7 @@ impl SstImporter { crypter: Option, speed_limiter: Limiter, engine: E, + ext: DownloadExt<'_>, ) -> Result> { debug!("download start"; "meta" => ?meta, @@ -233,7 +260,7 @@ impl SstImporter { "rewrite_rule" => ?rewrite_rule, "speed_limit" => speed_limiter.speed_limit(), ); - match self.do_download::( + let r = self.do_download_ext::( meta, backend, name, @@ -241,7 +268,9 @@ impl SstImporter { crypter, &speed_limiter, engine, - ) { + ext, + ); + match r.await { Ok(r) => { info!("download"; "meta" => ?meta, "name" => name, "range" => ?r); Ok(r) @@ -274,6 +303,49 @@ impl SstImporter { support_kms: bool, speed_limiter: &Limiter, restore_config: external_storage_export::RestoreConfig, + ) -> Result<()> { + self.download_rt + .block_on(self.async_download_file_from_external_storage( + file_length, + src_file_name, + dst_file, + backend, + support_kms, + speed_limiter, + "", + restore_config, + )) + } + + /// Create an external storage by the backend, and cache it with the key. + /// If the cache exists, return it directly. + pub fn external_storage_or_cache( + &self, + backend: &StorageBackend, + cache_id: &str, + ) -> Result> { + // prepare to download the file from the external_storage + // TODO: pass a config to support hdfs + let ext_storage = if cache_id.is_empty() { + EXT_STORAGE_CACHE_COUNT.with_label_values(&["skip"]).inc(); + let s = external_storage_export::create_storage(backend, Default::default())?; + Arc::from(s) + } else { + self.cached_storage.cached_or_create(cache_id, backend)? 
+ }; + Ok(ext_storage) + } + + async fn async_download_file_from_external_storage( + &self, + file_length: u64, + src_file_name: &str, + dst_file: std::path::PathBuf, + backend: &StorageBackend, + support_kms: bool, + speed_limiter: &Limiter, + cache_key: &str, + restore_config: external_storage_export::RestoreConfig, ) -> Result<()> { let start_read = Instant::now(); if let Some(p) = dst_file.parent() { @@ -285,34 +357,22 @@ impl SstImporter { } })?; } - // prepare to download the file from the external_storage - // TODO: pass a config to support hdfs - let ext_storage = external_storage_export::create_storage(backend, Default::default())?; - let url = ext_storage.url()?.to_string(); - let ext_storage: Box = if support_kms { - if let Some(key_manager) = &self.key_manager { - Box::new(external_storage_export::EncryptedExternalStorage { - key_manager: (*key_manager).clone(), - storage: ext_storage, - }) as _ - } else { - ext_storage as _ - } - } else { - ext_storage as _ - }; + let ext_storage = self.external_storage_or_cache(backend, cache_key)?; + let ext_storage = self.wrap_kms(ext_storage, support_kms); - let result = ext_storage.restore( - src_file_name, - dst_file.clone(), - file_length, - speed_limiter, - restore_config, - ); + let result = ext_storage + .restore( + src_file_name, + dst_file.clone(), + file_length, + speed_limiter, + restore_config, + ) + .await; IMPORTER_DOWNLOAD_BYTES.observe(file_length as _); result.map_err(|e| Error::CannotReadExternalStorage { - url: url.to_string(), + url: util::url_for(&ext_storage), name: src_file_name.to_owned(), local_path: dst_file.clone(), err: e, @@ -329,7 +389,7 @@ impl SstImporter { debug!("downloaded file succeed"; "name" => src_file_name, - "url" => %url, + "url" => %util::url_for(&ext_storage), ); Ok(()) } @@ -476,26 +536,24 @@ impl SstImporter { Ok(lock.0.clone()) } - pub fn create_external_storage( + pub fn wrap_kms( &self, - backend: &StorageBackend, + ext_storage: Arc, support_kms: bool, - ) -> Result> 
{ - let ext_storage = external_storage_export::create_storage(backend, Default::default())?; + ) -> Arc { // kv-files needn't are decrypted with KMS when download currently because these // files are not encrypted when log-backup. It is different from // sst-files because sst-files is encrypted when saved with rocksdb env // with KMS. to do: support KMS when log-backup and restore point. - let ext_storage = match (support_kms, self.key_manager.clone()) { + match (support_kms, self.key_manager.clone()) { (true, Some(key_manager)) => { - Box::new(external_storage_export::EncryptedExternalStorage { + Arc::new(external_storage_export::EncryptedExternalStorage { key_manager, storage: ext_storage, }) } _ => ext_storage, - }; - Ok(ext_storage) + } } fn read_kv_files_from_external_storage( @@ -771,7 +829,31 @@ impl SstImporter { } } - fn do_download( + // raw download, without ext, compatibility to old tests. + #[cfg(test)] + fn download( + &self, + meta: &SstMeta, + backend: &StorageBackend, + name: &str, + rewrite_rule: &RewriteRule, + crypter: Option, + speed_limiter: Limiter, + engine: E, + ) -> Result> { + self.download_rt.block_on(self.download_ext( + meta, + backend, + name, + rewrite_rule, + crypter, + speed_limiter, + engine, + DownloadExt::default(), + )) + } + + async fn do_download_ext( &self, meta: &SstMeta, backend: &StorageBackend, @@ -780,6 +862,7 @@ impl SstImporter { crypter: Option, speed_limiter: &Limiter, engine: E, + ext: DownloadExt<'_>, ) -> Result> { let path = self.dir.join(meta)?; @@ -794,15 +877,17 @@ impl SstImporter { ..Default::default() }; - self.download_file_from_external_storage( + self.async_download_file_from_external_storage( meta.length, name, path.temp.clone(), backend, true, speed_limiter, + ext.cache_key.unwrap_or(""), restore_config, - )?; + ) + .await?; // now validate the SST file. 
let env = get_env(self.key_manager.clone(), get_io_rate_limiter())?; @@ -1628,8 +1713,11 @@ mod tests { ) .unwrap(); let ext_storage = { - let inner = importer.create_external_storage(&backend, false).unwrap(); - Arc::new(inner) + let inner = importer.wrap_kms( + importer.external_storage_or_cache(&backend, "").unwrap(), + false, + ); + inner }; // test do_read_kv_file() @@ -1681,7 +1769,10 @@ mod tests { ) .unwrap(); let ext_storage = { - let inner = importer.create_external_storage(&backend, false).unwrap(); + let inner = importer.wrap_kms( + importer.external_storage_or_cache(&backend, "").unwrap(), + false, + ); Arc::new(inner) }; @@ -1743,8 +1834,10 @@ mod tests { SstImporter::new(&cfg, import_dir, Some(key_manager), ApiVersion::V1).unwrap(); let rewrite_rule = &new_rewrite_rule(b"", b"", 12345); let ext_storage = { - let inner = importer.create_external_storage(&backend, false).unwrap(); - Arc::new(inner) + importer.wrap_kms( + importer.external_storage_or_cache(&backend, "").unwrap(), + false, + ) }; let path = importer .dir diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index dce63314073..501061e92c0 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs @@ -4,6 +4,7 @@ use std::path::Path; use encryption::DataKeyManager; use engine_traits::EncryptionKeyManager; +use external_storage_export::ExternalStorage; use file_system::File; use super::Result; @@ -64,6 +65,13 @@ pub fn prepare_sst_for_ingestion, Q: AsRef>( Ok(()) } +pub fn url_for(storage: &E) -> String { + storage + .url() + .map(|url| url.to_string()) + .unwrap_or_else(|err| format!("ErrUrl({})", err)) +} + #[cfg(test)] mod tests { use std::{path::Path, sync::Arc}; diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 2bf0226136f..9d45052fea9 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -5,20 +5,13 @@ use std::{ future::Future, path::PathBuf, sync::{Arc, Mutex}, - 
thread::sleep, time::Duration, }; use collections::HashSet; use engine_traits::{KvEngine, CF_DEFAULT, CF_WRITE}; use file_system::{set_io_type, IoType}; -use futures::{ - executor::{ThreadPool, ThreadPoolBuilder}, - future::join_all, - sink::SinkExt, - stream::TryStreamExt, - TryFutureExt, -}; +use futures::{future::join_all, sink::SinkExt, stream::TryStreamExt, TryFutureExt}; use grpcio::{ ClientStreamingSink, RequestStream, RpcContext, ServerStreamingSink, UnarySink, WriteFlags, }; @@ -34,13 +27,17 @@ use raftstore::{ router::RaftStoreRouter, store::{Callback, RaftCmdExtraOpts, RegionSnapshot}, }; -use sst_importer::{error_inc, metrics::*, sst_meta_to_path, Config, Error, Result, SstImporter}; +use sst_importer::{ + error_inc, metrics::*, sst_importer::DownloadExt, sst_meta_to_path, Config, Error, Result, + SstImporter, +}; use tikv_util::{ config::ReadableSize, future::{create_stream_with_buffer, paired_future_callback}, sys::thread::ThreadBuildWrapper, time::{Instant, Limiter}, }; +use tokio::{runtime::Runtime, time::sleep}; use txn_types::{Key, WriteRef, WriteType}; use super::make_rpc_error; @@ -58,7 +55,7 @@ where cfg: Config, engine: E, router: Router, - threads: ThreadPool, + threads: Arc, importer: Arc, limiter: Limiter, task_slots: Arc>>, @@ -83,25 +80,25 @@ where importer: Arc, ) -> ImportSstService { let props = tikv_util::thread_group::current_properties(); - let threads = ThreadPoolBuilder::new() - .pool_size(cfg.num_threads + 1) - .name_prefix("sst-importer") + let threads = tokio::runtime::Builder::new_multi_thread() + .worker_threads(cfg.num_threads) + .enable_all() + .thread_name("sst-importer") .after_start_wrapper(move || { tikv_util::thread_group::set_properties(props.clone()); tikv_alloc::add_thread_memory_accessor(); set_io_type(IoType::Import); }) .before_stop_wrapper(move || tikv_alloc::remove_thread_memory_accessor()) - .create() + .build() .unwrap(); - importer.start_switch_mode_check(&threads, engine.clone()); - let importer_clone = 
importer.clone(); - threads.spawn_ok(async { Self::tick(importer_clone) }); + importer.start_switch_mode_check(threads.handle(), engine.clone()); + threads.spawn(Self::tick(importer.clone())); ImportSstService { cfg, engine, - threads, + threads: Arc::new(threads), router, importer, limiter: Limiter::new(f64::INFINITY), @@ -110,9 +107,9 @@ where } } - fn tick(importer: Arc) { + async fn tick(importer: Arc) { loop { - sleep(Duration::from_secs(10)); + sleep(Duration::from_secs(10)).await; importer.shrink_by_tick(); } } @@ -311,8 +308,8 @@ macro_rules! impl_write { $crate::send_rpc_response!(res, sink, label, timer); }; - self.threads.spawn_ok(buf_driver); - self.threads.spawn_ok(handle_task); + self.threads.spawn(buf_driver); + self.threads.spawn(handle_task); } }; } @@ -395,8 +392,8 @@ where crate::send_rpc_response!(res, sink, label, timer); }; - self.threads.spawn_ok(buf_driver); - self.threads.spawn_ok(handle_task); + self.threads.spawn(buf_driver); + self.threads.spawn(handle_task); } // clear_files the KV files after apply finished. 
@@ -431,7 +428,7 @@ where let resp = Ok(resp); crate::send_rpc_response!(resp, sink, label, timer); }; - self.threads.spawn_ok(handle_task); + self.threads.spawn(handle_task); } // Downloads KV file and performs key-rewrite then apply kv into this tikv @@ -475,9 +472,14 @@ where let mut req_write_size = 0_u64; let mut range: Option = None; let ext_storage = { - let inner = - importer.create_external_storage(req.get_storage_backend(), false)?; - Arc::from(inner) + let inner = importer.wrap_kms( + importer.external_storage_or_cache( + req.get_storage_backend(), + req.get_storage_cache_id(), + )?, + false, + ); + inner }; for (i, meta) in metas.iter().enumerate() { @@ -594,7 +596,7 @@ where debug!("finished apply kv file with {:?}", resp); crate::send_rpc_response!(resp, sink, label, timer); }; - self.threads.spawn_ok(handle_task); + self.threads.spawn(handle_task); } /// Downloads the file and performs key-rewrite for later ingesting. @@ -627,7 +629,7 @@ where .into_option() .filter(|c| c.cipher_type != EncryptionMethod::Plaintext); - let res = importer.download::( + let res = importer.download_ext::( req.get_sst(), req.get_storage_backend(), req.get_name(), @@ -635,9 +637,10 @@ where cipher, limiter, engine, + DownloadExt::default().cache_key(req.get_storage_cache_id()), ); let mut resp = DownloadResponse::default(); - match res { + match res.await { Ok(range) => match range { Some(r) => resp.set_range(r), None => resp.set_is_empty(true), @@ -648,7 +651,7 @@ where crate::send_rpc_response!(resp, sink, label, timer); }; - self.threads.spawn_ok(handle_task); + self.threads.spawn(handle_task); } /// Ingest the file by sending a raft command to raftstore. @@ -694,7 +697,7 @@ where Self::release_lock(&task_slots, &meta).unwrap(); crate::send_rpc_response!(res, sink, label, timer); }; - self.threads.spawn_ok(handle_task); + self.threads.spawn(handle_task); } /// Ingest multiple files by sending a raft command to raftstore. 
@@ -745,7 +748,7 @@ where } crate::send_rpc_response!(res, sink, label, timer); }; - self.threads.spawn_ok(handle_task); + self.threads.spawn(handle_task); } fn compact( @@ -794,7 +797,7 @@ where crate::send_rpc_response!(res, sink, label, timer); }; - self.threads.spawn_ok(handle_task); + self.threads.spawn(handle_task); } fn set_download_speed_limit( @@ -885,7 +888,7 @@ where } let _ = sink.close().await; }; - self.threads.spawn_ok(handle_task); + self.threads.spawn(handle_task); } impl_write!(write, WriteRequest, WriteResponse, Chunk, new_txn_writer); From 8d9698f82bbd9a0a1c4ace0dffe1fadcd37df07e Mon Sep 17 00:00:00 2001 From: Hu# Date: Thu, 1 Dec 2022 19:38:02 +0800 Subject: [PATCH 0379/1149] storage: Unify the flashback reader and fix the start_key bug (#13860) close pingcap/tiflash#6379, ref tikv/tikv#13800, close tikv/tikv#13861 SnapshotReader typically uses its own start_ts for something. Since it doesn't need the start_ts of SnapshotReader itself, we can unify the reader into MvccReader. And the start key from the client is actually a range, which is used to limit the upper bound of this flashback when scanning data, so it may not be a real key. 
Signed-off-by: husharp Co-authored-by: Ti Chi Robot --- components/cdc/tests/integrations/test_cdc.rs | 2 +- components/cdc/tests/mod.rs | 2 + src/storage/mvcc/reader/reader.rs | 2 +- .../txn/actions/flashback_to_version.rs | 234 ++++++++++++------ .../txn/commands/flashback_to_version.rs | 16 +- .../flashback_to_version_read_phase.rs | 76 ++++-- tests/integrations/server/kv_service.rs | 32 ++- 7 files changed, 249 insertions(+), 115 deletions(-) diff --git a/components/cdc/tests/integrations/test_cdc.rs b/components/cdc/tests/integrations/test_cdc.rs index b9c285406d4..73f46fe6427 100644 --- a/components/cdc/tests/integrations/test_cdc.rs +++ b/components/cdc/tests/integrations/test_cdc.rs @@ -2533,7 +2533,7 @@ fn test_flashback() { let (start_key, end_key) = (b"key0".to_vec(), b"key2".to_vec()); // Prepare flashback. let flashback_start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); - suite.must_kv_prepare_flashback(region_id, &start_key, flashback_start_ts); + suite.must_kv_prepare_flashback(region_id, &start_key, &end_key, flashback_start_ts); // resolved ts should not be advanced anymore. 
let mut counter = 0; let mut last_resolved_ts = 0; diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index 87619deb92b..77e50bb10b2 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -586,11 +586,13 @@ impl TestSuite { &mut self, region_id: u64, start_key: &[u8], + end_key: &[u8], start_ts: TimeStamp, ) { let mut prepare_flashback_req = PrepareFlashbackToVersionRequest::default(); prepare_flashback_req.set_context(self.get_context(region_id)); prepare_flashback_req.set_start_key(start_key.to_vec()); + prepare_flashback_req.set_end_key(end_key.to_vec()); prepare_flashback_req.set_start_ts(start_ts.into_inner()); let prepare_flashback_resp = self .get_tikv_client(region_id) diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 0ada3a12d5d..4847dbb8428 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -206,7 +206,7 @@ impl MvccReader { } /// load the value associated with `key` and pointed by `write` - fn load_data(&mut self, key: &Key, write: Write) -> Result { + pub fn load_data(&mut self, key: &Key, write: Write) -> Result { assert_eq!(write.write_type, WriteType::Put); if let Some(val) = write.short_value { return Ok(val); diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index e719ca24a26..4b05c8eef8f 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -7,7 +7,7 @@ use txn_types::{Key, Lock, LockType, TimeStamp, Write, WriteType}; use crate::storage::{ mvcc::{MvccReader, MvccTxn, SnapshotReader, MAX_TXN_WRITE_SIZE}, txn::{actions::check_txn_status::rollback_lock, Result as TxnResult}, - Snapshot, Statistics, + Snapshot, }; pub const FLASHBACK_BATCH_SIZE: usize = 256 + 1 /* To store the next key for multiple batches */; @@ -16,7 +16,6 @@ pub fn flashback_to_version_read_lock( reader: &mut MvccReader, next_lock_key: 
Key, end_key: &Key, - statistics: &mut Statistics, ) -> TxnResult> { let result = reader.scan_locks( Some(&next_lock_key), @@ -24,7 +23,6 @@ pub fn flashback_to_version_read_lock( |_| true, FLASHBACK_BATCH_SIZE, ); - statistics.add(&reader.statistics); let (key_locks, _) = result?; Ok(key_locks) } @@ -36,7 +34,6 @@ pub fn flashback_to_version_read_write( end_key: &Key, flashback_version: TimeStamp, flashback_commit_ts: TimeStamp, - statistics: &mut Statistics, ) -> TxnResult> { // Filter out the SST that does not have a newer version than // `flashback_version` in `CF_WRITE`, i.e, whose latest `commit_ts` <= @@ -51,7 +48,7 @@ pub fn flashback_to_version_read_write( |key, latest_commit_ts| { // There is no any other write could happen after the flashback begins. assert!(latest_commit_ts <= flashback_commit_ts); - // - Skip the `start_key`. + // - Skip the `start_key` which as prewrite key. // - No need to find an old version for the key if its latest `commit_ts` is // smaller than or equal to the flashback version. // - No need to flashback a key twice if its latest `commit_ts` is equal to the @@ -62,7 +59,6 @@ pub fn flashback_to_version_read_write( }, FLASHBACK_BATCH_SIZE, ); - statistics.add(&reader.statistics); let (keys, _) = keys_result?; Ok(keys) } @@ -71,9 +67,10 @@ pub fn flashback_to_version_read_write( // `CF_LOCK`. pub fn rollback_locks( txn: &mut MvccTxn, - reader: &mut SnapshotReader, + snapshot: impl Snapshot, key_locks: Vec<(Key, Lock)>, ) -> TxnResult> { + let mut reader = SnapshotReader::new(txn.start_ts, snapshot, false); for (key, lock) in key_locks { if txn.write_size() >= MAX_TXN_WRITE_SIZE { return Ok(Some(key)); @@ -82,7 +79,7 @@ pub fn rollback_locks( reader.start_ts = lock.ts; rollback_lock( txn, - reader, + &mut reader, key.clone(), &lock, lock.is_pessimistic_txn(), @@ -102,7 +99,7 @@ pub fn rollback_locks( // and `self.start_ts`. 
pub fn flashback_to_version_write( txn: &mut MvccTxn, - reader: &mut SnapshotReader, + reader: &mut MvccReader, keys: Vec, flashback_version: TimeStamp, flashback_start_ts: TimeStamp, @@ -122,7 +119,7 @@ pub fn flashback_to_version_write( if txn.write_size() >= MAX_TXN_WRITE_SIZE { return Ok(Some(key.clone())); } - let old_write = reader.get_write(&key, flashback_version)?; + let old_write = reader.get_write(&key, flashback_version, None)?; let new_write = if let Some(old_write) = old_write { // If it's a `WriteType::Put` without the short value, we should put the old // value in `CF_DEFAULT` with `self.start_ts` as well. @@ -152,19 +149,19 @@ pub fn flashback_to_version_write( // transaction. pub fn prewrite_flashback_key( txn: &mut MvccTxn, - reader: &mut SnapshotReader, + reader: &mut MvccReader, key_to_lock: &Key, flashback_version: TimeStamp, flashback_start_ts: TimeStamp, ) -> TxnResult<()> { - let old_write = reader.get_write(key_to_lock, flashback_version)?; + let old_write = reader.get_write(key_to_lock, flashback_version, None)?; // Flashback the value in `CF_DEFAULT` as well if the old write is a // `WriteType::Put` without the short value. if let Some(old_write) = old_write.as_ref() { if old_write.write_type == WriteType::Put && old_write.short_value.is_none() // If the value with `flashback_start_ts` already exists, we don't need to write again. 
- && reader.reader.get_value(key_to_lock, flashback_start_ts)?.is_none() + && reader.get_value(key_to_lock, flashback_start_ts)?.is_none() { txn.put_value( key_to_lock.clone(), @@ -197,7 +194,7 @@ pub fn prewrite_flashback_key( pub fn commit_flashback_key( txn: &mut MvccTxn, - reader: &mut SnapshotReader, + reader: &mut MvccReader, key_to_commit: &Key, flashback_start_ts: TimeStamp, flashback_commit_ts: TimeStamp, @@ -225,6 +222,16 @@ pub fn commit_flashback_key( Ok(()) } +pub fn get_first_user_key( + reader: &mut MvccReader, + start_key: &Key, + end_key: &Key, +) -> TxnResult> { + let (mut keys_result, _) = + reader.scan_latest_user_keys(Some(start_key), Some(end_key), |_, _| true, 1)?; + Ok(keys_result.pop()) +} + #[cfg(test)] pub mod tests { use concurrency_manager::ConcurrencyManager; @@ -249,23 +256,17 @@ pub mod tests { fn must_rollback_lock( engine: &mut E, key: &[u8], - version: impl Into, start_ts: impl Into, ) -> usize { let next_key = Key::from_raw(keys::next_key(key).as_slice()); let key = Key::from_raw(key); - let (version, start_ts) = (version.into(), start_ts.into()); let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); - let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &ctx); - let mut statistics = Statistics::default(); - let key_locks = - flashback_to_version_read_lock(&mut reader, key, &next_key, &mut statistics).unwrap(); + let mut reader = MvccReader::new_with_ctx(snapshot.clone(), Some(ScanMode::Forward), &ctx); + let key_locks = flashback_to_version_read_lock(&mut reader, key, &next_key).unwrap(); let cm = ConcurrencyManager::new(TimeStamp::zero()); - let mut txn = MvccTxn::new(start_ts, cm); - let snapshot = engine.snapshot(Default::default()).unwrap(); - let mut snap_reader = SnapshotReader::new_with_ctx(version, snapshot, &ctx); - rollback_locks(&mut txn, &mut snap_reader, key_locks).unwrap(); + let mut txn = MvccTxn::new(start_ts.into(), cm); + rollback_locks(&mut txn, 
snapshot, key_locks).unwrap(); let rows = txn.modifies.len(); write(engine, &ctx, txn.into_modifies()); rows @@ -282,21 +283,22 @@ pub mod tests { let mut txn = MvccTxn::new(start_ts, cm); let snapshot = engine.snapshot(Default::default()).unwrap(); let ctx = Context::default(); - let mut snap_reader = SnapshotReader::new_with_ctx(version, snapshot, &ctx); - prewrite_flashback_key( - &mut txn, - &mut snap_reader, - &Key::from_raw(key), - version, - start_ts, - ) - .unwrap(); + let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &ctx); + let prewrite_key = if let Some(first_key) = + get_first_user_key(&mut reader, &Key::from_raw(key), &Key::from_raw(b"z")).unwrap() + { + first_key + } else { + // If the key is None return directly + return 0; + }; + prewrite_flashback_key(&mut txn, &mut reader, &prewrite_key, version, start_ts).unwrap(); let rows = txn.modifies.len(); write(engine, &ctx, txn.into_modifies()); rows } - fn must_flashback_to_version( + fn must_flashback_write_to_version( engine: &mut E, key: &[u8], version: impl Into, @@ -309,7 +311,6 @@ pub mod tests { let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &ctx); - let mut statistics = Statistics::default(); // Flashback the writes. 
let keys = flashback_to_version_read_write( &mut reader, @@ -318,29 +319,41 @@ pub mod tests { &next_key, version, commit_ts, - &mut statistics, ) .unwrap(); let cm = ConcurrencyManager::new(TimeStamp::zero()); let mut txn = MvccTxn::new(start_ts, cm); + flashback_to_version_write(&mut txn, &mut reader, keys, version, start_ts, commit_ts) + .unwrap(); + let rows = txn.modifies.len(); + write(engine, &ctx, txn.into_modifies()); + rows + } + + fn must_commit_flashback_key( + engine: &mut E, + key: &[u8], + start_ts: impl Into, + commit_ts: impl Into, + ) -> usize { + let (start_ts, commit_ts) = (start_ts.into(), commit_ts.into()); + let cm = ConcurrencyManager::new(TimeStamp::zero()); + let mut txn = MvccTxn::new(start_ts, cm); let snapshot = engine.snapshot(Default::default()).unwrap(); - let mut snap_reader = SnapshotReader::new_with_ctx(version, snapshot, &ctx); - flashback_to_version_write( - &mut txn, - &mut snap_reader, - keys, - version, - start_ts, - commit_ts, - ) - .unwrap(); + let ctx = Context::default(); + let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &ctx); + let key_to_lock = + get_first_user_key(&mut reader, &Key::from_raw(key), &Key::from_raw(b"z")) + .unwrap() + .unwrap(); + commit_flashback_key(&mut txn, &mut reader, &key_to_lock, start_ts, commit_ts).unwrap(); let rows = txn.modifies.len(); write(engine, &ctx, txn.into_modifies()); rows } #[test] - fn test_flashback_to_version() { + fn test_flashback_write_to_version() { let mut engine = TestEngineBuilder::new().build().unwrap(); let mut ts = TimeStamp::zero(); let k = b"k"; @@ -368,50 +381,50 @@ pub mod tests { must_get(&mut engine, k, *ts.incr(), v2); // Flashback to version 1 with start_ts = 14, commit_ts = 15. 
assert_eq!( - must_flashback_to_version(&mut engine, k, 1, *ts.incr(), *ts.incr()), + must_flashback_write_to_version(&mut engine, k, 1, *ts.incr(), *ts.incr()), 1 ); must_get_none(&mut engine, k, *ts.incr()); // Flashback to version 2 with start_ts = 17, commit_ts = 18. assert_eq!( - must_flashback_to_version(&mut engine, k, 2, *ts.incr(), *ts.incr()), + must_flashback_write_to_version(&mut engine, k, 2, *ts.incr(), *ts.incr()), 1 ); must_get(&mut engine, k, *ts.incr(), v1); // Flashback to version 5 with start_ts = 20, commit_ts = 21. assert_eq!( - must_flashback_to_version(&mut engine, k, 5, *ts.incr(), *ts.incr()), + must_flashback_write_to_version(&mut engine, k, 5, *ts.incr(), *ts.incr()), 1 ); must_get(&mut engine, k, *ts.incr(), v1); // Flashback to version 7 with start_ts = 23, commit_ts = 24. assert_eq!( - must_flashback_to_version(&mut engine, k, 7, *ts.incr(), *ts.incr()), + must_flashback_write_to_version(&mut engine, k, 7, *ts.incr(), *ts.incr()), 1 ); must_get(&mut engine, k, *ts.incr(), v1); // Flashback to version 10 with start_ts = 26, commit_ts = 27. assert_eq!( - must_flashback_to_version(&mut engine, k, 10, *ts.incr(), *ts.incr()), + must_flashback_write_to_version(&mut engine, k, 10, *ts.incr(), *ts.incr()), 1 ); must_get_none(&mut engine, k, *ts.incr()); // Flashback to version 13 with start_ts = 29, commit_ts = 30. assert_eq!( - must_flashback_to_version(&mut engine, k, 13, *ts.incr(), *ts.incr()), + must_flashback_write_to_version(&mut engine, k, 13, *ts.incr(), *ts.incr()), 1 ); must_get(&mut engine, k, *ts.incr(), v2); // Flashback to version 27 with start_ts = 32, commit_ts = 33. 
assert_eq!( - must_flashback_to_version(&mut engine, k, 27, *ts.incr(), *ts.incr()), + must_flashback_write_to_version(&mut engine, k, 27, *ts.incr(), *ts.incr()), 1 ); must_get_none(&mut engine, k, *ts.incr()); } #[test] - fn test_flashback_to_version_deleted() { + fn test_flashback_write_to_version_deleted() { let mut engine = TestEngineBuilder::new().build().unwrap(); let mut ts = TimeStamp::zero(); let (k, v) = (b"k", b"v"); @@ -423,14 +436,14 @@ pub mod tests { // Though the key has been deleted, flashback to version 1 still needs to write // a new `WriteType::Delete` with the flashback `commit_ts`. assert_eq!( - must_flashback_to_version(&mut engine, k, 1, *ts.incr(), *ts.incr()), + must_flashback_write_to_version(&mut engine, k, 1, *ts.incr(), *ts.incr()), 1 ); must_get_none(&mut engine, k, ts); } #[test] - fn test_flashback_to_version_pessimistic() { + fn test_flashback_write_to_version_pessimistic() { let mut engine = TestEngineBuilder::new().build().unwrap(); let k = b"k"; let (v1, v2, v3) = (b"v1", b"v2", b"v3"); @@ -447,8 +460,11 @@ pub mod tests { // Flashback to version 17 with start_ts = 35, commit_ts = 40. // Distinguish from pessimistic start_ts 30 to make sure rollback ts is by lock // ts. - assert_eq!(must_rollback_lock(&mut engine, k, 17, 35), 2); - assert_eq!(must_flashback_to_version(&mut engine, k, 17, 35, 40), 1); + assert_eq!(must_rollback_lock(&mut engine, k, 35), 2); + assert_eq!( + must_flashback_write_to_version(&mut engine, k, 17, 35, 40), + 1 + ); // Pessimistic Prewrite Put(k -> v3) with stat_ts = 30 will be error with // Rollback. 
@@ -457,7 +473,7 @@ pub mod tests { } #[test] - fn test_duplicated_flashback_to_version() { + fn test_duplicated_flashback_write_to_version() { let mut engine = TestEngineBuilder::new().build().unwrap(); let mut ts = TimeStamp::zero(); let (k, v) = (b"k", b"v"); @@ -467,14 +483,14 @@ pub mod tests { let start_ts = *ts.incr(); let commit_ts = *ts.incr(); assert_eq!( - must_flashback_to_version(&mut engine, k, 1, start_ts, commit_ts), + must_flashback_write_to_version(&mut engine, k, 1, start_ts, commit_ts), 1 ); must_get_none(&mut engine, k, ts); // Flashback again with the same `start_ts` and `commit_ts` should not do // anything. assert_eq!( - must_flashback_to_version(&mut engine, k, 1, start_ts, commit_ts), + must_flashback_write_to_version(&mut engine, k, 1, start_ts, commit_ts), 0 ); } @@ -490,34 +506,106 @@ pub mod tests { let flashback_start_ts = *ts.incr(); // Rollback nothing. - assert_eq!( - must_rollback_lock(&mut engine, k, ts, flashback_start_ts), - 0 - ); + assert_eq!(must_rollback_lock(&mut engine, k, flashback_start_ts), 0); // Lock and write the value of `k`. assert_eq!( must_prewrite_flashback_key(&mut engine, k, 2, flashback_start_ts), 2 ); + // Retry Prepare // Unlock `k`, put rollback record and delete the value of `k`. - assert_eq!( - must_rollback_lock(&mut engine, k, ts, flashback_start_ts), - 3 - ); + assert_eq!(must_rollback_lock(&mut engine, k, flashback_start_ts), 3); // Lock and write the value of `k`. assert_eq!( must_prewrite_flashback_key(&mut engine, k, 2, flashback_start_ts), 2 ); + // Retry Prepare // Only unlock `k` since there is an overlapped rollback record. + assert_eq!(must_rollback_lock(&mut engine, k, flashback_start_ts), 1); + // Only lock `k` since the value of `k` has already existed. assert_eq!( - must_rollback_lock(&mut engine, k, ts, flashback_start_ts), + must_prewrite_flashback_key(&mut engine, k, 2, flashback_start_ts), 1 ); - // Only lock `k` since the value of `k` has already existed. 
+ } + + #[test] + fn test_prewrite_with_special_key() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + let mut ts = TimeStamp::zero(); + let (prewrite_key, prewrite_val) = (b"b", b"val"); + must_prewrite_put( + &mut engine, + prewrite_key, + prewrite_val, + prewrite_key, + *ts.incr(), + ); + must_commit(&mut engine, prewrite_key, ts, *ts.incr()); + must_get(&mut engine, prewrite_key, ts, prewrite_val); + let (k, v1, v2) = (b"c", b"v1", b"v2"); + must_prewrite_put(&mut engine, k, v1, k, *ts.incr()); + must_commit(&mut engine, k, ts, *ts.incr()); + must_prewrite_put(&mut engine, k, v2, k, *ts.incr()); + must_commit(&mut engine, k, ts, *ts.incr()); + must_get(&mut engine, k, ts, v2); + // Check for prewrite key b"b". + let ctx = Context::default(); + let snapshot = engine.snapshot(Default::default()).unwrap(); + let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &ctx); + let first_key = get_first_user_key(&mut reader, &Key::from_raw(b""), &Key::from_raw(b"z")) + .unwrap_or_else(|_| Some(Key::from_raw(b""))) + .unwrap(); + assert_eq!(first_key, Key::from_raw(prewrite_key)); + + // case 1: start key is before all keys, flashback b"c". + let start_key = b"a"; + let (flashback_start_ts, flashback_commit_ts) = (*ts.incr(), *ts.incr()); + // Rollback nothing. + assert_eq!(must_rollback_lock(&mut engine, k, flashback_start_ts), 0); + // Prewrite "prewrite_key" not "start_key". assert_eq!( - must_prewrite_flashback_key(&mut engine, k, 2, flashback_start_ts), + must_prewrite_flashback_key(&mut engine, start_key, 4, flashback_start_ts), + 1 + ); + // Flashback (b"c", v2) to (b"c", v1). + assert_eq!( + must_flashback_write_to_version( + &mut engine, + k, + 4, + flashback_start_ts, + flashback_commit_ts + ), 1 ); + // Put prewrite record and Unlock, will commit "prewrite_key" not "start_key". 
+ assert_eq!( + must_commit_flashback_key( + &mut engine, + start_key, + flashback_start_ts, + flashback_commit_ts + ), + 2 + ); + must_get(&mut engine, k, ts, v1); + must_get(&mut engine, prewrite_key, ts, prewrite_val); + + // case 2: start key is after all keys, prewrite will return None. + let start_key = b"d"; + let flashback_start_ts = *ts.incr(); + // Rollback nothing. + assert_eq!(must_rollback_lock(&mut engine, k, flashback_start_ts), 0); + // Prewrite null. + assert_eq!( + must_prewrite_flashback_key(&mut engine, start_key, 4, flashback_start_ts), + 0 + ); + // case 3: start key is valid, end_key is invalid, prewrite key will be None. + let first_key = get_first_user_key(&mut reader, &Key::from_raw(b"a"), &Key::from_raw(b"")) + .unwrap_or_else(|_| Some(Key::from_raw(b""))); + assert_eq!(first_key, None); } } diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index a1936cee647..13de0c9b183 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -3,12 +3,13 @@ // #[PerformanceCriticalPath] use std::mem; +use tikv_kv::ScanMode; use txn_types::{Key, TimeStamp}; use crate::storage::{ kv::WriteData, lock_manager::LockManager, - mvcc::{MvccTxn, SnapshotReader}, + mvcc::{MvccReader, MvccTxn}, txn::{ actions::flashback_to_version::{ commit_flashback_key, flashback_to_version_write, prewrite_flashback_key, @@ -16,8 +17,7 @@ use crate::storage::{ }, commands::{ Command, CommandExt, FlashbackToVersionReadPhase, FlashbackToVersionState, - ReaderWithStats, ReleasedLocks, ResponsePolicy, TypedCommand, WriteCommand, - WriteContext, WriteResult, + ReleasedLocks, ResponsePolicy, TypedCommand, WriteCommand, WriteContext, WriteResult, }, latch, Result, }, @@ -71,10 +71,8 @@ impl CommandExt for FlashbackToVersion { impl WriteCommand for FlashbackToVersion { fn process_write(mut self, snapshot: S, context: WriteContext<'_, L>) -> Result { - let mut 
reader = ReaderWithStats::new( - SnapshotReader::new_with_ctx(self.version, snapshot, &self.ctx), - context.statistics, - ); + let mut reader = + MvccReader::new_with_ctx(snapshot.clone(), Some(ScanMode::Forward), &self.ctx); let mut txn = MvccTxn::new(TimeStamp::zero(), context.concurrency_manager); match self.state { FlashbackToVersionState::RollbackLock { @@ -82,12 +80,11 @@ impl WriteCommand for FlashbackToVersion { ref mut key_locks, } => { if let Some(new_next_lock_key) = - rollback_locks(&mut txn, &mut reader, mem::take(key_locks))? + rollback_locks(&mut txn, snapshot, mem::take(key_locks))? { *next_lock_key = new_next_lock_key; } } - // TODO: add some test cases for the special prewrite key. FlashbackToVersionState::Prewrite { ref key_to_lock } => prewrite_flashback_key( &mut txn, &mut reader, @@ -126,6 +123,7 @@ impl WriteCommand for FlashbackToVersion { if matches!(self.state, FlashbackToVersionState::FlashbackWrite { .. }) { write_data.extra.one_pc = true; } + context.statistics.add(&reader.statistics); Ok(WriteResult { ctx: self.ctx.clone(), to_be_write: write_data, diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs index d27225a9bf7..9ac5014b7f3 100644 --- a/src/storage/txn/commands/flashback_to_version_read_phase.rs +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -6,6 +6,7 @@ use txn_types::{Key, Lock, TimeStamp}; use crate::storage::{ mvcc::MvccReader, txn::{ + actions::flashback_to_version::get_first_user_key, commands::{ Command, CommandExt, FlashbackToVersion, ProcessResult, ReadCommand, TypedCommand, }, @@ -122,19 +123,31 @@ impl ReadCommand for FlashbackToVersionReadPhase { fn process_read(self, snapshot: S, statistics: &mut Statistics) -> Result { let tag = self.tag().get_str(); let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &self.ctx); + let mut start_key = self.start_key.clone(); let next_state = match 
self.state { FlashbackToVersionState::RollbackLock { next_lock_key, .. } => { - let mut key_locks = flashback_to_version_read_lock( - &mut reader, - next_lock_key, - &self.end_key, - statistics, - )?; + let mut key_locks = + flashback_to_version_read_lock(&mut reader, next_lock_key, &self.end_key)?; if key_locks.is_empty() { - // No more locks to rollback, continue to the prewrite phase. - FlashbackToVersionState::Prewrite { - key_to_lock: self.start_key.clone(), - } + // - No more locks to rollback, continue to the Prewrite Phase. + // - The start key from the client is actually a range which is used to limit + // the upper bound of this flashback when scanning data, so it may not be a + // real key. In the Prewrite Phase, we make sure that the start key is a real + // key and take this key as a lock for the 2pc. So When overwriting the write, + // we skip the immediate write of this key and instead put it after the + // completion of the 2pc. + // - To make sure the key locked in the latch is the same as the actual key + // written, we pass it to the key in `process_write' after getting it. + let key_to_lock = if let Some(first_key) = + get_first_user_key(&mut reader, &self.start_key, &self.end_key)? + { + first_key + } else { + // If the key is None return directly + statistics.add(&reader.statistics); + return Ok(ProcessResult::Res); + }; + FlashbackToVersionState::Prewrite { key_to_lock } } else { tls_collect_keyread_histogram_vec(tag, key_locks.len() as f64); FlashbackToVersionState::RollbackLock { @@ -147,31 +160,53 @@ impl ReadCommand for FlashbackToVersionReadPhase { } } } - FlashbackToVersionState::FlashbackWrite { next_write_key, .. } => { + FlashbackToVersionState::FlashbackWrite { + mut next_write_key, .. 
+ } => { if self.commit_ts <= self.start_ts { return Err(Error::from(ErrorInner::InvalidTxnTso { start_ts: self.start_ts, commit_ts: self.commit_ts, })); } - // If the key is not locked, it means that the key has been committed before and - // we are in a retry. - if next_write_key == self.start_key && reader.load_lock(&next_write_key)?.is_none() - { - return Ok(ProcessResult::Res); + if next_write_key == self.start_key { + // The start key from the client is actually a range which is used to limit the + // upper bound of this flashback when scanning data, so it may not be a real + // key. In the Prewrite Phase, we make sure that the start + // key is a real key and take this key as a lock for the + // 2pc. So When overwriting the write, we skip the immediate + // write of this key and instead put it after the completion + // of the 2pc. + next_write_key = if let Some(first_key) = + get_first_user_key(&mut reader, &self.start_key, &self.end_key)? + { + first_key + } else { + // If the key is None return directly + statistics.add(&reader.statistics); + return Ok(ProcessResult::Res); + }; + // Commit key needs to match the Prewrite key, which is set as the first user + // key. + start_key = next_write_key.clone(); + // If the key is not locked, it means that the key has been committed before and + // we are in a retry. 
+ if reader.load_lock(&next_write_key)?.is_none() { + statistics.add(&reader.statistics); + return Ok(ProcessResult::Res); + } } let mut keys = flashback_to_version_read_write( &mut reader, next_write_key, - &self.start_key, + &start_key, &self.end_key, self.version, self.commit_ts, - statistics, )?; if keys.is_empty() { FlashbackToVersionState::Commit { - key_to_commit: self.start_key.clone(), + key_to_commit: start_key.clone(), } } else { tls_collect_keyread_histogram_vec(tag, keys.len() as f64); @@ -189,6 +224,7 @@ impl ReadCommand for FlashbackToVersionReadPhase { } _ => unreachable!(), }; + statistics.add(&reader.statistics); Ok(ProcessResult::NextCommand { cmd: Command::FlashbackToVersion(FlashbackToVersion { ctx: self.ctx, @@ -196,7 +232,7 @@ impl ReadCommand for FlashbackToVersionReadPhase { start_ts: self.start_ts, commit_ts: self.commit_ts, version: self.version, - start_key: self.start_key, + start_key, end_key: self.end_key, state: next_state, }), diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index effe9698f30..12cff74861d 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -572,11 +572,11 @@ fn test_mvcc_flashback_failed_after_first_batch() { must_flashback_to_version(&client, ctx.clone(), check_ts, ts + 1, ts + 2); fail::remove("flashback_skip_1_key_in_write"); fail::remove("flashback_failed_after_first_batch"); - // skip for key@0 + // skip for key@1 must_kv_read_equal( &client, ctx.clone(), - format!("key@{}", from_u32(0_u32).unwrap()) + format!("key@{}", from_u32(1_u32).unwrap()) .as_bytes() .to_vec(), b"value@1".to_vec(), @@ -586,7 +586,7 @@ fn test_mvcc_flashback_failed_after_first_batch() { must_kv_read_equal( &client, ctx.clone(), - format!("key@{}", from_u32(1_u32).unwrap()) + format!("key@{}", from_u32(2_u32).unwrap()) .as_bytes() .to_vec(), b"value@0".to_vec(), @@ -596,7 +596,7 @@ fn test_mvcc_flashback_failed_after_first_batch() { 
must_kv_read_equal( &client, ctx.clone(), - format!("key@{}", from_u32(FLASHBACK_BATCH_SIZE as u32 - 1).unwrap()) + format!("key@{}", from_u32(FLASHBACK_BATCH_SIZE as u32).unwrap()) .as_bytes() .to_vec(), b"value@1".to_vec(), @@ -606,11 +606,11 @@ fn test_mvcc_flashback_failed_after_first_batch() { fail::cfg("flashback_failed_after_first_batch", "return").unwrap(); must_flashback_to_version(&client, ctx.clone(), check_ts, ts + 1, ts + 2); fail::remove("flashback_failed_after_first_batch"); - // key@0 must be flahsbacked in the second batch firstly. + // key@1 must be flahsbacked in the second batch firstly. must_kv_read_equal( &client, ctx.clone(), - format!("key@{}", from_u32(0_u32).unwrap()) + format!("key@{}", from_u32(1_u32).unwrap()) .as_bytes() .to_vec(), b"value@0".to_vec(), @@ -619,19 +619,19 @@ fn test_mvcc_flashback_failed_after_first_batch() { must_kv_read_equal( &client, ctx.clone(), - format!("key@{}", from_u32(FLASHBACK_BATCH_SIZE as u32 - 1).unwrap()) + format!("key@{}", from_u32(FLASHBACK_BATCH_SIZE as u32).unwrap()) .as_bytes() .to_vec(), b"value@0".to_vec(), ts + 2, ); - // 2 * (FLASHBACK_BATCH_SIZE - 1) - 1 keys are flashbacked. + // 2 * (FLASHBACK_BATCH_SIZE - 1) keys are flashbacked. must_kv_read_equal( &client, ctx.clone(), format!( "key@{}", - from_u32(2 * FLASHBACK_BATCH_SIZE as u32 - 3).unwrap() + from_u32(2 * FLASHBACK_BATCH_SIZE as u32 - 2).unwrap() ) .as_bytes() .to_vec(), @@ -646,16 +646,26 @@ fn test_mvcc_flashback_failed_after_first_batch() { // Subsequent batches of writes are flashbacked. must_kv_read_equal( &client, - ctx, + ctx.clone(), format!( "key@{}", - from_u32(2 * FLASHBACK_BATCH_SIZE as u32 - 3).unwrap() + from_u32(2 * FLASHBACK_BATCH_SIZE as u32 - 2).unwrap() ) .as_bytes() .to_vec(), b"value@0".to_vec(), ts, ); + // key@0 which used as prewrite lock also need to be flahsbacked. 
+ must_kv_read_equal( + &client, + ctx, + format!("key@{}", from_u32(0_u32).unwrap()) + .as_bytes() + .to_vec(), + b"value@0".to_vec(), + ts, + ); } #[test] From a4a4a43692b09b157b4d5cae1f2471ff57573ed4 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Thu, 1 Dec 2022 22:28:02 +0800 Subject: [PATCH 0380/1149] scheduler/lock_manager: Handle the corner case that resumable pessimstic lock request is pushed to queue after cancelling (#13865) close tikv/tikv#13298 scheduler/lock_manager: Handle the corner case that resumable pessimstic lock request is pushed to queue after cancelling. When a lock-waiting request is woken up and continues its execution, in some cases it's possible that it encounters other transaction's lock agian. In this case, the entry will be put to the lock waiting queue again. However, there might be problem when LockManager tries to cancel the request (due to timeout or other possible errors. This PR handles this case. Signed-off-by: MyonKeminta Co-authored-by: Ti Chi Robot --- src/server/lock_manager/mod.rs | 8 +- src/server/lock_manager/waiter_manager.rs | 43 +--- src/storage/lock_manager/lock_wait_context.rs | 203 +++++++++++++++--- .../lock_manager/lock_waiting_queue.rs | 86 ++++++-- src/storage/lock_manager/mod.rs | 25 ++- src/storage/mod.rs | 99 +++++---- .../acquire_pessimistic_lock_resumed.rs | 35 ++- src/storage/txn/commands/mod.rs | 10 +- src/storage/txn/scheduler.rs | 47 ++-- src/storage/types.rs | 1 - tests/failpoints/cases/test_storage.rs | 166 +++++++++++++- 11 files changed, 553 insertions(+), 170 deletions(-) diff --git a/src/server/lock_manager/mod.rs b/src/server/lock_manager/mod.rs index ae60467124b..243d533a0e5 100644 --- a/src/server/lock_manager/mod.rs +++ b/src/server/lock_manager/mod.rs @@ -37,10 +37,10 @@ use crate::{ }, storage::{ lock_manager::{ - DiagnosticContext, KeyLockWaitInfo, LockManager as LockManagerTrait, LockWaitToken, - UpdateWaitForEvent, WaitTimeout, + 
CancellationCallback, DiagnosticContext, KeyLockWaitInfo, + LockManager as LockManagerTrait, LockWaitToken, UpdateWaitForEvent, WaitTimeout, }, - DynamicConfigs as StorageDynamicConfigs, Error as StorageError, + DynamicConfigs as StorageDynamicConfigs, }, }; @@ -248,7 +248,7 @@ impl LockManagerTrait for LockManager { wait_info: KeyLockWaitInfo, is_first_lock: bool, timeout: Option, - cancel_callback: Box, + cancel_callback: CancellationCallback, diag_ctx: DiagnosticContext, ) { let timeout = match timeout { diff --git a/src/server/lock_manager/waiter_manager.rs b/src/server/lock_manager/waiter_manager.rs index 8cce7bc5da6..467580645d3 100644 --- a/src/server/lock_manager/waiter_manager.rs +++ b/src/server/lock_manager/waiter_manager.rs @@ -27,13 +27,12 @@ use tikv_util::{ }; use tokio::task::spawn_local; use tracker::GLOBAL_TRACKERS; -use txn_types::Key; use super::{config::Config, deadlock::Scheduler as DetectorScheduler, metrics::*}; use crate::storage::{ lock_manager::{ - DiagnosticContext, KeyLockWaitInfo, LockDigest, LockWaitToken, UpdateWaitForEvent, - WaitTimeout, + CancellationCallback, DiagnosticContext, KeyLockWaitInfo, LockDigest, LockWaitToken, + UpdateWaitForEvent, WaitTimeout, }, mvcc::{Error as MvccError, ErrorInner as MvccErrorInner, TimeStamp}, txn::Error as TxnError, @@ -107,9 +106,6 @@ pub type Callback = Box) + Send>; #[allow(clippy::large_enum_variant)] pub enum Task { - SetKeyWakeUpDelayCallback { - cb: Box, - }, WaitFor { token: LockWaitToken, region_id: u64, @@ -119,7 +115,7 @@ pub enum Task { start_ts: TimeStamp, wait_info: KeyLockWaitInfo, timeout: WaitTimeout, - cancel_callback: Box, + cancel_callback: CancellationCallback, diag_ctx: DiagnosticContext, start_waiting_time: Instant, }, @@ -158,9 +154,6 @@ impl Debug for Task { impl Display for Task { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { - Task::SetKeyWakeUpDelayCallback { .. 
} => { - write!(f, "setting key wake up delay callback") - } Task::WaitFor { token, start_ts, @@ -206,7 +199,7 @@ pub(crate) struct Waiter { // term: u64, pub(crate) start_ts: TimeStamp, pub(crate) wait_info: KeyLockWaitInfo, - pub(crate) cancel_callback: Box, + pub(crate) cancel_callback: CancellationCallback, pub diag_ctx: DiagnosticContext, delay: Delay, start_waiting_time: Instant, @@ -219,7 +212,7 @@ impl Waiter { _term: u64, start_ts: TimeStamp, wait_info: KeyLockWaitInfo, - cancel_callback: Box, + cancel_callback: CancellationCallback, deadline: Instant, diag_ctx: DiagnosticContext, start_waiting_time: Instant, @@ -280,7 +273,7 @@ impl Waiter { pub(super) fn cancel_no_timeout( wait_info: KeyLockWaitInfo, - cancel_callback: Box, + cancel_callback: CancellationCallback, ) { let lock_info = wait_info.lock_info; let error = MvccError::from(MvccErrorInner::KeyIsLocked(lock_info)); @@ -311,8 +304,6 @@ struct WaitTable { wait_table: HashMap<(u64, TimeStamp), LockWaitToken>, waiter_pool: HashMap, waiter_count: Arc, - - wake_up_key_delay_callback: Option>, } impl WaitTable { @@ -321,17 +312,9 @@ impl WaitTable { wait_table: HashMap::default(), waiter_pool: HashMap::default(), waiter_count, - wake_up_key_delay_callback: None, } } - fn set_wake_up_key_delay_callback( - &mut self, - cb: Option>, - ) { - self.wake_up_key_delay_callback = cb; - } - #[cfg(test)] fn count(&self) -> usize { self.waiter_pool.len() @@ -430,7 +413,7 @@ impl Scheduler { start_ts: TimeStamp, wait_info: KeyLockWaitInfo, timeout: WaitTimeout, - cancel_callback: Box, + cancel_callback: CancellationCallback, diag_ctx: DiagnosticContext, ) { self.notify_scheduler(Task::WaitFor { @@ -447,13 +430,6 @@ impl Scheduler { }); } - pub fn set_key_wake_up_delay_callback( - &self, - cb: Box, - ) { - self.notify_scheduler(Task::SetKeyWakeUpDelayCallback { cb }); - } - pub fn remove_lock_wait(&self, token: LockWaitToken) { self.notify_scheduler(Task::RemoveLockWait { token }); } @@ -610,11 +586,6 @@ impl 
WaiterManager { impl FutureRunnable for WaiterManager { fn run(&mut self, task: Task) { match task { - Task::SetKeyWakeUpDelayCallback { cb } => { - self.wait_table - .borrow_mut() - .set_wake_up_key_delay_callback(Some(cb)); - } Task::WaitFor { token, region_id, diff --git a/src/storage/lock_manager/lock_wait_context.rs b/src/storage/lock_manager/lock_wait_context.rs index b8aaa7f1927..32c99867a3f 100644 --- a/src/storage/lock_manager/lock_wait_context.rs +++ b/src/storage/lock_manager/lock_wait_context.rs @@ -11,21 +11,30 @@ //! of a single `AcquirePessimisticLock` request, and ensuring the internal //! callback for returning response through RPC is called at most only once. -use std::{convert::TryInto, result::Result, sync::Arc}; +use std::{ + convert::TryInto, + result::Result, + sync::{ + atomic::{AtomicBool, Ordering}, + mpsc, Arc, + }, +}; use parking_lot::Mutex; use txn_types::Key; use crate::storage::{ errors::SharedError, - lock_manager::{ - lock_waiting_queue::{LockWaitQueues, PessimisticLockKeyCallback}, - LockManager, LockWaitToken, - }, + lock_manager::{lock_waiting_queue::LockWaitQueues, LockManager, LockWaitToken}, types::PessimisticLockKeyResult, Error as StorageError, PessimisticLockResults, ProcessResult, StorageCallback, }; +// The arguments are: (result, is_canceled_before_enqueueing). +pub type PessimisticLockKeyCallback = + Box, bool) + Send + 'static>; +pub type CancellationCallback = Box; + pub struct LockWaitContextInner { /// The callback for finishing the current AcquirePessimisticLock request. /// Usually, requests are accepted from RPC, and in this case calling @@ -53,6 +62,123 @@ pub struct LockWaitContextSharedState { /// The key on which lock waiting occurs. key: Key, + + /// When a lock-waiting request (allow_lock_with_conflict == true) is + /// resumed, it's theoretically possible that the request meets lock + /// again, therefore it may need to be pushed to the lock waiting queue + /// again. 
Since the request is popped out from the queue when resuming + /// (which means the lock wait entry doesn't exist in the lock waiting + /// queue during the resumed execution), it's possible that timeout or + /// deadlock happens from `WaiterManager` during that time, which will + /// try to cancel the request. Therefore it leads to such a corner case: + /// + /// 1. (scheduler) A request enters lock waiting state, so an entry is + /// pushed to the `LockWaitQueues`, and a message is sent to + /// `LockManager`. + /// 2. (scheduler) After a while the entry is popped out and resumed + /// from the `LockWaitQueues`. + /// 3. (scheduler) The request resumes execution but still finds lock + /// on the key. + /// * This is possible to be caused by delayed-waking up or encountering + /// error when writing a lock-releasing command to the engine. + /// 4. (lock_manager) At the same time, `LockManager` tries to cancel + /// the request due to timeout. But when calling `finish_request`, + /// the entry cannot be found from the `LockWaitQueues`. So it + /// believes that the entry is already popped out and resumed and does + /// nothing. + /// 5. (scheduler) An entry is pushed to the `LockWaitQueues` due to + /// encountering lock at step 3. 6. Then the request becomes unable to + /// be canceled by timeout or other possible errors. In worst cases, + /// the request may stuck in TiKV forever. + /// + /// To solve this problem, a `is_canceled` flag should be set when + /// `LockManager` tries to cancel it, before accessing the + /// `LockWaitQueues`; when an entry is pushed to the `LockWaitQueues`, + /// check if `is_canceled` is set after locking its inner map (ensures + /// exclusive access with `LockManager`), and if it's set, cancel the + /// request like how `LockManager` should have done. + /// + /// The request should be canceled with the error that occurs in + /// `LockManager`. 
`external_error_tx` and `external_error_rx` are used + /// to pass this error in this case. + /// + /// `is_canceled` marks if the request is canceled from outside. Usually + /// this is caused by timeout or deadlock detected. When this flag is + /// marked true, the request must not be put into the lock waiting queue + /// since nobody will wake it up for timeout and it may stuck forever. + is_canceled: AtomicBool, + + /// The sender for passing errors in some cancellation cases. See comments + /// in [`is_canceled`](LockWaitContextSharedState::is_canceled) for details. + /// It's only possible to be used in `LockManager`, so there's no contention + /// on the mutex. + external_error_tx: Mutex>>, + + /// The sender for passing errors in some cancellation cases. See comments + /// in [`is_canceled`](LockWaitContextSharedState::is_canceled) for details. + /// It's only possible to be used when scheduler tries to push to + /// `LockWaitQueues`, so there's no contention on the mutex. + external_error_rx: Mutex>>, +} + +impl LockWaitContextSharedState { + fn new(lock_wait_token: LockWaitToken, key: Key, cb: StorageCallback) -> Self { + let inner = LockWaitContextInner { cb }; + let (tx, rx) = mpsc::channel(); + Self { + ctx_inner: Mutex::new(Some(inner)), + key, + lock_wait_token, + is_canceled: AtomicBool::new(false), + external_error_tx: Mutex::new(Some(tx)), + external_error_rx: Mutex::new(Some(rx)), + } + } + + #[cfg(test)] + pub fn new_dummy(lock_wait_token: LockWaitToken, key: Key) -> Self { + let (tx, rx) = mpsc::channel(); + Self { + ctx_inner: Mutex::new(None), + key, + lock_wait_token, + is_canceled: AtomicBool::new(false), + external_error_tx: Mutex::new(Some(tx)), + external_error_rx: Mutex::new(Some(rx)), + } + } + + pub fn is_canceled(&self) -> bool { + self.is_canceled.load(Ordering::Acquire) + } + + /// Gets the external error. It's assumed that the external error must have + /// been set and consumes it. 
This function is expected to be called at + /// most only once. Only used to handle the case that cancelling and + /// resuming happens concurrently. + pub(in crate::storage) fn get_external_error(&self) -> StorageError { + self.external_error_rx + .lock() + .take() + .unwrap() + .recv() + .unwrap() + } + + /// Stores the external error. This function is expected to be called at + /// most only once. Only used to handle the case that cancelling and + /// resuming happens concurrently. + fn put_external_error(&self, error: StorageError) { + if let Err(e) = self.external_error_tx.lock().take().unwrap().send(error) { + debug!("failed to set external error"; "err" => ?e); + } + } +} + +enum FinishRequestKind { + Executed, + Canceled, + CanceledBeforeEnqueueing, } #[derive(Clone)] @@ -70,13 +196,8 @@ impl LockWaitContext { cb: StorageCallback, allow_lock_with_conflict: bool, ) -> Self { - let inner = LockWaitContextInner { cb }; Self { - shared_states: Arc::new(LockWaitContextSharedState { - ctx_inner: Mutex::new(Some(inner)), - key, - lock_wait_token, - }), + shared_states: Arc::new(LockWaitContextSharedState::new(lock_wait_token, key, cb)), lock_wait_queues, allow_lock_with_conflict, } @@ -105,8 +226,13 @@ impl LockWaitContext { /// key. pub fn get_callback_for_blocked_key(&self) -> PessimisticLockKeyCallback { let ctx = self.clone(); - Box::new(move |res| { - ctx.finish_request(res, false); + Box::new(move |res, is_canceled_before_enqueueing| { + let kind = if is_canceled_before_enqueueing { + FinishRequestKind::CanceledBeforeEnqueueing + } else { + FinishRequestKind::Executed + }; + ctx.finish_request(res, kind); }) } @@ -118,31 +244,45 @@ impl LockWaitContext { /// This function is assumed to be called when the lock-waiting request is /// queueing but canceled outside, so it includes an operation to actively /// remove the entry from the lock waiting queue. 
- pub fn get_callback_for_cancellation(&self) -> impl FnOnce(StorageError) { + pub fn get_callback_for_cancellation(&self) -> CancellationCallback { let ctx = self.clone(); - move |e| { - ctx.finish_request(Err(e.into()), true); - } + Box::new(move |e| { + ctx.finish_request(Err(e.into()), FinishRequestKind::Canceled); + }) } fn finish_request( &self, result: Result, - is_canceling: bool, + finish_kind: FinishRequestKind, ) { - if is_canceling { - let entry = self - .lock_wait_queues - .remove_by_token(&self.shared_states.key, self.shared_states.lock_wait_token); - if entry.is_none() { - // Already popped out from the queue so that it will be woken up normally. Do - // nothing. - return; + match finish_kind { + FinishRequestKind::Executed => { + self.lock_wait_queues + .get_lock_mgr() + .remove_lock_wait(self.shared_states.lock_wait_token); + } + FinishRequestKind::Canceled => { + self.shared_states + .is_canceled + .store(true, Ordering::Release); + + let entry = self + .lock_wait_queues + .remove_by_token(&self.shared_states.key, self.shared_states.lock_wait_token); + if entry.is_none() { + // It's absent in the queue infers that it's already popped out from the queue + // so that it will be woken up normally. However + // it may still meet lock and tries to enter waiting state again. In such case, + // the request should be canceled. Store the error here so + // that it can be used for cancellation in that case, where + // there will be a `finish_request(None, false)` invocation). + self.shared_states + .put_external_error(result.unwrap_err().try_into().unwrap()); + return; + } } - } else { - self.lock_wait_queues - .get_lock_mgr() - .remove_lock_wait(self.shared_states.lock_wait_token); + FinishRequestKind::CanceledBeforeEnqueueing => {} } // When this is executed, the waiter is either woken up from the queue or @@ -243,7 +383,7 @@ mod tests { // Nothing happens currently. 
(ctx.get_callback_for_first_write_batch()).execute(ProcessResult::Res); rx.recv_timeout(Duration::from_millis(20)).unwrap_err(); - (ctx.get_callback_for_blocked_key())(Err(SharedError::from(write_conflict()))); + (ctx.get_callback_for_blocked_key())(Err(SharedError::from(write_conflict())), false); let res = rx.recv().unwrap().unwrap_err(); assert!(matches!( &res, @@ -270,6 +410,7 @@ mod tests { }, should_not_exist: false, lock_wait_token: token, + req_states: ctx.get_shared_states().clone(), legacy_wake_up_index: None, key_cb: None, }), diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs index 90a2c369cca..663c6729962 100644 --- a/src/storage/lock_manager/lock_waiting_queue.rs +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -57,7 +57,6 @@ use std::{ future::Future, pin::Pin, - result::Result, sync::{ atomic::{AtomicU64, AtomicUsize, Ordering}, Arc, @@ -65,7 +64,7 @@ use std::{ time::{Duration, Instant}, }; -use dashmap; +use dashmap::{self, mapref::entry::Entry as DashMapEntry}; use futures_util::compat::Future01CompatExt; use keyed_priority_queue::KeyedPriorityQueue; use kvproto::kvrpcpb; @@ -75,18 +74,17 @@ use tikv_util::{time::InstantExt, timer::GLOBAL_TIMER_HANDLE}; use txn_types::{Key, TimeStamp}; use crate::storage::{ - errors::SharedError, - lock_manager::{LockManager, LockWaitToken}, + lock_manager::{ + lock_wait_context::{LockWaitContextSharedState, PessimisticLockKeyCallback}, + LockManager, LockWaitToken, + }, metrics::*, mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, - txn::Error as TxnError, - types::{PessimisticLockKeyResult, PessimisticLockParameters}, - Error as StorageError, + txn::{Error as TxnError, ErrorInner as TxnErrorInner}, + types::PessimisticLockParameters, + Error as StorageError, ErrorInner as StorageErrorInner, }; -pub type CallbackWithSharedError = Box) + Send + 'static>; -pub type PessimisticLockKeyCallback = CallbackWithSharedError; - /// Represents an 
`AcquirePessimisticLock` request that's waiting for a lock, /// and contains the request's parameters. pub struct LockWaitEntry { @@ -97,6 +95,7 @@ pub struct LockWaitEntry { // Put it in a separated field. pub should_not_exist: bool, pub lock_wait_token: LockWaitToken, + pub req_states: Arc, pub legacy_wake_up_index: Option, pub key_cb: Option>, } @@ -248,15 +247,26 @@ impl LockWaitQueues { current_lock: kvrpcpb::LockInfo, ) { let mut new_key = false; - let mut key_state = self - .inner - .queue_map - .entry(lock_wait_entry.key.clone()) - .or_insert_with(|| { - new_key = true; - KeyLockWaitState::new() - }); - key_state.current_lock = current_lock; + + let map_entry = self.inner.queue_map.entry(lock_wait_entry.key.clone()); + + // If it's not the first time the request is put into the queue, the request + // might be canceled from outside when the entry is temporarily absent + // in the queue. In this case, the cancellation operation is not done. + // Do it here. For details about this corner case, see document of + // `LockWaitContext::is_canceled` field. 
+ if lock_wait_entry.req_states.is_canceled() { + self.on_push_canceled_entry(lock_wait_entry, map_entry); + return; + } + + let mut key_state = map_entry.or_insert_with(|| { + new_key = true; + KeyLockWaitState::new() + }); + if !current_lock.key.is_empty() { + key_state.current_lock = current_lock; + } if lock_wait_entry.legacy_wake_up_index.is_none() { lock_wait_entry.legacy_wake_up_index = Some(key_state.value().legacy_wake_up_index); @@ -277,6 +287,32 @@ impl LockWaitQueues { } } + fn on_push_canceled_entry( + &self, + lock_wait_entry: Box, + key_state: DashMapEntry<'_, Key, KeyLockWaitState, impl std::hash::BuildHasher>, + ) { + let mut err = lock_wait_entry.req_states.get_external_error(); + + if let DashMapEntry::Occupied(key_state_entry) = key_state { + if let StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc( + MvccError(box MvccErrorInner::KeyIsLocked(lock_info)), + )))) = &mut err + { + // Update the lock info in the error to the latest if possible. + let latest_lock_info = &key_state_entry.get().current_lock; + if !latest_lock_info.key.is_empty() { + *lock_info = latest_lock_info.clone(); + } + } + } + + // `key_state` is dropped here, so the mutex in the queue map is released. + + let cb = lock_wait_entry.key_cb.unwrap().into_inner(); + cb(Err(err.into()), true); + } + /// Dequeues the head of the lock waiting queue of the specified key, /// assuming the popped entry will be woken up. /// @@ -437,6 +473,8 @@ impl LockWaitQueues { prev_delay_ms = current_delay_ms; } + fail_point!("lock_waiting_queue_before_delayed_notify_all"); + self.delayed_notify_all(&key, notify_id) } @@ -523,7 +561,7 @@ impl LockWaitQueues { reason: kvrpcpb::WriteConflictReason::PessimisticRetry, }, ))); - cb(Err(e.into())); + cb(Err(e.into()), false); } // Return the item to be woken up in resumable way. 
@@ -612,9 +650,10 @@ mod tests { use super::*; use crate::storage::{ + errors::SharedError, lock_manager::{lock_wait_context::LockWaitContext, MockLockManager, WaitTimeout}, txn::ErrorInner as TxnErrorInner, - ErrorInner as StorageErrorInner, StorageCallback, + ErrorInner as StorageErrorInner, PessimisticLockKeyResult, StorageCallback, }; struct TestLockWaitEntryHandle { @@ -703,8 +742,11 @@ mod tests { parameters, should_not_exist: false, lock_wait_token: token, + req_states: dummy_ctx.get_shared_states().clone(), legacy_wake_up_index: None, - key_cb: Some(SyncWrapper::new(Box::new(move |res| tx.send(res).unwrap()))), + key_cb: Some(SyncWrapper::new(Box::new(move |res, _| { + tx.send(res).unwrap() + }))), }); let cancel_callback = dummy_ctx.get_callback_for_cancellation(); diff --git a/src/storage/lock_manager/mod.rs b/src/storage/lock_manager/mod.rs index 3ba9c7f7905..75b133a808f 100644 --- a/src/storage/lock_manager/mod.rs +++ b/src/storage/lock_manager/mod.rs @@ -9,12 +9,13 @@ use std::{ time::Duration, }; -use collections::HashMap; +use collections::{HashMap, HashSet}; use kvproto::{kvrpcpb::LockInfo, metapb::RegionEpoch}; use parking_lot::Mutex; use tracker::TrackerToken; use txn_types::{Key, TimeStamp}; +pub use crate::storage::lock_manager::lock_wait_context::CancellationCallback; use crate::{ server::lock_manager::{waiter_manager, waiter_manager::Callback}, storage::{ @@ -147,7 +148,7 @@ pub trait LockManager: Clone + Send + Sync + 'static { wait_info: KeyLockWaitInfo, is_first_lock: bool, timeout: Option, - cancel_callback: Box, + cancel_callback: CancellationCallback, diag_ctx: DiagnosticContext, ); @@ -170,8 +171,7 @@ pub trait LockManager: Clone + Send + Sync + 'static { #[derive(Clone)] pub struct MockLockManager { allocated_token: Arc, - waiters: - Arc)>>>, + waiters: Arc>>, } impl MockLockManager { @@ -205,7 +205,7 @@ impl LockManager for MockLockManager { wait_info: KeyLockWaitInfo, _is_first_lock: bool, _timeout: Option, - cancel_callback: Box, + 
cancel_callback: CancellationCallback, _diag_ctx: DiagnosticContext, ) { self.waiters @@ -230,4 +230,19 @@ impl MockLockManager { cancel_callback(StorageError::from(TxnError::from(error))); } } + + pub fn simulate_timeout(&self, token: LockWaitToken) { + if let Some((wait_info, cancel_callback)) = self.waiters.lock().remove(&token) { + let error = MvccError::from(MvccErrorInner::KeyIsLocked(wait_info.lock_info)); + cancel_callback(StorageError::from(TxnError::from(error))); + } + } + + pub fn get_all_tokens(&self) -> HashSet { + self.waiters + .lock() + .iter() + .map(|(&token, _)| token) + .collect() + } } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 32cd7c11000..caed0f57c91 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -66,7 +66,7 @@ use std::{ marker::PhantomData, mem, sync::{ - atomic::{self, AtomicBool, AtomicU64}, + atomic::{self, AtomicBool, AtomicU64, Ordering}, Arc, }, }; @@ -3102,6 +3102,12 @@ impl TestStorageBuilder { self } + pub fn wake_up_delay_duration(self, duration_ms: u64) -> Self { + self.wake_up_delay_duration_ms + .store(duration_ms, Ordering::Relaxed); + self + } + pub fn set_api_version(mut self, api_version: ApiVersion) -> Self { self.config.set_api_version(api_version); self @@ -3196,6 +3202,9 @@ pub mod test_util { }, }; + use futures_executor::block_on; + use kvproto::kvrpcpb::Op; + use super::*; use crate::storage::{ lock_manager::WaitTimeout, @@ -3506,6 +3515,46 @@ pub mod test_util { feature_gate.set_version(env!("CARGO_PKG_VERSION")).unwrap(); feature_gate } + + pub fn must_have_locks( + storage: &Storage, + ts: u64, + start_key: &[u8], + end_key: &[u8], + expected_locks: &[( + // key + &[u8], + Op, + // start_ts + u64, + // for_update_ts + u64, + )], + ) { + let locks = block_on(storage.scan_lock( + Context::default(), + ts.into(), + Some(Key::from_raw(start_key)), + Some(Key::from_raw(end_key)), + 100, + )) + .unwrap(); + assert_eq!( + locks.len(), + expected_locks.len(), + "lock count not match, 
expected: {:?}; got: {:?}", + expected_locks, + locks + ); + for (lock_info, (expected_key, expected_op, expected_start_ts, expected_for_update_ts)) in + locks.into_iter().zip(expected_locks.iter()) + { + assert_eq!(lock_info.get_key(), *expected_key); + assert_eq!(lock_info.get_lock_type(), *expected_op); + assert_eq!(lock_info.get_lock_version(), *expected_start_ts); + assert_eq!(lock_info.get_lock_for_update_ts(), *expected_for_update_ts); + } + } } /// All statistics related to KvGet/KvBatchGet. @@ -3561,8 +3610,8 @@ mod tests { Error as KvError, ErrorInner as EngineErrorInner, ExpectedWrite, MockEngineBuilder, }, lock_manager::{ - DiagnosticContext, KeyLockWaitInfo, LockDigest, LockWaitToken, UpdateWaitForEvent, - WaitTimeout, + CancellationCallback, DiagnosticContext, KeyLockWaitInfo, LockDigest, + LockWaitToken, UpdateWaitForEvent, WaitTimeout, }, mvcc::LockType, txn::{ @@ -8151,46 +8200,6 @@ mod tests { test_pessimistic_lock_impl(true); } - fn must_have_locks( - storage: &Storage, - ts: u64, - start_key: &[u8], - end_key: &[u8], - expected_locks: &[( - // key - &[u8], - Op, - // start_ts - u64, - // for_update_ts - u64, - )], - ) { - let locks = block_on(storage.scan_lock( - Context::default(), - ts.into(), - Some(Key::from_raw(start_key)), - Some(Key::from_raw(end_key)), - 100, - )) - .unwrap(); - assert_eq!( - locks.len(), - expected_locks.len(), - "lock count not match, expected: {:?}; got: {:?}", - expected_locks, - locks - ); - for (lock_info, (expected_key, expected_op, expected_start_ts, expected_for_update_ts)) in - locks.into_iter().zip(expected_locks.iter()) - { - assert_eq!(lock_info.get_key(), *expected_key); - assert_eq!(lock_info.get_lock_type(), *expected_op); - assert_eq!(lock_info.get_lock_version(), *expected_start_ts); - assert_eq!(lock_info.get_lock_for_update_ts(), *expected_for_update_ts); - } - } - fn test_pessimistic_lock_resumable_impl( pipelined_pessimistic_lock: bool, in_memory_lock: bool, @@ -8723,7 +8732,7 @@ mod tests { 
wait_info: KeyLockWaitInfo, is_first_lock: bool, timeout: Option, - cancel_callback: Box, + cancel_callback: CancellationCallback, diag_ctx: DiagnosticContext, }, RemoveLockWait { @@ -8763,7 +8772,7 @@ mod tests { wait_info: KeyLockWaitInfo, is_first_lock: bool, timeout: Option, - cancel_callback: Box, + cancel_callback: CancellationCallback, diag_ctx: DiagnosticContext, ) { self.tx diff --git a/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs b/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs index 3a35fe6d1a7..a66f8228755 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs @@ -1,11 +1,19 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +use std::{ + fmt::{Debug, Formatter}, + sync::Arc, +}; + // #[PerformanceCriticalPath] use kvproto::kvrpcpb::ExtraOp; use txn_types::{insert_old_value_if_resolved, Key, OldValues}; use crate::storage::{ - lock_manager::{lock_waiting_queue::LockWaitEntry, LockManager, LockWaitToken}, + lock_manager::{ + lock_wait_context::LockWaitContextSharedState, lock_waiting_queue::LockWaitEntry, + LockManager, LockWaitToken, + }, mvcc::{Error as MvccError, ErrorInner as MvccErrorInner, MvccTxn, SnapshotReader}, txn::{ acquire_pessimistic_lock, @@ -21,12 +29,23 @@ use crate::storage::{ Snapshot, }; -#[derive(Debug)] pub struct ResumedPessimisticLockItem { pub key: Key, pub should_not_exist: bool, pub params: PessimisticLockParameters, pub lock_wait_token: LockWaitToken, + pub req_states: Arc, +} + +impl Debug for ResumedPessimisticLockItem { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ResumedPessimisticLockItem") + .field("key", &self.key) + .field("should_not_exist", &self.should_not_exist) + .field("params", &self.params) + .field("lock_wait_token", &self.lock_wait_token) + .finish() + } } command! 
{ @@ -61,6 +80,7 @@ impl CommandExt for AcquirePessimisticLockResumed { impl WriteCommand for AcquirePessimisticLockResumed { fn process_write(self, snapshot: S, context: WriteContext<'_, L>) -> Result { + fail_point!("acquire_pessimistic_lock_resumed_before_process_write"); let mut modifies = vec![]; let mut txn = None; let mut reader: Option> = None; @@ -79,6 +99,7 @@ impl WriteCommand for AcquirePessimisticLockR should_not_exist, params, lock_wait_token, + req_states, } = item; // TODO: Refine the code for rebuilding txn state. @@ -136,6 +157,7 @@ impl WriteCommand for AcquirePessimisticLockR let mut lock_info = WriteResultLockInfo::new(lock_info, params, key, should_not_exist); lock_info.lock_wait_token = lock_wait_token; + lock_info.req_states = Some(req_states); res.push(PessimisticLockKeyResult::Waiting); encountered_locks.push(lock_info); } @@ -185,6 +207,7 @@ impl AcquirePessimisticLockResumed { should_not_exist: item.should_not_exist, params: item.parameters, lock_wait_token: item.lock_wait_token, + req_states: item.req_states, } }) .collect(); @@ -304,16 +327,20 @@ mod tests { let key = Key::from_raw(key); let lock_hash = key.gen_hash(); + let token = LockWaitToken(Some(random())); + // The tests in this file doesn't need a valid req_state. Set a dummy value + // here. 
+ let req_states = Arc::new(LockWaitContextSharedState::new_dummy(token, key.clone())); let entry = LockWaitEntry { key, lock_hash, parameters, should_not_exist: false, - lock_wait_token: LockWaitToken(Some(random())), + lock_wait_token: token, legacy_wake_up_index: Some(0), + req_states, key_cb: None, }; - Box::new(entry) } diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 4d3f32fa9cd..7d835462acf 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -30,6 +30,7 @@ use std::{ iter, marker::PhantomData, ops::{Deref, DerefMut}, + sync::Arc, }; pub use acquire_pessimistic_lock::AcquirePessimisticLock; @@ -63,7 +64,10 @@ use txn_types::{Key, TimeStamp, Value, Write}; use crate::storage::{ kv::WriteData, - lock_manager::{self, LockManager, LockWaitToken, WaitTimeout}, + lock_manager::{ + self, lock_wait_context::LockWaitContextSharedState, LockManager, LockWaitToken, + WaitTimeout, + }, metrics, mvcc::{Lock as MvccLock, MvccReader, ReleasedLock, SnapshotReader}, txn::{latch, ProcessResult, Result}, @@ -428,6 +432,9 @@ pub struct WriteResultLockInfo { /// another lock again after resuming, this field will carry the token /// that was already allocated before. pub lock_wait_token: LockWaitToken, + /// For resumed pessimistic lock requests, this is needed to check if it's + /// canceled outside. 
+ pub req_states: Option>, } impl WriteResultLockInfo { @@ -450,6 +457,7 @@ impl WriteResultLockInfo { parameters, hash_for_latch, lock_wait_token: LockWaitToken(None), + req_states: None, } } } diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 4657decf66f..bfbb860e545 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -68,11 +68,8 @@ use crate::{ }, lock_manager::{ self, - lock_wait_context::LockWaitContext, - lock_waiting_queue::{ - CallbackWithSharedError, DelayedNotifyAllFuture, LockWaitEntry, LockWaitQueues, - PessimisticLockKeyCallback, - }, + lock_wait_context::{LockWaitContext, PessimisticLockKeyCallback}, + lock_waiting_queue::{DelayedNotifyAllFuture, LockWaitEntry, LockWaitQueues}, DiagnosticContext, LockManager, LockWaitToken, }, metrics::*, @@ -208,7 +205,7 @@ impl TaskContext { pub enum SchedulerTaskCallback { NormalRequestCallback(StorageCallback), - LockKeyCallbacks(Vec>), + LockKeyCallbacks(Vec), } impl SchedulerTaskCallback { @@ -220,13 +217,13 @@ impl SchedulerTaskCallback { | ProcessResult::PessimisticLockRes { res: Err(err) } => { let err = SharedError::from(err); for cb in cbs { - cb(Err(err.clone())); + cb(Err(err.clone()), false); } } ProcessResult::PessimisticLockRes { res: Ok(v) } => { assert_eq!(v.0.len(), cbs.len()); for (res, cb) in v.0.into_iter().zip(cbs) { - cb(Ok(res)) + cb(Ok(res), false) } } _ => unreachable!(), @@ -652,9 +649,9 @@ impl Scheduler { fn schedule_awakened_pessimistic_locks( &self, - cid: u64, + specified_cid: Option, + prepared_latches: Option, mut awakened_entries: SVec>, - latches: Lock, ) { let key_callbacks: Vec<_> = awakened_entries .iter_mut() @@ -665,10 +662,10 @@ impl Scheduler { // TODO: Make flow control take effect on this thing. 
self.schedule_command( - Some(cid), + specified_cid, cmd.into(), SchedulerTaskCallback::LockKeyCallbacks(key_callbacks), - Some(latches), + prepared_latches, ); } @@ -861,9 +858,9 @@ impl Scheduler { next_latches.force_assume_acquired(); self.schedule_awakened_pessimistic_locks( - next_cid, + Some(next_cid), + Some(next_latches), woken_up_resumable_lock_requests, - next_latches, ); } else { if !tctx.woken_up_resumable_lock_requests.is_empty() { @@ -929,7 +926,7 @@ impl Scheduler { wait_info, is_first_lock, wait_timeout, - Box::new(lock_req_ctx.get_callback_for_cancellation()), + lock_req_ctx.get_callback_for_cancellation(), diag_ctx, ); } @@ -1004,19 +1001,23 @@ impl Scheduler { reason: kvrpcpb::WriteConflictReason::PessimisticRetry, }, ))); - cb(Err(e.into())); + cb(Err(e.into()), false); } for f in delayed_wake_up_futures { + let self2 = self1.clone(); self1 .get_sched_pool(CommandPri::High) .pool .spawn(async move { let res = f.await; - // It returns only None currently. - // TODO: Handle not-none case when supporting resumable pessimistic lock - // requests. 
- assert!(res.is_none()); + if let Some(resumable_lock_wait_entry) = res { + self2.schedule_awakened_pessimistic_locks( + None, + None, + smallvec![resumable_lock_wait_entry], + ); + } }) .unwrap(); } @@ -1593,12 +1594,15 @@ impl Scheduler { task_ctx.cb = Some(SchedulerTaskCallback::NormalRequestCallback(first_batch_cb)); drop(slot); + assert!(lock_info.req_states.is_none()); + let lock_wait_entry = Box::new(LockWaitEntry { key: lock_info.key, lock_hash: lock_info.lock_digest.hash, parameters: lock_info.parameters, should_not_exist: lock_info.should_not_exist, lock_wait_token, + req_states: ctx.get_shared_states().clone(), legacy_wake_up_index: None, key_cb: Some(ctx.get_callback_for_blocked_key().into()), }); @@ -1617,6 +1621,9 @@ impl Scheduler { parameters: lock_info.parameters, should_not_exist: lock_info.should_not_exist, lock_wait_token: lock_info.lock_wait_token, + // This must be called after an execution fo AcquirePessimisticLockResumed, in which + // case there must be a valid req_state. 
+ req_states: lock_info.req_states.unwrap(), legacy_wake_up_index: None, key_cb: Some(cb.into()), }) diff --git a/src/storage/types.rs b/src/storage/types.rs index c7da00c9ace..b4e91811843 100644 --- a/src/storage/types.rs +++ b/src/storage/types.rs @@ -271,7 +271,6 @@ impl PessimisticLockKeyResult { assert!(matches!(self, Self::Waiting)); } - #[cfg(test)] pub fn unwrap_err(&self) -> SharedError { match self { Self::Failed(e) => e.clone(), diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index dd8f49bbde3..40c78dfabde 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -4,7 +4,7 @@ use std::{ sync::{ atomic::{AtomicBool, Ordering}, mpsc::{channel, RecvTimeoutError}, - Arc, + Arc, Mutex, }, thread, time::Duration, @@ -512,6 +512,170 @@ fn test_pipelined_pessimistic_lock() { delete_pessimistic_lock(&storage, key, 60, 60); } +fn test_pessimistic_lock_resumable_blocked_twice_impl(canceled_when_resumed: bool) { + let lock_mgr = MockLockManager::new(); + let storage = TestStorageBuilderApiV1::new(lock_mgr.clone()) + .wake_up_delay_duration(100) + .build() + .unwrap(); + let (tx, rx) = channel(); + + let empty = PessimisticLockResults(vec![PessimisticLockKeyResult::Empty]); + + fail::cfg("lock_waiting_queue_before_delayed_notify_all", "pause").unwrap(); + let (first_resume_tx, first_resume_rx) = channel(); + let (first_resume_continue_tx, first_resume_continue_rx) = channel(); + let first_resume_tx = Mutex::new(first_resume_tx); + let first_resume_continue_rx = Mutex::new(first_resume_continue_rx); + fail::cfg_callback( + "acquire_pessimistic_lock_resumed_before_process_write", + move || { + // Notify that the failpoint is reached, and block until it receives a continue + // signal. + first_resume_tx.lock().unwrap().send(()).unwrap(); + first_resume_continue_rx.lock().unwrap().recv().unwrap(); + }, + ) + .unwrap(); + + let key = Key::from_raw(b"key"); + + // Lock the key. 
+ storage + .sched_txn_command( + new_acquire_pessimistic_lock_command(vec![(key.clone(), false)], 10, 10, false, false), + expect_pessimistic_lock_res_callback(tx, empty.clone()), + ) + .unwrap(); + rx.recv_timeout(Duration::from_secs(1)).unwrap(); + + // Another non-resumable request blocked. + let (tx_blocked_1, rx_blocked_1) = channel(); + storage + .sched_txn_command( + new_acquire_pessimistic_lock_command(vec![(key.clone(), false)], 11, 11, false, false), + expect_fail_callback(tx_blocked_1, 0, |e| match e { + Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(mvcc::Error( + box mvcc::ErrorInner::WriteConflict { .. }, + ))))) => (), + e => panic!("unexpected error chain: {:?}", e), + }), + ) + .unwrap(); + rx_blocked_1 + .recv_timeout(Duration::from_millis(50)) + .unwrap_err(); + + let tokens_before = lock_mgr.get_all_tokens(); + // Another resumable request blocked, and is queued behind the above one. + let (tx_blocked_2, rx_blocked_2) = channel(); + storage + .sched_txn_command( + new_acquire_pessimistic_lock_command(vec![(key.clone(), false)], 12, 12, false, false) + .allow_lock_with_conflict(true), + if !canceled_when_resumed { + expect_pessimistic_lock_res_callback(tx_blocked_2, empty.clone()) + } else { + expect_value_with_checker_callback( + tx_blocked_2, + 0, + |res: storage::Result| { + let res = res.unwrap().0; + assert_eq!(res.len(), 1); + let e = res[0].unwrap_err(); + match e.inner() { + ErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(mvcc::Error( + box mvcc::ErrorInner::KeyIsLocked(_), + )))) => (), + e => panic!("unexpected error chain: {:?}", e), + } + }, + ) + }, + ) + .unwrap(); + rx_blocked_2 + .recv_timeout(Duration::from_millis(50)) + .unwrap_err(); + // Find the lock wait token of the above request. 
+ let tokens_after = lock_mgr.get_all_tokens(); + let token_of_12 = { + use std::ops::Sub; + let diff = tokens_after.sub(&tokens_before); + assert_eq!(diff.len(), 1); + diff.into_iter().next().unwrap() + }; + + // Release the lock, so that the former (non-resumable) request will be woken + // up, and the other one (resumable) will be woken up after delaying for + // `wake_up_delay_duration`. + delete_pessimistic_lock(&storage, key.clone(), 10, 10); + rx_blocked_1.recv_timeout(Duration::from_secs(1)).unwrap(); + + // The key should be unlocked at this time. + must_have_locks(&storage, 100, b"", b"\xff\xff\xff", &[]); + + // Simulate the transaction at ts=11 retries the pessimistic lock request, and + // succeeds. + let (tx, rx) = channel(); + storage + .sched_txn_command( + new_acquire_pessimistic_lock_command(vec![(key.clone(), false)], 11, 11, false, false), + expect_pessimistic_lock_res_callback(tx, empty), + ) + .unwrap(); + rx.recv_timeout(Duration::from_secs(1)).unwrap(); + + // Remove `pause` in delayed wake up, so that the request of txn 12 can be woken + // up. 
+ fail::remove("lock_waiting_queue_before_delayed_notify_all"); + first_resume_rx.recv().unwrap(); + + if canceled_when_resumed { + lock_mgr.simulate_timeout(token_of_12); + } + + fail::remove("acquire_pessimistic_lock_resumed_before_process_write"); + first_resume_continue_tx.send(()).unwrap(); + + if canceled_when_resumed { + rx_blocked_2.recv_timeout(Duration::from_secs(1)).unwrap(); + must_have_locks( + &storage, + 100, + b"", + b"\xff\xff\xff", + &[(&key.to_raw().unwrap(), Op::PessimisticLock, 11, 11)], + ); + } else { + rx_blocked_2 + .recv_timeout(Duration::from_millis(100)) + .unwrap_err(); + must_have_locks( + &storage, + 100, + b"", + b"\xff\xff\xff", + &[(&key.to_raw().unwrap(), Op::PessimisticLock, 11, 11)], + ); + delete_pessimistic_lock(&storage, key.clone(), 11, 11); + rx_blocked_2.recv_timeout(Duration::from_secs(1)).unwrap(); + must_have_locks( + &storage, + 100, + b"", + b"\xff\xff\xff", + &[(&key.to_raw().unwrap(), Op::PessimisticLock, 12, 12)], + ); + } +} + +#[test] +fn test_pessimistic_lock_resumable_blocked_twice() { + test_pessimistic_lock_resumable_blocked_twice_impl(false); + test_pessimistic_lock_resumable_blocked_twice_impl(true); +} + #[test] fn test_async_commit_prewrite_with_stale_max_ts() { test_async_commit_prewrite_with_stale_max_ts_impl::(); From 7a3764fcb79c33f65303efc463cf4d0188fc7d0c Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Fri, 2 Dec 2022 10:50:01 +0800 Subject: [PATCH 0381/1149] pd-client: pd client version 2 (#13696) close tikv/tikv#13673 None Signed-off-by: tabokie Signed-off-by: Xinye Tao --- Cargo.lock | 8 +- Cargo.toml | 21 +- cmd/tikv-ctl/Cargo.toml | 2 +- components/api_version/Cargo.toml | 2 +- components/backup-stream/Cargo.toml | 6 +- components/backup/Cargo.toml | 4 +- components/causal_ts/Cargo.toml | 2 +- components/cdc/Cargo.toml | 2 +- components/cloud/Cargo.toml | 2 +- components/cloud/aws/Cargo.toml | 2 +- components/cloud/azure/Cargo.toml | 2 +- components/cloud/gcp/Cargo.toml | 2 +- 
components/concurrency_manager/Cargo.toml | 2 +- components/encryption/Cargo.toml | 2 +- components/encryption/export/Cargo.toml | 2 +- components/engine_panic/Cargo.toml | 2 +- components/engine_rocks/Cargo.toml | 2 +- components/engine_rocks_helper/Cargo.toml | 2 +- components/engine_tirocks/Cargo.toml | 2 +- components/engine_traits/Cargo.toml | 2 +- components/error_code/Cargo.toml | 2 +- components/external_storage/Cargo.toml | 2 +- components/external_storage/export/Cargo.toml | 2 +- components/into_other/Cargo.toml | 2 +- components/keys/Cargo.toml | 2 +- components/pd_client/Cargo.toml | 7 +- components/pd_client/src/client.rs | 39 +- components/pd_client/src/client_v2.rs | 1408 +++++++++++++++++ components/pd_client/src/lib.rs | 7 +- components/pd_client/src/tso.rs | 59 +- components/pd_client/src/util.rs | 64 +- components/raft_log_engine/Cargo.toml | 2 +- components/raftstore-v2/Cargo.toml | 2 +- components/raftstore/Cargo.toml | 4 +- components/resolved_ts/Cargo.toml | 2 +- components/resource_metering/Cargo.toml | 2 +- components/server/Cargo.toml | 4 +- components/sst_importer/Cargo.toml | 2 +- components/test_backup/Cargo.toml | 2 +- components/test_coprocessor/Cargo.toml | 4 +- components/test_pd/Cargo.toml | 2 +- components/test_pd/src/server.rs | 25 +- components/test_pd/src/util.rs | 21 +- components/test_pd_client/Cargo.toml | 4 +- components/test_raftstore/Cargo.toml | 4 +- components/test_sst_importer/Cargo.toml | 2 +- components/test_storage/Cargo.toml | 2 +- components/test_util/Cargo.toml | 2 +- components/tidb_query_aggr/Cargo.toml | 2 +- components/tidb_query_common/Cargo.toml | 4 +- components/tidb_query_datatype/Cargo.toml | 4 +- components/tidb_query_executors/Cargo.toml | 6 +- components/tidb_query_expr/Cargo.toml | 2 +- components/tikv_kv/Cargo.toml | 2 +- components/tikv_util/Cargo.toml | 6 +- components/tipb_helper/Cargo.toml | 2 +- components/tracker/Cargo.toml | 2 +- components/txn_types/Cargo.toml | 2 +- tests/Cargo.toml | 8 +- 
tests/failpoints/cases/mod.rs | 1 + tests/failpoints/cases/test_pd_client.rs | 141 +- .../failpoints/cases/test_pd_client_legacy.rs | 230 +++ tests/integrations/pd/mod.rs | 1 + tests/integrations/pd/test_rpc_client.rs | 305 ++-- .../integrations/pd/test_rpc_client_legacy.rs | 691 ++++++++ 65 files changed, 2756 insertions(+), 404 deletions(-) create mode 100644 components/pd_client/src/client_v2.rs create mode 100644 tests/failpoints/cases/test_pd_client_legacy.rs create mode 100644 tests/integrations/pd/test_rpc_client_legacy.rs diff --git a/Cargo.lock b/Cargo.lock index 2b237c8c25f..8b178015fa1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2317,18 +2317,18 @@ dependencies = [ [[package]] name = "grpcio-compiler" -version = "0.9.0" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4caa0700833147dcfbe4f0758bd92545cc0f4506ee7fa154e499745a8b24e86c" +checksum = "ed97a17310fd00ff4109357584a00244e2a785d05b7ee0ef4d1e8fb1d84266df" dependencies = [ "protobuf", ] [[package]] name = "grpcio-health" -version = "0.10.0" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "641a95bace445aed36b31ae8731513c4c4d1d3dcdbc05aaeeefefe4fd673ada1" +checksum = "a37eae605cd21f144b7c7fd0e64e57af9f73d132756fef5b706db110c3ec7ea0" dependencies = [ "futures-executor", "futures-util", diff --git a/Cargo.toml b/Cargo.toml index f75a4a6511f..104157fdf24 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -97,8 +97,8 @@ futures-timer = "3.0" futures-util = { version = "0.3.1", default-features = false, features = ["io", "async-await"] } fxhash = "0.2.1" getset = "0.1" -grpcio = { version = "0.10.3", default-features = false, features = ["openssl-vendored", "protobuf-codec", "nightly"] } -grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } +grpcio = { workspace = true } +grpcio-health = { workspace = true } hex = "0.4" http = "0" hyper = { version = "0.14", features = 
["full"] } @@ -107,7 +107,7 @@ into_other = { workspace = true } itertools = "0.10" keyed_priority_queue = "0.4" keys = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" libc = "0.2" libloading = "0.7" @@ -165,17 +165,17 @@ tikv_alloc = { workspace = true } tikv_kv = { workspace = true } tikv_util = { workspace = true } time = "0.1" -tipb = { git = "https://github.com/pingcap/tipb.git" } +tipb = { workspace = true } tokio = { version = "1.17", features = ["full"] } tokio-openssl = "0.6" -tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } +tokio-timer = { workspace = true } toml = "0.5" tracker = { workspace = true } txn_types = { workspace = true } url = "2" uuid = { version = "0.8.1", features = ["serde", "v4"] } walkdir = "2" -yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } +yatp = { workspace = true } [dev-dependencies] api_version = { workspace = true, features = ["testexport"] } @@ -221,7 +221,7 @@ procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229 # kvproto at the same time. # After the PR to kvproto is merged, remember to comment this out and run `cargo update -p kvproto`. 
# [patch.'https://github.com/pingcap/kvproto'] -# kvproto = { git = "https://github.com/your_github_id/kvproto", branch="your_branch" } +# kvproto = { git = "https://github.com/your_github_id/kvproto", branch = "your_branch" } [workspace] # See https://github.com/rust-lang/rfcs/blob/master/text/2957-cargo-features2.md @@ -368,7 +368,12 @@ tipb_helper = { path = "components/tipb_helper" } tracker = { path = "components/tracker" } txn_types = { path = "components/txn_types" } # External libs -grpcio = { version = "0.10", default-features = false, features = ["openssl-vendored", "protobuf-codec"] } +grpcio = { version = "0.10.4", default-features = false, features = ["openssl-vendored", "protobuf-codec", "nightly"] } +grpcio-health = { version = "0.10.4", default-features = false, features = ["protobuf-codec"] } +tipb = { git = "https://github.com/pingcap/tipb.git" } +kvproto = { git = "https://github.com/pingcap/kvproto.git" } +yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } +tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } [profile.dev.package.grpcio-sys] debug = false diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index 3b2d1dd2f75..1e0699f64cf 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -62,7 +62,7 @@ gag = "1.0" grpcio = { workspace = true } hex = "0.4" keys = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } libc = "0.2" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } log_wrappers = { workspace = true } diff --git a/components/api_version/Cargo.toml b/components/api_version/Cargo.toml index 421c01a1514..7362ca25ccc 100644 --- a/components/api_version/Cargo.toml +++ b/components/api_version/Cargo.toml @@ -11,7 +11,7 @@ testexport = [] bitflags = "1.0.1" codec = { workspace = true } engine_traits = { workspace = true } -kvproto = { git = 
"https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } match-template = "0.0.1" thiserror = "1.0" tikv_alloc = { workspace = true } diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index e5863f44c4d..b1a61580cb6 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -39,13 +39,11 @@ fail = "0.5" file_system = { workspace = true } futures = "0.3" futures-io = "0.3" - grpcio = { workspace = true } hex = "0.4" - # Fixing ahash cyclic dep: https://github.com/tkaitchuck/ahash/issues/95 indexmap = "=1.6.2" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.4" log_wrappers = { workspace = true } online_config = { workspace = true } @@ -72,7 +70,7 @@ tokio-util = { version = "0.7", features = ["compat"] } tonic = "0.8" txn_types = { workspace = true } uuid = "0.8" -yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } +yatp = { workspace = true } [dev-dependencies] async-trait = "0.1" diff --git a/components/backup/Cargo.toml b/components/backup/Cargo.toml index 17439a0f615..27f7d68e8e3 100644 --- a/components/backup/Cargo.toml +++ b/components/backup/Cargo.toml @@ -52,7 +52,7 @@ futures-util = { version = "0.3", default-features = false, features = ["io"] } grpcio = { workspace = true } hex = "0.4" keys = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" log_wrappers = { workspace = true } online_config = { workspace = true } @@ -74,7 +74,7 @@ tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread"] } tokio-stream = "0.1" txn_types = { workspace = true } -yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } +yatp = { workspace = true } [dev-dependencies] rand = "0.8" diff --git a/components/causal_ts/Cargo.toml b/components/causal_ts/Cargo.toml index 
d05e9b66ddd..a5dd62cd5d2 100644 --- a/components/causal_ts/Cargo.toml +++ b/components/causal_ts/Cargo.toml @@ -16,7 +16,7 @@ enum_dispatch = "0.3.8" error_code = { workspace = true } fail = "0.5" futures = { version = "0.3" } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" log_wrappers = { workspace = true } parking_lot = "0.12" diff --git a/components/cdc/Cargo.toml b/components/cdc/Cargo.toml index 27ce81c57b4..94d80bf1d9f 100644 --- a/components/cdc/Cargo.toml +++ b/components/cdc/Cargo.toml @@ -42,7 +42,7 @@ futures-timer = "3.0" getset = "0.1" grpcio = { workspace = true } keys = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" log_wrappers = { workspace = true } online_config = { workspace = true } diff --git a/components/cloud/Cargo.toml b/components/cloud/Cargo.toml index 45ae2b40b23..10f8b113b2b 100644 --- a/components/cloud/Cargo.toml +++ b/components/cloud/Cargo.toml @@ -9,7 +9,7 @@ async-trait = "0.1" derive_more = "0.99.3" error_code = { workspace = true } futures-io = "0.3" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" openssl = "0.10" prometheus = { version = "0.13", default-features = false, features = ["nightly"] } diff --git a/components/cloud/aws/Cargo.toml b/components/cloud/aws/Cargo.toml index e539c67f571..5d28e09e8f4 100644 --- a/components/cloud/aws/Cargo.toml +++ b/components/cloud/aws/Cargo.toml @@ -22,7 +22,7 @@ grpcio = { workspace = true } http = "0.2.0" hyper = "0.14" hyper-tls = "0.5" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" md5 = "0.7.0" prometheus = { version = "0.13", default-features = false, features = ["nightly"] } diff --git a/components/cloud/azure/Cargo.toml b/components/cloud/azure/Cargo.toml index 3d8b01e893b..c08dc76fdff 100644 --- 
a/components/cloud/azure/Cargo.toml +++ b/components/cloud/azure/Cargo.toml @@ -14,7 +14,7 @@ chrono = "0.4" cloud = { workspace = true } futures = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } oauth2 = { version = "4.0.0", default-features = false } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } diff --git a/components/cloud/gcp/Cargo.toml b/components/cloud/gcp/Cargo.toml index f0446fa096d..5074a3c9da4 100644 --- a/components/cloud/gcp/Cargo.toml +++ b/components/cloud/gcp/Cargo.toml @@ -11,7 +11,7 @@ futures-util = { version = "0.3", default-features = false, features = ["io"] } http = "0.2.0" hyper = "0.14" hyper-tls = "0.5" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } # better to not use slog-global, but pass in the logger slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } diff --git a/components/concurrency_manager/Cargo.toml b/components/concurrency_manager/Cargo.toml index 2d008cf49f1..e225cbe0519 100644 --- a/components/concurrency_manager/Cargo.toml +++ b/components/concurrency_manager/Cargo.toml @@ -6,7 +6,7 @@ version = "0.0.1" [dependencies] fail = "0.5" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } parking_lot = "0.12" tikv_util = { workspace = true } tokio = { version = "1.5", features = ["macros", "sync", "time"] } diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index b66ef2aa147..18b6cb7305c 100644 --- a/components/encryption/Cargo.toml +++ 
b/components/encryption/Cargo.toml @@ -21,7 +21,7 @@ file_system = { workspace = true } futures = "0.3" futures-util = { version = "0.3", default-features = false, features = ["std", "io"] } hex = "0.4.2" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" online_config = { workspace = true } openssl = "0.10" diff --git a/components/encryption/export/Cargo.toml b/components/encryption/export/Cargo.toml index f76c2b8f03c..fc4fe59d3fb 100644 --- a/components/encryption/export/Cargo.toml +++ b/components/encryption/export/Cargo.toml @@ -18,7 +18,7 @@ derive_more = "0.99.3" encryption = { workspace = true } error_code = { workspace = true } file_system = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } openssl = "0.10" protobuf = { version = "2.8", features = ["bytes"] } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } diff --git a/components/engine_panic/Cargo.toml b/components/engine_panic/Cargo.toml index c5703994c73..55e42f2595f 100644 --- a/components/engine_panic/Cargo.toml +++ b/components/engine_panic/Cargo.toml @@ -7,7 +7,7 @@ publish = false [dependencies] engine_traits = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } tikv_alloc = { workspace = true } # FIXME: Remove this dep from the engine_traits interface diff --git a/components/engine_rocks/Cargo.toml b/components/engine_rocks/Cargo.toml index 44dd708271d..a0e3e878c54 100644 --- a/components/engine_rocks/Cargo.toml +++ b/components/engine_rocks/Cargo.toml @@ -32,7 +32,7 @@ engine_traits = { workspace = true } fail = "0.5" file_system = { workspace = true } keys = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = 
"1.4.0" log_wrappers = { workspace = true } num_cpus = "1" diff --git a/components/engine_rocks_helper/Cargo.toml b/components/engine_rocks_helper/Cargo.toml index 16e79a3b007..ec66aa474a9 100644 --- a/components/engine_rocks_helper/Cargo.toml +++ b/components/engine_rocks_helper/Cargo.toml @@ -24,5 +24,5 @@ tikv_util = { workspace = true } [dev-dependencies] engine_test = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } tempfile = "3.0" diff --git a/components/engine_tirocks/Cargo.toml b/components/engine_tirocks/Cargo.toml index 8ecce112579..07c2a7ec42c 100644 --- a/components/engine_tirocks/Cargo.toml +++ b/components/engine_tirocks/Cargo.toml @@ -24,6 +24,6 @@ tracker = { workspace = true } txn_types = { workspace = true } [dev-dependencies] -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } rand = "0.8" tempfile = "3.0" diff --git a/components/engine_traits/Cargo.toml b/components/engine_traits/Cargo.toml index c2e9d729868..d38962e71c9 100644 --- a/components/engine_traits/Cargo.toml +++ b/components/engine_traits/Cargo.toml @@ -12,7 +12,7 @@ case_macros = { workspace = true } error_code = { workspace = true } fail = "0.5" file_system = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } log_wrappers = { workspace = true } protobuf = "2" raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } diff --git a/components/error_code/Cargo.toml b/components/error_code/Cargo.toml index 484f8d24ad3..b98fc8dfcb5 100644 --- a/components/error_code/Cargo.toml +++ b/components/error_code/Cargo.toml @@ -14,7 +14,7 @@ path = "bin.rs" [dependencies] grpcio = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } 
serde = { version = "1.0", features = ["derive"] } diff --git a/components/external_storage/Cargo.toml b/components/external_storage/Cargo.toml index 8c92b79583e..839e34e3f22 100644 --- a/components/external_storage/Cargo.toml +++ b/components/external_storage/Cargo.toml @@ -29,7 +29,7 @@ futures-executor = "0.3" futures-io = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } grpcio = { workspace = true, optional = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" libloading = { optional = true, version = "0.7.0" } openssl = "0.10" diff --git a/components/external_storage/export/Cargo.toml b/components/external_storage/export/Cargo.toml index 076bdd9d0dd..61e9bfa58df 100644 --- a/components/external_storage/export/Cargo.toml +++ b/components/external_storage/export/Cargo.toml @@ -65,7 +65,7 @@ futures-io = { version = "0.3" } futures-util = { version = "0.3", default-features = false, features = ["io"] } gcp = { optional = true, workspace = true } grpcio = { workspace = true, optional = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = { optional = true, version = "1.3" } libloading = { optional = true, version = "0.7.0" } once_cell = { optional = true, version = "1.3.1" } diff --git a/components/into_other/Cargo.toml b/components/into_other/Cargo.toml index 39989a4bf75..d31f04f4e12 100644 --- a/components/into_other/Cargo.toml +++ b/components/into_other/Cargo.toml @@ -6,5 +6,5 @@ publish = false [dependencies] engine_traits = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } diff --git a/components/keys/Cargo.toml b/components/keys/Cargo.toml index f8318237b20..5f2bf5935ee 100644 --- a/components/keys/Cargo.toml +++ b/components/keys/Cargo.toml @@ 
-6,7 +6,7 @@ publish = false [dependencies] byteorder = "1.2" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } log_wrappers = { workspace = true } thiserror = "1.0" tikv_alloc = { workspace = true } diff --git a/components/pd_client/Cargo.toml b/components/pd_client/Cargo.toml index c2ee9982bcd..c25e37f23b5 100644 --- a/components/pd_client/Cargo.toml +++ b/components/pd_client/Cargo.toml @@ -6,6 +6,7 @@ publish = false [features] failpoints = ["fail/failpoints"] +testexport = [] [dependencies] collections = { workspace = true } @@ -13,7 +14,7 @@ error_code = { workspace = true } fail = "0.5" futures = "0.3" grpcio = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } log_wrappers = { workspace = true } @@ -28,6 +29,6 @@ thiserror = "1.0" tikv_alloc = { workspace = true } tikv_util = { workspace = true } tokio = { version = "1", features = ["sync"] } -tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } +tokio-timer = { workspace = true } txn_types = { workspace = true } -yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } +yatp = { workspace = true } diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index e25e4a595bb..9f466a6a351 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -42,8 +42,8 @@ use super::{ UnixSecs, REQUEST_TIMEOUT, }; -const CQ_COUNT: usize = 1; -const CLIENT_PREFIX: &str = "pd"; +pub const CQ_COUNT: usize = 1; +pub const CLIENT_PREFIX: &str = "pd"; pub struct RpcClient { cluster_id: u64, @@ -86,7 +86,7 @@ impl RpcClient { ); let pd_connector = PdConnector::new(env.clone(), security_mgr.clone()); for i in 0..retries { - match pd_connector.validate_endpoints(cfg).await { + match 
pd_connector.validate_endpoints(cfg, true).await { Ok((client, target, members, tso)) => { let cluster_id = members.get_header().get_cluster_id(); let rpc_client = RpcClient { @@ -97,7 +97,7 @@ impl RpcClient { client, members, target, - tso, + tso.unwrap(), cfg.enable_forwarding, )), monitor: monitor.clone(), @@ -554,13 +554,16 @@ impl PdClient for RpcClient { .client_stub .get_region_by_id_async_opt(&req, call_option_inner(&inner)) .unwrap_or_else(|e| { - panic!("fail to request PD {} err {:?}", "get_region_by_id", e) + panic!( + "fail to request PD {} err {:?}", + "get_region_leader_by_id", e + ) }) }; Box::pin(async move { let mut resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_region_by_id"]) + .with_label_values(&["get_region_leader_by_id"]) .observe(duration_to_sec(timer.saturating_elapsed())); check_resp_header(resp.get_header())?; if resp.has_region() && resp.has_leader() { @@ -1088,27 +1091,3 @@ impl PdClient for RpcClient { .execute() } } - -pub struct DummyPdClient { - pub next_ts: TimeStamp, -} - -impl DummyPdClient { - pub fn new() -> DummyPdClient { - DummyPdClient { - next_ts: TimeStamp::zero(), - } - } -} - -impl Default for DummyPdClient { - fn default() -> Self { - Self::new() - } -} - -impl PdClient for DummyPdClient { - fn batch_get_tso(&self, _count: u32) -> PdFuture { - Box::pin(future::ok(self.next_ts)) - } -} diff --git a/components/pd_client/src/client_v2.rs b/components/pd_client/src/client_v2.rs new file mode 100644 index 00000000000..55f0c31b3c5 --- /dev/null +++ b/components/pd_client/src/client_v2.rs @@ -0,0 +1,1408 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! PD Client V2 +//! +//! In V1, the connection to PD and related states are all shared under a +//! `RwLock`. The maintenance of these states are implemented in a +//! decentralized way: each request will try to rebuild the connection on its +//! own if it encounters a network error. +//! +//! 
In V2, the responsibility to maintain the connection is moved into one +//! single long-running coroutine, namely [`reconnect_loop`]. Users of the +//! connection subscribe changes instead of altering it themselves. + +use std::{ + collections::HashMap, + fmt::Debug, + pin::Pin, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, Mutex, + }, + time::{Duration, Instant as StdInstant}, + u64, +}; + +use fail::fail_point; +use futures::{ + compat::{Compat, Future01CompatExt}, + executor::block_on, + future::FutureExt, + select, + sink::SinkExt, + stream::{Stream, StreamExt}, + task::{Context, Poll}, +}; +use grpcio::{ + CallOption, Channel, ClientDuplexReceiver, ConnectivityState, EnvBuilder, Environment, + Error as GrpcError, Result as GrpcResult, WriteFlags, +}; +use kvproto::{ + metapb, + pdpb::{ + self, GetMembersResponse, PdClient as PdClientStub, RegionHeartbeatRequest, + RegionHeartbeatResponse, ReportBucketsRequest, TsoRequest, TsoResponse, + }, + replication_modepb::{ReplicationStatus, StoreDrAutoSyncStatus}, +}; +use security::SecurityManager; +use tikv_util::{ + box_err, + config::ReadableDuration, + error, info, + mpsc::future as mpsc, + slow_log, thd_name, + time::{duration_to_sec, Instant}, + timer::GLOBAL_TIMER_HANDLE, + warn, +}; +use tokio::sync::{broadcast, mpsc as tokio_mpsc}; +use txn_types::TimeStamp; + +use super::{ + client::{CLIENT_PREFIX, CQ_COUNT}, + metrics::*, + util::{check_resp_header, PdConnector, TargetInfo}, + Config, Error, FeatureGate, RegionInfo, Result, UnixSecs, + REQUEST_TIMEOUT as REQUEST_TIMEOUT_SEC, +}; +use crate::PdFuture; + +fn request_timeout() -> Duration { + fail_point!("pd_client_v2_request_timeout", |s| { + use std::str::FromStr; + ReadableDuration::from_str(&s.unwrap()).unwrap().0 + }); + Duration::from_secs(REQUEST_TIMEOUT_SEC) +} + +/// Immutable context for making new connections. 
+struct ConnectContext { + cfg: Config, + connector: PdConnector, +} + +#[derive(Clone)] +struct RawClient { + stub: PdClientStub, + target_info: TargetInfo, + members: GetMembersResponse, +} + +impl RawClient { + async fn connect(ctx: &ConnectContext) -> Result { + // -1 means the max. + let retries = match ctx.cfg.retry_max_count { + -1 => std::isize::MAX, + v => v.saturating_add(1), + }; + for i in 0..retries { + match ctx.connector.validate_endpoints(&ctx.cfg, false).await { + Ok((stub, target_info, members, _)) => { + return Ok(RawClient { + stub, + target_info, + members, + }); + } + Err(e) => { + if i as usize % ctx.cfg.retry_log_every == 0 { + warn!("validate PD endpoints failed"; "err" => ?e); + } + let _ = GLOBAL_TIMER_HANDLE + .delay(StdInstant::now() + ctx.cfg.retry_interval.0) + .compat() + .await; + } + } + } + Err(box_err!("PD endpoints are invalid")) + } + + /// Returns Ok(true) when a new connection is established. + async fn maybe_reconnect(&mut self, ctx: &ConnectContext, force: bool) -> Result { + PD_RECONNECT_COUNTER_VEC.with_label_values(&["try"]).inc(); + let start = Instant::now(); + + let members = self.members.clone(); + let direct_connected = self.target_info.direct_connected(); + slow_log!(start.saturating_elapsed(), "try reconnect pd"); + let (stub, target_info, members, _) = match ctx + .connector + .reconnect_pd( + members, + direct_connected, + force, + ctx.cfg.enable_forwarding, + false, + ) + .await + { + Err(e) => { + PD_RECONNECT_COUNTER_VEC + .with_label_values(&["failure"]) + .inc(); + return Err(e); + } + Ok(None) => { + PD_RECONNECT_COUNTER_VEC + .with_label_values(&["no-need"]) + .inc(); + return Ok(false); + } + Ok(Some(tuple)) => { + PD_RECONNECT_COUNTER_VEC + .with_label_values(&["success"]) + .inc(); + tuple + } + }; + + fail_point!("pd_client_v2_reconnect", |_| Ok(true)); + + self.stub = stub; + self.target_info = target_info; + self.members = members; + + info!("trying to update PD client done"; "spend" => 
?start.saturating_elapsed()); + Ok(true) + } +} + +struct CachedRawClientCore { + context: ConnectContext, + + latest: Mutex, + version: AtomicU64, + on_reconnect_tx: broadcast::Sender<()>, +} + +/// A shared [`RawClient`] with a local copy of cache. +pub struct CachedRawClient { + core: Arc, + should_reconnect_tx: broadcast::Sender, + on_reconnect_rx: broadcast::Receiver<()>, + + cache: RawClient, + cache_version: u64, +} + +impl Clone for CachedRawClient { + fn clone(&self) -> Self { + Self { + core: self.core.clone(), + should_reconnect_tx: self.should_reconnect_tx.clone(), + on_reconnect_rx: self.core.on_reconnect_tx.subscribe(), + cache: self.cache.clone(), + cache_version: self.cache_version, + } + } +} + +impl CachedRawClient { + fn new( + cfg: Config, + env: Arc, + security_mgr: Arc, + should_reconnect_tx: broadcast::Sender, + ) -> Self { + let lame_stub = PdClientStub::new(Channel::lame(env.clone(), "0.0.0.0:0")); + let client = RawClient { + stub: lame_stub, + target_info: TargetInfo::new("0.0.0.0:0".to_string(), ""), + members: GetMembersResponse::new(), + }; + let context = ConnectContext { + cfg, + connector: PdConnector::new(env, security_mgr), + }; + let (tx, rx) = broadcast::channel(1); + let core = CachedRawClientCore { + context, + latest: Mutex::new(client.clone()), + version: AtomicU64::new(0), + on_reconnect_tx: tx, + }; + Self { + core: Arc::new(core), + should_reconnect_tx, + on_reconnect_rx: rx, + cache: client, + cache_version: 0, + } + } + + #[inline] + fn refresh_cache(&mut self) -> bool { + if self.cache_version < self.core.version.load(Ordering::Acquire) { + let latest = self.core.latest.lock().unwrap(); + self.cache = (*latest).clone(); + self.cache_version = self.core.version.load(Ordering::Relaxed); + true + } else { + false + } + } + + #[inline] + fn publish_cache(&mut self) { + let latest_version = { + let mut latest = self.core.latest.lock().unwrap(); + *latest = self.cache.clone(); + let _ = self.core.on_reconnect_tx.send(()); + 
self.core.version.fetch_add(1, Ordering::Relaxed) + 1 + }; + debug_assert!(self.cache_version < latest_version); + self.cache_version = latest_version; + } + + #[inline] + async fn wait_for_a_new_client( + rx: &mut broadcast::Receiver<()>, + current_version: u64, + latest_version: &AtomicU64, + ) -> bool { + let deadline = StdInstant::now() + request_timeout(); + loop { + if GLOBAL_TIMER_HANDLE + .timeout(Compat::new(Box::pin(rx.recv())), deadline) + .compat() + .await + .is_ok() + { + if current_version < latest_version.load(Ordering::Acquire) { + return true; + } + } else { + return false; + } + } + } + + /// Refreshes the local cache with latest client, then waits for the + /// connection to be ready. + /// The connection must be available if this function returns `Ok(())`. + async fn wait_for_ready(&mut self) -> Result<()> { + self.refresh_cache(); + if self.channel().check_connectivity_state(false) == ConnectivityState::GRPC_CHANNEL_READY { + return Ok(()); + } + select! { + r = self + .cache + .stub + .client + .channel() + .wait_for_connected(request_timeout()) + .fuse() => + { + if r { + return Ok(()); + } + } + r = Self::wait_for_a_new_client( + &mut self.on_reconnect_rx, + self.cache_version, + &self.core.version, + ).fuse() => { + if r { + assert!(self.refresh_cache()); + return Ok(()); + } + } + } + let _ = self.should_reconnect_tx.send(self.cache_version); + Err(box_err!( + "Connection unavailable {:?}", + self.channel().check_connectivity_state(false) + )) + } + + /// Makes the first connection. + async fn connect(&mut self) -> Result<()> { + self.cache = RawClient::connect(&self.core.context).await?; + self.publish_cache(); + Ok(()) + } + + /// Increases global version only when a new connection is established. + /// Might panic if `wait_for_ready` isn't called up-front. 
+ async fn reconnect(&mut self) -> Result { + let force = (|| { + fail_point!("pd_client_force_reconnect", |_| true); + self.channel().check_connectivity_state(true) + == ConnectivityState::GRPC_CHANNEL_SHUTDOWN + })(); + if self + .cache + .maybe_reconnect(&self.core.context, force) + .await? + { + self.publish_cache(); + return Ok(true); + } + Ok(false) + } + + #[inline] + fn check_resp(&mut self, resp: GrpcResult) -> GrpcResult { + if matches!( + resp, + Err(GrpcError::RpcFailure(_) | GrpcError::RemoteStopped | GrpcError::RpcFinished(_)) + ) { + let _ = self.should_reconnect_tx.send(self.cache_version); + } + resp + } + + /// Might panic if `wait_for_ready` isn't called up-front. + #[inline] + fn stub(&self) -> &PdClientStub { + &self.cache.stub + } + + /// Might panic if `wait_for_ready` isn't called up-front. + #[inline] + fn channel(&self) -> &Channel { + self.cache.stub.client.channel() + } + + /// Might panic if `wait_for_ready` isn't called up-front. + #[inline] + fn call_option(&self) -> CallOption { + self.cache.target_info.call_option() + } + + /// Might panic if `wait_for_ready` isn't called up-front. + #[inline] + fn cluster_id(&self) -> u64 { + self.cache.members.get_header().get_cluster_id() + } + + /// Might panic if `wait_for_ready` isn't called up-front. + #[inline] + fn header(&self) -> pdpb::RequestHeader { + let mut header = pdpb::RequestHeader::default(); + header.set_cluster_id(self.cluster_id()); + header + } + + /// Might panic if `wait_for_ready` isn't called up-front. 
+ #[cfg(feature = "testexport")] + #[inline] + fn leader(&self) -> pdpb::Member { + self.cache.members.get_leader().clone() + } + + #[inline] + fn initialized(&self) -> bool { + self.cache_version != 0 + } +} + +async fn reconnect_loop( + mut client: CachedRawClient, + cfg: Config, + mut should_reconnect: broadcast::Receiver, +) { + if let Err(e) = client.connect().await { + error!("failed to connect pd"; "err" => ?e); + return; + } + let backoff = (|| { + fail_point!("pd_client_v2_backoff", |s| { + use std::str::FromStr; + ReadableDuration::from_str(&s.unwrap()).unwrap().0 + }); + request_timeout() + })(); + let mut last_connect = StdInstant::now(); + loop { + if client.channel().wait_for_connected(request_timeout()).await { + let state = ConnectivityState::GRPC_CHANNEL_READY; + select! { + // Checks for leader change periodically. + _ = client + .channel() + .wait_for_state_change(state, cfg.update_interval.0) + .fuse() => {} + v = should_reconnect.recv().fuse() => { + match v { + Ok(v) if v < client.cache_version => continue, + Ok(_) => {} + Err(broadcast::error::RecvError::Lagged(_)) => continue, + Err(broadcast::error::RecvError::Closed) => break, + } + } + } + } + let target = last_connect + backoff; + if target > StdInstant::now() { + let _ = GLOBAL_TIMER_HANDLE.delay(target).compat().await; + } + last_connect = StdInstant::now(); + if let Err(e) = client.reconnect().await { + warn!("failed to reconnect pd"; "err" => ?e); + } + } +} + +#[derive(Clone)] +pub struct RpcClient { + pub raw_client: CachedRawClient, + feature_gate: FeatureGate, +} + +impl RpcClient { + pub fn new( + cfg: &Config, + shared_env: Option>, + security_mgr: Arc, + ) -> Result { + let env = shared_env.unwrap_or_else(|| { + Arc::new( + EnvBuilder::new() + .cq_count(CQ_COUNT) + .name_prefix(thd_name!(CLIENT_PREFIX)) + .build(), + ) + }); + + // Use broadcast channel for the lagging feature. 
+ let (tx, rx) = broadcast::channel(1); + let raw_client = CachedRawClient::new(cfg.clone(), env, security_mgr, tx); + raw_client + .stub() + .spawn(reconnect_loop(raw_client.clone(), cfg.clone(), rx)); + + Ok(Self { + raw_client, + feature_gate: Default::default(), + }) + } + + #[inline] + pub fn subscribe_reconnect(&self) -> broadcast::Receiver<()> { + self.raw_client.clone().on_reconnect_rx + } + + #[cfg(feature = "testexport")] + pub fn feature_gate(&self) -> &FeatureGate { + &self.feature_gate + } + + #[cfg(feature = "testexport")] + pub fn get_leader(&mut self) -> pdpb::Member { + block_on(self.raw_client.wait_for_ready()).unwrap(); + self.raw_client.leader() + } + + #[cfg(feature = "testexport")] + pub fn reconnect(&mut self) -> Result { + block_on(self.raw_client.wait_for_ready())?; + block_on(self.raw_client.reconnect()) + } + + #[cfg(feature = "testexport")] + pub fn reset_to_lame_client(&mut self) { + let env = self.raw_client.core.context.connector.env.clone(); + let lame = PdClientStub::new(Channel::lame(env, "0.0.0.0:0")); + self.raw_client.core.latest.lock().unwrap().stub = lame.clone(); + self.raw_client.cache.stub = lame; + } + + #[cfg(feature = "testexport")] + pub fn initialized(&self) -> bool { + self.raw_client.initialized() + } +} + +pub trait PdClient { + type ResponseChannel: Stream>; + + fn create_region_heartbeat_stream( + &mut self, + wake_policy: mpsc::WakePolicy, + ) -> Result<( + mpsc::Sender, + Self::ResponseChannel, + )>; + + fn create_report_region_buckets_stream( + &mut self, + wake_policy: mpsc::WakePolicy, + ) -> Result>; + + fn create_tso_stream( + &mut self, + wake_policy: mpsc::WakePolicy, + ) -> Result<(mpsc::Sender, Self::ResponseChannel)>; + + fn fetch_cluster_id(&mut self) -> Result; + + fn load_global_config(&mut self, list: Vec) -> PdFuture>; + + fn watch_global_config( + &mut self, + ) -> Result>; + + fn bootstrap_cluster( + &mut self, + stores: metapb::Store, + region: metapb::Region, + ) -> Result>; + + fn 
is_cluster_bootstrapped(&mut self) -> Result; + + fn alloc_id(&mut self) -> Result; + + fn is_recovering_marked(&mut self) -> Result; + + fn put_store(&mut self, store: metapb::Store) -> Result>; + + fn get_store_and_stats(&mut self, store_id: u64) + -> PdFuture<(metapb::Store, pdpb::StoreStats)>; + + fn get_store(&mut self, store_id: u64) -> Result { + block_on(self.get_store_and_stats(store_id)).map(|r| r.0) + } + + fn get_all_stores(&mut self, exclude_tombstone: bool) -> Result>; + + fn get_cluster_config(&mut self) -> Result; + + fn get_region_and_leader( + &mut self, + key: &[u8], + ) -> PdFuture<(metapb::Region, Option)>; + + fn get_region(&mut self, key: &[u8]) -> Result { + block_on(self.get_region_and_leader(key)).map(|r| r.0) + } + + fn get_region_info(&mut self, key: &[u8]) -> Result { + block_on(self.get_region_and_leader(key)).map(|r| RegionInfo::new(r.0, r.1)) + } + + fn get_region_by_id(&mut self, region_id: u64) -> PdFuture>; + + fn get_region_leader_by_id( + &mut self, + region_id: u64, + ) -> PdFuture>; + + fn ask_split(&mut self, region: metapb::Region) -> PdFuture; + + fn ask_batch_split( + &mut self, + region: metapb::Region, + count: usize, + ) -> PdFuture; + + fn store_heartbeat( + &mut self, + stats: pdpb::StoreStats, + store_report: Option, + dr_autosync_status: Option, + ) -> PdFuture; + + fn report_batch_split(&mut self, regions: Vec) -> PdFuture<()>; + + fn scatter_region(&mut self, region: RegionInfo) -> Result<()>; + + fn get_gc_safe_point(&mut self) -> PdFuture; + + fn get_operator(&mut self, region_id: u64) -> Result; + + fn update_service_safe_point( + &mut self, + name: String, + safe_point: TimeStamp, + ttl: Duration, + ) -> PdFuture<()>; + + fn report_min_resolved_ts(&mut self, store_id: u64, min_resolved_ts: u64) -> PdFuture<()>; +} + +pub struct CachedDuplexResponse { + latest: tokio_mpsc::Receiver>, + cache: Option>, +} + +impl CachedDuplexResponse { + fn new() -> (tokio_mpsc::Sender>, Self) { + let (tx, rx) = 
tokio_mpsc::channel(1); + ( + tx, + Self { + latest: rx, + cache: None, + }, + ) + } +} + +impl Stream for CachedDuplexResponse { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + loop { + if let Some(ref mut receiver) = self.cache { + match Pin::new(receiver).poll_next(cx) { + Poll::Ready(Some(Ok(item))) => return Poll::Ready(Some(Ok(item))), + Poll::Pending => return Poll::Pending, + // If it's None or there's error, we need to update receiver. + _ => {} + } + } + + match Pin::new(&mut self.latest).poll_recv(cx) { + Poll::Ready(Some(receiver)) => self.cache = Some(receiver), + Poll::Ready(None) => return Poll::Ready(None), + Poll::Pending => return Poll::Pending, + } + } + } +} + +impl PdClient for RpcClient { + type ResponseChannel = CachedDuplexResponse; + + fn create_region_heartbeat_stream( + &mut self, + wake_policy: mpsc::WakePolicy, + ) -> Result<( + mpsc::Sender, + Self::ResponseChannel, + )> { + // TODO: use bounded channel. + let (tx, rx) = mpsc::unbounded(wake_policy); + let (resp_tx, resp_rx) = CachedDuplexResponse::::new(); + let mut raw_client = self.raw_client.clone(); + let mut requests = Box::pin(rx).map(|r| { + fail::fail_point!("region_heartbeat_send_failed", |_| { + Err(grpcio::Error::RemoteStopped) + }); + Ok((r, WriteFlags::default())) + }); + self.raw_client.stub().spawn(async move { + loop { + if let Err(e) = raw_client.wait_for_ready().await { + warn!("failed to acquire client for RegionHeartbeat stream"; "err" => ?e); + continue; + } + let (mut hb_tx, hb_rx) = raw_client + .stub() + .region_heartbeat_opt(raw_client.call_option()) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "region_heartbeat", e) + }); + if resp_tx.send(hb_rx).await.is_err() { + break; + } + let res = hb_tx.send_all(&mut requests).await; + if res.is_ok() { + // requests are drained. 
+ break; + } else { + let res = raw_client.check_resp(res); + warn!("region heartbeat stream exited"; "res" => ?res); + } + let _ = hb_tx.close().await; + } + }); + Ok((tx, resp_rx)) + } + + fn create_report_region_buckets_stream( + &mut self, + wake_policy: mpsc::WakePolicy, + ) -> Result> { + let (tx, rx) = mpsc::unbounded(wake_policy); + let mut raw_client = self.raw_client.clone(); + let mut requests = Box::pin(rx).map(|r| Ok((r, WriteFlags::default()))); + self.raw_client.stub().spawn(async move { + loop { + if let Err(e) = raw_client.wait_for_ready().await { + warn!("failed to acquire client for ReportRegionBuckets stream"; "err" => ?e); + continue; + } + let (mut bk_tx, bk_rx) = raw_client + .stub() + .report_buckets_opt(raw_client.call_option()) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "report_region_buckets", e) + }); + select! { + send_res = bk_tx.send_all(&mut requests).fuse() => { + if send_res.is_ok() { + // requests are drained. + break; + } else { + let res = raw_client.check_resp(send_res); + warn!("region buckets stream exited: {:?}", res); + } + } + recv_res = bk_rx.fuse() => { + let res = raw_client.check_resp(recv_res); + warn!("region buckets stream exited: {:?}", res); + } + } + let _ = bk_tx.close().await; + } + }); + Ok(tx) + } + + fn create_tso_stream( + &mut self, + wake_policy: mpsc::WakePolicy, + ) -> Result<(mpsc::Sender, Self::ResponseChannel)> { + let (tx, rx) = mpsc::unbounded(wake_policy); + let (resp_tx, resp_rx) = CachedDuplexResponse::::new(); + let mut raw_client = self.raw_client.clone(); + let mut requests = Box::pin(rx).map(|r| Ok((r, WriteFlags::default()))); + self.raw_client.stub().spawn(async move { + loop { + if let Err(e) = raw_client.wait_for_ready().await { + warn!("failed to acquire client for Tso stream"; "err" => ?e); + continue; + } + let (mut tso_tx, tso_rx) = raw_client + .stub() + .tso_opt(raw_client.call_option()) + .unwrap_or_else(|e| panic!("fail to request PD {} err {:?}", "tso", 
e)); + if resp_tx.send(tso_rx).await.is_err() { + break; + } + let res = tso_tx.send_all(&mut requests).await; + if res.is_ok() { + // requests are drained. + break; + } else { + let res = raw_client.check_resp(res); + warn!("tso exited"; "res" => ?res); + } + let _ = tso_tx.close().await; + } + }); + Ok((tx, resp_rx)) + } + + fn load_global_config(&mut self, list: Vec) -> PdFuture> { + use kvproto::pdpb::LoadGlobalConfigRequest; + let mut req = LoadGlobalConfigRequest::new(); + req.set_names(list.into()); + let mut raw_client = self.raw_client.clone(); + Box::pin(async move { + raw_client.wait_for_ready().await?; + let fut = raw_client.stub().load_global_config_async(&req)?; + match fut.await { + Ok(grpc_response) => { + let mut res = HashMap::with_capacity(grpc_response.get_items().len()); + for c in grpc_response.get_items() { + if c.has_error() { + error!("failed to load global config with key {:?}", c.get_error()); + } else { + res.insert(c.get_name().to_owned(), c.get_value().to_owned()); + } + } + Ok(res) + } + Err(err) => Err(box_err!("{:?}", err)), + } + }) + } + + fn watch_global_config( + &mut self, + ) -> Result> { + let req = pdpb::WatchGlobalConfigRequest::default(); + block_on(self.raw_client.wait_for_ready())?; + Ok(self.raw_client.stub().watch_global_config(&req)?) 
+ } + + fn fetch_cluster_id(&mut self) -> Result { + if !self.raw_client.initialized() { + block_on(self.raw_client.wait_for_ready())?; + } + let id = self.raw_client.cluster_id(); + assert!(id > 0); + Ok(id) + } + + fn bootstrap_cluster( + &mut self, + stores: metapb::Store, + region: metapb::Region, + ) -> Result> { + let _timer = PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["bootstrap_cluster"]) + .start_coarse_timer(); + + block_on(self.raw_client.wait_for_ready())?; + + let mut req = pdpb::BootstrapRequest::default(); + req.set_header(self.raw_client.header()); + req.set_store(stores); + req.set_region(region); + + let resp = self.raw_client.stub().bootstrap_opt( + &req, + self.raw_client.call_option().timeout(request_timeout()), + ); + let mut resp = self.raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + Ok(resp.replication_status.take()) + } + + fn is_cluster_bootstrapped(&mut self) -> Result { + let _timer = PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["is_cluster_bootstrapped"]) + .start_coarse_timer(); + + block_on(self.raw_client.wait_for_ready())?; + + let mut req = pdpb::IsBootstrappedRequest::default(); + req.set_header(self.raw_client.header()); + + let resp = self.raw_client.stub().is_bootstrapped_opt( + &req, + self.raw_client.call_option().timeout(request_timeout()), + ); + let resp = self.raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + + Ok(resp.get_bootstrapped()) + } + + fn alloc_id(&mut self) -> Result { + let _timer = PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["alloc_id"]) + .start_coarse_timer(); + + block_on(self.raw_client.wait_for_ready())?; + + let mut req = pdpb::AllocIdRequest::default(); + req.set_header(self.raw_client.header()); + + let resp = self.raw_client.stub().alloc_id_opt( + &req, + self.raw_client + .call_option() + .timeout(Duration::from_secs(10)), + ); + let resp = self.raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + + let id = 
resp.get_id(); + if id == 0 { + return Err(box_err!("pd alloc weird id 0")); + } + Ok(id) + } + + fn is_recovering_marked(&mut self) -> Result { + let _timer = PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["is_recovering_marked"]) + .start_coarse_timer(); + + block_on(self.raw_client.wait_for_ready())?; + + let mut req = pdpb::IsSnapshotRecoveringRequest::default(); + req.set_header(self.raw_client.header()); + + let resp = self.raw_client.stub().is_snapshot_recovering_opt( + &req, + self.raw_client.call_option().timeout(request_timeout()), + ); + let resp = self.raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + + Ok(resp.get_marked()) + } + + fn put_store(&mut self, store: metapb::Store) -> Result> { + let _timer = PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["put_store"]) + .start_coarse_timer(); + + block_on(self.raw_client.wait_for_ready())?; + + let mut req = pdpb::PutStoreRequest::default(); + req.set_header(self.raw_client.header()); + req.set_store(store); + + let resp = self.raw_client.stub().put_store_opt( + &req, + self.raw_client.call_option().timeout(request_timeout()), + ); + let mut resp = self.raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + + Ok(resp.replication_status.take()) + } + + fn get_store_and_stats( + &mut self, + store_id: u64, + ) -> PdFuture<(metapb::Store, pdpb::StoreStats)> { + let timer = Instant::now_coarse(); + + let mut req = pdpb::GetStoreRequest::default(); + req.set_store_id(store_id); + + let mut raw_client = self.raw_client.clone(); + Box::pin(async move { + raw_client.wait_for_ready().await?; + req.set_header(raw_client.header()); + let resp = raw_client + .stub() + .get_store_async_opt(&req, raw_client.call_option().timeout(request_timeout())) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "get_store_and_stats", e); + }) + .await; + PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["get_store_and_stats"]) + 
.observe(duration_to_sec(timer.saturating_elapsed())); + let mut resp = raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + let store = resp.take_store(); + if store.get_state() != metapb::StoreState::Tombstone { + Ok((store, resp.take_stats())) + } else { + Err(Error::StoreTombstone(format!("{:?}", store))) + } + }) + } + + fn get_all_stores(&mut self, exclude_tombstone: bool) -> Result> { + let _timer = PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["get_all_stores"]) + .start_coarse_timer(); + + block_on(self.raw_client.wait_for_ready())?; + + let mut req = pdpb::GetAllStoresRequest::default(); + req.set_header(self.raw_client.header()); + req.set_exclude_tombstone_stores(exclude_tombstone); + + let resp = self.raw_client.stub().get_all_stores_opt( + &req, + self.raw_client.call_option().timeout(request_timeout()), + ); + let mut resp = self.raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + + Ok(resp.take_stores().into()) + } + + fn get_cluster_config(&mut self) -> Result { + let _timer = PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["get_cluster_config"]) + .start_coarse_timer(); + + block_on(self.raw_client.wait_for_ready())?; + + let mut req = pdpb::GetClusterConfigRequest::default(); + req.set_header(self.raw_client.header()); + + let resp = self.raw_client.stub().get_cluster_config_opt( + &req, + self.raw_client.call_option().timeout(request_timeout()), + ); + let mut resp = self.raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + + Ok(resp.take_cluster()) + } + + fn get_region_and_leader( + &mut self, + key: &[u8], + ) -> PdFuture<(metapb::Region, Option)> { + let timer = Instant::now_coarse(); + + let mut req = pdpb::GetRegionRequest::default(); + req.set_region_key(key.to_vec()); + + let mut raw_client = self.raw_client.clone(); + Box::pin(async move { + raw_client.wait_for_ready().await?; + req.set_header(raw_client.header()); + let resp = raw_client + .stub() + 
.get_region_async_opt(&req, raw_client.call_option().timeout(request_timeout())) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "get_region_async_opt", e) + }) + .await; + PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["get_region"]) + .observe(duration_to_sec(timer.saturating_elapsed())); + let mut resp = raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + let region = if resp.has_region() { + resp.take_region() + } else { + return Err(Error::RegionNotFound(req.region_key)); + }; + let leader = if resp.has_leader() { + Some(resp.take_leader()) + } else { + None + }; + Ok((region, leader)) + }) + } + + fn get_region_by_id(&mut self, region_id: u64) -> PdFuture> { + let timer = Instant::now_coarse(); + + let mut req = pdpb::GetRegionByIdRequest::default(); + req.set_region_id(region_id); + + let mut raw_client = self.raw_client.clone(); + Box::pin(async move { + raw_client.wait_for_ready().await?; + req.set_header(raw_client.header()); + let resp = raw_client + .stub() + .get_region_by_id_async_opt( + &req, + raw_client.call_option().timeout(request_timeout()), + ) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "get_region_by_id", e); + }) + .await; + PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["get_region_by_id"]) + .observe(duration_to_sec(timer.saturating_elapsed())); + let mut resp = raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + if resp.has_region() { + Ok(Some(resp.take_region())) + } else { + Ok(None) + } + }) + } + + fn get_region_leader_by_id( + &mut self, + region_id: u64, + ) -> PdFuture> { + let timer = Instant::now_coarse(); + + let mut req = pdpb::GetRegionByIdRequest::default(); + req.set_region_id(region_id); + + let mut raw_client = self.raw_client.clone(); + Box::pin(async move { + raw_client.wait_for_ready().await?; + req.set_header(raw_client.header()); + let resp = raw_client + .stub() + .get_region_by_id_async_opt( + &req, + 
raw_client.call_option().timeout(request_timeout()), + ) + .unwrap_or_else(|e| { + panic!( + "fail to request PD {} err {:?}", + "get_region_leader_by_id", e + ); + }) + .await; + PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["get_region_leader_by_id"]) + .observe(duration_to_sec(timer.saturating_elapsed())); + let mut resp = raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + if resp.has_region() && resp.has_leader() { + Ok(Some((resp.take_region(), resp.take_leader()))) + } else { + Ok(None) + } + }) + } + + fn ask_split(&mut self, region: metapb::Region) -> PdFuture { + let timer = Instant::now_coarse(); + + let mut req = pdpb::AskSplitRequest::default(); + req.set_region(region); + + let mut raw_client = self.raw_client.clone(); + Box::pin(async move { + raw_client.wait_for_ready().await?; + req.set_header(raw_client.header()); + let resp = raw_client + .stub() + .ask_split_async_opt(&req, raw_client.call_option().timeout(request_timeout())) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "ask_split", e); + }) + .await; + PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["ask_split"]) + .observe(duration_to_sec(timer.saturating_elapsed())); + let resp = raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + Ok(resp) + }) + } + + fn ask_batch_split( + &mut self, + region: metapb::Region, + count: usize, + ) -> PdFuture { + let timer = Instant::now_coarse(); + + let mut req = pdpb::AskBatchSplitRequest::default(); + req.set_region(region); + req.set_split_count(count as u32); + + let mut raw_client = self.raw_client.clone(); + Box::pin(async move { + raw_client.wait_for_ready().await?; + req.set_header(raw_client.header()); + let resp = raw_client + .stub() + .ask_batch_split_async_opt( + &req, + raw_client.call_option().timeout(request_timeout()), + ) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "ask_batch_split", e); + }) + .await; + PD_REQUEST_HISTOGRAM_VEC + 
.with_label_values(&["ask_batch_split"]) + .observe(duration_to_sec(timer.saturating_elapsed())); + let resp = raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + Ok(resp) + }) + } + + fn store_heartbeat( + &mut self, + mut stats: pdpb::StoreStats, + store_report: Option, + dr_autosync_status: Option, + ) -> PdFuture { + let timer = Instant::now_coarse(); + + let mut req = pdpb::StoreHeartbeatRequest::default(); + stats + .mut_interval() + .set_end_timestamp(UnixSecs::now().into_inner()); + req.set_stats(stats); + if let Some(report) = store_report { + req.set_store_report(report); + } + if let Some(status) = dr_autosync_status { + req.set_dr_autosync_status(status); + } + + let mut raw_client = self.raw_client.clone(); + let feature_gate = self.feature_gate.clone(); + Box::pin(async move { + raw_client.wait_for_ready().await?; + req.set_header(raw_client.header()); + let resp = raw_client + .stub() + .store_heartbeat_async_opt( + &req, + raw_client.call_option().timeout(request_timeout()), + ) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "store_heartbeat", e); + }) + .await; + PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["store_heartbeat"]) + .observe(duration_to_sec(timer.saturating_elapsed())); + let resp = raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + match feature_gate.set_version(resp.get_cluster_version()) { + Err(_) => warn!("invalid cluster version: {}", resp.get_cluster_version()), + Ok(true) => info!("set cluster version to {}", resp.get_cluster_version()), + _ => {} + }; + Ok(resp) + }) + } + + fn report_batch_split(&mut self, regions: Vec) -> PdFuture<()> { + let timer = Instant::now_coarse(); + + let mut req = pdpb::ReportBatchSplitRequest::default(); + req.set_regions(regions.into()); + + let mut raw_client = self.raw_client.clone(); + Box::pin(async move { + raw_client.wait_for_ready().await?; + req.set_header(raw_client.header()); + let resp = raw_client + .stub() + 
.report_batch_split_async_opt( + &req, + raw_client.call_option().timeout(request_timeout()), + ) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "report_batch_split", e); + }) + .await; + PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["report_batch_split"]) + .observe(duration_to_sec(timer.saturating_elapsed())); + let resp = raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + Ok(()) + }) + } + + fn scatter_region(&mut self, mut region: RegionInfo) -> Result<()> { + let _timer = PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["scatter_region"]) + .start_coarse_timer(); + + let mut req = pdpb::ScatterRegionRequest::default(); + req.set_region_id(region.get_id()); + if let Some(leader) = region.leader.take() { + req.set_leader(leader); + } + req.set_region(region.region); + + block_on(self.raw_client.wait_for_ready())?; + req.set_header(self.raw_client.header()); + let resp = self.raw_client.stub().scatter_region_opt( + &req, + self.raw_client.call_option().timeout(request_timeout()), + ); + let resp = self.raw_client.check_resp(resp)?; + check_resp_header(resp.get_header()) + } + + fn get_gc_safe_point(&mut self) -> PdFuture { + let timer = Instant::now_coarse(); + + let mut req = pdpb::GetGcSafePointRequest::default(); + + let mut raw_client = self.raw_client.clone(); + Box::pin(async move { + raw_client.wait_for_ready().await?; + req.set_header(raw_client.header()); + let resp = raw_client + .stub() + .get_gc_safe_point_async_opt( + &req, + raw_client.call_option().timeout(request_timeout()), + ) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "get_gc_saft_point", e); + }) + .await; + PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["get_gc_saft_point"]) + .observe(duration_to_sec(timer.saturating_elapsed())); + let resp = raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + Ok(resp.get_safe_point()) + }) + } + + fn get_operator(&mut self, region_id: u64) -> Result { + let _timer = 
PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["get_operator"]) + .start_coarse_timer(); + + block_on(self.raw_client.wait_for_ready())?; + + let mut req = pdpb::GetOperatorRequest::default(); + req.set_header(self.raw_client.header()); + req.set_region_id(region_id); + + let resp = self.raw_client.stub().get_operator_opt( + &req, + self.raw_client.call_option().timeout(request_timeout()), + ); + let resp = self.raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + + Ok(resp) + } + + fn update_service_safe_point( + &mut self, + name: String, + safe_point: TimeStamp, + ttl: Duration, + ) -> PdFuture<()> { + let timer = Instant::now_coarse(); + let mut req = pdpb::UpdateServiceGcSafePointRequest::default(); + req.set_service_id(name.into()); + req.set_ttl(ttl.as_secs() as _); + req.set_safe_point(safe_point.into_inner()); + + let mut raw_client = self.raw_client.clone(); + Box::pin(async move { + raw_client.wait_for_ready().await?; + req.set_header(raw_client.header()); + let resp = raw_client + .stub() + .update_service_gc_safe_point_async_opt( + &req, + raw_client.call_option().timeout(request_timeout()), + ) + .unwrap_or_else(|e| { + panic!( + "fail to request PD {} err {:?}", + "update_service_safe_point", e + ); + }) + .await; + PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["update_service_safe_point"]) + .observe(duration_to_sec(timer.saturating_elapsed())); + let resp = raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + Ok(()) + }) + } + + fn report_min_resolved_ts(&mut self, store_id: u64, min_resolved_ts: u64) -> PdFuture<()> { + let timer = Instant::now_coarse(); + + let mut req = pdpb::ReportMinResolvedTsRequest::default(); + req.set_store_id(store_id); + req.set_min_resolved_ts(min_resolved_ts); + + let mut raw_client = self.raw_client.clone(); + Box::pin(async move { + raw_client.wait_for_ready().await?; + req.set_header(raw_client.header()); + let resp = raw_client + .stub() + 
.report_min_resolved_ts_async_opt( + &req, + raw_client.call_option().timeout(request_timeout()), + ) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "min_resolved_ts", e); + }) + .await; + PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["min_resolved_ts"]) + .observe(duration_to_sec(timer.saturating_elapsed())); + let resp = raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + Ok(()) + }) + } +} diff --git a/components/pd_client/src/lib.rs b/components/pd_client/src/lib.rs index e4350e3d396..8674130c799 100644 --- a/components/pd_client/src/lib.rs +++ b/components/pd_client/src/lib.rs @@ -1,8 +1,12 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. + +#![feature(let_chains)] + #[allow(unused_extern_crates)] extern crate tikv_alloc; mod client; +mod client_v2; mod feature_gate; pub mod metrics; mod tso; @@ -23,7 +27,8 @@ use tikv_util::time::{Instant, UnixSecs}; use txn_types::TimeStamp; pub use self::{ - client::{DummyPdClient, RpcClient}, + client::RpcClient, + client_v2::{PdClient as PdClientV2, RpcClient as RpcClientV2}, config::Config, errors::{Error, Result}, feature_gate::{Feature, FeatureGate}, diff --git a/components/pd_client/src/tso.rs b/components/pd_client/src/tso.rs index a19d7af8f06..feec5061a8c 100644 --- a/components/pd_client/src/tso.rs +++ b/components/pd_client/src/tso.rs @@ -180,40 +180,41 @@ impl<'a> Stream for TsoRequestStream<'a> { fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { let pending_requests = self.pending_requests.clone(); let mut pending_requests = pending_requests.borrow_mut(); - let mut requests = Vec::new(); - while requests.len() < MAX_BATCH_SIZE && pending_requests.len() < MAX_PENDING_COUNT { - match self.request_rx.poll_recv(cx) { - Poll::Ready(Some(sender)) => { - requests.push(sender); + if pending_requests.len() < MAX_PENDING_COUNT { + let mut requests = Vec::new(); + while requests.len() < MAX_BATCH_SIZE { + match 
self.request_rx.poll_recv(cx) { + Poll::Ready(Some(sender)) => { + requests.push(sender); + } + Poll::Ready(None) if requests.is_empty() => { + return Poll::Ready(None); + } + _ => break, } - Poll::Ready(None) if requests.is_empty() => { - return Poll::Ready(None); - } - _ => break, + } + if !requests.is_empty() { + let mut req = TsoRequest::default(); + req.mut_header().cluster_id = self.cluster_id; + req.count = requests.iter().map(|r| r.count).sum(); + + let request_group = RequestGroup { + tso_request: req.clone(), + requests, + }; + pending_requests.push_back(request_group); + PD_PENDING_TSO_REQUEST_GAUGE.set(pending_requests.len() as i64); + + let write_flags = WriteFlags::default().buffer_hint(false); + return Poll::Ready(Some((req, write_flags))); } } - if !requests.is_empty() { - let mut req = TsoRequest::default(); - req.mut_header().cluster_id = self.cluster_id; - req.count = requests.iter().map(|r| r.count).sum(); - - let request_group = RequestGroup { - tso_request: req.clone(), - requests, - }; - pending_requests.push_back(request_group); - PD_PENDING_TSO_REQUEST_GAUGE.set(pending_requests.len() as i64); - - let write_flags = WriteFlags::default().buffer_hint(false); - Poll::Ready(Some((req, write_flags))) - } else { - // Set the waker to the context, then the stream can be waked up after the - // pending queue is no longer full. - self.self_waker.register(cx.waker()); - Poll::Pending - } + // Set the waker to the context, then the stream can be waked up after the + // pending queue is no longer full. 
+ self.self_waker.register(cx.waker()); + Poll::Pending } } diff --git a/components/pd_client/src/util.rs b/components/pd_client/src/util.rs index da77783c167..72c8cc16b04 100644 --- a/components/pd_client/src/util.rs +++ b/components/pd_client/src/util.rs @@ -51,13 +51,14 @@ const MAX_RETRY_DURATION: Duration = Duration::from_secs(10); const GLOBAL_RECONNECT_INTERVAL: Duration = Duration::from_millis(100); // 0.1s pub const REQUEST_RECONNECT_INTERVAL: Duration = Duration::from_secs(1); // 1s +#[derive(Clone)] pub struct TargetInfo { target_url: String, via: String, } impl TargetInfo { - fn new(target_url: String, via: &str) -> TargetInfo { + pub(crate) fn new(target_url: String, via: &str) -> TargetInfo { TargetInfo { target_url, via: trim_http_prefix(via).to_string(), @@ -340,7 +341,13 @@ impl Client { async move { let direct_connected = self.inner.rl().target_info().direct_connected(); connector - .reconnect_pd(members, direct_connected, force, self.enable_forwarding) + .reconnect_pd( + members, + direct_connected, + force, + self.enable_forwarding, + true, + ) .await } }; @@ -383,7 +390,7 @@ impl Client { fail_point!("pd_client_reconnect", |_| Ok(())); - self.update_client(client, target_info, members, tso); + self.update_client(client, target_info, members, tso.unwrap()); info!("trying to update PD client done"; "spend" => ?start.saturating_elapsed()); Ok(()) } @@ -521,11 +528,13 @@ pub type StubTuple = ( PdClientStub, TargetInfo, GetMembersResponse, - TimestampOracle, + // Only used by RpcClient, not by RpcClientV2. 
+ Option, ); +#[derive(Clone)] pub struct PdConnector { - env: Arc, + pub(crate) env: Arc, security_mgr: Arc, } @@ -534,7 +543,7 @@ impl PdConnector { PdConnector { env, security_mgr } } - pub async fn validate_endpoints(&self, cfg: &Config) -> Result { + pub async fn validate_endpoints(&self, cfg: &Config, build_tso: bool) -> Result { let len = cfg.endpoints.len(); let mut endpoints_set = HashSet::with_capacity_and_hasher(len, Default::default()); let mut members = None; @@ -575,7 +584,7 @@ impl PdConnector { match members { Some(members) => { let res = self - .reconnect_pd(members, true, true, cfg.enable_forwarding) + .reconnect_pd(members, true, true, cfg.enable_forwarding, build_tso) .await? .unwrap(); info!("all PD endpoints are consistent"; "endpoints" => ?cfg.endpoints); @@ -593,7 +602,9 @@ impl PdConnector { .max_send_message_len(-1) .max_receive_message_len(-1) .keepalive_time(Duration::from_secs(10)) - .keepalive_timeout(Duration::from_secs(3)); + .keepalive_timeout(Duration::from_secs(3)) + .max_reconnect_backoff(Duration::from_secs(5)) + .initial_reconnect_backoff(Duration::from_secs(1)); self.security_mgr.connect(cb, addr_trim) }; fail_point!("cluster_id_is_not_ready", |_| { @@ -602,7 +613,7 @@ impl PdConnector { GetMembersResponse::default(), )) }); - let client = PdClientStub::new(channel); + let client = PdClientStub::new(channel.clone()); let option = CallOption::default().timeout(Duration::from_secs(REQUEST_TIMEOUT)); let response = client .get_members_async_opt(&GetMembersRequest::default(), option) @@ -680,12 +691,13 @@ impl PdConnector { // not empty and it can connect the leader now which represents the network // partition problem to leader may be recovered 3. 
the member information of // PD has been changed - async fn reconnect_pd( + pub async fn reconnect_pd( &self, members_resp: GetMembersResponse, direct_connected: bool, force: bool, enable_forwarding: bool, + build_tso: bool, ) -> Result> { let resp = self.load_members(&members_resp).await?; let leader = resp.get_leader(); @@ -699,11 +711,15 @@ impl PdConnector { match res { Some((client, target_url)) => { let info = TargetInfo::new(target_url, ""); - let tso = TimestampOracle::new( - resp.get_header().get_cluster_id(), - &client, - info.call_option(), - )?; + let tso = if build_tso { + Some(TimestampOracle::new( + resp.get_header().get_cluster_id(), + &client, + info.call_option(), + )?) + } else { + None + }; return Ok(Some((client, info, resp, tso))); } None => { @@ -714,11 +730,15 @@ impl PdConnector { } if enable_forwarding && has_network_error { if let Ok(Some((client, info))) = self.try_forward(members, leader).await { - let tso = TimestampOracle::new( - resp.get_header().get_cluster_id(), - &client, - info.call_option(), - )?; + let tso = if build_tso { + Some(TimestampOracle::new( + resp.get_header().get_cluster_id(), + &client, + info.call_option(), + )?) 
+ } else { + None + }; return Ok(Some((client, info, resp, tso))); } } @@ -774,7 +794,9 @@ impl PdConnector { loop { let (res, has_network_err) = self.connect_member(leader).await?; match res { - Some((client, ep, _)) => return Ok((Some((client, ep)), has_network_err)), + Some((client, ep, _)) => { + return Ok((Some((client, ep)), has_network_err)); + } None => { if has_network_err && retry_times > 0 diff --git a/components/raft_log_engine/Cargo.toml b/components/raft_log_engine/Cargo.toml index 2b9d2de73ff..0ee185fd365 100644 --- a/components/raft_log_engine/Cargo.toml +++ b/components/raft_log_engine/Cargo.toml @@ -8,7 +8,7 @@ edition = "2018" encryption = { workspace = true } engine_traits = { workspace = true } file_system = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.4.0" num_cpus = "1" online_config = { workspace = true } diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 1679732ccda..46ed20f8d10 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -41,7 +41,7 @@ file_system = { workspace = true } fs2 = "0.4" futures = { version = "0.3", features = ["compat"] } keys = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } log_wrappers = { workspace = true } pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 54eb07e8161..548693b71ac 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -56,7 +56,7 @@ grpcio-health = { version = "0.10", default-features = false, features = ["proto into_other = { workspace = true } itertools = "0.10" keys = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" log = { 
version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } log_wrappers = { workspace = true } @@ -90,7 +90,7 @@ tokio = { version = "1.5", features = ["sync", "rt-multi-thread"] } tracker = { workspace = true } txn_types = { workspace = true } uuid = { version = "0.8.1", features = ["serde", "v4"] } -yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } +yatp = { workspace = true } [dev-dependencies] encryption_export = { workspace = true } diff --git a/components/resolved_ts/Cargo.toml b/components/resolved_ts/Cargo.toml index d4a7e3d1ca2..10a555678c3 100644 --- a/components/resolved_ts/Cargo.toml +++ b/components/resolved_ts/Cargo.toml @@ -31,7 +31,7 @@ fail = "0.5" futures = "0.3" grpcio = { workspace = true } hex = "0.4" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" log_wrappers = { workspace = true } online_config = { workspace = true } diff --git a/components/resource_metering/Cargo.toml b/components/resource_metering/Cargo.toml index acb2dff89d3..20ed4ea2eda 100644 --- a/components/resource_metering/Cargo.toml +++ b/components/resource_metering/Cargo.toml @@ -8,7 +8,7 @@ collections = { workspace = true } crossbeam = "0.8" futures = "0.3" grpcio = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" libc = "0.2" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index 7a40340b64e..b27846ad5a3 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -56,7 +56,7 @@ grpcio = { workspace = true } grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } hex = "0.4" keys = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } libc = "0.2" log = { version = 
"0.4", features = ["max_level_trace", "release_max_level_debug"] } log_wrappers = { workspace = true } @@ -81,7 +81,7 @@ tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread"] } toml = "0.5" txn_types = { workspace = true } -yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } +yatp = { workspace = true } [target.'cfg(unix)'.dependencies] signal-hook = "0.3" diff --git a/components/sst_importer/Cargo.toml b/components/sst_importer/Cargo.toml index 0bba773418b..d0e2ff7eca8 100644 --- a/components/sst_importer/Cargo.toml +++ b/components/sst_importer/Cargo.toml @@ -26,7 +26,7 @@ futures = { version = "0.3", features = ["thread-pool"] } futures-util = { version = "0.3", default-features = false, features = ["io"] } grpcio = { workspace = true } keys = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" log_wrappers = { workspace = true } openssl = "0.10" diff --git a/components/test_backup/Cargo.toml b/components/test_backup/Cargo.toml index 902e57d5eed..1798b50c82b 100644 --- a/components/test_backup/Cargo.toml +++ b/components/test_backup/Cargo.toml @@ -23,7 +23,7 @@ futures = "0.3" futures-executor = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } grpcio = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } protobuf = "2" rand = "0.8" tempfile = "3.0" diff --git a/components/test_coprocessor/Cargo.toml b/components/test_coprocessor/Cargo.toml index a3bb3f8e476..03047d75e87 100644 --- a/components/test_coprocessor/Cargo.toml +++ b/components/test_coprocessor/Cargo.toml @@ -25,7 +25,7 @@ collections = { workspace = true } concurrency_manager = { workspace = true } engine_rocks = { workspace = true } futures = "0.3" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } protobuf = "2" 
resource_metering = { workspace = true } test_storage = { workspace = true } @@ -33,5 +33,5 @@ tidb_query_common = { workspace = true } tidb_query_datatype = { workspace = true } tikv = { workspace = true } tikv_util = { workspace = true } -tipb = { git = "https://github.com/pingcap/tipb.git" } +tipb = { workspace = true } txn_types = { workspace = true } diff --git a/components/test_pd/Cargo.toml b/components/test_pd/Cargo.toml index d9163706895..a478e6ee325 100644 --- a/components/test_pd/Cargo.toml +++ b/components/test_pd/Cargo.toml @@ -9,7 +9,7 @@ collections = { workspace = true } fail = "0.5" futures = "0.3" grpcio = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } pd_client = { workspace = true } security = { workspace = true } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } diff --git a/components/test_pd/src/server.rs b/components/test_pd/src/server.rs index 79b095ef0d9..9e1a2b3bb0f 100644 --- a/components/test_pd/src/server.rs +++ b/components/test_pd/src/server.rs @@ -242,18 +242,19 @@ impl Pd for PdMock { let header = Service::header(); let tso_logical = self.tso_logical.clone(); let fut = async move { - resp.send_all(&mut req.map_ok(move |r| { - let logical = - tso_logical.fetch_add(r.count as i64, Ordering::SeqCst) + r.count as i64; - let mut res = TsoResponse::default(); - res.set_header(header.clone()); - res.mut_timestamp().physical = 42; - res.mut_timestamp().logical = logical; - res.count = r.count; - (res, WriteFlags::default()) - })) - .await - .unwrap(); + // Tolerate errors like RpcFinished(None). 
+ let _ = resp + .send_all(&mut req.map_ok(move |r| { + let logical = + tso_logical.fetch_add(r.count as i64, Ordering::SeqCst) + r.count as i64; + let mut res = TsoResponse::default(); + res.set_header(header.clone()); + res.mut_timestamp().physical = 42; + res.mut_timestamp().logical = logical; + res.count = r.count; + (res, WriteFlags::default()) + })) + .await; let _ = resp.close().await; }; ctx.spawn(fut); diff --git a/components/test_pd/src/util.rs b/components/test_pd/src/util.rs index 1b05196c346..b1a22b93c47 100644 --- a/components/test_pd/src/util.rs +++ b/components/test_pd/src/util.rs @@ -2,7 +2,7 @@ use std::sync::Arc; -use pd_client::{Config, RpcClient}; +use pd_client::{Config, RpcClient, RpcClientV2}; use security::{SecurityConfig, SecurityManager}; use tikv_util::config::ReadableDuration; @@ -23,6 +23,13 @@ pub fn new_client(eps: Vec<(String, u16)>, mgr: Option>) -> RpcClient::new(&cfg, None, mgr).unwrap() } +pub fn new_client_v2(eps: Vec<(String, u16)>, mgr: Option>) -> RpcClientV2 { + let cfg = new_config(eps); + let mgr = + mgr.unwrap_or_else(|| Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap())); + RpcClientV2::new(&cfg, None, mgr).unwrap() +} + pub fn new_client_with_update_interval( eps: Vec<(String, u16)>, mgr: Option>, @@ -34,3 +41,15 @@ pub fn new_client_with_update_interval( mgr.unwrap_or_else(|| Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap())); RpcClient::new(&cfg, None, mgr).unwrap() } + +pub fn new_client_v2_with_update_interval( + eps: Vec<(String, u16)>, + mgr: Option>, + interval: ReadableDuration, +) -> RpcClientV2 { + let mut cfg = new_config(eps); + cfg.update_interval = interval; + let mgr = + mgr.unwrap_or_else(|| Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap())); + RpcClientV2::new(&cfg, None, mgr).unwrap() +} diff --git a/components/test_pd_client/Cargo.toml b/components/test_pd_client/Cargo.toml index ad2b20de5a0..9f67752b4c5 100644 --- 
a/components/test_pd_client/Cargo.toml +++ b/components/test_pd_client/Cargo.toml @@ -10,7 +10,7 @@ fail = "0.5" futures = "0.3" grpcio = { workspace = true } keys = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } log_wrappers = { workspace = true } pd_client = { workspace = true } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } @@ -18,5 +18,5 @@ slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debu slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread"] } -tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } +tokio-timer = { workspace = true } txn_types = { workspace = true } diff --git a/components/test_raftstore/Cargo.toml b/components/test_raftstore/Cargo.toml index fb627dccb11..71c214ae21d 100644 --- a/components/test_raftstore/Cargo.toml +++ b/components/test_raftstore/Cargo.toml @@ -40,7 +40,7 @@ futures = "0.3" grpcio = { workspace = true } grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } keys = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" log_wrappers = { workspace = true } pd_client = { workspace = true } @@ -61,5 +61,5 @@ test_util = { workspace = true } tikv = { workspace = true } tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread"] } -tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } +tokio-timer = { workspace = true } txn_types = { workspace = true } diff --git a/components/test_sst_importer/Cargo.toml b/components/test_sst_importer/Cargo.toml index b0c3e96ef5a..f951a6755e6 100644 --- 
a/components/test_sst_importer/Cargo.toml +++ b/components/test_sst_importer/Cargo.toml @@ -13,5 +13,5 @@ crc32fast = "1.2" engine_rocks = { workspace = true } engine_traits = { workspace = true } keys = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } uuid = { version = "0.8.1", features = ["serde", "v4"] } diff --git a/components/test_storage/Cargo.toml b/components/test_storage/Cargo.toml index 04adc4e6de4..b1172b5d559 100644 --- a/components/test_storage/Cargo.toml +++ b/components/test_storage/Cargo.toml @@ -24,7 +24,7 @@ test-engines-panic = [ api_version = { workspace = true } collections = { workspace = true } futures = "0.3" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } pd_client = { workspace = true } raftstore = { workspace = true } test_raftstore = { workspace = true } diff --git a/components/test_util/Cargo.toml b/components/test_util/Cargo.toml index 8aca28b092b..740132353f3 100644 --- a/components/test_util/Cargo.toml +++ b/components/test_util/Cargo.toml @@ -16,7 +16,7 @@ collections = { workspace = true } encryption_export = { workspace = true } fail = "0.5" grpcio = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } rand = "0.8" rand_isaac = "0.3" security = { workspace = true } diff --git a/components/tidb_query_aggr/Cargo.toml b/components/tidb_query_aggr/Cargo.toml index db8d9d64faf..facc9d32f36 100644 --- a/components/tidb_query_aggr/Cargo.toml +++ b/components/tidb_query_aggr/Cargo.toml @@ -12,7 +12,7 @@ tidb_query_common = { workspace = true } tidb_query_datatype = { workspace = true } tidb_query_expr = { workspace = true } tikv_util = { workspace = true } -tipb = { git = "https://github.com/pingcap/tipb.git" } +tipb = { workspace = true } [dev-dependencies] panic_hook = { workspace = true } diff --git a/components/tidb_query_common/Cargo.toml 
b/components/tidb_query_common/Cargo.toml index 05133b130e7..3dd1693ba0d 100644 --- a/components/tidb_query_common/Cargo.toml +++ b/components/tidb_query_common/Cargo.toml @@ -11,7 +11,7 @@ async-trait = "0.1" derive_more = "0.99.3" error_code = { workspace = true } futures = "0.3" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" log_wrappers = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } @@ -20,7 +20,7 @@ serde_json = "1.0" thiserror = "1.0" tikv_util = { workspace = true } time = "0.1" -yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } +yatp = { workspace = true } [dev-dependencies] byteorder = "1.2" diff --git a/components/tidb_query_datatype/Cargo.toml b/components/tidb_query_datatype/Cargo.toml index af7e7e08b9d..e9d96e16284 100644 --- a/components/tidb_query_datatype/Cargo.toml +++ b/components/tidb_query_datatype/Cargo.toml @@ -18,7 +18,7 @@ collections = { workspace = true } encoding_rs = { git = "https://github.com/xiongjiwei/encoding_rs.git", rev = "68e0bc5a72a37a78228d80cd98047326559cf43c" } error_code = { workspace = true } hex = "0.4" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" log_wrappers = { workspace = true } match-template = "0.0.1" @@ -38,4 +38,4 @@ thiserror = "1.0" tidb_query_common = { workspace = true } tikv_alloc = { workspace = true } tikv_util = { workspace = true } -tipb = { git = "https://github.com/pingcap/tipb.git" } +tipb = { workspace = true } diff --git a/components/tidb_query_executors/Cargo.toml b/components/tidb_query_executors/Cargo.toml index e448340eddf..123c306c125 100644 --- a/components/tidb_query_executors/Cargo.toml +++ b/components/tidb_query_executors/Cargo.toml @@ -12,7 +12,7 @@ collections = { workspace = true } fail = "0.5" futures = { version = "0.3", features = ["compat"] } itertools = "0.10" -kvproto = { git = 
"https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } log_wrappers = { workspace = true } match-template = "0.0.1" protobuf = { version = "2.8", features = ["bytes"] } @@ -24,8 +24,8 @@ tidb_query_common = { workspace = true } tidb_query_datatype = { workspace = true } tidb_query_expr = { workspace = true } tikv_util = { workspace = true } -tipb = { git = "https://github.com/pingcap/tipb.git" } -yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } +tipb = { workspace = true } +yatp = { workspace = true } [dev-dependencies] anyhow = "1.0" diff --git a/components/tidb_query_expr/Cargo.toml b/components/tidb_query_expr/Cargo.toml index 1ca4a46b6dd..95f37308e59 100644 --- a/components/tidb_query_expr/Cargo.toml +++ b/components/tidb_query_expr/Cargo.toml @@ -30,7 +30,7 @@ tidb_query_common = { workspace = true } tidb_query_datatype = { workspace = true } tikv_util = { workspace = true } time = "0.1" -tipb = { git = "https://github.com/pingcap/tipb.git" } +tipb = { workspace = true } twoway = "0.2.0" uuid = { version = "0.8.1", features = ["v4"] } diff --git a/components/tikv_kv/Cargo.toml b/components/tikv_kv/Cargo.toml index 8aa64d0def6..2911c7738c6 100644 --- a/components/tikv_kv/Cargo.toml +++ b/components/tikv_kv/Cargo.toml @@ -36,7 +36,7 @@ fail = "0.5" file_system = { workspace = true } futures = { version = "0.3", features = ["thread-pool", "compat"] } into_other = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } log_wrappers = { workspace = true } pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 36faa552804..12c3983ef2d 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -26,7 +26,7 @@ futures = { version = "0.3", features = ["compat", "thread-pool"] } futures-util = { version = "0.3", 
default-features = false, features = ["io"] } grpcio = { workspace = true } http = "0.2.0" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" libc = "0.2" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } @@ -56,10 +56,10 @@ tikv_alloc = { workspace = true } time = "0.1" tokio = { version = "1.5", features = ["rt-multi-thread"] } tokio-executor = "0.1" -tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } +tokio-timer = { workspace = true } tracker = { workspace = true } url = "2" -yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } +yatp = { workspace = true } [target.'cfg(target_os = "linux")'.dependencies] procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229b2c1fcc44118bef7eff127128" } diff --git a/components/tipb_helper/Cargo.toml b/components/tipb_helper/Cargo.toml index 31d2c290fdc..bfbadabaea3 100644 --- a/components/tipb_helper/Cargo.toml +++ b/components/tipb_helper/Cargo.toml @@ -7,4 +7,4 @@ publish = false [dependencies] codec = { workspace = true } tidb_query_datatype = { workspace = true } -tipb = { git = "https://github.com/pingcap/tipb.git" } +tipb = { workspace = true } diff --git a/components/tracker/Cargo.toml b/components/tracker/Cargo.toml index b369fab9628..84a3f5da0ab 100644 --- a/components/tracker/Cargo.toml +++ b/components/tracker/Cargo.toml @@ -7,7 +7,7 @@ publish = false [dependencies] collections = { workspace = true } crossbeam-utils = "0.8" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1" parking_lot = "0.12" pin-project = "1" diff --git a/components/txn_types/Cargo.toml b/components/txn_types/Cargo.toml index 9ccfe0bb323..0c357ef1dd6 100644 --- a/components/txn_types/Cargo.toml +++ b/components/txn_types/Cargo.toml @@ -11,7 +11,7 @@ codec = { workspace = true } collections = { workspace = true } 
error_code = { workspace = true } farmhash = "1.1.5" -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } log_wrappers = { workspace = true } slog = "2.3" thiserror = "1.0" diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 5f29d44a53d..ae6c6984487 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -41,11 +41,11 @@ path = "benches/deadlock_detector/mod.rs" [features] default = ["failpoints", "testexport", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] -failpoints = ["fail/failpoints", "tikv/failpoints"] +failpoints = ["fail/failpoints", "tikv/failpoints", "pd_client/failpoints"] cloud-aws = ["external_storage_export/cloud-aws"] cloud-gcp = ["external_storage_export/cloud-gcp"] cloud-azure = ["external_storage_export/cloud-azure"] -testexport = ["raftstore/testexport", "tikv/testexport"] +testexport = ["raftstore/testexport", "tikv/testexport", "pd_client/testexport"] profiling = ["profiler/profiling"] test-engine-kv-rocksdb = [ @@ -83,7 +83,7 @@ file_system = { workspace = true } futures = "0.3" grpcio = { workspace = true } grpcio-health = { version = "0.10", default-features = false } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } libc = "0.2" log_wrappers = { workspace = true } more-asserts = "0.2" @@ -106,7 +106,7 @@ tidb_query_expr = { workspace = true } tikv = { workspace = true } tikv_util = { workspace = true } time = "0.1" -tipb = { git = "https://github.com/pingcap/tipb.git" } +tipb = { workspace = true } toml = "0.5" txn_types = { workspace = true } uuid = { version = "0.8.1", features = ["serde", "v4"] } diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index 24a05f2ab9f..9c90211c073 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -20,6 +20,7 @@ mod test_memory_usage_limit; mod test_merge; mod test_metrics_overflow; mod test_pd_client; +mod 
test_pd_client_legacy; mod test_pending_peers; mod test_rawkv; mod test_read_execution_tracker; diff --git a/tests/failpoints/cases/test_pd_client.rs b/tests/failpoints/cases/test_pd_client.rs index eb22ac29e45..635b199291b 100644 --- a/tests/failpoints/cases/test_pd_client.rs +++ b/tests/failpoints/cases/test_pd_client.rs @@ -1,4 +1,4 @@ -// Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. use std::{ sync::{mpsc, Arc}, @@ -6,34 +6,35 @@ use std::{ time::Duration, }; +use futures::executor::block_on; use grpcio::EnvBuilder; use kvproto::metapb::*; -use pd_client::{PdClient, RegionInfo, RegionStat, RpcClient}; +use pd_client::{PdClientV2, RegionInfo, RpcClientV2}; use security::{SecurityConfig, SecurityManager}; use test_pd::{mocker::*, util::*, Server as MockServer}; use tikv_util::config::ReadableDuration; fn new_test_server_and_client( update_interval: ReadableDuration, -) -> (MockServer, RpcClient) { +) -> (MockServer, RpcClientV2) { let server = MockServer::new(1); let eps = server.bind_addrs(); - let client = new_client_with_update_interval(eps, None, update_interval); + let client = new_client_v2_with_update_interval(eps, None, update_interval); (server, client) } macro_rules! request { ($client: ident => block_on($func: tt($($arg: expr),*))) => { (stringify!($func), { - let client = $client.clone(); + let mut client = $client.clone(); Box::new(move || { - let _ = futures::executor::block_on(client.$func($($arg),*)); + let _ = block_on(client.$func($($arg),*)); }) }) }; ($client: ident => $func: tt($($arg: expr),*)) => { (stringify!($func), { - let client = $client.clone(); + let mut client = $client.clone(); Box::new(move || { let _ = client.$func($($arg),*); }) @@ -44,13 +45,12 @@ macro_rules! 
request { #[test] fn test_pd_client_deadlock() { let (_server, client) = new_test_server_and_client(ReadableDuration::millis(100)); - let client = Arc::new(client); let pd_client_reconnect_fp = "pd_client_reconnect"; // It contains all interfaces of PdClient. let test_funcs: Vec<(_, Box)> = vec![ request!(client => reconnect()), - request!(client => get_cluster_id()), + request!(client => fetch_cluster_id()), request!(client => bootstrap_cluster(Store::default(), Region::default())), request!(client => is_cluster_bootstrapped()), request!(client => alloc_id()), @@ -60,19 +60,15 @@ fn test_pd_client_deadlock() { request!(client => get_cluster_config()), request!(client => get_region(b"")), request!(client => get_region_info(b"")), - request!(client => block_on(get_region_async(b""))), - request!(client => block_on(get_region_info_async(b""))), request!(client => block_on(get_region_by_id(0))), - request!(client => block_on(region_heartbeat(0, Region::default(), Peer::default(), RegionStat::default(), None))), request!(client => block_on(ask_split(Region::default()))), request!(client => block_on(ask_batch_split(Region::default(), 1))), request!(client => block_on(store_heartbeat(Default::default(), None, None))), request!(client => block_on(report_batch_split(vec![]))), request!(client => scatter_region(RegionInfo::new(Region::default(), None))), request!(client => block_on(get_gc_safe_point())), - request!(client => block_on(get_store_stats_async(0))), + request!(client => block_on(get_store_and_stats(0))), request!(client => get_operator(0)), - request!(client => block_on(get_tso())), request!(client => load_global_config(vec![])), ]; @@ -87,10 +83,6 @@ fn test_pd_client_deadlock() { func(); tx.send(()).unwrap(); }); - // Only allow to reconnect once for a func. - client.handle_reconnect(move || { - fail::cfg(pd_client_reconnect_fp, "return").unwrap(); - }); // Remove the fail point to let the PD client thread go on. 
fail::remove(pd_client_reconnect_fp); @@ -107,7 +99,7 @@ fn test_pd_client_deadlock() { #[test] fn test_load_global_config() { - let (mut _server, client) = new_test_server_and_client(ReadableDuration::millis(100)); + let (mut _server, mut client) = new_test_server_and_client(ReadableDuration::millis(100)); let res = futures::executor::block_on(async move { client .load_global_config( @@ -125,12 +117,11 @@ fn test_load_global_config() { #[test] fn test_watch_global_config_on_closed_server() { - let (mut server, client) = new_test_server_and_client(ReadableDuration::millis(100)); - let client = Arc::new(client); + let (mut server, mut client) = new_test_server_and_client(ReadableDuration::millis(100)); use futures::StreamExt; let j = std::thread::spawn(move || { - futures::executor::block_on(async move { - let mut r = client.watch_global_config().unwrap(); + let mut r = client.watch_global_config().unwrap(); + block_on(async move { let mut i: usize = 0; while let Some(r) = r.next().await { match r { @@ -181,11 +172,11 @@ fn test_slow_periodical_update() { // client1 updates leader frequently (100ms). cfg.update_interval = ReadableDuration(Duration::from_millis(100)); - let _client1 = RpcClient::new(&cfg, Some(env.clone()), mgr.clone()).unwrap(); + let _client1 = RpcClientV2::new(&cfg, Some(env.clone()), mgr.clone()).unwrap(); // client2 never updates leader in the test. cfg.update_interval = ReadableDuration(Duration::from_secs(100)); - let client2 = RpcClient::new(&cfg, Some(env), mgr).unwrap(); + let mut client2 = RpcClientV2::new(&cfg, Some(env), mgr).unwrap(); fail::cfg(pd_client_reconnect_fp, "pause").unwrap(); // Wait for the PD client thread blocking on the fail point. @@ -208,23 +199,95 @@ fn test_slow_periodical_update() { handle.join().unwrap(); } -// Reconnection will be speed limited. 
+fn run_on_bad_connection(client: &mut RpcClientV2, mut f: F) +where + F: FnMut(&mut RpcClientV2), +{ + let pd_client_force_reconnect_fp = "pd_client_force_reconnect"; + if !client.initialized() { + client.is_cluster_bootstrapped().unwrap(); + } + client.reset_to_lame_client(); + fail::cfg(pd_client_force_reconnect_fp, "return").unwrap(); + f(client); + fail::remove(pd_client_force_reconnect_fp); +} + #[test] -fn test_reconnect_limit() { - let pd_client_reconnect_fp = "pd_client_reconnect"; - let (_server, client) = new_test_server_and_client(ReadableDuration::secs(100)); +fn test_backoff() { + let pd_client_v2_timeout_fp = "pd_client_v2_request_timeout"; + fail::cfg(pd_client_v2_timeout_fp, "return(5ms)").unwrap(); + // Backoff larger than timeout, so that the second request following the failed + // one can hit backoff. + let pd_client_v2_backoff_fp = "pd_client_v2_backoff"; + fail::cfg(pd_client_v2_backoff_fp, "return(100ms)").unwrap(); + let (_server, mut client) = new_test_server_and_client(ReadableDuration::secs(100)); - // The GLOBAL_RECONNECT_INTERVAL is 0.1s so sleeps 0.2s here. - thread::sleep(Duration::from_millis(200)); + run_on_bad_connection(&mut client, |c| { + c.is_cluster_bootstrapped().unwrap_err(); + if c.is_cluster_bootstrapped().is_ok() { + // try again in case the first connect is too early. + run_on_bad_connection(c, |c2| { + c2.is_cluster_bootstrapped().unwrap_err(); + c2.is_cluster_bootstrapped().unwrap_err(); + std::thread::sleep(Duration::from_millis(100)); + c2.is_cluster_bootstrapped().unwrap(); + }); + return; + } + std::thread::sleep(Duration::from_millis(100)); + c.is_cluster_bootstrapped().unwrap(); + }); + + fail::remove(pd_client_v2_timeout_fp); + fail::remove(pd_client_v2_backoff_fp); +} + +#[test] +fn test_retry() { + let pd_client_v2_timeout_fp = "pd_client_v2_request_timeout"; + fail::cfg(pd_client_v2_timeout_fp, "return(10ms)").unwrap(); + // Disable backoff. 
+ let pd_client_v2_backoff_fp = "pd_client_v2_backoff"; + fail::cfg(pd_client_v2_backoff_fp, "return(0s)").unwrap(); + let (_server, mut client) = new_test_server_and_client(ReadableDuration::secs(100)); - // The first reconnection will succeed, and the last_update will not be updated. - fail::cfg(pd_client_reconnect_fp, "return").unwrap(); - client.reconnect().unwrap(); - // The subsequent reconnection will be cancelled. - for _ in 0..10 { - let ret = client.reconnect(); - assert!(format!("{:?}", ret.unwrap_err()).contains("cancel reconnection")); + fn test_retry_success(client: &mut RpcClientV2, mut f: F) + where + F: FnMut(&mut RpcClientV2) -> pd_client::Result, + R: std::fmt::Debug, + { + run_on_bad_connection(client, |c| { + f(c).unwrap_err(); + f(c).unwrap(); + }); } - fail::remove(pd_client_reconnect_fp); + test_retry_success(&mut client, |c| { + c.bootstrap_cluster(Store::default(), Region::default()) + }); + test_retry_success(&mut client, |c| c.is_cluster_bootstrapped()); + test_retry_success(&mut client, |c| c.alloc_id()); + test_retry_success(&mut client, |c| c.put_store(Store::default())); + test_retry_success(&mut client, |c| c.get_store(0)); + test_retry_success(&mut client, |c| c.get_all_stores(false)); + test_retry_success(&mut client, |c| c.get_cluster_config()); + test_retry_success(&mut client, |c| c.get_region_info(b"")); + test_retry_success(&mut client, |c| block_on(c.get_region_by_id(0))); + test_retry_success(&mut client, |c| { + block_on(c.ask_batch_split(Region::default(), 1)) + }); + test_retry_success(&mut client, |c| { + block_on(c.store_heartbeat(Default::default(), None, None)) + }); + test_retry_success(&mut client, |c| block_on(c.report_batch_split(vec![]))); + test_retry_success(&mut client, |c| { + c.scatter_region(RegionInfo::new(Region::default(), None)) + }); + test_retry_success(&mut client, |c| block_on(c.get_gc_safe_point())); + test_retry_success(&mut client, |c| c.get_operator(0)); + test_retry_success(&mut client, |c| 
block_on(c.load_global_config(vec![]))); + + fail::remove(pd_client_v2_timeout_fp); + fail::remove(pd_client_v2_backoff_fp); } diff --git a/tests/failpoints/cases/test_pd_client_legacy.rs b/tests/failpoints/cases/test_pd_client_legacy.rs new file mode 100644 index 00000000000..eb22ac29e45 --- /dev/null +++ b/tests/failpoints/cases/test_pd_client_legacy.rs @@ -0,0 +1,230 @@ +// Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + sync::{mpsc, Arc}, + thread, + time::Duration, +}; + +use grpcio::EnvBuilder; +use kvproto::metapb::*; +use pd_client::{PdClient, RegionInfo, RegionStat, RpcClient}; +use security::{SecurityConfig, SecurityManager}; +use test_pd::{mocker::*, util::*, Server as MockServer}; +use tikv_util::config::ReadableDuration; + +fn new_test_server_and_client( + update_interval: ReadableDuration, +) -> (MockServer, RpcClient) { + let server = MockServer::new(1); + let eps = server.bind_addrs(); + let client = new_client_with_update_interval(eps, None, update_interval); + (server, client) +} + +macro_rules! request { + ($client: ident => block_on($func: tt($($arg: expr),*))) => { + (stringify!($func), { + let client = $client.clone(); + Box::new(move || { + let _ = futures::executor::block_on(client.$func($($arg),*)); + }) + }) + }; + ($client: ident => $func: tt($($arg: expr),*)) => { + (stringify!($func), { + let client = $client.clone(); + Box::new(move || { + let _ = client.$func($($arg),*); + }) + }) + }; +} + +#[test] +fn test_pd_client_deadlock() { + let (_server, client) = new_test_server_and_client(ReadableDuration::millis(100)); + let client = Arc::new(client); + let pd_client_reconnect_fp = "pd_client_reconnect"; + + // It contains all interfaces of PdClient. 
+ let test_funcs: Vec<(_, Box)> = vec![ + request!(client => reconnect()), + request!(client => get_cluster_id()), + request!(client => bootstrap_cluster(Store::default(), Region::default())), + request!(client => is_cluster_bootstrapped()), + request!(client => alloc_id()), + request!(client => put_store(Store::default())), + request!(client => get_store(0)), + request!(client => get_all_stores(false)), + request!(client => get_cluster_config()), + request!(client => get_region(b"")), + request!(client => get_region_info(b"")), + request!(client => block_on(get_region_async(b""))), + request!(client => block_on(get_region_info_async(b""))), + request!(client => block_on(get_region_by_id(0))), + request!(client => block_on(region_heartbeat(0, Region::default(), Peer::default(), RegionStat::default(), None))), + request!(client => block_on(ask_split(Region::default()))), + request!(client => block_on(ask_batch_split(Region::default(), 1))), + request!(client => block_on(store_heartbeat(Default::default(), None, None))), + request!(client => block_on(report_batch_split(vec![]))), + request!(client => scatter_region(RegionInfo::new(Region::default(), None))), + request!(client => block_on(get_gc_safe_point())), + request!(client => block_on(get_store_stats_async(0))), + request!(client => get_operator(0)), + request!(client => block_on(get_tso())), + request!(client => load_global_config(vec![])), + ]; + + for (name, func) in test_funcs { + fail::cfg(pd_client_reconnect_fp, "pause").unwrap(); + // Wait for the PD client thread blocking on the fail point. + // The GLOBAL_RECONNECT_INTERVAL is 0.1s so sleeps 0.2s here. + thread::sleep(Duration::from_millis(200)); + + let (tx, rx) = mpsc::channel(); + let handle = thread::spawn(move || { + func(); + tx.send(()).unwrap(); + }); + // Only allow to reconnect once for a func. 
+ client.handle_reconnect(move || { + fail::cfg(pd_client_reconnect_fp, "return").unwrap(); + }); + // Remove the fail point to let the PD client thread go on. + fail::remove(pd_client_reconnect_fp); + + let timeout = Duration::from_millis(500); + if rx.recv_timeout(timeout).is_err() { + panic!("PdClient::{}() hangs", name); + } + handle.join().unwrap(); + } + + drop(client); + fail::remove(pd_client_reconnect_fp); +} + +#[test] +fn test_load_global_config() { + let (mut _server, client) = new_test_server_and_client(ReadableDuration::millis(100)); + let res = futures::executor::block_on(async move { + client + .load_global_config( + ["abc", "123", "xyz"] + .iter() + .map(|x| x.to_string()) + .collect::>(), + ) + .await + }); + for (k, v) in res.unwrap() { + assert_eq!(k, format!("/global/config/{}", v)) + } +} + +#[test] +fn test_watch_global_config_on_closed_server() { + let (mut server, client) = new_test_server_and_client(ReadableDuration::millis(100)); + let client = Arc::new(client); + use futures::StreamExt; + let j = std::thread::spawn(move || { + futures::executor::block_on(async move { + let mut r = client.watch_global_config().unwrap(); + let mut i: usize = 0; + while let Some(r) = r.next().await { + match r { + Ok(res) => { + let change = &res.get_changes()[0]; + assert_eq!( + change + .get_name() + .split('/') + .collect::>() + .last() + .unwrap() + .to_owned(), + format!("{:?}", i) + ); + assert_eq!(change.get_value().to_owned(), format!("{:?}", i)); + i += 1; + } + Err(e) => { + if let grpcio::Error::RpcFailure(e) = e { + // 14-UNAVAILABLE + assert_eq!(e.code(), grpcio::RpcStatusCode::from(14)); + break; + } else { + panic!("other error occur {:?}", e) + } + } + } + } + }); + }); + thread::sleep(Duration::from_millis(200)); + server.stop(); + j.join().unwrap(); +} + +// Updating pd leader may be slow, we need to make sure it does not block other +// RPC in the same gRPC Environment. 
+#[test] +fn test_slow_periodical_update() { + let pd_client_reconnect_fp = "pd_client_reconnect"; + let server = MockServer::new(1); + let eps = server.bind_addrs(); + + let mut cfg = new_config(eps); + let env = Arc::new(EnvBuilder::new().cq_count(1).build()); + let mgr = Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()); + + // client1 updates leader frequently (100ms). + cfg.update_interval = ReadableDuration(Duration::from_millis(100)); + let _client1 = RpcClient::new(&cfg, Some(env.clone()), mgr.clone()).unwrap(); + + // client2 never updates leader in the test. + cfg.update_interval = ReadableDuration(Duration::from_secs(100)); + let client2 = RpcClient::new(&cfg, Some(env), mgr).unwrap(); + + fail::cfg(pd_client_reconnect_fp, "pause").unwrap(); + // Wait for the PD client thread blocking on the fail point. + // The GLOBAL_RECONNECT_INTERVAL is 0.1s so sleeps 0.2s here. + thread::sleep(Duration::from_millis(200)); + + let (tx, rx) = mpsc::channel(); + let handle = thread::spawn(move || { + client2.alloc_id().unwrap(); + tx.send(()).unwrap(); + }); + + let timeout = Duration::from_millis(500); + if rx.recv_timeout(timeout).is_err() { + panic!("pd client2 is blocked"); + } + + // Clean up the fail point. + fail::remove(pd_client_reconnect_fp); + handle.join().unwrap(); +} + +// Reconnection will be speed limited. +#[test] +fn test_reconnect_limit() { + let pd_client_reconnect_fp = "pd_client_reconnect"; + let (_server, client) = new_test_server_and_client(ReadableDuration::secs(100)); + + // The GLOBAL_RECONNECT_INTERVAL is 0.1s so sleeps 0.2s here. + thread::sleep(Duration::from_millis(200)); + + // The first reconnection will succeed, and the last_update will not be updated. + fail::cfg(pd_client_reconnect_fp, "return").unwrap(); + client.reconnect().unwrap(); + // The subsequent reconnection will be cancelled. 
+ for _ in 0..10 { + let ret = client.reconnect(); + assert!(format!("{:?}", ret.unwrap_err()).contains("cancel reconnection")); + } + + fail::remove(pd_client_reconnect_fp); +} diff --git a/tests/integrations/pd/mod.rs b/tests/integrations/pd/mod.rs index 2cadf7db2b5..eb9b6cc092a 100644 --- a/tests/integrations/pd/mod.rs +++ b/tests/integrations/pd/mod.rs @@ -1,3 +1,4 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. mod test_rpc_client; +mod test_rpc_client_legacy; diff --git a/tests/integrations/pd/test_rpc_client.rs b/tests/integrations/pd/test_rpc_client.rs index 5f44cc0137b..23841ba5dfd 100644 --- a/tests/integrations/pd/test_rpc_client.rs +++ b/tests/integrations/pd/test_rpc_client.rs @@ -1,26 +1,40 @@ -// Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{ - sync::{ - atomic::{AtomicUsize, Ordering}, - mpsc, Arc, - }, - thread, - time::Duration, -}; +use std::{sync::Arc, thread, time::Duration}; use error_code::ErrorCodeExt; -use futures::executor::block_on; +use futures::{executor::block_on, StreamExt}; use grpcio::{EnvBuilder, Error as GrpcError, RpcStatus, RpcStatusCode}; use kvproto::{metapb, pdpb}; -use pd_client::{Error as PdError, Feature, PdClient, PdConnector, RegionStat, RpcClient}; -use raftstore::store; +use pd_client::{Error as PdError, Feature, PdClientV2, PdConnector, RpcClientV2}; use security::{SecurityConfig, SecurityManager}; use test_pd::{mocker::*, util::*, Server as MockServer}; -use tikv_util::config::ReadableDuration; -use tokio::runtime::Builder; +use tikv_util::{config::ReadableDuration, mpsc::future::WakePolicy, thd_name}; +use tokio::runtime::{Builder, Runtime}; use txn_types::TimeStamp; +fn setup_runtime() -> Runtime { + Builder::new_multi_thread() + .thread_name(thd_name!("poller")) + .worker_threads(1) + .enable_all() + .build() + .unwrap() +} + +fn must_get_tso(client: &mut RpcClientV2, count: u32) -> TimeStamp { + 
let (tx, mut responses) = client.create_tso_stream(WakePolicy::Immediately).unwrap(); + let mut req = pdpb::TsoRequest::default(); + req.mut_header().cluster_id = client.fetch_cluster_id().unwrap(); + req.count = count; + tx.send(req).unwrap(); + let resp = block_on(responses.next()).unwrap().unwrap(); + let ts = resp.timestamp.unwrap(); + let physical = ts.physical as u64; + let logical = ts.logical as u64; + TimeStamp::compose(physical, logical) +} + #[test] fn test_retry_rpc_client() { let eps_count = 1; @@ -32,7 +46,7 @@ fn test_retry_rpc_client() { server.stop(); let child = thread::spawn(move || { let cfg = new_config(m_eps); - RpcClient::new(&cfg, None, m_mgr).unwrap(); + RpcClientV2::new(&cfg, None, m_mgr).unwrap(); }); thread::sleep(Duration::from_millis(500)); server.start(&mgr, eps); @@ -41,12 +55,14 @@ fn test_retry_rpc_client() { #[test] fn test_rpc_client() { + let rt = setup_runtime(); + let _g = rt.enter(); let eps_count = 1; let server = MockServer::new(eps_count); let eps = server.bind_addrs(); - let client = new_client(eps.clone(), None); - assert_ne!(client.get_cluster_id().unwrap(), 0); + let mut client = new_client_v2(eps.clone(), None); + assert_ne!(client.fetch_cluster_id().unwrap(), 0); let store_id = client.alloc_id().unwrap(); let mut store = metapb::Store::default(); @@ -89,38 +105,32 @@ fn test_rpc_client() { .unwrap(); assert_eq!(tmp_region.get_id(), region.get_id()); - let ts = block_on(client.get_tso()).unwrap(); + let ts = must_get_tso(&mut client, 1); assert_ne!(ts, TimeStamp::zero()); - let ts100 = block_on(client.batch_get_tso(100)).unwrap(); + let ts100 = must_get_tso(&mut client, 100); assert_eq!(ts.logical() + 100, ts100.logical()); let mut prev_id = 0; for _ in 0..100 { - let client = new_client(eps.clone(), None); + let mut client = new_client_v2(eps.clone(), None); let alloc_id = client.alloc_id().unwrap(); assert!(alloc_id > prev_id); prev_id = alloc_id; } - let poller = Builder::new_multi_thread() - 
.thread_name(thd_name!("poller")) - .worker_threads(1) - .build() + let (tx, mut responses) = client + .create_region_heartbeat_stream(WakePolicy::Immediately) .unwrap(); - let (tx, rx) = mpsc::channel(); - let f = client.handle_region_heartbeat_response(1, move |resp| { - let _ = tx.send(resp); - }); - poller.spawn(f); - poller.spawn(client.region_heartbeat( - store::RAFT_INIT_LOG_TERM, - region.clone(), - peer.clone(), - RegionStat::default(), - None, - )); - rx.recv_timeout(Duration::from_secs(3)).unwrap(); + let mut req = pdpb::RegionHeartbeatRequest::default(); + req.set_region(region.clone()); + req.set_leader(peer.clone()); + tx.send(req).unwrap(); + block_on(tokio::time::timeout( + Duration::from_secs(3), + responses.next(), + )) + .unwrap(); let region_info = client.get_region_info(region_key).unwrap(); assert_eq!(region_info.region, region); @@ -150,26 +160,14 @@ fn test_connect_follower() { // test switch cfg.enable_forwarding = false; let mgr = Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()); - let client1 = RpcClient::new(&cfg, None, mgr).unwrap(); + let mut client1 = RpcClientV2::new(&cfg, None, mgr).unwrap(); fail::cfg(connect_leader_fp, "return").unwrap(); - // RECONNECT_INTERVAL_SEC is 1s. - thread::sleep(Duration::from_secs(1)); - let res = format!("{}", client1.alloc_id().unwrap_err()); - let err = format!( - "{}", - PdError::Grpc(GrpcError::RpcFailure(RpcStatus::with_message( - RpcStatusCode::UNAVAILABLE, - "".to_string(), - ))) - ); - assert_eq!(res, err); + client1.alloc_id().unwrap_err(); cfg.enable_forwarding = true; let mgr = Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()); - let client = RpcClient::new(&cfg, None, mgr).unwrap(); - // RECONNECT_INTERVAL_SEC is 1s. 
- thread::sleep(Duration::from_secs(1)); - let leader_addr = client1.get_leader().get_client_urls()[0].clone(); + let mut client = RpcClientV2::new(&cfg, None, mgr).unwrap(); + let leader_addr = client.get_leader().get_client_urls()[0].clone(); let res = format!("{}", client.alloc_id().unwrap_err()); let err = format!( "{}", @@ -188,7 +186,7 @@ fn test_get_tombstone_stores() { let eps_count = 1; let server = MockServer::new(eps_count); let eps = server.bind_addrs(); - let client = new_client(eps, None); + let mut client = new_client_v2(eps, None); let mut all_stores = vec![]; let store_id = client.alloc_id().unwrap(); @@ -242,7 +240,7 @@ fn test_get_tombstone_store() { let eps_count = 1; let server = MockServer::new(eps_count); let eps = server.bind_addrs(); - let client = new_client(eps, None); + let mut client = new_client_v2(eps, None); let mut all_stores = vec![]; let store_id = client.alloc_id().unwrap(); @@ -264,7 +262,7 @@ fn test_get_tombstone_store() { store99.set_state(metapb::StoreState::Tombstone); server.default_handler().add_store(store99.clone()); - let r = block_on(client.get_store_async(99)); + let r = client.get_store(99); assert_eq!(r.unwrap_err().error_code(), error_code::pd::STORE_TOMBSTONE); } @@ -273,7 +271,7 @@ fn test_reboot() { let eps_count = 1; let server = MockServer::with_case(eps_count, Arc::new(AlreadyBootstrapped)); let eps = server.bind_addrs(); - let client = new_client(eps, None); + let mut client = new_client_v2(eps, None); assert!(!client.is_cluster_bootstrapped().unwrap()); @@ -299,7 +297,7 @@ fn test_validate_endpoints() { let mgr = Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()); let connector = PdConnector::new(env, mgr); - assert!(block_on(connector.validate_endpoints(&new_config(eps))).is_err()); + assert!(block_on(connector.validate_endpoints(&new_config(eps), true)).is_err()); } #[test] @@ -318,66 +316,7 @@ fn test_validate_endpoints_retry() { eps.pop(); let mgr = 
Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()); let connector = PdConnector::new(env, mgr); - assert!(block_on(connector.validate_endpoints(&new_config(eps))).is_err()); -} - -fn test_retry(func: F) { - let eps_count = 1; - // Retry mocker returns `Err(_)` for most request, here two thirds are `Err(_)`. - let retry = Arc::new(Retry::new(3)); - let server = MockServer::with_case(eps_count, retry); - let eps = server.bind_addrs(); - - let client = new_client(eps, None); - - for _ in 0..3 { - func(&client); - } -} - -#[test] -fn test_retry_async() { - let r#async = |client: &RpcClient| { - block_on(client.get_region_by_id(1)).unwrap(); - }; - test_retry(r#async); -} - -#[test] -fn test_retry_sync() { - let sync = |client: &RpcClient| { - client.get_store(1).unwrap(); - }; - test_retry(sync) -} - -fn test_not_retry(func: F) { - let eps_count = 1; - // NotRetry mocker returns Ok() with error header first, and next returns Ok() - // without any error header. - let not_retry = Arc::new(NotRetry::new()); - let server = MockServer::with_case(eps_count, not_retry); - let eps = server.bind_addrs(); - - let client = new_client(eps, None); - - func(&client); -} - -#[test] -fn test_not_retry_async() { - let r#async = |client: &RpcClient| { - block_on(client.get_region_by_id(1)).unwrap_err(); - }; - test_not_retry(r#async); -} - -#[test] -fn test_not_retry_sync() { - let sync = |client: &RpcClient| { - client.get_store(1).unwrap_err(); - }; - test_not_retry(sync); + assert!(block_on(connector.validate_endpoints(&new_config(eps), true)).is_err()); } #[test] @@ -386,7 +325,7 @@ fn test_incompatible_version() { let server = MockServer::with_case(1, incompatible); let eps = server.bind_addrs(); - let client = new_client(eps, None); + let mut client = new_client_v2(eps, None); let resp = block_on(client.ask_batch_split(metapb::Region::default(), 2)); assert_eq!( @@ -402,7 +341,7 @@ fn restart_leader(mgr: SecurityManager) { MockServer::::with_configuration(&mgr, 
vec![("127.0.0.1".to_owned(), 0); 3], None); let eps = server.bind_addrs(); - let client = new_client(eps.clone(), Some(Arc::clone(&mgr))); + let mut client = new_client_v2(eps.clone(), Some(Arc::clone(&mgr))); // Put a region. let store_id = client.alloc_id().unwrap(); let mut store = metapb::Store::default(); @@ -453,12 +392,8 @@ fn test_change_leader_async() { let server = MockServer::with_case(eps_count, Arc::new(LeaderChange::new())); let eps = server.bind_addrs(); - let counter = Arc::new(AtomicUsize::new(0)); - let client = new_client(eps, None); - let counter1 = Arc::clone(&counter); - client.handle_reconnect(move || { - counter1.fetch_add(1, Ordering::SeqCst); - }); + let mut client = new_client_v2(eps, None); + let mut reconnect_recv = client.subscribe_reconnect(); let leader = client.get_leader(); for _ in 0..5 { @@ -467,7 +402,10 @@ fn test_change_leader_async() { let new = client.get_leader(); if new != leader { - assert!(counter.load(Ordering::SeqCst) >= 1); + assert!(matches!( + reconnect_recv.try_recv(), + Ok(_) | Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) + )); return; } thread::sleep(LeaderChange::get_leader_interval()); @@ -482,7 +420,7 @@ fn test_pd_client_ok_when_cluster_not_ready() { let server = MockServer::with_case(3, Arc::new(AlreadyBootstrapped)); let eps = server.bind_addrs(); - let client = new_client(eps, None); + let mut client = new_client_v2(eps, None); fail::cfg(pd_client_cluster_id_zero, "return()").unwrap(); // wait 100ms to let client load member. 
thread::sleep(Duration::from_millis(101)); @@ -492,36 +430,33 @@ fn test_pd_client_ok_when_cluster_not_ready() { #[test] fn test_pd_client_heartbeat_send_failed() { + let rt = setup_runtime(); + let _g = rt.enter(); let pd_client_send_fail_fp = "region_heartbeat_send_failed"; fail::cfg(pd_client_send_fail_fp, "return()").unwrap(); let server = MockServer::with_case(1, Arc::new(AlreadyBootstrapped)); let eps = server.bind_addrs(); - let client = new_client(eps, None); - let poller = Builder::new_multi_thread() - .thread_name(thd_name!("poller")) - .worker_threads(1) - .build() + let mut client = new_client_v2(eps, None); + + let (tx, mut responses) = client + .create_region_heartbeat_stream(WakePolicy::Immediately) .unwrap(); - let (tx, rx) = mpsc::channel(); - let f = - client.handle_region_heartbeat_response(1, move |resp| tx.send(resp).unwrap_or_default()); - poller.spawn(f); - let heartbeat_send_fail = |ok| { + let mut heartbeat_send_fail = |ok| { let mut region = metapb::Region::default(); region.set_id(1); - poller.spawn(client.region_heartbeat( - store::RAFT_INIT_LOG_TERM, - region, - metapb::Peer::default(), - RegionStat::default(), - None, + let mut req = pdpb::RegionHeartbeatRequest::default(); + req.set_region(region); + tx.send(req).unwrap(); + + let rsp = block_on(tokio::time::timeout( + Duration::from_millis(100), + responses.next(), )); - let rsp = rx.recv_timeout(Duration::from_millis(100)); if ok { assert!(rsp.is_ok()); - assert_eq!(rsp.unwrap().get_region_id(), 1); + assert_eq!(rsp.unwrap().unwrap().unwrap().get_region_id(), 1); } else { rsp.unwrap_err(); } @@ -545,35 +480,28 @@ fn test_pd_client_heartbeat_send_failed() { #[test] fn test_region_heartbeat_on_leader_change() { + let rt = setup_runtime(); + let _g = rt.enter(); let eps_count = 3; let server = MockServer::with_case(eps_count, Arc::new(LeaderChange::new())); let eps = server.bind_addrs(); - let client = new_client(eps, None); - let poller = Builder::new_multi_thread() - 
.thread_name(thd_name!("poller")) - .worker_threads(1) - .build() - .unwrap(); - let (tx, rx) = mpsc::channel(); - let f = client.handle_region_heartbeat_response(1, move |resp| { - tx.send(resp).unwrap(); - }); - poller.spawn(f); - let region = metapb::Region::default(); - let peer = metapb::Peer::default(); - let stat = RegionStat::default(); - poller.spawn(client.region_heartbeat( - store::RAFT_INIT_LOG_TERM, - region.clone(), - peer.clone(), - stat.clone(), - None, - )); - rx.recv_timeout(LeaderChange::get_leader_interval()) + let mut client = new_client_v2(eps, None); + + let (tx, mut responses) = client + .create_region_heartbeat_stream(WakePolicy::Immediately) .unwrap(); - let heartbeat_on_leader_change = |count| { + tx.send(pdpb::RegionHeartbeatRequest::default()).unwrap(); + block_on(tokio::time::timeout( + LeaderChange::get_leader_interval(), + responses.next(), + )) + .unwrap() + .unwrap() + .unwrap(); + + let mut heartbeat_on_leader_change = |count| { let mut leader = client.get_leader(); for _ in 0..count { loop { @@ -587,15 +515,14 @@ fn test_region_heartbeat_on_leader_change() { thread::sleep(LeaderChange::get_leader_interval()); } } - poller.spawn(client.region_heartbeat( - store::RAFT_INIT_LOG_TERM, - region.clone(), - peer.clone(), - stat.clone(), - None, - )); - rx.recv_timeout(LeaderChange::get_leader_interval()) - .unwrap(); + tx.send(pdpb::RegionHeartbeatRequest::default()).unwrap(); + block_on(tokio::time::timeout( + LeaderChange::get_leader_interval(), + responses.next(), + )) + .unwrap() + .unwrap() + .unwrap(); }; // Change PD leader once then heartbeat PD. 
@@ -612,18 +539,17 @@ fn test_periodical_update() { let server = MockServer::with_case(eps_count, Arc::new(LeaderChange::new())); let eps = server.bind_addrs(); - let counter = Arc::new(AtomicUsize::new(0)); - let client = new_client_with_update_interval(eps, None, ReadableDuration::secs(3)); - let counter1 = Arc::clone(&counter); - client.handle_reconnect(move || { - counter1.fetch_add(1, Ordering::SeqCst); - }); + let mut client = new_client_v2_with_update_interval(eps, None, ReadableDuration::secs(3)); + let mut reconnect_recv = client.subscribe_reconnect(); let leader = client.get_leader(); for _ in 0..5 { let new = client.get_leader(); if new != leader { - assert!(counter.load(Ordering::SeqCst) >= 1); + assert!(matches!( + reconnect_recv.try_recv(), + Ok(_) | Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) + )); return; } thread::sleep(LeaderChange::get_leader_interval()); @@ -641,13 +567,14 @@ fn test_cluster_version() { let feature_b = Feature::require(5, 0, 0); let feature_c = Feature::require(5, 0, 1); - let client = new_client(eps, None); - let feature_gate = client.feature_gate(); + let mut client = new_client_v2(eps, None); + let feature_gate = client.feature_gate().clone(); assert!(!feature_gate.can_enable(feature_a)); - let emit_heartbeat = || { + let mut client_clone = client.clone(); + let mut emit_heartbeat = || { let req = pdpb::StoreStats::default(); - block_on(client.store_heartbeat(req, /* store_report= */ None, None)).unwrap(); + block_on(client_clone.store_heartbeat(req, /* store_report= */ None, None)).unwrap(); }; let set_cluster_version = |version: &str| { diff --git a/tests/integrations/pd/test_rpc_client_legacy.rs b/tests/integrations/pd/test_rpc_client_legacy.rs new file mode 100644 index 00000000000..d2ff6d6ac11 --- /dev/null +++ b/tests/integrations/pd/test_rpc_client_legacy.rs @@ -0,0 +1,691 @@ +// Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + sync::{ + atomic::{AtomicUsize, Ordering}, + mpsc, Arc, + }, + thread, + time::Duration, +}; + +use error_code::ErrorCodeExt; +use futures::executor::block_on; +use grpcio::{EnvBuilder, Error as GrpcError, RpcStatus, RpcStatusCode}; +use kvproto::{metapb, pdpb}; +use pd_client::{Error as PdError, Feature, PdClient, PdConnector, RegionStat, RpcClient}; +use raftstore::store; +use security::{SecurityConfig, SecurityManager}; +use test_pd::{mocker::*, util::*, Server as MockServer}; +use tikv_util::config::ReadableDuration; +use tokio::runtime::Builder; +use txn_types::TimeStamp; + +#[test] +fn test_retry_rpc_client() { + let eps_count = 1; + let mut server = MockServer::new(eps_count); + let eps = server.bind_addrs(); + let m_eps = eps.clone(); + let mgr = Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()); + let m_mgr = mgr.clone(); + server.stop(); + let child = thread::spawn(move || { + let cfg = new_config(m_eps); + RpcClient::new(&cfg, None, m_mgr).unwrap(); + }); + thread::sleep(Duration::from_millis(500)); + server.start(&mgr, eps); + child.join().unwrap(); +} + +#[test] +fn test_rpc_client() { + let eps_count = 1; + let server = MockServer::new(eps_count); + let eps = server.bind_addrs(); + + let client = new_client(eps.clone(), None); + assert_ne!(client.get_cluster_id().unwrap(), 0); + + let store_id = client.alloc_id().unwrap(); + let mut store = metapb::Store::default(); + store.set_id(store_id); + debug!("bootstrap store {:?}", store); + + let peer_id = client.alloc_id().unwrap(); + let mut peer = metapb::Peer::default(); + peer.set_id(peer_id); + peer.set_store_id(store_id); + + let region_id = client.alloc_id().unwrap(); + let mut region = metapb::Region::default(); + region.set_id(region_id); + region.mut_peers().push(peer.clone()); + debug!("bootstrap region {:?}", region); + + client + .bootstrap_cluster(store.clone(), region.clone()) + .unwrap(); + assert_eq!(client.is_cluster_bootstrapped().unwrap(), true); + + let 
tmp_stores = client.get_all_stores(false).unwrap(); + assert_eq!(tmp_stores.len(), 1); + assert_eq!(tmp_stores[0], store); + + let tmp_store = client.get_store(store_id).unwrap(); + assert_eq!(tmp_store.get_id(), store.get_id()); + + let region_key = region.get_start_key(); + let tmp_region = client.get_region(region_key).unwrap(); + assert_eq!(tmp_region.get_id(), region.get_id()); + + let region_info = client.get_region_info(region_key).unwrap(); + assert_eq!(region_info.region, region); + assert_eq!(region_info.leader, None); + + let tmp_region = block_on(client.get_region_by_id(region_id)) + .unwrap() + .unwrap(); + assert_eq!(tmp_region.get_id(), region.get_id()); + + let ts = block_on(client.get_tso()).unwrap(); + assert_ne!(ts, TimeStamp::zero()); + + let ts100 = block_on(client.batch_get_tso(100)).unwrap(); + assert_eq!(ts.logical() + 100, ts100.logical()); + + let mut prev_id = 0; + for _ in 0..100 { + let client = new_client(eps.clone(), None); + let alloc_id = client.alloc_id().unwrap(); + assert!(alloc_id > prev_id); + prev_id = alloc_id; + } + + let poller = Builder::new_multi_thread() + .thread_name(thd_name!("poller")) + .worker_threads(1) + .build() + .unwrap(); + let (tx, rx) = mpsc::channel(); + let f = client.handle_region_heartbeat_response(1, move |resp| { + let _ = tx.send(resp); + }); + poller.spawn(f); + poller.spawn(client.region_heartbeat( + store::RAFT_INIT_LOG_TERM, + region.clone(), + peer.clone(), + RegionStat::default(), + None, + )); + rx.recv_timeout(Duration::from_secs(3)).unwrap(); + + let region_info = client.get_region_info(region_key).unwrap(); + assert_eq!(region_info.region, region); + assert_eq!(region_info.leader.unwrap(), peer); + + block_on(client.store_heartbeat( + pdpb::StoreStats::default(), + None, // store_report + None, + )) + .unwrap(); + block_on(client.ask_batch_split(metapb::Region::default(), 1)).unwrap(); + block_on(client.report_batch_split(vec![metapb::Region::default(), metapb::Region::default()])) + 
.unwrap(); + + let region_info = client.get_region_info(region_key).unwrap(); + client.scatter_region(region_info).unwrap(); +} + +#[test] +fn test_connect_follower() { + let connect_leader_fp = "connect_leader"; + let server = MockServer::new(2); + let eps = server.bind_addrs(); + let mut cfg = new_config(eps); + + // test switch + cfg.enable_forwarding = false; + let mgr = Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()); + let client1 = RpcClient::new(&cfg, None, mgr).unwrap(); + fail::cfg(connect_leader_fp, "return").unwrap(); + // RECONNECT_INTERVAL_SEC is 1s. + thread::sleep(Duration::from_secs(1)); + let res = format!("{}", client1.alloc_id().unwrap_err()); + let err = format!( + "{}", + PdError::Grpc(GrpcError::RpcFailure(RpcStatus::with_message( + RpcStatusCode::UNAVAILABLE, + "".to_string(), + ))) + ); + assert_eq!(res, err); + + cfg.enable_forwarding = true; + let mgr = Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()); + let client = RpcClient::new(&cfg, None, mgr).unwrap(); + // RECONNECT_INTERVAL_SEC is 1s. 
+ thread::sleep(Duration::from_secs(1)); + let leader_addr = client1.get_leader().get_client_urls()[0].clone(); + let res = format!("{}", client.alloc_id().unwrap_err()); + let err = format!( + "{}", + PdError::Grpc(GrpcError::RpcFailure(RpcStatus::with_message( + RpcStatusCode::UNAVAILABLE, + leader_addr, + ))) + ); + assert_eq!(res, err); + + fail::remove(connect_leader_fp); +} + +#[test] +fn test_get_tombstone_stores() { + let eps_count = 1; + let server = MockServer::new(eps_count); + let eps = server.bind_addrs(); + let client = new_client(eps, None); + + let mut all_stores = vec![]; + let store_id = client.alloc_id().unwrap(); + let mut store = metapb::Store::default(); + store.set_id(store_id); + let region_id = client.alloc_id().unwrap(); + let mut region = metapb::Region::default(); + region.set_id(region_id); + client.bootstrap_cluster(store.clone(), region).unwrap(); + + all_stores.push(store); + assert_eq!(client.is_cluster_bootstrapped().unwrap(), true); + let s = client.get_all_stores(false).unwrap(); + assert_eq!(s, all_stores); + + // Add tombstone store. + let mut store99 = metapb::Store::default(); + store99.set_id(99); + store99.set_state(metapb::StoreState::Tombstone); + server.default_handler().add_store(store99.clone()); + + // do not include tombstone. + let s = client.get_all_stores(true).unwrap(); + assert_eq!(s, all_stores); + + all_stores.push(store99.clone()); + all_stores.sort_by_key(|a| a.get_id()); + // include tombstone, there should be 2 stores. + let mut s = client.get_all_stores(false).unwrap(); + s.sort_by_key(|a| a.get_id()); + assert_eq!(s, all_stores); + + // Add another tombstone store. 
+ let mut store199 = store99; + store199.set_id(199); + server.default_handler().add_store(store199.clone()); + + all_stores.push(store199); + all_stores.sort_by_key(|a| a.get_id()); + let mut s = client.get_all_stores(false).unwrap(); + s.sort_by_key(|a| a.get_id()); + assert_eq!(s, all_stores); + + client.get_store(store_id).unwrap(); + client.get_store(99).unwrap_err(); + client.get_store(199).unwrap_err(); +} + +#[test] +fn test_get_tombstone_store() { + let eps_count = 1; + let server = MockServer::new(eps_count); + let eps = server.bind_addrs(); + let client = new_client(eps, None); + + let mut all_stores = vec![]; + let store_id = client.alloc_id().unwrap(); + let mut store = metapb::Store::default(); + store.set_id(store_id); + let region_id = client.alloc_id().unwrap(); + let mut region = metapb::Region::default(); + region.set_id(region_id); + client.bootstrap_cluster(store.clone(), region).unwrap(); + + all_stores.push(store); + assert_eq!(client.is_cluster_bootstrapped().unwrap(), true); + let s = client.get_all_stores(false).unwrap(); + assert_eq!(s, all_stores); + + // Add tombstone store. 
+ let mut store99 = metapb::Store::default(); + store99.set_id(99); + store99.set_state(metapb::StoreState::Tombstone); + server.default_handler().add_store(store99.clone()); + + let r = block_on(client.get_store_async(99)); + assert_eq!(r.unwrap_err().error_code(), error_code::pd::STORE_TOMBSTONE); +} + +#[test] +fn test_reboot() { + let eps_count = 1; + let server = MockServer::with_case(eps_count, Arc::new(AlreadyBootstrapped)); + let eps = server.bind_addrs(); + let client = new_client(eps, None); + + assert!(!client.is_cluster_bootstrapped().unwrap()); + + match client.bootstrap_cluster(metapb::Store::default(), metapb::Region::default()) { + Err(PdError::ClusterBootstrapped(_)) => (), + _ => { + panic!("failed, should return ClusterBootstrapped"); + } + } +} + +#[test] +fn test_validate_endpoints() { + let eps_count = 3; + let server = MockServer::with_case(eps_count, Arc::new(Split::new())); + let env = Arc::new( + EnvBuilder::new() + .cq_count(1) + .name_prefix(thd_name!("test-pd")) + .build(), + ); + let eps = server.bind_addrs(); + + let mgr = Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()); + let connector = PdConnector::new(env, mgr); + assert!(block_on(connector.validate_endpoints(&new_config(eps), false)).is_err()); +} + +#[test] +fn test_validate_endpoints_retry() { + let eps_count = 3; + let server = MockServer::with_case(eps_count, Arc::new(Split::new())); + let env = Arc::new( + EnvBuilder::new() + .cq_count(1) + .name_prefix(thd_name!("test-pd")) + .build(), + ); + let mut eps = server.bind_addrs(); + let mock_port = 65535; + eps.insert(0, ("127.0.0.1".to_string(), mock_port)); + eps.pop(); + let mgr = Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()); + let connector = PdConnector::new(env, mgr); + assert!(block_on(connector.validate_endpoints(&new_config(eps), false)).is_err()); +} + +fn test_retry(func: F) { + let eps_count = 1; + // Retry mocker returns `Err(_)` for most request, here two thirds are 
`Err(_)`. + let retry = Arc::new(Retry::new(3)); + let server = MockServer::with_case(eps_count, retry); + let eps = server.bind_addrs(); + + let client = new_client(eps, None); + + for _ in 0..3 { + func(&client); + } +} + +#[test] +fn test_retry_async() { + let r#async = |client: &RpcClient| { + block_on(client.get_region_by_id(1)).unwrap(); + }; + test_retry(r#async); +} + +#[test] +fn test_retry_sync() { + let sync = |client: &RpcClient| { + client.get_store(1).unwrap(); + }; + test_retry(sync) +} + +fn test_not_retry(func: F) { + let eps_count = 1; + // NotRetry mocker returns Ok() with error header first, and next returns Ok() + // without any error header. + let not_retry = Arc::new(NotRetry::new()); + let server = MockServer::with_case(eps_count, not_retry); + let eps = server.bind_addrs(); + + let client = new_client(eps, None); + + func(&client); +} + +#[test] +fn test_not_retry_async() { + let r#async = |client: &RpcClient| { + block_on(client.get_region_by_id(1)).unwrap_err(); + }; + test_not_retry(r#async); +} + +#[test] +fn test_not_retry_sync() { + let sync = |client: &RpcClient| { + client.get_store(1).unwrap_err(); + }; + test_not_retry(sync); +} + +#[test] +fn test_incompatible_version() { + let incompatible = Arc::new(Incompatible); + let server = MockServer::with_case(1, incompatible); + let eps = server.bind_addrs(); + + let client = new_client(eps, None); + + let resp = block_on(client.ask_batch_split(metapb::Region::default(), 2)); + assert_eq!( + resp.unwrap_err().to_string(), + PdError::Incompatible.to_string() + ); +} + +fn restart_leader(mgr: SecurityManager) { + let mgr = Arc::new(mgr); + // Service has only one GetMembersResponse, so the leader never changes. + let mut server = + MockServer::::with_configuration(&mgr, vec![("127.0.0.1".to_owned(), 0); 3], None); + let eps = server.bind_addrs(); + + let client = new_client(eps.clone(), Some(Arc::clone(&mgr))); + // Put a region. 
+ let store_id = client.alloc_id().unwrap(); + let mut store = metapb::Store::default(); + store.set_id(store_id); + + let peer_id = client.alloc_id().unwrap(); + let mut peer = metapb::Peer::default(); + peer.set_id(peer_id); + peer.set_store_id(store_id); + + let region_id = client.alloc_id().unwrap(); + let mut region = metapb::Region::default(); + region.set_id(region_id); + region.mut_peers().push(peer); + client.bootstrap_cluster(store, region.clone()).unwrap(); + + let region = block_on(client.get_region_by_id(region.get_id())) + .unwrap() + .unwrap(); + + // Stop servers and restart them again. + server.stop(); + server.start(&mgr, eps); + + // The GLOBAL_RECONNECT_INTERVAL is 0.1s so sleeps 0.2s here. + thread::sleep(Duration::from_millis(200)); + + let region = block_on(client.get_region_by_id(region.get_id())).unwrap(); + assert_eq!(region.unwrap().get_id(), region_id); +} + +#[test] +fn test_restart_leader_insecure() { + let mgr = SecurityManager::new(&SecurityConfig::default()).unwrap(); + restart_leader(mgr) +} + +#[test] +fn test_restart_leader_secure() { + let security_cfg = test_util::new_security_cfg(None); + let mgr = SecurityManager::new(&security_cfg).unwrap(); + restart_leader(mgr) +} + +#[test] +fn test_change_leader_async() { + let eps_count = 3; + let server = MockServer::with_case(eps_count, Arc::new(LeaderChange::new())); + let eps = server.bind_addrs(); + + let counter = Arc::new(AtomicUsize::new(0)); + let client = new_client(eps, None); + let counter1 = Arc::clone(&counter); + client.handle_reconnect(move || { + counter1.fetch_add(1, Ordering::SeqCst); + }); + let leader = client.get_leader(); + + for _ in 0..5 { + let region = block_on(client.get_region_by_id(1)); + region.ok(); + + let new = client.get_leader(); + if new != leader { + assert!(counter.load(Ordering::SeqCst) >= 1); + return; + } + thread::sleep(LeaderChange::get_leader_interval()); + } + + panic!("failed, leader should changed"); +} + +#[test] +fn 
test_pd_client_ok_when_cluster_not_ready() { + let pd_client_cluster_id_zero = "cluster_id_is_not_ready"; + let server = MockServer::with_case(3, Arc::new(AlreadyBootstrapped)); + let eps = server.bind_addrs(); + + let client = new_client(eps, None); + fail::cfg(pd_client_cluster_id_zero, "return()").unwrap(); + // wait 100ms to let client load member. + thread::sleep(Duration::from_millis(101)); + assert_eq!(client.reconnect().is_err(), true); + fail::remove(pd_client_cluster_id_zero); +} + +#[test] +fn test_pd_client_heartbeat_send_failed() { + let pd_client_send_fail_fp = "region_heartbeat_send_failed"; + fail::cfg(pd_client_send_fail_fp, "return()").unwrap(); + let server = MockServer::with_case(1, Arc::new(AlreadyBootstrapped)); + let eps = server.bind_addrs(); + + let client = new_client(eps, None); + let poller = Builder::new_multi_thread() + .thread_name(thd_name!("poller")) + .worker_threads(1) + .build() + .unwrap(); + let (tx, rx) = mpsc::channel(); + let f = + client.handle_region_heartbeat_response(1, move |resp| tx.send(resp).unwrap_or_default()); + poller.spawn(f); + + let heartbeat_send_fail = |ok| { + let mut region = metapb::Region::default(); + region.set_id(1); + poller.spawn(client.region_heartbeat( + store::RAFT_INIT_LOG_TERM, + region, + metapb::Peer::default(), + RegionStat::default(), + None, + )); + let rsp = rx.recv_timeout(Duration::from_millis(100)); + if ok { + assert!(rsp.is_ok()); + assert_eq!(rsp.unwrap().get_region_id(), 1); + } else { + rsp.unwrap_err(); + } + + let region = block_on(client.get_region_by_id(1)); + if ok { + assert!(region.is_ok()); + let r = region.unwrap(); + assert!(r.is_some()); + assert_eq!(1, r.unwrap().get_id()); + } else { + region.unwrap_err(); + } + }; + // send fail if network is block. + heartbeat_send_fail(false); + fail::remove(pd_client_send_fail_fp); + // send success after network recovered. 
+ heartbeat_send_fail(true); +} + +#[test] +fn test_region_heartbeat_on_leader_change() { + let eps_count = 3; + let server = MockServer::with_case(eps_count, Arc::new(LeaderChange::new())); + let eps = server.bind_addrs(); + + let client = new_client(eps, None); + let poller = Builder::new_multi_thread() + .thread_name(thd_name!("poller")) + .worker_threads(1) + .build() + .unwrap(); + let (tx, rx) = mpsc::channel(); + let f = client.handle_region_heartbeat_response(1, move |resp| { + tx.send(resp).unwrap(); + }); + poller.spawn(f); + let region = metapb::Region::default(); + let peer = metapb::Peer::default(); + let stat = RegionStat::default(); + poller.spawn(client.region_heartbeat( + store::RAFT_INIT_LOG_TERM, + region.clone(), + peer.clone(), + stat.clone(), + None, + )); + rx.recv_timeout(LeaderChange::get_leader_interval()) + .unwrap(); + + let heartbeat_on_leader_change = |count| { + let mut leader = client.get_leader(); + for _ in 0..count { + loop { + let _ = block_on(client.get_region_by_id(1)); + let new = client.get_leader(); + if leader != new { + leader = new; + info!("leader changed!"); + break; + } + thread::sleep(LeaderChange::get_leader_interval()); + } + } + poller.spawn(client.region_heartbeat( + store::RAFT_INIT_LOG_TERM, + region.clone(), + peer.clone(), + stat.clone(), + None, + )); + rx.recv_timeout(LeaderChange::get_leader_interval()) + .unwrap(); + }; + + // Change PD leader once then heartbeat PD. + heartbeat_on_leader_change(1); + + // Change PD leader twice without update the heartbeat sender, then heartbeat + // PD. 
+ heartbeat_on_leader_change(2); +} + +#[test] +fn test_periodical_update() { + let eps_count = 3; + let server = MockServer::with_case(eps_count, Arc::new(LeaderChange::new())); + let eps = server.bind_addrs(); + + let counter = Arc::new(AtomicUsize::new(0)); + let client = new_client_with_update_interval(eps, None, ReadableDuration::secs(3)); + let counter1 = Arc::clone(&counter); + client.handle_reconnect(move || { + counter1.fetch_add(1, Ordering::SeqCst); + }); + let leader = client.get_leader(); + + for _ in 0..5 { + let new = client.get_leader(); + if new != leader { + assert!(counter.load(Ordering::SeqCst) >= 1); + return; + } + thread::sleep(LeaderChange::get_leader_interval()); + } + + panic!("failed, leader should changed"); +} + +#[test] +fn test_cluster_version() { + let server = MockServer::::new(3); + let eps = server.bind_addrs(); + + let feature_a = Feature::require(0, 0, 1); + let feature_b = Feature::require(5, 0, 0); + let feature_c = Feature::require(5, 0, 1); + + let client = new_client(eps, None); + let feature_gate = client.feature_gate(); + assert!(!feature_gate.can_enable(feature_a)); + + let emit_heartbeat = || { + let req = pdpb::StoreStats::default(); + block_on(client.store_heartbeat(req, /* store_report= */ None, None)).unwrap(); + }; + + let set_cluster_version = |version: &str| { + let h = server.default_handler(); + h.set_cluster_version(version.to_owned()); + }; + + // Empty version string will be treated as invalid. + emit_heartbeat(); + assert!(!feature_gate.can_enable(feature_a)); + + // Explicitly invalid version string. + set_cluster_version("invalid-version"); + emit_heartbeat(); + assert!(!feature_gate.can_enable(feature_a)); + + // Correct version string. + set_cluster_version("5.0.0"); + emit_heartbeat(); + assert!(feature_gate.can_enable(feature_a)); + assert!(feature_gate.can_enable(feature_b)); + assert!(!feature_gate.can_enable(feature_c)); + + // Version can't go backwards. 
+ set_cluster_version("4.99"); + emit_heartbeat(); + assert!(feature_gate.can_enable(feature_b)); + assert!(!feature_gate.can_enable(feature_c)); + + // After reconnect the version should be still accessable. + // The GLOBAL_RECONNECT_INTERVAL is 0.1s so sleeps 0.2s here. + thread::sleep(Duration::from_millis(200)); + client.reconnect().unwrap(); + assert!(feature_gate.can_enable(feature_b)); + assert!(!feature_gate.can_enable(feature_c)); + + // Version can go forwards. + set_cluster_version("5.0.1"); + emit_heartbeat(); + assert!(feature_gate.can_enable(feature_c)); +} From 4b4fc4390d90e72eb87e49a223e756e6ba1e2688 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Fri, 2 Dec 2022 18:50:01 +0800 Subject: [PATCH 0382/1149] raftstore: allow a read-only flashback request to be propsed (#13871) close tikv/tikv#13870, fix tikv/tikv#13870 Because the flashback read request must be proposed after the `PrepareFlashback` and it won't have any side effects, it's safe to allow a read-only flashback request to be proposed. In this way, we can also fix #13870. 
Signed-off-by: JmPotato --- Cargo.lock | 2 +- components/raftstore/src/store/fsm/apply.rs | 7 +++- components/raftstore/src/store/fsm/peer.rs | 14 +++++-- components/raftstore/src/store/util.rs | 3 +- components/raftstore/src/store/worker/read.rs | 2 +- .../txn/actions/flashback_to_version.rs | 41 ++++++++++++++++++- .../flashback_to_version_read_phase.rs | 13 ++++-- .../integrations/raftstore/test_flashback.rs | 27 +++++++++++- tests/integrations/server/kv_service.rs | 2 +- 9 files changed, 95 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8b178015fa1..063657d29bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5479,7 +5479,7 @@ dependencies = [ "log_wrappers", "openssl", "prometheus", - "rand 0.8.3", + "rand 0.8.5", "serde", "serde_derive", "slog", diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index bd582d1c24a..a3d0bdb2712 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -1588,7 +1588,12 @@ where let include_region = req.get_header().get_region_epoch().get_version() >= self.last_merge_version; check_region_epoch(req, &self.region, include_region)?; - check_flashback_state(self.region.get_is_in_flashback(), req, self.region_id())?; + check_flashback_state( + self.region.get_is_in_flashback(), + req, + self.region_id(), + false, + )?; if req.has_admin_request() { self.exec_admin_cmd(ctx, req) } else { diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index f6498222d27..e3f268bf02c 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -5107,8 +5107,13 @@ where _ => {} }; // Check whether the region is in the flashback state and the request could be - // proposed. - if let Err(e) = util::check_flashback_state(self.fsm.peer.is_in_flashback, msg, region_id) { + // proposed. 
Skip the not prepared error because the + // `self.fsm.peer.is_in_flashback` may not be the latest right after applying + // the `PrepareFlashback` admin command, we will let it pass here and check in + // the apply phase. + if let Err(e) = + util::check_flashback_state(self.fsm.peer.is_in_flashback, msg, region_id, true) + { match e { Error::FlashbackInProgress(_) => self .ctx @@ -6278,7 +6283,10 @@ where fn on_set_flashback_state(&mut self, is_in_flashback: bool) { // Set flashback memory - self.fsm.peer.is_in_flashback = is_in_flashback; + self.fsm.peer.is_in_flashback = (|| { + fail_point!("keep_peer_fsm_flashback_state_false", |_| false); + is_in_flashback + })(); // Let the leader lease to None to ensure that local reads are not executed. self.fsm.peer.leader_lease_mut().expire_remote_lease(); } diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 5f78065d32b..df5f4543f76 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -318,6 +318,7 @@ pub fn check_flashback_state( is_in_flashback: bool, req: &RaftCmdRequest, region_id: u64, + skip_not_prepared: bool, ) -> Result<()> { // The admin flashback cmd could be proposed/applied under any state. if req.has_admin_request() @@ -335,7 +336,7 @@ pub fn check_flashback_state( } // If the region is not in the flashback state, the flashback request itself // should be rejected. - if !is_in_flashback && is_flashback_request { + if !is_in_flashback && is_flashback_request && !skip_not_prepared { return Err(Error::FlashbackNotPrepared(region_id)); } Ok(()) diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 08e56aa7481..c78a51866ae 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -813,7 +813,7 @@ where // Check whether the region is in the flashback state and the local read could // be performed. 
let is_in_flashback = delegate.region.is_in_flashback; - if let Err(e) = util::check_flashback_state(is_in_flashback, req, region_id) { + if let Err(e) = util::check_flashback_state(is_in_flashback, req, region_id, false) { TLS_LOCAL_READ_METRICS.with(|m| match e { Error::FlashbackNotPrepared(_) => { m.borrow_mut().reject_reason.flashback_not_prepared.inc() diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index 4b05c8eef8f..8a65debcdbf 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -5,8 +5,8 @@ use std::ops::Bound; use txn_types::{Key, Lock, LockType, TimeStamp, Write, WriteType}; use crate::storage::{ - mvcc::{MvccReader, MvccTxn, SnapshotReader, MAX_TXN_WRITE_SIZE}, - txn::{actions::check_txn_status::rollback_lock, Result as TxnResult}, + mvcc::{self, MvccReader, MvccTxn, SnapshotReader, MAX_TXN_WRITE_SIZE}, + txn::{self, actions::check_txn_status::rollback_lock, Result as TxnResult}, Snapshot, }; @@ -218,10 +218,47 @@ pub fn commit_flashback_key( lock.is_pessimistic_txn(), flashback_commit_ts, ); + } else { + return Err(txn::Error::from_mvcc(mvcc::ErrorInner::TxnLockNotFound { + start_ts: flashback_start_ts, + commit_ts: flashback_commit_ts, + key: key_to_commit.to_raw()?, + })); } Ok(()) } +// Check if the flashback has been finished before. +pub fn check_flashback_commit( + reader: &mut MvccReader, + key_to_commit: &Key, + flashback_start_ts: TimeStamp, + flashback_commit_ts: TimeStamp, +) -> TxnResult { + match reader.load_lock(key_to_commit)? { + // If the lock exists, it means the flashback hasn't been finished. + Some(lock) => { + if lock.ts == flashback_start_ts { + return Ok(false); + } + } + // If the lock doesn't exist and the flashback commit record exists, it means the flashback + // has been finished. + None => { + if let Some(write) = reader.get_write(key_to_commit, flashback_commit_ts, None)? 
{ + if write.start_ts == flashback_start_ts { + return Ok(true); + } + } + } + } + Err(txn::Error::from_mvcc(mvcc::ErrorInner::TxnLockNotFound { + start_ts: flashback_start_ts, + commit_ts: flashback_commit_ts, + key: key_to_commit.to_raw()?, + })) +} + pub fn get_first_user_key( reader: &mut MvccReader, start_key: &Key, diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs index 9ac5014b7f3..672a504a1f1 100644 --- a/src/storage/txn/commands/flashback_to_version_read_phase.rs +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -6,7 +6,7 @@ use txn_types::{Key, Lock, TimeStamp}; use crate::storage::{ mvcc::MvccReader, txn::{ - actions::flashback_to_version::get_first_user_key, + actions::flashback_to_version::{check_flashback_commit, get_first_user_key}, commands::{ Command, CommandExt, FlashbackToVersion, ProcessResult, ReadCommand, TypedCommand, }, @@ -189,9 +189,14 @@ impl ReadCommand for FlashbackToVersionReadPhase { // Commit key needs to match the Prewrite key, which is set as the first user // key. start_key = next_write_key.clone(); - // If the key is not locked, it means that the key has been committed before and - // we are in a retry. - if reader.load_lock(&next_write_key)?.is_none() { + // If the key has already been committed by the flashback, it means that we are + // in a retry. It's safe to just return directly. + if check_flashback_commit( + &mut reader, + &start_key, + self.start_ts, + self.commit_ts, + )? 
{ statistics.add(&reader.statistics); return Ok(ProcessResult::Res); } diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index 89a61223fa2..afc2a658081 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -18,6 +18,28 @@ use txn_types::WriteBatchFlags; const TEST_KEY: &[u8] = b"k1"; const TEST_VALUE: &[u8] = b"v1"; +#[test] +#[cfg(feature = "failpoints")] +fn test_read_after_prepare_flashback() { + let mut cluster = new_node_cluster(0, 3); + cluster.run(); + cluster.must_transfer_leader(1, new_peer(1, 1)); + + let region = cluster.get_region(TEST_KEY); + fail::cfg("keep_peer_fsm_flashback_state_false", "return").unwrap(); + // Prepare flashback. + cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); + // Read with flashback flag will succeed even the peer fsm does not updated its + // `is_in_flashback` flag. + must_request_with_flashback_flag(&mut cluster, &mut region.clone(), new_get_cmd(TEST_KEY)); + // Writing with flashback flag will succeed since the ApplyFSM owns the + // latest `is_in_flashback` flag. + must_request_with_flashback_flag(&mut cluster, &mut region.clone(), new_get_cmd(TEST_KEY)); + fail::remove("keep_peer_fsm_flashback_state_false"); + // Finish flashback. + cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); +} + #[test] fn test_prepare_flashback_after_split() { let mut cluster = new_node_cluster(0, 3); @@ -281,8 +303,9 @@ fn test_flashback_for_local_read() { // Check the leader does a local read. let state = cluster.raft_local_state(region.get_id(), store_id); assert_eq!(state.get_last_index(), last_index); - // A local read with flashback flag will also be blocked. - must_get_flashback_not_prepared_error(&mut cluster, &mut region, new_get_cmd(TEST_KEY)); + // A local read with flashback flag will not be blocked since it won't have any + // side effects. 
+ must_request_with_flashback_flag(&mut cluster, &mut region, new_get_cmd(TEST_KEY)); } #[test] diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 12cff74861d..f4200ab20da 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -781,7 +781,7 @@ fn test_mvcc_flashback_unprepared() { req.start_key = b"a".to_vec(); req.end_key = b"z".to_vec(); let resp = client.kv_flashback_to_version(&req).unwrap(); - assert!(resp.get_region_error().has_flashback_not_prepared()); + assert!(resp.get_error().contains("txn lock not found")); must_kv_read_equal(&client, ctx, k, v, 6); } From 76844d2e2c9a32d2ddedf204501506a2c764548d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 5 Dec 2022 11:36:03 +0800 Subject: [PATCH 0383/1149] log_backup: fix pitr panic (#13875) close tikv/tikv#13874 Signed-off-by: hillium --- src/import/sst_service.rs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 9d45052fea9..283f8f802e3 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -12,6 +12,7 @@ use collections::HashSet; use engine_traits::{KvEngine, CF_DEFAULT, CF_WRITE}; use file_system::{set_io_type, IoType}; use futures::{future::join_all, sink::SinkExt, stream::TryStreamExt, TryFutureExt}; +use futures_executor::{ThreadPool, ThreadPoolBuilder}; use grpcio::{ ClientStreamingSink, RequestStream, RpcContext, ServerStreamingSink, UnarySink, WriteFlags, }; @@ -56,6 +57,12 @@ where engine: E, router: Router, threads: Arc, + // For now, PiTR cannot be executed in the tokio runtime because it is synchronous and may + // blocks. (tokio is so strict... it panics if we do insane things like blocking in an async + // context.) + // We need to execute these code in a context which allows blocking. 
+ // FIXME: Make PiTR restore asynchronous. Get rid of this pool. + block_threads: Arc, importer: Arc, limiter: Limiter, task_slots: Arc>>, @@ -92,6 +99,18 @@ where .before_stop_wrapper(move || tikv_alloc::remove_thread_memory_accessor()) .build() .unwrap(); + let props = tikv_util::thread_group::current_properties(); + let block_threads = ThreadPoolBuilder::new() + .pool_size(cfg.num_threads) + .name_prefix("sst-importer") + .after_start_wrapper(move || { + tikv_util::thread_group::set_properties(props.clone()); + tikv_alloc::add_thread_memory_accessor(); + set_io_type(IoType::Import); + }) + .before_stop_wrapper(move || tikv_alloc::remove_thread_memory_accessor()) + .create() + .unwrap(); importer.start_switch_mode_check(threads.handle(), engine.clone()); threads.spawn(Self::tick(importer.clone())); @@ -99,6 +118,7 @@ where cfg, engine, threads: Arc::new(threads), + block_threads: Arc::new(block_threads), router, importer, limiter: Limiter::new(f64::INFINITY), @@ -596,7 +616,7 @@ where debug!("finished apply kv file with {:?}", resp); crate::send_rpc_response!(resp, sink, label, timer); }; - self.threads.spawn(handle_task); + self.block_threads.spawn_ok(handle_task); } /// Downloads the file and performs key-rewrite for later ingesting. From e0c9c1aa470d530f5868269e6e02cf70517344ef Mon Sep 17 00:00:00 2001 From: Hu# Date: Mon, 5 Dec 2022 13:50:02 +0800 Subject: [PATCH 0384/1149] storage: Resolve EBS flashback unlimit range (#13881) close tikv/tikv#13879 Resolve EBS flashback unlimit range. 
Signed-off-by: husharp Co-authored-by: Ti Chi Robot --- src/storage/mod.rs | 12 ++--- .../txn/actions/flashback_to_version.rs | 54 +++++++++++-------- .../txn/commands/flashback_to_version.rs | 2 +- .../flashback_to_version_read_phase.rs | 19 ++++--- src/storage/txn/commands/mod.rs | 4 +- tests/integrations/server/kv_service.rs | 46 ++++++++++++++-- 6 files changed, 95 insertions(+), 42 deletions(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index caed0f57c91..6f06e55937f 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -4875,7 +4875,7 @@ mod tests { commit_ts, version, key.clone(), - Key::from_raw(b"z"), + Some(Key::from_raw(b"z")), ); if let Mutation::Put(..) = write.0 { expect_value( @@ -4900,7 +4900,7 @@ mod tests { commit_ts: TimeStamp, version: TimeStamp, start_key: Key, - end_key: Key, + end_key: Option, ) { let (tx, rx) = channel(); storage @@ -4997,7 +4997,7 @@ mod tests { commit_ts, 2.into(), Key::from_raw(b"k"), - Key::from_raw(b"z"), + Some(Key::from_raw(b"z")), ); expect_value( b"v@1".to_vec(), @@ -5013,7 +5013,7 @@ mod tests { commit_ts, 1.into(), Key::from_raw(b"k"), - Key::from_raw(b"z"), + Some(Key::from_raw(b"z")), ); expect_none( block_on(storage.get(Context::default(), Key::from_raw(b"k"), commit_ts)) @@ -5104,7 +5104,7 @@ mod tests { flashback_commit_ts, TimeStamp::zero(), Key::from_raw(b"k"), - Key::from_raw(b"z"), + Some(Key::from_raw(b"z")), ); for i in 1..=FLASHBACK_BATCH_SIZE * 4 { let key = Key::from_raw(format!("k{}", i).as_bytes()); @@ -5183,7 +5183,7 @@ mod tests { flashback_commit_ts, 1.into(), Key::from_raw(b"k"), - Key::from_raw(b"z"), + Some(Key::from_raw(b"z")), ); expect_none( block_on(storage.get(Context::default(), k, flashback_commit_ts)) diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index 8a65debcdbf..c1127142f14 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -15,11 
+15,11 @@ pub const FLASHBACK_BATCH_SIZE: usize = 256 + 1 /* To store the next key for mul pub fn flashback_to_version_read_lock( reader: &mut MvccReader, next_lock_key: Key, - end_key: &Key, + end_key: Option<&Key>, ) -> TxnResult> { let result = reader.scan_locks( Some(&next_lock_key), - Some(end_key), + end_key, |_| true, FLASHBACK_BATCH_SIZE, ); @@ -31,7 +31,7 @@ pub fn flashback_to_version_read_write( reader: &mut MvccReader, next_write_key: Key, start_key: &Key, - end_key: &Key, + end_key: Option<&Key>, flashback_version: TimeStamp, flashback_commit_ts: TimeStamp, ) -> TxnResult> { @@ -44,7 +44,7 @@ pub fn flashback_to_version_read_write( // scanning every unique key in `CF_WRITE`. let keys_result = reader.scan_latest_user_keys( Some(&next_write_key), - Some(end_key), + end_key, |key, latest_commit_ts| { // There is no any other write could happen after the flashback begins. assert!(latest_commit_ts <= flashback_commit_ts); @@ -262,10 +262,10 @@ pub fn check_flashback_commit( pub fn get_first_user_key( reader: &mut MvccReader, start_key: &Key, - end_key: &Key, + end_key: Option<&Key>, ) -> TxnResult> { let (mut keys_result, _) = - reader.scan_latest_user_keys(Some(start_key), Some(end_key), |_, _| true, 1)?; + reader.scan_latest_user_keys(Some(start_key), end_key, |_, _| true, 1)?; Ok(keys_result.pop()) } @@ -300,7 +300,8 @@ pub mod tests { let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new_with_ctx(snapshot.clone(), Some(ScanMode::Forward), &ctx); - let key_locks = flashback_to_version_read_lock(&mut reader, key, &next_key).unwrap(); + let key_locks = + flashback_to_version_read_lock(&mut reader, key, Some(next_key).as_ref()).unwrap(); let cm = ConcurrencyManager::new(TimeStamp::zero()); let mut txn = MvccTxn::new(start_ts.into(), cm); rollback_locks(&mut txn, snapshot, key_locks).unwrap(); @@ -321,8 +322,12 @@ pub mod tests { let snapshot = engine.snapshot(Default::default()).unwrap(); 
let ctx = Context::default(); let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &ctx); - let prewrite_key = if let Some(first_key) = - get_first_user_key(&mut reader, &Key::from_raw(key), &Key::from_raw(b"z")).unwrap() + let prewrite_key = if let Some(first_key) = get_first_user_key( + &mut reader, + &Key::from_raw(key), + Some(Key::from_raw(b"z")).as_ref(), + ) + .unwrap() { first_key } else { @@ -342,7 +347,7 @@ pub mod tests { start_ts: impl Into, commit_ts: impl Into, ) -> usize { - let next_key = Key::from_raw(keys::next_key(key).as_slice()); + let next_key = Key::from_raw_maybe_unbounded(keys::next_key(key).as_slice()); let key = Key::from_raw(key); let (version, start_ts, commit_ts) = (version.into(), start_ts.into(), commit_ts.into()); let ctx = Context::default(); @@ -353,7 +358,7 @@ pub mod tests { &mut reader, key, &Key::from_raw(b""), - &next_key, + next_key.as_ref(), version, commit_ts, ) @@ -379,10 +384,13 @@ pub mod tests { let snapshot = engine.snapshot(Default::default()).unwrap(); let ctx = Context::default(); let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &ctx); - let key_to_lock = - get_first_user_key(&mut reader, &Key::from_raw(key), &Key::from_raw(b"z")) - .unwrap() - .unwrap(); + let key_to_lock = get_first_user_key( + &mut reader, + &Key::from_raw(key), + Some(Key::from_raw(b"z")).as_ref(), + ) + .unwrap() + .unwrap(); commit_flashback_key(&mut txn, &mut reader, &key_to_lock, start_ts, commit_ts).unwrap(); let rows = txn.modifies.len(); write(engine, &ctx, txn.into_modifies()); @@ -591,9 +599,13 @@ pub mod tests { let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &ctx); - let first_key = get_first_user_key(&mut reader, &Key::from_raw(b""), &Key::from_raw(b"z")) - .unwrap_or_else(|_| Some(Key::from_raw(b""))) - .unwrap(); + let first_key = get_first_user_key( + &mut 
reader, + &Key::from_raw(b""), + Some(Key::from_raw(b"z")).as_ref(), + ) + .unwrap_or_else(|_| Some(Key::from_raw(b""))) + .unwrap(); assert_eq!(first_key, Key::from_raw(prewrite_key)); // case 1: start key is before all keys, flashback b"c". @@ -640,9 +652,9 @@ pub mod tests { must_prewrite_flashback_key(&mut engine, start_key, 4, flashback_start_ts), 0 ); - // case 3: start key is valid, end_key is invalid, prewrite key will be None. - let first_key = get_first_user_key(&mut reader, &Key::from_raw(b"a"), &Key::from_raw(b"")) + // case 3: for last region, end_key will be None, prewrite key will valid. + let first_key = get_first_user_key(&mut reader, &Key::from_raw(b"a"), None) .unwrap_or_else(|_| Some(Key::from_raw(b""))); - assert_eq!(first_key, None); + assert_eq!(first_key, Some(Key::from_raw(prewrite_key))); } } diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index 13de0c9b183..3999042fe27 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -33,7 +33,7 @@ command! 
{ commit_ts: TimeStamp, version: TimeStamp, start_key: Key, - end_key: Key, + end_key: Option, state: FlashbackToVersionState, } } diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs index 672a504a1f1..d885c974db4 100644 --- a/src/storage/txn/commands/flashback_to_version_read_phase.rs +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -39,7 +39,7 @@ pub fn new_flashback_rollback_lock_cmd( start_ts: TimeStamp, version: TimeStamp, start_key: Key, - end_key: Key, + end_key: Option, ctx: Context, ) -> TypedCommand<()> { FlashbackToVersionReadPhase::new( @@ -61,7 +61,7 @@ pub fn new_flashback_write_cmd( commit_ts: TimeStamp, version: TimeStamp, start_key: Key, - end_key: Key, + end_key: Option, ctx: Context, ) -> TypedCommand<()> { FlashbackToVersionReadPhase::new( @@ -87,7 +87,7 @@ command! { commit_ts: TimeStamp, version: TimeStamp, start_key: Key, - end_key: Key, + end_key: Option, state: FlashbackToVersionState, } } @@ -126,8 +126,11 @@ impl ReadCommand for FlashbackToVersionReadPhase { let mut start_key = self.start_key.clone(); let next_state = match self.state { FlashbackToVersionState::RollbackLock { next_lock_key, .. } => { - let mut key_locks = - flashback_to_version_read_lock(&mut reader, next_lock_key, &self.end_key)?; + let mut key_locks = flashback_to_version_read_lock( + &mut reader, + next_lock_key, + self.end_key.as_ref(), + )?; if key_locks.is_empty() { // - No more locks to rollback, continue to the Prewrite Phase. // - The start key from the client is actually a range which is used to limit @@ -139,7 +142,7 @@ impl ReadCommand for FlashbackToVersionReadPhase { // - To make sure the key locked in the latch is the same as the actual key // written, we pass it to the key in `process_write' after getting it. let key_to_lock = if let Some(first_key) = - get_first_user_key(&mut reader, &self.start_key, &self.end_key)? 
+ get_first_user_key(&mut reader, &self.start_key, self.end_key.as_ref())? { first_key } else { @@ -178,7 +181,7 @@ impl ReadCommand for FlashbackToVersionReadPhase { // write of this key and instead put it after the completion // of the 2pc. next_write_key = if let Some(first_key) = - get_first_user_key(&mut reader, &self.start_key, &self.end_key)? + get_first_user_key(&mut reader, &self.start_key, self.end_key.as_ref())? { first_key } else { @@ -205,7 +208,7 @@ impl ReadCommand for FlashbackToVersionReadPhase { &mut reader, next_write_key, &start_key, - &self.end_key, + self.end_key.as_ref(), self.version, self.commit_ts, )?; diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 7d835462acf..7eee81ae23e 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -372,7 +372,7 @@ impl From for TypedCommand<()> { req.get_start_ts().into(), req.get_version().into(), Key::from_raw(req.get_start_key()), - Key::from_raw(req.get_end_key()), + Key::from_raw_maybe_unbounded(req.get_end_key()), req.take_context(), ) } @@ -385,7 +385,7 @@ impl From for TypedCommand<()> { req.get_commit_ts().into(), req.get_version().into(), Key::from_raw(req.get_start_key()), - Key::from_raw(req.get_end_key()), + Key::from_raw_maybe_unbounded(req.get_end_key()), req.take_context(), ) } diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index f4200ab20da..3dec0b57798 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -606,7 +606,7 @@ fn test_mvcc_flashback_failed_after_first_batch() { fail::cfg("flashback_failed_after_first_batch", "return").unwrap(); must_flashback_to_version(&client, ctx.clone(), check_ts, ts + 1, ts + 2); fail::remove("flashback_failed_after_first_batch"); - // key@1 must be flahsbacked in the second batch firstly. + // key@1 must be flashbacked in the second batch firstly. 
must_kv_read_equal( &client, ctx.clone(), @@ -777,14 +777,52 @@ fn test_mvcc_flashback_unprepared() { req.set_context(ctx.clone()); req.set_start_ts(4); req.set_commit_ts(5); - req.version = 0; - req.start_key = b"a".to_vec(); - req.end_key = b"z".to_vec(); + req.set_version(0); + req.set_start_key(b"a".to_vec()); + req.set_end_key(b"z".to_vec()); let resp = client.kv_flashback_to_version(&req).unwrap(); assert!(resp.get_error().contains("txn lock not found")); must_kv_read_equal(&client, ctx, k, v, 6); } +#[test] +fn test_mvcc_flashback_with_unlimit_range() { + let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (k, v) = (b"key".to_vec(), b"value".to_vec()); + let mut ts = 0; + write_and_read_key(&client, &ctx, &mut ts, k.clone(), v.clone()); + must_kv_read_equal(&client, ctx.clone(), k.clone(), v, 6); + + let mut prepare_req = PrepareFlashbackToVersionRequest::default(); + prepare_req.set_context(ctx.clone()); + prepare_req.set_start_ts(6); + prepare_req.set_version(0); + prepare_req.set_start_key(b"".to_vec()); + prepare_req.set_end_key(b"".to_vec()); + client + .kv_prepare_flashback_to_version(&prepare_req) + .unwrap(); + let mut req = FlashbackToVersionRequest::default(); + req.set_context(ctx.clone()); + req.set_start_ts(6); + req.set_commit_ts(7); + req.set_version(0); + req.set_start_key(b"".to_vec()); + req.set_end_key(b"".to_vec()); + let resp = client.kv_flashback_to_version(&req).unwrap(); + assert!(!resp.has_region_error()); + assert!(resp.get_error().is_empty()); + + let mut get_req = GetRequest::default(); + get_req.set_context(ctx); + get_req.key = k; + get_req.version = 7; + let get_resp = client.kv_get(&get_req).unwrap(); + assert!(!get_resp.has_region_error()); + assert!(!get_resp.has_error()); + assert_eq!(get_resp.value, b"".to_vec()); +} + // raft related RPC is tested as parts of test_snapshot.rs, so skip here. 
#[test] From c8250e58e7316911617fde5d2d43c578bbd23100 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Mon, 5 Dec 2022 15:04:03 +0800 Subject: [PATCH 0385/1149] raftstore: split raft write batch on 1GiB limit (#13872) close tikv/tikv#13848 Fix panic when the size of one single write exceeds 2GiB. Signed-off-by: tabokie Co-authored-by: Ti Chi Robot --- components/raft_log_engine/src/engine.rs | 23 +-- .../raftstore/src/store/async_io/write.rs | 163 ++++++++++-------- .../src/store/async_io/write_tests.rs | 124 ++++++++++++- tests/integrations/pd/test_rpc_client.rs | 2 +- 4 files changed, 220 insertions(+), 92 deletions(-) diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index a376adc25b7..c952f18dbc4 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -540,30 +540,21 @@ impl RaftEngine for RaftLogEngine { fn append(&self, raft_group_id: u64, entries: Vec) -> Result { let mut batch = Self::LogBatch::default(); - batch - .0 - .add_entries::(raft_group_id, &entries) - .map_err(transfer_error)?; - self.0.write(&mut batch.0, false).map_err(transfer_error) + batch.append(raft_group_id, entries)?; + self.consume(&mut batch, false) } fn put_store_ident(&self, ident: &StoreIdent) -> Result<()> { let mut batch = Self::LogBatch::default(); - batch - .0 - .put_message(STORE_STATE_ID, STORE_IDENT_KEY.to_vec(), ident) - .map_err(transfer_error)?; - self.0.write(&mut batch.0, true).map_err(transfer_error)?; + batch.put_store_ident(ident)?; + self.consume(&mut batch, true)?; Ok(()) } fn put_raft_state(&self, raft_group_id: u64, state: &RaftLocalState) -> Result<()> { let mut batch = Self::LogBatch::default(); - batch - .0 - .put_message(raft_group_id, RAFT_LOG_STATE_KEY.to_vec(), state) - .map_err(transfer_error)?; - self.0.write(&mut batch.0, false).map_err(transfer_error)?; + batch.put_raft_state(raft_group_id, state)?; + self.consume(&mut batch, false)?; Ok(()) } @@ -585,7 +576,7 @@ 
impl RaftEngine for RaftLogEngine { old_first_index.push(self.0.first_index(task.raft_group_id)); } - self.0.write(&mut batch.0, false).map_err(transfer_error)?; + self.consume(&mut batch, false)?; let mut total = 0; for (old_first_index, task) in old_first_index.iter().zip(tasks) { diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 354a796c99c..d17223e5acf 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -27,7 +27,7 @@ use protobuf::Message; use raft::eraftpb::Entry; use tikv_util::{ box_err, - config::{Tracker, VersionTrack}, + config::{ReadableSize, Tracker, VersionTrack}, debug, info, slow_log, sys::thread::StdThreadBuildWrapper, thd_name, @@ -54,6 +54,7 @@ const KV_WB_SHRINK_SIZE: usize = 1024 * 1024; const KV_WB_DEFAULT_SIZE: usize = 16 * 1024; const RAFT_WB_SHRINK_SIZE: usize = 10 * 1024 * 1024; const RAFT_WB_DEFAULT_SIZE: usize = 256 * 1024; +const RAFT_WB_SPLIT_SIZE: usize = ReadableSize::gb(1).0 as usize; /// Notify the event to the specified region. pub trait PersistedNotifier: Clone + Send + 'static { @@ -360,8 +361,12 @@ where EK: KvEngine, ER: RaftEngine, { - pub raft_wb: ER::LogBatch, - // Write raft state once for a region everytime writing to disk + // When a single batch becomes too large, we uses multiple batches each containing atomic + // writes. + pub raft_wbs: Vec, + // Write states once for a region everytime writing to disk. + // These states only corresponds to entries inside `raft_wbs.last()`. States for other write + // batches must be inlined early. 
pub raft_states: HashMap, pub extra_batch_write: ExtraBatchWrite, pub state_size: usize, @@ -369,6 +374,7 @@ where pub persisted_cbs: Vec>, // region_id -> (peer_id, ready_number) pub readies: HashMap, + pub(crate) raft_wb_split_size: usize, } impl WriteTaskBatch @@ -378,41 +384,77 @@ where { fn new(raft_wb: ER::LogBatch) -> Self { Self { - raft_wb, + raft_wbs: vec![raft_wb], raft_states: HashMap::default(), extra_batch_write: ExtraBatchWrite::None, state_size: 0, tasks: vec![], persisted_cbs: vec![], readies: HashMap::default(), + raft_wb_split_size: RAFT_WB_SPLIT_SIZE, } } + #[inline] + fn flush_states_to_raft_wb(&mut self, raft_engine: &ER) { + let wb = self.raft_wbs.last_mut().unwrap(); + for (region_id, state) in self.raft_states.drain() { + wb.put_raft_state(region_id, &state).unwrap(); + } + if let ExtraBatchWrite::V2(extra_states_map) = &mut self.extra_batch_write { + for (region_id, state) in extra_states_map.drain() { + let mut tombstone = false; + if let Some(region_state) = state.region_state { + if region_state.get_state() == PeerState::Tombstone { + tombstone = true; + raft_engine + .clean( + region_id, + first_index(&state.apply_state), + state.raft_state.as_ref().unwrap(), + wb, + ) + .unwrap(); + } + wb.put_region_state(region_id, ®ion_state).unwrap(); + } + if !tombstone { + wb.put_apply_state(region_id, &state.apply_state).unwrap(); + } + } + } + self.state_size = 0; + } + /// Add write task to this batch - fn add_write_task(&mut self, mut task: WriteTask) { + fn add_write_task(&mut self, raft_engine: &ER, mut task: WriteTask) { if let Err(e) = task.valid() { panic!("task is not valid: {:?}", e); } - if let Some(raft_wb) = task.raft_wb.take() { - self.raft_wb.merge(raft_wb).unwrap(); + + if self.raft_wb_split_size > 0 + && self.raft_wbs.last().unwrap().persist_size() >= self.raft_wb_split_size + { + self.flush_states_to_raft_wb(raft_engine); + self.raft_wbs + .push(raft_engine.log_batch(RAFT_WB_DEFAULT_SIZE)); } - let entries = 
std::mem::take(&mut task.entries); - self.raft_wb.append(task.region_id, entries).unwrap(); + let raft_wb = self.raft_wbs.last_mut().unwrap(); + if let Some(wb) = task.raft_wb.take() { + raft_wb.merge(wb).unwrap(); + } + raft_wb + .append(task.region_id, std::mem::take(&mut task.entries)) + .unwrap(); if let Some((from, to)) = task.cut_logs { - self.raft_wb.cut_logs(task.region_id, from, to); + raft_wb.cut_logs(task.region_id, from, to); } - if let Some(raft_state) = task.raft_state.take() { - if self - .raft_states - .insert(task.region_id, raft_state) - .is_none() - { - self.state_size += std::mem::size_of::(); - } + if let Some(raft_state) = task.raft_state.take() + && self.raft_states.insert(task.region_id, raft_state).is_none() { + self.state_size += std::mem::size_of::(); } - self.state_size += self .extra_batch_write .merge(task.region_id, &mut task.extra_write); @@ -460,41 +502,16 @@ where #[inline] fn get_raft_size(&self) -> usize { - self.state_size + self.raft_wb.persist_size() + self.state_size + + self + .raft_wbs + .iter() + .map(|wb| wb.persist_size()) + .sum::() } fn before_write_to_db(&mut self, engine: &ER, metrics: &StoreWriteMetrics) { - // Put raft state to raft writebatch - for (region_id, state) in self.raft_states.drain() { - self.raft_wb.put_raft_state(region_id, &state).unwrap(); - } - if let ExtraBatchWrite::V2(extra_states_map) = &mut self.extra_batch_write { - for (region_id, state) in extra_states_map.drain() { - let mut tombstone = false; - if let Some(region_state) = state.region_state { - if region_state.get_state() == PeerState::Tombstone { - tombstone = true; - engine - .clean( - region_id, - first_index(&state.apply_state), - state.raft_state.as_ref().unwrap(), - &mut self.raft_wb, - ) - .unwrap(); - } - self.raft_wb - .put_region_state(region_id, ®ion_state) - .unwrap(); - } - if !tombstone { - self.raft_wb - .put_apply_state(region_id, &state.apply_state) - .unwrap(); - } - } - } - self.state_size = 0; + 
self.flush_states_to_raft_wb(engine); if metrics.waterfall_metrics { let now = std::time::Instant::now(); for task in &self.tasks { @@ -677,7 +694,7 @@ where } pub fn handle_write_task(&mut self, task: WriteTask) { - self.batch.add_write_task(task); + self.batch.add_write_task(&self.raft_engine, task); } pub fn write_to_db(&mut self, notify: bool) { @@ -726,24 +743,27 @@ where fail_point!("raft_between_save"); let mut write_raft_time = 0f64; - if !self.batch.raft_wb.is_empty() { + if !self.batch.raft_wbs[0].is_empty() { fail_point!("raft_before_save_on_store_1", self.store_id == 1, |_| {}); let now = Instant::now(); self.perf_context.start_observe(); - self.raft_engine - .consume_and_shrink( - &mut self.batch.raft_wb, - true, - RAFT_WB_SHRINK_SIZE, - RAFT_WB_DEFAULT_SIZE, - ) - .unwrap_or_else(|e| { - panic!( - "store {}: {} failed to write to raft engine: {:?}", - self.store_id, self.tag, e - ); - }); + for i in 0..self.batch.raft_wbs.len() { + self.raft_engine + .consume_and_shrink( + &mut self.batch.raft_wbs[i], + true, + RAFT_WB_SHRINK_SIZE, + RAFT_WB_DEFAULT_SIZE, + ) + .unwrap_or_else(|e| { + panic!( + "store {}: {} failed to write to raft engine: {:?}", + self.store_id, self.tag, e + ); + }); + } + self.batch.raft_wbs.truncate(1); let trackers: Vec<_> = self .batch .tasks @@ -946,7 +966,7 @@ pub fn write_to_db_for_test( ER: RaftEngine, { let mut batch = WriteTaskBatch::new(engines.raft.log_batch(RAFT_WB_DEFAULT_SIZE)); - batch.add_write_task(task); + batch.add_write_task(&engines.raft, task); batch.before_write_to_db(&engines.raft, &StoreWriteMetrics::new(false)); if let ExtraBatchWrite::V1(kv_wb) = &mut batch.extra_batch_write { if !kv_wb.is_empty() { @@ -957,13 +977,12 @@ pub fn write_to_db_for_test( }); } } - if !batch.raft_wb.is_empty() { - engines - .raft - .consume(&mut batch.raft_wb, true) - .unwrap_or_else(|e| { + if !batch.raft_wbs[0].is_empty() { + for wb in &mut batch.raft_wbs { + engines.raft.consume(wb, true).unwrap_or_else(|e| { panic!("test 
failed to write to raft engine: {:?}", e); }); + } } } diff --git a/components/raftstore/src/store/async_io/write_tests.rs b/components/raftstore/src/store/async_io/write_tests.rs index 1642c90d075..727502b6ca4 100644 --- a/components/raftstore/src/store/async_io/write_tests.rs +++ b/components/raftstore/src/store/async_io/write_tests.rs @@ -273,7 +273,7 @@ fn test_worker() { task_1.raft_state = Some(new_raft_state(5, 123, 6, 8)); task_1.messages.append(&mut vec![RaftMessage::default()]); - t.worker.batch.add_write_task(task_1); + t.worker.batch.add_write_task(&engines.raft, task_1); let mut task_2 = WriteTask::::new(region_2, 2, 15); init_write_batch(&engines, &mut task_2); @@ -287,7 +287,7 @@ fn test_worker() { .messages .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); - t.worker.batch.add_write_task(task_2); + t.worker.batch.add_write_task(&engines.raft, task_2); let mut task_3 = WriteTask::::new(region_1, 1, 11); init_write_batch(&engines, &mut task_3); @@ -303,7 +303,7 @@ fn test_worker() { .messages .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); - t.worker.batch.add_write_task(task_3); + t.worker.batch.add_write_task(&engines.raft, task_3); t.worker.write_to_db(true); @@ -337,6 +337,124 @@ fn test_worker() { must_have_same_count_msg(5, &t.msg_rx); } +#[test] +fn test_worker_split_raft_wb() { + let path = Builder::new().prefix("async-io-worker").tempdir().unwrap(); + let engines = new_temp_engine(&path); + let mut t = TestWorker::new(&Config::default(), &engines); + + let mut run_test = |region_1: u64, region_2: u64, split: (bool, bool)| { + let raft_key_1 = 17 + region_1; + let raft_key_2 = 27 + region_1; + let raft_key_3 = 37 + region_1; + let mut expected_wbs = 1; + + let mut task_1 = WriteTask::::new(region_1, 1, 10); + init_write_batch(&engines, &mut task_1); + task_1.extra_write = ExtraWrite::V2(ExtraStates::new(RaftApplyState { + applied_index: 10, + ..Default::default() + })); + 
put_raft_kv(task_1.raft_wb.as_mut(), raft_key_1); + task_1.entries.append(&mut vec![ + new_entry(5, 5), + new_entry(6, 5), + new_entry(7, 5), + new_entry(8, 5), + ]); + task_1.raft_state = Some(new_raft_state(5, 123, 6, 8)); + t.worker.batch.add_write_task(&engines.raft, task_1); + + let mut task_2 = WriteTask::::new(region_2, 2, 15); + init_write_batch(&engines, &mut task_2); + task_2.extra_write = ExtraWrite::V2(ExtraStates::new(RaftApplyState { + applied_index: 16, + ..Default::default() + })); + put_raft_kv(task_2.raft_wb.as_mut(), raft_key_2); + task_2 + .entries + .append(&mut vec![new_entry(20, 15), new_entry(21, 15)]); + task_2.raft_state = Some(new_raft_state(15, 234, 20, 21)); + if split.0 { + expected_wbs += 1; + t.worker.batch.raft_wb_split_size = 1; + } else { + t.worker.batch.raft_wb_split_size = 0; + } + t.worker.batch.add_write_task(&engines.raft, task_2); + + let mut task_3 = WriteTask::::new(region_1, 1, 11); + init_write_batch(&engines, &mut task_3); + task_3.extra_write = ExtraWrite::V2(ExtraStates::new(RaftApplyState { + applied_index: 25, + ..Default::default() + })); + put_raft_kv(task_3.raft_wb.as_mut(), raft_key_3); + delete_raft_kv(&engines.raft, task_3.raft_wb.as_mut(), raft_key_1); + task_3 + .entries + .append(&mut vec![new_entry(6, 6), new_entry(7, 7)]); + task_3.cut_logs = Some((8, 9)); + task_3.raft_state = Some(new_raft_state(7, 124, 6, 7)); + if split.1 { + expected_wbs += 1; + t.worker.batch.raft_wb_split_size = 1; + } else { + t.worker.batch.raft_wb_split_size = 0; + } + t.worker.batch.add_write_task(&engines.raft, task_3); + + assert_eq!(t.worker.batch.raft_wbs.len(), expected_wbs); + t.worker.write_to_db(true); + assert_eq!(t.worker.batch.raft_wbs.len(), 1); + + must_have_same_notifies(vec![(region_1, (1, 11)), (region_2, (2, 15))], &t.notify_rx); + + assert_eq!(test_raft_kv(&engines.raft, raft_key_1), false); + assert_eq!(test_raft_kv(&engines.raft, raft_key_2), true); + assert_eq!(test_raft_kv(&engines.raft, raft_key_3), 
true); + + must_have_entries_and_state( + &engines.raft, + vec![ + ( + region_1, + vec![new_entry(5, 5), new_entry(6, 6), new_entry(7, 7)], + new_raft_state(7, 124, 6, 7), + ), + ( + region_2, + vec![new_entry(20, 15), new_entry(21, 15)], + new_raft_state(15, 234, 20, 21), + ), + ], + ); + assert_eq!( + engines.raft.get_apply_state(region_1).unwrap(), + Some(RaftApplyState { + applied_index: 25, + ..Default::default() + }) + ); + assert_eq!( + engines.raft.get_apply_state(region_2).unwrap(), + Some(RaftApplyState { + applied_index: 16, + ..Default::default() + }) + ); + }; + + let mut first_region = 1; + for a in [true, false] { + for b in [true, false] { + run_test(first_region, first_region + 1, (a, b)); + first_region += 10; + } + } +} + #[test] fn test_basic_flow() { let region_1 = 1; diff --git a/tests/integrations/pd/test_rpc_client.rs b/tests/integrations/pd/test_rpc_client.rs index 23841ba5dfd..ca37318aa8b 100644 --- a/tests/integrations/pd/test_rpc_client.rs +++ b/tests/integrations/pd/test_rpc_client.rs @@ -112,7 +112,7 @@ fn test_rpc_client() { assert_eq!(ts.logical() + 100, ts100.logical()); let mut prev_id = 0; - for _ in 0..100 { + for _ in 0..10 { let mut client = new_client_v2(eps.clone(), None); let alloc_id = client.alloc_id().unwrap(); assert!(alloc_id > prev_id); From b79b86e965e18e4196d0a8db5a5d13c9a868c77d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 5 Dec 2022 16:44:03 +0800 Subject: [PATCH 0386/1149] log-backup: make the safepoint lifetime 24hours (#13885) close tikv/tikv#13889, close pingcap/tidb#39603 Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- components/backup-stream/src/checkpoint_manager.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/components/backup-stream/src/checkpoint_manager.rs b/components/backup-stream/src/checkpoint_manager.rs index f34211ef7a5..e316b6e05c3 100644 --- 
a/components/backup-stream/src/checkpoint_manager.rs +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -356,9 +356,13 @@ impl FlushObserver for BasicFlushObserver { .update_service_safe_point( format!("backup-stream-{}-{}", task, self.store_id), TimeStamp::new(rts.saturating_sub(1)), - // Add a service safe point for 30 mins (6x the default flush interval). - // It would probably be safe. - Duration::from_secs(1800), + // Add a service safe point for 24 hours. (the same as fatal error.) + // We make it the same duration as we meet fatal errors because TiKV may be + // SIGKILL'ed after it meets fatal error and before it successfully updated the + // fatal error safepoint. + // TODO: We'd better make the coordinator, who really + // calculates the checkpoint to register service safepoint. + Duration::from_secs(60 * 60 * 24), ) .await { From 7d4b6c6d65537d4ce6cd2ff98573b539c38a35b8 Mon Sep 17 00:00:00 2001 From: Hu# Date: Mon, 5 Dec 2022 17:26:03 +0800 Subject: [PATCH 0387/1149] pd_client: move ReadableDuration to failpoint (#13878) ref tikv/tikv#13673 remove `unused_imports` when make release. 
Signed-off-by: husharp Signed-off-by: Xinye Tao Co-authored-by: Xinye Tao --- components/pd_client/src/client_v2.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/components/pd_client/src/client_v2.rs b/components/pd_client/src/client_v2.rs index 55f0c31b3c5..3d17a94a494 100644 --- a/components/pd_client/src/client_v2.rs +++ b/components/pd_client/src/client_v2.rs @@ -47,9 +47,7 @@ use kvproto::{ }; use security::SecurityManager; use tikv_util::{ - box_err, - config::ReadableDuration, - error, info, + box_err, error, info, mpsc::future as mpsc, slow_log, thd_name, time::{duration_to_sec, Instant}, @@ -71,6 +69,8 @@ use crate::PdFuture; fn request_timeout() -> Duration { fail_point!("pd_client_v2_request_timeout", |s| { use std::str::FromStr; + + use tikv_util::config::ReadableDuration; ReadableDuration::from_str(&s.unwrap()).unwrap().0 }); Duration::from_secs(REQUEST_TIMEOUT_SEC) @@ -412,6 +412,8 @@ async fn reconnect_loop( let backoff = (|| { fail_point!("pd_client_v2_backoff", |s| { use std::str::FromStr; + + use tikv_util::config::ReadableDuration; ReadableDuration::from_str(&s.unwrap()).unwrap().0 }); request_timeout() From 280d53b75c1d3e821435c440eb6493af536edabb Mon Sep 17 00:00:00 2001 From: JmPotato Date: Tue, 6 Dec 2022 02:50:03 +0800 Subject: [PATCH 0388/1149] raftstore: remove is_in_flashback field in peer fsm (#13877) close tikv/tikv#13868 - Remove `is_in_flashback` field and use the region meta as the only source of truth in `PeerFSM`. - Add a corresponding test case. - Some minor refinement to the code and tests. 
Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/fsm/peer.rs | 27 +++-- components/raftstore/src/store/peer.rs | 5 +- .../txn/actions/flashback_to_version.rs | 109 +++++++++++------- .../flashback_to_version_read_phase.rs | 35 ++++-- .../integrations/raftstore/test_flashback.rs | 52 +++++++-- tests/integrations/server/kv_service.rs | 27 ++++- 6 files changed, 176 insertions(+), 79 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index e3f268bf02c..62eadb97076 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -1334,7 +1334,7 @@ where new_read_index_request(region_id, region_epoch.clone(), self.fsm.peer.peer.clone()); // Allow to capture change even is in flashback state. // TODO: add a test case for this kind of situation. - if self.fsm.peer.is_in_flashback { + if self.region().is_in_flashback { let mut flags = WriteBatchFlags::from_bits_check(msg.get_header().get_flags()); flags.insert(WriteBatchFlags::FLASHBACK); msg.mut_header().set_flags(flags.bits()); @@ -4894,9 +4894,7 @@ where } ExecResult::IngestSst { ssts } => self.on_ingest_sst_result(ssts), ExecResult::TransferLeader { term } => self.on_transfer_leader(term), - ExecResult::SetFlashbackState { region } => { - self.on_set_flashback_state(region.get_is_in_flashback()) - } + ExecResult::SetFlashbackState { region } => self.on_set_flashback_state(region), } } @@ -5108,11 +5106,11 @@ where }; // Check whether the region is in the flashback state and the request could be // proposed. Skip the not prepared error because the - // `self.fsm.peer.is_in_flashback` may not be the latest right after applying + // `self.region().is_in_flashback` may not be the latest right after applying // the `PrepareFlashback` admin command, we will let it pass here and check in // the apply phase. 
if let Err(e) = - util::check_flashback_state(self.fsm.peer.is_in_flashback, msg, region_id, true) + util::check_flashback_state(self.region().is_in_flashback, msg, region_id, true) { match e { Error::FlashbackInProgress(_) => self @@ -6281,12 +6279,17 @@ where self.fsm.has_ready = true; } - fn on_set_flashback_state(&mut self, is_in_flashback: bool) { - // Set flashback memory - self.fsm.peer.is_in_flashback = (|| { - fail_point!("keep_peer_fsm_flashback_state_false", |_| false); - is_in_flashback - })(); + fn on_set_flashback_state(&mut self, region: metapb::Region) { + // Update the region meta. + self.update_region((|| { + #[cfg(feature = "failpoints")] + fail_point!("keep_peer_fsm_flashback_state_false", |_| { + let mut region = region.clone(); + region.is_in_flashback = false; + region + }); + region + })()); // Let the leader lease to None to ensure that local reads are not executed. self.fsm.peer.leader_lease_mut().expire_remote_lease(); } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 9614161739a..100544bd0f4 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -1030,8 +1030,6 @@ where /// lead_transferee if this peer(leader) is in a leadership transferring. pub lead_transferee: u64, pub unsafe_recovery_state: Option, - // Used as the memory state for Flashback to reject RW/Schedule before proposing. 
- pub is_in_flashback: bool, pub snapshot_recovery_state: Option, } @@ -1167,7 +1165,6 @@ where last_region_buckets: None, lead_transferee: raft::INVALID_ID, unsafe_recovery_state: None, - is_in_flashback: region.get_is_in_flashback(), snapshot_recovery_state: None, }; @@ -3531,7 +3528,7 @@ where self.force_leader.is_some(), ) { None - } else if self.is_in_flashback { + } else if self.region().is_in_flashback { debug!( "prevents renew lease while in flashback state"; "region_id" => self.region_id, diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index c1127142f14..819cfd0631c 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -1,7 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::ops::Bound; - use txn_types::{Key, Lock, LockType, TimeStamp, Write, WriteType}; use crate::storage::{ @@ -35,11 +33,6 @@ pub fn flashback_to_version_read_write( flashback_version: TimeStamp, flashback_commit_ts: TimeStamp, ) -> TxnResult> { - // Filter out the SST that does not have a newer version than - // `flashback_version` in `CF_WRITE`, i.e, whose latest `commit_ts` <= - // `flashback_version`. By doing this, we can only flashback those keys that - // have version changed since `flashback_version` as much as possible. - reader.set_hint_min_ts(Some(Bound::Excluded(flashback_version))); // To flashback the data, we need to get all the latest visible keys first by // scanning every unique key in `CF_WRITE`. 
let keys_result = reader.scan_latest_user_keys( @@ -241,15 +234,30 @@ pub fn check_flashback_commit( if lock.ts == flashback_start_ts { return Ok(false); } + error!( + "check flashback commit exception: lock not found"; + "key_to_commit" => log_wrappers::Value::key(key_to_commit.as_encoded()), + "flashback_start_ts" => flashback_start_ts, + "flashback_commit_ts" => flashback_commit_ts, + "lock" => ?lock, + ); } // If the lock doesn't exist and the flashback commit record exists, it means the flashback // has been finished. None => { - if let Some(write) = reader.get_write(key_to_commit, flashback_commit_ts, None)? { - if write.start_ts == flashback_start_ts { + let write_res = reader.seek_write(key_to_commit, flashback_commit_ts)?; + if let Some((commit_ts, ref write)) = write_res { + if commit_ts == flashback_commit_ts && write.start_ts == flashback_start_ts { return Ok(true); } } + error!( + "check flashback commit exception: write record mismatched"; + "key_to_commit" => log_wrappers::Value::key(key_to_commit.as_encoded()), + "flashback_start_ts" => flashback_start_ts, + "flashback_commit_ts" => flashback_commit_ts, + "write" => ?write_res, + ); } } Err(txn::Error::from_mvcc(mvcc::ErrorInner::TxnLockNotFound { @@ -263,9 +271,15 @@ pub fn get_first_user_key( reader: &mut MvccReader, start_key: &Key, end_key: Option<&Key>, + flashback_version: TimeStamp, ) -> TxnResult> { - let (mut keys_result, _) = - reader.scan_latest_user_keys(Some(start_key), end_key, |_, _| true, 1)?; + let (mut keys_result, _) = reader.scan_latest_user_keys( + Some(start_key), + end_key, + // Make sure we will get the same first user key each time. 
+ |_, latest_commit_ts| latest_commit_ts > flashback_version, + 1, + )?; Ok(keys_result.pop()) } @@ -326,6 +340,7 @@ pub mod tests { &mut reader, &Key::from_raw(key), Some(Key::from_raw(b"z")).as_ref(), + version, ) .unwrap() { @@ -375,10 +390,11 @@ pub mod tests { fn must_commit_flashback_key( engine: &mut E, key: &[u8], + version: impl Into, start_ts: impl Into, commit_ts: impl Into, ) -> usize { - let (start_ts, commit_ts) = (start_ts.into(), commit_ts.into()); + let (version, start_ts, commit_ts) = (version.into(), start_ts.into(), commit_ts.into()); let cm = ConcurrencyManager::new(TimeStamp::zero()); let mut txn = MvccTxn::new(start_ts, cm); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -388,6 +404,7 @@ pub mod tests { &mut reader, &Key::from_raw(key), Some(Key::from_raw(b"z")).as_ref(), + version, ) .unwrap() .unwrap(); @@ -545,9 +562,11 @@ pub mod tests { let mut engine = TestEngineBuilder::new().build().unwrap(); let mut ts = TimeStamp::zero(); let (k, v) = (b"k", [u8::MAX; SHORT_VALUE_MAX_LEN + 1]); - must_prewrite_put(&mut engine, k, &v, k, *ts.incr()); - must_commit(&mut engine, k, ts, *ts.incr()); - must_get(&mut engine, k, ts, &v); + for _ in 0..2 { + must_prewrite_put(&mut engine, k, &v, k, *ts.incr()); + must_commit(&mut engine, k, ts, *ts.incr()); + must_get(&mut engine, k, ts, &v); + } let flashback_start_ts = *ts.incr(); // Rollback nothing. 
@@ -579,30 +598,23 @@ pub mod tests { fn test_prewrite_with_special_key() { let mut engine = TestEngineBuilder::new().build().unwrap(); let mut ts = TimeStamp::zero(); - let (prewrite_key, prewrite_val) = (b"b", b"val"); - must_prewrite_put( - &mut engine, - prewrite_key, - prewrite_val, - prewrite_key, - *ts.incr(), - ); - must_commit(&mut engine, prewrite_key, ts, *ts.incr()); - must_get(&mut engine, prewrite_key, ts, prewrite_val); - let (k, v1, v2) = (b"c", b"v1", b"v2"); - must_prewrite_put(&mut engine, k, v1, k, *ts.incr()); - must_commit(&mut engine, k, ts, *ts.incr()); - must_prewrite_put(&mut engine, k, v2, k, *ts.incr()); - must_commit(&mut engine, k, ts, *ts.incr()); - must_get(&mut engine, k, ts, v2); + let (prewrite_key, k, v) = (b"b", b"c", b"val"); + for k in [prewrite_key, k] { + let (start_ts, commit_ts) = (*ts.incr(), *ts.incr()); + must_prewrite_put(&mut engine, k, v, k, start_ts); + must_commit(&mut engine, k, start_ts, commit_ts); + must_get(&mut engine, k, commit_ts, v); + } // Check for prewrite key b"b". let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &ctx); + let flashback_version = TimeStamp::zero(); let first_key = get_first_user_key( &mut reader, &Key::from_raw(b""), Some(Key::from_raw(b"z")).as_ref(), + flashback_version, ) .unwrap_or_else(|_| Some(Key::from_raw(b""))) .unwrap(); @@ -615,7 +627,12 @@ pub mod tests { assert_eq!(must_rollback_lock(&mut engine, k, flashback_start_ts), 0); // Prewrite "prewrite_key" not "start_key". assert_eq!( - must_prewrite_flashback_key(&mut engine, start_key, 4, flashback_start_ts), + must_prewrite_flashback_key( + &mut engine, + start_key, + flashback_version, + flashback_start_ts + ), 1 ); // Flashback (b"c", v2) to (b"c", v1). 
@@ -623,7 +640,7 @@ pub mod tests { must_flashback_write_to_version( &mut engine, k, - 4, + flashback_version, flashback_start_ts, flashback_commit_ts ), @@ -634,14 +651,14 @@ pub mod tests { must_commit_flashback_key( &mut engine, start_key, + flashback_version, flashback_start_ts, flashback_commit_ts ), 2 ); - must_get(&mut engine, k, ts, v1); - must_get(&mut engine, prewrite_key, ts, prewrite_val); - + must_get_none(&mut engine, prewrite_key, ts); + must_get_none(&mut engine, k, ts); // case 2: start key is after all keys, prewrite will return None. let start_key = b"d"; let flashback_start_ts = *ts.incr(); @@ -649,12 +666,22 @@ pub mod tests { assert_eq!(must_rollback_lock(&mut engine, k, flashback_start_ts), 0); // Prewrite null. assert_eq!( - must_prewrite_flashback_key(&mut engine, start_key, 4, flashback_start_ts), + must_prewrite_flashback_key( + &mut engine, + start_key, + flashback_version, + flashback_start_ts + ), 0 ); - // case 3: for last region, end_key will be None, prewrite key will valid. - let first_key = get_first_user_key(&mut reader, &Key::from_raw(b"a"), None) - .unwrap_or_else(|_| Some(Key::from_raw(b""))); - assert_eq!(first_key, Some(Key::from_raw(prewrite_key))); + must_get_none(&mut engine, prewrite_key, ts); + must_get_none(&mut engine, k, ts); + // case 3: for last region, end_key will be None, prewrite key will be valid. + assert_eq!( + get_first_user_key(&mut reader, &Key::from_raw(b"a"), None, flashback_version) + .unwrap() + .unwrap(), + Key::from_raw(prewrite_key) + ); } } diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs index d885c974db4..769171d46e0 100644 --- a/src/storage/txn/commands/flashback_to_version_read_phase.rs +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -1,5 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+use std::ops::Bound; + // #[PerformanceCriticalPath] use txn_types::{Key, Lock, TimeStamp}; @@ -109,20 +111,25 @@ impl CommandExt for FlashbackToVersionReadPhase { /// - Scan all locks. /// - Rollback all these locks. /// 2. [PrepareFlashback] Prewrite phase: -/// - Prewrite the `self.start_key` specifically to prevent the -/// `resolved_ts` from advancing. +/// - Prewrite the first user key after `self.start_key` specifically to +/// prevent the `resolved_ts` from advancing. /// 3. [FinishFlashback] FlashbackWrite phase: /// - Scan all the latest writes and their corresponding values at /// `self.version`. /// - Write the old MVCC version writes again for all these keys with -/// `self.commit_ts` excluding the `self.start_key`. +/// `self.commit_ts` excluding the first user key after `self.start_key`. /// 4. [FinishFlashback] Commit phase: -/// - Commit the `self.start_key` we write at the second phase to finish the -/// flashback. +/// - Commit the first user key after `self.start_key` we write at the +/// second phase to finish the flashback. impl ReadCommand for FlashbackToVersionReadPhase { fn process_read(self, snapshot: S, statistics: &mut Statistics) -> Result { let tag = self.tag().get_str(); let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &self.ctx); + // Filter out the SST that does not have a newer version than `self.version` in + // `CF_WRITE`, i.e, whose latest `commit_ts` <= `self.version` in the later + // scan. By doing this, we can only flashback those keys that have version + // changed since `self.version` as much as possible. + reader.set_hint_min_ts(Some(Bound::Excluded(self.version))); let mut start_key = self.start_key.clone(); let next_state = match self.state { FlashbackToVersionState::RollbackLock { next_lock_key, .. } => { @@ -141,9 +148,12 @@ impl ReadCommand for FlashbackToVersionReadPhase { // completion of the 2pc. 
// - To make sure the key locked in the latch is the same as the actual key // written, we pass it to the key in `process_write' after getting it. - let key_to_lock = if let Some(first_key) = - get_first_user_key(&mut reader, &self.start_key, self.end_key.as_ref())? - { + let key_to_lock = if let Some(first_key) = get_first_user_key( + &mut reader, + &self.start_key, + self.end_key.as_ref(), + self.version, + )? { first_key } else { // If the key is None return directly @@ -180,9 +190,12 @@ impl ReadCommand for FlashbackToVersionReadPhase { // 2pc. So When overwriting the write, we skip the immediate // write of this key and instead put it after the completion // of the 2pc. - next_write_key = if let Some(first_key) = - get_first_user_key(&mut reader, &self.start_key, self.end_key.as_ref())? - { + next_write_key = if let Some(first_key) = get_first_user_key( + &mut reader, + &self.start_key, + self.end_key.as_ref(), + self.version, + )? { first_key } else { // If the key is None return directly diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index afc2a658081..7d0ec219534 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -359,30 +359,62 @@ fn test_flashback_for_apply_snapshot() { must_check_flashback_state(&mut cluster, 1, 1, false); must_check_flashback_state(&mut cluster, 1, 3, false); - // Make store 3 isolated. cluster.add_send_filter(IsolationFilterFactory::new(3)); - // Write some data to trigger snapshot. 
- for i in 100..110 { - let key = format!("k{}", i); - let value = format!("v{}", i); - cluster.must_put_cf("write", key.as_bytes(), value.as_bytes()); + let mut region = cluster.get_region(TEST_KEY); + for _ in 0..10 { + must_request_without_flashback_flag( + &mut cluster, + &mut region.clone(), + new_put_cf_cmd("write", TEST_KEY, TEST_VALUE), + ) } - // Prepare for flashback cluster.must_send_wait_flashback_msg(1, AdminCmdType::PrepareFlashback); must_check_flashback_state(&mut cluster, 1, 1, true); must_check_flashback_state(&mut cluster, 1, 3, false); - // Add store 3 back. cluster.clear_send_filters(); must_check_flashback_state(&mut cluster, 1, 1, true); must_check_flashback_state(&mut cluster, 1, 3, true); + cluster.must_send_wait_flashback_msg(1, AdminCmdType::FinishFlashback); + must_check_flashback_state(&mut cluster, 1, 1, false); + must_check_flashback_state(&mut cluster, 1, 3, false); + // Prepare for flashback + cluster.must_send_wait_flashback_msg(1, AdminCmdType::PrepareFlashback); + must_check_flashback_state(&mut cluster, 1, 1, true); + must_check_flashback_state(&mut cluster, 1, 3, true); + // Make store 3 isolated. + cluster.add_send_filter(IsolationFilterFactory::new(3)); + // Write some flashback data to trigger snapshot. + for _ in 0..10 { + must_request_with_flashback_flag( + &mut cluster, + &mut region.clone(), + new_put_cf_cmd("write", TEST_KEY, TEST_VALUE), + ) + } + // Finish flashback. cluster.must_send_wait_flashback_msg(1, AdminCmdType::FinishFlashback); must_check_flashback_state(&mut cluster, 1, 1, false); + must_check_flashback_state(&mut cluster, 1, 3, true); + // Wait for a while before adding store 3 back to make sure only it does not + // receive the `FinishFlashback` message. + sleep(Duration::from_secs(1)); + // Add store 3 back. + cluster.clear_send_filters(); + must_check_flashback_state(&mut cluster, 1, 1, false); must_check_flashback_state(&mut cluster, 1, 3, false); + // Make store 3 become leader. 
+ cluster.must_transfer_leader(region.get_id(), new_peer(3, 3)); + // Region should not in the flashback state. + must_request_without_flashback_flag( + &mut cluster, + &mut region, + new_put_cmd(TEST_KEY, TEST_VALUE), + ); } fn must_check_flashback_state( @@ -438,7 +470,7 @@ fn must_request_with_flashback_flag( req: Request, ) { let resp = request(cluster, region, req, true); - assert!(!resp.get_header().has_error()); + assert!(!resp.get_header().has_error(), "{:?}", resp); } fn must_get_flashback_not_prepared_error( @@ -457,7 +489,7 @@ fn must_request_without_flashback_flag( req: Request, ) { let resp = request(cluster, region, req, false); - assert!(!resp.get_header().has_error()); + assert!(!resp.get_header().has_error(), "{:?}", resp); } fn must_get_flashback_in_progress_error( diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 3dec0b57798..5c536fce124 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -782,7 +782,32 @@ fn test_mvcc_flashback_unprepared() { req.set_end_key(b"z".to_vec()); let resp = client.kv_flashback_to_version(&req).unwrap(); assert!(resp.get_error().contains("txn lock not found")); - must_kv_read_equal(&client, ctx, k, v, 6); + must_kv_read_equal(&client, ctx.clone(), k.clone(), v, 6); + // Flashback with preparing. + must_flashback_to_version(&client, ctx.clone(), 0, 6, 7); + let mut get_req = GetRequest::default(); + get_req.set_context(ctx.clone()); + get_req.key = k; + get_req.version = 7; + let get_resp = client.kv_get(&get_req).unwrap(); + assert!(!get_resp.has_region_error()); + assert!(!get_resp.has_error()); + assert_eq!(get_resp.value, b"".to_vec()); + // Mock the flashback retry. 
+ let mut req = FlashbackToVersionRequest::default(); + req.set_context(ctx); + req.set_start_ts(6); + req.set_commit_ts(7); + req.version = 0; + req.start_key = b"a".to_vec(); + req.end_key = b"z".to_vec(); + let resp = client.kv_flashback_to_version(&req).unwrap(); + assert!(!resp.has_region_error()); + assert!(resp.get_error().is_empty()); + let get_resp = client.kv_get(&get_req).unwrap(); + assert!(!get_resp.has_region_error()); + assert!(!get_resp.has_error()); + assert_eq!(get_resp.value, b"".to_vec()); } #[test] From 909787e828098d68ae86a70df3175cbbbacd2796 Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 7 Dec 2022 13:18:04 +0800 Subject: [PATCH 0389/1149] raft_engine: remove redundant methods (#13900) ref tikv/tikv#12842 These methods are defined in log batch already, redefine them again introduce unnecessary maintenance. And these methods are also confusing as they don't sync by default, which is very easy to make mistake. Signed-off-by: Jay Lee --- components/engine_panic/src/raft_engine.rs | 20 ++------- components/engine_rocks/src/raft_engine.rs | 27 +++--------- components/engine_traits/src/raft_engine.rs | 21 +++------- components/raft_log_engine/src/engine.rs | 36 +++------------- components/raftstore-v2/src/bootstrap.rs | 5 ++- .../raftstore/src/store/peer_storage.rs | 41 +++++++++++-------- components/raftstore/src/store/snap.rs | 9 ++-- components/server/src/raft_engine_switch.rs | 7 +++- src/server/debug.rs | 9 ++-- tests/integrations/server/kv_service.rs | 12 ++++-- 10 files changed, 74 insertions(+), 113 deletions(-) diff --git a/components/engine_panic/src/raft_engine.rs b/components/engine_panic/src/raft_engine.rs index ad05e66c6fa..603eb118c5c 100644 --- a/components/engine_panic/src/raft_engine.rs +++ b/components/engine_panic/src/raft_engine.rs @@ -108,14 +108,6 @@ impl RaftEngine for PanicEngine { panic!() } - fn append(&self, raft_group_id: u64, entries: Vec) -> Result { - panic!() - } - - fn put_raft_state(&self, raft_group_id: u64, 
state: &RaftLocalState) -> Result<()> { - panic!() - } - fn gc(&self, raft_group_id: u64, mut from: u64, to: u64) -> Result { panic!() } @@ -148,10 +140,6 @@ impl RaftEngine for PanicEngine { panic!() } - fn put_store_ident(&self, ident: &StoreIdent) -> Result<()> { - panic!() - } - fn for_each_raft_group(&self, f: &mut F) -> std::result::Result<(), E> where F: FnMut(u64) -> std::result::Result<(), E>, @@ -159,10 +147,6 @@ impl RaftEngine for PanicEngine { { panic!() } - - fn put_recover_state(&self, state: &StoreRecoverState) -> Result<()> { - panic!() - } } impl RaftLogBatch for PanicWriteBatch { @@ -209,4 +193,8 @@ impl RaftLogBatch for PanicWriteBatch { fn put_apply_state(&mut self, raft_group_id: u64, state: &RaftApplyState) -> Result<()> { panic!() } + + fn put_recover_state(&mut self, state: &StoreRecoverState) -> Result<()> { + panic!() + } } diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index da15b1708b8..79cd8350519 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -3,8 +3,8 @@ // #[PerformanceCriticalPath] use engine_traits::{ Error, Iterable, KvEngine, MiscExt, Mutable, Peekable, RaftEngine, RaftEngineDebug, - RaftEngineReadOnly, RaftLogBatch, RaftLogGcTask, Result, SyncMutable, WriteBatch, - WriteBatchExt, WriteOptions, CF_DEFAULT, RAFT_LOG_MULTI_GET_CNT, + RaftEngineReadOnly, RaftLogBatch, RaftLogGcTask, Result, WriteBatch, WriteBatchExt, + WriteOptions, CF_DEFAULT, RAFT_LOG_MULTI_GET_CNT, }; use kvproto::{ metapb::Region, @@ -286,17 +286,6 @@ impl RaftEngine for RocksEngine { Ok(()) } - fn append(&self, raft_group_id: u64, entries: Vec) -> Result { - let mut wb = self.write_batch(); - let buf = Vec::with_capacity(1024); - wb.append_impl(raft_group_id, &entries, buf)?; - self.consume(&mut wb, false) - } - - fn put_raft_state(&self, raft_group_id: u64, state: &RaftLocalState) -> Result<()> { - 
self.put_msg(&keys::raft_state_key(raft_group_id), state) - } - fn batch_gc(&self, groups: Vec) -> Result { let mut total = 0; let mut raft_wb = self.write_batch_with_cap(4 * 1024); @@ -343,10 +332,6 @@ impl RaftEngine for RocksEngine { self.as_inner().path() } - fn put_store_ident(&self, ident: &StoreIdent) -> Result<()> { - self.put_msg(keys::STORE_IDENT_KEY, ident) - } - fn for_each_raft_group(&self, f: &mut F) -> std::result::Result<(), E> where F: FnMut(u64) -> std::result::Result<(), E>, @@ -374,10 +359,6 @@ impl RaftEngine for RocksEngine { Some(e) => Err(e), } } - - fn put_recover_state(&self, state: &StoreRecoverState) -> Result<()> { - self.put_msg(keys::RECOVER_STATE_KEY, state) - } } impl RaftLogBatch for RocksWriteBatchVec { @@ -431,6 +412,10 @@ impl RaftLogBatch for RocksWriteBatchVec { fn put_apply_state(&mut self, raft_group_id: u64, state: &RaftApplyState) -> Result<()> { self.put_msg(&keys::apply_state_key(raft_group_id), state) } + + fn put_recover_state(&mut self, state: &StoreRecoverState) -> Result<()> { + self.put_msg(keys::RECOVER_STATE_KEY, state) + } } impl RocksWriteBatchVec { diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index 7df681c96d5..0c5e0f49854 100644 --- a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -98,15 +98,6 @@ pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send batch: &mut Self::LogBatch, ) -> Result<()>; - /// Append some log entries and return written bytes. - /// - /// Note: `RaftLocalState` won't be updated in this call. - fn append(&self, raft_group_id: u64, entries: Vec) -> Result; - - fn put_store_ident(&self, ident: &StoreIdent) -> Result<()>; - - fn put_raft_state(&self, raft_group_id: u64, state: &RaftLocalState) -> Result<()>; - /// Like `cut_logs` but the range could be very large. Return the deleted /// count. Generally, `from` can be passed in `0`. 
fn gc(&self, raft_group_id: u64, from: u64, to: u64) -> Result; @@ -151,12 +142,6 @@ pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send where F: FnMut(u64) -> std::result::Result<(), E>, E: From; - - /// Indicate whether region states should be recovered from raftdb and - /// replay raft logs. - /// When kvdb's write-ahead-log is disabled, the sequence number of the last - /// boot time is saved. - fn put_recover_state(&self, state: &StoreRecoverState) -> Result<()>; } pub trait RaftLogBatch: Send { @@ -175,6 +160,12 @@ pub trait RaftLogBatch: Send { fn put_region_state(&mut self, raft_group_id: u64, state: &RegionLocalState) -> Result<()>; fn put_apply_state(&mut self, raft_group_id: u64, state: &RaftApplyState) -> Result<()>; + /// Indicate whether region states should be recovered from raftdb and + /// replay raft logs. + /// When kvdb's write-ahead-log is disabled, the sequence number of the last + /// boot time is saved. + fn put_recover_state(&mut self, state: &StoreRecoverState) -> Result<()>; + /// The data size of this RaftLogBatch. 
fn persist_size(&self) -> usize; diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index c952f18dbc4..587f31bae93 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -412,6 +412,12 @@ impl RaftLogBatchTrait for RaftLogBatch { .put_message(raft_group_id, APPLY_STATE_KEY.to_vec(), state) .map_err(transfer_error) } + + fn put_recover_state(&mut self, state: &StoreRecoverState) -> Result<()> { + self.0 + .put_message(STORE_STATE_ID, RECOVER_STATE_KEY.to_vec(), state) + .map_err(transfer_error) + } } impl RaftEngineReadOnly for RaftLogEngine { @@ -538,26 +544,6 @@ impl RaftEngine for RaftLogEngine { Ok(()) } - fn append(&self, raft_group_id: u64, entries: Vec) -> Result { - let mut batch = Self::LogBatch::default(); - batch.append(raft_group_id, entries)?; - self.consume(&mut batch, false) - } - - fn put_store_ident(&self, ident: &StoreIdent) -> Result<()> { - let mut batch = Self::LogBatch::default(); - batch.put_store_ident(ident)?; - self.consume(&mut batch, true)?; - Ok(()) - } - - fn put_raft_state(&self, raft_group_id: u64, state: &RaftLocalState) -> Result<()> { - let mut batch = Self::LogBatch::default(); - batch.put_raft_state(raft_group_id, state)?; - self.consume(&mut batch, false)?; - Ok(()) - } - fn gc(&self, raft_group_id: u64, from: u64, to: u64) -> Result { self.batch_gc(vec![RaftLogGcTask { raft_group_id, @@ -626,16 +612,6 @@ impl RaftEngine for RaftLogEngine { } Ok(()) } - - fn put_recover_state(&self, state: &StoreRecoverState) -> Result<()> { - let mut batch = Self::LogBatch::default(); - batch - .0 - .put_message(STORE_STATE_ID, RECOVER_STATE_KEY.to_vec(), state) - .map_err(transfer_error)?; - self.0.write(&mut batch.0, true).map_err(transfer_error)?; - Ok(()) - } } fn transfer_error(e: RaftEngineError) -> engine_traits::Error { diff --git a/components/raftstore-v2/src/bootstrap.rs b/components/raftstore-v2/src/bootstrap.rs index 
6700db4d45f..b505b37a75b 100644 --- a/components/raftstore-v2/src/bootstrap.rs +++ b/components/raftstore-v2/src/bootstrap.rs @@ -97,8 +97,9 @@ impl<'a, ER: RaftEngine> Bootstrap<'a, ER> { let mut ident = StoreIdent::default(); ident.set_cluster_id(self.cluster_id); ident.set_store_id(id); - self.engine.put_store_ident(&ident)?; - self.engine.sync()?; + let mut lb = self.engine.log_batch(1); + lb.put_store_ident(&ident)?; + self.engine.consume(&mut lb, true)?; fail_point!("node_after_bootstrap_store", |_| Err(box_err!( "injected error: node_after_bootstrap_store" ))); diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 0d10b1f36cf..ce25544bcd8 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -183,7 +183,9 @@ fn init_raft_state( raft_state.last_index = RAFT_INIT_LOG_INDEX; raft_state.mut_hard_state().set_term(RAFT_INIT_LOG_TERM); raft_state.mut_hard_state().set_commit(RAFT_INIT_LOG_INDEX); - engines.raft.put_raft_state(region.get_id(), &raft_state)?; + let mut lb = engines.raft.log_batch(0); + lb.put_raft_state(region.get_id(), &raft_state)?; + engines.raft.consume(&mut lb, true)?; } Ok(raft_state) } @@ -2077,32 +2079,35 @@ pub mod tests { let initial_state = s.initial_state().unwrap(); assert_eq!(initial_state.hard_state, *raft_state.get_hard_state()); + let mut lb = engines.raft.log_batch(4096); // last_index < commit_index is invalid. 
raft_state.set_last_index(11); - engines - .raft - .append(1, vec![new_entry(11, RAFT_INIT_LOG_TERM)]) + lb.append(1, vec![new_entry(11, RAFT_INIT_LOG_TERM)]) .unwrap(); raft_state.mut_hard_state().set_commit(12); - engines.raft.put_raft_state(1, &raft_state).unwrap(); + lb.put_raft_state(1, &raft_state).unwrap(); + engines.raft.consume(&mut lb, false).unwrap(); assert!(build_storage().is_err()); raft_state.set_last_index(20); let entries = (12..=20) .map(|index| new_entry(index, RAFT_INIT_LOG_TERM)) .collect(); - engines.raft.append(1, entries).unwrap(); - engines.raft.put_raft_state(1, &raft_state).unwrap(); + lb.append(1, entries).unwrap(); + lb.put_raft_state(1, &raft_state).unwrap(); + engines.raft.consume(&mut lb, false).unwrap(); s = build_storage().unwrap(); let initial_state = s.initial_state().unwrap(); assert_eq!(initial_state.hard_state, *raft_state.get_hard_state()); // Missing last log is invalid. raft_state.set_last_index(21); - engines.raft.put_raft_state(1, &raft_state).unwrap(); + lb.put_raft_state(1, &raft_state).unwrap(); + engines.raft.consume(&mut lb, false).unwrap(); assert!(build_storage().is_err()); raft_state.set_last_index(20); - engines.raft.put_raft_state(1, &raft_state).unwrap(); + lb.put_raft_state(1, &raft_state).unwrap(); + engines.raft.consume(&mut lb, false).unwrap(); // applied_index > commit_index is invalid. 
let mut apply_state = RaftApplyState::default(); @@ -2132,7 +2137,8 @@ pub mod tests { .map(|index| new_entry(index, RAFT_INIT_LOG_TERM)) .collect(); engines.raft.gc(1, 0, 21).unwrap(); - engines.raft.append(1, entries).unwrap(); + lb.append(1, entries).unwrap(); + engines.raft.consume(&mut lb, false).unwrap(); raft_state.mut_hard_state().set_commit(14); s = build_storage().unwrap(); let initial_state = s.initial_state().unwrap(); @@ -2143,27 +2149,28 @@ pub mod tests { .map(|index| new_entry(index, RAFT_INIT_LOG_TERM)) .collect(); entries[0].set_term(RAFT_INIT_LOG_TERM - 1); - engines.raft.append(1, entries).unwrap(); + lb.append(1, entries).unwrap(); + engines.raft.consume(&mut lb, false).unwrap(); assert!(build_storage().is_err()); // hard state term miss match is invalid. let entries = (14..=20) .map(|index| new_entry(index, RAFT_INIT_LOG_TERM)) .collect(); - engines.raft.append(1, entries).unwrap(); + lb.append(1, entries).unwrap(); raft_state.mut_hard_state().set_term(RAFT_INIT_LOG_TERM - 1); - engines.raft.put_raft_state(1, &raft_state).unwrap(); + lb.put_raft_state(1, &raft_state).unwrap(); + engines.raft.consume(&mut lb, false).unwrap(); assert!(build_storage().is_err()); // last index < recorded_commit_index is invalid. 
engines.raft.gc(1, 0, 21).unwrap(); raft_state.mut_hard_state().set_term(RAFT_INIT_LOG_TERM); raft_state.set_last_index(13); - engines - .raft - .append(1, vec![new_entry(13, RAFT_INIT_LOG_TERM)]) + lb.append(1, vec![new_entry(13, RAFT_INIT_LOG_TERM)]) .unwrap(); - engines.raft.put_raft_state(1, &raft_state).unwrap(); + lb.put_raft_state(1, &raft_state).unwrap(); + engines.raft.consume(&mut lb, false).unwrap(); assert!(build_storage().is_err()); } diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 19b9622657d..8cb44e3718c 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -2006,8 +2006,9 @@ pub mod tests { raft::RaftTestEngine, }; use engine_traits::{ - Engines, ExternalSstFileInfo, KvEngine, RaftEngine, Snapshot as EngineSnapshot, SstExt, - SstWriter, SstWriterBuilder, SyncMutable, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, + Engines, ExternalSstFileInfo, KvEngine, RaftEngine, RaftLogBatch, + Snapshot as EngineSnapshot, SstExt, SstWriter, SstWriterBuilder, SyncMutable, ALL_CFS, + CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use kvproto::{ encryptionpb::EncryptionMethod, @@ -2114,6 +2115,7 @@ pub mod tests { let kv: KvTestEngine = open_test_db(p.join("kv").as_path(), kv_db_opt, kv_cf_opts)?; let raft: RaftTestEngine = engine_test::raft::new_engine(p.join("raft").to_str().unwrap(), raft_db_opt)?; + let mut lb = raft.log_batch(regions.len() * 128); for ®ion_id in regions { // Put apply state into kv engine. let mut apply_state = RaftApplyState::default(); @@ -2123,7 +2125,7 @@ pub mod tests { apply_entry.set_term(0); apply_state.mut_truncated_state().set_index(10); kv.put_msg_cf(CF_RAFT, &keys::apply_state_key(region_id), &apply_state)?; - raft.append(region_id, vec![apply_entry])?; + lb.append(region_id, vec![apply_entry])?; // Put region info into kv engine. 
let region = gen_test_region(region_id, 1, 1); @@ -2131,6 +2133,7 @@ pub mod tests { region_state.set_region(region); kv.put_msg_cf(CF_RAFT, &keys::region_state_key(region_id), ®ion_state)?; } + raft.consume(&mut lb, false).unwrap(); Ok(Engines::new(kv, raft)) } diff --git a/components/server/src/raft_engine_switch.rs b/components/server/src/raft_engine_switch.rs index 29144c8ca18..637088efa88 100644 --- a/components/server/src/raft_engine_switch.rs +++ b/components/server/src/raft_engine_switch.rs @@ -193,11 +193,11 @@ fn run_dump_raft_engine_worker( new_engine: &RocksEngine, count_size: &Arc, ) { + let mut batch = new_engine.log_batch(0); while let Ok(id) = rx.recv() { let state = old_engine.get_raft_state(id).unwrap().unwrap(); - new_engine.put_raft_state(id, &state).unwrap(); + batch.put_raft_state(id, &state).unwrap(); if let Some(last_index) = old_engine.last_index(id) { - let mut batch = new_engine.log_batch(0); let mut begin = old_engine.first_index(id).unwrap(); while begin <= last_index { let end = std::cmp::min(begin + 1024, last_index + 1); @@ -210,6 +210,9 @@ fn run_dump_raft_engine_worker( count_size.fetch_add(size, Ordering::Relaxed); } } + if !batch.is_empty() { + new_engine.consume(&mut batch, false).unwrap(); + } } } diff --git a/src/server/debug.rs b/src/server/debug.rs index 48435f72163..666e2ca33e7 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -16,8 +16,8 @@ use engine_rocks::{ }; use engine_traits::{ Engines, IterOptions, Iterable, Iterator as EngineIterator, Mutable, MvccProperties, Peekable, - RaftEngine, Range, RangePropertiesExt, SyncMutable, WriteBatch, WriteBatchExt, WriteOptions, - CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, + RaftEngine, RaftLogBatch, Range, RangePropertiesExt, SyncMutable, WriteBatch, WriteBatchExt, + WriteOptions, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use kvproto::{ debugpb::{self, Db as DbType}, @@ -735,7 +735,10 @@ impl Debugger { &keys::apply_state_key(region_id), &new_raft_apply_state )); - 
box_try!(raft.put_raft_state(region_id, &new_raft_local_state)); + let mut lb = raft.log_batch(0); + box_try!(lb.put_raft_state(region_id, &new_raft_local_state)); + // Will sync later. + box_try!(raft.consume(&mut lb, false)); let deleted_logs = box_try!(raft.gc(region_id, applied_index + 1, last_index + 1)); raft.sync().unwrap(); kv.sync().unwrap(); diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 5c536fce124..496c587a7b9 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -11,8 +11,8 @@ use std::{ use api_version::{ApiV1, ApiV1Ttl, ApiV2, KvFormat}; use concurrency_manager::ConcurrencyManager; use engine_traits::{ - MiscExt, Peekable, RaftEngine, RaftEngineReadOnly, SyncMutable, CF_DEFAULT, CF_LOCK, CF_RAFT, - CF_WRITE, + MiscExt, Peekable, RaftEngine, RaftEngineReadOnly, RaftLogBatch, SyncMutable, CF_DEFAULT, + CF_LOCK, CF_RAFT, CF_WRITE, }; use futures::{executor::block_on, future, SinkExt, StreamExt, TryStreamExt}; use grpcio::*; @@ -965,7 +965,9 @@ fn test_debug_raft_log() { entry.set_index(log_index); entry.set_entry_type(eraftpb::EntryType::EntryNormal); entry.set_data(vec![42].into()); - engine.append(region_id, vec![entry.clone()]).unwrap(); + let mut lb = engine.log_batch(0); + lb.append(region_id, vec![entry.clone()]).unwrap(); + engine.consume(&mut lb, false).unwrap(); assert_eq!( engine.get_entry(region_id, log_index).unwrap().unwrap(), entry @@ -999,7 +1001,9 @@ fn test_debug_region_info() { let region_id = 100; let mut raft_state = raft_serverpb::RaftLocalState::default(); raft_state.set_last_index(42); - raft_engine.put_raft_state(region_id, &raft_state).unwrap(); + let mut lb = raft_engine.log_batch(0); + lb.put_raft_state(region_id, &raft_state).unwrap(); + raft_engine.consume(&mut lb, false).unwrap(); assert_eq!( raft_engine.get_raft_state(region_id).unwrap().unwrap(), raft_state From 044aa15c6bf1474d5c38d2c45311acc8cffe5b0a Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Wed, 7 Dec 2022 06:30:04 +0100 Subject: [PATCH 0390/1149] *: Update sysinfo dependency (#13385) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ref tikv/tikv#11276 Signed-off-by: Daniël van Eeden Co-authored-by: Ti Chi Robot --- Cargo.lock | 24 ++++-------- Cargo.toml | 2 +- components/tikv_util/Cargo.toml | 2 +- components/tikv_util/src/sys/mod.rs | 4 +- src/server/service/diagnostics/mod.rs | 2 +- src/server/service/diagnostics/sys.rs | 55 +++++++++++++-------------- 6 files changed, 39 insertions(+), 50 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 063657d29bb..eb5145959af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1443,12 +1443,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "doc-comment" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97" - [[package]] name = "dyn-clone" version = "1.0.4" @@ -3341,9 +3335,9 @@ dependencies = [ [[package]] name = "ntapi" -version = "0.3.3" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26e041cd983acbc087e30fcba770380cfa352d0e392e175b2344ebaf7ea0602" +checksum = "bc51db7b362b205941f71232e56c625156eb9a929f8cf74a428fd5bc094a4afc" dependencies = [ "winapi 0.3.9", ] @@ -4511,9 +4505,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.5.0" +version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674" +checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" dependencies = [ "autocfg", "crossbeam-deque", @@ -4523,14 +4517,13 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.9.0" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a" +checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" dependencies = [ "crossbeam-channel", "crossbeam-deque", "crossbeam-utils 0.8.8", - "lazy_static", "num_cpus", ] @@ -5651,13 +5644,12 @@ checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8" [[package]] name = "sysinfo" -version = "0.16.4" +version = "0.26.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c280c91abd1aed2e36be1bc8f56fbc7a2acbb2b58fbcac9641510179cc72dd9" +checksum = "ade661fa5e048ada64ad7901713301c21d2dbc5b65ee7967de8826c111452960" dependencies = [ "cfg-if 1.0.0", "core-foundation-sys", - "doc-comment", "libc 0.2.132", "ntapi", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index 104157fdf24..61d6da6946d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -153,7 +153,7 @@ smallvec = "1.4" sst_importer = { workspace = true } strum = { version = "0.20", features = ["derive"] } sync_wrapper = "0.1.1" -sysinfo = "0.16" +sysinfo = "0.26" tempfile = "3.0" thiserror = "1.0" tidb_query_aggr = { workspace = true } diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 12c3983ef2d..663eb2b681f 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -50,7 +50,7 @@ slog-async = "2.3" slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } slog-json = "2.3" slog-term = "2.4" -sysinfo = "0.16" +sysinfo = "0.26" thiserror = "1.0" tikv_alloc = { workspace = true } time = "0.1" diff --git a/components/tikv_util/src/sys/mod.rs b/components/tikv_util/src/sys/mod.rs index 8b5e846592f..35d417db650 100644 --- a/components/tikv_util/src/sys/mod.rs +++ b/components/tikv_util/src/sys/mod.rs @@ -19,7 +19,7 @@ use lazy_static::lazy_static; #[cfg(target_os = "linux")] use mnt::get_mount; use sysinfo::RefreshKind; -pub use sysinfo::{DiskExt, 
NetworkExt, ProcessExt, ProcessorExt, SystemExt}; +pub use sysinfo::{CpuExt, DiskExt, NetworkExt, ProcessExt, SystemExt}; use crate::config::{ReadableSize, KIB}; @@ -92,7 +92,7 @@ impl SysQuota { fn sysinfo_memory_limit_in_bytes() -> u64 { let system = sysinfo::System::new_with_specifics(RefreshKind::new().with_memory()); - system.get_total_memory() * KIB + system.total_memory() * KIB } } diff --git a/src/server/service/diagnostics/mod.rs b/src/server/service/diagnostics/mod.rs index abede000858..354108e6ab9 100644 --- a/src/server/service/diagnostics/mod.rs +++ b/src/server/service/diagnostics/mod.rs @@ -119,7 +119,7 @@ impl Diagnostics for Service { let load = ( sys::cpu_time_snapshot(), system - .get_networks() + .networks() .into_iter() .map(|(n, d)| (n.to_owned(), sys::NicSnapshot::from_network_data(d))) .collect(), diff --git a/src/server/service/diagnostics/sys.rs b/src/server/service/diagnostics/sys.rs index 17ed9a78b3f..6e9585ab2c9 100644 --- a/src/server/service/diagnostics/sys.rs +++ b/src/server/service/diagnostics/sys.rs @@ -26,12 +26,12 @@ pub struct NicSnapshot { impl NicSnapshot { pub fn from_network_data(data: &impl NetworkExt) -> NicSnapshot { NicSnapshot { - rx_bytes: data.get_total_received(), - tx_bytes: data.get_total_transmitted(), - rx_packets: data.get_total_packets_received(), - tx_packets: data.get_total_packets_transmitted(), - rx_errors: data.get_total_errors_on_received(), - tx_errors: data.get_total_errors_on_transmitted(), + rx_bytes: data.total_received(), + tx_bytes: data.total_transmitted(), + rx_packets: data.total_packets_received(), + tx_packets: data.total_packets_transmitted(), + rx_errors: data.total_errors_on_received(), + tx_errors: data.total_errors_on_transmitted(), } } @@ -62,7 +62,7 @@ fn cpu_load_info(prev_cpu: CpuTimeSnapshot, collector: &mut Vec) let infos = { let mut system = SYS_INFO.lock().unwrap(); system.refresh_system(); - let load = system.get_load_average(); + let load = system.load_average(); vec![ 
("load1", load.one), ("load5", load.five), @@ -129,12 +129,12 @@ fn cpu_load_info(prev_cpu: CpuTimeSnapshot, collector: &mut Vec) fn mem_load_info(collector: &mut Vec) { let mut system = SYS_INFO.lock().unwrap(); system.refresh_memory(); - let total_memory = system.get_total_memory() * KIB; - let used_memory = system.get_used_memory() * KIB; - let free_memory = system.get_free_memory() * KIB; - let total_swap = system.get_total_swap() * KIB; - let used_swap = system.get_used_swap() * KIB; - let free_swap = system.get_free_swap() * KIB; + let total_memory = system.total_memory() * KIB; + let used_memory = system.used_memory() * KIB; + let free_memory = system.free_memory() * KIB; + let total_swap = system.total_swap() * KIB; + let used_swap = system.used_swap() * KIB; + let free_swap = system.free_swap() * KIB; drop(system); let used_memory_pct = (used_memory as f64) / (total_memory as f64); let free_memory_pct = (free_memory as f64) / (total_memory as f64); @@ -182,7 +182,7 @@ fn nic_load_info(prev_nic: HashMap, collector: &mut Vec) { let mut system = SYS_INFO.lock().unwrap(); system.refresh_cpu(); - let processor = match system.get_processors().iter().next() { + let processor = match system.cpus().iter().next() { Some(p) => p, None => return, }; let mut infos = vec![ ("cpu-logical-cores", SysQuota::cpu_cores_quota().to_string()), ("cpu-physical-cores", num_cpus::get_physical().to_string()), - ("cpu-frequency", format!("{}MHz", processor.get_frequency())), - ("cpu-vendor-id", processor.get_vendor_id().to_string()), + ("cpu-frequency", format!("{}MHz", processor.frequency())), + ("cpu-vendor-id", processor.vendor_id().to_string()), ]; // Depend on Rust lib return CPU arch not matching // Golang lib so need this match loop to conversion @@ -362,26 +362,23 @@ fn disk_hardware_info(collector: &mut Vec) { let mut system = SYS_INFO.lock().unwrap(); system.refresh_disks_list(); system.refresh_disks(); - let disks = system.get_disks(); + let disks = system.disks(); for 
disk in disks { - let file_sys = std::str::from_utf8(disk.get_file_system()).unwrap_or("unknown"); + let file_sys = std::str::from_utf8(disk.file_system()).unwrap_or("unknown"); if file_sys == "rootfs" { continue; } - let total = disk.get_total_space(); - let free = disk.get_available_space(); + let total = disk.total_space(); + let free = disk.available_space(); let used = total - free; let free_pct = (free as f64) / (total as f64); let used_pct = (used as f64) / (total as f64); let infos = vec![ - ("type", format!("{:?}", disk.get_type())), + ("type", format!("{:?}", disk.type_())), ("fstype", file_sys.to_string()), ( "path", - disk.get_mount_point() - .to_str() - .unwrap_or("unknown") - .to_string(), + disk.mount_point().to_str().unwrap_or("unknown").to_string(), ), ("total", total.to_string()), ("free", free.to_string()), @@ -398,7 +395,7 @@ fn disk_hardware_info(collector: &mut Vec) { } let mut item = ServerInfoItem::default(); item.set_tp("disk".to_string()); - item.set_name(disk.get_name().to_str().unwrap_or("disk").to_string()); + item.set_name(disk.name().to_str().unwrap_or("disk").to_string()); item.set_pairs(pairs.into()); collector.push(item); } @@ -515,7 +512,7 @@ fn get_transparent_hugepage() -> Option { pub fn process_info(collector: &mut Vec) { let mut system = SYS_INFO.lock().unwrap(); system.refresh_processes(); - let processes = system.get_processes(); + let processes = system.processes(); for (pid, p) in processes.iter() { if p.cmd().is_empty() { continue; @@ -555,7 +552,7 @@ mod tests { system.refresh_networks_list(); system.refresh_all(); system - .get_networks() + .networks() .into_iter() .map(|(n, d)| (n.to_owned(), NicSnapshot::from_network_data(d))) .collect() From e9eb8c95f5660dbf979decc4739c27b5c9c55080 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Wed, 7 Dec 2022 17:28:04 +0800 Subject: [PATCH 0391/1149] backup: allow to backup during the flashback (#13895) ref tikv/tikv#13787, close pingcap/tidb#39639 - Allow to backup during the 
flashback by passing the flashback flag. - Allow the checksum request to get the snapshot during the flashback progress. Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- components/backup/src/endpoint.rs | 1 + components/raftstore/src/store/fsm/peer.rs | 4 ++- components/tikv_kv/src/lib.rs | 4 +-- components/txn_types/src/types.rs | 4 +-- src/coprocessor/endpoint.rs | 6 +++- src/coprocessor/mod.rs | 4 +++ src/server/raftkv/mod.rs | 4 +-- .../txn/commands/acquire_pessimistic_lock.rs | 2 +- .../txn/commands/flashback_to_version.rs | 2 +- src/storage/txn/commands/prewrite.rs | 2 +- src/storage/txn/scheduler.rs | 2 +- tests/integrations/backup/mod.rs | 30 ++++++++++++++++ .../integrations/raftstore/test_flashback.rs | 35 ++++++++++++++++++- 13 files changed, 87 insertions(+), 13 deletions(-) diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index b880da7a3dc..0469ffa30a7 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -330,6 +330,7 @@ impl BackupRange { assert!(!ctx.get_replica_read()); let snap_ctx = SnapContext { pb_ctx: &ctx, + allowed_in_flashback: self.region.is_in_flashback, ..Default::default() }; diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 62eadb97076..9460daf812d 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -5108,7 +5108,9 @@ where // proposed. Skip the not prepared error because the // `self.region().is_in_flashback` may not be the latest right after applying // the `PrepareFlashback` admin command, we will let it pass here and check in - // the apply phase. + // the apply phase and because a read-only request doesn't need to be applied, + // so it will be allowed during the flashback progress, for example, a snapshot + // request. 
if let Err(e) = util::check_flashback_state(self.region().is_in_flashback, msg, region_id, true) { diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index f78b2243331..bf277282bd8 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -294,8 +294,8 @@ pub struct SnapContext<'a> { // `key_ranges` is used in replica read. It will send to // the leader via raft "read index" to check memory locks. pub key_ranges: Vec, - // Marks that this read is a FlashbackToVersionReadPhase. - pub for_flashback: bool, + // Marks that this snapshot request is allowed in the flashback state. + pub allowed_in_flashback: bool, } /// Engine defines the common behaviour for a storage engine type. diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 6a2c953afc1..60e64bf444a 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -532,8 +532,8 @@ pub struct TxnExtra { // Marks that this transaction is a 1PC transaction. RaftKv should set this flag // in the raft command request. pub one_pc: bool, - // Marks that this transaction is a flashback transaction. - pub for_flashback: bool, + // Marks that this transaction is allowed in the flashback state. + pub allowed_in_flashback: bool, } impl TxnExtra { diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 3274700d812..54fcaeb0489 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -171,7 +171,7 @@ impl Endpoint { let mut input = CodedInputStream::from_bytes(&data); input.set_recursion_limit(self.recursion_limit); - let req_ctx: ReqContext; + let mut req_ctx: ReqContext; let builder: RequestHandlerBuilder; match req.get_tp() { @@ -316,6 +316,9 @@ impl Endpoint { cache_match_version, self.perf_level, ); + // Checksum is allowed during the flashback period to make sure the tool such + // like BR can work. 
+ req_ctx.allowed_in_flashback = true; with_tls_tracker(|tracker| { tracker.req_info.request_type = RequestType::CoprocessorChecksum; tracker.req_info.start_ts = start_ts; @@ -358,6 +361,7 @@ impl Endpoint { let mut snap_ctx = SnapContext { pb_ctx: &ctx.context, start_ts: Some(ctx.txn_start_ts), + allowed_in_flashback: ctx.allowed_in_flashback, ..Default::default() }; // need to pass start_ts and ranges to check memory locks for replica read diff --git a/src/coprocessor/mod.rs b/src/coprocessor/mod.rs index 8acd5325a1e..140d3c0476e 100644 --- a/src/coprocessor/mod.rs +++ b/src/coprocessor/mod.rs @@ -142,6 +142,9 @@ pub struct ReqContext { /// Perf level pub perf_level: PerfLevel, + + /// Whether the request is allowed in the flashback state. + pub allowed_in_flashback: bool, } impl ReqContext { @@ -181,6 +184,7 @@ impl ReqContext { lower_bound, upper_bound, perf_level, + allowed_in_flashback: false, } } diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 6c7169d043c..b12e56ee7a0 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -453,7 +453,7 @@ where if txn_extra.one_pc { flags |= WriteBatchFlags::ONE_PC.bits(); } - if txn_extra.for_flashback { + if txn_extra.allowed_in_flashback { flags |= WriteBatchFlags::FLASHBACK.bits(); } header.set_flags(flags); @@ -555,7 +555,7 @@ where flags |= WriteBatchFlags::STALE_READ.bits(); header.set_flag_data(data.into()); } - if ctx.for_flashback { + if ctx.allowed_in_flashback { flags |= WriteBatchFlags::FLASHBACK.bits(); } header.set_flags(flags); diff --git a/src/storage/txn/commands/acquire_pessimistic_lock.rs b/src/storage/txn/commands/acquire_pessimistic_lock.rs index 6bd147cf02e..2afdadaad80 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock.rs @@ -191,7 +191,7 @@ pub(super) fn make_write_data(modifies: Vec, old_values: OldValues) -> W old_values, // One pc status is unknown in AcquirePessimisticLock stage. 
one_pc: false, - for_flashback: false, + allowed_in_flashback: false, }; WriteData::new(modifies, extra) } else { diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index 3999042fe27..72b100f567b 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -118,7 +118,7 @@ impl WriteCommand for FlashbackToVersion { let rows = txn.modifies.len(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); // To let the flashback modification could be proposed and applied successfully. - write_data.extra.for_flashback = true; + write_data.extra.allowed_in_flashback = true; // To let the CDC treat the flashback modification as an 1PC transaction. if matches!(self.state, FlashbackToVersionState::FlashbackWrite { .. }) { write_data.extra.one_pc = true; diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index cd24f54d13b..b34c4eb752b 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -672,7 +672,7 @@ impl Prewriter { old_values: self.old_values, // Set one_pc flag in TxnExtra to let CDC skip handling the resolver. one_pc: self.try_one_pc, - for_flashback: false, + allowed_in_flashback: false, }; // Here the lock guards are taken and will be released after the write finishes. // If an error (KeyIsLocked or WriteConflict) occurs before, these lock guards diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index bfbb860e545..13a74895803 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -702,7 +702,7 @@ impl Scheduler { Command::FlashbackToVersionReadPhase { .. } | Command::FlashbackToVersion { .. } ) { - snap_ctx.for_flashback = true; + snap_ctx.allowed_in_flashback = true; } // The program is currently in scheduler worker threads. // Safety: `self.inner.worker_pool` should ensure that a TLS engine exists. 
diff --git a/tests/integrations/backup/mod.rs b/tests/integrations/backup/mod.rs index f432fd72246..4cfd4be07be 100644 --- a/tests/integrations/backup/mod.rs +++ b/tests/integrations/backup/mod.rs @@ -598,3 +598,33 @@ fn calculated_commit_ts_after_commit() { commit_ts }); } + +#[test] +fn test_backup_in_flashback() { + let mut suite = TestSuite::new(3, 144 * 1024 * 1024, ApiVersion::V1); + suite.must_kv_put(3, 1); + // Prepare the flashback. + let region = suite.cluster.get_region(b"key_0"); + suite.cluster.must_send_wait_flashback_msg( + region.get_id(), + kvproto::raft_cmdpb::AdminCmdType::PrepareFlashback, + ); + // Start the backup. + let tmp = Builder::new().tempdir().unwrap(); + let backup_ts = suite.alloc_ts(); + let storage_path = make_unique_dir(tmp.path()); + let rx = suite.backup( + vec![], // start + vec![], // end + 0.into(), // begin_ts + backup_ts, + &storage_path, + ); + let resp = block_on(rx.collect::>()); + assert!(!resp[0].has_error()); + // Finish the flashback. + suite.cluster.must_send_wait_flashback_msg( + region.get_id(), + kvproto::raft_cmdpb::AdminCmdType::FinishFlashback, + ); +} diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index 7d0ec219534..e50ca59fdff 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -9,7 +9,7 @@ use futures::{channel::oneshot, executor::block_on}; use kvproto::{ errorpb::FlashbackInProgress, metapb, - raft_cmdpb::{AdminCmdType, RaftCmdResponse, Request}, + raft_cmdpb::{AdminCmdType, CmdType, RaftCmdResponse, Request}, }; use raftstore::store::Callback; use test_raftstore::*; @@ -18,6 +18,39 @@ use txn_types::WriteBatchFlags; const TEST_KEY: &[u8] = b"k1"; const TEST_VALUE: &[u8] = b"v1"; +#[test] +fn test_allow_read_only_request() { + let mut cluster = new_node_cluster(0, 3); + cluster.run(); + cluster.must_transfer_leader(1, new_peer(1, 1)); + + let mut region = 
cluster.get_region(TEST_KEY); + let mut snap_req = Request::default(); + snap_req.set_cmd_type(CmdType::Snap); + // Get snapshot normally. + let snap_resp = request(&mut cluster, &mut region.clone(), snap_req.clone(), false); + assert!(!snap_resp.get_header().has_error()); + // Get snapshot with flashback flag without in the flashback state. + let snap_resp = request(&mut cluster, &mut region.clone(), snap_req.clone(), true); + assert!(!snap_resp.get_header().has_error()); + // Get snapshot with flashback flag with in the flashback state. + cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); + let snap_resp = request(&mut cluster, &mut region.clone(), snap_req.clone(), true); + assert!(!snap_resp.get_header().has_error()); + // Get snapshot without flashback flag with in the flashback state. + let snap_resp = request(&mut cluster, &mut region, snap_req, false); + assert!( + snap_resp + .get_header() + .get_error() + .has_flashback_in_progress(), + "{:?}", + snap_resp + ); + // Finish flashback. + cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); +} + #[test] #[cfg(feature = "failpoints")] fn test_read_after_prepare_flashback() { From d7096ebae6bbedeb3993cb276cf6420daf7f769a Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 7 Dec 2022 18:06:05 +0800 Subject: [PATCH 0392/1149] raftstore-v2: use snapshot to initialize split (#13886) ref tikv/tikv#12842 Create a new storage introduces unnecessary complexity and corner cases. As split is an initialization just like snapshot, this PR reuses snapshot to make the process a lot simpler and more robust. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/fsm/apply.rs | 2 +- components/raftstore-v2/src/fsm/peer.rs | 4 + .../src/operation/command/admin/split.rs | 202 ++++++++++-------- .../raftstore-v2/src/operation/command/mod.rs | 2 +- .../raftstore-v2/src/operation/query/lease.rs | 33 --- .../raftstore-v2/src/operation/query/mod.rs | 4 +- .../raftstore-v2/src/operation/ready/mod.rs | 2 +- .../src/operation/ready/snapshot.rs | 49 ++++- components/raftstore-v2/src/raft/peer.rs | 16 +- components/raftstore-v2/src/raft/storage.rs | 52 +---- components/raftstore-v2/src/router/message.rs | 8 + 11 files changed, 194 insertions(+), 180 deletions(-) diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index b8faf589760..2aa42da2e42 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -37,7 +37,7 @@ pub trait ApplyResReporter { impl, S: FsmScheduler> ApplyResReporter for Mailbox { fn report(&self, apply_res: ApplyRes) { // TODO: check shutdown. 
- self.force_send(PeerMsg::ApplyRes(apply_res)).unwrap(); + let _ = self.force_send(PeerMsg::ApplyRes(apply_res)); } } diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index cd93463a524..cf85522df90 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -221,6 +221,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerMsg::Tick(tick) => self.on_tick(tick), PeerMsg::ApplyRes(res) => self.fsm.peer.on_apply_res(self.store_ctx, res), PeerMsg::SplitInit(msg) => self.fsm.peer.on_split_init(self.store_ctx, msg), + PeerMsg::SplitInitFinish(region_id) => self + .fsm + .peer + .on_split_init_finish(self.store_ctx, region_id), PeerMsg::Start => self.on_start(), PeerMsg::Noop => unimplemented!(), PeerMsg::Persisted { diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 2782b436439..0b97d726a2e 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -25,8 +25,9 @@ //! created by the store, and here init it using the data sent from the parent //! peer. 
-use std::collections::VecDeque; +use std::{cmp, collections::VecDeque}; +use collections::HashSet; use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{ Checkpointer, DeleteStrategy, KvEngine, OpenOptions, RaftEngine, RaftLogBatch, Range, @@ -37,17 +38,18 @@ use keys::enc_end_key; use kvproto::{ metapb::{self, Region, RegionEpoch}, raft_cmdpb::{AdminRequest, AdminResponse, RaftCmdRequest, SplitRequest}, - raft_serverpb::RegionLocalState, + raft_serverpb::{RaftMessage, RaftSnapshotData, RegionLocalState}, }; use protobuf::Message; -use raft::RawNode; +use raft::{prelude::Snapshot, RawNode, INVALID_ID}; use raftstore::{ coprocessor::RegionChangeReason, store::{ fsm::apply::validate_batch_split, metrics::PEER_ADMIN_CMD_COUNTER, + snap::TABLET_SNAPSHOT_VERSION, util::{self, KeysInfoFormatter}, - PeerPessimisticLocks, PeerStat, ProposalContext, RAFT_INIT_LOG_INDEX, + PeerPessimisticLocks, PeerStat, ProposalContext, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }, Result, }; @@ -69,16 +71,38 @@ pub struct SplitResult { pub derived_index: usize, pub tablet_index: u64, } + +#[derive(Debug)] pub struct SplitInit { /// Split region pub region: metapb::Region, pub check_split: bool, - pub parent_is_leader: bool, + pub scheduled: bool, + pub source_leader: bool, + pub source_id: u64, /// In-memory pessimistic locks that should be inherited from parent region pub locks: PeerPessimisticLocks, } +impl SplitInit { + fn to_snapshot(&self) -> Snapshot { + let mut snapshot = Snapshot::default(); + // Set snapshot metadata. + snapshot.mut_metadata().set_term(RAFT_INIT_LOG_TERM); + snapshot.mut_metadata().set_index(RAFT_INIT_LOG_INDEX); + let conf_state = util::conf_state_from_region(&self.region); + snapshot.mut_metadata().set_conf_state(conf_state); + // Set snapshot data. 
+ let mut snap_data = RaftSnapshotData::default(); + snap_data.set_region(self.region.clone()); + snap_data.set_version(TABLET_SNAPSHOT_VERSION); + snap_data.mut_meta().set_for_balance(false); + snapshot.set_data(snap_data.write_to_bytes().unwrap().into()); + snapshot + } +} + impl Peer { pub fn propose_split( &mut self, @@ -263,7 +287,7 @@ impl Apply { } impl Peer { - pub fn on_ready_split_region( + pub fn on_apply_res_split( &mut self, store_ctx: &mut StoreContext, derived_index: usize, @@ -321,16 +345,20 @@ impl Peer { } let last_region_id = regions.last().unwrap().get_id(); + let mut new_ids = HashSet::default(); for (new_region, locks) in regions.into_iter().zip(region_locks) { let new_region_id = new_region.get_id(); if new_region_id == region_id { continue; } + new_ids.insert(new_region_id); let split_init = PeerMsg::SplitInit(Box::new(SplitInit { region: new_region, - parent_is_leader: self.is_leader(), + source_leader: self.is_leader(), + source_id: region_id, check_split: last_region_id == new_region_id, + scheduled: false, locks, })); @@ -353,108 +381,104 @@ impl Peer { _ => unreachable!(), } } + self.split_trace_mut().push((tablet_index, new_ids)); } pub fn on_split_init( &mut self, store_ctx: &mut StoreContext, - split_init: Box, + mut split_init: Box, ) { let region_id = split_init.region.id; - let replace = split_init.region.get_region_epoch().get_version() - > self - .storage() - .region_state() - .get_region() - .get_region_epoch() - .get_version(); - - if !self.storage().is_initialized() || replace { - let split_temp_path = store_ctx.tablet_factory.tablet_path_with_prefix( - SPLIT_PREFIX, - region_id, - RAFT_INIT_LOG_INDEX, - ); - - let tablet = store_ctx - .tablet_factory - .load_tablet(&split_temp_path, region_id, RAFT_INIT_LOG_INDEX) - .unwrap_or_else(|e| { - panic!( - "{:?} fails to load tablet {:?} :{:?}", - self.logger.list(), - split_temp_path, - e - ) - }); - - self.tablet_mut().set(tablet); - - let storage = Storage::with_split( - 
self.peer().get_store_id(), - &split_init.region, - store_ctx.engine.clone(), - store_ctx.read_scheduler.clone(), - &store_ctx.logger, - ) - .unwrap_or_else(|e| panic!("fail to create storage: {:?}", e)) - .unwrap(); - - let applied_index = storage.apply_state().get_applied_index(); - let peer_id = storage.peer().get_id(); - let raft_cfg = store_ctx.cfg.new_raft_config(peer_id, applied_index); - - let mut raft_group = RawNode::new(&raft_cfg, storage, &self.logger).unwrap(); - // If this region has only one peer and I am the one, campaign directly. - if split_init.region.get_peers().len() == 1 { - raft_group.campaign().unwrap(); - self.set_has_ready(); - } - self.set_raft_group(raft_group); - } else { - // TODO: when reaching here (peer is initalized before and cannot be replaced), - // it is much complexer. + if self.storage().is_initialized() && self.persisted_index() >= RAFT_INIT_LOG_INDEX { + let _ = store_ctx + .router + .force_send(split_init.source_id, PeerMsg::SplitInitFinish(region_id)); return; } - { - let mut meta = store_ctx.store_meta.lock().unwrap(); + if self.storage().is_initialized() || self.raft_group().snap().is_some() { + // It accepts a snapshot already but not finish applied yet. 
+ let prev = self.storage_mut().split_init_mut().replace(split_init); + assert!(prev.is_none(), "{:?}", prev); + return; + } - info!( - self.logger, - "init split region"; - "region" => ?split_init.region, + split_init.scheduled = true; + let snap = split_init.to_snapshot(); + let mut msg = raft::eraftpb::Message::default(); + msg.set_to(self.peer_id()); + msg.set_from(self.leader_id()); + msg.set_msg_type(raft::eraftpb::MessageType::MsgSnapshot); + msg.set_snapshot(snap); + msg.set_term(cmp::max(self.term(), RAFT_INIT_LOG_TERM)); + let res = self.raft_group_mut().step(msg); + let accept_snap = self.raft_group().snap().is_some(); + if res.is_err() || !accept_snap { + panic!( + "{:?} failed to accept snapshot {:?} with error {}", + self.logger.list(), + res, + accept_snap ); + } + let prev = self.storage_mut().split_init_mut().replace(split_init); + assert!(prev.is_none(), "{:?}", prev); + self.set_has_ready(); + } - // TODO: GlobalReplicationState - - for p in split_init.region.get_peers() { - self.insert_peer_cache(p.clone()); - } - - if split_init.parent_is_leader { - if self.maybe_campaign() { - self.set_has_ready(); - } - - *self.txn_ext().pessimistic_locks.write() = split_init.locks; - // The new peer is likely to become leader, send a heartbeat immediately to - // reduce client query miss. 
- self.region_heartbeat_pd(store_ctx); - } + pub fn post_split_init( + &mut self, + store_ctx: &mut StoreContext, + split_init: Box, + ) { + if split_init.source_leader + && self.leader_id() == INVALID_ID + && self.term() == RAFT_INIT_LOG_TERM + { + let _ = self.raft_group_mut().campaign(); + self.set_has_ready(); - meta.tablet_caches.insert(region_id, self.tablet().clone()); - meta.readers - .insert(region_id, self.generate_read_delegate()); - meta.region_read_progress - .insert(region_id, self.read_progress().clone()); + *self.txn_ext().pessimistic_locks.write() = split_init.locks; + // The new peer is likely to become leader, send a heartbeat immediately to + // reduce client query miss. + self.region_heartbeat_pd(store_ctx); } + let region_id = self.region_id(); if split_init.check_split { // TODO: check if the last region needs to split again } + let _ = store_ctx + .router + .force_send(split_init.source_id, PeerMsg::SplitInitFinish(region_id)); + } - self.schedule_apply_fsm(store_ctx); + pub fn on_split_init_finish(&mut self, ctx: &mut StoreContext, region_id: u64) { + let mut found = false; + for (tablet_index, ids) in self.split_trace_mut() { + if ids.remove(®ion_id) { + found = true; + break; + } + } + assert!(found, "{:?} {}", self.logger.list(), region_id); + let split_trace = self.split_trace_mut(); + let mut off = 0; + let mut admin_flushed = 0; + for (tablet_index, ids) in split_trace.iter() { + if !ids.is_empty() { + break; + } + admin_flushed = *tablet_index; + off += 1; + } + if off > 0 { + // There should be very few elements in the vector. + split_trace.drain(..off); + // Persist admin flushed. 
+ self.set_has_ready(); + } } } diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 7e69a3f1c7c..bd175ef7a4d 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -305,7 +305,7 @@ impl Peer { regions, derived_index, tablet_index, - }) => self.on_ready_split_region(ctx, derived_index, tablet_index, regions), + }) => self.on_apply_res_split(ctx, derived_index, tablet_index, regions), } } diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index 114080bcdbb..bbff28b272f 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -175,45 +175,12 @@ impl Peer { self.maybe_update_read_progress(reader, progress); } if let Some(progress) = read_progress { - // TODO: remove it - self.add_reader_if_necessary(store_meta); - let mut meta = store_meta.lock().unwrap(); let reader = meta.readers.get_mut(&self.region_id()).unwrap(); self.maybe_update_read_progress(reader, progress); } } - // TODO: remove this block of code when snapshot is done; add the logic into - // on_persist_snapshot. - pub(crate) fn add_reader_if_necessary(&mut self, store_meta: &Mutex>) { - let mut meta = store_meta.lock().unwrap(); - // TODO: remove this block of code when snapshot is done; add the logic into - // on_persist_snapshot. 
- let reader = meta.readers.get_mut(&self.region_id()); - if reader.is_none() { - let region = self.region().clone(); - let region_id = region.get_id(); - let peer_id = self.peer_id(); - let delegate = ReadDelegate { - region: Arc::new(region), - peer_id, - term: self.term(), - applied_term: self.entry_storage().applied_term(), - leader_lease: None, - last_valid_ts: Timespec::new(0, 0), - tag: format!("[region {}] {}", region_id, peer_id), - read_progress: self.read_progress().clone(), - pending_remove: false, - bucket_meta: None, - txn_extra_op: Default::default(), - txn_ext: Default::default(), - track_ver: TrackVer::new(), - }; - meta.readers.insert(self.region_id(), delegate); - } - } - pub(crate) fn maybe_update_read_progress( &self, reader: &mut ReadDelegate, diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 77ca7b90074..3a3052ab902 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -388,7 +388,7 @@ impl Peer { // V2 doesn't persist commit index and term, fill them with in-memory values. 
meta.raft_apply.commit_index = cmp::min( self.raft_group().raft.raft_log.committed, - self.raft_group().raft.raft_log.persisted, + self.persisted_index(), ); meta.raft_apply.commit_term = self .raft_group() @@ -426,8 +426,6 @@ impl Peer { if progress_to_be_updated && self.is_leader() { // TODO: add coprocessor_host hook let progress = ReadProgress::applied_term(applied_term); - // TODO: remove it - self.add_reader_if_necessary(&ctx.store_meta); let mut meta = ctx.store_meta.lock().unwrap(); let reader = meta.readers.get_mut(&self.region_id()).unwrap(); self.maybe_update_read_progress(reader, progress); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 1c8c9d80338..6a91c25f1f6 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -403,7 +403,7 @@ impl Peer { let persisted_number = self.async_writer.persisted_number(); self.raft_group_mut().on_persist_ready(persisted_number); - let persisted_index = self.raft_group().raft.raft_log.persisted; + let persisted_index = self.persisted_index(); /// The apply snapshot process order would be: /// - Get the snapshot from the ready /// - Wait for async writer to load this tablet diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 32e8a3f8ff8..4cd4b5265d8 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -21,20 +21,20 @@ use std::{ borrow::BorrowMut, fmt::{self, Debug}, - mem, + fs, mem, sync::{ atomic::{AtomicBool, AtomicU64, Ordering}, mpsc, Arc, }, }; -use engine_traits::{KvEngine, OpenOptions, RaftEngine, TabletFactory}; +use engine_traits::{KvEngine, OpenOptions, RaftEngine, TabletFactory, SPLIT_PREFIX}; use kvproto::raft_serverpb::{PeerState, RaftSnapshotData, RegionLocalState}; use protobuf::Message; use 
raft::eraftpb::Snapshot; use raftstore::store::{ metrics::STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER, GenSnapRes, ReadTask, TabletSnapKey, - TabletSnapManager, Transport, WriteTask, + TabletSnapManager, Transport, WriteTask, RAFT_INIT_LOG_INDEX, }; use slog::{error, info, warn}; use tikv_util::{box_err, box_try, worker::Scheduler}; @@ -120,7 +120,7 @@ impl Peer { } pub fn on_applied_snapshot(&mut self, ctx: &mut StoreContext) { - let persisted_index = self.raft_group().raft.raft_log.persisted; + let persisted_index = self.persisted_index(); let first_index = self.storage().entry_storage().first_index(); if first_index == persisted_index + 1 { let region_id = self.region_id(); @@ -132,9 +132,26 @@ impl Peer { self.schedule_apply_fsm(ctx); self.storage_mut().on_applied_snapshot(); self.raft_group_mut().advance_apply_to(persisted_index); + { + let mut meta = ctx.store_meta.lock().unwrap(); + meta.tablet_caches.insert(region_id, self.tablet().clone()); + meta.readers + .insert(region_id, self.generate_read_delegate()); + meta.region_read_progress + .insert(region_id, self.read_progress().clone()); + } self.read_progress_mut() .update_applied_core(persisted_index); - info!(self.logger, "apply tablet snapshot completely"); + let split = self.storage_mut().split_init_mut().take(); + if split.as_ref().map_or(true, |s| { + !s.scheduled || persisted_index != RAFT_INIT_LOG_INDEX + }) { + info!(self.logger, "apply tablet snapshot completely"); + } + if let Some(init) = split { + info!(self.logger, "init with snapshot finished"); + self.post_split_init(ctx, init); + } } } } @@ -381,8 +398,18 @@ impl Storage { self.entry_storage_mut().set_truncated_term(last_term); self.entry_storage_mut().set_last_term(last_term); - let key = TabletSnapKey::new(region_id, peer_id, last_term, last_index); - let mut path = snap_mgr.final_recv_path(&key); + let (path, clean_split) = match self.split_init_mut() { + // If index not match, the peer may accept a newer snapshot after split. 
+ Some(init) if init.scheduled && last_index == RAFT_INIT_LOG_INDEX => ( + tablet_factory.tablet_path_with_prefix(SPLIT_PREFIX, region_id, last_index), + false, + ), + si => { + let key = TabletSnapKey::new(region_id, peer_id, last_term, last_index); + (snap_mgr.final_recv_path(&key), si.is_some()) + } + }; + let logger = self.logger().clone(); // The snapshot require no additional processing such as ingest them to DB, but // it should load it into the factory after it persisted. @@ -395,6 +422,14 @@ impl Storage { e ); } + if clean_split { + let path = tablet_factory.tablet_path_with_prefix( + SPLIT_PREFIX, + region_id, + RAFT_INIT_LOG_INDEX, + ); + let _ = fs::remove_dir_all(path); + } }; task.persisted_cb = (Some(Box::new(hook))); task.has_snapshot = true; diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index a9730a036e7..16e3e54d5f2 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -6,7 +6,7 @@ use std::{ time::{Duration, Instant}, }; -use collections::HashMap; +use collections::{HashMap, HashSet}; use crossbeam::atomic::AtomicCell; use engine_traits::{KvEngine, OpenOptions, RaftEngine, TabletFactory}; use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, pdpb, raft_serverpb::RegionLocalState}; @@ -88,6 +88,9 @@ pub struct Peer { /// Check whether this proposal can be proposed based on its epoch. proposal_control: ProposalControl, + + // Trace which peers have not finished split. + split_trace: Vec<(u64, HashSet)>, } impl Peer { @@ -161,6 +164,7 @@ impl Peer { txn_ext: Arc::default(), txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::Noop)), proposal_control: ProposalControl::new(0), + split_trace: vec![], }; // If this region has only one peer and I am the one, campaign directly. 
@@ -330,6 +334,11 @@ impl Peer { self.raft_group = raft_group; } + #[inline] + pub fn persisted_index(&self) -> u64 { + self.raft_group.raft.raft_log.persisted + } + #[inline] pub fn self_stat(&self) -> &PeerStat { &self.self_stat @@ -600,4 +609,9 @@ impl Peer { self.update_max_timestamp_pd(ctx, initial_status); } + + #[inline] + pub fn split_trace_mut(&mut self) -> &mut Vec<(u64, HashSet)> { + &mut self.split_trace + } } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index b3ad56af4fd..369a25984bf 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -22,7 +22,7 @@ use slog::{info, o, Logger}; use tikv_util::{box_err, store::find_peer, worker::Scheduler}; use crate::{ - operation::{GenSnapTask, SnapState}, + operation::{GenSnapTask, SnapState, SplitInit}, Result, }; @@ -69,6 +69,7 @@ pub struct Storage { /// Snapshot part. snap_state: RefCell, gen_snap_task: RefCell>>, + split_init: Option>, } impl Debug for Storage { @@ -202,49 +203,6 @@ impl Storage { .map(Some) } - /// Creates a new storage for split peer. - /// - /// Except for region local state which uses the `region` provided with the - /// inital tablet index, all uses the inital states. 
- pub fn with_split( - store_id: u64, - region: &metapb::Region, - engine: ER, - read_scheduler: Scheduler>, - logger: &Logger, - ) -> Result>> { - let mut region_state = RegionLocalState::default(); - region_state.set_region(region.clone()); - region_state.set_state(PeerState::Normal); - region_state.set_tablet_index(RAFT_INIT_LOG_INDEX); - - let mut apply_state = RaftApplyState::default(); - apply_state.set_applied_index(RAFT_INIT_LOG_INDEX); - apply_state - .mut_truncated_state() - .set_index(RAFT_INIT_LOG_INDEX); - apply_state - .mut_truncated_state() - .set_term(RAFT_INIT_LOG_TERM); - - let mut raft_state = RaftLocalState::default(); - raft_state.set_last_index(RAFT_INIT_LOG_INDEX); - raft_state.mut_hard_state().set_term(RAFT_INIT_LOG_TERM); - raft_state.mut_hard_state().set_commit(RAFT_INIT_LOG_INDEX); - - Self::create( - store_id, - region_state, - raft_state, - apply_state, - engine, - read_scheduler, - true, - logger, - ) - .map(Some) - } - fn create( store_id: u64, region_state: RegionLocalState, @@ -281,6 +239,7 @@ impl Storage { logger, snap_state: RefCell::new(SnapState::Relax), gen_snap_task: RefCell::new(Box::new(None)), + split_init: None, }) } @@ -289,6 +248,11 @@ impl Storage { &mut self.region_state } + #[inline] + pub fn split_init_mut(&mut self) -> &mut Option> { + &mut self.split_init + } + #[inline] pub fn raft_state(&self) -> &RaftLocalState { self.entry_storage.raft_state() diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index a4681d8a873..13037bd1a26 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -132,6 +132,7 @@ pub enum PeerMsg { Start, /// Messages from peer to peer in the same store SplitInit(Box), + SplitInitFinish(u64), /// A message only used to notify a peer. Noop, /// A message that indicates an asynchronous write has finished. 
@@ -173,6 +174,13 @@ impl fmt::Debug for PeerMsg { PeerMsg::SplitInit(_) => { write!(fmt, "Split initialization") } + PeerMsg::SplitInitFinish(region_id) => { + write!( + fmt, + "Split initialization finished from region {}", + region_id + ) + } PeerMsg::Noop => write!(fmt, "Noop"), PeerMsg::Persisted { peer_id, From 0d5a292a6d62bb29cd686e7b91538f6c44040866 Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 7 Dec 2022 19:44:04 +0800 Subject: [PATCH 0393/1149] *: always share block cache (#13903) close tikv/tikv#12936 Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- cmd/tikv-ctl/src/executor.rs | 7 +- components/engine_rocks/src/engine.rs | 12 +- components/engine_rocks/src/rocks_metrics.rs | 28 +-- components/server/src/raft_engine_switch.rs | 3 +- components/server/src/server.rs | 50 ++--- components/snap_recovery/src/init_cluster.rs | 9 +- .../test_raftstore/src/common-test.toml | 1 - components/test_raftstore/src/util.rs | 7 +- components/tikv_kv/src/rocksdb_engine.rs | 7 +- etc/config-template.toml | 8 +- src/config.rs | 188 ++++++------------ src/server/engine_factory.rs | 30 ++- src/server/engine_factory_v2.rs | 59 ++---- src/storage/config.rs | 12 +- src/storage/config_manager.rs | 6 - src/storage/kv/test_engine_builder.rs | 3 +- src/storage/mod.rs | 6 +- tests/failpoints/cases/test_storage.rs | 3 +- tests/integrations/config/mod.rs | 3 +- tests/integrations/config/test-custom.toml | 1 - 20 files changed, 133 insertions(+), 310 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index b2d25a32d5b..1c42d728ca9 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -62,7 +62,6 @@ pub fn new_debug_executor( .map(Arc::new); let cache = cfg.storage.block_cache.build_shared_cache(); - let shared_block_cache = cache.is_some(); let env = cfg .build_shared_rocks_env(key_manager.clone(), None /* io_rate_limiter */) .unwrap(); @@ -75,11 +74,10 @@ pub fn new_debug_executor( .build_cf_opts(&cache, None, 
cfg.storage.api_version()); let kv_path = PathBuf::from(kv_path).canonicalize().unwrap(); let kv_path = kv_path.to_str().unwrap(); - let mut kv_db = match new_engine_opt(kv_path, kv_db_opts, kv_cfs_opts) { + let kv_db = match new_engine_opt(kv_path, kv_db_opts, kv_cfs_opts) { Ok(db) => db, Err(e) => handle_engine_error(e), }; - kv_db.set_shared_block_cache(shared_block_cache); let cfg_controller = ConfigController::default(); if !cfg.raft_engine.enable { @@ -91,11 +89,10 @@ pub fn new_debug_executor( error!("raft db not exists: {}", raft_path); tikv_util::logger::exit_process_gracefully(-1); } - let mut raft_db = match new_engine_opt(&raft_path, raft_db_opts, raft_db_cf_opts) { + let raft_db = match new_engine_opt(&raft_path, raft_db_opts, raft_db_cf_opts) { Ok(db) => db, Err(e) => handle_engine_error(e), }; - raft_db.set_shared_block_cache(shared_block_cache); let debugger = Debugger::new(Engines::new(kv_db, raft_db), cfg_controller); Box::new(debugger) as Box } else { diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 41066c85756..720a92a8bdd 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -25,7 +25,6 @@ use crate::{ #[derive(Clone, Debug)] pub struct RocksEngine { db: Arc, - shared_block_cache: bool, support_multi_batch_write: bool, } @@ -37,7 +36,6 @@ impl RocksEngine { pub fn from_db(db: Arc) -> Self { RocksEngine { db: db.clone(), - shared_block_cache: false, support_multi_batch_write: db.get_db_options().is_enable_multi_batch_write(), } } @@ -50,14 +48,6 @@ impl RocksEngine { self.db.clone() } - pub fn set_shared_block_cache(&mut self, enable: bool) { - self.shared_block_cache = enable; - } - - pub fn shared_block_cache(&self) -> bool { - self.shared_block_cache - } - pub fn support_multi_batch_write(&self) -> bool { self.support_multi_batch_write } @@ -95,7 +85,7 @@ impl KvEngine for RocksEngine { } } } - flush_engine_properties(&self.db, instance, 
self.shared_block_cache); + flush_engine_properties(&self.db, instance); flush_engine_iostall_properties(&self.db, instance); } diff --git a/components/engine_rocks/src/rocks_metrics.rs b/components/engine_rocks/src/rocks_metrics.rs index 4a88c6675ed..026ef36cce7 100644 --- a/components/engine_rocks/src/rocks_metrics.rs +++ b/components/engine_rocks/src/rocks_metrics.rs @@ -931,7 +931,7 @@ pub fn flush_engine_iostall_properties(engine: &DB, name: &str) { } } -pub fn flush_engine_properties(engine: &DB, name: &str, shared_block_cache: bool) { +pub fn flush_engine_properties(engine: &DB, name: &str) { for cf in engine.cf_names() { let handle = crate::util::get_cf_handle(engine, cf).unwrap(); // It is important to monitor each cf's size, especially the "raft" and "lock" @@ -941,13 +941,6 @@ pub fn flush_engine_properties(engine: &DB, name: &str, shared_block_cache: bool .with_label_values(&[name, cf]) .set(cf_used_size as i64); - if !shared_block_cache { - let block_cache_usage = engine.get_block_cache_usage_cf(handle); - STORE_ENGINE_BLOCK_CACHE_USAGE_GAUGE_VEC - .with_label_values(&[name, cf]) - .set(block_cache_usage as i64); - } - let blob_cache_usage = engine.get_blob_cache_usage_cf(handle); STORE_ENGINE_BLOB_CACHE_USAGE_GAUGE_VEC .with_label_values(&[name, cf]) @@ -1110,15 +1103,13 @@ pub fn flush_engine_properties(engine: &DB, name: &str, shared_block_cache: bool .set(d as i64); } - if shared_block_cache { - // Since block cache is shared, getting cache size from any CF is fine. Here we - // get from default CF. - let handle = crate::util::get_cf_handle(engine, CF_DEFAULT).unwrap(); - let block_cache_usage = engine.get_block_cache_usage_cf(handle); - STORE_ENGINE_BLOCK_CACHE_USAGE_GAUGE_VEC - .with_label_values(&[name, "all"]) - .set(block_cache_usage as i64); - } + // Since block cache is shared, getting cache size from any CF is fine. Here we + // get from default CF. 
+ let handle = crate::util::get_cf_handle(engine, CF_DEFAULT).unwrap(); + let block_cache_usage = engine.get_block_cache_usage_cf(handle); + STORE_ENGINE_BLOCK_CACHE_USAGE_GAUGE_VEC + .with_label_values(&[name, "all"]) + .set(block_cache_usage as i64); } // For property metrics @@ -1627,8 +1618,7 @@ mod tests { flush_engine_histogram_metrics(*tp, HistogramData::default(), "kv"); } - let shared_block_cache = false; - flush_engine_properties(engine.as_inner(), "kv", shared_block_cache); + flush_engine_properties(engine.as_inner(), "kv"); let handle = engine.as_inner().cf_handle("default").unwrap(); let info = engine .as_inner() diff --git a/components/server/src/raft_engine_switch.rs b/components/server/src/raft_engine_switch.rs index 637088efa88..ba489f1be0f 100644 --- a/components/server/src/raft_engine_switch.rs +++ b/components/server/src/raft_engine_switch.rs @@ -237,6 +237,7 @@ mod tests { cfg.raft_store.raftdb_path = raftdb_path.to_str().unwrap().to_owned(); cfg.raftdb.wal_dir = raftdb_wal_path.to_str().unwrap().to_owned(); cfg.raft_engine.mut_config().dir = raft_engine_path.to_str().unwrap().to_owned(); + let cache = cfg.storage.block_cache.build_shared_cache(); // Dump logs from RocksEngine to RaftLogEngine. 
let raft_engine = RaftLogEngine::new( @@ -251,7 +252,7 @@ mod tests { let raftdb = engine_rocks::util::new_engine_opt( &cfg.raft_store.raftdb_path, cfg.raftdb.build_opt(), - cfg.raftdb.build_cf_opts(&None), + cfg.raftdb.build_cf_opts(&cache), ) .unwrap(); let mut batch = raftdb.log_batch(0); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index b52abc960d8..e93b18fed96 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -807,7 +807,6 @@ where tikv::config::Module::Storage, Box::new(StorageConfigManger::new( self.tablet_factory.as_ref().unwrap().clone(), - self.config.storage.block_cache.shared, ttl_scheduler, flow_controller, storage.get_scheduler(), @@ -1676,14 +1675,9 @@ where } pub trait ConfiguredRaftEngine: RaftEngine { - fn build( - _: &TikvConfig, - _: &Arc, - _: &Option>, - _: &Option, - ) -> Self; + fn build(_: &TikvConfig, _: &Arc, _: &Option>, _: &Cache) -> Self; fn as_rocks_engine(&self) -> Option<&RocksEngine>; - fn register_config(&self, _cfg_controller: &mut ConfigController, _share_cache: bool); + fn register_config(&self, _cfg_controller: &mut ConfigController); } impl ConfiguredRaftEngine for T { @@ -1691,14 +1685,14 @@ impl ConfiguredRaftEngine for T { _: &TikvConfig, _: &Arc, _: &Option>, - _: &Option, + _: &Cache, ) -> Self { unimplemented!() } default fn as_rocks_engine(&self) -> Option<&RocksEngine> { None } - default fn register_config(&self, _cfg_controller: &mut ConfigController, _share_cache: bool) {} + default fn register_config(&self, _cfg_controller: &mut ConfigController) {} } impl ConfiguredRaftEngine for RocksEngine { @@ -1706,7 +1700,7 @@ impl ConfiguredRaftEngine for RocksEngine { config: &TikvConfig, env: &Arc, key_manager: &Option>, - block_cache: &Option, + block_cache: &Cache, ) -> Self { let mut raft_data_state_machine = RaftDataStateMachine::new( &config.storage.data_dir, @@ -1720,10 +1714,8 @@ impl ConfiguredRaftEngine for RocksEngine { let mut raft_db_opts = 
config_raftdb.build_opt(); raft_db_opts.set_env(env.clone()); let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); - let mut raftdb = - engine_rocks::util::new_engine_opt(raft_db_path, raft_db_opts, raft_cf_opts) - .expect("failed to open raftdb"); - raftdb.set_shared_block_cache(block_cache.is_some()); + let raftdb = engine_rocks::util::new_engine_opt(raft_db_path, raft_db_opts, raft_cf_opts) + .expect("failed to open raftdb"); if should_dump { let raft_engine = @@ -1741,14 +1733,10 @@ impl ConfiguredRaftEngine for RocksEngine { Some(self) } - fn register_config(&self, cfg_controller: &mut ConfigController, share_cache: bool) { + fn register_config(&self, cfg_controller: &mut ConfigController) { cfg_controller.register( tikv::config::Module::Raftdb, - Box::new(DbConfigManger::new( - Arc::new(self.clone()), - DbType::Raft, - share_cache, - )), + Box::new(DbConfigManger::new(Arc::new(self.clone()), DbType::Raft)), ); } } @@ -1758,7 +1746,7 @@ impl ConfiguredRaftEngine for RaftLogEngine { config: &TikvConfig, env: &Arc, key_manager: &Option>, - block_cache: &Option, + block_cache: &Cache, ) -> Self { let mut raft_data_state_machine = RaftDataStateMachine::new( &config.storage.data_dir, @@ -1812,16 +1800,13 @@ impl TikvServer { ); // Create kv engine. 
- let mut builder = KvEngineFactoryBuilder::new(env, &self.config, &self.store_path) + let builder = KvEngineFactoryBuilder::new(env, &self.config, &self.store_path, block_cache) .compaction_event_sender(Arc::new(RaftRouterCompactedEventSender { router: Mutex::new(self.router.clone()), })) .region_info_accessor(self.region_info_accessor.clone()) .sst_recovery_sender(self.init_sst_recovery_sender()) .flow_listener(flow_listener); - if let Some(cache) = block_cache { - builder = builder.block_cache(cache); - } let factory = Arc::new(builder.build()); let kv_engine = factory .create_shared_db() @@ -1831,16 +1816,10 @@ impl TikvServer { let cfg_controller = self.cfg_controller.as_mut().unwrap(); cfg_controller.register( tikv::config::Module::Rocksdb, - Box::new(DbConfigManger::new( - factory.clone(), - DbType::Kv, - self.config.storage.block_cache.shared, - )), + Box::new(DbConfigManger::new(factory.clone(), DbType::Kv)), ); self.tablet_factory = Some(factory.clone()); - engines - .raft - .register_config(cfg_controller, self.config.storage.block_cache.shared); + engines.raft.register_config(cfg_controller); let engines_info = Arc::new(EnginesResourceInfo::new( factory, @@ -2129,8 +2108,9 @@ mod test { config.rocksdb.lockcf.soft_pending_compaction_bytes_limit = Some(ReadableSize(1)); let env = Arc::new(Env::default()); let path = Builder::new().prefix("test-update").tempdir().unwrap(); + let cache = config.storage.block_cache.build_shared_cache(); - let builder = KvEngineFactoryBuilder::new(env, &config, path.path()); + let builder = KvEngineFactoryBuilder::new(env, &config, path.path(), cache); let factory = builder.build_v2(); for i in 1..6 { diff --git a/components/snap_recovery/src/init_cluster.rs b/components/snap_recovery/src/init_cluster.rs index fe6c559da27..08a45073309 100644 --- a/components/snap_recovery/src/init_cluster.rs +++ b/components/snap_recovery/src/init_cluster.rs @@ -316,14 +316,11 @@ pub fn create_local_engine_service( let db_path = config 
.infer_kv_engine_path(None) .map_err(|e| format!("infer kvdb path: {}", e))?; - let mut kv_db = match new_engine_opt(&db_path, db_opts, cf_opts) { + let kv_db = match new_engine_opt(&db_path, db_opts, cf_opts) { Ok(db) => db, Err(e) => handle_engine_error(e), }; - let shared_block_cache = block_cache.is_some(); - kv_db.set_shared_block_cache(shared_block_cache); - // init raft engine, either is rocksdb or raft engine if !config.raft_engine.enable { // rocksdb @@ -333,12 +330,10 @@ pub fn create_local_engine_service( let raft_path = config .infer_raft_db_path(None) .map_err(|e| format!("infer raftdb path: {}", e))?; - let mut raft_db = match new_engine_opt(&raft_path, raft_db_opts, raft_db_cf_opts) { + let raft_db = match new_engine_opt(&raft_path, raft_db_opts, raft_db_cf_opts) { Ok(db) => db, Err(e) => handle_engine_error(e), }; - // let mut raft_db = RocksEngine::from_db(Arc::new(raft_db)); - raft_db.set_shared_block_cache(shared_block_cache); let local_engines = LocalEngines::new(Engines::new(kv_db, raft_db)); Ok(Box::new(local_engines) as Box) diff --git a/components/test_raftstore/src/common-test.toml b/components/test_raftstore/src/common-test.toml index a121a6c1e0e..334291f7213 100644 --- a/components/test_raftstore/src/common-test.toml +++ b/components/test_raftstore/src/common-test.toml @@ -34,7 +34,6 @@ scheduler-concurrency = 10 scheduler-worker-pool-size = 1 [storage.block-cache] -shared = true capacity = "64MB" [pd] diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 64bdca19025..c6b70fa24f0 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -596,11 +596,8 @@ pub fn create_test_engine( let raft_engine = RaftTestEngine::build(&cfg, &env, &key_manager, &cache); - let mut builder = - KvEngineFactoryBuilder::new(env, &cfg, dir.path()).sst_recovery_sender(Some(scheduler)); - if let Some(cache) = cache { - builder = builder.block_cache(cache); - } + let mut builder = 
KvEngineFactoryBuilder::new(env, &cfg, dir.path(), cache) + .sst_recovery_sender(Some(scheduler)); if let Some(router) = router { builder = builder.compaction_event_sender(Arc::new(RaftRouterCompactedEventSender { router: Mutex::new(router), diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 26e2c735254..065766ae254 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -114,7 +114,6 @@ impl RocksEngine { path: &str, db_opts: Option, cfs_opts: Vec<(CfName, RocksCfOptions)>, - shared_block_cache: bool, io_rate_limiter: Option>, ) -> Result { info!("RocksEngine: creating for path"; "path" => path); @@ -134,11 +133,7 @@ impl RocksEngine { let db = engine_rocks::util::new_engine_opt(&path, db_opts, cfs_opts)?; // It does not use the raft_engine, so it is ok to fill with the same // rocksdb. - let mut kv_engine = db.clone(); - let mut raft_engine = db; - kv_engine.set_shared_block_cache(shared_block_cache); - raft_engine.set_shared_block_cache(shared_block_cache); - let engines = Engines::new(kv_engine, raft_engine); + let engines = Engines::new(db.clone(), db); let sched = worker.start("engine-rocksdb", Runner(engines.clone())); Ok(RocksEngine { sched, diff --git a/etc/config-template.toml b/etc/config-template.toml index a2b3ab13b00..3ddbb6fc879 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -271,17 +271,11 @@ ## Set to 0 to disable this feature if you want to panic immediately when encountering such an error. # background-error-recovery-window = "1h" -[storage.block-cache] -## Whether to create a shared block cache for all RocksDB column families. -## ## Block cache is used by RocksDB to cache uncompressed blocks. Big block cache can speed up read. ## It is recommended to turn on shared block cache. Since only the total cache size need to be ## set, it is easier to config. 
In most cases it should be able to auto-balance cache usage ## between column families with standard LRU algorithm. -## -## The rest of config in the storage.block-cache session is effective only when shared block cache -## is on. -# shared = true +[storage.block-cache] ## Size of the shared block cache. Normally it should be tuned to 30%-50% of system's total memory. ## When the config is not set, it is decided by the sum of the following fields or their default diff --git a/src/config.rs b/src/config.rs index e9eca154d6e..97bab103af2 100644 --- a/src/config.rs +++ b/src/config.rs @@ -27,8 +27,7 @@ use engine_rocks::{ properties::MvccPropertiesCollectorFactory, raw::{ BlockBasedOptions, Cache, ChecksumType, CompactionPriority, DBCompactionStyle, - DBCompressionType, DBRateLimiterMode, DBRecoveryMode, Env, LRUCacheOptions, - PrepopulateBlockCache, + DBCompressionType, DBRateLimiterMode, DBRecoveryMode, Env, PrepopulateBlockCache, }, util::{FixedPrefixSliceTransform, FixedSuffixSliceTransform, NoopSliceTransform}, RaftDbLogger, RangePropertiesCollectorFactory, RawMvccPropertiesCollectorFactory, @@ -503,17 +502,11 @@ macro_rules! write_into_metrics { } macro_rules! 
build_cf_opt { - ($opt:ident, $cf_name:ident, $cache:ident, $region_info_provider:ident) => {{ + ($opt:ident, $cf_name:ident, $cache:expr, $region_info_provider:ident) => {{ let mut block_base_opts = BlockBasedOptions::new(); block_base_opts.set_block_size($opt.block_size.0 as usize); block_base_opts.set_no_block_cache($opt.disable_block_cache); - if let Some(cache) = $cache { - block_base_opts.set_block_cache(cache); - } else { - let mut cache_opts = LRUCacheOptions::new(); - cache_opts.set_capacity($opt.block_cache_size.0 as usize); - block_base_opts.set_block_cache(&Cache::new_lru_cache(cache_opts)); - } + block_base_opts.set_block_cache($cache); block_base_opts.set_cache_index_and_filter_blocks($opt.cache_index_and_filter_blocks); block_base_opts .set_pin_l0_filter_and_index_blocks_in_cache($opt.pin_l0_filter_and_index_blocks); @@ -664,7 +657,7 @@ impl Default for DefaultCfConfig { impl DefaultCfConfig { pub fn build_opt( &self, - cache: &Option, + cache: &Cache, region_info_accessor: Option<&RegionInfoAccessor>, api_version: ApiVersion, ) -> RocksCfOptions { @@ -780,7 +773,7 @@ impl Default for WriteCfConfig { impl WriteCfConfig { pub fn build_opt( &self, - cache: &Option, + cache: &Cache, region_info_accessor: Option<&RegionInfoAccessor>, ) -> RocksCfOptions { let mut cf_opts = build_cf_opt!(self, CF_WRITE, cache, region_info_accessor); @@ -876,7 +869,7 @@ impl Default for LockCfConfig { } impl LockCfConfig { - pub fn build_opt(&self, cache: &Option) -> RocksCfOptions { + pub fn build_opt(&self, cache: &Cache) -> RocksCfOptions { let no_region_info_accessor: Option<&RegionInfoAccessor> = None; let mut cf_opts = build_cf_opt!(self, CF_LOCK, cache, no_region_info_accessor); cf_opts @@ -952,7 +945,7 @@ impl Default for RaftCfConfig { } impl RaftCfConfig { - pub fn build_opt(&self, cache: &Option) -> RocksCfOptions { + pub fn build_opt(&self, cache: &Cache) -> RocksCfOptions { let no_region_info_accessor: Option<&RegionInfoAccessor> = None; let mut cf_opts = 
build_cf_opt!(self, CF_RAFT, cache, no_region_info_accessor); cf_opts @@ -1192,7 +1185,7 @@ impl DbConfig { pub fn build_cf_opts( &self, - cache: &Option, + cache: &Cache, region_info_accessor: Option<&RegionInfoAccessor>, api_version: ApiVersion, ) -> Vec<(&'static str, RocksCfOptions)> { @@ -1328,7 +1321,7 @@ impl Default for RaftDefaultCfConfig { } impl RaftDefaultCfConfig { - pub fn build_opt(&self, cache: &Option) -> RocksCfOptions { + pub fn build_opt(&self, cache: &Cache) -> RocksCfOptions { let no_region_info_accessor: Option<&RegionInfoAccessor> = None; let mut cf_opts = build_cf_opt!(self, CF_DEFAULT, cache, no_region_info_accessor); let f = FixedPrefixSliceTransform::new(region_raft_prefix_len()); @@ -1483,7 +1476,7 @@ impl RaftDbConfig { opts } - pub fn build_cf_opts(&self, cache: &Option) -> Vec<(&'static str, RocksCfOptions)> { + pub fn build_cf_opts(&self, cache: &Cache) -> Vec<(&'static str, RocksCfOptions)> { vec![(CF_DEFAULT, self.defaultcf.build_opt(cache))] } @@ -1549,15 +1542,13 @@ pub enum DbType { pub struct DbConfigManger> { tablet_accessor: Arc, db_type: DbType, - shared_block_cache: bool, } impl> DbConfigManger { - pub fn new(tablet_accessor: Arc, db_type: DbType, shared_block_cache: bool) -> Self { + pub fn new(tablet_accessor: Arc, db_type: DbType) -> Self { DbConfigManger { tablet_accessor, db_type, - shared_block_cache, } } @@ -1595,33 +1586,6 @@ impl> DbConfigManger { Ok(()) } - fn set_block_cache_size(&self, cf: &str, size: ReadableSize) -> Result<(), Box> { - self.validate_cf(cf)?; - if self.shared_block_cache { - return Err("shared block cache is enabled, change cache size through \ - block-cache.capacity in storage module instead" - .into()); - } - // for multi-rocks, shared block cache has to be enabled and thus should - // shortcut in the above if statement. 
- assert!(self.tablet_accessor.is_single_engine()); - let mut error_collector = TabletErrorCollector::new(); - self.tablet_accessor - .for_each_opened_tablet(&mut |region_id, suffix, db: &RocksEngine| { - let r = db - .get_options_cf(cf) - .and_then(|opt| opt.set_block_cache_capacity(size.0)); - if r.is_err() { - error_collector.add_result(region_id, suffix, r); - } - }); - // Write config to metric - CONFIG_ROCKSDB_GAUGE - .with_label_values(&[cf, "block_cache_size"]) - .set(size.0 as f64); - error_collector.take_result() - } - fn set_rate_bytes_per_sec(&self, rate_bytes_per_sec: i64) -> Result<(), Box> { let mut error_collector = TabletErrorCollector::new(); self.tablet_accessor @@ -1710,9 +1674,11 @@ impl + Send + Sync> ConfigManager for DbConfigMan if let ConfigValue::Module(mut cf_change) = cf_change { // defaultcf -> default let cf_name = &cf_name[..(cf_name.len() - 2)]; - if let Some(v) = cf_change.remove("block_cache_size") { + if cf_change.remove("block_cache_size").is_some() { // currently we can't modify block_cache_size via set_options_cf - self.set_block_cache_size(cf_name, v.into())?; + return Err("shared block cache is enabled, change cache size through \ + block-cache.capacity in storage module instead" + .into()); } if let Some(ConfigValue::Module(titan_change)) = cf_change.remove("titan") { for (name, value) in titan_change { @@ -3228,20 +3194,11 @@ impl TikvConfig { } } else { // Adjust `memory_usage_limit` if necessary. 
- if self.storage.block_cache.shared { - if let Some(cap) = self.storage.block_cache.capacity { - let limit = (cap.0 as f64 / BLOCK_CACHE_RATE * MEMORY_USAGE_LIMIT_RATE) as u64; - self.memory_usage_limit = Some(ReadableSize(limit)); - } else { - self.memory_usage_limit = Some(Self::suggested_memory_usage_limit()); - } - } else { - let cap = self.rocksdb.defaultcf.block_cache_size.0 - + self.rocksdb.writecf.block_cache_size.0 - + self.rocksdb.lockcf.block_cache_size.0 - + self.raftdb.defaultcf.block_cache_size.0; - let limit = (cap as f64 / BLOCK_CACHE_RATE * MEMORY_USAGE_LIMIT_RATE) as u64; + if let Some(cap) = self.storage.block_cache.capacity { + let limit = (cap.0 as f64 / BLOCK_CACHE_RATE * MEMORY_USAGE_LIMIT_RATE) as u64; self.memory_usage_limit = Some(ReadableSize(limit)); + } else { + self.memory_usage_limit = Some(Self::suggested_memory_usage_limit()); } } @@ -3407,7 +3364,7 @@ impl TikvConfig { // individual block cache sizes. Otherwise use the sum of block cache // size of all column families as the shared cache size. 
let cache_cfg = &mut self.storage.block_cache; - if cache_cfg.shared && cache_cfg.capacity.is_none() { + if cache_cfg.capacity.is_none() { cache_cfg.capacity = Some(ReadableSize( self.rocksdb.defaultcf.block_cache_size.0 + self.rocksdb.writecf.block_cache_size.0 @@ -4061,6 +4018,7 @@ mod tests { use api_version::{ApiV1, KvFormat}; use case_macros::*; + use engine_rocks::raw::LRUCacheOptions; use engine_traits::{CfOptions as _, DbOptions as _, DummyFactory}; use futures::executor::block_on; use grpcio::ResourceQuota; @@ -4487,7 +4445,6 @@ mod tests { None, cfg.storage.api_version(), ), - true, None, ) .unwrap(); @@ -4504,21 +4461,16 @@ mod tests { rx, ))); - let (shared, cfg_controller) = (cfg.storage.block_cache.shared, ConfigController::new(cfg)); + let cfg_controller = ConfigController::new(cfg); cfg_controller.register( Module::Rocksdb, - Box::new(DbConfigManger::new( - Arc::new(engine.clone()), - DbType::Kv, - shared, - )), + Box::new(DbConfigManger::new(Arc::new(engine.clone()), DbType::Kv)), ); let (scheduler, receiver) = dummy_scheduler(); cfg_controller.register( Module::Storage, Box::new(StorageConfigManger::new( Arc::new(DummyFactory::new(Some(engine), "".to_string())), - shared, scheduler, flow_controller.clone(), storage.get_scheduler(), @@ -4651,7 +4603,6 @@ mod tests { cfg.rocksdb.defaultcf.block_cache_size = ReadableSize::mb(8); cfg.rocksdb.rate_bytes_per_sec = ReadableSize::mb(64); cfg.rocksdb.rate_limiter_auto_tuned = false; - cfg.storage.block_cache.shared = false; cfg.validate().unwrap(); let (storage, cfg_controller, ..) 
= new_engines::(cfg); let db = storage.get_engine().get_rocksdb(); @@ -4690,7 +4641,6 @@ mod tests { let cf_opts = db.get_options_cf(CF_DEFAULT).unwrap(); assert_eq!(cf_opts.get_disable_auto_compactions(), false); assert_eq!(cf_opts.get_target_file_size_base(), ReadableSize::mb(64).0); - assert_eq!(cf_opts.get_block_cache_capacity(), ReadableSize::mb(8).0); let mut change = HashMap::new(); change.insert( @@ -4701,22 +4651,11 @@ mod tests { "rocksdb.defaultcf.target-file-size-base".to_owned(), "32MB".to_owned(), ); - change.insert( - "rocksdb.defaultcf.block-cache-size".to_owned(), - "256MB".to_owned(), - ); cfg_controller.update(change).unwrap(); let cf_opts = db.get_options_cf(CF_DEFAULT).unwrap(); assert_eq!(cf_opts.get_disable_auto_compactions(), true); assert_eq!(cf_opts.get_target_file_size_base(), ReadableSize::mb(32).0); - assert_eq!(cf_opts.get_block_cache_capacity(), ReadableSize::mb(256).0); - - // Can not update block cache through storage module - // when shared block cache is disabled - cfg_controller - .update_config("storage.block-cache.capacity", "512MB") - .unwrap_err(); } #[test] @@ -4746,7 +4685,6 @@ mod tests { #[test] fn test_change_shared_block_cache() { let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); - cfg.storage.block_cache.shared = true; cfg.validate().unwrap(); let (storage, cfg_controller, ..) = new_engines::(cfg); let db = storage.get_engine().get_rocksdb(); @@ -4813,7 +4751,6 @@ mod tests { #[test] fn test_change_ttl_check_poll_interval() { let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); - cfg.storage.block_cache.shared = true; cfg.validate().unwrap(); let (_, cfg_controller, mut rx, _) = new_engines::(cfg); @@ -5118,50 +5055,47 @@ mod tests { #[test] fn test_compaction_guard() { + let cache = Cache::new_lru_cache(LRUCacheOptions::new()); // Test comopaction guard disabled. 
- { - let config = DefaultCfConfig { - target_file_size_base: ReadableSize::mb(16), - enable_compaction_guard: false, - ..Default::default() - }; - let provider = Some(MockRegionInfoProvider::new(vec![])); - let cf_opts = build_cf_opt!(config, CF_DEFAULT, None /* cache */, provider); - assert_eq!( - config.target_file_size_base.0, - cf_opts.get_target_file_size_base() - ); - } + let config = DefaultCfConfig { + target_file_size_base: ReadableSize::mb(16), + enable_compaction_guard: false, + ..Default::default() + }; + let provider = Some(MockRegionInfoProvider::new(vec![])); + let cf_opts = build_cf_opt!(config, CF_DEFAULT, &cache, provider); + assert_eq!( + config.target_file_size_base.0, + cf_opts.get_target_file_size_base() + ); + // Test compaction guard enabled but region info provider is missing. - { - let config = DefaultCfConfig { - target_file_size_base: ReadableSize::mb(16), - enable_compaction_guard: true, - ..Default::default() - }; - let provider: Option = None; - let cf_opts = build_cf_opt!(config, CF_DEFAULT, None /* cache */, provider); - assert_eq!( - config.target_file_size_base.0, - cf_opts.get_target_file_size_base() - ); - } + let config = DefaultCfConfig { + target_file_size_base: ReadableSize::mb(16), + enable_compaction_guard: true, + ..Default::default() + }; + let provider: Option = None; + let cf_opts = build_cf_opt!(config, CF_DEFAULT, &cache, provider); + assert_eq!( + config.target_file_size_base.0, + cf_opts.get_target_file_size_base() + ); + // Test compaction guard enabled. 
- { - let config = DefaultCfConfig { - target_file_size_base: ReadableSize::mb(16), - enable_compaction_guard: true, - compaction_guard_min_output_file_size: ReadableSize::mb(4), - compaction_guard_max_output_file_size: ReadableSize::mb(64), - ..Default::default() - }; - let provider = Some(MockRegionInfoProvider::new(vec![])); - let cf_opts = build_cf_opt!(config, CF_DEFAULT, None /* cache */, provider); - assert_eq!( - config.compaction_guard_max_output_file_size.0, - cf_opts.get_target_file_size_base() - ); - } + let config = DefaultCfConfig { + target_file_size_base: ReadableSize::mb(16), + enable_compaction_guard: true, + compaction_guard_min_output_file_size: ReadableSize::mb(4), + compaction_guard_max_output_file_size: ReadableSize::mb(64), + ..Default::default() + }; + let provider = Some(MockRegionInfoProvider::new(vec![])); + let cf_opts = build_cf_opt!(config, CF_DEFAULT, &cache, provider); + assert_eq!( + config.compaction_guard_max_output_file_size.0, + cf_opts.get_target_file_size_base() + ); } #[test] diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 7e8a1457500..7e36efcb98f 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -24,7 +24,7 @@ use crate::config::{DbConfig, TikvConfig, DEFAULT_ROCKSDB_SUB_DIR}; struct FactoryInner { env: Arc, region_info_accessor: Option, - block_cache: Option, + block_cache: Cache, rocksdb_config: Arc, store_path: PathBuf, api_version: ApiVersion, @@ -39,12 +39,17 @@ pub struct KvEngineFactoryBuilder { } impl KvEngineFactoryBuilder { - pub fn new(env: Arc, config: &TikvConfig, store_path: impl Into) -> Self { + pub fn new( + env: Arc, + config: &TikvConfig, + store_path: impl Into, + cache: Cache, + ) -> Self { Self { inner: FactoryInner { env, region_info_accessor: None, - block_cache: None, + block_cache: cache, rocksdb_config: Arc::new(config.rocksdb.clone()), store_path: store_path.into(), api_version: config.storage.api_version(), @@ -61,11 +66,6 @@ impl 
KvEngineFactoryBuilder { self } - pub fn block_cache(mut self, cache: Cache) -> Self { - self.inner.block_cache = Some(cache); - self - } - pub fn flow_listener(mut self, listener: FlowListener) -> Self { self.inner.flow_listener = Some(listener); self @@ -158,16 +158,10 @@ impl KvEngineFactory { kv_db_opts, kv_cfs_opts, ); - let mut kv_engine = match kv_engine { - Ok(e) => e, - Err(e) => { - error!("failed to create kv engine"; "path" => %tablet_path.display(), "err" => ?e); - return Err(e); - } - }; - let shared_block_cache = self.inner.block_cache.is_some(); - kv_engine.set_shared_block_cache(shared_block_cache); - Ok(kv_engine) + if let Err(e) = &kv_engine { + error!("failed to create kv engine"; "path" => %tablet_path.display(), "err" => ?e); + } + kv_engine } pub fn on_tablet_created(&self, region_id: u64, suffix: u64) { diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs index f370a08e280..a55ebca6555 100644 --- a/src/server/engine_factory_v2.rs +++ b/src/server/engine_factory_v2.rs @@ -237,7 +237,8 @@ impl TabletAccessor for KvEngineFactoryV2 { #[cfg(test)] mod tests { - use engine_traits::{OpenOptions, TabletFactory, CF_WRITE, SPLIT_PREFIX}; + use engine_traits::{OpenOptions, CF_WRITE, SPLIT_PREFIX}; + use tempfile::TempDir; use super::*; use crate::{config::TikvConfig, server::KvEngineFactoryBuilder}; @@ -257,18 +258,19 @@ mod tests { }; } - #[test] - fn test_kvengine_factory() { + fn create_test_tablet_factory(name: &'static str) -> (TempDir, KvEngineFactoryBuilder) { let cfg = TEST_CONFIG.clone(); - assert!(cfg.storage.block_cache.shared); let cache = cfg.storage.block_cache.build_shared_cache(); - let dir = test_util::temp_dir("test_kvengine_factory", false); + let dir = test_util::temp_dir(name, false); let env = cfg.build_shared_rocks_env(None, None).unwrap(); - let mut builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); - if let Some(cache) = cache { - builder = builder.block_cache(cache); - } + let builder = 
KvEngineFactoryBuilder::new(env, &cfg, dir.path(), cache); + (dir, builder) + } + + #[test] + fn test_kvengine_factory() { + let (_dir, builder) = create_test_tablet_factory("test_kvengine_factory"); let factory = builder.build(); let shared_db = factory.create_shared_db().unwrap(); @@ -307,16 +309,7 @@ mod tests { #[test] fn test_kvengine_factory_root_db_implicit_creation() { - let cfg = TEST_CONFIG.clone(); - assert!(cfg.storage.block_cache.shared); - let cache = cfg.storage.block_cache.build_shared_cache(); - let dir = test_util::temp_dir("test_kvengine_factory", false); - let env = cfg.build_shared_rocks_env(None, None).unwrap(); - - let mut builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); - if let Some(cache) = cache { - builder = builder.block_cache(cache); - } + let (_dir, builder) = create_test_tablet_factory("test_kvengine_factory"); let factory = builder.build(); // root_db should be created implicitly here @@ -346,16 +339,7 @@ mod tests { #[test] fn test_kvengine_factory_v2() { - let cfg = TEST_CONFIG.clone(); - assert!(cfg.storage.block_cache.shared); - let cache = cfg.storage.block_cache.build_shared_cache(); - let dir = test_util::temp_dir("test_kvengine_factory_v2", false); - let env = cfg.build_shared_rocks_env(None, None).unwrap(); - - let mut builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); - if let Some(cache) = cache { - builder = builder.block_cache(cache); - } + let (_dir, builder) = create_test_tablet_factory("test_kvengine_factory_v2"); let factory = builder.build_v2(); let tablet = factory @@ -443,16 +427,7 @@ mod tests { #[test] fn test_existed_db_not_in_registry() { - let cfg = TEST_CONFIG.clone(); - assert!(cfg.storage.block_cache.shared); - let cache = cfg.storage.block_cache.build_shared_cache(); - let dir = test_util::temp_dir("test_kvengine_factory_v2", false); - let env = cfg.build_shared_rocks_env(None, None).unwrap(); - - let mut builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); - if let 
Some(cache) = cache { - builder = builder.block_cache(cache); - } + let (_dir, builder) = create_test_tablet_factory("test_kvengine_factory_v2"); let factory = builder.build_v2(); let tablet = factory @@ -493,11 +468,7 @@ mod tests { #[test] fn test_get_live_tablets() { - let cfg = TEST_CONFIG.clone(); - let dir = test_util::temp_dir("test_get_live_tablets", false); - let env = cfg.build_shared_rocks_env(None, None).unwrap(); - - let builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path()); + let (_dir, builder) = create_test_tablet_factory("test_get_live_tablets"); let factory = builder.build_v2(); factory .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) diff --git a/src/storage/config.rs b/src/storage/config.rs index 313f86ba048..3501cefa252 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -194,7 +194,7 @@ impl Default for FlowControlConfig { #[serde(rename_all = "kebab-case")] pub struct BlockCacheConfig { #[online_config(skip)] - pub shared: bool, + pub shared: Option, pub capacity: Option, #[online_config(skip)] pub num_shard_bits: i32, @@ -209,7 +209,7 @@ pub struct BlockCacheConfig { impl Default for BlockCacheConfig { fn default() -> BlockCacheConfig { BlockCacheConfig { - shared: true, + shared: None, capacity: None, num_shard_bits: 6, strict_capacity_limit: false, @@ -229,9 +229,9 @@ impl BlockCacheConfig { } } - pub fn build_shared_cache(&self) -> Option { - if !self.shared { - return None; + pub fn build_shared_cache(&self) -> Cache { + if self.shared == Some(false) { + warn!("storage.block-cache.shared is deprecated, cache is always shared."); } let capacity = match self.capacity { None => { @@ -248,7 +248,7 @@ impl BlockCacheConfig { if let Some(allocator) = self.new_memory_allocator() { cache_opts.set_memory_allocator(allocator); } - Some(Cache::new_lru_cache(cache_opts)) + Cache::new_lru_cache(cache_opts) } fn new_memory_allocator(&self) -> Option { diff --git a/src/storage/config_manager.rs 
b/src/storage/config_manager.rs index de3b13408f0..3cda77ab5a2 100644 --- a/src/storage/config_manager.rs +++ b/src/storage/config_manager.rs @@ -21,7 +21,6 @@ use crate::{ pub struct StorageConfigManger { tablet_factory: Arc + Send + Sync>, - shared_block_cache: bool, ttl_checker_scheduler: Scheduler, flow_controller: Arc, scheduler: TxnScheduler, @@ -33,14 +32,12 @@ unsafe impl Sync for StorageConfigManger impl StorageConfigManger { pub fn new( tablet_factory: Arc + Send + Sync>, - shared_block_cache: bool, ttl_checker_scheduler: Scheduler, flow_controller: Arc, scheduler: TxnScheduler, ) -> Self { StorageConfigManger { tablet_factory, - shared_block_cache, ttl_checker_scheduler, flow_controller, scheduler, @@ -51,9 +48,6 @@ impl StorageConfigManger { impl ConfigManager for StorageConfigManger { fn dispatch(&mut self, mut change: ConfigChange) -> CfgResult<()> { if let Some(ConfigValue::Module(mut block_cache)) = change.remove("block_cache") { - if !self.shared_block_cache { - return Err("shared block cache is disabled".into()); - } if let Some(size) = block_cache.remove("capacity") { if size != ConfigValue::None { let s: ReadableSize = size.into(); diff --git a/src/storage/kv/test_engine_builder.rs b/src/storage/kv/test_engine_builder.rs index f0192372e4b..f02ee31c5f2 100644 --- a/src/storage/kv/test_engine_builder.rs +++ b/src/storage/kv/test_engine_builder.rs @@ -110,8 +110,7 @@ impl TestEngineBuilder { _ => (*cf, RocksCfOptions::default()), }) .collect(); - let engine = - RocksEngine::new(&path, None, cfs_opts, cache.is_some(), self.io_rate_limiter)?; + let engine = RocksEngine::new(&path, None, cfs_opts, self.io_rate_limiter)?; Ok(engine) } } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 6f06e55937f..79f48c68a88 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -4146,11 +4146,7 @@ mod tests { (CF_RAFT, cfg_rocksdb.raftcf.build_opt(&cache)), ]; RocksEngine::new( - &path, - None, - cfs_opts, - cache.is_some(), - None, // io_rate_limiter 
+ &path, None, cfs_opts, None, // io_rate_limiter ) } .unwrap(); diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 40c78dfabde..2ea66ef1222 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -262,13 +262,12 @@ fn test_scale_scheduler_pool() { rx, ))); - let cfg_controller = ConfigController::new(cfg.clone()); + let cfg_controller = ConfigController::new(cfg); let (scheduler, _receiver) = dummy_scheduler(); cfg_controller.register( Module::Storage, Box::new(StorageConfigManger::new( Arc::new(DummyFactory::new(Some(kv_engine), "".to_string())), - cfg.storage.block_cache.shared, scheduler, flow_controller, storage.get_scheduler(), diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index ff01788c370..6341f3a9e27 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -684,7 +684,7 @@ fn test_serde_custom_tikv_config() { hard_pending_compaction_bytes_limit: ReadableSize(1), }, block_cache: BlockCacheConfig { - shared: true, + shared: None, capacity: Some(ReadableSize::gb(40)), num_shard_bits: 10, strict_capacity_limit: true, @@ -886,7 +886,6 @@ fn test_do_not_use_unified_readpool_with_legacy_config() { fn test_block_cache_backward_compatible() { let content = read_file_in_project_dir("integrations/config/test-cache-compatible.toml"); let mut cfg: TikvConfig = toml::from_str(&content).unwrap(); - assert!(cfg.storage.block_cache.shared); assert!(cfg.storage.block_cache.capacity.is_none()); cfg.compatible_adjust(); assert!(cfg.storage.block_cache.capacity.is_some()); diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index e5c896238bc..f22538a6f78 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -102,7 +102,6 @@ enable-ttl = true ttl-check-poll-interval = "0s" [storage.block-cache] -shared = true capacity = 
"40GB" num-shard-bits = 10 strict-capacity-limit = true From 3122786dddb7f85732a5515e1a367adc8865c33c Mon Sep 17 00:00:00 2001 From: hehechen Date: Thu, 8 Dec 2022 11:34:04 +0800 Subject: [PATCH 0394/1149] resolved_ts: pass full safe_ts timestamp to observer (#13897) close tikv/tikv#13896 Signed-off-by: hehechen Co-authored-by: Xinye Tao --- components/raftstore/src/store/util.rs | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index df5f4543f76..41409a49448 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -35,7 +35,7 @@ use tikv_util::{ Either, }; use time::{Duration, Timespec}; -use txn_types::{TimeStamp, WriteBatchFlags}; +use txn_types::WriteBatchFlags; use super::{metrics::PEER_ADMIN_CMD_COUNTER_VEC, peer_storage, Config}; use crate::{coprocessor::CoprocessorHost, store::snap::SNAPSHOT_VERSION, Error, Result}; @@ -1186,11 +1186,7 @@ impl RegionReadProgress { if !core.pause { self.safe_ts.store(ts, AtomicOrdering::Release); // No need to update leader safe ts here. - coprocessor.on_update_safe_ts( - core.region_id, - TimeStamp::new(ts).physical(), - INVALID_TIMESTAMP, - ) + coprocessor.on_update_safe_ts(core.region_id, ts, INVALID_TIMESTAMP) } } } @@ -1232,11 +1228,7 @@ impl RegionReadProgress { self.safe_ts.store(ts, AtomicOrdering::Release); // After region merge, self safe ts may decrease, so leader safe ts should be // reset. 
- coprocessor.on_update_safe_ts( - core.region_id, - TimeStamp::new(ts).physical(), - TimeStamp::new(ts).physical(), - ) + coprocessor.on_update_safe_ts(core.region_id, ts, ts) } } } @@ -1261,9 +1253,7 @@ impl RegionReadProgress { } } } - let self_phy_ts = TimeStamp::new(self.safe_ts()).physical(); - let leader_phy_ts = TimeStamp::new(rs.get_safe_ts()).physical(); - coprocessor.on_update_safe_ts(leader_info.region_id, self_phy_ts, leader_phy_ts) + coprocessor.on_update_safe_ts(leader_info.region_id, self.safe_ts(), rs.get_safe_ts()) } // whether the provided `LeaderInfo` is same as ours core.leader_info.leader_term == leader_info.term From 3e0b8ddc6ab5fdf80afcce5884021c2015204256 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 8 Dec 2022 14:30:04 +0800 Subject: [PATCH 0395/1149] gc_worker: disable gc if have negative ratio (#13904) close tikv/tikv#13909, ref pingcap/tidb#39602 GC would be skipped once the `ratio_threshold` is negative or infinity. Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- src/server/gc_worker/compaction_filter.rs | 16 ++++++++++++++++ src/server/gc_worker/mod.rs | 16 ++++++++++++++++ tests/failpoints/cases/test_table_properties.rs | 2 +- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index bd5896296bb..4c494d6f01f 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -685,6 +685,15 @@ pub fn check_need_gc( context: &CompactionFilterContext, ) -> bool { let check_props = |props: &MvccProperties| -> (bool, bool /* skip_more_checks */) { + // Disable GC directly once the config is negative or +inf. + // Disabling GC is useful in some abnormal scenarios where the transaction model + // would be break (e.g. 
writes with higher commit TS would be written BEFORE + // writes with lower commit TS, or write data with TS lower than current GC safe + // point). Use this at your own risk. + if ratio_threshold.is_sign_negative() || ratio_threshold.is_infinite() { + return (false, false); + } + if props.min_ts > safe_point { return (false, false); } @@ -970,6 +979,13 @@ pub mod tests { let default_key = Key::from_encoded_slice(b"zkey").append_ts(100.into()); let default_key = default_key.into_encoded(); assert!(raw_engine.get_value(&default_key).unwrap().is_none()); + + // If the ratio threshold is less than 0, GC would be skipped. + must_prewrite_put(&mut engine, b"zkey", &value, b"zkey", 210); + must_commit(&mut engine, b"zkey", 210, 220); + gc_runner.ratio_threshold = Some(-1.0); + gc_runner.safe_point(256).gc(&raw_engine); + must_get(&mut engine, b"zkey", 210, &value); } // Test dirty versions before a deletion mark can be handled correctly. diff --git a/src/server/gc_worker/mod.rs b/src/server/gc_worker/mod.rs index a5b8837cd2e..75b7441fbcb 100644 --- a/src/server/gc_worker/mod.rs +++ b/src/server/gc_worker/mod.rs @@ -26,6 +26,14 @@ pub use crate::storage::{Callback, Error, ErrorInner, Result}; // Returns true if it needs gc. // This is for optimization purpose, does not mean to be accurate. fn check_need_gc(safe_point: TimeStamp, ratio_threshold: f64, props: &MvccProperties) -> bool { + // Disable GC directly once the config is negative or +inf. + // Disabling GC is useful in some abnormal scenarios where the transaction model + // would be break (e.g. writes with higher commit TS would be written BEFORE + // writes with lower commit TS, or write data with TS lower than current GC safe + // point). Use this at your own risk. + if ratio_threshold.is_sign_negative() || ratio_threshold.is_infinite() { + return false; + } // Always GC. 
if ratio_threshold < 1.0 { return true; @@ -77,6 +85,14 @@ mod tests { props } + #[test] + fn test_check_need_gc() { + let props = MvccProperties::default(); + assert!(!check_need_gc(TimeStamp::max(), -1.0, &props)); + assert!(!check_need_gc(TimeStamp::max(), f64::INFINITY, &props)); + assert!(check_need_gc(TimeStamp::max(), 0.9, &props)); + } + #[test] fn test_need_gc() { let path = tempfile::Builder::new() diff --git a/tests/failpoints/cases/test_table_properties.rs b/tests/failpoints/cases/test_table_properties.rs index 905bcfbd690..536149d48b5 100644 --- a/tests/failpoints/cases/test_table_properties.rs +++ b/tests/failpoints/cases/test_table_properties.rs @@ -91,7 +91,7 @@ fn test_check_need_gc() { // Set ratio_threshold, let (props.num_versions as f64 > props.num_rows as // f64 * ratio_threshold) return true - gc_runner.ratio_threshold = Option::Some(f64::MIN); + gc_runner.ratio_threshold = Option::Some(0.0f64); // is_bottommost_level = false do_gc(&raw_engine, 1, &mut gc_runner, &dir); From daa0c8f47ec11e1e4fc5acd543a113b95a6c3551 Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Thu, 8 Dec 2022 22:26:04 +0800 Subject: [PATCH 0396/1149] =?UTF-8?q?log-backup=EF=BC=9ARetry=20to=20get?= =?UTF-8?q?=20tasks=20with=20etcd-cli=20from=20etcd=20when=20TiKV=20starts?= =?UTF-8?q?.=20(#13907)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#13898 Signed-off-by: joccau Co-authored-by: Ti Chi Robot --- .../backup-stream/src/checkpoint_manager.rs | 4 +- components/backup-stream/src/endpoint.rs | 55 ++++++++++++++++++- .../backup-stream/src/metadata/client.rs | 5 ++ components/backup-stream/src/metadata/mod.rs | 2 +- .../src/metadata/store/lazy_etcd.rs | 7 ++- components/backup-stream/src/metadata/test.rs | 4 +- src/import/sst_service.rs | 2 +- 7 files changed, 70 insertions(+), 9 deletions(-) diff --git a/components/backup-stream/src/checkpoint_manager.rs 
b/components/backup-stream/src/checkpoint_manager.rs index e316b6e05c3..8c3de3d34ce 100644 --- a/components/backup-stream/src/checkpoint_manager.rs +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -458,7 +458,7 @@ where } #[cfg(test)] -mod tests { +pub mod tests { use std::{ assert_matches, collections::HashMap, @@ -510,7 +510,7 @@ mod tests { assert_matches::assert_matches!(r, GetCheckpointResult::Ok{checkpoint, ..} if checkpoint.into_inner() == 24); } - struct MockPdClient { + pub struct MockPdClient { safepoint: RwLock>, } diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 2ebeee2ea66..ec6b0dd41fb 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -273,7 +273,22 @@ where meta_client: MetadataClient, scheduler: Scheduler, ) -> Result<()> { - let tasks = meta_client.get_tasks().await?; + let tasks; + loop { + let r = meta_client.get_tasks().await; + match r { + Ok(t) => { + tasks = t; + break; + } + Err(e) => { + e.report("failed to get backup stream task"); + tokio::time::sleep(Duration::from_secs(5)).await; + continue; + } + } + } + for task in tasks.inner { info!("backup stream watch task"; "task" => ?task); if task.is_paused { @@ -1165,3 +1180,41 @@ where self.run_task(task) } } + +#[cfg(test)] +mod test { + use engine_rocks::RocksEngine; + use raftstore::coprocessor::region_info_accessor::MockRegionInfoProvider; + use test_raftstore::MockRaftStoreRouter; + use tikv_util::worker::dummy_scheduler; + + use crate::{ + checkpoint_manager::tests::MockPdClient, endpoint, endpoint::Endpoint, metadata::test, Task, + }; + + #[tokio::test] + async fn test_start() { + let cli = test::test_meta_cli(); + let (sched, mut rx) = dummy_scheduler(); + let task = test::simple_task("simple_3"); + cli.insert_task_with_range(&task, &[]).await.unwrap(); + + fail::cfg("failed_to_get_tasks", "1*return").unwrap(); + Endpoint::<_, MockRegionInfoProvider, RocksEngine, 
MockRaftStoreRouter, MockPdClient>::start_and_watch_tasks(cli, sched).await.unwrap(); + fail::remove("failed_to_get_tasks"); + + let _t1 = rx.recv().unwrap(); + let t2 = rx.recv().unwrap(); + + match t2 { + Task::WatchTask(t) => match t { + endpoint::TaskOp::AddTask(t) => { + assert_eq!(t.info, task.info); + assert!(!t.is_paused); + } + _ => panic!("not match TaskOp type"), + }, + _ => panic!("not match Task type {:?}", t2), + } + } +} diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index b7f1fcb2025..2c0fd2577fc 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -353,6 +353,11 @@ impl MetadataClient { defer! { super::metrics::METADATA_OPERATION_LATENCY.with_label_values(&["task_fetch"]).observe(now.saturating_elapsed().as_secs_f64()) } + fail::fail_point!("failed_to_get_tasks", |_| { + Err(Error::MalformedMetadata( + "faild to connect etcd client".to_string(), + )) + }); let snap = self.meta_store.snapshot().await?; let kvs = snap.get(Keys::Prefix(MetaKey::tasks())).await?; diff --git a/components/backup-stream/src/metadata/mod.rs b/components/backup-stream/src/metadata/mod.rs index 4c387533e49..a616ace2dc6 100644 --- a/components/backup-stream/src/metadata/mod.rs +++ b/components/backup-stream/src/metadata/mod.rs @@ -4,7 +4,7 @@ mod client; pub mod keys; mod metrics; pub mod store; -mod test; +pub mod test; pub use client::{Checkpoint, CheckpointProvider, MetadataClient, MetadataEvent, StreamTask}; pub use store::lazy_etcd::{ConnectionConfig, LazyEtcdClient}; diff --git a/components/backup-stream/src/metadata/store/lazy_etcd.rs b/components/backup-stream/src/metadata/store/lazy_etcd.rs index 6fc3a5332ea..88d44b09252 100644 --- a/components/backup-stream/src/metadata/store/lazy_etcd.rs +++ b/components/backup-stream/src/metadata/store/lazy_etcd.rs @@ -4,7 +4,10 @@ use std::{sync::Arc, time::Duration}; use etcd_client::{ConnectOptions, 
Error as EtcdError, OpenSslClientConfig}; use futures::Future; -use tikv_util::stream::{RetryError, RetryExt}; +use tikv_util::{ + info, + stream::{RetryError, RetryExt}, +}; use tokio::sync::OnceCell; use super::{etcd::EtcdSnapshot, EtcdStore, MetaStore}; @@ -113,7 +116,7 @@ where use futures::TryFutureExt; let r = tikv_util::stream::retry_ext( move || action().err_into::(), - RetryExt::default().with_fail_hook(|err| println!("meet error {:?}", err)), + RetryExt::default().with_fail_hook(|err| info!("retry it"; "err" => ?err)), ) .await; r.map_err(|err| err.0.into()) diff --git a/components/backup-stream/src/metadata/test.rs b/components/backup-stream/src/metadata/test.rs index ec2a30efbf3..a57722089bf 100644 --- a/components/backup-stream/src/metadata/test.rs +++ b/components/backup-stream/src/metadata/test.rs @@ -16,11 +16,11 @@ use crate::{ metadata::{store::SlashEtcStore, MetadataEvent}, }; -fn test_meta_cli() -> MetadataClient { +pub fn test_meta_cli() -> MetadataClient { MetadataClient::new(SlashEtcStore::default(), 42) } -fn simple_task(name: &str) -> StreamTask { +pub fn simple_task(name: &str) -> StreamTask { let mut task = StreamTask::default(); task.info.set_name(name.to_owned()); task.info.set_start_ts(1); diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 283f8f802e3..bdb552e8923 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -1064,7 +1064,7 @@ where Box::new(move |k: Vec, v: Vec| { // Need to skip the empty key/value that could break the transaction or cause // data corruption. see details at https://github.com/pingcap/tiflow/issues/5468. 
- if k.is_empty() || v.is_empty() { + if k.is_empty() || (!is_delete && v.is_empty()) { return; } From 56ed2f62dcf668f6b2ac720002d9d05dc0671fcf Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Fri, 9 Dec 2022 12:48:05 +0800 Subject: [PATCH 0397/1149] metrics: fix coprocessor cpu alert (#10878) close tikv/tikv#13918 Signed-off-by: tabokie Signed-off-by: Xinye Tao --- metrics/alertmanager/tikv.rules.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metrics/alertmanager/tikv.rules.yml b/metrics/alertmanager/tikv.rules.yml index 9b25637d14f..19f8085866e 100644 --- a/metrics/alertmanager/tikv.rules.yml +++ b/metrics/alertmanager/tikv.rules.yml @@ -253,17 +253,17 @@ groups: value: '{{ $value }}' summary: TiKV pending {{ $labels.type }} request is high - - alert: TiKV_batch_request_snapshot_nums - expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"cop_.*"}[1m])) by (instance) / ( count(tikv_thread_cpu_seconds_total{name=~"cop_.*"}) * 0.9 ) / count(count(tikv_thread_cpu_seconds_total) by (instance)) > 0 + - alert: TiKV_coprocessor_cpu_util + expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"cop_.*"}[1m])) by (instance) / (count(tikv_thread_cpu_seconds_total{name=~"cop_.*"}) by (instance) * 0.9) >= 1 for: 1m labels: env: ENV_LABELS_ENV level: warning - expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"cop_.*"}[1m])) by (instance) / ( count(tikv_thread_cpu_seconds_total{name=~"cop_.*"}) * 0.9 ) / count(count(tikv_thread_cpu_seconds_total) by (instance)) > 0 + expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"cop_.*"}[1m])) by (instance) / (count(tikv_thread_cpu_seconds_total{name=~"cop_.*"}) by (instance) * 0.9) >= 1 annotations: description: 'cluster: ENV_LABELS_ENV, type: {{ $labels.type }}, instance: {{ $labels.instance }}, values: {{ $value }}' value: '{{ $value }}' - summary: TiKV batch request snapshot nums is high + summary: TiKV coprocessor CPU utilization exceeds 90% - alert: TiKV_pending_task expr: 
sum(tikv_worker_pending_task_total) BY (instance,name) > 1000 From 97585fb86f1437438ff7e5514965d48a427bb94c Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Thu, 8 Dec 2022 21:52:04 -0800 Subject: [PATCH 0398/1149] Improve readability through syntax sugar (#13899) ref tikv/tikv#13908 Nit: use lib API to improve readability Signed-off-by: Yang Zhang Co-authored-by: Xinye Tao --- components/raftstore/src/store/fsm/peer.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 9460daf812d..47c9357e1c4 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -5028,13 +5028,12 @@ where // ReadIndex can be processed on the replicas. let is_read_index_request = request.len() == 1 && request[0].get_cmd_type() == CmdType::ReadIndex; - let mut read_only = true; - for r in msg.get_requests() { - match r.get_cmd_type() { - CmdType::Get | CmdType::Snap | CmdType::ReadIndex => (), - _ => read_only = false, - } - } + let read_only = msg.get_requests().iter().all(|r| { + matches!( + r.get_cmd_type(), + CmdType::Get | CmdType::Snap | CmdType::ReadIndex, + ) + }); let region_id = self.region_id(); let allow_replica_read = read_only && msg.get_header().get_replica_read(); let flags = WriteBatchFlags::from_bits_check(msg.get_header().get_flags()); From 0921ad0d3b6be791f067e9a45b74cffccf9d5810 Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 9 Dec 2022 15:22:04 +0800 Subject: [PATCH 0399/1149] *: clean up tablet factory (#13912) ref tikv/tikv#12842 In the past, there are 5 types of tablet factory: - Dummy factory - Test Factory for v1 and v2 - Production factory for v1 and v2. Tablet factory also requires TabletAccessor trait, so there is 10 implementations. If there are bugs, we need to change at least 5 places, and 10 places in worst cases. 
This PR simplifies the code by limiting the scope of engine factory, so it only needs to provide create, destroy and exist check three functions. All other functions are moved to type `TabletRegistry`. Also `TabletFactory` is only for v2 usage, so there will be only 3 different tablet factories and no accessor trait. It should be a lot easier to adjust the behavior of tablet factory. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/engine_panic/src/engine.rs | 12 +- components/engine_rocks/src/engine.rs | 14 +- components/engine_test/src/lib.rs | 315 +---------- components/engine_traits/Cargo.toml | 1 + components/engine_traits/src/engine.rs | 343 +----------- components/engine_traits/src/lib.rs | 2 + components/engine_traits/src/tablet.rs | 398 ++++++++++++++ components/raftstore-v2/src/batch/store.rs | 30 +- components/raftstore-v2/src/fsm/apply.rs | 9 +- components/raftstore-v2/src/fsm/peer.rs | 6 +- components/raftstore-v2/src/fsm/store.rs | 28 +- components/raftstore-v2/src/lib.rs | 1 - .../src/operation/command/admin/mod.rs | 2 +- .../src/operation/command/admin/split.rs | 59 +-- .../raftstore-v2/src/operation/command/mod.rs | 5 +- components/raftstore-v2/src/operation/life.rs | 2 +- components/raftstore-v2/src/operation/pd.rs | 2 +- .../raftstore-v2/src/operation/query/lease.rs | 2 +- .../raftstore-v2/src/operation/query/local.rs | 65 ++- .../raftstore-v2/src/operation/ready/mod.rs | 4 +- .../src/operation/ready/snapshot.rs | 34 +- components/raftstore-v2/src/raft/apply.rs | 17 +- components/raftstore-v2/src/raft/peer.rs | 29 +- components/raftstore-v2/src/raft/storage.rs | 36 +- components/raftstore-v2/src/router/imp.rs | 10 +- components/raftstore-v2/src/tablet.rs | 102 ---- components/raftstore-v2/src/worker/pd/mod.rs | 8 +- .../src/worker/pd/store_heartbeat.rs | 4 +- .../tests/failpoints/test_basic_write.rs | 10 +- .../tests/integrations/cluster.rs | 61 +-- .../tests/integrations/test_basic_write.rs | 12 +- 
.../tests/integrations/test_conf_change.rs | 12 +- .../tests/integrations/test_life.rs | 20 +- .../tests/integrations/test_pd_heartbeat.rs | 2 +- .../tests/integrations/test_read.rs | 12 +- .../tests/integrations/test_split.rs | 10 +- .../tests/integrations/test_status.rs | 2 +- components/server/src/server.rs | 89 ++-- components/test_raftstore/src/util.rs | 9 +- src/config/configurable.rs | 141 +++++ src/{config.rs => config/mod.rs} | 146 ++---- src/server/engine_factory.rs | 261 ++++------ src/server/engine_factory_v2.rs | 487 ------------------ src/server/mod.rs | 1 - src/storage/config_manager.rs | 38 +- .../flow_controller/tablet_flow_controller.rs | 121 +++-- tests/failpoints/cases/test_storage.rs | 3 +- 48 files changed, 1027 insertions(+), 1951 deletions(-) create mode 100644 components/engine_traits/src/tablet.rs delete mode 100644 components/raftstore-v2/src/tablet.rs create mode 100644 src/config/configurable.rs rename src/{config.rs => config/mod.rs} (97%) delete mode 100644 src/server/engine_factory_v2.rs diff --git a/Cargo.lock b/Cargo.lock index eb5145959af..a7d72121032 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1626,6 +1626,7 @@ name = "engine_traits" version = "0.0.1" dependencies = [ "case_macros", + "collections", "error_code", "fail", "file_system", diff --git a/components/engine_panic/src/engine.rs b/components/engine_panic/src/engine.rs index a296c3df9d8..6bca7d46485 100644 --- a/components/engine_panic/src/engine.rs +++ b/components/engine_panic/src/engine.rs @@ -2,7 +2,7 @@ use engine_traits::{ IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, Result, SyncMutable, - TabletAccessor, WriteOptions, + WriteOptions, }; use crate::{db_vector::PanicDbVector, snapshot::PanicSnapshot, write_batch::PanicWriteBatch}; @@ -24,16 +24,6 @@ impl KvEngine for PanicEngine { } } -impl TabletAccessor for PanicEngine { - fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &PanicEngine)) { - panic!() - } - - fn is_single_engine(&self) 
-> bool { - panic!() - } -} - impl Peekable for PanicEngine { type DbVector = PanicDbVector; diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 720a92a8bdd..0c37120e7fc 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -2,9 +2,7 @@ use std::{any::Any, sync::Arc}; -use engine_traits::{ - IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SyncMutable, TabletAccessor, -}; +use engine_traits::{IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SyncMutable}; use rocksdb::{DBIterator, Writable, DB}; use crate::{ @@ -99,16 +97,6 @@ impl KvEngine for RocksEngine { } } -impl TabletAccessor for RocksEngine { - fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &RocksEngine)) { - f(0, 0, self); - } - - fn is_single_engine(&self) -> bool { - true - } -} - impl Iterable for RocksEngine { type Iterator = RocksEngineIterator; diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index 77bd2d3be7c..605feedc7bd 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -76,12 +76,8 @@ pub mod raft { /// Types and constructors for the "kv" engine pub mod kv { - use std::{ - path::{Path, PathBuf}, - sync::{Arc, Mutex}, - }; + use std::path::Path; - use collections::HashMap; #[cfg(feature = "test-engine-kv-panic")] pub use engine_panic::{ PanicEngine as KvTestEngine, PanicEngineIterator as KvTestEngineIterator, @@ -92,11 +88,7 @@ pub mod kv { RocksEngine as KvTestEngine, RocksEngineIterator as KvTestEngineIterator, RocksSnapshot as KvTestSnapshot, RocksWriteBatchVec as KvTestWriteBatch, }; - use engine_traits::{ - CfOptions, CfOptionsExt, MiscExt, OpenOptions, Result, TabletAccessor, TabletFactory, - CF_DEFAULT, - }; - use tikv_util::box_err; + use engine_traits::{MiscExt, Result, TabletFactory}; use crate::ctor::{CfOptions as KvTestCfOptions, DbOptions, KvEngineConstructorExt}; @@ -112,317 +104,40 
@@ pub mod kv { KvTestEngine::new_kv_engine_opt(path, db_opt, cfs_opts) } - const TOMBSTONE_MARK: &str = "TOMBSTONE_TABLET"; + const TOMBSTONE_SUFFIX: &str = ".tombstone"; #[derive(Clone)] pub struct TestTabletFactory { - root_path: PathBuf, db_opt: DbOptions, cf_opts: Vec<(&'static str, KvTestCfOptions)>, - root_db: Arc>>, } impl TestTabletFactory { - pub fn new( - root_path: &Path, - db_opt: DbOptions, - cf_opts: Vec<(&'static str, KvTestCfOptions)>, - ) -> Self { - let factory = Self { - root_path: root_path.to_path_buf(), - db_opt, - cf_opts, - root_db: Arc::new(Mutex::default()), - }; - let tablet_path = factory.tablets_path(); - if !tablet_path.exists() { - std::fs::create_dir_all(tablet_path).unwrap(); - } - factory + pub fn new(db_opt: DbOptions, cf_opts: Vec<(&'static str, KvTestCfOptions)>) -> Self { + Self { db_opt, cf_opts } } + } - fn create_tablet(&self, tablet_path: &Path) -> Result { + impl TabletFactory for TestTabletFactory { + fn open_tablet(&self, _id: u64, _suffix: Option, path: &Path) -> Result { KvTestEngine::new_kv_engine_opt( - tablet_path.to_str().unwrap(), + path.to_str().unwrap(), self.db_opt.clone(), self.cf_opts.clone(), ) } - } - - impl TabletFactory for TestTabletFactory { - fn create_shared_db(&self) -> Result { - let tablet_path = self.tablet_path(0, 0); - let tablet = self.create_tablet(&tablet_path)?; - let mut root_db = self.root_db.lock().unwrap(); - root_db.replace(tablet.clone()); - Ok(tablet) - } - - /// See the comment above the same name method in KvEngineFactory - fn open_tablet( - &self, - _id: u64, - _suffix: Option, - options: OpenOptions, - ) -> Result { - if let Some(db) = self.root_db.lock().unwrap().as_ref() { - if options.create_new() { - return Err(box_err!("root tablet {} already exists", db.path())); - } - return Ok(db.clone()); - } - // No need for mutex protection here since root_db creation only occurs at - // tikv bootstrap time when there is no racing issue. 
- if options.create_new() || options.create() { - return self.create_shared_db(); - } - - Err(box_err!("root tablet has not been initialized")) - } - - fn open_tablet_raw( - &self, - _path: &Path, - _id: u64, - _suffix: u64, - _options: OpenOptions, - ) -> Result { - self.create_shared_db() - } - - fn exists_raw(&self, _path: &Path) -> bool { - false - } - - #[inline] - fn tablet_path_with_prefix(&self, _prefix: &str, _id: u64, _suffix: u64) -> PathBuf { - self.root_path.join("db") - } - #[inline] - fn tablets_path(&self) -> PathBuf { - Path::new(&self.root_path).join("tablets") - } - - #[inline] - fn destroy_tablet(&self, _id: u64, _suffix: u64) -> engine_traits::Result<()> { - Ok(()) - } - - fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()> { - let db = self.root_db.lock().unwrap(); - let opt = db.as_ref().unwrap().get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap - opt.set_block_cache_capacity(capacity)?; + fn destroy_tablet(&self, _id: u64, _suffix: Option, path: &Path) -> Result<()> { + let tombstone_path = path.join(TOMBSTONE_SUFFIX); + std::fs::remove_dir_all(&tombstone_path)?; + std::fs::rename(path, &tombstone_path)?; + std::fs::remove_dir_all(tombstone_path)?; Ok(()) } - } - - impl TabletAccessor for TestTabletFactory { - fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &KvTestEngine)) { - let db = self.root_db.lock().unwrap(); - let db = db.as_ref().unwrap(); - f(0, 0, db); - } - - fn is_single_engine(&self) -> bool { - true - } - } - - #[derive(Clone)] - pub struct TestTabletFactoryV2 { - inner: TestTabletFactory, - // region_id -> (tablet, tablet_suffix) - registry: Arc>>, - } - - impl TestTabletFactoryV2 { - pub fn new( - root_path: &Path, - db_opt: DbOptions, - cf_opts: Vec<(&'static str, KvTestCfOptions)>, - ) -> Self { - Self { - inner: TestTabletFactory::new(root_path, db_opt, cf_opts), - registry: Arc::default(), - } - } - } - - impl TabletFactory for TestTabletFactoryV2 { - /// See the comment above the same 
name method in KvEngineFactoryV2 - fn open_tablet( - &self, - id: u64, - suffix: Option, - mut options: OpenOptions, - ) -> Result { - if options.create_new() && suffix.is_none() { - return Err(box_err!( - "suffix should be provided when creating new tablet" - )); - } - - if options.create_new() || options.create() { - options = options.set_cache_only(false); - } - - let mut reg = self.registry.lock().unwrap(); - if let Some(suffix) = suffix { - if let Some((cached_tablet, cached_suffix)) = reg.get(&id) && *cached_suffix == suffix { - // Target tablet exist in the cache - if options.create_new() { - return Err(box_err!("region {} {} already exists", id, cached_tablet.path())); - } - return Ok(cached_tablet.clone()); - } else if !options.cache_only() { - let tablet_path = self.tablet_path(id, suffix); - let tablet = self.open_tablet_raw(&tablet_path, id, suffix, options.clone())?; - if !options.skip_cache() { - reg.insert(id, (tablet.clone(), suffix)); - } - return Ok(tablet); - } - } else if let Some((tablet, _)) = reg.get(&id) { - return Ok(tablet.clone()); - } - - Err(box_err!( - "tablet with region id {} suffix {:?} does not exist", - id, - suffix - )) - } - - fn open_tablet_raw( - &self, - path: &Path, - id: u64, - _suffix: u64, - options: OpenOptions, - ) -> Result { - let engine_exist = KvTestEngine::exists(path.to_str().unwrap_or_default()); - // Even though neither options.create nor options.create_new are true, if the - // tablet files already exists, we will open it by calling - // inner.create_tablet. In this case, the tablet exists but not in the cache - // (registry). 
- if !options.create() && !options.create_new() && !engine_exist { - return Err(box_err!( - "path {} does not have db", - path.to_str().unwrap_or_default() - )); - }; - - if options.create_new() && engine_exist { - return Err(box_err!( - "region {} {} already exists", - id, - path.to_str().unwrap() - )); - } - - self.inner.create_tablet(path) - } - #[inline] - fn create_shared_db(&self) -> Result { - self.open_tablet(0, Some(0), OpenOptions::default().set_create_new(true)) - } - - #[inline] - fn exists_raw(&self, path: &Path) -> bool { + fn exists(&self, path: &Path) -> bool { KvTestEngine::exists(path.to_str().unwrap_or_default()) } - - #[inline] - fn tablets_path(&self) -> PathBuf { - self.inner.root_path.join("tablets") - } - - #[inline] - fn tablet_path_with_prefix(&self, prefix: &str, id: u64, suffix: u64) -> PathBuf { - self.inner - .root_path - .join(format!("tablets/{}{}_{}", prefix, id, suffix)) - } - - #[inline] - fn mark_tombstone(&self, region_id: u64, suffix: u64) { - let path = self.tablet_path(region_id, suffix).join(TOMBSTONE_MARK); - // When the full directory path does not exsit, create will return error and in - // this case, we just ignore it. 
- let _ = std::fs::File::create(path); - { - let mut reg = self.registry.lock().unwrap(); - if let Some((cached_tablet, cached_suffix)) = reg.remove(®ion_id) && cached_suffix != suffix { - reg.insert(region_id, (cached_tablet, cached_suffix)); - } - } - } - - #[inline] - fn is_tombstoned(&self, region_id: u64, suffix: u64) -> bool { - self.tablet_path(region_id, suffix) - .join(TOMBSTONE_MARK) - .exists() - } - - #[inline] - fn destroy_tablet(&self, region_id: u64, suffix: u64) -> engine_traits::Result<()> { - let path = self.tablet_path(region_id, suffix); - { - let mut reg = self.registry.lock().unwrap(); - if let Some((cached_tablet, cached_suffix)) = reg.remove(®ion_id) && cached_suffix != suffix { - reg.insert(region_id, (cached_tablet, cached_suffix)); - } - } - let _ = std::fs::remove_dir_all(path); - Ok(()) - } - - #[inline] - fn load_tablet(&self, path: &Path, region_id: u64, suffix: u64) -> Result { - { - let reg = self.registry.lock().unwrap(); - if let Some((db, db_suffix)) = reg.get(®ion_id) && *db_suffix == suffix { - return Err(box_err!("region {} {} already exists", region_id, db.path())); - } - } - - let db_path = self.tablet_path(region_id, suffix); - std::fs::rename(path, db_path)?; - self.open_tablet( - region_id, - Some(suffix), - OpenOptions::default().set_create(true), - ) - } - - fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()> { - let reg = self.registry.lock().unwrap(); - // pick up any tablet and set the shared block cache capacity - if let Some((_id, (tablet, _suffix))) = (*reg).iter().next() { - let opt = tablet.get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap - opt.set_block_cache_capacity(capacity)?; - } - Ok(()) - } - } - - impl TabletAccessor for TestTabletFactoryV2 { - #[inline] - fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &KvTestEngine)) { - let reg = self.registry.lock().unwrap(); - for (id, (tablet, suffix)) in &*reg { - f(*id, *suffix, tablet) - } - } - - // it have multi tablets. 
- fn is_single_engine(&self) -> bool { - false - } } } diff --git a/components/engine_traits/Cargo.toml b/components/engine_traits/Cargo.toml index d38962e71c9..fcfcbdb2799 100644 --- a/components/engine_traits/Cargo.toml +++ b/components/engine_traits/Cargo.toml @@ -9,6 +9,7 @@ failpoints = ["fail/failpoints"] [dependencies] case_macros = { workspace = true } +collections = { workspace = true } error_code = { workspace = true } fail = "0.5" file_system = { workspace = true } diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index 55ab5d63caa..e12ea074015 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -1,14 +1,6 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::{ - fmt::Debug, - io::Write, - path::{Path, PathBuf}, - str, - vec::Vec, -}; - -use tikv_util::error; +use std::{fmt::Debug, str}; use crate::*; @@ -74,336 +66,3 @@ pub trait KvEngine: true } } - -/// TabletAccessor is the trait to access all the tablets with provided accessor -/// -/// For single rocksdb instance, it essentially accesses the global kvdb with -/// the accessor For multi rocksdb instances, it accesses all the tablets with -/// the accessor -pub trait TabletAccessor { - /// Loop visit all opened tablets by the specified function. - fn for_each_opened_tablet(&self, _f: &mut (dyn FnMut(u64, u64, &EK))); - - /// return true if it's single engine; - /// return false if it's a multi-tablet factory; - fn is_single_engine(&self) -> bool; -} - -/// max error count to log -const MAX_ERROR_COUNT: u32 = 5; - -/// TabletErrorCollector is the facility struct to handle errors when using -/// TabletAccessor::for_each_opened_tablet -/// -/// It will choose the last failed result as the final result, meanwhile logging -/// errors up to MAX_ERROR_COUNT. 
-pub struct TabletErrorCollector { - errors: Vec, - max_error_count: u32, - error_count: u32, - result: std::result::Result<(), Box>, -} - -impl TabletErrorCollector { - pub fn new() -> Self { - Self { - errors: vec![], - max_error_count: MAX_ERROR_COUNT, - error_count: 0, - result: Ok(()), - } - } - - pub fn add_result(&mut self, region_id: u64, suffix: u64, result: Result<()>) { - if result.is_ok() { - return; - } - self.result = Err(Box::from(result.err().unwrap())); - self.error_count += 1; - if self.error_count > self.max_error_count { - return; - } - writeln!( - &mut self.errors, - "Tablet {}_{} encountered error: {:?}.", - region_id, suffix, self.result - ) - .unwrap(); - } - - fn flush_error(&self) { - if self.error_count > 0 { - error!( - "Total count {}. Sample errors: {}", - self.error_count, - str::from_utf8(&self.errors).unwrap() - ); - } - } - - pub fn take_result(&mut self) -> std::result::Result<(), Box> { - std::mem::replace(&mut self.result, Ok(())) - } - - pub fn get_error_count(&self) -> u32 { - self.error_count - } -} - -impl Default for TabletErrorCollector { - fn default() -> Self { - Self::new() - } -} - -impl Drop for TabletErrorCollector { - fn drop(&mut self) { - self.flush_error() - } -} - -/// OpenOptionsn is used for specifiying the way of opening a tablet. -#[derive(Default, Clone)] -pub struct OpenOptions { - // create tablet if non-exist - create: bool, - create_new: bool, - read_only: bool, - cache_only: bool, - skip_cache: bool, -} - -impl OpenOptions { - /// Sets the option to create a tablet, or open it if it already exists. - pub fn set_create(mut self, create: bool) -> Self { - self.create = create; - self - } - - /// Sets the option to create a new tablet, failing if it already exists. 
- pub fn set_create_new(mut self, create_new: bool) -> Self { - self.create_new = create_new; - self - } - - /// Sets the option for read only - pub fn set_read_only(mut self, read_only: bool) -> Self { - self.read_only = read_only; - self - } - - /// Sets the option for only reading from cache. - pub fn set_cache_only(mut self, cache_only: bool) -> Self { - self.cache_only = cache_only; - self - } - - /// Sets the option to open a tablet without updating the cache. - pub fn set_skip_cache(mut self, skip_cache: bool) -> Self { - self.skip_cache = skip_cache; - self - } - - pub fn create(&self) -> bool { - self.create - } - - pub fn create_new(&self) -> bool { - self.create_new - } - - pub fn read_only(&self) -> bool { - self.read_only - } - - pub fn cache_only(&self) -> bool { - self.cache_only - } - - pub fn skip_cache(&self) -> bool { - self.skip_cache - } -} - -pub const SPLIT_PREFIX: &str = "split_"; -pub const MERGE_PREFIX: &str = "merge_"; - -/// A factory trait to create new engine. -// It should be named as `EngineFactory` for consistency, but we are about to -// rename engine to tablet, so always use tablet for new traits/types. -pub trait TabletFactory: TabletAccessor + Send + Sync { - /// Open the tablet with id and suffix according to the OpenOptions. - /// - /// The id is likely the region Id, the suffix could be the current raft log - /// index. They together could specify a unique path for a region's - /// tablet. The reason to have suffix is that we can keep more than one - /// tablet for a region. - fn open_tablet(&self, id: u64, suffix: Option, options: OpenOptions) -> Result; - - /// Open tablet by raw path without updating cache. 
- fn open_tablet_raw( - &self, - path: &Path, - id: u64, - suffix: u64, - options: OpenOptions, - ) -> Result; - - /// Create the shared db for v1 - fn create_shared_db(&self) -> Result; - - /// Destroy the tablet and its data - fn destroy_tablet(&self, id: u64, suffix: u64) -> Result<()>; - - /// Check if the tablet with specified id/suffix exists - #[inline] - fn exists(&self, id: u64, suffix: u64) -> bool { - self.exists_raw(&self.tablet_path(id, suffix)) - } - - /// Check if the tablet with specified path exists - fn exists_raw(&self, path: &Path) -> bool; - - /// Get the tablet path by id and suffix - fn tablet_path(&self, id: u64, suffix: u64) -> PathBuf { - self.tablet_path_with_prefix("", id, suffix) - } - - /// Get the tablet path by id and suffix - /// - /// Used in special situations - /// Ex: split/merge. - fn tablet_path_with_prefix(&self, prefix: &str, id: u64, suffix: u64) -> PathBuf; - - /// Tablets root path - fn tablets_path(&self) -> PathBuf; - - /// Load the tablet from path for id and suffix--for scenarios such as - /// applying snapshot - fn load_tablet(&self, _path: &Path, _id: u64, _suffix: u64) -> Result { - unimplemented!(); - } - - /// Mark the tablet with specified id and suffix tombostone - fn mark_tombstone(&self, _id: u64, _suffix: u64) { - unimplemented!(); - } - - /// Check if the tablet with specified id and suffix tombostone - fn is_tombstoned(&self, _region_id: u64, _suffix: u64) -> bool { - unimplemented!(); - } - - fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()>; -} - -pub struct DummyFactory -where - EK: CfOptionsExt + Clone + Send + 'static, -{ - pub engine: Option, - pub root_path: String, -} - -impl TabletFactory for DummyFactory -where - EK: CfOptionsExt + Clone + Send + Sync + 'static, -{ - fn create_shared_db(&self) -> Result { - Ok(self.engine.as_ref().unwrap().clone()) - } - - fn open_tablet(&self, _id: u64, _suffix: Option, _options: OpenOptions) -> Result { - 
Ok(self.engine.as_ref().unwrap().clone()) - } - - fn open_tablet_raw( - &self, - _path: &Path, - _id: u64, - _suffix: u64, - _options: OpenOptions, - ) -> Result { - Ok(self.engine.as_ref().unwrap().clone()) - } - - fn destroy_tablet(&self, _id: u64, _suffix: u64) -> Result<()> { - Ok(()) - } - - fn exists_raw(&self, _path: &Path) -> bool { - true - } - - fn tablet_path_with_prefix(&self, _prefix: &str, _id: u64, _suffix: u64) -> PathBuf { - PathBuf::from(&self.root_path) - } - - fn tablets_path(&self) -> PathBuf { - PathBuf::from(&self.root_path) - } - - fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()> { - let opt = self - .engine - .as_ref() - .unwrap() - .get_options_cf(CF_DEFAULT) - .unwrap(); // FIXME unwrap - opt.set_block_cache_capacity(capacity) - } -} - -impl TabletAccessor for DummyFactory -where - EK: CfOptionsExt + Clone + Send + 'static, -{ - fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &EK)) { - if let Some(engine) = &self.engine { - f(0, 0, engine); - } - } - - fn is_single_engine(&self) -> bool { - true - } -} - -impl DummyFactory -where - EK: CfOptionsExt + Clone + Send + 'static, -{ - pub fn new(engine: Option, root_path: String) -> DummyFactory { - DummyFactory { engine, root_path } - } -} - -impl Default for DummyFactory { - fn default() -> Self { - Self::new(None, "/tmp".to_string()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_tablet_error_collector_ok() { - let mut err = TabletErrorCollector::new(); - err.add_result(1, 1, Ok(())); - err.take_result().unwrap(); - assert_eq!(err.get_error_count(), 0); - } - - #[test] - fn test_tablet_error_collector_err() { - let mut err = TabletErrorCollector::new(); - err.add_result(1, 1, Ok(())); - err.add_result(1, 1, Err(Status::with_code(Code::Aborted).into())); - err.add_result(1, 1, Err(Status::with_code(Code::NotFound).into())); - err.add_result(1, 1, Ok(())); - err.take_result().unwrap_err(); - assert_eq!(err.get_error_count(), 2); - } 
-} diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index b9cf8847751..6a140230fd5 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -294,6 +294,8 @@ mod sst_partitioner; pub use crate::sst_partitioner::*; mod range_properties; pub use crate::{mvcc_properties::*, range_properties::*}; +mod tablet; +pub use tablet::*; mod ttl_properties; pub use crate::ttl_properties::*; mod perf_context; diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs new file mode 100644 index 00000000000..988cd343fe3 --- /dev/null +++ b/components/engine_traits/src/tablet.rs @@ -0,0 +1,398 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + path::{Path, PathBuf}, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, Mutex, + }, +}; + +use collections::HashMap; +use tikv_util::box_err; + +use crate::{Error, Result}; + +#[derive(Debug)] +struct LatestTablet { + data: Mutex>, + version: AtomicU64, +} + +/// Tablet may change during split, merge and applying snapshot. So we need a +/// shared value to reflect the latest tablet. `CachedTablet` provide cache that +/// can speed up common access. +#[derive(Clone, Debug)] +pub struct CachedTablet { + latest: Arc>, + cache: Option, + version: u64, +} + +impl CachedTablet { + #[inline] + fn new(data: Option) -> Self { + CachedTablet { + latest: Arc::new(LatestTablet { + data: Mutex::new(data.clone()), + version: AtomicU64::new(0), + }), + cache: data, + version: 0, + } + } + + pub fn set(&mut self, data: EK) { + self.version = { + let mut latest_data = self.latest.data.lock().unwrap(); + *latest_data = Some(data.clone()); + self.latest.version.fetch_add(1, Ordering::Relaxed) + 1 + }; + self.cache = Some(data); + } + + /// Get the tablet from cache without checking if it's up to date. + #[inline] + pub fn cache(&self) -> Option<&EK> { + self.cache.as_ref() + } + + /// Get the latest tablet. 
+ #[inline] + pub fn latest(&mut self) -> Option<&EK> { + if self.latest.version.load(Ordering::Relaxed) > self.version { + let latest_data = self.latest.data.lock().unwrap(); + self.version = self.latest.version.load(Ordering::Relaxed); + self.cache = latest_data.clone(); + } + self.cache() + } +} + +/// A factory trait to create new tablet for multi-rocksdb architecture. +// It should be named as `EngineFactory` for consistency, but we are about to +// rename engine to tablet, so always use tablet for new traits/types. +pub trait TabletFactory: Send + Sync { + /// Open the tablet in `path`. + /// + /// `id` and `suffix` is used to mark the identity of tablet. The id is + /// likely the region Id, the suffix could be the current raft log + /// index. The reason to have suffix is that we can keep more than one + /// tablet for a region. + fn open_tablet(&self, id: u64, suffix: Option, path: &Path) -> Result; + + /// Destroy the tablet and its data + fn destroy_tablet(&self, id: u64, suffix: Option, path: &Path) -> Result<()>; + + /// Check if the tablet with specified path exists + fn exists(&self, path: &Path) -> bool; +} + +pub struct SingletonFactory { + tablet: EK, +} + +impl SingletonFactory { + pub fn new(tablet: EK) -> Self { + SingletonFactory { tablet } + } +} + +impl TabletFactory for SingletonFactory { + /// Open the tablet in `path`. + /// + /// `id` and `suffix` is used to mark the identity of tablet. The id is + /// likely the region Id, the suffix could be the current raft log + /// index. The reason to have suffix is that we can keep more than one + /// tablet for a region. 
+ fn open_tablet(&self, _id: u64, _suffix: Option, _path: &Path) -> Result { + Ok(self.tablet.clone()) + } + + /// Destroy the tablet and its data + fn destroy_tablet(&self, _id: u64, _suffix: Option, _path: &Path) -> Result<()> { + Ok(()) + } + + /// Check if the tablet with specified path exists + fn exists(&self, _path: &Path) -> bool { + true + } +} + +/// A global registry for all tablets. +struct TabletRegistryInner { + // region_id, suffix -> tablet + tablets: Mutex>>, + tombstone: Mutex>, + factory: Box>, + root: PathBuf, +} + +pub struct TabletRegistry { + // One may consider to add cache to speed up access. But it also makes it more + // difficult to gc stale cache. + tablets: Arc>, +} + +impl Clone for TabletRegistry { + fn clone(&self) -> Self { + Self { + tablets: self.tablets.clone(), + } + } +} + +unsafe impl Send for TabletRegistry {} +unsafe impl Sync for TabletRegistry {} + +impl TabletRegistry { + pub fn new(factory: Box>, path: impl Into) -> Result { + let root = path.into(); + std::fs::create_dir_all(&root)?; + Ok(TabletRegistry { + tablets: Arc::new(TabletRegistryInner { + tablets: Mutex::new(HashMap::default()), + factory, + root, + tombstone: Mutex::default(), + }), + }) + } + + pub fn tablet_name(&self, prefix: &str, id: u64, suffix: u64) -> String { + format!("{}{}_{}", prefix, id, suffix) + } + + pub fn tablet_root(&self) -> &Path { + &self.tablets.root + } + + pub fn tablet_path(&self, id: u64, suffix: u64) -> PathBuf { + let name = self.tablet_name("", id, suffix); + self.tablets.root.join(name) + } + + /// Gets a tablet. + pub fn get(&self, id: u64) -> Option> + where + EK: Clone, + { + let tablets = self.tablets.tablets.lock().unwrap(); + tablets.get(&id).cloned() + } + + /// Gets a tablet, create a default one if it doesn't exist. 
+ pub fn get_or_default(&self, id: u64) -> CachedTablet + where + EK: Clone, + { + let mut tablets = self.tablets.tablets.lock().unwrap(); + tablets + .entry(id) + .or_insert_with(|| CachedTablet::new(None)) + .clone() + } + + pub fn tablet_factory(&self) -> &dyn TabletFactory { + self.tablets.factory.as_ref() + } + + pub fn remove(&self, id: u64) { + self.tablets.tablets.lock().unwrap().remove(&id); + } + + /// Load the tablet and set it as the latest. + /// + /// If the tablet doesn't exist, it will create an empty one. + pub fn load(&self, id: u64, suffix: u64, create: bool) -> Result> + where + EK: Clone, + { + let path = self.tablet_path(id, suffix); + if !create && !self.tablets.factory.exists(&path) { + return Err(Error::Other(box_err!( + "tablet ({}, {:?}) doesn't exist", + id, + suffix + ))); + } + let tablet = self.tablets.factory.open_tablet(id, Some(suffix), &path)?; + let mut cached = self.get_or_default(id); + cached.set(tablet); + Ok(cached) + } + + /// Destroy the tablet and its data + pub fn mark_tombstone(&self, id: u64, suffix: u64) { + self.tablets.tombstone.lock().unwrap().push((id, suffix)); + } + + /// Loop over all opened tablets. Note, it's possible that the visited + /// tablet is not the latest one. If latest one is required, you may + /// either: + /// - loop several times to make it likely to visit all tablets. + /// - send commands to fsms instead, which can guarantee latest tablet is + /// visisted. 
+ pub fn for_each_opened_tablet(&self, mut f: impl FnMut(u64, &mut CachedTablet) -> bool) { + let mut tablets = self.tablets.tablets.lock().unwrap(); + for (id, tablet) in tablets.iter_mut() { + if !f(*id, tablet) { + return; + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cached_tablet() { + let mut cached_tablet = CachedTablet::new(None); + assert_eq!(cached_tablet.cache(), None); + assert_eq!(cached_tablet.latest(), None); + + cached_tablet = CachedTablet::new(Some(1)); + assert_eq!(cached_tablet.cache().cloned(), Some(1)); + assert_eq!(cached_tablet.latest().cloned(), Some(1)); + + // Setting tablet will refresh cache immediately. + cached_tablet.set(2); + assert_eq!(cached_tablet.cache().cloned(), Some(2)); + + // Test `latest()` will use cache. + // Unsafe modify the data. + let old_data = *cached_tablet.latest.data.lock().unwrap(); + *cached_tablet.latest.data.lock().unwrap() = Some(0); + assert_eq!(cached_tablet.latest().cloned(), old_data); + // Restore the data. + *cached_tablet.latest.data.lock().unwrap() = old_data; + + let mut cloned = cached_tablet.clone(); + // Clone should reuse cache. + assert_eq!(cloned.cache().cloned(), Some(2)); + cloned.set(1); + assert_eq!(cloned.cache().cloned(), Some(1)); + assert_eq!(cloned.latest().cloned(), Some(1)); + + // Local cache won't be refreshed until querying latest. 
+ assert_eq!(cached_tablet.cache().cloned(), Some(2)); + assert_eq!(cached_tablet.latest().cloned(), Some(1)); + assert_eq!(cached_tablet.cache().cloned(), Some(1)); + } + + #[test] + fn test_singleton_factory() { + let tablet = Arc::new(1); + let singleton = SingletonFactory::new(tablet.clone()); + let registry = TabletRegistry::new(Box::new(singleton), "").unwrap(); + registry.load(1, 1, true).unwrap(); + let mut cached = registry.get(1).unwrap(); + assert_eq!(cached.latest().cloned(), Some(tablet.clone())); + + registry.load(2, 1, true).unwrap(); + let mut count = 0; + registry.for_each_opened_tablet(|id, cached| { + assert!(&[1, 2].contains(&id), "{}", id); + assert_eq!(cached.latest().cloned(), Some(tablet.clone())); + count += 1; + true + }); + assert_eq!(count, 2); + + // Destroy should be ignored. + registry + .tablet_factory() + .destroy_tablet(2, Some(1), ®istry.tablet_path(2, 1)) + .unwrap(); + + // Exist check should always succeed. + registry.load(3, 1, false).unwrap(); + let mut cached = registry.get(3).unwrap(); + assert_eq!(cached.latest().cloned(), Some(tablet)); + } + + type Record = Arc<(u64, u64)>; + + struct MemoryTablet { + tablet: Mutex>, + } + + impl TabletFactory for MemoryTablet { + fn open_tablet(&self, id: u64, suffix: Option, path: &Path) -> Result { + let mut tablet = self.tablet.lock().unwrap(); + if tablet.contains_key(path) { + return Err(Error::Other(box_err!("tablet is opened"))); + } + tablet.insert(path.to_owned(), Arc::new((id, suffix.unwrap_or(0)))); + Ok(tablet[path].clone()) + } + + fn exists(&self, path: &Path) -> bool { + let tablet = self.tablet.lock().unwrap(); + tablet.contains_key(path) + } + + fn destroy_tablet(&self, id: u64, suffix: Option, path: &Path) -> Result<()> { + let prev = self.tablet.lock().unwrap().remove(path).unwrap(); + assert_eq!((id, suffix.unwrap_or(0)), *prev); + Ok(()) + } + } + + #[test] + fn test_tablet_registry() { + let factory = MemoryTablet { + tablet: Mutex::new(HashMap::default()), + }; + 
let registry = TabletRegistry::new(Box::new(factory), "").unwrap(); + + let mut tablet_1_10 = registry.load(1, 10, true).unwrap(); + // It's open already, load it twice should report lock error. + registry.load(1, 10, true).unwrap_err(); + let mut cached = registry.get(1).unwrap(); + assert_eq!(cached.latest(), tablet_1_10.latest()); + + let tablet_path = registry.tablet_path(1, 10); + assert!(registry.tablet_factory().exists(&tablet_path)); + + let tablet_path = registry.tablet_path(1, 11); + assert!(!registry.tablet_factory().exists(&tablet_path)); + // Not exist tablet should report error. + registry.load(1, 11, false).unwrap_err(); + assert!(registry.get(2).is_none()); + // Though path not exist, but we should be able to create an empty one. + assert_eq!(registry.get_or_default(2).latest(), None); + assert!(!registry.tablet_factory().exists(&tablet_path)); + + // Load new suffix should update cache. + registry.load(1, 11, true).unwrap(); + assert_ne!(cached.latest(), tablet_1_10.cache()); + let tablet_path = registry.tablet_path(1, 11); + assert!(registry.tablet_factory().exists(&tablet_path)); + + let mut count = 0; + registry.for_each_opened_tablet(|_, _| { + count += 1; + true + }); + assert_eq!(count, 2); + + registry.remove(2); + assert!(registry.get(2).is_none()); + count = 0; + registry.for_each_opened_tablet(|_, _| { + count += 1; + true + }); + assert_eq!(count, 1); + + let name = registry.tablet_name("prefix_", 12, 30); + assert_eq!(name, "prefix_12_30"); + let normal_name = registry.tablet_name("", 20, 15); + let normal_tablet_path = registry.tablet_path(20, 15); + assert_eq!(registry.tablet_root().join(normal_name), normal_tablet_path); + } +} diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 199e8cafbd8..96cbee19e4e 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -17,7 +17,7 @@ use causal_ts::CausalTsProviderImpl; use 
collections::HashMap; use concurrency_manager::ConcurrencyManager; use crossbeam::channel::{Sender, TrySendError}; -use engine_traits::{Engines, KvEngine, RaftEngine, TabletFactory}; +use engine_traits::{Engines, KvEngine, RaftEngine, TabletRegistry}; use file_system::{set_io_type, IoType}; use futures::{compat::Future01CompatExt, FutureExt}; use kvproto::{ @@ -72,9 +72,9 @@ pub struct StoreContext { pub timer: SteadyTimer, pub write_senders: WriteSenders, /// store meta - pub store_meta: Arc>>, + pub store_meta: Arc>, pub engine: ER, - pub tablet_factory: Arc>, + pub tablet_registry: TabletRegistry, pub apply_pool: FuturePool, pub read_scheduler: Scheduler>, pub snap_mgr: TabletSnapManager, @@ -222,7 +222,7 @@ struct StorePollerBuilder { cfg: Arc>, store_id: u64, engine: ER, - tablet_factory: Arc>, + tablet_registry: TabletRegistry, trans: T, router: StoreRouter, read_scheduler: Scheduler>, @@ -230,7 +230,7 @@ struct StorePollerBuilder { write_senders: WriteSenders, apply_pool: FuturePool, logger: Logger, - store_meta: Arc>>, + store_meta: Arc>, snap_mgr: TabletSnapManager, } @@ -239,14 +239,14 @@ impl StorePollerBuilder { cfg: Arc>, store_id: u64, engine: ER, - tablet_factory: Arc>, + tablet_registry: TabletRegistry, trans: T, router: StoreRouter, read_scheduler: Scheduler>, pd_scheduler: Scheduler, store_writers: &mut StoreWriters, logger: Logger, - store_meta: Arc>>, + store_meta: Arc>, snap_mgr: TabletSnapManager, ) -> Self { let pool_size = cfg.value().apply_batch_system.pool_size; @@ -263,7 +263,7 @@ impl StorePollerBuilder { cfg, store_id, engine, - tablet_factory, + tablet_registry, trans, router, read_scheduler, @@ -294,7 +294,7 @@ impl StorePollerBuilder { Some(p) => p, None => return Ok(()), }; - let (sender, peer_fsm) = PeerFsm::new(&cfg, &*self.tablet_factory, storage)?; + let (sender, peer_fsm) = PeerFsm::new(&cfg, &self.tablet_registry, storage)?; meta.region_read_progress .insert(region_id, peer_fsm.as_ref().peer().read_progress().clone()); @@ 
-342,7 +342,7 @@ where write_senders: self.write_senders.clone(), store_meta: self.store_meta.clone(), engine: self.engine.clone(), - tablet_factory: self.tablet_factory.clone(), + tablet_registry: self.tablet_registry.clone(), apply_pool: self.apply_pool.clone(), read_scheduler: self.read_scheduler.clone(), snap_mgr: self.snap_mgr.clone(), @@ -386,11 +386,11 @@ impl StoreSystem { store_id: u64, cfg: Arc>, raft_engine: ER, - tablet_factory: Arc>, + tablet_registry: TabletRegistry, trans: T, pd_client: Arc, router: &StoreRouter, - store_meta: Arc>>, + store_meta: Arc>, snap_mgr: TabletSnapManager, concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 @@ -424,7 +424,7 @@ impl StoreSystem { store_id, pd_client, raft_engine.clone(), - tablet_factory.clone(), + tablet_registry.clone(), router.clone(), workers.pd_worker.remote(), concurrency_manager, @@ -438,7 +438,7 @@ impl StoreSystem { cfg.clone(), store_id, raft_engine, - tablet_factory, + tablet_registry, trans, router.clone(), read_scheduler, @@ -462,8 +462,6 @@ impl StoreSystem { for (region_id, (tx, fsm)) in peers { meta.readers .insert(region_id, fsm.peer().generate_read_delegate()); - meta.tablet_caches - .insert(region_id, fsm.peer().tablet().clone()); address.push(region_id); mailboxes.push(( diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 2aa42da2e42..6e2921a0c0d 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -11,7 +11,7 @@ use std::{ use batch_system::{Fsm, FsmScheduler, Mailbox}; use crossbeam::channel::TryRecvError; -use engine_traits::{KvEngine, TabletFactory}; +use engine_traits::{KvEngine, TabletRegistry}; use futures::{Future, StreamExt}; use kvproto::{metapb, raft_serverpb::RegionLocalState}; use raftstore::store::ReadTask; @@ -24,7 +24,6 @@ use tikv_util::{ use crate::{ raft::Apply, router::{ApplyRes, ApplyTask, PeerMsg}, - tablet::CachedTablet, }; 
/// A trait for reporting apply result. @@ -64,8 +63,7 @@ impl ApplyFsm { peer: metapb::Peer, region_state: RegionLocalState, res_reporter: R, - remote_tablet: CachedTablet, - tablet_factory: Arc>, + tablet_registry: TabletRegistry, read_scheduler: Scheduler>, logger: Logger, ) -> (ApplyScheduler, Self) { @@ -74,8 +72,7 @@ impl ApplyFsm { peer, region_state, res_reporter, - remote_tablet, - tablet_factory, + tablet_registry, read_scheduler, logger, ); diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index cf85522df90..6254e1975fd 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -6,7 +6,7 @@ use std::borrow::Cow; use batch_system::{BasicMailbox, Fsm}; use crossbeam::channel::TryRecvError; -use engine_traits::{KvEngine, RaftEngine, TabletFactory}; +use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use raftstore::store::{Config, Transport}; use slog::{debug, error, info, trace, Logger}; use tikv_util::{ @@ -39,10 +39,10 @@ pub struct PeerFsm { impl PeerFsm { pub fn new( cfg: &Config, - tablet_factory: &dyn TabletFactory, + tablet_registry: &TabletRegistry, storage: Storage, ) -> Result> { - let peer = Peer::new(cfg, tablet_factory, storage)?; + let peer = Peer::new(cfg, tablet_registry, storage)?; info!(peer.logger, "create peer"); let (tx, rx) = mpsc::loose_bounded(cfg.notify_capacity); let fsm = Box::new(PeerFsm { diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 546ec95a604..73702500e19 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -22,41 +22,17 @@ use crate::{ batch::StoreContext, raft::Peer, router::{StoreMsg, StoreTick}, - tablet::CachedTablet, }; -pub struct StoreMeta -where - E: KvEngine, -{ +#[derive(Default)] +pub struct StoreMeta { pub store_id: Option, /// region_id -> reader pub readers: HashMap, - /// region_id -> tablet cache - pub 
tablet_caches: HashMap>, /// region_id -> `RegionReadProgress` pub region_read_progress: RegionReadProgressRegistry, } -impl StoreMeta -where - E: KvEngine, -{ - pub fn new() -> StoreMeta { - StoreMeta { - store_id: None, - readers: HashMap::default(), - tablet_caches: HashMap::default(), - region_read_progress: RegionReadProgressRegistry::new(), - } - } -} - -impl Default for StoreMeta { - fn default() -> Self { - Self::new() - } -} pub struct Store { id: u64, // Unix time when it's started. diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 7dea9d55901..2a9d5faabd5 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -32,7 +32,6 @@ mod fsm; mod operation; mod raft; pub mod router; -mod tablet; mod worker; pub(crate) use batch::StoreContext; diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index eb6560d239e..c1e25474701 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -17,7 +17,7 @@ use raftstore::{ Result, }; use slog::info; -pub use split::{SplitInit, SplitResult}; +pub use split::{SplitInit, SplitResult, SPLIT_PREFIX}; use tikv_util::box_err; use self::conf_change::ConfChangeResult; diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 0b97d726a2e..13a5d168915 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -30,8 +30,7 @@ use std::{cmp, collections::VecDeque}; use collections::HashSet; use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{ - Checkpointer, DeleteStrategy, KvEngine, OpenOptions, RaftEngine, RaftLogBatch, Range, - CF_DEFAULT, SPLIT_PREFIX, + Checkpointer, DeleteStrategy, KvEngine, RaftEngine, 
RaftLogBatch, Range, CF_DEFAULT, }; use fail::fail_point; use keys::enc_end_key; @@ -64,6 +63,8 @@ use crate::{ router::{ApplyRes, PeerMsg, StoreMsg}, }; +pub const SPLIT_PREFIX: &str = "split_"; + #[derive(Debug)] pub struct SplitResult { pub regions: Vec, @@ -225,17 +226,15 @@ impl Apply { ) }); + let reg = self.tablet_registry(); for new_region in ®ions { let new_region_id = new_region.id; if new_region_id == region_id { continue; } - let split_temp_path = self.tablet_factory().tablet_path_with_prefix( - SPLIT_PREFIX, - new_region_id, - RAFT_INIT_LOG_INDEX, - ); + let name = reg.tablet_name(SPLIT_PREFIX, new_region_id, RAFT_INIT_LOG_INDEX); + let split_temp_path = reg.tablet_root().join(name); checkpointer .create_at(&split_temp_path, None, 0) .unwrap_or_else(|e| { @@ -248,7 +247,7 @@ impl Apply { }); } - let derived_path = self.tablet_factory().tablet_path(region_id, log_index); + let derived_path = self.tablet_registry().tablet_path(region_id, log_index); checkpointer .create_at(&derived_path, None, 0) .unwrap_or_else(|e| { @@ -259,9 +258,11 @@ impl Apply { e ) }); - let tablet = self + let reg = self.tablet_registry(); + let path = reg.tablet_path(region_id, log_index); + let tablet = reg .tablet_factory() - .open_tablet(region_id, Some(log_index), OpenOptions::default()) + .open_tablet(region_id, Some(log_index), &path) .unwrap(); // Remove the old write batch. 
self.write_batch_mut().take(); @@ -492,10 +493,10 @@ mod test { use collections::HashMap; use engine_test::{ ctor::{CfOptions, DbOptions}, - kv::TestTabletFactoryV2, + kv::TestTabletFactory, raft, }; - use engine_traits::{CfOptionsExt, Peekable, TabletFactory, WriteBatch, ALL_CFS}; + use engine_traits::{CfOptionsExt, Peekable, TabletRegistry, WriteBatch, ALL_CFS}; use futures::channel::mpsc::unbounded; use kvproto::{ metapb::RegionEpoch, @@ -516,7 +517,6 @@ mod test { use crate::{ fsm::{ApplyFsm, ApplyResReporter}, raft::Apply, - tablet::CachedTablet, }; struct MockReporter { @@ -546,7 +546,6 @@ mod test { fn assert_split( apply: &mut Apply, - factory: &Arc, parent_id: u64, right_derived: bool, new_region_ids: Vec, @@ -589,8 +588,9 @@ mod test { let state = apply.region_state(); assert_eq!(state.tablet_index, log_index); assert_eq!(state.get_region(), region); - let tablet_path = factory.tablet_path(region.id, log_index); - assert!(factory.exists_raw(&tablet_path)); + let reg = apply.tablet_registry(); + let tablet_path = reg.tablet_path(region.id, log_index); + assert!(reg.tablet_factory().exists(&tablet_path)); match apply_res { AdminCmdResult::SplitRegion(SplitResult { @@ -610,9 +610,10 @@ mod test { } child_idx += 1; - let tablet_path = - factory.tablet_path_with_prefix(SPLIT_PREFIX, region.id, RAFT_INIT_LOG_INDEX); - assert!(factory.exists_raw(&tablet_path)); + let reg = apply.tablet_registry(); + let tablet_name = reg.tablet_name(SPLIT_PREFIX, region.id, RAFT_INIT_LOG_INDEX); + let path = reg.tablet_root().join(tablet_name); + assert!(reg.tablet_factory().exists(&path)); } } } @@ -635,19 +636,9 @@ mod test { .copied() .map(|cf| (cf, CfOptions::default())) .collect(); - let factory = Arc::new(TestTabletFactoryV2::new( - path.path(), - DbOptions::default(), - cf_opts, - )); - - let tablet = factory - .open_tablet( - region.id, - Some(5), - OpenOptions::default().set_create_new(true), - ) - .unwrap(); + let factory = 
Box::new(TestTabletFactory::new(DbOptions::default(), cf_opts)); + let reg = TabletRegistry::new(factory, path.path()).unwrap(); + reg.load(region.id, 5, true).unwrap(); let mut region_state = RegionLocalState::default(); region_state.set_state(PeerState::Normal); @@ -665,8 +656,7 @@ mod test { .clone(), region_state, reporter, - CachedTablet::new(Some(tablet)), - factory.clone(), + reg, read_scheduler, logger.clone(), ); @@ -827,7 +817,6 @@ mod test { assert_split( &mut apply, - &factory, parent_id, right_derive, new_region_ids, diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index bd175ef7a4d..3bb6b7b3852 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -59,7 +59,7 @@ mod admin; mod control; mod write; -pub use admin::{AdminCmdResult, SplitInit, SplitResult}; +pub use admin::{AdminCmdResult, SplitInit, SplitResult, SPLIT_PREFIX}; pub use control::ProposalControl; pub use write::{SimpleWriteDecoder, SimpleWriteEncoder}; @@ -129,8 +129,7 @@ impl Peer { self.peer().clone(), region_state, mailbox, - tablet, - store_ctx.tablet_factory.clone(), + store_ctx.tablet_registry.clone(), read_scheduler, logger, ); diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 60884f63b03..ca610de1bfc 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -230,7 +230,7 @@ impl Store { ctx.read_scheduler.clone(), &ctx.logger, ) - .and_then(|s| PeerFsm::new(&ctx.cfg, &*ctx.tablet_factory, s)) + .and_then(|s| PeerFsm::new(&ctx.cfg, &ctx.tablet_registry, s)) { Ok(p) => p, res => { diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 659fab00754..7df27670a35 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ 
-41,7 +41,7 @@ impl Store { stats.set_store_id(self.store_id()); { let meta = ctx.store_meta.lock().unwrap(); - stats.set_region_count(meta.tablet_caches.len() as u32); + stats.set_region_count(meta.readers.len() as u32); } stats.set_sending_snap_count(0); diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index bbff28b272f..4455ea099f4 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -151,7 +151,7 @@ impl Peer { pub(crate) fn maybe_renew_leader_lease( &mut self, ts: Timespec, - store_meta: &Mutex>, + store_meta: &Mutex, progress: Option, ) { // A nonleader peer should never has leader lease. diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 0736bc13fd8..120e64cb872 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -8,7 +8,7 @@ use std::{ use batch_system::Router; use crossbeam::channel::TrySendError; -use engine_traits::{KvEngine, RaftEngine}; +use engine_traits::{CachedTablet, KvEngine, RaftEngine, TabletRegistry}; use kvproto::{ errorpb, raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse}, @@ -34,7 +34,6 @@ use txn_types::WriteBatchFlags; use crate::{ fsm::StoreMeta, router::{PeerMsg, QueryResult}, - tablet::CachedTablet, StoreRouter, }; @@ -69,15 +68,20 @@ where E: KvEngine, C: MsgRouter, { - pub fn new(store_meta: Arc>>, router: C, logger: Logger) -> Self { + pub fn new( + store_meta: Arc>, + reg: TabletRegistry, + router: C, + logger: Logger, + ) -> Self { Self { - local_reader: LocalReaderCore::new(StoreMetaDelegate::new(store_meta)), + local_reader: LocalReaderCore::new(StoreMetaDelegate::new(store_meta, reg)), router, logger, } } - pub fn store_meta(&self) -> &Arc>> { + pub fn store_meta(&self) -> &Arc> { self.local_reader.store_meta() } @@ -300,15 
+304,16 @@ struct StoreMetaDelegate where E: KvEngine, { - store_meta: Arc>>, + store_meta: Arc>, + reg: TabletRegistry, } impl StoreMetaDelegate where E: KvEngine, { - pub fn new(store_meta: Arc>>) -> StoreMetaDelegate { - StoreMetaDelegate { store_meta } + pub fn new(store_meta: Arc>, reg: TabletRegistry) -> StoreMetaDelegate { + StoreMetaDelegate { store_meta, reg } } } @@ -317,7 +322,7 @@ where E: KvEngine, { type Executor = CachedReadDelegate; - type StoreMeta = Arc>>; + type StoreMeta = Arc>; fn store_id(&self) -> Option { self.store_meta.as_ref().lock().unwrap().store_id @@ -330,7 +335,7 @@ where let reader = meta.readers.get(®ion_id).cloned(); if let Some(reader) = reader { // If reader is not None, cache must not be None. - let cached_tablet = meta.tablet_caches.get(®ion_id).cloned().unwrap(); + let cached_tablet = self.reg.get(region_id).unwrap(); return ( meta.readers.len(), Some(CachedReadDelegate { @@ -431,9 +436,9 @@ mod tests { use crossbeam::{atomic::AtomicCell, channel::TrySendError}; use engine_test::{ ctor::{CfOptions, DbOptions}, - kv::{KvTestEngine, TestTabletFactoryV2}, + kv::{KvTestEngine, TestTabletFactory}, }; - use engine_traits::{MiscExt, OpenOptions, Peekable, SyncMutable, TabletFactory, ALL_CFS}; + use engine_traits::{MiscExt, Peekable, SyncMutable, ALL_CFS}; use futures::executor::block_on; use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, raft_cmdpb::*}; use raftstore::store::{ @@ -470,7 +475,8 @@ mod tests { #[allow(clippy::type_complexity)] fn new_reader( store_id: u64, - store_meta: Arc>>, + store_meta: Arc>, + reg: TabletRegistry, ) -> ( LocalReader, Receiver<(u64, PeerMsg)>, @@ -478,6 +484,7 @@ mod tests { let (ch, rx) = MockRouter::new(); let mut reader = LocalReader::new( store_meta, + reg, ch, Logger::root(slog::Discard, o!("key1" => "value1")), ); @@ -544,10 +551,11 @@ mod tests { .prefix("test-local-reader") .tempdir() .unwrap(); - let factory = Arc::new(TestTabletFactoryV2::new(path.path(), ops, cf_opts)); + let factory 
= Box::new(TestTabletFactory::new(ops, cf_opts)); + let reg = TabletRegistry::new(factory, path.path()).unwrap(); - let store_meta = Arc::new(Mutex::new(StoreMeta::new())); - let (mut reader, mut rx) = new_reader(store_id, store_meta.clone()); + let store_meta = Arc::new(Mutex::new(StoreMeta::default())); + let (mut reader, mut rx) = new_reader(store_id, store_meta.clone(), reg.clone()); let (mix_tx, mix_rx) = sync_channel(1); let handler = mock_raftstore(mix_rx); @@ -623,11 +631,7 @@ mod tests { }; meta.readers.insert(1, read_delegate); // create tablet with region_id 1 and prepare some data - let tablet1 = factory - .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap(); - let cache = CachedTablet::new(Some(tablet1)); - meta.tablet_caches.insert(1, cache); + reg.load(1, 10, true).unwrap(); } let (ch_tx, ch_rx) = sync_channel(1); @@ -738,10 +742,11 @@ mod tests { .prefix("test-local-reader") .tempdir() .unwrap(); - let factory = Arc::new(TestTabletFactoryV2::new(path.path(), ops, cf_opts)); + let factory = Box::new(TestTabletFactory::new(ops, cf_opts)); + let reg = TabletRegistry::new(factory, path.path()).unwrap(); let store_meta = - StoreMetaDelegate::new(Arc::new(Mutex::new(StoreMeta::::new()))); + StoreMetaDelegate::new(Arc::new(Mutex::new(StoreMeta::default())), reg.clone()); let tablet1; let tablet2; @@ -753,24 +758,18 @@ mod tests { meta.readers.insert(1, read_delegate); // create tablet with region_id 1 and prepare some data - tablet1 = factory - .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap(); + reg.load(1, 10, true).unwrap(); + tablet1 = reg.get(1).unwrap().latest().unwrap().clone(); tablet1.put(b"a1", b"val1").unwrap(); - let cache = CachedTablet::new(Some(tablet1.clone())); - meta.tablet_caches.insert(1, cache); // Create read_delegate with region id 2 let read_delegate = ReadDelegate::mock(2); meta.readers.insert(2, read_delegate); // create tablet with region_id 1 and prepare some data - 
tablet2 = factory - .open_tablet(2, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap(); + reg.load(2, 10, true).unwrap(); + tablet2 = reg.get(2).unwrap().latest().unwrap().clone(); tablet2.put(b"a2", b"val2").unwrap(); - let cache = CachedTablet::new(Some(tablet2.clone())); - meta.tablet_caches.insert(2, cache); } let (_, delegate) = store_meta.get_executor_and_len(1); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 6a91c25f1f6..c252ad7d231 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -22,7 +22,7 @@ mod snapshot; use std::{cmp, time::Instant}; -use engine_traits::{KvEngine, MiscExt, OpenOptions, RaftEngine, TabletFactory}; +use engine_traits::{KvEngine, MiscExt, RaftEngine}; use error_code::ErrorCodeExt; use kvproto::{ raft_cmdpb::AdminCmdType, @@ -537,7 +537,7 @@ impl Storage { ready.snapshot(), write_task, ctx.snap_mgr.clone(), - ctx.tablet_factory.clone(), + ctx.tablet_registry.clone(), ) { error!(self.logger(),"failed to apply snapshot";"error" => ?e) } diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 4cd4b5265d8..8ac27ba2466 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -28,7 +28,7 @@ use std::{ }, }; -use engine_traits::{KvEngine, OpenOptions, RaftEngine, TabletFactory, SPLIT_PREFIX}; +use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use kvproto::raft_serverpb::{PeerState, RaftSnapshotData, RegionLocalState}; use protobuf::Message; use raft::eraftpb::Snapshot; @@ -41,6 +41,7 @@ use tikv_util::{box_err, box_try, worker::Scheduler}; use crate::{ fsm::ApplyResReporter, + operation::command::SPLIT_PREFIX, raft::{Apply, Peer, Storage}, router::{ApplyTask, PeerTick}, Result, StoreContext, @@ -124,17 +125,14 @@ impl Peer 
{ let first_index = self.storage().entry_storage().first_index(); if first_index == persisted_index + 1 { let region_id = self.region_id(); - let tablet = ctx - .tablet_factory - .open_tablet(region_id, Some(persisted_index), OpenOptions::default()) + ctx.tablet_registry + .load(region_id, persisted_index, false) .unwrap(); - self.tablet_mut().set(tablet); self.schedule_apply_fsm(ctx); self.storage_mut().on_applied_snapshot(); self.raft_group_mut().advance_apply_to(persisted_index); { let mut meta = ctx.store_meta.lock().unwrap(); - meta.tablet_caches.insert(region_id, self.tablet().clone()); meta.readers .insert(region_id, self.generate_read_delegate()); meta.region_read_progress @@ -217,6 +215,7 @@ impl Storage { } } SnapState::Generated(ref s) => { + // TODO: `to` may not be equal to the generated snapshot. let SnapState::Generated(snap) = mem::replace(&mut *snap_state, SnapState::Relax) else { unreachable!() }; if self.validate_snap(&snap, request_index) { return Ok(*snap); @@ -367,7 +366,7 @@ impl Storage { snap: &Snapshot, task: &mut WriteTask, snap_mgr: TabletSnapManager, - tablet_factory: Arc>, + reg: TabletRegistry, ) -> Result<()> { let region_id = self.region().get_id(); let peer_id = self.peer().get_id(); @@ -400,10 +399,10 @@ impl Storage { let (path, clean_split) = match self.split_init_mut() { // If index not match, the peer may accept a newer snapshot after split. 
- Some(init) if init.scheduled && last_index == RAFT_INIT_LOG_INDEX => ( - tablet_factory.tablet_path_with_prefix(SPLIT_PREFIX, region_id, last_index), - false, - ), + Some(init) if init.scheduled && last_index == RAFT_INIT_LOG_INDEX => { + let name = reg.tablet_name(SPLIT_PREFIX, region_id, last_index); + (reg.tablet_root().join(name), false) + } si => { let key = TabletSnapKey::new(region_id, peer_id, last_term, last_index); (snap_mgr.final_recv_path(&key), si.is_some()) @@ -414,20 +413,19 @@ impl Storage { // The snapshot require no additional processing such as ingest them to DB, but // it should load it into the factory after it persisted. let hook = move || { - if let Err(e) = tablet_factory.load_tablet(path.as_path(), region_id, last_index) { + let target_path = reg.tablet_path(region_id, last_index); + if let Err(e) = std::fs::rename(&path, &target_path) { panic!( - "{:?} failed to load tablet, path: {}, {:?}", + "{:?} failed to load tablet, path: {} -> {}, {:?}", logger.list(), path.display(), + target_path.display(), e ); } if clean_split { - let path = tablet_factory.tablet_path_with_prefix( - SPLIT_PREFIX, - region_id, - RAFT_INIT_LOG_INDEX, - ); + let name = reg.tablet_name(SPLIT_PREFIX, region_id, last_index); + let path = reg.tablet_root().join(name); let _ = fs::remove_dir_all(path); } }; diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 06101da8d83..421c2c476f7 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -2,7 +2,7 @@ use std::{mem, sync::Arc}; -use engine_traits::{KvEngine, TabletFactory}; +use engine_traits::{CachedTablet, KvEngine, TabletRegistry}; use kvproto::{metapb, raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; use raftstore::store::{fsm::apply::DEFAULT_APPLY_WB_SIZE, ReadTask}; use slog::Logger; @@ -13,7 +13,6 @@ use crate::{ fsm::ApplyResReporter, operation::AdminCmdResult, router::{ApplyRes, CmdResChannel}, 
- tablet::CachedTablet, }; /// Apply applies all the committed commands to kv db. @@ -24,7 +23,7 @@ pub struct Apply { tablet: EK, write_batch: Option, - tablet_factory: Arc>, + tablet_registry: TabletRegistry, callbacks: Vec<(Vec, RaftCmdResponse)>, @@ -48,11 +47,13 @@ impl Apply { peer: metapb::Peer, region_state: RegionLocalState, res_reporter: R, - mut remote_tablet: CachedTablet, - tablet_factory: Arc>, + tablet_registry: TabletRegistry, read_scheduler: Scheduler>, logger: Logger, ) -> Self { + let mut remote_tablet = tablet_registry + .get(region_state.get_region().get_id()) + .unwrap(); Apply { peer, tablet: remote_tablet.latest().unwrap().clone(), @@ -64,7 +65,7 @@ impl Apply { applied_term: 0, admin_cmd_result: vec![], region_state, - tablet_factory, + tablet_registry, read_scheduler, res_reporter, logger, @@ -72,8 +73,8 @@ impl Apply { } #[inline] - pub fn tablet_factory(&self) -> &Arc> { - &self.tablet_factory + pub fn tablet_registry(&self) -> &TabletRegistry { + &self.tablet_registry } #[inline] diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 16e3e54d5f2..9101a9328f3 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -8,7 +8,7 @@ use std::{ use collections::{HashMap, HashSet}; use crossbeam::atomic::AtomicCell; -use engine_traits::{KvEngine, OpenOptions, RaftEngine, TabletFactory}; +use engine_traits::{CachedTablet, KvEngine, RaftEngine, TabletRegistry}; use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, pdpb, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; use raft::{RawNode, StateRole}; @@ -38,7 +38,6 @@ use crate::{ fsm::{ApplyFsm, ApplyScheduler}, operation::{AsyncWriter, DestroyProgress, ProposalControl, SimpleWriteEncoder}, router::{CmdResChannel, QueryResChannel}, - tablet::CachedTablet, worker::PdTask, Result, }; @@ -99,7 +98,7 @@ impl Peer { /// If peer is destroyed, `None` is returned. 
pub fn new( cfg: &Config, - tablet_factory: &dyn TabletFactory, + tablet_registry: &TabletRegistry, storage: Storage, ) -> Result { let logger = storage.logger().clone(); @@ -110,33 +109,19 @@ impl Peer { let region_id = storage.region().get_id(); let tablet_index = storage.region_state().get_tablet_index(); + let cached_tablet = tablet_registry.get_or_default(region_id); // Another option is always create tablet even if tablet index is 0. But this // can introduce race when gc old tablet and create new peer. - let tablet = if tablet_index != 0 { - if !tablet_factory.exists(region_id, tablet_index) { - return Err(box_err!( - "missing tablet {} for region {}", - tablet_index, - region_id - )); - } + if tablet_index != 0 { // TODO: Perhaps we should stop create the tablet automatically. - Some(tablet_factory.open_tablet( - region_id, - Some(tablet_index), - OpenOptions::default().set_create(true), - )?) - } else { - None - }; - - let tablet = CachedTablet::new(tablet); + tablet_registry.load(region_id, tablet_index, false)?; + } let raft_group = RawNode::new(&raft_cfg, storage, &logger)?; let region = raft_group.store().region_state().get_region().clone(); let tag = format!("[region {}] {}", region.get_id(), peer_id); let mut peer = Peer { - tablet, + tablet: cached_tablet, self_stat: PeerStat::default(), peer_cache: vec![], peer_heartbeats: HashMap::default(), diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 369a25984bf..5211d293e0f 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -372,11 +372,11 @@ mod tests { use engine_test::{ ctor::{CfOptions, DbOptions}, - kv::{KvTestEngine, TestTabletFactoryV2}, + kv::{KvTestEngine, TestTabletFactory}, raft::RaftTestEngine, }; use engine_traits::{ - KvEngine, OpenOptions, RaftEngine, RaftEngineReadOnly, RaftLogBatch, TabletFactory, ALL_CFS, + KvEngine, RaftEngine, RaftEngineReadOnly, RaftLogBatch, 
TabletRegistry, ALL_CFS, }; use kvproto::{ metapb::{Peer, Region}, @@ -392,7 +392,7 @@ mod tests { use tikv_util::worker::{Runnable, Worker}; use super::*; - use crate::{fsm::ApplyResReporter, raft::Apply, router::ApplyRes, tablet::CachedTablet}; + use crate::{fsm::ApplyResReporter, raft::Apply, router::ApplyRes}; #[derive(Clone)] pub struct TestRouter { @@ -477,11 +477,8 @@ mod tests { // building a tablet factory let ops = DbOptions::default(); let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); - let factory = Arc::new(TestTabletFactoryV2::new( - path.path().join("tablet").as_path(), - ops, - cf_opts, - )); + let factory = Box::new(TestTabletFactory::new(ops, cf_opts)); + let reg = TabletRegistry::new(factory, path.path().join("tablet")).unwrap(); let mut worker = Worker::new("test-read-worker").lazy_build("test-read-worker"); let sched = worker.scheduler(); let logger = slog_global::borrow_global().new(o!()); @@ -491,8 +488,7 @@ mod tests { let snapshot = new_empty_snapshot(region.clone(), 10, 1, false); let mut task = WriteTask::new(region.get_id(), 5, 0); - s.apply_snapshot(&snapshot, &mut task, mgr, factory) - .unwrap(); + s.apply_snapshot(&snapshot, &mut task, mgr, reg).unwrap(); // It can be set before load tablet. 
assert_eq!(PeerState::Normal, s.region_state().get_state()); @@ -528,15 +524,9 @@ mod tests { // building a tablet factory let ops = DbOptions::default(); let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); - let factory = Arc::new(TestTabletFactoryV2::new( - path.path().join("tablet").as_path(), - ops, - cf_opts, - )); - // create tablet with region_id 1 - let tablet = factory - .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap(); + let factory = Box::new(TestTabletFactory::new(ops, cf_opts)); + let reg = TabletRegistry::new(factory, path.path().join("tablet")).unwrap(); + reg.load(region.get_id(), 10, true).unwrap(); // setup read runner worker and peer storage let mut worker = Worker::new("test-read-worker").lazy_build("test-read-worker"); let sched = worker.scheduler(); @@ -548,13 +538,14 @@ mod tests { let mut read_runner = ReadRunner::new(router.clone(), raft_engine); read_runner.set_snap_mgr(mgr.clone()); worker.start(read_runner); + let mut state = RegionLocalState::default(); + state.set_region(region.clone()); // setup peer applyer let mut apply = Apply::new( region.get_peers()[0].clone(), - RegionLocalState::default(), + state, router, - CachedTablet::new(Some(tablet)), - factory, + reg, sched, logger, ); @@ -577,6 +568,7 @@ mod tests { let snap_key = TabletSnapKey::from_region_snap(4, 7, &snap); let checkpointer_path = mgr.tablet_gen_path(&snap_key); assert!(checkpointer_path.exists()); + s.snapshot(0, 7).unwrap(); // Test cancel snapshot let snap = s.snapshot(0, 0); diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 8cb65e40a3c..3dda00eb270 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -3,7 +3,7 @@ use std::sync::{Arc, Mutex}; use crossbeam::channel::TrySendError; -use engine_traits::{KvEngine, RaftEngine}; +use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use kvproto::{ 
raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, raft_serverpb::RaftMessage, @@ -49,15 +49,15 @@ where } impl RaftRouter { - pub fn new(store_id: u64, router: StoreRouter) -> Self { - let mut store_meta = StoreMeta::new(); + pub fn new(store_id: u64, reg: TabletRegistry, router: StoreRouter) -> Self { + let mut store_meta = StoreMeta::default(); store_meta.store_id = Some(store_id); let store_meta = Arc::new(Mutex::new(store_meta)); let logger = router.logger().clone(); RaftRouter { router: router.clone(), - local_reader: LocalReader::new(store_meta, router, logger), + local_reader: LocalReader::new(store_meta, reg, router, logger), } } @@ -69,7 +69,7 @@ impl RaftRouter { self.router.send(addr, msg) } - pub fn store_meta(&self) -> &Arc>> { + pub fn store_meta(&self) -> &Arc> { self.local_reader.store_meta() } diff --git a/components/raftstore-v2/src/tablet.rs b/components/raftstore-v2/src/tablet.rs deleted file mode 100644 index 7765f5c07b6..00000000000 --- a/components/raftstore-v2/src/tablet.rs +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. - -use std::sync::{ - atomic::{AtomicU64, Ordering}, - Arc, Mutex, -}; - -#[derive(Debug)] -struct LatestTablet { - data: Mutex>, - version: AtomicU64, -} - -/// Tablet may change during split, merge and applying snapshot. So we need a -/// shared value to reflect the latest tablet. `CachedTablet` provide cache that -/// can speed up common access. 
-#[derive(Clone, Debug)] -pub struct CachedTablet { - latest: Arc>, - cache: Option, - version: u64, -} - -impl CachedTablet { - #[inline] - pub fn new(data: Option) -> Self { - CachedTablet { - latest: Arc::new(LatestTablet { - data: Mutex::new(data.clone()), - version: AtomicU64::new(0), - }), - cache: data, - version: 0, - } - } - - pub fn set(&mut self, data: EK) { - self.version = { - let mut latest_data = self.latest.data.lock().unwrap(); - *latest_data = Some(data.clone()); - self.latest.version.fetch_add(1, Ordering::Relaxed) + 1 - }; - self.cache = Some(data); - } - - /// Get the tablet from cache without checking if it's up to date. - #[inline] - pub fn cache(&self) -> Option<&EK> { - self.cache.as_ref() - } - - /// Get the latest tablet. - #[inline] - pub fn latest(&mut self) -> Option<&EK> { - if self.latest.version.load(Ordering::Relaxed) > self.version { - let latest_data = self.latest.data.lock().unwrap(); - self.version = self.latest.version.load(Ordering::Relaxed); - self.cache = latest_data.clone(); - } - self.cache() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_cached_tablet() { - let mut cached_tablet = CachedTablet::new(None); - assert_eq!(cached_tablet.cache(), None); - assert_eq!(cached_tablet.latest(), None); - - cached_tablet = CachedTablet::new(Some(1)); - assert_eq!(cached_tablet.cache().cloned(), Some(1)); - assert_eq!(cached_tablet.latest().cloned(), Some(1)); - - // Setting tablet will refresh cache immediately. - cached_tablet.set(2); - assert_eq!(cached_tablet.cache().cloned(), Some(2)); - - // Test `latest()` will use cache. - // Unsafe modify the data. - let old_data = *cached_tablet.latest.data.lock().unwrap(); - *cached_tablet.latest.data.lock().unwrap() = Some(0); - assert_eq!(cached_tablet.latest().cloned(), old_data); - // Restore the data. - *cached_tablet.latest.data.lock().unwrap() = old_data; - - let mut cloned = cached_tablet.clone(); - // Clone should reuse cache. 
- assert_eq!(cloned.cache().cloned(), Some(2)); - cloned.set(1); - assert_eq!(cloned.cache().cloned(), Some(1)); - assert_eq!(cloned.latest().cloned(), Some(1)); - - // Local cache won't be refreshed until querying latest. - assert_eq!(cached_tablet.cache().cloned(), Some(2)); - assert_eq!(cached_tablet.latest().cloned(), Some(1)); - assert_eq!(cached_tablet.cache().cloned(), Some(1)); - } -} diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 132678e21f2..9803039e392 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -11,7 +11,7 @@ use std::{ use causal_ts::CausalTsProviderImpl; use collections::HashMap; use concurrency_manager::ConcurrencyManager; -use engine_traits::{KvEngine, RaftEngine, TabletFactory}; +use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use kvproto::{metapb, pdpb}; use pd_client::PdClient; use raftstore::store::{util::KeysInfoFormatter, TxnExt}; @@ -97,7 +97,7 @@ where store_id: u64, pd_client: Arc, raft_engine: ER, - tablet_factory: Arc>, + tablet_registry: TabletRegistry, router: StoreRouter, remote: Remote, @@ -130,7 +130,7 @@ where store_id: u64, pd_client: Arc, raft_engine: ER, - tablet_factory: Arc>, + tablet_registry: TabletRegistry, router: StoreRouter, remote: Remote, concurrency_manager: ConcurrencyManager, @@ -142,7 +142,7 @@ where store_id, pd_client, raft_engine, - tablet_factory, + tablet_registry, router, remote, region_peers: HashMap::default(), diff --git a/components/raftstore-v2/src/worker/pd/store_heartbeat.rs b/components/raftstore-v2/src/worker/pd/store_heartbeat.rs index 1caa96a5225..8f49e7f025f 100644 --- a/components/raftstore-v2/src/worker/pd/store_heartbeat.rs +++ b/components/raftstore-v2/src/worker/pd/store_heartbeat.rs @@ -260,12 +260,12 @@ where /// Returns (capacity, used, available). 
fn collect_engine_size(&self) -> Option<(u64, u64, u64)> { - let disk_stats = match fs2::statvfs(self.tablet_factory.tablets_path()) { + let disk_stats = match fs2::statvfs(self.tablet_registry.tablet_root()) { Err(e) => { error!( self.logger, "get disk stat for rocksdb failed"; - "engine_path" => self.tablet_factory.tablets_path().display(), + "engine_path" => self.tablet_registry.tablet_root().display(), "err" => ?e ); return None; diff --git a/components/raftstore-v2/tests/failpoints/test_basic_write.rs b/components/raftstore-v2/tests/failpoints/test_basic_write.rs index 4bf4201f67c..be5ccf8316c 100644 --- a/components/raftstore-v2/tests/failpoints/test_basic_write.rs +++ b/components/raftstore-v2/tests/failpoints/test_basic_write.rs @@ -2,7 +2,7 @@ use std::{assert_matches::assert_matches, time::Duration}; -use engine_traits::{OpenOptions, Peekable, TabletFactory}; +use engine_traits::Peekable; use futures::executor::block_on; use kvproto::raft_cmdpb::{CmdType, Request}; use raftstore_v2::router::PeerMsg; @@ -13,7 +13,7 @@ use crate::cluster::Cluster; #[test] fn test_write_batch_rollback() { let cluster = Cluster::default(); - let router = cluster.router(0); + let router = &cluster.routers[0]; let mut req = router.new_request_for(2); let mut put_req = Request::default(); put_req.set_cmd_type(CmdType::Put); @@ -25,10 +25,8 @@ fn test_write_batch_rollback() { // Make several entries to batch in apply thread. fail::cfg("APPLY_COMMITTED_ENTRIES", "pause").unwrap(); - let tablet_factory = cluster.node(0).tablet_factory(); - let tablet = tablet_factory - .open_tablet(2, None, OpenOptions::default().set_cache_only(true)) - .unwrap(); + let tablet_registry = cluster.node(0).tablet_registry(); + let tablet = tablet_registry.get(2).unwrap().latest().unwrap().clone(); // Good proposal should be committed. 
let (msg, mut sub0) = PeerMsg::raft_command(req.clone()); diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 24184233117..3e2ced3df3c 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -5,7 +5,7 @@ use std::{ path::Path, sync::{ atomic::{AtomicUsize, Ordering}, - Arc, Mutex, + Arc, }, thread, time::{Duration, Instant}, @@ -17,10 +17,10 @@ use concurrency_manager::ConcurrencyManager; use crossbeam::channel::{self, Receiver, Sender, TrySendError}; use engine_test::{ ctor::{CfOptions, DbOptions}, - kv::{KvTestEngine, TestTabletFactoryV2}, + kv::{KvTestEngine, TestTabletFactory}, raft::RaftTestEngine, }; -use engine_traits::{OpenOptions, TabletFactory, ALL_CFS}; +use engine_traits::{TabletRegistry, ALL_CFS}; use futures::executor::block_on; use kvproto::{ metapb::{self, RegionEpoch, Store}, @@ -36,7 +36,7 @@ use raftstore::store::{ use raftstore_v2::{ create_store_batch_system, router::{DebugInfoChannel, FlushChannel, PeerMsg, QueryResult, RaftRouter}, - Bootstrap, StoreMeta, StoreSystem, + Bootstrap, StoreSystem, }; use slog::{debug, o, Logger}; use tempfile::TempDir; @@ -46,7 +46,6 @@ use tikv_util::{ store::new_peer, }; -#[derive(Clone)] pub struct TestRouter(RaftRouter); impl Deref for TestRouter { @@ -182,12 +181,10 @@ impl TestRouter { pub struct RunningState { store_id: u64, pub raft_engine: RaftTestEngine, - pub factory: Arc, + pub registry: TabletRegistry, pub system: StoreSystem, pub cfg: Arc>, pub transport: TestTransport, - // We need this to clear the ref counts of CachedTablet when shutdown - store_meta: Arc>>, } impl RunningState { @@ -205,11 +202,8 @@ impl RunningState { .copied() .map(|cf| (cf, CfOptions::default())) .collect(); - let factory = Arc::new(TestTabletFactoryV2::new( - path, - DbOptions::default(), - cf_opts, - )); + let factory = Box::new(TestTabletFactory::new(DbOptions::default(), 
cf_opts)); + let registry = TabletRegistry::new(factory, path).unwrap(); let raft_engine = engine_test::raft::new_engine(&format!("{}", path.join("raft").display()), None) .unwrap(); @@ -218,17 +212,17 @@ impl RunningState { let mut store = Store::default(); store.set_id(store_id); if let Some(region) = bootstrap.bootstrap_first_region(&store, store_id).unwrap() { - if factory.exists(region.get_id(), RAFT_INIT_LOG_INDEX) { + let factory = registry.tablet_factory(); + let path = registry.tablet_path(region.get_id(), RAFT_INIT_LOG_INDEX); + if factory.exists(&path) { + registry.remove(region.get_id()); factory - .destroy_tablet(region.get_id(), RAFT_INIT_LOG_INDEX) + .destroy_tablet(region.get_id(), Some(RAFT_INIT_LOG_INDEX), &path) .unwrap(); } + // Create the tablet without loading it in cache. factory - .open_tablet( - region.get_id(), - Some(RAFT_INIT_LOG_INDEX), - OpenOptions::default().set_create_new(true), - ) + .open_tablet(region.get_id(), Some(RAFT_INIT_LOG_INDEX), &path) .unwrap(); } @@ -238,7 +232,7 @@ impl RunningState { logger.clone(), ); - let router = RaftRouter::new(store_id, router); + let router = RaftRouter::new(store_id, registry.clone(), router); let store_meta = router.store_meta().clone(); let snap_mgr = TabletSnapManager::new(path.join("tablets_snap").to_str().unwrap()); snap_mgr.init().unwrap(); @@ -247,11 +241,11 @@ impl RunningState { store_id, cfg.clone(), raft_engine.clone(), - factory.clone(), + registry.clone(), transport.clone(), pd_client.clone(), router.store_router(), - store_meta.clone(), + store_meta, snap_mgr.clone(), concurrency_manager, causal_ts_provider, @@ -261,11 +255,10 @@ impl RunningState { let state = Self { store_id, raft_engine, - factory, + registry, system, cfg, transport, - store_meta, }; (TestRouter(router), snap_mgr, state) } @@ -313,8 +306,8 @@ impl TestNode { router } - pub fn tablet_factory(&self) -> &Arc { - &self.running_state().unwrap().factory + pub fn tablet_registry(&self) -> &TabletRegistry { + 
&self.running_state().unwrap().registry } pub fn pd_client(&self) -> &Arc { @@ -322,10 +315,7 @@ impl TestNode { } fn stop(&mut self) { - if let Some(state) = std::mem::take(&mut self.running_state) { - let mut meta = state.store_meta.lock().unwrap(); - meta.tablet_caches.clear(); - } + self.running_state.take(); } fn restart(&mut self) -> TestRouter { @@ -420,7 +410,7 @@ pub struct Cluster { pd_server: test_pd::Server, nodes: Vec, receivers: Vec>, - routers: Vec, + pub routers: Vec, logger: Logger, } @@ -463,18 +453,15 @@ impl Cluster { } pub fn restart(&mut self, offset: usize) { + self.routers.remove(offset); let router = self.nodes[offset].restart(); - self.routers[offset] = router; + self.routers.insert(offset, router); } pub fn node(&self, offset: usize) -> &TestNode { &self.nodes[offset] } - pub fn router(&self, offset: usize) -> TestRouter { - self.routers[offset].clone() - } - /// Send messages and wait for side effects are all handled. #[allow(clippy::vec_box)] pub fn dispatch(&self, region_id: u64, mut msgs: Vec>) { diff --git a/components/raftstore-v2/tests/integrations/test_basic_write.rs b/components/raftstore-v2/tests/integrations/test_basic_write.rs index fc23e46e12f..a0d3d1ac34a 100644 --- a/components/raftstore-v2/tests/integrations/test_basic_write.rs +++ b/components/raftstore-v2/tests/integrations/test_basic_write.rs @@ -2,7 +2,7 @@ use std::{assert_matches::assert_matches, time::Duration}; -use engine_traits::{OpenOptions, Peekable, TabletFactory}; +use engine_traits::Peekable; use futures::executor::block_on; use kvproto::{ raft_cmdpb::{CmdType, Request}, @@ -18,7 +18,7 @@ use crate::cluster::Cluster; #[test] fn test_basic_write() { let cluster = Cluster::default(); - let router = cluster.router(0); + let router = &cluster.routers[0]; let mut req = router.new_request_for(2); let mut put_req = Request::default(); put_req.set_cmd_type(CmdType::Put); @@ -113,7 +113,7 @@ fn test_basic_write() { #[test] fn test_put_delete() { let cluster = 
Cluster::default(); - let router = cluster.router(0); + let router = &cluster.routers[0]; let mut req = router.new_request_for(2); let mut put_req = Request::default(); put_req.set_cmd_type(CmdType::Put); @@ -123,10 +123,8 @@ fn test_put_delete() { router.wait_applied_to_current_term(2, Duration::from_secs(3)); - let tablet_factory = cluster.node(0).tablet_factory(); - let tablet = tablet_factory - .open_tablet(2, None, OpenOptions::default().set_cache_only(true)) - .unwrap(); + let registry = cluster.node(0).tablet_registry(); + let tablet = registry.get(2).unwrap().latest().unwrap().clone(); assert!(tablet.get_value(b"key").unwrap().is_none()); let (msg, mut sub) = PeerMsg::raft_command(req.clone()); router.send(2, msg).unwrap(); diff --git a/components/raftstore-v2/tests/integrations/test_conf_change.rs b/components/raftstore-v2/tests/integrations/test_conf_change.rs index 558962f8ef6..4f3ffbbf24c 100644 --- a/components/raftstore-v2/tests/integrations/test_conf_change.rs +++ b/components/raftstore-v2/tests/integrations/test_conf_change.rs @@ -2,7 +2,7 @@ use std::{self, time::Duration}; -use engine_traits::{OpenOptions, Peekable, TabletFactory}; +use engine_traits::Peekable; use kvproto::raft_cmdpb::{AdminCmdType, CmdType, Request}; use raft::prelude::ConfChangeType; use raftstore_v2::router::{PeerMsg, PeerTick}; @@ -14,7 +14,7 @@ use crate::cluster::Cluster; fn test_simple_change() { let cluster = Cluster::with_node_count(2, None); let region_id = 2; - let router0 = cluster.router(0); + let router0 = &cluster.routers[0]; let mut req = router0.new_request_for(2); let admin_req = req.mut_admin_request(); admin_req.set_cmd_type(AdminCmdType::ChangePeer); @@ -39,7 +39,7 @@ fn test_simple_change() { // So heartbeat will create a learner. 
cluster.dispatch(2, vec![]); - let router1 = cluster.router(1); + let router1 = &cluster.routers[1]; let meta = router1 .must_query_debug_info(2, Duration::from_secs(3)) .unwrap(); @@ -77,10 +77,8 @@ fn test_simple_change() { // read the new written kv. assert_eq!(match_index, meta.raft_apply.truncated_state.index); assert!(meta.raft_apply.applied_index >= match_index); - let tablet_factory = cluster.node(1).tablet_factory(); - let tablet = tablet_factory - .open_tablet(region_id, None, OpenOptions::default().set_cache_only(true)) - .unwrap(); + let registry = cluster.node(1).tablet_registry(); + let tablet = registry.get(region_id).unwrap().latest().unwrap().clone(); assert_eq!(tablet.get_value(key).unwrap().unwrap(), val); req.mut_header() diff --git a/components/raftstore-v2/tests/integrations/test_life.rs b/components/raftstore-v2/tests/integrations/test_life.rs index ed0ebcc9b8a..805cda15471 100644 --- a/components/raftstore-v2/tests/integrations/test_life.rs +++ b/components/raftstore-v2/tests/integrations/test_life.rs @@ -64,11 +64,11 @@ fn assert_tombstone(raft_engine: &impl RaftEngine, region_id: u64, peer: &metapb #[test] fn test_life_by_message() { let mut cluster = Cluster::default(); - let router = cluster.router(0); + let router = &cluster.routers[0]; let test_region_id = 4; let test_peer_id = 5; let test_leader_id = 6; - assert_peer_not_exist(test_region_id, test_peer_id, &router); + assert_peer_not_exist(test_region_id, test_peer_id, router); // Build a correct message. let mut msg = Box::::default(); @@ -85,7 +85,7 @@ fn test_life_by_message() { let mut wrong_msg = msg.clone(); f(&mut wrong_msg); router.send_raft_message(wrong_msg).unwrap(); - assert_peer_not_exist(test_region_id, test_peer_id, &router); + assert_peer_not_exist(test_region_id, test_peer_id, router); }; // Check mismatch store id. @@ -113,7 +113,7 @@ fn test_life_by_message() { // The peer should survive restart. 
cluster.restart(0); - let router = cluster.router(0); + let router = &cluster.routers[0]; let meta = router .must_query_debug_info(test_region_id, timeout) .unwrap(); @@ -129,13 +129,13 @@ fn test_life_by_message() { let mut tombstone_msg = msg.clone(); tombstone_msg.set_is_tombstone(true); router.send_raft_message(tombstone_msg).unwrap(); - assert_peer_not_exist(test_region_id, test_peer_id, &router); + assert_peer_not_exist(test_region_id, test_peer_id, router); assert_tombstone(raft_engine, test_region_id, &new_peer(1, test_peer_id)); // Restart should not recreate tombstoned peer. cluster.restart(0); - let router = cluster.router(0); - assert_peer_not_exist(test_region_id, test_peer_id, &router); + let router = &cluster.routers[0]; + assert_peer_not_exist(test_region_id, test_peer_id, router); let raft_engine = &cluster.node(0).running_state().unwrap().raft_engine; assert_tombstone(raft_engine, test_region_id, &new_peer(1, test_peer_id)); } @@ -143,7 +143,7 @@ fn test_life_by_message() { #[test] fn test_destroy_by_larger_id() { let mut cluster = Cluster::default(); - let router = cluster.router(0); + let router = &cluster.routers[0]; let test_region_id = 4; let test_peer_id = 6; let init_term = 5; @@ -180,7 +180,7 @@ fn test_destroy_by_larger_id() { let mut larger_id_msg = smaller_id_msg; larger_id_msg.set_to_peer(new_peer(1, test_peer_id + 1)); router.send_raft_message(larger_id_msg).unwrap(); - assert_peer_not_exist(test_region_id, test_peer_id, &router); + assert_peer_not_exist(test_region_id, test_peer_id, router); let meta = router .must_query_debug_info(test_region_id, timeout) .unwrap(); @@ -189,7 +189,7 @@ fn test_destroy_by_larger_id() { // New peer should survive restart. 
cluster.restart(0); - let router = cluster.router(0); + let router = &cluster.routers[0]; let meta = router .must_query_debug_info(test_region_id, timeout) .unwrap(); diff --git a/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs b/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs index c22ef4908bf..96bcbbccf7a 100644 --- a/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs +++ b/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs @@ -11,7 +11,7 @@ use crate::cluster::Cluster; fn test_region_heartbeat() { let region_id = 2; let cluster = Cluster::with_node_count(1, None); - let router = cluster.router(0); + let router = &cluster.routers[0]; // When there is only one peer, it should campaign immediately. let mut req = RaftCmdRequest::default(); diff --git a/components/raftstore-v2/tests/integrations/test_read.rs b/components/raftstore-v2/tests/integrations/test_read.rs index 2155a4775c6..07ae8b44bf3 100644 --- a/components/raftstore-v2/tests/integrations/test_read.rs +++ b/components/raftstore-v2/tests/integrations/test_read.rs @@ -13,7 +13,7 @@ fn test_read_index() { let mut config = v2_default_config(); config.raft_store_max_leader_lease = ReadableDuration::millis(150); let cluster = Cluster::with_config(config); - let router = cluster.router(0); + let router = &cluster.routers[0]; std::thread::sleep(std::time::Duration::from_millis(200)); let region_id = 2; let mut req = router.new_request_for(region_id); @@ -58,7 +58,7 @@ fn test_read_index() { #[test] fn test_snap_without_read_index() { let cluster = Cluster::default(); - let router = cluster.router(0); + let router = &cluster.routers[0]; std::thread::sleep(std::time::Duration::from_millis(200)); let region_id = 2; let mut req = router.new_request_for(region_id); @@ -84,7 +84,7 @@ fn test_snap_without_read_index() { #[test] fn test_query_with_write_cmd() { let cluster = Cluster::default(); - let router = cluster.router(0); + let router = &cluster.routers[0]; 
std::thread::sleep(std::time::Duration::from_millis(200)); let region_id = 2; let mut req = router.new_request_for(2); @@ -111,7 +111,7 @@ fn test_query_with_write_cmd() { #[test] fn test_snap_with_invalid_parameter() { let cluster = Cluster::default(); - let router = cluster.router(0); + let router = &cluster.routers[0]; std::thread::sleep(std::time::Duration::from_millis(200)); let region_id = 2; let mut req = router.new_request_for(region_id); @@ -163,8 +163,8 @@ fn test_snap_with_invalid_parameter() { #[test] fn test_local_read() { - let cluster = Cluster::default(); - let mut router = cluster.router(0); + let mut cluster = Cluster::default(); + let router = &mut cluster.routers[0]; std::thread::sleep(std::time::Duration::from_millis(200)); let region_id = 2; let mut req = router.new_request_for(region_id); diff --git a/components/raftstore-v2/tests/integrations/test_split.rs b/components/raftstore-v2/tests/integrations/test_split.rs index 336a9c9d038..60495b151e8 100644 --- a/components/raftstore-v2/tests/integrations/test_split.rs +++ b/components/raftstore-v2/tests/integrations/test_split.rs @@ -126,9 +126,9 @@ fn split_region( #[test] fn test_split() { - let cluster = Cluster::default(); + let mut cluster = Cluster::default(); let store_id = cluster.node(0).id(); - let mut router = cluster.router(0); + let router = &mut cluster.routers[0]; // let factory = cluster.node(0).tablet_factory(); let region_id = 2; @@ -140,7 +140,7 @@ fn test_split() { // -> Region 2 ["", "k22"] peer(1, 3) // Region 1000 ["k22", ""] peer(1, 10) let (left, right) = split_region( - &mut router, + router, region, peer.clone(), 1000, @@ -155,7 +155,7 @@ fn test_split() { // -> Region 2 ["", "k11"] peer(1, 3) // Region 1001 ["k11", "k22"] peer(1, 11) let _ = split_region( - &mut router, + router, left, peer, 1001, @@ -170,7 +170,7 @@ fn test_split() { // -> Region 1000 ["k22", "k33"] peer(1, 10) // Region 1002 ["k33", ""] peer(1, 12) let _ = split_region( - &mut router, + router, 
right, new_peer(store_id, 10), 1002, diff --git a/components/raftstore-v2/tests/integrations/test_status.rs b/components/raftstore-v2/tests/integrations/test_status.rs index 1f7415d9da3..59c23c4180f 100644 --- a/components/raftstore-v2/tests/integrations/test_status.rs +++ b/components/raftstore-v2/tests/integrations/test_status.rs @@ -8,7 +8,7 @@ use crate::cluster::Cluster; #[test] fn test_status() { let cluster = Cluster::default(); - let router = cluster.router(0); + let router = &cluster.routers[0]; // When there is only one peer, it should campaign immediately. let mut req = RaftCmdRequest::default(); req.mut_header().set_peer(new_peer(1, 3)); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index e93b18fed96..a50e3a39667 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -44,8 +44,8 @@ use engine_rocks::{ }; use engine_rocks_helper::sst_recovery::{RecoveryRunner, DEFAULT_CHECK_INTERVAL}; use engine_traits::{ - CfOptions, CfOptionsExt, Engines, FlowControlFactorsExt, KvEngine, MiscExt, RaftEngine, - TabletFactory, CF_DEFAULT, CF_LOCK, CF_WRITE, + CachedTablet, CfOptions, CfOptionsExt, Engines, FlowControlFactorsExt, KvEngine, MiscExt, + RaftEngine, SingletonFactory, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, }; use error_code::ErrorCodeExt; use file_system::{ @@ -238,7 +238,7 @@ struct TikvServer { sst_worker: Option>>, quota_limiter: Arc, causal_ts_provider: Option>, // used for rawkv apiv2 - tablet_factory: Option + Send + Sync>>, + tablet_registry: Option>, br_snap_recovery_mode: bool, // use for br snapshot recovery } @@ -390,7 +390,7 @@ where sst_worker: None, quota_limiter, causal_ts_provider, - tablet_factory: None, + tablet_registry: None, br_snap_recovery_mode: is_recovering_marked, } } @@ -806,7 +806,7 @@ where cfg_controller.register( tikv::config::Module::Storage, Box::new(StorageConfigManger::new( - self.tablet_factory.as_ref().unwrap().clone(), + 
self.tablet_registry.as_ref().unwrap().clone(), ttl_scheduler, flow_controller, storage.get_scheduler(), @@ -1366,7 +1366,7 @@ where // for recording the latest tablet for each region. // `cached_latest_tablets` is passed to `update` to avoid memory // allocation each time when calling `update`. - let mut cached_latest_tablets: HashMap = HashMap::new(); + let mut cached_latest_tablets = HashMap::default(); self.background_worker .spawn_interval_task(DEFAULT_METRICS_FLUSH_INTERVAL, move || { let now = Instant::now(); @@ -1736,7 +1736,7 @@ impl ConfiguredRaftEngine for RocksEngine { fn register_config(&self, cfg_controller: &mut ConfigController) { cfg_controller.register( tikv::config::Module::Raftdb, - Box::new(DbConfigManger::new(Arc::new(self.clone()), DbType::Raft)), + Box::new(DbConfigManger::new(self.clone(), DbType::Raft)), ); } } @@ -1800,29 +1800,33 @@ impl TikvServer { ); // Create kv engine. - let builder = KvEngineFactoryBuilder::new(env, &self.config, &self.store_path, block_cache) + let builder = KvEngineFactoryBuilder::new(env, &self.config, block_cache) .compaction_event_sender(Arc::new(RaftRouterCompactedEventSender { router: Mutex::new(self.router.clone()), })) .region_info_accessor(self.region_info_accessor.clone()) .sst_recovery_sender(self.init_sst_recovery_sender()) .flow_listener(flow_listener); - let factory = Arc::new(builder.build()); + let factory = Box::new(builder.build()); let kv_engine = factory - .create_shared_db() + .create_shared_db(&self.store_path) .unwrap_or_else(|s| fatal!("failed to create kv engine: {}", s)); - let engines = Engines::new(kv_engine, raft_engine); + let engines = Engines::new(kv_engine.clone(), raft_engine); let cfg_controller = self.cfg_controller.as_mut().unwrap(); cfg_controller.register( tikv::config::Module::Rocksdb, - Box::new(DbConfigManger::new(factory.clone(), DbType::Kv)), + Box::new(DbConfigManger::new(kv_engine.clone(), DbType::Kv)), ); - self.tablet_factory = Some(factory.clone()); + let reg = 
TabletRegistry::new(Box::new(SingletonFactory::new(kv_engine)), &self.store_path) + .unwrap(); + // It always use the singleton kv_engine, use arbitrary id and suffix. + reg.load(0, 0, false).unwrap(); + self.tablet_registry = Some(reg.clone()); engines.raft.register_config(cfg_controller); let engines_info = Arc::new(EnginesResourceInfo::new( - factory, + reg, engines.raft.as_rocks_engine().cloned(), 180, // max_samples_to_preserve )); @@ -1974,7 +1978,7 @@ impl EngineMetricsManager { } pub struct EnginesResourceInfo { - tablet_factory: Arc + Sync + Send>, + tablet_registry: TabletRegistry, raft_engine: Option, latest_normalized_pending_bytes: AtomicU32, normalized_pending_bytes_collector: MovingAvgU32, @@ -1984,12 +1988,12 @@ impl EnginesResourceInfo { const SCALE_FACTOR: u64 = 100; fn new( - tablet_factory: Arc + Sync + Send>, + tablet_registry: TabletRegistry, raft_engine: Option, max_samples_to_preserve: usize, ) -> Self { EnginesResourceInfo { - tablet_factory, + tablet_registry, raft_engine, latest_normalized_pending_bytes: AtomicU32::new(0), normalized_pending_bytes_collector: MovingAvgU32::new(max_samples_to_preserve), @@ -1999,7 +2003,7 @@ impl EnginesResourceInfo { pub fn update( &self, _now: Instant, - cached_latest_tablets: &mut HashMap, + cached_latest_tablets: &mut HashMap>, ) { let mut normalized_pending_bytes = 0; @@ -2022,19 +2026,11 @@ impl EnginesResourceInfo { fetch_engine_cf(raft_engine, CF_DEFAULT, &mut normalized_pending_bytes); } - self.tablet_factory - .for_each_opened_tablet( - &mut |id, suffix, db: &RocksEngine| match cached_latest_tablets.entry(id) { - collections::HashMapEntry::Occupied(mut slot) => { - if slot.get().0 < suffix { - slot.insert((suffix, db.clone())); - } - } - collections::HashMapEntry::Vacant(slot) => { - slot.insert((suffix, db.clone())); - } - }, - ); + self.tablet_registry + .for_each_opened_tablet(|id, db: &mut CachedTablet| { + cached_latest_tablets.insert(id, db.clone()); + true + }); // todo(SpadeA): Now, 
there's a potential race condition problem where the // tablet could be destroyed after the clone and before the fetching @@ -2045,7 +2041,8 @@ impl EnginesResourceInfo { // propose another PR to tackle it such as destory tablet lazily in a GC // thread. - for (_, (_, tablet)) in cached_latest_tablets.iter() { + for (_, cache) in cached_latest_tablets.iter_mut() { + let Some(tablet) = cache.latest() else { continue }; for cf in &[CF_DEFAULT, CF_WRITE, CF_LOCK] { fetch_engine_cf(tablet, cf, &mut normalized_pending_bytes); } @@ -2089,10 +2086,8 @@ mod test { sync::{atomic::Ordering, Arc}, }; - use engine_rocks::{raw::Env, RocksEngine}; - use engine_traits::{ - FlowControlFactorsExt, MiscExt, OpenOptions, SyncMutable, TabletFactory, CF_DEFAULT, - }; + use engine_rocks::raw::Env; + use engine_traits::{FlowControlFactorsExt, MiscExt, SyncMutable, TabletRegistry, CF_DEFAULT}; use tempfile::Builder; use tikv::{config::TikvConfig, server::KvEngineFactoryBuilder}; use tikv_util::{config::ReadableSize, time::Instant}; @@ -2110,18 +2105,15 @@ mod test { let path = Builder::new().prefix("test-update").tempdir().unwrap(); let cache = config.storage.block_cache.build_shared_cache(); - let builder = KvEngineFactoryBuilder::new(env, &config, path.path(), cache); - let factory = builder.build_v2(); + let factory = KvEngineFactoryBuilder::new(env, &config, cache).build(); + let reg = TabletRegistry::new(Box::new(factory), path.path()).unwrap(); for i in 1..6 { - let _ = factory - .open_tablet(i, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap(); + reg.load(i, 10, true).unwrap(); } - let tablet = factory - .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) - .unwrap(); + let mut cached = reg.get(1).unwrap(); + let mut tablet = cached.latest().unwrap(); // Prepare some data for two tablets of the same region. So we can test whether // we fetch the bytes from the latest one. 
for i in 1..21 { @@ -2135,9 +2127,8 @@ mod test { .unwrap() .unwrap(); - let tablet = factory - .open_tablet(1, Some(20), OpenOptions::default().set_create_new(true)) - .unwrap(); + reg.load(1, 20, true).unwrap(); + tablet = cached.latest().unwrap(); for i in 1..11 { tablet.put_cf(CF_DEFAULT, b"key", b"val").unwrap(); @@ -2152,9 +2143,9 @@ mod test { assert!(old_pending_compaction_bytes > new_pending_compaction_bytes); - let engines_info = Arc::new(EnginesResourceInfo::new(Arc::new(factory), None, 10)); + let engines_info = Arc::new(EnginesResourceInfo::new(reg, None, 10)); - let mut cached_latest_tablets: HashMap = HashMap::new(); + let mut cached_latest_tablets = HashMap::default(); engines_info.update(Instant::now(), &mut cached_latest_tablets); // The memory allocation should be reserved diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index c6b70fa24f0..3a4ed373e8c 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -16,8 +16,7 @@ use encryption_export::{ use engine_rocks::{config::BlobRunMode, RocksEngine, RocksSnapshot}; use engine_test::raft::RaftTestEngine; use engine_traits::{ - Engines, Iterable, Peekable, RaftEngineDebug, RaftEngineReadOnly, TabletFactory, ALL_CFS, - CF_DEFAULT, CF_RAFT, + Engines, Iterable, Peekable, RaftEngineDebug, RaftEngineReadOnly, ALL_CFS, CF_DEFAULT, CF_RAFT, }; use file_system::IoRateLimiter; use futures::executor::block_on; @@ -596,15 +595,15 @@ pub fn create_test_engine( let raft_engine = RaftTestEngine::build(&cfg, &env, &key_manager, &cache); - let mut builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path(), cache) - .sst_recovery_sender(Some(scheduler)); + let mut builder = + KvEngineFactoryBuilder::new(env, &cfg, cache).sst_recovery_sender(Some(scheduler)); if let Some(router) = router { builder = builder.compaction_event_sender(Arc::new(RaftRouterCompactedEventSender { router: Mutex::new(router), })); } let factory = 
builder.build(); - let engine = factory.create_shared_db().unwrap(); + let engine = factory.create_shared_db(dir.path()).unwrap(); let engines = Engines::new(engine, raft_engine); (engines, key_manager, dir, sst_worker) } diff --git a/src/config/configurable.rs b/src/config/configurable.rs new file mode 100644 index 00000000000..7cbcc731eb6 --- /dev/null +++ b/src/config/configurable.rs @@ -0,0 +1,141 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{error::Error, io::Write}; + +use engine_rocks::RocksEngine; +use engine_traits::{ + CachedTablet, CfOptionsExt, DbOptions, DbOptionsExt, TabletRegistry, CF_DEFAULT, +}; + +pub type ConfigRes = Result<(), Box>; + +pub trait ConfigurableDb { + fn set_db_config(&self, opts: &[(&str, &str)]) -> ConfigRes; + fn set_cf_config(&self, cf: &str, opts: &[(&str, &str)]) -> ConfigRes; + fn set_rate_bytes_per_sec(&self, rate_bytes_per_sec: i64) -> ConfigRes; + fn set_rate_limiter_auto_tuned(&self, auto_tuned: bool) -> ConfigRes; + fn set_shared_block_cache_capacity(&self, capacity: usize) -> ConfigRes; +} + +impl ConfigurableDb for RocksEngine { + fn set_db_config(&self, opts: &[(&str, &str)]) -> ConfigRes { + self.set_db_options(opts).map_err(Box::from) + } + + fn set_cf_config(&self, cf: &str, opts: &[(&str, &str)]) -> ConfigRes { + self.set_options_cf(cf, opts).map_err(Box::from) + } + + fn set_rate_bytes_per_sec(&self, rate_bytes_per_sec: i64) -> ConfigRes { + let mut opt = self.get_db_options(); + opt.set_rate_bytes_per_sec(rate_bytes_per_sec) + .map_err(Box::from) + } + + fn set_rate_limiter_auto_tuned(&self, auto_tuned: bool) -> ConfigRes { + let mut opt = self.get_db_options(); + opt.set_rate_limiter_auto_tuned(auto_tuned) + .map_err(Box::new)?; + // double check the new state + let new_auto_tuned = opt.get_rate_limiter_auto_tuned(); + if new_auto_tuned == Some(auto_tuned) { + Ok(()) + } else { + Err(engine_traits::Status::with_error( + engine_traits::Code::IoError, + "fail to set 
rate_limiter_auto_tuned", + ) + .into()) + } + } + + fn set_shared_block_cache_capacity(&self, capacity: usize) -> ConfigRes { + let opt = self.get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap + opt.set_block_cache_capacity(capacity as u64) + .map_err(Box::from) + } +} + +fn loop_registry( + registry: &TabletRegistry, + mut f: impl FnMut(&mut CachedTablet) -> std::result::Result>, +) -> ConfigRes { + let mut error_count = 0; + let mut res = Ok(()); + let mut error_samples: Vec = vec![]; + registry.for_each_opened_tablet(|id, cache| match f(cache) { + Ok(b) => b, + Err(e) => { + error_count += 1; + res = Err(e); + if error_count <= 3 { + writeln!( + error_samples, + "Tablet {} {:?} encountered error: {:?}.", + id, + cache.cache().map(|c| c.as_inner().path()), + res + ) + .unwrap(); + } + true + } + }); + if error_count > 0 { + error!( + "Total count {}. Sample errors: {}", + error_count, + std::str::from_utf8(&error_samples).unwrap() + ); + } + res +} + +impl ConfigurableDb for TabletRegistry { + fn set_db_config(&self, opts: &[(&str, &str)]) -> ConfigRes { + loop_registry(self, |cache| { + if let Some(latest) = cache.latest() { + latest.set_db_config(opts)?; + } + Ok(true) + }) + } + + fn set_cf_config(&self, cf: &str, opts: &[(&str, &str)]) -> ConfigRes { + loop_registry(self, |cache| { + if let Some(latest) = cache.latest() { + latest.set_cf_config(cf, opts)?; + } + Ok(true) + }) + } + + fn set_rate_bytes_per_sec(&self, rate_bytes_per_sec: i64) -> ConfigRes { + loop_registry(self, |cache| { + if let Some(latest) = cache.latest() { + latest.set_rate_bytes_per_sec(rate_bytes_per_sec)? + } + Ok(true) + }) + } + + fn set_rate_limiter_auto_tuned(&self, auto_tuned: bool) -> ConfigRes { + loop_registry(self, |cache| { + if let Some(latest) = cache.latest() { + latest.set_rate_limiter_auto_tuned(auto_tuned)? 
+ } + Ok(true) + }) + } + + fn set_shared_block_cache_capacity(&self, capacity: usize) -> ConfigRes { + loop_registry(self, |cache| { + if let Some(latest) = cache.latest() { + latest.set_shared_block_cache_capacity(capacity)?; + Ok(false) + } else { + Ok(true) + } + }) + } +} diff --git a/src/config.rs b/src/config/mod.rs similarity index 97% rename from src/config.rs rename to src/config/mod.rs index 97bab103af2..2b0818e93d3 100644 --- a/src/config.rs +++ b/src/config/mod.rs @@ -5,6 +5,8 @@ //! TiKV is configured through the `TikvConfig` type, which is in turn //! made up of many other configuration types. +mod configurable; + use std::{ cmp, collections::{HashMap, HashSet}, @@ -20,6 +22,7 @@ use std::{ use api_version::ApiV1Ttl; use causal_ts::Config as CausalTsConfig; +pub use configurable::{ConfigRes, ConfigurableDb}; use encryption_export::DataKeyManager; use engine_rocks::{ config::{self as rocks_config, BlobRunMode, CompressionType, LogLevel as RocksLogLevel}, @@ -36,8 +39,8 @@ use engine_rocks::{ DEFAULT_PROP_SIZE_INDEX_DISTANCE, }; use engine_traits::{ - CfOptions as _, CfOptionsExt, DbOptions as _, DbOptionsExt, MiscExt, TabletAccessor, - TabletErrorCollector, TitanCfOptions as _, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, + CfOptions as _, DbOptions as _, MiscExt, TitanCfOptions as _, CF_DEFAULT, CF_LOCK, CF_RAFT, + CF_WRITE, }; use file_system::IoRateLimiter; use keys::region_raft_prefix_len; @@ -1539,36 +1542,21 @@ pub enum DbType { Raft, } -pub struct DbConfigManger> { - tablet_accessor: Arc, +pub struct DbConfigManger { + db: D, db_type: DbType, } -impl> DbConfigManger { - pub fn new(tablet_accessor: Arc, db_type: DbType) -> Self { - DbConfigManger { - tablet_accessor, - db_type, - } - } - - fn set_db_config(&self, opts: &[(&str, &str)]) -> Result<(), Box> { - let mut error_collector = TabletErrorCollector::new(); - self.tablet_accessor - .for_each_opened_tablet(&mut |region_id, suffix, db: &RocksEngine| { - error_collector.add_result(region_id, 
suffix, db.set_db_options(opts)); - }); - error_collector.take_result() +impl DbConfigManger { + pub fn new(db: D, db_type: DbType) -> Self { + DbConfigManger { db, db_type } } +} +impl DbConfigManger { fn set_cf_config(&self, cf: &str, opts: &[(&str, &str)]) -> Result<(), Box> { - let mut error_collector = TabletErrorCollector::new(); self.validate_cf(cf)?; - self.tablet_accessor - .for_each_opened_tablet(&mut |region_id, suffix, db: &RocksEngine| { - error_collector.add_result(region_id, suffix, db.set_options_cf(cf, opts)); - }); - error_collector.take_result()?; + self.db.set_cf_config(cf, opts)?; // Write config to metric for (cfg_name, cfg_value) in opts { @@ -1586,73 +1574,6 @@ impl> DbConfigManger { Ok(()) } - fn set_rate_bytes_per_sec(&self, rate_bytes_per_sec: i64) -> Result<(), Box> { - let mut error_collector = TabletErrorCollector::new(); - self.tablet_accessor - .for_each_opened_tablet(&mut |region_id, suffix, db: &RocksEngine| { - let mut opt = db.get_db_options(); - let r = opt.set_rate_bytes_per_sec(rate_bytes_per_sec); - if r.is_err() { - error_collector.add_result(region_id, suffix, r); - } - }); - error_collector.take_result() - } - - fn set_rate_limiter_auto_tuned( - &self, - rate_limiter_auto_tuned: bool, - ) -> Result<(), Box> { - let mut error_collector = TabletErrorCollector::new(); - self.tablet_accessor - .for_each_opened_tablet(&mut |region_id, suffix, db: &RocksEngine| { - let mut opt = db.get_db_options(); - let r = opt.set_rate_limiter_auto_tuned(rate_limiter_auto_tuned); - if r.is_err() { - error_collector.add_result(region_id, suffix, r); - } else { - // double check the new state - let new_auto_tuned = opt.get_rate_limiter_auto_tuned(); - if new_auto_tuned.is_none() - || new_auto_tuned.unwrap() != rate_limiter_auto_tuned - { - error_collector.add_result( - region_id, - suffix, - Err(engine_traits::Status::with_error( - engine_traits::Code::IoError, - "fail to set rate_limiter_auto_tuned", - ) - .into()), - ); - } - } - }); - - 
error_collector.take_result() - } - - fn set_max_background_jobs(&self, max_background_jobs: i32) -> Result<(), Box> { - self.set_db_config(&[("max_background_jobs", &max_background_jobs.to_string())])?; - Ok(()) - } - - fn set_max_background_flushes( - &self, - max_background_flushes: i32, - ) -> Result<(), Box> { - self.set_db_config(&[( - "max_background_flushes", - &max_background_flushes.to_string(), - )])?; - Ok(()) - } - - fn set_max_subcompactions(&self, max_subcompactions: u32) -> Result<(), Box> { - self.set_db_config(&[("max_subcompactions", &max_subcompactions.to_string())])?; - Ok(()) - } - fn validate_cf(&self, cf: &str) -> Result<(), Box> { match (self.db_type, cf) { (DbType::Kv, CF_DEFAULT) @@ -1665,7 +1586,7 @@ impl> DbConfigManger { } } -impl + Send + Sync> ConfigManager for DbConfigManger { +impl ConfigManager for DbConfigManger { fn dispatch(&mut self, change: ConfigChange) -> Result<(), Box> { let change_str = format!("{:?}", change); let mut change: Vec<(String, ConfigValue)> = change.into_iter().collect(); @@ -1698,7 +1619,8 @@ impl + Send + Sync> ConfigManager for DbConfigMan .next() { let rate_bytes_per_sec: ReadableSize = rate_bytes_config.1.into(); - self.set_rate_bytes_per_sec(rate_bytes_per_sec.0 as i64)?; + self.db + .set_rate_bytes_per_sec(rate_bytes_per_sec.0 as i64)?; } if let Some(rate_bytes_config) = change @@ -1706,37 +1628,43 @@ impl + Send + Sync> ConfigManager for DbConfigMan .next() { let rate_limiter_auto_tuned: bool = rate_bytes_config.1.into(); - self.set_rate_limiter_auto_tuned(rate_limiter_auto_tuned)?; + self.db + .set_rate_limiter_auto_tuned(rate_limiter_auto_tuned)?; } if let Some(background_jobs_config) = change .drain_filter(|(name, _)| name == "max_background_jobs") .next() { - let max_background_jobs = background_jobs_config.1.into(); - self.set_max_background_jobs(max_background_jobs)?; + let max_background_jobs: i32 = background_jobs_config.1.into(); + self.db + .set_db_config(&[("max_background_jobs", 
&max_background_jobs.to_string())])?; } if let Some(background_subcompactions_config) = change .drain_filter(|(name, _)| name == "max_sub_compactions") .next() { - let max_subcompactions = background_subcompactions_config.1.into(); - self.set_max_subcompactions(max_subcompactions)?; + let max_subcompactions: u32 = background_subcompactions_config.1.into(); + self.db + .set_db_config(&[("max_subcompactions", &max_subcompactions.to_string())])?; } if let Some(background_flushes_config) = change .drain_filter(|(name, _)| name == "max_background_flushes") .next() { - let max_background_flushes = background_flushes_config.1.into(); - self.set_max_background_flushes(max_background_flushes)?; + let max_background_flushes: i32 = background_flushes_config.1.into(); + self.db.set_db_config(&[( + "max_background_flushes", + &max_background_flushes.to_string(), + )])?; } if !change.is_empty() { let change = config_value_to_string(change); let change_slice = config_to_slice(&change); - self.set_db_config(&change_slice)?; + self.db.set_db_config(&change_slice)?; } info!( "rocksdb config changed"; @@ -4019,7 +3947,7 @@ mod tests { use api_version::{ApiV1, KvFormat}; use case_macros::*; use engine_rocks::raw::LRUCacheOptions; - use engine_traits::{CfOptions as _, DbOptions as _, DummyFactory}; + use engine_traits::{CfOptions as _, CfOptionsExt, DbOptions as _, DbOptionsExt}; use futures::executor::block_on; use grpcio::ResourceQuota; use itertools::Itertools; @@ -4464,13 +4392,13 @@ mod tests { let cfg_controller = ConfigController::new(cfg); cfg_controller.register( Module::Rocksdb, - Box::new(DbConfigManger::new(Arc::new(engine.clone()), DbType::Kv)), + Box::new(DbConfigManger::new(engine.clone(), DbType::Kv)), ); let (scheduler, receiver) = dummy_scheduler(); cfg_controller.register( Module::Storage, Box::new(StorageConfigManger::new( - Arc::new(DummyFactory::new(Some(engine), "".to_string())), + engine, scheduler, flow_controller.clone(), storage.get_scheduler(), @@ -5305,9 
+5233,11 @@ mod tests { ); } + static CONFIG_TEMPLATE: &str = include_str!("../../etc/config-template.toml"); + #[test] fn test_config_template_is_valid() { - let template_config = std::include_str!("../etc/config-template.toml") + let template_config = CONFIG_TEMPLATE .lines() .map(|l| l.strip_prefix('#').unwrap_or(l)) .join("\n"); @@ -5318,7 +5248,7 @@ mod tests { #[test] fn test_config_template_no_superfluous_keys() { - let template_config = std::include_str!("../etc/config-template.toml") + let template_config = CONFIG_TEMPLATE .lines() .map(|l| l.strip_prefix('#').unwrap_or(l)) .join("\n"); @@ -5336,7 +5266,7 @@ mod tests { #[test] fn test_config_template_matches_default() { - let template_config = std::include_str!("../etc/config-template.toml") + let template_config = CONFIG_TEMPLATE .lines() .map(|l| l.strip_prefix('#').unwrap_or(l)) .join("\n"); diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 7e36efcb98f..01dc1e4a786 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -1,24 +1,17 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{ - path::{Path, PathBuf}, - sync::{Arc, Mutex}, -}; +use std::{path::Path, sync::Arc}; use engine_rocks::{ raw::{Cache, Env}, - CompactedEventSender, CompactionListener, FlowListener, RocksCompactionJobInfo, RocksEngine, - RocksEventListener, -}; -use engine_traits::{ - CfOptions, CfOptionsExt, CompactionJobInfo, OpenOptions, Result, TabletAccessor, TabletFactory, - CF_DEFAULT, CF_WRITE, + CompactedEventSender, CompactionListener, FlowListener, RocksCfOptions, RocksCompactionJobInfo, + RocksDbOptions, RocksEngine, RocksEventListener, }; +use engine_traits::{CompactionJobInfo, MiscExt, Result, TabletFactory, CF_DEFAULT, CF_WRITE}; use kvproto::kvrpcpb::ApiVersion; use raftstore::RegionInfoAccessor; use tikv_util::worker::Scheduler; -use super::engine_factory_v2::KvEngineFactoryV2; use crate::config::{DbConfig, TikvConfig, DEFAULT_ROCKSDB_SUB_DIR}; struct FactoryInner { @@ -26,11 +19,9 @@ struct FactoryInner { region_info_accessor: Option, block_cache: Cache, rocksdb_config: Arc, - store_path: PathBuf, api_version: ApiVersion, flow_listener: Option, sst_recovery_sender: Option>, - root_db: Mutex>, } pub struct KvEngineFactoryBuilder { @@ -39,23 +30,16 @@ pub struct KvEngineFactoryBuilder { } impl KvEngineFactoryBuilder { - pub fn new( - env: Arc, - config: &TikvConfig, - store_path: impl Into, - cache: Cache, - ) -> Self { + pub fn new(env: Arc, config: &TikvConfig, cache: Cache) -> Self { Self { inner: FactoryInner { env, region_info_accessor: None, block_cache: cache, rocksdb_config: Arc::new(config.rocksdb.clone()), - store_path: store_path.into(), api_version: config.storage.api_version(), flow_listener: None, sst_recovery_sender: None, - root_db: Mutex::default(), }, compact_event_sender: None, } @@ -90,14 +74,6 @@ impl KvEngineFactoryBuilder { compact_event_sender: self.compact_event_sender.clone(), } } - - pub fn build_v2(self) -> KvEngineFactoryV2 { - let factory = KvEngineFactory { - inner: Arc::new(self.inner), - compact_event_sender: 
self.compact_event_sender.clone(), - }; - KvEngineFactoryV2::new(factory) - } } #[derive(Clone)] @@ -129,172 +105,125 @@ impl KvEngineFactory { )) } - pub fn create_tablet( - &self, - tablet_path: &Path, - region_id: u64, - suffix: u64, - ) -> Result { + fn db_opts(&self) -> RocksDbOptions { // Create kv engine. - let mut kv_db_opts = self.inner.rocksdb_config.build_opt(); - kv_db_opts.set_env(self.inner.env.clone()); - kv_db_opts.add_event_listener(RocksEventListener::new( + let mut db_opts = self.inner.rocksdb_config.build_opt(); + db_opts.set_env(self.inner.env.clone()); + db_opts.add_event_listener(RocksEventListener::new( "kv", self.inner.sst_recovery_sender.clone(), )); if let Some(filter) = self.create_raftstore_compaction_listener() { - kv_db_opts.add_event_listener(filter); + db_opts.add_event_listener(filter); } - if let Some(listener) = &self.inner.flow_listener { - kv_db_opts.add_event_listener(listener.clone_with(region_id, suffix)); - } - let kv_cfs_opts = self.inner.rocksdb_config.build_cf_opts( + db_opts + } + + fn cf_opts(&self) -> Vec<(&str, RocksCfOptions)> { + self.inner.rocksdb_config.build_cf_opts( &self.inner.block_cache, self.inner.region_info_accessor.as_ref(), self.inner.api_version, - ); - let kv_engine = engine_rocks::util::new_engine_opt( - tablet_path.to_str().unwrap(), - kv_db_opts, - kv_cfs_opts, - ); + ) + } + + /// Create a shared db. + /// + /// It will always create in path/DEFAULT_DB_SUB_DIR. 
+ pub fn create_shared_db(&self, path: &Path) -> Result { + let mut db_opts = self.db_opts(); + let cf_opts = self.cf_opts(); + if let Some(listener) = &self.inner.flow_listener { + db_opts.add_event_listener(listener.clone()); + } + let target_path = path.join(DEFAULT_ROCKSDB_SUB_DIR); + let kv_engine = + engine_rocks::util::new_engine_opt(target_path.to_str().unwrap(), db_opts, cf_opts); if let Err(e) = &kv_engine { - error!("failed to create kv engine"; "path" => %tablet_path.display(), "err" => ?e); + error!("failed to create kv engine"; "path" => %path.display(), "err" => ?e); } kv_engine } +} - pub fn on_tablet_created(&self, region_id: u64, suffix: u64) { - if let Some(listener) = &self.inner.flow_listener { - let listener = listener.clone_with(region_id, suffix); - listener.on_created(); +impl TabletFactory for KvEngineFactory { + fn open_tablet(&self, id: u64, suffix: Option, path: &Path) -> Result { + let mut db_opts = self.db_opts(); + let cf_opts = self.cf_opts(); + if let Some(listener) = &self.inner.flow_listener && let Some(suffix) = suffix { + db_opts.add_event_listener(listener.clone_with(id, suffix)); } + let kv_engine = + engine_rocks::util::new_engine_opt(path.to_str().unwrap(), db_opts, cf_opts); + if let Err(e) = &kv_engine { + error!("failed to create tablet"; "id" => id, "suffix" => ?suffix, "path" => %path.display(), "err" => ?e); + } else if let Some(listener) = &self.inner.flow_listener && let Some(suffix) = suffix { + listener.clone_with(id, suffix).on_created(); + } + kv_engine } - pub fn destroy_tablet(&self, tablet_path: &Path) -> engine_traits::Result<()> { - info!("destroy tablet"; "path" => %tablet_path.display()); + fn destroy_tablet(&self, id: u64, suffix: Option, path: &Path) -> Result<()> { + info!("destroy tablet"; "path" => %path.display(), "id" => id, "suffix" => ?suffix); // Create kv engine. 
- let mut kv_db_opts = self.inner.rocksdb_config.build_opt(); - kv_db_opts.set_env(self.inner.env.clone()); - if let Some(filter) = self.create_raftstore_compaction_listener() { - kv_db_opts.add_event_listener(filter); - } - let _kv_cfs_opts = self.inner.rocksdb_config.build_cf_opts( - &self.inner.block_cache, - self.inner.region_info_accessor.as_ref(), - self.inner.api_version, - ); + let _db_opts = self.db_opts(); + let _cf_opts = self.cf_opts(); // TODOTODO: call rust-rocks or tirocks to destroy_engine; // engine_rocks::util::destroy_engine( - // tablet_path.to_str().unwrap(), + // path.to_str().unwrap(), // kv_db_opts, // kv_cfs_opts, // )?; - let _ = std::fs::remove_dir_all(tablet_path); - Ok(()) - } - - pub fn on_tablet_destroy(&self, region_id: u64, suffix: u64) { - if let Some(listener) = &self.inner.flow_listener { - let listener = listener.clone_with(region_id, suffix); - listener.on_destroyed(); - } - } - - pub fn store_path(&self) -> PathBuf { - self.inner.store_path.clone() - } - - #[inline] - fn kv_engine_path(&self) -> PathBuf { - self.inner.store_path.join(DEFAULT_ROCKSDB_SUB_DIR) - } -} - -impl TabletFactory for KvEngineFactory { - #[inline] - fn create_shared_db(&self) -> Result { - let root_path = self.kv_engine_path(); - let tablet = self.create_tablet(&root_path, 0, 0)?; - let mut root_db = self.inner.root_db.lock().unwrap(); - root_db.replace(tablet.clone()); - Ok(tablet) - } - - /// Open the root tablet according to the OpenOptions. - /// - /// If options.create_new is true, create the root tablet. If the tablet - /// exists, it will fail. - /// - /// If options.create is true, open the the root tablet if it exists or - /// create it otherwise. 
- fn open_tablet( - &self, - _id: u64, - _suffix: Option, - options: OpenOptions, - ) -> Result { - if let Some(db) = self.inner.root_db.lock().unwrap().as_ref() { - if options.create_new() { - return Err(box_err!( - "root tablet {} already exists", - db.as_inner().path() - )); - } - return Ok(db.clone()); + let _ = std::fs::remove_dir_all(path); + if let Some(listener) = &self.inner.flow_listener && let Some(suffix) = suffix { + listener.clone_with(id, suffix).on_destroyed(); } - // No need for mutex protection here since root_db creation only occurs at - // tikv bootstrap time when there is no racing issue. - if options.create_new() || options.create() { - return self.create_shared_db(); - } - - Err(box_err!("root tablet has not been initialized")) - } - - fn open_tablet_raw( - &self, - _path: &Path, - _id: u64, - _suffix: u64, - _options: OpenOptions, - ) -> Result { - self.create_shared_db() - } - - fn exists_raw(&self, _path: &Path) -> bool { - false - } - - fn tablet_path_with_prefix(&self, _prefix: &str, _id: u64, _suffix: u64) -> PathBuf { - self.kv_engine_path() - } - - fn tablets_path(&self) -> PathBuf { - self.kv_engine_path() - } - - #[inline] - fn destroy_tablet(&self, _id: u64, _suffix: u64) -> engine_traits::Result<()> { Ok(()) } - fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()> { - let db = self.inner.root_db.lock().unwrap(); - let opt = db.as_ref().unwrap().get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap - opt.set_block_cache_capacity(capacity)?; - Ok(()) + fn exists(&self, path: &Path) -> bool { + RocksEngine::exists(path.to_str().unwrap()) } } -impl TabletAccessor for KvEngineFactory { - fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &RocksEngine)) { - let db = self.inner.root_db.lock().unwrap(); - let db = db.as_ref().unwrap(); - f(0, 0, db); - } - - fn is_single_engine(&self) -> bool { - true +#[cfg(test)] +mod tests { + use std::path::Path; + + use engine_traits::TabletRegistry; + + use super::*; + use 
crate::config::TikvConfig; + + #[test] + fn test_engine_factory() { + let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); + let common_test_cfg = manifest_dir.join("components/test_raftstore/src/common-test.toml"); + let cfg = TikvConfig::from_file(&common_test_cfg, None).unwrap_or_else(|e| { + panic!( + "invalid auto generated configuration file {}, err {}", + manifest_dir.display(), + e + ); + }); + let cache = cfg.storage.block_cache.build_shared_cache(); + let dir = test_util::temp_dir("test-engine-factory", false); + let env = cfg.build_shared_rocks_env(None, None).unwrap(); + + let factory = KvEngineFactoryBuilder::new(env, &cfg, cache).build(); + let reg = TabletRegistry::new(Box::new(factory), dir.path()).unwrap(); + let path = reg.tablet_path(1, 3); + assert!(!reg.tablet_factory().exists(&path)); + let engine = reg.tablet_factory().open_tablet(1, Some(3), &path).unwrap(); + assert!(reg.tablet_factory().exists(&path)); + // Second attempt should fail with lock. + reg.tablet_factory() + .open_tablet(1, Some(3), &path) + .unwrap_err(); + drop(engine); + reg.tablet_factory() + .destroy_tablet(1, Some(3), &path) + .unwrap(); + assert!(!reg.tablet_factory().exists(&path)); } } diff --git a/src/server/engine_factory_v2.rs b/src/server/engine_factory_v2.rs deleted file mode 100644 index a55ebca6555..00000000000 --- a/src/server/engine_factory_v2.rs +++ /dev/null @@ -1,487 +0,0 @@ -// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
- -use std::{ - path::{Path, PathBuf}, - sync::{Arc, Mutex}, -}; - -use collections::HashMap; -use engine_rocks::RocksEngine; -use engine_traits::{ - CfOptions, CfOptionsExt, MiscExt, OpenOptions, Result, TabletAccessor, TabletFactory, - CF_DEFAULT, -}; - -use crate::server::engine_factory::KvEngineFactory; - -const TOMBSTONE_MARK: &str = "TOMBSTONE_TABLET"; - -#[derive(Clone)] -pub struct KvEngineFactoryV2 { - inner: KvEngineFactory, - // region_id -> (tablet, tablet_suffix) - registry: Arc>>, -} - -impl KvEngineFactoryV2 { - pub fn new(inner: KvEngineFactory) -> Self { - KvEngineFactoryV2 { - inner, - registry: Arc::new(Mutex::new(HashMap::default())), - } - } -} - -impl TabletFactory for KvEngineFactoryV2 { - /// open a tablet according to the OpenOptions. - /// - /// If options.cache_only is true, only open the relevant tablet from - /// `registry`. - /// - /// If options.create_new is true, create a tablet by id and suffix. If the - /// tablet exists, it will fail. - /// - /// If options.create is true, open the tablet with id and suffix if it - /// exists or create it otherwise. - /// - /// If options.skip_cache is true, cache will not be updated. 
- /// - /// Note: options.cache_only and options.create and/or options.create_new - /// cannot be true simultaneously - fn open_tablet( - &self, - id: u64, - suffix: Option, - mut options: OpenOptions, - ) -> Result { - if options.create_new() && suffix.is_none() { - return Err(box_err!( - "suffix should be provided when creating new tablet" - )); - } - - if options.create() || options.create_new() { - options = options.set_cache_only(false); - } - - let mut reg = self.registry.lock().unwrap(); - if let Some(suffix) = suffix { - if let Some((cached_tablet, cached_suffix)) = reg.get(&id) && *cached_suffix == suffix { - // Target tablet exist in the cache - if options.create_new() { - return Err(box_err!( - "region {} {} already exists", - id, - cached_tablet.as_inner().path() - )); - } - return Ok(cached_tablet.clone()); - } else if !options.cache_only() { - let tablet_path = self.tablet_path(id, suffix); - let tablet = self.open_tablet_raw(&tablet_path, id, suffix, options.clone())?; - if !options.skip_cache() { - debug!("Insert a tablet"; "key" => ?(id, suffix)); - reg.insert(id, (tablet.clone(), suffix)); - } - return Ok(tablet); - } - } else if let Some((tablet, _)) = reg.get(&id) { - return Ok(tablet.clone()); - } - - Err(box_err!( - "tablet with region id {} suffix {:?} does not exist", - id, - suffix - )) - } - - fn open_tablet_raw( - &self, - path: &Path, - id: u64, - suffix: u64, - options: OpenOptions, - ) -> Result { - let engine_exist = RocksEngine::exists(path.to_str().unwrap_or_default()); - // Even though neither options.create nor options.create_new are true, if the - // tablet files already exists, we will open it by calling - // inner.create_tablet. In this case, the tablet exists but not in the cache - // (registry). 
- if !options.create() && !options.create_new() && !engine_exist { - return Err(box_err!( - "path {} does not have db", - path.to_str().unwrap_or_default() - )); - }; - - if options.create_new() && engine_exist { - return Err(box_err!( - "region {} {} already exists", - id, - path.to_str().unwrap() - )); - } - - let tablet = self.inner.create_tablet(path, id, suffix)?; - debug!("open tablet"; "key" => ?(id, suffix)); - self.inner.on_tablet_created(id, suffix); - Ok(tablet) - } - - #[inline] - fn create_shared_db(&self) -> Result { - self.open_tablet(0, Some(0), OpenOptions::default().set_create_new(true)) - } - - #[inline] - fn exists_raw(&self, path: &Path) -> bool { - RocksEngine::exists(path.to_str().unwrap_or_default()) - } - - #[inline] - fn tablets_path(&self) -> PathBuf { - self.inner.store_path().join("tablets") - } - - #[inline] - fn tablet_path_with_prefix(&self, prefix: &str, id: u64, suffix: u64) -> PathBuf { - self.inner - .store_path() - .join(format!("tablets/{}{}_{}", prefix, id, suffix)) - } - - #[inline] - fn mark_tombstone(&self, region_id: u64, suffix: u64) { - let path = self.tablet_path(region_id, suffix).join(TOMBSTONE_MARK); - // When the full directory path does not exsit, create will return error and in - // this case, we just ignore it. 
- let _ = std::fs::File::create(path); - debug!("tombstone tablet"; "region_id" => region_id, "suffix" => suffix); - { - let mut reg = self.registry.lock().unwrap(); - if let Some((cached_tablet, cached_suffix)) = reg.remove(®ion_id) && cached_suffix != suffix { - reg.insert(region_id, (cached_tablet, cached_suffix)); - } - } - } - - #[inline] - fn is_tombstoned(&self, region_id: u64, suffix: u64) -> bool { - self.tablet_path(region_id, suffix) - .join(TOMBSTONE_MARK) - .exists() - } - - #[inline] - fn destroy_tablet(&self, region_id: u64, suffix: u64) -> engine_traits::Result<()> { - let path = self.tablet_path(region_id, suffix); - { - let mut reg = self.registry.lock().unwrap(); - if let Some((cached_tablet, cached_suffix)) = reg.remove(®ion_id) && cached_suffix != suffix { - reg.insert(region_id, (cached_tablet, cached_suffix)); - } - } - self.inner.destroy_tablet(&path)?; - self.inner.on_tablet_destroy(region_id, suffix); - Ok(()) - } - - #[inline] - fn load_tablet(&self, path: &Path, region_id: u64, suffix: u64) -> Result { - { - let reg = self.registry.lock().unwrap(); - if let Some((db, db_suffix)) = reg.get(®ion_id) && *db_suffix == suffix { - return Err(box_err!( - "region {} {} already exists", - region_id, - db.as_inner().path() - )); - } - } - - let db_path = self.tablet_path(region_id, suffix); - std::fs::rename(path, db_path)?; - self.open_tablet( - region_id, - Some(suffix), - OpenOptions::default().set_create(true), - ) - } - - fn set_shared_block_cache_capacity(&self, capacity: u64) -> Result<()> { - let reg = self.registry.lock().unwrap(); - // pick up any tablet and set the shared block cache capacity - if let Some((_id, (tablet, _suffix))) = (*reg).iter().next() { - let opt = tablet.get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap - opt.set_block_cache_capacity(capacity)?; - } - Ok(()) - } -} - -impl TabletAccessor for KvEngineFactoryV2 { - #[inline] - fn for_each_opened_tablet(&self, f: &mut dyn FnMut(u64, u64, &RocksEngine)) { - let reg = 
self.registry.lock().unwrap(); - for (id, (tablet, suffix)) in &*reg { - f(*id, *suffix, tablet) - } - } - - // it have multi tablets. - fn is_single_engine(&self) -> bool { - false - } -} - -#[cfg(test)] -mod tests { - use engine_traits::{OpenOptions, CF_WRITE, SPLIT_PREFIX}; - use tempfile::TempDir; - - use super::*; - use crate::{config::TikvConfig, server::KvEngineFactoryBuilder}; - - lazy_static! { - static ref TEST_CONFIG: TikvConfig = { - let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); - let common_test_cfg = - manifest_dir.join("components/test_raftstore/src/common-test.toml"); - TikvConfig::from_file(&common_test_cfg, None).unwrap_or_else(|e| { - panic!( - "invalid auto generated configuration file {}, err {}", - manifest_dir.display(), - e - ); - }) - }; - } - - fn create_test_tablet_factory(name: &'static str) -> (TempDir, KvEngineFactoryBuilder) { - let cfg = TEST_CONFIG.clone(); - let cache = cfg.storage.block_cache.build_shared_cache(); - let dir = test_util::temp_dir(name, false); - let env = cfg.build_shared_rocks_env(None, None).unwrap(); - - let builder = KvEngineFactoryBuilder::new(env, &cfg, dir.path(), cache); - (dir, builder) - } - - #[test] - fn test_kvengine_factory() { - let (_dir, builder) = create_test_tablet_factory("test_kvengine_factory"); - let factory = builder.build(); - let shared_db = factory.create_shared_db().unwrap(); - - // V1 can only create tablet once - factory - .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap_err(); - - let tablet = factory - .open_tablet(1, Some(10), OpenOptions::default().set_create(true)) - .unwrap(); - assert_eq!(tablet.as_inner().path(), shared_db.as_inner().path()); - let tablet = factory - .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) - .unwrap(); - assert_eq!(tablet.as_inner().path(), shared_db.as_inner().path()); - let tablet = factory - .open_tablet(1, None, OpenOptions::default().set_cache_only(true)) - .unwrap(); - 
assert_eq!(tablet.as_inner().path(), shared_db.as_inner().path()); - let mut count = 0; - factory.for_each_opened_tablet(&mut |id, suffix, _tablet| { - assert!(id == 0); - assert!(suffix == 0); - count += 1; - }); - assert_eq!(count, 1); - assert!(factory.is_single_engine()); - assert!(shared_db.is_single_engine()); - factory - .set_shared_block_cache_capacity(1024 * 1024) - .unwrap(); - let opt = shared_db.get_options_cf(CF_DEFAULT).unwrap(); - assert_eq!(opt.get_block_cache_capacity(), 1024 * 1024); - } - - #[test] - fn test_kvengine_factory_root_db_implicit_creation() { - let (_dir, builder) = create_test_tablet_factory("test_kvengine_factory"); - let factory = builder.build(); - - // root_db should be created implicitly here - let tablet = factory - .open_tablet(1, Some(10), OpenOptions::default().set_create(true)) - .unwrap(); - - // error is expected since root_db is created already - factory - .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap_err(); - - let mut count = 0; - factory.for_each_opened_tablet(&mut |id, suffix, _tablet| { - assert!(id == 0); - assert!(suffix == 0); - count += 1; - }); - assert_eq!(count, 1); - assert!(factory.is_single_engine()); - factory - .set_shared_block_cache_capacity(1024 * 1024) - .unwrap(); - let opt = tablet.get_options_cf(CF_DEFAULT).unwrap(); - assert_eq!(opt.get_block_cache_capacity(), 1024 * 1024); - } - - #[test] - fn test_kvengine_factory_v2() { - let (_dir, builder) = create_test_tablet_factory("test_kvengine_factory_v2"); - - let factory = builder.build_v2(); - let tablet = factory - .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap(); - let tablet2 = factory - .open_tablet(1, Some(10), OpenOptions::default().set_create(true)) - .unwrap(); - assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); - let tablet2 = factory - .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) - .unwrap(); - assert_eq!(tablet.as_inner().path(), 
tablet2.as_inner().path()); - let tablet2 = factory - .open_tablet(1, None, OpenOptions::default().set_cache_only(true)) - .unwrap(); - assert_eq!(tablet.as_inner().path(), tablet2.as_inner().path()); - - // Only both region id and suffix match can get the tablet from the cache. - factory - .open_tablet(1, Some(20), OpenOptions::default().set_cache_only(true)) - .unwrap_err(); - - let tablet_path = factory.tablet_path(1, 10); - let result = factory.open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)); - result.unwrap_err(); - - factory - .set_shared_block_cache_capacity(1024 * 1024) - .unwrap(); - let opt = tablet.get_options_cf(CF_WRITE).unwrap(); - assert_eq!(opt.get_block_cache_capacity(), 1024 * 1024); - - assert!(factory.exists(1, 10)); - assert!(!factory.exists(1, 11)); - assert!(!factory.exists(2, 10)); - assert!(!factory.exists(2, 11)); - assert!(factory.exists_raw(&tablet_path)); - assert!(!factory.is_tombstoned(1, 10)); - factory.load_tablet(&tablet_path, 1, 10).unwrap_err(); - factory.load_tablet(&tablet_path, 1, 20).unwrap(); - // After we load it as with the new id or suffix, we should be unable to get it - // with the old id and suffix in the cache. - factory - .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) - .unwrap_err(); - factory - .open_tablet(1, Some(20), OpenOptions::default().set_cache_only(true)) - .unwrap(); - - factory - .open_tablet(1, Some(30), OpenOptions::default().set_create_new(true)) - .unwrap(); - // After open a tablet with the same id but higher suffix, we cannot get the old - // one from cache. 
- factory - .open_tablet(1, Some(20), OpenOptions::default().set_cache_only(true)) - .unwrap_err(); - // Destroy/mark tombstone the old tablet will not unregister the new tablet in - // the cache - factory.mark_tombstone(1, 20); - factory - .open_tablet(1, Some(30), OpenOptions::default().set_cache_only(true)) - .unwrap(); - factory.destroy_tablet(1, 20).unwrap(); - factory - .open_tablet(1, Some(30), OpenOptions::default().set_cache_only(true)) - .unwrap(); - - factory.mark_tombstone(1, 30); - assert!(factory.is_tombstoned(1, 30)); - factory.destroy_tablet(1, 30).unwrap(); - - let result = factory.open_tablet(1, Some(30), OpenOptions::default()); - result.unwrap_err(); - - assert!(!factory.is_single_engine()); - - assert!( - factory - .tablet_path_with_prefix(SPLIT_PREFIX, 1, 10) - .ends_with("split_1_10") - ); - } - - #[test] - fn test_existed_db_not_in_registry() { - let (_dir, builder) = create_test_tablet_factory("test_kvengine_factory_v2"); - - let factory = builder.build_v2(); - let tablet = factory - .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap(); - drop(tablet); - let (tablet, _) = factory.registry.lock().unwrap().remove(&1).unwrap(); - drop(tablet); - factory - .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) - .unwrap_err(); - - let tablet_path = factory.tablet_path(1, 10); - let tablet = factory - .open_tablet_raw(&tablet_path, 1, 10, OpenOptions::default()) - .unwrap(); - // the tablet will not inserted in the cache - factory - .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) - .unwrap_err(); - drop(tablet); - - let tablet_path = factory.tablet_path(1, 20); - // No such tablet, so error will be returned. - factory - .open_tablet_raw(&tablet_path, 1, 10, OpenOptions::default()) - .unwrap_err(); - - let _ = factory - .open_tablet(1, Some(10), OpenOptions::default().set_create(true)) - .unwrap(); - - // Now, it should be in the cache. 
- factory - .open_tablet(1, Some(10), OpenOptions::default().set_cache_only(true)) - .unwrap(); - } - - #[test] - fn test_get_live_tablets() { - let (_dir, builder) = create_test_tablet_factory("test_get_live_tablets"); - let factory = builder.build_v2(); - factory - .open_tablet(1, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap(); - factory - .open_tablet(2, Some(10), OpenOptions::default().set_create_new(true)) - .unwrap(); - let mut count = 0; - factory.for_each_opened_tablet(&mut |id, suffix, _tablet| { - assert!(id == 1 || id == 2); - assert!(suffix == 10); - count += 1; - }); - assert_eq!(count, 2); - } -} diff --git a/src/server/mod.rs b/src/server/mod.rs index d926ca40b2a..1b41dfc4e56 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -6,7 +6,6 @@ mod raft_client; pub mod config; pub mod debug; mod engine_factory; -mod engine_factory_v2; pub mod errors; pub mod gc_worker; pub mod load_statistics; diff --git a/src/storage/config_manager.rs b/src/storage/config_manager.rs index 3cda77ab5a2..b6a5f9d58ab 100644 --- a/src/storage/config_manager.rs +++ b/src/storage/config_manager.rs @@ -4,7 +4,7 @@ use std::{convert::TryInto, sync::Arc}; -use engine_traits::{KvEngine, TabletFactory, CF_DEFAULT}; +use engine_traits::{ALL_CFS, CF_DEFAULT}; use file_system::{get_io_rate_limiter, IoPriority, IoType}; use online_config::{ConfigChange, ConfigManager, ConfigValue, Result as CfgResult}; use strum::IntoEnumIterator; @@ -15,29 +15,30 @@ use tikv_util::{ }; use crate::{ + config::ConfigurableDb, server::{ttl::TtlCheckerTask, CONFIG_ROCKSDB_GAUGE}, storage::{lock_manager::LockManager, txn::flow_controller::FlowController, TxnScheduler}, }; -pub struct StorageConfigManger { - tablet_factory: Arc + Send + Sync>, +pub struct StorageConfigManger { + configurable_db: K, ttl_checker_scheduler: Scheduler, flow_controller: Arc, scheduler: TxnScheduler, } -unsafe impl Send for StorageConfigManger {} -unsafe impl Sync for StorageConfigManger {} +unsafe impl 
Send for StorageConfigManger {} +unsafe impl Sync for StorageConfigManger {} -impl StorageConfigManger { +impl StorageConfigManger { pub fn new( - tablet_factory: Arc + Send + Sync>, + configurable_db: K, ttl_checker_scheduler: Scheduler, flow_controller: Arc, scheduler: TxnScheduler, ) -> Self { StorageConfigManger { - tablet_factory, + configurable_db, ttl_checker_scheduler, flow_controller, scheduler, @@ -45,13 +46,16 @@ impl StorageConfigManger { } } -impl ConfigManager for StorageConfigManger { +impl ConfigManager + for StorageConfigManger +{ fn dispatch(&mut self, mut change: ConfigChange) -> CfgResult<()> { if let Some(ConfigValue::Module(mut block_cache)) = change.remove("block_cache") { if let Some(size) = block_cache.remove("capacity") { if size != ConfigValue::None { let s: ReadableSize = size.into(); - self.tablet_factory.set_shared_block_cache_capacity(s.0)?; + self.configurable_db + .set_shared_block_cache_capacity(s.0 as usize)?; // Write config to metric CONFIG_ROCKSDB_GAUGE .with_label_values(&[CF_DEFAULT, "block_cache_size"]) @@ -67,15 +71,11 @@ impl ConfigManager for StorageConfigMan if let Some(v) = flow_control.remove("enable") { let enable: bool = v.into(); let enable_str = if enable { "true" } else { "false" }; - self.tablet_factory.for_each_opened_tablet( - &mut |_region_id, _suffix, tablet: &K| { - for cf in tablet.cf_names() { - tablet - .set_options_cf(cf, &[("disable_write_stall", enable_str)]) - .unwrap(); - } - }, - ); + for cf in ALL_CFS { + self.configurable_db + .set_cf_config(cf, &[("disable_write_stall", enable_str)]) + .unwrap(); + } self.flow_controller.enable(enable); } } else if let Some(v) = change.get("scheduler_worker_pool_size") { diff --git a/src/storage/txn/flow_controller/tablet_flow_controller.rs b/src/storage/txn/flow_controller/tablet_flow_controller.rs index 17a5900bea7..973ed245ac8 100644 --- a/src/storage/txn/flow_controller/tablet_flow_controller.rs +++ b/src/storage/txn/flow_controller/tablet_flow_controller.rs 
@@ -11,9 +11,9 @@ use std::{ time::Duration, }; -use collections::HashMap; +use collections::{HashMap, HashMapEntry}; use engine_rocks::FlowInfo; -use engine_traits::{CfNamesExt, FlowControlFactorsExt, OpenOptions, TabletFactory}; +use engine_traits::{CfNamesExt, FlowControlFactorsExt, TabletRegistry}; use rand::Rng; use tikv_util::{sys::thread::StdThreadBuildWrapper, time::Limiter}; @@ -47,9 +47,9 @@ impl Drop for TabletFlowController { } impl TabletFlowController { - pub fn new( + pub fn new( config: &FlowControlConfig, - tablet_factory: Arc + Send + Sync>, + registry: TabletRegistry, flow_info_receiver: Receiver, ) -> Self { let (tx, rx) = mpsc::sync_channel(5); @@ -69,7 +69,7 @@ impl TabletFlowController { handle: Some(FlowInfoDispatcher::start( rx, flow_info_receiver, - tablet_factory, + registry, flow_checkers, limiters, config.clone(), @@ -86,10 +86,10 @@ impl TabletFlowController { struct FlowInfoDispatcher; impl FlowInfoDispatcher { - fn start( + fn start( rx: Receiver, flow_info_receiver: Receiver, - tablet_factory: Arc + Send + Sync>, + registry: TabletRegistry, flow_checkers: Arc>>>, limiters: Limiters, config: FlowControlConfig, @@ -116,32 +116,6 @@ impl FlowInfoDispatcher { Err(_) => {} } - let insert_limiter_and_checker = |region_id, suffix| -> FlowChecker { - let engine = tablet_factory - .open_tablet( - region_id, - Some(suffix), - OpenOptions::default().set_cache_only(true), - ) - .unwrap(); - let mut v = limiters.as_ref().write().unwrap(); - let discard_ratio = Arc::new(AtomicU32::new(0)); - let limiter = v.entry(region_id).or_insert(( - Arc::new( - ::builder(f64::INFINITY) - .refill(Duration::from_millis(1)) - .build(), - ), - discard_ratio, - )); - FlowChecker::new_with_tablet_suffix( - &config, - engine, - limiter.1.clone(), - limiter.0.clone(), - suffix, - ) - }; let msg = flow_info_receiver.recv_deadline(deadline); match msg.clone() { Ok(FlowInfo::L0(_cf, _, region_id, suffix)) @@ -165,22 +139,43 @@ impl FlowInfoDispatcher { } 
Ok(FlowInfo::Created(region_id, suffix)) => { let mut checkers = flow_checkers.as_ref().write().unwrap(); - let checker = checkers - .entry(region_id) - .or_insert_with(|| insert_limiter_and_checker(region_id, suffix)); + let checker = match checkers.entry(region_id) { + HashMapEntry::Occupied(e) => e.into_mut(), + HashMapEntry::Vacant(e) => { + let engine = if let Some(mut c) = registry.get(region_id) && let Some(t) = c.latest() { + t.clone() + } else { + continue; + }; + let mut v = limiters.as_ref().write().unwrap(); + let discard_ratio = Arc::new(AtomicU32::new(0)); + let limiter = v.entry(region_id).or_insert(( + Arc::new( + ::builder(f64::INFINITY) + .refill(Duration::from_millis(1)) + .build(), + ), + discard_ratio, + )); + e.insert(FlowChecker::new_with_tablet_suffix( + &config, + engine, + limiter.1.clone(), + limiter.0.clone(), + suffix, + )) + }, + }; // check if the checker's engine is exactly (region_id, suffix) // if checker.suffix < suffix, it means its tablet is old and needs the // refresh if checker.tablet_suffix() < suffix { - let engine = tablet_factory - .open_tablet( - region_id, - Some(suffix), - OpenOptions::default().set_cache_only(true), - ) - .unwrap(); - checker.set_engine(engine); - checker.set_tablet_suffix(suffix); + let cached = registry.get(region_id); + // None means the region is destroyed. 
+ if let Some(mut c) = cached && let Some(engine) = c.latest() { + checker.set_engine(engine.clone()); + checker.set_tablet_suffix(suffix); + } } } Ok(FlowInfo::Destroyed(region_id, suffix)) => { @@ -296,35 +291,43 @@ impl TabletFlowController { #[cfg(test)] mod tests { use engine_rocks::FlowInfo; - use engine_traits::DummyFactory; + use engine_traits::SingletonFactory; + use tempfile::TempDir; use super::{ super::{singleton_flow_controller::tests::*, FlowController}, *, }; - fn create_tablet_flow_controller() -> (FlowController, mpsc::SyncSender, EngineStub) { + fn create_tablet_flow_controller() -> ( + TempDir, + FlowController, + mpsc::SyncSender, + TabletRegistry, + ) { let (tx, rx) = mpsc::sync_channel(0); - let root_path = "/tmp"; + let temp_dir = tempfile::tempdir().unwrap(); let stub = EngineStub::new(); - let factory = DummyFactory::::new(Some(stub.clone()), root_path.to_string()); - let tablet_factory = Arc::new(factory); + let factory = Box::new(SingletonFactory::new(stub)); + let registry = TabletRegistry::new(factory, temp_dir.path()).unwrap(); ( + temp_dir, FlowController::Tablet(TabletFlowController::new( &FlowControlConfig::default(), - tablet_factory, + registry.clone(), rx, )), tx, - stub, + registry, ) } #[test] fn test_tablet_flow_controller_basic() { - let (flow_controller, tx, _) = create_tablet_flow_controller(); + let (_dir, flow_controller, tx, reg) = create_tablet_flow_controller(); let region_id = 5_u64; let tablet_suffix = 5_u64; + reg.load(region_id, tablet_suffix, false).unwrap(); tx.send(FlowInfo::Created(region_id, tablet_suffix)) .unwrap(); tx.send(FlowInfo::L0Intra( @@ -348,9 +351,11 @@ mod tests { #[test] fn test_tablet_flow_controller_memtable() { - let (flow_controller, tx, stub) = create_tablet_flow_controller(); + let (_dir, flow_controller, tx, reg) = create_tablet_flow_controller(); let region_id = 5_u64; let tablet_suffix = 5_u64; + let mut cached = reg.load(region_id, tablet_suffix, false).unwrap(); + let stub = 
cached.latest().unwrap().clone(); tx.send(FlowInfo::Created(region_id, tablet_suffix)) .unwrap(); tx.send(FlowInfo::L0Intra( @@ -365,9 +370,11 @@ mod tests { #[test] fn test_tablet_flow_controller_l0() { - let (flow_controller, tx, stub) = create_tablet_flow_controller(); + let (_dir, flow_controller, tx, reg) = create_tablet_flow_controller(); let region_id = 5_u64; let tablet_suffix = 5_u64; + let mut cached = reg.load(region_id, tablet_suffix, false).unwrap(); + let stub = cached.latest().unwrap().clone(); tx.send(FlowInfo::Created(region_id, tablet_suffix)) .unwrap(); tx.send(FlowInfo::L0Intra( @@ -382,9 +389,11 @@ mod tests { #[test] fn test_tablet_flow_controller_pending_compaction_bytes() { - let (flow_controller, tx, stub) = create_tablet_flow_controller(); + let (_dir, flow_controller, tx, reg) = create_tablet_flow_controller(); let region_id = 5_u64; let tablet_suffix = 5_u64; + let mut cached = reg.load(region_id, tablet_suffix, false).unwrap(); + let stub = cached.latest().unwrap().clone(); tx.send(FlowInfo::Created(region_id, tablet_suffix)) .unwrap(); tx.send(FlowInfo::L0Intra( diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 2ea66ef1222..2508b544285 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -12,7 +12,6 @@ use std::{ use api_version::{ApiV1, ApiV2, KvFormat}; use collections::HashMap; -use engine_traits::DummyFactory; use errors::{extract_key_error, extract_region_error}; use futures::executor::block_on; use grpcio::*; @@ -267,7 +266,7 @@ fn test_scale_scheduler_pool() { cfg_controller.register( Module::Storage, Box::new(StorageConfigManger::new( - Arc::new(DummyFactory::new(Some(kv_engine), "".to_string())), + kv_engine, scheduler, flow_controller, storage.get_scheduler(), From d86a449d7f5b656cef28576f166e73291f501d77 Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 9 Dec 2022 15:54:07 +0800 Subject: [PATCH 0400/1149] raftstore-v2: add DATA_PREFIX 
(#13917) ref tikv/tikv#12842 Because v2 doesn't share rocksdb, so it's possible to not write a prefix when writing keys. However, because rocksdb doesn't support specifying infinite upper bound in various APIs like properties, so we should solve those issues before landing prefix-less write. Signed-off-by: Jay Lee --- components/raftstore-v2/src/fsm/apply.rs | 23 ++++++++++++--- .../src/operation/command/admin/split.rs | 17 +++++++---- .../raftstore-v2/src/operation/command/mod.rs | 8 ++--- .../src/operation/command/write/mod.rs | 22 +++++++++++--- components/raftstore-v2/src/raft/apply.rs | 28 +++++++++++------- .../tests/failpoints/test_basic_write.rs | 18 ++++++------ .../tests/integrations/cluster.rs | 19 ++++++++++-- .../tests/integrations/test_basic_write.rs | 15 +++++----- .../tests/integrations/test_conf_change.rs | 29 +++++++++---------- 9 files changed, 115 insertions(+), 64 deletions(-) diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 6e2921a0c0d..7e9a135b498 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -7,17 +7,19 @@ use std::{ Arc, }, task::{Context, Poll}, + time::{Duration, Instant}, }; use batch_system::{Fsm, FsmScheduler, Mailbox}; use crossbeam::channel::TryRecvError; use engine_traits::{KvEngine, TabletRegistry}; -use futures::{Future, StreamExt}; +use futures::{compat::Future01CompatExt, Future, FutureExt, StreamExt}; use kvproto::{metapb, raft_serverpb::RegionLocalState}; use raftstore::store::ReadTask; use slog::Logger; use tikv_util::{ mpsc::future::{self, Receiver, Sender, WakePolicy}, + timer::GLOBAL_TIMER_HANDLE, worker::Scheduler, }; @@ -89,9 +91,22 @@ impl ApplyFsm { impl ApplyFsm { pub async fn handle_all_tasks(&mut self) { loop { - let mut task = match self.receiver.next().await { - Some(t) => t, - None => return, + let timeout = GLOBAL_TIMER_HANDLE + .delay(Instant::now() + Duration::from_secs(10)) + .compat(); + let res = 
futures::select! { + res = self.receiver.next().fuse() => res, + _ = timeout.fuse() => None, + }; + let mut task = match res { + Some(r) => r, + None => { + self.apply.release_memory(); + match self.receiver.next().await { + Some(t) => t, + None => return, + } + } }; loop { match task { diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 13a5d168915..7de49a716c3 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -265,7 +265,7 @@ impl Apply { .open_tablet(region_id, Some(log_index), &path) .unwrap(); // Remove the old write batch. - self.write_batch_mut().take(); + self.write_batch.take(); self.publish_tablet(tablet); self.region_state_mut() @@ -832,16 +832,21 @@ mod test { // Split will create checkpoint tablet, so if there are some writes before // split, they should be flushed immediately. apply.apply_put(CF_DEFAULT, b"k04", b"v4").unwrap(); - assert!(!WriteBatch::is_empty( - apply.write_batch_mut().as_ref().unwrap() - )); + assert!(!WriteBatch::is_empty(apply.write_batch.as_ref().unwrap())); splits.mut_requests().clear(); splits .mut_requests() .push(new_split_req(b"k05", 70, vec![71, 72, 73])); req.set_splits(splits); apply.apply_batch_split(&req, 50).unwrap(); - assert!(apply.write_batch_mut().is_none()); - assert_eq!(apply.tablet().get_value(b"k04").unwrap().unwrap(), b"v4"); + assert!(apply.write_batch.is_none()); + assert_eq!( + apply + .tablet() + .get_value(&keys::data_key(b"k04")) + .unwrap() + .unwrap(), + b"v4" + ); } } diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 3bb6b7b3852..3ee3430a140 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -341,14 +341,14 @@ impl Apply { } if !e.get_data().is_empty() { let mut 
set_save_point = false; - if let Some(wb) = self.write_batch_mut() { + if let Some(wb) = &mut self.write_batch { wb.set_save_point(); set_save_point = true; } let resp = match self.apply_entry(&e).await { Ok(resp) => resp, Err(e) => { - if let Some(wb) = self.write_batch_mut() { + if let Some(wb) = &mut self.write_batch { if set_save_point { wb.rollback_to_save_point().unwrap(); } else { @@ -500,7 +500,7 @@ impl Apply { #[inline] pub fn flush(&mut self) { - if let Some(wb) = self.write_batch_mut() && !wb.is_empty() { + if let Some(wb) = &mut self.write_batch && !wb.is_empty() { let mut write_opt = WriteOptions::default(); write_opt.set_disable_wal(true); if let Err(e) = wb.write_opt(&write_opt) { @@ -509,7 +509,7 @@ impl Apply { if wb.data_size() <= APPLY_WB_SHRINK_SIZE { wb.clear(); } else { - self.write_batch_mut().take(); + self.write_batch.take(); } } let callbacks = self.callbacks_mut(); diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 59c5679f95f..f9cac15d899 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -113,11 +113,21 @@ impl Apply { #[inline] pub fn apply_put(&mut self, cf: &str, key: &[u8], value: &[u8]) -> Result<()> { util::check_key_in_region(key, self.region_state().get_region())?; + // Technically it's OK to remove prefix for raftstore v2. But rocksdb doesn't + // support specifying infinite upper bound in various APIs. 
+ keys::data_key_with_buffer(key, &mut self.key_buffer); + self.ensure_write_buffer(); let res = if cf.is_empty() || cf == CF_DEFAULT { // TODO: use write_vector - self.write_batch_or_default().put(key, value) + self.write_batch + .as_mut() + .unwrap() + .put(&self.key_buffer, value) } else { - self.write_batch_or_default().put_cf(cf, key, value) + self.write_batch + .as_mut() + .unwrap() + .put_cf(cf, &self.key_buffer, value) }; res.unwrap_or_else(|e| { panic!( @@ -138,11 +148,15 @@ impl Apply { #[inline] pub fn apply_delete(&mut self, cf: &str, key: &[u8]) -> Result<()> { util::check_key_in_region(key, self.region_state().get_region())?; + keys::data_key_with_buffer(key, &mut self.key_buffer); let res = if cf.is_empty() || cf == CF_DEFAULT { // TODO: use write_vector - self.write_batch_or_default().delete(key) + self.write_batch.as_mut().unwrap().delete(&self.key_buffer) } else { - self.write_batch_or_default().delete_cf(cf, key) + self.write_batch + .as_mut() + .unwrap() + .delete_cf(cf, &self.key_buffer) }; res.unwrap_or_else(|e| { panic!( diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 421c2c476f7..d4a4cf61602 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -2,7 +2,7 @@ use std::{mem, sync::Arc}; -use engine_traits::{CachedTablet, KvEngine, TabletRegistry}; +use engine_traits::{CachedTablet, KvEngine, TabletRegistry, WriteBatch}; use kvproto::{metapb, raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; use raftstore::store::{fsm::apply::DEFAULT_APPLY_WB_SIZE, ReadTask}; use slog::Logger; @@ -21,7 +21,9 @@ pub struct Apply { /// publish the update of the tablet remote_tablet: CachedTablet, tablet: EK, - write_batch: Option, + pub write_batch: Option, + /// A buffer for encoding key. 
+ pub key_buffer: Vec, tablet_registry: TabletRegistry, @@ -67,6 +69,7 @@ impl Apply { region_state, tablet_registry, read_scheduler, + key_buffer: vec![], res_reporter, logger, } @@ -88,16 +91,11 @@ impl Apply { } #[inline] - pub fn write_batch_mut(&mut self) -> &mut Option { - &mut self.write_batch - } - - #[inline] - pub fn write_batch_or_default(&mut self) -> &mut EK::WriteBatch { - if self.write_batch.is_none() { - self.write_batch = Some(self.tablet.write_batch_with_cap(DEFAULT_APPLY_WB_SIZE)); + pub fn ensure_write_buffer(&mut self) { + if self.write_batch.is_some() { + return; } - self.write_batch.as_mut().unwrap() + self.write_batch = Some(self.tablet.write_batch_with_cap(DEFAULT_APPLY_WB_SIZE)); } #[inline] @@ -170,4 +168,12 @@ impl Apply { pub fn take_admin_result(&mut self) -> Vec { mem::take(&mut self.admin_cmd_result) } + + #[inline] + pub fn release_memory(&mut self) { + mem::take(&mut self.key_buffer); + if self.write_batch.as_ref().map_or(false, |wb| wb.is_empty()) { + self.write_batch = None; + } + } } diff --git a/components/raftstore-v2/tests/failpoints/test_basic_write.rs b/components/raftstore-v2/tests/failpoints/test_basic_write.rs index be5ccf8316c..b20984a9837 100644 --- a/components/raftstore-v2/tests/failpoints/test_basic_write.rs +++ b/components/raftstore-v2/tests/failpoints/test_basic_write.rs @@ -12,8 +12,8 @@ use crate::cluster::Cluster; /// Check if write batch is correctly maintained during apply. #[test] fn test_write_batch_rollback() { - let cluster = Cluster::default(); - let router = &cluster.routers[0]; + let mut cluster = Cluster::default(); + let router = &mut cluster.routers[0]; let mut req = router.new_request_for(2); let mut put_req = Request::default(); put_req.set_cmd_type(CmdType::Put); @@ -25,9 +25,6 @@ fn test_write_batch_rollback() { // Make several entries to batch in apply thread. 
fail::cfg("APPLY_COMMITTED_ENTRIES", "pause").unwrap(); - let tablet_registry = cluster.node(0).tablet_registry(); - let tablet = tablet_registry.get(2).unwrap().latest().unwrap().clone(); - // Good proposal should be committed. let (msg, mut sub0) = PeerMsg::raft_command(req.clone()); router.send(2, msg).unwrap(); @@ -58,8 +55,10 @@ fn test_write_batch_rollback() { ); let resp = block_on(sub1.result()).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); - assert_matches!(tablet.get_value(b"key"), Ok(None)); - assert_eq!(tablet.get_value(b"key1").unwrap().unwrap(), b"value"); + + let snap = router.stale_snapshot(2); + assert_matches!(snap.get_value(b"key"), Ok(None)); + assert_eq!(snap.get_value(b"key1").unwrap().unwrap(), b"value"); fail::cfg("APPLY_COMMITTED_ENTRIES", "pause").unwrap(); @@ -91,6 +90,7 @@ fn test_write_batch_rollback() { ); let resp = block_on(sub1.result()).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); - assert_matches!(tablet.get_value(b"key2"), Ok(None)); - assert_eq!(tablet.get_value(b"key3").unwrap().unwrap(), b"value"); + let snap = router.stale_snapshot(2); + assert_matches!(snap.get_value(b"key2"), Ok(None)); + assert_eq!(snap.get_value(b"key3").unwrap().unwrap(), b"value"); } diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 3e2ced3df3c..11f8094612b 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -17,21 +17,21 @@ use concurrency_manager::ConcurrencyManager; use crossbeam::channel::{self, Receiver, Sender, TrySendError}; use engine_test::{ ctor::{CfOptions, DbOptions}, - kv::{KvTestEngine, TestTabletFactory}, + kv::{KvTestEngine, KvTestSnapshot, TestTabletFactory}, raft::RaftTestEngine, }; use engine_traits::{TabletRegistry, ALL_CFS}; use futures::executor::block_on; use kvproto::{ metapb::{self, RegionEpoch, Store}, - raft_cmdpb::{RaftCmdRequest, 
RaftCmdResponse}, + raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse, Request}, raft_serverpb::RaftMessage, }; use pd_client::RpcClient; use raft::eraftpb::MessageType; use raftstore::store::{ region_meta::{RegionLocalState, RegionMeta}, - Config, TabletSnapKey, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX, + Config, RegionSnapshot, TabletSnapKey, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX, }; use raftstore_v2::{ create_store_batch_system, @@ -45,6 +45,7 @@ use tikv_util::{ config::{ReadableDuration, VersionTrack}, store::new_peer, }; +use txn_types::WriteBatchFlags; pub struct TestRouter(RaftRouter); @@ -151,6 +152,17 @@ impl TestRouter { req } + pub fn stale_snapshot(&mut self, region_id: u64) -> RegionSnapshot { + let mut req = self.new_request_for(region_id); + let header = req.mut_header(); + header.set_flags(WriteBatchFlags::STALE_READ.bits()); + header.set_flag_data(vec![0; 8]); + let mut snap_req = Request::default(); + snap_req.set_cmd_type(CmdType::Snap); + req.mut_requests().push(snap_req); + block_on(self.get_snapshot(req)).unwrap() + } + pub fn region_detail(&self, region_id: u64) -> metapb::Region { let RegionLocalState { id, @@ -306,6 +318,7 @@ impl TestNode { router } + #[allow(dead_code)] pub fn tablet_registry(&self) -> &TabletRegistry { &self.running_state().unwrap().registry } diff --git a/components/raftstore-v2/tests/integrations/test_basic_write.rs b/components/raftstore-v2/tests/integrations/test_basic_write.rs index a0d3d1ac34a..807d64de756 100644 --- a/components/raftstore-v2/tests/integrations/test_basic_write.rs +++ b/components/raftstore-v2/tests/integrations/test_basic_write.rs @@ -112,8 +112,8 @@ fn test_basic_write() { #[test] fn test_put_delete() { - let cluster = Cluster::default(); - let router = &cluster.routers[0]; + let mut cluster = Cluster::default(); + let router = &mut cluster.routers[0]; let mut req = router.new_request_for(2); let mut put_req = Request::default(); put_req.set_cmd_type(CmdType::Put); @@ 
-123,16 +123,16 @@ fn test_put_delete() { router.wait_applied_to_current_term(2, Duration::from_secs(3)); - let registry = cluster.node(0).tablet_registry(); - let tablet = registry.get(2).unwrap().latest().unwrap().clone(); - assert!(tablet.get_value(b"key").unwrap().is_none()); + let snap = router.stale_snapshot(2); + assert!(snap.get_value(b"key").unwrap().is_none()); let (msg, mut sub) = PeerMsg::raft_command(req.clone()); router.send(2, msg).unwrap(); assert!(block_on(sub.wait_proposed())); assert!(block_on(sub.wait_committed())); let resp = block_on(sub.result()).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); - assert_eq!(tablet.get_value(b"key").unwrap().unwrap(), b"value"); + let snap = router.stale_snapshot(2); + assert_eq!(snap.get_value(b"key").unwrap().unwrap(), b"value"); let mut delete_req = Request::default(); delete_req.set_cmd_type(CmdType::Delete); @@ -145,5 +145,6 @@ fn test_put_delete() { assert!(block_on(sub.wait_committed())); let resp = block_on(sub.result()).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); - assert_matches!(tablet.get_value(b"key"), Ok(None)); + let snap = router.stale_snapshot(2); + assert_matches!(snap.get_value(b"key"), Ok(None)); } diff --git a/components/raftstore-v2/tests/integrations/test_conf_change.rs b/components/raftstore-v2/tests/integrations/test_conf_change.rs index 4f3ffbbf24c..1b9ca50daf7 100644 --- a/components/raftstore-v2/tests/integrations/test_conf_change.rs +++ b/components/raftstore-v2/tests/integrations/test_conf_change.rs @@ -12,10 +12,9 @@ use crate::cluster::Cluster; #[test] fn test_simple_change() { - let cluster = Cluster::with_node_count(2, None); + let mut cluster = Cluster::with_node_count(2, None); let region_id = 2; - let router0 = &cluster.routers[0]; - let mut req = router0.new_request_for(2); + let mut req = cluster.routers[0].new_request_for(2); let admin_req = req.mut_admin_request(); admin_req.set_cmd_type(AdminCmdType::ChangePeer); admin_req @@ -24,12 
+23,12 @@ fn test_simple_change() { let store_id = cluster.node(1).id(); let new_peer = new_learner_peer(store_id, 10); admin_req.mut_change_peer().set_peer(new_peer.clone()); - let resp = router0.command(2, req.clone()).unwrap(); + let resp = cluster.routers[0].command(2, req.clone()).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); let epoch = req.get_header().get_region_epoch(); let new_conf_ver = epoch.get_conf_ver() + 1; let leader_peer = req.get_header().get_peer().clone(); - let meta = router0 + let meta = cluster.routers[0] .must_query_debug_info(2, Duration::from_secs(3)) .unwrap(); let match_index = meta.raft_apply.applied_index; @@ -39,8 +38,7 @@ fn test_simple_change() { // So heartbeat will create a learner. cluster.dispatch(2, vec![]); - let router1 = &cluster.routers[1]; - let meta = router1 + let meta = cluster.routers[1] .must_query_debug_info(2, Duration::from_secs(3)) .unwrap(); assert_eq!(meta.raft_status.id, 10, "{:?}", meta); @@ -52,34 +50,33 @@ fn test_simple_change() { ); // Trigger the raft tick to replica the log to the learner and execute the // snapshot task. 
- router0 + cluster.routers[0] .send(region_id, PeerMsg::Tick(PeerTick::Raft)) .unwrap(); cluster.dispatch(region_id, vec![]); // write one kv after snapshot let (key, val) = (b"key", b"value"); - let mut write_req = router0.new_request_for(region_id); + let mut write_req = cluster.routers[0].new_request_for(region_id); let mut put_req = Request::default(); put_req.set_cmd_type(CmdType::Put); put_req.mut_put().set_key(key.to_vec()); put_req.mut_put().set_value(val.to_vec()); write_req.mut_requests().push(put_req); let (msg, _) = PeerMsg::raft_command(write_req.clone()); - router0.send(region_id, msg).unwrap(); + cluster.routers[0].send(region_id, msg).unwrap(); std::thread::sleep(Duration::from_millis(1000)); cluster.dispatch(region_id, vec![]); - let meta = router1 + let meta = cluster.routers[1] .must_query_debug_info(region_id, Duration::from_secs(3)) .unwrap(); // the learner truncated index muse be equal the leader applied index and can // read the new written kv. assert_eq!(match_index, meta.raft_apply.truncated_state.index); assert!(meta.raft_apply.applied_index >= match_index); - let registry = cluster.node(1).tablet_registry(); - let tablet = registry.get(region_id).unwrap().latest().unwrap().clone(); - assert_eq!(tablet.get_value(key).unwrap().unwrap(), val); + let snap = cluster.routers[1].stale_snapshot(2); + assert_eq!(snap.get_value(key).unwrap().unwrap(), val); req.mut_header() .mut_region_epoch() @@ -87,12 +84,12 @@ fn test_simple_change() { req.mut_admin_request() .mut_change_peer() .set_change_type(ConfChangeType::RemoveNode); - let resp = router0.command(2, req.clone()).unwrap(); + let resp = cluster.routers[0].command(2, req.clone()).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); let epoch = req.get_header().get_region_epoch(); let new_conf_ver = epoch.get_conf_ver() + 1; let leader_peer = req.get_header().get_peer().clone(); - let meta = router0 + let meta = cluster.routers[0] .must_query_debug_info(2, Duration::from_secs(3)) 
.unwrap(); assert_eq!(meta.region_state.epoch.version, epoch.get_version()); From d20569b0922533b5b01a2bd34f5f778045148e15 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 12 Dec 2022 17:02:51 +0800 Subject: [PATCH 0401/1149] Raftstore-v2: transfer leader (#13793) ref tikv/tikv#12842 Signed-off-by: SpadeA-Tang --- Cargo.lock | 3 + components/raftstore-v2/Cargo.toml | 3 + components/raftstore-v2/src/batch/store.rs | 6 + components/raftstore-v2/src/fsm/peer.rs | 52 ++- .../src/operation/command/admin/mod.rs | 20 + .../command/admin/transfer_leader.rs | 421 ++++++++++++++++++ .../raftstore-v2/src/operation/command/mod.rs | 11 +- .../raftstore-v2/src/operation/ready/mod.rs | 19 +- components/raftstore-v2/src/raft/peer.rs | 49 +- .../raftstore-v2/tests/integrations/mod.rs | 1 + .../integrations/test_transfer_leader.rs | 154 +++++++ components/raftstore/src/store/fsm/mod.rs | 2 +- components/raftstore/src/store/mod.rs | 7 +- components/raftstore/src/store/peer.rs | 4 +- 14 files changed, 732 insertions(+), 20 deletions(-) create mode 100644 components/raftstore-v2/src/operation/command/admin/transfer_leader.rs create mode 100644 components/raftstore-v2/tests/integrations/test_transfer_leader.rs diff --git a/Cargo.lock b/Cargo.lock index a7d72121032..9b3eccfda51 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4344,6 +4344,7 @@ name = "raftstore-v2" version = "0.1.0" dependencies = [ "batch-system", + "bytes", "causal_ts", "collections", "concurrency_manager", @@ -4358,12 +4359,14 @@ dependencies = [ "keys", "kvproto", "log_wrappers", + "parking_lot 0.12.0", "pd_client", "prometheus", "protobuf", "raft", "raft-proto", "raftstore", + "rand 0.8.5", "resource_metering", "slog", "slog-global", diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 46ed20f8d10..1d6b67ad129 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -30,6 +30,7 @@ cloud-azure = 
["raftstore/cloud-azure"] [dependencies] batch-system = { workspace = true } +bytes = "1.0" causal_ts = { workspace = true } collections = { workspace = true } concurrency_manager = { workspace = true } @@ -43,12 +44,14 @@ futures = { version = "0.3", features = ["compat"] } keys = { workspace = true } kvproto = { workspace = true } log_wrappers = { workspace = true } +parking_lot = "0.12" pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raft-proto = { version = "0.7.0" } raftstore = { workspace = true } +rand = "0.8.3" resource_metering = { workspace = true } slog = "2.3" smallvec = "1.4" diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 96cbee19e4e..997f8da7a9c 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -21,6 +21,7 @@ use engine_traits::{Engines, KvEngine, RaftEngine, TabletRegistry}; use file_system::{set_io_type, IoType}; use futures::{compat::Future01CompatExt, FutureExt}; use kvproto::{ + disk_usage::DiskUsage, metapb::Store, raft_serverpb::{PeerState, RaftMessage}, }; @@ -77,6 +78,10 @@ pub struct StoreContext { pub tablet_registry: TabletRegistry, pub apply_pool: FuturePool, pub read_scheduler: Scheduler>, + + /// Disk usage for the store itself. 
+ pub self_disk_usage: DiskUsage, + pub snap_mgr: TabletSnapManager, pub pd_scheduler: Scheduler, } @@ -345,6 +350,7 @@ where tablet_registry: self.tablet_registry.clone(), apply_pool: self.apply_pool.clone(), read_scheduler: self.read_scheduler.clone(), + self_disk_usage: DiskUsage::Normal, snap_mgr: self.snap_mgr.clone(), pd_scheduler: self.pd_scheduler.clone(), }; diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 6254e1975fd..c4dded64e62 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -6,8 +6,8 @@ use std::borrow::Cow; use batch_system::{BasicMailbox, Fsm}; use crossbeam::channel::TryRecvError; -use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; -use raftstore::store::{Config, Transport}; +use engine_traits::{KvEngine, RaftEngine, TabletFactory, TabletRegistry}; +use raftstore::store::{Config, LocksStatus, Transport}; use slog::{debug, error, info, trace, Logger}; use tikv_util::{ is_zero_duration, @@ -34,6 +34,7 @@ pub struct PeerFsm { /// twice accidentally. 
tick_registry: u16, is_stopped: bool, + reactivate_memory_lock_ticks: usize, } impl PeerFsm { @@ -51,6 +52,7 @@ impl PeerFsm { receiver: rx, tick_registry: 0, is_stopped: false, + reactivate_memory_lock_ticks: 0, }); Ok((tx, fsm)) } @@ -127,6 +129,17 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, Self { fsm, store_ctx } } + #[inline] + fn schedule_pending_ticks(&mut self) { + let pending_ticks = self.fsm.peer.take_pending_ticks(); + for tick in pending_ticks { + if tick == PeerTick::ReactivateMemoryLock { + self.fsm.reactivate_memory_lock_ticks = 0; + } + self.schedule_tick(tick); + } + } + pub fn schedule_tick(&mut self, tick: PeerTick) { assert!(PeerTick::VARIANT_COUNT <= u16::BITS as usize); let idx = tick as usize; @@ -200,7 +213,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerTick::CheckPeerStaleState => unimplemented!(), PeerTick::EntryCacheEvict => unimplemented!(), PeerTick::CheckLeaderLease => unimplemented!(), - PeerTick::ReactivateMemoryLock => unimplemented!(), + PeerTick::ReactivateMemoryLock => self.on_reactivate_memory_lock_tick(), PeerTick::ReportBuckets => unimplemented!(), PeerTick::CheckLongUncommitted => unimplemented!(), } @@ -209,7 +222,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, pub fn on_msgs(&mut self, peer_msgs_buf: &mut Vec) { for msg in peer_msgs_buf.drain(..) { match msg { - PeerMsg::RaftMessage(msg) => self.fsm.peer.on_raft_message(self.store_ctx, msg), + PeerMsg::RaftMessage(msg) => { + self.fsm.peer.on_raft_message(self.store_ctx, msg); + self.schedule_pending_ticks(); + } PeerMsg::RaftQuery(cmd) => { self.on_receive_command(cmd.send_time); self.on_query(cmd.request, cmd.ch) @@ -248,4 +264,32 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, // TODO: instead of propose pending commands immediately, we should use timeout. 
self.fsm.peer.propose_pending_writes(self.store_ctx); } + + pub fn on_reactivate_memory_lock_tick(&mut self) { + let mut pessimistic_locks = self.fsm.peer.txn_ext().pessimistic_locks.write(); + + // If it is not leader, we needn't reactivate by tick. In-memory pessimistic + // lock will be enabled when this region becomes leader again. + // And this tick is currently only used for the leader transfer failure case. + if !self.fsm.peer().is_leader() + || pessimistic_locks.status != LocksStatus::TransferringLeader + { + return; + } + + self.fsm.reactivate_memory_lock_ticks += 1; + let transferring_leader = self.fsm.peer.raft_group().raft.lead_transferee.is_some(); + // `lead_transferee` is not set immediately after the lock status changes. So, + // we need the tick count condition to avoid reactivating too early. + if !transferring_leader + && self.fsm.reactivate_memory_lock_ticks + >= self.store_ctx.cfg.reactive_memory_lock_timeout_tick + { + pessimistic_locks.status = LocksStatus::Normal; + self.fsm.reactivate_memory_lock_ticks = 0; + } else { + drop(pessimistic_locks); + self.schedule_tick(PeerTick::ReactivateMemoryLock); + } + } } diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index c1e25474701..388bf72e01e 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -2,6 +2,7 @@ mod conf_change; mod split; +mod transfer_leader; use engine_traits::{KvEngine, RaftEngine}; use kvproto::raft_cmdpb::{AdminCmdType, AdminRequest, RaftCmdRequest}; @@ -19,6 +20,7 @@ use raftstore::{ use slog::info; pub use split::{SplitInit, SplitResult, SPLIT_PREFIX}; use tikv_util::box_err; +use txn_types::WriteBatchFlags; use self::conf_change::ConfChangeResult; use crate::{ @@ -29,8 +31,11 @@ use crate::{ #[derive(Debug)] pub enum AdminCmdResult { + // No side effect produced by the command + None, 
SplitRegion(SplitResult), ConfChange(ConfChangeResult), + TransferLeader(u64), } impl Peer { @@ -81,6 +86,21 @@ impl Peer { "Split is deprecated. Please use BatchSplit instead." )), AdminCmdType::BatchSplit => self.propose_split(ctx, req), + AdminCmdType::TransferLeader => { + // Containing TRANSFER_LEADER_PROPOSAL flag means the this transfer leader + // request should be proposed to the raft group + if WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()) + .contains(WriteBatchFlags::TRANSFER_LEADER_PROPOSAL) + { + let data = req.write_to_bytes().unwrap(); + self.propose_with_ctx(ctx, data, vec![]) + } else { + if self.propose_transfer_leader(ctx, req, ch) { + self.set_has_ready(); + } + return; + } + } _ => unimplemented!(), } }; diff --git a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs new file mode 100644 index 00000000000..71853d0007b --- /dev/null +++ b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs @@ -0,0 +1,421 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::cmp::Ordering; + +use bytes::Bytes; +use engine_traits::{KvEngine, RaftEngine, CF_LOCK}; +use fail::fail_point; +use kvproto::{ + disk_usage::DiskUsage, + metapb, + raft_cmdpb::{ + AdminCmdType, AdminRequest, AdminResponse, CmdType, PutRequest, RaftCmdRequest, Request, + TransferLeaderRequest, + }, +}; +use parking_lot::RwLockWriteGuard; +use raft::{eraftpb, ProgressState, Storage}; +use raftstore::{ + store::{ + fsm::new_admin_request, make_transfer_leader_response, metrics::PEER_ADMIN_CMD_COUNTER, + LocksStatus, Transport, TRANSFER_LEADER_COMMAND_REPLY_CTX, + }, + Result, +}; +use rand::prelude::SliceRandom; +use slog::info; +use txn_types::WriteBatchFlags; + +use super::AdminCmdResult; +use crate::{ + batch::StoreContext, + fsm::{ApplyResReporter, PeerFsmDelegate}, + raft::{Apply, Peer}, + router::{CmdResChannel, PeerMsg, PeerTick}, +}; + +fn get_transfer_leader_cmd(msg: &RaftCmdRequest) -> Option<&TransferLeaderRequest> { + if !msg.has_admin_request() { + return None; + } + let req = msg.get_admin_request(); + if !req.has_transfer_leader() { + return None; + } + + Some(req.get_transfer_leader()) +} + +impl Peer { + /// Return true if the transfer leader request is accepted. + /// + /// When transferring leadership begins, leader sends a pre-transfer + /// to target follower first to ensures it's ready to become leader. + /// After that the real transfer leader process begin. + /// + /// 1. pre_transfer_leader on leader: + /// Leader will send a MsgTransferLeader to follower. + /// 2. execute_transfer_leader on follower + /// If follower passes all necessary checks, it will reply an + /// ACK with type MsgTransferLeader and its promised applied index. + /// 3. ready_to_transfer_leader on leader: + /// Leader checks if it's appropriate to transfer leadership. If it + /// does, it calls raft transfer_leader API to do the remaining work. 
+ /// + /// Additional steps when there are remaining pessimistic + /// locks to propose (detected in function on_transfer_leader_msg). + /// 1. Leader firstly proposes pessimistic locks and then proposes a + /// TransferLeader command. + /// 2. The follower applies the TransferLeader command and replies an + /// ACK with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. + /// + /// See also: tikv/rfcs#37. + pub fn propose_transfer_leader( + &mut self, + ctx: &mut StoreContext, + req: RaftCmdRequest, + ch: CmdResChannel, + ) -> bool { + ctx.raft_metrics.propose.transfer_leader.inc(); + + let transfer_leader = get_transfer_leader_cmd(&req).unwrap(); + let prs = self.raft_group().raft.prs(); + + // Find the target with the largest matched index among the candidate + // transferee peers + let (_, peers) = transfer_leader + .get_peers() + .iter() + .filter(|peer| peer.id != self.peer().id) + .fold((0, vec![]), |(max_matched, mut chosen), p| { + if let Some(pr) = prs.get(p.id) { + match pr.matched.cmp(&max_matched) { + Ordering::Greater => (pr.matched, vec![p]), + Ordering::Equal => { + chosen.push(p); + (max_matched, chosen) + } + Ordering::Less => (max_matched, chosen), + } + } else { + (max_matched, chosen) + } + }); + let peer = match peers.len() { + 0 => transfer_leader.get_peer(), + 1 => peers.get(0).unwrap(), + _ => peers.choose(&mut rand::thread_rng()).unwrap(), + }; + + let transferee = if peer.id == self.peer().id { + false + } else { + self.pre_transfer_leader(peer) + }; + + // transfer leader command doesn't need to replicate log and apply, so we + // return immediately. 
Note that this command may fail, we can view it just as + // an advice + ch.set_result(make_transfer_leader_response()); + + transferee + } + + fn pre_transfer_leader(&mut self, peer: &metapb::Peer) -> bool { + if self.raft_group().raft.has_pending_conf() { + info!( + self.logger, + "reject transfer leader due to pending conf change"; + "peer" => ?peer, + ); + return false; + } + + // Broadcast heartbeat to make sure followers commit the entries immediately. + // It's only necessary to ping the target peer, but ping all for simplicity. + self.raft_group_mut().ping(); + + // todo: entry cache warmup + + let mut msg = eraftpb::Message::new(); + msg.set_to(peer.get_id()); + msg.set_msg_type(eraftpb::MessageType::MsgTransferLeader); + msg.set_from(self.peer_id()); + // log term here represents the term of last log. For leader, the term of last + // log is always its current term. Not just set term because raft library + // forbids setting it for MsgTransferLeader messages. + msg.set_log_term(self.term()); + self.raft_group_mut().raft.msgs.push(msg); + true + } + + pub fn on_transfer_leader_msg( + &mut self, + ctx: &mut StoreContext, + msg: &eraftpb::Message, + peer_disk_usage: DiskUsage, + ) { + // log_term is set by original leader, represents the term last log is written + // in, which should be equal to the original leader's term. 
+ if msg.get_log_term() != self.term() { + return; + } + + if !self.is_leader() { + self.execute_transfer_leader(ctx, msg.get_from(), peer_disk_usage, false); + } else { + let from = match self.peer_from_cache(msg.get_from()) { + Some(p) => p, + None => return, + }; + match self.ready_to_transfer_leader(ctx, msg.get_index(), &from) { + Some(reason) => { + info!( + self.logger, + "reject to transfer leader"; + "to" => ?from, + "reason" => reason, + "index" => msg.get_index(), + "last_index" => self.storage().last_index().unwrap_or_default(), + ); + } + None => { + self.propose_pending_writes(ctx); + if self.propose_locks_before_transfer_leader(ctx, msg) { + // If some pessimistic locks are just proposed, we propose another + // TransferLeader command instead of transferring leader immediately. + info!( + self.logger, + "propose transfer leader command"; + "to" => ?from, + ); + let mut cmd = + new_admin_request(self.region().get_id(), self.peer().clone()); + cmd.mut_header() + .set_region_epoch(self.region().get_region_epoch().clone()); + // Set this flag to propose this command like a normal proposal. 
+ cmd.mut_header() + .set_flags(WriteBatchFlags::TRANSFER_LEADER_PROPOSAL.bits()); + cmd.mut_admin_request() + .set_cmd_type(AdminCmdType::TransferLeader); + cmd.mut_admin_request().mut_transfer_leader().set_peer(from); + if let (PeerMsg::RaftCommand(req), sub) = PeerMsg::raft_command(cmd) { + self.on_admin_command(ctx, req.request, req.ch); + } else { + unreachable!(); + } + } else { + info!( + self.logger, + "transfer leader"; + "peer" => ?from, + ); + self.raft_group_mut().transfer_leader(from.get_id()); + } + } + } + } + } + + pub fn execute_transfer_leader( + &mut self, + ctx: &mut StoreContext, + from: u64, + peer_disk_usage: DiskUsage, + reply_cmd: bool, // whether it is a reply to a TransferLeader command + ) { + let pending_snapshot = self.is_handling_snapshot() || self.has_pending_snapshot(); + if pending_snapshot + || from != self.leader_id() + // Transfer leader to node with disk full will lead to write availablity downback. + // But if the current leader is disk full, and send such request, we should allow it, + // because it may be a read leader balance request. 
+ || (!matches!(ctx.self_disk_usage, DiskUsage::Normal) && + matches!(peer_disk_usage,DiskUsage::Normal)) + { + info!( + self.logger, + "reject transferring leader"; + "from" => from, + "pending_snapshot" => pending_snapshot, + "disk_usage" => ?ctx.self_disk_usage, + ); + return; + } + + let mut msg = eraftpb::Message::new(); + msg.set_from(self.peer_id()); + msg.set_to(self.leader_id()); + msg.set_msg_type(eraftpb::MessageType::MsgTransferLeader); + msg.set_index(self.storage().apply_state().applied_index); + msg.set_log_term(self.term()); + if reply_cmd { + msg.set_context(Bytes::from_static(TRANSFER_LEADER_COMMAND_REPLY_CTX)); + } + self.raft_group_mut().raft.msgs.push(msg); + } + + fn ready_to_transfer_leader( + &self, + ctx: &mut StoreContext, + mut index: u64, + peer: &metapb::Peer, + ) -> Option<&'static str> { + let status = self.raft_group().status(); + let progress = status.progress.unwrap(); + + if !progress.conf().voters().contains(peer.id) { + return Some("non voter"); + } + + for (id, pr) in progress.iter() { + if pr.state == ProgressState::Snapshot { + return Some("pending snapshot"); + } + if *id == peer.id && index == 0 { + // index will be zero if it's sent from an instance without + // pre-transfer-leader feature. Set it to matched to make it + // possible to transfer leader to an older version. It may be + // useful during rolling restart. + index = pr.matched; + } + } + + if self.raft_group().raft.has_pending_conf() + || self.raft_group().raft.pending_conf_index > index + { + return Some("pending conf change"); + } + + if self.storage().last_index().unwrap_or_default() + >= index + ctx.cfg.leader_transfer_max_log_lag + { + return Some("log gap"); + } + None + } + + // Returns whether we should propose another TransferLeader command. This is + // for: + // - Considering the amount of pessimistic locks can be big, it can reduce + // unavailable time caused by waiting for the transferee catching up logs. 
+ // - Make transferring leader strictly after write commands that executes before + // proposing the locks, preventing unexpected lock loss. + fn propose_locks_before_transfer_leader( + &mut self, + ctx: &mut StoreContext, + msg: &eraftpb::Message, + ) -> bool { + // 1. Disable in-memory pessimistic locks. + + // Clone to make borrow checker happy when registering ticks. + let txn_ext = self.txn_ext().clone(); + let mut pessimistic_locks = txn_ext.pessimistic_locks.write(); + + // If the message context == TRANSFER_LEADER_COMMAND_REPLY_CTX, the message + // is a reply to a transfer leader command before. If the locks status remain + // in the TransferringLeader status, we can safely initiate transferring leader + // now. + // If it's not in TransferringLeader status now, it is probably because several + // ticks have passed after proposing the locks in the last time and we + // reactivate the memory locks. Then, we should propose the locks again. + if msg.get_context() == TRANSFER_LEADER_COMMAND_REPLY_CTX + && pessimistic_locks.status == LocksStatus::TransferringLeader + { + return false; + } + + // If it is not writable, it's probably because it's a retried TransferLeader + // and the locks have been proposed. But we still need to return true to + // propose another TransferLeader command. Otherwise, some write requests that + // have marked some locks as deleted will fail because raft rejects more + // proposals. + // It is OK to return true here if it's in other states like MergingRegion or + // NotLeader. In those cases, the locks will fail to propose and nothing will + // happen. + if !pessimistic_locks.is_writable() { + return true; + } + pessimistic_locks.status = LocksStatus::TransferringLeader; + self.add_pending_tick(PeerTick::ReactivateMemoryLock); + + // 2. Propose pessimistic locks + if pessimistic_locks.is_empty() { + return false; + } + // FIXME: Raft command has size limit. 
Either limit the total size of + // pessimistic locks in a region, or split commands here. + let mut cmd = RaftCmdRequest::default(); + { + // Downgrade to a read guard, do not block readers in the scheduler as far as + // possible. + let pessimistic_locks = RwLockWriteGuard::downgrade(pessimistic_locks); + fail_point!("invalidate_locks_before_transfer_leader"); + for (key, (lock, deleted)) in &*pessimistic_locks { + if *deleted { + continue; + } + let mut put = PutRequest::default(); + put.set_cf(CF_LOCK.to_string()); + put.set_key(key.as_encoded().to_owned()); + put.set_value(lock.to_lock().to_bytes()); + let mut req = Request::default(); + req.set_cmd_type(CmdType::Put); + req.set_put(put); + cmd.mut_requests().push(req); + } + } + if cmd.get_requests().is_empty() { + // If the map is not empty but all locks are deleted, it is possible that a + // write command has just marked locks deleted but not proposed yet. + // It might cause that command to fail if we skip proposing the + // extra TransferLeader command here. + return true; + } + cmd.mut_header().set_region_id(self.region_id()); + cmd.mut_header() + .set_region_epoch(self.region().get_region_epoch().clone()); + cmd.mut_header().set_peer(self.peer().clone()); + info!( + self.logger, + "propose {} locks before transferring leader", cmd.get_requests().len(); + ); + let (PeerMsg::RaftCommand(req), sub) = PeerMsg::raft_command(cmd) else {unreachable!()}; + self.on_write_command(ctx, req.request, req.ch); + true + } +} + +impl Apply { + pub fn apply_transfer_leader( + &mut self, + req: &AdminRequest, + term: u64, + ) -> Result<(AdminResponse, AdminCmdResult)> { + PEER_ADMIN_CMD_COUNTER.transfer_leader.all.inc(); + let resp = AdminResponse::default(); + + let peer = req.get_transfer_leader().get_peer(); + // Only execute TransferLeader if the expected new leader is self. 
+ if peer.get_id() == self.peer().get_id() { + Ok((resp, AdminCmdResult::TransferLeader(term))) + } else { + Ok((resp, AdminCmdResult::None)) + } + } +} + +impl Peer { + pub fn on_transfer_leader(&mut self, ctx: &mut StoreContext, term: u64) { + // If the term has changed between proposing and executing the TransferLeader + // request, ignore it because this request may be stale. + if term != self.term() { + return; + } + + // Reply to leader that it is ready to transfer leader now. + self.execute_transfer_leader(ctx, self.leader_id(), DiskUsage::Normal, true); + + self.set_has_ready(); + } +} diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 3ee3430a140..3d0a17ece62 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -297,6 +297,7 @@ impl Peer { for admin_res in apply_res.admin_result { match admin_res { + AdminCmdResult::None => unreachable!(), AdminCmdResult::ConfChange(conf_change) => { self.on_apply_res_conf_change(ctx, conf_change) } @@ -305,6 +306,7 @@ impl Peer { derived_index, tablet_index, }) => self.on_apply_res_split(ctx, derived_index, tablet_index, regions), + AdminCmdResult::TransferLeader(term) => self.on_transfer_leader(ctx, term), } } @@ -448,7 +450,9 @@ impl Apply { AdminCmdType::PrepareMerge => unimplemented!(), AdminCmdType::CommitMerge => unimplemented!(), AdminCmdType::RollbackMerge => unimplemented!(), - AdminCmdType::TransferLeader => unreachable!(), + AdminCmdType::TransferLeader => { + self.apply_transfer_leader(admin_req, entry.term)? + } AdminCmdType::ChangePeer => { self.apply_conf_change(entry.get_index(), admin_req, conf_change.unwrap())? 
} @@ -465,7 +469,10 @@ impl Apply { } }; - self.push_admin_result(admin_result); + match admin_result { + AdminCmdResult::None => (), + _ => self.push_admin_result(admin_result), + } let mut resp = new_response(req.get_header()); resp.set_admin_response(admin_resp); Ok(resp) diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index c252ad7d231..baf66dfa6fc 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -29,7 +29,7 @@ use kvproto::{ raft_serverpb::{PeerState, RaftMessage, RaftSnapshotData}, }; use protobuf::Message as _; -use raft::{eraftpb, Ready, StateRole, INVALID_ID}; +use raft::{eraftpb, prelude::MessageType, Ready, StateRole, INVALID_ID}; use raftstore::store::{util, ExtraStates, FetchedLogs, ReadProgress, Transport, WriteTask}; use slog::{debug, error, trace, warn}; use tikv_util::time::{duration_to_sec, monotonic_raw_now}; @@ -114,16 +114,20 @@ impl Peer { unimplemented!(); // return; } + // TODO: drop all msg append when the peer is uninitialized and has conflict // ranges with other peers. 
let from_peer = msg.take_from_peer(); if self.is_leader() && from_peer.get_id() != INVALID_ID { self.add_peer_heartbeat(from_peer.get_id(), Instant::now()); } - self.insert_peer_cache(from_peer); - if let Err(e) = self.raft_group_mut().step(msg.take_message()) { + self.insert_peer_cache(msg.take_from_peer()); + if msg.get_message().get_msg_type() == MessageType::MsgTransferLeader { + self.on_transfer_leader_msg(ctx, msg.get_message(), msg.disk_usage) + } else if let Err(e) = self.raft_group_mut().step(msg.take_message()) { error!(self.logger, "raft step error"; "err" => ?e); } + self.set_has_ready(); } @@ -407,8 +411,8 @@ impl Peer { /// The apply snapshot process order would be: /// - Get the snapshot from the ready /// - Wait for async writer to load this tablet - /// In this step, the snapshot has loaded finish, but some apply state - /// need to update. + /// In this step, the snapshot loading has been finished, but some apply + /// state need to update. if has_snapshot { self.on_applied_snapshot(ctx); } @@ -462,9 +466,13 @@ impl Peer { // latency. self.raft_group_mut().skip_bcast_commit(false); + // Init the in-memory pessimistic lock table when the peer becomes leader. + self.activate_in_memory_pessimistic_locks(); + // A more recent read may happen on the old leader. So max ts should // be updated after a peer becomes leader. self.require_updating_max_ts(ctx); + // Exit entry cache warmup state when the peer becomes leader. 
self.entry_storage_mut().clear_entry_cache_warmup_state(); @@ -473,6 +481,7 @@ impl Peer { StateRole::Follower => { self.leader_lease_mut().expire(); self.storage_mut().cancel_generating_snap(None); + self.clear_in_memory_pessimistic_locks(); } _ => {} } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 9101a9328f3..02bbb03c35e 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -17,8 +17,8 @@ use raftstore::{ store::{ fsm::Proposal, util::{Lease, RegionReadProgress}, - Config, EntryStorage, PeerStat, ProposalQueue, ReadDelegate, ReadIndexQueue, ReadProgress, - TxnExt, + Config, EntryStorage, LocksStatus, PeerStat, ProposalQueue, ReadDelegate, ReadIndexQueue, + ReadProgress, TrackVer, TxnExt, }, Error, }; @@ -37,7 +37,7 @@ use crate::{ batch::StoreContext, fsm::{ApplyFsm, ApplyScheduler}, operation::{AsyncWriter, DestroyProgress, ProposalControl, SimpleWriteEncoder}, - router::{CmdResChannel, QueryResChannel}, + router::{CmdResChannel, PeerTick, QueryResChannel}, worker::PdTask, Result, }; @@ -85,6 +85,8 @@ pub struct Peer { txn_ext: Arc, txn_extra_op: Arc>, + pending_ticks: Vec, + /// Check whether this proposal can be proposed based on its epoch. proposal_control: ProposalControl, @@ -149,6 +151,7 @@ impl Peer { txn_ext: Arc::default(), txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::Noop)), proposal_control: ProposalControl::new(0), + pending_ticks: Vec::new(), split_trace: vec![], }; @@ -521,6 +524,46 @@ impl Peer { self.apply_scheduler = Some(apply_scheduler); } + /// Whether the snapshot is handling. + /// See the comments of `check_snap_status` for more details. + #[inline] + pub fn is_handling_snapshot(&self) -> bool { + // todo: This method may be unnecessary now? + false + } + + /// Returns `true` if the raft group has replicated a snapshot but not + /// committed it yet. 
+ #[inline] + pub fn has_pending_snapshot(&self) -> bool { + self.raft_group().snap().is_some() + } + + #[inline] + pub fn add_pending_tick(&mut self, tick: PeerTick) { + self.pending_ticks.push(tick); + } + + #[inline] + pub fn take_pending_ticks(&mut self) -> Vec { + mem::take(&mut self.pending_ticks) + } + + pub fn activate_in_memory_pessimistic_locks(&mut self) { + let mut pessimistic_locks = self.txn_ext.pessimistic_locks.write(); + pessimistic_locks.status = LocksStatus::Normal; + pessimistic_locks.term = self.term(); + pessimistic_locks.version = self.region().get_region_epoch().get_version(); + } + + pub fn clear_in_memory_pessimistic_locks(&mut self) { + let mut pessimistic_locks = self.txn_ext.pessimistic_locks.write(); + pessimistic_locks.status = LocksStatus::NotLeader; + pessimistic_locks.clear(); + pessimistic_locks.term = self.term(); + pessimistic_locks.version = self.region().get_region_epoch().get_version(); + } + #[inline] pub fn post_split(&mut self) { self.reset_region_buckets(); diff --git a/components/raftstore-v2/tests/integrations/mod.rs b/components/raftstore-v2/tests/integrations/mod.rs index 52c8ba5e1f8..c3061be0d2b 100644 --- a/components/raftstore-v2/tests/integrations/mod.rs +++ b/components/raftstore-v2/tests/integrations/mod.rs @@ -15,3 +15,4 @@ mod test_pd_heartbeat; mod test_read; mod test_split; mod test_status; +mod test_transfer_leader; diff --git a/components/raftstore-v2/tests/integrations/test_transfer_leader.rs b/components/raftstore-v2/tests/integrations/test_transfer_leader.rs new file mode 100644 index 00000000000..7096f06b1d2 --- /dev/null +++ b/components/raftstore-v2/tests/integrations/test_transfer_leader.rs @@ -0,0 +1,154 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::time::Duration; + +use engine_traits::Peekable; +use futures::executor::block_on; +use kvproto::{ + metapb, + raft_cmdpb::{AdminCmdType, CmdType, Request, TransferLeaderRequest}, +}; +use raft::prelude::ConfChangeType; +use raftstore_v2::router::PeerMsg; +use tikv_util::store::new_peer; + +use crate::cluster::Cluster; + +fn put_data( + region_id: u64, + cluster: &Cluster, + node_off: usize, + node_off_for_verify: usize, + key: &[u8], +) { + let router = &cluster.routers[node_off]; + let mut req = router.new_request_for(region_id); + let mut put_req = Request::default(); + put_req.set_cmd_type(CmdType::Put); + put_req.mut_put().set_key(key[1..].to_vec()); + put_req.mut_put().set_value(b"value".to_vec()); + req.mut_requests().push(put_req); + + router.wait_applied_to_current_term(region_id, Duration::from_secs(3)); + + // router.wait_applied_to_current_term(2, Duration::from_secs(3)); + let tablet_registry = cluster.node(node_off).tablet_registry(); + let tablet = tablet_registry + .get(region_id) + .unwrap() + .latest() + .unwrap() + .clone(); + assert!(tablet.get_value(key).unwrap().is_none()); + + let (msg, mut sub) = PeerMsg::raft_command(req.clone()); + router.send(region_id, msg).unwrap(); + std::thread::sleep(std::time::Duration::from_millis(10)); + cluster.dispatch(region_id, vec![]); + assert!(block_on(sub.wait_proposed())); + + std::thread::sleep(std::time::Duration::from_millis(10)); + cluster.dispatch(region_id, vec![]); + // triage send snapshot + std::thread::sleep(std::time::Duration::from_millis(100)); + cluster.dispatch(region_id, vec![]); + assert!(block_on(sub.wait_committed())); + + let resp = block_on(sub.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + assert_eq!(tablet.get_value(key).unwrap().unwrap(), b"value"); + + // Verify the data is ready in the other node + let tablet_registry = cluster.node(node_off_for_verify).tablet_registry(); + let tablet = tablet_registry + .get(region_id) + .unwrap() + 
.latest() + .unwrap() + .clone(); + assert_eq!(tablet.get_value(key).unwrap().unwrap(), b"value"); +} + +pub fn must_transfer_leader( + cluster: &Cluster, + region_id: u64, + from_off: usize, + to_off: usize, + to_peer: metapb::Peer, +) { + let router = &cluster.routers[from_off]; + let router2 = &cluster.routers[to_off]; + let mut req = router.new_request_for(region_id); + let mut transfer_req = TransferLeaderRequest::default(); + transfer_req.set_peer(to_peer.clone()); + let admin_req = req.mut_admin_request(); + admin_req.set_cmd_type(AdminCmdType::TransferLeader); + admin_req.set_transfer_leader(transfer_req); + let resp = router.command(region_id, req).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + cluster.dispatch(region_id, vec![]); + + let meta = router + .must_query_debug_info(region_id, Duration::from_secs(3)) + .unwrap(); + assert_eq!(meta.raft_status.soft_state.leader_id, to_peer.id); + let meta = router2 + .must_query_debug_info(region_id, Duration::from_secs(3)) + .unwrap(); + assert_eq!(meta.raft_status.soft_state.leader_id, to_peer.id); +} + +#[test] +fn test_transfer_leader() { + let cluster = Cluster::with_node_count(3, None); + let region_id = 2; + let router0 = &cluster.routers[0]; + + let mut req = router0.new_request_for(region_id); + let admin_req = req.mut_admin_request(); + admin_req.set_cmd_type(AdminCmdType::ChangePeer); + admin_req + .mut_change_peer() + .set_change_type(ConfChangeType::AddNode); + let store_id = cluster.node(1).id(); + let peer1 = new_peer(store_id, 10); + admin_req.mut_change_peer().set_peer(peer1.clone()); + let req_clone = req.clone(); + let resp = router0.command(region_id, req_clone).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + let epoch = req.get_header().get_region_epoch(); + let new_conf_ver = epoch.get_conf_ver() + 1; + let leader_peer = req.get_header().get_peer().clone(); + let meta = router0 + .must_query_debug_info(region_id, Duration::from_secs(3)) + .unwrap(); 
+ assert_eq!(meta.region_state.epoch.version, epoch.get_version()); + assert_eq!(meta.region_state.epoch.conf_ver, new_conf_ver); + assert_eq!(meta.region_state.peers, vec![leader_peer, peer1.clone()]); + let peer0_id = meta.raft_status.id; + + // So heartbeat will create a learner. + cluster.dispatch(region_id, vec![]); + let router1 = &cluster.routers[1]; + let meta = router1 + .must_query_debug_info(region_id, Duration::from_secs(3)) + .unwrap(); + assert_eq!(peer0_id, meta.raft_status.soft_state.leader_id); + assert_eq!(meta.raft_status.id, peer1.id, "{:?}", meta); + assert_eq!(meta.region_state.epoch.version, epoch.get_version()); + assert_eq!(meta.region_state.epoch.conf_ver, new_conf_ver); + cluster.dispatch(region_id, vec![]); + + // Ensure follower has latest entries before transfer leader. + put_data(region_id, &cluster, 0, 1, b"zkey1"); + + // Perform transfer leader + must_transfer_leader(&cluster, region_id, 0, 1, peer1); + + // Before transfer back to peer0, put some data again. 
+ put_data(region_id, &cluster, 1, 0, b"zkey2"); + + // Perform transfer leader + let store_id = cluster.node(0).id(); + must_transfer_leader(&cluster, region_id, 1, 0, new_peer(store_id, peer0_id)); +} diff --git a/components/raftstore/src/store/fsm/mod.rs b/components/raftstore/src/store/fsm/mod.rs index a9b954552d3..2f700eec9bf 100644 --- a/components/raftstore/src/store/fsm/mod.rs +++ b/components/raftstore/src/store/fsm/mod.rs @@ -16,7 +16,7 @@ pub use self::{ ChangePeer, ExecResult, GenSnapTask, Msg as ApplyTask, Notifier as ApplyNotifier, Proposal, Registration, TaskRes as ApplyTaskRes, }, - peer::{DestroyPeerJob, PeerFsm, MAX_PROPOSAL_SIZE_RATIO}, + peer::{new_admin_request, DestroyPeerJob, PeerFsm, MAX_PROPOSAL_SIZE_RATIO}, store::{ create_raft_batch_system, RaftBatchSystem, RaftPollerBuilder, RaftRouter, StoreInfo, StoreMeta, diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 5d7455b2d1c..65417732adf 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -54,9 +54,10 @@ pub use self::{ StoreMsg, StoreTick, WriteCallback, WriteResponse, }, peer::{ - can_amend_read, get_sync_log_from_request, propose_read_index, should_renew_lease, Peer, - PeerStat, ProposalContext, ProposalQueue, RequestInspector, RequestPolicy, - SnapshotRecoveryWaitApplySyncer, + can_amend_read, get_sync_log_from_request, make_transfer_leader_response, + propose_read_index, should_renew_lease, Peer, PeerStat, ProposalContext, ProposalQueue, + RequestInspector, RequestPolicy, SnapshotRecoveryWaitApplySyncer, + TRANSFER_LEADER_COMMAND_REPLY_CTX, }, peer_storage::{ clear_meta, do_snapshot, write_initial_apply_state, write_initial_raft_state, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 100544bd0f4..22b822c2115 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -4513,7 +4513,7 @@ where 
self.raft_group.raft.msgs.push(msg); } - /// Return true to if the transfer leader request is accepted. + /// Return true if the transfer leader request is accepted. /// /// When transferring leadership begins, leader sends a pre-transfer /// to target follower first to ensures it's ready to become leader. @@ -5655,7 +5655,7 @@ fn is_request_urgent(req: &RaftCmdRequest) -> bool { ) } -fn make_transfer_leader_response() -> RaftCmdResponse { +pub fn make_transfer_leader_response() -> RaftCmdResponse { let mut response = AdminResponse::default(); response.set_cmd_type(AdminCmdType::TransferLeader); response.set_transfer_leader(TransferLeaderResponse::default()); From fa6122e43b68cd0787f43428f321814f5e25fad4 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 13 Dec 2022 15:24:51 +0800 Subject: [PATCH 0402/1149] *: add raftstore v2 only tablet optimization (#13924) ref tikv/tikv#12842 - No WAL should be written - raftcf is dropped - No concurrent write - No multi batch write - Use smaller bloomfilter ratio to reduce memory footprint Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 7 +- cmd/tikv-ctl/src/executor.rs | 27 ++--- cmd/tikv-ctl/src/main.rs | 15 ++- components/engine_rocks/src/options.rs | 1 + components/engine_rocks/src/raw.rs | 2 +- components/engine_rocks/src/util.rs | 4 +- components/engine_test/src/lib.rs | 6 +- components/engine_traits/Cargo.toml | 1 + components/engine_traits/src/tablet.rs | 106 +++++++++++++---- .../src/operation/command/admin/split.rs | 16 +-- .../raftstore-v2/src/operation/query/local.rs | 15 ++- .../src/operation/ready/snapshot.rs | 7 +- components/raftstore-v2/src/raft/peer.rs | 16 ++- components/raftstore-v2/src/raft/storage.rs | 10 +- .../tests/integrations/cluster.rs | 25 ++-- .../tests/integrations/test_basic_write.rs | 6 +- .../tests/integrations/test_conf_change.rs | 6 +- components/server/src/server.rs | 15 ++- components/snap_recovery/src/init_cluster.rs | 20 ++-- src/config/mod.rs | 110 +++++++++++++----- 
src/server/engine_factory.rs | 89 +++++++++----- src/storage/config.rs | 10 ++ src/storage/kv/test_engine_builder.rs | 18 ++- src/storage/mod.rs | 20 +++- .../flow_controller/tablet_flow_controller.rs | 14 ++- tests/integrations/config/mod.rs | 17 ++- tests/integrations/config/test-custom.toml | 12 ++ tests/integrations/storage/test_titan.rs | 8 +- 28 files changed, 413 insertions(+), 190 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9b3eccfda51..494846ccb0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1630,6 +1630,7 @@ dependencies = [ "error_code", "fail", "file_system", + "keys", "kvproto", "log_wrappers", "protobuf", @@ -2875,7 +2876,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#bd07e9e598db63574cf06edaeea3c4687eadff59" +source = "git+https://github.com/tikv/rust-rocksdb.git#f94fdd30dd94f6fd22c8052edfd2c4039d9f2fbd" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2894,7 +2895,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#bd07e9e598db63574cf06edaeea3c4687eadff59" +source = "git+https://github.com/tikv/rust-rocksdb.git#f94fdd30dd94f6fd22c8052edfd2c4039d9f2fbd" dependencies = [ "bzip2-sys", "cc", @@ -4757,7 +4758,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#bd07e9e598db63574cf06edaeea3c4687eadff59" +source = "git+https://github.com/tikv/rust-rocksdb.git#f94fdd30dd94f6fd22c8052edfd2c4039d9f2fbd" dependencies = [ "libc 0.2.132", "librocksdb_sys", diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 1c42d728ca9..42b08c629e7 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -1,8 +1,7 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - borrow::ToOwned, cmp::Ordering, path::PathBuf, pin::Pin, str, string::ToString, sync::Arc, - time::Duration, u64, + borrow::ToOwned, cmp::Ordering, pin::Pin, str, string::ToString, sync::Arc, time::Duration, u64, }; use encryption_export::data_key_manager_from_config; @@ -28,7 +27,10 @@ use security::SecurityManager; use serde_json::json; use tikv::{ config::{ConfigController, TikvConfig}, - server::debug::{BottommostLevelCompaction, Debugger, RegionInfo}, + server::{ + debug::{BottommostLevelCompaction, Debugger, RegionInfo}, + KvEngineFactoryBuilder, + }, }; use tikv_util::escape; @@ -45,7 +47,6 @@ type MvccInfoStream = Pin, MvccInfo), Stri pub fn new_debug_executor( cfg: &TikvConfig, data_dir: Option<&str>, - skip_paranoid_checks: bool, host: Option<&str>, mgr: Arc, ) -> Box { @@ -55,7 +56,6 @@ pub fn new_debug_executor( // TODO: perhaps we should allow user skip specifying data path. let data_dir = data_dir.unwrap(); - let kv_path = cfg.infer_kv_engine_path(Some(data_dir)).unwrap(); let key_manager = data_key_manager_from_config(&cfg.security.encryption, &cfg.storage.data_dir) .unwrap() @@ -66,15 +66,10 @@ pub fn new_debug_executor( .build_shared_rocks_env(key_manager.clone(), None /* io_rate_limiter */) .unwrap(); - let mut kv_db_opts = cfg.rocksdb.build_opt(); - kv_db_opts.set_env(env.clone()); - kv_db_opts.set_paranoid_checks(!skip_paranoid_checks); - let kv_cfs_opts = cfg - .rocksdb - .build_cf_opts(&cache, None, cfg.storage.api_version()); - let kv_path = PathBuf::from(kv_path).canonicalize().unwrap(); - let kv_path = kv_path.to_str().unwrap(); - let kv_db = match new_engine_opt(kv_path, kv_db_opts, kv_cfs_opts) { + let factory = KvEngineFactoryBuilder::new(env.clone(), cfg, cache) + .lite(true) + .build(); + let kv_db = match factory.create_shared_db(data_dir) { Ok(db) => db, Err(e) => handle_engine_error(e), }; @@ -83,7 +78,7 @@ pub fn new_debug_executor( if !cfg.raft_engine.enable { let mut raft_db_opts = cfg.raftdb.build_opt(); 
raft_db_opts.set_env(env); - let raft_db_cf_opts = cfg.raftdb.build_cf_opts(&cache); + let raft_db_cf_opts = cfg.raftdb.build_cf_opts(factory.block_cache()); let raft_path = cfg.infer_raft_db_path(Some(data_dir)).unwrap(); if !db_exist(&raft_path) { error!("raft db not exists: {}", raft_path); @@ -380,7 +375,7 @@ pub trait DebugExecutor { to_config: &TikvConfig, mgr: Arc, ) { - let rhs_debug_executor = new_debug_executor(to_config, to_data_dir, false, to_host, mgr); + let rhs_debug_executor = new_debug_executor(to_config, to_data_dir, to_host, mgr); let r1 = self.get_region_info(region); let r2 = rhs_debug_executor.get_region_info(region); diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 72078d07f62..77888f36fa7 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -59,7 +59,7 @@ fn main() { // Initialize configuration and security manager. let cfg_path = opt.config.as_ref(); - let cfg = cfg_path.map_or_else( + let mut cfg = cfg_path.map_or_else( || { let mut cfg = TikvConfig::default(); cfg.log.level = tikv_util::logger::get_level_by_string("warn") @@ -249,9 +249,8 @@ fn main() { .exit(); } - let skip_paranoid_checks = opt.skip_paranoid_checks; - let debug_executor = - new_debug_executor(&cfg, data_dir, skip_paranoid_checks, host, Arc::clone(&mgr)); + cfg.rocksdb.paranoid_checks = Some(!opt.skip_paranoid_checks); + let debug_executor = new_debug_executor(&cfg, data_dir, host, Arc::clone(&mgr)); match cmd { Cmd::Print { cf, key } => { @@ -643,7 +642,7 @@ fn compact_whole_cluster( .name(format!("compact-{}", addr)) .spawn_wrapper(move || { tikv_alloc::add_thread_memory_accessor(); - let debug_executor = new_debug_executor(&cfg, None, false, Some(&addr), mgr); + let debug_executor = new_debug_executor(&cfg, None, Some(&addr), mgr); for cf in cfs { debug_executor.compact( Some(&addr), @@ -687,14 +686,14 @@ fn run_ldb_command(args: Vec, cfg: &TikvConfig) { .unwrap() .map(Arc::new); let env = get_env(key_manager, None /* 
io_rate_limiter */).unwrap(); - let mut opts = cfg.rocksdb.build_opt(); + let mut opts = cfg.rocksdb.build_opt(None); opts.set_env(env); engine_rocks::raw::run_ldb_tool(&args, &opts); } fn run_sst_dump_command(args: Vec, cfg: &TikvConfig) { - let opts = cfg.rocksdb.build_opt(); + let opts = cfg.rocksdb.build_opt(None); engine_rocks::raw::run_sst_dump_tool(&args, &opts); } @@ -714,7 +713,7 @@ fn print_bad_ssts(data_dir: &str, manifest: Option<&str>, pd_client: RpcClient, let stderr = BufferRedirect::stderr().unwrap(); let stdout = BufferRedirect::stdout().unwrap(); - let opts = cfg.rocksdb.build_opt(); + let opts = cfg.rocksdb.build_opt(None); match run_and_wait_child_process(|| engine_rocks::raw::run_sst_dump_tool(&args, &opts)) { Ok(code) => { diff --git a/components/engine_rocks/src/options.rs b/components/engine_rocks/src/options.rs index c50c7734f79..7579c92ba79 100644 --- a/components/engine_rocks/src/options.rs +++ b/components/engine_rocks/src/options.rs @@ -40,6 +40,7 @@ impl From for RocksWriteOptions { let mut r = RawWriteOptions::default(); r.set_sync(opts.sync()); r.set_no_slowdown(opts.no_slowdown()); + r.disable_wal(opts.disable_wal()); // TODO: enable it. 
r.set_memtable_insert_hint_per_batch(false); RocksWriteOptions(r) diff --git a/components/engine_rocks/src/raw.rs b/components/engine_rocks/src/raw.rs index 1a8718588b2..4c2dd71b2a2 100644 --- a/components/engine_rocks/src/raw.rs +++ b/components/engine_rocks/src/raw.rs @@ -14,6 +14,6 @@ pub use rocksdb::{ DBCompactionFilter, DBCompactionStyle, DBCompressionType, DBEntryType, DBRateLimiterMode, DBRecoveryMode, DBStatisticsTickerType, DBTitanDBBlobRunMode, Env, EventListener, IngestExternalFileOptions, LRUCacheOptions, MemoryAllocator, PerfContext, - PrepopulateBlockCache, Range, SliceTransform, TablePropertiesCollector, + PrepopulateBlockCache, Range, SliceTransform, Statistics, TablePropertiesCollector, TablePropertiesCollectorFactory, }; diff --git a/components/engine_rocks/src/util.rs b/components/engine_rocks/src/util.rs index 778e16c1a67..786dfec04d1 100644 --- a/components/engine_rocks/src/util.rs +++ b/components/engine_rocks/src/util.rs @@ -11,7 +11,7 @@ use slog_global::warn; use crate::{ cf_options::RocksCfOptions, db_options::RocksDbOptions, engine::RocksEngine, r2e, - rocks_metrics_defs::*, + raw::Statistics, rocks_metrics_defs::*, }; pub fn new_temp_engine(path: &tempfile::TempDir) -> Engines { @@ -28,7 +28,7 @@ pub fn new_default_engine(path: &str) -> Result { pub fn new_engine(path: &str, cfs: &[&str]) -> Result { let mut db_opts = RocksDbOptions::default(); - db_opts.enable_statistics(true); + db_opts.set_statistics(&Statistics::new_titan()); let cf_opts = cfs.iter().map(|name| (*name, Default::default())).collect(); new_engine_opt(path, db_opts, cf_opts) } diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index 605feedc7bd..16849acd5b8 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -88,7 +88,7 @@ pub mod kv { RocksEngine as KvTestEngine, RocksEngineIterator as KvTestEngineIterator, RocksSnapshot as KvTestSnapshot, RocksWriteBatchVec as KvTestWriteBatch, }; - use 
engine_traits::{MiscExt, Result, TabletFactory}; + use engine_traits::{MiscExt, Result, TabletContext, TabletFactory}; use crate::ctor::{CfOptions as KvTestCfOptions, DbOptions, KvEngineConstructorExt}; @@ -119,7 +119,7 @@ pub mod kv { } impl TabletFactory for TestTabletFactory { - fn open_tablet(&self, _id: u64, _suffix: Option, path: &Path) -> Result { + fn open_tablet(&self, _ctx: TabletContext, path: &Path) -> Result { KvTestEngine::new_kv_engine_opt( path.to_str().unwrap(), self.db_opt.clone(), @@ -127,7 +127,7 @@ pub mod kv { ) } - fn destroy_tablet(&self, _id: u64, _suffix: Option, path: &Path) -> Result<()> { + fn destroy_tablet(&self, _ctx: TabletContext, path: &Path) -> Result<()> { let tombstone_path = path.join(TOMBSTONE_SUFFIX); std::fs::remove_dir_all(&tombstone_path)?; std::fs::rename(path, &tombstone_path)?; diff --git a/components/engine_traits/Cargo.toml b/components/engine_traits/Cargo.toml index fcfcbdb2799..2370f1c9e7e 100644 --- a/components/engine_traits/Cargo.toml +++ b/components/engine_traits/Cargo.toml @@ -13,6 +13,7 @@ collections = { workspace = true } error_code = { workspace = true } fail = "0.5" file_system = { workspace = true } +keys = { workspace = true } kvproto = { workspace = true } log_wrappers = { workspace = true } protobuf = "2" diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index 988cd343fe3..acecb976f58 100644 --- a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -1,6 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. use std::{ + fmt::{self, Debug, Formatter}, path::{Path, PathBuf}, sync::{ atomic::{AtomicU64, Ordering}, @@ -9,6 +10,7 @@ use std::{ }; use collections::HashMap; +use kvproto::metapb::Region; use tikv_util::box_err; use crate::{Error, Result}; @@ -69,20 +71,67 @@ impl CachedTablet { } } +/// Context to be passed to `TabletFactory`. +#[derive(Clone)] +pub struct TabletContext { + /// ID of the tablet. 
It is usually the region ID. + pub id: u64, + /// Suffix the tablet. It is usually the index that the tablet starts accept + /// incremental modification. The reason to have suffix is that we can keep + /// more than one tablet for a region. + pub suffix: Option, + /// The expected start key of the tablet. The key should be in the format + /// tablet is actually stored, for example should have `z` prefix. + /// + /// Any key that is smaller than this key can be considered obsolete. + pub start_key: Box<[u8]>, + /// The expected end key of the tablet. The key should be in the format + /// tablet is actually stored, for example should have `z` prefix. + /// + /// Any key that is larger than or equal to this key can be considered + /// obsolete. + pub end_key: Box<[u8]>, +} + +impl Debug for TabletContext { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct("TabletContext") + .field("id", &self.id) + .field("suffix", &self.suffix) + .field("start_key", &log_wrappers::Value::key(&self.start_key)) + .field("end_key", &log_wrappers::Value::key(&self.end_key)) + .finish() + } +} + +impl TabletContext { + pub fn new(region: &Region, suffix: Option) -> Self { + TabletContext { + id: region.get_id(), + suffix, + start_key: keys::data_key(region.get_start_key()).into_boxed_slice(), + end_key: keys::data_end_key(region.get_end_key()).into_boxed_slice(), + } + } + + /// Create a context that assumes there is only one region and it covers the + /// whole key space. Normally you should only use this in tests. + pub fn with_infinite_region(id: u64, suffix: Option) -> Self { + let mut region = Region::default(); + region.set_id(id); + Self::new(®ion, suffix) + } +} + /// A factory trait to create new tablet for multi-rocksdb architecture. // It should be named as `EngineFactory` for consistency, but we are about to // rename engine to tablet, so always use tablet for new traits/types. pub trait TabletFactory: Send + Sync { /// Open the tablet in `path`. 
- /// - /// `id` and `suffix` is used to mark the identity of tablet. The id is - /// likely the region Id, the suffix could be the current raft log - /// index. The reason to have suffix is that we can keep more than one - /// tablet for a region. - fn open_tablet(&self, id: u64, suffix: Option, path: &Path) -> Result; + fn open_tablet(&self, ctx: TabletContext, path: &Path) -> Result; /// Destroy the tablet and its data - fn destroy_tablet(&self, id: u64, suffix: Option, path: &Path) -> Result<()>; + fn destroy_tablet(&self, ctx: TabletContext, path: &Path) -> Result<()>; /// Check if the tablet with specified path exists fn exists(&self, path: &Path) -> bool; @@ -105,12 +154,12 @@ impl TabletFactory for SingletonFactory { /// likely the region Id, the suffix could be the current raft log /// index. The reason to have suffix is that we can keep more than one /// tablet for a region. - fn open_tablet(&self, _id: u64, _suffix: Option, _path: &Path) -> Result { + fn open_tablet(&self, _ctx: TabletContext, _path: &Path) -> Result { Ok(self.tablet.clone()) } /// Destroy the tablet and its data - fn destroy_tablet(&self, _id: u64, _suffix: Option, _path: &Path) -> Result<()> { + fn destroy_tablet(&self, _ctx: TabletContext, _path: &Path) -> Result<()> { Ok(()) } @@ -205,19 +254,21 @@ impl TabletRegistry { /// Load the tablet and set it as the latest. /// /// If the tablet doesn't exist, it will create an empty one. 
- pub fn load(&self, id: u64, suffix: u64, create: bool) -> Result> + pub fn load(&self, ctx: TabletContext, create: bool) -> Result> where EK: Clone, { - let path = self.tablet_path(id, suffix); + assert!(ctx.suffix.is_some()); + let id = ctx.id; + let path = self.tablet_path(id, ctx.suffix.unwrap()); if !create && !self.tablets.factory.exists(&path) { return Err(Error::Other(box_err!( "tablet ({}, {:?}) doesn't exist", id, - suffix + ctx.suffix ))); } - let tablet = self.tablets.factory.open_tablet(id, Some(suffix), &path)?; + let tablet = self.tablets.factory.open_tablet(ctx, &path)?; let mut cached = self.get_or_default(id); cached.set(tablet); Ok(cached) @@ -288,11 +339,13 @@ mod tests { let tablet = Arc::new(1); let singleton = SingletonFactory::new(tablet.clone()); let registry = TabletRegistry::new(Box::new(singleton), "").unwrap(); - registry.load(1, 1, true).unwrap(); + let mut ctx = TabletContext::with_infinite_region(1, Some(1)); + registry.load(ctx.clone(), true).unwrap(); let mut cached = registry.get(1).unwrap(); assert_eq!(cached.latest().cloned(), Some(tablet.clone())); - registry.load(2, 1, true).unwrap(); + ctx.id = 2; + registry.load(ctx.clone(), true).unwrap(); let mut count = 0; registry.for_each_opened_tablet(|id, cached| { assert!(&[1, 2].contains(&id), "{}", id); @@ -305,11 +358,12 @@ mod tests { // Destroy should be ignored. registry .tablet_factory() - .destroy_tablet(2, Some(1), ®istry.tablet_path(2, 1)) + .destroy_tablet(ctx.clone(), ®istry.tablet_path(2, 1)) .unwrap(); // Exist check should always succeed. 
- registry.load(3, 1, false).unwrap(); + ctx.id = 3; + registry.load(ctx, false).unwrap(); let mut cached = registry.get(3).unwrap(); assert_eq!(cached.latest().cloned(), Some(tablet)); } @@ -321,12 +375,12 @@ mod tests { } impl TabletFactory for MemoryTablet { - fn open_tablet(&self, id: u64, suffix: Option, path: &Path) -> Result { + fn open_tablet(&self, ctx: TabletContext, path: &Path) -> Result { let mut tablet = self.tablet.lock().unwrap(); if tablet.contains_key(path) { return Err(Error::Other(box_err!("tablet is opened"))); } - tablet.insert(path.to_owned(), Arc::new((id, suffix.unwrap_or(0)))); + tablet.insert(path.to_owned(), Arc::new((ctx.id, ctx.suffix.unwrap_or(0)))); Ok(tablet[path].clone()) } @@ -335,9 +389,9 @@ mod tests { tablet.contains_key(path) } - fn destroy_tablet(&self, id: u64, suffix: Option, path: &Path) -> Result<()> { + fn destroy_tablet(&self, ctx: TabletContext, path: &Path) -> Result<()> { let prev = self.tablet.lock().unwrap().remove(path).unwrap(); - assert_eq!((id, suffix.unwrap_or(0)), *prev); + assert_eq!((ctx.id, ctx.suffix.unwrap_or(0)), *prev); Ok(()) } } @@ -349,9 +403,10 @@ mod tests { }; let registry = TabletRegistry::new(Box::new(factory), "").unwrap(); - let mut tablet_1_10 = registry.load(1, 10, true).unwrap(); + let mut ctx = TabletContext::with_infinite_region(1, Some(10)); + let mut tablet_1_10 = registry.load(ctx.clone(), true).unwrap(); // It's open already, load it twice should report lock error. - registry.load(1, 10, true).unwrap_err(); + registry.load(ctx.clone(), true).unwrap_err(); let mut cached = registry.get(1).unwrap(); assert_eq!(cached.latest(), tablet_1_10.latest()); @@ -361,14 +416,15 @@ mod tests { let tablet_path = registry.tablet_path(1, 11); assert!(!registry.tablet_factory().exists(&tablet_path)); // Not exist tablet should report error. 
- registry.load(1, 11, false).unwrap_err(); + ctx.suffix = Some(11); + registry.load(ctx.clone(), false).unwrap_err(); assert!(registry.get(2).is_none()); // Though path not exist, but we should be able to create an empty one. assert_eq!(registry.get_or_default(2).latest(), None); assert!(!registry.tablet_factory().exists(&tablet_path)); // Load new suffix should update cache. - registry.load(1, 11, true).unwrap(); + registry.load(ctx, true).unwrap(); assert_ne!(cached.latest(), tablet_1_10.cache()); let tablet_path = registry.tablet_path(1, 11); assert!(registry.tablet_factory().exists(&tablet_path)); diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 7de49a716c3..157150126b4 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -30,7 +30,8 @@ use std::{cmp, collections::VecDeque}; use collections::HashSet; use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{ - Checkpointer, DeleteStrategy, KvEngine, RaftEngine, RaftLogBatch, Range, CF_DEFAULT, + Checkpointer, DeleteStrategy, KvEngine, RaftEngine, RaftLogBatch, Range, TabletContext, + CF_DEFAULT, }; use fail::fail_point; use keys::enc_end_key; @@ -260,10 +261,8 @@ impl Apply { }); let reg = self.tablet_registry(); let path = reg.tablet_path(region_id, log_index); - let tablet = reg - .tablet_factory() - .open_tablet(region_id, Some(log_index), &path) - .unwrap(); + let ctx = TabletContext::new(®ions[derived_index], Some(log_index)); + let tablet = reg.tablet_factory().open_tablet(ctx, &path).unwrap(); // Remove the old write batch. 
self.write_batch.take(); self.publish_tablet(tablet); @@ -496,7 +495,7 @@ mod test { kv::TestTabletFactory, raft, }; - use engine_traits::{CfOptionsExt, Peekable, TabletRegistry, WriteBatch, ALL_CFS}; + use engine_traits::{CfOptionsExt, Peekable, TabletRegistry, WriteBatch, DATA_CFS}; use futures::channel::mpsc::unbounded; use kvproto::{ metapb::RegionEpoch, @@ -631,14 +630,15 @@ mod test { let logger = slog_global::borrow_global().new(o!()); let path = TempDir::new().unwrap(); - let cf_opts = ALL_CFS + let cf_opts = DATA_CFS .iter() .copied() .map(|cf| (cf, CfOptions::default())) .collect(); let factory = Box::new(TestTabletFactory::new(DbOptions::default(), cf_opts)); let reg = TabletRegistry::new(factory, path.path()).unwrap(); - reg.load(region.id, 5, true).unwrap(); + let ctx = TabletContext::new(®ion, Some(5)); + reg.load(ctx, true).unwrap(); let mut region_state = RegionLocalState::default(); region_state.set_state(PeerState::Normal); diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 120e64cb872..19f9a7e91b9 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -438,7 +438,7 @@ mod tests { ctor::{CfOptions, DbOptions}, kv::{KvTestEngine, TestTabletFactory}, }; - use engine_traits::{MiscExt, Peekable, SyncMutable, ALL_CFS}; + use engine_traits::{MiscExt, Peekable, SyncMutable, TabletContext, DATA_CFS}; use futures::executor::block_on; use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, raft_cmdpb::*}; use raftstore::store::{ @@ -546,7 +546,7 @@ mod tests { // Building a tablet factory let ops = DbOptions::default(); - let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); + let cf_opts = DATA_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); let path = Builder::new() .prefix("test-local-reader") .tempdir() @@ -631,7 +631,8 @@ mod tests { }; meta.readers.insert(1, read_delegate); // 
create tablet with region_id 1 and prepare some data - reg.load(1, 10, true).unwrap(); + let ctx = TabletContext::new(®ion1, Some(10)); + reg.load(ctx, true).unwrap(); } let (ch_tx, ch_rx) = sync_channel(1); @@ -737,7 +738,7 @@ mod tests { fn test_read_delegate() { // Building a tablet factory let ops = DbOptions::default(); - let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); + let cf_opts = DATA_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); let path = Builder::new() .prefix("test-local-reader") .tempdir() @@ -758,7 +759,8 @@ mod tests { meta.readers.insert(1, read_delegate); // create tablet with region_id 1 and prepare some data - reg.load(1, 10, true).unwrap(); + let mut ctx = TabletContext::with_infinite_region(1, Some(10)); + reg.load(ctx, true).unwrap(); tablet1 = reg.get(1).unwrap().latest().unwrap().clone(); tablet1.put(b"a1", b"val1").unwrap(); @@ -767,7 +769,8 @@ mod tests { meta.readers.insert(2, read_delegate); // create tablet with region_id 1 and prepare some data - reg.load(2, 10, true).unwrap(); + ctx = TabletContext::with_infinite_region(2, Some(10)); + reg.load(ctx, true).unwrap(); tablet2 = reg.get(2).unwrap().latest().unwrap().clone(); tablet2.put(b"a2", b"val2").unwrap(); } diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 8ac27ba2466..5bf9fc27269 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -28,7 +28,7 @@ use std::{ }, }; -use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; +use engine_traits::{KvEngine, RaftEngine, TabletContext, TabletRegistry}; use kvproto::raft_serverpb::{PeerState, RaftSnapshotData, RegionLocalState}; use protobuf::Message; use raft::eraftpb::Snapshot; @@ -125,9 +125,8 @@ impl Peer { let first_index = self.storage().entry_storage().first_index(); if first_index == persisted_index + 1 { let region_id = 
self.region_id(); - ctx.tablet_registry - .load(region_id, persisted_index, false) - .unwrap(); + let tablet_ctx = TabletContext::new(self.region(), Some(persisted_index)); + ctx.tablet_registry.load(tablet_ctx, false).unwrap(); self.schedule_apply_fsm(ctx); self.storage_mut().on_applied_snapshot(); self.raft_group_mut().advance_apply_to(persisted_index); diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 02bbb03c35e..6111e75e691 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -8,7 +8,7 @@ use std::{ use collections::{HashMap, HashSet}; use crossbeam::atomic::AtomicCell; -use engine_traits::{CachedTablet, KvEngine, RaftEngine, TabletRegistry}; +use engine_traits::{CachedTablet, KvEngine, RaftEngine, TabletContext, TabletRegistry}; use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, pdpb, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; use raft::{RawNode, StateRole}; @@ -111,16 +111,20 @@ impl Peer { let region_id = storage.region().get_id(); let tablet_index = storage.region_state().get_tablet_index(); + + let raft_group = RawNode::new(&raft_cfg, storage, &logger)?; + let region = raft_group.store().region_state().get_region().clone(); + let cached_tablet = tablet_registry.get_or_default(region_id); - // Another option is always create tablet even if tablet index is 0. But this - // can introduce race when gc old tablet and create new peer. + // We can't create tablet if tablet index is 0. It can introduce race when gc + // old tablet and create new peer. We also can't get the correct range of the + // region, which is required for kv data gc. if tablet_index != 0 { + let ctx = TabletContext::new(®ion, Some(tablet_index)); // TODO: Perhaps we should stop create the tablet automatically. 
- tablet_registry.load(region_id, tablet_index, false)?; + tablet_registry.load(ctx, false)?; } - let raft_group = RawNode::new(&raft_cfg, storage, &logger)?; - let region = raft_group.store().region_state().get_region().clone(); let tag = format!("[region {}] {}", region.get_id(), peer_id); let mut peer = Peer { tablet: cached_tablet, diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 5211d293e0f..a27e79549e1 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -376,7 +376,8 @@ mod tests { raft::RaftTestEngine, }; use engine_traits::{ - KvEngine, RaftEngine, RaftEngineReadOnly, RaftLogBatch, TabletRegistry, ALL_CFS, + KvEngine, RaftEngine, RaftEngineReadOnly, RaftLogBatch, TabletContext, TabletRegistry, + DATA_CFS, }; use kvproto::{ metapb::{Peer, Region}, @@ -476,7 +477,7 @@ mod tests { raft_engine.consume(&mut wb, true).unwrap(); // building a tablet factory let ops = DbOptions::default(); - let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); + let cf_opts = DATA_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); let factory = Box::new(TestTabletFactory::new(ops, cf_opts)); let reg = TabletRegistry::new(factory, path.path().join("tablet")).unwrap(); let mut worker = Worker::new("test-read-worker").lazy_build("test-read-worker"); @@ -523,10 +524,11 @@ mod tests { mgr.init().unwrap(); // building a tablet factory let ops = DbOptions::default(); - let cf_opts = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); + let cf_opts = DATA_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); let factory = Box::new(TestTabletFactory::new(ops, cf_opts)); let reg = TabletRegistry::new(factory, path.path().join("tablet")).unwrap(); - reg.load(region.get_id(), 10, true).unwrap(); + let tablet_ctx = TabletContext::new(®ion, Some(10)); + reg.load(tablet_ctx, true).unwrap(); // setup read runner worker and peer storage let mut 
worker = Worker::new("test-read-worker").lazy_build("test-read-worker"); let sched = worker.scheduler(); diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 11f8094612b..b09f351b066 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -20,7 +20,7 @@ use engine_test::{ kv::{KvTestEngine, KvTestSnapshot, TestTabletFactory}, raft::RaftTestEngine, }; -use engine_traits::{TabletRegistry, ALL_CFS}; +use engine_traits::{TabletContext, TabletRegistry, DATA_CFS}; use futures::executor::block_on; use kvproto::{ metapb::{self, RegionEpoch, Store}, @@ -47,6 +47,18 @@ use tikv_util::{ }; use txn_types::WriteBatchFlags; +pub fn check_skip_wal(path: &str) { + let mut found = false; + for f in std::fs::read_dir(path).unwrap() { + let e = f.unwrap(); + if e.path().extension().map_or(false, |ext| ext == "log") { + found = true; + assert_eq!(e.metadata().unwrap().len(), 0, "{}", e.path().display()); + } + } + assert!(found, "no WAL found in {}", path); +} + pub struct TestRouter(RaftRouter); impl Deref for TestRouter { @@ -209,7 +221,7 @@ impl RunningState { causal_ts_provider: Option>, logger: &Logger, ) -> (TestRouter, TabletSnapManager, Self) { - let cf_opts = ALL_CFS + let cf_opts = DATA_CFS .iter() .copied() .map(|cf| (cf, CfOptions::default())) @@ -226,16 +238,13 @@ impl RunningState { if let Some(region) = bootstrap.bootstrap_first_region(&store, store_id).unwrap() { let factory = registry.tablet_factory(); let path = registry.tablet_path(region.get_id(), RAFT_INIT_LOG_INDEX); + let ctx = TabletContext::new(®ion, Some(RAFT_INIT_LOG_INDEX)); if factory.exists(&path) { registry.remove(region.get_id()); - factory - .destroy_tablet(region.get_id(), Some(RAFT_INIT_LOG_INDEX), &path) - .unwrap(); + factory.destroy_tablet(ctx.clone(), &path).unwrap(); } // Create the tablet without loading it in cache. 
- factory - .open_tablet(region.get_id(), Some(RAFT_INIT_LOG_INDEX), &path) - .unwrap(); + factory.open_tablet(ctx, &path).unwrap(); } let (router, mut system) = create_store_batch_system::( diff --git a/components/raftstore-v2/tests/integrations/test_basic_write.rs b/components/raftstore-v2/tests/integrations/test_basic_write.rs index 807d64de756..29f665758d6 100644 --- a/components/raftstore-v2/tests/integrations/test_basic_write.rs +++ b/components/raftstore-v2/tests/integrations/test_basic_write.rs @@ -12,7 +12,7 @@ use raftstore::store::{INIT_EPOCH_CONF_VER, INIT_EPOCH_VER}; use raftstore_v2::router::PeerMsg; use tikv_util::store::new_peer; -use crate::cluster::Cluster; +use crate::cluster::{check_skip_wal, Cluster}; /// Test basic write flow. #[test] @@ -147,4 +147,8 @@ fn test_put_delete() { assert!(!resp.get_header().has_error(), "{:?}", resp); let snap = router.stale_snapshot(2); assert_matches!(snap.get_value(b"key"), Ok(None)); + + // Check if WAL is skipped for basic writes. + let mut cached = cluster.node(0).tablet_registry().get(2).unwrap(); + check_skip_wal(cached.latest().unwrap().as_inner().path()); } diff --git a/components/raftstore-v2/tests/integrations/test_conf_change.rs b/components/raftstore-v2/tests/integrations/test_conf_change.rs index 1b9ca50daf7..db62ae4a75a 100644 --- a/components/raftstore-v2/tests/integrations/test_conf_change.rs +++ b/components/raftstore-v2/tests/integrations/test_conf_change.rs @@ -8,7 +8,7 @@ use raft::prelude::ConfChangeType; use raftstore_v2::router::{PeerMsg, PeerTick}; use tikv_util::store::new_learner_peer; -use crate::cluster::Cluster; +use crate::cluster::{check_skip_wal, Cluster}; #[test] fn test_simple_change() { @@ -97,4 +97,8 @@ fn test_simple_change() { assert_eq!(meta.region_state.peers, vec![leader_peer]); // TODO: check if the peer is removed once life trace is implemented or // snapshot is implemented. + + // Check if WAL is skipped for admin command. 
+ let mut cached = cluster.node(0).tablet_registry().get(2).unwrap(); + check_skip_wal(cached.latest().unwrap().as_inner().path()); } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index a50e3a39667..065afd8ec0c 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -45,7 +45,7 @@ use engine_rocks::{ use engine_rocks_helper::sst_recovery::{RecoveryRunner, DEFAULT_CHECK_INTERVAL}; use engine_traits::{ CachedTablet, CfOptions, CfOptionsExt, Engines, FlowControlFactorsExt, KvEngine, MiscExt, - RaftEngine, SingletonFactory, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, + RaftEngine, SingletonFactory, TabletContext, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, }; use error_code::ErrorCodeExt; use file_system::{ @@ -1821,7 +1821,8 @@ impl TikvServer { let reg = TabletRegistry::new(Box::new(SingletonFactory::new(kv_engine)), &self.store_path) .unwrap(); // It always use the singleton kv_engine, use arbitrary id and suffix. - reg.load(0, 0, false).unwrap(); + let ctx = TabletContext::with_infinite_region(0, Some(0)); + reg.load(ctx, false).unwrap(); self.tablet_registry = Some(reg.clone()); engines.raft.register_config(cfg_controller); @@ -2087,7 +2088,9 @@ mod test { }; use engine_rocks::raw::Env; - use engine_traits::{FlowControlFactorsExt, MiscExt, SyncMutable, TabletRegistry, CF_DEFAULT}; + use engine_traits::{ + FlowControlFactorsExt, MiscExt, SyncMutable, TabletContext, TabletRegistry, CF_DEFAULT, + }; use tempfile::Builder; use tikv::{config::TikvConfig, server::KvEngineFactoryBuilder}; use tikv_util::{config::ReadableSize, time::Instant}; @@ -2109,7 +2112,8 @@ mod test { let reg = TabletRegistry::new(Box::new(factory), path.path()).unwrap(); for i in 1..6 { - reg.load(i, 10, true).unwrap(); + let ctx = TabletContext::with_infinite_region(i, Some(10)); + reg.load(ctx, true).unwrap(); } let mut cached = reg.get(1).unwrap(); @@ -2127,7 +2131,8 @@ mod test { .unwrap() .unwrap(); - reg.load(1, 20, 
true).unwrap(); + let ctx = TabletContext::with_infinite_region(1, Some(20)); + reg.load(ctx, true).unwrap(); tablet = cached.latest().unwrap(); for i in 1..11 { diff --git a/components/snap_recovery/src/init_cluster.rs b/components/snap_recovery/src/init_cluster.rs index 08a45073309..9147810f03c 100644 --- a/components/snap_recovery/src/init_cluster.rs +++ b/components/snap_recovery/src/init_cluster.rs @@ -10,7 +10,10 @@ use pd_client::{Error as PdError, PdClient}; use raft_log_engine::RaftLogEngine; use raftstore::store::initial_region; use thiserror::Error; -use tikv::{config::TikvConfig, server::config::Config as ServerConfig}; +use tikv::{ + config::TikvConfig, + server::{config::Config as ServerConfig, KvEngineFactoryBuilder}, +}; use tikv_util::config::{ReadableDuration, ReadableSize, VersionTrack}; const CLUSTER_BOOTSTRAPPED_MAX_RETRY: u64 = 60; @@ -308,15 +311,10 @@ pub fn create_local_engine_service( let block_cache = config.storage.block_cache.build_shared_cache(); // init rocksdb / kv db - let mut db_opts = config.rocksdb.build_opt(); - db_opts.set_env(env.clone()); - let cf_opts = config - .rocksdb - .build_cf_opts(&block_cache, None, config.storage.api_version()); - let db_path = config - .infer_kv_engine_path(None) - .map_err(|e| format!("infer kvdb path: {}", e))?; - let kv_db = match new_engine_opt(&db_path, db_opts, cf_opts) { + let factory = KvEngineFactoryBuilder::new(env.clone(), config, block_cache) + .lite(true) + .build(); + let kv_db = match factory.create_shared_db(&config.storage.data_dir) { Ok(db) => db, Err(e) => handle_engine_error(e), }; @@ -326,7 +324,7 @@ pub fn create_local_engine_service( // rocksdb let mut raft_db_opts = config.raftdb.build_opt(); raft_db_opts.set_env(env); - let raft_db_cf_opts = config.raftdb.build_cf_opts(&block_cache); + let raft_db_cf_opts = config.raftdb.build_cf_opts(factory.block_cache()); let raft_path = config .infer_raft_db_path(None) .map_err(|e| format!("infer raftdb path: {}", e))?; diff --git 
a/src/config/mod.rs b/src/config/mod.rs index 2b0818e93d3..0945eb7ca21 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -31,6 +31,7 @@ use engine_rocks::{ raw::{ BlockBasedOptions, Cache, ChecksumType, CompactionPriority, DBCompactionStyle, DBCompressionType, DBRateLimiterMode, DBRecoveryMode, Env, PrepopulateBlockCache, + Statistics, }, util::{FixedPrefixSliceTransform, FixedSuffixSliceTransform, NoopSliceTransform}, RaftDbLogger, RangePropertiesCollectorFactory, RawMvccPropertiesCollectorFactory, @@ -80,7 +81,7 @@ use crate::{ ttl::TtlCompactionFilterFactory, Config as ServerConfig, CONFIG_ROCKSDB_GAUGE, }, - storage::config::{Config as StorageConfig, DEFAULT_DATA_DIR}, + storage::config::{Config as StorageConfig, EngineType, DEFAULT_DATA_DIR}, }; pub const DEFAULT_ROCKSDB_SUB_DIR: &str = "db"; @@ -107,6 +108,15 @@ pub const LAST_CONFIG_FILE: &str = "last_tikv.toml"; const TMP_CONFIG_FILE: &str = "tmp_tikv.toml"; const MAX_BLOCK_SIZE: usize = 32 * MIB as usize; +fn bloom_filter_ratio(et: EngineType) -> f64 { + match et { + EngineType::RaftKv => 0.1, + // In v2, every peer has its own tablet. The data scale is about tens of + // GiBs. We only need a small portion for those key. 
+ EngineType::RaftKv2 => 0.005, + } +} + fn memory_limit_for_cf(is_raft_db: bool, cf: &str, total_mem: u64) -> ReadableSize { let (ratio, min, max) = match (is_raft_db, cf) { (true, CF_DEFAULT) => (0.02, RAFT_MIN_MEM, RAFT_MAX_MEM), @@ -663,8 +673,10 @@ impl DefaultCfConfig { cache: &Cache, region_info_accessor: Option<&RegionInfoAccessor>, api_version: ApiVersion, + for_engine: EngineType, ) -> RocksCfOptions { let mut cf_opts = build_cf_opt!(self, CF_DEFAULT, cache, region_info_accessor); + cf_opts.set_memtable_prefix_bloom_size_ratio(bloom_filter_ratio(for_engine)); let f = RangePropertiesCollectorFactory { prop_size_index_distance: self.prop_size_index_distance, prop_keys_index_distance: self.prop_keys_index_distance, @@ -778,6 +790,7 @@ impl WriteCfConfig { &self, cache: &Cache, region_info_accessor: Option<&RegionInfoAccessor>, + for_engine: EngineType, ) -> RocksCfOptions { let mut cf_opts = build_cf_opt!(self, CF_WRITE, cache, region_info_accessor); // Prefix extractor(trim the timestamp at tail) for write cf. @@ -788,7 +801,7 @@ impl WriteCfConfig { ) .unwrap(); // Create prefix bloom filter for memtable. - cf_opts.set_memtable_prefix_bloom_size_ratio(0.1); + cf_opts.set_memtable_prefix_bloom_size_ratio(bloom_filter_ratio(for_engine)); // Collects user defined properties. 
cf_opts.add_table_properties_collector_factory( "tikv.mvcc-properties-collector", @@ -872,7 +885,7 @@ impl Default for LockCfConfig { } impl LockCfConfig { - pub fn build_opt(&self, cache: &Cache) -> RocksCfOptions { + pub fn build_opt(&self, cache: &Cache, for_engine: EngineType) -> RocksCfOptions { let no_region_info_accessor: Option<&RegionInfoAccessor> = None; let mut cf_opts = build_cf_opt!(self, CF_LOCK, cache, no_region_info_accessor); cf_opts @@ -883,7 +896,7 @@ impl LockCfConfig { prop_keys_index_distance: self.prop_keys_index_distance, }; cf_opts.add_table_properties_collector_factory("tikv.range-properties-collector", f); - cf_opts.set_memtable_prefix_bloom_size_ratio(0.1); + cf_opts.set_memtable_prefix_bloom_size_ratio(bloom_filter_ratio(for_engine)); cf_opts.set_titan_cf_options(&self.titan.build_opts()); cf_opts } @@ -1058,14 +1071,16 @@ pub struct DbConfig { pub use_direct_io_for_flush_and_compaction: bool, #[online_config(skip)] pub enable_pipelined_write: bool, - // deprecated. TiKV will use a new write mode when set `enable_pipelined_write` false and fall - // back to write mode in 3.0 when set `enable_pipelined_write` true. The code of - // multi-batch-write in RocksDB has been removed. #[online_config(skip)] - #[serde(skip_serializing)] - pub enable_multi_batch_write: bool, + pub enable_multi_batch_write: Option, #[online_config(skip)] pub enable_unordered_write: bool, + #[online_config(skip)] + pub allow_concurrent_memtable_write: Option, + // Dangerous option only for programming use. 
+ #[online_config(skip)] + #[serde(skip)] + pub paranoid_checks: Option, #[online_config(submodule)] pub defaultcf: DefaultCfConfig, #[online_config(submodule)] @@ -1115,8 +1130,10 @@ impl Default for DbConfig { writable_file_max_buffer_size: ReadableSize::mb(1), use_direct_io_for_flush_and_compaction: false, enable_pipelined_write: false, - enable_multi_batch_write: true, // deprecated + enable_multi_batch_write: None, // deprecated enable_unordered_write: false, + allow_concurrent_memtable_write: None, + paranoid_checks: None, defaultcf: DefaultCfConfig::default(), writecf: WriteCfConfig::default(), lockcf: LockCfConfig::default(), @@ -1127,7 +1144,19 @@ impl Default for DbConfig { } impl DbConfig { - pub fn build_opt(&self) -> RocksDbOptions { + pub fn optimize_for(&mut self, engine: EngineType) { + match engine { + EngineType::RaftKv => { + self.allow_concurrent_memtable_write.get_or_insert(true); + } + EngineType::RaftKv2 => { + self.enable_multi_batch_write.get_or_insert(false); + self.allow_concurrent_memtable_write.get_or_insert(false); + } + } + } + + pub fn build_opt(&self, stats: Option<&Statistics>) -> RocksDbOptions { let mut opts = RocksDbOptions::default(); opts.set_wal_recovery_mode(self.wal_recovery_mode); if !self.wal_dir.is_empty() { @@ -1143,7 +1172,12 @@ impl DbConfig { opts.set_max_manifest_file_size(self.max_manifest_file_size.0); opts.create_if_missing(self.create_if_missing); opts.set_max_open_files(self.max_open_files); - opts.enable_statistics(self.enable_statistics); + if self.enable_statistics { + match stats { + Some(stats) => opts.set_statistics(stats), + None => opts.set_statistics(&Statistics::new_titan()), + } + } opts.set_stats_dump_period_sec(self.stats_dump_period.as_secs() as usize); opts.set_compaction_readahead_size(self.compaction_readahead_size.0); opts.set_max_log_file_size(self.info_log_max_size.0); @@ -1175,9 +1209,19 @@ impl DbConfig { self.use_direct_io_for_flush_and_compaction, ); 
opts.enable_pipelined_write(self.enable_pipelined_write); - let enable_multi_batch_write = !self.enable_pipelined_write && !self.enable_unordered_write; + let mut enable_multi_batch_write = + !self.enable_pipelined_write && !self.enable_unordered_write; + if self.allow_concurrent_memtable_write == Some(false) + && self.enable_multi_batch_write == Some(false) + { + enable_multi_batch_write = false + } opts.enable_multi_batch_write(enable_multi_batch_write); opts.enable_unordered_write(self.enable_unordered_write); + opts.allow_concurrent_memtable_write(self.allow_concurrent_memtable_write.unwrap_or(true)); + if let Some(b) = self.paranoid_checks { + opts.set_paranoid_checks(b); + } opts.set_info_log(RocksdbLogger::default()); opts.set_info_log_level(self.info_log_level.into()); if self.titan.enabled { @@ -1191,21 +1235,24 @@ impl DbConfig { cache: &Cache, region_info_accessor: Option<&RegionInfoAccessor>, api_version: ApiVersion, + for_engine: EngineType, ) -> Vec<(&'static str, RocksCfOptions)> { - vec![ - ( - CF_DEFAULT, - self.defaultcf - .build_opt(cache, region_info_accessor, api_version), - ), - (CF_LOCK, self.lockcf.build_opt(cache)), - ( - CF_WRITE, - self.writecf.build_opt(cache, region_info_accessor), - ), - // TODO: remove CF_RAFT. 
- (CF_RAFT, self.raftcf.build_opt(cache)), - ] + let mut cf_opts = Vec::with_capacity(4); + cf_opts.push(( + CF_DEFAULT, + self.defaultcf + .build_opt(cache, region_info_accessor, api_version, for_engine), + )); + cf_opts.push((CF_LOCK, self.lockcf.build_opt(cache, for_engine))); + cf_opts.push(( + CF_WRITE, + self.writecf + .build_opt(cache, region_info_accessor, for_engine), + )); + if for_engine == EngineType::RaftKv { + cf_opts.push((CF_RAFT, self.raftcf.build_opt(cache))); + } + cf_opts } fn validate(&mut self) -> Result<(), Box> { @@ -1452,7 +1499,9 @@ impl RaftDbConfig { opts.set_max_manifest_file_size(self.max_manifest_file_size.0); opts.create_if_missing(self.create_if_missing); opts.set_max_open_files(self.max_open_files); - opts.enable_statistics(self.enable_statistics); + if self.enable_statistics { + opts.set_statistics(&Statistics::new_titan()); + } opts.set_stats_dump_period_sec(self.stats_dump_period.as_secs() as usize); opts.set_compaction_readahead_size(self.compaction_readahead_size.0); opts.set_max_log_file_size(self.info_log_max_size.0); @@ -3010,6 +3059,8 @@ impl TikvConfig { config::canonicalize_sub_path(&self.storage.data_dir, "log-backup-temp")?; } + self.rocksdb.optimize_for(self.storage.engine); + self.rocksdb.validate()?; self.raftdb.validate()?; self.raft_engine.validate()?; @@ -4367,11 +4418,12 @@ mod tests { assert_eq!(F::TAG, cfg.storage.api_version()); let engine = RocksDBEngine::new( &cfg.storage.data_dir, - Some(cfg.rocksdb.build_opt()), + Some(cfg.rocksdb.build_opt(None)), cfg.rocksdb.build_cf_opts( &cfg.storage.block_cache.build_shared_cache(), None, cfg.storage.api_version(), + cfg.storage.engine, ), None, ) diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 01dc1e4a786..2680c778f02 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -3,16 +3,21 @@ use std::{path::Path, sync::Arc}; use engine_rocks::{ - raw::{Cache, Env}, + raw::{Cache, Env, Statistics}, 
CompactedEventSender, CompactionListener, FlowListener, RocksCfOptions, RocksCompactionJobInfo, RocksDbOptions, RocksEngine, RocksEventListener, }; -use engine_traits::{CompactionJobInfo, MiscExt, Result, TabletFactory, CF_DEFAULT, CF_WRITE}; +use engine_traits::{ + CompactionJobInfo, MiscExt, Result, TabletContext, TabletFactory, CF_DEFAULT, CF_WRITE, +}; use kvproto::kvrpcpb::ApiVersion; use raftstore::RegionInfoAccessor; use tikv_util::worker::Scheduler; -use crate::config::{DbConfig, TikvConfig, DEFAULT_ROCKSDB_SUB_DIR}; +use crate::{ + config::{DbConfig, TikvConfig, DEFAULT_ROCKSDB_SUB_DIR}, + storage::config::EngineType, +}; struct FactoryInner { env: Arc, @@ -22,6 +27,8 @@ struct FactoryInner { api_version: ApiVersion, flow_listener: Option, sst_recovery_sender: Option>, + statistics: Statistics, + lite: bool, } pub struct KvEngineFactoryBuilder { @@ -40,6 +47,8 @@ impl KvEngineFactoryBuilder { api_version: config.storage.api_version(), flow_listener: None, sst_recovery_sender: None, + statistics: Statistics::new_titan(), + lite: false, }, compact_event_sender: None, } @@ -68,6 +77,14 @@ impl KvEngineFactoryBuilder { self } + /// Set whether enable lite mode. + /// + /// In lite mode, most listener/filters will not be installed. + pub fn lite(mut self, lite: bool) -> Self { + self.inner.lite = lite; + self + } + pub fn build(self) -> KvEngineFactory { KvEngineFactory { inner: Arc::new(self.inner), @@ -107,32 +124,43 @@ impl KvEngineFactory { fn db_opts(&self) -> RocksDbOptions { // Create kv engine. 
- let mut db_opts = self.inner.rocksdb_config.build_opt(); + let mut db_opts = self + .inner + .rocksdb_config + .build_opt(Some(&self.inner.statistics)); db_opts.set_env(self.inner.env.clone()); - db_opts.add_event_listener(RocksEventListener::new( - "kv", - self.inner.sst_recovery_sender.clone(), - )); - if let Some(filter) = self.create_raftstore_compaction_listener() { - db_opts.add_event_listener(filter); + if !self.inner.lite { + db_opts.add_event_listener(RocksEventListener::new( + "kv", + self.inner.sst_recovery_sender.clone(), + )); + if let Some(filter) = self.create_raftstore_compaction_listener() { + db_opts.add_event_listener(filter); + } } db_opts } - fn cf_opts(&self) -> Vec<(&str, RocksCfOptions)> { + fn cf_opts(&self, for_engine: EngineType) -> Vec<(&str, RocksCfOptions)> { self.inner.rocksdb_config.build_cf_opts( &self.inner.block_cache, self.inner.region_info_accessor.as_ref(), self.inner.api_version, + for_engine, ) } + pub fn block_cache(&self) -> &Cache { + &self.inner.block_cache + } + /// Create a shared db. /// /// It will always create in path/DEFAULT_DB_SUB_DIR. 
- pub fn create_shared_db(&self, path: &Path) -> Result { + pub fn create_shared_db(&self, path: impl AsRef) -> Result { + let path = path.as_ref(); let mut db_opts = self.db_opts(); - let cf_opts = self.cf_opts(); + let cf_opts = self.cf_opts(EngineType::RaftKv); if let Some(listener) = &self.inner.flow_listener { db_opts.add_event_listener(listener.clone()); } @@ -147,27 +175,27 @@ impl KvEngineFactory { } impl TabletFactory for KvEngineFactory { - fn open_tablet(&self, id: u64, suffix: Option, path: &Path) -> Result { + fn open_tablet(&self, ctx: TabletContext, path: &Path) -> Result { let mut db_opts = self.db_opts(); - let cf_opts = self.cf_opts(); - if let Some(listener) = &self.inner.flow_listener && let Some(suffix) = suffix { - db_opts.add_event_listener(listener.clone_with(id, suffix)); + let cf_opts = self.cf_opts(EngineType::RaftKv2); + if let Some(listener) = &self.inner.flow_listener && let Some(suffix) = ctx.suffix { + db_opts.add_event_listener(listener.clone_with(ctx.id, suffix)); } let kv_engine = engine_rocks::util::new_engine_opt(path.to_str().unwrap(), db_opts, cf_opts); if let Err(e) = &kv_engine { - error!("failed to create tablet"; "id" => id, "suffix" => ?suffix, "path" => %path.display(), "err" => ?e); - } else if let Some(listener) = &self.inner.flow_listener && let Some(suffix) = suffix { - listener.clone_with(id, suffix).on_created(); + error!("failed to create tablet"; "id" => ctx.id, "suffix" => ?ctx.suffix, "path" => %path.display(), "err" => ?e); + } else if let Some(listener) = &self.inner.flow_listener && let Some(suffix) = ctx.suffix { + listener.clone_with(ctx.id, suffix).on_created(); } kv_engine } - fn destroy_tablet(&self, id: u64, suffix: Option, path: &Path) -> Result<()> { - info!("destroy tablet"; "path" => %path.display(), "id" => id, "suffix" => ?suffix); + fn destroy_tablet(&self, ctx: TabletContext, path: &Path) -> Result<()> { + info!("destroy tablet"; "path" => %path.display(), "id" => ctx.id, "suffix" => 
?ctx.suffix); // Create kv engine. let _db_opts = self.db_opts(); - let _cf_opts = self.cf_opts(); + let _cf_opts = self.cf_opts(EngineType::RaftKv2); // TODOTODO: call rust-rocks or tirocks to destroy_engine; // engine_rocks::util::destroy_engine( // path.to_str().unwrap(), @@ -175,8 +203,8 @@ impl TabletFactory for KvEngineFactory { // kv_cfs_opts, // )?; let _ = std::fs::remove_dir_all(path); - if let Some(listener) = &self.inner.flow_listener && let Some(suffix) = suffix { - listener.clone_with(id, suffix).on_destroyed(); + if let Some(listener) = &self.inner.flow_listener && let Some(suffix) = ctx.suffix { + listener.clone_with(ctx.id, suffix).on_destroyed(); } Ok(()) } @@ -214,15 +242,20 @@ mod tests { let reg = TabletRegistry::new(Box::new(factory), dir.path()).unwrap(); let path = reg.tablet_path(1, 3); assert!(!reg.tablet_factory().exists(&path)); - let engine = reg.tablet_factory().open_tablet(1, Some(3), &path).unwrap(); + let mut tablet_ctx = TabletContext::with_infinite_region(1, Some(3)); + let engine = reg + .tablet_factory() + .open_tablet(tablet_ctx.clone(), &path) + .unwrap(); assert!(reg.tablet_factory().exists(&path)); // Second attempt should fail with lock. 
reg.tablet_factory() - .open_tablet(1, Some(3), &path) + .open_tablet(tablet_ctx.clone(), &path) .unwrap_err(); drop(engine); + tablet_ctx.suffix = Some(3); reg.tablet_factory() - .destroy_tablet(1, Some(3), &path) + .destroy_tablet(tablet_ctx, &path) .unwrap(); assert!(!reg.tablet_factory().exists(&path)); } diff --git a/src/storage/config.rs b/src/storage/config.rs index 3501cefa252..68d739c1639 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -31,12 +31,21 @@ const DEFAULT_SCHED_PENDING_WRITE_MB: u64 = 100; const DEFAULT_RESERVED_SPACE_GB: u64 = 5; const DEFAULT_RESERVED_RAFT_SPACE_GB: u64 = 1; +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "kebab-case")] +pub enum EngineType { + RaftKv, + RaftKv2, +} + #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] pub struct Config { #[online_config(skip)] pub data_dir: String, + #[online_config(skip)] + pub engine: EngineType, // Replaced by `GcConfig.ratio_threshold`. Keep it for backward compatibility. 
#[online_config(skip)] pub gc_ratio_threshold: f64, @@ -75,6 +84,7 @@ impl Default for Config { let cpu_num = SysQuota::cpu_cores_quota(); Config { data_dir: DEFAULT_DATA_DIR.to_owned(), + engine: EngineType::RaftKv, gc_ratio_threshold: DEFAULT_GC_RATIO_THRESHOLD, max_key_size: DEFAULT_MAX_KEY_SIZE, scheduler_concurrency: DEFAULT_SCHED_CONCURRENCY, diff --git a/src/storage/kv/test_engine_builder.rs b/src/storage/kv/test_engine_builder.rs index f02ee31c5f2..12a7776e434 100644 --- a/src/storage/kv/test_engine_builder.rs +++ b/src/storage/kv/test_engine_builder.rs @@ -12,7 +12,7 @@ use kvproto::kvrpcpb::ApiVersion; use tikv_util::config::ReadableSize; use crate::storage::{ - config::BlockCacheConfig, + config::{BlockCacheConfig, EngineType}, kv::{Result, RocksEngine}, }; @@ -102,10 +102,20 @@ impl TestEngineBuilder { .map(|cf| match *cf { CF_DEFAULT => ( CF_DEFAULT, - cfg_rocksdb.defaultcf.build_opt(&cache, None, api_version), + cfg_rocksdb + .defaultcf + .build_opt(&cache, None, api_version, EngineType::RaftKv), + ), + CF_LOCK => ( + CF_LOCK, + cfg_rocksdb.lockcf.build_opt(&cache, EngineType::RaftKv), + ), + CF_WRITE => ( + CF_WRITE, + cfg_rocksdb + .writecf + .build_opt(&cache, None, EngineType::RaftKv), ), - CF_LOCK => (CF_LOCK, cfg_rocksdb.lockcf.build_opt(&cache)), - CF_WRITE => (CF_WRITE, cfg_rocksdb.writecf.build_opt(&cache, None)), CF_RAFT => (CF_RAFT, cfg_rocksdb.raftcf.build_opt(&cache)), _ => (*cf, RocksCfOptions::default()), }) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 79f48c68a88..05d5c743d76 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3593,6 +3593,7 @@ mod tests { use txn_types::{Mutation, PessimisticLock, WriteType, SHORT_VALUE_MAX_LEN}; use super::{ + config::EngineType, mvcc::tests::{must_unlocked, must_written}, test_util::*, txn::{ @@ -4137,12 +4138,23 @@ mod tests { let cfs_opts = vec![ ( CF_DEFAULT, + cfg_rocksdb.defaultcf.build_opt( + &cache, + None, + ApiVersion::V1, + EngineType::RaftKv, + ), + ), + ( + 
CF_LOCK, + cfg_rocksdb.lockcf.build_opt(&cache, EngineType::RaftKv), + ), + ( + CF_WRITE, cfg_rocksdb - .defaultcf - .build_opt(&cache, None, ApiVersion::V1), + .writecf + .build_opt(&cache, None, EngineType::RaftKv), ), - (CF_LOCK, cfg_rocksdb.lockcf.build_opt(&cache)), - (CF_WRITE, cfg_rocksdb.writecf.build_opt(&cache, None)), (CF_RAFT, cfg_rocksdb.raftcf.build_opt(&cache)), ]; RocksEngine::new( diff --git a/src/storage/txn/flow_controller/tablet_flow_controller.rs b/src/storage/txn/flow_controller/tablet_flow_controller.rs index 973ed245ac8..922e986874a 100644 --- a/src/storage/txn/flow_controller/tablet_flow_controller.rs +++ b/src/storage/txn/flow_controller/tablet_flow_controller.rs @@ -291,7 +291,7 @@ impl TabletFlowController { #[cfg(test)] mod tests { use engine_rocks::FlowInfo; - use engine_traits::SingletonFactory; + use engine_traits::{SingletonFactory, TabletContext}; use tempfile::TempDir; use super::{ @@ -327,7 +327,8 @@ mod tests { let (_dir, flow_controller, tx, reg) = create_tablet_flow_controller(); let region_id = 5_u64; let tablet_suffix = 5_u64; - reg.load(region_id, tablet_suffix, false).unwrap(); + let tablet_context = TabletContext::with_infinite_region(region_id, Some(tablet_suffix)); + reg.load(tablet_context, false).unwrap(); tx.send(FlowInfo::Created(region_id, tablet_suffix)) .unwrap(); tx.send(FlowInfo::L0Intra( @@ -354,7 +355,8 @@ mod tests { let (_dir, flow_controller, tx, reg) = create_tablet_flow_controller(); let region_id = 5_u64; let tablet_suffix = 5_u64; - let mut cached = reg.load(region_id, tablet_suffix, false).unwrap(); + let tablet_context = TabletContext::with_infinite_region(region_id, Some(tablet_suffix)); + let mut cached = reg.load(tablet_context, false).unwrap(); let stub = cached.latest().unwrap().clone(); tx.send(FlowInfo::Created(region_id, tablet_suffix)) .unwrap(); @@ -373,7 +375,8 @@ mod tests { let (_dir, flow_controller, tx, reg) = create_tablet_flow_controller(); let region_id = 5_u64; let tablet_suffix = 
5_u64; - let mut cached = reg.load(region_id, tablet_suffix, false).unwrap(); + let tablet_context = TabletContext::with_infinite_region(region_id, Some(tablet_suffix)); + let mut cached = reg.load(tablet_context, false).unwrap(); let stub = cached.latest().unwrap().clone(); tx.send(FlowInfo::Created(region_id, tablet_suffix)) .unwrap(); @@ -392,7 +395,8 @@ mod tests { let (_dir, flow_controller, tx, reg) = create_tablet_flow_controller(); let region_id = 5_u64; let tablet_suffix = 5_u64; - let mut cached = reg.load(region_id, tablet_suffix, false).unwrap(); + let tablet_context = TabletContext::with_infinite_region(region_id, Some(tablet_suffix)); + let mut cached = reg.load(tablet_context, false).unwrap(); let stub = cached.latest().unwrap().clone(); tx.send(FlowInfo::Created(region_id, tablet_suffix)) .unwrap(); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 6341f3a9e27..73dfdbaa977 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -32,7 +32,7 @@ use tikv::{ lock_manager::Config as PessimisticTxnConfig, Config as ServerConfig, }, storage::config::{ - BlockCacheConfig, Config as StorageConfig, FlowControlConfig, IoRateLimitConfig, + BlockCacheConfig, Config as StorageConfig, EngineType, FlowControlConfig, IoRateLimitConfig, }, }; use tikv_util::config::{LogFormat, ReadableDuration, ReadableSize}; @@ -308,7 +308,9 @@ fn test_serde_custom_tikv_config() { writable_file_max_buffer_size: ReadableSize::mb(12), use_direct_io_for_flush_and_compaction: true, enable_pipelined_write: false, - enable_multi_batch_write: true, + enable_multi_batch_write: Some(true), + paranoid_checks: None, + allow_concurrent_memtable_write: Some(false), enable_unordered_write: true, defaultcf: DefaultCfConfig { block_size: ReadableSize::kb(12), @@ -665,6 +667,7 @@ fn test_serde_custom_tikv_config() { raft_engine_config.memory_limit = Some(RaftEngineReadableSize::gb(1)); value.storage = StorageConfig { data_dir: 
"/var".to_owned(), + engine: EngineType::RaftKv2, gc_ratio_threshold: 1.2, max_key_size: 4096, scheduler_concurrency: 123, @@ -758,8 +761,13 @@ fn test_serde_custom_tikv_config() { ..Default::default() }; value.backup_stream = BackupStreamConfig { - num_threads: 12, - ..Default::default() + max_flush_interval: ReadableDuration::secs(11), + num_threads: 7, + enable: true, + temp_path: "./stream".to_string(), + file_size_limit: ReadableSize::gb(5), + initial_scan_pending_memory_quota: ReadableSize::kb(2), + initial_scan_rate_limit: ReadableSize::mb(3), }; value.import = ImportConfig { num_threads: 123, @@ -817,6 +825,7 @@ fn test_serde_custom_tikv_config() { } } +#[track_caller] fn diff_config(lhs: &TikvConfig, rhs: &TikvConfig) { let lhs_str = format!("{:?}", lhs); let rhs_str = format!("{:?}", rhs); diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index f22538a6f78..961eb59a77b 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -90,6 +90,7 @@ a = "b" [storage] data-dir = "/var" +engine = "raft-kv2" gc-ratio-threshold = 1.2 max-key-size = 4096 scheduler-concurrency = 123 @@ -268,7 +269,9 @@ max-sub-compactions = 12 writable-file-max-buffer-size = "12MB" use-direct-io-for-flush-and-compaction = true enable-pipelined-write = false +enable-multi-batch-write = true enable-unordered-write = true +allow-concurrent-memtable-write = false [rocksdb.titan] enabled = true @@ -624,6 +627,15 @@ batch-size = 7 s3-multi-part-size = "15MB" sst-max-size = "789MB" +[log-backup] +max-flush-interval = "11s" +num-threads = 7 +enable = true +temp-path = "./stream" +file-size-limit = "5GiB" +initial-scan-pending-memory-quota = "2KiB" +initial-scan-rate-limit = "3MiB" + [backup.hadoop] home = "/root/hadoop" linux-user = "hadoop" diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index c0a9ee8b1ed..d1abbcb924c 100644 --- 
a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -159,10 +159,10 @@ fn test_delete_files_in_range_for_titan() { cfg.rocksdb.defaultcf.titan.min_gc_batch_size = ReadableSize(0); cfg.rocksdb.defaultcf.titan.discardable_ratio = 0.4; cfg.rocksdb.defaultcf.titan.min_blob_size = ReadableSize(0); - let kv_db_opts = cfg.rocksdb.build_opt(); - let kv_cfs_opts = cfg - .rocksdb - .build_cf_opts(&cache, None, cfg.storage.api_version()); + let kv_db_opts = cfg.rocksdb.build_opt(None); + let kv_cfs_opts = + cfg.rocksdb + .build_cf_opts(&cache, None, cfg.storage.api_version(), cfg.storage.engine); let raft_path = path.path().join(Path::new("titan")); let engines = Engines::new( From 69cdc1e2e25a8dd623973295322e96138d77cf79 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 13 Dec 2022 17:08:52 +0800 Subject: [PATCH 0403/1149] raftstore-v2: deny unused (#13933) ref tikv/tikv#12842 Now most structs and functions are used, add the lint back to keep code clean. Signed-off-by: Jay Lee --- components/raftstore-v2/Cargo.toml | 4 +- components/raftstore-v2/src/batch/store.rs | 34 ++++----- components/raftstore-v2/src/fsm/apply.rs | 12 +--- components/raftstore-v2/src/fsm/peer.rs | 11 ++- components/raftstore-v2/src/fsm/store.rs | 7 +- components/raftstore-v2/src/lib.rs | 1 - .../operation/command/admin/conf_change.rs | 9 +-- .../src/operation/command/admin/mod.rs | 21 ++---- .../src/operation/command/admin/split.rs | 70 +++++++------------ .../command/admin/transfer_leader.rs | 8 +-- .../src/operation/command/control.rs | 13 +--- .../raftstore-v2/src/operation/command/mod.rs | 37 +++------- .../src/operation/command/write/mod.rs | 20 +++--- .../operation/command/write/simple_write.rs | 4 +- components/raftstore-v2/src/operation/life.rs | 4 +- components/raftstore-v2/src/operation/pd.rs | 17 ++--- .../raftstore-v2/src/operation/query/lease.rs | 9 ++- .../raftstore-v2/src/operation/query/local.rs | 17 ++--- .../raftstore-v2/src/operation/query/mod.rs 
| 26 +++---- .../src/operation/query/replica.rs | 5 +- .../src/operation/ready/async_writer.rs | 1 - .../raftstore-v2/src/operation/ready/mod.rs | 30 +++----- .../src/operation/ready/snapshot.rs | 32 ++++----- components/raftstore-v2/src/raft/apply.rs | 9 +-- components/raftstore-v2/src/raft/peer.rs | 34 ++------- components/raftstore-v2/src/raft/storage.rs | 29 ++++---- components/raftstore-v2/src/router/imp.rs | 2 - .../src/router/internal_message.rs | 2 - components/raftstore-v2/src/router/message.rs | 2 - .../src/router/response_channel.rs | 2 +- components/raftstore-v2/src/worker/mod.rs | 4 +- components/raftstore-v2/src/worker/pd/mod.rs | 51 +------------- .../src/worker/pd/region_heartbeat.rs | 13 +--- .../src/worker/pd/store_heartbeat.rs | 1 - 34 files changed, 165 insertions(+), 376 deletions(-) diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 1d6b67ad129..4d3d44ec6fd 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -71,9 +71,9 @@ test_util = { workspace = true } [[test]] name = "raftstore-v2-failpoints" path = "tests/failpoints/mod.rs" -required-features = ["failpoints", "testexport"] +required-features = ["failpoints", "testexport", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] [[test]] name = "raftstore-v2-integrations" path = "tests/integrations/mod.rs" -required-features = ["testexport"] +required-features = ["testexport", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 997f8da7a9c..ac767bcd7ce 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -2,7 +2,6 @@ use std::{ ops::{Deref, DerefMut}, - path::Path, sync::{ atomic::{AtomicBool, Ordering}, Arc, Mutex, @@ -16,15 +15,10 @@ use batch_system::{ use causal_ts::CausalTsProviderImpl; use collections::HashMap; use 
concurrency_manager::ConcurrencyManager; -use crossbeam::channel::{Sender, TrySendError}; -use engine_traits::{Engines, KvEngine, RaftEngine, TabletRegistry}; +use crossbeam::channel::TrySendError; +use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use file_system::{set_io_type, IoType}; -use futures::{compat::Future01CompatExt, FutureExt}; -use kvproto::{ - disk_usage::DiskUsage, - metapb::Store, - raft_serverpb::{PeerState, RaftMessage}, -}; +use kvproto::{disk_usage::DiskUsage, raft_serverpb::RaftMessage}; use pd_client::PdClient; use raft::INVALID_ID; use raftstore::store::{ @@ -35,8 +29,6 @@ use slog::Logger; use tikv_util::{ box_err, config::{Tracker, VersionTrack}, - defer, - future::poll_future_notify, sys::SysQuota, time::Instant as TiInstant, timer::SteadyTimer, @@ -50,7 +42,7 @@ use crate::{ fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate, StoreMeta}, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, - worker::{PdRunner, PdTask}, + worker::pd, Error, Result, }; @@ -83,7 +75,7 @@ pub struct StoreContext { pub self_disk_usage: DiskUsage, pub snap_mgr: TabletSnapManager, - pub pd_scheduler: Scheduler, + pub pd_scheduler: Scheduler, } /// A [`PollHandler`] that handles updates of [`StoreFsm`]s and [`PeerFsm`]s. 
@@ -208,7 +200,7 @@ impl PollHandler>>]) {} + fn end(&mut self, _batch: &mut [Option>>]) {} fn pause(&mut self) { if self.poll_ctx.trans.need_flush() { @@ -231,7 +223,7 @@ struct StorePollerBuilder { trans: T, router: StoreRouter, read_scheduler: Scheduler>, - pd_scheduler: Scheduler, + pd_scheduler: Scheduler, write_senders: WriteSenders, apply_pool: FuturePool, logger: Logger, @@ -248,7 +240,7 @@ impl StorePollerBuilder { trans: T, router: StoreRouter, read_scheduler: Scheduler>, - pd_scheduler: Scheduler, + pd_scheduler: Scheduler, store_writers: &mut StoreWriters, logger: Logger, store_meta: Arc>, @@ -285,7 +277,7 @@ impl StorePollerBuilder { fn init(&self) -> Result>> { let mut regions = HashMap::default(); let cfg = self.cfg.value(); - let mut meta = self.store_meta.lock().unwrap(); + let meta = self.store_meta.lock().unwrap(); self.engine .for_each_raft_group::(&mut |region_id| { assert_ne!(region_id, INVALID_ID); @@ -317,7 +309,7 @@ impl StorePollerBuilder { Ok(regions) } - fn clean_up_tablets(&self, peers: &HashMap>) -> Result<()> { + fn clean_up_tablets(&self, _peers: &HashMap>) -> Result<()> { // TODO: list all available tablets and destroy those which are not in the // peers. 
Ok(()) @@ -332,7 +324,7 @@ where { type Handler = StorePoller; - fn build(&mut self, priority: batch_system::Priority) -> Self::Handler { + fn build(&mut self, _priority: batch_system::Priority) -> Self::Handler { let cfg = self.cfg.value().clone(); let poll_ctx = StoreContext { logger: self.logger.clone(), @@ -426,7 +418,7 @@ impl StoreSystem { let pd_scheduler = workers.pd_worker.start( "pd-worker", - PdRunner::new( + pd::Runner::new( store_id, pd_client, raft_engine.clone(), @@ -440,7 +432,7 @@ impl StoreSystem { ), ); - let mut builder = StorePollerBuilder::new( + let builder = StorePollerBuilder::new( cfg.clone(), store_id, raft_engine, diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 7e9a135b498..2065c5d7fd4 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -1,19 +1,11 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{ - pin::Pin, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, - }, - task::{Context, Poll}, - time::{Duration, Instant}, -}; +use std::time::{Duration, Instant}; use batch_system::{Fsm, FsmScheduler, Mailbox}; use crossbeam::channel::TryRecvError; use engine_traits::{KvEngine, TabletRegistry}; -use futures::{compat::Future01CompatExt, Future, FutureExt, StreamExt}; +use futures::{compat::Future01CompatExt, FutureExt, StreamExt}; use kvproto::{metapb, raft_serverpb::RegionLocalState}; use raftstore::store::ReadTask; use slog::Logger; diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index c4dded64e62..8d497a7e4e5 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -6,17 +6,15 @@ use std::borrow::Cow; use batch_system::{BasicMailbox, Fsm}; use crossbeam::channel::TryRecvError; -use engine_traits::{KvEngine, RaftEngine, TabletFactory, TabletRegistry}; +use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use 
raftstore::store::{Config, LocksStatus, Transport}; use slog::{debug, error, info, trace, Logger}; use tikv_util::{ is_zero_duration, mpsc::{self, LooseBoundedSender, Receiver}, time::{duration_to_sec, Instant}, - yatp_pool::FuturePool, }; -use super::ApplyFsm; use crate::{ batch::StoreContext, raft::{Peer, Storage}, @@ -237,10 +235,9 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerMsg::Tick(tick) => self.on_tick(tick), PeerMsg::ApplyRes(res) => self.fsm.peer.on_apply_res(self.store_ctx, res), PeerMsg::SplitInit(msg) => self.fsm.peer.on_split_init(self.store_ctx, msg), - PeerMsg::SplitInitFinish(region_id) => self - .fsm - .peer - .on_split_init_finish(self.store_ctx, region_id), + PeerMsg::SplitInitFinish(region_id) => { + self.fsm.peer.on_split_init_finish(region_id) + } PeerMsg::Start => self.on_start(), PeerMsg::Noop => unimplemented!(), PeerMsg::Persisted { diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 73702500e19..349d5ad3252 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -6,11 +6,7 @@ use batch_system::Fsm; use collections::HashMap; use engine_traits::{KvEngine, RaftEngine}; use futures::{compat::Future01CompatExt, FutureExt}; -use kvproto::{metapb::Region, raft_serverpb::RaftMessage}; -use raftstore::{ - coprocessor::RegionChangeReason, - store::{Config, ReadDelegate, RegionReadProgressRegistry}, -}; +use raftstore::store::{Config, ReadDelegate, RegionReadProgressRegistry}; use slog::{info, o, Logger}; use tikv_util::{ future::poll_future_notify, @@ -20,7 +16,6 @@ use tikv_util::{ use crate::{ batch::StoreContext, - raft::Peer, router::{StoreMsg, StoreTick}, }; diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 2a9d5faabd5..bac66b34acc 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -21,7 +21,6 @@ // Functionalities like 
read, write, etc should be implemented in [`operation`] // using a standalone modules. -#![allow(unused)] #![feature(let_chains)] #![feature(array_windows)] #![feature(div_duration)] diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 69e318c3a2e..4bda7eedf32 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -9,7 +9,6 @@ use std::time::Instant; -use collections::HashSet; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ metapb::{self, PeerRole}, @@ -18,7 +17,6 @@ use kvproto::{ }; use protobuf::Message; use raft::prelude::*; -use raft_proto::ConfChangeI; use raftstore::{ store::{ metrics::{PEER_ADMIN_CMD_COUNTER_VEC, PEER_PROPOSE_LOG_SIZE_HISTOGRAM}, @@ -34,7 +32,6 @@ use super::AdminCmdResult; use crate::{ batch::StoreContext, raft::{Apply, Peer}, - router::ApplyRes, }; /// The apply result of conf change. 
@@ -56,7 +53,7 @@ impl Peer { pub fn propose_conf_change( &mut self, ctx: &mut StoreContext, - mut req: RaftCmdRequest, + req: RaftCmdRequest, ) -> Result { if self.raft_group().raft.has_pending_conf() { info!( @@ -67,7 +64,6 @@ impl Peer { } let data = req.write_to_bytes()?; let admin = req.get_admin_request(); - let leader_role = self.peer().get_role(); if admin.has_change_peer() { self.propose_conf_change_imp(ctx, admin.get_change_peer(), data) } else if admin.has_change_peer_v2() { @@ -229,7 +225,6 @@ impl Apply { legacy: bool, ) -> Result<(AdminResponse, AdminCmdResult)> { let region = self.region_state().get_region(); - let peer_id = self.peer().get_id(); let change_kind = ConfChangeKind::confchange_kind(changes.len()); info!(self.logger, "exec ConfChangeV2"; "kind" => ?change_kind, "legacy" => legacy, "epoch" => ?region.get_region_epoch()); let mut new_region = region.clone(); @@ -284,7 +279,7 @@ impl Apply { } let mut resp = AdminResponse::default(); resp.mut_change_peer().set_region(new_region); - let mut conf_change = ConfChangeResult { + let conf_change = ConfChangeResult { index, conf_change: cc, changes: changes.to_vec(), diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 388bf72e01e..d07c1b4a35c 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -5,29 +5,16 @@ mod split; mod transfer_leader; use engine_traits::{KvEngine, RaftEngine}; -use kvproto::raft_cmdpb::{AdminCmdType, AdminRequest, RaftCmdRequest}; +use kvproto::raft_cmdpb::{AdminCmdType, RaftCmdRequest}; use protobuf::Message; -use raft::prelude::ConfChangeV2; -use raftstore::{ - store::{ - self, cmd_resp, - fsm::apply, - msg::ErrorCallback, - util::{ChangePeerI, ConfChangeKind}, - }, - Result, -}; +use raftstore::store::{cmd_resp, fsm::apply, msg::ErrorCallback}; use slog::info; pub use split::{SplitInit, SplitResult, 
SPLIT_PREFIX}; use tikv_util::box_err; use txn_types::WriteBatchFlags; use self::conf_change::ConfChangeResult; -use crate::{ - batch::StoreContext, - raft::{Apply, Peer}, - router::CmdResChannel, -}; +use crate::{batch::StoreContext, raft::Peer, router::CmdResChannel}; #[derive(Debug)] pub enum AdminCmdResult { @@ -43,7 +30,7 @@ impl Peer { pub fn on_admin_command( &mut self, ctx: &mut StoreContext, - mut req: RaftCmdRequest, + req: RaftCmdRequest, ch: CmdResChannel, ) { if !self.serving() { diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 157150126b4..8ca4c7a55f6 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -25,43 +25,37 @@ //! created by the store, and here init it using the data sent from the parent //! peer. -use std::{cmp, collections::VecDeque}; +use std::cmp; use collections::HashSet; -use crossbeam::channel::{SendError, TrySendError}; -use engine_traits::{ - Checkpointer, DeleteStrategy, KvEngine, RaftEngine, RaftLogBatch, Range, TabletContext, - CF_DEFAULT, -}; +use crossbeam::channel::SendError; +use engine_traits::{Checkpointer, KvEngine, RaftEngine, TabletContext}; use fail::fail_point; -use keys::enc_end_key; use kvproto::{ - metapb::{self, Region, RegionEpoch}, + metapb::{self, Region}, raft_cmdpb::{AdminRequest, AdminResponse, RaftCmdRequest, SplitRequest}, - raft_serverpb::{RaftMessage, RaftSnapshotData, RegionLocalState}, + raft_serverpb::RaftSnapshotData, }; use protobuf::Message; -use raft::{prelude::Snapshot, RawNode, INVALID_ID}; +use raft::{prelude::Snapshot, INVALID_ID}; use raftstore::{ - coprocessor::RegionChangeReason, store::{ fsm::apply::validate_batch_split, metrics::PEER_ADMIN_CMD_COUNTER, snap::TABLET_SNAPSHOT_VERSION, util::{self, KeysInfoFormatter}, - PeerPessimisticLocks, PeerStat, ProposalContext, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, + 
PeerPessimisticLocks, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }, Result, }; -use slog::{error, info, warn, Logger}; -use tikv_util::box_err; +use slog::info; use crate::{ batch::StoreContext, - fsm::{ApplyResReporter, PeerFsmDelegate}, + fsm::ApplyResReporter, operation::AdminCmdResult, - raft::{write_initial_states, Apply, Peer, Storage}, - router::{ApplyRes, PeerMsg, StoreMsg}, + raft::{Apply, Peer}, + router::{PeerMsg, StoreMsg}, }; pub const SPLIT_PREFIX: &str = "split_"; @@ -314,17 +308,10 @@ impl Peer { }; fail_point!("on_split_invalidate_locks"); - // Roughly estimate the size and keys for new regions. - let new_region_count = regions.len() as u64; { let mut meta = store_ctx.store_meta.lock().unwrap(); let reader = meta.readers.get_mut(&derived.get_id()).unwrap(); - self.set_region( - reader, - derived.clone(), - RegionChangeReason::Split, - tablet_index, - ); + self.set_region(reader, derived.clone(), tablet_index); } self.post_split(); @@ -454,9 +441,9 @@ impl Peer { .force_send(split_init.source_id, PeerMsg::SplitInitFinish(region_id)); } - pub fn on_split_init_finish(&mut self, ctx: &mut StoreContext, region_id: u64) { + pub fn on_split_init_finish(&mut self, region_id: u64) { let mut found = false; - for (tablet_index, ids) in self.split_trace_mut() { + for (_, ids) in self.split_trace_mut() { if ids.remove(®ion_id) { found = true; break; @@ -476,6 +463,8 @@ impl Peer { if off > 0 { // There should be very few elements in the vector. split_trace.drain(..off); + // TODO: save admin_flushed. + assert_ne!(admin_flushed, 0); // Persist admin flushed. 
self.set_has_ready(); } @@ -484,39 +473,30 @@ impl Peer { #[cfg(test)] mod test { - use std::sync::{ - mpsc::{channel, Receiver, Sender}, - Arc, - }; + use std::sync::mpsc::{channel, Receiver, Sender}; - use collections::HashMap; use engine_test::{ ctor::{CfOptions, DbOptions}, kv::TestTabletFactory, - raft, }; - use engine_traits::{CfOptionsExt, Peekable, TabletRegistry, WriteBatch, DATA_CFS}; - use futures::channel::mpsc::unbounded; + use engine_traits::{ + Peekable, TabletContext, TabletRegistry, WriteBatch, CF_DEFAULT, DATA_CFS, + }; use kvproto::{ metapb::RegionEpoch, - raft_cmdpb::{AdminCmdType, BatchSplitRequest, PutRequest, RaftCmdResponse, SplitRequest}, - raft_serverpb::{PeerState, RaftApplyState, RegionLocalState}, + raft_cmdpb::{BatchSplitRequest, SplitRequest}, + raft_serverpb::{PeerState, RegionLocalState}, }; - use raftstore::store::{cmd_resp::new_error, Config, ReadRunner}; + use raftstore::store::cmd_resp::new_error; use slog::o; use tempfile::TempDir; use tikv_util::{ - codec::bytes::encode_bytes, - config::VersionTrack, store::{new_learner_peer, new_peer}, - worker::{dummy_future_scheduler, dummy_scheduler, FutureScheduler, Scheduler, Worker}, + worker::dummy_scheduler, }; use super::*; - use crate::{ - fsm::{ApplyFsm, ApplyResReporter}, - raft::Apply, - }; + use crate::{fsm::ApplyResReporter, raft::Apply, router::ApplyRes}; struct MockReporter { sender: Sender, diff --git a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs index 71853d0007b..e8105a66322 100644 --- a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs +++ b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs @@ -18,7 +18,7 @@ use raft::{eraftpb, ProgressState, Storage}; use raftstore::{ store::{ fsm::new_admin_request, make_transfer_leader_response, metrics::PEER_ADMIN_CMD_COUNTER, - LocksStatus, Transport, TRANSFER_LEADER_COMMAND_REPLY_CTX, + 
LocksStatus, TRANSFER_LEADER_COMMAND_REPLY_CTX, }, Result, }; @@ -29,7 +29,7 @@ use txn_types::WriteBatchFlags; use super::AdminCmdResult; use crate::{ batch::StoreContext, - fsm::{ApplyResReporter, PeerFsmDelegate}, + fsm::ApplyResReporter, raft::{Apply, Peer}, router::{CmdResChannel, PeerMsg, PeerTick}, }; @@ -199,7 +199,7 @@ impl Peer { cmd.mut_admin_request() .set_cmd_type(AdminCmdType::TransferLeader); cmd.mut_admin_request().mut_transfer_leader().set_peer(from); - if let (PeerMsg::RaftCommand(req), sub) = PeerMsg::raft_command(cmd) { + if let PeerMsg::RaftCommand(req) = PeerMsg::raft_command(cmd).0 { self.on_admin_command(ctx, req.request, req.ch); } else { unreachable!(); @@ -380,7 +380,7 @@ impl Peer { self.logger, "propose {} locks before transferring leader", cmd.get_requests().len(); ); - let (PeerMsg::RaftCommand(req), sub) = PeerMsg::raft_command(cmd) else {unreachable!()}; + let PeerMsg::RaftCommand(req) = PeerMsg::raft_command(cmd).0 else {unreachable!()}; self.on_write_command(ctx, req.request, req.ch); true } diff --git a/components/raftstore-v2/src/operation/command/control.rs b/components/raftstore-v2/src/operation/command/control.rs index 5fb25b4e20d..b330d0093fe 100644 --- a/components/raftstore-v2/src/operation/command/control.rs +++ b/components/raftstore-v2/src/operation/command/control.rs @@ -1,11 +1,8 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{collections::LinkedList, mem, num::NonZeroU64}; +use std::{collections::LinkedList, mem}; -use kvproto::{ - metapb, - raft_cmdpb::{AdminCmdType, RaftCmdRequest}, -}; +use kvproto::{metapb, raft_cmdpb::AdminCmdType}; use raftstore::{ store::{ cmd_resp, @@ -263,12 +260,6 @@ impl Drop for ProposalControl { mod tests { use super::*; - fn new_admin_request(cmd_type: AdminCmdType) -> RaftCmdRequest { - let mut request = RaftCmdRequest::default(); - request.mut_admin_request().set_cmd_type(cmd_type); - request - } - #[test] fn test_proposal_control() { let region = metapb::Region::default(); diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 3d0a17ece62..6daa8f2770c 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -16,13 +16,9 @@ //! - Applied result are sent back to peer fsm, and update memory state in //! `on_apply_res`. -use std::cmp; - -use batch_system::{Fsm, FsmScheduler, Mailbox}; use engine_traits::{KvEngine, RaftEngine, WriteBatch, WriteOptions}; -use kvproto::{ - raft_cmdpb::{AdminCmdType, CmdType, RaftCmdRequest, RaftCmdResponse, RaftRequestHeader}, - raft_serverpb::RegionLocalState, +use kvproto::raft_cmdpb::{ + AdminCmdType, CmdType, RaftCmdRequest, RaftCmdResponse, RaftRequestHeader, }; use protobuf::Message; use raft::eraftpb::{ConfChange, ConfChangeV2, Entry, EntryType}; @@ -31,16 +27,12 @@ use raftstore::{ store::{ cmd_resp, fsm::{ - apply::{ - self, APPLY_WB_SHRINK_SIZE, DEFAULT_APPLY_WB_SIZE, SHRINK_PENDING_CMD_QUEUE_CAP, - }, + apply::{self, APPLY_WB_SHRINK_SIZE, SHRINK_PENDING_CMD_QUEUE_CAP}, Proposal, }, local_metrics::RaftMetrics, - metrics::*, msg::ErrorCallback, - util::{self, admin_cmd_epoch_lookup}, - WriteCallback, + util, WriteCallback, }, Error, Result, }; @@ -50,9 +42,8 @@ use tikv_util::{box_err, time::monotonic_raw_now}; use crate::{ batch::StoreContext, fsm::{ApplyFsm, 
ApplyResReporter, PeerFsmDelegate}, - operation::GenSnapTask, raft::{Apply, Peer}, - router::{ApplyRes, ApplyTask, CmdResChannel, PeerMsg}, + router::{ApplyRes, ApplyTask, CmdResChannel}, }; mod admin; @@ -122,7 +113,6 @@ impl Peer { pub fn schedule_apply_fsm(&mut self, store_ctx: &mut StoreContext) { let region_state = self.storage().region_state().clone(); let mailbox = store_ctx.router.mailbox(self.region_id()).unwrap(); - let tablet = self.tablet().clone(); let logger = self.logger.clone(); let read_scheduler = self.storage().read_scheduler(); let (apply_scheduler, mut apply_fsm) = ApplyFsm::new( @@ -165,7 +155,7 @@ impl Peer { return Err(e); } if let Err(mut e) = util::check_region_epoch(req, self.region(), true) { - if let Error::EpochNotMatch(_, new_regions) = &mut e { + if let Error::EpochNotMatch(_, _new_regions) = &mut e { // TODO: query sibling regions. metrics.invalid_proposal.epoch_not_match.inc(); } @@ -247,15 +237,10 @@ impl Peer { } #[inline] - pub fn schedule_apply_committed_entries( - &mut self, - ctx: &mut StoreContext, - committed_entries: Vec, - ) { - let last_entry = match committed_entries.last() { - Some(e) => e, - None => return, - }; + pub fn schedule_apply_committed_entries(&mut self, committed_entries: Vec) { + if committed_entries.is_empty() { + return; + } let current_term = self.term(); let mut entry_and_proposals = vec![]; let queue = self.proposals_mut(); @@ -511,7 +496,7 @@ impl Apply { let mut write_opt = WriteOptions::default(); write_opt.set_disable_wal(true); if let Err(e) = wb.write_opt(&write_opt) { - panic!("failed to write data: {:?}", self.logger.list()); + panic!("failed to write data: {:?}: {:?}", self.logger.list(), e); } if wb.data_size() <= APPLY_WB_SHRINK_SIZE { wb.clear(); diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index f9cac15d899..92f260bad26 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ 
b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -1,16 +1,15 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. use engine_traits::{KvEngine, Mutable, RaftEngine, CF_DEFAULT}; -use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, Request}; +use kvproto::raft_cmdpb::RaftCmdRequest; use raftstore::{ store::{ cmd_resp, - fsm::{apply, Proposal, MAX_PROPOSAL_SIZE_RATIO}, + fsm::{apply, MAX_PROPOSAL_SIZE_RATIO}, msg::ErrorCallback, util::{self, NORMAL_REQ_CHECK_CONF_VER, NORMAL_REQ_CHECK_VER}, - WriteCallback, }, - Error, Result, + Result, }; use crate::{ @@ -24,7 +23,6 @@ mod simple_write; pub use simple_write::{SimpleWriteDecoder, SimpleWriteEncoder}; pub use self::simple_write::SimpleWrite; -use super::CommittedEntries; impl Peer { #[inline] @@ -93,7 +91,7 @@ impl Peer { NORMAL_REQ_CHECK_VER, true, ); - if let Err(mut e) = res { + if let Err(e) = res { // TODO: query sibling regions. ctx.raft_metrics.invalid_proposal.epoch_not_match.inc(); encoder.encode().1.report_error(cmd_resp::new_error(e)); @@ -173,12 +171,12 @@ impl Apply { #[inline] pub fn apply_delete_range( &mut self, - cf: &str, - start_key: &[u8], - end_key: &[u8], - notify_only: bool, + _cf: &str, + _start_key: &[u8], + _end_key: &[u8], + _notify_only: bool, ) -> Result<()> { - /// TODO: reuse the same delete as split/merge. + // TODO: reuse the same delete as split/merge. Ok(()) } } diff --git a/components/raftstore-v2/src/operation/command/write/simple_write.rs b/components/raftstore-v2/src/operation/command/write/simple_write.rs index ca9e7d39366..c4cb9d6bc89 100644 --- a/components/raftstore-v2/src/operation/command/write/simple_write.rs +++ b/components/raftstore-v2/src/operation/command/write/simple_write.rs @@ -31,7 +31,7 @@ impl SimpleWriteEncoder { /// If `notify_proposed` is true, channels will be called `notify_proposed` /// when it's appended. 
pub fn new( - mut req: RaftCmdRequest, + req: RaftCmdRequest, size_limit: usize, notify_proposed: bool, ) -> Result { @@ -346,7 +346,7 @@ fn encode(req: &Request, buf: &mut Vec) { #[inline] fn decode<'a>(buf: &mut &'a [u8]) -> Option> { - let (tag, mut left) = buf.split_first()?; + let (tag, left) = buf.split_first()?; match *tag { PUT_TAG => { let (cf, left) = decode_cf(left); diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index ca610de1bfc..60889908aa0 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -149,7 +149,6 @@ impl Store { } else { return; }; - let msg_type = msg.get_message().get_msg_type(); let from_peer = msg.get_from_peer(); let to_peer = msg.get_to_peer(); // Now the peer should not exist. @@ -239,9 +238,10 @@ impl Store { } }; let mailbox = BasicMailbox::new(tx, fsm, ctx.router.state_cnt().clone()); - if let Err((p, _)) = ctx + if ctx .router .send_and_register(region_id, mailbox, PeerMsg::Start) + .is_err() { panic!( "[region {}] {} failed to register peer", diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 7df27670a35..d80cee3c7d1 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -2,21 +2,18 @@ //! This module implements the interactions with pd. 
-use std::cmp; - use engine_traits::{KvEngine, RaftEngine}; use fail::fail_point; use kvproto::{metapb, pdpb}; use raftstore::store::Transport; use slog::error; -use tikv_util::time::InstantExt; use crate::{ batch::StoreContext, fsm::{PeerFsmDelegate, Store, StoreFsmDelegate}, raft::Peer, router::{PeerTick, StoreTick}, - worker::{PdRegionHeartbeatTask, PdTask}, + worker::pd, }; impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { @@ -55,7 +52,7 @@ impl Store { // stats.set_query_stats(query_stats); - let task = PdTask::StoreHeartbeat { stats }; + let task = pd::Task::StoreHeartbeat { stats }; if let Err(e) = ctx.pd_scheduler.schedule(task) { error!(self.logger(), "notify pd failed"; "store_id" => self.store_id(), @@ -80,7 +77,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, impl Peer { #[inline] pub fn region_heartbeat_pd(&self, ctx: &StoreContext) { - let task = PdTask::RegionHeartbeat(PdRegionHeartbeatTask { + let task = pd::Task::RegionHeartbeat(pd::RegionHeartbeatTask { term: self.term(), region: self.region().clone(), down_peers: self.collect_down_peers(ctx.cfg.max_peer_down_duration.0), @@ -163,7 +160,7 @@ impl Peer { #[inline] pub fn destroy_peer_pd(&self, ctx: &StoreContext) { - let task = PdTask::DestroyPeer { + let task = pd::Task::DestroyPeer { region_id: self.region_id(), }; if let Err(e) = ctx.pd_scheduler.schedule(task) { @@ -179,7 +176,7 @@ impl Peer { #[inline] pub fn ask_batch_split_pd(&self, ctx: &StoreContext, split_keys: Vec>) { - let task = PdTask::AskBatchSplit { + let task = pd::Task::AskBatchSplit { region: self.region().clone(), split_keys, peer: self.peer().clone(), @@ -202,7 +199,7 @@ impl Peer { ctx: &StoreContext, regions: Vec, ) { - let task = PdTask::ReportBatchSplit { regions }; + let task = pd::Task::ReportBatchSplit { regions }; if let Err(e) = ctx.pd_scheduler.schedule(task) { error!( self.logger, @@ -214,7 +211,7 @@ impl Peer { #[inline] pub fn 
update_max_timestamp_pd(&self, ctx: &StoreContext, initial_status: u64) { - let task = PdTask::UpdateMaxTimestamp { + let task = pd::Task::UpdateMaxTimestamp { region_id: self.region_id(), initial_status, txn_ext: self.txn_ext().clone(), diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index 4455ea099f4..ca92729ee6f 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -1,13 +1,13 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::sync::{Arc, Mutex}; +use std::sync::Mutex; use engine_traits::{KvEngine, RaftEngine}; use kvproto::raft_cmdpb::RaftCmdRequest; use raftstore::store::{ can_amend_read, fsm::apply::notify_stale_req, metrics::RAFT_READ_INDEX_PENDING_COUNT, msg::ReadCallback, propose_read_index, should_renew_lease, util::LeaseState, ReadDelegate, - ReadIndexRequest, ReadProgress, TrackVer, Transport, + ReadIndexRequest, ReadProgress, Transport, }; use slog::debug; use tikv_util::time::monotonic_raw_now; @@ -99,10 +99,9 @@ impl Peer { /// /// awake the read tasks waiting in frontend (such as unified thread pool) /// In v1, it's named as response_read. - pub(crate) fn respond_read_index( + pub(crate) fn respond_read_index( &self, read_index_req: &mut ReadIndexRequest, - ctx: &mut StoreContext, ) { debug!( self.logger, @@ -111,7 +110,7 @@ impl Peer { ); RAFT_READ_INDEX_PENDING_COUNT.sub(read_index_req.cmds().len() as i64); let time = monotonic_raw_now(); - for (req, ch, mut read_index) in read_index_req.take_cmds().drain(..) { + for (_, ch, mut read_index) in read_index_req.take_cmds().drain(..) 
{ ch.read_tracker().map(|tracker| { GLOBAL_TRACKERS.with_tracker(*tracker, |t| { t.metrics.read_index_confirm_wait_nanos = (time - read_index_req.propose_time) diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 19f9a7e91b9..d24a4b9d899 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -17,17 +17,12 @@ use raftstore::{ errors::RAFTSTORE_IS_BUSY, store::{ cmd_resp, util::LeaseState, LocalReadContext, LocalReaderCore, ReadDelegate, ReadExecutor, - ReadExecutorProvider, RegionSnapshot, RequestInspector, RequestPolicy, - TLS_LOCAL_READ_METRICS, + ReadExecutorProvider, RegionSnapshot, RequestPolicy, TLS_LOCAL_READ_METRICS, }, Error, Result, }; use slog::{debug, Logger}; -use tikv_util::{ - box_err, - codec::number::decode_u64, - time::{monotonic_raw_now, ThreadReadId}, -}; +use tikv_util::{box_err, codec::number::decode_u64, time::monotonic_raw_now}; use time::Timespec; use txn_types::WriteBatchFlags; @@ -202,13 +197,13 @@ where let mut err = errorpb::Error::default(); match MsgRouter::send(&self.router, region_id, msg) { Ok(()) => return Ok(sub.result().await), - Err(TrySendError::Full(c)) => { + Err(TrySendError::Full(_)) => { TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.channel_full.inc()); err.set_message(RAFTSTORE_IS_BUSY.to_owned()); err.mut_server_is_busy() .set_reason(RAFTSTORE_IS_BUSY.to_owned()); } - Err(TrySendError::Disconnected(c)) => { + Err(TrySendError::Disconnected(_)) => { TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.no_region.inc()); err.set_message(format!("region {} is missing", region_id)); err.mut_region_not_found().set_region_id(region_id); @@ -235,7 +230,7 @@ where let region_id = req.header.get_ref().region_id; TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().renew_lease_advance.inc()); // Send a read query which may renew the lease - let (msg, sub) = 
PeerMsg::raft_query(req.clone()); + let msg = PeerMsg::raft_query(req.clone()).0; if let Err(e) = MsgRouter::send(&self.router, region_id, msg) { debug!( self.logger, @@ -685,7 +680,7 @@ mod tests { ch_tx.clone(), )) .unwrap(); - let snap = block_on(reader.snapshot(cmd.clone())).unwrap(); + block_on(reader.snapshot(cmd.clone())).unwrap(); // Updating lease makes cache miss. assert_eq!( TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 3a3052ab902..ea66719314c 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -11,23 +11,21 @@ //! Follower's read index and replica read is implemenented replica module. //! Leader's read index and lease renew is implemented in lease module. -use std::{cmp, sync::Arc}; +use std::cmp; use crossbeam::channel::TrySendError; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ errorpb, raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse, StatusCmdType}, - raft_serverpb::RaftApplyState, }; use raft::Ready; use raftstore::{ errors::RAFTSTORE_IS_BUSY, store::{ - cmd_resp, fsm::ApplyMetrics, local_metrics::RaftMetrics, - metrics::RAFT_READ_INDEX_PENDING_COUNT, msg::ErrorCallback, region_meta::RegionMeta, util, - util::LeaseState, GroupState, ReadCallback, ReadIndexContext, ReadProgress, RequestPolicy, - Transport, + cmd_resp, local_metrics::RaftMetrics, metrics::RAFT_READ_INDEX_PENDING_COUNT, + msg::ErrorCallback, region_meta::RegionMeta, util, util::LeaseState, GroupState, + ReadIndexContext, ReadProgress, RequestPolicy, Transport, }, Error, Result, }; @@ -40,8 +38,7 @@ use crate::{ fsm::PeerFsmDelegate, raft::Peer, router::{ - message::RaftRequest, ApplyRes, DebugInfoChannel, PeerMsg, QueryResChannel, QueryResult, - ReadResponse, + message::RaftRequest, DebugInfoChannel, PeerMsg, QueryResChannel, QueryResult, 
ReadResponse, }, }; @@ -146,7 +143,6 @@ impl Peer { // TODO: add flashback_state check // Check whether the store has the right peer to handle the request. - let leader_id = self.leader_id(); let request = msg.get_requests(); // TODO: add force leader @@ -158,7 +154,7 @@ impl Peer { let allow_replica_read = msg.get_header().get_replica_read(); if !self.is_leader() && !is_read_index_request && !allow_replica_read { raft_metrics.invalid_proposal.not_leader.inc(); - return Err(Error::NotLeader(self.region_id(), None)); + return Err(Error::NotLeader(self.region_id(), self.leader())); } // peer_id must be the same as peer's. @@ -186,7 +182,7 @@ impl Peer { fn read_index( &mut self, ctx: &mut StoreContext, - mut req: RaftCmdRequest, + req: RaftCmdRequest, ch: QueryResChannel, ) { // TODO: add pre_read_index to handle splitting or merging @@ -222,7 +218,7 @@ impl Peer { if self.ready_to_handle_read() { while let Some(mut read) = self.pending_reads_mut().pop_front() { - self.respond_read_index(&mut read, ctx); + self.respond_read_index(&mut read); } } } @@ -264,9 +260,9 @@ impl Peer { && read.cmds()[0].0.get_requests()[0].get_cmd_type() == CmdType::ReadIndex; if is_read_index_request { - self.respond_read_index(&mut read, ctx); + self.respond_read_index(&mut read); } else if self.ready_to_handle_unsafe_replica_read(read.read_index.unwrap()) { - self.respond_replica_read(&mut read, ctx); + self.respond_replica_read(&mut read); } else { // TODO: `ReadIndex` requests could be blocked. 
self.pending_reads_mut().push_front(read); @@ -416,7 +412,7 @@ impl Peer { self.post_pending_read_index_on_replica(ctx) } else if self.ready_to_handle_read() { while let Some(mut read) = self.pending_reads_mut().pop_front() { - self.respond_read_index(&mut read, ctx); + self.respond_read_index(&mut read); } } self.pending_reads_mut().gc(); diff --git a/components/raftstore-v2/src/operation/query/replica.rs b/components/raftstore-v2/src/operation/query/replica.rs index 9433cd10c52..fb00adbbc5a 100644 --- a/components/raftstore-v2/src/operation/query/replica.rs +++ b/components/raftstore-v2/src/operation/query/replica.rs @@ -62,10 +62,9 @@ impl Peer { self.set_has_ready(); } - pub(crate) fn respond_replica_read( + pub(crate) fn respond_replica_read( &self, read_index_req: &mut ReadIndexRequest, - ctx: &mut StoreContext, ) { debug!( self.logger, @@ -74,7 +73,7 @@ impl Peer { ); RAFT_READ_INDEX_PENDING_COUNT.sub(read_index_req.cmds().len() as i64); let time = monotonic_raw_now(); - for (req, ch, mut read_index) in read_index_req.take_cmds().drain(..) { + for (req, ch, _) in read_index_req.take_cmds().drain(..) 
{ ch.read_tracker().map(|tracker| { GLOBAL_TRACKERS.with_tracker(*tracker, |t| { t.metrics.read_index_confirm_wait_nanos = (time - read_index_req.propose_time) diff --git a/components/raftstore-v2/src/operation/ready/async_writer.rs b/components/raftstore-v2/src/operation/ready/async_writer.rs index a7bce44fe05..e89854f39f4 100644 --- a/components/raftstore-v2/src/operation/ready/async_writer.rs +++ b/components/raftstore-v2/src/operation/ready/async_writer.rs @@ -86,7 +86,6 @@ impl AsyncWriter { } fn merge(&mut self, task: WriteTask) -> Option> { - let ready_number = task.ready_number(); if self.unpersisted_readies.is_empty() { // If this ready don't need to be persisted and there is no previous unpersisted // ready, we can safely consider it is persisted so the persisted msgs can be diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index baf66dfa6fc..e9046af2831 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -22,12 +22,9 @@ mod snapshot; use std::{cmp, time::Instant}; -use engine_traits::{KvEngine, MiscExt, RaftEngine}; +use engine_traits::{KvEngine, RaftEngine}; use error_code::ErrorCodeExt; -use kvproto::{ - raft_cmdpb::AdminCmdType, - raft_serverpb::{PeerState, RaftMessage, RaftSnapshotData}, -}; +use kvproto::{raft_cmdpb::AdminCmdType, raft_serverpb::RaftMessage}; use protobuf::Message as _; use raft::{eraftpb, prelude::MessageType, Ready, StateRole, INVALID_ID}; use raftstore::store::{util, ExtraStates, FetchedLogs, ReadProgress, Transport, WriteTask}; @@ -43,7 +40,6 @@ use crate::{ fsm::PeerFsmDelegate, raft::{Peer, Storage}, router::{ApplyTask, PeerTick}, - Result, }; impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { @@ -169,11 +165,7 @@ impl Peer { /// /// If the recipient can't be found, `None` is returned. 
#[inline] - fn build_raft_message( - &mut self, - ctx: &mut StoreContext, - msg: eraftpb::Message, - ) -> Option { + fn build_raft_message(&mut self, msg: eraftpb::Message) -> Option { let to_peer = match self.peer_from_cache(msg.to) { Some(p) => p, None => { @@ -265,7 +257,7 @@ impl Peer { } } } - self.schedule_apply_committed_entries(ctx, committed_entries); + self.schedule_apply_committed_entries(committed_entries); } /// Processing the ready of raft. A detail description of how it's handled @@ -320,7 +312,7 @@ impl Peer { if !ready.messages().is_empty() { debug_assert!(self.is_leader()); for msg in ready.take_messages() { - if let Some(msg) = self.build_raft_message(ctx, msg) { + if let Some(msg) = self.build_raft_message(msg) { self.send_raft_message(ctx, msg); } } @@ -347,7 +339,7 @@ impl Peer { write_task.messages = ready .take_persisted_messages() .into_iter() - .flat_map(|m| self.build_raft_message(ctx, m)) + .flat_map(|m| self.build_raft_message(m)) .collect(); } if !self.serving() { @@ -408,11 +400,11 @@ impl Peer { let persisted_number = self.async_writer.persisted_number(); self.raft_group_mut().on_persist_ready(persisted_number); let persisted_index = self.persisted_index(); - /// The apply snapshot process order would be: - /// - Get the snapshot from the ready - /// - Wait for async writer to load this tablet - /// In this step, the snapshot loading has been finished, but some apply - /// state need to update. + // The apply snapshot process order would be: + // - Get the snapshot from the ready + // - Wait for async writer to load this tablet + // In this step, the snapshot loading has been finished, but some apply + // state need to update. 
if has_snapshot { self.on_applied_snapshot(ctx); } diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 5bf9fc27269..86817ab17d3 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -19,17 +19,16 @@ //! peer fsm, then Raft will get the snapshot. use std::{ - borrow::BorrowMut, fmt::{self, Debug}, fs, mem, sync::{ atomic::{AtomicBool, AtomicU64, Ordering}, - mpsc, Arc, + Arc, }, }; use engine_traits::{KvEngine, RaftEngine, TabletContext, TabletRegistry}; -use kvproto::raft_serverpb::{PeerState, RaftSnapshotData, RegionLocalState}; +use kvproto::raft_serverpb::{PeerState, RaftSnapshotData}; use protobuf::Message; use raft::eraftpb::Snapshot; use raftstore::store::{ @@ -37,13 +36,12 @@ use raftstore::store::{ TabletSnapManager, Transport, WriteTask, RAFT_INIT_LOG_INDEX, }; use slog::{error, info, warn}; -use tikv_util::{box_err, box_try, worker::Scheduler}; +use tikv_util::box_err; use crate::{ fsm::ApplyResReporter, operation::command::SPLIT_PREFIX, raft::{Apply, Peer, Storage}, - router::{ApplyTask, PeerTick}, Result, StoreContext, }; @@ -60,11 +58,9 @@ pub enum SnapState { impl PartialEq for SnapState { fn eq(&self, other: &SnapState) -> bool { match (self, other) { - (&SnapState::Relax, &SnapState::Relax) - | (&SnapState::Generating { .. }, &SnapState::Generating { .. }) => true, - (&SnapState::Generated(ref snap1), &SnapState::Generated(ref snap2)) => { - *snap1 == *snap2 - } + (SnapState::Relax, SnapState::Relax) + | (SnapState::Generating { .. }, SnapState::Generating { .. }) => true, + (SnapState::Generated(snap1), SnapState::Generated(snap2)) => *snap1 == *snap2, _ => false, } } @@ -203,8 +199,8 @@ impl Storage { /// unavailable snapshot. 
pub fn snapshot(&self, request_index: u64, to: u64) -> raft::Result { let mut snap_state = self.snap_state_mut(); - match *snap_state { - SnapState::Generating { ref canceled, .. } => { + match &*snap_state { + SnapState::Generating { canceled, .. } => { if canceled.load(Ordering::SeqCst) { self.cancel_generating_snap(None); } else { @@ -213,7 +209,7 @@ impl Storage { )); } } - SnapState::Generated(ref s) => { + SnapState::Generated(_) => { // TODO: `to` may not be equal to the generated snapshot. let SnapState::Generated(snap) = mem::replace(&mut *snap_state, SnapState::Relax) else { unreachable!() }; if self.validate_snap(&snap, request_index) { @@ -331,9 +327,9 @@ impl Storage { let snap = res.unwrap(); let mut snap_state = self.snap_state_mut(); let SnapState::Generating { - ref canceled, - ref index, - } = *snap_state else { return false }; + index, + .. + } = &*snap_state else { return false }; if snap.get_metadata().get_index() < index.load(Ordering::SeqCst) { warn!( @@ -352,7 +348,7 @@ impl Storage { } pub fn on_applied_snapshot(&mut self) { - let mut entry = self.entry_storage_mut(); + let entry = self.entry_storage_mut(); let term = entry.truncated_term(); let index = entry.truncated_index(); entry.set_applied_term(term); @@ -428,7 +424,7 @@ impl Storage { let _ = fs::remove_dir_all(path); } }; - task.persisted_cb = (Some(Box::new(hook))); + task.persisted_cb = Some(Box::new(hook)); task.has_snapshot = true; Ok(()) } diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index d4a4cf61602..30ced7bdbd7 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -1,6 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{mem, sync::Arc}; +use std::mem; use engine_traits::{CachedTablet, KvEngine, TabletRegistry, WriteBatch}; use kvproto::{metapb, raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; @@ -8,12 +8,7 @@ use raftstore::store::{fsm::apply::DEFAULT_APPLY_WB_SIZE, ReadTask}; use slog::Logger; use tikv_util::worker::Scheduler; -use super::Peer; -use crate::{ - fsm::ApplyResReporter, - operation::AdminCmdResult, - router::{ApplyRes, CmdResChannel}, -}; +use crate::{operation::AdminCmdResult, router::CmdResChannel}; /// Apply applies all the committed commands to kv db. pub struct Apply { diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 6111e75e691..f211313e1b5 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -12,33 +12,19 @@ use engine_traits::{CachedTablet, KvEngine, RaftEngine, TabletContext, TabletReg use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, pdpb, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; use raft::{RawNode, StateRole}; -use raftstore::{ - coprocessor::{CoprocessorHost, RegionChangeEvent, RegionChangeReason}, - store::{ - fsm::Proposal, - util::{Lease, RegionReadProgress}, - Config, EntryStorage, LocksStatus, PeerStat, ProposalQueue, ReadDelegate, ReadIndexQueue, - ReadProgress, TrackVer, TxnExt, - }, - Error, +use raftstore::store::{ + util::{Lease, RegionReadProgress}, + Config, EntryStorage, LocksStatus, PeerStat, ProposalQueue, ReadDelegate, ReadIndexQueue, + ReadProgress, TxnExt, }; -use slog::{debug, error, info, o, warn, Logger}; -use tikv_util::{ - box_err, - config::ReadableSize, - time::{monotonic_raw_now, Instant as TiInstant}, - worker::Scheduler, - Either, -}; -use time::Timespec; +use slog::Logger; -use super::{storage::Storage, Apply}; +use super::storage::Storage; use crate::{ batch::StoreContext, - fsm::{ApplyFsm, ApplyScheduler}, + fsm::ApplyScheduler, operation::{AsyncWriter, DestroyProgress, 
ProposalControl, SimpleWriteEncoder}, router::{CmdResChannel, PeerTick, QueryResChannel}, - worker::PdTask, Result, }; @@ -193,7 +179,6 @@ impl Peer { // host: &CoprocessorHost, reader: &mut ReadDelegate, region: metapb::Region, - reason: RegionChangeReason, tablet_index: u64, ) { if self.region().get_region_epoch().get_version() < region.get_region_epoch().get_version() @@ -489,11 +474,6 @@ impl Peer { &mut self.destroy_progress } - #[inline] - pub(crate) fn has_applied_to_current_term(&self) -> bool { - self.entry_storage().applied_term() == self.term() - } - #[inline] pub fn simple_write_encoder_mut(&mut self) -> &mut Option { &mut self.raw_write_encoder diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index a27e79549e1..49a0f547e1a 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -3,7 +3,6 @@ use std::{ cell::{RefCell, RefMut}, fmt::{self, Debug, Formatter}, - sync::{mpsc::Receiver, Arc}, }; use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; @@ -15,10 +14,8 @@ use raft::{ eraftpb::{ConfState, Entry, Snapshot}, GetEntriesContext, RaftState, INVALID_ID, }; -use raftstore::store::{ - util, EntryStorage, ReadTask, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, -}; -use slog::{info, o, Logger}; +use raftstore::store::{util, EntryStorage, ReadTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM}; +use slog::{o, Logger}; use tikv_util::{box_err, store::find_peer, worker::Scheduler}; use crate::{ @@ -366,31 +363,29 @@ impl raft::Storage for Storage { #[cfg(test)] mod tests { use std::{ - sync::mpsc::{sync_channel, SyncSender}, + sync::mpsc::{sync_channel, Receiver, SyncSender}, time::Duration, }; use engine_test::{ ctor::{CfOptions, DbOptions}, - kv::{KvTestEngine, TestTabletFactory}, - raft::RaftTestEngine, + kv::TestTabletFactory, }; use engine_traits::{ - KvEngine, RaftEngine, RaftEngineReadOnly, RaftLogBatch, TabletContext, TabletRegistry, - 
DATA_CFS, + RaftEngine, RaftEngineReadOnly, RaftLogBatch, TabletContext, TabletRegistry, DATA_CFS, }; use kvproto::{ metapb::{Peer, Region}, raft_serverpb::PeerState, }; - use raft::{eraftpb::Snapshot as RaftSnapshot, Error as RaftError, StorageError}; + use raft::{Error as RaftError, StorageError}; use raftstore::store::{ - util::new_empty_snapshot, AsyncReadNotifier, FetchedLogs, GenSnapRes, ReadRunner, ReadTask, - TabletSnapKey, TabletSnapManager, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, + util::new_empty_snapshot, AsyncReadNotifier, FetchedLogs, GenSnapRes, ReadRunner, + TabletSnapKey, TabletSnapManager, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }; use slog::o; use tempfile::TempDir; - use tikv_util::worker::{Runnable, Worker}; + use tikv_util::worker::Worker; use super::*; use crate::{fsm::ApplyResReporter, raft::Apply, router::ApplyRes}; @@ -480,7 +475,7 @@ mod tests { let cf_opts = DATA_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); let factory = Box::new(TestTabletFactory::new(ops, cf_opts)); let reg = TabletRegistry::new(factory, path.path().join("tablet")).unwrap(); - let mut worker = Worker::new("test-read-worker").lazy_build("test-read-worker"); + let worker = Worker::new("test-read-worker").lazy_build("test-read-worker"); let sched = worker.scheduler(); let logger = slog_global::borrow_global().new(o!()); let mut s = Storage::new(4, 6, raft_engine.clone(), sched, &logger.clone()) @@ -533,7 +528,7 @@ mod tests { let mut worker = Worker::new("test-read-worker").lazy_build("test-read-worker"); let sched = worker.scheduler(); let logger = slog_global::borrow_global().new(o!()); - let mut s = Storage::new(4, 6, raft_engine.clone(), sched.clone(), &logger.clone()) + let s = Storage::new(4, 6, raft_engine.clone(), sched.clone(), &logger.clone()) .unwrap() .unwrap(); let (router, rx) = TestRouter::new(); @@ -577,7 +572,7 @@ mod tests { assert_eq!(snap.unwrap_err(), unavailable); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); 
apply.schedule_gen_snapshot(gen_task); - let res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); + rx.recv_timeout(Duration::from_secs(1)).unwrap(); s.cancel_generating_snap(None); assert_eq!(*s.snap_state.borrow(), SnapState::Relax); diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 3dda00eb270..7208a6b5bef 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -8,9 +8,7 @@ use kvproto::{ raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, raft_serverpb::RaftMessage, }; -use raft::eraftpb::Snapshot as RaftSnapshot; use raftstore::store::{AsyncReadNotifier, FetchedLogs, GenSnapRes, RegionSnapshot}; -use slog::Logger; use super::PeerMsg; use crate::{batch::StoreRouter, operation::LocalReader, StoreMeta}; diff --git a/components/raftstore-v2/src/router/internal_message.rs b/components/raftstore-v2/src/router/internal_message.rs index 1507d404297..224723bf4ad 100644 --- a/components/raftstore-v2/src/router/internal_message.rs +++ b/components/raftstore-v2/src/router/internal_message.rs @@ -1,7 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use raftstore::store::fsm::ChangePeer; - use crate::operation::{AdminCmdResult, CommittedEntries, GenSnapTask}; #[derive(Debug)] diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 13037bd1a26..447efe8ee1a 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -3,9 +3,7 @@ // #[PerformanceCriticalPath] use std::fmt; -use engine_traits::Snapshot; use kvproto::{raft_cmdpb::RaftCmdRequest, raft_serverpb::RaftMessage}; -use raft::eraftpb::Snapshot as RaftSnapshot; use raftstore::store::{metrics::RaftEventDurationType, FetchedLogs, GenSnapRes}; use tikv_util::time::Instant; diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index b6da3c804f0..423c9e8e326 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -404,7 +404,7 @@ impl ReadCallback for QueryResChannel { type Response = QueryResult; #[inline] - fn set_result(mut self, res: QueryResult) { + fn set_result(self, res: QueryResult) { self.set_result(res); } diff --git a/components/raftstore-v2/src/worker/mod.rs b/components/raftstore-v2/src/worker/mod.rs index ad8249d22a4..3d4e69fdcf6 100644 --- a/components/raftstore-v2/src/worker/mod.rs +++ b/components/raftstore-v2/src/worker/mod.rs @@ -1,5 +1,3 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-mod pd; - -pub use pd::{RegionHeartbeatTask as PdRegionHeartbeatTask, Runner as PdRunner, Task as PdTask}; +pub mod pd; diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 9803039e392..80e12dc53c7 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -2,10 +2,7 @@ use std::{ fmt::{self, Display, Formatter}, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, - }, + sync::{atomic::AtomicBool, Arc}, }; use causal_ts::CausalTsProviderImpl; @@ -204,10 +201,9 @@ where } } -pub mod requests { +mod requests { use kvproto::raft_cmdpb::{ AdminCmdType, AdminRequest, ChangePeerRequest, ChangePeerV2Request, RaftCmdRequest, - SplitRequest, }; use raft::eraftpb::ConfChangeType; @@ -271,41 +267,6 @@ pub mod requests { req } - pub fn new_split_region_request( - split_key: Vec, - new_region_id: u64, - peer_ids: Vec, - right_derive: bool, - ) -> AdminRequest { - let mut req = AdminRequest::default(); - req.set_cmd_type(AdminCmdType::Split); - req.mut_split().set_split_key(split_key); - req.mut_split().set_new_region_id(new_region_id); - req.mut_split().set_new_peer_ids(peer_ids); - req.mut_split().set_right_derive(right_derive); - req - } - - pub fn new_batch_split_region_request( - split_keys: Vec>, - ids: Vec, - right_derive: bool, - ) -> AdminRequest { - let mut req = AdminRequest::default(); - req.set_cmd_type(AdminCmdType::BatchSplit); - req.mut_splits().set_right_derive(right_derive); - let mut requests = Vec::with_capacity(ids.len()); - for (mut id, key) in ids.into_iter().zip(split_keys) { - let mut split = SplitRequest::default(); - split.set_split_key(key); - split.set_new_region_id(id.get_new_region_id()); - split.set_new_peer_ids(id.take_new_peer_ids()); - requests.push(split); - } - req.mut_splits().set_requests(requests.into()); - req - } - pub fn new_transfer_leader_request( peer: metapb::Peer, peers: Vec, @@ -316,12 +277,4 @@ pub mod requests { 
req.mut_transfer_leader().set_peers(peers.into()); req } - - pub fn new_merge_request(merge: pdpb::Merge) -> AdminRequest { - let mut req = AdminRequest::default(); - req.set_cmd_type(AdminCmdType::PrepareMerge); - req.mut_prepare_merge() - .set_target(merge.get_target().to_owned()); - req - } } diff --git a/components/raftstore-v2/src/worker/pd/region_heartbeat.rs b/components/raftstore-v2/src/worker/pd/region_heartbeat.rs index ad0293d0b6d..4096467087a 100644 --- a/components/raftstore-v2/src/worker/pd/region_heartbeat.rs +++ b/components/raftstore-v2/src/worker/pd/region_heartbeat.rs @@ -3,18 +3,9 @@ use std::time::Duration; use engine_traits::{KvEngine, RaftEngine}; -use kvproto::{ - metapb, pdpb, - raft_cmdpb::{ - AdminCmdType, AdminRequest, ChangePeerRequest, ChangePeerV2Request, RaftCmdRequest, - SplitRequest, - }, - raft_serverpb::RaftMessage, - replication_modepb::{RegionReplicationStatus, StoreDrAutoSyncStatus}, -}; +use kvproto::{metapb, pdpb}; use pd_client::{metrics::PD_HEARTBEAT_COUNTER_VEC, PdClient, RegionStat}; -use raft::eraftpb::ConfChangeType; -use slog::{debug, error, info}; +use slog::{debug, info}; use tikv_util::{store::QueryStats, time::UnixSecs}; use super::{requests::*, Runner}; diff --git a/components/raftstore-v2/src/worker/pd/store_heartbeat.rs b/components/raftstore-v2/src/worker/pd/store_heartbeat.rs index 8f49e7f025f..2fbe378cff8 100644 --- a/components/raftstore-v2/src/worker/pd/store_heartbeat.rs +++ b/components/raftstore-v2/src/worker/pd/store_heartbeat.rs @@ -247,7 +247,6 @@ where // TODO: slow score - let router = self.router.clone(); let resp = self.pd_client.store_heartbeat(stats, None, None); let logger = self.logger.clone(); let f = async move { From 931cf7fd75c12900332a3a458f54ef7ef496c68b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Tue, 13 Dec 2022 15:04:52 +0100 Subject: [PATCH 0404/1149] *: Update security policy (#13929) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit close tikv/tikv#13928 Signed-off-by: Daniël van Eeden Co-authored-by: Xiaoguang Sun Co-authored-by: Ti Chi Robot --- README.md | 2 +- security/SECURITY.md => SECURITY.md | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) rename security/SECURITY.md => SECURITY.md (98%) diff --git a/README.md b/README.md index 65bad6835ee..4b3e7e6c397 100644 --- a/README.md +++ b/README.md @@ -151,7 +151,7 @@ A third-party security auditing was performed by Cure53. See the full report [he To report a security vulnerability, please send an email to [TiKV-security](mailto:tikv-security@lists.cncf.io) group. -See [Security](./security/SECURITY.md) for the process and policy followed by the TiKV project. +See [Security](SECURITY.md) for the process and policy followed by the TiKV project. ## Communication diff --git a/security/SECURITY.md b/SECURITY.md similarity index 98% rename from security/SECURITY.md rename to SECURITY.md index 353a70f039f..30be9e0daf0 100644 --- a/security/SECURITY.md +++ b/SECURITY.md @@ -18,6 +18,8 @@ The following are the versions that we support for security updates | Version | Supported | | ------- | ------------------ | +| 6.x | :white_check_mark: | +| 5.x | :white_check_mark: | | 4.x | :white_check_mark: | | 3.x | :white_check_mark: | | 2.x | :white_check_mark: | @@ -94,4 +96,4 @@ IvCICV7zG1cyuM/Z2Y7/TJ+upvahP46nM3s3G15b8FYuTSmRN1Kp9+mBt2BHqOy1 ulx+VF4Lf9n3ydf593Nha9bMJ/rnSp01 =XbYK -----END PGP PUBLIC KEY BLOCK----- -``` \ No newline at end of file +``` From 5f2282594d356705abd39c42741ba902c1db6ede Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 14 Dec 2022 12:00:53 +0800 Subject: [PATCH 0405/1149] engine_trait: introduce flush state (#13925) ref tikv/tikv#12842 Flush state is used to trace persisted apply index. This is the first PR to remove WAL for raftstore v2. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 + components/engine_panic/src/raft_engine.rs | 40 +++- components/engine_rocks/src/engine.rs | 14 +- components/engine_rocks/src/event_listener.rs | 25 ++- components/engine_rocks/src/raft_engine.rs | 54 ++++- components/engine_traits/src/flush.rs | 202 ++++++++++++++++++ components/engine_traits/src/lib.rs | 2 + components/engine_traits/src/raft_engine.rs | 48 ++++- components/raft_log_engine/Cargo.toml | 4 + components/raft_log_engine/src/engine.rs | 200 ++++++++++++++++- components/raft_log_engine/src/lib.rs | 1 + components/raftstore-v2/src/operation/life.rs | 2 +- components/raftstore-v2/src/raft/storage.rs | 12 +- .../tests/integrations/test_life.rs | 6 +- .../raftstore/src/store/async_io/write.rs | 5 +- .../src/store/async_io/write_tests.rs | 12 +- 16 files changed, 579 insertions(+), 50 deletions(-) create mode 100644 components/engine_traits/src/flush.rs diff --git a/Cargo.lock b/Cargo.lock index 494846ccb0d..432d2ce3c26 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4251,6 +4251,7 @@ dependencies = [ name = "raft_log_engine" version = "0.0.1" dependencies = [ + "codec", "encryption", "engine_traits", "file_system", @@ -4265,6 +4266,7 @@ dependencies = [ "serde_derive", "slog", "slog-global", + "tempfile", "tikv_util", "time", "tracker", diff --git a/components/engine_panic/src/raft_engine.rs b/components/engine_panic/src/raft_engine.rs index 603eb118c5c..f5e0c424db0 100644 --- a/components/engine_panic/src/raft_engine.rs +++ b/components/engine_panic/src/raft_engine.rs @@ -47,11 +47,23 @@ impl RaftEngineReadOnly for PanicEngine { panic!() } - fn get_region_state(&self, raft_group_id: u64) -> Result> { + fn get_region_state( + &self, + raft_group_id: u64, + apply_index: u64, + ) -> Result> { + panic!() + } + + fn get_apply_state( + &self, + raft_group_id: u64, + apply_index: u64, + ) -> Result> { panic!() } - fn get_apply_state(&self, raft_group_id: u64) -> Result> { + fn 
get_flushed_index(&self, raft_group_id: u64, cf: &str) -> Result> { panic!() } @@ -186,11 +198,31 @@ impl RaftLogBatch for PanicWriteBatch { panic!() } - fn put_region_state(&mut self, raft_group_id: u64, state: &RegionLocalState) -> Result<()> { + fn put_region_state( + &mut self, + raft_group_id: u64, + apply_index: u64, + state: &RegionLocalState, + ) -> Result<()> { + panic!() + } + + fn put_apply_state( + &mut self, + raft_group_id: u64, + apply_index: u64, + state: &RaftApplyState, + ) -> Result<()> { panic!() } - fn put_apply_state(&mut self, raft_group_id: u64, state: &RaftApplyState) -> Result<()> { + fn put_flushed_index( + &mut self, + raft_group_id: u64, + cf: &str, + tablet_index: u64, + apply_index: u64, + ) -> Result<()> { panic!() } diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 0c37120e7fc..70f6562e94b 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -2,7 +2,9 @@ use std::{any::Any, sync::Arc}; -use engine_traits::{IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SyncMutable}; +use engine_traits::{ + FlushState, IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SyncMutable, +}; use rocksdb::{DBIterator, Writable, DB}; use crate::{ @@ -24,6 +26,7 @@ use crate::{ pub struct RocksEngine { db: Arc, support_multi_batch_write: bool, + flush_state: Option>, } impl RocksEngine { @@ -35,6 +38,7 @@ impl RocksEngine { RocksEngine { db: db.clone(), support_multi_batch_write: db.get_db_options().is_enable_multi_batch_write(), + flush_state: None, } } @@ -49,6 +53,14 @@ impl RocksEngine { pub fn support_multi_batch_write(&self) -> bool { self.support_multi_batch_write } + + pub fn set_flush_state(&mut self, flush_state: Arc) { + self.flush_state = Some(flush_state); + } + + pub fn flush_state(&self) -> Option> { + self.flush_state.clone() + } } impl KvEngine for RocksEngine { diff --git a/components/engine_rocks/src/event_listener.rs 
b/components/engine_rocks/src/event_listener.rs index ad7a9de455f..8bf3035bc55 100644 --- a/components/engine_rocks/src/event_listener.rs +++ b/components/engine_rocks/src/event_listener.rs @@ -1,10 +1,11 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. +use engine_traits::{PersistenceListener, RaftEngine}; use file_system::{get_io_type, set_io_type, IoType}; use regex::Regex; use rocksdb::{ - CompactionJobInfo, DBBackgroundErrorReason, FlushJobInfo, IngestionInfo, MutableStatus, - SubcompactionJobInfo, WriteStallInfo, + CompactionJobInfo, DBBackgroundErrorReason, FlushJobInfo, IngestionInfo, MemTableInfo, + MutableStatus, SubcompactionJobInfo, WriteStallInfo, }; use tikv_util::{error, metrics::CRITICAL_ERROR, set_panic_mark, warn, worker::Scheduler}; @@ -178,6 +179,26 @@ fn resolve_sst_filename_from_err(err: &str) -> Option { Some(filename) } +pub struct RocksPersistenceListener(PersistenceListener); + +impl RocksPersistenceListener { + pub fn new(listener: PersistenceListener) -> RocksPersistenceListener { + RocksPersistenceListener(listener) + } +} + +impl rocksdb::EventListener for RocksPersistenceListener { + fn on_memtable_sealed(&self, info: &MemTableInfo) { + self.0 + .on_memtable_sealed(info.cf_name().to_string(), info.first_seqno()); + } + + fn on_flush_completed(&self, job: &FlushJobInfo) { + self.0 + .on_flush_completed(job.cf_name(), job.smallest_seqno()); + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index 79cd8350519..9095ef27dfd 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -144,14 +144,26 @@ impl RaftEngineReadOnly for RocksEngine { self.get_msg_cf(CF_DEFAULT, keys::PREPARE_BOOTSTRAP_KEY) } - fn get_region_state(&self, raft_group_id: u64) -> Result> { - let key = keys::region_state_key(raft_group_id); - self.get_msg_cf(CF_DEFAULT, &key) + // Following methods are 
used by raftstore v2 only, which always use raft log + // engine. + fn get_region_state( + &self, + _raft_group_id: u64, + _apply_index: u64, + ) -> Result> { + panic!() } - fn get_apply_state(&self, raft_group_id: u64) -> Result> { - let key = keys::apply_state_key(raft_group_id); - self.get_msg_cf(CF_DEFAULT, &key) + fn get_apply_state( + &self, + _raft_group_id: u64, + _apply_index: u64, + ) -> Result> { + panic!() + } + + fn get_flushed_index(&self, _raft_group_id: u64, _cf: &str) -> Result> { + panic!() } fn get_recover_state(&self) -> Result> { @@ -405,12 +417,34 @@ impl RaftLogBatch for RocksWriteBatchVec { self.delete(keys::PREPARE_BOOTSTRAP_KEY) } - fn put_region_state(&mut self, raft_group_id: u64, state: &RegionLocalState) -> Result<()> { - self.put_msg(&keys::region_state_key(raft_group_id), state) + // Following methods are used by raftstore v2 only, which always use raft log + // engine. + fn put_region_state( + &mut self, + _raft_group_id: u64, + _apply_index: u64, + _state: &RegionLocalState, + ) -> Result<()> { + panic!() + } + + fn put_apply_state( + &mut self, + _raft_group_id: u64, + _apply_index: u64, + _state: &RaftApplyState, + ) -> Result<()> { + panic!() } - fn put_apply_state(&mut self, raft_group_id: u64, state: &RaftApplyState) -> Result<()> { - self.put_msg(&keys::apply_state_key(raft_group_id), state) + fn put_flushed_index( + &mut self, + _raft_group_id: u64, + _cf: &str, + _tablet_index: u64, + _apply_index: u64, + ) -> Result<()> { + panic!() } fn put_recover_state(&mut self, state: &StoreRecoverState) -> Result<()> { diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs new file mode 100644 index 00000000000..9de5369ab54 --- /dev/null +++ b/components/engine_traits/src/flush.rs @@ -0,0 +1,202 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! A helper class to detect flush event and trace apply index. +//! +//! 
The whole idea is when all CFs have flushed to disk, then the apply index +//! should be able to be advanced to the latest. The implementations depends on +//! the assumption that memtable/write buffer is frozen one by one and flushed +//! one by one. +//! +//! Because apply index can be arbitrary value after restart, so apply related +//! states like `RaftApplyState` and `RegionLocalState` are mapped to index. +//! Once apply index is confirmed, the latest states before apply index should +//! be used as the start state. + +use std::{ + mem, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, Mutex, + }, +}; + +use kvproto::raft_serverpb::{RaftApplyState, RegionLocalState}; +use tikv_util::Either; + +use crate::{RaftEngine, RaftLogBatch}; + +#[derive(Debug)] +enum StateChange { + ApplyState(RaftApplyState), + RegionState(RegionLocalState), +} + +/// States that is related to apply progress. +#[derive(Default, Debug)] +struct StateChanges { + /// apply index, state change + changes: Vec<(u64, StateChange)>, +} + +struct FlushProgress { + cf: String, + id: u64, + apply_index: u64, + state_changes: StateChanges, +} + +/// A share state between raftstore and underlying engine. +/// +/// raftstore will update state changes and corresponding apply index, when +/// flush, `PersistenceListener` will query states related to the memtable +/// and persist the relation to raft engine. +#[derive(Default, Debug)] +pub struct FlushState { + applied_index: AtomicU64, + changes: Mutex, +} + +impl FlushState { + /// Set the latest applied index. + #[inline] + pub fn set_applied_index(&self, index: u64) { + self.applied_index.store(index, Ordering::Release); + } + + /// Query the applied index. + #[inline] + pub fn applied_index(&self) -> u64 { + self.applied_index.load(Ordering::Acquire) + } + + /// Record an apply state change. + /// + /// This can be triggered by admin command like compact log. 
General log + /// apply will not trigger the change, instead they are recorded by + /// `set_applied_index`. + #[inline] + pub fn update_apply_state(&self, index: u64, state: RaftApplyState) { + self.changes + .lock() + .unwrap() + .changes + .push((index, StateChange::ApplyState(state))); + } + + /// Record a region state change. + /// + /// This can be triggered by admin command like split/merge. + #[inline] + pub fn update_region_state(&self, index: u64, state: RegionLocalState) { + self.changes + .lock() + .unwrap() + .changes + .push((index, StateChange::RegionState(state))); + } + + /// Check if there is any state change. + #[inline] + pub fn is_empty(&self) -> bool { + self.changes.lock().unwrap().changes.is_empty() + } + + /// Get the last changed state. + #[inline] + pub fn last_state(&self) -> Option<(u64, Either)> { + let changes = self.changes.lock().unwrap(); + let (index, state) = changes.changes.last()?; + let state = match state { + StateChange::ApplyState(state) => Either::Left(state.clone()), + StateChange::RegionState(state) => Either::Right(state.clone()), + }; + Some((*index, state)) + } +} + +/// A flush listener that maps memtable to apply index and persist the relation +/// to raft engine. +pub struct PersistenceListener { + region_id: u64, + tablet_index: u64, + state: Arc, + progress: Mutex>, + raft: ER, +} + +impl PersistenceListener { + pub fn new(region_id: u64, tablet_index: u64, state: Arc, raft: ER) -> Self { + Self { + region_id, + tablet_index, + state, + progress: Mutex::new(Vec::new()), + raft, + } + } +} + +impl PersistenceListener { + pub fn flush_state(&self) -> &Arc { + &self.state + } + + /// Called when memtable is frozen. + /// + /// `id` should be unique between memtables, which is used to identify + /// memtable in the flushed event. + pub fn on_memtable_sealed(&self, cf: String, id: u64) { + // The correctness relies on the assumption that there will be only one + // thread writting to the DB and increasing apply index. 
+ let mut state_changes = self.state.changes.lock().unwrap(); + // Query within lock so it's correct even in manually flush. + let apply_index = self.state.applied_index.load(Ordering::SeqCst); + let changes = mem::take(&mut *state_changes); + drop(state_changes); + self.progress.lock().unwrap().push(FlushProgress { + cf, + id, + apply_index, + state_changes: changes, + }); + } + + /// Called a memtable finished flushing. + pub fn on_flush_completed(&self, cf: &str, id: u64) { + // Maybe we should hook the compaction to avoid the file is compacted before + // being recorded. + let pr = { + let mut prs = self.progress.lock().unwrap(); + let pos = prs + .iter() + .position(|pr| pr.cf == cf && pr.id == id) + .unwrap(); + prs.swap_remove(pos) + }; + let mut batch = self.raft.log_batch(1); + // TODO: It's possible that flush succeeds but fails to call + // `on_flush_completed` before exit. In this case the flushed data will + // be replayed again after restarted. To solve the problem, we need to + // (1) persist flushed file numbers in `on_flush_begin` and (2) check + // the file number in `on_compaction_begin`. After restart, (3) check if the + // file exists. If (1) && ((2) || (3)), then we don't need to replay the data. 
+ for (index, change) in pr.state_changes.changes { + match &change { + StateChange::ApplyState(state) => { + batch.put_apply_state(self.region_id, index, state).unwrap(); + } + StateChange::RegionState(state) => { + batch + .put_region_state(self.region_id, index, state) + .unwrap(); + } + } + } + if pr.apply_index != 0 { + batch + .put_flushed_index(self.region_id, cf, self.tablet_index, pr.apply_index) + .unwrap(); + } + self.raft.consume(&mut batch, true).unwrap(); + } +} diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 6a140230fd5..db95f5621e0 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -277,6 +277,8 @@ mod engine; pub use crate::engine::*; mod file_system; pub use crate::file_system::*; +mod flush; +pub use flush::*; mod import; pub use import::*; mod misc; diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index 0c5e0f49854..8b29e07707a 100644 --- a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -19,8 +19,20 @@ pub trait RaftEngineReadOnly: Sync + Send + 'static { fn get_prepare_bootstrap_region(&self) -> Result>; fn get_raft_state(&self, raft_group_id: u64) -> Result>; - fn get_region_state(&self, raft_group_id: u64) -> Result>; - fn get_apply_state(&self, raft_group_id: u64) -> Result>; + /// Get the latest region state not after the apply index. + fn get_region_state( + &self, + raft_group_id: u64, + apply_index: u64, + ) -> Result>; + /// Get the latest apply state not after the apply index. + fn get_apply_state( + &self, + raft_group_id: u64, + apply_index: u64, + ) -> Result>; + /// Get the flushed index of the given CF. 
+ fn get_flushed_index(&self, raft_group_id: u64, cf: &str) -> Result>; fn get_recover_state(&self) -> Result>; fn get_entry(&self, raft_group_id: u64, index: u64) -> Result>; @@ -157,8 +169,36 @@ pub trait RaftLogBatch: Send { fn remove_prepare_bootstrap_region(&mut self) -> Result<()>; fn put_raft_state(&mut self, raft_group_id: u64, state: &RaftLocalState) -> Result<()>; - fn put_region_state(&mut self, raft_group_id: u64, state: &RegionLocalState) -> Result<()>; - fn put_apply_state(&mut self, raft_group_id: u64, state: &RaftApplyState) -> Result<()>; + fn put_region_state( + &mut self, + raft_group_id: u64, + apply_index: u64, + state: &RegionLocalState, + ) -> Result<()>; + fn put_apply_state( + &mut self, + raft_group_id: u64, + apply_index: u64, + state: &RaftApplyState, + ) -> Result<()>; + + /// Record the flushed apply index. + /// + /// There are two types of apply index: + /// 1. Normal apply index that only related to single tablet. These apply + /// indexes are recorded using its own CF. + /// 2. Apply index that can affect other tablets, like split, merge. These + /// apply indexes are recorded using special Raft CF. + /// + /// Because a peer may have multiple tablets (only one is latest), we use + /// `tablet_index` to avoid conflicts. + fn put_flushed_index( + &mut self, + raft_group_id: u64, + cf: &str, + tablet_index: u64, + apply_index: u64, + ) -> Result<()>; /// Indicate whether region states should be recovered from raftdb and /// replay raft logs. 
diff --git a/components/raft_log_engine/Cargo.toml b/components/raft_log_engine/Cargo.toml index 0ee185fd365..8a336177706 100644 --- a/components/raft_log_engine/Cargo.toml +++ b/components/raft_log_engine/Cargo.toml @@ -7,6 +7,7 @@ edition = "2018" [dependencies] encryption = { workspace = true } engine_traits = { workspace = true } +codec = { workspace = true } file_system = { workspace = true } kvproto = { workspace = true } lazy_static = "1.4.0" @@ -22,3 +23,6 @@ slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global tikv_util = { workspace = true } time = "0.1" tracker = { workspace = true } + +[dev-dependencies] +tempfile = "3.0" diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 587f31bae93..7be02e8b6e2 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -7,11 +7,12 @@ use std::{ sync::Arc, }; +use codec::number::NumberCodec; use encryption::{DataKeyManager, DecrypterReader, EncrypterWriter}; use engine_traits::{ CacheStats, EncryptionKeyManager, EncryptionMethod, PerfContextExt, PerfContextKind, PerfLevel, RaftEngine, RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch as RaftLogBatchTrait, - RaftLogGcTask, Result, + RaftLogGcTask, Result, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use file_system::{IoOp, IoRateLimiter, IoType}; use kvproto::{ @@ -290,6 +291,36 @@ impl FileSystem for ManagedFileSystem { } } +/// Convert a cf to id for encoding. +fn cf_to_id(cf: &str) -> u8 { + match cf { + CF_DEFAULT => 0, + CF_LOCK => 1, + CF_WRITE => 2, + CF_RAFT => 3, + _ => panic!("unrecognized cf {}", cf), + } +} + +/// Encode a key in the format `{prefix}{num}`. 
+fn encode_key(prefix: &'static [u8], num: u64) -> [u8; 9] { + debug_assert_eq!(prefix.len(), 1); + let mut buf = [0; 9]; + buf[..prefix.len()].copy_from_slice(prefix); + NumberCodec::encode_u64(&mut buf[prefix.len()..], num); + buf +} + +/// Encode a flush key in the format `{flush key prefix}{cf_id}{tablet_index}`. +fn encode_flushed_key(cf: &str, tablet_index: u64) -> [u8; 10] { + debug_assert_eq!(FLUSH_STATE_KEY.len(), 1); + let mut buf = [0; 10]; + buf[..FLUSH_STATE_KEY.len()].copy_from_slice(FLUSH_STATE_KEY); + buf[FLUSH_STATE_KEY.len()] = cf_to_id(cf); + NumberCodec::encode_u64(&mut buf[FLUSH_STATE_KEY.len() + 1..], tablet_index); + buf +} + #[derive(Clone)] pub struct RaftLogEngine(Arc>); @@ -348,6 +379,7 @@ const PREPARE_BOOTSTRAP_REGION_KEY: &[u8] = &[0x02]; const REGION_STATE_KEY: &[u8] = &[0x03]; const APPLY_STATE_KEY: &[u8] = &[0x04]; const RECOVER_STATE_KEY: &[u8] = &[0x05]; +const FLUSH_STATE_KEY: &[u8] = &[0x06]; impl RaftLogBatchTrait for RaftLogBatch { fn append(&mut self, raft_group_id: u64, entries: Vec) -> Result<()> { @@ -401,18 +433,44 @@ impl RaftLogBatchTrait for RaftLogBatch { Ok(()) } - fn put_region_state(&mut self, raft_group_id: u64, state: &RegionLocalState) -> Result<()> { + fn put_region_state( + &mut self, + raft_group_id: u64, + apply_index: u64, + state: &RegionLocalState, + ) -> Result<()> { + let key = encode_key(REGION_STATE_KEY, apply_index); self.0 - .put_message(raft_group_id, REGION_STATE_KEY.to_vec(), state) + .put_message(raft_group_id, key.to_vec(), state) .map_err(transfer_error) } - fn put_apply_state(&mut self, raft_group_id: u64, state: &RaftApplyState) -> Result<()> { + fn put_apply_state( + &mut self, + raft_group_id: u64, + apply_index: u64, + state: &RaftApplyState, + ) -> Result<()> { + let key = encode_key(APPLY_STATE_KEY, apply_index); self.0 - .put_message(raft_group_id, APPLY_STATE_KEY.to_vec(), state) + .put_message(raft_group_id, key.to_vec(), state) .map_err(transfer_error) } + fn put_flushed_index( + 
&mut self, + raft_group_id: u64, + cf: &str, + tablet_index: u64, + apply_index: u64, + ) -> Result<()> { + let key = encode_flushed_key(cf, tablet_index); + let mut value = vec![0; 8]; + NumberCodec::encode_u64(&mut value, apply_index); + self.0.put(raft_group_id, key.to_vec(), value); + Ok(()) + } + fn put_recover_state(&mut self, state: &StoreRecoverState) -> Result<()> { self.0 .put_message(STORE_STATE_ID, RECOVER_STATE_KEY.to_vec(), state) @@ -471,16 +529,72 @@ impl RaftEngineReadOnly for RaftLogEngine { .map_err(transfer_error) } - fn get_region_state(&self, raft_group_id: u64) -> Result> { + fn get_region_state( + &self, + raft_group_id: u64, + apply_index: u64, + ) -> Result> { + let mut state = None; self.0 - .get_message(raft_group_id, REGION_STATE_KEY) - .map_err(transfer_error) + .scan_messages( + raft_group_id, + Some(REGION_STATE_KEY), + Some(APPLY_STATE_KEY), + true, + |key, value| { + let index = NumberCodec::decode_u64(&key[REGION_STATE_KEY.len()..]); + if index > apply_index { + true + } else { + state = Some(value); + false + } + }, + ) + .map_err(transfer_error)?; + Ok(state) } - fn get_apply_state(&self, raft_group_id: u64) -> Result> { + fn get_apply_state( + &self, + raft_group_id: u64, + apply_index: u64, + ) -> Result> { + let mut state = None; self.0 - .get_message(raft_group_id, APPLY_STATE_KEY) - .map_err(transfer_error) + .scan_messages( + raft_group_id, + Some(APPLY_STATE_KEY), + Some(RECOVER_STATE_KEY), + true, + |key, value| { + let index = NumberCodec::decode_u64(&key[REGION_STATE_KEY.len()..]); + if index > apply_index { + true + } else { + state = Some(value); + false + } + }, + ) + .map_err(transfer_error)?; + Ok(state) + } + + fn get_flushed_index(&self, raft_group_id: u64, cf: &str) -> Result> { + let mut start = [0; 2]; + start[..FLUSH_STATE_KEY.len()].copy_from_slice(FLUSH_STATE_KEY); + start[FLUSH_STATE_KEY.len()] = cf_to_id(cf); + let mut end = start; + end[FLUSH_STATE_KEY.len()] += 1; + let mut index = None; + self.0 + 
.scan_raw_messages(raft_group_id, Some(&start), Some(&end), true, |_, v| { + index = Some(NumberCodec::decode_u64(v)); + false + }) + .map_err(transfer_error)?; + Ok(index) } fn get_recover_state(&self) -> Result> { @@ -624,3 +738,67 @@ fn transfer_error(e: RaftEngineError) -> engine_traits::Error { } } } + +#[cfg(test)] +mod tests { + use std::assert_matches::assert_matches; + + use engine_traits::ALL_CFS; + + use super::*; + + #[test] + fn test_apply_related_states() { + let dir = tempfile::tempdir().unwrap(); + let cfg = RaftEngineConfig { + dir: dir.path().to_str().unwrap().to_owned(), + ..Default::default() + }; + let engine = RaftLogEngine::new(cfg, None, None).unwrap(); + assert_matches!(engine.get_region_state(2, u64::MAX), Ok(None)); + assert_matches!(engine.get_apply_state(2, u64::MAX), Ok(None)); + for cf in ALL_CFS { + assert_matches!(engine.get_flushed_index(2, cf), Ok(None)); + } + + let mut wb = engine.log_batch(10); + let mut region_state = RegionLocalState::default(); + region_state.mut_region().set_id(3); + wb.put_region_state(2, 1, ®ion_state).unwrap(); + let mut apply_state = RaftApplyState::default(); + apply_state.set_applied_index(3); + wb.put_apply_state(2, 3, &apply_state).unwrap(); + for cf in ALL_CFS.iter().take(2) { + wb.put_flushed_index(2, cf, 5, 4).unwrap(); + } + engine.consume(&mut wb, false).unwrap(); + + for cf in ALL_CFS.iter().take(2) { + assert_matches!(engine.get_flushed_index(2, cf), Ok(Some(4))); + } + for cf in ALL_CFS.iter().skip(2) { + assert_matches!(engine.get_flushed_index(2, cf), Ok(None)); + } + + let mut region_state2 = region_state.clone(); + region_state2.mut_region().set_id(5); + wb.put_region_state(2, 4, ®ion_state2).unwrap(); + let mut apply_state2 = apply_state.clone(); + apply_state2.set_applied_index(5); + wb.put_apply_state(2, 5, &apply_state2).unwrap(); + for cf in ALL_CFS { + wb.put_flushed_index(2, cf, 6, 5).unwrap(); + } + engine.consume(&mut wb, false).unwrap(); + + 
assert_matches!(engine.get_region_state(2, 0), Ok(None)); + assert_matches!(engine.get_region_state(2, 1), Ok(Some(s)) if s == region_state); + assert_matches!(engine.get_region_state(2, 4), Ok(Some(s)) if s == region_state2); + assert_matches!(engine.get_apply_state(2, 0), Ok(None)); + assert_matches!(engine.get_apply_state(2, 3), Ok(Some(s)) if s == apply_state); + assert_matches!(engine.get_apply_state(2, 5), Ok(Some(s)) if s == apply_state2); + for cf in ALL_CFS { + assert_matches!(engine.get_flushed_index(2, cf), Ok(Some(5))); + } + } +} diff --git a/components/raft_log_engine/src/lib.rs b/components/raft_log_engine/src/lib.rs index 8eda4e5ae24..25899ddf2bb 100644 --- a/components/raft_log_engine/src/lib.rs +++ b/components/raft_log_engine/src/lib.rs @@ -16,6 +16,7 @@ //! Please read the engine_trait crate docs before hacking. #![cfg_attr(test, feature(test))] +#![feature(assert_matches)] #[macro_use] extern crate tikv_util; diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 60889908aa0..3e459340b0e 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -175,7 +175,7 @@ impl Store { return; } let from_epoch = msg.get_region_epoch(); - let local_state = match ctx.engine.get_region_state(region_id) { + let local_state = match ctx.engine.get_region_state(region_id, 0) { Ok(s) => s, Err(e) => { error!(self.logger(), "failed to get region state"; "region_id" => region_id, "err" => ?e); diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 49a0f547e1a..f3678767693 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -29,7 +29,7 @@ pub fn write_initial_states(wb: &mut impl RaftLogBatch, region: Region) -> Resul let mut state = RegionLocalState::default(); state.set_region(region); state.set_tablet_index(RAFT_INIT_LOG_INDEX); - 
wb.put_region_state(region_id, &state)?; + wb.put_region_state(region_id, 0, &state)?; let mut apply_state = RaftApplyState::default(); apply_state.set_applied_index(RAFT_INIT_LOG_INDEX); @@ -39,7 +39,7 @@ pub fn write_initial_states(wb: &mut impl RaftLogBatch, region: Region) -> Resul apply_state .mut_truncated_state() .set_term(RAFT_INIT_LOG_TERM); - wb.put_apply_state(region_id, &apply_state)?; + wb.put_apply_state(region_id, 0, &apply_state)?; let mut raft_state = RaftLocalState::default(); raft_state.set_last_index(RAFT_INIT_LOG_INDEX); @@ -158,7 +158,7 @@ impl Storage { read_scheduler: Scheduler>, logger: &Logger, ) -> Result>> { - let region_state = match engine.get_region_state(region_id) { + let region_state = match engine.get_region_state(region_id, 0) { Ok(Some(s)) => s, res => { return Err(box_err!( @@ -180,7 +180,7 @@ impl Storage { } }; - let apply_state = match engine.get_apply_state(region_id) { + let apply_state = match engine.get_apply_state(region_id, 0) { Ok(Some(s)) => s, res => { return Err(box_err!("failed to get apply state: {:?}", res)); @@ -439,7 +439,7 @@ mod tests { assert!(!wb.is_empty()); raft_engine.consume(&mut wb, true).unwrap(); - let local_state = raft_engine.get_region_state(4).unwrap().unwrap(); + let local_state = raft_engine.get_region_state(4, 0).unwrap().unwrap(); assert_eq!(local_state.get_state(), PeerState::Normal); assert_eq!(*local_state.get_region(), region); assert_eq!(local_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); @@ -450,7 +450,7 @@ mod tests { assert_eq!(hs.get_term(), RAFT_INIT_LOG_TERM); assert_eq!(hs.get_commit(), RAFT_INIT_LOG_INDEX); - let apply_state = raft_engine.get_apply_state(4).unwrap().unwrap(); + let apply_state = raft_engine.get_apply_state(4, 0).unwrap().unwrap(); assert_eq!(apply_state.get_applied_index(), RAFT_INIT_LOG_INDEX); let ts = apply_state.get_truncated_state(); assert_eq!(ts.get_index(), RAFT_INIT_LOG_INDEX); diff --git a/components/raftstore-v2/tests/integrations/test_life.rs 
b/components/raftstore-v2/tests/integrations/test_life.rs index 805cda15471..5f44b2d5813 100644 --- a/components/raftstore-v2/tests/integrations/test_life.rs +++ b/components/raftstore-v2/tests/integrations/test_life.rs @@ -49,8 +49,8 @@ fn assert_tombstone(raft_engine: &impl RaftEngine, region_id: u64, peer: &metapb raft_engine.get_all_entries_to(region_id, &mut buf).unwrap(); assert!(buf.is_empty(), "{:?}", buf); assert_matches!(raft_engine.get_raft_state(region_id), Ok(None)); - assert_matches!(raft_engine.get_apply_state(region_id), Ok(None)); - let region_state = raft_engine.get_region_state(region_id).unwrap().unwrap(); + assert_matches!(raft_engine.get_apply_state(region_id, 0), Ok(None)); + let region_state = raft_engine.get_region_state(region_id, 0).unwrap().unwrap(); assert_matches!(region_state.get_state(), PeerState::Tombstone); assert!( region_state.get_region().get_peers().contains(peer), @@ -121,7 +121,7 @@ fn test_life_by_message() { let raft_engine = &cluster.node(0).running_state().unwrap().raft_engine; raft_engine.get_raft_state(test_region_id).unwrap().unwrap(); raft_engine - .get_apply_state(test_region_id) + .get_apply_state(test_region_id, 0) .unwrap() .unwrap(); diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index d17223e5acf..14fbd192d0d 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -416,10 +416,11 @@ where ) .unwrap(); } - wb.put_region_state(region_id, ®ion_state).unwrap(); + wb.put_region_state(region_id, 0, ®ion_state).unwrap(); } if !tombstone { - wb.put_apply_state(region_id, &state.apply_state).unwrap(); + wb.put_apply_state(region_id, 0, &state.apply_state) + .unwrap(); } } } diff --git a/components/raftstore/src/store/async_io/write_tests.rs b/components/raftstore/src/store/async_io/write_tests.rs index 727502b6ca4..625e9f3c4a5 100644 --- 
a/components/raftstore/src/store/async_io/write_tests.rs +++ b/components/raftstore/src/store/async_io/write_tests.rs @@ -431,14 +431,14 @@ fn test_worker_split_raft_wb() { ], ); assert_eq!( - engines.raft.get_apply_state(region_1).unwrap(), + engines.raft.get_apply_state(region_1, 0).unwrap(), Some(RaftApplyState { applied_index: 25, ..Default::default() }) ); assert_eq!( - engines.raft.get_apply_state(region_2).unwrap(), + engines.raft.get_apply_state(region_2, 0).unwrap(), Some(RaftApplyState { applied_index: 16, ..Default::default() @@ -634,18 +634,18 @@ fn test_basic_flow_with_states() { ], ); assert_eq!( - engines.raft.get_apply_state(region_1).unwrap().unwrap(), + engines.raft.get_apply_state(region_1, 0).unwrap().unwrap(), apply_state_3 ); assert_eq!( - engines.raft.get_apply_state(region_2).unwrap().unwrap(), + engines.raft.get_apply_state(region_2, 0).unwrap().unwrap(), apply_state_2 ); assert_eq!( - engines.raft.get_region_state(region_1).unwrap().unwrap(), + engines.raft.get_region_state(region_1, 0).unwrap().unwrap(), region_state_1 ); - assert_eq!(engines.raft.get_region_state(region_2).unwrap(), None); + assert_eq!(engines.raft.get_region_state(region_2, 0).unwrap(), None); must_have_same_count_msg(6, &t.msg_rx); From cfdb31fe3679687a698490b1e783570672a0238d Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 14 Dec 2022 13:30:52 +0800 Subject: [PATCH 0406/1149] async-io: make v2 a log batch (#13935) ref tikv/tikv#12842 We now map states with apply index, so there is nothing to merge. Use a log batch for better flexibility to introduce more extra writes. 
Signed-off-by: Jay Lee --- components/raftstore-v2/src/operation/life.rs | 23 ++- .../raftstore-v2/src/operation/ready/mod.rs | 16 +- components/raftstore-v2/src/raft/peer.rs | 5 + components/raftstore-v2/src/raft/storage.rs | 6 +- .../tests/integrations/test_life.rs | 7 +- .../raftstore/src/store/async_io/write.rs | 157 +++++------------- .../src/store/async_io/write_tests.rs | 58 +++---- components/raftstore/src/store/mod.rs | 5 +- 8 files changed, 111 insertions(+), 166 deletions(-) diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 3e459340b0e..d9f706c32a1 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -14,12 +14,12 @@ use std::cmp; use batch_system::BasicMailbox; use crossbeam::channel::{SendError, TrySendError}; -use engine_traits::{KvEngine, RaftEngine}; +use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use kvproto::{ metapb::Region, raft_serverpb::{PeerState, RaftMessage}, }; -use raftstore::store::{util, ExtraStates, WriteTask}; +use raftstore::store::{util, WriteTask}; use slog::{debug, error, info, warn}; use tikv_util::store::find_peer; @@ -175,7 +175,7 @@ impl Store { return; } let from_epoch = msg.get_region_epoch(); - let local_state = match ctx.engine.get_region_state(region_id, 0) { + let local_state = match ctx.engine.get_region_state(region_id, u64::MAX) { Ok(s) => s, Err(e) => { error!(self.logger(), "failed to get region state"; "region_id" => region_id, "err" => ?e); @@ -304,13 +304,20 @@ impl Peer { Some((f, l)) => Some((cmp::min(first_index, f), cmp::max(last_index, l))), }; } - let mut extra_states = ExtraStates::new(entry_storage.apply_state().clone()); + let raft_engine = self.entry_storage().raft_engine(); let mut region_state = self.storage().region_state().clone(); + let region_id = region_state.get_region().get_id(); + let lb = write_task + .extra_write + .ensure_v2(|| raft_engine.log_batch(2)); + // 
We only use raft-log-engine for v2, first index is not important. + let raft_state = self.entry_storage().raft_state(); + raft_engine.clean(region_id, 0, raft_state, lb).unwrap(); // Write worker will do the clean up when meeting tombstone state. region_state.set_state(PeerState::Tombstone); - extra_states.set_region_state(region_state); - extra_states.set_raft_state(entry_storage.raft_state().clone()); - write_task.extra_write.set_v2(extra_states); + let applied_index = self.entry_storage().applied_index(); + lb.put_region_state(region_id, applied_index, ®ion_state) + .unwrap(); self.destroy_progress_mut().start(); } @@ -325,6 +332,6 @@ impl Peer { // new peer. Ignore error as it's just a best effort. let _ = ctx.router.send_raft_message(msg); } - // TODO: close apply mailbox. + self.clear_apply_scheduler(); } } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index e9046af2831..47f6523cc82 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -22,12 +22,12 @@ mod snapshot; use std::{cmp, time::Instant}; -use engine_traits::{KvEngine, RaftEngine}; +use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use error_code::ErrorCodeExt; use kvproto::{raft_cmdpb::AdminCmdType, raft_serverpb::RaftMessage}; use protobuf::Message as _; use raft::{eraftpb, prelude::MessageType, Ready, StateRole, INVALID_ID}; -use raftstore::store::{util, ExtraStates, FetchedLogs, ReadProgress, Transport, WriteTask}; +use raftstore::store::{util, FetchedLogs, ReadProgress, Transport, WriteTask}; use slog::{debug, error, trace, warn}; use tikv_util::time::{duration_to_sec, monotonic_raw_now}; @@ -555,9 +555,15 @@ impl Storage { write_task.raft_state = Some(entry_storage.raft_state().clone()); } if !ever_persisted { - let mut extra_states = ExtraStates::new(self.apply_state().clone()); - extra_states.set_region_state(self.region_state().clone()); - 
write_task.extra_write.set_v2(extra_states); + let region_id = self.region().get_id(); + let raft_engine = self.entry_storage().raft_engine(); + let lb = write_task + .extra_write + .ensure_v2(|| raft_engine.log_batch(3)); + lb.put_apply_state(region_id, 0, self.apply_state()) + .unwrap(); + lb.put_region_state(region_id, 0, self.region_state()) + .unwrap(); self.set_ever_persisted(); } } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index f211313e1b5..21795eb3293 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -508,6 +508,11 @@ impl Peer { self.apply_scheduler = Some(apply_scheduler); } + #[inline] + pub fn clear_apply_scheduler(&mut self) { + self.apply_scheduler.take(); + } + /// Whether the snapshot is handling. /// See the comments of `check_snap_status` for more details. #[inline] diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index f3678767693..889674c514c 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -158,7 +158,7 @@ impl Storage { read_scheduler: Scheduler>, logger: &Logger, ) -> Result>> { - let region_state = match engine.get_region_state(region_id, 0) { + let region_state = match engine.get_region_state(region_id, u64::MAX) { Ok(Some(s)) => s, res => { return Err(box_err!( @@ -180,7 +180,7 @@ impl Storage { } }; - let apply_state = match engine.get_apply_state(region_id, 0) { + let apply_state = match engine.get_apply_state(region_id, u64::MAX) { Ok(Some(s)) => s, res => { return Err(box_err!("failed to get apply state: {:?}", res)); @@ -450,7 +450,7 @@ mod tests { assert_eq!(hs.get_term(), RAFT_INIT_LOG_TERM); assert_eq!(hs.get_commit(), RAFT_INIT_LOG_INDEX); - let apply_state = raft_engine.get_apply_state(4, 0).unwrap().unwrap(); + let apply_state = raft_engine.get_apply_state(4, u64::MAX).unwrap().unwrap(); 
assert_eq!(apply_state.get_applied_index(), RAFT_INIT_LOG_INDEX); let ts = apply_state.get_truncated_state(); assert_eq!(ts.get_index(), RAFT_INIT_LOG_INDEX); diff --git a/components/raftstore-v2/tests/integrations/test_life.rs b/components/raftstore-v2/tests/integrations/test_life.rs index 5f44b2d5813..a2ae0bbb9f8 100644 --- a/components/raftstore-v2/tests/integrations/test_life.rs +++ b/components/raftstore-v2/tests/integrations/test_life.rs @@ -49,8 +49,11 @@ fn assert_tombstone(raft_engine: &impl RaftEngine, region_id: u64, peer: &metapb raft_engine.get_all_entries_to(region_id, &mut buf).unwrap(); assert!(buf.is_empty(), "{:?}", buf); assert_matches!(raft_engine.get_raft_state(region_id), Ok(None)); - assert_matches!(raft_engine.get_apply_state(region_id, 0), Ok(None)); - let region_state = raft_engine.get_region_state(region_id, 0).unwrap().unwrap(); + assert_matches!(raft_engine.get_apply_state(region_id, u64::MAX), Ok(None)); + let region_state = raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); assert_matches!(region_state.get_state(), PeerState::Tombstone); assert!( region_state.get_region().get_peers().contains(peer), diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 14fbd192d0d..b69b3484e0c 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -20,9 +20,7 @@ use engine_traits::{ }; use error_code::ErrorCodeExt; use fail::fail_point; -use kvproto::raft_serverpb::{ - PeerState, RaftApplyState, RaftLocalState, RaftMessage, RegionLocalState, -}; +use kvproto::raft_serverpb::{RaftLocalState, RaftMessage}; use protobuf::Message; use raft::eraftpb::Entry; use tikv_util::{ @@ -39,7 +37,6 @@ use super::write_router::WriteSenders; use crate::{ store::{ config::Config, - entry_storage::first_index, fsm::RaftRouter, local_metrics::{RaftSendMessageMetrics, StoreWriteMetrics, TimeTracker}, metrics::*, @@ 
-89,22 +86,24 @@ where /// /// For now, applying snapshot needs to persist some extra states. For v1, /// these states are written to KvEngine. For v2, they are written to -/// RaftEngine. +/// RaftEngine. Although in v2 these states are also written to raft engine, +/// but we have to use `ExtraState` as they should be written as the last +/// updates. // TODO: perhaps we should always pass states instead of a write batch even // for v1. -pub enum ExtraWrite { +pub enum ExtraWrite { None, V1(W), - V2(ExtraStates), + V2(L), } -impl ExtraWrite { +impl ExtraWrite { #[inline] pub fn is_empty(&self) -> bool { match self { ExtraWrite::None => true, ExtraWrite::V1(w) => w.is_empty(), - _ => false, + ExtraWrite::V2(l) => l.is_empty(), } } @@ -113,7 +112,7 @@ impl ExtraWrite { match self { ExtraWrite::None => 0, ExtraWrite::V1(w) => w.data_size(), - ExtraWrite::V2(m) => mem::size_of_val(m), + ExtraWrite::V2(l) => l.persist_size(), } } @@ -140,18 +139,22 @@ impl ExtraWrite { } #[inline] - pub fn set_v2(&mut self, extra_states: ExtraStates) { - if let ExtraWrite::V1(_) = self { + pub fn ensure_v2(&mut self, log_batch: impl FnOnce() -> L) -> &mut L { + if let ExtraWrite::None = self { + *self = ExtraWrite::V2(log_batch()); + } else if let ExtraWrite::V1(_) = self { unreachable!("v1 and v2 are mixed used"); - } else { - *self = ExtraWrite::V2(extra_states); + } + match self { + ExtraWrite::V2(l) => l, + _ => unreachable!(), } } #[inline] - pub fn v2_mut(&mut self) -> Option<&mut ExtraStates> { - if let ExtraWrite::V2(m) = self { - Some(m) + pub fn v2_mut(&mut self) -> Option<&mut L> { + if let ExtraWrite::V2(l) = self { + Some(l) } else { None } @@ -175,7 +178,7 @@ where pub entries: Vec, pub cut_logs: Option<(u64, u64)>, pub raft_state: Option, - pub extra_write: ExtraWrite, + pub extra_write: ExtraWrite, pub messages: Vec, pub trackers: Vec, pub has_snapshot: bool, @@ -264,57 +267,27 @@ where } } -/// These states are set only in raftstore V2. 
-#[derive(Default)] -pub struct ExtraStates { - apply_state: RaftApplyState, - region_state: Option, - // Set only want to destroy the raft group in write worker. - raft_state: Option, -} - -impl ExtraStates { - #[inline] - pub fn new(apply_state: RaftApplyState) -> Self { - Self { - apply_state, - region_state: None, - raft_state: None, - } - } - - #[inline] - pub fn set_region_state(&mut self, region_state: RegionLocalState) { - self.region_state = Some(region_state); - } - - #[inline] - pub fn set_raft_state(&mut self, raft_state: RaftLocalState) { - self.raft_state = Some(raft_state); - } -} - -pub enum ExtraBatchWrite { +pub enum ExtraBatchWrite { None, V1(W), - V2(HashMap), + V2(L), } -impl ExtraBatchWrite { +impl ExtraBatchWrite { #[inline] fn clear(&mut self) { match self { ExtraBatchWrite::None => {} ExtraBatchWrite::V1(w) => w.clear(), - ExtraBatchWrite::V2(m) => m.clear(), + // No clear in in `RaftLogBatch`. + ExtraBatchWrite::V2(_) => *self = ExtraBatchWrite::None, } } /// Merge the extra_write with this batch. /// /// If there is any new states inserted, return the size of the state. 
- fn merge(&mut self, region_id: u64, extra_write: &mut ExtraWrite) -> usize { - let mut inserted = false; + fn merge(&mut self, extra_write: &mut ExtraWrite) { match mem::replace(extra_write, ExtraWrite::None) { ExtraWrite::None => (), ExtraWrite::V1(wb) => match self { @@ -322,35 +295,11 @@ impl ExtraBatchWrite { ExtraBatchWrite::V1(kv_wb) => kv_wb.merge(wb).unwrap(), ExtraBatchWrite::V2(_) => unreachable!("v2 and v1 are mixed used"), }, - ExtraWrite::V2(extra_states) => match self { - ExtraBatchWrite::None => { - let mut map = HashMap::default(); - map.insert(region_id, extra_states); - *self = ExtraBatchWrite::V2(map); - inserted = true; - } + ExtraWrite::V2(lb) => match self { + ExtraBatchWrite::None => *self = ExtraBatchWrite::V2(lb), ExtraBatchWrite::V1(_) => unreachable!("v2 and v1 are mixed used"), - ExtraBatchWrite::V2(extra_states_map) => match extra_states_map.entry(region_id) { - collections::HashMapEntry::Occupied(mut slot) => { - slot.get_mut().apply_state = extra_states.apply_state; - if let Some(region_state) = extra_states.region_state { - slot.get_mut().region_state = Some(region_state); - } - if let Some(raft_state) = extra_states.raft_state { - slot.get_mut().raft_state = Some(raft_state); - } - } - collections::HashMapEntry::Vacant(slot) => { - slot.insert(extra_states); - inserted = true; - } - }, + ExtraBatchWrite::V2(raft_wb) => raft_wb.merge(lb).unwrap(), }, - }; - if inserted { - std::mem::size_of::() - } else { - 0 } } } @@ -368,7 +317,7 @@ where // These states only corresponds to entries inside `raft_wbs.last()`. States for other write // batches must be inlined early. 
pub raft_states: HashMap, - pub extra_batch_write: ExtraBatchWrite, + pub extra_batch_write: ExtraBatchWrite, pub state_size: usize, pub tasks: Vec>, pub persisted_cbs: Vec>, @@ -396,35 +345,16 @@ where } #[inline] - fn flush_states_to_raft_wb(&mut self, raft_engine: &ER) { + fn flush_states_to_raft_wb(&mut self) { let wb = self.raft_wbs.last_mut().unwrap(); for (region_id, state) in self.raft_states.drain() { wb.put_raft_state(region_id, &state).unwrap(); } - if let ExtraBatchWrite::V2(extra_states_map) = &mut self.extra_batch_write { - for (region_id, state) in extra_states_map.drain() { - let mut tombstone = false; - if let Some(region_state) = state.region_state { - if region_state.get_state() == PeerState::Tombstone { - tombstone = true; - raft_engine - .clean( - region_id, - first_index(&state.apply_state), - state.raft_state.as_ref().unwrap(), - wb, - ) - .unwrap(); - } - wb.put_region_state(region_id, 0, ®ion_state).unwrap(); - } - if !tombstone { - wb.put_apply_state(region_id, 0, &state.apply_state) - .unwrap(); - } - } - } self.state_size = 0; + if let ExtraBatchWrite::V2(_) = self.extra_batch_write { + let ExtraBatchWrite::V2(lb) = mem::replace(&mut self.extra_batch_write, ExtraBatchWrite::None) else { unreachable!() }; + wb.merge(lb).unwrap(); + } } /// Add write task to this batch @@ -436,7 +366,7 @@ where if self.raft_wb_split_size > 0 && self.raft_wbs.last().unwrap().persist_size() >= self.raft_wb_split_size { - self.flush_states_to_raft_wb(raft_engine); + self.flush_states_to_raft_wb(); self.raft_wbs .push(raft_engine.log_batch(RAFT_WB_DEFAULT_SIZE)); } @@ -456,9 +386,7 @@ where && self.raft_states.insert(task.region_id, raft_state).is_none() { self.state_size += std::mem::size_of::(); } - self.state_size += self - .extra_batch_write - .merge(task.region_id, &mut task.extra_write); + self.extra_batch_write.merge(&mut task.extra_write); if let Some(prev_readies) = self .readies @@ -511,8 +439,8 @@ where .sum::() } - fn before_write_to_db(&mut self, 
engine: &ER, metrics: &StoreWriteMetrics) { - self.flush_states_to_raft_wb(engine); + fn before_write_to_db(&mut self, metrics: &StoreWriteMetrics) { + self.flush_states_to_raft_wb(); if metrics.waterfall_metrics { let now = std::time::Instant::now(); for task in &self.tasks { @@ -705,8 +633,7 @@ where let timer = Instant::now(); - self.batch - .before_write_to_db(&self.raft_engine, &self.metrics); + self.batch.before_write_to_db(&self.metrics); fail_point!("raft_before_save"); @@ -968,7 +895,7 @@ pub fn write_to_db_for_test( { let mut batch = WriteTaskBatch::new(engines.raft.log_batch(RAFT_WB_DEFAULT_SIZE)); batch.add_write_task(&engines.raft, task); - batch.before_write_to_db(&engines.raft, &StoreWriteMetrics::new(false)); + batch.before_write_to_db(&StoreWriteMetrics::new(false)); if let ExtraBatchWrite::V1(kv_wb) = &mut batch.extra_batch_write { if !kv_wb.is_empty() { let mut write_opts = WriteOptions::new(); diff --git a/components/raftstore/src/store/async_io/write_tests.rs b/components/raftstore/src/store/async_io/write_tests.rs index 625e9f3c4a5..6007b39489e 100644 --- a/components/raftstore/src/store/async_io/write_tests.rs +++ b/components/raftstore/src/store/async_io/write_tests.rs @@ -6,7 +6,7 @@ use collections::HashSet; use crossbeam::channel::unbounded; use engine_test::{kv::KvTestEngine, new_temp_engine, raft::RaftTestEngine}; use engine_traits::{Engines, Mutable, Peekable, RaftEngineReadOnly, WriteBatchExt}; -use kvproto::raft_serverpb::RaftMessage; +use kvproto::raft_serverpb::{RaftApplyState, RaftMessage, RegionLocalState}; use tempfile::Builder; use super::*; @@ -350,11 +350,11 @@ fn test_worker_split_raft_wb() { let mut expected_wbs = 1; let mut task_1 = WriteTask::::new(region_1, 1, 10); - init_write_batch(&engines, &mut task_1); - task_1.extra_write = ExtraWrite::V2(ExtraStates::new(RaftApplyState { - applied_index: 10, - ..Default::default() - })); + task_1.raft_wb = Some(engines.raft.log_batch(0)); + let mut apply_state_1 = 
RaftApplyState::default(); + apply_state_1.set_applied_index(10); + let lb = task_1.extra_write.ensure_v2(|| engines.raft.log_batch(0)); + lb.put_apply_state(region_1, 10, &apply_state_1).unwrap(); put_raft_kv(task_1.raft_wb.as_mut(), raft_key_1); task_1.entries.append(&mut vec![ new_entry(5, 5), @@ -366,11 +366,11 @@ fn test_worker_split_raft_wb() { t.worker.batch.add_write_task(&engines.raft, task_1); let mut task_2 = WriteTask::::new(region_2, 2, 15); - init_write_batch(&engines, &mut task_2); - task_2.extra_write = ExtraWrite::V2(ExtraStates::new(RaftApplyState { - applied_index: 16, - ..Default::default() - })); + task_2.raft_wb = Some(engines.raft.log_batch(0)); + let mut apply_state_2 = RaftApplyState::default(); + apply_state_2.set_applied_index(16); + let lb = task_2.extra_write.ensure_v2(|| engines.raft.log_batch(0)); + lb.put_apply_state(region_2, 16, &apply_state_2).unwrap(); put_raft_kv(task_2.raft_wb.as_mut(), raft_key_2); task_2 .entries @@ -385,11 +385,11 @@ fn test_worker_split_raft_wb() { t.worker.batch.add_write_task(&engines.raft, task_2); let mut task_3 = WriteTask::::new(region_1, 1, 11); - init_write_batch(&engines, &mut task_3); - task_3.extra_write = ExtraWrite::V2(ExtraStates::new(RaftApplyState { - applied_index: 25, - ..Default::default() - })); + task_3.raft_wb = Some(engines.raft.log_batch(0)); + let mut apply_state_3 = RaftApplyState::default(); + apply_state_3.set_applied_index(25); + let lb = task_3.extra_write.ensure_v2(|| engines.raft.log_batch(0)); + lb.put_apply_state(region_1, 25, &apply_state_3).unwrap(); put_raft_kv(task_3.raft_wb.as_mut(), raft_key_3); delete_raft_kv(&engines.raft, task_3.raft_wb.as_mut(), raft_key_1); task_3 @@ -431,14 +431,14 @@ fn test_worker_split_raft_wb() { ], ); assert_eq!( - engines.raft.get_apply_state(region_1, 0).unwrap(), + engines.raft.get_apply_state(region_1, 25).unwrap(), Some(RaftApplyState { applied_index: 25, ..Default::default() }) ); assert_eq!( - engines.raft.get_apply_state(region_2, 
0).unwrap(), + engines.raft.get_apply_state(region_2, 16).unwrap(), Some(RaftApplyState { applied_index: 16, ..Default::default() @@ -559,14 +559,14 @@ fn test_basic_flow_with_states() { task_1.raft_wb = Some(engines.raft.log_batch(0)); let mut apply_state_1 = RaftApplyState::default(); apply_state_1.applied_index = 2; - let mut extra_state = ExtraStates::new(apply_state_1); let mut region_state_1 = RegionLocalState::default(); region_state_1 .mut_region() .mut_region_epoch() .set_version(3); - extra_state.region_state = Some(region_state_1.clone()); - task_1.extra_write.set_v2(extra_state); + let lb = task_1.extra_write.ensure_v2(|| engines.raft.log_batch(0)); + lb.put_apply_state(region_1, 2, &apply_state_1).unwrap(); + lb.put_region_state(region_1, 2, ®ion_state_1).unwrap(); put_raft_kv(task_1.raft_wb.as_mut(), 17); task_1 .entries @@ -582,8 +582,8 @@ fn test_basic_flow_with_states() { task_2.raft_wb = Some(engines.raft.log_batch(0)); let mut apply_state_2 = RaftApplyState::default(); apply_state_2.applied_index = 30; - let extra_state = ExtraStates::new(apply_state_2.clone()); - task_2.extra_write.set_v2(extra_state); + let lb = task_2.extra_write.ensure_v2(|| engines.raft.log_batch(0)); + lb.put_apply_state(2, 30, &apply_state_2).unwrap(); put_raft_kv(task_2.raft_wb.as_mut(), 27); task_2 .entries @@ -599,8 +599,8 @@ fn test_basic_flow_with_states() { task_3.raft_wb = Some(engines.raft.log_batch(0)); let mut apply_state_3 = RaftApplyState::default(); apply_state_3.applied_index = 5; - let extra_state = ExtraStates::new(apply_state_3.clone()); - task_3.extra_write.set_v2(extra_state); + let lb = task_3.extra_write.ensure_v2(|| engines.raft.log_batch(0)); + lb.put_apply_state(region_1, 5, &apply_state_3).unwrap(); put_raft_kv(task_3.raft_wb.as_mut(), 37); delete_raft_kv(&engines.raft, task_3.raft_wb.as_mut(), 17); task_3.entries.append(&mut vec![new_entry(6, 6)]); @@ -634,18 +634,18 @@ fn test_basic_flow_with_states() { ], ); assert_eq!( - 
engines.raft.get_apply_state(region_1, 0).unwrap().unwrap(), + engines.raft.get_apply_state(region_1, 5).unwrap().unwrap(), apply_state_3 ); assert_eq!( - engines.raft.get_apply_state(region_2, 0).unwrap().unwrap(), + engines.raft.get_apply_state(region_2, 30).unwrap().unwrap(), apply_state_2 ); assert_eq!( - engines.raft.get_region_state(region_1, 0).unwrap().unwrap(), + engines.raft.get_region_state(region_1, 2).unwrap().unwrap(), region_state_1 ); - assert_eq!(engines.raft.get_region_state(region_2, 0).unwrap(), None); + assert_eq!(engines.raft.get_region_state(region_2, 1).unwrap(), None); must_have_same_count_msg(6, &t.msg_rx); diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 65417732adf..0846e8362b3 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -31,10 +31,7 @@ pub use self::msg::PeerInternalStat; pub use self::{ async_io::{ read::{AsyncReadNotifier, FetchedLogs, GenSnapRes, ReadRunner, ReadTask}, - write::{ - ExtraStates, PersistedNotifier, StoreWriters, Worker as WriteWorker, WriteMsg, - WriteTask, - }, + write::{PersistedNotifier, StoreWriters, Worker as WriteWorker, WriteMsg, WriteTask}, write_router::{WriteRouter, WriteRouterContext, WriteSenders}, }, bootstrap::{ From 98e8bfb35e6cf87e51916ba229fc269b5c984187 Mon Sep 17 00:00:00 2001 From: lijie Date: Wed, 14 Dec 2022 14:41:04 +0800 Subject: [PATCH 0407/1149] chore: bump version to v6.6.0-alpha (#13938) Signed-off-by: lijie --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 432d2ce3c26..244ca504858 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6226,7 +6226,7 @@ dependencies = [ [[package]] name = "tikv" -version = "6.5.0-alpha" +version = "6.6.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index 61d6da6946d..66516206dd0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] 
name = "tikv" -version = "6.5.0-alpha" +version = "6.6.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From e591a41bfe29899c42bbf3e8856ea6dd50deb548 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Fri, 16 Dec 2022 10:22:52 +0800 Subject: [PATCH 0408/1149] storage: Update lock wait info after acquiring locks (#13902) ref tikv/tikv#13298 Update lock wait info after acquiring locks, so that in case there is resumable pessimistic lock requests that's waiting in TiKV, the latest waiting relationship can be used to provide the diagnostic information and do deadlock detection. Signed-off-by: MyonKeminta Co-authored-by: Ti Chi Robot --- src/server/lock_manager/waiter_manager.rs | 12 +- .../lock_manager/lock_waiting_queue.rs | 72 ++++++++++- src/storage/lock_manager/mod.rs | 1 - src/storage/mvcc/txn.rs | 31 ++++- .../txn/actions/acquire_pessimistic_lock.rs | 4 +- src/storage/txn/actions/check_txn_status.rs | 2 +- .../txn/actions/flashback_to_version.rs | 1 + src/storage/txn/actions/prewrite.rs | 13 +- .../txn/commands/acquire_pessimistic_lock.rs | 2 + .../acquire_pessimistic_lock_resumed.rs | 8 +- src/storage/txn/commands/atomic_store.rs | 1 + .../txn/commands/check_secondary_locks.rs | 2 + src/storage/txn/commands/check_txn_status.rs | 2 + src/storage/txn/commands/cleanup.rs | 2 + src/storage/txn/commands/commit.rs | 2 + src/storage/txn/commands/compare_and_swap.rs | 1 + .../txn/commands/flashback_to_version.rs | 1 + src/storage/txn/commands/mod.rs | 1 + src/storage/txn/commands/pause.rs | 1 + .../txn/commands/pessimistic_rollback.rs | 2 + src/storage/txn/commands/prewrite.rs | 8 +- src/storage/txn/commands/resolve_lock.rs | 2 + src/storage/txn/commands/resolve_lock_lite.rs | 2 + src/storage/txn/commands/rollback.rs | 2 + src/storage/txn/commands/txn_heart_beat.rs | 4 +- src/storage/txn/scheduler.rs | 49 +++++++- 
tests/integrations/server/lock_manager.rs | 113 +++++++++++++++++- 27 files changed, 315 insertions(+), 26 deletions(-) diff --git a/src/server/lock_manager/waiter_manager.rs b/src/server/lock_manager/waiter_manager.rs index 467580645d3..d8271998653 100644 --- a/src/server/lock_manager/waiter_manager.rs +++ b/src/server/lock_manager/waiter_manager.rs @@ -340,7 +340,10 @@ impl WaitTable { Some(waiter) } - fn update_waiter(&mut self, update_event: &UpdateWaitForEvent) -> Option { + fn update_waiter( + &mut self, + update_event: &UpdateWaitForEvent, + ) -> Option<(KeyLockWaitInfo, DiagnosticContext)> { let waiter = self.waiter_pool.get_mut(&update_event.token)?; assert_eq!(waiter.wait_info.key, update_event.wait_info.key); @@ -351,9 +354,8 @@ impl WaitTable { } let result = std::mem::replace(&mut waiter.wait_info, update_event.wait_info.clone()); - waiter.diag_ctx = update_event.diag_ctx.clone(); - Some(result) + Some((result, waiter.diag_ctx.clone())) } fn take_waiter_by_lock_digest( @@ -542,11 +544,11 @@ impl WaiterManager { continue; } - if let Some(previous_wait_info) = previous_wait_info { + if let Some((previous_wait_info, diag_ctx)) = previous_wait_info { self.detector_scheduler .clean_up_wait_for(event.start_ts, previous_wait_info); self.detector_scheduler - .detect(event.start_ts, event.wait_info, event.diag_ctx); + .detect(event.start_ts, event.wait_info, diag_ctx); } } } diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs index 663c6729962..a81248fe9e2 100644 --- a/src/storage/lock_manager/lock_waiting_queue.rs +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -76,7 +76,7 @@ use txn_types::{Key, TimeStamp}; use crate::storage::{ lock_manager::{ lock_wait_context::{LockWaitContextSharedState, PessimisticLockKeyCallback}, - LockManager, LockWaitToken, + KeyLockWaitInfo, LockDigest, LockManager, LockWaitToken, UpdateWaitForEvent, }, metrics::*, mvcc::{Error as MvccError, ErrorInner as 
MvccErrorInner}, @@ -599,6 +599,36 @@ impl LockWaitQueues { result } + pub fn update_lock_wait(&self, lock_info: Vec) { + let mut update_wait_for_events = vec![]; + for lock_info in lock_info { + let key = Key::from_raw(lock_info.get_key()); + if let Some(mut key_state) = self.inner.queue_map.get_mut(&key) { + key_state.current_lock = lock_info; + update_wait_for_events.reserve(key_state.queue.len()); + for (&token, entry) in key_state.queue.iter() { + let event = UpdateWaitForEvent { + token, + start_ts: entry.parameters.start_ts, + is_first_lock: entry.parameters.is_first_lock, + wait_info: KeyLockWaitInfo { + key: key.clone(), + lock_digest: LockDigest { + ts: key_state.current_lock.lock_version.into(), + hash: entry.lock_hash, + }, + lock_info: key_state.current_lock.clone(), + }, + }; + update_wait_for_events.push(event); + } + } + } + if !update_wait_for_events.is_empty() { + self.inner.lock_mgr.update_wait_for(update_wait_for_events); + } + } + /// Gets the count of entries currently waiting in queues. /// /// Mind that the contents of the queues may be changed concurrently. 
@@ -1205,4 +1235,44 @@ mod tests { queues.must_not_contain_key(b"k1"); assert_eq!(queues.entry_count(), 0); } + + #[bench] + fn bench_update_lock_wait_empty(b: &mut test::Bencher) { + let queues = LockWaitQueues::new(MockLockManager::new()); + queues.mock_lock_wait(b"k1", 5, 6, false); + + let mut lock_info = kvrpcpb::LockInfo::default(); + let key = b"t\x00\x00\x00\x00\x00\x00\x00\x01_r\x00\x00\x00\x00\x00\x00\x00\x01"; + lock_info.set_key(key.to_vec()); + lock_info.set_primary_lock(key.to_vec()); + lock_info.set_lock_version(10); + lock_info.set_lock_for_update_ts(10); + let lock_info = vec![lock_info]; + + b.iter(|| { + queues.update_lock_wait(lock_info.clone()); + }); + } + + #[bench] + fn bench_update_lock_wait_queue_len_512(b: &mut test::Bencher) { + let queues = LockWaitQueues::new(MockLockManager::new()); + + let key = b"t\x00\x00\x00\x00\x00\x00\x00\x01_r\x00\x00\x00\x00\x00\x00\x00\x01"; + + for i in 0..512 { + queues.mock_lock_wait(key, 15 + i, 10, true); + } + + let mut lock_info = kvrpcpb::LockInfo::default(); + lock_info.set_key(key.to_vec()); + lock_info.set_primary_lock(key.to_vec()); + lock_info.set_lock_version(10); + lock_info.set_lock_for_update_ts(10); + let lock_info = vec![lock_info]; + + b.iter(|| { + queues.update_lock_wait(lock_info.clone()); + }); + } } diff --git a/src/storage/lock_manager/mod.rs b/src/storage/lock_manager/mod.rs index 75b133a808f..5c103f40f82 100644 --- a/src/storage/lock_manager/mod.rs +++ b/src/storage/lock_manager/mod.rs @@ -115,7 +115,6 @@ pub struct UpdateWaitForEvent { pub start_ts: TimeStamp, pub is_first_lock: bool, pub wait_info: KeyLockWaitInfo, - pub diag_ctx: DiagnosticContext, } /// `LockManager` manages transactions waiting for locks held by other diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index 4cc0ab57ffb..9e87bf748b7 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -5,6 +5,7 @@ use std::fmt; use concurrency_manager::{ConcurrencyManager, KeyHandleGuard}; use 
engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; +use kvproto::kvrpcpb::LockInfo; use txn_types::{Key, Lock, PessimisticLock, TimeStamp, Value}; use super::metrics::{GC_DELETE_VERSIONS_HISTOGRAM, MVCC_VERSIONS_HISTOGRAM}; @@ -64,6 +65,11 @@ pub struct MvccTxn { // `writes`, so it can be further processed. The elements are tuples representing // (key, lock, remove_pessimistic_lock) pub(crate) locks_for_1pc: Vec<(Key, Lock, bool)>, + // Collects the information of locks that are acquired in this MvccTxn. Locks that already + // exists but updated in this MvccTxn won't be collected. The collected information will be + // used to update the lock waiting information and redo deadlock detection, if there are some + // pessimistic lock requests waiting on the keys. + pub(crate) new_locks: Vec, // `concurrency_manager` is used to set memory locks for prewritten keys. // Prewritten locks of async commit transactions should be visible to // readers before they are written to the engine. @@ -84,7 +90,8 @@ impl MvccTxn { start_ts, write_size: 0, modifies: vec![], - locks_for_1pc: Vec::new(), + locks_for_1pc: vec![], + new_locks: vec![], concurrency_manager, guards: vec![], } @@ -99,6 +106,10 @@ impl MvccTxn { std::mem::take(&mut self.guards) } + pub fn take_new_locks(&mut self) -> Vec { + std::mem::take(&mut self.new_locks) + } + pub fn write_size(&self) -> usize { self.write_size } @@ -107,7 +118,12 @@ impl MvccTxn { self.modifies.len() == 0 && self.locks_for_1pc.len() == 0 } - pub(crate) fn put_lock(&mut self, key: Key, lock: &Lock) { + // Write a lock. If the key doesn't have lock before, `is_new` should be set. 
+ pub(crate) fn put_lock(&mut self, key: Key, lock: &Lock, is_new: bool) { + if is_new { + self.new_locks + .push(lock.clone().into_lock_info(key.to_raw().unwrap())); + } let write = Modify::Put(CF_LOCK, key, lock.to_bytes()); self.write_size += write.size(); self.modifies.push(write); @@ -117,7 +133,13 @@ impl MvccTxn { self.locks_for_1pc.push((key, lock, remove_pessimstic_lock)); } - pub(crate) fn put_pessimistic_lock(&mut self, key: Key, lock: PessimisticLock) { + // Write a pessimistic lock. If the key doesn't have lock before, `is_new` + // should be set. + pub(crate) fn put_pessimistic_lock(&mut self, key: Key, lock: PessimisticLock, is_new: bool) { + if is_new { + self.new_locks + .push(lock.to_lock().into_lock_info(key.to_raw().unwrap())); + } self.modifies.push(Modify::PessimisticLock(key, lock)) } @@ -198,12 +220,13 @@ impl MvccTxn { } lock.rollback_ts.push(self.start_ts); - self.put_lock(key.clone(), &lock); + self.put_lock(key.clone(), &lock, false); } pub(crate) fn clear(&mut self) { self.write_size = 0; self.modifies.clear(); + self.new_locks.clear(); self.locks_for_1pc.clear(); self.guards.clear(); } diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index 8e7c4d95118..fcffd500c8e 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -171,7 +171,7 @@ pub fn acquire_pessimistic_lock( last_change_ts: lock.last_change_ts, versions_to_last_change: lock.versions_to_last_change, }; - txn.put_pessimistic_lock(key, lock); + txn.put_pessimistic_lock(key, lock, false); } else { MVCC_DUPLICATE_CMD_COUNTER_VEC .acquire_pessimistic_lock @@ -325,7 +325,7 @@ pub fn acquire_pessimistic_lock( // When lock_only_if_exists is false, always acquire pessimistic lock, otherwise // do it when val exists if !lock_only_if_exists || val.is_some() { - txn.put_pessimistic_lock(key, lock); + txn.put_pessimistic_lock(key, lock, true); } // 
TODO don't we need to commit the modifies in txn? diff --git a/src/storage/txn/actions/check_txn_status.rs b/src/storage/txn/actions/check_txn_status.rs index 4c900e5a438..88982d6da72 100644 --- a/src/storage/txn/actions/check_txn_status.rs +++ b/src/storage/txn/actions/check_txn_status.rs @@ -70,7 +70,7 @@ pub fn check_txn_status_lock_exists( lock.min_commit_ts = current_ts; } - txn.put_lock(primary_key, &lock); + txn.put_lock(primary_key, &lock, false); MVCC_CHECK_TXN_STATUS_COUNTER_VEC.update_ts.inc(); } diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index 819cfd0631c..2710935efb1 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -181,6 +181,7 @@ pub fn prewrite_flashback_key( 1, TimeStamp::zero(), ), + false, // Assuming flashback transactions won't participate any lock conflicts. ); Ok(()) } diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index f2de9df0004..1e655846d08 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -153,7 +153,9 @@ pub fn prewrite( OldValue::Unspecified }; - let final_min_commit_ts = mutation.write_lock(lock_status, txn)?; + let is_new_lock = !matches!(pessimistic_action, DoPessimisticCheck) || lock_amended; + + let final_min_commit_ts = mutation.write_lock(lock_status, txn, is_new_lock)?; fail_point!("after_prewrite_one_key"); @@ -448,7 +450,12 @@ impl<'a> PrewriteMutation<'a> { Ok(None) } - fn write_lock(self, lock_status: LockStatus, txn: &mut MvccTxn) -> Result { + fn write_lock( + self, + lock_status: LockStatus, + txn: &mut MvccTxn, + is_new_lock: bool, + ) -> Result { let mut try_one_pc = self.try_one_pc(); let mut lock = Lock::new( @@ -506,7 +513,7 @@ impl<'a> PrewriteMutation<'a> { if try_one_pc { txn.put_locks_for_1pc(self.key, lock, lock_status.has_pessimistic_lock()); } else { - txn.put_lock(self.key, &lock); + 
txn.put_lock(self.key, &lock, is_new_lock); } final_min_commit_ts diff --git a/src/storage/txn/commands/acquire_pessimistic_lock.rs b/src/storage/txn/commands/acquire_pessimistic_lock.rs index 2afdadaad80..58c33706bbc 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock.rs @@ -144,6 +144,7 @@ impl WriteCommand for AcquirePessimisticLock } } + let new_acquired_locks = txn.take_new_locks(); let modifies = txn.into_modifies(); let mut res = Ok(res); @@ -179,6 +180,7 @@ impl WriteCommand for AcquirePessimisticLock pr, lock_info: encountered_locks, released_locks: ReleasedLocks::new(), + new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnProposed, }) diff --git a/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs b/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs index a66f8228755..7640edd7c0c 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs @@ -82,6 +82,7 @@ impl WriteCommand for AcquirePessimisticLockR fn process_write(self, snapshot: S, context: WriteContext<'_, L>) -> Result { fail_point!("acquire_pessimistic_lock_resumed_before_process_write"); let mut modifies = vec![]; + let mut new_acquired_locks = vec![]; let mut txn = None; let mut reader: Option> = None; @@ -107,10 +108,11 @@ impl WriteCommand for AcquirePessimisticLockR .as_ref() .map_or(true, |t: &MvccTxn| t.start_ts != params.start_ts) { - if let Some(prev_txn) = txn.replace(MvccTxn::new( + if let Some(mut prev_txn) = txn.replace(MvccTxn::new( params.start_ts, context.concurrency_manager.clone(), )) { + new_acquired_locks.extend(prev_txn.take_new_locks()); modifies.extend(prev_txn.into_modifies()); } // TODO: Is it possible to reuse the same reader but change the start_ts stored @@ -169,8 +171,9 @@ impl WriteCommand for AcquirePessimisticLockR }; } - if let Some(txn) = txn { + if let Some(mut txn) = 
txn { if !txn.is_empty() { + new_acquired_locks.extend(txn.take_new_locks()); modifies.extend(txn.into_modifies()); } } @@ -188,6 +191,7 @@ impl WriteCommand for AcquirePessimisticLockR pr, lock_info: encountered_locks, released_locks: ReleasedLocks::new(), + new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnProposed, }) diff --git a/src/storage/txn/commands/atomic_store.rs b/src/storage/txn/commands/atomic_store.rs index 1df5c5b2cf8..9a54895e7e2 100644 --- a/src/storage/txn/commands/atomic_store.rs +++ b/src/storage/txn/commands/atomic_store.rs @@ -60,6 +60,7 @@ impl WriteCommand for RawAtomicStore { pr: ProcessResult::Res, lock_info: vec![], released_locks: ReleasedLocks::new(), + new_acquired_locks: vec![], lock_guards: raw_ext.into_iter().map(|r| r.key_guard).collect(), response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index 4802535c054..a19a5d82bb6 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -146,6 +146,7 @@ impl WriteCommand for CheckSecondaryLocks { rows = 1; } let pr = ProcessResult::SecondaryLocksStatus { status: result }; + let new_acquired_locks = txn.take_new_locks(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); Ok(WriteResult { @@ -155,6 +156,7 @@ impl WriteCommand for CheckSecondaryLocks { pr, lock_info: vec![], released_locks, + new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index 34948109f4b..895c753b160 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -125,6 +125,7 @@ impl WriteCommand for CheckTxnStatus { released_locks.push(released); let pr = 
ProcessResult::TxnStatus { txn_status }; + let new_acquired_locks = txn.take_new_locks(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); Ok(WriteResult { @@ -134,6 +135,7 @@ impl WriteCommand for CheckTxnStatus { pr, lock_info: vec![], released_locks, + new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/cleanup.rs b/src/storage/txn/commands/cleanup.rs index a6c529420d3..302c4fe1308 100644 --- a/src/storage/txn/commands/cleanup.rs +++ b/src/storage/txn/commands/cleanup.rs @@ -67,6 +67,7 @@ impl WriteCommand for Cleanup { true, )?); + let new_acquired_locks = txn.take_new_locks(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); Ok(WriteResult { @@ -76,6 +77,7 @@ impl WriteCommand for Cleanup { pr: ProcessResult::Res, lock_info: vec![], released_locks, + new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/commit.rs b/src/storage/txn/commands/commit.rs index 910b7832ed1..4f05df8fe83 100644 --- a/src/storage/txn/commands/commit.rs +++ b/src/storage/txn/commands/commit.rs @@ -67,6 +67,7 @@ impl WriteCommand for Commit { let pr = ProcessResult::TxnStatus { txn_status: TxnStatus::committed(self.commit_ts), }; + let new_acquired_locks = txn.take_new_locks(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); Ok(WriteResult { @@ -76,6 +77,7 @@ impl WriteCommand for Commit { pr, lock_info: vec![], released_locks, + new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/compare_and_swap.rs b/src/storage/txn/commands/compare_and_swap.rs index 943fc6f69d1..ca9213b57d3 100644 --- a/src/storage/txn/commands/compare_and_swap.rs +++ 
b/src/storage/txn/commands/compare_and_swap.rs @@ -114,6 +114,7 @@ impl WriteCommand for RawCompareAndSwap { pr, lock_info: vec![], released_locks: ReleasedLocks::new(), + new_acquired_locks: vec![], lock_guards, response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index 72b100f567b..da12bc8906c 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -159,6 +159,7 @@ impl WriteCommand for FlashbackToVersion { })(), lock_info: vec![], released_locks: ReleasedLocks::new(), + new_acquired_locks: vec![], lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 7eee81ae23e..2d79ebc97cc 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -417,6 +417,7 @@ pub struct WriteResult { pub pr: ProcessResult, pub lock_info: Vec, pub released_locks: ReleasedLocks, + pub new_acquired_locks: Vec, pub lock_guards: Vec, pub response_policy: ResponsePolicy, } diff --git a/src/storage/txn/commands/pause.rs b/src/storage/txn/commands/pause.rs index 3dc7d06d5ef..5d3aa7f6d2f 100644 --- a/src/storage/txn/commands/pause.rs +++ b/src/storage/txn/commands/pause.rs @@ -50,6 +50,7 @@ impl WriteCommand for Pause { pr: ProcessResult::Res, lock_info: vec![], released_locks: ReleasedLocks::new(), + new_acquired_locks: vec![], lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/pessimistic_rollback.rs b/src/storage/txn/commands/pessimistic_rollback.rs index c35c362f19e..b34399cb64a 100644 --- a/src/storage/txn/commands/pessimistic_rollback.rs +++ b/src/storage/txn/commands/pessimistic_rollback.rs @@ -83,6 +83,7 @@ impl WriteCommand for PessimisticRollback { released_locks.push(released_lock?); } + let new_acquired_locks = txn.take_new_locks(); let mut 
write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); Ok(WriteResult { @@ -92,6 +93,7 @@ impl WriteCommand for PessimisticRollback { pr: ProcessResult::MultiRes { results: vec![] }, lock_info: vec![], released_locks, + new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index b34c4eb752b..fbd4bf5984a 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -678,6 +678,7 @@ impl Prewriter { // If an error (KeyIsLocked or WriteConflict) occurs before, these lock guards // are dropped along with `txn` automatically. let lock_guards = txn.take_guards(); + let new_acquired_locks = txn.take_new_locks(); let mut to_be_write = WriteData::new(txn.into_modifies(), extra); to_be_write.set_disk_full_opt(self.ctx.get_disk_full_opt()); @@ -688,6 +689,7 @@ impl Prewriter { pr, lock_info: vec![], released_locks, + new_acquired_locks, lock_guards, response_policy: ResponsePolicy::OnApplied, } @@ -707,6 +709,7 @@ impl Prewriter { pr, lock_info: vec![], released_locks: ReleasedLocks::new(), + new_acquired_locks: vec![], lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, } @@ -870,8 +873,9 @@ fn handle_1pc_locks(txn: &mut MvccTxn, commit_ts: TimeStamp) -> ReleasedLocks { /// Change all 1pc locks in txn to 2pc locks. 
pub(in crate::storage::txn) fn fallback_1pc_locks(txn: &mut MvccTxn) { - for (key, lock, _) in std::mem::take(&mut txn.locks_for_1pc) { - txn.put_lock(key, &lock); + for (key, lock, remove_pessimistic_lock) in std::mem::take(&mut txn.locks_for_1pc) { + let is_new_lock = !remove_pessimistic_lock; + txn.put_lock(key, &lock, is_new_lock); } } diff --git a/src/storage/txn/commands/resolve_lock.rs b/src/storage/txn/commands/resolve_lock.rs index 463275b2e1f..f3d141807e8 100644 --- a/src/storage/txn/commands/resolve_lock.rs +++ b/src/storage/txn/commands/resolve_lock.rs @@ -138,6 +138,7 @@ impl WriteCommand for ResolveLock { cmd: Command::ResolveLockReadPhase(next_cmd), } }; + let new_acquired_locks = txn.take_new_locks(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); Ok(WriteResult { @@ -147,6 +148,7 @@ impl WriteCommand for ResolveLock { pr, lock_info: vec![], released_locks, + new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/resolve_lock_lite.rs b/src/storage/txn/commands/resolve_lock_lite.rs index d336d88a9ca..63fe201596d 100644 --- a/src/storage/txn/commands/resolve_lock_lite.rs +++ b/src/storage/txn/commands/resolve_lock_lite.rs @@ -63,6 +63,7 @@ impl WriteCommand for ResolveLockLite { }); } + let new_acquired_locks = txn.take_new_locks(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); Ok(WriteResult { @@ -72,6 +73,7 @@ impl WriteCommand for ResolveLockLite { pr: ProcessResult::Res, lock_info: vec![], released_locks, + new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/rollback.rs b/src/storage/txn/commands/rollback.rs index 52c05ae34c7..f3b674f4916 100644 --- a/src/storage/txn/commands/rollback.rs +++ b/src/storage/txn/commands/rollback.rs @@ -58,6 +58,7 @@ impl WriteCommand for 
Rollback { released_locks.push(released_lock); } + let new_acquired_locks = txn.take_new_locks(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); Ok(WriteResult { @@ -67,6 +68,7 @@ impl WriteCommand for Rollback { pr: ProcessResult::Res, lock_info: vec![], released_locks, + new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/commands/txn_heart_beat.rs b/src/storage/txn/commands/txn_heart_beat.rs index f965b863494..448395fc436 100644 --- a/src/storage/txn/commands/txn_heart_beat.rs +++ b/src/storage/txn/commands/txn_heart_beat.rs @@ -67,7 +67,7 @@ impl WriteCommand for TxnHeartBeat { Some(mut lock) if lock.ts == self.start_ts => { if lock.ttl < self.advise_ttl { lock.ttl = self.advise_ttl; - txn.put_lock(self.primary_key.clone(), &lock); + txn.put_lock(self.primary_key.clone(), &lock, false); } lock } @@ -83,6 +83,7 @@ impl WriteCommand for TxnHeartBeat { let pr = ProcessResult::TxnStatus { txn_status: TxnStatus::uncommitted(lock, false), }; + let new_acquired_locks = txn.take_new_locks(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); Ok(WriteResult { @@ -92,6 +93,7 @@ impl WriteCommand for TxnHeartBeat { pr, lock_info: vec![], released_locks: ReleasedLocks::new(), + new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, }) diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 13a74895803..d96e3e7c97f 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -798,6 +798,7 @@ impl Scheduler { lock_guards: Vec, pipelined: bool, async_apply_prewrite: bool, + new_acquired_locks: Vec, tag: CommandKind, ) { // TODO: Does async apply prewrite worth a special metric here? 
@@ -846,7 +847,7 @@ impl Scheduler { assert!(pipelined || async_apply_prewrite); } - // TODO: Update lock wait relationships after acquiring some locks. + self.on_acquired_locks_finished(new_acquired_locks); if do_wake_up { let woken_up_resumable_lock_requests = tctx.woken_up_resumable_lock_requests; @@ -978,6 +979,28 @@ impl Scheduler { resumable_wake_up_list } + fn on_acquired_locks_finished(&self, new_acquired_locks: Vec) { + if new_acquired_locks.is_empty() || self.inner.lock_wait_queues.is_empty() { + return; + } + + // If there are not too many new locks, do not spawn the task to the high + // priority pool since it may consume more CPU. + if new_acquired_locks.len() < 30 { + self.inner + .lock_wait_queues + .update_lock_wait(new_acquired_locks); + } else { + let lock_wait_queues = self.inner.lock_wait_queues.clone(); + self.get_sched_pool(CommandPri::High) + .pool + .spawn(async move { + lock_wait_queues.update_lock_wait(new_acquired_locks); + }) + .unwrap(); + } + } + fn wake_up_legacy_pessimistic_locks( &self, legacy_wake_up_list: impl IntoIterator, ReleasedLock)> @@ -1201,6 +1224,7 @@ impl Scheduler { pr, lock_info, released_locks, + new_acquired_locks, lock_guards, response_policy, } = match deadline @@ -1273,7 +1297,16 @@ impl Scheduler { } if to_be_write.modifies.is_empty() { - scheduler.on_write_finished(cid, pr, Ok(()), lock_guards, false, false, tag); + scheduler.on_write_finished( + cid, + pr, + Ok(()), + lock_guards, + false, + false, + new_acquired_locks, + tag, + ); return; } @@ -1294,7 +1327,16 @@ impl Scheduler { engine.schedule_txn_extra(to_be_write.extra); }) } - scheduler.on_write_finished(cid, pr, Ok(()), lock_guards, false, false, tag); + scheduler.on_write_finished( + cid, + pr, + Ok(()), + lock_guards, + false, + false, + new_acquired_locks, + tag, + ); return; } @@ -1478,6 +1520,7 @@ impl Scheduler { lock_guards, pipelined, is_async_apply_prewrite, + new_acquired_locks, tag, ); KV_COMMAND_KEYWRITE_HISTOGRAM_VEC diff --git 
a/tests/integrations/server/lock_manager.rs b/tests/integrations/server/lock_manager.rs index 43032dd8cc3..289b10303a8 100644 --- a/tests/integrations/server/lock_manager.rs +++ b/tests/integrations/server/lock_manager.rs @@ -1,6 +1,14 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::{sync::Arc, thread, time::Duration}; +use std::{ + sync::{ + mpsc, + mpsc::{RecvTimeoutError, TryRecvError}, + Arc, + }, + thread, + time::Duration, +}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ @@ -283,3 +291,106 @@ fn test_detect_deadlock_when_merge_region() { must_transfer_leader(&mut cluster, b"", 1); } } + +#[test] +fn test_detect_deadlock_when_updating_wait_info() { + use kvproto::kvrpcpb::PessimisticLockKeyResultType::*; + let mut cluster = new_cluster_for_deadlock_test(3); + + let key1 = b"key1"; + let key2 = b"key2"; + let (client, ctx) = build_leader_client(&mut cluster, key1); + let client = Arc::new(client); + + fn async_pessimistic_lock( + client: Arc, + ctx: Context, + key: &[u8], + ts: u64, + ) -> mpsc::Receiver { + let (tx, rx) = mpsc::channel(); + let key = vec![key.to_vec()]; + thread::spawn(move || { + let resp = + kv_pessimistic_lock_resumable(&client, ctx, key, ts, ts, Some(1000), false, false); + tx.send(resp).unwrap(); + }); + rx + } + + // key1: txn 11 and 12 waits for 10 + // key2: txn 11 waits for 12 + let resp = kv_pessimistic_lock_resumable( + &client, + ctx.clone(), + vec![key1.to_vec()], + 10, + 10, + Some(1000), + false, + false, + ); + assert!(resp.region_error.is_none()); + assert!(resp.errors.is_empty()); + assert_eq!(resp.results[0].get_type(), LockResultNormal); + let resp = kv_pessimistic_lock_resumable( + &client, + ctx.clone(), + vec![key2.to_vec()], + 12, + 12, + Some(1000), + false, + false, + ); + assert!(resp.region_error.is_none()); + assert!(resp.errors.is_empty()); + assert_eq!(resp.results[0].get_type(), LockResultNormal); + let rx_txn11_k1 = async_pessimistic_lock(client.clone(), ctx.clone(), 
key1, 11); + let rx_txn12_k1 = async_pessimistic_lock(client.clone(), ctx.clone(), key1, 12); + let rx_txn11_k2 = async_pessimistic_lock(client.clone(), ctx.clone(), key2, 11); + // All blocked. + assert_eq!( + rx_txn11_k1 + .recv_timeout(Duration::from_millis(50)) + .unwrap_err(), + RecvTimeoutError::Timeout + ); + assert_eq!(rx_txn12_k1.try_recv().unwrap_err(), TryRecvError::Empty); + assert_eq!(rx_txn11_k2.try_recv().unwrap_err(), TryRecvError::Empty); + + // Release lock at ts=10 on key1 so that txn 11 will be granted the lock. + must_kv_pessimistic_rollback(&client, ctx.clone(), key1.to_vec(), 10, 10); + let resp = rx_txn11_k1 + .recv_timeout(Duration::from_millis(200)) + .unwrap(); + assert!(resp.region_error.is_none()); + assert!(resp.errors.is_empty()); + assert_eq!(resp.results[0].get_type(), LockResultNormal); + // And then 12 waits for k1 on key1, which forms a deadlock. + let resp = rx_txn12_k1 + .recv_timeout(Duration::from_millis(1000)) + .unwrap(); + assert!(resp.region_error.is_none()); + assert!(resp.errors[0].has_deadlock()); + assert_eq!(resp.results[0].get_type(), LockResultFailed); + // Check correctness of the wait chain. + let wait_chain = resp.errors[0].get_deadlock().get_wait_chain(); + assert_eq!(wait_chain[0].get_txn(), 11); + assert_eq!(wait_chain[0].get_wait_for_txn(), 12); + assert_eq!(wait_chain[0].get_key(), key2); + assert_eq!(wait_chain[1].get_txn(), 12); + assert_eq!(wait_chain[1].get_wait_for_txn(), 11); + assert_eq!(wait_chain[1].get_key(), key1); + + // Clean up. 
+ must_kv_pessimistic_rollback(&client, ctx.clone(), key1.to_vec(), 11, 11); + must_kv_pessimistic_rollback(&client, ctx.clone(), key2.to_vec(), 12, 12); + let resp = rx_txn11_k2 + .recv_timeout(Duration::from_millis(500)) + .unwrap(); + assert!(resp.region_error.is_none()); + assert!(resp.errors.is_empty()); + assert_eq!(resp.results[0].get_type(), LockResultNormal); + must_kv_pessimistic_rollback(&client, ctx, key2.to_vec(), 11, 11); +} From 87b0eafff97155514fc9dbec82001dfb7efb2a1e Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 16 Dec 2022 15:28:52 +0800 Subject: [PATCH 0409/1149] raftstore-v2: strip ts from split keys before propose (#13948) ref tikv/tikv#12842 Signed-off-by: SpadeA-Tang --- Cargo.lock | 1 + components/raftstore-v2/Cargo.toml | 1 + .../src/operation/command/admin/split.rs | 100 +++++++++++++++++- 3 files changed, 99 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 244ca504858..11aa05f2140 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4359,6 +4359,7 @@ dependencies = [ "file_system", "fs2", "futures 0.3.15", + "itertools", "keys", "kvproto", "log_wrappers", diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 4d3d44ec6fd..d9b1d65aebc 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -41,6 +41,7 @@ fail = "0.5" file_system = { workspace = true } fs2 = "0.4" futures = { version = "0.3", features = ["compat"] } +itertools = "0.10" keys = { workspace = true } kvproto = { workspace = true } log_wrappers = { workspace = true } diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 8ca4c7a55f6..7ae0b68a327 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -31,6 +31,7 @@ use collections::HashSet; use 
crossbeam::channel::SendError; use engine_traits::{Checkpointer, KvEngine, RaftEngine, TabletContext}; use fail::fail_point; +use itertools::Itertools; use kvproto::{ metapb::{self, Region}, raft_cmdpb::{AdminRequest, AdminResponse, RaftCmdRequest, SplitRequest}, @@ -39,6 +40,7 @@ use kvproto::{ use protobuf::Message; use raft::{prelude::Snapshot, INVALID_ID}; use raftstore::{ + coprocessor::split_observer::{is_valid_split_key, strip_timestamp_if_exists}, store::{ fsm::apply::validate_batch_split, metrics::PEER_ADMIN_CMD_COUNTER, @@ -48,7 +50,8 @@ use raftstore::{ }, Result, }; -use slog::info; +use slog::{error, info, warn, Logger}; +use tikv_util::box_err; use crate::{ batch::StoreContext, @@ -99,13 +102,66 @@ impl SplitInit { } } +// validate split request and strip ts from split keys if needed +fn pre_propose_split(logger: &Logger, req: &mut AdminRequest, region: &Region) -> Result<()> { + if !req.has_splits() { + return Err(box_err!( + "cmd_type is BatchSplit but it doesn't have splits request, message maybe \ + corrupted!" + .to_owned() + )); + } + + let mut requests: Vec = req.mut_splits().take_requests().into(); + let ajusted_splits = std::mem::take(&mut requests) + .into_iter() + .enumerate() + .filter_map(|(i, mut split)| { + let key = split.take_split_key(); + let key = strip_timestamp_if_exists(key); + if is_valid_split_key(&key, i, region) { + split.split_key = key; + Some(split) + } else { + None + } + }) + .coalesce(|prev, curr| { + // Make sure that the split keys are sorted and unique. 
+ if prev.split_key < curr.split_key { + Err((prev, curr)) + } else { + warn!( + logger, + "skip invalid split key: key should not be larger than the previous."; + "key" => log_wrappers::Value::key(&curr.split_key), + "previous" => log_wrappers::Value::key(&prev.split_key), + ); + Ok(prev) + } + }) + .collect::>(); + + if ajusted_splits.is_empty() { + error!( + logger, + "failed to handle split req, no valid key found for split"; + ); + Err(box_err!("no valid key found for split.".to_owned())) + } else { + // Rewrite the splits. + req.mut_splits().set_requests(ajusted_splits.into()); + Ok(()) + } +} + impl Peer { pub fn propose_split( &mut self, store_ctx: &mut StoreContext, - req: RaftCmdRequest, + mut req: RaftCmdRequest, ) -> Result { - validate_batch_split(req.get_admin_request(), self.region())?; + pre_propose_split(&self.logger, req.mut_admin_request(), self.region())?; // We rely on ConflictChecker to detect conflicts, so no need to set proposal // context. let data = req.write_to_bytes().unwrap(); @@ -494,6 +550,7 @@ mod test { store::{new_learner_peer, new_peer}, worker::dummy_scheduler, }; + use txn_types::Key; use super::*; use crate::{fsm::ApplyResReporter, raft::Apply, router::ApplyRes}; @@ -597,6 +654,43 @@ mod test { } } + #[test] + fn test_propose() { + let logger = slog_global::borrow_global().new(o!()); + + let mut region = Region::default(); + region.set_end_key(b"k10".to_vec()); + + let mut req = AdminRequest::default(); + let err = pre_propose_split(&logger, &mut req, ®ion).unwrap_err(); + assert!( + err.to_string() + .contains("cmd_type is BatchSplit but it doesn't have splits") + ); + + let mut splits = BatchSplitRequest::default(); + req.set_splits(splits.clone()); + let err = pre_propose_split(&logger, &mut req, ®ion).unwrap_err(); + assert!(err.to_string().contains("no valid key found")); + + splits.mut_requests().push(new_split_req(b"", 0, vec![])); + splits.mut_requests().push(new_split_req(b"k03", 0, vec![])); + 
splits.mut_requests().push(new_split_req(b"k02", 0, vec![])); + splits.mut_requests().push(new_split_req(b"k11", 0, vec![])); + let split_key = Key::from_raw(b"k06"); + let split_key_with_ts = split_key.clone().append_ts(10.into()); + splits + .mut_requests() + .push(new_split_req(split_key_with_ts.as_encoded(), 0, vec![])); + + req.set_splits(splits); + pre_propose_split(&logger, &mut req, &region).unwrap(); + let split_reqs = req.get_splits().get_requests(); + assert_eq!(split_reqs.len(), 2); + assert_eq!(split_reqs[0].get_split_key(), b"k03"); + assert_eq!(split_reqs[1].get_split_key(), split_key.as_encoded()); + } + #[test] fn test_split() { let store_id = 2; From 416f7b7504a2766edb2c7b7b4a5b8c6e24485440 Mon Sep 17 00:00:00 2001 From: Jay Date: Sat, 17 Dec 2022 13:08:53 +0800 Subject: [PATCH 0410/1149] raftstore-v2: introduce apply trace (#13939) ref tikv/tikv#12842 raftstore v2 disables WAL for all tablets and store all states to raft engine. To be able to recover from restart, we need to build some relations between raft engine and tablets flush. In the previous PR, flush indexes are stored in raft engine by `PersistenceListener`. In this PR, ApplyTrace is introduced to analyze apply index after restart. And it will trigger persistence for more apply progress like split. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.toml | 2 +- components/engine_panic/src/misc.rs | 2 +- components/engine_rocks/src/engine.rs | 14 +- components/engine_rocks/src/event_listener.rs | 160 ++++- components/engine_rocks/src/file_system.rs | 4 +- components/engine_rocks/src/misc.rs | 9 +- components/engine_test/src/lib.rs | 65 +- components/engine_traits/src/cf_defs.rs | 1 + components/engine_traits/src/flush.rs | 187 +++--- components/engine_traits/src/lib.rs | 2 + components/engine_traits/src/misc.rs | 5 +- components/engine_traits/src/tablet.rs | 7 +- components/raftstore-v2/src/bootstrap.rs | 2 +- components/raftstore-v2/src/fsm/apply.rs | 12 +- components/raftstore-v2/src/fsm/peer.rs | 9 + components/raftstore-v2/src/lib.rs | 2 + .../src/operation/command/admin/split.rs | 33 +- .../raftstore-v2/src/operation/command/mod.rs | 96 +-- .../src/operation/command/write/mod.rs | 16 +- components/raftstore-v2/src/operation/mod.rs | 5 +- .../src/operation/ready/apply_trace.rs | 585 ++++++++++++++++++ .../raftstore-v2/src/operation/ready/mod.rs | 31 +- .../src/operation/ready/snapshot.rs | 39 +- components/raftstore-v2/src/raft/apply.rs | 51 +- components/raftstore-v2/src/raft/mod.rs | 2 +- components/raftstore-v2/src/raft/peer.rs | 63 +- components/raftstore-v2/src/raft/storage.rs | 179 +----- .../src/router/internal_message.rs | 5 +- components/raftstore-v2/src/router/message.rs | 14 + .../raftstore-v2/tests/failpoints/mod.rs | 1 + .../tests/failpoints/test_trace_apply.rs | 7 + .../tests/integrations/cluster.rs | 69 ++- .../raftstore-v2/tests/integrations/mod.rs | 1 + .../tests/integrations/test_split.rs | 73 +++ .../tests/integrations/test_trace_apply.rs | 211 +++++++ .../raftstore/src/store/async_io/write.rs | 11 + .../raftstore/src/store/compaction_guard.rs | 4 +- .../raftstore/src/store/region_snapshot.rs | 2 +- components/server/src/server.rs | 2 +- src/server/engine_factory.rs | 24 +- src/storage/mod.rs | 6 +- 
.../failpoints/cases/test_table_properties.rs | 10 +- tests/integrations/raftstore/test_stats.rs | 6 +- .../raftstore/test_update_region_size.rs | 2 +- tests/integrations/storage/test_titan.rs | 6 +- 45 files changed, 1613 insertions(+), 424 deletions(-) create mode 100644 components/raftstore-v2/src/operation/ready/apply_trace.rs create mode 100644 components/raftstore-v2/tests/failpoints/test_trace_apply.rs create mode 100644 components/raftstore-v2/tests/integrations/test_trace_apply.rs diff --git a/Cargo.toml b/Cargo.toml index 66516206dd0..e09b422299e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -338,7 +338,7 @@ pd_client = { path = "components/pd_client" } profiler = { path = "components/profiler" } raft_log_engine = { path = "components/raft_log_engine" } raftstore = { path = "components/raftstore", default-features = false } -raftstore_v2 = { path = "components/raftstore-v2", default-features = false } +raftstore-v2 = { path = "components/raftstore-v2", default-features = false } resolved_ts = { path = "components/resolved_ts" } resource_metering = { path = "components/resource_metering" } security = { path = "components/security" } diff --git a/components/engine_panic/src/misc.rs b/components/engine_panic/src/misc.rs index 82012b84ed6..8c983051438 100644 --- a/components/engine_panic/src/misc.rs +++ b/components/engine_panic/src/misc.rs @@ -5,7 +5,7 @@ use engine_traits::{DeleteStrategy, MiscExt, Range, Result}; use crate::engine::PanicEngine; impl MiscExt for PanicEngine { - fn flush_cfs(&self, wait: bool) -> Result<()> { + fn flush_cfs(&self, cfs: &[&str], wait: bool) -> Result<()> { panic!() } diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 70f6562e94b..0c37120e7fc 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -2,9 +2,7 @@ use std::{any::Any, sync::Arc}; -use engine_traits::{ - FlushState, IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, 
SyncMutable, -}; +use engine_traits::{IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SyncMutable}; use rocksdb::{DBIterator, Writable, DB}; use crate::{ @@ -26,7 +24,6 @@ use crate::{ pub struct RocksEngine { db: Arc, support_multi_batch_write: bool, - flush_state: Option>, } impl RocksEngine { @@ -38,7 +35,6 @@ impl RocksEngine { RocksEngine { db: db.clone(), support_multi_batch_write: db.get_db_options().is_enable_multi_batch_write(), - flush_state: None, } } @@ -53,14 +49,6 @@ impl RocksEngine { pub fn support_multi_batch_write(&self) -> bool { self.support_multi_batch_write } - - pub fn set_flush_state(&mut self, flush_state: Arc) { - self.flush_state = Some(flush_state); - } - - pub fn flush_state(&self) -> Option> { - self.flush_state.clone() - } } impl KvEngine for RocksEngine { diff --git a/components/engine_rocks/src/event_listener.rs b/components/engine_rocks/src/event_listener.rs index 8bf3035bc55..b940fcb39f3 100644 --- a/components/engine_rocks/src/event_listener.rs +++ b/components/engine_rocks/src/event_listener.rs @@ -1,6 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{PersistenceListener, RaftEngine}; +use engine_traits::PersistenceListener; use file_system::{get_io_type, set_io_type, IoType}; use regex::Regex; use rocksdb::{ @@ -179,29 +179,40 @@ fn resolve_sst_filename_from_err(err: &str) -> Option { Some(filename) } -pub struct RocksPersistenceListener(PersistenceListener); +pub struct RocksPersistenceListener(PersistenceListener); -impl RocksPersistenceListener { - pub fn new(listener: PersistenceListener) -> RocksPersistenceListener { +impl RocksPersistenceListener { + pub fn new(listener: PersistenceListener) -> RocksPersistenceListener { RocksPersistenceListener(listener) } } -impl rocksdb::EventListener for RocksPersistenceListener { +impl rocksdb::EventListener for RocksPersistenceListener { fn on_memtable_sealed(&self, info: &MemTableInfo) { self.0 - .on_memtable_sealed(info.cf_name().to_string(), info.first_seqno()); + .on_memtable_sealed(info.cf_name().to_string(), info.earliest_seqno()); } fn on_flush_completed(&self, job: &FlushJobInfo) { self.0 - .on_flush_completed(job.cf_name(), job.smallest_seqno()); + .on_flush_completed(job.cf_name(), job.largest_seqno()); } } #[cfg(test)] mod tests { + use std::sync::{ + mpsc::{self, Sender}, + Arc, Mutex, + }; + + use engine_traits::{ + FlushProgress, FlushState, MiscExt, StateStorage, SyncMutable, CF_DEFAULT, DATA_CFS, + }; + use tempfile::Builder; + use super::*; + use crate::{util, RocksCfOptions, RocksDbOptions}; #[test] fn test_resolve_sst_filename() { @@ -209,4 +220,139 @@ mod tests { let filename = resolve_sst_filename_from_err(err).unwrap(); assert_eq!(filename, "/000398.sst"); } + + type Record = (u64, u64, FlushProgress); + + #[derive(Default)] + struct MemStorage { + records: Mutex>, + } + + impl StateStorage for MemStorage { + fn persist_progress(&self, region_id: u64, tablet_index: u64, pr: FlushProgress) { + self.records + .lock() + .unwrap() + .push((region_id, tablet_index, pr)); + } + } + + struct FlushTrack { + sealed: Mutex>, + 
block_flush: Arc>, + } + + impl rocksdb::EventListener for FlushTrack { + fn on_memtable_sealed(&self, _: &MemTableInfo) { + let _ = self.sealed.lock().unwrap().send(()); + } + + fn on_flush_begin(&self, _: &FlushJobInfo) { + drop(self.block_flush.lock().unwrap()) + } + } + + #[test] + fn test_persistence_listener() { + let temp_dir = Builder::new() + .prefix("test_persistence_listener") + .tempdir() + .unwrap(); + let (region_id, tablet_index) = (2, 3); + + let storage = Arc::new(MemStorage::default()); + let state = Arc::new(FlushState::default()); + let listener = + PersistenceListener::new(region_id, tablet_index, state.clone(), storage.clone()); + let mut db_opt = RocksDbOptions::default(); + db_opt.add_event_listener(RocksPersistenceListener::new(listener)); + let (tx, rx) = mpsc::channel(); + let block_flush = Arc::new(Mutex::new(())); + db_opt.add_event_listener(FlushTrack { + sealed: Mutex::new(tx), + block_flush: block_flush.clone(), + }); + + let mut cf_opts: Vec<_> = DATA_CFS + .iter() + .map(|cf| (*cf, RocksCfOptions::default())) + .collect(); + cf_opts[0].1.set_max_write_buffer_number(4); + cf_opts[0].1.set_min_write_buffer_number_to_merge(2); + cf_opts[0].1.set_write_buffer_size(1024); + cf_opts[0].1.set_disable_auto_compactions(true); + let db = util::new_engine_opt(temp_dir.path().to_str().unwrap(), db_opt, cf_opts).unwrap(); + db.flush_cf(CF_DEFAULT, true).unwrap(); + let sst_count = || { + std::fs::read_dir(temp_dir.path()) + .unwrap() + .filter(|p| { + let p = match p { + Ok(p) => p, + Err(_) => return false, + }; + p.path().extension().map_or(false, |ext| ext == "sst") + }) + .count() + }; + // Although flush is triggered, but there is nothing to flush. + assert_eq!(sst_count(), 0); + assert_eq!(storage.records.lock().unwrap().len(), 0); + + // Flush one key should work. 
+ state.set_applied_index(2); + db.put_cf(CF_DEFAULT, b"k0", b"v0").unwrap(); + db.flush_cf(CF_DEFAULT, true).unwrap(); + assert_eq!(sst_count(), 1); + let record = storage.records.lock().unwrap().pop().unwrap(); + assert_eq!(storage.records.lock().unwrap().len(), 0); + assert_eq!(record.0, region_id); + assert_eq!(record.1, tablet_index); + assert_eq!(record.2.applied_index(), 2); + + // When puts and deletes are mixed, the puts may be deleted during flush. + state.set_applied_index(3); + db.put_cf(CF_DEFAULT, b"k0", b"v0").unwrap(); + db.delete_cf(CF_DEFAULT, b"k0").unwrap(); + db.delete_cf(CF_DEFAULT, b"k1").unwrap(); + db.put_cf(CF_DEFAULT, b"k1", b"v1").unwrap(); + db.flush_cf(CF_DEFAULT, true).unwrap(); + assert_eq!(sst_count(), 2); + let record = storage.records.lock().unwrap().pop().unwrap(); + assert_eq!(storage.records.lock().unwrap().len(), 0); + assert_eq!(record.0, region_id); + assert_eq!(record.1, tablet_index); + assert_eq!(record.2.applied_index(), 3); + // Detail check of `FlushProgress` will be done in raftstore-v2 tests. + + // Drain all the events. + while rx.try_recv().is_ok() {} + state.set_applied_index(4); + let block = block_flush.lock(); + // Seal twice to trigger flush. Seal third to make a seqno conflict, in + // which case flush largest seqno will be equal to seal earliest seqno. + let mut key_count = 2; + for i in 0..3 { + while rx.try_recv().is_err() { + db.put(format!("k{key_count}").as_bytes(), &[0; 512]) + .unwrap(); + key_count += 1; + } + state.set_applied_index(5 + i); + } + drop(block); + // Memtable is seal before put, so there must be still one KV in memtable. + db.flush_cf(CF_DEFAULT, true).unwrap(); + rx.try_recv().unwrap(); + // There is 2 sst before this round, and then 4 are merged into 2, so there + // should be 4 ssts. + assert_eq!(sst_count(), 4); + let records = storage.records.lock().unwrap(); + // Although it seals 4 times, but only create 2 SSTs, so only 2 records. 
+ assert_eq!(records.len(), 2); + // The indexes of two merged flush state are 4 and 5, so merged value is 5. + assert_eq!(records[0].2.applied_index(), 5); + // The last two flush state is 6 and 7. + assert_eq!(records[1].2.applied_index(), 7); + } } diff --git a/components/engine_rocks/src/file_system.rs b/components/engine_rocks/src/file_system.rs index f3211d52d68..5fc0ed7f6e2 100644 --- a/components/engine_rocks/src/file_system.rs +++ b/components/engine_rocks/src/file_system.rs @@ -82,13 +82,13 @@ mod tests { db.put(&data_key(b"a1"), &value).unwrap(); db.put(&data_key(b"a2"), &value).unwrap(); assert_eq!(stats.fetch(IoType::Flush, IoOp::Write), 0); - db.flush_cfs(true /* wait */).unwrap(); + db.flush_cfs(&[], true /* wait */).unwrap(); assert!(stats.fetch(IoType::Flush, IoOp::Write) > value_size * 2); assert!(stats.fetch(IoType::Flush, IoOp::Write) < value_size * 2 + amplification_bytes); stats.reset(); db.put(&data_key(b"a2"), &value).unwrap(); db.put(&data_key(b"a3"), &value).unwrap(); - db.flush_cfs(true /* wait */).unwrap(); + db.flush_cfs(&[], true /* wait */).unwrap(); assert!(stats.fetch(IoType::Flush, IoOp::Write) > value_size * 2); assert!(stats.fetch(IoType::Flush, IoOp::Write) < value_size * 2 + amplification_bytes); stats.reset(); diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index 482686ffd1a..9ef2ed079b2 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -126,11 +126,16 @@ impl RocksEngine { } impl MiscExt for RocksEngine { - fn flush_cfs(&self, wait: bool) -> Result<()> { + fn flush_cfs(&self, cfs: &[&str], wait: bool) -> Result<()> { let mut handles = vec![]; - for cf in self.cf_names() { + for cf in cfs { handles.push(util::get_cf_handle(self.as_inner(), cf)?); } + if handles.is_empty() { + for cf in self.cf_names() { + handles.push(util::get_cf_handle(self.as_inner(), cf)?); + } + } self.as_inner().flush_cfs(&handles, wait).map_err(r2e) } diff --git 
a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index 16849acd5b8..2d89929a4b2 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -119,9 +119,10 @@ pub mod kv { } impl TabletFactory for TestTabletFactory { - fn open_tablet(&self, _ctx: TabletContext, path: &Path) -> Result { - KvTestEngine::new_kv_engine_opt( + fn open_tablet(&self, ctx: TabletContext, path: &Path) -> Result { + KvTestEngine::new_tablet( path.to_str().unwrap(), + ctx, self.db_opt.clone(), self.cf_opts.clone(), ) @@ -155,7 +156,7 @@ pub mod ctor { use std::sync::Arc; use encryption::DataKeyManager; - use engine_traits::Result; + use engine_traits::{Result, StateStorage, TabletContext}; use file_system::IoRateLimiter; /// Kv engine construction @@ -188,6 +189,14 @@ pub mod ctor { db_opt: DbOptions, cf_opts: Vec<(&str, CfOptions)>, ) -> Result; + + /// Create a new engine specific for multi rocks. + fn new_tablet( + path: &str, + ctx: TabletContext, + db_opt: DbOptions, + cf_opts: Vec<(&str, CfOptions)>, + ) -> Result; } /// Raft engine construction @@ -200,6 +209,7 @@ pub mod ctor { pub struct DbOptions { key_manager: Option>, rate_limiter: Option>, + state_storage: Option>, enable_multi_batch_write: bool, } @@ -212,6 +222,10 @@ pub mod ctor { self.rate_limiter = rate_limiter; } + pub fn set_state_storage(&mut self, state_storage: Arc) { + self.state_storage = Some(state_storage); + } + pub fn set_enable_multi_batch_write(&mut self, enable: bool) { self.enable_multi_batch_write = enable; } @@ -329,6 +343,15 @@ pub mod ctor { ) -> Result { Ok(PanicEngine) } + + fn new_tablet( + _path: &str, + _ctx: engine_traits::TabletContext, + _db_opt: DbOptions, + _cf_opts: Vec<(&str, CfOptions)>, + ) -> Result { + Ok(PanicEngine) + } } impl RaftEngineConstructorExt for engine_panic::PanicEngine { @@ -343,9 +366,11 @@ pub mod ctor { get_env, properties::{MvccPropertiesCollectorFactory, RangePropertiesCollectorFactory}, util::new_engine_opt as 
rocks_new_engine_opt, - RocksCfOptions, RocksDbOptions, + RocksCfOptions, RocksDbOptions, RocksPersistenceListener, + }; + use engine_traits::{ + CfOptions as _, PersistenceListener, Result, TabletContext, CF_DEFAULT, }; - use engine_traits::{CfOptions as _, Result, CF_DEFAULT}; use super::{ CfOptions, DbOptions, KvEngineConstructorExt, RaftDbOptions, RaftEngineConstructorExt, @@ -376,6 +401,36 @@ pub mod ctor { .collect(); rocks_new_engine_opt(path, rocks_db_opts, rocks_cfs_opts) } + + fn new_tablet( + path: &str, + ctx: TabletContext, + db_opt: DbOptions, + cf_opts: Vec<(&str, CfOptions)>, + ) -> Result { + let mut rocks_db_opts = RocksDbOptions::default(); + let env = get_env(db_opt.key_manager.clone(), db_opt.rate_limiter)?; + rocks_db_opts.set_env(env); + rocks_db_opts.enable_unordered_write(false); + rocks_db_opts.enable_pipelined_write(false); + rocks_db_opts.enable_multi_batch_write(false); + rocks_db_opts.allow_concurrent_memtable_write(false); + if let Some(storage) = db_opt.state_storage + && let Some(flush_state) = ctx.flush_state { + let listener = PersistenceListener::new( + ctx.id, + ctx.suffix.unwrap(), + flush_state, + storage, + ); + rocks_db_opts.add_event_listener(RocksPersistenceListener::new(listener)); + } + let rocks_cfs_opts = cf_opts + .iter() + .map(|(name, opt)| (*name, get_rocks_cf_opts(opt))) + .collect(); + rocks_new_engine_opt(path, rocks_db_opts, rocks_cfs_opts) + } } impl RaftEngineConstructorExt for engine_rocks::RocksEngine { diff --git a/components/engine_traits/src/cf_defs.rs b/components/engine_traits/src/cf_defs.rs index e3fe95ec3b6..1658f49053c 100644 --- a/components/engine_traits/src/cf_defs.rs +++ b/components/engine_traits/src/cf_defs.rs @@ -9,6 +9,7 @@ pub const CF_RAFT: CfName = "raft"; pub const LARGE_CFS: &[CfName] = &[CF_DEFAULT, CF_LOCK, CF_WRITE]; pub const ALL_CFS: &[CfName] = &[CF_DEFAULT, CF_LOCK, CF_WRITE, CF_RAFT]; pub const DATA_CFS: &[CfName] = &[CF_DEFAULT, CF_LOCK, CF_WRITE]; +pub const DATA_CFS_LEN: 
usize = DATA_CFS.len(); pub fn name_to_cf(name: &str) -> Option { if name.is_empty() { diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index 9de5369ab54..cfed95f0426 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -13,36 +13,36 @@ //! be used as the start state. use std::{ - mem, + collections::LinkedList, sync::{ atomic::{AtomicU64, Ordering}, Arc, Mutex, }, }; -use kvproto::raft_serverpb::{RaftApplyState, RegionLocalState}; -use tikv_util::Either; - use crate::{RaftEngine, RaftLogBatch}; #[derive(Debug)] -enum StateChange { - ApplyState(RaftApplyState), - RegionState(RegionLocalState), +pub struct FlushProgress { + cf: String, + apply_index: u64, + earliest_seqno: u64, } -/// States that is related to apply progress. -#[derive(Default, Debug)] -struct StateChanges { - /// apply index, state change - changes: Vec<(u64, StateChange)>, -} +impl FlushProgress { + fn merge(&mut self, pr: FlushProgress) { + debug_assert_eq!(self.cf, pr.cf); + debug_assert!(self.apply_index <= pr.apply_index); + self.apply_index = pr.apply_index; + } -struct FlushProgress { - cf: String, - id: u64, - apply_index: u64, - state_changes: StateChanges, + pub fn applied_index(&self) -> u64 { + self.apply_index + } + + pub fn cf(&self) -> &str { + &self.cf + } } /// A share state between raftstore and underlying engine. @@ -53,7 +53,6 @@ struct FlushProgress { #[derive(Default, Debug)] pub struct FlushState { applied_index: AtomicU64, - changes: Mutex, } impl FlushState { @@ -68,135 +67,113 @@ impl FlushState { pub fn applied_index(&self) -> u64 { self.applied_index.load(Ordering::Acquire) } +} - /// Record an apply state change. - /// - /// This can be triggered by admin command like compact log. General log - /// apply will not trigger the change, instead they are recorded by - /// `set_applied_index`. 
- #[inline] - pub fn update_apply_state(&self, index: u64, state: RaftApplyState) { - self.changes - .lock() - .unwrap() - .changes - .push((index, StateChange::ApplyState(state))); - } - - /// Record a region state change. - /// - /// This can be triggered by admin command like split/merge. - #[inline] - pub fn update_region_state(&self, index: u64, state: RegionLocalState) { - self.changes - .lock() - .unwrap() - .changes - .push((index, StateChange::RegionState(state))); - } - - /// Check if there is any state change. - #[inline] - pub fn is_empty(&self) -> bool { - self.changes.lock().unwrap().changes.is_empty() - } - - /// Get the last changed state. - #[inline] - pub fn last_state(&self) -> Option<(u64, Either)> { - let changes = self.changes.lock().unwrap(); - let (index, state) = changes.changes.last()?; - let state = match state { - StateChange::ApplyState(state) => Either::Left(state.clone()), - StateChange::RegionState(state) => Either::Right(state.clone()), - }; - Some((*index, state)) - } +/// A helper trait to avoid exposing `RaftEngine` to `TabletFactory`. +pub trait StateStorage: Sync + Send { + fn persist_progress(&self, region_id: u64, tablet_index: u64, pr: FlushProgress); } /// A flush listener that maps memtable to apply index and persist the relation /// to raft engine. 
-pub struct PersistenceListener { +pub struct PersistenceListener { region_id: u64, tablet_index: u64, state: Arc, - progress: Mutex>, - raft: ER, + progress: Mutex>, + storage: Arc, } -impl PersistenceListener { - pub fn new(region_id: u64, tablet_index: u64, state: Arc, raft: ER) -> Self { +impl PersistenceListener { + pub fn new( + region_id: u64, + tablet_index: u64, + state: Arc, + storage: Arc, + ) -> Self { Self { region_id, tablet_index, state, - progress: Mutex::new(Vec::new()), - raft, + progress: Mutex::new(LinkedList::new()), + storage, } } } -impl PersistenceListener { +impl PersistenceListener { pub fn flush_state(&self) -> &Arc { &self.state } /// Called when memtable is frozen. /// - /// `id` should be unique between memtables, which is used to identify - /// memtable in the flushed event. - pub fn on_memtable_sealed(&self, cf: String, id: u64) { + /// `earliest_seqno` should be the smallest seqno of the memtable. + pub fn on_memtable_sealed(&self, cf: String, earliest_seqno: u64) { // The correctness relies on the assumption that there will be only one // thread writting to the DB and increasing apply index. - let mut state_changes = self.state.changes.lock().unwrap(); - // Query within lock so it's correct even in manually flush. + // Apply index will be set within DB lock, so it's correct even with manual + // flush. let apply_index = self.state.applied_index.load(Ordering::SeqCst); - let changes = mem::take(&mut *state_changes); - drop(state_changes); - self.progress.lock().unwrap().push(FlushProgress { + self.progress.lock().unwrap().push_back(FlushProgress { cf, - id, apply_index, - state_changes: changes, + earliest_seqno, }); } /// Called a memtable finished flushing. - pub fn on_flush_completed(&self, cf: &str, id: u64) { + /// + /// `largest_seqno` should be the largest seqno of the generated file. 
+ pub fn on_flush_completed(&self, cf: &str, largest_seqno: u64) { // Maybe we should hook the compaction to avoid the file is compacted before // being recorded. let pr = { let mut prs = self.progress.lock().unwrap(); - let pos = prs - .iter() - .position(|pr| pr.cf == cf && pr.id == id) - .unwrap(); - prs.swap_remove(pos) + let mut cursor = prs.cursor_front_mut(); + let mut flushed_pr = None; + while let Some(pr) = cursor.current() { + if pr.cf != cf { + cursor.move_next(); + continue; + } + // Note flushed largest_seqno equals to earliest_seqno of next memtable. + if pr.earliest_seqno < largest_seqno { + match &mut flushed_pr { + None => flushed_pr = cursor.remove_current(), + Some(flushed_pr) => { + flushed_pr.merge(cursor.remove_current().unwrap()); + } + } + continue; + } + break; + } + match flushed_pr { + Some(pr) => pr, + None => panic!("{} not found in {:?}", cf, prs), + } }; - let mut batch = self.raft.log_batch(1); + self.storage + .persist_progress(self.region_id, self.tablet_index, pr); + } +} + +impl StateStorage for R { + fn persist_progress(&self, region_id: u64, tablet_index: u64, pr: FlushProgress) { + if pr.apply_index == 0 { + return; + } + let mut batch = self.log_batch(1); // TODO: It's possible that flush succeeds but fails to call // `on_flush_completed` before exit. In this case the flushed data will // be replayed again after restarted. To solve the problem, we need to // (1) persist flushed file numbers in `on_flush_begin` and (2) check // the file number in `on_compaction_begin`. After restart, (3) check if the // file exists. If (1) && ((2) || (3)), then we don't need to replay the data. 
- for (index, change) in pr.state_changes.changes { - match &change { - StateChange::ApplyState(state) => { - batch.put_apply_state(self.region_id, index, state).unwrap(); - } - StateChange::RegionState(state) => { - batch - .put_region_state(self.region_id, index, state) - .unwrap(); - } - } - } - if pr.apply_index != 0 { - batch - .put_flushed_index(self.region_id, cf, self.tablet_index, pr.apply_index) - .unwrap(); - } - self.raft.consume(&mut batch, true).unwrap(); + batch + .put_flushed_index(region_id, &pr.cf, tablet_index, pr.apply_index) + .unwrap(); + self.consume(&mut batch, true).unwrap(); } } diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index db95f5621e0..b75c3e7b7c0 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -251,6 +251,8 @@ #![cfg_attr(test, feature(test))] #![feature(min_specialization)] #![feature(assert_matches)] +#![feature(linked_list_cursors)] +#![feature(let_chains)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index 18991038ee8..34502634280 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -38,7 +38,10 @@ pub enum DeleteStrategy { } pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { - fn flush_cfs(&self, wait: bool) -> Result<()>; + /// Flush all specified column families at once. + /// + /// If `cfs` is empty, it will try to flush all available column families. 
+ fn flush_cfs(&self, cfs: &[&str], wait: bool) -> Result<()>; fn flush_cf(&self, cf: &str, wait: bool) -> Result<()>; diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index acecb976f58..46b020cf138 100644 --- a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -13,7 +13,7 @@ use collections::HashMap; use kvproto::metapb::Region; use tikv_util::box_err; -use crate::{Error, Result}; +use crate::{Error, FlushState, Result}; #[derive(Debug)] struct LatestTablet { @@ -91,6 +91,10 @@ pub struct TabletContext { /// Any key that is larger than or equal to this key can be considered /// obsolete. pub end_key: Box<[u8]>, + /// The states to be persisted when flush is triggered. + /// + /// If not set, apply may not be resumed correctly. + pub flush_state: Option>, } impl Debug for TabletContext { @@ -111,6 +115,7 @@ impl TabletContext { suffix, start_key: keys::data_key(region.get_start_key()).into_boxed_slice(), end_key: keys::data_end_key(region.get_end_key()).into_boxed_slice(), + flush_state: None, } } diff --git a/components/raftstore-v2/src/bootstrap.rs b/components/raftstore-v2/src/bootstrap.rs index b505b37a75b..62bc9e4b8c5 100644 --- a/components/raftstore-v2/src/bootstrap.rs +++ b/components/raftstore-v2/src/bootstrap.rs @@ -15,7 +15,7 @@ use raftstore::store::initial_region; use slog::{debug, error, info, warn, Logger}; use tikv_util::{box_err, box_try}; -use crate::{raft::write_initial_states, Result}; +use crate::{operation::write_initial_states, Result}; const MAX_CHECK_CLUSTER_BOOTSTRAPPED_RETRY_COUNT: u64 = 60; const CHECK_CLUSTER_BOOTSTRAPPED_RETRY_INTERVAL: Duration = Duration::from_secs(3); diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 2065c5d7fd4..07a577e0c35 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -1,10 +1,13 @@ // Copyright 2022 TiKV Project Authors. 
Licensed under Apache-2.0. -use std::time::{Duration, Instant}; +use std::{ + sync::Arc, + time::{Duration, Instant}, +}; use batch_system::{Fsm, FsmScheduler, Mailbox}; use crossbeam::channel::TryRecvError; -use engine_traits::{KvEngine, TabletRegistry}; +use engine_traits::{FlushState, KvEngine, TabletRegistry}; use futures::{compat::Future01CompatExt, FutureExt, StreamExt}; use kvproto::{metapb, raft_serverpb::RegionLocalState}; use raftstore::store::ReadTask; @@ -16,6 +19,7 @@ use tikv_util::{ }; use crate::{ + operation::DataTrace, raft::Apply, router::{ApplyRes, ApplyTask, PeerMsg}, }; @@ -59,6 +63,8 @@ impl ApplyFsm { res_reporter: R, tablet_registry: TabletRegistry, read_scheduler: Scheduler>, + flush_state: Arc, + log_recovery: Option>, logger: Logger, ) -> (ApplyScheduler, Self) { let (tx, rx) = future::unbounded(WakePolicy::Immediately); @@ -68,6 +74,8 @@ impl ApplyFsm { res_reporter, tablet_registry, read_scheduler, + flush_state, + log_recovery, logger, ); ( diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 8d497a7e4e5..793e7a340f2 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -254,6 +254,15 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, self.fsm.peer_mut().on_snapshot_generated(snap_res) } PeerMsg::QueryDebugInfo(ch) => self.fsm.peer_mut().on_query_debug_info(ch), + PeerMsg::DataFlushed { + cf, + tablet_index, + flushed_index, + } => { + self.fsm + .peer_mut() + .on_data_flushed(cf, tablet_index, flushed_index); + } #[cfg(feature = "testexport")] PeerMsg::WaitFlush(ch) => self.fsm.peer_mut().on_wait_flush(ch), } diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index bac66b34acc..9ddb577ab5c 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -24,6 +24,7 @@ #![feature(let_chains)] #![feature(array_windows)] #![feature(div_duration)] 
+#![feature(box_into_inner)] mod batch; mod bootstrap; @@ -37,4 +38,5 @@ pub(crate) use batch::StoreContext; pub use batch::{create_store_batch_system, StoreRouter, StoreSystem}; pub use bootstrap::Bootstrap; pub use fsm::StoreMeta; +pub use operation::StateStorage; pub use raftstore::{Error, Result}; diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 7ae0b68a327..6255b3ba9b9 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -29,7 +29,7 @@ use std::cmp; use collections::HashSet; use crossbeam::channel::SendError; -use engine_traits::{Checkpointer, KvEngine, RaftEngine, TabletContext}; +use engine_traits::{Checkpointer, KvEngine, RaftEngine, RaftLogBatch, TabletContext}; use fail::fail_point; use itertools::Itertools; use kvproto::{ @@ -309,12 +309,16 @@ impl Apply { e ) }); + // Remove the old write batch. + self.write_batch.take(); let reg = self.tablet_registry(); let path = reg.tablet_path(region_id, log_index); - let ctx = TabletContext::new(®ions[derived_index], Some(log_index)); + let mut ctx = TabletContext::new(®ions[derived_index], Some(log_index)); + // Now the tablet is flushed, so all previous states should be persisted. + // Reusing the tablet should not be a problem. + // TODO: Should we avoid flushing for the old tablet? + ctx.flush_state = Some(self.flush_state().clone()); let tablet = reg.tablet_factory().open_tablet(ctx, &path).unwrap(); - // Remove the old write batch. 
- self.write_batch.take(); self.publish_tablet(tablet); self.region_state_mut() @@ -425,6 +429,11 @@ impl Peer { } } self.split_trace_mut().push((tablet_index, new_ids)); + let region_state = self.storage().region_state().clone(); + self.state_changes_mut() + .put_region_state(region_id, tablet_index, ®ion_state) + .unwrap(); + self.set_has_extra_write(); } pub fn on_split_init( @@ -521,15 +530,21 @@ impl Peer { split_trace.drain(..off); // TODO: save admin_flushed. assert_ne!(admin_flushed, 0); + self.storage_mut() + .apply_trace_mut() + .on_admin_flush(admin_flushed); // Persist admin flushed. - self.set_has_ready(); + self.set_has_extra_write(); } } } #[cfg(test)] mod test { - use std::sync::mpsc::{channel, Receiver, Sender}; + use std::sync::{ + mpsc::{channel, Receiver, Sender}, + Arc, + }; use engine_test::{ ctor::{CfOptions, DbOptions}, @@ -732,6 +747,8 @@ mod test { reporter, reg, read_scheduler, + Arc::default(), + None, logger.clone(), ); @@ -905,14 +922,14 @@ mod test { // Split will create checkpoint tablet, so if there are some writes before // split, they should be flushed immediately. 
- apply.apply_put(CF_DEFAULT, b"k04", b"v4").unwrap(); + apply.apply_put(CF_DEFAULT, 50, b"k04", b"v4").unwrap(); assert!(!WriteBatch::is_empty(apply.write_batch.as_ref().unwrap())); splits.mut_requests().clear(); splits .mut_requests() .push(new_split_req(b"k05", 70, vec![71, 72, 73])); req.set_splits(splits); - apply.apply_batch_split(&req, 50).unwrap(); + apply.apply_batch_split(&req, 51).unwrap(); assert!(apply.write_batch.is_none()); assert_eq!( apply diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 6daa8f2770c..8d55beca636 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -121,6 +121,8 @@ impl Peer { mailbox, store_ctx.tablet_registry.clone(), read_scheduler, + self.flush_state().clone(), + self.storage().apply_trace().log_recovery(), logger, ); @@ -266,6 +268,7 @@ impl Peer { entry_and_proposals, }; self.apply_scheduler() + .unwrap() .send(ApplyTask::CommittedEntries(apply)); } @@ -280,7 +283,7 @@ impl Peer { return; } - for admin_res in apply_res.admin_result { + for admin_res in Vec::from(apply_res.admin_result) { match admin_res { AdminCmdResult::None => unreachable!(), AdminCmdResult::ConfChange(conf_change) => { @@ -290,7 +293,12 @@ impl Peer { regions, derived_index, tablet_index, - }) => self.on_apply_res_split(ctx, derived_index, tablet_index, regions), + }) => { + self.storage_mut() + .apply_trace_mut() + .on_admin_modify(tablet_index); + self.on_apply_res_split(ctx, derived_index, tablet_index, regions) + } AdminCmdResult::TransferLeader(term) => self.on_transfer_leader(ctx, term), } } @@ -308,6 +316,7 @@ impl Peer { if !is_leader { entry_storage.compact_entry_cache(apply_res.applied_index + 1); } + self.on_data_modified(apply_res.modifications); self.handle_read_on_apply( ctx, apply_res.applied_term, @@ -317,6 +326,17 @@ impl Peer { } } +impl Apply { + #[inline] + fn should_skip(&self, off: 
usize, index: u64) -> bool { + let log_recovery = self.log_recovery(); + if log_recovery.is_none() { + return false; + } + log_recovery.as_ref().unwrap()[off] >= index + } +} + impl Apply { #[inline] pub async fn apply_committed_entries(&mut self, ce: CommittedEntries) { @@ -357,11 +377,12 @@ impl Apply { #[inline] async fn apply_entry(&mut self, entry: &Entry) -> Result { let mut conf_change = None; + let log_index = entry.get_index(); let req = match entry.get_entry_type() { EntryType::EntryNormal => match SimpleWriteDecoder::new( &self.logger, entry.get_data(), - entry.get_index(), + log_index, entry.get_term(), ) { Ok(decoder) => { @@ -375,16 +396,21 @@ impl Apply { let res = Ok(new_response(decoder.header())); for req in decoder { match req { - SimpleWrite::Put(put) => self.apply_put(put.cf, put.key, put.value)?, + SimpleWrite::Put(put) => { + self.apply_put(put.cf, log_index, put.key, put.value)?; + } SimpleWrite::Delete(delete) => { - self.apply_delete(delete.cf, delete.key)? + self.apply_delete(delete.cf, log_index, delete.key)?; + } + SimpleWrite::DeleteRange(dr) => { + self.apply_delete_range( + dr.cf, + log_index, + dr.start_key, + dr.end_key, + dr.notify_only, + )?; } - SimpleWrite::DeleteRange(dr) => self.apply_delete_range( - dr.cf, - dr.start_key, - dr.end_key, - dr.notify_only, - )?, } } return res; @@ -392,34 +418,18 @@ impl Apply { Err(req) => req, }, EntryType::EntryConfChange => { - let cc: ConfChange = parse_at( - &self.logger, - entry.get_data(), - entry.get_index(), - entry.get_term(), - ); - let req: RaftCmdRequest = parse_at( - &self.logger, - cc.get_context(), - entry.get_index(), - entry.get_term(), - ); + let cc: ConfChange = + parse_at(&self.logger, entry.get_data(), log_index, entry.get_term()); + let req: RaftCmdRequest = + parse_at(&self.logger, cc.get_context(), log_index, entry.get_term()); conf_change = Some(cc.into_v2()); req } EntryType::EntryConfChangeV2 => { - let cc: ConfChangeV2 = parse_at( - &self.logger, - 
entry.get_data(), - entry.get_index(), - entry.get_term(), - ); - let req: RaftCmdRequest = parse_at( - &self.logger, - cc.get_context(), - entry.get_index(), - entry.get_term(), - ); + let cc: ConfChangeV2 = + parse_at(&self.logger, entry.get_data(), log_index, entry.get_term()); + let req: RaftCmdRequest = + parse_at(&self.logger, cc.get_context(), log_index, entry.get_term()); conf_change = Some(cc); req } @@ -430,8 +440,8 @@ impl Apply { let admin_req = req.get_admin_request(); let (admin_resp, admin_result) = match req.get_admin_request().get_cmd_type() { AdminCmdType::CompactLog => unimplemented!(), - AdminCmdType::Split => self.apply_split(admin_req, entry.index)?, - AdminCmdType::BatchSplit => self.apply_batch_split(admin_req, entry.index)?, + AdminCmdType::Split => self.apply_split(admin_req, log_index)?, + AdminCmdType::BatchSplit => self.apply_batch_split(admin_req, log_index)?, AdminCmdType::PrepareMerge => unimplemented!(), AdminCmdType::CommitMerge => unimplemented!(), AdminCmdType::RollbackMerge => unimplemented!(), @@ -439,10 +449,10 @@ impl Apply { self.apply_transfer_leader(admin_req, entry.term)? } AdminCmdType::ChangePeer => { - self.apply_conf_change(entry.get_index(), admin_req, conf_change.unwrap())? + self.apply_conf_change(log_index, admin_req, conf_change.unwrap())? } AdminCmdType::ChangePeerV2 => { - self.apply_conf_change_v2(entry.get_index(), admin_req, conf_change.unwrap())? + self.apply_conf_change_v2(log_index, admin_req, conf_change.unwrap())? } AdminCmdType::ComputeHash => unimplemented!(), AdminCmdType::VerifyHash => unimplemented!(), @@ -468,16 +478,17 @@ impl Apply { // backward compatibility. 
CmdType::Put => { let put = r.get_put(); - self.apply_put(put.get_cf(), put.get_key(), put.get_value())?; + self.apply_put(put.get_cf(), log_index, put.get_key(), put.get_value())?; } CmdType::Delete => { let delete = r.get_delete(); - self.apply_delete(delete.get_cf(), delete.get_key())?; + self.apply_delete(delete.get_cf(), log_index, delete.get_key())?; } CmdType::DeleteRange => { let dr = r.get_delete_range(); self.apply_delete_range( dr.get_cf(), + log_index, dr.get_start_key(), dr.get_end_key(), dr.get_notify_only(), @@ -515,7 +526,8 @@ impl Apply { let (index, term) = self.apply_progress(); apply_res.applied_index = index; apply_res.applied_term = term; - apply_res.admin_result = self.take_admin_result(); + apply_res.admin_result = self.take_admin_result().into_boxed_slice(); + apply_res.modifications = *self.modifications_mut(); self.res_reporter().report(apply_res); } } diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 92f260bad26..c4cc1646963 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -14,6 +14,7 @@ use raftstore::{ use crate::{ batch::StoreContext, + operation::cf_offset, raft::{Apply, Peer}, router::CmdResChannel, }; @@ -109,7 +110,11 @@ impl Peer { impl Apply { #[inline] - pub fn apply_put(&mut self, cf: &str, key: &[u8], value: &[u8]) -> Result<()> { + pub fn apply_put(&mut self, cf: &str, index: u64, key: &[u8], value: &[u8]) -> Result<()> { + let off = cf_offset(cf); + if self.should_skip(off, index) { + return Ok(()); + } util::check_key_in_region(key, self.region_state().get_region())?; // Technically it's OK to remove prefix for raftstore v2. But rocksdb doesn't // support specifying infinite upper bound in various APIs. 
@@ -140,11 +145,16 @@ impl Apply { fail::fail_point!("APPLY_PUT", |_| Err(raftstore::Error::Other( "aborted by failpoint".into() ))); + self.modifications_mut()[off] = index; Ok(()) } #[inline] - pub fn apply_delete(&mut self, cf: &str, key: &[u8]) -> Result<()> { + pub fn apply_delete(&mut self, cf: &str, index: u64, key: &[u8]) -> Result<()> { + let off = cf_offset(cf); + if self.should_skip(off, index) { + return Ok(()); + } util::check_key_in_region(key, self.region_state().get_region())?; keys::data_key_with_buffer(key, &mut self.key_buffer); let res = if cf.is_empty() || cf == CF_DEFAULT { @@ -165,6 +175,7 @@ impl Apply { e ); }); + self.modifications_mut()[off] = index; Ok(()) } @@ -172,6 +183,7 @@ impl Apply { pub fn apply_delete_range( &mut self, _cf: &str, + _index: u64, _start_key: &[u8], _end_key: &[u8], _notify_only: bool, diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 7df897f2b26..beb47f9a08f 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -10,6 +10,9 @@ pub use command::{ AdminCmdResult, CommittedEntries, ProposalControl, SimpleWriteDecoder, SimpleWriteEncoder, }; pub use life::DestroyProgress; -pub use ready::{AsyncWriter, GenSnapTask, SnapState}; +pub use ready::{ + cf_offset, write_initial_states, ApplyTrace, AsyncWriter, DataTrace, GenSnapTask, SnapState, + StateStorage, +}; pub(crate) use self::{command::SplitInit, query::LocalReader}; diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs new file mode 100644 index 00000000000..0b7521f2634 --- /dev/null +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -0,0 +1,585 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! In raftstore v2, WAL is always disabled for tablet. So we need a way to +//! 
trace what has been persisted and what hasn't, and recover those missing +//! data on restart. +//! +//! In summary, we trace the persist progress by recording flushed event. +//! Because memtable is flushed one by one, so a flushed memtable must contain +//! all the data within the CF before some certain apply index. So the minimum +//! flushed apply index + 1 of all data CFs is the recovery start point. In +//! some cases, a CF may not have any updates at all for a long time. In some +//! cases, we may still need to recover from smaller index even if flushed +//! index of all data CFs have advanced. So a special flushed index is +//! introduced and stored with raft CF (only using the name, raft CF is +//! dropped). It's the recommended recovery start point. How these two indexes +//! interact with each other can be found in the `ApplyTrace::recover` and +//! `ApplyTrace::maybe_advance_admin_flushed`. +//! +//! The correctness of raft cf index relies on the fact that: +//! - apply is sequential, so if any apply index is updated to apply trace, all +//! modification events before that must be processed. +//! - admin commands that are marked by raft cf index must flush all data before +//! being executed. Note this constraint is not just for recovery, but also +//! necessary to guarantee safety of operations like split init or log gc. +//! So data of logs before raft cf index must be applied and flushed to disk. +//! +//! All apply related states are associated with an apply index. During +//! recovery states corresponding to the start index should be used. 
+ +use std::{cmp, sync::Mutex}; + +use engine_traits::{ + FlushProgress, KvEngine, RaftEngine, RaftLogBatch, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, + CF_WRITE, DATA_CFS, DATA_CFS_LEN, +}; +use kvproto::{ + metapb::Region, + raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState}, +}; +use raftstore::store::{ReadTask, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM}; +use slog::Logger; +use tikv_util::{box_err, worker::Scheduler}; + +use crate::{ + raft::{Peer, Storage}, + router::PeerMsg, + Result, StoreRouter, +}; + +/// Write states for the given region. The region is supposed to have all its +/// data persisted and not governed by any raft group before. +pub fn write_initial_states(wb: &mut impl RaftLogBatch, region: Region) -> Result<()> { + let region_id = region.get_id(); + + let mut state = RegionLocalState::default(); + state.set_region(region); + state.set_tablet_index(RAFT_INIT_LOG_INDEX); + wb.put_region_state(region_id, RAFT_INIT_LOG_INDEX, &state)?; + + let mut apply_state = RaftApplyState::default(); + apply_state.set_applied_index(RAFT_INIT_LOG_INDEX); + apply_state + .mut_truncated_state() + .set_index(RAFT_INIT_LOG_INDEX); + apply_state + .mut_truncated_state() + .set_term(RAFT_INIT_LOG_TERM); + wb.put_apply_state(region_id, RAFT_INIT_LOG_INDEX, &apply_state)?; + + let mut raft_state = RaftLocalState::default(); + raft_state.set_last_index(RAFT_INIT_LOG_INDEX); + raft_state.mut_hard_state().set_term(RAFT_INIT_LOG_TERM); + raft_state.mut_hard_state().set_commit(RAFT_INIT_LOG_INDEX); + wb.put_raft_state(region_id, &raft_state)?; + + for cf in ALL_CFS { + wb.put_flushed_index(region_id, cf, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_INDEX)?; + } + + Ok(()) +} + +fn to_static_cf(cf: &str) -> &'static str { + match cf { + CF_DEFAULT => CF_DEFAULT, + CF_RAFT => CF_RAFT, + CF_WRITE => CF_WRITE, + CF_LOCK => CF_LOCK, + _ => unreachable!("unexpected cf: {cf}"), + } +} + +pub struct StateStorage { + raft_engine: ER, + router: Mutex>, +} + 
+impl StateStorage { + pub fn new(raft_engine: ER, router: StoreRouter) -> Self { + Self { + raft_engine, + router: Mutex::new(router), + } + } +} + +impl engine_traits::StateStorage for StateStorage { + fn persist_progress(&self, region_id: u64, tablet_index: u64, pr: FlushProgress) { + let cf = to_static_cf(pr.cf()); + let flushed_index = pr.applied_index(); + self.raft_engine + .persist_progress(region_id, tablet_index, pr); + let _ = self.router.lock().unwrap().send( + region_id, + PeerMsg::DataFlushed { + cf, + tablet_index, + flushed_index, + }, + ); + } +} + +/// An alias of frequent use type that each data cf has a u64. +pub type DataTrace = [u64; DATA_CFS_LEN]; + +#[derive(Clone, Copy, Default)] +struct Progress { + flushed: u64, + /// The index of last entry that has modification to the CF. + /// + /// If `flushed` == `last_modified`, then all data in the CF is persisted. + last_modified: u64, +} + +pub fn cf_offset(cf: &str) -> usize { + let cf = if cf.is_empty() { CF_DEFAULT } else { cf }; + DATA_CFS.iter().position(|c| *c == cf).expect(cf) +} + +/// `ApplyTrace` is used to track the indexes of modifications and flushes. +/// +/// It has 3 core functionalities: +/// - recover from stopped state and figure out the correct log replay start +/// point. +/// - trace the admin flushed index and issue persistence once admin operation +/// is considered finished. Note only those admin commands that needs to +/// interact with other peers will be traced. +/// - support query the flushed progress without actually scanning raft engine, +/// which is useful for cleaning up stale flush records. +#[derive(Default)] +pub struct ApplyTrace { + /// The modified indexes and flushed index of each data CF. + data_cfs: Box<[Progress; DATA_CFS_LEN]>, + /// The modified indexes and flushed index of raft CF. + /// + /// raft CF is a virtual CF that only used for recording apply index of + /// certain admin commands (like split/merge). So there is no flush at all. 
+ /// The `flushed` field is advanced when the admin command doesn't need to + /// be replayed after restart. A write should be triggered to persist the + /// record. + admin: Progress, + /// Index that is issued to be written. It may not be truly persisted. + persisted_applied: u64, + /// `true` means the raft cf record should be persisted in next ready. + try_persist: bool, +} + +impl ApplyTrace { + fn recover(region_id: u64, engine: &impl RaftEngine) -> Result<(Self, RegionLocalState)> { + let mut trace = ApplyTrace::default(); + // Get all the recorded apply index from data CFs. + for (off, cf) in DATA_CFS.iter().enumerate() { + // There should be at least one record. + let i = engine.get_flushed_index(region_id, cf)?.unwrap(); + trace.data_cfs[off].flushed = i; + trace.data_cfs[off].last_modified = i; + } + let i = engine.get_flushed_index(region_id, CF_RAFT)?.unwrap(); + // Index of raft CF means all data before that must be persisted. + trace.admin.flushed = i; + trace.admin.last_modified = i; + trace.persisted_applied = i; + let applied_region_state = engine + .get_region_state(region_id, trace.admin.flushed)? + .unwrap(); + Ok((trace, applied_region_state)) + } + + fn on_flush(&mut self, cf: &str, index: u64) { + let off = cf_offset(cf); + // Technically it should always be true. + if index > self.data_cfs[off].flushed { + self.data_cfs[off].flushed = index; + } + } + + fn on_modify(&mut self, cf: &str, index: u64) { + let off = cf_offset(cf); + self.data_cfs[off].last_modified = index; + } + + pub fn on_admin_flush(&mut self, index: u64) { + if index > self.admin.flushed { + self.admin.flushed = index; + self.try_persist = true; + } + } + + pub fn on_admin_modify(&mut self, index: u64) { + self.admin.last_modified = index; + } + + fn persisted_apply_index(&self) -> u64 { + self.admin.flushed + } + + // All events before `mem_index` must be consumed before calling this function. 
+ fn maybe_advance_admin_flushed(&mut self, mem_index: u64) { + if self.admin.flushed < self.admin.last_modified { + return; + } + let min_flushed = self + .data_cfs + .iter() + // Only unflushed CFs are considered. Flushed CF always have up-to-date changes + // persisted. + .filter_map(|pr| { + if pr.last_modified != pr.flushed { + Some(pr.flushed) + } else { + None + } + }) + .min(); + // At best effort, we can only advance the index to `mem_index`. + let candidate = cmp::min(mem_index, min_flushed.unwrap_or(u64::MAX)); + if candidate > self.admin.flushed { + self.admin.flushed = candidate; + if candidate > self.persisted_applied + 100 { + self.try_persist = true; + } + } + // TODO: persist admin.flushed every 10 minutes. + } + + /// Get the flushed indexes of all data CF that is needed when recovering + /// logs. + /// + /// Logs may be replayed from the persisted apply index, but those data may + /// have been flushed in the past, so we need the flushed indexes to decide + /// what logs can be skipped for certain CFs. If all CFs are flushed before + /// the persisted apply index, then there is nothing to skip, so + /// `None` is returned. + #[inline] + pub fn log_recovery(&self) -> Option> { + let mut flushed_indexes = [0; DATA_CFS_LEN]; + for (off, pr) in self.data_cfs.iter().enumerate() { + flushed_indexes[off] = pr.flushed; + } + for i in flushed_indexes { + if i > self.admin.flushed { + return Some(Box::new(flushed_indexes)); + } + } + None + } + + pub fn reset_snapshot(&mut self, index: u64) { + for pr in self.data_cfs.iter_mut() { + pr.flushed = index; + pr.last_modified = index; + } + self.admin.flushed = index; + self.persisted_applied = index; + self.try_persist = false; + } + + #[inline] + pub fn reset_should_persist(&mut self) { + self.try_persist = false; + } + + #[inline] + pub fn should_persist(&self) -> bool { + self.try_persist + } +} + +impl Storage { + /// Creates a new storage with uninit states. 
+ /// + /// This should only be used for creating new peer from raft message. + pub fn uninit( + store_id: u64, + region: Region, + engine: ER, + read_scheduler: Scheduler>, + logger: &Logger, + ) -> Result { + let mut region_state = RegionLocalState::default(); + region_state.set_region(region); + Self::create( + store_id, + region_state, + RaftLocalState::default(), + RaftApplyState::default(), + engine, + read_scheduler, + false, + ApplyTrace::default(), + logger, + ) + } + + /// Creates a new storage. + /// + /// All metadata should be initialized before calling this method. If the + /// region is destroyed, `None` will be returned. + pub fn new( + region_id: u64, + store_id: u64, + engine: ER, + read_scheduler: Scheduler>, + logger: &Logger, + ) -> Result>> { + // Check latest region state to determine whether the peer is destroyed. + let region_state = match engine.get_region_state(region_id, u64::MAX) { + Ok(Some(s)) => s, + res => { + return Err(box_err!( + "failed to get region state for region {}: {:?}", + region_id, + res + )); + } + }; + + if region_state.get_state() == PeerState::Tombstone { + return Ok(None); + } + + let (trace, region_state) = ApplyTrace::recover(region_id, &engine)?; + + let raft_state = match engine.get_raft_state(region_id) { + Ok(Some(s)) => s, + res => { + return Err(box_err!("failed to get raft state: {:?}", res)); + } + }; + + let applied_index = trace.persisted_apply_index(); + let mut apply_state = match engine.get_apply_state(region_id, applied_index) { + Ok(Some(s)) => s, + res => { + return Err(box_err!("failed to get apply state: {:?}", res)); + } + }; + apply_state.set_applied_index(applied_index); + + Self::create( + store_id, + region_state, + raft_state, + apply_state, + engine, + read_scheduler, + true, + trace, + logger, + ) + .map(Some) + } + + /// Write initial persist trace for uninit peer. 
+ pub fn init_apply_trace(&self, write_task: &mut WriteTask) { + let region_id = self.region().get_id(); + let raft_engine = self.entry_storage().raft_engine(); + let lb = write_task + .extra_write + .ensure_v2(|| raft_engine.log_batch(3)); + lb.put_apply_state(region_id, 0, self.apply_state()) + .unwrap(); + lb.put_region_state(region_id, 0, self.region_state()) + .unwrap(); + for cf in ALL_CFS { + lb.put_flushed_index(region_id, cf, 0, 0).unwrap(); + } + } + + pub fn record_apply_trace(&mut self, write_task: &mut WriteTask) { + let region_id = self.region().get_id(); + let raft_engine = self.entry_storage().raft_engine(); + let tablet_index = self.tablet_index(); + let lb = write_task + .extra_write + .ensure_v2(|| raft_engine.log_batch(1)); + let trace = self.apply_trace_mut(); + lb.put_flushed_index(region_id, CF_RAFT, tablet_index, trace.admin.flushed) + .unwrap(); + trace.try_persist = false; + trace.persisted_applied = trace.admin.flushed; + } +} + +impl Peer { + pub fn on_data_flushed(&mut self, cf: &str, tablet_index: u64, index: u64) { + if tablet_index < self.storage().tablet_index() { + // Stale tablet. 
+ return; + } + let apply_index = self.storage().entry_storage().applied_index(); + let apply_trace = self.storage_mut().apply_trace_mut(); + apply_trace.on_flush(cf, index); + apply_trace.maybe_advance_admin_flushed(apply_index); + } + + pub fn on_data_modified(&mut self, modification: DataTrace) { + let apply_index = self.storage().entry_storage().applied_index(); + let apply_trace = self.storage_mut().apply_trace_mut(); + for (cf, index) in DATA_CFS.iter().zip(modification) { + if index != 0 { + apply_trace.on_modify(cf, index); + } + } + apply_trace.maybe_advance_admin_flushed(apply_index); + } +} + +#[cfg(test)] +mod tests { + use engine_traits::RaftEngineReadOnly; + use kvproto::metapb::Peer; + use tempfile::TempDir; + + use super::*; + + fn new_region() -> Region { + let mut region = Region::default(); + region.set_id(4); + let mut p = Peer::default(); + p.set_id(5); + p.set_store_id(6); + region.mut_peers().push(p); + region.mut_region_epoch().set_version(2); + region.mut_region_epoch().set_conf_ver(4); + region + } + + #[test] + fn test_write_initial_states() { + let region = new_region(); + let path = TempDir::new().unwrap(); + let engine = engine_test::new_temp_engine(&path); + let raft_engine = &engine.raft; + let mut wb = raft_engine.log_batch(10); + write_initial_states(&mut wb, region.clone()).unwrap(); + assert!(!wb.is_empty()); + raft_engine.consume(&mut wb, true).unwrap(); + + let local_state = raft_engine.get_region_state(4, u64::MAX).unwrap().unwrap(); + assert_eq!(local_state.get_state(), PeerState::Normal); + assert_eq!(*local_state.get_region(), region); + assert_eq!(local_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); + assert_eq!( + local_state, + raft_engine + .get_region_state(4, RAFT_INIT_LOG_INDEX) + .unwrap() + .unwrap() + ); + assert_eq!( + None, + raft_engine + .get_region_state(4, RAFT_INIT_LOG_INDEX - 1) + .unwrap() + ); + + let raft_state = raft_engine.get_raft_state(4).unwrap().unwrap(); + assert_eq!(raft_state.get_last_index(), 
RAFT_INIT_LOG_INDEX); + let hs = raft_state.get_hard_state(); + assert_eq!(hs.get_term(), RAFT_INIT_LOG_TERM); + assert_eq!(hs.get_commit(), RAFT_INIT_LOG_INDEX); + + let apply_state = raft_engine.get_apply_state(4, u64::MAX).unwrap().unwrap(); + assert_eq!(apply_state.get_applied_index(), RAFT_INIT_LOG_INDEX); + let ts = apply_state.get_truncated_state(); + assert_eq!(ts.get_index(), RAFT_INIT_LOG_INDEX); + assert_eq!(ts.get_term(), RAFT_INIT_LOG_TERM); + assert_eq!( + apply_state, + raft_engine + .get_apply_state(4, RAFT_INIT_LOG_INDEX) + .unwrap() + .unwrap() + ); + assert_eq!( + None, + raft_engine + .get_apply_state(4, RAFT_INIT_LOG_INDEX - 1) + .unwrap() + ); + } + + #[test] + fn test_apply_trace() { + let mut trace = ApplyTrace::default(); + assert_eq!(0, trace.persisted_apply_index()); + // If there is no modifications, index should be advanced anyway. + trace.maybe_advance_admin_flushed(2); + assert_eq!(2, trace.persisted_apply_index()); + for cf in DATA_CFS { + trace.on_modify(cf, 3); + } + trace.maybe_advance_admin_flushed(3); + // Modification is not flushed. + assert_eq!(2, trace.persisted_apply_index()); + for cf in DATA_CFS { + trace.on_flush(cf, 3); + } + trace.maybe_advance_admin_flushed(3); + // No admin is recorded, index should be advanced. + assert_eq!(3, trace.persisted_apply_index()); + trace.on_admin_modify(4); + for cf in DATA_CFS { + trace.on_flush(cf, 4); + } + for cf in DATA_CFS { + trace.on_modify(cf, 4); + } + trace.maybe_advance_admin_flushed(4); + // Unflushed admin modification should hold index. + assert_eq!(3, trace.persisted_apply_index()); + trace.on_admin_flush(4); + trace.maybe_advance_admin_flushed(4); + // Admin is flushed, index should be advanced. + assert_eq!(4, trace.persisted_apply_index()); + for cf in DATA_CFS { + trace.on_flush(cf, 5); + } + trace.maybe_advance_admin_flushed(4); + // Though all data CFs are flushed, but index should not be + // advanced as we don't know whether there is admin modification. 
+ assert_eq!(4, trace.persisted_apply_index()); + for cf in DATA_CFS { + trace.on_modify(cf, 5); + } + trace.maybe_advance_admin_flushed(5); + // Because modify is recorded, so we know there should be no admin + // modification and index can be advanced. + assert_eq!(5, trace.persisted_apply_index()); + } + + #[test] + fn test_advance_admin_flushed() { + let cases = &[ + // When all are flushed, admin index should be advanced to latest. + ([(2, 2), (3, 3), (5, 5)], (3, 3), 5, 5), + ([(2, 2), (3, 3), (5, 5)], (5, 3), 6, 6), + // Any unflushed result should block advancing. + ([(2, 3), (3, 3), (5, 5)], (2, 2), 5, 2), + ([(2, 4), (3, 4), (5, 6)], (2, 2), 6, 2), + // But it should not make index go back. + ([(2, 4), (3, 4), (5, 6)], (3, 3), 6, 3), + // Unflush admin should not be advanced. + ([(2, 2), (3, 3), (5, 5)], (2, 3), 5, 2), + // Flushed may race with modification. + ([(2, 2), (3, 3), (6, 5)], (2, 2), 5, 5), + ([(8, 2), (9, 3), (7, 5)], (4, 4), 5, 5), + ([(8, 2), (9, 3), (7, 5)], (5, 5), 5, 5), + ([(2, 3), (9, 3), (7, 5)], (2, 2), 5, 2), + ]; + for (case, (data_cfs, admin, mem_index, exp)) in cases.iter().enumerate() { + let mut trace = ApplyTrace::default(); + for (i, (flushed, modified)) in data_cfs.iter().enumerate() { + trace.data_cfs[i].flushed = *flushed; + trace.data_cfs[i].last_modified = *modified; + } + trace.admin.flushed = admin.0; + trace.admin.last_modified = admin.1; + trace.maybe_advance_admin_flushed(*mem_index); + assert_eq!(trace.admin.flushed, *exp, "{case}"); + } + } +} diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 47f6523cc82..a6df9049285 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -17,12 +17,14 @@ //! //! There two steps can be processed concurrently. 
+mod apply_trace; mod async_writer; mod snapshot; use std::{cmp, time::Instant}; -use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; +pub use apply_trace::{cf_offset, write_initial_states, ApplyTrace, DataTrace, StateStorage}; +use engine_traits::{KvEngine, RaftEngine}; use error_code::ErrorCodeExt; use kvproto::{raft_cmdpb::AdminCmdType, raft_serverpb::RaftMessage}; use protobuf::Message as _; @@ -269,6 +271,7 @@ impl Peer { #[inline] pub fn handle_raft_ready(&mut self, ctx: &mut StoreContext) { let has_ready = self.reset_has_ready(); + let has_extra_write = self.reset_has_extra_write(); if !has_ready || self.destroy_progress().started() { #[cfg(feature = "testexport")] self.async_writer.notify_flush(); @@ -276,7 +279,10 @@ impl Peer { } ctx.has_ready = true; - if !self.raft_group().has_ready() && (self.serving() || self.postponed_destroy()) { + if !has_extra_write + && !self.raft_group().has_ready() + && (self.serving() || self.postponed_destroy()) + { #[cfg(feature = "testexport")] self.async_writer.notify_flush(); return; @@ -328,11 +334,14 @@ impl Peer { // Always sending snapshot task after apply task, so it gets latest // snapshot. 
if let Some(gen_task) = self.storage_mut().take_gen_snap_task() { - self.apply_scheduler().send(ApplyTask::Snapshot(gen_task)); + self.apply_scheduler() + .unwrap() + .send(ApplyTask::Snapshot(gen_task)); } let ready_number = ready.number(); let mut write_task = WriteTask::new(self.region_id(), self.peer_id(), ready_number); + self.merge_state_changes_to(&mut write_task); self.storage_mut() .handle_raft_ready(ctx, &mut ready, &mut write_task); if !ready.persisted_messages().is_empty() { @@ -554,17 +563,13 @@ impl Storage { if !ever_persisted || prev_raft_state != *entry_storage.raft_state() { write_task.raft_state = Some(entry_storage.raft_state().clone()); } - if !ever_persisted { - let region_id = self.region().get_id(); - let raft_engine = self.entry_storage().raft_engine(); - let lb = write_task - .extra_write - .ensure_v2(|| raft_engine.log_batch(3)); - lb.put_apply_state(region_id, 0, self.apply_state()) - .unwrap(); - lb.put_region_state(region_id, 0, self.region_state()) - .unwrap(); + // If snapshot initializes the peer, we don't need to write apply trace again. 
+ if !self.ever_persisted() { + self.init_apply_trace(write_task); self.set_ever_persisted(); } + if self.apply_trace().should_persist() { + self.record_apply_trace(write_task); + } } } diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 86817ab17d3..1f4a1fee268 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -27,7 +27,7 @@ use std::{ }, }; -use engine_traits::{KvEngine, RaftEngine, TabletContext, TabletRegistry}; +use engine_traits::{KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, CF_RAFT}; use kvproto::raft_serverpb::{PeerState, RaftSnapshotData}; use protobuf::Message; use raft::eraftpb::Snapshot; @@ -121,7 +121,11 @@ impl Peer { let first_index = self.storage().entry_storage().first_index(); if first_index == persisted_index + 1 { let region_id = self.region_id(); - let tablet_ctx = TabletContext::new(self.region(), Some(persisted_index)); + self.reset_flush_state(); + let flush_state = self.flush_state().clone(); + let mut tablet_ctx = TabletContext::new(self.region(), Some(persisted_index)); + // Use a new FlushState to avoid conflicts with the old one. 
+ tablet_ctx.flush_state = Some(flush_state); ctx.tablet_registry.load(tablet_ctx, false).unwrap(); self.schedule_apply_fsm(ctx); self.storage_mut().on_applied_snapshot(); @@ -353,7 +357,7 @@ impl Storage { let index = entry.truncated_index(); entry.set_applied_term(term); entry.apply_state_mut().set_applied_index(index); - self.region_state_mut().set_tablet_index(index); + self.apply_trace_mut().reset_snapshot(index); } pub fn apply_snapshot( @@ -383,14 +387,27 @@ impl Storage { let last_index = snap.get_metadata().get_index(); let last_term = snap.get_metadata().get_term(); - self.region_state_mut().set_state(PeerState::Normal); - self.region_state_mut().set_region(region); - self.entry_storage_mut() - .raft_state_mut() - .set_last_index(last_index); - self.entry_storage_mut().set_truncated_index(last_index); - self.entry_storage_mut().set_truncated_term(last_term); - self.entry_storage_mut().set_last_term(last_term); + let region_state = self.region_state_mut(); + region_state.set_state(PeerState::Normal); + region_state.set_region(region); + region_state.set_tablet_index(last_index); + let entry_storage = self.entry_storage_mut(); + entry_storage.raft_state_mut().set_last_index(last_index); + entry_storage.set_truncated_index(last_index); + entry_storage.set_truncated_term(last_term); + entry_storage.set_last_term(last_term); + + self.apply_trace_mut().reset_should_persist(); + self.set_ever_persisted(); + let lb = task + .extra_write + .ensure_v2(|| self.entry_storage().raft_engine().log_batch(3)); + lb.put_apply_state(region_id, last_index, self.apply_state()) + .unwrap(); + lb.put_region_state(region_id, last_index, self.region_state()) + .unwrap(); + lb.put_flushed_index(region_id, CF_RAFT, last_index, last_index) + .unwrap(); let (path, clean_split) = match self.split_init_mut() { // If index not match, the peer may accept a newer snapshot after split. 
diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 30ced7bdbd7..56379f2a15f 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -1,14 +1,17 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::mem; +use std::{mem, sync::Arc}; -use engine_traits::{CachedTablet, KvEngine, TabletRegistry, WriteBatch}; +use engine_traits::{CachedTablet, FlushState, KvEngine, TabletRegistry, WriteBatch, DATA_CFS_LEN}; use kvproto::{metapb, raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; use raftstore::store::{fsm::apply::DEFAULT_APPLY_WB_SIZE, ReadTask}; use slog::Logger; use tikv_util::worker::Scheduler; -use crate::{operation::AdminCmdResult, router::CmdResChannel}; +use crate::{ + operation::{AdminCmdResult, DataTrace}, + router::CmdResChannel, +}; /// Apply applies all the committed commands to kv db. pub struct Apply { @@ -27,9 +30,17 @@ pub struct Apply { /// A flag indicates whether the peer is destroyed by applying admin /// command. tombstone: bool, - applied_index: u64, applied_term: u64, + /// The largest index that have modified each column family. + modifications: DataTrace, admin_cmd_result: Vec, + flush_state: Arc, + /// The flushed indexes of each column family before being restarted. + /// + /// If an apply index is less than the flushed index, the log can be + /// skipped. `None` means logs should apply to all required column + /// families. 
+ log_recovery: Option>, region_state: RegionLocalState, @@ -46,6 +57,8 @@ impl Apply { res_reporter: R, tablet_registry: TabletRegistry, read_scheduler: Scheduler>, + flush_state: Arc, + log_recovery: Option>, logger: Logger, ) -> Self { let mut remote_tablet = tablet_registry @@ -58,14 +71,16 @@ impl Apply { write_batch: None, callbacks: vec![], tombstone: false, - applied_index: 0, applied_term: 0, + modifications: [0; DATA_CFS_LEN], admin_cmd_result: vec![], region_state, tablet_registry, read_scheduler, key_buffer: vec![], res_reporter, + flush_state, + log_recovery, logger, } } @@ -95,13 +110,20 @@ impl Apply { #[inline] pub fn set_apply_progress(&mut self, index: u64, term: u64) { - self.applied_index = index; + self.flush_state.set_applied_index(index); self.applied_term = term; + if self.log_recovery.is_none() { + return; + } + let log_recovery = self.log_recovery.as_ref().unwrap(); + if log_recovery.iter().all(|v| index >= *v) { + self.log_recovery.take(); + } } #[inline] pub fn apply_progress(&self) -> (u64, u64) { - (self.applied_index, self.applied_term) + (self.flush_state.applied_index(), self.applied_term) } #[inline] @@ -171,4 +193,19 @@ impl Apply { self.write_batch = None; } } + + #[inline] + pub fn modifications_mut(&mut self) -> &mut DataTrace { + &mut self.modifications + } + + #[inline] + pub fn flush_state(&self) -> &Arc { + &self.flush_state + } + + #[inline] + pub fn log_recovery(&self) -> &Option> { + &self.log_recovery + } } diff --git a/components/raftstore-v2/src/raft/mod.rs b/components/raftstore-v2/src/raft/mod.rs index c1d6a522d79..495d7ad87ed 100644 --- a/components/raftstore-v2/src/raft/mod.rs +++ b/components/raftstore-v2/src/raft/mod.rs @@ -6,4 +6,4 @@ mod storage; pub use apply::Apply; pub use peer::Peer; -pub use storage::{write_initial_states, Storage}; +pub use storage::Storage; diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 21795eb3293..82e9b6011ca 100644 --- 
a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -8,14 +8,16 @@ use std::{ use collections::{HashMap, HashSet}; use crossbeam::atomic::AtomicCell; -use engine_traits::{CachedTablet, KvEngine, RaftEngine, TabletContext, TabletRegistry}; +use engine_traits::{ + CachedTablet, FlushState, KvEngine, RaftEngine, TabletContext, TabletRegistry, +}; use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, pdpb, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; use raft::{RawNode, StateRole}; use raftstore::store::{ util::{Lease, RegionReadProgress}, Config, EntryStorage, LocksStatus, PeerStat, ProposalQueue, ReadDelegate, ReadIndexQueue, - ReadProgress, TxnExt, + ReadProgress, TxnExt, WriteTask, }; use slog::Logger; @@ -53,6 +55,8 @@ pub struct Peer { /// Set to true if any side effect needs to be handled. has_ready: bool, + /// Sometimes there is no ready at all, but we need to trigger async write. + has_extra_write: bool, /// Writer for persisting side effects asynchronously. pub(crate) async_writer: AsyncWriter, @@ -78,6 +82,13 @@ pub struct Peer { // Trace which peers have not finished split. split_trace: Vec<(u64, HashSet)>, + + /// Apply related State changes that needs to be persisted to raft engine. + /// + /// To make recovery correct, we need to persist all state changes before + /// advancing apply index. + state_changes: Option>, + flush_state: Arc, } impl Peer { @@ -102,11 +113,13 @@ impl Peer { let region = raft_group.store().region_state().get_region().clone(); let cached_tablet = tablet_registry.get_or_default(region_id); + let flush_state: Arc = Arc::default(); // We can't create tablet if tablet index is 0. It can introduce race when gc // old tablet and create new peer. We also can't get the correct range of the // region, which is required for kv data gc. 
if tablet_index != 0 { - let ctx = TabletContext::new(®ion, Some(tablet_index)); + let mut ctx = TabletContext::new(®ion, Some(tablet_index)); + ctx.flush_state = Some(flush_state.clone()); // TODO: Perhaps we should stop create the tablet automatically. tablet_registry.load(ctx, false)?; } @@ -122,6 +135,7 @@ impl Peer { async_writer: AsyncWriter::new(region_id, peer_id), apply_scheduler: None, has_ready: false, + has_extra_write: false, destroy_progress: DestroyProgress::None, raft_group, logger, @@ -143,6 +157,8 @@ impl Peer { proposal_control: ProposalControl::new(0), pending_ticks: Vec::new(), split_trace: vec![], + state_changes: None, + flush_state, }; // If this region has only one peer and I am the one, campaign directly. @@ -334,6 +350,17 @@ impl Peer { mem::take(&mut self.has_ready) } + #[inline] + pub fn set_has_extra_write(&mut self) { + self.set_has_ready(); + self.has_extra_write = true; + } + + #[inline] + pub fn reset_has_extra_write(&mut self) -> bool { + mem::take(&mut self.has_extra_write) + } + #[inline] pub fn insert_peer_cache(&mut self, peer: metapb::Peer) { for p in self.raft_group.store().region().get_peers() { @@ -499,8 +526,8 @@ impl Peer { &self.proposals } - pub fn apply_scheduler(&self) -> &ApplyScheduler { - self.apply_scheduler.as_ref().unwrap() + pub fn apply_scheduler(&self) -> Option<&ApplyScheduler> { + self.apply_scheduler.as_ref() } #[inline] @@ -631,4 +658,30 @@ impl Peer { pub fn split_trace_mut(&mut self) -> &mut Vec<(u64, HashSet)> { &mut self.split_trace } + + #[inline] + pub fn flush_state(&self) -> &Arc { + &self.flush_state + } + + pub fn reset_flush_state(&mut self) { + self.flush_state = Arc::default(); + } + + #[inline] + pub fn state_changes_mut(&mut self) -> &mut ER::LogBatch { + if self.state_changes.is_none() { + self.state_changes = Some(Box::new(self.entry_storage().raft_engine().log_batch(0))); + } + self.state_changes.as_mut().unwrap() + } + + #[inline] + pub fn merge_state_changes_to(&mut self, task: &mut 
WriteTask) { + if self.state_changes.is_none() { + return; + } + task.extra_write + .merge_v2(Box::into_inner(self.state_changes.take().unwrap())); + } } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 889674c514c..de58d39cce5 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -5,51 +5,24 @@ use std::{ fmt::{self, Debug, Formatter}, }; -use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; +use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ - metapb::{self, Region}, + metapb, raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState}, }; use raft::{ eraftpb::{ConfState, Entry, Snapshot}, GetEntriesContext, RaftState, INVALID_ID, }; -use raftstore::store::{util, EntryStorage, ReadTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM}; +use raftstore::store::{util, EntryStorage, ReadTask}; use slog::{o, Logger}; use tikv_util::{box_err, store::find_peer, worker::Scheduler}; use crate::{ - operation::{GenSnapTask, SnapState, SplitInit}, + operation::{ApplyTrace, GenSnapTask, SnapState, SplitInit}, Result, }; -pub fn write_initial_states(wb: &mut impl RaftLogBatch, region: Region) -> Result<()> { - let region_id = region.get_id(); - - let mut state = RegionLocalState::default(); - state.set_region(region); - state.set_tablet_index(RAFT_INIT_LOG_INDEX); - wb.put_region_state(region_id, 0, &state)?; - - let mut apply_state = RaftApplyState::default(); - apply_state.set_applied_index(RAFT_INIT_LOG_INDEX); - apply_state - .mut_truncated_state() - .set_index(RAFT_INIT_LOG_INDEX); - apply_state - .mut_truncated_state() - .set_term(RAFT_INIT_LOG_TERM); - wb.put_apply_state(region_id, 0, &apply_state)?; - - let mut raft_state = RaftLocalState::default(); - raft_state.set_last_index(RAFT_INIT_LOG_INDEX); - raft_state.mut_hard_state().set_term(RAFT_INIT_LOG_TERM); - raft_state.mut_hard_state().set_commit(RAFT_INIT_LOG_INDEX); - 
wb.put_raft_state(region_id, &raft_state)?; - - Ok(()) -} - /// A storage for raft. /// /// It's similar to `PeerStorage` in v1. @@ -67,6 +40,8 @@ pub struct Storage { snap_state: RefCell, gen_snap_task: RefCell>>, split_init: Option>, + /// The flushed index of all CFs. + apply_trace: ApplyTrace, } impl Debug for Storage { @@ -120,87 +95,20 @@ impl Storage { pub fn gen_snap_task_mut(&self) -> RefMut<'_, Box>> { self.gen_snap_task.borrow_mut() } -} -impl Storage { - /// Creates a new storage with uninit states. - /// - /// This should only be used for creating new peer from raft message. - pub fn uninit( - store_id: u64, - region: Region, - engine: ER, - read_scheduler: Scheduler>, - logger: &Logger, - ) -> Result { - let mut region_state = RegionLocalState::default(); - region_state.set_region(region); - Self::create( - store_id, - region_state, - RaftLocalState::default(), - RaftApplyState::default(), - engine, - read_scheduler, - false, - logger, - ) + #[inline] + pub fn apply_trace_mut(&mut self) -> &mut ApplyTrace { + &mut self.apply_trace } - /// Creates a new storage. - /// - /// All metadata should be initialized before calling this method. If the - /// region is destroyed, `None` will be returned. 
- pub fn new( - region_id: u64, - store_id: u64, - engine: ER, - read_scheduler: Scheduler>, - logger: &Logger, - ) -> Result>> { - let region_state = match engine.get_region_state(region_id, u64::MAX) { - Ok(Some(s)) => s, - res => { - return Err(box_err!( - "failed to get region state for region {}: {:?}", - region_id, - res - )); - } - }; - - if region_state.get_state() == PeerState::Tombstone { - return Ok(None); - } - - let raft_state = match engine.get_raft_state(region_id) { - Ok(Some(s)) => s, - res => { - return Err(box_err!("failed to get raft state: {:?}", res)); - } - }; - - let apply_state = match engine.get_apply_state(region_id, u64::MAX) { - Ok(Some(s)) => s, - res => { - return Err(box_err!("failed to get apply state: {:?}", res)); - } - }; - - Self::create( - store_id, - region_state, - raft_state, - apply_state, - engine, - read_scheduler, - true, - logger, - ) - .map(Some) + #[inline] + pub fn apply_trace(&self) -> &ApplyTrace { + &self.apply_trace } +} - fn create( +impl Storage { + pub(crate) fn create( store_id: u64, region_state: RegionLocalState, raft_state: RaftLocalState, @@ -208,6 +116,7 @@ impl Storage { engine: ER, read_scheduler: Scheduler>, persisted: bool, + apply_trace: ApplyTrace, logger: &Logger, ) -> Result { let peer = find_peer(region_state.get_region(), store_id); @@ -237,6 +146,7 @@ impl Storage { snap_state: RefCell::new(SnapState::Relax), gen_snap_task: RefCell::new(Box::new(None)), split_init: None, + apply_trace, }) } @@ -265,6 +175,9 @@ impl Storage { self.entry_storage.apply_state() } + /// Check if the storage is initialized. + /// + /// The storage is considered initialized when data is applied in memory. 
#[inline] pub fn is_initialized(&self) -> bool { self.region_state.get_tablet_index() != 0 @@ -363,7 +276,10 @@ impl raft::Storage for Storage { #[cfg(test)] mod tests { use std::{ - sync::mpsc::{sync_channel, Receiver, SyncSender}, + sync::{ + mpsc::{sync_channel, Receiver, SyncSender}, + Arc, + }, time::Duration, }; @@ -371,9 +287,7 @@ mod tests { ctor::{CfOptions, DbOptions}, kv::TestTabletFactory, }; - use engine_traits::{ - RaftEngine, RaftEngineReadOnly, RaftLogBatch, TabletContext, TabletRegistry, DATA_CFS, - }; + use engine_traits::{RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, DATA_CFS}; use kvproto::{ metapb::{Peer, Region}, raft_serverpb::PeerState, @@ -381,14 +295,16 @@ mod tests { use raft::{Error as RaftError, StorageError}; use raftstore::store::{ util::new_empty_snapshot, AsyncReadNotifier, FetchedLogs, GenSnapRes, ReadRunner, - TabletSnapKey, TabletSnapManager, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, + TabletSnapKey, TabletSnapManager, WriteTask, }; use slog::o; use tempfile::TempDir; use tikv_util::worker::Worker; use super::*; - use crate::{fsm::ApplyResReporter, raft::Apply, router::ApplyRes}; + use crate::{ + fsm::ApplyResReporter, operation::write_initial_states, raft::Apply, router::ApplyRes, + }; #[derive(Clone)] pub struct TestRouter { @@ -428,35 +344,6 @@ mod tests { region } - #[test] - fn test_write_initial_states() { - let region = new_region(); - let path = TempDir::new().unwrap(); - let engine = engine_test::new_temp_engine(&path); - let raft_engine = &engine.raft; - let mut wb = raft_engine.log_batch(10); - write_initial_states(&mut wb, region.clone()).unwrap(); - assert!(!wb.is_empty()); - raft_engine.consume(&mut wb, true).unwrap(); - - let local_state = raft_engine.get_region_state(4, 0).unwrap().unwrap(); - assert_eq!(local_state.get_state(), PeerState::Normal); - assert_eq!(*local_state.get_region(), region); - assert_eq!(local_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); - - let raft_state = 
raft_engine.get_raft_state(4).unwrap().unwrap(); - assert_eq!(raft_state.get_last_index(), RAFT_INIT_LOG_INDEX); - let hs = raft_state.get_hard_state(); - assert_eq!(hs.get_term(), RAFT_INIT_LOG_TERM); - assert_eq!(hs.get_commit(), RAFT_INIT_LOG_INDEX); - - let apply_state = raft_engine.get_apply_state(4, u64::MAX).unwrap().unwrap(); - assert_eq!(apply_state.get_applied_index(), RAFT_INIT_LOG_INDEX); - let ts = apply_state.get_truncated_state(); - assert_eq!(ts.get_index(), RAFT_INIT_LOG_INDEX); - assert_eq!(ts.get_term(), RAFT_INIT_LOG_TERM); - } - #[test] fn test_apply_snapshot() { let region = new_region(); @@ -474,7 +361,7 @@ mod tests { let ops = DbOptions::default(); let cf_opts = DATA_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); let factory = Box::new(TestTabletFactory::new(ops, cf_opts)); - let reg = TabletRegistry::new(factory, path.path().join("tablet")).unwrap(); + let reg = TabletRegistry::new(factory, path.path().join("tablets")).unwrap(); let worker = Worker::new("test-read-worker").lazy_build("test-read-worker"); let sched = worker.scheduler(); let logger = slog_global::borrow_global().new(o!()); @@ -495,7 +382,7 @@ mod tests { // This index can't be set before load tablet. 
assert_ne!(10, s.entry_storage().applied_index()); assert_ne!(1, s.entry_storage().applied_term()); - assert_ne!(10, s.region_state().get_tablet_index()); + assert_eq!(10, s.region_state().get_tablet_index()); assert!(task.persisted_cb.is_some()); s.on_applied_snapshot(); @@ -521,7 +408,7 @@ mod tests { let ops = DbOptions::default(); let cf_opts = DATA_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); let factory = Box::new(TestTabletFactory::new(ops, cf_opts)); - let reg = TabletRegistry::new(factory, path.path().join("tablet")).unwrap(); + let reg = TabletRegistry::new(factory, path.path().join("tablets")).unwrap(); let tablet_ctx = TabletContext::new(®ion, Some(10)); reg.load(tablet_ctx, true).unwrap(); // setup read runner worker and peer storage @@ -544,6 +431,8 @@ mod tests { router, reg, sched, + Arc::default(), + None, logger, ); diff --git a/components/raftstore-v2/src/router/internal_message.rs b/components/raftstore-v2/src/router/internal_message.rs index 224723bf4ad..67f2dec6160 100644 --- a/components/raftstore-v2/src/router/internal_message.rs +++ b/components/raftstore-v2/src/router/internal_message.rs @@ -1,6 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use crate::operation::{AdminCmdResult, CommittedEntries, GenSnapTask}; +use crate::operation::{AdminCmdResult, CommittedEntries, DataTrace, GenSnapTask}; #[derive(Debug)] pub enum ApplyTask { @@ -12,5 +12,6 @@ pub enum ApplyTask { pub struct ApplyRes { pub applied_index: u64, pub applied_term: u64, - pub admin_result: Vec, + pub admin_result: Box<[AdminCmdResult]>, + pub modifications: DataTrace, } diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 447efe8ee1a..b387e729f8d 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -139,6 +139,11 @@ pub enum PeerMsg { ready_number: u64, }, QueryDebugInfo(DebugInfoChannel), + DataFlushed { + cf: &'static str, + tablet_index: u64, + flushed_index: u64, + }, /// A message that used to check if a flush is happened. #[cfg(feature = "testexport")] WaitFlush(super::FlushChannel), @@ -191,6 +196,15 @@ impl fmt::Debug for PeerMsg { PeerMsg::LogsFetched(fetched) => write!(fmt, "LogsFetched {:?}", fetched), PeerMsg::SnapshotGenerated(_) => write!(fmt, "SnapshotGenerated"), PeerMsg::QueryDebugInfo(_) => write!(fmt, "QueryDebugInfo"), + PeerMsg::DataFlushed { + cf, + tablet_index, + flushed_index, + } => write!( + fmt, + "DataFlushed cf {}, tablet_index {}, flushed_index {}", + cf, tablet_index, flushed_index + ), #[cfg(feature = "testexport")] PeerMsg::WaitFlush(_) => write!(fmt, "FlushMessages"), } diff --git a/components/raftstore-v2/tests/failpoints/mod.rs b/components/raftstore-v2/tests/failpoints/mod.rs index 26403f2f0a3..84f1de2803d 100644 --- a/components/raftstore-v2/tests/failpoints/mod.rs +++ b/components/raftstore-v2/tests/failpoints/mod.rs @@ -10,3 +10,4 @@ mod cluster; mod test_basic_write; mod test_bootstrap; +mod test_trace_apply; diff --git a/components/raftstore-v2/tests/failpoints/test_trace_apply.rs b/components/raftstore-v2/tests/failpoints/test_trace_apply.rs new file mode 100644 index 
00000000000..15bf39d17ba --- /dev/null +++ b/components/raftstore-v2/tests/failpoints/test_trace_apply.rs @@ -0,0 +1,7 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +// TODO: check if it can recover from: +// - split not start +// - split not finish +// - two pending split the second one finished before the first one +// - all split finish diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index b09f351b066..55ad823b99d 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -36,7 +36,7 @@ use raftstore::store::{ use raftstore_v2::{ create_store_batch_system, router::{DebugInfoChannel, FlushChannel, PeerMsg, QueryResult, RaftRouter}, - Bootstrap, StoreSystem, + Bootstrap, StateStorage, StoreSystem, }; use slog::{debug, o, Logger}; use tempfile::TempDir; @@ -59,6 +59,13 @@ pub fn check_skip_wal(path: &str) { assert!(found, "no WAL found in {}", path); } +pub fn new_put_request(key: impl Into>, value: impl Into>) -> Request { + let mut req = Request::default(); + req.set_cmd_type(CmdType::Put); + req.mut_put().set_key(key.into()); + req.mut_put().set_value(value.into()); + req +} pub struct TestRouter(RaftRouter); impl Deref for TestRouter { @@ -209,6 +216,7 @@ pub struct RunningState { pub system: StoreSystem, pub cfg: Arc>, pub transport: TestTransport, + snap_mgr: TabletSnapManager, } impl RunningState { @@ -220,21 +228,33 @@ impl RunningState { concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, logger: &Logger, - ) -> (TestRouter, TabletSnapManager, Self) { - let cf_opts = DATA_CFS - .iter() - .copied() - .map(|cf| (cf, CfOptions::default())) - .collect(); - let factory = Box::new(TestTabletFactory::new(DbOptions::default(), cf_opts)); - let registry = TabletRegistry::new(factory, path).unwrap(); + ) -> (TestRouter, Self) { let raft_engine = 
engine_test::raft::new_engine(&format!("{}", path.join("raft").display()), None) .unwrap(); + let mut bootstrap = Bootstrap::new(&raft_engine, 0, pd_client.as_ref(), logger.clone()); let store_id = bootstrap.bootstrap_store().unwrap(); let mut store = Store::default(); store.set_id(store_id); + + let (router, mut system) = create_store_batch_system::( + &cfg.value(), + store_id, + logger.clone(), + ); + let cf_opts = DATA_CFS + .iter() + .copied() + .map(|cf| (cf, CfOptions::default())) + .collect(); + let mut db_opt = DbOptions::default(); + db_opt.set_state_storage(Arc::new(StateStorage::new( + raft_engine.clone(), + router.clone(), + ))); + let factory = Box::new(TestTabletFactory::new(db_opt, cf_opts)); + let registry = TabletRegistry::new(factory, path.join("tablets")).unwrap(); if let Some(region) = bootstrap.bootstrap_first_region(&store, store_id).unwrap() { let factory = registry.tablet_factory(); let path = registry.tablet_path(region.get_id(), RAFT_INIT_LOG_INDEX); @@ -247,12 +267,6 @@ impl RunningState { factory.open_tablet(ctx, &path).unwrap(); } - let (router, mut system) = create_store_batch_system::( - &cfg.value(), - store_id, - logger.clone(), - ); - let router = RaftRouter::new(store_id, registry.clone(), router); let store_meta = router.store_meta().clone(); let snap_mgr = TabletSnapManager::new(path.join("tablets_snap").to_str().unwrap()); @@ -280,8 +294,9 @@ impl RunningState { system, cfg, transport, + snap_mgr, }; - (TestRouter(router), snap_mgr, state) + (TestRouter(router), state) } } @@ -296,7 +311,6 @@ pub struct TestNode { path: TempDir, running_state: Option, logger: Logger, - snap_mgr: Option, } impl TestNode { @@ -308,12 +322,11 @@ impl TestNode { path, running_state: None, logger, - snap_mgr: None, } } fn start(&mut self, cfg: Arc>, trans: TestTransport) -> TestRouter { - let (router, snap_mgr, state) = RunningState::new( + let (router, state) = RunningState::new( &self.pd_client, self.path.path(), cfg, @@ -323,7 +336,6 @@ impl 
TestNode { &self.logger, ); self.running_state = Some(state); - self.snap_mgr = Some(snap_mgr); router } @@ -352,10 +364,6 @@ impl TestNode { self.running_state.as_ref() } - pub fn snap_mgr(&self) -> Option<&TabletSnapManager> { - self.snap_mgr.as_ref() - } - pub fn id(&self) -> u64 { self.running_state().unwrap().store_id } @@ -521,8 +529,8 @@ impl Cluster { msg.get_message().get_snapshot().get_metadata().get_term(), msg.get_message().get_snapshot().get_metadata().get_index(), ); - let from_snap_mgr = self.node(from_offset).snap_mgr().unwrap(); - let to_snap_mgr = self.node(offset).snap_mgr().unwrap(); + let from_snap_mgr = &self.node(from_offset).running_state().unwrap().snap_mgr; + let to_snap_mgr = &self.node(offset).running_state().unwrap().snap_mgr; let gen_path = from_snap_mgr.tablet_gen_path(&key); let recv_path = to_snap_mgr.final_recv_path(&key); assert!(gen_path.exists()); @@ -549,3 +557,12 @@ impl Cluster { } } } + +impl Drop for Cluster { + fn drop(&mut self) { + self.routers.clear(); + for node in &mut self.nodes { + node.stop(); + } + } +} diff --git a/components/raftstore-v2/tests/integrations/mod.rs b/components/raftstore-v2/tests/integrations/mod.rs index c3061be0d2b..fbf54eaa243 100644 --- a/components/raftstore-v2/tests/integrations/mod.rs +++ b/components/raftstore-v2/tests/integrations/mod.rs @@ -15,4 +15,5 @@ mod test_pd_heartbeat; mod test_read; mod test_split; mod test_status; +mod test_trace_apply; mod test_transfer_leader; diff --git a/components/raftstore-v2/tests/integrations/test_split.rs b/components/raftstore-v2/tests/integrations/test_split.rs index 60495b151e8..df806063249 100644 --- a/components/raftstore-v2/tests/integrations/test_split.rs +++ b/components/raftstore-v2/tests/integrations/test_split.rs @@ -2,6 +2,7 @@ use std::{thread, time::Duration}; +use engine_traits::{RaftEngineReadOnly, CF_RAFT}; use futures::executor::block_on; use kvproto::{ metapb, pdpb, @@ -9,6 +10,7 @@ use kvproto::{ AdminCmdType, AdminRequest, CmdType, 
RaftCmdRequest, RaftCmdResponse, Request, SplitRequest, }, }; +use raftstore::store::{INIT_EPOCH_VER, RAFT_INIT_LOG_INDEX}; use raftstore_v2::router::PeerMsg; use tikv_util::store::new_peer; @@ -128,6 +130,7 @@ fn split_region( fn test_split() { let mut cluster = Cluster::default(); let store_id = cluster.node(0).id(); + let raft_engine = cluster.node(0).running_state().unwrap().raft_engine.clone(); let router = &mut cluster.routers[0]; // let factory = cluster.node(0).tablet_factory(); @@ -139,6 +142,8 @@ fn test_split() { // Region 2 ["", ""] peer(1, 3) // -> Region 2 ["", "k22"] peer(1, 3) // Region 1000 ["k22", ""] peer(1, 10) + let region_state = raft_engine.get_region_state(2, u64::MAX).unwrap().unwrap(); + assert_eq!(region_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); let (left, right) = split_region( router, region, @@ -150,6 +155,23 @@ fn test_split() { b"k22", false, ); + let region_state = raft_engine.get_region_state(2, u64::MAX).unwrap().unwrap(); + assert_ne!(region_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); + assert_eq!( + region_state.get_region().get_region_epoch().get_version(), + INIT_EPOCH_VER + 1 + ); + let region_state0 = raft_engine + .get_region_state(2, region_state.get_tablet_index()) + .unwrap() + .unwrap(); + assert_eq!(region_state, region_state0); + let flushed_index = raft_engine.get_flushed_index(2, CF_RAFT).unwrap().unwrap(); + assert!( + flushed_index >= region_state.get_tablet_index(), + "{flushed_index} >= {}", + region_state.get_tablet_index() + ); // Region 2 ["", "k22"] peer(1, 3) // -> Region 2 ["", "k11"] peer(1, 3) @@ -165,10 +187,35 @@ fn test_split() { b"k11", false, ); + let region_state = raft_engine.get_region_state(2, u64::MAX).unwrap().unwrap(); + assert_ne!( + region_state.get_tablet_index(), + region_state0.get_tablet_index() + ); + assert_eq!( + region_state.get_region().get_region_epoch().get_version(), + INIT_EPOCH_VER + 2 + ); + let region_state1 = raft_engine + .get_region_state(2, 
region_state.get_tablet_index()) + .unwrap() + .unwrap(); + assert_eq!(region_state, region_state1); + let flushed_index = raft_engine.get_flushed_index(2, CF_RAFT).unwrap().unwrap(); + assert!( + flushed_index >= region_state.get_tablet_index(), + "{flushed_index} >= {}", + region_state.get_tablet_index() + ); // Region 1000 ["k22", ""] peer(1, 10) // -> Region 1000 ["k22", "k33"] peer(1, 10) // Region 1002 ["k33", ""] peer(1, 12) + let region_state = raft_engine + .get_region_state(1000, u64::MAX) + .unwrap() + .unwrap(); + assert_eq!(region_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); let _ = split_region( router, right, @@ -180,4 +227,30 @@ fn test_split() { b"k33", false, ); + let region_state = raft_engine + .get_region_state(1000, u64::MAX) + .unwrap() + .unwrap(); + assert_ne!(region_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); + assert_eq!( + region_state.get_region().get_region_epoch().get_version(), + INIT_EPOCH_VER + 2 + ); + let region_state2 = raft_engine + .get_region_state(1000, region_state.get_tablet_index()) + .unwrap() + .unwrap(); + assert_eq!(region_state, region_state2); + let flushed_index = raft_engine.get_flushed_index(2, CF_RAFT).unwrap().unwrap(); + assert!( + flushed_index >= region_state.get_tablet_index(), + "{flushed_index} >= {}", + region_state.get_tablet_index() + ); } + +// TODO: test split race with +// - created peer +// - created peer with pending snapshot +// - created peer with persisting snapshot +// - created peer with persisted snapshot diff --git a/components/raftstore-v2/tests/integrations/test_trace_apply.rs b/components/raftstore-v2/tests/integrations/test_trace_apply.rs new file mode 100644 index 00000000000..def064e6d29 --- /dev/null +++ b/components/raftstore-v2/tests/integrations/test_trace_apply.rs @@ -0,0 +1,211 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{path::Path, time::Duration}; + +use engine_traits::{DbOptionsExt, MiscExt, Peekable, CF_LOCK, CF_WRITE, DATA_CFS}; +use futures::executor::block_on; +use raftstore::store::RAFT_INIT_LOG_INDEX; +use raftstore_v2::router::PeerMsg; + +use crate::cluster::{new_put_request, Cluster}; + +fn count_file(path: &Path, pat: impl Fn(&Path) -> bool) -> usize { + let mut count = 0; + for path in std::fs::read_dir(path).unwrap() { + if pat(&path.unwrap().path()) { + count += 1; + } + } + count +} + +fn count_sst(path: &Path) -> usize { + count_file(path, |path| { + path.extension().map_or(false, |ext| ext == "sst") + }) +} + +fn count_info_log(path: &Path) -> usize { + count_file(path, |path| { + path.file_name() + .unwrap() + .to_string_lossy() + .starts_with("LOG") + }) +} + +/// Test if data will be recovered correctly after being restarted. +#[test] +fn test_data_recovery() { + let mut cluster = Cluster::default(); + let registry = cluster.node(0).tablet_registry(); + let tablet_2_path = registry.tablet_path(2, RAFT_INIT_LOG_INDEX); + // The rocksdb is a bootstrapped tablet, so it will be opened and closed in + // bootstrap, and then open again in fsm initialization. + assert_eq!(count_info_log(&tablet_2_path), 2); + let router = &mut cluster.routers[0]; + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + + // Write 100 keys to default CF and not flush. + let mut req = router.new_request_for(2); + for i in 0..100 { + let put_req = new_put_request(format!("key{}", i), format!("value{}", i)); + req.clear_requests(); + req.mut_requests().push(put_req); + router + .send(2, PeerMsg::raft_command(req.clone()).0) + .unwrap(); + } + + // Write 100 keys to write CF and flush half. 
+ let mut sub = None; + for i in 0..50 { + let mut put_req = new_put_request(format!("key{}", i), format!("value{}", i)); + put_req.mut_put().set_cf(CF_WRITE.to_owned()); + req.clear_requests(); + req.mut_requests().push(put_req); + let (ch, s) = PeerMsg::raft_command(req.clone()); + router.send(2, ch).unwrap(); + sub = Some(s); + } + let resp = block_on(sub.take().unwrap().result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + + let mut cached = cluster.node(0).tablet_registry().get(2).unwrap(); + cached.latest().unwrap().flush_cf(CF_WRITE, true).unwrap(); + let router = &mut cluster.routers[0]; + for i in 50..100 { + let mut put_req = new_put_request(format!("key{}", i), format!("value{}", i)); + put_req.mut_put().set_cf(CF_WRITE.to_owned()); + req.clear_requests(); + req.mut_requests().push(put_req); + router + .send(2, PeerMsg::raft_command(req.clone()).0) + .unwrap(); + } + + // Write 100 keys to lock CF and flush all. + for i in 0..100 { + let mut put_req = new_put_request(format!("key{}", i), format!("value{}", i)); + put_req.mut_put().set_cf(CF_LOCK.to_owned()); + req.clear_requests(); + req.mut_requests().push(put_req); + let (ch, s) = PeerMsg::raft_command(req.clone()); + router.send(2, ch).unwrap(); + sub = Some(s); + } + let resp = block_on(sub.take().unwrap().result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + + cached = cluster.node(0).tablet_registry().get(2).unwrap(); + cached.latest().unwrap().flush_cf(CF_LOCK, true).unwrap(); + + // Make sure all keys must be written. 
+ let router = &mut cluster.routers[0]; + let snap = router.stale_snapshot(2); + for cf in DATA_CFS { + for i in 0..100 { + let key = format!("key{}", i); + let value = snap.get_value_cf(cf, key.as_bytes()).unwrap(); + assert_eq!( + value.as_deref(), + Some(format!("value{}", i).as_bytes()), + "{} {}", + cf, + key + ); + } + } + let registry = cluster.node(0).tablet_registry(); + cached = registry.get(2).unwrap(); + cached + .latest() + .unwrap() + .set_db_options(&[("avoid_flush_during_shutdown", "true")]) + .unwrap(); + drop((snap, cached)); + + cluster.restart(0); + + let registry = cluster.node(0).tablet_registry(); + cached = registry.get(2).unwrap(); + cached + .latest() + .unwrap() + .set_db_options(&[("avoid_flush_during_shutdown", "true")]) + .unwrap(); + let router = &mut cluster.routers[0]; + + // Write another key to ensure all data are recovered. + let put_req = new_put_request("key101", "value101"); + req.clear_requests(); + req.mut_requests().push(put_req); + let (msg, sub) = PeerMsg::raft_command(req.clone()); + router.send(2, msg).unwrap(); + let resp = block_on(sub.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + + // After being restarted, all unflushed logs should be applied again. So there + // should be no missing data. + let snap = router.stale_snapshot(2); + for cf in DATA_CFS { + for i in 0..100 { + let key = format!("key{}", i); + let value = snap.get_value_cf(cf, key.as_bytes()).unwrap(); + assert_eq!( + value.as_deref(), + Some(format!("value{}", i).as_bytes()), + "{} {}", + cf, + key + ); + } + } + + // There is a restart, so LOG file should be rotate. + assert_eq!(count_info_log(&tablet_2_path), 3); + // We only trigger Flush twice, so there should be only 2 files. And because WAL + // is disabled, so when rocksdb is restarted, there should be no WAL to recover, + // so no additional flush will be triggered. 
+ assert_eq!(count_sst(&tablet_2_path), 2); + + cached = cluster.node(0).tablet_registry().get(2).unwrap(); + cached.latest().unwrap().flush_cfs(DATA_CFS, true).unwrap(); + + // Although all CFs are triggered again, but recovery should only write: + // 1. [0, 101) to CF_DEFAULT + // 2. [50, 100) to CF_WRITE + // + // So there will be only 2 memtables to be flushed. + assert_eq!(count_sst(&tablet_2_path), 4); + + drop((snap, cached)); + + cluster.restart(0); + + let router = &mut cluster.routers[0]; + + assert_eq!(count_info_log(&tablet_2_path), 4); + // Because data is flushed before restarted, so all data can be read + // immediately. + let snap = router.stale_snapshot(2); + for cf in DATA_CFS { + for i in 0..100 { + let key = format!("key{}", i); + let value = snap.get_value_cf(cf, key.as_bytes()).unwrap(); + assert_eq!( + value.as_deref(), + Some(format!("value{}", i).as_bytes()), + "{} {}", + cf, + key + ); + } + } + // Trigger flush again. + cached = cluster.node(0).tablet_registry().get(2).unwrap(); + cached.latest().unwrap().flush_cfs(DATA_CFS, true).unwrap(); + + // There is no recovery, so there should be nothing to flush. 
+ assert_eq!(count_sst(&tablet_2_path), 4); +} diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index b69b3484e0c..9b13ce6af9b 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -151,6 +151,17 @@ impl ExtraWrite { } } + #[inline] + pub fn merge_v2(&mut self, log_batch: L) { + if let ExtraWrite::None = self { + *self = ExtraWrite::V2(log_batch); + } else if let ExtraWrite::V1(_) = self { + unreachable!("v1 and v2 are mixed used"); + } else if let ExtraWrite::V2(l) = self { + l.merge(log_batch).unwrap(); + } + } + #[inline] pub fn v2_mut(&mut self) -> Option<&mut L> { if let ExtraWrite::V2(l) = self { diff --git a/components/raftstore/src/store/compaction_guard.rs b/components/raftstore/src/store/compaction_guard.rs index 78dbccbf585..d43e33a4e08 100644 --- a/components/raftstore/src/store/compaction_guard.rs +++ b/components/raftstore/src/store/compaction_guard.rs @@ -447,14 +447,14 @@ mod tests { db.put(b"za1", b"").unwrap(); db.put(b"zb1", &value).unwrap(); db.put(b"zc1", &value).unwrap(); - db.flush_cfs(true /* wait */).unwrap(); + db.flush_cfs(&[], true /* wait */).unwrap(); db.put(b"zb2", &value).unwrap(); db.put(b"zc2", &value).unwrap(); db.put(b"zc3", &value).unwrap(); db.put(b"zc4", &value).unwrap(); db.put(b"zc5", &value).unwrap(); db.put(b"zc6", &value).unwrap(); - db.flush_cfs(true /* wait */).unwrap(); + db.flush_cfs(&[], true /* wait */).unwrap(); db.compact_range( CF_DEFAULT, None, // start_key None, // end_key diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index fe58a2587a7..ccf5f94e39e 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -460,7 +460,7 @@ mod tests { let db = &engines.kv; for &(ref k, level) in &levels { db.put(&data_key(k), k).unwrap(); - db.flush_cfs(true).unwrap(); + 
db.flush_cfs(&[], true).unwrap(); data.push((k.to_vec(), k.to_vec())); db.compact_files_in_range(Some(&data_key(k)), Some(&data_key(k)), Some(level)) .unwrap(); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 065afd8ec0c..8d64ff74c8b 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -2109,7 +2109,7 @@ mod test { let cache = config.storage.block_cache.build_shared_cache(); let factory = KvEngineFactoryBuilder::new(env, &config, cache).build(); - let reg = TabletRegistry::new(Box::new(factory), path.path()).unwrap(); + let reg = TabletRegistry::new(Box::new(factory), path.path().join("tablets")).unwrap(); for i in 1..6 { let ctx = TabletContext::with_infinite_region(i, Some(10)); diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 2680c778f02..c21599f47a6 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -5,10 +5,11 @@ use std::{path::Path, sync::Arc}; use engine_rocks::{ raw::{Cache, Env, Statistics}, CompactedEventSender, CompactionListener, FlowListener, RocksCfOptions, RocksCompactionJobInfo, - RocksDbOptions, RocksEngine, RocksEventListener, + RocksDbOptions, RocksEngine, RocksEventListener, RocksPersistenceListener, }; use engine_traits::{ - CompactionJobInfo, MiscExt, Result, TabletContext, TabletFactory, CF_DEFAULT, CF_WRITE, + CompactionJobInfo, MiscExt, PersistenceListener, Result, StateStorage, TabletContext, + TabletFactory, CF_DEFAULT, CF_WRITE, }; use kvproto::kvrpcpb::ApiVersion; use raftstore::RegionInfoAccessor; @@ -28,6 +29,7 @@ struct FactoryInner { flow_listener: Option, sst_recovery_sender: Option>, statistics: Statistics, + state_storage: Option>, lite: bool, } @@ -48,6 +50,7 @@ impl KvEngineFactoryBuilder { flow_listener: None, sst_recovery_sender: None, statistics: Statistics::new_titan(), + state_storage: None, lite: false, }, compact_event_sender: None, @@ -85,6 +88,13 @@ impl KvEngineFactoryBuilder { self } + /// 
A storage for persisting flush states, which is used for recovering when + /// disable WAL. Only work for v2. + pub fn state_storage(mut self, storage: Arc) -> Self { + self.inner.state_storage = Some(storage); + self + } + pub fn build(self) -> KvEngineFactory { KvEngineFactory { inner: Arc::new(self.inner), @@ -181,6 +191,16 @@ impl TabletFactory for KvEngineFactory { if let Some(listener) = &self.inner.flow_listener && let Some(suffix) = ctx.suffix { db_opts.add_event_listener(listener.clone_with(ctx.id, suffix)); } + if let Some(storage) = &self.inner.state_storage + && let Some(flush_state) = ctx.flush_state { + let listener = PersistenceListener::new( + ctx.id, + ctx.suffix.unwrap(), + flush_state, + storage.clone(), + ); + db_opts.add_event_listener(RocksPersistenceListener::new(listener)); + } let kv_engine = engine_rocks::util::new_engine_opt(path.to_str().unwrap(), db_opts, cf_opts); if let Err(e) = &kv_engine { diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 05d5c743d76..9a383d71338 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -75,7 +75,9 @@ use api_version::{ApiV1, ApiV2, KeyMode, KvFormat, RawValue}; use causal_ts::{CausalTsProvider, CausalTsProviderImpl}; use collections::HashMap; use concurrency_manager::{ConcurrencyManager, KeyHandleGuard}; -use engine_traits::{raw_ttl::ttl_to_expire_ts, CfName, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS}; +use engine_traits::{ + raw_ttl::ttl_to_expire_ts, CfName, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, DATA_CFS_LEN, +}; use futures::prelude::*; use kvproto::{ kvrpcpb::{ @@ -1538,7 +1540,7 @@ impl Storage { [(Some(start_key.as_encoded()), Some(end_key.as_encoded()))], )?; - let mut modifies = Vec::with_capacity(DATA_CFS.len()); + let mut modifies = Vec::with_capacity(DATA_CFS_LEN); for cf in DATA_CFS { modifies.push(Modify::DeleteRange( cf, diff --git a/tests/failpoints/cases/test_table_properties.rs b/tests/failpoints/cases/test_table_properties.rs index 536149d48b5..559ad5b0746 100644 --- 
a/tests/failpoints/cases/test_table_properties.rs +++ b/tests/failpoints/cases/test_table_properties.rs @@ -82,12 +82,12 @@ fn test_check_need_gc() { // TEST 2: props.num_versions as f64 > props.num_rows as f64 * ratio_threshold // return true. do_write(&engine, false, 5); - engine.get_rocksdb().flush_cfs(true).unwrap(); + engine.get_rocksdb().flush_cfs(&[], true).unwrap(); do_gc(&raw_engine, 2, &mut gc_runner, &dir); do_write(&engine, false, 5); - engine.get_rocksdb().flush_cfs(true).unwrap(); + engine.get_rocksdb().flush_cfs(&[], true).unwrap(); // Set ratio_threshold, let (props.num_versions as f64 > props.num_rows as // f64 * ratio_threshold) return true @@ -185,7 +185,7 @@ fn test_skip_gc_by_check() { let mut gc_runner = TestGcRunner::new(0); do_write(&engine, false, 5); - engine.get_rocksdb().flush_cfs(true).unwrap(); + engine.get_rocksdb().flush_cfs(&[], true).unwrap(); // The min_mvcc_ts ts > gc safepoint, check_need_gc return false, don't call // dofilter @@ -208,12 +208,12 @@ fn test_skip_gc_by_check() { // TEST 2:When is_bottommost_level = false, // write data to level2 do_write(&engine, false, 5); - engine.get_rocksdb().flush_cfs(true).unwrap(); + engine.get_rocksdb().flush_cfs(&[], true).unwrap(); do_gc(&raw_engine, 2, &mut gc_runner, &dir); do_write(&engine, false, 5); - engine.get_rocksdb().flush_cfs(true).unwrap(); + engine.get_rocksdb().flush_cfs(&[], true).unwrap(); // Set ratio_threshold, let (props.num_versions as f64 > props.num_rows as // f64 * ratio_threshold) return false diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index 22d23f7adba..67e5e261dab 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -27,7 +27,7 @@ fn check_available(cluster: &mut Cluster) { for i in 0..1000 { let last_available = stats.get_available(); cluster.must_put(format!("k{}", i).as_bytes(), &value); - engine.flush_cfs(true).unwrap(); + engine.flush_cfs(&[], 
true).unwrap(); sleep_ms(20); let stats = pd_client.get_store_stats(1).unwrap(); @@ -58,7 +58,7 @@ fn test_simple_store_stats(cluster: &mut Cluster) { } let engine = cluster.get_engine(1); - engine.flush_cfs(true).unwrap(); + engine.flush_cfs(&[], true).unwrap(); let last_stats = pd_client.get_store_stats(1).unwrap(); assert_eq!(last_stats.get_region_count(), 1); @@ -67,7 +67,7 @@ fn test_simple_store_stats(cluster: &mut Cluster) { let region = pd_client.get_region(b"").unwrap(); cluster.must_split(®ion, b"k2"); - engine.flush_cfs(true).unwrap(); + engine.flush_cfs(&[], true).unwrap(); // wait report region count after split for _ in 0..100 { diff --git a/tests/integrations/raftstore/test_update_region_size.rs b/tests/integrations/raftstore/test_update_region_size.rs index ee4fb79ac62..f2ff0d4f217 100644 --- a/tests/integrations/raftstore/test_update_region_size.rs +++ b/tests/integrations/raftstore/test_update_region_size.rs @@ -9,7 +9,7 @@ use tikv_util::config::*; fn flush(cluster: &mut Cluster) { for engines in cluster.engines.values() { - engines.kv.flush_cfs(true).unwrap(); + engines.kv.flush_cfs(&[], true).unwrap(); } } diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index d1abbcb924c..412f9f5a777 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -211,7 +211,7 @@ fn test_delete_files_in_range_for_titan() { .unwrap(); // Flush and compact the kvs into L6. 
- engines.kv.flush_cfs(true).unwrap(); + engines.kv.flush_cfs(&[], true).unwrap(); engines.kv.compact_files_in_range(None, None, None).unwrap(); let db = engines.kv.as_inner(); let value = db.get_property_int("rocksdb.num-files-at-level0").unwrap(); @@ -254,9 +254,9 @@ fn test_delete_files_in_range_for_titan() { // Used to trigger titan gc let engine = &engines.kv; engine.put(b"1", b"1").unwrap(); - engine.flush_cfs(true).unwrap(); + engine.flush_cfs(&[], true).unwrap(); engine.put(b"2", b"2").unwrap(); - engine.flush_cfs(true).unwrap(); + engine.flush_cfs(&[], true).unwrap(); engine .compact_files_in_range(Some(b"0"), Some(b"3"), Some(1)) .unwrap(); From 77e4896507501059131e8951523d1050fb464b0f Mon Sep 17 00:00:00 2001 From: fengou1 <85682690+fengou1@users.noreply.github.com> Date: Mon, 19 Dec 2022 11:46:54 +0800 Subject: [PATCH 0411/1149] br: enlarge the raft client backoff in recovery mode since ebs restore volume very poor during restore (#13954) close tikv/tikv#13955 fix: enlarge the raft client backoff in recovery mode since ebs restore volume very poor during restor Signed-off-by: fengou1 --- components/snap_recovery/src/init_cluster.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/components/snap_recovery/src/init_cluster.rs b/components/snap_recovery/src/init_cluster.rs index 9147810f03c..42c1d0b1882 100644 --- a/components/snap_recovery/src/init_cluster.rs +++ b/components/snap_recovery/src/init_cluster.rs @@ -92,6 +92,12 @@ pub fn enter_snap_recovery_mode(config: &mut TikvConfig) { // disable resolve ts during the recovery config.resolved_ts.enable = false; + // ebs volume has very poor performance during restore, it easy to cause the + // raft client timeout, at the same time clean up all message included + // significant message. 
restore is not memory sensetive, we may keep + // messages as much as possible during the network disturbing in recovery mode + config.server.raft_client_max_backoff = ReadableDuration::secs(20); + // Disable region split during recovering. config.coprocessor.region_max_size = Some(ReadableSize::gb(MAX_REGION_SIZE)); config.coprocessor.region_split_size = ReadableSize::gb(MAX_REGION_SIZE); From a069c1b139622e274710a5ebbdfec539edb37e69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 19 Dec 2022 14:24:53 +0800 Subject: [PATCH 0412/1149] log-backup: make PITR available when using partial cert chain (#13961) close tikv/tikv#13959 This PR sets X509_V_FLAG_PARTIAL_CHAIN, so we would trust the CA even there isn't a root CA provided. Signed-off-by: hillium Co-authored-by: qupeng --- components/backup-stream/src/metadata/store/lazy_etcd.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/components/backup-stream/src/metadata/store/lazy_etcd.rs b/components/backup-stream/src/metadata/store/lazy_etcd.rs index 88d44b09252..b712a23973d 100644 --- a/components/backup-stream/src/metadata/store/lazy_etcd.rs +++ b/components/backup-stream/src/metadata/store/lazy_etcd.rs @@ -4,6 +4,7 @@ use std::{sync::Arc, time::Duration}; use etcd_client::{ConnectOptions, Error as EtcdError, OpenSslClientConfig}; use futures::Future; +use openssl::x509::verify::X509VerifyFlags; use tikv_util::{ info, stream::{RetryError, RetryExt}, @@ -33,6 +34,12 @@ impl ConnectionConfig { opts = opts.with_openssl_tls( OpenSslClientConfig::default() .ca_cert_pem(&tls.ca) + // Some of users may prefer using multi-level self-signed certs. + // In this scenario, we must set this flag or openssl would probably complain it cannot found the root CA. + // (Because the flags we provide allows users providing exactly one CA cert.) + // We haven't make it configurable because it is enabled in gRPC by default too. 
+ // TODO: Perhaps implement grpc-io based etcd client, fully remove the difference between gRPC TLS and our custom TLS? + .manually(|c| c.cert_store_mut().set_flags(X509VerifyFlags::PARTIAL_CHAIN)) .client_cert_pem_and_key(&tls.client_cert, &tls.client_key.0), ) } From a0cff586f524df996438287c9c4d7cfc6a762b6c Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 19 Dec 2022 17:02:54 +0800 Subject: [PATCH 0413/1149] Makefile: check before sort deps (#13951) close tikv/tikv#13950, ref tikv/tikv#13950 cargo sort will somehow modify Cargo.toml even there is nothing to change. This PR avoid cache being invalidated by check first. Signed-off-by: Jay Lee --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f60fb16bcb0..6e8cada8b6f 100644 --- a/Makefile +++ b/Makefile @@ -334,7 +334,7 @@ pre-format: unset-override format: pre-format @cargo fmt - @cargo sort -w >/dev/null + @cargo sort -w -c &>/dev/null || cargo sort -w >/dev/null doc: @cargo doc --workspace --document-private-items \ From 27fa5bb0876a07bc9850343f036189feb7b9d978 Mon Sep 17 00:00:00 2001 From: Hu# Date: Mon, 19 Dec 2022 17:20:54 +0800 Subject: [PATCH 0414/1149] Skip write `prewrite_lock` in flashback locks (#13960) close tikv/tikv#13958, ref tikv/tikv#13958 Since the rollback ts for flashback are derived from the ts of the lock, we wrote prewrite lock which start_ts is flashback in Prewrite Phase. So the Prewrite lock we wrote in the flashback was rollbacked when we retry prepare. This introduces the case mentioned in https://github.com/tikv/tikv/issues/13958 The solution is: if such a lock exists, skip it and go to the Commit Phase. 
Signed-off-by: husharp Co-authored-by: Ti Chi Robot --- src/storage/mod.rs | 68 +++++++++++++++++++ .../txn/actions/flashback_to_version.rs | 28 ++++---- .../flashback_to_version_read_phase.rs | 1 + 3 files changed, 82 insertions(+), 15 deletions(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 9a383d71338..60e9b965c5d 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -5202,6 +5202,74 @@ mod tests { ); } + #[test] + fn test_mvcc_flashback_retry_prepare() { + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) + .build() + .unwrap(); + let (tx, rx) = channel(); + let mut ts = TimeStamp::zero(); + storage + .sched_txn_command( + commands::Prewrite::with_defaults( + vec![Mutation::make_put(Key::from_raw(b"k"), b"v@1".to_vec())], + b"k".to_vec(), + *ts.incr(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + storage + .sched_txn_command( + commands::Commit::new( + vec![Key::from_raw(b"k")], + ts, + *ts.incr(), + Context::default(), + ), + expect_value_callback(tx.clone(), 1, TxnStatus::committed(ts)), + ) + .unwrap(); + rx.recv().unwrap(); + expect_value( + b"v@1".to_vec(), + block_on(storage.get(Context::default(), Key::from_raw(b"k"), ts)) + .unwrap() + .0, + ); + // Try to prepare flashback first. + let flashback_start_ts = *ts.incr(); + let flashback_commit_ts = *ts.incr(); + storage + .sched_txn_command( + new_flashback_rollback_lock_cmd( + flashback_start_ts, + TimeStamp::zero(), + Key::from_raw(b"k"), + Some(Key::from_raw(b"z")), + Context::default(), + ), + expect_ok_callback(tx, 0), + ) + .unwrap(); + rx.recv().unwrap(); + // Mock the prepare flashback retry. 
+ run_flashback_to_version( + &storage, + flashback_start_ts, + flashback_commit_ts, + TimeStamp::zero(), + Key::from_raw(b"k"), + Some(Key::from_raw(b"z")), + ); + expect_none( + block_on(storage.get(Context::default(), Key::from_raw(b"k"), flashback_commit_ts)) + .unwrap() + .0, + ); + } + #[test] fn test_high_priority_get_put() { let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index 2710935efb1..f44854159c0 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -14,11 +14,13 @@ pub fn flashback_to_version_read_lock( reader: &mut MvccReader, next_lock_key: Key, end_key: Option<&Key>, + flashback_start_ts: TimeStamp, ) -> TxnResult> { let result = reader.scan_locks( Some(&next_lock_key), end_key, - |_| true, + // Skip the `prewrite_lock`. This lock will appear when retrying prepare + |lock| lock.ts != flashback_start_ts, FLASHBACK_BATCH_SIZE, ); let (key_locks, _) = result?; @@ -147,6 +149,9 @@ pub fn prewrite_flashback_key( flashback_version: TimeStamp, flashback_start_ts: TimeStamp, ) -> TxnResult<()> { + if reader.load_lock(key_to_lock)?.is_some() { + return Ok(()); + } let old_write = reader.get_write(key_to_lock, flashback_version, None)?; // Flashback the value in `CF_DEFAULT` as well if the old write is a // `WriteType::Put` without the short value. 
@@ -310,15 +315,17 @@ pub mod tests { key: &[u8], start_ts: impl Into, ) -> usize { + let start_ts = start_ts.into(); let next_key = Key::from_raw(keys::next_key(key).as_slice()); let key = Key::from_raw(key); let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); let mut reader = MvccReader::new_with_ctx(snapshot.clone(), Some(ScanMode::Forward), &ctx); let key_locks = - flashback_to_version_read_lock(&mut reader, key, Some(next_key).as_ref()).unwrap(); + flashback_to_version_read_lock(&mut reader, key, Some(next_key).as_ref(), start_ts) + .unwrap(); let cm = ConcurrencyManager::new(TimeStamp::zero()); - let mut txn = MvccTxn::new(start_ts.into(), cm); + let mut txn = MvccTxn::new(start_ts, cm); rollback_locks(&mut txn, snapshot, key_locks).unwrap(); let rows = txn.modifies.len(); write(engine, &ctx, txn.into_modifies()); @@ -578,20 +585,11 @@ pub mod tests { 2 ); // Retry Prepare - // Unlock `k`, put rollback record and delete the value of `k`. - assert_eq!(must_rollback_lock(&mut engine, k, flashback_start_ts), 3); - // Lock and write the value of `k`. - assert_eq!( - must_prewrite_flashback_key(&mut engine, k, 2, flashback_start_ts), - 2 - ); - // Retry Prepare - // Only unlock `k` since there is an overlapped rollback record. - assert_eq!(must_rollback_lock(&mut engine, k, flashback_start_ts), 1); - // Only lock `k` since the value of `k` has already existed. + // Skip `k` no need to write again. 
+ assert_eq!(must_rollback_lock(&mut engine, k, flashback_start_ts), 0); assert_eq!( must_prewrite_flashback_key(&mut engine, k, 2, flashback_start_ts), - 1 + 0 ); } diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs index 769171d46e0..7fdc86288c2 100644 --- a/src/storage/txn/commands/flashback_to_version_read_phase.rs +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -137,6 +137,7 @@ impl ReadCommand for FlashbackToVersionReadPhase { &mut reader, next_lock_key, self.end_key.as_ref(), + self.start_ts, )?; if key_locks.is_empty() { // - No more locks to rollback, continue to the Prewrite Phase. From 545ffb82db0475bd77d288207793720a2e71fd93 Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 19 Dec 2022 18:32:54 +0800 Subject: [PATCH 0415/1149] *: impl raftkv2 snapshot and write (#13957) ref tikv/tikv#12842 This PR implements snapshot and write for raftkv2. Write are refactored to only use simple codec to reduce allocation and parse overhead, it also makes code simpler. Snapshot are refactored to return future instead of async function. Otherwise it can't `Engine` requirement. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + Cargo.toml | 1 + components/raftstore-v2/src/fsm/peer.rs | 21 +- components/raftstore-v2/src/fsm/store.rs | 4 + components/raftstore-v2/src/lib.rs | 3 +- .../src/operation/command/admin/mod.rs | 12 +- .../command/admin/transfer_leader.rs | 34 +- .../raftstore-v2/src/operation/command/mod.rs | 58 +--- .../src/operation/command/write/mod.rs | 44 ++- .../operation/command/write/simple_write.rs | 294 ++++++++--------- components/raftstore-v2/src/operation/mod.rs | 3 +- .../raftstore-v2/src/operation/query/local.rs | 98 +++--- .../raftstore-v2/src/operation/query/mod.rs | 10 +- .../raftstore-v2/src/operation/ready/mod.rs | 37 ++- components/raftstore-v2/src/raft/peer.rs | 8 +- components/raftstore-v2/src/router/imp.rs | 8 +- components/raftstore-v2/src/router/message.rs | 107 +++--- components/raftstore-v2/src/router/mod.rs | 4 +- .../src/router/response_channel.rs | 270 +++++++++++++-- components/raftstore-v2/src/worker/pd/mod.rs | 2 +- .../tests/failpoints/test_basic_write.rs | 31 +- .../tests/integrations/cluster.rs | 28 +- .../tests/integrations/test_basic_write.rs | 83 ++--- .../tests/integrations/test_conf_change.rs | 24 +- .../tests/integrations/test_read.rs | 16 +- .../tests/integrations/test_split.rs | 27 +- .../tests/integrations/test_trace_apply.rs | 68 ++-- .../integrations/test_transfer_leader.rs | 21 +- components/raftstore/src/store/fsm/apply.rs | 4 +- components/raftstore/src/store/fsm/peer.rs | 8 +- components/raftstore/src/store/peer.rs | 6 +- components/raftstore/src/store/util.rs | 101 +++--- components/raftstore/src/store/worker/read.rs | 8 +- src/server/mod.rs | 1 + src/server/raftkv/mod.rs | 2 +- src/server/raftkv2/mod.rs | 307 ++++++++++++++++++ 36 files changed, 1145 insertions(+), 609 deletions(-) create mode 100644 src/server/raftkv2/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 11aa05f2140..97f540aa100 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6306,6 +6306,7 @@ 
dependencies = [ "raft", "raft_log_engine", "raftstore", + "raftstore-v2", "rand 0.7.3", "regex", "reqwest", diff --git a/Cargo.toml b/Cargo.toml index e09b422299e..4c8af61e554 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -136,6 +136,7 @@ protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raft_log_engine = { workspace = true } raftstore = { workspace = true, features = ["engine_rocks"] } +raftstore-v2 = { workspace = true } rand = "0.7.3" regex = "1.3" resource_metering = { workspace = true } diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 793e7a340f2..20e7a8f3c2b 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -228,9 +228,20 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, self.on_receive_command(cmd.send_time); self.on_query(cmd.request, cmd.ch) } - PeerMsg::RaftCommand(cmd) => { + PeerMsg::AdminCommand(cmd) => { self.on_receive_command(cmd.send_time); - self.on_command(cmd.request, cmd.ch) + self.fsm + .peer_mut() + .on_admin_command(self.store_ctx, cmd.request, cmd.ch) + } + PeerMsg::SimpleWrite(write) => { + self.on_receive_command(write.send_time); + self.fsm.peer_mut().on_simple_write( + self.store_ctx, + write.header, + write.data, + write.ch, + ); } PeerMsg::Tick(tick) => self.on_tick(tick), PeerMsg::ApplyRes(res) => self.fsm.peer.on_apply_res(self.store_ctx, res), @@ -263,6 +274,12 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, .peer_mut() .on_data_flushed(cf, tablet_index, flushed_index); } + PeerMsg::PeerUnreachable { to_peer_id } => { + self.fsm.peer_mut().on_peer_unreachable(to_peer_id) + } + PeerMsg::StoreUnreachable { to_store_id } => { + self.fsm.peer_mut().on_store_unreachable(to_store_id) + } #[cfg(feature = "testexport")] PeerMsg::WaitFlush(ch) => 
self.fsm.peer_mut().on_wait_flush(ch), } diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 349d5ad3252..bd31de69496 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -157,6 +157,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { StoreMsg::Tick(tick) => self.on_tick(tick), StoreMsg::RaftMessage(msg) => self.fsm.store.on_raft_message(self.store_ctx, msg), StoreMsg::SplitInit(msg) => self.fsm.store.on_split_init(self.store_ctx, msg), + StoreMsg::StoreUnreachable { to_store_id } => self + .fsm + .store + .on_store_unreachable(self.store_ctx, to_store_id), } } } diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 9ddb577ab5c..848e5fda8b2 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -25,6 +25,7 @@ #![feature(array_windows)] #![feature(div_duration)] #![feature(box_into_inner)] +#![feature(assert_matches)] mod batch; mod bootstrap; @@ -38,5 +39,5 @@ pub(crate) use batch::StoreContext; pub use batch::{create_store_batch_system, StoreRouter, StoreSystem}; pub use bootstrap::Bootstrap; pub use fsm::StoreMeta; -pub use operation::StateStorage; +pub use operation::{SimpleWriteBinary, SimpleWriteEncoder, StateStorage}; pub use raftstore::{Error, Result}; diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index d07c1b4a35c..fcb968a2195 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -37,7 +37,16 @@ impl Peer { apply::notify_req_region_removed(self.region_id(), ch); return; } - if let Err(e) = self.validate_command(&req, &mut ctx.raft_metrics) { + if !req.has_admin_request() { + let e = box_err!("{:?} expect only execute admin command", self.logger.list()); + let resp = 
cmd_resp::new_error(e); + ch.report_error(resp); + return; + } + let cmd_type = req.get_admin_request().get_cmd_type(); + if let Err(e) = + self.validate_command(req.get_header(), Some(cmd_type), &mut ctx.raft_metrics) + { let resp = cmd_resp::new_error(e); ch.report_error(resp); return; @@ -57,7 +66,6 @@ impl Peer { ch.report_error(resp); return; } - let cmd_type = req.get_admin_request().get_cmd_type(); if let Some(conflict) = self.proposal_control_mut().check_conflict(Some(cmd_type)) { conflict.delay_channel(ch); return; diff --git a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs index e8105a66322..1c25b363d59 100644 --- a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs +++ b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs @@ -9,7 +9,7 @@ use kvproto::{ disk_usage::DiskUsage, metapb, raft_cmdpb::{ - AdminCmdType, AdminRequest, AdminResponse, CmdType, PutRequest, RaftCmdRequest, Request, + AdminCmdType, AdminRequest, AdminResponse, RaftCmdRequest, RaftRequestHeader, TransferLeaderRequest, }, }; @@ -30,6 +30,7 @@ use super::AdminCmdResult; use crate::{ batch::StoreContext, fsm::ApplyResReporter, + operation::command::write::SimpleWriteEncoder, raft::{Apply, Peer}, router::{CmdResChannel, PeerMsg, PeerTick}, }; @@ -199,7 +200,7 @@ impl Peer { cmd.mut_admin_request() .set_cmd_type(AdminCmdType::TransferLeader); cmd.mut_admin_request().mut_transfer_leader().set_peer(from); - if let PeerMsg::RaftCommand(req) = PeerMsg::raft_command(cmd).0 { + if let PeerMsg::AdminCommand(req) = PeerMsg::admin_command(cmd).0 { self.on_admin_command(ctx, req.request, req.ch); } else { unreachable!(); @@ -345,7 +346,8 @@ impl Peer { } // FIXME: Raft command has size limit. Either limit the total size of // pessimistic locks in a region, or split commands here. 
- let mut cmd = RaftCmdRequest::default(); + let mut encoder = SimpleWriteEncoder::with_capacity(512); + let mut lock_count = 0; { // Downgrade to a read guard, do not block readers in the scheduler as far as // possible. @@ -355,33 +357,27 @@ impl Peer { if *deleted { continue; } - let mut put = PutRequest::default(); - put.set_cf(CF_LOCK.to_string()); - put.set_key(key.as_encoded().to_owned()); - put.set_value(lock.to_lock().to_bytes()); - let mut req = Request::default(); - req.set_cmd_type(CmdType::Put); - req.set_put(put); - cmd.mut_requests().push(req); + lock_count += 1; + encoder.put(CF_LOCK, key.as_encoded(), &lock.to_lock().to_bytes()); } } - if cmd.get_requests().is_empty() { + if lock_count == 0 { // If the map is not empty but all locks are deleted, it is possible that a // write command has just marked locks deleted but not proposed yet. // It might cause that command to fail if we skip proposing the // extra TransferLeader command here. return true; } - cmd.mut_header().set_region_id(self.region_id()); - cmd.mut_header() - .set_region_epoch(self.region().get_region_epoch().clone()); - cmd.mut_header().set_peer(self.peer().clone()); + let mut header = Box::::default(); + header.set_region_id(self.region_id()); + header.set_region_epoch(self.region().get_region_epoch().clone()); + header.set_peer(self.peer().clone()); info!( self.logger, - "propose {} locks before transferring leader", cmd.get_requests().len(); + "propose {} locks before transferring leader", lock_count; ); - let PeerMsg::RaftCommand(req) = PeerMsg::raft_command(cmd).0 else {unreachable!()}; - self.on_write_command(ctx, req.request, req.ch); + let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write(header, encoder.encode()).0 else {unreachable!()}; + self.on_simple_write(ctx, write.header, write.data, write.ch); true } } diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 8d55beca636..f6ac6ac7077 100644 --- 
a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -36,12 +36,11 @@ use raftstore::{ }, Error, Result, }; -use slog::error; use tikv_util::{box_err, time::monotonic_raw_now}; use crate::{ batch::StoreContext, - fsm::{ApplyFsm, ApplyResReporter, PeerFsmDelegate}, + fsm::{ApplyFsm, ApplyResReporter}, raft::{Apply, Peer}, router::{ApplyRes, ApplyTask, CmdResChannel}, }; @@ -52,7 +51,9 @@ mod write; pub use admin::{AdminCmdResult, SplitInit, SplitResult, SPLIT_PREFIX}; pub use control::ProposalControl; -pub use write::{SimpleWriteDecoder, SimpleWriteEncoder}; +pub use write::{ + SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, +}; use self::write::SimpleWrite; @@ -86,23 +87,6 @@ fn new_response(header: &RaftRequestHeader) -> RaftCmdResponse { resp } -impl<'a, EK: KvEngine, ER: RaftEngine, T> PeerFsmDelegate<'a, EK, ER, T> { - #[inline] - pub fn on_command(&mut self, req: RaftCmdRequest, ch: CmdResChannel) { - if !req.get_requests().is_empty() { - self.fsm - .peer_mut() - .on_write_command(self.store_ctx, req, ch) - } else if req.has_admin_request() { - self.fsm - .peer_mut() - .on_admin_command(self.store_ctx, req, ch) - } else if req.has_status_request() { - error!(self.fsm.logger(), "status command should be sent by Query"); - } - } -} - impl Peer { /// Schedule an apply fsm to apply logs in the background. 
/// @@ -134,17 +118,17 @@ impl Peer { } #[inline] - fn validate_command(&self, req: &RaftCmdRequest, metrics: &mut RaftMetrics) -> Result<()> { - if let Err(e) = util::check_store_id(req, self.peer().get_store_id()) { + fn validate_command( + &self, + header: &RaftRequestHeader, + admin_type: Option, + metrics: &mut RaftMetrics, + ) -> Result<()> { + if let Err(e) = util::check_store_id(header, self.peer().get_store_id()) { metrics.invalid_proposal.mismatch_store_id.inc(); return Err(e); } - for r in req.get_requests() { - if let CmdType::Get | CmdType::Snap | CmdType::ReadIndex = r.get_cmd_type() { - return Err(box_err!("internal error: query can't be sent as command")); - } - } - if let Err(e) = util::check_peer_id(req, self.peer().get_id()) { + if let Err(e) = util::check_peer_id(header, self.peer().get_id()) { metrics.invalid_proposal.mismatch_peer_id.inc(); return Err(e); } @@ -152,11 +136,11 @@ impl Peer { metrics.invalid_proposal.not_leader.inc(); return Err(Error::NotLeader(self.region_id(), self.leader())); } - if let Err(e) = util::check_term(req, self.term()) { + if let Err(e) = util::check_term(header, self.term()) { metrics.invalid_proposal.stale_command.inc(); return Err(e); } - if let Err(mut e) = util::check_region_epoch(req, self.region(), true) { + if let Err(mut e) = util::check_region_epoch(header, admin_type, self.region(), true) { if let Error::EpochNotMatch(_, _new_regions) = &mut e { // TODO: query sibling regions. 
metrics.invalid_proposal.epoch_not_match.inc(); @@ -166,16 +150,6 @@ impl Peer { Ok(()) } - #[inline] - fn propose_command( - &mut self, - ctx: &mut StoreContext, - req: RaftCmdRequest, - ) -> Result { - let data = req.write_to_bytes().unwrap(); - self.propose(ctx, data) - } - #[inline] fn propose( &mut self, @@ -379,7 +353,7 @@ impl Apply { let mut conf_change = None; let log_index = entry.get_index(); let req = match entry.get_entry_type() { - EntryType::EntryNormal => match SimpleWriteDecoder::new( + EntryType::EntryNormal => match SimpleWriteReqDecoder::new( &self.logger, entry.get_data(), log_index, @@ -435,7 +409,7 @@ impl Apply { } }; - util::check_region_epoch(&req, self.region_state().get_region(), true)?; + util::check_req_region_epoch(&req, self.region_state().get_region(), true)?; if req.has_admin_request() { let admin_req = req.get_admin_request(); let (admin_resp, admin_result) = match req.get_admin_request().get_cmd_type() { diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index c4cc1646963..6ea6064a002 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -1,7 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use engine_traits::{KvEngine, Mutable, RaftEngine, CF_DEFAULT}; -use kvproto::raft_cmdpb::RaftCmdRequest; +use kvproto::raft_cmdpb::RaftRequestHeader; use raftstore::{ store::{ cmd_resp, @@ -21,16 +21,19 @@ use crate::{ mod simple_write; -pub use simple_write::{SimpleWriteDecoder, SimpleWriteEncoder}; +pub use simple_write::{ + SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, +}; pub use self::simple_write::SimpleWrite; impl Peer { #[inline] - pub fn on_write_command( + pub fn on_simple_write( &mut self, ctx: &mut StoreContext, - mut req: RaftCmdRequest, + header: Box, + data: SimpleWriteBinary, ch: CmdResChannel, ) { if !self.serving() { @@ -38,16 +41,13 @@ impl Peer { return; } if let Some(encoder) = self.simple_write_encoder_mut() { - match encoder.amend(req) { - Ok(()) => { - encoder.add_response_channel(ch); - self.set_has_ready(); - return; - } - Err(r) => req = r, + if encoder.amend(&header, &data) { + encoder.add_response_channel(ch); + self.set_has_ready(); + return; } } - if let Err(e) = self.validate_command(&req, &mut ctx.raft_metrics) { + if let Err(e) = self.validate_command(&header, None, &mut ctx.raft_metrics) { let resp = cmd_resp::new_error(e); ch.report_error(resp); return; @@ -60,21 +60,15 @@ impl Peer { } // ProposalControl is reliable only when applied to current term. 
let call_proposed_on_success = self.applied_to_current_term(); - match SimpleWriteEncoder::new( - req, + let mut encoder = SimpleWriteReqEncoder::new( + header, + data, (ctx.cfg.raft_entry_max_size.0 as f64 * MAX_PROPOSAL_SIZE_RATIO) as usize, call_proposed_on_success, - ) { - Ok(mut encoder) => { - encoder.add_response_channel(ch); - self.set_has_ready(); - self.simple_write_encoder_mut().replace(encoder); - } - Err(req) => { - let res = self.propose_command(ctx, req); - self.post_propose_command(ctx, res, vec![ch], call_proposed_on_success); - } - } + ); + encoder.add_response_channel(ch); + self.set_has_ready(); + self.simple_write_encoder_mut().replace(encoder); } pub fn propose_pending_writes(&mut self, ctx: &mut StoreContext) { diff --git a/components/raftstore-v2/src/operation/command/write/simple_write.rs b/components/raftstore-v2/src/operation/command/write/simple_write.rs index c4cb9d6bc89..57c01fca9d8 100644 --- a/components/raftstore-v2/src/operation/command/write/simple_write.rs +++ b/components/raftstore-v2/src/operation/command/write/simple_write.rs @@ -1,8 +1,8 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; -use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, RaftRequestHeader, Request}; -use protobuf::{CodedInputStream, Message, SingularPtrField}; +use kvproto::raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}; +use protobuf::{CodedInputStream, Message}; use raftstore::store::WriteCallback; use slog::Logger; @@ -13,83 +13,62 @@ use crate::{operation::command::parse_at, router::CmdResChannel}; // TODO: use protobuf blob request seems better. const MAGIC_PREFIX: u8 = 0x00; +#[derive(Clone, Debug)] +#[repr(transparent)] +pub struct SimpleWriteBinary { + buf: Box<[u8]>, +} + /// We usually use `RaftCmdRequest` for read write request. But the codec is /// not efficient enough for simple request. `SimpleWrite` is introduce to make /// codec alloc less and fast. 
#[derive(Debug)] -pub struct SimpleWriteEncoder { - header: SingularPtrField, +pub struct SimpleWriteReqEncoder { + header: Box, buf: Vec, channels: Vec, size_limit: usize, notify_proposed: bool, } -impl SimpleWriteEncoder { - /// Create an encoder. +impl SimpleWriteReqEncoder { + /// Create a request encoder. /// /// If `notify_proposed` is true, channels will be called `notify_proposed` /// when it's appended. pub fn new( - req: RaftCmdRequest, + header: Box, + bin: SimpleWriteBinary, size_limit: usize, notify_proposed: bool, - ) -> Result { - if !Self::allow_request(&req) { - return Err(req); - } - + ) -> SimpleWriteReqEncoder { let mut buf = Vec::with_capacity(256); buf.push(MAGIC_PREFIX); - req.get_header() - .write_length_delimited_to_vec(&mut buf) - .unwrap(); + header.write_length_delimited_to_vec(&mut buf).unwrap(); + buf.extend_from_slice(&bin.buf); - for r in req.get_requests() { - encode(r, &mut buf); - } - Ok(SimpleWriteEncoder { - header: req.header, + SimpleWriteReqEncoder { + header, buf, channels: vec![], size_limit, notify_proposed, - }) - } - - fn allow_request(req: &RaftCmdRequest) -> bool { - if !req.has_status_request() && !req.has_admin_request() { - // TODO: skip the check and make caller use `SimpleWrite` directly. - for r in req.get_requests() { - if r.get_cmd_type() != CmdType::Put - && r.get_cmd_type() != CmdType::Delete - && r.get_cmd_type() != CmdType::DeleteRange - { - return false; - } - } - } else { - return false; - }; - true + } } + /// Encode the simple write into the buffer dispite header check. + /// + /// Return false if the buffer limit is reached or the write can be amended. #[inline] - pub fn amend(&mut self, req: RaftCmdRequest) -> Result<(), RaftCmdRequest> { - if Self::allow_request(&req) && req.header == self.header { - let last_length = self.buf.len(); - for r in req.get_requests() { - encode(r, &mut self.buf); - } - // The default size limit is 8 * 0.4 = 3.2MiB. 
- if self.buf.len() < self.size_limit { - Ok(()) - } else { - self.buf.truncate(last_length); - Err(req) - } + pub fn amend(&mut self, header: &RaftRequestHeader, bin: &SimpleWriteBinary) -> bool { + if *self.header != *header { + return false; + } + if self.buf.len() + bin.buf.len() < self.size_limit { + self.buf.extend_from_slice(&bin.buf); + true } else { - Err(req) + false } } @@ -118,9 +97,7 @@ impl SimpleWriteEncoder { #[inline] pub fn header(&self) -> &RaftRequestHeader { - self.header - .as_ref() - .unwrap_or_else(|| RaftRequestHeader::default_instance()) + &self.header } } @@ -152,19 +129,63 @@ pub enum SimpleWrite<'a> { DeleteRange(DeleteRange<'a>), } +#[derive(Clone)] +pub struct SimpleWriteEncoder { + buf: Vec, +} + +impl SimpleWriteEncoder { + #[inline] + pub fn with_capacity(cap: usize) -> SimpleWriteEncoder { + SimpleWriteEncoder { + buf: Vec::with_capacity(cap), + } + } + + #[inline] + pub fn put(&mut self, cf: &str, key: &[u8], value: &[u8]) { + encode(SimpleWrite::Put(Put { cf, key, value }), &mut self.buf); + } + + #[inline] + pub fn delete(&mut self, cf: &str, key: &[u8]) { + encode(SimpleWrite::Delete(Delete { cf, key }), &mut self.buf); + } + + #[inline] + pub fn delete_range(&mut self, cf: &str, start_key: &[u8], end_key: &[u8], notify_only: bool) { + encode( + SimpleWrite::DeleteRange(DeleteRange { + cf, + start_key, + end_key, + notify_only, + }), + &mut self.buf, + ); + } + + #[inline] + pub fn encode(self) -> SimpleWriteBinary { + SimpleWriteBinary { + buf: self.buf.into_boxed_slice(), + } + } +} + #[derive(Debug)] -pub struct SimpleWriteDecoder<'a> { +pub struct SimpleWriteReqDecoder<'a> { header: RaftRequestHeader, buf: &'a [u8], } -impl<'a> SimpleWriteDecoder<'a> { +impl<'a> SimpleWriteReqDecoder<'a> { pub fn new( logger: &Logger, buf: &'a [u8], index: u64, term: u64, - ) -> Result, RaftCmdRequest> { + ) -> Result, RaftCmdRequest> { match buf.first().cloned() { Some(MAGIC_PREFIX) => { let mut is = 
CodedInputStream::from_bytes(&buf[1..]); @@ -179,7 +200,7 @@ impl<'a> SimpleWriteDecoder<'a> { ), }; let read = is.pos(); - Ok(SimpleWriteDecoder { + Ok(SimpleWriteReqDecoder { header, buf: &buf[1 + read as usize..], }) @@ -194,7 +215,7 @@ impl<'a> SimpleWriteDecoder<'a> { } } -impl<'a> Iterator for SimpleWriteDecoder<'a> { +impl<'a> Iterator for SimpleWriteReqDecoder<'a> { type Item = SimpleWrite<'a>; #[inline] @@ -310,37 +331,27 @@ fn decode_cf(buf: &[u8]) -> (&str, &[u8]) { } } -// TODO: we need a way to verify every field is encoded. -#[inline] -fn encode(req: &Request, buf: &mut Vec) { - match req.get_cmd_type() { - CmdType::Put => { +#[inline(always)] +fn encode(simple_write: SimpleWrite<'_>, buf: &mut Vec) { + match simple_write { + SimpleWrite::Put(put) => { buf.push(PUT_TAG); - let put_req = req.get_put(); - encode_cf(put_req.get_cf(), buf); - encode_bytes(put_req.get_key(), buf); - encode_bytes(put_req.get_value(), buf); + encode_cf(put.cf, buf); + encode_bytes(put.key, buf); + encode_bytes(put.value, buf); } - CmdType::Delete => { + SimpleWrite::Delete(delete) => { buf.push(DELETE_TAG); - let delete_req = req.get_delete(); - encode_cf(delete_req.get_cf(), buf); - encode_bytes(delete_req.get_key(), buf); + encode_cf(delete.cf, buf); + encode_bytes(delete.key, buf); } - CmdType::DeleteRange => { + SimpleWrite::DeleteRange(dr) => { buf.push(DELETE_RANGE_TAG); - let delete_range_req = req.get_delete_range(); - encode_cf(delete_range_req.get_cf(), buf); - encode_bytes(delete_range_req.get_start_key(), buf); - encode_bytes(delete_range_req.get_end_key(), buf); - buf.push(delete_range_req.get_notify_only() as u8); + encode_cf(dr.cf, buf); + encode_bytes(dr.start_key, buf); + encode_bytes(dr.end_key, buf); + buf.push(dr.notify_only as u8); } - CmdType::Invalid - | CmdType::Get - | CmdType::Snap - | CmdType::Prewrite - | CmdType::IngestSst - | CmdType::ReadIndex => unreachable!("not supported type should be filtered already"), } } @@ -380,57 +391,32 @@ fn 
decode<'a>(buf: &mut &'a [u8]) -> Option> { #[cfg(test)] mod tests { + use kvproto::raft_cmdpb::{CmdType, Request}; use slog::o; use super::*; #[test] fn test_codec() { - let mut cmd = RaftCmdRequest::default(); - cmd.mut_header().set_term(2); - - let mut req = Request::default(); - req.set_cmd_type(CmdType::Put); - let put_req = req.mut_put(); - put_req.set_cf(CF_DEFAULT.to_string()); - put_req.set_key(b"key".to_vec()); - put_req.set_value(b"".to_vec()); - cmd.mut_requests().push(req); - - req = Request::default(); - req.set_cmd_type(CmdType::Delete); - let delete_req = req.mut_delete(); + let mut encoder = SimpleWriteEncoder::with_capacity(512); + encoder.put(CF_DEFAULT, b"key", b""); let delete_key = vec![0; 1024]; - delete_req.set_cf(CF_WRITE.to_string()); - delete_req.set_key(delete_key.clone()); - cmd.mut_requests().push(req); - - let mut encoder = SimpleWriteEncoder::new(cmd.clone(), usize::MAX, false).unwrap(); - cmd.clear_requests(); - - req = Request::default(); - req.set_cmd_type(CmdType::DeleteRange); - let delete_range_req = req.mut_delete_range(); - delete_range_req.set_cf(CF_LOCK.to_string()); - delete_range_req.set_start_key(b"key".to_vec()); - delete_range_req.set_end_key(b"key".to_vec()); - delete_range_req.set_notify_only(true); - cmd.mut_requests().push(req); - - req = Request::default(); - req.set_cmd_type(CmdType::DeleteRange); - let delete_range_req = req.mut_delete_range(); - delete_range_req.set_cf("cf".to_string()); - delete_range_req.set_start_key(b"key".to_vec()); - delete_range_req.set_end_key(b"key".to_vec()); - delete_range_req.set_notify_only(false); - cmd.mut_requests().push(req); - - encoder.amend(cmd.clone()).unwrap(); - let (bytes, _) = encoder.encode(); + encoder.delete(CF_WRITE, &delete_key); + let bin = encoder.encode(); + + let mut header = Box::::default(); + header.set_term(2); + let mut req_encoder = SimpleWriteReqEncoder::new(header.clone(), bin, usize::MAX, false); + + let mut encoder = 
SimpleWriteEncoder::with_capacity(512); + encoder.delete_range(CF_LOCK, b"key", b"key", true); + encoder.delete_range("cf", b"key", b"key", false); + req_encoder.amend(&header, &encoder.encode()); + + let (bytes, _) = req_encoder.encode(); let logger = slog_global::borrow_global().new(o!()); - let mut decoder = SimpleWriteDecoder::new(&logger, &bytes, 0, 0).unwrap(); - assert_eq!(decoder.header(), cmd.get_header()); + let mut decoder = SimpleWriteReqDecoder::new(&logger, &bytes, 0, 0).unwrap(); + assert_eq!(*decoder.header(), *header); let write = decoder.next().unwrap(); let SimpleWrite::Put(put) = write else { panic!("should be put") }; assert_eq!(put.cf, CF_DEFAULT); @@ -488,38 +474,40 @@ mod tests { #[test] fn test_invalid() { - let mut invalid_cmd = RaftCmdRequest::default(); - invalid_cmd.mut_header().set_term(2); + let mut raft_cmd = RaftCmdRequest::default(); + raft_cmd.mut_header().set_term(2); let mut req = Request::default(); req.set_cmd_type(CmdType::Invalid); - invalid_cmd.mut_requests().push(req); - let fallback = SimpleWriteEncoder::new(invalid_cmd.clone(), usize::MAX, false).unwrap_err(); - let bytes = fallback.write_to_bytes().unwrap(); + raft_cmd.mut_requests().push(req); + let bytes = raft_cmd.write_to_bytes().unwrap(); let logger = slog_global::borrow_global().new(o!()); - let decoded = SimpleWriteDecoder::new(&logger, &bytes, 0, 0).unwrap_err(); - assert_eq!(decoded, invalid_cmd); + let decoded = SimpleWriteReqDecoder::new(&logger, &bytes, 0, 0).unwrap_err(); + // SimpleWriteReqDecoder should be able to decode naive RaftCmdRequest. 
+ assert_eq!(decoded, raft_cmd); - let mut valid_cmd = RaftCmdRequest::default(); - valid_cmd.mut_header().set_term(3); - let mut req = Request::default(); - req.set_cmd_type(CmdType::Put); - let put_req = req.mut_put(); - put_req.set_cf(CF_DEFAULT.to_string()); - put_req.set_key(b"key".to_vec()); - put_req.set_value(b"".to_vec()); - valid_cmd.mut_requests().push(req); - let mut encoder = SimpleWriteEncoder::new(valid_cmd.clone(), usize::MAX, false).unwrap(); - // Only simple write command can be batched. - encoder.amend(invalid_cmd.clone()).unwrap_err(); - let mut valid_cmd2 = valid_cmd.clone(); - valid_cmd2.mut_header().set_term(4); + let mut encoder = SimpleWriteEncoder::with_capacity(512); + encoder.put(CF_DEFAULT, b"key", b""); + let bin = encoder.encode(); + + let mut header = Box::::default(); + header.set_term(2); + let mut req_encoder = SimpleWriteReqEncoder::new(header.clone(), bin.clone(), 512, false); + + let mut header2 = Box::::default(); + header2.set_term(4); // Only simple write command with same header can be batched. - encoder.amend(valid_cmd2).unwrap_err(); + assert!(!req_encoder.amend(&header2, &bin)); + + // Batch should not excceed max size limit. 
+ let large_value = vec![0; 512]; + let mut encoder = SimpleWriteEncoder::with_capacity(512); + encoder.put(CF_DEFAULT, b"key", &large_value); + assert!(!req_encoder.amend(&header, &encoder.encode())); - let (bytes, _) = encoder.encode(); - let mut decoder = SimpleWriteDecoder::new(&logger, &bytes, 0, 0).unwrap(); - assert_eq!(decoder.header(), valid_cmd.get_header()); + let (bytes, _) = req_encoder.encode(); + let mut decoder = SimpleWriteReqDecoder::new(&logger, &bytes, 0, 0).unwrap(); + assert_eq!(*decoder.header(), *header); let req = decoder.next().unwrap(); let SimpleWrite::Put(put) = req else { panic!("should be put") }; assert_eq!(put.cf, CF_DEFAULT); diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index beb47f9a08f..5e6971b3346 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -7,7 +7,8 @@ mod query; mod ready; pub use command::{ - AdminCmdResult, CommittedEntries, ProposalControl, SimpleWriteDecoder, SimpleWriteEncoder, + AdminCmdResult, CommittedEntries, ProposalControl, SimpleWriteBinary, SimpleWriteEncoder, + SimpleWriteReqDecoder, SimpleWriteReqEncoder, }; pub use life::DestroyProgress; pub use ready::{ diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index d24a4b9d899..1878ead40c2 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -9,6 +9,7 @@ use std::{ use batch_system::Router; use crossbeam::channel::TrySendError; use engine_traits::{CachedTablet, KvEngine, RaftEngine, TabletRegistry}; +use futures::Future; use kvproto::{ errorpb, raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse}, @@ -22,7 +23,7 @@ use raftstore::{ Error, Result, }; use slog::{debug, Logger}; -use tikv_util::{box_err, codec::number::decode_u64, time::monotonic_raw_now}; +use tikv_util::{box_err, 
codec::number::decode_u64, time::monotonic_raw_now, Either}; use time::Timespec; use txn_types::WriteBatchFlags; @@ -32,7 +33,7 @@ use crate::{ StoreRouter, }; -pub trait MsgRouter: Send { +pub trait MsgRouter: Clone + Send { fn send(&self, addr: u64, msg: PeerMsg) -> std::result::Result<(), TrySendError>; } @@ -103,9 +104,9 @@ where fn try_get_snapshot( &mut self, - req: RaftCmdRequest, + req: &RaftCmdRequest, ) -> std::result::Result>, RaftCmdResponse> { - match self.pre_propose_raft_command(&req) { + match self.pre_propose_raft_command(req) { Ok(Some((mut delegate, policy))) => match policy { RequestPolicy::ReadLocal => { let region = Arc::clone(&delegate.region); @@ -121,7 +122,7 @@ where TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().local_executed_requests.inc()); // Try renew lease in advance - self.maybe_renew_lease_in_advance(&delegate, &req, snapshot_ts); + self.maybe_renew_lease_in_advance(&delegate, req, snapshot_ts); Ok(Some(snap)) } RequestPolicy::StaleRead => { @@ -156,63 +157,84 @@ where } } - pub async fn snapshot( + pub fn snapshot( &mut self, mut req: RaftCmdRequest, - ) -> std::result::Result, RaftCmdResponse> { + ) -> impl Future, RaftCmdResponse>> + Send + { let region_id = req.header.get_ref().region_id; - if let Some(snap) = self.try_get_snapshot(req.clone())? { - return Ok(snap); - } + let res = match self.try_get_snapshot(&req) { + res @ (Ok(Some(_)) | Err(_)) => Either::Left(res), + Ok(None) => Either::Right((self.try_to_renew_lease(region_id, &req), self.clone())), + }; - if let Some(query_res) = self.try_to_renew_lease(region_id, &req).await? { - // If query successful, try again. - if query_res.read().is_some() { - req.mut_header().set_read_quorum(false); - if let Some(snap) = self.try_get_snapshot(req)? { - return Ok(snap); + async move { + match res { + Either::Left(Ok(Some(snap))) => return Ok(snap), + Either::Left(Err(e)) => return Err(e), + Either::Right((fut, mut reader)) => { + if let Some(query_res) = fut.await? 
+ && query_res.read().is_some() + { + // If query successful, try again. + req.mut_header().set_read_quorum(false); + if let Some(snap) = reader.try_get_snapshot(&req)? { + return Ok(snap); + } + } } + Either::Left(Ok(None)) => unreachable!(), } - } - let mut err = errorpb::Error::default(); - err.set_message(format!( - "Fail to get snapshot from LocalReader for region {}. \ - Maybe due to `not leader`, `region not found` or `not applied to the current term`", - region_id - )); - let mut resp = RaftCmdResponse::default(); - resp.mut_header().set_error(err); - Err(resp) + let mut err = errorpb::Error::default(); + err.set_message(format!( + "Fail to get snapshot from LocalReader for region {}. \ + Maybe due to `not leader`, `region not found` or `not applied to the current term`", + region_id + )); + let mut resp = RaftCmdResponse::default(); + resp.mut_header().set_error(err); + Err(resp) + } } // try to renew the lease by sending read query where the reading process may // renew the lease - async fn try_to_renew_lease( + fn try_to_renew_lease( &self, region_id: u64, req: &RaftCmdRequest, - ) -> std::result::Result, RaftCmdResponse> { + ) -> impl Future, RaftCmdResponse>> { let (msg, sub) = PeerMsg::raft_query(req.clone()); - let mut err = errorpb::Error::default(); - match MsgRouter::send(&self.router, region_id, msg) { - Ok(()) => return Ok(sub.result().await), + let res = match MsgRouter::send(&self.router, region_id, msg) { + Ok(()) => Ok(sub), Err(TrySendError::Full(_)) => { TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.channel_full.inc()); + let mut err = errorpb::Error::default(); err.set_message(RAFTSTORE_IS_BUSY.to_owned()); err.mut_server_is_busy() .set_reason(RAFTSTORE_IS_BUSY.to_owned()); + Err(err) } Err(TrySendError::Disconnected(_)) => { TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.no_region.inc()); + let mut err = errorpb::Error::default(); err.set_message(format!("region {} is missing", region_id)); 
err.mut_region_not_found().set_region_id(region_id); + Err(err) } - } + }; - let mut resp = RaftCmdResponse::default(); - resp.mut_header().set_error(err); - Err(resp) + async move { + match res { + Ok(sub) => Ok(sub.result().await), + Err(e) => { + let mut resp = RaftCmdResponse::default(); + resp.mut_header().set_error(e); + Err(resp) + } + } + } } // If the remote lease will be expired in near future send message @@ -449,6 +471,7 @@ mod tests { use super::*; use crate::router::{QueryResult, ReadResponse}; + #[derive(Clone)] struct MockRouter { p_router: SyncSender<(u64, PeerMsg)>, } @@ -681,10 +704,11 @@ mod tests { )) .unwrap(); block_on(reader.snapshot(cmd.clone())).unwrap(); - // Updating lease makes cache miss. + // Updating lease makes cache miss. And because the cache is updated on cloned + // copy, so the old cache will still need to be updated again. assert_eq!( TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), - 4 + 5 ); assert_eq!( TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.lease_expire.get()), diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index ea66719314c..eb58dcbbc23 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -128,7 +128,7 @@ impl Peer { } // Check store_id, make sure that the msg is dispatched to the right place. - if let Err(e) = util::check_store_id(msg, self.peer().get_store_id()) { + if let Err(e) = util::check_store_id(msg.get_header(), self.peer().get_store_id()) { raft_metrics.invalid_proposal.mismatch_store_id.inc(); return Err(e); } @@ -158,7 +158,7 @@ impl Peer { } // peer_id must be the same as peer's. 
- if let Err(e) = util::check_peer_id(msg, self.peer_id()) { + if let Err(e) = util::check_peer_id(msg.get_header(), self.peer_id()) { raft_metrics.invalid_proposal.mismatch_peer_id.inc(); return Err(e); } @@ -166,13 +166,13 @@ impl Peer { // TODO: check applying snapshot // Check whether the term is stale. - if let Err(e) = util::check_term(msg, self.term()) { + if let Err(e) = util::check_term(msg.get_header(), self.term()) { raft_metrics.invalid_proposal.stale_command.inc(); return Err(e); } // TODO: add check of sibling region for split - util::check_region_epoch(msg, self.region(), true) + util::check_req_region_epoch(msg, self.region(), true) } // For these cases it won't be proposed: @@ -340,7 +340,7 @@ impl Peer { } fn query_status(&mut self, req: &RaftCmdRequest, resp: &mut RaftCmdResponse) -> Result<()> { - util::check_store_id(req, self.peer().get_store_id())?; + util::check_store_id(req.get_header(), self.peer().get_store_id())?; let cmd_type = req.get_status_request().get_cmd_type(); let status_resp = resp.mut_status_response(); status_resp.set_cmd_type(cmd_type); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index a6df9049285..f9a6c3a34d4 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -31,7 +31,10 @@ use protobuf::Message as _; use raft::{eraftpb, prelude::MessageType, Ready, StateRole, INVALID_ID}; use raftstore::store::{util, FetchedLogs, ReadProgress, Transport, WriteTask}; use slog::{debug, error, trace, warn}; -use tikv_util::time::{duration_to_sec, monotonic_raw_now}; +use tikv_util::{ + store::find_peer, + time::{duration_to_sec, monotonic_raw_now}, +}; pub use self::{ async_writer::AsyncWriter, @@ -39,11 +42,25 @@ pub use self::{ }; use crate::{ batch::StoreContext, - fsm::PeerFsmDelegate, + fsm::{PeerFsmDelegate, Store}, raft::{Peer, Storage}, - router::{ApplyTask, PeerTick}, + router::{ApplyTask, 
PeerMsg, PeerTick}, }; +impl Store { + pub fn on_store_unreachable( + &mut self, + ctx: &mut StoreContext, + to_store_id: u64, + ) where + EK: KvEngine, + ER: RaftEngine, + { + ctx.router + .broadcast_normal(|| PeerMsg::StoreUnreachable { to_store_id }); + } +} + impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { /// Raft relies on periodic ticks to keep the state machine sync with other /// peers. @@ -61,6 +78,20 @@ impl Peer { self.raft_group_mut().tick() } + pub fn on_peer_unreachable(&mut self, to_peer_id: u64) { + if self.is_leader() { + self.raft_group_mut().report_unreachable(to_peer_id); + } + } + + pub fn on_store_unreachable(&mut self, to_store_id: u64) { + if self.is_leader() { + if let Some(peer_id) = find_peer(self.region(), to_store_id).map(|p| p.get_id()) { + self.raft_group_mut().report_unreachable(peer_id); + } + } + } + pub fn on_raft_message( &mut self, ctx: &mut StoreContext, diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 82e9b6011ca..500b166065f 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -25,7 +25,7 @@ use super::storage::Storage; use crate::{ batch::StoreContext, fsm::ApplyScheduler, - operation::{AsyncWriter, DestroyProgress, ProposalControl, SimpleWriteEncoder}, + operation::{AsyncWriter, DestroyProgress, ProposalControl, SimpleWriteReqEncoder}, router::{CmdResChannel, PeerTick, QueryResChannel}, Result, }; @@ -49,7 +49,7 @@ pub struct Peer { /// Encoder for batching proposals and encoding them in a more efficient way /// than protobuf. 
- raw_write_encoder: Option, + raw_write_encoder: Option, proposals: ProposalQueue>, apply_scheduler: Option, @@ -502,12 +502,12 @@ impl Peer { } #[inline] - pub fn simple_write_encoder_mut(&mut self) -> &mut Option { + pub fn simple_write_encoder_mut(&mut self) -> &mut Option { &mut self.raw_write_encoder } #[inline] - pub fn simple_write_encoder(&self) -> &Option { + pub fn simple_write_encoder(&self) -> &Option { &self.raw_write_encoder } diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 7208a6b5bef..e838cefb743 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -4,6 +4,7 @@ use std::sync::{Arc, Mutex}; use crossbeam::channel::TrySendError; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; +use futures::Future; use kvproto::{ raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, raft_serverpb::RaftMessage, @@ -78,10 +79,11 @@ impl RaftRouter { self.router.send_raft_message(msg) } - pub async fn get_snapshot( + pub fn snapshot( &mut self, req: RaftCmdRequest, - ) -> std::result::Result, RaftCmdResponse> { - self.local_reader.snapshot(req).await + ) -> impl Future, RaftCmdResponse>> + Send + { + self.local_reader.snapshot(req) } } diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index b387e729f8d..d5635574978 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -1,9 +1,11 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
// #[PerformanceCriticalPath] -use std::fmt; -use kvproto::{raft_cmdpb::RaftCmdRequest, raft_serverpb::RaftMessage}; +use kvproto::{ + raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}, + raft_serverpb::RaftMessage, +}; use raftstore::store::{metrics::RaftEventDurationType, FetchedLogs, GenSnapRes}; use tikv_util::time::Instant; @@ -13,7 +15,7 @@ use super::{ }, ApplyRes, }; -use crate::operation::SplitInit; +use crate::operation::{SimpleWriteBinary, SplitInit}; #[derive(Debug, Clone, Copy, PartialEq, Hash)] #[repr(u8)] @@ -91,6 +93,7 @@ impl StoreTick { } /// Command that can be handled by raftstore. +#[derive(Debug)] pub struct RaftRequest { pub send_time: Instant, pub request: RaftCmdRequest, @@ -107,7 +110,16 @@ impl RaftRequest { } } +#[derive(Debug)] +pub struct SimpleWrite { + pub send_time: Instant, + pub header: Box, + pub data: SimpleWriteBinary, + pub ch: CmdResChannel, +} + /// Message that can be sent to a peer. +#[derive(Debug)] pub enum PeerMsg { /// Raft message is the message sent between raft nodes in the same /// raft group. Messages need to be redirected to raftstore if target @@ -118,7 +130,9 @@ pub enum PeerMsg { RaftQuery(RaftRequest), /// Command changes the inernal states. It will be transformed into logs and /// applied on all replicas. - RaftCommand(RaftRequest), + SimpleWrite(SimpleWrite), + /// Command that contains admin requests. + AdminCommand(RaftRequest), /// Tick is periodical task. If target peer doesn't exist there is a /// potential that the raft node will not work anymore. Tick(PeerTick), @@ -144,6 +158,12 @@ pub enum PeerMsg { tablet_index: u64, flushed_index: u64, }, + PeerUnreachable { + to_peer_id: u64, + }, + StoreUnreachable { + to_store_id: u64, + }, /// A message that used to check if a flush is happened. 
#[cfg(feature = "testexport")] WaitFlush(super::FlushChannel), @@ -155,76 +175,33 @@ impl PeerMsg { (PeerMsg::RaftQuery(RaftRequest::new(req, ch)), sub) } - pub fn raft_command(req: RaftCmdRequest) -> (Self, CmdResSubscriber) { + pub fn admin_command(req: RaftCmdRequest) -> (Self, CmdResSubscriber) { let (ch, sub) = CmdResChannel::pair(); - (PeerMsg::RaftCommand(RaftRequest::new(req, ch)), sub) + (PeerMsg::AdminCommand(RaftRequest::new(req, ch)), sub) } -} -impl fmt::Debug for PeerMsg { - fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - PeerMsg::RaftMessage(_) => write!(fmt, "Raft Message"), - PeerMsg::RaftQuery(_) => write!(fmt, "Raft Query"), - PeerMsg::RaftCommand(_) => write!(fmt, "Raft Command"), - PeerMsg::Tick(tick) => write! { - fmt, - "{:?}", - tick - }, - PeerMsg::ApplyRes(res) => write!(fmt, "ApplyRes {:?}", res), - PeerMsg::Start => write!(fmt, "Startup"), - PeerMsg::SplitInit(_) => { - write!(fmt, "Split initialization") - } - PeerMsg::SplitInitFinish(region_id) => { - write!( - fmt, - "Split initialization finished from region {}", - region_id - ) - } - PeerMsg::Noop => write!(fmt, "Noop"), - PeerMsg::Persisted { - peer_id, - ready_number, - } => write!( - fmt, - "Persisted peer_id {}, ready_number {}", - peer_id, ready_number - ), - PeerMsg::LogsFetched(fetched) => write!(fmt, "LogsFetched {:?}", fetched), - PeerMsg::SnapshotGenerated(_) => write!(fmt, "SnapshotGenerated"), - PeerMsg::QueryDebugInfo(_) => write!(fmt, "QueryDebugInfo"), - PeerMsg::DataFlushed { - cf, - tablet_index, - flushed_index, - } => write!( - fmt, - "DataFlushed cf {}, tablet_index {}, flushed_index {}", - cf, tablet_index, flushed_index - ), - #[cfg(feature = "testexport")] - PeerMsg::WaitFlush(_) => write!(fmt, "FlushMessages"), - } + pub fn simple_write( + header: Box, + data: SimpleWriteBinary, + ) -> (Self, CmdResSubscriber) { + let (ch, sub) = CmdResChannel::pair(); + ( + PeerMsg::SimpleWrite(SimpleWrite { + send_time: Instant::now(), + 
header, + data, + ch, + }), + sub, + ) } } +#[derive(Debug)] pub enum StoreMsg { RaftMessage(Box), SplitInit(Box), Tick(StoreTick), Start, -} - -impl fmt::Debug for StoreMsg { - fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - match *self { - StoreMsg::RaftMessage(_) => write!(fmt, "Raft Message"), - StoreMsg::SplitInit(_) => write!(fmt, "Split initialization"), - StoreMsg::Tick(tick) => write!(fmt, "StoreTick {:?}", tick), - StoreMsg::Start => write!(fmt, "Start store"), - } - } + StoreUnreachable { to_store_id: u64 }, } diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs index a09b0593b80..d6846f61e4b 100644 --- a/components/raftstore-v2/src/router/mod.rs +++ b/components/raftstore-v2/src/router/mod.rs @@ -15,7 +15,7 @@ pub use self::{ internal_message::ApplyRes, message::{PeerMsg, PeerTick, RaftRequest, StoreMsg, StoreTick}, response_channel::{ - CmdResChannel, DebugInfoChannel, DebugInfoSubscriber, QueryResChannel, QueryResult, - ReadResponse, + CmdResChannel, CmdResChannelBuilder, CmdResEvent, CmdResStream, CmdResSubscriber, + DebugInfoChannel, DebugInfoSubscriber, QueryResChannel, QueryResult, ReadResponse, }, }; diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index 423c9e8e326..01c1565ec62 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -24,7 +24,7 @@ use std::{ task::{Context, Poll}, }; -use futures::task::AtomicWaker; +use futures::{task::AtomicWaker, FutureExt, Stream}; use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, raft_cmdpb::RaftCmdResponse}; use raftstore::store::{ local_metrics::TimeTracker, msg::ErrorCallback, region_meta::RegionMeta, ReadCallback, @@ -47,7 +47,11 @@ struct EventCore { /// Event 0 and Event 31 is reserved as payload and cancel respectively. /// Other events should be defined within [1, 30]. 
event: AtomicU64, + /// Even a channel supports multiple events, it's not necessary to trigger + /// all of them. `event_mask` is used to filter unnecessary events. + event_mask: u32, res: UnsafeCell>, + before_set: UnsafeCell>>, // Waker can be changed, need to use `AtomicWaker` to guarantee no data race. waker: AtomicWaker, } @@ -57,6 +61,10 @@ unsafe impl Send for EventCore {} const PAYLOAD_EVENT: u64 = 0; const CANCEL_EVENT: u64 = 31; +const fn event_mask_bit_of(event: u64) -> u32 { + 1 << event +} + #[inline] const fn subscribed_bit_of(event: u64) -> u64 { 1 << (event * 2) @@ -67,23 +75,14 @@ const fn fired_bit_of(event: u64) -> u64 { 1 << (event * 2 + 1) } -impl Default for EventCore { - #[inline] - fn default() -> Self { - Self { - event: AtomicU64::new(0), - res: UnsafeCell::new(None), - waker: AtomicWaker::new(), - } - } -} - impl EventCore { #[inline] fn notify_event(&self, event: u64) { - let previous = self.event.fetch_or(fired_bit_of(event), Ordering::AcqRel); - if previous & subscribed_bit_of(event) != 0 { - self.waker.wake() + if self.event_mask & event_mask_bit_of(event) != 0 { + let previous = self.event.fetch_or(fired_bit_of(event), Ordering::AcqRel); + if previous & subscribed_bit_of(event) != 0 { + self.waker.wake() + } } } @@ -91,8 +90,11 @@ impl EventCore { /// /// After this call, no events should be notified. 
#[inline] - fn set_result(&self, result: Res) { + fn set_result(&self, mut result: Res) { unsafe { + if let Some(cb) = (*self.before_set.get()).take() { + cb(&mut result); + } *self.res.get() = Some(result); } let previous = self.event.fetch_or( @@ -173,7 +175,7 @@ impl<'a, Res> Future for WaitEvent<'a, Res> { } struct WaitResult<'a, Res> { - core: &'a EventCore, + sub: &'a BaseSubscriber, } impl<'a, Res> Future for WaitResult<'a, Res> { @@ -181,16 +183,16 @@ impl<'a, Res> Future for WaitResult<'a, Res> { #[inline] fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - let event = &self.core.event; + let event = &self.sub.core.event; let fired_bit = fired_bit_of(PAYLOAD_EVENT); let mut e = event.load(Ordering::Relaxed); if check_bit(e, fired_bit).is_some() { unsafe { - return Poll::Ready((*self.core.res.get()).take()); + return Poll::Ready((*self.sub.core.res.get()).take()); } } let subscribed_bit = subscribed_bit_of(PAYLOAD_EVENT); - self.core.waker.register(cx.waker()); + self.sub.core.waker.register(cx.waker()); loop { match event.compare_exchange_weak( e, @@ -203,7 +205,7 @@ impl<'a, Res> Future for WaitResult<'a, Res> { }; if check_bit(e, fired_bit).is_some() { unsafe { - return Poll::Ready((*self.core.res.get()).take()); + return Poll::Ready((*self.sub.core.res.get()).take()); } } } @@ -219,7 +221,7 @@ impl BaseSubscriber { /// Wait for the result. #[inline] pub async fn result(self) -> Option { - WaitResult { core: &self.core }.await + WaitResult { sub: &self }.await } /// Test if the result is ready without any polling. @@ -242,7 +244,17 @@ impl BaseChannel { /// Creates a pair of channel and subscriber. 
#[inline] pub fn pair() -> (Self, BaseSubscriber) { - let core: Arc> = Arc::default(); + Self::with_mask(u32::MAX) + } + + fn with_mask(mask: u32) -> (Self, BaseSubscriber) { + let core: Arc> = Arc::new(EventCore { + event: AtomicU64::new(0), + res: UnsafeCell::new(None), + event_mask: mask, + before_set: UnsafeCell::new(None), + waker: AtomicWaker::new(), + }); (Self { core: core.clone() }, BaseSubscriber { core }) } @@ -283,6 +295,122 @@ impl CmdResSubscriber { } } +#[derive(Clone, Copy, Debug)] +enum CmdResPollStage { + ExpectProposed, + ExpectCommitted, + ExpectResult, + Drained, +} + +impl CmdResPollStage { + #[inline] + fn init(event_mask: u32) -> CmdResPollStage { + if event_mask & event_mask_bit_of(CmdResChannel::PROPOSED_EVENT) != 0 { + CmdResPollStage::ExpectProposed + } else if event_mask & event_mask_bit_of(CmdResChannel::COMMITTED_EVENT) != 0 { + CmdResPollStage::ExpectCommitted + } else { + CmdResPollStage::ExpectResult + } + } + + #[inline] + fn next(&mut self, event_mask: u32) { + *self = match self { + CmdResPollStage::ExpectProposed => { + if event_mask & event_mask_bit_of(CmdResChannel::COMMITTED_EVENT) == 0 { + CmdResPollStage::ExpectResult + } else { + CmdResPollStage::ExpectCommitted + } + } + CmdResPollStage::ExpectCommitted => CmdResPollStage::ExpectResult, + CmdResPollStage::ExpectResult => CmdResPollStage::Drained, + CmdResPollStage::Drained => CmdResPollStage::Drained, + } + } +} + +#[derive(Debug)] +pub enum CmdResEvent { + Proposed, + Committed, + Finished(RaftCmdResponse), +} + +pub struct CmdResStream { + sub: CmdResSubscriber, + stage: CmdResPollStage, +} + +impl CmdResStream { + #[inline] + pub fn new(sub: CmdResSubscriber) -> Self { + Self { + stage: CmdResPollStage::init(sub.core.event_mask), + sub, + } + } +} + +impl Stream for CmdResStream { + type Item = CmdResEvent; + + #[inline] + fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let stream = self.get_mut(); + loop { + match stream.stage { + 
CmdResPollStage::ExpectProposed => { + match (WaitEvent { + event: CmdResChannel::PROPOSED_EVENT, + core: &stream.sub.core, + }) + .poll_unpin(cx) + { + Poll::Pending => return Poll::Pending, + Poll::Ready(b) => { + stream.stage.next(stream.sub.core.event_mask); + if b { + return Poll::Ready(Some(CmdResEvent::Proposed)); + } + } + } + } + CmdResPollStage::ExpectCommitted => { + match (WaitEvent { + event: CmdResChannel::COMMITTED_EVENT, + core: &stream.sub.core, + }) + .poll_unpin(cx) + { + Poll::Pending => return Poll::Pending, + Poll::Ready(b) => { + stream.stage.next(stream.sub.core.event_mask); + if b { + return Poll::Ready(Some(CmdResEvent::Committed)); + } + } + } + } + CmdResPollStage::ExpectResult => { + match (WaitResult { sub: &stream.sub }).poll_unpin(cx) { + Poll::Pending => return Poll::Pending, + Poll::Ready(res) => { + stream.stage.next(stream.sub.core.event_mask); + if let Some(res) = res { + return Poll::Ready(Some(CmdResEvent::Finished(res))); + } + } + } + } + CmdResPollStage::Drained => return Poll::Ready(None), + } + } + } +} + pub type CmdResChannel = BaseChannel; impl Debug for CmdResChannel { @@ -291,6 +419,46 @@ impl Debug for CmdResChannel { } } +#[derive(Default)] +pub struct CmdResChannelBuilder { + event_mask: u32, + before_set: Option>, +} + +impl CmdResChannelBuilder { + #[inline] + pub fn subscribe_proposed(&mut self) -> &mut Self { + self.event_mask |= event_mask_bit_of(CmdResChannel::PROPOSED_EVENT); + self + } + + #[inline] + pub fn subscribe_committed(&mut self) -> &mut Self { + self.event_mask |= event_mask_bit_of(CmdResChannel::COMMITTED_EVENT); + self + } + + #[inline] + pub fn before_set( + &mut self, + f: impl FnOnce(&mut RaftCmdResponse) + Send + 'static, + ) -> &mut Self { + self.before_set = Some(Box::new(f)); + self + } + + #[inline] + pub fn build(self) -> (CmdResChannel, CmdResSubscriber) { + let (c, s) = CmdResChannel::with_mask(self.event_mask); + if let Some(f) = self.before_set { + unsafe { + 
*c.core.before_set.get() = Some(f); + } + } + (c, s) + } +} + impl CmdResChannel { // Valid range is [1, 30] const PROPOSED_EVENT: u64 = 1; @@ -424,14 +592,28 @@ impl fmt::Debug for QueryResChannel { pub type DebugInfoChannel = BaseChannel; pub type DebugInfoSubscriber = BaseSubscriber; +impl Debug for DebugInfoChannel { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "DebugInfoChannel") + } +} + #[cfg(feature = "testexport")] pub type FlushChannel = BaseChannel<()>; #[cfg(feature = "testexport")] pub type FlushSubscriber = BaseSubscriber<()>; +impl Debug for FlushChannel { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "FlushChannel") + } +} + #[cfg(test)] mod tests { - use futures::executor::block_on; + use std::assert_matches::assert_matches; + + use futures::{executor::block_on, StreamExt}; use super::*; @@ -482,4 +664,44 @@ mod tests { chan.set_result(read.clone()); assert_eq!(block_on(sub.result()).unwrap(), read); } + + #[test] + fn test_cmd_res_stream() { + let mut builder = CmdResChannelBuilder::default(); + builder.before_set(|res| { + res.mut_header().set_current_term(6); + }); + let (chan, sub) = builder.build(); + let mut stream = CmdResStream::new(sub); + chan.set_result(RaftCmdResponse::default()); + assert_matches!(block_on(stream.next()), Some(CmdResEvent::Finished(res)) if res.get_header().get_current_term() == 6); + + // When using builder, no event is subscribed by default. 
+ let (mut chan, sub) = CmdResChannelBuilder::default().build(); + let mut stream = CmdResStream::new(sub); + chan.notify_proposed(); + chan.notify_committed(); + drop(chan); + assert_matches!(block_on(stream.next()), None); + + let mut builder = CmdResChannelBuilder::default(); + builder.subscribe_proposed(); + let (mut chan, sub) = builder.build(); + let mut stream = CmdResStream::new(sub); + chan.notify_proposed(); + chan.notify_committed(); + assert_matches!(block_on(stream.next()), Some(CmdResEvent::Proposed)); + drop(chan); + assert_matches!(block_on(stream.next()), None); + + let mut builder = CmdResChannelBuilder::default(); + builder.subscribe_committed(); + let (mut chan, sub) = builder.build(); + let mut stream = CmdResStream::new(sub); + chan.notify_proposed(); + chan.notify_committed(); + assert_matches!(block_on(stream.next()), Some(CmdResEvent::Committed)); + drop(chan); + assert_matches!(block_on(stream.next()), None); + } } diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 80e12dc53c7..15bb2e73ff8 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -228,7 +228,7 @@ mod requests { req.mut_header().set_peer(peer); req.set_admin_request(request); - let (msg, _) = PeerMsg::raft_command(req); + let (msg, _) = PeerMsg::admin_command(req); if let Err(e) = router.send(region_id, msg) { error!( logger, diff --git a/components/raftstore-v2/tests/failpoints/test_basic_write.rs b/components/raftstore-v2/tests/failpoints/test_basic_write.rs index b20984a9837..55d85b90fa4 100644 --- a/components/raftstore-v2/tests/failpoints/test_basic_write.rs +++ b/components/raftstore-v2/tests/failpoints/test_basic_write.rs @@ -2,10 +2,9 @@ use std::{assert_matches::assert_matches, time::Duration}; -use engine_traits::Peekable; +use engine_traits::{Peekable, CF_DEFAULT}; use futures::executor::block_on; -use kvproto::raft_cmdpb::{CmdType, Request}; -use 
raftstore_v2::router::PeerMsg; +use raftstore_v2::{router::PeerMsg, SimpleWriteEncoder}; use crate::cluster::Cluster; @@ -14,27 +13,25 @@ use crate::cluster::Cluster; fn test_write_batch_rollback() { let mut cluster = Cluster::default(); let router = &mut cluster.routers[0]; - let mut req = router.new_request_for(2); - let mut put_req = Request::default(); - put_req.set_cmd_type(CmdType::Put); - put_req.mut_put().set_key(b"key".to_vec()); - put_req.mut_put().set_value(b"value".to_vec()); - req.mut_requests().push(put_req.clone()); + let header = Box::new(router.new_request_for(2).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, b"key", b"value"); router.wait_applied_to_current_term(2, Duration::from_secs(3)); // Make several entries to batch in apply thread. fail::cfg("APPLY_COMMITTED_ENTRIES", "pause").unwrap(); // Good proposal should be committed. - let (msg, mut sub0) = PeerMsg::raft_command(req.clone()); + let (msg, mut sub0) = PeerMsg::simple_write(header.clone(), put.encode()); router.send(2, msg).unwrap(); assert!(block_on(sub0.wait_proposed())); assert!(block_on(sub0.wait_committed())); // If the write batch is correctly initialized, next write should not contain // last result. - req.mut_requests()[0].mut_put().set_key(b"key1".to_vec()); - let (msg, mut sub1) = PeerMsg::raft_command(req.clone()); + put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, b"key1", b"value"); + let (msg, mut sub1) = PeerMsg::simple_write(header.clone(), put.encode()); router.send(2, msg).unwrap(); assert!(block_on(sub1.wait_proposed())); assert!(block_on(sub1.wait_committed())); @@ -63,16 +60,18 @@ fn test_write_batch_rollback() { fail::cfg("APPLY_COMMITTED_ENTRIES", "pause").unwrap(); // Trigger error again, so an initialized write batch should be rolled back. 
- req.mut_requests()[0].mut_put().set_key(b"key2".to_vec()); - let (msg, mut sub0) = PeerMsg::raft_command(req.clone()); + put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, b"key2", b"value"); + let (msg, mut sub0) = PeerMsg::simple_write(header.clone(), put.encode()); router.send(2, msg).unwrap(); assert!(block_on(sub0.wait_proposed())); assert!(block_on(sub0.wait_committed())); // If the write batch is correctly rollbacked, next write should not contain // last result. - req.mut_requests()[0].mut_put().set_key(b"key3".to_vec()); - let (msg, mut sub1) = PeerMsg::raft_command(req.clone()); + put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, b"key3", b"value"); + let (msg, mut sub1) = PeerMsg::simple_write(header, put.encode()); router.send(2, msg).unwrap(); assert!(block_on(sub1.wait_proposed())); assert!(block_on(sub1.wait_committed())); diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 55ad823b99d..732afb38f98 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -24,7 +24,7 @@ use engine_traits::{TabletContext, TabletRegistry, DATA_CFS}; use futures::executor::block_on; use kvproto::{ metapb::{self, RegionEpoch, Store}, - raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse, Request}, + raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse, RaftRequestHeader, Request}, raft_serverpb::RaftMessage, }; use pd_client::RpcClient; @@ -36,7 +36,7 @@ use raftstore::store::{ use raftstore_v2::{ create_store_batch_system, router::{DebugInfoChannel, FlushChannel, PeerMsg, QueryResult, RaftRouter}, - Bootstrap, StateStorage, StoreSystem, + Bootstrap, SimpleWriteEncoder, StateStorage, StoreSystem, }; use slog::{debug, o, Logger}; use tempfile::TempDir; @@ -59,13 +59,6 @@ pub fn check_skip_wal(path: &str) { assert!(found, "no WAL found in {}", path); } -pub fn new_put_request(key: impl 
Into>, value: impl Into>) -> Request { - let mut req = Request::default(); - req.set_cmd_type(CmdType::Put); - req.mut_put().set_key(key.into()); - req.mut_put().set_value(value.into()); - req -} pub struct TestRouter(RaftRouter); impl Deref for TestRouter { @@ -104,8 +97,19 @@ impl TestRouter { None } - pub fn command(&self, region_id: u64, req: RaftCmdRequest) -> Option { - let (msg, sub) = PeerMsg::raft_command(req); + pub fn simple_write( + &self, + region_id: u64, + header: Box, + write: SimpleWriteEncoder, + ) -> Option { + let (msg, sub) = PeerMsg::simple_write(header, write.encode()); + self.send(region_id, msg).unwrap(); + block_on(sub.result()) + } + + pub fn admin_command(&self, region_id: u64, req: RaftCmdRequest) -> Option { + let (msg, sub) = PeerMsg::admin_command(req); self.send(region_id, msg).unwrap(); block_on(sub.result()) } @@ -179,7 +183,7 @@ impl TestRouter { let mut snap_req = Request::default(); snap_req.set_cmd_type(CmdType::Snap); req.mut_requests().push(snap_req); - block_on(self.get_snapshot(req)).unwrap() + block_on(self.snapshot(req)).unwrap() } pub fn region_detail(&self, region_id: u64) -> metapb::Region { diff --git a/components/raftstore-v2/tests/integrations/test_basic_write.rs b/components/raftstore-v2/tests/integrations/test_basic_write.rs index 29f665758d6..cb8d71840cf 100644 --- a/components/raftstore-v2/tests/integrations/test_basic_write.rs +++ b/components/raftstore-v2/tests/integrations/test_basic_write.rs @@ -2,14 +2,11 @@ use std::{assert_matches::assert_matches, time::Duration}; -use engine_traits::Peekable; +use engine_traits::{Peekable, CF_DEFAULT}; use futures::executor::block_on; -use kvproto::{ - raft_cmdpb::{CmdType, Request}, - raft_serverpb::RaftMessage, -}; +use kvproto::raft_serverpb::RaftMessage; use raftstore::store::{INIT_EPOCH_CONF_VER, INIT_EPOCH_VER}; -use raftstore_v2::router::PeerMsg; +use raftstore_v2::{router::PeerMsg, SimpleWriteEncoder}; use tikv_util::store::new_peer; use 
crate::cluster::{check_skip_wal, Cluster}; @@ -19,17 +16,14 @@ use crate::cluster::{check_skip_wal, Cluster}; fn test_basic_write() { let cluster = Cluster::default(); let router = &cluster.routers[0]; - let mut req = router.new_request_for(2); - let mut put_req = Request::default(); - put_req.set_cmd_type(CmdType::Put); - put_req.mut_put().set_key(b"key".to_vec()); - put_req.mut_put().set_value(b"value".to_vec()); - req.mut_requests().push(put_req); + let header = Box::new(router.new_request_for(2).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, b"key", b"value"); router.wait_applied_to_current_term(2, Duration::from_secs(3)); // Good proposal should be committed. - let (msg, mut sub) = PeerMsg::raft_command(req.clone()); + let (msg, mut sub) = PeerMsg::simple_write(header.clone(), put.clone().encode()); router.send(2, msg).unwrap(); assert!(block_on(sub.wait_proposed())); assert!(block_on(sub.wait_committed())); @@ -37,9 +31,9 @@ fn test_basic_write() { assert!(!resp.get_header().has_error(), "{:?}", resp); // Store id should be checked. - let mut invalid_req = req.clone(); - invalid_req.mut_header().set_peer(new_peer(3, 3)); - let resp = router.command(2, invalid_req.clone()).unwrap(); + let mut invalid_header = header.clone(); + invalid_header.set_peer(new_peer(3, 3)); + let resp = router.simple_write(2, invalid_header, put.clone()).unwrap(); assert!( resp.get_header().get_error().has_store_not_match(), "{:?}", @@ -47,36 +41,27 @@ fn test_basic_write() { ); // Peer id should be checked. - let mut invalid_req = req.clone(); - invalid_req.mut_header().set_peer(new_peer(1, 1)); - let resp = router.command(2, invalid_req.clone()).unwrap(); + invalid_header = header.clone(); + invalid_header.set_peer(new_peer(1, 1)); + let resp = router.simple_write(2, invalid_header, put.clone()).unwrap(); assert!(resp.get_header().has_error(), "{:?}", resp); // Epoch should be checked. 
- let mut invalid_req = req.clone(); - invalid_req - .mut_header() + invalid_header = header.clone(); + invalid_header .mut_region_epoch() .set_version(INIT_EPOCH_VER - 1); - let resp = router.command(2, invalid_req.clone()).unwrap(); + let resp = router.simple_write(2, invalid_header, put.clone()).unwrap(); assert!( resp.get_header().get_error().has_epoch_not_match(), "{:?}", resp ); - // It's wrong to send query to write command. - let mut invalid_req = req.clone(); - let mut snap_req = Request::default(); - snap_req.set_cmd_type(CmdType::Snap); - invalid_req.mut_requests().push(snap_req); - let resp = router.command(2, invalid_req.clone()).unwrap(); - assert!(resp.get_header().has_error(), "{:?}", resp); - // Term should be checked if set. - let mut invalid_req = req.clone(); - invalid_req.mut_header().set_term(1); - let resp = router.command(2, invalid_req).unwrap(); + invalid_header = header.clone(); + invalid_header.set_term(1); + let resp = router.simple_write(2, invalid_header, put.clone()).unwrap(); assert!( resp.get_header().get_error().has_stale_command(), "{:?}", @@ -84,11 +69,9 @@ fn test_basic_write() { ); // Too large message can cause regression and should be rejected. 
- let mut invalid_req = req.clone(); - invalid_req.mut_requests()[0] - .mut_put() - .set_value(vec![0; 8 * 1024 * 1024]); - let resp = router.command(2, invalid_req).unwrap(); + let mut invalid_put = SimpleWriteEncoder::with_capacity(9 * 1024 * 1024); + invalid_put.put(CF_DEFAULT, b"key", &vec![0; 8 * 1024 * 1024]); + let resp = router.simple_write(2, header.clone(), invalid_put).unwrap(); assert!( resp.get_header().get_error().has_raft_entry_too_large(), "{:?}", @@ -106,7 +89,7 @@ fn test_basic_write() { raft_message.set_from(4); raft_message.set_term(8); router.send_raft_message(msg).unwrap(); - let resp = router.command(2, req).unwrap(); + let resp = router.simple_write(2, header, put).unwrap(); assert!(resp.get_header().get_error().has_not_leader(), "{:?}", resp); } @@ -114,18 +97,15 @@ fn test_basic_write() { fn test_put_delete() { let mut cluster = Cluster::default(); let router = &mut cluster.routers[0]; - let mut req = router.new_request_for(2); - let mut put_req = Request::default(); - put_req.set_cmd_type(CmdType::Put); - put_req.mut_put().set_key(b"key".to_vec()); - put_req.mut_put().set_value(b"value".to_vec()); - req.mut_requests().push(put_req); + let header = Box::new(router.new_request_for(2).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, b"key", b"value"); router.wait_applied_to_current_term(2, Duration::from_secs(3)); let snap = router.stale_snapshot(2); assert!(snap.get_value(b"key").unwrap().is_none()); - let (msg, mut sub) = PeerMsg::raft_command(req.clone()); + let (msg, mut sub) = PeerMsg::simple_write(header.clone(), put.encode()); router.send(2, msg).unwrap(); assert!(block_on(sub.wait_proposed())); assert!(block_on(sub.wait_committed())); @@ -134,12 +114,9 @@ fn test_put_delete() { let snap = router.stale_snapshot(2); assert_eq!(snap.get_value(b"key").unwrap().unwrap(), b"value"); - let mut delete_req = Request::default(); - delete_req.set_cmd_type(CmdType::Delete); - 
delete_req.mut_delete().set_key(b"key".to_vec()); - req.clear_requests(); - req.mut_requests().push(delete_req); - let (msg, mut sub) = PeerMsg::raft_command(req.clone()); + let mut delete = SimpleWriteEncoder::with_capacity(64); + delete.delete(CF_DEFAULT, b"key"); + let (msg, mut sub) = PeerMsg::simple_write(header, delete.encode()); router.send(2, msg).unwrap(); assert!(block_on(sub.wait_proposed())); assert!(block_on(sub.wait_committed())); diff --git a/components/raftstore-v2/tests/integrations/test_conf_change.rs b/components/raftstore-v2/tests/integrations/test_conf_change.rs index db62ae4a75a..8a075bb9a35 100644 --- a/components/raftstore-v2/tests/integrations/test_conf_change.rs +++ b/components/raftstore-v2/tests/integrations/test_conf_change.rs @@ -2,10 +2,13 @@ use std::{self, time::Duration}; -use engine_traits::Peekable; -use kvproto::raft_cmdpb::{AdminCmdType, CmdType, Request}; +use engine_traits::{Peekable, CF_DEFAULT}; +use kvproto::raft_cmdpb::AdminCmdType; use raft::prelude::ConfChangeType; -use raftstore_v2::router::{PeerMsg, PeerTick}; +use raftstore_v2::{ + router::{PeerMsg, PeerTick}, + SimpleWriteEncoder, +}; use tikv_util::store::new_learner_peer; use crate::cluster::{check_skip_wal, Cluster}; @@ -23,7 +26,7 @@ fn test_simple_change() { let store_id = cluster.node(1).id(); let new_peer = new_learner_peer(store_id, 10); admin_req.mut_change_peer().set_peer(new_peer.clone()); - let resp = cluster.routers[0].command(2, req.clone()).unwrap(); + let resp = cluster.routers[0].admin_command(2, req.clone()).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); let epoch = req.get_header().get_region_epoch(); let new_conf_ver = epoch.get_conf_ver() + 1; @@ -57,13 +60,10 @@ fn test_simple_change() { // write one kv after snapshot let (key, val) = (b"key", b"value"); - let mut write_req = cluster.routers[0].new_request_for(region_id); - let mut put_req = Request::default(); - put_req.set_cmd_type(CmdType::Put); - 
put_req.mut_put().set_key(key.to_vec()); - put_req.mut_put().set_value(val.to_vec()); - write_req.mut_requests().push(put_req); - let (msg, _) = PeerMsg::raft_command(write_req.clone()); + let header = Box::new(cluster.routers[0].new_request_for(region_id).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, key, val); + let (msg, _) = PeerMsg::simple_write(header, put.encode()); cluster.routers[0].send(region_id, msg).unwrap(); std::thread::sleep(Duration::from_millis(1000)); cluster.dispatch(region_id, vec![]); @@ -84,7 +84,7 @@ fn test_simple_change() { req.mut_admin_request() .mut_change_peer() .set_change_type(ConfChangeType::RemoveNode); - let resp = cluster.routers[0].command(2, req.clone()).unwrap(); + let resp = cluster.routers[0].admin_command(2, req.clone()).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); let epoch = req.get_header().get_region_epoch(); let new_conf_ver = epoch.get_conf_ver() + 1; diff --git a/components/raftstore-v2/tests/integrations/test_read.rs b/components/raftstore-v2/tests/integrations/test_read.rs index 07ae8b44bf3..f9575ff8da1 100644 --- a/components/raftstore-v2/tests/integrations/test_read.rs +++ b/components/raftstore-v2/tests/integrations/test_read.rs @@ -1,8 +1,9 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+use engine_traits::CF_DEFAULT; use futures::executor::block_on; use kvproto::raft_cmdpb::{CmdType, Request}; -use raftstore_v2::router::PeerMsg; +use raftstore_v2::{router::PeerMsg, SimpleWriteEncoder}; use tikv_util::{config::ReadableDuration, store::new_peer}; use txn_types::WriteBatchFlags; @@ -39,14 +40,11 @@ fn test_read_index() { std::thread::sleep(std::time::Duration::from_millis(200)); let read_req = req.clone(); // the read lease should be expired and renewed by write - let mut req = router.new_request_for(region_id); - let mut put_req = Request::default(); - put_req.set_cmd_type(CmdType::Put); - put_req.mut_put().set_key(b"key".to_vec()); - put_req.mut_put().set_value(b"value".to_vec()); - req.mut_requests().push(put_req); + let header = Box::new(router.new_request_for(region_id).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, b"key", b"value"); - let (msg, sub) = PeerMsg::raft_command(req.clone()); + let (msg, sub) = PeerMsg::simple_write(header, put.encode()); router.send(region_id, msg).unwrap(); block_on(sub.result()).unwrap(); @@ -172,7 +170,7 @@ fn test_local_read() { request_inner.set_cmd_type(CmdType::Snap); req.mut_requests().push(request_inner); - block_on(async { router.get_snapshot(req.clone()).await.unwrap() }); + block_on(async { router.snapshot(req.clone()).await.unwrap() }); let res = router.query(region_id, req.clone()).unwrap(); let resp = res.read().unwrap(); // The read index will be 0 as the retry process in the `get_snapshot` will diff --git a/components/raftstore-v2/tests/integrations/test_split.rs b/components/raftstore-v2/tests/integrations/test_split.rs index df806063249..d5bc784857e 100644 --- a/components/raftstore-v2/tests/integrations/test_split.rs +++ b/components/raftstore-v2/tests/integrations/test_split.rs @@ -2,16 +2,14 @@ use std::{thread, time::Duration}; -use engine_traits::{RaftEngineReadOnly, CF_RAFT}; +use engine_traits::{RaftEngineReadOnly, CF_DEFAULT, CF_RAFT}; use 
futures::executor::block_on; use kvproto::{ metapb, pdpb, - raft_cmdpb::{ - AdminCmdType, AdminRequest, CmdType, RaftCmdRequest, RaftCmdResponse, Request, SplitRequest, - }, + raft_cmdpb::{AdminCmdType, AdminRequest, RaftCmdRequest, RaftCmdResponse, SplitRequest}, }; use raftstore::store::{INIT_EPOCH_VER, RAFT_INIT_LOG_INDEX}; -use raftstore_v2::router::PeerMsg; +use raftstore_v2::{router::PeerMsg, SimpleWriteEncoder}; use tikv_util::store::new_peer; use crate::cluster::{Cluster, TestRouter}; @@ -37,7 +35,7 @@ fn new_batch_split_region_request( } fn must_split(region_id: u64, req: RaftCmdRequest, router: &mut TestRouter) { - let (msg, sub) = PeerMsg::raft_command(req); + let (msg, sub) = PeerMsg::admin_command(req); router.send(region_id, msg).unwrap(); block_on(sub.result()).unwrap(); @@ -47,19 +45,10 @@ fn must_split(region_id: u64, req: RaftCmdRequest, router: &mut TestRouter) { } fn put(router: &mut TestRouter, region_id: u64, key: &[u8]) -> RaftCmdResponse { - let mut req = router.new_request_for(region_id); - - let mut put_req = Request::default(); - put_req.set_cmd_type(CmdType::Put); - put_req.mut_put().set_key(key.to_vec()); - put_req.mut_put().set_value(b"v1".to_vec()); - req.mut_requests().push(put_req); - - let (msg, mut sub) = PeerMsg::raft_command(req.clone()); - router.send(region_id, msg).unwrap(); - assert!(block_on(sub.wait_proposed())); - assert!(block_on(sub.wait_committed())); - block_on(sub.result()).unwrap() + let header = Box::new(router.new_request_for(region_id).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, key, b"v1"); + router.simple_write(region_id, header, put).unwrap() } // Split the region according to the parameters diff --git a/components/raftstore-v2/tests/integrations/test_trace_apply.rs b/components/raftstore-v2/tests/integrations/test_trace_apply.rs index def064e6d29..71682ff52a4 100644 --- a/components/raftstore-v2/tests/integrations/test_trace_apply.rs +++ 
b/components/raftstore-v2/tests/integrations/test_trace_apply.rs @@ -2,12 +2,12 @@ use std::{path::Path, time::Duration}; -use engine_traits::{DbOptionsExt, MiscExt, Peekable, CF_LOCK, CF_WRITE, DATA_CFS}; +use engine_traits::{DbOptionsExt, MiscExt, Peekable, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS}; use futures::executor::block_on; use raftstore::store::RAFT_INIT_LOG_INDEX; -use raftstore_v2::router::PeerMsg; +use raftstore_v2::{router::PeerMsg, SimpleWriteEncoder}; -use crate::cluster::{new_put_request, Cluster}; +use crate::cluster::Cluster; fn count_file(path: &Path, pat: impl Fn(&Path) -> bool) -> usize { let mut count = 0; @@ -47,25 +47,30 @@ fn test_data_recovery() { router.wait_applied_to_current_term(2, Duration::from_secs(3)); // Write 100 keys to default CF and not flush. - let mut req = router.new_request_for(2); + let header = Box::new(router.new_request_for(2).take_header()); for i in 0..100 { - let put_req = new_put_request(format!("key{}", i), format!("value{}", i)); - req.clear_requests(); - req.mut_requests().push(put_req); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put( + CF_DEFAULT, + format!("key{}", i).as_bytes(), + format!("value{}", i).as_bytes(), + ); router - .send(2, PeerMsg::raft_command(req.clone()).0) + .send(2, PeerMsg::simple_write(header.clone(), put.encode()).0) .unwrap(); } // Write 100 keys to write CF and flush half. 
let mut sub = None; for i in 0..50 { - let mut put_req = new_put_request(format!("key{}", i), format!("value{}", i)); - put_req.mut_put().set_cf(CF_WRITE.to_owned()); - req.clear_requests(); - req.mut_requests().push(put_req); - let (ch, s) = PeerMsg::raft_command(req.clone()); - router.send(2, ch).unwrap(); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put( + CF_WRITE, + format!("key{}", i).as_bytes(), + format!("value{}", i).as_bytes(), + ); + let (msg, s) = PeerMsg::simple_write(header.clone(), put.encode()); + router.send(2, msg).unwrap(); sub = Some(s); } let resp = block_on(sub.take().unwrap().result()).unwrap(); @@ -75,23 +80,27 @@ fn test_data_recovery() { cached.latest().unwrap().flush_cf(CF_WRITE, true).unwrap(); let router = &mut cluster.routers[0]; for i in 50..100 { - let mut put_req = new_put_request(format!("key{}", i), format!("value{}", i)); - put_req.mut_put().set_cf(CF_WRITE.to_owned()); - req.clear_requests(); - req.mut_requests().push(put_req); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put( + CF_WRITE, + format!("key{}", i).as_bytes(), + format!("value{}", i).as_bytes(), + ); router - .send(2, PeerMsg::raft_command(req.clone()).0) + .send(2, PeerMsg::simple_write(header.clone(), put.encode()).0) .unwrap(); } // Write 100 keys to lock CF and flush all. 
for i in 0..100 { - let mut put_req = new_put_request(format!("key{}", i), format!("value{}", i)); - put_req.mut_put().set_cf(CF_LOCK.to_owned()); - req.clear_requests(); - req.mut_requests().push(put_req); - let (ch, s) = PeerMsg::raft_command(req.clone()); - router.send(2, ch).unwrap(); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put( + CF_LOCK, + format!("key{}", i).as_bytes(), + format!("value{}", i).as_bytes(), + ); + let (msg, s) = PeerMsg::simple_write(header.clone(), put.encode()); + router.send(2, msg).unwrap(); sub = Some(s); } let resp = block_on(sub.take().unwrap().result()).unwrap(); @@ -137,12 +146,9 @@ fn test_data_recovery() { let router = &mut cluster.routers[0]; // Write another key to ensure all data are recovered. - let put_req = new_put_request("key101", "value101"); - req.clear_requests(); - req.mut_requests().push(put_req); - let (msg, sub) = PeerMsg::raft_command(req.clone()); - router.send(2, msg).unwrap(); - let resp = block_on(sub.result()).unwrap(); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, b"key101", b"value101"); + let resp = router.simple_write(2, header, put).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); // After being restarted, all unflushed logs should be applied again. 
So there diff --git a/components/raftstore-v2/tests/integrations/test_transfer_leader.rs b/components/raftstore-v2/tests/integrations/test_transfer_leader.rs index 7096f06b1d2..d031d6b1eba 100644 --- a/components/raftstore-v2/tests/integrations/test_transfer_leader.rs +++ b/components/raftstore-v2/tests/integrations/test_transfer_leader.rs @@ -2,14 +2,14 @@ use std::time::Duration; -use engine_traits::Peekable; +use engine_traits::{Peekable, CF_DEFAULT}; use futures::executor::block_on; use kvproto::{ metapb, - raft_cmdpb::{AdminCmdType, CmdType, Request, TransferLeaderRequest}, + raft_cmdpb::{AdminCmdType, TransferLeaderRequest}, }; use raft::prelude::ConfChangeType; -use raftstore_v2::router::PeerMsg; +use raftstore_v2::{router::PeerMsg, SimpleWriteEncoder}; use tikv_util::store::new_peer; use crate::cluster::Cluster; @@ -22,12 +22,6 @@ fn put_data( key: &[u8], ) { let router = &cluster.routers[node_off]; - let mut req = router.new_request_for(region_id); - let mut put_req = Request::default(); - put_req.set_cmd_type(CmdType::Put); - put_req.mut_put().set_key(key[1..].to_vec()); - put_req.mut_put().set_value(b"value".to_vec()); - req.mut_requests().push(put_req); router.wait_applied_to_current_term(region_id, Duration::from_secs(3)); @@ -41,7 +35,10 @@ fn put_data( .clone(); assert!(tablet.get_value(key).unwrap().is_none()); - let (msg, mut sub) = PeerMsg::raft_command(req.clone()); + let header = Box::new(router.new_request_for(region_id).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, &key[1..], b"value"); + let (msg, mut sub) = PeerMsg::simple_write(header, put.encode()); router.send(region_id, msg).unwrap(); std::thread::sleep(std::time::Duration::from_millis(10)); cluster.dispatch(region_id, vec![]); @@ -84,7 +81,7 @@ pub fn must_transfer_leader( let admin_req = req.mut_admin_request(); admin_req.set_cmd_type(AdminCmdType::TransferLeader); admin_req.set_transfer_leader(transfer_req); - let resp = 
router.command(region_id, req).unwrap(); + let resp = router.admin_command(region_id, req).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); cluster.dispatch(region_id, vec![]); @@ -114,7 +111,7 @@ fn test_transfer_leader() { let peer1 = new_peer(store_id, 10); admin_req.mut_change_peer().set_peer(peer1.clone()); let req_clone = req.clone(); - let resp = router0.command(region_id, req_clone).unwrap(); + let resp = router0.admin_command(region_id, req_clone).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); let epoch = req.get_header().get_region_epoch(); let new_conf_ver = epoch.get_conf_ver() + 1; diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index a3d0bdb2712..affa0205e8f 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -90,7 +90,7 @@ use crate::{ peer::Peer, peer_storage::{write_initial_apply_state, write_peer_state}, util::{ - self, admin_cmd_epoch_lookup, check_flashback_state, check_region_epoch, + self, admin_cmd_epoch_lookup, check_flashback_state, check_req_region_epoch, compare_region_epoch, ChangePeerI, ConfChangeKind, KeysInfoFormatter, LatencyInspector, }, Config, RegionSnapshot, RegionTask, WriteCallback, @@ -1587,7 +1587,7 @@ where // Include region for epoch not match after merge may cause key not in range. 
let include_region = req.get_header().get_region_epoch().get_version() >= self.last_merge_version; - check_region_epoch(req, &self.region, include_region)?; + check_req_region_epoch(req, &self.region, include_region)?; check_flashback_state( self.region.get_is_in_flashback(), req, diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 47c9357e1c4..311e7e58a12 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -4985,7 +4985,7 @@ where msg: &RaftCmdRequest, ) -> Result> { // Check store_id, make sure that the msg is dispatched to the right place. - if let Err(e) = util::check_store_id(msg, self.store_id()) { + if let Err(e) = util::check_store_id(msg.get_header(), self.store_id()) { self.ctx .raft_metrics .invalid_proposal @@ -5004,7 +5004,7 @@ where let request = msg.get_requests(); // peer_id must be the same as peer's. - if let Err(e) = util::check_peer_id(msg, self.fsm.peer.peer_id()) { + if let Err(e) = util::check_peer_id(msg.get_header(), self.fsm.peer.peer_id()) { self.ctx .raft_metrics .invalid_proposal @@ -5084,12 +5084,12 @@ where ))); } // Check whether the term is stale. - if let Err(e) = util::check_term(msg, self.fsm.peer.term()) { + if let Err(e) = util::check_term(msg.get_header(), self.fsm.peer.term()) { self.ctx.raft_metrics.invalid_proposal.stale_command.inc(); return Err(e); } - match util::check_region_epoch(msg, self.fsm.peer.region(), true) { + match util::check_req_region_epoch(msg, self.fsm.peer.region(), true) { Err(Error::EpochNotMatch(m, mut new_regions)) => { // Attach the region which might be split from the current region. But it // doesn't matter if the region is not split from the current region. 
If the diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 22b822c2115..86d16b07506 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -76,8 +76,8 @@ use super::{ read_queue::{ReadIndexQueue, ReadIndexRequest}, transport::Transport, util::{ - self, check_region_epoch, is_initial_msg, AdminCmdEpochState, ChangePeerI, ConfChangeKind, - Lease, LeaseState, NORMAL_REQ_CHECK_CONF_VER, NORMAL_REQ_CHECK_VER, + self, check_req_region_epoch, is_initial_msg, AdminCmdEpochState, ChangePeerI, + ConfChangeKind, Lease, LeaseState, NORMAL_REQ_CHECK_CONF_VER, NORMAL_REQ_CHECK_VER, }, DestroyPeerJob, LocalReadContext, }; @@ -4708,7 +4708,7 @@ where ) -> ReadResponse { let region = self.region().clone(); if check_epoch { - if let Err(e) = check_region_epoch(&req, ®ion, true) { + if let Err(e) = check_req_region_epoch(&req, ®ion, true) { debug!("epoch not match"; "region_id" => region.get_id(), "err" => ?e); let mut response = cmd_resp::new_error(e); cmd_resp::bind_term(&mut response, self.term()); diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 41409a49448..78f024997cf 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -19,7 +19,9 @@ use engine_traits::KvEngine; use kvproto::{ kvrpcpb::{self, KeyRange, LeaderInfo}, metapb::{self, Peer, PeerRole, Region, RegionEpoch}, - raft_cmdpb::{AdminCmdType, ChangePeerRequest, ChangePeerV2Request, RaftCmdRequest}, + raft_cmdpb::{ + AdminCmdType, ChangePeerRequest, ChangePeerV2Request, RaftCmdRequest, RaftRequestHeader, + }, raft_serverpb::{RaftMessage, RaftSnapshotData}, }; use protobuf::{self, Message}; @@ -235,28 +237,45 @@ pub fn admin_cmd_epoch_lookup(admin_cmp_type: AdminCmdType) -> AdminCmdEpochStat pub static NORMAL_REQ_CHECK_VER: bool = true; pub static NORMAL_REQ_CHECK_CONF_VER: bool = false; -pub fn check_region_epoch( +pub fn 
check_req_region_epoch( req: &RaftCmdRequest, region: &metapb::Region, include_region: bool, ) -> Result<()> { - let (check_ver, check_conf_ver) = if !req.has_admin_request() { - // for get/set/delete, we don't care conf_version. - (NORMAL_REQ_CHECK_VER, NORMAL_REQ_CHECK_CONF_VER) + let admin_ty = if !req.has_admin_request() { + None } else { - let epoch_state = admin_cmd_epoch_lookup(req.get_admin_request().get_cmd_type()); - (epoch_state.check_ver, epoch_state.check_conf_ver) + Some(req.get_admin_request().get_cmd_type()) + }; + check_region_epoch(req.get_header(), admin_ty, region, include_region) +} + +pub fn check_region_epoch( + header: &RaftRequestHeader, + admin_ty: Option, + region: &metapb::Region, + include_region: bool, +) -> Result<()> { + let (check_ver, check_conf_ver) = match admin_ty { + None => { + // for get/set/delete, we don't care conf_version. + (NORMAL_REQ_CHECK_VER, NORMAL_REQ_CHECK_CONF_VER) + } + Some(ty) => { + let epoch_state = admin_cmd_epoch_lookup(ty); + (epoch_state.check_ver, epoch_state.check_conf_ver) + } }; if !check_ver && !check_conf_ver { return Ok(()); } - if !req.get_header().has_region_epoch() { + if !header.has_region_epoch() { return Err(box_err!("missing epoch!")); } - let from_epoch = req.get_header().get_region_epoch(); + let from_epoch = header.get_region_epoch(); compare_region_epoch( from_epoch, region, @@ -351,8 +370,8 @@ pub fn is_region_epoch_equal( } #[inline] -pub fn check_store_id(req: &RaftCmdRequest, store_id: u64) -> Result<()> { - let peer = req.get_header().get_peer(); +pub fn check_store_id(header: &RaftRequestHeader, store_id: u64) -> Result<()> { + let peer = header.get_peer(); if peer.get_store_id() == store_id { Ok(()) } else { @@ -364,8 +383,7 @@ pub fn check_store_id(req: &RaftCmdRequest, store_id: u64) -> Result<()> { } #[inline] -pub fn check_term(req: &RaftCmdRequest, term: u64) -> Result<()> { - let header = req.get_header(); +pub fn check_term(header: &RaftRequestHeader, term: u64) -> 
Result<()> { if header.get_term() == 0 || term <= header.get_term() + 1 { Ok(()) } else { @@ -376,8 +394,7 @@ pub fn check_term(req: &RaftCmdRequest, term: u64) -> Result<()> { } #[inline] -pub fn check_peer_id(req: &RaftCmdRequest, peer_id: u64) -> Result<()> { - let header = req.get_header(); +pub fn check_peer_id(header: &RaftRequestHeader, peer_id: u64) -> Result<()> { if header.get_peer().get_id() == peer_id { Ok(()) } else { @@ -2001,34 +2018,34 @@ mod tests { #[test] fn test_check_store_id() { - let mut req = RaftCmdRequest::default(); - req.mut_header().mut_peer().set_store_id(1); - check_store_id(&req, 1).unwrap(); - check_store_id(&req, 2).unwrap_err(); + let mut header = RaftRequestHeader::default(); + header.mut_peer().set_store_id(1); + check_store_id(&header, 1).unwrap(); + check_store_id(&header, 2).unwrap_err(); } #[test] fn test_check_peer_id() { - let mut req = RaftCmdRequest::default(); - req.mut_header().mut_peer().set_id(1); - check_peer_id(&req, 1).unwrap(); - check_peer_id(&req, 2).unwrap_err(); + let mut header = RaftRequestHeader::default(); + header.mut_peer().set_id(1); + check_peer_id(&header, 1).unwrap(); + check_peer_id(&header, 2).unwrap_err(); } #[test] fn test_check_term() { - let mut req = RaftCmdRequest::default(); - req.mut_header().set_term(7); - check_term(&req, 7).unwrap(); - check_term(&req, 8).unwrap(); + let mut header = RaftRequestHeader::default(); + header.set_term(7); + check_term(&header, 7).unwrap(); + check_term(&header, 8).unwrap(); // If header's term is 2 verions behind current term, // leadership may have been changed away. 
- check_term(&req, 9).unwrap_err(); - check_term(&req, 10).unwrap_err(); + check_term(&header, 9).unwrap_err(); + check_term(&header, 10).unwrap_err(); } #[test] - fn test_check_region_epoch() { + fn test_check_req_region_epoch() { let mut epoch = RegionEpoch::default(); epoch.set_conf_ver(2); epoch.set_version(2); @@ -2036,7 +2053,7 @@ mod tests { region.set_region_epoch(epoch.clone()); // Epoch is required for most requests even if it's empty. - check_region_epoch(&RaftCmdRequest::default(), ®ion, false).unwrap_err(); + check_req_region_epoch(&RaftCmdRequest::default(), ®ion, false).unwrap_err(); // These admin commands do not require epoch. for ty in &[ @@ -2051,11 +2068,11 @@ mod tests { req.set_admin_request(admin); // It is Okay if req does not have region epoch. - check_region_epoch(&req, ®ion, false).unwrap(); + check_req_region_epoch(&req, ®ion, false).unwrap(); req.mut_header().set_region_epoch(epoch.clone()); - check_region_epoch(&req, ®ion, true).unwrap(); - check_region_epoch(&req, ®ion, false).unwrap(); + check_req_region_epoch(&req, ®ion, true).unwrap(); + check_req_region_epoch(&req, ®ion, false).unwrap(); } // These admin commands requires epoch.version. @@ -2073,7 +2090,7 @@ mod tests { req.set_admin_request(admin); // Error if req does not have region epoch. 
- check_region_epoch(&req, ®ion, false).unwrap_err(); + check_req_region_epoch(&req, ®ion, false).unwrap_err(); let mut stale_version_epoch = epoch.clone(); stale_version_epoch.set_version(1); @@ -2081,14 +2098,14 @@ mod tests { stale_region.set_region_epoch(stale_version_epoch.clone()); req.mut_header() .set_region_epoch(stale_version_epoch.clone()); - check_region_epoch(&req, &stale_region, false).unwrap(); + check_req_region_epoch(&req, &stale_region, false).unwrap(); let mut latest_version_epoch = epoch.clone(); latest_version_epoch.set_version(3); for epoch in &[stale_version_epoch, latest_version_epoch] { req.mut_header().set_region_epoch(epoch.clone()); - check_region_epoch(&req, ®ion, false).unwrap_err(); - check_region_epoch(&req, ®ion, true).unwrap_err(); + check_req_region_epoch(&req, ®ion, false).unwrap_err(); + check_req_region_epoch(&req, ®ion, true).unwrap_err(); } } @@ -2109,21 +2126,21 @@ mod tests { req.set_admin_request(admin); // Error if req does not have region epoch. 
- check_region_epoch(&req, ®ion, false).unwrap_err(); + check_req_region_epoch(&req, ®ion, false).unwrap_err(); let mut stale_conf_epoch = epoch.clone(); stale_conf_epoch.set_conf_ver(1); let mut stale_region = metapb::Region::default(); stale_region.set_region_epoch(stale_conf_epoch.clone()); req.mut_header().set_region_epoch(stale_conf_epoch.clone()); - check_region_epoch(&req, &stale_region, false).unwrap(); + check_req_region_epoch(&req, &stale_region, false).unwrap(); let mut latest_conf_epoch = epoch.clone(); latest_conf_epoch.set_conf_ver(3); for epoch in &[stale_conf_epoch, latest_conf_epoch] { req.mut_header().set_region_epoch(epoch.clone()); - check_region_epoch(&req, ®ion, false).unwrap_err(); - check_region_epoch(&req, ®ion, true).unwrap_err(); + check_req_region_epoch(&req, ®ion, false).unwrap_err(); + check_req_region_epoch(&req, ®ion, true).unwrap_err(); } } } diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index c78a51866ae..a20fcefdbdb 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -760,7 +760,7 @@ where } let store_id = self.store_id.get().unwrap(); - if let Err(e) = util::check_store_id(req, store_id) { + if let Err(e) = util::check_store_id(req.get_header(), store_id) { TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.store_id_mismatch.inc()); debug!("rejected by store id not match"; "err" => %e); return Err(e); @@ -780,13 +780,13 @@ where fail_point!("localreader_on_find_delegate"); // Check peer id. - if let Err(e) = util::check_peer_id(req, delegate.peer_id) { + if let Err(e) = util::check_peer_id(req.get_header(), delegate.peer_id) { TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.peer_id_mismatch.inc()); return Err(e); } // Check term. 
- if let Err(e) = util::check_term(req, delegate.term) { + if let Err(e) = util::check_term(req.get_header(), delegate.term) { debug!( "check term"; "delegate_term" => delegate.term, @@ -797,7 +797,7 @@ where } // Check region epoch. - if util::check_region_epoch(req, &delegate.region, false).is_err() { + if util::check_req_region_epoch(req, &delegate.region, false).is_err() { TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.epoch.inc()); // Stale epoch, redirect it to raftstore to get the latest region. debug!("rejected by epoch not match"; "tag" => &delegate.tag); diff --git a/src/server/mod.rs b/src/server/mod.rs index 1b41dfc4e56..0e4a3616a6c 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -13,6 +13,7 @@ pub mod lock_manager; pub mod node; mod proxy; pub mod raftkv; +mod raftkv2; mod reset_to_version; pub mod resolve; pub mod server; diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index b12e56ee7a0..607d5af71f3 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -84,7 +84,7 @@ pub enum Error { Timeout(Duration), } -fn get_status_kind_from_engine_error(e: &kv::Error) -> RequestStatusKind { +pub fn get_status_kind_from_engine_error(e: &kv::Error) -> RequestStatusKind { match *e { KvError(box KvErrorInner::Request(ref header)) => { RequestStatusKind::from(storage::get_error_kind_from_header(header)) diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs new file mode 100644 index 00000000000..5bcdd131d72 --- /dev/null +++ b/src/server/raftkv2/mod.rs @@ -0,0 +1,307 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{mem, pin::Pin, task::Poll}; + +use engine_traits::{KvEngine, RaftEngine, CF_LOCK}; +use futures::{Future, Stream, StreamExt}; +use kvproto::{ + raft_cmdpb::{CmdType, RaftCmdRequest, Request}, + raft_serverpb::RaftMessage, +}; +use raftstore::store::RegionSnapshot; +use raftstore_v2::{ + router::{ + message::SimpleWrite, CmdResChannelBuilder, CmdResEvent, CmdResStream, PeerMsg, RaftRouter, + }, + SimpleWriteEncoder, StoreRouter, +}; +use tikv_kv::{Modify, RaftExtension, WriteEvent}; +use tikv_util::{codec::number::NumberEncoder, time::Instant}; +use txn_types::WriteBatchFlags; + +use super::{ + metrics::{ASYNC_REQUESTS_COUNTER_VEC, ASYNC_REQUESTS_DURATIONS_VEC}, + raftkv::{get_status_kind_from_engine_error, new_request_header}, +}; + +#[derive(Clone)] +pub struct RaftExtensionImpl { + router: StoreRouter, +} + +impl RaftExtension for RaftExtensionImpl { + #[inline] + fn feed(&self, msg: RaftMessage, key_message: bool) { + let region_id = msg.get_region_id(); + let msg_ty = msg.get_message().get_msg_type(); + // Channel full and region not found are ignored unless it's a key message. + if let Err(e) = self.router.send_raft_message(Box::new(msg)) && key_message { + error!("failed to send raft message"; "region_id" => region_id, "msg_ty" => ?msg_ty, "err" => ?e); + } + } + + fn report_reject_message(&self, _region_id: u64, _from_peer_id: u64) { + // TODO:reject the message on connection side instead of go through + // raft layer. 
+ } + + fn report_peer_unreachable(&self, region_id: u64, to_peer_id: u64) { + let _ = self + .router + .send(region_id, PeerMsg::PeerUnreachable { to_peer_id }); + } + + fn report_store_unreachable(&self, _store_id: u64) {} + + fn report_snapshot_status( + &self, + _region_id: u64, + _to_peer_id: u64, + _status: raft::SnapshotStatus, + ) { + } + + fn report_resolved(&self, _store_id: u64, _group_id: u64) {} + + fn split( + &self, + _region_id: u64, + _region_epoch: kvproto::metapb::RegionEpoch, + _split_keys: Vec>, + _source: String, + ) -> futures::future::BoxFuture<'static, tikv_kv::Result>> { + Box::pin(async move { Err(box_err!("raft split is not supported")) }) + } + + fn query_region( + &self, + _region_id: u64, + ) -> futures::future::BoxFuture< + 'static, + tikv_kv::Result, + > { + Box::pin(async move { Err(box_err!("query region is not supported")) }) + } +} + +struct Transform { + resp: CmdResStream, + early_err: Option, +} + +impl Stream for Transform { + type Item = WriteEvent; + + fn poll_next( + self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + let stream = self.get_mut(); + if stream.early_err.is_some() { + return Poll::Ready(Some(WriteEvent::Finished(Err(stream + .early_err + .take() + .unwrap())))); + } + match stream.resp.poll_next_unpin(cx) { + Poll::Pending => Poll::Pending, + Poll::Ready(Some(CmdResEvent::Proposed)) => Poll::Ready(Some(WriteEvent::Proposed)), + Poll::Ready(Some(CmdResEvent::Committed)) => Poll::Ready(Some(WriteEvent::Committed)), + Poll::Ready(Some(CmdResEvent::Finished(mut resp))) => { + let res = if !resp.get_header().has_error() { + Ok(()) + } else { + Err(tikv_kv::Error::from(resp.take_header().take_error())) + }; + Poll::Ready(Some(WriteEvent::Finished(res))) + } + Poll::Ready(None) => Poll::Ready(None), + } + } +} + +#[derive(Clone)] +pub struct RaftKv2 { + router: RaftRouter, +} + +impl RaftKv2 { + #[allow(unused)] + pub fn new(router: RaftRouter) -> RaftKv2 { + RaftKv2 { router } + } +} + +impl 
tikv_kv::Engine for RaftKv2 { + type Snap = RegionSnapshot; + type Local = EK; + + #[inline] + fn kv_engine(&self) -> Option { + None + } + + type RaftExtension = RaftExtensionImpl; + + fn modify_on_kv_engine( + &self, + _region_modifies: collections::HashMap>, + ) -> tikv_kv::Result<()> { + // TODO + Ok(()) + } + + type SnapshotRes = impl Future> + Send; + fn async_snapshot(&mut self, mut ctx: tikv_kv::SnapContext<'_>) -> Self::SnapshotRes { + let mut req = Request::default(); + req.set_cmd_type(CmdType::Snap); + if !ctx.key_ranges.is_empty() && ctx.start_ts.map_or(false, |ts| !ts.is_zero()) { + req.mut_read_index() + .set_start_ts(ctx.start_ts.as_ref().unwrap().into_inner()); + req.mut_read_index() + .set_key_ranges(mem::take(&mut ctx.key_ranges).into()); + } + ASYNC_REQUESTS_COUNTER_VEC.snapshot.all.inc(); + let begin_instant = Instant::now_coarse(); + + let mut header = new_request_header(ctx.pb_ctx); + let mut flags = 0; + if ctx.pb_ctx.get_stale_read() && ctx.start_ts.map_or(true, |ts| !ts.is_zero()) { + let mut data = [0u8; 8]; + (&mut data[..]) + .encode_u64(ctx.start_ts.unwrap_or_default().into_inner()) + .unwrap(); + flags |= WriteBatchFlags::STALE_READ.bits(); + header.set_flag_data(data.into()); + } + if ctx.allowed_in_flashback { + flags |= WriteBatchFlags::FLASHBACK.bits(); + } + header.set_flags(flags); + + let mut cmd = RaftCmdRequest::default(); + cmd.set_header(header); + cmd.set_requests(vec![req].into()); + let f = self.router.snapshot(cmd); + async move { + let res = f.await; + match res { + Ok(snap) => { + ASYNC_REQUESTS_DURATIONS_VEC + .snapshot + .observe(begin_instant.saturating_elapsed_secs()); + ASYNC_REQUESTS_COUNTER_VEC.snapshot.success.inc(); + Ok(snap) + } + Err(mut resp) => { + if resp + .get_responses() + .get(0) + .map_or(false, |r| r.get_read_index().has_locked()) + { + let locked = resp.mut_responses()[0].mut_read_index().take_locked(); + Err(tikv_kv::Error::from(tikv_kv::ErrorInner::KeyIsLocked( + locked, + ))) + } else if 
resp.get_header().has_error() { + let err = tikv_kv::Error::from(resp.take_header().take_error()); + let status_kind = get_status_kind_from_engine_error(&err); + ASYNC_REQUESTS_COUNTER_VEC.snapshot.get(status_kind).inc(); + Err(err) + } else { + Err(box_err!("unexpected response: {:?}", resp)) + } + } + } + } + } + + type WriteRes = impl Stream + Send + Unpin; + fn async_write( + &self, + ctx: &kvproto::kvrpcpb::Context, + batch: tikv_kv::WriteData, + subscribed: u8, + on_applied: Option, + ) -> Self::WriteRes { + let region_id = ctx.region_id; + ASYNC_REQUESTS_COUNTER_VEC.write.all.inc(); + let begin_instant = Instant::now_coarse(); + let mut header = Box::new(new_request_header(ctx)); + let mut flags = 0; + if batch.extra.one_pc { + flags |= WriteBatchFlags::ONE_PC.bits(); + } + if batch.extra.allowed_in_flashback { + flags |= WriteBatchFlags::FLASHBACK.bits(); + } + header.set_flags(flags); + + self.schedule_txn_extra(batch.extra); + let mut encoder = SimpleWriteEncoder::with_capacity(128); + for m in batch.modifies { + match m { + Modify::Put(cf, k, v) => encoder.put(cf, k.as_encoded(), &v), + Modify::Delete(cf, k) => encoder.delete(cf, k.as_encoded()), + Modify::PessimisticLock(k, lock) => { + encoder.put(CF_LOCK, k.as_encoded(), &lock.into_lock().to_bytes()) + } + Modify::DeleteRange(cf, start_key, end_key, notify_only) => encoder.delete_range( + cf, + start_key.as_encoded(), + end_key.as_encoded(), + notify_only, + ), + } + } + let data = encoder.encode(); + let mut builder = CmdResChannelBuilder::default(); + if WriteEvent::subscribed_proposed(subscribed) { + builder.subscribe_proposed(); + } + if WriteEvent::subscribed_committed(subscribed) { + builder.subscribe_committed(); + } + if let Some(cb) = on_applied { + builder.before_set(move |resp| { + let mut res = if !resp.get_header().has_error() { + Ok(()) + } else { + Err(tikv_kv::Error::from(resp.get_header().get_error().clone())) + }; + cb(&mut res); + }); + } + let (ch, sub) = builder.build(); + let msg 
= PeerMsg::SimpleWrite(SimpleWrite { + header, + data, + ch, + send_time: Instant::now_coarse(), + }); + let res = self + .router + .store_router() + .send(region_id, msg) + .map_err(|e| tikv_kv::Error::from(raftstore_v2::Error::from(e))); + (Transform { + resp: CmdResStream::new(sub), + early_err: res.err(), + }) + .inspect(move |ev| { + let WriteEvent::Finished(res) = ev else { return }; + match res { + Ok(()) => { + ASYNC_REQUESTS_COUNTER_VEC.write.success.inc(); + ASYNC_REQUESTS_DURATIONS_VEC + .write + .observe(begin_instant.saturating_elapsed_secs()); + } + Err(e) => { + let status_kind = get_status_kind_from_engine_error(e); + ASYNC_REQUESTS_COUNTER_VEC.write.get(status_kind).inc(); + } + } + }) + } +} From 70e15257e92524fa57c100619dd555bf58bf7853 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 20 Dec 2022 10:56:54 +0800 Subject: [PATCH 0416/1149] engine: adapt engine metrics for multi-rocks (#13942) ref tikv/tikv#12842 None Signed-off-by: tabokie --- Cargo.lock | 6 +- components/engine_panic/src/misc.rs | 20 +- components/engine_panic/src/raft_engine.rs | 4 - components/engine_rocks/src/engine.rs | 41 +- components/engine_rocks/src/lib.rs | 7 +- components/engine_rocks/src/misc.rs | 11 +- components/engine_rocks/src/raft_engine.rs | 4 - components/engine_rocks/src/rocks_metrics.rs | 499 ++++++++++++------ components/engine_rocks/src/util.rs | 4 +- components/engine_traits/src/engine.rs | 9 +- components/engine_traits/src/misc.rs | 19 + components/engine_traits/src/raft_engine.rs | 1 - components/server/src/lib.rs | 1 + components/server/src/server.rs | 99 +++- components/server/src/signal_handler.rs | 30 +- components/test_raftstore/src/cluster.rs | 10 +- components/test_raftstore/src/server.rs | 2 + components/test_raftstore/src/util.rs | 15 +- etc/config-template.toml | 6 - metrics/grafana/tikv_details.json | 4 +- src/config/mod.rs | 31 +- src/server/debug.rs | 37 +- src/server/engine_factory.rs | 15 +- src/server/service/debug.rs | 17 +- 
tests/failpoints/cases/test_pd_client.rs | 15 +- tests/integrations/config/mod.rs | 4 +- tests/integrations/config/test-custom.toml | 2 - .../raftstore/test_compact_lock_cf.rs | 8 +- 28 files changed, 599 insertions(+), 322 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 97f540aa100..cb371b739af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2876,7 +2876,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#f94fdd30dd94f6fd22c8052edfd2c4039d9f2fbd" +source = "git+https://github.com/tikv/rust-rocksdb.git#0ef7101a061c513c684ad68acd15f01c8548b43a" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2895,7 +2895,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#f94fdd30dd94f6fd22c8052edfd2c4039d9f2fbd" +source = "git+https://github.com/tikv/rust-rocksdb.git#0ef7101a061c513c684ad68acd15f01c8548b43a" dependencies = [ "bzip2-sys", "cc", @@ -4761,7 +4761,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#f94fdd30dd94f6fd22c8052edfd2c4039d9f2fbd" +source = "git+https://github.com/tikv/rust-rocksdb.git#0ef7101a061c513c684ad68acd15f01c8548b43a" dependencies = [ "libc 0.2.132", "librocksdb_sys", diff --git a/components/engine_panic/src/misc.rs b/components/engine_panic/src/misc.rs index 8c983051438..730f44a7e2f 100644 --- a/components/engine_panic/src/misc.rs +++ b/components/engine_panic/src/misc.rs @@ -1,10 +1,28 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{DeleteStrategy, MiscExt, Range, Result}; +use engine_traits::{DeleteStrategy, MiscExt, Range, Result, StatisticsReporter}; use crate::engine::PanicEngine; +pub struct PanicReporter; + +impl StatisticsReporter for PanicReporter { + fn new(name: &str) -> Self { + panic!() + } + + fn collect(&mut self, engine: &PanicEngine) { + panic!() + } + + fn flush(&mut self) { + panic!() + } +} + impl MiscExt for PanicEngine { + type StatisticsReporter = PanicReporter; + fn flush_cfs(&self, cfs: &[&str], wait: bool) -> Result<()> { panic!() } diff --git a/components/engine_panic/src/raft_engine.rs b/components/engine_panic/src/raft_engine.rs index f5e0c424db0..59c0422902c 100644 --- a/components/engine_panic/src/raft_engine.rs +++ b/components/engine_panic/src/raft_engine.rs @@ -136,10 +136,6 @@ impl RaftEngine for PanicEngine { panic!() } - fn reset_statistics(&self) { - panic!() - } - fn dump_stats(&self) -> Result { panic!() } diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 0c37120e7fc..0e73de357e5 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -6,17 +6,7 @@ use engine_traits::{IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Resu use rocksdb::{DBIterator, Writable, DB}; use crate::{ - db_vector::RocksDbVector, - options::RocksReadOptions, - r2e, - rocks_metrics::{ - flush_engine_histogram_metrics, flush_engine_iostall_properties, flush_engine_properties, - flush_engine_ticker_metrics, - }, - rocks_metrics_defs::{ - ENGINE_HIST_TYPES, ENGINE_TICKER_TYPES, TITAN_ENGINE_HIST_TYPES, TITAN_ENGINE_TICKER_TYPES, - }, - util::get_cf_handle, + db_vector::RocksDbVector, options::RocksReadOptions, r2e, util::get_cf_handle, RocksEngineIterator, RocksSnapshot, }; @@ -62,35 +52,6 @@ impl KvEngine for RocksEngine { self.db.sync_wal().map_err(r2e) } - fn flush_metrics(&self, instance: &str) { - for t in ENGINE_TICKER_TYPES { - let v = 
self.db.get_and_reset_statistics_ticker_count(*t); - flush_engine_ticker_metrics(*t, v, instance); - } - for t in ENGINE_HIST_TYPES { - if let Some(v) = self.db.get_statistics_histogram(*t) { - flush_engine_histogram_metrics(*t, v, instance); - } - } - if self.db.is_titan() { - for t in TITAN_ENGINE_TICKER_TYPES { - let v = self.db.get_and_reset_statistics_ticker_count(*t); - flush_engine_ticker_metrics(*t, v, instance); - } - for t in TITAN_ENGINE_HIST_TYPES { - if let Some(v) = self.db.get_statistics_histogram(*t) { - flush_engine_histogram_metrics(*t, v, instance); - } - } - } - flush_engine_properties(&self.db, instance); - flush_engine_iostall_properties(&self.db, instance); - } - - fn reset_statistics(&self) { - self.db.reset_statistics(); - } - fn bad_downcast(&self) -> &T { let e: &dyn Any = &self.db; e.downcast_ref().expect("bad engine downcast") diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index b6f3e36146c..94a4c23a3c4 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -16,6 +16,8 @@ //! Please read the engine_trait crate docs before hacking. 
#![cfg_attr(test, feature(test))] +#![feature(let_chains)] +#![feature(option_get_or_insert_default)] #[allow(unused_extern_crates)] extern crate tikv_alloc; @@ -104,7 +106,10 @@ pub mod file_system; mod raft_engine; -pub use rocksdb::{set_perf_flags, set_perf_level, PerfContext, PerfFlag, PerfFlags, PerfLevel}; +pub use rocksdb::{ + set_perf_flags, set_perf_level, PerfContext, PerfFlag, PerfFlags, PerfLevel, + Statistics as RocksStatistics, +}; pub mod flow_control_factors; pub use flow_control_factors::*; diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index 9ef2ed079b2..4761183546e 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -8,8 +8,8 @@ use rocksdb::Range as RocksRange; use tikv_util::{box_try, keybuilder::KeyBuilder}; use crate::{ - engine::RocksEngine, r2e, rocks_metrics_defs::*, sst::RocksSstWriterBuilder, util, - RocksSstWriter, + engine::RocksEngine, r2e, rocks_metrics::RocksStatisticsReporter, rocks_metrics_defs::*, + sst::RocksSstWriterBuilder, util, RocksSstWriter, }; pub const MAX_DELETE_COUNT_BY_KEY: usize = 2048; @@ -126,6 +126,8 @@ impl RocksEngine { } impl MiscExt for RocksEngine { + type StatisticsReporter = RocksStatisticsReporter; + fn flush_cfs(&self, cfs: &[&str], wait: bool) -> Result<()> { let mut handles = vec![]; for cf in cfs { @@ -277,11 +279,6 @@ impl MiscExt for RocksEngine { s.extend_from_slice(v.as_bytes()); } - // more stats if enable_statistics is true. 
- if let Some(v) = self.as_inner().get_statistics() { - s.extend_from_slice(v.as_bytes()); - } - Ok(box_try!(String::from_utf8(s))) } diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index 9095ef27dfd..cb4c5682252 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -325,10 +325,6 @@ impl RaftEngine for RocksEngine { KvEngine::flush_metrics(self, instance) } - fn reset_statistics(&self) { - KvEngine::reset_statistics(self) - } - fn dump_stats(&self) -> Result { MiscExt::dump_stats(self) } diff --git a/components/engine_rocks/src/rocks_metrics.rs b/components/engine_rocks/src/rocks_metrics.rs index 026ef36cce7..d77f5f2dc99 100644 --- a/components/engine_rocks/src/rocks_metrics.rs +++ b/components/engine_rocks/src/rocks_metrics.rs @@ -1,14 +1,15 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::CF_DEFAULT; +use collections::HashMap; +use engine_traits::{StatisticsReporter, CF_DEFAULT}; use lazy_static::lazy_static; use prometheus::*; use prometheus_static_metric::*; use rocksdb::{ - DBStatisticsHistogramType as HistType, DBStatisticsTickerType as TickerType, HistogramData, DB, + DBStatisticsHistogramType as HistType, DBStatisticsTickerType as TickerType, HistogramData, }; -use crate::rocks_metrics_defs::*; +use crate::{engine::RocksEngine, rocks_metrics_defs::*, RocksStatistics}; make_auto_flush_static_metric! 
{ pub label_enum TickerName { @@ -910,206 +911,355 @@ pub fn flush_engine_histogram_metrics(t: HistType, value: HistogramData, name: & } } -pub fn flush_engine_iostall_properties(engine: &DB, name: &str) { - let stall_num = ROCKSDB_IOSTALL_KEY.len(); - let mut counter = vec![0; stall_num]; - for cf in engine.cf_names() { - let handle = crate::util::get_cf_handle(engine, cf).unwrap(); - if let Some(info) = engine.get_map_property_cf(handle, ROCKSDB_CFSTATS) { - for i in 0..stall_num { - let value = info.get_property_int_value(ROCKSDB_IOSTALL_KEY[i]); - counter[i] += value as i64; - } - } else { - return; - } - } - for i in 0..stall_num { - STORE_ENGINE_WRITE_STALL_REASON_GAUGE_VEC - .with_label_values(&[name, ROCKSDB_IOSTALL_TYPE[i]]) - .set(counter[i]); - } +#[derive(Default, Clone)] +struct CfLevelStats { + num_files: Option, + // sum(compression_ratio_i * num_files_i) + weighted_compression_ratio: Option, + num_blob_files: Option, } -pub fn flush_engine_properties(engine: &DB, name: &str) { - for cf in engine.cf_names() { - let handle = crate::util::get_cf_handle(engine, cf).unwrap(); - // It is important to monitor each cf's size, especially the "raft" and "lock" - // column families. - let cf_used_size = crate::util::get_engine_cf_used_size(engine, handle); - STORE_ENGINE_SIZE_GAUGE_VEC - .with_label_values(&[name, cf]) - .set(cf_used_size as i64); - - let blob_cache_usage = engine.get_blob_cache_usage_cf(handle); - STORE_ENGINE_BLOB_CACHE_USAGE_GAUGE_VEC - .with_label_values(&[name, cf]) - .set(blob_cache_usage as i64); - - // TODO: find a better place to record these metrics. 
- // Refer: https://github.com/facebook/rocksdb/wiki/Memory-usage-in-RocksDB - // For index and filter blocks memory - if let Some(readers_mem) = engine.get_property_int_cf(handle, ROCKSDB_TABLE_READERS_MEM) { - STORE_ENGINE_MEMORY_GAUGE_VEC - .with_label_values(&[name, cf, "readers-mem"]) - .set(readers_mem as i64); - } - - // For memtable - if let Some(mem_table) = engine.get_property_int_cf(handle, ROCKSDB_CUR_SIZE_ALL_MEM_TABLES) - { - STORE_ENGINE_MEMORY_GAUGE_VEC - .with_label_values(&[name, cf, "mem-tables"]) - .set(mem_table as i64); - } +#[derive(Default)] +struct CfStats { + used_size: Option, + blob_cache_size: Option, + readers_mem: Option, + mem_tables: Option, + num_keys: Option, + pending_compaction_bytes: Option, + num_immutable_mem_table: Option, + live_blob_size: Option, + num_live_blob_file: Option, + num_obsolete_blob_file: Option, + live_blob_file_size: Option, + obsolete_blob_file_size: Option, + blob_file_discardable_ratio_le0: Option, + blob_file_discardable_ratio_le20: Option, + blob_file_discardable_ratio_le50: Option, + blob_file_discardable_ratio_le80: Option, + blob_file_discardable_ratio_le100: Option, + levels: Vec, +} - // TODO: add cache usage and pinned usage. 
+#[derive(Default)] +struct DbStats { + num_snapshots: Option, + oldest_snapshot_time: Option, + block_cache_size: Option, + stall_num: Vec>, +} - if let Some(num_keys) = engine.get_property_int_cf(handle, ROCKSDB_ESTIMATE_NUM_KEYS) { - STORE_ENGINE_ESTIMATE_NUM_KEYS_VEC - .with_label_values(&[name, cf]) - .set(num_keys as i64); - } +pub struct RocksStatisticsReporter { + name: String, + db_stats: DbStats, + cf_stats: HashMap, +} - // Pending compaction bytes - if let Some(pending_compaction_bytes) = - crate::util::get_cf_pending_compaction_bytes(engine, handle) - { - STORE_ENGINE_PENDING_COMPACTION_BYTES_VEC - .with_label_values(&[name, cf]) - .set(pending_compaction_bytes as i64); +impl StatisticsReporter for RocksStatisticsReporter { + fn new(name: &str) -> Self { + Self { + name: name.to_owned(), + db_stats: DbStats::default(), + cf_stats: HashMap::default(), } + } - let opts = engine.get_options_cf(handle); - for level in 0..opts.get_num_levels() { - // Compression ratio at levels + fn collect(&mut self, engine: &RocksEngine) { + let db = engine.as_inner(); + let stall_num = ROCKSDB_IOSTALL_KEY.len(); + self.db_stats.stall_num.resize(stall_num, None); + for cf in db.cf_names() { + let cf_stats = self.cf_stats.entry(cf.to_owned()).or_default(); + let handle = crate::util::get_cf_handle(db, cf).unwrap(); + // It is important to monitor each cf's size, especially the "raft" and "lock" + // column families. + *cf_stats.used_size.get_or_insert_default() += + crate::util::get_engine_cf_used_size(db, handle); + *cf_stats.blob_cache_size.get_or_insert_default() += db.get_blob_cache_usage_cf(handle); + // TODO: find a better place to record these metrics. 
+ // Refer: https://github.com/facebook/rocksdb/wiki/Memory-usage-in-RocksDB + // For index and filter blocks memory + if let Some(v) = db.get_property_int_cf(handle, ROCKSDB_TABLE_READERS_MEM) { + *cf_stats.readers_mem.get_or_insert_default() += v; + } + if let Some(v) = db.get_property_int_cf(handle, ROCKSDB_CUR_SIZE_ALL_MEM_TABLES) { + *cf_stats.mem_tables.get_or_insert_default() += v; + } + // TODO: add cache usage and pinned usage. + if let Some(v) = db.get_property_int_cf(handle, ROCKSDB_ESTIMATE_NUM_KEYS) { + *cf_stats.num_keys.get_or_insert_default() += v; + } + if let Some(v) = crate::util::get_cf_pending_compaction_bytes(db, handle) { + *cf_stats.pending_compaction_bytes.get_or_insert_default() += v; + } + if let Some(v) = crate::util::get_cf_num_immutable_mem_table(db, handle) { + *cf_stats.num_immutable_mem_table.get_or_insert_default() += v; + } + // Titan. + if let Some(v) = db.get_property_int_cf(handle, ROCKSDB_TITANDB_LIVE_BLOB_SIZE) { + *cf_stats.live_blob_size.get_or_insert_default() += v; + } + if let Some(v) = db.get_property_int_cf(handle, ROCKSDB_TITANDB_NUM_LIVE_BLOB_FILE) { + *cf_stats.num_live_blob_file.get_or_insert_default() += v; + } + if let Some(v) = db.get_property_int_cf(handle, ROCKSDB_TITANDB_NUM_OBSOLETE_BLOB_FILE) + { + *cf_stats.num_obsolete_blob_file.get_or_insert_default() += v; + } + if let Some(v) = db.get_property_int_cf(handle, ROCKSDB_TITANDB_LIVE_BLOB_FILE_SIZE) { + *cf_stats.live_blob_file_size.get_or_insert_default() += v; + } + if let Some(v) = db.get_property_int_cf(handle, ROCKSDB_TITANDB_OBSOLETE_BLOB_FILE_SIZE) + { + *cf_stats.obsolete_blob_file_size.get_or_insert_default() += v; + } if let Some(v) = - crate::util::get_engine_compression_ratio_at_level(engine, handle, level) + db.get_property_int_cf(handle, ROCKSDB_TITANDB_DISCARDABLE_RATIO_LE0_FILE) { - STORE_ENGINE_COMPRESSION_RATIO_VEC - .with_label_values(&[name, cf, &level.to_string()]) - .set(v); + *cf_stats + .blob_file_discardable_ratio_le0 + 
.get_or_insert_default() += v; } - - // Num files at levels - if let Some(v) = crate::util::get_cf_num_files_at_level(engine, handle, level) { - STORE_ENGINE_NUM_FILES_AT_LEVEL_VEC - .with_label_values(&[name, cf, &level.to_string()]) - .set(v as i64); + if let Some(v) = + db.get_property_int_cf(handle, ROCKSDB_TITANDB_DISCARDABLE_RATIO_LE20_FILE) + { + *cf_stats + .blob_file_discardable_ratio_le20 + .get_or_insert_default() += v; } - - // Titan Num blob files at levels - if let Some(v) = crate::util::get_cf_num_blob_files_at_level(engine, handle, level) { - STORE_ENGINE_TITANDB_NUM_BLOB_FILES_AT_LEVEL_VEC - .with_label_values(&[name, cf, &level.to_string()]) - .set(v as i64); + if let Some(v) = + db.get_property_int_cf(handle, ROCKSDB_TITANDB_DISCARDABLE_RATIO_LE50_FILE) + { + *cf_stats + .blob_file_discardable_ratio_le50 + .get_or_insert_default() += v; + } + if let Some(v) = + db.get_property_int_cf(handle, ROCKSDB_TITANDB_DISCARDABLE_RATIO_LE80_FILE) + { + *cf_stats + .blob_file_discardable_ratio_le80 + .get_or_insert_default() += v; + } + if let Some(v) = + db.get_property_int_cf(handle, ROCKSDB_TITANDB_DISCARDABLE_RATIO_LE100_FILE) + { + *cf_stats + .blob_file_discardable_ratio_le100 + .get_or_insert_default() += v; + } + // Level stats. 
+ let opts = db.get_options_cf(handle); + if cf_stats.levels.len() < opts.get_num_levels() { + cf_stats + .levels + .resize(opts.get_num_levels(), CfLevelStats::default()); + } + for level in 0..opts.get_num_levels() { + if let Some(num_files) = crate::util::get_cf_num_files_at_level(db, handle, level) { + *cf_stats.levels[level].num_files.get_or_insert_default() += num_files; + if let Some(ratio) = + crate::util::get_engine_compression_ratio_at_level(db, handle, level) + { + *cf_stats.levels[level] + .weighted_compression_ratio + .get_or_insert_default() += num_files as f64 * ratio; + } + } + if let Some(v) = crate::util::get_cf_num_blob_files_at_level(db, handle, level) { + *cf_stats.levels[level] + .num_blob_files + .get_or_insert_default() += v; + } } - } - - // Num immutable mem-table - if let Some(v) = crate::util::get_cf_num_immutable_mem_table(engine, handle) { - STORE_ENGINE_NUM_IMMUTABLE_MEM_TABLE_VEC - .with_label_values(&[name, cf]) - .set(v as i64); - } - // Titan live blob size - if let Some(v) = engine.get_property_int_cf(handle, ROCKSDB_TITANDB_LIVE_BLOB_SIZE) { - STORE_ENGINE_TITANDB_LIVE_BLOB_SIZE_VEC - .with_label_values(&[name, cf]) - .set(v as i64); + if let Some(info) = db.get_map_property_cf(handle, ROCKSDB_CFSTATS) { + for i in 0..stall_num { + *self.db_stats.stall_num[i].get_or_insert_default() += + info.get_property_int_value(ROCKSDB_IOSTALL_KEY[i]); + } + } } - // Titan num live blob file - if let Some(v) = engine.get_property_int_cf(handle, ROCKSDB_TITANDB_NUM_LIVE_BLOB_FILE) { - STORE_ENGINE_TITANDB_NUM_LIVE_BLOB_FILE_VEC - .with_label_values(&[name, cf]) - .set(v as i64); + // For snapshot + *self.db_stats.num_snapshots.get_or_insert_default() += + db.get_property_int(ROCKSDB_NUM_SNAPSHOTS).unwrap_or(0); + let oldest_snapshot_time = + db.get_property_int(ROCKSDB_OLDEST_SNAPSHOT_TIME) + .map_or(0, |t| { + let now = time::get_time().sec as u64; + // RocksDB returns 0 if no snapshots. 
+ if t > 0 && now > t { now - t } else { 0 } + }); + if oldest_snapshot_time > self.db_stats.oldest_snapshot_time.unwrap_or(0) { + *self.db_stats.oldest_snapshot_time.get_or_insert_default() = oldest_snapshot_time; } - // Titan num obsolete blob file - if let Some(v) = engine.get_property_int_cf(handle, ROCKSDB_TITANDB_NUM_OBSOLETE_BLOB_FILE) - { - STORE_ENGINE_TITANDB_NUM_OBSOLETE_BLOB_FILE_VEC - .with_label_values(&[name, cf]) - .set(v as i64); + // Since block cache is shared, getting cache size from any CF/DB is fine. Here + // we get from default CF. + if self.db_stats.block_cache_size.is_none() { + let handle = crate::util::get_cf_handle(db, CF_DEFAULT).unwrap(); + *self.db_stats.block_cache_size.get_or_insert_default() = + db.get_block_cache_usage_cf(handle); } + } - // Titan live blob file size - if let Some(v) = engine.get_property_int_cf(handle, ROCKSDB_TITANDB_LIVE_BLOB_FILE_SIZE) { - STORE_ENGINE_TITANDB_LIVE_BLOB_FILE_SIZE_VEC - .with_label_values(&[name, cf]) - .set(v as i64); - } + fn flush(&mut self) { + for (cf, cf_stats) in &self.cf_stats { + if let Some(v) = cf_stats.used_size { + STORE_ENGINE_SIZE_GAUGE_VEC + .with_label_values(&[&self.name, cf]) + .set(v as i64); + } + if let Some(v) = cf_stats.blob_cache_size { + STORE_ENGINE_BLOB_CACHE_USAGE_GAUGE_VEC + .with_label_values(&[&self.name, cf]) + .set(v as i64); + } + if let Some(v) = cf_stats.readers_mem { + STORE_ENGINE_MEMORY_GAUGE_VEC + .with_label_values(&[&self.name, cf, "readers-mem"]) + .set(v as i64); + } + if let Some(v) = cf_stats.mem_tables { + STORE_ENGINE_MEMORY_GAUGE_VEC + .with_label_values(&[&self.name, cf, "mem-tables"]) + .set(v as i64); + } + if let Some(v) = cf_stats.num_keys { + STORE_ENGINE_ESTIMATE_NUM_KEYS_VEC + .with_label_values(&[&self.name, cf]) + .set(v as i64); + } + if let Some(v) = cf_stats.pending_compaction_bytes { + STORE_ENGINE_PENDING_COMPACTION_BYTES_VEC + .with_label_values(&[&self.name, cf]) + .set(v as i64); + } + for (level, level_stats) in 
cf_stats.levels.iter().enumerate() { + if let Some(num_files) = level_stats.num_files { + STORE_ENGINE_NUM_FILES_AT_LEVEL_VEC + .with_label_values(&[&self.name, cf, &level.to_string()]) + .set(num_files as i64); + if num_files > 0 && let Some(ratio) = level_stats.weighted_compression_ratio { + let normalized_compression_ratio = + ratio / num_files as f64; + STORE_ENGINE_COMPRESSION_RATIO_VEC + .with_label_values(&[&self.name, cf, &level.to_string()]) + .set(normalized_compression_ratio); + } + } + if let Some(v) = level_stats.num_blob_files { + STORE_ENGINE_TITANDB_NUM_BLOB_FILES_AT_LEVEL_VEC + .with_label_values(&[&self.name, cf, &level.to_string()]) + .set(v as i64); + } + } - // Titan obsolete blob file size - if let Some(v) = engine.get_property_int_cf(handle, ROCKSDB_TITANDB_OBSOLETE_BLOB_FILE_SIZE) - { - STORE_ENGINE_TITANDB_OBSOLETE_BLOB_FILE_SIZE_VEC - .with_label_values(&[name, cf]) - .set(v as i64); + if let Some(v) = cf_stats.num_immutable_mem_table { + STORE_ENGINE_NUM_IMMUTABLE_MEM_TABLE_VEC + .with_label_values(&[&self.name, cf]) + .set(v as i64); + } + if let Some(v) = cf_stats.live_blob_size { + STORE_ENGINE_TITANDB_LIVE_BLOB_SIZE_VEC + .with_label_values(&[&self.name, cf]) + .set(v as i64); + } + if let Some(v) = cf_stats.num_live_blob_file { + STORE_ENGINE_TITANDB_NUM_LIVE_BLOB_FILE_VEC + .with_label_values(&[&self.name, cf]) + .set(v as i64); + } + if let Some(v) = cf_stats.num_obsolete_blob_file { + STORE_ENGINE_TITANDB_NUM_OBSOLETE_BLOB_FILE_VEC + .with_label_values(&[&self.name, cf]) + .set(v as i64); + } + if let Some(v) = cf_stats.live_blob_file_size { + STORE_ENGINE_TITANDB_LIVE_BLOB_FILE_SIZE_VEC + .with_label_values(&[&self.name, cf]) + .set(v as i64); + } + if let Some(v) = cf_stats.obsolete_blob_file_size { + STORE_ENGINE_TITANDB_OBSOLETE_BLOB_FILE_SIZE_VEC + .with_label_values(&[&self.name, cf]) + .set(v as i64); + } + if let Some(v) = cf_stats.blob_file_discardable_ratio_le0 { + STORE_ENGINE_TITANDB_BLOB_FILE_DISCARDABLE_RATIO_VEC + 
.with_label_values(&[&self.name, cf, "le0"]) + .set(v as i64); + } + if let Some(v) = cf_stats.blob_file_discardable_ratio_le20 { + STORE_ENGINE_TITANDB_BLOB_FILE_DISCARDABLE_RATIO_VEC + .with_label_values(&[&self.name, cf, "le20"]) + .set(v as i64); + } + if let Some(v) = cf_stats.blob_file_discardable_ratio_le50 { + STORE_ENGINE_TITANDB_BLOB_FILE_DISCARDABLE_RATIO_VEC + .with_label_values(&[&self.name, cf, "le50"]) + .set(v as i64); + } + if let Some(v) = cf_stats.blob_file_discardable_ratio_le80 { + STORE_ENGINE_TITANDB_BLOB_FILE_DISCARDABLE_RATIO_VEC + .with_label_values(&[&self.name, cf, "le80"]) + .set(v as i64); + } + if let Some(v) = cf_stats.blob_file_discardable_ratio_le100 { + STORE_ENGINE_TITANDB_BLOB_FILE_DISCARDABLE_RATIO_VEC + .with_label_values(&[&self.name, cf, "le100"]) + .set(v as i64); + } } - // Titan blob file discardable ratio - if let Some(v) = - engine.get_property_int_cf(handle, ROCKSDB_TITANDB_DISCARDABLE_RATIO_LE0_FILE) - { - STORE_ENGINE_TITANDB_BLOB_FILE_DISCARDABLE_RATIO_VEC - .with_label_values(&[name, cf, "le0"]) - .set(v as i64); - } - if let Some(v) = - engine.get_property_int_cf(handle, ROCKSDB_TITANDB_DISCARDABLE_RATIO_LE20_FILE) - { - STORE_ENGINE_TITANDB_BLOB_FILE_DISCARDABLE_RATIO_VEC - .with_label_values(&[name, cf, "le20"]) + if let Some(v) = self.db_stats.num_snapshots { + STORE_ENGINE_NUM_SNAPSHOTS_GAUGE_VEC + .with_label_values(&[&self.name]) .set(v as i64); } - if let Some(v) = - engine.get_property_int_cf(handle, ROCKSDB_TITANDB_DISCARDABLE_RATIO_LE50_FILE) - { - STORE_ENGINE_TITANDB_BLOB_FILE_DISCARDABLE_RATIO_VEC - .with_label_values(&[name, cf, "le50"]) + if let Some(v) = self.db_stats.oldest_snapshot_time { + STORE_ENGINE_OLDEST_SNAPSHOT_DURATION_GAUGE_VEC + .with_label_values(&[&self.name]) .set(v as i64); } - if let Some(v) = - engine.get_property_int_cf(handle, ROCKSDB_TITANDB_DISCARDABLE_RATIO_LE80_FILE) - { - STORE_ENGINE_TITANDB_BLOB_FILE_DISCARDABLE_RATIO_VEC - .with_label_values(&[name, cf, "le80"]) + if 
let Some(v) = self.db_stats.block_cache_size { + STORE_ENGINE_BLOCK_CACHE_USAGE_GAUGE_VEC + .with_label_values(&[&self.name, "all"]) .set(v as i64); } - if let Some(v) = - engine.get_property_int_cf(handle, ROCKSDB_TITANDB_DISCARDABLE_RATIO_LE100_FILE) - { - STORE_ENGINE_TITANDB_BLOB_FILE_DISCARDABLE_RATIO_VEC - .with_label_values(&[name, cf, "le100"]) - .set(v as i64); + let stall_num = ROCKSDB_IOSTALL_KEY.len(); + for i in 0..stall_num { + if let Some(v) = self.db_stats.stall_num[i] { + STORE_ENGINE_WRITE_STALL_REASON_GAUGE_VEC + .with_label_values(&[&self.name, ROCKSDB_IOSTALL_TYPE[i]]) + .set(v as i64); + } } } +} - // For snapshot - if let Some(n) = engine.get_property_int(ROCKSDB_NUM_SNAPSHOTS) { - STORE_ENGINE_NUM_SNAPSHOTS_GAUGE_VEC - .with_label_values(&[name]) - .set(n as i64); +pub fn flush_engine_statistics(statistics: &RocksStatistics, name: &str, is_titan: bool) { + for t in ENGINE_TICKER_TYPES { + let v = statistics.get_and_reset_ticker_count(*t); + flush_engine_ticker_metrics(*t, v, name); } - if let Some(t) = engine.get_property_int(ROCKSDB_OLDEST_SNAPSHOT_TIME) { - // RocksDB returns 0 if no snapshots. - let now = time::get_time().sec as u64; - let d = if t > 0 && now > t { now - t } else { 0 }; - STORE_ENGINE_OLDEST_SNAPSHOT_DURATION_GAUGE_VEC - .with_label_values(&[name]) - .set(d as i64); + for t in ENGINE_HIST_TYPES { + if let Some(v) = statistics.get_histogram(*t) { + flush_engine_histogram_metrics(*t, v, name); + } + } + if is_titan { + for t in TITAN_ENGINE_TICKER_TYPES { + let v = statistics.get_and_reset_ticker_count(*t); + flush_engine_ticker_metrics(*t, v, name); + } + for t in TITAN_ENGINE_HIST_TYPES { + if let Some(v) = statistics.get_histogram(*t) { + flush_engine_histogram_metrics(*t, v, name); + } + } } - - // Since block cache is shared, getting cache size from any CF is fine. Here we - // get from default CF. 
- let handle = crate::util::get_cf_handle(engine, CF_DEFAULT).unwrap(); - let block_cache_usage = engine.get_block_cache_usage_cf(handle); - STORE_ENGINE_BLOCK_CACHE_USAGE_GAUGE_VEC - .with_label_values(&[name, "all"]) - .set(block_cache_usage as i64); } // For property metrics @@ -1618,11 +1768,8 @@ mod tests { flush_engine_histogram_metrics(*tp, HistogramData::default(), "kv"); } - flush_engine_properties(engine.as_inner(), "kv"); - let handle = engine.as_inner().cf_handle("default").unwrap(); - let info = engine - .as_inner() - .get_map_property_cf(handle, ROCKSDB_CFSTATS); - assert!(info.is_some()); + let mut reporter = RocksStatisticsReporter::new("kv"); + reporter.collect(&engine); + reporter.flush(); } } diff --git a/components/engine_rocks/src/util.rs b/components/engine_rocks/src/util.rs index 786dfec04d1..407cf8ee611 100644 --- a/components/engine_rocks/src/util.rs +++ b/components/engine_rocks/src/util.rs @@ -11,7 +11,7 @@ use slog_global::warn; use crate::{ cf_options::RocksCfOptions, db_options::RocksDbOptions, engine::RocksEngine, r2e, - raw::Statistics, rocks_metrics_defs::*, + rocks_metrics_defs::*, RocksStatistics, }; pub fn new_temp_engine(path: &tempfile::TempDir) -> Engines { @@ -28,7 +28,7 @@ pub fn new_default_engine(path: &str) -> Result { pub fn new_engine(path: &str, cfs: &[&str]) -> Result { let mut db_opts = RocksDbOptions::default(); - db_opts.set_statistics(&Statistics::new_titan()); + db_opts.set_statistics(&RocksStatistics::new_titan()); let cf_opts = cfs.iter().map(|name| (*name, Default::default())).collect(); new_engine_opt(path, db_opts, cf_opts) } diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index e12ea074015..e76765e2ed6 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -47,10 +47,11 @@ pub trait KvEngine: /// Flush metrics to prometheus /// /// `instance` is the label of the metric to flush. 
- fn flush_metrics(&self, _instance: &str) {} - - /// Reset internal statistics - fn reset_statistics(&self) {} + fn flush_metrics(&self, instance: &str) { + let mut reporter = Self::StatisticsReporter::new(instance); + reporter.collect(self); + reporter.flush(); + } /// Cast to a concrete engine type /// diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index 34502634280..edfea511d35 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -37,7 +37,26 @@ pub enum DeleteStrategy { DeleteByWriter { sst_path: String }, } +/// `StatisticsReporter` can be used to report engine's private statistics to +/// prometheus metrics. For one single engine, using it is equivalent to calling +/// `KvEngine::flush_metrics("name")`. For multiple engines, it can aggregate +/// statistics accordingly. +/// Note that it is not responsible for managing the statistics from +/// user-provided collectors that are potentially shared between engines. +pub trait StatisticsReporter { + fn new(name: &str) -> Self; + + /// Collect statistics from one single engine. + fn collect(&mut self, engine: &T); + + /// Aggregate and report statistics to prometheus metrics counters. The + /// statistics are not cleared afterwards. + fn flush(&mut self); +} + pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { + type StatisticsReporter: StatisticsReporter; + /// Flush all specified column families at once. /// /// If `cfs` is empty, it will try to flush all available column families. 
diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index 8b29e07707a..7b0e04d0ab5 100644 --- a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -136,7 +136,6 @@ pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send fn flush_stats(&self) -> Option { None } - fn reset_statistics(&self) {} fn stop(&self) {} diff --git a/components/server/src/lib.rs b/components/server/src/lib.rs index 57793792289..5107a20eeab 100644 --- a/components/server/src/lib.rs +++ b/components/server/src/lib.rs @@ -2,6 +2,7 @@ #![allow(incomplete_features)] #![feature(specialization)] +#![feature(let_chains)] #[macro_use] extern crate tikv_util; diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 8d64ff74c8b..470e3a41861 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -38,14 +38,15 @@ use cdc::{CdcConfigManager, MemoryQuota}; use concurrency_manager::ConcurrencyManager; use encryption_export::{data_key_manager_from_config, DataKeyManager}; use engine_rocks::{ - from_rocks_compression_type, + flush_engine_statistics, from_rocks_compression_type, raw::{Cache, Env}, - FlowInfo, RocksEngine, + FlowInfo, RocksEngine, RocksStatistics, }; use engine_rocks_helper::sst_recovery::{RecoveryRunner, DEFAULT_CHECK_INTERVAL}; use engine_traits::{ CachedTablet, CfOptions, CfOptionsExt, Engines, FlowControlFactorsExt, KvEngine, MiscExt, - RaftEngine, SingletonFactory, TabletContext, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, + RaftEngine, SingletonFactory, StatisticsReporter, TabletContext, TabletRegistry, CF_DEFAULT, + CF_LOCK, CF_WRITE, }; use error_code::ErrorCodeExt; use file_system::{ @@ -169,7 +170,11 @@ fn run_impl(config: TikvConfig) { tikv.run_status_server(); tikv.init_quota_tuning_task(tikv.quota_limiter.clone()); - signal_handler::wait_for_signal(Some(tikv.engines.take().unwrap().engines)); + 
signal_handler::wait_for_signal( + Some(tikv.engines.take().unwrap().engines), + tikv.kv_statistics.clone(), + tikv.raft_statistics.clone(), + ); tikv.stop(); } @@ -226,6 +231,8 @@ struct TikvServer { snap_mgr: Option, // Will be filled in `init_servers`. encryption_key_manager: Option>, engines: Option>, + kv_statistics: Option>, + raft_statistics: Option>, servers: Option>, region_info_accessor: RegionInfoAccessor, coprocessor_host: Option>, @@ -376,6 +383,8 @@ where snap_mgr: None, encryption_key_manager: None, engines: None, + kv_statistics: None, + raft_statistics: None, servers: None, region_info_accessor, coprocessor_host, @@ -1204,6 +1213,8 @@ where // Debug service. let debug_service = DebugService::new( engines.engines.clone(), + self.kv_statistics.clone(), + self.raft_statistics.clone(), servers.server.get_debug_thread_pool().clone(), engines.engine.raft_extension().clone(), self.cfg_controller.as_ref().unwrap().clone(), @@ -1356,7 +1367,11 @@ where engines_info: Arc, ) { let mut engine_metrics = EngineMetricsManager::::new( - self.engines.as_ref().unwrap().engines.clone(), + self.tablet_registry.clone().unwrap(), + self.kv_statistics.clone(), + self.config.rocksdb.titan.enabled, + self.engines.as_ref().unwrap().engines.raft.clone(), + self.raft_statistics.clone(), ); let mut io_metrics = IoMetricsManager::new(fetcher); let engines_info_clone = engines_info.clone(); @@ -1675,7 +1690,12 @@ where } pub trait ConfiguredRaftEngine: RaftEngine { - fn build(_: &TikvConfig, _: &Arc, _: &Option>, _: &Cache) -> Self; + fn build( + _: &TikvConfig, + _: &Arc, + _: &Option>, + _: &Cache, + ) -> (Self, Option>); fn as_rocks_engine(&self) -> Option<&RocksEngine>; fn register_config(&self, _cfg_controller: &mut ConfigController); } @@ -1686,7 +1706,7 @@ impl ConfiguredRaftEngine for T { _: &Arc, _: &Option>, _: &Cache, - ) -> Self { + ) -> (Self, Option>) { unimplemented!() } default fn as_rocks_engine(&self) -> Option<&RocksEngine> { @@ -1701,7 +1721,7 @@ impl 
ConfiguredRaftEngine for RocksEngine { env: &Arc, key_manager: &Option>, block_cache: &Cache, - ) -> Self { + ) -> (Self, Option>) { let mut raft_data_state_machine = RaftDataStateMachine::new( &config.storage.data_dir, &config.raft_engine.config().dir, @@ -1713,6 +1733,8 @@ impl ConfiguredRaftEngine for RocksEngine { let config_raftdb = &config.raftdb; let mut raft_db_opts = config_raftdb.build_opt(); raft_db_opts.set_env(env.clone()); + let statistics = Arc::new(RocksStatistics::new_titan()); + raft_db_opts.set_statistics(statistics.as_ref()); let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); let raftdb = engine_rocks::util::new_engine_opt(raft_db_path, raft_db_opts, raft_cf_opts) .expect("failed to open raftdb"); @@ -1726,7 +1748,7 @@ impl ConfiguredRaftEngine for RocksEngine { drop(raft_engine); raft_data_state_machine.after_dump_data(); } - raftdb + (raftdb, Some(statistics)) } fn as_rocks_engine(&self) -> Option<&RocksEngine> { @@ -1747,7 +1769,7 @@ impl ConfiguredRaftEngine for RaftLogEngine { env: &Arc, key_manager: &Option>, block_cache: &Cache, - ) -> Self { + ) -> (Self, Option>) { let mut raft_data_state_machine = RaftDataStateMachine::new( &config.storage.data_dir, &config.raft_store.raftdb_path, @@ -1776,7 +1798,7 @@ impl ConfiguredRaftEngine for RaftLogEngine { drop(raftdb); raft_data_state_machine.after_dump_data(); } - raft_engine + (raft_engine, None) } } @@ -1792,12 +1814,13 @@ impl TikvServer { .unwrap(); // Create raft engine - let raft_engine = CER::build( + let (raft_engine, raft_statistics) = CER::build( &self.config, &env, &self.encryption_key_manager, &block_cache, ); + self.raft_statistics = raft_statistics; // Create kv engine. 
let builder = KvEngineFactoryBuilder::new(env, &self.config, block_cache) @@ -1811,6 +1834,7 @@ impl TikvServer { let kv_engine = factory .create_shared_db(&self.store_path) .unwrap_or_else(|s| fatal!("failed to create kv engine: {}", s)); + self.kv_statistics = Some(factory.rocks_statistics()); let engines = Engines::new(kv_engine.clone(), raft_engine); let cfg_controller = self.cfg_controller.as_mut().unwrap(); @@ -1954,25 +1978,58 @@ impl Stop for LazyWorker { } } -pub struct EngineMetricsManager { - engines: Engines, +pub struct EngineMetricsManager { + tablet_registry: TabletRegistry, + kv_statistics: Option>, + kv_is_titan: bool, + raft_engine: ER, + raft_statistics: Option>, last_reset: Instant, } -impl EngineMetricsManager { - pub fn new(engines: Engines) -> Self { +impl EngineMetricsManager { + pub fn new( + tablet_registry: TabletRegistry, + kv_statistics: Option>, + kv_is_titan: bool, + raft_engine: ER, + raft_statistics: Option>, + ) -> Self { EngineMetricsManager { - engines, + tablet_registry, + kv_statistics, + kv_is_titan, + raft_engine, + raft_statistics, last_reset: Instant::now(), } } pub fn flush(&mut self, now: Instant) { - KvEngine::flush_metrics(&self.engines.kv, "kv"); - self.engines.raft.flush_metrics("raft"); + let mut reporter = EK::StatisticsReporter::new("kv"); + self.tablet_registry + .for_each_opened_tablet(|_, db: &mut CachedTablet| { + if let Some(db) = db.latest() { + reporter.collect(db); + } + true + }); + reporter.flush(); + self.raft_engine.flush_metrics("raft"); + + if let Some(s) = self.kv_statistics.as_ref() { + flush_engine_statistics(s, "kv", self.kv_is_titan); + } + if let Some(s) = self.raft_statistics.as_ref() { + flush_engine_statistics(s, "raft", false); + } if now.saturating_duration_since(self.last_reset) >= DEFAULT_ENGINE_METRICS_RESET_INTERVAL { - KvEngine::reset_statistics(&self.engines.kv); - self.engines.raft.reset_statistics(); + if let Some(s) = self.kv_statistics.as_ref() { + s.reset(); + } + if let Some(s) 
= self.raft_statistics.as_ref() { + s.reset(); + } self.last_reset = now; } } diff --git a/components/server/src/signal_handler.rs b/components/server/src/signal_handler.rs index a92845b843d..0977a1ed814 100644 --- a/components/server/src/signal_handler.rs +++ b/components/server/src/signal_handler.rs @@ -1,18 +1,29 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. +use std::sync::Arc; + +use engine_rocks::RocksStatistics; +use engine_traits::{Engines, KvEngine, RaftEngine}; + pub use self::imp::wait_for_signal; #[cfg(unix)] mod imp { - use engine_traits::{Engines, KvEngine, MiscExt, RaftEngine}; + use engine_traits::MiscExt; use signal_hook::{ consts::{SIGHUP, SIGINT, SIGTERM, SIGUSR1, SIGUSR2}, iterator::Signals, }; use tikv_util::metrics; + use super::*; + #[allow(dead_code)] - pub fn wait_for_signal(engines: Option>) { + pub fn wait_for_signal( + engines: Option>, + kv_statistics: Option>, + raft_statistics: Option>, + ) { let mut signals = Signals::new([SIGTERM, SIGINT, SIGHUP, SIGUSR1, SIGUSR2]).unwrap(); for signal in &mut signals { match signal { @@ -25,7 +36,13 @@ mod imp { info!("{}", metrics::dump(false)); if let Some(ref engines) = engines { info!("{:?}", MiscExt::dump_stats(&engines.kv)); + if let Some(s) = kv_statistics.as_ref() && let Some(s) = s.to_string() { + info!("{:?}", s); + } info!("{:?}", RaftEngine::dump_stats(&engines.raft)); + if let Some(s) = raft_statistics.as_ref() && let Some(s) = s.to_string() { + info!("{:?}", s); + } } } // TODO: handle more signal @@ -37,7 +54,12 @@ mod imp { #[cfg(not(unix))] mod imp { - use engine_traits::{Engines, KvEngine, RaftEngine}; + use super::*; - pub fn wait_for_signal(_: Option>) {} + pub fn wait_for_signal( + _: Option>, + _: Option>, + _: Option>, + ) { + } } diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index f9088ff4e3b..833e8131746 100644 --- a/components/test_raftstore/src/cluster.rs +++ 
b/components/test_raftstore/src/cluster.rs @@ -12,7 +12,7 @@ use std::{ use collections::{HashMap, HashSet}; use crossbeam::channel::TrySendError; use encryption_export::DataKeyManager; -use engine_rocks::{RocksEngine, RocksSnapshot}; +use engine_rocks::{RocksEngine, RocksSnapshot, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{ CompactExt, Engines, Iterable, MiscExt, Mutable, Peekable, RaftEngineReadOnly, WriteBatch, @@ -170,6 +170,8 @@ pub struct Cluster { group_props: HashMap, pub sst_workers: Vec>, pub sst_workers_map: HashMap, + pub kv_statistics: Vec>, + pub raft_statistics: Vec>>, pub sim: Arc>, pub pd_client: Arc, } @@ -205,6 +207,8 @@ impl Cluster { pd_client, sst_workers: vec![], sst_workers_map: HashMap::default(), + kv_statistics: vec![], + raft_statistics: vec![], } } @@ -240,12 +244,14 @@ impl Cluster { } fn create_engine(&mut self, router: Option>) { - let (engines, key_manager, dir, sst_worker) = + let (engines, key_manager, dir, sst_worker, kv_statistics, raft_statistics) = create_test_engine(router, self.io_rate_limiter.clone(), &self.cfg); self.dbs.push(engines); self.key_managers.push(key_manager); self.paths.push(dir); self.sst_workers.push(sst_worker); + self.kv_statistics.push(kv_statistics); + self.raft_statistics.push(raft_statistics); } pub fn create_engines(&mut self) { diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index ea9868afdbd..e3cfb298c59 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -483,6 +483,8 @@ impl ServerCluster { let debug_thread_handle = debug_thread_pool.handle().clone(); let debug_service = DebugService::new( engines.clone(), + None, + None, debug_thread_handle, extension, ConfigController::default(), diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 3a4ed373e8c..d5c2eefa6d6 100644 --- a/components/test_raftstore/src/util.rs +++ 
b/components/test_raftstore/src/util.rs @@ -13,7 +13,7 @@ use collections::HashMap; use encryption_export::{ data_key_manager_from_config, DataKeyManager, FileConfig, MasterKeyConfig, }; -use engine_rocks::{config::BlobRunMode, RocksEngine, RocksSnapshot}; +use engine_rocks::{config::BlobRunMode, RocksEngine, RocksSnapshot, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{ Engines, Iterable, Peekable, RaftEngineDebug, RaftEngineReadOnly, ALL_CFS, CF_DEFAULT, CF_RAFT, @@ -575,6 +575,8 @@ pub fn create_test_engine( Option>, TempDir, LazyWorker, + Arc, + Option>, ) { let dir = test_util::temp_dir("test_cluster", cfg.prefer_mem); let mut cfg = cfg.clone(); @@ -593,7 +595,7 @@ pub fn create_test_engine( let sst_worker = LazyWorker::new("sst-recovery"); let scheduler = sst_worker.scheduler(); - let raft_engine = RaftTestEngine::build(&cfg, &env, &key_manager, &cache); + let (raft_engine, raft_statistics) = RaftTestEngine::build(&cfg, &env, &key_manager, &cache); let mut builder = KvEngineFactoryBuilder::new(env, &cfg, cache).sst_recovery_sender(Some(scheduler)); @@ -605,7 +607,14 @@ pub fn create_test_engine( let factory = builder.build(); let engine = factory.create_shared_db(dir.path()).unwrap(); let engines = Engines::new(engine, raft_engine); - (engines, key_manager, dir, sst_worker) + ( + engines, + key_manager, + dir, + sst_worker, + factory.rocks_statistics(), + raft_statistics, + ) } pub fn configure_for_request_snapshot(cluster: &mut Cluster) { diff --git a/etc/config-template.toml b/etc/config-template.toml index 3ddbb6fc879..8820d2e0675 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -559,11 +559,6 @@ ## Max RocksDB WAL size in total # max-total-wal-size = "4GB" -## RocksDB Statistics provides cumulative stats over time. -## Turning statistics on will introduce about 5%-10% overhead for RocksDB, but it can help you to -## know the internal status of RocksDB. 
-# enable-statistics = true - ## Dump statistics periodically in information logs. ## Same as RocksDB's default value (10 min). # stats-dump-period = "10m" @@ -972,7 +967,6 @@ # max-manifest-file-size = "20MB" # create-if-missing = true -# enable-statistics = true # stats-dump-period = "10m" ## Raft RocksDB WAL directory. diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index ccac776b508..0c2116818dc 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -30428,11 +30428,11 @@ "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_compression_ratio{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}) by (level)", + "expr": "avg(tikv_engine_compression_ratio{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}) by (cf, level)", "format": "time_series", "hide": false, "intervalFactor": 2, - "legendFormat": "level - {{level}}", + "legendFormat": "{{cf}} - level - {{level}}", "metric": "", "refId": "A", "step": 10 diff --git a/src/config/mod.rs b/src/config/mod.rs index 0945eb7ca21..a9cfdb93505 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -31,13 +31,12 @@ use engine_rocks::{ raw::{ BlockBasedOptions, Cache, ChecksumType, CompactionPriority, DBCompactionStyle, DBCompressionType, DBRateLimiterMode, DBRecoveryMode, Env, PrepopulateBlockCache, - Statistics, }, util::{FixedPrefixSliceTransform, FixedSuffixSliceTransform, NoopSliceTransform}, RaftDbLogger, RangePropertiesCollectorFactory, RawMvccPropertiesCollectorFactory, - RocksCfOptions, RocksDbOptions, RocksEngine, RocksEventListener, RocksTitanDbOptions, - RocksdbLogger, TtlPropertiesCollectorFactory, DEFAULT_PROP_KEYS_INDEX_DISTANCE, - DEFAULT_PROP_SIZE_INDEX_DISTANCE, + RocksCfOptions, RocksDbOptions, RocksEngine, RocksEventListener, RocksStatistics, + RocksTitanDbOptions, RocksdbLogger, TtlPropertiesCollectorFactory, + 
DEFAULT_PROP_KEYS_INDEX_DISTANCE, DEFAULT_PROP_SIZE_INDEX_DISTANCE, }; use engine_traits::{ CfOptions as _, DbOptions as _, MiscExt, TitanCfOptions as _, CF_DEFAULT, CF_LOCK, CF_RAFT, @@ -1038,6 +1037,8 @@ pub struct DbConfig { pub create_if_missing: bool, pub max_open_files: i32, #[online_config(skip)] + #[doc(hidden)] + #[serde(skip_serializing)] pub enable_statistics: bool, #[online_config(skip)] pub stats_dump_period: ReadableDuration, @@ -1156,7 +1157,7 @@ impl DbConfig { } } - pub fn build_opt(&self, stats: Option<&Statistics>) -> RocksDbOptions { + pub fn build_opt(&self, stats: Option<&RocksStatistics>) -> RocksDbOptions { let mut opts = RocksDbOptions::default(); opts.set_wal_recovery_mode(self.wal_recovery_mode); if !self.wal_dir.is_empty() { @@ -1172,11 +1173,9 @@ impl DbConfig { opts.set_max_manifest_file_size(self.max_manifest_file_size.0); opts.create_if_missing(self.create_if_missing); opts.set_max_open_files(self.max_open_files); - if self.enable_statistics { - match stats { - Some(stats) => opts.set_statistics(stats), - None => opts.set_statistics(&Statistics::new_titan()), - } + match stats { + Some(stats) => opts.set_statistics(stats), + None => opts.set_statistics(&RocksStatistics::new_titan()), } opts.set_stats_dump_period_sec(self.stats_dump_period.as_secs() as usize); opts.set_compaction_readahead_size(self.compaction_readahead_size.0); @@ -1296,6 +1295,9 @@ impl DbConfig { ) .into()); } + if !self.enable_statistics { + warn!("kvdb: ignoring `enable_statistics`, statistics is always on.") + } Ok(()) } @@ -1411,6 +1413,8 @@ pub struct RaftDbConfig { pub create_if_missing: bool, pub max_open_files: i32, #[online_config(skip)] + #[doc(hidden)] + #[serde(skip_serializing)] pub enable_statistics: bool, #[online_config(skip)] pub stats_dump_period: ReadableDuration, @@ -1499,9 +1503,7 @@ impl RaftDbConfig { opts.set_max_manifest_file_size(self.max_manifest_file_size.0); opts.create_if_missing(self.create_if_missing); 
opts.set_max_open_files(self.max_open_files); - if self.enable_statistics { - opts.set_statistics(&Statistics::new_titan()); - } + opts.set_statistics(&RocksStatistics::new_titan()); opts.set_stats_dump_period_sec(self.stats_dump_period.as_secs() as usize); opts.set_compaction_readahead_size(self.compaction_readahead_size.0); opts.set_max_log_file_size(self.info_log_max_size.0); @@ -1544,6 +1546,9 @@ impl RaftDbConfig { ); } } + if !self.enable_statistics { + warn!("raftdb: ignoring `enable_statistics`, statistics is always on.") + } Ok(()) } } diff --git a/src/server/debug.rs b/src/server/debug.rs index 666e2ca33e7..9445133239f 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -5,6 +5,7 @@ use std::{ iter::FromIterator, path::Path, result, + sync::Arc, thread::{Builder as ThreadBuilder, JoinHandle}, }; @@ -12,12 +13,12 @@ use collections::HashSet; use engine_rocks::{ raw::{CompactOptions, DBBottommostLevelCompaction}, util::get_cf_handle, - RocksEngine, RocksEngineIterator, RocksMvccProperties, RocksWriteBatchVec, + RocksEngine, RocksEngineIterator, RocksMvccProperties, RocksStatistics, RocksWriteBatchVec, }; use engine_traits::{ - Engines, IterOptions, Iterable, Iterator as EngineIterator, Mutable, MvccProperties, Peekable, - RaftEngine, RaftLogBatch, Range, RangePropertiesExt, SyncMutable, WriteBatch, WriteBatchExt, - WriteOptions, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, + Engines, IterOptions, Iterable, Iterator as EngineIterator, MiscExt, Mutable, MvccProperties, + Peekable, RaftEngine, RaftLogBatch, Range, RangePropertiesExt, SyncMutable, WriteBatch, + WriteBatchExt, WriteOptions, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use kvproto::{ debugpb::{self, Db as DbType}, @@ -127,6 +128,8 @@ trait InnerRocksEngineExtractor { #[derive(Clone)] pub struct Debugger { engines: Engines, + kv_statistics: Option>, + raft_statistics: Option>, reset_to_version_manager: ResetToVersionManager, cfg_controller: ConfigController, } @@ -159,15 +162,41 @@ impl 
Debugger { let reset_to_version_manager = ResetToVersionManager::new(engines.kv.clone()); Debugger { engines, + kv_statistics: None, + raft_statistics: None, reset_to_version_manager, cfg_controller, } } + pub fn set_kv_statistics(&mut self, s: Option>) { + self.kv_statistics = s; + } + + pub fn set_raft_statistics(&mut self, s: Option>) { + self.raft_statistics = s; + } + pub fn get_engine(&self) -> &Engines { &self.engines } + pub fn dump_kv_stats(&self) -> Result { + let mut kv_str = box_try!(MiscExt::dump_stats(&self.engines.kv)); + if let Some(s) = self.kv_statistics.as_ref() && let Some(s) = s.to_string() { + kv_str.push_str(&s); + } + Ok(kv_str) + } + + pub fn dump_raft_stats(&self) -> Result { + let mut raft_str = box_try!(RaftEngine::dump_stats(&self.engines.raft)); + if let Some(s) = self.raft_statistics.as_ref() && let Some(s) = s.to_string() { + raft_str.push_str(&s); + } + Ok(raft_str) + } + /// Get all regions holding region meta data from raft CF in KV storage. pub fn get_all_regions_in_store(&self) -> Result> { let db = &self.engines.kv; diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index c21599f47a6..2c31c9522b1 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -3,9 +3,9 @@ use std::{path::Path, sync::Arc}; use engine_rocks::{ - raw::{Cache, Env, Statistics}, + raw::{Cache, Env}, CompactedEventSender, CompactionListener, FlowListener, RocksCfOptions, RocksCompactionJobInfo, - RocksDbOptions, RocksEngine, RocksEventListener, RocksPersistenceListener, + RocksDbOptions, RocksEngine, RocksEventListener, RocksPersistenceListener, RocksStatistics, }; use engine_traits::{ CompactionJobInfo, MiscExt, PersistenceListener, Result, StateStorage, TabletContext, @@ -28,7 +28,7 @@ struct FactoryInner { api_version: ApiVersion, flow_listener: Option, sst_recovery_sender: Option>, - statistics: Statistics, + statistics: Arc, state_storage: Option>, lite: bool, } @@ -40,6 +40,7 @@ pub struct 
KvEngineFactoryBuilder { impl KvEngineFactoryBuilder { pub fn new(env: Arc, config: &TikvConfig, cache: Cache) -> Self { + let statistics = Arc::new(RocksStatistics::new_titan()); Self { inner: FactoryInner { env, @@ -49,7 +50,7 @@ impl KvEngineFactoryBuilder { api_version: config.storage.api_version(), flow_listener: None, sst_recovery_sender: None, - statistics: Statistics::new_titan(), + statistics, state_storage: None, lite: false, }, @@ -132,12 +133,16 @@ impl KvEngineFactory { )) } + pub fn rocks_statistics(&self) -> Arc { + self.inner.statistics.clone() + } + fn db_opts(&self) -> RocksDbOptions { // Create kv engine. let mut db_opts = self .inner .rocksdb_config - .build_opt(Some(&self.inner.statistics)); + .build_opt(Some(self.inner.statistics.as_ref())); db_opts.set_env(self.inner.env.clone()); if !self.inner.lite { db_opts.add_event_listener(RocksEventListener::new( diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index ae0d53bacda..e0ec9173ad5 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -1,7 +1,9 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. -use engine_rocks::RocksEngine; -use engine_traits::{Engines, MiscExt, RaftEngine}; +use std::sync::Arc; + +use engine_rocks::{RocksEngine, RocksStatistics}; +use engine_traits::{Engines, RaftEngine}; use futures::{ future::{Future, FutureExt, TryFutureExt}, sink::SinkExt, @@ -54,11 +56,15 @@ impl Service { /// `GcWorker`. 
pub fn new( engines: Engines, + kv_statistics: Option>, + raft_statistics: Option>, pool: Handle, raft_router: T, cfg_controller: ConfigController, ) -> Self { - let debugger = Debugger::new(engines, cfg_controller); + let mut debugger = Debugger::new(engines, cfg_controller); + debugger.set_kv_statistics(kv_statistics); + debugger.set_raft_statistics(raft_statistics); Service { pool, debugger, @@ -353,9 +359,8 @@ impl debugpb::Debug for Service pd_client::Result, R: std::fmt::Debug, { - run_on_bad_connection(client, |c| { - f(c).unwrap_err(); - f(c).unwrap(); - }); + let mut success = false; + for _ in 0..3 { + run_on_bad_connection(client, |c| { + f(c).unwrap_err(); + success = f(c).is_ok(); + }); + if success { + return; + } + } + panic!("failed to retry after three attempts"); } test_retry_success(&mut client, |c| { diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 73dfdbaa977..4e22463503a 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -289,7 +289,7 @@ fn test_serde_custom_tikv_config() { max_manifest_file_size: ReadableSize::mb(12), create_if_missing: false, max_open_files: 12_345, - enable_statistics: false, + enable_statistics: true, stats_dump_period: ReadableDuration::minutes(12), compaction_readahead_size: ReadableSize::kb(1), info_log_max_size: ReadableSize::kb(1), @@ -584,7 +584,7 @@ fn test_serde_custom_tikv_config() { max_manifest_file_size: ReadableSize::mb(12), create_if_missing: false, max_open_files: 12_345, - enable_statistics: false, + enable_statistics: true, stats_dump_period: ReadableDuration::minutes(12), compaction_readahead_size: ReadableSize::kb(1), info_log_max_size: ReadableSize::kb(1), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 961eb59a77b..900e1c36aa6 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -252,7 +252,6 @@ 
max-background-flushes = 4 max-manifest-file-size = "12MB" create-if-missing = false max-open-files = 12345 -enable-statistics = false stats-dump-period = "12m" compaction-readahead-size = "1KB" info-log-max-size = "1KB" @@ -504,7 +503,6 @@ max-background-flushes = 4 max-manifest-file-size = "12MB" create-if-missing = false max-open-files = 12345 -enable-statistics = false stats-dump-period = "12m" compaction-readahead-size = "1KB" info-log-max-size = "1KB" diff --git a/tests/integrations/raftstore/test_compact_lock_cf.rs b/tests/integrations/raftstore/test_compact_lock_cf.rs index c8ee96c7c67..fbc7629c73f 100644 --- a/tests/integrations/raftstore/test_compact_lock_cf.rs +++ b/tests/integrations/raftstore/test_compact_lock_cf.rs @@ -15,11 +15,9 @@ fn flush_then_check(cluster: &mut Cluster, interval: u64, writt flush(cluster); // Wait for compaction. sleep_ms(interval * 2); - for engines in cluster.engines.values() { - let compact_write_bytes = engines - .kv - .as_inner() - .get_statistics_ticker_count(DBStatisticsTickerType::CompactWriteBytes); + for statistics in &cluster.kv_statistics { + let compact_write_bytes = + statistics.get_ticker_count(DBStatisticsTickerType::CompactWriteBytes); if written { assert!(compact_write_bytes > 0); } else { From 679c773040d78000e90989a08a9461c1a816963f Mon Sep 17 00:00:00 2001 From: Lucas Date: Tue, 20 Dec 2022 19:58:55 +0800 Subject: [PATCH 0417/1149] raftstore-v2: fix compilation errors. (#13969) close tikv/tikv#13970 Fix compilation errors. 
Signed-off-by: Lucasliang --- components/raftstore-v2/src/router/response_channel.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index 01c1565ec62..2cb75acccfc 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -603,6 +603,7 @@ pub type FlushChannel = BaseChannel<()>; #[cfg(feature = "testexport")] pub type FlushSubscriber = BaseSubscriber<()>; +#[cfg(feature = "testexport")] impl Debug for FlushChannel { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "FlushChannel") From 99c70a3859489152b88e3aa064746221c3240877 Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 21 Dec 2022 13:26:54 +0800 Subject: [PATCH 0418/1149] *: implement other essential functions for raftkv2 (#13967) ref tikv/tikv#12842 Functions like flashback will be supported in next milestone. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/batch/store.rs | 8 + components/raftstore-v2/src/fsm/peer.rs | 8 + components/raftstore-v2/src/lib.rs | 2 +- .../src/operation/command/admin/mod.rs | 2 +- .../src/operation/command/admin/split.rs | 48 +++- .../raftstore-v2/src/operation/command/mod.rs | 2 +- components/raftstore-v2/src/operation/mod.rs | 4 +- components/raftstore-v2/src/operation/pd.rs | 18 +- .../src/operation/ready/snapshot.rs | 23 ++ components/raftstore-v2/src/router/imp.rs | 5 + components/raftstore-v2/src/router/message.rs | 31 ++- components/raftstore-v2/src/worker/pd/mod.rs | 16 +- .../src/worker/pd/region_heartbeat.rs | 6 +- .../raftstore-v2/src/worker/pd/split.rs | 4 +- components/raftstore/src/store/fsm/peer.rs | 93 +++----- components/raftstore/src/store/util.rs | 41 ++++ components/server/src/server.rs | 9 +- components/test_raftstore/src/server.rs | 7 +- components/tikv_kv/src/lib.rs | 2 +- components/tikv_kv/src/mock_engine.rs | 2 +- 
components/tikv_kv/src/rocksdb_engine.rs | 4 +- src/server/mod.rs | 3 +- src/server/node.rs | 165 +++++--------- src/server/raftkv/mod.rs | 4 +- src/server/raftkv2/mod.rs | 128 +++++------ src/server/raftkv2/node.rs | 210 ++++++++++++++++++ src/server/raftkv2/raft_extension.rs | 109 +++++++++ src/server/server.rs | 2 +- src/server/service/kv.rs | 4 +- 29 files changed, 670 insertions(+), 290 deletions(-) create mode 100644 src/server/raftkv2/node.rs create mode 100644 src/server/raftkv2/raft_extension.rs diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index ac767bcd7ce..98075969c66 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -508,6 +508,14 @@ impl StoreRouter { &self.logger } + #[inline] + pub fn check_send(&self, addr: u64, msg: PeerMsg) -> crate::Result<()> { + match self.router.send(addr, msg) { + Ok(()) => Ok(()), + Err(e) => Err(raftstore::router::handle_send_error(addr, e)), + } + } + pub fn send_raft_message( &self, msg: Box, diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 20e7a8f3c2b..f5425295347 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -280,6 +280,14 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerMsg::StoreUnreachable { to_store_id } => { self.fsm.peer_mut().on_store_unreachable(to_store_id) } + PeerMsg::SnapshotSent { to_peer_id, status } => { + self.fsm.peer_mut().on_snapshot_sent(to_peer_id, status) + } + PeerMsg::RequestSplit { request, ch } => { + self.fsm + .peer_mut() + .on_request_split(self.store_ctx, request, ch) + } #[cfg(feature = "testexport")] PeerMsg::WaitFlush(ch) => self.fsm.peer_mut().on_wait_flush(ch), } diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 848e5fda8b2..d8327549da6 100644 --- a/components/raftstore-v2/src/lib.rs +++ 
b/components/raftstore-v2/src/lib.rs @@ -40,4 +40,4 @@ pub use batch::{create_store_batch_system, StoreRouter, StoreSystem}; pub use bootstrap::Bootstrap; pub use fsm::StoreMeta; pub use operation::{SimpleWriteBinary, SimpleWriteEncoder, StateStorage}; -pub use raftstore::{Error, Result}; +pub use raftstore::{store::Config, Error, Result}; diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index fcb968a2195..9b7dce8570f 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -9,7 +9,7 @@ use kvproto::raft_cmdpb::{AdminCmdType, RaftCmdRequest}; use protobuf::Message; use raftstore::store::{cmd_resp, fsm::apply, msg::ErrorCallback}; use slog::info; -pub use split::{SplitInit, SplitResult, SPLIT_PREFIX}; +pub use split::{RequestSplit, SplitInit, SplitResult, SPLIT_PREFIX}; use tikv_util::box_err; use txn_types::WriteBatchFlags; diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 6255b3ba9b9..8bf23da0fd6 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -33,7 +33,7 @@ use engine_traits::{Checkpointer, KvEngine, RaftEngine, RaftLogBatch, TabletCont use fail::fail_point; use itertools::Itertools; use kvproto::{ - metapb::{self, Region}, + metapb::{self, Region, RegionEpoch}, raft_cmdpb::{AdminRequest, AdminResponse, RaftCmdRequest, SplitRequest}, raft_serverpb::RaftSnapshotData, }; @@ -42,6 +42,7 @@ use raft::{prelude::Snapshot, INVALID_ID}; use raftstore::{ coprocessor::split_observer::{is_valid_split_key, strip_timestamp_if_exists}, store::{ + cmd_resp, fsm::apply::validate_batch_split, metrics::PEER_ADMIN_CMD_COUNTER, snap::TABLET_SNAPSHOT_VERSION, @@ -58,7 +59,8 @@ use crate::{ fsm::ApplyResReporter, 
operation::AdminCmdResult, raft::{Apply, Peer}, - router::{PeerMsg, StoreMsg}, + router::{CmdResChannel, PeerMsg, StoreMsg}, + Error, }; pub const SPLIT_PREFIX: &str = "split_"; @@ -155,7 +157,49 @@ fn pre_propose_split(logger: &Logger, req: &mut AdminRequest, region: &Region) - } } +#[derive(Debug)] +pub struct RequestSplit { + pub epoch: RegionEpoch, + pub split_keys: Vec>, + pub source: Box, +} + impl Peer { + pub fn on_request_split( + &mut self, + ctx: &mut StoreContext, + rs: RequestSplit, + ch: CmdResChannel, + ) { + info!( + self.logger, + "on split"; + "split_keys" => %KeysInfoFormatter(rs.split_keys.iter()), + "source" => &rs.source, + ); + if !self.is_leader() { + // region on this store is no longer leader, skipped. + info!(self.logger, "not leader, skip."); + ch.set_result(cmd_resp::new_error(Error::NotLeader( + self.region_id(), + self.leader(), + ))); + return; + } + if let Err(e) = util::validate_split_region( + self.region_id(), + self.peer_id(), + self.region(), + &rs.epoch, + &rs.split_keys, + ) { + info!(self.logger, "invalid split request"; "err" => ?e, "source" => &rs.source); + ch.set_result(cmd_resp::new_error(e)); + return; + } + self.ask_batch_split_pd(ctx, rs.split_keys, ch); + } + pub fn propose_split( &mut self, store_ctx: &mut StoreContext, diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index f6ac6ac7077..116edec91c3 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -49,7 +49,7 @@ mod admin; mod control; mod write; -pub use admin::{AdminCmdResult, SplitInit, SplitResult, SPLIT_PREFIX}; +pub use admin::{AdminCmdResult, RequestSplit, SplitInit, SplitResult, SPLIT_PREFIX}; pub use control::ProposalControl; pub use write::{ SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, diff --git a/components/raftstore-v2/src/operation/mod.rs 
b/components/raftstore-v2/src/operation/mod.rs index 5e6971b3346..80443f0ef60 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -7,8 +7,8 @@ mod query; mod ready; pub use command::{ - AdminCmdResult, CommittedEntries, ProposalControl, SimpleWriteBinary, SimpleWriteEncoder, - SimpleWriteReqDecoder, SimpleWriteReqEncoder, + AdminCmdResult, CommittedEntries, ProposalControl, RequestSplit, SimpleWriteBinary, + SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, }; pub use life::DestroyProgress; pub use ready::{ diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index d80cee3c7d1..1c62c092878 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -12,7 +12,7 @@ use crate::{ batch::StoreContext, fsm::{PeerFsmDelegate, Store, StoreFsmDelegate}, raft::Peer, - router::{PeerTick, StoreTick}, + router::{CmdResChannel, PeerTick, StoreTick}, worker::pd, }; @@ -93,8 +93,6 @@ impl Peer { error!( self.logger, "failed to notify pd"; - "region_id" => self.region_id(), - "peer_id" => self.peer_id(), "err" => ?e, ); return; @@ -148,8 +146,6 @@ impl Peer { error!( self.logger, "failed to get peer from cache"; - "region_id" => self.region_id(), - "peer_id" => self.peer_id(), "get_peer_id" => id, ); } @@ -167,27 +163,29 @@ impl Peer { error!( self.logger, "failed to notify pd with DestroyPeer"; - "region_id" => self.region_id(), - "peer_id" => self.peer_id(), "err" => %e, ); } } #[inline] - pub fn ask_batch_split_pd(&self, ctx: &StoreContext, split_keys: Vec>) { + pub fn ask_batch_split_pd( + &self, + ctx: &StoreContext, + split_keys: Vec>, + ch: CmdResChannel, + ) { let task = pd::Task::AskBatchSplit { region: self.region().clone(), split_keys, peer: self.peer().clone(), right_derive: ctx.cfg.right_derive_when_split, + ch, }; if let Err(e) = ctx.pd_scheduler.schedule(task) { error!( self.logger, "failed to 
notify pd with AskBatchSplit"; - "region_id" => self.region_id(), - "peer_id" => self.peer_id(), "err" => %e, ); } diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 1f4a1fee268..7339df22fa9 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -116,6 +116,29 @@ impl Peer { } } + pub fn on_snapshot_sent(&mut self, to_peer_id: u64, status: raft::SnapshotStatus) { + let to_peer = match self.peer_from_cache(to_peer_id) { + Some(peer) => peer, + None => { + // If to_peer is gone, ignore this snapshot status + warn!( + self.logger, + "peer not found, ignore snapshot status"; + "to_peer_id" => to_peer_id, + "status" => ?status, + ); + return; + } + }; + info!( + self.logger, + "report snapshot status"; + "to" => ?to_peer, + "status" => ?status, + ); + self.raft_group_mut().report_snapshot(to_peer_id, status); + } + pub fn on_applied_snapshot(&mut self, ctx: &mut StoreContext) { let persisted_index = self.persisted_index(); let first_index = self.storage().entry_storage().first_index(); diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index e838cefb743..3f10e08dee2 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -68,6 +68,11 @@ impl RaftRouter { self.router.send(addr, msg) } + #[inline] + pub fn check_send(&self, addr: u64, msg: PeerMsg) -> crate::Result<()> { + self.router.check_send(addr, msg) + } + pub fn store_meta(&self) -> &Arc> { self.local_reader.store_meta() } diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index d5635574978..4c36f474ea9 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -3,6 +3,7 @@ // #[PerformanceCriticalPath] use kvproto::{ + metapb, 
raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}, raft_serverpb::RaftMessage, }; @@ -15,7 +16,7 @@ use super::{ }, ApplyRes, }; -use crate::operation::{SimpleWriteBinary, SplitInit}; +use crate::operation::{RequestSplit, SimpleWriteBinary, SplitInit}; #[derive(Debug, Clone, Copy, PartialEq, Hash)] #[repr(u8)] @@ -164,6 +165,15 @@ pub enum PeerMsg { StoreUnreachable { to_store_id: u64, }, + /// Reports whether the snapshot sending is successful or not. + SnapshotSent { + to_peer_id: u64, + status: raft::SnapshotStatus, + }, + RequestSplit { + request: RequestSplit, + ch: CmdResChannel, + }, /// A message that used to check if a flush is happened. #[cfg(feature = "testexport")] WaitFlush(super::FlushChannel), @@ -195,6 +205,25 @@ impl PeerMsg { sub, ) } + + pub fn request_split( + epoch: metapb::RegionEpoch, + split_keys: Vec>, + source: String, + ) -> (Self, CmdResSubscriber) { + let (ch, sub) = CmdResChannel::pair(); + ( + PeerMsg::RequestSplit { + request: RequestSplit { + epoch, + split_keys, + source: source.into_boxed_str(), + }, + ch, + }, + sub, + ) + } } #[derive(Debug)] diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 15bb2e73ff8..18b01a8026a 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -16,7 +16,10 @@ use slog::{error, info, Logger}; use tikv_util::{time::UnixSecs, worker::Runnable}; use yatp::{task::future::TaskCell, Remote}; -use crate::{batch::StoreRouter, router::PeerMsg}; +use crate::{ + batch::StoreRouter, + router::{CmdResChannel, PeerMsg}, +}; mod region_heartbeat; mod split; @@ -39,6 +42,7 @@ pub enum Task { split_keys: Vec>, peer: metapb::Peer, right_derive: bool, + ch: CmdResChannel, }, ReportBatchSplit { regions: Vec, @@ -174,7 +178,8 @@ where split_keys, peer, right_derive, - } => self.handle_ask_batch_split(region, split_keys, peer, right_derive), + ch, + } => self.handle_ask_batch_split(region, split_keys, peer, 
right_derive, ch), Task::ReportBatchSplit { regions } => self.handle_report_batch_split(regions), Task::UpdateMaxTimestamp { region_id, @@ -208,6 +213,7 @@ mod requests { use raft::eraftpb::ConfChangeType; use super::*; + use crate::router::RaftRequest; pub fn send_admin_request( logger: &Logger, @@ -216,6 +222,7 @@ mod requests { epoch: metapb::RegionEpoch, peer: metapb::Peer, request: AdminRequest, + ch: Option, ) where EK: KvEngine, ER: RaftEngine, @@ -228,7 +235,10 @@ mod requests { req.mut_header().set_peer(peer); req.set_admin_request(request); - let (msg, _) = PeerMsg::admin_command(req); + let msg = match ch { + Some(ch) => PeerMsg::AdminCommand(RaftRequest::new(req, ch)), + None => PeerMsg::admin_command(req).0, + }; if let Err(e) = router.send(region_id, msg) { error!( logger, diff --git a/components/raftstore-v2/src/worker/pd/region_heartbeat.rs b/components/raftstore-v2/src/worker/pd/region_heartbeat.rs index 4096467087a..31f84801ed2 100644 --- a/components/raftstore-v2/src/worker/pd/region_heartbeat.rs +++ b/components/raftstore-v2/src/worker/pd/region_heartbeat.rs @@ -184,7 +184,7 @@ where change_peer.get_change_type(), change_peer.take_peer(), ); - send_admin_request(&logger, &router, region_id, epoch, peer, req); + send_admin_request(&logger, &router, region_id, epoch, peer, req, None); } else if resp.has_change_peer_v2() { PD_HEARTBEAT_COUNTER_VEC .with_label_values(&["change peer"]) @@ -198,7 +198,7 @@ where "changes" => ?change_peer_v2.get_changes(), ); let req = new_change_peer_v2_request(change_peer_v2.take_changes().into()); - send_admin_request(&logger, &router, region_id, epoch, peer, req); + send_admin_request(&logger, &router, region_id, epoch, peer, req, None); } else if resp.has_transfer_leader() { PD_HEARTBEAT_COUNTER_VEC .with_label_values(&["transfer leader"]) @@ -217,7 +217,7 @@ where transfer_leader.take_peer(), transfer_leader.take_peers().into(), ); - send_admin_request(&logger, &router, region_id, epoch, peer, req); + 
send_admin_request(&logger, &router, region_id, epoch, peer, req, None); } else if resp.has_split_region() { // TODO info!(logger, "pd asks for split but ignored"); diff --git a/components/raftstore-v2/src/worker/pd/split.rs b/components/raftstore-v2/src/worker/pd/split.rs index 3cb85f6698c..cb7c3ad9308 100644 --- a/components/raftstore-v2/src/worker/pd/split.rs +++ b/components/raftstore-v2/src/worker/pd/split.rs @@ -9,6 +9,7 @@ use pd_client::PdClient; use slog::{info, warn}; use super::{requests::*, Runner}; +use crate::router::CmdResChannel; fn new_batch_split_region_request( split_keys: Vec>, @@ -42,6 +43,7 @@ where split_keys: Vec>, peer: metapb::Peer, right_derive: bool, + ch: CmdResChannel, ) { if split_keys.is_empty() { info!(self.logger, "empty split key, skip ask batch split"; @@ -71,7 +73,7 @@ where ); let region_id = region.get_id(); let epoch = region.take_region_epoch(); - send_admin_request(&logger, &router, region_id, epoch, peer, req); + send_admin_request(&logger, &router, region_id, epoch, peer, req, Some(ch)); } Err(e) => { warn!( diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 311e7e58a12..bad3ac2077d 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -5553,7 +5553,34 @@ where "split_keys" => %KeysInfoFormatter(split_keys.iter()), "source" => source, ); - if let Err(e) = self.validate_split_region(®ion_epoch, &split_keys) { + + if !self.fsm.peer.is_leader() { + // region on this store is no longer leader, skipped. 
+ info!( + "not leader, skip proposing split"; + "region_id" => self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + ); + cb.invoke_with_response(new_error(Error::NotLeader( + self.region_id(), + self.fsm.peer.get_peer_from_cache(self.fsm.peer.leader_id()), + ))); + return; + } + if let Err(e) = util::validate_split_region( + self.fsm.region_id(), + self.fsm.peer_id(), + self.region(), + ®ion_epoch, + &split_keys, + ) { + info!( + "invalid split request"; + "err" => ?e, + "region_id" => self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + "source" => %source + ); cb.invoke_with_response(new_error(e)); return; } @@ -5583,70 +5610,6 @@ where } } - fn validate_split_region( - &mut self, - epoch: &metapb::RegionEpoch, - split_keys: &[Vec], - ) -> Result<()> { - if split_keys.is_empty() { - error!( - "no split key is specified."; - "region_id" => self.fsm.region_id(), - "peer_id" => self.fsm.peer_id(), - ); - return Err(box_err!("{} no split key is specified.", self.fsm.peer.tag)); - } - for key in split_keys { - if key.is_empty() { - error!( - "split key should not be empty!!!"; - "region_id" => self.fsm.region_id(), - "peer_id" => self.fsm.peer_id(), - ); - return Err(box_err!( - "{} split key should not be empty", - self.fsm.peer.tag - )); - } - } - if !self.fsm.peer.is_leader() { - // region on this store is no longer leader, skipped. - info!( - "not leader, skip."; - "region_id" => self.fsm.region_id(), - "peer_id" => self.fsm.peer_id(), - ); - return Err(Error::NotLeader( - self.region_id(), - self.fsm.peer.get_peer_from_cache(self.fsm.peer.leader_id()), - )); - } - - let region = self.fsm.peer.region(); - let latest_epoch = region.get_region_epoch(); - - // This is a little difference for `check_region_epoch` in region split case. - // Here we just need to check `version` because `conf_ver` will be update - // to the latest value of the peer, and then send to PD. 
- if latest_epoch.get_version() != epoch.get_version() { - info!( - "epoch changed, retry later"; - "region_id" => self.fsm.region_id(), - "peer_id" => self.fsm.peer_id(), - "prev_epoch" => ?region.get_region_epoch(), - "epoch" => ?epoch, - ); - return Err(Error::EpochNotMatch( - format!( - "{} epoch changed {:?} != {:?}, retry later", - self.fsm.peer.tag, latest_epoch, epoch - ), - vec![region.to_owned()], - )); - } - Ok(()) - } - fn on_approximate_region_size(&mut self, size: u64) { self.fsm.peer.approximate_size = Some(size); self.register_split_region_check_tick(); diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 78f024997cf..2d27b56fda5 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1621,6 +1621,47 @@ impl LatencyInspector { } } +pub fn validate_split_region( + region_id: u64, + peer_id: u64, + region: &Region, + epoch: &RegionEpoch, + split_keys: &[Vec], +) -> Result<()> { + if split_keys.is_empty() { + return Err(box_err!( + "[region {}] {} no split key is specified.", + region_id, + peer_id + )); + } + + let latest_epoch = region.get_region_epoch(); + // This is a little difference for `check_region_epoch` in region split case. + // Here we just need to check `version` because `conf_ver` will be update + // to the latest value of the peer, and then send to PD. 
+ if latest_epoch.get_version() != epoch.get_version() { + return Err(Error::EpochNotMatch( + format!( + "[region {}] {} epoch changed {:?} != {:?}, retry later", + region_id, peer_id, latest_epoch, epoch + ), + vec![region.to_owned()], + )); + } + for key in split_keys { + if key.is_empty() { + return Err(box_err!( + "[region {}] {} split key should not be empty", + region_id, + peer_id + )); + } + check_key_in_region(key, region)?; + } + Ok(()) +} + #[cfg(test)] mod tests { use std::thread; diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 470e3a41861..ffc5272c673 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -92,7 +92,6 @@ use tikv::{ read_pool::{build_yatp_read_pool, ReadPool, ReadPoolConfigManager}, server::{ config::{Config as ServerConfig, ServerConfigManager}, - create_raft_storage, gc_worker::{AutoGcConfig, GcWorker}, lock_manager::LockManager, raftkv::ReplicaReadLockChecker, @@ -108,7 +107,7 @@ use tikv::{ config_manager::StorageConfigManger, mvcc::MvccConsistencyCheckObserver, txn::flow_controller::{EngineFlowController, FlowController}, - Engine, + Engine, Storage, }, }; use tikv_util::{ @@ -797,7 +796,7 @@ where storage_read_pools.handle() }; - let storage = create_raft_storage::<_, _, _, F, _>( + let storage = Storage::<_, _, F>::from_engine( engines.engine.clone(), &self.config.storage, storage_read_pool_handle, @@ -825,7 +824,7 @@ where let (resolver, state) = resolve::new_resolver( self.pd_client.clone(), &self.background_worker, - storage.get_engine().raft_extension().clone(), + storage.get_engine().raft_extension(), ); self.resolver = Some(resolver); @@ -1216,7 +1215,7 @@ where self.kv_statistics.clone(), self.raft_statistics.clone(), servers.server.get_debug_thread_pool().clone(), - engines.engine.raft_extension().clone(), + engines.engine.raft_extension(), self.cfg_controller.as_ref().unwrap().clone(), ); if servers diff --git a/components/test_raftstore/src/server.rs 
b/components/test_raftstore/src/server.rs index e3cfb298c59..12d9982fea6 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -52,7 +52,6 @@ use tikv::{ import::{ImportSstService, SstImporter}, read_pool::ReadPool, server::{ - create_raft_storage, gc_worker::GcWorker, load_statistics::ThreadLoadPool, lock_manager::LockManager, @@ -66,7 +65,7 @@ use tikv::{ self, kv::{FakeExtension, SnapContext}, txn::flow_controller::{EngineFlowController, FlowController}, - Engine, + Engine, Storage, }, }; use tikv_util::{ @@ -401,8 +400,8 @@ impl ServerCluster { cfg.quota.max_delay_duration, cfg.quota.enable_auto_tune, )); - let extension = engine.raft_extension().clone(); - let store = create_raft_storage::<_, _, _, F, _>( + let extension = engine.raft_extension(); + let store = Storage::<_, _, F>::from_engine( engine, &cfg.storage, storage_read_pool.handle(), diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index bf277282bd8..5af54ee61b6 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -311,7 +311,7 @@ pub trait Engine: Send + Clone + 'static { type RaftExtension: raft_extension::RaftExtension = FakeExtension; /// Get the underlying raft extension. 
- fn raft_extension(&self) -> &Self::RaftExtension { + fn raft_extension(&self) -> Self::RaftExtension { unimplemented!() } diff --git a/components/tikv_kv/src/mock_engine.rs b/components/tikv_kv/src/mock_engine.rs index dc812e84d93..69a61d58963 100644 --- a/components/tikv_kv/src/mock_engine.rs +++ b/components/tikv_kv/src/mock_engine.rs @@ -154,7 +154,7 @@ impl Engine for MockEngine { } type RaftExtension = ::RaftExtension; - fn raft_extension(&self) -> &Self::RaftExtension { + fn raft_extension(&self) -> Self::RaftExtension { self.base.raft_extension() } diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 065766ae254..21099974d2d 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -233,8 +233,8 @@ impl Engine for RocksEngine { } type RaftExtension = RE; - fn raft_extension(&self) -> &Self::RaftExtension { - &self.ext + fn raft_extension(&self) -> Self::RaftExtension { + self.ext.clone() } fn modify_on_kv_engine(&self, region_modifies: HashMap>) -> Result<()> { diff --git a/src/server/mod.rs b/src/server/mod.rs index 0e4a3616a6c..0bb6da62ac7 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -32,10 +32,11 @@ pub use self::{ config::{Config, ServerConfigManager, DEFAULT_CLUSTER_ID, DEFAULT_LISTENING_ADDR}, errors::{Error, Result}, metrics::{CONFIG_ROCKSDB_GAUGE, CPU_CORES_QUOTA_GAUGE, MEM_TRACE_SUM_GAUGE}, - node::{create_raft_storage, Node}, + node::Node, proxy::{build_forward_option, get_target_address, Proxy}, raft_client::{ConnectionBuilder, RaftClient}, raftkv::RaftKv, + raftkv2::{NodeV2, RaftKv2}, resolve::{PdStoreAddrResolver, StoreAddrResolver}, server::{Server, GRPC_THREAD_PREFIX}, transport::ServerTransport, diff --git a/src/server/node.rs b/src/server/node.rs index 0b654921f59..e36e980e1d3 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -6,7 +6,7 @@ use std::{ time::Duration, }; -use api_version::{api_v2::TIDB_RANGES_COMPLEMENT, 
KvFormat}; +use api_version::api_v2::TIDB_RANGES_COMPLEMENT; use causal_ts::CausalTsProviderImpl; use concurrency_manager::ConcurrencyManager; use engine_traits::{Engines, Iterable, KvEngine, RaftEngine, DATA_CFS, DATA_KEY_PREFIX_LEN}; @@ -14,10 +14,9 @@ use grpcio_health::HealthService; use kvproto::{ kvrpcpb::ApiVersion, metapb, raft_serverpb::StoreIdent, replication_modepb::ReplicationStatus, }; -use pd_client::{Error as PdError, FeatureGate, PdClient, INVALID_ID}; +use pd_client::{Error as PdError, PdClient, INVALID_ID}; use raftstore::{ coprocessor::dispatcher::CoprocessorHost, - router::{LocalReadRouter, RaftStoreRouter}, store::{ self, fsm::{store::StoreMeta, ApplyRouter, RaftBatchSystem, RaftRouter}, @@ -25,68 +24,69 @@ use raftstore::{ RefreshConfigTask, SnapManager, SplitCheckTask, Transport, }, }; -use resource_metering::{CollectorRegHandle, ResourceTagFactory}; +use resource_metering::CollectorRegHandle; use tikv_util::{ config::VersionTrack, - quota_limiter::QuotaLimiter, worker::{LazyWorker, Scheduler, Worker}, }; -use super::{RaftKv, Result}; -use crate::{ - import::SstImporter, - read_pool::ReadPoolHandle, - server::Config as ServerConfig, - storage::{ - config::Config as StorageConfig, kv::FlowStatsReporter, lock_manager, - txn::flow_controller::FlowController, DynamicConfigs as StorageDynamicConfigs, Storage, - }, -}; +use super::Result; +use crate::{import::SstImporter, server::Config as ServerConfig}; const MAX_CHECK_CLUSTER_BOOTSTRAPPED_RETRY_COUNT: u64 = 60; const CHECK_CLUSTER_BOOTSTRAPPED_RETRY_INTERVAL: Duration = Duration::from_secs(3); -/// Creates a new storage engine which is backed by the Raft consensus -/// protocol. 
-pub fn create_raft_storage< - S, - EK, - R: FlowStatsReporter, - F: KvFormat, - LM: lock_manager::LockManager, ->( - engine: RaftKv, - cfg: &StorageConfig, - read_pool: ReadPoolHandle, - lock_mgr: LM, - concurrency_manager: ConcurrencyManager, - dynamic_configs: StorageDynamicConfigs, - flow_controller: Arc, - reporter: R, - resource_tag_factory: ResourceTagFactory, - quota_limiter: Arc, - feature_gate: FeatureGate, - causal_ts_provider: Option>, -) -> Result, LM, F>> -where - S: RaftStoreRouter + LocalReadRouter + 'static, - EK: KvEngine, -{ - let store = Storage::from_engine( - engine, - cfg, - read_pool, - lock_mgr, - concurrency_manager, - dynamic_configs, - flow_controller, - reporter, - resource_tag_factory, - quota_limiter, - feature_gate, - causal_ts_provider, - )?; - Ok(store) +pub(crate) fn init_store(store: Option, cfg: &ServerConfig) -> metapb::Store { + let mut store = store.unwrap_or_default(); + store.set_id(INVALID_ID); + if store.get_address().is_empty() { + if cfg.advertise_addr.is_empty() { + store.set_address(cfg.addr.clone()); + if store.get_peer_address().is_empty() { + store.set_peer_address(cfg.addr.clone()); + } + } else { + store.set_address(cfg.advertise_addr.clone()); + if store.get_peer_address().is_empty() { + store.set_peer_address(cfg.advertise_addr.clone()); + } + } + } + if store.get_status_address().is_empty() { + if cfg.advertise_status_addr.is_empty() { + store.set_status_address(cfg.status_addr.clone()); + } else { + store.set_status_address(cfg.advertise_status_addr.clone()) + } + } + if store.get_version().is_empty() { + store.set_version(env!("CARGO_PKG_VERSION").to_string()); + } + + if let Ok(path) = std::env::current_exe() { + if let Some(path) = path.parent() { + store.set_deploy_path(path.to_string_lossy().to_string()); + } + }; + + store.set_start_timestamp(chrono::Local::now().timestamp()); + if store.get_git_hash().is_empty() { + store.set_git_hash( + option_env!("TIKV_BUILD_GIT_HASH") + .unwrap_or("Unknown git 
hash") + .to_string(), + ); + } + + let mut labels = Vec::new(); + for (k, v) in &cfg.labels { + let mut label = metapb::StoreLabel::default(); + label.set_key(k.to_owned()); + label.set_value(v.to_owned()); + labels.push(label); + } + store.set_labels(labels.into()); + store } /// A wrapper for the raftstore which runs Multi-Raft. @@ -123,58 +123,7 @@ where health_service: Option, default_store: Option, ) -> Node { - let mut store = match default_store { - None => metapb::Store::default(), - Some(s) => s, - }; - store.set_id(INVALID_ID); - if store.get_address().is_empty() { - if cfg.advertise_addr.is_empty() { - store.set_address(cfg.addr.clone()); - if store.get_peer_address().is_empty() { - store.set_peer_address(cfg.addr.clone()); - } - } else { - store.set_address(cfg.advertise_addr.clone()); - if store.get_peer_address().is_empty() { - store.set_peer_address(cfg.advertise_addr.clone()); - } - } - } - if store.get_status_address().is_empty() { - if cfg.advertise_status_addr.is_empty() { - store.set_status_address(cfg.status_addr.clone()); - } else { - store.set_status_address(cfg.advertise_status_addr.clone()) - } - } - if store.get_version().is_empty() { - store.set_version(env!("CARGO_PKG_VERSION").to_string()); - } - - if let Ok(path) = std::env::current_exe() { - if let Some(path) = path.parent() { - store.set_deploy_path(path.to_string_lossy().to_string()); - } - }; - - store.set_start_timestamp(chrono::Local::now().timestamp()); - if store.get_git_hash().is_empty() { - store.set_git_hash( - option_env!("TIKV_BUILD_GIT_HASH") - .unwrap_or("Unknown git hash") - .to_string(), - ); - } - - let mut labels = Vec::new(); - for (k, v) in &cfg.labels { - let mut label = metapb::StoreLabel::default(); - label.set_key(k.to_owned()); - label.set_value(v.to_owned()); - labels.push(label); - } - store.set_labels(labels.into()); + let store = init_store(default_store, cfg); Node { cluster_id: cfg.cluster_id, diff --git a/src/server/raftkv/mod.rs 
b/src/server/raftkv/mod.rs index 607d5af71f3..c50c42c9fc6 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -364,8 +364,8 @@ where type RaftExtension = RaftRouterWrap; #[inline] - fn raft_extension(&self) -> &Self::RaftExtension { - &self.router + fn raft_extension(&self) -> Self::RaftExtension { + self.router.clone() } fn modify_on_kv_engine( diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 5bcdd131d72..f850cc74d19 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -1,89 +1,36 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{mem, pin::Pin, task::Poll}; +mod node; +mod raft_extension; +use std::{ + mem, + pin::Pin, + sync::{Arc, RwLock}, + task::Poll, +}; + +use collections::HashSet; use engine_traits::{KvEngine, RaftEngine, CF_LOCK}; use futures::{Future, Stream, StreamExt}; -use kvproto::{ - raft_cmdpb::{CmdType, RaftCmdRequest, Request}, - raft_serverpb::RaftMessage, -}; +use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, Request}; +pub use node::NodeV2; use raftstore::store::RegionSnapshot; use raftstore_v2::{ router::{ message::SimpleWrite, CmdResChannelBuilder, CmdResEvent, CmdResStream, PeerMsg, RaftRouter, }, - SimpleWriteEncoder, StoreRouter, + SimpleWriteEncoder, }; -use tikv_kv::{Modify, RaftExtension, WriteEvent}; +use tikv_kv::{Modify, WriteEvent}; use tikv_util::{codec::number::NumberEncoder, time::Instant}; -use txn_types::WriteBatchFlags; +use txn_types::{TxnExtra, TxnExtraScheduler, WriteBatchFlags}; use super::{ metrics::{ASYNC_REQUESTS_COUNTER_VEC, ASYNC_REQUESTS_DURATIONS_VEC}, raftkv::{get_status_kind_from_engine_error, new_request_header}, }; -#[derive(Clone)] -pub struct RaftExtensionImpl { - router: StoreRouter, -} - -impl RaftExtension for RaftExtensionImpl { - #[inline] - fn feed(&self, msg: RaftMessage, key_message: bool) { - let region_id = msg.get_region_id(); - let msg_ty = msg.get_message().get_msg_type(); - // Channel full and region 
not found are ignored unless it's a key message. - if let Err(e) = self.router.send_raft_message(Box::new(msg)) && key_message { - error!("failed to send raft message"; "region_id" => region_id, "msg_ty" => ?msg_ty, "err" => ?e); - } - } - - fn report_reject_message(&self, _region_id: u64, _from_peer_id: u64) { - // TODO:reject the message on connection side instead of go through - // raft layer. - } - - fn report_peer_unreachable(&self, region_id: u64, to_peer_id: u64) { - let _ = self - .router - .send(region_id, PeerMsg::PeerUnreachable { to_peer_id }); - } - - fn report_store_unreachable(&self, _store_id: u64) {} - - fn report_snapshot_status( - &self, - _region_id: u64, - _to_peer_id: u64, - _status: raft::SnapshotStatus, - ) { - } - - fn report_resolved(&self, _store_id: u64, _group_id: u64) {} - - fn split( - &self, - _region_id: u64, - _region_epoch: kvproto::metapb::RegionEpoch, - _split_keys: Vec>, - _source: String, - ) -> futures::future::BoxFuture<'static, tikv_kv::Result>> { - Box::pin(async move { Err(box_err!("raft split is not supported")) }) - } - - fn query_region( - &self, - _region_id: u64, - ) -> futures::future::BoxFuture< - 'static, - tikv_kv::Result, - > { - Box::pin(async move { Err(box_err!("query region is not supported")) }) - } -} - struct Transform { resp: CmdResStream, early_err: Option, @@ -123,12 +70,25 @@ impl Stream for Transform { #[derive(Clone)] pub struct RaftKv2 { router: RaftRouter, + txn_extra_scheduler: Option>, + region_leaders: Arc>>, } impl RaftKv2 { #[allow(unused)] - pub fn new(router: RaftRouter) -> RaftKv2 { - RaftKv2 { router } + pub fn new( + router: RaftRouter, + region_leaders: Arc>>, + ) -> RaftKv2 { + RaftKv2 { + router, + region_leaders, + txn_extra_scheduler: None, + } + } + + pub fn set_txn_extra_scheduler(&mut self, txn_extra_scheduler: Arc) { + self.txn_extra_scheduler = Some(txn_extra_scheduler); } } @@ -141,7 +101,11 @@ impl tikv_kv::Engine for RaftKv2 { None } - type RaftExtension = RaftExtensionImpl; 
+ type RaftExtension = raft_extension::Extension; + #[inline] + fn raft_extension(&self) -> Self::RaftExtension { + raft_extension::Extension::new(self.router.store_router().clone()) + } fn modify_on_kv_engine( &self, @@ -282,8 +246,8 @@ impl tikv_kv::Engine for RaftKv2 { let res = self .router .store_router() - .send(region_id, msg) - .map_err(|e| tikv_kv::Error::from(raftstore_v2::Error::from(e))); + .check_send(region_id, msg) + .map_err(tikv_kv::Error::from); (Transform { resp: CmdResStream::new(sub), early_err: res.err(), @@ -304,4 +268,22 @@ impl tikv_kv::Engine for RaftKv2 { } }) } + + #[inline] + fn precheck_write_with_ctx(&self, ctx: &kvproto::kvrpcpb::Context) -> tikv_kv::Result<()> { + let region_id = ctx.get_region_id(); + match self.region_leaders.read().unwrap().get(®ion_id) { + Some(_) => Ok(()), + None => Err(raftstore_v2::Error::NotLeader(region_id, None).into()), + } + } + + #[inline] + fn schedule_txn_extra(&self, txn_extra: TxnExtra) { + if let Some(tx) = self.txn_extra_scheduler.as_ref() { + if !txn_extra.is_empty() { + tx.schedule(txn_extra); + } + } + } } diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs new file mode 100644 index 00000000000..59daa053aa3 --- /dev/null +++ b/src/server/raftkv2/node.rs @@ -0,0 +1,210 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::sync::{Arc, Mutex}; + +use causal_ts::CausalTsProviderImpl; +use concurrency_manager::ConcurrencyManager; +use engine_traits::{KvEngine, RaftEngine, TabletContext, TabletRegistry}; +use kvproto::{metapb, replication_modepb::ReplicationStatus}; +use pd_client::PdClient; +use raftstore::store::{GlobalReplicationState, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX}; +use raftstore_v2::{router::RaftRouter, Bootstrap, StoreSystem}; +use slog::{info, o, Logger}; +use tikv_util::{config::VersionTrack, worker::Worker}; + +use crate::server::{node::init_store, Result}; + +// TODO: we will rename another better name like RaftStore later. +pub struct NodeV2 { + cluster_id: u64, + store: metapb::Store, + store_cfg: Arc>, + system: StoreSystem, + has_started: bool, + + pd_client: Arc, + state: Arc>, + bg_worker: Worker, + registry: TabletRegistry, + logger: Logger, +} + +impl NodeV2 +where + C: PdClient, + EK: KvEngine, + ER: RaftEngine, +{ + /// Creates a new Node. + pub fn new( + system: StoreSystem, + cfg: &crate::server::Config, + store_cfg: Arc>, + pd_client: Arc, + state: Arc>, + bg_worker: Worker, + store: Option, + registry: TabletRegistry, + ) -> NodeV2 { + let store = init_store(store, cfg); + + NodeV2 { + cluster_id: cfg.cluster_id, + store, + store_cfg, + pd_client, + system, + has_started: false, + state, + bg_worker, + registry, + logger: slog_global::borrow_global().new(o!()), + } + } + + pub fn try_bootstrap_store(&mut self, raft_engine: &ER) -> Result<()> { + let store_id = Bootstrap::new( + raft_engine, + self.cluster_id, + &*self.pd_client, + self.logger.clone(), + ) + .bootstrap_store()?; + self.store.set_id(store_id); + Ok(()) + } + + /// Starts the Node. It tries to bootstrap cluster if the cluster is not + /// bootstrapped yet. Then it spawns a thread to run the raftstore in + /// background. 
+ pub fn start( + &mut self, + raft_engine: ER, + trans: T, + router: &RaftRouter, + snap_mgr: TabletSnapManager, + concurrency_manager: ConcurrencyManager, + causal_ts_provider: Option>, // used for rawkv apiv2 + ) -> Result<()> + where + T: Transport + 'static, + { + let store_id = self.id(); + { + let mut meta = router.store_meta().lock().unwrap(); + meta.store_id = Some(store_id); + } + if let Some(region) = Bootstrap::new( + &raft_engine, + self.cluster_id, + &*self.pd_client, + self.logger.clone(), + ) + .bootstrap_first_region(&self.store, store_id)? + { + let path = self + .registry + .tablet_path(region.get_id(), RAFT_INIT_LOG_INDEX); + let ctx = TabletContext::new(®ion, Some(RAFT_INIT_LOG_INDEX)); + // TODO: make follow line can recover from abort. + self.registry + .tablet_factory() + .open_tablet(ctx, &path) + .unwrap(); + } + + // Put store only if the cluster is bootstrapped. + info!(self.logger, "put store to PD"; "store" => ?&self.store); + let status = self.pd_client.put_store(self.store.clone())?; + self.load_all_stores(status); + + self.start_store( + raft_engine, + trans, + router, + snap_mgr, + concurrency_manager, + causal_ts_provider, + )?; + + Ok(()) + } + + /// Gets the store id. + pub fn id(&self) -> u64 { + self.store.get_id() + } + + /// Gets a copy of Store which is registered to Pd. + pub fn store(&self) -> metapb::Store { + self.store.clone() + } + + // TODO: support updating dynamic configuration. + + // TODO: check api version. + // Do we really need to do the check giving we don't consider support upgrade + // ATM? 
+ + fn load_all_stores(&mut self, status: Option) { + info!(self.logger, "initializing replication mode"; "status" => ?status, "store_id" => self.store.id); + let stores = match self.pd_client.get_all_stores(false) { + Ok(stores) => stores, + Err(e) => panic!("failed to load all stores: {:?}", e), + }; + let mut state = self.state.lock().unwrap(); + if let Some(s) = status { + state.set_status(s); + } + for mut store in stores { + state + .group + .register_store(store.id, store.take_labels().into()); + } + } + + fn start_store( + &mut self, + raft_engine: ER, + trans: T, + router: &RaftRouter, + snap_mgr: TabletSnapManager, + concurrency_manager: ConcurrencyManager, + causal_ts_provider: Option>, // used for rawkv apiv2 + ) -> Result<()> + where + T: Transport + 'static, + { + let store_id = self.store.get_id(); + info!(self.logger, "start raft store thread"; "store_id" => store_id); + + if self.has_started { + return Err(box_err!("{} is already started", store_id)); + } + self.has_started = true; + let cfg = self.store_cfg.clone(); + + self.system.start( + store_id, + cfg, + raft_engine, + self.registry.clone(), + trans, + self.pd_client.clone(), + router.store_router(), + router.store_meta().clone(), + snap_mgr, + concurrency_manager, + causal_ts_provider, + )?; + Ok(()) + } + + /// Stops the Node. + pub fn stop(&mut self) { + let store_id = self.store.get_id(); + info!(self.logger, "stop raft store thread"; "store_id" => store_id); + self.system.shutdown(); + self.bg_worker.stop(); + } +} diff --git a/src/server/raftkv2/raft_extension.rs b/src/server/raftkv2/raft_extension.rs new file mode 100644 index 00000000000..f2f433999b9 --- /dev/null +++ b/src/server/raftkv2/raft_extension.rs @@ -0,0 +1,109 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::raft_serverpb::RaftMessage; +use raftstore_v2::{ + router::{DebugInfoChannel, PeerMsg, StoreMsg}, + StoreRouter, +}; + +#[derive(Clone)] +pub struct Extension { + router: StoreRouter, +} + +impl Extension { + pub fn new(router: StoreRouter) -> Self { + Extension { router } + } +} + +impl tikv_kv::RaftExtension for Extension { + #[inline] + fn feed(&self, msg: RaftMessage, key_message: bool) { + let region_id = msg.get_region_id(); + let msg_ty = msg.get_message().get_msg_type(); + // Channel full and region not found are ignored unless it's a key message. + if let Err(e) = self.router.send_raft_message(Box::new(msg)) && key_message { + error!("failed to send raft message"; "region_id" => region_id, "msg_ty" => ?msg_ty, "err" => ?e); + } + } + + #[inline] + fn report_reject_message(&self, _region_id: u64, _from_peer_id: u64) { + // TODO:reject the message on connection side instead of go through + // raft layer. + } + + #[inline] + fn report_peer_unreachable(&self, region_id: u64, to_peer_id: u64) { + let _ = self + .router + .send(region_id, PeerMsg::PeerUnreachable { to_peer_id }); + } + + #[inline] + fn report_store_unreachable(&self, to_store_id: u64) { + let _ = self + .router + .send_control(StoreMsg::StoreUnreachable { to_store_id }); + } + + fn report_snapshot_status( + &self, + region_id: u64, + to_peer_id: u64, + status: raft::SnapshotStatus, + ) { + let _ = self + .router + .force_send(region_id, PeerMsg::SnapshotSent { to_peer_id, status }); + } + + fn report_resolved(&self, _store_id: u64, _group_id: u64) { + // TODO: support commit group + } + + fn split( + &self, + region_id: u64, + region_epoch: kvproto::metapb::RegionEpoch, + split_keys: Vec>, + source: String, + ) -> futures::future::BoxFuture<'static, tikv_kv::Result>> { + let (msg, sub) = PeerMsg::request_split(region_epoch, split_keys, source); + let res = self.router.check_send(region_id, msg); + Box::pin(async move { + res?; + let mut 
resp = match sub.result().await { + Some(r) => r, + None => return Err(box_err!("split is aborted")), + }; + if !resp.get_header().has_error() { + let regions = resp.mut_admin_response().mut_splits().take_regions(); + Ok(regions.into()) + } else { + Err(tikv_kv::Error::from(resp.mut_header().take_error())) + } + }) + } + + fn query_region( + &self, + region_id: u64, + ) -> futures::future::BoxFuture< + 'static, + tikv_kv::Result, + > { + let (ch, sub) = DebugInfoChannel::pair(); + let msg = PeerMsg::QueryDebugInfo(ch); + let res = self.router.check_send(region_id, msg); + Box::pin(async move { + res?; + match sub.result().await { + Some(res) => Ok(res), + None => Err(box_err!("query region is aborted")), + } + }) + } +} diff --git a/src/server/server.rs b/src/server/server.rs index 1921483e37b..428aee31090 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -122,7 +122,7 @@ where let snap_worker = Worker::new("snap-handler"); let lazy_worker = snap_worker.lazy_build("snap-handler"); - let raft_ext = storage.get_engine().raft_extension().clone(); + let raft_ext = storage.get_engine().raft_extension(); let proxy = Proxy::new(security_mgr.clone(), &env, Arc::new(cfg.value().clone())); let kv_service = KvService::new( diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index db50dfe459e..88ed0c99443 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -590,7 +590,7 @@ impl Tikv for Service { sink: ClientStreamingSink, ) { let store_id = self.store_id; - let ch = self.storage.get_engine().raft_extension().clone(); + let ch = self.storage.get_engine().raft_extension(); let reject_messages_on_memory_ratio = self.reject_messages_on_memory_ratio; let res = async move { @@ -633,7 +633,7 @@ impl Tikv for Service { ) { info!("batch_raft RPC is called, new gRPC stream established"); let store_id = self.store_id; - let ch = self.storage.get_engine().raft_extension().clone(); + let ch = self.storage.get_engine().raft_extension(); let 
reject_messages_on_memory_ratio = self.reject_messages_on_memory_ratio; let res = async move { From 186e242b5169b9cbc932ce5f6fb657108650a470 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 21 Dec 2022 20:28:55 +0800 Subject: [PATCH 0419/1149] raftstore-v2: implement a simplified version of CoprocessorHost (#13901) ref tikv/tikv#12842 Signed-off-by: SpadeA-Tang Signed-off-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Co-authored-by: Xinye Tao --- components/raftstore-v2/src/batch/store.rs | 30 ++++++++++--- components/raftstore-v2/src/fsm/mod.rs | 2 +- components/raftstore-v2/src/fsm/store.rs | 16 ++++++- components/raftstore-v2/src/lib.rs | 2 +- .../operation/command/admin/conf_change.rs | 8 ++++ .../src/operation/command/admin/split.rs | 13 +++++- components/raftstore-v2/src/operation/life.rs | 17 +++++++- .../raftstore-v2/src/operation/ready/mod.rs | 19 +++++++- .../src/operation/ready/snapshot.rs | 16 +++++-- components/raftstore-v2/src/raft/peer.rs | 43 ++++++++++++++++--- .../tests/integrations/cluster.rs | 22 +++++++--- src/server/lock_manager/deadlock.rs | 15 ++++++- src/server/lock_manager/mod.rs | 5 +++ src/server/raftkv2/node.rs | 6 ++- 14 files changed, 183 insertions(+), 31 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 98075969c66..8a1f60f3717 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -20,10 +20,13 @@ use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use file_system::{set_io_type, IoType}; use kvproto::{disk_usage::DiskUsage, raft_serverpb::RaftMessage}; use pd_client::PdClient; -use raft::INVALID_ID; -use raftstore::store::{ - fsm::store::PeerTickBatch, local_metrics::RaftMetrics, Config, ReadRunner, ReadTask, - StoreWriters, TabletSnapManager, Transport, WriteSenders, +use raft::{StateRole, INVALID_ID}; +use raftstore::{ + 
coprocessor::RegionChangeEvent, + store::{ + fsm::store::PeerTickBatch, local_metrics::RaftMetrics, Config, ReadRunner, ReadTask, + StoreWriters, TabletSnapManager, Transport, WriteSenders, + }, }; use slog::Logger; use tikv_util::{ @@ -39,7 +42,10 @@ use tikv_util::{ use time::Timespec; use crate::{ - fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate, StoreMeta}, + fsm::{ + LockManagerNotifier, PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate, + StoreMeta, + }, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, worker::pd, @@ -76,6 +82,8 @@ pub struct StoreContext { pub snap_mgr: TabletSnapManager, pub pd_scheduler: Scheduler, + + pub lock_manager_notifier: Arc, } /// A [`PollHandler`] that handles updates of [`StoreFsm`]s and [`PeerFsm`]s. @@ -229,6 +237,7 @@ struct StorePollerBuilder { logger: Logger, store_meta: Arc>, snap_mgr: TabletSnapManager, + lock_manager_notifier: Arc, } impl StorePollerBuilder { @@ -245,6 +254,7 @@ impl StorePollerBuilder { logger: Logger, store_meta: Arc>, snap_mgr: TabletSnapManager, + lock_manager_notifier: Arc, ) -> Self { let pool_size = cfg.value().apply_batch_system.pool_size; let max_pool_size = std::cmp::max( @@ -270,6 +280,7 @@ impl StorePollerBuilder { write_senders: store_writers.senders(), store_meta, snap_mgr, + lock_manager_notifier, } } @@ -291,6 +302,12 @@ impl StorePollerBuilder { Some(p) => p, None => return Ok(()), }; + self.lock_manager_notifier.on_region_changed( + storage.region_state().get_region(), + RegionChangeEvent::Create, + StateRole::Follower, + ); + let (sender, peer_fsm) = PeerFsm::new(&cfg, &self.tablet_registry, storage)?; meta.region_read_progress .insert(region_id, peer_fsm.as_ref().peer().read_progress().clone()); @@ -345,6 +362,7 @@ where self_disk_usage: DiskUsage::Normal, snap_mgr: self.snap_mgr.clone(), pd_scheduler: self.pd_scheduler.clone(), + lock_manager_notifier: self.lock_manager_notifier.clone(), }; let cfg_tracker = 
self.cfg.clone().tracker("raftstore".to_string()); StorePoller::new(poll_ctx, cfg_tracker) @@ -392,6 +410,7 @@ impl StoreSystem { snap_mgr: TabletSnapManager, concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 + lock_manager_notifier: Arc, ) -> Result<()> where T: Transport + 'static, @@ -445,6 +464,7 @@ impl StoreSystem { self.logger.clone(), store_meta.clone(), snap_mgr, + lock_manager_notifier, ); self.workers = Some(workers); let peers = builder.init()?; diff --git a/components/raftstore-v2/src/fsm/mod.rs b/components/raftstore-v2/src/fsm/mod.rs index b3d0e0483ba..442c6b050ce 100644 --- a/components/raftstore-v2/src/fsm/mod.rs +++ b/components/raftstore-v2/src/fsm/mod.rs @@ -11,4 +11,4 @@ mod store; pub use apply::{ApplyFsm, ApplyResReporter, ApplyScheduler}; pub use peer::{PeerFsm, PeerFsmDelegate, SenderFsmPair}; -pub use store::{Store, StoreFsm, StoreFsmDelegate, StoreMeta}; +pub use store::{LockManagerNotifier, Store, StoreFsm, StoreFsmDelegate, StoreMeta}; diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index bd31de69496..6e2dfe4a75f 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -6,7 +6,12 @@ use batch_system::Fsm; use collections::HashMap; use engine_traits::{KvEngine, RaftEngine}; use futures::{compat::Future01CompatExt, FutureExt}; -use raftstore::store::{Config, ReadDelegate, RegionReadProgressRegistry}; +use kvproto::metapb::Region; +use raft::StateRole; +use raftstore::{ + coprocessor::{RegionChangeEvent, RoleChange}, + store::{Config, ReadDelegate, RegionReadProgressRegistry}, +}; use slog::{info, o, Logger}; use tikv_util::{ future::poll_future_notify, @@ -165,3 +170,12 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { } } } + +// A simplified version of CoprocessorHost used to convey information to +// LockManager only. +// It is replaced by CoprocessorHost in the future. 
+pub trait LockManagerNotifier: Send + Sync { + fn on_role_change(&self, region: &Region, role_change: RoleChange); + + fn on_region_changed(&self, region: &Region, event: RegionChangeEvent, role: StateRole); +} diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index d8327549da6..cb769b6594a 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -38,6 +38,6 @@ mod worker; pub(crate) use batch::StoreContext; pub use batch::{create_store_batch_system, StoreRouter, StoreSystem}; pub use bootstrap::Bootstrap; -pub use fsm::StoreMeta; +pub use fsm::{LockManagerNotifier, StoreMeta}; pub use operation::{SimpleWriteBinary, SimpleWriteEncoder, StateStorage}; pub use raftstore::{store::Config, Error, Result}; diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 4bda7eedf32..ec0b78e717a 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -18,6 +18,7 @@ use kvproto::{ use protobuf::Message; use raft::prelude::*; use raftstore::{ + coprocessor::{RegionChangeEvent, RegionChangeReason}, store::{ metrics::{PEER_ADMIN_CMD_COUNTER_VEC, PEER_PROPOSE_LOG_SIZE_HISTOGRAM}, util::{self, ChangePeerI, ConfChangeKind}, @@ -146,6 +147,13 @@ impl Peer { let remove_self = conf_change.region_state.get_state() == PeerState::Tombstone; self.storage_mut() .set_region_state(conf_change.region_state); + + ctx.lock_manager_notifier.on_region_changed( + self.region(), + RegionChangeEvent::Update(RegionChangeReason::ChangePeer), + self.get_role(), + ); + if self.is_leader() { info!( self.logger, diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 8bf23da0fd6..870c203f07d 100644 --- 
a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -40,7 +40,10 @@ use kvproto::{ use protobuf::Message; use raft::{prelude::Snapshot, INVALID_ID}; use raftstore::{ - coprocessor::split_observer::{is_valid_split_key, strip_timestamp_if_exists}, + coprocessor::{ + split_observer::{is_valid_split_key, strip_timestamp_if_exists}, + RegionChangeReason, + }, store::{ cmd_resp, fsm::apply::validate_batch_split, @@ -415,7 +418,13 @@ impl Peer { { let mut meta = store_ctx.store_meta.lock().unwrap(); let reader = meta.readers.get_mut(&derived.get_id()).unwrap(); - self.set_region(reader, derived.clone(), tablet_index); + self.set_region( + &store_ctx.lock_manager_notifier, + reader, + derived.clone(), + RegionChangeReason::Split, + tablet_index, + ); } self.post_split(); diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index d9f706c32a1..73db4e760d1 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -19,7 +19,10 @@ use kvproto::{ metapb::Region, raft_serverpb::{PeerState, RaftMessage}, }; -use raftstore::store::{util, WriteTask}; +use raftstore::{ + coprocessor::RegionChangeEvent, + store::{util, WriteTask}, +}; use slog::{debug, error, info, warn}; use tikv_util::store::find_peer; @@ -291,7 +294,11 @@ impl Peer { /// /// After destroy is finished, `finish_destroy` should be called to clean up /// memory states. 
- pub fn start_destroy(&mut self, write_task: &mut WriteTask) { + pub fn start_destroy( + &mut self, + ctx: &mut StoreContext, + write_task: &mut WriteTask, + ) { let entry_storage = self.storage().entry_storage(); if self.postponed_destroy() { return; @@ -319,6 +326,12 @@ impl Peer { lb.put_region_state(region_id, applied_index, ®ion_state) .unwrap(); self.destroy_progress_mut().start(); + + ctx.lock_manager_notifier.on_region_changed( + self.region(), + RegionChangeEvent::Destroy, + self.get_role(), + ); } /// Do clean up for destroy. The peer is permanently destroyed when diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index f9a6c3a34d4..9463aae3d73 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -29,7 +29,10 @@ use error_code::ErrorCodeExt; use kvproto::{raft_cmdpb::AdminCmdType, raft_serverpb::RaftMessage}; use protobuf::Message as _; use raft::{eraftpb, prelude::MessageType, Ready, StateRole, INVALID_ID}; -use raftstore::store::{util, FetchedLogs, ReadProgress, Transport, WriteTask}; +use raftstore::{ + coprocessor::RoleChange, + store::{util, FetchedLogs, ReadProgress, Transport, WriteTask}, +}; use slog::{debug, error, trace, warn}; use tikv_util::{ store::find_peer, @@ -68,6 +71,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, if self.fsm.peer_mut().tick() { self.fsm.peer_mut().set_has_ready(); } + self.fsm.peer_mut().refresh_lead_transferee(); + self.schedule_tick(PeerTick::Raft); } } @@ -383,7 +388,7 @@ impl Peer { .collect(); } if !self.serving() { - self.start_destroy(&mut write_task); + self.start_destroy(ctx, &mut write_task); } // Ready number should increase monotonically. 
assert!(self.async_writer.known_largest_number() < ready.number()); @@ -517,8 +522,18 @@ impl Peer { } _ => {} } + ctx.lock_manager_notifier.on_role_change( + self.region(), + RoleChange { + state: ss.raft_state, + leader_id: ss.leader_id, + prev_lead_transferee: self.lead_transferee(), + vote: self.raft_group().raft.vote, + }, + ); self.proposal_control_mut().maybe_update_term(term); } + self.refresh_lead_transferee(); } /// If leader commits new admin commands, it may break lease assumption. So diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 7339df22fa9..6c027517454 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -31,9 +31,12 @@ use engine_traits::{KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletReg use kvproto::raft_serverpb::{PeerState, RaftSnapshotData}; use protobuf::Message; use raft::eraftpb::Snapshot; -use raftstore::store::{ - metrics::STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER, GenSnapRes, ReadTask, TabletSnapKey, - TabletSnapManager, Transport, WriteTask, RAFT_INIT_LOG_INDEX, +use raftstore::{ + coprocessor::RegionChangeEvent, + store::{ + metrics::STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER, GenSnapRes, ReadTask, TabletSnapKey, + TabletSnapManager, Transport, WriteTask, RAFT_INIT_LOG_INDEX, + }, }; use slog::{error, info, warn}; use tikv_util::box_err; @@ -150,7 +153,14 @@ impl Peer { // Use a new FlushState to avoid conflicts with the old one. 
tablet_ctx.flush_state = Some(flush_state); ctx.tablet_registry.load(tablet_ctx, false).unwrap(); + self.schedule_apply_fsm(ctx); + ctx.lock_manager_notifier.on_region_changed( + self.region(), + RegionChangeEvent::Create, + self.get_role(), + ); + self.storage_mut().on_applied_snapshot(); self.raft_group_mut().advance_apply_to(persisted_index); { diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 500b166065f..0e38f0dd5a1 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -14,17 +14,20 @@ use engine_traits::{ use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, pdpb, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; use raft::{RawNode, StateRole}; -use raftstore::store::{ - util::{Lease, RegionReadProgress}, - Config, EntryStorage, LocksStatus, PeerStat, ProposalQueue, ReadDelegate, ReadIndexQueue, - ReadProgress, TxnExt, WriteTask, +use raftstore::{ + coprocessor::{RegionChangeEvent, RegionChangeReason}, + store::{ + util::{Lease, RegionReadProgress}, + Config, EntryStorage, LocksStatus, PeerStat, ProposalQueue, ReadDelegate, ReadIndexQueue, + ReadProgress, TxnExt, WriteTask, + }, }; use slog::Logger; use super::storage::Storage; use crate::{ batch::StoreContext, - fsm::ApplyScheduler, + fsm::{ApplyScheduler, LockManagerNotifier}, operation::{AsyncWriter, DestroyProgress, ProposalControl, SimpleWriteReqEncoder}, router::{CmdResChannel, PeerTick, QueryResChannel}, Result, @@ -67,6 +70,9 @@ pub struct Peer { read_progress: Arc, leader_lease: Lease, + /// lead_transferee if this peer(leader) is in a leadership transferring. + lead_transferee: u64, + /// region buckets. 
region_buckets: Option, last_region_buckets: Option, @@ -155,6 +161,7 @@ impl Peer { txn_ext: Arc::default(), txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::Noop)), proposal_control: ProposalControl::new(0), + lead_transferee: raft::INVALID_ID, pending_ticks: Vec::new(), split_trace: vec![], state_changes: None, @@ -192,9 +199,10 @@ impl Peer { /// has been preserved in a durable device. pub fn set_region( &mut self, - // host: &CoprocessorHost, + lock_manager_observer: &Arc, reader: &mut ReadDelegate, region: metapb::Region, + reason: RegionChangeReason, tablet_index: u64, ) { if self.region().get_region_epoch().get_version() < region.get_region_epoch().get_version() @@ -239,7 +247,13 @@ impl Peer { pessimistic_locks.version = self.region().get_region_epoch().get_version(); } - // TODO: CoprocessorHost + if self.serving() { + lock_manager_observer.on_region_changed( + self.region(), + RegionChangeEvent::Update(reason), + self.get_role(), + ); + } } #[inline] @@ -395,6 +409,11 @@ impl Peer { .cloned() } + #[inline] + pub fn get_role(&self) -> StateRole { + self.raft_group.raft.state + } + #[inline] pub fn update_peer_statistics(&mut self) { if !self.is_leader() { @@ -641,6 +660,16 @@ impl Peer { .advance_apply(apply_index, term, region); } + #[inline] + pub fn lead_transferee(&self) -> u64 { + self.lead_transferee + } + + #[inline] + pub fn refresh_lead_transferee(&mut self) { + self.lead_transferee = self.raft_group.raft.lead_transferee.unwrap_or_default(); + } + // TODO: find a better place to put all txn related stuff. 
pub fn require_updating_max_ts(&self, ctx: &StoreContext) { let epoch = self.region().get_region_epoch(); diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 732afb38f98..a454b0aa842 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -28,15 +28,18 @@ use kvproto::{ raft_serverpb::RaftMessage, }; use pd_client::RpcClient; -use raft::eraftpb::MessageType; -use raftstore::store::{ - region_meta::{RegionLocalState, RegionMeta}, - Config, RegionSnapshot, TabletSnapKey, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX, +use raft::{eraftpb::MessageType, StateRole}; +use raftstore::{ + coprocessor::{RegionChangeEvent, RoleChange}, + store::{ + region_meta::{RegionLocalState, RegionMeta}, + Config, RegionSnapshot, TabletSnapKey, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX, + }, }; use raftstore_v2::{ create_store_batch_system, router::{DebugInfoChannel, FlushChannel, PeerMsg, QueryResult, RaftRouter}, - Bootstrap, SimpleWriteEncoder, StateStorage, StoreSystem, + Bootstrap, LockManagerNotifier, SimpleWriteEncoder, StateStorage, StoreSystem, }; use slog::{debug, o, Logger}; use tempfile::TempDir; @@ -288,6 +291,7 @@ impl RunningState { snap_mgr.clone(), concurrency_manager, causal_ts_provider, + Arc::new(DummyLockManagerObserver {}), ) .unwrap(); @@ -570,3 +574,11 @@ impl Drop for Cluster { } } } + +struct DummyLockManagerObserver {} + +impl LockManagerNotifier for DummyLockManagerObserver { + fn on_region_changed(&self, _: &metapb::Region, _: RegionChangeEvent, _: StateRole) {} + + fn on_role_change(&self, _: &metapb::Region, _: RoleChange) {} +} diff --git a/src/server/lock_manager/deadlock.rs b/src/server/lock_manager/deadlock.rs index 9583df80dd6..a9a31c68b8f 100644 --- a/src/server/lock_manager/deadlock.rs +++ b/src/server/lock_manager/deadlock.rs @@ -28,6 +28,7 @@ use raftstore::{ }, 
store::util::is_region_initialized, }; +use raftstore_v2::LockManagerNotifier; use security::SecurityManager; use tikv_util::{ future::paired_future_callback, @@ -524,7 +525,7 @@ const LEADER_KEY: &[u8] = b""; /// way to change the node from the leader of deadlock detector to follower, and /// vice versa. #[derive(Clone)] -pub(crate) struct RoleChangeNotifier { +pub struct RoleChangeNotifier { /// The id of the valid leader region. // raftstore.coprocessor needs it to be Sync + Send. leader_region_id: Arc>, @@ -606,6 +607,18 @@ impl RegionChangeObserver for RoleChangeNotifier { } } +impl LockManagerNotifier for RoleChangeNotifier { + fn on_role_change(&self, region: &Region, role_change: RoleChange) { + let mut ctx = ObserverContext::new(region); + RoleObserver::on_role_change(self, &mut ctx, &role_change); + } + + fn on_region_changed(&self, region: &Region, event: RegionChangeEvent, role: StateRole) { + let mut ctx = ObserverContext::new(region); + RegionChangeObserver::on_region_changed(self, &mut ctx, event, role); + } +} + struct Inner { /// The role of the deadlock detector. Default is `Role::Follower`. role: Role, diff --git a/src/server/lock_manager/mod.rs b/src/server/lock_manager/mod.rs index 243d533a0e5..44c31fcab1e 100644 --- a/src/server/lock_manager/mod.rs +++ b/src/server/lock_manager/mod.rs @@ -205,6 +205,11 @@ impl LockManager { role_change_notifier.register(host); } + /// Creates a `RoleChangeNotifier` of the deadlock detector worker + pub fn new_notifier(&self) -> RoleChangeNotifier { + RoleChangeNotifier::new(self.detector_scheduler.clone()) + } + /// Creates a `DeadlockService` to handle deadlock detect requests from /// other nodes. 
pub fn deadlock_service(&self) -> DeadlockService { diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index 59daa053aa3..57bc575ff05 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -8,7 +8,7 @@ use engine_traits::{KvEngine, RaftEngine, TabletContext, TabletRegistry}; use kvproto::{metapb, replication_modepb::ReplicationStatus}; use pd_client::PdClient; use raftstore::store::{GlobalReplicationState, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX}; -use raftstore_v2::{router::RaftRouter, Bootstrap, StoreSystem}; +use raftstore_v2::{router::RaftRouter, Bootstrap, LockManagerNotifier, StoreSystem}; use slog::{info, o, Logger}; use tikv_util::{config::VersionTrack, worker::Worker}; @@ -85,6 +85,7 @@ where snap_mgr: TabletSnapManager, concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 + lock_manager_observer: Arc, ) -> Result<()> where T: Transport + 'static, @@ -125,6 +126,7 @@ where snap_mgr, concurrency_manager, causal_ts_provider, + lock_manager_observer, )?; Ok(()) @@ -171,6 +173,7 @@ where snap_mgr: TabletSnapManager, concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 + lock_manager_observer: Arc, ) -> Result<()> where T: Transport + 'static, @@ -196,6 +199,7 @@ where snap_mgr, concurrency_manager, causal_ts_provider, + lock_manager_observer, )?; Ok(()) } From fbff71d0026c365dc69bcafb9c6574872c5342be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 22 Dec 2022 11:40:55 +0800 Subject: [PATCH 0420/1149] log-backup: enhance logs (#13913) close tikv/tikv#13914 Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- components/backup-stream/src/endpoint.rs | 19 ++- .../backup-stream/src/subscription_manager.rs | 17 ++- .../backup-stream/src/subscription_track.rs | 92 +++++++++++-- components/backup-stream/src/utils.rs | 130 ++++++++++++++++++ 4 files changed, 237 
insertions(+), 21 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index ec6b0dd41fb..c50c70a2eec 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -1070,12 +1070,21 @@ pub enum ObserveOp { impl std::fmt::Debug for ObserveOp { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::Start { region } => f.debug_struct("Start").field("region", region).finish(), - Self::Stop { region } => f.debug_struct("Stop").field("region", region).finish(), - Self::Destroy { region } => f.debug_struct("Destroy").field("region", region).finish(), + Self::Start { region } => f + .debug_struct("Start") + .field("region", &utils::debug_region(region)) + .finish(), + Self::Stop { region } => f + .debug_struct("Stop") + .field("region", &utils::debug_region(region)) + .finish(), + Self::Destroy { region } => f + .debug_struct("Destroy") + .field("region", &utils::debug_region(region)) + .finish(), Self::RefreshResolver { region } => f .debug_struct("RefreshResolver") - .field("region", region) + .field("region", &utils::debug_region(region)) .finish(), Self::NotifyFailToStartObserve { region, @@ -1083,7 +1092,7 @@ impl std::fmt::Debug for ObserveOp { err, } => f .debug_struct("NotifyFailToStartObserve") - .field("region", region) + .field("region", &utils::debug_region(region)) .field("handle", handle) .field("err", err) .finish(), diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index 83181829b43..624392f3df8 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -165,7 +165,7 @@ impl ScanCmd { } = self; let begin = Instant::now_coarse(); let stat = initial_scan.do_initial_scan(region, *last_checkpoint, handle.clone())?; - info!("initial scanning of leader transforming finished!"; "takes" => 
?begin.saturating_elapsed(), "region" => %region.get_id(), "from_ts" => %last_checkpoint); + info!("initial scanning finished!"; "takes" => ?begin.saturating_elapsed(), "from_ts" => %last_checkpoint, utils::slog_region(region)); utils::record_cf_stat("lock", &stat.lock); utils::record_cf_stat("write", &stat.write); utils::record_cf_stat("default", &stat.data); @@ -414,7 +414,7 @@ where true, false, ) - .map_err(|err| warn!("check epoch and stop failed."; "err" => %err)) + .map_err(|err| warn!("check epoch and stop failed."; utils::slog_region(region), "err" => %err)) .is_ok() }); } @@ -455,13 +455,16 @@ where "take" => ?now.saturating_elapsed(), "timedout" => %timedout); } let cps = self.subs.resolve_with(min_ts); - let min_region = cps.iter().min_by_key(|(_, rts)| rts); + let min_region = cps.iter().min_by_key(|rs| rs.checkpoint); // If there isn't any region observed, the `min_ts` can be used as resolved ts // safely. - let rts = min_region.map(|(_, rts)| *rts).unwrap_or(min_ts); - info!("getting checkpoint"; "defined_by_region" => ?min_region.map(|r| r.0.get_id()), "checkpoint" => %rts); + let rts = min_region.map(|rs| rs.checkpoint).unwrap_or(min_ts); + info!("getting checkpoint"; "defined_by_region" => ?min_region); self.subs.warn_if_gap_too_huge(rts); - callback(ResolvedRegions::new(rts, cps)); + callback(ResolvedRegions::new( + rts, + cps.into_iter().map(|r| (r.region, r.checkpoint)).collect(), + )); } } } @@ -583,7 +586,7 @@ where exists = true; let should_remove = old.handle().id == handle.id; if !should_remove { - warn!("stale retry command"; "region" => ?region, "handle" => ?handle, "old_handle" => ?old.handle()); + warn!("stale retry command"; utils::slog_region(®ion), "handle" => ?handle, "old_handle" => ?old.handle()); } should_remove }); diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 50c3c6c1143..6b51f983a3b 100644 --- a/components/backup-stream/src/subscription_track.rs +++ 
b/components/backup-stream/src/subscription_track.rs @@ -57,6 +57,63 @@ impl RegionSubscription { } } +#[derive(PartialEq, Eq)] +pub enum CheckpointType { + MinTs, + StartTsOfInitialScan, + StartTsOfTxn(Option>), +} + +impl std::fmt::Debug for CheckpointType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::MinTs => write!(f, "MinTs"), + Self::StartTsOfInitialScan => write!(f, "StartTsOfInitialScan"), + Self::StartTsOfTxn(arg0) => f + .debug_tuple("StartTsOfTxn") + .field(&format_args!( + "{}", + utils::redact(&arg0.as_ref().map(|x| x.as_ref()).unwrap_or(&[])) + )) + .finish(), + } + } +} + +pub struct ResolveResult { + pub region: Region, + pub checkpoint: TimeStamp, + pub checkpoint_type: CheckpointType, +} + +impl std::fmt::Debug for ResolveResult { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ResolveResult") + .field("region", &self.region.get_id()) + .field("checkpoint", &self.checkpoint) + .field("checkpoint_type", &self.checkpoint_type) + .finish() + } +} + +impl ResolveResult { + fn resolve(sub: &mut RegionSubscription, min_ts: TimeStamp) -> Self { + let ts = sub.resolver.resolve(min_ts); + let ty = if ts == min_ts { + CheckpointType::MinTs + } else if sub.resolver.in_phase_one() { + CheckpointType::StartTsOfInitialScan + } else { + CheckpointType::StartTsOfTxn(sub.resolver.sample_far_lock()) + }; + Self { + region: sub.meta.clone(), + checkpoint: ts, + checkpoint_type: ty, + } + } +} + impl SubscriptionTracer { /// clear the current `SubscriptionTracer`. pub fn clear(&self) { @@ -91,11 +148,11 @@ impl SubscriptionTracer { /// try advance the resolved ts with the min ts of in-memory locks. /// returns the regions and theirs resolved ts. - pub fn resolve_with(&self, min_ts: TimeStamp) -> Vec<(Region, TimeStamp)> { + pub fn resolve_with(&self, min_ts: TimeStamp) -> Vec { self.0 .iter_mut() // Don't advance the checkpoint ts of removed region. 
- .map(|mut s| (s.meta.clone(), s.resolver.resolve(min_ts))) + .map(|mut s| ResolveResult::resolve(s.value_mut(), min_ts)) .collect() } @@ -140,7 +197,7 @@ impl SubscriptionTracer { false } None => { - warn!("trying to deregister region not registered"; "region_id" => %region_id); + debug!("trying to deregister region not registered"; "region_id" => %region_id); false } } @@ -156,7 +213,7 @@ impl SubscriptionTracer { let mut sub = match self.get_subscription_of(new_region.get_id()) { Some(sub) => sub, None => { - warn!("backup stream observer refreshing void subscription."; "new_region" => ?new_region); + warn!("backup stream observer refreshing void subscription."; utils::slog_region(new_region)); return true; } }; @@ -258,6 +315,12 @@ impl std::fmt::Debug for FutureLock { } impl TwoPhaseResolver { + /// try to get one of the key of the oldest lock in the resolver. + pub fn sample_far_lock(&self) -> Option> { + let (_, keys) = self.resolver.locks().first_key_value()?; + keys.iter().next().cloned() + } + pub fn in_phase_one(&self) -> bool { self.stable_ts.is_some() } @@ -348,6 +411,8 @@ impl std::fmt::Debug for TwoPhaseResolver { #[cfg(test)] mod test { + use std::sync::Arc; + use kvproto::metapb::{Region, RegionEpoch}; use raftstore::coprocessor::ObserveHandle; use txn_types::TimeStamp; @@ -433,15 +498,24 @@ mod test { subs.deregister_region_if(®ion(5, 8, 1), |_, _| true); drop(region4_sub); - let mut rs = subs.resolve_with(TimeStamp::new(1000)); + let mut rs = subs + .resolve_with(TimeStamp::new(1000)) + .into_iter() + .map(|r| (r.region, r.checkpoint, r.checkpoint_type)) + .collect::>(); rs.sort_by_key(|k| k.0.get_id()); + use crate::subscription_track::CheckpointType::*; assert_eq!( rs, vec![ - (region(1, 1, 1), TimeStamp::new(42)), - (region(2, 2, 1), TimeStamp::new(1000)), - (region(3, 4, 1), TimeStamp::new(1000)), - (region(4, 8, 1), TimeStamp::new(128)), + (region(1, 1, 1), 42.into(), StartTsOfInitialScan), + (region(2, 2, 1), 1000.into(), MinTs), + 
(region(3, 4, 1), 1000.into(), MinTs), + ( + region(4, 8, 1), + 128.into(), + StartTsOfTxn(Some(Arc::from(b"Alpi".as_slice()))) + ), ] ); } diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 6ecea21f2f5..1746882690f 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -3,6 +3,7 @@ use core::pin::Pin; use std::{ borrow::Borrow, + cell::RefCell, collections::{hash_map::RandomState, BTreeMap, HashMap}, ops::{Bound, RangeBounds}, path::Path, @@ -20,6 +21,7 @@ use engine_traits::{CfName, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE}; use futures::{channel::mpsc, executor::block_on, ready, task::Poll, FutureExt, StreamExt}; use kvproto::{ brpb::CompressionType, + metapb::Region, raft_cmdpb::{CmdType, Request}, }; use raft::StateRole; @@ -743,6 +745,109 @@ impl CompressionWriter for ZstdCompressionWriter { } } +/// make a pair of key range to impl Debug which prints [start_key,$end_key). +pub fn debug_key_range<'ret, 'a: 'ret, 'b: 'ret>( + start: &'a [u8], + end: &'b [u8], +) -> impl std::fmt::Debug + 'ret { + DebugKeyRange::<'a, 'b>(start, end) +} + +struct DebugKeyRange<'start, 'end>(&'start [u8], &'end [u8]); + +impl<'start, 'end> std::fmt::Debug for DebugKeyRange<'start, 'end> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let end_key = if self.1.is_empty() { + Either::Left("inf") + } else { + Either::Right(redact(&self.1)) + }; + let end_key: &dyn std::fmt::Display = match &end_key { + Either::Left(x) => x, + Either::Right(y) => y, + }; + write!(f, "[{},{})", redact(&self.0), end_key) + } +} + +/// make a [`Region`](kvproto::metapb::Region) implements [`slog::KV`], which +/// prints its fields like `[r.id=xxx] [r.ver=xxx] ...` +pub fn slog_region(r: &Region) -> impl slog::KV + '_ { + SlogRegion(r) +} + +/// make a [`Region`](kvproto::metapb::Region) implements +/// [`Debug`](std::fmt::Debug), which prints its essential fields. 
+pub fn debug_region(r: &Region) -> impl std::fmt::Debug + '_ { + DebugRegion(r) +} + +struct DebugRegion<'a>(&'a Region); + +impl<'a> std::fmt::Debug for DebugRegion<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let r = self.0; + f.debug_struct("Region") + .field("id", &r.get_id()) + .field("ver", &r.get_region_epoch().get_version()) + .field("conf_ver", &r.get_region_epoch().get_conf_ver()) + .field( + "range", + &debug_key_range(r.get_start_key(), r.get_end_key()), + ) + .field( + "peers", + &debug_iter(r.get_peers().iter().map(|p| p.store_id)), + ) + .finish() + } +} + +struct SlogRegion<'a>(&'a Region); + +impl<'a> slog::KV for SlogRegion<'a> { + fn serialize( + &self, + _record: &slog::Record<'_>, + serializer: &mut dyn slog::Serializer, + ) -> slog::Result { + let r = self.0; + serializer.emit_u64("r.id", r.get_id())?; + serializer.emit_u64("r.ver", r.get_region_epoch().get_version())?; + serializer.emit_u64("r.conf_ver", r.get_region_epoch().get_conf_ver())?; + serializer.emit_arguments( + "r.range", + &format_args!("{:?}", debug_key_range(r.get_start_key(), r.get_end_key())), + )?; + serializer.emit_arguments( + "r.peers", + &format_args!("{:?}", debug_iter(r.get_peers().iter().map(|p| p.store_id))), + )?; + Ok(()) + } +} + +pub fn debug_iter(t: impl Iterator) -> impl std::fmt::Debug { + DebugIter(RefCell::new(t)) +} + +struct DebugIter>(RefCell); + +impl> std::fmt::Debug for DebugIter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut is_first = true; + while let Some(x) = self.0.borrow_mut().next() { + if !is_first { + write!(f, ",{:?}", x)?; + } else { + write!(f, "{:?}", x)?; + is_first = false; + } + } + Ok(()) + } +} + #[cfg(test)] mod test { use std::{ @@ -755,10 +860,35 @@ mod test { use engine_traits::WriteOptions; use futures::executor::block_on; + use kvproto::metapb::{Region, RegionEpoch}; use tokio::io::{AsyncWriteExt, BufReader}; use crate::utils::{is_in_range, 
CallbackWaitGroup, SegmentMap}; + #[test] + fn test_redact() { + log_wrappers::set_redact_info_log(true); + let mut region = Region::default(); + region.set_id(42); + region.set_start_key(b"TiDB".to_vec()); + region.set_end_key(b"TiDC".to_vec()); + region.set_region_epoch({ + let mut r = RegionEpoch::default(); + r.set_version(108); + r.set_conf_ver(352); + r + }); + + // Can we make a better way to test this? + assert_eq!( + "Region { id: 42, ver: 108, conf_ver: 352, range: [?,?), peers: }", + format!("{:?}", super::debug_region(®ion)) + ); + + let range = super::debug_key_range(b"alpha", b"omega"); + assert_eq!("[?,?)", format!("{:?}", range)); + } + #[test] fn test_range_functions() { #[derive(Debug)] From 0cc15e4e7211e406f183917e155d5fcf43e44c6a Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 22 Dec 2022 12:30:55 +0800 Subject: [PATCH 0421/1149] Raftstore-v2: txn_ext and bucket_meta of RegionSnapshot should be inited (#13911) ref tikv/tikv#12842 Signed-off-by: SpadeA-Tang --- .../raftstore-v2/src/operation/query/local.rs | 79 ++++++++++++------- 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 1878ead40c2..812cf2354fa 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -107,41 +107,52 @@ where req: &RaftCmdRequest, ) -> std::result::Result>, RaftCmdResponse> { match self.pre_propose_raft_command(req) { - Ok(Some((mut delegate, policy))) => match policy { - RequestPolicy::ReadLocal => { - let region = Arc::clone(&delegate.region); - let snap = RegionSnapshot::from_snapshot(delegate.get_snapshot(&None), region); - // Ensures the snapshot is acquired before getting the time - atomic::fence(atomic::Ordering::Release); - let snapshot_ts = monotonic_raw_now(); - - if !delegate.is_in_leader_lease(snapshot_ts) { - 
return Ok(None); + Ok(Some((mut delegate, policy))) => { + let mut snap = match policy { + RequestPolicy::ReadLocal => { + let region = Arc::clone(&delegate.region); + let snap = + RegionSnapshot::from_snapshot(delegate.get_snapshot(&None), region); + // Ensures the snapshot is acquired before getting the time + atomic::fence(atomic::Ordering::Release); + let snapshot_ts = monotonic_raw_now(); + + if !delegate.is_in_leader_lease(snapshot_ts) { + return Ok(None); + } + + TLS_LOCAL_READ_METRICS + .with(|m| m.borrow_mut().local_executed_requests.inc()); + + // Try renew lease in advance + self.maybe_renew_lease_in_advance(&delegate, req, snapshot_ts); + snap } + RequestPolicy::StaleRead => { + let read_ts = decode_u64(&mut req.get_header().get_flag_data()).unwrap(); + delegate.check_stale_read_safe(read_ts)?; - TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().local_executed_requests.inc()); + let region = Arc::clone(&delegate.region); + let snap = + RegionSnapshot::from_snapshot(delegate.get_snapshot(&None), region); - // Try renew lease in advance - self.maybe_renew_lease_in_advance(&delegate, req, snapshot_ts); - Ok(Some(snap)) - } - RequestPolicy::StaleRead => { - let read_ts = decode_u64(&mut req.get_header().get_flag_data()).unwrap(); - delegate.check_stale_read_safe(read_ts)?; + TLS_LOCAL_READ_METRICS + .with(|m| m.borrow_mut().local_executed_requests.inc()); - let region = Arc::clone(&delegate.region); - let snap = RegionSnapshot::from_snapshot(delegate.get_snapshot(&None), region); + delegate.check_stale_read_safe(read_ts)?; - TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().local_executed_requests.inc()); + TLS_LOCAL_READ_METRICS + .with(|m| m.borrow_mut().local_executed_stale_read_requests.inc()); + snap + } + _ => unreachable!(), + }; - delegate.check_stale_read_safe(read_ts)?; + snap.txn_ext = Some(delegate.txn_ext.clone()); + snap.bucket_meta = delegate.bucket_meta.clone(); - TLS_LOCAL_READ_METRICS - .with(|m| 
m.borrow_mut().local_executed_stale_read_requests.inc()); - Ok(Some(snap)) - } - _ => unreachable!(), - }, + Ok(Some(snap)) + } Ok(None) => Ok(None), Err(e) => { let mut response = cmd_resp::new_error(e); @@ -458,6 +469,7 @@ mod tests { use engine_traits::{MiscExt, Peekable, SyncMutable, TabletContext, DATA_CFS}; use futures::executor::block_on; use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, raft_cmdpb::*}; + use pd_client::BucketMeta; use raftstore::store::{ util::Lease, ReadCallback, ReadProgress, RegionReadProgress, TrackVer, TxnExt, TLS_LOCAL_READ_METRICS, @@ -628,6 +640,8 @@ mod tests { // Register region 1 lease.renew(monotonic_raw_now()); let remote = lease.maybe_new_remote_lease(term6).unwrap(); + let txn_ext = Arc::new(TxnExt::default()); + let bucket_meta = Arc::new(BucketMeta::default()); { let mut meta = store_meta.as_ref().lock().unwrap(); @@ -641,11 +655,11 @@ mod tests { leader_lease: Some(remote), last_valid_ts: Timespec::new(0, 0), txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::default())), - txn_ext: Arc::new(TxnExt::default()), + txn_ext: txn_ext.clone(), read_progress: read_progress.clone(), pending_remove: false, track_ver: TrackVer::new(), - bucket_meta: None, + bucket_meta: Some(bucket_meta.clone()), }; meta.readers.insert(1, read_delegate); // create tablet with region_id 1 and prepare some data @@ -675,6 +689,11 @@ mod tests { // the applied term by the above thread, the snapshot will be acquired by // retrying. 
let snap = block_on(reader.snapshot(cmd.clone())).unwrap(); + assert!(Arc::ptr_eq(snap.txn_ext.as_ref().unwrap(), &txn_ext)); + assert!(Arc::ptr_eq( + snap.bucket_meta.as_ref().unwrap(), + &bucket_meta + )); assert_eq!(*snap.get_region(), region1); assert_eq!( TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), From 36570b8e2fea93e46b1b4028b5ac7cbda72fbe67 Mon Sep 17 00:00:00 2001 From: Jay Date: Thu, 22 Dec 2022 16:26:56 +0800 Subject: [PATCH 0422/1149] raftstore-v2: make coprocessor work (#13978) ref tikv/tikv#12842 Coprocessor is necessary for a working daemon. This PR adjusts coprocessor to make it work with raftstore v2. And split check is also added for auto splitting. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 - components/raftstore-v2/Cargo.toml | 1 - components/raftstore-v2/src/batch/store.rs | 128 +++++++------ components/raftstore-v2/src/fsm/mod.rs | 2 +- components/raftstore-v2/src/fsm/peer.rs | 2 +- components/raftstore-v2/src/fsm/store.rs | 16 +- components/raftstore-v2/src/lib.rs | 2 +- .../operation/command/admin/conf_change.rs | 12 +- .../src/operation/command/admin/mod.rs | 9 +- .../src/operation/command/admin/split.rs | 181 +++++++----------- .../command/admin/transfer_leader.rs | 7 +- .../raftstore-v2/src/operation/command/mod.rs | 12 +- .../src/operation/command/write/mod.rs | 2 + components/raftstore-v2/src/operation/life.rs | 19 +- components/raftstore-v2/src/operation/mod.rs | 2 +- components/raftstore-v2/src/operation/pd.rs | 12 +- .../src/operation/ready/async_writer.rs | 2 +- .../raftstore-v2/src/operation/ready/mod.rs | 17 +- .../src/operation/ready/snapshot.rs | 19 +- components/raftstore-v2/src/raft/apply.rs | 7 +- components/raftstore-v2/src/raft/peer.rs | 67 ++++--- components/raftstore-v2/src/router/imp.rs | 73 ++++++- .../src/router/internal_message.rs | 3 + components/raftstore-v2/src/router/message.rs | 2 +- .../tests/integrations/cluster.rs | 27 +-- 
.../tests/integrations/test_split.rs | 29 ++- .../raftstore/src/coprocessor/dispatcher.rs | 127 +++++++++++- components/raftstore/src/coprocessor/mod.rs | 2 +- .../src/coprocessor/split_check/half.rs | 18 +- .../src/coprocessor/split_check/keys.rs | 57 ++---- .../src/coprocessor/split_check/size.rs | 127 ++++-------- .../src/coprocessor/split_check/table.rs | 14 +- components/raftstore/src/lib.rs | 1 + components/raftstore/src/router.rs | 110 ++++++++++- components/raftstore/src/store/fsm/store.rs | 13 +- .../src/store/worker/consistency_check.rs | 48 ++--- .../raftstore/src/store/worker/split_check.rs | 164 ++++++++-------- src/server/lock_manager/deadlock.rs | 15 +- src/server/lock_manager/mod.rs | 5 - src/server/raftkv2/node.rs | 19 +- 40 files changed, 783 insertions(+), 591 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cb371b739af..67ca50ba1ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4359,7 +4359,6 @@ dependencies = [ "file_system", "fs2", "futures 0.3.15", - "itertools", "keys", "kvproto", "log_wrappers", diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index d9b1d65aebc..4d3d44ec6fd 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -41,7 +41,6 @@ fail = "0.5" file_system = { workspace = true } fs2 = "0.4" futures = { version = "0.3", features = ["compat"] } -itertools = "0.10" keys = { workspace = true } kvproto = { workspace = true } log_wrappers = { workspace = true } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 8a1f60f3717..642f6e745f0 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -22,10 +22,10 @@ use kvproto::{disk_usage::DiskUsage, raft_serverpb::RaftMessage}; use pd_client::PdClient; use raft::{StateRole, INVALID_ID}; use raftstore::{ - coprocessor::RegionChangeEvent, + coprocessor::{CoprocessorHost, RegionChangeEvent}, store::{ 
fsm::store::PeerTickBatch, local_metrics::RaftMetrics, Config, ReadRunner, ReadTask, - StoreWriters, TabletSnapManager, Transport, WriteSenders, + SplitCheckRunner, SplitCheckTask, StoreWriters, TabletSnapManager, Transport, WriteSenders, }, }; use slog::Logger; @@ -42,10 +42,7 @@ use tikv_util::{ use time::Timespec; use crate::{ - fsm::{ - LockManagerNotifier, PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate, - StoreMeta, - }, + fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate, StoreMeta}, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, worker::pd, @@ -56,6 +53,7 @@ use crate::{ pub struct StoreContext { /// A logger without any KV. It's clean for creating new PeerFSM. pub logger: Logger, + pub coprocessor_host: CoprocessorHost, /// The transport for sending messages to peers on other stores. pub trans: T, pub current_time: Option, @@ -69,21 +67,17 @@ pub struct StoreContext { pub tick_batch: Vec, /// The precise timer for scheduling tick. pub timer: SteadyTimer, - pub write_senders: WriteSenders, + pub schedulers: Schedulers, /// store meta pub store_meta: Arc>, pub engine: ER, pub tablet_registry: TabletRegistry, pub apply_pool: FuturePool, - pub read_scheduler: Scheduler>, /// Disk usage for the store itself. pub self_disk_usage: DiskUsage, pub snap_mgr: TabletSnapManager, - pub pd_scheduler: Scheduler, - - pub lock_manager_notifier: Arc, } /// A [`PollHandler`] that handles updates of [`StoreFsm`]s and [`PeerFsm`]s. 
@@ -225,19 +219,17 @@ impl PollHandler { cfg: Arc>, + coprocessor_host: CoprocessorHost, store_id: u64, engine: ER, tablet_registry: TabletRegistry, trans: T, router: StoreRouter, - read_scheduler: Scheduler>, - pd_scheduler: Scheduler, - write_senders: WriteSenders, + schedulers: Schedulers, apply_pool: FuturePool, logger: Logger, store_meta: Arc>, snap_mgr: TabletSnapManager, - lock_manager_notifier: Arc, } impl StorePollerBuilder { @@ -248,13 +240,11 @@ impl StorePollerBuilder { tablet_registry: TabletRegistry, trans: T, router: StoreRouter, - read_scheduler: Scheduler>, - pd_scheduler: Scheduler, - store_writers: &mut StoreWriters, + schedulers: Schedulers, logger: Logger, store_meta: Arc>, snap_mgr: TabletSnapManager, - lock_manager_notifier: Arc, + coprocessor_host: CoprocessorHost, ) -> Self { let pool_size = cfg.value().apply_batch_system.pool_size; let max_pool_size = std::cmp::max( @@ -273,14 +263,12 @@ impl StorePollerBuilder { tablet_registry, trans, router, - read_scheduler, - pd_scheduler, apply_pool, logger, - write_senders: store_writers.senders(), + schedulers, store_meta, snap_mgr, - lock_manager_notifier, + coprocessor_host, } } @@ -296,17 +284,20 @@ impl StorePollerBuilder { region_id, self.store_id, self.engine.clone(), - self.read_scheduler.clone(), + self.schedulers.read.clone(), &self.logger, )? 
{ Some(p) => p, None => return Ok(()), }; - self.lock_manager_notifier.on_region_changed( - storage.region_state().get_region(), - RegionChangeEvent::Create, - StateRole::Follower, - ); + + if storage.is_initialized() { + self.coprocessor_host.on_region_changed( + storage.region(), + RegionChangeEvent::Create, + StateRole::Follower, + ); + } let (sender, peer_fsm) = PeerFsm::new(&cfg, &self.tablet_registry, storage)?; meta.region_read_progress @@ -353,37 +344,49 @@ where router: self.router.clone(), tick_batch: vec![PeerTickBatch::default(); PeerTick::VARIANT_COUNT], timer: SteadyTimer::default(), - write_senders: self.write_senders.clone(), + schedulers: self.schedulers.clone(), store_meta: self.store_meta.clone(), engine: self.engine.clone(), tablet_registry: self.tablet_registry.clone(), apply_pool: self.apply_pool.clone(), - read_scheduler: self.read_scheduler.clone(), self_disk_usage: DiskUsage::Normal, snap_mgr: self.snap_mgr.clone(), - pd_scheduler: self.pd_scheduler.clone(), - lock_manager_notifier: self.lock_manager_notifier.clone(), + coprocessor_host: self.coprocessor_host.clone(), }; let cfg_tracker = self.cfg.clone().tracker("raftstore".to_string()); StorePoller::new(poll_ctx, cfg_tracker) } } +#[derive(Clone)] +pub struct Schedulers { + pub read: Scheduler>, + pub pd: Scheduler, + pub write: WriteSenders, + + // Following is not maintained by raftstore itself. + pub split_check: Scheduler, +} + /// A set of background threads that will processing offloaded work from /// raftstore. struct Workers { /// Worker for fetching raft logs asynchronously - async_read_worker: Worker, - pd_worker: Worker, - store_writers: StoreWriters, + async_read: Worker, + pd: Worker, + async_write: StoreWriters, + + // Following is not maintained by raftstore itself. 
+ background: Worker, } -impl Default for Workers { - fn default() -> Self { +impl Workers { + fn new(background: Worker) -> Self { Self { - async_read_worker: Worker::new("async-read-worker"), - pd_worker: Worker::new("pd-worker"), - store_writers: StoreWriters::default(), + async_read: Worker::new("async-read-worker"), + pd: Worker::new("pd-worker"), + async_write: StoreWriters::default(), + background, } } } @@ -410,7 +413,8 @@ impl StoreSystem { snap_mgr: TabletSnapManager, concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 - lock_manager_notifier: Arc, + coprocessor_host: CoprocessorHost, + background: Worker, ) -> Result<()> where T: Transport + 'static, @@ -424,18 +428,16 @@ impl StoreSystem { .broadcast_normal(|| PeerMsg::Tick(PeerTick::PdHeartbeat)); }); - let mut workers = Workers::default(); + let mut workers = Workers::new(background); workers - .store_writers + .async_write .spawn(store_id, raft_engine.clone(), None, router, &trans, &cfg)?; let mut read_runner = ReadRunner::new(router.clone(), raft_engine.clone()); read_runner.set_snap_mgr(snap_mgr.clone()); - let read_scheduler = workers - .async_read_worker - .start("async-read-worker", read_runner); + let read_scheduler = workers.async_read.start("async-read-worker", read_runner); - let pd_scheduler = workers.pd_worker.start( + let pd_scheduler = workers.pd.start( "pd-worker", pd::Runner::new( store_id, @@ -443,7 +445,7 @@ impl StoreSystem { raft_engine.clone(), tablet_registry.clone(), router.clone(), - workers.pd_worker.remote(), + workers.pd.remote(), concurrency_manager, causal_ts_provider, self.logger.clone(), @@ -451,6 +453,22 @@ impl StoreSystem { ), ); + let split_check_scheduler = workers.background.start( + "split-check", + SplitCheckRunner::with_registry( + tablet_registry.clone(), + router.clone(), + coprocessor_host.clone(), + ), + ); + + let schedulers = Schedulers { + read: read_scheduler, + pd: pd_scheduler, + write: 
workers.async_write.senders(), + split_check: split_check_scheduler, + }; + let builder = StorePollerBuilder::new( cfg.clone(), store_id, @@ -458,13 +476,11 @@ impl StoreSystem { tablet_registry, trans, router.clone(), - read_scheduler, - pd_scheduler, - &mut workers.store_writers, + schedulers, self.logger.clone(), store_meta.clone(), snap_mgr, - lock_manager_notifier, + coprocessor_host, ); self.workers = Some(workers); let peers = builder.init()?; @@ -510,9 +526,9 @@ impl StoreSystem { self.system.shutdown(); - workers.store_writers.shutdown(); - workers.async_read_worker.stop(); - workers.pd_worker.stop(); + workers.async_write.shutdown(); + workers.async_read.stop(); + workers.pd.stop(); } } diff --git a/components/raftstore-v2/src/fsm/mod.rs b/components/raftstore-v2/src/fsm/mod.rs index 442c6b050ce..b3d0e0483ba 100644 --- a/components/raftstore-v2/src/fsm/mod.rs +++ b/components/raftstore-v2/src/fsm/mod.rs @@ -11,4 +11,4 @@ mod store; pub use apply::{ApplyFsm, ApplyResReporter, ApplyScheduler}; pub use peer::{PeerFsm, PeerFsmDelegate, SenderFsmPair}; -pub use store::{LockManagerNotifier, Store, StoreFsm, StoreFsmDelegate, StoreMeta}; +pub use store::{Store, StoreFsm, StoreFsmDelegate, StoreMeta}; diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index f5425295347..1ef9e198130 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -206,7 +206,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerTick::Raft => self.on_raft_tick(), PeerTick::PdHeartbeat => self.on_pd_heartbeat(), PeerTick::RaftLogGc => unimplemented!(), - PeerTick::SplitRegionCheck => unimplemented!(), + PeerTick::SplitRegionCheck => self.on_split_region_check(), PeerTick::CheckMerge => unimplemented!(), PeerTick::CheckPeerStaleState => unimplemented!(), PeerTick::EntryCacheEvict => unimplemented!(), diff --git a/components/raftstore-v2/src/fsm/store.rs 
b/components/raftstore-v2/src/fsm/store.rs index 6e2dfe4a75f..bd31de69496 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -6,12 +6,7 @@ use batch_system::Fsm; use collections::HashMap; use engine_traits::{KvEngine, RaftEngine}; use futures::{compat::Future01CompatExt, FutureExt}; -use kvproto::metapb::Region; -use raft::StateRole; -use raftstore::{ - coprocessor::{RegionChangeEvent, RoleChange}, - store::{Config, ReadDelegate, RegionReadProgressRegistry}, -}; +use raftstore::store::{Config, ReadDelegate, RegionReadProgressRegistry}; use slog::{info, o, Logger}; use tikv_util::{ future::poll_future_notify, @@ -170,12 +165,3 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { } } } - -// A simplified version of CoprocessorHost used to convey information to -// LockManager only. -// It is replaced by CoprocessorHost in the future. -pub trait LockManagerNotifier: Send + Sync { - fn on_role_change(&self, region: &Region, role_change: RoleChange); - - fn on_region_changed(&self, region: &Region, event: RegionChangeEvent, role: StateRole); -} diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index cb769b6594a..d8327549da6 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -38,6 +38,6 @@ mod worker; pub(crate) use batch::StoreContext; pub use batch::{create_store_batch_system, StoreRouter, StoreSystem}; pub use bootstrap::Bootstrap; -pub use fsm::{LockManagerNotifier, StoreMeta}; +pub use fsm::StoreMeta; pub use operation::{SimpleWriteBinary, SimpleWriteEncoder, StateStorage}; pub use raftstore::{store::Config, Error, Result}; diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index ec0b78e717a..5a6c91d3567 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ 
b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -147,13 +147,6 @@ impl Peer { let remove_self = conf_change.region_state.get_state() == PeerState::Tombstone; self.storage_mut() .set_region_state(conf_change.region_state); - - ctx.lock_manager_notifier.on_region_changed( - self.region(), - RegionChangeEvent::Update(RegionChangeReason::ChangePeer), - self.get_role(), - ); - if self.is_leader() { info!( self.logger, @@ -190,6 +183,11 @@ impl Peer { self.set_has_ready(); } } + ctx.coprocessor_host.on_region_changed( + self.region(), + RegionChangeEvent::Update(RegionChangeReason::ChangePeer), + self.raft_group().raft.state, + ); if remove_self { self.mark_for_destroy(None); } diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 9b7dce8570f..0b3d588abf7 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -9,7 +9,7 @@ use kvproto::raft_cmdpb::{AdminCmdType, RaftCmdRequest}; use protobuf::Message; use raftstore::store::{cmd_resp, fsm::apply, msg::ErrorCallback}; use slog::info; -pub use split::{RequestSplit, SplitInit, SplitResult, SPLIT_PREFIX}; +pub use split::{RequestSplit, SplitFlowControl, SplitInit, SplitResult, SPLIT_PREFIX}; use tikv_util::box_err; use txn_types::WriteBatchFlags; @@ -30,7 +30,7 @@ impl Peer { pub fn on_admin_command( &mut self, ctx: &mut StoreContext, - req: RaftCmdRequest, + mut req: RaftCmdRequest, ch: CmdResChannel, ) { if !self.serving() { @@ -43,6 +43,11 @@ impl Peer { ch.report_error(resp); return; } + if let Err(e) = ctx.coprocessor_host.pre_propose(self.region(), &mut req) { + let resp = cmd_resp::new_error(e.into()); + ch.report_error(resp); + return; + } let cmd_type = req.get_admin_request().get_cmd_type(); if let Err(e) = self.validate_command(req.get_header(), Some(cmd_type), &mut ctx.raft_metrics) diff --git 
a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 870c203f07d..64388333fee 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -25,44 +25,40 @@ //! created by the store, and here init it using the data sent from the parent //! peer. -use std::cmp; +use std::{borrow::Cow, cmp}; use collections::HashSet; use crossbeam::channel::SendError; use engine_traits::{Checkpointer, KvEngine, RaftEngine, RaftLogBatch, TabletContext}; use fail::fail_point; -use itertools::Itertools; use kvproto::{ metapb::{self, Region, RegionEpoch}, + pdpb::CheckPolicy, raft_cmdpb::{AdminRequest, AdminResponse, RaftCmdRequest, SplitRequest}, raft_serverpb::RaftSnapshotData, }; use protobuf::Message; use raft::{prelude::Snapshot, INVALID_ID}; use raftstore::{ - coprocessor::{ - split_observer::{is_valid_split_key, strip_timestamp_if_exists}, - RegionChangeReason, - }, + coprocessor::RegionChangeReason, store::{ cmd_resp, - fsm::apply::validate_batch_split, + fsm::{apply::validate_batch_split, ApplyMetrics}, metrics::PEER_ADMIN_CMD_COUNTER, snap::TABLET_SNAPSHOT_VERSION, util::{self, KeysInfoFormatter}, - PeerPessimisticLocks, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, + PeerPessimisticLocks, SplitCheckTask, Transport, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }, Result, }; -use slog::{error, info, warn, Logger}; -use tikv_util::box_err; +use slog::info; use crate::{ batch::StoreContext, - fsm::ApplyResReporter, + fsm::{ApplyResReporter, PeerFsmDelegate}, operation::AdminCmdResult, raft::{Apply, Peer}, - router::{CmdResChannel, PeerMsg, StoreMsg}, + router::{CmdResChannel, PeerMsg, PeerTick, StoreMsg}, Error, }; @@ -107,67 +103,68 @@ impl SplitInit { } } -// validate split request and strip ts from split keys if needed -fn pre_propose_split(logger: &Logger, req: &mut AdminRequest, region: &Region) -> Result<()> { - if 
!req.has_splits() { - return Err(box_err!( - "cmd_type is BatchSplit but it doesn't have splits request, message maybe \ - corrupted!" - .to_owned() - )); - } - - let mut requests: Vec = req.mut_splits().take_requests().into(); - let ajusted_splits = std::mem::take(&mut requests) - .into_iter() - .enumerate() - .filter_map(|(i, mut split)| { - let key = split.take_split_key(); - let key = strip_timestamp_if_exists(key); - if is_valid_split_key(&key, i, region) { - split.split_key = key; - Some(split) - } else { - None - } - }) - .coalesce(|prev, curr| { - // Make sure that the split keys are sorted and unique. - if prev.split_key < curr.split_key { - Err((prev, curr)) - } else { - warn!( - logger, - "skip invalid split key: key should not be larger than the previous."; - "key" => log_wrappers::Value::key(&curr.split_key), - "previous" => log_wrappers::Value::key(&prev.split_key), - ); - Ok(prev) - } - }) - .collect::>(); - - if ajusted_splits.is_empty() { - error!( - logger, - "failed to handle split req, no valid key found for split"; - ); - Err(box_err!("no valid key found for split.".to_owned())) - } else { - // Rewrite the splits. - req.mut_splits().set_requests(ajusted_splits.into()); - Ok(()) - } -} - #[derive(Debug)] pub struct RequestSplit { pub epoch: RegionEpoch, pub split_keys: Vec>, - pub source: Box, + pub source: Cow<'static, str>, +} + +#[derive(Default, Debug)] +pub struct SplitFlowControl { + size_diff_hint: i64, + skip_split_count: u64, + may_skip_split_check: bool, +} + +impl PeerFsmDelegate<'_, EK, ER, T> { + pub fn on_split_region_check(&mut self) { + if !self.fsm.peer_mut().on_split_region_check(self.store_ctx) { + self.schedule_tick(PeerTick::SplitRegionCheck) + } + } } impl Peer { + /// Handle split check. + /// + /// Returns true means the check tick is consumed, no need to schedule + /// another tick. 
+ pub fn on_split_region_check(&mut self, ctx: &mut StoreContext) -> bool { + if !self.is_leader() { + return true; + } + let is_generating_snapshot = self.storage().is_generating_snapshot(); + let control = self.split_flow_control_mut(); + if control.may_skip_split_check + && control.size_diff_hint < ctx.cfg.region_split_check_diff().0 as i64 + { + return true; + } + if ctx.schedulers.split_check.is_busy() { + return false; + } + if is_generating_snapshot && control.skip_split_count < 3 { + control.skip_split_count += 1; + return false; + } + let task = + SplitCheckTask::split_check(self.region().clone(), true, CheckPolicy::Scan, None); + if let Err(e) = ctx.schedulers.split_check.schedule(task) { + info!(self.logger, "failed to schedule split check"; "err" => ?e); + } + let control = self.split_flow_control_mut(); + control.may_skip_split_check = true; + control.size_diff_hint = 0; + control.skip_split_count = 0; + false + } + + pub fn update_split_flow_control(&mut self, metrics: &ApplyMetrics) { + let control = self.split_flow_control_mut(); + control.size_diff_hint += metrics.size_diff_hint; + } + pub fn on_request_split( &mut self, ctx: &mut StoreContext, @@ -178,7 +175,7 @@ impl Peer { self.logger, "on split"; "split_keys" => %KeysInfoFormatter(rs.split_keys.iter()), - "source" => &rs.source, + "source" => %&rs.source, ); if !self.is_leader() { // region on this store is no longer leader, skipped. 
@@ -196,7 +193,7 @@ impl Peer { &rs.epoch, &rs.split_keys, ) { - info!(self.logger, "invalid split request"; "err" => ?e, "source" => &rs.source); + info!(self.logger, "invalid split request"; "err" => ?e, "source" => %&rs.source); ch.set_result(cmd_resp::new_error(e)); return; } @@ -206,9 +203,9 @@ impl Peer { pub fn propose_split( &mut self, store_ctx: &mut StoreContext, - mut req: RaftCmdRequest, + req: RaftCmdRequest, ) -> Result { - pre_propose_split(&self.logger, req.mut_admin_request(), self.region())?; + validate_batch_split(req.get_admin_request(), self.region())?; // We rely on ConflictChecker to detect conflicts, so no need to set proposal // context. let data = req.write_to_bytes().unwrap(); @@ -419,7 +416,7 @@ impl Peer { let mut meta = store_ctx.store_meta.lock().unwrap(); let reader = meta.readers.get_mut(&derived.get_id()).unwrap(); self.set_region( - &store_ctx.lock_manager_notifier, + &store_ctx.coprocessor_host, reader, derived.clone(), RegionChangeReason::Split, @@ -442,6 +439,7 @@ impl Peer { // Now pd only uses ReportBatchSplit for history operation show, // so we send it independently here. self.report_batch_split_pd(store_ctx, regions.to_vec()); + self.add_pending_tick(PeerTick::SplitRegionCheck); } let last_region_id = regions.last().unwrap().get_id(); @@ -552,7 +550,7 @@ impl Peer { let region_id = self.region_id(); if split_init.check_split { - // TODO: check if the last region needs to split again + self.add_pending_tick(PeerTick::SplitRegionCheck); } let _ = store_ctx .router @@ -581,7 +579,6 @@ impl Peer { if off > 0 { // There should be very few elements in the vector. split_trace.drain(..off); - // TODO: save admin_flushed. 
assert_ne!(admin_flushed, 0); self.storage_mut() .apply_trace_mut() @@ -618,7 +615,6 @@ mod test { store::{new_learner_peer, new_peer}, worker::dummy_scheduler, }; - use txn_types::Key; use super::*; use crate::{fsm::ApplyResReporter, raft::Apply, router::ApplyRes}; @@ -722,43 +718,6 @@ mod test { } } - #[test] - fn test_propose() { - let logger = slog_global::borrow_global().new(o!()); - - let mut region = Region::default(); - region.set_end_key(b"k10".to_vec()); - - let mut req = AdminRequest::default(); - let err = pre_propose_split(&logger, &mut req, ®ion).unwrap_err(); - assert!( - err.to_string() - .contains("cmd_type is BatchSplit but it doesn't have splits") - ); - - let mut splits = BatchSplitRequest::default(); - req.set_splits(splits.clone()); - let err = pre_propose_split(&logger, &mut req, ®ion).unwrap_err(); - assert!(err.to_string().contains("no valid key found")); - - splits.mut_requests().push(new_split_req(b"", 0, vec![])); - splits.mut_requests().push(new_split_req(b"k03", 0, vec![])); - splits.mut_requests().push(new_split_req(b"k02", 0, vec![])); - splits.mut_requests().push(new_split_req(b"k11", 0, vec![])); - let split_key = Key::from_raw(b"k06"); - let split_key_with_ts = split_key.clone().append_ts(10.into()); - splits - .mut_requests() - .push(new_split_req(split_key_with_ts.as_encoded(), 0, vec![])); - - req.set_splits(splits); - pre_propose_split(&logger, &mut req, ®ion).unwrap(); - let split_reqs = req.get_splits().get_requests(); - assert_eq!(split_reqs.len(), 2); - assert_eq!(split_reqs[0].get_split_key(), b"k03"); - assert_eq!(split_reqs[1].get_split_key(), split_key.as_encoded()); - } - #[test] fn test_split() { let store_id = 2; diff --git a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs index 1c25b363d59..12bd7bbf491 100644 --- a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs +++ 
b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs @@ -35,7 +35,7 @@ use crate::{ router::{CmdResChannel, PeerMsg, PeerTick}, }; -fn get_transfer_leader_cmd(msg: &RaftCmdRequest) -> Option<&TransferLeaderRequest> { +fn transfer_leader_cmd(msg: &RaftCmdRequest) -> Option<&TransferLeaderRequest> { if !msg.has_admin_request() { return None; } @@ -79,7 +79,7 @@ impl Peer { ) -> bool { ctx.raft_metrics.propose.transfer_leader.inc(); - let transfer_leader = get_transfer_leader_cmd(&req).unwrap(); + let transfer_leader = transfer_leader_cmd(&req).unwrap(); let prs = self.raft_group().raft.prs(); // Find the target with the largest matched index among the candidate @@ -108,7 +108,7 @@ impl Peer { _ => peers.choose(&mut rand::thread_rng()).unwrap(), }; - let transferee = if peer.id == self.peer().id { + let transferee = if peer.id == self.peer_id() { false } else { self.pre_transfer_leader(peer) @@ -212,6 +212,7 @@ impl Peer { "peer" => ?from, ); self.raft_group_mut().transfer_leader(from.get_id()); + self.refresh_leader_transferee(); } } } diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 116edec91c3..fce01f19277 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -16,6 +16,8 @@ //! - Applied result are sent back to peer fsm, and update memory state in //! `on_apply_res`. 
+use std::mem; + use engine_traits::{KvEngine, RaftEngine, WriteBatch, WriteOptions}; use kvproto::raft_cmdpb::{ AdminCmdType, CmdType, RaftCmdRequest, RaftCmdResponse, RaftRequestHeader, @@ -49,7 +51,9 @@ mod admin; mod control; mod write; -pub use admin::{AdminCmdResult, RequestSplit, SplitInit, SplitResult, SPLIT_PREFIX}; +pub use admin::{ + AdminCmdResult, RequestSplit, SplitFlowControl, SplitInit, SplitResult, SPLIT_PREFIX, +}; pub use control::ProposalControl; pub use write::{ SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, @@ -277,6 +281,9 @@ impl Peer { } } + self.update_split_flow_control(&apply_res.metrics); + self.update_stat(&apply_res.metrics); + self.raft_group_mut() .advance_apply_to(apply_res.applied_index); self.proposal_control_advance_apply(apply_res.applied_index); @@ -483,6 +490,8 @@ impl Apply { if let Err(e) = wb.write_opt(&write_opt) { panic!("failed to write data: {:?}: {:?}", self.logger.list(), e); } + self.metrics.written_bytes += wb.data_size() as u64; + self.metrics.written_keys += wb.count() as u64; if wb.data_size() <= APPLY_WB_SHRINK_SIZE { wb.clear(); } else { @@ -502,6 +511,7 @@ impl Apply { apply_res.applied_term = term; apply_res.admin_result = self.take_admin_result().into_boxed_slice(); apply_res.modifications = *self.modifications_mut(); + apply_res.metrics = mem::take(&mut self.metrics); self.res_reporter().report(apply_res); } } diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 6ea6064a002..51beeee7dea 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -139,6 +139,7 @@ impl Apply { fail::fail_point!("APPLY_PUT", |_| Err(raftstore::Error::Other( "aborted by failpoint".into() ))); + self.metrics.size_diff_hint += (self.key_buffer.len() + value.len()) as i64; self.modifications_mut()[off] = index; Ok(()) } @@ -169,6 
+170,7 @@ impl Apply { e ); }); + self.metrics.size_diff_hint -= self.key_buffer.len() as i64; self.modifications_mut()[off] = index; Ok(()) } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 73db4e760d1..59e0e532faa 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -19,10 +19,7 @@ use kvproto::{ metapb::Region, raft_serverpb::{PeerState, RaftMessage}, }; -use raftstore::{ - coprocessor::RegionChangeEvent, - store::{util, WriteTask}, -}; +use raftstore::store::{util, WriteTask}; use slog::{debug, error, info, warn}; use tikv_util::store::find_peer; @@ -229,7 +226,7 @@ impl Store { self.store_id(), region, ctx.engine.clone(), - ctx.read_scheduler.clone(), + ctx.schedulers.read.clone(), &ctx.logger, ) .and_then(|s| PeerFsm::new(&ctx.cfg, &ctx.tablet_registry, s)) @@ -294,11 +291,7 @@ impl Peer { /// /// After destroy is finished, `finish_destroy` should be called to clean up /// memory states. - pub fn start_destroy( - &mut self, - ctx: &mut StoreContext, - write_task: &mut WriteTask, - ) { + pub fn start_destroy(&mut self, write_task: &mut WriteTask) { let entry_storage = self.storage().entry_storage(); if self.postponed_destroy() { return; @@ -326,12 +319,6 @@ impl Peer { lb.put_region_state(region_id, applied_index, ®ion_state) .unwrap(); self.destroy_progress_mut().start(); - - ctx.lock_manager_notifier.on_region_changed( - self.region(), - RegionChangeEvent::Destroy, - self.get_role(), - ); } /// Do clean up for destroy. 
The peer is permanently destroyed when diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 80443f0ef60..f0a2624203a 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -8,7 +8,7 @@ mod ready; pub use command::{ AdminCmdResult, CommittedEntries, ProposalControl, RequestSplit, SimpleWriteBinary, - SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, + SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, SplitFlowControl, }; pub use life::DestroyProgress; pub use ready::{ diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 1c62c092878..894f39f278b 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -53,7 +53,7 @@ impl Store { // stats.set_query_stats(query_stats); let task = pd::Task::StoreHeartbeat { stats }; - if let Err(e) = ctx.pd_scheduler.schedule(task) { + if let Err(e) = ctx.schedulers.pd.schedule(task) { error!(self.logger(), "notify pd failed"; "store_id" => self.store_id(), "err" => ?e @@ -89,7 +89,7 @@ impl Peer { approximate_keys: None, wait_data_peers: Vec::new(), }); - if let Err(e) = ctx.pd_scheduler.schedule(task) { + if let Err(e) = ctx.schedulers.pd.schedule(task) { error!( self.logger, "failed to notify pd"; @@ -159,7 +159,7 @@ impl Peer { let task = pd::Task::DestroyPeer { region_id: self.region_id(), }; - if let Err(e) = ctx.pd_scheduler.schedule(task) { + if let Err(e) = ctx.schedulers.pd.schedule(task) { error!( self.logger, "failed to notify pd with DestroyPeer"; @@ -182,7 +182,7 @@ impl Peer { right_derive: ctx.cfg.right_derive_when_split, ch, }; - if let Err(e) = ctx.pd_scheduler.schedule(task) { + if let Err(e) = ctx.schedulers.pd.schedule(task) { error!( self.logger, "failed to notify pd with AskBatchSplit"; @@ -198,7 +198,7 @@ impl Peer { regions: Vec, ) { let task = 
pd::Task::ReportBatchSplit { regions }; - if let Err(e) = ctx.pd_scheduler.schedule(task) { + if let Err(e) = ctx.schedulers.pd.schedule(task) { error!( self.logger, "failed to notify pd with ReportBatchSplit"; @@ -214,7 +214,7 @@ impl Peer { initial_status, txn_ext: self.txn_ext().clone(), }; - if let Err(e) = ctx.pd_scheduler.schedule(task) { + if let Err(e) = ctx.schedulers.pd.schedule(task) { error!( self.logger, "failed to notify pd with UpdateMaxTimestamp"; diff --git a/components/raftstore-v2/src/operation/ready/async_writer.rs b/components/raftstore-v2/src/operation/ready/async_writer.rs index e89854f39f4..a2707b6d411 100644 --- a/components/raftstore-v2/src/operation/ready/async_writer.rs +++ b/components/raftstore-v2/src/operation/ready/async_writer.rs @@ -201,7 +201,7 @@ where ER: RaftEngine, { fn write_senders(&self) -> &WriteSenders { - &self.write_senders + &self.schedulers.write } fn config(&self) -> &Config { diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 9463aae3d73..fcab8728916 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -30,7 +30,7 @@ use kvproto::{raft_cmdpb::AdminCmdType, raft_serverpb::RaftMessage}; use protobuf::Message as _; use raft::{eraftpb, prelude::MessageType, Ready, StateRole, INVALID_ID}; use raftstore::{ - coprocessor::RoleChange, + coprocessor::{RegionChangeEvent, RoleChange}, store::{util, FetchedLogs, ReadProgress, Transport, WriteTask}, }; use slog::{debug, error, trace, warn}; @@ -71,8 +71,6 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, if self.fsm.peer_mut().tick() { self.fsm.peer_mut().set_has_ready(); } - self.fsm.peer_mut().refresh_lead_transferee(); - self.schedule_tick(PeerTick::Raft); } } @@ -388,7 +386,12 @@ impl Peer { .collect(); } if !self.serving() { - self.start_destroy(ctx, &mut write_task); + self.start_destroy(&mut 
write_task); + ctx.coprocessor_host.on_region_changed( + self.region(), + RegionChangeEvent::Destroy, + self.raft_group().raft.state, + ); } // Ready number should increase monotonically. assert!(self.async_writer.known_largest_number() < ready.number()); @@ -522,18 +525,18 @@ impl Peer { } _ => {} } - ctx.lock_manager_notifier.on_role_change( + let target = self.refresh_leader_transferee(); + ctx.coprocessor_host.on_role_change( self.region(), RoleChange { state: ss.raft_state, leader_id: ss.leader_id, - prev_lead_transferee: self.lead_transferee(), + prev_lead_transferee: target, vote: self.raft_group().raft.vote, }, ); self.proposal_control_mut().maybe_update_term(term); } - self.refresh_lead_transferee(); } /// If leader commits new admin commands, it may break lease assumption. So diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 6c027517454..8598d1cc41d 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -30,7 +30,7 @@ use std::{ use engine_traits::{KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, CF_RAFT}; use kvproto::raft_serverpb::{PeerState, RaftSnapshotData}; use protobuf::Message; -use raft::eraftpb::Snapshot; +use raft::{eraftpb::Snapshot, StateRole}; use raftstore::{ coprocessor::RegionChangeEvent, store::{ @@ -143,6 +143,11 @@ impl Peer { } pub fn on_applied_snapshot(&mut self, ctx: &mut StoreContext) { + ctx.coprocessor_host.on_region_changed( + self.region(), + RegionChangeEvent::Create, + StateRole::Follower, + ); let persisted_index = self.persisted_index(); let first_index = self.storage().entry_storage().first_index(); if first_index == persisted_index + 1 { @@ -153,14 +158,7 @@ impl Peer { // Use a new FlushState to avoid conflicts with the old one. 
tablet_ctx.flush_state = Some(flush_state); ctx.tablet_registry.load(tablet_ctx, false).unwrap(); - self.schedule_apply_fsm(ctx); - ctx.lock_manager_notifier.on_region_changed( - self.region(), - RegionChangeEvent::Create, - self.get_role(), - ); - self.storage_mut().on_applied_snapshot(); self.raft_group_mut().advance_apply_to(persisted_index); { @@ -232,6 +230,11 @@ impl Apply { } impl Storage { + pub fn is_generating_snapshot(&self) -> bool { + let snap_state = self.snap_state_mut(); + matches!(*snap_state, SnapState::Generating { .. }) + } + /// Gets a snapshot. Returns `SnapshotTemporarilyUnavailable` if there is no /// unavailable snapshot. pub fn snapshot(&self, request_index: u64, to: u64) -> raft::Result { diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 56379f2a15f..5539de3d617 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -4,7 +4,10 @@ use std::{mem, sync::Arc}; use engine_traits::{CachedTablet, FlushState, KvEngine, TabletRegistry, WriteBatch, DATA_CFS_LEN}; use kvproto::{metapb, raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; -use raftstore::store::{fsm::apply::DEFAULT_APPLY_WB_SIZE, ReadTask}; +use raftstore::store::{ + fsm::{apply::DEFAULT_APPLY_WB_SIZE, ApplyMetrics}, + ReadTask, +}; use slog::Logger; use tikv_util::worker::Scheduler; @@ -46,6 +49,7 @@ pub struct Apply { res_reporter: R, read_scheduler: Scheduler>, + pub(crate) metrics: ApplyMetrics, pub(crate) logger: Logger, } @@ -81,6 +85,7 @@ impl Apply { res_reporter, flush_state, log_recovery, + metrics: ApplyMetrics::default(), logger, } } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 0e38f0dd5a1..25285f289a7 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -15,8 +15,9 @@ use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, pdpb, raft_serverpb::Regio 
use pd_client::BucketStat; use raft::{RawNode, StateRole}; use raftstore::{ - coprocessor::{RegionChangeEvent, RegionChangeReason}, + coprocessor::{CoprocessorHost, RegionChangeEvent, RegionChangeReason}, store::{ + fsm::ApplyMetrics, util::{Lease, RegionReadProgress}, Config, EntryStorage, LocksStatus, PeerStat, ProposalQueue, ReadDelegate, ReadIndexQueue, ReadProgress, TxnExt, WriteTask, @@ -27,8 +28,10 @@ use slog::Logger; use super::storage::Storage; use crate::{ batch::StoreContext, - fsm::{ApplyScheduler, LockManagerNotifier}, - operation::{AsyncWriter, DestroyProgress, ProposalControl, SimpleWriteReqEncoder}, + fsm::ApplyScheduler, + operation::{ + AsyncWriter, DestroyProgress, ProposalControl, SimpleWriteReqEncoder, SplitFlowControl, + }, router::{CmdResChannel, PeerTick, QueryResChannel}, Result, }; @@ -70,9 +73,6 @@ pub struct Peer { read_progress: Arc, leader_lease: Lease, - /// lead_transferee if this peer(leader) is in a leadership transferring. - lead_transferee: u64, - /// region buckets. region_buckets: Option, last_region_buckets: Option, @@ -88,6 +88,7 @@ pub struct Peer { // Trace which peers have not finished split. split_trace: Vec<(u64, HashSet)>, + split_flow_control: SplitFlowControl, /// Apply related State changes that needs to be persisted to raft engine. /// @@ -95,6 +96,9 @@ pub struct Peer { /// advancing apply index. state_changes: Option>, flush_state: Arc, + + /// lead_transferee if this peer(leader) is in a leadership transferring. + leader_transferee: u64, } impl Peer { @@ -161,11 +165,12 @@ impl Peer { txn_ext: Arc::default(), txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::Noop)), proposal_control: ProposalControl::new(0), - lead_transferee: raft::INVALID_ID, pending_ticks: Vec::new(), split_trace: vec![], state_changes: None, flush_state, + split_flow_control: SplitFlowControl::default(), + leader_transferee: raft::INVALID_ID, }; // If this region has only one peer and I am the one, campaign directly. 
@@ -199,7 +204,7 @@ impl Peer { /// has been preserved in a durable device. pub fn set_region( &mut self, - lock_manager_observer: &Arc, + host: &CoprocessorHost, reader: &mut ReadDelegate, region: metapb::Region, reason: RegionChangeReason, @@ -248,10 +253,10 @@ impl Peer { } if self.serving() { - lock_manager_observer.on_region_changed( + host.on_region_changed( self.region(), RegionChangeEvent::Update(reason), - self.get_role(), + self.state_role(), ); } } @@ -351,6 +356,12 @@ impl Peer { &self.self_stat } + #[inline] + pub fn update_stat(&mut self, metrics: &ApplyMetrics) { + self.self_stat.written_bytes += metrics.written_bytes; + self.self_stat.written_keys += metrics.written_keys; + } + /// Mark the peer has a ready so it will be checked at the end of every /// processing round. #[inline] @@ -409,11 +420,6 @@ impl Peer { .cloned() } - #[inline] - pub fn get_role(&self) -> StateRole { - self.raft_group.raft.state - } - #[inline] pub fn update_peer_statistics(&mut self) { if !self.is_leader() { @@ -465,6 +471,11 @@ impl Peer { down_peers } + #[inline] + pub fn state_role(&self) -> StateRole { + self.raft_group.raft.state + } + #[inline] pub fn is_leader(&self) -> bool { self.raft_group.raft.state == StateRole::Leader @@ -660,16 +671,6 @@ impl Peer { .advance_apply(apply_index, term, region); } - #[inline] - pub fn lead_transferee(&self) -> u64 { - self.lead_transferee - } - - #[inline] - pub fn refresh_lead_transferee(&mut self) { - self.lead_transferee = self.raft_group.raft.lead_transferee.unwrap_or_default(); - } - // TODO: find a better place to put all txn related stuff. 
pub fn require_updating_max_ts(&self, ctx: &StoreContext) { let epoch = self.region().get_region_epoch(); @@ -713,4 +714,20 @@ impl Peer { task.extra_write .merge_v2(Box::into_inner(self.state_changes.take().unwrap())); } + + #[inline] + pub fn split_flow_control_mut(&mut self) -> &mut SplitFlowControl { + &mut self.split_flow_control + } + + #[inline] + pub fn refresh_leader_transferee(&mut self) -> u64 { + mem::replace( + &mut self.leader_transferee, + self.raft_group + .raft + .lead_transferee + .unwrap_or(raft::INVALID_ID), + ) + } } diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 3f10e08dee2..a03459c96d2 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -1,6 +1,9 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::sync::{Arc, Mutex}; +use std::{ + borrow::Cow, + sync::{Arc, Mutex}, +}; use crossbeam::channel::TrySendError; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; @@ -10,9 +13,14 @@ use kvproto::{ raft_serverpb::RaftMessage, }; use raftstore::store::{AsyncReadNotifier, FetchedLogs, GenSnapRes, RegionSnapshot}; +use slog::warn; -use super::PeerMsg; -use crate::{batch::StoreRouter, operation::LocalReader, StoreMeta}; +use super::{CmdResChannel, PeerMsg}; +use crate::{ + batch::StoreRouter, + operation::{LocalReader, RequestSplit}, + StoreMeta, +}; impl AsyncReadNotifier for StoreRouter { fn notify_logs_fetched(&self, region_id: u64, fetched_logs: FetchedLogs) { @@ -24,6 +32,65 @@ impl AsyncReadNotifier for StoreRouter { } } +impl raftstore::coprocessor::StoreHandle for StoreRouter { + fn update_approximate_size(&self, _region_id: u64, _size: u64) { + // TODO + } + + fn update_approximate_keys(&self, _region_id: u64, _keys: u64) { + // TODO + } + + fn ask_split( + &self, + region_id: u64, + region_epoch: kvproto::metapb::RegionEpoch, + split_keys: Vec>, + source: Cow<'static, str>, + ) { + let (ch, _) = 
CmdResChannel::pair(); + let res = self.send( + region_id, + PeerMsg::RequestSplit { + request: RequestSplit { + epoch: region_epoch, + split_keys, + source, + }, + ch, + }, + ); + if let Err(e) = res { + warn!( + self.logger(), + "failed to send ask split"; + "region_id" => region_id, + "err" => %e, + ); + } + } + + fn refresh_region_buckets( + &self, + _region_id: u64, + _region_epoch: kvproto::metapb::RegionEpoch, + _buckets: Vec, + _bucket_ranges: Option>, + ) { + // TODO + } + + fn update_compute_hash_result( + &self, + _region_id: u64, + _index: u64, + _context: Vec, + _hash: Vec, + ) { + // TODO + } +} + /// A router that routes messages to the raftstore pub struct RaftRouter where diff --git a/components/raftstore-v2/src/router/internal_message.rs b/components/raftstore-v2/src/router/internal_message.rs index 67f2dec6160..4c317a22abd 100644 --- a/components/raftstore-v2/src/router/internal_message.rs +++ b/components/raftstore-v2/src/router/internal_message.rs @@ -1,5 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+use raftstore::store::fsm::ApplyMetrics; + use crate::operation::{AdminCmdResult, CommittedEntries, DataTrace, GenSnapTask}; #[derive(Debug)] @@ -14,4 +16,5 @@ pub struct ApplyRes { pub applied_term: u64, pub admin_result: Box<[AdminCmdResult]>, pub modifications: DataTrace, + pub metrics: ApplyMetrics, } diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 4c36f474ea9..faed3c0751d 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -217,7 +217,7 @@ impl PeerMsg { request: RequestSplit { epoch, split_keys, - source: source.into_boxed_str(), + source: source.into(), }, ch, }, diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index a454b0aa842..891a97b5d86 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -28,9 +28,9 @@ use kvproto::{ raft_serverpb::RaftMessage, }; use pd_client::RpcClient; -use raft::{eraftpb::MessageType, StateRole}; +use raft::eraftpb::MessageType; use raftstore::{ - coprocessor::{RegionChangeEvent, RoleChange}, + coprocessor::CoprocessorHost, store::{ region_meta::{RegionLocalState, RegionMeta}, Config, RegionSnapshot, TabletSnapKey, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX, @@ -39,7 +39,7 @@ use raftstore::{ use raftstore_v2::{ create_store_batch_system, router::{DebugInfoChannel, FlushChannel, PeerMsg, QueryResult, RaftRouter}, - Bootstrap, LockManagerNotifier, SimpleWriteEncoder, StateStorage, StoreSystem, + Bootstrap, SimpleWriteEncoder, StateStorage, StoreSystem, }; use slog::{debug, o, Logger}; use tempfile::TempDir; @@ -47,6 +47,7 @@ use test_pd::mocker::Service; use tikv_util::{ config::{ReadableDuration, VersionTrack}, store::new_peer, + worker::Worker, }; use txn_types::WriteBatchFlags; @@ -224,6 +225,7 @@ pub struct RunningState { pub cfg: Arc>, pub 
transport: TestTransport, snap_mgr: TabletSnapManager, + background: Worker, } impl RunningState { @@ -278,6 +280,12 @@ impl RunningState { let store_meta = router.store_meta().clone(); let snap_mgr = TabletSnapManager::new(path.join("tablets_snap").to_str().unwrap()); snap_mgr.init().unwrap(); + + let coprocessor_host = CoprocessorHost::new( + router.store_router().clone(), + raftstore::coprocessor::Config::default(), + ); + let background = Worker::new("background"); system .start( store_id, @@ -291,7 +299,8 @@ impl RunningState { snap_mgr.clone(), concurrency_manager, causal_ts_provider, - Arc::new(DummyLockManagerObserver {}), + coprocessor_host, + background.clone(), ) .unwrap(); @@ -303,6 +312,7 @@ impl RunningState { cfg, transport, snap_mgr, + background, }; (TestRouter(router), state) } @@ -311,6 +321,7 @@ impl RunningState { impl Drop for RunningState { fn drop(&mut self) { self.system.shutdown(); + self.background.stop(); } } @@ -574,11 +585,3 @@ impl Drop for Cluster { } } } - -struct DummyLockManagerObserver {} - -impl LockManagerNotifier for DummyLockManagerObserver { - fn on_region_changed(&self, _: &metapb::Region, _: RegionChangeEvent, _: StateRole) {} - - fn on_role_change(&self, _: &metapb::Region, _: RoleChange) {} -} diff --git a/components/raftstore-v2/tests/integrations/test_split.rs b/components/raftstore-v2/tests/integrations/test_split.rs index d5bc784857e..3b315a2d943 100644 --- a/components/raftstore-v2/tests/integrations/test_split.rs +++ b/components/raftstore-v2/tests/integrations/test_split.rs @@ -11,6 +11,7 @@ use kvproto::{ use raftstore::store::{INIT_EPOCH_VER, RAFT_INIT_LOG_INDEX}; use raftstore_v2::{router::PeerMsg, SimpleWriteEncoder}; use tikv_util::store::new_peer; +use txn_types::{Key, TimeStamp}; use crate::cluster::{Cluster, TestRouter}; @@ -61,6 +62,7 @@ fn split_region( split_peer: metapb::Peer, left_key: &[u8], right_key: &[u8], + propose_key: &[u8], split_key: &[u8], right_derive: bool, ) -> (metapb::Region, 
metapb::Region) { @@ -75,7 +77,7 @@ fn split_region( split_id.new_region_id = split_region_id; split_id.new_peer_ids = vec![split_peer.id]; let admin_req = - new_batch_split_region_request(vec![split_key.to_vec()], vec![split_id], right_derive); + new_batch_split_region_request(vec![propose_key.to_vec()], vec![split_id], right_derive); req.mut_requests().clear(); req.set_admin_request(admin_req); @@ -133,7 +135,7 @@ fn test_split() { // Region 1000 ["k22", ""] peer(1, 10) let region_state = raft_engine.get_region_state(2, u64::MAX).unwrap().unwrap(); assert_eq!(region_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); - let (left, right) = split_region( + let (left, mut right) = split_region( router, region, peer.clone(), @@ -142,6 +144,7 @@ fn test_split() { b"k11", b"k33", b"k22", + b"k22", false, ); let region_state = raft_engine.get_region_state(2, u64::MAX).unwrap().unwrap(); @@ -174,6 +177,7 @@ fn test_split() { b"k00", b"k11", b"k11", + b"k11", false, ); let region_state = raft_engine.get_region_state(2, u64::MAX).unwrap().unwrap(); @@ -205,7 +209,7 @@ fn test_split() { .unwrap() .unwrap(); assert_eq!(region_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); - let _ = split_region( + right = split_region( router, right, new_peer(store_id, 10), @@ -214,8 +218,10 @@ fn test_split() { b"k22", b"k33", b"k33", + b"k33", false, - ); + ) + .1; let region_state = raft_engine .get_region_state(1000, u64::MAX) .unwrap() @@ -236,6 +242,21 @@ fn test_split() { "{flushed_index} >= {}", region_state.get_tablet_index() ); + + let split_key = Key::from_raw(b"k44").append_ts(TimeStamp::zero()); + let actual_split_key = split_key.clone().truncate_ts().unwrap(); + split_region( + router, + right, + new_peer(store_id, 12), + 1003, + new_peer(store_id, 13), + b"k33", + b"k55", + split_key.as_encoded(), + actual_split_key.as_encoded(), + false, + ); } // TODO: test split race with diff --git a/components/raftstore/src/coprocessor/dispatcher.rs 
b/components/raftstore/src/coprocessor/dispatcher.rs index 99228aef44c..794a46b8e3a 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -1,11 +1,11 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. // #[PerformanceCriticalPath] called by Fsm on_ready_compute_hash -use std::{marker::PhantomData, mem, ops::Deref}; +use std::{borrow::Cow, marker::PhantomData, mem, ops::Deref}; use engine_traits::{CfName, KvEngine}; use kvproto::{ - metapb::Region, + metapb::{Region, RegionEpoch}, pdpb::CheckPolicy, raft_cmdpb::{ComputeHashRequest, RaftCmdRequest}, }; @@ -13,8 +13,120 @@ use protobuf::Message; use raft::eraftpb; use tikv_util::box_try; -use super::*; -use crate::store::CasualRouter; +use super::{split_observer::SplitObserver, *}; +use crate::store::BucketRange; + +/// A handle for coprocessor to schedule some command back to raftstore. +pub trait StoreHandle: Clone + Send { + fn update_approximate_size(&self, region_id: u64, size: u64); + fn update_approximate_keys(&self, region_id: u64, keys: u64); + fn ask_split( + &self, + region_id: u64, + region_epoch: RegionEpoch, + split_keys: Vec>, + source: Cow<'static, str>, + ); + fn refresh_region_buckets( + &self, + region_id: u64, + region_epoch: RegionEpoch, + buckets: Vec, + bucket_ranges: Option>, + ); + fn update_compute_hash_result( + &self, + region_id: u64, + index: u64, + context: Vec, + hash: Vec, + ); +} + +#[derive(Clone, Debug, PartialEq)] +pub enum SchedTask { + UpdateApproximateSize { + region_id: u64, + size: u64, + }, + UpdateApproximateKeys { + region_id: u64, + keys: u64, + }, + AskSplit { + region_id: u64, + region_epoch: RegionEpoch, + split_keys: Vec>, + source: Cow<'static, str>, + }, + RefreshRegionBuckets { + region_id: u64, + region_epoch: RegionEpoch, + buckets: Vec, + bucket_ranges: Option>, + }, + UpdateComputeHashResult { + region_id: u64, + index: u64, + hash: Vec, + context: Vec, + }, +} + +impl 
StoreHandle for std::sync::mpsc::SyncSender { + fn update_approximate_size(&self, region_id: u64, size: u64) { + let _ = self.try_send(SchedTask::UpdateApproximateSize { region_id, size }); + } + + fn update_approximate_keys(&self, region_id: u64, keys: u64) { + let _ = self.try_send(SchedTask::UpdateApproximateKeys { region_id, keys }); + } + + fn ask_split( + &self, + region_id: u64, + region_epoch: RegionEpoch, + split_keys: Vec>, + source: Cow<'static, str>, + ) { + let _ = self.try_send(SchedTask::AskSplit { + region_id, + region_epoch, + split_keys, + source, + }); + } + + fn refresh_region_buckets( + &self, + region_id: u64, + region_epoch: RegionEpoch, + buckets: Vec, + bucket_ranges: Option>, + ) { + let _ = self.try_send(SchedTask::RefreshRegionBuckets { + region_id, + region_epoch, + buckets, + bucket_ranges, + }); + } + + fn update_compute_hash_result( + &self, + region_id: u64, + index: u64, + context: Vec, + hash: Vec, + ) { + let _ = self.try_send(SchedTask::UpdateComputeHashResult { + region_id, + index, + context, + hash, + }); + } +} struct Entry { priority: u32, @@ -339,10 +451,8 @@ where } impl CoprocessorHost { - pub fn new + Clone + Send + 'static>( - ch: C, - cfg: Config, - ) -> CoprocessorHost { + pub fn new(ch: C, cfg: Config) -> CoprocessorHost { + // TODO load coprocessors from configuration let mut registry = Registry::default(); registry.register_split_check_observer( 200, @@ -357,6 +467,7 @@ impl CoprocessorHost { 400, BoxSplitCheckObserver::new(TableCheckObserver::default()), ); + registry.register_admin_observer(100, BoxAdminObserver::new(SplitObserver)); CoprocessorHost { registry, cfg } } diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 7ac783c0d6d..022a44de463 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -33,7 +33,7 @@ pub use self::{ dispatcher::{ BoxAdminObserver, BoxApplySnapshotObserver, BoxCmdObserver, 
BoxConsistencyCheckObserver, BoxPdTaskObserver, BoxQueryObserver, BoxRegionChangeObserver, BoxRoleObserver, - BoxSplitCheckObserver, BoxUpdateSafeTsObserver, CoprocessorHost, Registry, + BoxSplitCheckObserver, BoxUpdateSafeTsObserver, CoprocessorHost, Registry, StoreHandle, }, error::{Error, Result}, region_info_accessor::{ diff --git a/components/raftstore/src/coprocessor/split_check/half.rs b/components/raftstore/src/coprocessor/split_check/half.rs index 8f572eb1f9f..fafa41e44b5 100644 --- a/components/raftstore/src/coprocessor/split_check/half.rs +++ b/components/raftstore/src/coprocessor/split_check/half.rs @@ -140,8 +140,8 @@ mod tests { *, }; use crate::{ - coprocessor::{Config, CoprocessorHost}, - store::{BucketRange, CasualMessage, SplitCheckRunner, SplitCheckTask}, + coprocessor::{dispatcher::SchedTask, Config, CoprocessorHost}, + store::{BucketRange, SplitCheckRunner, SplitCheckTask}, }; #[test] @@ -451,15 +451,11 @@ mod tests { )); loop { - if let Ok(( - _, - CasualMessage::RefreshRegionBuckets { - region_epoch: _, - buckets, - bucket_ranges, - .. - }, - )) = rx.try_recv() + if let Ok(SchedTask::RefreshRegionBuckets { + buckets, + bucket_ranges, + .. + }) = rx.try_recv() { assert_eq!(buckets.len(), bucket_ranges.unwrap().len()); assert_eq!(buckets.len(), 5); diff --git a/components/raftstore/src/coprocessor/split_check/keys.rs b/components/raftstore/src/coprocessor/split_check/keys.rs index e2e58933e57..92e159d233f 100644 --- a/components/raftstore/src/coprocessor/split_check/keys.rs +++ b/components/raftstore/src/coprocessor/split_check/keys.rs @@ -1,10 +1,5 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{ - marker::PhantomData, - sync::{Arc, Mutex}, -}; - use engine_traits::{KvEngine, Range}; use error_code::ErrorCodeExt; use kvproto::{metapb::Region, pdpb::CheckPolicy}; @@ -19,7 +14,7 @@ use super::{ size::get_approximate_split_keys, Host, }; -use crate::store::{CasualMessage, CasualRouter}; +use crate::coprocessor::dispatcher::StoreHandle; pub struct Checker { max_keys_count: u64, @@ -116,29 +111,19 @@ where } #[derive(Clone)] -pub struct KeysCheckObserver { - router: Arc>, - _phantom: PhantomData, +pub struct KeysCheckObserver { + router: C, } -impl, E> KeysCheckObserver -where - E: KvEngine, -{ - pub fn new(router: C) -> KeysCheckObserver { - KeysCheckObserver { - router: Arc::new(Mutex::new(router)), - _phantom: PhantomData, - } +impl KeysCheckObserver { + pub fn new(router: C) -> KeysCheckObserver { + KeysCheckObserver { router } } } -impl Coprocessor for KeysCheckObserver {} +impl Coprocessor for KeysCheckObserver {} -impl + Send, E> SplitCheckObserver for KeysCheckObserver -where - E: KvEngine, -{ +impl SplitCheckObserver for KeysCheckObserver { fn add_checker( &self, ctx: &mut ObserverContext<'_>, @@ -172,15 +157,7 @@ where } }; - let res = CasualMessage::RegionApproximateKeys { keys: region_keys }; - if let Err(e) = self.router.lock().unwrap().send(region_id, res) { - warn!( - "failed to send approximate region keys"; - "region_id" => region_id, - "err" => %e, - "error_code" => %e.error_code(), - ); - } + self.router.update_approximate_keys(region_id, region_keys); REGION_KEYS_HISTOGRAM.observe(region_keys as f64); // if bucket checker using scan is added, to utilize the scan, @@ -253,8 +230,8 @@ mod tests { *, }; use crate::{ - coprocessor::{Config, CoprocessorHost}, - store::{CasualMessage, SplitCheckRunner, SplitCheckTask}, + coprocessor::{dispatcher::SchedTask, Config, CoprocessorHost}, + store::{SplitCheckRunner, SplitCheckTask}, }; fn put_data(engine: &impl KvEngine, mut start_idx: u64, end_idx: u64, fill_short_value: bool) { @@ -323,8 
+300,8 @@ mod tests { )); // keys has not reached the max_keys 100 yet. match rx.try_recv() { - Ok((region_id, CasualMessage::RegionApproximateSize { .. })) - | Ok((region_id, CasualMessage::RegionApproximateKeys { .. })) => { + Ok(SchedTask::UpdateApproximateSize { region_id, .. }) + | Ok(SchedTask::UpdateApproximateKeys { region_id, .. }) => { assert_eq!(region_id, region.get_id()); } others => panic!("expect recv empty, but got {:?}", others), @@ -427,8 +404,8 @@ mod tests { )); // keys has not reached the max_keys 100 yet. match rx.try_recv() { - Ok((region_id, CasualMessage::RegionApproximateSize { .. })) - | Ok((region_id, CasualMessage::RegionApproximateKeys { .. })) => { + Ok(SchedTask::UpdateApproximateSize { region_id, .. }) + | Ok(SchedTask::UpdateApproximateKeys { region_id, .. }) => { assert_eq!(region_id, region.get_id()); } others => panic!("expect recv empty, but got {:?}", others), @@ -599,8 +576,8 @@ mod tests { )); // keys has not reached the max_keys 100 yet. match rx.try_recv() { - Ok((region_id, CasualMessage::RegionApproximateSize { .. })) - | Ok((region_id, CasualMessage::RegionApproximateKeys { .. })) => { + Ok(SchedTask::UpdateApproximateSize { region_id, .. }) + | Ok(SchedTask::UpdateApproximateKeys { region_id, .. }) => { assert_eq!(region_id, region.get_id()); } others => panic!("expect recv empty, but got {:?}", others), diff --git a/components/raftstore/src/coprocessor/split_check/size.rs b/components/raftstore/src/coprocessor/split_check/size.rs index bdcf817365c..1f4a33d7af7 100644 --- a/components/raftstore/src/coprocessor/split_check/size.rs +++ b/components/raftstore/src/coprocessor/split_check/size.rs @@ -1,10 +1,5 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{ - marker::PhantomData, - sync::{Arc, Mutex}, -}; - use engine_traits::{KvEngine, Range}; use error_code::ErrorCodeExt; use kvproto::{metapb::Region, pdpb::CheckPolicy}; @@ -17,7 +12,7 @@ use super::{ }, calc_split_keys_count, Host, }; -use crate::store::{CasualMessage, CasualRouter}; +use crate::coprocessor::dispatcher::StoreHandle; pub struct Checker { max_size: u64, @@ -116,29 +111,19 @@ where } #[derive(Clone)] -pub struct SizeCheckObserver { - router: Arc>, - _phantom: PhantomData, +pub struct SizeCheckObserver { + router: C, } -impl, E> SizeCheckObserver -where - E: KvEngine, -{ - pub fn new(router: C) -> SizeCheckObserver { - SizeCheckObserver { - router: Arc::new(Mutex::new(router)), - _phantom: PhantomData, - } +impl SizeCheckObserver { + pub fn new(router: C) -> SizeCheckObserver { + SizeCheckObserver { router } } } -impl Coprocessor for SizeCheckObserver {} +impl Coprocessor for SizeCheckObserver {} -impl + Send, E> SplitCheckObserver for SizeCheckObserver -where - E: KvEngine, -{ +impl SplitCheckObserver for SizeCheckObserver { fn add_checker( &self, ctx: &mut ObserverContext<'_>, @@ -173,15 +158,7 @@ where }; // send it to raftstore to update region approximate size - let res = CasualMessage::RegionApproximateSize { size: region_size }; - if let Err(e) = self.router.lock().unwrap().send(region_id, res) { - warn!( - "failed to send approximate region size"; - "region_id" => region_id, - "err" => %e, - "error_code" => %e.error_code(), - ); - } + self.router.update_approximate_size(region_id, region_size); let need_bucket_checker = host.cfg.enable_region_bucket && region_size >= 2 * host.cfg.region_bucket_size.0; @@ -256,7 +233,7 @@ pub fn get_approximate_split_keys( #[cfg(test)] pub mod tests { - use std::{iter, sync::mpsc, u64}; + use std::{assert_matches::assert_matches, iter, sync::mpsc, u64}; use collections::HashSet; use engine_test::{ @@ -276,30 +253,31 @@ pub mod tests { use super::{Checker, *}; use crate::{ - coprocessor::{Config, 
CoprocessorHost, ObserverContext, SplitChecker}, - store::{BucketRange, CasualMessage, KeyEntry, SplitCheckRunner, SplitCheckTask}, + coprocessor::{ + dispatcher::SchedTask, Config, CoprocessorHost, ObserverContext, SplitChecker, + }, + store::{BucketRange, KeyEntry, SplitCheckRunner, SplitCheckTask}, }; fn must_split_at_impl( - rx: &mpsc::Receiver<(u64, CasualMessage)>, + rx: &mpsc::Receiver, exp_region: &Region, exp_split_keys: Vec>, ignore_split_keys: bool, ) { loop { match rx.try_recv() { - Ok((region_id, CasualMessage::RegionApproximateSize { .. })) - | Ok((region_id, CasualMessage::RegionApproximateKeys { .. })) => { + Ok(SchedTask::UpdateApproximateKeys { region_id, .. }) + | Ok(SchedTask::UpdateApproximateSize { region_id, .. }) + | Ok(SchedTask::RefreshRegionBuckets { region_id, .. }) => { assert_eq!(region_id, exp_region.get_id()); } - Ok(( + Ok(SchedTask::AskSplit { region_id, - CasualMessage::SplitRegion { - region_epoch, - split_keys, - .. - }, - )) => { + region_epoch, + split_keys, + .. + }) => { assert_eq!(region_id, exp_region.get_id()); assert_eq!(®ion_epoch, exp_region.get_region_epoch()); if !ignore_split_keys { @@ -307,14 +285,13 @@ pub mod tests { } break; } - Ok((_region_id, CasualMessage::RefreshRegionBuckets { .. })) => {} others => panic!("expect split check result, but got {:?}", others), } } } pub fn must_split_at( - rx: &mpsc::Receiver<(u64, CasualMessage)>, + rx: &mpsc::Receiver, exp_region: &Region, exp_split_keys: Vec>, ) { @@ -322,50 +299,36 @@ pub mod tests { } pub fn must_split_with( - rx: &mpsc::Receiver<(u64, CasualMessage)>, + rx: &mpsc::Receiver, exp_region: &Region, exp_split_keys_count: usize, ) { loop { match rx.try_recv() { - Ok((region_id, CasualMessage::RegionApproximateSize { .. })) - | Ok((region_id, CasualMessage::RegionApproximateKeys { .. })) => { + Ok(SchedTask::UpdateApproximateSize { region_id, .. }) + | Ok(SchedTask::UpdateApproximateKeys { region_id, .. }) + | Ok(SchedTask::RefreshRegionBuckets { region_id, .. 
}) => { assert_eq!(region_id, exp_region.get_id()); } - Ok(( + Ok(SchedTask::AskSplit { region_id, - CasualMessage::SplitRegion { - region_epoch, - split_keys, - .. - }, - )) => { + region_epoch, + split_keys, + .. + }) => { assert_eq!(region_id, exp_region.get_id()); assert_eq!(®ion_epoch, exp_region.get_region_epoch()); assert_eq!(split_keys.len(), exp_split_keys_count); break; } - Ok((_region_id, CasualMessage::RefreshRegionBuckets { .. })) => {} others => panic!("expect split check result, but got {:?}", others), } } } - pub fn must_generate_buckets( - rx: &mpsc::Receiver<(u64, CasualMessage)>, - exp_buckets_keys: &[Vec], - ) { + pub fn must_generate_buckets(rx: &mpsc::Receiver, exp_buckets_keys: &[Vec]) { loop { - if let Ok(( - _, - CasualMessage::RefreshRegionBuckets { - region_epoch: _, - mut buckets, - bucket_ranges: _, - .. - }, - )) = rx.try_recv() - { + if let Ok(SchedTask::RefreshRegionBuckets { mut buckets, .. }) = rx.try_recv() { let mut i = 0; if !exp_buckets_keys.is_empty() { let bucket = buckets.pop().unwrap(); @@ -383,23 +346,14 @@ pub mod tests { } pub fn must_generate_buckets_approximate( - rx: &mpsc::Receiver<(u64, CasualMessage)>, + rx: &mpsc::Receiver, bucket_range: Option, min_leap: i32, max_leap: i32, mvcc: bool, ) { loop { - if let Ok(( - _, - CasualMessage::RefreshRegionBuckets { - region_epoch: _, - mut buckets, - bucket_ranges: _, - .. - }, - )) = rx.try_recv() - { + if let Ok(SchedTask::RefreshRegionBuckets { mut buckets, .. }) = rx.try_recv() { let bucket_keys = buckets.pop().unwrap().keys; if let Some(bucket_range) = bucket_range { assert!(!bucket_keys.is_empty()); @@ -489,12 +443,7 @@ pub mod tests { None, )); // size has not reached the max_size 100 yet. - match rx.try_recv() { - Ok((region_id, CasualMessage::RegionApproximateSize { .. 
})) => { - assert_eq!(region_id, region.get_id()); - } - others => panic!("expect recv empty, but got {:?}", others), - } + assert_matches!(rx.try_recv(), Ok(SchedTask::UpdateApproximateSize { region_id, .. }) if region_id == region.get_id()); for i in 7..11 { let s = keys::data_key(format!("{:04}", i).as_bytes()); diff --git a/components/raftstore/src/coprocessor/split_check/table.rs b/components/raftstore/src/coprocessor/split_check/table.rs index 9b5220938fd..684e87e1693 100644 --- a/components/raftstore/src/coprocessor/split_check/table.rs +++ b/components/raftstore/src/coprocessor/split_check/table.rs @@ -238,8 +238,8 @@ mod tests { use super::*; use crate::{ - coprocessor::{Config, CoprocessorHost}, - store::{CasualMessage, SplitCheckRunner, SplitCheckTask}, + coprocessor::{dispatcher::SchedTask, Config, CoprocessorHost}, + store::{SplitCheckRunner, SplitCheckTask}, }; /// Composes table record and index prefix: `t[table_id]`. @@ -353,9 +353,9 @@ mod tests { let key = Key::from_raw(&gen_table_prefix(id)); loop { match rx.try_recv() { - Ok((_, CasualMessage::RegionApproximateSize { .. })) - | Ok((_, CasualMessage::RegionApproximateKeys { .. })) => (), - Ok((_, CasualMessage::SplitRegion { split_keys, .. })) => { + Ok(SchedTask::UpdateApproximateSize { .. }) + | Ok(SchedTask::UpdateApproximateKeys { .. }) => (), + Ok(SchedTask::AskSplit { split_keys, .. }) => { assert_eq!(split_keys, vec![key.into_encoded()]); break; } @@ -365,8 +365,8 @@ mod tests { } else { loop { match rx.try_recv() { - Ok((_, CasualMessage::RegionApproximateSize { .. })) - | Ok((_, CasualMessage::RegionApproximateKeys { .. })) => (), + Ok(SchedTask::UpdateApproximateSize { .. }) + | Ok(SchedTask::UpdateApproximateKeys { .. 
}) => (), Err(mpsc::TryRecvError::Empty) => { break; } diff --git a/components/raftstore/src/lib.rs b/components/raftstore/src/lib.rs index e56678edec2..6104ae7b7cf 100644 --- a/components/raftstore/src/lib.rs +++ b/components/raftstore/src/lib.rs @@ -7,6 +7,7 @@ #![feature(box_patterns)] #![feature(hash_drain_filter)] #![feature(let_chains)] +#![feature(assert_matches)] #![recursion_limit = "256"] #[cfg(test)] diff --git a/components/raftstore/src/router.rs b/components/raftstore/src/router.rs index 1ded8be3886..0f22eb483a0 100644 --- a/components/raftstore/src/router.rs +++ b/components/raftstore/src/router.rs @@ -1,10 +1,14 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. +use std::borrow::Cow; + // #[PerformanceCriticalPath] use crossbeam::channel::TrySendError; use engine_traits::{KvEngine, RaftEngine, Snapshot}; -use kvproto::{raft_cmdpb::RaftCmdRequest, raft_serverpb::RaftMessage}; +use error_code::ErrorCodeExt; +use kvproto::{metapb, raft_cmdpb::RaftCmdRequest, raft_serverpb::RaftMessage}; use raft::SnapshotStatus; +use slog_global::warn; use tikv_util::time::ThreadReadId; use crate::{ @@ -276,3 +280,107 @@ impl RaftStoreRouter for RaftRouter { batch_system::Router::broadcast_normal(self, msg_gen) } } + +// Because `CasualRouter` needs an generic while `RaftRotuer` doesn't. We have +// to bridge two by manually implementations. Using functions to reduce +// duplicated codes. 
+ +impl crate::coprocessor::StoreHandle for RaftRouter { + fn update_approximate_size(&self, region_id: u64, size: u64) { + if let Err(e) = CasualRouter::send( + self, + region_id, + CasualMessage::RegionApproximateSize { size }, + ) { + warn!( + "failed to send approximate region size"; + "region_id" => region_id, + "err" => %e, + "error_code" => %e.error_code(), + ); + } + } + + fn update_approximate_keys(&self, region_id: u64, keys: u64) { + if let Err(e) = CasualRouter::send( + self, + region_id, + CasualMessage::RegionApproximateKeys { keys }, + ) { + warn!( + "failed to send approximate region keys"; + "region_id" => region_id, + "err" => %e, + "error_code" => %e.error_code(), + ); + } + } + + fn ask_split( + &self, + region_id: u64, + region_epoch: metapb::RegionEpoch, + split_keys: Vec>, + source: Cow<'static, str>, + ) { + if let Err(e) = CasualRouter::send( + self, + region_id, + CasualMessage::SplitRegion { + region_epoch, + split_keys, + callback: Callback::None, + source, + }, + ) { + warn!( + "failed to send ask split"; + "region_id" => region_id, + "err" => %e, + ); + } + } + + fn update_compute_hash_result( + &self, + region_id: u64, + index: u64, + context: Vec, + hash: Vec, + ) { + if let Err(e) = CasualRouter::send( + self, + region_id, + CasualMessage::ComputeHashResult { + index, + context, + hash, + }, + ) { + warn!( + "failed to send hash compute result"; + "region_id" => region_id, + "err" => %e, + ); + } + } + + fn refresh_region_buckets( + &self, + region_id: u64, + region_epoch: metapb::RegionEpoch, + buckets: Vec, + bucket_ranges: Option>, + ) { + let _ = CasualRouter::send( + self, + region_id, + CasualMessage::RefreshRegionBuckets { + region_epoch, + buckets, + bucket_ranges, + cb: Callback::None, + }, + ); + } +} diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 28c0db02eee..3cadcce5a82 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ 
b/components/raftstore/src/store/fsm/store.rs @@ -66,10 +66,7 @@ use time::{self, Timespec}; use crate::{ bytes_capacity, - coprocessor::{ - split_observer::SplitObserver, BoxAdminObserver, CoprocessorHost, RegionChangeEvent, - RegionChangeReason, - }, + coprocessor::{CoprocessorHost, RegionChangeEvent, RegionChangeReason}, store::{ async_io::{ read::{ReadRunner, ReadTask}, @@ -1467,7 +1464,7 @@ impl RaftBatchSystem { mgr: SnapManager, pd_worker: LazyWorker>, store_meta: Arc>, - mut coprocessor_host: CoprocessorHost, + coprocessor_host: CoprocessorHost, importer: Arc, split_check_scheduler: Scheduler, background_worker: Worker, @@ -1480,12 +1477,6 @@ impl RaftBatchSystem { ) -> Result<()> { assert!(self.workers.is_none()); // TODO: we can get cluster meta regularly too later. - - // TODO load coprocessors from configuration - coprocessor_host - .registry - .register_admin_observer(100, BoxAdminObserver::new(SplitObserver)); - let purge_worker = if engines.raft.need_manual_purge() { let worker = Worker::new("purge-worker"); let raft_clone = engines.raft.clone(); diff --git a/components/raftstore/src/store/worker/consistency_check.rs b/components/raftstore/src/store/worker/consistency_check.rs index b3bd7ef32d0..fef2bae332c 100644 --- a/components/raftstore/src/store/worker/consistency_check.rs +++ b/components/raftstore/src/store/worker/consistency_check.rs @@ -9,8 +9,8 @@ use tikv_util::{error, info, warn, worker::Runnable}; use super::metrics::*; use crate::{ - coprocessor::CoprocessorHost, - store::{metrics::*, CasualMessage, CasualRouter}, + coprocessor::{dispatcher::StoreHandle, CoprocessorHost}, + store::metrics::*, }; /// Consistency checking task. 
@@ -44,12 +44,12 @@ impl Display for Task { } } -pub struct Runner> { +pub struct Runner { router: C, coprocessor_host: CoprocessorHost, } -impl> Runner { +impl Runner { pub fn new(router: C, cop_host: CoprocessorHost) -> Runner { Runner { router, @@ -85,18 +85,8 @@ impl> Runner { for (ctx, sum) in hashes { let mut checksum = Vec::with_capacity(4); checksum.write_u32::(sum).unwrap(); - let msg = CasualMessage::ComputeHashResult { - index, - context: ctx, - hash: checksum, - }; - if let Err(e) = self.router.send(region.get_id(), msg) { - warn!( - "failed to send hash compute result"; - "region_id" => region.get_id(), - "err" => %e, - ); - } + self.router + .update_compute_hash_result(region.get_id(), index, ctx, checksum); } timer.observe_duration(); @@ -106,7 +96,7 @@ impl> Runner { impl Runnable for Runner where EK: KvEngine, - C: CasualRouter, + C: StoreHandle, { type Task = Task; @@ -124,7 +114,7 @@ where #[cfg(test)] mod tests { - use std::{sync::mpsc, time::Duration}; + use std::{assert_matches::assert_matches, sync::mpsc, time::Duration}; use byteorder::{BigEndian, WriteBytesExt}; use engine_test::kv::{new_engine, KvTestEngine}; @@ -135,7 +125,8 @@ mod tests { use super::*; use crate::coprocessor::{ - BoxConsistencyCheckObserver, ConsistencyCheckMethod, RawConsistencyCheckObserver, + dispatcher::SchedTask, BoxConsistencyCheckObserver, ConsistencyCheckMethod, + RawConsistencyCheckObserver, }; #[test] @@ -177,21 +168,8 @@ mod tests { checksum_bytes.write_u32::(sum).unwrap(); let res = rx.recv_timeout(Duration::from_secs(3)).unwrap(); - match res { - ( - region_id, - CasualMessage::ComputeHashResult { - index, - hash, - context, - }, - ) => { - assert_eq!(region_id, region.get_id()); - assert_eq!(index, 10); - assert_eq!(context, vec![0]); - assert_eq!(hash, checksum_bytes); - } - e => panic!("unexpected {:?}", e), - } + assert_matches!(res, SchedTask::UpdateComputeHashResult { region_id, index, hash, context} if + region_id == region.get_id() && index == 10 && 
context == vec![0] && hash == checksum_bytes + ); } } diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index d1c531070ac..b6bc5fca65f 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -7,15 +7,16 @@ use std::{ mem, }; -use engine_traits::{CfName, IterOptions, Iterable, Iterator, KvEngine, CF_WRITE, LARGE_CFS}; +use engine_traits::{ + CfName, IterOptions, Iterable, Iterator, KvEngine, TabletRegistry, CF_WRITE, LARGE_CFS, +}; use file_system::{IoType, WithIoType}; use itertools::Itertools; -use kvproto::{ - metapb::{Region, RegionEpoch}, - pdpb::CheckPolicy, -}; +use kvproto::{metapb::Region, pdpb::CheckPolicy}; use online_config::{ConfigChange, OnlineConfig}; -use tikv_util::{box_err, debug, error, info, keybuilder::KeyBuilder, warn, worker::Runnable}; +use tikv_util::{ + box_err, debug, error, info, keybuilder::KeyBuilder, warn, worker::Runnable, Either, +}; use txn_types::Key; use super::metrics::*; @@ -23,10 +24,10 @@ use super::metrics::*; use crate::coprocessor::Config; use crate::{ coprocessor::{ + dispatcher::StoreHandle, split_observer::{is_valid_split_key, strip_timestamp_if_exists}, CoprocessorHost, SplitCheckerHost, }, - store::{Callback, CasualMessage, CasualRouter}, Result, }; @@ -131,10 +132,10 @@ where } } -#[derive(Default, Clone, Debug)] +#[derive(Default, Clone, Debug, PartialEq)] pub struct BucketRange(pub Vec, pub Vec); -#[derive(Default, Clone, Debug)] +#[derive(Default, Clone, Debug, PartialEq)] pub struct Bucket { // new proposed split keys under the bucket for split // if it does not need split, it's empty @@ -219,23 +220,30 @@ impl Display for Task { } } -pub struct Runner -where - E: KvEngine, -{ - engine: E, +pub struct Runner { + // We can't just use `TabletRegistry` here, otherwise v1 may create many + // invalid records and cause other problems. 
+ engine: Either>, router: S, - coprocessor: CoprocessorHost, + coprocessor: CoprocessorHost, } -impl Runner -where - E: KvEngine, - S: CasualRouter, -{ - pub fn new(engine: E, router: S, coprocessor: CoprocessorHost) -> Runner { +impl Runner { + pub fn new(engine: EK, router: S, coprocessor: CoprocessorHost) -> Runner { Runner { - engine, + engine: Either::Left(engine), + router, + coprocessor, + } + } + + pub fn with_registry( + registry: TabletRegistry, + router: S, + coprocessor: CoprocessorHost, + ) -> Runner { + Runner { + engine: Either::Right(registry), router, coprocessor, } @@ -243,8 +251,9 @@ where fn approximate_check_bucket( &self, + tablet: &EK, region: &Region, - host: &mut SplitCheckerHost<'_, E>, + host: &mut SplitCheckerHost<'_, EK>, bucket_ranges: Option>, ) -> Result<()> { let ranges = bucket_ranges.clone().unwrap_or_else(|| { @@ -258,7 +267,7 @@ where let mut bucket = region.clone(); bucket.set_start_key(range.0.clone()); bucket.set_end_key(range.1.clone()); - let bucket_entry = host.approximate_bucket_keys(&bucket, &self.engine)?; + let bucket_entry = host.approximate_bucket_keys(&bucket, tablet)?; debug!( "bucket_entry size {} keys count {}", bucket_entry.size, @@ -328,14 +337,11 @@ where region: &Region, bucket_ranges: Option>, ) { - let _ = self.router.send( + self.router.refresh_region_buckets( region.get_id(), - CasualMessage::RefreshRegionBuckets { - region_epoch: region.get_region_epoch().clone(), - buckets, - bucket_ranges, - cb: Callback::None, - }, + region.get_region_epoch().clone(), + buckets, + bucket_ranges, ); } @@ -350,6 +356,20 @@ where policy: CheckPolicy, bucket_ranges: Option>, ) { + let mut cached; + let tablet = match &self.engine { + Either::Left(e) => e, + Either::Right(r) => match r.get(region.get_id()) { + Some(c) => { + cached = Some(c); + match cached.as_mut().unwrap().latest() { + Some(t) => t, + None => return, + } + } + None => return, + }, + }; let region_id = region.get_id(); let is_key_range = 
start_key.is_some() && end_key.is_some(); let start_key = if is_key_range { @@ -372,9 +392,9 @@ where "policy" => ?policy, ); CHECK_SPILT_COUNTER.all.inc(); - let mut host = - self.coprocessor - .new_split_checker_host(region, &self.engine, auto_split, policy); + let mut host = self + .coprocessor + .new_split_checker_host(region, tablet, auto_split, policy); if host.skip() { debug!("skip split check"; @@ -390,6 +410,7 @@ where CheckPolicy::Scan => { match self.scan_split_keys( &mut host, + tablet, region, is_key_range, &start_key, @@ -408,11 +429,11 @@ where } } } - CheckPolicy::Approximate => match host.approximate_split_keys(region, &self.engine) { + CheckPolicy::Approximate => match host.approximate_split_keys(region, tablet) { Ok(keys) => { if host.enable_region_bucket() { if let Err(e) = - self.approximate_check_bucket(region, &mut host, bucket_ranges) + self.approximate_check_bucket(tablet, region, &mut host, bucket_ranges) { error!(%e; "approximate_check_bucket failed"; @@ -437,6 +458,7 @@ where ); match self.scan_split_keys( &mut host, + tablet, region, is_key_range, &start_key, @@ -461,12 +483,8 @@ where if !split_keys.is_empty() { let region_epoch = region.get_region_epoch().clone(); - let msg = new_split_region(region_epoch, split_keys, "split checker"); - let res = self.router.send(region_id, msg); - if let Err(e) = res { - warn!("failed to send check result"; "region_id" => region_id, "err" => %e); - } - + self.router + .ask_split(region_id, region_epoch, split_keys, "split checker".into()); CHECK_SPILT_COUNTER.success.inc(); } else { debug!( @@ -484,7 +502,8 @@ where /// If it's Some(vec![]), skip generating buckets. 
fn scan_split_keys( &self, - host: &mut SplitCheckerHost<'_, E>, + host: &mut SplitCheckerHost<'_, EK>, + tablet: &EK, region: &Region, is_key_range: bool, start_key: &[u8], @@ -505,12 +524,8 @@ where (!host.enable_region_bucket(), &empty_bucket) }; - MergedIterator::<::Iterator>::new( - &self.engine, - LARGE_CFS, - start_key, - end_key, - false, + MergedIterator::<::Iterator>::new( + tablet, LARGE_CFS, start_key, end_key, false, ) .map(|mut iter| { let mut size = 0; @@ -595,14 +610,8 @@ where "bucket_count" => buckets.len(), "bucket_size" => bucket_size, ); - let _ = self.router.send( - region.get_id(), - CasualMessage::RegionApproximateSize { size }, - ); - let _ = self.router.send( - region.get_id(), - CasualMessage::RegionApproximateKeys { keys }, - ); + self.router.update_approximate_size(region.get_id(), size); + self.router.update_approximate_keys(region.get_id(), keys); })?; if host.enable_region_bucket() { @@ -632,10 +641,10 @@ where } } -impl Runnable for Runner +impl Runnable for Runner where - E: KvEngine, - S: CasualRouter, + EK: KvEngine, + S: StoreHandle, { type Task = Task; fn run(&mut self, task: Task) { @@ -659,13 +668,28 @@ where Task::ChangeConfig(c) => self.change_cfg(c), Task::ApproximateBuckets(region) => { if self.coprocessor.cfg.enable_region_bucket { + let mut cached; + let tablet = match &self.engine { + Either::Left(e) => e, + Either::Right(r) => match r.get(region.get_id()) { + Some(c) => { + cached = Some(c); + match cached.as_mut().unwrap().latest() { + Some(t) => t, + None => return, + } + } + None => return, + }, + }; let mut host = self.coprocessor.new_split_checker_host( ®ion, - &self.engine, + tablet, false, CheckPolicy::Approximate, ); - if let Err(e) = self.approximate_check_bucket(®ion, &mut host, None) { + if let Err(e) = self.approximate_check_bucket(tablet, ®ion, &mut host, None) + { error!(%e; "approximate_check_bucket failed"; "region_id" => region.get_id(), @@ -678,19 +702,3 @@ where } } } - -fn new_split_region( - 
region_epoch: RegionEpoch, - split_keys: Vec>, - source: &'static str, -) -> CasualMessage -where - E: KvEngine, -{ - CasualMessage::SplitRegion { - region_epoch, - split_keys, - callback: Callback::None, - source: source.into(), - } -} diff --git a/src/server/lock_manager/deadlock.rs b/src/server/lock_manager/deadlock.rs index a9a31c68b8f..9583df80dd6 100644 --- a/src/server/lock_manager/deadlock.rs +++ b/src/server/lock_manager/deadlock.rs @@ -28,7 +28,6 @@ use raftstore::{ }, store::util::is_region_initialized, }; -use raftstore_v2::LockManagerNotifier; use security::SecurityManager; use tikv_util::{ future::paired_future_callback, @@ -525,7 +524,7 @@ const LEADER_KEY: &[u8] = b""; /// way to change the node from the leader of deadlock detector to follower, and /// vice versa. #[derive(Clone)] -pub struct RoleChangeNotifier { +pub(crate) struct RoleChangeNotifier { /// The id of the valid leader region. // raftstore.coprocessor needs it to be Sync + Send. leader_region_id: Arc>, @@ -607,18 +606,6 @@ impl RegionChangeObserver for RoleChangeNotifier { } } -impl LockManagerNotifier for RoleChangeNotifier { - fn on_role_change(&self, region: &Region, role_change: RoleChange) { - let mut ctx = ObserverContext::new(region); - RoleObserver::on_role_change(self, &mut ctx, &role_change); - } - - fn on_region_changed(&self, region: &Region, event: RegionChangeEvent, role: StateRole) { - let mut ctx = ObserverContext::new(region); - RegionChangeObserver::on_region_changed(self, &mut ctx, event, role); - } -} - struct Inner { /// The role of the deadlock detector. Default is `Role::Follower`. 
role: Role, diff --git a/src/server/lock_manager/mod.rs b/src/server/lock_manager/mod.rs index 44c31fcab1e..243d533a0e5 100644 --- a/src/server/lock_manager/mod.rs +++ b/src/server/lock_manager/mod.rs @@ -205,11 +205,6 @@ impl LockManager { role_change_notifier.register(host); } - /// Creates a `RoleChangeNotifier` of the deadlock detector worker - pub fn new_notifier(&self) -> RoleChangeNotifier { - RoleChangeNotifier::new(self.detector_scheduler.clone()) - } - /// Creates a `DeadlockService` to handle deadlock detect requests from /// other nodes. pub fn deadlock_service(&self) -> DeadlockService { diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index 57bc575ff05..b3a445a1f7e 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -7,8 +7,11 @@ use concurrency_manager::ConcurrencyManager; use engine_traits::{KvEngine, RaftEngine, TabletContext, TabletRegistry}; use kvproto::{metapb, replication_modepb::ReplicationStatus}; use pd_client::PdClient; -use raftstore::store::{GlobalReplicationState, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX}; -use raftstore_v2::{router::RaftRouter, Bootstrap, LockManagerNotifier, StoreSystem}; +use raftstore::{ + coprocessor::CoprocessorHost, + store::{GlobalReplicationState, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX}, +}; +use raftstore_v2::{router::RaftRouter, Bootstrap, StoreSystem}; use slog::{info, o, Logger}; use tikv_util::{config::VersionTrack, worker::Worker}; @@ -85,7 +88,8 @@ where snap_mgr: TabletSnapManager, concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 - lock_manager_observer: Arc, + coprocessor_host: CoprocessorHost, + background: Worker, ) -> Result<()> where T: Transport + 'static, @@ -126,7 +130,8 @@ where snap_mgr, concurrency_manager, causal_ts_provider, - lock_manager_observer, + coprocessor_host, + background, )?; Ok(()) @@ -173,7 +178,8 @@ where snap_mgr: TabletSnapManager, concurrency_manager: 
ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 - lock_manager_observer: Arc, + coprocessor_host: CoprocessorHost, + background: Worker, ) -> Result<()> where T: Transport + 'static, @@ -199,7 +205,8 @@ where snap_mgr, concurrency_manager, causal_ts_provider, - lock_manager_observer, + coprocessor_host, + background, )?; Ok(()) } From a499caf0d809e530f46ac8125ae07817328145d0 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Thu, 22 Dec 2022 19:34:55 +0800 Subject: [PATCH 0423/1149] import: log L0 SST ranges when too many files in ingest (#13979) close tikv/tikv#13980 Signed-off-by: lance6716 Co-authored-by: Ti Chi Robot --- components/engine_panic/src/misc.rs | 4 ++ components/engine_rocks/src/misc.rs | 87 +++++++++++++++++++++++++++- components/engine_traits/src/misc.rs | 2 + src/import/sst_service.rs | 11 ++++ 4 files changed, 103 insertions(+), 1 deletion(-) diff --git a/components/engine_panic/src/misc.rs b/components/engine_panic/src/misc.rs index 730f44a7e2f..561d2892ca9 100644 --- a/components/engine_panic/src/misc.rs +++ b/components/engine_panic/src/misc.rs @@ -48,6 +48,10 @@ impl MiscExt for PanicEngine { panic!() } + fn get_sst_key_ranges(&self, cf: &str, level: usize) -> Result, Vec)>> { + panic!() + } + fn get_engine_used_size(&self) -> Result { panic!() } diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index 4761183546e..75b193bdcf9 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -238,6 +238,24 @@ impl MiscExt for RocksEngine { Ok(false) } + fn get_sst_key_ranges(&self, cf: &str, level: usize) -> Result, Vec)>> { + let handle = util::get_cf_handle(self.as_inner(), cf)?; + let ret = self + .as_inner() + .get_column_family_meta_data(handle) + .get_level(level) + .get_files() + .iter() + .map(|sst_meta| { + ( + sst_meta.get_smallestkey().to_vec(), + sst_meta.get_largestkey().to_vec(), + ) + }) + .collect(); + Ok(ret) + } + fn 
get_engine_used_size(&self) -> Result { let mut used_size: u64 = 0; for cf in ALL_CFS { @@ -333,7 +351,8 @@ impl MiscExt for RocksEngine { #[cfg(test)] mod tests { use engine_traits::{ - DeleteStrategy, Iterable, Iterator, Mutable, SyncMutable, WriteBatchExt, ALL_CFS, + CompactExt, DeleteStrategy, Iterable, Iterator, Mutable, SyncMutable, WriteBatchExt, + ALL_CFS, }; use tempfile::Builder; @@ -581,4 +600,70 @@ mod tests { .unwrap(); check_data(&db, &[cf], kvs_left.as_slice()); } + + #[test] + fn test_get_sst_key_ranges() { + let path = Builder::new() + .prefix("test_get_sst_key_ranges") + .tempdir() + .unwrap(); + let path_str = path.path().to_str().unwrap(); + + let mut opts = RocksDbOptions::default(); + opts.create_if_missing(true); + opts.enable_multi_batch_write(true); + + let mut cf_opts = RocksCfOptions::default(); + // Prefix extractor(trim the timestamp at tail) for write cf. + cf_opts + .set_prefix_extractor( + "FixedSuffixSliceTransform", + crate::util::FixedSuffixSliceTransform::new(8), + ) + .unwrap_or_else(|err| panic!("{:?}", err)); + // Create prefix bloom filter for memtable. 
+ cf_opts.set_memtable_prefix_bloom_size_ratio(0.1_f64); + let cf = "default"; + let db = new_engine_opt(path_str, opts, vec![(cf, cf_opts)]).unwrap(); + let mut wb = db.write_batch(); + let kvs: Vec<(&[u8], &[u8])> = vec![ + (b"k1", b"v1"), + (b"k2", b"v2"), + (b"k6", b"v3"), + (b"k7", b"v4"), + ]; + + for &(k, v) in kvs.as_slice() { + wb.put_cf(cf, k, v).unwrap(); + } + wb.write().unwrap(); + + db.flush_cf(cf, true).unwrap(); + let sst_range = db.get_sst_key_ranges(cf, 0).unwrap(); + let expected = vec![(b"k1".to_vec(), b"k7".to_vec())]; + assert_eq!(sst_range, expected); + + let mut wb = db.write_batch(); + let kvs: Vec<(&[u8], &[u8])> = vec![(b"k3", b"v1"), (b"k4", b"v2"), (b"k8", b"v3")]; + + for &(k, v) in kvs.as_slice() { + wb.put_cf(cf, k, v).unwrap(); + } + wb.write().unwrap(); + + db.flush_cf(cf, true).unwrap(); + let sst_range = db.get_sst_key_ranges(cf, 0).unwrap(); + let expected = vec![ + (b"k3".to_vec(), b"k8".to_vec()), + (b"k1".to_vec(), b"k7".to_vec()), + ]; + assert_eq!(sst_range, expected); + + db.compact_range(cf, None, None, false, 1).unwrap(); + let sst_range = db.get_sst_key_ranges(cf, 0).unwrap(); + assert_eq!(sst_range.len(), 0); + let sst_range = db.get_sst_key_ranges(cf, 1).unwrap(); + let expected = vec![(b"k1".to_vec(), b"k8".to_vec())]; + assert_eq!(sst_range, expected); + } } diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index edfea511d35..a7679256f21 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -84,6 +84,8 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { fn ingest_maybe_slowdown_writes(&self, cf: &str) -> Result; + fn get_sst_key_ranges(&self, cf: &str, level: usize) -> Result, Vec)>>; + /// Gets total used size of rocksdb engine, including: /// * total size (bytes) of all SST files. /// * total size (bytes) of active and unflushed immutable memtables. 
diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index bdb552e8923..8ce6f9961fb 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -185,6 +185,17 @@ where .ingest_maybe_slowdown_writes(CF_WRITE) .expect("cf") { + match self.engine.get_sst_key_ranges(CF_WRITE, 0) { + Ok(l0_sst_ranges) => { + warn!( + "sst ingest is too slow"; + "sst_ranges" => ?l0_sst_ranges, + ); + } + Err(e) => { + error!("get sst key ranges failed"; "err" => ?e); + } + } let mut errorpb = errorpb::Error::default(); let err = "too many sst files are ingesting"; let mut server_is_busy_err = errorpb::ServerIsBusy::default(); From 90505f52857faf7ac993c8ae493bce5b2fdc270d Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 23 Dec 2022 13:34:14 +0800 Subject: [PATCH 0424/1149] *: support start with raftkv2 (#13981) ref tikv/tikv#12842 Not all functionality are supported, this is just a naive pure KV system with transaction support. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + cmd/tikv-server/src/main.rs | 10 +- components/raftstore-v2/src/batch/store.rs | 40 +- components/raftstore-v2/src/lib.rs | 1 + components/raftstore-v2/src/worker/pd/mod.rs | 30 +- .../src/worker/pd/update_max_timestamp.rs | 3 +- .../tests/integrations/cluster.rs | 4 +- components/raftstore/src/store/snap.rs | 29 + components/server/Cargo.toml | 1 + components/server/src/lib.rs | 1 + components/server/src/server.rs | 12 +- components/server/src/server2.rs | 1759 +++++++++++++++++ components/test_raftstore/src/server.rs | 8 +- components/tikv_util/src/sys/mod.rs | 19 +- src/server/raftkv2/node.rs | 75 +- src/server/server.rs | 44 +- src/server/service/kv.rs | 7 +- src/server/status_server/mod.rs | 61 +- tests/integrations/server/status_server.rs | 8 +- 19 files changed, 1980 insertions(+), 133 deletions(-) create mode 100644 components/server/src/server2.rs diff --git a/Cargo.lock b/Cargo.lock index 67ca50ba1ec..cf53d09da09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ 
-5226,6 +5226,7 @@ dependencies = [ "raft", "raft_log_engine", "raftstore", + "raftstore-v2", "rand 0.8.5", "resolved_ts", "resource_metering", diff --git a/cmd/tikv-server/src/main.rs b/cmd/tikv-server/src/main.rs index b366cd7849f..1d846d72bdb 100644 --- a/cmd/tikv-server/src/main.rs +++ b/cmd/tikv-server/src/main.rs @@ -7,7 +7,10 @@ use std::{path::Path, process}; use clap::{crate_authors, App, Arg}; use serde_json::{Map, Value}; use server::setup::{ensure_no_unrecognized_config, validate_and_persist_config}; -use tikv::config::{to_flatten_config_info, TikvConfig}; +use tikv::{ + config::{to_flatten_config_info, TikvConfig}, + storage::config::EngineType, +}; fn main() { let build_timestamp = option_env!("TIKV_BUILD_TIME"); @@ -207,5 +210,8 @@ fn main() { process::exit(0); } - server::server::run_tikv(config); + match config.storage.engine { + EngineType::RaftKv => server::server::run_tikv(config), + EngineType::RaftKv2 => server::server2::run_tikv(config), + } } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 642f6e745f0..0d5f984107c 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -35,7 +35,7 @@ use tikv_util::{ sys::SysQuota, time::Instant as TiInstant, timer::SteadyTimer, - worker::{Scheduler, Worker}, + worker::{LazyWorker, Scheduler, Worker}, yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, Either, }; @@ -373,7 +373,7 @@ pub struct Schedulers { struct Workers { /// Worker for fetching raft logs asynchronously async_read: Worker, - pd: Worker, + pd: LazyWorker, async_write: StoreWriters, // Following is not maintained by raftstore itself. 
@@ -381,10 +381,10 @@ struct Workers { } impl Workers { - fn new(background: Worker) -> Self { + fn new(background: Worker, pd: LazyWorker) -> Self { Self { async_read: Worker::new("async-read-worker"), - pd: Worker::new("pd-worker"), + pd, async_write: StoreWriters::default(), background, } @@ -415,6 +415,7 @@ impl StoreSystem { causal_ts_provider: Option>, // used for rawkv apiv2 coprocessor_host: CoprocessorHost, background: Worker, + pd_worker: LazyWorker, ) -> Result<()> where T: Transport + 'static, @@ -428,7 +429,7 @@ impl StoreSystem { .broadcast_normal(|| PeerMsg::Tick(PeerTick::PdHeartbeat)); }); - let mut workers = Workers::new(background); + let mut workers = Workers::new(background, pd_worker); workers .async_write .spawn(store_id, raft_engine.clone(), None, router, &trans, &cfg)?; @@ -437,21 +438,18 @@ impl StoreSystem { read_runner.set_snap_mgr(snap_mgr.clone()); let read_scheduler = workers.async_read.start("async-read-worker", read_runner); - let pd_scheduler = workers.pd.start( - "pd-worker", - pd::Runner::new( - store_id, - pd_client, - raft_engine.clone(), - tablet_registry.clone(), - router.clone(), - workers.pd.remote(), - concurrency_manager, - causal_ts_provider, - self.logger.clone(), - self.shutdown.clone(), - ), - ); + workers.pd.start(pd::Runner::new( + store_id, + pd_client, + raft_engine.clone(), + tablet_registry.clone(), + router.clone(), + workers.pd.remote(), + concurrency_manager, + causal_ts_provider, + self.logger.clone(), + self.shutdown.clone(), + )); let split_check_scheduler = workers.background.start( "split-check", @@ -464,7 +462,7 @@ impl StoreSystem { let schedulers = Schedulers { read: read_scheduler, - pd: pd_scheduler, + pd: workers.pd.scheduler(), write: workers.async_write.senders(), split_check: split_check_scheduler, }; diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index d8327549da6..7ddb1687d91 100644 --- a/components/raftstore-v2/src/lib.rs +++ 
b/components/raftstore-v2/src/lib.rs @@ -41,3 +41,4 @@ pub use bootstrap::Bootstrap; pub use fsm::StoreMeta; pub use operation::{SimpleWriteBinary, SimpleWriteEncoder, StateStorage}; pub use raftstore::{store::Config, Error, Result}; +pub use worker::pd::{FlowReporter, Task as PdTask}; diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 18b01a8026a..cc977e68236 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -11,9 +11,12 @@ use concurrency_manager::ConcurrencyManager; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use kvproto::{metapb, pdpb}; use pd_client::PdClient; -use raftstore::store::{util::KeysInfoFormatter, TxnExt}; +use raftstore::store::{util::KeysInfoFormatter, FlowStatsReporter, ReadStats, TxnExt, WriteStats}; use slog::{error, info, Logger}; -use tikv_util::{time::UnixSecs, worker::Runnable}; +use tikv_util::{ + time::UnixSecs, + worker::{Runnable, Scheduler}, +}; use yatp::{task::future::TaskCell, Remote}; use crate::{ @@ -206,6 +209,29 @@ where } } +#[derive(Clone)] +pub struct FlowReporter { + _scheduler: Scheduler, +} + +impl FlowReporter { + pub fn new(scheduler: Scheduler) -> Self { + FlowReporter { + _scheduler: scheduler, + } + } +} + +impl FlowStatsReporter for FlowReporter { + fn report_read_stats(&self, _read_stats: ReadStats) { + // TODO + } + + fn report_write_stats(&self, _write_stats: WriteStats) { + // TODO + } +} + mod requests { use kvproto::raft_cmdpb::{ AdminCmdType, AdminRequest, ChangePeerRequest, ChangePeerV2Request, RaftCmdRequest, diff --git a/components/raftstore-v2/src/worker/pd/update_max_timestamp.rs b/components/raftstore-v2/src/worker/pd/update_max_timestamp.rs index cbfecb8171d..0de3fb9a87c 100644 --- a/components/raftstore-v2/src/worker/pd/update_max_timestamp.rs +++ b/components/raftstore-v2/src/worker/pd/update_max_timestamp.rs @@ -7,7 +7,6 @@ use std::{ use 
causal_ts::CausalTsProvider; use engine_traits::{KvEngine, RaftEngine}; -use fail::fail_point; use futures::{compat::Future01CompatExt, FutureExt}; use pd_client::PdClient; use raftstore::{store::TxnExt, Result}; @@ -96,7 +95,7 @@ where #[cfg(feature = "failpoints")] let delay = (|| { - fail_point!("delay_update_max_ts", |_| true); + fail::fail_point!("delay_update_max_ts", |_| true); false })(); #[cfg(not(feature = "failpoints"))] diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 891a97b5d86..ca166eab950 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -47,7 +47,7 @@ use test_pd::mocker::Service; use tikv_util::{ config::{ReadableDuration, VersionTrack}, store::new_peer, - worker::Worker, + worker::{LazyWorker, Worker}, }; use txn_types::WriteBatchFlags; @@ -286,6 +286,7 @@ impl RunningState { raftstore::coprocessor::Config::default(), ); let background = Worker::new("background"); + let pd_worker = LazyWorker::new("pd-worker"); system .start( store_id, @@ -301,6 +302,7 @@ impl RunningState { causal_ts_provider, coprocessor_host, background.clone(), + pd_worker, ) .unwrap(); diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 8cb44e3718c..04aef985e3b 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1984,6 +1984,35 @@ impl TabletSnapManager { true } } + + pub fn total_snap_size(&self) -> Result { + let mut total_size = 0; + for entry in file_system::read_dir(&self.base)? { + let entry = match entry { + Ok(e) => e, + Err(e) if e.kind() == ErrorKind::NotFound => continue, + Err(e) => return Err(Error::from(e)), + }; + + let path = entry.path(); + // Generated snapshots are just checkpoints, only counts received snapshots. 
+ if !path + .file_name() + .and_then(|n| n.to_str()) + .map_or(true, |n| n.starts_with(SNAP_REV_PREFIX)) + { + continue; + } + for e in file_system::read_dir(path)? { + match e.and_then(|e| e.metadata()) { + Ok(m) => total_size += m.len(), + Err(e) if e.kind() == ErrorKind::NotFound => continue, + Err(e) => return Err(Error::from(e)), + } + } + } + Ok(total_size) + } } #[cfg(test)] diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index b27846ad5a3..acdca09b29c 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -66,6 +66,7 @@ protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raft_log_engine = { workspace = true } raftstore = { workspace = true, features = ["engine_rocks"] } +raftstore-v2 = { workspace = true } rand = "0.8" resolved_ts = { workspace = true } resource_metering = { workspace = true } diff --git a/components/server/src/lib.rs b/components/server/src/lib.rs index 5107a20eeab..d5c8e352a88 100644 --- a/components/server/src/lib.rs +++ b/components/server/src/lib.rs @@ -12,4 +12,5 @@ pub mod setup; pub mod memory; pub mod raft_engine_switch; pub mod server; +pub mod server2; pub mod signal_handler; diff --git a/components/server/src/server.rs b/components/server/src/server.rs index ffc5272c673..d7a05fff115 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -123,6 +123,7 @@ use tikv_util::{ thread_group::GroupProperties, time::{Instant, Monitor}, worker::{Builder as WorkerBuilder, LazyWorker, Scheduler, Worker}, + Either, }; use tokio::runtime::Builder; @@ -959,9 +960,9 @@ where ), coprocessor_v2::Endpoint::new(&self.config.coprocessor_v2), self.resolver.clone().unwrap(), - snap_mgr.clone(), + Either::Left(snap_mgr.clone()), gc_worker.clone(), - check_leader_scheduler, + Some(check_leader_scheduler), self.env.clone(), unified_read_pool, debug_thread_pool, @@ -1649,7 +1650,7 @@ where 
self.config.server.status_thread_pool_size, self.cfg_controller.take().unwrap(), Arc::new(self.config.security.clone()), - self.router.clone(), + self.engines.as_ref().unwrap().engine.raft_extension(), self.store_path.clone(), ) { Ok(status_server) => Box::new(status_server), @@ -1951,13 +1952,12 @@ fn get_lock_dir() -> String { /// A small trait for components which can be trivially stopped. Lets us keep /// a list of these in `TiKV`, rather than storing each component individually. -trait Stop { +pub(crate) trait Stop { fn stop(self: Box); } -impl Stop for StatusServer +impl Stop for StatusServer where - E: 'static, R: 'static + Send, { fn stop(self: Box) { diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs new file mode 100644 index 00000000000..cfda8feb233 --- /dev/null +++ b/components/server/src/server2.rs @@ -0,0 +1,1759 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module startups all the components of a TiKV server. +//! +//! It is responsible for reading from configs, starting up the various server +//! components, and handling errors (mostly by aborting and reporting to the +//! user). +//! +//! The entry point is `run_tikv`. +//! +//! Components are often used to initialize other components, and/or must be +//! explicitly stopped. We keep these components in the `TikvServer` struct. 
+ +use std::{ + cmp, + collections::HashMap, + env, + net::SocketAddr, + path::{Path, PathBuf}, + str::FromStr, + sync::{ + atomic::{AtomicU32, AtomicU64, Ordering}, + mpsc, Arc, + }, + time::Duration, + u64, +}; + +use api_version::{dispatch_api_version, KvFormat}; +use causal_ts::CausalTsProviderImpl; +use concurrency_manager::ConcurrencyManager; +use encryption_export::{data_key_manager_from_config, DataKeyManager}; +use engine_rocks::{ + flush_engine_statistics, + raw::{Cache, Env}, + FlowInfo, RocksEngine, RocksStatistics, +}; +use engine_traits::{ + CachedTablet, CfOptions, CfOptionsExt, Engines, FlowControlFactorsExt, KvEngine, MiscExt, + RaftEngine, StatisticsReporter, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, +}; +use error_code::ErrorCodeExt; +use file_system::{ + get_io_rate_limiter, set_io_rate_limiter, BytesFetcher, File, IoBudgetAdjustor, + MetricsManager as IoMetricsManager, +}; +use futures::executor::block_on; +use grpcio::{EnvBuilder, Environment}; +use grpcio_health::HealthService; +use kvproto::{deadlock::create_deadlock, diagnosticspb::create_diagnostics, kvrpcpb::ApiVersion}; +use pd_client::{PdClient, RpcClient}; +use raft_log_engine::RaftLogEngine; +use raftstore::{ + coprocessor::{ + BoxConsistencyCheckObserver, ConsistencyCheckMethod, CoprocessorHost, + RawConsistencyCheckObserver, + }, + store::{memory::MEMTRACE_ROOT as MEMTRACE_RAFTSTORE, SplitConfigManager, TabletSnapManager}, + RegionInfoAccessor, +}; +use security::SecurityManager; +use tikv::{ + config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, + coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, + coprocessor_v2, + read_pool::{build_yatp_read_pool, ReadPool}, + server::{ + config::{Config as ServerConfig, ServerConfigManager}, + gc_worker::{AutoGcConfig, GcWorker}, + lock_manager::LockManager, + raftkv::ReplicaReadLockChecker, + resolve, + service::DiagnosticsService, + status_server::StatusServer, + KvEngineFactoryBuilder, NodeV2, 
RaftKv2, Server, CPU_CORES_QUOTA_GAUGE, DEFAULT_CLUSTER_ID, + GRPC_THREAD_PREFIX, + }, + storage::{ + self, + config_manager::StorageConfigManger, + mvcc::MvccConsistencyCheckObserver, + txn::flow_controller::{FlowController, TabletFlowController}, + Engine, Storage, + }, +}; +use tikv_util::{ + check_environment_variables, + config::{ensure_dir_exist, RaftDataStateMachine, VersionTrack}, + math::MovingAvgU32, + metrics::INSTANCE_BACKEND_CPU_QUOTA, + quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, + sys::{ + cpu_time::ProcessStat, disk, path_in_diff_mount_point, register_memory_usage_high_water, + SysQuota, + }, + thread_group::GroupProperties, + time::{Instant, Monitor}, + worker::{Builder as WorkerBuilder, LazyWorker, Scheduler, Worker}, + Either, +}; +use tokio::runtime::Builder; + +use crate::{ + memory::*, raft_engine_switch::*, server::Stop, setup::*, signal_handler, + tikv_util::sys::thread::ThreadBuildWrapper, +}; + +// minimum number of core kept for background requests +const BACKGROUND_REQUEST_CORE_LOWER_BOUND: f64 = 1.0; +// max ratio of core quota for background requests +const BACKGROUND_REQUEST_CORE_MAX_RATIO: f64 = 0.95; +// default ratio of core quota for background requests = core_number * 0.5 +const BACKGROUND_REQUEST_CORE_DEFAULT_RATIO: f64 = 0.5; +// indication of TiKV instance is short of cpu +const SYSTEM_BUSY_THRESHOLD: f64 = 0.80; +// indication of TiKV instance in healthy state when cpu usage is in [0.5, 0.80) +const SYSTEM_HEALTHY_THRESHOLD: f64 = 0.50; +// pace of cpu quota adjustment +const CPU_QUOTA_ADJUSTMENT_PACE: f64 = 200.0; // 0.2 vcpu + +#[inline] +fn run_impl(config: TikvConfig) { + let mut tikv = TikvServer::::init::(config); + + // Must be called after `TikvServer::init`. 
+ let memory_limit = tikv.config.memory_usage_limit.unwrap().0; + let high_water = (tikv.config.memory_usage_high_water * memory_limit as f64) as u64; + register_memory_usage_high_water(high_water); + + tikv.check_conflict_addr(); + tikv.init_fs(); + tikv.init_yatp(); + tikv.init_encryption(); + let fetcher = tikv.init_io_utility(); + let listener = tikv.init_flow_receiver(); + let (raft_engine, engines_info) = tikv.init_raw_engines(listener); + tikv.init_engines(raft_engine); + let server_config = tikv.init_servers::(); + tikv.register_services(); + tikv.init_metrics_flusher(fetcher, engines_info); + tikv.init_storage_stats_task(); + tikv.run_server(server_config); + tikv.run_status_server(); + tikv.init_quota_tuning_task(tikv.quota_limiter.clone()); + + // TODO: support signal dump stats + signal_handler::wait_for_signal( + None as Option>, + tikv.kv_statistics.clone(), + tikv.raft_statistics.clone(), + ); + tikv.stop(); +} + +/// Run a TiKV server. Returns when the server is shutdown by the user, in which +/// case the server will be properly stopped. +pub fn run_tikv(config: TikvConfig) { + // Sets the global logger ASAP. + // It is okay to use the config w/o `validate()`, + // because `initial_logger()` handles various conditions. + initial_logger(&config); + + // Print version information. + let build_timestamp = option_env!("TIKV_BUILD_TIME"); + tikv::log_tikv_info(build_timestamp); + + // Print resource quota. + SysQuota::log_quota(); + CPU_CORES_QUOTA_GAUGE.set(SysQuota::cpu_cores_quota()); + + // Do some prepare works before start. 
+ pre_start(); + + let _m = Monitor::default(); + + dispatch_api_version!(config.storage.api_version(), { + if !config.raft_engine.enable { + run_impl::(config) + } else { + run_impl::(config) + } + }) +} + +const RESERVED_OPEN_FDS: u64 = 1000; + +const DEFAULT_METRICS_FLUSH_INTERVAL: Duration = Duration::from_millis(10_000); +const DEFAULT_MEMTRACE_FLUSH_INTERVAL: Duration = Duration::from_millis(1_000); +const DEFAULT_ENGINE_METRICS_RESET_INTERVAL: Duration = Duration::from_millis(60_000); +const DEFAULT_STORAGE_STATS_INTERVAL: Duration = Duration::from_secs(1); +const DEFAULT_QUOTA_LIMITER_TUNE_INTERVAL: Duration = Duration::from_secs(5); + +/// A complete TiKV server. +struct TikvServer { + config: TikvConfig, + cfg_controller: Option, + security_mgr: Arc, + pd_client: Arc, + flow_info_sender: Option>, + flow_info_receiver: Option>, + node: Option>, + resolver: Option, + store_path: PathBuf, + snap_mgr: Option, // Will be filled in `init_servers`. + encryption_key_manager: Option>, + engines: Option>, + kv_statistics: Option>, + raft_statistics: Option>, + servers: Option>, + region_info_accessor: Option, + coprocessor_host: Option>, + to_stop: Vec>, + lock_files: Vec, + concurrency_manager: ConcurrencyManager, + env: Arc, + background_worker: Worker, + sst_worker: Option>>, + quota_limiter: Arc, + causal_ts_provider: Option>, // used for rawkv apiv2 + tablet_registry: Option>, +} + +struct TikvEngines { + raft_engine: ER, + engine: RaftKv2, +} + +struct Servers { + lock_mgr: LockManager, + server: LocalServer, +} + +type LocalServer = Server>; + +impl TikvServer +where + ER: RaftEngine, +{ + fn init(mut config: TikvConfig) -> TikvServer { + tikv_util::thread_group::set_properties(Some(GroupProperties::default())); + // It is okay use pd config and security config before `init_config`, + // because these configs must be provided by command line, and only + // used during startup process. 
+ let security_mgr = Arc::new( + SecurityManager::new(&config.security) + .unwrap_or_else(|e| fatal!("failed to create security manager: {}", e)), + ); + let env = Arc::new( + EnvBuilder::new() + .cq_count(config.server.grpc_concurrency) + .name_prefix(thd_name!(GRPC_THREAD_PREFIX)) + .build(), + ); + let pd_client = + Self::connect_to_pd_cluster(&mut config, env.clone(), Arc::clone(&security_mgr)); + + // Initialize and check config + let cfg_controller = Self::init_config(config); + let config = cfg_controller.get_current(); + + let store_path = Path::new(&config.storage.data_dir).to_owned(); + + let thread_count = config.server.background_thread_count; + let background_worker = WorkerBuilder::new("background") + .thread_count(thread_count) + .create(); + + // Initialize concurrency manager + let latest_ts = block_on(pd_client.get_tso()).expect("failed to get timestamp from PD"); + let concurrency_manager = ConcurrencyManager::new(latest_ts); + + // use different quota for front-end and back-end requests + let quota_limiter = Arc::new(QuotaLimiter::new( + config.quota.foreground_cpu_time, + config.quota.foreground_write_bandwidth, + config.quota.foreground_read_bandwidth, + config.quota.background_cpu_time, + config.quota.background_write_bandwidth, + config.quota.background_read_bandwidth, + config.quota.max_delay_duration, + config.quota.enable_auto_tune, + )); + + let mut causal_ts_provider = None; + if let ApiVersion::V2 = F::TAG { + let tso = block_on(causal_ts::BatchTsoProvider::new_opt( + pd_client.clone(), + config.causal_ts.renew_interval.0, + config.causal_ts.alloc_ahead_buffer.0, + config.causal_ts.renew_batch_min_size, + config.causal_ts.renew_batch_max_size, + )); + if let Err(e) = tso { + fatal!("Causal timestamp provider initialize failed: {:?}", e); + } + causal_ts_provider = Some(Arc::new(tso.unwrap().into())); + info!("Causal timestamp provider startup."); + } + + TikvServer { + config, + cfg_controller: Some(cfg_controller), + security_mgr, + 
pd_client, + node: None, + resolver: None, + store_path, + snap_mgr: None, + encryption_key_manager: None, + engines: None, + kv_statistics: None, + raft_statistics: None, + servers: None, + region_info_accessor: None, + coprocessor_host: None, + to_stop: vec![], + lock_files: vec![], + concurrency_manager, + env, + background_worker, + flow_info_sender: None, + flow_info_receiver: None, + sst_worker: None, + quota_limiter, + causal_ts_provider, + tablet_registry: None, + } + } + + /// Initialize and check the config + /// + /// Warnings are logged and fatal errors exist. + /// + /// # Fatal errors + /// + /// - If `dynamic config` feature is enabled and failed to register config + /// to PD + /// - If some critical configs (like data dir) are differrent from last run + /// - If the config can't pass `validate()` + /// - If the max open file descriptor limit is not high enough to support + /// the main database and the raft database. + fn init_config(mut config: TikvConfig) -> ConfigController { + validate_and_persist_config(&mut config, true); + + ensure_dir_exist(&config.storage.data_dir).unwrap(); + if !config.rocksdb.wal_dir.is_empty() { + ensure_dir_exist(&config.rocksdb.wal_dir).unwrap(); + } + if config.raft_engine.enable { + ensure_dir_exist(&config.raft_engine.config().dir).unwrap(); + } else { + ensure_dir_exist(&config.raft_store.raftdb_path).unwrap(); + if !config.raftdb.wal_dir.is_empty() { + ensure_dir_exist(&config.raftdb.wal_dir).unwrap(); + } + } + + check_system_config(&config); + + tikv_util::set_panic_hook(config.abort_on_panic, &config.storage.data_dir); + + info!( + "using config"; + "config" => serde_json::to_string(&config).unwrap(), + ); + if config.panic_when_unexpected_key_or_data { + info!("panic-when-unexpected-key-or-data is on"); + tikv_util::set_panic_when_unexpected_key_or_data(true); + } + + config.write_into_metrics(); + + ConfigController::new(config) + } + + fn connect_to_pd_cluster( + config: &mut TikvConfig, + env: Arc, + 
security_mgr: Arc, + ) -> Arc { + let pd_client = Arc::new( + RpcClient::new(&config.pd, Some(env), security_mgr) + .unwrap_or_else(|e| fatal!("failed to create rpc client: {}", e)), + ); + + let cluster_id = pd_client + .get_cluster_id() + .unwrap_or_else(|e| fatal!("failed to get cluster id: {}", e)); + if cluster_id == DEFAULT_CLUSTER_ID { + fatal!("cluster id can't be {}", DEFAULT_CLUSTER_ID); + } + config.server.cluster_id = cluster_id; + info!( + "connect to PD cluster"; + "cluster_id" => cluster_id + ); + + pd_client + } + + fn check_conflict_addr(&mut self) { + let cur_addr: SocketAddr = self + .config + .server + .addr + .parse() + .expect("failed to parse into a socket address"); + let cur_ip = cur_addr.ip(); + let cur_port = cur_addr.port(); + let lock_dir = get_lock_dir(); + + let search_base = env::temp_dir().join(lock_dir); + file_system::create_dir_all(&search_base) + .unwrap_or_else(|_| panic!("create {} failed", search_base.display())); + + for entry in file_system::read_dir(&search_base).unwrap().flatten() { + if !entry.file_type().unwrap().is_file() { + continue; + } + let file_path = entry.path(); + let file_name = file_path.file_name().unwrap().to_str().unwrap(); + if let Ok(addr) = file_name.replace('_', ":").parse::() { + let ip = addr.ip(); + let port = addr.port(); + if cur_port == port + && (cur_ip == ip || cur_ip.is_unspecified() || ip.is_unspecified()) + { + let _ = try_lock_conflict_addr(file_path); + } + } + } + + let cur_path = search_base.join(cur_addr.to_string().replace(':', "_")); + let cur_file = try_lock_conflict_addr(cur_path); + self.lock_files.push(cur_file); + } + + fn init_fs(&mut self) { + let lock_path = self.store_path.join(Path::new("LOCK")); + + let f = File::create(lock_path.as_path()) + .unwrap_or_else(|e| fatal!("failed to create lock at {}: {}", lock_path.display(), e)); + if f.try_lock_exclusive().is_err() { + fatal!( + "lock {} failed, maybe another instance is using this directory.", + self.store_path.display() 
+ ); + } + self.lock_files.push(f); + + if tikv_util::panic_mark_file_exists(&self.config.storage.data_dir) { + fatal!( + "panic_mark_file {} exists, there must be something wrong with the db. \ + Do not remove the panic_mark_file and force the TiKV node to restart. \ + Please contact TiKV maintainers to investigate the issue. \ + If needed, use scale in and scale out to replace the TiKV node. \ + https://docs.pingcap.com/tidb/stable/scale-tidb-using-tiup", + tikv_util::panic_mark_file_path(&self.config.storage.data_dir).display() + ); + } + + // We truncate a big file to make sure that both raftdb and kvdb of TiKV have + // enough space to do compaction and region migration when TiKV recover. + // This file is created in data_dir rather than db_path, because we must not + // increase store size of db_path. + fn calculate_reserved_space(capacity: u64, reserved_size_from_config: u64) -> u64 { + let mut reserved_size = reserved_size_from_config; + if reserved_size_from_config != 0 { + reserved_size = + cmp::max((capacity as f64 * 0.05) as u64, reserved_size_from_config); + } + reserved_size + } + fn reserve_physical_space(data_dir: &String, available: u64, reserved_size: u64) { + let path = Path::new(data_dir).join(file_system::SPACE_PLACEHOLDER_FILE); + if let Err(e) = file_system::remove_file(path) { + warn!("failed to remove space holder on starting: {}", e); + } + + // place holder file size is 20% of total reserved space. 
+ if available > reserved_size { + file_system::reserve_space_for_recover(data_dir, reserved_size / 5) + .map_err(|e| panic!("Failed to reserve space for recovery: {}.", e)) + .unwrap(); + } else { + warn!("no enough disk space left to create the place holder file"); + } + } + + let disk_stats = fs2::statvfs(&self.config.storage.data_dir).unwrap(); + let mut capacity = disk_stats.total_space(); + if self.config.raft_store.capacity.0 > 0 { + capacity = cmp::min(capacity, self.config.raft_store.capacity.0); + } + // reserve space for kv engine + let kv_reserved_size = + calculate_reserved_space(capacity, self.config.storage.reserve_space.0); + disk::set_disk_reserved_space(kv_reserved_size); + reserve_physical_space( + &self.config.storage.data_dir, + disk_stats.available_space(), + kv_reserved_size, + ); + + let raft_data_dir = if self.config.raft_engine.enable { + self.config.raft_engine.config().dir + } else { + self.config.raft_store.raftdb_path.clone() + }; + + let separated_raft_mount_path = + path_in_diff_mount_point(&self.config.storage.data_dir, &raft_data_dir); + if separated_raft_mount_path { + let raft_disk_stats = fs2::statvfs(&raft_data_dir).unwrap(); + // reserve space for raft engine if raft engine is deployed separately + let raft_reserved_size = calculate_reserved_space( + raft_disk_stats.total_space(), + self.config.storage.reserve_raft_space.0, + ); + disk::set_raft_disk_reserved_space(raft_reserved_size); + reserve_physical_space( + &raft_data_dir, + raft_disk_stats.available_space(), + raft_reserved_size, + ); + } + } + + fn init_yatp(&self) { + yatp::metrics::set_namespace(Some("tikv")); + prometheus::register(Box::new(yatp::metrics::MULTILEVEL_LEVEL0_CHANCE.clone())).unwrap(); + prometheus::register(Box::new(yatp::metrics::MULTILEVEL_LEVEL_ELAPSED.clone())).unwrap(); + prometheus::register(Box::new(yatp::metrics::TASK_EXEC_DURATION.clone())).unwrap(); + prometheus::register(Box::new(yatp::metrics::TASK_POLL_DURATION.clone())).unwrap(); + 
prometheus::register(Box::new(yatp::metrics::TASK_EXEC_TIMES.clone())).unwrap(); + } + + fn init_encryption(&mut self) { + self.encryption_key_manager = data_key_manager_from_config( + &self.config.security.encryption, + &self.config.storage.data_dir, + ) + .map_err(|e| { + panic!( + "Encryption failed to initialize: {}. code: {}", + e, + e.error_code() + ) + }) + .unwrap() + .map(Arc::new); + } + + fn init_flow_receiver(&mut self) -> engine_rocks::FlowListener { + let (tx, rx) = mpsc::channel(); + self.flow_info_sender = Some(tx.clone()); + self.flow_info_receiver = Some(rx); + engine_rocks::FlowListener::new(tx) + } + + fn init_engines(&mut self, raft_engine: ER) { + let tablet_registry = self.tablet_registry.clone().unwrap(); + let mut node = NodeV2::new( + &self.config.server, + self.pd_client.clone(), + None, + tablet_registry, + ); + node.try_bootstrap_store(&self.config.raft_store, &raft_engine) + .unwrap_or_else(|e| fatal!("failed to bootstrap store: {:?}", e)); + assert_ne!(node.id(), 0); + + let router = node.router(); + let mut coprocessor_host: CoprocessorHost = CoprocessorHost::new( + router.store_router().clone(), + self.config.coprocessor.clone(), + ); + let region_info_accessor = RegionInfoAccessor::new(&mut coprocessor_host); + + let engine = RaftKv2::new(router.clone(), region_info_accessor.region_leaders()); + + self.engines = Some(TikvEngines { + raft_engine, + engine, + }); + self.node = Some(node); + self.coprocessor_host = Some(coprocessor_host); + self.region_info_accessor = Some(region_info_accessor); + } + + fn init_gc_worker(&mut self) -> GcWorker> { + let engines = self.engines.as_ref().unwrap(); + let gc_worker = GcWorker::new( + engines.engine.clone(), + self.flow_info_sender.take().unwrap(), + self.config.gc.clone(), + self.pd_client.feature_gate().clone(), + Arc::new(self.region_info_accessor.clone().unwrap()), + ); + + let cfg_controller = self.cfg_controller.as_mut().unwrap(); + cfg_controller.register( + tikv::config::Module::Gc, 
+ Box::new(gc_worker.get_config_manager()), + ); + + gc_worker + } + + fn init_servers(&mut self) -> Arc> { + let flow_controller = Arc::new(FlowController::Tablet(TabletFlowController::new( + &self.config.storage.flow_control, + self.tablet_registry.clone().unwrap(), + self.flow_info_receiver.take().unwrap(), + ))); + let mut gc_worker = self.init_gc_worker(); + let ttl_checker = Box::new(LazyWorker::new("ttl-checker")); + let ttl_scheduler = ttl_checker.scheduler(); + + let cfg_controller = self.cfg_controller.as_mut().unwrap(); + + cfg_controller.register( + tikv::config::Module::Quota, + Box::new(QuotaLimitConfigManager::new(Arc::clone( + &self.quota_limiter, + ))), + ); + + cfg_controller.register(tikv::config::Module::Log, Box::new(LogConfigManager)); + + let lock_mgr = LockManager::new(&self.config.pessimistic_txn); + cfg_controller.register( + tikv::config::Module::PessimisticTxn, + Box::new(lock_mgr.config_manager()), + ); + lock_mgr.register_detector_role_change_observer(self.coprocessor_host.as_mut().unwrap()); + + let engines = self.engines.as_ref().unwrap(); + + let pd_worker = LazyWorker::new("pd-worker"); + let pd_sender = raftstore_v2::FlowReporter::new(pd_worker.scheduler()); + + let unified_read_pool = if self.config.readpool.is_unified_pool_enabled() { + Some(build_yatp_read_pool( + &self.config.readpool.unified, + pd_sender.clone(), + engines.engine.clone(), + )) + } else { + None + }; + + // The `DebugService` and `DiagnosticsService` will share the same thread pool + let props = tikv_util::thread_group::current_properties(); + let debug_thread_pool = Arc::new( + Builder::new_multi_thread() + .thread_name(thd_name!("debugger")) + .worker_threads(1) + .after_start_wrapper(move || { + tikv_alloc::add_thread_memory_accessor(); + tikv_util::thread_group::set_properties(props.clone()); + }) + .before_stop_wrapper(tikv_alloc::remove_thread_memory_accessor) + .build() + .unwrap(), + ); + + // Start resource metering. 
+ let (recorder_notifier, collector_reg_handle, resource_tag_factory, recorder_worker) = + resource_metering::init_recorder(self.config.resource_metering.precision.as_millis()); + self.to_stop.push(recorder_worker); + let (reporter_notifier, data_sink_reg_handle, reporter_worker) = + resource_metering::init_reporter( + self.config.resource_metering.clone(), + collector_reg_handle, + ); + self.to_stop.push(reporter_worker); + let (address_change_notifier, single_target_worker) = resource_metering::init_single_target( + self.config.resource_metering.receiver_address.clone(), + self.env.clone(), + data_sink_reg_handle, + ); + self.to_stop.push(single_target_worker); + + let cfg_manager = resource_metering::ConfigManager::new( + self.config.resource_metering.clone(), + recorder_notifier, + reporter_notifier, + address_change_notifier, + ); + cfg_controller.register( + tikv::config::Module::ResourceMetering, + Box::new(cfg_manager), + ); + + let storage_read_pool_handle = if self.config.readpool.storage.use_unified_pool() { + unified_read_pool.as_ref().unwrap().handle() + } else { + let storage_read_pools = ReadPool::from(storage::build_read_pool( + &self.config.readpool.storage, + pd_sender.clone(), + engines.engine.clone(), + )); + storage_read_pools.handle() + }; + + let storage = Storage::<_, _, F>::from_engine( + engines.engine.clone(), + &self.config.storage, + storage_read_pool_handle, + lock_mgr.clone(), + self.concurrency_manager.clone(), + lock_mgr.get_storage_dynamic_configs(), + flow_controller.clone(), + pd_sender.clone(), + resource_tag_factory.clone(), + Arc::clone(&self.quota_limiter), + self.pd_client.feature_gate().clone(), + self.causal_ts_provider.clone(), + ) + .unwrap_or_else(|e| fatal!("failed to create raft storage: {}", e)); + cfg_controller.register( + tikv::config::Module::Storage, + Box::new(StorageConfigManger::new( + self.tablet_registry.as_ref().unwrap().clone(), + ttl_scheduler, + flow_controller, + storage.get_scheduler(), + )), + ); + + 
let (resolver, state) = resolve::new_resolver( + self.pd_client.clone(), + &self.background_worker, + storage.get_engine().raft_extension(), + ); + self.resolver = Some(resolver); + + ReplicaReadLockChecker::new(self.concurrency_manager.clone()) + .register(self.coprocessor_host.as_mut().unwrap()); + + // Create snapshot manager, server. + let snap_path = self + .store_path + .join(Path::new("tablet_snap")) + .to_str() + .unwrap() + .to_owned(); + + let snap_mgr = TabletSnapManager::new(snap_path); + + // Create coprocessor endpoint. + let cop_read_pool_handle = if self.config.readpool.coprocessor.use_unified_pool() { + unified_read_pool.as_ref().unwrap().handle() + } else { + let cop_read_pools = ReadPool::from(coprocessor::readpool_impl::build_read_pool( + &self.config.readpool.coprocessor, + pd_sender, + engines.engine.clone(), + )); + cop_read_pools.handle() + }; + + let server_config = Arc::new(VersionTrack::new(self.config.server.clone())); + + self.config + .raft_store + .validate( + self.config.coprocessor.region_split_size, + self.config.coprocessor.enable_region_bucket, + self.config.coprocessor.region_bucket_size, + ) + .unwrap_or_else(|e| fatal!("failed to validate raftstore config {}", e)); + let raft_store = Arc::new(VersionTrack::new(self.config.raft_store.clone())); + let health_service = HealthService::default(); + + let node = self.node.as_ref().unwrap(); + + self.snap_mgr = Some(snap_mgr.clone()); + // Create server + let server = Server::new( + node.id(), + &server_config, + &self.security_mgr, + storage, + coprocessor::Endpoint::new( + &server_config.value(), + cop_read_pool_handle, + self.concurrency_manager.clone(), + resource_tag_factory, + Arc::clone(&self.quota_limiter), + ), + coprocessor_v2::Endpoint::new(&self.config.coprocessor_v2), + self.resolver.clone().unwrap(), + Either::Right(snap_mgr.clone()), + gc_worker.clone(), + None, + self.env.clone(), + unified_read_pool, + debug_thread_pool, + health_service, + ) + .unwrap_or_else(|e| 
fatal!("failed to create server: {}", e)); + cfg_controller.register( + tikv::config::Module::Server, + Box::new(ServerConfigManager::new( + server.get_snap_worker_scheduler(), + server_config.clone(), + server.get_grpc_mem_quota().clone(), + )), + ); + + let split_config_manager = + SplitConfigManager::new(Arc::new(VersionTrack::new(self.config.split.clone()))); + cfg_controller.register(tikv::config::Module::Split, Box::new(split_config_manager)); + + // `ConsistencyCheckObserver` must be registered before `Node::start`. + let safe_point = Arc::new(AtomicU64::new(0)); + let observer = match self.config.coprocessor.consistency_check_method { + ConsistencyCheckMethod::Mvcc => BoxConsistencyCheckObserver::new( + MvccConsistencyCheckObserver::new(safe_point.clone()), + ), + ConsistencyCheckMethod::Raw => { + BoxConsistencyCheckObserver::new(RawConsistencyCheckObserver::default()) + } + }; + self.coprocessor_host + .as_mut() + .unwrap() + .registry + .register_consistency_check_observer(100, observer); + + self.node + .as_mut() + .unwrap() + .start( + engines.raft_engine.clone(), + server.transport(), + snap_mgr, + self.concurrency_manager.clone(), + self.causal_ts_provider.clone(), + self.coprocessor_host.clone().unwrap(), + self.background_worker.clone(), + pd_worker, + raft_store, + &state, + ) + .unwrap_or_else(|e| fatal!("failed to start node: {}", e)); + + // Start auto gc. Must after `Node::start` because `node_id` is initialized + // there. 
+ let store_id = self.node.as_ref().unwrap().id(); + let auto_gc_config = AutoGcConfig::new( + self.pd_client.clone(), + self.region_info_accessor.clone().unwrap(), + store_id, + ); + gc_worker + .start(store_id) + .unwrap_or_else(|e| fatal!("failed to start gc worker: {}", e)); + if let Err(e) = gc_worker.start_auto_gc(auto_gc_config, safe_point) { + fatal!("failed to start auto_gc on storage, error: {}", e); + } + + initial_metric(&self.config.metric); + + self.servers = Some(Servers { lock_mgr, server }); + + server_config + } + + fn register_services(&mut self) { + let servers = self.servers.as_mut().unwrap(); + + // Create Diagnostics service + let diag_service = DiagnosticsService::new( + servers.server.get_debug_thread_pool().clone(), + self.config.log.file.filename.clone(), + self.config.slow_log_file.clone(), + ); + if servers + .server + .register_service(create_diagnostics(diag_service)) + .is_some() + { + fatal!("failed to register diagnostics service"); + } + + // Lock manager. 
+ if servers + .server + .register_service(create_deadlock(servers.lock_mgr.deadlock_service())) + .is_some() + { + fatal!("failed to register deadlock service"); + } + + servers + .lock_mgr + .start( + self.node.as_ref().unwrap().id(), + self.pd_client.clone(), + self.resolver.clone().unwrap(), + self.security_mgr.clone(), + &self.config.pessimistic_txn, + ) + .unwrap_or_else(|e| fatal!("failed to start lock manager: {}", e)); + } + + fn init_io_utility(&mut self) -> BytesFetcher { + let stats_collector_enabled = file_system::init_io_stats_collector() + .map_err(|e| warn!("failed to init I/O stats collector: {}", e)) + .is_ok(); + + let limiter = Arc::new( + self.config + .storage + .io_rate_limit + .build(!stats_collector_enabled /* enable_statistics */), + ); + let fetcher = if stats_collector_enabled { + BytesFetcher::FromIoStatsCollector() + } else { + BytesFetcher::FromRateLimiter(limiter.statistics().unwrap()) + }; + // Set up IO limiter even when rate limit is disabled, so that rate limits can + // be dynamically applied later on. + set_io_rate_limiter(Some(limiter)); + fetcher + } + + fn init_metrics_flusher( + &mut self, + fetcher: BytesFetcher, + engines_info: Arc, + ) { + let mut engine_metrics = EngineMetricsManager::::new( + self.tablet_registry.clone().unwrap(), + self.kv_statistics.clone(), + self.config.rocksdb.titan.enabled, + self.engines.as_ref().unwrap().raft_engine.clone(), + self.raft_statistics.clone(), + ); + let mut io_metrics = IoMetricsManager::new(fetcher); + let engines_info_clone = engines_info.clone(); + + // region_id -> (suffix, tablet) + // `update` of EnginesResourceInfo is called perodically which needs this map + // for recording the latest tablet for each region. + // `cached_latest_tablets` is passed to `update` to avoid memory + // allocation each time when calling `update`. 
+ let mut cached_latest_tablets = HashMap::default(); + self.background_worker + .spawn_interval_task(DEFAULT_METRICS_FLUSH_INTERVAL, move || { + let now = Instant::now(); + engine_metrics.flush(now); + io_metrics.flush(now); + engines_info_clone.update(now, &mut cached_latest_tablets); + }); + if let Some(limiter) = get_io_rate_limiter() { + limiter.set_low_priority_io_adjustor_if_needed(Some(engines_info)); + } + + let mut mem_trace_metrics = MemoryTraceManager::default(); + mem_trace_metrics.register_provider(MEMTRACE_RAFTSTORE.clone()); + mem_trace_metrics.register_provider(MEMTRACE_COPROCESSOR.clone()); + self.background_worker + .spawn_interval_task(DEFAULT_MEMTRACE_FLUSH_INTERVAL, move || { + let now = Instant::now(); + mem_trace_metrics.flush(now); + }); + } + + // Only background cpu quota tuning is implemented at present. iops and frontend + // quota tuning is on the way + fn init_quota_tuning_task(&self, quota_limiter: Arc) { + // No need to do auto tune when capacity is really low + if SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_MAX_RATIO + < BACKGROUND_REQUEST_CORE_LOWER_BOUND + { + return; + }; + + // Determine the base cpu quota + let base_cpu_quota = + // if cpu quota is not specified, start from optimistic case + if quota_limiter.cputime_limiter(false).is_infinite() { + 1000_f64 + * f64::max( + BACKGROUND_REQUEST_CORE_LOWER_BOUND, + SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_DEFAULT_RATIO, + ) + } else { + quota_limiter.cputime_limiter(false) / 1000_f64 + }; + + // Calculate the celling and floor quota + let celling_quota = f64::min( + base_cpu_quota * 2.0, + 1_000_f64 * SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_MAX_RATIO, + ); + let floor_quota = f64::max( + base_cpu_quota * 0.5, + 1_000_f64 * BACKGROUND_REQUEST_CORE_LOWER_BOUND, + ); + + let mut proc_stats: ProcessStat = ProcessStat::cur_proc_stat().unwrap(); + self.background_worker.spawn_interval_task( + DEFAULT_QUOTA_LIMITER_TUNE_INTERVAL, + move || { + if 
quota_limiter.auto_tune_enabled() { + let cputime_limit = quota_limiter.cputime_limiter(false); + let old_quota = if cputime_limit.is_infinite() { + base_cpu_quota + } else { + cputime_limit / 1000_f64 + }; + let cpu_usage = match proc_stats.cpu_usage() { + Ok(r) => r, + Err(_e) => 0.0, + }; + // Try tuning quota when cpu_usage is correctly collected. + // rule based tuning: + // - if instance is busy, shrink cpu quota for analyze by one quota pace until + // lower bound is hit; + // - if instance cpu usage is healthy, no op; + // - if instance is idle, increase cpu quota by one quota pace until upper + // bound is hit. + if cpu_usage > 0.0f64 { + let mut target_quota = old_quota; + + let cpu_util = cpu_usage / SysQuota::cpu_cores_quota(); + if cpu_util >= SYSTEM_BUSY_THRESHOLD { + target_quota = + f64::max(target_quota - CPU_QUOTA_ADJUSTMENT_PACE, floor_quota); + } else if cpu_util < SYSTEM_HEALTHY_THRESHOLD { + target_quota = + f64::min(target_quota + CPU_QUOTA_ADJUSTMENT_PACE, celling_quota); + } + + if old_quota != target_quota { + quota_limiter.set_cpu_time_limit(target_quota as usize, false); + debug!( + "cpu_time_limiter tuned for backend request"; + "cpu_util" => ?cpu_util, + "new quota" => ?target_quota); + INSTANCE_BACKEND_CPU_QUOTA.set(target_quota as i64); + } + } + } + }, + ); + } + + fn init_storage_stats_task(&self) { + let config_disk_capacity: u64 = self.config.raft_store.capacity.0; + let data_dir = self.config.storage.data_dir.clone(); + let store_path = self.store_path.clone(); + let snap_mgr = self.snap_mgr.clone().unwrap(); + let reserve_space = disk::get_disk_reserved_space(); + let reserve_raft_space = disk::get_raft_disk_reserved_space(); + if reserve_space == 0 && reserve_raft_space == 0 { + info!("disk space checker not enabled"); + return; + } + let raft_engine = self.engines.as_ref().unwrap().raft_engine.clone(); + let tablet_registry = self.tablet_registry.clone().unwrap(); + let raft_path = raft_engine.get_engine_path().to_string(); + 
let separated_raft_mount_path = + path_in_diff_mount_point(raft_path.as_str(), tablet_registry.tablet_root()); + let raft_almost_full_threshold = reserve_raft_space; + let raft_already_full_threshold = reserve_raft_space / 2; + + let almost_full_threshold = reserve_space; + let already_full_threshold = reserve_space / 2; + fn calculate_disk_usage(a: disk::DiskUsage, b: disk::DiskUsage) -> disk::DiskUsage { + match (a, b) { + (disk::DiskUsage::AlreadyFull, _) => disk::DiskUsage::AlreadyFull, + (_, disk::DiskUsage::AlreadyFull) => disk::DiskUsage::AlreadyFull, + (disk::DiskUsage::AlmostFull, _) => disk::DiskUsage::AlmostFull, + (_, disk::DiskUsage::AlmostFull) => disk::DiskUsage::AlmostFull, + (disk::DiskUsage::Normal, disk::DiskUsage::Normal) => disk::DiskUsage::Normal, + } + } + self.background_worker + .spawn_interval_task(DEFAULT_STORAGE_STATS_INTERVAL, move || { + let disk_stats = match fs2::statvfs(&store_path) { + Err(e) => { + error!( + "get disk stat for kv store failed"; + "kv path" => store_path.to_str(), + "err" => ?e + ); + return; + } + Ok(stats) => stats, + }; + let disk_cap = disk_stats.total_space(); + let snap_size = snap_mgr.total_snap_size().unwrap(); + + let mut kv_size = 0; + tablet_registry.for_each_opened_tablet(|_, cached| { + if let Some(tablet) = cached.latest() { + kv_size += tablet.get_engine_used_size().unwrap_or(0); + } + true + }); + + let raft_size = raft_engine + .get_engine_size() + .expect("get raft engine size"); + + let mut raft_disk_status = disk::DiskUsage::Normal; + if separated_raft_mount_path && reserve_raft_space != 0 { + let raft_disk_stats = match fs2::statvfs(&raft_path) { + Err(e) => { + error!( + "get disk stat for raft engine failed"; + "raft engine path" => raft_path.clone(), + "err" => ?e + ); + return; + } + Ok(stats) => stats, + }; + let raft_disk_cap = raft_disk_stats.total_space(); + let mut raft_disk_available = + raft_disk_cap.checked_sub(raft_size).unwrap_or_default(); + raft_disk_available = 
cmp::min(raft_disk_available, raft_disk_stats.available_space()); + raft_disk_status = if raft_disk_available <= raft_already_full_threshold + { + disk::DiskUsage::AlreadyFull + } else if raft_disk_available <= raft_almost_full_threshold + { + disk::DiskUsage::AlmostFull + } else { + disk::DiskUsage::Normal + }; + } + let placeholer_file_path = PathBuf::from_str(&data_dir) + .unwrap() + .join(Path::new(file_system::SPACE_PLACEHOLDER_FILE)); + + let placeholder_size: u64 = + file_system::get_file_size(placeholer_file_path).unwrap_or(0); + + let used_size = if !separated_raft_mount_path { + snap_size + kv_size + raft_size + placeholder_size + } else { + snap_size + kv_size + placeholder_size + }; + let capacity = if config_disk_capacity == 0 || disk_cap < config_disk_capacity { + disk_cap + } else { + config_disk_capacity + }; + + let mut available = capacity.checked_sub(used_size).unwrap_or_default(); + available = cmp::min(available, disk_stats.available_space()); + + let prev_disk_status = disk::get_disk_status(0); //0 no need care about failpoint. 
+ let cur_kv_disk_status = if available <= already_full_threshold { + disk::DiskUsage::AlreadyFull + } else if available <= almost_full_threshold { + disk::DiskUsage::AlmostFull + } else { + disk::DiskUsage::Normal + }; + let cur_disk_status = calculate_disk_usage(raft_disk_status, cur_kv_disk_status); + if prev_disk_status != cur_disk_status { + warn!( + "disk usage {:?}->{:?} (raft engine usage: {:?}, kv engine usage: {:?}), seperated raft mount={}, kv available={}, snap={}, kv={}, raft={}, capacity={}", + prev_disk_status, + cur_disk_status, + raft_disk_status, + cur_kv_disk_status, + separated_raft_mount_path, + available, + snap_size, + kv_size, + raft_size, + capacity + ); + } + disk::set_disk_status(cur_disk_status); + }) + } + + fn init_sst_recovery_sender(&mut self) -> Option> { + if !self + .config + .storage + .background_error_recovery_window + .is_zero() + { + let sst_worker = Box::new(LazyWorker::new("sst-recovery")); + let scheduler = sst_worker.scheduler(); + self.sst_worker = Some(sst_worker); + Some(scheduler) + } else { + None + } + } + + fn run_server(&mut self, server_config: Arc>) { + let server = self.servers.as_mut().unwrap(); + server + .server + .build_and_bind() + .unwrap_or_else(|e| fatal!("failed to build server: {}", e)); + server + .server + .start(server_config, self.security_mgr.clone()) + .unwrap_or_else(|e| fatal!("failed to start server: {}", e)); + } + + fn run_status_server(&mut self) { + // Create a status server. 
+ let status_enabled = !self.config.server.status_addr.is_empty(); + if status_enabled { + let mut status_server = match StatusServer::new( + self.config.server.status_thread_pool_size, + self.cfg_controller.take().unwrap(), + Arc::new(self.config.security.clone()), + self.engines.as_ref().unwrap().engine.raft_extension(), + self.store_path.clone(), + ) { + Ok(status_server) => Box::new(status_server), + Err(e) => { + error_unknown!(%e; "failed to start runtime for status service"); + return; + } + }; + // Start the status server. + if let Err(e) = status_server.start(self.config.server.status_addr.clone()) { + error_unknown!(%e; "failed to bind addr for status service"); + } else { + self.to_stop.push(status_server); + } + } + } + + fn stop(mut self) { + tikv_util::thread_group::mark_shutdown(); + let mut servers = self.servers.unwrap(); + servers + .server + .stop() + .unwrap_or_else(|e| fatal!("failed to stop server: {}", e)); + + self.node.as_mut().unwrap().stop(); + self.region_info_accessor.as_mut().unwrap().stop(); + + servers.lock_mgr.stop(); + + if let Some(sst_worker) = self.sst_worker { + sst_worker.stop_worker(); + } + + self.to_stop.into_iter().for_each(|s| s.stop()); + } +} + +pub trait ConfiguredRaftEngine: RaftEngine { + fn build( + _: &TikvConfig, + _: &Arc, + _: &Option>, + _: &Cache, + ) -> (Self, Option>); + fn as_rocks_engine(&self) -> Option<&RocksEngine>; + fn register_config(&self, _cfg_controller: &mut ConfigController); +} + +impl ConfiguredRaftEngine for T { + default fn build( + _: &TikvConfig, + _: &Arc, + _: &Option>, + _: &Cache, + ) -> (Self, Option>) { + unimplemented!() + } + default fn as_rocks_engine(&self) -> Option<&RocksEngine> { + None + } + default fn register_config(&self, _cfg_controller: &mut ConfigController) {} +} + +impl ConfiguredRaftEngine for RocksEngine { + fn build( + config: &TikvConfig, + env: &Arc, + key_manager: &Option>, + block_cache: &Cache, + ) -> (Self, Option>) { + let mut raft_data_state_machine = 
RaftDataStateMachine::new( + &config.storage.data_dir, + &config.raft_engine.config().dir, + &config.raft_store.raftdb_path, + ); + let should_dump = raft_data_state_machine.before_open_target(); + + let raft_db_path = &config.raft_store.raftdb_path; + let config_raftdb = &config.raftdb; + let mut raft_db_opts = config_raftdb.build_opt(); + raft_db_opts.set_env(env.clone()); + let statistics = Arc::new(RocksStatistics::new_titan()); + raft_db_opts.set_statistics(statistics.as_ref()); + let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); + let raftdb = engine_rocks::util::new_engine_opt(raft_db_path, raft_db_opts, raft_cf_opts) + .expect("failed to open raftdb"); + + if should_dump { + let raft_engine = + RaftLogEngine::new(config.raft_engine.config(), key_manager.clone(), None) + .expect("failed to open raft engine for migration"); + dump_raft_engine_to_raftdb(&raft_engine, &raftdb, 8 /* threads */); + raft_engine.stop(); + drop(raft_engine); + raft_data_state_machine.after_dump_data(); + } + (raftdb, Some(statistics)) + } + + fn as_rocks_engine(&self) -> Option<&RocksEngine> { + Some(self) + } + + fn register_config(&self, cfg_controller: &mut ConfigController) { + cfg_controller.register( + tikv::config::Module::Raftdb, + Box::new(DbConfigManger::new(self.clone(), DbType::Raft)), + ); + } +} + +impl ConfiguredRaftEngine for RaftLogEngine { + fn build( + config: &TikvConfig, + env: &Arc, + key_manager: &Option>, + block_cache: &Cache, + ) -> (Self, Option>) { + let mut raft_data_state_machine = RaftDataStateMachine::new( + &config.storage.data_dir, + &config.raft_store.raftdb_path, + &config.raft_engine.config().dir, + ); + let should_dump = raft_data_state_machine.before_open_target(); + + let raft_config = config.raft_engine.config(); + let raft_engine = + RaftLogEngine::new(raft_config, key_manager.clone(), get_io_rate_limiter()) + .expect("failed to open raft engine"); + + if should_dump { + let config_raftdb = &config.raftdb; + let mut raft_db_opts = 
config_raftdb.build_opt(); + raft_db_opts.set_env(env.clone()); + let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); + let raftdb = engine_rocks::util::new_engine_opt( + &config.raft_store.raftdb_path, + raft_db_opts, + raft_cf_opts, + ) + .expect("failed to open raftdb for migration"); + dump_raftdb_to_raft_engine(&raftdb, &raft_engine, 8 /* threads */); + raftdb.stop(); + drop(raftdb); + raft_data_state_machine.after_dump_data(); + } + (raft_engine, None) + } +} + +impl TikvServer { + fn init_raw_engines( + &mut self, + flow_listener: engine_rocks::FlowListener, + ) -> (CER, Arc) { + let block_cache = self.config.storage.block_cache.build_shared_cache(); + let env = self + .config + .build_shared_rocks_env(self.encryption_key_manager.clone(), get_io_rate_limiter()) + .unwrap(); + + // Create raft engine + let (raft_engine, raft_statistics) = CER::build( + &self.config, + &env, + &self.encryption_key_manager, + &block_cache, + ); + self.raft_statistics = raft_statistics; + + // Create kv engine. + let builder = KvEngineFactoryBuilder::new(env, &self.config, block_cache) + .sst_recovery_sender(self.init_sst_recovery_sender()) + .flow_listener(flow_listener); + let factory = Box::new(builder.build()); + self.kv_statistics = Some(factory.rocks_statistics()); + let registry = TabletRegistry::new(factory, self.store_path.join("tablets")) + .unwrap_or_else(|e| fatal!("failed to create tablet registry {:?}", e)); + let cfg_controller = self.cfg_controller.as_mut().unwrap(); + cfg_controller.register( + tikv::config::Module::Rocksdb, + Box::new(DbConfigManger::new(registry.clone(), DbType::Kv)), + ); + self.tablet_registry = Some(registry.clone()); + raft_engine.register_config(cfg_controller); + + let engines_info = Arc::new(EnginesResourceInfo::new( + registry, + raft_engine.as_rocks_engine().cloned(), + 180, // max_samples_to_preserve + )); + + (raft_engine, engines_info) + } +} + +/// Various sanity-checks and logging before running a server. 
+/// +/// Warnings are logged. +/// +/// # Logs +/// +/// The presence of these environment variables that affect the database +/// behavior is logged. +/// +/// - `GRPC_POLL_STRATEGY` +/// - `http_proxy` and `https_proxy` +/// +/// # Warnings +/// +/// - if `net.core.somaxconn` < 32768 +/// - if `net.ipv4.tcp_syncookies` is not 0 +/// - if `vm.swappiness` is not 0 +/// - if data directories are not on SSDs +/// - if the "TZ" environment variable is not set on unix +fn pre_start() { + check_environment_variables(); + for e in tikv_util::config::check_kernel() { + warn!( + "check: kernel"; + "err" => %e + ); + } +} + +fn check_system_config(config: &TikvConfig) { + info!("beginning system configuration check"); + let mut rocksdb_max_open_files = config.rocksdb.max_open_files; + if config.rocksdb.titan.enabled { + // Titan engine maintains yet another pool of blob files and uses the same max + // number of open files setup as rocksdb does. So we double the max required + // open files here + rocksdb_max_open_files *= 2; + } + if let Err(e) = tikv_util::config::check_max_open_fds( + RESERVED_OPEN_FDS + (rocksdb_max_open_files + config.raftdb.max_open_files) as u64, + ) { + fatal!("{}", e); + } + + // Check RocksDB data dir + if let Err(e) = tikv_util::config::check_data_dir(&config.storage.data_dir) { + warn!( + "check: rocksdb-data-dir"; + "path" => &config.storage.data_dir, + "err" => %e + ); + } + // Check raft data dir + if let Err(e) = tikv_util::config::check_data_dir(&config.raft_store.raftdb_path) { + warn!( + "check: raftdb-path"; + "path" => &config.raft_store.raftdb_path, + "err" => %e + ); + } +} + +fn try_lock_conflict_addr>(path: P) -> File { + let f = File::create(path.as_ref()).unwrap_or_else(|e| { + fatal!( + "failed to create lock at {}: {}", + path.as_ref().display(), + e + ) + }); + + if f.try_lock_exclusive().is_err() { + fatal!( + "{} already in use, maybe another instance is binding with this address.", + 
path.as_ref().file_name().unwrap().to_str().unwrap() + ); + } + f +} + +#[cfg(unix)] +fn get_lock_dir() -> String { + format!("{}_TIKV_LOCK_FILES", unsafe { libc::getuid() }) +} + +#[cfg(not(unix))] +fn get_lock_dir() -> String { + "TIKV_LOCK_FILES".to_owned() +} + +pub struct EngineMetricsManager { + tablet_registry: TabletRegistry, + kv_statistics: Option>, + kv_is_titan: bool, + raft_engine: ER, + raft_statistics: Option>, + last_reset: Instant, +} + +impl EngineMetricsManager { + pub fn new( + tablet_registry: TabletRegistry, + kv_statistics: Option>, + kv_is_titan: bool, + raft_engine: ER, + raft_statistics: Option>, + ) -> Self { + EngineMetricsManager { + tablet_registry, + kv_statistics, + kv_is_titan, + raft_engine, + raft_statistics, + last_reset: Instant::now(), + } + } + + pub fn flush(&mut self, now: Instant) { + let mut reporter = EK::StatisticsReporter::new("kv"); + self.tablet_registry + .for_each_opened_tablet(|_, db: &mut CachedTablet| { + if let Some(db) = db.latest() { + reporter.collect(db); + } + true + }); + reporter.flush(); + self.raft_engine.flush_metrics("raft"); + + if let Some(s) = self.kv_statistics.as_ref() { + flush_engine_statistics(s, "kv", self.kv_is_titan); + } + if let Some(s) = self.raft_statistics.as_ref() { + flush_engine_statistics(s, "raft", false); + } + if now.saturating_duration_since(self.last_reset) >= DEFAULT_ENGINE_METRICS_RESET_INTERVAL { + if let Some(s) = self.kv_statistics.as_ref() { + s.reset(); + } + if let Some(s) = self.raft_statistics.as_ref() { + s.reset(); + } + self.last_reset = now; + } + } +} + +pub struct EnginesResourceInfo { + tablet_registry: TabletRegistry, + raft_engine: Option, + latest_normalized_pending_bytes: AtomicU32, + normalized_pending_bytes_collector: MovingAvgU32, +} + +impl EnginesResourceInfo { + const SCALE_FACTOR: u64 = 100; + + fn new( + tablet_registry: TabletRegistry, + raft_engine: Option, + max_samples_to_preserve: usize, + ) -> Self { + EnginesResourceInfo { + tablet_registry, 
+ raft_engine, + latest_normalized_pending_bytes: AtomicU32::new(0), + normalized_pending_bytes_collector: MovingAvgU32::new(max_samples_to_preserve), + } + } + + pub fn update( + &self, + _now: Instant, + cached_latest_tablets: &mut HashMap>, + ) { + let mut normalized_pending_bytes = 0; + + fn fetch_engine_cf(engine: &RocksEngine, cf: &str, normalized_pending_bytes: &mut u32) { + if let Ok(cf_opts) = engine.get_options_cf(cf) { + if let Ok(Some(b)) = engine.get_cf_pending_compaction_bytes(cf) { + if cf_opts.get_soft_pending_compaction_bytes_limit() > 0 { + *normalized_pending_bytes = std::cmp::max( + *normalized_pending_bytes, + (b * EnginesResourceInfo::SCALE_FACTOR + / cf_opts.get_soft_pending_compaction_bytes_limit()) + as u32, + ); + } + } + } + } + + if let Some(raft_engine) = &self.raft_engine { + fetch_engine_cf(raft_engine, CF_DEFAULT, &mut normalized_pending_bytes); + } + + self.tablet_registry + .for_each_opened_tablet(|id, db: &mut CachedTablet| { + cached_latest_tablets.insert(id, db.clone()); + true + }); + + // todo(SpadeA): Now, there's a potential race condition problem where the + // tablet could be destroyed after the clone and before the fetching + // which could result in programme panic. It's okay now as the single global + // kv_engine will not be destroyed in normal operation and v2 is not + // ready for operation. Furthermore, this race condition is general to v2 as + // tablet clone is not a case exclusively happened here. We should + // propose another PR to tackle it such as destory tablet lazily in a GC + // thread. + + for (_, cache) in cached_latest_tablets.iter_mut() { + let Some(tablet) = cache.latest() else { continue }; + for cf in &[CF_DEFAULT, CF_WRITE, CF_LOCK] { + fetch_engine_cf(tablet, cf, &mut normalized_pending_bytes); + } + } + + // Clear ensures that these tablets are not hold forever. 
+ cached_latest_tablets.clear(); + + let (_, avg) = self + .normalized_pending_bytes_collector + .add(normalized_pending_bytes); + self.latest_normalized_pending_bytes.store( + std::cmp::max(normalized_pending_bytes, avg), + Ordering::Relaxed, + ); + } +} + +impl IoBudgetAdjustor for EnginesResourceInfo { + fn adjust(&self, total_budgets: usize) -> usize { + let score = self.latest_normalized_pending_bytes.load(Ordering::Relaxed) as f32 + / Self::SCALE_FACTOR as f32; + // Two reasons for adding `sqrt` on top: + // 1) In theory the convergence point is independent of the value of pending + // bytes (as long as backlog generating rate equals consuming rate, which is + // determined by compaction budgets), a convex helps reach that point while + // maintaining low level of pending bytes. + // 2) Variance of compaction pending bytes grows with its magnitude, a filter + // with decreasing derivative can help balance such trend. + let score = score.sqrt(); + // The target global write flow slides between Bandwidth / 2 and Bandwidth. 
+ let score = 0.5 + score / 2.0; + (total_budgets as f32 * score) as usize + } +} + +#[cfg(test)] +mod test { + use std::{ + collections::HashMap, + sync::{atomic::Ordering, Arc}, + }; + + use engine_rocks::raw::Env; + use engine_traits::{ + FlowControlFactorsExt, MiscExt, SyncMutable, TabletContext, TabletRegistry, CF_DEFAULT, + }; + use tempfile::Builder; + use tikv::{config::TikvConfig, server::KvEngineFactoryBuilder}; + use tikv_util::{config::ReadableSize, time::Instant}; + + use super::EnginesResourceInfo; + + #[test] + fn test_engines_resource_info_update() { + let mut config = TikvConfig::default(); + config.rocksdb.defaultcf.disable_auto_compactions = true; + config.rocksdb.defaultcf.soft_pending_compaction_bytes_limit = Some(ReadableSize(1)); + config.rocksdb.writecf.soft_pending_compaction_bytes_limit = Some(ReadableSize(1)); + config.rocksdb.lockcf.soft_pending_compaction_bytes_limit = Some(ReadableSize(1)); + let env = Arc::new(Env::default()); + let path = Builder::new().prefix("test-update").tempdir().unwrap(); + let cache = config.storage.block_cache.build_shared_cache(); + + let factory = KvEngineFactoryBuilder::new(env, &config, cache).build(); + let reg = TabletRegistry::new(Box::new(factory), path.path().join("tablets")).unwrap(); + + for i in 1..6 { + let ctx = TabletContext::with_infinite_region(i, Some(10)); + reg.load(ctx, true).unwrap(); + } + + let mut cached = reg.get(1).unwrap(); + let mut tablet = cached.latest().unwrap(); + // Prepare some data for two tablets of the same region. So we can test whether + // we fetch the bytes from the latest one. 
+ for i in 1..21 { + tablet.put_cf(CF_DEFAULT, b"key", b"val").unwrap(); + if i % 2 == 0 { + tablet.flush_cf(CF_DEFAULT, true).unwrap(); + } + } + let old_pending_compaction_bytes = tablet + .get_cf_pending_compaction_bytes(CF_DEFAULT) + .unwrap() + .unwrap(); + + let ctx = TabletContext::with_infinite_region(1, Some(20)); + reg.load(ctx, true).unwrap(); + tablet = cached.latest().unwrap(); + + for i in 1..11 { + tablet.put_cf(CF_DEFAULT, b"key", b"val").unwrap(); + if i % 2 == 0 { + tablet.flush_cf(CF_DEFAULT, true).unwrap(); + } + } + let new_pending_compaction_bytes = tablet + .get_cf_pending_compaction_bytes(CF_DEFAULT) + .unwrap() + .unwrap(); + + assert!(old_pending_compaction_bytes > new_pending_compaction_bytes); + + let engines_info = Arc::new(EnginesResourceInfo::new(reg, None, 10)); + + let mut cached_latest_tablets = HashMap::default(); + engines_info.update(Instant::now(), &mut cached_latest_tablets); + + // The memory allocation should be reserved + assert!(cached_latest_tablets.capacity() >= 5); + // The tablet cache should be cleared + assert!(cached_latest_tablets.is_empty()); + + // The latest_normalized_pending_bytes should be equal to the pending compaction + // bytes of tablet_1_20 + assert_eq!( + (new_pending_compaction_bytes * 100) as u32, + engines_info + .latest_normalized_pending_bytes + .load(Ordering::Relaxed) + ); + } +} diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 12d9982fea6..2521347ec18 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -523,9 +523,9 @@ impl ServerCluster { copr.clone(), copr_v2.clone(), resolver.clone(), - snap_mgr.clone(), + tikv_util::Either::Left(snap_mgr.clone()), gc_worker.clone(), - check_leader_scheduler.clone(), + Some(check_leader_scheduler.clone()), self.env.clone(), None, debug_thread_pool.clone(), @@ -795,6 +795,10 @@ impl Cluster { } panic!("failed to get snapshot of region {}", region_id); } + + 
pub fn raft_extension(&self, node_id: u64) -> SimulateRaftExtension { + self.sim.rl().storages[&node_id].raft_extension() + } } pub fn new_server_cluster(id: u64, count: usize) -> Cluster { diff --git a/components/tikv_util/src/sys/mod.rs b/components/tikv_util/src/sys/mod.rs index 35d417db650..49e6812b81f 100644 --- a/components/tikv_util/src/sys/mod.rs +++ b/components/tikv_util/src/sys/mod.rs @@ -9,9 +9,10 @@ pub mod ioload; pub mod thread; // re-export some traits for ease of use -#[cfg(target_os = "linux")] -use std::path::PathBuf; -use std::sync::atomic::{AtomicU64, Ordering}; +use std::{ + path::Path, + sync::atomic::{AtomicU64, Ordering}, +}; use fail::fail_point; #[cfg(target_os = "linux")] @@ -162,13 +163,13 @@ pub fn cache_line_size(level: usize) -> Option { } #[cfg(target_os = "linux")] -pub fn path_in_diff_mount_point(path1: &str, path2: &str) -> bool { - if path1.is_empty() || path2.is_empty() { +pub fn path_in_diff_mount_point(path1: impl AsRef, path2: impl AsRef) -> bool { + let (path1, path2) = (path1.as_ref(), path2.as_ref()); + let empty_path = |p: &Path| p.to_str().map_or(false, |s| s.is_empty()); + if empty_path(path1) || empty_path(path2) { return false; } - let path1 = PathBuf::from(path1); - let path2 = PathBuf::from(path2); - match (get_mount(&path1), get_mount(&path2)) { + match (get_mount(path1), get_mount(path2)) { (Err(e1), _) => { warn!("Get mount point error for path {}, {}", path1.display(), e1); false @@ -190,7 +191,7 @@ pub fn path_in_diff_mount_point(path1: &str, path2: &str) -> bool { } #[cfg(not(target_os = "linux"))] -pub fn path_in_diff_mount_point(_path1: &str, _path2: &str) -> bool { +pub fn path_in_diff_mount_point(_path1: impl AsRef, _path2: impl AsRef) -> bool { false } diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index b3a445a1f7e..bcfd542035b 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -11,9 +11,12 @@ use raftstore::{ coprocessor::CoprocessorHost, 
store::{GlobalReplicationState, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX}, }; -use raftstore_v2::{router::RaftRouter, Bootstrap, StoreSystem}; +use raftstore_v2::{router::RaftRouter, Bootstrap, PdTask, StoreSystem}; use slog::{info, o, Logger}; -use tikv_util::{config::VersionTrack, worker::Worker}; +use tikv_util::{ + config::VersionTrack, + worker::{LazyWorker, Worker}, +}; use crate::server::{node::init_store, Result}; @@ -21,13 +24,10 @@ use crate::server::{node::init_store, Result}; pub struct NodeV2 { cluster_id: u64, store: metapb::Store, - store_cfg: Arc>, - system: StoreSystem, + system: Option<(RaftRouter, StoreSystem)>, has_started: bool, pd_client: Arc, - state: Arc>, - bg_worker: Worker, registry: TabletRegistry, logger: Logger, } @@ -40,12 +40,8 @@ where { /// Creates a new Node. pub fn new( - system: StoreSystem, cfg: &crate::server::Config, - store_cfg: Arc>, pd_client: Arc, - state: Arc>, - bg_worker: Worker, store: Option, registry: TabletRegistry, ) -> NodeV2 { @@ -54,18 +50,19 @@ where NodeV2 { cluster_id: cfg.cluster_id, store, - store_cfg, pd_client, - system, + system: None, has_started: false, - state, - bg_worker, registry, logger: slog_global::borrow_global().new(o!()), } } - pub fn try_bootstrap_store(&mut self, raft_engine: &ER) -> Result<()> { + pub fn try_bootstrap_store( + &mut self, + cfg: &raftstore_v2::Config, + raft_engine: &ER, + ) -> Result<()> { let store_id = Bootstrap::new( raft_engine, self.cluster_id, @@ -74,9 +71,19 @@ where ) .bootstrap_store()?; self.store.set_id(store_id); + let (router, system) = + raftstore_v2::create_store_batch_system(cfg, store_id, self.logger.clone()); + self.system = Some(( + RaftRouter::new(store_id, self.registry.clone(), router), + system, + )); Ok(()) } + pub fn router(&self) -> &RaftRouter { + &self.system.as_ref().unwrap().0 + } + /// Starts the Node. It tries to bootstrap cluster if the cluster is not /// bootstrapped yet. 
Then it spawns a thread to run the raftstore in /// background. @@ -84,19 +91,21 @@ where &mut self, raft_engine: ER, trans: T, - router: &RaftRouter, snap_mgr: TabletSnapManager, concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 coprocessor_host: CoprocessorHost, background: Worker, + pd_worker: LazyWorker, + store_cfg: Arc>, + state: &Mutex, ) -> Result<()> where T: Transport + 'static, { let store_id = self.id(); { - let mut meta = router.store_meta().lock().unwrap(); + let mut meta = self.router().store_meta().lock().unwrap(); meta.store_id = Some(store_id); } if let Some(region) = Bootstrap::new( @@ -121,17 +130,18 @@ where // Put store only if the cluster is bootstrapped. info!(self.logger, "put store to PD"; "store" => ?&self.store); let status = self.pd_client.put_store(self.store.clone())?; - self.load_all_stores(status); + self.load_all_stores(state, status); self.start_store( raft_engine, trans, - router, snap_mgr, concurrency_manager, causal_ts_provider, coprocessor_host, background, + pd_worker, + store_cfg, )?; Ok(()) @@ -142,6 +152,10 @@ where self.store.get_id() } + pub fn logger(&self) -> Logger { + self.logger.clone() + } + /// Gets a copy of Store which is registered to Pd. pub fn store(&self) -> metapb::Store { self.store.clone() @@ -153,13 +167,17 @@ where // Do we really need to do the check giving we don't consider support upgrade // ATM? 
- fn load_all_stores(&mut self, status: Option) { + fn load_all_stores( + &mut self, + state: &Mutex, + status: Option, + ) { info!(self.logger, "initializing replication mode"; "status" => ?status, "store_id" => self.store.id); let stores = match self.pd_client.get_all_stores(false) { Ok(stores) => stores, Err(e) => panic!("failed to load all stores: {:?}", e), }; - let mut state = self.state.lock().unwrap(); + let mut state = state.lock().unwrap(); if let Some(s) = status { state.set_status(s); } @@ -174,12 +192,13 @@ where &mut self, raft_engine: ER, trans: T, - router: &RaftRouter, snap_mgr: TabletSnapManager, concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 coprocessor_host: CoprocessorHost, background: Worker, + pd_worker: LazyWorker, + store_cfg: Arc>, ) -> Result<()> where T: Transport + 'static, @@ -191,11 +210,12 @@ where return Err(box_err!("{} is already started", store_id)); } self.has_started = true; - let cfg = self.store_cfg.clone(); - self.system.start( + let (router, system) = self.system.as_mut().unwrap(); + + system.start( store_id, - cfg, + store_cfg, raft_engine, self.registry.clone(), trans, @@ -207,6 +227,7 @@ where causal_ts_provider, coprocessor_host, background, + pd_worker, )?; Ok(()) } @@ -214,8 +235,8 @@ where /// Stops the Node. 
pub fn stop(&mut self) { let store_id = self.store.get_id(); + let Some((_, mut system)) = self.system.take() else { return }; info!(self.logger, "stop raft store thread"; "store_id" => store_id); - self.system.shutdown(); - self.bg_worker.stop(); + system.shutdown(); } } diff --git a/src/server/server.rs b/src/server/server.rs index 428aee31090..22ab1682309 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -13,7 +13,7 @@ use futures::{compat::Stream01CompatExt, stream::StreamExt}; use grpcio::{ChannelBuilder, Environment, ResourceQuota, Server as GrpcServer, ServerBuilder}; use grpcio_health::{create_health, HealthService, ServingStatus}; use kvproto::tikvpb::*; -use raftstore::store::{CheckLeaderTask, SnapManager}; +use raftstore::store::{CheckLeaderTask, SnapManager, TabletSnapManager}; use security::SecurityManager; use tikv_util::{ config::VersionTrack, @@ -39,7 +39,7 @@ use crate::{ coprocessor::Endpoint, coprocessor_v2, read_pool::ReadPool, - server::{gc_worker::GcWorker, Proxy}, + server::{gc_worker::GcWorker, tablet_snap::TabletRunner, Proxy}, storage::{lock_manager::LockManager, Engine, Storage}, tikv_util::sys::thread::ThreadBuildWrapper, }; @@ -67,7 +67,7 @@ pub struct Server { trans: ServerTransport, raft_router: E::RaftExtension, // For sending/receiving snapshots. - snap_mgr: SnapManager, + snap_mgr: Either, snap_worker: LazyWorker, // Currently load statistics is done in the thread. 
@@ -94,9 +94,9 @@ where copr: Endpoint, copr_v2: coprocessor_v2::Endpoint, resolver: S, - snap_mgr: SnapManager, + snap_mgr: Either, gc_worker: GcWorker, - check_leader_scheduler: Scheduler, + check_leader_scheduler: Option>, env: Arc, yatp_read_pool: Option, debug_thread_pool: Arc, @@ -252,14 +252,28 @@ where cfg: Arc>, security_mgr: Arc, ) -> Result<()> { - let snap_runner = SnapHandler::new( - Arc::clone(&self.env), - self.snap_mgr.clone(), - self.raft_router.clone(), - security_mgr, - Arc::clone(&cfg), - ); - self.snap_worker.start(snap_runner); + match self.snap_mgr.clone() { + Either::Left(mgr) => { + let snap_runner = SnapHandler::new( + self.env.clone(), + mgr, + self.raft_router.clone(), + security_mgr, + cfg, + ); + self.snap_worker.start(snap_runner); + } + Either::Right(mgr) => { + let snap_runner = TabletRunner::new( + self.env.clone(), + mgr, + self.raft_router.clone(), + security_mgr, + cfg, + ); + self.snap_worker.start(snap_runner); + } + } let mut grpc_server = self.builder_or_server.take().unwrap().right().unwrap(); info!("listening on addr"; "addr" => &self.local_addr); @@ -564,9 +578,9 @@ mod tests { quick_fail: Arc::clone(&quick_fail), addr: Arc::clone(&addr), }, - SnapManager::new(""), + Either::Left(SnapManager::new("")), gc_worker, - check_leader_scheduler, + Some(check_leader_scheduler), env, None, debug_thread_pool, diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 88ed0c99443..68a200b045e 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -65,6 +65,7 @@ const GRPC_MSG_NOTIFY_SIZE: usize = 8; pub struct Service { store_id: u64, /// Used to handle requests related to GC. + // TODO: make it Some after GC is supported for v2. gc_worker: GcWorker, // For handling KV requests. storage: Storage, @@ -75,7 +76,7 @@ pub struct Service { // For handling snapshot. snap_scheduler: Scheduler, // For handling `CheckLeader` request. 
- check_leader_scheduler: Scheduler, + check_leader_scheduler: Option>, enable_req_batch: bool, @@ -114,7 +115,7 @@ impl Service { copr: Endpoint, copr_v2: coprocessor_v2::Endpoint, snap_scheduler: Scheduler, - check_leader_scheduler: Scheduler, + check_leader_scheduler: Option>, grpc_thread_load: Arc, enable_req_batch: bool, proxy: Proxy, @@ -908,6 +909,7 @@ impl Tikv for Service { let (cb, resp) = paired_future_callback(); let check_leader_scheduler = self.check_leader_scheduler.clone(); let task = async move { + let Some(check_leader_scheduler) = check_leader_scheduler else { return Err(box_err!("check leader is not supported")) }; check_leader_scheduler .schedule(CheckLeaderTask::CheckLeader { leaders, cb }) .map_err(|e| Error::Other(format!("{}", e).into()))?; @@ -945,6 +947,7 @@ impl Tikv for Service { let (cb, resp) = paired_future_callback(); let check_leader_scheduler = self.check_leader_scheduler.clone(); let task = async move { + let Some(check_leader_scheduler) = check_leader_scheduler else { return Err(box_err!("check leader is not supported")) }; check_leader_scheduler .schedule(CheckLeaderTask::GetStoreTs { key_range, cb }) .map_err(|e| Error::Other(format!("{}", e).into()))?; diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 78302550fd5..2f87c5d0264 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -4,7 +4,6 @@ mod profile; use std::{ error::Error as StdError, - marker::PhantomData, net::SocketAddr, path::PathBuf, pin::Pin, @@ -16,7 +15,6 @@ use std::{ use async_stream::stream; use collections::HashMap; -use engine_traits::KvEngine; use flate2::{write::GzEncoder, Compression}; use futures::{ compat::{Compat01As03, Stream01CompatExt}, @@ -45,10 +43,10 @@ pub use profile::{ read_file, start_one_cpu_profile, start_one_heap_profile, }; use prometheus::TEXT_FORMAT; -use raftstore::store::{transport::CasualRouter, CasualMessage}; use regex::Regex; use security::{self, SecurityConfig}; 
use serde_json::Value; +use tikv_kv::RaftExtension; use tikv_util::{ logger::set_log_level, metrics::{dump, dump_to}, @@ -82,7 +80,7 @@ struct LogLevelRequest { pub log_level: LogLevel, } -pub struct StatusServer { +pub struct StatusServer { thread_pool: Runtime, tx: Sender<()>, rx: Option>, @@ -91,12 +89,10 @@ pub struct StatusServer { router: R, security_config: Arc, store_path: PathBuf, - _snap: PhantomData, } -impl StatusServer +impl StatusServer where - E: 'static, R: 'static + Send, { pub fn new( @@ -124,7 +120,6 @@ where router, security_config, store_path, - _snap: PhantomData, }) } @@ -423,10 +418,9 @@ where } } -impl StatusServer +impl StatusServer where - E: KvEngine, - R: 'static + Send + CasualRouter + Clone, + R: 'static + Send + RaftExtension + Clone, { pub async fn dump_region_meta(req: Request, router: R) -> hyper::Result> { lazy_static! { @@ -451,33 +445,18 @@ where )); } }; - let (tx, rx) = oneshot::channel(); - match router.send( - id, - CasualMessage::AccessPeer(Box::new(move |meta| { - if let Err(meta) = tx.send(meta) { - error!("receiver dropped, region meta: {:?}", meta) - } - })), - ) { - Ok(_) => (), - Err(raftstore::Error::RegionNotFound(_)) => { + let f = router.query_region(id); + let meta = match f.await { + Ok(meta) => meta, + Err(tikv_kv::Error(box tikv_kv::ErrorInner::Request(header))) + if header.has_region_not_found() => + { return not_found(format!("region({}) not found", id)); } Err(err) => { return Ok(make_response( StatusCode::INTERNAL_SERVER_ERROR, - format!("channel pending or disconnect: {}", err), - )); - } - } - - let meta = match rx.await { - Ok(meta) => meta, - Err(_) => { - return Ok(make_response( - StatusCode::INTERNAL_SERVER_ERROR, - "query cancelled", + format!("query failed: {}", err), )); } }; @@ -938,17 +917,21 @@ mod tests { use std::{env, io::Read, path::PathBuf, sync::Arc}; use collections::HashSet; - use engine_test::kv::KvTestEngine; use flate2::read::GzDecoder; - use futures::{executor::block_on, 
future::ok, prelude::*}; + use futures::{ + executor::block_on, + future::{ok, BoxFuture}, + prelude::*, + }; use http::header::{HeaderValue, ACCEPT_ENCODING}; use hyper::{body::Buf, client::HttpConnector, Body, Client, Method, Request, StatusCode, Uri}; use hyper_openssl::HttpsConnector; use online_config::OnlineConfig; use openssl::ssl::{SslConnector, SslFiletype, SslMethod}; - use raftstore::store::{transport::CasualRouter, CasualMessage}; + use raftstore::store::region_meta::RegionMeta; use security::SecurityConfig; use test_util::new_security_cfg; + use tikv_kv::RaftExtension; use tikv_util::logger::get_log_level; use crate::{ @@ -959,9 +942,9 @@ mod tests { #[derive(Clone)] struct MockRouter; - impl CasualRouter for MockRouter { - fn send(&self, region_id: u64, _: CasualMessage) -> raftstore::Result<()> { - Err(raftstore::Error::RegionNotFound(region_id)) + impl RaftExtension for MockRouter { + fn query_region(&self, region_id: u64) -> BoxFuture<'static, tikv_kv::Result> { + Box::pin(async move { Err(raftstore::Error::RegionNotFound(region_id).into()) }) } } diff --git a/tests/integrations/server/status_server.rs b/tests/integrations/server/status_server.rs index 455465d87cb..929a7c286ae 100644 --- a/tests/integrations/server/status_server.rs +++ b/tests/integrations/server/status_server.rs @@ -5,9 +5,8 @@ use std::{error::Error, net::SocketAddr, sync::Arc}; use hyper::{body, Client, StatusCode, Uri}; use raftstore::store::region_meta::RegionMeta; use security::SecurityConfig; -use test_raftstore::{new_server_cluster, Simulator}; +use test_raftstore::new_server_cluster; use tikv::{config::ConfigController, server::status_server::StatusServer}; -use tikv_util::HandyRwLock; async fn check(authority: SocketAddr, region_id: u64) -> Result<(), Box> { let client = Client::new(); @@ -39,13 +38,12 @@ fn test_region_meta_endpoint() { let peer = region.get_peers().get(0); assert!(peer.is_some()); let store_id = peer.unwrap().get_store_id(); - let router = 
cluster.sim.rl().get_router(store_id); - assert!(router.is_some()); + let router = cluster.raft_extension(store_id); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), - router.unwrap(), + router, std::env::temp_dir(), ) .unwrap(); From a422de9d27d96d2bfc627f9e53f655bd9fd00b9f Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 23 Dec 2022 13:50:14 +0800 Subject: [PATCH 0425/1149] *: make gc not write db directly (#13982) ref tikv/tikv#12842 We rely on non-concurrent memtable write for dynamic regions to achieve best performance. This PR makes sure writes of compaction filter be redirected to apply thread when dynamic regions is enabled. The solution may miss data if TiKV crashes before writes are flushed to disk. Note even for v1, it's also possible to leave garbage if writes to rocksdb fail. We need to scan default CFs and check for orphan versions. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/fsm/apply.rs | 1 + components/raftstore-v2/src/fsm/peer.rs | 6 + .../raftstore-v2/src/operation/command/mod.rs | 26 ++++ .../src/operation/command/write/mod.rs | 33 ++++- .../src/router/internal_message.rs | 2 + components/raftstore-v2/src/router/message.rs | 14 ++ src/server/gc_worker/compaction_filter.rs | 128 +++++++++++++----- src/server/gc_worker/gc_worker.rs | 82 +++++++++-- .../gc_worker/rawkv_compaction_filter.rs | 13 +- src/server/raftkv2/mod.rs | 47 ++++--- tests/failpoints/cases/test_gc_worker.rs | 2 +- 11 files changed, 277 insertions(+), 77 deletions(-) diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 07a577e0c35..c0eabd2120e 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -113,6 +113,7 @@ impl ApplyFsm { // TODO: flush by buffer size. 
ApplyTask::CommittedEntries(ce) => self.apply.apply_committed_entries(ce).await, ApplyTask::Snapshot(snap_task) => self.apply.schedule_gen_snapshot(snap_task), + ApplyTask::UnsafeWrite(raw_write) => self.apply.apply_unsafe_write(raw_write), } // TODO: yield after some time. diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 1ef9e198130..4b22554e694 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -243,6 +243,12 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, write.ch, ); } + PeerMsg::UnsafeWrite(write) => { + self.on_receive_command(write.send_time); + self.fsm + .peer_mut() + .on_unsafe_write(self.store_ctx, write.data); + } PeerMsg::Tick(tick) => self.on_tick(tick), PeerMsg::ApplyRes(res) => self.fsm.peer.on_apply_res(self.store_ctx, res), PeerMsg::SplitInit(msg) => self.fsm.peer.on_split_init(self.store_ctx, msg), diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index fce01f19277..49040a20278 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -319,6 +319,32 @@ impl Apply { } impl Apply { + pub fn apply_unsafe_write(&mut self, data: Box<[u8]>) { + let decoder = match SimpleWriteReqDecoder::new(&self.logger, &data, u64::MAX, u64::MAX) { + Ok(decoder) => decoder, + Err(req) => unreachable!("unexpected request: {:?}", req), + }; + for req in decoder { + match req { + SimpleWrite::Put(put) => { + let _ = self.apply_put(put.cf, u64::MAX, put.key, put.value); + } + SimpleWrite::Delete(delete) => { + let _ = self.apply_delete(delete.cf, u64::MAX, delete.key); + } + SimpleWrite::DeleteRange(dr) => { + let _ = self.apply_delete_range( + dr.cf, + u64::MAX, + dr.start_key, + dr.end_key, + dr.notify_only, + ); + } + } + } + } + #[inline] pub async fn apply_committed_entries(&mut self, ce: 
CommittedEntries) { fail::fail_point!("APPLY_COMMITTED_ENTRIES"); diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 51beeee7dea..ad6e537b956 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -16,7 +16,7 @@ use crate::{ batch::StoreContext, operation::cf_offset, raft::{Apply, Peer}, - router::CmdResChannel, + router::{ApplyTask, CmdResChannel}, }; mod simple_write; @@ -71,6 +71,29 @@ impl Peer { self.simple_write_encoder_mut().replace(encoder); } + #[inline] + pub fn on_unsafe_write( + &mut self, + ctx: &mut StoreContext, + data: SimpleWriteBinary, + ) { + if !self.serving() { + return; + } + let bin = SimpleWriteReqEncoder::new( + Box::::default(), + data, + ctx.cfg.raft_entry_max_size.0 as usize, + false, + ) + .encode() + .0 + .into_boxed_slice(); + if let Some(scheduler) = self.apply_scheduler() { + scheduler.send(ApplyTask::UnsafeWrite(bin)); + } + } + pub fn propose_pending_writes(&mut self, ctx: &mut StoreContext) { if let Some(encoder) = self.simple_write_encoder_mut().take() { let call_proposed_on_success = if encoder.notify_proposed() { @@ -140,7 +163,9 @@ impl Apply { "aborted by failpoint".into() ))); self.metrics.size_diff_hint += (self.key_buffer.len() + value.len()) as i64; - self.modifications_mut()[off] = index; + if index != u64::MAX { + self.modifications_mut()[off] = index; + } Ok(()) } @@ -171,7 +196,9 @@ impl Apply { ); }); self.metrics.size_diff_hint -= self.key_buffer.len() as i64; - self.modifications_mut()[off] = index; + if index != u64::MAX { + self.modifications_mut()[off] = index; + } Ok(()) } diff --git a/components/raftstore-v2/src/router/internal_message.rs b/components/raftstore-v2/src/router/internal_message.rs index 4c317a22abd..05e1baea1cf 100644 --- a/components/raftstore-v2/src/router/internal_message.rs +++ 
b/components/raftstore-v2/src/router/internal_message.rs @@ -8,6 +8,8 @@ use crate::operation::{AdminCmdResult, CommittedEntries, DataTrace, GenSnapTask} pub enum ApplyTask { CommittedEntries(CommittedEntries), Snapshot(GenSnapTask), + /// Writes that doesn't care consistency. + UnsafeWrite(Box<[u8]>), } #[derive(Debug, Default)] diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index faed3c0751d..a69f6b5ead6 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -119,6 +119,12 @@ pub struct SimpleWrite { pub ch: CmdResChannel, } +#[derive(Debug)] +pub struct UnsafeWrite { + pub send_time: Instant, + pub data: SimpleWriteBinary, +} + /// Message that can be sent to a peer. #[derive(Debug)] pub enum PeerMsg { @@ -132,6 +138,7 @@ pub enum PeerMsg { /// Command changes the inernal states. It will be transformed into logs and /// applied on all replicas. SimpleWrite(SimpleWrite), + UnsafeWrite(UnsafeWrite), /// Command that contains admin requests. AdminCommand(RaftRequest), /// Tick is periodical task. 
If target peer doesn't exist there is a @@ -206,6 +213,13 @@ impl PeerMsg { ) } + pub fn unsafe_write(data: SimpleWriteBinary) -> Self { + PeerMsg::UnsafeWrite(UnsafeWrite { + send_time: Instant::now(), + data, + }) + } + pub fn request_split( epoch: metapb::RegionEpoch, split_keys: Vec>, diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index 4c494d6f01f..5d33346a844 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -20,9 +20,7 @@ use engine_rocks::{ }, RocksEngine, RocksMvccProperties, RocksWriteBatchVec, }; -use engine_traits::{ - KvEngine, MiscExt, Mutable, MvccProperties, WriteBatch, WriteBatchExt, WriteOptions, -}; +use engine_traits::{KvEngine, MiscExt, MvccProperties, WriteBatch, WriteOptions}; use file_system::{IoType, WithIoType}; use pd_client::{Feature, FeatureGate}; use prometheus::{local::*, *}; @@ -30,6 +28,7 @@ use raftstore::coprocessor::RegionInfoProvider; use tikv_util::{ time::Instant, worker::{ScheduleError, Scheduler}, + Either, }; use txn_types::{Key, TimeStamp, WriteRef, WriteType}; @@ -51,7 +50,7 @@ const COMPACTION_FILTER_GC_FEATURE: Feature = Feature::require(5, 0, 0); // these fields are not available when constructing // `WriteCompactionFilterFactory`. 
pub struct GcContext { - pub(crate) db: RocksEngine, + pub(crate) db: Option, pub(crate) store_id: u64, pub(crate) safe_point: Arc, pub(crate) cfg_tracker: GcWorkerConfigManager, @@ -154,7 +153,7 @@ where ); } -impl CompactionFilterInitializer for EK +impl CompactionFilterInitializer for Option where EK: KvEngine, { @@ -171,7 +170,7 @@ where } } -impl CompactionFilterInitializer for RocksEngine { +impl CompactionFilterInitializer for Option { fn init_compaction_filter( &self, store_id: u64, @@ -237,7 +236,10 @@ impl CompactionFilterFactory for WriteCompactionFilterFactory { "ratio_threshold" => ratio_threshold, ); - if db.is_stalled_or_stopped() { + if db + .as_ref() + .map_or(false, RocksEngine::is_stalled_or_stopped) + { debug!("skip gc in compaction filter because the DB is stalled"); return std::ptr::null_mut(); } @@ -277,13 +279,60 @@ impl CompactionFilterFactory for WriteCompactionFilterFactory { } } +pub struct DeleteBatch { + pub batch: Either>, +} + +impl DeleteBatch { + fn new(db: &Option) -> Self + where + EK: KvEngine, + { + Self { + batch: match db { + Some(db) => Either::Left(db.write_batch_with_cap(DEFAULT_DELETE_BATCH_SIZE)), + None => Either::Right(Vec::with_capacity(64)), + }, + } + } + + // `key` has prefix `DATA_KEY`. 
+ fn delete(&mut self, key: &[u8], ts: TimeStamp) -> Result<(), String> { + match &mut self.batch { + Either::Left(batch) => { + let key = Key::from_encoded_slice(key).append_ts(ts); + batch.delete(key.as_encoded())?; + } + Either::Right(keys) => { + let key = Key::from_encoded_slice(keys::origin_key(key)).append_ts(ts); + keys.push(key); + } + } + Ok(()) + } + + fn is_empty(&self) -> bool { + match &self.batch { + Either::Left(batch) => batch.is_empty(), + Either::Right(keys) => keys.is_empty(), + } + } + + pub fn count(&self) -> usize { + match &self.batch { + Either::Left(batch) => batch.count(), + Either::Right(keys) => keys.len(), + } + } +} + struct WriteCompactionFilter { safe_point: u64, - engine: RocksEngine, + engine: Option, is_bottommost_level: bool, encountered_errors: bool, - write_batch: RocksWriteBatchVec, + write_batch: DeleteBatch, gc_scheduler: Scheduler>, // A key batch which is going to be sent to the GC worker. mvcc_deletions: Vec, @@ -312,7 +361,7 @@ struct WriteCompactionFilter { impl WriteCompactionFilter { fn new( - engine: RocksEngine, + engine: Option, safe_point: u64, context: &CompactionFilterContext, gc_scheduler: Scheduler>, @@ -322,7 +371,7 @@ impl WriteCompactionFilter { assert!(safe_point > 0); debug!("gc in compaction filter"; "safe_point" => safe_point); - let write_batch = engine.write_batch_with_cap(DEFAULT_DELETE_BATCH_SIZE); + let write_batch = DeleteBatch::new(&engine); WriteCompactionFilter { safe_point, engine, @@ -469,9 +518,8 @@ impl WriteCompactionFilter { fn handle_filtered_write(&mut self, write: WriteRef<'_>) -> Result<(), String> { if write.short_value.is_none() && write.write_type == WriteType::Put { - let prefix = Key::from_encoded_slice(&self.mvcc_key_prefix); - let def_key = prefix.append_ts(write.start_ts).into_encoded(); - self.write_batch.delete(&def_key)?; + self.write_batch + .delete(&self.mvcc_key_prefix, write.start_ts)?; } Ok(()) } @@ -499,24 +547,40 @@ impl WriteCompactionFilter { } if 
self.write_batch.count() > DEFAULT_DELETE_BATCH_COUNT || force { - let mut wopts = WriteOptions::default(); - wopts.set_no_slowdown(true); - if let Err(e) = do_flush(&mut self.write_batch, &wopts) { - let wb = mem::replace( - &mut self.write_batch, - self.engine.write_batch_with_cap(DEFAULT_DELETE_BATCH_SIZE), - ); - self.orphan_versions += wb.count(); - let id = ORPHAN_VERSIONS_ID.fetch_add(1, Ordering::Relaxed); - let task = GcTask::OrphanVersions { wb, id }; + let err = match &mut self.write_batch.batch { + Either::Left(wb) => { + let mut wopts = WriteOptions::default(); + wopts.set_no_slowdown(true); + match do_flush(wb, &wopts) { + Ok(()) => { + wb.clear(); + return Ok(()); + } + Err(e) => Some(e), + } + } + Either::Right(_) => None, + }; + + let wb = mem::replace(&mut self.write_batch, DeleteBatch::new(&self.engine)); + self.orphan_versions += wb.count(); + let id = ORPHAN_VERSIONS_ID.fetch_add(1, Ordering::Relaxed); + let region_info_provider = self.regions_provider.1.clone(); + let task = GcTask::OrphanVersions { + wb, + id, + region_info_provider, + }; + if let Some(e) = &err { warn!( - "compaction filter flush fail, dispatch to gc worker"; - "task" => %task, "err" => ?e, + "compaction filter flush fail, dispatch to gc worker"; + "task" => %task, "err" => ?e, ); - self.schedule_gc_task(task, true); - return Err(e); } - self.write_batch.clear(); + self.schedule_gc_task(task, true); + if let Some(err) = err { + return Err(err); + } } Ok(()) } @@ -607,7 +671,9 @@ impl Drop for WriteCompactionFilter { if let Err(e) = self.flush_pending_writes_if_need(true) { error!("compaction filter flush writes fail"; "err" => ?e); } - self.engine.sync_wal().unwrap(); + if let Some(engine) = &self.engine { + engine.sync_wal().unwrap(); + } self.switch_key_metrics(); self.flush_metrics(); @@ -831,7 +897,7 @@ pub mod test_utils { let mut gc_context_opt = GC_CONTEXT.lock().unwrap(); *gc_context_opt = Some(GcContext { - db: engine.clone(), + db: Some(engine.clone()), store_id: 
1, safe_point, cfg_tracker, diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 1ccac8860c6..106b36f61ad 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -38,7 +38,7 @@ use txn_types::{Key, TimeStamp}; use super::{ check_need_gc, compaction_filter::{ - CompactionFilterInitializer, GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED, + CompactionFilterInitializer, DeleteBatch, GC_COMPACTION_FILTER_MVCC_DELETION_HANDLED, GC_COMPACTION_FILTER_MVCC_DELETION_WASTED, GC_COMPACTION_FILTER_ORPHAN_VERSIONS, }, config::{GcConfig, GcWorkerConfigManager}, @@ -118,7 +118,11 @@ where /// until `DefaultCompactionFilter` is introduced. /// /// The tracking issue: . - OrphanVersions { wb: E::WriteBatch, id: usize }, + OrphanVersions { + wb: DeleteBatch, + id: usize, + region_info_provider: Arc, + }, #[cfg(any(test, feature = "testexport"))] Validate(Box), } @@ -162,7 +166,7 @@ where .field("start_key", &format!("{}", start_key)) .field("end_key", &format!("{}", end_key)) .finish(), - GcTask::OrphanVersions { id, wb } => f + GcTask::OrphanVersions { id, wb, .. } => f .debug_struct("OrphanVersions") .field("id", id) .field("count", &wb.count()) @@ -871,6 +875,46 @@ impl GcRunner { tikv_kv::snapshot(&mut self.engine, snap_ctx).await })?) } + + fn flush_deletes(&mut self, deletes: Vec, provider: Arc) { + let mut region_modifies = HashMap::default(); + // Should not panic. 
+ let regions = match get_regions_for_range_of_keys(self.store_id, &deletes, provider) { + Ok(r) => r, + Err(e) => { + error!("failed to flush deletes, will leave garbage"; "err" => ?e); + return; + } + }; + if regions.is_empty() { + error!("no region is found, will leave garbage"); + return; + } + let mut keys = deletes.into_iter().peekable(); + let mut modifies = vec![]; + for region in ®ions { + let start_key = region.get_start_key(); + let end_key = region.get_end_key(); + while let Some(key) = keys.peek() { + if key.as_encoded().as_slice() < start_key { + error!("key is not in any region, will leave garbage"; "key" => %key); + keys.next(); + continue; + } + if !end_key.is_empty() && key.as_encoded().as_slice() >= end_key { + break; + } + modifies.push(Modify::Delete(CF_DEFAULT, keys.next().unwrap())); + } + if !modifies.is_empty() { + region_modifies.insert(region.id, modifies); + modifies = vec![]; + } + } + if let Err(e) = self.engine.modify_on_kv_engine(region_modifies) { + error!("failed to flush deletes, will leave garbage"; "err" => ?e); + } + } } impl Runnable for GcRunner { @@ -982,19 +1026,29 @@ impl Runnable for GcRunner { end_key ); } - GcTask::OrphanVersions { mut wb, id } => { - info!("handling GcTask::OrphanVersions"; "id" => id); - let mut wopts = WriteOptions::default(); - wopts.set_sync(true); - if let Err(e) = wb.write_opt(&wopts) { - error!("write GcTask::OrphanVersions fail"; "id" => id, "err" => ?e); - update_metrics(true); - return; + GcTask::OrphanVersions { + wb, + id, + region_info_provider, + } => { + let count = wb.count(); + match wb.batch { + Either::Left(mut wb) => { + info!("handling GcTask::OrphanVersions"; "id" => id); + let mut wopts = WriteOptions::default(); + wopts.set_sync(true); + if let Err(e) = wb.write_opt(&wopts) { + error!("write GcTask::OrphanVersions fail"; "id" => id, "err" => ?e); + update_metrics(true); + return; + } + info!("write GcTask::OrphanVersions success"; "id" => id); + } + Either::Right(deletes) => 
self.flush_deletes(deletes, region_info_provider), } - info!("write GcTask::OrphanVersions success"; "id" => id); GC_COMPACTION_FILTER_ORPHAN_VERSIONS .with_label_values(&[STAT_TXN_KEYMODE, "cleaned"]) - .inc_by(wb.count() as u64); + .inc_by(count as u64); update_metrics(false); } #[cfg(any(test, feature = "testexport"))] @@ -1144,7 +1198,7 @@ impl GcWorker { ); info!("initialize compaction filter to perform GC when necessary"); - self.engine.kv_engine().unwrap().init_compaction_filter( + self.engine.kv_engine().init_compaction_filter( cfg.self_store_id, safe_point.clone(), self.config_manager.clone(), diff --git a/src/server/gc_worker/rawkv_compaction_filter.rs b/src/server/gc_worker/rawkv_compaction_filter.rs index b1174d7d4f3..5e3913f4d40 100644 --- a/src/server/gc_worker/rawkv_compaction_filter.rs +++ b/src/server/gc_worker/rawkv_compaction_filter.rs @@ -48,7 +48,6 @@ impl CompactionFilterFactory for RawCompactionFilterFactory { }; //---------------- GC context END -------------- - let db = gc_context.db.clone(); let gc_scheduler = gc_context.gc_scheduler.clone(); let store_id = gc_context.store_id; let region_info_provider = gc_context.region_info_provider.clone(); @@ -71,7 +70,11 @@ impl CompactionFilterFactory for RawCompactionFilterFactory { "ratio_threshold" => ratio_threshold, ); - if db.is_stalled_or_stopped() { + if gc_context + .db + .as_ref() + .map_or(false, RocksEngine::is_stalled_or_stopped) + { debug!("skip gc in compaction filter because the DB is stalled"); return std::ptr::null_mut(); } @@ -91,7 +94,6 @@ impl CompactionFilterFactory for RawCompactionFilterFactory { } let filter = RawCompactionFilter::new( - db, safe_point, gc_scheduler, current, @@ -105,7 +107,6 @@ impl CompactionFilterFactory for RawCompactionFilterFactory { struct RawCompactionFilter { safe_point: u64, - engine: RocksEngine, is_bottommost_level: bool, gc_scheduler: Scheduler>, current_ts: u64, @@ -135,8 +136,6 @@ impl Drop for RawCompactionFilter { fn drop(&mut self) { 
self.raw_gc_mvcc_deletions(); - self.engine.sync_wal().unwrap(); - self.switch_key_metrics(); self.flush_metrics(); } @@ -172,7 +171,6 @@ impl CompactionFilter for RawCompactionFilter { impl RawCompactionFilter { fn new( - engine: RocksEngine, safe_point: u64, gc_scheduler: Scheduler>, ts: u64, @@ -184,7 +182,6 @@ impl RawCompactionFilter { debug!("gc in compaction filter"; "safe_point" => safe_point); RawCompactionFilter { safe_point, - engine, is_bottommost_level: context.is_bottommost_level(), gc_scheduler, current_ts: ts, diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index f850cc74d19..526a1fab3ca 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -20,7 +20,7 @@ use raftstore_v2::{ router::{ message::SimpleWrite, CmdResChannelBuilder, CmdResEvent, CmdResStream, PeerMsg, RaftRouter, }, - SimpleWriteEncoder, + SimpleWriteBinary, SimpleWriteEncoder, }; use tikv_kv::{Modify, WriteEvent}; use tikv_util::{codec::number::NumberEncoder, time::Instant}; @@ -67,6 +67,26 @@ impl Stream for Transform { } } +fn modifies_to_simple_write(modifies: Vec) -> SimpleWriteBinary { + let mut encoder = SimpleWriteEncoder::with_capacity(128); + for m in modifies { + match m { + Modify::Put(cf, k, v) => encoder.put(cf, k.as_encoded(), &v), + Modify::Delete(cf, k) => encoder.delete(cf, k.as_encoded()), + Modify::PessimisticLock(k, lock) => { + encoder.put(CF_LOCK, k.as_encoded(), &lock.into_lock().to_bytes()) + } + Modify::DeleteRange(cf, start_key, end_key, notify_only) => encoder.delete_range( + cf, + start_key.as_encoded(), + end_key.as_encoded(), + notify_only, + ), + } + } + encoder.encode() +} + #[derive(Clone)] pub struct RaftKv2 { router: RaftRouter, @@ -109,9 +129,12 @@ impl tikv_kv::Engine for RaftKv2 { fn modify_on_kv_engine( &self, - _region_modifies: collections::HashMap>, + region_modifies: collections::HashMap>, ) -> tikv_kv::Result<()> { - // TODO + for (region_id, batch) in region_modifies { + let bin = 
modifies_to_simple_write(batch); + let _ = self.router.send(region_id, PeerMsg::unsafe_write(bin)); + } Ok(()) } @@ -202,23 +225,7 @@ impl tikv_kv::Engine for RaftKv2 { header.set_flags(flags); self.schedule_txn_extra(batch.extra); - let mut encoder = SimpleWriteEncoder::with_capacity(128); - for m in batch.modifies { - match m { - Modify::Put(cf, k, v) => encoder.put(cf, k.as_encoded(), &v), - Modify::Delete(cf, k) => encoder.delete(cf, k.as_encoded()), - Modify::PessimisticLock(k, lock) => { - encoder.put(CF_LOCK, k.as_encoded(), &lock.into_lock().to_bytes()) - } - Modify::DeleteRange(cf, start_key, end_key, notify_only) => encoder.delete_range( - cf, - start_key.as_encoded(), - end_key.as_encoded(), - notify_only, - ), - } - } - let data = encoder.encode(); + let data = modifies_to_simple_write(batch.modifies); let mut builder = CmdResChannelBuilder::default(); if WriteEvent::subscribed_proposed(subscribed) { builder.subscribe_proposed(); diff --git a/tests/failpoints/cases/test_gc_worker.rs b/tests/failpoints/cases/test_gc_worker.rs index 3dbb7ffc7b0..d24ec85f040 100644 --- a/tests/failpoints/cases/test_gc_worker.rs +++ b/tests/failpoints/cases/test_gc_worker.rs @@ -6,7 +6,7 @@ use std::{ time::Duration, }; -use engine_traits::{Peekable, WriteBatch}; +use engine_traits::Peekable; use grpcio::{ChannelBuilder, Environment}; use keys::data_key; use kvproto::{kvrpcpb::*, metapb::Region, tikvpb::TikvClient}; From 8864e9bac814554bbe4acaddd01282f5fdae3dae Mon Sep 17 00:00:00 2001 From: Jay Date: Sat, 24 Dec 2022 13:02:14 +0800 Subject: [PATCH 0426/1149] v2: fix several panics (#13986) ref tikv/tikv#12842 Perf context is disabled for now as we don't have shared kv engine. And fix region info access panic by filter out uninitialized role change. There are also several other fixes. 
Signed-off-by: Jay Lee --- components/cdc/src/observer.rs | 2 + components/engine_rocks/src/rocks_metrics.rs | 19 ++--- components/raftstore-v2/src/fsm/peer.rs | 1 + .../raftstore-v2/src/operation/ready/mod.rs | 1 + components/raftstore-v2/src/raft/storage.rs | 6 +- .../tests/integrations/cluster.rs | 3 +- components/raftstore/src/coprocessor/mod.rs | 3 + .../src/coprocessor/region_info_accessor.rs | 78 ++++++++++++++++--- components/raftstore/src/store/peer.rs | 1 + components/raftstore/src/store/snap.rs | 15 ++-- components/server/src/server2.rs | 5 +- src/config/mod.rs | 4 + src/coprocessor/tracker.rs | 34 +++++--- src/server/service/kv.rs | 7 +- src/server/tablet_snap.rs | 4 +- src/storage/metrics.rs | 26 ++++--- 16 files changed, 143 insertions(+), 66 deletions(-) diff --git a/components/cdc/src/observer.rs b/components/cdc/src/observer.rs index 7c33d21aadd..696bc6341ee 100644 --- a/components/cdc/src/observer.rs +++ b/components/cdc/src/observer.rs @@ -272,6 +272,7 @@ mod tests { leader_id: 2, prev_lead_transferee: raft::INVALID_ID, vote: raft::INVALID_ID, + initialized: true, }, ); match rx.recv_timeout(Duration::from_millis(10)).unwrap().unwrap() { @@ -299,6 +300,7 @@ mod tests { leader_id: raft::INVALID_ID, prev_lead_transferee: 3, vote: 3, + initialized: true, }, ); match rx.recv_timeout(Duration::from_millis(10)).unwrap().unwrap() { diff --git a/components/engine_rocks/src/rocks_metrics.rs b/components/engine_rocks/src/rocks_metrics.rs index d77f5f2dc99..24ac9eee0b4 100644 --- a/components/engine_rocks/src/rocks_metrics.rs +++ b/components/engine_rocks/src/rocks_metrics.rs @@ -946,7 +946,7 @@ struct DbStats { num_snapshots: Option, oldest_snapshot_time: Option, block_cache_size: Option, - stall_num: Vec>, + stall_num: Option<[u64; ROCKSDB_IOSTALL_KEY.len()]>, } pub struct RocksStatisticsReporter { @@ -966,8 +966,6 @@ impl StatisticsReporter for RocksStatisticsReporter { fn collect(&mut self, engine: &RocksEngine) { let db = engine.as_inner(); - let 
stall_num = ROCKSDB_IOSTALL_KEY.len(); - self.db_stats.stall_num.resize(stall_num, None); for cf in db.cf_names() { let cf_stats = self.cf_stats.entry(cf.to_owned()).or_default(); let handle = crate::util::get_cf_handle(db, cf).unwrap(); @@ -1074,9 +1072,9 @@ impl StatisticsReporter for RocksStatisticsReporter { } if let Some(info) = db.get_map_property_cf(handle, ROCKSDB_CFSTATS) { - for i in 0..stall_num { - *self.db_stats.stall_num[i].get_or_insert_default() += - info.get_property_int_value(ROCKSDB_IOSTALL_KEY[i]); + let stall_num = self.db_stats.stall_num.get_or_insert_default(); + for (key, val) in ROCKSDB_IOSTALL_KEY.iter().zip(stall_num) { + *val += info.get_property_int_value(key); } } } @@ -1228,12 +1226,11 @@ impl StatisticsReporter for RocksStatisticsReporter { .with_label_values(&[&self.name, "all"]) .set(v as i64); } - let stall_num = ROCKSDB_IOSTALL_KEY.len(); - for i in 0..stall_num { - if let Some(v) = self.db_stats.stall_num[i] { + if let Some(stall_num) = &self.db_stats.stall_num { + for (ty, val) in ROCKSDB_IOSTALL_TYPE.iter().zip(stall_num) { STORE_ENGINE_WRITE_STALL_REASON_GAUGE_VEC - .with_label_values(&[&self.name, ROCKSDB_IOSTALL_TYPE[i]]) - .set(v as i64); + .with_label_values(&[&self.name, ty]) + .set(*val as i64); } } } diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 4b22554e694..734c2bf93d4 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -188,6 +188,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, fn on_start(&mut self) { self.schedule_tick(PeerTick::Raft); + self.schedule_tick(PeerTick::SplitRegionCheck); if self.fsm.peer.storage().is_initialized() { self.fsm.peer.schedule_apply_fsm(self.store_ctx); } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index fcab8728916..854fd965d9e 100644 --- 
a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -533,6 +533,7 @@ impl Peer { leader_id: ss.leader_id, prev_lead_transferee: target, vote: self.raft_group().raft.vote, + initialized: self.storage().is_initialized(), }, ); self.proposal_control_mut().maybe_update_term(term); diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index de58d39cce5..bce313eab83 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -348,8 +348,7 @@ mod tests { fn test_apply_snapshot() { let region = new_region(); let path = TempDir::new().unwrap(); - let mgr = TabletSnapManager::new(path.path().join("snap_dir").to_str().unwrap()); - mgr.init().unwrap(); + let mgr = TabletSnapManager::new(path.path().join("snap_dir").to_str().unwrap()).unwrap(); let raft_engine = engine_test::raft::new_engine(&format!("{}", path.path().join("raft").display()), None) .unwrap(); @@ -402,8 +401,7 @@ mod tests { write_initial_states(&mut wb, region.clone()).unwrap(); assert!(!wb.is_empty()); raft_engine.consume(&mut wb, true).unwrap(); - let mgr = TabletSnapManager::new(path.path().join("snap_dir").to_str().unwrap()); - mgr.init().unwrap(); + let mgr = TabletSnapManager::new(path.path().join("snap_dir").to_str().unwrap()).unwrap(); // building a tablet factory let ops = DbOptions::default(); let cf_opts = DATA_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index ca166eab950..064fd9d1cad 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -278,8 +278,7 @@ impl RunningState { let router = RaftRouter::new(store_id, registry.clone(), router); let store_meta = router.store_meta().clone(); - let snap_mgr = 
TabletSnapManager::new(path.join("tablets_snap").to_str().unwrap()); - snap_mgr.init().unwrap(); + let snap_mgr = TabletSnapManager::new(path.join("tablets_snap").to_str().unwrap()).unwrap(); let coprocessor_host = CoprocessorHost::new( router.store_router().clone(), diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 022a44de463..5100e9d4632 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -268,15 +268,18 @@ pub struct RoleChange { pub prev_lead_transferee: u64, /// Which peer is voted by itself. pub vote: u64, + pub initialized: bool, } impl RoleChange { + #[cfg(feature = "testexport")] pub fn new(state: StateRole) -> Self { RoleChange { state, leader_id: raft::INVALID_ID, prev_lead_transferee: raft::INVALID_ID, vote: raft::INVALID_ID, + initialized: true, } } } diff --git a/components/raftstore/src/coprocessor/region_info_accessor.rs b/components/raftstore/src/coprocessor/region_info_accessor.rs index 338cf3962c4..37403310baf 100644 --- a/components/raftstore/src/coprocessor/region_info_accessor.rs +++ b/components/raftstore/src/coprocessor/region_info_accessor.rs @@ -46,11 +46,26 @@ use super::{ /// `RaftStoreEvent` Represents events dispatched from raftstore coprocessor. 
#[derive(Debug)] pub enum RaftStoreEvent { - CreateRegion { region: Region, role: StateRole }, - UpdateRegion { region: Region, role: StateRole }, - DestroyRegion { region: Region }, - RoleChange { region: Region, role: StateRole }, - UpdateRegionBuckets { region: Region, buckets: usize }, + CreateRegion { + region: Region, + role: StateRole, + }, + UpdateRegion { + region: Region, + role: StateRole, + }, + DestroyRegion { + region: Region, + }, + RoleChange { + region: Region, + role: StateRole, + initialized: bool, + }, + UpdateRegionBuckets { + region: Region, + buckets: usize, + }, } impl RaftStoreEvent { @@ -191,7 +206,11 @@ impl RoleObserver for RegionEventListener { fn on_role_change(&self, context: &mut ObserverContext<'_>, role_change: &RoleChange) { let region = context.region().clone(); let role = role_change.state; - let event = RaftStoreEvent::RoleChange { region, role }; + let event = RaftStoreEvent::RoleChange { + region, + role, + initialized: role_change.initialized, + }; self.scheduler .schedule(RegionInfoQuery::RaftStoreEvent(event)) .unwrap(); @@ -426,7 +445,10 @@ impl RegionCollector { // They are impossible to equal, or they cannot overlap. assert_ne!( region.get_region_epoch().get_version(), - current_region.get_region_epoch().get_version() + current_region.get_region_epoch().get_version(), + "{:?} vs {:?}", + region, + current_region, ); // Remove it since it's a out-of-date region info. if clear_regions_in_range { @@ -492,6 +514,10 @@ impl RegionCollector { // epoch is properly set and an Update message was sent. return; } + if let RaftStoreEvent::RoleChange { initialized, .. } = &event && !initialized { + // Ignore uninitialized peers. 
+ return; + } if !self.check_region_range(region, true) { debug!( "Received stale event"; @@ -511,7 +537,7 @@ impl RegionCollector { RaftStoreEvent::DestroyRegion { region } => { self.handle_destroy_region(region); } - RaftStoreEvent::RoleChange { region, role } => { + RaftStoreEvent::RoleChange { region, role, .. } => { self.handle_role_change(region, role); } RaftStoreEvent::UpdateRegionBuckets { region, buckets } => { @@ -988,10 +1014,16 @@ mod tests { } } - fn must_change_role(c: &mut RegionCollector, region: &Region, role: StateRole) { + fn must_change_role( + c: &mut RegionCollector, + region: &Region, + role: StateRole, + initialized: bool, + ) { c.handle_raftstore_event(RaftStoreEvent::RoleChange { region: region.clone(), role, + initialized, }); if let Some(r) = c.regions.get(®ion.get_id()) { @@ -1037,6 +1069,12 @@ mod tests { c.handle_raftstore_event(RaftStoreEvent::RoleChange { region: new_region(1, b"k1", b"k2", 0), role: StateRole::Leader, + initialized: true, + }); + c.handle_raftstore_event(RaftStoreEvent::RoleChange { + region: new_region(1, b"", b"", 3), + role: StateRole::Leader, + initialized: false, }); check_collection(&c, &[]); @@ -1198,9 +1236,15 @@ mod tests { &mut c, &new_region(1, b"k0", b"k1", 2), StateRole::Candidate, + true, ); must_create_region(&mut c, &new_region(5, b"k99", b"", 2), StateRole::Follower); - must_change_role(&mut c, &new_region(2, b"k2", b"k8", 2), StateRole::Leader); + must_change_role( + &mut c, + &new_region(2, b"k2", b"k8", 2), + StateRole::Leader, + true, + ); must_update_region(&mut c, &new_region(2, b"k3", b"k7", 3), StateRole::Leader); // test region buckets update must_update_region_buckets(&mut c, &new_region(2, b"k3", b"k7", 3), 4); @@ -1343,7 +1387,12 @@ mod tests { // which haven't been handled. 
must_create_region(&mut c, &new_region(4, b"k5", b"k9", 2), StateRole::Follower); must_update_region(&mut c, &new_region(2, b"k1", b"k9", 1), StateRole::Follower); - must_change_role(&mut c, &new_region(2, b"k1", b"k9", 1), StateRole::Leader); + must_change_role( + &mut c, + &new_region(2, b"k1", b"k9", 1), + StateRole::Leader, + true, + ); must_update_region(&mut c, &new_region(2, b"k1", b"k5", 2), StateRole::Leader); // TODO: In fact, region 2's role should be follower. However because it's // previous state was removed while creating updating region 4, it can't be @@ -1364,7 +1413,12 @@ mod tests { // handled. must_update_region(&mut c, &new_region(2, b"k1", b"k9", 3), StateRole::Leader); must_update_region(&mut c, &new_region(4, b"k5", b"k9", 2), StateRole::Follower); - must_change_role(&mut c, &new_region(4, b"k5", b"k9", 2), StateRole::Leader); + must_change_role( + &mut c, + &new_region(4, b"k5", b"k9", 2), + StateRole::Leader, + true, + ); must_destroy_region(&mut c, new_region(4, b"k5", b"k9", 2)); check_collection( &c, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 86d16b07506..a72bb59d8bf 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -2282,6 +2282,7 @@ where leader_id: ss.leader_id, prev_lead_transferee: self.lead_transferee, vote: self.raft_group.raft.vote, + initialized: self.is_initialized(), }, ); self.cmd_epoch_checker.maybe_update_term(self.term()); diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 04aef985e3b..05decd62815 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1932,20 +1932,15 @@ impl Display for TabletSnapKey { #[derive(Clone)] pub struct TabletSnapManager { // directory to store snapfile. 
- base: String, + base: PathBuf, } impl TabletSnapManager { - pub fn new>(path: T) -> Self { - Self { base: path.into() } - } - - pub fn init(&self) -> io::Result<()> { + pub fn new>(path: T) -> io::Result { // Initialize the directory if it doesn't exist. - let path = Path::new(&self.base); + let path = path.into(); if !path.exists() { - file_system::create_dir_all(path)?; - return Ok(()); + file_system::create_dir_all(&path)?; } if !path.is_dir() { return Err(io::Error::new( @@ -1953,7 +1948,7 @@ impl TabletSnapManager { format!("{} should be a directory", path.display()), )); } - Ok(()) + Ok(Self { base: path }) } pub fn tablet_gen_path(&self, key: &TabletSnapKey) -> PathBuf { diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index cfda8feb233..620a6b20b74 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -747,7 +747,10 @@ where .unwrap() .to_owned(); - let snap_mgr = TabletSnapManager::new(snap_path); + let snap_mgr = match TabletSnapManager::new(&snap_path) { + Ok(mgr) => mgr, + Err(e) => fatal!("failed to create snapshot manager at {}: {}", snap_path, e), + }; // Create coprocessor endpoint. 
let cop_read_pool_handle = if self.config.readpool.coprocessor.use_unified_pool() { diff --git a/src/config/mod.rs b/src/config/mod.rs index a9cfdb93505..808dd22299c 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2988,6 +2988,10 @@ impl TikvConfig { .to_owned(); } + if self.storage.engine == EngineType::RaftKv2 { + self.raft_store.store_io_pool_size = cmp::max(self.raft_store.store_io_pool_size, 1); + } + self.raft_store.raftdb_path = self.infer_raft_db_path(None)?; self.raft_engine.config.dir = self.infer_raft_engine_path(None)?; diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index 24290701457..d6e146adf11 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -147,7 +147,11 @@ impl Tracker { _ => unreachable!(), } - self.with_perf_context(|perf_context| perf_context.start_observe()); + self.with_perf_context(|perf_context| { + if let Some(c) = perf_context { + c.start_observe(); + } + }); self.current_stage = TrackerState::ItemBegan(now); } @@ -160,7 +164,9 @@ impl Tracker { self.total_storage_stats.add(&storage_stats); } self.with_perf_context(|perf_context| { - perf_context.report_metrics(&[get_tls_tracker_token()]) + if let Some(c) = perf_context { + c.report_metrics(&[get_tls_tracker_token()]); + } }); self.current_stage = TrackerState::ItemFinished(now); } else { @@ -355,7 +361,7 @@ impl Tracker { fn with_perf_context(&self, f: F) -> T where - F: FnOnce(&mut Box) -> T, + F: FnOnce(&mut Option>) -> T, { thread_local! 
{ static SELECT: RefCell>> = RefCell::new(None); @@ -379,15 +385,19 @@ impl Tracker { }; tls_cell.with(|c| { let mut c = c.borrow_mut(); - let perf_context = c.get_or_insert_with(|| unsafe { - with_tls_engine::(|engine| { - Box::new(engine.kv_engine().unwrap().get_perf_context( - PerfLevel::Uninitialized, - PerfContextKind::Coprocessor(self.req_ctx.tag.get_str()), - )) - }) - }); - f(perf_context) + if c.is_none() { + *c = unsafe { + with_tls_engine::(|engine| { + engine.kv_engine().map(|engine| { + Box::new(engine.get_perf_context( + PerfLevel::Uninitialized, + PerfContextKind::Coprocessor(self.req_ctx.tag.get_str()), + )) as Box + }) + }) + }; + } + f(&mut c) }) } } diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 68a200b045e..66fc5060e68 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -945,9 +945,12 @@ impl Tikv for Service { ) { let key_range = request.take_key_range(); let (cb, resp) = paired_future_callback(); - let check_leader_scheduler = self.check_leader_scheduler.clone(); + let check_leader_scheduler = match self.check_leader_scheduler.clone() { + Some(s) => s, + // Avoid print errors if it's not supported. 
+ None => return, + }; let task = async move { - let Some(check_leader_scheduler) = check_leader_scheduler else { return Err(box_err!("check leader is not supported")) }; check_leader_scheduler .schedule(CheckLeaderTask::GetStoreTs { key_range, cb }) .map_err(|e| Error::Other(format!("{}", e).into()))?; diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index 5dd83deb092..b5d989d5370 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -493,7 +493,7 @@ mod tests { msg.mut_message().mut_snapshot().mut_metadata().set_term(1); let send_path = TempDir::new().unwrap(); let send_snap_mgr = - TabletSnapManager::new(send_path.path().join("snap_dir").to_str().unwrap()); + TabletSnapManager::new(send_path.path().join("snap_dir").to_str().unwrap()).unwrap(); let snap_path = send_snap_mgr.tablet_gen_path(&snap_key); create_dir_all(snap_path.as_path()).unwrap(); // send file should skip directory @@ -512,7 +512,7 @@ mod tests { let recv_path = TempDir::new().unwrap(); let recv_snap_manager = - TabletSnapManager::new(recv_path.path().join("snap_dir").to_str().unwrap()); + TabletSnapManager::new(recv_path.path().join("snap_dir").to_str().unwrap()).unwrap(); let (tx, rx) = mpsc::unbounded(); let sink = tx.sink_map_err(Error::from); block_on(send_snap_files( diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index e84a7dfb4e9..080ff2c5951 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -347,17 +347,23 @@ where }; tls_cell.with(|c| { let mut c = c.borrow_mut(); - let perf_context = c.get_or_insert_with(|| { - with_tls_engine(|engine: &mut E| { - Box::new(engine.kv_engine().unwrap().get_perf_context( - PerfLevel::Uninitialized, - PerfContextKind::Storage(cmd.get_str()), - )) - }) - }); - perf_context.start_observe(); + if c.is_none() { + *c = with_tls_engine(|engine: &mut E| { + engine.kv_engine().map(|c| { + Box::new(c.get_perf_context( + PerfLevel::Uninitialized, + PerfContextKind::Storage(cmd.get_str()), + )) as 
Box + }) + }); + }; + if let Some(c) = &mut *c { + c.start_observe(); + } let res = f(); - perf_context.report_metrics(&[get_tls_tracker_token()]); + if let Some(c) = &mut *c { + c.report_metrics(&[get_tls_tracker_token()]); + } res }) } From 8ec3cea85259f4eb91db5a49ca2ab6631ffbd6fe Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 27 Dec 2022 10:00:16 +0800 Subject: [PATCH 0427/1149] raftstore-v2: fix several errors (#13988) ref tikv/tikv#12842 This PR fixes several issues: - raftstore CPU usage missing in grafana - restart fails because incorrect path asserts - restart fails because of missing flush records - get snapshots fails occasionally because of lease not renew Signed-off-by: Jay Lee --- .../raftstore-v2/src/operation/query/local.rs | 90 ++++++++++++------- .../raftstore-v2/src/operation/query/mod.rs | 2 + .../src/operation/ready/snapshot.rs | 10 ++- .../tests/integrations/test_split.rs | 22 ++++- components/raftstore/src/store/fsm/peer.rs | 2 + components/raftstore/src/store/mod.rs | 8 +- components/raftstore/src/store/region_meta.rs | 11 ++- metrics/grafana/performance_write.json | 2 +- metrics/grafana/tikv_details.json | 2 +- metrics/grafana/tikv_summary.json | 2 +- metrics/grafana/tikv_trouble_shooting.json | 2 +- src/config/mod.rs | 8 +- 12 files changed, 115 insertions(+), 46 deletions(-) diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 812cf2354fa..482de719308 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -17,8 +17,11 @@ use kvproto::{ use raftstore::{ errors::RAFTSTORE_IS_BUSY, store::{ - cmd_resp, util::LeaseState, LocalReadContext, LocalReaderCore, ReadDelegate, ReadExecutor, - ReadExecutorProvider, RegionSnapshot, RequestPolicy, TLS_LOCAL_READ_METRICS, + cmd_resp, + util::LeaseState, + worker_metrics::{self, TLS_LOCAL_READ_METRICS}, + LocalReadContext, LocalReaderCore, ReadDelegate, 
ReadExecutor, ReadExecutorProvider, + RegionSnapshot, RequestPolicy, }, Error, Result, }; @@ -94,6 +97,8 @@ where Ok(RequestPolicy::ReadLocal) => Ok(Some((delegate, RequestPolicy::ReadLocal))), Ok(RequestPolicy::StaleRead) => Ok(Some((delegate, RequestPolicy::StaleRead))), // It can not handle other policies. + // TODO: we should only abort when lease expires. For other cases we should retry + // infinitely. Ok(_) => Ok(None), Err(e) => Err(e), } @@ -179,33 +184,46 @@ where Ok(None) => Either::Right((self.try_to_renew_lease(region_id, &req), self.clone())), }; + worker_metrics::maybe_tls_local_read_metrics_flush(); + async move { match res { - Either::Left(Ok(Some(snap))) => return Ok(snap), - Either::Left(Err(e)) => return Err(e), + Either::Left(Ok(Some(snap))) => Ok(snap), + Either::Left(Err(e)) => Err(e), Either::Right((fut, mut reader)) => { - if let Some(query_res) = fut.await? - && query_res.read().is_some() - { - // If query successful, try again. - req.mut_header().set_read_quorum(false); - if let Some(snap) = reader.try_get_snapshot(&req)? { - return Ok(snap); + let err = match fut.await? { + Some(query_res) => { + if query_res.read().is_some() { + // If query successful, try again. + req.mut_header().set_read_quorum(false); + if let Some(snap) = reader.try_get_snapshot(&req)? 
{ + return Ok(snap); + } else { + let mut err = errorpb::Error::default(); + err.set_message(format!("no delegate found for {}", region_id)); + err + } + } else { + let QueryResult::Response(res) = query_res else { unreachable!() }; + assert!(res.get_header().has_error(), "{:?}", res); + return Err(res); } - } + } + None => { + let mut err = errorpb::Error::default(); + err.set_message(format!( + "failed to extend lease: canceled: {}", + region_id + )); + err + } + }; + let mut resp = RaftCmdResponse::default(); + resp.mut_header().set_error(err); + Err(resp) } Either::Left(Ok(None)) => unreachable!(), } - - let mut err = errorpb::Error::default(); - err.set_message(format!( - "Fail to get snapshot from LocalReader for region {}. \ - Maybe due to `not leader`, `region not found` or `not applied to the current term`", - region_id - )); - let mut resp = RaftCmdResponse::default(); - resp.mut_header().set_error(err); - Err(resp) } } @@ -216,7 +234,12 @@ where region_id: u64, req: &RaftCmdRequest, ) -> impl Future, RaftCmdResponse>> { - let (msg, sub) = PeerMsg::raft_query(req.clone()); + let mut req = req.clone(); + // Remote lease is updated step by step. It's possible local reader expires + // while the raftstore doesn't. So we need to trigger an update + // explicitly. TODO: find a way to reduce the triggered heartbeats. 
+ req.mut_header().set_read_quorum(true); + let (msg, sub) = PeerMsg::raft_query(req); let res = match MsgRouter::send(&self.router, region_id, msg) { Ok(()) => Ok(sub), Err(TrySendError::Full(_)) => { @@ -471,8 +494,8 @@ mod tests { use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, raft_cmdpb::*}; use pd_client::BucketMeta; use raftstore::store::{ - util::Lease, ReadCallback, ReadProgress, RegionReadProgress, TrackVer, TxnExt, - TLS_LOCAL_READ_METRICS, + util::Lease, worker_metrics::TLS_LOCAL_READ_METRICS, ReadCallback, ReadProgress, + RegionReadProgress, TrackVer, TxnExt, }; use slog::o; use tempfile::Builder; @@ -556,13 +579,16 @@ mod tests { match msg { // send the result back to local reader - PeerMsg::RaftQuery(query) => ReadCallback::set_result( - query.ch, - QueryResult::Read(ReadResponse { - read_index: 0, - txn_extra_op: Default::default(), - }), - ), + PeerMsg::RaftQuery(query) => { + assert!(query.request.get_header().get_read_quorum()); + ReadCallback::set_result( + query.ch, + QueryResult::Read(ReadResponse { + read_index: 0, + txn_extra_op: Default::default(), + }), + ) + } _ => unreachable!(), } ch_tx.send(rx).unwrap(); diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index eb58dcbbc23..4ffb4bcdcec 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -380,6 +380,8 @@ impl Peer { entry_storage.apply_state(), GroupState::Ordered, self.raft_group().status(), + self.raft_group().raft.raft_log.last_index(), + self.raft_group().raft.raft_log.persisted, ); // V2 doesn't persist commit index and term, fill them with in-memory values. 
meta.raft_apply.commit_index = cmp::min( diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 8598d1cc41d..3208ecb25ae 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -27,7 +27,7 @@ use std::{ }, }; -use engine_traits::{KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, CF_RAFT}; +use engine_traits::{KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, ALL_CFS}; use kvproto::raft_serverpb::{PeerState, RaftSnapshotData}; use protobuf::Message; use raft::{eraftpb::Snapshot, StateRole}; @@ -442,8 +442,12 @@ impl Storage { .unwrap(); lb.put_region_state(region_id, last_index, self.region_state()) .unwrap(); - lb.put_flushed_index(region_id, CF_RAFT, last_index, last_index) - .unwrap(); + // We assume there should be flush records in all CFs. Skip any CF here may + // break the constraint. + for cf in ALL_CFS { + lb.put_flushed_index(region_id, cf, last_index, last_index) + .unwrap(); + } let (path, clean_split) = match self.split_init_mut() { // If index not match, the peer may accept a newer snapshot after split. diff --git a/components/raftstore-v2/tests/integrations/test_split.rs b/components/raftstore-v2/tests/integrations/test_split.rs index 3b315a2d943..1174a428011 100644 --- a/components/raftstore-v2/tests/integrations/test_split.rs +++ b/components/raftstore-v2/tests/integrations/test_split.rs @@ -2,7 +2,7 @@ use std::{thread, time::Duration}; -use engine_traits::{RaftEngineReadOnly, CF_DEFAULT, CF_RAFT}; +use engine_traits::{Peekable, RaftEngineReadOnly, CF_DEFAULT, CF_RAFT}; use futures::executor::block_on; use kvproto::{ metapb, pdpb, @@ -257,6 +257,26 @@ fn test_split() { actual_split_key.as_encoded(), false, ); + + // Split should survive restart. 
+ drop(raft_engine); + cluster.restart(0); + let region_and_key = vec![ + (2, b"k00"), + (1000, b"k22"), + (1001, b"k11"), + (1002, b"k33"), + (1003, b"k55"), + ]; + for (region_id, key) in region_and_key { + let snapshot = cluster.routers[0].stale_snapshot(region_id); + assert!( + snapshot.get_value(key).unwrap().is_some(), + "{} {:?}", + region_id, + key + ); + } } // TODO: test split race with diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index bad3ac2077d..225126f0edb 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -1122,6 +1122,8 @@ where store.apply_state(), self.fsm.hibernate_state.group_state(), peer.raft_group.status(), + peer.raft_group.raft.raft_log.last_index(), + peer.raft_group.raft.raft_log.persisted, )) } CasualMessage::QueryRegionLeaderResp { region, leader } => { diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 0846e8362b3..62561c63cbc 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -74,10 +74,10 @@ pub use self::{ txn_ext::{LocksStatus, PeerPessimisticLocks, PessimisticLockPair, TxnExt}, util::{RegionReadProgress, RegionReadProgressRegistry}, worker::{ - metrics::TLS_LOCAL_READ_METRICS, AutoSplitController, Bucket, BucketRange, - CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, FlowStatistics, FlowStatsReporter, - KeyEntry, LocalReadContext, LocalReader, LocalReaderCore, PdTask, ReadDelegate, - ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, + metrics as worker_metrics, AutoSplitController, Bucket, BucketRange, CachedReadDelegate, + CheckLeaderRunner, CheckLeaderTask, FlowStatistics, FlowStatsReporter, KeyEntry, + LocalReadContext, LocalReader, LocalReaderCore, PdTask, ReadDelegate, ReadExecutor, + ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, 
SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, StoreMetaDelegate, TrackVer, WriteStats, }, diff --git a/components/raftstore/src/store/region_meta.rs b/components/raftstore/src/store/region_meta.rs index 7de687e9dbb..4d44673e057 100644 --- a/components/raftstore/src/store/region_meta.rs +++ b/components/raftstore/src/store/region_meta.rs @@ -93,6 +93,8 @@ pub struct RaftStatus { pub applied: u64, pub voters: HashMap, pub learners: HashMap, + pub last_index: u64, + pub persisted_index: u64, } impl<'a> From> for RaftStatus { @@ -126,6 +128,8 @@ impl<'a> From> for RaftStatus { applied, voters, learners, + last_index: 0, + persisted_index: 0, } } } @@ -250,6 +254,8 @@ impl RegionMeta { apply_state: &raft_serverpb::RaftApplyState, group_state: GroupState, raft_status: Status<'_>, + last_index: u64, + persisted_index: u64, ) -> Self { let region = local_state.get_region(); let epoch = region.get_region_epoch(); @@ -270,10 +276,13 @@ impl RegionMeta { } else { None }; + let mut raft_status: RaftStatus = raft_status.into(); + raft_status.last_index = last_index; + raft_status.persisted_index = persisted_index; Self { group_state, - raft_status: raft_status.into(), + raft_status, raft_apply: RaftApplyState { applied_index: apply_state.get_applied_index(), commit_index: apply_state.get_commit_index(), diff --git a/metrics/grafana/performance_write.json b/metrics/grafana/performance_write.json index c289d979dc8..ddb9621b97a 100644 --- a/metrics/grafana/performance_write.json +++ b/metrics/grafana/performance_write.json @@ -3029,7 +3029,7 @@ "query": { "datasourceId": 1, "model": { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"raftstore_.*\"}[1m])) by (instance)", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"(raftstore|rs)_.*\"}[1m])) by (instance)", 
"intervalFactor": 2, "legendFormat": "{{instance}}", "metric": "tikv_thread_cpu_seconds_total", diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 0c2116818dc..cff4b5f7742 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -5816,7 +5816,7 @@ "query": { "datasourceId": 1, "model": { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"raftstore_.*\"}[1m])) by (instance)", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"(raftstore|rs)_.*\"}[1m])) by (instance)", "intervalFactor": 2, "legendFormat": "{{instance}}", "metric": "tikv_thread_cpu_seconds_total", diff --git a/metrics/grafana/tikv_summary.json b/metrics/grafana/tikv_summary.json index b19478464a2..847ac5ef289 100644 --- a/metrics/grafana/tikv_summary.json +++ b/metrics/grafana/tikv_summary.json @@ -3109,7 +3109,7 @@ "query": { "datasourceId": 1, "model": { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"raftstore_.*\"}[1m])) by (instance)", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"(raftstore|rs)_.*\"}[1m])) by (instance)", "intervalFactor": 2, "legendFormat": "{{instance}}", "metric": "tikv_thread_cpu_seconds_total", diff --git a/metrics/grafana/tikv_trouble_shooting.json b/metrics/grafana/tikv_trouble_shooting.json index 735c1f305f7..bf1fd5baacf 100644 --- a/metrics/grafana/tikv_trouble_shooting.json +++ b/metrics/grafana/tikv_trouble_shooting.json @@ -1326,7 +1326,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
name=~\"raftstore_.*\"}[1m])) by (instance)", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"(raftstore|rs)_.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", diff --git a/src/config/mod.rs b/src/config/mod.rs index 808dd22299c..2074c992519 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3018,12 +3018,18 @@ impl TikvConfig { return Err("raftdb.wal_dir can't be same as rocksdb.wal_dir".into()); } + let kv_data_exists = if self.storage.engine == EngineType::RaftKv { + RocksEngine::exists(&kv_db_path) + } else { + Path::new(&self.storage.data_dir).join("tablets").exists() + }; + RaftDataStateMachine::new( &self.storage.data_dir, &self.raft_store.raftdb_path, &self.raft_engine.config.dir, ) - .validate(RocksEngine::exists(&kv_db_path))?; + .validate(kv_data_exists)?; // Check blob file dir is empty when titan is disabled if !self.rocksdb.titan.enabled { From 5806cd134335f0f29ce7a0acfe21bb06d7b6bbc1 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 27 Dec 2022 10:46:19 +0800 Subject: [PATCH 0428/1149] support check leader (#13987) ref tikv/tikv#12842 This PR makes check leader works by introducing a trait `ExternRegionInfo`. 
Signed-off-by: Jay Lee --- components/raftstore-v2/src/batch/store.rs | 3 +- components/raftstore-v2/src/fsm/store.rs | 98 ++++++++++++++++++- .../src/operation/command/admin/split.rs | 1 + components/raftstore-v2/src/operation/life.rs | 4 + .../raftstore-v2/src/operation/query/local.rs | 12 +-- .../src/operation/ready/snapshot.rs | 1 + components/raftstore-v2/src/router/imp.rs | 4 +- components/raftstore/src/store/fsm/store.rs | 36 +++++++ .../src/store/worker/check_leader.rs | 52 ++++------ components/raftstore/src/store/worker/read.rs | 10 +- components/server/src/server.rs | 2 +- components/server/src/server2.rs | 21 +++- components/test_raftstore/src/server.rs | 2 +- src/server/raftkv2/node.rs | 4 - src/server/server.rs | 4 +- src/server/service/kv.rs | 11 +-- 16 files changed, 192 insertions(+), 73 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 0d5f984107c..bcfa6ca0771 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -276,7 +276,7 @@ impl StorePollerBuilder { fn init(&self) -> Result>> { let mut regions = HashMap::default(); let cfg = self.cfg.value(); - let meta = self.store_meta.lock().unwrap(); + let mut meta = self.store_meta.lock().unwrap(); self.engine .for_each_raft_group::(&mut |region_id| { assert_ne!(region_id, INVALID_ID); @@ -298,6 +298,7 @@ impl StorePollerBuilder { StateRole::Follower, ); } + meta.set_region(storage.region(), storage.is_initialized(), &self.logger); let (sender, peer_fsm) = PeerFsm::new(&cfg, &self.tablet_registry, storage)?; meta.region_read_progress diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index bd31de69496..cb7aa99b179 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -1,12 +1,20 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::time::{Duration, SystemTime}; +use std::{ + collections::BTreeMap, + ops::Bound::{Excluded, Unbounded}, + time::{Duration, SystemTime}, +}; use batch_system::Fsm; use collections::HashMap; use engine_traits::{KvEngine, RaftEngine}; use futures::{compat::Future01CompatExt, FutureExt}; -use raftstore::store::{Config, ReadDelegate, RegionReadProgressRegistry}; +use keys::{data_end_key, data_key}; +use kvproto::metapb::Region; +use raftstore::store::{ + fsm::store::StoreRegionMeta, Config, ReadDelegate, RegionReadProgressRegistry, +}; use slog::{info, o, Logger}; use tikv_util::{ future::poll_future_notify, @@ -19,13 +27,95 @@ use crate::{ router::{StoreMsg, StoreTick}, }; -#[derive(Default)] pub struct StoreMeta { - pub store_id: Option, + pub store_id: u64, /// region_id -> reader pub readers: HashMap, /// region_id -> `RegionReadProgress` pub region_read_progress: RegionReadProgressRegistry, + /// (region_end_key, epoch.version) -> region_id + /// + /// Unlinke v1, ranges in v2 may be overlapped. So we use version + /// to avoid end key conflict. + pub(crate) region_ranges: BTreeMap<(Vec, u64), u64>, + /// region_id -> (region, initialized) + pub(crate) regions: HashMap, +} + +impl StoreMeta { + pub fn new(store_id: u64) -> StoreMeta { + StoreMeta { + store_id, + readers: HashMap::default(), + region_read_progress: RegionReadProgressRegistry::default(), + region_ranges: BTreeMap::default(), + regions: HashMap::default(), + } + } + + pub fn set_region(&mut self, region: &Region, initialized: bool, logger: &Logger) { + let region_id = region.get_id(); + let version = region.get_region_epoch().get_version(); + let prev = self + .regions + .insert(region_id, (region.clone(), initialized)); + // `prev` only makes sense when it's initialized. 
+ if let Some((prev, prev_init)) = prev && prev_init { + assert!(initialized, "{:?} region corrupted", logger.list()); + if prev.get_region_epoch().get_version() != version { + let prev_id = self.region_ranges.remove(&(data_end_key(prev.get_end_key()), prev.get_region_epoch().get_version())); + assert_eq!(prev_id, Some(region_id), "{:?} region corrupted", logger.list()); + } else { + assert!(self.region_ranges.get(&(data_end_key(prev.get_end_key()), version)).is_some(), "{:?} region corrupted", logger.list()); + return; + } + } + if initialized { + assert!( + self.region_ranges + .insert((data_end_key(region.get_end_key()), version), region_id) + .is_none(), + "{:?} region corrupted", + logger.list() + ); + } + } +} + +impl StoreRegionMeta for StoreMeta { + #[inline] + fn store_id(&self) -> u64 { + self.store_id + } + + #[inline] + fn region_read_progress(&self) -> &RegionReadProgressRegistry { + &self.region_read_progress + } + + #[inline] + fn search_region( + &self, + start_key: &[u8], + end_key: &[u8], + mut visitor: impl FnMut(&kvproto::metapb::Region), + ) { + let start_key = data_key(start_key); + for (_, id) in self + .region_ranges + .range((Excluded((start_key, 0)), Unbounded::<(Vec, u64)>)) + { + let (region, initialized) = &self.regions[id]; + if !initialized { + continue; + } + if end_key.is_empty() || end_key > region.get_start_key() { + visitor(region); + } else { + break; + } + } + } } pub struct Store { diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 64388333fee..391f0253439 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -414,6 +414,7 @@ impl Peer { { let mut meta = store_ctx.store_meta.lock().unwrap(); + meta.set_region(derived, true, &self.logger); let reader = meta.readers.get_mut(&derived.get_id()).unwrap(); self.set_region( &store_ctx.coprocessor_host, 
diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 59e0e532faa..d61f11e7ada 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -237,6 +237,10 @@ impl Store { return; } }; + ctx.store_meta + .lock() + .unwrap() + .set_region(fsm.peer().region(), false, fsm.logger()); let mailbox = BasicMailbox::new(tx, fsm, ctx.router.state_cnt().clone()); if ctx .router diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 482de719308..2cb5497d789 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -81,7 +81,7 @@ where } pub fn store_meta(&self) -> &Arc> { - self.local_reader.store_meta() + &self.local_reader.store_meta().store_meta } pub fn pre_propose_raft_command( @@ -376,7 +376,7 @@ where type StoreMeta = Arc>; fn store_id(&self) -> Option { - self.store_meta.as_ref().lock().unwrap().store_id + Some(self.store_meta.as_ref().lock().unwrap().store_id) } /// get the ReadDelegate with region_id and the number of delegates in the @@ -397,10 +397,6 @@ where } (meta.readers.len(), None) } - - fn store_meta(&self) -> &Self::StoreMeta { - &self.store_meta - } } struct SnapRequestInspector<'r> { @@ -610,7 +606,7 @@ mod tests { let factory = Box::new(TestTabletFactory::new(ops, cf_opts)); let reg = TabletRegistry::new(factory, path.path()).unwrap(); - let store_meta = Arc::new(Mutex::new(StoreMeta::default())); + let store_meta = Arc::new(Mutex::new(StoreMeta::new(store_id))); let (mut reader, mut rx) = new_reader(store_id, store_meta.clone(), reg.clone()); let (mix_tx, mix_rx) = sync_channel(1); let handler = mock_raftstore(mix_rx); @@ -811,7 +807,7 @@ mod tests { let reg = TabletRegistry::new(factory, path.path()).unwrap(); let store_meta = - StoreMetaDelegate::new(Arc::new(Mutex::new(StoreMeta::default())), 
reg.clone()); + StoreMetaDelegate::new(Arc::new(Mutex::new(StoreMeta::new(1))), reg.clone()); let tablet1; let tablet2; diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 3208ecb25ae..ce8327c2012 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -163,6 +163,7 @@ impl Peer { self.raft_group_mut().advance_apply_to(persisted_index); { let mut meta = ctx.store_meta.lock().unwrap(); + meta.set_region(self.region(), true, &self.logger); meta.readers .insert(region_id, self.generate_read_delegate()); meta.region_read_progress diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index a03459c96d2..668d7591a40 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -116,9 +116,7 @@ where impl RaftRouter { pub fn new(store_id: u64, reg: TabletRegistry, router: StoreRouter) -> Self { - let mut store_meta = StoreMeta::default(); - store_meta.store_id = Some(store_id); - let store_meta = Arc::new(Mutex::new(store_meta)); + let store_meta = Arc::new(Mutex::new(StoreMeta::new(store_id))); let logger = router.logger().clone(); RaftRouter { diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 3cadcce5a82..310c33b95b2 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -115,6 +115,14 @@ pub struct StoreInfo { pub capacity: u64, } +/// A trait that provide the meta information that can be accessed outside +/// of raftstore. 
+pub trait StoreRegionMeta: Send { + fn store_id(&self) -> u64; + fn region_read_progress(&self) -> &RegionReadProgressRegistry; + fn search_region(&self, start_key: &[u8], end_key: &[u8], visitor: impl FnMut(&Region)); +} + pub struct StoreMeta { pub store_id: Option, /// region_end_key -> region_id @@ -154,6 +162,34 @@ pub struct StoreMeta { pub damaged_ranges: HashMap, Vec)>, } +impl StoreRegionMeta for StoreMeta { + #[inline] + fn store_id(&self) -> u64 { + self.store_id.unwrap() + } + + #[inline] + fn search_region(&self, start_key: &[u8], end_key: &[u8], mut visitor: impl FnMut(&Region)) { + let start_key = data_key(start_key); + for (_, id) in self + .region_ranges + .range((Excluded(start_key), Unbounded::>)) + { + let region = &self.regions[id]; + if end_key.is_empty() || end_key > region.get_start_key() { + visitor(region); + } else { + break; + } + } + } + + #[inline] + fn region_read_progress(&self) -> &RegionReadProgressRegistry { + &self.region_read_progress + } +} + impl StoreMeta { pub fn new(vote_capacity: usize) -> StoreMeta { StoreMeta { diff --git a/components/raftstore/src/store/worker/check_leader.rs b/components/raftstore/src/store/worker/check_leader.rs index ab83752d8c3..c4646de35a4 100644 --- a/components/raftstore/src/store/worker/check_leader.rs +++ b/components/raftstore/src/store/worker/check_leader.rs @@ -1,27 +1,25 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - collections::Bound::{Excluded, Unbounded}, fmt, sync::{Arc, Mutex}, }; use engine_traits::KvEngine; use fail::fail_point; -use keys::{data_end_key, data_key, enc_start_key}; use kvproto::kvrpcpb::{KeyRange, LeaderInfo}; use tikv_util::worker::Runnable; use crate::{ coprocessor::CoprocessorHost, - store::{fsm::store::StoreMeta, util::RegionReadProgressRegistry}, + store::{fsm::store::StoreRegionMeta, util::RegionReadProgressRegistry}, }; -pub struct Runner +pub struct Runner where E: KvEngine, { - store_meta: Arc>, + store_meta: Arc>, region_read_progress: RegionReadProgressRegistry, coprocessor: CoprocessorHost, } @@ -55,12 +53,13 @@ impl fmt::Display for Task { } } -impl Runner +impl Runner where + S: StoreRegionMeta, E: KvEngine, { - pub fn new(store_meta: Arc>, coprocessor: CoprocessorHost) -> Runner { - let region_read_progress = store_meta.lock().unwrap().region_read_progress.clone(); + pub fn new(store_meta: Arc>, coprocessor: CoprocessorHost) -> Self { + let region_read_progress = store_meta.lock().unwrap().region_read_progress().clone(); Runner { region_read_progress, store_meta, @@ -82,48 +81,39 @@ where .unwrap_or(0) }) } else { - let (start_key, end_key) = ( - data_key(key_range.get_start_key()), - data_end_key(key_range.get_end_key()), - ); // `store_safe_ts` won't be accessed frequently (like per-request or // per-transaction), also this branch won't entry because the request key range // is empty currently (in v5.1) keep this branch for robustness and future use, // so it is okay getting `store_safe_ts` from `store_meta` (behide a mutex) let meta = self.store_meta.lock().unwrap(); - meta.region_read_progress.with(|registry| { - meta.region_ranges - // get overlapped regions - .range((Excluded(start_key), Unbounded)) - .take_while(|(_, id)| end_key > enc_start_key(&meta.regions[*id])) - // get the min `safe_ts` - .map(|(_, id)| { - registry.get(id).unwrap().safe_ts() - }) - .filter(|ts| *ts != 0) // ts == 0 means the peer is uninitialized 
- .min() - .unwrap_or(0) + meta.region_read_progress().with(|registry| { + let mut min_ts = u64::MAX; + meta.search_region(key_range.get_start_key(), key_range.get_end_key(), |r| { + let ts = registry.get(&r.get_id()).unwrap().safe_ts(); + // ts == 0 means the peer is uninitialized + if ts != 0 && ts < min_ts { + min_ts = ts; + } + }); + if min_ts == u64::MAX { 0 } else { min_ts } }) } } } -impl Runnable for Runner -where - E: KvEngine, -{ +impl Runnable for Runner { type Task = Task; fn run(&mut self, task: Task) { match task { Task::CheckLeader { leaders, cb } => { fail_point!( "before_check_leader_store_2", - self.store_meta.lock().unwrap().store_id == Some(2), + self.store_meta.lock().unwrap().store_id() == 2, |_| {} ); fail_point!( "before_check_leader_store_3", - self.store_meta.lock().unwrap().store_id == Some(3), + self.store_meta.lock().unwrap().store_id() == 3, |_| {} ); let regions = self @@ -146,7 +136,7 @@ mod tests { use kvproto::metapb::Region; use super::*; - use crate::store::util::RegionReadProgress; + use crate::store::{fsm::StoreMeta, util::RegionReadProgress}; #[test] fn test_get_range_min_safe_ts() { diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index a20fcefdbdb..a8fc2e6e3df 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -294,8 +294,6 @@ pub trait ReadExecutorProvider: Send + Clone + 'static { /// get the ReadDelegate with region_id and the number of delegates in the /// StoreMeta fn get_executor_and_len(&self, region_id: u64) -> (usize, Option); - - fn store_meta(&self) -> &Self::StoreMeta; } #[derive(Clone)] @@ -346,10 +344,6 @@ where } (meta.readers.len(), None) } - - fn store_meta(&self) -> &Self::StoreMeta { - &self.store_meta - } } /// #[RaftstoreCommon] @@ -716,8 +710,8 @@ where } } - pub fn store_meta(&self) -> &S::StoreMeta { - self.store_meta.store_meta() + pub fn store_meta(&self) -> &S { + 
&self.store_meta } // Ideally `get_delegate` should return `Option<&ReadDelegate>`, but if so the diff --git a/components/server/src/server.rs b/components/server/src/server.rs index d7a05fff115..73b42d96d22 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -962,7 +962,7 @@ where self.resolver.clone().unwrap(), Either::Left(snap_mgr.clone()), gc_worker.clone(), - Some(check_leader_scheduler), + check_leader_scheduler, self.env.clone(), unified_read_pool, debug_thread_pool, diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 620a6b20b74..7f81d931181 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -55,7 +55,10 @@ use raftstore::{ BoxConsistencyCheckObserver, ConsistencyCheckMethod, CoprocessorHost, RawConsistencyCheckObserver, }, - store::{memory::MEMTRACE_ROOT as MEMTRACE_RAFTSTORE, SplitConfigManager, TabletSnapManager}, + store::{ + memory::MEMTRACE_ROOT as MEMTRACE_RAFTSTORE, CheckLeaderRunner, SplitConfigManager, + TabletSnapManager, + }, RegionInfoAccessor, }; use security::SecurityManager; @@ -214,6 +217,7 @@ struct TikvServer { concurrency_manager: ConcurrencyManager, env: Arc, background_worker: Worker, + check_leader_worker: Worker, sst_worker: Option>>, quota_limiter: Arc, causal_ts_provider: Option>, // used for rawkv apiv2 @@ -297,6 +301,10 @@ where info!("Causal timestamp provider startup."); } + // Run check leader in a dedicate thread, because it is time sensitive + // and crucial to TiCDC replication lag. 
+ let check_leader_worker = WorkerBuilder::new("check_leader").thread_count(1).create(); + TikvServer { config, cfg_controller: Some(cfg_controller), @@ -318,6 +326,7 @@ where concurrency_manager, env, background_worker, + check_leader_worker, flow_info_sender: None, flow_info_receiver: None, sst_worker: None, @@ -764,6 +773,14 @@ where cop_read_pools.handle() }; + let check_leader_runner = CheckLeaderRunner::new( + self.node.as_ref().unwrap().router().store_meta().clone(), + self.coprocessor_host.clone().unwrap(), + ); + let check_leader_scheduler = self + .check_leader_worker + .start("check-leader", check_leader_runner); + let server_config = Arc::new(VersionTrack::new(self.config.server.clone())); self.config @@ -797,7 +814,7 @@ where self.resolver.clone().unwrap(), Either::Right(snap_mgr.clone()), gc_worker.clone(), - None, + check_leader_scheduler, self.env.clone(), unified_read_pool, debug_thread_pool, diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 2521347ec18..0ec60e468ee 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -525,7 +525,7 @@ impl ServerCluster { resolver.clone(), tikv_util::Either::Left(snap_mgr.clone()), gc_worker.clone(), - Some(check_leader_scheduler.clone()), + check_leader_scheduler.clone(), self.env.clone(), None, debug_thread_pool.clone(), diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index bcfd542035b..ed6f16e8bec 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -104,10 +104,6 @@ where T: Transport + 'static, { let store_id = self.id(); - { - let mut meta = self.router().store_meta().lock().unwrap(); - meta.store_id = Some(store_id); - } if let Some(region) = Bootstrap::new( &raft_engine, self.cluster_id, diff --git a/src/server/server.rs b/src/server/server.rs index 22ab1682309..4c1f5e7ef69 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -96,7 +96,7 @@ where 
resolver: S, snap_mgr: Either, gc_worker: GcWorker, - check_leader_scheduler: Option>, + check_leader_scheduler: Scheduler, env: Arc, yatp_read_pool: Option, debug_thread_pool: Arc, @@ -580,7 +580,7 @@ mod tests { }, Either::Left(SnapManager::new("")), gc_worker, - Some(check_leader_scheduler), + check_leader_scheduler, env, None, debug_thread_pool, diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 66fc5060e68..6c85741f64a 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -76,7 +76,7 @@ pub struct Service { // For handling snapshot. snap_scheduler: Scheduler, // For handling `CheckLeader` request. - check_leader_scheduler: Option>, + check_leader_scheduler: Scheduler, enable_req_batch: bool, @@ -115,7 +115,7 @@ impl Service { copr: Endpoint, copr_v2: coprocessor_v2::Endpoint, snap_scheduler: Scheduler, - check_leader_scheduler: Option>, + check_leader_scheduler: Scheduler, grpc_thread_load: Arc, enable_req_batch: bool, proxy: Proxy, @@ -909,7 +909,6 @@ impl Tikv for Service { let (cb, resp) = paired_future_callback(); let check_leader_scheduler = self.check_leader_scheduler.clone(); let task = async move { - let Some(check_leader_scheduler) = check_leader_scheduler else { return Err(box_err!("check leader is not supported")) }; check_leader_scheduler .schedule(CheckLeaderTask::CheckLeader { leaders, cb }) .map_err(|e| Error::Other(format!("{}", e).into()))?; @@ -945,11 +944,7 @@ impl Tikv for Service { ) { let key_range = request.take_key_range(); let (cb, resp) = paired_future_callback(); - let check_leader_scheduler = match self.check_leader_scheduler.clone() { - Some(s) => s, - // Avoid print errors if it's not supported. 
- None => return, - }; + let check_leader_scheduler = self.check_leader_scheduler.clone(); let task = async move { check_leader_scheduler .schedule(CheckLeaderTask::GetStoreTs { key_range, cb }) From 929b329af491d40e11648e2606124c8d877ba37a Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Tue, 27 Dec 2022 11:00:17 +0800 Subject: [PATCH 0429/1149] fix the bug that send mistake peer snapshot (#13915) ref tikv/tikv#12842 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Ti Chi Robot --- .../raftstore-v2/src/operation/ready/mod.rs | 2 +- .../src/operation/ready/snapshot.rs | 159 +++++++++--------- components/raftstore-v2/src/raft/storage.rs | 44 +++-- .../raftstore/src/store/async_io/read.rs | 6 +- 4 files changed, 116 insertions(+), 95 deletions(-) diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 854fd965d9e..0e911e48255 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -520,7 +520,7 @@ impl Peer { } StateRole::Follower => { self.leader_lease_mut().expire(); - self.storage_mut().cancel_generating_snap(None); + self.storage_mut().cancel_generating_snap(None, None); self.clear_in_memory_pessimistic_locks(); } _ => {} diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index ce8327c2012..149505f0af4 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -20,7 +20,7 @@ use std::{ fmt::{self, Debug}, - fs, mem, + fs, sync::{ atomic::{AtomicBool, AtomicU64, Ordering}, Arc, @@ -101,6 +101,10 @@ impl GenSnapTask { pub fn set_for_balance(&mut self) { self.for_balance = true; } + + pub fn to_peer(&self) -> u64 { + self.to_peer + } } impl Debug for GenSnapTask { @@ -232,40 +236,38 @@ impl Apply { impl Storage { pub fn is_generating_snapshot(&self) -> bool 
{ - let snap_state = self.snap_state_mut(); - matches!(*snap_state, SnapState::Generating { .. }) + let snap_states = self.snap_states.borrow_mut(); + for (_, state) in snap_states.iter() { + if matches!(*state, SnapState::Generating { .. }) { + return true; + } + } + false } /// Gets a snapshot. Returns `SnapshotTemporarilyUnavailable` if there is no /// unavailable snapshot. pub fn snapshot(&self, request_index: u64, to: u64) -> raft::Result { - let mut snap_state = self.snap_state_mut(); - match &*snap_state { - SnapState::Generating { canceled, .. } => { - if canceled.load(Ordering::SeqCst) { - self.cancel_generating_snap(None); - } else { - return Err(raft::Error::Store( - raft::StorageError::SnapshotTemporarilyUnavailable, - )); + if let Some(state) = self.snap_states.borrow_mut().get_mut(&to) { + match state { + SnapState::Generating { ref canceled, .. } => { + if canceled.load(Ordering::SeqCst) { + self.cancel_generating_snap(Some(to), None); + } else { + return Err(raft::Error::Store( + raft::StorageError::SnapshotTemporarilyUnavailable, + )); + } } - } - SnapState::Generated(_) => { - // TODO: `to` may not be equal to the generated snapshot. 
- let SnapState::Generated(snap) = mem::replace(&mut *snap_state, SnapState::Relax) else { unreachable!() }; - if self.validate_snap(&snap, request_index) { - return Ok(*snap); + SnapState::Generated(ref s) => { + let snap = *s.clone(); + *state = SnapState::Relax; + if self.validate_snap(&snap, request_index) { + return Ok(snap); + } } - } - _ => {} - } - - if SnapState::Relax != *snap_state { - panic!( - "{:?} unexpected state: {:?}", - self.logger().list(), - *snap_state - ); + _ => {} + }; } info!( @@ -276,15 +278,18 @@ impl Storage { ); let canceled = Arc::new(AtomicBool::new(false)); let index = Arc::new(AtomicU64::new(0)); - *snap_state = SnapState::Generating { - canceled: canceled.clone(), - index: index.clone(), - }; - - let task = GenSnapTask::new(self.region().get_id(), to, index, canceled); let mut gen_snap_task = self.gen_snap_task_mut(); - assert!(gen_snap_task.is_none()); - *gen_snap_task = Box::new(Some(task)); + if gen_snap_task.is_none() { + self.snap_states.borrow_mut().insert( + to, + SnapState::Generating { + canceled: canceled.clone(), + index: index.clone(), + }, + ); + let task = GenSnapTask::new(self.region().get_id(), to, index, canceled); + *gen_snap_task = Box::new(Some(task)); + } Err(raft::Error::Store( raft::StorageError::SnapshotTemporarilyUnavailable, )) @@ -332,28 +337,32 @@ impl Storage { true } - /// Cancel generating snapshot. - pub fn cancel_generating_snap(&self, compact_to: Option) { - let mut snap_state = self.snap_state_mut(); - let SnapState::Generating { - ref canceled, - ref index, - } = *snap_state else { return }; - - if let Some(idx) = compact_to { - let snap_index = index.load(Ordering::SeqCst); - if snap_index == 0 || idx <= snap_index + 1 { - return; + pub fn cancel_generating_snap(&self, to: Option, compact_to: Option) { + if let Some(id) = to { + let mut states = self.snap_states.borrow_mut(); + if let Some(state) = states.get(&id) { + let SnapState::Generating { + ref index, + .. 
+ } = *state else { return }; + if let Some(idx) = compact_to { + let snap_index = index.load(Ordering::SeqCst); + if snap_index == 0 || idx <= snap_index + 1 { + return; + } + } + info!( + self.logger(), + "snapshot is canceled"; + "compact_to" => compact_to, + ); + self.cancel_snap_task(to); + states.remove(&id); } + } else { + self.cancel_snap_task(to); + self.snap_states.borrow_mut().clear(); } - canceled.store(true, Ordering::SeqCst); - *snap_state = SnapState::Relax; - self.gen_snap_task_mut().take(); - info!( - self.logger(), - "snapshot is canceled"; - "compact_to" => compact_to, - ); STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER.cancel.inc(); } @@ -362,29 +371,27 @@ impl Storage { /// TODO: make the snap state more clearer, the snapshot must be consumed. pub fn on_snapshot_generated(&self, res: GenSnapRes) -> bool { if res.is_none() { - self.cancel_generating_snap(None); + self.cancel_generating_snap(None, None); return false; } - let snap = res.unwrap(); - let mut snap_state = self.snap_state_mut(); - let SnapState::Generating { - index, - .. - } = &*snap_state else { return false }; - - if snap.get_metadata().get_index() < index.load(Ordering::SeqCst) { - warn!( - self.logger(), - "snapshot is staled, skip"; - "snap index" => snap.get_metadata().get_index(), - "required index" => index.load(Ordering::SeqCst), - ); - return false; + let (snapshot, to_peer_id) = *res.unwrap(); + if let Some(state) = self.snap_states.borrow_mut().get_mut(&to_peer_id) { + let SnapState::Generating { + ref index, + .. 
+ } = *state else { return false }; + if snapshot.get_metadata().get_index() < index.load(Ordering::SeqCst) { + warn!( + self.logger(), + "snapshot is staled, skip"; + "snap index" => snapshot.get_metadata().get_index(), + "required index" => index.load(Ordering::SeqCst), + "to_peer_id" => to_peer_id, + ); + return false; + } + *state = SnapState::Generated(Box::new(snapshot)); } - // Should changed `SnapState::Generated` to `SnapState::Relax` when the - // snap is consumed or canceled. Such as leader changed, the state of generated - // should be reset. - *snap_state = SnapState::Generated(snap); true } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index bce313eab83..1015b5aaac7 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -5,6 +5,7 @@ use std::{ fmt::{self, Debug, Formatter}, }; +use collections::HashMap; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ metapb, @@ -37,8 +38,8 @@ pub struct Storage { logger: Logger, /// Snapshot part. - snap_state: RefCell, - gen_snap_task: RefCell>>, + pub snap_states: RefCell>, + pub gen_snap_task: RefCell>>, split_init: Option>, /// The flushed index of all CFs. 
apply_trace: ApplyTrace, @@ -87,13 +88,23 @@ impl Storage { } #[inline] - pub fn snap_state_mut(&self) -> RefMut<'_, SnapState> { - self.snap_state.borrow_mut() + pub fn gen_snap_task_mut(&self) -> RefMut<'_, Box>> { + self.gen_snap_task.borrow_mut() } #[inline] - pub fn gen_snap_task_mut(&self) -> RefMut<'_, Box>> { - self.gen_snap_task.borrow_mut() + pub fn cancel_snap_task(&self, to_peer_id: Option) { + if to_peer_id.is_none() { + self.gen_snap_task.borrow_mut().take(); + return; + } + let to = to_peer_id.unwrap(); + let mut task = self.gen_snap_task.borrow_mut(); + if let Some(t) = &**task { + if to == t.to_peer() { + *task = Box::new(None); + }; + } } #[inline] @@ -143,7 +154,7 @@ impl Storage { region_state, ever_persisted: persisted, logger, - snap_state: RefCell::new(SnapState::Relax), + snap_states: RefCell::new(HashMap::default()), gen_snap_task: RefCell::new(Box::new(None)), split_init: None, apply_trace, @@ -435,14 +446,17 @@ mod tests { ); // Test get snapshot - let snap = s.snapshot(0, 7); + let to_peer_id = 7; + let snap = s.snapshot(0, to_peer_id); let unavailable = RaftError::Store(StorageError::SnapshotTemporarilyUnavailable); assert_eq!(snap.unwrap_err(), unavailable); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); apply.schedule_gen_snapshot(gen_task); let res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); s.on_snapshot_generated(res); - let snap = match *s.snap_state.borrow() { + assert_eq!(s.snapshot(0, 8).unwrap_err(), unavailable); + assert!(s.snap_states.borrow().get(&8).is_some()); + let snap = match *s.snap_states.borrow().get(&to_peer_id).unwrap() { SnapState::Generated(ref snap) => *snap.clone(), ref s => panic!("unexpected state: {:?}", s), }; @@ -452,16 +466,16 @@ mod tests { let snap_key = TabletSnapKey::from_region_snap(4, 7, &snap); let checkpointer_path = mgr.tablet_gen_path(&snap_key); assert!(checkpointer_path.exists()); - s.snapshot(0, 7).unwrap(); + s.snapshot(0, to_peer_id).unwrap(); // Test cancel snapshot 
- let snap = s.snapshot(0, 0); + let snap = s.snapshot(0, 7); assert_eq!(snap.unwrap_err(), unavailable); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); apply.schedule_gen_snapshot(gen_task); - rx.recv_timeout(Duration::from_secs(1)).unwrap(); - s.cancel_generating_snap(None); - assert_eq!(*s.snap_state.borrow(), SnapState::Relax); + let _res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); + s.cancel_generating_snap(None, None); + assert!(s.snap_states.borrow().get(&to_peer_id).is_none()); // Test get twice snapshot and cancel once. // get snapshot a @@ -471,7 +485,7 @@ mod tests { apply.set_apply_progress(1, 5); apply.schedule_gen_snapshot(gen_task_a); let res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); - s.cancel_generating_snap(None); + s.cancel_generating_snap(None, None); // cancel get snapshot a, try get snaphsot b let snap = s.snapshot(0, 0); assert_eq!(snap.unwrap_err(), unavailable); diff --git a/components/raftstore/src/store/async_io/read.rs b/components/raftstore/src/store/async_io/read.rs index 5dc01b40ef3..b298ed3529e 100644 --- a/components/raftstore/src/store/async_io/read.rs +++ b/components/raftstore/src/store/async_io/read.rs @@ -79,12 +79,12 @@ pub struct FetchedLogs { pub logs: Box, } -pub type GenSnapRes = Option>; +pub type GenSnapRes = Option>; /// A router for receiving fetched result. 
pub trait AsyncReadNotifier: Send { fn notify_logs_fetched(&self, region_id: u64, fetched: FetchedLogs); - fn notify_snapshot_generated(&self, region_id: u64, res: Option>); + fn notify_snapshot_generated(&self, region_id: u64, res: GenSnapRes); } pub struct ReadRunner @@ -231,7 +231,7 @@ where SNAP_HISTOGRAM .generate .observe(start.saturating_elapsed_secs()); - res = Some(Box::new(snapshot)) + res = Some(Box::new((snapshot, to_peer))) } self.notifier.notify_snapshot_generated(region_id, res); From f21361d9f8af7f61ed41e4f408bc9a3e6cc83b0e Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 27 Dec 2022 12:40:17 +0800 Subject: [PATCH 0430/1149] raftstore-v2: compact and gc raft logs (#13846) ref tikv/tikv#12842 Signed-off-by: tabokie --- Cargo.lock | 1 + components/engine_panic/src/raft_engine.rs | 11 +- components/engine_rocks/src/raft_engine.rs | 33 +- components/engine_traits/src/lib.rs | 2 +- components/engine_traits/src/raft_engine.rs | 26 +- components/raft_log_engine/src/engine.rs | 105 ++++-- components/raftstore-v2/Cargo.toml | 1 + components/raftstore-v2/src/fsm/peer.rs | 6 +- .../operation/command/admin/compact_log.rs | 304 ++++++++++++++++++ .../src/operation/command/admin/mod.rs | 11 +- .../src/operation/command/admin/split.rs | 24 +- .../raftstore-v2/src/operation/command/mod.rs | 17 +- .../src/operation/ready/apply_trace.rs | 8 +- .../raftstore-v2/src/operation/ready/mod.rs | 14 +- .../src/operation/ready/snapshot.rs | 51 +-- components/raftstore-v2/src/raft/peer.rs | 42 +++ components/raftstore-v2/src/raft/storage.rs | 4 +- components/raftstore-v2/src/router/message.rs | 6 +- components/raftstore/src/store/fsm/peer.rs | 11 +- .../raftstore/src/store/peer_storage.rs | 7 +- .../raftstore/src/store/worker/raftlog_gc.rs | 70 ++-- src/server/debug.rs | 7 +- tests/failpoints/cases/test_snap.rs | 4 +- tests/failpoints/cases/test_stale_peer.rs | 4 +- 24 files changed, 578 insertions(+), 191 deletions(-) create mode 100644 
components/raftstore-v2/src/operation/command/admin/compact_log.rs diff --git a/Cargo.lock b/Cargo.lock index cf53d09da09..5f7ca0b8c7b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4377,6 +4377,7 @@ dependencies = [ "tempfile", "test_pd", "test_util", + "thiserror", "tikv_util", "time", "tracker", diff --git a/components/engine_panic/src/raft_engine.rs b/components/engine_panic/src/raft_engine.rs index 59c0422902c..c3de53b4932 100644 --- a/components/engine_panic/src/raft_engine.rs +++ b/components/engine_panic/src/raft_engine.rs @@ -120,7 +120,16 @@ impl RaftEngine for PanicEngine { panic!() } - fn gc(&self, raft_group_id: u64, mut from: u64, to: u64) -> Result { + fn gc(&self, raft_group_id: u64, from: u64, to: u64, batch: &mut Self::LogBatch) -> Result<()> { + panic!() + } + + fn delete_all_but_one_states_before( + &self, + raft_group_id: u64, + apply_index: u64, + batch: &mut Self::LogBatch, + ) -> Result<()> { panic!() } diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index cb4c5682252..d5331a2ce29 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -3,8 +3,8 @@ // #[PerformanceCriticalPath] use engine_traits::{ Error, Iterable, KvEngine, MiscExt, Mutable, Peekable, RaftEngine, RaftEngineDebug, - RaftEngineReadOnly, RaftLogBatch, RaftLogGcTask, Result, WriteBatch, WriteBatchExt, - WriteOptions, CF_DEFAULT, RAFT_LOG_MULTI_GET_CNT, + RaftEngineReadOnly, RaftLogBatch, Result, WriteBatch, WriteBatchExt, WriteOptions, CF_DEFAULT, + RAFT_LOG_MULTI_GET_CNT, }; use kvproto::{ metapb::Region, @@ -298,27 +298,18 @@ impl RaftEngine for RocksEngine { Ok(()) } - fn batch_gc(&self, groups: Vec) -> Result { - let mut total = 0; - let mut raft_wb = self.write_batch_with_cap(4 * 1024); - for task in groups { - total += self.gc_impl(task.raft_group_id, task.from, task.to, &mut raft_wb)?; - } - // TODO: disable WAL here. 
- if !WriteBatch::is_empty(&raft_wb) { - raft_wb.write()?; - } - Ok(total) + fn gc(&self, raft_group_id: u64, from: u64, to: u64, batch: &mut Self::LogBatch) -> Result<()> { + self.gc_impl(raft_group_id, from, to, batch)?; + Ok(()) } - fn gc(&self, raft_group_id: u64, from: u64, to: u64) -> Result { - let mut raft_wb = self.write_batch_with_cap(1024); - let total = self.gc_impl(raft_group_id, from, to, &mut raft_wb)?; - // TODO: disable WAL here. - if !WriteBatch::is_empty(&raft_wb) { - raft_wb.write()?; - } - Ok(total) + fn delete_all_but_one_states_before( + &self, + _raft_group_id: u64, + _apply_index: u64, + _batch: &mut Self::LogBatch, + ) -> Result<()> { + panic!() } fn flush_metrics(&self, instance: &str) { diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index b75c3e7b7c0..bc54a5e7627 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -339,7 +339,7 @@ pub use crate::range::*; mod raft_engine; pub use raft_engine::{ - CacheStats, RaftEngine, RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch, RaftLogGcTask, + CacheStats, RaftEngine, RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch, RAFT_LOG_MULTI_GET_CNT, }; diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index 7b0e04d0ab5..9e95ae95e14 100644 --- a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -74,12 +74,6 @@ pub trait RaftEngineDebug: RaftEngine + Sync + Send + 'static { } } -pub struct RaftLogGcTask { - pub raft_group_id: u64, - pub from: u64, - pub to: u64, -} - // TODO: Refactor common methods between Kv and Raft engine into a shared trait. 
pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send + 'static { type LogBatch: RaftLogBatch; @@ -110,17 +104,17 @@ pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send batch: &mut Self::LogBatch, ) -> Result<()>; - /// Like `cut_logs` but the range could be very large. Return the deleted - /// count. Generally, `from` can be passed in `0`. - fn gc(&self, raft_group_id: u64, from: u64, to: u64) -> Result; + /// Like `cut_logs` but the range could be very large. + fn gc(&self, raft_group_id: u64, from: u64, to: u64, batch: &mut Self::LogBatch) -> Result<()>; - fn batch_gc(&self, tasks: Vec) -> Result { - let mut total = 0; - for task in tasks { - total += self.gc(task.raft_group_id, task.from, task.to)?; - } - Ok(total) - } + /// Delete all but the latest one of states that are associated with smaller + /// apply_index. + fn delete_all_but_one_states_before( + &self, + raft_group_id: u64, + apply_index: u64, + batch: &mut Self::LogBatch, + ) -> Result<()>; fn need_manual_purge(&self) -> bool { false diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 7be02e8b6e2..7c98adf325f 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -11,8 +11,8 @@ use codec::number::NumberCodec; use encryption::{DataKeyManager, DecrypterReader, EncrypterWriter}; use engine_traits::{ CacheStats, EncryptionKeyManager, EncryptionMethod, PerfContextExt, PerfContextKind, PerfLevel, - RaftEngine, RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch as RaftLogBatchTrait, - RaftLogGcTask, Result, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, + RaftEngine, RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch as RaftLogBatchTrait, Result, + CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use file_system::{IoOp, IoRateLimiter, IoType}; use kvproto::{ @@ -301,6 +301,7 @@ fn cf_to_id(cf: &str) -> u8 { _ => panic!("unrecognized cf {}", cf), } } +const MAX_CF_ID: u8 
= 3; /// Encode a key in the format `{prefix}{num}`. fn encode_key(prefix: &'static [u8], num: u64) -> [u8; 9] { @@ -380,6 +381,8 @@ const REGION_STATE_KEY: &[u8] = &[0x03]; const APPLY_STATE_KEY: &[u8] = &[0x04]; const RECOVER_STATE_KEY: &[u8] = &[0x05]; const FLUSH_STATE_KEY: &[u8] = &[0x06]; +// All keys are of the same length. +const KEY_PREFIX_LEN: usize = RAFT_LOG_STATE_KEY.len(); impl RaftLogBatchTrait for RaftLogBatch { fn append(&mut self, raft_group_id: u64, entries: Vec) -> Result<()> { @@ -658,34 +661,80 @@ impl RaftEngine for RaftLogEngine { Ok(()) } - fn gc(&self, raft_group_id: u64, from: u64, to: u64) -> Result { - self.batch_gc(vec![RaftLogGcTask { - raft_group_id, - from, - to, - }]) + fn gc( + &self, + raft_group_id: u64, + _from: u64, + to: u64, + batch: &mut Self::LogBatch, + ) -> Result<()> { + batch + .0 + .add_command(raft_group_id, Command::Compact { index: to }); + Ok(()) } - fn batch_gc(&self, tasks: Vec) -> Result { - let mut batch = self.log_batch(tasks.len()); - let mut old_first_index = Vec::with_capacity(tasks.len()); - for task in &tasks { - batch - .0 - .add_command(task.raft_group_id, Command::Compact { index: task.to }); - old_first_index.push(self.0.first_index(task.raft_group_id)); - } - - self.consume(&mut batch, false)?; - - let mut total = 0; - for (old_first_index, task) in old_first_index.iter().zip(tasks) { - let new_first_index = self.0.first_index(task.raft_group_id); - if let (Some(old), Some(new)) = (old_first_index, new_first_index) { - total += new.saturating_sub(*old); - } - } - Ok(total as usize) + fn delete_all_but_one_states_before( + &self, + raft_group_id: u64, + apply_index: u64, + batch: &mut Self::LogBatch, + ) -> Result<()> { + // Makes sure REGION_STATE_KEY is the smallest and FLUSH_STATE_KEY is the + // largest. 
+ debug_assert!(REGION_STATE_KEY < APPLY_STATE_KEY); + debug_assert!(APPLY_STATE_KEY < FLUSH_STATE_KEY); + + let mut end = [0; KEY_PREFIX_LEN + 1]; + end[..KEY_PREFIX_LEN].copy_from_slice(FLUSH_STATE_KEY); + end[KEY_PREFIX_LEN] = MAX_CF_ID + 1; + let mut found_region_state = false; + let mut found_apply_state = false; + let mut found_flush_state = [false; MAX_CF_ID as usize + 1]; + self.0 + .scan_raw_messages( + raft_group_id, + Some(REGION_STATE_KEY), + Some(&end), + true, + |key, _| { + match &key[..KEY_PREFIX_LEN] { + REGION_STATE_KEY + if NumberCodec::decode_u64(&key[KEY_PREFIX_LEN..]) <= apply_index => + { + if found_region_state { + batch.0.delete(raft_group_id, key.to_vec()); + } else { + found_region_state = true; + } + } + APPLY_STATE_KEY + if NumberCodec::decode_u64(&key[KEY_PREFIX_LEN..]) <= apply_index => + { + if found_apply_state { + batch.0.delete(raft_group_id, key.to_vec()); + } else { + found_apply_state = true; + } + } + FLUSH_STATE_KEY => { + let cf_id = key[KEY_PREFIX_LEN]; + let tablet_index = NumberCodec::decode_u64(&key[KEY_PREFIX_LEN + 1..]); + if cf_id <= MAX_CF_ID && tablet_index <= apply_index { + if found_flush_state[cf_id as usize] { + batch.0.delete(raft_group_id, key.to_vec()); + } else { + found_flush_state[cf_id as usize] = true; + } + } + } + _ => {} + } + true + }, + ) + .map_err(transfer_error)?; + Ok(()) } fn need_manual_purge(&self) -> bool { diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 4d3d44ec6fd..6726c5ed742 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -55,6 +55,7 @@ rand = "0.8.3" resource_metering = { workspace = true } slog = "2.3" smallvec = "1.4" +thiserror = "1.0" tikv_util = { workspace = true } time = "0.1" tracker = { workspace = true } diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 734c2bf93d4..22145ecdcaa 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ 
b/components/raftstore-v2/src/fsm/peer.rs @@ -189,6 +189,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, fn on_start(&mut self) { self.schedule_tick(PeerTick::Raft); self.schedule_tick(PeerTick::SplitRegionCheck); + self.schedule_tick(PeerTick::PdHeartbeat); + self.schedule_tick(PeerTick::CompactLog); if self.fsm.peer.storage().is_initialized() { self.fsm.peer.schedule_apply_fsm(self.store_ctx); } @@ -206,11 +208,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, match tick { PeerTick::Raft => self.on_raft_tick(), PeerTick::PdHeartbeat => self.on_pd_heartbeat(), - PeerTick::RaftLogGc => unimplemented!(), + PeerTick::CompactLog => self.on_compact_log_tick(), PeerTick::SplitRegionCheck => self.on_split_region_check(), PeerTick::CheckMerge => unimplemented!(), PeerTick::CheckPeerStaleState => unimplemented!(), - PeerTick::EntryCacheEvict => unimplemented!(), + PeerTick::EntryCacheEvict => self.on_entry_cache_evict(), PeerTick::CheckLeaderLease => unimplemented!(), PeerTick::ReactivateMemoryLock => self.on_reactivate_memory_lock_tick(), PeerTick::ReportBuckets => unimplemented!(), diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs new file mode 100644 index 00000000000..af8fb5acc47 --- /dev/null +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -0,0 +1,304 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module contains processing logic of the following: +//! +//! # `CompactLog` and `EntryCacheEvict` ticks +//! +//! On region leader, periodically compacts useless Raft logs from the +//! underlying log engine, and evicts logs from entry cache if it reaches memory +//! limit. +//! +//! # `CompactLog` command +//! +//! Updates truncated index, and compacts logs if the corresponding changes have +//! been persisted in kvdb. 
+ +use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; +use kvproto::raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse, RaftCmdRequest}; +use protobuf::Message; +use raftstore::{ + store::{fsm::new_admin_request, needs_evict_entry_cache, Transport}, + Result, +}; +use slog::{debug, error, info}; +use tikv_util::{box_err, Either}; + +use crate::{ + batch::StoreContext, + fsm::{ApplyResReporter, PeerFsmDelegate}, + operation::AdminCmdResult, + raft::{Apply, Peer}, + router::{CmdResChannel, PeerTick}, +}; + +impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { + pub fn on_compact_log_tick(&mut self) { + if !self.fsm.peer().is_leader() { + // `compact_cache_to` is called when apply, there is no need to call + // `compact_to` here, snapshot generating has already been cancelled + // when the role becomes follower. + return; + } + self.schedule_tick(PeerTick::CompactLog); + + self.fsm + .peer_mut() + .maybe_propose_compact_log(self.store_ctx); + + self.on_entry_cache_evict(); + } + + pub fn on_entry_cache_evict(&mut self) { + if needs_evict_entry_cache(self.store_ctx.cfg.evict_cache_on_memory_ratio) { + self.fsm + .peer_mut() + .entry_storage_mut() + .evict_entry_cache(true); + if !self.fsm.peer().entry_storage().is_entry_cache_empty() { + self.schedule_tick(PeerTick::EntryCacheEvict); + } + } + } +} + +impl Peer { + // Mirrors v1::on_raft_gc_log_tick. + fn maybe_propose_compact_log(&mut self, store_ctx: &mut StoreContext) { + // As leader, we would not keep caches for the peers that didn't response + // heartbeat in the last few seconds. That happens probably because + // another TiKV is down. In this case if we do not clean up the cache, + // it may keep growing. 
+ let drop_cache_duration = + store_ctx.cfg.raft_heartbeat_interval() + store_ctx.cfg.raft_entry_cache_life_time.0; + let cache_alive_limit = std::time::Instant::now() - drop_cache_duration; + + // Leader will replicate the compact log command to followers, + // If we use current replicated_index (like 10) as the compact index, + // when we replicate this log, the newest replicated_index will be 11, + // but we only compact the log to 10, not 11, at that time, + // the first index is 10, and replicated_index is 11, with an extra log, + // and we will do compact again with compact index 11, in cycles... + // So we introduce a threshold, if replicated index - first index > threshold, + // we will try to compact log. + // raft log entries[..............................................] + // ^ ^ + // |-----------------threshold------------ | + // first_index replicated_index + // `alive_cache_idx` is the smallest `replicated_index` of healthy up nodes. + // `alive_cache_idx` is only used to gc cache. + let applied_idx = self.entry_storage().applied_index(); + let truncated_idx = self.entry_storage().truncated_index(); + let first_idx = self.entry_storage().first_index(); + let last_idx = self.entry_storage().last_index(); + + let (mut replicated_idx, mut alive_cache_idx) = (last_idx, last_idx); + for (peer_id, p) in self.raft_group().raft.prs().iter() { + if replicated_idx > p.matched { + replicated_idx = p.matched; + } + if self.peer_heartbeat_is_fresh(*peer_id, &cache_alive_limit) { + if alive_cache_idx > p.matched && p.matched >= truncated_idx { + alive_cache_idx = p.matched; + } else if p.matched == 0 { + // the new peer is still applying snapshot, do not compact cache now + alive_cache_idx = 0; + } + } + } + + // When an election happened or a new peer is added, replicated_idx can be 0. 
+ if replicated_idx > 0 { + assert!( + last_idx >= replicated_idx, + "expect last index {} >= replicated index {}", + last_idx, + replicated_idx + ); + } + + // leader may call `get_term()` on the latest replicated index, so compact + // entries before `alive_cache_idx` instead of `alive_cache_idx + 1`. + self.entry_storage_mut() + .compact_entry_cache(std::cmp::min(alive_cache_idx, applied_idx + 1)); + + let mut compact_idx = if applied_idx > first_idx + && applied_idx - first_idx >= store_ctx.cfg.raft_log_gc_count_limit() + || self.approximate_raft_log_size() >= store_ctx.cfg.raft_log_gc_size_limit().0 + { + std::cmp::max(first_idx + (last_idx - first_idx) / 2, replicated_idx) + } else if replicated_idx < first_idx + || last_idx - first_idx < 3 + || replicated_idx - first_idx < store_ctx.cfg.raft_log_gc_threshold + && self.maybe_skip_compact_log(store_ctx.cfg.raft_log_reserve_max_ticks) + { + return; + } else { + replicated_idx + }; + assert!(compact_idx >= first_idx); + // Have no idea why subtract 1 here, but original code did this by magic. + compact_idx -= 1; + if compact_idx < first_idx { + return; + } + + // Create a compact log request and notify directly. 
+ // TODO: move this into a function + let term = self.raft_group().raft.raft_log.term(compact_idx).unwrap(); + + let mut req = new_admin_request(self.region_id(), self.peer().clone()); + let mut admin = AdminRequest::default(); + admin.set_cmd_type(AdminCmdType::CompactLog); + admin.mut_compact_log().set_compact_index(compact_idx); + admin.mut_compact_log().set_compact_term(term); + req.set_admin_request(admin); + + let (ch, _) = CmdResChannel::pair(); + self.on_admin_command(store_ctx, req, ch); + + self.reset_skip_compact_log_ticks(); + } +} + +#[derive(Debug)] +pub struct CompactLogResult { + index: u64, + compact_index: u64, + compact_term: u64, +} + +impl Peer { + pub fn propose_compact_log( + &mut self, + store_ctx: &mut StoreContext, + req: RaftCmdRequest, + ) -> Result { + let compact_log = req.get_admin_request().get_compact_log(); + // TODO: add unit tests to cover all the message integrity checks. + if compact_log.get_compact_term() == 0 { + info!( + self.logger, + "compact term missing, skip"; + "command" => ?compact_log + ); + // old format compact log command, safe to ignore. 
+ return Err(box_err!( + "command format is outdated, please upgrade leader" + )); + } + + let data = req.write_to_bytes().unwrap(); + self.propose(store_ctx, data) + } +} + +impl Apply { + pub fn apply_compact_log( + &mut self, + req: &AdminRequest, + index: u64, + ) -> Result<(AdminResponse, AdminCmdResult)> { + Ok(( + AdminResponse::default(), + AdminCmdResult::CompactLog(CompactLogResult { + index, + compact_index: req.get_compact_log().get_compact_index(), + compact_term: req.get_compact_log().get_compact_term(), + }), + )) + } +} + +impl Peer { + pub fn on_apply_res_compact_log( + &mut self, + store_ctx: &mut StoreContext, + res: CompactLogResult, + ) { + let first_index = self.entry_storage().first_index(); + if res.compact_index <= first_index { + debug!( + self.logger, + "compact index <= first index, no need to compact"; + "compact_index" => res.compact_index, + "first_index" => first_index, + ); + return; + } + // TODO: check is_merging + // TODO: check entry_cache_warmup_state + self.entry_storage_mut() + .compact_entry_cache(res.compact_index); + self.storage_mut() + .cancel_generating_snap_due_to_compacted(res.compact_index); + + let truncated_state = self + .entry_storage_mut() + .apply_state_mut() + .mut_truncated_state(); + let old_truncated = truncated_state.get_index(); + truncated_state.set_index(res.compact_index); + truncated_state.set_term(res.compact_term); + + let region_id = self.region_id(); + // TODO: get around this clone. 
+ let apply_state = self.entry_storage().apply_state().clone(); + self.state_changes_mut() + .put_apply_state(region_id, res.index, &apply_state) + .unwrap(); + self.set_has_extra_write(); + + self.maybe_compact_log_from_engine(store_ctx, Either::Right(old_truncated)); + } + + #[inline] + pub fn on_advance_persisted_apply_index( + &mut self, + store_ctx: &mut StoreContext, + old_persisted: u64, + ) { + let new_persisted = self.storage().apply_trace().persisted_apply_index(); + if old_persisted < new_persisted { + // TODO: batch it. + if let Err(e) = store_ctx.engine.delete_all_but_one_states_before( + self.region_id(), + new_persisted, + self.state_changes_mut(), + ) { + error!(self.logger, "failed to delete raft states"; "err" => ?e); + } else { + self.set_has_extra_write(); + } + self.maybe_compact_log_from_engine(store_ctx, Either::Left(old_persisted)); + } + } + + pub fn maybe_compact_log_from_engine( + &mut self, + store_ctx: &mut StoreContext, + old_index: Either, + ) { + let truncated = self.entry_storage().truncated_index(); + let persisted = self.storage().apply_trace().persisted_apply_index(); + match old_index { + Either::Left(old_persisted) if old_persisted >= truncated => return, + Either::Right(old_truncated) if old_truncated >= persisted => return, + _ => {} + } + let compact_index = std::cmp::min(truncated, persisted); + // Raft Engine doesn't care about first index. 
+ if let Err(e) = + store_ctx + .engine + .gc(self.region_id(), 0, compact_index, self.state_changes_mut()) + { + error!(self.logger, "failed to compact raft logs"; "err" => ?e); + } else { + self.set_has_extra_write(); + let applied = self.storage().apply_state().get_applied_index(); + let total_cnt = applied - self.storage().entry_storage().first_index() + 1; + let remain_cnt = applied - compact_index; + self.update_approximate_raft_log_size(|s| s * remain_cnt / total_cnt); + } + } +} diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 0b3d588abf7..9afd50a5305 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -1,19 +1,22 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +mod compact_log; mod conf_change; mod split; mod transfer_leader; +use compact_log::CompactLogResult; +use conf_change::ConfChangeResult; use engine_traits::{KvEngine, RaftEngine}; use kvproto::raft_cmdpb::{AdminCmdType, RaftCmdRequest}; use protobuf::Message; use raftstore::store::{cmd_resp, fsm::apply, msg::ErrorCallback}; use slog::info; -pub use split::{RequestSplit, SplitFlowControl, SplitInit, SplitResult, SPLIT_PREFIX}; +use split::SplitResult; +pub use split::{RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX}; use tikv_util::box_err; use txn_types::WriteBatchFlags; -use self::conf_change::ConfChangeResult; use crate::{batch::StoreContext, raft::Peer, router::CmdResChannel}; #[derive(Debug)] @@ -23,6 +26,7 @@ pub enum AdminCmdResult { SplitRegion(SplitResult), ConfChange(ConfChangeResult), TransferLeader(u64), + CompactLog(CompactLogResult), } impl Peer { @@ -93,7 +97,7 @@ impl Peer { .contains(WriteBatchFlags::TRANSFER_LEADER_PROPOSAL) { let data = req.write_to_bytes().unwrap(); - self.propose_with_ctx(ctx, data, vec![]) + self.propose(ctx, data) } else { if 
self.propose_transfer_leader(ctx, req, ch) { self.set_has_ready(); @@ -101,6 +105,7 @@ impl Peer { return; } } + AdminCmdType::CompactLog => self.propose_compact_log(ctx, req), _ => unimplemented!(), } }; diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 391f0253439..2154eb20e90 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -388,13 +388,11 @@ impl Peer { pub fn on_apply_res_split( &mut self, store_ctx: &mut StoreContext, - derived_index: usize, - tablet_index: u64, - regions: Vec, + res: SplitResult, ) { fail_point!("on_split", self.peer().get_store_id() == 3, |_| {}); - let derived = ®ions[derived_index]; + let derived = &res.regions[res.derived_index]; let derived_epoch = derived.get_region_epoch().clone(); let region_id = derived.get_id(); @@ -408,7 +406,7 @@ impl Peer { // Update the version so the concurrent reader will fail due to EpochNotMatch // instead of PessimisticLockNotFound. pessimistic_locks.version = derived_epoch.get_version(); - pessimistic_locks.group_by_regions(®ions, derived) + pessimistic_locks.group_by_regions(&res.regions, derived) }; fail_point!("on_split_invalidate_locks"); @@ -421,7 +419,7 @@ impl Peer { reader, derived.clone(), RegionChangeReason::Split, - tablet_index, + res.tablet_index, ); } @@ -433,19 +431,17 @@ impl Peer { info!( self.logger, "notify pd with split"; - "region_id" => self.region_id(), - "peer_id" => self.peer_id(), - "split_count" => regions.len(), + "split_count" => res.regions.len(), ); // Now pd only uses ReportBatchSplit for history operation show, // so we send it independently here. 
- self.report_batch_split_pd(store_ctx, regions.to_vec()); + self.report_batch_split_pd(store_ctx, res.regions.to_vec()); self.add_pending_tick(PeerTick::SplitRegionCheck); } - let last_region_id = regions.last().unwrap().get_id(); + let last_region_id = res.regions.last().unwrap().get_id(); let mut new_ids = HashSet::default(); - for (new_region, locks) in regions.into_iter().zip(region_locks) { + for (new_region, locks) in res.regions.into_iter().zip(region_locks) { let new_region_id = new_region.get_id(); if new_region_id == region_id { continue; @@ -480,10 +476,10 @@ impl Peer { _ => unreachable!(), } } - self.split_trace_mut().push((tablet_index, new_ids)); + self.split_trace_mut().push((res.tablet_index, new_ids)); let region_state = self.storage().region_state().clone(); self.state_changes_mut() - .put_region_state(region_id, tablet_index, ®ion_state) + .put_region_state(region_id, res.tablet_index, ®ion_state) .unwrap(); self.set_has_extra_write(); } diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 49040a20278..7fa2fa776c2 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -51,9 +51,7 @@ mod admin; mod control; mod write; -pub use admin::{ - AdminCmdResult, RequestSplit, SplitFlowControl, SplitInit, SplitResult, SPLIT_PREFIX, -}; +pub use admin::{AdminCmdResult, RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX}; pub use control::ProposalControl; pub use write::{ SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, @@ -267,17 +265,14 @@ impl Peer { AdminCmdResult::ConfChange(conf_change) => { self.on_apply_res_conf_change(ctx, conf_change) } - AdminCmdResult::SplitRegion(SplitResult { - regions, - derived_index, - tablet_index, - }) => { + AdminCmdResult::SplitRegion(res) => { self.storage_mut() .apply_trace_mut() - .on_admin_modify(tablet_index); - 
self.on_apply_res_split(ctx, derived_index, tablet_index, regions) + .on_admin_modify(res.tablet_index); + self.on_apply_res_split(ctx, res) } AdminCmdResult::TransferLeader(term) => self.on_transfer_leader(ctx, term), + AdminCmdResult::CompactLog(res) => self.on_apply_res_compact_log(ctx, res), } } @@ -446,7 +441,7 @@ impl Apply { if req.has_admin_request() { let admin_req = req.get_admin_request(); let (admin_resp, admin_result) = match req.get_admin_request().get_cmd_type() { - AdminCmdType::CompactLog => unimplemented!(), + AdminCmdType::CompactLog => self.apply_compact_log(admin_req, entry.index)?, AdminCmdType::Split => self.apply_split(admin_req, log_index)?, AdminCmdType::BatchSplit => self.apply_batch_split(admin_req, log_index)?, AdminCmdType::PrepareMerge => unimplemented!(), diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index 0b7521f2634..d5aa93b587a 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -6,7 +6,7 @@ //! //! In summary, we trace the persist progress by recording flushed event. //! Because memtable is flushed one by one, so a flushed memtable must contain -//! all the data within the CF before some certain apply index. So the minimun +//! all the data within the CF before certain apply index. So the minimun //! flushed apply index + 1 of all data CFs is the recovery start point. In //! some cases, a CF may not have any updates at all for a long time. In some //! cases, we may still need to recover from smaller index even if flushed @@ -121,7 +121,7 @@ impl engine_traits::StateStorage for StateStorage< } } -/// An alias of frequent use type that each data cf has a u64. +/// Mapping from data cf to an u64 index. 
pub type DataTrace = [u64; DATA_CFS_LEN]; #[derive(Clone, Copy, Default)] @@ -211,7 +211,7 @@ impl ApplyTrace { self.admin.last_modified = index; } - fn persisted_apply_index(&self) -> u64 { + pub fn persisted_apply_index(&self) -> u64 { self.admin.flushed } @@ -237,7 +237,7 @@ impl ApplyTrace { let candidate = cmp::min(mem_index, min_flushed.unwrap_or(u64::MAX)); if candidate > self.admin.flushed { self.admin.flushed = candidate; - if candidate > self.persisted_applied + 100 { + if self.admin.flushed > self.persisted_applied + 100 { self.try_persist = true; } } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 0e911e48255..14010fc9fe2 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -31,7 +31,7 @@ use protobuf::Message as _; use raft::{eraftpb, prelude::MessageType, Ready, StateRole, INVALID_ID}; use raftstore::{ coprocessor::{RegionChangeEvent, RoleChange}, - store::{util, FetchedLogs, ReadProgress, Transport, WriteTask}, + store::{needs_evict_entry_cache, util, FetchedLogs, ReadProgress, Transport, WriteTask}, }; use slog::{debug, error, trace, warn}; use tikv_util::{ @@ -275,7 +275,7 @@ impl Peer { // asynchronously. if self.is_leader() { for entry in committed_entries.iter().rev() { - // TODO: handle raft_log_size_hint + self.update_approximate_raft_log_size(|s| s + entry.get_data().len() as u64); let propose_time = self .proposals() .find_propose_time(entry.get_term(), entry.get_index()); @@ -293,6 +293,10 @@ impl Peer { } } } + if needs_evict_entry_cache(ctx.cfg.evict_cache_on_memory_ratio) { + // Compact all cached entries instead of half evict. 
+ self.entry_storage_mut().evict_entry_cache(false); + } self.schedule_apply_committed_entries(committed_entries); } @@ -375,9 +379,12 @@ impl Peer { let ready_number = ready.number(); let mut write_task = WriteTask::new(self.region_id(), self.peer_id(), ready_number); + let prev_persisted = self.storage().apply_trace().persisted_apply_index(); self.merge_state_changes_to(&mut write_task); self.storage_mut() .handle_raft_ready(ctx, &mut ready, &mut write_task); + self.on_advance_persisted_apply_index(ctx, prev_persisted); + if !ready.persisted_messages().is_empty() { write_task.messages = ready .take_persisted_messages() @@ -517,10 +524,11 @@ impl Peer { self.entry_storage_mut().clear_entry_cache_warmup_state(); self.region_heartbeat_pd(ctx); + self.add_pending_tick(PeerTick::CompactLog); } StateRole::Follower => { self.leader_lease_mut().expire(); - self.storage_mut().cancel_generating_snap(None, None); + self.storage_mut().cancel_generating_snap(None); self.clear_in_memory_pessimistic_locks(); } _ => {} diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 149505f0af4..e1a36ed8ec7 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -252,7 +252,7 @@ impl Storage { match state { SnapState::Generating { ref canceled, .. } => { if canceled.load(Ordering::SeqCst) { - self.cancel_generating_snap(Some(to), None); + self.cancel_generating_snap(Some(to)); } else { return Err(raft::Error::Store( raft::StorageError::SnapshotTemporarilyUnavailable, @@ -337,41 +337,56 @@ impl Storage { true } - pub fn cancel_generating_snap(&self, to: Option, compact_to: Option) { - if let Some(id) = to { + pub fn cancel_generating_snap(&self, to_peer: Option) { + if let Some(id) = to_peer { let mut states = self.snap_states.borrow_mut(); - if let Some(state) = states.get(&id) { - let SnapState::Generating { - ref index, - .. 
- } = *state else { return }; - if let Some(idx) = compact_to { - let snap_index = index.load(Ordering::SeqCst); - if snap_index == 0 || idx <= snap_index + 1 { - return; - } - } + if let Some(state) = states.get(&id) + && matches!(*state, SnapState::Generating { .. }) + { info!( self.logger(), "snapshot is canceled"; - "compact_to" => compact_to, + "to_peer" => to_peer, ); - self.cancel_snap_task(to); + self.cancel_snap_task(to_peer); states.remove(&id); } } else { - self.cancel_snap_task(to); + self.cancel_snap_task(to_peer); self.snap_states.borrow_mut().clear(); } STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER.cancel.inc(); } + pub fn cancel_generating_snap_due_to_compacted(&self, compact_to: u64) { + let mut states = self.snap_states.borrow_mut(); + states.retain(|id, state| { + let SnapState::Generating { + ref index, + .. + } = *state else { return true; }; + let snap_index = index.load(Ordering::SeqCst); + if snap_index == 0 || compact_to <= snap_index + 1 { + return true; + } + info!( + self.logger(), + "snapshot is canceled"; + "compact_to" => compact_to, + "to_peer" => id, + ); + self.cancel_snap_task(Some(*id)); + STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER.cancel.inc(); + false + }); + } + /// Try to switch snap state to generated. only `Generating` can switch to /// `Generated`. /// TODO: make the snap state more clearer, the snapshot must be consumed. pub fn on_snapshot_generated(&self, res: GenSnapRes) -> bool { if res.is_none() { - self.cancel_generating_snap(None, None); + self.cancel_generating_snap(None); return false; } let (snapshot, to_peer_id) = *res.unwrap(); diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 25285f289a7..ca5aafa3bfb 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -53,6 +53,10 @@ pub struct Peer { /// Statistics for other peers, only maintained when self is the leader. peer_heartbeats: HashMap, + /// For raft log compaction. 
+ skip_compact_log_ticks: usize, + approximate_raft_log_size: u64, + /// Encoder for batching proposals and encoding them in a more efficient way /// than protobuf. raw_write_encoder: Option, @@ -140,6 +144,8 @@ impl Peer { self_stat: PeerStat::default(), peer_cache: vec![], peer_heartbeats: HashMap::default(), + skip_compact_log_ticks: 0, + approximate_raft_log_size: 0, raw_write_encoder: None, proposals: ProposalQueue::new(region_id, raft_group.raft.id), async_writer: AsyncWriter::new(region_id, peer_id), @@ -450,6 +456,16 @@ impl Peer { self.peer_heartbeats.remove(&peer_id); } + /// Returns whether or not the peer sent heartbeat after the provided + /// deadline time. + #[inline] + pub fn peer_heartbeat_is_fresh(&self, peer_id: u64, deadline: &Instant) -> bool { + matches!( + self.peer_heartbeats.get(&peer_id), + Some(last_heartbeat) if *last_heartbeat >= *deadline + ) + } + pub fn collect_down_peers(&self, max_duration: Duration) -> Vec { let mut down_peers = Vec::new(); let now = Instant::now(); @@ -471,6 +487,31 @@ impl Peer { down_peers } + #[inline] + pub fn reset_skip_compact_log_ticks(&mut self) { + self.skip_compact_log_ticks = 0; + } + + #[inline] + pub fn maybe_skip_compact_log(&mut self, max_skip_ticks: usize) -> bool { + if self.skip_compact_log_ticks < max_skip_ticks { + self.skip_compact_log_ticks += 1; + true + } else { + false + } + } + + #[inline] + pub fn approximate_raft_log_size(&self) -> u64 { + self.approximate_raft_log_size + } + + #[inline] + pub fn update_approximate_raft_log_size(&mut self, f: impl Fn(u64) -> u64) { + self.approximate_raft_log_size = f(self.approximate_raft_log_size); + } + #[inline] pub fn state_role(&self) -> StateRole { self.raft_group.raft.state @@ -698,6 +739,7 @@ impl Peer { self.flush_state = Arc::default(); } + // Note: Call `set_has_extra_write` after adding new state changes. 
#[inline] pub fn state_changes_mut(&mut self) -> &mut ER::LogBatch { if self.state_changes.is_none() { diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 1015b5aaac7..959f817ebd7 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -474,7 +474,7 @@ mod tests { let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); apply.schedule_gen_snapshot(gen_task); let _res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); - s.cancel_generating_snap(None, None); + s.cancel_generating_snap(None); assert!(s.snap_states.borrow().get(&to_peer_id).is_none()); // Test get twice snapshot and cancel once. @@ -485,7 +485,7 @@ mod tests { apply.set_apply_progress(1, 5); apply.schedule_gen_snapshot(gen_task_a); let res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); - s.cancel_generating_snap(None, None); + s.cancel_generating_snap(None); // cancel get snapshot a, try get snaphsot b let snap = s.snapshot(0, 0); assert_eq!(snap.unwrap_err(), unavailable); diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index a69f6b5ead6..cd88a23c744 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -22,7 +22,7 @@ use crate::operation::{RequestSplit, SimpleWriteBinary, SplitInit}; #[repr(u8)] pub enum PeerTick { Raft = 0, - RaftLogGc = 1, + CompactLog = 1, SplitRegionCheck = 2, PdHeartbeat = 3, CheckMerge = 4, @@ -41,7 +41,7 @@ impl PeerTick { pub fn tag(self) -> &'static str { match self { PeerTick::Raft => "raft", - PeerTick::RaftLogGc => "raft_log_gc", + PeerTick::CompactLog => "compact_log", PeerTick::SplitRegionCheck => "split_region_check", PeerTick::PdHeartbeat => "pd_heartbeat", PeerTick::CheckMerge => "check_merge", @@ -57,7 +57,7 @@ impl PeerTick { pub const fn all_ticks() -> &'static [PeerTick] { const TICKS: &[PeerTick] = &[ PeerTick::Raft, - 
PeerTick::RaftLogGc, + PeerTick::CompactLog, PeerTick::SplitRegionCheck, PeerTick::PdHeartbeat, PeerTick::CheckMerge, diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 225126f0edb..1b484df5316 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -54,7 +54,7 @@ use tikv_util::{ box_err, debug, defer, error, escape, info, is_zero_duration, mpsc::{self, LooseBoundedSender, Receiver}, store::{find_peer, is_learner, region_on_same_stores}, - sys::{disk::DiskUsage, memory_usage_reaches_high_water}, + sys::disk::DiskUsage, time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant}, trace, warn, worker::{ScheduleError, Scheduler}, @@ -5425,12 +5425,9 @@ where fail_point!("on_entry_cache_evict_tick", |_| {}); if needs_evict_entry_cache(self.ctx.cfg.evict_cache_on_memory_ratio) { self.fsm.peer.mut_store().evict_entry_cache(true); - } - let mut _usage = 0; - if memory_usage_reaches_high_water(&mut _usage) - && !self.fsm.peer.get_store().is_entry_cache_empty() - { - self.register_entry_cache_evict_tick(); + if !self.fsm.peer.get_store().is_entry_cache_empty() { + self.register_entry_cache_evict_tick(); + } } } diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index ce25544bcd8..c9e460d1cbc 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -2124,7 +2124,8 @@ pub mod tests { assert!(build_storage().is_err()); // It should not recover if corresponding log doesn't exist. 
- engines.raft.gc(1, 14, 15).unwrap(); + engines.raft.gc(1, 14, 15, &mut lb).unwrap(); + engines.raft.consume(&mut lb, false).unwrap(); apply_state.set_commit_index(14); apply_state.set_commit_term(RAFT_INIT_LOG_TERM); engines @@ -2136,7 +2137,7 @@ pub mod tests { let entries = (14..=20) .map(|index| new_entry(index, RAFT_INIT_LOG_TERM)) .collect(); - engines.raft.gc(1, 0, 21).unwrap(); + engines.raft.gc(1, 0, 21, &mut lb).unwrap(); lb.append(1, entries).unwrap(); engines.raft.consume(&mut lb, false).unwrap(); raft_state.mut_hard_state().set_commit(14); @@ -2164,7 +2165,7 @@ pub mod tests { assert!(build_storage().is_err()); // last index < recorded_commit_index is invalid. - engines.raft.gc(1, 0, 21).unwrap(); + engines.raft.gc(1, 0, 21, &mut lb).unwrap(); raft_state.mut_hard_state().set_term(RAFT_INIT_LOG_TERM); raft_state.set_last_index(13); lb.append(1, vec![new_entry(13, RAFT_INIT_LOG_TERM)]) diff --git a/components/raftstore/src/store/worker/raftlog_gc.rs b/components/raftstore/src/store/worker/raftlog_gc.rs index f93213dfa0d..ce829ed61b2 100644 --- a/components/raftstore/src/store/worker/raftlog_gc.rs +++ b/components/raftstore/src/store/worker/raftlog_gc.rs @@ -3,10 +3,9 @@ use std::{ error::Error as StdError, fmt::{self, Display, Formatter}, - sync::mpsc::Sender, }; -use engine_traits::{Engines, KvEngine, RaftEngine, RaftLogGcTask}; +use engine_traits::{Engines, KvEngine, RaftEngine}; use file_system::{IoType, WithIoType}; use thiserror::Error; use tikv_util::{ @@ -73,7 +72,6 @@ enum Error { pub struct Runner { tasks: Vec, engines: Engines, - gc_entries: Option>, compact_sync_interval: Duration, } @@ -82,25 +80,15 @@ impl Runner { Runner { engines, tasks: vec![], - gc_entries: None, compact_sync_interval: compact_log_interval, } } - /// Does the GC job and returns the count of logs collected. 
- fn gc_raft_log(&mut self, regions: Vec) -> Result { - fail::fail_point!("worker_gc_raft_log", |s| { - Ok(s.and_then(|s| s.parse().ok()).unwrap_or(0)) - }); - let deleted = box_try!(self.engines.raft.batch_gc(regions)); - fail::fail_point!("worker_gc_raft_log_finished", |_| { Ok(deleted) }); - Ok(deleted) - } - - fn report_collected(&self, collected: usize) { - if let Some(ref ch) = self.gc_entries { - ch.send(collected).unwrap(); - } + fn raft_log_gc(&mut self, mut batch: ER::LogBatch) -> Result<(), Error> { + fail::fail_point!("worker_gc_raft_log", |_| Ok(())); + box_try!(self.engines.raft.consume(&mut batch, false)); + fail::fail_point!("worker_gc_raft_log_finished"); + Ok(()) } fn flush(&mut self) { @@ -115,9 +103,11 @@ impl Runner { panic!("failed to sync kv_engine in raft_log_gc: {:?}", e); }); RAFT_LOG_GC_KV_SYNC_DURATION_HISTOGRAM.observe(start.saturating_elapsed_secs()); + let tasks = std::mem::take(&mut self.tasks); - let mut groups = Vec::with_capacity(tasks.len()); let mut cbs = Vec::new(); + let mut batch = self.engines.raft.log_batch(tasks.len()); + let start = Instant::now(); for t in tasks { debug!("gc raft log"; "region_id" => t.region_id, "start_index" => t.start_idx, "end_index" => t.end_idx); if let Some(cb) = t.cb { @@ -137,28 +127,22 @@ impl Runner { "end_index" => t.end_idx, ); } - groups.push(RaftLogGcTask { - raft_group_id: t.region_id, - from: t.start_idx, - to: t.end_idx, - }); - } - let start = Instant::now(); - match self.gc_raft_log(groups) { - Err(e) => { + if let Err(e) = self + .engines + .raft + .gc(t.region_id, t.start_idx, t.end_idx, &mut batch) + { error!("failed to gc"; "err" => %e); - self.report_collected(0); RAFT_LOG_GC_FAILED.inc(); } - Ok(n) => { - debug!("gc log entries"; "entry_count" => n); - self.report_collected(n); - RAFT_LOG_GC_DELETED_KEYS_HISTOGRAM.observe(n as f64); - } + } + if let Err(e) = self.raft_log_gc(batch) { + error!("failed to write gc task"; "err" => %e); + RAFT_LOG_GC_FAILED.inc(); } 
RAFT_LOG_GC_WRITE_DURATION_HISTOGRAM.observe(start.saturating_elapsed_secs()); for cb in cbs { - cb() + cb(); } } } @@ -201,7 +185,7 @@ where #[cfg(test)] mod tests { - use std::{sync::mpsc, time::Duration}; + use std::time::Duration; use engine_traits::{RaftEngine, RaftLogBatch, ALL_CFS}; use raft::eraftpb::Entry; @@ -218,9 +202,7 @@ mod tests { let kv_db = engine_test::kv::new_engine(path_raft.to_str().unwrap(), ALL_CFS).unwrap(); let engines = Engines::new(kv_db, raft_db.clone()); - let (tx, rx) = mpsc::channel(); let mut runner = Runner { - gc_entries: Some(tx), engines, tasks: vec![], compact_sync_interval: Duration::from_secs(5), @@ -237,17 +219,15 @@ mod tests { raft_db.consume(&mut raft_wb, false /* sync */).unwrap(); let tbls = vec![ - (Task::gc(region_id, 0, 10), 10, (0, 10), (10, 100)), - (Task::gc(region_id, 0, 50), 40, (0, 50), (50, 100)), - (Task::gc(region_id, 50, 50), 0, (0, 50), (50, 100)), - (Task::gc(region_id, 50, 60), 10, (0, 60), (60, 100)), + (Task::gc(region_id, 0, 10), (0, 10), (10, 100)), + (Task::gc(region_id, 0, 50), (0, 50), (50, 100)), + (Task::gc(region_id, 50, 50), (0, 50), (50, 100)), + (Task::gc(region_id, 50, 60), (0, 60), (60, 100)), ]; - for (task, expected_collectd, not_exist_range, exist_range) in tbls { + for (task, not_exist_range, exist_range) in tbls { runner.run(task); runner.flush(); - let res = rx.recv_timeout(Duration::from_secs(3)).unwrap(); - assert_eq!(res, expected_collectd); raft_log_must_not_exist(&raft_db, 1, not_exist_range.0, not_exist_range.1); raft_log_must_exist(&raft_db, 1, exist_range.0, exist_range.1); } diff --git a/src/server/debug.rs b/src/server/debug.rs index 9445133239f..c16621f4d85 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -766,10 +766,8 @@ impl Debugger { )); let mut lb = raft.log_batch(0); box_try!(lb.put_raft_state(region_id, &new_raft_local_state)); - // Will sync later. 
- box_try!(raft.consume(&mut lb, false)); - let deleted_logs = box_try!(raft.gc(region_id, applied_index + 1, last_index + 1)); - raft.sync().unwrap(); + box_try!(raft.gc(region_id, applied_index + 1, last_index + 1, &mut lb)); + box_try!(raft.consume(&mut lb, true)); kv.sync().unwrap(); info!( @@ -779,7 +777,6 @@ impl Debugger { "new_raft_local_state" => ?new_raft_local_state, "old_raft_apply_state" => ?old_raft_apply_state, "new_raft_apply_state" => ?new_raft_apply_state, - "deleted logs" => deleted_logs, ); } diff --git a/tests/failpoints/cases/test_snap.rs b/tests/failpoints/cases/test_snap.rs index dde25bff636..a6a4a1824f3 100644 --- a/tests/failpoints/cases/test_snap.rs +++ b/tests/failpoints/cases/test_snap.rs @@ -698,9 +698,9 @@ fn test_snapshot_clean_up_logs_with_unfinished_log_gc() { // Disable default max peer number check. pd_client.disable_default_operator(); cluster.run(); - // Simulate raft log gc are pending in queue. + // Simulate raft log gc tasks are lost during shutdown. let fp = "worker_gc_raft_log"; - fail::cfg(fp, "return(0)").unwrap(); + fail::cfg(fp, "return").unwrap(); let state = cluster.truncated_state(1, 3); for i in 0..30 { diff --git a/tests/failpoints/cases/test_stale_peer.rs b/tests/failpoints/cases/test_stale_peer.rs index 0321772661d..1a4ef0b0afc 100644 --- a/tests/failpoints/cases/test_stale_peer.rs +++ b/tests/failpoints/cases/test_stale_peer.rs @@ -301,9 +301,9 @@ fn test_destroy_clean_up_logs_with_unfinished_log_gc() { // Disable default max peer number check. pd_client.disable_default_operator(); cluster.run(); - // Simulate raft log gc are pending in queue. + // Simulate raft log gc tasks are lost during shutdown. 
let fp = "worker_gc_raft_log"; - fail::cfg(fp, "return(0)").unwrap(); + fail::cfg(fp, "return").unwrap(); let state = cluster.truncated_state(1, 3); for i in 0..30 { From 7be952a6c6655f1fd6e7860d051d061502334e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 27 Dec 2022 14:46:16 +0800 Subject: [PATCH 0431/1149] log-backup: applied some change to make better RPO (#13940) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#13941, ref pingcap/tidb#39620 - If failed to get initial snapshot, remove the subscription as soon as possible. - Added a cache of getting checkpoint. This cache is lease-based -- the lease time is simply the tick interval of the coordinator. - Make the channel size huger for don't blocking the main loop when many regions migrating. Signed-off-by: hillium Signed-off-by: hillium Signed-off-by: 山岚 <36239017+YuJuncen@users.noreply.github.com> Co-authored-by: Ti Chi Robot --- .../backup-stream/src/checkpoint_manager.rs | 15 ++++ components/backup-stream/src/event_loader.rs | 3 +- .../src/metadata/checkpoint_cache.rs | 71 +++++++++++++++++++ .../backup-stream/src/metadata/client.rs | 34 +++++++-- components/backup-stream/src/metadata/mod.rs | 1 + .../backup-stream/src/subscription_manager.rs | 2 +- .../backup-stream/src/subscription_track.rs | 19 ++--- 7 files changed, 129 insertions(+), 16 deletions(-) create mode 100644 components/backup-stream/src/metadata/checkpoint_cache.rs diff --git a/components/backup-stream/src/checkpoint_manager.rs b/components/backup-stream/src/checkpoint_manager.rs index 8c3de3d34ce..5cf4292faa3 100644 --- a/components/backup-stream/src/checkpoint_manager.rs +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -186,6 +186,16 @@ impl CheckpointManager { pub fn add_subscriber(&mut self, sub: Subscription) -> future![Result<()>] { let mgr = self.manager_handle.as_ref().cloned(); + let initial_data 
= self + .items + .values() + .map(|v| FlushEvent { + start_key: v.region.start_key.clone(), + end_key: v.region.end_key.clone(), + checkpoint: v.checkpoint.into_inner(), + ..Default::default() + }) + .collect::>(); // NOTE: we cannot send the real error into the client directly because once // we send the subscription into the sink, we cannot fetch it again :( @@ -208,6 +218,11 @@ impl CheckpointManager { mgr.send(SubscriptionOp::Add(sub)) .await .map_err(|err| annotate!(err, "failed to send request to subscriber manager"))?; + mgr.send(SubscriptionOp::Emit(initial_data)) + .await + .map_err(|err| { + annotate!(err, "failed to send initial data to subscriber manager") + })?; Ok(()) } } diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 27c05b5b875..6222f058cd4 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -236,7 +236,8 @@ where ) -> Result { let mut last_err = None; for _ in 0..MAX_GET_SNAPSHOT_RETRY { - let r = self.observe_over(region, cmd()); + let c = cmd(); + let r = self.observe_over(region, c); match r { Ok(s) => { return Ok(s); diff --git a/components/backup-stream/src/metadata/checkpoint_cache.rs b/components/backup-stream/src/metadata/checkpoint_cache.rs new file mode 100644 index 00000000000..50573d003d8 --- /dev/null +++ b/components/backup-stream/src/metadata/checkpoint_cache.rs @@ -0,0 +1,71 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::time::Duration; + +use tikv_util::time::Instant; +use txn_types::TimeStamp; + +/// The lease time of a checkpoint. +/// 12s is the default interval of the coornaditor tick. 
+const CACHE_LEASE_TIME: Duration = Duration::from_secs(12); + +pub struct CheckpointCache { + last_access: Instant, + checkpoint: TimeStamp, + + cache_lease_time: Duration, +} + +impl Default for CheckpointCache { + fn default() -> Self { + Self { + last_access: Instant::now_coarse(), + checkpoint: TimeStamp::zero(), + + cache_lease_time: CACHE_LEASE_TIME, + } + } +} + +impl CheckpointCache { + #[cfg(test)] + pub fn with_cache_lease(lease: Duration) -> Self { + Self { + cache_lease_time: lease, + ..Self::default() + } + } + + pub fn update(&mut self, checkpoint: impl Into) { + self.last_access = Instant::now_coarse(); + self.checkpoint = self.checkpoint.max(checkpoint.into()) + } + + pub fn get(&self) -> Option { + if self.checkpoint.is_zero() + || self.last_access.saturating_elapsed() > self.cache_lease_time + { + return None; + } + Some(self.checkpoint) + } +} + +#[cfg(test)] +mod test { + use std::time::Duration; + + use super::CheckpointCache; + + #[test] + fn test_basic() { + let mut c = CheckpointCache::with_cache_lease(Duration::from_millis(100)); + assert_eq!(c.get(), None); + c.update(42); + assert_eq!(c.get(), Some(42.into())); + c.update(41); + assert_eq!(c.get(), Some(42.into())); + std::thread::sleep(Duration::from_millis(200)); + assert_eq!(c.get(), None); + } +} diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index 2c0fd2577fc..97e8d2140b5 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -1,7 +1,8 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{cmp::Ordering, collections::HashMap, fmt::Debug, path::Path}; +use std::{cmp::Ordering, collections::HashMap, fmt::Debug, path::Path, sync::Arc}; +use dashmap::DashMap; use kvproto::{ brpb::{StreamBackupError, StreamBackupTaskInfo}, metapb::Region, @@ -11,6 +12,7 @@ use tokio_stream::StreamExt; use txn_types::TimeStamp; use super::{ + checkpoint_cache::CheckpointCache, keys::{self, KeyValue, MetaKey}, store::{ CondTransaction, Condition, GetExtra, Keys, KvEvent, KvEventType, MetaStore, Snapshot, @@ -26,6 +28,7 @@ use crate::{ #[derive(Clone)] pub struct MetadataClient { store_id: u64, + caches: Arc>, pub(crate) meta_store: Store, } @@ -239,6 +242,7 @@ impl MetadataClient { pub fn new(store: Store, store_id: u64) -> Self { Self { meta_store: store, + caches: Arc::default(), store_id, } } @@ -698,21 +702,41 @@ impl MetadataClient { Ok(min_checkpoint) } + fn cached_checkpoint(&self, task: &str) -> Option { + self.caches + .get(task) + .and_then(|x| x.value().get()) + .map(|x| Checkpoint { + provider: CheckpointProvider::Global, + ts: x, + }) + } + + fn update_cache(&self, task: &str, checkpoint: TimeStamp) { + let mut c = self.caches.entry(task.to_owned()).or_default(); + c.value_mut().update(checkpoint); + } + pub async fn get_region_checkpoint(&self, task: &str, region: &Region) -> Result { + if let Some(c) = self.cached_checkpoint(task) { + return Ok(c); + } let key = MetaKey::next_bakcup_ts_of_region(task, region); let s = self.meta_store.snapshot().await?; let r = s.get(Keys::Key(key.clone())).await?; - match r.len() { + let cp = match r.len() { 0 => { let global_cp = self.global_checkpoint_of(task).await?; let cp = match global_cp { None => self.get_task_start_ts_checkpoint(task).await?, Some(cp) => cp, }; - Ok(cp) + cp } - _ => Ok(Checkpoint::from_kv(&r[0])?), - } + _ => Checkpoint::from_kv(&r[0])?, + }; + self.update_cache(task, cp.ts); + Ok(cp) } } diff --git a/components/backup-stream/src/metadata/mod.rs 
b/components/backup-stream/src/metadata/mod.rs index a616ace2dc6..20887a24b02 100644 --- a/components/backup-stream/src/metadata/mod.rs +++ b/components/backup-stream/src/metadata/mod.rs @@ -1,5 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +mod checkpoint_cache; mod client; pub mod keys; mod metrics; diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index 624392f3df8..91b4c096e7d 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -281,7 +281,7 @@ impl ScanPoolHandle { } /// The default channel size. -const MESSAGE_BUFFER_SIZE: usize = 4096; +const MESSAGE_BUFFER_SIZE: usize = 32768; /// The operator for region subscription. /// It make a queue for operations over the `SubscriptionTracer`, generally, diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 6b51f983a3b..a24076661bb 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -2,7 +2,10 @@ use std::{sync::Arc, time::Duration}; -use dashmap::{mapref::one::RefMut, DashMap}; +use dashmap::{ + mapref::{entry::Entry, one::RefMut}, + DashMap, +}; use kvproto::metapb::Region; use raftstore::coprocessor::*; use resolved_ts::Resolver; @@ -185,21 +188,19 @@ impl SubscriptionTracer { if_cond: impl FnOnce(&RegionSubscription, &Region) -> bool, ) -> bool { let region_id = region.get_id(); - let remove_result = self.0.remove(®ion_id); + let remove_result = self.0.entry(region_id); match remove_result { - Some((_, mut v)) => { - if if_cond(&v, region) { + Entry::Occupied(mut x) => { + if if_cond(x.get(), region) { TRACK_REGION.dec(); - v.stop(); + x.get_mut().stop(); + let v = x.remove(); info!("stop listen stream from store"; "observer" => ?v, "region_id"=> %region_id); return true; } false } - None => { - 
debug!("trying to deregister region not registered"; "region_id" => %region_id); - false - } + Entry::Vacant(_) => false, } } From c3903b81ff42102130f05dc2d8d6debd49cafc1a Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 27 Dec 2022 16:24:16 +0800 Subject: [PATCH 0432/1149] server: improve the ergonomics of sharing things between tablets (#13984) ref tikv/tikv#12842 Signed-off-by: tabokie --- Cargo.lock | 6 +- cmd/tikv-ctl/src/executor.rs | 3 +- cmd/tikv-ctl/src/main.rs | 14 +- components/engine_rocks/src/db_options.rs | 22 +- components/engine_rocks/src/raw.rs | 12 +- components/engine_rocks/src/rocks_metrics.rs | 7 +- .../engine_rocks/src/rocks_metrics_defs.rs | 5 +- components/engine_rocks/src/write_batch.rs | 11 +- components/server/src/raft_engine_switch.rs | 2 +- components/server/src/server.rs | 7 +- components/server/src/server2.rs | 7 +- components/snap_recovery/src/init_cluster.rs | 3 +- etc/config-template.toml | 11 + src/config/mod.rs | 237 ++++++++++++++---- src/server/engine_factory.rs | 22 +- src/storage/kv/test_engine_builder.rs | 10 +- src/storage/mod.rs | 11 +- tests/integrations/config/mod.rs | 8 + tests/integrations/config/test-custom.toml | 6 + tests/integrations/storage/test_titan.rs | 13 +- 20 files changed, 292 insertions(+), 125 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5f7ca0b8c7b..4c510da6d77 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2876,7 +2876,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#0ef7101a061c513c684ad68acd15f01c8548b43a" +source = "git+https://github.com/tikv/rust-rocksdb.git#14e4fe7f47054408cf3d2905beeca798c6656191" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2895,7 +2895,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#0ef7101a061c513c684ad68acd15f01c8548b43a" +source = 
"git+https://github.com/tikv/rust-rocksdb.git#14e4fe7f47054408cf3d2905beeca798c6656191" dependencies = [ "bzip2-sys", "cc", @@ -4761,7 +4761,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#0ef7101a061c513c684ad68acd15f01c8548b43a" +source = "git+https://github.com/tikv/rust-rocksdb.git#14e4fe7f47054408cf3d2905beeca798c6656191" dependencies = [ "libc 0.2.132", "librocksdb_sys", diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 42b08c629e7..94610face44 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -76,8 +76,7 @@ pub fn new_debug_executor( let cfg_controller = ConfigController::default(); if !cfg.raft_engine.enable { - let mut raft_db_opts = cfg.raftdb.build_opt(); - raft_db_opts.set_env(env); + let raft_db_opts = cfg.raftdb.build_opt(env, None); let raft_db_cf_opts = cfg.raftdb.build_cf_opts(factory.block_cache()); let raft_path = cfg.infer_raft_db_path(Some(data_dir)).unwrap(); if !db_exist(&raft_path) { diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 77888f36fa7..30cd7035bef 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -681,20 +681,20 @@ fn read_fail_file(path: &str) -> Vec<(String, String)> { list } -fn run_ldb_command(args: Vec, cfg: &TikvConfig) { +fn build_rocks_opts(cfg: &TikvConfig) -> engine_rocks::RocksDbOptions { let key_manager = data_key_manager_from_config(&cfg.security.encryption, &cfg.storage.data_dir) .unwrap() .map(Arc::new); let env = get_env(key_manager, None /* io_rate_limiter */).unwrap(); - let mut opts = cfg.rocksdb.build_opt(None); - opts.set_env(env); + cfg.rocksdb.build_opt(&cfg.rocksdb.build_resources(env)) +} - engine_rocks::raw::run_ldb_tool(&args, &opts); +fn run_ldb_command(args: Vec, cfg: &TikvConfig) { + engine_rocks::raw::run_ldb_tool(&args, &build_rocks_opts(cfg)); } fn run_sst_dump_command(args: Vec, cfg: &TikvConfig) { - let opts = 
cfg.rocksdb.build_opt(None); - engine_rocks::raw::run_sst_dump_tool(&args, &opts); + engine_rocks::raw::run_sst_dump_tool(&args, &build_rocks_opts(cfg)); } fn print_bad_ssts(data_dir: &str, manifest: Option<&str>, pd_client: RpcClient, cfg: &TikvConfig) { @@ -713,7 +713,7 @@ fn print_bad_ssts(data_dir: &str, manifest: Option<&str>, pd_client: RpcClient, let stderr = BufferRedirect::stderr().unwrap(); let stdout = BufferRedirect::stdout().unwrap(); - let opts = cfg.rocksdb.build_opt(None); + let opts = build_rocks_opts(cfg); match run_and_wait_child_process(|| engine_rocks::raw::run_sst_dump_tool(&args, &opts)) { Ok(code) => { diff --git a/components/engine_rocks/src/db_options.rs b/components/engine_rocks/src/db_options.rs index f4044c44449..f437cc7b433 100644 --- a/components/engine_rocks/src/db_options.rs +++ b/components/engine_rocks/src/db_options.rs @@ -66,23 +66,29 @@ impl DbOptions for RocksDbOptions { } fn get_rate_bytes_per_sec(&self) -> Option { - self.0.get_rate_bytes_per_sec() + self.0.get_rate_limiter().map(|r| r.get_bytes_per_second()) } fn set_rate_bytes_per_sec(&mut self, rate_bytes_per_sec: i64) -> Result<()> { - self.0 - .set_rate_bytes_per_sec(rate_bytes_per_sec) - .map_err(|e| box_err!(e)) + if let Some(r) = self.0.get_rate_limiter() { + r.set_bytes_per_second(rate_bytes_per_sec); + } else { + return Err(box_err!("rate limiter not found")); + } + Ok(()) } fn get_rate_limiter_auto_tuned(&self) -> Option { - self.0.get_auto_tuned() + self.0.get_rate_limiter().map(|r| r.get_auto_tuned()) } fn set_rate_limiter_auto_tuned(&mut self, rate_limiter_auto_tuned: bool) -> Result<()> { - self.0 - .set_auto_tuned(rate_limiter_auto_tuned) - .map_err(|e| box_err!(e)) + if let Some(r) = self.0.get_rate_limiter() { + r.set_auto_tuned(rate_limiter_auto_tuned); + } else { + return Err(box_err!("rate limiter not found")); + } + Ok(()) } fn set_titandb_options(&mut self, opts: &Self::TitanDbOptions) { diff --git a/components/engine_rocks/src/raw.rs 
b/components/engine_rocks/src/raw.rs index 4c2dd71b2a2..e940fdd2cd7 100644 --- a/components/engine_rocks/src/raw.rs +++ b/components/engine_rocks/src/raw.rs @@ -10,10 +10,10 @@ pub use rocksdb::{ new_compaction_filter_raw, run_ldb_tool, run_sst_dump_tool, BlockBasedOptions, Cache, ChecksumType, CompactOptions, CompactionFilter, CompactionFilterContext, CompactionFilterDecision, CompactionFilterFactory, CompactionFilterValueType, - CompactionJobInfo, CompactionOptions, CompactionPriority, DBBottommostLevelCompaction, - DBCompactionFilter, DBCompactionStyle, DBCompressionType, DBEntryType, DBRateLimiterMode, - DBRecoveryMode, DBStatisticsTickerType, DBTitanDBBlobRunMode, Env, EventListener, - IngestExternalFileOptions, LRUCacheOptions, MemoryAllocator, PerfContext, - PrepopulateBlockCache, Range, SliceTransform, Statistics, TablePropertiesCollector, - TablePropertiesCollectorFactory, + CompactionJobInfo, CompactionOptions, CompactionPriority, ConcurrentTaskLimiter, + DBBottommostLevelCompaction, DBCompactionFilter, DBCompactionStyle, DBCompressionType, + DBEntryType, DBRateLimiterMode, DBRecoveryMode, DBStatisticsTickerType, DBTitanDBBlobRunMode, + Env, EventListener, IngestExternalFileOptions, LRUCacheOptions, MemoryAllocator, PerfContext, + PrepopulateBlockCache, Range, RateLimiter, SliceTransform, Statistics, + TablePropertiesCollector, TablePropertiesCollectorFactory, WriteBufferManager, }; diff --git a/components/engine_rocks/src/rocks_metrics.rs b/components/engine_rocks/src/rocks_metrics.rs index 24ac9eee0b4..522696cb150 100644 --- a/components/engine_rocks/src/rocks_metrics.rs +++ b/components/engine_rocks/src/rocks_metrics.rs @@ -582,12 +582,6 @@ pub fn flush_engine_ticker_metrics(t: TickerType, value: u64, name: &str) { .discardable .inc_by(value); } - TickerType::TitanGcSample => { - STORE_ENGINE_BLOB_GC_ACTION - .get(name_enum) - .sample - .inc_by(value); - } TickerType::TitanGcSmallFile => { STORE_ENGINE_BLOB_GC_ACTION .get(name_enum) @@ -612,6 +606,7 @@ 
pub fn flush_engine_ticker_metrics(t: TickerType, value: u64, name: &str) { .trigger_next .inc_by(value); } + // TODO: Some tickers are ignored. _ => {} } } diff --git a/components/engine_rocks/src/rocks_metrics_defs.rs b/components/engine_rocks/src/rocks_metrics_defs.rs index fc23871b90f..042949f1c09 100644 --- a/components/engine_rocks/src/rocks_metrics_defs.rs +++ b/components/engine_rocks/src/rocks_metrics_defs.rs @@ -138,8 +138,11 @@ pub const TITAN_ENGINE_TICKER_TYPES: &[TickerType] = &[ TickerType::TitanGcNoNeed, TickerType::TitanGcRemain, TickerType::TitanGcDiscardable, - TickerType::TitanGcSample, TickerType::TitanGcSmallFile, + TickerType::TitanGcLevelMergeMark, + TickerType::TitanGcLevelMergeDelete, + TickerType::TitanGcNoNeed, + TickerType::TitanGcRemain, TickerType::TitanGcFailure, TickerType::TitanGcSuccess, TickerType::TitanGcTriggerNext, diff --git a/components/engine_rocks/src/write_batch.rs b/components/engine_rocks/src/write_batch.rs index 6171ca7ee38..a46edfb0a4a 100644 --- a/components/engine_rocks/src/write_batch.rs +++ b/components/engine_rocks/src/write_batch.rs @@ -101,15 +101,18 @@ impl RocksWriteBatchVec { impl engine_traits::WriteBatch for RocksWriteBatchVec { fn write_opt(&mut self, opts: &WriteOptions) -> Result { let opt: RocksWriteOptions = opts.into(); + let mut seq = 0; if self.support_write_batch_vec { + // FIXME(tabokie): Callback for empty write batch won't be called. 
self.get_db() - .multi_batch_write(self.as_inner(), &opt.into_raw()) - .map_err(r2e) + .multi_batch_write_callback(self.as_inner(), &opt.into_raw(), |s| seq = s) + .map_err(r2e)?; } else { self.get_db() - .write_seq_opt(&self.wbs[0], &opt.into_raw()) - .map_err(r2e) + .write_callback(&self.wbs[0], &opt.into_raw(), |s| seq = s) + .map_err(r2e)?; } + Ok(seq) } fn data_size(&self) -> usize { diff --git a/components/server/src/raft_engine_switch.rs b/components/server/src/raft_engine_switch.rs index ba489f1be0f..d0637a04b0a 100644 --- a/components/server/src/raft_engine_switch.rs +++ b/components/server/src/raft_engine_switch.rs @@ -251,7 +251,7 @@ mod tests { // Prepare some data for the RocksEngine. let raftdb = engine_rocks::util::new_engine_opt( &cfg.raft_store.raftdb_path, - cfg.raftdb.build_opt(), + cfg.raftdb.build_opt(Default::default(), None), cfg.raftdb.build_cf_opts(&cache), ) .unwrap(); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 73b42d96d22..3c926969ce2 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1731,10 +1731,8 @@ impl ConfiguredRaftEngine for RocksEngine { let raft_db_path = &config.raft_store.raftdb_path; let config_raftdb = &config.raftdb; - let mut raft_db_opts = config_raftdb.build_opt(); - raft_db_opts.set_env(env.clone()); let statistics = Arc::new(RocksStatistics::new_titan()); - raft_db_opts.set_statistics(statistics.as_ref()); + let raft_db_opts = config_raftdb.build_opt(env.clone(), Some(&statistics)); let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); let raftdb = engine_rocks::util::new_engine_opt(raft_db_path, raft_db_opts, raft_cf_opts) .expect("failed to open raftdb"); @@ -1784,8 +1782,7 @@ impl ConfiguredRaftEngine for RaftLogEngine { if should_dump { let config_raftdb = &config.raftdb; - let mut raft_db_opts = config_raftdb.build_opt(); - raft_db_opts.set_env(env.clone()); + let raft_db_opts = config_raftdb.build_opt(env.clone(), None); let 
raft_cf_opts = config_raftdb.build_cf_opts(block_cache); let raftdb = engine_rocks::util::new_engine_opt( &config.raft_store.raftdb_path, diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 7f81d931181..5beddf60151 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1323,10 +1323,8 @@ impl ConfiguredRaftEngine for RocksEngine { let raft_db_path = &config.raft_store.raftdb_path; let config_raftdb = &config.raftdb; - let mut raft_db_opts = config_raftdb.build_opt(); - raft_db_opts.set_env(env.clone()); let statistics = Arc::new(RocksStatistics::new_titan()); - raft_db_opts.set_statistics(statistics.as_ref()); + let raft_db_opts = config_raftdb.build_opt(env.clone(), Some(&statistics)); let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); let raftdb = engine_rocks::util::new_engine_opt(raft_db_path, raft_db_opts, raft_cf_opts) .expect("failed to open raftdb"); @@ -1376,8 +1374,7 @@ impl ConfiguredRaftEngine for RaftLogEngine { if should_dump { let config_raftdb = &config.raftdb; - let mut raft_db_opts = config_raftdb.build_opt(); - raft_db_opts.set_env(env.clone()); + let raft_db_opts = config_raftdb.build_opt(env.clone(), None); let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); let raftdb = engine_rocks::util::new_engine_opt( &config.raft_store.raftdb_path, diff --git a/components/snap_recovery/src/init_cluster.rs b/components/snap_recovery/src/init_cluster.rs index 42c1d0b1882..e7818b3f888 100644 --- a/components/snap_recovery/src/init_cluster.rs +++ b/components/snap_recovery/src/init_cluster.rs @@ -328,8 +328,7 @@ pub fn create_local_engine_service( // init raft engine, either is rocksdb or raft engine if !config.raft_engine.enable { // rocksdb - let mut raft_db_opts = config.raftdb.build_opt(); - raft_db_opts.set_env(env); + let raft_db_opts = config.raftdb.build_opt(env, None); let raft_db_cf_opts = config.raftdb.build_cf_opts(factory.block_cache()); let raft_path = config 
.infer_raft_db_path(None) diff --git a/etc/config-template.toml b/etc/config-template.toml index 8820d2e0675..62623afed0e 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -626,6 +626,11 @@ ## RocksDB log levels # info-log-level = "info" +## Memory usage limit for Raft Engine. Undersized write buffers will be flushed to satisfy the +## requirement. +## No limit when not specified. +# write-buffer-limit = "1GB" + ## Options for `Titan`. [rocksdb.titan] ## Enables or disables `Titan`. Note that Titan is still an experimental feature. Once @@ -848,6 +853,9 @@ ## # checksum = "crc32c" +## The maximum number of concurrent compaction tasks. 0 stands for no limit. +# max-compactions = 0 + ## Options for "Default" Column Family for `Titan`. [rocksdb.defaultcf.titan] ## The smallest value to store in blob files. Value smaller than @@ -935,6 +943,7 @@ # format-version = 2 # prepopulate-block-cache = "disabled" # checksum = "crc32c" +# max-compactions = 0 [rocksdb.lockcf] # compression-per-level = ["no", "no", "no", "no", "no", "no", "no"] @@ -959,6 +968,7 @@ # format-version = 2 # prepopulate-block-cache = "disabled" # checksum = "crc32c" +# max-compactions = 0 [raftdb] # max-background-jobs = 4 @@ -1020,6 +1030,7 @@ # format-version = 2 # prepopulate-block-cache = "disabled" # checksum = "crc32c" +# max-compactions = 0 [raft-engine] ## Determines whether to use Raft Engine to store raft logs. 
When it is diff --git a/src/config/mod.rs b/src/config/mod.rs index 2074c992519..8886711f948 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -29,8 +29,9 @@ use engine_rocks::{ get_env, properties::MvccPropertiesCollectorFactory, raw::{ - BlockBasedOptions, Cache, ChecksumType, CompactionPriority, DBCompactionStyle, - DBCompressionType, DBRateLimiterMode, DBRecoveryMode, Env, PrepopulateBlockCache, + BlockBasedOptions, Cache, ChecksumType, CompactionPriority, ConcurrentTaskLimiter, + DBCompactionStyle, DBCompressionType, DBRateLimiterMode, DBRecoveryMode, Env, + PrepopulateBlockCache, RateLimiter, WriteBufferManager, }, util::{FixedPrefixSliceTransform, FixedSuffixSliceTransform, NoopSliceTransform}, RaftDbLogger, RangePropertiesCollectorFactory, RawMvccPropertiesCollectorFactory, @@ -361,6 +362,8 @@ macro_rules! cf_config { #[serde(with = "rocks_config::checksum_serde")] #[online_config(skip)] pub checksum: ChecksumType, + #[online_config(skip)] + pub max_compactions: u32, #[online_config(submodule)] pub titan: TitanCfConfig, } @@ -514,7 +517,13 @@ macro_rules! write_into_metrics { } macro_rules! build_cf_opt { - ($opt:ident, $cf_name:ident, $cache:expr, $region_info_provider:ident) => {{ + ( + $opt:ident, + $cf_name:ident, + $cache:expr, + $compaction_limiter:expr, + $region_info_provider:ident + ) => {{ let mut block_base_opts = BlockBasedOptions::new(); block_base_opts.set_block_size($opt.block_size.0 as usize); block_base_opts.set_no_block_cache($opt.disable_block_cache); @@ -599,10 +608,18 @@ macro_rules! 
build_cf_opt { warn!("compaction guard is disabled due to region info provider not available") } } + if let Some(r) = $compaction_limiter { + cf_opts.set_compaction_thread_limiter(r); + } cf_opts }}; } +pub struct CfResources { + pub cache: Cache, + pub compaction_thread_limiters: HashMap<&'static str, ConcurrentTaskLimiter>, +} + cf_config!(DefaultCfConfig); impl Default for DefaultCfConfig { @@ -661,6 +678,7 @@ impl Default for DefaultCfConfig { prepopulate_block_cache: PrepopulateBlockCache::Disabled, format_version: 2, checksum: ChecksumType::CRC32c, + max_compactions: 0, titan: TitanCfConfig::default(), } } @@ -669,12 +687,18 @@ impl Default for DefaultCfConfig { impl DefaultCfConfig { pub fn build_opt( &self, - cache: &Cache, + shared: &CfResources, region_info_accessor: Option<&RegionInfoAccessor>, api_version: ApiVersion, for_engine: EngineType, ) -> RocksCfOptions { - let mut cf_opts = build_cf_opt!(self, CF_DEFAULT, cache, region_info_accessor); + let mut cf_opts = build_cf_opt!( + self, + CF_DEFAULT, + &shared.cache, + shared.compaction_thread_limiters.get(CF_DEFAULT), + region_info_accessor + ); cf_opts.set_memtable_prefix_bloom_size_ratio(bloom_filter_ratio(for_engine)); let f = RangePropertiesCollectorFactory { prop_size_index_distance: self.prop_size_index_distance, @@ -779,6 +803,7 @@ impl Default for WriteCfConfig { prepopulate_block_cache: PrepopulateBlockCache::Disabled, format_version: 2, checksum: ChecksumType::CRC32c, + max_compactions: 0, titan, } } @@ -787,11 +812,17 @@ impl Default for WriteCfConfig { impl WriteCfConfig { pub fn build_opt( &self, - cache: &Cache, + shared: &CfResources, region_info_accessor: Option<&RegionInfoAccessor>, for_engine: EngineType, ) -> RocksCfOptions { - let mut cf_opts = build_cf_opt!(self, CF_WRITE, cache, region_info_accessor); + let mut cf_opts = build_cf_opt!( + self, + CF_WRITE, + &shared.cache, + shared.compaction_thread_limiters.get(CF_WRITE), + region_info_accessor + ); // Prefix extractor(trim the 
timestamp at tail) for write cf. cf_opts .set_prefix_extractor( @@ -878,15 +909,22 @@ impl Default for LockCfConfig { prepopulate_block_cache: PrepopulateBlockCache::Disabled, format_version: 2, checksum: ChecksumType::CRC32c, + max_compactions: 0, titan, } } } impl LockCfConfig { - pub fn build_opt(&self, cache: &Cache, for_engine: EngineType) -> RocksCfOptions { + pub fn build_opt(&self, shared: &CfResources, for_engine: EngineType) -> RocksCfOptions { let no_region_info_accessor: Option<&RegionInfoAccessor> = None; - let mut cf_opts = build_cf_opt!(self, CF_LOCK, cache, no_region_info_accessor); + let mut cf_opts = build_cf_opt!( + self, + CF_LOCK, + &shared.cache, + shared.compaction_thread_limiters.get(CF_LOCK), + no_region_info_accessor + ); cf_opts .set_prefix_extractor("NoopSliceTransform", NoopSliceTransform) .unwrap(); @@ -954,15 +992,22 @@ impl Default for RaftCfConfig { prepopulate_block_cache: PrepopulateBlockCache::Disabled, format_version: 2, checksum: ChecksumType::CRC32c, + max_compactions: 0, titan, } } } impl RaftCfConfig { - pub fn build_opt(&self, cache: &Cache) -> RocksCfOptions { + pub fn build_opt(&self, shared: &CfResources) -> RocksCfOptions { let no_region_info_accessor: Option<&RegionInfoAccessor> = None; - let mut cf_opts = build_cf_opt!(self, CF_RAFT, cache, no_region_info_accessor); + let mut cf_opts = build_cf_opt!( + self, + CF_RAFT, + &shared.cache, + shared.compaction_thread_limiters.get(CF_RAFT), + no_region_info_accessor + ); cf_opts .set_prefix_extractor("NoopSliceTransform", NoopSliceTransform) .unwrap(); @@ -1078,6 +1123,16 @@ pub struct DbConfig { pub enable_unordered_write: bool, #[online_config(skip)] pub allow_concurrent_memtable_write: Option, + #[online_config(skip)] + pub write_buffer_limit: Option, + #[online_config(skip)] + #[doc(hidden)] + #[serde(skip_serializing)] + pub write_buffer_stall_ratio: f32, + #[online_config(skip)] + #[doc(hidden)] + #[serde(skip_serializing)] + pub write_buffer_flush_oldest_first: bool, 
// Dangerous option only for programming use. #[online_config(skip)] #[serde(skip)] @@ -1094,6 +1149,15 @@ pub struct DbConfig { pub titan: TitanDbConfig, } +#[derive(Clone)] +pub struct DbResources { + // DB Options. + pub env: Arc, + pub statistics: Arc, + pub rate_limiter: Option>, + pub write_buffer_manager: Option>, +} + impl Default for DbConfig { fn default() -> DbConfig { let bg_job_limits = get_background_job_limits(&KVDB_DEFAULT_BACKGROUND_JOB_LIMITS); @@ -1134,6 +1198,9 @@ impl Default for DbConfig { enable_multi_batch_write: None, // deprecated enable_unordered_write: false, allow_concurrent_memtable_write: None, + write_buffer_limit: None, + write_buffer_stall_ratio: 0.0, + write_buffer_flush_oldest_first: false, paranoid_checks: None, defaultcf: DefaultCfConfig::default(), writecf: WriteCfConfig::default(), @@ -1157,7 +1224,33 @@ impl DbConfig { } } - pub fn build_opt(&self, stats: Option<&RocksStatistics>) -> RocksDbOptions { + pub fn build_resources(&self, env: Arc) -> DbResources { + let rate_limiter = if self.rate_bytes_per_sec.0 > 0 { + Some(Arc::new(RateLimiter::new_writeampbased_with_auto_tuned( + self.rate_bytes_per_sec.0 as i64, + (self.rate_limiter_refill_period.as_millis() * 1000) as i64, + 10, // fairness + self.rate_limiter_mode, + self.rate_limiter_auto_tuned, + ))) + } else { + None + }; + DbResources { + env, + statistics: Arc::new(RocksStatistics::new_titan()), + rate_limiter, + write_buffer_manager: self.write_buffer_limit.map(|limit| { + Arc::new(WriteBufferManager::new( + limit.0 as usize, + self.write_buffer_stall_ratio, + self.write_buffer_flush_oldest_first, + )) + }), + } + } + + pub fn build_opt(&self, shared: &DbResources) -> RocksDbOptions { let mut opts = RocksDbOptions::default(); opts.set_wal_recovery_mode(self.wal_recovery_mode); if !self.wal_dir.is_empty() { @@ -1173,33 +1266,11 @@ impl DbConfig { opts.set_max_manifest_file_size(self.max_manifest_file_size.0); opts.create_if_missing(self.create_if_missing); 
opts.set_max_open_files(self.max_open_files); - match stats { - Some(stats) => opts.set_statistics(stats), - None => opts.set_statistics(&RocksStatistics::new_titan()), - } opts.set_stats_dump_period_sec(self.stats_dump_period.as_secs() as usize); opts.set_compaction_readahead_size(self.compaction_readahead_size.0); opts.set_max_log_file_size(self.info_log_max_size.0); opts.set_log_file_time_to_roll(self.info_log_roll_time.as_secs()); opts.set_keep_log_file_num(self.info_log_keep_log_file_num); - if self.rate_bytes_per_sec.0 > 0 { - if self.rate_limiter_auto_tuned { - opts.set_writeampbasedratelimiter_with_auto_tuned( - self.rate_bytes_per_sec.0 as i64, - (self.rate_limiter_refill_period.as_millis() * 1000) as i64, - self.rate_limiter_mode, - self.rate_limiter_auto_tuned, - ); - } else { - opts.set_ratelimiter_with_auto_tuned( - self.rate_bytes_per_sec.0 as i64, - (self.rate_limiter_refill_period.as_millis() * 1000) as i64, - self.rate_limiter_mode, - self.rate_limiter_auto_tuned, - ); - } - } - opts.set_bytes_per_sync(self.bytes_per_sync.0); opts.set_wal_bytes_per_sync(self.wal_bytes_per_sync.0); opts.set_max_subcompactions(self.max_sub_compactions); @@ -1226,12 +1297,52 @@ impl DbConfig { if self.titan.enabled { opts.set_titandb_options(&self.titan.build_opts()); } + opts.set_env(shared.env.clone()); + opts.set_statistics(&shared.statistics); + if let Some(r) = &shared.rate_limiter { + opts.set_rate_limiter(r); + } + if let Some(r) = &shared.write_buffer_manager { + opts.set_write_buffer_manager(r); + } opts } + pub fn build_cf_resources(&self, cache: Cache) -> CfResources { + let mut compaction_thread_limiters = HashMap::new(); + if self.defaultcf.max_compactions > 0 { + compaction_thread_limiters.insert( + CF_DEFAULT, + ConcurrentTaskLimiter::new(CF_DEFAULT, self.defaultcf.max_compactions), + ); + } + if self.writecf.max_compactions > 0 { + compaction_thread_limiters.insert( + CF_WRITE, + ConcurrentTaskLimiter::new(CF_WRITE, self.writecf.max_compactions), + ); 
+ } + if self.lockcf.max_compactions > 0 { + compaction_thread_limiters.insert( + CF_LOCK, + ConcurrentTaskLimiter::new(CF_LOCK, self.lockcf.max_compactions), + ); + } + if self.raftcf.max_compactions > 0 { + compaction_thread_limiters.insert( + CF_RAFT, + ConcurrentTaskLimiter::new(CF_RAFT, self.raftcf.max_compactions), + ); + } + CfResources { + cache, + compaction_thread_limiters, + } + } + pub fn build_cf_opts( &self, - cache: &Cache, + shared: &CfResources, region_info_accessor: Option<&RegionInfoAccessor>, api_version: ApiVersion, for_engine: EngineType, @@ -1240,16 +1351,16 @@ impl DbConfig { cf_opts.push(( CF_DEFAULT, self.defaultcf - .build_opt(cache, region_info_accessor, api_version, for_engine), + .build_opt(shared, region_info_accessor, api_version, for_engine), )); - cf_opts.push((CF_LOCK, self.lockcf.build_opt(cache, for_engine))); + cf_opts.push((CF_LOCK, self.lockcf.build_opt(shared, for_engine))); cf_opts.push(( CF_WRITE, self.writecf - .build_opt(cache, region_info_accessor, for_engine), + .build_opt(shared, region_info_accessor, for_engine), )); if for_engine == EngineType::RaftKv { - cf_opts.push((CF_RAFT, self.raftcf.build_opt(cache))); + cf_opts.push((CF_RAFT, self.raftcf.build_opt(shared))); } cf_opts } @@ -1367,6 +1478,7 @@ impl Default for RaftDefaultCfConfig { prepopulate_block_cache: PrepopulateBlockCache::Disabled, format_version: 2, checksum: ChecksumType::CRC32c, + max_compactions: 0, titan: TitanCfConfig::default(), } } @@ -1374,8 +1486,19 @@ impl Default for RaftDefaultCfConfig { impl RaftDefaultCfConfig { pub fn build_opt(&self, cache: &Cache) -> RocksCfOptions { + let limiter = if self.max_compactions > 0 { + Some(ConcurrentTaskLimiter::new(CF_DEFAULT, self.max_compactions)) + } else { + None + }; let no_region_info_accessor: Option<&RegionInfoAccessor> = None; - let mut cf_opts = build_cf_opt!(self, CF_DEFAULT, cache, no_region_info_accessor); + let mut cf_opts = build_cf_opt!( + self, + CF_DEFAULT, + cache, + limiter.as_ref(), + 
no_region_info_accessor + ); let f = FixedPrefixSliceTransform::new(region_raft_prefix_len()); cf_opts .set_memtable_insert_hint_prefix_extractor("RaftPrefixSliceTransform", f) @@ -1488,7 +1611,7 @@ impl Default for RaftDbConfig { } impl RaftDbConfig { - pub fn build_opt(&self) -> RocksDbOptions { + pub fn build_opt(&self, env: Arc, statistics: Option<&RocksStatistics>) -> RocksDbOptions { let mut opts = RocksDbOptions::default(); opts.set_wal_recovery_mode(self.wal_recovery_mode); if !self.wal_dir.is_empty() { @@ -1503,7 +1626,10 @@ impl RaftDbConfig { opts.set_max_manifest_file_size(self.max_manifest_file_size.0); opts.create_if_missing(self.create_if_missing); opts.set_max_open_files(self.max_open_files); - opts.set_statistics(&RocksStatistics::new_titan()); + match statistics { + Some(s) => opts.set_statistics(s), + None => opts.set_statistics(&RocksStatistics::new_titan()), + } opts.set_stats_dump_period_sec(self.stats_dump_period.as_secs() as usize); opts.set_compaction_readahead_size(self.compaction_readahead_size.0); opts.set_max_log_file_size(self.info_log_max_size.0); @@ -1526,7 +1652,7 @@ impl RaftDbConfig { if self.titan.enabled { opts.set_titandb_options(&self.titan.build_opts()); } - + opts.set_env(env); opts } @@ -4273,6 +4399,15 @@ mod tests { tikv_cfg.validate().unwrap(); } + #[test] + fn test_rocks_rate_limit_zero() { + let mut tikv_cfg = TikvConfig::default(); + tikv_cfg.rocksdb.rate_bytes_per_sec = ReadableSize(0); + tikv_cfg + .rocksdb + .build_opt(&tikv_cfg.rocksdb.build_resources(Arc::new(Env::default()))); + } + #[test] fn test_parse_log_level() { #[derive(Serialize, Deserialize, Debug)] @@ -4433,9 +4568,13 @@ mod tests { assert_eq!(F::TAG, cfg.storage.api_version()); let engine = RocksDBEngine::new( &cfg.storage.data_dir, - Some(cfg.rocksdb.build_opt(None)), + Some( + cfg.rocksdb + .build_opt(&cfg.rocksdb.build_resources(Arc::new(Env::default()))), + ), cfg.rocksdb.build_cf_opts( - &cfg.storage.block_cache.build_shared_cache(), + 
&cfg.rocksdb + .build_cf_resources(cfg.storage.block_cache.build_shared_cache()), None, cfg.storage.api_version(), cfg.storage.engine, @@ -5051,6 +5190,7 @@ mod tests { #[test] fn test_compaction_guard() { let cache = Cache::new_lru_cache(LRUCacheOptions::new()); + let no_limiter: Option = None; // Test comopaction guard disabled. let config = DefaultCfConfig { target_file_size_base: ReadableSize::mb(16), @@ -5058,7 +5198,7 @@ mod tests { ..Default::default() }; let provider = Some(MockRegionInfoProvider::new(vec![])); - let cf_opts = build_cf_opt!(config, CF_DEFAULT, &cache, provider); + let cf_opts = build_cf_opt!(config, CF_DEFAULT, &cache, no_limiter.as_ref(), provider); assert_eq!( config.target_file_size_base.0, cf_opts.get_target_file_size_base() @@ -5071,7 +5211,7 @@ mod tests { ..Default::default() }; let provider: Option = None; - let cf_opts = build_cf_opt!(config, CF_DEFAULT, &cache, provider); + let cf_opts = build_cf_opt!(config, CF_DEFAULT, &cache, no_limiter.as_ref(), provider); assert_eq!( config.target_file_size_base.0, cf_opts.get_target_file_size_base() @@ -5086,7 +5226,7 @@ mod tests { ..Default::default() }; let provider = Some(MockRegionInfoProvider::new(vec![])); - let cf_opts = build_cf_opt!(config, CF_DEFAULT, &cache, provider); + let cf_opts = build_cf_opt!(config, CF_DEFAULT, &cache, no_limiter.as_ref(), provider); assert_eq!( config.compaction_guard_max_output_file_size.0, cf_opts.get_target_file_size_base() @@ -5391,6 +5531,7 @@ mod tests { cfg.memory_usage_limit = None; cfg.raft_engine.mut_config().memory_limit = None; cfg.coprocessor_v2.coprocessor_plugin_directory = None; // Default is `None`, which is represented by not setting the key. 
+ cfg.rocksdb.write_buffer_limit = None; cfg.rocksdb.defaultcf.level0_slowdown_writes_trigger = None; cfg.rocksdb.defaultcf.level0_stop_writes_trigger = None; cfg.rocksdb.defaultcf.soft_pending_compaction_bytes_limit = None; diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 2c31c9522b1..91b5178f8a0 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -16,19 +16,18 @@ use raftstore::RegionInfoAccessor; use tikv_util::worker::Scheduler; use crate::{ - config::{DbConfig, TikvConfig, DEFAULT_ROCKSDB_SUB_DIR}, + config::{CfResources, DbConfig, DbResources, TikvConfig, DEFAULT_ROCKSDB_SUB_DIR}, storage::config::EngineType, }; struct FactoryInner { - env: Arc, region_info_accessor: Option, - block_cache: Cache, rocksdb_config: Arc, api_version: ApiVersion, flow_listener: Option, sst_recovery_sender: Option>, - statistics: Arc, + db_resources: DbResources, + cf_resources: CfResources, state_storage: Option>, lite: bool, } @@ -40,17 +39,15 @@ pub struct KvEngineFactoryBuilder { impl KvEngineFactoryBuilder { pub fn new(env: Arc, config: &TikvConfig, cache: Cache) -> Self { - let statistics = Arc::new(RocksStatistics::new_titan()); Self { inner: FactoryInner { - env, region_info_accessor: None, - block_cache: cache, rocksdb_config: Arc::new(config.rocksdb.clone()), api_version: config.storage.api_version(), flow_listener: None, sst_recovery_sender: None, - statistics, + db_resources: config.rocksdb.build_resources(env), + cf_resources: config.rocksdb.build_cf_resources(cache), state_storage: None, lite: false, }, @@ -134,7 +131,7 @@ impl KvEngineFactory { } pub fn rocks_statistics(&self) -> Arc { - self.inner.statistics.clone() + self.inner.db_resources.statistics.clone() } fn db_opts(&self) -> RocksDbOptions { @@ -142,8 +139,7 @@ impl KvEngineFactory { let mut db_opts = self .inner .rocksdb_config - .build_opt(Some(self.inner.statistics.as_ref())); - db_opts.set_env(self.inner.env.clone()); + 
.build_opt(&self.inner.db_resources); if !self.inner.lite { db_opts.add_event_listener(RocksEventListener::new( "kv", @@ -158,7 +154,7 @@ impl KvEngineFactory { fn cf_opts(&self, for_engine: EngineType) -> Vec<(&str, RocksCfOptions)> { self.inner.rocksdb_config.build_cf_opts( - &self.inner.block_cache, + &self.inner.cf_resources, self.inner.region_info_accessor.as_ref(), self.inner.api_version, for_engine, @@ -166,7 +162,7 @@ impl KvEngineFactory { } pub fn block_cache(&self) -> &Cache { - &self.inner.block_cache + &self.inner.cf_resources.cache } /// Create a shared db. diff --git a/src/storage/kv/test_engine_builder.rs b/src/storage/kv/test_engine_builder.rs index 12a7776e434..d15a33742ba 100644 --- a/src/storage/kv/test_engine_builder.rs +++ b/src/storage/kv/test_engine_builder.rs @@ -96,7 +96,7 @@ impl TestEngineBuilder { if !enable_block_cache { cache_opt.capacity = Some(ReadableSize::kb(0)); } - let cache = cache_opt.build_shared_cache(); + let shared = cfg_rocksdb.build_cf_resources(cache_opt.build_shared_cache()); let cfs_opts = cfs .iter() .map(|cf| match *cf { @@ -104,19 +104,19 @@ impl TestEngineBuilder { CF_DEFAULT, cfg_rocksdb .defaultcf - .build_opt(&cache, None, api_version, EngineType::RaftKv), + .build_opt(&shared, None, api_version, EngineType::RaftKv), ), CF_LOCK => ( CF_LOCK, - cfg_rocksdb.lockcf.build_opt(&cache, EngineType::RaftKv), + cfg_rocksdb.lockcf.build_opt(&shared, EngineType::RaftKv), ), CF_WRITE => ( CF_WRITE, cfg_rocksdb .writecf - .build_opt(&cache, None, EngineType::RaftKv), + .build_opt(&shared, None, EngineType::RaftKv), ), - CF_RAFT => (CF_RAFT, cfg_rocksdb.raftcf.build_opt(&cache)), + CF_RAFT => (CF_RAFT, cfg_rocksdb.raftcf.build_opt(&shared)), _ => (*cf, RocksCfOptions::default()), }) .collect(); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 60e9b965c5d..802b0507849 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -4136,12 +4136,13 @@ mod tests { let engine = { let path = "".to_owned(); let cfg_rocksdb 
= db_config; - let cache = BlockCacheConfig::default().build_shared_cache(); + let shared = + cfg_rocksdb.build_cf_resources(BlockCacheConfig::default().build_shared_cache()); let cfs_opts = vec![ ( CF_DEFAULT, cfg_rocksdb.defaultcf.build_opt( - &cache, + &shared, None, ApiVersion::V1, EngineType::RaftKv, @@ -4149,15 +4150,15 @@ mod tests { ), ( CF_LOCK, - cfg_rocksdb.lockcf.build_opt(&cache, EngineType::RaftKv), + cfg_rocksdb.lockcf.build_opt(&shared, EngineType::RaftKv), ), ( CF_WRITE, cfg_rocksdb .writecf - .build_opt(&cache, None, EngineType::RaftKv), + .build_opt(&shared, None, EngineType::RaftKv), ), - (CF_RAFT, cfg_rocksdb.raftcf.build_opt(&cache)), + (CF_RAFT, cfg_rocksdb.raftcf.build_opt(&shared)), ]; RocksEngine::new( &path, None, cfs_opts, None, // io_rate_limiter diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 4e22463503a..c6f8e565218 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -312,6 +312,9 @@ fn test_serde_custom_tikv_config() { paranoid_checks: None, allow_concurrent_memtable_write: Some(false), enable_unordered_write: true, + write_buffer_limit: Some(ReadableSize::gb(1)), + write_buffer_stall_ratio: 0.0, + write_buffer_flush_oldest_first: false, defaultcf: DefaultCfConfig { block_size: ReadableSize::kb(12), block_cache_size: ReadableSize::gb(12), @@ -365,6 +368,7 @@ fn test_serde_custom_tikv_config() { prepopulate_block_cache: PrepopulateBlockCache::FlushOnly, format_version: 5, checksum: ChecksumType::XXH3, + max_compactions: 3, }, writecf: WriteCfConfig { block_size: ReadableSize::kb(12), @@ -433,6 +437,7 @@ fn test_serde_custom_tikv_config() { prepopulate_block_cache: PrepopulateBlockCache::FlushOnly, format_version: 5, checksum: ChecksumType::XXH3, + max_compactions: 3, }, lockcf: LockCfConfig { block_size: ReadableSize::kb(12), @@ -501,6 +506,7 @@ fn test_serde_custom_tikv_config() { prepopulate_block_cache: PrepopulateBlockCache::FlushOnly, format_version: 5, 
checksum: ChecksumType::XXH3, + max_compactions: 3, }, raftcf: RaftCfConfig { block_size: ReadableSize::kb(12), @@ -569,6 +575,7 @@ fn test_serde_custom_tikv_config() { prepopulate_block_cache: PrepopulateBlockCache::FlushOnly, format_version: 5, checksum: ChecksumType::XXH3, + max_compactions: 3, }, titan: titan_db_config.clone(), }; @@ -652,6 +659,7 @@ fn test_serde_custom_tikv_config() { prepopulate_block_cache: PrepopulateBlockCache::FlushOnly, format_version: 5, checksum: ChecksumType::XXH3, + max_compactions: 3, }, titan: titan_db_config, }; diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 900e1c36aa6..b096437e60c 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -271,6 +271,7 @@ enable-pipelined-write = false enable-multi-batch-write = true enable-unordered-write = true allow-concurrent-memtable-write = false +write-buffer-limit = "1GB" [rocksdb.titan] enabled = true @@ -331,6 +332,7 @@ compaction-guard-max-output-file-size = "34MB" prepopulate-block-cache = "flush-only" format-version = 5 checksum = "xxh3" +max-compactions = 3 [rocksdb.defaultcf.titan] min-blob-size = "2018B" @@ -393,6 +395,7 @@ compaction-guard-max-output-file-size = "34MB" prepopulate-block-cache = "flush-only" format-version = 5 checksum = "xxh3" +max-compactions = 3 [rocksdb.lockcf] block-size = "12KB" @@ -442,6 +445,7 @@ compaction-guard-max-output-file-size = "34MB" prepopulate-block-cache = "flush-only" format-version = 5 checksum = "xxh3" +max-compactions = 3 [rocksdb.raftcf] block-size = "12KB" @@ -491,6 +495,7 @@ compaction-guard-max-output-file-size = "34MB" prepopulate-block-cache = "flush-only" format-version = 5 checksum = "xxh3" +max-compactions = 3 [raftdb] wal-recovery-mode = "skip-any-corrupted-records" @@ -572,6 +577,7 @@ compaction-guard-max-output-file-size = "34MB" prepopulate-block-cache = "flush-only" format-version = 5 checksum = "xxh3" 
+max-compactions = 3 [raftdb.defaultcf.titan] min-blob-size = "2018B" diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 412f9f5a777..452bcc89238 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -159,10 +159,15 @@ fn test_delete_files_in_range_for_titan() { cfg.rocksdb.defaultcf.titan.min_gc_batch_size = ReadableSize(0); cfg.rocksdb.defaultcf.titan.discardable_ratio = 0.4; cfg.rocksdb.defaultcf.titan.min_blob_size = ReadableSize(0); - let kv_db_opts = cfg.rocksdb.build_opt(None); - let kv_cfs_opts = - cfg.rocksdb - .build_cf_opts(&cache, None, cfg.storage.api_version(), cfg.storage.engine); + let kv_db_opts = cfg + .rocksdb + .build_opt(&cfg.rocksdb.build_resources(Default::default())); + let kv_cfs_opts = cfg.rocksdb.build_cf_opts( + &cfg.rocksdb.build_cf_resources(cache), + None, + cfg.storage.api_version(), + cfg.storage.engine, + ); let raft_path = path.path().join(Path::new("titan")); let engines = Engines::new( From 77c21995488a702a2276c1f1b472bb68c8b85bc4 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 27 Dec 2022 20:16:16 +0800 Subject: [PATCH 0433/1149] raftstore-v2: gc tablets (#13974) ref tikv/tikv#12842 Signed-off-by: tabokie Signed-off-by: Xinye Tao --- components/engine_panic/src/compact.rs | 11 +- components/engine_panic/src/misc.rs | 8 + components/engine_rocks/src/compact.rs | 14 +- components/engine_rocks/src/file_system.rs | 2 +- components/engine_rocks/src/misc.rs | 12 +- .../engine_rocks_helper/src/sst_recovery.rs | 3 +- components/engine_traits/src/compact.rs | 26 +- components/engine_traits/src/misc.rs | 4 + components/engine_traits/src/tablet.rs | 24 +- components/raftstore-v2/src/batch/store.rs | 11 +- .../operation/command/admin/compact_log.rs | 13 +- .../src/operation/command/admin/split.rs | 19 ++ .../raftstore-v2/src/operation/ready/mod.rs | 2 +- .../src/operation/ready/snapshot.rs | 3 +- 
components/raftstore-v2/src/raft/peer.rs | 47 +++- components/raftstore-v2/src/raft/storage.rs | 2 +- components/raftstore-v2/src/worker/mod.rs | 1 + .../raftstore-v2/src/worker/tablet_gc.rs | 227 ++++++++++++++++++ .../raftstore/src/store/async_io/write.rs | 8 +- .../raftstore/src/store/compaction_guard.rs | 2 +- .../raftstore/src/store/worker/compact.rs | 2 +- components/test_raftstore/src/cluster.rs | 3 +- src/config/mod.rs | 6 + src/storage/mvcc/reader/reader.rs | 2 +- 24 files changed, 393 insertions(+), 59 deletions(-) create mode 100644 components/raftstore-v2/src/worker/tablet_gc.rs diff --git a/components/engine_panic/src/compact.rs b/components/engine_panic/src/compact.rs index f1e78d57010..988bec790de 100644 --- a/components/engine_panic/src/compact.rs +++ b/components/engine_panic/src/compact.rs @@ -13,7 +13,7 @@ impl CompactExt for PanicEngine { panic!() } - fn compact_range( + fn compact_range_cf( &self, cf: &str, start_key: Option<&[u8]>, @@ -24,15 +24,6 @@ impl CompactExt for PanicEngine { panic!() } - fn compact_files_in_range( - &self, - start: Option<&[u8]>, - end: Option<&[u8]>, - output_level: Option, - ) -> Result<()> { - panic!() - } - fn compact_files_in_range_cf( &self, cf: &str, diff --git a/components/engine_panic/src/misc.rs b/components/engine_panic/src/misc.rs index 561d2892ca9..5e6fbe87267 100644 --- a/components/engine_panic/src/misc.rs +++ b/components/engine_panic/src/misc.rs @@ -64,10 +64,18 @@ impl MiscExt for PanicEngine { panic!() } + fn pause_background_work(&self) -> Result<()> { + panic!() + } + fn exists(path: &str) -> bool { panic!() } + fn locked(path: &str) -> Result { + panic!() + } + fn dump_stats(&self) -> Result { panic!() } diff --git a/components/engine_rocks/src/compact.rs b/components/engine_rocks/src/compact.rs index b9e3e5fe558..199b7d9f3be 100644 --- a/components/engine_rocks/src/compact.rs +++ b/components/engine_rocks/src/compact.rs @@ -24,7 +24,7 @@ impl CompactExt for RocksEngine { Ok(false) } - fn 
compact_range( + fn compact_range_cf( &self, cf: &str, start_key: Option<&[u8]>, @@ -43,18 +43,6 @@ impl CompactExt for RocksEngine { Ok(()) } - fn compact_files_in_range( - &self, - start: Option<&[u8]>, - end: Option<&[u8]>, - output_level: Option, - ) -> Result<()> { - for cf_name in self.cf_names() { - self.compact_files_in_range_cf(cf_name, start, end, output_level)?; - } - Ok(()) - } - fn compact_files_in_range_cf( &self, cf: &str, diff --git a/components/engine_rocks/src/file_system.rs b/components/engine_rocks/src/file_system.rs index 5fc0ed7f6e2..b470237f313 100644 --- a/components/engine_rocks/src/file_system.rs +++ b/components/engine_rocks/src/file_system.rs @@ -92,7 +92,7 @@ mod tests { assert!(stats.fetch(IoType::Flush, IoOp::Write) > value_size * 2); assert!(stats.fetch(IoType::Flush, IoOp::Write) < value_size * 2 + amplification_bytes); stats.reset(); - db.compact_range( + db.compact_range_cf( CF_DEFAULT, None, // start_key None, // end_key false, // exclusive_manual diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index 75b193bdcf9..55546869272 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -273,10 +273,20 @@ impl MiscExt for RocksEngine { self.as_inner().sync_wal().map_err(r2e) } + fn pause_background_work(&self) -> Result<()> { + self.as_inner().pause_bg_work(); + Ok(()) + } + fn exists(path: &str) -> bool { crate::util::db_exist(path) } + fn locked(path: &str) -> Result { + let env = rocksdb::Env::default(); + env.is_db_locked(path).map_err(r2e) + } + fn dump_stats(&self) -> Result { const ROCKSDB_DB_STATS_KEY: &str = "rocksdb.dbstats"; const ROCKSDB_CF_STATS_KEY: &str = "rocksdb.cfstats"; @@ -659,7 +669,7 @@ mod tests { ]; assert_eq!(sst_range, expected); - db.compact_range(cf, None, None, false, 1).unwrap(); + db.compact_range_cf(cf, None, None, false, 1).unwrap(); let sst_range = db.get_sst_key_ranges(cf, 0).unwrap(); assert_eq!(sst_range.len(), 0); let 
sst_range = db.get_sst_key_ranges(cf, 1).unwrap(); diff --git a/components/engine_rocks_helper/src/sst_recovery.rs b/components/engine_rocks_helper/src/sst_recovery.rs index 7a820e6a79b..85fb8d74bee 100644 --- a/components/engine_rocks_helper/src/sst_recovery.rs +++ b/components/engine_rocks_helper/src/sst_recovery.rs @@ -227,7 +227,8 @@ mod tests { db.put(b"z2", b"val").unwrap(); db.put(b"z7", b"val").unwrap(); // generate SST file. - db.compact_range(CF_DEFAULT, None, None, false, 1).unwrap(); + db.compact_range_cf(CF_DEFAULT, None, None, false, 1) + .unwrap(); let files = db.as_inner().get_live_files(); assert_eq!(files.get_smallestkey(0), b"z2"); diff --git a/components/engine_traits/src/compact.rs b/components/engine_traits/src/compact.rs index 8dd1cc7d9b4..05590a1ff32 100644 --- a/components/engine_traits/src/compact.rs +++ b/components/engine_traits/src/compact.rs @@ -4,17 +4,30 @@ use std::collections::BTreeMap; -use crate::errors::Result; +use crate::{errors::Result, CfNamesExt}; -pub trait CompactExt { +pub trait CompactExt: CfNamesExt { type CompactedEvent: CompactedEvent; /// Checks whether any column family sets `disable_auto_compactions` to /// `True` or not. fn auto_compactions_is_disabled(&self) -> Result; - /// Compacts the column families in the specified range by manual or not. fn compact_range( + &self, + start_key: Option<&[u8]>, + end_key: Option<&[u8]>, + exclusive_manual: bool, + max_subcompactions: u32, + ) -> Result<()> { + for cf in self.cf_names() { + self.compact_range_cf(cf, start_key, end_key, exclusive_manual, max_subcompactions)?; + } + Ok(()) + } + + /// Compacts the column families in the specified range by manual or not. 
+ fn compact_range_cf( &self, cf: &str, start_key: Option<&[u8]>, @@ -32,7 +45,12 @@ pub trait CompactExt { start: Option<&[u8]>, end: Option<&[u8]>, output_level: Option, - ) -> Result<()>; + ) -> Result<()> { + for cf in self.cf_names() { + self.compact_files_in_range_cf(cf, start, end, output_level)?; + } + Ok(()) + } /// Compacts files in the range and above the output level of the given /// column family. Compacts all files to the bottommost level if the diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index a7679256f21..d9a07a1a915 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -97,9 +97,13 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { fn sync_wal(&self) -> Result<()>; + fn pause_background_work(&self) -> Result<()>; + /// Check whether a database exists at a given path fn exists(path: &str) -> bool; + fn locked(path: &str) -> Result; + /// Dump stats about the database into a string. /// /// For debugging. The format and content is unspecified. diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index 46b020cf138..64459bbc7ee 100644 --- a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -69,6 +69,19 @@ impl CachedTablet { } self.cache() } + + /// Returns how many versions has passed. + #[inline] + pub fn refresh(&mut self) -> u64 { + let old_version = self.version; + if self.latest.version.load(Ordering::Relaxed) > old_version { + let latest_data = self.latest.data.lock().unwrap(); + self.version = self.latest.version.load(Ordering::Relaxed); + self.cache = latest_data.clone(); + return self.version - old_version; + } + 0 + } } /// Context to be passed to `TabletFactory`. 
@@ -178,7 +191,6 @@ impl TabletFactory for SingletonFactory { struct TabletRegistryInner { // region_id, suffix -> tablet tablets: Mutex>>, - tombstone: Mutex>, factory: Box>, root: PathBuf, } @@ -197,9 +209,6 @@ impl Clone for TabletRegistry { } } -unsafe impl Send for TabletRegistry {} -unsafe impl Sync for TabletRegistry {} - impl TabletRegistry { pub fn new(factory: Box>, path: impl Into) -> Result { let root = path.into(); @@ -209,7 +218,6 @@ impl TabletRegistry { tablets: Mutex::new(HashMap::default()), factory, root, - tombstone: Mutex::default(), }), }) } @@ -273,17 +281,13 @@ impl TabletRegistry { ctx.suffix ))); } + // TODO: use compaction filter to trim range. let tablet = self.tablets.factory.open_tablet(ctx, &path)?; let mut cached = self.get_or_default(id); cached.set(tablet); Ok(cached) } - /// Destroy the tablet and its data - pub fn mark_tombstone(&self, id: u64, suffix: u64) { - self.tablets.tombstone.lock().unwrap().push((id, suffix)); - } - /// Loop over all opened tablets. Note, it's possible that the visited /// tablet is not the latest one. If latest one is required, you may /// either: diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index bcfa6ca0771..38ce4296c03 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -45,7 +45,7 @@ use crate::{ fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate, StoreMeta}, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, - worker::pd, + worker::{pd, tablet_gc}, Error, Result, }; @@ -363,6 +363,7 @@ where pub struct Schedulers { pub read: Scheduler>, pub pd: Scheduler, + pub tablet_gc: Scheduler>, pub write: WriteSenders, // Following is not maintained by raftstore itself. 
@@ -375,6 +376,7 @@ struct Workers { /// Worker for fetching raft logs asynchronously async_read: Worker, pd: LazyWorker, + tablet_gc_worker: Worker, async_write: StoreWriters, // Following is not maintained by raftstore itself. @@ -386,6 +388,7 @@ impl Workers { Self { async_read: Worker::new("async-read-worker"), pd, + tablet_gc_worker: Worker::new("tablet-gc-worker"), async_write: StoreWriters::default(), background, } @@ -461,9 +464,15 @@ impl StoreSystem { ), ); + let tablet_gc_scheduler = workers.tablet_gc_worker.start( + "tablet-gc-worker", + tablet_gc::Runner::new(tablet_registry.clone(), self.logger.clone()), + ); + let schedulers = Schedulers { read: read_scheduler, pd: workers.pd.scheduler(), + tablet_gc: tablet_gc_scheduler, write: workers.async_write.senders(), split_check: split_check_scheduler, }; diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index af8fb5acc47..aaf067aa585 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -17,7 +17,7 @@ use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use kvproto::raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse, RaftCmdRequest}; use protobuf::Message; use raftstore::{ - store::{fsm::new_admin_request, needs_evict_entry_cache, Transport}, + store::{fsm::new_admin_request, needs_evict_entry_cache, Transport, WriteTask}, Result, }; use slog::{debug, error, info}; @@ -29,6 +29,7 @@ use crate::{ operation::AdminCmdResult, raft::{Apply, Peer}, router::{CmdResChannel, PeerTick}, + worker::tablet_gc, }; impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { @@ -256,12 +257,14 @@ impl Peer { &mut self, store_ctx: &mut StoreContext, old_persisted: u64, + task: &mut WriteTask, ) { let new_persisted = self.storage().apply_trace().persisted_apply_index(); if old_persisted < 
new_persisted { + let region_id = self.region_id(); // TODO: batch it. if let Err(e) = store_ctx.engine.delete_all_but_one_states_before( - self.region_id(), + region_id, new_persisted, self.state_changes_mut(), ) { @@ -270,6 +273,12 @@ impl Peer { self.set_has_extra_write(); } self.maybe_compact_log_from_engine(store_ctx, Either::Left(old_persisted)); + if self.remove_tombstone_tablets_before(new_persisted) { + let sched = store_ctx.schedulers.tablet_gc.clone(); + task.persisted_cbs.push(Box::new(move || { + let _ = sched.schedule(tablet_gc::Task::destroy(region_id, new_persisted)); + })) + } } } diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 2154eb20e90..386528070e2 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -59,6 +59,7 @@ use crate::{ operation::AdminCmdResult, raft::{Apply, Peer}, router::{CmdResChannel, PeerMsg, PeerTick, StoreMsg}, + worker::tablet_gc, Error, }; @@ -439,6 +440,15 @@ impl Peer { self.add_pending_tick(PeerTick::SplitRegionCheck); } + self.record_tablet_as_tombstone_and_refresh(res.tablet_index, store_ctx); + let _ = store_ctx + .schedulers + .tablet_gc + .schedule(tablet_gc::Task::trim( + self.tablet().unwrap().clone(), + derived, + )); + let last_region_id = res.regions.last().unwrap().get_id(); let mut new_ids = HashSet::default(); for (new_region, locks) in res.regions.into_iter().zip(region_locks) { @@ -491,6 +501,8 @@ impl Peer { ) { let region_id = split_init.region.id; if self.storage().is_initialized() && self.persisted_index() >= RAFT_INIT_LOG_INDEX { + // Race with split operation. The tablet created by split will eventually be + // deleted (TODO). We don't trim it. 
let _ = store_ctx .router .force_send(split_init.source_id, PeerMsg::SplitInitFinish(region_id)); @@ -532,6 +544,13 @@ impl Peer { store_ctx: &mut StoreContext, split_init: Box, ) { + let _ = store_ctx + .schedulers + .tablet_gc + .schedule(tablet_gc::Task::trim( + self.tablet().unwrap().clone(), + self.region(), + )); if split_init.source_leader && self.leader_id() == INVALID_ID && self.term() == RAFT_INIT_LOG_TERM diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 14010fc9fe2..66d9755c1df 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -383,7 +383,7 @@ impl Peer { self.merge_state_changes_to(&mut write_task); self.storage_mut() .handle_raft_ready(ctx, &mut ready, &mut write_task); - self.on_advance_persisted_apply_index(ctx, prev_persisted); + self.on_advance_persisted_apply_index(ctx, prev_persisted, &mut write_task); if !ready.persisted_messages().is_empty() { write_task.messages = ready diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index e1a36ed8ec7..7a6e00aec4f 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -162,6 +162,7 @@ impl Peer { // Use a new FlushState to avoid conflicts with the old one. 
tablet_ctx.flush_state = Some(flush_state); ctx.tablet_registry.load(tablet_ctx, false).unwrap(); + self.record_tablet_as_tombstone_and_refresh(persisted_index, ctx); self.schedule_apply_fsm(ctx); self.storage_mut().on_applied_snapshot(); self.raft_group_mut().advance_apply_to(persisted_index); @@ -504,7 +505,7 @@ impl Storage { let _ = fs::remove_dir_all(path); } }; - task.persisted_cb = Some(Box::new(hook)); + task.persisted_cbs.push(Box::new(hook)); task.has_snapshot = true; Ok(()) } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index ca5aafa3bfb..4fbc7e9874e 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -33,6 +33,7 @@ use crate::{ AsyncWriter, DestroyProgress, ProposalControl, SimpleWriteReqEncoder, SplitFlowControl, }, router::{CmdResChannel, PeerTick, QueryResChannel}, + worker::tablet_gc, Result, }; @@ -42,6 +43,11 @@ const REGION_READ_PROGRESS_CAP: usize = 128; pub struct Peer { raft_group: RawNode>, tablet: CachedTablet, + /// Tombstone tablets can only be destroyed when the tablet that replaces it + /// is persisted. This is a list of tablet index that awaits to be + /// persisted. When persisted_apply is advanced, we need to notify tablet_gc + /// worker to destroy them. + pending_tombstone_tablets: Vec, /// Statistics for self. self_stat: PeerStat, @@ -126,7 +132,6 @@ impl Peer { let raft_group = RawNode::new(&raft_cfg, storage, &logger)?; let region = raft_group.store().region_state().get_region().clone(); - let cached_tablet = tablet_registry.get_or_default(region_id); let flush_state: Arc = Arc::default(); // We can't create tablet if tablet index is 0. It can introduce race when gc // old tablet and create new peer. We also can't get the correct range of the @@ -137,10 +142,12 @@ impl Peer { // TODO: Perhaps we should stop create the tablet automatically. 
tablet_registry.load(ctx, false)?; } + let cached_tablet = tablet_registry.get_or_default(region_id); let tag = format!("[region {}] {}", region.get_id(), peer_id); let mut peer = Peer { tablet: cached_tablet, + pending_tombstone_tablets: Vec::new(), self_stat: PeerStat::default(), peer_cache: vec![], peer_heartbeats: HashMap::default(), @@ -328,13 +335,43 @@ impl Peer { } #[inline] - pub fn tablet(&self) -> &CachedTablet { - &self.tablet + pub fn tablet(&mut self) -> Option<&EK> { + self.tablet.latest() + } + + #[inline] + pub fn record_tablet_as_tombstone_and_refresh( + &mut self, + new_tablet_index: u64, + ctx: &StoreContext, + ) { + if let Some(old_tablet) = self.tablet.cache() { + self.pending_tombstone_tablets.push(new_tablet_index); + let _ = ctx + .schedulers + .tablet_gc + .schedule(tablet_gc::Task::prepare_destroy( + old_tablet.clone(), + self.region_id(), + new_tablet_index, + )); + } + // TODO: Handle race between split and snapshot. So that we can assert + // `self.tablet.refresh() == 1` + assert!(self.tablet.refresh() > 0); } + /// Returns if there's any tombstone being removed. 
#[inline] - pub fn tablet_mut(&mut self) -> &mut CachedTablet { - &mut self.tablet + pub fn remove_tombstone_tablets_before(&mut self, persisted: u64) -> bool { + let mut removed = 0; + while let Some(i) = self.pending_tombstone_tablets.first() + && *i <= persisted + { + removed += 1; + } + self.pending_tombstone_tablets.drain(..removed); + removed > 0 } #[inline] diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 959f817ebd7..636970c0ad1 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -393,7 +393,7 @@ mod tests { assert_ne!(10, s.entry_storage().applied_index()); assert_ne!(1, s.entry_storage().applied_term()); assert_eq!(10, s.region_state().get_tablet_index()); - assert!(task.persisted_cb.is_some()); + assert!(!task.persisted_cbs.is_empty()); s.on_applied_snapshot(); assert_eq!(10, s.entry_storage().applied_index()); diff --git a/components/raftstore-v2/src/worker/mod.rs b/components/raftstore-v2/src/worker/mod.rs index 3d4e69fdcf6..6fafd01df85 100644 --- a/components/raftstore-v2/src/worker/mod.rs +++ b/components/raftstore-v2/src/worker/mod.rs @@ -1,3 +1,4 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. pub mod pd; +pub mod tablet_gc; diff --git a/components/raftstore-v2/src/worker/tablet_gc.rs b/components/raftstore-v2/src/worker/tablet_gc.rs new file mode 100644 index 00000000000..cc1fcd971e9 --- /dev/null +++ b/components/raftstore-v2/src/worker/tablet_gc.rs @@ -0,0 +1,227 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + fmt::{self, Display, Formatter}, + path::{Path, PathBuf}, + time::Duration, +}; + +use collections::HashMap; +use engine_traits::{DeleteStrategy, KvEngine, Range, TabletContext, TabletRegistry}; +use kvproto::metapb::Region; +use slog::{error, warn, Logger}; +use tikv_util::worker::{Runnable, RunnableWithTimer}; + +pub enum Task { + Trim { + tablet: EK, + start_key: Box<[u8]>, + end_key: Box<[u8]>, + }, + PrepareDestroy { + tablet: EK, + region_id: u64, + wait_for_persisted: u64, + }, + Destroy { + region_id: u64, + persisted_index: u64, + }, +} + +impl Display for Task { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match *self { + Task::Trim { + ref start_key, + ref end_key, + .. + } => write!( + f, + "trim tablet for start_key {}, end_key {}", + log_wrappers::Value::key(start_key), + log_wrappers::Value::key(end_key), + ), + Task::PrepareDestroy { + region_id, + wait_for_persisted, + .. + } => write!( + f, + "prepare destroy tablet for region_id {}, wait_for_persisted {}", + region_id, wait_for_persisted, + ), + Task::Destroy { + region_id, + persisted_index, + } => write!( + f, + "destroy tablet for region_id {} persisted_index {}", + region_id, persisted_index, + ), + } + } +} + +impl Task { + #[inline] + pub fn trim(tablet: EK, region: &Region) -> Self { + Task::Trim { + tablet, + start_key: region.get_start_key().into(), + end_key: region.get_end_key().into(), + } + } + + #[inline] + pub fn prepare_destroy(tablet: EK, region_id: u64, wait_for_persisted: u64) -> Self { + Task::PrepareDestroy { + tablet, + region_id, + wait_for_persisted, + } + } + + #[inline] + pub fn destroy(region_id: u64, persisted_index: u64) -> Self { + Task::Destroy { + region_id, + persisted_index, + } + } +} + +pub struct Runner { + tablet_registry: TabletRegistry, + logger: Logger, + + // region_id -> [(tablet_path, wait_for_persisted)]. 
+ waiting_destroy_tasks: HashMap>, + pending_destroy_tasks: Vec, +} + +impl Runner { + pub fn new(tablet_registry: TabletRegistry, logger: Logger) -> Self { + Self { + tablet_registry, + logger, + waiting_destroy_tasks: HashMap::default(), + pending_destroy_tasks: Vec::new(), + } + } + + fn trim(tablet: &EK, start_key: &[u8], end_key: &[u8]) -> engine_traits::Result<()> { + let start_key = keys::data_key(start_key); + let end_key = keys::data_end_key(end_key); + let range1 = Range::new(&[], &start_key); + let range2 = Range::new(&end_key, keys::DATA_MAX_KEY); + tablet.delete_ranges_cfs(DeleteStrategy::DeleteFiles, &[range1, range2])?; + // TODO: Avoid this after compaction filter is ready. + tablet.delete_ranges_cfs(DeleteStrategy::DeleteByRange, &[range1, range2])?; + for r in [range1, range2] { + tablet.compact_range(Some(r.start_key), Some(r.end_key), false, 1)?; + } + Ok(()) + } + + fn prepare_destroy(&mut self, region_id: u64, tablet: EK, wait_for_persisted: u64) { + let _ = tablet.pause_background_work(); + self.waiting_destroy_tasks + .entry(region_id) + .or_default() + .push((PathBuf::from(tablet.path()), wait_for_persisted)); + } + + fn destroy(&mut self, region_id: u64, persisted: u64) { + if let Some(v) = self.waiting_destroy_tasks.get_mut(®ion_id) { + v.retain(|(path, wait)| { + if *wait <= persisted { + if !Self::process_destroy_task(&self.logger, &self.tablet_registry, path) { + self.pending_destroy_tasks.push(path.clone()); + } + return false; + } + true + }); + } + } + + /// Returns true if task is consumed. Failure is considered consumed. + fn process_destroy_task(logger: &Logger, registry: &TabletRegistry, path: &Path) -> bool { + match EK::locked(path.to_str().unwrap()) { + Err(e) => warn!( + logger, + "failed to check whether the tablet path is locked"; + "err" => ?e, + "path" => path.display(), + ), + Ok(false) => { + // TODO: use a meaningful table context. 
+ let _ = registry + .tablet_factory() + .destroy_tablet(TabletContext::with_infinite_region(0, None), path) + .map_err(|e| { + warn!( + logger, + "failed to destroy tablet"; + "err" => ?e, + "path" => path.display(), + ) + }); + return true; + } + _ => {} + } + false + } +} + +impl Runnable for Runner +where + EK: KvEngine, +{ + type Task = Task; + + fn run(&mut self, task: Task) { + match task { + Task::Trim { + tablet, + start_key, + end_key, + } => { + if let Err(e) = Self::trim(&tablet, &start_key, &end_key) { + error!( + self.logger, + "failed to trim tablet"; + "start_key" => log_wrappers::Value::key(&start_key), + "end_key" => log_wrappers::Value::key(&end_key), + "err" => %e, + ); + } + } + Task::PrepareDestroy { + region_id, + tablet, + wait_for_persisted, + } => self.prepare_destroy(region_id, tablet, wait_for_persisted), + Task::Destroy { + region_id, + persisted_index, + } => self.destroy(region_id, persisted_index), + } + } +} + +impl RunnableWithTimer for Runner +where + EK: KvEngine, +{ + fn on_timeout(&mut self) { + self.pending_destroy_tasks + .retain(|task| !Self::process_destroy_task(&self.logger, &self.tablet_registry, task)); + } + + fn get_interval(&self) -> Duration { + Duration::from_secs(2) + } +} diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 9b13ce6af9b..b8cf6006dee 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -185,7 +185,7 @@ where pub send_time: Instant, pub raft_wb: Option, // called after writing to kvdb and raftdb. 
- pub persisted_cb: Option>, + pub persisted_cbs: Vec>, pub entries: Vec, pub cut_logs: Option<(u64, u64)>, pub raft_state: Option, @@ -213,7 +213,7 @@ where extra_write: ExtraWrite::None, messages: vec![], trackers: vec![], - persisted_cb: None, + persisted_cbs: Vec::new(), has_snapshot: false, } } @@ -419,9 +419,9 @@ where ); } } - if let Some(v) = task.persisted_cb.take() { + for v in task.persisted_cbs.drain(..) { self.persisted_cbs.push(v); - }; + } self.tasks.push(task); } diff --git a/components/raftstore/src/store/compaction_guard.rs b/components/raftstore/src/store/compaction_guard.rs index d43e33a4e08..efee09be906 100644 --- a/components/raftstore/src/store/compaction_guard.rs +++ b/components/raftstore/src/store/compaction_guard.rs @@ -455,7 +455,7 @@ mod tests { db.put(b"zc5", &value).unwrap(); db.put(b"zc6", &value).unwrap(); db.flush_cfs(&[], true /* wait */).unwrap(); - db.compact_range( + db.compact_range_cf( CF_DEFAULT, None, // start_key None, // end_key false, // exclusive_manual diff --git a/components/raftstore/src/store/worker/compact.rs b/components/raftstore/src/store/worker/compact.rs index 958da2adaa6..7bc7052b277 100644 --- a/components/raftstore/src/store/worker/compact.rs +++ b/components/raftstore/src/store/worker/compact.rs @@ -108,7 +108,7 @@ where .start_coarse_timer(); box_try!( self.engine - .compact_range(cf_name, start_key, end_key, false, 1 /* threads */,) + .compact_range_cf(cf_name, start_key, end_key, false, 1 /* threads */,) ); compact_range_timer.observe_duration(); info!( diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 833e8131746..b2330e26f93 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -308,7 +308,8 @@ impl Cluster { pub fn compact_data(&self) { for engine in self.engines.values() { let db = &engine.kv; - db.compact_range(CF_DEFAULT, None, None, false, 1).unwrap(); + db.compact_range_cf(CF_DEFAULT, None, 
None, false, 1) + .unwrap(); } } diff --git a/src/config/mod.rs b/src/config/mod.rs index 8886711f948..6ed8da3f111 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -98,6 +98,8 @@ pub const MIN_BLOCK_CACHE_SHARD_SIZE: usize = 128 * MIB as usize; /// Maximum of 15% of system memory can be used by Raft Engine. Normally its /// memory usage is much smaller than that. const RAFT_ENGINE_MEMORY_LIMIT_RATE: f64 = 0.15; +/// Tentative value. +const WRITE_BUFFER_MEMORY_LIMIT_RATE: f64 = 0.25; const LOCKCF_MIN_MEM: usize = 256 * MIB as usize; const LOCKCF_MAX_MEM: usize = GIB as usize; @@ -1220,6 +1222,10 @@ impl DbConfig { EngineType::RaftKv2 => { self.enable_multi_batch_write.get_or_insert(false); self.allow_concurrent_memtable_write.get_or_insert(false); + let total_mem = SysQuota::memory_limit_in_bytes() as f64; + self.write_buffer_limit.get_or_insert(ReadableSize( + (total_mem * WRITE_BUFFER_MEMORY_LIMIT_RATE) as u64, + )); } } } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 4847dbb8428..e530cc56577 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -1028,7 +1028,7 @@ pub mod tests { pub fn compact(&mut self) { for cf in ALL_CFS { - self.db.compact_range(cf, None, None, false, 1).unwrap(); + self.db.compact_range_cf(cf, None, None, false, 1).unwrap(); } } } From 0404a7c180dd052da1caccc35939a230f5106a37 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 27 Dec 2022 20:38:16 +0800 Subject: [PATCH 0434/1149] raftstore-v2: purge raft engine (#13993) ref tikv/tikv#12842 Implement periodical purge in v2. 
Signed-off-by: tabokie Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/batch/store.rs | 33 +++++++++++++++++-- components/raftstore-v2/src/fsm/peer.rs | 3 +- .../operation/command/admin/compact_log.rs | 14 +++++--- components/raftstore-v2/src/router/message.rs | 1 + components/tikv_util/src/lib.rs | 1 + components/tikv_util/src/worker/pool.rs | 10 ++++-- 6 files changed, 52 insertions(+), 10 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 38ce4296c03..bd37a6d4e07 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -28,7 +28,7 @@ use raftstore::{ SplitCheckRunner, SplitCheckTask, StoreWriters, TabletSnapManager, Transport, WriteSenders, }, }; -use slog::Logger; +use slog::{warn, Logger}; use tikv_util::{ box_err, config::{Tracker, VersionTrack}, @@ -378,18 +378,20 @@ struct Workers { pd: LazyWorker, tablet_gc_worker: Worker, async_write: StoreWriters, + purge: Option, // Following is not maintained by raftstore itself. 
background: Worker, } impl Workers { - fn new(background: Worker, pd: LazyWorker) -> Self { + fn new(background: Worker, pd: LazyWorker, purge: Option) -> Self { Self { async_read: Worker::new("async-read-worker"), pd, tablet_gc_worker: Worker::new("tablet-gc-worker"), async_write: StoreWriters::default(), + purge, background, } } @@ -433,7 +435,29 @@ impl StoreSystem { .broadcast_normal(|| PeerMsg::Tick(PeerTick::PdHeartbeat)); }); - let mut workers = Workers::new(background, pd_worker); + let purge_worker = if raft_engine.need_manual_purge() { + let worker = Worker::new("purge-worker"); + let raft_clone = raft_engine.clone(); + let logger = self.logger.clone(); + let router = router.clone(); + worker.spawn_interval_task(cfg.value().raft_engine_purge_interval.0, move || { + match raft_clone.manual_purge() { + Ok(regions) => { + for r in regions { + let _ = router.send(r, PeerMsg::ForceCompactLog); + } + } + Err(e) => { + warn!(logger, "purge expired files"; "err" => %e); + } + }; + }); + Some(worker) + } else { + None + }; + + let mut workers = Workers::new(background, pd_worker, purge_worker); workers .async_write .spawn(store_id, raft_engine.clone(), None, router, &trans, &cfg)?; @@ -537,6 +561,9 @@ impl StoreSystem { workers.async_write.shutdown(); workers.async_read.stop(); workers.pd.stop(); + if let Some(w) = workers.purge { + w.stop(); + } } } diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 22145ecdcaa..9b3586c6012 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -208,7 +208,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, match tick { PeerTick::Raft => self.on_raft_tick(), PeerTick::PdHeartbeat => self.on_pd_heartbeat(), - PeerTick::CompactLog => self.on_compact_log_tick(), + PeerTick::CompactLog => self.on_compact_log_tick(false), PeerTick::SplitRegionCheck => self.on_split_region_check(), PeerTick::CheckMerge => 
unimplemented!(), PeerTick::CheckPeerStaleState => unimplemented!(), @@ -297,6 +297,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, .peer_mut() .on_request_split(self.store_ctx, request, ch) } + PeerMsg::ForceCompactLog => self.on_compact_log_tick(true), #[cfg(feature = "testexport")] PeerMsg::WaitFlush(ch) => self.fsm.peer_mut().on_wait_flush(ch), } diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index aaf067aa585..d1d10d366bf 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -33,7 +33,7 @@ use crate::{ }; impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { - pub fn on_compact_log_tick(&mut self) { + pub fn on_compact_log_tick(&mut self, force: bool) { if !self.fsm.peer().is_leader() { // `compact_cache_to` is called when apply, there is no need to call // `compact_to` here, snapshot generating has already been cancelled @@ -44,7 +44,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, self.fsm .peer_mut() - .maybe_propose_compact_log(self.store_ctx); + .maybe_propose_compact_log(self.store_ctx, force); self.on_entry_cache_evict(); } @@ -64,7 +64,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, impl Peer { // Mirrors v1::on_raft_gc_log_tick. - fn maybe_propose_compact_log(&mut self, store_ctx: &mut StoreContext) { + fn maybe_propose_compact_log( + &mut self, + store_ctx: &mut StoreContext, + force: bool, + ) { // As leader, we would not keep caches for the peers that didn't response // heartbeat in the last few seconds. That happens probably because // another TiKV is down. 
In this case if we do not clean up the cache, @@ -122,7 +126,9 @@ impl Peer { self.entry_storage_mut() .compact_entry_cache(std::cmp::min(alive_cache_idx, applied_idx + 1)); - let mut compact_idx = if applied_idx > first_idx + let mut compact_idx = if force && replicated_idx > first_idx { + replicated_idx + } else if applied_idx > first_idx && applied_idx - first_idx >= store_ctx.cfg.raft_log_gc_count_limit() || self.approximate_raft_log_size() >= store_ctx.cfg.raft_log_gc_size_limit().0 { diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index cd88a23c744..930de5ff036 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -181,6 +181,7 @@ pub enum PeerMsg { request: RequestSplit, ch: CmdResChannel, }, + ForceCompactLog, /// A message that used to check if a flush is happened. #[cfg(feature = "testexport")] WaitFlush(super::FlushChannel), diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index 9421c0e174b..9b13250fe1e 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -4,6 +4,7 @@ #![feature(thread_id_value)] #![feature(box_patterns)] #![feature(vec_into_raw_parts)] +#![feature(let_chains)] #[cfg(test)] extern crate test; diff --git a/components/tikv_util/src/worker/pool.rs b/components/tikv_util/src/worker/pool.rs index ba4b1e27f41..e761fac8bb5 100644 --- a/components/tikv_util/src/worker/pool.rs +++ b/components/tikv_util/src/worker/pool.rs @@ -376,8 +376,11 @@ impl Worker { let mut interval = GLOBAL_TIMER_HANDLE .interval(std::time::Instant::now(), interval) .compat(); + let stop = self.stop.clone(); self.remote.spawn(async move { - while let Some(Ok(_)) = interval.next().await { + while !stop.load(Ordering::Relaxed) + && let Some(Ok(_)) = interval.next().await + { func(); } }); @@ -391,8 +394,11 @@ impl Worker { let mut interval = GLOBAL_TIMER_HANDLE 
.interval(std::time::Instant::now(), interval) .compat(); + let stop = self.stop.clone(); self.remote.spawn(async move { - while let Some(Ok(_)) = interval.next().await { + while !stop.load(Ordering::Relaxed) + && let Some(Ok(_)) = interval.next().await + { let fut = func(); fut.await; } From 25261c8aa4f638b4d6f5d97e14fb7a786a3d7638 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 27 Dec 2022 23:30:17 +0800 Subject: [PATCH 0435/1149] raftstore-v2: cleanup stale tablet on restart (#13994) ref tikv/tikv#12842 If operations like snapshot, split, are aborted by restart, they needs to be either resumed or cleanup. This PR checks for garbage after restart and resume committed operations. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/engine_rocks/src/write_batch.rs | 25 +++- components/engine_traits/src/lib.rs | 1 + components/engine_traits/src/tablet.rs | 9 ++ components/engine_traits/src/write_batch.rs | 7 + components/raftstore-v2/src/batch/store.rs | 68 ++++++++- components/raftstore-v2/src/fsm/peer.rs | 26 ++-- .../src/operation/command/admin/mod.rs | 2 +- .../src/operation/command/admin/split.rs | 40 ++++-- .../raftstore-v2/src/operation/command/mod.rs | 11 +- components/raftstore-v2/src/operation/life.rs | 2 +- components/raftstore-v2/src/operation/mod.rs | 1 + .../raftstore-v2/src/operation/query/mod.rs | 19 ++- .../src/operation/ready/apply_trace.rs | 57 +++++++- .../raftstore-v2/src/operation/ready/mod.rs | 2 +- .../src/operation/ready/snapshot.rs | 69 ++++++++-- components/raftstore-v2/src/raft/apply.rs | 9 +- components/raftstore-v2/src/raft/peer.rs | 4 +- .../raftstore-v2/tests/failpoints/mod.rs | 1 + .../tests/failpoints/test_split.rs | 106 ++++++++++++++ .../tests/integrations/cluster.rs | 118 ++++++++++++++++ .../tests/integrations/test_split.rs | 130 ++---------------- .../raftstore/src/store/async_io/write.rs | 5 + components/raftstore/src/store/fsm/store.rs | 2 +- 23 files changed, 532 insertions(+), 182 deletions(-) create mode 100644 
components/raftstore-v2/tests/failpoints/test_split.rs diff --git a/components/engine_rocks/src/write_batch.rs b/components/engine_rocks/src/write_batch.rs index a46edfb0a4a..3659a7628d6 100644 --- a/components/engine_rocks/src/write_batch.rs +++ b/components/engine_rocks/src/write_batch.rs @@ -96,24 +96,39 @@ impl RocksWriteBatchVec { } } } -} -impl engine_traits::WriteBatch for RocksWriteBatchVec { - fn write_opt(&mut self, opts: &WriteOptions) -> Result { + #[inline] + fn write_impl(&mut self, opts: &WriteOptions, mut cb: impl FnMut()) -> Result { let opt: RocksWriteOptions = opts.into(); let mut seq = 0; if self.support_write_batch_vec { // FIXME(tabokie): Callback for empty write batch won't be called. self.get_db() - .multi_batch_write_callback(self.as_inner(), &opt.into_raw(), |s| seq = s) + .multi_batch_write_callback(self.as_inner(), &opt.into_raw(), |s| { + seq = s; + cb(); + }) .map_err(r2e)?; } else { self.get_db() - .write_callback(&self.wbs[0], &opt.into_raw(), |s| seq = s) + .write_callback(&self.wbs[0], &opt.into_raw(), |s| { + seq = s; + cb(); + }) .map_err(r2e)?; } Ok(seq) } +} + +impl engine_traits::WriteBatch for RocksWriteBatchVec { + fn write_opt(&mut self, opts: &WriteOptions) -> Result { + self.write_impl(opts, || {}) + } + + fn write_callback_opt(&mut self, opts: &WriteOptions, cb: impl FnMut()) -> Result { + self.write_impl(opts, cb) + } fn data_size(&self) -> usize { let mut size: usize = 0; diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index bc54a5e7627..45a3d18fa7a 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -253,6 +253,7 @@ #![feature(assert_matches)] #![feature(linked_list_cursors)] #![feature(let_chains)] +#![feature(str_split_as_str)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index 64459bbc7ee..edc0bd99870 100644 --- 
a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -226,6 +226,15 @@ impl TabletRegistry { format!("{}{}_{}", prefix, id, suffix) } + pub fn parse_tablet_name<'a>(&self, path: &'a Path) -> Option<(&'a str, u64, u64)> { + let name = path.file_name().unwrap().to_str().unwrap(); + let mut parts = name.rsplit('_'); + let suffix = parts.next()?.parse().ok()?; + let id = parts.next()?.parse().ok()?; + let prefix = parts.as_str(); + Some((prefix, id, suffix)) + } + pub fn tablet_root(&self) -> &Path { &self.tablets.root } diff --git a/components/engine_traits/src/write_batch.rs b/components/engine_traits/src/write_batch.rs index d8ff8d07796..8a92ac7c382 100644 --- a/components/engine_traits/src/write_batch.rs +++ b/components/engine_traits/src/write_batch.rs @@ -73,6 +73,13 @@ pub trait WriteBatch: Mutable { /// Commit the WriteBatch to disk with the given options fn write_opt(&mut self, opts: &WriteOptions) -> Result; + // TODO: it should be `FnOnce`. + fn write_callback_opt(&mut self, opts: &WriteOptions, mut cb: impl FnMut()) -> Result { + let seq = self.write_opt(opts)?; + cb(); + Ok(seq) + } + /// Commit the WriteBatch to disk atomically fn write(&mut self) -> Result { self.write_opt(&WriteOptions::default()) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index bd37a6d4e07..a3800085522 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -24,8 +24,10 @@ use raft::{StateRole, INVALID_ID}; use raftstore::{ coprocessor::{CoprocessorHost, RegionChangeEvent}, store::{ - fsm::store::PeerTickBatch, local_metrics::RaftMetrics, Config, ReadRunner, ReadTask, - SplitCheckRunner, SplitCheckTask, StoreWriters, TabletSnapManager, Transport, WriteSenders, + fsm::store::{PeerTickBatch, ENTRY_CACHE_EVICT_TICK_DURATION}, + local_metrics::RaftMetrics, + Config, ReadRunner, ReadTask, SplitCheckRunner, SplitCheckTask, StoreWriters, + 
TabletSnapManager, Transport, WriteSenders, }, }; use slog::{warn, Logger}; @@ -43,6 +45,7 @@ use time::Timespec; use crate::{ fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate, StoreMeta}, + operation::SPLIT_PREFIX, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, worker::{pd, tablet_gc}, @@ -80,6 +83,32 @@ pub struct StoreContext { pub snap_mgr: TabletSnapManager, } +impl StoreContext { + pub fn update_ticks_timeout(&mut self) { + self.tick_batch[PeerTick::Raft as usize].wait_duration = self.cfg.raft_base_tick_interval.0; + self.tick_batch[PeerTick::CompactLog as usize].wait_duration = + self.cfg.raft_log_gc_tick_interval.0; + self.tick_batch[PeerTick::EntryCacheEvict as usize].wait_duration = + ENTRY_CACHE_EVICT_TICK_DURATION; + self.tick_batch[PeerTick::PdHeartbeat as usize].wait_duration = + self.cfg.pd_heartbeat_tick_interval.0; + self.tick_batch[PeerTick::SplitRegionCheck as usize].wait_duration = + self.cfg.split_region_check_tick_interval.0; + self.tick_batch[PeerTick::CheckPeerStaleState as usize].wait_duration = + self.cfg.peer_stale_state_check_interval.0; + self.tick_batch[PeerTick::CheckMerge as usize].wait_duration = + self.cfg.merge_check_tick_interval.0; + self.tick_batch[PeerTick::CheckLeaderLease as usize].wait_duration = + self.cfg.check_leader_lease_interval.0; + self.tick_batch[PeerTick::ReactivateMemoryLock as usize].wait_duration = + self.cfg.reactive_memory_lock_tick_interval.0; + self.tick_batch[PeerTick::ReportBuckets as usize].wait_duration = + self.cfg.report_region_buckets_tick_interval.0; + self.tick_batch[PeerTick::CheckLongUncommitted as usize].wait_duration = + self.cfg.check_long_uncommitted_interval.0; + } +} + /// A [`PollHandler`] that handles updates of [`StoreFsm`]s and [`PeerFsm`]s. 
/// /// It is responsible for: @@ -152,6 +181,7 @@ impl PollHandler StorePollerBuilder { } meta.set_region(storage.region(), storage.is_initialized(), &self.logger); - let (sender, peer_fsm) = PeerFsm::new(&cfg, &self.tablet_registry, storage)?; + let (sender, peer_fsm) = + PeerFsm::new(&cfg, &self.tablet_registry, &self.snap_mgr, storage)?; meta.region_read_progress .insert(region_id, peer_fsm.as_ref().peer().read_progress().clone()); @@ -318,7 +349,33 @@ impl StorePollerBuilder { Ok(regions) } - fn clean_up_tablets(&self, _peers: &HashMap>) -> Result<()> { + fn clean_up_tablets(&self, peers: &HashMap>) -> Result<()> { + for entry in file_system::read_dir(self.tablet_registry.tablet_root())? { + let entry = entry?; + let path = entry.path(); + let Some((prefix, region_id, tablet_index)) = self.tablet_registry.parse_tablet_name(&path) else { continue }; + let fsm = match peers.get(®ion_id) { + Some((_, fsm)) => fsm, + None => { + // The peer is either destroyed or not created yet. It will be + // recovered by leader heartbeats. + file_system::remove_dir_all(&path)?; + continue; + } + }; + // Valid split tablet should be installed during recovery. + if prefix == SPLIT_PREFIX { + file_system::remove_dir_all(&path)?; + continue; + } + if prefix.is_empty() { + // Stale split data can be deleted. + if fsm.peer().storage().tablet_index() > tablet_index { + file_system::remove_dir_all(&path)?; + } + } + // TODO: handle other prefix + } // TODO: list all available tablets and destroy those which are not in the // peers. 
Ok(()) @@ -335,7 +392,7 @@ where fn build(&mut self, _priority: batch_system::Priority) -> Self::Handler { let cfg = self.cfg.value().clone(); - let poll_ctx = StoreContext { + let mut poll_ctx = StoreContext { logger: self.logger.clone(), trans: self.trans.clone(), current_time: None, @@ -354,6 +411,7 @@ where snap_mgr: self.snap_mgr.clone(), coprocessor_host: self.coprocessor_host.clone(), }; + poll_ctx.update_ticks_timeout(); let cfg_tracker = self.cfg.clone().tracker("raftstore".to_string()); StorePoller::new(poll_ctx, cfg_tracker) } diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 9b3586c6012..49f1efcb760 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -7,7 +7,7 @@ use std::borrow::Cow; use batch_system::{BasicMailbox, Fsm}; use crossbeam::channel::TryRecvError; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; -use raftstore::store::{Config, LocksStatus, Transport}; +use raftstore::store::{Config, LocksStatus, TabletSnapManager, Transport}; use slog::{debug, error, info, trace, Logger}; use tikv_util::{ is_zero_duration, @@ -30,7 +30,7 @@ pub struct PeerFsm { receiver: Receiver, /// A registry for all scheduled ticks. This can avoid scheduling ticks /// twice accidentally. 
- tick_registry: u16, + tick_registry: [bool; PeerTick::VARIANT_COUNT], is_stopped: bool, reactivate_memory_lock_ticks: usize, } @@ -39,16 +39,17 @@ impl PeerFsm { pub fn new( cfg: &Config, tablet_registry: &TabletRegistry, + snap_mgr: &TabletSnapManager, storage: Storage, ) -> Result> { - let peer = Peer::new(cfg, tablet_registry, storage)?; + let peer = Peer::new(cfg, tablet_registry, snap_mgr, storage)?; info!(peer.logger, "create peer"); let (tx, rx) = mpsc::loose_bounded(cfg.notify_capacity); let fsm = Box::new(PeerFsm { peer, mailbox: None, receiver: rx, - tick_registry: 0, + tick_registry: [false; PeerTick::VARIANT_COUNT], is_stopped: false, reactivate_memory_lock_ticks: 0, }); @@ -141,8 +142,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, pub fn schedule_tick(&mut self, tick: PeerTick) { assert!(PeerTick::VARIANT_COUNT <= u16::BITS as usize); let idx = tick as usize; - let key = 1u16 << (idx as u16); - if self.fsm.tick_registry & key != 0 { + if self.fsm.tick_registry[idx] { return; } if is_zero_duration(&self.store_ctx.tick_batch[idx].wait_duration) { @@ -167,7 +167,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, return; } }; - self.fsm.tick_registry |= key; + self.fsm.tick_registry[idx] = true; let logger = self.fsm.logger().clone(); // TODO: perhaps following allocation can be removed. let cb = Box::new(move || { @@ -194,6 +194,15 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, if self.fsm.peer.storage().is_initialized() { self.fsm.peer.schedule_apply_fsm(self.store_ctx); } + // Unlike v1, it's a must to set ready when there are pending entries. Otherwise + // it may block for ever when there is unapplied conf change. + let entry_storage = self.fsm.peer.storage().entry_storage(); + if entry_storage.commit_index() > entry_storage.applied_index() + // Speed up setup if there is only one peer. 
+ || self.fsm.peer.is_leader() + { + self.fsm.peer.set_has_ready(); + } } #[inline] @@ -205,6 +214,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, } fn on_tick(&mut self, tick: PeerTick) { + self.fsm.tick_registry[tick as usize] = false; match tick { PeerTick::Raft => self.on_raft_tick(), PeerTick::PdHeartbeat => self.on_pd_heartbeat(), @@ -225,7 +235,6 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, match msg { PeerMsg::RaftMessage(msg) => { self.fsm.peer.on_raft_message(self.store_ctx, msg); - self.schedule_pending_ticks(); } PeerMsg::RaftQuery(cmd) => { self.on_receive_command(cmd.send_time); @@ -304,6 +313,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, } // TODO: instead of propose pending commands immediately, we should use timeout. self.fsm.peer.propose_pending_writes(self.store_ctx); + self.schedule_pending_ticks(); } pub fn on_reactivate_memory_lock_tick(&mut self) { diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 9afd50a5305..977e26e0675 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -13,7 +13,7 @@ use protobuf::Message; use raftstore::store::{cmd_resp, fsm::apply, msg::ErrorCallback}; use slog::info; use split::SplitResult; -pub use split::{RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX}; +pub use split::{temp_split_path, RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX}; use tikv_util::box_err; use txn_types::WriteBatchFlags; diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 386528070e2..e1f4ae552f6 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -25,11 +25,13 
@@ //! created by the store, and here init it using the data sent from the parent //! peer. -use std::{borrow::Cow, cmp}; +use std::{borrow::Cow, cmp, path::PathBuf}; use collections::HashSet; use crossbeam::channel::SendError; -use engine_traits::{Checkpointer, KvEngine, RaftEngine, RaftLogBatch, TabletContext}; +use engine_traits::{ + Checkpointer, KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, +}; use fail::fail_point; use kvproto::{ metapb::{self, Region, RegionEpoch}, @@ -118,6 +120,11 @@ pub struct SplitFlowControl { may_skip_split_check: bool, } +pub fn temp_split_path(registry: &TabletRegistry, region_id: u64) -> PathBuf { + let tablet_name = registry.tablet_name(SPLIT_PREFIX, region_id, RAFT_INIT_LOG_INDEX); + registry.tablet_root().join(tablet_name) +} + impl PeerFsmDelegate<'_, EK, ER, T> { pub fn on_split_region_check(&mut self) { if !self.fsm.peer_mut().on_split_region_check(self.store_ctx) { @@ -329,8 +336,7 @@ impl Apply { continue; } - let name = reg.tablet_name(SPLIT_PREFIX, new_region_id, RAFT_INIT_LOG_INDEX); - let split_temp_path = reg.tablet_root().join(name); + let split_temp_path = temp_split_path(reg, new_region_id); checkpointer .create_at(&split_temp_path, None, 0) .unwrap_or_else(|e| { @@ -344,16 +350,22 @@ impl Apply { } let derived_path = self.tablet_registry().tablet_path(region_id, log_index); - checkpointer - .create_at(&derived_path, None, 0) - .unwrap_or_else(|e| { - panic!( - "{:?} fails to create checkpoint with path {:?}: {:?}", - self.logger.list(), - derived_path, - e - ) - }); + // If it's recovered from restart, it's possible the target path exists already. + // And because checkpoint is atomic, so we don't need to worry about corruption. + // And it's also wrong to delete it and remake as it may has applied and flushed + // some data to the new checkpoint before being restarted. 
+ if !derived_path.exists() { + checkpointer + .create_at(&derived_path, None, 0) + .unwrap_or_else(|e| { + panic!( + "{:?} fails to create checkpoint with path {:?}: {:?}", + self.logger.list(), + derived_path, + e + ) + }); + } // Remove the old write batch. self.write_batch.take(); let reg = self.tablet_registry(); diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 7fa2fa776c2..35b4ec1918e 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -51,7 +51,9 @@ mod admin; mod control; mod write; -pub use admin::{AdminCmdResult, RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX}; +pub use admin::{ + temp_split_path, AdminCmdResult, RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX, +}; pub use control::ProposalControl; pub use write::{ SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, @@ -505,10 +507,14 @@ impl Apply { #[inline] pub fn flush(&mut self) { + let (index, term) = self.apply_progress(); + let flush_state = self.flush_state().clone(); if let Some(wb) = &mut self.write_batch && !wb.is_empty() { let mut write_opt = WriteOptions::default(); write_opt.set_disable_wal(true); - if let Err(e) = wb.write_opt(&write_opt) { + if let Err(e) = wb.write_callback_opt(&write_opt, || { + flush_state.set_applied_index(index); + }) { panic!("failed to write data: {:?}: {:?}", self.logger.list(), e); } self.metrics.written_bytes += wb.data_size() as u64; @@ -527,7 +533,6 @@ impl Apply { callbacks.shrink_to(SHRINK_PENDING_CMD_QUEUE_CAP); } let mut apply_res = ApplyRes::default(); - let (index, term) = self.apply_progress(); apply_res.applied_index = index; apply_res.applied_term = term; apply_res.admin_result = self.take_admin_result().into_boxed_slice(); diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 
d61f11e7ada..ea42832eaea 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -229,7 +229,7 @@ impl Store { ctx.schedulers.read.clone(), &ctx.logger, ) - .and_then(|s| PeerFsm::new(&ctx.cfg, &ctx.tablet_registry, s)) + .and_then(|s| PeerFsm::new(&ctx.cfg, &ctx.tablet_registry, &ctx.snap_mgr, s)) { Ok(p) => p, res => { diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index f0a2624203a..c49a14142ce 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -9,6 +9,7 @@ mod ready; pub use command::{ AdminCmdResult, CommittedEntries, ProposalControl, RequestSplit, SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, SplitFlowControl, + SPLIT_PREFIX, }; pub use life::DestroyProgress; pub use ready::{ diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 4ffb4bcdcec..59c6f2d0f7c 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -19,7 +19,7 @@ use kvproto::{ errorpb, raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse, StatusCmdType}, }; -use raft::Ready; +use raft::{Ready, StateRole}; use raftstore::{ errors::RAFTSTORE_IS_BUSY, store::{ @@ -29,7 +29,7 @@ use raftstore::{ }, Error, Result, }; -use slog::info; +use slog::{debug, info}; use tikv_util::box_err; use txn_types::WriteBatchFlags; @@ -375,11 +375,15 @@ impl Peer { /// Query internal states for debugging purpose. 
pub fn on_query_debug_info(&self, ch: DebugInfoChannel) { let entry_storage = self.storage().entry_storage(); + let mut status = self.raft_group().status(); + status + .progress + .get_or_insert_with(|| self.raft_group().raft.prs()); let mut meta = RegionMeta::new( self.storage().region_state(), entry_storage.apply_state(), GroupState::Ordered, - self.raft_group().status(), + status, self.raft_group().raft.raft_log.last_index(), self.raft_group().raft.raft_log.persisted, ); @@ -394,6 +398,10 @@ impl Peer { .raft_log .term(meta.raft_apply.commit_index) .unwrap(); + debug!(self.logger, "on query debug info"; + "tick" => self.raft_group().raft.election_elapsed, + "election_timeout" => self.raft_group().raft.randomized_election_timeout(), + ); ch.set_result(meta); } @@ -422,7 +430,10 @@ impl Peer { // Only leaders need to update applied_term. if progress_to_be_updated && self.is_leader() { - // TODO: add coprocessor_host hook + if applied_term == self.term() { + ctx.coprocessor_host + .on_applied_current_term(StateRole::Leader, self.region()); + } let progress = ReadProgress::applied_term(applied_term); let mut meta = ctx.store_meta.lock().unwrap(); let reader = meta.readers.get_mut(&self.region_id()).unwrap(); diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index d5aa93b587a..d6a83b7933b 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -30,18 +30,24 @@ use std::{cmp, sync::Mutex}; use engine_traits::{ - FlushProgress, KvEngine, RaftEngine, RaftLogBatch, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, - CF_WRITE, DATA_CFS, DATA_CFS_LEN, + FlushProgress, KvEngine, RaftEngine, RaftLogBatch, TabletRegistry, ALL_CFS, CF_DEFAULT, + CF_LOCK, CF_RAFT, CF_WRITE, DATA_CFS, DATA_CFS_LEN, }; use kvproto::{ metapb::Region, raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState}, }; -use 
raftstore::store::{ReadTask, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM}; +use raftstore::store::{ + ReadTask, TabletSnapManager, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, +}; use slog::Logger; use tikv_util::{box_err, worker::Scheduler}; use crate::{ + operation::{ + command::temp_split_path, + ready::snapshot::{install_tablet, recv_snap_path}, + }, raft::{Peer, Storage}, router::PeerMsg, Result, StoreRouter, @@ -372,6 +378,51 @@ impl Storage { .map(Some) } + /// Region state is written before actually moving data. It's possible that + /// the tablet is missing after restart. We need to move the data again + /// after being restarted. + pub fn recover_tablet(&self, registry: &TabletRegistry, snap_mgr: &TabletSnapManager) { + let tablet_index = self.region_state().get_tablet_index(); + if tablet_index == 0 { + // It's an uninitialized peer, nothing to recover. + return; + } + let region_id = self.region().get_id(); + let target_path = registry.tablet_path(region_id, tablet_index); + if target_path.exists() { + // Move data succeeded before restart, nothing to recover. + return; + } + if tablet_index == RAFT_INIT_LOG_INDEX { + // Its data may come from split or snapshot. Try split first. + let split_path = temp_split_path(registry, region_id); + if install_tablet(registry, &split_path, region_id, tablet_index) { + return; + } + } + let truncated_index = self.entry_storage().truncated_index(); + if truncated_index == tablet_index { + // Try snapshot. + let peer_id = self.peer().get_id(); + let snap_path = recv_snap_path( + snap_mgr, + region_id, + peer_id, + self.entry_storage().truncated_term(), + tablet_index, + ); + if install_tablet(registry, &snap_path, region_id, tablet_index) { + return; + } + } + panic!( + "{:?} data loss detected: {}_{} not found", + self.logger().list(), + region_id, + tablet_index + ); + } + /// Write initial persist trace for uninit peer. 
pub fn init_apply_trace(&self, write_task: &mut WriteTask) { let region_id = self.region().get_id(); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 66d9755c1df..8b125844d0e 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -23,7 +23,6 @@ mod snapshot; use std::{cmp, time::Instant}; -pub use apply_trace::{cf_offset, write_initial_states, ApplyTrace, DataTrace, StateStorage}; use engine_traits::{KvEngine, RaftEngine}; use error_code::ErrorCodeExt; use kvproto::{raft_cmdpb::AdminCmdType, raft_serverpb::RaftMessage}; @@ -40,6 +39,7 @@ use tikv_util::{ }; pub use self::{ + apply_trace::{cf_offset, write_initial_states, ApplyTrace, DataTrace, StateStorage}, async_writer::AsyncWriter, snapshot::{GenSnapTask, SnapState}, }; diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 7a6e00aec4f..41dc0d39429 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -19,8 +19,10 @@ //! peer fsm, then Raft will get the snapshot. use std::{ + assert_matches::assert_matches, fmt::{self, Debug}, fs, + path::{Path, PathBuf}, sync::{ atomic::{AtomicBool, AtomicU64, Ordering}, Arc, @@ -43,7 +45,7 @@ use tikv_util::box_err; use crate::{ fsm::ApplyResReporter, - operation::command::SPLIT_PREFIX, + operation::command::temp_split_path, raft::{Apply, Peer, Storage}, Result, StoreContext, }; @@ -115,6 +117,48 @@ impl Debug for GenSnapTask { } } +pub fn recv_snap_path( + snap_mgr: &TabletSnapManager, + region_id: u64, + peer_id: u64, + term: u64, + index: u64, +) -> PathBuf { + let key = TabletSnapKey::new(region_id, peer_id, term, index); + snap_mgr.final_recv_path(&key) +} + +/// Move the tablet from `source` to managed path. +/// +/// Returns false if `source` doesn't exist. 
+pub fn install_tablet( + registry: &TabletRegistry, + source: &Path, + region_id: u64, + tablet_index: u64, +) -> bool { + if !source.exists() { + return false; + } + let target_path = registry.tablet_path(region_id, tablet_index); + assert_matches!( + EK::locked(source.to_str().unwrap()), + Ok(false), + "source is locked: {} => {}", + source.display(), + target_path.display() + ); + if let Err(e) = fs::rename(source, &target_path) { + panic!( + "failed to rename tablet {} => {}: {:?}", + source.display(), + target_path.display(), + e + ); + } + true +} + impl Peer { pub fn on_snapshot_generated(&mut self, snapshot: GenSnapRes) { if self.storage_mut().on_snapshot_generated(snapshot) { @@ -476,32 +520,29 @@ impl Storage { let (path, clean_split) = match self.split_init_mut() { // If index not match, the peer may accept a newer snapshot after split. Some(init) if init.scheduled && last_index == RAFT_INIT_LOG_INDEX => { - let name = reg.tablet_name(SPLIT_PREFIX, region_id, last_index); - (reg.tablet_root().join(name), false) - } - si => { - let key = TabletSnapKey::new(region_id, peer_id, last_term, last_index); - (snap_mgr.final_recv_path(&key), si.is_some()) + (temp_split_path(®, region_id), false) } + si => ( + recv_snap_path(&snap_mgr, region_id, peer_id, last_term, last_index), + si.is_some(), + ), }; let logger = self.logger().clone(); // The snapshot require no additional processing such as ingest them to DB, but // it should load it into the factory after it persisted. 
let hook = move || { - let target_path = reg.tablet_path(region_id, last_index); - if let Err(e) = std::fs::rename(&path, &target_path) { + if !install_tablet(®, &path, region_id, last_index) { panic!( - "{:?} failed to load tablet, path: {} -> {}, {:?}", + "{:?} failed to install tablet, path: {}, region_id: {}, tablet_index: {}", logger.list(), path.display(), - target_path.display(), - e + region_id, + last_index ); } if clean_split { - let name = reg.tablet_name(SPLIT_PREFIX, region_id, last_index); - let path = reg.tablet_root().join(name); + let path = temp_split_path(®, region_id); let _ = fs::remove_dir_all(path); } }; diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 5539de3d617..666f3adb699 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -34,6 +34,7 @@ pub struct Apply { /// command. tombstone: bool, applied_term: u64, + applied_index: u64, /// The largest index that have modified each column family. modifications: DataTrace, admin_cmd_result: Vec, @@ -76,6 +77,7 @@ impl Apply { callbacks: vec![], tombstone: false, applied_term: 0, + applied_index: flush_state.applied_index(), modifications: [0; DATA_CFS_LEN], admin_cmd_result: vec![], region_state, @@ -115,7 +117,7 @@ impl Apply { #[inline] pub fn set_apply_progress(&mut self, index: u64, term: u64) { - self.flush_state.set_applied_index(index); + self.applied_index = index; self.applied_term = term; if self.log_recovery.is_none() { return; @@ -123,12 +125,15 @@ impl Apply { let log_recovery = self.log_recovery.as_ref().unwrap(); if log_recovery.iter().all(|v| index >= *v) { self.log_recovery.take(); + // Now all logs are recovered, flush them to avoid recover again + // and again. 
+ let _ = self.tablet.flush_cfs(&[], false); } } #[inline] pub fn apply_progress(&self) -> (u64, u64) { - (self.flush_state.applied_index(), self.applied_term) + (self.applied_index, self.applied_term) } #[inline] diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 4fbc7e9874e..668b0ebf41d 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -20,7 +20,7 @@ use raftstore::{ fsm::ApplyMetrics, util::{Lease, RegionReadProgress}, Config, EntryStorage, LocksStatus, PeerStat, ProposalQueue, ReadDelegate, ReadIndexQueue, - ReadProgress, TxnExt, WriteTask, + ReadProgress, TabletSnapManager, TxnExt, WriteTask, }, }; use slog::Logger; @@ -118,6 +118,7 @@ impl Peer { pub fn new( cfg: &Config, tablet_registry: &TabletRegistry, + snap_mgr: &TabletSnapManager, storage: Storage, ) -> Result { let logger = storage.logger().clone(); @@ -137,6 +138,7 @@ impl Peer { // old tablet and create new peer. We also can't get the correct range of the // region, which is required for kv data gc. if tablet_index != 0 { + raft_group.store().recover_tablet(tablet_registry, snap_mgr); let mut ctx = TabletContext::new(®ion, Some(tablet_index)); ctx.flush_state = Some(flush_state.clone()); // TODO: Perhaps we should stop create the tablet automatically. 
diff --git a/components/raftstore-v2/tests/failpoints/mod.rs b/components/raftstore-v2/tests/failpoints/mod.rs index 84f1de2803d..d04ad2cafc2 100644 --- a/components/raftstore-v2/tests/failpoints/mod.rs +++ b/components/raftstore-v2/tests/failpoints/mod.rs @@ -10,4 +10,5 @@ mod cluster; mod test_basic_write; mod test_bootstrap; +mod test_split; mod test_trace_apply; diff --git a/components/raftstore-v2/tests/failpoints/test_split.rs b/components/raftstore-v2/tests/failpoints/test_split.rs new file mode 100644 index 00000000000..79356ae5805 --- /dev/null +++ b/components/raftstore-v2/tests/failpoints/test_split.rs @@ -0,0 +1,106 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + thread, + time::{Duration, Instant}, +}; + +use engine_traits::{RaftEngineReadOnly, CF_DEFAULT}; +use futures::executor::block_on; +use raftstore::store::RAFT_INIT_LOG_INDEX; +use raftstore_v2::{router::PeerMsg, SimpleWriteEncoder}; + +use crate::cluster::{split_helper::split_region, Cluster}; + +/// If a node is restarted after metadata is persisted but before the tablet +/// is installed, it should resume installing the tablet. 
+#[test] +fn test_restart_resume() { + let mut cluster = Cluster::default(); + let raft_engine = cluster.node(0).running_state().unwrap().raft_engine.clone(); + let router = &mut cluster.routers[0]; + + let region_id = 2; + let region = router.region_detail(region_id); + let peer = region.get_peers()[0].clone(); + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + + let fp = "async_write_before_cb"; + fail::cfg(fp, "return").unwrap(); + + let split_region_id = 1000; + let mut new_peer = peer.clone(); + new_peer.set_id(1001); + split_region( + router, + region, + peer, + split_region_id, + new_peer, + None, + None, + b"k11", + b"k11", + true, + ); + + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, b"k22", b"value"); + let header = Box::new(router.new_request_for(region_id).take_header()); + let (msg, mut sub) = PeerMsg::simple_write(header, put.encode()); + router.send(region_id, msg).unwrap(); + // Send a command to ensure split init is triggered. + block_on(sub.wait_proposed()); + + let region_state = raft_engine + .get_region_state(split_region_id, u64::MAX) + .unwrap() + .unwrap(); + assert_eq!(region_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); + let path = cluster + .node(0) + .tablet_registry() + .tablet_path(split_region_id, RAFT_INIT_LOG_INDEX); + assert!(!path.exists(), "{} should not exist", path.display()); + drop(raft_engine); + + cluster.restart(0); + // If split is resumed, the tablet should be installed. + assert!( + path.exists(), + "{} should exist after restart", + path.display() + ); + + // Both region should be recovered correctly. 
+ let cases = vec![ + (split_region_id, b"k01", b"v01"), + (region_id, b"k21", b"v21"), + ]; + let router = &mut cluster.routers[0]; + let new_epoch = router + .new_request_for(split_region_id) + .take_header() + .take_region_epoch(); + let timer = Instant::now(); + for (region_id, key, val) in cases { + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, key, val); + let mut header = Box::new(router.new_request_for(region_id).take_header()); + while timer.elapsed() < Duration::from_secs(3) { + // We need to wait till source peer replay split. + if *header.get_region_epoch() != new_epoch { + thread::sleep(Duration::from_millis(100)); + header = Box::new(router.new_request_for(region_id).take_header()); + continue; + } + break; + } + assert_eq!(*header.get_region_epoch(), new_epoch, "{:?}", header); + let (msg, sub) = PeerMsg::simple_write(header, put.encode()); + router.send(region_id, msg).unwrap(); + // Send a command to ensure split init is triggered. + let resp = block_on(sub.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + } +} diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 064fd9d1cad..4c025a0fc85 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -586,3 +586,121 @@ impl Drop for Cluster { } } } + +pub mod split_helper { + use std::{thread, time::Duration}; + + use engine_traits::CF_DEFAULT; + use futures::executor::block_on; + use kvproto::{ + metapb, pdpb, + raft_cmdpb::{AdminCmdType, AdminRequest, RaftCmdRequest, RaftCmdResponse, SplitRequest}, + }; + use raftstore_v2::{router::PeerMsg, SimpleWriteEncoder}; + + use super::TestRouter; + + pub fn new_batch_split_region_request( + split_keys: Vec>, + ids: Vec, + right_derive: bool, + ) -> AdminRequest { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::BatchSplit); + 
req.mut_splits().set_right_derive(right_derive); + let mut requests = Vec::with_capacity(ids.len()); + for (mut id, key) in ids.into_iter().zip(split_keys) { + let mut split = SplitRequest::default(); + split.set_split_key(key); + split.set_new_region_id(id.get_new_region_id()); + split.set_new_peer_ids(id.take_new_peer_ids()); + requests.push(split); + } + req.mut_splits().set_requests(requests.into()); + req + } + + pub fn must_split(region_id: u64, req: RaftCmdRequest, router: &mut TestRouter) { + let (msg, sub) = PeerMsg::admin_command(req); + router.send(region_id, msg).unwrap(); + block_on(sub.result()).unwrap(); + + // TODO: when persistent implementation is ready, we can use tablet index of + // the parent to check whether the split is done. Now, just sleep a second. + thread::sleep(Duration::from_secs(1)); + } + + pub fn put(router: &mut TestRouter, region_id: u64, key: &[u8]) -> RaftCmdResponse { + let header = Box::new(router.new_request_for(region_id).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, key, b"v1"); + router.simple_write(region_id, header, put).unwrap() + } + + // Split the region according to the parameters + // return the updated original region + pub fn split_region<'a>( + router: &'a mut TestRouter, + region: metapb::Region, + peer: metapb::Peer, + split_region_id: u64, + split_peer: metapb::Peer, + left_key: Option<&'a [u8]>, + right_key: Option<&'a [u8]>, + propose_key: &[u8], + split_key: &[u8], + right_derive: bool, + ) -> (metapb::Region, metapb::Region) { + let region_id = region.id; + let mut req = RaftCmdRequest::default(); + req.mut_header().set_region_id(region_id); + req.mut_header() + .set_region_epoch(region.get_region_epoch().clone()); + req.mut_header().set_peer(peer); + + let mut split_id = pdpb::SplitId::new(); + split_id.new_region_id = split_region_id; + split_id.new_peer_ids = vec![split_peer.id]; + let admin_req = new_batch_split_region_request( + 
vec![propose_key.to_vec()], + vec![split_id], + right_derive, + ); + req.mut_requests().clear(); + req.set_admin_request(admin_req); + + must_split(region_id, req, router); + + let (left, right) = if !right_derive { + ( + router.region_detail(region_id), + router.region_detail(split_region_id), + ) + } else { + ( + router.region_detail(split_region_id), + router.region_detail(region_id), + ) + }; + + if let Some(right_key) = right_key { + let resp = put(router, left.id, right_key); + assert!(resp.get_header().has_error(), "{:?}", resp); + let resp = put(router, right.id, right_key); + assert!(!resp.get_header().has_error(), "{:?}", resp); + } + if let Some(left_key) = left_key { + let resp = put(router, left.id, left_key); + assert!(!resp.get_header().has_error(), "{:?}", resp); + let resp = put(router, right.id, left_key); + assert!(resp.get_header().has_error(), "{:?}", resp); + } + + assert_eq!(left.get_end_key(), split_key); + assert_eq!(right.get_start_key(), split_key); + assert_eq!(region.get_start_key(), left.get_start_key()); + assert_eq!(region.get_end_key(), right.get_end_key()); + + (left, right) + } +} diff --git a/components/raftstore-v2/tests/integrations/test_split.rs b/components/raftstore-v2/tests/integrations/test_split.rs index 1174a428011..7cea980beac 100644 --- a/components/raftstore-v2/tests/integrations/test_split.rs +++ b/components/raftstore-v2/tests/integrations/test_split.rs @@ -1,121 +1,13 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{thread, time::Duration}; +use std::time::Duration; -use engine_traits::{Peekable, RaftEngineReadOnly, CF_DEFAULT, CF_RAFT}; -use futures::executor::block_on; -use kvproto::{ - metapb, pdpb, - raft_cmdpb::{AdminCmdType, AdminRequest, RaftCmdRequest, RaftCmdResponse, SplitRequest}, -}; +use engine_traits::{Peekable, RaftEngineReadOnly, CF_RAFT}; use raftstore::store::{INIT_EPOCH_VER, RAFT_INIT_LOG_INDEX}; -use raftstore_v2::{router::PeerMsg, SimpleWriteEncoder}; use tikv_util::store::new_peer; use txn_types::{Key, TimeStamp}; -use crate::cluster::{Cluster, TestRouter}; - -fn new_batch_split_region_request( - split_keys: Vec>, - ids: Vec, - right_derive: bool, -) -> AdminRequest { - let mut req = AdminRequest::default(); - req.set_cmd_type(AdminCmdType::BatchSplit); - req.mut_splits().set_right_derive(right_derive); - let mut requests = Vec::with_capacity(ids.len()); - for (mut id, key) in ids.into_iter().zip(split_keys) { - let mut split = SplitRequest::default(); - split.set_split_key(key); - split.set_new_region_id(id.get_new_region_id()); - split.set_new_peer_ids(id.take_new_peer_ids()); - requests.push(split); - } - req.mut_splits().set_requests(requests.into()); - req -} - -fn must_split(region_id: u64, req: RaftCmdRequest, router: &mut TestRouter) { - let (msg, sub) = PeerMsg::admin_command(req); - router.send(region_id, msg).unwrap(); - block_on(sub.result()).unwrap(); - - // TODO: when persistent implementation is ready, we can use tablet index of - // the parent to check whether the split is done. Now, just sleep a second. 
- thread::sleep(Duration::from_secs(1)); -} - -fn put(router: &mut TestRouter, region_id: u64, key: &[u8]) -> RaftCmdResponse { - let header = Box::new(router.new_request_for(region_id).take_header()); - let mut put = SimpleWriteEncoder::with_capacity(64); - put.put(CF_DEFAULT, key, b"v1"); - router.simple_write(region_id, header, put).unwrap() -} - -// Split the region according to the parameters -// return the updated original region -fn split_region( - router: &mut TestRouter, - region: metapb::Region, - peer: metapb::Peer, - split_region_id: u64, - split_peer: metapb::Peer, - left_key: &[u8], - right_key: &[u8], - propose_key: &[u8], - split_key: &[u8], - right_derive: bool, -) -> (metapb::Region, metapb::Region) { - let region_id = region.id; - let mut req = RaftCmdRequest::default(); - req.mut_header().set_region_id(region_id); - req.mut_header() - .set_region_epoch(region.get_region_epoch().clone()); - req.mut_header().set_peer(peer); - - let mut split_id = pdpb::SplitId::new(); - split_id.new_region_id = split_region_id; - split_id.new_peer_ids = vec![split_peer.id]; - let admin_req = - new_batch_split_region_request(vec![propose_key.to_vec()], vec![split_id], right_derive); - req.mut_requests().clear(); - req.set_admin_request(admin_req); - - must_split(region_id, req, router); - - let (left, right) = if !right_derive { - ( - router.region_detail(region_id), - router.region_detail(split_region_id), - ) - } else { - ( - router.region_detail(split_region_id), - router.region_detail(region_id), - ) - }; - - // The end key of left region is `split_key` - // So writing `right_key` will fail - let resp = put(router, left.id, right_key); - assert!(resp.get_header().has_error(), "{:?}", resp); - // But `left_key` should succeed - let resp = put(router, left.id, left_key); - assert!(!resp.get_header().has_error(), "{:?}", resp); - - // Mirror of above case - let resp = put(router, right.id, left_key); - assert!(resp.get_header().has_error(), "{:?}", resp); - let 
resp = put(router, right.id, right_key); - assert!(!resp.get_header().has_error(), "{:?}", resp); - - assert_eq!(left.get_end_key(), split_key); - assert_eq!(right.get_start_key(), split_key); - assert_eq!(region.get_start_key(), left.get_start_key()); - assert_eq!(region.get_end_key(), right.get_end_key()); - - (left, right) -} +use crate::cluster::{split_helper::split_region, Cluster}; #[test] fn test_split() { @@ -141,8 +33,8 @@ fn test_split() { peer.clone(), 1000, new_peer(store_id, 10), - b"k11", - b"k33", + Some(b"k11"), + Some(b"k33"), b"k22", b"k22", false, @@ -174,8 +66,8 @@ fn test_split() { peer, 1001, new_peer(store_id, 11), - b"k00", - b"k11", + Some(b"k00"), + Some(b"k11"), b"k11", b"k11", false, @@ -215,8 +107,8 @@ fn test_split() { new_peer(store_id, 10), 1002, new_peer(store_id, 12), - b"k22", - b"k33", + Some(b"k22"), + Some(b"k33"), b"k33", b"k33", false, @@ -251,8 +143,8 @@ fn test_split() { new_peer(store_id, 12), 1003, new_peer(store_id, 13), - b"k33", - b"k55", + Some(b"k33"), + Some(b"k55"), split_key.as_encoded(), actual_split_key.as_encoded(), false, diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index b8cf6006dee..b4cceb96a82 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -718,6 +718,11 @@ where self.batch.after_write_to_raft_db(&self.metrics); + fail_point!( + "async_write_before_cb", + !self.batch.persisted_cbs.is_empty(), + |_| () + ); self.batch.after_write_all(); fail_point!("raft_before_follower_send"); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 310c33b95b2..54bb7d0cc0b 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -106,7 +106,7 @@ use crate::{ type Key = Vec; pub const PENDING_MSG_CAP: usize = 100; -const ENTRY_CACHE_EVICT_TICK_DURATION: Duration = 
Duration::from_secs(1); +pub const ENTRY_CACHE_EVICT_TICK_DURATION: Duration = Duration::from_secs(1); pub const MULTI_FILES_SNAPSHOT_FEATURE: Feature = Feature::require(6, 1, 0); // it only makes sense for large region pub struct StoreInfo { From 09f9aac35ecdc6dc8aee2eb11d6aaccaeddd3e23 Mon Sep 17 00:00:00 2001 From: Zwb Date: Wed, 28 Dec 2022 12:00:16 +0800 Subject: [PATCH 0436/1149] modify raft gc log impl for witness (#13869) ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang Signed-off-by: Zwb Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/raftstore/src/store/fsm/apply.rs | 210 ++++++++++++++++- components/raftstore/src/store/fsm/peer.rs | 118 +++++++++- components/raftstore/src/store/fsm/store.rs | 3 + components/raftstore/src/store/msg.rs | 3 + components/raftstore/src/store/peer.rs | 15 ++ tests/failpoints/cases/test_witness.rs | 167 +++++++++++++- tests/integrations/raftstore/test_witness.rs | 224 ++++++++++--------- 8 files changed, 628 insertions(+), 114 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4c510da6d77..8433f54c512 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2747,7 +2747,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#e53d558bc6d7d8b7bb2d283cdf6dda52a2615632" +source = "git+https://github.com/pingcap/kvproto.git#ae3b086b09afbb26cebcd4c1fe14b82bbe1f0796" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index affa0205e8f..ec2d7bf72a8 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -151,6 +151,7 @@ impl HeapSize for PendingCmd {} pub struct PendingCmdQueue { normals: VecDeque>, conf_change: Option>, + compacts: VecDeque>, } impl PendingCmdQueue { @@ -158,6 +159,7 @@ impl PendingCmdQueue { PendingCmdQueue { normals: VecDeque::new(), conf_change: None, + compacts: VecDeque::new(), } } @@ -190,6 
+192,23 @@ impl PendingCmdQueue { fn set_conf_change(&mut self, cmd: PendingCmd) { self.conf_change = Some(cmd); } + + fn push_compact(&mut self, cmd: PendingCmd) { + self.compacts.push_back(cmd); + } + + fn pop_compact(&mut self, index: u64) -> Option> { + let mut front = None; + while self.compacts.front().map_or(false, |c| c.index < index) { + front = self.compacts.pop_front(); + front.as_mut().unwrap().cb.take().unwrap(); + } + front + } + + fn has_compact(&mut self) -> bool { + !self.compacts.is_empty() + } } #[derive(Default, Debug)] @@ -281,6 +300,7 @@ pub enum ExecResult { SetFlashbackState { region: Region, }, + PendingCompactCmd, } /// The possible returned value when applying logs. @@ -1488,7 +1508,8 @@ where | ExecResult::CompactLog { .. } | ExecResult::DeleteRange { .. } | ExecResult::IngestSst { .. } - | ExecResult::TransferLeader { .. } => {} + | ExecResult::TransferLeader { .. } + | ExecResult::PendingCompactCmd => {} ExecResult::SplitRegion { ref derived, .. } => { self.region = derived.clone(); self.metrics.size_diff_hint = 0; @@ -1545,6 +1566,9 @@ where if let Some(cmd) = self.pending_cmds.conf_change.take() { notify_region_removed(self.region.get_id(), id, cmd); } + for cmd in self.pending_cmds.compacts.drain(..) { + notify_region_removed(self.region.get_id(), id, cmd); + } self.yield_state = None; let mut event = TraceEvent::default(); @@ -1562,6 +1586,9 @@ where if let Some(cmd) = self.pending_cmds.conf_change.take() { notify_stale_command(region_id, peer_id, self.term, cmd); } + for cmd in self.pending_cmds.compacts.drain(..) { + notify_region_removed(self.region.get_id(), peer_id, cmd); + } } fn clear_all_commands_silently(&mut self) { @@ -1571,6 +1598,9 @@ where if let Some(mut cmd) = self.pending_cmds.conf_change.take() { cmd.cb.take(); } + for mut cmd in self.pending_cmds.compacts.drain(..) 
{ + cmd.cb.take(); + } } } @@ -2937,13 +2967,86 @@ where )) } + fn try_compact_log( + &mut self, + voter_replicated_index: u64, + voter_replicated_term: u64, + ) -> Result>> { + PEER_ADMIN_CMD_COUNTER.compact.all.inc(); + let first_index = entry_storage::first_index(&self.apply_state); + + if self.is_merging { + info!( + "in merging mode, skip compact"; + "region_id" => self.region_id(), + "peer_id" => self.id(), + "voter_replicated_index" => voter_replicated_index, + ); + return Ok(None); + } + + // When the witness restarted, the pending compact cmd has been lost, so use + // `voter_replicated_index` for gc to avoid log accumulation. + if !self.pending_cmds.has_compact() { + if voter_replicated_index <= first_index { + debug!( + "voter_replicated_index <= first index, no need to compact"; + "region_id" => self.region_id(), + "peer_id" => self.id(), + "compact_index" => voter_replicated_index, + "first_index" => first_index, + ); + return Ok(Some(TaskRes::Compact { + state: self.apply_state.get_truncated_state().clone(), + first_index: 0, + has_pending: false, + })); + } + // compact failure is safe to be omitted, no need to assert. + compact_raft_log( + &self.tag, + &mut self.apply_state, + voter_replicated_index, + voter_replicated_term, + )?; + PEER_ADMIN_CMD_COUNTER.compact.success.inc(); + return Ok(Some(TaskRes::Compact { + state: self.apply_state.get_truncated_state().clone(), + first_index, + has_pending: false, + })); + } + + match self.pending_cmds.pop_compact(voter_replicated_index) { + Some(cmd) => { + // compact failure is safe to be omitted, no need to assert. 
+ compact_raft_log(&self.tag, &mut self.apply_state, cmd.index, cmd.term)?; + PEER_ADMIN_CMD_COUNTER.compact.success.inc(); + Ok(Some(TaskRes::Compact { + state: self.apply_state.get_truncated_state().clone(), + first_index, + has_pending: self.pending_cmds.has_compact(), + })) + } + None => { + info!( + "latest voter_replicated_index < compact_index, skip"; + "region_id" => self.region_id(), + "peer_id" => self.id(), + "voter_replicated_index" => voter_replicated_index, + ); + Ok(None) + } + } + } + fn exec_compact_log( &mut self, req: &AdminRequest, ) -> Result<(AdminResponse, ApplyResult)> { PEER_ADMIN_CMD_COUNTER.compact.all.inc(); - let compact_index = req.get_compact_log().get_compact_index(); + let mut compact_index = req.get_compact_log().get_compact_index(); let resp = AdminResponse::default(); let first_index = entry_storage::first_index(&self.apply_state); if compact_index <= first_index { @@ -2966,7 +3069,7 @@ where return Ok((resp, ApplyResult::None)); } - let compact_term = req.get_compact_log().get_compact_term(); + let mut compact_term = req.get_compact_log().get_compact_term(); // TODO: add unit tests to cover all the message integrity checks. if compact_term == 0 { info!( @@ -2981,6 +3084,41 @@ where )); } + let voter_replicated_index = req.get_compact_log().get_voter_replicated_index(); + // If there is any voter lagging behind, the log truncation of the witness + // shouldn't be triggered even if it's force mode(raft log size/count exceeds + // the threshold or raft engine purge), otherwise the witness can't help the + // lagging voter catch up logs when leader is down. In this situation Compact + // index should be queued. If witness receives a voter_replicated_index + // that is larger than the pending compact index, logs can be deleted. 
+ if self.peer.is_witness { + if voter_replicated_index < compact_index { + self.pending_cmds.push_compact(PendingCmd::new( + compact_index, + compact_term, + Callback::None, + )); + match self.pending_cmds.pop_compact(voter_replicated_index) { + Some(cmd) => { + compact_index = cmd.index; + compact_term = cmd.term; + } + None => { + info!( + "voter_replicated_index < compact_index, skip"; + "region_id" => self.region_id(), + "peer_id" => self.id(), + "command" => ?req.get_compact_log() + ); + return Ok((resp, ApplyResult::Res(ExecResult::PendingCompactCmd))); + } + } + } else { + for mut cmd in self.pending_cmds.compacts.drain(..) { + cmd.cb.take().unwrap(); + } + } + } // compact failure is safe to be omitted, no need to assert. compact_raft_log( &self.tag, @@ -3451,6 +3589,11 @@ where #[cfg(any(test, feature = "testexport"))] #[allow(clippy::type_complexity)] Validate(u64, Box), + CheckCompact { + region_id: u64, + voter_replicated_index: u64, + voter_replicated_term: u64, + }, } impl Msg @@ -3498,6 +3641,17 @@ where } => write!(f, "[region {}] change cmd", region_id), #[cfg(any(test, feature = "testexport"))] Msg::Validate(region_id, _) => write!(f, "[region {}] validate", region_id), + Msg::CheckCompact { + region_id, + voter_replicated_index, + voter_replicated_term, + } => { + write!( + f, + "[region {}] check compact, voter_replicated_index: {}, voter_replicated_term: {}", + region_id, voter_replicated_index, voter_replicated_term + ) + } } } } @@ -3542,6 +3696,11 @@ where // Whether destroy request is from its target region's snapshot merge_from_snapshot: bool, }, + Compact { + state: RaftTruncatedState, + first_index: u64, + has_pending: bool, + }, } pub struct ApplyFsm @@ -3947,6 +4106,34 @@ where cb.invoke_read(resp); } + fn check_pending_compact_log( + &mut self, + ctx: &mut ApplyContext, + voter_replicated_index: u64, + voter_replicated_term: u64, + ) { + let res = self + .delegate + .try_compact_log(voter_replicated_index, voter_replicated_term); + 
match res { + Ok(res) => { + if let Some(res) = res { + ctx.prepare_for(&mut self.delegate); + self.delegate.write_apply_state(ctx.kv_wb_mut()); + ctx.commit_opt(&mut self.delegate, true); + ctx.finish_for(&mut self.delegate, VecDeque::new()); + ctx.notifier + .notify_one(self.delegate.region_id(), PeerMsg::ApplyRes { res }); + } + } + Err(e) => error!(?e; + "failed to compact log"; + "region_id" => self.delegate.region.get_id(), + "peer_id" => self.delegate.id(), + ), + } + } + fn handle_tasks(&mut self, apply_ctx: &mut ApplyContext, msgs: &mut Vec>) { let mut drainer = msgs.drain(..); let mut batch_apply = None; @@ -4019,6 +4206,17 @@ where let delegate = &self.delegate as *const ApplyDelegate as *const u8; f(delegate) } + Msg::CheckCompact { + voter_replicated_index, + voter_replicated_term, + .. + } => { + self.check_pending_compact_log( + apply_ctx, + voter_replicated_index, + voter_replicated_term, + ); + } } } } @@ -4429,6 +4627,11 @@ where } #[cfg(any(test, feature = "testexport"))] Msg::Validate(..) => return, + Msg::CheckCompact { region_id, .. } => { + info!("target region is not found"; + "region_id" => region_id); + return; + } }, Either::Left(Err(TrySendError::Full(_))) => unreachable!(), }; @@ -4561,6 +4764,7 @@ mod memtrace { | Msg::Change { .. } => 0, #[cfg(any(test, feature = "testexport"))] Msg::Validate(..) => 0, + Msg::CheckCompact { .. 
} => 0, } } } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 1b484df5316..abd8fd84771 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -53,7 +53,7 @@ use tikv_alloc::trace::TraceEvent; use tikv_util::{ box_err, debug, defer, error, escape, info, is_zero_duration, mpsc::{self, LooseBoundedSender, Receiver}, - store::{find_peer, is_learner, region_on_same_stores}, + store::{find_peer, find_peer_by_id, is_learner, region_on_same_stores}, sys::disk::DiskUsage, time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant}, trace, warn, @@ -1195,6 +1195,7 @@ where PeerTick::ReportBuckets => self.on_report_region_buckets_tick(), PeerTick::CheckLongUncommitted => self.on_check_long_uncommitted_tick(), PeerTick::CheckPeersAvailability => self.on_check_peers_availability(), + PeerTick::RequestVoterReplicatedIndex => self.on_request_voter_replicated_index(), } } @@ -1217,6 +1218,9 @@ where self.fsm.has_ready = true; } self.fsm.peer.maybe_gen_approximate_buckets(self.ctx); + if self.fsm.peer.is_witness() { + self.register_pull_voter_replicated_index_tick(); + } } fn on_gc_snap(&mut self, snaps: Vec<(SnapKey, bool)>) { @@ -2310,6 +2314,21 @@ where *is_ready = true; } } + ApplyTaskRes::Compact { + state, + first_index, + has_pending, + } => { + self.fsm.peer.has_pending_compact_cmd = has_pending; + // When the witness restarts, the pending compact cmds will be lost. We will try + // to use `voter_replicated_index` as the `compact index` to avoid log + // accumulation, but if `voter_replicated_index` is less than `first_index`, + // then gc is not needed. In this case, the `first_index` we pass back will be + // 0, and `has_pending` set to false. 
+ if first_index != 0 { + self.on_ready_compact_log(first_index, state); + } + } } if self.fsm.peer.unsafe_recovery_state.is_some() { self.check_unsafe_recovery_state(); @@ -2667,6 +2686,53 @@ where ); } + fn on_voter_replicated_index_request(&mut self, from: &metapb::Peer) { + if !self.fsm.peer.is_leader() { + return; + } + let mut voter_replicated_idx = self.fsm.peer.get_store().last_index(); + for (peer_id, p) in self.fsm.peer.raft_group.raft.prs().iter() { + let peer = find_peer_by_id(self.region(), *peer_id).unwrap(); + if voter_replicated_idx > p.matched && !is_learner(peer) { + voter_replicated_idx = p.matched; + } + } + let first_index = self.fsm.peer.get_store().first_index(); + if voter_replicated_idx > first_index { + voter_replicated_idx = first_index; + } + let mut resp = ExtraMessage::default(); + resp.set_type(ExtraMessageType::MsgVoterReplicatedIndexResponse); + resp.voter_replicated_index = voter_replicated_idx; + self.fsm + .peer + .send_extra_message(resp, &mut self.ctx.trans, from); + debug!( + "leader responses voter_replicated_index to witness"; + "region_id" => self.region().get_id(), + "witness_id" => from.id, + "leader_id" => self.fsm.peer.peer.get_id(), + "voter_replicated_index" => voter_replicated_idx, + ); + } + + fn on_voter_replicated_index_response(&mut self, msg: &ExtraMessage) { + if self.fsm.peer.is_leader() || !self.fsm.peer.is_witness() { + return; + } + let voter_replicated_index = msg.voter_replicated_index; + if let Ok(voter_replicated_term) = self.fsm.peer.get_store().term(voter_replicated_index) { + self.ctx.apply_router.schedule_task( + self.region_id(), + ApplyTask::CheckCompact { + region_id: self.region_id(), + voter_replicated_index, + voter_replicated_term, + }, + ) + } + } + fn on_extra_message(&mut self, mut msg: RaftMessage) { match msg.get_extra_msg().get_type() { ExtraMessageType::MsgRegionWakeUp | ExtraMessageType::MsgCheckStalePeer => { @@ -2716,6 +2782,12 @@ where ExtraMessageType::MsgAvailabilityResponse => { 
self.on_availability_response(msg.get_from_peer(), msg.get_extra_msg()); } + ExtraMessageType::MsgVoterReplicatedIndexRequest => { + self.on_voter_replicated_index_request(msg.get_from_peer()); + } + ExtraMessageType::MsgVoterReplicatedIndexResponse => { + self.on_voter_replicated_index_response(msg.get_extra_msg()); + } } } @@ -3871,6 +3943,9 @@ where self.fsm.peer.schedule_raftlog_gc(self.ctx, compact_to); self.fsm.peer.last_compacted_idx = compact_to; self.fsm.peer.mut_store().on_compact_raftlog(compact_to); + if self.fsm.peer.is_witness() { + self.fsm.peer.last_compacted_time = Instant::now(); + } } fn on_ready_split_region( @@ -4897,6 +4972,10 @@ where ExecResult::IngestSst { ssts } => self.on_ingest_sst_result(ssts), ExecResult::TransferLeader { term } => self.on_transfer_leader(term), ExecResult::SetFlashbackState { region } => self.on_set_flashback_state(region), + ExecResult::PendingCompactCmd => { + self.fsm.peer.has_pending_compact_cmd = true; + self.register_pull_voter_replicated_index_tick(); + } } } @@ -5315,8 +5394,13 @@ where let first_idx = self.fsm.peer.get_store().first_index(); let last_idx = self.fsm.peer.get_store().last_index(); + let mut voter_replicated_idx = last_idx; let (mut replicated_idx, mut alive_cache_idx) = (last_idx, last_idx); for (peer_id, p) in self.fsm.peer.raft_group.raft.prs().iter() { + let peer = find_peer_by_id(self.region(), *peer_id).unwrap(); + if !is_learner(peer) && voter_replicated_idx > p.matched { + voter_replicated_idx = p.matched; + } if replicated_idx > p.matched { replicated_idx = p.matched; } @@ -5405,7 +5489,8 @@ where let region_id = self.fsm.peer.region().get_id(); let peer = self.fsm.peer.peer.clone(); let term = self.fsm.peer.get_index_term(compact_idx); - let request = new_compact_log_request(region_id, peer, compact_idx, term); + let request = + new_compact_log_request(region_id, peer, compact_idx, term, voter_replicated_idx); self.propose_raft_command_internal( request, Callback::None, @@ -5444,6 
+5529,27 @@ where self.register_check_long_uncommitted_tick(); } + fn on_request_voter_replicated_index(&mut self) { + if !self.fsm.peer.is_witness() || !self.fsm.peer.has_pending_compact_cmd { + return; + } + // TODO: make it configurable + if self.fsm.peer.last_compacted_time.elapsed() + > self.ctx.cfg.raft_log_gc_tick_interval.0 * 2 + { + let mut msg = ExtraMessage::default(); + msg.set_type(ExtraMessageType::MsgVoterReplicatedIndexRequest); + let leader_id = self.fsm.peer.leader_id(); + let leader = self.fsm.peer.get_peer_from_cache(leader_id); + if let Some(leader) = leader { + self.fsm + .peer + .send_extra_message(msg, &mut self.ctx.trans, &leader); + } + } + self.register_pull_voter_replicated_index_tick(); + } + fn register_check_leader_lease_tick(&mut self) { self.schedule_tick(PeerTick::CheckLeaderLease) } @@ -5979,6 +6085,10 @@ where } } + fn register_pull_voter_replicated_index_tick(&mut self) { + self.schedule_tick(PeerTick::RequestVoterReplicatedIndex); + } + fn on_check_peer_stale_state_tick(&mut self) { if self.fsm.peer.pending_remove { return; @@ -6421,6 +6531,7 @@ fn new_compact_log_request( peer: metapb::Peer, compact_index: u64, compact_term: u64, + voter_replicated_index: u64, ) -> RaftCmdRequest { let mut request = new_admin_request(region_id, peer); @@ -6428,6 +6539,9 @@ fn new_compact_log_request( admin.set_cmd_type(AdminCmdType::CompactLog); admin.mut_compact_log().set_compact_index(compact_index); admin.mut_compact_log().set_compact_term(compact_term); + admin + .mut_compact_log() + .set_voter_replicated_index(voter_replicated_index); request.set_admin_request(admin); request } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 54bb7d0cc0b..ceb8858046d 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -594,6 +594,9 @@ where self.cfg.check_long_uncommitted_interval.0; self.tick_batch[PeerTick::CheckPeersAvailability as 
usize].wait_duration = self.cfg.check_peers_availability_interval.0; + // TODO: make it reasonable + self.tick_batch[PeerTick::RequestVoterReplicatedIndex as usize].wait_duration = + self.cfg.raft_log_gc_tick_interval.0 * 2; } } diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index a4c6c435741..08b0e9367dc 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -375,6 +375,7 @@ pub enum PeerTick { ReportBuckets = 9, CheckLongUncommitted = 10, CheckPeersAvailability = 11, + RequestVoterReplicatedIndex = 12, } impl PeerTick { @@ -395,6 +396,7 @@ impl PeerTick { PeerTick::ReportBuckets => "report_buckets", PeerTick::CheckLongUncommitted => "check_long_uncommitted", PeerTick::CheckPeersAvailability => "check_peers_availability", + PeerTick::RequestVoterReplicatedIndex => "request_voter_replicated_index", } } @@ -412,6 +414,7 @@ impl PeerTick { PeerTick::ReportBuckets, PeerTick::CheckLongUncommitted, PeerTick::CheckPeersAvailability, + PeerTick::RequestVoterReplicatedIndex, ]; TICKS } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index a72bb59d8bf..7752a0a1b0e 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -939,6 +939,15 @@ where /// The index of last compacted raft log. It is used for the next compact /// log task. pub last_compacted_idx: u64, + /// Record the time of the last raft log compact, the witness should query + /// the leader periodically whether `voter_replicated_index` is updated + /// if CompactLog admin command isn't triggered for a while. + pub last_compacted_time: Instant, + /// When the peer is witness, and there is any voter lagging behind, the + /// log truncation of the witness shouldn't be triggered even if it's + /// force mode, and this item will be set to `true`, after all pending + /// compact cmds have been handled, it will be set to `false`. 
+ pub has_pending_compact_cmd: bool, /// The index of the latest urgent proposal index. last_urgent_proposal_idx: u64, /// The index of the latest committed split command. @@ -1083,6 +1092,10 @@ where let logger = slog_global::get_global().new(slog::o!("region_id" => region.get_id())); let raft_group = RawNode::new(&raft_cfg, ps, &logger)?; + // In order to avoid excessive log accumulation due to the loss of pending + // compaction cmds after the witness is restarted, it will actively pull + // voter_request_index once at start. + let has_pending_compact_cmd = peer.is_witness; let mut peer = Peer { peer, @@ -1118,6 +1131,8 @@ where tag: tag.clone(), last_applying_idx: applied_index, last_compacted_idx: 0, + last_compacted_time: Instant::now(), + has_pending_compact_cmd, last_urgent_proposal_idx: u64::MAX, last_committed_split_idx: 0, last_sent_snapshot_idx: 0, diff --git a/tests/failpoints/cases/test_witness.rs b/tests/failpoints/cases/test_witness.rs index cee75ff44b9..98a845b7016 100644 --- a/tests/failpoints/cases/test_witness.rs +++ b/tests/failpoints/cases/test_witness.rs @@ -2,11 +2,12 @@ use std::{iter::FromIterator, sync::Arc, time::Duration}; +use collections::HashMap; use futures::executor::block_on; -use kvproto::metapb; +use kvproto::{metapb, raft_serverpb::RaftApplyState}; use pd_client::PdClient; use test_raftstore::*; -use tikv_util::store::find_peer; +use tikv_util::{config::ReadableDuration, store::find_peer}; fn become_witness(cluster: &Cluster, region_id: u64, peer: &mut metapb::Peer) { peer.set_role(metapb::PeerRole::Learner); @@ -69,3 +70,165 @@ fn test_witness_update_region_in_local_reader() { fail::remove("change_peer_after_update_region_store_3"); } + +// Test the case witness pull voter_replicated_index when has pending compact +// cmd. 
+#[test] +fn test_witness_raftlog_gc_pull_voter_replicated_index() { + let mut cluster = new_server_cluster(0, 3); + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); + cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(50); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.must_put(b"k0", b"v0"); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1); + // nonwitness -> witness + let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + become_witness(&cluster, region.get_id(), &mut peer_on_store3); + + // make sure raft log gc is triggered + std::thread::sleep(Duration::from_millis(200)); + let mut before_states = HashMap::default(); + for (&id, engines) in &cluster.engines { + let mut state: RaftApplyState = get_raft_msg_or_default(engines, &keys::apply_state_key(1)); + before_states.insert(id, state.take_truncated_state()); + } + + // one follower is down + cluster.stop_node(nodes[1]); + + // write some data to make log gap exceeds the gc limit + for i in 1..1000 { + let (k, v) = (format!("k{}", i), format!("v{}", i)); + let key = k.as_bytes(); + let value = v.as_bytes(); + cluster.must_put(key, value); + } + + // the witness truncated index is not advanced + for (&id, engines) in &cluster.engines { + let state: RaftApplyState = get_raft_msg_or_default(engines, &keys::apply_state_key(1)); + if id == 2 { + assert_eq!( + state.get_truncated_state().get_index() - before_states[&id].get_index(), + 0 + ); + } else { + assert_ne!( + 900, + state.get_truncated_state().get_index() - before_states[&id].get_index() + ); + } + } + + fail::cfg("on_raft_gc_log_tick", "return").unwrap(); + + // the follower is back online + 
cluster.run_node(nodes[1]).unwrap(); + cluster.must_put(b"k00", b"v00"); + must_get_equal(&cluster.get_engine(nodes[1]), b"k00", b"v00"); + // make sure raft log gc is triggered + std::thread::sleep(Duration::from_millis(300)); + + // the truncated index is advanced now, as all the peers has replicated + for (&id, engines) in &cluster.engines { + let state: RaftApplyState = get_raft_msg_or_default(engines, &keys::apply_state_key(1)); + assert_ne!( + 900, + state.get_truncated_state().get_index() - before_states[&id].get_index() + ); + } + fail::remove("on_raft_gc_log_tick"); +} + +// Test the case witness gc raftlog after reboot. +#[test] +fn test_witness_raftlog_gc_after_reboot() { + let mut cluster = new_server_cluster(0, 3); + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); + cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(50); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.must_put(b"k0", b"v0"); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1); + // nonwitness -> witness + let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + become_witness(&cluster, region.get_id(), &mut peer_on_store3); + + // make sure raft log gc is triggered + std::thread::sleep(Duration::from_millis(200)); + let mut before_states = HashMap::default(); + for (&id, engines) in &cluster.engines { + let mut state: RaftApplyState = get_raft_msg_or_default(engines, &keys::apply_state_key(1)); + before_states.insert(id, state.take_truncated_state()); + } + + // one follower is down + cluster.stop_node(nodes[1]); + + // write some data to make log gap exceeds the gc limit + for i in 1..1000 { + let (k, v) = (format!("k{}", i), 
format!("v{}", i)); + let key = k.as_bytes(); + let value = v.as_bytes(); + cluster.must_put(key, value); + } + + // the witness truncated index is not advanced + for (&id, engines) in &cluster.engines { + let state: RaftApplyState = get_raft_msg_or_default(engines, &keys::apply_state_key(1)); + if id == 2 { + assert_eq!( + state.get_truncated_state().get_index() - before_states[&id].get_index(), + 0 + ); + } else { + assert_ne!( + 900, + state.get_truncated_state().get_index() - before_states[&id].get_index() + ); + } + } + + fail::cfg("on_raft_gc_log_tick", "return").unwrap(); + + // the follower is back online + cluster.run_node(nodes[1]).unwrap(); + cluster.must_put(b"k00", b"v00"); + must_get_equal(&cluster.get_engine(nodes[1]), b"k00", b"v00"); + + // the witness is down + cluster.stop_node(nodes[2]); + std::thread::sleep(Duration::from_millis(100)); + // the witness is back online + cluster.run_node(nodes[2]).unwrap(); + + // make sure raft log gc is triggered + std::thread::sleep(Duration::from_millis(300)); + + // the truncated index is advanced now, as all the peers has replicated + for (&id, engines) in &cluster.engines { + let state: RaftApplyState = get_raft_msg_or_default(engines, &keys::apply_state_key(1)); + assert_ne!( + 900, + state.get_truncated_state().get_index() - before_states[&id].get_index() + ); + } + fail::remove("on_raft_gc_log_tick"); +} diff --git a/tests/integrations/raftstore/test_witness.rs b/tests/integrations/raftstore/test_witness.rs index a2518cc64ae..301a743588e 100644 --- a/tests/integrations/raftstore/test_witness.rs +++ b/tests/integrations/raftstore/test_witness.rs @@ -2,8 +2,13 @@ use std::{iter::FromIterator, sync::Arc, time::Duration}; +use collections::HashMap; use futures::executor::block_on; -use kvproto::{metapb, raft_cmdpb::ChangePeerRequest, raft_serverpb::PeerState}; +use kvproto::{ + metapb, + raft_cmdpb::ChangePeerRequest, + raft_serverpb::{PeerState, RaftApplyState}, +}; use pd_client::PdClient; use 
raft::eraftpb::ConfChangeType; use test_raftstore::*; @@ -296,127 +301,134 @@ fn test_witness_conf_change() { // } // } -// TODO: add back when raft log gc logic is updated for witness -// // Test the case that truncated index won't advance when there is a witness -// even // if the gap gap exceeds the gc count limit -// #[test] -// fn test_witness_raftlog_gc_lagged_follower() { -// let mut cluster = new_server_cluster(0, 3); -// cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); -// cluster.run(); -// let nodes = Vec::from_iter(cluster.get_node_ids()); -// assert_eq!(nodes.len(), 3); +// Test the case that truncated index won't advance when there is a witness even +// if the gap gap exceeds the gc count limit +#[test] +fn test_witness_raftlog_gc_lagged_follower() { + let mut cluster = new_server_cluster(0, 3); + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); -// let pd_client = Arc::clone(&cluster.pd_client); -// pd_client.disable_default_operator(); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); -// cluster.must_put(b"k0", b"v0"); + cluster.must_put(b"k0", b"v0"); -// let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); -// let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); -// cluster.must_transfer_leader(region.get_id(), peer_on_store1); -// // nonwitness -> witness -// let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); -// become_witness(&cluster, region.get_id(), &mut peer_on_store3); + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1); + // nonwitness -> witness + let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + become_witness(&cluster, region.get_id(), &mut 
peer_on_store3); -// // make sure raft log gc is triggered -// std::thread::sleep(Duration::from_millis(200)); -// let mut before_states = HashMap::default(); -// for (&id, engines) in &cluster.engines { -// let mut state: RaftApplyState = get_raft_msg_or_default(engines, -// &keys::apply_state_key(1)); before_states.insert(id, -// state.take_truncated_state()); } + // make sure raft log gc is triggered + std::thread::sleep(Duration::from_millis(200)); + let mut before_states = HashMap::default(); + for (&id, engines) in &cluster.engines { + let mut state: RaftApplyState = get_raft_msg_or_default(engines, &keys::apply_state_key(1)); + before_states.insert(id, state.take_truncated_state()); + } -// // one follower is down -// cluster.stop_node(nodes[1]); + // one follower is down + cluster.stop_node(nodes[1]); -// // write some data to make log gap exceeds the gc limit -// for i in 1..1000 { -// let (k, v) = (format!("k{}", i), format!("v{}", i)); -// let key = k.as_bytes(); -// let value = v.as_bytes(); -// cluster.must_put(key, value); -// } + // write some data to make log gap exceeds the gc limit + for i in 1..1000 { + let (k, v) = (format!("k{}", i), format!("v{}", i)); + let key = k.as_bytes(); + let value = v.as_bytes(); + cluster.must_put(key, value); + } -// // the truncated index is not advanced -// for (&id, engines) in &cluster.engines { -// let state: RaftApplyState = get_raft_msg_or_default(engines, -// &keys::apply_state_key(1)); assert!(state.get_truncated_state(). 
-// get_index() - before_states[&id].get_index() < 10); } - -// // the follower is back online -// cluster.run_node(nodes[1]).unwrap(); -// cluster.must_put(b"k00", b"v00"); -// must_get_equal(&cluster.get_engine(nodes[1]), b"k00", b"v00"); -// // make sure raft log gc is triggered -// std::thread::sleep(Duration::from_millis(300)); - -// // the truncated index is advanced now, as all the peers has replicated -// for (&id, engines) in &cluster.engines { -// let state: RaftApplyState = get_raft_msg_or_default(engines, -// &keys::apply_state_key(1)); assert_ge!( -// state.get_truncated_state().get_index() - -// before_states[&id].get_index(), 900 -// ); -// } -// } + // the witness truncated index is not advanced + for (&id, engines) in &cluster.engines { + let state: RaftApplyState = get_raft_msg_or_default(engines, &keys::apply_state_key(1)); + if id == 2 { + assert_eq!( + state.get_truncated_state().get_index() - before_states[&id].get_index(), + 0 + ); + } else { + assert_ne!( + 900, + state.get_truncated_state().get_index() - before_states[&id].get_index() + ); + } + } -// TODO: add back when raft log gc logic is updated for witness -// // Test the case that truncated index is advance when there is a lagged -// witness #[test] -// fn test_witness_raftlog_gc_lagged_witness() { -// let mut cluster = new_server_cluster(0, 3); -// cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); -// cluster.run(); -// let nodes = Vec::from_iter(cluster.get_node_ids()); -// assert_eq!(nodes.len(), 3); + // the follower is back online + cluster.run_node(nodes[1]).unwrap(); + cluster.must_put(b"k00", b"v00"); + must_get_equal(&cluster.get_engine(nodes[1]), b"k00", b"v00"); + // make sure raft log gc is triggered + std::thread::sleep(Duration::from_millis(300)); + + // the truncated index is advanced now, as all the peers has replicated + for (&id, engines) in &cluster.engines { + let state: RaftApplyState = get_raft_msg_or_default(engines, &keys::apply_state_key(1)); + 
assert_ne!( + 900, + state.get_truncated_state().get_index() - before_states[&id].get_index() + ); + } +} -// let pd_client = Arc::clone(&cluster.pd_client); -// pd_client.disable_default_operator(); +// Test the case that truncated index is advance when there is a lagged witness +#[test] +fn test_witness_raftlog_gc_lagged_witness() { + let mut cluster = new_server_cluster(0, 3); + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); -// let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); -// let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); -// cluster.must_transfer_leader(region.get_id(), peer_on_store1); -// // nonwitness -> witness -// let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); -// become_witness(&cluster, region.get_id(), &mut peer_on_store3); -// cluster.must_put(b"k0", b"v0"); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); -// // make sure raft log gc is triggered -// std::thread::sleep(Duration::from_millis(200)); -// let mut before_states = HashMap::default(); -// for (&id, engines) in &cluster.engines { -// let mut state: RaftApplyState = get_raft_msg_or_default(engines, -// &keys::apply_state_key(1)); before_states.insert(id, -// state.take_truncated_state()); } + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1); + // nonwitness -> witness + let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + become_witness(&cluster, region.get_id(), &mut peer_on_store3); + cluster.must_put(b"k0", b"v0"); -// // the witness is down -// cluster.stop_node(nodes[2]); + // make sure raft log gc is triggered + std::thread::sleep(Duration::from_millis(200)); + let mut before_states = 
HashMap::default(); + for (&id, engines) in &cluster.engines { + let mut state: RaftApplyState = get_raft_msg_or_default(engines, &keys::apply_state_key(1)); + before_states.insert(id, state.take_truncated_state()); + } -// // write some data to make log gap exceeds the gc limit -// for i in 1..1000 { -// let (k, v) = (format!("k{}", i), format!("v{}", i)); -// let key = k.as_bytes(); -// let value = v.as_bytes(); -// cluster.must_put(key, value); -// } + // the witness is down + cluster.stop_node(nodes[2]); -// // the witness is back online -// cluster.run_node(nodes[2]).unwrap(); + // write some data to make log gap exceeds the gc limit + for i in 1..1000 { + let (k, v) = (format!("k{}", i), format!("v{}", i)); + let key = k.as_bytes(); + let value = v.as_bytes(); + cluster.must_put(key, value); + } -// cluster.must_put(b"k00", b"v00"); -// std::thread::sleep(Duration::from_millis(200)); + // the witness is back online + cluster.run_node(nodes[2]).unwrap(); -// // the truncated index is advanced -// for (&id, engines) in &cluster.engines { -// let state: RaftApplyState = get_raft_msg_or_default(engines, -// &keys::apply_state_key(1)); println!("{} {}", id, -// state.get_truncated_state().get_index()); assert_ge!( -// state.get_truncated_state().get_index() - -// before_states[&id].get_index(), 900 -// ); -// } -// } + cluster.must_put(b"k00", b"v00"); + std::thread::sleep(Duration::from_millis(200)); + + // the truncated index is advanced + for (&id, engines) in &cluster.engines { + let state: RaftApplyState = get_raft_msg_or_default(engines, &keys::apply_state_key(1)); + assert_ne!( + 900, + state.get_truncated_state().get_index() - before_states[&id].get_index() + ); + } +} // Test the case replica read can't be performed on witness peer. 
#[test] From 177efafee39a7f1cf7cbc6330d834cdbbe42a657 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 27 Dec 2022 23:52:16 -0800 Subject: [PATCH 0437/1149] raftstore-v2: a few panic fix (#13996) ref tikv/tikv#12842 a few panic fix 1) update_approximate_raft_log_size may run into divid by zero error 2) appy_delete may have None write_batch 3) StoreMeta::set_region may run into region corruption error if it's destroyed and re-created. 4) TabletSnapManager's snapshot size calculation may throw Other error. Signed-off-by: qi.xu Signed-off-by: Jay Lee Co-authored-by: qi.xu Co-authored-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/batch/store.rs | 1 + components/raftstore-v2/src/fsm/store.rs | 14 ++++++++++++++ .../raftstore-v2/src/operation/command/mod.rs | 5 +++++ .../src/operation/command/write/mod.rs | 1 + components/raftstore-v2/src/operation/life.rs | 6 ++++++ components/raftstore-v2/src/worker/pd/mod.rs | 8 +++++++- .../raftstore-v2/src/worker/pd/store_heartbeat.rs | 7 +++++-- 7 files changed, 39 insertions(+), 3 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index a3800085522..800dbc98f91 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -535,6 +535,7 @@ impl StoreSystem { causal_ts_provider, self.logger.clone(), self.shutdown.clone(), + cfg.clone(), )); let split_check_scheduler = workers.background.start( diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index cb7aa99b179..f107715a535 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -80,6 +80,20 @@ impl StoreMeta { ); } } + + pub fn remove_region(&mut self, region_id: u64) { + let prev = self.regions.remove(®ion_id); + if let Some((prev, initialized)) = prev { + if initialized { + let key = ( + data_end_key(prev.get_end_key()), + 
prev.get_region_epoch().get_version(), + ); + let prev_id = self.region_ranges.remove(&key); + assert_eq!(prev_id, Some(prev.get_id())); + } + } + } } impl StoreRegionMeta for StoreMeta { diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 35b4ec1918e..0a58bb64016 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -245,6 +245,11 @@ impl Peer { let apply = CommittedEntries { entry_and_proposals, }; + assert!( + self.apply_scheduler().is_some(), + "apply_scheduler should be something. region_id {}", + self.region_id() + ); self.apply_scheduler() .unwrap() .send(ApplyTask::CommittedEntries(apply)); diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index ad6e537b956..af806e3024e 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -177,6 +177,7 @@ impl Apply { } util::check_key_in_region(key, self.region_state().get_region())?; keys::data_key_with_buffer(key, &mut self.key_buffer); + self.ensure_write_buffer(); let res = if cf.is_empty() || cf == CF_DEFAULT { // TODO: use write_vector self.write_batch.as_mut().unwrap().delete(&self.key_buffer) diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index ea42832eaea..0f2e72061ef 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -331,6 +331,12 @@ impl Peer { pub fn finish_destroy(&mut self, ctx: &mut StoreContext) { info!(self.logger, "peer destroyed"); ctx.router.close(self.region_id()); + { + ctx.store_meta + .lock() + .unwrap() + .remove_region(self.region_id()); + } if let Some(msg) = self.destroy_progress_mut().finish() { // The message will be dispatched to store fsm, which will create 
a // new peer. Ignore error as it's just a best effort. diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index cc977e68236..bfcf3389754 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -11,9 +11,12 @@ use concurrency_manager::ConcurrencyManager; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use kvproto::{metapb, pdpb}; use pd_client::PdClient; -use raftstore::store::{util::KeysInfoFormatter, FlowStatsReporter, ReadStats, TxnExt, WriteStats}; +use raftstore::store::{ + util::KeysInfoFormatter, Config, FlowStatsReporter, ReadStats, TxnExt, WriteStats, +}; use slog::{error, info, Logger}; use tikv_util::{ + config::VersionTrack, time::UnixSecs, worker::{Runnable, Scheduler}, }; @@ -122,6 +125,7 @@ where logger: Logger, shutdown: Arc, + cfg: Arc>, } impl Runner @@ -141,6 +145,7 @@ where causal_ts_provider: Option>, // used for rawkv apiv2 logger: Logger, shutdown: Arc, + cfg: Arc>, ) -> Self { Self { store_id, @@ -158,6 +163,7 @@ where causal_ts_provider, logger, shutdown, + cfg, } } } diff --git a/components/raftstore-v2/src/worker/pd/store_heartbeat.rs b/components/raftstore-v2/src/worker/pd/store_heartbeat.rs index 2fbe378cff8..22bee3cbf26 100644 --- a/components/raftstore-v2/src/worker/pd/store_heartbeat.rs +++ b/components/raftstore-v2/src/worker/pd/store_heartbeat.rs @@ -272,8 +272,11 @@ where Ok(stats) => stats, }; let disk_cap = disk_stats.total_space(); - // TODO: custom capacity. - let capacity = disk_cap; + let capacity = if self.cfg.value().capacity.0 == 0 { + disk_cap + } else { + std::cmp::min(disk_cap, self.cfg.value().capacity.0) + }; // TODO: accurate snapshot size and kv engines size. 
let snap_size = 0; let kv_size = 0; From 06bfaa42a120d1c2cefa5515810a699b3abd458b Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 30 Dec 2022 10:40:19 +0800 Subject: [PATCH 0438/1149] raftstore-v2: avoid ticking when there are many unapplied logs (#13995) ref tikv/tikv#12842 Whenever timeout, the peer will check for unapplied logs whether there are pending conf change and trigger heavy reads. So we wait till most logs are applied before ticking. It also fix following issues: - PersistenceListener is not installed - implementation of persisted_apply_index is wrong - parse tablet name is wrong Signed-off-by: Jay Lee --- components/engine_rocks/src/event_listener.rs | 2 +- components/engine_traits/src/flush.rs | 8 +- components/engine_traits/src/tablet.rs | 23 +++++- components/raftstore-v2/src/fsm/apply.rs | 3 + components/raftstore-v2/src/fsm/peer.rs | 19 ++--- .../operation/command/admin/compact_log.rs | 42 +++++----- .../operation/command/admin/conf_change.rs | 2 +- .../src/operation/command/admin/mod.rs | 10 ++- .../src/operation/command/admin/split.rs | 21 +++-- .../src/operation/command/control.rs | 5 ++ .../raftstore-v2/src/operation/command/mod.rs | 28 ++++++- .../src/operation/ready/apply_trace.rs | 55 ++++++++++--- .../raftstore-v2/src/operation/ready/mod.rs | 78 ++++++++++++++---- .../src/operation/ready/snapshot.rs | 51 +++++++++--- components/raftstore-v2/src/raft/apply.rs | 12 ++- components/raftstore-v2/src/raft/peer.rs | 44 ++++++++--- components/raftstore-v2/src/raft/storage.rs | 21 ++--- .../src/router/internal_message.rs | 1 + .../integrations/test_transfer_leader.rs | 63 ++++++++------- components/raftstore/src/store/metrics.rs | 1 + components/raftstore/src/store/snap.rs | 7 +- components/server/src/server2.rs | 79 ++++++++++--------- src/config/mod.rs | 3 + src/server/raftkv2/node.rs | 34 ++++---- 24 files changed, 417 insertions(+), 195 deletions(-) diff --git a/components/engine_rocks/src/event_listener.rs 
b/components/engine_rocks/src/event_listener.rs index b940fcb39f3..3bbf03cb77f 100644 --- a/components/engine_rocks/src/event_listener.rs +++ b/components/engine_rocks/src/event_listener.rs @@ -261,7 +261,7 @@ mod tests { let (region_id, tablet_index) = (2, 3); let storage = Arc::new(MemStorage::default()); - let state = Arc::new(FlushState::default()); + let state = Arc::new(FlushState::new(0)); let listener = PersistenceListener::new(region_id, tablet_index, state.clone(), storage.clone()); let mut db_opt = RocksDbOptions::default(); diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index cfed95f0426..b3a827c234e 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -50,12 +50,18 @@ impl FlushProgress { /// raftstore will update state changes and corresponding apply index, when /// flush, `PersistenceListener` will query states related to the memtable /// and persist the relation to raft engine. -#[derive(Default, Debug)] +#[derive(Debug)] pub struct FlushState { applied_index: AtomicU64, } impl FlushState { + pub fn new(applied_index: u64) -> Self { + Self { + applied_index: AtomicU64::new(applied_index), + } + } + /// Set the latest applied index. #[inline] pub fn set_applied_index(&self, index: u64) { diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index edc0bd99870..f552fbc01aa 100644 --- a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -222,10 +222,20 @@ impl TabletRegistry { }) } + /// Format the name as {prefix}_{id}_{suffix}. If prefix is empty, it will + /// be format as {id}_{suffix}. 
pub fn tablet_name(&self, prefix: &str, id: u64, suffix: u64) -> String { - format!("{}{}_{}", prefix, id, suffix) + format!( + "{}{:_(&self, path: &'a Path) -> Option<(&'a str, u64, u64)> { let name = path.file_name().unwrap().to_str().unwrap(); let mut parts = name.rsplit('_'); @@ -463,10 +473,19 @@ mod tests { }); assert_eq!(count, 1); - let name = registry.tablet_name("prefix_", 12, 30); + let name = registry.tablet_name("prefix", 12, 30); assert_eq!(name, "prefix_12_30"); let normal_name = registry.tablet_name("", 20, 15); let normal_tablet_path = registry.tablet_path(20, 15); assert_eq!(registry.tablet_root().join(normal_name), normal_tablet_path); + + let full_prefix_path = registry.tablet_root().join(name); + let res = registry.parse_tablet_name(&full_prefix_path); + assert_eq!(res, Some(("prefix", 12, 30))); + let res = registry.parse_tablet_name(&normal_tablet_path); + assert_eq!(res, Some(("", 20, 15))); + let invalid_path = registry.tablet_root().join("invalid_12"); + let res = registry.parse_tablet_name(&invalid_path); + assert_eq!(res, None); } } diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index c0eabd2120e..b81d31329cb 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -65,6 +65,7 @@ impl ApplyFsm { read_scheduler: Scheduler>, flush_state: Arc, log_recovery: Option>, + applied_term: u64, logger: Logger, ) -> (ApplyScheduler, Self) { let (tx, rx) = future::unbounded(WakePolicy::Immediately); @@ -76,6 +77,7 @@ impl ApplyFsm { read_scheduler, flush_state, log_recovery, + applied_term, logger, ); ( @@ -114,6 +116,7 @@ impl ApplyFsm { ApplyTask::CommittedEntries(ce) => self.apply.apply_committed_entries(ce).await, ApplyTask::Snapshot(snap_task) => self.apply.schedule_gen_snapshot(snap_task), ApplyTask::UnsafeWrite(raw_write) => self.apply.apply_unsafe_write(raw_write), + ApplyTask::ManualFlush => self.apply.on_manual_flush(), } // TODO: yield after 
some time. diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 49f1efcb760..8b05435246b 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -43,7 +43,11 @@ impl PeerFsm { storage: Storage, ) -> Result> { let peer = Peer::new(cfg, tablet_registry, snap_mgr, storage)?; - info!(peer.logger, "create peer"); + info!(peer.logger, "create peer"; + "raft_state" => ?peer.storage().raft_state(), + "apply_state" => ?peer.storage().apply_state(), + "region_state" => ?peer.storage().region_state() + ); let (tx, rx) = mpsc::loose_bounded(cfg.notify_capacity); let fsm = Box::new(PeerFsm { peer, @@ -187,20 +191,17 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, } fn on_start(&mut self) { - self.schedule_tick(PeerTick::Raft); + if !self.fsm.peer.maybe_pause_for_recovery() { + self.schedule_tick(PeerTick::Raft); + } self.schedule_tick(PeerTick::SplitRegionCheck); self.schedule_tick(PeerTick::PdHeartbeat); self.schedule_tick(PeerTick::CompactLog); if self.fsm.peer.storage().is_initialized() { self.fsm.peer.schedule_apply_fsm(self.store_ctx); } - // Unlike v1, it's a must to set ready when there are pending entries. Otherwise - // it may block for ever when there is unapplied conf change. - let entry_storage = self.fsm.peer.storage().entry_storage(); - if entry_storage.commit_index() > entry_storage.applied_index() - // Speed up setup if there is only one peer. - || self.fsm.peer.is_leader() - { + // Speed up setup if there is only one peer. 
+ if self.fsm.peer.is_leader() { self.fsm.peer.set_has_ready(); } } diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index d1d10d366bf..c36c7353871 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -21,7 +21,7 @@ use raftstore::{ Result, }; use slog::{debug, error, info}; -use tikv_util::{box_err, Either}; +use tikv_util::box_err; use crate::{ batch::StoreContext, @@ -255,7 +255,15 @@ impl Peer { .unwrap(); self.set_has_extra_write(); - self.maybe_compact_log_from_engine(store_ctx, Either::Right(old_truncated)); + // All logs < perssited_apply will be deleted, so should check with +1. + if old_truncated + 1 < self.storage().apply_trace().persisted_apply_index() { + self.compact_log_from_engine(store_ctx); + } + + let applied = *self.last_applying_index_mut(); + let total_cnt = applied - old_truncated; + let remain_cnt = applied - res.compact_index; + self.update_approximate_raft_log_size(|s| s * remain_cnt / total_cnt); } #[inline] @@ -278,7 +286,9 @@ impl Peer { } else { self.set_has_extra_write(); } - self.maybe_compact_log_from_engine(store_ctx, Either::Left(old_persisted)); + if old_persisted < self.entry_storage().truncated_index() + 1 { + self.compact_log_from_engine(store_ctx); + } if self.remove_tombstone_tablets_before(new_persisted) { let sched = store_ctx.schedulers.tablet_gc.clone(); task.persisted_cbs.push(Box::new(move || { @@ -288,19 +298,10 @@ impl Peer { } } - pub fn maybe_compact_log_from_engine( - &mut self, - store_ctx: &mut StoreContext, - old_index: Either, - ) { - let truncated = self.entry_storage().truncated_index(); - let persisted = self.storage().apply_trace().persisted_apply_index(); - match old_index { - Either::Left(old_persisted) if old_persisted >= truncated => return, - Either::Right(old_truncated) if old_truncated >= persisted 
=> return, - _ => {} - } - let compact_index = std::cmp::min(truncated, persisted); + fn compact_log_from_engine(&mut self, store_ctx: &mut StoreContext) { + let truncated = self.entry_storage().truncated_index() + 1; + let persisted_applied = self.storage().apply_trace().persisted_apply_index(); + let compact_index = std::cmp::min(truncated, persisted_applied); // Raft Engine doesn't care about first index. if let Err(e) = store_ctx @@ -309,11 +310,12 @@ impl Peer { { error!(self.logger, "failed to compact raft logs"; "err" => ?e); } else { + // TODO: make this debug when stable. + info!(self.logger, "compact log"; + "index" => compact_index, + "apply_trace" => ?self.storage().apply_trace(), + "truncated" => ?self.entry_storage().apply_state()); self.set_has_extra_write(); - let applied = self.storage().apply_state().get_applied_index(); - let total_cnt = applied - self.storage().entry_storage().first_index() + 1; - let remain_cnt = applied - compact_index; - self.update_approximate_raft_log_size(|s| s * remain_cnt / total_cnt); } } } diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 5a6c91d3567..72b582d775d 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -232,7 +232,7 @@ impl Apply { ) -> Result<(AdminResponse, AdminCmdResult)> { let region = self.region_state().get_region(); let change_kind = ConfChangeKind::confchange_kind(changes.len()); - info!(self.logger, "exec ConfChangeV2"; "kind" => ?change_kind, "legacy" => legacy, "epoch" => ?region.get_region_epoch()); + info!(self.logger, "exec ConfChangeV2"; "kind" => ?change_kind, "legacy" => legacy, "epoch" => ?region.get_region_epoch(), "index" => index); let mut new_region = region.clone(); match change_kind { ConfChangeKind::LeaveJoint => self.apply_leave_joint(&mut new_region), diff --git 
a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 977e26e0675..4f2abb9c65e 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -110,9 +110,13 @@ impl Peer { } }; match &res { - Ok(index) => self - .proposal_control_mut() - .record_proposed_admin(cmd_type, *index), + Ok(index) => { + self.proposal_control_mut() + .record_proposed_admin(cmd_type, *index); + if self.proposal_control_mut().has_uncommitted_admin() { + self.raft_group_mut().skip_bcast_commit(false); + } + } Err(e) => { info!( self.logger, diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index e1f4ae552f6..faf059b3871 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -65,7 +65,7 @@ use crate::{ Error, }; -pub const SPLIT_PREFIX: &str = "split_"; +pub const SPLIT_PREFIX: &str = "split"; #[derive(Debug)] pub struct SplitResult { @@ -171,6 +171,9 @@ impl Peer { pub fn update_split_flow_control(&mut self, metrics: &ApplyMetrics) { let control = self.split_flow_control_mut(); control.size_diff_hint += metrics.size_diff_hint; + if self.is_leader() { + self.add_pending_tick(PeerTick::SplitRegionCheck); + } } pub fn on_request_split( @@ -265,6 +268,7 @@ impl Apply { self.logger, "split region"; "region" => ?region, + "index" => log_index, "boundaries" => %KeysInfoFormatter(boundaries.iter()), ); @@ -449,6 +453,8 @@ impl Peer { // Now pd only uses ReportBatchSplit for history operation show, // so we send it independently here. self.report_batch_split_pd(store_ctx, res.regions.to_vec()); + // After split, the peer may need to update its metrics. 
+ self.split_flow_control_mut().may_skip_split_check = false; self.add_pending_tick(PeerTick::SplitRegionCheck); } @@ -629,7 +635,7 @@ mod test { kv::TestTabletFactory, }; use engine_traits::{ - Peekable, TabletContext, TabletRegistry, WriteBatch, CF_DEFAULT, DATA_CFS, + FlushState, Peekable, TabletContext, TabletRegistry, WriteBatch, CF_DEFAULT, DATA_CFS, }; use kvproto::{ metapb::RegionEpoch, @@ -787,8 +793,9 @@ mod test { reporter, reg, read_scheduler, - Arc::default(), + Arc::new(FlushState::new(5)), None, + 5, logger.clone(), ); @@ -803,7 +810,7 @@ mod test { splits.mut_requests().clear(); req.set_splits(splits.clone()); - let err = apply.apply_batch_split(&req, 0).unwrap_err(); + let err = apply.apply_batch_split(&req, 6).unwrap_err(); // Empty requests should be rejected. assert!(err.to_string().contains("missing split requests")); @@ -824,7 +831,7 @@ mod test { .mut_requests() .push(new_split_req(b"", 1, vec![11, 12, 13])); req.set_splits(splits.clone()); - let err = apply.apply_batch_split(&req, 0).unwrap_err(); + let err = apply.apply_batch_split(&req, 7).unwrap_err(); // Empty key will not in any region exclusively. assert!(err.to_string().contains("missing split key"), "{:?}", err); @@ -836,7 +843,7 @@ mod test { .mut_requests() .push(new_split_req(b"k1", 1, vec![11, 12, 13])); req.set_splits(splits.clone()); - let err = apply.apply_batch_split(&req, 0).unwrap_err(); + let err = apply.apply_batch_split(&req, 8).unwrap_err(); // keys should be in ascend order. assert!( err.to_string().contains("invalid split request"), @@ -852,7 +859,7 @@ mod test { .mut_requests() .push(new_split_req(b"k2", 1, vec![11, 12])); req.set_splits(splits.clone()); - let err = apply.apply_batch_split(&req, 0).unwrap_err(); + let err = apply.apply_batch_split(&req, 9).unwrap_err(); // All requests should be checked. 
assert!(err.to_string().contains("id count"), "{:?}", err); diff --git a/components/raftstore-v2/src/operation/command/control.rs b/components/raftstore-v2/src/operation/command/control.rs index b330d0093fe..fd53090fd65 100644 --- a/components/raftstore-v2/src/operation/command/control.rs +++ b/components/raftstore-v2/src/operation/command/control.rs @@ -181,6 +181,11 @@ impl ProposalControl { } } + #[inline] + pub fn has_uncommitted_admin(&self) -> bool { + !self.proposed_admin_cmd.is_empty() && !self.proposed_admin_cmd.back().unwrap().committed + } + pub fn advance_apply(&mut self, index: u64, term: u64, region: &metapb::Region) { while !self.proposed_admin_cmd.is_empty() { let cmd = self.proposed_admin_cmd.front_mut().unwrap(); diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 0a58bb64016..a533ae9af87 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -38,13 +38,14 @@ use raftstore::{ }, Error, Result, }; +use slog::{info, warn}; use tikv_util::{box_err, time::monotonic_raw_now}; use crate::{ batch::StoreContext, fsm::{ApplyFsm, ApplyResReporter}, raft::{Apply, Peer}, - router::{ApplyRes, ApplyTask, CmdResChannel}, + router::{ApplyRes, ApplyTask, CmdResChannel, PeerTick}, }; mod admin; @@ -111,6 +112,7 @@ impl Peer { read_scheduler, self.flush_state().clone(), self.storage().apply_trace().log_recovery(), + self.entry_storage().applied_term(), logger, ); @@ -306,6 +308,22 @@ impl Peer { apply_res.applied_index, progress_to_be_updated, ); + if self.pause_for_recovery() + && self.storage().entry_storage().commit_index() <= apply_res.applied_index + { + info!(self.logger, "recovery completed"; "apply_index" => apply_res.applied_index); + self.set_pause_for_recovery(false); + // Flush to avoid recover again and again. 
+ if let Some(scheduler) = self.apply_scheduler() { + scheduler.send(ApplyTask::ManualFlush); + } + self.add_pending_tick(PeerTick::Raft); + } + if !self.pause_for_recovery() && self.storage_mut().apply_trace_mut().should_flush() { + if let Some(scheduler) = self.apply_scheduler() { + scheduler.send(ApplyTask::ManualFlush); + } + } } } @@ -347,6 +365,13 @@ impl Apply { } } + pub fn on_manual_flush(&mut self) { + self.flush(); + if let Err(e) = self.tablet().flush_cfs(&[], false) { + warn!(self.logger, "failed to flush: {:?}", e); + } + } + #[inline] pub async fn apply_committed_entries(&mut self, ce: CommittedEntries) { fail::fail_point!("APPLY_COMMITTED_ENTRIES"); @@ -512,6 +537,7 @@ impl Apply { #[inline] pub fn flush(&mut self) { + // TODO: maybe we should check whether there is anything to flush. let (index, term) = self.apply_progress(); let flush_state = self.flush_state().clone(); if let Some(wb) = &mut self.write_batch && !wb.is_empty() { diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index d6a83b7933b..1e9d1ef4221 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -40,7 +40,7 @@ use kvproto::{ use raftstore::store::{ ReadTask, TabletSnapManager, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }; -use slog::Logger; +use slog::{trace, Logger}; use tikv_util::{box_err, worker::Scheduler}; use crate::{ @@ -130,7 +130,7 @@ impl engine_traits::StateStorage for StateStorage< /// Mapping from data cf to an u64 index. pub type DataTrace = [u64; DATA_CFS_LEN]; -#[derive(Clone, Copy, Default)] +#[derive(Clone, Copy, Default, Debug)] struct Progress { flushed: u64, /// The index of last entry that has modification to the CF. @@ -154,7 +154,7 @@ pub fn cf_offset(cf: &str) -> usize { /// interact with other peers will be traced. 
/// - support query the flushed progress without actually scanning raft engine, /// which is useful for cleaning up stale flush records. -#[derive(Default)] +#[derive(Default, Debug)] pub struct ApplyTrace { /// The modified indexes and flushed index of each data CF. data_cfs: Box<[Progress; DATA_CFS_LEN]>, @@ -168,6 +168,10 @@ pub struct ApplyTrace { admin: Progress, /// Index that is issued to be written. It may not be truely persisted. persisted_applied: u64, + /// Flush will be triggered explicitly when there are too many pending + /// writes. It marks the last index that is flushed to avoid too many + /// flushes. + last_flush_trigger: u64, /// `true` means the raft cf record should be persisted in next ready. try_persist: bool, } @@ -187,6 +191,7 @@ impl ApplyTrace { trace.admin.flushed = i; trace.admin.last_modified = i; trace.persisted_applied = i; + trace.last_flush_trigger = i; let applied_region_state = engine .get_region_state(region_id, trace.admin.flushed)? .unwrap(); @@ -218,7 +223,31 @@ impl ApplyTrace { } pub fn persisted_apply_index(&self) -> u64 { - self.admin.flushed + self.persisted_applied + } + + pub fn should_flush(&mut self) -> bool { + if self.admin.flushed < self.admin.last_modified { + // It's waiting for other peers, flush will not help. + return false; + } + let last_modified = self + .data_cfs + .iter() + .filter_map(|pr| { + if pr.last_modified != pr.flushed { + Some(pr.last_modified) + } else { + None + } + }) + .max(); + if let Some(m) = last_modified && m >= self.admin.flushed + 4096 && m >= self.last_flush_trigger + 4096 { + self.last_flush_trigger = m; + true + } else { + false + } } // All events before `mem_index` must be consumed before calling this function. 
@@ -456,6 +485,7 @@ impl Storage { impl Peer { pub fn on_data_flushed(&mut self, cf: &str, tablet_index: u64, index: u64) { + trace!(self.logger, "data flushed"; "cf" => cf, "tablet_index" => tablet_index, "index" => index, "trace" => ?self.storage().apply_trace()); if tablet_index < self.storage().tablet_index() { // Stale tablet. return; @@ -467,6 +497,7 @@ impl Peer { } pub fn on_data_modified(&mut self, modification: DataTrace) { + trace!(self.logger, "on data modified"; "modification" => ?modification, "trace" => ?self.storage().apply_trace()); let apply_index = self.storage().entry_storage().applied_index(); let apply_trace = self.storage_mut().apply_trace_mut(); for (cf, index) in DATA_CFS.iter().zip(modification) { @@ -556,22 +587,22 @@ mod tests { #[test] fn test_apply_trace() { let mut trace = ApplyTrace::default(); - assert_eq!(0, trace.persisted_apply_index()); + assert_eq!(0, trace.admin.flushed); // If there is no modifications, index should be advanced anyway. trace.maybe_advance_admin_flushed(2); - assert_eq!(2, trace.persisted_apply_index()); + assert_eq!(2, trace.admin.flushed); for cf in DATA_CFS { trace.on_modify(cf, 3); } trace.maybe_advance_admin_flushed(3); // Modification is not flushed. - assert_eq!(2, trace.persisted_apply_index()); + assert_eq!(2, trace.admin.flushed); for cf in DATA_CFS { trace.on_flush(cf, 3); } trace.maybe_advance_admin_flushed(3); // No admin is recorded, index should be advanced. - assert_eq!(3, trace.persisted_apply_index()); + assert_eq!(3, trace.admin.flushed); trace.on_admin_modify(4); for cf in DATA_CFS { trace.on_flush(cf, 4); @@ -581,25 +612,25 @@ mod tests { } trace.maybe_advance_admin_flushed(4); // Unflushed admin modification should hold index. - assert_eq!(3, trace.persisted_apply_index()); + assert_eq!(3, trace.admin.flushed); trace.on_admin_flush(4); trace.maybe_advance_admin_flushed(4); // Admin is flushed, index should be advanced. 
- assert_eq!(4, trace.persisted_apply_index()); + assert_eq!(4, trace.admin.flushed); for cf in DATA_CFS { trace.on_flush(cf, 5); } trace.maybe_advance_admin_flushed(4); // Though all data CFs are flushed, but index should not be // advanced as we don't know whether there is admin modification. - assert_eq!(4, trace.persisted_apply_index()); + assert_eq!(4, trace.admin.flushed); for cf in DATA_CFS { trace.on_modify(cf, 5); } trace.maybe_advance_admin_flushed(5); // Because modify is recorded, so we know there should be no admin // modification and index can be advanced. - assert_eq!(5, trace.persisted_apply_index()); + assert_eq!(5, trace.admin.flushed); } #[test] diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 8b125844d0e..8a0e0770b1f 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -32,7 +32,7 @@ use raftstore::{ coprocessor::{RegionChangeEvent, RoleChange}, store::{needs_evict_entry_cache, util, FetchedLogs, ReadProgress, Transport, WriteTask}, }; -use slog::{debug, error, trace, warn}; +use slog::{debug, error, info, trace, warn}; use tikv_util::{ store::find_peer, time::{duration_to_sec, monotonic_raw_now}, @@ -50,6 +50,8 @@ use crate::{ router::{ApplyTask, PeerMsg, PeerTick}, }; +const PAUSE_FOR_RECOVERY_GAP: u64 = 128; + impl Store { pub fn on_store_unreachable( &mut self, @@ -76,9 +78,33 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, } impl Peer { + pub fn maybe_pause_for_recovery(&mut self) -> bool { + let entry_storage = self.storage().entry_storage(); + let committed_index = entry_storage.commit_index(); + let applied_index = entry_storage.applied_index(); + if committed_index > applied_index { + // Unlike v1, it's a must to set ready when there are pending entries. Otherwise + // it may block for ever when there is unapplied conf change. 
+ self.set_has_ready(); + } + if committed_index > applied_index + PAUSE_FOR_RECOVERY_GAP { + // If there are too many the missing logs, we need to skip ticking otherwise + // it may block the raftstore thread for a long time in reading logs for + // election timeout. + info!(self.logger, "pause for recovery"; "applied" => applied_index, "committed" => committed_index); + self.set_pause_for_recovery(true); + true + } else { + false + } + } + #[inline] fn tick(&mut self) -> bool { - self.raft_group_mut().tick() + // When it's handling snapshot, it's pointless to tick as all the side + // affects have to wait till snapshot is applied. On the other hand, ticking + // will bring other corner cases like elections. + !self.is_handling_snapshot() && self.raft_group_mut().tick() } pub fn on_peer_unreachable(&mut self, to_peer_id: u64) { @@ -107,6 +133,10 @@ impl Peer { "from_peer_id" => msg.get_from_peer().get_id(), "to_peer_id" => msg.get_to_peer().get_id(), ); + if self.pause_for_recovery() && msg.get_message().get_msg_type() == MessageType::MsgAppend { + ctx.raft_metrics.message_dropped.recovery.inc(); + return; + } if !self.serving() { return; } @@ -273,31 +303,44 @@ impl Peer { ) { // TODO: skip handling committed entries if a snapshot is being applied // asynchronously. - if self.is_leader() { + let mut update_lease = self.is_leader(); + if update_lease { for entry in committed_entries.iter().rev() { self.update_approximate_raft_log_size(|s| s + entry.get_data().len() as u64); - let propose_time = self - .proposals() - .find_propose_time(entry.get_term(), entry.get_index()); - if let Some(propose_time) = propose_time { - // We must renew current_time because this value may be created a long time ago. - // If we do not renew it, this time may be smaller than propose_time of a - // command, which was proposed in another thread while this thread receives its - // AppendEntriesResponse and is ready to calculate its commit-log-duration. 
- ctx.current_time.replace(monotonic_raw_now()); - ctx.raft_metrics.commit_log.observe(duration_to_sec( - (ctx.current_time.unwrap() - propose_time).to_std().unwrap(), - )); - self.maybe_renew_leader_lease(propose_time, &ctx.store_meta, None); - break; + if update_lease { + let propose_time = self + .proposals() + .find_propose_time(entry.get_term(), entry.get_index()); + if let Some(propose_time) = propose_time { + // We must renew current_time because this value may be created a long time + // ago. If we do not renew it, this time may be + // smaller than propose_time of a command, which was + // proposed in another thread while this thread receives its + // AppendEntriesResponse and is ready to calculate its commit-log-duration. + ctx.current_time.replace(monotonic_raw_now()); + ctx.raft_metrics.commit_log.observe(duration_to_sec( + (ctx.current_time.unwrap() - propose_time).to_std().unwrap(), + )); + self.maybe_renew_leader_lease(propose_time, &ctx.store_meta, None); + update_lease = false; + } } } } + let applying_index = committed_entries.last().unwrap().index; + let commit_to_current_term = committed_entries.last().unwrap().term == self.term(); + *self.last_applying_index_mut() = applying_index; if needs_evict_entry_cache(ctx.cfg.evict_cache_on_memory_ratio) { // Compact all cached entries instead of half evict. self.entry_storage_mut().evict_entry_cache(false); } self.schedule_apply_committed_entries(committed_entries); + if self.is_leader() + && commit_to_current_term + && !self.proposal_control().has_uncommitted_admin() + { + self.raft_group_mut().skip_bcast_commit(true); + } } /// Processing the ready of raft. 
A detail description of how it's handled @@ -525,6 +568,7 @@ impl Peer { self.region_heartbeat_pd(ctx); self.add_pending_tick(PeerTick::CompactLog); + self.add_pending_tick(PeerTick::SplitRegionCheck); } StateRole::Follower => { self.leader_lease_mut().expire(); diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 41dc0d39429..c040bdcbb3b 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -37,7 +37,7 @@ use raftstore::{ coprocessor::RegionChangeEvent, store::{ metrics::STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER, GenSnapRes, ReadTask, TabletSnapKey, - TabletSnapManager, Transport, WriteTask, RAFT_INIT_LOG_INDEX, + TabletSnapManager, Transport, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }, }; use slog::{error, info, warn}; @@ -197,19 +197,24 @@ impl Peer { StateRole::Follower, ); let persisted_index = self.persisted_index(); - let first_index = self.storage().entry_storage().first_index(); - if first_index == persisted_index + 1 { + *self.last_applying_index_mut() = persisted_index; + let snapshot_index = self.entry_storage().truncated_index(); + assert!(snapshot_index >= RAFT_INIT_LOG_INDEX, "{:?}", self.logger); + // If leader sends a message append to the follower while it's applying + // snapshot (via split init for example), the persisted_index may be larger + // than the first index. But as long as first index is not larger, the + // latest snapshot should be applied. + if snapshot_index <= persisted_index { let region_id = self.region_id(); - self.reset_flush_state(); + self.reset_flush_state(snapshot_index); let flush_state = self.flush_state().clone(); - let mut tablet_ctx = TabletContext::new(self.region(), Some(persisted_index)); + let mut tablet_ctx = TabletContext::new(self.region(), Some(snapshot_index)); // Use a new FlushState to avoid conflicts with the old one. 
tablet_ctx.flush_state = Some(flush_state); ctx.tablet_registry.load(tablet_ctx, false).unwrap(); - self.record_tablet_as_tombstone_and_refresh(persisted_index, ctx); - self.schedule_apply_fsm(ctx); + self.record_tablet_as_tombstone_and_refresh(snapshot_index, ctx); self.storage_mut().on_applied_snapshot(); - self.raft_group_mut().advance_apply_to(persisted_index); + self.raft_group_mut().advance_apply_to(snapshot_index); { let mut meta = ctx.store_meta.lock().unwrap(); meta.set_region(self.region(), true, &self.logger); @@ -218,18 +223,18 @@ impl Peer { meta.region_read_progress .insert(region_id, self.read_progress().clone()); } - self.read_progress_mut() - .update_applied_core(persisted_index); + self.read_progress_mut().update_applied_core(snapshot_index); let split = self.storage_mut().split_init_mut().take(); if split.as_ref().map_or(true, |s| { - !s.scheduled || persisted_index != RAFT_INIT_LOG_INDEX + !s.scheduled || snapshot_index != RAFT_INIT_LOG_INDEX }) { info!(self.logger, "apply tablet snapshot completely"); } if let Some(init) = split { - info!(self.logger, "init with snapshot finished"); + info!(self.logger, "init split with snapshot finished"); self.post_split_init(ctx, init); } + self.schedule_apply_fsm(ctx); } } } @@ -343,6 +348,15 @@ impl Storage { /// Validate the snapshot. Returns true if it's valid. fn validate_snap(&self, snap: &Snapshot, request_index: u64) -> bool { let idx = snap.get_metadata().get_index(); + if idx < RAFT_INIT_LOG_INDEX || snap.get_metadata().get_term() < RAFT_INIT_LOG_TERM { + info!( + self.logger(), + "corrupted snapshot detected, generate again"; + "snap" => ?snap, + "request_index" => request_index, + ); + return false; + } // TODO(nolouch): check tuncated index if idx < request_index { // stale snapshot, should generate again. 
@@ -489,8 +503,21 @@ impl Storage { )); } + let old_last_index = self.entry_storage().last_index(); + if self.entry_storage().first_index() <= old_last_index { + // All states are rewritten in the following blocks. Stale states will be + // cleaned up by compact worker. + task.cut_logs = Some((0, old_last_index + 1)); + self.entry_storage_mut().clear(); + } + let last_index = snap.get_metadata().get_index(); let last_term = snap.get_metadata().get_term(); + assert!( + last_index >= RAFT_INIT_LOG_INDEX && last_term >= RAFT_INIT_LOG_TERM, + "{:?}", + self.logger().list() + ); let region_state = self.region_state_mut(); region_state.set_state(PeerState::Normal); region_state.set_region(region); diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 666f3adb699..6818d7ae0d9 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -34,6 +34,9 @@ pub struct Apply { /// command. tombstone: bool, applied_term: u64, + // Apply progress is set after every command in case there is a flush. But it's + // wrong to update flush_state immediately as a manual flush from other thread + // can fetch the wrong apply index from flush_state. applied_index: u64, /// The largest index that have modified each column family. 
modifications: DataTrace, @@ -64,11 +67,15 @@ impl Apply { read_scheduler: Scheduler>, flush_state: Arc, log_recovery: Option>, + applied_term: u64, logger: Logger, ) -> Self { let mut remote_tablet = tablet_registry .get(region_state.get_region().get_id()) .unwrap(); + assert_ne!(applied_term, 0, "{:?}", logger.list()); + let applied_index = flush_state.applied_index(); + assert_ne!(applied_index, 0, "{:?}", logger.list()); Apply { peer, tablet: remote_tablet.latest().unwrap().clone(), @@ -76,7 +83,7 @@ impl Apply { write_batch: None, callbacks: vec![], tombstone: false, - applied_term: 0, + applied_term, applied_index: flush_state.applied_index(), modifications: [0; DATA_CFS_LEN], admin_cmd_result: vec![], @@ -125,9 +132,6 @@ impl Apply { let log_recovery = self.log_recovery.as_ref().unwrap(); if log_recovery.iter().all(|v| index >= *v) { self.log_recovery.take(); - // Now all logs are recovered, flush them to avoid recover again - // and again. - let _ = self.tablet.flush_cfs(&[], false); } } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 668b0ebf41d..f3734b6821d 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -62,6 +62,7 @@ pub struct Peer { /// For raft log compaction. skip_compact_log_ticks: usize, approximate_raft_log_size: u64, + last_applying_index: u64, /// Encoder for batching proposals and encoding them in a more efficient way /// than protobuf. @@ -73,6 +74,7 @@ pub struct Peer { has_ready: bool, /// Sometimes there is no ready at all, but we need to trigger async write. has_extra_write: bool, + pause_for_recovery: bool, /// Writer for persisting side effects asynchronously. 
pub(crate) async_writer: AsyncWriter, @@ -133,7 +135,7 @@ impl Peer { let raft_group = RawNode::new(&raft_cfg, storage, &logger)?; let region = raft_group.store().region_state().get_region().clone(); - let flush_state: Arc = Arc::default(); + let flush_state: Arc = Arc::new(FlushState::new(applied_index)); // We can't create tablet if tablet index is 0. It can introduce race when gc // old tablet and create new peer. We also can't get the correct range of the // region, which is required for kv data gc. @@ -155,12 +157,14 @@ impl Peer { peer_heartbeats: HashMap::default(), skip_compact_log_ticks: 0, approximate_raft_log_size: 0, + last_applying_index: raft_group.store().apply_state().get_applied_index(), raw_write_encoder: None, proposals: ProposalQueue::new(region_id, raft_group.raft.id), async_writer: AsyncWriter::new(region_id, peer_id), apply_scheduler: None, has_ready: false, has_extra_write: false, + pause_for_recovery: false, destroy_progress: DestroyProgress::None, raft_group, logger, @@ -366,14 +370,17 @@ impl Peer { /// Returns if there's any tombstone being removed. 
#[inline] pub fn remove_tombstone_tablets_before(&mut self, persisted: u64) -> bool { - let mut removed = 0; - while let Some(i) = self.pending_tombstone_tablets.first() - && *i <= persisted - { - removed += 1; + let removed = self + .pending_tombstone_tablets + .iter() + .take_while(|i| **i <= persisted) + .count(); + if removed > 0 { + self.pending_tombstone_tablets.drain(..removed); + true + } else { + false } - self.pending_tombstone_tablets.drain(..removed); - removed > 0 } #[inline] @@ -431,6 +438,16 @@ impl Peer { mem::take(&mut self.has_extra_write) } + #[inline] + pub fn set_pause_for_recovery(&mut self, pause: bool) { + self.pause_for_recovery = pause; + } + + #[inline] + pub fn pause_for_recovery(&self) -> bool { + self.pause_for_recovery + } + #[inline] pub fn insert_peer_cache(&mut self, peer: metapb::Peer) { for p in self.raft_group.store().region().get_peers() { @@ -551,6 +568,10 @@ impl Peer { self.approximate_raft_log_size = f(self.approximate_raft_log_size); } + pub fn last_applying_index_mut(&mut self) -> &mut u64 { + &mut self.last_applying_index + } + #[inline] pub fn state_role(&self) -> StateRole { self.raft_group.raft.state @@ -654,8 +675,7 @@ impl Peer { /// See the comments of `check_snap_status` for more details. #[inline] pub fn is_handling_snapshot(&self) -> bool { - // todo: This method may be unnecessary now? - false + self.persisted_index() < self.entry_storage().truncated_index() } /// Returns `true` if the raft group has replicated a snapshot but not @@ -774,8 +794,8 @@ impl Peer { &self.flush_state } - pub fn reset_flush_state(&mut self) { - self.flush_state = Arc::default(); + pub fn reset_flush_state(&mut self, index: u64) { + self.flush_state = Arc::new(FlushState::new(index)); } // Note: Call `set_has_extra_write` after adding new state changes. 
diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 636970c0ad1..51bd41ba253 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -298,7 +298,9 @@ mod tests { ctor::{CfOptions, DbOptions}, kv::TestTabletFactory, }; - use engine_traits::{RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, DATA_CFS}; + use engine_traits::{ + FlushState, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, DATA_CFS, + }; use kvproto::{ metapb::{Peer, Region}, raft_serverpb::PeerState, @@ -379,25 +381,25 @@ mod tests { .unwrap() .unwrap(); - let snapshot = new_empty_snapshot(region.clone(), 10, 1, false); + let snapshot = new_empty_snapshot(region.clone(), 10, 9, false); let mut task = WriteTask::new(region.get_id(), 5, 0); s.apply_snapshot(&snapshot, &mut task, mgr, reg).unwrap(); // It can be set before load tablet. assert_eq!(PeerState::Normal, s.region_state().get_state()); assert_eq!(10, s.entry_storage().truncated_index()); - assert_eq!(1, s.entry_storage().truncated_term()); - assert_eq!(1, s.entry_storage().last_term()); + assert_eq!(9, s.entry_storage().truncated_term()); + assert_eq!(9, s.entry_storage().last_term()); assert_eq!(10, s.entry_storage().raft_state().last_index); // This index can't be set before load tablet. 
assert_ne!(10, s.entry_storage().applied_index()); - assert_ne!(1, s.entry_storage().applied_term()); + assert_ne!(9, s.entry_storage().applied_term()); assert_eq!(10, s.region_state().get_tablet_index()); assert!(!task.persisted_cbs.is_empty()); s.on_applied_snapshot(); assert_eq!(10, s.entry_storage().applied_index()); - assert_eq!(1, s.entry_storage().applied_term()); + assert_eq!(9, s.entry_storage().applied_term()); assert_eq!(10, s.region_state().get_tablet_index()); } @@ -440,8 +442,9 @@ mod tests { router, reg, sched, - Arc::default(), + Arc::new(FlushState::new(5)), None, + 5, logger, ); @@ -460,8 +463,8 @@ mod tests { SnapState::Generated(ref snap) => *snap.clone(), ref s => panic!("unexpected state: {:?}", s), }; - assert_eq!(snap.get_metadata().get_index(), 0); - assert_eq!(snap.get_metadata().get_term(), 0); + assert_eq!(snap.get_metadata().get_index(), 5); + assert_eq!(snap.get_metadata().get_term(), 5); assert_eq!(snap.get_data().is_empty(), false); let snap_key = TabletSnapKey::from_region_snap(4, 7, &snap); let checkpointer_path = mgr.tablet_gen_path(&snap_key); diff --git a/components/raftstore-v2/src/router/internal_message.rs b/components/raftstore-v2/src/router/internal_message.rs index 05e1baea1cf..092e7e21b5f 100644 --- a/components/raftstore-v2/src/router/internal_message.rs +++ b/components/raftstore-v2/src/router/internal_message.rs @@ -10,6 +10,7 @@ pub enum ApplyTask { Snapshot(GenSnapTask), /// Writes that doesn't care consistency. UnsafeWrite(Box<[u8]>), + ManualFlush, } #[derive(Debug, Default)] diff --git a/components/raftstore-v2/tests/integrations/test_transfer_leader.rs b/components/raftstore-v2/tests/integrations/test_transfer_leader.rs index d031d6b1eba..18d81ef16aa 100644 --- a/components/raftstore-v2/tests/integrations/test_transfer_leader.rs +++ b/components/raftstore-v2/tests/integrations/test_transfer_leader.rs @@ -1,6 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::time::Duration; +use std::{assert_matches::assert_matches, time::Duration}; use engine_traits::{Peekable, CF_DEFAULT}; use futures::executor::block_on; @@ -9,35 +9,32 @@ use kvproto::{ raft_cmdpb::{AdminCmdType, TransferLeaderRequest}, }; use raft::prelude::ConfChangeType; -use raftstore_v2::{router::PeerMsg, SimpleWriteEncoder}; +use raftstore_v2::{ + router::{PeerMsg, PeerTick}, + SimpleWriteEncoder, +}; use tikv_util::store::new_peer; use crate::cluster::Cluster; fn put_data( region_id: u64, - cluster: &Cluster, + cluster: &mut Cluster, node_off: usize, node_off_for_verify: usize, key: &[u8], ) { - let router = &cluster.routers[node_off]; + let mut router = &mut cluster.routers[node_off]; router.wait_applied_to_current_term(region_id, Duration::from_secs(3)); // router.wait_applied_to_current_term(2, Duration::from_secs(3)); - let tablet_registry = cluster.node(node_off).tablet_registry(); - let tablet = tablet_registry - .get(region_id) - .unwrap() - .latest() - .unwrap() - .clone(); - assert!(tablet.get_value(key).unwrap().is_none()); + let snap = router.stale_snapshot(region_id); + assert_matches!(snap.get_value(key), Ok(None)); let header = Box::new(router.new_request_for(region_id).take_header()); let mut put = SimpleWriteEncoder::with_capacity(64); - put.put(CF_DEFAULT, &key[1..], b"value"); + put.put(CF_DEFAULT, key, b"value"); let (msg, mut sub) = PeerMsg::simple_write(header, put.encode()); router.send(region_id, msg).unwrap(); std::thread::sleep(std::time::Duration::from_millis(10)); @@ -53,17 +50,29 @@ fn put_data( let resp = block_on(sub.result()).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); - assert_eq!(tablet.get_value(key).unwrap().unwrap(), b"value"); - - // Verify the data is ready in the other node - let tablet_registry = cluster.node(node_off_for_verify).tablet_registry(); - let tablet = tablet_registry - .get(region_id) - .unwrap() - .latest() - .unwrap() - .clone(); - 
assert_eq!(tablet.get_value(key).unwrap().unwrap(), b"value"); + router = &mut cluster.routers[node_off]; + let snap = router.stale_snapshot(region_id); + assert_eq!(snap.get_value(key).unwrap().unwrap(), b"value"); + + // Because of skip bcast commit, the data should not be applied yet. + router = &mut cluster.routers[node_off_for_verify]; + let snap = router.stale_snapshot(region_id); + assert_matches!(snap.get_value(key), Ok(None)); + // Trigger heartbeat explicitly to commit on follower. + router = &mut cluster.routers[node_off]; + for _ in 0..2 { + router + .send(region_id, PeerMsg::Tick(PeerTick::Raft)) + .unwrap(); + router + .send(region_id, PeerMsg::Tick(PeerTick::Raft)) + .unwrap(); + } + cluster.dispatch(region_id, vec![]); + std::thread::sleep(std::time::Duration::from_millis(100)); + router = &mut cluster.routers[node_off_for_verify]; + let snap = router.stale_snapshot(region_id); + assert_eq!(snap.get_value(key).unwrap().unwrap(), b"value"); } pub fn must_transfer_leader( @@ -97,7 +106,7 @@ pub fn must_transfer_leader( #[test] fn test_transfer_leader() { - let cluster = Cluster::with_node_count(3, None); + let mut cluster = Cluster::with_node_count(3, None); let region_id = 2; let router0 = &cluster.routers[0]; @@ -137,13 +146,13 @@ fn test_transfer_leader() { cluster.dispatch(region_id, vec![]); // Ensure follower has latest entries before transfer leader. - put_data(region_id, &cluster, 0, 1, b"zkey1"); + put_data(region_id, &mut cluster, 0, 1, b"key1"); // Perform transfer leader must_transfer_leader(&cluster, region_id, 0, 1, peer1); // Before transfer back to peer0, put some data again. 
- put_data(region_id, &cluster, 1, 0, b"zkey2"); + put_data(region_id, &mut cluster, 1, 0, b"key2"); // Perform transfer leader let store_id = cluster.node(0).id(); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index b0f44c30c0f..ce4f099610e 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -177,6 +177,7 @@ make_static_metric! { region_nonexistent, applying_snap, disk_full, + recovery, } pub label_enum ProposalType { diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 05decd62815..939bc2a1078 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1998,7 +1998,12 @@ impl TabletSnapManager { { continue; } - for e in file_system::read_dir(path)? { + let entries = match file_system::read_dir(path) { + Ok(entries) => entries, + Err(e) if e.kind() == ErrorKind::NotFound => continue, + Err(e) => return Err(Error::from(e)), + }; + for e in entries { match e.and_then(|e| e.metadata()) { Ok(m) => total_size += m.len(), Err(e) if e.kind() == ErrorKind::NotFound => continue, diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 5beddf60151..4d4e283ea7e 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -61,6 +61,7 @@ use raftstore::{ }, RegionInfoAccessor, }; +use raftstore_v2::{router::RaftRouter, StateStorage}; use security::SecurityManager; use tikv::{ config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, @@ -136,8 +137,7 @@ fn run_impl(config: TikvConfig) { tikv.init_encryption(); let fetcher = tikv.init_io_utility(); let listener = tikv.init_flow_receiver(); - let (raft_engine, engines_info) = tikv.init_raw_engines(listener); - tikv.init_engines(raft_engine); + let engines_info = tikv.init_engines(listener); let server_config = tikv.init_servers::(); tikv.register_services(); 
tikv.init_metrics_flusher(fetcher, engines_info); @@ -201,6 +201,7 @@ struct TikvServer { pd_client: Arc, flow_info_sender: Option>, flow_info_receiver: Option>, + router: Option>, node: Option>, resolver: Option, store_path: PathBuf, @@ -310,6 +311,7 @@ where cfg_controller: Some(cfg_controller), security_mgr, pd_client, + router: None, node: None, resolver: None, store_path, @@ -567,36 +569,6 @@ where engine_rocks::FlowListener::new(tx) } - fn init_engines(&mut self, raft_engine: ER) { - let tablet_registry = self.tablet_registry.clone().unwrap(); - let mut node = NodeV2::new( - &self.config.server, - self.pd_client.clone(), - None, - tablet_registry, - ); - node.try_bootstrap_store(&self.config.raft_store, &raft_engine) - .unwrap_or_else(|e| fatal!("failed to bootstrap store: {:?}", e)); - assert_ne!(node.id(), 0); - - let router = node.router(); - let mut coprocessor_host: CoprocessorHost = CoprocessorHost::new( - router.store_router().clone(), - self.config.coprocessor.clone(), - ); - let region_info_accessor = RegionInfoAccessor::new(&mut coprocessor_host); - - let engine = RaftKv2::new(router.clone(), region_info_accessor.region_leaders()); - - self.engines = Some(TikvEngines { - raft_engine, - engine, - }); - self.node = Some(node); - self.coprocessor_host = Some(coprocessor_host); - self.region_info_accessor = Some(region_info_accessor); - } - fn init_gc_worker(&mut self) -> GcWorker> { let engines = self.engines.as_ref().unwrap(); let gc_worker = GcWorker::new( @@ -774,7 +746,7 @@ where }; let check_leader_runner = CheckLeaderRunner::new( - self.node.as_ref().unwrap().router().store_meta().clone(), + self.router.as_ref().unwrap().store_meta().clone(), self.coprocessor_host.clone().unwrap(), ); let check_leader_scheduler = self @@ -855,6 +827,8 @@ where .unwrap() .start( engines.raft_engine.clone(), + self.tablet_registry.clone().unwrap(), + self.router.as_ref().unwrap(), server.transport(), snap_mgr, self.concurrency_manager.clone(), @@ -1392,10 +1366,10 
@@ impl ConfiguredRaftEngine for RaftLogEngine { } impl TikvServer { - fn init_raw_engines( + fn init_engines( &mut self, flow_listener: engine_rocks::FlowListener, - ) -> (CER, Arc) { + ) -> Arc { let block_cache = self.config.storage.block_cache.build_shared_cache(); let env = self .config @@ -1415,6 +1389,19 @@ impl TikvServer { let builder = KvEngineFactoryBuilder::new(env, &self.config, block_cache) .sst_recovery_sender(self.init_sst_recovery_sender()) .flow_listener(flow_listener); + + let mut node = NodeV2::new(&self.config.server, self.pd_client.clone(), None); + node.try_bootstrap_store(&self.config.raft_store, &raft_engine) + .unwrap_or_else(|e| fatal!("failed to bootstrap store: {:?}", e)); + assert_ne!(node.id(), 0); + + let router = node.router().clone(); + + // Create kv engine. + let builder = builder.state_storage(Arc::new(StateStorage::new( + raft_engine.clone(), + router.clone(), + ))); let factory = Box::new(builder.build()); self.kv_statistics = Some(factory.rocks_statistics()); let registry = TabletRegistry::new(factory, self.store_path.join("tablets")) @@ -1428,12 +1415,30 @@ impl TikvServer { raft_engine.register_config(cfg_controller); let engines_info = Arc::new(EnginesResourceInfo::new( - registry, + registry.clone(), raft_engine.as_rocks_engine().cloned(), 180, // max_samples_to_preserve )); - (raft_engine, engines_info) + let router = RaftRouter::new(node.id(), registry, router); + let mut coprocessor_host: CoprocessorHost = CoprocessorHost::new( + router.store_router().clone(), + self.config.coprocessor.clone(), + ); + let region_info_accessor = RegionInfoAccessor::new(&mut coprocessor_host); + + let engine = RaftKv2::new(router.clone(), region_info_accessor.region_leaders()); + + self.engines = Some(TikvEngines { + raft_engine, + engine, + }); + self.router = Some(router); + self.node = Some(node); + self.coprocessor_host = Some(coprocessor_host); + self.region_info_accessor = Some(region_info_accessor); + + engines_info } } diff --git 
a/src/config/mod.rs b/src/config/mod.rs index 6ed8da3f111..c78ec02182f 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3122,6 +3122,9 @@ impl TikvConfig { if self.storage.engine == EngineType::RaftKv2 { self.raft_store.store_io_pool_size = cmp::max(self.raft_store.store_io_pool_size, 1); + if !self.raft_engine.enable { + panic!("raft-kv2 only supports raft log engine."); + } } self.raft_store.raftdb_path = self.infer_raft_db_path(None)?; diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index ed6f16e8bec..b876951894c 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -11,7 +11,7 @@ use raftstore::{ coprocessor::CoprocessorHost, store::{GlobalReplicationState, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX}, }; -use raftstore_v2::{router::RaftRouter, Bootstrap, PdTask, StoreSystem}; +use raftstore_v2::{router::RaftRouter, Bootstrap, PdTask, StoreRouter, StoreSystem}; use slog::{info, o, Logger}; use tikv_util::{ config::VersionTrack, @@ -24,11 +24,10 @@ use crate::server::{node::init_store, Result}; pub struct NodeV2 { cluster_id: u64, store: metapb::Store, - system: Option<(RaftRouter, StoreSystem)>, + system: Option<(StoreRouter, StoreSystem)>, has_started: bool, pd_client: Arc, - registry: TabletRegistry, logger: Logger, } @@ -43,7 +42,6 @@ where cfg: &crate::server::Config, pd_client: Arc, store: Option, - registry: TabletRegistry, ) -> NodeV2 { let store = init_store(store, cfg); @@ -53,7 +51,6 @@ where pd_client, system: None, has_started: false, - registry, logger: slog_global::borrow_global().new(o!()), } } @@ -71,16 +68,14 @@ where ) .bootstrap_store()?; self.store.set_id(store_id); + let (router, system) = raftstore_v2::create_store_batch_system(cfg, store_id, self.logger.clone()); - self.system = Some(( - RaftRouter::new(store_id, self.registry.clone(), router), - system, - )); + self.system = Some((router, system)); Ok(()) } - pub fn router(&self) -> &RaftRouter { + pub fn router(&self) -> 
&StoreRouter { &self.system.as_ref().unwrap().0 } @@ -90,6 +85,8 @@ where pub fn start( &mut self, raft_engine: ER, + registry: TabletRegistry, + router: &RaftRouter, trans: T, snap_mgr: TabletSnapManager, concurrency_manager: ConcurrencyManager, @@ -112,15 +109,10 @@ where ) .bootstrap_first_region(&self.store, store_id)? { - let path = self - .registry - .tablet_path(region.get_id(), RAFT_INIT_LOG_INDEX); + let path = registry.tablet_path(region.get_id(), RAFT_INIT_LOG_INDEX); let ctx = TabletContext::new(®ion, Some(RAFT_INIT_LOG_INDEX)); // TODO: make follow line can recover from abort. - self.registry - .tablet_factory() - .open_tablet(ctx, &path) - .unwrap(); + registry.tablet_factory().open_tablet(ctx, &path).unwrap(); } // Put store only if the cluster is bootstrapped. @@ -130,6 +122,8 @@ where self.start_store( raft_engine, + registry, + router, trans, snap_mgr, concurrency_manager, @@ -187,6 +181,8 @@ where fn start_store( &mut self, raft_engine: ER, + registry: TabletRegistry, + router: &RaftRouter, trans: T, snap_mgr: TabletSnapManager, concurrency_manager: ConcurrencyManager, @@ -207,13 +203,13 @@ where } self.has_started = true; - let (router, system) = self.system.as_mut().unwrap(); + let system = &mut self.system.as_mut().unwrap().1; system.start( store_id, store_cfg, raft_engine, - self.registry.clone(), + registry, trans, self.pd_client.clone(), router.store_router(), From 64293cb434c42c30fc37daeaaeae5c963aea26ea Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Fri, 30 Dec 2022 17:02:17 +0800 Subject: [PATCH 0439/1149] add commit/apply duration for raft store (#13946) ref tikv/tikv#12842 Signed-off-by: bufferflies <1045931706@qq.com> --- components/raftstore-v2/src/batch/store.rs | 1 + components/raftstore-v2/src/operation/command/mod.rs | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 800dbc98f91..72f05801a0e 
100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -153,6 +153,7 @@ impl StorePoller { fn flush_events(&mut self) { self.schedule_ticks(); + self.poll_ctx.raft_metrics.maybe_flush(); } fn schedule_ticks(&mut self) { diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index a533ae9af87..4831c4abf9f 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -33,13 +33,17 @@ use raftstore::{ Proposal, }, local_metrics::RaftMetrics, + metrics::APPLY_TASK_WAIT_TIME_HISTOGRAM, msg::ErrorCallback, util, WriteCallback, }, Error, Result, }; use slog::{info, warn}; -use tikv_util::{box_err, time::monotonic_raw_now}; +use tikv_util::{ + box_err, + time::{duration_to_sec, monotonic_raw_now, Instant}, +}; use crate::{ batch::StoreContext, @@ -81,6 +85,7 @@ pub struct CommittedEntries { /// Entries need to be applied. Note some entries may not be included for /// flow control. entry_and_proposals: Vec<(Entry, Vec)>, + committed_time: Instant, } fn new_response(header: &RaftRequestHeader) -> RaftCmdResponse { @@ -246,6 +251,7 @@ impl Peer { // memtables in kv engine is flushed. 
let apply = CommittedEntries { entry_and_proposals, + committed_time: Instant::now(), }; assert!( self.apply_scheduler().is_some(), @@ -375,6 +381,8 @@ impl Apply { #[inline] pub async fn apply_committed_entries(&mut self, ce: CommittedEntries) { fail::fail_point!("APPLY_COMMITTED_ENTRIES"); + APPLY_TASK_WAIT_TIME_HISTOGRAM + .observe(duration_to_sec(ce.committed_time.saturating_elapsed())); for (e, ch) in ce.entry_and_proposals { if self.tombstone() { apply::notify_req_region_removed(self.region_state().get_region().get_id(), ch); From a6afe78c43e293addd18251dee209d630322dd9e Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Tue, 3 Jan 2023 11:02:19 +0800 Subject: [PATCH 0440/1149] extend evict_entry_cache for restart (#13998) close tikv/tikv#13997 Support to use evict_entry_cache when restart node. Signed-off-by: tabokie Signed-off-by: hongyunyan <649330952@qq.com> Signed-off-by: Xinye Tao Signed-off-by: Jay Lee Signed-off-by: Wenbo Zhang Signed-off-by: Zwb Co-authored-by: Xinye Tao Co-authored-by: Jay Co-authored-by: Zwb Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/entry_storage.rs | 4 ++++ components/raftstore/src/store/peer.rs | 3 +++ 2 files changed, 7 insertions(+) diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index c6278c890f7..4d6372dd582 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -1227,6 +1227,10 @@ impl EntryStorage { let idx = cache.cache[drain_to].index; let mem_size_change = cache.compact_to(idx + 1); RAFT_ENTRIES_EVICT_BYTES.inc_by(mem_size_change); + } else if !half { + let cache = &mut self.cache; + let mem_size_change = cache.compact_to(u64::MAX); + RAFT_ENTRIES_EVICT_BYTES.inc_by(mem_size_change); } } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 7752a0a1b0e..9384a4940c7 100644 --- a/components/raftstore/src/store/peer.rs 
+++ b/components/raftstore/src/store/peer.rs @@ -1188,6 +1188,9 @@ where peer.raft_group.campaign()?; } + let persisted_index = peer.raft_group.raft.raft_log.persisted; + peer.mut_store().update_cache_persisted(persisted_index); + Ok(peer) } From 5de5fd24da76d35060fab0ac6e85b903a7b32af2 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 3 Jan 2023 15:36:19 +0800 Subject: [PATCH 0441/1149] raft-engine: remove confusing API cut logs (#14010) ref tikv/tikv#12842 The API is supposed to be used with `append` but nowhere can we find the clue. This PR merges `cut_logs` and `append` to reduce confusion and mistakes. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/engine_panic/src/raft_engine.rs | 11 ++++---- components/engine_rocks/src/raft_engine.rs | 21 +++++++++------ components/engine_traits/src/raft_engine.rs | 20 +++++++++----- components/raft_log_engine/src/engine.rs | 13 ++++----- components/raftstore-v2/src/operation/life.rs | 16 +++-------- .../src/operation/ready/snapshot.rs | 1 - .../raftstore/src/store/async_io/write.rs | 27 +++++++++++++------ .../src/store/async_io/write_tests.rs | 20 +++++--------- .../raftstore/src/store/entry_storage.rs | 3 +-- .../raftstore/src/store/peer_storage.rs | 12 ++++----- components/raftstore/src/store/snap.rs | 2 +- .../raftstore/src/store/worker/raftlog_gc.rs | 2 +- components/server/src/raft_engine_switch.rs | 8 +++--- tests/integrations/server/kv_service.rs | 2 +- 14 files changed, 83 insertions(+), 75 deletions(-) diff --git a/components/engine_panic/src/raft_engine.rs b/components/engine_panic/src/raft_engine.rs index c3de53b4932..854b75fe30d 100644 --- a/components/engine_panic/src/raft_engine.rs +++ b/components/engine_panic/src/raft_engine.rs @@ -167,11 +167,12 @@ impl RaftEngine for PanicEngine { } impl RaftLogBatch for PanicWriteBatch { - fn append(&mut self, raft_group_id: u64, entries: Vec) -> Result<()> { - panic!() - } - - fn cut_logs(&mut self, raft_group_id: u64, from: u64, to: u64) { + fn append( 
+ &mut self, + raft_group_id: u64, + overwrite_to: Option, + entries: Vec, + ) -> Result<()> { panic!() } diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index d5331a2ce29..d566ac3821b 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -361,7 +361,19 @@ impl RaftEngine for RocksEngine { } impl RaftLogBatch for RocksWriteBatchVec { - fn append(&mut self, raft_group_id: u64, entries: Vec) -> Result<()> { + fn append( + &mut self, + raft_group_id: u64, + overwrite_to: Option, + entries: Vec, + ) -> Result<()> { + let overwrite_to = overwrite_to.unwrap_or(0); + if let Some(last) = entries.last() && last.get_index() + 1 < overwrite_to { + for index in last.get_index() + 1..overwrite_to { + let key = keys::raft_log_key(raft_group_id, index); + self.delete(&key).unwrap(); + } + } if let Some(max_size) = entries.iter().map(|e| e.compute_size()).max() { let ser_buf = Vec::with_capacity(max_size as usize); return self.append_impl(raft_group_id, &entries, ser_buf); @@ -369,13 +381,6 @@ impl RaftLogBatch for RocksWriteBatchVec { Ok(()) } - fn cut_logs(&mut self, raft_group_id: u64, from: u64, to: u64) { - for index in from..to { - let key = keys::raft_log_key(raft_group_id, index); - self.delete(&key).unwrap(); - } - } - fn put_raft_state(&mut self, raft_group_id: u64, state: &RaftLocalState) -> Result<()> { self.put_msg(&keys::raft_state_key(raft_group_id), state) } diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index 9e95ae95e14..68036eae1eb 100644 --- a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -66,7 +66,7 @@ pub trait RaftEngineDebug: RaftEngine + Sync + Send + 'static { Ok(true) }) .unwrap(); - batch.append(region_id, entries).unwrap(); + batch.append(region_id, None, entries).unwrap(); if let Some(state) = self.get_raft_state(region_id).unwrap() { 
batch.put_raft_state(region_id, &state).unwrap(); } @@ -150,11 +150,19 @@ pub trait RaftEngine: RaftEngineReadOnly + PerfContextExt + Clone + Sync + Send } pub trait RaftLogBatch: Send { - /// Note: `RaftLocalState` won't be updated in this call. - fn append(&mut self, raft_group_id: u64, entries: Vec) -> Result<()>; - - /// Remove Raft logs in [`from`, `to`) which will be overwritten later. - fn cut_logs(&mut self, raft_group_id: u64, from: u64, to: u64); + /// Append continuous entries to the batch. + /// + /// All existing entries with same index will be overwritten. If + /// `overwrite_to` is set to a larger value, then entries in + /// `[entries.last().get_index(), overwrite_to)` will be deleted. + /// Nothing will be deleted if entries is empty. Note: `RaftLocalState` + /// won't be updated in this call. + fn append( + &mut self, + raft_group_id: u64, + overwrite_to: Option, + entries: Vec, + ) -> Result<()>; fn put_store_ident(&mut self, ident: &StoreIdent) -> Result<()>; diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 7c98adf325f..1ae148ba41c 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -385,17 +385,18 @@ const FLUSH_STATE_KEY: &[u8] = &[0x06]; const KEY_PREFIX_LEN: usize = RAFT_LOG_STATE_KEY.len(); impl RaftLogBatchTrait for RaftLogBatch { - fn append(&mut self, raft_group_id: u64, entries: Vec) -> Result<()> { + fn append( + &mut self, + raft_group_id: u64, + _overwrite_to: Option, + entries: Vec, + ) -> Result<()> { + // overwrite is handled within raft log engine. self.0 .add_entries::(raft_group_id, &entries) .map_err(transfer_error) } - fn cut_logs(&mut self, _: u64, _: u64, _: u64) { - // It's unnecessary because overlapped entries can be handled in - // `append`. 
- } - fn put_raft_state(&mut self, raft_group_id: u64, state: &RaftLocalState) -> Result<()> { self.0 .put_message(raft_group_id, RAFT_LOG_STATE_KEY.to_vec(), state) diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 0f2e72061ef..954c6992cf9 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -10,8 +10,6 @@ //! sending a message to store fsm first, and then using split to initialized //! the peer. -use std::cmp; - use batch_system::BasicMailbox; use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; @@ -296,32 +294,24 @@ impl Peer { /// After destroy is finished, `finish_destroy` should be called to clean up /// memory states. pub fn start_destroy(&mut self, write_task: &mut WriteTask) { - let entry_storage = self.storage().entry_storage(); if self.postponed_destroy() { return; } - let first_index = entry_storage.first_index(); - let last_index = entry_storage.last_index(); - if first_index <= last_index { - write_task.cut_logs = match write_task.cut_logs { - None => Some((first_index, last_index)), - Some((f, l)) => Some((cmp::min(first_index, f), cmp::max(last_index, l))), - }; - } let raft_engine = self.entry_storage().raft_engine(); let mut region_state = self.storage().region_state().clone(); let region_id = region_state.get_region().get_id(); + // Use extra write to ensure these writes are the last writes to raft engine. let lb = write_task .extra_write .ensure_v2(|| raft_engine.log_batch(2)); - // We only use raft-log-engine for v2, first index is not important. + // We only use raft-log-engine for v2, first index and state are not important. let raft_state = self.entry_storage().raft_state(); raft_engine.clean(region_id, 0, raft_state, lb).unwrap(); - // Write worker will do the clean up when meeting tombstone state. 
region_state.set_state(PeerState::Tombstone); let applied_index = self.entry_storage().applied_index(); lb.put_region_state(region_id, applied_index, ®ion_state) .unwrap(); + self.set_has_extra_write(); self.destroy_progress_mut().start(); } diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index c040bdcbb3b..76a5b4297b3 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -507,7 +507,6 @@ impl Storage { if self.entry_storage().first_index() <= old_last_index { // All states are rewritten in the following blocks. Stale states will be // cleaned up by compact worker. - task.cut_logs = Some((0, old_last_index + 1)); self.entry_storage_mut().clear(); } diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index b4cceb96a82..56d0f93a11d 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -186,8 +186,8 @@ where pub raft_wb: Option, // called after writing to kvdb and raftdb. pub persisted_cbs: Vec>, - pub entries: Vec, - pub cut_logs: Option<(u64, u64)>, + overwrite_to: Option, + entries: Vec, pub raft_state: Option, pub extra_write: ExtraWrite, pub messages: Vec, @@ -207,8 +207,8 @@ where ready_number, send_time: Instant::now(), raft_wb: None, + overwrite_to: None, entries: vec![], - cut_logs: None, raft_state: None, extra_write: ExtraWrite::None, messages: vec![], @@ -221,11 +221,21 @@ where pub fn has_data(&self) -> bool { !(self.raft_state.is_none() && self.entries.is_empty() - && self.cut_logs.is_none() && self.extra_write.is_empty() && self.raft_wb.as_ref().map_or(true, |wb| wb.is_empty())) } + /// Append continous entries. + /// + /// All existing entries with same index will be overwritten. 
If + /// `overwrite_to` is set to a larger value, then entries in + /// `[entries.last().get_index(), overwrite_to)` will be deleted. If + /// entries is empty, nothing will be deleted. + pub fn set_append(&mut self, overwrite_to: Option, entries: Vec) { + self.entries = entries; + self.overwrite_to = overwrite_to; + } + #[inline] pub fn ready_number(&self) -> u64 { self.ready_number @@ -387,11 +397,12 @@ where raft_wb.merge(wb).unwrap(); } raft_wb - .append(task.region_id, std::mem::take(&mut task.entries)) + .append( + task.region_id, + task.overwrite_to, + std::mem::take(&mut task.entries), + ) .unwrap(); - if let Some((from, to)) = task.cut_logs { - raft_wb.cut_logs(task.region_id, from, to); - } if let Some(raft_state) = task.raft_state.take() && self.raft_states.insert(task.region_id, raft_state).is_none() { diff --git a/components/raftstore/src/store/async_io/write_tests.rs b/components/raftstore/src/store/async_io/write_tests.rs index 6007b39489e..d1861a8903c 100644 --- a/components/raftstore/src/store/async_io/write_tests.rs +++ b/components/raftstore/src/store/async_io/write_tests.rs @@ -167,7 +167,9 @@ fn delete_kv(wb: Option<&mut TestKvWriteBatch>, key: &[u8]) { /// Simulate kv puts on raft engine. 
fn put_raft_kv(wb: Option<&mut TestRaftLogBatch>, key: u64) { - wb.unwrap().append(key, vec![new_entry(key, key)]).unwrap(); + wb.unwrap() + .append(key, None, vec![new_entry(key, key)]) + .unwrap(); } fn delete_raft_kv(engine: &RaftTestEngine, wb: Option<&mut TestRaftLogBatch>, key: u64) { @@ -294,10 +296,7 @@ fn test_worker() { put_kv(task_3.extra_write.v1_mut(), b"kv_k3", b"kv_v3"); put_raft_kv(task_3.raft_wb.as_mut(), 37); delete_raft_kv(&engines.raft, task_3.raft_wb.as_mut(), 17); - task_3 - .entries - .append(&mut vec![new_entry(6, 6), new_entry(7, 7)]); - task_3.cut_logs = Some((8, 9)); + task_3.set_append(Some(9), vec![new_entry(6, 6), new_entry(7, 7)]); task_3.raft_state = Some(new_raft_state(7, 124, 6, 7)); task_3 .messages @@ -392,10 +391,7 @@ fn test_worker_split_raft_wb() { lb.put_apply_state(region_1, 25, &apply_state_3).unwrap(); put_raft_kv(task_3.raft_wb.as_mut(), raft_key_3); delete_raft_kv(&engines.raft, task_3.raft_wb.as_mut(), raft_key_1); - task_3 - .entries - .append(&mut vec![new_entry(6, 6), new_entry(7, 7)]); - task_3.cut_logs = Some((8, 9)); + task_3.set_append(Some(9), vec![new_entry(6, 6), new_entry(7, 7)]); task_3.raft_state = Some(new_raft_state(7, 124, 6, 7)); if split.1 { expected_wbs += 1; @@ -500,8 +496,7 @@ fn test_basic_flow() { delete_kv(task_3.extra_write.v1_mut(), b"kv_k1"); put_raft_kv(task_3.raft_wb.as_mut(), 37); delete_raft_kv(&engines.raft, task_3.raft_wb.as_mut(), 17); - task_3.entries.append(&mut vec![new_entry(6, 6)]); - task_3.cut_logs = Some((7, 8)); + task_3.set_append(Some(8), vec![new_entry(6, 6)]); task_3.raft_state = Some(new_raft_state(6, 345, 6, 6)); task_3 .messages @@ -603,8 +598,7 @@ fn test_basic_flow_with_states() { lb.put_apply_state(region_1, 5, &apply_state_3).unwrap(); put_raft_kv(task_3.raft_wb.as_mut(), 37); delete_raft_kv(&engines.raft, task_3.raft_wb.as_mut(), 17); - task_3.entries.append(&mut vec![new_entry(6, 6)]); - task_3.cut_logs = Some((7, 8)); + task_3.set_append(Some(8), vec![new_entry(6, 
6)]); task_3.raft_state = Some(new_raft_state(6, 345, 6, 6)); task_3 .messages diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index 4d6372dd582..bc85ecedc34 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -1075,9 +1075,8 @@ impl EntryStorage { self.cache.append(self.region_id, self.peer_id, &entries); - task.entries = entries; // Delete any previously appended log entries which never committed. - task.cut_logs = Some((last_index + 1, prev_last_index + 1)); + task.set_append(Some(prev_last_index + 1), entries); self.raft_state.set_last_index(last_index); self.last_term = last_term; diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index c9e460d1cbc..b060a866d71 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -2082,7 +2082,7 @@ pub mod tests { let mut lb = engines.raft.log_batch(4096); // last_index < commit_index is invalid. 
raft_state.set_last_index(11); - lb.append(1, vec![new_entry(11, RAFT_INIT_LOG_TERM)]) + lb.append(1, None, vec![new_entry(11, RAFT_INIT_LOG_TERM)]) .unwrap(); raft_state.mut_hard_state().set_commit(12); lb.put_raft_state(1, &raft_state).unwrap(); @@ -2093,7 +2093,7 @@ pub mod tests { let entries = (12..=20) .map(|index| new_entry(index, RAFT_INIT_LOG_TERM)) .collect(); - lb.append(1, entries).unwrap(); + lb.append(1, None, entries).unwrap(); lb.put_raft_state(1, &raft_state).unwrap(); engines.raft.consume(&mut lb, false).unwrap(); s = build_storage().unwrap(); @@ -2138,7 +2138,7 @@ pub mod tests { .map(|index| new_entry(index, RAFT_INIT_LOG_TERM)) .collect(); engines.raft.gc(1, 0, 21, &mut lb).unwrap(); - lb.append(1, entries).unwrap(); + lb.append(1, None, entries).unwrap(); engines.raft.consume(&mut lb, false).unwrap(); raft_state.mut_hard_state().set_commit(14); s = build_storage().unwrap(); @@ -2150,7 +2150,7 @@ pub mod tests { .map(|index| new_entry(index, RAFT_INIT_LOG_TERM)) .collect(); entries[0].set_term(RAFT_INIT_LOG_TERM - 1); - lb.append(1, entries).unwrap(); + lb.append(1, None, entries).unwrap(); engines.raft.consume(&mut lb, false).unwrap(); assert!(build_storage().is_err()); @@ -2158,7 +2158,7 @@ pub mod tests { let entries = (14..=20) .map(|index| new_entry(index, RAFT_INIT_LOG_TERM)) .collect(); - lb.append(1, entries).unwrap(); + lb.append(1, None, entries).unwrap(); raft_state.mut_hard_state().set_term(RAFT_INIT_LOG_TERM - 1); lb.put_raft_state(1, &raft_state).unwrap(); engines.raft.consume(&mut lb, false).unwrap(); @@ -2168,7 +2168,7 @@ pub mod tests { engines.raft.gc(1, 0, 21, &mut lb).unwrap(); raft_state.mut_hard_state().set_term(RAFT_INIT_LOG_TERM); raft_state.set_last_index(13); - lb.append(1, vec![new_entry(13, RAFT_INIT_LOG_TERM)]) + lb.append(1, None, vec![new_entry(13, RAFT_INIT_LOG_TERM)]) .unwrap(); lb.put_raft_state(1, &raft_state).unwrap(); engines.raft.consume(&mut lb, false).unwrap(); diff --git 
a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 939bc2a1078..a9ef7df8c62 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -2154,7 +2154,7 @@ pub mod tests { apply_entry.set_term(0); apply_state.mut_truncated_state().set_index(10); kv.put_msg_cf(CF_RAFT, &keys::apply_state_key(region_id), &apply_state)?; - lb.append(region_id, vec![apply_entry])?; + lb.append(region_id, None, vec![apply_entry])?; // Put region info into kv engine. let region = gen_test_region(region_id, 1, 1); diff --git a/components/raftstore/src/store/worker/raftlog_gc.rs b/components/raftstore/src/store/worker/raftlog_gc.rs index ce829ed61b2..3edabae71a0 100644 --- a/components/raftstore/src/store/worker/raftlog_gc.rs +++ b/components/raftstore/src/store/worker/raftlog_gc.rs @@ -214,7 +214,7 @@ mod tests { for i in 0..100 { let mut e = Entry::new(); e.set_index(i); - raft_wb.append(region_id, vec![e]).unwrap(); + raft_wb.append(region_id, None, vec![e]).unwrap(); } raft_db.consume(&mut raft_wb, false /* sync */).unwrap(); diff --git a/components/server/src/raft_engine_switch.rs b/components/server/src/raft_engine_switch.rs index d0637a04b0a..bfaa2a6587e 100644 --- a/components/server/src/raft_engine_switch.rs +++ b/components/server/src/raft_engine_switch.rs @@ -161,7 +161,7 @@ fn run_dump_raftdb_worker( // Assume that we always scan entry first and raft state at the // end. 
batch - .append(region_id, std::mem::take(&mut entries)) + .append(region_id, None, std::mem::take(&mut entries)) .unwrap(); } _ => unreachable!("There is only 2 types of keys in raft"), @@ -170,7 +170,7 @@ fn run_dump_raftdb_worker( if local_size >= BATCH_THRESHOLD { local_size = 0; batch - .append(region_id, std::mem::take(&mut entries)) + .append(region_id, None, std::mem::take(&mut entries)) .unwrap(); let size = new_engine.consume(&mut batch, false).unwrap(); @@ -205,7 +205,7 @@ fn run_dump_raft_engine_worker( begin += old_engine .fetch_entries_to(id, begin, end, Some(BATCH_THRESHOLD), &mut entries) .unwrap() as u64; - batch.append(id, entries).unwrap(); + batch.append(id, None, entries).unwrap(); let size = new_engine.consume(&mut batch, false).unwrap(); count_size.fetch_add(size, Ordering::Relaxed); } @@ -303,7 +303,7 @@ mod tests { e.set_index(i); entries.push(e); } - batch.append(num, entries).unwrap(); + batch.append(num, None, entries).unwrap(); } // Get data from raft engine and assert. diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 496c587a7b9..8709373b766 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -966,7 +966,7 @@ fn test_debug_raft_log() { entry.set_entry_type(eraftpb::EntryType::EntryNormal); entry.set_data(vec![42].into()); let mut lb = engine.log_batch(0); - lb.append(region_id, vec![entry.clone()]).unwrap(); + lb.append(region_id, None, vec![entry.clone()]).unwrap(); engine.consume(&mut lb, false).unwrap(); assert_eq!( engine.get_entry(region_id, log_index).unwrap().unwrap(), From bce01cfbc82b58a38b066892a3c679daf91dd33f Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 3 Jan 2023 16:42:19 +0800 Subject: [PATCH 0442/1149] raftstore-v2: publish tablet in raftstore thread only (#14009) ref tikv/tikv#12842 Publish tablet in apply thread is unsafe. This PR moves the operation to raftstore. 
It also fixes the issues that applying two splits at a time can cause panic. It also makes sure cache will be cleared after tablet is published. Signed-off-by: Jay Lee --- components/engine_traits/src/tablet.rs | 34 +- components/raftstore-v2/src/batch/store.rs | 65 ++- components/raftstore-v2/src/fsm/store.rs | 17 +- .../operation/command/admin/compact_log.rs | 125 ++++- .../src/operation/command/admin/mod.rs | 1 + .../src/operation/command/admin/split.rs | 31 +- .../raftstore-v2/src/operation/command/mod.rs | 3 +- components/raftstore-v2/src/operation/life.rs | 10 +- components/raftstore-v2/src/operation/mod.rs | 11 +- .../raftstore-v2/src/operation/query/lease.rs | 6 +- .../raftstore-v2/src/operation/query/local.rs | 430 ++++++++++++------ .../raftstore-v2/src/operation/query/mod.rs | 4 +- .../raftstore-v2/src/operation/ready/mod.rs | 16 +- .../src/operation/ready/snapshot.rs | 52 ++- components/raftstore-v2/src/raft/apply.rs | 20 +- components/raftstore-v2/src/raft/peer.rs | 88 +--- components/raftstore-v2/src/raft/storage.rs | 76 +++- components/raftstore-v2/src/router/imp.rs | 8 +- .../raftstore-v2/src/worker/tablet_gc.rs | 15 +- .../tests/failpoints/test_split.rs | 3 + .../tests/integrations/cluster.rs | 2 +- .../raftstore/src/store/async_io/write.rs | 6 +- components/raftstore/src/store/mod.rs | 5 +- components/raftstore/src/store/worker/read.rs | 13 +- components/server/src/server2.rs | 4 +- src/config/mod.rs | 27 +- tests/integrations/config/mod.rs | 10 +- 27 files changed, 707 insertions(+), 375 deletions(-) diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index f552fbc01aa..6bdfa97a6e6 100644 --- a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -31,6 +31,13 @@ pub struct CachedTablet { version: u64, } +impl CachedTablet { + fn release(&mut self) { + self.cache = None; + self.version = 0; + } +} + impl CachedTablet { #[inline] fn new(data: Option) -> Self { @@ -44,13 
+51,11 @@ impl CachedTablet { } } - pub fn set(&mut self, data: EK) { - self.version = { - let mut latest_data = self.latest.data.lock().unwrap(); - *latest_data = Some(data.clone()); - self.latest.version.fetch_add(1, Ordering::Relaxed) + 1 - }; - self.cache = Some(data); + pub fn set(&mut self, data: EK) -> Option { + self.cache = Some(data.clone()); + let mut latest_data = self.latest.data.lock().unwrap(); + self.version = self.latest.version.fetch_add(1, Ordering::Relaxed) + 1; + latest_data.replace(data) } /// Get the tablet from cache without checking if it's up to date. @@ -69,19 +74,6 @@ impl CachedTablet { } self.cache() } - - /// Returns how many versions has passed. - #[inline] - pub fn refresh(&mut self) -> u64 { - let old_version = self.version; - if self.latest.version.load(Ordering::Relaxed) > old_version { - let latest_data = self.latest.data.lock().unwrap(); - self.version = self.latest.version.load(Ordering::Relaxed); - self.cache = latest_data.clone(); - return self.version - old_version; - } - 0 - } } /// Context to be passed to `TabletFactory`. 
@@ -317,8 +309,10 @@ impl TabletRegistry { let mut tablets = self.tablets.tablets.lock().unwrap(); for (id, tablet) in tablets.iter_mut() { if !f(*id, tablet) { + tablet.release(); return; } + tablet.release(); } } } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 72f05801a0e..9ba7a63139c 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -45,7 +45,7 @@ use time::Timespec; use crate::{ fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate, StoreMeta}, - operation::SPLIT_PREFIX, + operation::{SharedReadTablet, SPLIT_PREFIX}, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, worker::{pd, tablet_gc}, @@ -72,7 +72,7 @@ pub struct StoreContext { pub timer: SteadyTimer, pub schedulers: Schedulers, /// store meta - pub store_meta: Arc>, + pub store_meta: Arc>>, pub engine: ER, pub tablet_registry: TabletRegistry, pub apply_pool: FuturePool, @@ -259,7 +259,7 @@ struct StorePollerBuilder { schedulers: Schedulers, apply_pool: FuturePool, logger: Logger, - store_meta: Arc>, + store_meta: Arc>>, snap_mgr: TabletSnapManager, } @@ -273,7 +273,7 @@ impl StorePollerBuilder { router: StoreRouter, schedulers: Schedulers, logger: Logger, - store_meta: Arc>, + store_meta: Arc>>, snap_mgr: TabletSnapManager, coprocessor_host: CoprocessorHost, ) -> Self { @@ -429,13 +429,22 @@ pub struct Schedulers { pub split_check: Scheduler, } +impl Schedulers { + fn stop(&self) { + self.read.stop(); + self.pd.stop(); + self.tablet_gc.stop(); + self.split_check.stop(); + } +} + /// A set of background threads that will processing offloaded work from /// raftstore. 
struct Workers { /// Worker for fetching raft logs asynchronously async_read: Worker, pd: LazyWorker, - tablet_gc_worker: Worker, + tablet_gc: Worker, async_write: StoreWriters, purge: Option, @@ -448,18 +457,29 @@ impl Workers { Self { async_read: Worker::new("async-read-worker"), pd, - tablet_gc_worker: Worker::new("tablet-gc-worker"), + tablet_gc: Worker::new("tablet-gc-worker"), async_write: StoreWriters::default(), purge, background, } } + + fn stop(mut self) { + self.async_write.shutdown(); + self.async_read.stop(); + self.pd.stop(); + self.tablet_gc.stop(); + if let Some(w) = self.purge { + w.stop(); + } + } } /// The system used for polling Raft activities. pub struct StoreSystem { system: BatchSystem, StoreFsm>, workers: Option>, + schedulers: Option>, logger: Logger, shutdown: Arc, } @@ -474,7 +494,7 @@ impl StoreSystem { trans: T, pd_client: Arc, router: &StoreRouter, - store_meta: Arc>, + store_meta: Arc>>, snap_mgr: TabletSnapManager, concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 @@ -548,7 +568,7 @@ impl StoreSystem { ), ); - let tablet_gc_scheduler = workers.tablet_gc_worker.start( + let tablet_gc_scheduler = workers.tablet_gc.start_with_timer( "tablet-gc-worker", tablet_gc::Runner::new(tablet_registry.clone(), self.logger.clone()), ); @@ -568,13 +588,14 @@ impl StoreSystem { tablet_registry, trans, router.clone(), - schedulers, + schedulers.clone(), self.logger.clone(), store_meta.clone(), snap_mgr, coprocessor_host, ); self.workers = Some(workers); + self.schedulers = Some(schedulers); let peers = builder.init()?; // Choose a different name so we know what version is actually used. rs stands // for raft store. 
@@ -585,9 +606,14 @@ impl StoreSystem { let mut address = Vec::with_capacity(peers.len()); { let mut meta = store_meta.as_ref().lock().unwrap(); - for (region_id, (tx, fsm)) in peers { - meta.readers - .insert(region_id, fsm.peer().generate_read_delegate()); + for (region_id, (tx, mut fsm)) in peers { + if let Some(tablet) = fsm.peer_mut().tablet() { + let read_tablet = SharedReadTablet::new(tablet.clone()); + meta.readers.insert( + region_id, + (fsm.peer().generate_read_delegate(), read_tablet), + ); + } address.push(region_id); mailboxes.push(( @@ -612,18 +638,16 @@ impl StoreSystem { if self.workers.is_none() { return; } - let mut workers = self.workers.take().unwrap(); + let workers = self.workers.take().unwrap(); - // TODO: gracefully shutdown future pool + // TODO: gracefully shutdown future apply pool + // Stop schedulers first, so all background future worker pool will be stopped + // gracefully. + self.schedulers.take().unwrap().stop(); self.system.shutdown(); - workers.async_write.shutdown(); - workers.async_read.stop(); - workers.pd.stop(); - if let Some(w) = workers.purge { - w.stop(); - } + workers.stop(); } } @@ -707,6 +731,7 @@ where let system = StoreSystem { system, workers: None, + schedulers: None, logger: logger.clone(), shutdown: Arc::new(AtomicBool::new(false)), }; diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index f107715a535..a5f22d7e1a8 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -12,9 +12,7 @@ use engine_traits::{KvEngine, RaftEngine}; use futures::{compat::Future01CompatExt, FutureExt}; use keys::{data_end_key, data_key}; use kvproto::metapb::Region; -use raftstore::store::{ - fsm::store::StoreRegionMeta, Config, ReadDelegate, RegionReadProgressRegistry, -}; +use raftstore::store::{fsm::store::StoreRegionMeta, Config, RegionReadProgressRegistry}; use slog::{info, o, Logger}; use tikv_util::{ future::poll_future_notify, @@ -24,13 
+22,14 @@ use tikv_util::{ use crate::{ batch::StoreContext, + operation::ReadDelegatePair, router::{StoreMsg, StoreTick}, }; -pub struct StoreMeta { +pub struct StoreMeta { pub store_id: u64, /// region_id -> reader - pub readers: HashMap, + pub readers: HashMap>, /// region_id -> `RegionReadProgress` pub region_read_progress: RegionReadProgressRegistry, /// (region_end_key, epoch.version) -> region_id @@ -42,9 +41,9 @@ pub struct StoreMeta { pub(crate) regions: HashMap, } -impl StoreMeta { - pub fn new(store_id: u64) -> StoreMeta { - StoreMeta { +impl StoreMeta { + pub fn new(store_id: u64) -> Self { + Self { store_id, readers: HashMap::default(), region_read_progress: RegionReadProgressRegistry::default(), @@ -96,7 +95,7 @@ impl StoreMeta { } } -impl StoreRegionMeta for StoreMeta { +impl StoreRegionMeta for StoreMeta { #[inline] fn store_id(&self) -> u64 { self.store_id diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index c36c7353871..7127cd45306 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -17,7 +17,9 @@ use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use kvproto::raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse, RaftCmdRequest}; use protobuf::Message; use raftstore::{ - store::{fsm::new_admin_request, needs_evict_entry_cache, Transport, WriteTask}, + store::{ + fsm::new_admin_request, needs_evict_entry_cache, Transport, WriteTask, RAFT_INIT_LOG_INDEX, + }, Result, }; use slog::{debug, error, info}; @@ -32,6 +34,47 @@ use crate::{ worker::tablet_gc, }; +#[derive(Debug)] +pub struct CompactLogContext { + skipped_ticks: usize, + approximate_log_size: u64, + last_applying_index: u64, + /// Tombstone tablets can only be destroyed when the tablet that replaces it + /// is persisted. 
This is a list of tablet index that awaits to be + /// persisted. When persisted_apply is advanced, we need to notify tablet_gc + /// worker to destroy them. + tombstone_tablets_wait_index: Vec, +} + +impl CompactLogContext { + pub fn new(last_applying_index: u64) -> CompactLogContext { + CompactLogContext { + skipped_ticks: 0, + approximate_log_size: 0, + last_applying_index, + tombstone_tablets_wait_index: vec![], + } + } + + #[inline] + pub fn maybe_skip_compact_log(&mut self, max_skip_ticks: usize) -> bool { + if self.skipped_ticks < max_skip_ticks { + self.skipped_ticks += 1; + true + } else { + false + } + } + + pub fn add_log_size(&mut self, size: u64) { + self.approximate_log_size += size; + } + + pub fn set_last_applying_index(&mut self, index: u64) { + self.last_applying_index = index; + } +} + impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { pub fn on_compact_log_tick(&mut self, force: bool) { if !self.fsm.peer().is_leader() { @@ -130,13 +173,16 @@ impl Peer { replicated_idx } else if applied_idx > first_idx && applied_idx - first_idx >= store_ctx.cfg.raft_log_gc_count_limit() - || self.approximate_raft_log_size() >= store_ctx.cfg.raft_log_gc_size_limit().0 + || self.compact_log_context().approximate_log_size + >= store_ctx.cfg.raft_log_gc_size_limit().0 { std::cmp::max(first_idx + (last_idx - first_idx) / 2, replicated_idx) } else if replicated_idx < first_idx || last_idx - first_idx < 3 || replicated_idx - first_idx < store_ctx.cfg.raft_log_gc_threshold - && self.maybe_skip_compact_log(store_ctx.cfg.raft_log_reserve_max_ticks) + && self + .compact_log_context_mut() + .maybe_skip_compact_log(store_ctx.cfg.raft_log_reserve_max_ticks) { return; } else { @@ -163,7 +209,7 @@ impl Peer { let (ch, _) = CmdResChannel::pair(); self.on_admin_command(store_ctx, req, ch); - self.reset_skip_compact_log_ticks(); + self.compact_log_context_mut().skipped_ticks = 0; } } @@ -217,6 +263,46 @@ impl Apply { } impl Peer { + #[inline] + 
pub fn record_tombstone_tablet( + &mut self, + ctx: &StoreContext, + old_tablet: EK, + new_tablet_index: u64, + ) { + let compact_log_context = self.compact_log_context_mut(); + compact_log_context + .tombstone_tablets_wait_index + .push(new_tablet_index); + let _ = ctx + .schedulers + .tablet_gc + .schedule(tablet_gc::Task::prepare_destroy( + old_tablet, + self.region_id(), + new_tablet_index, + )); + } + + /// Returns if there's any tombstone being removed. + #[inline] + fn remove_tombstone_tablets(&mut self, persisted: u64) -> bool { + let compact_log_context = self.compact_log_context_mut(); + let removed = compact_log_context + .tombstone_tablets_wait_index + .iter() + .take_while(|i| **i <= persisted) + .count(); + if removed > 0 { + compact_log_context + .tombstone_tablets_wait_index + .drain(..removed); + true + } else { + false + } + } + pub fn on_apply_res_compact_log( &mut self, store_ctx: &mut StoreContext, @@ -260,18 +346,25 @@ impl Peer { self.compact_log_from_engine(store_ctx); } - let applied = *self.last_applying_index_mut(); + let context = self.compact_log_context_mut(); + let applied = context.last_applying_index; let total_cnt = applied - old_truncated; let remain_cnt = applied - res.compact_index; - self.update_approximate_raft_log_size(|s| s * remain_cnt / total_cnt); + context.approximate_log_size = + (context.approximate_log_size as f64 * (remain_cnt as f64 / total_cnt as f64)) as u64; } + /// Called when apply index is persisted. There are two different situation: + /// + /// Generally, additional writes are triggered to persist apply index. In + /// this case task is `Some`. But after applying snapshot, the apply + /// index is persisted ahead of time. In this case task is `None`. 
#[inline] pub fn on_advance_persisted_apply_index( &mut self, store_ctx: &mut StoreContext, old_persisted: u64, - task: &mut WriteTask, + task: Option<&mut WriteTask>, ) { let new_persisted = self.storage().apply_trace().persisted_apply_index(); if old_persisted < new_persisted { @@ -286,14 +379,20 @@ impl Peer { } else { self.set_has_extra_write(); } - if old_persisted < self.entry_storage().truncated_index() + 1 { + // If it's snapshot, logs are gc already. + if task.is_some() && old_persisted < self.entry_storage().truncated_index() + 1 { self.compact_log_from_engine(store_ctx); } - if self.remove_tombstone_tablets_before(new_persisted) { + if self.remove_tombstone_tablets(new_persisted) { let sched = store_ctx.schedulers.tablet_gc.clone(); - task.persisted_cbs.push(Box::new(move || { + if let Some(task) = task { + task.persisted_cbs.push(Box::new(move || { + let _ = sched.schedule(tablet_gc::Task::destroy(region_id, new_persisted)); + })); + } else { + // In snapshot, the index is persisted, tablet can be destroyed directly. let _ = sched.schedule(tablet_gc::Task::destroy(region_id, new_persisted)); - })) + } } } } @@ -302,6 +401,10 @@ impl Peer { let truncated = self.entry_storage().truncated_index() + 1; let persisted_applied = self.storage().apply_trace().persisted_apply_index(); let compact_index = std::cmp::min(truncated, persisted_applied); + if compact_index == RAFT_INIT_LOG_INDEX + 1 { + // There is no logs at RAFT_INIT_LOG_INDEX, nothing to delete. + return; + } // Raft Engine doesn't care about first index. 
if let Err(e) = store_ctx diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 4f2abb9c65e..9ceaa76c03b 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -5,6 +5,7 @@ mod conf_change; mod split; mod transfer_leader; +pub use compact_log::CompactLogContext; use compact_log::CompactLogResult; use conf_change::ConfChangeResult; use engine_traits::{KvEngine, RaftEngine}; diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index faf059b3871..add5af1ce52 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -25,7 +25,7 @@ //! created by the store, and here init it using the data sent from the parent //! peer. -use std::{borrow::Cow, cmp, path::PathBuf}; +use std::{any::Any, borrow::Cow, cmp, path::PathBuf}; use collections::HashSet; use crossbeam::channel::SendError; @@ -58,7 +58,7 @@ use slog::info; use crate::{ batch::StoreContext, fsm::{ApplyResReporter, PeerFsmDelegate}, - operation::AdminCmdResult, + operation::{AdminCmdResult, SharedReadTablet}, raft::{Apply, Peer}, router::{CmdResChannel, PeerMsg, PeerTick, StoreMsg}, worker::tablet_gc, @@ -73,6 +73,10 @@ pub struct SplitResult { // The index of the derived region in `regions` pub derived_index: usize, pub tablet_index: u64, + // Hack: in common case we should use generic, but split is an unfrequent + // event that performance is not critical. And using `Any` can avoid polluting + // all existing code. + tablet: Box, } #[derive(Debug)] @@ -370,8 +374,6 @@ impl Apply { ) }); } - // Remove the old write batch. 
- self.write_batch.take(); let reg = self.tablet_registry(); let path = reg.tablet_path(region_id, log_index); let mut ctx = TabletContext::new(®ions[derived_index], Some(log_index)); @@ -380,7 +382,7 @@ impl Apply { // TODO: Should we avoid flushing for the old tablet? ctx.flush_state = Some(self.flush_state().clone()); let tablet = reg.tablet_factory().open_tablet(ctx, &path).unwrap(); - self.publish_tablet(tablet); + self.set_tablet(tablet.clone()); self.region_state_mut() .set_region(regions[derived_index].clone()); @@ -396,6 +398,7 @@ impl Apply { regions, derived_index, tablet_index: log_index, + tablet: Box::new(tablet), }), )) } @@ -427,10 +430,14 @@ impl Peer { }; fail_point!("on_split_invalidate_locks"); + let tablet: EK = match res.tablet.downcast() { + Ok(t) => *t, + Err(t) => unreachable!("tablet type should be the same: {:?}", t), + }; { let mut meta = store_ctx.store_meta.lock().unwrap(); meta.set_region(derived, true, &self.logger); - let reader = meta.readers.get_mut(&derived.get_id()).unwrap(); + let (reader, read_tablet) = meta.readers.get_mut(&derived.get_id()).unwrap(); self.set_region( &store_ctx.coprocessor_host, reader, @@ -438,6 +445,12 @@ impl Peer { RegionChangeReason::Split, res.tablet_index, ); + + // Tablet should be updated in lock to match the epoch. 
+ *read_tablet = SharedReadTablet::new(tablet.clone()); + } + if let Some(tablet) = self.set_tablet(tablet) { + self.record_tombstone_tablet(store_ctx, tablet, res.tablet_index); } self.post_split(); @@ -457,8 +470,6 @@ impl Peer { self.split_flow_control_mut().may_skip_split_check = false; self.add_pending_tick(PeerTick::SplitRegionCheck); } - - self.record_tablet_as_tombstone_and_refresh(res.tablet_index, store_ctx); let _ = store_ctx .schedulers .tablet_gc @@ -632,7 +643,7 @@ mod test { use engine_test::{ ctor::{CfOptions, DbOptions}, - kv::TestTabletFactory, + kv::{KvTestEngine, TestTabletFactory}, }; use engine_traits::{ FlushState, Peekable, TabletContext, TabletRegistry, WriteBatch, CF_DEFAULT, DATA_CFS, @@ -679,7 +690,7 @@ mod test { } fn assert_split( - apply: &mut Apply, + apply: &mut Apply, parent_id: u64, right_derived: bool, new_region_ids: Vec, diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 4831c4abf9f..8b0d3d7d461 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -57,7 +57,8 @@ mod control; mod write; pub use admin::{ - temp_split_path, AdminCmdResult, RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX, + temp_split_path, AdminCmdResult, CompactLogContext, RequestSplit, SplitFlowControl, SplitInit, + SPLIT_PREFIX, }; pub use control::ProposalControl; pub use write::{ diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 954c6992cf9..f312162d1e5 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -320,12 +320,12 @@ impl Peer { /// memory states. 
pub fn finish_destroy(&mut self, ctx: &mut StoreContext) { info!(self.logger, "peer destroyed"); - ctx.router.close(self.region_id()); + let region_id = self.region_id(); + ctx.router.close(region_id); { - ctx.store_meta - .lock() - .unwrap() - .remove_region(self.region_id()); + let mut meta = ctx.store_meta.lock().unwrap(); + meta.remove_region(region_id); + meta.readers.remove(®ion_id); } if let Some(msg) = self.destroy_progress_mut().finish() { // The message will be dispatched to store fsm, which will create a diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index c49a14142ce..dc245c24384 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -7,9 +7,9 @@ mod query; mod ready; pub use command::{ - AdminCmdResult, CommittedEntries, ProposalControl, RequestSplit, SimpleWriteBinary, - SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, SplitFlowControl, - SPLIT_PREFIX, + AdminCmdResult, CommittedEntries, CompactLogContext, ProposalControl, RequestSplit, + SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, + SplitFlowControl, SPLIT_PREFIX, }; pub use life::DestroyProgress; pub use ready::{ @@ -17,4 +17,7 @@ pub use ready::{ StateStorage, }; -pub(crate) use self::{command::SplitInit, query::LocalReader}; +pub(crate) use self::{ + command::SplitInit, + query::{LocalReader, ReadDelegatePair, SharedReadTablet}, +}; diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index ca92729ee6f..0abd0cccd72 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -150,7 +150,7 @@ impl Peer { pub(crate) fn maybe_renew_leader_lease( &mut self, ts: Timespec, - store_meta: &Mutex, + store_meta: &Mutex>, progress: Option, ) { // A nonleader peer should never has leader lease. 
@@ -170,12 +170,12 @@ impl Peer { }; if let Some(progress) = progress { let mut meta = store_meta.lock().unwrap(); - let reader = meta.readers.get_mut(&self.region_id()).unwrap(); + let reader = &mut meta.readers.get_mut(&self.region_id()).unwrap().0; self.maybe_update_read_progress(reader, progress); } if let Some(progress) = read_progress { let mut meta = store_meta.lock().unwrap(); - let reader = meta.readers.get_mut(&self.region_id()).unwrap(); + let reader = &mut meta.readers.get_mut(&self.region_id()).unwrap().0; self.maybe_update_read_progress(reader, progress); } } diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 2cb5497d789..e4c0aa6d0b9 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -8,7 +8,7 @@ use std::{ use batch_system::Router; use crossbeam::channel::TrySendError; -use engine_traits::{CachedTablet, KvEngine, RaftEngine, TabletRegistry}; +use engine_traits::{KvEngine, RaftEngine}; use futures::Future; use kvproto::{ errorpb, @@ -20,10 +20,9 @@ use raftstore::{ cmd_resp, util::LeaseState, worker_metrics::{self, TLS_LOCAL_READ_METRICS}, - LocalReadContext, LocalReaderCore, ReadDelegate, ReadExecutor, ReadExecutorProvider, - RegionSnapshot, RequestPolicy, + LocalReaderCore, ReadDelegate, ReadExecutorProvider, RegionSnapshot, }, - Error, Result, + Result, }; use slog::{debug, Logger}; use tikv_util::{box_err, codec::number::decode_u64, time::monotonic_raw_now, Either}; @@ -50,6 +49,87 @@ where } } +pub type ReadDelegatePair = (ReadDelegate, SharedReadTablet); + +/// A share struct for local reader. +/// +/// Though it looks like `CachedTablet`, but there are subtle differences. +/// 1. `CachedTablet` always hold the latest version of the tablet. But +/// `SharedReadTablet` should only hold the tablet that matches epoch. So it +/// will be updated only when the epoch is updated. +/// 2. 
`SharedReadTablet` should always hold a tablet and the same tablet. If +/// tablet is taken, then it should be considered as stale and should check +/// again epoch to load the new `SharedReadTablet`. +/// 3. `SharedReadTablet` may be cloned into thread local. So its cache should +/// be released as soon as possible, so there should be no strong reference +/// that prevents tablet from being dropped after it's marked as stale by other +/// threads. +pub struct SharedReadTablet { + tablet: Arc>>, + cache: Option, + source: bool, +} + +impl SharedReadTablet { + pub fn new(tablet: EK) -> Self { + Self { + tablet: Arc::new(Mutex::new(Some(tablet))), + cache: None, + source: true, + } + } + + /// Should call `fill_cache` first. + pub fn cache(&self) -> &EK { + self.cache.as_ref().unwrap() + } + + pub fn fill_cache(&mut self) -> bool + where + EK: Clone, + { + self.cache = self.tablet.lock().unwrap().clone(); + self.cache.is_some() + } + + pub fn release(&mut self) { + self.cache = None; + } +} + +impl Clone for SharedReadTablet { + fn clone(&self) -> Self { + Self { + tablet: Arc::clone(&self.tablet), + cache: None, + source: false, + } + } +} + +impl Drop for SharedReadTablet { + fn drop(&mut self) { + if self.source { + self.tablet.lock().unwrap().take(); + } + } +} + +enum ReadResult { + Ok(T), + Redirect, + RetryForStaleDelegate, + Err(E), +} + +fn fail_resp(msg: String) -> RaftCmdResponse { + let mut err = errorpb::Error::default(); + err.set_message(msg); + let mut resp = RaftCmdResponse::default(); + resp.mut_header().set_error(err); + resp +} + #[derive(Clone)] pub struct LocalReader where @@ -67,63 +147,69 @@ where E: KvEngine, C: MsgRouter, { - pub fn new( - store_meta: Arc>, - reg: TabletRegistry, - router: C, - logger: Logger, - ) -> Self { + pub fn new(store_meta: Arc>>, router: C, logger: Logger) -> Self { Self { - local_reader: LocalReaderCore::new(StoreMetaDelegate::new(store_meta, reg)), + local_reader: 
LocalReaderCore::new(StoreMetaDelegate::new(store_meta)), router, logger, } } - pub fn store_meta(&self) -> &Arc> { + pub fn store_meta(&self) -> &Arc>> { &self.local_reader.store_meta().store_meta } - pub fn pre_propose_raft_command( + fn pre_propose_raft_command( &mut self, req: &RaftCmdRequest, - ) -> Result, RequestPolicy)>> { - if let Some(delegate) = self.local_reader.validate_request(req)? { - let mut inspector = SnapRequestInspector { - delegate: &delegate, - logger: &self.logger, - }; - match inspector.inspect(req) { - Ok(RequestPolicy::ReadLocal) => Ok(Some((delegate, RequestPolicy::ReadLocal))), - Ok(RequestPolicy::StaleRead) => Ok(Some((delegate, RequestPolicy::StaleRead))), - // It can not handle other policies. - // TODO: we should only abort when lease expires. For other cases we should retry - // infinitely. - Ok(_) => Ok(None), - Err(e) => Err(e), + ) -> ReadResult<(CachedReadDelegate, ReadRequestPolicy)> { + let mut delegate = match self.local_reader.validate_request(req) { + Ok(Some(delegate)) => delegate, + Ok(None) => return ReadResult::Redirect, + Err(e) => return ReadResult::Err(e), + }; + + if !delegate.cached_tablet.fill_cache() { + return ReadResult::RetryForStaleDelegate; + } + let mut inspector = SnapRequestInspector { + delegate: &delegate, + logger: &self.logger, + }; + match inspector.inspect(req) { + Ok(ReadRequestPolicy::ReadLocal) => { + ReadResult::Ok((delegate, ReadRequestPolicy::ReadLocal)) } - } else { - Err(Error::RegionNotFound(req.get_header().get_region_id())) + Ok(ReadRequestPolicy::StaleRead) => { + ReadResult::Ok((delegate, ReadRequestPolicy::StaleRead)) + } + // It can not handle other policies. + // TODO: we should only abort when lease expires. For other cases we should retry + // infinitely. 
+ Ok(ReadRequestPolicy::ReadIndex) => ReadResult::Redirect, + Err(e) => ReadResult::Err(e), } } fn try_get_snapshot( &mut self, req: &RaftCmdRequest, - ) -> std::result::Result>, RaftCmdResponse> { + ) -> ReadResult, RaftCmdResponse> { match self.pre_propose_raft_command(req) { - Ok(Some((mut delegate, policy))) => { + ReadResult::Ok((mut delegate, policy)) => { let mut snap = match policy { - RequestPolicy::ReadLocal => { + ReadRequestPolicy::ReadLocal => { let region = Arc::clone(&delegate.region); - let snap = - RegionSnapshot::from_snapshot(delegate.get_snapshot(&None), region); + let snap = RegionSnapshot::from_snapshot( + Arc::new(delegate.cached_tablet.cache().snapshot()), + region, + ); // Ensures the snapshot is acquired before getting the time atomic::fence(atomic::Ordering::Release); let snapshot_ts = monotonic_raw_now(); if !delegate.is_in_leader_lease(snapshot_ts) { - return Ok(None); + return ReadResult::Redirect; } TLS_LOCAL_READ_METRICS @@ -133,18 +219,24 @@ where self.maybe_renew_lease_in_advance(&delegate, req, snapshot_ts); snap } - RequestPolicy::StaleRead => { + ReadRequestPolicy::StaleRead => { let read_ts = decode_u64(&mut req.get_header().get_flag_data()).unwrap(); - delegate.check_stale_read_safe(read_ts)?; + if let Err(e) = delegate.check_stale_read_safe(read_ts) { + return ReadResult::Err(e); + } let region = Arc::clone(&delegate.region); - let snap = - RegionSnapshot::from_snapshot(delegate.get_snapshot(&None), region); + let snap = RegionSnapshot::from_snapshot( + Arc::new(delegate.cached_tablet.cache().snapshot()), + region, + ); TLS_LOCAL_READ_METRICS .with(|m| m.borrow_mut().local_executed_requests.inc()); - delegate.check_stale_read_safe(read_ts)?; + if let Err(e) = delegate.check_stale_read_safe(read_ts) { + return ReadResult::Err(e); + } TLS_LOCAL_READ_METRICS .with(|m| m.borrow_mut().local_executed_stale_read_requests.inc()); @@ -156,10 +248,11 @@ where snap.txn_ext = Some(delegate.txn_ext.clone()); snap.bucket_meta = 
delegate.bucket_meta.clone(); - Ok(Some(snap)) + delegate.cached_tablet.release(); + + ReadResult::Ok(snap) } - Ok(None) => Ok(None), - Err(e) => { + ReadResult::Err(e) => { let mut response = cmd_resp::new_error(e); if let Some(delegate) = self .local_reader @@ -168,8 +261,10 @@ where { cmd_resp::bind_term(&mut response, delegate.term); } - Err(response) + ReadResult::Err(response) } + ReadResult::Redirect => ReadResult::Redirect, + ReadResult::RetryForStaleDelegate => ReadResult::RetryForStaleDelegate, } } @@ -179,50 +274,85 @@ where ) -> impl Future, RaftCmdResponse>> + Send { let region_id = req.header.get_ref().region_id; - let res = match self.try_get_snapshot(&req) { - res @ (Ok(Some(_)) | Err(_)) => Either::Left(res), - Ok(None) => Either::Right((self.try_to_renew_lease(region_id, &req), self.clone())), + let mut tried_cnt = 0; + let res = loop { + let res = self.try_get_snapshot(&req); + match res { + ReadResult::Ok(snap) => break Either::Left(Ok(snap)), + ReadResult::Err(e) => break Either::Left(Err(e)), + ReadResult::Redirect => { + break Either::Right((self.try_to_renew_lease(region_id, &req), self.clone())); + } + ReadResult::RetryForStaleDelegate => { + tried_cnt += 1; + if tried_cnt < 10 { + continue; + } + break Either::Left(Err(fail_resp(format!( + "internal error: failed to get valid dalegate for {}", + region_id + )))); + } + } }; worker_metrics::maybe_tls_local_read_metrics_flush(); async move { - match res { - Either::Left(Ok(Some(snap))) => Ok(snap), - Either::Left(Err(e)) => Err(e), - Either::Right((fut, mut reader)) => { - let err = match fut.await? { - Some(query_res) => { - if query_res.read().is_some() { - // If query successful, try again. - req.mut_header().set_read_quorum(false); - if let Some(snap) = reader.try_get_snapshot(&req)? 
{ - return Ok(snap); - } else { - let mut err = errorpb::Error::default(); - err.set_message(format!("no delegate found for {}", region_id)); - err - } - } else { - let QueryResult::Response(res) = query_res else { unreachable!() }; - assert!(res.get_header().has_error(), "{:?}", res); - return Err(res); + let (mut fut, mut reader) = match res { + Either::Left(Ok(snap)) => return Ok(snap), + Either::Left(Err(e)) => return Err(e), + Either::Right((fut, reader)) => (fut, reader), + }; + + let mut tried_cnt = 0; + loop { + match fut.await? { + Some(query_res) => { + if query_res.read().is_none() { + let QueryResult::Response(res) = query_res else { unreachable!() }; + assert!(res.get_header().has_error(), "{:?}", res); + return Err(res); + } + } + None => { + return Err(fail_resp(format!( + "internal error: failed to extend lease: canceled: {}", + region_id + ))); + } + } + + // If query successful, try again. + req.mut_header().set_read_quorum(false); + loop { + let r = reader.try_get_snapshot(&req); + match r { + ReadResult::Ok(snap) => return Ok(snap), + ReadResult::Err(e) => return Err(e), + ReadResult::Redirect => { + tried_cnt += 1; + if tried_cnt < 10 { + fut = reader.try_to_renew_lease(region_id, &req); + break; } + return Err(fail_resp(format!( + "internal error: can't handle msg in local reader for {}", + region_id + ))); } - None => { - let mut err = errorpb::Error::default(); - err.set_message(format!( - "failed to extend lease: canceled: {}", + ReadResult::RetryForStaleDelegate => { + tried_cnt += 1; + if tried_cnt < 10 { + continue; + } + return Err(fail_resp(format!( + "internal error: failed to get valid dalegate for {}", region_id - )); - err + ))); } - }; - let mut resp = RaftCmdResponse::default(); - resp.mut_header().set_error(err); - Err(resp) + } } - Either::Left(Ok(None)) => unreachable!(), } } } @@ -309,7 +439,7 @@ where // The reason for this to be Arc, see the comment on get_delegate in // raftstore/src/store/worker/read.rs delegate: Arc, - 
cached_tablet: CachedTablet, + cached_tablet: SharedReadTablet, } impl Deref for CachedReadDelegate @@ -335,36 +465,20 @@ where } } -impl ReadExecutor for CachedReadDelegate -where - E: KvEngine, -{ - type Tablet = E; - - fn get_tablet(&mut self) -> &E { - self.cached_tablet.latest().unwrap() - } - - fn get_snapshot(&mut self, _: &Option>) -> Arc { - Arc::new(self.cached_tablet.latest().unwrap().snapshot()) - } -} - #[derive(Clone)] struct StoreMetaDelegate where E: KvEngine, { - store_meta: Arc>, - reg: TabletRegistry, + store_meta: Arc>>, } impl StoreMetaDelegate where E: KvEngine, { - pub fn new(store_meta: Arc>, reg: TabletRegistry) -> StoreMetaDelegate { - StoreMetaDelegate { store_meta, reg } + pub fn new(store_meta: Arc>>) -> StoreMetaDelegate { + StoreMetaDelegate { store_meta } } } @@ -373,7 +487,7 @@ where E: KvEngine, { type Executor = CachedReadDelegate; - type StoreMeta = Arc>; + type StoreMeta = Arc>>; fn store_id(&self) -> Option { Some(self.store_meta.as_ref().lock().unwrap().store_id) @@ -384,14 +498,13 @@ where fn get_executor_and_len(&self, region_id: u64) -> (usize, Option) { let meta = self.store_meta.as_ref().lock().unwrap(); let reader = meta.readers.get(®ion_id).cloned(); - if let Some(reader) = reader { + if let Some((reader, read_tablet)) = reader { // If reader is not None, cache must not be None. 
- let cached_tablet = self.reg.get(region_id).unwrap(); return ( meta.readers.len(), Some(CachedReadDelegate { delegate: Arc::new(reader), - cached_tablet, + cached_tablet: read_tablet, }), ); } @@ -399,13 +512,19 @@ where } } +enum ReadRequestPolicy { + StaleRead, + ReadLocal, + ReadIndex, +} + struct SnapRequestInspector<'r> { delegate: &'r ReadDelegate, logger: &'r Logger, } impl<'r> SnapRequestInspector<'r> { - fn inspect(&mut self, req: &RaftCmdRequest) -> Result { + fn inspect(&mut self, req: &RaftCmdRequest) -> Result { assert!(!req.has_admin_request()); if req.get_requests().len() != 1 || req.get_requests().first().unwrap().get_cmd_type() != CmdType::Snap @@ -417,26 +536,26 @@ impl<'r> SnapRequestInspector<'r> { let flags = WriteBatchFlags::from_bits_check(req.get_header().get_flags()); if flags.contains(WriteBatchFlags::STALE_READ) { - return Ok(RequestPolicy::StaleRead); + return Ok(ReadRequestPolicy::StaleRead); } if req.get_header().get_read_quorum() { - return Ok(RequestPolicy::ReadIndex); + return Ok(ReadRequestPolicy::ReadIndex); } // If applied index's term differs from current raft's term, leader transfer // must happened, if read locally, we may read old value. if !self.has_applied_to_current_term() { - return Ok(RequestPolicy::ReadIndex); + return Ok(ReadRequestPolicy::ReadIndex); } // Local read should be performed, if and only if leader is in lease. // None for now. match self.inspect_lease() { - LeaseState::Valid => Ok(RequestPolicy::ReadLocal), + LeaseState::Valid => Ok(ReadRequestPolicy::ReadLocal), LeaseState::Expired | LeaseState::Suspect => { // Perform a consistent read to Raft quorum and try to renew the leader lease. 
- Ok(RequestPolicy::ReadIndex) + Ok(ReadRequestPolicy::ReadIndex) } } } @@ -480,12 +599,13 @@ mod tests { thread::{self, JoinHandle}, }; + use collections::HashSet; use crossbeam::{atomic::AtomicCell, channel::TrySendError}; use engine_test::{ ctor::{CfOptions, DbOptions}, kv::{KvTestEngine, TestTabletFactory}, }; - use engine_traits::{MiscExt, Peekable, SyncMutable, TabletContext, DATA_CFS}; + use engine_traits::{MiscExt, SyncMutable, TabletContext, TabletRegistry, DATA_CFS}; use futures::executor::block_on; use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, raft_cmdpb::*}; use pd_client::BucketMeta; @@ -505,17 +625,27 @@ mod tests { #[derive(Clone)] struct MockRouter { p_router: SyncSender<(u64, PeerMsg)>, + addresses: Arc>>, } impl MockRouter { - fn new() -> (MockRouter, Receiver<(u64, PeerMsg)>) { + fn new(addresses: Arc>>) -> (MockRouter, Receiver<(u64, PeerMsg)>) { let (p_ch, p_rx) = sync_channel(1); - (MockRouter { p_router: p_ch }, p_rx) + ( + MockRouter { + p_router: p_ch, + addresses, + }, + p_rx, + ) } } impl MsgRouter for MockRouter { fn send(&self, addr: u64, cmd: PeerMsg) -> std::result::Result<(), TrySendError> { + if !self.addresses.lock().unwrap().contains(&addr) { + return Err(TrySendError::Disconnected(cmd)); + } self.p_router.send((addr, cmd)).unwrap(); Ok(()) } @@ -524,16 +654,15 @@ mod tests { #[allow(clippy::type_complexity)] fn new_reader( store_id: u64, - store_meta: Arc>, - reg: TabletRegistry, + store_meta: Arc>>, + addresses: Arc>>, ) -> ( LocalReader, Receiver<(u64, PeerMsg)>, ) { - let (ch, rx) = MockRouter::new(); + let (ch, rx) = MockRouter::new(addresses); let mut reader = LocalReader::new( store_meta, - reg, ch, Logger::root(slog::Discard, o!("key1" => "value1")), ); @@ -607,7 +736,8 @@ mod tests { let reg = TabletRegistry::new(factory, path.path()).unwrap(); let store_meta = Arc::new(Mutex::new(StoreMeta::new(store_id))); - let (mut reader, mut rx) = new_reader(store_id, store_meta.clone(), reg.clone()); + let addresses: Arc>> 
= Arc::default(); + let (mut reader, mut rx) = new_reader(store_id, store_meta.clone(), addresses.clone()); let (mix_tx, mix_rx) = sync_channel(1); let handler = mock_raftstore(mix_rx); @@ -649,9 +779,11 @@ mod tests { ); // No msg will ben sent rx.try_recv().unwrap_err(); + // It will be rejected first when processing local, and then rejected when + // trying to forward to raftstore. assert_eq!( TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.no_region.get()), - 1 + 2 ); assert_eq!( TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), @@ -683,10 +815,11 @@ mod tests { track_ver: TrackVer::new(), bucket_meta: Some(bucket_meta.clone()), }; - meta.readers.insert(1, read_delegate); // create tablet with region_id 1 and prepare some data let ctx = TabletContext::new(®ion1, Some(10)); - reg.load(ctx, true).unwrap(); + let mut tablet = reg.load(ctx, true).unwrap(); + let shared = SharedReadTablet::new(tablet.latest().unwrap().clone()); + meta.readers.insert(1, (read_delegate, shared)); } let (ch_tx, ch_rx) = sync_channel(1); @@ -701,6 +834,7 @@ mod tests { meta.readers .get_mut(&1) .unwrap() + .0 .update(ReadProgress::applied_term(term6)); }), rx, @@ -710,6 +844,7 @@ mod tests { // The first try will be rejected due to unmatched applied term but after update // the applied term by the above thread, the snapshot will be acquired by // retrying. + addresses.lock().unwrap().insert(1); let snap = block_on(reader.snapshot(cmd.clone())).unwrap(); assert!(Arc::ptr_eq(snap.txn_ext.as_ref().unwrap(), &txn_ext)); assert!(Arc::ptr_eq( @@ -730,14 +865,16 @@ mod tests { // Case: Expire lease to make the local reader lease check fail. 
lease.expire_remote_lease(); let remote = lease.maybe_new_remote_lease(term6).unwrap(); + let meta = store_meta.clone(); // Send what we want to do to mock raftstore mix_tx .send(( Box::new(move || { - let mut meta = store_meta.lock().unwrap(); + let mut meta = meta.lock().unwrap(); meta.readers .get_mut(&1) .unwrap() + .0 .update(ReadProgress::leader_lease(remote)); }), rx, @@ -757,6 +894,25 @@ mod tests { ); rx = ch_rx.recv().unwrap(); + // Case: Tablet miss should triger retry. + { + let ctx = TabletContext::new(®ion1, Some(15)); + let mut tablet = reg.load(ctx, true).unwrap(); + let shared = SharedReadTablet::new(tablet.latest().unwrap().clone()); + let mut meta = store_meta.lock().unwrap(); + meta.readers.get_mut(&1).unwrap().1 = shared; + } + block_on(reader.snapshot(cmd.clone())).unwrap(); + // Tablet miss should trigger reload tablet, so cache miss should increase. + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.cache_miss.get()), + 6 + ); + assert_eq!( + TLS_LOCAL_READ_METRICS.with(|m| m.borrow().reject_reason.lease_expire.get()), + 1 + ); + // Case: Read quorum. 
let mut cmd_read_quorum = cmd.clone(); cmd_read_quorum.mut_header().set_read_quorum(true); @@ -806,8 +962,7 @@ mod tests { let factory = Box::new(TestTabletFactory::new(ops, cf_opts)); let reg = TabletRegistry::new(factory, path.path()).unwrap(); - let store_meta = - StoreMetaDelegate::new(Arc::new(Mutex::new(StoreMeta::new(1))), reg.clone()); + let store_meta = StoreMetaDelegate::new(Arc::new(Mutex::new(StoreMeta::new(1)))); let tablet1; let tablet2; @@ -816,43 +971,46 @@ mod tests { // Create read_delegate with region id 1 let read_delegate = ReadDelegate::mock(1); - meta.readers.insert(1, read_delegate); // create tablet with region_id 1 and prepare some data let mut ctx = TabletContext::with_infinite_region(1, Some(10)); reg.load(ctx, true).unwrap(); tablet1 = reg.get(1).unwrap().latest().unwrap().clone(); tablet1.put(b"a1", b"val1").unwrap(); + let shared1 = SharedReadTablet::new(tablet1.clone()); + meta.readers.insert(1, (read_delegate, shared1)); // Create read_delegate with region id 2 let read_delegate = ReadDelegate::mock(2); - meta.readers.insert(2, read_delegate); // create tablet with region_id 1 and prepare some data ctx = TabletContext::with_infinite_region(2, Some(10)); reg.load(ctx, true).unwrap(); tablet2 = reg.get(2).unwrap().latest().unwrap().clone(); tablet2.put(b"a2", b"val2").unwrap(); + let shared2 = SharedReadTablet::new(tablet2.clone()); + meta.readers.insert(2, (read_delegate, shared2)); } let (_, delegate) = store_meta.get_executor_and_len(1); let mut delegate = delegate.unwrap(); - let tablet = delegate.get_tablet(); + assert!(delegate.cached_tablet.fill_cache()); + let tablet = delegate.cached_tablet.cache(); assert_eq!(tablet1.path(), tablet.path()); - let snapshot = delegate.get_snapshot(&None); - assert_eq!( - b"val1".to_vec(), - *snapshot.get_value(b"a1").unwrap().unwrap() - ); + let path1 = tablet.path().to_owned(); + delegate.cached_tablet.release(); let (_, delegate) = store_meta.get_executor_and_len(2); let mut delegate = 
delegate.unwrap(); - let tablet = delegate.get_tablet(); + assert!(delegate.cached_tablet.fill_cache()); + let tablet = delegate.cached_tablet.cache(); assert_eq!(tablet2.path(), tablet.path()); - let snapshot = delegate.get_snapshot(&None); - assert_eq!( - b"val2".to_vec(), - *snapshot.get_value(b"a2").unwrap().unwrap() - ); + + assert!(KvTestEngine::locked(&path1).unwrap()); + drop(tablet1); + drop(reg); + assert!(KvTestEngine::locked(&path1).unwrap()); + store_meta.store_meta.lock().unwrap().readers.remove(&1); + assert!(!KvTestEngine::locked(&path1).unwrap()); } } diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 59c6f2d0f7c..f26659c7b89 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -46,7 +46,7 @@ mod lease; mod local; mod replica; -pub(crate) use self::local::LocalReader; +pub(crate) use self::local::{LocalReader, ReadDelegatePair, SharedReadTablet}; impl<'a, EK: KvEngine, ER: RaftEngine, T: raftstore::store::Transport> PeerFsmDelegate<'a, EK, ER, T> @@ -436,7 +436,7 @@ impl Peer { } let progress = ReadProgress::applied_term(applied_term); let mut meta = ctx.store_meta.lock().unwrap(); - let reader = meta.readers.get_mut(&self.region_id()).unwrap(); + let reader = &mut meta.readers.get_mut(&self.region_id()).unwrap().0; self.maybe_update_read_progress(reader, progress); } } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 8a0e0770b1f..fe4208db549 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -306,7 +306,8 @@ impl Peer { let mut update_lease = self.is_leader(); if update_lease { for entry in committed_entries.iter().rev() { - self.update_approximate_raft_log_size(|s| s + entry.get_data().len() as u64); + self.compact_log_context_mut() + 
.add_log_size(entry.get_data().len() as u64); if update_lease { let propose_time = self .proposals() @@ -329,7 +330,8 @@ impl Peer { } let applying_index = committed_entries.last().unwrap().index; let commit_to_current_term = committed_entries.last().unwrap().term == self.term(); - *self.last_applying_index_mut() = applying_index; + self.compact_log_context_mut() + .set_last_applying_index(applying_index); if needs_evict_entry_cache(ctx.cfg.evict_cache_on_memory_ratio) { // Compact all cached entries instead of half evict. self.entry_storage_mut().evict_entry_cache(false); @@ -426,7 +428,7 @@ impl Peer { self.merge_state_changes_to(&mut write_task); self.storage_mut() .handle_raft_ready(ctx, &mut ready, &mut write_task); - self.on_advance_persisted_apply_index(ctx, prev_persisted, &mut write_task); + self.on_advance_persisted_apply_index(ctx, prev_persisted, Some(&mut write_task)); if !ready.persisted_messages().is_empty() { write_task.messages = ready @@ -612,9 +614,11 @@ impl Peer { // leader apply the split command or an election timeout is passed since split // is committed. We already forbid renewing lease after committing split, and // original leader will update the reader delegate with latest epoch after - // applying split before the split peer starts campaign, so here the only thing - // we need to do is marking split is committed (which is done by `commit_to` - // above). It's correct to allow local read during split. + // applying split before the split peer starts campaign, so what needs to be + // done are 1. mark split is committed, which is done by `commit_to` above, + // 2. make sure split result is invisible until epoch is updated or reader may + // miss data from the new tablet. This is done by always publish tablet in + // `on_apply_res_split`. So it's correct to allow local read during split. 
// // - For merge, after the prepare merge command is committed, the target peers // may apply commit merge at any time, so we need to forbid any type of read diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 76a5b4297b3..2e1b9362a69 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -45,7 +45,7 @@ use tikv_util::box_err; use crate::{ fsm::ApplyResReporter, - operation::command::temp_split_path, + operation::{command::temp_split_path, SharedReadTablet}, raft::{Apply, Peer, Storage}, Result, StoreContext, }; @@ -197,7 +197,8 @@ impl Peer { StateRole::Follower, ); let persisted_index = self.persisted_index(); - *self.last_applying_index_mut() = persisted_index; + self.compact_log_context_mut() + .set_last_applying_index(persisted_index); let snapshot_index = self.entry_storage().truncated_index(); assert!(snapshot_index >= RAFT_INIT_LOG_INDEX, "{:?}", self.logger); // If leader sends a message append to the follower while it's applying @@ -211,18 +212,41 @@ impl Peer { let mut tablet_ctx = TabletContext::new(self.region(), Some(snapshot_index)); // Use a new FlushState to avoid conflicts with the old one. 
tablet_ctx.flush_state = Some(flush_state); - ctx.tablet_registry.load(tablet_ctx, false).unwrap(); - self.record_tablet_as_tombstone_and_refresh(snapshot_index, ctx); + let path = ctx.tablet_registry.tablet_path(region_id, snapshot_index); + assert!( + path.exists(), + "{:?} {} not exists", + self.logger.list(), + path.display() + ); + let tablet = ctx + .tablet_registry + .tablet_factory() + .open_tablet(tablet_ctx, &path) + .unwrap_or_else(|e| { + panic!( + "{:?} failed to load tablet at {}: {:?}", + self.logger.list(), + path.display(), + e + ); + }); + + let prev_persisted_applied = self.storage().apply_trace().persisted_apply_index(); self.storage_mut().on_applied_snapshot(); self.raft_group_mut().advance_apply_to(snapshot_index); + let read_tablet = SharedReadTablet::new(tablet.clone()); { let mut meta = ctx.store_meta.lock().unwrap(); meta.set_region(self.region(), true, &self.logger); meta.readers - .insert(region_id, self.generate_read_delegate()); + .insert(region_id, (self.generate_read_delegate(), read_tablet)); meta.region_read_progress .insert(region_id, self.read_progress().clone()); } + if let Some(tablet) = self.set_tablet(tablet) { + self.record_tombstone_tablet(ctx, tablet, snapshot_index); + } self.read_progress_mut().update_applied_core(snapshot_index); let split = self.storage_mut().split_init_mut().take(); if split.as_ref().map_or(true, |s| { @@ -234,6 +258,7 @@ impl Peer { info!(self.logger, "init split with snapshot finished"); self.post_split_init(ctx, init); } + self.on_advance_persisted_apply_index(ctx, prev_persisted_applied, None); self.schedule_apply_fsm(ctx); } } @@ -506,7 +531,22 @@ impl Storage { let old_last_index = self.entry_storage().last_index(); if self.entry_storage().first_index() <= old_last_index { // All states are rewritten in the following blocks. Stale states will be - // cleaned up by compact worker. + // cleaned up by compact worker. 
Have to use raft write batch here becaue + // raft log engine expects deletes before writes. + let raft_engine = self.entry_storage().raft_engine(); + if task.raft_wb.is_none() { + task.raft_wb = Some(raft_engine.log_batch(64)); + } + let wb = task.raft_wb.as_mut().unwrap(); + raft_engine + .clean(region.get_id(), 0, self.entry_storage().raft_state(), wb) + .unwrap_or_else(|e| { + panic!( + "{:?} failed to clean up region: {:?}", + self.logger().list(), + e + ) + }); self.entry_storage_mut().clear(); } diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 6818d7ae0d9..8660e4795d0 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -2,7 +2,7 @@ use std::{mem, sync::Arc}; -use engine_traits::{CachedTablet, FlushState, KvEngine, TabletRegistry, WriteBatch, DATA_CFS_LEN}; +use engine_traits::{FlushState, KvEngine, TabletRegistry, WriteBatch, DATA_CFS_LEN}; use kvproto::{metapb, raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; use raftstore::store::{ fsm::{apply::DEFAULT_APPLY_WB_SIZE, ApplyMetrics}, @@ -19,8 +19,6 @@ use crate::{ /// Apply applies all the committed commands to kv db. pub struct Apply { peer: metapb::Peer, - /// publish the update of the tablet - remote_tablet: CachedTablet, tablet: EK, pub write_batch: Option, /// A buffer for encoding key. @@ -79,7 +77,6 @@ impl Apply { Apply { peer, tablet: remote_tablet.latest().unwrap().clone(), - remote_tablet, write_batch: None, callbacks: vec![], tombstone: false, @@ -155,13 +152,16 @@ impl Apply { &mut self.region_state } - /// Publish the tablet so that it can be used by read worker. - /// - /// Note, during split/merge, lease is expired explicitly and read is - /// forbidden. So publishing it immediately is OK. + /// The tablet can't be public yet, otherwise content of latest tablet + /// doesn't matches its epoch in both readers and peer fsm. 
#[inline] - pub fn publish_tablet(&mut self, tablet: EK) { - self.remote_tablet.set(tablet.clone()); + pub fn set_tablet(&mut self, tablet: EK) { + assert!( + self.write_batch.as_ref().map_or(true, |wb| wb.is_empty()), + "{:?}", + self.logger.list() + ); + self.write_batch.take(); self.tablet = tablet; } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index f3734b6821d..bc3d8a5af8e 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -30,10 +30,10 @@ use crate::{ batch::StoreContext, fsm::ApplyScheduler, operation::{ - AsyncWriter, DestroyProgress, ProposalControl, SimpleWriteReqEncoder, SplitFlowControl, + AsyncWriter, CompactLogContext, DestroyProgress, ProposalControl, SimpleWriteReqEncoder, + SplitFlowControl, }, router::{CmdResChannel, PeerTick, QueryResChannel}, - worker::tablet_gc, Result, }; @@ -43,11 +43,6 @@ const REGION_READ_PROGRESS_CAP: usize = 128; pub struct Peer { raft_group: RawNode>, tablet: CachedTablet, - /// Tombstone tablets can only be destroyed when the tablet that replaces it - /// is persisted. This is a list of tablet index that awaits to be - /// persisted. When persisted_apply is advanced, we need to notify tablet_gc - /// worker to destroy them. - pending_tombstone_tablets: Vec, /// Statistics for self. self_stat: PeerStat, @@ -60,9 +55,7 @@ pub struct Peer { peer_heartbeats: HashMap, /// For raft log compaction. - skip_compact_log_ticks: usize, - approximate_raft_log_size: u64, - last_applying_index: u64, + compact_log_context: CompactLogContext, /// Encoder for batching proposals and encoding them in a more efficient way /// than protobuf. 
@@ -151,13 +144,10 @@ impl Peer { let tag = format!("[region {}] {}", region.get_id(), peer_id); let mut peer = Peer { tablet: cached_tablet, - pending_tombstone_tablets: Vec::new(), self_stat: PeerStat::default(), peer_cache: vec![], peer_heartbeats: HashMap::default(), - skip_compact_log_ticks: 0, - approximate_raft_log_size: 0, - last_applying_index: raft_group.store().apply_state().get_applied_index(), + compact_log_context: CompactLogContext::new(applied_index), raw_write_encoder: None, proposals: ProposalQueue::new(region_id, raft_group.raft.id), async_writer: AsyncWriter::new(region_id, peer_id), @@ -346,41 +336,18 @@ impl Peer { } #[inline] - pub fn record_tablet_as_tombstone_and_refresh( - &mut self, - new_tablet_index: u64, - ctx: &StoreContext, - ) { - if let Some(old_tablet) = self.tablet.cache() { - self.pending_tombstone_tablets.push(new_tablet_index); - let _ = ctx - .schedulers - .tablet_gc - .schedule(tablet_gc::Task::prepare_destroy( - old_tablet.clone(), - self.region_id(), - new_tablet_index, - )); - } - // TODO: Handle race between split and snapshot. So that we can assert - // `self.tablet.refresh() == 1` - assert!(self.tablet.refresh() > 0); + pub fn set_tablet(&mut self, tablet: EK) -> Option { + self.tablet.set(tablet) } - /// Returns if there's any tombstone being removed. 
#[inline] - pub fn remove_tombstone_tablets_before(&mut self, persisted: u64) -> bool { - let removed = self - .pending_tombstone_tablets - .iter() - .take_while(|i| **i <= persisted) - .count(); - if removed > 0 { - self.pending_tombstone_tablets.drain(..removed); - true - } else { - false - } + pub fn compact_log_context_mut(&mut self) -> &mut CompactLogContext { + &mut self.compact_log_context + } + + #[inline] + pub fn compact_log_context(&self) -> &CompactLogContext { + &self.compact_log_context } #[inline] @@ -543,35 +510,6 @@ impl Peer { down_peers } - #[inline] - pub fn reset_skip_compact_log_ticks(&mut self) { - self.skip_compact_log_ticks = 0; - } - - #[inline] - pub fn maybe_skip_compact_log(&mut self, max_skip_ticks: usize) -> bool { - if self.skip_compact_log_ticks < max_skip_ticks { - self.skip_compact_log_ticks += 1; - true - } else { - false - } - } - - #[inline] - pub fn approximate_raft_log_size(&self) -> u64 { - self.approximate_raft_log_size - } - - #[inline] - pub fn update_approximate_raft_log_size(&mut self, f: impl Fn(u64) -> u64) { - self.approximate_raft_log_size = f(self.approximate_raft_log_size); - } - - pub fn last_applying_index_mut(&mut self) -> &mut u64 { - &mut self.last_applying_index - } - #[inline] pub fn state_role(&self) -> StateRole { self.raft_group.raft.state diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 51bd41ba253..b0eec5a196c 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -307,8 +307,9 @@ mod tests { }; use raft::{Error as RaftError, StorageError}; use raftstore::store::{ - util::new_empty_snapshot, AsyncReadNotifier, FetchedLogs, GenSnapRes, ReadRunner, - TabletSnapKey, TabletSnapManager, WriteTask, + util::new_empty_snapshot, write_to_db_for_test, AsyncReadNotifier, FetchedLogs, GenSnapRes, + ReadRunner, TabletSnapKey, TabletSnapManager, WriteTask, RAFT_INIT_LOG_INDEX, + RAFT_INIT_LOG_TERM, }; 
use slog::o; use tempfile::TempDir; @@ -357,14 +358,20 @@ mod tests { region } + fn new_entry(index: u64, term: u64) -> Entry { + let mut e = Entry::default(); + e.set_index(index); + e.set_term(term); + e + } + #[test] fn test_apply_snapshot() { let region = new_region(); let path = TempDir::new().unwrap(); let mgr = TabletSnapManager::new(path.path().join("snap_dir").to_str().unwrap()).unwrap(); - let raft_engine = - engine_test::raft::new_engine(&format!("{}", path.path().join("raft").display()), None) - .unwrap(); + let engines = engine_test::new_temp_engine(&path); + let raft_engine = engines.raft.clone(); let mut wb = raft_engine.log_batch(10); write_initial_states(&mut wb, region.clone()).unwrap(); assert!(!wb.is_empty()); @@ -381,26 +388,57 @@ mod tests { .unwrap() .unwrap(); - let snapshot = new_empty_snapshot(region.clone(), 10, 9, false); - let mut task = WriteTask::new(region.get_id(), 5, 0); - s.apply_snapshot(&snapshot, &mut task, mgr, reg).unwrap(); + let mut task = WriteTask::new(region.get_id(), 5, 1); + let entries = (RAFT_INIT_LOG_INDEX + 1..RAFT_INIT_LOG_INDEX + 10) + .map(|i| new_entry(i, RAFT_INIT_LOG_TERM)) + .collect(); + s.entry_storage_mut().append(entries, &mut task); + write_to_db_for_test(&engines, task); + + let snap_index = RAFT_INIT_LOG_INDEX + 20; + let snap_term = 9; + let path = mgr.final_recv_path(&TabletSnapKey::new( + region.get_id(), + 5, + snap_term, + snap_index, + )); + reg.tablet_factory() + .open_tablet(TabletContext::new(®ion, Some(snap_index)), &path) + .unwrap(); + let snapshot = new_empty_snapshot(region.clone(), snap_index, snap_term, false); + let mut task = WriteTask::new(region.get_id(), 5, 1); + s.apply_snapshot(&snapshot, &mut task, mgr, reg.clone()) + .unwrap(); + // Add more entries to check if old entries are cleared. If not, it should panic + // with memtable hole when using raft engine. 
+ let entries = (snap_index + 1..=snap_index + 10) + .map(|i| new_entry(i, snap_term)) + .collect(); + s.entry_storage_mut().append(entries, &mut task); + + assert!(!reg.tablet_path(region.get_id(), snap_index).exists()); + assert!(!task.persisted_cbs.is_empty()); + + write_to_db_for_test(&engines, task); + + assert!(reg.tablet_path(region.get_id(), snap_index).exists()); // It can be set before load tablet. assert_eq!(PeerState::Normal, s.region_state().get_state()); - assert_eq!(10, s.entry_storage().truncated_index()); - assert_eq!(9, s.entry_storage().truncated_term()); - assert_eq!(9, s.entry_storage().last_term()); - assert_eq!(10, s.entry_storage().raft_state().last_index); + assert_eq!(snap_index, s.entry_storage().truncated_index()); + assert_eq!(snap_term, s.entry_storage().truncated_term()); + assert_eq!(snap_term, s.entry_storage().last_term()); + assert_eq!(snap_index + 10, s.entry_storage().raft_state().last_index); // This index can't be set before load tablet. - assert_ne!(10, s.entry_storage().applied_index()); - assert_ne!(9, s.entry_storage().applied_term()); - assert_eq!(10, s.region_state().get_tablet_index()); - assert!(!task.persisted_cbs.is_empty()); + assert_ne!(snap_index, s.entry_storage().applied_index()); + assert_ne!(snap_term, s.entry_storage().applied_term()); + assert_eq!(snap_index, s.region_state().get_tablet_index()); s.on_applied_snapshot(); - assert_eq!(10, s.entry_storage().applied_index()); - assert_eq!(9, s.entry_storage().applied_term()); - assert_eq!(10, s.region_state().get_tablet_index()); + assert_eq!(snap_index, s.entry_storage().applied_index()); + assert_eq!(snap_term, s.entry_storage().applied_term()); + assert_eq!(snap_index, s.region_state().get_tablet_index()); } #[test] diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 668d7591a40..7a10c6c6b16 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -6,7 +6,7 
@@ use std::{ }; use crossbeam::channel::TrySendError; -use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; +use engine_traits::{KvEngine, RaftEngine}; use futures::Future; use kvproto::{ raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, @@ -115,13 +115,13 @@ where } impl RaftRouter { - pub fn new(store_id: u64, reg: TabletRegistry, router: StoreRouter) -> Self { + pub fn new(store_id: u64, router: StoreRouter) -> Self { let store_meta = Arc::new(Mutex::new(StoreMeta::new(store_id))); let logger = router.logger().clone(); RaftRouter { router: router.clone(), - local_reader: LocalReader::new(store_meta, reg, router, logger), + local_reader: LocalReader::new(store_meta, router, logger), } } @@ -138,7 +138,7 @@ impl RaftRouter { self.router.check_send(addr, msg) } - pub fn store_meta(&self) -> &Arc> { + pub fn store_meta(&self) -> &Arc>> { self.local_reader.store_meta() } diff --git a/components/raftstore-v2/src/worker/tablet_gc.rs b/components/raftstore-v2/src/worker/tablet_gc.rs index cc1fcd971e9..aba477f883f 100644 --- a/components/raftstore-v2/src/worker/tablet_gc.rs +++ b/components/raftstore-v2/src/worker/tablet_gc.rs @@ -9,7 +9,7 @@ use std::{ use collections::HashMap; use engine_traits::{DeleteStrategy, KvEngine, Range, TabletContext, TabletRegistry}; use kvproto::metapb::Region; -use slog::{error, warn, Logger}; +use slog::{debug, error, warn, Logger}; use tikv_util::worker::{Runnable, RunnableWithTimer}; pub enum Task { @@ -156,10 +156,15 @@ impl Runner { "path" => path.display(), ), Ok(false) => { + let (_, region_id, tablet_index) = + registry.parse_tablet_name(path).unwrap_or(("", 0, 0)); // TODO: use a meaningful table context. 
let _ = registry .tablet_factory() - .destroy_tablet(TabletContext::with_infinite_region(0, None), path) + .destroy_tablet( + TabletContext::with_infinite_region(region_id, Some(tablet_index)), + path, + ) .map_err(|e| { warn!( logger, @@ -170,7 +175,9 @@ impl Runner { }); return true; } - _ => {} + Ok(true) => { + debug!(logger, "ignore locked tablet"; "path" => path.display()); + } } false } @@ -222,6 +229,6 @@ where } fn get_interval(&self) -> Duration { - Duration::from_secs(2) + Duration::from_secs(10) } } diff --git a/components/raftstore-v2/tests/failpoints/test_split.rs b/components/raftstore-v2/tests/failpoints/test_split.rs index 79356ae5805..e67041ab181 100644 --- a/components/raftstore-v2/tests/failpoints/test_split.rs +++ b/components/raftstore-v2/tests/failpoints/test_split.rs @@ -82,6 +82,9 @@ fn test_restart_resume() { .new_request_for(split_region_id) .take_header() .take_region_epoch(); + // Split will be resumed for region 2, not removing the fp will make write block + // forever. 
+ fail::remove(fp); let timer = Instant::now(); for (region_id, key, val) in cases { let mut put = SimpleWriteEncoder::with_capacity(64); diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 4c025a0fc85..ce0248130fb 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -276,7 +276,7 @@ impl RunningState { factory.open_tablet(ctx, &path).unwrap(); } - let router = RaftRouter::new(store_id, registry.clone(), router); + let router = RaftRouter::new(store_id, router); let store_meta = router.store_meta().clone(); let snap_mgr = TabletSnapManager::new(path.join("tablets_snap").to_str().unwrap()).unwrap(); diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 56d0f93a11d..817ff576f67 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -912,7 +912,6 @@ where } /// Used for test to write task to kv db and raft db. 
-#[cfg(test)] pub fn write_to_db_for_test( engines: &engine_traits::Engines, task: WriteTask, @@ -922,7 +921,8 @@ pub fn write_to_db_for_test( { let mut batch = WriteTaskBatch::new(engines.raft.log_batch(RAFT_WB_DEFAULT_SIZE)); batch.add_write_task(&engines.raft, task); - batch.before_write_to_db(&StoreWriteMetrics::new(false)); + let metrics = StoreWriteMetrics::new(false); + batch.before_write_to_db(&metrics); if let ExtraBatchWrite::V1(kv_wb) = &mut batch.extra_batch_write { if !kv_wb.is_empty() { let mut write_opts = WriteOptions::new(); @@ -939,6 +939,8 @@ pub fn write_to_db_for_test( }); } } + batch.after_write_to_raft_db(&metrics); + batch.after_write_all(); } #[cfg(test)] diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 62561c63cbc..42fb320035b 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -31,7 +31,10 @@ pub use self::msg::PeerInternalStat; pub use self::{ async_io::{ read::{AsyncReadNotifier, FetchedLogs, GenSnapRes, ReadRunner, ReadTask}, - write::{PersistedNotifier, StoreWriters, Worker as WriteWorker, WriteMsg, WriteTask}, + write::{ + write_to_db_for_test, PersistedNotifier, StoreWriters, Worker as WriteWorker, WriteMsg, + WriteTask, + }, write_router::{WriteRouter, WriteRouterContext, WriteSenders}, }, bootstrap::{ diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index a8fc2e6e3df..a7849f5e1dd 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -286,7 +286,7 @@ impl Drop for ReadDelegate { /// #[RaftstoreCommon] pub trait ReadExecutorProvider: Send + Clone + 'static { - type Executor: ReadExecutor; + type Executor; type StoreMeta; fn store_id(&self) -> Option; @@ -687,11 +687,7 @@ where /// #[RaftstoreCommon]: LocalReader is an entry point where local read requests are dipatch to the /// relevant regions by LocalReader so that these 
requests can be handled by the /// relevant ReadDelegate respectively. -pub struct LocalReaderCore -where - D: ReadExecutor + Deref, - S: ReadExecutorProvider, -{ +pub struct LocalReaderCore { pub store_id: Cell>, store_meta: S, pub delegates: LruCache, @@ -699,7 +695,7 @@ where impl LocalReaderCore where - D: ReadExecutor + Deref + Clone, + D: Deref + Clone, S: ReadExecutorProvider, { pub fn new(store_meta: S) -> Self { @@ -827,8 +823,7 @@ where impl Clone for LocalReaderCore where - D: ReadExecutor + Deref, - S: ReadExecutorProvider, + S: Clone, { fn clone(&self) -> Self { LocalReaderCore { diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 4d4e283ea7e..5d037fa3412 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1415,12 +1415,12 @@ impl TikvServer { raft_engine.register_config(cfg_controller); let engines_info = Arc::new(EnginesResourceInfo::new( - registry.clone(), + registry, raft_engine.as_rocks_engine().cloned(), 180, // max_samples_to_preserve )); - let router = RaftRouter::new(node.id(), registry, router); + let router = RaftRouter::new(node.id(), router); let mut coprocessor_host: CoprocessorHost = CoprocessorHost::new( router.store_router().clone(), self.config.coprocessor.clone(), diff --git a/src/config/mod.rs b/src/config/mod.rs index c78ec02182f..d2c5941c5ec 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -344,7 +344,7 @@ macro_rules! cf_config { #[online_config(skip)] pub enable_doubly_skiplist: bool, #[online_config(skip)] - pub enable_compaction_guard: bool, + pub enable_compaction_guard: Option, #[online_config(skip)] pub compaction_guard_min_output_file_size: ReadableSize, #[online_config(skip)] @@ -596,7 +596,7 @@ macro_rules! 
build_cf_opt { if $opt.enable_doubly_skiplist { cf_opts.set_doubly_skiplist(); } - if $opt.enable_compaction_guard { + if $opt.enable_compaction_guard.unwrap_or(false) { if let Some(provider) = $region_info_provider { let factory = CompactionGuardGeneratorFactory::new( $cf_name, @@ -671,7 +671,7 @@ impl Default for DefaultCfConfig { prop_size_index_distance: DEFAULT_PROP_SIZE_INDEX_DISTANCE, prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, enable_doubly_skiplist: true, - enable_compaction_guard: true, + enable_compaction_guard: None, compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Zstd, @@ -796,7 +796,7 @@ impl Default for WriteCfConfig { prop_size_index_distance: DEFAULT_PROP_SIZE_INDEX_DISTANCE, prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, enable_doubly_skiplist: true, - enable_compaction_guard: true, + enable_compaction_guard: None, compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Zstd, @@ -902,7 +902,7 @@ impl Default for LockCfConfig { prop_size_index_distance: DEFAULT_PROP_SIZE_INDEX_DISTANCE, prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, enable_doubly_skiplist: true, - enable_compaction_guard: false, + enable_compaction_guard: None, compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Disable, @@ -985,7 +985,7 @@ impl Default for RaftCfConfig { prop_size_index_distance: DEFAULT_PROP_SIZE_INDEX_DISTANCE, prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, enable_doubly_skiplist: true, - enable_compaction_guard: false, + enable_compaction_guard: None, compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: 
ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Disable, @@ -1218,6 +1218,8 @@ impl DbConfig { match engine { EngineType::RaftKv => { self.allow_concurrent_memtable_write.get_or_insert(true); + self.defaultcf.enable_compaction_guard.get_or_insert(true); + self.writecf.enable_compaction_guard.get_or_insert(true); } EngineType::RaftKv2 => { self.enable_multi_batch_write.get_or_insert(false); @@ -1475,7 +1477,7 @@ impl Default for RaftDefaultCfConfig { prop_size_index_distance: DEFAULT_PROP_SIZE_INDEX_DISTANCE, prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, enable_doubly_skiplist: true, - enable_compaction_guard: false, + enable_compaction_guard: None, compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Disable, @@ -5203,7 +5205,7 @@ mod tests { // Test comopaction guard disabled. let config = DefaultCfConfig { target_file_size_base: ReadableSize::mb(16), - enable_compaction_guard: false, + enable_compaction_guard: Some(false), ..Default::default() }; let provider = Some(MockRegionInfoProvider::new(vec![])); @@ -5216,7 +5218,7 @@ mod tests { // Test compaction guard enabled but region info provider is missing. let config = DefaultCfConfig { target_file_size_base: ReadableSize::mb(16), - enable_compaction_guard: true, + enable_compaction_guard: Some(true), ..Default::default() }; let provider: Option = None; @@ -5229,7 +5231,7 @@ mod tests { // Test compaction guard enabled. 
let config = DefaultCfConfig { target_file_size_base: ReadableSize::mb(16), - enable_compaction_guard: true, + enable_compaction_guard: Some(true), compaction_guard_min_output_file_size: ReadableSize::mb(4), compaction_guard_max_output_file_size: ReadableSize::mb(64), ..Default::default() @@ -5541,22 +5543,27 @@ mod tests { cfg.raft_engine.mut_config().memory_limit = None; cfg.coprocessor_v2.coprocessor_plugin_directory = None; // Default is `None`, which is represented by not setting the key. cfg.rocksdb.write_buffer_limit = None; + cfg.rocksdb.defaultcf.enable_compaction_guard = None; cfg.rocksdb.defaultcf.level0_slowdown_writes_trigger = None; cfg.rocksdb.defaultcf.level0_stop_writes_trigger = None; cfg.rocksdb.defaultcf.soft_pending_compaction_bytes_limit = None; cfg.rocksdb.defaultcf.hard_pending_compaction_bytes_limit = None; + cfg.rocksdb.writecf.enable_compaction_guard = None; cfg.rocksdb.writecf.level0_slowdown_writes_trigger = None; cfg.rocksdb.writecf.level0_stop_writes_trigger = None; cfg.rocksdb.writecf.soft_pending_compaction_bytes_limit = None; cfg.rocksdb.writecf.hard_pending_compaction_bytes_limit = None; + cfg.rocksdb.lockcf.enable_compaction_guard = None; cfg.rocksdb.lockcf.level0_slowdown_writes_trigger = None; cfg.rocksdb.lockcf.level0_stop_writes_trigger = None; cfg.rocksdb.lockcf.soft_pending_compaction_bytes_limit = None; cfg.rocksdb.lockcf.hard_pending_compaction_bytes_limit = None; + cfg.rocksdb.raftcf.enable_compaction_guard = None; cfg.rocksdb.raftcf.level0_slowdown_writes_trigger = None; cfg.rocksdb.raftcf.level0_stop_writes_trigger = None; cfg.rocksdb.raftcf.soft_pending_compaction_bytes_limit = None; cfg.rocksdb.raftcf.hard_pending_compaction_bytes_limit = None; + cfg.raftdb.defaultcf.enable_compaction_guard = None; cfg.raftdb.defaultcf.level0_slowdown_writes_trigger = None; cfg.raftdb.defaultcf.level0_stop_writes_trigger = None; cfg.raftdb.defaultcf.soft_pending_compaction_bytes_limit = None; diff --git 
a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index c6f8e565218..0c6cf7cdd9c 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -359,7 +359,7 @@ fn test_serde_custom_tikv_config() { prop_size_index_distance: 4000000, prop_keys_index_distance: 40000, enable_doubly_skiplist: false, - enable_compaction_guard: false, + enable_compaction_guard: Some(false), compaction_guard_min_output_file_size: ReadableSize::mb(12), compaction_guard_max_output_file_size: ReadableSize::mb(34), bottommost_level_compression: DBCompressionType::Disable, @@ -428,7 +428,7 @@ fn test_serde_custom_tikv_config() { prop_size_index_distance: 4000000, prop_keys_index_distance: 40000, enable_doubly_skiplist: true, - enable_compaction_guard: false, + enable_compaction_guard: Some(false), compaction_guard_min_output_file_size: ReadableSize::mb(12), compaction_guard_max_output_file_size: ReadableSize::mb(34), bottommost_level_compression: DBCompressionType::Zstd, @@ -497,7 +497,7 @@ fn test_serde_custom_tikv_config() { prop_size_index_distance: 4000000, prop_keys_index_distance: 40000, enable_doubly_skiplist: true, - enable_compaction_guard: true, + enable_compaction_guard: Some(true), compaction_guard_min_output_file_size: ReadableSize::mb(12), compaction_guard_max_output_file_size: ReadableSize::mb(34), bottommost_level_compression: DBCompressionType::Disable, @@ -566,7 +566,7 @@ fn test_serde_custom_tikv_config() { prop_size_index_distance: 4000000, prop_keys_index_distance: 40000, enable_doubly_skiplist: true, - enable_compaction_guard: true, + enable_compaction_guard: Some(true), compaction_guard_min_output_file_size: ReadableSize::mb(12), compaction_guard_max_output_file_size: ReadableSize::mb(34), bottommost_level_compression: DBCompressionType::Disable, @@ -650,7 +650,7 @@ fn test_serde_custom_tikv_config() { prop_size_index_distance: 4000000, prop_keys_index_distance: 40000, enable_doubly_skiplist: true, - enable_compaction_guard: 
true, + enable_compaction_guard: Some(true), compaction_guard_min_output_file_size: ReadableSize::mb(12), compaction_guard_max_output_file_size: ReadableSize::mb(34), bottommost_level_compression: DBCompressionType::Disable, From 8aef20c019c969d5f7984d0ea953c0678f98cd95 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 3 Jan 2023 17:36:19 +0800 Subject: [PATCH 0443/1149] *: introduce slog_panic and SlogFormat (#14014) ref tikv/tikv#12842 These two are helpers to utilize the static KV pairs in logger. In the past, we use `logger.list()` to try to format the configured KV pairs, but it will not work as values are omitted. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/batch/store.rs | 7 +- components/raftstore-v2/src/fsm/store.rs | 14 +- .../operation/command/admin/conf_change.rs | 20 +-- .../src/operation/command/admin/mod.rs | 11 +- .../src/operation/command/admin/split.rs | 49 +++--- .../raftstore-v2/src/operation/command/mod.rs | 16 +- .../src/operation/command/write/mod.rs | 27 ++-- .../operation/command/write/simple_write.rs | 13 +- components/raftstore-v2/src/operation/pd.rs | 9 +- .../raftstore-v2/src/operation/query/mod.rs | 7 +- .../src/operation/ready/apply_trace.rs | 11 +- .../src/operation/ready/async_writer.rs | 23 +-- .../raftstore-v2/src/operation/ready/mod.rs | 16 +- .../src/operation/ready/snapshot.rs | 17 +-- components/raftstore-v2/src/raft/apply.rs | 6 +- components/tikv_util/src/log.rs | 142 ++++++++++++++++++ 16 files changed, 272 insertions(+), 116 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 9ba7a63139c..e25ad53df8b 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -34,6 +34,7 @@ use slog::{warn, Logger}; use tikv_util::{ box_err, config::{Tracker, VersionTrack}, + log::SlogFormat, sys::SysQuota, time::Instant as TiInstant, timer::SteadyTimer, @@ -339,9 +340,9 @@ impl 
StorePollerBuilder { let prev = regions.insert(region_id, (sender, peer_fsm)); if let Some((_, p)) = prev { return Err(box_err!( - "duplicate region {:?} vs {:?}", - p.logger().list(), - regions[®ion_id].1.logger().list() + "duplicate region {} vs {}", + SlogFormat(p.logger()), + SlogFormat(regions[®ion_id].1.logger()) )); } Ok(()) diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index a5f22d7e1a8..86e3540d23c 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -17,7 +17,9 @@ use slog::{info, o, Logger}; use tikv_util::{ future::poll_future_notify, is_zero_duration, + log::SlogFormat, mpsc::{self, LooseBoundedSender, Receiver}, + slog_panic, }; use crate::{ @@ -60,12 +62,12 @@ impl StoreMeta { .insert(region_id, (region.clone(), initialized)); // `prev` only makes sense when it's initialized. if let Some((prev, prev_init)) = prev && prev_init { - assert!(initialized, "{:?} region corrupted", logger.list()); + assert!(initialized, "{} region corrupted", SlogFormat(logger)); if prev.get_region_epoch().get_version() != version { let prev_id = self.region_ranges.remove(&(data_end_key(prev.get_end_key()), prev.get_region_epoch().get_version())); - assert_eq!(prev_id, Some(region_id), "{:?} region corrupted", logger.list()); + assert_eq!(prev_id, Some(region_id), "{} region corrupted", SlogFormat(logger)); } else { - assert!(self.region_ranges.get(&(data_end_key(prev.get_end_key()), version)).is_some(), "{:?} region corrupted", logger.list()); + assert!(self.region_ranges.get(&(data_end_key(prev.get_end_key()), version)).is_some(), "{} region corrupted", SlogFormat(logger)); return; } } @@ -74,8 +76,8 @@ impl StoreMeta { self.region_ranges .insert((data_end_key(region.get_end_key()), version), region_id) .is_none(), - "{:?} region corrupted", - logger.list() + "{} region corrupted", + SlogFormat(logger) ); } } @@ -216,7 +218,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> 
StoreFsmDelegate<'a, EK, ER, T> { fn on_start(&mut self) { if self.fsm.store.start_time.is_some() { - panic!("{:?} unable to start again", self.fsm.store.logger.list(),); + slog_panic!(self.fsm.store.logger, "store is already started"); } self.fsm.store.start_time = Some( diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 72b582d775d..6c041a551fe 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -27,7 +27,7 @@ use raftstore::{ Error, Result, }; use slog::{error, info, warn}; -use tikv_util::box_err; +use tikv_util::{box_err, slog_panic}; use super::AdminCmdResult; use crate::{ @@ -312,10 +312,10 @@ impl Apply { change_num += 1; } if change_num == 0 { - panic!( - "{:?} can't leave a non-joint config, region: {:?}", - self.logger.list(), - self.region_state() + slog_panic!( + self.logger, + "can't leave a non-joint config"; + "region" => ?self.region_state() ); } let conf_ver = region.get_region_epoch().get_conf_ver() + change_num; @@ -433,11 +433,11 @@ impl Apply { if let Some(exist_peer) = tikv_util::store::find_peer(region, store_id) { let r = exist_peer.get_role(); if r == PeerRole::IncomingVoter || r == PeerRole::DemotingVoter { - panic!( - "{:?} can't apply confchange because configuration is still in joint state, confchange: {:?}, region: {:?}", - self.logger.list(), - cp, - self.region_state() + slog_panic!( + self.logger, + "can't apply confchange because configuration is still in joint state"; + "confchange" => ?cp, + "region_state" => ?self.region_state() ); } } diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 9ceaa76c03b..52bc5329dd4 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ 
b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -15,7 +15,7 @@ use raftstore::store::{cmd_resp, fsm::apply, msg::ErrorCallback}; use slog::info; use split::SplitResult; pub use split::{temp_split_path, RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX}; -use tikv_util::box_err; +use tikv_util::{box_err, log::SlogFormat}; use txn_types::WriteBatchFlags; use crate::{batch::StoreContext, raft::Peer, router::CmdResChannel}; @@ -43,7 +43,10 @@ impl Peer { return; } if !req.has_admin_request() { - let e = box_err!("{:?} expect only execute admin command", self.logger.list()); + let e = box_err!( + "{} expect only execute admin command", + SlogFormat(&self.logger) + ); let resp = cmd_resp::new_error(e); ch.report_error(resp); return; @@ -67,8 +70,8 @@ impl Peer { // checker. if !self.applied_to_current_term() { let e = box_err!( - "{:?} peer has not applied to current term, applied_term {}, current_term {}", - self.logger.list(), + "{} peer has not applied to current term, applied_term {}, current_term {}", + SlogFormat(&self.logger), self.storage().entry_storage().applied_term(), self.term() ); diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index add5af1ce52..23fc6e3a8d9 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -54,6 +54,7 @@ use raftstore::{ Result, }; use slog::info; +use tikv_util::{log::SlogFormat, slog_panic}; use crate::{ batch::StoreContext, @@ -330,10 +331,10 @@ impl Apply { // We will freeze the memtable rather than flush it in the following PR. 
let tablet = self.tablet().clone(); let mut checkpointer = tablet.new_checkpointer().unwrap_or_else(|e| { - panic!( - "{:?} fails to create checkpoint object: {:?}", - self.logger.list(), - e + slog_panic!( + self.logger, + "fails to create checkpoint object"; + "error" => ?e ) }); @@ -348,11 +349,11 @@ impl Apply { checkpointer .create_at(&split_temp_path, None, 0) .unwrap_or_else(|e| { - panic!( - "{:?} fails to create checkpoint with path {:?}: {:?}", - self.logger.list(), - split_temp_path, - e + slog_panic!( + self.logger, + "fails to create checkpoint"; + "path" => %split_temp_path.display(), + "error" => ?e ) }); } @@ -366,11 +367,11 @@ impl Apply { checkpointer .create_at(&derived_path, None, 0) .unwrap_or_else(|e| { - panic!( - "{:?} fails to create checkpoint with path {:?}: {:?}", - self.logger.list(), - derived_path, - e + slog_panic!( + self.logger, + "fails to create checkpoint"; + "path" => %derived_path.display(), + "error" => ?e ) }); } @@ -505,10 +506,10 @@ impl Peer { .router .force_send_control(StoreMsg::SplitInit(msg)) .unwrap_or_else(|e| { - panic!( - "{:?} fails to send split peer intialization msg to store : {:?}", - self.logger.list(), - e + slog_panic!( + self.logger, + "fails to send split peer intialization msg to store"; + "error" => ?e, ) }); } @@ -556,11 +557,11 @@ impl Peer { let res = self.raft_group_mut().step(msg); let accept_snap = self.raft_group().snap().is_some(); if res.is_err() || !accept_snap { - panic!( - "{:?} failed to accept snapshot {:?} with error {}", - self.logger.list(), - res, - accept_snap + slog_panic!( + self.logger, + "failed to accept snapshot"; + "accept_snapshot" => accept_snap, + "res" => ?res, ); } let prev = self.storage_mut().split_init_mut().replace(split_init); @@ -610,7 +611,7 @@ impl Peer { break; } } - assert!(found, "{:?} {}", self.logger.list(), region_id); + assert!(found, "{} {}", SlogFormat(&self.logger), region_id); let split_trace = self.split_trace_mut(); let mut off = 0; let mut 
admin_flushed = 0; diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 8b0d3d7d461..439d2136d76 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -41,7 +41,7 @@ use raftstore::{ }; use slog::{info, warn}; use tikv_util::{ - box_err, + box_err, slog_panic, time::{duration_to_sec, monotonic_raw_now, Instant}, }; @@ -71,12 +71,12 @@ fn parse_at(logger: &slog::Logger, buf: &[u8], index: u64, let mut m = M::default(); match m.merge_from_bytes(buf) { Ok(()) => m, - Err(e) => panic!( - "{:?} data is corrupted at [{}] {}: {:?}", - logger.list(), - term, - index, - e + Err(e) => slog_panic!( + logger, + "data is corrupted"; + "term" => term, + "index" => index, + "error" => ?e, ), } } @@ -555,7 +555,7 @@ impl Apply { if let Err(e) = wb.write_callback_opt(&write_opt, || { flush_state.set_applied_index(index); }) { - panic!("failed to write data: {:?}: {:?}", self.logger.list(), e); + slog_panic!(self.logger, "failed to write data"; "error" => ?e); } self.metrics.written_bytes += wb.data_size() as u64; self.metrics.written_keys += wb.count() as u64; diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index af806e3024e..14011d6fc1b 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -11,6 +11,7 @@ use raftstore::{ }, Result, }; +use tikv_util::slog_panic; use crate::{ batch::StoreContext, @@ -150,13 +151,13 @@ impl Apply { .put_cf(cf, &self.key_buffer, value) }; res.unwrap_or_else(|e| { - panic!( - "{:?} failed to write ({}, {}) {}: {:?}", - self.logger.list(), - log_wrappers::Value::key(key), - log_wrappers::Value::value(value), - cf, - e + slog_panic!( + self.logger, + "failed to write"; + "key" => %log_wrappers::Value::key(key), + "value" => 
%log_wrappers::Value::value(value), + "cf" => cf, + "error" => ?e ); }); fail::fail_point!("APPLY_PUT", |_| Err(raftstore::Error::Other( @@ -188,12 +189,12 @@ impl Apply { .delete_cf(cf, &self.key_buffer) }; res.unwrap_or_else(|e| { - panic!( - "{:?} failed to delete {} {}: {:?}", - self.logger.list(), - log_wrappers::Value::key(key), - cf, - e + slog_panic!( + self.logger, + "failed to delete"; + "key" => %log_wrappers::Value::key(key), + "cf" => cf, + "error" => ?e ); }); self.metrics.size_diff_hint -= self.key_buffer.len() as i64; diff --git a/components/raftstore-v2/src/operation/command/write/simple_write.rs b/components/raftstore-v2/src/operation/command/write/simple_write.rs index 57c01fca9d8..e6f81b20af1 100644 --- a/components/raftstore-v2/src/operation/command/write/simple_write.rs +++ b/components/raftstore-v2/src/operation/command/write/simple_write.rs @@ -5,6 +5,7 @@ use kvproto::raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}; use protobuf::{CodedInputStream, Message}; use raftstore::store::WriteCallback; use slog::Logger; +use tikv_util::slog_panic; use crate::{operation::command::parse_at, router::CmdResChannel}; @@ -191,12 +192,12 @@ impl<'a> SimpleWriteReqDecoder<'a> { let mut is = CodedInputStream::from_bytes(&buf[1..]); let header = match is.read_message() { Ok(h) => h, - Err(e) => panic!( - "{:?} data corrupted at [{}] {}: {:?}", - logger.list(), - term, - index, - e + Err(e) => slog_panic!( + logger, + "data corrupted"; + "term" => term, + "index" => index, + "error" => ?e ), }; let read = is.pos(); diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 894f39f278b..50b612f207d 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -7,6 +7,7 @@ use fail::fail_point; use kvproto::{metapb, pdpb}; use raftstore::store::Transport; use slog::error; +use tikv_util::slog_panic; use crate::{ batch::StoreContext, @@ -137,10 +138,10 @@ impl Peer 
{ pending_peers.push(p); } else { if ctx.cfg.dev_assert { - panic!( - "{:?} failed to get peer {} from cache", - self.logger.list(), - id + slog_panic!( + self.logger, + "failed to get peer from cache"; + "get_peer_id" => id ); } error!( diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index f26659c7b89..305cdb666cc 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -30,7 +30,7 @@ use raftstore::{ Error, Result, }; use slog::{debug, info}; -use tikv_util::box_err; +use tikv_util::{box_err, log::SlogFormat}; use txn_types::WriteBatchFlags; use crate::{ @@ -363,7 +363,10 @@ impl Peer { } } StatusCmdType::InvalidStatus => { - return Err(box_err!("{:?} invalid status command!", self.logger.list())); + return Err(box_err!( + "{} invalid status command!", + SlogFormat(&self.logger) + )); } } diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index 1e9d1ef4221..5ff9a27dee0 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -41,7 +41,7 @@ use raftstore::store::{ ReadTask, TabletSnapManager, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }; use slog::{trace, Logger}; -use tikv_util::{box_err, worker::Scheduler}; +use tikv_util::{box_err, slog_panic, worker::Scheduler}; use crate::{ operation::{ @@ -444,11 +444,10 @@ impl Storage { return; } } - panic!( - "{:?} data loss detected: {}_{} not found", - self.logger().list(), - region_id, - tablet_index + slog_panic!( + self.logger(), + "tablet loss detected"; + "tablet_index" => tablet_index ); } diff --git a/components/raftstore-v2/src/operation/ready/async_writer.rs b/components/raftstore-v2/src/operation/ready/async_writer.rs index a2707b6d411..96f1611d9f1 100644 --- 
a/components/raftstore-v2/src/operation/ready/async_writer.rs +++ b/components/raftstore-v2/src/operation/ready/async_writer.rs @@ -9,6 +9,7 @@ use raftstore::store::{ WriteSenders, WriteTask, }; use slog::{warn, Logger}; +use tikv_util::slog_panic; use crate::{ batch::{StoreContext, StoreRouter}, @@ -117,11 +118,11 @@ impl AsyncWriter { let last_unpersisted = self.unpersisted_readies.back(); if last_unpersisted.map_or(true, |u| u.number < ready_number) { - panic!( - "{:?} ready number is too large {:?} vs {}", - logger.list(), - last_unpersisted, - ready_number + slog_panic!( + logger, + "ready number is too large"; + "last_unpersisted" => ?last_unpersisted, + "ready_number" => ready_number ); } @@ -130,15 +131,15 @@ impl AsyncWriter { // There must be a match in `self.unpersisted_readies`. loop { let Some(v) = self.unpersisted_readies.pop_front() else { - panic!("{:?} ready number not found {}", logger.list(), ready_number); + slog_panic!(logger, "ready number not found"; "ready_number" => ready_number); }; has_snapshot |= v.has_snapshot; if v.number > ready_number { - panic!( - "{:?} ready number not matched {:?} vs {}", - logger.list(), - v, - ready_number + slog_panic!( + logger, + "ready number not matched"; + "ready" => ?v, + "ready_number" => ready_number ); } if raft_messages.is_empty() { diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index fe4208db549..29452533632 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -34,6 +34,8 @@ use raftstore::{ }; use slog::{debug, error, info, trace, warn}; use tikv_util::{ + log::SlogFormat, + slog_panic, store::find_peer, time::{duration_to_sec, monotonic_raw_now}, }; @@ -388,8 +390,8 @@ impl Peer { let prev_commit_index = self.entry_storage().commit_index(); assert!( hs.get_commit() >= prev_commit_index, - "{:?} {:?} {}", - self.logger.list(), + "{} {:?} {}", + 
SlogFormat(&self.logger), hs, prev_commit_index ); @@ -456,11 +458,11 @@ impl Peer { } } if !light_rd.messages().is_empty() || light_rd.commit_index().is_some() { - panic!( - "{:?} unexpected messages [{}] commit index [{:?}]", - self.logger.list(), - light_rd.messages().len(), - light_rd.commit_index() + slog_panic!( + self.logger, + "unexpected messages"; + "messages_count" => ?light_rd.messages().len(), + "commit_index" => ?light_rd.commit_index() ); } if !light_rd.committed_entries().is_empty() { diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 2e1b9362a69..8716f0c75ea 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -41,7 +41,7 @@ use raftstore::{ }, }; use slog::{error, info, warn}; -use tikv_util::box_err; +use tikv_util::{box_err, log::SlogFormat, slog_panic}; use crate::{ fsm::ApplyResReporter, @@ -554,8 +554,8 @@ impl Storage { let last_term = snap.get_metadata().get_term(); assert!( last_index >= RAFT_INIT_LOG_INDEX && last_term >= RAFT_INIT_LOG_TERM, - "{:?}", - self.logger().list() + "{}", + SlogFormat(self.logger()) ); let region_state = self.region_state_mut(); region_state.set_state(PeerState::Normal); @@ -599,12 +599,11 @@ impl Storage { // it should load it into the factory after it persisted. 
let hook = move || { if !install_tablet(®, &path, region_id, last_index) { - panic!( - "{:?} failed to install tablet, path: {}, region_id: {}, tablet_index: {}", - logger.list(), - path.display(), - region_id, - last_index + slog_panic!( + logger, + "failed to install tablet"; + "path" => %path.display(), + "tablet_index" => last_index ); } if clean_split { diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 8660e4795d0..2407d1ab3fe 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -9,7 +9,7 @@ use raftstore::store::{ ReadTask, }; use slog::Logger; -use tikv_util::worker::Scheduler; +use tikv_util::{log::SlogFormat, worker::Scheduler}; use crate::{ operation::{AdminCmdResult, DataTrace}, @@ -71,9 +71,9 @@ impl Apply { let mut remote_tablet = tablet_registry .get(region_state.get_region().get_id()) .unwrap(); - assert_ne!(applied_term, 0, "{:?}", logger.list()); + assert_ne!(applied_term, 0, "{}", SlogFormat(&logger)); let applied_index = flush_state.applied_index(); - assert_ne!(applied_index, 0, "{:?}", logger.list()); + assert_ne!(applied_index, 0, "{}", SlogFormat(&logger)); Apply { peer, tablet: remote_tablet.latest().unwrap().clone(), diff --git a/components/tikv_util/src/log.rs b/components/tikv_util/src/log.rs index 10facfa2287..fd351eecbd4 100644 --- a/components/tikv_util/src/log.rs +++ b/components/tikv_util/src/log.rs @@ -82,3 +82,145 @@ macro_rules! debug(($($args:tt)+) => { macro_rules! 
trace(($($args:tt)+) => { ::slog_global::trace!($($args)+) };); + +use std::fmt::{self, Display, Write}; + +use slog::{BorrowedKV, OwnedKVList, Record, KV}; + +struct FormatKeyValueList<'a, W> { + buffer: &'a mut W, + written: bool, +} + +impl<'a, W: Write> slog::Serializer for FormatKeyValueList<'a, W> { + fn emit_arguments(&mut self, key: slog::Key, val: &fmt::Arguments<'_>) -> slog::Result { + if !self.written { + write!(&mut self.buffer, "[{}={}]", key, val).unwrap(); + self.written = true; + } else { + write!(&mut self.buffer, " [{}={}]", key, val).unwrap() + } + Ok(()) + } +} + +/// A helper struct to format the key-value list of a slog logger. It's not +/// exact the same format as `TiKVFormat` and etc. It's just a simple +/// implementation for panic, return errors that doesn't show in normal logs +/// processing. +pub struct SlogFormat<'a>(pub &'a slog::Logger); + +impl<'a> Display for SlogFormat<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut formatter = FormatKeyValueList { + buffer: f, + written: false, + }; + let record = slog::record_static!(slog::Level::Trace, ""); + self.0 + .list() + .serialize( + &Record::new(&record, &format_args!(""), slog::b!()), + &mut formatter, + ) + .unwrap(); + Ok(()) + } +} + +#[doc(hidden)] +pub fn format_kv_list(buffer: &mut String, kv_list: &OwnedKVList, borrow_list: BorrowedKV<'_>) { + let mut formatter = FormatKeyValueList { + buffer, + written: false, + }; + let record = slog::record_static!(slog::Level::Trace, ""); + let args = format_args!(""); + let record = Record::new(&record, &args, slog::b!()); + // Serialize borrow list first to make region_id, peer_id at the end. + borrow_list.serialize(&record, &mut formatter).unwrap(); + kv_list.serialize(&record, &mut formatter).unwrap(); +} + +/// A helper macro to panic with the key-value list of a slog logger. +/// +/// Similar to `SlogFormat`, but just panic. +#[macro_export] +macro_rules! 
slog_panic { + ($logger:expr, $msg:expr, $borrowed_kv:expr) => {{ + let owned_kv = ($logger).list(); + let mut s = String::new(); + $crate::log::format_kv_list(&mut s, &owned_kv, $borrowed_kv); + if s.is_empty() { + panic!("{}", $msg) + } else { + panic!("{} {}", $msg, s) + } + }}; + ($logger:expr, $msg:expr) => {{ + $crate::slog_panic!($logger, $msg, slog::b!()) + }}; + ($logger:expr, $msg:expr; $($arg:tt)+) => {{ + $crate::slog_panic!($logger, $msg, slog::b!($($arg)+)) + }}; +} + +#[cfg(test)] +mod tests { + #[test] + fn test_format_kv() { + let logger = slog::Logger::root(slog::Discard, slog::o!()); + let s = format!("{}", super::SlogFormat(&logger)); + assert_eq!(s, String::new()); + + let logger = logger.new(slog::o!("a" => 1)); + let s = format!("{}", super::SlogFormat(&logger)); + assert_eq!(s, "[a=1]"); + + let logger = logger.new(slog::o!("b" => 2)); + let s = format!("{}", super::SlogFormat(&logger)); + assert_eq!(s, "[b=2] [a=1]"); + } + + #[test] + fn test_slog_panic() { + let logger = slog::Logger::root(slog::Discard, slog::o!()); + let err = panic_hook::recover_safe(|| { + crate::slog_panic!(logger, "test"); + }) + .unwrap_err(); + assert_eq!(err.downcast::().unwrap().as_str(), "test"); + + let err = panic_hook::recover_safe(|| { + crate::slog_panic!(logger, "test"; "k" => "v"); + }) + .unwrap_err(); + assert_eq!(err.downcast::().unwrap().as_str(), "test [k=v]"); + + let logger = logger.new(slog::o!("a" => 1)); + let err = panic_hook::recover_safe(|| { + crate::slog_panic!(logger, "test"); + }) + .unwrap_err(); + assert_eq!(err.downcast::().unwrap().as_str(), "test [a=1]"); + + let logger = logger.new(slog::o!("b" => 2)); + let err = panic_hook::recover_safe(|| { + crate::slog_panic!(logger, "test"); + }) + .unwrap_err(); + assert_eq!( + err.downcast::().unwrap().as_str(), + "test [b=2] [a=1]" + ); + + let err = panic_hook::recover_safe(|| { + crate::slog_panic!(logger, "test"; "k" => "v"); + }) + .unwrap_err(); + assert_eq!( + 
err.downcast::().unwrap().as_str(), + "test [k=v] [b=2] [a=1]" + ); + } +} From 4619f32f07207343692dc641656822c65157c616 Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 3 Jan 2023 18:08:19 -0800 Subject: [PATCH 0444/1149] Introduce priority queue for priority scheduling (#14002) ref tikv/tikv#13730 Introduce priority-based channel Signed-off-by: Connor1996 Co-authored-by: Ti Chi Robot --- Cargo.lock | 59 ++-- components/concurrency_manager/Cargo.toml | 7 +- components/tikv_util/Cargo.toml | 2 + components/tikv_util/src/mpsc/mod.rs | 2 + .../tikv_util/src/mpsc/priority_queue.rs | 289 ++++++++++++++++++ 5 files changed, 314 insertions(+), 45 deletions(-) create mode 100644 components/tikv_util/src/mpsc/priority_queue.rs diff --git a/Cargo.lock b/Cargo.lock index 8433f54c512..7a3c9ced013 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -800,7 +800,7 @@ dependencies = [ "kvproto", "lazy_static", "log_wrappers", - "parking_lot 0.12.0", + "parking_lot 0.12.1", "pd_client", "prometheus", "prometheus-static-metric", @@ -1041,7 +1041,7 @@ dependencies = [ "fail", "futures 0.3.15", "kvproto", - "parking_lot 0.12.0", + "parking_lot 0.12.1", "rand 0.8.5", "tikv_alloc", "tikv_util", @@ -1204,18 +1204,6 @@ dependencies = [ "crossbeam-utils 0.8.11", ] -[[package]] -name = "crossbeam-epoch" -version = "0.9.3" -source = "git+https://github.com/tikv/crossbeam.git?branch=tikv-5.0#e0e083d062649484188b7337fe388fd12f2c8d94" -dependencies = [ - "cfg-if 1.0.0", - "crossbeam-utils 0.8.3", - "lazy_static", - "memoffset", - "scopeguard", -] - [[package]] name = "crossbeam-epoch" version = "0.9.8" @@ -1255,12 +1243,13 @@ dependencies = [ [[package]] name = "crossbeam-skiplist" -version = "0.0.0" -source = "git+https://github.com/tikv/crossbeam.git?branch=tikv-5.0#e0e083d062649484188b7337fe388fd12f2c8d94" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "883a5821d7d079fcf34ac55f27a833ee61678110f6b97637cc74513c0d0b42fc" dependencies = [ "cfg-if 
1.0.0", - "crossbeam-epoch 0.9.3", - "crossbeam-utils 0.8.3", + "crossbeam-epoch 0.9.8", + "crossbeam-utils 0.8.8", "scopeguard", ] @@ -1275,16 +1264,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = "crossbeam-utils" -version = "0.8.3" -source = "git+https://github.com/tikv/crossbeam.git?branch=tikv-5.0#e0e083d062649484188b7337fe388fd12f2c8d94" -dependencies = [ - "autocfg", - "cfg-if 1.0.0", - "lazy_static", -] - [[package]] name = "crossbeam-utils" version = "0.8.8" @@ -1379,7 +1358,7 @@ checksum = "c0834a35a3fce649144119e18da2a4d8ed12ef3862f47183fd46f625d072d96c" dependencies = [ "cfg-if 1.0.0", "num_cpus", - "parking_lot 0.12.0", + "parking_lot 0.12.1", ] [[package]] @@ -1867,7 +1846,7 @@ dependencies = [ "maligned", "online_config", "openssl", - "parking_lot 0.12.0", + "parking_lot 0.12.1", "prometheus", "prometheus-static-metric", "rand 0.8.5", @@ -3650,9 +3629,9 @@ dependencies = [ [[package]] name = "parking_lot" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", "parking_lot_core 0.9.1", @@ -3929,7 +3908,7 @@ dependencies = [ "log", "nix 0.24.1", "once_cell", - "parking_lot 0.12.0", + "parking_lot 0.12.1", "protobuf", "protobuf-codegen-pure", "smallvec", @@ -4214,7 +4193,7 @@ dependencies = [ "nix 0.25.0", "num-derive", "num-traits", - "parking_lot 0.12.0", + "parking_lot 0.12.1", "prometheus", "prometheus-static-metric", "protobuf", @@ -4312,7 +4291,7 @@ dependencies = [ "openssl", "ordered-float", "panic_hook", - "parking_lot 0.12.0", + "parking_lot 0.12.1", "pd_client", "prometheus", "prometheus-static-metric", @@ -4362,7 +4341,7 @@ dependencies = [ "keys", "kvproto", "log_wrappers", - "parking_lot 0.12.0", + "parking_lot 0.12.1", "pd_client", "prometheus", "protobuf", @@ -6294,7 +6273,7 @@ dependencies = 
[ "online_config", "openssl", "panic_hook", - "parking_lot 0.12.0", + "parking_lot 0.12.1", "paste", "pd_client", "pin-project", @@ -6515,6 +6494,7 @@ dependencies = [ "cpu-time", "crc32fast", "crossbeam", + "crossbeam-skiplist", "derive_more", "error_code", "fail", @@ -6536,6 +6516,7 @@ dependencies = [ "openssl", "page_size", "panic_hook", + "parking_lot 0.12.1", "pin-project", "procfs", "procinfo", @@ -6620,7 +6601,7 @@ dependencies = [ "memchr", "mio 0.8.5", "num_cpus", - "parking_lot 0.12.0", + "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", "socket2", @@ -6874,7 +6855,7 @@ dependencies = [ "crossbeam-utils 0.8.8", "kvproto", "lazy_static", - "parking_lot 0.12.0", + "parking_lot 0.12.1", "pin-project", "prometheus", "slab", diff --git a/components/concurrency_manager/Cargo.toml b/components/concurrency_manager/Cargo.toml index e225cbe0519..b391c1d239a 100644 --- a/components/concurrency_manager/Cargo.toml +++ b/components/concurrency_manager/Cargo.toml @@ -5,6 +5,7 @@ publish = false version = "0.0.1" [dependencies] +crossbeam-skiplist = "0.1" fail = "0.5" kvproto = { workspace = true } parking_lot = "0.12" @@ -12,12 +13,6 @@ tikv_util = { workspace = true } tokio = { version = "1.5", features = ["macros", "sync", "time"] } txn_types = { workspace = true } -# FIXME: switch to the crates.io version after crossbeam-skiplist is released -[dependencies.crossbeam-skiplist] -git = "https://github.com/tikv/crossbeam.git" -branch = "tikv-5.0" -package = "crossbeam-skiplist" - [dev-dependencies] criterion = "0.3" futures = "0.3" diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 663eb2b681f..92f3bac3d5b 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -19,6 +19,7 @@ collections = { workspace = true } cpu-time = "1.0.0" crc32fast = "1.2" crossbeam = "0.8" +crossbeam-skiplist = "0.1" derive_more = "0.99.3" error_code = { workspace = true } fail = "0.5" @@ -37,6 +38,7 @@ num-traits = 
"0.2" num_cpus = "1" online_config = { workspace = true } openssl = "0.10" +parking_lot = "0.12.1" pin-project = "1.0" prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" diff --git a/components/tikv_util/src/mpsc/mod.rs b/components/tikv_util/src/mpsc/mod.rs index 45249fed9bc..700691f1189 100644 --- a/components/tikv_util/src/mpsc/mod.rs +++ b/components/tikv_util/src/mpsc/mod.rs @@ -3,7 +3,9 @@ //! This module provides an implementation of mpsc channel based on //! crossbeam_channel. Comparing to the crossbeam_channel, this implementation //! supports closed detection and try operations. + pub mod future; +pub mod priority_queue; use std::{ cell::Cell, diff --git a/components/tikv_util/src/mpsc/priority_queue.rs b/components/tikv_util/src/mpsc/priority_queue.rs new file mode 100644 index 00000000000..3389d6154c3 --- /dev/null +++ b/components/tikv_util/src/mpsc/priority_queue.rs @@ -0,0 +1,289 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::sync::{ + atomic::{AtomicPtr, AtomicU64, AtomicUsize, Ordering}, + Arc, +}; + +use crossbeam::channel::{RecvError, SendError, TryRecvError, TrySendError}; +use crossbeam_skiplist::SkipMap; +use parking_lot::{Condvar, Mutex}; + +// Create a priority based channel. Sender can send message with priority of +// u64, and receiver will receive messages in ascending order of priority. For +// two messages of same priority, the receiving order follows FIFO. 
+pub fn unbounded() -> (Sender, Receiver) { + let queue = Arc::new(PriorityQueue::new()); + let sender = Sender { + inner: queue.clone(), + }; + let receiver = Receiver { inner: queue }; + (sender, receiver) +} + +struct Cell { + ptr: AtomicPtr, +} + +unsafe impl Send for Cell {} +unsafe impl Sync for Cell {} + +impl Cell { + fn new(value: T) -> Self { + Self { + ptr: AtomicPtr::new(Box::into_raw(Box::new(value))), + } + } + + fn take(&self) -> Option { + let p = self.ptr.swap(std::ptr::null_mut(), Ordering::SeqCst); + if !p.is_null() { + unsafe { Some(*Box::from_raw(p)) } + } else { + None + } + } +} + +impl Drop for Cell { + fn drop(&mut self) { + self.take(); + } +} + +#[derive(Default)] +struct PriorityQueue { + queue: SkipMap>, + disconnected: Mutex, + available: Condvar, + + sequencer: AtomicU64, + + senders: AtomicUsize, + receivers: AtomicUsize, +} + +impl PriorityQueue { + pub fn new() -> Self { + Self { + queue: SkipMap::new(), + disconnected: Mutex::new(false), + available: Condvar::new(), + sequencer: AtomicU64::new(0), + senders: AtomicUsize::new(1), + receivers: AtomicUsize::new(1), + } + } + + pub fn get_map_key(&self, pri: u64) -> MapKey { + MapKey { + priority: pri, + sequence: self.sequencer.fetch_add(1, Ordering::Relaxed), + } + } +} + +// When derived `PartialOrd` on structs, it will produce a lexicographic +// ordering based on the top-to-bottom declaration order of the struct’s +// members. 
+#[derive(Eq, PartialEq, Ord, PartialOrd)] +struct MapKey { + priority: u64, + sequence: u64, +} + +pub struct Sender { + inner: Arc>, +} + +impl Sender { + pub fn try_send(&self, msg: T, pri: u64) -> Result<(), TrySendError> { + self.send(msg, pri) + .map_err(|SendError(msg)| TrySendError::Disconnected(msg)) + } + + pub fn send(&self, msg: T, pri: u64) -> Result<(), SendError> { + if self.inner.receivers.load(Ordering::Acquire) == 0 { + return Err(SendError(msg)); + } + self.inner + .queue + .insert(self.inner.get_map_key(pri), Cell::new(msg)); + self.inner.available.notify_one(); + Ok(()) + } + + #[cfg(test)] + fn len(&self) -> usize { + self.inner.queue.len() + } +} + +impl Clone for Sender { + fn clone(&self) -> Self { + self.inner.senders.fetch_add(1, Ordering::AcqRel); + Self { + inner: Arc::clone(&self.inner), + } + } +} + +impl Drop for Sender { + fn drop(&mut self) { + let old = self.inner.senders.fetch_sub(1, Ordering::AcqRel); + if old <= 1 { + *self.inner.disconnected.lock() = true; + self.inner.available.notify_all(); + } + } +} + +pub struct Receiver { + inner: Arc>, +} + +impl Receiver { + pub fn try_recv(&self) -> Result { + match self.inner.queue.pop_front() { + Some(entry) => Ok(entry.value().take().unwrap()), + None if self.inner.senders.load(Ordering::SeqCst) == 0 => { + Err(TryRecvError::Disconnected) + } + None => Err(TryRecvError::Empty), + } + } + + pub fn recv(&self) -> Result { + loop { + match self.try_recv() { + Ok(msg) => return Ok(msg), + Err(TryRecvError::Disconnected) => { + return Err(RecvError); + } + Err(TryRecvError::Empty) => { + let mut disconnected = self.inner.disconnected.lock(); + if *disconnected { + return Err(RecvError); + } + self.inner.available.wait(&mut disconnected); + } + } + } + } + + #[cfg(test)] + fn len(&self) -> usize { + self.inner.queue.len() + } +} + +impl Clone for Receiver { + fn clone(&self) -> Self { + self.inner.receivers.fetch_add(1, Ordering::AcqRel); + Self { + inner: Arc::clone(&self.inner), + } + 
} +} + +impl Drop for Receiver { + fn drop(&mut self) { + self.inner.receivers.fetch_sub(1, Ordering::AcqRel); + } +} + +#[cfg(test)] +mod tests { + use std::{sync::atomic::AtomicU64, thread, time::Duration}; + + use crossbeam::channel::TrySendError; + use rand::Rng; + + use super::*; + + #[test] + fn test_priority() { + let (tx, rx) = super::unbounded::(); + tx.try_send(1, 2).unwrap(); + tx.send(2, 1).unwrap(); + tx.send(3, 3).unwrap(); + + assert_eq!(rx.try_recv(), Ok(2)); + assert_eq!(rx.recv(), Ok(1)); + assert_eq!(rx.recv(), Ok(3)); + assert_eq!(rx.try_recv(), Err(TryRecvError::Empty)); + + drop(rx); + assert_eq!(tx.send(2, 1), Err(SendError(2))); + assert_eq!(tx.try_send(2, 1), Err(TrySendError::Disconnected(2))); + + let (tx, rx) = super::unbounded::(); + drop(tx); + assert_eq!(rx.recv(), Err(RecvError)); + assert_eq!(rx.try_recv(), Err(TryRecvError::Disconnected)); + + let (tx, rx) = super::unbounded::(); + thread::spawn(move || { + thread::sleep(Duration::from_millis(100)); + tx.send(10, 1).unwrap(); + }); + assert_eq!(rx.recv(), Ok(10)); + + let (tx, rx) = super::unbounded::(); + assert_eq!(tx.len(), 0); + assert_eq!(rx.len(), 0); + tx.send(2, 1).unwrap(); + tx.send(3, 2).unwrap(); + assert_eq!(tx.len(), 2); + assert_eq!(rx.len(), 2); + drop(tx); + assert_eq!(rx.try_recv(), Ok(2)); + assert_eq!(rx.recv(), Ok(3)); + assert_eq!(rx.try_recv(), Err(TryRecvError::Disconnected)); + assert_eq!(rx.recv(), Err(RecvError)); + } + + #[test] + fn test_priority_multi_thread() { + let (tx, rx) = super::unbounded::(); + + let mut handlers = Vec::with_capacity(10); + let expected_count = Arc::new(AtomicU64::new(0)); + let real_counter = Arc::new(AtomicU64::new(0)); + for _ in 0..10 { + let sender = tx.clone(); + let expected_count = expected_count.clone(); + let handle = thread::spawn(move || { + let mut rng = rand::thread_rng(); + let pri = rng.gen_range(0..1000); + let mut cnt = 0; + for i in 0..1000 { + sender.send(i, pri).unwrap(); + cnt += i; + } + 
expected_count.fetch_add(cnt, Ordering::Relaxed); + }); + handlers.push(handle); + } + for _i in 0..10 { + let recv = rx.clone(); + let real_counter = real_counter.clone(); + let handle = thread::spawn(move || { + let mut cnt = 0; + while let Ok(v) = recv.recv() { + cnt += v; + } + real_counter.fetch_add(cnt, Ordering::Relaxed); + }); + handlers.push(handle); + } + drop(tx); + for h in handlers { + h.join().unwrap(); + } + assert_eq!( + expected_count.load(Ordering::Relaxed), + real_counter.load(Ordering::Relaxed) + ); + } +} From cdc2e486277d775b70f5db28a7b643ed2c3edbe1 Mon Sep 17 00:00:00 2001 From: Jay Date: Thu, 5 Jan 2023 14:38:20 +0800 Subject: [PATCH 0445/1149] raftstore-v2: only send clean snapshot (#14015) ref tikv/tikv#12842 When the tablet contains dirty data right after split, generating snapshot may just a waste. On the other hand, split usually happens on all peers, so delay it a bit actually makes all peers more likely to be initialized by split. So this PR rejects generating snapshot when it detects it still has dirty data. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/engine_panic/src/raft_engine.rs | 8 ++++ components/engine_rocks/src/raft_engine.rs | 13 ++++++ components/engine_traits/src/raft_engine.rs | 4 ++ components/raft_log_engine/src/engine.rs | 16 +++++++ components/raftstore-v2/src/fsm/peer.rs | 5 ++- .../src/operation/command/admin/split.rs | 45 +++++++++++++++---- .../raftstore-v2/src/operation/ready/mod.rs | 21 ++++++++- .../src/operation/ready/snapshot.rs | 25 ++++++++--- components/raftstore-v2/src/raft/storage.rs | 25 +++++++++++ components/raftstore-v2/src/router/message.rs | 3 ++ .../raftstore-v2/src/worker/tablet_gc.rs | 21 ++++++--- 11 files changed, 163 insertions(+), 23 deletions(-) diff --git a/components/engine_panic/src/raft_engine.rs b/components/engine_panic/src/raft_engine.rs index 854b75fe30d..c0539c1edd5 100644 --- a/components/engine_panic/src/raft_engine.rs +++ b/components/engine_panic/src/raft_engine.rs @@ -67,6 +67,10 @@ impl RaftEngineReadOnly for PanicEngine { panic!() } + fn get_dirty_mark(&self, raft_group_id: u64, tablet_index: u64) -> Result { + panic!() + } + fn get_recover_state(&self) -> Result> { panic!() } @@ -232,6 +236,10 @@ impl RaftLogBatch for PanicWriteBatch { panic!() } + fn put_dirty_mark(&mut self, raft_group_id: u64, tablet_index: u64, dirty: bool) -> Result<()> { + panic!() + } + fn put_recover_state(&mut self, state: &StoreRecoverState) -> Result<()> { panic!() } diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index d566ac3821b..a0a5acd5dd8 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -166,6 +166,10 @@ impl RaftEngineReadOnly for RocksEngine { panic!() } + fn get_dirty_mark(&self, _raft_group_id: u64, _tablet_index: u64) -> Result { + panic!() + } + fn get_recover_state(&self) -> Result> { self.get_msg_cf(CF_DEFAULT, keys::RECOVER_STATE_KEY) } @@ -439,6 +443,15 @@ impl RaftLogBatch for 
RocksWriteBatchVec { panic!() } + fn put_dirty_mark( + &mut self, + _raft_group_id: u64, + _tablet_index: u64, + _dirty: bool, + ) -> Result<()> { + panic!() + } + fn put_recover_state(&mut self, state: &StoreRecoverState) -> Result<()> { self.put_msg(keys::RECOVER_STATE_KEY, state) } diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index 68036eae1eb..671fed8b3cf 100644 --- a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -33,6 +33,7 @@ pub trait RaftEngineReadOnly: Sync + Send + 'static { ) -> Result>; /// Get the flushed index of the given CF. fn get_flushed_index(&self, raft_group_id: u64, cf: &str) -> Result>; + fn get_dirty_mark(&self, raft_group_id: u64, tablet_index: u64) -> Result; fn get_recover_state(&self) -> Result>; fn get_entry(&self, raft_group_id: u64, index: u64) -> Result>; @@ -201,6 +202,9 @@ pub trait RaftLogBatch: Send { apply_index: u64, ) -> Result<()>; + /// Mark a tablet may contain data that is not supposed to be in its range. + fn put_dirty_mark(&mut self, raft_group_id: u64, tablet_index: u64, dirty: bool) -> Result<()>; + /// Indicate whether region states should be recovered from raftdb and /// replay raft logs. /// When kvdb's write-ahead-log is disabled, the sequence number of the last diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 1ae148ba41c..3db865ed8ad 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -381,6 +381,7 @@ const REGION_STATE_KEY: &[u8] = &[0x03]; const APPLY_STATE_KEY: &[u8] = &[0x04]; const RECOVER_STATE_KEY: &[u8] = &[0x05]; const FLUSH_STATE_KEY: &[u8] = &[0x06]; +const DIRTY_MARK_KEY: &[u8] = &[0x07]; // All keys are of the same length. 
const KEY_PREFIX_LEN: usize = RAFT_LOG_STATE_KEY.len(); @@ -475,6 +476,16 @@ impl RaftLogBatchTrait for RaftLogBatch { Ok(()) } + fn put_dirty_mark(&mut self, raft_group_id: u64, tablet_index: u64, dirty: bool) -> Result<()> { + let key = encode_key(DIRTY_MARK_KEY, tablet_index); + if dirty { + self.0.put(raft_group_id, key.to_vec(), vec![]); + } else { + self.0.delete(raft_group_id, key.to_vec()); + } + Ok(()) + } + fn put_recover_state(&mut self, state: &StoreRecoverState) -> Result<()> { self.0 .put_message(STORE_STATE_ID, RECOVER_STATE_KEY.to_vec(), state) @@ -601,6 +612,11 @@ impl RaftEngineReadOnly for RaftLogEngine { Ok(index) } + fn get_dirty_mark(&self, raft_group_id: u64, tablet_index: u64) -> Result { + let key = encode_key(DIRTY_MARK_KEY, tablet_index); + Ok(self.0.get(raft_group_id, &key).is_some()) + } + fn get_recover_state(&self) -> Result> { self.0 .get_message(STORE_STATE_ID, RECOVER_STATE_KEY) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 8b05435246b..c05b58d0839 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -191,7 +191,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, } fn on_start(&mut self) { - if !self.fsm.peer.maybe_pause_for_recovery() { + if !self.fsm.peer.maybe_pause_for_recovery(self.store_ctx) { self.schedule_tick(PeerTick::Raft); } self.schedule_tick(PeerTick::SplitRegionCheck); @@ -308,6 +308,9 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, .on_request_split(self.store_ctx, request, ch) } PeerMsg::ForceCompactLog => self.on_compact_log_tick(true), + PeerMsg::TabletTrimmed { tablet_index } => { + self.fsm.peer_mut().on_tablet_trimmed(tablet_index) + } #[cfg(feature = "testexport")] PeerMsg::WaitFlush(ch) => self.fsm.peer_mut().on_wait_flush(ch), } diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs 
b/components/raftstore-v2/src/operation/command/admin/split.rs index 23fc6e3a8d9..71c1e095d8c 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -471,12 +471,18 @@ impl Peer { self.split_flow_control_mut().may_skip_split_check = false; self.add_pending_tick(PeerTick::SplitRegionCheck); } + self.storage_mut().set_has_dirty_data(true); + let mailbox = store_ctx.router.mailbox(self.region_id()).unwrap(); + let tablet_index = res.tablet_index; let _ = store_ctx .schedulers .tablet_gc .schedule(tablet_gc::Task::trim( self.tablet().unwrap().clone(), derived, + move || { + let _ = mailbox.force_send(PeerMsg::TabletTrimmed { tablet_index }); + }, )); let last_region_id = res.regions.last().unwrap().get_id(); @@ -521,6 +527,9 @@ impl Peer { self.state_changes_mut() .put_region_state(region_id, res.tablet_index, ®ion_state) .unwrap(); + self.state_changes_mut() + .put_dirty_mark(region_id, res.tablet_index, true) + .unwrap(); self.set_has_extra_write(); } @@ -574,13 +583,21 @@ impl Peer { store_ctx: &mut StoreContext, split_init: Box, ) { - let _ = store_ctx - .schedulers - .tablet_gc - .schedule(tablet_gc::Task::trim( - self.tablet().unwrap().clone(), - self.region(), - )); + let region_id = self.region_id(); + if self.storage().has_dirty_data() { + let tablet_index = self.storage().tablet_index(); + let mailbox = store_ctx.router.mailbox(region_id).unwrap(); + let _ = store_ctx + .schedulers + .tablet_gc + .schedule(tablet_gc::Task::trim( + self.tablet().unwrap().clone(), + self.region(), + move || { + let _ = mailbox.force_send(PeerMsg::TabletTrimmed { tablet_index }); + }, + )); + } if split_init.source_leader && self.leader_id() == INVALID_ID && self.term() == RAFT_INIT_LOG_TERM @@ -593,7 +610,6 @@ impl Peer { // reduce client query miss. 
self.region_heartbeat_pd(store_ctx); } - let region_id = self.region_id(); if split_init.check_split { self.add_pending_tick(PeerTick::SplitRegionCheck); @@ -633,6 +649,19 @@ impl Peer { self.set_has_extra_write(); } } + + pub fn on_tablet_trimmed(&mut self, tablet_index: u64) { + info!(self.logger, "tablet is trimmed"; "tablet_index" => tablet_index); + let region_id = self.region_id(); + let changes = self.state_changes_mut(); + changes + .put_dirty_mark(region_id, tablet_index, false) + .unwrap(); + self.set_has_extra_write(); + if self.storage().tablet_index() == tablet_index { + self.storage_mut().set_has_dirty_data(false); + } + } } #[cfg(test)] diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 29452533632..3ac500b7f49 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -50,6 +50,7 @@ use crate::{ fsm::{PeerFsmDelegate, Store}, raft::{Peer, Storage}, router::{ApplyTask, PeerMsg, PeerTick}, + worker::tablet_gc, }; const PAUSE_FOR_RECOVERY_GAP: u64 = 128; @@ -80,7 +81,25 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, } impl Peer { - pub fn maybe_pause_for_recovery(&mut self) -> bool { + pub fn maybe_pause_for_recovery(&mut self, store_ctx: &mut StoreContext) -> bool { + // The task needs to be scheduled even if the tablet may be replaced during + // recovery. Otherwise if there are merges during recovery, the FSM may + // be paused forever. 
+ if self.storage().has_dirty_data() { + let region_id = self.region_id(); + let mailbox = store_ctx.router.mailbox(region_id).unwrap(); + let tablet_index = self.storage().tablet_index(); + let _ = store_ctx + .schedulers + .tablet_gc + .schedule(tablet_gc::Task::trim( + self.tablet().unwrap().clone(), + self.region(), + move || { + let _ = mailbox.force_send(PeerMsg::TabletTrimmed { tablet_index }); + }, + )); + } let entry_storage = self.storage().entry_storage(); let committed_index = entry_storage.commit_index(); let applied_index = entry_storage.applied_index(); diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 8716f0c75ea..1919ce269a6 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -345,12 +345,23 @@ impl Storage { }; } - info!( - self.logger(), - "requesting snapshot"; - "request_index" => request_index, - "request_peer" => to, - ); + if self.has_dirty_data() { + info!(self.logger(), "delay generating snapshot as there are still dirty data"; "request_index" => request_index, "request_peer" => to); + // It's OK to delay. If there are still dirty data, it means the tablet is just + // split. In normal cases, all peers will apply split, so reject generates + // snapshot may actually good for all peers as they are more likely + // to be initialized by split. + return Err(raft::Error::Store( + raft::StorageError::SnapshotTemporarilyUnavailable, + )); + } else { + info!( + self.logger(), + "requesting snapshot"; + "request_index" => request_index, + "request_peer" => to, + ); + } let canceled = Arc::new(AtomicBool::new(false)); let index = Arc::new(AtomicU64::new(0)); let mut gen_snap_task = self.gen_snap_task_mut(); @@ -586,6 +597,8 @@ impl Storage { let (path, clean_split) = match self.split_init_mut() { // If index not match, the peer may accept a newer snapshot after split. 
Some(init) if init.scheduled && last_index == RAFT_INIT_LOG_INDEX => { + lb.put_dirty_mark(region_id, last_index, true).unwrap(); + self.set_has_dirty_data(true); (temp_split_path(®, region_id), false) } si => ( diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index b0eec5a196c..aca8f0fafce 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -35,6 +35,9 @@ pub struct Storage { /// by messages, it has not persisted any states, we need to persist them /// at least once dispite whether the state changes since create. ever_persisted: bool, + /// It may have dirty data after split. Use a flag to indicate whether it + /// has finished clean up. + has_dirty_data: bool, logger: Logger, /// Snapshot part. @@ -116,6 +119,16 @@ impl Storage { pub fn apply_trace(&self) -> &ApplyTrace { &self.apply_trace } + + #[inline] + pub fn set_has_dirty_data(&mut self, has_dirty_data: bool) { + self.has_dirty_data = has_dirty_data; + } + + #[inline] + pub fn has_dirty_data(&self) -> bool { + self.has_dirty_data + } } impl Storage { @@ -139,6 +152,17 @@ impl Storage { }; let region = region_state.get_region(); let logger = logger.new(o!("region_id" => region.id, "peer_id" => peer.get_id())); + let has_dirty_data = + match engine.get_dirty_mark(region.get_id(), region_state.get_tablet_index()) { + Ok(b) => b, + Err(e) => { + return Err(box_err!( + "failed to get dirty mark for {}: {:?}", + region.get_id(), + e + )); + } + }; let entry_storage = EntryStorage::new( peer.get_id(), engine, @@ -153,6 +177,7 @@ impl Storage { peer: peer.clone(), region_state, ever_persisted: persisted, + has_dirty_data, logger, snap_states: RefCell::new(HashMap::default()), gen_snap_task: RefCell::new(Box::new(None)), diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 930de5ff036..353e17b0cb0 100644 --- 
a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -182,6 +182,9 @@ pub enum PeerMsg { ch: CmdResChannel, }, ForceCompactLog, + TabletTrimmed { + tablet_index: u64, + }, /// A message that used to check if a flush is happened. #[cfg(feature = "testexport")] WaitFlush(super::FlushChannel), diff --git a/components/raftstore-v2/src/worker/tablet_gc.rs b/components/raftstore-v2/src/worker/tablet_gc.rs index aba477f883f..d4593223db3 100644 --- a/components/raftstore-v2/src/worker/tablet_gc.rs +++ b/components/raftstore-v2/src/worker/tablet_gc.rs @@ -17,6 +17,7 @@ pub enum Task { tablet: EK, start_key: Box<[u8]>, end_key: Box<[u8]>, + cb: Box, }, PrepareDestroy { tablet: EK, @@ -31,11 +32,9 @@ pub enum Task { impl Display for Task { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match *self { + match self { Task::Trim { - ref start_key, - ref end_key, - .. + start_key, end_key, .. } => write!( f, "trim tablet for start_key {}, end_key {}", @@ -65,11 +64,12 @@ impl Display for Task { impl Task { #[inline] - pub fn trim(tablet: EK, region: &Region) -> Self { + pub fn trim(tablet: EK, region: &Region, cb: impl FnOnce() + Send + 'static) -> Self { Task::Trim { tablet, start_key: region.get_start_key().into(), end_key: region.get_end_key().into(), + cb: Box::new(cb), } } @@ -110,7 +110,12 @@ impl Runner { } } - fn trim(tablet: &EK, start_key: &[u8], end_key: &[u8]) -> engine_traits::Result<()> { + fn trim( + tablet: &EK, + start_key: &[u8], + end_key: &[u8], + cb: Box, + ) -> engine_traits::Result<()> { let start_key = keys::data_key(start_key); let end_key = keys::data_end_key(end_key); let range1 = Range::new(&[], &start_key); @@ -121,6 +126,7 @@ impl Runner { for r in [range1, range2] { tablet.compact_range(Some(r.start_key), Some(r.end_key), false, 1)?; } + cb(); Ok(()) } @@ -195,8 +201,9 @@ where tablet, start_key, end_key, + cb, } => { - if let Err(e) = Self::trim(&tablet, &start_key, &end_key) { + if let 
Err(e) = Self::trim(&tablet, &start_key, &end_key, cb) { error!( self.logger, "failed to trim tablet"; From df3ee59d3d134e2ef5d8e5ec90d36d218b86e4a4 Mon Sep 17 00:00:00 2001 From: Jay Date: Thu, 5 Jan 2023 17:24:20 +0800 Subject: [PATCH 0446/1149] raftstore-v2: update region size after split check (#14019) ref tikv/tikv#12842 Signed-off-by: Jay Lee --- components/raftstore-v2/src/fsm/peer.rs | 7 +++ .../src/operation/command/admin/split.rs | 52 ++++++++++++++++++- components/raftstore-v2/src/operation/pd.rs | 6 +-- components/raftstore-v2/src/router/imp.rs | 8 +-- components/raftstore-v2/src/router/message.rs | 7 +++ 5 files changed, 71 insertions(+), 9 deletions(-) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index c05b58d0839..fee1a00993b 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -307,6 +307,13 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, .peer_mut() .on_request_split(self.store_ctx, request, ch) } + PeerMsg::UpdateRegionSize { size } => { + self.fsm.peer_mut().on_update_region_size(size) + } + PeerMsg::UpdateRegionKeys { keys } => { + self.fsm.peer_mut().on_update_region_keys(keys) + } + PeerMsg::ClearRegionSize => self.fsm.peer_mut().on_clear_region_size(), PeerMsg::ForceCompactLog => self.on_compact_log_tick(true), PeerMsg::TabletTrimmed { tablet_index } => { self.fsm.peer_mut().on_tablet_trimmed(tablet_index) diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 71c1e095d8c..f63f1f2ae17 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -74,7 +74,7 @@ pub struct SplitResult { // The index of the derived region in `regions` pub derived_index: usize, pub tablet_index: u64, - // Hack: in common case we should use generic, but split is an 
unfrequent + // Hack: in common case we should use generic, but split is an infrequent // event that performance is not critical. And using `Any` can avoid polluting // all existing code. tablet: Box, @@ -91,6 +91,8 @@ pub struct SplitInit { /// In-memory pessimistic locks that should be inherited from parent region pub locks: PeerPessimisticLocks, + approximate_size: Option, + approximate_keys: Option, } impl SplitInit { @@ -123,6 +125,20 @@ pub struct SplitFlowControl { size_diff_hint: i64, skip_split_count: u64, may_skip_split_check: bool, + approximate_size: Option, + approximate_keys: Option, +} + +impl SplitFlowControl { + #[inline] + pub fn approximate_size(&self) -> Option { + self.approximate_size + } + + #[inline] + pub fn approximate_keys(&self) -> Option { + self.approximate_keys + } } pub fn temp_split_path(registry: &TabletRegistry, region_id: u64) -> PathBuf { @@ -173,6 +189,25 @@ impl Peer { false } + pub fn on_update_region_size(&mut self, size: u64) { + self.split_flow_control_mut().approximate_size = Some(size); + self.add_pending_tick(PeerTick::SplitRegionCheck); + self.add_pending_tick(PeerTick::PdHeartbeat); + } + + pub fn on_update_region_keys(&mut self, keys: u64) { + self.split_flow_control_mut().approximate_keys = Some(keys); + self.add_pending_tick(PeerTick::SplitRegionCheck); + self.add_pending_tick(PeerTick::PdHeartbeat); + } + + pub fn on_clear_region_size(&mut self) { + let control = self.split_flow_control_mut(); + control.approximate_size.take(); + control.approximate_keys.take(); + self.add_pending_tick(PeerTick::SplitRegionCheck); + } + pub fn update_split_flow_control(&mut self, metrics: &ApplyMetrics) { let control = self.split_flow_control_mut(); control.size_diff_hint += metrics.size_diff_hint; @@ -454,6 +489,11 @@ impl Peer { self.record_tombstone_tablet(store_ctx, tablet, res.tablet_index); } + let new_region_count = res.regions.len() as u64; + let control = self.split_flow_control_mut(); + let estimated_size = 
control.approximate_size.map(|v| v / new_region_count); + let estimated_keys = control.approximate_keys.map(|v| v / new_region_count); + self.post_split(); if self.is_leader() { @@ -468,7 +508,10 @@ impl Peer { // so we send it independently here. self.report_batch_split_pd(store_ctx, res.regions.to_vec()); // After split, the peer may need to update its metrics. - self.split_flow_control_mut().may_skip_split_check = false; + let control = self.split_flow_control_mut(); + control.may_skip_split_check = false; + control.approximate_size = estimated_size; + control.approximate_keys = estimated_keys; self.add_pending_tick(PeerTick::SplitRegionCheck); } self.storage_mut().set_has_dirty_data(true); @@ -500,6 +543,8 @@ impl Peer { source_id: region_id, check_split: last_region_id == new_region_id, scheduled: false, + approximate_size: estimated_size, + approximate_keys: estimated_keys, locks, })); @@ -606,6 +651,9 @@ impl Peer { self.set_has_ready(); *self.txn_ext().pessimistic_locks.write() = split_init.locks; + let control = self.split_flow_control_mut(); + control.approximate_size = split_init.approximate_size; + control.approximate_keys = split_init.approximate_keys; // The new peer is likely to become leader, send a heartbeat immediately to // reduce client query miss. 
self.region_heartbeat_pd(store_ctx); diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 50b612f207d..d80258f14b1 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -77,7 +77,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, impl Peer { #[inline] - pub fn region_heartbeat_pd(&self, ctx: &StoreContext) { + pub fn region_heartbeat_pd(&mut self, ctx: &StoreContext) { let task = pd::Task::RegionHeartbeat(pd::RegionHeartbeatTask { term: self.term(), region: self.region().clone(), @@ -86,8 +86,8 @@ impl Peer { pending_peers: self.collect_pending_peers(ctx), written_bytes: self.self_stat().written_bytes, written_keys: self.self_stat().written_keys, - approximate_size: None, - approximate_keys: None, + approximate_size: self.split_flow_control_mut().approximate_size(), + approximate_keys: self.split_flow_control_mut().approximate_keys(), wait_data_peers: Vec::new(), }); if let Err(e) = ctx.schedulers.pd.schedule(task) { diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 7a10c6c6b16..315f8a0d8eb 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -33,12 +33,12 @@ impl AsyncReadNotifier for StoreRouter { } impl raftstore::coprocessor::StoreHandle for StoreRouter { - fn update_approximate_size(&self, _region_id: u64, _size: u64) { - // TODO + fn update_approximate_size(&self, region_id: u64, size: u64) { + let _ = self.send(region_id, PeerMsg::UpdateRegionSize { size }); } - fn update_approximate_keys(&self, _region_id: u64, _keys: u64) { - // TODO + fn update_approximate_keys(&self, region_id: u64, keys: u64) { + let _ = self.send(region_id, PeerMsg::UpdateRegionKeys { keys }); } fn ask_split( diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 
353e17b0cb0..c1e5f0d37dc 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -181,6 +181,13 @@ pub enum PeerMsg { request: RequestSplit, ch: CmdResChannel, }, + UpdateRegionSize { + size: u64, + }, + UpdateRegionKeys { + keys: u64, + }, + ClearRegionSize, ForceCompactLog, TabletTrimmed { tablet_index: u64, From cc9e69b925020e58b786bb811f1bcdba05a7c09f Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Fri, 6 Jan 2023 14:20:21 +0800 Subject: [PATCH 0447/1149] raftstore-v2: store heartbeat add kv size and snap size (#14016) ref tikv/tikv#12842 1. store heartbeat should add snapshot and kv engine used size Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Xinye Tao --- components/raftstore-v2/src/batch/store.rs | 1 + components/raftstore-v2/src/operation/pd.rs | 4 +--- components/raftstore-v2/src/worker/pd/mod.rs | 6 +++++- .../raftstore-v2/src/worker/pd/store_heartbeat.rs | 11 ++++++++--- .../tests/integrations/test_pd_heartbeat.rs | 1 + 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index e25ad53df8b..621f826619b 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -551,6 +551,7 @@ impl StoreSystem { pd_client, raft_engine.clone(), tablet_registry.clone(), + snap_mgr.clone(), router.clone(), workers.pd.remote(), concurrency_manager, diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index d80258f14b1..26945a3e176 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -50,9 +50,7 @@ impl Store { stats.set_bytes_written(0); stats.set_keys_written(0); stats.set_is_busy(false); - - // stats.set_query_stats(query_stats); - + // TODO: add query stats let task = pd::Task::StoreHeartbeat { stats }; if let Err(e) = 
ctx.schedulers.pd.schedule(task) { error!(self.logger(), "notify pd failed"; diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index bfcf3389754..b54d088db66 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -12,7 +12,8 @@ use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use kvproto::{metapb, pdpb}; use pd_client::PdClient; use raftstore::store::{ - util::KeysInfoFormatter, Config, FlowStatsReporter, ReadStats, TxnExt, WriteStats, + util::KeysInfoFormatter, Config, FlowStatsReporter, ReadStats, TabletSnapManager, TxnExt, + WriteStats, }; use slog::{error, info, Logger}; use tikv_util::{ @@ -105,6 +106,7 @@ where pd_client: Arc, raft_engine: ER, tablet_registry: TabletRegistry, + snap_mgr: TabletSnapManager, router: StoreRouter, remote: Remote, @@ -139,6 +141,7 @@ where pd_client: Arc, raft_engine: ER, tablet_registry: TabletRegistry, + snap_mgr: TabletSnapManager, router: StoreRouter, remote: Remote, concurrency_manager: ConcurrencyManager, @@ -152,6 +155,7 @@ where pd_client, raft_engine, tablet_registry, + snap_mgr, router, remote, region_peers: HashMap::default(), diff --git a/components/raftstore-v2/src/worker/pd/store_heartbeat.rs b/components/raftstore-v2/src/worker/pd/store_heartbeat.rs index 22bee3cbf26..ba75354c753 100644 --- a/components/raftstore-v2/src/worker/pd/store_heartbeat.rs +++ b/components/raftstore-v2/src/worker/pd/store_heartbeat.rs @@ -277,9 +277,14 @@ where } else { std::cmp::min(disk_cap, self.cfg.value().capacity.0) }; - // TODO: accurate snapshot size and kv engines size. 
- let snap_size = 0; - let kv_size = 0; + let mut kv_size = 0; + self.tablet_registry.for_each_opened_tablet(|_, cached| { + if let Some(tablet) = cached.latest() { + kv_size += tablet.get_engine_used_size().unwrap_or(0); + } + true + }); + let snap_size = self.snap_mgr.total_snap_size().unwrap(); let used_size = snap_size + kv_size + self diff --git a/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs b/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs index 96bcbbccf7a..09ead81c0c2 100644 --- a/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs +++ b/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs @@ -52,6 +52,7 @@ fn test_store_heartbeat() { let stats = block_on(cluster.node(0).pd_client().get_store_stats_async(store_id)).unwrap(); if stats.get_start_time() > 0 { assert_ne!(stats.get_capacity(), 0); + assert_ne!(stats.get_used_size(), 0); return; } std::thread::sleep(std::time::Duration::from_millis(50)); From c71fdfc49414005c4630e357e1ab6418ddf104f7 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Fri, 6 Jan 2023 17:52:22 +0800 Subject: [PATCH 0448/1149] log-backup: limit inflight raft msg from pitr (#13976) close tikv/tikv#13977 Signed-off-by: tabokie --- .../src/worker/pd/update_max_timestamp.rs | 3 - components/sst_importer/src/sst_importer.rs | 2 +- src/import/mod.rs | 2 +- src/import/sst_service.rs | 626 ++++++++---------- 4 files changed, 295 insertions(+), 338 deletions(-) diff --git a/components/raftstore-v2/src/worker/pd/update_max_timestamp.rs b/components/raftstore-v2/src/worker/pd/update_max_timestamp.rs index 0de3fb9a87c..178d00ebd15 100644 --- a/components/raftstore-v2/src/worker/pd/update_max_timestamp.rs +++ b/components/raftstore-v2/src/worker/pd/update_max_timestamp.rs @@ -93,13 +93,10 @@ where } }; - #[cfg(feature = "failpoints")] let delay = (|| { fail::fail_point!("delay_update_max_ts", |_| true); false })(); - #[cfg(not(feature = "failpoints"))] - let delay = false; if delay { 
info!(self.logger, "[failpoint] delay update max ts for 1s"; "region_id" => region_id); diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 3e06eb76899..8b6d64f483f 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -763,7 +763,7 @@ impl SstImporter { start_ts: u64, restore_ts: u64, file_buff: Arc>, - build_fn: &mut dyn FnMut(Vec, Vec), + mut build_fn: impl FnMut(Vec, Vec), ) -> Result> { let mut event_iter = EventIterator::new(file_buff.as_slice()); let mut smallest_key = None; diff --git a/src/import/mod.rs b/src/import/mod.rs index d3a522ede5e..e2fa3729e52 100644 --- a/src/import/mod.rs +++ b/src/import/mod.rs @@ -29,7 +29,7 @@ pub fn make_rpc_error(err: E) -> RpcStatus { #[macro_export] macro_rules! send_rpc_response { - ($res:ident, $sink:ident, $label:ident, $timer:ident) => {{ + ($res:expr, $sink:ident, $label:ident, $timer:ident) => {{ let res = match $res { Ok(resp) => { IMPORT_RPC_DURATION diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 8ce6f9961fb..ea52cad0095 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -1,7 +1,7 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - collections::HashMap, + collections::{HashMap, VecDeque}, future::Future, path::PathBuf, sync::{Arc, Mutex}, @@ -11,7 +11,7 @@ use std::{ use collections::HashSet; use engine_traits::{KvEngine, CF_DEFAULT, CF_WRITE}; use file_system::{set_io_type, IoType}; -use futures::{future::join_all, sink::SinkExt, stream::TryStreamExt, TryFutureExt}; +use futures::{sink::SinkExt, stream::TryStreamExt, TryFutureExt}; use futures_executor::{ThreadPool, ThreadPoolBuilder}; use grpcio::{ ClientStreamingSink, RequestStream, RpcContext, ServerStreamingSink, UnarySink, WriteFlags, @@ -19,9 +19,12 @@ use grpcio::{ use kvproto::{ encryptionpb::EncryptionMethod, errorpb, - import_sstpb::{RawWriteRequest_oneof_chunk as RawChunk, WriteRequest_oneof_chunk as Chunk, *}, + import_sstpb::{ + Error as ImportPbError, ImportSst, Range, RawWriteRequest_oneof_chunk as RawChunk, SstMeta, + SwitchMode, WriteRequest_oneof_chunk as Chunk, *, + }, kvrpcpb::Context, - raft_cmdpb::*, + raft_cmdpb::{CmdType, DeleteRequest, PutRequest, RaftCmdRequest, RaftRequestHeader, Request}, }; use protobuf::Message; use raftstore::{ @@ -44,6 +47,8 @@ use txn_types::{Key, WriteRef, WriteType}; use super::make_rpc_error; use crate::{import::duplicate_detect::DuplicateDetector, server::CONFIG_ROCKSDB_GAUGE}; +const MAX_INFLIGHT_RAFT_MSGS: usize = 64; + /// ImportSstService provides tikv-server with the ability to ingest SST files. /// /// It saves the SST sent from client to a file and then sends a command to @@ -74,6 +79,161 @@ pub struct SnapshotResult { term: u64, } +struct RequestCollector { + context: Context, + max_raft_req_size: usize, + /// Retain the last ts of each key in each request. + /// This is used for write CF because resolved ts observer hates duplicated + /// key in the same request. + write_reqs: HashMap, (Request, u64)>, + /// Collector favor that simple collect all items, and it do not contains + /// duplicated key-value. This is used for default CF. 
+ default_reqs: HashMap, Request>, + /// Size of all `Request`s. + unpacked_size: usize, + + pending_raft_reqs: Vec, +} + +impl RequestCollector { + fn new(context: Context, max_raft_req_size: usize) -> Self { + Self { + context, + max_raft_req_size, + write_reqs: HashMap::default(), + default_reqs: HashMap::default(), + unpacked_size: 0, + pending_raft_reqs: Vec::new(), + } + } + + fn accept_kv(&mut self, cf: &str, is_delete: bool, k: Vec, v: Vec) { + // Need to skip the empty key/value that could break the transaction or cause + // data corruption. see details at https://github.com/pingcap/tiflow/issues/5468. + if k.is_empty() || (!is_delete && v.is_empty()) { + return; + } + let mut req = Request::default(); + if is_delete { + let mut del = DeleteRequest::default(); + del.set_key(k); + del.set_cf(cf.to_string()); + req.set_cmd_type(CmdType::Delete); + req.set_delete(del); + } else { + if cf == CF_WRITE && !write_needs_restore(&v) { + return; + } + + let mut put = PutRequest::default(); + put.set_key(k); + put.set_value(v); + put.set_cf(cf.to_string()); + req.set_cmd_type(CmdType::Put); + req.set_put(put); + } + self.accept(cf, req); + } + + // we need to remove duplicate keys in here, since + // in https://github.com/tikv/tikv/blob/a401f78bc86f7e6ea6a55ad9f453ae31be835b55/components/resolved_ts/src/cmd.rs#L204 + // will panic if found duplicated entry during Vec. 
+ fn accept(&mut self, cf: &str, req: Request) { + let k = key_from_request(&req); + match cf { + CF_WRITE => { + let (encoded_key, ts) = match Key::split_on_ts_for(k) { + Ok(k) => k, + Err(err) => { + warn!( + "key without ts, skipping"; + "key" => %log_wrappers::Value::key(k), + "err" => %err + ); + return; + } + }; + if self + .write_reqs + .get(encoded_key) + .map(|(_, old_ts)| *old_ts < ts.into_inner()) + .unwrap_or(true) + { + self.unpacked_size += req.compute_size() as usize; + if let Some((v, _)) = self + .write_reqs + .insert(encoded_key.to_owned(), (req, ts.into_inner())) + { + self.unpacked_size -= v.get_cached_size() as usize; + } + } + } + CF_DEFAULT => { + self.unpacked_size += req.compute_size() as usize; + if let Some(v) = self.default_reqs.insert(k.to_owned(), req) { + self.unpacked_size -= v.get_cached_size() as usize; + } + } + _ => unreachable!(), + } + + if self.unpacked_size >= self.max_raft_req_size { + self.pack_all(); + } + } + + #[cfg(test)] + fn drain_unpacked_reqs(&mut self, cf: &str) -> Vec { + let res: Vec = if cf == CF_DEFAULT { + self.default_reqs.drain().map(|(_, req)| req).collect() + } else { + self.write_reqs.drain().map(|(_, (req, _))| req).collect() + }; + for r in &res { + self.unpacked_size -= r.get_cached_size() as usize; + } + res + } + + #[inline] + fn drain_raft_reqs(&mut self, take_unpacked: bool) -> std::vec::Drain<'_, RaftCmdRequest> { + if take_unpacked { + self.pack_all(); + } + self.pending_raft_reqs.drain(..) + } + + fn pack_all(&mut self) { + if self.unpacked_size == 0 { + return; + } + let mut cmd = RaftCmdRequest::default(); + let mut header = make_request_header(self.context.clone()); + // Set the UUID of header to prevent raftstore batching our requests. + // The current `resolved_ts` observer assumes that each batch of request doesn't + // has two writes to the same key. (Even with 2 different TS). That was true + // for normal cases because the latches reject concurrency write to keys. 
+ // However we have bypassed the latch layer :( + header.set_uuid(uuid::Uuid::new_v4().as_bytes().to_vec()); + cmd.set_header(header); + let mut reqs: Vec<_> = self.write_reqs.drain().map(|(_, (req, _))| req).collect(); + reqs.append(&mut self.default_reqs.drain().map(|(_, req)| req).collect()); + if reqs.is_empty() { + debug_assert!(false, "attempt to pack an empty request"); + return; + } + cmd.set_requests(reqs.into()); + + self.pending_raft_reqs.push(cmd); + self.unpacked_size = 0; + } + + #[inline] + fn is_empty(&self) -> bool { + self.pending_raft_reqs.is_empty() && self.unpacked_size == 0 + } +} + impl ImportSstService where E: KvEngine, @@ -281,6 +441,101 @@ where Ok(resp) } } + + async fn apply_imp( + mut req: ApplyRequest, + importer: Arc, + router: Router, + limiter: Limiter, + max_raft_size: usize, + ) -> std::result::Result, ImportPbError> { + type RaftWriteFuture = futures::channel::oneshot::Receiver; + async fn handle_raft_write(fut: RaftWriteFuture) -> std::result::Result<(), ImportPbError> { + match fut.await { + Err(e) => { + let msg = format!("failed to complete raft command: {}", e); + let mut e = ImportPbError::default(); + e.set_message(msg); + return Err(e); + } + Ok(mut r) if r.response.get_header().has_error() => { + let mut e = ImportPbError::default(); + e.set_message("failed to complete raft command".to_string()); + e.set_store_error(r.response.take_header().take_error()); + return Err(e); + } + _ => {} + } + Ok(()) + } + + let mut range: Option = None; + + let mut collector = RequestCollector::new(req.take_context(), max_raft_size * 7 / 8); + let mut metas = req.take_metas(); + let mut rules = req.take_rewrite_rules(); + // For compatibility with old requests. 
+ if req.has_meta() { + metas.push(req.take_meta()); + rules.push(req.take_rewrite_rule()); + } + let ext_storage = importer.wrap_kms( + importer + .external_storage_or_cache(req.get_storage_backend(), req.get_storage_cache_id())?, + false, + ); + + let mut inflight_futures: VecDeque = VecDeque::new(); + + let mut tasks = metas.iter().zip(rules.iter()).peekable(); + while let Some((meta, rule)) = tasks.next() { + let buff = importer.read_from_kv_file( + meta, + rule, + ext_storage.clone(), + req.get_storage_backend(), + &limiter, + )?; + if let Some(mut r) = importer.do_apply_kv_file( + meta.get_start_key(), + meta.get_end_key(), + meta.get_start_ts(), + meta.get_restore_ts(), + buff, + |k, v| collector.accept_kv(meta.get_cf(), meta.get_is_delete(), k, v), + )? { + if let Some(range) = range.as_mut() { + range.start = range.take_start().min(r.take_start()); + range.end = range.take_end().max(r.take_end()); + } else { + range = Some(r); + } + } + + let is_last_task = tasks.peek().is_none(); + for req in collector.drain_raft_reqs(is_last_task) { + while inflight_futures.len() >= MAX_INFLIGHT_RAFT_MSGS { + handle_raft_write(inflight_futures.pop_front().unwrap()).await?; + } + let (cb, future) = paired_future_callback(); + match router.send_command(req, Callback::write(cb), RaftCmdExtraOpts::default()) { + Ok(_) => inflight_futures.push_back(future), + Err(e) => { + let msg = format!("failed to send raft command: {}", e); + let mut e = ImportPbError::default(); + e.set_message(msg); + return Err(e); + } + } + } + } + assert!(collector.is_empty()); + for fut in inflight_futures { + handle_raft_write(fut).await?; + } + + Ok(range) + } } #[macro_export] @@ -375,8 +630,7 @@ where } let task = async move { - let res = Ok(SwitchModeResponse::default()); - crate::send_rpc_response!(res, sink, label, timer); + crate::send_rpc_response!(Ok(SwitchModeResponse::default()), sink, label, timer); }; ctx.spawn(task); } @@ -448,7 +702,7 @@ where 
.observe(start.saturating_elapsed().as_secs_f64()); if let Err(e) = importer.remove_dir(req.get_prefix()) { - let mut import_err = kvproto::import_sstpb::Error::default(); + let mut import_err = ImportPbError::default(); import_err.set_message(format!("failed to remove directory: {}", e)); resp.set_error(import_err); } @@ -456,176 +710,37 @@ where .with_label_values(&[label]) .observe(start.saturating_elapsed().as_secs_f64()); - let resp = Ok(resp); - crate::send_rpc_response!(resp, sink, label, timer); + crate::send_rpc_response!(Ok(resp), sink, label, timer); }; self.threads.spawn(handle_task); } // Downloads KV file and performs key-rewrite then apply kv into this tikv // store. - fn apply( - &mut self, - _ctx: RpcContext<'_>, - mut req: ApplyRequest, - sink: UnarySink, - ) { + fn apply(&mut self, _ctx: RpcContext<'_>, req: ApplyRequest, sink: UnarySink) { let label = "apply"; - let timer = Instant::now_coarse(); - let importer = Arc::clone(&self.importer); + let start = Instant::now(); + let importer = self.importer.clone(); let router = self.router.clone(); let limiter = self.limiter.clone(); - let start = Instant::now(); - let raft_size = self.raft_entry_max_size; + let max_raft_size = self.raft_entry_max_size.0 as usize; let handle_task = async move { // Records how long the apply task waits to be scheduled. sst_importer::metrics::IMPORTER_APPLY_DURATION .with_label_values(&["queue"]) .observe(start.saturating_elapsed().as_secs_f64()); - let mut start_apply = Instant::now(); - let mut futs = vec![]; - let mut apply_resp = ApplyResponse::default(); - let context = req.take_context(); - let mut rules = req.take_rewrite_rules(); - let mut metas = req.take_metas(); - // For compatibility with old requests. 
- if req.has_meta() { - metas.push(req.take_meta()); - rules.push(req.take_rewrite_rule()); - } - let result = (|| -> Result<()> { - let mut cmd_reqs = vec![]; - let mut reqs_default = RequestCollector::from_cf(CF_DEFAULT); - let mut reqs_write = RequestCollector::from_cf(CF_WRITE); - let mut req_default_size = 0_u64; - let mut req_write_size = 0_u64; - let mut range: Option = None; - let ext_storage = { - let inner = importer.wrap_kms( - importer.external_storage_or_cache( - req.get_storage_backend(), - req.get_storage_cache_id(), - )?, - false, - ); - inner - }; - - for (i, meta) in metas.iter().enumerate() { - let (reqs, req_size) = if meta.get_cf() == CF_DEFAULT { - (&mut reqs_default, &mut req_default_size) - } else { - (&mut reqs_write, &mut req_write_size) - }; - - let mut build_req_fn = build_apply_request( - req_size, - raft_size.0, - reqs, - cmd_reqs.as_mut(), - meta.get_is_delete(), - meta.get_cf(), - context.clone(), - ); - - let buff = importer.read_from_kv_file( - meta, - &rules[i], - Arc::clone(&ext_storage), - req.get_storage_backend(), - &limiter, - )?; - let r: Option = importer.do_apply_kv_file( - meta.get_start_key(), - meta.get_end_key(), - meta.get_start_ts(), - meta.get_restore_ts(), - buff, - &mut build_req_fn, - )?; - - if let Some(mut r) = r { - range = match range { - Some(mut v) => { - let s = v.take_start().min(r.take_start()); - let e = v.take_end().max(r.take_end()); - Some(Range { - start: s, - end: e, - ..Default::default() - }) - } - None => Some(r), - }; - } - } + let mut resp = ApplyResponse::default(); - if !reqs_default.is_empty() { - let cmd = make_request(&mut reqs_default, context.clone()); - cmd_reqs.push(cmd); - IMPORTER_APPLY_BYTES.observe(req_default_size as _); - } - if !reqs_write.is_empty() { - let cmd = make_request(&mut reqs_write, context); - cmd_reqs.push(cmd); - IMPORTER_APPLY_BYTES.observe(req_write_size as _); - } - - start_apply = Instant::now(); - for cmd in cmd_reqs { - let (cb, future) = 
paired_future_callback(); - match router.send_command(cmd, Callback::write(cb), RaftCmdExtraOpts::default()) - { - Ok(_) => futs.push(future), - Err(e) => { - let mut import_err = kvproto::import_sstpb::Error::default(); - import_err.set_message(format!("failed to send raft command: {}", e)); - apply_resp.set_error(import_err); - } - } - } - if let Some(r) = range { - apply_resp.set_range(r); - } - Ok(()) - })(); - if let Err(e) = result { - apply_resp.set_error(e.into()); + match Self::apply_imp(req, importer, router, limiter, max_raft_size).await { + Ok(Some(r)) => resp.set_range(r), + Err(e) => resp.set_error(e), + _ => {} } - let resp = Ok(join_all(futs).await.iter().fold(apply_resp, |mut resp, x| { - match x { - Err(e) => { - let mut import_err = kvproto::import_sstpb::Error::default(); - import_err.set_message(format!("failed to complete raft command: {}", e)); - resp.set_error(import_err); - } - Ok(r) => { - if r.response.get_header().has_error() { - let mut import_err = kvproto::import_sstpb::Error::default(); - let err = r.response.get_header().get_error(); - import_err.set_message("failed to complete raft command".to_string()); - // FIXME: if there are many errors, we may lose some of them here. - import_err.set_store_error(err.clone()); - warn!("failed to apply the file to the store"; "error" => ?err); - resp.set_error(import_err); - } - } - } - resp - })); - - // Records how long the apply task waits to be scheduled. 
- sst_importer::metrics::IMPORTER_APPLY_DURATION - .with_label_values(&["apply"]) - .observe(start_apply.saturating_elapsed().as_secs_f64()); - sst_importer::metrics::IMPORTER_APPLY_DURATION - .with_label_values(&["finish"]) - .observe(start.saturating_elapsed().as_secs_f64()); debug!("finished apply kv file with {:?}", resp); - crate::send_rpc_response!(resp, sink, label, timer); + crate::send_rpc_response!(Ok(resp), sink, label, start); }; self.block_threads.spawn_ok(handle_task); } @@ -678,8 +793,7 @@ where }, Err(e) => resp.set_error(e.into()), } - let resp = Ok(resp); - crate::send_rpc_response!(resp, sink, label, timer); + crate::send_rpc_response!(Ok(resp), sink, label, timer); }; self.threads.spawn(handle_task); @@ -848,8 +962,12 @@ where }); let ctx_task = async move { - let res = Ok(SetDownloadSpeedLimitResponse::default()); - crate::send_rpc_response!(res, sink, label, timer); + crate::send_rpc_response!( + Ok(SetDownloadSpeedLimitResponse::default()), + sink, + label, + timer + ); }; ctx.spawn(ctx_task); @@ -958,70 +1076,6 @@ fn pb_error_inc(type_: &str, e: &errorpb::Error) { IMPORTER_ERROR_VEC.with_label_values(&[type_, label]).inc(); } -enum RequestCollector { - /// Retain the last ts of each key in each request. - /// This is used for write CF because resolved ts observer hates duplicated - /// key in the same request. - RetainLastTs(HashMap, (Request, u64)>), - /// Collector favor that simple collect all items, and it do not contains - /// duplicated key-value. This is used for default CF. 
- KeepAll(HashMap, Request>), -} - -impl RequestCollector { - fn from_cf(cf: &str) -> Self { - match cf { - CF_DEFAULT | "" => Self::KeepAll(Default::default()), - CF_WRITE => Self::RetainLastTs(Default::default()), - _ => { - warn!("unknown cf name, using default request collector"; "cf" => %cf); - Self::RetainLastTs(Default::default()) - } - } - } - - fn accept(&mut self, req: Request) { - let k = key_from_request(&req); - match self { - RequestCollector::RetainLastTs(ref mut reqs) => { - let (encoded_key, ts) = match Key::split_on_ts_for(k) { - Ok(k) => k, - Err(err) => { - warn!("key without ts, skipping"; "key" => %log_wrappers::Value::key(k), "err" => %err); - return; - } - }; - if reqs - .get(encoded_key) - .map(|(_, old_ts)| *old_ts < ts.into_inner()) - .unwrap_or(true) - { - reqs.insert(encoded_key.to_owned(), (req, ts.into_inner())); - } - } - RequestCollector::KeepAll(ref mut reqs) => { - reqs.insert(k.to_owned(), req); - } - } - } - - fn drain(&mut self) -> Vec { - match self { - RequestCollector::RetainLastTs(ref mut reqs) => { - reqs.drain().map(|(_, (req, _))| req).collect() - } - RequestCollector::KeepAll(ref mut reqs) => reqs.drain().map(|(_, req)| req).collect(), - } - } - - fn is_empty(&self) -> bool { - match self { - RequestCollector::RetainLastTs(reqs) => reqs.is_empty(), - RequestCollector::KeepAll(reqs) => reqs.is_empty(), - } - } -} - fn key_from_request(req: &Request) -> &[u8] { if req.has_put() { return req.get_put().get_key(); @@ -1029,8 +1083,7 @@ fn key_from_request(req: &Request) -> &[u8] { if req.has_delete() { return req.get_delete().get_key(); } - warn!("trying to extract key from request is neither put nor delete."); - b"" + panic!("trying to extract key from request is neither put nor delete.") } fn make_request_header(mut context: Context) -> RaftRequestHeader { @@ -1042,77 +1095,6 @@ fn make_request_header(mut context: Context) -> RaftRequestHeader { header } -fn make_request(reqs: &mut RequestCollector, context: Context) -> 
RaftCmdRequest { - let mut cmd = RaftCmdRequest::default(); - let mut header = make_request_header(context); - // Set the UUID of header to prevent raftstore batching our requests. - // The current `resolved_ts` observer assumes that each batch of request doesn't - // has two writes to the same key. (Even with 2 different TS). That was true - // for normal cases because the latches reject concurrency write to keys. - // However we have bypassed the latch layer :( - header.set_uuid(uuid::Uuid::new_v4().as_bytes().to_vec()); - cmd.set_header(header); - cmd.set_requests(reqs.drain().into()); - cmd -} - -// we need to remove duplicate keys in here, since -// in https://github.com/tikv/tikv/blob/a401f78bc86f7e6ea6a55ad9f453ae31be835b55/components/resolved_ts/src/cmd.rs#L204 -// will panic if found duplicated entry during Vec. -fn build_apply_request<'a, 'b>( - req_size: &'a mut u64, - raft_size: u64, - reqs: &'a mut RequestCollector, - cmd_reqs: &'a mut Vec, - is_delete: bool, - cf: &'b str, - context: Context, -) -> Box, Vec) + 'b> -where - 'a: 'b, -{ - // use callback to collect kv data. - Box::new(move |k: Vec, v: Vec| { - // Need to skip the empty key/value that could break the transaction or cause - // data corruption. see details at https://github.com/pingcap/tiflow/issues/5468. - if k.is_empty() || (!is_delete && v.is_empty()) { - return; - } - - let mut req = Request::default(); - if is_delete { - let mut del = DeleteRequest::default(); - del.set_key(k); - del.set_cf(cf.to_string()); - req.set_cmd_type(CmdType::Delete); - req.set_delete(del); - } else { - if cf == CF_WRITE && !write_needs_restore(&v) { - return; - } - - let mut put = PutRequest::default(); - put.set_key(k); - put.set_value(v); - put.set_cf(cf.to_string()); - req.set_cmd_type(CmdType::Put); - req.set_put(put); - } - - // When the request size get grow to max request size, - // build the request and add it to a batch. 
- if *req_size + req.compute_size() as u64 > raft_size * 7 / 8 { - IMPORTER_APPLY_BYTES.observe(*req_size as _); - *req_size = 0; - let cmd = make_request(reqs, context.clone()); - cmd_reqs.push(cmd); - } - - *req_size += req.compute_size() as u64; - reqs.accept(req); - }) -} - fn write_needs_restore(write: &[u8]) -> bool { let w = WriteRef::parse(write); match w { @@ -1146,9 +1128,7 @@ mod test { use kvproto::{kvrpcpb::Context, raft_cmdpb::*}; use txn_types::{Key, TimeStamp, Write, WriteType}; - use crate::import::sst_service::{ - build_apply_request, key_from_request, make_request, RequestCollector, - }; + use crate::import::sst_service::{key_from_request, RequestCollector}; fn write(key: &[u8], ty: WriteType, commit_ts: u64, start_ts: u64) -> (Vec, Vec) { let k = Key::from_raw(key).append_ts(TimeStamp::new(commit_ts)); @@ -1213,30 +1193,14 @@ mod test { } fn run_case(c: &Case) { - let mut cmds = vec![]; - let mut reqs = RequestCollector::from_cf(c.cf); - let mut req_size = 0_u64; - - let mut builder = build_apply_request( - &mut req_size, - 1024, - &mut reqs, - &mut cmds, - c.is_delete, - c.cf, - Context::new(), - ); + let mut collector = RequestCollector::new(Context::new(), 1024); for (k, v) in c.mutations.clone() { - builder(k, v); - } - drop(builder); - if !reqs.is_empty() { - let cmd = make_request(&mut reqs, Context::new()); - cmds.push(cmd); + collector.accept_kv(c.cf, c.is_delete, k, v); } + let reqs = collector.drain_raft_reqs(true); - let mut req1: HashMap<_, _> = cmds + let mut req1: HashMap<_, _> = reqs .into_iter() .flat_map(|mut x| x.take_requests().into_iter()) .map(|req| { @@ -1318,8 +1282,7 @@ mod test { #[test] fn test_request_collector_with_write_cf() { - let mut request_collector = RequestCollector::from_cf(CF_WRITE); - assert_eq!(request_collector.is_empty(), true); + let mut request_collector = RequestCollector::new(Context::new(), 102400); let reqs = vec![ write_req(b"foo", WriteType::Put, 40, 39), write_req(b"aar", WriteType::Put, 38, 
37), @@ -1333,23 +1296,21 @@ mod test { ]; for req in reqs { - request_collector.accept(req); + request_collector.accept(CF_WRITE, req); } - assert_eq!(request_collector.is_empty(), false); - let mut reqs = request_collector.drain(); + let mut reqs: Vec<_> = request_collector.drain_unpacked_reqs(CF_WRITE); reqs.sort_by(|r1, r2| { let k1 = key_from_request(r1); let k2 = key_from_request(r2); k1.cmp(k2) }); assert_eq!(reqs, reqs_result); - assert_eq!(request_collector.is_empty(), true); + assert!(request_collector.is_empty()); } #[test] fn test_request_collector_with_default_cf() { - let mut request_collector = RequestCollector::from_cf(CF_DEFAULT); - assert_eq!(request_collector.is_empty(), true); + let mut request_collector = RequestCollector::new(Context::new(), 102400); let reqs = vec![ default_req(b"foo", b"", 39), default_req(b"zzz", b"", 40), @@ -1363,10 +1324,9 @@ mod test { ]; for req in reqs { - request_collector.accept(req); + request_collector.accept(CF_DEFAULT, req); } - assert_eq!(request_collector.is_empty(), false); - let mut reqs = request_collector.drain(); + let mut reqs: Vec<_> = request_collector.drain_unpacked_reqs(CF_DEFAULT); reqs.sort_by(|r1, r2| { let k1 = key_from_request(r1); let (k1, ts1) = Key::split_on_ts_for(k1).unwrap(); @@ -1376,6 +1336,6 @@ mod test { k1.cmp(k2).then(ts1.cmp(&ts2)) }); assert_eq!(reqs, reqs_result); - assert_eq!(request_collector.is_empty(), true); + assert!(request_collector.is_empty()); } } From 71efe9e6af802761bec9fcc0e468035cf3adb3b7 Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 6 Jan 2023 18:16:21 +0800 Subject: [PATCH 0449/1149] raftstore-v2: adaptive apply (#14020) ref tikv/tikv#12842 Make apply adaptive to reduce high tail latency. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/fsm/apply.rs | 12 ++- .../operation/command/admin/compact_log.rs | 99 +++++++++++++------ .../operation/command/admin/conf_change.rs | 11 ++- .../src/operation/command/admin/split.rs | 4 +- .../raftstore-v2/src/operation/command/mod.rs | 88 ++++++++++++++++- components/raftstore-v2/src/operation/life.rs | 8 +- components/raftstore-v2/src/operation/mod.rs | 6 +- .../src/operation/ready/apply_trace.rs | 17 ++-- .../raftstore-v2/src/operation/ready/mod.rs | 4 +- .../src/operation/ready/snapshot.rs | 28 +++--- components/raftstore-v2/src/raft/apply.rs | 21 +++- components/raftstore-v2/src/raft/storage.rs | 5 +- 12 files changed, 225 insertions(+), 78 deletions(-) diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index b81d31329cb..1544a703c6d 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -10,7 +10,7 @@ use crossbeam::channel::TryRecvError; use engine_traits::{FlushState, KvEngine, TabletRegistry}; use futures::{compat::Future01CompatExt, FutureExt, StreamExt}; use kvproto::{metapb, raft_serverpb::RegionLocalState}; -use raftstore::store::ReadTask; +use raftstore::store::{Config, ReadTask}; use slog::Logger; use tikv_util::{ mpsc::future::{self, Receiver, Sender, WakePolicy}, @@ -58,6 +58,7 @@ pub struct ApplyFsm { impl ApplyFsm { pub fn new( + cfg: &Config, peer: metapb::Peer, region_state: RegionLocalState, res_reporter: R, @@ -70,6 +71,7 @@ impl ApplyFsm { ) -> (ApplyScheduler, Self) { let (tx, rx) = future::unbounded(WakePolicy::Immediately); let apply = Apply::new( + cfg, peer, region_state, res_reporter, @@ -100,6 +102,7 @@ impl ApplyFsm { res = self.receiver.next().fuse() => res, _ = timeout.fuse() => None, }; + self.apply.on_start_apply(); let mut task = match res { Some(r) => r, None => { @@ -116,10 +119,10 @@ impl ApplyFsm { ApplyTask::CommittedEntries(ce) => 
self.apply.apply_committed_entries(ce).await, ApplyTask::Snapshot(snap_task) => self.apply.schedule_gen_snapshot(snap_task), ApplyTask::UnsafeWrite(raw_write) => self.apply.apply_unsafe_write(raw_write), - ApplyTask::ManualFlush => self.apply.on_manual_flush(), + ApplyTask::ManualFlush => self.apply.on_manual_flush().await, } - // TODO: yield after some time. + self.apply.maybe_flush().await; // Perhaps spin sometime? match self.receiver.try_recv() { @@ -128,7 +131,8 @@ impl ApplyFsm { Err(TryRecvError::Disconnected) => return, } } - self.apply.flush(); + let written_bytes = self.apply.flush(); + self.apply.maybe_reschedule(written_bytes).await; } } } diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 7127cd45306..39cf02de775 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -23,7 +23,7 @@ use raftstore::{ Result, }; use slog::{debug, error, info}; -use tikv_util::box_err; +use tikv_util::{box_err, log::SlogFormat}; use crate::{ batch::StoreContext, @@ -303,6 +303,35 @@ impl Peer { } } + #[inline] + pub fn record_tombstone_tablet_for_destroy( + &mut self, + ctx: &StoreContext, + task: &mut WriteTask, + ) { + let compact_log_context = self.compact_log_context_mut(); + assert!( + compact_log_context.tombstone_tablets_wait_index.is_empty(), + "{} all tombstone should be cleared before being destroyed.", + SlogFormat(&self.logger) + ); + let tablet = match self.tablet() { + Some(tablet) => tablet.clone(), + None => return, + }; + let region_id = self.region_id(); + let applied_index = self.entry_storage().applied_index(); + let sched = ctx.schedulers.tablet_gc.clone(); + let _ = sched.schedule(tablet_gc::Task::prepare_destroy( + tablet, + self.region_id(), + applied_index, + )); + task.persisted_cbs.push(Box::new(move || { + let _ = 
sched.schedule(tablet_gc::Task::destroy(region_id, applied_index)); + })); + } + pub fn on_apply_res_compact_log( &mut self, store_ctx: &mut StoreContext, @@ -342,8 +371,17 @@ impl Peer { self.set_has_extra_write(); // All logs < perssited_apply will be deleted, so should check with +1. - if old_truncated + 1 < self.storage().apply_trace().persisted_apply_index() { - self.compact_log_from_engine(store_ctx); + if old_truncated + 1 < self.storage().apply_trace().persisted_apply_index() + && let Some(index) = self.compact_log_index() { + // Raft Engine doesn't care about first index. + if let Err(e) = + store_ctx + .engine + .gc(self.region_id(), 0, index, self.state_changes_mut()) + { + error!(self.logger, "failed to compact raft logs"; "err" => ?e); + } + // Extra write set right above. } let context = self.compact_log_context_mut(); @@ -354,38 +392,44 @@ impl Peer { (context.approximate_log_size as f64 * (remain_cnt as f64 / total_cnt as f64)) as u64; } - /// Called when apply index is persisted. There are two different situation: - /// - /// Generally, additional writes are triggered to persist apply index. In - /// this case task is `Some`. But after applying snapshot, the apply - /// index is persisted ahead of time. In this case task is `None`. + /// Called when apply index is persisted. #[inline] pub fn on_advance_persisted_apply_index( &mut self, store_ctx: &mut StoreContext, old_persisted: u64, - task: Option<&mut WriteTask>, + task: &mut WriteTask, ) { let new_persisted = self.storage().apply_trace().persisted_apply_index(); if old_persisted < new_persisted { let region_id = self.region_id(); // TODO: batch it. + // TODO: avoid allocation if there is nothing to delete. 
if let Err(e) = store_ctx.engine.delete_all_but_one_states_before( region_id, new_persisted, - self.state_changes_mut(), + task.extra_write + .ensure_v2(|| self.entry_storage().raft_engine().log_batch(0)), ) { error!(self.logger, "failed to delete raft states"; "err" => ?e); - } else { - self.set_has_extra_write(); } // If it's snapshot, logs are gc already. - if task.is_some() && old_persisted < self.entry_storage().truncated_index() + 1 { - self.compact_log_from_engine(store_ctx); + if !task.has_snapshot + && old_persisted < self.entry_storage().truncated_index() + 1 + && let Some(index) = self.compact_log_index() { + let batch = task.extra_write.ensure_v2(|| self.entry_storage().raft_engine().log_batch(0)); + // Raft Engine doesn't care about first index. + if let Err(e) = + store_ctx + .engine + .gc(self.region_id(), 0, index, batch) + { + error!(self.logger, "failed to compact raft logs"; "err" => ?e); + } } if self.remove_tombstone_tablets(new_persisted) { let sched = store_ctx.schedulers.tablet_gc.clone(); - if let Some(task) = task { + if !task.has_snapshot { task.persisted_cbs.push(Box::new(move || { let _ = sched.schedule(tablet_gc::Task::destroy(region_id, new_persisted)); })); @@ -397,28 +441,19 @@ impl Peer { } } - fn compact_log_from_engine(&mut self, store_ctx: &mut StoreContext) { + fn compact_log_index(&mut self) -> Option { let truncated = self.entry_storage().truncated_index() + 1; let persisted_applied = self.storage().apply_trace().persisted_apply_index(); let compact_index = std::cmp::min(truncated, persisted_applied); if compact_index == RAFT_INIT_LOG_INDEX + 1 { // There is no logs at RAFT_INIT_LOG_INDEX, nothing to delete. - return; - } - // Raft Engine doesn't care about first index. - if let Err(e) = - store_ctx - .engine - .gc(self.region_id(), 0, compact_index, self.state_changes_mut()) - { - error!(self.logger, "failed to compact raft logs"; "err" => ?e); - } else { - // TODO: make this debug when stable. 
- info!(self.logger, "compact log"; - "index" => compact_index, - "apply_trace" => ?self.storage().apply_trace(), - "truncated" => ?self.entry_storage().apply_state()); - self.set_has_extra_write(); + return None; } + // TODO: make this debug when stable. + info!(self.logger, "compact log"; + "index" => compact_index, + "apply_trace" => ?self.storage().apply_trace(), + "truncated" => ?self.entry_storage().apply_state()); + Some(compact_index) } } diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 6c041a551fe..8c9771b0201 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -9,7 +9,7 @@ use std::time::Instant; -use engine_traits::{KvEngine, RaftEngine}; +use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use kvproto::{ metapb::{self, PeerRole}, raft_cmdpb::{AdminRequest, AdminResponse, ChangePeerRequest, RaftCmdRequest}, @@ -146,7 +146,7 @@ impl Peer { let remove_self = conf_change.region_state.get_state() == PeerState::Tombstone; self.storage_mut() - .set_region_state(conf_change.region_state); + .set_region_state(conf_change.region_state.clone()); if self.is_leader() { info!( self.logger, @@ -189,7 +189,14 @@ impl Peer { self.raft_group().raft.state, ); if remove_self { + // When self is destroyed, all metas will be cleaned in `start_destroy`. 
self.mark_for_destroy(None); + } else { + let region_id = self.region_id(); + self.state_changes_mut() + .put_region_state(region_id, conf_change.index, &conf_change.region_state) + .unwrap(); + self.set_has_extra_write(); } } } diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index f63f1f2ae17..d01b1371338 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -731,7 +731,7 @@ mod test { raft_cmdpb::{BatchSplitRequest, SplitRequest}, raft_serverpb::{PeerState, RegionLocalState}, }; - use raftstore::store::cmd_resp::new_error; + use raftstore::store::{cmd_resp::new_error, Config}; use slog::o; use tempfile::TempDir; use tikv_util::{ @@ -872,6 +872,7 @@ mod test { let (read_scheduler, _rx) = dummy_scheduler(); let (reporter, _) = MockReporter::new(); let mut apply = Apply::new( + &Config::default(), region .get_peers() .iter() @@ -1059,6 +1060,7 @@ mod test { // Split will create checkpoint tablet, so if there are some writes before // split, they should be flushed immediately. apply.apply_put(CF_DEFAULT, 50, b"k04", b"v4").unwrap(); + apply.apply_flow_control_mut().set_need_flush(true); assert!(!WriteBatch::is_empty(apply.write_batch.as_ref().unwrap())); splits.mut_requests().clear(); splits diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 439d2136d76..a6ab227d402 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -16,7 +16,7 @@ //! - Applied result are sent back to peer fsm, and update memory state in //! `on_apply_res`. 
-use std::mem; +use std::{mem, time::Duration}; use engine_traits::{KvEngine, RaftEngine, WriteBatch, WriteOptions}; use kvproto::raft_cmdpb::{ @@ -35,7 +35,7 @@ use raftstore::{ local_metrics::RaftMetrics, metrics::APPLY_TASK_WAIT_TIME_HISTOGRAM, msg::ErrorCallback, - util, WriteCallback, + util, Config, WriteCallback, }, Error, Result, }; @@ -111,6 +111,7 @@ impl Peer { let logger = self.logger.clone(); let read_scheduler = self.storage().read_scheduler(); let (apply_scheduler, mut apply_fsm) = ApplyFsm::new( + &store_ctx.cfg, self.peer().clone(), region_state, mailbox, @@ -268,6 +269,8 @@ impl Peer { if !self.serving() { return; } + // TODO: remove following log once stable. + info!(self.logger, "on_apply_res"; "apply_res" => ?apply_res); // It must just applied a snapshot. if apply_res.applied_index < self.entry_storage().first_index() { // Ignore admin command side effects, otherwise it may split incomplete @@ -334,7 +337,38 @@ impl Peer { } } +#[derive(Debug)] +pub struct ApplyFlowControl { + timer: Instant, + last_check_keys: u64, + need_flush: bool, + yield_time: Duration, + yield_written_bytes: u64, +} + +impl ApplyFlowControl { + pub fn new(cfg: &Config) -> Self { + ApplyFlowControl { + timer: Instant::now_coarse(), + last_check_keys: 0, + need_flush: false, + yield_time: cfg.apply_yield_duration.0, + yield_written_bytes: cfg.apply_yield_write_size.0, + } + } + + #[cfg(test)] + pub fn set_need_flush(&mut self, need_flush: bool) { + self.need_flush = need_flush; + } +} + impl Apply { + #[inline] + pub fn on_start_apply(&mut self) { + self.apply_flow_control_mut().timer = Instant::now_coarse(); + } + #[inline] fn should_skip(&self, off: usize, index: u64) -> bool { let log_recovery = self.log_recovery(); @@ -370,13 +404,15 @@ impl Apply { } } } + self.apply_flow_control_mut().need_flush = true; } - pub fn on_manual_flush(&mut self) { - self.flush(); + pub async fn on_manual_flush(&mut self) { + let written_bytes = self.flush(); if let Err(e) = 
self.tablet().flush_cfs(&[], false) { warn!(self.logger, "failed to flush: {:?}", e); } + self.maybe_reschedule(written_bytes).await } #[inline] @@ -414,6 +450,7 @@ impl Apply { } // Flush may be triggerred in the middle, so always update the index and term. self.set_apply_progress(e.index, e.term); + self.apply_flow_control_mut().need_flush = true; } } @@ -544,10 +581,49 @@ impl Apply { } } + fn should_reschedule(&self, written_bytes: u64) -> bool { + let control = self.apply_flow_control(); + written_bytes >= control.yield_written_bytes + || control.timer.saturating_elapsed() >= control.yield_time + } + + pub async fn maybe_reschedule(&mut self, written_bytes: u64) { + if self.should_reschedule(written_bytes) { + yatp::task::future::reschedule().await; + self.apply_flow_control_mut().timer = Instant::now_coarse(); + } + } + + /// Check whether it needs to flush. + /// + /// We always batch as much inputs as possible, flush will only be triggered + /// when it has been processing too long. + pub async fn maybe_flush(&mut self) { + let buffer_keys = self.metrics.written_keys; + let control = self.apply_flow_control_mut(); + if buffer_keys >= control.last_check_keys + 128 { + // Reschedule by write size was designed to avoid too many deletes impacts + // performance so it doesn't need pricise control. If checking bytes here may + // make the batch too small and hurt performance. + if self.should_reschedule(0) { + let written_bytes = self.flush(); + self.maybe_reschedule(written_bytes).await; + } else { + self.apply_flow_control_mut().last_check_keys = self.metrics.written_keys; + } + } + } + #[inline] - pub fn flush(&mut self) { + pub fn flush(&mut self) -> u64 { // TODO: maybe we should check whether there is anything to flush. 
let (index, term) = self.apply_progress(); + let control = self.apply_flow_control_mut(); + control.last_check_keys = 0; + if !control.need_flush { + return 0; + } + control.need_flush = false; let flush_state = self.flush_state().clone(); if let Some(wb) = &mut self.write_batch && !wb.is_empty() { let mut write_opt = WriteOptions::default(); @@ -578,6 +654,8 @@ impl Apply { apply_res.admin_result = self.take_admin_result().into_boxed_slice(); apply_res.modifications = *self.modifications_mut(); apply_res.metrics = mem::take(&mut self.metrics); + let written_bytes = apply_res.metrics.written_bytes; self.res_reporter().report(apply_res); + written_bytes } } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index f312162d1e5..f82fb1e8386 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -293,7 +293,11 @@ impl Peer { /// /// After destroy is finished, `finish_destroy` should be called to clean up /// memory states. 
- pub fn start_destroy(&mut self, write_task: &mut WriteTask) { + pub fn start_destroy( + &mut self, + ctx: &mut StoreContext, + write_task: &mut WriteTask, + ) { if self.postponed_destroy() { return; } @@ -311,7 +315,7 @@ impl Peer { let applied_index = self.entry_storage().applied_index(); lb.put_region_state(region_id, applied_index, ®ion_state) .unwrap(); - self.set_has_extra_write(); + self.record_tombstone_tablet_for_destroy(ctx, write_task); self.destroy_progress_mut().start(); } diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index dc245c24384..807f425e998 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -7,9 +7,9 @@ mod query; mod ready; pub use command::{ - AdminCmdResult, CommittedEntries, CompactLogContext, ProposalControl, RequestSplit, - SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, - SplitFlowControl, SPLIT_PREFIX, + AdminCmdResult, ApplyFlowControl, CommittedEntries, CompactLogContext, ProposalControl, + RequestSplit, SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, + SimpleWriteReqEncoder, SplitFlowControl, SPLIT_PREFIX, }; pub use life::DestroyProgress; pub use ready::{ diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index 5ff9a27dee0..e5b1c169c5b 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -301,19 +301,24 @@ impl ApplyTrace { None } - pub fn reset_snapshot(&mut self, index: u64) { + pub fn restore_snapshot(&mut self, index: u64) { for pr in self.data_cfs.iter_mut() { - pr.flushed = index; pr.last_modified = index; } - self.admin.flushed = index; + self.admin.last_modified = index; + // Snapshot is a special case that KVs are not flushed yet, so all flushed + // state should not be changed. 
But persisted_applied is updated whenever an + // asynchronous write is triggered. So it can lead to a special case that + // persisted_applied < admin.flushed. It seems no harm ATM though. self.persisted_applied = index; self.try_persist = false; } - #[inline] - pub fn reset_should_persist(&mut self) { - self.try_persist = false; + pub fn on_applied_snapshot(&mut self, index: u64) { + for pr in self.data_cfs.iter_mut() { + pr.flushed = index; + } + self.admin.flushed = index; } #[inline] diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 3ac500b7f49..2fdc228ea2f 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -449,7 +449,7 @@ impl Peer { self.merge_state_changes_to(&mut write_task); self.storage_mut() .handle_raft_ready(ctx, &mut ready, &mut write_task); - self.on_advance_persisted_apply_index(ctx, prev_persisted, Some(&mut write_task)); + self.on_advance_persisted_apply_index(ctx, prev_persisted, &mut write_task); if !ready.persisted_messages().is_empty() { write_task.messages = ready @@ -459,7 +459,7 @@ impl Peer { .collect(); } if !self.serving() { - self.start_destroy(&mut write_task); + self.start_destroy(ctx, &mut write_task); ctx.coprocessor_host.on_region_changed( self.region(), RegionChangeEvent::Destroy, diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 1919ce269a6..04b6ed7e12b 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -215,8 +215,8 @@ impl Peer { let path = ctx.tablet_registry.tablet_path(region_id, snapshot_index); assert!( path.exists(), - "{:?} {} not exists", - self.logger.list(), + "{} {} not exists", + SlogFormat(&self.logger), path.display() ); let tablet = ctx @@ -224,15 +224,14 @@ impl Peer { .tablet_factory() 
.open_tablet(tablet_ctx, &path) .unwrap_or_else(|e| { - panic!( - "{:?} failed to load tablet at {}: {:?}", - self.logger.list(), - path.display(), - e + slog_panic!( + self.logger, + "failed to load tablet"; + "path" => path.display(), + "error" => ?e ); }); - let prev_persisted_applied = self.storage().apply_trace().persisted_apply_index(); self.storage_mut().on_applied_snapshot(); self.raft_group_mut().advance_apply_to(snapshot_index); let read_tablet = SharedReadTablet::new(tablet.clone()); @@ -258,7 +257,6 @@ impl Peer { info!(self.logger, "init split with snapshot finished"); self.post_split_init(ctx, init); } - self.on_advance_persisted_apply_index(ctx, prev_persisted_applied, None); self.schedule_apply_fsm(ctx); } } @@ -511,7 +509,7 @@ impl Storage { let index = entry.truncated_index(); entry.set_applied_term(term); entry.apply_state_mut().set_applied_index(index); - self.apply_trace_mut().reset_snapshot(index); + self.apply_trace_mut().on_applied_snapshot(index); } pub fn apply_snapshot( @@ -552,10 +550,10 @@ impl Storage { raft_engine .clean(region.get_id(), 0, self.entry_storage().raft_state(), wb) .unwrap_or_else(|e| { - panic!( - "{:?} failed to clean up region: {:?}", - self.logger().list(), - e + slog_panic!( + self.logger(), + "failed to clean up region"; + "error" => ?e ) }); self.entry_storage_mut().clear(); @@ -578,7 +576,7 @@ impl Storage { entry_storage.set_truncated_term(last_term); entry_storage.set_last_term(last_term); - self.apply_trace_mut().reset_should_persist(); + self.apply_trace_mut().restore_snapshot(last_index); self.set_ever_persisted(); let lb = task .extra_write diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 2407d1ab3fe..7a5b03120b1 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -6,13 +6,13 @@ use engine_traits::{FlushState, KvEngine, TabletRegistry, WriteBatch, DATA_CFS_L use kvproto::{metapb, 
raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; use raftstore::store::{ fsm::{apply::DEFAULT_APPLY_WB_SIZE, ApplyMetrics}, - ReadTask, + Config, ReadTask, }; use slog::Logger; use tikv_util::{log::SlogFormat, worker::Scheduler}; use crate::{ - operation::{AdminCmdResult, DataTrace}, + operation::{AdminCmdResult, ApplyFlowControl, DataTrace}, router::CmdResChannel, }; @@ -28,6 +28,8 @@ pub struct Apply { callbacks: Vec<(Vec, RaftCmdResponse)>, + flow_control: ApplyFlowControl, + /// A flag indicates whether the peer is destroyed by applying admin /// command. tombstone: bool, @@ -58,6 +60,7 @@ pub struct Apply { impl Apply { #[inline] pub fn new( + cfg: &Config, peer: metapb::Peer, region_state: RegionLocalState, res_reporter: R, @@ -79,6 +82,7 @@ impl Apply { tablet: remote_tablet.latest().unwrap().clone(), write_batch: None, callbacks: vec![], + flow_control: ApplyFlowControl::new(cfg), tombstone: false, applied_term, applied_index: flush_state.applied_index(), @@ -158,8 +162,8 @@ impl Apply { pub fn set_tablet(&mut self, tablet: EK) { assert!( self.write_batch.as_ref().map_or(true, |wb| wb.is_empty()), - "{:?}", - self.logger.list() + "{} setting tablet while still have dirty write batch", + SlogFormat(&self.logger) ); self.write_batch.take(); self.tablet = tablet; @@ -222,4 +226,13 @@ impl Apply { pub fn log_recovery(&self) -> &Option> { &self.log_recovery } + + #[inline] + pub fn apply_flow_control_mut(&mut self) -> &mut ApplyFlowControl { + &mut self.flow_control + } + + pub fn apply_flow_control(&self) -> &ApplyFlowControl { + &self.flow_control + } } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index aca8f0fafce..1d1f53f9c53 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -332,8 +332,8 @@ mod tests { }; use raft::{Error as RaftError, StorageError}; use raftstore::store::{ - util::new_empty_snapshot, write_to_db_for_test, 
AsyncReadNotifier, FetchedLogs, GenSnapRes, - ReadRunner, TabletSnapKey, TabletSnapManager, WriteTask, RAFT_INIT_LOG_INDEX, + util::new_empty_snapshot, write_to_db_for_test, AsyncReadNotifier, Config, FetchedLogs, + GenSnapRes, ReadRunner, TabletSnapKey, TabletSnapManager, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }; use slog::o; @@ -500,6 +500,7 @@ mod tests { state.set_region(region.clone()); // setup peer applyer let mut apply = Apply::new( + &Config::default(), region.get_peers()[0].clone(), state, router, From 8e6e348505e7f1f7b5e023c00b30f90e8d1b4084 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 10 Jan 2023 14:28:23 +0800 Subject: [PATCH 0450/1149] raftstore-v2: add waterfall metrics (#14029) ref tikv/tikv#12842 - add water metrics - fix potential panic when destroying a peer - fix incorrect store size Signed-off-by: Jay Lee --- components/engine_rocks/src/misc.rs | 4 +- components/raftstore-v2/src/batch/store.rs | 14 +- .../operation/command/admin/compact_log.rs | 10 +- .../raftstore-v2/src/operation/command/mod.rs | 71 ++++++++- components/raftstore-v2/src/operation/life.rs | 2 + .../src/operation/ready/apply_trace.rs | 5 + .../raftstore-v2/src/operation/ready/mod.rs | 144 +++++++++++++++++- components/raftstore-v2/src/raft/apply.rs | 15 +- .../src/router/response_channel.rs | 36 +++-- components/raftstore/src/lib.rs | 1 + .../raftstore/src/store/async_io/write.rs | 6 +- components/raftstore/src/store/fsm/apply.rs | 28 ++-- components/raftstore/src/store/fsm/peer.rs | 19 +-- .../raftstore/src/store/local_metrics.rs | 4 +- components/raftstore/src/store/msg.rs | 44 ++++-- components/raftstore/src/store/peer.rs | 24 +-- 16 files changed, 350 insertions(+), 77 deletions(-) diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index 55546869272..e339facaac4 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -2,7 +2,7 @@ use engine_traits::{ CfNamesExt, DeleteStrategy, 
ImportExt, IterOptions, Iterable, Iterator, MiscExt, Mutable, - Range, Result, SstWriter, SstWriterBuilder, WriteBatch, WriteBatchExt, ALL_CFS, + Range, Result, SstWriter, SstWriterBuilder, WriteBatch, WriteBatchExt, }; use rocksdb::Range as RocksRange; use tikv_util::{box_try, keybuilder::KeyBuilder}; @@ -258,7 +258,7 @@ impl MiscExt for RocksEngine { fn get_engine_used_size(&self) -> Result { let mut used_size: u64 = 0; - for cf in ALL_CFS { + for cf in self.cf_names() { let handle = util::get_cf_handle(self.as_inner(), cf)?; used_size += util::get_engine_cf_used_size(self.as_inner(), handle); } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 621f826619b..6183778c369 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -36,7 +36,7 @@ use tikv_util::{ config::{Tracker, VersionTrack}, log::SlogFormat, sys::SysQuota, - time::Instant as TiInstant, + time::{duration_to_sec, Instant as TiInstant}, timer::SteadyTimer, worker::{LazyWorker, Scheduler, Worker}, yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, @@ -122,6 +122,7 @@ struct StorePoller { /// Buffers to hold in-coming messages. store_msg_buf: Vec, peer_msg_buf: Vec, + timer: tikv_util::time::Instant, /// These fields controls the timing of flushing messages generated by /// FSMs. 
last_flush_time: TiInstant, @@ -135,6 +136,7 @@ impl StorePoller { cfg_tracker, store_msg_buf: Vec::new(), peer_msg_buf: Vec::new(), + timer: tikv_util::time::Instant::now(), last_flush_time: TiInstant::now(), need_flush_events: false, } @@ -185,6 +187,8 @@ impl PollHandler Option { @@ -234,7 +238,13 @@ impl PollHandler>>]) {} + fn end(&mut self, _batch: &mut [Option>>]) { + let dur = self.timer.saturating_elapsed(); + self.poll_ctx + .raft_metrics + .process_ready + .observe(duration_to_sec(dur)); + } fn pause(&mut self) { if self.poll_ctx.trans.need_flush() { diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 39cf02de775..a4983b28a47 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -303,15 +303,21 @@ impl Peer { } } + pub fn has_pending_tombstone_tablets(&self) -> bool { + !self + .compact_log_context() + .tombstone_tablets_wait_index + .is_empty() + } + #[inline] pub fn record_tombstone_tablet_for_destroy( &mut self, ctx: &StoreContext, task: &mut WriteTask, ) { - let compact_log_context = self.compact_log_context_mut(); assert!( - compact_log_context.tombstone_tablets_wait_index.is_empty(), + !self.has_pending_tombstone_tablets(), "{} all tombstone should be cleared before being destroyed.", SlogFormat(&self.logger) ); diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index a6ab227d402..047fe026ffe 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -18,7 +18,7 @@ use std::{mem, time::Duration}; -use engine_traits::{KvEngine, RaftEngine, WriteBatch, WriteOptions}; +use engine_traits::{KvEngine, PerfContext, RaftEngine, WriteBatch, WriteOptions}; use kvproto::raft_cmdpb::{ AdminCmdType, CmdType, RaftCmdRequest, 
RaftCmdResponse, RaftRequestHeader, }; @@ -32,8 +32,8 @@ use raftstore::{ apply::{self, APPLY_WB_SHRINK_SIZE, SHRINK_PENDING_CMD_QUEUE_CAP}, Proposal, }, - local_metrics::RaftMetrics, - metrics::APPLY_TASK_WAIT_TIME_HISTOGRAM, + local_metrics::{RaftMetrics, TimeTracker}, + metrics::{APPLY_TASK_WAIT_TIME_HISTOGRAM, APPLY_TIME_HISTOGRAM}, msg::ErrorCallback, util, Config, WriteCallback, }, @@ -221,12 +221,35 @@ impl Peer { } proposal.must_pass_epoch_check = self.applied_to_current_term(); proposal.propose_time = Some(*ctx.current_time.get_or_insert_with(monotonic_raw_now)); + self.report_batch_wait_duration(ctx, &proposal.cb); self.proposals_mut().push(proposal); self.set_has_ready(); } + fn report_batch_wait_duration( + &self, + ctx: &mut StoreContext, + ch: &Vec, + ) { + if !ctx.raft_metrics.waterfall_metrics || ch.is_empty() { + return; + } + let now = std::time::Instant::now(); + for c in ch { + for tracker in c.write_trackers() { + tracker.observe(now, &ctx.raft_metrics.wf_batch_wait, |t| { + &mut t.metrics.wf_batch_wait_nanos + }); + } + } + } + #[inline] - pub fn schedule_apply_committed_entries(&mut self, committed_entries: Vec) { + pub fn schedule_apply_committed_entries( + &mut self, + ctx: &mut StoreContext, + committed_entries: Vec, + ) { if committed_entries.is_empty() { return; } @@ -246,6 +269,7 @@ impl Peer { } else { entry_and_proposals = committed_entries.into_iter().map(|e| (e, vec![])).collect(); } + self.report_store_time_duration(ctx, &mut entry_and_proposals); // Unlike v1, v2 doesn't need to persist commit index and commit term. 
The // point of persist commit index/term of raft apply state is to recover commit // index when the writes to raft engine is lost but writes to kv engine is @@ -265,6 +289,26 @@ impl Peer { .send(ApplyTask::CommittedEntries(apply)); } + #[inline] + fn report_store_time_duration( + &mut self, + ctx: &mut StoreContext, + entry_and_proposals: &mut [(Entry, Vec)], + ) { + let now = std::time::Instant::now(); + for (_, chs) in entry_and_proposals { + for tracker in chs.write_trackers_mut() { + tracker.observe(now, &ctx.raft_metrics.store_time, |t| { + t.metrics.write_instant = Some(now); + &mut t.metrics.store_time_nanos + }); + if let TimeTracker::Instant(t) = tracker { + *t = now; + } + } + } + } + pub fn on_apply_res(&mut self, ctx: &mut StoreContext, apply_res: ApplyRes) { if !self.serving() { return; @@ -625,9 +669,11 @@ impl Apply { } control.need_flush = false; let flush_state = self.flush_state().clone(); - if let Some(wb) = &mut self.write_batch && !wb.is_empty() { + if let Some(wb) = &self.write_batch && !wb.is_empty() { + self.perf_context().start_observe(); let mut write_opt = WriteOptions::default(); write_opt.set_disable_wal(true); + let wb = self.write_batch.as_mut().unwrap(); if let Err(e) = wb.write_callback_opt(&write_opt, || { flush_state.set_applied_index(index); }) { @@ -640,11 +686,26 @@ impl Apply { } else { self.write_batch.take(); } + let tokens: Vec<_> = self + .callbacks_mut() + .iter() + .flat_map(|(v, _)| { + v.write_trackers() + .flat_map(|t| t.as_tracker_token().cloned()) + }) + .collect(); + self.perf_context().report_metrics(&tokens); } let callbacks = self.callbacks_mut(); + let now = std::time::Instant::now(); + let apply_time = APPLY_TIME_HISTOGRAM.local(); for (ch, resp) in callbacks.drain(..) 
{ + for tracker in ch.write_trackers() { + tracker.observe(now, &apply_time, |t| &mut t.metrics.apply_time_nanos); + } ch.set_result(resp); } + apply_time.flush(); if callbacks.capacity() > SHRINK_PENDING_CMD_QUEUE_CAP { callbacks.shrink_to(SHRINK_PENDING_CMD_QUEUE_CAP); } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index f82fb1e8386..88646f06b59 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -286,6 +286,8 @@ impl Peer { let entry_storage = self.storage().entry_storage(); // TODO: check actual split index instead of commit index. entry_storage.applied_index() != entry_storage.commit_index() + // Wait for critical commands like split. + || self.has_pending_tombstone_tablets() } /// Start the destroy progress. It will write `Tombstone` state diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index e5b1c169c5b..5b88a6ba94d 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -473,6 +473,11 @@ impl Storage { } pub fn record_apply_trace(&mut self, write_task: &mut WriteTask) { + let trace = self.apply_trace(); + // Maybe tablet index can be different? 
+ if trace.persisted_applied > trace.admin.flushed { + return; + } let region_id = self.region().get_id(); let raft_engine = self.entry_storage().raft_engine(); let tablet_index = self.tablet_index(); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 2fdc228ea2f..3f559feff8b 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -30,7 +30,10 @@ use protobuf::Message as _; use raft::{eraftpb, prelude::MessageType, Ready, StateRole, INVALID_ID}; use raftstore::{ coprocessor::{RegionChangeEvent, RoleChange}, - store::{needs_evict_entry_cache, util, FetchedLogs, ReadProgress, Transport, WriteTask}, + store::{ + needs_evict_entry_cache, util, FetchedLogs, ReadProgress, Transport, WriteCallback, + WriteTask, + }, }; use slog::{debug, error, info, trace, warn}; use tikv_util::{ @@ -205,10 +208,14 @@ impl Peer { self.add_peer_heartbeat(from_peer.get_id(), Instant::now()); } self.insert_peer_cache(msg.take_from_peer()); + let pre_committed_index = self.raft_group().raft.raft_log.committed; if msg.get_message().get_msg_type() == MessageType::MsgTransferLeader { self.on_transfer_leader_msg(ctx, msg.get_message(), msg.disk_usage) } else if let Err(e) = self.raft_group_mut().step(msg.take_message()) { error!(self.logger, "raft step error"; "err" => ?e); + } else { + let committed_index = self.raft_group().raft.raft_log.committed; + self.report_commit_log_duration(ctx, pre_committed_index, committed_index); } self.set_has_ready(); @@ -317,6 +324,56 @@ impl Peer { } } + /// Send a message. + /// + /// The message is pushed into the send buffer, it may not be sent out until + /// transport is flushed explicitly. 
+ fn send_raft_message_on_leader( + &mut self, + ctx: &mut StoreContext, + msg: RaftMessage, + ) { + let message = msg.get_message(); + if message.get_msg_type() == MessageType::MsgAppend + && let Some(fe) = message.get_entries().first() + && let Some(le) = message.get_entries().last() + { + let last = (le.get_term(), le.get_index()); + let first = (fe.get_term(), fe.get_index()); + let now = Instant::now(); + let queue = self.proposals_mut().queue_mut(); + // Proposals are batched up, so it will liely hit after one or two steps. + for p in queue.iter_mut().rev() { + if p.sent { + break; + } + let cur = (p.term, p.index); + if cur > last { + continue; + } + if cur < first { + break; + } + for tracker in p.cb.write_trackers() { + tracker.observe(now, &ctx.raft_metrics.wf_send_proposal, |t| { + &mut t.metrics.wf_send_proposal_nanos + }); + } + p.sent = true; + } + } + if message.get_msg_type() == MessageType::MsgTimeoutNow { + // After a leader transfer procedure is triggered, the lease for + // the old leader may be expired earlier than usual, since a new leader + // may be elected and the old leader doesn't step down due to + // network partition from the new leader. + // For lease safety during leader transfer, transit `leader_lease` + // to suspect. + self.leader_lease_mut().suspect(monotonic_raw_now()); + } + self.send_raft_message(ctx, msg) + } + fn handle_raft_committed_entries( &mut self, ctx: &mut crate::batch::StoreContext, @@ -357,7 +414,7 @@ impl Peer { // Compact all cached entries instead of half evict. 
self.entry_storage_mut().evict_entry_cache(false); } - self.schedule_apply_committed_entries(committed_entries); + self.schedule_apply_committed_entries(ctx, committed_entries); if self.is_leader() && commit_to_current_term && !self.proposal_control().has_uncommitted_admin() @@ -423,7 +480,7 @@ impl Peer { debug_assert!(self.is_leader()); for msg in ready.take_messages() { if let Some(msg) = self.build_raft_message(msg) { - self.send_raft_message(ctx, msg); + self.send_raft_message_on_leader(ctx, msg); } } } @@ -445,6 +502,7 @@ impl Peer { let ready_number = ready.number(); let mut write_task = WriteTask::new(self.region_id(), self.peer_id(), ready_number); + self.report_send_to_queue_duration(ctx, &mut write_task, ready.entries()); let prev_persisted = self.storage().apply_trace().persisted_apply_index(); self.merge_state_changes_to(&mut write_task); self.storage_mut() @@ -519,8 +577,13 @@ impl Peer { } let persisted_number = self.async_writer.persisted_number(); + let pre_persisted_index = self.persisted_index(); + let pre_committed_index = self.raft_group().raft.raft_log.committed; self.raft_group_mut().on_persist_ready(persisted_number); let persisted_index = self.persisted_index(); + let committed_index = self.raft_group().raft.raft_log.committed; + self.report_persist_log_duration(ctx, pre_persisted_index, persisted_index); + self.report_commit_log_duration(ctx, pre_committed_index, committed_index); // The apply snapshot process order would be: // - Get the snapshot from the ready // - Wait for async writer to load this tablet @@ -543,6 +606,81 @@ impl Peer { } } + #[inline] + fn report_persist_log_duration( + &self, + ctx: &mut StoreContext, + from: u64, + to: u64, + ) { + if !ctx.cfg.waterfall_metrics || self.proposals().is_empty() || from >= to { + return; + } + let now = Instant::now(); + for i in from + 1..to { + if let Some((term, trackers)) = self.proposals().find_trackers(i) { + if self.entry_storage().term(i).map_or(false, |t| t == term) { + for 
tracker in trackers { + tracker.observe(now, &ctx.raft_metrics.wf_persist_log, |t| { + &mut t.metrics.wf_persist_log_nanos + }); + } + } + } + } + } + + #[inline] + fn report_commit_log_duration(&self, ctx: &mut StoreContext, from: u64, to: u64) { + if !ctx.cfg.waterfall_metrics || self.proposals().is_empty() || from >= to { + return; + } + let now = Instant::now(); + for i in from + 1..to { + if let Some((term, trackers)) = self.proposals().find_trackers(i) { + if self.entry_storage().term(i).map_or(false, |t| t == term) { + let commit_persisted = i <= self.persisted_index(); + let hist = if commit_persisted { + &ctx.raft_metrics.wf_commit_log + } else { + &ctx.raft_metrics.wf_commit_not_persist_log + }; + for tracker in trackers { + tracker.observe(now, hist, |t| { + t.metrics.commit_not_persisted = !commit_persisted; + &mut t.metrics.wf_commit_log_nanos + }); + } + } + } + } + } + + #[inline] + fn report_send_to_queue_duration( + &mut self, + ctx: &mut StoreContext, + write_task: &mut WriteTask, + entries: &[raft::eraftpb::Entry], + ) { + if !ctx.cfg.waterfall_metrics || self.proposals().is_empty() { + return; + } + let now = Instant::now(); + for entry in entries { + if let Some((term, trackers)) = self.proposals().find_trackers(entry.index) { + if entry.term == term { + for tracker in trackers { + write_task.trackers.push(*tracker); + tracker.observe(now, &ctx.raft_metrics.wf_send_to_queue, |t| { + &mut t.metrics.wf_send_to_queue_nanos + }); + } + } + } + } + } + #[cfg(feature = "testexport")] pub fn on_wait_flush(&mut self, ch: crate::router::FlushChannel) { self.async_writer.subscirbe_flush(ch); diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 7a5b03120b1..6d1faa98cbf 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -2,7 +2,9 @@ use std::{mem, sync::Arc}; -use engine_traits::{FlushState, KvEngine, TabletRegistry, WriteBatch, DATA_CFS_LEN}; +use 
engine_traits::{ + FlushState, KvEngine, PerfContextKind, TabletRegistry, WriteBatch, DATA_CFS_LEN, +}; use kvproto::{metapb, raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; use raftstore::store::{ fsm::{apply::DEFAULT_APPLY_WB_SIZE, ApplyMetrics}, @@ -20,6 +22,7 @@ use crate::{ pub struct Apply { peer: metapb::Peer, tablet: EK, + perf_context: EK::PerfContext, pub write_batch: Option, /// A buffer for encoding key. pub key_buffer: Vec, @@ -77,9 +80,12 @@ impl Apply { assert_ne!(applied_term, 0, "{}", SlogFormat(&logger)); let applied_index = flush_state.applied_index(); assert_ne!(applied_index, 0, "{}", SlogFormat(&logger)); + let tablet = remote_tablet.latest().unwrap().clone(); + let perf_context = tablet.get_perf_context(cfg.perf_level, PerfContextKind::RaftstoreApply); Apply { peer, - tablet: remote_tablet.latest().unwrap().clone(), + tablet, + perf_context, write_batch: None, callbacks: vec![], flow_control: ApplyFlowControl::new(cfg), @@ -174,6 +180,11 @@ impl Apply { &self.tablet } + #[inline] + pub fn perf_context(&mut self) -> &mut EK::PerfContext { + &mut self.perf_context + } + #[inline] pub fn peer(&self) -> &metapb::Peer { &self.peer diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index 2cb75acccfc..eeeb13f6555 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -30,8 +30,7 @@ use raftstore::store::{ local_metrics::TimeTracker, msg::ErrorCallback, region_meta::RegionMeta, ReadCallback, WriteCallback, }; -use smallvec::SmallVec; -use tracker::TrackerToken; +use tracker::{TrackerToken, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}; /// A struct allows to watch and notify specific events. /// @@ -54,6 +53,7 @@ struct EventCore { before_set: UnsafeCell>>, // Waker can be changed, need to use `AtomicWaker` to guarantee no data race. 
waker: AtomicWaker, + tracker: UnsafeCell, } unsafe impl Send for EventCore {} @@ -244,16 +244,19 @@ impl BaseChannel { /// Creates a pair of channel and subscriber. #[inline] pub fn pair() -> (Self, BaseSubscriber) { - Self::with_mask(u32::MAX) + let tracker_token = tracker::get_tls_tracker_token(); + Self::with_mask(u32::MAX, TimeTracker::Tracker(tracker_token)) } - fn with_mask(mask: u32) -> (Self, BaseSubscriber) { + #[inline] + fn with_mask(mask: u32, tracker: TimeTracker) -> (Self, BaseSubscriber) { let core: Arc> = Arc::new(EventCore { event: AtomicU64::new(0), res: UnsafeCell::new(None), event_mask: mask, before_set: UnsafeCell::new(None), waker: AtomicWaker::new(), + tracker: UnsafeCell::new(tracker), }); (Self { core: core.clone() }, BaseSubscriber { core }) } @@ -449,7 +452,17 @@ impl CmdResChannelBuilder { #[inline] pub fn build(self) -> (CmdResChannel, CmdResSubscriber) { - let (c, s) = CmdResChannel::with_mask(self.event_mask); + let tracker_token = tracker::get_tls_tracker_token(); + let now = std::time::Instant::now(); + let tracker = if tracker_token == INVALID_TRACKER_TOKEN { + TimeTracker::Instant(now) + } else { + GLOBAL_TRACKERS.with_tracker(tracker_token, |tracker| { + tracker.metrics.write_instant = Some(now); + }); + TimeTracker::Tracker(tracker_token) + }; + let (c, s) = CmdResChannel::with_mask(self.event_mask, tracker); if let Some(f) = self.before_set { unsafe { *c.core.before_set.get() = Some(f); @@ -493,12 +506,15 @@ impl WriteCallback for CmdResChannel { self.core.notify_event(Self::COMMITTED_EVENT); } - fn write_trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>> { - None + type TimeTrackerListRef<'a> = &'a [TimeTracker]; + #[inline] + fn write_trackers(&self) -> Self::TimeTrackerListRef<'_> { + std::slice::from_ref(unsafe { &*self.core.tracker.get() }) } - fn write_trackers_mut(&mut self) -> Option<&mut SmallVec<[TimeTracker; 4]>> { - None + type TimeTrackerListMut<'a> = &'a mut [TimeTracker]; + fn write_trackers_mut(&mut self) 
-> Self::TimeTrackerListMut<'_> { + std::slice::from_mut(unsafe { &mut *self.core.tracker.get() }) } // TODO: support executing hooks inside setting result. @@ -577,7 +593,7 @@ impl ReadCallback for QueryResChannel { } fn read_tracker(&self) -> Option<&TrackerToken> { - None + unsafe { (*self.core.tracker.get()).as_tracker_token() } } } diff --git a/components/raftstore/src/lib.rs b/components/raftstore/src/lib.rs index 6104ae7b7cf..1db5f79d226 100644 --- a/components/raftstore/src/lib.rs +++ b/components/raftstore/src/lib.rs @@ -8,6 +8,7 @@ #![feature(hash_drain_filter)] #![feature(let_chains)] #![feature(assert_matches)] +#![feature(type_alias_impl_trait)] #![recursion_limit = "256"] #[cfg(test)] diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 817ff576f67..7016d0ab606 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -718,7 +718,11 @@ where .batch .tasks .iter() - .flat_map(|task| task.trackers.iter().flat_map(|t| t.as_tracker_token())) + .flat_map(|task| { + task.trackers + .iter() + .flat_map(|t| t.as_tracker_token().cloned()) + }) .collect(); self.perf_context.report_metrics(&trackers); write_raft_time = duration_to_sec(now.saturating_elapsed()); diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index ec2d7bf72a8..cab6ae0ffe8 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -582,8 +582,7 @@ where .cb_batch .iter() .flat_map(|(cb, _)| cb.write_trackers()) - .flat_map(|trackers| trackers.iter().map(|t| t.as_tracker_token())) - .flatten() + .flat_map(|trackers| trackers.as_tracker_token().cloned()) .collect(); self.perf_context.report_metrics(&trackers); self.sync_log_hint = false; @@ -620,7 +619,7 @@ where // Invoke callbacks let now = std::time::Instant::now(); for (cb, resp) in cb_batch.drain(..) 
{ - for tracker in cb.write_trackers().iter().flat_map(|v| *v) { + for tracker in cb.write_trackers() { tracker.observe(now, &self.apply_time, |t| &mut t.metrics.apply_time_nanos); } cb.invoke_with_response(resp); @@ -3333,15 +3332,13 @@ impl Apply { pub fn on_schedule(&mut self, metrics: &RaftMetrics) { let now = std::time::Instant::now(); for cb in &mut self.cbs { - if let Some(trackers) = cb.cb.write_trackers_mut() { - for tracker in trackers { - tracker.observe(now, &metrics.store_time, |t| { - t.metrics.write_instant = Some(now); - &mut t.metrics.store_time_nanos - }); - if let TimeTracker::Instant(t) = tracker { - *t = now; - } + for tracker in cb.cb.write_trackers_mut() { + tracker.observe(now, &metrics.store_time, |t| { + t.metrics.write_instant = Some(now); + &mut t.metrics.store_time_nanos + }); + if let TimeTracker::Instant(t) = tracker { + *t = now; } } } @@ -3410,6 +3407,7 @@ pub struct Proposal { /// lease. pub propose_time: Option, pub must_pass_epoch_check: bool, + pub sent: bool, } impl Proposal { @@ -3421,6 +3419,7 @@ impl Proposal { propose_time: None, must_pass_epoch_check: false, is_conf_change: false, + sent: false, } } } @@ -4170,9 +4169,9 @@ where .cbs .iter() .flat_map(|p| p.cb.write_trackers()) - .flat_map(|ts| ts.iter().flat_map(|t| t.as_tracker_token())) + .flat_map(|ts| ts.as_tracker_token()) { - GLOBAL_TRACKERS.with_tracker(tracker, |t| { + GLOBAL_TRACKERS.with_tracker(*tracker, |t| { t.metrics.apply_wait_nanos = apply_wait.as_nanos() as u64; }); } @@ -5082,6 +5081,7 @@ mod tests { cb, propose_time: None, must_pass_epoch_check: false, + sent: true, } } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index abd8fd84771..e302ea6588a 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -524,13 +524,14 @@ where })) }; - let tokens: SmallVec<[TimeTracker; 4]> = cbs + let trackers: SmallVec<[TimeTracker; 4]> = cbs .iter_mut() - 
.filter_map(|cb| cb.write_trackers().map(|t| t[0])) + .flat_map(|cb| cb.write_trackers()) + .cloned() .collect(); - let mut cb = Callback::write_ext( - Box::new(move |resp| { + let cb = Callback::Write { + cb: Box::new(move |resp| { for cb in cbs { let mut cmd_resp = RaftCmdResponse::default(); cmd_resp.set_header(resp.response.get_header().clone()); @@ -539,12 +540,8 @@ where }), proposed_cb, committed_cb, - ); - - if let Some(trackers) = cb.write_trackers_mut() { - *trackers = tokens; - } - + trackers, + }; return Some((req, cb)); } None @@ -5245,7 +5242,7 @@ where if self.ctx.raft_metrics.waterfall_metrics { let now = Instant::now(); - for tracker in cb.write_trackers().iter().flat_map(|v| *v) { + for tracker in cb.write_trackers() { tracker.observe(now, &self.ctx.raft_metrics.wf_batch_wait, |t| { &mut t.metrics.wf_batch_wait_nanos }); diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index 5cfbb645612..c1db17f8cae 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -214,9 +214,9 @@ pub enum TimeTracker { } impl TimeTracker { - pub fn as_tracker_token(&self) -> Option { + pub fn as_tracker_token(&self) -> Option<&TrackerToken> { match self { - TimeTracker::Tracker(tt) => Some(*tt), + TimeTracker::Tracker(tt) => Some(tt), TimeTracker::Instant(_) => None, } } diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 08b0e9367dc..e3fc8530d76 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -225,8 +225,16 @@ pub trait WriteCallback: ErrorCallback { fn notify_proposed(&mut self); fn notify_committed(&mut self); - fn write_trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>>; - fn write_trackers_mut(&mut self) -> Option<&mut SmallVec<[TimeTracker; 4]>>; + + type TimeTrackerListRef<'a>: IntoIterator + where + Self: 'a; + fn write_trackers(&self) -> 
Self::TimeTrackerListRef<'_>; + + type TimeTrackerListMut<'a>: IntoIterator + where + Self: 'a; + fn write_trackers_mut(&mut self) -> Self::TimeTrackerListMut<'_>; fn set_result(self, result: Self::Response); } @@ -276,16 +284,24 @@ impl WriteCallback for Callback { self.invoke_committed(); } + type TimeTrackerListRef<'a> = impl IntoIterator; #[inline] - fn write_trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>> { - let Callback::Write { trackers, .. } = self else { return None; }; - Some(trackers) + fn write_trackers(&self) -> Self::TimeTrackerListRef<'_> { + let trackers = match self { + Callback::Write { trackers, .. } => Some(trackers), + _ => None, + }; + trackers.into_iter().flatten() } + type TimeTrackerListMut<'a> = impl IntoIterator; #[inline] - fn write_trackers_mut(&mut self) -> Option<&mut SmallVec<[TimeTracker; 4]>> { - let Callback::Write { trackers, .. } = self else { return None; }; - Some(trackers) + fn write_trackers_mut(&mut self) -> Self::TimeTrackerListMut<'_> { + let trackers = match self { + Callback::Write { trackers, .. 
} => Some(trackers), + _ => None, + }; + trackers.into_iter().flatten() } #[inline] @@ -296,7 +312,7 @@ impl WriteCallback for Callback { impl WriteCallback for Vec where - C: WriteCallback, + C: WriteCallback + 'static, C::Response: Clone, { type Response = C::Response; @@ -315,14 +331,16 @@ where } } + type TimeTrackerListRef<'a> = impl Iterator + 'a; #[inline] - fn write_trackers(&self) -> Option<&SmallVec<[TimeTracker; 4]>> { - None + fn write_trackers(&self) -> Self::TimeTrackerListRef<'_> { + self.iter().flat_map(|c| c.write_trackers()) } + type TimeTrackerListMut<'a> = impl Iterator + 'a; #[inline] - fn write_trackers_mut(&mut self) -> Option<&mut SmallVec<[TimeTracker; 4]>> { - None + fn write_trackers_mut(&mut self) -> Self::TimeTrackerListMut<'_> { + self.iter_mut().flat_map(|c| c.write_trackers_mut()) } #[inline] diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 9384a4940c7..347f62dd945 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -70,7 +70,7 @@ use uuid::Uuid; use super::{ cmd_resp, - local_metrics::{RaftMetrics, TimeTracker}, + local_metrics::RaftMetrics, metrics::*, peer_storage::{write_peer_state, CheckApplyingSnapStatus, HandleReadyResult, PeerStorage}, read_queue::{ReadIndexQueue, ReadIndexRequest}, @@ -141,16 +141,16 @@ impl ProposalQueue { /// Find the trackers of given index. /// Caller should check if term is matched before using trackers. 
- fn find_trackers(&self, index: u64) -> Option<(u64, &SmallVec<[TimeTracker; 4]>)> { + pub fn find_trackers(&self, index: u64) -> Option<(u64, C::TimeTrackerListRef<'_>)> { self.queue .binary_search_by_key(&index, |p: &Proposal<_>| p.index) .ok() - .and_then(|i| { - self.queue[i] - .cb - .write_trackers() - .map(|ts| (self.queue[i].term, ts)) - }) + .map(|i| (self.queue[i].term, self.queue[i].cb.write_trackers())) + } + + #[inline] + pub fn queue_mut(&mut self) -> &mut VecDeque> { + &mut self.queue } pub fn find_propose_time(&self, term: u64, index: u64) -> Option { @@ -1825,7 +1825,7 @@ where { let proposal = &self.proposals.queue[idx]; if term == proposal.term { - for tracker in proposal.cb.write_trackers().iter().flat_map(|v| v.iter()) { + for tracker in proposal.cb.write_trackers() { tracker.observe(std_now, &ctx.raft_metrics.wf_send_proposal, |t| { &mut t.metrics.wf_send_proposal_nanos }); @@ -2767,8 +2767,8 @@ where for entry in ready.entries() { if let Some((term, times)) = self.proposals.find_trackers(entry.get_index()) { if entry.term == term { - trackers.extend_from_slice(times); for tracker in times { + trackers.push(*tracker); tracker.observe(now, &ctx.raft_metrics.wf_send_to_queue, |t| { &mut t.metrics.wf_send_to_queue_nanos }); @@ -3687,6 +3687,7 @@ where cb, propose_time: None, must_pass_epoch_check: has_applied_to_current_term, + sent: false, }; if let Some(cmd_type) = req_admin_cmd_type { self.cmd_epoch_checker @@ -4018,6 +4019,7 @@ where cb: Callback::None, propose_time: Some(now), must_pass_epoch_check: false, + sent: false, }; self.post_propose(poll_ctx, p); } @@ -5941,6 +5943,7 @@ mod tests { cb: Callback::write(Box::new(|_| {})), propose_time: Some(u64_to_timespec(index)), must_pass_epoch_check: false, + sent: false, }); }; for index in 1..=100 { @@ -6014,6 +6017,7 @@ mod tests { is_conf_change: false, propose_time: None, must_pass_epoch_check: false, + sent: false, }); } for (index, term) in entries { From 
528e06dcc4ffa5d099b60fbe93972732d141e014 Mon Sep 17 00:00:00 2001 From: Wenxuan Date: Thu, 12 Jan 2023 16:22:34 +0800 Subject: [PATCH 0451/1149] util: Fix incorrect memory capacity (#14034) * util: Fix incorrect memory capacity Signed-off-by: Wish * Fix lints Signed-off-by: Wish * Check capacity with /proc/meminfo Signed-off-by: Wish Signed-off-by: Wish --- components/tikv_util/src/sys/mod.rs | 4 +- src/server/service/diagnostics/sys.rs | 61 ++++++++++++++++++++++----- 2 files changed, 53 insertions(+), 12 deletions(-) diff --git a/components/tikv_util/src/sys/mod.rs b/components/tikv_util/src/sys/mod.rs index 49e6812b81f..797da2aea54 100644 --- a/components/tikv_util/src/sys/mod.rs +++ b/components/tikv_util/src/sys/mod.rs @@ -22,7 +22,7 @@ use mnt::get_mount; use sysinfo::RefreshKind; pub use sysinfo::{CpuExt, DiskExt, NetworkExt, ProcessExt, SystemExt}; -use crate::config::{ReadableSize, KIB}; +use crate::config::ReadableSize; pub const HIGH_PRI: i32 = -1; const CPU_CORES_QUOTA_ENV_VAR_KEY: &str = "TIKV_CPU_CORES_QUOTA"; @@ -93,7 +93,7 @@ impl SysQuota { fn sysinfo_memory_limit_in_bytes() -> u64 { let system = sysinfo::System::new_with_specifics(RefreshKind::new().with_memory()); - system.total_memory() * KIB + system.total_memory() } } diff --git a/src/server/service/diagnostics/sys.rs b/src/server/service/diagnostics/sys.rs index 6e9585ab2c9..8a84eaf6293 100644 --- a/src/server/service/diagnostics/sys.rs +++ b/src/server/service/diagnostics/sys.rs @@ -3,10 +3,7 @@ use std::{collections::HashMap, string::ToString}; use kvproto::diagnosticspb::{ServerInfoItem, ServerInfoPair}; -use tikv_util::{ - config::KIB, - sys::{cpu_time::LinuxStyleCpuTime, ioload, SysQuota, *}, -}; +use tikv_util::sys::{cpu_time::LinuxStyleCpuTime, ioload, SysQuota, *}; use walkdir::WalkDir; use crate::server::service::diagnostics::SYS_INFO; @@ -129,12 +126,12 @@ fn cpu_load_info(prev_cpu: CpuTimeSnapshot, collector: &mut Vec) fn mem_load_info(collector: &mut Vec) { let mut system = 
SYS_INFO.lock().unwrap(); system.refresh_memory(); - let total_memory = system.total_memory() * KIB; - let used_memory = system.used_memory() * KIB; - let free_memory = system.free_memory() * KIB; - let total_swap = system.total_swap() * KIB; - let used_swap = system.used_swap() * KIB; - let free_swap = system.free_swap() * KIB; + let total_memory = system.total_memory(); + let used_memory = system.used_memory(); + let free_memory = system.free_memory(); + let total_swap = system.total_swap(); + let used_swap = system.used_swap(); + let free_swap = system.free_swap(); drop(system); let used_memory_pct = (used_memory as f64) / (total_memory as f64); let free_memory_pct = (free_memory as f64) / (total_memory as f64); @@ -683,6 +680,50 @@ mod tests { assert_ne!(processes.get_pairs().len(), 0); } + #[test] + #[cfg(target_os = "linux")] + fn test_memory() { + let mut mem_total_kb: u64 = 0; + { + use std::io::BufRead; + + let f = std::fs::File::open("/proc/meminfo").unwrap(); + let reader = std::io::BufReader::new(f); + for line in reader.lines() { + let l = line.unwrap(); + let mut parts = l.split_whitespace(); + if parts.next().unwrap() != "MemTotal:" { + continue; + } + mem_total_kb = parts.next().unwrap().parse().unwrap(); + let unit = parts.next().unwrap(); + assert_eq!(unit, "kB"); + } + } + assert!(mem_total_kb > 0); + + let mut collector = vec![]; + hardware_info(&mut collector); + + let mut memory_checked = false; + + 'outer: for item in &collector { + if item.get_tp() != "memory" { + continue; + } + for pair in item.get_pairs() { + if pair.get_key() != "capacity" { + continue; + } + assert_eq!(pair.get_value(), (mem_total_kb * 1024).to_string()); + memory_checked = true; + break 'outer; + } + } + + assert!(memory_checked); + } + #[test] fn test_hardware_info() { let mut collector = vec![]; From e1467c56a445d36a8fd8642f9467a0b18fbb8203 Mon Sep 17 00:00:00 2001 From: Hu# Date: Thu, 12 Jan 2023 17:35:52 +0800 Subject: [PATCH 0452/1149] pd_client: fix the kvproto 
compatibility for global config (#14041) * hotfix kvproto for global config Signed-off-by: husharp * make format happy Signed-off-by: husharp Signed-off-by: husharp Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/pd_client/src/client.rs | 4 ++-- components/pd_client/src/client_v2.rs | 6 +++--- components/pd_client/src/lib.rs | 2 +- components/test_pd/src/mocker/mod.rs | 8 ++++---- tests/failpoints/cases/test_pd_client.rs | 15 +++++---------- tests/failpoints/cases/test_pd_client_legacy.rs | 16 +++++----------- 7 files changed, 21 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a3c9ced013..c98cd025fad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2726,7 +2726,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#ae3b086b09afbb26cebcd4c1fe14b82bbe1f0796" +source = "git+https://github.com/pingcap/kvproto.git#a14c44ef44b378d15adb5baad8402b838f031b51" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 9f466a6a351..5bccdcfacea 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -286,10 +286,10 @@ impl fmt::Debug for RpcClient { const LEADER_CHANGE_RETRY: usize = 10; impl PdClient for RpcClient { - fn load_global_config(&self, list: Vec) -> PdFuture> { + fn load_global_config(&self, config_path: String) -> PdFuture> { use kvproto::pdpb::LoadGlobalConfigRequest; let mut req = LoadGlobalConfigRequest::new(); - req.set_names(list.into()); + req.set_config_path(config_path); let executor = |client: &Client, req| match client .inner .rl() diff --git a/components/pd_client/src/client_v2.rs b/components/pd_client/src/client_v2.rs index 3d17a94a494..b42d8fb3ddb 100644 --- a/components/pd_client/src/client_v2.rs +++ b/components/pd_client/src/client_v2.rs @@ -542,7 +542,7 @@ pub trait PdClient { fn fetch_cluster_id(&mut self) -> Result; - fn 
load_global_config(&mut self, list: Vec) -> PdFuture>; + fn load_global_config(&mut self, config_path: String) -> PdFuture>; fn watch_global_config( &mut self, @@ -791,10 +791,10 @@ impl PdClient for RpcClient { Ok((tx, resp_rx)) } - fn load_global_config(&mut self, list: Vec) -> PdFuture> { + fn load_global_config(&mut self, config_path: String) -> PdFuture> { use kvproto::pdpb::LoadGlobalConfigRequest; let mut req = LoadGlobalConfigRequest::new(); - req.set_names(list.into()); + req.set_config_path(config_path); let mut raw_client = self.raw_client.clone(); Box::pin(async move { raw_client.wait_for_ready().await?; diff --git a/components/pd_client/src/lib.rs b/components/pd_client/src/lib.rs index 8674130c799..46a3e6924db 100644 --- a/components/pd_client/src/lib.rs +++ b/components/pd_client/src/lib.rs @@ -209,7 +209,7 @@ pub const INVALID_ID: u64 = 0; /// all the time. pub trait PdClient: Send + Sync { /// Load a list of GlobalConfig - fn load_global_config(&self, _list: Vec) -> PdFuture> { + fn load_global_config(&self, _config_path: String) -> PdFuture> { unimplemented!(); } diff --git a/components/test_pd/src/mocker/mod.rs b/components/test_pd/src/mocker/mod.rs index d904c95d4a8..84c2508d4ea 100644 --- a/components/test_pd/src/mocker/mod.rs +++ b/components/test_pd/src/mocker/mod.rs @@ -27,13 +27,13 @@ pub type Result = result::Result; pub trait PdMocker { fn load_global_config( &self, - req: &LoadGlobalConfigRequest, + _req: &LoadGlobalConfigRequest, ) -> Option> { let mut send = vec![]; - for r in req.get_names() { + for r in 0..10 { let mut i = GlobalConfigItem::default(); - i.set_name(format!("/global/config/{}", r.clone())); - i.set_value(r.clone()); + i.set_name(format!("/global/config/{}", r)); + i.set_value(r.to_string()); send.push(i); } let mut res = LoadGlobalConfigResponse::default(); diff --git a/tests/failpoints/cases/test_pd_client.rs b/tests/failpoints/cases/test_pd_client.rs index ca0a473a8b7..7dd767d19c9 100644 --- 
a/tests/failpoints/cases/test_pd_client.rs +++ b/tests/failpoints/cases/test_pd_client.rs @@ -69,7 +69,7 @@ fn test_pd_client_deadlock() { request!(client => block_on(get_gc_safe_point())), request!(client => block_on(get_store_and_stats(0))), request!(client => get_operator(0)), - request!(client => load_global_config(vec![])), + request!(client => load_global_config(String::default())), ]; for (name, func) in test_funcs { @@ -101,14 +101,7 @@ fn test_pd_client_deadlock() { fn test_load_global_config() { let (mut _server, mut client) = new_test_server_and_client(ReadableDuration::millis(100)); let res = futures::executor::block_on(async move { - client - .load_global_config( - ["abc", "123", "xyz"] - .iter() - .map(|x| x.to_string()) - .collect::>(), - ) - .await + client.load_global_config("global".to_string()).await }); for (k, v) in res.unwrap() { assert_eq!(k, format!("/global/config/{}", v)) @@ -293,7 +286,9 @@ fn test_retry() { }); test_retry_success(&mut client, |c| block_on(c.get_gc_safe_point())); test_retry_success(&mut client, |c| c.get_operator(0)); - test_retry_success(&mut client, |c| block_on(c.load_global_config(vec![]))); + test_retry_success(&mut client, |c| { + block_on(c.load_global_config(String::default())) + }); fail::remove(pd_client_v2_timeout_fp); fail::remove(pd_client_v2_backoff_fp); diff --git a/tests/failpoints/cases/test_pd_client_legacy.rs b/tests/failpoints/cases/test_pd_client_legacy.rs index eb22ac29e45..172db8ac09e 100644 --- a/tests/failpoints/cases/test_pd_client_legacy.rs +++ b/tests/failpoints/cases/test_pd_client_legacy.rs @@ -73,7 +73,7 @@ fn test_pd_client_deadlock() { request!(client => block_on(get_store_stats_async(0))), request!(client => get_operator(0)), request!(client => block_on(get_tso())), - request!(client => load_global_config(vec![])), + request!(client => load_global_config(String::default())), ]; for (name, func) in test_funcs { @@ -108,16 +108,10 @@ fn test_pd_client_deadlock() { #[test] fn 
test_load_global_config() { let (mut _server, client) = new_test_server_and_client(ReadableDuration::millis(100)); - let res = futures::executor::block_on(async move { - client - .load_global_config( - ["abc", "123", "xyz"] - .iter() - .map(|x| x.to_string()) - .collect::>(), - ) - .await - }); + let res = + futures::executor::block_on( + async move { client.load_global_config("global".into()).await }, + ); for (k, v) in res.unwrap() { assert_eq!(k, format!("/global/config/{}", v)) } From 2daa168f13831ab9cfd653ad2971eccbb3f38a22 Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 13 Jan 2023 09:03:46 +0800 Subject: [PATCH 0453/1149] *: add resource group for the read path (#14001) ref tikv/tikv#13730 Signed-off-by: glorv --- Cargo.lock | 25 +- Cargo.toml | 3 + components/resource_control/Cargo.toml | 20 + components/resource_control/src/future.rs | 46 ++ components/resource_control/src/lib.rs | 18 + .../resource_control/src/resource_group.rs | 482 ++++++++++++++++++ components/server/Cargo.toml | 1 + components/server/src/server.rs | 21 + components/server/src/server2.rs | 21 + .../tikv_util/src/yatp_pool/future_pool.rs | 23 +- components/tikv_util/src/yatp_pool/mod.rs | 16 +- src/config/mod.rs | 5 + src/coprocessor/endpoint.rs | 12 + src/read_pool.rs | 95 ++-- src/storage/mod.rs | 32 ++ 15 files changed, 783 insertions(+), 37 deletions(-) create mode 100644 components/resource_control/Cargo.toml create mode 100644 components/resource_control/src/future.rs create mode 100644 components/resource_control/src/lib.rs create mode 100644 components/resource_control/src/resource_group.rs diff --git a/Cargo.lock b/Cargo.lock index c98cd025fad..0b7ca52725c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4656,6 +4656,25 @@ dependencies = [ "txn_types", ] +[[package]] +name = "resource_control" +version = "0.0.1" +dependencies = [ + "byteorder", + "crossbeam-skiplist", + "dashmap", + "kvproto", + "lazy_static", + "online_config", + "pin-project", + "prometheus", + "serde", + "slog", 
+ "slog-global", + "tikv_util", + "yatp", +] + [[package]] name = "resource_metering" version = "0.0.1" @@ -5209,6 +5228,7 @@ dependencies = [ "raftstore-v2", "rand 0.8.5", "resolved_ts", + "resource_control", "resource_metering", "security", "serde_json", @@ -6290,6 +6310,7 @@ dependencies = [ "rand 0.7.3", "regex", "reqwest", + "resource_control", "resource_metering", "rev_lines", "seahash", @@ -7363,9 +7384,11 @@ checksum = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5" [[package]] name = "yatp" version = "0.0.1" -source = "git+https://github.com/tikv/yatp.git?branch=master#39cb495953d40a7e846363c06090755c2eac65fa" +source = "git+https://github.com/tikv/yatp.git?branch=master#bcf431a2619c06ab7fa0c72073a0c775646c484f" dependencies = [ "crossbeam-deque", + "crossbeam-skiplist", + "crossbeam-utils 0.8.8", "dashmap", "fail", "lazy_static", diff --git a/Cargo.toml b/Cargo.toml index 4c8af61e554..d76dce26a18 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -139,6 +139,7 @@ raftstore = { workspace = true, features = ["engine_rocks"] } raftstore-v2 = { workspace = true } rand = "0.7.3" regex = "1.3" +resource_control = { workspace = true } resource_metering = { workspace = true } rev_lines = "0.2.1" seahash = "4.1.0" @@ -267,6 +268,7 @@ members = [ "components/raftstore", "components/raftstore-v2", "components/resolved_ts", + "components/resource_control", "components/resource_metering", "components/security", "components/server", @@ -341,6 +343,7 @@ raft_log_engine = { path = "components/raft_log_engine" } raftstore = { path = "components/raftstore", default-features = false } raftstore-v2 = { path = "components/raftstore-v2", default-features = false } resolved_ts = { path = "components/resolved_ts" } +resource_control = { path = "components/resource_control" } resource_metering = { path = "components/resource_metering" } security = { path = "components/security" } server = { path = "components/server" } diff --git 
a/components/resource_control/Cargo.toml b/components/resource_control/Cargo.toml new file mode 100644 index 00000000000..822aed2cd2d --- /dev/null +++ b/components/resource_control/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "resource_control" +version = "0.0.1" +edition = "2021" +publish = false + +[dependencies] +byteorder = "1.2" +crossbeam-skiplist = "0.1" +dashmap = "5.1" +kvproto = { git = "https://github.com/pingcap/kvproto.git" } +lazy_static = "1.0" +online_config = { workspace = true } +pin-project = "1.0" +prometheus = { version = "0.13", features = ["nightly"] } +serde = { version = "1.0", features = ["derive"] } +slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } +slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +tikv_util = { workspace = true } +yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } diff --git a/components/resource_control/src/future.rs b/components/resource_control/src/future.rs new file mode 100644 index 00000000000..8027a27b394 --- /dev/null +++ b/components/resource_control/src/future.rs @@ -0,0 +1,46 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + future::Future, + pin::Pin, + sync::Arc, + task::{Context, Poll}, +}; + +use pin_project::pin_project; +use tikv_util::time::Instant; + +use crate::resource_group::{ResourceConsumeType, ResourceController}; + +#[pin_project] +pub struct ControlledFuture { + #[pin] + future: F, + controller: Arc, + group_name: Vec, +} + +impl ControlledFuture { + pub fn new(future: F, controller: Arc, group_name: Vec) -> Self { + Self { + future, + controller, + group_name, + } + } +} + +impl Future for ControlledFuture { + type Output = F::Output; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let this = self.project(); + let now = Instant::now(); + let res = this.future.poll(cx); + this.controller.consume( + this.group_name, + ResourceConsumeType::CpuTime(now.saturating_elapsed()), + ); + res + } +} diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs new file mode 100644 index 00000000000..516e5dd6c8d --- /dev/null +++ b/components/resource_control/src/lib.rs @@ -0,0 +1,18 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use online_config::OnlineConfig; +use serde::{Deserialize, Serialize}; + +mod resource_group; +pub use resource_group::{ResourceController, ResourceGroupManager, MIN_PRIORITY_UPDATE_INTERVAL}; + +mod future; +pub use future::ControlledFuture; + +#[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig, Default)] +#[serde(default)] +#[serde(rename_all = "kebab-case")] +pub struct Config { + #[online_config(skip)] + pub enabled: bool, +} diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs new file mode 100644 index 00000000000..d9fa3ccf14c --- /dev/null +++ b/components/resource_control/src/resource_group.rs @@ -0,0 +1,482 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, Mutex, + }, + time::Duration, +}; + +use dashmap::{mapref::one::Ref, DashMap}; +use kvproto::resource_manager::{GroupMode, ResourceGroup}; +use yatp::queue::priority::TaskPriorityProvider; + +// a read task cost at least 50us. +const DEFAULT_PRIORITY_PER_READ_TASK: u64 = 50; +// extra task schedule factor +const TASK_EXTRA_FACTOR_BY_LEVEL: [u64; 3] = [0, 20, 100]; +/// duration to update the minimal priority value of each resource group. +pub const MIN_PRIORITY_UPDATE_INTERVAL: Duration = Duration::from_secs(1); +/// default resource group name +const DEFAULT_RESOURCE_GROUP_NAME: &str = "default"; +/// default value of max RU quota. +const DEFAULT_MAX_RU_QUOTA: u64 = 10_000; + +pub enum ResourceConsumeType { + CpuTime(Duration), + IoBytes(u64), +} + +/// ResourceGroupManager manages the metadata of each resource group. +#[derive(Default)] +pub struct ResourceGroupManager { + resource_groups: DashMap, + registry: Mutex>>, +} + +impl ResourceGroupManager { + fn get_ru_setting(rg: &ResourceGroup, is_read: bool) -> u64 { + match (rg.get_mode(), is_read) { + (GroupMode::RuMode, true) => rg + .get_r_u_settings() + .get_r_r_u() + .get_settings() + .get_fill_rate(), + (GroupMode::RuMode, false) => rg + .get_r_u_settings() + .get_w_r_u() + .get_settings() + .get_fill_rate(), + // TODO: currently we only consider the cpu usage in the read path, we may also take + // io read bytes into account later. + (GroupMode::RawMode, true) => rg + .get_resource_settings() + .get_cpu() + .get_settings() + .get_fill_rate(), + (GroupMode::RawMode, false) => rg + .get_resource_settings() + .get_io_write() + .get_settings() + .get_fill_rate(), + // return a default value for unsupported config. 
+ (GroupMode::Unknown, _) => 1, + } + } + + pub fn add_resource_group(&self, rg: ResourceGroup) { + let group_name = rg.get_name().to_ascii_lowercase(); + self.registry.lock().unwrap().iter().for_each(|controller| { + let ru_quota = Self::get_ru_setting(&rg, controller.is_read); + controller.add_resource_group(group_name.clone().into_bytes(), ru_quota); + }); + self.resource_groups.insert(group_name, rg); + } + + pub fn remove_resource_group(&self, name: &str) { + let group_name = name.to_ascii_lowercase(); + self.registry.lock().unwrap().iter().for_each(|controller| { + controller.remove_resource_group(group_name.as_bytes()); + }); + self.resource_groups.remove(&group_name); + } + + pub fn get_resource_group(&self, name: &str) -> Option> { + self.resource_groups.get(&name.to_ascii_lowercase()) + } + + pub fn get_all_resource_groups(&self) -> Vec { + self.resource_groups.iter().map(|g| g.clone()).collect() + } + + pub fn derive_controller(&self, name: String, is_read: bool) -> Arc { + let controller = Arc::new(ResourceController::new(name, is_read)); + self.registry.lock().unwrap().push(controller.clone()); + for g in &self.resource_groups { + let ru_quota = Self::get_ru_setting(g.value(), controller.is_read); + controller.add_resource_group(g.key().clone().into_bytes(), ru_quota); + } + + controller + } + + pub fn advance_min_virtual_time(&self) { + for controller in self.registry.lock().unwrap().iter() { + controller.update_min_virtual_time(); + } + } +} + +pub struct ResourceController { + // resource controller name is not used currently. + #[allow(dead_code)] + name: String, + // We handle the priority differently between read and write request: + // 1. the priority factor is calculate based on read/write RU settings. + // 2. 
for read request, we increase a constant virtual time delta at each `get_priority` call + // because the cost can't be calculated at start, so we only increase a constant delta and + // increase the real cost after task is executed; but don't increase it at write because + // the cost is known so we just pre-consume it. + is_read: bool, + // Track the maximum ru quota used to calculate the factor of each resource group. + // factor = max_ru_quota / group_ru_quota * 10.0 + // We use mutex here to ensure when we need to change this value and do adjust all resource + // groups' factors, it can't be changed concurrently. + max_ru_quota: Mutex, + // record consumption of each resource group, name --> resource_group + resource_consumptions: DashMap, GroupPriorityTracker>, + + last_min_vt: AtomicU64, +} + +impl ResourceController { + pub fn new(name: String, is_read: bool) -> Self { + let controller = Self { + name, + is_read, + max_ru_quota: Mutex::new(DEFAULT_MAX_RU_QUOTA), + resource_consumptions: DashMap::new(), + last_min_vt: AtomicU64::new(0), + }; + // add the "default" resource group + controller.add_resource_group(DEFAULT_RESOURCE_GROUP_NAME.as_bytes().to_owned(), 0); + controller + } + + fn calculate_factor(max_quota: u64, quota: u64) -> u64 { + if quota > 0 { + // we use max_quota / quota as the resource group factor, but because we need to + // cast the value to integer, so we times it by 10 to ensure the accuracy is + // enough. + (max_quota as f64 / quota as f64 * 10.0).round() as u64 + } else { + 1 + } + } + + fn add_resource_group(&self, name: Vec, ru_quota: u64) { + let mut max_ru_quota = self.max_ru_quota.lock().unwrap(); + if ru_quota > *max_ru_quota { + *max_ru_quota = ru_quota; + // adjust all group weight because the current value is too small. 
+ self.adjust_all_resource_group_factors(ru_quota); + } + let weight = Self::calculate_factor(*max_ru_quota, ru_quota); + + let vt_delta_for_get = if self.is_read { + DEFAULT_PRIORITY_PER_READ_TASK * weight + } else { + 0 + }; + let group = GroupPriorityTracker { + ru_quota, + weight, + virtual_time: AtomicU64::new(self.last_min_vt.load(Ordering::Acquire)), + vt_delta_for_get, + }; + // maybe update existed group + self.resource_consumptions.insert(name, group); + } + + // we calculate the weight of each resource group based on the currently maximum + // ru quota, if a incoming resource group has a bigger quota, we need to + // adjust all the existing groups. As we expect this won't happen very + // often, and iterate 10k entry cost less than 5ms, so the performance is + // acceptable. + fn adjust_all_resource_group_factors(&self, max_ru_quota: u64) { + self.resource_consumptions.iter_mut().for_each(|mut g| { + g.value_mut().weight = Self::calculate_factor(max_ru_quota, g.ru_quota); + }); + } + + fn remove_resource_group(&self, name: &[u8]) { + // do not remove the default resource group, reset to default setting instead. 
+ if DEFAULT_RESOURCE_GROUP_NAME.as_bytes() == name { + self.add_resource_group(DEFAULT_RESOURCE_GROUP_NAME.as_bytes().to_owned(), 0); + } + self.resource_consumptions.remove(name); + } + + #[inline] + fn resource_group(&self, name: &[u8]) -> Ref<'_, Vec, GroupPriorityTracker> { + if let Some(g) = self.resource_consumptions.get(name) { + g + } else { + self.resource_consumptions + .get(DEFAULT_RESOURCE_GROUP_NAME.as_bytes()) + .unwrap() + } + } + + pub fn consume(&self, name: &[u8], delta: ResourceConsumeType) { + self.resource_group(name).consume(delta) + } + + pub fn update_min_virtual_time(&self) { + let mut min_vt = u64::MAX; + let mut max_vt = 0; + self.resource_consumptions.iter().for_each(|g| { + let vt = g.current_vt(); + if min_vt > vt { + min_vt = vt; + } + if max_vt < vt { + max_vt = vt; + } + }); + + // TODO: use different threshold for different resource type + // needn't do update if the virtual different is less than 100ms/100KB. + if min_vt + 100_000 >= max_vt { + return; + } + + self.resource_consumptions.iter().for_each(|g| { + let vt = g.current_vt(); + if vt < max_vt { + // TODO: is increase by half is a good choice. + g.increase_vt((max_vt - vt) / 2); + } + }); + // max_vt is actually a little bigger than the current min vt, but we don't + // need totally accurate here. + self.last_min_vt.store(max_vt, Ordering::Relaxed); + } +} + +impl TaskPriorityProvider for ResourceController { + fn priority_of(&self, extras: &yatp::queue::Extras) -> u64 { + self.resource_group(extras.metadata()) + .get_priority(extras.current_level() as usize) + } +} + +struct GroupPriorityTracker { + // the ru setting of this group. 
+ ru_quota: u64, + weight: u64, + virtual_time: AtomicU64, + // the constant delta value for each `get_priority` call, + vt_delta_for_get: u64, +} + +impl GroupPriorityTracker { + fn get_priority(&self, level: usize) -> u64 { + let task_extra_priority = TASK_EXTRA_FACTOR_BY_LEVEL[level] * 1000 * self.weight; + (if self.vt_delta_for_get > 0 { + self.virtual_time + .fetch_add(self.vt_delta_for_get, Ordering::Relaxed) + + self.vt_delta_for_get + } else { + self.virtual_time.load(Ordering::Relaxed) + }) + task_extra_priority + } + + #[inline] + fn current_vt(&self) -> u64 { + self.virtual_time.load(Ordering::Relaxed) + } + + #[inline] + fn increase_vt(&self, vt_delta: u64) { + self.virtual_time.fetch_add(vt_delta, Ordering::Relaxed); + } + + // TODO: make it delta type as generic to avoid mixed consume different types. + #[inline] + fn consume(&self, delta: ResourceConsumeType) { + let vt_delta = match delta { + ResourceConsumeType::CpuTime(dur) => dur.as_micros() as u64, + ResourceConsumeType::IoBytes(bytes) => bytes, + } * self.weight; + self.increase_vt(vt_delta); + } +} + +#[cfg(test)] +mod tests { + use kvproto::resource_manager::*; + use yatp::queue::Extras; + + use super::*; + + fn new_resource_group( + name: String, + is_ru_mode: bool, + read_tokens: u64, + write_tokens: u64, + ) -> ResourceGroup { + let mut group = ResourceGroup::new(); + group.set_name(name); + let mode = if is_ru_mode { + GroupMode::RuMode + } else { + GroupMode::RawMode + }; + group.set_mode(mode); + if is_ru_mode { + let mut ru_setting = GroupRequestUnitSettings::new(); + ru_setting + .mut_r_r_u() + .mut_settings() + .set_fill_rate(read_tokens); + ru_setting + .mut_w_r_u() + .mut_settings() + .set_fill_rate(write_tokens); + group.set_r_u_settings(ru_setting); + } else { + let mut resource_setting = GroupResourceSettings::new(); + resource_setting + .mut_cpu() + .mut_settings() + .set_fill_rate(read_tokens); + resource_setting + .mut_io_write() + .mut_settings() + 
.set_fill_rate(write_tokens); + group.set_resource_settings(resource_setting); + } + group + } + + #[test] + fn test_resource_group() { + let resource_manager = ResourceGroupManager::default(); + + let group1 = new_resource_group("TEST".into(), true, 100, 100); + resource_manager.add_resource_group(group1); + + assert!(resource_manager.get_resource_group("test1").is_none()); + + let group = resource_manager.get_resource_group("test").unwrap(); + assert_eq!( + group + .value() + .get_r_u_settings() + .get_r_r_u() + .get_settings() + .get_fill_rate(), + 100 + ); + drop(group); + assert_eq!(resource_manager.resource_groups.len(), 1); + + let group1 = new_resource_group("Test".into(), true, 200, 100); + resource_manager.add_resource_group(group1); + let group = resource_manager.get_resource_group("test").unwrap(); + assert_eq!( + group + .value() + .get_r_u_settings() + .get_r_r_u() + .get_settings() + .get_fill_rate(), + 200 + ); + drop(group); + assert_eq!(resource_manager.resource_groups.len(), 1); + + let group2 = new_resource_group("test2".into(), true, 400, 200); + resource_manager.add_resource_group(group2); + assert_eq!(resource_manager.resource_groups.len(), 2); + + let resouce_ctl = resource_manager.derive_controller("test_read".into(), true); + assert_eq!(resouce_ctl.resource_consumptions.len(), 3); + + let group1 = resouce_ctl.resource_group("test".as_bytes()); + assert_eq!(group1.weight, 500); + let group2 = resouce_ctl.resource_group("test2".as_bytes()); + assert_eq!(group2.weight, 250); + assert_eq!(group1.current_vt(), 0); + + let mut extras1 = Extras::single_level(); + extras1.set_metadata("test".as_bytes().to_owned()); + assert_eq!(resouce_ctl.priority_of(&extras1), 25_000); + assert_eq!(group1.current_vt(), 25_000); + + let mut extras2 = Extras::single_level(); + extras2.set_metadata("test2".as_bytes().to_owned()); + assert_eq!(resouce_ctl.priority_of(&extras2), 12_500); + assert_eq!(group2.current_vt(), 12_500); + + let mut extras3 = 
Extras::single_level(); + extras3.set_metadata("unknown_group".as_bytes().to_owned()); + assert_eq!(resouce_ctl.priority_of(&extras3), 50); + assert_eq!( + resouce_ctl + .resource_group("default".as_bytes()) + .current_vt(), + 50 + ); + + resouce_ctl.consume( + "test".as_bytes(), + ResourceConsumeType::CpuTime(Duration::from_micros(10000)), + ); + resouce_ctl.consume( + "test2".as_bytes(), + ResourceConsumeType::CpuTime(Duration::from_micros(10000)), + ); + + assert_eq!(group1.current_vt(), 5_025_000); + assert_eq!(group1.current_vt(), group2.current_vt() * 2); + + // test update all group vts + resource_manager.advance_min_virtual_time(); + let group1_vt = group1.current_vt(); + assert_eq!(group1_vt, 5_025_000); + assert!(group2.current_vt() >= group1.current_vt() * 3 / 4); + assert!( + resouce_ctl + .resource_group("default".as_bytes()) + .current_vt() + >= group1.current_vt() / 2 + ); + + drop(group1); + drop(group2); + + // test add 1 new resource group + let new_group = new_resource_group("new_group".into(), true, 500, 500); + resource_manager.add_resource_group(new_group); + + assert_eq!(resouce_ctl.resource_consumptions.len(), 4); + let group3 = resouce_ctl.resource_group("new_group".as_bytes()); + assert_eq!(group3.weight, 200); + assert!(group3.current_vt() >= group1_vt / 2); + } + + #[test] + fn test_adjust_resource_group_weight() { + let resource_manager = ResourceGroupManager::default(); + let resource_ctl = resource_manager.derive_controller("test_read".into(), true); + let resource_ctl_write = resource_manager.derive_controller("test_write".into(), false); + + let group1 = new_resource_group("test1".into(), true, 5000, 1000); + resource_manager.add_resource_group(group1); + assert_eq!(resource_ctl.resource_group("test1".as_bytes()).weight, 20); + assert_eq!( + resource_ctl_write.resource_group("test1".as_bytes()).weight, + 100 + ); + + // add a resource group with big ru + let group1 = new_resource_group("test2".into(), true, 50000, 2000); + 
resource_manager.add_resource_group(group1); + assert_eq!(*resource_ctl.max_ru_quota.lock().unwrap(), 50000); + assert_eq!(resource_ctl.resource_group("test1".as_bytes()).weight, 100); + assert_eq!(resource_ctl.resource_group("test2".as_bytes()).weight, 10); + // resource_ctl_write should be unchanged. + assert_eq!(*resource_ctl_write.max_ru_quota.lock().unwrap(), 10000); + assert_eq!( + resource_ctl_write.resource_group("test1".as_bytes()).weight, + 100 + ); + assert_eq!( + resource_ctl_write.resource_group("test2".as_bytes()).weight, + 50 + ); + } +} diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index acdca09b29c..d5e2f177b5e 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -69,6 +69,7 @@ raftstore = { workspace = true, features = ["engine_rocks"] } raftstore-v2 = { workspace = true } rand = "0.8" resolved_ts = { workspace = true } +resource_control = { workspace = true } resource_metering = { workspace = true } security = { workspace = true } serde_json = "1.0" diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 3c926969ce2..52b9fbf1d1a 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -82,6 +82,7 @@ use raftstore::{ }, RaftRouterCompactedEventSender, }; +use resource_control::{ResourceGroupManager, MIN_PRIORITY_UPDATE_INTERVAL}; use security::SecurityManager; use snap_recovery::RecoveryService; use tikv::{ @@ -244,6 +245,7 @@ struct TikvServer { check_leader_worker: Worker, sst_worker: Option>>, quota_limiter: Arc, + resource_manager: Arc, causal_ts_provider: Option>, // used for rawkv apiv2 tablet_registry: Option>, br_snap_recovery_mode: bool, // use for br snapshot recovery @@ -320,6 +322,7 @@ where let config = cfg_controller.get_current(); let store_path = Path::new(&config.storage.data_dir).to_owned(); + let resource_manager = Arc::new(ResourceGroupManager::default()); // Initialize raftstore channels. 
let (router, system) = fsm::create_raft_batch_system(&config.raft_store); @@ -328,6 +331,14 @@ where let background_worker = WorkerBuilder::new("background") .thread_count(thread_count) .create(); + // spawn a task to periodically update the minimal virtual time of all resource + // group. + if config.resource_control.enabled { + let resource_mgr1 = resource_manager.clone(); + background_worker.spawn_interval_task(MIN_PRIORITY_UPDATE_INTERVAL, move || { + resource_mgr1.advance_min_virtual_time(); + }); + } let mut coprocessor_host = Some(CoprocessorHost::new( router.clone(), @@ -398,6 +409,7 @@ where flow_info_receiver: None, sst_worker: None, quota_limiter, + resource_manager, causal_ts_provider, tablet_registry: None, br_snap_recovery_mode: is_recovering_marked, @@ -733,10 +745,19 @@ where } let unified_read_pool = if self.config.readpool.is_unified_pool_enabled() { + let priority_mgr = if self.config.resource_control.enabled { + Some( + self.resource_manager + .derive_controller("unified-read-pool".into(), true), + ) + } else { + None + }; Some(build_yatp_read_pool( &self.config.readpool.unified, pd_sender.clone(), engines.engine.clone(), + priority_mgr, )) } else { None diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 5d037fa3412..12e6af61613 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -62,6 +62,7 @@ use raftstore::{ RegionInfoAccessor, }; use raftstore_v2::{router::RaftRouter, StateStorage}; +use resource_control::{ResourceGroupManager, MIN_PRIORITY_UPDATE_INTERVAL}; use security::SecurityManager; use tikv::{ config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, @@ -221,6 +222,7 @@ struct TikvServer { check_leader_worker: Worker, sst_worker: Option>>, quota_limiter: Arc, + resource_manager: Arc, causal_ts_provider: Option>, // used for rawkv apiv2 tablet_registry: Option>, } @@ -285,6 +287,15 @@ where config.quota.max_delay_duration, 
config.quota.enable_auto_tune, )); + let resource_manager = Arc::new(ResourceGroupManager::default()); + // spawn a task to periodically update the minimal virtual time of all resource + // group. + if config.resource_control.enabled { + let resource_mgr1 = resource_manager.clone(); + background_worker.spawn_interval_task(MIN_PRIORITY_UPDATE_INTERVAL, move || { + resource_mgr1.advance_min_virtual_time(); + }); + } let mut causal_ts_provider = None; if let ApiVersion::V2 = F::TAG { @@ -333,6 +344,7 @@ where flow_info_receiver: None, sst_worker: None, quota_limiter, + resource_manager, causal_ts_provider, tablet_registry: None, } @@ -622,10 +634,19 @@ where let pd_sender = raftstore_v2::FlowReporter::new(pd_worker.scheduler()); let unified_read_pool = if self.config.readpool.is_unified_pool_enabled() { + let priority_mgr = if self.config.resource_control.enabled { + Some( + self.resource_manager + .derive_controller("unified-read-pool".into(), true), + ) + } else { + None + }; Some(build_yatp_read_pool( &self.config.readpool.unified, pd_sender.clone(), engines.engine.clone(), + priority_mgr, )) } else { None diff --git a/components/tikv_util/src/yatp_pool/future_pool.rs b/components/tikv_util/src/yatp_pool/future_pool.rs index 9de2d49cb07..e74ced848c0 100644 --- a/components/tikv_util/src/yatp_pool/future_pool.rs +++ b/components/tikv_util/src/yatp_pool/future_pool.rs @@ -15,7 +15,7 @@ use fail::fail_point; use futures::channel::oneshot::{self, Canceled}; use prometheus::{IntCounter, IntGauge}; use tracker::TrackedFuture; -use yatp::task::future; +use yatp::{queue::Extras, task::future}; pub type ThreadPool = yatp::ThreadPool; @@ -82,7 +82,14 @@ impl FuturePool { where F: Future + Send + 'static, { - self.inner.spawn(TrackedFuture::new(future)) + self.inner.spawn(TrackedFuture::new(future), None) + } + + pub fn spawn_with_extras(&self, future: F, extras: Extras) -> Result<(), Full> + where + F: Future + Send + 'static, + { + 
self.inner.spawn(TrackedFuture::new(future), Some(extras)) } /// Spawns a future in the pool and returns a handle to the result of the @@ -143,7 +150,7 @@ impl PoolInner { } } - fn spawn(&self, future: F) -> Result<(), Full> + fn spawn(&self, future: F, extras: Option) -> Result<(), Full> where F: Future + Send + 'static, { @@ -154,11 +161,17 @@ impl PoolInner { metrics_running_task_count.inc(); - self.pool.spawn(async move { + let f = async move { let _ = future.await; metrics_handled_task_count.inc(); metrics_running_task_count.dec(); - }); + }; + + if let Some(extras) = extras { + self.pool.spawn(future::TaskCell::new(f, extras)); + } else { + self.pool.spawn(f); + } Ok(()) } diff --git a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index 6e246d6cddf..29376b904a5 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -10,7 +10,7 @@ pub use future_pool::{Full, FuturePool}; use prometheus::{local::LocalHistogram, Histogram}; use yatp::{ pool::{CloneRunnerBuilder, Local, Runner}, - queue::{multilevel, QueueType, TaskCell as _}, + queue::{multilevel, priority, QueueType, TaskCell as _}, task::future::{Runner as FutureRunner, TaskCell}, ThreadPool, }; @@ -282,6 +282,20 @@ impl YatpPoolBuilder { .build_with_queue_and_runner(QueueType::Multilevel(multilevel_builder), runner_builder) } + pub fn build_priority_pool( + &mut self, + priority_provider: Arc, + ) -> ThreadPool { + let (builder, read_pool_runner) = self.create_builder(); + let name = self.name_prefix.as_deref().unwrap_or("yatp_pool"); + let priority_builder = priority::Builder::new( + priority::Config::default().name(Some(name)), + priority_provider, + ); + let runner_builder = priority_builder.runner_builder(CloneRunnerBuilder(read_pool_runner)); + builder.build_with_queue_and_runner(QueueType::Priority(priority_builder), runner_builder) + } + fn create_builder(&mut self) -> (yatp::Builder, YatpPoolRunner) { let name = 
self.name_prefix.as_deref().unwrap_or("yatp_pool"); let mut builder = yatp::Builder::new(thd_name!(name)); diff --git a/src/config/mod.rs b/src/config/mod.rs index d2c5941c5ec..8d3e5477f26 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -55,6 +55,7 @@ use raftstore::{ coprocessor::{Config as CopConfig, RegionInfoAccessor}, store::{CompactionGuardGeneratorFactory, Config as RaftstoreConfig, SplitConfig}, }; +use resource_control::Config as ResourceControlConfig; use resource_metering::Config as ResourceMeteringConfig; use security::SecurityConfig; use serde::{ @@ -3039,6 +3040,9 @@ pub struct TikvConfig { #[online_config(skip)] pub causal_ts: CausalTsConfig, + + #[online_config(submodule)] + pub resource_control: ResourceControlConfig, } impl Default for TikvConfig { @@ -3081,6 +3085,7 @@ impl Default for TikvConfig { resource_metering: ResourceMeteringConfig::default(), backup_stream: BackupStreamConfig::default(), causal_ts: CausalTsConfig::default(), + resource_control: ResourceControlConfig::default(), } } } diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 54fcaeb0489..711cd83e607 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -470,6 +470,11 @@ impl Endpoint { let resource_tag = self .resource_tag_factory .new_tag_with_key_ranges(&req_ctx.context, key_ranges); + let group_name = req_ctx + .context + .get_resource_group_name() + .as_bytes() + .to_owned(); // box the tracker so that moving it is cheap. let tracker = Box::new(Tracker::new(req_ctx, self.slow_log_threshold)); @@ -480,6 +485,7 @@ impl Endpoint { .in_resource_metering_tag(resource_tag), priority, task_id, + group_name, ) .map_err(|_| Error::MaxPendingTasksExceeded); async move { res.await? 
} @@ -690,6 +696,11 @@ impl Endpoint { ) -> Result>> { let (tx, rx) = mpsc::channel::>(self.stream_channel_size); let priority = req_ctx.context.get_priority(); + let group_name = req_ctx + .context + .get_resource_group_name() + .as_bytes() + .to_owned(); let key_ranges = req_ctx .ranges .iter() @@ -712,6 +723,7 @@ impl Endpoint { }), priority, task_id, + group_name, ) .map_err(|_| Error::MaxPendingTasksExceeded)?; Ok(rx) diff --git a/src/read_pool.rs b/src/read_pool.rs index 5212c4ae594..1a590679584 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -11,6 +11,7 @@ use futures::{channel::oneshot, future::TryFutureExt}; use kvproto::kvrpcpb::CommandPri; use online_config::{ConfigChange, ConfigManager, ConfigValue, Result as CfgResult}; use prometheus::{IntCounter, IntGauge}; +use resource_control::{ControlledFuture, ResourceController}; use thiserror::Error; use tikv_util::{ sys::{cpu_time::ProcessStat, SysQuota}, @@ -52,6 +53,7 @@ pub enum ReadPool { running_threads: IntGauge, max_tasks: usize, pool_size: usize, + resource_ctl: Option>, }, } @@ -73,12 +75,14 @@ impl ReadPool { running_threads, max_tasks, pool_size, + resource_ctl, } => ReadPoolHandle::Yatp { remote: pool.remote().clone(), running_tasks: running_tasks.clone(), running_threads: running_threads.clone(), max_tasks: *max_tasks, pool_size: *pool_size, + resource_ctl: resource_ctl.clone(), }, } } @@ -97,11 +101,18 @@ pub enum ReadPoolHandle { running_threads: IntGauge, max_tasks: usize, pool_size: usize, + resource_ctl: Option>, }, } impl ReadPoolHandle { - pub fn spawn(&self, f: F, priority: CommandPri, task_id: u64) -> Result<(), ReadPoolError> + pub fn spawn( + &self, + f: F, + priority: CommandPri, + task_id: u64, + group_meta: Vec, + ) -> Result<(), ReadPoolError> where F: Future + Send + 'static, { @@ -123,6 +134,7 @@ impl ReadPoolHandle { remote, running_tasks, max_tasks, + resource_ctl, .. 
} => { let running_tasks = running_tasks.clone(); @@ -140,14 +152,29 @@ impl ReadPoolHandle { CommandPri::Normal => None, CommandPri::Low => Some(2), }; - let extras = Extras::new_multilevel(task_id, fixed_level); - let task_cell = TaskCell::new( - TrackedFuture::new(async move { - f.await; - running_tasks.dec(); - }), - extras, - ); + let mut extras = Extras::new_multilevel(task_id, fixed_level); + extras.set_metadata(group_meta.clone()); + let task_cell = if let Some(resource_ctl) = resource_ctl { + TaskCell::new( + TrackedFuture::new(ControlledFuture::new( + async move { + f.await; + running_tasks.dec(); + }, + resource_ctl.clone(), + group_meta, + )), + extras, + ) + } else { + TaskCell::new( + TrackedFuture::new(async move { + f.await; + running_tasks.dec(); + }), + extras, + ) + }; remote.spawn(task_cell); } } @@ -159,6 +186,7 @@ impl ReadPoolHandle { f: F, priority: CommandPri, task_id: u64, + group_meta: Vec, ) -> impl Future> where F: Future + Send + 'static, @@ -172,6 +200,7 @@ impl ReadPoolHandle { }, priority, task_id, + group_meta, ); async move { res?; @@ -262,11 +291,12 @@ pub fn build_yatp_read_pool( config: &UnifiedReadPoolConfig, reporter: R, engine: E, + resource_ctl: Option>, ) -> ReadPool { let unified_read_pool_name = get_unified_read_pool_name(); - let mut builder = YatpPoolBuilder::new(ReporterTicker { reporter }); let raftkv = Arc::new(Mutex::new(engine)); - let pool = builder + let mut builder = YatpPoolBuilder::new(ReporterTicker { reporter }); + builder .name_prefix(&unified_read_pool_name) .stack_size(config.stack_size.0 as usize) .thread_count( @@ -284,8 +314,12 @@ pub fn build_yatp_read_pool( }) .before_stop(|| unsafe { destroy_tls_engine::(); - }) - .build_multi_level_pool(); + }); + let pool = if let Some(ref r) = resource_ctl { + builder.build_priority_pool(r.clone()) + } else { + builder.build_multi_level_pool() + }; ReadPool::Yatp { pool, running_tasks: UNIFIED_READ_POOL_RUNNING_TASKS @@ -296,6 +330,7 @@ pub fn 
build_yatp_read_pool( .max_tasks_per_worker .saturating_mul(config.max_thread_count), pool_size: config.max_thread_count, + resource_ctl, } } @@ -600,7 +635,7 @@ mod tests { // max running tasks number should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); - let pool = build_yatp_read_pool(&config, DummyReporter, engine); + let pool = build_yatp_read_pool(&config, DummyReporter, engine, None); let gen_task = || { let (tx, rx) = oneshot::channel::<()>(); @@ -616,18 +651,18 @@ mod tests { let (task3, _tx3) = gen_task(); let (task4, _tx4) = gen_task(); - handle.spawn(task1, CommandPri::Normal, 1).unwrap(); - handle.spawn(task2, CommandPri::Normal, 2).unwrap(); + handle.spawn(task1, CommandPri::Normal, 1, vec![]).unwrap(); + handle.spawn(task2, CommandPri::Normal, 2, vec![]).unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task3, CommandPri::Normal, 3) { + match handle.spawn(task3, CommandPri::Normal, 3, vec![]) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } tx1.send(()).unwrap(); thread::sleep(Duration::from_millis(300)); - handle.spawn(task4, CommandPri::Normal, 4).unwrap(); + handle.spawn(task4, CommandPri::Normal, 4, vec![]).unwrap(); } #[test] @@ -641,7 +676,7 @@ mod tests { // max running tasks number should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); - let pool = build_yatp_read_pool(&config, DummyReporter, engine); + let pool = build_yatp_read_pool(&config, DummyReporter, engine, None); let gen_task = || { let (tx, rx) = oneshot::channel::<()>(); @@ -658,11 +693,11 @@ mod tests { let (task4, _tx4) = gen_task(); let (task5, _tx5) = gen_task(); - handle.spawn(task1, CommandPri::Normal, 1).unwrap(); - handle.spawn(task2, CommandPri::Normal, 2).unwrap(); + handle.spawn(task1, CommandPri::Normal, 1, vec![]).unwrap(); + handle.spawn(task2, CommandPri::Normal, 2, vec![]).unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task3, 
CommandPri::Normal, 3) { + match handle.spawn(task3, CommandPri::Normal, 3, vec![]) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } @@ -670,10 +705,10 @@ mod tests { handle.scale_pool_size(3); assert_eq!(handle.get_normal_pool_size(), 3); - handle.spawn(task4, CommandPri::Normal, 4).unwrap(); + handle.spawn(task4, CommandPri::Normal, 4, vec![]).unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task5, CommandPri::Normal, 5) { + match handle.spawn(task5, CommandPri::Normal, 5, vec![]) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } @@ -690,7 +725,7 @@ mod tests { // max running tasks number should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); - let pool = build_yatp_read_pool(&config, DummyReporter, engine); + let pool = build_yatp_read_pool(&config, DummyReporter, engine, None); let gen_task = || { let (tx, rx) = oneshot::channel::<()>(); @@ -707,11 +742,11 @@ mod tests { let (task4, _tx4) = gen_task(); let (task5, _tx5) = gen_task(); - handle.spawn(task1, CommandPri::Normal, 1).unwrap(); - handle.spawn(task2, CommandPri::Normal, 2).unwrap(); + handle.spawn(task1, CommandPri::Normal, 1, vec![]).unwrap(); + handle.spawn(task2, CommandPri::Normal, 2, vec![]).unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task3, CommandPri::Normal, 3) { + match handle.spawn(task3, CommandPri::Normal, 3, vec![]) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } @@ -723,10 +758,10 @@ mod tests { handle.scale_pool_size(1); assert_eq!(handle.get_normal_pool_size(), 1); - handle.spawn(task4, CommandPri::Normal, 4).unwrap(); + handle.spawn(task4, CommandPri::Normal, 4, vec![]).unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task5, CommandPri::Normal, 5) { + match handle.spawn(task5, CommandPri::Normal, 5, vec![]) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => 
panic!("should return full error"), } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 802b0507849..0819c2599b9 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -594,6 +594,7 @@ impl Storage { let stage_begin_ts = Instant::now(); const CMD: CommandKind = CommandKind::get; let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag_with_key_ranges( &ctx, @@ -727,6 +728,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), + group_name, ); async move { res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) @@ -750,6 +752,11 @@ impl Storage { const CMD: CommandKind = CommandKind::batch_get_command; // all requests in a batch have the same region, epoch, term, replica_read let priority = requests[0].get_context().get_priority(); + let group_name = requests[0] + .get_context() + .get_resource_group_name() + .as_bytes() + .to_owned(); let concurrency_manager = self.concurrency_manager.clone(); let api_version = self.api_version; @@ -910,6 +917,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), + group_name, ); async move { res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) @@ -929,6 +937,7 @@ impl Storage { let stage_begin_ts = Instant::now(); const CMD: CommandKind = CommandKind::batch_get; let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); let priority_tag = get_priority_tag(priority); let key_ranges = keys .iter() @@ -1082,6 +1091,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), + group_name, ); async move { @@ -1109,6 +1119,7 @@ impl Storage { ) -> impl Future>>> { const CMD: CommandKind = CommandKind::scan; let priority = ctx.get_priority(); + let group_name = 
ctx.get_resource_group_name().as_bytes().to_owned(); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag_with_key_ranges( &ctx, @@ -1258,6 +1269,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), + group_name, ); async move { @@ -1276,6 +1288,7 @@ impl Storage { ) -> impl Future>> { const CMD: CommandKind = CommandKind::scan_lock; let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag_with_key_ranges( &ctx, @@ -1405,6 +1418,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), + group_name, ); async move { res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) @@ -1577,6 +1591,7 @@ impl Storage { ) -> impl Future>>> { const CMD: CommandKind = CommandKind::raw_get; let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); let priority_tag = get_priority_tag(priority); let resource_tag = self .resource_tag_factory @@ -1639,6 +1654,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), + group_name, ); async move { @@ -1657,6 +1673,11 @@ impl Storage { const CMD: CommandKind = CommandKind::raw_batch_get_command; // all requests in a batch have the same region, epoch, term, replica_read let priority = gets[0].get_context().get_priority(); + let group_name = gets[0] + .get_context() + .get_resource_group_name() + .as_bytes() + .to_owned(); let priority_tag = get_priority_tag(priority); let api_version = self.api_version; @@ -1770,6 +1791,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), + group_name, ); async move { res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) @@ -1786,6 +1808,7 @@ impl Storage { ) -> impl Future>>> { const CMD: CommandKind = 
CommandKind::raw_batch_get; let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); let priority_tag = get_priority_tag(priority); let key_ranges = keys.iter().map(|k| (k.clone(), k.clone())).collect(); let resource_tag = self @@ -1866,6 +1889,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), + group_name, ); async move { @@ -2272,6 +2296,7 @@ impl Storage { ) -> impl Future>>> { const CMD: CommandKind = CommandKind::raw_scan; let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag(&ctx); let api_version = self.api_version; @@ -2380,6 +2405,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), + group_name, ); async move { @@ -2400,6 +2426,7 @@ impl Storage { ) -> impl Future>>> { const CMD: CommandKind = CommandKind::raw_batch_scan; let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); let priority_tag = get_priority_tag(priority); let key_ranges = ranges .iter() @@ -2536,6 +2563,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), + group_name, ); async move { @@ -2553,6 +2581,7 @@ impl Storage { ) -> impl Future>> { const CMD: CommandKind = CommandKind::raw_get_key_ttl; let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); let priority_tag = get_priority_tag(priority); let resource_tag = self .resource_tag_factory @@ -2615,6 +2644,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), + group_name, ); async move { @@ -2719,6 +2749,7 @@ impl Storage { ) -> impl Future> { const CMD: CommandKind = CommandKind::raw_checksum; let priority = ctx.get_priority(); + let group_name = 
ctx.get_resource_group_name().as_bytes().to_owned(); let priority_tag = get_priority_tag(priority); let key_ranges = ranges .iter() @@ -2793,6 +2824,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), + group_name, ); async move { From 321aa833ca5ec0fd5dcec7fa8c01f65116d72ba6 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Fri, 13 Jan 2023 11:49:46 +0800 Subject: [PATCH 0454/1149] txn: Do constraint check when handling repeated acqurie_pessimsitic_lock request (#14037) close tikv/tikv#14038, close pingcap/tidb#40114 Fixes the problem that when handling repeated acquire_pessimistic_lock requests is recevied, should_not_exist is ignored. TiKV provides idempotency for these RPC requests, but for acquire_pessimistic_lock, it ignored the possibility that the client may expect a pessimistic_rollback between two acquire_pessimistic_lock request on the same key. In this case the second request may come from another statement and carries `should_not_exist` that wasn't set in the previously finished pessimistic lock request. If the first request successfully acquired the lock and the pessimistic_rollback failed, TiKV may return a sucessful response, making the client believe that the key doesn't exist before. In some rare cases, this has risk to cause data inconsistency. 
Signed-off-by: MyonKeminta Co-authored-by: Ti Chi Robot --- .../txn/actions/acquire_pessimistic_lock.rs | 150 +++++++++++++++++- 1 file changed, 146 insertions(+), 4 deletions(-) diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index fcffd500c8e..86b9ddeab41 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -142,10 +142,22 @@ pub fn acquire_pessimistic_lock( None }; - if need_load_value { - val = reader.get(&key, for_update_ts)?; - } else if need_check_existence { - val = reader.get_write(&key, for_update_ts)?.map(|_| vec![]); + if need_load_value || need_check_existence || should_not_exist { + let write = reader.get_write_with_commit_ts(&key, for_update_ts)?; + if let Some((write, commit_ts)) = write { + // Here `get_write_with_commit_ts` returns only the latest PUT if it exists and + // is not deleted. It's still ok to pass it into `check_data_constraint`. + // In case we are going to lock it with write conflict, we do not check it since + // the statement will then retry. + if locked_with_conflict_ts.is_none() { + check_data_constraint(reader, should_not_exist, &write, commit_ts, &key)?; + } + if need_load_value { + val = Some(reader.load_data(&key, write)?); + } else if need_check_existence { + val = Some(vec![]); + } + } } // Pervious write is not loaded. let (prev_write_loaded, prev_write) = (false, None); @@ -1832,4 +1844,134 @@ pub mod tests { must_pessimistic_rollback(&mut engine, b"k1", 10, 50); must_unlocked(&mut engine, b"k1"); } + + #[test] + fn test_repeated_request_check_should_not_exist() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + + for &(return_values, check_existence) in + &[(false, false), (false, true), (true, false), (true, true)] + { + let key = &[b'k', (return_values as u8 * 2) + check_existence as u8] as &[u8]; + + // An empty key. 
+ must_succeed(&mut engine, key, key, 10, 10); + let res = must_succeed_impl( + &mut engine, + key, + key, + 10, + true, + 1000, + 10, + return_values, + check_existence, + 15, + false, + ); + assert!(res.is_none()); + must_pessimistic_prewrite_lock(&mut engine, key, key, 10, 10, DoPessimisticCheck); + must_commit(&mut engine, key, 10, 19); + + // The key has one record: Lock(10, 19) + must_succeed(&mut engine, key, key, 20, 20); + let res = must_succeed_impl( + &mut engine, + key, + key, + 20, + true, + 1000, + 20, + return_values, + check_existence, + 25, + false, + ); + assert!(res.is_none()); + must_pessimistic_prewrite_put(&mut engine, key, b"v1", key, 20, 20, DoPessimisticCheck); + must_commit(&mut engine, key, 20, 29); + + // The key has records: + // Lock(10, 19), Put(20, 29) + must_succeed(&mut engine, key, key, 30, 30); + let error = must_err_impl( + &mut engine, + key, + key, + 30, + true, + 30, + return_values, + check_existence, + 35, + false, + ); + assert!(matches!( + error, + MvccError(box ErrorInner::AlreadyExist { .. }) + )); + must_pessimistic_prewrite_lock(&mut engine, key, key, 30, 30, DoPessimisticCheck); + must_commit(&mut engine, key, 30, 39); + + // Lock(10, 19), Put(20, 29), Lock(30, 39) + must_succeed(&mut engine, key, key, 40, 40); + let error = must_err_impl( + &mut engine, + key, + key, + 40, + true, + 40, + return_values, + check_existence, + 45, + false, + ); + assert!(matches!( + error, + MvccError(box ErrorInner::AlreadyExist { .. 
}) + )); + must_pessimistic_prewrite_delete(&mut engine, key, key, 40, 40, DoPessimisticCheck); + must_commit(&mut engine, key, 40, 49); + + // Lock(10, 19), Put(20, 29), Lock(30, 39), Delete(40, 49) + must_succeed(&mut engine, key, key, 50, 50); + let res = must_succeed_impl( + &mut engine, + key, + key, + 50, + true, + 1000, + 50, + return_values, + check_existence, + 55, + false, + ); + assert!(res.is_none()); + must_pessimistic_prewrite_lock(&mut engine, key, key, 50, 50, DoPessimisticCheck); + must_commit(&mut engine, key, 50, 59); + + // Lock(10, 19), Put(20, 29), Lock(30, 39), Delete(40, 49), Lock(50, 59) + must_succeed(&mut engine, key, key, 60, 60); + let res = must_succeed_impl( + &mut engine, + key, + key, + 60, + true, + 1000, + 60, + return_values, + check_existence, + 65, + false, + ); + assert!(res.is_none()); + must_pessimistic_prewrite_lock(&mut engine, key, key, 60, 60, DoPessimisticCheck); + must_commit(&mut engine, key, 60, 69); + } + } } From 65a99a89b9f03de1ca24cee8c33584d13370becc Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 13 Jan 2023 14:27:46 +0800 Subject: [PATCH 0455/1149] raftstore-v2: fix metrics and perf context (#14035) ref tikv/tikv#12842 This PR fixes several bugs and metrics: - Now waterfall timer will be reset in before_write, the goal is to solve the confusion that stall writes can pollute the whole waterfall metrics. - Perf context is changed not to be associated with engine instance. Perf context is thread local and instance independent under the hook. - Fix flushed index advance failure due to suspicious flush. 
- Support print long uncommitted logs and fix incorrect commit time Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/engine_panic/src/perf_context.rs | 2 +- components/engine_rocks/src/perf_context.rs | 2 +- components/engine_tirocks/src/perf_context.rs | 1 - components/engine_traits/src/flush.rs | 5 +- components/engine_traits/src/perf_context.rs | 2 +- components/raft_log_engine/src/engine.rs | 2 +- components/raftstore-v2/src/batch/store.rs | 1 + components/raftstore-v2/src/fsm/peer.rs | 2 +- .../operation/command/admin/compact_log.rs | 5 + .../raftstore-v2/src/operation/command/mod.rs | 36 ++++--- .../raftstore-v2/src/operation/query/lease.rs | 2 +- .../src/operation/query/replica.rs | 2 +- .../src/operation/ready/apply_trace.rs | 33 +++++-- .../raftstore-v2/src/operation/ready/mod.rs | 66 ++++++++++++- components/raftstore-v2/src/raft/apply.rs | 2 +- components/raftstore-v2/src/raft/peer.rs | 18 +++- .../src/router/response_channel.rs | 94 +++++++++++++------ .../raftstore-v2/src/worker/tablet_gc.rs | 2 + .../raftstore/src/store/async_io/write.rs | 13 +-- components/raftstore/src/store/fsm/apply.rs | 14 ++- components/raftstore/src/store/fsm/peer.rs | 2 +- components/raftstore/src/store/fsm/store.rs | 16 ++-- .../raftstore/src/store/local_metrics.rs | 73 ++++++++------ components/raftstore/src/store/msg.rs | 19 +--- components/raftstore/src/store/peer.rs | 4 +- src/coprocessor/tracker.rs | 32 +++---- src/storage/metrics.rs | 26 ++--- 27 files changed, 303 insertions(+), 173 deletions(-) diff --git a/components/engine_panic/src/perf_context.rs b/components/engine_panic/src/perf_context.rs index 46d18c00e77..27bdd1ac066 100644 --- a/components/engine_panic/src/perf_context.rs +++ b/components/engine_panic/src/perf_context.rs @@ -8,7 +8,7 @@ use crate::engine::PanicEngine; impl PerfContextExt for PanicEngine { type PerfContext = PanicPerfContext; - fn get_perf_context(&self, level: PerfLevel, kind: PerfContextKind) -> Self::PerfContext { + fn 
get_perf_context(level: PerfLevel, kind: PerfContextKind) -> Self::PerfContext { panic!() } } diff --git a/components/engine_rocks/src/perf_context.rs b/components/engine_rocks/src/perf_context.rs index a731a9461dc..f8cfdbcc667 100644 --- a/components/engine_rocks/src/perf_context.rs +++ b/components/engine_rocks/src/perf_context.rs @@ -8,7 +8,7 @@ use crate::{engine::RocksEngine, perf_context_impl::PerfContextStatistics}; impl PerfContextExt for RocksEngine { type PerfContext = RocksPerfContext; - fn get_perf_context(&self, level: PerfLevel, kind: PerfContextKind) -> Self::PerfContext { + fn get_perf_context(level: PerfLevel, kind: PerfContextKind) -> Self::PerfContext { RocksPerfContext::new(level, kind) } } diff --git a/components/engine_tirocks/src/perf_context.rs b/components/engine_tirocks/src/perf_context.rs index d1d975c65c3..643967230df 100644 --- a/components/engine_tirocks/src/perf_context.rs +++ b/components/engine_tirocks/src/perf_context.rs @@ -136,7 +136,6 @@ impl engine_traits::PerfContextExt for RocksEngine { type PerfContext = RocksPerfContext; fn get_perf_context( - &self, level: engine_traits::PerfLevel, kind: engine_traits::PerfContextKind, ) -> Self::PerfContext { diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index b3a827c234e..8300348da8c 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -157,7 +157,10 @@ impl PersistenceListener { } match flushed_pr { Some(pr) => pr, - None => panic!("{} not found in {:?}", cf, prs), + None => panic!( + "[region_id={}] [tablet_index={}] {} not found in {:?}", + self.region_id, self.tablet_index, cf, prs + ), } }; self.storage diff --git a/components/engine_traits/src/perf_context.rs b/components/engine_traits/src/perf_context.rs index ba48974a460..44462e3fe3c 100644 --- a/components/engine_traits/src/perf_context.rs +++ b/components/engine_traits/src/perf_context.rs @@ -37,7 +37,7 @@ numeric_enum_serializing_mod! 
{perf_level_serde PerfLevel { pub trait PerfContextExt { type PerfContext: PerfContext; - fn get_perf_context(&self, level: PerfLevel, kind: PerfContextKind) -> Self::PerfContext; + fn get_perf_context(level: PerfLevel, kind: PerfContextKind) -> Self::PerfContext; } /// The subsystem the PerfContext is being created for. diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 3db865ed8ad..838fe461f4b 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -366,7 +366,7 @@ impl RaftLogEngine { impl PerfContextExt for RaftLogEngine { type PerfContext = RaftEnginePerfContext; - fn get_perf_context(&self, _level: PerfLevel, _kind: PerfContextKind) -> Self::PerfContext { + fn get_perf_context(_level: PerfLevel, _kind: PerfContextKind) -> Self::PerfContext { RaftEnginePerfContext } } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 6183778c369..ccf3f19f3ea 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -188,6 +188,7 @@ impl PollHandler PeerFsmDelegate<'a, EK, ER, PeerTick::CheckLeaderLease => unimplemented!(), PeerTick::ReactivateMemoryLock => self.on_reactivate_memory_lock_tick(), PeerTick::ReportBuckets => unimplemented!(), - PeerTick::CheckLongUncommitted => unimplemented!(), + PeerTick::CheckLongUncommitted => self.on_check_long_uncommitted(), } } diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index a4983b28a47..0f5fd9b392f 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -73,6 +73,11 @@ impl CompactLogContext { pub fn set_last_applying_index(&mut self, index: u64) { self.last_applying_index = index; } + + #[inline] + pub fn last_applying_index(&self) 
-> u64 { + self.last_applying_index + } } impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 047fe026ffe..cf29d9ee25a 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -32,7 +32,7 @@ use raftstore::{ apply::{self, APPLY_WB_SHRINK_SIZE, SHRINK_PENDING_CMD_QUEUE_CAP}, Proposal, }, - local_metrics::{RaftMetrics, TimeTracker}, + local_metrics::RaftMetrics, metrics::{APPLY_TASK_WAIT_TIME_HISTOGRAM, APPLY_TIME_HISTOGRAM}, msg::ErrorCallback, util, Config, WriteCallback, @@ -302,9 +302,7 @@ impl Peer { t.metrics.write_instant = Some(now); &mut t.metrics.store_time_nanos }); - if let TimeTracker::Instant(t) = tracker { - *t = now; - } + tracker.reset(now); } } } @@ -314,7 +312,7 @@ impl Peer { return; } // TODO: remove following log once stable. - info!(self.logger, "on_apply_res"; "apply_res" => ?apply_res); + info!(self.logger, "on_apply_res"; "apply_res" => ?apply_res, "apply_trace" => ?self.storage().apply_trace()); // It must just applied a snapshot. if apply_res.applied_index < self.entry_storage().first_index() { // Ignore admin command side effects, otherwise it may split incomplete @@ -378,6 +376,12 @@ impl Peer { scheduler.send(ApplyTask::ManualFlush); } } + let last_applying_index = self.compact_log_context().last_applying_index(); + let committed_index = self.entry_storage().commit_index(); + if last_applying_index < committed_index { + // We need to continue to apply after previous page is finished. 
+ self.set_has_ready(); + } } } @@ -691,11 +695,23 @@ impl Apply { .iter() .flat_map(|(v, _)| { v.write_trackers() - .flat_map(|t| t.as_tracker_token().cloned()) + .flat_map(|t| t.as_tracker_token()) }) .collect(); self.perf_context().report_metrics(&tokens); } + let mut apply_res = ApplyRes::default(); + apply_res.applied_index = index; + apply_res.applied_term = term; + apply_res.admin_result = self.take_admin_result().into_boxed_slice(); + apply_res.modifications = *self.modifications_mut(); + apply_res.metrics = mem::take(&mut self.metrics); + let written_bytes = apply_res.metrics.written_bytes; + self.res_reporter().report(apply_res); + + // Report result first and then invoking callbacks. This may delays callback a + // little bit, but can make sure all following messages must see the side + // effect of admin commands. let callbacks = self.callbacks_mut(); let now = std::time::Instant::now(); let apply_time = APPLY_TIME_HISTOGRAM.local(); @@ -709,14 +725,6 @@ impl Apply { if callbacks.capacity() > SHRINK_PENDING_CMD_QUEUE_CAP { callbacks.shrink_to(SHRINK_PENDING_CMD_QUEUE_CAP); } - let mut apply_res = ApplyRes::default(); - apply_res.applied_index = index; - apply_res.applied_term = term; - apply_res.admin_result = self.take_admin_result().into_boxed_slice(); - apply_res.modifications = *self.modifications_mut(); - apply_res.metrics = mem::take(&mut self.metrics); - let written_bytes = apply_res.metrics.written_bytes; - self.res_reporter().report(apply_res); written_bytes } } diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index 0abd0cccd72..3185f1bd24b 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -112,7 +112,7 @@ impl Peer { let time = monotonic_raw_now(); for (_, ch, mut read_index) in read_index_req.take_cmds().drain(..) 
{ ch.read_tracker().map(|tracker| { - GLOBAL_TRACKERS.with_tracker(*tracker, |t| { + GLOBAL_TRACKERS.with_tracker(tracker, |t| { t.metrics.read_index_confirm_wait_nanos = (time - read_index_req.propose_time) .to_std() .unwrap() diff --git a/components/raftstore-v2/src/operation/query/replica.rs b/components/raftstore-v2/src/operation/query/replica.rs index fb00adbbc5a..901fd9726f6 100644 --- a/components/raftstore-v2/src/operation/query/replica.rs +++ b/components/raftstore-v2/src/operation/query/replica.rs @@ -75,7 +75,7 @@ impl Peer { let time = monotonic_raw_now(); for (req, ch, _) in read_index_req.take_cmds().drain(..) { ch.read_tracker().map(|tracker| { - GLOBAL_TRACKERS.with_tracker(*tracker, |t| { + GLOBAL_TRACKERS.with_tracker(tracker, |t| { t.metrics.read_index_confirm_wait_nanos = (time - read_index_req.propose_time) .to_std() .unwrap() diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index 5b88a6ba94d..67bbed5aa4b 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -40,7 +40,7 @@ use kvproto::{ use raftstore::store::{ ReadTask, TabletSnapManager, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }; -use slog::{trace, Logger}; +use slog::{info, trace, Logger}; use tikv_util::{box_err, slog_panic, worker::Scheduler}; use crate::{ @@ -133,7 +133,8 @@ pub type DataTrace = [u64; DATA_CFS_LEN]; #[derive(Clone, Copy, Default, Debug)] struct Progress { flushed: u64, - /// The index of last entry that has modification to the CF. + /// The index of last entry that has modification to the CF. The value + /// can be larger than the index that actually modifies the CF in apply. /// /// If `flushed` == `last_modified`, then all data in the CF is persisted. 
last_modified: u64, @@ -192,9 +193,13 @@ impl ApplyTrace { trace.admin.last_modified = i; trace.persisted_applied = i; trace.last_flush_trigger = i; - let applied_region_state = engine - .get_region_state(region_id, trace.admin.flushed)? - .unwrap(); + let applied_region_state = match engine.get_region_state(region_id, trace.admin.flushed)? { + Some(s) => s, + None => panic!( + "failed to get region state [region_id={}] [apply_trace={:?}]", + region_id, trace + ), + }; Ok((trace, applied_region_state)) } @@ -242,7 +247,7 @@ impl ApplyTrace { } }) .max(); - if let Some(m) = last_modified && m >= self.admin.flushed + 4096 && m >= self.last_flush_trigger + 4096 { + if let Some(m) = last_modified && m >= self.admin.flushed + 4096000 && m >= self.last_flush_trigger + 4096000 { self.last_flush_trigger = m; true } else { @@ -257,10 +262,17 @@ impl ApplyTrace { } let min_flushed = self .data_cfs - .iter() + .iter_mut() // Only unflushed CFs are considered. Flushed CF always have uptodate changes // persisted. .filter_map(|pr| { + // All modifications before mem_index must be seen. If following condition is + // true, it means the modification comes beyond general apply process (like + // transaction GC unsafe write). Align `last_modified` to `flushed` to avoid + // blocking raft log GC. 
+ if mem_index >= pr.flushed && pr.flushed > pr.last_modified { + pr.last_modified = pr.flushed; + } if pr.last_modified != pr.flushed { Some(pr.flushed) } else { @@ -484,6 +496,7 @@ impl Storage { let lb = write_task .extra_write .ensure_v2(|| raft_engine.log_batch(1)); + info!(self.logger(), "persisting admin flushed"; "tablet_index" => tablet_index, "flushed" => trace.admin.flushed); let trace = self.apply_trace_mut(); lb.put_flushed_index(region_id, CF_RAFT, tablet_index, trace.admin.flushed) .unwrap(); @@ -660,6 +673,12 @@ mod tests { ([(8, 2), (9, 3), (7, 5)], (4, 4), 5, 5), ([(8, 2), (9, 3), (7, 5)], (5, 5), 5, 5), ([(2, 3), (9, 3), (7, 5)], (2, 2), 5, 2), + // In special cae, some CF may be flushed without any modification recorded, + // we should still able to advance the apply index forward. + ([(5, 2), (9, 3), (7, 3)], (2, 2), 3, 3), + ([(5, 2), (9, 3), (7, 3)], (2, 2), 6, 6), + ([(5, 2), (9, 3), (7, 3)], (2, 2), 10, 10), + ([(5, 2), (9, 3), (7, 3)], (2, 3), 10, 2), ]; for (case, (data_cfs, admin, mem_index, exp)) in cases.iter().enumerate() { let mut trace = ApplyTrace::default(); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 3f559feff8b..d1348cf014b 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -81,6 +81,16 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, } self.schedule_tick(PeerTick::Raft); } + + pub fn on_check_long_uncommitted(&mut self) { + if !self.fsm.peer().is_leader() { + return; + } + self.fsm + .peer_mut() + .check_long_uncommitted_proposals(self.store_ctx); + self.schedule_tick(PeerTick::CheckLongUncommitted); + } } impl Peer { @@ -396,9 +406,10 @@ impl Peer { // smaller than propose_time of a command, which was // proposed in another thread while this thread receives its // AppendEntriesResponse and is ready to calculate its commit-log-duration. 
- ctx.current_time.replace(monotonic_raw_now()); + let current_time = monotonic_raw_now(); + ctx.current_time.replace(current_time); ctx.raft_metrics.commit_log.observe(duration_to_sec( - (ctx.current_time.unwrap() - propose_time).to_std().unwrap(), + (current_time - propose_time).to_std().unwrap(), )); self.maybe_renew_leader_lease(propose_time, &ctx.store_meta, None); update_lease = false; @@ -730,6 +741,7 @@ impl Peer { self.region_heartbeat_pd(ctx); self.add_pending_tick(PeerTick::CompactLog); self.add_pending_tick(PeerTick::SplitRegionCheck); + self.add_pending_tick(PeerTick::CheckLongUncommitted); } StateRole::Follower => { self.leader_lease_mut().expire(); @@ -793,6 +805,56 @@ impl Peer { self.read_progress_mut().discard(); } } + + /// Check if there is long uncommitted proposal. + /// + /// This will increase the threshold when a long uncommitted proposal is + /// detected, and reset the threshold when there is no long uncommitted + /// proposal. + fn has_long_uncommitted_proposals(&mut self, ctx: &mut StoreContext) -> bool { + let mut has_long_uncommitted = false; + let base_threshold = ctx.cfg.long_uncommitted_base_threshold.0; + if let Some(propose_time) = self.proposals().oldest().and_then(|p| p.propose_time) { + // When a proposal was proposed with this ctx before, the current_time can be + // some. + let current_time = *ctx.current_time.get_or_insert_with(monotonic_raw_now); + let elapsed = match (current_time - propose_time).to_std() { + Ok(elapsed) => elapsed, + Err(_) => return false, + }; + // Increase the threshold for next turn when a long uncommitted proposal is + // detected. 
+ let threshold = self.long_uncommitted_threshold(); + if elapsed >= threshold { + has_long_uncommitted = true; + self.set_long_uncommitted_threshold(threshold + base_threshold); + } else if elapsed < base_threshold { + self.set_long_uncommitted_threshold(base_threshold); + } + } else { + self.set_long_uncommitted_threshold(base_threshold); + } + has_long_uncommitted + } + + fn check_long_uncommitted_proposals(&mut self, ctx: &mut StoreContext) { + if self.has_long_uncommitted_proposals(ctx) { + let status = self.raft_group().status(); + let mut buffer: Vec<(u64, u64, u64)> = Vec::new(); + if let Some(prs) = status.progress { + for (id, p) in prs.iter() { + buffer.push((*id, p.commit_group_id, p.matched)); + } + } + warn!( + self.logger, + "found long uncommitted proposals"; + "progress" => ?buffer, + "cache_first_index" => ?self.entry_storage().entry_cache_first_index(), + "next_turn_threshold" => ?self.long_uncommitted_threshold(), + ); + } + } } impl Storage { diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 6d1faa98cbf..7a1a22a5a95 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -81,7 +81,7 @@ impl Apply { let applied_index = flush_state.applied_index(); assert_ne!(applied_index, 0, "{}", SlogFormat(&logger)); let tablet = remote_tablet.latest().unwrap().clone(); - let perf_context = tablet.get_perf_context(cfg.perf_level, PerfContextKind::RaftstoreApply); + let perf_context = EK::get_perf_context(cfg.perf_level, PerfContextKind::RaftstoreApply); Apply { peer, tablet, diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index bc3d8a5af8e..8051066d4f9 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -1,7 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - mem, + cmp, mem, sync::{atomic::Ordering, Arc}, time::{Duration, Instant}, }; @@ -104,6 +104,8 @@ pub struct Peer { /// lead_transferee if this peer(leader) is in a leadership transferring. leader_transferee: u64, + + long_uncommitted_threshold: u64, } impl Peer { @@ -180,6 +182,10 @@ impl Peer { flush_state, split_flow_control: SplitFlowControl::default(), leader_transferee: raft::INVALID_ID, + long_uncommitted_threshold: cmp::max( + cfg.long_uncommitted_base_threshold.0.as_secs(), + 1, + ), }; // If this region has only one peer and I am the one, campaign directly. @@ -769,4 +775,14 @@ impl Peer { .unwrap_or(raft::INVALID_ID), ) } + + #[inline] + pub fn long_uncommitted_threshold(&self) -> Duration { + Duration::from_secs(self.long_uncommitted_threshold) + } + + #[inline] + pub fn set_long_uncommitted_threshold(&mut self, dur: Duration) { + self.long_uncommitted_threshold = cmp::max(dur.as_secs(), 1); + } } diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index eeeb13f6555..f70b6635982 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -30,7 +30,12 @@ use raftstore::store::{ local_metrics::TimeTracker, msg::ErrorCallback, region_meta::RegionMeta, ReadCallback, WriteCallback, }; -use tracker::{TrackerToken, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}; +use tracker::{get_tls_tracker_token, TrackerToken}; + +union Tracker { + read: TrackerToken, + write: TimeTracker, +} /// A struct allows to watch and notify specific events. /// @@ -53,7 +58,7 @@ struct EventCore { before_set: UnsafeCell>>, // Waker can be changed, need to use `AtomicWaker` to guarantee no data race. waker: AtomicWaker, - tracker: UnsafeCell, + tracker: UnsafeCell, } unsafe impl Send for EventCore {} @@ -240,16 +245,17 @@ pub struct BaseChannel { core: Arc>, } -impl BaseChannel { - /// Creates a pair of channel and subscriber. 
- #[inline] - pub fn pair() -> (Self, BaseSubscriber) { - let tracker_token = tracker::get_tls_tracker_token(); - Self::with_mask(u32::MAX, TimeTracker::Tracker(tracker_token)) - } +#[inline] +fn pair() -> (BaseChannel, BaseSubscriber) { + let tracker = Tracker { + read: get_tls_tracker_token(), + }; + BaseChannel::::with_mask(u32::MAX, tracker) +} +impl BaseChannel { #[inline] - fn with_mask(mask: u32, tracker: TimeTracker) -> (Self, BaseSubscriber) { + fn with_mask(mask: u32, tracker: Tracker) -> (Self, BaseSubscriber) { let core: Arc> = Arc::new(EventCore { event: AtomicU64::new(0), res: UnsafeCell::new(None), @@ -452,15 +458,8 @@ impl CmdResChannelBuilder { #[inline] pub fn build(self) -> (CmdResChannel, CmdResSubscriber) { - let tracker_token = tracker::get_tls_tracker_token(); - let now = std::time::Instant::now(); - let tracker = if tracker_token == INVALID_TRACKER_TOKEN { - TimeTracker::Instant(now) - } else { - GLOBAL_TRACKERS.with_tracker(tracker_token, |tracker| { - tracker.metrics.write_instant = Some(now); - }); - TimeTracker::Tracker(tracker_token) + let tracker = Tracker { + write: TimeTracker::default(), }; let (c, s) = CmdResChannel::with_mask(self.event_mask, tracker); if let Some(f) = self.before_set { @@ -476,6 +475,15 @@ impl CmdResChannel { // Valid range is [1, 30] const PROPOSED_EVENT: u64 = 1; const COMMITTED_EVENT: u64 = 2; + + /// Creates a pair of channel and subscriber. 
+ #[inline] + pub fn pair() -> (Self, CmdResSubscriber) { + let tracker = Tracker { + write: TimeTracker::default(), + }; + Self::with_mask(u32::MAX, tracker) + } } impl ErrorCallback for CmdResChannel { @@ -509,12 +517,12 @@ impl WriteCallback for CmdResChannel { type TimeTrackerListRef<'a> = &'a [TimeTracker]; #[inline] fn write_trackers(&self) -> Self::TimeTrackerListRef<'_> { - std::slice::from_ref(unsafe { &*self.core.tracker.get() }) + std::slice::from_ref(unsafe { &(*self.core.tracker.get()).write }) } type TimeTrackerListMut<'a> = &'a mut [TimeTracker]; fn write_trackers_mut(&mut self) -> Self::TimeTrackerListMut<'_> { - std::slice::from_mut(unsafe { &mut *self.core.tracker.get() }) + std::slice::from_mut(unsafe { &mut (*self.core.tracker.get()).write }) } // TODO: support executing hooks inside setting result. @@ -572,6 +580,13 @@ impl QueryResult { pub type QueryResChannel = BaseChannel; +impl QueryResChannel { + #[inline] + pub fn pair() -> (Self, QueryResSubscriber) { + pair() + } +} + impl ErrorCallback for QueryResChannel { #[inline] fn report_error(self, err: RaftCmdResponse) { @@ -592,8 +607,8 @@ impl ReadCallback for QueryResChannel { self.set_result(res); } - fn read_tracker(&self) -> Option<&TrackerToken> { - unsafe { (*self.core.tracker.get()).as_tracker_token() } + fn read_tracker(&self) -> Option { + Some(unsafe { (*self.core.tracker.get()).read }) } } @@ -608,6 +623,13 @@ impl fmt::Debug for QueryResChannel { pub type DebugInfoChannel = BaseChannel; pub type DebugInfoSubscriber = BaseSubscriber; +impl DebugInfoChannel { + #[inline] + pub fn pair() -> (Self, DebugInfoSubscriber) { + pair() + } +} + impl Debug for DebugInfoChannel { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "DebugInfoChannel") @@ -615,17 +637,29 @@ impl Debug for DebugInfoChannel { } #[cfg(feature = "testexport")] -pub type FlushChannel = BaseChannel<()>; -#[cfg(feature = "testexport")] -pub type FlushSubscriber = BaseSubscriber<()>; +mod flush_channel { + 
use super::*; -#[cfg(feature = "testexport")] -impl Debug for FlushChannel { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "FlushChannel") + pub type FlushChannel = BaseChannel<()>; + pub type FlushSubscriber = BaseSubscriber<()>; + + impl FlushChannel { + #[inline] + pub fn pair() -> (Self, FlushSubscriber) { + pair() + } + } + + impl Debug for FlushChannel { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "FlushChannel") + } } } +#[cfg(feature = "testexport")] +pub use flush_channel::{FlushChannel, FlushSubscriber}; + #[cfg(test)] mod tests { use std::assert_matches::assert_matches; diff --git a/components/raftstore-v2/src/worker/tablet_gc.rs b/components/raftstore-v2/src/worker/tablet_gc.rs index d4593223db3..d6d19743b1e 100644 --- a/components/raftstore-v2/src/worker/tablet_gc.rs +++ b/components/raftstore-v2/src/worker/tablet_gc.rs @@ -131,6 +131,8 @@ impl Runner { } fn prepare_destroy(&mut self, region_id: u64, tablet: EK, wait_for_persisted: u64) { + // The tablet is about to be deleted, flush is a waste and will block destroy. 
+ let _ = tablet.set_db_options(&[("avoid_flush_during_shutdown", "true")]); let _ = tablet.pause_background_work(); self.waiting_destroy_tasks .entry(region_id) diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 7016d0ab606..98c76ddd6d1 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -465,11 +465,12 @@ where self.flush_states_to_raft_wb(); if metrics.waterfall_metrics { let now = std::time::Instant::now(); - for task in &self.tasks { - for tracker in &task.trackers { + for task in &mut self.tasks { + for tracker in &mut task.trackers { tracker.observe(now, &metrics.wf_before_write, |t| { &mut t.metrics.wf_before_write_nanos }); + tracker.reset(now); } } } @@ -549,7 +550,7 @@ where ) -> Self { let batch = WriteTaskBatch::new(raft_engine.log_batch(RAFT_WB_DEFAULT_SIZE)); let perf_context = - raft_engine.get_perf_context(cfg.value().perf_level, PerfContextKind::RaftstoreStore); + ER::get_perf_context(cfg.value().perf_level, PerfContextKind::RaftstoreStore); let cfg_tracker = cfg.clone().tracker(tag.clone()); Self { store_id, @@ -718,11 +719,7 @@ where .batch .tasks .iter() - .flat_map(|task| { - task.trackers - .iter() - .flat_map(|t| t.as_tracker_token().cloned()) - }) + .flat_map(|task| task.trackers.iter().flat_map(|t| t.as_tracker_token())) .collect(); self.perf_context.report_metrics(&trackers); write_raft_time = duration_to_sec(now.saturating_elapsed()); diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index cab6ae0ffe8..58df32fd404 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -83,7 +83,7 @@ use crate::{ cmd_resp, entry_storage::{self, CachedEntries}, fsm::RaftPollerBuilder, - local_metrics::{RaftMetrics, TimeTracker}, + local_metrics::RaftMetrics, memory::*, metrics::*, msg::{Callback, ErrorCallback, 
PeerMsg, ReadResponse, SignificantMsg}, @@ -475,7 +475,7 @@ where host, importer, region_scheduler, - engine: engine.clone(), + engine, router, notifier, kv_wb, @@ -488,7 +488,7 @@ where committed_count: 0, sync_log_hint: false, use_delete_range: cfg.use_delete_range, - perf_context: engine.get_perf_context(cfg.perf_level, PerfContextKind::RaftstoreApply), + perf_context: EK::get_perf_context(cfg.perf_level, PerfContextKind::RaftstoreApply), yield_duration: cfg.apply_yield_duration.0, yield_msg_size: cfg.apply_yield_write_size.0, delete_ssts: vec![], @@ -582,7 +582,7 @@ where .cb_batch .iter() .flat_map(|(cb, _)| cb.write_trackers()) - .flat_map(|trackers| trackers.as_tracker_token().cloned()) + .flat_map(|trackers| trackers.as_tracker_token()) .collect(); self.perf_context.report_metrics(&trackers); self.sync_log_hint = false; @@ -3337,9 +3337,7 @@ impl Apply { t.metrics.write_instant = Some(now); &mut t.metrics.store_time_nanos }); - if let TimeTracker::Instant(t) = tracker { - *t = now; - } + tracker.reset(now); } } } @@ -4171,7 +4169,7 @@ where .flat_map(|p| p.cb.write_trackers()) .flat_map(|ts| ts.as_tracker_token()) { - GLOBAL_TRACKERS.with_tracker(*tracker, |t| { + GLOBAL_TRACKERS.with_tracker(tracker, |t| { t.metrics.apply_wait_nanos = apply_wait.as_nanos() as u64; }); } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index e302ea6588a..7e00798b6df 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -622,7 +622,7 @@ where .propose_wait_time .observe(propose_time.as_secs_f64()); cmd.callback.read_tracker().map(|tracker| { - GLOBAL_TRACKERS.with_tracker(*tracker, |t| { + GLOBAL_TRACKERS.with_tracker(tracker, |t| { t.metrics.read_index_propose_wait_nanos = propose_time.as_nanos() as u64; }) diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index ceb8858046d..3724eba13e2 100644 --- 
a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -1374,14 +1374,14 @@ where ready_count: 0, has_ready: false, current_time: None, - raft_perf_context: self - .engines - .raft - .get_perf_context(self.cfg.value().perf_level, PerfContextKind::RaftstoreStore), - kv_perf_context: self - .engines - .kv - .get_perf_context(self.cfg.value().perf_level, PerfContextKind::RaftstoreStore), + raft_perf_context: ER::get_perf_context( + self.cfg.value().perf_level, + PerfContextKind::RaftstoreStore, + ), + kv_perf_context: EK::get_perf_context( + self.cfg.value().perf_level, + PerfContextKind::RaftstoreStore, + ), tick_batch: vec![PeerTickBatch::default(); PeerTick::VARIANT_COUNT], node_start_time: Some(TiInstant::now_coarse()), feature_gate: self.feature_gate.clone(), diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index c1db17f8cae..0e6a09cbf0b 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -7,7 +7,7 @@ use collections::HashSet; use prometheus::local::LocalHistogram; use raft::eraftpb::MessageType; use tikv_util::time::{Duration, Instant}; -use tracker::{Tracker, TrackerToken, GLOBAL_TRACKERS}; +use tracker::{Tracker, TrackerToken, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}; use super::metrics::*; @@ -208,47 +208,60 @@ impl StoreWriteMetrics { /// Tracker for the durations of a raftstore request. /// If a global tracker is not available, it will fallback to an Instant. 
#[derive(Debug, Clone, Copy)] -pub enum TimeTracker { - Tracker(TrackerToken), - Instant(std::time::Instant), +pub struct TimeTracker { + token: TrackerToken, + start: std::time::Instant, +} + +impl Default for TimeTracker { + #[inline] + fn default() -> Self { + let token = tracker::get_tls_tracker_token(); + let start = std::time::Instant::now(); + let tracker = TimeTracker { token, start }; + if token == INVALID_TRACKER_TOKEN { + return tracker; + } + + GLOBAL_TRACKERS.with_tracker(token, |tracker| { + tracker.metrics.write_instant = Some(start); + }); + tracker + } } impl TimeTracker { - pub fn as_tracker_token(&self) -> Option<&TrackerToken> { - match self { - TimeTracker::Tracker(tt) => Some(tt), - TimeTracker::Instant(_) => None, + #[inline] + pub fn as_tracker_token(&self) -> Option { + if self.token == INVALID_TRACKER_TOKEN { + None + } else { + Some(self.token) } } + #[inline] pub fn observe( &self, now: std::time::Instant, local_metric: &LocalHistogram, tracker_metric: impl FnOnce(&mut Tracker) -> &mut u64, ) { - match self { - TimeTracker::Tracker(t) => { - if let Some(dur) = GLOBAL_TRACKERS - .with_tracker(*t, |tracker| { - tracker.metrics.write_instant.map(|write_instant| { - let dur = now.saturating_duration_since(write_instant); - let metric = tracker_metric(tracker); - if *metric == 0 { - *metric = dur.as_nanos() as u64; - } - dur - }) - }) - .flatten() - { - local_metric.observe(dur.as_secs_f64()); - } - } - TimeTracker::Instant(t) => { - let dur = now.saturating_duration_since(*t); - local_metric.observe(dur.as_secs_f64()); - } + let dur = now.saturating_duration_since(self.start); + local_metric.observe(dur.as_secs_f64()); + if self.token == INVALID_TRACKER_TOKEN { + return; } + GLOBAL_TRACKERS.with_tracker(self.token, |tracker| { + let metric = tracker_metric(tracker); + if *metric == 0 { + *metric = dur.as_nanos() as u64; + } + }); + } + + #[inline] + pub fn reset(&mut self, start: std::time::Instant) { + self.start = start; } } diff --git 
a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index e3fc8530d76..b2a2a7aa1d1 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -24,7 +24,7 @@ use pd_client::BucketMeta; use raft::SnapshotStatus; use smallvec::{smallvec, SmallVec}; use tikv_util::{deadline::Deadline, escape, memory::HeapSize, time::Instant}; -use tracker::{get_tls_tracker_token, TrackerToken, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}; +use tracker::{get_tls_tracker_token, TrackerToken}; use super::{local_metrics::TimeTracker, region_meta::RegionMeta, FetchedLogs, RegionSnapshot}; use crate::store::{ @@ -137,16 +137,7 @@ where proposed_cb: Option, committed_cb: Option, ) -> Self { - let tracker_token = get_tls_tracker_token(); - let now = std::time::Instant::now(); - let tracker = if tracker_token == INVALID_TRACKER_TOKEN { - TimeTracker::Instant(now) - } else { - GLOBAL_TRACKERS.with_tracker(tracker_token, |tracker| { - tracker.metrics.write_instant = Some(now); - }); - TimeTracker::Tracker(tracker_token) - }; + let tracker = TimeTracker::default(); Callback::Write { cb, @@ -217,7 +208,7 @@ pub trait ReadCallback: ErrorCallback { type Response; fn set_result(self, result: Self::Response); - fn read_tracker(&self) -> Option<&TrackerToken>; + fn read_tracker(&self) -> Option; } pub trait WriteCallback: ErrorCallback { @@ -265,9 +256,9 @@ impl ReadCallback for Callback { self.invoke_read(result); } - fn read_tracker(&self) -> Option<&TrackerToken> { + fn read_tracker(&self) -> Option { let Callback::Read { tracker, .. 
} = self else { return None; }; - Some(tracker) + Some(*tracker) } } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 347f62dd945..586ab7ba133 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -200,7 +200,7 @@ impl ProposalQueue { } #[inline] - fn oldest(&self) -> Option<&Proposal> { + pub fn oldest(&self) -> Option<&Proposal> { self.queue.front() } @@ -3292,7 +3292,7 @@ where let time = monotonic_raw_now(); for (req, cb, mut read_index) in read.take_cmds().drain(..) { cb.read_tracker().map(|tracker| { - GLOBAL_TRACKERS.with_tracker(*tracker, |t| { + GLOBAL_TRACKERS.with_tracker(tracker, |t| { t.metrics.read_index_confirm_wait_nanos = (time - read.propose_time).to_std().unwrap().as_nanos() as u64; }) diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index d6e146adf11..9c0b79ff8b8 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -6,7 +6,7 @@ use ::tracker::{get_tls_tracker_token, with_tls_tracker}; use engine_traits::{PerfContext, PerfContextExt, PerfContextKind}; use kvproto::{kvrpcpb, kvrpcpb::ScanDetailV2}; use pd_client::BucketMeta; -use tikv_kv::{with_tls_engine, Engine}; +use tikv_kv::Engine; use tikv_util::time::{self, Duration, Instant}; use txn_types::Key; @@ -148,9 +148,7 @@ impl Tracker { } self.with_perf_context(|perf_context| { - if let Some(c) = perf_context { - c.start_observe(); - } + perf_context.start_observe(); }); self.current_stage = TrackerState::ItemBegan(now); } @@ -164,9 +162,7 @@ impl Tracker { self.total_storage_stats.add(&storage_stats); } self.with_perf_context(|perf_context| { - if let Some(c) = perf_context { - c.report_metrics(&[get_tls_tracker_token()]); - } + perf_context.report_metrics(&[get_tls_tracker_token()]); }); self.current_stage = TrackerState::ItemFinished(now); } else { @@ -361,7 +357,7 @@ impl Tracker { fn with_perf_context(&self, f: F) -> T where - F: FnOnce(&mut Option>) -> 
T, + F: FnOnce(&mut Box) -> T, { thread_local! { static SELECT: RefCell>> = RefCell::new(None); @@ -385,19 +381,13 @@ impl Tracker { }; tls_cell.with(|c| { let mut c = c.borrow_mut(); - if c.is_none() { - *c = unsafe { - with_tls_engine::(|engine| { - engine.kv_engine().map(|engine| { - Box::new(engine.get_perf_context( - PerfLevel::Uninitialized, - PerfContextKind::Coprocessor(self.req_ctx.tag.get_str()), - )) as Box - }) - }) - }; - } - f(&mut c) + let perf_context = c.get_or_insert_with(|| { + Box::new(E::Local::get_perf_context( + PerfLevel::Uninitialized, + PerfContextKind::Coprocessor(self.req_ctx.tag.get_str()), + )) as Box + }); + f(perf_context) }) } } diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index 080ff2c5951..4837567ee43 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -11,7 +11,7 @@ use pd_client::BucketMeta; use prometheus::*; use prometheus_static_metric::*; use raftstore::store::{util::build_key_range, ReadStats}; -use tikv_kv::{with_tls_engine, Engine}; +use tikv_kv::Engine; use tracker::get_tls_tracker_token; use crate::{ @@ -347,23 +347,15 @@ where }; tls_cell.with(|c| { let mut c = c.borrow_mut(); - if c.is_none() { - *c = with_tls_engine(|engine: &mut E| { - engine.kv_engine().map(|c| { - Box::new(c.get_perf_context( - PerfLevel::Uninitialized, - PerfContextKind::Storage(cmd.get_str()), - )) as Box - }) - }); - }; - if let Some(c) = &mut *c { - c.start_observe(); - } + let perf_context = c.get_or_insert_with(|| { + Box::new(E::Local::get_perf_context( + PerfLevel::Uninitialized, + PerfContextKind::Storage(cmd.get_str()), + )) as Box + }); + perf_context.start_observe(); let res = f(); - if let Some(c) = &mut *c { - c.report_metrics(&[get_tls_tracker_token()]); - } + perf_context.report_metrics(&[get_tls_tracker_token()]); res }) } From a3c15ce27d582dc695848bffb363631f4cae2db5 Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 16 Jan 2023 12:27:48 +0800 Subject: [PATCH 0456/1149] raftstore-v2: cleanup txn_ext 
(#14051) ref tikv/tikv#12842 Move transaction related code to txn_ext.rs. Fix the bug that snapshot doesn't set term and extra_op. Signed-off-by: Jay Lee --- components/raftstore-v2/src/fsm/peer.rs | 39 +-- .../operation/command/admin/conf_change.rs | 1 + .../src/operation/command/admin/split.rs | 16 +- .../command/admin/transfer_leader.rs | 97 +------ components/raftstore-v2/src/operation/mod.rs | 2 + components/raftstore-v2/src/operation/pd.rs | 16 -- .../raftstore-v2/src/operation/query/local.rs | 4 + .../raftstore-v2/src/operation/ready/mod.rs | 15 +- .../raftstore-v2/src/operation/txn_ext.rs | 260 ++++++++++++++++++ components/raftstore-v2/src/raft/peer.rs | 66 ++--- 10 files changed, 303 insertions(+), 213 deletions(-) create mode 100644 components/raftstore-v2/src/operation/txn_ext.rs diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 0a6a66e8df1..26d5c2a1458 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -7,7 +7,7 @@ use std::borrow::Cow; use batch_system::{BasicMailbox, Fsm}; use crossbeam::channel::TryRecvError; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; -use raftstore::store::{Config, LocksStatus, TabletSnapManager, Transport}; +use raftstore::store::{Config, TabletSnapManager, Transport}; use slog::{debug, error, info, trace, Logger}; use tikv_util::{ is_zero_duration, @@ -32,7 +32,6 @@ pub struct PeerFsm { /// twice accidentally. 
tick_registry: [bool; PeerTick::VARIANT_COUNT], is_stopped: bool, - reactivate_memory_lock_ticks: usize, } impl PeerFsm { @@ -55,7 +54,6 @@ impl PeerFsm { receiver: rx, tick_registry: [false; PeerTick::VARIANT_COUNT], is_stopped: false, - reactivate_memory_lock_ticks: 0, }); Ok((tx, fsm)) } @@ -136,9 +134,6 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, fn schedule_pending_ticks(&mut self) { let pending_ticks = self.fsm.peer.take_pending_ticks(); for tick in pending_ticks { - if tick == PeerTick::ReactivateMemoryLock { - self.fsm.reactivate_memory_lock_ticks = 0; - } self.schedule_tick(tick); } } @@ -225,7 +220,9 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerTick::CheckPeerStaleState => unimplemented!(), PeerTick::EntryCacheEvict => self.on_entry_cache_evict(), PeerTick::CheckLeaderLease => unimplemented!(), - PeerTick::ReactivateMemoryLock => self.on_reactivate_memory_lock_tick(), + PeerTick::ReactivateMemoryLock => { + self.fsm.peer.on_reactivate_memory_lock_tick(self.store_ctx) + } PeerTick::ReportBuckets => unimplemented!(), PeerTick::CheckLongUncommitted => self.on_check_long_uncommitted(), } @@ -326,32 +323,4 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, self.fsm.peer.propose_pending_writes(self.store_ctx); self.schedule_pending_ticks(); } - - pub fn on_reactivate_memory_lock_tick(&mut self) { - let mut pessimistic_locks = self.fsm.peer.txn_ext().pessimistic_locks.write(); - - // If it is not leader, we needn't reactivate by tick. In-memory pessimistic - // lock will be enabled when this region becomes leader again. - // And this tick is currently only used for the leader transfer failure case. 
- if !self.fsm.peer().is_leader() - || pessimistic_locks.status != LocksStatus::TransferringLeader - { - return; - } - - self.fsm.reactivate_memory_lock_ticks += 1; - let transferring_leader = self.fsm.peer.raft_group().raft.lead_transferee.is_some(); - // `lead_transferee` is not set immediately after the lock status changes. So, - // we need the tick count condition to avoid reactivating too early. - if !transferring_leader - && self.fsm.reactivate_memory_lock_ticks - >= self.store_ctx.cfg.reactive_memory_lock_timeout_tick - { - pessimistic_locks.status = LocksStatus::Normal; - self.fsm.reactivate_memory_lock_ticks = 0; - } else { - drop(pessimistic_locks); - self.schedule_tick(PeerTick::ReactivateMemoryLock); - } - } } diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 8c9771b0201..42c433584fe 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -261,6 +261,7 @@ impl Apply { "changes" => ?changes, "legacy" => legacy, "original region" => ?region, "err" => ?e); + return Err(e); } } let conf_ver = region.get_region_epoch().get_conf_ver() + changes.len() as u64; diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index d01b1371338..f9e44286490 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -449,21 +449,9 @@ impl Peer { fail_point!("on_split", self.peer().get_store_id() == 3, |_| {}); let derived = &res.regions[res.derived_index]; - let derived_epoch = derived.get_region_epoch().clone(); let region_id = derived.get_id(); - // Group in-memory pessimistic locks in the original region into new regions. 
- // The locks of new regions will be put into the corresponding new regions - // later. And the locks belonging to the old region will stay in the original - // map. - let region_locks = { - let mut pessimistic_locks = self.txn_ext().pessimistic_locks.write(); - info!(self.logger, "moving {} locks to new regions", pessimistic_locks.len();); - // Update the version so the concurrent reader will fail due to EpochNotMatch - // instead of PessimisticLockNotFound. - pessimistic_locks.version = derived_epoch.get_version(); - pessimistic_locks.group_by_regions(&res.regions, derived) - }; + let region_locks = self.txn_context().split(&res.regions, derived); fail_point!("on_split_invalidate_locks"); let tablet: EK = match res.tablet.downcast() { @@ -650,7 +638,7 @@ impl Peer { let _ = self.raft_group_mut().campaign(); self.set_has_ready(); - *self.txn_ext().pessimistic_locks.write() = split_init.locks; + self.txn_context().init_with_lock(split_init.locks); let control = self.split_flow_control_mut(); control.approximate_size = split_init.approximate_size; control.approximate_keys = split_init.approximate_keys; diff --git a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs index 12bd7bbf491..54aa9845e17 100644 --- a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs +++ b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs @@ -3,22 +3,19 @@ use std::cmp::Ordering; use bytes::Bytes; -use engine_traits::{KvEngine, RaftEngine, CF_LOCK}; -use fail::fail_point; +use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ disk_usage::DiskUsage, metapb, raft_cmdpb::{ - AdminCmdType, AdminRequest, AdminResponse, RaftCmdRequest, RaftRequestHeader, - TransferLeaderRequest, + AdminCmdType, AdminRequest, AdminResponse, RaftCmdRequest, TransferLeaderRequest, }, }; -use parking_lot::RwLockWriteGuard; use raft::{eraftpb, ProgressState, Storage}; use 
raftstore::{ store::{ fsm::new_admin_request, make_transfer_leader_response, metrics::PEER_ADMIN_CMD_COUNTER, - LocksStatus, TRANSFER_LEADER_COMMAND_REPLY_CTX, + TRANSFER_LEADER_COMMAND_REPLY_CTX, }, Result, }; @@ -30,9 +27,8 @@ use super::AdminCmdResult; use crate::{ batch::StoreContext, fsm::ApplyResReporter, - operation::command::write::SimpleWriteEncoder, raft::{Apply, Peer}, - router::{CmdResChannel, PeerMsg, PeerTick}, + router::{CmdResChannel, PeerMsg}, }; fn transfer_leader_cmd(msg: &RaftCmdRequest) -> Option<&TransferLeaderRequest> { @@ -296,91 +292,6 @@ impl Peer { } None } - - // Returns whether we should propose another TransferLeader command. This is - // for: - // - Considering the amount of pessimistic locks can be big, it can reduce - // unavailable time caused by waiting for the transferee catching up logs. - // - Make transferring leader strictly after write commands that executes before - // proposing the locks, preventing unexpected lock loss. - fn propose_locks_before_transfer_leader( - &mut self, - ctx: &mut StoreContext, - msg: &eraftpb::Message, - ) -> bool { - // 1. Disable in-memory pessimistic locks. - - // Clone to make borrow checker happy when registering ticks. - let txn_ext = self.txn_ext().clone(); - let mut pessimistic_locks = txn_ext.pessimistic_locks.write(); - - // If the message context == TRANSFER_LEADER_COMMAND_REPLY_CTX, the message - // is a reply to a transfer leader command before. If the locks status remain - // in the TransferringLeader status, we can safely initiate transferring leader - // now. - // If it's not in TransferringLeader status now, it is probably because several - // ticks have passed after proposing the locks in the last time and we - // reactivate the memory locks. Then, we should propose the locks again. 
- if msg.get_context() == TRANSFER_LEADER_COMMAND_REPLY_CTX - && pessimistic_locks.status == LocksStatus::TransferringLeader - { - return false; - } - - // If it is not writable, it's probably because it's a retried TransferLeader - // and the locks have been proposed. But we still need to return true to - // propose another TransferLeader command. Otherwise, some write requests that - // have marked some locks as deleted will fail because raft rejects more - // proposals. - // It is OK to return true here if it's in other states like MergingRegion or - // NotLeader. In those cases, the locks will fail to propose and nothing will - // happen. - if !pessimistic_locks.is_writable() { - return true; - } - pessimistic_locks.status = LocksStatus::TransferringLeader; - self.add_pending_tick(PeerTick::ReactivateMemoryLock); - - // 2. Propose pessimistic locks - if pessimistic_locks.is_empty() { - return false; - } - // FIXME: Raft command has size limit. Either limit the total size of - // pessimistic locks in a region, or split commands here. - let mut encoder = SimpleWriteEncoder::with_capacity(512); - let mut lock_count = 0; - { - // Downgrade to a read guard, do not block readers in the scheduler as far as - // possible. - let pessimistic_locks = RwLockWriteGuard::downgrade(pessimistic_locks); - fail_point!("invalidate_locks_before_transfer_leader"); - for (key, (lock, deleted)) in &*pessimistic_locks { - if *deleted { - continue; - } - lock_count += 1; - encoder.put(CF_LOCK, key.as_encoded(), &lock.to_lock().to_bytes()); - } - } - if lock_count == 0 { - // If the map is not empty but all locks are deleted, it is possible that a - // write command has just marked locks deleted but not proposed yet. - // It might cause that command to fail if we skip proposing the - // extra TransferLeader command here. 
- return true; - } - let mut header = Box::::default(); - header.set_region_id(self.region_id()); - header.set_region_epoch(self.region().get_region_epoch().clone()); - header.set_peer(self.peer().clone()); - info!( - self.logger, - "propose {} locks before transferring leader", lock_count; - ); - let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write(header, encoder.encode()).0 else {unreachable!()}; - self.on_simple_write(ctx, write.header, write.data, write.ch); - true - } } impl Apply { diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 807f425e998..76baf31f9c8 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -5,6 +5,7 @@ mod life; mod pd; mod query; mod ready; +mod txn_ext; pub use command::{ AdminCmdResult, ApplyFlowControl, CommittedEntries, CompactLogContext, ProposalControl, @@ -20,4 +21,5 @@ pub use ready::{ pub(crate) use self::{ command::SplitInit, query::{LocalReader, ReadDelegatePair, SharedReadTablet}, + txn_ext::TxnContext, }; diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 26945a3e176..17abdd85cf0 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -205,20 +205,4 @@ impl Peer { ); } } - - #[inline] - pub fn update_max_timestamp_pd(&self, ctx: &StoreContext, initial_status: u64) { - let task = pd::Task::UpdateMaxTimestamp { - region_id: self.region_id(), - initial_status, - txn_ext: self.txn_ext().clone(), - }; - if let Err(e) = ctx.schedulers.pd.schedule(task) { - error!( - self.logger, - "failed to notify pd with UpdateMaxTimestamp"; - "err" => %e, - ); - } - } } diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index e4c0aa6d0b9..13b815d1ebc 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ 
b/components/raftstore-v2/src/operation/query/local.rs @@ -2,6 +2,7 @@ // #[PerformanceCriticalPath] use std::{ + num::NonZeroU64, ops::Deref, sync::{atomic, Arc, Mutex}, }; @@ -246,6 +247,8 @@ where }; snap.txn_ext = Some(delegate.txn_ext.clone()); + snap.term = NonZeroU64::new(delegate.term); + snap.txn_extra_op = delegate.txn_extra_op.load(); snap.bucket_meta = delegate.bucket_meta.clone(); delegate.cached_tablet.release(); @@ -945,6 +948,7 @@ mod tests { assert_eq!(read_progress.safe_ts(), 2); let snap = block_on(reader.snapshot(cmd.clone())).unwrap(); assert_eq!(*snap.get_region(), region1); + assert_eq!(snap.term, NonZeroU64::new(term6)); drop(mix_tx); handler.join().unwrap(); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index d1348cf014b..87e1c100a87 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -728,12 +728,12 @@ impl Peer { // latency. self.raft_group_mut().skip_bcast_commit(false); - // Init the in-memory pessimistic lock table when the peer becomes leader. - self.activate_in_memory_pessimistic_locks(); - - // A more recent read may happen on the old leader. So max ts should - // be updated after a peer becomes leader. - self.require_updating_max_ts(ctx); + self.txn_context().on_became_leader( + ctx, + self.term(), + self.region(), + &self.logger, + ); // Exit entry cache warmup state when the peer becomes leader. 
self.entry_storage_mut().clear_entry_cache_warmup_state(); @@ -746,7 +746,8 @@ impl Peer { StateRole::Follower => { self.leader_lease_mut().expire(); self.storage_mut().cancel_generating_snap(None); - self.clear_in_memory_pessimistic_locks(); + self.txn_context() + .on_became_follower(self.term(), self.region()); } _ => {} } diff --git a/components/raftstore-v2/src/operation/txn_ext.rs b/components/raftstore-v2/src/operation/txn_ext.rs new file mode 100644 index 00000000000..911c1eaab78 --- /dev/null +++ b/components/raftstore-v2/src/operation/txn_ext.rs @@ -0,0 +1,260 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module contains everything related to transaction hook. +//! +//! This is the temporary (efficient) solution, it should be implemented as one +//! type of coprocessor. + +use std::sync::{atomic::Ordering, Arc}; + +use crossbeam::atomic::AtomicCell; +use engine_traits::{KvEngine, RaftEngine, CF_LOCK}; +use kvproto::{kvrpcpb::ExtraOp, metapb::Region, raft_cmdpb::RaftRequestHeader}; +use parking_lot::RwLockWriteGuard; +use raft::eraftpb; +use raftstore::store::{ + LocksStatus, PeerPessimisticLocks, TxnExt, TRANSFER_LEADER_COMMAND_REPLY_CTX, +}; +use slog::{error, info, Logger}; + +use crate::{ + batch::StoreContext, + raft::Peer, + router::{PeerMsg, PeerTick}, + worker::pd, + SimpleWriteEncoder, +}; + +pub struct TxnContext { + ext: Arc, + extra_op: Arc>, + reactivate_memory_lock_ticks: usize, +} + +impl Default for TxnContext { + #[inline] + fn default() -> Self { + Self { + ext: Arc::default(), + extra_op: Arc::new(AtomicCell::new(ExtraOp::Noop)), + reactivate_memory_lock_ticks: 0, + } + } +} + +impl TxnContext { + #[inline] + pub fn on_region_changed(&self, term: u64, region: &Region) { + let mut pessimistic_locks = self.ext.pessimistic_locks.write(); + pessimistic_locks.term = term; + pessimistic_locks.version = region.get_region_epoch().get_version(); + } + + #[inline] + pub fn on_became_leader( + &self, + ctx: &mut 
StoreContext, + term: u64, + region: &Region, + logger: &Logger, + ) { + // A more recent read may happen on the old leader. So max ts should + // be updated after a peer becomes leader. + self.require_updating_max_ts(ctx, term, region, logger); + + // Init the in-memory pessimistic lock table when the peer becomes leader. + let mut pessimistic_locks = self.ext.pessimistic_locks.write(); + pessimistic_locks.status = LocksStatus::Normal; + pessimistic_locks.term = term; + pessimistic_locks.version = region.get_region_epoch().get_version(); + } + + #[inline] + pub fn on_became_follower(&self, term: u64, region: &Region) { + let mut pessimistic_locks = self.ext.pessimistic_locks.write(); + pessimistic_locks.status = LocksStatus::NotLeader; + pessimistic_locks.clear(); + pessimistic_locks.term = term; + pessimistic_locks.version = region.get_region_epoch().get_version(); + } + + #[inline] + pub fn ext(&self) -> &Arc { + &self.ext + } + + #[inline] + pub fn extra_op(&self) -> &Arc> { + &self.extra_op + } + + // TODO: find a better place to put all txn related stuff. 
+ fn require_updating_max_ts( + &self, + ctx: &StoreContext, + term: u64, + region: &Region, + logger: &Logger, + ) where + EK: KvEngine, + ER: RaftEngine, + { + let epoch = region.get_region_epoch(); + let term_low_bits = term & ((1 << 32) - 1); // 32 bits + let version_lot_bits = epoch.get_version() & ((1 << 31) - 1); // 31 bits + let initial_status = (term_low_bits << 32) | (version_lot_bits << 1); + self.ext + .max_ts_sync_status + .store(initial_status, Ordering::SeqCst); + info!( + logger, + "require updating max ts"; + "initial_status" => initial_status, + ); + let task = pd::Task::UpdateMaxTimestamp { + region_id: region.get_id(), + initial_status, + txn_ext: self.ext.clone(), + }; + if let Err(e) = ctx.schedulers.pd.schedule(task) { + error!(logger, "failed to notify pd with UpdateMaxTimestamp"; "err" => ?e); + } + } + + pub fn split(&self, regions: &[Region], derived: &Region) -> Vec { + // Group in-memory pessimistic locks in the original region into new regions. + // The locks of new regions will be put into the corresponding new regions + // later. And the locks belonging to the old region will stay in the original + // map. + let mut pessimistic_locks = self.ext.pessimistic_locks.write(); + // Update the version so the concurrent reader will fail due to EpochNotMatch + // instead of PessimisticLockNotFound. + pessimistic_locks.version = derived.get_region_epoch().get_version(); + pessimistic_locks.group_by_regions(regions, derived) + } + + pub fn init_with_lock(&self, locks: PeerPessimisticLocks) { + let mut pessimistic_locks = self.ext.pessimistic_locks.write(); + *pessimistic_locks = locks; + } +} + +impl Peer { + /// Returns True means the tick is consumed, otherwise the tick should be + /// rescheduled. + pub fn on_reactivate_memory_lock_tick(&mut self, ctx: &mut StoreContext) { + // If it is not leader, we needn't reactivate by tick. In-memory pessimistic + // lock will be enabled when this region becomes leader again. 
+ if !self.is_leader() { + return; + } + + let transferring_leader = self.raft_group().raft.lead_transferee.is_some(); + let txn_context = self.txn_context_mut(); + let mut pessimistic_locks = txn_context.ext.pessimistic_locks.write(); + + // And this tick is currently only used for the leader transfer failure case. + if pessimistic_locks.status != LocksStatus::TransferringLeader { + return; + } + + txn_context.reactivate_memory_lock_ticks += 1; + // `lead_transferee` is not set immediately after the lock status changes. So, + // we need the tick count condition to avoid reactivating too early. + if !transferring_leader + && txn_context.reactivate_memory_lock_ticks >= ctx.cfg.reactive_memory_lock_timeout_tick + { + pessimistic_locks.status = LocksStatus::Normal; + txn_context.reactivate_memory_lock_ticks = 0; + } else { + drop(pessimistic_locks); + self.add_pending_tick(PeerTick::ReactivateMemoryLock); + } + } + + // Returns whether we should propose another TransferLeader command. This is + // for: + // - Considering the amount of pessimistic locks can be big, it can reduce + // unavailable time caused by waiting for the transferee catching up logs. + // - Make transferring leader strictly after write commands that executes before + // proposing the locks, preventing unexpected lock loss. + pub fn propose_locks_before_transfer_leader( + &mut self, + ctx: &mut StoreContext, + msg: &eraftpb::Message, + ) -> bool { + // 1. Disable in-memory pessimistic locks. + + // Clone to make borrow checker happy when registering ticks. + let txn_ext = self.txn_context().ext.clone(); + let mut pessimistic_locks = txn_ext.pessimistic_locks.write(); + + // If the message context == TRANSFER_LEADER_COMMAND_REPLY_CTX, the message + // is a reply to a transfer leader command before. If the locks status remain + // in the TransferringLeader status, we can safely initiate transferring leader + // now. 
+ // If it's not in TransferringLeader status now, it is probably because several + // ticks have passed after proposing the locks in the last time and we + // reactivate the memory locks. Then, we should propose the locks again. + if msg.get_context() == TRANSFER_LEADER_COMMAND_REPLY_CTX + && pessimistic_locks.status == LocksStatus::TransferringLeader + { + return false; + } + + // If it is not writable, it's probably because it's a retried TransferLeader + // and the locks have been proposed. But we still need to return true to + // propose another TransferLeader command. Otherwise, some write requests that + // have marked some locks as deleted will fail because raft rejects more + // proposals. + // It is OK to return true here if it's in other states like MergingRegion or + // NotLeader. In those cases, the locks will fail to propose and nothing will + // happen. + if !pessimistic_locks.is_writable() { + return true; + } + pessimistic_locks.status = LocksStatus::TransferringLeader; + self.txn_context_mut().reactivate_memory_lock_ticks = 0; + self.add_pending_tick(PeerTick::ReactivateMemoryLock); + + // 2. Propose pessimistic locks + if pessimistic_locks.is_empty() { + return false; + } + // FIXME: Raft command has size limit. Either limit the total size of + // pessimistic locks in a region, or split commands here. + let mut encoder = SimpleWriteEncoder::with_capacity(512); + let mut lock_count = 0; + { + // Downgrade to a read guard, do not block readers in the scheduler as far as + // possible. 
+ let pessimistic_locks = RwLockWriteGuard::downgrade(pessimistic_locks); + fail::fail_point!("invalidate_locks_before_transfer_leader"); + for (key, (lock, deleted)) in &*pessimistic_locks { + if *deleted { + continue; + } + lock_count += 1; + encoder.put(CF_LOCK, key.as_encoded(), &lock.to_lock().to_bytes()); + } + } + if lock_count == 0 { + // If the map is not empty but all locks are deleted, it is possible that a + // write command has just marked locks deleted but not proposed yet. + // It might cause that command to fail if we skip proposing the + // extra TransferLeader command here. + return true; + } + let mut header = Box::::default(); + header.set_region_id(self.region_id()); + header.set_region_epoch(self.region().get_region_epoch().clone()); + header.set_peer(self.peer().clone()); + info!( + self.logger, + "propose {} locks before transferring leader", lock_count; + ); + let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write(header, encoder.encode()).0 else {unreachable!()}; + self.on_simple_write(ctx, write.header, write.data, write.ch); + true + } +} diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 8051066d4f9..6cfcda4da25 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -2,16 +2,15 @@ use std::{ cmp, mem, - sync::{atomic::Ordering, Arc}, + sync::Arc, time::{Duration, Instant}, }; use collections::{HashMap, HashSet}; -use crossbeam::atomic::AtomicCell; use engine_traits::{ CachedTablet, FlushState, KvEngine, RaftEngine, TabletContext, TabletRegistry, }; -use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb, pdpb, raft_serverpb::RegionLocalState}; +use kvproto::{metapb, pdpb, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; use raft::{RawNode, StateRole}; use raftstore::{ @@ -19,19 +18,18 @@ use raftstore::{ store::{ fsm::ApplyMetrics, util::{Lease, RegionReadProgress}, - Config, EntryStorage, LocksStatus, PeerStat, ProposalQueue, 
ReadDelegate, ReadIndexQueue, - ReadProgress, TabletSnapManager, TxnExt, WriteTask, + Config, EntryStorage, PeerStat, ProposalQueue, ReadDelegate, ReadIndexQueue, ReadProgress, + TabletSnapManager, WriteTask, }, }; use slog::Logger; use super::storage::Storage; use crate::{ - batch::StoreContext, fsm::ApplyScheduler, operation::{ AsyncWriter, CompactLogContext, DestroyProgress, ProposalControl, SimpleWriteReqEncoder, - SplitFlowControl, + SplitFlowControl, TxnContext, }, router::{CmdResChannel, PeerTick, QueryResChannel}, Result, @@ -83,8 +81,7 @@ pub struct Peer { last_region_buckets: Option, /// Transaction extensions related to this peer. - txn_ext: Arc, - txn_extra_op: Arc>, + txn_context: TxnContext, pending_ticks: Vec, @@ -173,8 +170,7 @@ impl Peer { ), region_buckets: None, last_region_buckets: None, - txn_ext: Arc::default(), - txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::Noop)), + txn_context: TxnContext::default(), proposal_control: ProposalControl::new(0), pending_ticks: Vec::new(), split_trace: vec![], @@ -261,11 +257,8 @@ impl Peer { self.read_progress .update_leader_info(self.leader_id(), self.term(), self.region()); - { - let mut pessimistic_locks = self.txn_ext.pessimistic_locks.write(); - pessimistic_locks.term = self.term(); - pessimistic_locks.version = self.region().get_region_epoch().get_version(); - } + self.txn_context + .on_region_changed(self.term(), self.region()); if self.serving() { host.on_region_changed( @@ -639,21 +632,6 @@ impl Peer { mem::take(&mut self.pending_ticks) } - pub fn activate_in_memory_pessimistic_locks(&mut self) { - let mut pessimistic_locks = self.txn_ext.pessimistic_locks.write(); - pessimistic_locks.status = LocksStatus::Normal; - pessimistic_locks.term = self.term(); - pessimistic_locks.version = self.region().get_region_epoch().get_version(); - } - - pub fn clear_in_memory_pessimistic_locks(&mut self) { - let mut pessimistic_locks = self.txn_ext.pessimistic_locks.write(); - pessimistic_locks.status = 
LocksStatus::NotLeader; - pessimistic_locks.clear(); - pessimistic_locks.term = self.term(); - pessimistic_locks.version = self.region().get_region_epoch().get_version(); - } - #[inline] pub fn post_split(&mut self) { self.reset_region_buckets(); @@ -678,8 +656,13 @@ impl Peer { } #[inline] - pub fn txn_ext(&self) -> &Arc { - &self.txn_ext + pub fn txn_context(&self) -> &TxnContext { + &self.txn_context + } + + #[inline] + pub fn txn_context_mut(&mut self) -> &mut TxnContext { + &mut self.txn_context } pub fn generate_read_delegate(&self) -> ReadDelegate { @@ -690,8 +673,8 @@ impl Peer { self.term(), self.region().clone(), self.storage().entry_storage().applied_term(), - self.txn_extra_op.clone(), - self.txn_ext.clone(), + self.txn_context.extra_op().clone(), + self.txn_context.ext().clone(), self.read_progress().clone(), self.region_buckets.as_ref().map(|b| b.meta.clone()), ) @@ -715,19 +698,6 @@ impl Peer { .advance_apply(apply_index, term, region); } - // TODO: find a better place to put all txn related stuff. - pub fn require_updating_max_ts(&self, ctx: &StoreContext) { - let epoch = self.region().get_region_epoch(); - let term_low_bits = self.term() & ((1 << 32) - 1); // 32 bits - let version_lot_bits = epoch.get_version() & ((1 << 31) - 1); // 31 bits - let initial_status = (term_low_bits << 32) | (version_lot_bits << 1); - self.txn_ext - .max_ts_sync_status - .store(initial_status, Ordering::SeqCst); - - self.update_max_timestamp_pd(ctx, initial_status); - } - #[inline] pub fn split_trace_mut(&mut self) -> &mut Vec<(u64, HashSet)> { &mut self.split_trace From 6d163b846327a0f61c1049b97cb4b315639ce9a6 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Mon, 16 Jan 2023 19:01:49 -0800 Subject: [PATCH 0457/1149] raftstore-v2: a few small fixes (#14039) ref tikv/tikv#12842 1) add snapshot apply metrics 2) disable bloomfilter for raftkv-v2 for now until a proper ratio is found 3) disable rocksdb write stall for raftkv-v2 until the tablet flow control is fully verified. 
Signed-off-by: Qi Xu Co-authored-by: Qi Xu --- components/raftstore-v2/src/operation/ready/mod.rs | 5 +++-- components/raftstore-v2/src/operation/ready/snapshot.rs | 6 ++++-- src/config/mod.rs | 7 ++++++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 87e1c100a87..38d126ac87a 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -31,8 +31,8 @@ use raft::{eraftpb, prelude::MessageType, Ready, StateRole, INVALID_ID}; use raftstore::{ coprocessor::{RegionChangeEvent, RoleChange}, store::{ - needs_evict_entry_cache, util, FetchedLogs, ReadProgress, Transport, WriteCallback, - WriteTask, + needs_evict_entry_cache, util, worker_metrics::SNAP_COUNTER, FetchedLogs, ReadProgress, + Transport, WriteCallback, WriteTask, }, }; use slog::{debug, error, info, trace, warn}; @@ -877,6 +877,7 @@ impl Storage { ctx.snap_mgr.clone(), ctx.tablet_registry.clone(), ) { + SNAP_COUNTER.apply.fail.inc(); error!(self.logger(),"failed to apply snapshot";"error" => ?e) } } diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 04b6ed7e12b..bcbe220252b 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -36,8 +36,9 @@ use raft::{eraftpb::Snapshot, StateRole}; use raftstore::{ coprocessor::RegionChangeEvent, store::{ - metrics::STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER, GenSnapRes, ReadTask, TabletSnapKey, - TabletSnapManager, Transport, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, + metrics::STORE_SNAPSHOT_VALIDATION_FAILURE_COUNTER, worker_metrics::SNAP_COUNTER, + GenSnapRes, ReadTask, TabletSnapKey, TabletSnapManager, Transport, WriteTask, + RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }, }; use slog::{error, info, warn}; @@ -252,6 +253,7 
@@ impl Peer { !s.scheduled || snapshot_index != RAFT_INIT_LOG_INDEX }) { info!(self.logger, "apply tablet snapshot completely"); + SNAP_COUNTER.apply.success.inc(); } if let Some(init) = split { info!(self.logger, "init split with snapshot finished"); diff --git a/src/config/mod.rs b/src/config/mod.rs index 8d3e5477f26..9caa68d8e6b 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -116,7 +116,8 @@ fn bloom_filter_ratio(et: EngineType) -> f64 { EngineType::RaftKv => 0.1, // In v2, every peer has its own tablet. The data scale is about tens of // GiBs. We only need a small portion for those key. - EngineType::RaftKv2 => 0.005, + // TODO: disable it for now until find out the proper ratio + EngineType::RaftKv2 => 0.0, } } @@ -1229,6 +1230,10 @@ impl DbConfig { self.write_buffer_limit.get_or_insert(ReadableSize( (total_mem * WRITE_BUFFER_MEMORY_LIMIT_RATE) as u64, )); + self.defaultcf.disable_write_stall = true; + self.writecf.disable_write_stall = true; + self.lockcf.disable_write_stall = true; + self.raftcf.disable_write_stall = true; } } } From a463db0911b4a2f2f47a29b567c54338a7ff3876 Mon Sep 17 00:00:00 2001 From: Zwb Date: Tue, 17 Jan 2023 14:51:48 +0800 Subject: [PATCH 0458/1149] apply: fix witness raft log gc panic and refactor (#14054) ref tikv/tikv#12876 fix witness raft log gc panic and refactor Signed-off-by: Wenbo Zhang Co-authored-by: Xinye Tao --- components/raftstore/src/store/config.rs | 7 ++ components/raftstore/src/store/fsm/apply.rs | 77 ++++++++++++--------- components/raftstore/src/store/fsm/peer.rs | 35 ++++------ tests/failpoints/cases/test_witness.rs | 8 +++ tests/integrations/config/mod.rs | 1 + 5 files changed, 74 insertions(+), 54 deletions(-) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 454cf61a4c8..4d3210318a6 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -68,6 +68,9 @@ pub struct Config { pub 
raft_log_compact_sync_interval: ReadableDuration, // Interval to gc unnecessary raft log. pub raft_log_gc_tick_interval: ReadableDuration, + // Interval to request voter_replicated_index for gc unnecessary raft log, + // if the leader has not initiated gc for a long time. + pub request_voter_replicated_index_interval: ReadableDuration, // A threshold to gc stale raft log, must >= 1. pub raft_log_gc_threshold: u64, // When entry count exceed this value, gc will be forced trigger. @@ -339,6 +342,7 @@ impl Default for Config { raft_entry_max_size: ReadableSize::mb(8), raft_log_compact_sync_interval: ReadableDuration::secs(2), raft_log_gc_tick_interval: ReadableDuration::secs(3), + request_voter_replicated_index_interval: ReadableDuration::minutes(5), raft_log_gc_threshold: 50, raft_log_gc_count_limit: None, raft_log_gc_size_limit: None, @@ -813,6 +817,9 @@ impl Config { CONFIG_RAFTSTORE_GAUGE .with_label_values(&["raft_log_gc_tick_interval"]) .set(self.raft_log_gc_tick_interval.as_secs_f64()); + CONFIG_RAFTSTORE_GAUGE + .with_label_values(&["request_voter_replicated_index_interval"]) + .set(self.request_voter_replicated_index_interval.as_secs_f64()); CONFIG_RAFTSTORE_GAUGE .with_label_values(&["raft_log_gc_threshold"]) .set(self.raft_log_gc_threshold as f64); diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 58df32fd404..60ed35e6892 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -258,6 +258,7 @@ pub enum ExecResult { CompactLog { state: RaftTruncatedState, first_index: u64, + has_pending: bool, }, SplitRegion { regions: Vec, @@ -300,7 +301,12 @@ pub enum ExecResult { SetFlashbackState { region: Region, }, - PendingCompactCmd, + // The raftstore thread will use it to update the internal state of `PeerFsm`. 
If it is + // `true`, when the raftstore detects that the raft log has not been gc for a long time, + // the raftstore thread will actively pull the `voter_replicated_index` from the leader + // and try to compact pending gc. If false, raftstore does not do any additional + // processing. + HasPendingCompactCmd(bool), } /// The possible returned value when applying logs. @@ -1508,7 +1514,7 @@ where | ExecResult::DeleteRange { .. } | ExecResult::IngestSst { .. } | ExecResult::TransferLeader { .. } - | ExecResult::PendingCompactCmd => {} + | ExecResult::HasPendingCompactCmd(..) => {} ExecResult::SplitRegion { ref derived, .. } => { self.region = derived.clone(); self.metrics.size_diff_hint = 0; @@ -2966,11 +2972,13 @@ where )) } + // When the first return value is true, it means that we have updated + // `RaftApplyState`, and the caller needs to do persistence. fn try_compact_log( &mut self, voter_replicated_index: u64, voter_replicated_term: u64, - ) -> Result>> { + ) -> Result<(bool, Option>)> { PEER_ADMIN_CMD_COUNTER.compact.all.inc(); let first_index = entry_storage::first_index(&self.apply_state); @@ -2981,7 +2989,7 @@ where "peer_id" => self.id(), "voter_replicated_index" => voter_replicated_index, ); - return Ok(None); + return Ok((false, None)); } // When the witness restarted, the pending compact cmd has been lost, so use @@ -2995,11 +3003,7 @@ where "compact_index" => voter_replicated_index, "first_index" => first_index, ); - return Ok(Some(TaskRes::Compact { - state: self.apply_state.get_truncated_state().clone(), - first_index: 0, - has_pending: false, - })); + return Ok((false, Some(ExecResult::HasPendingCompactCmd(false)))); } // compact failure is safe to be omitted, no need to assert. 
compact_raft_log( @@ -3009,11 +3013,7 @@ where voter_replicated_term, )?; PEER_ADMIN_CMD_COUNTER.compact.success.inc(); - return Ok(Some(TaskRes::Compact { - state: self.apply_state.get_truncated_state().clone(), - first_index, - has_pending: false, - })); + return Ok((true, Some(ExecResult::HasPendingCompactCmd(false)))); } match self.pending_cmds.pop_compact(voter_replicated_index) { @@ -3021,11 +3021,14 @@ where // compact failure is safe to be omitted, no need to assert. compact_raft_log(&self.tag, &mut self.apply_state, cmd.index, cmd.term)?; PEER_ADMIN_CMD_COUNTER.compact.success.inc(); - Ok(Some(TaskRes::Compact { - state: self.apply_state.get_truncated_state().clone(), - first_index, - has_pending: self.pending_cmds.has_compact(), - })) + Ok(( + true, + Some(ExecResult::CompactLog { + state: self.apply_state.get_truncated_state().clone(), + first_index, + has_pending: self.pending_cmds.has_compact(), + }), + )) } None => { info!( @@ -3034,7 +3037,7 @@ where "peer_id" => self.id(), "voter_replicated_index" => voter_replicated_index, ); - Ok(None) + Ok((false, None)) } } } @@ -3109,7 +3112,10 @@ where "peer_id" => self.id(), "command" => ?req.get_compact_log() ); - return Ok((resp, ApplyResult::Res(ExecResult::PendingCompactCmd))); + return Ok(( + resp, + ApplyResult::Res(ExecResult::HasPendingCompactCmd(true)), + )); } } } else { @@ -3133,6 +3139,7 @@ where ApplyResult::Res(ExecResult::CompactLog { state: self.apply_state.get_truncated_state().clone(), first_index, + has_pending: self.pending_cmds.has_compact(), }), )) } @@ -3693,11 +3700,6 @@ where // Whether destroy request is from its target region's snapshot merge_from_snapshot: bool, }, - Compact { - state: RaftTruncatedState, - first_index: u64, - has_pending: bool, - }, } pub struct ApplyFsm @@ -4109,18 +4111,29 @@ where voter_replicated_index: u64, voter_replicated_term: u64, ) { + if self.delegate.pending_remove || self.delegate.stopped { + return; + } + let res = self .delegate 
.try_compact_log(voter_replicated_index, voter_replicated_term); match res { - Ok(res) => { + Ok((should_write, res)) => { if let Some(res) = res { + if ctx.timer.is_none() { + ctx.timer = Some(Instant::now_coarse()); + } ctx.prepare_for(&mut self.delegate); - self.delegate.write_apply_state(ctx.kv_wb_mut()); - ctx.commit_opt(&mut self.delegate, true); - ctx.finish_for(&mut self.delegate, VecDeque::new()); - ctx.notifier - .notify_one(self.delegate.region_id(), PeerMsg::ApplyRes { res }); + let mut result = VecDeque::new(); + // If modified `truncated_state` in `try_compact_log`, the apply state should be + // persisted. + if should_write { + self.delegate.write_apply_state(ctx.kv_wb_mut()); + ctx.commit_opt(&mut self.delegate, true); + } + result.push_back(res); + ctx.finish_for(&mut self.delegate, result); } } Err(e) => error!(?e; diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 7e00798b6df..ccde4b031ef 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2311,21 +2311,6 @@ where *is_ready = true; } } - ApplyTaskRes::Compact { - state, - first_index, - has_pending, - } => { - self.fsm.peer.has_pending_compact_cmd = has_pending; - // When the witness restarts, the pending compact cmds will be lost. We will try - // to use `voter_replicated_index` as the `compact index` to avoid log - // accumulation, but if `voter_replicated_index` is less than `first_index`, - // then gc is not needed. In this case, the `first_index` we pass back will be - // 0, and `has_pending` set to false. 
- if first_index != 0 { - self.on_ready_compact_log(first_index, state); - } - } } if self.fsm.peer.unsafe_recovery_state.is_some() { self.check_unsafe_recovery_state(); @@ -4933,8 +4918,13 @@ where while let Some(result) = exec_results.pop_front() { match result { ExecResult::ChangePeer(cp) => self.on_ready_change_peer(cp), - ExecResult::CompactLog { first_index, state } => { - self.on_ready_compact_log(first_index, state) + ExecResult::CompactLog { + state, + first_index, + has_pending, + } => { + self.fsm.peer.has_pending_compact_cmd = has_pending; + self.on_ready_compact_log(first_index, state); } ExecResult::SplitRegion { derived, @@ -4969,9 +4959,11 @@ where ExecResult::IngestSst { ssts } => self.on_ingest_sst_result(ssts), ExecResult::TransferLeader { term } => self.on_transfer_leader(term), ExecResult::SetFlashbackState { region } => self.on_set_flashback_state(region), - ExecResult::PendingCompactCmd => { - self.fsm.peer.has_pending_compact_cmd = true; - self.register_pull_voter_replicated_index_tick(); + ExecResult::HasPendingCompactCmd(has_pending) => { + self.fsm.peer.has_pending_compact_cmd = has_pending; + if has_pending { + self.register_pull_voter_replicated_index_tick(); + } } } } @@ -5530,9 +5522,8 @@ where if !self.fsm.peer.is_witness() || !self.fsm.peer.has_pending_compact_cmd { return; } - // TODO: make it configurable if self.fsm.peer.last_compacted_time.elapsed() - > self.ctx.cfg.raft_log_gc_tick_interval.0 * 2 + > self.ctx.cfg.request_voter_replicated_index_interval.0 { let mut msg = ExtraMessage::default(); msg.set_type(ExtraMessageType::MsgVoterReplicatedIndexRequest); diff --git a/tests/failpoints/cases/test_witness.rs b/tests/failpoints/cases/test_witness.rs index 98a845b7016..552434d1fed 100644 --- a/tests/failpoints/cases/test_witness.rs +++ b/tests/failpoints/cases/test_witness.rs @@ -78,6 +78,10 @@ fn test_witness_raftlog_gc_pull_voter_replicated_index() { let mut cluster = new_server_cluster(0, 3); 
cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(50); + cluster + .cfg + .raft_store + .request_voter_replicated_index_interval = ReadableDuration::millis(100); cluster.run(); let nodes = Vec::from_iter(cluster.get_node_ids()); assert_eq!(nodes.len(), 3); @@ -155,6 +159,10 @@ fn test_witness_raftlog_gc_after_reboot() { let mut cluster = new_server_cluster(0, 3); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(50); + cluster + .cfg + .raft_store + .request_voter_replicated_index_interval = ReadableDuration::millis(100); cluster.run(); let nodes = Vec::from_iter(cluster.get_node_ids()); assert_eq!(nodes.len(), 3); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 0c6cf7cdd9c..a4e15b8fa6e 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -180,6 +180,7 @@ fn test_serde_custom_tikv_config() { raft_entry_max_size: ReadableSize::mb(12), raft_log_compact_sync_interval: ReadableDuration::secs(12), raft_log_gc_tick_interval: ReadableDuration::secs(12), + request_voter_replicated_index_interval: ReadableDuration::minutes(5), raft_log_gc_threshold: 12, raft_log_gc_count_limit: Some(12), raft_log_gc_size_limit: Some(ReadableSize::kb(1)), From 5235542066f3cd41d02581c6ee064159938f545e Mon Sep 17 00:00:00 2001 From: iosmanthus Date: Tue, 17 Jan 2023 21:05:50 +0800 Subject: [PATCH 0459/1149] copr: support handling keyspace request (#14027) ref tikv/tikv#12999 copr: support handling keyspace request Signed-off-by: iosmanthus --- Cargo.lock | 4 + components/api_version/Cargo.toml | 1 + components/api_version/src/keyspace.rs | 163 +++++++++++++++ components/api_version/src/lib.rs | 6 +- components/test_backup/src/lib.rs | 9 +- components/tidb_query_common/Cargo.toml | 1 + .../tidb_query_common/src/storage/scanner.rs | 187 
+++++++++--------- components/tidb_query_datatype/Cargo.toml | 1 + .../tidb_query_datatype/src/codec/table.rs | 17 +- components/tidb_query_executors/Cargo.toml | 1 + .../src/index_scan_executor.rs | 39 ++-- components/tidb_query_executors/src/runner.rs | 11 +- .../src/table_scan_executor.rs | 35 ++-- .../src/util/scan_executor.rs | 16 +- src/coprocessor/checksum.rs | 8 +- src/coprocessor/dag/mod.rs | 15 +- src/coprocessor/endpoint.rs | 20 +- src/coprocessor/statistics/analyze.rs | 46 +++-- .../coprocessor_executors/index_scan/util.rs | 3 +- .../coprocessor_executors/integrated/util.rs | 3 +- .../coprocessor_executors/table_scan/util.rs | 3 +- .../benches/coprocessor_executors/util/mod.rs | 3 +- .../integrations/coprocessor/test_checksum.rs | 10 +- 23 files changed, 414 insertions(+), 188 deletions(-) create mode 100644 components/api_version/src/keyspace.rs diff --git a/Cargo.lock b/Cargo.lock index 0b7ca52725c..069dbc4950e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -84,6 +84,7 @@ dependencies = [ "codec", "engine_traits", "kvproto", + "log_wrappers", "match-template", "panic_hook", "thiserror", @@ -6103,6 +6104,7 @@ name = "tidb_query_common" version = "0.0.1" dependencies = [ "anyhow", + "api_version", "async-trait", "byteorder", "derive_more", @@ -6124,6 +6126,7 @@ dependencies = [ name = "tidb_query_datatype" version = "0.0.1" dependencies = [ + "api_version", "base64", "bitfield", "bitflags", @@ -6164,6 +6167,7 @@ name = "tidb_query_executors" version = "0.0.1" dependencies = [ "anyhow", + "api_version", "async-trait", "codec", "collections", diff --git a/components/api_version/Cargo.toml b/components/api_version/Cargo.toml index 7362ca25ccc..c80607145bd 100644 --- a/components/api_version/Cargo.toml +++ b/components/api_version/Cargo.toml @@ -12,6 +12,7 @@ bitflags = "1.0.1" codec = { workspace = true } engine_traits = { workspace = true } kvproto = { workspace = true } +log_wrappers = { workspace = true } match-template = "0.0.1" thiserror = "1.0" 
tikv_alloc = { workspace = true } diff --git a/components/api_version/src/keyspace.rs b/components/api_version/src/keyspace.rs new file mode 100644 index 00000000000..4b263822a1b --- /dev/null +++ b/components/api_version/src/keyspace.rs @@ -0,0 +1,163 @@ +use std::fmt::Debug; + +use engine_traits::{Error, Result}; +use tikv_util::box_err; + +use super::*; + +const KEYSPACE_PREFIX_LEN: usize = 4; + +pub trait KvPair { + fn key(&self) -> &[u8]; + fn value(&self) -> &[u8]; + fn kv(&self) -> (&[u8], &[u8]) { + (self.key(), self.value()) + } +} + +impl KvPair for (Vec, Vec) { + fn key(&self) -> &[u8] { + &self.0 + } + fn value(&self) -> &[u8] { + &self.1 + } +} + +pub trait Keyspace { + type KvPair: KvPair = (Vec, Vec); + fn make_kv_pair(p: (Vec, Vec)) -> Result; + fn parse_keyspace(key: &[u8]) -> Result<(Option, &[u8])> { + Ok((None, key)) + } +} + +#[derive(PartialEq, Clone, Copy, Debug)] +pub struct KeyspaceId(u32); + +impl From for KeyspaceId { + fn from(id: u32) -> Self { + Self(id) + } +} + +impl Keyspace for ApiV1 { + fn make_kv_pair(p: (Vec, Vec)) -> Result { + Ok(p) + } +} + +impl Keyspace for ApiV1Ttl { + fn make_kv_pair(p: (Vec, Vec)) -> Result { + Ok(p) + } +} + +impl Keyspace for ApiV2 { + type KvPair = KeyspaceKv; + + fn make_kv_pair(p: (Vec, Vec)) -> Result { + let (k, v) = p; + let (keyspace, _) = Self::parse_keyspace(&k)?; + Ok(KeyspaceKv { + k, + v, + keyspace: keyspace.unwrap(), + }) + } + + fn parse_keyspace(key: &[u8]) -> Result<(Option, &[u8])> { + let mode = ApiV2::parse_key_mode(key); + if key.len() < KEYSPACE_PREFIX_LEN || (mode != KeyMode::Raw && mode != KeyMode::Txn) { + return Err(Error::Other(box_err!( + "invalid API V2 key: {}", + log_wrappers::Value(key) + ))); + } + let id = u32::from_be_bytes([0, key[1], key[2], key[3]]); + Ok((Some(KeyspaceId::from(id)), &key[KEYSPACE_PREFIX_LEN..])) + } +} + +pub struct KeyspaceKv { + k: Vec, + v: Vec, + keyspace: KeyspaceId, +} + +impl KvPair for KeyspaceKv { + fn key(&self) -> &[u8] { + 
&self.k[KEYSPACE_PREFIX_LEN..] + } + + fn value(&self) -> &[u8] { + &self.v + } +} + +impl KeyspaceKv { + pub fn keyspace(&self) -> KeyspaceId { + self.keyspace + } +} + +impl PartialEq<(Vec, Vec)> for KeyspaceKv { + fn eq(&self, other: &(Vec, Vec)) -> bool { + self.kv() == (&other.0, &other.1) + } +} + +impl PartialEq for KeyspaceKv { + fn eq(&self, other: &Self) -> bool { + self.k == other.k && self.v == other.v + } +} + +impl Debug for KeyspaceKv { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("KeyspaceKv") + .field("key", &log_wrappers::Value(self.key())) + .field("value", &log_wrappers::Value(self.value())) + .field("keyspace", &self.keyspace()) + .finish() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_v1_parse_keyspace() { + let k = b"t123_111"; + let (keyspace, key) = ApiV1::parse_keyspace(k).unwrap(); + assert_eq!(None, keyspace); + assert_eq!(k, key); + + let (keyspace, key) = ApiV1Ttl::parse_keyspace(k).unwrap(); + assert_eq!(None, keyspace); + assert_eq!(k, key); + } + + #[test] + fn test_v2_parse_keyspace() { + let ok = vec![ + (b"x\x00\x00\x01t123_114", 1, b"t123_114"), + (b"r\x00\x00\x01t123_112", 1, b"t123_112"), + (b"x\x01\x00\x00t213_112", 0x010000, b"t213_112"), + (b"r\x01\x00\x00t123_113", 0x010000, b"t123_113"), + ]; + + for (key, id, user_key) in ok { + let (keyspace, key) = ApiV2::parse_keyspace(key).unwrap(); + assert_eq!(Some(KeyspaceId::from(id)), keyspace); + assert_eq!(user_key, key); + } + + let err: Vec<&[u8]> = vec![b"t123_111", b"s\x00\x00", b"r\x00\x00"]; + + for key in err { + ApiV2::parse_keyspace(key).unwrap_err(); + } + } +} diff --git a/components/api_version/src/lib.rs b/components/api_version/src/lib.rs index 0c9ae388917..879751e7b62 100644 --- a/components/api_version/src/lib.rs +++ b/components/api_version/src/lib.rs @@ -1,17 +1,21 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
#![feature(min_specialization)] +#![feature(associated_type_defaults)] mod api_v1; mod api_v1ttl; pub mod api_v2; +pub mod keyspace; use engine_traits::Result; use kvproto::kvrpcpb::ApiVersion; pub use match_template::match_template; use txn_types::{Key, TimeStamp}; -pub trait KvFormat: Clone + Copy + 'static + Send + Sync { +use crate::keyspace::Keyspace; + +pub trait KvFormat: Keyspace + Clone + Copy + 'static + Send + Sync { const TAG: ApiVersion; /// Corresponding TAG of client requests. For test only. #[cfg(any(test, feature = "testexport"))] diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index e990924c638..3409a6ef366 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -8,7 +8,7 @@ use std::{ time::Duration, }; -use api_version::{dispatch_api_version, KvFormat, RawValue}; +use api_version::{dispatch_api_version, keyspace::KvPair, ApiV1, KvFormat, RawValue}; use backup::Task; use collections::HashMap; use engine_traits::{CfName, IterOptions, CF_DEFAULT, CF_WRITE, DATA_KEY_PREFIX_LEN}; @@ -354,7 +354,7 @@ impl TestSuite { Default::default(), false, ); - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage: TikvStorage::new(snap_store, false), ranges: vec![Range::Interval(IntervalRange::from((start, end)))], scan_backward_in_range: false, @@ -362,8 +362,9 @@ impl TestSuite { is_scanned_range_aware: false, }); let digest = crc64fast::Digest::new(); - while let Some((k, v)) = block_on(scanner.next()).unwrap() { - checksum = checksum_crc64_xor(checksum, digest.clone(), &k, &v); + while let Some(row) = block_on(scanner.next()).unwrap() { + let (k, v) = row.kv(); + checksum = checksum_crc64_xor(checksum, digest.clone(), k, v); total_kvs += 1; total_bytes += (k.len() + v.len()) as u64; } diff --git a/components/tidb_query_common/Cargo.toml b/components/tidb_query_common/Cargo.toml index 
3dd1693ba0d..f192b22a5f6 100644 --- a/components/tidb_query_common/Cargo.toml +++ b/components/tidb_query_common/Cargo.toml @@ -7,6 +7,7 @@ description = "Common utility of a query engine to run TiDB pushed down executor [dependencies] anyhow = "1.0" +api_version = { workspace = true } async-trait = "0.1" derive_more = "0.99.3" error_code = { workspace = true } diff --git a/components/tidb_query_common/src/storage/scanner.rs b/components/tidb_query_common/src/storage/scanner.rs index e12659f329b..d0d2345a09e 100644 --- a/components/tidb_query_common/src/storage/scanner.rs +++ b/components/tidb_query_common/src/storage/scanner.rs @@ -1,7 +1,8 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::time::Duration; +use std::{marker::PhantomData, time::Duration}; +use api_version::KvFormat; use tikv_util::time::Instant; use yatp::task::future::reschedule; @@ -17,7 +18,7 @@ const CHECK_KEYS: usize = 32; /// A scanner that scans over multiple ranges. Each range can be a point range /// containing only one row, or an interval range containing multiple rows. 
-pub struct RangesScanner { +pub struct RangesScanner { storage: T, ranges_iter: RangesIterator, @@ -34,6 +35,8 @@ pub struct RangesScanner { working_range_begin_key: Vec, working_range_end_key: Vec, rescheduler: RescheduleChecker, + + _phantom: PhantomData, } // TODO: maybe it's better to make it generic to avoid directly depending @@ -72,7 +75,7 @@ pub struct RangesScannerOptions { pub is_scanned_range_aware: bool, // TODO: This can be const generics } -impl RangesScanner { +impl RangesScanner { pub fn new( RangesScannerOptions { storage, @@ -81,7 +84,7 @@ impl RangesScanner { is_key_only, is_scanned_range_aware, }: RangesScannerOptions, - ) -> RangesScanner { + ) -> RangesScanner { let ranges_len = ranges.len(); let ranges_iter = RangesIterator::new(ranges); RangesScanner { @@ -98,13 +101,14 @@ impl RangesScanner { working_range_begin_key: Vec::with_capacity(KEY_BUFFER_CAPACITY), working_range_end_key: Vec::with_capacity(KEY_BUFFER_CAPACITY), rescheduler: RescheduleChecker::new(), + _phantom: PhantomData, } } /// Fetches next row. // Note: This is not implemented over `Iterator` since it can fail. // TODO: Change to use reference to avoid allocation and copy. - pub async fn next(&mut self) -> Result, StorageError> { + pub async fn next(&mut self) -> Result, StorageError> { self.next_opt(true).await } @@ -114,7 +118,7 @@ impl RangesScanner { pub async fn next_opt( &mut self, update_scanned_range: bool, - ) -> Result, StorageError> { + ) -> Result, StorageError> { loop { let mut force_check = true; let range = self.ranges_iter.next(); @@ -150,14 +154,14 @@ impl RangesScanner { if self.is_scanned_range_aware && update_scanned_range { self.update_scanned_range_from_scanned_row(&some_row); } - if some_row.is_some() { + if let Some(row) = some_row { // Retrieved one row from point range or interval range. 
if let Some(r) = self.scanned_rows_per_range.last_mut() { *r += 1; } self.rescheduler.check_reschedule(force_check).await; - - return Ok(some_row); + let kv = F::make_kv_pair(row).map_err(|e| StorageError(anyhow::Error::from(e)))?; + return Ok(Some(kv)); } else { // No more row in the range. self.ranges_iter.notify_drained(); @@ -288,6 +292,7 @@ impl RangesScanner { #[cfg(test)] mod tests { + use api_version::{keyspace::KvPair, ApiV1}; use futures::executor::block_on; use super::*; @@ -315,7 +320,7 @@ mod tests { PointRange::from("foo_3").into(), IntervalRange::from(("a", "c")).into(), ]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage: storage.clone(), ranges, scan_backward_in_range: false, @@ -323,24 +328,24 @@ mod tests { is_scanned_range_aware: false, }); assert_eq!( - block_on(scanner.next()).unwrap(), - Some((b"foo".to_vec(), b"1".to_vec())) + block_on(scanner.next()).unwrap().unwrap(), + (b"foo".to_vec(), b"1".to_vec()) ); assert_eq!( - block_on(scanner.next()).unwrap(), - Some((b"foo_2".to_vec(), b"3".to_vec())) + block_on(scanner.next()).unwrap().unwrap(), + (b"foo_2".to_vec(), b"3".to_vec()) ); assert_eq!( - block_on(scanner.next()).unwrap(), - Some((b"foo_3".to_vec(), b"5".to_vec())) + block_on(scanner.next()).unwrap().unwrap(), + (b"foo_3".to_vec(), b"5".to_vec()) ); assert_eq!( - block_on(scanner.next()).unwrap(), - Some((b"bar".to_vec(), b"2".to_vec())) + block_on(scanner.next()).unwrap().unwrap(), + (b"bar".to_vec(), b"2".to_vec()) ); assert_eq!( - block_on(scanner.next()).unwrap(), - Some((b"bar_2".to_vec(), b"4".to_vec())) + block_on(scanner.next()).unwrap().unwrap(), + (b"bar_2".to_vec(), b"4".to_vec()) ); assert_eq!(block_on(scanner.next()).unwrap(), None); @@ -351,7 +356,7 @@ mod tests { PointRange::from("foo_3").into(), IntervalRange::from(("a", "bar_2")).into(), ]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = 
RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage: storage.clone(), ranges, scan_backward_in_range: true, @@ -359,20 +364,20 @@ mod tests { is_scanned_range_aware: false, }); assert_eq!( - block_on(scanner.next()).unwrap(), - Some((b"foo_2".to_vec(), b"3".to_vec())) + block_on(scanner.next()).unwrap().unwrap(), + (b"foo_2".to_vec(), b"3".to_vec()) ); assert_eq!( - block_on(scanner.next()).unwrap(), - Some((b"foo".to_vec(), b"1".to_vec())) + block_on(scanner.next()).unwrap().unwrap(), + (b"foo".to_vec(), b"1".to_vec()) ); assert_eq!( - block_on(scanner.next()).unwrap(), - Some((b"foo_3".to_vec(), b"5".to_vec())) + block_on(scanner.next()).unwrap().unwrap(), + (b"foo_3".to_vec(), b"5".to_vec()) ); assert_eq!( - block_on(scanner.next()).unwrap(), - Some((b"bar".to_vec(), b"2".to_vec())) + block_on(scanner.next()).unwrap().unwrap(), + (b"bar".to_vec(), b"2".to_vec()) ); assert_eq!(block_on(scanner.next()).unwrap(), None); @@ -382,7 +387,7 @@ mod tests { PointRange::from("foo_3").into(), PointRange::from("bar_3").into(), ]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage, ranges, scan_backward_in_range: false, @@ -390,24 +395,24 @@ mod tests { is_scanned_range_aware: false, }); assert_eq!( - block_on(scanner.next()).unwrap(), - Some((b"bar".to_vec(), Vec::new())) + block_on(scanner.next()).unwrap().unwrap(), + (b"bar".to_vec(), Vec::new()) ); assert_eq!( - block_on(scanner.next()).unwrap(), - Some((b"bar_2".to_vec(), Vec::new())) + block_on(scanner.next()).unwrap().unwrap(), + (b"bar_2".to_vec(), Vec::new()) ); assert_eq!( - block_on(scanner.next()).unwrap(), - Some((b"foo".to_vec(), Vec::new())) + block_on(scanner.next()).unwrap().unwrap(), + (b"foo".to_vec(), Vec::new()) ); assert_eq!( - block_on(scanner.next()).unwrap(), - Some((b"foo_2".to_vec(), Vec::new())) + block_on(scanner.next()).unwrap().unwrap(), + (b"foo_2".to_vec(), Vec::new()) ); assert_eq!( - 
block_on(scanner.next()).unwrap(), - Some((b"foo_3".to_vec(), Vec::new())) + block_on(scanner.next()).unwrap().unwrap(), + (b"foo_3".to_vec(), Vec::new()) ); assert_eq!(block_on(scanner.next()).unwrap(), None); } @@ -422,7 +427,7 @@ mod tests { PointRange::from("foo_3").into(), IntervalRange::from(("a", "z")).into(), ]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage, ranges, scan_backward_in_range: false, @@ -431,9 +436,9 @@ mod tests { }); let mut scanned_rows_per_range = Vec::new(); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo"); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_2"); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_3"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo_3"); scanner.collect_scanned_rows_per_range(&mut scanned_rows_per_range); assert_eq!(scanned_rows_per_range, vec![2, 0, 1]); @@ -443,21 +448,21 @@ mod tests { assert_eq!(scanned_rows_per_range, vec![0]); scanned_rows_per_range.clear(); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"bar"); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"bar_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"bar"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"bar_2"); scanner.collect_scanned_rows_per_range(&mut scanned_rows_per_range); assert_eq!(scanned_rows_per_range, vec![0, 2]); scanned_rows_per_range.clear(); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo"); scanner.collect_scanned_rows_per_range(&mut scanned_rows_per_range); assert_eq!(scanned_rows_per_range, vec![1]); scanned_rows_per_range.clear(); - 
assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_2"); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_3"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo_3"); assert_eq!(block_on(scanner.next()).unwrap(), None); scanner.collect_scanned_rows_per_range(&mut scanned_rows_per_range); @@ -477,7 +482,7 @@ mod tests { // No range let ranges = vec![]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage: storage.clone(), ranges, scan_backward_in_range: false, @@ -497,7 +502,7 @@ mod tests { // Empty interval range let ranges = vec![IntervalRange::from(("x", "xb")).into()]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage: storage.clone(), ranges, scan_backward_in_range: false, @@ -513,7 +518,7 @@ mod tests { // Empty point range let ranges = vec![PointRange::from("x").into()]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage: storage.clone(), ranges, scan_backward_in_range: false, @@ -529,7 +534,7 @@ mod tests { // Filled interval range let ranges = vec![IntervalRange::from(("foo", "foo_8")).into()]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage: storage.clone(), ranges, scan_backward_in_range: false, @@ -537,14 +542,14 @@ mod tests { is_scanned_range_aware: true, }); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo"); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo_2"); let r = 
scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo"); assert_eq!(&r.upper_exclusive, b"foo_2\0"); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_3"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo_3"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo_2\0"); @@ -567,7 +572,7 @@ mod tests { PointRange::from("bar_3").into(), IntervalRange::from(("bar_4", "box")).into(), ]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage, ranges, scan_backward_in_range: false, @@ -575,25 +580,25 @@ mod tests { is_scanned_range_aware: true, }); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo"); assert_eq!(&r.upper_exclusive, b"foo\0"); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo_2"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo\0"); assert_eq!(&r.upper_exclusive, b"foo_2\0"); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"bar"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"bar"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo_2\0"); assert_eq!(&r.upper_exclusive, b"bar\0"); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"bar_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"bar_2"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"bar\0"); @@ -612,7 +617,7 @@ mod tests { // No range let ranges = vec![]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage: storage.clone(), ranges, scan_backward_in_range: true, 
@@ -632,7 +637,7 @@ mod tests { // Empty interval range let ranges = vec![IntervalRange::from(("x", "xb")).into()]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage: storage.clone(), ranges, scan_backward_in_range: true, @@ -648,7 +653,7 @@ mod tests { // Empty point range let ranges = vec![PointRange::from("x").into()]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage: storage.clone(), ranges, scan_backward_in_range: true, @@ -664,7 +669,7 @@ mod tests { // Filled interval range let ranges = vec![IntervalRange::from(("foo", "foo_8")).into()]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage: storage.clone(), ranges, scan_backward_in_range: true, @@ -672,14 +677,14 @@ mod tests { is_scanned_range_aware: true, }); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_3"); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo_3"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo_2"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo_2"); assert_eq!(&r.upper_exclusive, b"foo_8"); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo"); @@ -700,7 +705,7 @@ mod tests { IntervalRange::from(("foo_5", "foo_50")).into(), IntervalRange::from(("foo", "foo_3")).into(), ]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage, ranges, scan_backward_in_range: true, @@ -708,20 +713,20 @@ mod tests { 
is_scanned_range_aware: true, }); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"bar_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"bar_2"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"bar_2"); assert_eq!(&r.upper_exclusive, b"box"); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"bar"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"bar"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"bar"); assert_eq!(&r.upper_exclusive, b"bar_2"); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo_2"); - assert_eq!(&block_on(scanner.next()).unwrap().unwrap().0, b"foo"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo_2"); + assert_eq!(&block_on(scanner.next()).unwrap().unwrap().key(), b"foo"); let r = scanner.take_scanned_range(); assert_eq!(&r.lower_inclusive, b"foo"); @@ -739,7 +744,7 @@ mod tests { let storage = create_storage(); // Filled interval range let ranges = vec![IntervalRange::from(("foo", "foo_8")).into()]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage: storage.clone(), ranges, scan_backward_in_range: false, @@ -749,7 +754,7 @@ mod tests { // Only lower_inclusive is updated. assert_eq!( - &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + &block_on(scanner.next_opt(false)).unwrap().unwrap().key(), b"foo" ); assert_eq!(&scanner.working_range_begin_key, b"foo"); @@ -757,7 +762,7 @@ mod tests { // Upper_exclusive is updated. assert_eq!( - &block_on(scanner.next_opt(true)).unwrap().unwrap().0, + &block_on(scanner.next_opt(true)).unwrap().unwrap().key(), b"foo_2" ); assert_eq!(&scanner.working_range_begin_key, b"foo"); @@ -765,7 +770,7 @@ mod tests { // Upper_exclusive is not updated. 
assert_eq!( - &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + &block_on(scanner.next_opt(false)).unwrap().unwrap().key(), b"foo_3" ); assert_eq!(&scanner.working_range_begin_key, b"foo"); @@ -791,7 +796,7 @@ mod tests { PointRange::from("bar_3").into(), IntervalRange::from(("bar_4", "box")).into(), ]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage, ranges, scan_backward_in_range: false, @@ -801,7 +806,7 @@ mod tests { // Only lower_inclusive is updated. assert_eq!( - &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + &block_on(scanner.next_opt(false)).unwrap().unwrap().key(), b"foo" ); assert_eq!(&scanner.working_range_begin_key, b"foo"); @@ -809,7 +814,7 @@ mod tests { // Upper_exclusive is updated. Updated by scanned row. assert_eq!( - &block_on(scanner.next_opt(true)).unwrap().unwrap().0, + &block_on(scanner.next_opt(true)).unwrap().unwrap().key(), b"foo_2" ); assert_eq!(&scanner.working_range_begin_key, b"foo"); @@ -817,7 +822,7 @@ mod tests { // Upper_exclusive is not updated. assert_eq!( - &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + &block_on(scanner.next_opt(false)).unwrap().unwrap().key(), b"bar" ); assert_eq!(&scanner.working_range_begin_key, b"foo"); @@ -825,7 +830,7 @@ mod tests { // Upper_exclusive is not updated. assert_eq!( - &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + &block_on(scanner.next_opt(false)).unwrap().unwrap().key(), b"bar_2" ); assert_eq!(&scanner.working_range_begin_key, b"foo"); @@ -846,7 +851,7 @@ mod tests { let storage = create_storage(); // Filled interval range let ranges = vec![IntervalRange::from(("foo", "foo_8")).into()]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage: storage.clone(), ranges, scan_backward_in_range: true, @@ -856,7 +861,7 @@ mod tests { // Only lower_inclusive is updated. 
assert_eq!( - &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + &block_on(scanner.next_opt(false)).unwrap().unwrap().key(), b"foo_3" ); assert_eq!(&scanner.working_range_begin_key, b"foo_8"); @@ -864,7 +869,7 @@ mod tests { // Upper_exclusive is updated. assert_eq!( - &block_on(scanner.next_opt(true)).unwrap().unwrap().0, + &block_on(scanner.next_opt(true)).unwrap().unwrap().key(), b"foo_2" ); assert_eq!(&scanner.working_range_begin_key, b"foo_8"); @@ -872,7 +877,7 @@ mod tests { // Upper_exclusive is not updated. assert_eq!( - &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + &block_on(scanner.next_opt(false)).unwrap().unwrap().key(), b"foo" ); assert_eq!(&scanner.working_range_begin_key, b"foo_8"); @@ -896,7 +901,7 @@ mod tests { IntervalRange::from(("foo_5", "foo_50")).into(), IntervalRange::from(("foo", "foo_3")).into(), ]; - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage, ranges, scan_backward_in_range: true, @@ -906,7 +911,7 @@ mod tests { // Lower_inclusive is updated. Upper_exclusive is not update. assert_eq!( - &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + &block_on(scanner.next_opt(false)).unwrap().unwrap().key(), b"bar_2" ); assert_eq!(&scanner.working_range_begin_key, b"box"); @@ -914,7 +919,7 @@ mod tests { // Upper_exclusive is updated. Updated by scanned row. assert_eq!( - &block_on(scanner.next_opt(true)).unwrap().unwrap().0, + &block_on(scanner.next_opt(true)).unwrap().unwrap().key(), b"bar" ); assert_eq!(&scanner.working_range_begin_key, b"box"); @@ -922,7 +927,7 @@ mod tests { // Upper_exclusive is not update. assert_eq!( - &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + &block_on(scanner.next_opt(false)).unwrap().unwrap().key(), b"foo_2" ); assert_eq!(&scanner.working_range_begin_key, b"box"); @@ -930,7 +935,7 @@ mod tests { // Upper_exclusive is not update. 
assert_eq!( - &block_on(scanner.next_opt(false)).unwrap().unwrap().0, + &block_on(scanner.next_opt(false)).unwrap().unwrap().key(), b"foo" ); assert_eq!(&scanner.working_range_begin_key, b"box"); diff --git a/components/tidb_query_datatype/Cargo.toml b/components/tidb_query_datatype/Cargo.toml index e9d96e16284..e670674cdc6 100644 --- a/components/tidb_query_datatype/Cargo.toml +++ b/components/tidb_query_datatype/Cargo.toml @@ -6,6 +6,7 @@ publish = false description = "Data type of a query engine to run TiDB pushed down executors" [dependencies] +api_version = { workspace = true } base64 = "0.13" bitfield = "0.13.2" bitflags = "1.0.1" diff --git a/components/tidb_query_datatype/src/codec/table.rs b/components/tidb_query_datatype/src/codec/table.rs index 00f6c22347b..37becbfb801 100644 --- a/components/tidb_query_datatype/src/codec/table.rs +++ b/components/tidb_query_datatype/src/codec/table.rs @@ -2,6 +2,7 @@ use std::{cmp, convert::TryInto, io::Write, sync::Arc, u8}; +use api_version::KvFormat; use codec::prelude::*; use collections::{HashMap, HashSet}; use kvproto::coprocessor::KeyRange; @@ -75,10 +76,13 @@ pub fn extract_table_prefix(key: &[u8]) -> Result<&[u8]> { } /// Checks if the range is for table record or index. 
-pub fn check_table_ranges(ranges: &[KeyRange]) -> Result<()> { +pub fn check_table_ranges(ranges: &[KeyRange]) -> Result<()> { for range in ranges { - extract_table_prefix(range.get_start())?; - extract_table_prefix(range.get_end())?; + let (_, start) = + F::parse_keyspace(range.get_start()).map_err(|e| Error::Other(Box::new(e)))?; + let (_, end) = F::parse_keyspace(range.get_end()).map_err(|e| Error::Other(Box::new(e)))?; + extract_table_prefix(start)?; + extract_table_prefix(end)?; if range.get_start() >= range.get_end() { return Err(invalid_type!( "invalid range,range.start should be smaller than range.end, but got [{:?},{:?})", @@ -544,6 +548,7 @@ pub fn generate_index_data_for_test( mod tests { use std::{i64, iter::FromIterator}; + use api_version::ApiV1; use collections::{HashMap, HashSet}; use tipb::ColumnInfo; @@ -790,18 +795,18 @@ mod tests { let mut range = KeyRange::default(); range.set_start(small_key.clone()); range.set_end(large_key.clone()); - check_table_ranges(&[range]).unwrap(); + check_table_ranges::(&[range]).unwrap(); // test range.start > range.end let mut range = KeyRange::default(); range.set_end(small_key.clone()); range.set_start(large_key); - check_table_ranges(&[range]).unwrap_err(); + check_table_ranges::(&[range]).unwrap_err(); // test invalid end let mut range = KeyRange::default(); range.set_start(small_key); range.set_end(b"xx".to_vec()); - check_table_ranges(&[range]).unwrap_err(); + check_table_ranges::(&[range]).unwrap_err(); } #[test] diff --git a/components/tidb_query_executors/Cargo.toml b/components/tidb_query_executors/Cargo.toml index 123c306c125..331634dbd04 100644 --- a/components/tidb_query_executors/Cargo.toml +++ b/components/tidb_query_executors/Cargo.toml @@ -6,6 +6,7 @@ publish = false description = "A vector query engine to run TiDB pushed down executors" [dependencies] +api_version = { workspace = true } async-trait = "0.1" codec = { workspace = true } collections = { workspace = true } diff --git 
a/components/tidb_query_executors/src/index_scan_executor.rs b/components/tidb_query_executors/src/index_scan_executor.rs index ae04ffe03e6..9e415918541 100644 --- a/components/tidb_query_executors/src/index_scan_executor.rs +++ b/components/tidb_query_executors/src/index_scan_executor.rs @@ -2,6 +2,7 @@ use std::sync::Arc; +use api_version::{ApiV1, KvFormat}; use async_trait::async_trait; use codec::{number::NumberCodec, prelude::NumberDecoder}; use itertools::izip; @@ -30,11 +31,13 @@ use DecodeHandleStrategy::*; use super::util::scan_executor::*; use crate::interface::*; -pub struct BatchIndexScanExecutor(ScanExecutor); +pub struct BatchIndexScanExecutor( + ScanExecutor, +); // We assign a dummy type `Box>` so that we can // omit the type when calling `check_supported`. -impl BatchIndexScanExecutor>> { +impl BatchIndexScanExecutor>, ApiV1> { /// Checks whether this executor can be used. #[inline] pub fn check_supported(descriptor: &IndexScan) -> Result<()> { @@ -42,7 +45,7 @@ impl BatchIndexScanExecutor>> { } } -impl BatchIndexScanExecutor { +impl BatchIndexScanExecutor { pub fn new( storage: S, config: Arc, @@ -154,7 +157,7 @@ impl BatchIndexScanExecutor { } #[async_trait] -impl BatchExecutor for BatchIndexScanExecutor { +impl BatchExecutor for BatchIndexScanExecutor { type StorageStats = S::Statistics; #[inline] @@ -975,7 +978,7 @@ mod tests { range }]; - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( store.clone(), Arc::new(EvalConfig::default()), vec![columns_info[0].clone(), columns_info[1].clone()], @@ -1028,7 +1031,7 @@ mod tests { range }]; - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( store.clone(), Arc::new(EvalConfig::default()), vec![ @@ -1092,7 +1095,7 @@ mod tests { range }]; - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( store.clone(), 
Arc::new(EvalConfig::default()), vec![columns_info[1].clone(), columns_info[0].clone()], @@ -1133,7 +1136,7 @@ mod tests { range }]; - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( store.clone(), Arc::new(EvalConfig::default()), vec![ @@ -1185,7 +1188,7 @@ mod tests { range }]; - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( store, Arc::new(EvalConfig::default()), vec![ @@ -1262,7 +1265,7 @@ mod tests { range }]; - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( store.clone(), Arc::new(EvalConfig::default()), vec![ @@ -1319,7 +1322,7 @@ mod tests { range }]; - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( store, Arc::new(EvalConfig::default()), vec![ @@ -1433,7 +1436,7 @@ mod tests { let mut value = value_prefix.clone(); value.extend(restore_data); let store = FixtureStorage::from(vec![(key.clone(), value)]); - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( store, Arc::new(EvalConfig::default()), columns_info.clone(), @@ -1476,7 +1479,7 @@ mod tests { let value = value_prefix; let store = FixtureStorage::from(vec![(key, value)]); - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( store, Arc::new(EvalConfig::default()), columns_info, @@ -1572,7 +1575,7 @@ mod tests { }]; let store = FixtureStorage::from(vec![(key, vec![])]); - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( store, Arc::new(EvalConfig::default()), columns_info, @@ -1672,7 +1675,7 @@ mod tests { }]; let store = FixtureStorage::from(vec![(key, value)]); - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( store, 
Arc::new(EvalConfig::default()), columns_info, @@ -1766,7 +1769,7 @@ mod tests { }]; let store = FixtureStorage::from(vec![(key, value)]); - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( store, Arc::new(EvalConfig::default()), columns_info, @@ -1859,7 +1862,7 @@ mod tests { }]; let store = FixtureStorage::from(vec![(key, value)]); - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( store, Arc::new(EvalConfig::default()), columns_info, @@ -1985,7 +1988,7 @@ mod tests { let mut value = value_prefix; value.extend(restore_data); let store = FixtureStorage::from(vec![(key, value)]); - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( store, Arc::new(EvalConfig::default()), columns_info, diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 551c3da8a7e..d04be41507e 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -2,6 +2,7 @@ use std::{convert::TryFrom, sync::Arc}; +use api_version::KvFormat; use fail::fail_point; use kvproto::coprocessor::KeyRange; use protobuf::Message; @@ -164,7 +165,7 @@ fn is_arrow_encodable(schema: &[FieldType]) -> bool { } #[allow(clippy::explicit_counter_loop)] -pub fn build_executors( +pub fn build_executors( executor_descriptors: Vec, storage: S, ranges: Vec, @@ -192,7 +193,7 @@ pub fn build_executors( let primary_prefix_column_ids = descriptor.take_primary_prefix_column_ids(); Box::new( - BatchTableScanExecutor::new( + BatchTableScanExecutor::<_, F>::new( storage, config.clone(), columns_info, @@ -212,7 +213,7 @@ pub fn build_executors( let columns_info = descriptor.take_columns().into(); let primary_column_ids_len = descriptor.take_primary_column_ids().len(); Box::new( - BatchIndexScanExecutor::new( + BatchIndexScanExecutor::<_, F>::new( 
storage, config.clone(), columns_info, @@ -364,7 +365,7 @@ pub fn build_executors( } impl BatchExecutorsRunner { - pub fn from_request + 'static>( + pub fn from_request + 'static, F: KvFormat>( mut req: DagRequest, ranges: Vec, storage: S, @@ -380,7 +381,7 @@ impl BatchExecutorsRunner { config.paging_size = paging_size; let config = Arc::new(config); - let out_most_executor = build_executors( + let out_most_executor = build_executors::<_, F>( req.take_executors().into(), storage, ranges, diff --git a/components/tidb_query_executors/src/table_scan_executor.rs b/components/tidb_query_executors/src/table_scan_executor.rs index 957a23ba8c0..4397869fcaa 100644 --- a/components/tidb_query_executors/src/table_scan_executor.rs +++ b/components/tidb_query_executors/src/table_scan_executor.rs @@ -2,6 +2,7 @@ use std::{collections::HashSet, sync::Arc}; +use api_version::{ApiV1, KvFormat}; use async_trait::async_trait; use collections::HashMap; use kvproto::coprocessor::KeyRange; @@ -23,13 +24,15 @@ use tipb::{ColumnInfo, FieldType, TableScan}; use super::util::scan_executor::*; use crate::interface::*; -pub struct BatchTableScanExecutor(ScanExecutor); +pub struct BatchTableScanExecutor( + ScanExecutor, +); type HandleIndicesVec = SmallVec<[usize; 2]>; // We assign a dummy type `Box>` so that we can // omit the type when calling `check_supported`. -impl BatchTableScanExecutor>> { +impl BatchTableScanExecutor>, ApiV1> { /// Checks whether this executor can be used. 
#[inline] pub fn check_supported(descriptor: &TableScan) -> Result<()> { @@ -37,7 +40,7 @@ impl BatchTableScanExecutor>> { } } -impl BatchTableScanExecutor { +impl BatchTableScanExecutor { #[allow(clippy::too_many_arguments)] pub fn new( storage: S, @@ -110,7 +113,7 @@ impl BatchTableScanExecutor { } #[async_trait] -impl BatchExecutor for BatchTableScanExecutor { +impl BatchExecutor for BatchTableScanExecutor { type StorageStats = S::Statistics; #[inline] @@ -702,7 +705,7 @@ mod tests { batch_expect_rows: &[usize], ) { let columns_info = helper.columns_info_by_idx(col_idxs); - let mut executor = BatchTableScanExecutor::new( + let mut executor = BatchTableScanExecutor::<_, ApiV1>::new( helper.store(), Arc::new(EvalConfig::default()), columns_info, @@ -786,7 +789,7 @@ mod tests { fn test_execution_summary() { let helper = TableScanTestHelper::new(); - let mut executor = BatchTableScanExecutor::new( + let mut executor = BatchTableScanExecutor::<_, ApiV1>::new( helper.store(), Arc::new(EvalConfig::default()), helper.columns_info_by_idx(&[0]), @@ -925,7 +928,7 @@ mod tests { // For row 0 + row 1 + (row 2 ~ row 4), we should only get row 0, row 1 and an // error. for corrupted_row_index in 2..=4 { - let mut executor = BatchTableScanExecutor::new( + let mut executor = BatchTableScanExecutor::<_, ApiV1>::new( store.clone(), Arc::new(EvalConfig::default()), columns_info.clone(), @@ -1032,7 +1035,7 @@ mod tests { // We should get row 0 and error because no further rows should be scanned when // there is an error. 
{ - let mut executor = BatchTableScanExecutor::new( + let mut executor = BatchTableScanExecutor::<_, ApiV1>::new( store.clone(), Arc::new(EvalConfig::default()), columns_info.clone(), @@ -1080,7 +1083,7 @@ mod tests { }); let mut schema = schema.clone(); schema.push(FieldTypeTp::LongLong.into()); - let mut executor = BatchTableScanExecutor::new( + let mut executor = BatchTableScanExecutor::<_, ApiV1>::new( store.clone(), Arc::new(EvalConfig::default()), columns_info, @@ -1122,7 +1125,7 @@ mod tests { // Let's also repeat case 1 for smaller batch size { - let mut executor = BatchTableScanExecutor::new( + let mut executor = BatchTableScanExecutor::<_, ApiV1>::new( store.clone(), Arc::new(EvalConfig::default()), columns_info.clone(), @@ -1165,7 +1168,7 @@ mod tests { // Case 2: row 1 + row 2 // We should get error and no row, for the same reason as above. { - let mut executor = BatchTableScanExecutor::new( + let mut executor = BatchTableScanExecutor::<_, ApiV1>::new( store.clone(), Arc::new(EvalConfig::default()), columns_info.clone(), @@ -1186,7 +1189,7 @@ mod tests { // Case 3: row 2 + row 0 // We should get row 2 and row 0. There is no error. { - let mut executor = BatchTableScanExecutor::new( + let mut executor = BatchTableScanExecutor::<_, ApiV1>::new( store.clone(), Arc::new(EvalConfig::default()), columns_info.clone(), @@ -1220,7 +1223,7 @@ mod tests { // Case 4: row 1 // We should get error. 
{ - let mut executor = BatchTableScanExecutor::new( + let mut executor = BatchTableScanExecutor::<_, ApiV1>::new( store, Arc::new(EvalConfig::default()), columns_info, @@ -1270,7 +1273,7 @@ mod tests { let store = FixtureStorage::new(iter::once((key, (Ok(value)))).collect()); - let mut executor = BatchTableScanExecutor::new( + let mut executor = BatchTableScanExecutor::<_, ApiV1>::new( store, Arc::new(EvalConfig::default()), columns_info, @@ -1378,7 +1381,7 @@ mod tests { let store = FixtureStorage::new(iter::once((key, (Ok(value)))).collect()); - let mut executor = BatchTableScanExecutor::new( + let mut executor = BatchTableScanExecutor::<_, ApiV1>::new( store, Arc::new(EvalConfig::default()), columns_info, @@ -1559,7 +1562,7 @@ mod tests { let store = FixtureStorage::new(iter::once((key, (Ok(value)))).collect()); - let mut executor = BatchTableScanExecutor::new( + let mut executor = BatchTableScanExecutor::<_, ApiV1>::new( store, Arc::new(EvalConfig::default()), columns_info.clone(), diff --git a/components/tidb_query_executors/src/util/scan_executor.rs b/components/tidb_query_executors/src/util/scan_executor.rs index 935db5dd392..75c7cdc9fe3 100644 --- a/components/tidb_query_executors/src/util/scan_executor.rs +++ b/components/tidb_query_executors/src/util/scan_executor.rs @@ -1,5 +1,6 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. +use api_version::{keyspace::KvPair, KvFormat}; use async_trait::async_trait; use kvproto::coprocessor::KeyRange; use tidb_query_common::{ @@ -40,12 +41,12 @@ pub trait ScanExecutorImpl: Send { /// A shared executor implementation for both table scan and index scan. /// Implementation differences between table scan and index scan are further /// given via `ScanExecutorImpl`. -pub struct ScanExecutor { +pub struct ScanExecutor { /// The internal scanning implementation. imp: I, /// The scanner that scans over ranges. 
- scanner: RangesScanner, + scanner: RangesScanner, /// A flag indicating whether this executor is ended. When table is drained /// or there was an error scanning the table, this flag will be set to @@ -63,7 +64,7 @@ pub struct ScanExecutorOptions { pub is_scanned_range_aware: bool, } -impl ScanExecutor { +impl ScanExecutor { pub fn new( ScanExecutorOptions { imp, @@ -75,7 +76,7 @@ impl ScanExecutor { is_scanned_range_aware, }: ScanExecutorOptions, ) -> Result { - tidb_query_datatype::codec::table::check_table_ranges(&key_ranges)?; + tidb_query_datatype::codec::table::check_table_ranges::(&key_ranges)?; if is_backward { key_ranges.reverse(); } @@ -108,10 +109,11 @@ impl ScanExecutor { for i in 0..scan_rows { let some_row = self.scanner.next_opt(i == scan_rows - 1).await?; - if let Some((key, value)) = some_row { + if let Some(row) = some_row { // Retrieved one row from point range or non-point range. - if let Err(e) = self.imp.process_kv_pair(&key, &value, columns) { + let (key, value) = row.kv(); + if let Err(e) = self.imp.process_kv_pair(key, value, columns) { // When there are errors in `process_kv_pair`, columns' length may not be // identical. For example, the filling process may be partially done so that // first several columns have N rows while the rest have N-1 rows. Since we do @@ -162,7 +164,7 @@ pub fn check_columns_info_supported(columns_info: &[ColumnInfo]) -> Result<()> { } #[async_trait] -impl BatchExecutor for ScanExecutor { +impl BatchExecutor for ScanExecutor { type StorageStats = S::Statistics; #[inline] diff --git a/src/coprocessor/checksum.rs b/src/coprocessor/checksum.rs index 52bd0a60184..3778f549427 100644 --- a/src/coprocessor/checksum.rs +++ b/src/coprocessor/checksum.rs @@ -1,5 +1,6 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
+use api_version::{keyspace::KvPair, ApiV1}; use async_trait::async_trait; use kvproto::coprocessor::{KeyRange, Response}; use protobuf::Message; @@ -18,7 +19,7 @@ use crate::{ // `ChecksumContext` is used to handle `ChecksumRequest` pub struct ChecksumContext { req: ChecksumRequest, - scanner: RangesScanner>>, + scanner: RangesScanner>, ApiV1>, } impl ChecksumContext { @@ -73,12 +74,13 @@ impl RequestHandler for ChecksumContext { let mut prefix_digest = crc64fast::Digest::new(); prefix_digest.write(&old_prefix); - while let Some((k, v)) = self.scanner.next().await? { + while let Some(row) = self.scanner.next().await? { + let (k, v) = row.kv(); if !k.starts_with(&new_prefix) { return Err(box_err!("Wrong prefix expect: {:?}", new_prefix)); } checksum = - checksum_crc64_xor(checksum, prefix_digest.clone(), &k[new_prefix.len()..], &v); + checksum_crc64_xor(checksum, prefix_digest.clone(), &k[new_prefix.len()..], v); total_kvs += 1; total_bytes += k.len() + v.len() + old_prefix.len() - new_prefix.len(); } diff --git a/src/coprocessor/dag/mod.rs b/src/coprocessor/dag/mod.rs index ce575859e59..31a6df181d5 100644 --- a/src/coprocessor/dag/mod.rs +++ b/src/coprocessor/dag/mod.rs @@ -2,8 +2,9 @@ mod storage_impl; -use std::sync::Arc; +use std::{marker::PhantomData, sync::Arc}; +use api_version::KvFormat; use async_trait::async_trait; use kvproto::coprocessor::{KeyRange, Response}; use protobuf::Message; @@ -18,7 +19,7 @@ use crate::{ tikv_util::quota_limiter::QuotaLimiter, }; -pub struct DagHandlerBuilder { +pub struct DagHandlerBuilder { req: DagRequest, ranges: Vec, store: S, @@ -29,9 +30,10 @@ pub struct DagHandlerBuilder { is_cache_enabled: bool, paging_size: Option, quota_limiter: Arc, + _phantom: PhantomData, } -impl DagHandlerBuilder { +impl DagHandlerBuilder { pub fn new( req: DagRequest, ranges: Vec, @@ -54,6 +56,7 @@ impl DagHandlerBuilder { is_cache_enabled, paging_size, quota_limiter, + _phantom: PhantomData, } } @@ -65,7 +68,7 @@ impl DagHandlerBuilder { pub fn 
build(self) -> Result> { COPR_DAG_REQ_COUNT.with_label_values(&["batch"]).inc(); - Ok(BatchDagHandler::new( + Ok(BatchDagHandler::new::<_, F>( self.req, self.ranges, self.store, @@ -87,7 +90,7 @@ pub struct BatchDagHandler { } impl BatchDagHandler { - pub fn new( + pub fn new( req: DagRequest, ranges: Vec, store: S, @@ -100,7 +103,7 @@ impl BatchDagHandler { quota_limiter: Arc, ) -> Result { Ok(Self { - runner: tidb_query_executors::runner::BatchExecutorsRunner::from_request( + runner: tidb_query_executors::runner::BatchExecutorsRunner::from_request::<_, F>( req, ranges, TikvStorage::new(store, is_cache_enabled), diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 711cd83e607..b9d01419a49 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -7,6 +7,7 @@ use std::{ use ::tracker::{ set_tls_tracker_token, with_tls_tracker, RequestInfo, RequestType, GLOBAL_TRACKERS, }; +use api_version::{dispatch_api_version, KvFormat}; use async_stream::try_stream; use concurrency_manager::ConcurrencyManager; use engine_traits::PerfLevel; @@ -147,6 +148,21 @@ impl Endpoint { /// /// It also checks if there are locks in memory blocking this read request. fn parse_request_and_check_memory_locks( + &self, + req: coppb::Request, + peer: Option, + is_streaming: bool, + ) -> Result<(RequestHandlerBuilder, ReqContext)> { + dispatch_api_version!(req.get_context().get_api_version(), { + self.parse_request_and_check_memory_locks_impl::(req, peer, is_streaming) + }) + } + + /// Parse the raw `Request` to create `RequestHandlerBuilder` and + /// `ReqContext`. Returns `Err` if fails. + /// + /// It also checks if there are locks in memory blocking this read request. 
+ fn parse_request_and_check_memory_locks_impl( &self, mut req: coppb::Request, peer: Option, @@ -232,7 +248,7 @@ impl Endpoint { 0 => None, i => Some(i), }; - dag::DagHandlerBuilder::new( + dag::DagHandlerBuilder::<_, F>::new( dag, req_ctx.ranges.clone(), store, @@ -281,7 +297,7 @@ impl Endpoint { let quota_limiter = self.quota_limiter.clone(); builder = Box::new(move |snap, req_ctx| { - statistics::analyze::AnalyzeContext::new( + statistics::analyze::AnalyzeContext::<_, F>::new( analyze, req_ctx.ranges.clone(), start_ts, diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index 383f6161a1b..25ecf95653d 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -1,7 +1,8 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. -use std::{cmp::Reverse, collections::BinaryHeap, mem, sync::Arc}; +use std::{cmp::Reverse, collections::BinaryHeap, marker::PhantomData, mem, sync::Arc}; +use api_version::{keyspace::KvPair, KvFormat}; use async_trait::async_trait; use kvproto::coprocessor::{KeyRange, Response}; use protobuf::Message; @@ -41,16 +42,17 @@ const ANALYZE_VERSION_V1: i32 = 1; const ANALYZE_VERSION_V2: i32 = 2; // `AnalyzeContext` is used to handle `AnalyzeReq` -pub struct AnalyzeContext { +pub struct AnalyzeContext { req: AnalyzeReq, storage: Option>>, ranges: Vec, storage_stats: Statistics, quota_limiter: Arc, is_auto_analyze: bool, + _phantom: PhantomData, } -impl AnalyzeContext { +impl AnalyzeContext { pub fn new( req: AnalyzeReq, ranges: Vec, @@ -77,13 +79,14 @@ impl AnalyzeContext { storage_stats: Statistics::default(), quota_limiter, is_auto_analyze, + _phantom: PhantomData, }) } // handle_column is used to process `AnalyzeColumnsReq` // it would build a histogram for the primary key(if needed) and // collectors for each column value. 
- async fn handle_column(builder: &mut SampleBuilder) -> Result> { + async fn handle_column(builder: &mut SampleBuilder) -> Result> { let (col_res, _) = builder.collect_columns_stats().await?; let res_data = { @@ -93,7 +96,7 @@ impl AnalyzeContext { Ok(res_data) } - async fn handle_mixed(builder: &mut SampleBuilder) -> Result> { + async fn handle_mixed(builder: &mut SampleBuilder) -> Result> { let (col_res, idx_res) = builder.collect_columns_stats().await?; let res_data = { @@ -109,7 +112,7 @@ impl AnalyzeContext { Ok(res_data) } - async fn handle_full_sampling(builder: &mut RowSampleBuilder) -> Result> { + async fn handle_full_sampling(builder: &mut RowSampleBuilder) -> Result> { let sample_res = builder.collect_column_stats().await?; let res_data = { let res = sample_res.into_proto(); @@ -122,7 +125,7 @@ impl AnalyzeContext { // it would build a histogram and count-min sketch of index values. async fn handle_index( req: AnalyzeIndexReq, - scanner: &mut RangesScanner>>, + scanner: &mut RangesScanner>, F>, is_common_handle: bool, ) -> Result> { let mut hist = Histogram::new(req.get_bucket_size() as usize); @@ -142,8 +145,8 @@ impl AnalyzeContext { } else { ANALYZE_VERSION_V1 }; - while let Some((key, _)) = scanner.next().await? { - let mut key = &key[..]; + while let Some(row) = scanner.next().await? 
{ + let mut key = row.key(); if is_common_handle { table::check_record_key(key)?; key = &key[table::PREFIX_LEN..]; @@ -209,14 +212,14 @@ impl AnalyzeContext { } #[async_trait] -impl RequestHandler for AnalyzeContext { +impl RequestHandler for AnalyzeContext { async fn handle_request(&mut self) -> Result> { let ret = match self.req.get_tp() { AnalyzeType::TypeIndex | AnalyzeType::TypeCommonHandle => { let req = self.req.take_idx_req(); let ranges = std::mem::take(&mut self.ranges); - table::check_table_ranges(&ranges)?; - let mut scanner = RangesScanner::new(RangesScannerOptions { + table::check_table_ranges::(&ranges)?; + let mut scanner = RangesScanner::<_, F>::new(RangesScannerOptions { storage: self.storage.take().unwrap(), ranges: ranges .into_iter() @@ -240,7 +243,7 @@ impl RequestHandler for AnalyzeContext { let col_req = self.req.take_col_req(); let storage = self.storage.take().unwrap(); let ranges = std::mem::take(&mut self.ranges); - let mut builder = SampleBuilder::new(col_req, None, storage, ranges)?; + let mut builder = SampleBuilder::<_, F>::new(col_req, None, storage, ranges)?; let res = AnalyzeContext::handle_column(&mut builder).await; builder.data.collect_storage_stats(&mut self.storage_stats); res @@ -252,7 +255,8 @@ impl RequestHandler for AnalyzeContext { let idx_req = self.req.take_idx_req(); let storage = self.storage.take().unwrap(); let ranges = std::mem::take(&mut self.ranges); - let mut builder = SampleBuilder::new(col_req, Some(idx_req), storage, ranges)?; + let mut builder = + SampleBuilder::<_, F>::new(col_req, Some(idx_req), storage, ranges)?; let res = AnalyzeContext::handle_mixed(&mut builder).await; builder.data.collect_storage_stats(&mut self.storage_stats); res @@ -263,7 +267,7 @@ impl RequestHandler for AnalyzeContext { let storage = self.storage.take().unwrap(); let ranges = std::mem::take(&mut self.ranges); - let mut builder = RowSampleBuilder::new( + let mut builder = RowSampleBuilder::<_, F>::new( col_req, storage, ranges, 
@@ -302,8 +306,8 @@ impl RequestHandler for AnalyzeContext { } } -struct RowSampleBuilder { - data: BatchTableScanExecutor>>, +struct RowSampleBuilder { + data: BatchTableScanExecutor>, F>, max_sample_size: usize, max_fm_sketch_size: usize, @@ -314,7 +318,7 @@ struct RowSampleBuilder { is_auto_analyze: bool, } -impl RowSampleBuilder { +impl RowSampleBuilder { fn new( mut req: AnalyzeColumnsReq, storage: TikvStorage>, @@ -784,8 +788,8 @@ impl Drop for BaseRowSampleCollector { } } -struct SampleBuilder { - data: BatchTableScanExecutor>>, +struct SampleBuilder { + data: BatchTableScanExecutor>, F>, max_bucket_size: usize, max_sample_size: usize, @@ -802,7 +806,7 @@ struct SampleBuilder { /// `SampleBuilder` is used to analyze columns. It collects sample from /// the result set using Reservoir Sampling algorithm, estimates NDVs /// using FM Sketch during the collecting process, and builds count-min sketch. -impl SampleBuilder { +impl SampleBuilder { fn new( mut req: AnalyzeColumnsReq, common_handle_req: Option, diff --git a/tests/benches/coprocessor_executors/index_scan/util.rs b/tests/benches/coprocessor_executors/index_scan/util.rs index 7531fb68944..8d579c98a4f 100644 --- a/tests/benches/coprocessor_executors/index_scan/util.rs +++ b/tests/benches/coprocessor_executors/index_scan/util.rs @@ -2,6 +2,7 @@ use std::{marker::PhantomData, sync::Arc}; +use api_version::ApiV1; use criterion::black_box; use futures::executor::block_on; use kvproto::coprocessor::KeyRange; @@ -33,7 +34,7 @@ impl scan_bencher::ScanExecutorBuilder for BatchIndexScan store: &Store, unique: bool, ) -> Self::E { - let mut executor = BatchIndexScanExecutor::new( + let mut executor = BatchIndexScanExecutor::<_, ApiV1>::new( black_box(TikvStorage::new( ToTxnStore::::to_store(store), false, diff --git a/tests/benches/coprocessor_executors/integrated/util.rs b/tests/benches/coprocessor_executors/integrated/util.rs index d9cb5fd2138..4b747307049 100644 --- 
a/tests/benches/coprocessor_executors/integrated/util.rs +++ b/tests/benches/coprocessor_executors/integrated/util.rs @@ -2,6 +2,7 @@ use std::{marker::PhantomData, sync::Arc}; +use api_version::ApiV1; use criterion::{black_box, measurement::Measurement}; use kvproto::coprocessor::KeyRange; use test_coprocessor::*; @@ -71,7 +72,7 @@ where store: &Store, ) { crate::util::bencher::BatchNextAllBencher::new(|| { - tidb_query_executors::runner::build_executors( + tidb_query_executors::runner::build_executors::<_, ApiV1>( black_box(executors.to_vec()), black_box(TikvStorage::new(ToTxnStore::::to_store(store), false)), black_box(ranges.to_vec()), diff --git a/tests/benches/coprocessor_executors/table_scan/util.rs b/tests/benches/coprocessor_executors/table_scan/util.rs index 2fe7c4fc4c0..0b2185074c8 100644 --- a/tests/benches/coprocessor_executors/table_scan/util.rs +++ b/tests/benches/coprocessor_executors/table_scan/util.rs @@ -2,6 +2,7 @@ use std::{marker::PhantomData, sync::Arc}; +use api_version::ApiV1; use criterion::black_box; use futures::executor::block_on; use kvproto::coprocessor::KeyRange; @@ -33,7 +34,7 @@ impl scan_bencher::ScanExecutorBuilder for BatchTableScan store: &Store, _: (), ) -> Self::E { - let mut executor = BatchTableScanExecutor::new( + let mut executor = BatchTableScanExecutor::<_, ApiV1>::new( black_box(TikvStorage::new( ToTxnStore::::to_store(store), false, diff --git a/tests/benches/coprocessor_executors/util/mod.rs b/tests/benches/coprocessor_executors/util/mod.rs index 5ef442a25cd..0a5708c74ce 100644 --- a/tests/benches/coprocessor_executors/util/mod.rs +++ b/tests/benches/coprocessor_executors/util/mod.rs @@ -8,6 +8,7 @@ pub mod store; use std::{marker::PhantomData, sync::Arc}; +use api_version::ApiV1; use criterion::{black_box, measurement::Measurement}; use kvproto::coprocessor::KeyRange; use test_coprocessor::*; @@ -41,7 +42,7 @@ pub fn build_dag_handler( let mut dag = DagRequest::default(); 
dag.set_executors(executors.to_vec().into()); - tikv::coprocessor::dag::DagHandlerBuilder::new( + tikv::coprocessor::dag::DagHandlerBuilder::<_, ApiV1>::new( black_box(dag), black_box(ranges.to_vec()), black_box(ToTxnStore::::to_store(store)), diff --git a/tests/integrations/coprocessor/test_checksum.rs b/tests/integrations/coprocessor/test_checksum.rs index 66df6b2832c..405070842b4 100644 --- a/tests/integrations/coprocessor/test_checksum.rs +++ b/tests/integrations/coprocessor/test_checksum.rs @@ -2,6 +2,7 @@ use std::u64; +use api_version::{keyspace::KvPair, ApiV1}; use futures::executor::block_on; use kvproto::{ coprocessor::{KeyRange, Request}, @@ -79,7 +80,7 @@ fn reversed_checksum_crc64_xor(store: &Store, range: KeyRange) -> Default::default(), false, ); - let mut scanner = RangesScanner::new(RangesScannerOptions { + let mut scanner = RangesScanner::<_, ApiV1>::new(RangesScannerOptions { storage: TikvStorage::new(store, false), ranges: vec![Range::from_pb_range(range, false)], scan_backward_in_range: true, @@ -89,10 +90,11 @@ fn reversed_checksum_crc64_xor(store: &Store, range: KeyRange) -> let mut checksum = 0; let digest = crc64fast::Digest::new(); - while let Some((k, v)) = block_on(scanner.next()).unwrap() { + while let Some(row) = block_on(scanner.next()).unwrap() { + let (k, v) = row.kv(); let mut digest = digest.clone(); - digest.write(&k); - digest.write(&v); + digest.write(k); + digest.write(v); checksum ^= digest.sum64(); } checksum From e2e9f9c2a62051dc21cdb28767e41e65fc79acee Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 17 Jan 2023 23:21:49 +0800 Subject: [PATCH 0460/1149] storage: add priority scheduling for scheduler worker (#14057) ref tikv/tikv#13730 Support priority-based scheduling for the scheduler worker pool. 
Signed-off-by: Connor1996 Co-authored-by: Xinye Tao --- Cargo.lock | 2 + components/resource_control/src/lib.rs | 4 +- .../resource_control/src/resource_group.rs | 26 +- components/server/src/server.rs | 43 +-- components/server/src/server2.rs | 37 +-- components/test_raftstore/Cargo.toml | 1 + components/test_raftstore/src/cluster.rs | 18 +- components/test_raftstore/src/node.rs | 2 + components/test_raftstore/src/server.rs | 5 + .../tikv_util/src/yatp_pool/future_pool.rs | 2 + components/tikv_util/src/yatp_pool/mod.rs | 65 ++-- src/config/mod.rs | 20 +- src/read_pool.rs | 3 +- src/server/metrics.rs | 6 + src/server/service/kv.rs | 20 ++ src/storage/mod.rs | 55 +++- src/storage/txn/commands/mod.rs | 7 + src/storage/txn/mod.rs | 2 +- src/storage/txn/sched_pool.rs | 165 ++++++++-- src/storage/txn/scheduler.rs | 289 ++++++------------ tests/Cargo.toml | 1 + tests/failpoints/cases/test_storage.rs | 5 +- 22 files changed, 441 insertions(+), 337 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 069dbc4950e..ab1d164a1e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5884,6 +5884,7 @@ dependencies = [ "raftstore", "rand 0.8.5", "resolved_ts", + "resource_control", "resource_metering", "security", "server", @@ -5997,6 +5998,7 @@ dependencies = [ "raftstore", "rand 0.8.5", "rand_xorshift", + "resource_control", "resource_metering", "security", "serde_json", diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index 516e5dd6c8d..eb6679f71e8 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -4,7 +4,9 @@ use online_config::OnlineConfig; use serde::{Deserialize, Serialize}; mod resource_group; -pub use resource_group::{ResourceController, ResourceGroupManager, MIN_PRIORITY_UPDATE_INTERVAL}; +pub use resource_group::{ + ResourceConsumeType, ResourceController, ResourceGroupManager, MIN_PRIORITY_UPDATE_INTERVAL, +}; mod future; pub use future::ControlledFuture; diff --git 
a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index d9fa3ccf14c..70f89fd1a9d 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -382,40 +382,40 @@ mod tests { resource_manager.add_resource_group(group2); assert_eq!(resource_manager.resource_groups.len(), 2); - let resouce_ctl = resource_manager.derive_controller("test_read".into(), true); - assert_eq!(resouce_ctl.resource_consumptions.len(), 3); + let resource_ctl = resource_manager.derive_controller("test_read".into(), true); + assert_eq!(resource_ctl.resource_consumptions.len(), 3); - let group1 = resouce_ctl.resource_group("test".as_bytes()); + let group1 = resource_ctl.resource_group("test".as_bytes()); assert_eq!(group1.weight, 500); - let group2 = resouce_ctl.resource_group("test2".as_bytes()); + let group2 = resource_ctl.resource_group("test2".as_bytes()); assert_eq!(group2.weight, 250); assert_eq!(group1.current_vt(), 0); let mut extras1 = Extras::single_level(); extras1.set_metadata("test".as_bytes().to_owned()); - assert_eq!(resouce_ctl.priority_of(&extras1), 25_000); + assert_eq!(resource_ctl.priority_of(&extras1), 25_000); assert_eq!(group1.current_vt(), 25_000); let mut extras2 = Extras::single_level(); extras2.set_metadata("test2".as_bytes().to_owned()); - assert_eq!(resouce_ctl.priority_of(&extras2), 12_500); + assert_eq!(resource_ctl.priority_of(&extras2), 12_500); assert_eq!(group2.current_vt(), 12_500); let mut extras3 = Extras::single_level(); extras3.set_metadata("unknown_group".as_bytes().to_owned()); - assert_eq!(resouce_ctl.priority_of(&extras3), 50); + assert_eq!(resource_ctl.priority_of(&extras3), 50); assert_eq!( - resouce_ctl + resource_ctl .resource_group("default".as_bytes()) .current_vt(), 50 ); - resouce_ctl.consume( + resource_ctl.consume( "test".as_bytes(), ResourceConsumeType::CpuTime(Duration::from_micros(10000)), ); - resouce_ctl.consume( + 
resource_ctl.consume( "test2".as_bytes(), ResourceConsumeType::CpuTime(Duration::from_micros(10000)), ); @@ -429,7 +429,7 @@ mod tests { assert_eq!(group1_vt, 5_025_000); assert!(group2.current_vt() >= group1.current_vt() * 3 / 4); assert!( - resouce_ctl + resource_ctl .resource_group("default".as_bytes()) .current_vt() >= group1.current_vt() / 2 @@ -442,8 +442,8 @@ mod tests { let new_group = new_resource_group("new_group".into(), true, 500, 500); resource_manager.add_resource_group(new_group); - assert_eq!(resouce_ctl.resource_consumptions.len(), 4); - let group3 = resouce_ctl.resource_group("new_group".as_bytes()); + assert_eq!(resource_ctl.resource_consumptions.len(), 4); + let group3 = resource_ctl.resource_group("new_group".as_bytes()); assert_eq!(group3.weight, 200); assert!(group3.current_vt() >= group1_vt / 2); } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 52b9fbf1d1a..cfc7e59e243 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -245,7 +245,7 @@ struct TikvServer { check_leader_worker: Worker, sst_worker: Option>>, quota_limiter: Arc, - resource_manager: Arc, + resource_manager: Option>, causal_ts_provider: Option>, // used for rawkv apiv2 tablet_registry: Option>, br_snap_recovery_mode: bool, // use for br snapshot recovery @@ -322,23 +322,27 @@ where let config = cfg_controller.get_current(); let store_path = Path::new(&config.storage.data_dir).to_owned(); - let resource_manager = Arc::new(ResourceGroupManager::default()); - - // Initialize raftstore channels. - let (router, system) = fsm::create_raft_batch_system(&config.raft_store); let thread_count = config.server.background_thread_count; let background_worker = WorkerBuilder::new("background") .thread_count(thread_count) .create(); - // spawn a task to periodically update the minimal virtual time of all resource - // group. 
- if config.resource_control.enabled { - let resource_mgr1 = resource_manager.clone(); + + let resource_manager = if config.resource_control.enabled { + let mgr = Arc::new(ResourceGroupManager::default()); + let mgr1 = mgr.clone(); + // spawn a task to periodically update the minimal virtual time of all resource + // group. background_worker.spawn_interval_task(MIN_PRIORITY_UPDATE_INTERVAL, move || { - resource_mgr1.advance_min_virtual_time(); + mgr1.advance_min_virtual_time(); }); - } + Some(mgr) + } else { + None + }; + + // Initialize raftstore channels. + let (router, system) = fsm::create_raft_batch_system(&config.raft_store); let mut coprocessor_host = Some(CoprocessorHost::new( router.clone(), @@ -745,19 +749,15 @@ where } let unified_read_pool = if self.config.readpool.is_unified_pool_enabled() { - let priority_mgr = if self.config.resource_control.enabled { - Some( - self.resource_manager - .derive_controller("unified-read-pool".into(), true), - ) - } else { - None - }; + let resource_ctl = self + .resource_manager + .as_ref() + .map(|m| m.derive_controller("unified-read-pool".into(), true)); Some(build_yatp_read_pool( &self.config.readpool.unified, pd_sender.clone(), engines.engine.clone(), - priority_mgr, + resource_ctl, )) } else { None @@ -831,6 +831,9 @@ where Arc::clone(&self.quota_limiter), self.pd_client.feature_gate().clone(), self.causal_ts_provider.clone(), + self.resource_manager + .as_ref() + .map(|m| m.derive_controller("scheduler-worker-pool".to_owned(), true)), ) .unwrap_or_else(|e| fatal!("failed to create raft storage: {}", e)); cfg_controller.register( diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 12e6af61613..03b02e5f81e 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -222,7 +222,7 @@ struct TikvServer { check_leader_worker: Worker, sst_worker: Option>>, quota_limiter: Arc, - resource_manager: Arc, + resource_manager: Option>, causal_ts_provider: Option>, 
// used for rawkv apiv2 tablet_registry: Option>, } @@ -287,15 +287,19 @@ where config.quota.max_delay_duration, config.quota.enable_auto_tune, )); - let resource_manager = Arc::new(ResourceGroupManager::default()); - // spawn a task to periodically update the minimal virtual time of all resource - // group. - if config.resource_control.enabled { - let resource_mgr1 = resource_manager.clone(); + + let resource_manager = if config.resource_control.enabled { + let mgr = Arc::new(ResourceGroupManager::default()); + let mgr1 = mgr.clone(); + // spawn a task to periodically update the minimal virtual time of all resource + // group. background_worker.spawn_interval_task(MIN_PRIORITY_UPDATE_INTERVAL, move || { - resource_mgr1.advance_min_virtual_time(); + mgr1.advance_min_virtual_time(); }); - } + Some(mgr) + } else { + None + }; let mut causal_ts_provider = None; if let ApiVersion::V2 = F::TAG { @@ -634,19 +638,15 @@ where let pd_sender = raftstore_v2::FlowReporter::new(pd_worker.scheduler()); let unified_read_pool = if self.config.readpool.is_unified_pool_enabled() { - let priority_mgr = if self.config.resource_control.enabled { - Some( - self.resource_manager - .derive_controller("unified-read-pool".into(), true), - ) - } else { - None - }; + let resource_ctl = self + .resource_manager + .as_ref() + .map(|m| m.derive_controller("unified-read-pool".into(), true)); Some(build_yatp_read_pool( &self.config.readpool.unified, pd_sender.clone(), engines.engine.clone(), - priority_mgr, + resource_ctl, )) } else { None @@ -719,6 +719,9 @@ where Arc::clone(&self.quota_limiter), self.pd_client.feature_gate().clone(), self.causal_ts_provider.clone(), + self.resource_manager + .as_ref() + .map(|m| m.derive_controller("scheduler-worker-pool".to_owned(), true)), ) .unwrap_or_else(|e| fatal!("failed to create raft storage: {}", e)); cfg_controller.register( diff --git a/components/test_raftstore/Cargo.toml b/components/test_raftstore/Cargo.toml index 71c214ae21d..25a1224e261 100644 
--- a/components/test_raftstore/Cargo.toml +++ b/components/test_raftstore/Cargo.toml @@ -49,6 +49,7 @@ raft = { version = "0.7.0", default-features = false, features = ["protobuf-code raftstore = { workspace = true, features = ["testexport"] } rand = "0.8" resolved_ts = { workspace = true } +resource_control = { workspace = true } resource_metering = { workspace = true } security = { workspace = true } server = { workspace = true } diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index b2330e26f93..2121b7e021f 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -46,6 +46,7 @@ use raftstore::{ }, Error, Result, }; +use resource_control::ResourceGroupManager; use tempfile::TempDir; use test_pd_client::TestPdClient; use tikv::server::Result as ServerResult; @@ -80,6 +81,7 @@ pub trait Simulator { key_manager: Option>, router: RaftRouter, system: RaftBatchSystem, + resource_manager: &Arc, ) -> ServerResult; fn stop_node(&mut self, node_id: u64); fn get_node_ids(&self) -> HashSet; @@ -174,6 +176,7 @@ pub struct Cluster { pub raft_statistics: Vec>>, pub sim: Arc>, pub pd_client: Arc, + resource_manager: Arc, } impl Cluster { @@ -207,6 +210,7 @@ impl Cluster { pd_client, sst_workers: vec![], sst_workers_map: HashMap::default(), + resource_manager: Arc::new(ResourceGroupManager::default()), kv_statistics: vec![], raft_statistics: vec![], } @@ -294,6 +298,7 @@ impl Cluster { key_mgr.clone(), router, system, + &self.resource_manager, )?; self.group_props.insert(node_id, props); self.engines.insert(node_id, engines); @@ -365,9 +370,16 @@ impl Cluster { tikv_util::thread_group::set_properties(Some(props)); debug!("calling run node"; "node_id" => node_id); // FIXME: rocksdb event listeners may not work, because we change the router. 
- self.sim - .wl() - .run_node(node_id, cfg, engines, store_meta, key_mgr, router, system)?; + self.sim.wl().run_node( + node_id, + cfg, + engines, + store_meta, + key_mgr, + router, + system, + &self.resource_manager, + )?; debug!("node {} started", node_id); Ok(()) } diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 78d98e5a5d3..9ae76dba9f8 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -30,6 +30,7 @@ use raftstore::{ }, Result, }; +use resource_control::ResourceGroupManager; use resource_metering::CollectorRegHandle; use tempfile::TempDir; use test_pd_client::TestPdClient; @@ -229,6 +230,7 @@ impl Simulator for NodeCluster { key_manager: Option>, router: RaftRouter, system: RaftBatchSystem, + _resource_manager: &Arc, ) -> ServerResult { assert!(node_id == 0 || !self.nodes.contains_key(&node_id)); let pd_worker = LazyWorker::new("test-pd-worker"); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 0ec60e468ee..ccf4df43497 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -42,6 +42,7 @@ use raftstore::{ }, Result, }; +use resource_control::ResourceGroupManager; use resource_metering::{CollectorRegHandle, ResourceTagFactory}; use security::SecurityManager; use tempfile::TempDir; @@ -264,6 +265,7 @@ impl ServerCluster { key_manager: Option>, router: RaftRouter, system: RaftBatchSystem, + resource_manager: &Arc, ) -> ServerResult { let (tmp_str, tmp) = if node_id == 0 || !self.snap_paths.contains_key(&node_id) { let p = test_util::temp_dir("test_cluster", cfg.prefer_mem); @@ -414,6 +416,7 @@ impl ServerCluster { quota_limiter.clone(), self.pd_client.feature_gate().clone(), self.get_causal_ts_provider(node_id), + Some(resource_manager.derive_controller("scheduler-worker-pool".to_owned(), true)), )?; self.storages.insert(node_id, raft_engine); @@ -649,6 +652,7 @@ impl 
Simulator for ServerCluster { key_manager: Option>, router: RaftRouter, system: RaftBatchSystem, + resource_manager: &Arc, ) -> ServerResult { dispatch_api_version!( cfg.storage.api_version(), @@ -660,6 +664,7 @@ impl Simulator for ServerCluster { key_manager, router, system, + resource_manager, ) ) } diff --git a/components/tikv_util/src/yatp_pool/future_pool.rs b/components/tikv_util/src/yatp_pool/future_pool.rs index e74ced848c0..f010b508aaa 100644 --- a/components/tikv_util/src/yatp_pool/future_pool.rs +++ b/components/tikv_util/src/yatp_pool/future_pool.rs @@ -28,6 +28,8 @@ struct Env { } #[derive(Clone)] +// FuturePool wraps a yatp thread pool providing task count metrics and gate +// maximum running tasks. pub struct FuturePool { inner: Arc, } diff --git a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index 29376b904a5..305d2162482 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -198,42 +198,42 @@ impl YatpPoolBuilder { } } - pub fn config(&mut self, config: Config) -> &mut Self { + pub fn config(self, config: Config) -> Self { // TODO: maybe we should use (1, num_cpu) for min and max thread count. 
self.thread_count(config.workers, config.workers, config.workers) .stack_size(config.stack_size) .max_tasks(config.workers.saturating_mul(config.max_tasks_per_worker)) } - pub fn stack_size(&mut self, val: usize) -> &mut Self { + pub fn stack_size(mut self, val: usize) -> Self { self.stack_size = val; self } - pub fn name_prefix(&mut self, val: impl Into) -> &mut Self { + pub fn name_prefix(mut self, val: impl Into) -> Self { let name = val.into(); self.name_prefix = Some(name); self } pub fn thread_count( - &mut self, + mut self, min_thread_count: usize, core_thread_count: usize, max_thread_count: usize, - ) -> &mut Self { + ) -> Self { self.min_thread_count = min_thread_count; self.core_thread_count = core_thread_count; self.max_thread_count = max_thread_count; self } - pub fn max_tasks(&mut self, tasks: usize) -> &mut Self { + pub fn max_tasks(mut self, tasks: usize) -> Self { self.max_tasks = tasks; self } - pub fn before_stop(&mut self, f: F) -> &mut Self + pub fn before_stop(mut self, f: F) -> Self where F: Fn() + Send + Sync + 'static, { @@ -241,7 +241,7 @@ impl YatpPoolBuilder { self } - pub fn after_start(&mut self, f: F) -> &mut Self + pub fn after_start(mut self, f: F) -> Self where F: Fn() + Send + Sync + 'static, { @@ -249,7 +249,7 @@ impl YatpPoolBuilder { self } - pub fn before_pause(&mut self, f: F) -> &mut Self + pub fn before_pause(mut self, f: F) -> Self where F: Fn() + Send + Sync + 'static, { @@ -257,13 +257,32 @@ impl YatpPoolBuilder { self } - pub fn build_future_pool(&mut self) -> FuturePool { + pub fn build_future_pool(self) -> FuturePool { + let name = self + .name_prefix + .clone() + .unwrap_or_else(|| "yatp_pool".to_string()); + let size = self.core_thread_count; + let task = self.max_tasks; let pool = self.build_single_level_pool(); - let name = self.name_prefix.as_deref().unwrap_or("yatp_pool"); - FuturePool::from_pool(pool, name, self.core_thread_count, self.max_tasks) + FuturePool::from_pool(pool, &name, size, task) + } + + pub fn 
build_priority_future_pool( + self, + priority_provider: Arc, + ) -> FuturePool { + let name = self + .name_prefix + .clone() + .unwrap_or_else(|| "yatp_pool".to_string()); + let size = self.core_thread_count; + let task = self.max_tasks; + let pool = self.build_priority_pool(priority_provider); + FuturePool::from_pool(pool, &name, size, task) } - pub fn build_single_level_pool(&mut self) -> ThreadPool { + pub fn build_single_level_pool(self) -> ThreadPool { let (builder, runner) = self.create_builder(); builder.build_with_queue_and_runner( yatp::queue::QueueType::SingleLevel, @@ -271,9 +290,12 @@ impl YatpPoolBuilder { ) } - pub fn build_multi_level_pool(&mut self) -> ThreadPool { + pub fn build_multi_level_pool(self) -> ThreadPool { + let name = self + .name_prefix + .clone() + .unwrap_or_else(|| "yatp_pool".to_string()); let (builder, read_pool_runner) = self.create_builder(); - let name = self.name_prefix.as_deref().unwrap_or("yatp_pool"); let multilevel_builder = multilevel::Builder::new(multilevel::Config::default().name(Some(name))); let runner_builder = @@ -283,11 +305,14 @@ impl YatpPoolBuilder { } pub fn build_priority_pool( - &mut self, + self, priority_provider: Arc, ) -> ThreadPool { + let name = self + .name_prefix + .clone() + .unwrap_or_else(|| "yatp_pool".to_string()); let (builder, read_pool_runner) = self.create_builder(); - let name = self.name_prefix.as_deref().unwrap_or("yatp_pool"); let priority_builder = priority::Builder::new( priority::Config::default().name(Some(name)), priority_provider, @@ -296,8 +321,8 @@ impl YatpPoolBuilder { builder.build_with_queue_and_runner(QueueType::Priority(priority_builder), runner_builder) } - fn create_builder(&mut self) -> (yatp::Builder, YatpPoolRunner) { - let name = self.name_prefix.as_deref().unwrap_or("yatp_pool"); + fn create_builder(mut self) -> (yatp::Builder, YatpPoolRunner) { + let name = self.name_prefix.unwrap_or_else(|| "yatp_pool".to_string()); let mut builder = 
yatp::Builder::new(thd_name!(name)); builder .stack_size(self.stack_size) @@ -309,7 +334,7 @@ impl YatpPoolBuilder { let before_stop = self.before_stop.take(); let before_pause = self.before_pause.take(); let schedule_wait_duration = - metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[name]); + metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[&name]); let read_pool_runner = YatpPoolRunner::new( Default::default(), self.ticker.clone(), diff --git a/src/config/mod.rs b/src/config/mod.rs index 9caa68d8e6b..7878696faa5 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -4930,14 +4930,8 @@ mod tests { let max_pool_size = std::cmp::max(4, SysQuota::cpu_cores_quota() as usize); let check_scale_pool_size = |size: usize, ok: bool| { - let origin_pool_size = scheduler - .get_sched_pool(CommandPri::Normal) - .pool - .get_pool_size(); - let origin_pool_size_high = scheduler - .get_sched_pool(CommandPri::High) - .pool - .get_pool_size(); + let origin_pool_size = scheduler.get_sched_pool().get_pool_size(CommandPri::Normal); + let origin_pool_size_high = scheduler.get_sched_pool().get_pool_size(CommandPri::High); let res = cfg_controller .update_config("storage.scheduler-worker-pool-size", &format!("{}", size)); let (expected_size, expected_size_high) = if ok { @@ -4948,17 +4942,11 @@ mod tests { (origin_pool_size, origin_pool_size_high) }; assert_eq!( - scheduler - .get_sched_pool(CommandPri::Normal) - .pool - .get_pool_size(), + scheduler.get_sched_pool().get_pool_size(CommandPri::Normal), expected_size ); assert_eq!( - scheduler - .get_sched_pool(CommandPri::High) - .pool - .get_pool_size(), + scheduler.get_sched_pool().get_pool_size(CommandPri::High), expected_size_high ); }; diff --git a/src/read_pool.rs b/src/read_pool.rs index 1a590679584..ea20b149a3d 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -295,8 +295,7 @@ pub fn build_yatp_read_pool( ) -> ReadPool { let unified_read_pool_name = get_unified_read_pool_name(); let 
raftkv = Arc::new(Mutex::new(engine)); - let mut builder = YatpPoolBuilder::new(ReporterTicker { reporter }); - builder + let builder = YatpPoolBuilder::new(ReporterTicker { reporter }) .name_prefix(&unified_read_pool_name) .stack_size(config.stack_size.0 as usize) .thread_count( diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 23f8256835b..d35c58cbf34 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -207,6 +207,12 @@ lazy_static! { &["type"] ) .unwrap(); + pub static ref GRPC_RESOURCE_GROUP_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( + "tikv_grpc_resource_group_total", + "Total number of handle grpc message for each resource group", + &["name"] + ) + .unwrap(); pub static ref GRPC_PROXY_MSG_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( "tikv_grpc_proxy_msg_total", "Total number of handle grpc proxy message", diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 6c85741f64a..d42eb510891 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -171,6 +171,10 @@ macro_rules! 
handle_request { let begin_instant = Instant::now(); let source = req.mut_context().take_request_source(); + let resource_group_name = req.get_context().get_resource_group_name(); + GRPC_RESOURCE_GROUP_COUNTER_VEC + .with_label_values(&[resource_group_name]) + .inc(); let resp = $future_name(&self.storage, req); let task = async move { let resp = resp.await?; @@ -1043,6 +1047,10 @@ fn handle_batch_commands_request( response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::invalid, String::default()); }, Some(batch_commands_request::request::Cmd::Get(mut req)) => { + let resource_group_name = req.get_context().get_resource_group_name(); + GRPC_RESOURCE_GROUP_COUNTER_VEC + .with_label_values(&[resource_group_name]) + .inc(); if batcher.as_mut().map_or(false, |req_batch| { req_batch.can_batch_get(&req) }) { @@ -1057,6 +1065,10 @@ fn handle_batch_commands_request( } }, Some(batch_commands_request::request::Cmd::RawGet(mut req)) => { + let resource_group_name = req.get_context().get_resource_group_name(); + GRPC_RESOURCE_GROUP_COUNTER_VEC + .with_label_values(&[resource_group_name]) + .inc(); if batcher.as_mut().map_or(false, |req_batch| { req_batch.can_batch_raw_get(&req) }) { @@ -1071,6 +1083,10 @@ fn handle_batch_commands_request( } }, Some(batch_commands_request::request::Cmd::Coprocessor(mut req)) => { + let resource_group_name = req.get_context().get_resource_group_name(); + GRPC_RESOURCE_GROUP_COUNTER_VEC + .with_label_values(&[resource_group_name]) + .inc(); let begin_instant = Instant::now(); let source = req.mut_context().take_request_source(); let resp = future_copr(copr, Some(peer.to_string()), req) @@ -1098,6 +1114,10 @@ fn handle_batch_commands_request( ); } $(Some(batch_commands_request::request::Cmd::$cmd(mut req)) => { + let resource_group_name = req.get_context().get_resource_group_name(); + GRPC_RESOURCE_GROUP_COUNTER_VEC + .with_label_values(&[resource_group_name]) + .inc(); let begin_instant = Instant::now(); let source = 
req.mut_context().take_request_source(); let resp = $future_fn($($arg,)* req) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 0819c2599b9..7429ed8900b 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -89,6 +89,7 @@ use kvproto::{ use pd_client::FeatureGate; use raftstore::store::{util::build_key_range, ReadStats, TxnExt, WriteStats}; use rand::prelude::*; +use resource_control::ResourceController; use resource_metering::{FutureExt, ResourceTagFactory}; use tikv_kv::{OnAppliedCb, SnapshotExt}; use tikv_util::{ @@ -129,7 +130,7 @@ use crate::{ txn::{ commands::{RawAtomicStore, RawCompareAndSwap, TypedCommand}, flow_controller::{EngineFlowController, FlowController}, - scheduler::Scheduler as TxnScheduler, + scheduler::TxnScheduler, Command, ErrorInner as TxnError, }, types::StorageCallbackType, @@ -270,6 +271,7 @@ impl Storage { quota_limiter: Arc, feature_gate: FeatureGate, causal_ts_provider: Option>, + resource_ctl: Option>, ) -> Result { assert_eq!(config.api_version(), F::TAG, "Api version not match"); @@ -285,6 +287,7 @@ impl Storage { resource_tag_factory.clone(), Arc::clone("a_limiter), feature_gate, + resource_ctl, ); info!("Storage started."); @@ -1509,15 +1512,20 @@ impl Storage { // Schedule raw modify commands, which reuse the scheduler worker pool. // TODO: separate the txn and raw commands if needed in the future. 
- fn sched_raw_command(&self, tag: CommandKind, future: T) -> Result<()> + fn sched_raw_command( + &self, + group_name: &str, + pri: CommandPri, + tag: CommandKind, + future: T, + ) -> Result<()> where - T: Future + Send + 'static, + T: Future + Send + 'static, { SCHED_STAGE_COUNTER_VEC.get(tag).new.inc(); self.sched - .get_sched_pool(CommandPri::Normal) - .pool - .spawn(future) + .get_sched_pool() + .spawn(group_name, pri, future) .map_err(|_| Error::from(ErrorInner::SchedTooBusy)) } @@ -1955,7 +1963,10 @@ impl Storage { let provider = self.causal_ts_provider.clone(); let engine = self.engine.clone(); let concurrency_manager = self.concurrency_manager.clone(); - self.sched_raw_command(CMD, async move { + + let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().to_owned(); + self.sched_raw_command(&group_name, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } @@ -2065,7 +2076,9 @@ impl Storage { let engine = self.engine.clone(); let concurrency_manager = self.concurrency_manager.clone(); let deadline = Self::get_deadline(&ctx); - self.sched_raw_command(CMD, async move { + let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().to_owned(); + self.sched_raw_command(&group_name, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } @@ -2128,7 +2141,9 @@ impl Storage { let engine = self.engine.clone(); let concurrency_manager = self.concurrency_manager.clone(); let deadline = Self::get_deadline(&ctx); - self.sched_raw_command(CMD, async move { + let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().to_owned(); + self.sched_raw_command(&group_name, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } @@ -2187,7 +2202,9 @@ impl Storage { let cf = Self::rawkv_cf(&cf, self.api_version)?; let engine = self.engine.clone(); let deadline = 
Self::get_deadline(&ctx); - self.sched_raw_command(CMD, async move { + let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().to_owned(); + self.sched_raw_command(&group_name, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } @@ -2233,7 +2250,9 @@ impl Storage { let engine = self.engine.clone(); let concurrency_manager = self.concurrency_manager.clone(); let deadline = Self::get_deadline(&ctx); - self.sched_raw_command(CMD, async move { + let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().to_owned(); + self.sched_raw_command(&group_name, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } @@ -2672,7 +2691,9 @@ impl Storage { return Err(Error::from(ErrorInner::TtlNotEnabled)); } let sched = self.get_scheduler(); - self.sched_raw_command(CMD, async move { + let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().to_owned(); + self.sched_raw_command(&group_name, priority, CMD, async move { let key = F::encode_raw_key_owned(key, None); let cmd = RawCompareAndSwap::new(cf, key, previous_value, value, ttl, api_version, ctx); Self::sched_raw_atomic_command( @@ -2703,7 +2724,9 @@ impl Storage { Self::check_ttl_valid(pairs.len(), &ttls)?; let sched = self.get_scheduler(); - self.sched_raw_command(CMD, async move { + let priority = ctx.get_priority(); + let group_name = ctx.get_resource_group_name().to_owned(); + self.sched_raw_command(&group_name, priority, CMD, async move { let modifies = Self::raw_batch_put_requests_to_modifies(cf, pairs, ttls, None); let cmd = RawAtomicStore::new(cf, modifies, ctx); Self::sched_raw_atomic_command( @@ -2726,7 +2749,9 @@ impl Storage { Self::check_api_version(self.api_version, ctx.api_version, CMD, &keys)?; let cf = Self::rawkv_cf(&cf, self.api_version)?; let sched = self.get_scheduler(); - self.sched_raw_command(CMD, async move { + let priority = 
ctx.get_priority(); + let group_name = ctx.get_resource_group_name().to_owned(); + self.sched_raw_command(&group_name, priority, CMD, async move { // Do NOT encode ts here as RawAtomicStore use key to gen lock let modifies = keys .into_iter() @@ -3183,6 +3208,7 @@ impl TestStorageBuilder { Arc::new(QuotaLimiter::default()), latest_feature_gate(), ts_provider, + None, ) } @@ -3213,6 +3239,7 @@ impl TestStorageBuilder { Arc::new(QuotaLimiter::default()), latest_feature_gate(), None, + Some(Arc::new(ResourceController::new("test".to_owned(), false))), ) } } diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 2d79ebc97cc..5b94ea5bd85 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -715,6 +715,13 @@ impl Command { self.command_ext().get_ctx().get_priority() } + pub fn group_name(&self) -> String { + self.command_ext() + .get_ctx() + .get_resource_group_name() + .to_owned() + } + pub fn need_flow_control(&self) -> bool { !self.readonly() && self.priority() != CommandPri::High } diff --git a/src/storage/txn/mod.rs b/src/storage/txn/mod.rs index f6884b0efb8..d3b199208cb 100644 --- a/src/storage/txn/mod.rs +++ b/src/storage/txn/mod.rs @@ -32,7 +32,7 @@ pub use self::{ }, commands::{Command, RESOLVE_LOCK_BATCH_SIZE}, latch::{Latches, Lock}, - scheduler::Scheduler, + scheduler::TxnScheduler, store::{ EntryBatch, FixtureStore, FixtureStoreScanner, Scanner, SnapshotStore, Store, TxnEntry, TxnEntryScanner, TxnEntryStore, diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index c7c69b5bbf4..0cff9d51d41 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -8,14 +8,16 @@ use std::{ use collections::HashMap; use file_system::{set_io_type, IoType}; -use kvproto::pdpb::QueryKind; +use kvproto::{kvrpcpb::CommandPri, pdpb::QueryKind}; use pd_client::{Feature, FeatureGate}; use prometheus::local::*; use raftstore::store::WriteStats; +use 
resource_control::{ControlledFuture, ResourceController}; use tikv_util::{ sys::SysQuota, - yatp_pool::{FuturePool, PoolTicker, YatpPoolBuilder}, + yatp_pool::{Full, FuturePool, PoolTicker, YatpPoolBuilder}, }; +use yatp::queue::Extras; use crate::storage::{ kv::{destroy_tls_engine, set_tls_engine, Engine, FlowStatsReporter, Statistics}, @@ -41,11 +43,6 @@ thread_local! { static TLS_FEATURE_GATE: RefCell = RefCell::new(latest_feature_gate()); } -#[derive(Clone)] -pub struct SchedPool { - pub pool: FuturePool, -} - #[derive(Clone)] pub struct SchedTicker { reporter: R, @@ -57,38 +54,142 @@ impl PoolTicker for SchedTicker { } } +#[derive(Clone)] +pub enum SchedPool { + // separated thread pools for different priority commands + Vanilla { + high_worker_pool: FuturePool, + worker_pool: FuturePool, + }, + // one priority based thread pool to handle all commands + Priority { + worker_pool: FuturePool, + resource_ctl: Arc, + }, +} + impl SchedPool { pub fn new( engine: E, pool_size: usize, reporter: R, feature_gate: FeatureGate, - name_prefix: &str, + resource_ctl: Option>, ) -> Self { - let engine = Arc::new(Mutex::new(engine)); - // for low cpu quota env, set the max-thread-count as 4 to allow potential cases - // that we need more thread than cpu num. - let max_pool_size = std::cmp::max( - pool_size, - std::cmp::max(4, SysQuota::cpu_cores_quota() as usize), - ); - let pool = YatpPoolBuilder::new(SchedTicker {reporter:reporter.clone()}) - .thread_count(1, pool_size, max_pool_size) - .name_prefix(name_prefix) - // Safety: by setting `after_start` and `before_stop`, `FuturePool` ensures - // the tls_engine invariants. - .after_start(move || { - set_tls_engine(engine.lock().unwrap().clone()); - set_io_type(IoType::ForegroundWrite); - TLS_FEATURE_GATE.with(|c| *c.borrow_mut() = feature_gate.clone()); - }) - .before_stop(move || unsafe { - // Safety: we ensure the `set_` and `destroy_` calls use the same engine type. 
- destroy_tls_engine::(); - tls_flush(&reporter); - }) - .build_future_pool(); - SchedPool { pool } + let builder = |pool_size: usize, name_prefix: &str| { + let engine = Arc::new(Mutex::new(engine.clone())); + let feature_gate = feature_gate.clone(); + let reporter = reporter.clone(); + // for low cpu quota env, set the max-thread-count as 4 to allow potential cases + // that we need more thread than cpu num. + let max_pool_size = std::cmp::max( + pool_size, + std::cmp::max(4, SysQuota::cpu_cores_quota() as usize), + ); + YatpPoolBuilder::new(SchedTicker {reporter:reporter.clone()}) + .thread_count(1, pool_size, max_pool_size) + .name_prefix(name_prefix) + // Safety: by setting `after_start` and `before_stop`, `FuturePool` ensures + // the tls_engine invariants. + .after_start(move || { + set_tls_engine(engine.lock().unwrap().clone()); + set_io_type(IoType::ForegroundWrite); + TLS_FEATURE_GATE.with(|c| *c.borrow_mut() = feature_gate.clone()); + }) + .before_stop(move || unsafe { + // Safety: we ensure the `set_` and `destroy_` calls use the same engine type. 
+ destroy_tls_engine::(); + tls_flush(&reporter); + }) + }; + if let Some(ref r) = resource_ctl { + SchedPool::Priority { + worker_pool: builder(pool_size, "sched-worker-pool") + .build_priority_future_pool(r.clone()), + resource_ctl: r.clone(), + } + } else { + SchedPool::Vanilla { + worker_pool: builder(pool_size, "sched-worker-pool").build_future_pool(), + high_worker_pool: builder(std::cmp::max(1, pool_size / 2), "sched-high-pri-pool") + .build_future_pool(), + } + } + } + + pub fn spawn( + &self, + group_name: &str, + priority: CommandPri, + f: impl futures::Future + Send + 'static, + ) -> Result<(), Full> { + match self { + SchedPool::Vanilla { + high_worker_pool, + worker_pool, + } => { + if priority == CommandPri::High { + high_worker_pool.spawn(f) + } else { + worker_pool.spawn(f) + } + } + SchedPool::Priority { + worker_pool, + resource_ctl, + } => { + let fixed_level = match priority { + CommandPri::High => Some(0), + CommandPri::Normal => None, + CommandPri::Low => Some(2), + }; + // TODO: maybe use a better way to generate task_id + let task_id = rand::random::(); + let mut extras = Extras::new_multilevel(task_id, fixed_level); + extras.set_metadata(group_name.as_bytes().to_owned()); + worker_pool.spawn_with_extras( + ControlledFuture::new( + async move { + f.await; + }, + resource_ctl.clone(), + group_name.as_bytes().to_owned(), + ), + extras, + ) + } + } + } + + pub fn scale_pool_size(&self, pool_size: usize) { + match self { + SchedPool::Vanilla { + high_worker_pool, + worker_pool, + } => { + high_worker_pool.scale_pool_size(std::cmp::max(1, pool_size / 2)); + worker_pool.scale_pool_size(pool_size); + } + SchedPool::Priority { worker_pool, .. 
} => { + worker_pool.scale_pool_size(pool_size); + } + } + } + + pub fn get_pool_size(&self, priority: CommandPri) -> usize { + match self { + SchedPool::Vanilla { + high_worker_pool, + worker_pool, + } => { + if priority == CommandPri::High { + high_worker_pool.get_pool_size() + } else { + worker_pool.get_pool_size() + } + } + SchedPool::Priority { worker_pool, .. } => worker_pool.get_pool_size(), + } } } diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index d96e3e7c97f..17110a07e7b 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -1,7 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. // #[PerformanceCriticalPath -//! Scheduler which schedules the execution of `storage::Command`s. +//! TxnScheduler which schedules the execution of `storage::Command`s. //! //! There is one scheduler for each store. It receives commands from clients, //! executes them against the MVCC layer storage engine. @@ -12,16 +12,16 @@ //! leader. When the client read or write a row, the command is sent to the //! scheduler which is on the region leader's store. //! -//! Scheduler runs in a single-thread event loop, but command executions are +//! TxnScheduler runs in a single-thread event loop, but command executions are //! delegated to a pool of worker thread. //! -//! Scheduler keeps track of all the running commands and uses latches to ensure -//! serialized access to the overlapping rows involved in concurrent commands. -//! But note that scheduler only ensures serialized access to the overlapping -//! rows at command level, but a transaction may consist of multiple commands, -//! therefore conflicts may happen at transaction level. Transaction semantics -//! is ensured by the transaction protocol implemented in the client library, -//! which is transparent to the scheduler. +//! TxnScheduler keeps track of all the running commands and uses latches to +//! 
ensure serialized access to the overlapping rows involved in concurrent +//! commands. But note that scheduler only ensures serialized access to the +//! overlapping rows at command level, but a transaction may consist of multiple +//! commands, therefore conflicts may happen at transaction level. Transaction +//! semantics is ensured by the transaction protocol implemented in the client +//! library, which is transparent to the scheduler. use std::{ marker::PhantomData, @@ -47,12 +47,11 @@ use kvproto::{ use parking_lot::{Mutex, MutexGuard, RwLockWriteGuard}; use pd_client::{Feature, FeatureGate}; use raftstore::store::TxnExt; +use resource_control::ResourceController; use resource_metering::{FutureExt, ResourceTagFactory}; use smallvec::{smallvec, SmallVec}; use tikv_kv::{Modify, Snapshot, SnapshotExt, WriteData, WriteEvent}; -use tikv_util::{ - deadline::Deadline, quota_limiter::QuotaLimiter, time::Instant, timer::GLOBAL_TIMER_HANDLE, -}; +use tikv_util::{quota_limiter::QuotaLimiter, time::Instant, timer::GLOBAL_TIMER_HANDLE}; use tracker::{get_tls_tracker_token, set_tls_tracker_token, TrackerToken}; use txn_types::TimeStamp; @@ -239,7 +238,7 @@ impl SchedulerTaskCallback { } } -struct SchedulerInner { +struct TxnSchedulerInner { // slot_id -> { cid -> `TaskContext` } in the slot. task_slots: Vec>>>, @@ -251,11 +250,8 @@ struct SchedulerInner { sched_pending_write_threshold: usize, - // worker pool - worker_pool: SchedPool, - - // high priority commands and system commands will be delivered to this pool - high_priority_pool: SchedPool, + // all tasks are executed in this pool + sched_worker_pool: SchedPool, // used to control write flow running_write_bytes: CachePadded, @@ -292,7 +288,7 @@ fn id_index(cid: u64) -> usize { cid as usize % TASKS_SLOTS_NUM } -impl SchedulerInner { +impl TxnSchedulerInner { /// Generates the next command ID. 
#[inline] fn gen_id(&self) -> u64 { @@ -375,19 +371,23 @@ impl SchedulerInner { /// /// Returns a deadline error if the deadline is exceeded. Returns the `Task` /// if all latches are acquired, returns `None` otherwise. - fn acquire_lock_on_wakeup(&self, cid: u64) -> Result, StorageError> { + fn acquire_lock_on_wakeup( + &self, + cid: u64, + ) -> Result, (String, CommandPri, StorageError)> { let mut task_slot = self.get_task_slot(cid); let tctx = task_slot.get_mut(&cid).unwrap(); // Check deadline early during acquiring latches to avoid expired requests // blocking other requests. - if let Err(e) = tctx.task.as_ref().unwrap().cmd.deadline().check() { + let cmd = &tctx.task.as_ref().unwrap().cmd; + if let Err(e) = cmd.deadline().check() { // `acquire_lock_on_wakeup` is called when another command releases its locks // and wakes up command `cid`. This command inserted its lock before // and now the lock is at the front of the queue. The actual // acquired count is one more than the `owned_count` recorded in the // lock, so we increase one to make `release` work. tctx.lock.owned_count += 1; - return Err(e.into()); + return Err((cmd.group_name(), cmd.priority(), e.into())); } if self.latches.acquire(&mut tctx.lock, cid) { tctx.on_schedule(); @@ -401,25 +401,22 @@ impl SchedulerInner { } fn scale_pool_size(&self, pool_size: usize) { - self.worker_pool.pool.scale_pool_size(pool_size); - self.high_priority_pool - .pool - .scale_pool_size(std::cmp::max(1, pool_size / 2)); + self.sched_worker_pool.scale_pool_size(pool_size); } } -/// Scheduler which schedules the execution of `storage::Command`s. +/// TxnScheduler which schedules the execution of `storage::Command`s. #[derive(Clone)] -pub struct Scheduler { - inner: Arc>, +pub struct TxnScheduler { + inner: Arc>, // The engine can be fetched from the thread local storage of scheduler threads. // So, we don't store the engine here. 
_engine: PhantomData, } -unsafe impl Send for Scheduler {} +unsafe impl Send for TxnScheduler {} -impl Scheduler { +impl TxnScheduler { /// Creates a scheduler. pub(in crate::storage) fn new( engine: E, @@ -433,6 +430,7 @@ impl Scheduler { resource_tag_factory: ResourceTagFactory, quota_limiter: Arc, feature_gate: FeatureGate, + resource_ctl: Option>, ) -> Self { let t = Instant::now_coarse(); let mut task_slots = Vec::with_capacity(TASKS_SLOTS_NUM); @@ -442,25 +440,18 @@ impl Scheduler { let lock_wait_queues = LockWaitQueues::new(lock_mgr.clone()); - let inner = Arc::new(SchedulerInner { + let inner = Arc::new(TxnSchedulerInner { task_slots, id_alloc: AtomicU64::new(0).into(), latches: Latches::new(config.scheduler_concurrency), running_write_bytes: AtomicUsize::new(0).into(), sched_pending_write_threshold: config.scheduler_pending_write_threshold.0 as usize, - worker_pool: SchedPool::new( - engine.clone(), - config.scheduler_worker_pool_size, - reporter.clone(), - feature_gate.clone(), - "sched-worker-pool", - ), - high_priority_pool: SchedPool::new( + sched_worker_pool: SchedPool::new( engine, - std::cmp::max(1, config.scheduler_worker_pool_size / 2), + config.scheduler_worker_pool_size, reporter, feature_gate.clone(), - "sched-high-pri-pool", + resource_ctl, ), control_mutex: Arc::new(tokio::sync::Mutex::new(false)), lock_mgr, @@ -481,7 +472,7 @@ impl Scheduler { t.saturating_elapsed(), "initialized the transaction scheduler" ); - Scheduler { + TxnScheduler { inner, _engine: PhantomData, } @@ -561,26 +552,19 @@ impl Scheduler { return; } let task = tctx.task.as_ref().unwrap(); - let deadline = task.cmd.deadline(); - let cmd_ctx = task.cmd.ctx().clone(); - self.fail_fast_or_check_deadline(cid, tag, cmd_ctx, deadline); + self.fail_fast_or_check_deadline(cid, &task.cmd); fail_point!("txn_scheduler_acquire_fail"); } - fn fail_fast_or_check_deadline( - &self, - cid: u64, - tag: CommandKind, - cmd_ctx: Context, - deadline: Deadline, - ) { + fn 
fail_fast_or_check_deadline(&self, cid: u64, cmd: &Command) { + let tag = cmd.tag(); + let ctx = cmd.ctx().clone(); + let deadline = cmd.deadline(); let sched = self.clone(); - self.inner - .high_priority_pool - .pool - .spawn(async move { + self.get_sched_pool() + .spawn(&cmd.group_name(), cmd.priority(), async move { match unsafe { - with_tls_engine(|engine: &mut E| engine.precheck_write_with_ctx(&cmd_ctx)) + with_tls_engine(|engine: &mut E| engine.precheck_write_with_ctx(&ctx)) } { // Precheck failed, try to return err early. Err(e) => { @@ -632,14 +616,12 @@ impl Scheduler { self.execute(task); } Ok(None) => {} - Err(err) => { + Err((group_name, pri, err)) => { // Spawn the finish task to the pool to avoid stack overflow // when many queuing tasks fail successively. let this = self.clone(); - self.inner - .worker_pool - .pool - .spawn(async move { + self.get_sched_pool() + .spawn(&group_name, pri, async move { this.finish_with_err(cid, err); }) .unwrap(); @@ -670,21 +652,17 @@ impl Scheduler { } // pub for test - pub fn get_sched_pool(&self, priority: CommandPri) -> &SchedPool { - if priority == CommandPri::High { - &self.inner.high_priority_pool - } else { - &self.inner.worker_pool - } + pub fn get_sched_pool(&self) -> &SchedPool { + &self.inner.sched_worker_pool } /// Executes the task in the sched pool. fn execute(&self, mut task: Task) { set_tls_tracker_token(task.tracker); let sched = self.clone(); - self.get_sched_pool(task.cmd.priority()) - .pool - .spawn(async move { + + self.get_sched_pool() + .spawn(&task.cmd.group_name(), task.cmd.priority(), async move { fail_point!("scheduler_start_execute"); if sched.check_task_deadline_exceeded(&task) { return; @@ -800,6 +778,7 @@ impl Scheduler { async_apply_prewrite: bool, new_acquired_locks: Vec, tag: CommandKind, + group_name: &str, ) { // TODO: Does async apply prewrite worth a special metric here? 
if pipelined { @@ -847,7 +826,7 @@ impl Scheduler { assert!(pipelined || async_apply_prewrite); } - self.on_acquired_locks_finished(new_acquired_locks); + self.on_acquired_locks_finished(group_name, new_acquired_locks); if do_wake_up { let woken_up_resumable_lock_requests = tctx.woken_up_resumable_lock_requests; @@ -932,7 +911,11 @@ impl Scheduler { ); } - fn on_release_locks(&self, released_locks: ReleasedLocks) -> SVec> { + fn on_release_locks( + &self, + group_name: &str, + released_locks: ReleasedLocks, + ) -> SVec> { // This function is always called when holding the latch of the involved keys. // So if we found the lock waiting queues are empty, there's no chance // that other threads/commands adds new lock-wait entries to the keys @@ -973,13 +956,21 @@ impl Scheduler { }); if !legacy_wake_up_list.is_empty() || !delay_wake_up_futures.is_empty() { - self.wake_up_legacy_pessimistic_locks(legacy_wake_up_list, delay_wake_up_futures); + self.wake_up_legacy_pessimistic_locks( + group_name, + legacy_wake_up_list, + delay_wake_up_futures, + ); } resumable_wake_up_list } - fn on_acquired_locks_finished(&self, new_acquired_locks: Vec) { + fn on_acquired_locks_finished( + &self, + group_name: &str, + new_acquired_locks: Vec, + ) { if new_acquired_locks.is_empty() || self.inner.lock_wait_queues.is_empty() { return; } @@ -992,9 +983,8 @@ impl Scheduler { .update_lock_wait(new_acquired_locks); } else { let lock_wait_queues = self.inner.lock_wait_queues.clone(); - self.get_sched_pool(CommandPri::High) - .pool - .spawn(async move { + self.get_sched_pool() + .spawn(group_name, CommandPri::High, async move { lock_wait_queues.update_lock_wait(new_acquired_locks); }) .unwrap(); @@ -1003,15 +993,16 @@ impl Scheduler { fn wake_up_legacy_pessimistic_locks( &self, + group_name: &str, legacy_wake_up_list: impl IntoIterator, ReleasedLock)> + Send + 'static, delayed_wake_up_futures: impl IntoIterator + Send + 'static, ) { let self1 = self.clone(); - 
self.get_sched_pool(CommandPri::High) - .pool - .spawn(async move { + let group_name1 = group_name.to_owned(); + self.get_sched_pool() + .spawn(group_name, CommandPri::High, async move { for (lock_info, released_lock) in legacy_wake_up_list { let cb = lock_info.key_cb.unwrap().into_inner(); let e = StorageError::from(Error::from(MvccError::from( @@ -1030,9 +1021,8 @@ impl Scheduler { for f in delayed_wake_up_futures { let self2 = self1.clone(); self1 - .get_sched_pool(CommandPri::High) - .pool - .spawn(async move { + .get_sched_pool() + .spawn(&group_name1, CommandPri::High, async move { let res = f.await; if let Some(resumable_lock_wait_entry) = res { self2.schedule_awakened_pessimistic_locks( @@ -1121,7 +1111,7 @@ impl Scheduler { } /// Processes a read command within a worker thread, then posts - /// `ReadFinished` message back to the `Scheduler`. + /// `ReadFinished` message back to the `TxnScheduler`. fn process_read(self, snapshot: E::Snap, task: Task, statistics: &mut Statistics) { fail_point!("txn_before_process_read"); debug!("process read cmd in worker pool"; "cid" => task.cid); @@ -1144,12 +1134,13 @@ impl Scheduler { /// Processes a write command within a worker thread, then posts either a /// `WriteFinished` message if successful or a `FinishedWithErr` message - /// back to the `Scheduler`. + /// back to the `TxnScheduler`. 
async fn process_write(self, snapshot: E::Snap, task: Task, statistics: &mut Statistics) { fail_point!("txn_before_process_write"); let write_bytes = task.cmd.write_bytes(); let tag = task.cmd.tag(); let cid = task.cid; + let group_name = task.cmd.group_name(); let tracker = task.tracker; let scheduler = self.clone(); let quota_limiter = self.inner.quota_limiter.clone(); @@ -1285,7 +1276,7 @@ impl Scheduler { } let woken_up_resumable_entries = if !released_locks.is_empty() { - scheduler.on_release_locks(released_locks) + scheduler.on_release_locks(&group_name, released_locks) } else { smallvec![] }; @@ -1306,6 +1297,7 @@ impl Scheduler { false, new_acquired_locks, tag, + &group_name, ); return; } @@ -1336,6 +1328,7 @@ impl Scheduler { false, new_acquired_locks, tag, + &group_name, ); return; } @@ -1522,6 +1515,7 @@ impl Scheduler { is_async_apply_prewrite, new_acquired_locks, tag, + &group_name, ); KV_COMMAND_KEYWRITE_HISTOGRAM_VEC .get(tag) @@ -1828,7 +1822,7 @@ mod tests { } // TODO(cosven): use this in the following test cases to reduce duplicate code. 
- fn new_test_scheduler() -> (Scheduler, RocksEngine) { + fn new_test_scheduler() -> (TxnScheduler, RocksEngine) { let engine = TestEngineBuilder::new().build().unwrap(); let config = Config { scheduler_concurrency: 1024, @@ -1838,7 +1832,7 @@ mod tests { ..Default::default() }; ( - Scheduler::new( + TxnScheduler::new( engine.clone(), MockLockManager::new(), ConcurrencyManager::new(1.into()), @@ -1854,6 +1848,7 @@ mod tests { ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), latest_feature_gate(), + Some(Arc::new(ResourceController::new("test".to_owned(), true))), ), engine, ) @@ -1978,31 +1973,7 @@ mod tests { #[test] fn test_acquire_latch_deadline() { - let engine = TestEngineBuilder::new().build().unwrap(); - let config = Config { - scheduler_concurrency: 1024, - scheduler_worker_pool_size: 1, - scheduler_pending_write_threshold: ReadableSize(100 * 1024 * 1024), - enable_async_apply_prewrite: false, - ..Default::default() - }; - let scheduler = Scheduler::new( - engine, - MockLockManager::new(), - ConcurrencyManager::new(1.into()), - &config, - DynamicConfigs { - pipelined_pessimistic_lock: Arc::new(AtomicBool::new(true)), - in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), - wake_up_delay_duration_ms: Arc::new(AtomicU64::new(0)), - }, - Arc::new(FlowController::Singleton(EngineFlowController::empty())), - None, - DummyReporter, - ResourceTagFactory::new_for_test(), - Arc::new(QuotaLimiter::default()), - latest_feature_gate(), - ); + let (scheduler, _) = new_test_scheduler(); let mut lock = Lock::new(&[Key::from_raw(b"b")]); let cid = scheduler.inner.gen_id(); @@ -2084,38 +2055,15 @@ mod tests { #[test] fn test_pool_available_deadline() { - let engine = TestEngineBuilder::new().build().unwrap(); - let config = Config { - scheduler_concurrency: 1024, - scheduler_worker_pool_size: 1, - scheduler_pending_write_threshold: ReadableSize(100 * 1024 * 1024), - enable_async_apply_prewrite: false, - ..Default::default() - }; - let 
scheduler = Scheduler::new( - engine, - MockLockManager::new(), - ConcurrencyManager::new(1.into()), - &config, - DynamicConfigs { - pipelined_pessimistic_lock: Arc::new(AtomicBool::new(true)), - in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), - wake_up_delay_duration_ms: Arc::new(AtomicU64::new(0)), - }, - Arc::new(FlowController::Singleton(EngineFlowController::empty())), - None, - DummyReporter, - ResourceTagFactory::new_for_test(), - Arc::new(QuotaLimiter::default()), - latest_feature_gate(), - ); + let (scheduler, _) = new_test_scheduler(); // Spawn a task that sleeps for 500ms to occupy the pool. The next request // cannot run within 500ms. scheduler - .get_sched_pool(CommandPri::Normal) - .pool - .spawn(async { thread::sleep(Duration::from_millis(500)) }) + .get_sched_pool() + .spawn("", CommandPri::Normal, async { + thread::sleep(Duration::from_millis(500)) + }) .unwrap(); let mut req = BatchRollbackRequest::default(); @@ -2144,31 +2092,7 @@ mod tests { #[test] fn test_flow_control_trottle_deadline() { - let engine = TestEngineBuilder::new().build().unwrap(); - let config = Config { - scheduler_concurrency: 1024, - scheduler_worker_pool_size: 1, - scheduler_pending_write_threshold: ReadableSize(100 * 1024 * 1024), - enable_async_apply_prewrite: false, - ..Default::default() - }; - let scheduler = Scheduler::new( - engine, - MockLockManager::new(), - ConcurrencyManager::new(1.into()), - &config, - DynamicConfigs { - pipelined_pessimistic_lock: Arc::new(AtomicBool::new(true)), - in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), - wake_up_delay_duration_ms: Arc::new(AtomicU64::new(0)), - }, - Arc::new(FlowController::Singleton(EngineFlowController::empty())), - None, - DummyReporter, - ResourceTagFactory::new_for_test(), - Arc::new(QuotaLimiter::default()), - latest_feature_gate(), - ); + let (scheduler, _) = new_test_scheduler(); let mut req = CheckTxnStatusRequest::default(); req.mut_context().max_execution_duration_ms = 100; @@ 
-2212,31 +2136,7 @@ mod tests { #[test] fn test_accumulate_many_expired_commands() { - let engine = TestEngineBuilder::new().build().unwrap(); - let config = Config { - scheduler_concurrency: 1024, - scheduler_worker_pool_size: 1, - scheduler_pending_write_threshold: ReadableSize(100 * 1024 * 1024), - enable_async_apply_prewrite: false, - ..Default::default() - }; - let scheduler = Scheduler::new( - engine, - MockLockManager::new(), - ConcurrencyManager::new(1.into()), - &config, - DynamicConfigs { - pipelined_pessimistic_lock: Arc::new(AtomicBool::new(true)), - in_memory_pessimistic_lock: Arc::new(AtomicBool::new(false)), - wake_up_delay_duration_ms: Arc::new(AtomicU64::new(0)), - }, - Arc::new(FlowController::Singleton(EngineFlowController::empty())), - None, - DummyReporter, - ResourceTagFactory::new_for_test(), - Arc::new(QuotaLimiter::default()), - latest_feature_gate(), - ); + let (scheduler, _) = new_test_scheduler(); let mut lock = Lock::new(&[Key::from_raw(b"b")]); let cid = scheduler.inner.gen_id(); @@ -2283,7 +2183,7 @@ mod tests { let feature_gate = FeatureGate::default(); feature_gate.set_version("6.0.0").unwrap(); - let scheduler = Scheduler::new( + let scheduler = TxnScheduler::new( engine, MockLockManager::new(), ConcurrencyManager::new(1.into()), @@ -2299,6 +2199,7 @@ mod tests { ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), feature_gate.clone(), + Some(Arc::new(ResourceController::new("test".to_owned(), true))), ); // Use sync mode if pipelined_pessimistic_lock is false. 
assert_eq!(scheduler.pessimistic_lock_mode(), PessimisticLockMode::Sync); diff --git a/tests/Cargo.toml b/tests/Cargo.toml index ae6c6984487..1cc0e6bce87 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -95,6 +95,7 @@ raft = { version = "0.7.0", default-features = false, features = ["protobuf-code raft_log_engine = { workspace = true } raftstore = { workspace = true } rand = "0.8.3" +resource_control = { workspace = true } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } tempfile = "3.0" diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 2508b544285..1a7d44db972 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -312,10 +312,7 @@ fn test_scale_scheduler_pool() { .update_config("storage.scheduler-worker-pool-size", &format!("{}", size)) .unwrap(); assert_eq!( - scheduler - .get_sched_pool(CommandPri::Normal) - .pool - .get_pool_size(), + scheduler.get_sched_pool().get_pool_size(CommandPri::Normal), size ); }; From 7240e5778ef3c379b0f898c103dc675fad7af099 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 18 Jan 2023 11:47:50 +0800 Subject: [PATCH 0461/1149] fix docker build (#13937) ref tikv/tikv#11312 Fix `make docker`. 
Signed-off-by: tabokie --- Dockerfile | 11 ++++++++--- cmd/build.rs | 4 +++- components/profiler/Cargo.toml | 1 + scripts/check-docker-build | 2 +- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index c4ad36dc6e7..aefa51b2222 100644 --- a/Dockerfile +++ b/Dockerfile @@ -50,6 +50,11 @@ RUN ln -s /usr/bin/cmake3 /usr/bin/cmake ENV LIBRARY_PATH /usr/local/lib:$LIBRARY_PATH ENV LD_LIBRARY_PATH /usr/local/lib:$LD_LIBRARY_PATH +# Install protoc +RUN curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip" +RUN unzip protoc-3.15.8-linux-x86_64.zip -d /usr/local/ +ENV PATH /usr/local/bin/:$PATH + # Install Rustup RUN curl https://sh.rustup.rs -sSf | sh -s -- --no-modify-path --default-toolchain none -y ENV PATH /root/.cargo/bin/:$PATH @@ -72,8 +77,7 @@ RUN mkdir -p ./cmd/tikv-ctl/src ./cmd/tikv-server/src && \ echo 'fn main() {}' > ./cmd/tikv-ctl/src/main.rs && \ echo 'fn main() {}' > ./cmd/tikv-server/src/main.rs && \ for cargotoml in $(find . -type f -name "Cargo.toml"); do \ - sed -i '/fuzz/d' ${cargotoml} && \ - sed -i '/profiler/d' ${cargotoml} ; \ + sed -i '/fuzz/d' ${cargotoml} ; \ done COPY Makefile ./ @@ -105,8 +109,9 @@ FROM pingcap/alpine-glibc COPY --from=builder /tikv/target/release/tikv-server /tikv-server COPY --from=builder /tikv/target/release/tikv-ctl /tikv-ctl +# FIXME: Figure out why libstdc++ is not staticly linked. RUN apk add --no-cache \ - curl + curl libstdc++ EXPOSE 20160 20180 diff --git a/cmd/build.rs b/cmd/build.rs index 6d11a38f705..c19797d9227 100644 --- a/cmd/build.rs +++ b/cmd/build.rs @@ -32,7 +32,9 @@ fn link_sys_lib(lib: &str, tool: &cc::Tool) { } // remove lib prefix and .a postfix. 
let libname = &lib[3..lib.len() - 2]; - println!("cargo:rustc-link-lib=static:+whole-archive={}", &libname); + // Get around the issue "the linking modifiers `+bundle` and `+whole-archive` + // are not compatible with each other when generating rlibs" + println!("cargo:rustc-link-lib=static:-bundle,+whole-archive={}", &libname); println!( "cargo:rustc-link-search=native={}", path.parent().unwrap().display() diff --git a/components/profiler/Cargo.toml b/components/profiler/Cargo.toml index b0c456b209f..e5583a631d5 100644 --- a/components/profiler/Cargo.toml +++ b/components/profiler/Cargo.toml @@ -18,4 +18,5 @@ valgrind_request = { version = "1.1.0", optional = true } [[example]] name = "prime" +path = "examples/prime.rs" required-features = ["profiling"] diff --git a/scripts/check-docker-build b/scripts/check-docker-build index 6a505f31a89..0eee0c5cf1f 100755 --- a/scripts/check-docker-build +++ b/scripts/check-docker-build @@ -2,7 +2,7 @@ # This script checks if all cargo targets have path specifications. 
set -euo pipefail -for i in $(git ls-files | grep 'Cargo.toml' | grep -v 'fuzz/\|./profiler/'); do +for i in $(git ls-files | grep 'Cargo.toml' | grep -v 'fuzz/'); do for target in "test" "bench" "bin" "example"; do # from "[[test]]" to the first trailing empty line matches=$(sed -n "/\[\[$target\]\]/,/^$/ p" $i) From b35d4fb33a18c5be9136c790e01ca449075e6acb Mon Sep 17 00:00:00 2001 From: Hu# Date: Wed, 18 Jan 2023 14:57:51 +0800 Subject: [PATCH 0462/1149] pd_client: fix the kvproto compatibility (#14064) close tikv/tikv#14063 make sure kvproto compatibility Signed-off-by: husharp --- Cargo.lock | 2 +- components/error_code/src/pd.rs | 1 + components/pd_client/src/client.rs | 6 +----- components/pd_client/src/client_v2.rs | 6 +----- components/pd_client/src/errors.rs | 4 ++++ components/pd_client/src/util.rs | 1 + components/resource_control/src/resource_group.rs | 8 ++++---- etc/error_code.toml | 5 +++++ 8 files changed, 18 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ab1d164a1e0..a2924314f8a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2727,7 +2727,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#a14c44ef44b378d15adb5baad8402b838f031b51" +source = "git+https://github.com/pingcap/kvproto.git#adcf4c414bfd0ccf18436b377430aa2450fd4c81" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/error_code/src/pd.rs b/components/error_code/src/pd.rs index 3ca2ac0b29f..782c4f3923b 100644 --- a/components/error_code/src/pd.rs +++ b/components/error_code/src/pd.rs @@ -12,5 +12,6 @@ define_error_codes!( REGION_NOT_FOUND => ("RegionNotFound", "", ""), STORE_TOMBSTONE => ("StoreTombstone", "", ""), GLOBAL_CONFIG_NOT_FOUND => ("GlobalConfigNotFound","",""), + DATA_COMPACTED => ("DataCompacted","",""), UNKNOWN => ("Unknown", "", "") ); diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 5bccdcfacea..1e1e5980908 100644 --- 
a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -302,11 +302,7 @@ impl PdClient for RpcClient { Ok(grpc_response) => { let mut res = HashMap::with_capacity(grpc_response.get_items().len()); for c in grpc_response.get_items() { - if c.has_error() { - error!("failed to load global config with key {:?}", c.get_error()); - } else { - res.insert(c.get_name().to_owned(), c.get_value().to_owned()); - } + res.insert(c.get_name().to_owned(), c.get_value().to_owned()); } Ok(res) } diff --git a/components/pd_client/src/client_v2.rs b/components/pd_client/src/client_v2.rs index b42d8fb3ddb..35e5c3b4785 100644 --- a/components/pd_client/src/client_v2.rs +++ b/components/pd_client/src/client_v2.rs @@ -803,11 +803,7 @@ impl PdClient for RpcClient { Ok(grpc_response) => { let mut res = HashMap::with_capacity(grpc_response.get_items().len()); for c in grpc_response.get_items() { - if c.has_error() { - error!("failed to load global config with key {:?}", c.get_error()); - } else { - res.insert(c.get_name().to_owned(), c.get_value().to_owned()); - } + res.insert(c.get_name().to_owned(), c.get_value().to_owned()); } Ok(res) } diff --git a/components/pd_client/src/errors.rs b/components/pd_client/src/errors.rs index 61adceec391..689cb276064 100644 --- a/components/pd_client/src/errors.rs +++ b/components/pd_client/src/errors.rs @@ -26,6 +26,8 @@ pub enum Error { StoreTombstone(String), #[error("global config item {0} not found")] GlobalConfigNotFound(String), + #[error("required watch revision is smaller than current compact/min revision. 
{0:?}")] + DataCompacted(String), } pub type Result = result::Result; @@ -38,6 +40,7 @@ impl Error { | Error::RegionNotFound(_) | Error::StoreTombstone(_) | Error::GlobalConfigNotFound(_) + | Error::DataCompacted(_) | Error::ClusterBootstrapped(_) | Error::Incompatible => false, } @@ -55,6 +58,7 @@ impl ErrorCodeExt for Error { Error::RegionNotFound(_) => error_code::pd::REGION_NOT_FOUND, Error::StoreTombstone(_) => error_code::pd::STORE_TOMBSTONE, Error::GlobalConfigNotFound(_) => error_code::pd::GLOBAL_CONFIG_NOT_FOUND, + Error::DataCompacted(_) => error_code::pd::DATA_COMPACTED, Error::Other(_) => error_code::pd::UNKNOWN, } } diff --git a/components/pd_client/src/util.rs b/components/pd_client/src/util.rs index 72c8cc16b04..fd58cd921d8 100644 --- a/components/pd_client/src/util.rs +++ b/components/pd_client/src/util.rs @@ -873,6 +873,7 @@ pub fn check_resp_header(header: &ResponseHeader) -> Result<()> { ErrorType::GlobalConfigNotFound => { Err(Error::GlobalConfigNotFound(err.get_message().to_owned())) } + ErrorType::DataCompacted => Err(Error::DataCompacted(err.get_message().to_owned())), ErrorType::Ok => Ok(()), ErrorType::DuplicatedEntry | ErrorType::EntryNotFound => Err(box_err!(err.get_message())), ErrorType::Unknown => Err(box_err!(err.get_message())), diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 70f89fd1a9d..bfe9d92d0f3 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -51,12 +51,12 @@ impl ResourceGroupManager { // TODO: currently we only consider the cpu usage in the read path, we may also take // io read bytes into account later. 
(GroupMode::RawMode, true) => rg - .get_resource_settings() + .get_raw_resource_settings() .get_cpu() .get_settings() .get_fill_rate(), (GroupMode::RawMode, false) => rg - .get_resource_settings() + .get_raw_resource_settings() .get_io_write() .get_settings() .get_fill_rate(), @@ -327,7 +327,7 @@ mod tests { .set_fill_rate(write_tokens); group.set_r_u_settings(ru_setting); } else { - let mut resource_setting = GroupResourceSettings::new(); + let mut resource_setting = GroupRawResourceSettings::new(); resource_setting .mut_cpu() .mut_settings() @@ -336,7 +336,7 @@ mod tests { .mut_io_write() .mut_settings() .set_fill_rate(write_tokens); - group.set_resource_settings(resource_setting); + group.set_raw_resource_settings(resource_setting); } group } diff --git a/etc/error_code.toml b/etc/error_code.toml index 5cdd770f8d2..6b361e29e37 100644 --- a/etc/error_code.toml +++ b/etc/error_code.toml @@ -263,6 +263,11 @@ error = ''' KV:Pd:GlobalConfigNotFound ''' +["KV:Pd:DataCompacted"] +error = ''' +KV:Pd:DataCompacted +''' + ["KV:Pd:Unknown"] error = ''' KV:Pd:Unknown From 15445fd8a9c6832afeaf335a84c334fa13f6ecfe Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 19 Jan 2023 11:23:49 +0800 Subject: [PATCH 0463/1149] raftstore-v2: add more features to pd worker v2 (#14003) ref tikv/tikv#12842 Signed-off-by: tabokie --- components/engine_panic/src/misc.rs | 4 + components/engine_panic/src/snapshot.rs | 10 +- components/engine_rocks/src/misc.rs | 12 + components/engine_rocks/src/snapshot.rs | 10 +- components/engine_traits/src/misc.rs | 2 + components/engine_traits/src/snapshot.rs | 4 +- components/raftstore-v2/src/batch/store.rs | 17 +- components/raftstore-v2/src/lib.rs | 2 +- .../pd/{update_max_timestamp.rs => misc.rs} | 13 + components/raftstore-v2/src/worker/pd/mod.rs | 223 ++++++++-- .../pd/{region_heartbeat.rs => region.rs} | 180 +++++++- .../raftstore-v2/src/worker/pd/split.rs | 85 +++- .../pd/{store_heartbeat.rs => store.rs} | 11 + .../tests/integrations/cluster.rs | 
6 +- .../src/coprocessor/consistency_check.rs | 4 +- components/raftstore/src/store/fsm/store.rs | 5 +- components/raftstore/src/store/mod.rs | 9 +- components/raftstore/src/store/worker/mod.rs | 5 +- components/raftstore/src/store/worker/pd.rs | 408 ++++++++---------- components/server/src/server2.rs | 68 ++- components/test_raftstore/src/util.rs | 5 +- src/server/raftkv2/node.rs | 14 +- 22 files changed, 781 insertions(+), 316 deletions(-) rename components/raftstore-v2/src/worker/pd/{update_max_timestamp.rs => misc.rs} (89%) rename components/raftstore-v2/src/worker/pd/{region_heartbeat.rs => region.rs} (58%) rename components/raftstore-v2/src/worker/pd/{store_heartbeat.rs => store.rs} (96%) diff --git a/components/engine_panic/src/misc.rs b/components/engine_panic/src/misc.rs index 5e6fbe87267..93218767ec0 100644 --- a/components/engine_panic/src/misc.rs +++ b/components/engine_panic/src/misc.rs @@ -92,6 +92,10 @@ impl MiscExt for PanicEngine { panic!() } + fn get_num_keys(&self) -> Result { + panic!() + } + fn get_range_entries_and_versions( &self, cf: &str, diff --git a/components/engine_panic/src/snapshot.rs b/components/engine_panic/src/snapshot.rs index 296d7ce617a..f6cda5312cb 100644 --- a/components/engine_panic/src/snapshot.rs +++ b/components/engine_panic/src/snapshot.rs @@ -2,7 +2,9 @@ use std::ops::Deref; -use engine_traits::{IterOptions, Iterable, Iterator, Peekable, ReadOptions, Result, Snapshot}; +use engine_traits::{ + CfNamesExt, IterOptions, Iterable, Iterator, Peekable, ReadOptions, Result, Snapshot, +}; use crate::{db_vector::PanicDbVector, engine::PanicEngine}; @@ -36,6 +38,12 @@ impl Iterable for PanicSnapshot { } } +impl CfNamesExt for PanicSnapshot { + fn cf_names(&self) -> Vec<&str> { + panic!() + } +} + pub struct PanicSnapshotIterator; impl Iterator for PanicSnapshotIterator { diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index e339facaac4..3477226ae76 100644 --- 
a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -332,6 +332,18 @@ impl MiscExt for RocksEngine { .get_property_int_cf(handle, ROCKSDB_TOTAL_SST_FILES_SIZE)) } + fn get_num_keys(&self) -> Result { + let mut total = 0; + for cf in self.cf_names() { + let handle = util::get_cf_handle(self.as_inner(), cf).unwrap(); + total += self + .as_inner() + .get_property_int_cf(handle, ROCKSDB_ESTIMATE_NUM_KEYS) + .unwrap_or_default(); + } + Ok(total) + } + fn get_range_entries_and_versions( &self, cf: &str, diff --git a/components/engine_rocks/src/snapshot.rs b/components/engine_rocks/src/snapshot.rs index b19a32fd739..60a12c4ac6d 100644 --- a/components/engine_rocks/src/snapshot.rs +++ b/components/engine_rocks/src/snapshot.rs @@ -5,7 +5,9 @@ use std::{ sync::Arc, }; -use engine_traits::{self, IterOptions, Iterable, Peekable, ReadOptions, Result, Snapshot}; +use engine_traits::{ + self, CfNamesExt, IterOptions, Iterable, Peekable, ReadOptions, Result, Snapshot, +}; use rocksdb::{rocksdb_options::UnsafeSnap, DBIterator, DB}; use crate::{ @@ -95,3 +97,9 @@ impl Peekable for RocksSnapshot { Ok(v.map(RocksDbVector::from_raw)) } } + +impl CfNamesExt for RocksSnapshot { + fn cf_names(&self) -> Vec<&str> { + self.db.cf_names() + } +} diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index d9a07a1a915..5bbcbb2de79 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -115,6 +115,8 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { fn get_total_sst_files_size_cf(&self, cf: &str) -> Result>; + fn get_num_keys(&self) -> Result; + fn get_range_entries_and_versions( &self, cf: &str, diff --git a/components/engine_traits/src/snapshot.rs b/components/engine_traits/src/snapshot.rs index 7907abd1445..a5829161e25 100644 --- a/components/engine_traits/src/snapshot.rs +++ b/components/engine_traits/src/snapshot.rs @@ -2,7 +2,7 @@ use std::fmt::Debug; -use 
crate::{iterable::Iterable, peekable::Peekable}; +use crate::{iterable::Iterable, peekable::Peekable, CfNamesExt}; /// A consistent read-only view of the database. /// @@ -10,6 +10,6 @@ use crate::{iterable::Iterable, peekable::Peekable}; /// clonable, call `into_sync` to create a `SyncSnapshot`. pub trait Snapshot where - Self: 'static + Peekable + Iterable + Send + Sync + Sized + Debug, + Self: 'static + Peekable + Iterable + CfNamesExt + Send + Sync + Sized + Debug, { } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index ccf3f19f3ea..280e8dcc396 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -26,10 +26,11 @@ use raftstore::{ store::{ fsm::store::{PeerTickBatch, ENTRY_CACHE_EVICT_TICK_DURATION}, local_metrics::RaftMetrics, - Config, ReadRunner, ReadTask, SplitCheckRunner, SplitCheckTask, StoreWriters, - TabletSnapManager, Transport, WriteSenders, + AutoSplitController, Config, ReadRunner, ReadTask, SplitCheckRunner, SplitCheckTask, + StoreWriters, TabletSnapManager, Transport, WriteSenders, }, }; +use resource_metering::CollectorRegHandle; use slog::{warn, Logger}; use tikv_util::{ box_err, @@ -511,6 +512,8 @@ impl StoreSystem { concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 coprocessor_host: CoprocessorHost, + auto_split_controller: AutoSplitController, + collector_reg_handle: CollectorRegHandle, background: Worker, pd_worker: LazyWorker, ) -> Result<()> @@ -526,7 +529,9 @@ impl StoreSystem { .broadcast_normal(|| PeerMsg::Tick(PeerTick::PdHeartbeat)); }); - let purge_worker = if raft_engine.need_manual_purge() { + let purge_worker = if raft_engine.need_manual_purge() + && !cfg.value().raft_engine_purge_interval.0.is_zero() + { let worker = Worker::new("purge-worker"); let raft_clone = raft_engine.clone(); let logger = self.logger.clone(); @@ -567,10 +572,14 @@ impl StoreSystem { workers.pd.remote(), 
concurrency_manager, causal_ts_provider, + workers.pd.scheduler(), + auto_split_controller, + store_meta.lock().unwrap().region_read_progress.clone(), + collector_reg_handle, self.logger.clone(), self.shutdown.clone(), cfg.clone(), - )); + )?); let split_check_scheduler = workers.background.start( "split-check", diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 7ddb1687d91..b82b6de3931 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -41,4 +41,4 @@ pub use bootstrap::Bootstrap; pub use fsm::StoreMeta; pub use operation::{SimpleWriteBinary, SimpleWriteEncoder, StateStorage}; pub use raftstore::{store::Config, Error, Result}; -pub use worker::pd::{FlowReporter, Task as PdTask}; +pub use worker::pd::{PdReporter, Task as PdTask}; diff --git a/components/raftstore-v2/src/worker/pd/update_max_timestamp.rs b/components/raftstore-v2/src/worker/pd/misc.rs similarity index 89% rename from components/raftstore-v2/src/worker/pd/update_max_timestamp.rs rename to components/raftstore-v2/src/worker/pd/misc.rs index 178d00ebd15..68c624b089a 100644 --- a/components/raftstore-v2/src/worker/pd/update_max_timestamp.rs +++ b/components/raftstore-v2/src/worker/pd/misc.rs @@ -107,4 +107,17 @@ where self.remote.spawn(f); } } + + pub fn handle_report_min_resolved_ts(&mut self, store_id: u64, min_resolved_ts: u64) { + let resp = self + .pd_client + .report_min_resolved_ts(store_id, min_resolved_ts); + let logger = self.logger.clone(); + let f = async move { + if let Err(e) = resp.await { + warn!(logger, "report min resolved_ts failed"; "err" => ?e); + } + }; + self.remote.spawn(f); + } } diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index b54d088db66..b23d1500914 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -10,12 +10,14 @@ use collections::HashMap; use 
concurrency_manager::ConcurrencyManager; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use kvproto::{metapb, pdpb}; -use pd_client::PdClient; +use pd_client::{BucketStat, PdClient}; use raftstore::store::{ - util::KeysInfoFormatter, Config, FlowStatsReporter, ReadStats, TabletSnapManager, TxnExt, - WriteStats, + util::KeysInfoFormatter, AutoSplitController, Config, FlowStatsReporter, PdStatsMonitor, + ReadStats, RegionReadProgressRegistry, SplitInfo, StoreStatsReporter, TabletSnapManager, + TxnExt, WriteStats, NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, }; -use slog::{error, info, Logger}; +use resource_metering::{Collector, CollectorRegHandle, RawRecords}; +use slog::{error, Logger}; use tikv_util::{ config::VersionTrack, time::UnixSecs, @@ -28,22 +30,36 @@ use crate::{ router::{CmdResChannel, PeerMsg}, }; -mod region_heartbeat; +mod misc; +mod region; mod split; -mod store_heartbeat; -mod update_max_timestamp; +mod store; -pub use region_heartbeat::RegionHeartbeatTask; +pub use region::RegionHeartbeatTask; + +type RecordPairVec = Vec; pub enum Task { - RegionHeartbeat(RegionHeartbeatTask), + // In store.rs. StoreHeartbeat { stats: pdpb::StoreStats, // TODO: StoreReport, StoreDrAutoSyncStatus }, + UpdateStoreInfos { + cpu_usages: RecordPairVec, + read_io_rates: RecordPairVec, + write_io_rates: RecordPairVec, + }, + // In region.rs. + RegionHeartbeat(RegionHeartbeatTask), + ReportRegionBuckets(BucketStat), + UpdateReadStats(ReadStats), + UpdateWriteStats(WriteStats), + UpdateRegionCpuRecords(Arc), DestroyPeer { region_id: u64, }, + // In split.rs. AskBatchSplit { region: metapb::Region, split_keys: Vec>, @@ -54,24 +70,51 @@ pub enum Task { ReportBatchSplit { regions: Vec, }, + AutoSplit { + split_infos: Vec, + }, + // In misc.rs. 
UpdateMaxTimestamp { region_id: u64, initial_status: u64, txn_ext: Arc, }, + ReportMinResolvedTs { + store_id: u64, + min_resolved_ts: u64, + }, } impl Display for Task { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { + Task::StoreHeartbeat { ref stats, .. } => { + write!(f, "store heartbeat stats: {stats:?}") + } + Task::UpdateStoreInfos { + ref cpu_usages, + ref read_io_rates, + ref write_io_rates, + } => write!( + f, + "get store's information: cpu_usages {:?}, read_io_rates {:?}, write_io_rates {:?}", + cpu_usages, read_io_rates, write_io_rates, + ), Task::RegionHeartbeat(ref hb_task) => write!( f, "region heartbeat for region {:?}, leader {}", hb_task.region, hb_task.peer.get_id(), ), - Task::StoreHeartbeat { ref stats, .. } => { - write!(f, "store heartbeat stats: {:?}", stats) + Task::ReportRegionBuckets(ref buckets) => write!(f, "report buckets: {:?}", buckets), + Task::UpdateReadStats(ref stats) => { + write!(f, "update read stats: {stats:?}") + } + Task::UpdateWriteStats(ref stats) => { + write!(f, "update write stats: {stats:?}") + } + Task::UpdateRegionCpuRecords(ref cpu_records) => { + write!(f, "get region cpu records: {:?}", cpu_records) } Task::DestroyPeer { ref region_id } => { write!(f, "destroy peer of region {}", region_id) @@ -87,11 +130,22 @@ impl Display for Task { KeysInfoFormatter(split_keys.iter()) ), Task::ReportBatchSplit { ref regions } => write!(f, "report split {:?}", regions), + Task::AutoSplit { ref split_infos } => { + write!(f, "auto split split regions, num is {}", split_infos.len()) + } Task::UpdateMaxTimestamp { region_id, .. 
} => write!( f, "update the max timestamp for region {} in the concurrency manager", region_id ), + Task::ReportMinResolvedTs { + store_id, + min_resolved_ts, + } => write!( + f, + "report min resolved ts: store {}, resolved ts {}", + store_id, min_resolved_ts, + ), } } } @@ -108,16 +162,18 @@ where tablet_registry: TabletRegistry, snap_mgr: TabletSnapManager, router: StoreRouter, + stats_monitor: PdStatsMonitor, remote: Remote, - region_peers: HashMap, - - // For store_heartbeat. + // For store. start_ts: UnixSecs, - store_stat: store_heartbeat::StoreStat, + store_stat: store::StoreStat, - // For region_heartbeat. + // For region. + region_peers: HashMap, + region_buckets: HashMap, + // region_id -> total_cpu_time_ms (since last region heartbeat) region_cpu_records: HashMap, is_hb_receiver_scheduled: bool, @@ -146,21 +202,38 @@ where remote: Remote, concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 + pd_scheduler: Scheduler, + auto_split_controller: AutoSplitController, + region_read_progress: RegionReadProgressRegistry, + collector_reg_handle: CollectorRegHandle, logger: Logger, shutdown: Arc, cfg: Arc>, - ) -> Self { - Self { + ) -> Result { + let mut stats_monitor = PdStatsMonitor::new( + cfg.value().pd_store_heartbeat_tick_interval.0 / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, + cfg.value().report_min_resolved_ts_interval.0, + PdReporter::new(pd_scheduler, logger.clone()), + ); + stats_monitor.start( + auto_split_controller, + region_read_progress, + collector_reg_handle, + store_id, + )?; + Ok(Self { store_id, pd_client, raft_engine, tablet_registry, snap_mgr, router, + stats_monitor, remote, - region_peers: HashMap::default(), start_ts: UnixSecs::zero(), - store_stat: store_heartbeat::StoreStat::default(), + store_stat: store::StoreStat::default(), + region_peers: HashMap::default(), + region_buckets: HashMap::default(), region_cpu_records: HashMap::default(), is_hb_receiver_scheduled: false, concurrency_manager, @@ 
-168,7 +241,7 @@ where logger, shutdown, cfg, - } + }) } } @@ -183,8 +256,17 @@ where fn run(&mut self, task: Task) { self.maybe_schedule_heartbeat_receiver(); match task { - Task::RegionHeartbeat(task) => self.handle_region_heartbeat(task), Task::StoreHeartbeat { stats } => self.handle_store_heartbeat(stats), + Task::UpdateStoreInfos { + cpu_usages, + read_io_rates, + write_io_rates, + } => self.handle_update_store_infos(cpu_usages, read_io_rates, write_io_rates), + Task::RegionHeartbeat(task) => self.handle_region_heartbeat(task), + Task::ReportRegionBuckets(buckets) => self.handle_report_region_buckets(buckets), + Task::UpdateReadStats(stats) => self.handle_update_read_stats(stats), + Task::UpdateWriteStats(stats) => self.handle_update_write_stats(stats), + Task::UpdateRegionCpuRecords(records) => self.handle_update_region_cpu_records(records), Task::DestroyPeer { region_id } => self.handle_destroy_peer(region_id), Task::AskBatchSplit { region, @@ -194,51 +276,98 @@ where ch, } => self.handle_ask_batch_split(region, split_keys, peer, right_derive, ch), Task::ReportBatchSplit { regions } => self.handle_report_batch_split(regions), + Task::AutoSplit { split_infos } => self.handle_auto_split(split_infos), Task::UpdateMaxTimestamp { region_id, initial_status, txn_ext, } => self.handle_update_max_timestamp(region_id, initial_status, txn_ext), + Task::ReportMinResolvedTs { + store_id, + min_resolved_ts, + } => self.handle_report_min_resolved_ts(store_id, min_resolved_ts), } } } -impl Runner -where - EK: KvEngine, - ER: RaftEngine, - T: PdClient + 'static, -{ - fn handle_destroy_peer(&mut self, region_id: u64) { - match self.region_peers.remove(®ion_id) { - None => {} - Some(_) => { - info!(self.logger, "remove peer statistic record in pd"; "region_id" => region_id) - } +#[derive(Clone)] +pub struct PdReporter { + scheduler: Scheduler, + logger: Logger, +} + +impl PdReporter { + pub fn new(scheduler: Scheduler, logger: Logger) -> Self { + PdReporter { scheduler, logger 
} + } +} + +impl FlowStatsReporter for PdReporter { + fn report_read_stats(&self, stats: ReadStats) { + if let Err(e) = self.scheduler.schedule(Task::UpdateReadStats(stats)) { + error!(self.logger, "Failed to send read flow statistics"; "err" => ?e); + } + } + + fn report_write_stats(&self, stats: WriteStats) { + if let Err(e) = self.scheduler.schedule(Task::UpdateWriteStats(stats)) { + error!(self.logger, "Failed to send write flow statistics"; "err" => ?e); } } } -#[derive(Clone)] -pub struct FlowReporter { - _scheduler: Scheduler, +impl Collector for PdReporter { + fn collect(&self, records: Arc) { + self.scheduler + .schedule(Task::UpdateRegionCpuRecords(records)) + .ok(); + } } -impl FlowReporter { - pub fn new(scheduler: Scheduler) -> Self { - FlowReporter { - _scheduler: scheduler, +impl StoreStatsReporter for PdReporter { + fn report_store_infos( + &self, + cpu_usages: RecordPairVec, + read_io_rates: RecordPairVec, + write_io_rates: RecordPairVec, + ) { + let task = Task::UpdateStoreInfos { + cpu_usages, + read_io_rates, + write_io_rates, + }; + if let Err(e) = self.scheduler.schedule(task) { + error!( + self.logger, + "failed to send store infos to pd worker"; + "err" => ?e, + ); } } -} -impl FlowStatsReporter for FlowReporter { - fn report_read_stats(&self, _read_stats: ReadStats) { - // TODO + fn report_min_resolved_ts(&self, store_id: u64, min_resolved_ts: u64) { + let task = Task::ReportMinResolvedTs { + store_id, + min_resolved_ts, + }; + if let Err(e) = self.scheduler.schedule(task) { + error!( + self.logger, + "failed to send min resolved ts to pd worker"; + "err" => ?e, + ); + } } - fn report_write_stats(&self, _write_stats: WriteStats) { - // TODO + fn auto_split(&self, split_infos: Vec) { + let task = Task::AutoSplit { split_infos }; + if let Err(e) = self.scheduler.schedule(task) { + error!( + self.logger, + "failed to send split infos to pd worker"; + "err" => ?e, + ); + } } } diff --git 
a/components/raftstore-v2/src/worker/pd/region_heartbeat.rs b/components/raftstore-v2/src/worker/pd/region.rs similarity index 58% rename from components/raftstore-v2/src/worker/pd/region_heartbeat.rs rename to components/raftstore-v2/src/worker/pd/region.rs index 31f84801ed2..d282534329b 100644 --- a/components/raftstore-v2/src/worker/pd/region_heartbeat.rs +++ b/components/raftstore-v2/src/worker/pd/region.rs @@ -1,10 +1,15 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::time::Duration; +use std::{sync::Arc, time::Duration}; +use collections::HashMap; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{metapb, pdpb}; -use pd_client::{metrics::PD_HEARTBEAT_COUNTER_VEC, PdClient, RegionStat}; +use pd_client::{ + merge_bucket_stats, metrics::PD_HEARTBEAT_COUNTER_VEC, BucketStat, PdClient, RegionStat, +}; +use raftstore::store::{ReadStats, WriteStats}; +use resource_metering::RawRecords; use slog::{debug, info}; use tikv_util::{store::QueryStats, time::UnixSecs}; @@ -44,6 +49,58 @@ pub struct PeerStat { pub approximate_size: u64, } +#[derive(Default)] +pub struct ReportBucket { + current_stat: BucketStat, + last_report_stat: Option, + last_report_ts: UnixSecs, +} + +impl ReportBucket { + fn new(current_stat: BucketStat) -> Self { + Self { + current_stat, + ..Default::default() + } + } + + fn report(&mut self, report_ts: UnixSecs) -> BucketStat { + self.last_report_ts = report_ts; + match self.last_report_stat.replace(self.current_stat.clone()) { + Some(last) => { + let mut delta = BucketStat::new( + self.current_stat.meta.clone(), + pd_client::new_bucket_stats(&self.current_stat.meta), + ); + // Buckets may be changed, recalculate last stats according to current meta. 
+ merge_bucket_stats( + &delta.meta.keys, + &mut delta.stats, + &last.meta.keys, + &last.stats, + ); + for i in 0..delta.meta.keys.len() - 1 { + delta.stats.write_bytes[i] = + self.current_stat.stats.write_bytes[i] - delta.stats.write_bytes[i]; + delta.stats.write_keys[i] = + self.current_stat.stats.write_keys[i] - delta.stats.write_keys[i]; + delta.stats.write_qps[i] = + self.current_stat.stats.write_qps[i] - delta.stats.write_qps[i]; + + delta.stats.read_bytes[i] = + self.current_stat.stats.read_bytes[i] - delta.stats.read_bytes[i]; + delta.stats.read_keys[i] = + self.current_stat.stats.read_keys[i] - delta.stats.read_keys[i]; + delta.stats.read_qps[i] = + self.current_stat.stats.read_qps[i] - delta.stats.read_qps[i]; + } + delta + } + None => self.current_stat.clone(), + } + } +} + impl Runner where EK: KvEngine, @@ -244,4 +301,123 @@ where self.remote.spawn(f); self.is_hb_receiver_scheduled = true; } + + pub fn handle_report_region_buckets(&mut self, region_buckets: BucketStat) { + let region_id = region_buckets.meta.region_id; + self.merge_buckets(region_buckets); + let report_buckets = self.region_buckets.get_mut(®ion_id).unwrap(); + let last_report_ts = if report_buckets.last_report_ts.is_zero() { + self.start_ts + } else { + report_buckets.last_report_ts + }; + let now = UnixSecs::now(); + let interval_second = now.into_inner() - last_report_ts.into_inner(); + let delta = report_buckets.report(now); + let resp = self + .pd_client + .report_region_buckets(&delta, Duration::from_secs(interval_second)); + let logger = self.logger.clone(); + let f = async move { + if let Err(e) = resp.await { + debug!( + logger, + "failed to send buckets"; + "region_id" => region_id, + "version" => delta.meta.version, + "region_epoch" => ?delta.meta.region_epoch, + "err" => ?e + ); + } + }; + self.remote.spawn(f); + } + + pub fn handle_update_read_stats(&mut self, mut stats: ReadStats) { + for (region_id, region_info) in stats.region_infos.iter_mut() { + let peer_stat = self + 
.region_peers + .entry(*region_id) + .or_insert_with(PeerStat::default); + peer_stat.read_bytes += region_info.flow.read_bytes as u64; + peer_stat.read_keys += region_info.flow.read_keys as u64; + self.store_stat.engine_total_bytes_read += region_info.flow.read_bytes as u64; + self.store_stat.engine_total_keys_read += region_info.flow.read_keys as u64; + peer_stat + .query_stats + .add_query_stats(®ion_info.query_stats.0); + self.store_stat + .engine_total_query_num + .add_query_stats(®ion_info.query_stats.0); + } + for (_, region_buckets) in std::mem::take(&mut stats.region_buckets) { + self.merge_buckets(region_buckets); + } + if !stats.region_infos.is_empty() { + self.stats_monitor.maybe_send_read_stats(stats); + } + } + + pub fn handle_update_write_stats(&mut self, mut stats: WriteStats) { + for (region_id, region_info) in stats.region_infos.iter_mut() { + let peer_stat = self + .region_peers + .entry(*region_id) + .or_insert_with(PeerStat::default); + peer_stat.query_stats.add_query_stats(®ion_info.0); + self.store_stat + .engine_total_query_num + .add_query_stats(®ion_info.0); + } + } + + pub fn handle_update_region_cpu_records(&mut self, records: Arc) { + // Send Region CPU info to AutoSplitController inside the stats_monitor. 
+ self.stats_monitor.maybe_send_cpu_stats(&records); + Self::calculate_region_cpu_records(self.store_id, records, &mut self.region_cpu_records); + } + + pub fn handle_destroy_peer(&mut self, region_id: u64) { + match self.region_peers.remove(®ion_id) { + None => {} + Some(_) => { + info!(self.logger, "remove peer statistic record in pd"; "region_id" => region_id) + } + } + } + + fn merge_buckets(&mut self, mut buckets: BucketStat) { + let region_id = buckets.meta.region_id; + self.region_buckets + .entry(region_id) + .and_modify(|report_bucket| { + let current = &mut report_bucket.current_stat; + if current.meta < buckets.meta { + std::mem::swap(current, &mut buckets); + } + + merge_bucket_stats( + ¤t.meta.keys, + &mut current.stats, + &buckets.meta.keys, + &buckets.stats, + ); + }) + .or_insert_with(|| ReportBucket::new(buckets)); + } + + fn calculate_region_cpu_records( + store_id: u64, + records: Arc, + region_cpu_records: &mut HashMap, + ) { + for (tag, record) in &records.records { + let record_store_id = tag.store_id; + if record_store_id != store_id { + continue; + } + // Reporting a region heartbeat later will clear the corresponding record. 
+ *region_cpu_records.entry(tag.region_id).or_insert(0) += record.cpu_time; + } + } } diff --git a/components/raftstore-v2/src/worker/pd/split.rs b/components/raftstore-v2/src/worker/pd/split.rs index cb7c3ad9308..bf13e01120a 100644 --- a/components/raftstore-v2/src/worker/pd/split.rs +++ b/components/raftstore-v2/src/worker/pd/split.rs @@ -6,10 +6,12 @@ use kvproto::{ raft_cmdpb::{AdminCmdType, AdminRequest, SplitRequest}, }; use pd_client::PdClient; -use slog::{info, warn}; +use raftstore::store::SplitInfo; +use slog::{info, warn, Logger}; +use yatp::{task::future::TaskCell, Remote}; use super::{requests::*, Runner}; -use crate::router::CmdResChannel; +use crate::{batch::StoreRouter, router::CmdResChannel}; fn new_batch_split_region_request( split_keys: Vec>, @@ -37,24 +39,50 @@ where ER: RaftEngine, T: PdClient + 'static, { + #[inline] pub fn handle_ask_batch_split( &mut self, - mut region: metapb::Region, + region: metapb::Region, split_keys: Vec>, peer: metapb::Peer, right_derive: bool, ch: CmdResChannel, + ) { + Self::ask_batch_split_imp( + &self.pd_client, + &self.logger, + &self.router, + &self.remote, + region, + split_keys, + peer, + right_derive, + Some(ch), + ); + } + + fn ask_batch_split_imp( + pd_client: &T, + logger: &Logger, + router: &StoreRouter, + remote: &Remote, + mut region: metapb::Region, + split_keys: Vec>, + peer: metapb::Peer, + right_derive: bool, + ch: Option, ) { if split_keys.is_empty() { - info!(self.logger, "empty split key, skip ask batch split"; - "region_id" => region.get_id()); + info!( + logger, + "empty split key, skip ask batch split"; + "region_id" => region.get_id() + ); return; } - let resp = self - .pd_client - .ask_batch_split(region.clone(), split_keys.len()); - let router = self.router.clone(); - let logger = self.logger.clone(); + let resp = pd_client.ask_batch_split(region.clone(), split_keys.len()); + let router = router.clone(); + let logger = logger.clone(); let f = async move { match resp.await { Ok(mut resp) => 
{ @@ -73,7 +101,7 @@ where ); let region_id = region.get_id(); let epoch = region.take_region_epoch(); - send_admin_request(&logger, &router, region_id, epoch, peer, req, Some(ch)); + send_admin_request(&logger, &router, region_id, epoch, peer, req, ch); } Err(e) => { warn!( @@ -85,7 +113,7 @@ where } } }; - self.remote.spawn(f); + remote.spawn(f); } pub fn handle_report_batch_split(&mut self, regions: Vec) { @@ -98,4 +126,37 @@ where }; self.remote.spawn(f); } + + pub fn handle_auto_split(&mut self, split_infos: Vec) { + let pd_client = self.pd_client.clone(); + let logger = self.logger.clone(); + let router = self.router.clone(); + let remote = self.remote.clone(); + + let f = async move { + for split_info in split_infos { + let Ok(Some(region)) = + pd_client.get_region_by_id(split_info.region_id).await else { continue }; + // Try to split the region with the given split key. + if let Some(split_key) = split_info.split_key { + Self::ask_batch_split_imp( + &pd_client, + &logger, + &router, + &remote, + region, + vec![split_key], + split_info.peer, + true, + None, + ); + // Try to split the region on half within the given key + // range if there is no `split_key` been given. 
+ } else if split_info.start_key.is_some() && split_info.end_key.is_some() { + // TODO: implement half split + } + } + }; + self.remote.spawn(f); + } } diff --git a/components/raftstore-v2/src/worker/pd/store_heartbeat.rs b/components/raftstore-v2/src/worker/pd/store.rs similarity index 96% rename from components/raftstore-v2/src/worker/pd/store_heartbeat.rs rename to components/raftstore-v2/src/worker/pd/store.rs index ba75354c753..8f30b85d6f3 100644 --- a/components/raftstore-v2/src/worker/pd/store_heartbeat.rs +++ b/components/raftstore-v2/src/worker/pd/store.rs @@ -257,6 +257,17 @@ where self.remote.spawn(f); } + pub fn handle_update_store_infos( + &mut self, + cpu_usages: RecordPairVec, + read_io_rates: RecordPairVec, + write_io_rates: RecordPairVec, + ) { + self.store_stat.store_cpu_usages = cpu_usages; + self.store_stat.store_read_io_rates = read_io_rates; + self.store_stat.store_write_io_rates = write_io_rates; + } + /// Returns (capacity, used, available). fn collect_engine_size(&self) -> Option<(u64, u64, u64)> { let disk_stats = match fs2::statvfs(self.tablet_registry.tablet_root()) { diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index ce0248130fb..90f7c500903 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -33,7 +33,8 @@ use raftstore::{ coprocessor::CoprocessorHost, store::{ region_meta::{RegionLocalState, RegionMeta}, - Config, RegionSnapshot, TabletSnapKey, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX, + AutoSplitController, Config, RegionSnapshot, TabletSnapKey, TabletSnapManager, Transport, + RAFT_INIT_LOG_INDEX, }, }; use raftstore_v2::{ @@ -41,6 +42,7 @@ use raftstore_v2::{ router::{DebugInfoChannel, FlushChannel, PeerMsg, QueryResult, RaftRouter}, Bootstrap, SimpleWriteEncoder, StateStorage, StoreSystem, }; +use resource_metering::CollectorRegHandle; use slog::{debug, o, Logger}; use 
tempfile::TempDir; use test_pd::mocker::Service; @@ -300,6 +302,8 @@ impl RunningState { concurrency_manager, causal_ts_provider, coprocessor_host, + AutoSplitController::default(), + CollectorRegHandle::new_for_test(), background.clone(), pd_worker, ) diff --git a/components/raftstore/src/coprocessor/consistency_check.rs b/components/raftstore/src/coprocessor/consistency_check.rs index 5ba97089f85..2ebf27c963f 100644 --- a/components/raftstore/src/coprocessor/consistency_check.rs +++ b/components/raftstore/src/coprocessor/consistency_check.rs @@ -2,7 +2,7 @@ use std::marker::PhantomData; -use engine_traits::{KvEngine, Snapshot, ALL_CFS, CF_RAFT}; +use engine_traits::{KvEngine, Snapshot, CF_RAFT}; use kvproto::metapb::Region; use crate::{ @@ -63,7 +63,7 @@ fn compute_hash_on_raw(region: &Region, snap: &S) -> Result { let start_key = keys::enc_start_key(region); let end_key = keys::enc_end_key(region); - for cf in ALL_CFS { + for cf in snap.cf_names() { snap.scan(cf, &start_key, &end_key, false, |k, v| { digest.update(k); digest.update(v); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 3724eba13e2..b75aee3b4bb 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -1516,7 +1516,9 @@ impl RaftBatchSystem { ) -> Result<()> { assert!(self.workers.is_none()); // TODO: we can get cluster meta regularly too later. 
- let purge_worker = if engines.raft.need_manual_purge() { + let purge_worker = if engines.raft.need_manual_purge() + && !cfg.value().raft_engine_purge_interval.0.is_zero() + { let worker = Worker::new("purge-worker"); let raft_clone = engines.raft.clone(); let router_clone = self.router(); @@ -1735,7 +1737,6 @@ impl RaftBatchSystem { Arc::clone(&pd_client), self.router.clone(), workers.pd_worker.scheduler(), - cfg.pd_store_heartbeat_tick_interval.0, auto_split_controller, concurrency_manager, snap_mgr, diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 42fb320035b..fe3c12427bd 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -79,9 +79,10 @@ pub use self::{ worker::{ metrics as worker_metrics, AutoSplitController, Bucket, BucketRange, CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, FlowStatistics, FlowStatsReporter, KeyEntry, - LocalReadContext, LocalReader, LocalReaderCore, PdTask, ReadDelegate, ReadExecutor, - ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, - SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, StoreMetaDelegate, - TrackVer, WriteStats, + LocalReadContext, LocalReader, LocalReaderCore, PdStatsMonitor, PdTask, ReadDelegate, + ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, + SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, SplitInfo, + StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, + NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, }, }; diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index e021651ba3d..ac23f4e58d5 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -27,7 +27,8 @@ pub use self::{ consistency_check::{Runner as ConsistencyCheckRunner, Task as ConsistencyCheckTask}, pd::{ new_change_peer_v2_request, 
FlowStatistics, FlowStatsReporter, HeartbeatTask, - Runner as PdRunner, Task as PdTask, + Runner as PdRunner, StatsMonitor as PdStatsMonitor, StoreStatsReporter, Task as PdTask, + NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, }, raftlog_gc::{Runner as RaftlogGcRunner, Task as RaftlogGcTask}, read::{ @@ -44,5 +45,5 @@ pub use self::{ Bucket, BucketRange, KeyEntry, Runner as SplitCheckRunner, Task as SplitCheckTask, }, split_config::{SplitConfig, SplitConfigManager}, - split_controller::{AutoSplitController, ReadStats, SplitConfigChange, WriteStats}, + split_controller::{AutoSplitController, ReadStats, SplitConfigChange, SplitInfo, WriteStats}, }; diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index fdfa1b44c85..79b58d75c83 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -69,6 +69,8 @@ use crate::{ }, }; +pub const NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT: u32 = 2; + type RecordPairVec = Vec; #[derive(Default, Debug, Clone)] @@ -189,7 +191,6 @@ where id: u64, duration: RaftstoreDuration, }, - UpdateRegionCpuCollector(bool), RegionCpuRecords(Arc), ReportMinResolvedTs { store_id: u64, @@ -267,7 +268,7 @@ pub struct PeerStat { } #[derive(Default)] -pub struct ReportBucket { +struct ReportBucket { current_stat: BucketStat, last_report_stat: Option, last_report_ts: UnixSecs, @@ -418,12 +419,6 @@ where Task::UpdateSlowScore { id, ref duration } => { write!(f, "compute slow score: id {}, duration {:?}", id, duration) } - Task::UpdateRegionCpuCollector(is_register) => { - if is_register { - return write!(f, "register region cpu collector"); - } - write!(f, "deregister region cpu collector") - } Task::RegionCpuRecords(ref cpu_records) => { write!(f, "get region cpu records: {:?}", cpu_records) } @@ -476,12 +471,83 @@ fn convert_record_pairs(m: HashMap) -> RecordPairVec { .collect() } -struct StatsMonitor +#[derive(Clone)] +pub struct WrappedScheduler(Scheduler>); + 
+impl Collector for WrappedScheduler where EK: KvEngine, ER: RaftEngine, { - scheduler: Scheduler>, + fn collect(&self, records: Arc) { + self.0.schedule(Task::RegionCpuRecords(records)).ok(); + } +} + +pub trait StoreStatsReporter: Send + Clone + Sync + 'static + Collector { + fn report_store_infos( + &self, + cpu_usages: RecordPairVec, + read_io_rates: RecordPairVec, + write_io_rates: RecordPairVec, + ); + fn report_min_resolved_ts(&self, store_id: u64, min_resolved_ts: u64); + fn auto_split(&self, split_infos: Vec); +} + +impl StoreStatsReporter for WrappedScheduler +where + EK: KvEngine, + ER: RaftEngine, +{ + fn report_store_infos( + &self, + cpu_usages: RecordPairVec, + read_io_rates: RecordPairVec, + write_io_rates: RecordPairVec, + ) { + let task = Task::StoreInfos { + cpu_usages, + read_io_rates, + write_io_rates, + }; + if let Err(e) = self.0.schedule(task) { + error!( + "failed to send store infos to pd worker"; + "err" => ?e, + ); + } + } + + fn report_min_resolved_ts(&self, store_id: u64, min_resolved_ts: u64) { + let task = Task::ReportMinResolvedTs { + store_id, + min_resolved_ts, + }; + if let Err(e) = self.0.schedule(task) { + error!( + "failed to send min resolved ts to pd worker"; + "err" => ?e, + ); + } + } + + fn auto_split(&self, split_infos: Vec) { + let task = Task::AutoSplit { split_infos }; + if let Err(e) = self.0.schedule(task) { + error!( + "failed to send split infos to pd worker"; + "err" => ?e, + ); + } + } +} + +pub struct StatsMonitor +where + T: StoreStatsReporter, +{ + reporter: T, handle: Option>, timer: Option>, read_stats_sender: Option>, @@ -492,18 +558,13 @@ where report_min_resolved_ts_interval: Duration, } -impl StatsMonitor +impl StatsMonitor where - EK: KvEngine, - ER: RaftEngine, + T: StoreStatsReporter, { - pub fn new( - interval: Duration, - report_min_resolved_ts_interval: Duration, - scheduler: Scheduler>, - ) -> Self { + pub fn new(interval: Duration, report_min_resolved_ts_interval: Duration, reporter: T) -> Self 
{ StatsMonitor { - scheduler, + reporter, handle: None, timer: None, read_stats_sender: None, @@ -524,11 +585,10 @@ where &mut self, mut auto_split_controller: AutoSplitController, region_read_progress: RegionReadProgressRegistry, + collector_reg_handle: CollectorRegHandle, store_id: u64, ) -> Result<(), io::Error> { - if self.collect_tick_interval < default_collect_tick_interval() - || self.collect_store_infos_interval < self.collect_tick_interval - { + if self.collect_tick_interval < default_collect_tick_interval() { info!( "interval is too small, skip stats monitoring. If we are running tests, it is normal, otherwise a check is needed." ); @@ -555,7 +615,7 @@ where let (cpu_stats_sender, cpu_stats_receiver) = mpsc::channel(); self.cpu_stats_sender = Some(cpu_stats_sender); - let scheduler = self.scheduler.clone(); + let reporter = self.reporter.clone(); let props = tikv_util::thread_group::current_properties(); fn is_enable_tick(timer_cnt: u64, interval: u64) -> bool { @@ -570,13 +630,23 @@ where // make sure the record won't be disturbed. let mut collect_store_infos_thread_stats = ThreadInfoStatistics::new(); let mut load_base_split_thread_stats = ThreadInfoStatistics::new(); + let mut region_cpu_records_collector = None; + // Register the region CPU records collector. 
+ if auto_split_controller + .cfg + .region_cpu_overload_threshold_ratio + > 0.0 + { + region_cpu_records_collector = + Some(collector_reg_handle.register(Box::new(reporter.clone()), false)); + } while let Err(mpsc::RecvTimeoutError::Timeout) = timer_rx.recv_timeout(tick_interval) { if is_enable_tick(timer_cnt, collect_store_infos_interval) { StatsMonitor::collect_store_infos( &mut collect_store_infos_thread_stats, - &scheduler, + &reporter, ); } if is_enable_tick(timer_cnt, load_base_split_check_interval) { @@ -585,14 +655,15 @@ where &read_stats_receiver, &cpu_stats_receiver, &mut load_base_split_thread_stats, - &scheduler, + &reporter, + &collector_reg_handle, + &mut region_cpu_records_collector, ); } if is_enable_tick(timer_cnt, report_min_resolved_ts_interval) { - StatsMonitor::report_min_resolved_ts( - ®ion_read_progress, + reporter.report_min_resolved_ts( store_id, - &scheduler, + region_read_progress.get_min_resolved_ts(), ); } timer_cnt += 1; @@ -604,26 +675,13 @@ where Ok(()) } - pub fn collect_store_infos( - thread_stats: &mut ThreadInfoStatistics, - scheduler: &Scheduler>, - ) { + pub fn collect_store_infos(thread_stats: &mut ThreadInfoStatistics, reporter: &T) { thread_stats.record(); let cpu_usages = convert_record_pairs(thread_stats.get_cpu_usages()); let read_io_rates = convert_record_pairs(thread_stats.get_read_io_rates()); let write_io_rates = convert_record_pairs(thread_stats.get_write_io_rates()); - let task = Task::StoreInfos { - cpu_usages, - read_io_rates, - write_io_rates, - }; - if let Err(e) = scheduler.schedule(task) { - error!( - "failed to send store infos to pd worker"; - "err" => ?e, - ); - } + reporter.report_store_infos(cpu_usages, read_io_rates, write_io_rates); } pub fn load_base_split( @@ -631,16 +689,19 @@ where read_stats_receiver: &Receiver, cpu_stats_receiver: &Receiver>, thread_stats: &mut ThreadInfoStatistics, - scheduler: &Scheduler>, + reporter: &T, + collector_reg_handle: &CollectorRegHandle, + 
region_cpu_records_collector: &mut Option, ) { let start_time = TiInstant::now(); match auto_split_controller.refresh_and_check_cfg() { SplitConfigChange::UpdateRegionCpuCollector(is_register) => { - if let Err(e) = scheduler.schedule(Task::UpdateRegionCpuCollector(is_register)) { - error!( - "failed to register or deregister the region cpu collector"; - "is_register" => is_register, - "err" => ?e, + // If it's a deregister task, just take and drop the original collector. + if !is_register { + region_cpu_records_collector.take(); + } else { + region_cpu_records_collector.get_or_insert( + collector_reg_handle.register(Box::new(reporter.clone()), false), ); } } @@ -658,13 +719,7 @@ where let (top_qps, split_infos) = auto_split_controller.flush(read_stats_vec, cpu_stats_vec, thread_stats); auto_split_controller.clear(); - let task = Task::AutoSplit { split_infos }; - if let Err(e) = scheduler.schedule(task) { - error!( - "failed to send split infos to pd worker"; - "err" => ?e, - ); - } + reporter.auto_split(split_infos); for i in 0..TOP_N { if i < top_qps.len() { READ_QPS_TOPN @@ -677,23 +732,6 @@ where LOAD_BASE_SPLIT_DURATION_HISTOGRAM.observe(start_time.saturating_elapsed_secs()); } - pub fn report_min_resolved_ts( - region_read_progress: &RegionReadProgressRegistry, - store_id: u64, - scheduler: &Scheduler>, - ) { - let task = Task::ReportMinResolvedTs { - store_id, - min_resolved_ts: region_read_progress.get_min_resolved_ts(), - }; - if let Err(e) = scheduler.schedule(task) { - error!( - "failed to send min resolved ts to pd worker"; - "err" => ?e, - ); - } - } - pub fn stop(&mut self) { if let Some(h) = self.handle.take() { drop(self.timer.take()); @@ -705,14 +743,22 @@ where } } - #[inline(always)] - fn get_read_stats_sender(&self) -> &Option> { - &self.read_stats_sender + #[inline] + pub fn maybe_send_read_stats(&self, read_stats: ReadStats) { + if let Some(sender) = &self.read_stats_sender { + if sender.send(read_stats).is_err() { + warn!("send read_stats 
failed, are we shutting down?") + } + } } - #[inline(always)] - fn get_cpu_stats_sender(&self) -> &Option>> { - &self.cpu_stats_sender + #[inline] + pub fn maybe_send_cpu_stats(&self, cpu_stats: &Arc) { + if let Some(sender) = &self.cpu_stats_sender { + if sender.send(cpu_stats.clone()).is_err() { + warn!("send region cpu info failed, are we shutting down?") + } + } } } @@ -845,37 +891,6 @@ impl SlowScore { } } -// RegionCpuMeteringCollector is used to collect the region-related CPU info. -struct RegionCpuMeteringCollector -where - EK: KvEngine, - ER: RaftEngine, -{ - scheduler: Scheduler>, -} - -impl RegionCpuMeteringCollector -where - EK: KvEngine, - ER: RaftEngine, -{ - fn new(scheduler: Scheduler>) -> RegionCpuMeteringCollector { - RegionCpuMeteringCollector { scheduler } - } -} - -impl Collector for RegionCpuMeteringCollector -where - EK: KvEngine, - ER: RaftEngine, -{ - fn collect(&self, records: Arc) { - self.scheduler - .schedule(Task::RegionCpuRecords(records)) - .ok(); - } -} - pub struct Runner where EK: KvEngine, @@ -896,11 +911,9 @@ where // actually it is the sender connected to Runner's Worker which // calls Runner's run() on Task received. 
scheduler: Scheduler>, - stats_monitor: StatsMonitor, + stats_monitor: StatsMonitor>, store_heartbeat_interval: Duration, - collector_reg_handle: CollectorRegHandle, - region_cpu_records_collector: Option, // region_id -> total_cpu_time_ms (since last region heartbeat) region_cpu_records: HashMap, @@ -922,15 +935,12 @@ where ER: RaftEngine, T: PdClient + 'static, { - const INTERVAL_DIVISOR: u32 = 2; - pub fn new( cfg: &Config, store_id: u64, pd_client: Arc, router: RaftRouter, scheduler: Scheduler>, - store_heartbeat_interval: Duration, auto_split_controller: AutoSplitController, concurrency_manager: ConcurrencyManager, snap_mgr: SnapManager, @@ -941,25 +951,19 @@ where coprocessor_host: CoprocessorHost, causal_ts_provider: Option>, // used for rawkv apiv2 ) -> Runner { - // Register the region CPU records collector. - let mut region_cpu_records_collector = None; - if auto_split_controller - .cfg - .region_cpu_overload_threshold_ratio - > 0.0 - { - region_cpu_records_collector = Some(collector_reg_handle.register( - Box::new(RegionCpuMeteringCollector::new(scheduler.clone())), - false, - )); - } - let interval = store_heartbeat_interval / Self::INTERVAL_DIVISOR; + let store_heartbeat_interval = cfg.pd_store_heartbeat_tick_interval.0; + let interval = store_heartbeat_interval / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT; let mut stats_monitor = StatsMonitor::new( interval, cfg.report_min_resolved_ts_interval.0, - scheduler.clone(), + WrappedScheduler(scheduler.clone()), ); - if let Err(e) = stats_monitor.start(auto_split_controller, region_read_progress, store_id) { + if let Err(e) = stats_monitor.start( + auto_split_controller, + region_read_progress, + collector_reg_handle, + store_id, + ) { error!("failed to start stats collector, error = {:?}", e); } @@ -975,8 +979,6 @@ where scheduler, store_heartbeat_interval, stats_monitor, - collector_reg_handle, - region_cpu_records_collector, region_cpu_records: HashMap::default(), concurrency_manager, snap_mgr, @@ -1041,21 
+1043,6 @@ where self.remote.spawn(f); } - fn handle_update_region_cpu_collector(&mut self, is_register: bool) { - // If it's a deregister task, just take and drop the original collector. - if !is_register { - self.region_cpu_records_collector.take(); - return; - } - if self.region_cpu_records_collector.is_some() { - return; - } - self.region_cpu_records_collector = Some(self.collector_reg_handle.register( - Box::new(RegionCpuMeteringCollector::new(self.scheduler.clone())), - false, - )); - } - // Note: The parameter doesn't contain `self` because this function may // be called in an asynchronous context. fn handle_ask_batch_split( @@ -1604,11 +1591,7 @@ where self.merge_buckets(region_buckets); } if !read_stats.region_infos.is_empty() { - if let Some(sender) = self.stats_monitor.get_read_stats_sender() { - if sender.send(read_stats).is_err() { - warn!("send read_stats failed, are we shutting down?") - } - } + self.stats_monitor.maybe_send_read_stats(read_stats); } } @@ -1756,11 +1739,7 @@ where // TODO: more accurate CPU consumption of a specified region. fn handle_region_cpu_records(&mut self, records: Arc) { // Send Region CPU info to AutoSplitController inside the stats_monitor. - if let Some(cpu_stats_sender) = self.stats_monitor.get_cpu_stats_sender() { - if cpu_stats_sender.send(records.clone()).is_err() { - warn!("send region cpu info failed, are we shutting down?") - } - } + self.stats_monitor.maybe_send_cpu_stats(&records); calculate_region_cpu_records(self.store_id, records, &mut self.region_cpu_records); } @@ -1856,22 +1835,10 @@ where stats.set_is_busy(true); // We do not need to report store_info, so we just set `None` here. 
- let task = Task::StoreHeartbeat { - stats, - store_info: None, - report: None, - dr_autosync_status: None, - }; - if let Err(e) = self.scheduler.schedule(task) { - error!("force report store heartbeat failed"; - "store_id" => self.store_id, - "err" => ?e - ); - } else { - warn!("scheduling store_heartbeat timeout, force report store slow score to pd."; - "store_id" => self.store_id, - ); - } + self.handle_store_heartbeat(stats, None, None, None); + warn!("scheduling store_heartbeat timeout, force report store slow score to pd."; + "store_id" => self.store_id, + ); } fn is_store_heartbeat_delayed(&self) -> bool { @@ -1954,48 +1921,43 @@ where let f = async move { for split_info in split_infos { - if let Ok(Some(region)) = - pd_client.get_region_by_id(split_info.region_id).await - { - // Try to split the region with the given split key. - if let Some(split_key) = split_info.split_key { - Self::handle_ask_batch_split( - router.clone(), - scheduler.clone(), - pd_client.clone(), - region, - vec![split_key], - split_info.peer, - true, - Callback::None, - String::from("auto_split"), - remote.clone(), + let Ok(Some(region)) = + pd_client.get_region_by_id(split_info.region_id).await else { continue }; + // Try to split the region with the given split key. + if let Some(split_key) = split_info.split_key { + Self::handle_ask_batch_split( + router.clone(), + scheduler.clone(), + pd_client.clone(), + region, + vec![split_key], + split_info.peer, + true, + Callback::None, + String::from("auto_split"), + remote.clone(), + ); + // Try to split the region on half within the given key + // range if there is no `split_key` been given. 
+ } else if split_info.start_key.is_some() && split_info.end_key.is_some() { + let start_key = split_info.start_key.unwrap(); + let end_key = split_info.end_key.unwrap(); + let region_id = region.get_id(); + let msg = CasualMessage::HalfSplitRegion { + region_epoch: region.get_region_epoch().clone(), + start_key: Some(start_key.clone()), + end_key: Some(end_key.clone()), + policy: pdpb::CheckPolicy::Scan, + source: "auto_split", + cb: Callback::None, + }; + if let Err(e) = router.send(region_id, PeerMsg::CasualMessage(msg)) { + error!("send auto half split request failed"; + "region_id" => region_id, + "start_key" => log_wrappers::Value::key(&start_key), + "end_key" => log_wrappers::Value::key(&end_key), + "err" => ?e, ); - return; - } - // Try to split the region on half within the given key range - // if there is no `split_key` been given. - if split_info.start_key.is_some() && split_info.end_key.is_some() { - let start_key = split_info.start_key.unwrap(); - let end_key = split_info.end_key.unwrap(); - let region_id = region.get_id(); - let msg = CasualMessage::HalfSplitRegion { - region_epoch: region.get_region_epoch().clone(), - start_key: Some(start_key.clone()), - end_key: Some(end_key.clone()), - policy: pdpb::CheckPolicy::Scan, - source: "auto_split", - cb: Callback::None, - }; - if let Err(e) = router.send(region_id, PeerMsg::CasualMessage(msg)) - { - error!("send auto half split request failed"; - "region_id" => region_id, - "start_key" => log_wrappers::Value::key(&start_key), - "end_key" => log_wrappers::Value::key(&end_key), - "err" => ?e, - ); - } } } } @@ -2124,9 +2086,6 @@ where } => self.handle_update_max_timestamp(region_id, initial_status, txn_ext), Task::QueryRegionLeader { region_id } => self.handle_query_region_leader(region_id), Task::UpdateSlowScore { id, duration } => self.slow_score.record(id, duration.sum()), - Task::UpdateRegionCpuCollector(is_register) => { - self.handle_update_region_cpu_collector(is_register) - } 
Task::RegionCpuRecords(records) => self.handle_region_cpu_records(records), Task::ReportMinResolvedTs { store_id, @@ -2469,7 +2428,7 @@ mod tests { struct RunnerTest { store_stat: Arc>, - stats_monitor: StatsMonitor, + stats_monitor: StatsMonitor>, } impl RunnerTest { @@ -2481,13 +2440,16 @@ mod tests { let mut stats_monitor = StatsMonitor::new( Duration::from_secs(interval), Duration::from_secs(0), - scheduler, + WrappedScheduler(scheduler), ); let store_meta = Arc::new(Mutex::new(StoreMeta::new(0))); let region_read_progress = store_meta.lock().unwrap().region_read_progress.clone(); - if let Err(e) = - stats_monitor.start(AutoSplitController::default(), region_read_progress, 1) - { + if let Err(e) = stats_monitor.start( + AutoSplitController::default(), + region_read_progress, + CollectorRegHandle::new_for_test(), + 1, + ) { error!("failed to start stats collector, error = {:?}", e); } diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 03b02e5f81e..36a02130fdb 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -47,7 +47,10 @@ use file_system::{ use futures::executor::block_on; use grpcio::{EnvBuilder, Environment}; use grpcio_health::HealthService; -use kvproto::{deadlock::create_deadlock, diagnosticspb::create_diagnostics, kvrpcpb::ApiVersion}; +use kvproto::{ + deadlock::create_deadlock, diagnosticspb::create_diagnostics, kvrpcpb::ApiVersion, + resource_usage_agent::create_resource_metering_pub_sub, +}; use pd_client::{PdClient, RpcClient}; use raft_log_engine::RaftLogEngine; use raftstore::{ @@ -56,8 +59,8 @@ use raftstore::{ RawConsistencyCheckObserver, }, store::{ - memory::MEMTRACE_ROOT as MEMTRACE_RAFTSTORE, CheckLeaderRunner, SplitConfigManager, - TabletSnapManager, + memory::MEMTRACE_ROOT as MEMTRACE_RAFTSTORE, AutoSplitController, CheckLeaderRunner, + SplitConfigManager, TabletSnapManager, }, RegionInfoAccessor, }; @@ -68,7 +71,7 @@ use tikv::{ config::{ConfigController, 
DbConfigManger, DbType, LogConfigManager, TikvConfig}, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, - read_pool::{build_yatp_read_pool, ReadPool}, + read_pool::{build_yatp_read_pool, ReadPool, ReadPoolConfigManager}, server::{ config::{Config as ServerConfig, ServerConfigManager}, gc_worker::{AutoGcConfig, GcWorker}, @@ -235,6 +238,7 @@ struct TikvEngines { struct Servers { lock_mgr: LockManager, server: LocalServer, + rsmeter_pubsub_service: resource_metering::PubSubService, } type LocalServer = Server>; @@ -635,7 +639,10 @@ where let engines = self.engines.as_ref().unwrap(); let pd_worker = LazyWorker::new("pd-worker"); - let pd_sender = raftstore_v2::FlowReporter::new(pd_worker.scheduler()); + let pd_sender = raftstore_v2::PdReporter::new( + pd_worker.scheduler(), + slog_global::borrow_global().new(slog::o!()), + ); let unified_read_pool = if self.config.readpool.is_unified_pool_enabled() { let resource_ctl = self @@ -674,15 +681,16 @@ where let (reporter_notifier, data_sink_reg_handle, reporter_worker) = resource_metering::init_reporter( self.config.resource_metering.clone(), - collector_reg_handle, + collector_reg_handle.clone(), ); self.to_stop.push(reporter_worker); let (address_change_notifier, single_target_worker) = resource_metering::init_single_target( self.config.resource_metering.receiver_address.clone(), self.env.clone(), - data_sink_reg_handle, + data_sink_reg_handle.clone(), ); self.to_stop.push(single_target_worker); + let rsmeter_pubsub_service = resource_metering::PubSubService::new(data_sink_reg_handle); let cfg_manager = resource_metering::ConfigManager::new( self.config.resource_metering.clone(), @@ -769,6 +777,22 @@ where cop_read_pools.handle() }; + let mut unified_read_pool_scale_receiver = None; + if self.config.readpool.is_unified_pool_enabled() { + let (unified_read_pool_scale_notifier, rx) = mpsc::sync_channel(10); + cfg_controller.register( + tikv::config::Module::Readpool, + 
Box::new(ReadPoolConfigManager::new( + unified_read_pool.as_ref().unwrap().handle(), + unified_read_pool_scale_notifier, + &self.background_worker, + self.config.readpool.unified.max_thread_count, + self.config.readpool.unified.auto_adjust_pool_size, + )), + ); + unified_read_pool_scale_receiver = Some(rx); + } + let check_leader_runner = CheckLeaderRunner::new( self.router.as_ref().unwrap().store_meta().clone(), self.coprocessor_host.clone().unwrap(), @@ -828,7 +852,17 @@ where let split_config_manager = SplitConfigManager::new(Arc::new(VersionTrack::new(self.config.split.clone()))); - cfg_controller.register(tikv::config::Module::Split, Box::new(split_config_manager)); + cfg_controller.register( + tikv::config::Module::Split, + Box::new(split_config_manager.clone()), + ); + + let auto_split_controller = AutoSplitController::new( + split_config_manager, + self.config.server.grpc_concurrency, + self.config.readpool.unified.max_thread_count, + unified_read_pool_scale_receiver, + ); // `ConsistencyCheckObserver` must be registered before `Node::start`. 
let safe_point = Arc::new(AtomicU64::new(0)); @@ -858,6 +892,8 @@ where self.concurrency_manager.clone(), self.causal_ts_provider.clone(), self.coprocessor_host.clone().unwrap(), + auto_split_controller, + collector_reg_handle, self.background_worker.clone(), pd_worker, raft_store, @@ -882,7 +918,11 @@ where initial_metric(&self.config.metric); - self.servers = Some(Servers { lock_mgr, server }); + self.servers = Some(Servers { + lock_mgr, + server, + rsmeter_pubsub_service, + }); server_config } @@ -923,6 +963,16 @@ where &self.config.pessimistic_txn, ) .unwrap_or_else(|e| fatal!("failed to start lock manager: {}", e)); + + if servers + .server + .register_service(create_resource_metering_pub_sub( + servers.rsmeter_pubsub_service.clone(), + )) + .is_some() + { + warn!("failed to register resource metering pubsub service"); + } } fn init_io_utility(&mut self) -> BytesFetcher { diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index d5c2eefa6d6..8b3745120d5 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -16,7 +16,8 @@ use encryption_export::{ use engine_rocks::{config::BlobRunMode, RocksEngine, RocksSnapshot, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{ - Engines, Iterable, Peekable, RaftEngineDebug, RaftEngineReadOnly, ALL_CFS, CF_DEFAULT, CF_RAFT, + CfNamesExt, Engines, Iterable, Peekable, RaftEngineDebug, RaftEngineReadOnly, CF_DEFAULT, + CF_RAFT, }; use file_system::IoRateLimiter; use futures::executor::block_on; @@ -101,7 +102,7 @@ pub fn must_region_cleared(engine: &Engines, region assert_eq!(state.get_state(), PeerState::Tombstone, "{:?}", state); let start_key = keys::data_key(region.get_start_key()); let end_key = keys::data_key(region.get_end_key()); - for cf in ALL_CFS { + for cf in engine.kv.cf_names() { engine .kv .scan(cf, &start_key, &end_key, false, |k, v| { diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs 
index b876951894c..588e8ae9e9b 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -9,9 +9,13 @@ use kvproto::{metapb, replication_modepb::ReplicationStatus}; use pd_client::PdClient; use raftstore::{ coprocessor::CoprocessorHost, - store::{GlobalReplicationState, TabletSnapManager, Transport, RAFT_INIT_LOG_INDEX}, + store::{ + AutoSplitController, GlobalReplicationState, TabletSnapManager, Transport, + RAFT_INIT_LOG_INDEX, + }, }; use raftstore_v2::{router::RaftRouter, Bootstrap, PdTask, StoreRouter, StoreSystem}; +use resource_metering::CollectorRegHandle; use slog::{info, o, Logger}; use tikv_util::{ config::VersionTrack, @@ -92,6 +96,8 @@ where concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 coprocessor_host: CoprocessorHost, + auto_split_controller: AutoSplitController, + collector_reg_handle: CollectorRegHandle, background: Worker, pd_worker: LazyWorker, store_cfg: Arc>, @@ -129,6 +135,8 @@ where concurrency_manager, causal_ts_provider, coprocessor_host, + auto_split_controller, + collector_reg_handle, background, pd_worker, store_cfg, @@ -188,6 +196,8 @@ where concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 coprocessor_host: CoprocessorHost, + auto_split_controller: AutoSplitController, + collector_reg_handle: CollectorRegHandle, background: Worker, pd_worker: LazyWorker, store_cfg: Arc>, @@ -218,6 +228,8 @@ where concurrency_manager, causal_ts_provider, coprocessor_host, + auto_split_controller, + collector_reg_handle, background, pd_worker, )?; From 860fc839a988a6c975fbea18fc22f1d840bdfdc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 19 Jan 2023 17:03:49 +0800 Subject: [PATCH 0464/1149] log-backup: an ad-hoc way for hot reloading TLS certs (#14072) close tikv/tikv#14071 Log backup would aware TLS certifications changing. 
Signed-off-by: hillium --- .../src/metadata/store/lazy_etcd.rs | 91 ++++++++++++------- components/security/src/lib.rs | 4 + components/server/src/server.rs | 8 +- 3 files changed, 65 insertions(+), 38 deletions(-) diff --git a/components/backup-stream/src/metadata/store/lazy_etcd.rs b/components/backup-stream/src/metadata/store/lazy_etcd.rs index b712a23973d..37ffbad37c4 100644 --- a/components/backup-stream/src/metadata/store/lazy_etcd.rs +++ b/components/backup-stream/src/metadata/store/lazy_etcd.rs @@ -1,15 +1,20 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{sync::Arc, time::Duration}; +use std::{ + sync::Arc, + time::{Duration, SystemTime}, +}; use etcd_client::{ConnectOptions, Error as EtcdError, OpenSslClientConfig}; use futures::Future; use openssl::x509::verify::X509VerifyFlags; +use security::SecurityManager; use tikv_util::{ info, stream::{RetryError, RetryExt}, + warn, }; -use tokio::sync::OnceCell; +use tokio::sync::Mutex as AsyncMutex; use super::{etcd::EtcdSnapshot, EtcdStore, MetaStore}; use crate::errors::{ContextualResultExt, Result}; @@ -17,20 +22,34 @@ use crate::errors::{ContextualResultExt, Result}; const RPC_TIMEOUT: Duration = Duration::from_secs(30); #[derive(Clone)] -pub struct LazyEtcdClient(Arc); +pub struct LazyEtcdClient(Arc>); -#[derive(Debug)] +#[derive(Clone)] pub struct ConnectionConfig { - pub tls: Option, + pub tls: Arc, pub keep_alive_interval: Duration, pub keep_alive_timeout: Duration, } +impl std::fmt::Debug for ConnectionConfig { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ConnectionConfig") + .field("keep_alive_interval", &self.keep_alive_interval) + .field("keep_alive_timeout", &self.keep_alive_timeout) + .finish() + } +} + impl ConnectionConfig { /// Convert the config to the connection option. 
fn to_connection_options(&self) -> ConnectOptions { let mut opts = ConnectOptions::new(); - if let Some(tls) = &self.tls { + if let Some(tls) = &self + .tls + .client_suite() + .map_err(|err| warn!("failed to load client suite!"; "err" => %err)) + .ok() + { opts = opts.with_openssl_tls( OpenSslClientConfig::default() .ca_cert_pem(&tls.ca) @@ -54,28 +73,27 @@ impl ConnectionConfig { impl LazyEtcdClient { pub fn new(endpoints: &[String], conf: ConnectionConfig) -> Self { - Self(Arc::new(LazyEtcdClientInner { - opt: conf.to_connection_options(), + Self(Arc::new(AsyncMutex::new(LazyEtcdClientInner { + conf, endpoints: endpoints.iter().map(ToString::to_string).collect(), - cli: OnceCell::new(), - })) + last_modified: None, + cli: None, + }))) } -} - -impl std::ops::Deref for LazyEtcdClient { - type Target = LazyEtcdClientInner; - fn deref(&self) -> &Self::Target { - Arc::deref(&self.0) + async fn get_cli(&self) -> Result { + let mut l = self.0.lock().await; + l.get_cli().await.cloned() } } #[derive(Clone)] pub struct LazyEtcdClientInner { - opt: ConnectOptions, + conf: ConnectionConfig, endpoints: Vec, - cli: OnceCell, + last_modified: Option, + cli: Option, } fn etcd_error_is_retryable(etcd_err: &EtcdError) -> bool { @@ -130,23 +148,34 @@ where } impl LazyEtcdClientInner { - async fn connect(&self) -> Result { + async fn connect(&mut self) -> Result<&EtcdStore> { let store = retry(|| { // For now, the interface of the `etcd_client` doesn't us to control // how to create channels when connecting, hence we cannot update the tls config - // at runtime. - // TODO: maybe add some method like `with_channel` for `etcd_client`, and adapt - // the `SecurityManager` API, instead of doing everything by own. - etcd_client::Client::connect(self.endpoints.clone(), Some(self.opt.clone())) + // at runtime, now what we did is manually check that each time we are getting + // the clients. 
+ etcd_client::Client::connect( + self.endpoints.clone(), + Some(self.conf.to_connection_options()), + ) }) .await .context("during connecting to the etcd")?; - Ok(EtcdStore::from(store)) + let store = EtcdStore::from(store); + self.cli = Some(store); + Ok(self.cli.as_ref().unwrap()) } - pub async fn get_cli(&self) -> Result<&EtcdStore> { - let store = self.cli.get_or_try_init(|| self.connect()).await?; - Ok(store) + pub async fn get_cli(&mut self) -> Result<&EtcdStore> { + let modified = self.conf.tls.get_config().is_modified(&mut self.last_modified) + // Don't reload once we cannot check whether it is modified. + // Because when TLS disabled, this would always fail. + .unwrap_or(false); + if !modified && self.cli.is_some() { + return Ok(self.cli.as_ref().unwrap()); + } + info!("log backup reconnecting to the etcd service."; "tls_modified" => %modified, "connected_before" => %self.cli.is_some()); + self.connect().await } } @@ -155,7 +184,7 @@ impl MetaStore for LazyEtcdClient { type Snap = EtcdSnapshot; async fn snapshot(&self) -> Result { - self.0.get_cli().await?.snapshot().await + self.get_cli().await?.snapshot().await } async fn watch( @@ -163,14 +192,14 @@ impl MetaStore for LazyEtcdClient { keys: super::Keys, start_rev: i64, ) -> Result { - self.0.get_cli().await?.watch(keys, start_rev).await + self.get_cli().await?.watch(keys, start_rev).await } async fn txn(&self, txn: super::Transaction) -> Result<()> { - self.0.get_cli().await?.txn(txn).await + self.get_cli().await?.txn(txn).await } async fn txn_cond(&self, txn: super::CondTransaction) -> Result<()> { - self.0.get_cli().await?.txn_cond(txn).await + self.get_cli().await?.txn_cond(txn).await } } diff --git a/components/security/src/lib.rs b/components/security/src/lib.rs index 52f438236fd..68328c01ebe 100644 --- a/components/security/src/lib.rs +++ b/components/security/src/lib.rs @@ -190,6 +190,10 @@ impl SecurityManager { ) } } + + pub fn get_config(&self) -> &SecurityConfig { + &self.cfg + } } 
#[derive(Clone)] diff --git a/components/server/src/server.rs b/components/server/src/server.rs index cfc7e59e243..97fd1f77eef 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1022,13 +1022,7 @@ where ConnectionConfig { keep_alive_interval: self.config.server.grpc_keepalive_time.0, keep_alive_timeout: self.config.server.grpc_keepalive_timeout.0, - tls: self - .security_mgr - .client_suite() - .map_err(|err| { - warn!("Failed to load client TLS suite, ignoring TLS config."; "err" => %err); - }) - .ok(), + tls: Arc::clone(&self.security_mgr), }, ); let backup_stream_endpoint = backup_stream::Endpoint::new( From 42c3814f2a11c50d6a496c8aaca8e314b26f7ead Mon Sep 17 00:00:00 2001 From: YangKeao Date: Thu, 19 Jan 2023 04:25:49 -0500 Subject: [PATCH 0465/1149] json, copr: implement unary not for json (#14070) close tikv/tikv#14069 Signed-off-by: YangKeao Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/tidb_query_executors/src/runner.rs | 9 ++++++ components/tidb_query_expr/src/impl_op.rs | 32 +++++++++++++++++++ components/tidb_query_expr/src/lib.rs | 1 + 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index a2924314f8a..cc89037bffa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6599,7 +6599,7 @@ dependencies = [ [[package]] name = "tipb" version = "0.0.1" -source = "git+https://github.com/pingcap/tipb.git#f3286471a05a4454a1071dd5f66ac7dbf6c79ba3" +source = "git+https://github.com/pingcap/tipb.git#c6b7a5a1623bb2766a502301ecc3ac8f98cc7c79" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index d04be41507e..392b41ff165 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -150,6 +150,15 @@ impl BatchExecutorsRunner<()> { ExecType::TypePartitionTableScan => { other_err!("PartitionTableScan executor not implemented"); } + 
ExecType::TypeSort => { + other_err!("Sort executor not implemented"); + } + ExecType::TypeWindow => { + other_err!("Window executor not implemented"); + } + ExecType::TypeExpand => { + other_err!("Expand executor not implemented"); + } } } diff --git a/components/tidb_query_expr/src/impl_op.rs b/components/tidb_query_expr/src/impl_op.rs index 5289f427e93..665448279fb 100644 --- a/components/tidb_query_expr/src/impl_op.rs +++ b/components/tidb_query_expr/src/impl_op.rs @@ -55,6 +55,18 @@ pub fn unary_not_decimal(arg: Option<&Decimal>) -> Result> { Ok(arg.as_ref().map(|v| v.is_zero() as i64)) } +#[rpn_fn(nullable)] +#[inline] +pub fn unary_not_json(arg: Option) -> Result> { + let json_zero = Json::from_i64(0).unwrap(); + Ok(arg.as_ref().map(|v| { + if v == &json_zero.as_ref() { + return 1; + } + 0 + })) +} + #[rpn_fn(nullable)] #[inline] pub fn unary_minus_uint(arg: Option<&Int>) -> Result> { @@ -383,6 +395,26 @@ mod tests { } } + #[test] + fn test_unary_not_json() { + let test_cases = vec![ + (None, None), + (Some(Json::from_i64(0).unwrap()), Some(1)), + (Some(Json::from_i64(1).unwrap()), Some(0)), + ( + Some(Json::from_array(vec![Json::from_i64(0).unwrap()]).unwrap()), + Some(0), + ), + ]; + for (arg, expect_output) in test_cases { + let output = RpnFnScalarEvaluator::new() + .push_param(arg.clone()) + .evaluate(ScalarFuncSig::UnaryNotJson) + .unwrap(); + assert_eq!(output, expect_output, "{:?}", arg.as_ref()); + } + } + #[test] fn test_unary_minus_int() { let unsigned_test_cases = vec![ diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index 43b0602ebbb..649a7cfa1c8 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -732,6 +732,7 @@ fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { ScalarFuncSig::UnaryNotInt => unary_not_int_fn_meta(), ScalarFuncSig::UnaryNotReal => unary_not_real_fn_meta(), ScalarFuncSig::UnaryNotDecimal => unary_not_decimal_fn_meta(), + 
ScalarFuncSig::UnaryNotJson => unary_not_json_fn_meta(), ScalarFuncSig::UnaryMinusInt => map_unary_minus_int_func(value, children)?, ScalarFuncSig::UnaryMinusReal => unary_minus_real_fn_meta(), ScalarFuncSig::UnaryMinusDecimal => unary_minus_decimal_fn_meta(), From cf622538b2ab118f51bf64a23ba41507b7e67f3f Mon Sep 17 00:00:00 2001 From: Zwb Date: Thu, 19 Jan 2023 22:15:00 +0800 Subject: [PATCH 0466/1149] raftstore: support switch witness (#13491) * support switch witness ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * add switch witness api for test_pd_client ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * pd heartbeat resp support switch witness ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * update region epoch ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * fix write apply state race ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * remove unnecessary code ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * add back test_witness_conf_change ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * add some tests ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * avoid test failures ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * address comments ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * address comments ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * address comments ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * address comments ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * a few refactor ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * add witness election priority and address comments ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * clean code ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * address comments ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * address comments ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * fix tests failed caused by cfg ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * fix test failed caused by mistake modify ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * adjust priority after snapshot persisted ref tikv/tikv#12876 
Signed-off-by: Wenbo Zhang * address comments ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * notify pd after switch witness as region changed ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * define a new backoff error for witness ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * fix panic caused by applygap ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * forbid transfer leader to non-witness waiting data ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * update kvproto ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * fix two panics ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * retry request snapshot ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang * retry to request snaphost after term change ref #12876 Signed-off-by: Wenbo Zhang * update kvproto comment ref #12876 Signed-off-by: Wenbo Zhang Signed-off-by: Wenbo Zhang Signed-off-by: Zwb Co-authored-by: Xinye Tao --- Cargo.lock | 22 +- components/error_code/src/raftstore.rs | 3 + .../raftstore-v2/src/operation/query/local.rs | 1 + components/raftstore/src/coprocessor/mod.rs | 1 + components/raftstore/src/errors.rs | 9 + components/raftstore/src/store/config.rs | 8 + components/raftstore/src/store/fsm/apply.rs | 136 +++++++- components/raftstore/src/store/fsm/mod.rs | 2 +- components/raftstore/src/store/fsm/peer.rs | 170 +++++++-- components/raftstore/src/store/fsm/store.rs | 5 + components/raftstore/src/store/metrics.rs | 7 +- components/raftstore/src/store/msg.rs | 5 +- components/raftstore/src/store/peer.rs | 108 ++++-- .../raftstore/src/store/peer_storage.rs | 18 + components/raftstore/src/store/util.rs | 2 +- .../raftstore/src/store/worker/metrics.rs | 1 + components/raftstore/src/store/worker/pd.rs | 34 +- components/raftstore/src/store/worker/read.rs | 24 +- components/test_pd_client/src/pd.rs | 119 ++++++- etc/error_code.toml | 5 + tests/failpoints/cases/test_witness.rs | 273 +++++++++++++-- tests/integrations/config/mod.rs | 1 + tests/integrations/raftstore/test_witness.rs | 328 +++++++++--------- 23 files 
changed, 1029 insertions(+), 253 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cc89037bffa..e9f55d1923d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2732,7 +2732,7 @@ dependencies = [ "futures 0.3.15", "grpcio", "protobuf", - "protobuf-build", + "protobuf-build 0.13.0", "raft-proto", ] @@ -4121,6 +4121,18 @@ dependencies = [ "regex", ] +[[package]] +name = "protobuf-build" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb3c02f54ecaf12572c1a60dbdb36b1f8f713a16105881143f2be84cca5bbe3" +dependencies = [ + "bitflags", + "protobuf", + "protobuf-codegen", + "regex", +] + [[package]] name = "protobuf-codegen" version = "2.8.0" @@ -4161,7 +4173,7 @@ dependencies = [ [[package]] name = "raft" version = "0.7.0" -source = "git+https://github.com/tikv/raft-rs?branch=master#2357cb22760719bcd107a90d1e64ef505bdb1e15" +source = "git+https://github.com/tikv/raft-rs?branch=master#f73766712a538c2f6eb135b455297ad6c03fc58d" dependencies = [ "bytes", "fxhash", @@ -4220,11 +4232,11 @@ dependencies = [ [[package]] name = "raft-proto" version = "0.7.0" -source = "git+https://github.com/tikv/raft-rs?branch=master#2357cb22760719bcd107a90d1e64ef505bdb1e15" +source = "git+https://github.com/tikv/raft-rs?branch=master#f73766712a538c2f6eb135b455297ad6c03fc58d" dependencies = [ "bytes", "protobuf", - "protobuf-build", + "protobuf-build 0.14.0", ] [[package]] @@ -6604,7 +6616,7 @@ dependencies = [ "futures 0.3.15", "grpcio", "protobuf", - "protobuf-build", + "protobuf-build 0.13.0", ] [[package]] diff --git a/components/error_code/src/raftstore.rs b/components/error_code/src/raftstore.rs index 1b6a85493cf..35dfe564ef0 100644 --- a/components/error_code/src/raftstore.rs +++ b/components/error_code/src/raftstore.rs @@ -32,6 +32,7 @@ define_error_codes!( RECOVERY_IN_PROGRESS => ("RecoveryInProgress", "", ""), FLASHBACK_IN_PROGRESS => ("FlashbackInProgress", "", ""), FLASHBACK_NOT_PREPARED => ("FlashbackNotPrepared", "", ""), + IS_WITNESS 
=> ("IsWitness", "", ""), SNAP_ABORT => ("SnapAbort", "", ""), SNAP_TOO_MANY => ("SnapTooMany", "", ""), @@ -70,6 +71,8 @@ impl ErrorCodeExt for errorpb::Error { FLASHBACK_IN_PROGRESS } else if self.has_flashback_not_prepared() { FLASHBACK_NOT_PREPARED + } else if self.has_is_witness() { + IS_WITNESS } else { UNKNOWN } diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 13b815d1ebc..f574571f790 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -815,6 +815,7 @@ mod tests { txn_ext: txn_ext.clone(), read_progress: read_progress.clone(), pending_remove: false, + wait_data: false, track_ver: TrackVer::new(), bucket_meta: Some(bucket_meta.clone()), }; diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 5100e9d4632..73110660856 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -300,6 +300,7 @@ pub enum RegionChangeReason { PrepareMerge, CommitMerge, RollbackMerge, + SwitchWitness, } #[derive(Clone, Copy, Debug, PartialEq)] diff --git a/components/raftstore/src/errors.rs b/components/raftstore/src/errors.rs index 3c415c65af6..36fcec7f1f3 100644 --- a/components/raftstore/src/errors.rs +++ b/components/raftstore/src/errors.rs @@ -140,6 +140,9 @@ pub enum Error { region_id: u64, local_state: raft_serverpb::RegionLocalState, }, + + #[error("peer is a witness of region {0}")] + IsWitness(u64), } pub type Result = result::Result; @@ -263,6 +266,11 @@ impl From for errorpb::Error { e.set_region_id(region_id); errorpb.set_flashback_not_prepared(e); } + Error::IsWitness(region_id) => { + let mut e = errorpb::IsWitness::default(); + e.set_region_id(region_id); + errorpb.set_is_witness(e); + } _ => {} }; @@ -319,6 +327,7 @@ impl ErrorCodeExt for Error { Error::DataIsNotReady { .. 
} => error_code::raftstore::DATA_IS_NOT_READY, Error::DeadlineExceeded => error_code::raftstore::DEADLINE_EXCEEDED, Error::PendingPrepareMerge => error_code::raftstore::PENDING_PREPARE_MERGE, + Error::IsWitness(..) => error_code::raftstore::IS_WITNESS, Error::Other(_) | Error::RegionNotRegistered { .. } => error_code::raftstore::UNKNOWN, } diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 4d3210318a6..34f4e159dee 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -324,6 +324,12 @@ pub struct Config { #[online_config(hidden)] // Interval to check peers availability info. pub check_peers_availability_interval: ReadableDuration, + + #[doc(hidden)] + #[serde(skip_serializing)] + #[online_config(hidden)] + // Interval to check if need to request snapshot. + pub check_request_snapshot_interval: ReadableDuration, } impl Default for Config { @@ -433,6 +439,8 @@ impl Default for Config { unreachable_backoff: ReadableDuration::secs(10), // TODO: make its value reasonable check_peers_availability_interval: ReadableDuration::secs(30), + // TODO: make its value reasonable + check_request_snapshot_interval: ReadableDuration::minutes(1), } } } diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 60ed35e6892..9f2d234010f 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -40,7 +40,7 @@ use kvproto::{ metapb::{self, PeerRole, Region, RegionEpoch}, raft_cmdpb::{ AdminCmdType, AdminRequest, AdminResponse, ChangePeerRequest, CmdType, CommitMergeRequest, - RaftCmdRequest, RaftCmdResponse, Request, SplitRequest, + RaftCmdRequest, RaftCmdResponse, Request, SplitRequest, SwitchWitnessRequest, }, raft_serverpb::{MergeState, PeerState, RaftApplyState, RaftTruncatedState, RegionLocalState}, }; @@ -252,6 +252,13 @@ impl Range { } } +#[derive(Default, Debug)] +pub struct 
SwitchWitness { + pub index: u64, + pub switches: Vec, + pub region: Region, +} + #[derive(Debug)] pub enum ExecResult { ChangePeer(ChangePeer), @@ -301,6 +308,7 @@ pub enum ExecResult { SetFlashbackState { region: Region, }, + BatchSwitchWitness(SwitchWitness), // The raftstore thread will use it to update the internal state of `PeerFsm`. If it is // `true`, when the raftstore detects that the raft log has not been gc for a long time, // the raftstore thread will actively pull the `voter_replicated_index` from the leader @@ -979,6 +987,9 @@ where /// in same Ready should be applied failed. pending_remove: bool, + /// Indicates whether the peer is waiting data. See more in `Peer`. + wait_data: bool, + /// The commands waiting to be committed and applied pending_cmds: PendingCmdQueue>, /// The counter of pending request snapshots. See more in `Peer`. @@ -1041,6 +1052,7 @@ where peer: find_peer_by_id(®.region, reg.id).unwrap().clone(), region: reg.region, pending_remove: false, + wait_data: false, last_flush_applied_index: reg.apply_state.get_applied_index(), apply_state: reg.apply_state, applied_term: reg.applied_term, @@ -1119,7 +1131,13 @@ where match res { ApplyResult::None => {} - ApplyResult::Res(res) => results.push_back(res), + ApplyResult::Res(res) => { + results.push_back(res); + if self.wait_data { + apply_ctx.committed_count -= committed_entries_drainer.len(); + break; + } + } ApplyResult::Yield | ApplyResult::WaitMergeSource(_) => { // Both cancel and merge will yield current processing. 
apply_ctx.committed_count -= committed_entries_drainer.len() + 1; @@ -1535,6 +1553,12 @@ where ExecResult::SetFlashbackState { ref region } => { self.region = region.clone(); } + ExecResult::BatchSwitchWitness(ref switches) => { + self.region = switches.region.clone(); + if let Some(p) = find_peer_by_id(&self.region, self.id()) { + self.peer = p.clone(); + } + } } } if let Some(epoch) = origin_epoch { @@ -1669,7 +1693,7 @@ where AdminCmdType::PrepareFlashback | AdminCmdType::FinishFlashback => { self.exec_flashback(ctx, request) } - AdminCmdType::BatchSwitchWitness => Err(box_err!("unsupported admin command type")), + AdminCmdType::BatchSwitchWitness => self.exec_batch_switch_witness(ctx, request), AdminCmdType::InvalidAdmin => Err(box_err!("unsupported admin command type")), }?; response.set_cmd_type(cmd_type); @@ -3202,6 +3226,90 @@ where )) } + fn exec_batch_switch_witness( + &mut self, + ctx: &mut ApplyContext, + request: &AdminRequest, + ) -> Result<(AdminResponse, ApplyResult)> { + assert!(request.has_switch_witnesses()); + let switches = request + .get_switch_witnesses() + .get_switch_witnesses() + .to_vec(); + + info!( + "exec BatchSwitchWitness"; + "region_id" => self.region_id(), + "peer_id" => self.id(), + "epoch" => ?self.region.get_region_epoch(), + ); + + let mut region = self.region.clone(); + for s in switches.as_slice() { + PEER_ADMIN_CMD_COUNTER.batch_switch_witness.all.inc(); + let (peer_id, is_witness) = (s.get_peer_id(), s.get_is_witness()); + let mut peer_is_exist = false; + for p in region.mut_peers().iter_mut() { + if p.id == peer_id { + if p.is_witness == is_witness { + return Err(box_err!( + "switch peer {:?} on region {:?} is no-op", + p, + self.region + )); + } + p.is_witness = is_witness; + peer_is_exist = true; + break; + } + } + if !peer_is_exist { + return Err(box_err!( + "switch peer {} on region {:?} failed: peer does not exist", + peer_id, + self.region + )); + } + PEER_ADMIN_CMD_COUNTER.batch_switch_witness.success.inc(); + if 
self.id() == peer_id && !is_witness { + self.wait_data = true; + self.peer.is_witness = false; + } + } + let conf_ver = region.get_region_epoch().get_conf_ver() + switches.len() as u64; + region.mut_region_epoch().set_conf_ver(conf_ver); + info!( + "switch witness successfully"; + "region_id" => self.region_id(), + "peer_id" => self.id(), + "switches" => ?switches, + "original region" => ?&self.region, + "current region" => ?®ion, + ); + + let state = if self.pending_remove { + PeerState::Tombstone + } else if self.wait_data { + PeerState::Unavailable + } else { + PeerState::Normal + }; + + if let Err(e) = write_peer_state(ctx.kv_wb_mut(), ®ion, state, None) { + panic!("{} failed to update region state: {:?}", self.tag, e); + } + + let resp = AdminResponse::default(); + Ok(( + resp, + ApplyResult::Res(ExecResult::BatchSwitchWitness(SwitchWitness { + index: ctx.exec_log_index, + switches, + region, + })), + )) + } + fn update_memory_trace(&mut self, event: &mut TraceEvent) { let pending_cmds = self.pending_cmds.heap_size(); let merge_yield = if let Some(ref mut state) = self.yield_state { @@ -3593,6 +3701,7 @@ where #[cfg(any(test, feature = "testexport"))] #[allow(clippy::type_complexity)] Validate(u64, Box), + Recover(u64), CheckCompact { region_id: u64, voter_replicated_index: u64, @@ -3645,6 +3754,7 @@ where } => write!(f, "[region {}] change cmd", region_id), #[cfg(any(test, feature = "testexport"))] Msg::Validate(region_id, _) => write!(f, "[region {}] validate", region_id), + Msg::Recover(region_id) => write!(f, "recover [region {}] apply", region_id), Msg::CheckCompact { region_id, voter_replicated_index, @@ -3770,6 +3880,10 @@ where return; } + if self.delegate.wait_data { + return; + } + let mut entries = Vec::new(); let mut dangle_size = 0; @@ -3972,8 +4086,9 @@ where if self.delegate.pending_remove || self.delegate.stopped { return; } - if self.delegate.peer.is_witness { - // witness shouldn't generate snapshot. 
+ if self.delegate.peer.is_witness || self.delegate.wait_data { + // witness or non-witness hasn't finish applying snapshot shouldn't generate + // snapshot. return; } let applied_index = self.delegate.apply_state.get_applied_index(); @@ -4199,8 +4314,11 @@ where } } } - batch_apply = Some(apply); + if !self.delegate.wait_data { + batch_apply = Some(apply); + } } + Msg::Recover(..) => self.delegate.wait_data = false, Msg::Registration(reg) => self.handle_registration(reg), Msg::Destroy(d) => self.handle_destroy(apply_ctx, d), Msg::LogsUpToDate(cul) => self.logs_up_to_date_for_merge(apply_ctx, cul), @@ -4637,6 +4755,11 @@ where } #[cfg(any(test, feature = "testexport"))] Msg::Validate(..) => return, + Msg::Recover(region_id) => { + info!("recover apply"; + "region_id" => region_id); + return; + } Msg::CheckCompact { region_id, .. } => { info!("target region is not found"; "region_id" => region_id); @@ -4774,6 +4897,7 @@ mod memtrace { | Msg::Change { .. } => 0, #[cfg(any(test, feature = "testexport"))] Msg::Validate(..) => 0, + Msg::Recover(..) => 0, Msg::CheckCompact { .. 
} => 0, } } diff --git a/components/raftstore/src/store/fsm/mod.rs b/components/raftstore/src/store/fsm/mod.rs index 2f700eec9bf..b481caf4f74 100644 --- a/components/raftstore/src/store/fsm/mod.rs +++ b/components/raftstore/src/store/fsm/mod.rs @@ -14,7 +14,7 @@ pub use self::{ check_sst_for_ingestion, create_apply_batch_system, Apply, ApplyBatchSystem, ApplyMetrics, ApplyRes, ApplyRouter, Builder as ApplyPollerBuilder, CatchUpLogs, ChangeObserver, ChangePeer, ExecResult, GenSnapTask, Msg as ApplyTask, Notifier as ApplyNotifier, Proposal, - Registration, TaskRes as ApplyTaskRes, + Registration, SwitchWitness, TaskRes as ApplyTaskRes, }, peer::{new_admin_request, DestroyPeerJob, PeerFsm, MAX_PROPOSAL_SIZE_RATIO}, store::{ diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index ccde4b031ef..d405c3471af 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -75,7 +75,7 @@ use crate::{ apply, store::{PollContext, StoreMeta}, ApplyMetrics, ApplyTask, ApplyTaskRes, CatchUpLogs, ChangeObserver, ChangePeer, - ExecResult, + ExecResult, SwitchWitness, }, hibernate_state::{GroupState, HibernateState}, local_metrics::{RaftMetrics, TimeTracker}, @@ -247,6 +247,7 @@ where raftlog_fetch_scheduler: Scheduler>, engines: Engines, region: &metapb::Region, + wait_data: bool, ) -> Result> { let meta_peer = match find_peer(region, store_id) { None => { @@ -277,6 +278,7 @@ where engines, region, meta_peer, + wait_data, )?, tick_registry: [false; PeerTick::VARIANT_COUNT], missing_ticks: 0, @@ -331,6 +333,7 @@ where engines, ®ion, peer, + false, )?, tick_registry: [false; PeerTick::VARIANT_COUNT], missing_ticks: 0, @@ -1192,6 +1195,7 @@ where PeerTick::ReportBuckets => self.on_report_region_buckets_tick(), PeerTick::CheckLongUncommitted => self.on_check_long_uncommitted_tick(), PeerTick::CheckPeersAvailability => self.on_check_peers_availability(), + PeerTick::RequestSnapshot => 
self.on_request_snapshot_tick(), PeerTick::RequestVoterReplicatedIndex => self.on_request_voter_replicated_index(), } } @@ -1203,6 +1207,9 @@ where self.register_split_region_check_tick(); self.register_check_peer_stale_state_tick(); self.on_check_merge(); + if self.fsm.peer.wait_data { + self.on_request_snapshot_tick(); + } // Apply committed entries more quickly. // Or if it's a leader. This implicitly means it's a singleton // because it becomes leader in `Peer::new` when it's a @@ -1951,6 +1958,7 @@ where self.register_raft_gc_log_tick(); self.register_check_leader_lease_tick(); self.register_report_region_buckets_tick(); + self.register_check_peers_availability_tick(); } if let Some(ForceLeaderState::ForceLeader { .. }) = self.fsm.peer.force_leader { @@ -2161,12 +2169,6 @@ where return; } - // Keep ticking if there are disk full peers for the Region. - if !self.fsm.peer.disk_full_peers.is_empty() { - self.register_raft_base_tick(); - return; - } - debug!("stop ticking"; "res" => ?res, "region_id" => self.region_id(), "peer_id" => self.fsm.peer_id(), @@ -2258,6 +2260,9 @@ where "peer_id" => self.fsm.peer_id(), "res" => ?res, ); + if self.fsm.peer.wait_data { + return; + } self.on_ready_result(&mut res.exec_res, &res.metrics); if self.fsm.stopped { return; @@ -2467,6 +2472,17 @@ where return Ok(()); } + if MessageType::MsgAppend == msg_type + && self.fsm.peer.wait_data + && self.fsm.peer.should_reject_msgappend + { + debug!("skip {:?} because of non-witness waiting data", msg_type; + "region_id" => self.region_id(), "peer_id" => self.fsm.peer_id() + ); + self.ctx.raft_metrics.message_dropped.non_witness.inc(); + return Ok(()); + } + if !self.validate_raft_msg(&msg) { return Ok(()); } @@ -2603,6 +2619,7 @@ where fn on_hibernate_request(&mut self, from: &metapb::Peer) { if !self.ctx.cfg.hibernate_regions || self.fsm.peer.has_uncommitted_log() + || self.fsm.peer.wait_data || from.get_id() != self.fsm.peer.leader_id() { // Ignore the message means rejecting 
implicitly. @@ -3053,7 +3070,7 @@ where if snap.get_metadata().get_index() < self.fsm.peer.get_store().applied_index() && snap_data.get_meta().get_for_witness() != self.fsm.peer.is_witness() { - info!( + error!( "mismatch witness snapshot"; "region_id" => region_id, "peer_id" => self.fsm.peer_id(), @@ -3355,7 +3372,6 @@ where ); } else { self.fsm.peer.transfer_leader(&from); - self.fsm.peer.wait_data_peers.clear(); } } } @@ -4069,6 +4085,7 @@ where self.ctx.raftlog_fetch_scheduler.clone(), self.ctx.engines.clone(), &new_region, + false, ) { Ok((sender, new_peer)) => (sender, new_peer), Err(e) => { @@ -4959,6 +4976,9 @@ where ExecResult::IngestSst { ssts } => self.on_ingest_sst_result(ssts), ExecResult::TransferLeader { term } => self.on_transfer_leader(term), ExecResult::SetFlashbackState { region } => self.on_set_flashback_state(region), + ExecResult::BatchSwitchWitness(switches) => { + self.on_ready_batch_switch_witness(switches) + } ExecResult::HasPendingCompactCmd(has_pending) => { self.fsm.peer.has_pending_compact_cmd = has_pending; if has_pending { @@ -5126,8 +5146,29 @@ where && msg.get_admin_request().get_cmd_type() == AdminCmdType::TransferLeader) { self.ctx.raft_metrics.invalid_proposal.witness.inc(); - // TODO: use a dedicated error type - return Err(Error::RecoveryInProgress(self.region_id())); + return Err(Error::IsWitness(self.region_id())); + } + + // Forbid requests to switch it into a witness when it's a leader + if self.fsm.peer.is_leader() + && msg.has_admin_request() + && msg.get_admin_request().get_cmd_type() == AdminCmdType::BatchSwitchWitness + && msg + .get_admin_request() + .get_switch_witnesses() + .get_switch_witnesses() + .iter() + .any(|s| s.get_peer_id() == self.fsm.peer.peer.get_id() && s.get_is_witness()) + { + self.ctx.raft_metrics.invalid_proposal.witness.inc(); + return Err(Error::IsWitness(self.region_id())); + } + + // Forbid requests when it becomes to non-witness but not finish applying + // snapshot. 
+ if self.fsm.peer.wait_data { + self.ctx.raft_metrics.invalid_proposal.non_witness.inc(); + return Err(Error::IsWitness(self.region_id())); } // check whether the peer is initialized. @@ -5518,6 +5559,36 @@ where self.register_check_long_uncommitted_tick(); } + fn on_request_snapshot_tick(&mut self) { + fail_point!("ignore request snapshot", |_| { + self.schedule_tick(PeerTick::RequestSnapshot); + }); + if !self.fsm.peer.wait_data || self.fsm.peer.is_leader() { + return; + } + self.fsm.peer.request_index = self.fsm.peer.raft_group.raft.raft_log.last_index(); + let last_term = self.fsm.peer.get_index_term(self.fsm.peer.request_index); + if last_term == self.fsm.peer.term() { + self.fsm.peer.should_reject_msgappend = true; + if let Err(e) = self.fsm.peer.raft_group.request_snapshot() { + error!( + "failed to request snapshot"; + "region_id" => self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + "err" => %e, + ); + } + } else { + // If a leader change occurs after switch to non-witness, it should be + // continue processing `MsgAppend` until `last_term == term`, then retry + // to request snapshot. + self.fsm.peer.should_reject_msgappend = false; + } + // Requesting a snapshot may fail, so register a periodic event as a defense + // until succeeded. 
+ self.schedule_tick(PeerTick::RequestSnapshot); + } + fn on_request_voter_replicated_index(&mut self) { if !self.fsm.peer.is_witness() || !self.fsm.peer.has_pending_compact_cmd { return; @@ -6059,18 +6130,31 @@ where } fn on_check_peers_availability(&mut self) { + let mut invalid_peers: Vec = Vec::new(); for peer_id in self.fsm.peer.wait_data_peers.iter() { - let peer = self.fsm.peer.get_peer_from_cache(*peer_id).unwrap(); - let mut msg = ExtraMessage::default(); - msg.set_type(ExtraMessageType::MsgAvailabilityRequest); - self.fsm - .peer - .send_extra_message(msg, &mut self.ctx.trans, &peer); - debug!( - "check peer availability"; - "target peer id" => *peer_id, - ); + match self.fsm.peer.get_peer_from_cache(*peer_id) { + Some(peer) => { + let mut msg = ExtraMessage::default(); + msg.set_type(ExtraMessageType::MsgAvailabilityRequest); + self.fsm + .peer + .send_extra_message(msg, &mut self.ctx.trans, &peer); + debug!( + "check peer availability"; + "target peer id" => *peer_id, + ); + } + None => invalid_peers.push(*peer_id), + } } + // For some reasons, the peer corresponding to the previously saved peer_id + // no longer exists. In order to avoid passing invalid information to pd when + // reporting pending peers and affecting pd scheduling, remove it from the + // `wait_data_peers`. 
+ self.fsm + .peer + .wait_data_peers + .retain(|peer_id| !invalid_peers.contains(peer_id)); } fn register_pull_voter_replicated_index_tick(&mut self) { @@ -6355,6 +6439,50 @@ where self.fsm.peer.leader_lease_mut().expire_remote_lease(); } + fn on_ready_batch_switch_witness(&mut self, sw: SwitchWitness) { + { + let mut meta = self.ctx.store_meta.lock().unwrap(); + meta.set_region( + &self.ctx.coprocessor_host, + sw.region, + &mut self.fsm.peer, + RegionChangeReason::SwitchWitness, + ); + } + for s in sw.switches { + let (peer_id, is_witness) = (s.get_peer_id(), s.get_is_witness()); + if self.fsm.peer_id() == peer_id { + if is_witness && !self.fsm.peer.is_leader() { + let _ = self.fsm.peer.get_store().clear_data(); + self.fsm.peer.raft_group.set_priority(-1); + } else { + self.fsm + .peer + .update_read_progress(self.ctx, ReadProgress::WaitData(true)); + self.fsm.peer.wait_data = true; + self.on_request_snapshot_tick(); + } + self.fsm.peer.peer.is_witness = is_witness; + continue; + } + if !is_witness && !self.fsm.peer.wait_data_peers.contains(&peer_id) { + self.fsm.peer.wait_data_peers.push(peer_id); + } + } + if self.fsm.peer.is_leader() { + info!( + "notify pd with change peer region"; + "region_id" => self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + "region" => ?self.fsm.peer.region(), + ); + self.fsm.peer.heartbeat_pd(self.ctx); + if !self.fsm.peer.wait_data_peers.is_empty() { + self.register_check_peers_availability_tick(); + } + } + } + /// Verify and store the hash to state. return true means the hash has been /// stored successfully. // TODO: Consider context in the function. 
diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index b75aee3b4bb..2ca573824f9 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -594,6 +594,8 @@ where self.cfg.check_long_uncommitted_interval.0; self.tick_batch[PeerTick::CheckPeersAvailability as usize].wait_duration = self.cfg.check_peers_availability_interval.0; + self.tick_batch[PeerTick::RequestSnapshot as usize].wait_duration = + self.cfg.check_request_snapshot_interval.0; // TODO: make it reasonable self.tick_batch[PeerTick::RequestVoterReplicatedIndex as usize].wait_duration = self.cfg.raft_log_gc_tick_interval.0 * 2; @@ -1206,6 +1208,7 @@ impl RaftPollerBuilder { self.raftlog_fetch_scheduler.clone(), self.engines.clone(), region, + local_state.get_state() == PeerState::Unavailable, )); peer.peer.init_replication_mode(&mut replication_state); if local_state.get_state() == PeerState::Merging { @@ -1246,6 +1249,7 @@ impl RaftPollerBuilder { self.raftlog_fetch_scheduler.clone(), self.engines.clone(), ®ion, + false, )?; peer.peer.init_replication_mode(&mut replication_state); peer.schedule_applying_snapshot(); @@ -2911,6 +2915,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER self.ctx.raftlog_fetch_scheduler.clone(), self.ctx.engines.clone(), ®ion, + false, ) { Ok((sender, peer)) => (sender, peer), Err(e) => { diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index ce4f099610e..6c6357d286c 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -35,7 +35,8 @@ make_auto_flush_static_metric! { compact, transfer_leader, prepare_flashback, - finish_flashback + finish_flashback, + batch_switch_witness : "batch-switch-witness", } pub label_enum AdminCmdStatus { @@ -177,6 +178,7 @@ make_static_metric! 
{ region_nonexistent, applying_snap, disk_full, + non_witness, recovery, } @@ -205,7 +207,8 @@ make_static_metric! { force_leader, witness, flashback_in_progress, - flashback_not_prepared + flashback_not_prepared, + non_witness, } pub label_enum RaftEventDurationType { diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index b2a2a7aa1d1..3c555689cb9 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -384,7 +384,8 @@ pub enum PeerTick { ReportBuckets = 9, CheckLongUncommitted = 10, CheckPeersAvailability = 11, - RequestVoterReplicatedIndex = 12, + RequestSnapshot = 12, + RequestVoterReplicatedIndex = 13, } impl PeerTick { @@ -405,6 +406,7 @@ impl PeerTick { PeerTick::ReportBuckets => "report_buckets", PeerTick::CheckLongUncommitted => "check_long_uncommitted", PeerTick::CheckPeersAvailability => "check_peers_availability", + PeerTick::RequestSnapshot => "request_snapshot", PeerTick::RequestVoterReplicatedIndex => "request_voter_replicated_index", } } @@ -423,6 +425,7 @@ impl PeerTick { PeerTick::ReportBuckets, PeerTick::CheckLongUncommitted, PeerTick::CheckPeersAvailability, + PeerTick::RequestSnapshot, PeerTick::RequestVoterReplicatedIndex, ]; TICKS diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 586ab7ba133..e2a914fded6 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -894,6 +894,17 @@ where /// the same time period. pub wait_data: bool, + /// When the witness becomes non-witness, it need to actively request a + /// snapshot from the leader, but the request may fail, so we need to save + /// the request index for retrying. 
+ pub request_index: u64, + + /// When the witness becomes non-witness, it need to actively request a + /// snapshot from the leader, In order to avoid log lag, we need to reject + /// the leader's `MsgAppend` request unless the `term` of the `last index` + /// is less than the peer's current `term`. + pub should_reject_msgappend: bool, + /// Force leader state is only used in online recovery when the majority of /// peers are missing. In this state, it forces one peer to become leader /// out of accordance with Raft election rule, and forbids any @@ -1055,6 +1066,7 @@ where engines: Engines, region: &metapb::Region, peer: metapb::Peer, + wait_data: bool, ) -> Result> { let peer_id = peer.get_id(); if peer_id == raft::INVALID_ID { @@ -1086,12 +1098,13 @@ where skip_bcast_commit: true, pre_vote: cfg.prevote, max_committed_size_per_ready: MAX_COMMITTED_SIZE_PER_READY, - // TODO: if peer.is_witness { 0 } else { 1 }, + priority: if peer.is_witness { -1 } else { 0 }, ..Default::default() }; let logger = slog_global::get_global().new(slog::o!("region_id" => region.get_id())); let raft_group = RawNode::new(&raft_cfg, ps, &logger)?; + let last_index = raft_group.store().last_index(); // In order to avoid excessive log accumulation due to the loss of pending // compaction cmds after the witness is restarted, it will actively pull // voter_request_index once at start. 
@@ -1118,7 +1131,9 @@ where compaction_declined_bytes: 0, leader_unreachable: false, pending_remove: false, - wait_data: false, + wait_data, + request_index: last_index, + should_reject_msgappend: false, should_wake_up: false, force_leader: None, pending_merge_state: None, @@ -1592,6 +1607,14 @@ where res.reason = "replication mode"; return res; } + if !self.disk_full_peers.is_empty() { + res.reason = "has disk full peers"; + return res; + } + if !self.wait_data_peers.is_empty() { + res.reason = "has wait data peers"; + return res; + } res.up_to_date = true; res } @@ -1617,6 +1640,8 @@ where && !self.has_unresolved_reads() // If it becomes leader, the stats is not valid anymore. && !self.is_leader() + // Keep ticking if it's waiting for snapshot. + && !self.wait_data } } @@ -2061,6 +2086,12 @@ where let status = self.raft_group.status(); let truncated_idx = self.get_store().truncated_index(); + for peer_id in &self.wait_data_peers { + if let Some(p) = self.get_peer_from_cache(*peer_id) { + pending_peers.push(p); + } + } + if status.progress.is_none() { return pending_peers; } @@ -2137,6 +2168,9 @@ where if self.peers_start_pending_time[i].0 != peer_id { continue; } + if self.wait_data_peers.contains(&peer_id) { + continue; + } let truncated_idx = self.raft_group.store().truncated_index(); if let Some(progress) = self.raft_group.raft.prs().get(peer_id) { if progress.matched >= truncated_idx { @@ -2394,8 +2428,12 @@ where // a stale heartbeat can make the leader think follower has already applied // the snapshot, and send remaining log entries, which may increase // commit_index. + // + // If it's witness before, but a command changes it to non-witness, it will stop + // applying all following command, therefore, add the judgment of `wait_data` to + // avoid applying snapshot is also blocked. 
// TODO: add more test - self.last_applying_idx == self.get_store().applied_index() + (self.last_applying_idx == self.get_store().applied_index() || self.wait_data) // Requesting snapshots also triggers apply workers to write // apply states even if there is no pending committed entry. // TODO: Instead of sharing the counter, we should apply snapshots @@ -2565,11 +2603,18 @@ where // i.e. call `RawNode::advance_apply_to`. self.post_pending_read_index_on_replica(ctx); // Resume `read_progress` + self.update_read_progress(ctx, ReadProgress::WaitData(false)); self.read_progress.resume(); // Update apply index to `last_applying_idx` self.read_progress .update_applied(self.last_applying_idx, &ctx.coprocessor_host); - self.notify_leader_the_peer_is_available(ctx); + if self.wait_data { + self.notify_leader_the_peer_is_available(ctx); + ctx.apply_router + .schedule_task(self.region_id, ApplyTask::Recover(self.region_id)); + self.wait_data = false; + return false; + } } CheckApplyingSnapStatus::Idle => { // FIXME: It's possible that the snapshot applying task is canceled. 
@@ -2590,22 +2635,19 @@ where &mut self, ctx: &mut PollContext, ) { - if self.wait_data { - self.wait_data = false; - fail_point!("ignore notify leader the peer is available", |_| {}); - let leader_id = self.leader_id(); - let leader = self.get_peer_from_cache(leader_id); - if let Some(leader) = leader { - let mut msg = ExtraMessage::default(); - msg.set_type(ExtraMessageType::MsgAvailabilityResponse); - msg.wait_data = false; - self.send_extra_message(msg, &mut ctx.trans, &leader); - info!( - "notify leader the leader is available"; - "region id" => self.region().get_id(), - "peer id" => self.peer.id - ); - } + fail_point!("ignore notify leader the peer is available", |_| {}); + let leader_id = self.leader_id(); + let leader = self.get_peer_from_cache(leader_id); + if let Some(leader) = leader { + let mut msg = ExtraMessage::default(); + msg.set_type(ExtraMessageType::MsgAvailabilityResponse); + msg.wait_data = false; + self.send_extra_message(msg, &mut ctx.trans, &leader); + info!( + "notify leader the peer is available"; + "region id" => self.region().get_id(), + "peer id" => self.peer.id + ); } } @@ -3128,9 +3170,8 @@ where "after" => ?peer, ); self.peer = peer; - // TODO: set priority for witness - // self.raft_group - // .set_priority(if self.peer.is_witness { 0 } else { 1 }); + self.raft_group + .set_priority(if self.peer.is_witness { -1 } else { 0 }); }; self.activate(ctx); @@ -3586,6 +3627,16 @@ where reader.update(progress); } + pub fn update_read_progress( + &self, + ctx: &mut PollContext, + progress: ReadProgress, + ) { + let mut meta = ctx.store_meta.lock().unwrap(); + let reader = meta.readers.get_mut(&self.region_id).unwrap(); + self.maybe_update_read_progress(reader, progress); + } + pub fn maybe_campaign(&mut self, parent_is_leader: bool) -> bool { if self.region().get_peers().len() <= 1 { // The peer campaigned when it was created, no need to do it again. 
@@ -4434,13 +4485,10 @@ where msg: &eraftpb::Message, peer_disk_usage: DiskUsage, ) -> bool { - if self.is_witness() { - // shouldn't transfer leader to witness peer - return true; - } - let pending_snapshot = self.is_handling_snapshot() || self.has_pending_snapshot(); - if pending_snapshot + // shouldn't transfer leader to witness peer or non-witness waiting data + if self.is_witness() || self.wait_data + || pending_snapshot || msg.get_from() != self.leader_id() // Transfer leader to node with disk full will lead to write availablity downback. // But if the current leader is disk full, and send such request, we should allow it, @@ -4455,6 +4503,8 @@ where "from" => msg.get_from(), "pending_snapshot" => pending_snapshot, "disk_usage" => ?ctx.self_disk_usage, + "is_witness" => self.is_witness(), + "wait_data" => self.wait_data, ); return true; } diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index b060a866d71..8dc8a18906c 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -449,6 +449,11 @@ where /// Gets a snapshot. Returns `SnapshotTemporarilyUnavailable` if there is no /// available snapshot. 
pub fn snapshot(&self, request_index: u64, to: u64) -> raft::Result { + fail_point!("ignore generate snapshot", self.peer_id == 1, |_| { + Err(raft::Error::Store( + raft::StorageError::SnapshotTemporarilyUnavailable, + )) + }); if self.peer.as_ref().unwrap().is_witness { // witness could be the leader for a while, do not generate snapshot now return Err(raft::Error::Store( @@ -457,6 +462,18 @@ } if find_peer_by_id(&self.region, to).map_or(false, |p| p.is_witness) { + // Although we always send the snapshot task behind the apply task to get the + // latest snapshot, we can't use `last_applying_idx` here, as the judgment + // condition below would generate a witness snapshot directly; the new non-witness + // would ignore this mismatched snapshot and couldn't request a snapshot successfully + // again. + if self.applied_index() < request_index { + // It may be a request from a non-witness. In order to avoid generating mismatched + // snapshots, wait for the non-witness apply to complete + return Err(raft::Error::Store( + raft::StorageError::SnapshotTemporarilyUnavailable, + )); + } // generate an empty snapshot for witness directly return Ok(util::new_empty_snapshot( self.region.clone(), @@ -666,6 +683,7 @@ "peer_id" => self.peer_id, "region" => ?region, "state" => ?self.apply_state(), + "for_witness" => for_witness, ); Ok((region, for_witness)) diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 2d27b56fda5..2f61534d159 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -228,7 +228,7 @@ pub fn admin_cmd_epoch_lookup(admin_cmp_type: AdminCmdType) -> AdminCmdEpochStat AdminCmdType::PrepareFlashback | AdminCmdType::FinishFlashback => { AdminCmdEpochState::new(true, true, false, false) } - AdminCmdType::BatchSwitchWitness => unimplemented!(), + AdminCmdType::BatchSwitchWitness => AdminCmdEpochState::new(false, true, false, true), } } diff --git 
a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index 5861e27a508..e6c3c505cdf 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -59,6 +59,7 @@ make_static_metric! { witness, flashback_not_prepared, flashback_in_progress, + wait_data, } pub struct LocalReadRejectCounter : LocalIntCounter { diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 79b58d75c83..18ecc77f599 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -25,8 +25,8 @@ use kvproto::{ kvrpcpb::DiskFullOpt, metapb, pdpb, raft_cmdpb::{ - AdminCmdType, AdminRequest, ChangePeerRequest, ChangePeerV2Request, RaftCmdRequest, - SplitRequest, + AdminCmdType, AdminRequest, BatchSwitchWitnessRequest, ChangePeerRequest, + ChangePeerV2Request, RaftCmdRequest, SplitRequest, SwitchWitnessRequest, }, raft_serverpb::RaftMessage, replication_modepb::{RegionReplicationStatus, StoreDrAutoSyncStatus}, @@ -1551,6 +1551,18 @@ where deadline:None, disk_full_opt:DiskFullOpt::AllowedOnAlmostFull, }); + } else if resp.has_switch_witnesses() { + PD_HEARTBEAT_COUNTER_VEC + .with_label_values(&["switch witness"]) + .inc(); + + let mut switches = resp.take_switch_witnesses(); + info!("try to switch witness"; + "region_id" => region_id, + "switch witness" => ?switches + ); + let req = new_batch_switch_witness(switches.take_switch_witnesses().into()); + send_admin_request(&router, region_id, epoch, peer, req, Callback::None, Default::default()); } else { PD_HEARTBEAT_COUNTER_VEC.with_label_values(&["noop"]).inc(); } @@ -2257,6 +2269,24 @@ fn new_merge_request(merge: pdpb::Merge) -> AdminRequest { req } +fn new_batch_switch_witness(switches: Vec) -> AdminRequest { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::BatchSwitchWitness); + let switch_reqs = switches + .into_iter() + 
.map(|s| { + let mut sw = SwitchWitnessRequest::default(); + sw.set_peer_id(s.get_peer_id()); + sw.set_is_witness(s.get_is_witness()); + sw + }) + .collect(); + let mut sw = BatchSwitchWitnessRequest::default(); + sw.set_switch_witnesses(switch_reqs); + req.set_switch_witnesses(sw); + req +} + fn send_admin_request( router: &RaftRouter, region_id: u64, diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index a7849f5e1dd..6b20e375786 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -412,6 +412,8 @@ pub struct ReadDelegate { pub txn_ext: Arc, pub read_progress: Arc, pub pending_remove: bool, + /// Indicates whether the peer is waiting data. See more in `Peer`. + pub wait_data: bool, // `track_ver` used to keep the local `ReadDelegate` in `LocalReader` // up-to-date with the global `ReadDelegate` stored at `StoreMeta` @@ -435,6 +437,7 @@ impl ReadDelegate { txn_ext: peer.txn_ext.clone(), read_progress: peer.read_progress.clone(), pending_remove: false, + wait_data: false, bucket_meta: peer.region_buckets.as_ref().map(|b| b.meta.clone()), track_ver: TrackVer::new(), } @@ -463,6 +466,7 @@ impl ReadDelegate { txn_ext, read_progress, pending_remove: false, + wait_data: false, bucket_meta, track_ver: TrackVer::new(), } @@ -496,6 +500,9 @@ impl ReadDelegate { Progress::RegionBuckets(bucket_meta) => { self.bucket_meta = Some(bucket_meta); } + Progress::WaitData(wait_data) => { + self.wait_data = wait_data; + } } } @@ -591,6 +598,7 @@ impl ReadDelegate { txn_ext: Default::default(), read_progress, pending_remove: false, + wait_data: false, track_ver: TrackVer::new(), bucket_meta: None, } @@ -620,6 +628,7 @@ pub enum Progress { AppliedTerm(u64), LeaderLease(RemoteLease), RegionBuckets(Arc), + WaitData(bool), } impl Progress { @@ -642,6 +651,10 @@ impl Progress { pub fn region_buckets(bucket_meta: Arc) -> Progress { Progress::RegionBuckets(bucket_meta) } + + 
pub fn wait_data(wait_data: bool) -> Progress { + Progress::WaitData(wait_data) + } } struct SnapCache @@ -797,7 +810,13 @@ where // Check witness if find_peer_by_id(&delegate.region, delegate.peer_id).map_or(true, |p| p.is_witness) { TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.witness.inc()); - return Err(Error::RecoveryInProgress(region_id)); + return Err(Error::IsWitness(region_id)); + } + + // Check non-witness hasn't finish applying snapshot yet. + if delegate.wait_data { + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.wait_data.inc()); + return Err(Error::IsWitness(region_id)); } // Check whether the region is in the flashback state and the local read could @@ -1299,6 +1318,7 @@ mod tests { txn_ext: Arc::new(TxnExt::default()), read_progress: read_progress.clone(), pending_remove: false, + wait_data: false, track_ver: TrackVer::new(), bucket_meta: None, }; @@ -1590,6 +1610,7 @@ mod tests { track_ver: TrackVer::new(), read_progress: Arc::new(RegionReadProgress::new(®ion, 0, 0, 1)), pending_remove: false, + wait_data: false, bucket_meta: None, }; meta.readers.insert(1, read_delegate); @@ -1715,6 +1736,7 @@ mod tests { txn_ext: Arc::new(TxnExt::default()), read_progress, pending_remove: false, + wait_data: false, track_ver: TrackVer::new(), bucket_meta: None, }; diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index 513d08643a7..a76692c4a67 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -27,8 +27,8 @@ use keys::{self, data_key, enc_end_key, enc_start_key}; use kvproto::{ metapb::{self, PeerRole}, pdpb::{ - self, ChangePeer, ChangePeerV2, CheckPolicy, Merge, RegionHeartbeatResponse, SplitRegion, - TransferLeader, + self, BatchSwitchWitness, ChangePeer, ChangePeerV2, CheckPolicy, Merge, + RegionHeartbeatResponse, SplitRegion, SwitchWitness, TransferLeader, }, replication_modepb::{ DrAutoSyncState, RegionReplicationStatus, ReplicationMode, 
ReplicationStatus, @@ -40,7 +40,7 @@ use pd_client::{ }; use raft::eraftpb::ConfChangeType; use tikv_util::{ - store::{check_key_in_region, find_peer, is_learner, new_peer, QueryStats}, + store::{check_key_in_region, find_peer, find_peer_by_id, is_learner, new_peer, QueryStats}, time::{Instant, UnixSecs}, timer::GLOBAL_TIMER_HANDLE, Either, HandyRwLock, @@ -135,6 +135,11 @@ enum Operator { remove_peers: Vec, policy: SchedulePolicy, }, + BatchSwitchWitness { + peer_ids: Vec, + is_witnesses: Vec, + policy: SchedulePolicy, + }, } pub fn sleep_ms(ms: u64) { @@ -201,6 +206,22 @@ pub fn new_pd_merge_region(target_region: metapb::Region) -> RegionHeartbeatResp resp } +fn switch_witness(peer_id: u64, is_witness: bool) -> SwitchWitness { + let mut sw = SwitchWitness::default(); + sw.set_peer_id(peer_id); + sw.set_is_witness(is_witness); + sw +} + +pub fn new_pd_batch_switch_witnesses(switches: Vec) -> RegionHeartbeatResponse { + let mut switch_witnesses = BatchSwitchWitness::default(); + switch_witnesses.set_switch_witnesses(switches.into()); + + let mut resp = RegionHeartbeatResponse::default(); + resp.set_switch_witnesses(switch_witnesses); + resp +} + impl Operator { fn make_region_heartbeat_response( &self, @@ -276,6 +297,17 @@ impl Operator { } new_pd_change_peer_v2(cps) } + Operator::BatchSwitchWitness { + ref peer_ids, + ref is_witnesses, + .. 
+ } => { + let mut switches = Vec::with_capacity(peer_ids.len()); + for (peer_id, is_witness) in peer_ids.iter().zip(is_witnesses.iter()) { + switches.push(switch_witness(*peer_id, *is_witness)); + } + new_pd_batch_switch_witnesses(switches) + } } } @@ -360,6 +392,26 @@ impl Operator { add && remove || !policy.schedule() } + Operator::BatchSwitchWitness { + ref peer_ids, + ref is_witnesses, + ref mut policy, + } => { + if !policy.schedule() { + return true; + } + for (peer_id, is_witness) in peer_ids.iter().zip(is_witnesses.iter()) { + if region + .get_peers() + .iter() + .any(|p| (p.get_id() == *peer_id) && (p.get_is_witness() != *is_witness)) + || cluster.pending_peers.contains_key(peer_id) + { + return false; + } + } + true + } } } } @@ -1043,6 +1095,48 @@ impl TestPdClient { panic!("region {:?} failed to leave joint", region); } + pub fn must_finish_switch_witnesses( + &self, + region_id: u64, + peer_ids: Vec, + is_witnesses: Vec, + ) { + for _ in 1..500 { + sleep_ms(10); + let region = match block_on(self.get_region_by_id(region_id)).unwrap() { + Some(region) => region, + None => continue, + }; + + for p in region.get_peers().iter() { + error!("in must_finish_switch_witnesses, p: {:?}", p); + } + + let mut need_retry = false; + for (peer_id, is_witness) in peer_ids.iter().zip(is_witnesses.iter()) { + match find_peer_by_id(®ion, *peer_id) { + Some(p) => { + if p.get_is_witness() != *is_witness + || self.cluster.rl().pending_peers.contains_key(&p.get_id()) + { + need_retry = true; + break; + } + } + None => { + need_retry = true; + break; + } + } + } + if !need_retry { + return; + } + } + let region = block_on(self.get_region_by_id(region_id)).unwrap(); + panic!("region {:?} failed to finish switch witnesses", region); + } + pub fn add_region(&self, region: &metapb::Region) { self.cluster.wl().add_region(region) } @@ -1072,6 +1166,15 @@ impl TestPdClient { self.schedule_operator(region_id, op); } + pub fn switch_witnesses(&self, region_id: u64, peer_ids: Vec, 
is_witnesses: Vec) { + let op = Operator::BatchSwitchWitness { + peer_ids, + is_witnesses, + policy: SchedulePolicy::TillSuccess, + }; + self.schedule_operator(region_id, op); + } + pub fn joint_confchange( &self, region_id: u64, @@ -1189,6 +1292,16 @@ impl TestPdClient { self.must_none_peer(region_id, peer); } + pub fn must_switch_witnesses( + &self, + region_id: u64, + peer_ids: Vec, + is_witnesses: Vec, + ) { + self.switch_witnesses(region_id, peer_ids.clone(), is_witnesses.clone()); + self.must_finish_switch_witnesses(region_id, peer_ids, is_witnesses); + } + pub fn must_joint_confchange( &self, region_id: u64, diff --git a/etc/error_code.toml b/etc/error_code.toml index 6b361e29e37..bb23c9b5e26 100644 --- a/etc/error_code.toml +++ b/etc/error_code.toml @@ -448,6 +448,11 @@ error = ''' KV:Raftstore:FlashbackNotPrepared ''' +["KV:Raftstore:IsWitness"] +error = ''' +KV:Raftstore:IsWitness +''' + ["KV:Raftstore:SnapAbort"] error = ''' KV:Raftstore:SnapAbort diff --git a/tests/failpoints/cases/test_witness.rs b/tests/failpoints/cases/test_witness.rs index 552434d1fed..ef178ee8aa0 100644 --- a/tests/failpoints/cases/test_witness.rs +++ b/tests/failpoints/cases/test_witness.rs @@ -4,22 +4,11 @@ use std::{iter::FromIterator, sync::Arc, time::Duration}; use collections::HashMap; use futures::executor::block_on; -use kvproto::{metapb, raft_serverpb::RaftApplyState}; +use kvproto::raft_serverpb::RaftApplyState; use pd_client::PdClient; use test_raftstore::*; use tikv_util::{config::ReadableDuration, store::find_peer}; -fn become_witness(cluster: &Cluster, region_id: u64, peer: &mut metapb::Peer) { - peer.set_role(metapb::PeerRole::Learner); - cluster.pd_client.must_add_peer(region_id, peer.clone()); - cluster.pd_client.must_remove_peer(region_id, peer.clone()); - peer.set_is_witness(true); - peer.set_id(peer.get_id() + 10); - cluster.pd_client.must_add_peer(region_id, peer.clone()); - peer.set_role(metapb::PeerRole::Voter); - cluster.pd_client.must_add_peer(region_id, 
peer.clone()); -} - // Test the case local reader works well with witness peer. #[test] fn test_witness_update_region_in_local_reader() { @@ -35,8 +24,12 @@ fn test_witness_update_region_in_local_reader() { let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); cluster.must_transfer_leader(region.get_id(), peer_on_store1); // nonwitness -> witness - let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); - become_witness(&cluster, region.get_id(), &mut peer_on_store3); + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![true], + ); cluster.must_put(b"k0", b"v0"); @@ -61,8 +54,8 @@ fn test_witness_update_region_in_local_reader() { .read(None, request.clone(), Duration::from_millis(100)) .unwrap(); assert_eq!( - resp.get_header().get_error().get_recovery_in_progress(), - &kvproto::errorpb::RecoveryInProgress { + resp.get_header().get_error().get_is_witness(), + &kvproto::errorpb::IsWitness { region_id: region.get_id(), ..Default::default() } @@ -95,8 +88,12 @@ fn test_witness_raftlog_gc_pull_voter_replicated_index() { let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); cluster.must_transfer_leader(region.get_id(), peer_on_store1); // nonwitness -> witness - let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); - become_witness(&cluster, region.get_id(), &mut peer_on_store3); + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![true], + ); // make sure raft log gc is triggered std::thread::sleep(Duration::from_millis(200)); @@ -176,8 +173,12 @@ fn test_witness_raftlog_gc_after_reboot() { let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); cluster.must_transfer_leader(region.get_id(), peer_on_store1); // nonwitness -> witness - let mut peer_on_store3 = find_peer(®ion, 
nodes[2]).unwrap().clone(); - become_witness(&cluster, region.get_id(), &mut peer_on_store3); + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![true], + ); // make sure raft log gc is triggered std::thread::sleep(Duration::from_millis(200)); @@ -240,3 +241,235 @@ fn test_witness_raftlog_gc_after_reboot() { } fail::remove("on_raft_gc_log_tick"); } + +// Test the case request snapshot and apply successfully after non-witness +// restart. +#[test] +fn test_request_snapshot_after_reboot() { + let mut cluster = new_server_cluster(0, 3); + cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(20); + cluster.cfg.raft_store.check_request_snapshot_interval = ReadableDuration::millis(20); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1.clone()); + // nonwitness -> witness + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![true], + ); + + cluster.must_put(b"k1", b"v1"); + + std::thread::sleep(Duration::from_millis(100)); + must_get_none(&cluster.get_engine(3), b"k1"); + + // witness -> nonwitness + let fp = "ignore request snapshot"; + fail::cfg(fp, "return").unwrap(); + cluster + .pd_client + .switch_witnesses(region.get_id(), vec![peer_on_store3.get_id()], vec![false]); + std::thread::sleep(Duration::from_millis(500)); + // as we ignore request snapshot, so snapshot should still not applied yet + assert_eq!(cluster.pd_client.get_pending_peers().len(), 1); + 
must_get_none(&cluster.get_engine(3), b"k1"); + + cluster.stop_node(nodes[2]); + fail::remove(fp); + std::thread::sleep(Duration::from_millis(100)); + // the PeerState is Unavailable, so it will request snapshot immediately after + // start. + cluster.run_node(nodes[2]).unwrap(); + must_get_none(&cluster.get_engine(3), b"k1"); + std::thread::sleep(Duration::from_millis(500)); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + assert_eq!(cluster.pd_client.get_pending_peers().len(), 0); +} + +// Test the case request snapshot and apply successfully after term change. +#[test] +fn test_request_snapshot_after_term_change() { + let mut cluster = new_server_cluster(0, 3); + cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(20); + cluster.cfg.raft_store.check_request_snapshot_interval = ReadableDuration::millis(20); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1.clone()); + // nonwitness -> witness + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![true], + ); + + cluster.must_put(b"k1", b"v1"); + + std::thread::sleep(Duration::from_millis(100)); + must_get_none(&cluster.get_engine(3), b"k1"); + + // witness -> nonwitness + let fp1 = "ignore generate snapshot"; + fail::cfg(fp1, "return").unwrap(); + cluster + .pd_client + .switch_witnesses(region.get_id(), vec![peer_on_store3.get_id()], vec![false]); + std::thread::sleep(Duration::from_millis(500)); + // as we ignore generate snapshot, so snapshot should still not applied yet + assert_eq!(cluster.pd_client.get_pending_peers().len(), 
1); + must_get_none(&cluster.get_engine(3), b"k1"); + + let peer_on_store2 = find_peer(®ion, nodes[1]).unwrap(); + cluster.must_transfer_leader(region.get_id(), peer_on_store2.clone()); + // After leader changes, the `term` and `last term` no longer match, so + // continue to receive `MsgAppend` until the two get equal, then retry to + // request snapshot and complete the application. + std::thread::sleep(Duration::from_millis(500)); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + assert_eq!(cluster.pd_client.get_pending_peers().len(), 0); + fail::remove(fp1); +} + +fn test_non_witness_availability(fp: &str) { + let mut cluster = new_server_cluster(0, 3); + cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(100); + cluster.cfg.raft_store.check_peers_availability_interval = ReadableDuration::millis(20); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1.clone()); + + // non-witness -> witness + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![true], + ); + + cluster.must_put(b"k1", b"v1"); + + std::thread::sleep(Duration::from_millis(100)); + must_get_none(&cluster.get_engine(3), b"k1"); + + fail::cfg(fp, "return").unwrap(); + + // witness -> non-witness + cluster + .pd_client + .switch_witnesses(region.get_id(), vec![peer_on_store3.get_id()], vec![false]); + std::thread::sleep(Duration::from_millis(500)); + // snapshot applied + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + assert_eq!(cluster.pd_client.get_pending_peers().len(), 0); + fail::remove(fp); +} + +// Test the 
case leader pulls non-witness availability when non-witness failed +// to push the info. +#[test] +fn test_pull_non_witness_availability() { + test_non_witness_availability("ignore notify leader the peer is available"); +} + +// Test the case non-witness pushes its availability without leader pulling. +#[test] +fn test_push_non_witness_availability() { + test_non_witness_availability("ignore schedule check non-witness availability tick"); +} + +// Test the case non-witness hasn't finish applying snapshot when receives read +// request. +#[test] +fn test_non_witness_replica_read() { + let mut cluster = new_server_cluster(0, 3); + cluster.cfg.raft_store.check_request_snapshot_interval = ReadableDuration::millis(20); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.must_put(b"k0", b"v0"); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1); + // nonwitness -> witness + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![true], + ); + + // witness -> nonwitness + fail::cfg("ignore request snapshot", "return").unwrap(); + cluster + .pd_client + .switch_witnesses(region.get_id(), vec![peer_on_store3.get_id()], vec![false]); + std::thread::sleep(Duration::from_millis(100)); + // as we ignore request snapshot, so snapshot should still not applied yet + + let mut request = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![new_get_cmd(b"k0")], + false, + ); + request.mut_header().set_peer(peer_on_store3.clone()); + request.mut_header().set_replica_read(true); + + let resp = cluster + .read(None, request, Duration::from_millis(100)) 
+ .unwrap(); + assert_eq!( + resp.get_header().get_error().get_is_witness(), + &kvproto::errorpb::IsWitness { + region_id: region.get_id(), + ..Default::default() + } + ); + + // start requesting snapshot and give enough time for applying snapshot to + // complete + fail::remove("ignore request snapshot"); + std::thread::sleep(Duration::from_millis(500)); + + let mut request = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![new_get_cmd(b"k0")], + false, + ); + request.mut_header().set_peer(peer_on_store3); + request.mut_header().set_replica_read(true); + + let resp = cluster + .read(None, request, Duration::from_millis(100)) + .unwrap(); + assert_eq!(resp.get_header().has_error(), false); +} diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index a4e15b8fa6e..bb35b069a41 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -255,6 +255,7 @@ fn test_serde_custom_tikv_config() { max_snapshot_file_raw_size: ReadableSize::gb(10), unreachable_backoff: ReadableDuration::secs(111), check_peers_availability_interval: ReadableDuration::secs(30), + check_request_snapshot_interval: ReadableDuration::minutes(1), }; value.pd = PdConfig::new(vec!["example.com:443".to_owned()]); let titan_cf_config = TitanCfConfig { diff --git a/tests/integrations/raftstore/test_witness.rs b/tests/integrations/raftstore/test_witness.rs index 301a743588e..f35b21b08a1 100644 --- a/tests/integrations/raftstore/test_witness.rs +++ b/tests/integrations/raftstore/test_witness.rs @@ -14,28 +14,6 @@ use raft::eraftpb::ConfChangeType; use test_raftstore::*; use tikv_util::store::find_peer; -fn become_witness(cluster: &Cluster, region_id: u64, peer: &mut metapb::Peer) { - peer.set_role(metapb::PeerRole::Learner); - cluster.pd_client.must_add_peer(region_id, peer.clone()); - cluster.pd_client.must_remove_peer(region_id, peer.clone()); - peer.set_is_witness(true); - peer.set_id(peer.get_id() + 10); - 
cluster.pd_client.must_add_peer(region_id, peer.clone()); - peer.set_role(metapb::PeerRole::Voter); - cluster.pd_client.must_add_peer(region_id, peer.clone()); -} - -fn become_non_witness(cluster: &Cluster, region_id: u64, peer: &mut metapb::Peer) { - peer.set_role(metapb::PeerRole::Learner); - cluster.pd_client.must_add_peer(region_id, peer.clone()); - cluster.pd_client.must_remove_peer(region_id, peer.clone()); - peer.set_is_witness(false); - peer.set_id(peer.get_id() + 10); - cluster.pd_client.must_add_peer(region_id, peer.clone()); - peer.set_role(metapb::PeerRole::Voter); - cluster.pd_client.must_add_peer(region_id, peer.clone()); -} - // Test the case that region split or merge with witness peer #[test] fn test_witness_split_merge() { @@ -49,9 +27,12 @@ fn test_witness_split_merge() { let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); // nonwitness -> witness - let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); - become_witness(&cluster, region.get_id(), &mut peer_on_store3); - + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![true], + ); let before = cluster .apply_state(region.get_id(), nodes[2]) .get_applied_index(); @@ -96,8 +77,12 @@ fn test_witness_split_merge() { assert!(find_peer(&right, nodes[2]).unwrap().is_witness); // can't merge with different witness location - let mut peer_on_store3 = find_peer(&left, nodes[2]).unwrap().clone(); - become_non_witness(&cluster, left.get_id(), &mut peer_on_store3); + let peer_on_store3 = find_peer(&left, nodes[2]).unwrap().clone(); + cluster.pd_client.must_switch_witnesses( + left.get_id(), + vec![peer_on_store3.get_id()], + vec![false], + ); let left = cluster.get_region(b"k1"); let req = new_admin_request( left.get_id(), @@ -174,6 +159,8 @@ fn test_witness_conf_change() { .pd_client .must_remove_peer(region.get_id(), peer_on_store3); + 
std::thread::sleep(Duration::from_millis(10)); + assert_eq!( cluster .region_local_state(region.get_id(), nodes[2]) @@ -182,124 +169,127 @@ fn test_witness_conf_change() { ); } -// #[test] -// // Test flow of switch witness -// fn test_witness_switch_witness() { -// let mut cluster = new_server_cluster(0, 3); -// cluster.run(); -// let nodes = Vec::from_iter(cluster.get_node_ids()); -// assert_eq!(nodes.len(), 3); - -// let pd_client = Arc::clone(&cluster.pd_client); -// pd_client.disable_default_operator(); - -// cluster.must_put(b"k1", b"v1"); - -// let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); -// let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap(); -// cluster.must_transfer_leader(region.get_id(), peer_on_store1.clone()); - -// // nonwitness -> witness -// let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); -// become_witness(&cluster, region.get_id(), &mut peer_on_store3); - -// std::thread::sleep(Duration::from_millis(100)); -// must_get_none(&cluster.get_engine(3), b"k1"); - -// // witness -> nonwitness -// peer_on_store3.set_role(metapb::PeerRole::Learner); -// cluster -// .pd_client -// .must_add_peer(region.get_id(), peer_on_store3.clone()); -// cluster -// .pd_client -// .must_remove_peer(region.get_id(), peer_on_store3.clone()); -// peer_on_store3.set_is_witness(false); -// cluster -// .pd_client -// .must_add_peer(region.get_id(), peer_on_store3.clone()); -// std::thread::sleep(Duration::from_millis(100)); -// must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); -// } - -// TODO: add back when switch witness is supported -// // Test the case that leader is forbidden to become witness -// #[test] -// fn test_witness_leader() { -// let mut cluster = new_server_cluster(0, 3); -// cluster.run(); -// let nodes = Vec::from_iter(cluster.get_node_ids()); -// assert_eq!(nodes.len(), 3); - -// let pd_client = Arc::clone(&cluster.pd_client); -// pd_client.disable_default_operator(); - -// cluster.must_put(b"k1", 
b"v1"); - -// let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); -// let mut peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); -// cluster.must_transfer_leader(region.get_id(), peer_on_store1.clone()); - -// // can't make leader to witness -// peer_on_store1.set_is_witness(true); -// cluster -// .pd_client -// .add_peer(region.get_id(), peer_on_store1.clone()); - -// std::thread::sleep(Duration::from_millis(100)); -// assert_eq!( -// cluster.leader_of_region(region.get_id()).unwrap().store_id, -// 1 -// ); -// // leader changes to witness failed, so still can get the value -// must_get_equal(&cluster.get_engine(nodes[0]), b"k1", b"v1"); - -// let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); -// // can't transfer leader to witness -// cluster.transfer_leader(region.get_id(), &mut peer_on_store3); -// assert_eq!( -// cluster.leader_of_region(region.get_id()).unwrap().store_id, -// nodes[0], -// ); -// } - -// TODO: add back when election priority is supported -// // Test the case that witness can't be elected as leader based on election -// // priority when there is no log gap -// #[test] -// fn test_witness_election_priority() { -// let mut cluster = new_server_cluster(0, 3); -// cluster.run(); -// let nodes = Vec::from_iter(cluster.get_node_ids()); -// assert_eq!(nodes.len(), 3); - -// let pd_client = Arc::clone(&cluster.pd_client); -// pd_client.disable_default_operator(); - -// let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); -// // nonwitness -> witness -// let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); -// become_witness(&cluster, region.get_id(), &mut peer_on_store3); -// cluster.must_put(b"k0", b"v0"); - -// // make sure logs are replicated to the witness -// std::thread::sleep(Duration::from_millis(100)); - -// for i in 1..10 { -// let node = -// cluster.leader_of_region(region.get_id()).unwrap().store_id; cluster. 
-// stop_node(node); let (k, v) = (format!("k{}", i), format!("v{}", i)); -// let key = k.as_bytes(); -// let value = v.as_bytes(); -// cluster.must_put(key, value); -// // the witness can't be elected as the leader when there is no log -// gap assert_ne!( -// cluster.leader_of_region(region.get_id()).unwrap().store_id, -// nodes[2], -// ); -// cluster.run_node(node).unwrap(); -// } -// } +// Test flow of switch witness +#[test] +fn test_witness_switch_witness() { + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.must_put(b"k1", b"v1"); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1.clone()); + + // nonwitness -> witness + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![true], + ); + + std::thread::sleep(Duration::from_millis(100)); + must_get_none(&cluster.get_engine(3), b"k1"); + + // witness -> non-witness + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![false], + ); + + std::thread::sleep(Duration::from_millis(100)); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); +} + +// Test the case that leader is forbidden to become witness +#[test] +fn test_witness_leader() { + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.must_put(b"k1", b"v1"); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = 
find_peer(®ion, nodes[0]).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1.clone()); + + // can't make leader to witness + cluster + .pd_client + .switch_witnesses(region.get_id(), vec![peer_on_store1.get_id()], vec![true]); + + std::thread::sleep(Duration::from_millis(100)); + assert_eq!( + cluster.leader_of_region(region.get_id()).unwrap().store_id, + 1 + ); + // leader changes to witness failed, so still can get the value + must_get_equal(&cluster.get_engine(nodes[0]), b"k1", b"v1"); + + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + // can't transfer leader to witness + cluster.transfer_leader(region.get_id(), peer_on_store3); + assert_eq!( + cluster.leader_of_region(region.get_id()).unwrap().store_id, + nodes[0], + ); +} + +// Test the case that witness can't be elected as leader based on election +// priority when there is no log gap +#[test] +fn test_witness_election_priority() { + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + + // nonwitness -> witness + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![true], + ); + cluster.must_put(b"k0", b"v0"); + + // make sure logs are replicated to the witness + std::thread::sleep(Duration::from_millis(100)); + + for i in 1..10 { + let node = cluster.leader_of_region(region.get_id()).unwrap().store_id; + cluster.stop_node(node); + let (k, v) = (format!("k{}", i), format!("v{}", i)); + let key = k.as_bytes(); + let value = v.as_bytes(); + cluster.must_put(key, value); + // the witness can't be elected as the leader when there is no log gap + assert_ne!( + 
cluster.leader_of_region(region.get_id()).unwrap().store_id, + nodes[2], + ); + cluster.run_node(node).unwrap(); + // make sure logs are replicated to the restarted node + std::thread::sleep(Duration::from_millis(100)); + } +} // Test the case that truncated index won't advance when there is a witness even // if the gap gap exceeds the gc count limit @@ -320,8 +310,12 @@ fn test_witness_raftlog_gc_lagged_follower() { let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); cluster.must_transfer_leader(region.get_id(), peer_on_store1); // nonwitness -> witness - let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); - become_witness(&cluster, region.get_id(), &mut peer_on_store3); + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![true], + ); // make sure raft log gc is triggered std::thread::sleep(Duration::from_millis(200)); @@ -391,8 +385,12 @@ fn test_witness_raftlog_gc_lagged_witness() { let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); cluster.must_transfer_leader(region.get_id(), peer_on_store1); // nonwitness -> witness - let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); - become_witness(&cluster, region.get_id(), &mut peer_on_store3); + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![true], + ); cluster.must_put(b"k0", b"v0"); // make sure raft log gc is triggered @@ -447,8 +445,12 @@ fn test_witness_replica_read() { let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); cluster.must_transfer_leader(region.get_id(), peer_on_store1); // nonwitness -> witness - let mut peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); - become_witness(&cluster, region.get_id(), &mut peer_on_store3); + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + 
cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![true], + ); let mut request = new_request( region.get_id(), @@ -463,15 +465,15 @@ fn test_witness_replica_read() { .read(None, request, Duration::from_millis(100)) .unwrap(); assert_eq!( - resp.get_header().get_error().get_recovery_in_progress(), - &kvproto::errorpb::RecoveryInProgress { + resp.get_header().get_error().get_is_witness(), + &kvproto::errorpb::IsWitness { region_id: region.get_id(), ..Default::default() } ); } -fn must_get_error_recovery_in_progress( +fn must_get_error_is_witness( cluster: &mut Cluster, region: &metapb::Region, cmd: kvproto::raft_cmdpb::Request, @@ -486,8 +488,8 @@ fn must_get_error_recovery_in_progress( .call_command_on_leader(req, Duration::from_millis(100)) .unwrap(); assert_eq!( - resp.get_header().get_error().get_recovery_in_progress(), - &kvproto::errorpb::RecoveryInProgress { + resp.get_header().get_error().get_is_witness(), + &kvproto::errorpb::IsWitness { region_id: region.get_id(), ..Default::default() }, @@ -513,9 +515,13 @@ fn test_witness_leader_down() { let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); cluster.must_transfer_leader(region.get_id(), peer_on_store1); - let mut peer_on_store2 = find_peer(®ion, nodes[1]).unwrap().clone(); + let peer_on_store2 = find_peer(®ion, nodes[1]).unwrap().clone(); // nonwitness -> witness - become_witness(&cluster, region.get_id(), &mut peer_on_store2); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store2.get_id()], + vec![true], + ); // the other follower is isolated cluster.add_send_filter(IsolationFilterFactory::new(3)); @@ -530,13 +536,13 @@ fn test_witness_leader_down() { // forbid writes let put = new_put_cmd(b"k3", b"v3"); - must_get_error_recovery_in_progress(&mut cluster, ®ion, put); + must_get_error_is_witness(&mut cluster, ®ion, put); // forbid reads let get = new_get_cmd(b"k1"); - must_get_error_recovery_in_progress(&mut cluster, 
®ion, get); + must_get_error_is_witness(&mut cluster, ®ion, get); // forbid read index let read_index = new_read_index_cmd(); - must_get_error_recovery_in_progress(&mut cluster, ®ion, read_index); + must_get_error_is_witness(&mut cluster, ®ion, read_index); let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); cluster.must_transfer_leader(region.get_id(), peer_on_store3); From f178f781048bef4930a8e82fd08c3e194e9f8ae4 Mon Sep 17 00:00:00 2001 From: Hu# Date: Fri, 20 Jan 2023 14:57:49 +0800 Subject: [PATCH 0467/1149] resource_manager: add watch for resource group (#14022) close tikv/tikv#13983 - add etcd mock for pd - add service for resource group Signed-off-by: husharp Co-authored-by: Ti Chi Robot --- Cargo.lock | 10 + components/pd_client/src/client.rs | 67 +++- components/pd_client/src/errors.rs | 6 +- components/pd_client/src/lib.rs | 24 +- components/resource_control/Cargo.toml | 10 + components/resource_control/src/lib.rs | 3 + .../resource_control/src/resource_group.rs | 7 +- components/resource_control/src/service.rs | 267 ++++++++++++++++ components/server/src/server.rs | 16 +- components/server/src/server2.rs | 16 +- components/test_pd/Cargo.toml | 3 + components/test_pd/src/lib.rs | 1 + components/test_pd/src/mocker/etcd.rs | 288 ++++++++++++++++++ components/test_pd/src/mocker/mod.rs | 61 +++- components/test_pd/src/server.rs | 67 +++- components/tikv_util/src/worker/pool.rs | 7 + tests/failpoints/cases/test_pd_client.rs | 54 ---- .../failpoints/cases/test_pd_client_legacy.rs | 111 ++++--- 18 files changed, 866 insertions(+), 152 deletions(-) create mode 100644 components/resource_control/src/service.rs create mode 100644 components/test_pd/src/mocker/etcd.rs diff --git a/Cargo.lock b/Cargo.lock index e9f55d1923d..ee047aaae6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4676,15 +4676,22 @@ dependencies = [ "byteorder", "crossbeam-skiplist", "dashmap", + "fail", + "futures 0.3.15", "kvproto", "lazy_static", "online_config", + "pd_client", 
"pin-project", "prometheus", + "protobuf", "serde", "slog", "slog-global", + "test_pd", + "test_pd_client", "tikv_util", + "tokio", "yatp", ] @@ -5838,11 +5845,14 @@ dependencies = [ "futures 0.3.15", "grpcio", "kvproto", + "log_wrappers", "pd_client", "security", "slog", "slog-global", "tikv_util", + "tokio", + "tokio-stream", ] [[package]] diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 1e1e5980908..6686c4e8a04 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -1,7 +1,6 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. use std::{ - collections::HashMap, fmt, sync::{ atomic::{AtomicU64, Ordering}, @@ -286,9 +285,46 @@ impl fmt::Debug for RpcClient { const LEADER_CHANGE_RETRY: usize = 10; impl PdClient for RpcClient { - fn load_global_config(&self, config_path: String) -> PdFuture> { - use kvproto::pdpb::LoadGlobalConfigRequest; - let mut req = LoadGlobalConfigRequest::new(); + fn store_global_config( + &self, + config_path: String, + items: Vec, + ) -> PdFuture<()> { + let _timer = PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["store_global_config"]) + .start_coarse_timer(); + + let mut req = pdpb::StoreGlobalConfigRequest::new(); + req.set_config_path(config_path); + req.set_changes(items.into()); + let executor = move |client: &Client, req| match client + .inner + .rl() + .client_stub + .store_global_config_async(&req) + { + Ok(grpc_response) => Box::pin(async move { + if let Err(err) = grpc_response.await { + return Err(box_err!("{:?}", err)); + } + Ok(()) + }) as PdFuture<_>, + Err(err) => Box::pin(async move { Err(box_err!("{:?}", err)) }) as PdFuture<_>, + }; + self.pd_client + .request(req, executor, LEADER_CHANGE_RETRY) + .execute() + } + + fn load_global_config( + &self, + config_path: String, + ) -> PdFuture<(Vec, i64)> { + let _timer = PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["load_global_config"]) + .start_coarse_timer(); + + let mut req = 
pdpb::LoadGlobalConfigRequest::new(); req.set_config_path(config_path); let executor = |client: &Client, req| match client .inner @@ -299,13 +335,10 @@ impl PdClient for RpcClient { { Ok(grpc_response) => Box::pin(async move { match grpc_response.await { - Ok(grpc_response) => { - let mut res = HashMap::with_capacity(grpc_response.get_items().len()); - for c in grpc_response.get_items() { - res.insert(c.get_name().to_owned(), c.get_value().to_owned()); - } - Ok(res) - } + Ok(grpc_response) => Ok(( + Vec::from(grpc_response.get_items()), + grpc_response.get_revision(), + )), Err(err) => Err(box_err!("{:?}", err)), } }) as PdFuture<_>, @@ -318,9 +351,17 @@ impl PdClient for RpcClient { fn watch_global_config( &self, + config_path: String, + revision: i64, ) -> Result> { - use kvproto::pdpb::WatchGlobalConfigRequest; - let req = WatchGlobalConfigRequest::default(); + let _timer = PD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["watch_global_config"]) + .start_coarse_timer(); + + let mut req = pdpb::WatchGlobalConfigRequest::default(); + info!("[global_config] start watch global config"; "path" => &config_path, "revision" => revision); + req.set_config_path(config_path); + req.set_revision(revision); sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client, _| { client.watch_global_config(&req) }) diff --git a/components/pd_client/src/errors.rs b/components/pd_client/src/errors.rs index 689cb276064..5bacca03354 100644 --- a/components/pd_client/src/errors.rs +++ b/components/pd_client/src/errors.rs @@ -35,12 +35,14 @@ pub type Result = result::Result; impl Error { pub fn retryable(&self) -> bool { match self { - Error::Grpc(_) | Error::ClusterNotBootstrapped(_) | Error::StreamDisconnect(_) => true, + Error::Grpc(_) + | Error::ClusterNotBootstrapped(_) + | Error::StreamDisconnect(_) + | Error::DataCompacted(_) => true, Error::Other(_) | Error::RegionNotFound(_) | Error::StoreTombstone(_) | Error::GlobalConfigNotFound(_) - | Error::DataCompacted(_) | 
Error::ClusterBootstrapped(_) | Error::Incompatible => false, } diff --git a/components/pd_client/src/lib.rs b/components/pd_client/src/lib.rs index 46a3e6924db..b877750770d 100644 --- a/components/pd_client/src/lib.rs +++ b/components/pd_client/src/lib.rs @@ -14,15 +14,14 @@ mod util; mod config; pub mod errors; -use std::{cmp::Ordering, collections::HashMap, ops::Deref, sync::Arc, time::Duration}; +use std::{cmp::Ordering, ops::Deref, sync::Arc, time::Duration}; use futures::future::BoxFuture; -use grpcio::ClientSStreamReceiver; use kvproto::{ metapb, pdpb, replication_modepb::{RegionReplicationStatus, ReplicationStatus, StoreDrAutoSyncStatus}, }; -use pdpb::{QueryStats, WatchGlobalConfigResponse}; +use pdpb::QueryStats; use tikv_util::time::{Instant, UnixSecs}; use txn_types::TimeStamp; @@ -201,6 +200,8 @@ impl BucketStat { } pub const INVALID_ID: u64 = 0; +// TODO: Implementation of config registration for each module +pub const RESOURCE_CONTROL_CONFIG_PATH: &str = "resource_group/settings"; /// PdClient communicates with Placement Driver (PD). /// Because now one PD only supports one cluster, so it is no need to pass @@ -209,17 +210,28 @@ pub const INVALID_ID: u64 = 0; /// all the time. 
pub trait PdClient: Send + Sync { /// Load a list of GlobalConfig - fn load_global_config(&self, _config_path: String) -> PdFuture> { + fn load_global_config( + &self, + _config_path: String, + ) -> PdFuture<(Vec, i64)> { unimplemented!(); } /// Store a list of GlobalConfig - fn store_global_config(&self, _list: HashMap) -> PdFuture<()> { + fn store_global_config( + &self, + _config_path: String, + _items: Vec, + ) -> PdFuture<()> { unimplemented!(); } /// Watching change of GlobalConfig - fn watch_global_config(&self) -> Result> { + fn watch_global_config( + &self, + _config_path: String, + _revision: i64, + ) -> Result> { unimplemented!(); } diff --git a/components/resource_control/Cargo.toml b/components/resource_control/Cargo.toml index 822aed2cd2d..3f796627040 100644 --- a/components/resource_control/Cargo.toml +++ b/components/resource_control/Cargo.toml @@ -4,17 +4,27 @@ version = "0.0.1" edition = "2021" publish = false +[features] +failpoints = ["fail/failpoints"] + [dependencies] byteorder = "1.2" crossbeam-skiplist = "0.1" dashmap = "5.1" +fail = "0.5" +futures = { version = "0.3" } kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.0" online_config = { workspace = true } +pd_client = { workspace = true } pin-project = "1.0" prometheus = { version = "0.13", features = ["nightly"] } +protobuf = { version = "2.8", features = ["bytes"] } serde = { version = "1.0", features = ["derive"] } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +test_pd = { workspace = true } +test_pd_client = { workspace = true } tikv_util = { workspace = true } +tokio = { version = "1.5", features = ["time"] } yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index 
eb6679f71e8..5534ed2153d 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -11,6 +11,9 @@ pub use resource_group::{ mod future; pub use future::ControlledFuture; +mod service; +pub use service::ResourceManagerService; + #[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig, Default)] #[serde(default)] #[serde(rename_all = "kebab-case")] diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index bfe9d92d0f3..23a50b42560 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -295,18 +295,19 @@ impl GroupPriorityTracker { } #[cfg(test)] -mod tests { - use kvproto::resource_manager::*; +pub(crate) mod tests { use yatp::queue::Extras; use super::*; - fn new_resource_group( + pub fn new_resource_group( name: String, is_ru_mode: bool, read_tokens: u64, write_tokens: u64, ) -> ResourceGroup { + use kvproto::resource_manager::{GroupRawResourceSettings, GroupRequestUnitSettings}; + let mut group = ResourceGroup::new(); group.set_name(name); let mode = if is_ru_mode { diff --git a/components/resource_control/src/service.rs b/components/resource_control/src/service.rs new file mode 100644 index 00000000000..ea9a9d724b9 --- /dev/null +++ b/components/resource_control/src/service.rs @@ -0,0 +1,267 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{sync::Arc, time::Duration}; + +use futures::StreamExt; +use kvproto::{pdpb::EventType, resource_manager::ResourceGroup}; +use pd_client::{Error as PdError, PdClient, RpcClient, RESOURCE_CONTROL_CONFIG_PATH}; +use tikv_util::error; + +use crate::ResourceGroupManager; + +#[derive(Clone)] +pub struct ResourceManagerService { + manager: Arc, + pd_client: Arc, + // record watch revision + revision: i64, +} + +impl ResourceManagerService { + /// Constructs a new `Service` with `ResourceGroupManager` and a `RpcClient` + pub fn new( + manager: Arc, + pd_client: Arc, + ) -> ResourceManagerService { + ResourceManagerService { + pd_client, + manager, + revision: 0, + } + } +} + +impl ResourceManagerService { + pub async fn watch_resource_groups(&mut self) { + // Firstly, load all resource groups as of now. + let (groups, revision) = self.list_resource_groups().await; + self.revision = revision; + groups + .into_iter() + .for_each(|rg| self.manager.add_resource_group(rg)); + // Secondly, start watcher at loading revision. + loop { + match self + .pd_client + .watch_global_config(RESOURCE_CONTROL_CONFIG_PATH.to_string(), self.revision) + { + Ok(mut stream) => { + while let Some(grpc_response) = stream.next().await { + match grpc_response { + Ok(r) => { + self.revision = r.get_revision(); + r.get_changes() + .iter() + .for_each(|item| match item.get_kind() { + EventType::Put => { + if let Ok(group) = + protobuf::parse_from_bytes::( + item.get_value().as_bytes(), + ) + { + self.manager.add_resource_group(group); + } + } + EventType::Delete => { + self.manager.remove_resource_group(item.get_name()); + } + }); + } + Err(err) => { + error!("failed to get stream"; "err" => ?err); + tokio::time::sleep(Duration::from_secs(1)).await; + } + } + } + } + Err(PdError::DataCompacted(msg)) => { + error!("required revision has been compacted"; "err" => ?msg); + // If the etcd revision is compacted, we need to reload all resouce groups. 
+ let (groups, revision) = self.list_resource_groups().await; + self.revision = revision; + groups + .into_iter() + .for_each(|rg| self.manager.add_resource_group(rg)); + } + Err(err) => { + error!("failed to watch resource groups"; "err" => ?err); + tokio::time::sleep(Duration::from_secs(1)).await; + } + } + } + } + + async fn list_resource_groups(&mut self) -> (Vec, i64) { + loop { + match self + .pd_client + .load_global_config(RESOURCE_CONTROL_CONFIG_PATH.to_string()) + .await + { + Ok((items, revision)) => { + let groups = items + .into_iter() + .filter_map(|g| protobuf::parse_from_bytes(g.get_value().as_bytes()).ok()) + .collect(); + return (groups, revision); + } + Err(err) => { + error!("failed to load global config"; "err" => ?err); + tokio::time::sleep(Duration::from_secs(1)).await; + } + } + } + } +} + +#[cfg(test)] +pub mod tests { + use std::time::Duration; + + use futures::executor::block_on; + use kvproto::pdpb::GlobalConfigItem; + use pd_client::RpcClient; + use protobuf::Message; + use test_pd::{mocker::Service, util::*, Server as MockServer}; + use tikv_util::{config::ReadableDuration, worker::Builder}; + + use crate::resource_group::tests::new_resource_group; + + fn new_test_server_and_client( + update_interval: ReadableDuration, + ) -> (MockServer, RpcClient) { + let server = MockServer::new(1); + let eps = server.bind_addrs(); + let client = new_client_with_update_interval(eps, None, update_interval); + (server, client) + } + + fn add_resource_group(pd_client: Arc, group: ResourceGroup) { + let mut item = GlobalConfigItem::default(); + item.set_kind(EventType::Put); + item.set_name(group.get_name().to_string()); + let mut buf = Vec::new(); + group.write_to_vec(&mut buf).unwrap(); + item.set_value(String::from_utf8(buf).unwrap()); + + futures::executor::block_on(async move { + pd_client + .store_global_config(RESOURCE_CONTROL_CONFIG_PATH.to_string(), vec![item]) + .await + }) + .unwrap(); + } + + fn delete_resource_group(pd_client: Arc, name: 
&str) { + let mut item = GlobalConfigItem::default(); + item.set_kind(EventType::Delete); + item.set_name(name.to_string()); + + futures::executor::block_on(async move { + pd_client + .store_global_config(RESOURCE_CONTROL_CONFIG_PATH.to_string(), vec![item]) + .await + }) + .unwrap(); + } + + use super::*; + #[test] + fn crud_config_test() { + let (mut server, client) = new_test_server_and_client(ReadableDuration::millis(100)); + let resource_manager = ResourceGroupManager::default(); + + let mut s = ResourceManagerService::new(Arc::new(resource_manager), Arc::new(client)); + let group = new_resource_group("TEST".into(), true, 100, 100); + add_resource_group(s.pd_client.clone(), group); + let (res, revision) = block_on(s.list_resource_groups()); + assert_eq!(res.len(), 1); + assert_eq!(revision, 1); + + delete_resource_group(s.pd_client.clone(), "TEST"); + let (res, revision) = block_on(s.list_resource_groups()); + assert_eq!(res.len(), 0); + assert_eq!(revision, 2); + + server.stop(); + } + + #[test] + fn watch_config_test() { + let (mut server, client) = new_test_server_and_client(ReadableDuration::millis(100)); + let resource_manager = ResourceGroupManager::default(); + + let mut s = ResourceManagerService::new(Arc::new(resource_manager), Arc::new(client)); + let (res, revision) = block_on(s.list_resource_groups()); + assert_eq!(res.len(), 0); + assert_eq!(revision, 0); + + let background_worker = Builder::new("background").thread_count(1).create(); + let mut s_clone = s.clone(); + background_worker.spawn_async_task(async move { + s_clone.watch_resource_groups().await; + }); + // Mock add + let group1 = new_resource_group("TEST1".into(), true, 100, 100); + add_resource_group(s.pd_client.clone(), group1); + let group2 = new_resource_group("TEST2".into(), true, 100, 100); + add_resource_group(s.pd_client.clone(), group2); + // Mock modify + let group2 = new_resource_group("TEST2".into(), true, 50, 50); + add_resource_group(s.pd_client.clone(), group2); + let (res, 
revision) = block_on(s.list_resource_groups()); + assert_eq!(res.len(), 2); + assert_eq!(revision, 3); + // Mock delete + delete_resource_group(s.pd_client.clone(), "TEST1"); + let (res, revision) = block_on(s.list_resource_groups()); + assert_eq!(res.len(), 1); + assert_eq!(revision, 4); + // Wait for watcher + std::thread::sleep(Duration::from_millis(100)); + let groups = s.manager.get_all_resource_groups(); + assert_eq!(groups.len(), 1); + assert!(s.manager.get_resource_group("TEST1").is_none()); + let group = s.manager.get_resource_group("TEST2").unwrap(); + assert_eq!( + group + .value() + .get_r_u_settings() + .get_r_r_u() + .get_settings() + .get_fill_rate(), + 50 + ); + server.stop(); + } + + #[test] + fn reboot_watch_server_test() { + let (mut server, client) = new_test_server_and_client(ReadableDuration::millis(100)); + let resource_manager = ResourceGroupManager::default(); + + let s = ResourceManagerService::new(Arc::new(resource_manager), Arc::new(client)); + let background_worker = Builder::new("background").thread_count(1).create(); + let mut s_clone = s.clone(); + background_worker.spawn_async_task(async move { + s_clone.watch_resource_groups().await; + }); + // Mock add + let group1 = new_resource_group("TEST1".into(), true, 100, 100); + add_resource_group(s.pd_client.clone(), group1); + // Mock reboot watch server + let watch_global_config_fp = "watch_global_config_return"; + fail::cfg(watch_global_config_fp, "return").unwrap(); + std::thread::sleep(Duration::from_millis(100)); + fail::remove(watch_global_config_fp); + // Mock add after rebooting will success + let group1 = new_resource_group("TEST2".into(), true, 100, 100); + add_resource_group(s.pd_client.clone(), group1); + // Wait watcher update + std::thread::sleep(Duration::from_secs(1)); + let groups = s.manager.get_all_resource_groups(); + assert_eq!(groups.len(), 2); + + server.stop(); + } +} diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 
97fd1f77eef..207373313a4 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -82,7 +82,9 @@ use raftstore::{ }, RaftRouterCompactedEventSender, }; -use resource_control::{ResourceGroupManager, MIN_PRIORITY_UPDATE_INTERVAL}; +use resource_control::{ + ResourceGroupManager, ResourceManagerService, MIN_PRIORITY_UPDATE_INTERVAL, +}; use security::SecurityManager; use snap_recovery::RecoveryService; use tikv::{ @@ -330,11 +332,17 @@ where let resource_manager = if config.resource_control.enabled { let mgr = Arc::new(ResourceGroupManager::default()); - let mgr1 = mgr.clone(); + let mut resource_mgr_service = + ResourceManagerService::new(mgr.clone(), pd_client.clone()); // spawn a task to periodically update the minimal virtual time of all resource - // group. + // groups. + let resource_mgr = mgr.clone(); background_worker.spawn_interval_task(MIN_PRIORITY_UPDATE_INTERVAL, move || { - mgr1.advance_min_virtual_time(); + resource_mgr.advance_min_virtual_time(); + }); + // spawn a task to watch all resource groups update. 
+ background_worker.spawn_async_task(async move { + resource_mgr_service.watch_resource_groups().await; }); Some(mgr) } else { diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 36a02130fdb..f193e1c7445 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -65,7 +65,9 @@ use raftstore::{ RegionInfoAccessor, }; use raftstore_v2::{router::RaftRouter, StateStorage}; -use resource_control::{ResourceGroupManager, MIN_PRIORITY_UPDATE_INTERVAL}; +use resource_control::{ + ResourceGroupManager, ResourceManagerService, MIN_PRIORITY_UPDATE_INTERVAL, +}; use security::SecurityManager; use tikv::{ config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, @@ -294,11 +296,17 @@ where let resource_manager = if config.resource_control.enabled { let mgr = Arc::new(ResourceGroupManager::default()); - let mgr1 = mgr.clone(); + let mut resource_mgr_service = + ResourceManagerService::new(mgr.clone(), pd_client.clone()); // spawn a task to periodically update the minimal virtual time of all resource - // group. + // groups. + let resource_mgr = mgr.clone(); background_worker.spawn_interval_task(MIN_PRIORITY_UPDATE_INTERVAL, move || { - mgr1.advance_min_virtual_time(); + resource_mgr.advance_min_virtual_time(); + }); + // spawn a task to watch all resource groups update. 
+ background_worker.spawn_async_task(async move { + resource_mgr_service.watch_resource_groups().await; }); Some(mgr) } else { diff --git a/components/test_pd/Cargo.toml b/components/test_pd/Cargo.toml index a478e6ee325..6277789b194 100644 --- a/components/test_pd/Cargo.toml +++ b/components/test_pd/Cargo.toml @@ -10,8 +10,11 @@ fail = "0.5" futures = "0.3" grpcio = { workspace = true } kvproto = { workspace = true } +log_wrappers = { workspace = true } pd_client = { workspace = true } security = { workspace = true } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } tikv_util = { workspace = true } +tokio = { version = "1.0", features = ["full"] } +tokio-stream = "0.1" diff --git a/components/test_pd/src/lib.rs b/components/test_pd/src/lib.rs index 187a899d7fb..bd768e58318 100644 --- a/components/test_pd/src/lib.rs +++ b/components/test_pd/src/lib.rs @@ -1,4 +1,5 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. +#![feature(slice_group_by)] #[macro_use] extern crate tikv_util; diff --git a/components/test_pd/src/mocker/etcd.rs b/components/test_pd/src/mocker/etcd.rs new file mode 100644 index 00000000000..3939dfc9a72 --- /dev/null +++ b/components/test_pd/src/mocker/etcd.rs @@ -0,0 +1,288 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + cell::Cell, + collections::{BTreeMap, HashMap}, + ops::Bound, + sync::Arc, +}; + +use futures::lock::Mutex; +use tokio::sync::mpsc::{self, Sender}; +use tokio_stream::wrappers::ReceiverStream; + +use super::Result; + +/// An in-memory, single versioned storage. +/// Emulating some interfaces of etcd for testing. 
+#[derive(Default, Debug)] +pub struct Etcd { + items: BTreeMap, + subs: HashMap, + revision: i64, + sub_id_alloc: Cell, +} + +pub type EtcdClient = Arc>; + +impl Etcd { + fn alloc_rev(&mut self) -> i64 { + self.revision += 1; + self.revision + } + + pub fn get_revision(&self) -> i64 { + self.revision + } + + pub fn get_key(&self, keys: Keys) -> (Vec, i64) { + let (start_key, end_key) = keys.into_bound(); + let kvs = self + .items + .range(( + Bound::Included(&Key(start_key, 0)), + Bound::Excluded(&Key(end_key, self.revision)), + )) + .collect::>() + .as_slice() + .group_by(|item1, item2| item1.0.0 == item2.0.0) + .filter_map(|group| { + let (k, v) = group.last()?; + match v { + Value::Val(val) => Some(KeyValue(MetaKey(k.0.clone()), val.clone())), + Value::Del => None, + } + }) + .fold(Vec::new(), |mut items, item| { + items.push(item); + items + }); + + (kvs, self.get_revision()) + } + + pub async fn set(&mut self, mut pair: KeyValue) -> Result<()> { + let rev = self.alloc_rev(); + for sub in self.subs.values() { + if pair.key() < sub.end_key.as_slice() && pair.key() >= sub.start_key.as_slice() { + sub.tx + .send(KvEvent { + kind: KvEventType::Put, + pair: pair.clone(), + }) + .await + .unwrap(); + } + } + self.items + .insert(Key(pair.take_key(), rev), Value::Val(pair.take_value())); + Ok(()) + } + + pub async fn delete(&mut self, keys: Keys) -> Result<()> { + let (start_key, end_key) = keys.into_bound(); + let rev = self.alloc_rev(); + let mut v = self + .items + .range(( + Bound::Included(Key(start_key, 0)), + Bound::Excluded(Key(end_key, self.revision)), + )) + .map(|(k, _)| Key::clone(k)) + .collect::>(); + v.dedup_by(|k1, k2| k1.0 == k2.0); + + for mut victim in v { + let k = Key(victim.0.clone(), rev); + self.items.insert(k, Value::Del); + + for sub in self.subs.values() { + if victim.0.as_slice() < sub.end_key.as_slice() + && victim.0.as_slice() >= sub.start_key.as_slice() + { + sub.tx + .send(KvEvent { + kind: KvEventType::Delete, + pair: 
KeyValue(MetaKey(std::mem::take(&mut victim.0)), vec![]), + }) + .await + .unwrap(); + } + } + } + Ok(()) + } + + pub async fn watch(&mut self, keys: Keys, start_rev: i64) -> Result> { + let id = self.sub_id_alloc.get(); + self.sub_id_alloc.set(id + 1); + let (tx, rx) = mpsc::channel(1024); + let (start_key, end_key) = keys.into_bound(); + + // Sending events from [start_rev, now) to the client. + let mut pending = self + .items + .range(( + Bound::Included(Key(start_key.clone(), 0)), + Bound::Excluded(Key(end_key.clone(), self.revision)), + )) + .filter(|(k, _)| k.1 >= start_rev) + .collect::>(); + pending.sort_by_key(|(k, _)| k.1); + for (k, v) in pending { + let event = match v { + Value::Val(val) => KvEvent { + kind: KvEventType::Put, + pair: KeyValue(MetaKey(k.0.clone()), val.clone()), + }, + Value::Del => KvEvent { + kind: KvEventType::Delete, + pair: KeyValue(MetaKey(k.0.clone()), vec![]), + }, + }; + tx.send(event).await.expect("too many pending events"); + } + + self.subs.insert( + id, + Subscriber { + start_key, + end_key, + tx, + }, + ); + Ok(ReceiverStream::new(rx)) + } + + pub fn clear_subs(&mut self) { + self.subs.clear(); + self.sub_id_alloc.set(0); + } + + /// A tool for dumpling the whole storage when test failed. + /// Add this to test code temporarily for debugging. + #[allow(dead_code)] + pub fn dump(&self) { + println!(">>>>>>> /etc (revision = {}) <<<<<<<", self.revision); + for (k, v) in self.items.iter() { + println!("{:?} => {:?}", k, v); + } + } +} + +#[derive(Clone, Debug)] +pub struct MetaKey(pub Vec); + +impl MetaKey { + /// return the key that keeps the range [self, self.next()) contains only + /// `self`. + pub fn next(&self) -> Self { + let mut next = self.clone(); + next.0.push(0); + next + } + + /// return the key that keeps the range [self, self.next_prefix()) contains + /// all keys with the prefix `self`. 
+ pub fn next_prefix(&self) -> Self { + let mut next_prefix = self.clone(); + for i in (0..next_prefix.0.len()).rev() { + if next_prefix.0[i] == u8::MAX { + next_prefix.0.pop(); + } else { + next_prefix.0[i] += 1; + break; + } + } + next_prefix + } +} + +/// A simple key value pair of metadata. +#[derive(Clone, Debug)] +pub struct KeyValue(pub MetaKey, pub Vec); + +impl KeyValue { + pub fn key(&self) -> &[u8] { + self.0.0.as_slice() + } + + pub fn value(&self) -> &[u8] { + self.1.as_slice() + } + + pub fn take_key(&mut self) -> Vec { + std::mem::take(&mut self.0.0) + } + + pub fn take_value(&mut self) -> Vec { + std::mem::take(&mut self.1) + } +} + +#[derive(Debug)] +pub enum KvEventType { + Put, + Delete, +} + +#[derive(Debug)] +pub struct KvEvent { + pub kind: KvEventType, + pub pair: KeyValue, +} + +#[derive(Debug)] +struct Subscriber { + start_key: Vec, + end_key: Vec, + tx: Sender, +} + +/// A key with revision. +#[derive(Default, Eq, PartialEq, Ord, PartialOrd, Clone)] +struct Key(Vec, i64); + +impl std::fmt::Debug for Key { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("Key") + .field(&format_args!( + "{}@{}", + log_wrappers::Value::key(&self.0), + self.1 + )) + .finish() + } +} + +/// A value (maybe tombstone.) +#[derive(Debug, PartialEq, Clone)] +enum Value { + Val(Vec), + Del, +} + +/// The key set for getting. +#[derive(Debug)] +pub enum Keys { + Prefix(MetaKey), + Range(MetaKey, MetaKey), + Key(MetaKey), +} + +impl Keys { + /// convert the key set for corresponding key range. 
+ pub fn into_bound(self) -> (Vec, Vec) { + match self { + Keys::Prefix(x) => { + let next = x.next_prefix().0; + ((x.0), (next)) + } + Keys::Range(start, end) => ((start.0), (end.0)), + Keys::Key(k) => { + let next = k.next().0; + ((k.0), (next)) + } + } + } +} diff --git a/components/test_pd/src/mocker/mod.rs b/components/test_pd/src/mocker/mod.rs index 84c2508d4ea..b9ae839b06e 100644 --- a/components/test_pd/src/mocker/mod.rs +++ b/components/test_pd/src/mocker/mod.rs @@ -2,15 +2,18 @@ use std::result; +use futures::executor::block_on; use kvproto::pdpb::*; mod bootstrap; +pub mod etcd; mod incompatible; mod leader_change; mod retry; mod service; mod split; +use self::etcd::{EtcdClient, KeyValue, Keys, MetaKey}; pub use self::{ bootstrap::AlreadyBootstrapped, incompatible::Incompatible, @@ -28,28 +31,62 @@ pub trait PdMocker { fn load_global_config( &self, _req: &LoadGlobalConfigRequest, + etcd_client: EtcdClient, ) -> Option> { - let mut send = vec![]; - for r in 0..10 { - let mut i = GlobalConfigItem::default(); - i.set_name(format!("/global/config/{}", r)); - i.set_value(r.to_string()); - send.push(i); - } let mut res = LoadGlobalConfigResponse::default(); - res.set_items(send.into()); + let mut items = Vec::new(); + let (resp, revision) = block_on(async move { + etcd_client.lock().await.get_key(Keys::Range( + MetaKey(b"".to_vec()), + MetaKey(b"\xff".to_vec()), + )) + }); + + let values: Vec = resp + .iter() + .map(|kv| { + let mut item = GlobalConfigItem::default(); + item.set_name(String::from_utf8(kv.key().to_vec()).unwrap()); + item.set_value(String::from_utf8(kv.value().to_vec()).unwrap()); + item + }) + .collect(); + + items.extend(values); + res.set_revision(revision); + res.set_items(items.into()); Some(Ok(res)) } fn store_global_config( &self, - _: &StoreGlobalConfigRequest, + req: &StoreGlobalConfigRequest, + etcd_client: EtcdClient, ) -> Option> { - unimplemented!() + for item in req.get_changes() { + let cli = etcd_client.clone(); + block_on(async 
move { + match item.get_kind() { + EventType::Put => { + let kv = KeyValue(MetaKey(item.get_name().into()), item.get_value().into()); + cli.lock().await.set(kv).await + } + EventType::Delete => { + let key = Keys::Key(MetaKey(item.get_name().into())); + cli.lock().await.delete(key).await + } + } + }) + .unwrap(); + } + Some(Ok(StoreGlobalConfigResponse::default())) } - fn watch_global_config(&self) -> Option> { - panic!("could not mock this function due to it should return a stream") + fn watch_global_config( + &self, + _req: &WatchGlobalConfigRequest, + ) -> Option> { + unimplemented!() } fn get_members(&self, _: &GetMembersRequest) -> Option> { diff --git a/components/test_pd/src/server.rs b/components/test_pd/src/server.rs index 9e1a2b3bb0f..cb495307a1f 100644 --- a/components/test_pd/src/server.rs +++ b/components/test_pd/src/server.rs @@ -1,6 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. use std::{ + str::from_utf8, sync::{ atomic::{AtomicI64, Ordering}, Arc, @@ -20,6 +21,7 @@ use pd_client::Error as PdError; use security::*; use super::mocker::*; +use crate::mocker::etcd::{EtcdClient, Keys, KvEventType, MetaKey}; pub struct Server { server: Option, @@ -57,6 +59,7 @@ impl Server { default_handler, case, tso_logical: Arc::new(AtomicI64::default()), + etcd_client: EtcdClient::default(), }; let mut server = Server { server: None, @@ -170,6 +173,7 @@ struct PdMock { default_handler: Arc, case: Option>, tso_logical: Arc, + etcd_client: EtcdClient, } impl Clone for PdMock { @@ -178,6 +182,7 @@ impl Clone for PdMock { default_handler: Arc::clone(&self.default_handler), case: self.case.clone(), tso_logical: self.tso_logical.clone(), + etcd_client: self.etcd_client.clone(), } } } @@ -189,39 +194,71 @@ impl Pd for PdMock { req: LoadGlobalConfigRequest, sink: UnarySink, ) { - hijack_unary(self, ctx, sink, |c| c.load_global_config(&req)) + let cli = self.etcd_client.clone(); + hijack_unary(self, ctx, sink, |c| c.load_global_config(&req, 
cli.clone())) } fn store_global_config( &mut self, - _ctx: RpcContext<'_>, - _req: StoreGlobalConfigRequest, - _sink: UnarySink, + ctx: RpcContext<'_>, + req: StoreGlobalConfigRequest, + sink: UnarySink, ) { - unimplemented!() + let cli = self.etcd_client.clone(); + hijack_unary(self, ctx, sink, |c| { + c.store_global_config(&req, cli.clone()) + }) } fn watch_global_config( &mut self, ctx: RpcContext<'_>, - _req: WatchGlobalConfigRequest, + req: WatchGlobalConfigRequest, mut sink: ServerStreamingSink, ) { - ctx.spawn(async move { - let mut name: usize = 0; - loop { + let cli = self.etcd_client.clone(); + let future = async move { + let mut watcher = match cli + .lock() + .await + .watch( + Keys::Range(MetaKey(b"".to_vec()), MetaKey(b"\xff".to_vec())), + req.revision, + ) + .await + { + Ok(w) => w, + Err(err) => { + error!("failed to watch: {:?}", err); + return; + } + }; + + while let Some(event) = watcher.as_mut().recv().await { + info!("watch event from etcd"; "event" => ?event); let mut change = GlobalConfigItem::new(); - change.set_name(format!("/global/config/{:?}", name).to_owned()); - change.set_value(format!("{:?}", name)); + change.set_kind(match event.kind { + KvEventType::Put => EventType::Put, + KvEventType::Delete => EventType::Delete, + }); + change.set_name(from_utf8(event.pair.key()).unwrap().to_string()); + change.set_value(from_utf8(event.pair.value()).unwrap().to_string()); let mut wc = WatchGlobalConfigResponse::default(); wc.set_changes(vec![change].into()); - // simulate network delay - std::thread::sleep(Duration::from_millis(10)); - name += 1; let _ = sink.send((wc, WriteFlags::default())).await; let _ = sink.flush().await; + #[cfg(feature = "failpoints")] + { + use futures::executor::block_on; + let cli_clone = cli.clone(); + fail_point!("watch_global_config_return", |_| { + block_on(async move { cli_clone.lock().await.clear_subs() }); + watcher.close(); + }); + } } - }) + }; + ctx.spawn(future); } fn get_members( diff --git 
a/components/tikv_util/src/worker/pool.rs b/components/tikv_util/src/worker/pool.rs index e761fac8bb5..26dbf495f54 100644 --- a/components/tikv_util/src/worker/pool.rs +++ b/components/tikv_util/src/worker/pool.rs @@ -405,6 +405,13 @@ impl Worker { }); } + pub fn spawn_async_task(&self, f: F) + where + F: Future + Send + 'static, + { + self.remote.spawn(f); + } + fn delay_notify(tx: UnboundedSender>, timeout: Duration) { let now = Instant::now(); let f = GLOBAL_TIMER_HANDLE diff --git a/tests/failpoints/cases/test_pd_client.rs b/tests/failpoints/cases/test_pd_client.rs index 7dd767d19c9..92942fa90f9 100644 --- a/tests/failpoints/cases/test_pd_client.rs +++ b/tests/failpoints/cases/test_pd_client.rs @@ -97,60 +97,6 @@ fn test_pd_client_deadlock() { fail::remove(pd_client_reconnect_fp); } -#[test] -fn test_load_global_config() { - let (mut _server, mut client) = new_test_server_and_client(ReadableDuration::millis(100)); - let res = futures::executor::block_on(async move { - client.load_global_config("global".to_string()).await - }); - for (k, v) in res.unwrap() { - assert_eq!(k, format!("/global/config/{}", v)) - } -} - -#[test] -fn test_watch_global_config_on_closed_server() { - let (mut server, mut client) = new_test_server_and_client(ReadableDuration::millis(100)); - use futures::StreamExt; - let j = std::thread::spawn(move || { - let mut r = client.watch_global_config().unwrap(); - block_on(async move { - let mut i: usize = 0; - while let Some(r) = r.next().await { - match r { - Ok(res) => { - let change = &res.get_changes()[0]; - assert_eq!( - change - .get_name() - .split('/') - .collect::>() - .last() - .unwrap() - .to_owned(), - format!("{:?}", i) - ); - assert_eq!(change.get_value().to_owned(), format!("{:?}", i)); - i += 1; - } - Err(e) => { - if let grpcio::Error::RpcFailure(e) = e { - // 14-UNAVAILABLE - assert_eq!(e.code(), grpcio::RpcStatusCode::from(14)); - break; - } else { - panic!("other error occur {:?}", e) - } - } - } - } - }); - }); - 
thread::sleep(Duration::from_millis(200)); - server.stop(); - j.join().unwrap(); -} - // Updating pd leader may be slow, we need to make sure it does not block other // RPC in the same gRPC Environment. #[test] diff --git a/tests/failpoints/cases/test_pd_client_legacy.rs b/tests/failpoints/cases/test_pd_client_legacy.rs index 172db8ac09e..3638e448bd9 100644 --- a/tests/failpoints/cases/test_pd_client_legacy.rs +++ b/tests/failpoints/cases/test_pd_client_legacy.rs @@ -7,11 +7,11 @@ use std::{ }; use grpcio::EnvBuilder; -use kvproto::metapb::*; +use kvproto::{metapb::*, pdpb::GlobalConfigItem}; use pd_client::{PdClient, RegionInfo, RegionStat, RpcClient}; use security::{SecurityConfig, SecurityManager}; use test_pd::{mocker::*, util::*, Server as MockServer}; -use tikv_util::config::ReadableDuration; +use tikv_util::{config::ReadableDuration, worker::Builder}; fn new_test_server_and_client( update_interval: ReadableDuration, @@ -108,57 +108,90 @@ fn test_pd_client_deadlock() { #[test] fn test_load_global_config() { let (mut _server, client) = new_test_server_and_client(ReadableDuration::millis(100)); - let res = - futures::executor::block_on( - async move { client.load_global_config("global".into()).await }, - ); - for (k, v) in res.unwrap() { - assert_eq!(k, format!("/global/config/{}", v)) + let global_items = vec![("test1", "val1"), ("test2", "val2"), ("test3", "val3")]; + let check_items = global_items.clone(); + if let Err(err) = futures::executor::block_on( + client.store_global_config( + String::from("global"), + global_items + .iter() + .map(|(name, value)| { + let mut item = GlobalConfigItem::default(); + item.set_name(name.to_string()); + item.set_value(value.to_string()); + item + }) + .collect::>(), + ), + ) { + panic!("error occur {:?}", err); } + + let (res, revision) = + futures::executor::block_on(client.load_global_config(String::from("global"))).unwrap(); + assert!( + res.iter() + .zip(check_items) + .all(|(item1, item2)| item1.name == item2.0 && 
item1.value == item2.1) + ); + assert_eq!(revision, 3); } #[test] fn test_watch_global_config_on_closed_server() { let (mut server, client) = new_test_server_and_client(ReadableDuration::millis(100)); + let global_items = vec![("test1", "val1"), ("test2", "val2"), ("test3", "val3")]; + let items_clone = global_items.clone(); + let client = Arc::new(client); + let cli_clone = client.clone(); use futures::StreamExt; - let j = std::thread::spawn(move || { - futures::executor::block_on(async move { - let mut r = client.watch_global_config().unwrap(); - let mut i: usize = 0; - while let Some(r) = r.next().await { - match r { - Ok(res) => { - let change = &res.get_changes()[0]; - assert_eq!( - change - .get_name() - .split('/') - .collect::>() - .last() - .unwrap() - .to_owned(), - format!("{:?}", i) - ); - assert_eq!(change.get_value().to_owned(), format!("{:?}", i)); - i += 1; - } - Err(e) => { - if let grpcio::Error::RpcFailure(e) = e { - // 14-UNAVAILABLE - assert_eq!(e.code(), grpcio::RpcStatusCode::from(14)); - break; - } else { - panic!("other error occur {:?}", e) + let background_worker = Builder::new("background").thread_count(1).create(); + background_worker.spawn_async_task(async move { + match cli_clone.watch_global_config("global".into(), 0) { + Ok(mut stream) => { + let mut i: usize = 0; + while let Some(grpc_response) = stream.next().await { + match grpc_response { + Ok(r) => { + for item in r.get_changes() { + assert_eq!(item.get_name(), items_clone[i].0); + assert_eq!(item.get_value(), items_clone[i].1); + i += 1; + } } + Err(err) => panic!("failed to get stream, err: {:?}", err), } } } - }); + Err(err) => { + if !err.to_string().contains("UNAVAILABLE") { + // Not 14-UNAVAILABLE + panic!("other error occur {:?}", err) + } + } + } }); - thread::sleep(Duration::from_millis(200)); + + if let Err(err) = futures::executor::block_on( + client.store_global_config( + "global".into(), + global_items + .iter() + .map(|(name, value)| { + let mut item = 
GlobalConfigItem::default(); + item.set_name(name.to_string()); + item.set_value(value.to_string()); + item + }) + .collect::>(), + ), + ) { + panic!("error occur {:?}", err); + } + + thread::sleep(Duration::from_millis(100)); server.stop(); - j.join().unwrap(); } // Updating pd leader may be slow, we need to make sure it does not block other From 9726e56e5b667649504e3ec636f12843bc94ff8d Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 20 Jan 2023 22:43:49 +0800 Subject: [PATCH 0468/1149] batch-system: add priority scheduling for batch system (#14065) ref tikv/tikv#13730 Support priority-based scheduling for the apply batch system. Signed-off-by: Connor1996 --- Cargo.lock | 8 +- components/batch-system/Cargo.toml | 2 + .../batch-system/benches/batch-system.rs | 6 +- components/batch-system/benches/router.rs | 2 +- components/batch-system/src/batch.rs | 101 ++----- components/batch-system/src/channel.rs | 252 ++++++++++++++++++ components/batch-system/src/fsm.rs | 18 +- components/batch-system/src/lib.rs | 3 +- components/batch-system/src/mailbox.rs | 2 + components/batch-system/src/test_runner.rs | 19 +- components/batch-system/tests/cases/batch.rs | 105 +++++++- components/batch-system/tests/cases/router.rs | 4 +- components/raftstore-v2/Cargo.toml | 1 + components/raftstore-v2/src/batch/store.rs | 2 +- .../raftstore-v2/src/operation/command/mod.rs | 1 + components/raftstore-v2/src/router/message.rs | 5 + components/raftstore/Cargo.toml | 1 + .../raftstore/src/store/entry_storage.rs | 7 + components/raftstore/src/store/fsm/apply.rs | 55 +++- components/raftstore/src/store/fsm/peer.rs | 6 +- components/raftstore/src/store/fsm/store.rs | 17 +- components/raftstore/src/store/msg.rs | 5 + components/raftstore/src/store/peer.rs | 5 +- components/raftstore/src/store/util.rs | 38 ++- .../src/store/worker/refresh_config.rs | 2 +- .../resource_control/src/resource_group.rs | 15 +- components/server/src/server.rs | 2 +- components/test_raftstore/src/cluster.rs | 12 +- 
components/test_raftstore/src/node.rs | 2 +- components/test_raftstore/src/server.rs | 8 +- components/tikv_util/Cargo.toml | 2 +- .../tikv_util/src/mpsc/priority_queue.rs | 46 ++-- .../integrations/config/dynamic/raftstore.rs | 2 +- tests/integrations/config/dynamic/snap.rs | 3 +- .../integrations/raftstore/test_bootstrap.rs | 2 +- 35 files changed, 614 insertions(+), 147 deletions(-) create mode 100644 components/batch-system/src/channel.rs diff --git a/Cargo.lock b/Cargo.lock index ee047aaae6d..d288af846a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -581,9 +581,11 @@ dependencies = [ "derive_more", "fail", "file_system", + "kvproto", "lazy_static", "online_config", "prometheus", + "resource_control", "serde", "serde_derive", "slog", @@ -2727,7 +2729,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#adcf4c414bfd0ccf18436b377430aa2450fd4c81" +source = "git+https://github.com/pingcap/kvproto.git#009f31598ac3200dc8b32e18f96fc4deb7b32e48" dependencies = [ "futures 0.3.15", "grpcio", @@ -4312,6 +4314,7 @@ dependencies = [ "raft", "raft-proto", "rand 0.8.5", + "resource_control", "resource_metering", "serde", "serde_derive", @@ -4362,6 +4365,7 @@ dependencies = [ "raft-proto", "raftstore", "rand 0.8.5", + "resource_control", "resource_metering", "slog", "slog-global", @@ -6565,7 +6569,7 @@ dependencies = [ "openssl", "page_size", "panic_hook", - "parking_lot 0.12.1", + "parking_lot_core 0.9.1", "pin-project", "procfs", "procinfo", diff --git a/components/batch-system/Cargo.toml b/components/batch-system/Cargo.toml index 7fe5798f833..75a0230c188 100644 --- a/components/batch-system/Cargo.toml +++ b/components/batch-system/Cargo.toml @@ -13,9 +13,11 @@ crossbeam = "0.8" derive_more = { version = "0.99", optional = true } fail = "0.5" file_system = { workspace = true } +kvproto = { git = "https://github.com/pingcap/kvproto.git" } lazy_static = "1.3" online_config = { workspace = true } prometheus = { 
version = "0.13", default-features = false, features = ["nightly"] } +resource_control = { workspace = true } serde = { version = "1.0", features = ["derive"] } serde_derive = "1.0" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } diff --git a/components/batch-system/benches/batch-system.rs b/components/batch-system/benches/batch-system.rs index c248eabaf04..9edf72f0ff9 100644 --- a/components/batch-system/benches/batch-system.rs +++ b/components/batch-system/benches/batch-system.rs @@ -20,7 +20,7 @@ fn end_hook(tx: &std::sync::mpsc::Sender<()>) -> Message { fn bench_spawn_many(c: &mut Criterion) { let (control_tx, control_fsm) = Runner::new(100000); let (router, mut system) = - batch_system::create_system(&Config::default(), control_tx, control_fsm); + batch_system::create_system(&Config::default(), control_tx, control_fsm, None); system.spawn("test".to_owned(), Builder::new()); const ID_LIMIT: u64 = 32; const MESSAGE_LIMIT: usize = 256; @@ -55,7 +55,7 @@ fn bench_spawn_many(c: &mut Criterion) { fn bench_imbalance(c: &mut Criterion) { let (control_tx, control_fsm) = Runner::new(100000); let (router, mut system) = - batch_system::create_system(&Config::default(), control_tx, control_fsm); + batch_system::create_system(&Config::default(), control_tx, control_fsm, None); system.spawn("test".to_owned(), Builder::new()); const ID_LIMIT: u64 = 10; const MESSAGE_LIMIT: usize = 512; @@ -92,7 +92,7 @@ fn bench_imbalance(c: &mut Criterion) { fn bench_fairness(c: &mut Criterion) { let (control_tx, control_fsm) = Runner::new(100000); let (router, mut system) = - batch_system::create_system(&Config::default(), control_tx, control_fsm); + batch_system::create_system(&Config::default(), control_tx, control_fsm, None); system.spawn("test".to_owned(), Builder::new()); let state_cnt = Arc::new(AtomicUsize::new(0)); for id in 0..10 { diff --git a/components/batch-system/benches/router.rs b/components/batch-system/benches/router.rs index 
3dd7e282e15..e25ee58b94d 100644 --- a/components/batch-system/benches/router.rs +++ b/components/batch-system/benches/router.rs @@ -8,7 +8,7 @@ use criterion::*; fn bench_send(c: &mut Criterion) { let (control_tx, control_fsm) = Runner::new(100000); let (router, mut system) = - batch_system::create_system(&Config::default(), control_tx, control_fsm); + batch_system::create_system(&Config::default(), control_tx, control_fsm, None); system.spawn("test".to_owned(), Builder::new()); let (normal_tx, normal_fsm) = Runner::new(100000); let normal_box = BasicMailbox::new(normal_tx, normal_fsm, Arc::default()); diff --git a/components/batch-system/src/batch.rs b/components/batch-system/src/batch.rs index 4d935ad4819..48ef809d421 100644 --- a/components/batch-system/src/batch.rs +++ b/components/batch-system/src/batch.rs @@ -15,15 +15,16 @@ use std::{ time::Duration, }; -use crossbeam::channel::{self, SendError}; use fail::fail_point; use file_system::{set_io_type, IoType}; +use resource_control::ResourceController; use tikv_util::{ debug, error, info, mpsc, safe_panic, sys::thread::StdThreadBuildWrapper, thd_name, - time::Instant, warn, + time::Instant, }; use crate::{ + channel::{fsm_channel, ControlScheduler, FsmReceiver, FsmSender, NormalScheduler}, config::Config, fsm::{Fsm, FsmScheduler, Priority}, mailbox::BasicMailbox, @@ -37,60 +38,6 @@ pub enum FsmTypes { // Used as a signal that scheduler should be shutdown. Empty, } - -// A macro to introduce common definition of scheduler. -macro_rules! 
impl_sched { - ($name:ident, $ty:path,Fsm = $fsm:tt) => { - pub struct $name { - sender: channel::Sender>, - low_sender: channel::Sender>, - } - - impl Clone for $name { - #[inline] - fn clone(&self) -> $name { - $name { - sender: self.sender.clone(), - low_sender: self.low_sender.clone(), - } - } - } - - impl FsmScheduler for $name - where - $fsm: Fsm, - { - type Fsm = $fsm; - - #[inline] - fn schedule(&self, fsm: Box) { - let sender = match fsm.get_priority() { - Priority::Normal => &self.sender, - Priority::Low => &self.low_sender, - }; - match sender.send($ty(fsm)) { - Ok(()) => {} - // TODO: use debug instead. - Err(SendError($ty(fsm))) => warn!("failed to schedule fsm {:p}", fsm), - _ => unreachable!(), - } - } - - fn shutdown(&self) { - // TODO: close it explicitly once it's supported. - // Magic number, actually any number greater than poll pool size works. - for _ in 0..256 { - let _ = self.sender.send(FsmTypes::Empty); - let _ = self.low_sender.send(FsmTypes::Empty); - } - } - } - }; -} - -impl_sched!(NormalScheduler, FsmTypes::Normal, Fsm = N); -impl_sched!(ControlScheduler, FsmTypes::Control, Fsm = C); - pub struct NormalFsm { fsm: Box, timer: Instant, @@ -168,7 +115,7 @@ impl Batch { /// /// When pending messages of the FSM is different than `expected_len`, /// attempts to schedule it in this poller again. Returns the `fsm` if the - /// re-scheduling suceeds. + /// re-scheduling succeeds. fn release(&mut self, mut fsm: NormalFsm, expected_len: usize) -> Option> { let mailbox = fsm.take_mailbox().unwrap(); mailbox.release(fsm.fsm); @@ -341,7 +288,7 @@ pub trait PollHandler: Send + 'static { /// Internal poller that fetches batch and call handler hooks for readiness. 
pub struct Poller { pub router: Router, ControlScheduler>, - pub fsm_receiver: channel::Receiver>, + pub fsm_receiver: FsmReceiver, pub handler: Handler, pub max_batch_size: usize, pub reschedule_duration: Duration, @@ -534,8 +481,8 @@ pub trait HandlerBuilder { pub struct BatchSystem { name_prefix: Option, router: BatchRouter, - receiver: channel::Receiver>, - low_receiver: channel::Receiver>, + receiver: FsmReceiver, + low_receiver: FsmReceiver, pool_size: usize, max_batch_size: usize, workers: Arc>>>, @@ -649,15 +596,15 @@ where } } -struct PoolStateBuilder { +struct PoolStateBuilder { max_batch_size: usize, reschedule_duration: Duration, - fsm_receiver: channel::Receiver>, - fsm_sender: channel::Sender>, + fsm_receiver: FsmReceiver, + fsm_sender: FsmSender, pool_size: usize, } -impl PoolStateBuilder { +impl PoolStateBuilder { fn build>( self, name_prefix: String, @@ -683,11 +630,11 @@ impl PoolStateBuilder { } } -pub struct PoolState> { +pub struct PoolState> { pub name_prefix: String, pub handler_builder: H, - pub fsm_receiver: channel::Receiver>, - pub fsm_sender: channel::Sender>, + pub fsm_receiver: FsmReceiver, + pub fsm_sender: FsmSender, pub low_priority_pool_size: usize, pub expected_pool_size: usize, pub workers: Arc>>>, @@ -707,32 +654,32 @@ pub fn create_system( cfg: &Config, sender: mpsc::LooseBoundedSender, controller: Box, + resource_ctl: Option>, ) -> (BatchRouter, BatchSystem) { let state_cnt = Arc::new(AtomicUsize::new(0)); let control_box = BasicMailbox::new(sender, controller, state_cnt.clone()); - let (tx, rx) = channel::unbounded(); - let (tx2, rx2) = channel::unbounded(); + let (sender, receiver) = fsm_channel(resource_ctl); + let (low_sender, low_receiver) = fsm_channel(None); // no resource control for low fsm let normal_scheduler = NormalScheduler { - sender: tx.clone(), - low_sender: tx2.clone(), + sender: sender.clone(), + low_sender, }; let control_scheduler = ControlScheduler { - sender: tx.clone(), - low_sender: tx2, + sender: 
sender.clone(), }; let pool_state_builder = PoolStateBuilder { max_batch_size: cfg.max_batch_size(), reschedule_duration: cfg.reschedule_duration.0, - fsm_receiver: rx.clone(), - fsm_sender: tx, + fsm_receiver: receiver.clone(), + fsm_sender: sender, pool_size: cfg.pool_size, }; let router = Router::new(control_box, normal_scheduler, control_scheduler, state_cnt); let system = BatchSystem { name_prefix: None, router: router.clone(), - receiver: rx, - low_receiver: rx2, + receiver, + low_receiver, pool_size: cfg.pool_size, max_batch_size: cfg.max_batch_size(), workers: Arc::new(Mutex::new(Vec::new())), diff --git a/components/batch-system/src/channel.rs b/components/batch-system/src/channel.rs new file mode 100644 index 00000000000..094b6a7a2ae --- /dev/null +++ b/components/batch-system/src/channel.rs @@ -0,0 +1,252 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{cell::RefCell, sync::Arc}; + +use crossbeam::channel::{self, RecvError, SendError, TryRecvError, TrySendError}; +use kvproto::kvrpcpb::CommandPri; +use resource_control::{ResourceConsumeType, ResourceController}; +use tikv_util::{mpsc::priority_queue, warn}; + +use crate::{ + fsm::{Fsm, FsmScheduler, Priority, ResourceMetered}, + FsmTypes, +}; + +pub fn fsm_channel( + resource_ctl: Option>, +) -> (FsmSender, FsmReceiver) { + if let Some(ctl) = resource_ctl { + let (tx, rx) = priority_queue::unbounded(); + ( + FsmSender::Priority { + resource_ctl: ctl, + sender: tx, + last_msg_group: RefCell::new(String::new()), + }, + FsmReceiver::Priority(rx), + ) + } else { + let (tx, rx) = channel::unbounded(); + (FsmSender::Vanilla(tx), FsmReceiver::Vanilla(rx)) + } +} + +pub struct NormalScheduler { + pub(crate) sender: FsmSender, + pub(crate) low_sender: FsmSender, +} + +impl Clone for NormalScheduler +where + N: Fsm, + C: Fsm, +{ + fn clone(&self) -> Self { + NormalScheduler { + sender: self.sender.clone(), + low_sender: self.low_sender.clone(), + } + } +} + +impl FsmScheduler for 
NormalScheduler +where + N: Fsm, + C: Fsm, +{ + type Fsm = N; + + fn consume_msg_resource(&self, msg: &::Message) { + self.sender.consume_msg_resource(msg); + } + + #[inline] + fn schedule(&self, fsm: Box) { + let sender = match fsm.get_priority() { + Priority::Normal => &self.sender, + Priority::Low => &self.low_sender, + }; + + match sender.send(FsmTypes::Normal(fsm)) { + Ok(()) => {} + Err(SendError(FsmTypes::Normal(fsm))) => warn!("failed to schedule fsm {:p}", fsm), + _ => unreachable!(), + } + } + + fn shutdown(&self) { + // TODO: close it explicitly once it's supported. + // Magic number, actually any number greater than poll pool size works. + for _ in 0..256 { + let _ = self.sender.send(FsmTypes::Empty); + let _ = self.low_sender.send(FsmTypes::Empty); + } + } +} + +pub struct ControlScheduler { + pub(crate) sender: FsmSender, +} + +impl Clone for ControlScheduler +where + N: Fsm, + C: Fsm, +{ + fn clone(&self) -> Self { + ControlScheduler { + sender: self.sender.clone(), + } + } +} + +impl FsmScheduler for ControlScheduler +where + N: Fsm, + C: Fsm, +{ + type Fsm = C; + + fn consume_msg_resource(&self, _msg: &::Message) {} + + #[inline] + fn schedule(&self, fsm: Box) { + match self.sender.send(FsmTypes::Control(fsm)) { + Ok(()) => {} + Err(SendError(FsmTypes::Control(fsm))) => warn!("failed to schedule fsm {:p}", fsm), + _ => unreachable!(), + } + } + + fn shutdown(&self) { + // TODO: close it explicitly once it's supported. + // Magic number, actually any number greater than poll pool size works. + for _ in 0..256 { + let _ = self.sender.send(FsmTypes::Empty); + } + } +} + +pub enum FsmSender { + Vanilla(channel::Sender>), + Priority { + resource_ctl: Arc, + sender: priority_queue::Sender>, + last_msg_group: RefCell, + }, +} + +impl Clone for FsmSender +where + N: Fsm, + C: Fsm, +{ + fn clone(&self) -> Self { + match self { + FsmSender::Vanilla(sender) => FsmSender::Vanilla(sender.clone()), + FsmSender::Priority { + resource_ctl, + sender, + .. 
+ } => FsmSender::Priority { + resource_ctl: resource_ctl.clone(), + sender: sender.clone(), + last_msg_group: RefCell::new(String::new()), + }, + } + } +} + +impl FsmSender { + pub fn send(&self, fsm: FsmTypes) -> Result<(), SendError>> { + match self { + FsmSender::Vanilla(sender) => sender.send(fsm), + FsmSender::Priority { + resource_ctl, + sender, + last_msg_group, + } => { + // TODO: pass different priority + let pri = resource_ctl + .get_priority(last_msg_group.borrow().as_bytes(), CommandPri::Normal); + sender.send(fsm, pri) + } + } + } + + pub fn try_send(&self, fsm: FsmTypes) -> Result<(), TrySendError>> { + match self { + FsmSender::Vanilla(sender) => sender.try_send(fsm), + FsmSender::Priority { + resource_ctl, + sender, + last_msg_group, + } => { + let priority = resource_ctl + .get_priority(last_msg_group.borrow().as_bytes(), CommandPri::Normal); + sender.try_send(fsm, priority) + } + } + } + + fn consume_msg_resource(&self, msg: &N::Message) { + match self { + FsmSender::Vanilla(_) => {} + FsmSender::Priority { + resource_ctl, + last_msg_group, + .. 
+ } => { + if let Some(mut groups) = msg.get_resource_consumptions() { + let mut dominant_group = "".to_owned(); + let mut max_write_bytes = 0; + for (group_name, write_bytes) in groups.drain() { + resource_ctl.consume( + group_name.as_bytes(), + ResourceConsumeType::IoBytes(write_bytes), + ); + if write_bytes > max_write_bytes { + dominant_group = group_name; + max_write_bytes = write_bytes; + } + } + *last_msg_group.borrow_mut() = dominant_group; + } + } + } + } +} + +pub enum FsmReceiver { + Vanilla(channel::Receiver>), + Priority(priority_queue::Receiver>), +} + +impl Clone for FsmReceiver +where + N: Fsm, + C: Fsm, +{ + fn clone(&self) -> Self { + match self { + FsmReceiver::Vanilla(receiver) => FsmReceiver::Vanilla(receiver.clone()), + FsmReceiver::Priority(receiver) => FsmReceiver::Priority(receiver.clone()), + } + } +} + +impl FsmReceiver { + pub fn recv(&self) -> Result, RecvError> { + match self { + FsmReceiver::Vanilla(receiver) => receiver.recv(), + FsmReceiver::Priority(receiver) => receiver.recv(), + } + } + + pub fn try_recv(&self) -> Result, TryRecvError> { + match self { + FsmReceiver::Vanilla(receiver) => receiver.try_recv(), + FsmReceiver::Priority(receiver) => receiver.try_recv(), + } + } +} diff --git a/components/batch-system/src/fsm.rs b/components/batch-system/src/fsm.rs index 09e32333c96..5d9e009fa01 100644 --- a/components/batch-system/src/fsm.rs +++ b/components/batch-system/src/fsm.rs @@ -10,6 +10,8 @@ use std::{ usize, }; +use collections::HashMap; + use crate::mailbox::BasicMailbox; #[derive(Clone, Copy, Debug, PartialEq)] @@ -24,15 +26,26 @@ pub trait FsmScheduler { /// Schedule a Fsm for later handling. fn schedule(&self, fsm: Box); + /// Shutdown the scheduler, which indicates that resources like /// background thread pool should be released. fn shutdown(&self); + + /// Consume the resources of msg in resource controller if enabled, + /// otherwise do nothing. 
+ fn consume_msg_resource(&self, msg: &::Message); +} + +pub trait ResourceMetered { + fn get_resource_consumptions(&self) -> Option> { + None + } } /// A `Fsm` is a finite state machine. It should be able to be notified for /// updating internal state according to incoming messages. -pub trait Fsm { - type Message: Send; +pub trait Fsm: Send + 'static { + type Message: Send + ResourceMetered; fn is_stopped(&self) -> bool; @@ -42,6 +55,7 @@ pub trait Fsm { Self: Sized, { } + /// Take the mailbox from FSM. Implementation should ensure there will be /// no reference to mailbox after calling this method. fn take_mailbox(&mut self) -> Option> diff --git a/components/batch-system/src/lib.rs b/components/batch-system/src/lib.rs index 9a307a534ac..f4f799dcc9a 100644 --- a/components/batch-system/src/lib.rs +++ b/components/batch-system/src/lib.rs @@ -1,6 +1,7 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. mod batch; +mod channel; mod config; mod fsm; mod mailbox; @@ -16,7 +17,7 @@ pub use self::{ PollHandler, Poller, PoolState, }, config::Config, - fsm::{Fsm, FsmScheduler, Priority}, + fsm::{Fsm, FsmScheduler, Priority, ResourceMetered}, mailbox::{BasicMailbox, Mailbox}, router::Router, }; diff --git a/components/batch-system/src/mailbox.rs b/components/batch-system/src/mailbox.rs index 5afddf73c14..869031392af 100644 --- a/components/batch-system/src/mailbox.rs +++ b/components/batch-system/src/mailbox.rs @@ -75,6 +75,7 @@ impl BasicMailbox { msg: Owner::Message, scheduler: &S, ) -> Result<(), SendError> { + scheduler.consume_msg_resource(&msg); self.sender.force_send(msg)?; self.state.notify(scheduler, Cow::Borrowed(self)); Ok(()) @@ -89,6 +90,7 @@ impl BasicMailbox { msg: Owner::Message, scheduler: &S, ) -> Result<(), TrySendError> { + scheduler.consume_msg_resource(&msg); self.sender.try_send(msg)?; self.state.notify(scheduler, Cow::Borrowed(self)); Ok(()) diff --git a/components/batch-system/src/test_runner.rs 
b/components/batch-system/src/test_runner.rs index 6be64d5d695..a3ae80dc55a 100644 --- a/components/batch-system/src/test_runner.rs +++ b/components/batch-system/src/test_runner.rs @@ -11,10 +11,11 @@ use std::{ }, }; +use collections::HashMap; use derive_more::{Add, AddAssign}; use tikv_util::mpsc; -use crate::*; +use crate::{fsm::ResourceMetered, *}; /// Message `Runner` can accepts. pub enum Message { @@ -22,6 +23,21 @@ pub enum Message { Loop(usize), /// `Runner` will call the callback directly. Callback(Box), + /// group name, write bytes + Resource(String, u64), +} + +impl ResourceMetered for Message { + fn get_resource_consumptions(&self) -> Option> { + match self { + Message::Resource(group_name, bytes) => { + let mut map = HashMap::default(); + map.insert(group_name.to_owned(), *bytes); + Some(map) + } + _ => None, + } + } } /// A simple runner used for benchmarking only. @@ -102,6 +118,7 @@ impl Handler { } } Ok(Message::Callback(cb)) => cb(self, r), + Ok(Message::Resource(..)) => {} Err(_) => break, } } diff --git a/components/batch-system/tests/cases/batch.rs b/components/batch-system/tests/cases/batch.rs index f950df68b8d..dc13affc363 100644 --- a/components/batch-system/tests/cases/batch.rs +++ b/components/batch-system/tests/cases/batch.rs @@ -7,13 +7,15 @@ use std::{ }; use batch_system::{test_runner::*, *}; +use kvproto::resource_manager::{GroupMode, GroupRawResourceSettings, ResourceGroup}; +use resource_control::ResourceGroupManager; use tikv_util::mpsc; #[test] fn test_batch() { let (control_tx, control_fsm) = Runner::new(10); let (router, mut system) = - batch_system::create_system(&Config::default(), control_tx, control_fsm); + batch_system::create_system(&Config::default(), control_tx, control_fsm, None); let builder = Builder::new(); let metrics = builder.metrics.clone(); system.spawn("test".to_owned(), builder); @@ -55,7 +57,7 @@ fn test_batch() { fn test_priority() { let (control_tx, control_fsm) = Runner::new(10); let (router, mut system) 
= - batch_system::create_system(&Config::default(), control_tx, control_fsm); + batch_system::create_system(&Config::default(), control_tx, control_fsm, None); let builder = Builder::new(); system.spawn("test".to_owned(), builder); let (tx, rx) = mpsc::unbounded(); @@ -101,3 +103,102 @@ fn test_priority() { .unwrap(); assert_eq!(rx.recv_timeout(Duration::from_secs(3)), Ok(3)); } + +#[test] +fn test_resource_group() { + let (control_tx, control_fsm) = Runner::new(10); + let resource_manager = ResourceGroupManager::default(); + + let get_group = |name: &str, read_tokens: u64, write_tokens: u64| -> ResourceGroup { + let mut group = ResourceGroup::new(); + group.set_name(name.to_string()); + group.set_mode(GroupMode::RawMode); + let mut resource_setting = GroupRawResourceSettings::new(); + resource_setting + .mut_cpu() + .mut_settings() + .set_fill_rate(read_tokens); + resource_setting + .mut_io_write() + .mut_settings() + .set_fill_rate(write_tokens); + group.set_raw_resource_settings(resource_setting); + group + }; + + resource_manager.add_resource_group(get_group("group1", 10, 10)); + resource_manager.add_resource_group(get_group("group2", 100, 100)); + + let mut cfg = Config::default(); + cfg.pool_size = 1; + let (router, mut system) = batch_system::create_system( + &cfg, + control_tx, + control_fsm, + Some(resource_manager.derive_controller("test".to_string(), false)), + ); + let builder = Builder::new(); + system.spawn("test".to_owned(), builder); + let (tx, rx) = mpsc::unbounded(); + let tx_ = tx.clone(); + let r = router.clone(); + let state_cnt = Arc::new(AtomicUsize::new(0)); + router + .send_control(Message::Callback(Box::new( + move |_: &Handler, _: &mut Runner| { + let (tx, runner) = Runner::new(10); + r.register(1, BasicMailbox::new(tx, runner, state_cnt.clone())); + let (tx2, runner2) = Runner::new(10); + r.register(2, BasicMailbox::new(tx2, runner2, state_cnt)); + tx_.send(0).unwrap(); + }, + ))) + .unwrap(); + 
assert_eq!(rx.recv_timeout(Duration::from_secs(3)), Ok(0)); + + let tx_ = tx.clone(); + let (tx1, rx1) = std::sync::mpsc::sync_channel(0); + // block the thread + router + .send_control(Message::Callback(Box::new( + move |_: &Handler, _: &mut Runner| { + tx_.send(0).unwrap(); + tx1.send(0).unwrap(); + }, + ))) + .unwrap(); + assert_eq!(rx.recv_timeout(Duration::from_secs(3)), Ok(0)); + + router + .send(1, Message::Resource("group1".to_string(), 1)) + .unwrap(); + let tx_ = tx.clone(); + router + .send( + 1, + Message::Callback(Box::new(move |_: &Handler, _: &mut Runner| { + tx_.send(1).unwrap(); + })), + ) + .unwrap(); + + router + .send(2, Message::Resource("group2".to_string(), 1)) + .unwrap(); + router + .send( + 2, + Message::Callback(Box::new(move |_: &Handler, _: &mut Runner| { + tx.send(2).unwrap(); + })), + ) + .unwrap(); + + // pause the blocking thread + assert_eq!(rx1.recv_timeout(Duration::from_secs(3)), Ok(0)); + + // should recv from group2 first, because group2 has more tokens and it would be + // handled with higher priority. 
+ assert_eq!(rx.recv_timeout(Duration::from_secs(3)), Ok(2)); + assert_eq!(rx.recv_timeout(Duration::from_secs(3)), Ok(1)); +} diff --git a/components/batch-system/tests/cases/router.rs b/components/batch-system/tests/cases/router.rs index 543937fa8ef..d746dfad5cb 100644 --- a/components/batch-system/tests/cases/router.rs +++ b/components/batch-system/tests/cases/router.rs @@ -30,7 +30,7 @@ fn test_basic() { let (control_drop_tx, control_drop_rx) = mpsc::unbounded(); control_fsm.sender = Some(control_drop_tx); let (router, mut system) = - batch_system::create_system(&Config::default(), control_tx, control_fsm); + batch_system::create_system(&Config::default(), control_tx, control_fsm, None); let builder = Builder::new(); system.spawn("test".to_owned(), builder); @@ -130,7 +130,7 @@ fn test_basic() { fn test_router_trace() { let (control_tx, control_fsm) = Runner::new(10); let (router, mut system) = - batch_system::create_system(&Config::default(), control_tx, control_fsm); + batch_system::create_system(&Config::default(), control_tx, control_fsm, None); let builder = Builder::new(); system.spawn("test".to_owned(), builder); diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 6726c5ed742..5b917b9ddf7 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -52,6 +52,7 @@ raft = { version = "0.7.0", default-features = false, features = ["protobuf-code raft-proto = { version = "0.7.0" } raftstore = { workspace = true } rand = "0.8.3" +resource_control = { workspace = true } resource_metering = { workspace = true } slog = "2.3" smallvec = "1.4" diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 280e8dcc396..1c7360a86bc 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -749,7 +749,7 @@ where { let (store_tx, store_fsm) = StoreFsm::new(cfg, store_id, logger.clone()); let (router, 
system) = - batch_system::create_system(&cfg.store_batch_system, store_tx, store_fsm); + batch_system::create_system(&cfg.store_batch_system, store_tx, store_fsm, None); let system = StoreSystem { system, workers: None, diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index cf29d9ee25a..edca9510c27 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -590,6 +590,7 @@ impl Apply { AdminCmdType::InvalidAdmin => { return Err(box_err!("invalid admin command type")); } + AdminCmdType::UpdateGcPeer => unimplemented!(), }; match admin_result { diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index c1e5f0d37dc..a9353e171d9 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -2,6 +2,7 @@ // #[PerformanceCriticalPath] +use batch_system::ResourceMetered; use kvproto::{ metapb, raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}, @@ -197,6 +198,8 @@ pub enum PeerMsg { WaitFlush(super::FlushChannel), } +impl ResourceMetered for PeerMsg {} + impl PeerMsg { pub fn raft_query(req: RaftCmdRequest) -> (Self, QueryResSubscriber) { let (ch, sub) = QueryResChannel::pair(); @@ -259,3 +262,5 @@ pub enum StoreMsg { Start, StoreUnreachable { to_store_id: u64 }, } + +impl ResourceMetered for StoreMsg {} diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 548693b71ac..8df501f279d 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -72,6 +72,7 @@ protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raft-proto = { version = "0.7.0", default-features = false } rand = "0.8.3" +resource_control = { workspace = true } resource_metering = { workspace = true } serde = "1.0" serde_derive = 
"1.0" diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index bc85ecedc34..afa13730ccf 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -69,6 +69,13 @@ impl CachedEntries { } } + pub fn iter_entries(&self, mut f: impl FnMut(&Entry)) { + let entries = self.entries.lock().unwrap(); + for entry in &entries.0 { + f(entry); + } + } + /// Take cached entries and dangle size for them. `dangle` means not in /// entry cache. pub fn take_entries(&self) -> (Vec, usize) { diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 9f2d234010f..22a42393173 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -24,7 +24,7 @@ use std::{ use batch_system::{ BasicMailbox, BatchRouter, BatchSystem, Config as BatchSystemConfig, Fsm, HandleResult, - HandlerBuilder, PollHandler, Priority, + HandlerBuilder, PollHandler, Priority, ResourceMetered, }; use collections::{HashMap, HashMapEntry, HashSet}; use crossbeam::channel::{TryRecvError, TrySendError}; @@ -46,11 +46,12 @@ use kvproto::{ }; use pd_client::{new_bucket_stats, BucketMeta, BucketStat}; use prometheus::local::LocalHistogram; -use protobuf::{wire_format::WireType, CodedInputStream}; +use protobuf::{wire_format::WireType, CodedInputStream, Message}; use raft::eraftpb::{ ConfChange, ConfChangeType, ConfChangeV2, Entry, EntryType, Snapshot as RaftSnapshot, }; use raft_proto::ConfChangeI; +use resource_control::ResourceController; use smallvec::{smallvec, SmallVec}; use sst_importer::SstImporter; use tikv_alloc::trace::TraceEvent; @@ -1695,6 +1696,7 @@ where } AdminCmdType::BatchSwitchWitness => self.exec_batch_switch_witness(ctx, request), AdminCmdType::InvalidAdmin => Err(box_err!("unsupported admin command type")), + AdminCmdType::UpdateGcPeer => unimplemented!(), }?; 
response.set_cmd_type(cmd_type); @@ -3709,6 +3711,26 @@ where }, } +impl ResourceMetered for Msg { + fn get_resource_consumptions(&self) -> Option> { + match self { + Msg::Apply { apply, .. } => { + let mut map = HashMap::default(); + for cached_entries in &apply.entries { + cached_entries.iter_entries(|entry| { + // TODO: maybe use a more efficient way to get the resource group name. + let header = util::get_entry_header(entry); + let group_name = header.get_resource_group_name().to_owned(); + *map.entry(group_name).or_default() += entry.compute_size() as u64; + }); + } + Some(map) + } + _ => None, + } + } +} + impl Msg where EK: KvEngine, @@ -4406,6 +4428,7 @@ pub enum ControlMsg { }, } +impl ResourceMetered for ControlMsg {} pub struct ControlFsm { receiver: Receiver, stopped: bool, @@ -4834,10 +4857,15 @@ impl ApplyBatchSystem { pub fn create_apply_batch_system( cfg: &Config, + resource_ctl: Option>, ) -> (ApplyRouter, ApplyBatchSystem) { let (control_tx, control_fsm) = ControlFsm::new(); - let (router, system) = - batch_system::create_system(&cfg.apply_batch_system, control_tx, control_fsm); + let (router, system) = batch_system::create_system( + &cfg.apply_batch_system, + control_tx, + control_fsm, + resource_ctl, + ); (ApplyRouter { router }, ApplyBatchSystem { system }) } @@ -4984,6 +5012,7 @@ mod tests { cmd.mut_put().set_key(b"key".to_vec()); cmd.mut_put().set_value(b"value".to_vec()); let mut req = RaftCmdRequest::default(); + req.set_header(RaftRequestHeader::default()); req.mut_requests().push(cmd); e.set_data(req.write_to_bytes().unwrap().into()) } @@ -5251,7 +5280,7 @@ mod tests { let (_dir, importer) = create_tmp_importer("apply-basic"); let (region_scheduler, mut snapshot_rx) = dummy_scheduler(); let cfg = Arc::new(VersionTrack::new(Config::default())); - let (router, mut system) = create_apply_batch_system(&cfg.value()); + let (router, mut system) = create_apply_batch_system(&cfg.value(), None); let pending_create_peers = 
Arc::new(Mutex::new(HashMap::default())); let builder = super::Builder:: { tag: "test-store".to_owned(), @@ -5715,7 +5744,7 @@ mod tests { let (region_scheduler, _) = dummy_scheduler(); let sender = Box::new(TestNotifier { tx }); let cfg = Arc::new(VersionTrack::new(Config::default())); - let (router, mut system) = create_apply_batch_system(&cfg.value()); + let (router, mut system) = create_apply_batch_system(&cfg.value(), None); let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); let builder = super::Builder:: { tag: "test-store".to_owned(), @@ -6054,7 +6083,7 @@ mod tests { let (region_scheduler, _) = dummy_scheduler(); let sender = Box::new(TestNotifier { tx }); let cfg = Arc::new(VersionTrack::new(Config::default())); - let (router, mut system) = create_apply_batch_system(&cfg.value()); + let (router, mut system) = create_apply_batch_system(&cfg.value(), None); let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); let builder = super::Builder:: { tag: "test-store".to_owned(), @@ -6145,7 +6174,7 @@ mod tests { cfg.apply_batch_system.low_priority_pool_size = 0; Arc::new(VersionTrack::new(cfg)) }; - let (router, mut system) = create_apply_batch_system(&cfg.value()); + let (router, mut system) = create_apply_batch_system(&cfg.value(), None); let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); let builder = super::Builder:: { tag: "test-store".to_owned(), @@ -6325,7 +6354,7 @@ mod tests { cfg.apply_batch_system.low_priority_pool_size = 0; Arc::new(VersionTrack::new(cfg)) }; - let (router, mut system) = create_apply_batch_system(&cfg.value()); + let (router, mut system) = create_apply_batch_system(&cfg.value(), None); let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); let builder = super::Builder:: { tag: "test-store".to_owned(), @@ -6418,7 +6447,7 @@ mod tests { let (region_scheduler, _) = dummy_scheduler(); let sender = Box::new(TestNotifier { tx }); let cfg = Config::default(); - let (router, mut 
system) = create_apply_batch_system(&cfg); + let (router, mut system) = create_apply_batch_system(&cfg, None); let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); let builder = super::Builder:: { tag: "test-exec-observer".to_owned(), @@ -6642,7 +6671,7 @@ mod tests { let (region_scheduler, _) = dummy_scheduler(); let sender = Box::new(TestNotifier { tx }); let cfg = Config::default(); - let (router, mut system) = create_apply_batch_system(&cfg); + let (router, mut system) = create_apply_batch_system(&cfg, None); let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); let builder = super::Builder:: { tag: "test-store".to_owned(), @@ -6922,7 +6951,7 @@ mod tests { .register_cmd_observer(1, BoxCmdObserver::new(obs)); let (region_scheduler, _) = dummy_scheduler(); let cfg = Arc::new(VersionTrack::new(Config::default())); - let (router, mut system) = create_apply_batch_system(&cfg.value()); + let (router, mut system) = create_apply_batch_system(&cfg.value(), None); let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); let builder = super::Builder:: { tag: "test-store".to_owned(), @@ -7148,7 +7177,7 @@ mod tests { let (region_scheduler, _) = dummy_scheduler(); let sender = Box::new(TestNotifier { tx }); let cfg = Arc::new(VersionTrack::new(Config::default())); - let (router, mut system) = create_apply_batch_system(&cfg.value()); + let (router, mut system) = create_apply_batch_system(&cfg.value(), None); let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); let builder = super::Builder:: { tag: "flashback_need_to_be_applied".to_owned(), diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index d405c3471af..4266e400cd3 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2702,7 +2702,7 @@ where } let mut resp = ExtraMessage::default(); resp.set_type(ExtraMessageType::MsgVoterReplicatedIndexResponse); - 
resp.voter_replicated_index = voter_replicated_idx; + resp.index = voter_replicated_idx; self.fsm .peer .send_extra_message(resp, &mut self.ctx.trans, from); @@ -2719,7 +2719,7 @@ where if self.fsm.peer.is_leader() || !self.fsm.peer.is_witness() { return; } - let voter_replicated_index = msg.voter_replicated_index; + let voter_replicated_index = msg.index; if let Ok(voter_replicated_term) = self.fsm.peer.get_store().term(voter_replicated_index) { self.ctx.apply_router.schedule_task( self.region_id(), @@ -2787,6 +2787,8 @@ where ExtraMessageType::MsgVoterReplicatedIndexResponse => { self.on_voter_replicated_index_response(msg.get_extra_msg()); } + ExtraMessageType::MsgGcPeerRequest => unimplemented!(), + ExtraMessageType::MsgGcPeerResponse => unimplemented!(), } } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 2ca573824f9..e68873cadf1 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -42,6 +42,7 @@ use kvproto::{ use pd_client::{Feature, FeatureGate, PdClient}; use protobuf::Message; use raft::StateRole; +use resource_control::ResourceGroupManager; use resource_metering::CollectorRegHandle; use sst_importer::SstImporter; use tikv_alloc::trace::TraceEvent; @@ -1795,11 +1796,21 @@ impl RaftBatchSystem { pub fn create_raft_batch_system( cfg: &Config, + resource_manager: &Option>, ) -> (RaftRouter, RaftBatchSystem) { let (store_tx, store_fsm) = StoreFsm::new(cfg); - let (apply_router, apply_system) = create_apply_batch_system(cfg); - let (router, system) = - batch_system::create_system(&cfg.store_batch_system, store_tx, store_fsm); + let (apply_router, apply_system) = create_apply_batch_system( + cfg, + resource_manager + .as_ref() + .map(|m| m.derive_controller("apply".to_owned(), false)), + ); + let (router, system) = batch_system::create_system( + &cfg.store_batch_system, + store_tx, + store_fsm, + None, // Do not do priority scheduling for store 
batch system + ); let raft_router = RaftRouter { router }; let system = RaftBatchSystem { system, diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 3c555689cb9..195a94478dc 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -5,6 +5,7 @@ use std::sync::Arc; use std::{borrow::Cow, fmt}; +use batch_system::ResourceMetered; use collections::HashSet; use engine_traits::{CompactedEvent, KvEngine, Snapshot}; use futures::channel::mpsc::UnboundedSender; @@ -772,6 +773,8 @@ pub enum PeerMsg { Destroy(u64), } +impl ResourceMetered for PeerMsg {} + impl fmt::Debug for PeerMsg { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { match self { @@ -867,6 +870,8 @@ where }, } +impl ResourceMetered for StoreMsg {} + impl fmt::Debug for StoreMsg where EK: KvEngine, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index e2a914fded6..44701fbf705 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -4843,7 +4843,7 @@ where return; } if let Some(ref state) = self.pending_merge_state { - if state.get_commit() == extra_msg.get_premerge_commit() { + if state.get_commit() == extra_msg.get_index() { self.add_want_rollback_merge_peer(peer_id); } } @@ -5438,7 +5438,7 @@ where }; let mut extra_msg = ExtraMessage::default(); extra_msg.set_type(ExtraMessageType::MsgWantRollbackMerge); - extra_msg.set_premerge_commit(premerge_commit); + extra_msg.set_index(premerge_commit); self.send_extra_message(extra_msg, &mut ctx.trans, &to_peer); } @@ -5795,6 +5795,7 @@ mod tests { AdminCmdType::ComputeHash, AdminCmdType::VerifyHash, AdminCmdType::BatchSwitchWitness, + AdminCmdType::UpdateGcPeer, ]; for tp in AdminCmdType::values() { let mut msg = RaftCmdRequest::default(); diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 2f61534d159..4d8128822c7 100644 --- 
a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -24,9 +24,9 @@ use kvproto::{ }, raft_serverpb::{RaftMessage, RaftSnapshotData}, }; -use protobuf::{self, Message}; +use protobuf::{self, CodedInputStream, Message}; use raft::{ - eraftpb::{self, ConfChangeType, ConfState, MessageType, Snapshot}, + eraftpb::{self, ConfChangeType, ConfState, Entry, EntryType, MessageType, Snapshot}, Changer, RawNode, INVALID_INDEX, }; use raft_proto::ConfChangeI; @@ -229,6 +229,7 @@ pub fn admin_cmd_epoch_lookup(admin_cmp_type: AdminCmdType) -> AdminCmdEpochStat AdminCmdEpochState::new(true, true, false, false) } AdminCmdType::BatchSwitchWitness => AdminCmdEpochState::new(false, true, false, true), + AdminCmdType::UpdateGcPeer => AdminCmdEpochState::new(false, false, false, false), } } @@ -725,6 +726,24 @@ pub(crate) fn u64_to_timespec(u: u64) -> Timespec { Timespec::new(sec as i64, nsec as i32) } +pub fn get_entry_header(entry: &Entry) -> RaftRequestHeader { + if entry.get_entry_type() != EntryType::EntryNormal { + return RaftRequestHeader::default(); + } + // request header is encoded into data + let mut is = CodedInputStream::from_bytes(entry.get_data()); + if is.eof().unwrap() { + return RaftRequestHeader::default(); + } + let (field_number, _) = is.read_tag_unpack().unwrap(); + let t = is.read_message().unwrap(); + // Header field is of number 1 + if field_number != 1 { + panic!("unexpected field number: {} {:?}", field_number, t); + } + t +} + /// Parse data of entry `index`. 
/// /// # Panics @@ -1671,6 +1690,7 @@ mod tests { metapb::{self, RegionEpoch}, raft_cmdpb::AdminRequest, }; + use protobuf::Message as _; use raft::eraftpb::{ConfChangeType, Entry, Message, MessageType}; use tikv_util::store::new_peer; use time::Duration as TimeDuration; @@ -1749,6 +1769,20 @@ mod tests { assert_eq!(m1.inspect(Some(monotonic_raw_now())), LeaseState::Valid); } + #[test] + fn test_get_entry_header() { + let mut req = RaftCmdRequest::default(); + let mut header = RaftRequestHeader::default(); + header.set_resource_group_name("test".to_owned()); + req.set_header(header); + let mut entry = Entry::new(); + entry.set_term(1); + entry.set_index(2); + entry.set_data(req.write_to_bytes().unwrap().into()); + let header = get_entry_header(&entry); + assert_eq!(header.get_resource_group_name(), "test"); + } + #[test] fn test_timespec_u64() { let cases = vec![ diff --git a/components/raftstore/src/store/worker/refresh_config.rs b/components/raftstore/src/store/worker/refresh_config.rs index 6555e96f102..d09a6dd9f53 100644 --- a/components/raftstore/src/store/worker/refresh_config.rs +++ b/components/raftstore/src/store/worker/refresh_config.rs @@ -43,7 +43,7 @@ where for _ in 0..size { if let Err(e) = self.state.fsm_sender.send(FsmTypes::Empty) { error!( - "failed to decrese thread pool"; + "failed to decrease thread pool"; "decrease to" => size, "err" => %e, ); diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 23a50b42560..1524ebcba5d 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -9,7 +9,10 @@ use std::{ }; use dashmap::{mapref::one::Ref, DashMap}; -use kvproto::resource_manager::{GroupMode, ResourceGroup}; +use kvproto::{ + kvrpcpb::CommandPri, + resource_manager::{GroupMode, ResourceGroup}, +}; use yatp::queue::priority::TaskPriorityProvider; // a read task cost at least 50us. 
@@ -97,7 +100,6 @@ impl ResourceGroupManager { let ru_quota = Self::get_ru_setting(g.value(), controller.is_read); controller.add_resource_group(g.key().clone().into_bytes(), ru_quota); } - controller } @@ -243,6 +245,15 @@ impl ResourceController { // need totally accurate here. self.last_min_vt.store(max_vt, Ordering::Relaxed); } + + pub fn get_priority(&self, name: &[u8], pri: CommandPri) -> u64 { + let level = match pri { + CommandPri::Low => 2, + CommandPri::Normal => 1, + CommandPri::High => 0, + }; + self.resource_group(name).get_priority(level) + } } impl TaskPriorityProvider for ResourceController { diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 207373313a4..2a479964ced 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -350,7 +350,7 @@ where }; // Initialize raftstore channels. - let (router, system) = fsm::create_raft_batch_system(&config.raft_store); + let (router, system) = fsm::create_raft_batch_system(&config.raft_store, &resource_manager); let mut coprocessor_host = Some(CoprocessorHost::new( router.clone(), diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 2121b7e021f..81e7129407e 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -81,7 +81,7 @@ pub trait Simulator { key_manager: Option>, router: RaftRouter, system: RaftBatchSystem, - resource_manager: &Arc, + resource_manager: &Option>, ) -> ServerResult; fn stop_node(&mut self, node_id: u64); fn get_node_ids(&self) -> HashSet; @@ -176,7 +176,7 @@ pub struct Cluster { pub raft_statistics: Vec>>, pub sim: Arc>, pub pd_client: Arc, - resource_manager: Arc, + resource_manager: Option>, } impl Cluster { @@ -210,7 +210,7 @@ impl Cluster { pd_client, sst_workers: vec![], sst_workers_map: HashMap::default(), - resource_manager: Arc::new(ResourceGroupManager::default()), + resource_manager: 
Some(Arc::new(ResourceGroupManager::default())), kv_statistics: vec![], raft_statistics: vec![], } @@ -279,7 +279,8 @@ impl Cluster { // Try start new nodes. for _ in 0..self.count - self.engines.len() { - let (router, system) = create_raft_batch_system(&self.cfg.raft_store); + let (router, system) = + create_raft_batch_system(&self.cfg.raft_store, &self.resource_manager); self.create_engine(Some(router.clone())); let engines = self.dbs.last().unwrap().clone(); @@ -350,7 +351,8 @@ impl Cluster { debug!("starting node {}", node_id); let engines = self.engines[&node_id].clone(); let key_mgr = self.key_managers_map[&node_id].clone(); - let (router, system) = create_raft_batch_system(&self.cfg.raft_store); + let (router, system) = + create_raft_batch_system(&self.cfg.raft_store, &self.resource_manager); let mut cfg = self.cfg.clone(); if let Some(labels) = self.labels.get(&node_id) { cfg.server.labels = labels.to_owned(); diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 9ae76dba9f8..05ed8ece83d 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -230,7 +230,7 @@ impl Simulator for NodeCluster { key_manager: Option>, router: RaftRouter, system: RaftBatchSystem, - _resource_manager: &Arc, + _resource_manager: &Option>, ) -> ServerResult { assert!(node_id == 0 || !self.nodes.contains_key(&node_id)); let pd_worker = LazyWorker::new("test-pd-worker"); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index ccf4df43497..63a0b4e4804 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -265,7 +265,7 @@ impl ServerCluster { key_manager: Option>, router: RaftRouter, system: RaftBatchSystem, - resource_manager: &Arc, + resource_manager: &Option>, ) -> ServerResult { let (tmp_str, tmp) = if node_id == 0 || !self.snap_paths.contains_key(&node_id) { let p = test_util::temp_dir("test_cluster", 
cfg.prefer_mem); @@ -416,7 +416,9 @@ impl ServerCluster { quota_limiter.clone(), self.pd_client.feature_gate().clone(), self.get_causal_ts_provider(node_id), - Some(resource_manager.derive_controller("scheduler-worker-pool".to_owned(), true)), + resource_manager + .as_ref() + .map(|m| m.derive_controller("scheduler-worker-pool".to_owned(), true)), )?; self.storages.insert(node_id, raft_engine); @@ -652,7 +654,7 @@ impl Simulator for ServerCluster { key_manager: Option>, router: RaftRouter, system: RaftBatchSystem, - resource_manager: &Arc, + resource_manager: &Option>, ) -> ServerResult { dispatch_api_version!( cfg.storage.api_version(), diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 92f3bac3d5b..1193751b228 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -38,7 +38,7 @@ num-traits = "0.2" num_cpus = "1" online_config = { workspace = true } openssl = "0.10" -parking_lot = "0.12.1" +parking_lot_core = "0.9.1" pin-project = "1.0" prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" diff --git a/components/tikv_util/src/mpsc/priority_queue.rs b/components/tikv_util/src/mpsc/priority_queue.rs index 3389d6154c3..fac741361db 100644 --- a/components/tikv_util/src/mpsc/priority_queue.rs +++ b/components/tikv_util/src/mpsc/priority_queue.rs @@ -7,7 +7,9 @@ use std::sync::{ use crossbeam::channel::{RecvError, SendError, TryRecvError, TrySendError}; use crossbeam_skiplist::SkipMap; -use parking_lot::{Condvar, Mutex}; +use parking_lot_core::{ + park, unpark_all, unpark_one, SpinWait, DEFAULT_PARK_TOKEN, DEFAULT_UNPARK_TOKEN, +}; // Create a priority based channel. Sender can send message with priority of // u64, and receiver will receive messages in ascending order of priority. 
For @@ -54,8 +56,6 @@ impl Drop for Cell { #[derive(Default)] struct PriorityQueue { queue: SkipMap>, - disconnected: Mutex, - available: Condvar, sequencer: AtomicU64, @@ -67,8 +67,6 @@ impl PriorityQueue { pub fn new() -> Self { Self { queue: SkipMap::new(), - disconnected: Mutex::new(false), - available: Condvar::new(), sequencer: AtomicU64::new(0), senders: AtomicUsize::new(1), receivers: AtomicUsize::new(1), @@ -81,6 +79,10 @@ impl PriorityQueue { sequence: self.sequencer.fetch_add(1, Ordering::Relaxed), } } + + fn is_disconnected(&self) -> bool { + self.senders.load(Ordering::SeqCst) == 0 + } } // When derived `PartialOrd` on structs, it will produce a lexicographic @@ -109,7 +111,10 @@ impl Sender { self.inner .queue .insert(self.inner.get_map_key(pri), Cell::new(msg)); - self.inner.available.notify_one(); + let addr = &*self.inner as *const PriorityQueue as usize; + unsafe { + unpark_one(addr, |_| DEFAULT_UNPARK_TOKEN); + } Ok(()) } @@ -132,8 +137,10 @@ impl Drop for Sender { fn drop(&mut self) { let old = self.inner.senders.fetch_sub(1, Ordering::AcqRel); if old <= 1 { - *self.inner.disconnected.lock() = true; - self.inner.available.notify_all(); + let addr = &*self.inner as *const PriorityQueue as usize; + unsafe { + unpark_all(addr, DEFAULT_UNPARK_TOKEN); + } } } } @@ -146,14 +153,13 @@ impl Receiver { pub fn try_recv(&self) -> Result { match self.inner.queue.pop_front() { Some(entry) => Ok(entry.value().take().unwrap()), - None if self.inner.senders.load(Ordering::SeqCst) == 0 => { - Err(TryRecvError::Disconnected) - } + None if self.inner.is_disconnected() => Err(TryRecvError::Disconnected), None => Err(TryRecvError::Empty), } } pub fn recv(&self) -> Result { + let mut spin = SpinWait::new(); loop { match self.try_recv() { Ok(msg) => return Ok(msg), @@ -161,17 +167,25 @@ impl Receiver { return Err(RecvError); } Err(TryRecvError::Empty) => { - let mut disconnected = self.inner.disconnected.lock(); - if *disconnected { - return Err(RecvError); + if 
spin.spin() { + continue; + } + let addr = &*self.inner as *const PriorityQueue as usize; + unsafe { + park( + addr, + || self.len() == 0 && !self.inner.is_disconnected(), + || {}, + |_, _| {}, + DEFAULT_PARK_TOKEN, + None, + ); } - self.inner.available.wait(&mut disconnected); } } } } - #[cfg(test)] fn len(&self) -> usize { self.inner.queue.len() } diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index 70e70b3cbe6..ff1babb7e1f 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -66,7 +66,7 @@ fn start_raftstore( ApplyRouter, RaftBatchSystem, ) { - let (raft_router, mut system) = create_raft_batch_system(&cfg.raft_store); + let (raft_router, mut system) = create_raft_batch_system(&cfg.raft_store, &None); let engines = create_tmp_engine(dir); let host = CoprocessorHost::default(); let importer = { diff --git a/tests/integrations/config/dynamic/snap.rs b/tests/integrations/config/dynamic/snap.rs index 1a82ec8005e..af03246acf4 100644 --- a/tests/integrations/config/dynamic/snap.rs +++ b/tests/integrations/config/dynamic/snap.rs @@ -45,7 +45,8 @@ fn start_server( .name_prefix(thd_name!("test-server")) .build(), ); - let (raft_router, _) = create_raft_batch_system::(&cfg.raft_store); + let (raft_router, _) = + create_raft_batch_system::(&cfg.raft_store, &None); let mut snap_worker = Worker::new("snap-handler").lazy_build("snap-handler"); let snap_worker_scheduler = snap_worker.scheduler(); let server_config = Arc::new(VersionTrack::new(cfg.server.clone())); diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 8ede13bd0f4..ee063e0f1e7 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -42,7 +42,7 @@ fn test_node_bootstrap_with_prepared_data() { let pd_client = Arc::new(TestPdClient::new(0, false)); let cfg = 
new_tikv_config(0); - let (_, system) = fsm::create_raft_batch_system(&cfg.raft_store); + let (_, system) = fsm::create_raft_batch_system(&cfg.raft_store, &None); let simulate_trans = SimulateTransport::new(ChannelTransport::new()); let tmp_path = Builder::new().prefix("test_cluster").tempdir().unwrap(); let engine = From c353910ef6a296b592db6b217ec888cee34eaffc Mon Sep 17 00:00:00 2001 From: andreid-db <103079610+andreid-db@users.noreply.github.com> Date: Sat, 28 Jan 2023 20:15:54 -0800 Subject: [PATCH 0469/1149] config: allow starting TiKV nodes with <1 CPU (#14084) close tikv/tikv#13586, close tikv/tikv#13752, ref tikv/tikv#14017 Signed-off-by: Andrei Dragus --- components/raftstore/src/store/config.rs | 2 +- src/config/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 34f4e159dee..d6994a16ed4 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -660,7 +660,7 @@ impl Config { // prevent mistakenly inputting too large values, the max limit is made // according to the cpu quota * 10. Notice 10 is only an estimate, not an // empirical value. - let limit = SysQuota::cpu_cores_quota() as usize * 10; + let limit = (SysQuota::cpu_cores_quota() * 10.0) as usize; if self.apply_batch_system.pool_size == 0 || self.apply_batch_system.pool_size > limit { return Err(box_err!( "apply-pool-size should be greater than 0 and less than or equal to: {}", diff --git a/src/config/mod.rs b/src/config/mod.rs index 7878696faa5..99b593e2443 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1396,7 +1396,7 @@ impl DbConfig { // prevent mistakenly inputting too large values, the max limit is made // according to the cpu quota * 10. Notice 10 is only an estimate, not an // empirical value. 
- let limit = SysQuota::cpu_cores_quota() as i32 * 10; + let limit = (SysQuota::cpu_cores_quota() * 10.0) as i32; if self.max_background_jobs <= 0 || self.max_background_jobs > limit { return Err(format!( "max_background_jobs should be greater than 0 and less than or equal to {:?}", From 68710b99ee8f64bb353e617745c8ddf727646913 Mon Sep 17 00:00:00 2001 From: Hu# Date: Sun, 29 Jan 2023 13:49:53 +0800 Subject: [PATCH 0470/1149] pd_client: replace PD_REQUEST_HISTOGRAM_VEC with static metrics (#14087) close tikv/tikv#14086 PD_REQUEST_HISTOGRAM_VEC can be changed to use static metrics to improve performance. Signed-off-by: husharp Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/batch-system/src/router.rs | 9 +- components/pd_client/Cargo.toml | 1 + components/pd_client/src/client.rs | 96 +++++++++------------ components/pd_client/src/client_v2.rs | 80 +++++++---------- components/pd_client/src/metrics.rs | 50 +++++++++-- components/raftstore/src/store/fsm/peer.rs | 4 +- components/raftstore/src/store/fsm/store.rs | 2 +- 8 files changed, 124 insertions(+), 119 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d288af846a6..95587f98565 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3696,6 +3696,7 @@ dependencies = [ "log", "log_wrappers", "prometheus", + "prometheus-static-metric", "security", "semver 0.10.0", "serde", diff --git a/components/batch-system/src/router.rs b/components/batch-system/src/router.rs index bfcb93c9d6b..ef937209531 100644 --- a/components/batch-system/src/router.rs +++ b/components/batch-system/src/router.rs @@ -12,12 +12,7 @@ use std::{ use collections::HashMap; use crossbeam::channel::{SendError, TrySendError}; -use tikv_util::{ - debug, info, - lru::LruCache, - time::{duration_to_sec, Instant}, - Either, -}; +use tikv_util::{debug, info, lru::LruCache, time::Instant, Either}; use crate::{ fsm::{Fsm, FsmScheduler, FsmState}, @@ -322,7 +317,7 @@ where for mailbox in mailboxes.map.values() { let _ = mailbox.force_send(msg_gen(), 
&self.normal_scheduler); } - BROADCAST_NORMAL_DURATION.observe(duration_to_sec(timer.saturating_elapsed())); + BROADCAST_NORMAL_DURATION.observe(timer.saturating_elapsed_secs()); } /// Try to notify all FSMs that the cluster is being shutdown. diff --git a/components/pd_client/Cargo.toml b/components/pd_client/Cargo.toml index c25e37f23b5..f46d6111c5d 100644 --- a/components/pd_client/Cargo.toml +++ b/components/pd_client/Cargo.toml @@ -19,6 +19,7 @@ lazy_static = "1.3" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } log_wrappers = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } +prometheus-static-metric = "0.5" security = { workspace = true } semver = "0.10" serde = "1.0" diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 6686c4e8a04..b0c21797a91 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -26,10 +26,8 @@ use kvproto::{ }; use security::SecurityManager; use tikv_util::{ - box_err, debug, error, info, thd_name, - time::{duration_to_sec, Instant}, - timer::GLOBAL_TIMER_HANDLE, - warn, Either, HandyRwLock, + box_err, debug, error, info, thd_name, time::Instant, timer::GLOBAL_TIMER_HANDLE, warn, Either, + HandyRwLock, }; use txn_types::TimeStamp; use yatp::{task::future::TaskCell, ThreadPool}; @@ -193,9 +191,7 @@ impl RpcClient { &self, key: &[u8], ) -> PdFuture<(metapb::Region, Option)> { - let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_region"]) - .start_coarse_timer(); + let _timer = PD_REQUEST_HISTOGRAM_VEC.get_region.start_coarse_timer(); let mut req = pdpb::GetRegionRequest::default(); req.set_header(self.header()); @@ -255,8 +251,8 @@ impl RpcClient { Box::pin(async move { let mut resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_store_async"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .get_store_async + .observe(timer.saturating_elapsed_secs()); 
check_resp_header(resp.get_header())?; let store = resp.take_store(); if store.get_state() != metapb::StoreState::Tombstone { @@ -291,7 +287,7 @@ impl PdClient for RpcClient { items: Vec, ) -> PdFuture<()> { let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["store_global_config"]) + .store_global_config .start_coarse_timer(); let mut req = pdpb::StoreGlobalConfigRequest::new(); @@ -321,7 +317,7 @@ impl PdClient for RpcClient { config_path: String, ) -> PdFuture<(Vec, i64)> { let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["load_global_config"]) + .load_global_config .start_coarse_timer(); let mut req = pdpb::LoadGlobalConfigRequest::new(); @@ -355,7 +351,7 @@ impl PdClient for RpcClient { revision: i64, ) -> Result> { let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["watch_global_config"]) + .watch_global_config .start_coarse_timer(); let mut req = pdpb::WatchGlobalConfigRequest::default(); @@ -377,7 +373,7 @@ impl PdClient for RpcClient { region: metapb::Region, ) -> Result> { let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["bootstrap_cluster"]) + .bootstrap_cluster .start_coarse_timer(); let mut req = pdpb::BootstrapRequest::default(); @@ -394,7 +390,7 @@ impl PdClient for RpcClient { fn is_cluster_bootstrapped(&self) -> Result { let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["is_cluster_bootstrapped"]) + .is_cluster_bootstrapped .start_coarse_timer(); let mut req = pdpb::IsBootstrappedRequest::default(); @@ -409,9 +405,7 @@ impl PdClient for RpcClient { } fn alloc_id(&self) -> Result { - let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["alloc_id"]) - .start_coarse_timer(); + let _timer = PD_REQUEST_HISTOGRAM_VEC.alloc_id.start_coarse_timer(); let mut req = pdpb::AllocIdRequest::default(); req.set_header(self.header()); @@ -430,7 +424,7 @@ impl PdClient for RpcClient { fn is_recovering_marked(&self) -> Result { let _timer = PD_REQUEST_HISTOGRAM_VEC - 
.with_label_values(&["is_recovering_marked"]) + .is_recovering_marked .start_coarse_timer(); let mut req = pdpb::IsSnapshotRecoveringRequest::default(); @@ -445,9 +439,7 @@ impl PdClient for RpcClient { } fn put_store(&self, store: metapb::Store) -> Result> { - let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["put_store"]) - .start_coarse_timer(); + let _timer = PD_REQUEST_HISTOGRAM_VEC.put_store.start_coarse_timer(); let mut req = pdpb::PutStoreRequest::default(); req.set_header(self.header()); @@ -462,9 +454,7 @@ impl PdClient for RpcClient { } fn get_store(&self, store_id: u64) -> Result { - let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_store"]) - .start_coarse_timer(); + let _timer = PD_REQUEST_HISTOGRAM_VEC.get_store.start_coarse_timer(); let mut req = pdpb::GetStoreRequest::default(); req.set_header(self.header()); @@ -488,9 +478,7 @@ impl PdClient for RpcClient { } fn get_all_stores(&self, exclude_tombstone: bool) -> Result> { - let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_all_stores"]) - .start_coarse_timer(); + let _timer = PD_REQUEST_HISTOGRAM_VEC.get_all_stores.start_coarse_timer(); let mut req = pdpb::GetAllStoresRequest::default(); req.set_header(self.header()); @@ -506,7 +494,7 @@ impl PdClient for RpcClient { fn get_cluster_config(&self) -> Result { let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_cluster_config"]) + .get_cluster_config .start_coarse_timer(); let mut req = pdpb::GetClusterConfigRequest::default(); @@ -558,8 +546,8 @@ impl PdClient for RpcClient { Box::pin(async move { let mut resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_region_by_id"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .get_region_by_id + .observe(timer.saturating_elapsed_secs()); check_resp_header(resp.get_header())?; if resp.has_region() { Ok(Some(resp.take_region())) @@ -600,8 +588,8 @@ impl PdClient for RpcClient { Box::pin(async move { let mut resp = 
handler.await?; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_region_leader_by_id"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .get_region_leader_by_id + .observe(timer.saturating_elapsed_secs()); check_resp_header(resp.get_header())?; if resp.has_region() && resp.has_leader() { Ok(Some((resp.take_region(), resp.take_leader()))) @@ -737,8 +725,8 @@ impl PdClient for RpcClient { Box::pin(async move { let resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["ask_split"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .ask_split + .observe(timer.saturating_elapsed_secs()); check_resp_header(resp.get_header())?; Ok(resp) }) as PdFuture<_> @@ -775,8 +763,8 @@ impl PdClient for RpcClient { Box::pin(async move { let resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["ask_batch_split"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .ask_batch_split + .observe(timer.saturating_elapsed_secs()); check_resp_header(resp.get_header())?; Ok(resp) }) as PdFuture<_> @@ -821,8 +809,8 @@ impl PdClient for RpcClient { Box::pin(async move { let resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["store_heartbeat"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .store_heartbeat + .observe(timer.saturating_elapsed_secs()); check_resp_header(resp.get_header())?; match feature_gate.set_version(resp.get_cluster_version()) { Err(_) => warn!("invalid cluster version: {}", resp.get_cluster_version()), @@ -858,8 +846,8 @@ impl PdClient for RpcClient { Box::pin(async move { let resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["report_batch_split"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .report_batch_split + .observe(timer.saturating_elapsed_secs()); check_resp_header(resp.get_header())?; Ok(()) }) as PdFuture<_> @@ -871,9 +859,7 @@ impl PdClient for RpcClient { } fn scatter_region(&self, mut region: RegionInfo) -> Result<()> { - let 
_timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["scatter_region"]) - .start_coarse_timer(); + let _timer = PD_REQUEST_HISTOGRAM_VEC.scatter_region.start_coarse_timer(); let mut req = pdpb::ScatterRegionRequest::default(); req.set_header(self.header()); @@ -912,8 +898,8 @@ impl PdClient for RpcClient { Box::pin(async move { let resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_gc_safe_point"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .get_gc_safe_point + .observe(timer.saturating_elapsed_secs()); check_resp_header(resp.get_header())?; Ok(resp.get_safe_point()) }) as PdFuture<_> @@ -929,9 +915,7 @@ impl PdClient for RpcClient { } fn get_operator(&self, region_id: u64) -> Result { - let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_operator"]) - .start_coarse_timer(); + let _timer = PD_REQUEST_HISTOGRAM_VEC.get_operator.start_coarse_timer(); let mut req = pdpb::GetOperatorRequest::default(); req.set_header(self.header()); @@ -946,7 +930,7 @@ impl PdClient for RpcClient { } fn batch_get_tso(&self, count: u32) -> PdFuture { - let begin = Instant::now(); + let timer = Instant::now(); let executor = move |client: &Client, _| { // Remove Box::pin and Compat when GLOBAL_TIMER_HANDLE supports futures 0.3 let ts_fut = Compat::new(Box::pin(client.inner.rl().tso.get_timestamp(count))); @@ -965,8 +949,8 @@ impl PdClient for RpcClient { } })?; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["tso"]) - .observe(duration_to_sec(begin.saturating_elapsed())); + .tso + .observe(timer.saturating_elapsed_secs()); Ok(ts) }) as PdFuture<_> }; @@ -981,7 +965,7 @@ impl PdClient for RpcClient { safe_point: TimeStamp, ttl: Duration, ) -> PdFuture<()> { - let begin = Instant::now(); + let timer = Instant::now(); let mut req = pdpb::UpdateServiceGcSafePointRequest::default(); req.set_header(self.header()); req.set_service_id(name.into()); @@ -1003,8 +987,8 @@ impl PdClient for RpcClient { Box::pin(async move { let resp = 
handler.await?; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["update_service_safe_point"]) - .observe(duration_to_sec(begin.saturating_elapsed())); + .update_service_safe_point + .observe(timer.saturating_elapsed_secs()); check_resp_header(resp.get_header())?; Ok(()) }) as PdFuture<_> @@ -1039,8 +1023,8 @@ impl PdClient for RpcClient { Box::pin(async move { let resp = handler.await?; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["min_resolved_ts"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .min_resolved_ts + .observe(timer.saturating_elapsed_secs()); check_resp_header(resp.get_header())?; Ok(()) }) as PdFuture<_> diff --git a/components/pd_client/src/client_v2.rs b/components/pd_client/src/client_v2.rs index 35e5c3b4785..cfa0d46303c 100644 --- a/components/pd_client/src/client_v2.rs +++ b/components/pd_client/src/client_v2.rs @@ -47,12 +47,8 @@ use kvproto::{ }; use security::SecurityManager; use tikv_util::{ - box_err, error, info, - mpsc::future as mpsc, - slow_log, thd_name, - time::{duration_to_sec, Instant}, - timer::GLOBAL_TIMER_HANDLE, - warn, + box_err, error, info, mpsc::future as mpsc, slow_log, thd_name, time::Instant, + timer::GLOBAL_TIMER_HANDLE, warn, }; use tokio::sync::{broadcast, mpsc as tokio_mpsc}; use txn_types::TimeStamp; @@ -835,7 +831,7 @@ impl PdClient for RpcClient { region: metapb::Region, ) -> Result> { let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["bootstrap_cluster"]) + .bootstrap_cluster .start_coarse_timer(); block_on(self.raw_client.wait_for_ready())?; @@ -856,7 +852,7 @@ impl PdClient for RpcClient { fn is_cluster_bootstrapped(&mut self) -> Result { let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["is_cluster_bootstrapped"]) + .is_cluster_bootstrapped .start_coarse_timer(); block_on(self.raw_client.wait_for_ready())?; @@ -875,9 +871,7 @@ impl PdClient for RpcClient { } fn alloc_id(&mut self) -> Result { - let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["alloc_id"]) - 
.start_coarse_timer(); + let _timer = PD_REQUEST_HISTOGRAM_VEC.alloc_id.start_coarse_timer(); block_on(self.raw_client.wait_for_ready())?; @@ -902,7 +896,7 @@ impl PdClient for RpcClient { fn is_recovering_marked(&mut self) -> Result { let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["is_recovering_marked"]) + .is_recovering_marked .start_coarse_timer(); block_on(self.raw_client.wait_for_ready())?; @@ -921,9 +915,7 @@ impl PdClient for RpcClient { } fn put_store(&mut self, store: metapb::Store) -> Result> { - let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["put_store"]) - .start_coarse_timer(); + let _timer = PD_REQUEST_HISTOGRAM_VEC.put_store.start_coarse_timer(); block_on(self.raw_client.wait_for_ready())?; @@ -962,8 +954,8 @@ impl PdClient for RpcClient { }) .await; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_store_and_stats"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .get_store_and_stats + .observe(timer.saturating_elapsed_secs()); let mut resp = raw_client.check_resp(resp)?; check_resp_header(resp.get_header())?; let store = resp.take_store(); @@ -976,9 +968,7 @@ impl PdClient for RpcClient { } fn get_all_stores(&mut self, exclude_tombstone: bool) -> Result> { - let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_all_stores"]) - .start_coarse_timer(); + let _timer = PD_REQUEST_HISTOGRAM_VEC.get_all_stores.start_coarse_timer(); block_on(self.raw_client.wait_for_ready())?; @@ -998,7 +988,7 @@ impl PdClient for RpcClient { fn get_cluster_config(&mut self) -> Result { let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_cluster_config"]) + .get_cluster_config .start_coarse_timer(); block_on(self.raw_client.wait_for_ready())?; @@ -1037,8 +1027,8 @@ impl PdClient for RpcClient { }) .await; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_region"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .get_region + .observe(timer.saturating_elapsed_secs()); let mut resp = 
raw_client.check_resp(resp)?; check_resp_header(resp.get_header())?; let region = if resp.has_region() { @@ -1076,8 +1066,8 @@ impl PdClient for RpcClient { }) .await; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_region_by_id"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .get_region_by_id + .observe(timer.saturating_elapsed_secs()); let mut resp = raw_client.check_resp(resp)?; check_resp_header(resp.get_header())?; if resp.has_region() { @@ -1115,8 +1105,8 @@ impl PdClient for RpcClient { }) .await; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_region_leader_by_id"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .get_region_leader_by_id + .observe(timer.saturating_elapsed_secs()); let mut resp = raw_client.check_resp(resp)?; check_resp_header(resp.get_header())?; if resp.has_region() && resp.has_leader() { @@ -1145,8 +1135,8 @@ impl PdClient for RpcClient { }) .await; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["ask_split"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .ask_split + .observe(timer.saturating_elapsed_secs()); let resp = raw_client.check_resp(resp)?; check_resp_header(resp.get_header())?; Ok(resp) @@ -1179,8 +1169,8 @@ impl PdClient for RpcClient { }) .await; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["ask_batch_split"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .ask_batch_split + .observe(timer.saturating_elapsed_secs()); let resp = raw_client.check_resp(resp)?; check_resp_header(resp.get_header())?; Ok(resp) @@ -1223,8 +1213,8 @@ impl PdClient for RpcClient { }) .await; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["store_heartbeat"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .store_heartbeat + .observe(timer.saturating_elapsed_secs()); let resp = raw_client.check_resp(resp)?; check_resp_header(resp.get_header())?; match feature_gate.set_version(resp.get_cluster_version()) { @@ -1257,8 +1247,8 @@ impl PdClient for RpcClient { }) .await; 
PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["report_batch_split"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .report_batch_split + .observe(timer.saturating_elapsed_secs()); let resp = raw_client.check_resp(resp)?; check_resp_header(resp.get_header())?; Ok(()) @@ -1266,9 +1256,7 @@ impl PdClient for RpcClient { } fn scatter_region(&mut self, mut region: RegionInfo) -> Result<()> { - let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["scatter_region"]) - .start_coarse_timer(); + let _timer = PD_REQUEST_HISTOGRAM_VEC.scatter_region.start_coarse_timer(); let mut req = pdpb::ScatterRegionRequest::default(); req.set_region_id(region.get_id()); @@ -1307,8 +1295,8 @@ impl PdClient for RpcClient { }) .await; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_gc_saft_point"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .get_gc_safe_point + .observe(timer.saturating_elapsed_secs()); let resp = raw_client.check_resp(resp)?; check_resp_header(resp.get_header())?; Ok(resp.get_safe_point()) @@ -1316,9 +1304,7 @@ impl PdClient for RpcClient { } fn get_operator(&mut self, region_id: u64) -> Result { - let _timer = PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["get_operator"]) - .start_coarse_timer(); + let _timer = PD_REQUEST_HISTOGRAM_VEC.get_operator.start_coarse_timer(); block_on(self.raw_client.wait_for_ready())?; @@ -1366,8 +1352,8 @@ impl PdClient for RpcClient { }) .await; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["update_service_safe_point"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .update_service_safe_point + .observe(timer.saturating_elapsed_secs()); let resp = raw_client.check_resp(resp)?; check_resp_header(resp.get_header())?; Ok(()) @@ -1396,8 +1382,8 @@ impl PdClient for RpcClient { }) .await; PD_REQUEST_HISTOGRAM_VEC - .with_label_values(&["min_resolved_ts"]) - .observe(duration_to_sec(timer.saturating_elapsed())); + .min_resolved_ts + .observe(timer.saturating_elapsed_secs()); let resp = 
raw_client.check_resp(resp)?; check_resp_header(resp.get_header())?; Ok(()) diff --git a/components/pd_client/src/metrics.rs b/components/pd_client/src/metrics.rs index 57879a57d0e..a4ef9c5ce4e 100644 --- a/components/pd_client/src/metrics.rs +++ b/components/pd_client/src/metrics.rs @@ -2,14 +2,52 @@ use lazy_static::lazy_static; use prometheus::*; +use prometheus_static_metric::{make_static_metric, register_static_histogram_vec}; + +make_static_metric! { + pub label_enum PDRequestEventType { + get_region, + get_region_by_id, + get_region_leader_by_id, + scatter_region, + get_store, + get_store_async, + put_store, + get_all_stores, + get_store_and_stats, + store_global_config, + load_global_config, + watch_global_config, + bootstrap_cluster, + is_cluster_bootstrapped, + get_cluster_config, + ask_split, + ask_batch_split, + report_batch_split, + get_gc_safe_point, + update_service_safe_point, + min_resolved_ts, + get_operator, + alloc_id, + is_recovering_marked, + store_heartbeat, + tso, + } + + pub struct PDRequestEventHistogramVec: Histogram { + "type" => PDRequestEventType, + } +} lazy_static! 
{ - pub static ref PD_REQUEST_HISTOGRAM_VEC: HistogramVec = register_histogram_vec!( - "tikv_pd_request_duration_seconds", - "Bucketed histogram of PD requests duration", - &["type"] - ) - .unwrap(); + pub static ref PD_REQUEST_HISTOGRAM_VEC: PDRequestEventHistogramVec = + register_static_histogram_vec!( + PDRequestEventHistogramVec, + "tikv_pd_request_duration_seconds", + "Bucketed histogram of PD requests duration", + &["type"] + ) + .unwrap(); pub static ref PD_HEARTBEAT_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( "tikv_pd_heartbeat_message_total", "Total number of PD heartbeat messages.", diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 4266e400cd3..1cc603f2490 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -55,7 +55,7 @@ use tikv_util::{ mpsc::{self, LooseBoundedSender, Receiver}, store::{find_peer, find_peer_by_id, is_learner, region_on_same_stores}, sys::disk::DiskUsage, - time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant}, + time::{monotonic_raw_now, Instant as TiInstant}, trace, warn, worker::{ScheduleError, Scheduler}, Either, @@ -694,7 +694,7 @@ where .raft_metrics .event_time .peer_msg - .observe(duration_to_sec(timer.saturating_elapsed())); + .observe(timer.saturating_elapsed_secs()); } #[inline] diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index e68873cadf1..26f2983998d 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -806,7 +806,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> .raft_metrics .event_time .store_msg - .observe(duration_to_sec(timer.saturating_elapsed())); + .observe(timer.saturating_elapsed_secs()); } fn start(&mut self, store: metapb::Store) { From b1936e6c2d73789b05545b96b32dc22fac880a79 Mon Sep 17 00:00:00 2001 From: Zhi Qi 
<30543181+LittleFall@users.noreply.github.com> Date: Mon, 30 Jan 2023 17:25:54 +0800 Subject: [PATCH 0471/1149] copr: (refactor) Lift heap struct out from top_n_executor (#14096) ref tikv/tikv#13936 Signed-off-by: Zhi Qi --- .../src/top_n_executor.rs | 210 ++--------------- .../tidb_query_executors/src/util/mod.rs | 1 + .../src/util/top_n_heap.rs | 211 ++++++++++++++++++ 3 files changed, 229 insertions(+), 193 deletions(-) create mode 100644 components/tidb_query_executors/src/util/top_n_heap.rs diff --git a/components/tidb_query_executors/src/top_n_executor.rs b/components/tidb_query_executors/src/top_n_executor.rs index 6ef8c6b2224..5ebc65baa25 100644 --- a/components/tidb_query_executors/src/top_n_executor.rs +++ b/components/tidb_query_executors/src/top_n_executor.rs @@ -1,20 +1,23 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::{cmp::Ordering, collections::BinaryHeap, ptr::NonNull, sync::Arc}; +use std::sync::Arc; use async_trait::async_trait; use tidb_query_common::{storage::IntervalRange, Result}; use tidb_query_datatype::{ - codec::{ - batch::{LazyBatchColumn, LazyBatchColumnVec}, - data_type::*, - }, + codec::{batch::LazyBatchColumnVec, data_type::*}, expr::{EvalConfig, EvalContext, EvalWarnings}, }; use tidb_query_expr::{RpnExpression, RpnExpressionBuilder, RpnStackNode}; use tipb::{Expr, FieldType, TopN}; -use crate::{interface::*, util::*}; +use crate::{ + interface::*, + util::{ + top_n_heap::{HeapItemSourceData, HeapItemUnsafe, TopNHeap}, + *, + }, +}; pub struct BatchTopNExecutor { /// The heap, which contains N rows at most. @@ -22,7 +25,7 @@ pub struct BatchTopNExecutor { /// This field is placed before `eval_columns_buffer_unsafe`, `order_exprs`, /// `order_is_desc` and `src` because it relies on data in those fields /// and we want this field to be dropped first. - heap: BinaryHeap, + heap: TopNHeap, /// A collection of all evaluated columns. This is to avoid repeated /// allocations in each `next_batch()`. 
@@ -97,7 +100,7 @@ impl BatchTopNExecutor { .collect(); Self { - heap: BinaryHeap::new(), + heap: TopNHeap::new(n), eval_columns_buffer_unsafe: Box::>::default(), order_exprs: order_exprs.into_boxed_slice(), order_exprs_field_type: order_exprs_field_type.into_boxed_slice(), @@ -126,7 +129,7 @@ impl BatchTopNExecutor { .collect(); Self { - heap: BinaryHeap::new(), + heap: TopNHeap::new(n), eval_columns_buffer_unsafe: Box::>::default(), order_exprs: order_exprs.into_boxed_slice(), order_exprs_field_type: order_exprs_field_type.into_boxed_slice(), @@ -140,7 +143,7 @@ impl BatchTopNExecutor { } pub fn new( - config: std::sync::Arc, + config: Arc, src: Src, order_exprs_def: Vec, order_is_desc: Vec, @@ -163,8 +166,7 @@ impl BatchTopNExecutor { .collect(); Ok(Self { - // Avoid large N causing OOM - heap: BinaryHeap::with_capacity(n.min(1024)), + heap: TopNHeap::new(n), // Simply large enough to avoid repeated allocations eval_columns_buffer_unsafe: Box::new(Vec::with_capacity(512)), order_exprs: order_exprs.into_boxed_slice(), @@ -182,7 +184,7 @@ impl BatchTopNExecutor { async fn handle_next_batch(&mut self) -> Result> { // Use max batch size from the beginning because top N // always needs to calculate over all data. - let src_result = self.src.next_batch(crate::runner::BATCH_MAX_SIZE).await; + let src_result = self.src.next_batch(BATCH_MAX_SIZE).await; self.context.warnings = src_result.warnings; @@ -193,7 +195,7 @@ impl BatchTopNExecutor { } if src_is_drained { - Ok(Some(self.heap_take_all())) + Ok(Some(self.heap.take_all())) } else { Ok(None) } @@ -240,84 +242,11 @@ impl BatchTopNExecutor { eval_columns_offset: eval_offset, logical_row_index, }; - self.heap_add_row(row)?; - } - - Ok(()) - } - - fn heap_add_row(&mut self, row: HeapItemUnsafe) -> Result<()> { - if self.heap.len() < self.n { - // HeapItemUnsafe must be checked valid to compare in advance, or else it may - // panic inside BinaryHeap. - row.cmp_sort_key(&row)?; - - // Push into heap when heap is not full. 
- self.heap.push(row); - } else { - // Swap the greatest row in the heap if this row is smaller than that row. - let mut greatest_row = self.heap.peek_mut().unwrap(); - if row.cmp_sort_key(&greatest_row)? == Ordering::Less { - *greatest_row = row; - } + self.heap.add_row(row)?; } Ok(()) } - - #[allow(clippy::clone_on_copy)] - fn heap_take_all(&mut self) -> LazyBatchColumnVec { - let heap = std::mem::take(&mut self.heap); - let sorted_items = heap.into_sorted_vec(); - if sorted_items.is_empty() { - return LazyBatchColumnVec::empty(); - } - - let mut result = sorted_items[0] - .source_data - .physical_columns - .clone_empty(sorted_items.len()); - - for (column_index, result_column) in result.as_mut_slice().iter_mut().enumerate() { - match result_column { - LazyBatchColumn::Raw(dest_column) => { - for item in &sorted_items { - let src = item.source_data.physical_columns[column_index].raw(); - dest_column - .push(&src[item.source_data.logical_rows[item.logical_row_index]]); - } - } - LazyBatchColumn::Decoded(dest_vector_value) => { - match_template::match_template! { - TT = [ - Int, - Real, - Duration, - Decimal, - DateTime, - Bytes => BytesRef, - Json => JsonRef, - Enum => EnumRef, - Set => SetRef, - ], - match dest_vector_value { - VectorValue::TT(dest_column) => { - for item in &sorted_items { - let src: &VectorValue = item.source_data.physical_columns[column_index].decoded(); - let src_ref = TT::borrow_vector_value(src); - // TODO: This clone is not necessary. - dest_column.push(src_ref.get_option_ref(item.source_data.logical_rows[item.logical_row_index]).map(|x| x.into_owned_value())); - } - }, - } - } - } - } - } - - result.assert_columns_equal_length(); - result - } } #[async_trait] @@ -402,111 +331,6 @@ impl BatchExecutor for BatchTopNExecutor { } } -struct HeapItemSourceData { - physical_columns: LazyBatchColumnVec, - logical_rows: Vec, -} - -/// The item in the heap of `BatchTopNExecutor`. 
-/// -/// WARN: The content of this structure is valid only if `BatchTopNExecutor` is -/// valid (i.e. not dropped). Thus it is called unsafe. -struct HeapItemUnsafe { - /// A pointer to the `order_is_desc` field in `BatchTopNExecutor`. - order_is_desc_ptr: NonNull<[bool]>, - - /// A pointer to the `order_exprs_field_type` field in `order_exprs`. - order_exprs_field_type_ptr: NonNull<[FieldType]>, - - /// The source data that evaluated column in this structure is using. - source_data: Arc, - - /// A pointer to the `eval_columns_buffer` field in `BatchTopNExecutor`. - eval_columns_buffer_ptr: NonNull>>, - - /// The begin offset of the evaluated columns stored in the buffer. - /// - /// The length of evaluated columns in the buffer is `order_is_desc.len()`. - eval_columns_offset: usize, - - /// Which logical row in the evaluated columns this heap item is - /// representing. - logical_row_index: usize, -} - -impl HeapItemUnsafe { - fn get_order_is_desc(&self) -> &[bool] { - unsafe { self.order_is_desc_ptr.as_ref() } - } - - fn get_order_exprs_field_type(&self) -> &[FieldType] { - unsafe { self.order_exprs_field_type_ptr.as_ref() } - } - - fn get_eval_columns(&self, len: usize) -> &[RpnStackNode<'_>] { - let offset_begin = self.eval_columns_offset; - let offset_end = offset_begin + len; - let vec_buf = unsafe { self.eval_columns_buffer_ptr.as_ref() }; - &vec_buf[offset_begin..offset_end] - } - - fn cmp_sort_key(&self, other: &Self) -> Result { - // Only debug assert because this function is called pretty frequently. 
- debug_assert_eq!(self.get_order_is_desc(), other.get_order_is_desc()); - - let order_is_desc = self.get_order_is_desc(); - let order_exprs_field_type = self.get_order_exprs_field_type(); - let columns_len = order_is_desc.len(); - let eval_columns_lhs = self.get_eval_columns(columns_len); - let eval_columns_rhs = other.get_eval_columns(columns_len); - - for column_idx in 0..columns_len { - let lhs_node = &eval_columns_lhs[column_idx]; - let rhs_node = &eval_columns_rhs[column_idx]; - let lhs = lhs_node.get_logical_scalar_ref(self.logical_row_index); - let rhs = rhs_node.get_logical_scalar_ref(other.logical_row_index); - - // There is panic inside, but will never panic, since the data type of - // corresponding column should be consistent for each - // `HeapItemUnsafe`. - let ord = lhs.cmp_sort_key(&rhs, &order_exprs_field_type[column_idx])?; - - if ord == Ordering::Equal { - continue; - } - if !order_is_desc[column_idx] { - return Ok(ord); - } else { - return Ok(ord.reverse()); - } - } - - Ok(Ordering::Equal) - } -} - -/// WARN: HeapItemUnsafe implements partial ordering. It panics when Collator -/// fails to parse. So make sure that it is valid before putting it into a heap. 
-impl Ord for HeapItemUnsafe { - fn cmp(&self, other: &Self) -> Ordering { - self.cmp_sort_key(other).unwrap() - } -} - -impl PartialOrd for HeapItemUnsafe { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl PartialEq for HeapItemUnsafe { - fn eq(&self, other: &Self) -> bool { - self.cmp(other) == Ordering::Equal - } -} - -impl Eq for HeapItemUnsafe {} - #[cfg(test)] mod tests { use futures::executor::block_on; diff --git a/components/tidb_query_executors/src/util/mod.rs b/components/tidb_query_executors/src/util/mod.rs index 6aa578459e2..ca05e49fcd3 100644 --- a/components/tidb_query_executors/src/util/mod.rs +++ b/components/tidb_query_executors/src/util/mod.rs @@ -5,6 +5,7 @@ pub mod hash_aggr_helper; #[cfg(test)] pub mod mock_executor; pub mod scan_executor; +pub mod top_n_heap; use tidb_query_common::Result; use tidb_query_datatype::{codec::batch::LazyBatchColumnVec, expr::EvalContext}; diff --git a/components/tidb_query_executors/src/util/top_n_heap.rs b/components/tidb_query_executors/src/util/top_n_heap.rs new file mode 100644 index 00000000000..0cbef103e4d --- /dev/null +++ b/components/tidb_query_executors/src/util/top_n_heap.rs @@ -0,0 +1,211 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{cmp::Ordering, collections::BinaryHeap, ptr::NonNull, sync::Arc}; + +use tidb_query_common::Result; +use tidb_query_datatype::codec::{ + batch::{LazyBatchColumn, LazyBatchColumnVec}, + data_type::*, +}; +use tidb_query_expr::RpnStackNode; +use tipb::FieldType; + +/// TopNHeap is the common data structure used in TopN-like executors. +pub struct TopNHeap { + /// The maximum number of rows in the heap. + n: usize, + /// The heap. + heap: BinaryHeap, +} + +impl TopNHeap { + /// parameters: + /// - n: The maximum number of rows in the heaps + /// note: to avoid large N causing OOM, the initial capacity will be limited + /// up to 1024. 
+ pub fn new(n: usize) -> Self { + Self { + n, + // Avoid large N causing OOM + heap: BinaryHeap::with_capacity(n.min(1024)), + } + } + + pub fn add_row(&mut self, row: HeapItemUnsafe) -> Result<()> { + if self.heap.len() < self.n { + // HeapItemUnsafe must be checked valid to compare in advance, or else it may + // panic inside BinaryHeap. + row.cmp_sort_key(&row)?; + + // Push into heap when heap is not full. + self.heap.push(row); + } else { + // Swap the greatest row in the heap if this row is smaller than that row. + let mut greatest_row = self.heap.peek_mut().unwrap(); + if row.cmp_sort_key(&greatest_row)? == Ordering::Less { + *greatest_row = row; + } + } + + Ok(()) + } + + #[allow(clippy::clone_on_copy)] + pub fn take_all(&mut self) -> LazyBatchColumnVec { + let heap = std::mem::take(&mut self.heap); + let sorted_items = heap.into_sorted_vec(); + if sorted_items.is_empty() { + return LazyBatchColumnVec::empty(); + } + + let mut result = sorted_items[0] + .source_data + .physical_columns + .clone_empty(sorted_items.len()); + + for (column_index, result_column) in result.as_mut_slice().iter_mut().enumerate() { + match result_column { + LazyBatchColumn::Raw(dest_column) => { + for item in &sorted_items { + let src = item.source_data.physical_columns[column_index].raw(); + dest_column + .push(&src[item.source_data.logical_rows[item.logical_row_index]]); + } + } + LazyBatchColumn::Decoded(dest_vector_value) => { + match_template::match_template! { + TT = [ + Int, + Real, + Duration, + Decimal, + DateTime, + Bytes => BytesRef, + Json => JsonRef, + Enum => EnumRef, + Set => SetRef, + ], + match dest_vector_value { + VectorValue::TT(dest_column) => { + for item in &sorted_items { + let src: &VectorValue = item.source_data.physical_columns[column_index].decoded(); + let src_ref = TT::borrow_vector_value(src); + // TODO: This clone is not necessary. 
+ dest_column.push(src_ref.get_option_ref(item.source_data.logical_rows[item.logical_row_index]).map(|x| x.into_owned_value())); + } + }, + } + } + } + } + } + + result.assert_columns_equal_length(); + result + } +} + +pub struct HeapItemSourceData { + pub physical_columns: LazyBatchColumnVec, + pub logical_rows: Vec, +} + +/// The item in the heap of `BatchTopNExecutor`. +/// +/// WARN: The content of this structure is valid only if `BatchTopNExecutor` is +/// valid (i.e. not dropped). Thus it is called unsafe. +pub struct HeapItemUnsafe { + /// A pointer to the `order_is_desc` field in `BatchTopNExecutor`. + pub order_is_desc_ptr: NonNull<[bool]>, + + /// A pointer to the `order_exprs_field_type` field in `order_exprs`. + pub order_exprs_field_type_ptr: NonNull<[FieldType]>, + + /// The source data that evaluated column in this structure is using. + pub source_data: Arc, + + /// A pointer to the `eval_columns_buffer` field in `BatchTopNExecutor`. + pub eval_columns_buffer_ptr: NonNull>>, + + /// The begin offset of the evaluated columns stored in the buffer. + /// + /// The length of evaluated columns in the buffer is `order_is_desc.len()`. + pub eval_columns_offset: usize, + + /// Which logical row in the evaluated columns this heap item is + /// representing. + pub logical_row_index: usize, +} + +impl HeapItemUnsafe { + fn get_order_is_desc(&self) -> &[bool] { + unsafe { self.order_is_desc_ptr.as_ref() } + } + + fn get_order_exprs_field_type(&self) -> &[FieldType] { + unsafe { self.order_exprs_field_type_ptr.as_ref() } + } + + fn get_eval_columns(&self, len: usize) -> &[RpnStackNode<'_>] { + let offset_begin = self.eval_columns_offset; + let offset_end = offset_begin + len; + let vec_buf = unsafe { self.eval_columns_buffer_ptr.as_ref() }; + &vec_buf[offset_begin..offset_end] + } + + fn cmp_sort_key(&self, other: &Self) -> Result { + // Only debug assert because this function is called pretty frequently. 
+ debug_assert_eq!(self.get_order_is_desc(), other.get_order_is_desc()); + + let order_is_desc = self.get_order_is_desc(); + let order_exprs_field_type = self.get_order_exprs_field_type(); + let columns_len = order_is_desc.len(); + let eval_columns_lhs = self.get_eval_columns(columns_len); + let eval_columns_rhs = other.get_eval_columns(columns_len); + + for column_idx in 0..columns_len { + let lhs_node = &eval_columns_lhs[column_idx]; + let rhs_node = &eval_columns_rhs[column_idx]; + let lhs = lhs_node.get_logical_scalar_ref(self.logical_row_index); + let rhs = rhs_node.get_logical_scalar_ref(other.logical_row_index); + + // There is panic inside, but will never panic, since the data type of + // corresponding column should be consistent for each + // `HeapItemUnsafe`. + let ord = lhs.cmp_sort_key(&rhs, &order_exprs_field_type[column_idx])?; + + if ord == Ordering::Equal { + continue; + } + return if !order_is_desc[column_idx] { + Ok(ord) + } else { + Ok(ord.reverse()) + }; + } + + Ok(Ordering::Equal) + } +} + +/// WARN: HeapItemUnsafe implements partial ordering. It panics when Collator +/// fails to parse. So make sure that it is valid before putting it into a heap. +impl Ord for HeapItemUnsafe { + fn cmp(&self, other: &Self) -> Ordering { + self.cmp_sort_key(other).unwrap() + } +} + +impl PartialOrd for HeapItemUnsafe { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl PartialEq for HeapItemUnsafe { + fn eq(&self, other: &Self) -> bool { + self.cmp(other) == Ordering::Equal + } +} + +impl Eq for HeapItemUnsafe {} From 0ce3485ca67eab8c9540a6931047478de67c48c0 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Mon, 30 Jan 2023 17:43:54 +0800 Subject: [PATCH 0472/1149] raftstore: allow the read request with a smaller ts during flashback (#14088) close tikv/tikv#14045 - Store the flashback `start_ts` in region meta. - Allow the read request with a smaller ts during flashback. 
Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/raftstore/src/store/fsm/apply.rs | 4 +- components/raftstore/src/store/fsm/peer.rs | 10 ++- components/raftstore/src/store/util.rs | 24 ++++++- components/raftstore/src/store/worker/read.rs | 5 +- components/test_raftstore/src/util.rs | 30 ++++++--- components/tikv_kv/src/lib.rs | 7 ++- src/server/raftkv/mod.rs | 27 ++++---- src/server/raftkv2/mod.rs | 19 +++--- src/server/service/kv.rs | 9 ++- tests/integrations/server/kv_service.rs | 62 +++++++++++-------- 11 files changed, 129 insertions(+), 70 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 95587f98565..46eac5930a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2729,7 +2729,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#009f31598ac3200dc8b32e18f96fc4deb7b32e48" +source = "git+https://github.com/pingcap/kvproto.git#1b2b4114103afb06796b7e44f45f7e55133673c0" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 22a42393173..bb262b9ffa8 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -1649,7 +1649,8 @@ where req.get_header().get_region_epoch().get_version() >= self.last_merge_version; check_req_region_epoch(req, &self.region, include_region)?; check_flashback_state( - self.region.get_is_in_flashback(), + self.region.is_in_flashback, + self.region.flashback_start_ts, req, self.region_id(), false, @@ -2975,6 +2976,7 @@ where // Modify the region meta in memory. let mut region = self.region.clone(); region.set_is_in_flashback(is_in_flashback); + region.set_flashback_start_ts(req.get_prepare_flashback().get_start_ts()); // Modify the `RegionLocalState` persisted in disk. 
write_peer_state(ctx.kv_wb_mut(), ®ion, PeerState::Normal, None).unwrap_or_else(|e| { panic!( diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 1cc603f2490..a8232fd8322 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -5223,9 +5223,13 @@ where // the apply phase and because a read-only request doesn't need to be applied, // so it will be allowed during the flashback progress, for example, a snapshot // request. - if let Err(e) = - util::check_flashback_state(self.region().is_in_flashback, msg, region_id, true) - { + if let Err(e) = util::check_flashback_state( + self.region().is_in_flashback, + self.region().flashback_start_ts, + msg, + region_id, + true, + ) { match e { Error::FlashbackInProgress(_) => self .ctx diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 4d8128822c7..0344adb2b92 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -31,7 +31,9 @@ use raft::{ }; use raft_proto::ConfChangeI; use tikv_util::{ - box_err, debug, info, + box_err, + codec::number::{decode_u64, NumberEncoder}, + debug, info, store::{find_peer_by_id, region}, time::monotonic_raw_now, Either, @@ -336,6 +338,7 @@ pub fn compare_region_epoch( // flashback. pub fn check_flashback_state( is_in_flashback: bool, + flashback_start_ts: u64, req: &RaftCmdRequest, region_id: u64, skip_not_prepared: bool, @@ -347,11 +350,20 @@ pub fn check_flashback_state( { return Ok(()); } + // TODO: only use `flashback_start_ts` to check flashback state. + let is_in_flashback = is_in_flashback || flashback_start_ts > 0; let is_flashback_request = WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()) .contains(WriteBatchFlags::FLASHBACK); - // If the region is in the flashback state, the only allowed request is the - // flashback request itself. 
+ // If the region is in the flashback state: + // - A request with flashback flag will be allowed. + // - A read request whose `read_ts` is smaller than `flashback_start_ts` will + // be allowed. if is_in_flashback && !is_flashback_request { + if let Ok(read_ts) = decode_u64(&mut req.get_header().get_flag_data()) { + if read_ts != 0 && read_ts < flashback_start_ts { + return Ok(()); + } + } return Err(Error::FlashbackInProgress(region_id)); } // If the region is not in the flashback state, the flashback request itself @@ -362,6 +374,12 @@ pub fn check_flashback_state( Ok(()) } +pub fn encode_start_ts_into_flag_data(header: &mut RaftRequestHeader, start_ts: u64) { + let mut data = [0u8; 8]; + (&mut data[..]).encode_u64(start_ts).unwrap(); + header.set_flag_data(data.into()); +} + pub fn is_region_epoch_equal( from_epoch: &metapb::RegionEpoch, current_epoch: &metapb::RegionEpoch, diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 6b20e375786..5d6835666b4 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -822,7 +822,10 @@ where // Check whether the region is in the flashback state and the local read could // be performed. 
let is_in_flashback = delegate.region.is_in_flashback; - if let Err(e) = util::check_flashback_state(is_in_flashback, req, region_id, false) { + let flashback_start_ts = delegate.region.flashback_start_ts; + if let Err(e) = + util::check_flashback_state(is_in_flashback, flashback_start_ts, req, region_id, false) + { TLS_LOCAL_READ_METRICS.with(|m| match e { Error::FlashbackNotPrepared(_) => { m.borrow_mut().reject_reason.flashback_not_prepared.inc() diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 8b3745120d5..4bcb99adca3 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -1246,15 +1246,9 @@ pub fn must_raw_get(client: &TikvClient, ctx: Context, key: Vec) -> Option { pub pb_ctx: &'a Context, pub read_id: Option, - // When start_ts is None and `stale_read` is true, it means acquire a snapshot without any - // consistency guarantee. + // When `start_ts` is None and `stale_read` is true, it means acquire a snapshot without any + // consistency guarantee. This filed is also used to check if a read is allowed in the + // flashback. pub start_ts: Option, // `key_ranges` is used in replica read. It will send to // the leader via raft "read index" to check memory locks. @@ -418,7 +419,7 @@ pub trait Engine: Send + Clone + 'static { /// Mark the start of flashback. // It's an infrequent API, use trait object for simplicity. 
- fn start_flashback(&self, _ctx: &Context) -> BoxFuture<'static, Result<()>> { + fn start_flashback(&self, _ctx: &Context, _start_ts: u64) -> BoxFuture<'static, Result<()>> { Box::pin(futures::future::ready(Ok(()))) } diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index c50c42c9fc6..751c07c6b65 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -44,14 +44,13 @@ use raftstore::{ errors::Error as RaftServerError, router::{LocalReadRouter, RaftStoreRouter}, store::{ - self, Callback as StoreCallback, RaftCmdExtraOpts, ReadIndexContext, ReadResponse, - RegionSnapshot, StoreMsg, WriteResponse, + self, util::encode_start_ts_into_flag_data, Callback as StoreCallback, RaftCmdExtraOpts, + ReadIndexContext, ReadResponse, RegionSnapshot, StoreMsg, WriteResponse, }, }; use thiserror::Error; use tikv_kv::{write_modifies, OnAppliedCb, WriteEvent}; use tikv_util::{ - codec::number::NumberEncoder, future::{paired_future_callback, paired_must_called_future_callback}, time::Instant, }; @@ -547,18 +546,21 @@ where let mut header = new_request_header(ctx.pb_ctx); let mut flags = 0; - if ctx.pb_ctx.get_stale_read() && ctx.start_ts.map_or(true, |ts| !ts.is_zero()) { - let mut data = [0u8; 8]; - (&mut data[..]) - .encode_u64(ctx.start_ts.unwrap_or_default().into_inner()) - .unwrap(); + let need_encoded_start_ts = ctx.start_ts.map_or(true, |ts| !ts.is_zero()); + if ctx.pb_ctx.get_stale_read() && need_encoded_start_ts { flags |= WriteBatchFlags::STALE_READ.bits(); - header.set_flag_data(data.into()); } if ctx.allowed_in_flashback { flags |= WriteBatchFlags::FLASHBACK.bits(); } header.set_flags(flags); + // Encode `start_ts` in `flag_data` for the check of stale read and flashback. 
+ if need_encoded_start_ts { + encode_start_ts_into_flag_data( + &mut header, + ctx.start_ts.unwrap_or_default().into_inner(), + ); + } let mut cmd = RaftCmdRequest::default(); cmd.set_header(header); @@ -637,13 +639,16 @@ where } } - fn start_flashback(&self, ctx: &Context) -> BoxFuture<'static, kv::Result<()>> { + fn start_flashback(&self, ctx: &Context, start_ts: u64) -> BoxFuture<'static, kv::Result<()>> { // Send an `AdminCmdType::PrepareFlashback` to prepare the raftstore for the // later flashback. Once invoked, we will update the persistent region meta and // the memory state of the flashback in Peer FSM to reject all read, write // and scheduling operations for this region when propose/apply before we // start the actual data flashback transaction command in the next phase. - let req = new_flashback_req(ctx, AdminCmdType::PrepareFlashback); + let mut req = new_flashback_req(ctx, AdminCmdType::PrepareFlashback); + req.mut_admin_request() + .mut_prepare_flashback() + .set_start_ts(start_ts); exec_admin(&*self.router, req) } diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 526a1fab3ca..28f2a1d5d25 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -15,7 +15,7 @@ use engine_traits::{KvEngine, RaftEngine, CF_LOCK}; use futures::{Future, Stream, StreamExt}; use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, Request}; pub use node::NodeV2; -use raftstore::store::RegionSnapshot; +use raftstore::store::{util::encode_start_ts_into_flag_data, RegionSnapshot}; use raftstore_v2::{ router::{ message::SimpleWrite, CmdResChannelBuilder, CmdResEvent, CmdResStream, PeerMsg, RaftRouter, @@ -23,7 +23,7 @@ use raftstore_v2::{ SimpleWriteBinary, SimpleWriteEncoder, }; use tikv_kv::{Modify, WriteEvent}; -use tikv_util::{codec::number::NumberEncoder, time::Instant}; +use tikv_util::time::Instant; use txn_types::{TxnExtra, TxnExtraScheduler, WriteBatchFlags}; use super::{ @@ -153,18 +153,21 @@ impl tikv_kv::Engine for RaftKv2 { let 
mut header = new_request_header(ctx.pb_ctx); let mut flags = 0; - if ctx.pb_ctx.get_stale_read() && ctx.start_ts.map_or(true, |ts| !ts.is_zero()) { - let mut data = [0u8; 8]; - (&mut data[..]) - .encode_u64(ctx.start_ts.unwrap_or_default().into_inner()) - .unwrap(); + let need_encoded_start_ts = ctx.start_ts.map_or(true, |ts| !ts.is_zero()); + if ctx.pb_ctx.get_stale_read() && need_encoded_start_ts { flags |= WriteBatchFlags::STALE_READ.bits(); - header.set_flag_data(data.into()); } if ctx.allowed_in_flashback { flags |= WriteBatchFlags::FLASHBACK.bits(); } header.set_flags(flags); + // Encode `start_ts` in `flag_data` for the check of stale read and flashback. + if need_encoded_start_ts { + encode_start_ts_into_flag_data( + &mut header, + ctx.start_ts.unwrap_or_default().into_inner(), + ); + } let mut cmd = RaftCmdRequest::default(); cmd.set_header(header); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index d42eb510891..da292eca17d 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1450,7 +1450,9 @@ fn future_prepare_flashback_to_version( ) -> impl Future> { let storage = storage.clone(); async move { - let f = storage.get_engine().start_flashback(req.get_context()); + let f = storage + .get_engine() + .start_flashback(req.get_context(), req.get_start_ts()); let mut res = f.await.map_err(storage::Error::from); if matches!(res, Ok(())) { // After the region is put into the flashback state, we need to do a special @@ -1488,10 +1490,7 @@ fn future_flashback_to_version( res = f.await.unwrap_or_else(|e| Err(box_err!(e))); } if matches!(res, Ok(())) { - // Only finish flashback when Flashback executed successfully. - fail_point!("skip_finish_flashback_to_version", |_| { - Ok(FlashbackToVersionResponse::default()) - }); + // Only finish when flashback executed successfully. 
let f = storage.get_engine().end_flashback(req.get_context()); res = f.await.map_err(storage::Error::from); } diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 8709373b766..61a3fb39097 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -711,19 +711,17 @@ fn test_mvcc_flashback() { } #[test] -#[cfg(feature = "failpoints")] fn test_mvcc_flashback_block_rw() { let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); - fail::cfg("skip_finish_flashback_to_version", "return").unwrap(); - // Flashback - must_flashback_to_version(&client, ctx.clone(), 0, 1, 2); - // Try to read. + // Prepare the flashback. + must_prepare_flashback(&client, ctx.clone(), 1, 2); + // Try to read version 3 (after flashback, FORBIDDEN). let (k, v) = (b"key".to_vec(), b"value".to_vec()); // Get let mut get_req = GetRequest::default(); get_req.set_context(ctx.clone()); get_req.key = k.clone(); - get_req.version = 1; + get_req.version = 3; let get_resp = client.kv_get(&get_req).unwrap(); assert!(get_resp.get_region_error().has_flashback_in_progress()); assert!(!get_resp.has_error()); @@ -733,28 +731,48 @@ fn test_mvcc_flashback_block_rw() { scan_req.set_context(ctx.clone()); scan_req.start_key = k.clone(); scan_req.limit = 1; - scan_req.version = 1; + scan_req.version = 3; let scan_resp = client.kv_scan(&scan_req).unwrap(); assert!(scan_resp.get_region_error().has_flashback_in_progress()); + assert!(!scan_resp.has_error()); assert!(scan_resp.pairs.is_empty()); - // Try to write. + // Try to read version 1 (before flashback, ALLOWED). 
+ // Get + let mut get_req = GetRequest::default(); + get_req.set_context(ctx.clone()); + get_req.key = k.clone(); + get_req.version = 1; + let get_resp = client.kv_get(&get_req).unwrap(); + assert!(!get_resp.has_region_error()); + assert!(!get_resp.has_error()); + assert!(get_resp.value.is_empty()); + // Scan + let mut scan_req = ScanRequest::default(); + scan_req.set_context(ctx.clone()); + scan_req.start_key = k.clone(); + scan_req.limit = 1; + scan_req.version = 1; + let scan_resp = client.kv_scan(&scan_req).unwrap(); + assert!(!scan_resp.has_region_error()); + assert!(!scan_resp.has_error()); + assert!(scan_resp.pairs.is_empty()); + // Try to write (FORBIDDEN). // Prewrite let mut mutation = Mutation::default(); mutation.set_op(Op::Put); mutation.set_key(k.clone()); mutation.set_value(v); - let prewrite_resp = try_kv_prewrite(&client, ctx, vec![mutation], k, 1); + let prewrite_resp = try_kv_prewrite(&client, ctx.clone(), vec![mutation], k, 1); assert!(prewrite_resp.get_region_error().has_flashback_in_progress()); - fail::remove("skip_finish_flashback_to_version"); + // Finish the flashback. + must_finish_flashback(&client, ctx, 1, 2, 3); } #[test] -#[cfg(feature = "failpoints")] fn test_mvcc_flashback_block_scheduling() { let (mut cluster, client, ctx) = must_new_cluster_and_kv_client(); - fail::cfg("skip_finish_flashback_to_version", "return").unwrap(); - // Flashback - must_flashback_to_version(&client, ctx, 0, 1, 2); + // Prepare the flashback. + must_prepare_flashback(&client, ctx.clone(), 0, 1); // Try to transfer leader. let transfer_leader_resp = cluster.try_transfer_leader(1, new_peer(2, 2)); assert!( @@ -763,7 +781,8 @@ fn test_mvcc_flashback_block_scheduling() { .get_error() .has_flashback_in_progress() ); - fail::remove("skip_finish_flashback_to_version"); + // Finish the flashback. 
+ must_finish_flashback(&client, ctx, 0, 1, 2); } #[test] @@ -794,16 +813,7 @@ fn test_mvcc_flashback_unprepared() { assert!(!get_resp.has_error()); assert_eq!(get_resp.value, b"".to_vec()); // Mock the flashback retry. - let mut req = FlashbackToVersionRequest::default(); - req.set_context(ctx); - req.set_start_ts(6); - req.set_commit_ts(7); - req.version = 0; - req.start_key = b"a".to_vec(); - req.end_key = b"z".to_vec(); - let resp = client.kv_flashback_to_version(&req).unwrap(); - assert!(!resp.has_region_error()); - assert!(resp.get_error().is_empty()); + must_finish_flashback(&client, ctx.clone(), 0, 6, 7); let get_resp = client.kv_get(&get_req).unwrap(); assert!(!get_resp.has_region_error()); assert!(!get_resp.has_error()); @@ -811,7 +821,7 @@ fn test_mvcc_flashback_unprepared() { } #[test] -fn test_mvcc_flashback_with_unlimit_range() { +fn test_mvcc_flashback_with_unlimited_range() { let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); let (k, v) = (b"key".to_vec(), b"value".to_vec()); let mut ts = 0; From 7ec73fdd440a1d81e2a5a8c62aa9e31828959903 Mon Sep 17 00:00:00 2001 From: iosmanthus Date: Tue, 31 Jan 2023 10:41:55 +0800 Subject: [PATCH 0473/1149] import: sst_importer support download SST and rewrite into keyspace data. (#14046) ref tikv/tikv#12999 import: sst_importer support download SST and rewrite into keyspace data. 
Signed-off-by: iosmanthus --- Cargo.lock | 1 + components/keys/Cargo.toml | 1 + components/keys/src/rewrite.rs | 10 +++ components/sst_importer/src/import_file.rs | 3 +- components/sst_importer/src/sst_importer.rs | 90 ++++++++++++++++----- components/txn_types/src/types.rs | 10 +++ src/import/sst_service.rs | 4 +- 7 files changed, 95 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 46eac5930a1..f2ce2ba4ce1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2724,6 +2724,7 @@ dependencies = [ "panic_hook", "thiserror", "tikv_alloc", + "tikv_util", ] [[package]] diff --git a/components/keys/Cargo.toml b/components/keys/Cargo.toml index 5f2bf5935ee..b5a6412d00a 100644 --- a/components/keys/Cargo.toml +++ b/components/keys/Cargo.toml @@ -10,6 +10,7 @@ kvproto = { workspace = true } log_wrappers = { workspace = true } thiserror = "1.0" tikv_alloc = { workspace = true } +tikv_util = { workspace = true } [dev-dependencies] panic_hook = { workspace = true } diff --git a/components/keys/src/rewrite.rs b/components/keys/src/rewrite.rs index 51f588e9732..68541bb50e0 100644 --- a/components/keys/src/rewrite.rs +++ b/components/keys/src/rewrite.rs @@ -6,11 +6,21 @@ use std::ops::Bound::{self, *}; +use tikv_util::codec::bytes::encode_bytes; + /// An error indicating the key cannot be rewritten because it does not start /// with the given prefix. #[derive(PartialEq, Debug, Clone)] pub struct WrongPrefix; +pub fn encode_bound(bound: Bound>) -> Bound> { + match bound { + Included(k) => Included(encode_bytes(&k)), + Excluded(k) => Excluded(encode_bytes(&k)), + Unbounded => Unbounded, + } +} + /// Rewrites the prefix of a byte array. 
pub fn rewrite_prefix( old_prefix: &[u8], diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index f766729a066..84d2f67bbab 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -15,6 +15,7 @@ use engine_traits::{ iter_option, EncryptionKeyManager, Iterator, KvEngine, RefIterable, SstMetaInfo, SstReader, }; use file_system::{get_io_rate_limiter, sync_dir, File, OpenOptions}; +use keys::data_key; use kvproto::{import_sstpb::*, kvrpcpb::ApiVersion}; use tikv_util::time::Instant; use uuid::{Builder as UuidBuilder, Uuid}; @@ -336,7 +337,7 @@ impl ImportDir { let sst_reader = RocksSstReader::open_with_env(path_str, Some(env))?; for &(start, end) in TIDB_RANGES_COMPLEMENT { - let opt = iter_option(start, end, false); + let opt = iter_option(&data_key(start), &data_key(end), false); let mut iter = sst_reader.iter(opt)?; if iter.seek(start)? { error!( diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 8b6d64f483f..fabe9e2a13a 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -32,7 +32,10 @@ use kvproto::{ kvrpcpb::ApiVersion, }; use tikv_util::{ - codec::stream_event::{EventEncoder, EventIterator, Iterator as EIterator}, + codec::{ + bytes::{decode_bytes_in_place, encode_bytes}, + stream_event::{EventEncoder, EventIterator, Iterator as EIterator}, + }, config::ReadableSize, stream::block_on_external_io, sys::SysQuota, @@ -53,13 +56,18 @@ use crate::{ #[derive(Default, Debug, Clone)] pub struct DownloadExt<'a> { cache_key: Option<&'a str>, + req_type: DownloadRequestType, } impl<'a> DownloadExt<'a> { - pub fn cache_key(self, key: &'a str) -> Self { - Self { - cache_key: Some(key), - } + pub fn cache_key(mut self, key: &'a str) -> Self { + self.cache_key = Some(key); + self + } + + pub fn req_type(mut self, req_type: DownloadRequestType) -> Self { + 
self.req_type = req_type; + self } } @@ -896,16 +904,20 @@ impl SstImporter { let sst_reader = RocksSstReader::open_with_env(dst_file_name, Some(env))?; sst_reader.verify_checksum()?; + // undo key rewrite so we could compare with the keys inside SST + let old_prefix = rewrite_rule.get_old_key_prefix(); + let new_prefix = rewrite_rule.get_new_key_prefix(); + let req_type = ext.req_type; + debug!("downloaded file and verified"; "meta" => ?meta, "name" => name, "path" => dst_file_name, + "old_prefix" => log_wrappers::Value::key(old_prefix), + "new_prefix" => log_wrappers::Value::key(new_prefix), + "req_type" => ?req_type, ); - // undo key rewrite so we could compare with the keys inside SST - let old_prefix = rewrite_rule.get_old_key_prefix(); - let new_prefix = rewrite_rule.get_new_key_prefix(); - let range_start = meta.get_range().get_start(); let range_end = meta.get_range().get_end(); let range_start_bound = key_to_bound(range_start); @@ -915,14 +927,14 @@ impl SstImporter { key_to_bound(range_end) }; - let range_start = + let mut range_start = keys::rewrite::rewrite_prefix_of_start_bound(new_prefix, old_prefix, range_start_bound) .map_err(|_| Error::WrongKeyPrefix { what: "SST start range", key: range_start.to_vec(), prefix: new_prefix.to_vec(), })?; - let range_end = + let mut range_end = keys::rewrite::rewrite_prefix_of_end_bound(new_prefix, old_prefix, range_end_bound) .map_err(|_| Error::WrongKeyPrefix { what: "SST end range", @@ -930,6 +942,11 @@ impl SstImporter { prefix: new_prefix.to_vec(), })?; + if req_type == DownloadRequestType::Keyspace { + range_start = keys::rewrite::encode_bound(range_start); + range_end = keys::rewrite::encode_bound(range_end); + } + let start_rename_rewrite = Instant::now(); // read the first and last keys from the SST, determine if we could // simply move the entire SST instead of iterating and generate a new one. @@ -942,9 +959,15 @@ impl SstImporter { return Ok(None); } if !iter.seek_to_first()? 
{ + let mut range = meta.get_range().clone(); + if req_type == DownloadRequestType::Keyspace { + *range.mut_start() = encode_bytes(&range.take_start()); + *range.mut_end() = encode_bytes(&range.take_end()); + } // the SST is empty, so no need to iterate at all (should be impossible?) - return Ok(Some(meta.get_range().clone())); + return Ok(Some(range)); } + let start_key = keys::origin_key(iter.key()); if is_before_start_bound(start_key, &range_start) { // SST's start is before the range to consume, so needs to iterate to skip over @@ -995,8 +1018,10 @@ impl SstImporter { } // perform iteration and key rewrite. - let mut key = keys::data_key(new_prefix); - let new_prefix_data_key_len = key.len(); + let mut data_key = keys::DATA_PREFIX_KEY.to_vec(); + let data_key_prefix_len = keys::DATA_PREFIX_KEY.len(); + let mut user_key = new_prefix.to_vec(); + let user_key_prefix_len = new_prefix.len(); let mut first_key = None; match range_start { @@ -1016,10 +1041,22 @@ impl SstImporter { .unwrap(); while iter.valid()? 
{ - let old_key = keys::origin_key(iter.key()); - if is_after_end_bound(old_key, &range_end) { + let mut old_key = Cow::Borrowed(keys::origin_key(iter.key())); + let mut ts = None; + + if is_after_end_bound(old_key.as_ref(), &range_end) { break; } + + if req_type == DownloadRequestType::Keyspace { + ts = Some(Key::decode_ts_bytes_from(old_key.as_ref())?.to_owned()); + old_key = { + let mut key = old_key.to_vec(); + decode_bytes_in_place(&mut key, false)?; + Cow::Owned(key) + }; + } + if !old_key.starts_with(old_prefix) { return Err(Error::WrongKeyPrefix { what: "Key in SST", @@ -1027,12 +1064,21 @@ impl SstImporter { prefix: old_prefix.to_vec(), }); } - key.truncate(new_prefix_data_key_len); - key.extend_from_slice(&old_key[old_prefix.len()..]); + + data_key.truncate(data_key_prefix_len); + user_key.truncate(user_key_prefix_len); + user_key.extend_from_slice(&old_key[old_prefix.len()..]); + if req_type == DownloadRequestType::Keyspace { + data_key.extend(encode_bytes(&user_key)); + data_key.extend(ts.unwrap()); + } else { + data_key.extend_from_slice(&user_key); + } + let mut value = Cow::Borrowed(iter.value()); if rewrite_rule.new_timestamp != 0 { - key = Key::from_encoded(key) + data_key = Key::from_encoded(data_key) .truncate_ts() .map_err(|e| { Error::BadFormat(format!( @@ -1056,10 +1102,10 @@ impl SstImporter { } } - sst_writer.put(&key, &value)?; + sst_writer.put(&data_key, &value)?; iter.next()?; if first_key.is_none() { - first_key = Some(keys::origin_key(&key).to_vec()); + first_key = Some(keys::origin_key(&data_key).to_vec()); } } @@ -1078,7 +1124,7 @@ impl SstImporter { let mut final_range = Range::default(); final_range.set_start(start_key); - final_range.set_end(keys::origin_key(&key).to_vec()); + final_range.set_end(keys::origin_key(&data_key).to_vec()); Ok(Some(final_range)) } else { // nothing is written: prevents finishing the SST at all. 
diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 60e64bf444a..15779df426a 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -192,6 +192,16 @@ impl Key { Ok(number::decode_u64_desc(&mut ts)?.into()) } + /// Decode the timestamp from a ts encoded key and return in bytes. + #[inline] + pub fn decode_ts_bytes_from(key: &[u8]) -> Result<&[u8], codec::Error> { + let len = key.len(); + if len < number::U64_SIZE { + return Err(codec::Error::KeyLength); + } + Ok(&key[key.len() - number::U64_SIZE..]) + } + /// Whether the user key part of a ts encoded key `ts_encoded_key` equals to /// the encoded user key `user_key`. /// diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index ea52cad0095..08eabe32f0c 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -783,7 +783,9 @@ where cipher, limiter, engine, - DownloadExt::default().cache_key(req.get_storage_cache_id()), + DownloadExt::default() + .cache_key(req.get_storage_cache_id()) + .req_type(req.get_request_type()), ); let mut resp = DownloadResponse::default(); match res.await { From ec2f4dc5420dbdab05ea47ff1724a54e765cdca4 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Mon, 30 Jan 2023 21:47:54 -0800 Subject: [PATCH 0474/1149] rocksdb: reduce rocksdb block size to 16KB (#14053) close tikv/tikv#14052 The writecf and defaultcf's default block size is changed to 16KB to improve read performance (reduce read amplification) Signed-off-by: qi.xu Co-authored-by: qi.xu --- etc/config-template.toml | 4 ++-- src/config/mod.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/etc/config-template.toml b/etc/config-template.toml index 62623afed0e..59152570da1 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -679,7 +679,7 @@ ## The data block size. RocksDB compresses data based on the unit of block. 
## Similar to page in other databases, block is the smallest unit cached in block-cache. Note that ## the block size specified here corresponds to uncompressed data. -# block-size = "64KB" +# block-size = "16KB" ## If you're doing point lookups you definitely want to turn bloom filters on. We use bloom filters ## to avoid unnecessary disk reads. Default bits_per_key is 10, which yields ~1% false positive @@ -915,7 +915,7 @@ [rocksdb.writecf] ## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`. # compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] -# block-size = "64KB" +# block-size = "16KB" ## Recommend to set it the same as `rocksdb.defaultcf.write-buffer-size`. # write-buffer-size = "128MB" diff --git a/src/config/mod.rs b/src/config/mod.rs index 99b593e2443..0a32c99f422 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -631,7 +631,7 @@ impl Default for DefaultCfConfig { let total_mem = SysQuota::memory_limit_in_bytes(); DefaultCfConfig { - block_size: ReadableSize::kb(64), + block_size: ReadableSize::kb(16), block_cache_size: memory_limit_for_cf(false, CF_DEFAULT, total_mem), disable_block_cache: false, cache_index_and_filter_blocks: true, @@ -756,7 +756,7 @@ impl Default for WriteCfConfig { }; WriteCfConfig { - block_size: ReadableSize::kb(64), + block_size: ReadableSize::kb(16), block_cache_size: memory_limit_for_cf(false, CF_WRITE, total_mem), disable_block_cache: false, cache_index_and_filter_blocks: true, From 15d6040c68eb0f2edf6b9304aebf69092657f8a4 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Mon, 30 Jan 2023 22:01:54 -0800 Subject: [PATCH 0475/1149] storage: add an alias partitioned-raft-kv for RaftKv2 (#14083) ref tikv/tikv#12842 add an alias partitioned-raft-kv for RaftKv2 Signed-off-by: qi.xu Co-authored-by: qi.xu Co-authored-by: Ti Chi Robot --- src/config/mod.rs | 2 +- src/storage/config.rs | 1 + tests/integrations/config/test-custom.toml | 2 +- 3 files changed, 3 insertions(+), 2 
deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 0a32c99f422..7e006ef2eed 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3135,7 +3135,7 @@ impl TikvConfig { if self.storage.engine == EngineType::RaftKv2 { self.raft_store.store_io_pool_size = cmp::max(self.raft_store.store_io_pool_size, 1); if !self.raft_engine.enable { - panic!("raft-kv2 only supports raft log engine."); + panic!("partitioned-raft-kv only supports raft log engine."); } } diff --git a/src/storage/config.rs b/src/storage/config.rs index 68d739c1639..d74bd721104 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -35,6 +35,7 @@ const DEFAULT_RESERVED_RAFT_SPACE_GB: u64 = 1; #[serde(rename_all = "kebab-case")] pub enum EngineType { RaftKv, + #[serde(alias = "partitioned-raft-kv")] RaftKv2, } diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index b096437e60c..d79ec7899e2 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -90,7 +90,7 @@ a = "b" [storage] data-dir = "/var" -engine = "raft-kv2" +engine = "partitioned-raft-kv" gc-ratio-threshold = 1.2 max-key-size = 4096 scheduler-concurrency = 123 From 23a228824cb0e82cc495edb28c3276774f97aead Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 31 Jan 2023 14:35:54 +0800 Subject: [PATCH 0476/1149] resolved_ts: reduce network traffic by filter regions (#14098) close tikv/tikv#14092 resolved_ts: reduce network traffic by filter regions Signed-off-by: Neil Shen Co-authored-by: Ti Chi Robot --- components/cdc/src/endpoint.rs | 2 +- components/resolved_ts/src/advance.rs | 128 +++++++++++++++++++++++++- src/config/mod.rs | 2 +- 3 files changed, 128 insertions(+), 4 deletions(-) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 6d64754d042..2b4eb9ff226 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -1543,7 +1543,7 @@ mod tests { } let diff = 
cfg.diff(&updated_cfg); ep.run(Task::ChangeConfig(diff)); - assert_eq!(ep.config.min_ts_interval, ReadableDuration::millis(200)); + assert_eq!(ep.config.min_ts_interval, ReadableDuration::secs(1)); assert_eq!(ep.config.hibernate_regions_compatible, true); { diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index a78e903bc72..fd58fac1601 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -149,6 +149,7 @@ pub struct LeadershipResolver { region_map: HashMap>, // region_id -> peers id, record the responses. resp_map: HashMap>, + checking_regions: HashSet, valid_regions: HashSet, gc_interval: Duration, @@ -176,6 +177,7 @@ impl LeadershipResolver { region_map: HashMap::default(), resp_map: HashMap::default(), valid_regions: HashSet::default(), + checking_regions: HashSet::default(), last_gc_time: Instant::now_coarse(), gc_interval, } @@ -188,6 +190,7 @@ impl LeadershipResolver { self.region_map = HashMap::default(); self.resp_map = HashMap::default(); self.valid_regions = HashSet::default(); + self.checking_regions = HashSet::default(); self.last_gc_time = now; } } @@ -203,6 +206,7 @@ impl LeadershipResolver { for v in self.resp_map.values_mut() { v.clear(); } + self.checking_regions.clear(); self.valid_regions.clear(); } @@ -248,7 +252,11 @@ impl LeadershipResolver { // This function broadcasts a special message to all stores, gets the leader id // of them to confirm whether current peer has a quorum which accepts its // leadership. - pub async fn resolve(&mut self, _regions: Vec, min_ts: TimeStamp) -> Vec { + pub async fn resolve(&mut self, regions: Vec, min_ts: TimeStamp) -> Vec { + if regions.is_empty() { + return regions; + } + // Clear previous result before resolving. self.clear(); // GC when necessary to prevent memory leak. 
@@ -256,15 +264,22 @@ impl LeadershipResolver { PENDING_RTS_COUNT.inc(); defer!(PENDING_RTS_COUNT.dec()); - fail_point!("before_sync_replica_read_state", |_| _regions.clone()); + fail_point!("before_sync_replica_read_state", |_| regions.clone()); let store_id = self.store_id; let valid_regions = &mut self.valid_regions; let region_map = &mut self.region_map; let resp_map = &mut self.resp_map; let store_req_map = &mut self.store_req_map; + let checking_regions = &mut self.checking_regions; + for region_id in ®ions { + checking_regions.insert(*region_id); + } self.region_read_progress.with(|registry| { for (region_id, read_progress) in registry { + if !checking_regions.contains(region_id) { + continue; + } let core = read_progress.get_core(); let local_leader_info = core.get_local_leader_info(); let leader_id = local_leader_info.get_leader_id(); @@ -512,3 +527,112 @@ async fn get_tikv_client( RTS_TIKV_CLIENT_INIT_DURATION_HISTOGRAM.observe(start.saturating_elapsed_secs()); Ok(cli) } + +#[cfg(test)] +mod tests { + use std::{ + sync::{ + mpsc::{channel, Receiver, Sender}, + Arc, + }, + time::Duration, + }; + + use grpcio::{self, ChannelBuilder, EnvBuilder, Server, ServerBuilder}; + use kvproto::{metapb::Region, tikvpb::Tikv, tikvpb_grpc::create_tikv}; + use pd_client::PdClient; + use raftstore::store::util::RegionReadProgress; + use tikv_util::store::new_peer; + + use super::*; + + #[derive(Clone)] + struct MockTikv { + req_tx: Sender, + } + + impl Tikv for MockTikv { + fn check_leader( + &mut self, + ctx: grpcio::RpcContext<'_>, + req: CheckLeaderRequest, + sink: ::grpcio::UnarySink, + ) { + self.req_tx.send(req).unwrap(); + ctx.spawn(async { + sink.success(CheckLeaderResponse::default()).await.unwrap(); + }) + } + } + + struct MockPdClient {} + impl PdClient for MockPdClient {} + + fn new_rpc_suite(env: Arc) -> (Server, TikvClient, Receiver) { + let (tx, rx) = channel(); + let tikv_service = MockTikv { req_tx: tx }; + let builder = 
ServerBuilder::new(env.clone()).register_service(create_tikv(tikv_service)); + let mut server = builder.bind("127.0.0.1", 0).build().unwrap(); + server.start(); + let (_, port) = server.bind_addrs().next().unwrap(); + let addr = format!("127.0.0.1:{}", port); + let channel = ChannelBuilder::new(env).connect(&addr); + let client = TikvClient::new(channel); + (server, client, rx) + } + + #[tokio::test] + async fn test_resolve_leader_request_size() { + let env = Arc::new(EnvBuilder::new().build()); + let (mut server, tikv_client, rx) = new_rpc_suite(env.clone()); + + let mut region1 = Region::default(); + region1.id = 1; + region1.peers.push(new_peer(1, 1)); + region1.peers.push(new_peer(2, 11)); + let progress1 = RegionReadProgress::new(®ion1, 1, 1, 1); + progress1.update_leader_info(1, 1, ®ion1); + + let mut region2 = Region::default(); + region2.id = 2; + region2.peers.push(new_peer(1, 2)); + region2.peers.push(new_peer(2, 22)); + let progress2 = RegionReadProgress::new(®ion2, 1, 1, 2); + progress2.update_leader_info(2, 2, ®ion2); + + let mut leader_resolver = LeadershipResolver::new( + 1, // store id + Arc::new(MockPdClient {}), + env.clone(), + Arc::new(SecurityManager::default()), + RegionReadProgressRegistry::new(), + Duration::from_secs(1), + ); + leader_resolver + .tikv_clients + .lock() + .await + .insert(2 /* store id */, tikv_client); + leader_resolver + .region_read_progress + .insert(1, Arc::new(progress1)); + leader_resolver + .region_read_progress + .insert(2, Arc::new(progress2)); + + leader_resolver.resolve(vec![1, 2], TimeStamp::new(1)).await; + let req = rx.recv_timeout(Duration::from_secs(1)).unwrap(); + assert_eq!(req.regions.len(), 2); + + // Checking one region only send 1 region in request. + leader_resolver.resolve(vec![1], TimeStamp::new(1)).await; + let req = rx.recv_timeout(Duration::from_secs(1)).unwrap(); + assert_eq!(req.regions.len(), 1); + + // Checking zero region does not send request. 
+ leader_resolver.resolve(vec![], TimeStamp::new(1)).await; + rx.recv_timeout(Duration::from_secs(1)).unwrap_err(); + + let _ = server.shutdown().await; + } +} diff --git a/src/config/mod.rs b/src/config/mod.rs index 7e006ef2eed..3274d5442df 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2653,7 +2653,7 @@ pub struct CdcConfig { impl Default for CdcConfig { fn default() -> Self { Self { - min_ts_interval: ReadableDuration::millis(200), + min_ts_interval: ReadableDuration::secs(1), hibernate_regions_compatible: true, // 4 threads for incremental scan. incremental_scan_threads: 4, From a33eb2d08991f278785e8b3047c643bf07839bce Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 31 Jan 2023 14:59:54 +0800 Subject: [PATCH 0477/1149] raftstore-v2: fix peer not cleanup when it replicates more logs (#14101) ref tikv/tikv#12842 If it accepts more logs than conf remove itself, applied_index == commit_index will never be true. So we should check if it's a tombstone already first. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/fsm/store.rs | 4 + components/raftstore-v2/src/operation/life.rs | 7 +- .../raftstore-v2/src/operation/ready/mod.rs | 26 +++--- .../src/operation/ready/snapshot.rs | 17 +++- components/raftstore-v2/src/raft/storage.rs | 7 +- components/raftstore-v2/src/router/message.rs | 10 ++- .../tests/integrations/cluster.rs | 13 ++- .../tests/integrations/test_conf_change.rs | 80 ++++++++++++++++++- .../raftstore/src/store/async_io/read.rs | 6 +- 9 files changed, 144 insertions(+), 26 deletions(-) diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 86e3540d23c..17c0a9a50f9 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -266,6 +266,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { .fsm .store .on_store_unreachable(self.store_ctx, to_store_id), + #[cfg(feature = "testexport")] + 
StoreMsg::WaitFlush { region_id, ch } => { + self.fsm.store.on_wait_flush(self.store_ctx, region_id, ch) + } } } } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 88646f06b59..3a9f678bd8c 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -284,8 +284,11 @@ impl Peer { #[inline] pub fn postponed_destroy(&self) -> bool { let entry_storage = self.storage().entry_storage(); - // TODO: check actual split index instead of commit index. - entry_storage.applied_index() != entry_storage.commit_index() + // If it's marked as tombstone, then it must be changed by conf change. In + // this case, all following entries are skipped so applied_index never equals + // to commit_index. + (self.storage().region_state().get_state() != PeerState::Tombstone + && entry_storage.applied_index() != entry_storage.commit_index()) // Wait for critical commands like split. || self.has_pending_tombstone_tablets() } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 38d126ac87a..e7c32e742ec 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -52,7 +52,7 @@ use crate::{ batch::StoreContext, fsm::{PeerFsmDelegate, Store}, raft::{Peer, Storage}, - router::{ApplyTask, PeerMsg, PeerTick}, + router::{PeerMsg, PeerTick}, worker::tablet_gc, }; @@ -70,6 +70,19 @@ impl Store { ctx.router .broadcast_normal(|| PeerMsg::StoreUnreachable { to_store_id }); } + + #[cfg(feature = "testexport")] + pub fn on_wait_flush( + &mut self, + ctx: &mut StoreContext, + region_id: u64, + ch: crate::router::FlushChannel, + ) where + EK: KvEngine, + ER: RaftEngine, + { + let _ = ctx.router.send(region_id, PeerMsg::WaitFlush(ch)); + } } impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { @@ -455,6 +468,7 @@ impl Peer { && 
!self.raft_group().has_ready() && (self.serving() || self.postponed_destroy()) { + self.maybe_schedule_gen_snapshot(); #[cfg(feature = "testexport")] self.async_writer.notify_flush(); return; @@ -501,15 +515,7 @@ impl Peer { self.handle_raft_committed_entries(ctx, ready.take_committed_entries()); } - // Check whether there is a pending generate snapshot task, the task - // needs to be sent to the apply system. - // Always sending snapshot task after apply task, so it gets latest - // snapshot. - if let Some(gen_task) = self.storage_mut().take_gen_snap_task() { - self.apply_scheduler() - .unwrap() - .send(ApplyTask::Snapshot(gen_task)); - } + self.maybe_schedule_gen_snapshot(); let ready_number = ready.number(); let mut write_task = WriteTask::new(self.region_id(), self.peer_id(), ready_number); diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index bcbe220252b..1fae813577c 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -41,13 +41,14 @@ use raftstore::{ RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }, }; -use slog::{error, info, warn}; +use slog::{debug, error, info, warn}; use tikv_util::{box_err, log::SlogFormat, slog_panic}; use crate::{ fsm::ApplyResReporter, operation::{command::temp_split_path, SharedReadTablet}, raft::{Apply, Peer, Storage}, + router::ApplyTask, Result, StoreContext, }; @@ -161,6 +162,19 @@ pub fn install_tablet( } impl Peer { + /// Check whether there is a pending generate snapshot task, the task + /// needs to be sent to the apply system. + /// Always sending snapshot task after apply task, so it gets latest + /// snapshot. 
+ #[inline] + pub fn maybe_schedule_gen_snapshot(&mut self) { + if let Some(gen_task) = self.storage_mut().take_gen_snap_task() { + self.apply_scheduler() + .unwrap() + .send(ApplyTask::Snapshot(gen_task)); + } + } + pub fn on_snapshot_generated(&mut self, snapshot: GenSnapRes) { if self.storage_mut().on_snapshot_generated(snapshot) { self.raft_group_mut().ping(); @@ -270,6 +284,7 @@ impl Apply { /// Will schedule a task to read worker and then generate a snapshot /// asynchronously. pub fn schedule_gen_snapshot(&mut self, snap_task: GenSnapTask) { + debug!(self.logger, "scheduling snapshot"; "task" => ?snap_task); // Do not generate, the peer is removed. if self.tombstone() { snap_task.canceled.store(true, Ordering::SeqCst); diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 1d1f53f9c53..ce15ac20621 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -9,7 +9,7 @@ use collections::HashMap; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ metapb, - raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState}, + raft_serverpb::{RaftApplyState, RaftLocalState, RegionLocalState}, }; use raft::{ eraftpb::{ConfState, Entry, Snapshot}, @@ -234,10 +234,7 @@ impl Storage { #[inline] pub fn tablet_index(&self) -> u64 { - match self.region_state.get_state() { - PeerState::Tombstone | PeerState::Applying => 0, - _ => self.region_state.get_tablet_index(), - } + self.region_state.get_tablet_index() } #[inline] diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index a9353e171d9..8814a97cc5f 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -260,7 +260,15 @@ pub enum StoreMsg { SplitInit(Box), Tick(StoreTick), Start, - StoreUnreachable { to_store_id: u64 }, + StoreUnreachable { + to_store_id: u64, + }, + /// A message that 
used to check if a flush is happened. + #[cfg(feature = "testexport")] + WaitFlush { + region_id: u64, + ch: super::FlushChannel, + }, } impl ResourceMetered for StoreMsg {} diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 90f7c500903..2076272b44b 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -39,7 +39,7 @@ use raftstore::{ }; use raftstore_v2::{ create_store_batch_system, - router::{DebugInfoChannel, FlushChannel, PeerMsg, QueryResult, RaftRouter}, + router::{DebugInfoChannel, FlushChannel, PeerMsg, QueryResult, RaftRouter, StoreMsg}, Bootstrap, SimpleWriteEncoder, StateStorage, StoreSystem, }; use resource_metering::CollectorRegHandle; @@ -127,7 +127,16 @@ impl TestRouter { let res = self.send(region_id, PeerMsg::WaitFlush(ch)); match res { Ok(_) => return block_on(sub.result()).is_some(), - Err(TrySendError::Disconnected(_)) => return false, + Err(TrySendError::Disconnected(m)) => { + let PeerMsg::WaitFlush(ch) = m else { unreachable!() }; + match self + .store_router() + .send_control(StoreMsg::WaitFlush { region_id, ch }) + { + Ok(_) => return block_on(sub.result()).is_some(), + Err(_) => return false, + } + } Err(TrySendError::Full(_)) => thread::sleep(Duration::from_millis(10)), } } diff --git a/components/raftstore-v2/tests/integrations/test_conf_change.rs b/components/raftstore-v2/tests/integrations/test_conf_change.rs index 8a075bb9a35..4b3445a00ad 100644 --- a/components/raftstore-v2/tests/integrations/test_conf_change.rs +++ b/components/raftstore-v2/tests/integrations/test_conf_change.rs @@ -2,8 +2,9 @@ use std::{self, time::Duration}; -use engine_traits::{Peekable, CF_DEFAULT}; -use kvproto::raft_cmdpb::AdminCmdType; +use engine_traits::{Peekable, RaftEngineReadOnly, CF_DEFAULT}; +use futures::executor::block_on; +use kvproto::{raft_cmdpb::AdminCmdType, raft_serverpb::PeerState}; use 
raft::prelude::ConfChangeType; use raftstore_v2::{ router::{PeerMsg, PeerTick}, @@ -102,3 +103,78 @@ fn test_simple_change() { let mut cached = cluster.node(0).tablet_registry().get(2).unwrap(); check_skip_wal(cached.latest().unwrap().as_inner().path()); } + +/// Test if a peer can be destroyed by conf change if logs after conf change are +/// also replicated. +#[test] +fn test_remove_by_conf_change() { + let cluster = Cluster::with_node_count(2, None); + let region_id = 2; + let mut req = cluster.routers[0].new_request_for(2); + let admin_req = req.mut_admin_request(); + admin_req.set_cmd_type(AdminCmdType::ChangePeer); + admin_req + .mut_change_peer() + .set_change_type(ConfChangeType::AddLearnerNode); + let store_id = cluster.node(1).id(); + let new_peer = new_learner_peer(store_id, 10); + admin_req.mut_change_peer().set_peer(new_peer); + let resp = cluster.routers[0].admin_command(2, req.clone()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + // So heartbeat will create a learner. + cluster.dispatch(2, vec![]); + // Trigger the raft tick to replica the log to the learner and execute the + // snapshot task. + cluster.routers[0] + .send(region_id, PeerMsg::Tick(PeerTick::Raft)) + .unwrap(); + cluster.dispatch(region_id, vec![]); + // Wait some time so snapshot can be generated. + std::thread::sleep(Duration::from_millis(100)); + cluster.dispatch(region_id, vec![]); + + // write one kv to make flow control replicated. 
+ let (key, val) = (b"key", b"value"); + let header = Box::new(cluster.routers[0].new_request_for(region_id).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, key, val); + let (msg, _) = PeerMsg::simple_write(header, put.encode()); + cluster.routers[0].send(region_id, msg).unwrap(); + cluster.dispatch(region_id, vec![]); + + let new_conf_ver = req.get_header().get_region_epoch().get_conf_ver() + 1; + req.mut_header() + .mut_region_epoch() + .set_conf_ver(new_conf_ver); + req.mut_admin_request() + .mut_change_peer() + .set_change_type(ConfChangeType::RemoveNode); + let (admin_msg, admin_sub) = PeerMsg::admin_command(req.clone()); + // write one kv after removal + let (key, val) = (b"key1", b"value"); + let header = Box::new(cluster.routers[0].new_request_for(region_id).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, key, val); + let (msg, sub) = PeerMsg::simple_write(header, put.encode()); + // Send them at the same time so they will be all sent to learner. + cluster.routers[0].send(region_id, admin_msg).unwrap(); + cluster.routers[0].send(region_id, msg).unwrap(); + let resp = block_on(admin_sub.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + let resp = block_on(sub.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + + // Dispatch messages so the learner will receive conf remove and write at the + // same time. + cluster.dispatch(region_id, vec![]); + cluster.routers[1].wait_flush(region_id, Duration::from_millis(300)); + // Wait for apply. 
+ std::thread::sleep(Duration::from_millis(100)); + let raft_engine = &cluster.node(1).running_state().unwrap().raft_engine; + let region_state = raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); + assert_eq!(region_state.get_state(), PeerState::Tombstone); + assert_eq!(raft_engine.get_raft_state(region_id).unwrap(), None); +} diff --git a/components/raftstore/src/store/async_io/read.rs b/components/raftstore/src/store/async_io/read.rs index b298ed3529e..45492feb294 100644 --- a/components/raftstore/src/store/async_io/read.rs +++ b/components/raftstore/src/store/async_io/read.rs @@ -227,10 +227,10 @@ where error!("failed to create checkpointer"; "region_id" => region_id, "error" => %e); SNAP_COUNTER.generate.fail.inc(); } else { + let elapsed = start.saturating_elapsed_secs(); SNAP_COUNTER.generate.success.inc(); - SNAP_HISTOGRAM - .generate - .observe(start.saturating_elapsed_secs()); + SNAP_HISTOGRAM.generate.observe(elapsed); + info!("snapshot generated"; "region_id" => region_id, "elapsed" => elapsed, "key" => ?snap_key, "for_balance" => for_balance); res = Some(Box::new((snapshot, to_peer))) } From 9c0df6d68c72d30021b36d24275fdceca9864235 Mon Sep 17 00:00:00 2001 From: you06 Date: Wed, 1 Feb 2023 15:43:55 +0800 Subject: [PATCH 0478/1149] cop: handle unset scan details in store batch (#14102) close tikv/tikv#14109 Signed-off-by: you06 --- src/coprocessor/endpoint.rs | 2 + tests/integrations/coprocessor/test_select.rs | 60 +++++++++++-------- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index b9d01419a49..6ac1bebc541 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -600,6 +600,8 @@ impl Endpoint { response.set_locked(lock_info); } response.set_other_error(resp.take_other_error()); + // keep the exec details already generated. 
+ response.set_exec_details_v2(resp.take_exec_details_v2()); GLOBAL_TRACKERS.with_tracker(cur_tracker, |tracker| { tracker.write_scan_detail( response.mut_exec_details_v2().mut_scan_detail_v2(), diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index ad195f62774..056f24b5fee 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -4,11 +4,10 @@ use std::{cmp, thread, time::Duration}; use engine_traits::CF_LOCK; use kvproto::{ - coprocessor::{Request, Response, StoreBatchTask}, - errorpb, - kvrpcpb::{Context, IsolationLevel, LockInfo}, + coprocessor::{Request, Response, StoreBatchTask, StoreBatchTaskResponse}, + kvrpcpb::{Context, IsolationLevel}, }; -use protobuf::{Message, SingularPtrField}; +use protobuf::Message; use raftstore::store::Bucket; use test_coprocessor::*; use test_raftstore::{Cluster, ServerCluster}; @@ -2151,11 +2150,14 @@ fn test_batch_request() { } req }; - let verify_response = |result: &QueryResult, - data: &[u8], - region_err: &SingularPtrField, - locked: &SingularPtrField, - other_err: &String| { + let verify_response = |result: &QueryResult, resp: &Response| { + let (data, details, region_err, locked, other_err) = ( + resp.get_data(), + resp.get_exec_details_v2(), + &resp.region_error, + &resp.locked, + &resp.other_error, + ); match result { QueryResult::Valid(res) => { let expected_len = res.len(); @@ -2179,6 +2181,12 @@ fn test_batch_request() { assert!(region_err.is_none()); assert!(locked.is_none()); assert!(other_err.is_empty()); + let scan_details = details.get_scan_detail_v2(); + assert_eq!(scan_details.processed_versions, row_count as u64); + if row_count > 0 { + assert!(scan_details.processed_versions_size > 0); + assert!(scan_details.total_versions > 0); + } } QueryResult::ErrRegion => { assert!(region_err.is_some()); @@ -2198,6 +2206,20 @@ fn test_batch_request() { } }; + let batch_resp_2_resp = |batch_resp: &mut 
StoreBatchTaskResponse| -> Response { + let mut response = Response::default(); + response.set_data(batch_resp.take_data()); + if let Some(err) = batch_resp.region_error.take() { + response.set_region_error(err); + } + if let Some(lock_info) = batch_resp.locked.take() { + response.set_locked(lock_info); + } + response.set_other_error(batch_resp.take_other_error()); + response.set_exec_details_v2(batch_resp.take_exec_details_v2()); + response + }; + for (ranges, results, invalid_epoch, key_is_locked) in cases.iter() { let mut req = prepare_req(&mut cluster, ranges); if *invalid_epoch { @@ -2229,25 +2251,13 @@ fn test_batch_request() { } } let mut resp = handle_request(&endpoint, req); - let batch_results = resp.take_batch_responses().to_vec(); + let mut batch_results = resp.take_batch_responses().to_vec(); for (i, result) in results.iter().enumerate() { if i == 0 { - verify_response( - result, - resp.get_data(), - &resp.region_error, - &resp.locked, - &resp.other_error, - ); + verify_response(result, &resp); } else { - let batch_resp = batch_results.get(i - 1).unwrap(); - verify_response( - result, - batch_resp.get_data(), - &batch_resp.region_error, - &batch_resp.locked, - &batch_resp.other_error, - ); + let batch_resp = batch_results.get_mut(i - 1).unwrap(); + verify_response(result, &batch_resp_2_resp(batch_resp)); }; } if *key_is_locked { From db14c53267ebf815d6a8ae12036bd5e20326f7ee Mon Sep 17 00:00:00 2001 From: ShuNing Date: Thu, 2 Feb 2023 10:23:55 +0800 Subject: [PATCH 0479/1149] resource_control: unify wru/rru to ru (#14121) close tikv/tikv#14120 resource_control: unify wru/rru to ru Signed-off-by: nolouch --- Cargo.lock | 2 +- .../resource_control/src/resource_group.rs | 43 +++++++++---------- components/resource_control/src/service.rs | 14 +++--- 3 files changed, 28 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f2ce2ba4ce1..1747e74fafa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2730,7 +2730,7 @@ dependencies = [ [[package]] 
name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#1b2b4114103afb06796b7e44f45f7e55133673c0" +source = "git+https://github.com/pingcap/kvproto.git#a7c51106dfe70ebf59221018b50d1ec6ad25da74" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 1524ebcba5d..a0abfb11464 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -41,14 +41,10 @@ pub struct ResourceGroupManager { impl ResourceGroupManager { fn get_ru_setting(rg: &ResourceGroup, is_read: bool) -> u64 { match (rg.get_mode(), is_read) { - (GroupMode::RuMode, true) => rg + // RU mode, read and write use the same setting. + (GroupMode::RuMode, _) => rg .get_r_u_settings() - .get_r_r_u() - .get_settings() - .get_fill_rate(), - (GroupMode::RuMode, false) => rg - .get_r_u_settings() - .get_w_r_u() + .get_r_u() .get_settings() .get_fill_rate(), // TODO: currently we only consider the cpu usage in the read path, we may also take @@ -311,6 +307,10 @@ pub(crate) mod tests { use super::*; + pub fn new_resource_group_ru(name: String, ru: u64) -> ResourceGroup { + new_resource_group(name, true, ru, ru) + } + pub fn new_resource_group( name: String, is_ru_mode: bool, @@ -328,15 +328,12 @@ pub(crate) mod tests { }; group.set_mode(mode); if is_ru_mode { + assert!(read_tokens == write_tokens); let mut ru_setting = GroupRequestUnitSettings::new(); ru_setting - .mut_r_r_u() + .mut_r_u() .mut_settings() .set_fill_rate(read_tokens); - ru_setting - .mut_w_r_u() - .mut_settings() - .set_fill_rate(write_tokens); group.set_r_u_settings(ru_setting); } else { let mut resource_setting = GroupRawResourceSettings::new(); @@ -357,7 +354,7 @@ pub(crate) mod tests { fn test_resource_group() { let resource_manager = ResourceGroupManager::default(); - let group1 = new_resource_group("TEST".into(), true, 100, 100); + let group1 = 
new_resource_group_ru("TEST".into(), 100); resource_manager.add_resource_group(group1); assert!(resource_manager.get_resource_group("test1").is_none()); @@ -367,7 +364,7 @@ pub(crate) mod tests { group .value() .get_r_u_settings() - .get_r_r_u() + .get_r_u() .get_settings() .get_fill_rate(), 100 @@ -375,14 +372,14 @@ pub(crate) mod tests { drop(group); assert_eq!(resource_manager.resource_groups.len(), 1); - let group1 = new_resource_group("Test".into(), true, 200, 100); + let group1 = new_resource_group_ru("Test".into(), 200); resource_manager.add_resource_group(group1); let group = resource_manager.get_resource_group("test").unwrap(); assert_eq!( group .value() .get_r_u_settings() - .get_r_r_u() + .get_r_u() .get_settings() .get_fill_rate(), 200 @@ -390,7 +387,7 @@ pub(crate) mod tests { drop(group); assert_eq!(resource_manager.resource_groups.len(), 1); - let group2 = new_resource_group("test2".into(), true, 400, 200); + let group2 = new_resource_group_ru("test2".into(), 400); resource_manager.add_resource_group(group2); assert_eq!(resource_manager.resource_groups.len(), 2); @@ -451,7 +448,7 @@ pub(crate) mod tests { drop(group2); // test add 1 new resource group - let new_group = new_resource_group("new_group".into(), true, 500, 500); + let new_group = new_resource_group_ru("new_group".into(), 500); resource_manager.add_resource_group(new_group); assert_eq!(resource_ctl.resource_consumptions.len(), 4); @@ -466,29 +463,29 @@ pub(crate) mod tests { let resource_ctl = resource_manager.derive_controller("test_read".into(), true); let resource_ctl_write = resource_manager.derive_controller("test_write".into(), false); - let group1 = new_resource_group("test1".into(), true, 5000, 1000); + let group1 = new_resource_group_ru("test1".into(), 5000); resource_manager.add_resource_group(group1); assert_eq!(resource_ctl.resource_group("test1".as_bytes()).weight, 20); assert_eq!( resource_ctl_write.resource_group("test1".as_bytes()).weight, - 100 + 20 ); // add a resource 
group with big ru - let group1 = new_resource_group("test2".into(), true, 50000, 2000); + let group1 = new_resource_group_ru("test2".into(), 50000); resource_manager.add_resource_group(group1); assert_eq!(*resource_ctl.max_ru_quota.lock().unwrap(), 50000); assert_eq!(resource_ctl.resource_group("test1".as_bytes()).weight, 100); assert_eq!(resource_ctl.resource_group("test2".as_bytes()).weight, 10); // resource_ctl_write should be unchanged. - assert_eq!(*resource_ctl_write.max_ru_quota.lock().unwrap(), 10000); + assert_eq!(*resource_ctl_write.max_ru_quota.lock().unwrap(), 50000); assert_eq!( resource_ctl_write.resource_group("test1".as_bytes()).weight, 100 ); assert_eq!( resource_ctl_write.resource_group("test2".as_bytes()).weight, - 50 + 10 ); } } diff --git a/components/resource_control/src/service.rs b/components/resource_control/src/service.rs index ea9a9d724b9..2381b168987 100644 --- a/components/resource_control/src/service.rs +++ b/components/resource_control/src/service.rs @@ -125,7 +125,7 @@ pub mod tests { use test_pd::{mocker::Service, util::*, Server as MockServer}; use tikv_util::{config::ReadableDuration, worker::Builder}; - use crate::resource_group::tests::new_resource_group; + use crate::resource_group::tests::{new_resource_group, new_resource_group_ru}; fn new_test_server_and_client( update_interval: ReadableDuration, @@ -202,12 +202,12 @@ pub mod tests { s_clone.watch_resource_groups().await; }); // Mock add - let group1 = new_resource_group("TEST1".into(), true, 100, 100); + let group1 = new_resource_group_ru("TEST1".into(), 100); add_resource_group(s.pd_client.clone(), group1); - let group2 = new_resource_group("TEST2".into(), true, 100, 100); + let group2 = new_resource_group_ru("TEST2".into(), 100); add_resource_group(s.pd_client.clone(), group2); // Mock modify - let group2 = new_resource_group("TEST2".into(), true, 50, 50); + let group2 = new_resource_group_ru("TEST2".into(), 50); add_resource_group(s.pd_client.clone(), group2); let (res, 
revision) = block_on(s.list_resource_groups()); assert_eq!(res.len(), 2); @@ -227,7 +227,7 @@ pub mod tests { group .value() .get_r_u_settings() - .get_r_r_u() + .get_r_u() .get_settings() .get_fill_rate(), 50 @@ -247,7 +247,7 @@ pub mod tests { s_clone.watch_resource_groups().await; }); // Mock add - let group1 = new_resource_group("TEST1".into(), true, 100, 100); + let group1 = new_resource_group_ru("TEST1".into(), 100); add_resource_group(s.pd_client.clone(), group1); // Mock reboot watch server let watch_global_config_fp = "watch_global_config_return"; @@ -255,7 +255,7 @@ pub mod tests { std::thread::sleep(Duration::from_millis(100)); fail::remove(watch_global_config_fp); // Mock add after rebooting will success - let group1 = new_resource_group("TEST2".into(), true, 100, 100); + let group1 = new_resource_group_ru("TEST2".into(), 100); add_resource_group(s.pd_client.clone(), group1); // Wait watcher update std::thread::sleep(Duration::from_secs(1)); From d1d29203e6a93b05dd435ea27a9b39fb30b23f41 Mon Sep 17 00:00:00 2001 From: Hu# Date: Thu, 2 Feb 2023 16:33:56 +0800 Subject: [PATCH 0480/1149] pd_client: fix item value type (#14106) close tikv/tikv#14104 We need to use the new field to support item value as bytes to avoid proto string check failures. 
Signed-off-by: husharp --- Cargo.lock | 3 +-- components/resource_control/Cargo.toml | 1 - .../resource_control/src/resource_group.rs | 1 - components/resource_control/src/service.rs | 27 +++++++++++++------ components/test_pd/src/mocker/mod.rs | 5 ++-- components/test_pd/src/server.rs | 2 +- .../failpoints/cases/test_pd_client_legacy.rs | 12 ++++++--- 7 files changed, 32 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1747e74fafa..78c9e88b538 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2730,7 +2730,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#a7c51106dfe70ebf59221018b50d1ec6ad25da74" +source = "git+https://github.com/pingcap/kvproto.git#2b853bed812556901846f42820b63d8a0d9c8d24" dependencies = [ "futures 0.3.15", "grpcio", @@ -4697,7 +4697,6 @@ dependencies = [ "test_pd", "test_pd_client", "tikv_util", - "tokio", "yatp", ] diff --git a/components/resource_control/Cargo.toml b/components/resource_control/Cargo.toml index 3f796627040..39d37ac0f6b 100644 --- a/components/resource_control/Cargo.toml +++ b/components/resource_control/Cargo.toml @@ -26,5 +26,4 @@ slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global test_pd = { workspace = true } test_pd_client = { workspace = true } tikv_util = { workspace = true } -tokio = { version = "1.5", features = ["time"] } yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index a0abfb11464..c5112c13516 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -358,7 +358,6 @@ pub(crate) mod tests { resource_manager.add_resource_group(group1); assert!(resource_manager.get_resource_group("test1").is_none()); - let group = resource_manager.get_resource_group("test").unwrap(); assert_eq!( group diff --git 
a/components/resource_control/src/service.rs b/components/resource_control/src/service.rs index 2381b168987..fc24af4fdc4 100644 --- a/components/resource_control/src/service.rs +++ b/components/resource_control/src/service.rs @@ -2,10 +2,10 @@ use std::{sync::Arc, time::Duration}; -use futures::StreamExt; +use futures::{compat::Future01CompatExt, StreamExt}; use kvproto::{pdpb::EventType, resource_manager::ResourceGroup}; use pd_client::{Error as PdError, PdClient, RpcClient, RESOURCE_CONTROL_CONFIG_PATH}; -use tikv_util::error; +use tikv_util::{error, timer::GLOBAL_TIMER_HANDLE}; use crate::ResourceGroupManager; @@ -31,6 +31,8 @@ impl ResourceManagerService { } } +const RETRY_INTERVAL: Duration = Duration::from_secs(1); // to consistent with pd_client + impl ResourceManagerService { pub async fn watch_resource_groups(&mut self) { // Firstly, load all resource groups as of now. @@ -56,7 +58,7 @@ impl ResourceManagerService { EventType::Put => { if let Ok(group) = protobuf::parse_from_bytes::( - item.get_value().as_bytes(), + item.get_payload(), ) { self.manager.add_resource_group(group); @@ -69,7 +71,10 @@ impl ResourceManagerService { } Err(err) => { error!("failed to get stream"; "err" => ?err); - tokio::time::sleep(Duration::from_secs(1)).await; + let _ = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + RETRY_INTERVAL) + .compat() + .await; } } } @@ -85,7 +90,10 @@ impl ResourceManagerService { } Err(err) => { error!("failed to watch resource groups"; "err" => ?err); - tokio::time::sleep(Duration::from_secs(1)).await; + let _ = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + RETRY_INTERVAL) + .compat() + .await; } } } @@ -101,13 +109,16 @@ impl ResourceManagerService { Ok((items, revision)) => { let groups = items .into_iter() - .filter_map(|g| protobuf::parse_from_bytes(g.get_value().as_bytes()).ok()) + .filter_map(|g| protobuf::parse_from_bytes(g.get_payload()).ok()) .collect(); return (groups, revision); } Err(err) => { error!("failed to load 
global config"; "err" => ?err); - tokio::time::sleep(Duration::from_secs(1)).await; + let _ = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + RETRY_INTERVAL) + .compat() + .await; } } } @@ -142,7 +153,7 @@ pub mod tests { item.set_name(group.get_name().to_string()); let mut buf = Vec::new(); group.write_to_vec(&mut buf).unwrap(); - item.set_value(String::from_utf8(buf).unwrap()); + item.set_payload(buf); futures::executor::block_on(async move { pd_client diff --git a/components/test_pd/src/mocker/mod.rs b/components/test_pd/src/mocker/mod.rs index b9ae839b06e..fc257b12a9f 100644 --- a/components/test_pd/src/mocker/mod.rs +++ b/components/test_pd/src/mocker/mod.rs @@ -47,7 +47,7 @@ pub trait PdMocker { .map(|kv| { let mut item = GlobalConfigItem::default(); item.set_name(String::from_utf8(kv.key().to_vec()).unwrap()); - item.set_value(String::from_utf8(kv.value().to_vec()).unwrap()); + item.set_payload(kv.value().into()); item }) .collect(); @@ -68,7 +68,8 @@ pub trait PdMocker { block_on(async move { match item.get_kind() { EventType::Put => { - let kv = KeyValue(MetaKey(item.get_name().into()), item.get_value().into()); + let kv = + KeyValue(MetaKey(item.get_name().into()), item.get_payload().into()); cli.lock().await.set(kv).await } EventType::Delete => { diff --git a/components/test_pd/src/server.rs b/components/test_pd/src/server.rs index cb495307a1f..28d4077b674 100644 --- a/components/test_pd/src/server.rs +++ b/components/test_pd/src/server.rs @@ -242,7 +242,7 @@ impl Pd for PdMock { KvEventType::Delete => EventType::Delete, }); change.set_name(from_utf8(event.pair.key()).unwrap().to_string()); - change.set_value(from_utf8(event.pair.value()).unwrap().to_string()); + change.set_payload(event.pair.value().into()); let mut wc = WatchGlobalConfigResponse::default(); wc.set_changes(vec![change].into()); let _ = sink.send((wc, WriteFlags::default())).await; diff --git a/tests/failpoints/cases/test_pd_client_legacy.rs 
b/tests/failpoints/cases/test_pd_client_legacy.rs index 3638e448bd9..d6cf7f1817d 100644 --- a/tests/failpoints/cases/test_pd_client_legacy.rs +++ b/tests/failpoints/cases/test_pd_client_legacy.rs @@ -1,6 +1,7 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. use std::{ + str::from_utf8, sync::{mpsc, Arc}, thread, time::Duration, @@ -118,7 +119,7 @@ fn test_load_global_config() { .map(|(name, value)| { let mut item = GlobalConfigItem::default(); item.set_name(name.to_string()); - item.set_value(value.to_string()); + item.set_payload(value.as_bytes().into()); item }) .collect::>(), @@ -132,7 +133,7 @@ fn test_load_global_config() { assert!( res.iter() .zip(check_items) - .all(|(item1, item2)| item1.name == item2.0 && item1.value == item2.1) + .all(|(item1, item2)| item1.name == item2.0 && item1.payload == item2.1.as_bytes()) ); assert_eq!(revision, 3); } @@ -156,7 +157,10 @@ fn test_watch_global_config_on_closed_server() { Ok(r) => { for item in r.get_changes() { assert_eq!(item.get_name(), items_clone[i].0); - assert_eq!(item.get_value(), items_clone[i].1); + assert_eq!( + from_utf8(item.get_payload()).unwrap(), + items_clone[i].1 + ); i += 1; } } @@ -181,7 +185,7 @@ fn test_watch_global_config_on_closed_server() { .map(|(name, value)| { let mut item = GlobalConfigItem::default(); item.set_name(name.to_string()); - item.set_value(value.to_string()); + item.set_payload(value.as_bytes().into()); item }) .collect::>(), From 37915609defa68c174e7659f99108a0982662989 Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 3 Feb 2023 13:57:56 +0800 Subject: [PATCH 0481/1149] raftstore-v2: add tablet logger and update dep (#14129) ref tikv/tikv#12842 - Update raft-engine to fix data corruption during restart - Add tablet logger so we can know which tablet the logs belongs to Signed-off-by: Jay Lee --- Cargo.lock | 209 +++++++++--------- cmd/tikv-ctl/src/main.rs | 3 +- components/engine_rocks/src/logger.rs | 24 ++ components/raft_log_engine/src/engine.rs | 11 +- 
.../raftstore-v2/src/operation/ready/mod.rs | 12 +- src/config/mod.rs | 15 +- src/server/engine_factory.rs | 13 +- tests/integrations/storage/test_titan.rs | 5 +- 8 files changed, 167 insertions(+), 125 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 78c9e88b538..633194d9323 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -225,7 +225,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d962799a5863fdf06fbf594e04102130582d010379137e9a98a7e2e693a5885" dependencies = [ "error-code", - "libc 0.2.132", + "libc 0.2.139", "wasm-bindgen", "winapi 0.3.9", ] @@ -256,7 +256,7 @@ version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "winapi 0.3.9", ] @@ -447,7 +447,7 @@ dependencies = [ "addr2line", "cc", "cfg-if 1.0.0", - "libc 0.2.132", + "libc 0.2.139", "miniz_oxide 0.4.4", "object", "rustc-demangle", @@ -603,7 +603,7 @@ dependencies = [ "bcc-sys", "bitflags", "byteorder", - "libc 0.2.132", + "libc 0.2.139", "regex", "thiserror", ] @@ -735,7 +735,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" dependencies = [ "cc", - "libc 0.2.132", + "libc 0.2.139", "pkg-config", ] @@ -761,7 +761,7 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7f788eaf239475a3c1e1acf89951255a46c4b9b46cf3e866fc4d0707b4b9e36" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "valgrind_request", ] @@ -934,7 +934,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f54d78e30b388d4815220c8dd03fea5656b6c6d32adb59e89061552a102f8da1" dependencies = [ "glob", - "libc 0.2.132", + "libc 0.2.139", "libloading", ] @@ -1018,7 +1018,7 @@ dependencies = [ "byteorder", "bytes", "error_code", - "libc 0.2.132", + "libc 0.2.139", "panic_hook", 
"protobuf", "rand 0.8.5", @@ -1077,7 +1077,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a89e2ae426ea83155dccf10c0fa6b1463ef6d5fcb44cee0b224a408fa640a62" dependencies = [ "core-foundation-sys", - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -1092,7 +1092,7 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9e393a7668fe1fad3075085b86c781883000b4ede868f43627b34a87c8b7ded" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "winapi 0.3.9", ] @@ -1150,7 +1150,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63aaaf47e457badbcb376c65a49d0f182c317ebd97dc6d1ced94c8e1d09c0f3a" dependencies = [ "criterion", - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -1217,7 +1217,7 @@ dependencies = [ "cfg-if 1.0.0", "crossbeam-utils 0.8.8", "lazy_static", - "memoffset", + "memoffset 0.6.4", "scopeguard", ] @@ -1229,7 +1229,7 @@ dependencies = [ "autocfg", "cfg-if 1.0.0", "crossbeam-utils 0.8.11", - "memoffset", + "memoffset 0.6.4", "once_cell", "scopeguard", ] @@ -1420,7 +1420,7 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "redox_users", "winapi 0.3.9", ] @@ -1681,7 +1681,7 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5115567ac25674e0043e472be13d14e537f37ea8aa4bdc4aef0c89add1db1ff" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "str-buf", ] @@ -1789,7 +1789,7 @@ dependencies = [ "grpcio", "kvproto", "lazy_static", - "libc 0.2.132", + "libc 0.2.139", "libloading", "matches", "nix 0.24.1", @@ -1845,7 +1845,7 @@ dependencies = [ "crossbeam-utils 0.8.8", "fs2", "lazy_static", - "libc 0.2.132", + "libc 0.2.139", "maligned", "online_config", "openssl", @@ -1870,7 +1870,7 @@ version = "0.8.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed3d8a5e20435ff00469e51a0d82049bae66504b5c429920dadf9bb54d47b3f" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "thiserror", "winapi 0.3.9", ] @@ -1882,7 +1882,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d34cfa13a63ae058bfa601fe9e313bbdb3746427c1459185464ce0fcf62e1e8" dependencies = [ "cfg-if 1.0.0", - "libc 0.2.132", + "libc 0.2.139", "redox_syscall 0.2.11", "winapi 0.3.9", ] @@ -1895,7 +1895,7 @@ checksum = "d691fdb3f817632d259d09220d4cf0991dbb2c9e59e044a02a59194bf6e14484" dependencies = [ "cc", "lazy_static", - "libc 0.2.132", + "libc 0.2.139", "winapi 0.3.9", ] @@ -1923,7 +1923,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2adaffba6388640136149e18ed080b77a78611c1e1d6de75aedcdf78df5d4682" dependencies = [ "crc32fast", - "libc 0.2.132", + "libc 0.2.139", "libz-sys", "miniz_oxide 0.3.7", ] @@ -1964,7 +1964,7 @@ name = "fs2" version = "0.4.3" source = "git+https://github.com/tabokie/fs2-rs?branch=tikv#cd503764a19a99d74c1ab424dd13d6bcd093fcae" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "winapi 0.3.9", ] @@ -1990,7 +1990,7 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f41b048a94555da0f42f1d632e2e19510084fb8e303b0daa2816e733fb3644a0" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -2226,7 +2226,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "473a1265acc8ff1e808cd0a1af8cee3c2ee5200916058a2ca113c29f2d903571" dependencies = [ "cfg-if 0.1.10", - "libc 0.2.132", + "libc 0.2.139", "wasi 0.7.0", ] @@ -2238,7 +2238,7 @@ checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ "cfg-if 1.0.0", "js-sys", - "libc 0.2.132", + "libc 0.2.139", "wasi 0.10.2+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -2287,7 +2287,7 @@ dependencies = [ "futures-executor", "futures-util", 
"grpcio-sys", - "libc 0.2.132", + "libc 0.2.139", "log", "parking_lot 0.11.1", "protobuf", @@ -2324,7 +2324,7 @@ dependencies = [ "bindgen 0.59.2", "cc", "cmake", - "libc 0.2.132", + "libc 0.2.139", "libz-sys", "openssl-sys", "pkg-config", @@ -2392,7 +2392,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "307c3c9f937f38e3534b1d6447ecf090cafcc9744e4a6360e8b037b2cf5af120" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -2600,7 +2600,7 @@ checksum = "4816c66d2c8ae673df83366c18341538f234a26d65a9ecea5c348b453ac1d02f" dependencies = [ "bitflags", "inotify-sys", - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -2609,7 +2609,7 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -2636,7 +2636,7 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -2682,7 +2682,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2b1d42ef453b30b7387e113da1c83ab1605d90c5b4e0eb8e96d016ed3b8c160" dependencies = [ "getrandom 0.1.12", - "libc 0.2.132", + "libc 0.2.139", "log", ] @@ -2823,9 +2823,9 @@ checksum = "e32a70cf75e5846d53a673923498228bbec6a8624708a9ea5645f075d6276122" [[package]] name = "libc" -version = "0.2.132" +version = "0.2.139" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" [[package]] name = "libfuzzer-sys" @@ -2865,7 +2865,7 @@ dependencies = [ "bzip2-sys", "cc", "cmake", - "libc 0.2.132", + "libc 0.2.139", "libtitan_sys", "libz-sys", "lz4-sys", 
@@ -2883,7 +2883,7 @@ dependencies = [ "bzip2-sys", "cc", "cmake", - "libc 0.2.132", + "libc 0.2.139", "libz-sys", "lz4-sys", "snappy-sys", @@ -2897,7 +2897,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de5435b8549c16d423ed0c03dbaafe57cf6c3344744f1242520d59c9d8ecec66" dependencies = [ "cc", - "libc 0.2.132", + "libc 0.2.139", "pkg-config", "vcpkg", ] @@ -2953,7 +2953,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dca79aa95d8b3226213ad454d328369853be3a1382d89532a854f4d69640acae" dependencies = [ "cc", - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -3008,7 +3008,7 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -3017,7 +3017,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "winapi 0.3.9", ] @@ -3027,7 +3027,7 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -3039,6 +3039,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +dependencies = [ + "autocfg", +] + [[package]] name = "memory_trace_macros" version = "0.1.0" @@ -3098,7 +3107,7 @@ dependencies = [ "fuchsia-zircon-sys", "iovec", "kernel32-sys", - "libc 0.2.132", + "libc 0.2.139", "log", "miow", "net2", @@ -3112,7 +3121,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "log", "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.42.0", @@ -3158,7 +3167,7 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1587ebb20a5b04738f16cffa7e2526f1b8496b84f92920facd518362ff1559eb" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -3209,7 +3218,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8d96b2e1c8da3957d58100b09f102c6d9cfdfced01b7ec5a8974044bb09dbd4" dependencies = [ "lazy_static", - "libc 0.2.132", + "libc 0.2.139", "log", "openssl", "openssl-probe", @@ -3227,7 +3236,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "391630d12b68002ae1e25e8f974306474966550ad82dac6886fb8910c19568ae" dependencies = [ "cfg-if 0.1.10", - "libc 0.2.132", + "libc 0.2.139", "winapi 0.3.9", ] @@ -3239,22 +3248,22 @@ checksum = "8f17df307904acd05aa8e32e97bb20f2a0df1728bbc2d771ae8f9a90463441e9" dependencies = [ "bitflags", "cfg-if 1.0.0", - "libc 0.2.132", - "memoffset", + "libc 0.2.139", + "memoffset 0.6.4", ] [[package]] name = "nix" -version = "0.25.0" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e322c04a9e3440c327fca7b6c8a63e6890a32fa2ad689db972425f07e0d22abb" +checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" dependencies = [ - "autocfg", "bitflags", "cfg-if 1.0.0", - "libc 0.2.132", - "memoffset", + "libc 0.2.139", + "memoffset 0.7.1", "pin-utils", + "static_assertions", ] [[package]] @@ -3311,7 +3320,7 @@ dependencies = [ "fsevent", "fsevent-sys", "inotify", - "libc 0.2.132", + "libc 0.2.139", "mio 0.6.23", "mio-extras", "walkdir", @@ -3464,7 +3473,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" dependencies = [ "hermit-abi", - 
"libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -3542,7 +3551,7 @@ dependencies = [ "bitflags", "cfg-if 1.0.0", "foreign-types", - "libc 0.2.132", + "libc 0.2.139", "once_cell", "openssl-macros", "openssl-sys", @@ -3582,7 +3591,7 @@ checksum = "e5f9bd0c2710541a3cda73d6f9ac4f1b240de4ae261065d309dbe73d9dceb42f" dependencies = [ "autocfg", "cc", - "libc 0.2.132", + "libc 0.2.139", "openssl-src", "pkg-config", "vcpkg", @@ -3612,7 +3621,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "winapi 0.3.9", ] @@ -3649,7 +3658,7 @@ checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" dependencies = [ "cfg-if 1.0.0", "instant", - "libc 0.2.132", + "libc 0.2.139", "redox_syscall 0.2.11", "smallvec", "winapi 0.3.9", @@ -3662,7 +3671,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28141e0cc4143da2443301914478dc976a61ffdb3f043058310c70df2fed8954" dependencies = [ "cfg-if 1.0.0", - "libc 0.2.132", + "libc 0.2.139", "redox_syscall 0.2.11", "smallvec", "windows-sys 0.32.0", @@ -3739,7 +3748,7 @@ checksum = "b8f94885300e262ef461aa9fd1afbf7df3caf9e84e271a74925d1c6c8b24830f" dependencies = [ "bitflags", "byteorder", - "libc 0.2.132", + "libc 0.2.139", "mmap", "nom 4.2.3", "phf", @@ -3882,7 +3891,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d27361d7578b410d0eb5fe815c2b2105b01ab770a7c738cb9a231457a809fcc7" dependencies = [ "ipnetwork", - "libc 0.2.132", + "libc 0.2.139", "pnet_base", "pnet_sys", "winapi 0.2.8", @@ -3894,7 +3903,7 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82f881a6d75ac98c5541db6144682d1773bb14c6fc50c6ebac7086c8f7f23c29" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "winapi 0.2.8", "ws2_32-sys", ] @@ -3909,7 +3918,7 @@ dependencies = [ "cfg-if 
1.0.0", "findshlibs", "inferno", - "libc 0.2.132", + "libc 0.2.139", "log", "nix 0.24.1", "once_cell", @@ -3993,7 +4002,7 @@ dependencies = [ "byteorder", "hex 0.4.2", "lazy_static", - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -4002,7 +4011,7 @@ version = "0.4.2" source = "git+https://github.com/tikv/procinfo-rs?rev=6599eb9dca74229b2c1fcc44118bef7eff127128#6599eb9dca74229b2c1fcc44118bef7eff127128" dependencies = [ "byteorder", - "libc 0.2.132", + "libc 0.2.139", "nom 2.2.1", "rustc_version 0.2.3", ] @@ -4027,7 +4036,7 @@ dependencies = [ "cfg-if 1.0.0", "fnv", "lazy_static", - "libc 0.2.132", + "libc 0.2.139", "memchr", "parking_lot 0.11.1", "protobuf", @@ -4192,7 +4201,7 @@ dependencies = [ [[package]] name = "raft-engine" version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#82f6da7b8dff1856483e8e72a59dda903fb2499b" +source = "git+https://github.com/tikv/raft-engine.git#33530112c3a4acaf8c50ca9d0470284109926296" dependencies = [ "byteorder", "crc32fast", @@ -4203,11 +4212,11 @@ dependencies = [ "hex 0.4.2", "if_chain", "lazy_static", - "libc 0.2.132", + "libc 0.2.139", "log", "lz4-sys", "memmap2", - "nix 0.25.0", + "nix 0.26.2", "num-derive", "num-traits", "parking_lot 0.12.1", @@ -4226,7 +4235,7 @@ dependencies = [ [[package]] name = "raft-engine-ctl" version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#82f6da7b8dff1856483e8e72a59dda903fb2499b" +source = "git+https://github.com/tikv/raft-engine.git#33530112c3a4acaf8c50ca9d0470284109926296" dependencies = [ "clap 3.1.6", "env_logger", @@ -4390,7 +4399,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" dependencies = [ "fuchsia-cprng", - "libc 0.2.132", + "libc 0.2.139", "rand_core 0.3.1", "rdrand", "winapi 0.3.9", @@ -4403,7 +4412,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" dependencies = [ "getrandom 0.1.12", - "libc 0.2.132", + "libc 0.2.139", "rand_chacha 0.2.1", "rand_core 0.5.1", "rand_hc", @@ -4415,7 +4424,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "rand_chacha 0.3.0", "rand_core 0.6.2", ] @@ -4710,7 +4719,7 @@ dependencies = [ "grpcio", "kvproto", "lazy_static", - "libc 0.2.132", + "libc 0.2.139", "log", "online_config", "pdqselect", @@ -4773,7 +4782,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b72b84d47e8ec5a4f2872e8262b8f8256c5be1c938a7d6d3a867a3ba8f722f74" dependencies = [ "cc", - "libc 0.2.132", + "libc 0.2.139", "once_cell", "spin", "untrusted", @@ -4786,7 +4795,7 @@ name = "rocksdb" version = "0.3.0" source = "git+https://github.com/tikv/rust-rocksdb.git#14e4fe7f47054408cf3d2905beeca798c6656191" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "librocksdb_sys", ] @@ -5034,7 +5043,7 @@ dependencies = [ "bitflags", "core-foundation", "core-foundation-sys", - "libc 0.2.132", + "libc 0.2.139", "security-framework-sys", ] @@ -5045,7 +5054,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3676258fd3cfe2c9a0ec99ce3038798d847ce3e4bb17746373eb9f0f1ac16339" dependencies = [ "core-foundation-sys", - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -5241,7 +5250,7 @@ dependencies = [ "hex 0.4.2", "keys", "kvproto", - "libc 0.2.132", + "libc 0.2.139", "log", "log_wrappers", "pd_client", @@ -5302,7 +5311,7 @@ version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "signal-hook-registry", ] @@ -5312,7 +5321,7 @@ version = "1.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -5445,7 +5454,7 @@ version = "0.1.0" source = "git+https://github.com/busyjay/rust-snappy.git?branch=static-link#8c12738bad811397600455d6982aff754ea2ac44" dependencies = [ "cmake", - "libc 0.2.132", + "libc 0.2.139", "pkg-config", ] @@ -5473,7 +5482,7 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "winapi 0.3.9", ] @@ -5683,7 +5692,7 @@ checksum = "ade661fa5e048ada64ad7901713301c21d2dbc5b65ee7967de8826c111452960" dependencies = [ "cfg-if 1.0.0", "core-foundation-sys", - "libc 0.2.132", + "libc 0.2.139", "ntapi", "once_cell", "rayon", @@ -5766,7 +5775,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" dependencies = [ "cfg-if 1.0.0", - "libc 0.2.132", + "libc 0.2.139", "rand 0.8.5", "redox_syscall 0.2.11", "remove_dir_all", @@ -6009,7 +6018,7 @@ dependencies = [ "hyper", "keys", "kvproto", - "libc 0.2.132", + "libc 0.2.139", "log_wrappers", "more-asserts", "online_config", @@ -6310,7 +6319,7 @@ dependencies = [ "keys", "kvproto", "lazy_static", - "libc 0.2.132", + "libc 0.2.139", "libloading", "log", "log_wrappers", @@ -6410,7 +6419,7 @@ dependencies = [ "hex 0.4.2", "keys", "kvproto", - "libc 0.2.132", + "libc 0.2.139", "log", "log_wrappers", "pd_client", @@ -6445,7 +6454,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e37706572f4b151dff7a0146e040804e9c26fe3a3118591112f05cf12a4216c1" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "paste", "tikv-jemalloc-sys", ] @@ -6458,7 +6467,7 @@ checksum = 
"aeab4310214fe0226df8bfeb893a291a58b19682e8a07e1e1d4483ad4200d315" dependencies = [ "cc", "fs_extra", - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -6467,7 +6476,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20612db8a13a6c06d57ec83953694185a367e16945f66565e8028d2c0bd76979" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "tikv-jemalloc-sys", ] @@ -6490,7 +6499,7 @@ version = "0.1.0" dependencies = [ "fxhash", "lazy_static", - "libc 0.2.132", + "libc 0.2.139", "mimalloc", "snmalloc-rs", "tcmalloc", @@ -6559,7 +6568,7 @@ dependencies = [ "http", "kvproto", "lazy_static", - "libc 0.2.132", + "libc 0.2.139", "log", "log_wrappers", "mnt", @@ -6608,7 +6617,7 @@ version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "redox_syscall 0.1.56", "winapi 0.3.9", ] @@ -6651,7 +6660,7 @@ checksum = "a9e03c497dc955702ba729190dc4aac6f2a0ce97f913e5b1b5912fc5039d9099" dependencies = [ "autocfg", "bytes", - "libc 0.2.132", + "libc 0.2.139", "memchr", "mio 0.8.5", "num_cpus", @@ -7037,7 +7046,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "055058552ca15c566082fc61da433ae678f78986a6f16957e33162d1b218792a" dependencies = [ "kernel32-sys", - "libc 0.2.132", + "libc 0.2.139", "winapi 0.2.8", ] @@ -7222,7 +7231,7 @@ checksum = "2a5a7e487e921cf220206864a94a89b6c6905bfc19f1057fa26a4cb360e5c1d2" dependencies = [ "either", "lazy_static", - "libc 0.2.132", + "libc 0.2.139", ] [[package]] @@ -7461,7 +7470,7 @@ version = "5.0.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" dependencies = [ - "libc 0.2.132", + "libc 0.2.139", "zstd-sys", ] @@ -7472,5 +7481,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" dependencies = [ "cc", - "libc 0.2.132", + "libc 0.2.139", ] diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 30cd7035bef..e4c7be98dba 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -686,7 +686,8 @@ fn build_rocks_opts(cfg: &TikvConfig) -> engine_rocks::RocksDbOptions { .unwrap() .map(Arc::new); let env = get_env(key_manager, None /* io_rate_limiter */).unwrap(); - cfg.rocksdb.build_opt(&cfg.rocksdb.build_resources(env)) + let resource = cfg.rocksdb.build_resources(env); + cfg.rocksdb.build_opt(&resource, cfg.storage.engine) } fn run_ldb_command(args: Vec, cfg: &TikvConfig) { diff --git a/components/engine_rocks/src/logger.rs b/components/engine_rocks/src/logger.rs index b7b196448c5..85f4de713ac 100644 --- a/components/engine_rocks/src/logger.rs +++ b/components/engine_rocks/src/logger.rs @@ -20,6 +20,30 @@ impl Logger for RocksdbLogger { } } +pub struct TabletLogger { + tablet_name: String, +} + +impl TabletLogger { + pub fn new(tablet_name: String) -> Self { + Self { tablet_name } + } +} + +impl Logger for TabletLogger { + fn logv(&self, log_level: InfoLogLevel, log: &str) { + match log_level { + InfoLogLevel::Header => info!(#"rocksdb_log_header", "[{}]{}", self.tablet_name, log), + InfoLogLevel::Debug => debug!(#"rocksdb_log", "[{}]{}", self.tablet_name, log), + InfoLogLevel::Info => info!(#"rocksdb_log", "[{}]{}", self.tablet_name, log), + InfoLogLevel::Warn => warn!(#"rocksdb_log", "[{}]{}", self.tablet_name, log), + InfoLogLevel::Error => error!(#"rocksdb_log", "[{}]{}", self.tablet_name, log), + InfoLogLevel::Fatal => crit!(#"rocksdb_log", "[{}]{}", self.tablet_name, log), + _ => {} + } + } +} + #[derive(Default)] pub struct RaftDbLogger; diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 838fe461f4b..92d7a4f7353 100644 --- a/components/raft_log_engine/src/engine.rs +++ 
b/components/raft_log_engine/src/engine.rs @@ -472,18 +472,21 @@ impl RaftLogBatchTrait for RaftLogBatch { let key = encode_flushed_key(cf, tablet_index); let mut value = vec![0; 8]; NumberCodec::encode_u64(&mut value, apply_index); - self.0.put(raft_group_id, key.to_vec(), value); - Ok(()) + self.0 + .put(raft_group_id, key.to_vec(), value) + .map_err(transfer_error) } fn put_dirty_mark(&mut self, raft_group_id: u64, tablet_index: u64, dirty: bool) -> Result<()> { let key = encode_key(DIRTY_MARK_KEY, tablet_index); if dirty { - self.0.put(raft_group_id, key.to_vec(), vec![]); + self.0 + .put(raft_group_id, key.to_vec(), vec![]) + .map_err(transfer_error) } else { self.0.delete(raft_group_id, key.to_vec()); + Ok(()) } - Ok(()) } fn put_recover_state(&mut self, state: &StoreRecoverState) -> Result<()> { diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index e7c32e742ec..7f656e29210 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -535,11 +535,13 @@ impl Peer { } if !self.serving() { self.start_destroy(ctx, &mut write_task); - ctx.coprocessor_host.on_region_changed( - self.region(), - RegionChangeEvent::Destroy, - self.raft_group().raft.state, - ); + if self.persisted_index() != 0 { + ctx.coprocessor_host.on_region_changed( + self.region(), + RegionChangeEvent::Destroy, + self.raft_group().raft.state, + ); + } } // Ready number should increase monotonically. 
assert!(self.async_writer.known_largest_number() < ready.number()); diff --git a/src/config/mod.rs b/src/config/mod.rs index 3274d5442df..38d69f1ab29 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1264,7 +1264,7 @@ impl DbConfig { } } - pub fn build_opt(&self, shared: &DbResources) -> RocksDbOptions { + pub fn build_opt(&self, shared: &DbResources, for_engine: EngineType) -> RocksDbOptions { let mut opts = RocksDbOptions::default(); opts.set_wal_recovery_mode(self.wal_recovery_mode); if !self.wal_dir.is_empty() { @@ -1306,7 +1306,9 @@ impl DbConfig { if let Some(b) = self.paranoid_checks { opts.set_paranoid_checks(b); } - opts.set_info_log(RocksdbLogger::default()); + if for_engine == EngineType::RaftKv { + opts.set_info_log(RocksdbLogger::default()); + } opts.set_info_log_level(self.info_log_level.into()); if self.titan.enabled { opts.set_titandb_options(&self.titan.build_opts()); @@ -4424,9 +4426,10 @@ mod tests { fn test_rocks_rate_limit_zero() { let mut tikv_cfg = TikvConfig::default(); tikv_cfg.rocksdb.rate_bytes_per_sec = ReadableSize(0); + let resource = tikv_cfg.rocksdb.build_resources(Arc::new(Env::default())); tikv_cfg .rocksdb - .build_opt(&tikv_cfg.rocksdb.build_resources(Arc::new(Env::default()))); + .build_opt(&resource, tikv_cfg.storage.engine); } #[test] @@ -4587,12 +4590,10 @@ mod tests { Arc, ) { assert_eq!(F::TAG, cfg.storage.api_version()); + let resource = cfg.rocksdb.build_resources(Arc::default()); let engine = RocksDBEngine::new( &cfg.storage.data_dir, - Some( - cfg.rocksdb - .build_opt(&cfg.rocksdb.build_resources(Arc::new(Env::default()))), - ), + Some(cfg.rocksdb.build_opt(&resource, cfg.storage.engine)), cfg.rocksdb.build_cf_opts( &cfg.rocksdb .build_cf_resources(cfg.storage.block_cache.build_shared_cache()), diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 91b5178f8a0..ff06e41cc57 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -6,6 +6,7 @@ use engine_rocks::{ 
raw::{Cache, Env}, CompactedEventSender, CompactionListener, FlowListener, RocksCfOptions, RocksCompactionJobInfo, RocksDbOptions, RocksEngine, RocksEventListener, RocksPersistenceListener, RocksStatistics, + TabletLogger, }; use engine_traits::{ CompactionJobInfo, MiscExt, PersistenceListener, Result, StateStorage, TabletContext, @@ -134,12 +135,12 @@ impl KvEngineFactory { self.inner.db_resources.statistics.clone() } - fn db_opts(&self) -> RocksDbOptions { + fn db_opts(&self, for_engine: EngineType) -> RocksDbOptions { // Create kv engine. let mut db_opts = self .inner .rocksdb_config - .build_opt(&self.inner.db_resources); + .build_opt(&self.inner.db_resources, for_engine); if !self.inner.lite { db_opts.add_event_listener(RocksEventListener::new( "kv", @@ -170,7 +171,7 @@ impl KvEngineFactory { /// It will always create in path/DEFAULT_DB_SUB_DIR. pub fn create_shared_db(&self, path: impl AsRef) -> Result { let path = path.as_ref(); - let mut db_opts = self.db_opts(); + let mut db_opts = self.db_opts(EngineType::RaftKv); let cf_opts = self.cf_opts(EngineType::RaftKv); if let Some(listener) = &self.inner.flow_listener { db_opts.add_event_listener(listener.clone()); @@ -187,7 +188,9 @@ impl KvEngineFactory { impl TabletFactory for KvEngineFactory { fn open_tablet(&self, ctx: TabletContext, path: &Path) -> Result { - let mut db_opts = self.db_opts(); + let mut db_opts = self.db_opts(EngineType::RaftKv2); + let tablet_name = path.file_name().unwrap().to_str().unwrap().to_string(); + db_opts.set_info_log(TabletLogger::new(tablet_name)); let cf_opts = self.cf_opts(EngineType::RaftKv2); if let Some(listener) = &self.inner.flow_listener && let Some(suffix) = ctx.suffix { db_opts.add_event_listener(listener.clone_with(ctx.id, suffix)); @@ -215,7 +218,7 @@ impl TabletFactory for KvEngineFactory { fn destroy_tablet(&self, ctx: TabletContext, path: &Path) -> Result<()> { info!("destroy tablet"; "path" => %path.display(), "id" => ctx.id, "suffix" => ?ctx.suffix); // Create 
kv engine. - let _db_opts = self.db_opts(); + let _db_opts = self.db_opts(EngineType::RaftKv2); let _cf_opts = self.cf_opts(EngineType::RaftKv2); // TODOTODO: call rust-rocks or tirocks to destroy_engine; // engine_rocks::util::destroy_engine( diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 452bcc89238..dc0a85bc9c2 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -159,9 +159,8 @@ fn test_delete_files_in_range_for_titan() { cfg.rocksdb.defaultcf.titan.min_gc_batch_size = ReadableSize(0); cfg.rocksdb.defaultcf.titan.discardable_ratio = 0.4; cfg.rocksdb.defaultcf.titan.min_blob_size = ReadableSize(0); - let kv_db_opts = cfg - .rocksdb - .build_opt(&cfg.rocksdb.build_resources(Default::default())); + let resource = cfg.rocksdb.build_resources(Default::default()); + let kv_db_opts = cfg.rocksdb.build_opt(&resource, cfg.storage.engine); let kv_cfs_opts = cfg.rocksdb.build_cf_opts( &cfg.rocksdb.build_cf_resources(cache), None, From c8c1ca8b8376d7f29c05cd1cf08b469ddbc4939c Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Fri, 3 Feb 2023 15:43:55 +0800 Subject: [PATCH 0482/1149] raftstore: Observe when receive raft message (#14043) ref tikv/tikv#13855 Introduce observers when receive raft message. 
Signed-off-by: CalvinNeo --- components/cdc/src/observer.rs | 2 ++ .../raftstore-v2/src/operation/ready/mod.rs | 1 + .../raftstore/src/coprocessor/dispatcher.rs | 35 +++++++++++++++++++ components/raftstore/src/coprocessor/mod.rs | 15 ++++++-- components/raftstore/src/store/fsm/peer.rs | 3 ++ components/raftstore/src/store/fsm/store.rs | 3 ++ components/raftstore/src/store/peer.rs | 1 + components/raftstore/src/store/snap.rs | 16 ++++++--- 8 files changed, 70 insertions(+), 6 deletions(-) diff --git a/components/cdc/src/observer.rs b/components/cdc/src/observer.rs index 696bc6341ee..aac2842e404 100644 --- a/components/cdc/src/observer.rs +++ b/components/cdc/src/observer.rs @@ -273,6 +273,7 @@ mod tests { prev_lead_transferee: raft::INVALID_ID, vote: raft::INVALID_ID, initialized: true, + peer_id: raft::INVALID_ID, }, ); match rx.recv_timeout(Duration::from_millis(10)).unwrap().unwrap() { @@ -301,6 +302,7 @@ mod tests { prev_lead_transferee: 3, vote: 3, initialized: true, + peer_id: raft::INVALID_ID, }, ); match rx.recv_timeout(Duration::from_millis(10)).unwrap().unwrap() { diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 7f656e29210..03dce74d4e7 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -768,6 +768,7 @@ impl Peer { prev_lead_transferee: target, vote: self.raft_group().raft.vote, initialized: self.storage().is_initialized(), + peer_id: self.peer().get_id(), }, ); self.proposal_control_mut().maybe_update_term(term); diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index 794a46b8e3a..0e45ef1d09d 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -8,6 +8,7 @@ use kvproto::{ metapb::{Region, RegionEpoch}, pdpb::CheckPolicy, raft_cmdpb::{ComputeHashRequest, RaftCmdRequest}, + 
raft_serverpb::RaftMessage, }; use protobuf::Message; use raft::eraftpb; @@ -278,6 +279,7 @@ impl_box_observer_g!( ConsistencyCheckObserver, WrappedConsistencyCheckObserver ); +impl_box_observer!(BoxMessageObserver, MessageObserver, WrappedMessageObserver); /// Registry contains all registered coprocessors. #[derive(Clone)] @@ -296,6 +298,7 @@ where read_index_observers: Vec>, pd_task_observers: Vec>, update_safe_ts_observers: Vec>, + message_observers: Vec>, // TODO: add endpoint } @@ -313,6 +316,7 @@ impl Default for Registry { read_index_observers: Default::default(), pd_task_observers: Default::default(), update_safe_ts_observers: Default::default(), + message_observers: Default::default(), } } } @@ -381,6 +385,10 @@ impl Registry { pub fn register_update_safe_ts_observer(&mut self, priority: u32, qo: BoxUpdateSafeTsObserver) { push!(priority, qo, self.update_safe_ts_observers); } + + pub fn register_message_observer(&mut self, priority: u32, qo: BoxMessageObserver) { + push!(priority, qo, self.message_observers); + } } /// A macro that loops over all observers and returns early when error is found @@ -780,6 +788,17 @@ impl CoprocessorHost { true } + /// Returns false if the message should not be stepped later. + pub fn on_raft_message(&self, msg: &RaftMessage) -> bool { + for observer in &self.registry.message_observers { + let observer = observer.observer.inner(); + if !observer.on_raft_message(msg) { + return false; + } + } + true + } + pub fn on_flush_applied_cmd_batch( &self, max_level: ObserveLevel, @@ -890,6 +909,7 @@ mod tests { OnUpdateSafeTs = 23, PrePersist = 24, PreWriteApplyState = 25, + OnRaftMessage = 26, } impl Coprocessor for TestCoprocessor {} @@ -1132,6 +1152,14 @@ mod tests { } } + impl MessageObserver for TestCoprocessor { + fn on_raft_message(&self, _: &RaftMessage) -> bool { + self.called + .fetch_add(ObserverIndex::OnRaftMessage as usize, Ordering::SeqCst); + true + } + } + macro_rules! 
assert_all { ($target:expr, $expect:expr) => {{ for (c, e) in ($target).iter().zip($expect) { @@ -1168,6 +1196,8 @@ mod tests { .register_cmd_observer(1, BoxCmdObserver::new(ob.clone())); host.registry .register_update_safe_ts_observer(1, BoxUpdateSafeTsObserver::new(ob.clone())); + host.registry + .register_message_observer(1, BoxMessageObserver::new(ob.clone())); let mut index: usize = 0; let region = Region::default(); @@ -1282,6 +1312,11 @@ mod tests { host.pre_write_apply_state(®ion); index += ObserverIndex::PreWriteApplyState as usize; assert_all!([&ob.called], &[index]); + + let msg = RaftMessage::default(); + host.on_raft_message(&msg); + index += ObserverIndex::OnRaftMessage as usize; + assert_all!([&ob.called], &[index]); } #[test] diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 73110660856..98b045dbed8 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -26,14 +26,16 @@ mod metrics; pub mod region_info_accessor; mod split_check; pub mod split_observer; +use kvproto::raft_serverpb::RaftMessage; pub use self::{ config::{Config, ConsistencyCheckMethod}, consistency_check::{ConsistencyCheckObserver, Raw as RawConsistencyCheckObserver}, dispatcher::{ BoxAdminObserver, BoxApplySnapshotObserver, BoxCmdObserver, BoxConsistencyCheckObserver, - BoxPdTaskObserver, BoxQueryObserver, BoxRegionChangeObserver, BoxRoleObserver, - BoxSplitCheckObserver, BoxUpdateSafeTsObserver, CoprocessorHost, Registry, StoreHandle, + BoxMessageObserver, BoxPdTaskObserver, BoxQueryObserver, BoxRegionChangeObserver, + BoxRoleObserver, BoxSplitCheckObserver, BoxUpdateSafeTsObserver, CoprocessorHost, Registry, + StoreHandle, }, error::{Error, Result}, region_info_accessor::{ @@ -269,6 +271,7 @@ pub struct RoleChange { /// Which peer is voted by itself. 
pub vote: u64, pub initialized: bool, + pub peer_id: u64, } impl RoleChange { @@ -280,6 +283,7 @@ impl RoleChange { prev_lead_transferee: raft::INVALID_ID, vote: raft::INVALID_ID, initialized: true, + peer_id: raft::INVALID_ID, } } } @@ -334,6 +338,13 @@ pub trait RegionChangeObserver: Coprocessor { } } +pub trait MessageObserver: Coprocessor { + /// Returns false if the message should not be stepped later. + fn on_raft_message(&self, _: &RaftMessage) -> bool { + true + } +} + #[derive(Clone, Debug, Default)] pub struct Cmd { pub index: u64, diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index a8232fd8322..75da7d497e4 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -610,6 +610,9 @@ where for m in msgs.drain(..) { match m { PeerMsg::RaftMessage(msg) => { + if !self.ctx.coprocessor_host.on_raft_message(&msg.msg) { + continue; + } if let Err(e) = self.on_raft_message(msg) { error!(%e; "handle raft message err"; diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 26f2983998d..85631bebe09 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -753,6 +753,9 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> match m { StoreMsg::Tick(tick) => self.on_tick(tick), StoreMsg::RaftMessage(msg) => { + if !self.ctx.coprocessor_host.on_raft_message(&msg.msg) { + continue; + } if let Err(e) = self.on_raft_message(msg) { if matches!(&e, Error::RegionNotRegistered { .. 
}) { // This may happen in normal cases when add-peer runs slowly diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 44701fbf705..a6010a6761f 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -2335,6 +2335,7 @@ where prev_lead_transferee: self.lead_transferee, vote: self.raft_group.raft.vote, initialized: self.is_initialized(), + peer_id: self.peer.get_id(), }, ); self.cmd_epoch_checker.maybe_update_term(self.term()); diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index a9ef7df8c62..358ec716195 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -207,7 +207,9 @@ fn retry_delete_snapshot(mgr: &SnapManagerCore, key: &SnapKey, snap: &Snapshot) false } -fn gen_snapshot_meta(cf_files: &[CfFile], for_balance: bool) -> RaftStoreResult { +// Create a SnapshotMeta that can be later put into RaftSnapshotData or written +// into file. +pub fn gen_snapshot_meta(cf_files: &[CfFile], for_balance: bool) -> RaftStoreResult { let mut meta = Vec::with_capacity(cf_files.len()); for cf_file in cf_files { if !SNAPSHOT_CFS.iter().any(|cf| cf_file.cf == *cf) { @@ -663,7 +665,8 @@ impl Snapshot { Ok(snapshot_meta) } - fn set_snapshot_meta(&mut self, snapshot_meta: SnapshotMeta) -> RaftStoreResult<()> { + // Validate and set SnapshotMeta of this Snapshot. + pub fn set_snapshot_meta(&mut self, snapshot_meta: SnapshotMeta) -> RaftStoreResult<()> { let mut cf_file_count_from_meta: Vec = vec![]; let mut file_count = 0; let mut current_cf = ""; @@ -812,8 +815,9 @@ impl Snapshot { } } - // Only called in `do_build`. - fn save_meta_file(&mut self) -> RaftStoreResult<()> { + // Save `SnapshotMeta` to file. + // Used in `do_build` and by external crates. 
+ pub fn save_meta_file(&mut self) -> RaftStoreResult<()> { let v = box_try!(self.meta_file.meta.as_ref().unwrap().write_to_bytes()); if let Some(mut f) = self.meta_file.file.take() { // `meta_file` could be None for this case: in `init_for_building` the snapshot @@ -1125,6 +1129,10 @@ impl Snapshot { file_system::metadata(&self.meta_file.path) } + pub fn meta_path(&self) -> &PathBuf { + &self.meta_file.path + } + pub fn total_size(&self) -> u64 { self.cf_files .iter() From 656c9831d5f3be206b06745f6e0fd9b51ccfcfad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Fri, 3 Feb 2023 15:59:56 +0800 Subject: [PATCH 0483/1149] log-backup: added check leader call before flushing (#14108) close tikv/tikv#14099 Co-authored-by: Ti Chi Robot --- components/backup-stream/src/endpoint.rs | 17 +++++ components/backup-stream/src/metrics.rs | 5 ++ .../backup-stream/src/subscription_manager.rs | 15 ++++- .../backup-stream/src/subscription_track.rs | 23 +++++-- components/backup-stream/tests/mod.rs | 64 +++++++++++++++++++ components/server/src/server.rs | 8 +++ 6 files changed, 125 insertions(+), 7 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index c50c70a2eec..ff380551b90 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -8,6 +8,7 @@ use concurrency_manager::ConcurrencyManager; use engine_traits::KvEngine; use error_code::ErrorCodeExt; use futures::FutureExt; +use grpcio::Environment; use kvproto::{ brpb::{StreamBackupError, StreamBackupTaskInfo}, metapb::Region, @@ -17,7 +18,10 @@ use pd_client::PdClient; use raftstore::{ coprocessor::{CmdBatch, ObserveHandle, RegionInfoProvider}, router::RaftStoreRouter, + store::RegionReadProgressRegistry, }; +use resolved_ts::LeadershipResolver; +use security::SecurityManager; use tikv::config::BackupStreamConfig; use tikv_util::{ box_err, @@ -112,6 +116,10 @@ where 
router: RT, pd_client: Arc, concurrency_manager: ConcurrencyManager, + // Required by Leadership Resolver. + env: Arc, + region_read_progress: RegionReadProgressRegistry, + security_mgr: Arc, ) -> Self { crate::metrics::STREAM_ENABLED.inc(); let pool = create_tokio_runtime((config.num_threads / 2).max(1), "backup-stream") @@ -148,6 +156,14 @@ where let initial_scan_throughput_quota = Limiter::new(limit); info!("the endpoint of stream backup started"; "path" => %config.temp_path); let subs = SubscriptionTracer::default(); + let leadership_resolver = LeadershipResolver::new( + store_id, + Arc::clone(&pd_client) as _, + env, + security_mgr, + region_read_progress, + Duration::from_secs(60), + ); let (region_operator, op_loop) = RegionSubscriptionManager::start( InitialDataLoader::new( router.clone(), @@ -163,6 +179,7 @@ where meta_client.clone(), pd_client.clone(), ((config.num_threads + 1) / 2).max(1), + leadership_resolver, ); pool.spawn(op_loop); let mut checkpoint_mgr = CheckpointManager::default(); diff --git a/components/backup-stream/src/metrics.rs b/components/backup-stream/src/metrics.rs index c3f99b8617e..0805dae5f77 100644 --- a/components/backup-stream/src/metrics.rs +++ b/components/backup-stream/src/metrics.rs @@ -155,4 +155,9 @@ lazy_static! 
{ &["stage"] ) .unwrap(); + pub static ref LOST_LEADER_REGION: IntCounter = register_int_counter!( + "tikv_log_backup_lost_leader_region", + "The regions that lost leadership during resolving" + ) + .unwrap(); } diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index 91b4c096e7d..a31a43980b5 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -21,6 +21,7 @@ use raftstore::{ router::RaftStoreRouter, store::fsm::ChangeObserver, }; +use resolved_ts::LeadershipResolver; use tikv::storage::Statistics; use tikv_util::{box_err, debug, info, time::Instant, warn, worker::Scheduler}; use tokio::sync::mpsc::{channel, Receiver, Sender}; @@ -351,6 +352,7 @@ where meta_cli: MetadataClient, pd_client: Arc, scan_pool_size: usize, + leader_checker: LeadershipResolver, ) -> (Self, future![()]) where E: KvEngine, @@ -370,7 +372,7 @@ where scan_pool_handle: Arc::new(scan_pool_handle), scans: CallbackWaitGroup::new(), }; - let fut = op.clone().region_operator_loop(rx); + let fut = op.clone().region_operator_loop(rx, leader_checker); (op, fut) } @@ -390,7 +392,11 @@ where } /// the handler loop. 
- async fn region_operator_loop(self, mut message_box: Receiver) { + async fn region_operator_loop( + self, + mut message_box: Receiver, + mut leader_checker: LeadershipResolver, + ) { while let Some(op) = message_box.recv().await { info!("backup stream: on_modify_observe"; "op" => ?op); match op { @@ -454,7 +460,10 @@ where warn!("waiting for initial scanning done timed out, forcing progress!"; "take" => ?now.saturating_elapsed(), "timedout" => %timedout); } - let cps = self.subs.resolve_with(min_ts); + let regions = leader_checker + .resolve(self.subs.current_regions(), min_ts) + .await; + let cps = self.subs.resolve_with(min_ts, regions); let min_region = cps.iter().min_by_key(|rs| rs.checkpoint); // If there isn't any region observed, the `min_ts` can be used as resolved ts // safely. diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index a24076661bb..1f823130d3b 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -1,6 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{sync::Arc, time::Duration}; +use std::{collections::HashSet, sync::Arc, time::Duration}; use dashmap::{ mapref::{entry::Entry, one::RefMut}, @@ -149,12 +149,27 @@ impl SubscriptionTracer { } } + pub fn current_regions(&self) -> Vec { + self.0.iter().map(|s| *s.key()).collect() + } + /// try advance the resolved ts with the min ts of in-memory locks. /// returns the regions and theirs resolved ts. - pub fn resolve_with(&self, min_ts: TimeStamp) -> Vec { + pub fn resolve_with( + &self, + min_ts: TimeStamp, + regions: impl IntoIterator, + ) -> Vec { + let rs = regions.into_iter().collect::>(); self.0 .iter_mut() - // Don't advance the checkpoint ts of removed region. 
+ .filter(|s| { + let contains = rs.contains(s.key()); + if !contains { + crate::metrics::LOST_LEADER_REGION.inc(); + } + contains + }) .map(|mut s| ResolveResult::resolve(s.value_mut(), min_ts)) .collect() } @@ -500,7 +515,7 @@ mod test { drop(region4_sub); let mut rs = subs - .resolve_with(TimeStamp::new(1000)) + .resolve_with(TimeStamp::new(1000), vec![1, 2, 3, 4]) .into_iter() .map(|r| (r.region, r.checkpoint, r.checkpoint_type)) .collect::>(); diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index 7256cd62c03..b7afcd1441f 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -337,6 +337,13 @@ impl Suite { raft_router, cluster.pd_client.clone(), cm, + Arc::clone(&self.env), + cluster.store_metas[&id] + .lock() + .unwrap() + .region_read_progress + .clone(), + Arc::clone(&sim.security_mgr), ); worker.start(endpoint); } @@ -803,6 +810,7 @@ mod test { }; use futures::{Stream, StreamExt}; use pd_client::PdClient; + use test_raftstore::IsolationFilterFactory; use tikv_util::{box_err, defer, info, HandyRwLock}; use tokio::time::timeout; use txn_types::{Key, TimeStamp}; @@ -1231,6 +1239,17 @@ mod test { ); } + async fn collect_all_current( + mut s: impl Stream + Unpin, + max_gap: Duration, + ) -> Vec { + let mut r = vec![]; + while let Ok(Some(x)) = timeout(max_gap, s.next()).await { + r.push(x); + } + r + } + async fn collect_current(mut s: impl Stream + Unpin, goal: usize) -> Vec { let mut r = vec![]; while let Ok(Some(x)) = timeout(Duration::from_secs(10), s.next()).await { @@ -1286,4 +1305,49 @@ mod test { round1.union(&round2).map(|x| x.as_slice()), )); } + + #[test] + fn network_partition() { + let mut suite = super::SuiteBuilder::new_named("network_partition") + .nodes(3) + .build(); + let stream = suite.flush_stream(); + suite.must_register_task(1, "network_partition"); + let leader = suite.cluster.leader_of_region(1).unwrap(); + let round1 = 
run_async_test(suite.write_records(0, 64, 1)); + + suite + .cluster + .add_send_filter(IsolationFilterFactory::new(leader.store_id)); + suite.cluster.reset_leader_of_region(1); + suite + .cluster + .must_wait_for_leader_expire(leader.store_id, 1); + let leader2 = suite.cluster.leader_of_region(1).unwrap(); + assert_ne!(leader.store_id, leader2.store_id, "leader not switched."); + let ts = suite.tso(); + suite.must_kv_prewrite( + 1, + vec![mutation(make_record_key(1, 778), b"generator".to_vec())], + make_record_key(1, 778), + ts, + ); + suite.sync(); + suite.force_flush_files("network_partition"); + suite.wait_for_flush(); + + let cps = run_async_test(collect_all_current(stream, Duration::from_secs(2))); + assert!( + cps.iter() + .flat_map(|(_s, cp)| cp.events.iter().map(|resp| resp.checkpoint)) + .all(|cp| cp <= ts.into_inner()), + "ts={} cps={:?}", + ts, + cps + ); + run_async_test(suite.check_for_write_records( + suite.flushed_files.path(), + round1.iter().map(|k| k.as_slice()), + )) + } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 2a479964ced..3da6b0c4950 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1043,6 +1043,14 @@ where self.router.clone(), self.pd_client.clone(), self.concurrency_manager.clone(), + Arc::clone(&self.env), + engines + .store_meta + .lock() + .unwrap() + .region_read_progress + .clone(), + Arc::clone(&self.security_mgr), ); backup_stream_worker.start(backup_stream_endpoint); self.to_stop.push(backup_stream_worker); From 6daed4f45208a2818f09038279aa5ba1e0f0412e Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 3 Feb 2023 18:35:56 +0800 Subject: [PATCH 0484/1149] raftstore-v2: support tracing peer lifetime (#14056) ref tikv/tikv#12842, ref tikv/tikv#13818 In V1, a peer is responsible to destroy itself. The design is to make leader do less work and reduce writes. 
But from the practice of the pass years, not making it a strong guarantee actually makes the implementation complicated and hard to be correct and difficult to understand. In V2, we changes to make leader the very role to make sure all removed peers or merged peers must be destroyed in the end. Push mode is way easier to understand and implement correctly. The downside is extra writes are introduced but it's worthy. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/batch/store.rs | 7 + components/raftstore-v2/src/fsm/peer.rs | 1 + components/raftstore-v2/src/fsm/store.rs | 9 +- .../operation/command/admin/conf_change.rs | 76 ++++- .../src/operation/command/admin/mod.rs | 7 +- .../raftstore-v2/src/operation/command/mod.rs | 21 +- components/raftstore-v2/src/operation/life.rs | 294 +++++++++++++++++- components/raftstore-v2/src/operation/mod.rs | 2 +- .../raftstore-v2/src/operation/ready/mod.rs | 50 ++- .../src/operation/ready/snapshot.rs | 4 + components/raftstore-v2/src/raft/peer.rs | 43 ++- components/raftstore-v2/src/router/message.rs | 3 + .../tests/integrations/cluster.rs | 4 + .../tests/integrations/test_conf_change.rs | 38 ++- .../tests/integrations/test_life.rs | 214 ++++++++++++- .../raftstore/src/store/async_io/read.rs | 2 + components/raftstore/src/store/fsm/apply.rs | 2 +- components/raftstore/src/store/fsm/peer.rs | 4 +- 18 files changed, 732 insertions(+), 49 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 1c7360a86bc..2a3cc63f797 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -75,6 +75,7 @@ pub struct StoreContext { pub schedulers: Schedulers, /// store meta pub store_meta: Arc>>, + pub shutdown: Arc, pub engine: ER, pub tablet_registry: TabletRegistry, pub apply_pool: FuturePool, @@ -108,6 +109,7 @@ impl StoreContext { self.cfg.report_region_buckets_tick_interval.0; 
self.tick_batch[PeerTick::CheckLongUncommitted as usize].wait_duration = self.cfg.check_long_uncommitted_interval.0; + self.tick_batch[PeerTick::GcPeer as usize].wait_duration = Duration::from_secs(60); } } @@ -273,6 +275,7 @@ struct StorePollerBuilder { apply_pool: FuturePool, logger: Logger, store_meta: Arc>>, + shutdown: Arc, snap_mgr: TabletSnapManager, } @@ -287,6 +290,7 @@ impl StorePollerBuilder { schedulers: Schedulers, logger: Logger, store_meta: Arc>>, + shutdown: Arc, snap_mgr: TabletSnapManager, coprocessor_host: CoprocessorHost, ) -> Self { @@ -312,6 +316,7 @@ impl StorePollerBuilder { schedulers, store_meta, snap_mgr, + shutdown, coprocessor_host, } } @@ -418,6 +423,7 @@ where timer: SteadyTimer::default(), schedulers: self.schedulers.clone(), store_meta: self.store_meta.clone(), + shutdown: self.shutdown.clone(), engine: self.engine.clone(), tablet_registry: self.tablet_registry.clone(), apply_pool: self.apply_pool.clone(), @@ -613,6 +619,7 @@ impl StoreSystem { schedulers.clone(), self.logger.clone(), store_meta.clone(), + self.shutdown.clone(), snap_mgr, coprocessor_host, ); diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 26d5c2a1458..47d23a67d1d 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -225,6 +225,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, } PeerTick::ReportBuckets => unimplemented!(), PeerTick::CheckLongUncommitted => self.on_check_long_uncommitted(), + PeerTick::GcPeer => self.fsm.peer_mut().on_gc_peer_tick(self.store_ctx), } } diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 17c0a9a50f9..fef433f04f5 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -12,7 +12,9 @@ use engine_traits::{KvEngine, RaftEngine}; use futures::{compat::Future01CompatExt, FutureExt}; use keys::{data_end_key, 
data_key}; use kvproto::metapb::Region; -use raftstore::store::{fsm::store::StoreRegionMeta, Config, RegionReadProgressRegistry}; +use raftstore::store::{ + fsm::store::StoreRegionMeta, Config, RegionReadProgressRegistry, Transport, +}; use slog::{info, o, Logger}; use tikv_util::{ future::poll_future_notify, @@ -255,7 +257,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { } } - pub fn handle_msgs(&mut self, store_msg_buf: &mut Vec) { + pub fn handle_msgs(&mut self, store_msg_buf: &mut Vec) + where + T: Transport, + { for msg in store_msg_buf.drain(..) { match msg { StoreMsg::Start => self.on_start(), diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 42c433584fe..1b8d29a7a54 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -49,6 +49,12 @@ pub struct ConfChangeResult { pub region_state: RegionLocalState, } +#[derive(Debug)] +pub struct UpdateGcPeersResult { + index: u64, + region_state: RegionLocalState, +} + impl Peer { #[inline] pub fn propose_conf_change( @@ -177,10 +183,13 @@ impl Peer { } } } - if has_new_peer.is_some() { - // Speed up snapshot instead of waiting another heartbeat. - self.raft_group_mut().ping(); - self.set_has_ready(); + if self.is_leader() { + if has_new_peer.is_some() { + // Speed up snapshot instead of waiting another heartbeat. 
+ self.raft_group_mut().ping(); + self.set_has_ready(); + } + self.maybe_schedule_gc_peer_tick(); } } ctx.coprocessor_host.on_region_changed( @@ -199,6 +208,15 @@ impl Peer { self.set_has_extra_write(); } } + + pub fn on_apply_res_update_gc_peers(&mut self, result: UpdateGcPeersResult) { + let region_id = self.region_id(); + self.state_changes_mut() + .put_region_state(region_id, result.index, &result.region_state) + .unwrap(); + self.set_has_extra_write(); + self.storage_mut().set_region_state(result.region_state); + } } impl Apply { @@ -279,7 +297,28 @@ impl Apply { ); let my_id = self.peer().get_id(); let state = self.region_state_mut(); + let mut removed_records: Vec<_> = state.take_removed_records().into(); + for p0 in state.get_region().get_peers() { + // No matching store ID means the peer must be removed. + if new_region + .get_peers() + .iter() + .all(|p1| p1.get_store_id() != p0.get_store_id()) + { + removed_records.push(p0.clone()); + } + } + // If a peer is replaced in the same store, the leader will keep polling the + // new peer on the same store, which implies that the old peer must be + // tombstone in the end. 
+ removed_records.retain(|p0| { + new_region + .get_peers() + .iter() + .all(|p1| p1.get_store_id() != p0.get_store_id()) + }); state.set_region(new_region.clone()); + state.set_removed_records(removed_records.into()); let new_peer = new_region .get_peers() .iter() @@ -534,4 +573,33 @@ impl Apply { .inc(); Ok(()) } + + pub fn apply_update_gc_peer( + &mut self, + log_index: u64, + admin_req: &AdminRequest, + ) -> (AdminResponse, AdminCmdResult) { + let mut removed_records: Vec<_> = self.region_state_mut().take_removed_records().into(); + let mut merged_records: Vec<_> = self.region_state_mut().take_merged_records().into(); + let updates = admin_req.get_update_gc_peers().get_peer_id(); + info!(self.logger, "update gc peer"; "index" => log_index, "updates" => ?updates, "gc_peers" => ?removed_records, "merged_peers" => ?merged_records); + removed_records.retain(|p| !updates.contains(&p.get_id())); + merged_records.retain_mut(|r| { + let mut sources: Vec<_> = r.take_source_peers().into(); + sources.retain(|p| !updates.contains(&p.get_id())); + r.set_source_peers(sources.into()); + !r.get_source_peers().is_empty() + }); + self.region_state_mut() + .set_removed_records(removed_records.into()); + self.region_state_mut() + .set_merged_records(merged_records.into()); + ( + AdminResponse::default(), + AdminCmdResult::UpdateGcPeers(UpdateGcPeersResult { + index: log_index, + region_state: self.region_state().clone(), + }), + ) + } } diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 52bc5329dd4..1546983645f 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -7,7 +7,7 @@ mod transfer_leader; pub use compact_log::CompactLogContext; use compact_log::CompactLogResult; -use conf_change::ConfChangeResult; +use conf_change::{ConfChangeResult, UpdateGcPeersResult}; use engine_traits::{KvEngine, RaftEngine}; use 
kvproto::raft_cmdpb::{AdminCmdType, RaftCmdRequest}; use protobuf::Message; @@ -28,6 +28,7 @@ pub enum AdminCmdResult { ConfChange(ConfChangeResult), TransferLeader(u64), CompactLog(CompactLogResult), + UpdateGcPeers(UpdateGcPeersResult), } impl Peer { @@ -110,6 +111,10 @@ impl Peer { } } AdminCmdType::CompactLog => self.propose_compact_log(ctx, req), + AdminCmdType::UpdateGcPeer => { + let data = req.write_to_bytes().unwrap(); + self.propose(ctx, data) + } _ => unimplemented!(), } }; diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index edca9510c27..5434eca6b38 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -16,7 +16,7 @@ //! - Applied result are sent back to peer fsm, and update memory state in //! `on_apply_res`. -use std::{mem, time::Duration}; +use std::{mem, sync::atomic::Ordering, time::Duration}; use engine_traits::{KvEngine, PerfContext, RaftEngine, WriteBatch, WriteOptions}; use kvproto::raft_cmdpb::{ @@ -41,7 +41,9 @@ use raftstore::{ }; use slog::{info, warn}; use tikv_util::{ - box_err, slog_panic, + box_err, + log::SlogFormat, + slog_panic, time::{duration_to_sec, monotonic_raw_now, Instant}, }; @@ -107,7 +109,17 @@ impl Peer { #[inline] pub fn schedule_apply_fsm(&mut self, store_ctx: &mut StoreContext) { let region_state = self.storage().region_state().clone(); - let mailbox = store_ctx.router.mailbox(self.region_id()).unwrap(); + let mailbox = match store_ctx.router.mailbox(self.region_id()) { + Some(m) => m, + None => { + assert!( + store_ctx.shutdown.load(Ordering::Relaxed), + "failed to load mailbox: {}", + SlogFormat(&self.logger) + ); + return; + } + }; let logger = self.logger.clone(); let read_scheduler = self.storage().read_scheduler(); let (apply_scheduler, mut apply_fsm) = ApplyFsm::new( @@ -334,6 +346,7 @@ impl Peer { } AdminCmdResult::TransferLeader(term) => self.on_transfer_leader(ctx, 
term), AdminCmdResult::CompactLog(res) => self.on_apply_res_compact_log(ctx, res), + AdminCmdResult::UpdateGcPeers(state) => self.on_apply_res_update_gc_peers(state), } } @@ -587,10 +600,10 @@ impl Apply { AdminCmdType::PrepareFlashback => unimplemented!(), AdminCmdType::FinishFlashback => unimplemented!(), AdminCmdType::BatchSwitchWitness => unimplemented!(), + AdminCmdType::UpdateGcPeer => self.apply_update_gc_peer(log_index, admin_req), AdminCmdType::InvalidAdmin => { return Err(box_err!("invalid admin command type")); } - AdminCmdType::UpdateGcPeer => unimplemented!(), }; match admin_result { diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 3a9f678bd8c..a407f6bc8ef 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -9,15 +9,34 @@ //! In v1, it can also be created by split. In v2, it's required to create by //! sending a message to store fsm first, and then using split to initialized //! the peer. +//! +//! A peer can only be removed in a raft group by conf change or merge. When +//! applying conf change, removed peer is added to `removed_records`; when +//! applying merge, source peer is added to merged_records. Quorum must agree +//! on the removal, but the removed peer may not necessary be in the quorum. So +//! the peer may not really destroy itself until either: +//! - applying conf change remove; +//! - receiving a RaftMessage with `is_tombstone` set; +//! - receiving a RaftMessage targeting larger ID. +//! +//! Leader is responsible to keep polling all removed peers and guarantee they +//! are really destroyed. A peer is considered destroyed only when a tombstone +//! record with the same ID or larger ID is persisted. For `removed_records`, +//! leader only needs to send a message with `is_tombstone` set. For +//! `merged_records`, to avoid race between destroy and merge, leader needs to +//! 
ask target peer to destroy source peer. + +use std::{cmp, mem}; use batch_system::BasicMailbox; use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use kvproto::{ - metapb::Region, - raft_serverpb::{PeerState, RaftMessage}, + metapb::{self, Region}, + raft_cmdpb::{AdminCmdType, RaftCmdRequest}, + raft_serverpb::{ExtraMessageType, PeerState, RaftMessage}, }; -use raftstore::store::{util, WriteTask}; +use raftstore::store::{util, Transport, WriteTask}; use slog::{debug, error, info, warn}; use tikv_util::store::find_peer; @@ -26,7 +45,7 @@ use crate::{ batch::StoreContext, fsm::{PeerFsm, Store}, raft::{Peer, Storage}, - router::PeerMsg, + router::{CmdResChannel, PeerMsg, PeerTick}, }; /// When a peer is about to destroy, it becomes `WaitReady` first. If there is @@ -87,6 +106,11 @@ impl DestroyProgress { } } +#[derive(Default)] +pub struct GcPeerContext { + confirmed_ids: Vec, +} + impl Store { /// The method is called during split. /// The creation process is: @@ -100,6 +124,7 @@ impl Store { ) where EK: KvEngine, ER: RaftEngine, + T: Transport, { let region_id = msg.region.id; let mut raft_msg = Box::::default(); @@ -137,10 +162,11 @@ impl Store { ) where EK: KvEngine, ER: RaftEngine, + T: Transport, { let region_id = msg.get_region_id(); // The message can be sent when the peer is being created, so try send it first. - let msg = if let Err(TrySendError::Disconnected(PeerMsg::RaftMessage(m))) = + let mut msg = if let Err(TrySendError::Disconnected(PeerMsg::RaftMessage(m))) = ctx.router.send(region_id, PeerMsg::RaftMessage(msg)) { m @@ -166,13 +192,12 @@ impl Store { ctx.raft_metrics.message_dropped.mismatch_region_epoch.inc(); return; } - // TODO: maybe we need to ack the message to confirm the peer is destroyed. - if msg.get_is_tombstone() || msg.has_merge_target() { + if msg.has_merge_target() { // Target tombstone peer doesn't exist, so ignore it. 
ctx.raft_metrics.message_dropped.stale_msg.inc(); return; } - let from_epoch = msg.get_region_epoch(); + let mut destroyed = false; let local_state = match ctx.engine.get_region_state(region_id, u64::MAX) { Ok(s) => s, Err(e) => { @@ -192,30 +217,51 @@ impl Store { // skip handling gc for simplicity. let local_epoch = local_state.get_region().get_region_epoch(); // The region in this peer is already destroyed - if util::is_epoch_stale(from_epoch, local_epoch) { - ctx.raft_metrics.message_dropped.region_tombstone_peer.inc(); + if util::is_epoch_stale(msg.get_region_epoch(), local_epoch) { + destroyed = true; + } + if !destroyed && let Some(local_peer) = find_peer(local_state.get_region(), self.store_id()) && to_peer.id <= local_peer.get_id() { + destroyed = true; + } + } + if destroyed { + if msg.get_is_tombstone() { + if let Some(msg) = build_peer_destroyed_report(&mut msg) { + let _ = ctx.trans.send(msg); + } return; } - if let Some(local_peer) = find_peer(local_state.get_region(), self.store_id()) { - if to_peer.id <= local_peer.get_id() { - ctx.raft_metrics.message_dropped.region_tombstone_peer.inc(); + if msg.has_extra_msg() { + let extra_msg = msg.get_extra_msg(); + if extra_msg.get_type() == ExtraMessageType::MsgGcPeerRequest + && extra_msg.has_check_gc_peer() + { + forward_destroy_source_peer(ctx, &msg); return; } } + ctx.raft_metrics.message_dropped.region_tombstone_peer.inc(); + return; } + // If it's not destroyed, and the message is a tombstone message, create the + // peer and destroy immediately to leave a tombstone record. // So the peer must need to be created. We don't need to synchronous with split // as split won't create peer in v2. And we don't check for range // conflict as v2 depends on tablet, which allows conflict ranges. 
let mut region = Region::default(); region.set_id(region_id); - region.set_region_epoch(from_epoch.clone()); + region.set_region_epoch(msg.get_region_epoch().clone()); // Peer list doesn't have to be complete, as it's uninitialized. // // If the id of the from_peer is INVALID_ID, this msg must be sent from parent // peer in the split execution in which case we do not add it into the region. - if from_peer.id != raft::INVALID_ID { + if from_peer.id != raft::INVALID_ID + // Check merge may be sent from different region + && (msg.get_extra_msg().get_type() != ExtraMessageType::MsgGcPeerRequest + || msg.get_extra_msg().get_check_gc_peer().get_from_region_id() == region_id) + { region.mut_peers().push(from_peer.clone()); } region.mut_peers().push(to_peer.clone()); @@ -260,7 +306,225 @@ impl Store { } } +/// Tell leader that `to_peer` from `tombstone_msg` is destroyed. +fn build_peer_destroyed_report(tombstone_msg: &mut RaftMessage) -> Option { + let to_region_id = if tombstone_msg.has_extra_msg() { + assert_eq!( + tombstone_msg.get_extra_msg().get_type(), + ExtraMessageType::MsgGcPeerRequest + ); + tombstone_msg + .get_extra_msg() + .get_check_gc_peer() + .get_from_region_id() + } else { + tombstone_msg.get_region_id() + }; + if to_region_id == 0 || tombstone_msg.get_from_peer().get_id() == 0 { + return None; + } + let mut msg = RaftMessage::default(); + msg.set_region_id(to_region_id); + msg.set_from_peer(tombstone_msg.take_to_peer()); + msg.set_to_peer(tombstone_msg.take_from_peer()); + msg.mut_extra_msg() + .set_type(ExtraMessageType::MsgGcPeerResponse); + Some(msg) +} + +/// Forward the destroy request from target peer to merged source peer. +fn forward_destroy_source_peer(ctx: &mut StoreContext, msg: &RaftMessage) +where + EK: KvEngine, + ER: RaftEngine, + T: Transport, +{ + let extra_msg = msg.get_extra_msg(); + // Instead of respond leader directly, send a message to target region to + // double check it's really destroyed. 
+ let check_gc_peer = extra_msg.get_check_gc_peer(); + let mut tombstone_msg = Box::::default(); + tombstone_msg.set_region_id(check_gc_peer.get_check_region_id()); + tombstone_msg.set_from_peer(msg.get_from_peer().clone()); + tombstone_msg.set_to_peer(check_gc_peer.get_check_peer().clone()); + tombstone_msg.set_region_epoch(check_gc_peer.get_check_region_epoch().clone()); + tombstone_msg.set_is_tombstone(true); + // No need to set epoch as we don't know what it is. + tombstone_msg + .mut_extra_msg() + .set_type(ExtraMessageType::MsgGcPeerRequest); + tombstone_msg + .mut_extra_msg() + .mut_check_gc_peer() + .set_from_region_id(check_gc_peer.get_from_region_id()); + let _ = ctx.router.send_raft_message(tombstone_msg); +} + impl Peer { + pub fn maybe_schedule_gc_peer_tick(&mut self) { + let region_state = self.storage().region_state(); + if !region_state.get_removed_records().is_empty() + || !region_state.get_merged_records().is_empty() + { + self.add_pending_tick(PeerTick::GcPeer); + } + } + + /// Returns `true` means the sender will be gced. The message is stale. + pub fn maybe_gc_sender(&mut self, msg: &RaftMessage) -> bool { + let removed_peers = self.storage().region_state().get_removed_records(); + // Only removed_records can be determined directly. 
+ if let Some(peer) = removed_peers + .iter() + .find(|p| p.id == msg.get_from_peer().get_id()) + { + let tombstone_msg = self.tombstone_message_for_same_region(peer.clone()); + self.add_message(tombstone_msg); + self.set_has_ready(); + true + } else { + false + } + } + + fn tombstone_message_for_same_region(&self, peer: metapb::Peer) -> RaftMessage { + let region_id = self.region_id(); + let mut tombstone_message = RaftMessage::default(); + tombstone_message.set_region_id(region_id); + tombstone_message.set_from_peer(self.peer().clone()); + tombstone_message.set_to_peer(peer); + tombstone_message.set_region_epoch(self.region().get_region_epoch().clone()); + tombstone_message.set_is_tombstone(true); + tombstone_message + } + + pub fn on_tombstone_message(&mut self, msg: &mut RaftMessage) { + match msg.get_to_peer().get_id().cmp(&self.peer_id()) { + cmp::Ordering::Less => { + if let Some(msg) = build_peer_destroyed_report(msg) { + self.add_message(msg); + self.set_has_ready(); + } + } + // No matter it's greater or equal, the current peer must be destroyed. + _ => { + self.mark_for_destroy(None); + } + } + } + + /// When leader tries to gc merged source peer, it will send a gc request to + /// target peer. If target peer makes sure the merged is finished, it + /// forward the message to source peer and let source peer send back a + /// response. + pub fn on_gc_peer_request( + &mut self, + ctx: &mut StoreContext, + msg: &RaftMessage, + ) { + let extra_msg = msg.get_extra_msg(); + if !extra_msg.has_check_gc_peer() || extra_msg.get_index() == 0 { + // Corrupted message. + return; + } + if self.storage().tablet_index() < extra_msg.get_index() { + // Merge not finish. + return; + } + + forward_destroy_source_peer(ctx, msg); + } + + /// A peer confirms it's destroyed. 
+ pub fn on_gc_peer_response(&mut self, msg: &RaftMessage) { + let gc_peer_id = msg.get_from_peer().get_id(); + let state = self.storage().region_state(); + if state + .get_removed_records() + .iter() + .all(|p| p.get_id() != gc_peer_id) + && state.get_merged_records().iter().all(|p| { + p.get_source_peers() + .iter() + .all(|p| p.get_id() != gc_peer_id) + }) + { + return; + } + let ctx = self.gc_peer_context_mut(); + if ctx.confirmed_ids.contains(&gc_peer_id) { + return; + } + ctx.confirmed_ids.push(gc_peer_id); + } + + pub fn on_gc_peer_tick(&mut self, ctx: &mut StoreContext) { + if !self.is_leader() { + return; + } + let state = self.storage().region_state(); + if state.get_removed_records().is_empty() && state.get_merged_records().is_empty() { + return; + } + let mut need_gc_ids = Vec::with_capacity(5); + let gc_context = self.gc_peer_context(); + for peer in state.get_removed_records() { + need_gc_ids.push(peer.get_id()); + if gc_context.confirmed_ids.contains(&peer.get_id()) { + continue; + } + + let msg = self.tombstone_message_for_same_region(peer.clone()); + // For leader, it's OK to send gc message immediately. + let _ = ctx.trans.send(msg); + } + for record in state.get_merged_records() { + // For merge, we ask target to check whether source should be deleted. 
+ for (source, target) in record + .get_source_peers() + .iter() + .zip(record.get_target_peers()) + { + need_gc_ids.push(source.get_id()); + if gc_context.confirmed_ids.contains(&source.get_id()) { + continue; + } + + let mut msg = RaftMessage::default(); + msg.set_region_id(record.get_target_region_id()); + msg.set_from_peer(self.peer().clone()); + msg.set_to_peer(target.clone()); + msg.set_region_epoch(record.get_target_epoch().clone()); + let extra_msg = msg.mut_extra_msg(); + extra_msg.set_type(ExtraMessageType::MsgGcPeerRequest); + extra_msg.set_index(record.get_index()); + let check_peer = extra_msg.mut_check_gc_peer(); + check_peer.set_from_region_id(self.region_id()); + check_peer.set_check_region_id(record.get_source_region_id()); + check_peer.set_check_peer(source.clone()); + check_peer.set_check_region_epoch(record.get_source_epoch().clone()); + let _ = ctx.trans.send(msg); + } + } + let gc_ctx = self.gc_peer_context_mut(); + if !gc_ctx.confirmed_ids.is_empty() { + let mut confirmed_ids = mem::take(&mut gc_ctx.confirmed_ids); + confirmed_ids.retain(|id| need_gc_ids.contains(id)); + let mut req = RaftCmdRequest::default(); + let header = req.mut_header(); + header.set_region_id(self.region_id()); + header.set_peer(self.peer().clone()); + let admin = req.mut_admin_request(); + admin.set_cmd_type(AdminCmdType::UpdateGcPeer); + let gc_peer = admin.mut_update_gc_peers(); + gc_peer.set_peer_id(confirmed_ids); + let (ch, _) = CmdResChannel::pair(); + // It's OK to fail as we will retry by tick. + self.on_admin_command(ctx, req, ch); + } + self.maybe_schedule_gc_peer_tick(); + } + /// A peer can be destroyed in three cases: /// 1. Received a gc message; /// 2. 
Received a message whose target peer's ID is larger than this; diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 76baf31f9c8..492595851e2 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -12,7 +12,7 @@ pub use command::{ RequestSplit, SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, SplitFlowControl, SPLIT_PREFIX, }; -pub use life::DestroyProgress; +pub use life::{DestroyProgress, GcPeerContext}; pub use ready::{ cf_offset, write_initial_states, ApplyTrace, AsyncWriter, DataTrace, GenSnapTask, SnapState, StateStorage, diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 03dce74d4e7..c77766f6ce5 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -25,7 +25,10 @@ use std::{cmp, time::Instant}; use engine_traits::{KvEngine, RaftEngine}; use error_code::ErrorCodeExt; -use kvproto::{raft_cmdpb::AdminCmdType, raft_serverpb::RaftMessage}; +use kvproto::{ + raft_cmdpb::AdminCmdType, + raft_serverpb::{ExtraMessageType, RaftMessage}, +}; use protobuf::Message as _; use raft::{eraftpb, prelude::MessageType, Ready, StateRole, INVALID_ID}; use raftstore::{ @@ -168,7 +171,7 @@ impl Peer { } } - pub fn on_raft_message( + pub fn on_raft_message( &mut self, ctx: &mut StoreContext, mut msg: Box, @@ -187,16 +190,34 @@ impl Peer { if !self.serving() { return; } + if util::is_vote_msg(msg.get_message()) && self.maybe_gc_sender(&msg) { + return; + } if msg.get_to_peer().get_store_id() != self.peer().get_store_id() { ctx.raft_metrics.message_dropped.mismatch_store_id.inc(); return; } - if !msg.has_region_epoch() { - ctx.raft_metrics.message_dropped.mismatch_region_epoch.inc(); + if msg.get_is_tombstone() { + self.on_tombstone_message(&mut msg); return; } - if msg.get_is_tombstone() { - 
self.mark_for_destroy(None); + if msg.has_extra_msg() && msg.get_to_peer().get_id() == self.peer_id() { + // GcRequest/GcResponse may be sent from/to different regions, skip further + // checks. + match msg.get_extra_msg().get_type() { + ExtraMessageType::MsgGcPeerResponse => { + self.on_gc_peer_response(&msg); + return; + } + ExtraMessageType::MsgGcPeerRequest => { + self.on_gc_peer_request(ctx, &msg); + return; + } + _ => (), + } + } + if !msg.has_region_epoch() { + ctx.raft_metrics.message_dropped.mismatch_region_epoch.inc(); return; } if msg.has_merge_target() { @@ -221,7 +242,6 @@ impl Peer { } if msg.has_extra_msg() { unimplemented!(); - // return; } // TODO: drop all msg append when the peer is uninitialized and has conflict @@ -465,6 +485,7 @@ impl Peer { ctx.has_ready = true; if !has_extra_write + && !self.has_pending_messages() && !self.raft_group().has_ready() && (self.serving() || self.postponed_destroy()) { @@ -508,6 +529,11 @@ impl Peer { self.send_raft_message_on_leader(ctx, msg); } } + if self.has_pending_messages() { + for msg in self.take_pending_messages() { + self.send_raft_message_on_leader(ctx, msg); + } + } } self.apply_reads(ctx, &ready); @@ -533,6 +559,15 @@ impl Peer { .flat_map(|m| self.build_raft_message(m)) .collect(); } + if self.has_pending_messages() { + if write_task.messages.is_empty() { + write_task.messages = self.take_pending_messages(); + } else { + write_task + .messages + .append(&mut self.take_pending_messages()); + } + } if !self.serving() { self.start_destroy(ctx, &mut write_task); if self.persisted_index() != 0 { @@ -750,6 +785,7 @@ impl Peer { self.add_pending_tick(PeerTick::CompactLog); self.add_pending_tick(PeerTick::SplitRegionCheck); self.add_pending_tick(PeerTick::CheckLongUncommitted); + self.maybe_schedule_gc_peer_tick(); } StateRole::Follower => { self.leader_lease_mut().expire(); diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 
1fae813577c..adf20bfce37 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -546,6 +546,8 @@ impl Storage { let mut snap_data = RaftSnapshotData::default(); snap_data.merge_from_bytes(snap.get_data())?; let region = snap_data.take_region(); + let removed_records = snap_data.take_removed_records(); + let merged_records = snap_data.take_merged_records(); if region.get_id() != region_id { return Err(box_err!( "mismatch region id {}!={}", @@ -586,6 +588,8 @@ impl Storage { let region_state = self.region_state_mut(); region_state.set_state(PeerState::Normal); region_state.set_region(region); + region_state.set_removed_records(removed_records); + region_state.set_merged_records(merged_records); region_state.set_tablet_index(last_index); let entry_storage = self.entry_storage_mut(); entry_storage.raft_state_mut().set_last_index(last_index); diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 6cfcda4da25..814dc72e622 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -10,7 +10,10 @@ use collections::{HashMap, HashSet}; use engine_traits::{ CachedTablet, FlushState, KvEngine, RaftEngine, TabletContext, TabletRegistry, }; -use kvproto::{metapb, pdpb, raft_serverpb::RegionLocalState}; +use kvproto::{ + metapb, pdpb, + raft_serverpb::{RaftMessage, RegionLocalState}, +}; use pd_client::BucketStat; use raft::{RawNode, StateRole}; use raftstore::{ @@ -28,8 +31,8 @@ use super::storage::Storage; use crate::{ fsm::ApplyScheduler, operation::{ - AsyncWriter, CompactLogContext, DestroyProgress, ProposalControl, SimpleWriteReqEncoder, - SplitFlowControl, TxnContext, + AsyncWriter, CompactLogContext, DestroyProgress, GcPeerContext, ProposalControl, + SimpleWriteReqEncoder, SplitFlowControl, TxnContext, }, router::{CmdResChannel, PeerTick, QueryResChannel}, Result, @@ -103,6 +106,12 @@ pub struct Peer { 
leader_transferee: u64, long_uncommitted_threshold: u64, + + /// Pending messages to be sent on handle ready. We should avoid sending + /// messages immediately otherwise it may break the persistence assumption. + pending_messages: Vec, + + gc_peer_context: GcPeerContext, } impl Peer { @@ -182,6 +191,8 @@ impl Peer { cfg.long_uncommitted_base_threshold.0.as_secs(), 1, ), + pending_messages: vec![], + gc_peer_context: GcPeerContext::default(), }; // If this region has only one peer and I am the one, campaign directly. @@ -624,6 +635,7 @@ impl Peer { #[inline] pub fn add_pending_tick(&mut self, tick: PeerTick) { + // Msg per batch is 4096/256 by default, the buffer won't grow too large. self.pending_ticks.push(tick); } @@ -755,4 +767,29 @@ impl Peer { pub fn set_long_uncommitted_threshold(&mut self, dur: Duration) { self.long_uncommitted_threshold = cmp::max(dur.as_secs(), 1); } + + #[inline] + pub fn add_message(&mut self, msg: RaftMessage) { + self.pending_messages.push(msg); + } + + #[inline] + pub fn has_pending_messages(&mut self) -> bool { + !self.pending_messages.is_empty() + } + + #[inline] + pub fn take_pending_messages(&mut self) -> Vec { + mem::take(&mut self.pending_messages) + } + + #[inline] + pub fn gc_peer_context(&self) -> &GcPeerContext { + &self.gc_peer_context + } + + #[inline] + pub fn gc_peer_context_mut(&mut self) -> &mut GcPeerContext { + &mut self.gc_peer_context + } } diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 8814a97cc5f..a14c9ba9866 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -33,6 +33,7 @@ pub enum PeerTick { ReactivateMemoryLock = 8, ReportBuckets = 9, CheckLongUncommitted = 10, + GcPeer = 11, } impl PeerTick { @@ -52,6 +53,7 @@ impl PeerTick { PeerTick::ReactivateMemoryLock => "reactivate_memory_lock", PeerTick::ReportBuckets => "report_buckets", PeerTick::CheckLongUncommitted => 
"check_long_uncommitted", + PeerTick::GcPeer => "gc_peer", } } @@ -68,6 +70,7 @@ impl PeerTick { PeerTick::ReactivateMemoryLock, PeerTick::ReportBuckets, PeerTick::CheckLongUncommitted, + PeerTick::GcPeer, ]; TICKS } diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 2076272b44b..ac3f30c7107 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -525,6 +525,10 @@ impl Cluster { &self.nodes[offset] } + pub fn receiver(&self, offset: usize) -> &Receiver { + &self.receivers[offset] + } + /// Send messages and wait for side effects are all handled. #[allow(clippy::vec_box)] pub fn dispatch(&self, region_id: u64, mut msgs: Vec>) { diff --git a/components/raftstore-v2/tests/integrations/test_conf_change.rs b/components/raftstore-v2/tests/integrations/test_conf_change.rs index 4b3445a00ad..7fa75a5a281 100644 --- a/components/raftstore-v2/tests/integrations/test_conf_change.rs +++ b/components/raftstore-v2/tests/integrations/test_conf_change.rs @@ -38,7 +38,7 @@ fn test_simple_change() { let match_index = meta.raft_apply.applied_index; assert_eq!(meta.region_state.epoch.version, epoch.get_version()); assert_eq!(meta.region_state.epoch.conf_ver, new_conf_ver); - assert_eq!(meta.region_state.peers, vec![leader_peer, new_peer]); + assert_eq!(meta.region_state.peers, vec![leader_peer, new_peer.clone()]); // So heartbeat will create a learner. 
cluster.dispatch(2, vec![]); @@ -96,6 +96,42 @@ fn test_simple_change() { assert_eq!(meta.region_state.epoch.version, epoch.get_version()); assert_eq!(meta.region_state.epoch.conf_ver, new_conf_ver); assert_eq!(meta.region_state.peers, vec![leader_peer]); + cluster.routers[0].wait_flush(region_id, Duration::from_millis(300)); + let raft_engine = &cluster.node(0).running_state().unwrap().raft_engine; + let region_state = raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); + assert!( + region_state.get_removed_records().contains(&new_peer), + "{:?}", + region_state + ); + + // If adding a peer on the same store, removed_records should be cleaned. + req.mut_header() + .mut_region_epoch() + .set_conf_ver(new_conf_ver); + req.mut_admin_request() + .mut_change_peer() + .set_change_type(ConfChangeType::AddLearnerNode); + req.mut_admin_request() + .mut_change_peer() + .mut_peer() + .set_id(11); + let resp = cluster.routers[0].admin_command(2, req.clone()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + cluster.routers[0].wait_flush(region_id, Duration::from_millis(300)); + let region_state = raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); + assert!( + region_state.get_removed_records().is_empty(), + "{:?}", + region_state + ); + // TODO: check if the peer is removed once life trace is implemented or // snapshot is implemented. 
diff --git a/components/raftstore-v2/tests/integrations/test_life.rs b/components/raftstore-v2/tests/integrations/test_life.rs index a2ae0bbb9f8..2a5dfafc509 100644 --- a/components/raftstore-v2/tests/integrations/test_life.rs +++ b/components/raftstore-v2/tests/integrations/test_life.rs @@ -7,14 +7,19 @@ use std::{ }; use crossbeam::channel::TrySendError; -use engine_traits::{RaftEngine, RaftEngineReadOnly}; +use engine_traits::{RaftEngine, RaftEngineReadOnly, CF_DEFAULT}; use futures::executor::block_on; use kvproto::{ metapb, - raft_serverpb::{PeerState, RaftMessage}, + raft_cmdpb::AdminCmdType, + raft_serverpb::{ExtraMessageType, PeerState, RaftMessage}, }; -use raftstore_v2::router::{DebugInfoChannel, PeerMsg}; -use tikv_util::store::new_peer; +use raft::prelude::{ConfChangeType, MessageType}; +use raftstore_v2::{ + router::{DebugInfoChannel, PeerMsg, PeerTick}, + SimpleWriteEncoder, +}; +use tikv_util::store::{new_learner_peer, new_peer}; use crate::cluster::{Cluster, TestRouter}; @@ -62,6 +67,23 @@ fn assert_tombstone(raft_engine: &impl RaftEngine, region_id: u64, peer: &metapb ); } +#[track_caller] +fn assert_valid_report(report: &RaftMessage, region_id: u64, peer_id: u64) { + assert_eq!( + report.get_extra_msg().get_type(), + ExtraMessageType::MsgGcPeerResponse + ); + assert_eq!(report.get_region_id(), region_id); + assert_eq!(report.get_from_peer().get_id(), peer_id); +} + +#[track_caller] +fn assert_tombstone_msg(msg: &RaftMessage, region_id: u64, peer_id: u64) { + assert_eq!(msg.get_region_id(), region_id); + assert_eq!(msg.get_to_peer().get_id(), peer_id); + assert!(msg.get_is_tombstone()); +} + /// Test a peer can be created by general raft message and destroyed tombstone /// message. #[test] @@ -99,9 +121,6 @@ fn test_life_by_message() { msg.take_region_epoch(); }); - // Check tombstone. - assert_wrong(&|msg| msg.set_is_tombstone(true)); - // Correct message will create a peer, but the peer will not be initialized. 
router.send_raft_message(msg.clone()).unwrap(); let timeout = Duration::from_secs(3); @@ -156,11 +175,20 @@ fn test_destroy_by_larger_id() { msg.mut_region_epoch().set_conf_ver(1); msg.set_from_peer(new_peer(2, 8)); let raft_message = msg.mut_message(); - raft_message.set_msg_type(raft::prelude::MessageType::MsgHeartbeat); + raft_message.set_msg_type(MessageType::MsgHeartbeat); raft_message.set_from(6); raft_message.set_term(init_term); // Create the peer. router.send_raft_message(msg.clone()).unwrap(); + // There must be heartbeat response. + let hb = cluster + .receiver(0) + .recv_timeout(Duration::from_millis(300)) + .unwrap(); + assert_eq!( + hb.get_message().get_msg_type(), + MessageType::MsgHeartbeatResponse + ); let timeout = Duration::from_secs(3); let meta = router @@ -178,6 +206,20 @@ fn test_destroy_by_larger_id() { .unwrap(); assert_eq!(meta.raft_status.id, test_peer_id); assert_eq!(meta.raft_status.hard_state.term, init_term); + cluster + .receiver(0) + .recv_timeout(Duration::from_millis(300)) + .unwrap_err(); + + // Smaller ID tombstone message should trigger report. + let mut smaller_id_tombstone_msg = smaller_id_msg.clone(); + smaller_id_tombstone_msg.set_is_tombstone(true); + router.send_raft_message(smaller_id_tombstone_msg).unwrap(); + let report = cluster + .receiver(0) + .recv_timeout(Duration::from_millis(300)) + .unwrap(); + assert_valid_report(&report, test_region_id, test_peer_id - 1); // Larger ID should trigger destroy. 
let mut larger_id_msg = smaller_id_msg; @@ -199,3 +241,159 @@ fn test_destroy_by_larger_id() { assert_eq!(meta.raft_status.id, test_peer_id + 1); assert_eq!(meta.raft_status.hard_state.term, init_term + 1); } + +#[test] +fn test_gc_peer_request() { + let cluster = Cluster::default(); + let router = &cluster.routers[0]; + let test_region_id = 4; + let test_peer_id = 5; + let test_leader_id = 6; + + let mut msg = Box::::default(); + msg.set_region_id(test_region_id); + msg.set_to_peer(new_peer(1, test_peer_id)); + msg.mut_region_epoch().set_conf_ver(1); + msg.set_from_peer(new_peer(2, test_leader_id)); + let raft_message = msg.mut_message(); + raft_message.set_msg_type(raft::prelude::MessageType::MsgHeartbeat); + raft_message.set_from(6); + raft_message.set_term(5); + + // Tombstone message should create the peer and then destroy it. + let mut tombstone_msg = msg.clone(); + tombstone_msg.set_is_tombstone(true); + router.send_raft_message(tombstone_msg.clone()).unwrap(); + cluster.routers[0].wait_flush(test_region_id, Duration::from_millis(300)); + assert_peer_not_exist(test_region_id, test_peer_id, router); + // Resend a normal message will not create the peer. + router.send_raft_message(msg).unwrap(); + assert_peer_not_exist(test_region_id, test_peer_id, router); + cluster + .receiver(0) + .recv_timeout(Duration::from_millis(300)) + .unwrap_err(); + // Resend tombstone message should trigger report. 
+ router.send_raft_message(tombstone_msg).unwrap(); + assert_peer_not_exist(test_region_id, test_peer_id, router); + let report = cluster + .receiver(0) + .recv_timeout(Duration::from_millis(300)) + .unwrap(); + assert_valid_report(&report, test_region_id, test_peer_id); +} + +#[test] +fn test_gc_peer_response() { + let cluster = Cluster::with_node_count(2, None); + let region_id = 2; + let mut req = cluster.routers[0].new_request_for(region_id); + let admin_req = req.mut_admin_request(); + admin_req.set_cmd_type(AdminCmdType::ChangePeer); + admin_req + .mut_change_peer() + .set_change_type(ConfChangeType::AddLearnerNode); + let store_id = cluster.node(1).id(); + let new_peer = new_learner_peer(store_id, 10); + admin_req.mut_change_peer().set_peer(new_peer.clone()); + let resp = cluster.routers[0].admin_command(2, req.clone()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + let raft_engine = &cluster.node(0).running_state().unwrap().raft_engine; + let region_state = raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); + assert!(region_state.get_removed_records().is_empty()); + + let new_conf_ver = req.get_header().get_region_epoch().get_conf_ver() + 1; + req.mut_header() + .mut_region_epoch() + .set_conf_ver(new_conf_ver); + req.mut_admin_request() + .mut_change_peer() + .set_change_type(ConfChangeType::RemoveNode); + let resp = cluster.routers[0] + .admin_command(region_id, req.clone()) + .unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + cluster.routers[0].wait_flush(region_id, Duration::from_millis(300)); + // Drain all existing messages. 
+ while cluster.receiver(0).try_recv().is_ok() {} + + let mut msg = Box::::default(); + msg.set_region_id(region_id); + msg.set_to_peer(req.get_header().get_peer().clone()); + msg.set_from_peer(new_peer); + let receiver = &cluster.receiver(0); + for ty in &[MessageType::MsgRequestVote, MessageType::MsgRequestPreVote] { + msg.mut_message().set_msg_type(*ty); + cluster.routers[0].send_raft_message(msg.clone()).unwrap(); + let tombstone_msg = match receiver.recv_timeout(Duration::from_millis(300)) { + Ok(msg) => msg, + Err(e) => panic!("failed to receive tombstone message {:?}: {:?}", ty, e), + }; + assert_tombstone_msg(&tombstone_msg, region_id, 10); + } + // Non-vote message should not trigger tombstone. + msg.mut_message().set_msg_type(MessageType::MsgHeartbeat); + cluster.routers[0].send_raft_message(msg).unwrap(); + cluster + .receiver(0) + .recv_timeout(Duration::from_millis(300)) + .unwrap_err(); + + // GcTick should also trigger tombstone. + cluster.routers[0] + .send(region_id, PeerMsg::Tick(PeerTick::GcPeer)) + .unwrap(); + let tombstone_msg = cluster + .receiver(0) + .recv_timeout(Duration::from_millis(300)) + .unwrap(); + assert_tombstone_msg(&tombstone_msg, region_id, 10); + + // First message to create the peer and destroy. + cluster.routers[1] + .send_raft_message(Box::new(tombstone_msg.clone())) + .unwrap(); + cluster.routers[1].wait_flush(region_id, Duration::from_millis(300)); + cluster + .receiver(1) + .recv_timeout(Duration::from_millis(300)) + .unwrap_err(); + // Send message should trigger tombstone report. 
+ cluster.routers[1] + .send_raft_message(Box::new(tombstone_msg)) + .unwrap(); + let report = cluster + .receiver(1) + .recv_timeout(Duration::from_millis(300)) + .unwrap(); + assert_valid_report(&report, region_id, 10); + cluster.routers[0] + .send_raft_message(Box::new(report)) + .unwrap(); + let raft_engine = &cluster.node(0).running_state().unwrap().raft_engine; + let region_state = raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); + assert_eq!(region_state.get_removed_records().len(), 1); + // Tick should flush records gc. + cluster.routers[0] + .send(region_id, PeerMsg::Tick(PeerTick::GcPeer)) + .unwrap(); + // Trigger a write to make sure records gc is finished. + let header = Box::new(cluster.routers[0].new_request_for(region_id).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, b"key", b"value"); + let (msg, sub) = PeerMsg::simple_write(header, put.encode()); + cluster.routers[0].send(region_id, msg).unwrap(); + block_on(sub.result()).unwrap(); + cluster.routers[0].wait_flush(region_id, Duration::from_millis(300)); + let region_state = raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); + assert!(region_state.get_removed_records().is_empty()); +} diff --git a/components/raftstore/src/store/async_io/read.rs b/components/raftstore/src/store/async_io/read.rs index 45492feb294..5b53ad499b5 100644 --- a/components/raftstore/src/store/async_io/read.rs +++ b/components/raftstore/src/store/async_io/read.rs @@ -218,6 +218,8 @@ where snap_data.set_region(region_state.get_region().clone()); snap_data.set_version(TABLET_SNAPSHOT_VERSION); snap_data.mut_meta().set_for_balance(for_balance); + snap_data.set_removed_records(region_state.get_removed_records().into()); + snap_data.set_merged_records(region_state.get_merged_records().into()); snapshot.set_data(snap_data.write_to_bytes().unwrap().into()); // create checkpointer. 
diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index bb262b9ffa8..1853d200140 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -1695,9 +1695,9 @@ where AdminCmdType::PrepareFlashback | AdminCmdType::FinishFlashback => { self.exec_flashback(ctx, request) } + AdminCmdType::UpdateGcPeer => Err(box_err!("v2 only command and it's safe to skip")), AdminCmdType::BatchSwitchWitness => self.exec_batch_switch_witness(ctx, request), AdminCmdType::InvalidAdmin => Err(box_err!("unsupported admin command type")), - AdminCmdType::UpdateGcPeer => unimplemented!(), }?; response.set_cmd_type(cmd_type); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 75da7d497e4..d5b73e5f721 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2790,8 +2790,8 @@ where ExtraMessageType::MsgVoterReplicatedIndexResponse => { self.on_voter_replicated_index_response(msg.get_extra_msg()); } - ExtraMessageType::MsgGcPeerRequest => unimplemented!(), - ExtraMessageType::MsgGcPeerResponse => unimplemented!(), + // It's v2 only message and ignore does no harm. + ExtraMessageType::MsgGcPeerRequest | ExtraMessageType::MsgGcPeerResponse => (), } } From 44a586f9083a83fa7f083acddc1dfd336ba7d264 Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 3 Feb 2023 19:01:56 +0800 Subject: [PATCH 0485/1149] raftstore: support priority scheduling for async write (#14103) ref tikv/tikv#13730 Support priority-based scheduling for the async write. Each channel of async write worker is replaced with a priority-based channel when the config `resource-control.enabled` is true. 
Signed-off-by: Connor1996 Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 + components/batch-system/src/batch.rs | 25 +- components/batch-system/src/channel.rs | 252 ------------------ components/batch-system/src/fsm.rs | 8 +- components/batch-system/src/lib.rs | 4 +- components/batch-system/src/scheduler.rs | 105 ++++++++ components/batch-system/src/test_runner.rs | 3 +- components/raftstore-v2/src/batch/store.rs | 2 +- components/raftstore-v2/src/router/message.rs | 2 +- .../raftstore/src/store/async_io/write.rs | 49 +++- .../src/store/async_io/write_router.rs | 106 +++++--- .../src/store/async_io/write_tests.rs | 160 +++++++++-- components/raftstore/src/store/fsm/apply.rs | 5 +- components/raftstore/src/store/fsm/store.rs | 21 +- components/raftstore/src/store/msg.rs | 2 +- .../src/store/worker/refresh_config.rs | 2 +- components/resource_control/Cargo.toml | 2 + components/resource_control/src/channel.rs | 183 +++++++++++++ components/resource_control/src/lib.rs | 3 + 19 files changed, 580 insertions(+), 356 deletions(-) delete mode 100644 components/batch-system/src/channel.rs create mode 100644 components/batch-system/src/scheduler.rs create mode 100644 components/resource_control/src/channel.rs diff --git a/Cargo.lock b/Cargo.lock index 633194d9323..21145778082 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4689,6 +4689,8 @@ name = "resource_control" version = "0.0.1" dependencies = [ "byteorder", + "collections", + "crossbeam", "crossbeam-skiplist", "dashmap", "fail", diff --git a/components/batch-system/src/batch.rs b/components/batch-system/src/batch.rs index 48ef809d421..19005ef2c43 100644 --- a/components/batch-system/src/batch.rs +++ b/components/batch-system/src/batch.rs @@ -17,18 +17,21 @@ use std::{ use fail::fail_point; use file_system::{set_io_type, IoType}; -use resource_control::ResourceController; +use resource_control::{ + channel::{unbounded, Receiver, Sender}, + ResourceController, +}; use tikv_util::{ debug, error, info, mpsc, safe_panic, 
sys::thread::StdThreadBuildWrapper, thd_name, time::Instant, }; use crate::{ - channel::{fsm_channel, ControlScheduler, FsmReceiver, FsmSender, NormalScheduler}, config::Config, fsm::{Fsm, FsmScheduler, Priority}, mailbox::BasicMailbox, router::Router, + scheduler::{ControlScheduler, NormalScheduler}, }; /// A unify type for FSMs so that they can be sent to channel easily. @@ -288,7 +291,7 @@ pub trait PollHandler: Send + 'static { /// Internal poller that fetches batch and call handler hooks for readiness. pub struct Poller { pub router: Router, ControlScheduler>, - pub fsm_receiver: FsmReceiver, + pub fsm_receiver: Receiver>, pub handler: Handler, pub max_batch_size: usize, pub reschedule_duration: Duration, @@ -481,8 +484,8 @@ pub trait HandlerBuilder { pub struct BatchSystem { name_prefix: Option, router: BatchRouter, - receiver: FsmReceiver, - low_receiver: FsmReceiver, + receiver: Receiver>, + low_receiver: Receiver>, pool_size: usize, max_batch_size: usize, workers: Arc>>>, @@ -599,8 +602,8 @@ where struct PoolStateBuilder { max_batch_size: usize, reschedule_duration: Duration, - fsm_receiver: FsmReceiver, - fsm_sender: FsmSender, + fsm_receiver: Receiver>, + fsm_sender: Sender>, pool_size: usize, } @@ -633,8 +636,8 @@ impl PoolStateBuilder { pub struct PoolState> { pub name_prefix: String, pub handler_builder: H, - pub fsm_receiver: FsmReceiver, - pub fsm_sender: FsmSender, + pub fsm_receiver: Receiver>, + pub fsm_sender: Sender>, pub low_priority_pool_size: usize, pub expected_pool_size: usize, pub workers: Arc>>>, @@ -658,8 +661,8 @@ pub fn create_system( ) -> (BatchRouter, BatchSystem) { let state_cnt = Arc::new(AtomicUsize::new(0)); let control_box = BasicMailbox::new(sender, controller, state_cnt.clone()); - let (sender, receiver) = fsm_channel(resource_ctl); - let (low_sender, low_receiver) = fsm_channel(None); // no resource control for low fsm + let (sender, receiver) = unbounded(resource_ctl); + let (low_sender, low_receiver) = unbounded(None); // 
no resource control for low fsm let normal_scheduler = NormalScheduler { sender: sender.clone(), low_sender, diff --git a/components/batch-system/src/channel.rs b/components/batch-system/src/channel.rs deleted file mode 100644 index 094b6a7a2ae..00000000000 --- a/components/batch-system/src/channel.rs +++ /dev/null @@ -1,252 +0,0 @@ -// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. - -use std::{cell::RefCell, sync::Arc}; - -use crossbeam::channel::{self, RecvError, SendError, TryRecvError, TrySendError}; -use kvproto::kvrpcpb::CommandPri; -use resource_control::{ResourceConsumeType, ResourceController}; -use tikv_util::{mpsc::priority_queue, warn}; - -use crate::{ - fsm::{Fsm, FsmScheduler, Priority, ResourceMetered}, - FsmTypes, -}; - -pub fn fsm_channel( - resource_ctl: Option>, -) -> (FsmSender, FsmReceiver) { - if let Some(ctl) = resource_ctl { - let (tx, rx) = priority_queue::unbounded(); - ( - FsmSender::Priority { - resource_ctl: ctl, - sender: tx, - last_msg_group: RefCell::new(String::new()), - }, - FsmReceiver::Priority(rx), - ) - } else { - let (tx, rx) = channel::unbounded(); - (FsmSender::Vanilla(tx), FsmReceiver::Vanilla(rx)) - } -} - -pub struct NormalScheduler { - pub(crate) sender: FsmSender, - pub(crate) low_sender: FsmSender, -} - -impl Clone for NormalScheduler -where - N: Fsm, - C: Fsm, -{ - fn clone(&self) -> Self { - NormalScheduler { - sender: self.sender.clone(), - low_sender: self.low_sender.clone(), - } - } -} - -impl FsmScheduler for NormalScheduler -where - N: Fsm, - C: Fsm, -{ - type Fsm = N; - - fn consume_msg_resource(&self, msg: &::Message) { - self.sender.consume_msg_resource(msg); - } - - #[inline] - fn schedule(&self, fsm: Box) { - let sender = match fsm.get_priority() { - Priority::Normal => &self.sender, - Priority::Low => &self.low_sender, - }; - - match sender.send(FsmTypes::Normal(fsm)) { - Ok(()) => {} - Err(SendError(FsmTypes::Normal(fsm))) => warn!("failed to schedule fsm {:p}", fsm), - _ => 
unreachable!(), - } - } - - fn shutdown(&self) { - // TODO: close it explicitly once it's supported. - // Magic number, actually any number greater than poll pool size works. - for _ in 0..256 { - let _ = self.sender.send(FsmTypes::Empty); - let _ = self.low_sender.send(FsmTypes::Empty); - } - } -} - -pub struct ControlScheduler { - pub(crate) sender: FsmSender, -} - -impl Clone for ControlScheduler -where - N: Fsm, - C: Fsm, -{ - fn clone(&self) -> Self { - ControlScheduler { - sender: self.sender.clone(), - } - } -} - -impl FsmScheduler for ControlScheduler -where - N: Fsm, - C: Fsm, -{ - type Fsm = C; - - fn consume_msg_resource(&self, _msg: &::Message) {} - - #[inline] - fn schedule(&self, fsm: Box) { - match self.sender.send(FsmTypes::Control(fsm)) { - Ok(()) => {} - Err(SendError(FsmTypes::Control(fsm))) => warn!("failed to schedule fsm {:p}", fsm), - _ => unreachable!(), - } - } - - fn shutdown(&self) { - // TODO: close it explicitly once it's supported. - // Magic number, actually any number greater than poll pool size works. - for _ in 0..256 { - let _ = self.sender.send(FsmTypes::Empty); - } - } -} - -pub enum FsmSender { - Vanilla(channel::Sender>), - Priority { - resource_ctl: Arc, - sender: priority_queue::Sender>, - last_msg_group: RefCell, - }, -} - -impl Clone for FsmSender -where - N: Fsm, - C: Fsm, -{ - fn clone(&self) -> Self { - match self { - FsmSender::Vanilla(sender) => FsmSender::Vanilla(sender.clone()), - FsmSender::Priority { - resource_ctl, - sender, - .. 
- } => FsmSender::Priority { - resource_ctl: resource_ctl.clone(), - sender: sender.clone(), - last_msg_group: RefCell::new(String::new()), - }, - } - } -} - -impl FsmSender { - pub fn send(&self, fsm: FsmTypes) -> Result<(), SendError>> { - match self { - FsmSender::Vanilla(sender) => sender.send(fsm), - FsmSender::Priority { - resource_ctl, - sender, - last_msg_group, - } => { - // TODO: pass different priority - let pri = resource_ctl - .get_priority(last_msg_group.borrow().as_bytes(), CommandPri::Normal); - sender.send(fsm, pri) - } - } - } - - pub fn try_send(&self, fsm: FsmTypes) -> Result<(), TrySendError>> { - match self { - FsmSender::Vanilla(sender) => sender.try_send(fsm), - FsmSender::Priority { - resource_ctl, - sender, - last_msg_group, - } => { - let priority = resource_ctl - .get_priority(last_msg_group.borrow().as_bytes(), CommandPri::Normal); - sender.try_send(fsm, priority) - } - } - } - - fn consume_msg_resource(&self, msg: &N::Message) { - match self { - FsmSender::Vanilla(_) => {} - FsmSender::Priority { - resource_ctl, - last_msg_group, - .. 
- } => { - if let Some(mut groups) = msg.get_resource_consumptions() { - let mut dominant_group = "".to_owned(); - let mut max_write_bytes = 0; - for (group_name, write_bytes) in groups.drain() { - resource_ctl.consume( - group_name.as_bytes(), - ResourceConsumeType::IoBytes(write_bytes), - ); - if write_bytes > max_write_bytes { - dominant_group = group_name; - max_write_bytes = write_bytes; - } - } - *last_msg_group.borrow_mut() = dominant_group; - } - } - } - } -} - -pub enum FsmReceiver { - Vanilla(channel::Receiver>), - Priority(priority_queue::Receiver>), -} - -impl Clone for FsmReceiver -where - N: Fsm, - C: Fsm, -{ - fn clone(&self) -> Self { - match self { - FsmReceiver::Vanilla(receiver) => FsmReceiver::Vanilla(receiver.clone()), - FsmReceiver::Priority(receiver) => FsmReceiver::Priority(receiver.clone()), - } - } -} - -impl FsmReceiver { - pub fn recv(&self) -> Result, RecvError> { - match self { - FsmReceiver::Vanilla(receiver) => receiver.recv(), - FsmReceiver::Priority(receiver) => receiver.recv(), - } - } - - pub fn try_recv(&self) -> Result, TryRecvError> { - match self { - FsmReceiver::Vanilla(receiver) => receiver.try_recv(), - FsmReceiver::Priority(receiver) => receiver.try_recv(), - } - } -} diff --git a/components/batch-system/src/fsm.rs b/components/batch-system/src/fsm.rs index 5d9e009fa01..3fa5ad15a64 100644 --- a/components/batch-system/src/fsm.rs +++ b/components/batch-system/src/fsm.rs @@ -10,7 +10,7 @@ use std::{ usize, }; -use collections::HashMap; +use resource_control::ResourceMetered; use crate::mailbox::BasicMailbox; @@ -36,12 +36,6 @@ pub trait FsmScheduler { fn consume_msg_resource(&self, msg: &::Message); } -pub trait ResourceMetered { - fn get_resource_consumptions(&self) -> Option> { - None - } -} - /// A `Fsm` is a finite state machine. It should be able to be notified for /// updating internal state according to incoming messages. 
pub trait Fsm: Send + 'static { diff --git a/components/batch-system/src/lib.rs b/components/batch-system/src/lib.rs index f4f799dcc9a..2e59d42808c 100644 --- a/components/batch-system/src/lib.rs +++ b/components/batch-system/src/lib.rs @@ -1,12 +1,12 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. mod batch; -mod channel; mod config; mod fsm; mod mailbox; mod metrics; mod router; +mod scheduler; #[cfg(feature = "test-runner")] pub mod test_runner; @@ -17,7 +17,7 @@ pub use self::{ PollHandler, Poller, PoolState, }, config::Config, - fsm::{Fsm, FsmScheduler, Priority, ResourceMetered}, + fsm::{Fsm, FsmScheduler, Priority}, mailbox::{BasicMailbox, Mailbox}, router::Router, }; diff --git a/components/batch-system/src/scheduler.rs b/components/batch-system/src/scheduler.rs new file mode 100644 index 00000000000..9eadb125f78 --- /dev/null +++ b/components/batch-system/src/scheduler.rs @@ -0,0 +1,105 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use crossbeam::channel::SendError; +use resource_control::channel::Sender; +use tikv_util::warn; + +use crate::{ + fsm::{Fsm, FsmScheduler, Priority}, + FsmTypes, +}; +pub struct NormalScheduler { + pub(crate) sender: Sender>, + pub(crate) low_sender: Sender>, +} + +impl Clone for NormalScheduler +where + N: Fsm, + C: Fsm, +{ + fn clone(&self) -> Self { + NormalScheduler { + sender: self.sender.clone(), + low_sender: self.low_sender.clone(), + } + } +} + +impl FsmScheduler for NormalScheduler +where + N: Fsm, + C: Fsm, +{ + type Fsm = N; + + fn consume_msg_resource(&self, msg: &::Message) { + self.sender.consume_msg_resource(msg); + } + + #[inline] + fn schedule(&self, fsm: Box) { + let sender = match fsm.get_priority() { + Priority::Normal => &self.sender, + Priority::Low => &self.low_sender, + }; + + match sender.send(FsmTypes::Normal(fsm), 0) { + Ok(_) => {} + Err(SendError(FsmTypes::Normal(fsm))) => warn!("failed to schedule fsm {:p}", fsm), + _ => unreachable!(), + } + } + + fn shutdown(&self) { + // TODO: close it explicitly once it's supported. + // Magic number, actually any number greater than poll pool size works. + for _ in 0..256 { + let _ = self.sender.send(FsmTypes::Empty, 0); + let _ = self.low_sender.send(FsmTypes::Empty, 0); + } + } +} + +pub struct ControlScheduler { + pub(crate) sender: Sender>, +} + +impl Clone for ControlScheduler +where + N: Fsm, + C: Fsm, +{ + fn clone(&self) -> Self { + ControlScheduler { + sender: self.sender.clone(), + } + } +} + +impl FsmScheduler for ControlScheduler +where + N: Fsm, + C: Fsm, +{ + type Fsm = C; + + fn consume_msg_resource(&self, _msg: &::Message) {} + + #[inline] + fn schedule(&self, fsm: Box) { + match self.sender.send(FsmTypes::Control(fsm), 0) { + Ok(_) => {} + Err(SendError(FsmTypes::Control(fsm))) => warn!("failed to schedule fsm {:p}", fsm), + _ => unreachable!(), + } + } + + fn shutdown(&self) { + // TODO: close it explicitly once it's supported. 
+ // Magic number, actually any number greater than poll pool size works. + for _ in 0..256 { + let _ = self.sender.send(FsmTypes::Empty, 0); + } + } +} diff --git a/components/batch-system/src/test_runner.rs b/components/batch-system/src/test_runner.rs index a3ae80dc55a..9a84a5fe545 100644 --- a/components/batch-system/src/test_runner.rs +++ b/components/batch-system/src/test_runner.rs @@ -13,9 +13,10 @@ use std::{ use collections::HashMap; use derive_more::{Add, AddAssign}; +use resource_control::ResourceMetered; use tikv_util::mpsc; -use crate::{fsm::ResourceMetered, *}; +use crate::*; /// Message `Runner` can accepts. pub enum Message { diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 2a3cc63f797..83d6b2e1f2a 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -477,7 +477,7 @@ impl Workers { async_read: Worker::new("async-read-worker"), pd, tablet_gc: Worker::new("tablet-gc-worker"), - async_write: StoreWriters::default(), + async_write: StoreWriters::new(None), purge, background, } diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index a14c9ba9866..91efc54c867 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -2,13 +2,13 @@ // #[PerformanceCriticalPath] -use batch_system::ResourceMetered; use kvproto::{ metapb, raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}, raft_serverpb::RaftMessage, }; use raftstore::store::{metrics::RaftEventDurationType, FetchedLogs, GenSnapRes}; +use resource_control::ResourceMetered; use tikv_util::time::Instant; use super::{ diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 98c76ddd6d1..4d8392edd55 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -14,7 +14,7 @@ use 
std::{ }; use collections::HashMap; -use crossbeam::channel::{bounded, Receiver, Sender, TryRecvError}; +use crossbeam::channel::TryRecvError; use engine_traits::{ KvEngine, PerfContext, PerfContextKind, RaftEngine, RaftLogBatch, WriteBatch, WriteOptions, }; @@ -23,6 +23,10 @@ use fail::fail_point; use kvproto::raft_serverpb::{RaftLocalState, RaftMessage}; use protobuf::Message; use raft::eraftpb::Entry; +use resource_control::{ + channel::{bounded, Receiver, Sender}, + ResourceController, ResourceMetered, +}; use tikv_util::{ box_err, config::{ReadableSize, Tracker, VersionTrack}, @@ -41,6 +45,7 @@ use crate::{ local_metrics::{RaftSendMessageMetrics, StoreWriteMetrics, TimeTracker}, metrics::*, transport::Transport, + util, util::LatencyInspector, PeerMsg, }, @@ -268,6 +273,29 @@ where inspector: Vec, }, Shutdown, + #[cfg(test)] + Pause(std::sync::mpsc::Receiver<()>), +} + +impl ResourceMetered for WriteMsg +where + EK: KvEngine, + ER: RaftEngine, +{ + fn get_resource_consumptions(&self) -> Option> { + match self { + WriteMsg::WriteTask(t) => { + let mut map = HashMap::default(); + for entry in &t.entries { + let header = util::get_entry_header(entry); + let group_name = header.get_resource_group_name().to_owned(); + *map.entry(group_name).or_default() += entry.compute_size() as u64; + } + Some(map) + } + _ => None, + } + } } impl fmt::Debug for WriteMsg @@ -284,6 +312,8 @@ where ), WriteMsg::Shutdown => write!(fmt, "WriteMsg::Shutdown"), WriteMsg::LatencyInspect { .. 
} => write!(fmt, "WriteMsg::LatencyInspect"), + #[cfg(test)] + WriteMsg::Pause(_) => write!(fmt, "WriteMsg::Pause"), } } } @@ -641,6 +671,10 @@ where } => { self.pending_latency_inspect.push((send_time, inspector)); } + #[cfg(test)] + WriteMsg::Pause(rx) => { + let _ = rx.recv(); + } } false } @@ -845,13 +879,15 @@ where EK: KvEngine, ER: RaftEngine, { + resource_ctl: Option>, writers: Vec>>, handlers: Vec>, } -impl Default for StoreWriters { - fn default() -> Self { +impl StoreWriters { + pub fn new(resource_ctl: Option>) -> Self { Self { + resource_ctl, writers: vec![], handlers: vec![], } @@ -879,7 +915,10 @@ where let pool_size = cfg.value().store_io_pool_size; for i in 0..pool_size { let tag = format!("store-writer-{}", i); - let (tx, rx) = bounded(cfg.value().store_io_notify_capacity); + let (tx, rx) = bounded( + self.resource_ctl.clone(), + cfg.value().store_io_notify_capacity, + ); let mut worker = Worker::new( store_id, tag.clone(), @@ -906,7 +945,7 @@ where assert_eq!(self.writers.len(), self.handlers.len()); for (i, handler) in self.handlers.drain(..).enumerate() { info!("stopping store writer {}", i); - self.writers[i].send(WriteMsg::Shutdown).unwrap(); + self.writers[i].send(WriteMsg::Shutdown, 0).unwrap(); handler.join().unwrap(); } } diff --git a/components/raftstore/src/store/async_io/write_router.rs b/components/raftstore/src/store/async_io/write_router.rs index 6c1db6419cf..ead22f70b28 100644 --- a/components/raftstore/src/store/async_io/write_router.rs +++ b/components/raftstore/src/store/async_io/write_router.rs @@ -13,8 +13,9 @@ use std::{ time::Duration, }; -use crossbeam::channel::{Sender, TrySendError}; +use crossbeam::channel::TrySendError; use engine_traits::{KvEngine, RaftEngine}; +use resource_control::channel::Sender; use tikv_util::{info, time::Instant}; use crate::store::{ @@ -22,7 +23,7 @@ use crate::store::{ metrics::*, }; -const RETRY_SCHEDULE_MILLISECONS: u64 = 10; +const RETRY_SCHEDULE_MILLISECONDS: u64 = 10; pub trait 
WriteRouterContext where @@ -68,6 +69,9 @@ where last_unpersisted: Option, /// Pending write msgs since rescheduling. pending_write_msgs: Vec>, + /// The scheduling priority of the last msg, only valid when priority + /// scheduling is enabled + last_msg_priority: u64, } impl WriteRouter @@ -83,6 +87,7 @@ where next_writer_id: None, last_unpersisted: None, pending_write_msgs: vec![], + last_msg_priority: 0, } } @@ -217,17 +222,21 @@ where } else { // Rescheduling fails at this time. Retry 10ms later. // The task should be sent to the original write worker. - self.next_retry_time = now + Duration::from_millis(RETRY_SCHEDULE_MILLISECONS); + self.next_retry_time = now + Duration::from_millis(RETRY_SCHEDULE_MILLISECONDS); true } } - fn send>(&self, ctx: &mut C, msg: WriteMsg) { - match ctx.write_senders()[self.writer_id].try_send(msg) { - Ok(()) => (), + fn send>(&mut self, ctx: &mut C, msg: WriteMsg) { + let sender = &ctx.write_senders()[self.writer_id]; + sender.consume_msg_resource(&msg); + // pass the priority of last msg as low bound to make sure all messages of one + // peer are handled sequentially. + match sender.try_send(msg, self.last_msg_priority) { + Ok(priority) => self.last_msg_priority = priority, Err(TrySendError::Full(msg)) => { let now = Instant::now(); - if ctx.write_senders()[self.writer_id].send(msg).is_err() { + if sender.send(msg, self.last_msg_priority).is_err() { // Write threads are destroyed after store threads during shutdown. 
panic!("{} failed to send write msg, err: disconnected", self.tag); } @@ -275,35 +284,55 @@ impl Index for WriteSenders { } #[cfg(test)] -mod tests { +pub(crate) mod tests { use std::thread; - use crossbeam::channel::{bounded, Receiver}; - use engine_test::kv::KvTestEngine; + use engine_test::{kv::KvTestEngine, raft::RaftTestEngine}; + use resource_control::channel::{bounded, Receiver}; use tikv_util::config::ReadableDuration; use super::*; + pub struct TestContext { + pub senders: WriteSenders, + pub config: Config, + pub raft_metrics: RaftMetrics, + } + + impl WriteRouterContext for TestContext { + fn write_senders(&self) -> &WriteSenders { + &self.senders + } + + fn config(&self) -> &Config { + &self.config + } + + fn raft_metrics(&self) -> &RaftMetrics { + &self.raft_metrics + } + } + struct TestWriteRouter { - receivers: Vec>>, - senders: WriteSenders, - config: Config, - raft_metrics: RaftMetrics, + receivers: Vec>>, + ctx: TestContext, } impl TestWriteRouter { fn new(config: Config) -> Self { let (mut receivers, mut senders) = (vec![], vec![]); for _ in 0..config.store_io_pool_size { - let (tx, rx) = bounded(config.store_io_notify_capacity); + let (tx, rx) = bounded(None, config.store_io_notify_capacity); receivers.push(rx); senders.push(tx); } Self { receivers, - senders: WriteSenders::new(senders), - config, - raft_metrics: RaftMetrics::new(true), + ctx: TestContext { + senders: WriteSenders::new(senders), + config, + raft_metrics: RaftMetrics::new(true), + }, } } @@ -321,6 +350,7 @@ mod tests { fn must_same_reschedule_count(&self, count: usize) { let cnt = self + .ctx .senders .io_reschedule_concurrent_count .load(Ordering::Relaxed); @@ -330,20 +360,6 @@ mod tests { } } - impl WriteRouterContext for TestWriteRouter { - fn write_senders(&self) -> &WriteSenders { - &self.senders - } - - fn config(&self) -> &Config { - &self.config - } - - fn raft_metrics(&self) -> &RaftMetrics { - &self.raft_metrics - } - } - #[test] fn test_write_router_no_schedule() { let 
mut config = Config::new(); @@ -352,10 +368,10 @@ mod tests { config.store_io_pool_size = 4; let mut t = TestWriteRouter::new(config); let mut r = WriteRouter::new("1".to_string()); - r.send_write_msg(&mut t, None, WriteMsg::Shutdown); + r.send_write_msg(&mut t.ctx, None, WriteMsg::Shutdown); let writer_id = r.writer_id; for _ in 1..10 { - r.send_write_msg(&mut t, Some(10), WriteMsg::Shutdown); + r.send_write_msg(&mut t.ctx, Some(10), WriteMsg::Shutdown); thread::sleep(Duration::from_millis(10)); } assert_eq!(writer_id, r.writer_id); @@ -375,7 +391,7 @@ mod tests { let last_time = r.next_retry_time; thread::sleep(Duration::from_millis(10)); // `writer_id` will be chosen randomly due to `last_unpersisted` is None - r.send_write_msg(&mut t, None, WriteMsg::Shutdown); + r.send_write_msg(&mut t.ctx, None, WriteMsg::Shutdown); assert!(r.next_retry_time > last_time); assert_eq!(r.next_writer_id, None); assert_eq!(r.last_unpersisted, None); @@ -390,7 +406,7 @@ mod tests { let writer_id = r.writer_id; let timer = Instant::now(); loop { - r.send_write_msg(&mut t, Some(10), WriteMsg::Shutdown); + r.send_write_msg(&mut t.ctx, Some(10), WriteMsg::Shutdown); if let Some(id) = r.next_writer_id { assert!(writer_id != id); assert_eq!(r.last_unpersisted, Some(10)); @@ -408,7 +424,7 @@ mod tests { thread::sleep(Duration::from_millis(10)); } - r.send_write_msg(&mut t, Some(20), WriteMsg::Shutdown); + r.send_write_msg(&mut t.ctx, Some(20), WriteMsg::Shutdown); assert!(r.next_writer_id.is_some()); // `last_unpersisted` should not change assert_eq!(r.last_unpersisted, Some(10)); @@ -417,7 +433,7 @@ mod tests { t.must_same_reschedule_count(1); // No effect due to 9 < `last_unpersisted`(10) - r.check_new_persisted(&mut t, 9); + r.check_new_persisted(&mut t.ctx, 9); assert!(r.next_writer_id.is_some()); assert_eq!(r.last_unpersisted, Some(10)); assert_eq!(r.pending_write_msgs.len(), 2); @@ -425,7 +441,7 @@ mod tests { t.must_same_reschedule_count(1); // Should reschedule and send msg - 
r.check_new_persisted(&mut t, 10); + r.check_new_persisted(&mut t.ctx, 10); assert_eq!(r.next_writer_id, None); assert_eq!(r.last_unpersisted, None); assert!(r.pending_write_msgs.is_empty()); @@ -433,7 +449,8 @@ mod tests { t.must_same_reschedule_count(0); thread::sleep(Duration::from_millis(10)); - t.senders + t.ctx + .senders .io_reschedule_concurrent_count .store(4, Ordering::Relaxed); // Should retry reschedule next time because the limitation of concurrent count. @@ -441,7 +458,7 @@ mod tests { // so using loop here. let timer = Instant::now(); loop { - r.send_write_msg(&mut t, Some(30), WriteMsg::Shutdown); + r.send_write_msg(&mut t.ctx, Some(30), WriteMsg::Shutdown); t.must_same_msg_count(r.writer_id, 1); if r.next_writer_id.is_some() { assert_eq!(r.last_unpersisted, None); @@ -456,12 +473,13 @@ mod tests { thread::sleep(Duration::from_millis(10)); } - t.senders + t.ctx + .senders .io_reschedule_concurrent_count .store(3, Ordering::Relaxed); - thread::sleep(Duration::from_millis(RETRY_SCHEDULE_MILLISECONS + 2)); + thread::sleep(Duration::from_millis(RETRY_SCHEDULE_MILLISECONDS + 2)); // Should reschedule now - r.send_write_msg(&mut t, Some(40), WriteMsg::Shutdown); + r.send_write_msg(&mut t.ctx, Some(40), WriteMsg::Shutdown); assert!(r.next_writer_id.is_some()); assert_eq!(r.last_unpersisted, Some(40)); t.must_same_msg_count(r.writer_id, 0); diff --git a/components/raftstore/src/store/async_io/write_tests.rs b/components/raftstore/src/store/async_io/write_tests.rs index d1861a8903c..cae5842c8b8 100644 --- a/components/raftstore/src/store/async_io/write_tests.rs +++ b/components/raftstore/src/store/async_io/write_tests.rs @@ -1,20 +1,27 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::time::Duration; +use std::{sync::mpsc, time::Duration}; use collections::HashSet; -use crossbeam::channel::unbounded; +use crossbeam::channel::{unbounded, Receiver, Sender}; use engine_test::{kv::KvTestEngine, new_temp_engine, raft::RaftTestEngine}; use engine_traits::{Engines, Mutable, Peekable, RaftEngineReadOnly, WriteBatchExt}; -use kvproto::raft_serverpb::{RaftApplyState, RaftMessage, RegionLocalState}; +use kvproto::{ + raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}, + raft_serverpb::{RaftApplyState, RaftMessage, RegionLocalState}, + resource_manager::{GroupMode, GroupRawResourceSettings, ResourceGroup}, +}; +use resource_control::ResourceGroupManager; use tempfile::Builder; use super::*; use crate::{ - store::{peer_storage::tests::new_entry, Config, Transport}, + store::{ + async_io::write_router::tests::TestContext, local_metrics::RaftMetrics, + peer_storage::tests::new_entry, Config, Transport, WriteRouter, + }, Result, }; - type TestKvWriteBatch = ::WriteBatch; type TestRaftLogBatch = ::LogBatch; @@ -122,7 +129,7 @@ fn must_wait_same_notifies( } let timer = Instant::now(); loop { - match notify_rx.recv() { + match notify_rx.recv_timeout(Duration::from_secs(3)) { Ok((region_id, n)) => { if let Some(n2) = notify_map.get(®ion_id) { if n == *n2 { @@ -196,7 +203,7 @@ struct TestWorker { impl TestWorker { fn new(cfg: &Config, engines: &Engines) -> Self { - let (_, task_rx) = unbounded(); + let (_, task_rx) = resource_control::channel::unbounded(None); let (msg_tx, msg_rx) = unbounded(); let trans = TestTransport { tx: msg_tx }; let (notify_tx, notify_rx) = unbounded(); @@ -222,15 +229,24 @@ struct TestWriters { writers: StoreWriters, msg_rx: Receiver, notify_rx: Receiver<(u64, (u64, u64))>, + ctx: TestContext, } impl TestWriters { - fn new(cfg: &Config, engines: &Engines) -> Self { + fn new( + cfg: Config, + engines: &Engines, + resource_manager: Option>, + ) -> Self { let (msg_tx, msg_rx) = unbounded(); let trans = TestTransport { tx: msg_tx }; let 
(notify_tx, notify_rx) = unbounded(); let notifier = TestNotifier { tx: notify_tx }; - let mut writers = StoreWriters::default(); + let mut writers = StoreWriters::new( + resource_manager + .as_ref() + .map(|m| m.derive_controller("test".into(), false)), + ); writers .spawn( 1, @@ -242,13 +258,21 @@ impl TestWriters { ) .unwrap(); Self { - writers, msg_rx, notify_rx, + ctx: TestContext { + config: cfg, + raft_metrics: RaftMetrics::new(true), + senders: writers.senders(), + }, + writers, } } - fn write_sender(&self, id: usize) -> Sender> { + fn write_sender( + &self, + id: usize, + ) -> resource_control::channel::Sender> { self.writers.senders()[id].clone() } } @@ -460,7 +484,7 @@ fn test_basic_flow() { let engines = new_temp_engine(&path); let mut cfg = Config::default(); cfg.store_io_pool_size = 2; - let mut t = TestWriters::new(&cfg, &engines); + let mut t = TestWriters::new(cfg, &engines, None); let mut task_1 = WriteTask::::new(region_1, 1, 10); init_write_batch(&engines, &mut task_1); @@ -474,7 +498,9 @@ fn test_basic_flow() { .messages .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); - t.write_sender(0).send(WriteMsg::WriteTask(task_1)).unwrap(); + t.write_sender(0) + .send(WriteMsg::WriteTask(task_1), 0) + .unwrap(); let mut task_2 = WriteTask::::new(2, 2, 20); init_write_batch(&engines, &mut task_2); @@ -488,7 +514,9 @@ fn test_basic_flow() { .messages .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); - t.write_sender(1).send(WriteMsg::WriteTask(task_2)).unwrap(); + t.write_sender(1) + .send(WriteMsg::WriteTask(task_2), 0) + .unwrap(); let mut task_3 = WriteTask::::new(region_1, 1, 15); init_write_batch(&engines, &mut task_3); @@ -502,7 +530,9 @@ fn test_basic_flow() { .messages .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); - t.write_sender(0).send(WriteMsg::WriteTask(task_3)).unwrap(); + t.write_sender(0) + .send(WriteMsg::WriteTask(task_3), 0) + .unwrap(); must_wait_same_notifies(vec![(region_1, 
(1, 15)), (region_2, (2, 20))], &t.notify_rx); @@ -532,7 +562,6 @@ fn test_basic_flow() { ); must_have_same_count_msg(6, &t.msg_rx); - t.writers.shutdown(); } @@ -548,7 +577,7 @@ fn test_basic_flow_with_states() { let engines = new_temp_engine(&path); let mut cfg = Config::default(); cfg.store_io_pool_size = 2; - let mut t = TestWriters::new(&cfg, &engines); + let mut t = TestWriters::new(cfg, &engines, None); let mut task_1 = WriteTask::::new(region_1, 1, 10); task_1.raft_wb = Some(engines.raft.log_batch(0)); @@ -571,7 +600,9 @@ fn test_basic_flow_with_states() { .messages .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); - t.write_sender(0).send(WriteMsg::WriteTask(task_1)).unwrap(); + t.write_sender(0) + .send(WriteMsg::WriteTask(task_1), 0) + .unwrap(); let mut task_2 = WriteTask::::new(2, 2, 20); task_2.raft_wb = Some(engines.raft.log_batch(0)); @@ -588,7 +619,9 @@ fn test_basic_flow_with_states() { .messages .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); - t.write_sender(1).send(WriteMsg::WriteTask(task_2)).unwrap(); + t.write_sender(1) + .send(WriteMsg::WriteTask(task_2), 0) + .unwrap(); let mut task_3 = WriteTask::::new(region_1, 1, 15); task_3.raft_wb = Some(engines.raft.log_batch(0)); @@ -604,7 +637,9 @@ fn test_basic_flow_with_states() { .messages .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); - t.write_sender(0).send(WriteMsg::WriteTask(task_3)).unwrap(); + t.write_sender(0) + .send(WriteMsg::WriteTask(task_3), 0) + .unwrap(); must_wait_same_notifies(vec![(region_1, (1, 15)), (region_2, (2, 20))], &t.notify_rx); @@ -645,3 +680,88 @@ fn test_basic_flow_with_states() { t.writers.shutdown(); } + +#[test] +fn test_resource_group() { + let region_1 = 1; + let region_2 = 2; + + let resource_manager = Arc::new(ResourceGroupManager::default()); + let get_group = |name: &str, read_tokens: u64, write_tokens: u64| -> ResourceGroup { + let mut group = ResourceGroup::new(); + 
group.set_name(name.to_string()); + group.set_mode(GroupMode::RawMode); + let mut resource_setting = GroupRawResourceSettings::new(); + resource_setting + .mut_cpu() + .mut_settings() + .set_fill_rate(read_tokens); + resource_setting + .mut_io_write() + .mut_settings() + .set_fill_rate(write_tokens); + group.set_raw_resource_settings(resource_setting); + group + }; + resource_manager.add_resource_group(get_group("group1", 10, 10)); + resource_manager.add_resource_group(get_group("group2", 100, 100)); + + let path = Builder::new().prefix("async-io-basic").tempdir().unwrap(); + let engines = new_temp_engine(&path); + let mut cfg = Config::default(); + cfg.store_io_pool_size = 1; + + let mut t = TestWriters::new(cfg, &engines, Some(resource_manager)); + + let (tx, rx) = mpsc::sync_channel(0); + t.write_sender(0).send(WriteMsg::Pause(rx), 0).unwrap(); + + let mut r = WriteRouter::new("1".to_string()); + let mut task_1 = WriteTask::::new(region_1, 1, 10); + init_write_batch(&engines, &mut task_1); + put_raft_kv(task_1.raft_wb.as_mut(), 17); + let entries = vec![new_entry(5, 5), new_entry(6, 5), new_entry(7, 5)]; + let mut entries = entries + .into_iter() + .map(|mut e| { + let mut req = RaftCmdRequest::default(); + let mut header = RaftRequestHeader::default(); + header.set_resource_group_name("group1".to_owned()); + req.set_header(header); + e.set_data(req.write_to_bytes().unwrap().into()); + e + }) + .collect(); + task_1.entries.append(&mut entries); + task_1.raft_state = Some(new_raft_state(5, 234, 6, 7)); + task_1 + .messages + .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); + r.send_write_msg(&mut t.ctx, None, WriteMsg::WriteTask(task_1)); + + let mut r = WriteRouter::new("2".to_string()); + let mut task_2 = WriteTask::::new(region_2, 2, 20); + init_write_batch(&engines, &mut task_2); + put_raft_kv(task_2.raft_wb.as_mut(), 27); + let entries = vec![new_entry(50, 12), new_entry(51, 13)]; + let mut entries = entries + .into_iter() + .map(|mut e| { 
+ let mut req = RaftCmdRequest::default(); + let mut header = RaftRequestHeader::default(); + header.set_resource_group_name("group2".to_owned()); + req.set_header(header); + e.set_data(req.write_to_bytes().unwrap().into()); + e + }) + .collect(); + task_2.entries.append(&mut entries); + task_2.raft_state = Some(new_raft_state(13, 567, 49, 51)); + task_2 + .messages + .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); + r.send_write_msg(&mut t.ctx, None, WriteMsg::WriteTask(task_2)); + + tx.send(()).unwrap(); + must_wait_same_notifies(vec![(region_1, (1, 10)), (region_2, (2, 20))], &t.notify_rx); +} diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 1853d200140..7f4e5497cb9 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -24,7 +24,7 @@ use std::{ use batch_system::{ BasicMailbox, BatchRouter, BatchSystem, Config as BatchSystemConfig, Fsm, HandleResult, - HandlerBuilder, PollHandler, Priority, ResourceMetered, + HandlerBuilder, PollHandler, Priority, }; use collections::{HashMap, HashMapEntry, HashSet}; use crossbeam::channel::{TryRecvError, TrySendError}; @@ -51,7 +51,7 @@ use raft::eraftpb::{ ConfChange, ConfChangeType, ConfChangeV2, Entry, EntryType, Snapshot as RaftSnapshot, }; use raft_proto::ConfChangeI; -use resource_control::ResourceController; +use resource_control::{ResourceController, ResourceMetered}; use smallvec::{smallvec, SmallVec}; use sst_importer::SstImporter; use tikv_alloc::trace::TraceEvent; @@ -4431,6 +4431,7 @@ pub enum ControlMsg { } impl ResourceMetered for ControlMsg {} + pub struct ControlFsm { receiver: Receiver, stopped: bool, diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 85631bebe09..66acd187215 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -22,7 +22,7 @@ use batch_system::{ 
use causal_ts::CausalTsProviderImpl; use collections::{HashMap, HashMapEntry, HashSet}; use concurrency_manager::ConcurrencyManager; -use crossbeam::channel::{unbounded, TryRecvError, TrySendError}; +use crossbeam::channel::{TryRecvError, TrySendError}; use engine_traits::{ CompactedEvent, DeleteStrategy, Engines, KvEngine, Mutable, PerfContextKind, RaftEngine, RaftLogBatch, Range, WriteBatch, WriteOptions, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, @@ -42,7 +42,7 @@ use kvproto::{ use pd_client::{Feature, FeatureGate, PdClient}; use protobuf::Message; use raft::StateRole; -use resource_control::ResourceGroupManager; +use resource_control::{channel::unbounded, ResourceGroupManager}; use resource_metering::CollectorRegHandle; use sst_importer::SstImporter; use tikv_alloc::trace::TraceEvent; @@ -1053,12 +1053,13 @@ impl PollHandler, St } } else { let writer_id = rand::random::() % self.poll_ctx.cfg.store_io_pool_size; - if let Err(err) = - self.poll_ctx.write_senders[writer_id].try_send(WriteMsg::LatencyInspect { + if let Err(err) = self.poll_ctx.write_senders[writer_id].try_send( + WriteMsg::LatencyInspect { send_time: write_begin, inspector: latency_inspect, - }) - { + }, + 0, + ) { warn!("send latency inspecting to write workers failed"; "err" => ?err); } } @@ -1340,7 +1341,7 @@ where fn build(&mut self, _: Priority) -> RaftPoller { let sync_write_worker = if self.write_senders.is_empty() { - let (_, rx) = unbounded(); + let (_, rx) = unbounded(None); Some(WriteWorker::new( self.store.get_id(), "sync-writer".to_string(), @@ -1821,7 +1822,11 @@ pub fn create_raft_batch_system( apply_router, apply_system, router: raft_router.clone(), - store_writers: StoreWriters::default(), + store_writers: StoreWriters::new( + resource_manager + .as_ref() + .map(|m| m.derive_controller("store-writer".to_owned(), false)), + ), }; (raft_router, system) } diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 195a94478dc..935210951f0 100644 --- 
a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -5,7 +5,6 @@ use std::sync::Arc; use std::{borrow::Cow, fmt}; -use batch_system::ResourceMetered; use collections::HashSet; use engine_traits::{CompactedEvent, KvEngine, Snapshot}; use futures::channel::mpsc::UnboundedSender; @@ -23,6 +22,7 @@ use kvproto::{ #[cfg(any(test, feature = "testexport"))] use pd_client::BucketMeta; use raft::SnapshotStatus; +use resource_control::ResourceMetered; use smallvec::{smallvec, SmallVec}; use tikv_util::{deadline::Deadline, escape, memory::HeapSize, time::Instant}; use tracker::{get_tls_tracker_token, TrackerToken}; diff --git a/components/raftstore/src/store/worker/refresh_config.rs b/components/raftstore/src/store/worker/refresh_config.rs index d09a6dd9f53..ff34b9abb4e 100644 --- a/components/raftstore/src/store/worker/refresh_config.rs +++ b/components/raftstore/src/store/worker/refresh_config.rs @@ -41,7 +41,7 @@ where { pub fn decrease_by(&mut self, size: usize) { for _ in 0..size { - if let Err(e) = self.state.fsm_sender.send(FsmTypes::Empty) { + if let Err(e) = self.state.fsm_sender.send(FsmTypes::Empty, 0) { error!( "failed to decrease thread pool"; "decrease to" => size, diff --git a/components/resource_control/Cargo.toml b/components/resource_control/Cargo.toml index 39d37ac0f6b..2e1a0990d49 100644 --- a/components/resource_control/Cargo.toml +++ b/components/resource_control/Cargo.toml @@ -9,6 +9,8 @@ failpoints = ["fail/failpoints"] [dependencies] byteorder = "1.2" +collections = { workspace = true } +crossbeam = "0.8" crossbeam-skiplist = "0.1" dashmap = "5.1" fail = "0.5" diff --git a/components/resource_control/src/channel.rs b/components/resource_control/src/channel.rs new file mode 100644 index 00000000000..55bc2ed33b9 --- /dev/null +++ b/components/resource_control/src/channel.rs @@ -0,0 +1,183 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{cell::RefCell, sync::Arc}; + +use collections::HashMap; +use crossbeam::channel::{self, RecvError, SendError, TryRecvError, TrySendError}; +use kvproto::kvrpcpb::CommandPri; +use tikv_util::mpsc::priority_queue; + +use crate::{ResourceConsumeType, ResourceController}; + +pub trait ResourceMetered { + // returns the msg consumption of each hash map + fn get_resource_consumptions(&self) -> Option> { + None + } +} + +pub fn bounded( + resource_ctl: Option>, + cap: usize, +) -> (Sender, Receiver) { + if let Some(ctl) = resource_ctl { + // TODO: make it bounded + let (tx, rx) = priority_queue::unbounded(); + ( + Sender::Priority { + resource_ctl: ctl, + sender: tx, + last_msg_group: RefCell::new(String::new()), + }, + Receiver::Priority(rx), + ) + } else { + let (tx, rx) = channel::bounded(cap); + (Sender::Vanilla(tx), Receiver::Vanilla(rx)) + } +} + +pub fn unbounded( + resource_ctl: Option>, +) -> (Sender, Receiver) { + if let Some(ctl) = resource_ctl { + let (tx, rx) = priority_queue::unbounded(); + ( + Sender::Priority { + resource_ctl: ctl, + sender: tx, + last_msg_group: RefCell::new(String::new()), + }, + Receiver::Priority(rx), + ) + } else { + let (tx, rx) = channel::unbounded(); + (Sender::Vanilla(tx), Receiver::Vanilla(rx)) + } +} + +pub enum Sender { + Vanilla(channel::Sender), + Priority { + resource_ctl: Arc, + sender: priority_queue::Sender, + last_msg_group: RefCell, + }, +} + +impl Clone for Sender { + fn clone(&self) -> Self { + match self { + Sender::Vanilla(sender) => Sender::Vanilla(sender.clone()), + Sender::Priority { + resource_ctl, + sender, + .. + } => Sender::Priority { + resource_ctl: resource_ctl.clone(), + sender: sender.clone(), + last_msg_group: RefCell::new(String::new()), + }, + } + } +} + +impl Sender { + // `low_bound` represents the lowest priority that the message can be sent with. + // It's used to make sure messages from one peer are sent in order. + // The returned value is the priority that the message sent with. 
It is + // calculated by resource controller and compared with `low_bound`. + pub fn send(&self, m: T, low_bound: u64) -> Result> { + match self { + Sender::Vanilla(sender) => sender.send(m).map(|_| 0), + Sender::Priority { + resource_ctl, + sender, + last_msg_group, + } => { + // TODO: pass different command priority + let priority = std::cmp::max( + resource_ctl + .get_priority(last_msg_group.borrow().as_bytes(), CommandPri::Normal), + low_bound, + ); + sender.send(m, priority).map(|_| priority) + } + } + } + + pub fn try_send(&self, m: T, low_bound: u64) -> Result> { + match self { + Sender::Vanilla(sender) => sender.try_send(m).map(|_| 0), + Sender::Priority { + resource_ctl, + sender, + last_msg_group, + } => { + let priority = std::cmp::max( + resource_ctl + .get_priority(last_msg_group.borrow().as_bytes(), CommandPri::Normal), + low_bound, + ); + sender.try_send(m, priority).map(|_| priority) + } + } + } + + pub fn consume_msg_resource(&self, msg: &impl ResourceMetered) { + match self { + Sender::Vanilla(_) => {} + Sender::Priority { + resource_ctl, + last_msg_group, + .. 
+ } => { + if let Some(mut groups) = msg.get_resource_consumptions() { + let mut dominant_group = "".to_owned(); + let mut max_write_bytes = 0; + for (group_name, write_bytes) in groups.drain() { + resource_ctl.consume( + group_name.as_bytes(), + ResourceConsumeType::IoBytes(write_bytes), + ); + if write_bytes > max_write_bytes { + dominant_group = group_name; + max_write_bytes = write_bytes; + } + } + *last_msg_group.borrow_mut() = dominant_group; + } + } + } + } +} + +pub enum Receiver { + Vanilla(channel::Receiver), + Priority(priority_queue::Receiver), +} + +impl Clone for Receiver { + fn clone(&self) -> Self { + match self { + Receiver::Vanilla(receiver) => Receiver::Vanilla(receiver.clone()), + Receiver::Priority(receiver) => Receiver::Priority(receiver.clone()), + } + } +} + +impl Receiver { + pub fn recv(&self) -> Result { + match self { + Receiver::Vanilla(receiver) => receiver.recv(), + Receiver::Priority(receiver) => receiver.recv(), + } + } + + pub fn try_recv(&self) -> Result { + match self { + Receiver::Vanilla(receiver) => receiver.try_recv(), + Receiver::Priority(receiver) => receiver.try_recv(), + } + } +} diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index 5534ed2153d..1c4c93c82d2 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -14,6 +14,9 @@ pub use future::ControlledFuture; mod service; pub use service::ResourceManagerService; +pub mod channel; +pub use channel::ResourceMetered; + #[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig, Default)] #[serde(default)] #[serde(rename_all = "kebab-case")] From 495abac06eb3319f0a75d0e5e63ea43086b06fe7 Mon Sep 17 00:00:00 2001 From: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Date: Fri, 3 Feb 2023 19:17:56 +0800 Subject: [PATCH 0486/1149] cloud: azblob: add retry for http code 500 error (#14094) close tikv/tikv#14093 Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 ++ 
components/cloud/azure/Cargo.toml | 2 ++ components/cloud/azure/src/azblob.rs | 44 ++++++++++++++++++++++++---- 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 21145778082..dd2869a7b10 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -359,7 +359,9 @@ dependencies = [ "futures 0.3.15", "futures-util", "kvproto", + "lazy_static", "oauth2", + "regex", "slog", "slog-global", "tikv_util", diff --git a/components/cloud/azure/Cargo.toml b/components/cloud/azure/Cargo.toml index c08dc76fdff..57ea6c14aef 100644 --- a/components/cloud/azure/Cargo.toml +++ b/components/cloud/azure/Cargo.toml @@ -15,7 +15,9 @@ cloud = { workspace = true } futures = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } kvproto = { workspace = true } +lazy_static = "1.4.0" oauth2 = { version = "4.0.0", default-features = false } +regex = "1" slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } tikv_util = { workspace = true } diff --git a/components/cloud/azure/src/azblob.rs b/components/cloud/azure/src/azblob.rs index 12b6149fad5..47d2d731da8 100644 --- a/components/cloud/azure/src/azblob.rs +++ b/components/cloud/azure/src/azblob.rs @@ -26,7 +26,9 @@ use futures_util::{ TryStreamExt, }; pub use kvproto::brpb::{AzureBlobStorage as InputConfig, Bucket as InputBucket, CloudDynamic}; +use lazy_static::lazy_static; use oauth2::{ClientId, ClientSecret}; +use regex::Regex; use tikv_util::{ debug, stream::{retry, RetryError}, @@ -224,6 +226,7 @@ impl BlobConfig for Config { enum RequestError { InvalidInput(Box, String), + InternalError(String), TimeOut(String), } @@ -233,6 +236,7 @@ impl From for io::Error { RequestError::InvalidInput(e, tag) => { Self::new(io::ErrorKind::InvalidInput, format!("{}: {}", tag, &e)) } + RequestError::InternalError(msg) => 
Self::new(io::ErrorKind::Other, msg), RequestError::TimeOut(msg) => Self::new(io::ErrorKind::TimedOut, msg), } } @@ -240,10 +244,21 @@ impl From for io::Error { impl RetryError for RequestError { fn is_retryable(&self) -> bool { - matches!(self, Self::TimeOut(_)) + matches!(self, Self::TimeOut(_) | Self::InternalError(_)) } } +fn err_is_retryable(err_info: &str) -> bool { + // HTTP Code 503: The server is busy + // HTTP Code 500: Operation could not be completed within the specified time. + // More details seen in https://learn.microsoft.com/en-us/rest/api/storageservices/blob-service-error-codes + lazy_static! { + static ref RE: Regex = Regex::new(r"status: 5[0-9][0-9],").unwrap(); + } + + RE.is_match(err_info) +} + const CONNECTION_TIMEOUT: Duration = Duration::from_secs(900); /// A helper for uploading a large file to Azure storage. @@ -308,10 +323,9 @@ impl AzureUploader { Ok(_) => Ok(()), Err(err) => { let err_info = ToString::to_string(&err); - if err_info.contains("busy") { - // server is busy, retry later - Err(RequestError::TimeOut(format!( - "the resource is busy: {}, retry later", + if err_is_retryable(&err_info) { + Err(RequestError::InternalError(format!( + "internal error: {}, retry later", err_info ))) } else { @@ -765,4 +779,24 @@ mod tests { cd.set_bucket(bucket); cd } + + #[tokio::test] + async fn test_error_retryable() { + let err_info = "HTTP error status (status: 503,... The server is busy."; + assert!(err_is_retryable(err_info)); + let err_info = "HTTP error status (status: 500,... Operation could not be completed within the specified time."; + assert!(err_is_retryable(err_info)); + let err_info = + "HTTP error status (status: 409,... The blob type is invalid for this operation."; + assert!(!err_is_retryable(err_info)); + let err_info = "HTTP error status (status: 50,... 
"; + assert!(!err_is_retryable(err_info)); + let err = "NaN".parse::().unwrap_err(); + let err1 = RequestError::InvalidInput(Box::new(err), "invalid-input".to_owned()); + let err2 = RequestError::InternalError("internal-error".to_owned()); + let err3 = RequestError::TimeOut("time-out".to_owned()); + assert!(!err1.is_retryable()); + assert!(err2.is_retryable()); + assert!(err3.is_retryable()); + } } From 856987fde93b68c6489635d16d9e27c102b7d47f Mon Sep 17 00:00:00 2001 From: Zwb Date: Fri, 3 Feb 2023 19:33:55 +0800 Subject: [PATCH 0487/1149] tests: fix test_witness_replica_read fail (#14110) ref tikv/tikv#12876 Signed-off-by: Wenbo Zhang Co-authored-by: Ti Chi Robot --- tests/integrations/raftstore/test_witness.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integrations/raftstore/test_witness.rs b/tests/integrations/raftstore/test_witness.rs index f35b21b08a1..907c49c03af 100644 --- a/tests/integrations/raftstore/test_witness.rs +++ b/tests/integrations/raftstore/test_witness.rs @@ -452,6 +452,9 @@ fn test_witness_replica_read() { vec![true], ); + // make sure the peer_on_store3 has completed applied to witness + std::thread::sleep(Duration::from_millis(200)); + let mut request = new_request( region.get_id(), region.get_region_epoch().clone(), From 22202b26f9ced98a8d944eae1309e3610ca48566 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Fri, 3 Feb 2023 21:41:55 -0800 Subject: [PATCH 0488/1149] tikv: optimize default config for multi-rocksdb (#14107) ref tikv/tikv#12842 set block-cache.capacity to 0.3 * memory limit instead of 0.45 for partitioned-raft-kv. set region-split-size to 1GB by default when bucket is enabled set region-split-size to 10GB by default when partitioned-raft-kv is enabled. These numbers may be tuned futher if we have other better results. 
Signed-off-by: qi.xu Co-authored-by: qi.xu --- cmd/tikv-ctl/src/executor.rs | 5 +- .../raftstore/src/coprocessor/config.rs | 50 +++++++---- .../src/coprocessor/split_check/half.rs | 4 +- .../src/coprocessor/split_check/keys.rs | 2 +- .../src/coprocessor/split_check/size.rs | 12 +-- .../src/coprocessor/split_check/table.rs | 2 +- components/server/src/raft_engine_switch.rs | 5 +- components/server/src/server.rs | 13 ++- components/server/src/server2.rs | 13 ++- components/snap_recovery/src/init_cluster.rs | 7 +- components/test_raftstore/src/node.rs | 4 +- components/test_raftstore/src/server.rs | 2 +- components/test_raftstore/src/util.rs | 5 +- src/config/mod.rs | 83 +++++++++++++++++-- src/server/engine_factory.rs | 5 +- src/storage/config.rs | 10 ++- src/storage/kv/test_engine_builder.rs | 3 +- src/storage/mod.rs | 5 +- tests/failpoints/cases/test_split_region.rs | 2 +- tests/integrations/config/mod.rs | 2 +- .../raftstore/test_split_region.rs | 8 +- tests/integrations/storage/test_titan.rs | 5 +- 22 files changed, 185 insertions(+), 62 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 94610face44..7dd00a1d29c 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -61,7 +61,10 @@ pub fn new_debug_executor( .unwrap() .map(Arc::new); - let cache = cfg.storage.block_cache.build_shared_cache(); + let cache = cfg + .storage + .block_cache + .build_shared_cache(cfg.storage.engine); let env = cfg .build_shared_rocks_env(key_manager.clone(), None /* io_rate_limiter */) .unwrap(); diff --git a/components/raftstore/src/coprocessor/config.rs b/components/raftstore/src/coprocessor/config.rs index fb1fc35345f..3014c5c2358 100644 --- a/components/raftstore/src/coprocessor/config.rs +++ b/components/raftstore/src/coprocessor/config.rs @@ -25,7 +25,7 @@ pub struct Config { /// [b,c), [c,d) will be region_split_size (maybe a little larger). /// by default, region_max_size = region_split_size * 2 / 3. 
pub region_max_size: Option, - pub region_split_size: ReadableSize, + pub region_split_size: Option, /// When the number of keys in region [a,e) meets the region_max_keys, /// it will be split into two several regions [a,b), [b,c), [c,d), [d,e). @@ -71,6 +71,9 @@ pub enum ConsistencyCheckMethod { /// Default region split size. pub const SPLIT_SIZE_MB: u64 = 96; +pub const LARGE_REGION_SPLIT_SIZE_MB: u64 = 1024; +pub const RAFTSTORE_V2_SPLIT_SIZE_MB: u64 = 10240; + /// Default batch split limit. pub const BATCH_SPLIT_LIMIT: u64 = 10; @@ -80,11 +83,10 @@ pub const DEFAULT_REGION_BUCKET_MERGE_SIZE_RATIO: f64 = 0.33; impl Default for Config { fn default() -> Config { - let split_size = ReadableSize::mb(SPLIT_SIZE_MB); Config { split_region_on_table: false, batch_split_limit: BATCH_SPLIT_LIMIT, - region_split_size: split_size, + region_split_size: None, region_max_size: None, region_split_keys: None, region_max_keys: None, @@ -100,39 +102,55 @@ impl Default for Config { } impl Config { + pub fn region_split_size(&self) -> ReadableSize { + self.region_split_size + .unwrap_or(/* v1 only */ if self.enable_region_bucket { + ReadableSize::mb(LARGE_REGION_SPLIT_SIZE_MB) + } else { + ReadableSize::mb(SPLIT_SIZE_MB) + }) + } + pub fn region_max_keys(&self) -> u64 { - let default_split_keys = self.region_split_size.as_mb_f64() * 10000.0; + let default_split_keys = self.region_split_size().as_mb_f64() * 10000.0; self.region_max_keys .unwrap_or(default_split_keys as u64 / 2 * 3) } pub fn region_max_size(&self) -> ReadableSize { self.region_max_size - .unwrap_or(self.region_split_size / 2 * 3) + .unwrap_or(self.region_split_size() / 2 * 3) } pub fn region_split_keys(&self) -> u64 { // Assume the average size of KVs is 100B. 
self.region_split_keys - .unwrap_or((self.region_split_size.as_mb_f64() * 10000.0) as u64) + .unwrap_or((self.region_split_size().as_mb_f64() * 10000.0) as u64) + } + + pub fn optimize_for(&mut self, raftstore_v2: bool) { + // overwrite the default region_split_size when it's multi-rocksdb + if raftstore_v2 && self.region_split_size.is_none() { + self.region_split_size = Some(ReadableSize::mb(RAFTSTORE_V2_SPLIT_SIZE_MB)); + } } pub fn validate(&mut self) -> Result<()> { if self.region_split_keys.is_none() { - self.region_split_keys = Some((self.region_split_size.as_mb_f64() * 10000.0) as u64); + self.region_split_keys = Some((self.region_split_size().as_mb_f64() * 10000.0) as u64); } match self.region_max_size { Some(region_max_size) => { - if region_max_size.0 < self.region_split_size.0 { + if region_max_size.0 < self.region_split_size().0 { return Err(box_err!( "region max size {} must >= split size {}", region_max_size.0, - self.region_split_size.0 + self.region_split_size().0 )); } } - None => self.region_max_size = Some(self.region_split_size / 2 * 3), + None => self.region_max_size = Some(self.region_split_size() / 2 * 3), } match self.region_max_keys { @@ -148,10 +166,10 @@ impl Config { None => self.region_max_keys = Some(self.region_split_keys() / 2 * 3), } if self.enable_region_bucket { - if self.region_split_size.0 < self.region_bucket_size.0 { + if self.region_split_size().0 < self.region_bucket_size.0 { return Err(box_err!( "region split size {} must >= region bucket size {}", - self.region_split_size.0, + self.region_split_size().0, self.region_bucket_size.0 )); } @@ -208,12 +226,12 @@ mod tests { cfg = Config::default(); cfg.region_max_size = Some(ReadableSize(10)); - cfg.region_split_size = ReadableSize(20); + cfg.region_split_size = Some(ReadableSize(20)); cfg.validate().unwrap_err(); cfg = Config::default(); cfg.region_max_size = None; - cfg.region_split_size = ReadableSize(20); + cfg.region_split_size = Some(ReadableSize(20)); 
cfg.validate().unwrap(); assert_eq!(cfg.region_max_size, Some(ReadableSize(30))); @@ -230,12 +248,12 @@ mod tests { cfg = Config::default(); cfg.enable_region_bucket = false; - cfg.region_split_size = ReadableSize(20); + cfg.region_split_size = Some(ReadableSize(20)); cfg.region_bucket_size = ReadableSize(30); cfg.validate().unwrap(); cfg = Config::default(); - cfg.region_split_size = ReadableSize::mb(20); + cfg.region_split_size = Some(ReadableSize::mb(20)); cfg.validate().unwrap(); assert_eq!(cfg.region_split_keys, Some(200000)); } diff --git a/components/raftstore/src/coprocessor/split_check/half.rs b/components/raftstore/src/coprocessor/split_check/half.rs index fafa41e44b5..259334d2f42 100644 --- a/components/raftstore/src/coprocessor/split_check/half.rs +++ b/components/raftstore/src/coprocessor/split_check/half.rs @@ -267,7 +267,7 @@ mod tests { let (tx, rx) = mpsc::sync_channel(100); let cfg = Config { - region_split_size: ReadableSize(130_u64), + region_split_size: Some(ReadableSize(130_u64)), enable_region_bucket: true, region_bucket_size: ReadableSize(20_u64), // so that each key below will form a bucket ..Default::default() @@ -391,7 +391,7 @@ mod tests { let (tx, rx) = mpsc::sync_channel(100); let cfg = Config { - region_split_size: ReadableSize(130_u64), + region_split_size: Some(ReadableSize(130_u64)), enable_region_bucket: true, region_bucket_size: ReadableSize(20_u64), // so that each key below will form a bucket ..Default::default() diff --git a/components/raftstore/src/coprocessor/split_check/keys.rs b/components/raftstore/src/coprocessor/split_check/keys.rs index 92e159d233f..58c42d55513 100644 --- a/components/raftstore/src/coprocessor/split_check/keys.rs +++ b/components/raftstore/src/coprocessor/split_check/keys.rs @@ -590,7 +590,7 @@ mod tests { // The split by keys should still work. But if the bug in on_kv() in size.rs // exists, it will result in split by keys failed. 
cfg.region_max_size = Some(ReadableSize(region_size * 6 / 5)); - cfg.region_split_size = ReadableSize(region_size * 4 / 5); + cfg.region_split_size = Some(ReadableSize(region_size * 4 / 5)); runnable = SplitCheckRunner::new(engine, tx.clone(), CoprocessorHost::new(tx, cfg)); runnable.run(SplitCheckTask::split_check( region.clone(), diff --git a/components/raftstore/src/coprocessor/split_check/size.rs b/components/raftstore/src/coprocessor/split_check/size.rs index 1f4a33d7af7..8a1a5558c7d 100644 --- a/components/raftstore/src/coprocessor/split_check/size.rs +++ b/components/raftstore/src/coprocessor/split_check/size.rs @@ -149,7 +149,7 @@ impl SplitCheckObserver for SizeCheckObserver // Need to check size. host.add_checker(Box::new(Checker::new( host.cfg.region_max_size().0, - host.cfg.region_split_size.0, + host.cfg.region_split_size().0, host.cfg.batch_split_limit, policy, ))); @@ -186,7 +186,7 @@ impl SplitCheckObserver for SizeCheckObserver // Need to check size. host.add_checker(Box::new(Checker::new( host.cfg.region_max_size().0, - host.cfg.region_split_size.0, + host.cfg.region_split_size().0, host.cfg.batch_split_limit, policy, ))); @@ -420,7 +420,7 @@ pub mod tests { let (tx, rx) = mpsc::sync_channel(100); let cfg = Config { region_max_size: Some(ReadableSize(100)), - region_split_size: ReadableSize(60), + region_split_size: Some(ReadableSize(60)), region_max_keys: Some(1000000), region_split_keys: Some(1000000), batch_split_limit: 5, @@ -545,7 +545,7 @@ pub mod tests { let (tx, rx) = mpsc::sync_channel(100); let cfg = Config { region_max_size: Some(ReadableSize(50000)), - region_split_size: ReadableSize(50000), + region_split_size: Some(ReadableSize(50000)), region_max_keys: Some(1000000), region_split_keys: Some(1000000), batch_split_limit: 5, @@ -671,7 +671,7 @@ pub mod tests { let (tx, _rx) = mpsc::sync_channel(100); let mut cfg = Config { region_max_size: Some(ReadableSize(50000)), - region_split_size: ReadableSize(50000), + region_split_size: 
Some(ReadableSize(50000)), region_max_keys: Some(1000000), region_split_keys: Some(1000000), batch_split_limit: 5, @@ -736,7 +736,7 @@ pub mod tests { let (tx, rx) = mpsc::sync_channel(100); let cfg = Config { region_max_size: Some(ReadableSize(100)), - region_split_size: ReadableSize(60), + region_split_size: Some(ReadableSize(60)), region_max_keys: Some(1000000), region_split_keys: Some(1000000), batch_split_limit: 5, diff --git a/components/raftstore/src/coprocessor/split_check/table.rs b/components/raftstore/src/coprocessor/split_check/table.rs index 684e87e1693..eec7b15b9b3 100644 --- a/components/raftstore/src/coprocessor/split_check/table.rs +++ b/components/raftstore/src/coprocessor/split_check/table.rs @@ -326,7 +326,7 @@ mod tests { split_region_on_table: true, // Try to "disable" size split. region_max_size: Some(ReadableSize::gb(2)), - region_split_size: ReadableSize::gb(1), + region_split_size: Some(ReadableSize::gb(1)), // Try to "disable" keys split region_max_keys: Some(2000000000), region_split_keys: Some(1000000000), diff --git a/components/server/src/raft_engine_switch.rs b/components/server/src/raft_engine_switch.rs index bfaa2a6587e..bf46f07eabd 100644 --- a/components/server/src/raft_engine_switch.rs +++ b/components/server/src/raft_engine_switch.rs @@ -237,7 +237,10 @@ mod tests { cfg.raft_store.raftdb_path = raftdb_path.to_str().unwrap().to_owned(); cfg.raftdb.wal_dir = raftdb_wal_path.to_str().unwrap().to_owned(); cfg.raft_engine.mut_config().dir = raft_engine_path.to_str().unwrap().to_owned(); - let cache = cfg.storage.block_cache.build_shared_cache(); + let cache = cfg + .storage + .block_cache + .build_shared_cache(cfg.storage.engine); // Dump logs from RocksEngine to RaftLogEngine. 
let raft_engine = RaftLogEngine::new( diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 3da6b0c4950..4fe397e9eb5 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -955,7 +955,7 @@ where self.config .raft_store .validate( - self.config.coprocessor.region_split_size, + self.config.coprocessor.region_split_size(), self.config.coprocessor.enable_region_bucket, self.config.coprocessor.region_bucket_size, ) @@ -1838,7 +1838,11 @@ impl TikvServer { &mut self, flow_listener: engine_rocks::FlowListener, ) -> (Engines, Arc) { - let block_cache = self.config.storage.block_cache.build_shared_cache(); + let block_cache = self + .config + .storage + .block_cache + .build_shared_cache(self.config.storage.engine); let env = self .config .build_shared_rocks_env(self.encryption_key_manager.clone(), get_io_rate_limiter()) @@ -2193,7 +2197,10 @@ mod test { config.rocksdb.lockcf.soft_pending_compaction_bytes_limit = Some(ReadableSize(1)); let env = Arc::new(Env::default()); let path = Builder::new().prefix("test-update").tempdir().unwrap(); - let cache = config.storage.block_cache.build_shared_cache(); + let cache = config + .storage + .block_cache + .build_shared_cache(config.storage.engine); let factory = KvEngineFactoryBuilder::new(env, &config, cache).build(); let reg = TabletRegistry::new(Box::new(factory), path.path().join("tablets")).unwrap(); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index f193e1c7445..20d79e7cce5 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -814,7 +814,7 @@ where self.config .raft_store .validate( - self.config.coprocessor.region_split_size, + self.config.coprocessor.region_split_size(), self.config.coprocessor.enable_region_bucket, self.config.coprocessor.region_bucket_size, ) @@ -1452,7 +1452,11 @@ impl TikvServer { &mut self, flow_listener: engine_rocks::FlowListener, ) -> Arc { - let block_cache = 
self.config.storage.block_cache.build_shared_cache(); + let block_cache = self + .config + .storage + .block_cache + .build_shared_cache(self.config.storage.engine); let env = self .config .build_shared_rocks_env(self.encryption_key_manager.clone(), get_io_rate_limiter()) @@ -1799,7 +1803,10 @@ mod test { config.rocksdb.lockcf.soft_pending_compaction_bytes_limit = Some(ReadableSize(1)); let env = Arc::new(Env::default()); let path = Builder::new().prefix("test-update").tempdir().unwrap(); - let cache = config.storage.block_cache.build_shared_cache(); + let cache = config + .storage + .block_cache + .build_shared_cache(config.storage.engine); let factory = KvEngineFactoryBuilder::new(env, &config, cache).build(); let reg = TabletRegistry::new(Box::new(factory), path.path().join("tablets")).unwrap(); diff --git a/components/snap_recovery/src/init_cluster.rs b/components/snap_recovery/src/init_cluster.rs index e7818b3f888..d3a2ebade73 100644 --- a/components/snap_recovery/src/init_cluster.rs +++ b/components/snap_recovery/src/init_cluster.rs @@ -100,7 +100,7 @@ pub fn enter_snap_recovery_mode(config: &mut TikvConfig) { // Disable region split during recovering. 
config.coprocessor.region_max_size = Some(ReadableSize::gb(MAX_REGION_SIZE)); - config.coprocessor.region_split_size = ReadableSize::gb(MAX_REGION_SIZE); + config.coprocessor.region_split_size = Some(ReadableSize::gb(MAX_REGION_SIZE)); config.coprocessor.region_max_keys = Some(MAX_SPLIT_KEY); config.coprocessor.region_split_keys = Some(MAX_SPLIT_KEY); } @@ -314,7 +314,10 @@ pub fn create_local_engine_service( let env = config .build_shared_rocks_env(key_manager.clone(), None) .map_err(|e| format!("build shared rocks env: {}", e))?; - let block_cache = config.storage.block_cache.build_shared_cache(); + let block_cache = config + .storage + .block_cache + .build_shared_cache(config.storage.engine); // init rocksdb / kv db let factory = KvEngineFactoryBuilder::new(env.clone(), config, block_cache) diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 05ed8ece83d..78e1dbb36c3 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -239,7 +239,7 @@ impl Simulator for NodeCluster { let mut raft_store = cfg.raft_store.clone(); raft_store .validate( - cfg.coprocessor.region_split_size, + cfg.coprocessor.region_split_size(), cfg.coprocessor.enable_region_bucket, cfg.coprocessor.region_bucket_size, ) @@ -347,7 +347,7 @@ impl Simulator for NodeCluster { .map(|p| p.path().to_str().unwrap().to_owned()) ); - let region_split_size = cfg.coprocessor.region_split_size; + let region_split_size = cfg.coprocessor.region_split_size(); let enable_region_bucket = cfg.coprocessor.enable_region_bucket; let region_bucket_size = cfg.coprocessor.region_bucket_size; let mut raftstore_cfg = cfg.tikv.raft_store; diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 63a0b4e4804..a17c65b8aec 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -499,7 +499,7 @@ impl ServerCluster { let mut raft_store = 
cfg.raft_store.clone(); raft_store .validate( - cfg.coprocessor.region_split_size, + cfg.coprocessor.region_split_size(), cfg.coprocessor.enable_region_bucket, cfg.coprocessor.region_bucket_size, ) diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 4bcb99adca3..e765cfb883f 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -588,7 +588,10 @@ pub fn create_test_engine( data_key_manager_from_config(&cfg.security.encryption, dir.path().to_str().unwrap()) .unwrap() .map(Arc::new); - let cache = cfg.storage.block_cache.build_shared_cache(); + let cache = cfg + .storage + .block_cache + .build_shared_cache(cfg.storage.engine); let env = cfg .build_shared_rocks_env(key_manager.clone(), limiter) .unwrap(); diff --git a/src/config/mod.rs b/src/config/mod.rs index 38d69f1ab29..7539fc13c63 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -89,6 +89,9 @@ pub const DEFAULT_ROCKSDB_SUB_DIR: &str = "db"; /// By default, block cache size will be set to 45% of system memory. pub const BLOCK_CACHE_RATE: f64 = 0.45; +/// Because multi-rocksdb has 25% memory table quota, we have to reduce block +/// cache a bit +pub const RAFTSTORE_V2_BLOCK_CACHE_RATE: f64 = 0.30; /// By default, TiKV will try to limit memory usage to 75% of system memory. 
pub const MEMORY_USAGE_LIMIT_RATE: f64 = 0.75; @@ -3230,9 +3233,14 @@ impl TikvConfig { self.raft_engine.validate()?; self.server.validate()?; self.pd.validate()?; + + // cannot pass EngineType directly as component raftstore cannot have dependency + // on tikv + self.coprocessor + .optimize_for(self.storage.engine == EngineType::RaftKv2); self.coprocessor.validate()?; self.raft_store.validate( - self.coprocessor.region_split_size, + self.coprocessor.region_split_size(), self.coprocessor.enable_region_bucket, self.coprocessor.region_bucket_size, )?; @@ -3447,7 +3455,7 @@ impl TikvConfig { "override coprocessor.region-split-size with raftstore.region-split-size, {:?}", self.raft_store.region_split_size ); - self.coprocessor.region_split_size = self.raft_store.region_split_size; + self.coprocessor.region_split_size = Some(self.raft_store.region_split_size); } self.raft_store.region_split_size = default_raft_store.region_split_size; } @@ -4167,7 +4175,10 @@ mod tests { use grpcio::ResourceQuota; use itertools::Itertools; use kvproto::kvrpcpb::CommandPri; - use raftstore::coprocessor::region_info_accessor::MockRegionInfoProvider; + use raftstore::coprocessor::{ + config::{LARGE_REGION_SPLIT_SIZE_MB, RAFTSTORE_V2_SPLIT_SIZE_MB, SPLIT_SIZE_MB}, + region_info_accessor::MockRegionInfoProvider, + }; use slog::Level; use tempfile::Builder; use tikv_kv::RocksEngine as RocksDBEngine; @@ -4595,8 +4606,11 @@ mod tests { &cfg.storage.data_dir, Some(cfg.rocksdb.build_opt(&resource, cfg.storage.engine)), cfg.rocksdb.build_cf_opts( - &cfg.rocksdb - .build_cf_resources(cfg.storage.block_cache.build_shared_cache()), + &cfg.rocksdb.build_cf_resources( + cfg.storage + .block_cache + .build_shared_cache(cfg.storage.engine), + ), None, cfg.storage.api_version(), cfg.storage.engine, @@ -5523,17 +5537,22 @@ mod tests { // on. 
default_cfg.readpool.storage.adjust_use_unified_pool(); default_cfg.readpool.coprocessor.adjust_use_unified_pool(); + default_cfg + .coprocessor + .optimize_for(default_cfg.storage.engine == EngineType::RaftKv2); default_cfg.security.redact_info_log = Some(false); default_cfg.coprocessor.region_max_size = Some(default_cfg.coprocessor.region_max_size()); default_cfg.coprocessor.region_max_keys = Some(default_cfg.coprocessor.region_max_keys()); + default_cfg.coprocessor.region_split_size = + Some(default_cfg.coprocessor.region_split_size()); default_cfg.coprocessor.region_split_keys = Some(default_cfg.coprocessor.region_split_keys()); default_cfg.raft_store.raft_log_gc_size_limit = - Some(default_cfg.coprocessor.region_split_size * 3 / 4); + Some(default_cfg.coprocessor.region_split_size() * 3 / 4); default_cfg.raft_store.raft_log_gc_count_limit = - Some(default_cfg.coprocessor.region_split_size * 3 / 4 / ReadableSize::kb(1)); + Some(default_cfg.coprocessor.region_split_size() * 3 / 4 / ReadableSize::kb(1)); default_cfg.raft_store.region_split_check_diff = - Some(default_cfg.coprocessor.region_split_size / 16); + Some(default_cfg.coprocessor.region_split_size() / 16); // Other special cases. cfg.pd.retry_max_count = default_cfg.pd.retry_max_count; // Both -1 and isize::MAX are the same. 
@@ -5567,10 +5586,58 @@ mod tests { cfg.raftdb.defaultcf.level0_stop_writes_trigger = None; cfg.raftdb.defaultcf.soft_pending_compaction_bytes_limit = None; cfg.raftdb.defaultcf.hard_pending_compaction_bytes_limit = None; + cfg.coprocessor + .optimize_for(default_cfg.storage.engine == EngineType::RaftKv2); assert_eq!(cfg, default_cfg); } + #[test] + fn test_region_size_config() { + let mut default_cfg = TikvConfig::default(); + default_cfg.coprocessor.optimize_for(false); + default_cfg.coprocessor.validate().unwrap(); + assert_eq!( + default_cfg.coprocessor.region_split_size(), + ReadableSize::mb(SPLIT_SIZE_MB) + ); + + let mut default_cfg = TikvConfig::default(); + default_cfg.coprocessor.enable_region_bucket = true; + default_cfg.coprocessor.optimize_for(false); + default_cfg.coprocessor.validate().unwrap(); + assert_eq!( + default_cfg.coprocessor.region_split_size(), + ReadableSize::mb(LARGE_REGION_SPLIT_SIZE_MB) + ); + + let mut default_cfg = TikvConfig::default(); + default_cfg.coprocessor.optimize_for(true); + default_cfg.coprocessor.validate().unwrap(); + assert_eq!( + default_cfg.coprocessor.region_split_size(), + ReadableSize::mb(RAFTSTORE_V2_SPLIT_SIZE_MB) + ); + + let mut default_cfg = TikvConfig::default(); + default_cfg.coprocessor.region_split_size = Some(ReadableSize::mb(500)); + default_cfg.coprocessor.optimize_for(false); + default_cfg.coprocessor.validate().unwrap(); + assert_eq!( + default_cfg.coprocessor.region_split_size(), + ReadableSize::mb(500) + ); + + let mut default_cfg = TikvConfig::default(); + default_cfg.coprocessor.region_split_size = Some(ReadableSize::mb(500)); + default_cfg.coprocessor.optimize_for(true); + default_cfg.coprocessor.validate().unwrap(); + assert_eq!( + default_cfg.coprocessor.region_split_size(), + ReadableSize::mb(500) + ); + } + #[test] fn test_compatibility_with_old_config_template() { let mut buf = Vec::new(); diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index ff06e41cc57..413adf0d415 
100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -258,7 +258,10 @@ mod tests { e ); }); - let cache = cfg.storage.block_cache.build_shared_cache(); + let cache = cfg + .storage + .block_cache + .build_shared_cache(cfg.storage.engine); let dir = test_util::temp_dir("test-engine-factory", false); let env = cfg.build_shared_rocks_env(None, None).unwrap(); diff --git a/src/storage/config.rs b/src/storage/config.rs index d74bd721104..f65ed15cece 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -14,7 +14,7 @@ use tikv_util::{ sys::SysQuota, }; -use crate::config::{BLOCK_CACHE_RATE, MIN_BLOCK_CACHE_SHARD_SIZE}; +use crate::config::{BLOCK_CACHE_RATE, MIN_BLOCK_CACHE_SHARD_SIZE, RAFTSTORE_V2_BLOCK_CACHE_RATE}; pub const DEFAULT_DATA_DIR: &str = "./"; const DEFAULT_GC_RATIO_THRESHOLD: f64 = 1.1; @@ -240,14 +240,18 @@ impl BlockCacheConfig { } } - pub fn build_shared_cache(&self) -> Cache { + pub fn build_shared_cache(&self, engine_type: EngineType) -> Cache { if self.shared == Some(false) { warn!("storage.block-cache.shared is deprecated, cache is always shared."); } let capacity = match self.capacity { None => { let total_mem = SysQuota::memory_limit_in_bytes(); - ((total_mem as f64) * BLOCK_CACHE_RATE) as usize + if engine_type == EngineType::RaftKv2 { + ((total_mem as f64) * RAFTSTORE_V2_BLOCK_CACHE_RATE) as usize + } else { + ((total_mem as f64) * BLOCK_CACHE_RATE) as usize + } } Some(c) => c.0 as usize, }; diff --git a/src/storage/kv/test_engine_builder.rs b/src/storage/kv/test_engine_builder.rs index d15a33742ba..aff54a41faa 100644 --- a/src/storage/kv/test_engine_builder.rs +++ b/src/storage/kv/test_engine_builder.rs @@ -96,7 +96,8 @@ impl TestEngineBuilder { if !enable_block_cache { cache_opt.capacity = Some(ReadableSize::kb(0)); } - let shared = cfg_rocksdb.build_cf_resources(cache_opt.build_shared_cache()); + let shared = + cfg_rocksdb.build_cf_resources(cache_opt.build_shared_cache(EngineType::RaftKv)); let cfs_opts 
= cfs .iter() .map(|cf| match *cf { diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 7429ed8900b..6273bc3d54c 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -4195,8 +4195,9 @@ mod tests { let engine = { let path = "".to_owned(); let cfg_rocksdb = db_config; - let shared = - cfg_rocksdb.build_cf_resources(BlockCacheConfig::default().build_shared_cache()); + let shared = cfg_rocksdb.build_cf_resources( + BlockCacheConfig::default().build_shared_cache(EngineType::RaftKv), + ); let cfs_opts = vec![ ( CF_DEFAULT, diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 416116c833b..09e87bb8d4d 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -175,7 +175,7 @@ fn gen_split_region() -> (Region, Region, Region) { let region_split_size = 30000; cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(20); cluster.cfg.coprocessor.region_max_size = Some(ReadableSize(region_max_size)); - cluster.cfg.coprocessor.region_split_size = ReadableSize(region_split_size); + cluster.cfg.coprocessor.region_split_size = Some(ReadableSize(region_split_size)); let mut range = 1..; cluster.run(); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index bb35b069a41..61ec0d1f3f4 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -726,7 +726,7 @@ fn test_serde_custom_tikv_config() { split_region_on_table: false, batch_split_limit: 1, region_max_size: Some(ReadableSize::mb(12)), - region_split_size: ReadableSize::mb(12), + region_split_size: Some(ReadableSize::mb(12)), region_max_keys: Some(100000), region_split_keys: Some(100000), consistency_check_method: ConsistencyCheckMethod::Raw, diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 10771c57863..23c3b0b41c2 100644 --- 
a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -152,7 +152,7 @@ fn test_server_split_region_twice() { fn test_auto_split_region(cluster: &mut Cluster) { cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(100); cluster.cfg.coprocessor.region_max_size = Some(ReadableSize(REGION_MAX_SIZE)); - cluster.cfg.coprocessor.region_split_size = ReadableSize(REGION_SPLIT_SIZE); + cluster.cfg.coprocessor.region_split_size = Some(ReadableSize(REGION_SPLIT_SIZE)); let check_size_diff = cluster.cfg.raft_store.region_split_check_diff().0; let mut range = 1..; @@ -564,7 +564,7 @@ fn test_split_region_diff_check(cluster: &mut Cluster) { cluster.cfg.raft_store.region_split_check_diff = Some(ReadableSize(10)); cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(20); cluster.cfg.coprocessor.region_max_size = Some(ReadableSize(region_max_size)); - cluster.cfg.coprocessor.region_split_size = ReadableSize(region_split_size); + cluster.cfg.coprocessor.region_split_size = Some(ReadableSize(region_split_size)); let mut range = 1..; @@ -630,7 +630,7 @@ fn test_node_split_region_after_reboot_with_config_change() { cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(20); cluster.cfg.coprocessor.enable_region_bucket = true; cluster.cfg.coprocessor.region_max_size = Some(ReadableSize(region_max_size)); - cluster.cfg.coprocessor.region_split_size = ReadableSize(region_split_size); + cluster.cfg.coprocessor.region_split_size = Some(ReadableSize(region_split_size)); cluster.cfg.coprocessor.region_bucket_size = ReadableSize(region_split_size); cluster.run(); @@ -646,7 +646,7 @@ fn test_node_split_region_after_reboot_with_config_change() { // change the config to make the region splittable cluster.cfg.coprocessor.region_max_size = Some(ReadableSize(region_max_size / 3)); - cluster.cfg.coprocessor.region_split_size = ReadableSize(region_split_size / 3); + 
cluster.cfg.coprocessor.region_split_size = Some(ReadableSize(region_split_size / 3)); cluster.cfg.coprocessor.region_bucket_size = ReadableSize(region_split_size / 3); cluster.stop_node(1); cluster.run_node(1).unwrap(); diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index dc0a85bc9c2..921dcf3615f 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -149,7 +149,10 @@ fn test_delete_files_in_range_for_titan() { // Set configs and create engines let mut cfg = TikvConfig::default(); - let cache = cfg.storage.block_cache.build_shared_cache(); + let cache = cfg + .storage + .block_cache + .build_shared_cache(cfg.storage.engine); cfg.rocksdb.titan.enabled = true; cfg.rocksdb.titan.disable_gc = true; cfg.rocksdb.titan.purge_obsolete_files_period = ReadableDuration::secs(1); From 8484ececb571a28094f3d316fcb6b71f7b2ff12e Mon Sep 17 00:00:00 2001 From: 3pointer Date: Sat, 4 Feb 2023 21:29:55 +0800 Subject: [PATCH 0489/1149] log-backup: support CA-bundle certifications (#14081) ref tikv/tikv#13867, ref pingcap/tidb#38775 --- .../src/metadata/store/lazy_etcd.rs | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/components/backup-stream/src/metadata/store/lazy_etcd.rs b/components/backup-stream/src/metadata/store/lazy_etcd.rs index 37ffbad37c4..7e8b7881070 100644 --- a/components/backup-stream/src/metadata/store/lazy_etcd.rs +++ b/components/backup-stream/src/metadata/store/lazy_etcd.rs @@ -7,7 +7,10 @@ use std::{ use etcd_client::{ConnectOptions, Error as EtcdError, OpenSslClientConfig}; use futures::Future; -use openssl::x509::verify::X509VerifyFlags; +use openssl::{ + pkey::PKey, + x509::{verify::X509VerifyFlags, X509}, +}; use security::SecurityManager; use tikv_util::{ info, @@ -59,7 +62,20 @@ impl ConnectionConfig { // We haven't make it configurable because it is enabled in gRPC by default too. 
// TODO: Perhaps implement grpc-io based etcd client, fully remove the difference between gRPC TLS and our custom TLS? .manually(|c| c.cert_store_mut().set_flags(X509VerifyFlags::PARTIAL_CHAIN)) - .client_cert_pem_and_key(&tls.client_cert, &tls.client_key.0), + .manually(|c| { + let mut client_certs= X509::stack_from_pem(&tls.client_cert)?; + let client_key = PKey::private_key_from_pem(&tls.client_key.0)?; + if !client_certs.is_empty() { + c.set_certificate(&client_certs[0])?; + } + if client_certs.len() > 1 { + for i in client_certs.drain(1..) { + c.add_extra_chain_cert(i)?; + } + } + c.set_private_key(&client_key)?; + Ok(()) + }), ) } opts = opts From 14dd46d82e807933f7a2ee237632152aa5ea5e9f Mon Sep 17 00:00:00 2001 From: Ping Yu Date: Mon, 6 Feb 2023 13:07:57 +0800 Subject: [PATCH 0490/1149] rawkv: fix flaky integration test case `test_raw_put_key_guard` (#14140) close tikv/tikv#14141 rawkv: fix flaky integration test case `test_raw_put_key_guard`. Signed-off-by: Ping Yu --- tests/failpoints/cases/test_rawkv.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/failpoints/cases/test_rawkv.rs b/tests/failpoints/cases/test_rawkv.rs index 274a458958e..e228e82830c 100644 --- a/tests/failpoints/cases/test_rawkv.rs +++ b/tests/failpoints/cases/test_rawkv.rs @@ -276,7 +276,7 @@ fn test_region_merge() { suite.stop(); } -// Verify the raw key guard correctness in apiv2 +// Verify the raw key guard correctness in APIv2. #[test] fn test_raw_put_key_guard() { let mut suite = TestSuite::new(3, ApiVersion::V2); @@ -296,12 +296,19 @@ fn test_raw_put_key_guard() { let copy_test_key = test_key.clone(); let copy_test_value = test_value.clone(); - let apply_wait_timeout = 2000; // ms, assume send request and apply can be finished in 2s. 
fail::cfg(pause_write_fp, "pause").unwrap(); let handle = thread::spawn(move || { must_raw_put(&client, ctx, copy_test_key, copy_test_value); }); - thread::sleep(Duration::from_millis(apply_wait_timeout)); + + // Wait for global_min_lock_ts. + sleep_ms(500); + let start = Instant::now(); + while leader_cm.global_min_lock_ts().is_none() + && start.saturating_elapsed() < Duration::from_secs(5) + { + sleep_ms(200); + } // Before raw_put finish, min_ts should be the ts of "key guard" of the raw_put // request. From 41a89be5c36ece45084a56143ba387b1c8840055 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 6 Feb 2023 13:55:56 +0800 Subject: [PATCH 0491/1149] Integration test: use proc-macro to reuse test cases. (#14133) ref tikv/tikv#12842 Add proc-macro test_case to reuse test cases. Signed-off-by: SpadeA-Tang Co-authored-by: Ti Chi Robot --- Cargo.lock | 10 ++ Cargo.toml | 2 + components/test_raftstore_macro/Cargo.toml | 13 ++ components/test_raftstore_macro/src/lib.rs | 151 +++++++++++++++++ scripts/check-bins.py | 2 +- tests/Cargo.toml | 1 + tests/integrations/raftstore/test_single.rs | 174 +++++++++----------- 7 files changed, 252 insertions(+), 101 deletions(-) create mode 100644 components/test_raftstore_macro/Cargo.toml create mode 100644 components/test_raftstore_macro/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index dd2869a7b10..0872b28c827 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5940,6 +5940,15 @@ dependencies = [ "txn_types", ] +[[package]] +name = "test_raftstore_macro" +version = "0.0.1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "test_sst_importer" version = "0.1.0" @@ -6051,6 +6060,7 @@ dependencies = [ "test_pd", "test_pd_client", "test_raftstore", + "test_raftstore_macro", "test_sst_importer", "test_storage", "test_util", diff --git a/Cargo.toml b/Cargo.toml index d76dce26a18..f7d44c94866 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -280,6 +280,7 @@ members = [ 
"components/test_pd", "components/test_pd_client", "components/test_raftstore", + "components/test_raftstore_macro", "components/test_sst_importer", "components/test_storage", "components/test_util", @@ -355,6 +356,7 @@ example_coprocessor_plugin = { path = "components/test_coprocessor_plugin/exampl test_pd = { path = "components/test_pd" } test_pd_client = { path = "components/test_pd_client" } test_raftstore = { path = "components/test_raftstore", default-features = false } +test_raftstore_macro = { path = "components/test_raftstore_macro" } test_sst_importer = { path = "components/test_sst_importer" } test_storage = { path = "components/test_storage", default-features = false } test_util = { path = "components/test_util" } diff --git a/components/test_raftstore_macro/Cargo.toml b/components/test_raftstore_macro/Cargo.toml new file mode 100644 index 00000000000..7a05f56ed3d --- /dev/null +++ b/components/test_raftstore_macro/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "test_raftstore_macro" +version = "0.0.1" +edition = "2018" +publish = false + +[lib] +proc-macro = true + +[dependencies] +proc-macro2 = "1.0" +quote = "1" +syn = { version = "1", features = ["full", "extra-traits"] } diff --git a/components/test_raftstore_macro/src/lib.rs b/components/test_raftstore_macro/src/lib.rs new file mode 100644 index 00000000000..59a2c6f1273 --- /dev/null +++ b/components/test_raftstore_macro/src/lib.rs @@ -0,0 +1,151 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use proc_macro::TokenStream; +use proc_macro2::{TokenStream as TokenStream2, TokenTree}; +use quote::{quote, ToTokens}; +use syn::{parse_macro_input, parse_quote, Ident, ItemFn, Path}; + +/// test_case generate test cases using cluster creation method provided. 
+/// +/// ex: +/// #[test_case(test_raftstore::new_node_cluster)] +/// #[test_case(test_raftstore::new_server_cluster)] +/// #[test_case(test_raftstore_v2::new_node_cluster)] +/// fn test_something() { +/// let cluster = new_cluster(...) +/// } +/// +/// It generates three test cases as following: +/// +/// #[cfg(test)] +/// mod test_something { +/// #[test] +/// fn test_raftstore_new_node_cluster() { +/// use test_raftstore::new_node_cluster as new_cluster; +/// let mut cluster = new_cluster(0, 1); +/// } +/// +/// #[test] +/// fn test_raftstore_new_server_cluster() { +/// use test_raftstore::new_server_cluster as new_cluster; +/// let mut cluster = new_cluster(0, 1); +/// } +/// +/// #[test] +/// fn test_raftstore_v2_new_server_cluster() { +/// use test_raftstore::test_raftstore_v2 as new_cluster; +/// let mut cluster = new_cluster(0, 1); +/// } +/// } +#[proc_macro_attribute] +pub fn test_case(arg: TokenStream, input: TokenStream) -> TokenStream { + let mut fn_item = parse_macro_input!(input as ItemFn); + let mut test_cases = vec![TokenStream2::from(arg)]; + let mut attrs_to_remove = vec![]; + + let legal_test_case_name: Path = parse_quote!(test_case); + for (idx, attr) in fn_item.attrs.iter().enumerate() { + if legal_test_case_name == attr.path { + test_cases.push(attr.into_token_stream()); + attrs_to_remove.push(idx); + } + } + + for i in attrs_to_remove.into_iter().rev() { + fn_item.attrs.swap_remove(i); + } + + render_test_cases(test_cases, fn_item.clone()) +} + +fn render_test_cases(test_cases: Vec, fn_item: ItemFn) -> TokenStream { + let mut rendered_test_cases: Vec = vec![]; + for case in test_cases { + let mut item = fn_item.clone(); + + // Parse test case to get the package name and the method name + let (package, method) = parse_test_case(case); + let test_name = format!("{}_{}", package, method); + // Insert a use statment at the beginning of the test, + // ex: " use test_raftstore::new_node_cluster as new_cluster ", so we can use + // new_cluster in 
all situations. + item.block.stmts.insert( + 0, + syn::parse( + quote! { + use #package::#method as new_cluster; + } + .into(), + ) + .unwrap(), + ); + item.attrs.insert(0, parse_quote! { #[test] }); + let method_name = Ident::new(&test_name, item.sig.ident.span()); + item.sig.ident = method_name; + + rendered_test_cases.push(item.to_token_stream()); + } + + let mod_name = fn_item.sig.ident; + let output = quote! { + #[cfg(test)] + mod #mod_name { + #[allow(unused_imports)] + use super::*; + + #(#rendered_test_cases)* + } + }; + + output.into() +} + +// Parsing test case to get package name and method name. +// There are two cases that need to be considered +// 1. the first token is Ident type +// 2. the first token is Punct type +// +// use the following case as an example +// #[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore::new_server_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] +// fn test_something() {} +// +// The first case ( #[test_case(test_raftstore::new_node_cluster)] ) +// will be passed to the proc-macro "test_case" as the first argument and the +// #[test_case(...)] will be stripped off automatically. So the first token is +// the Ident type, namely "test_raftstore". +// +// The other two cases are in the `attr` fileds of ItemFn, and +// #[test_case(...)] are untouched. So the first token is Punct type. 
+fn parse_test_case(test_case: TokenStream2) -> (Ident, Ident) { + let mut iter = test_case.into_iter(); + let package = match iter.next().unwrap() { + // ex: test_raftstore::new_node_cluster + TokenTree::Ident(package) => package, + // ex: #[test_raftstore::new_node_cluster] + TokenTree::Punct(_) => match iter.next().unwrap() { + TokenTree::Group(group) => { + let mut iter = group.stream().into_iter(); + iter.next(); + match iter.next().unwrap() { + TokenTree::Group(group) => { + let stream = group.stream(); + return parse_test_case(stream); + } + _ => panic!("Invalid token stream"), + } + } + _ => panic!("Invalid token stream"), + }, + _ => panic!("Invalid token stream"), + }; + // Skip two ':' + iter.next(); + iter.next(); + let method = match iter.next().unwrap() { + TokenTree::Ident(method) => method, + _ => panic!("Invalid token stream"), + }; + (package, method) +} diff --git a/scripts/check-bins.py b/scripts/check-bins.py index aaa13e6b9de..1255472a76a 100644 --- a/scripts/check-bins.py +++ b/scripts/check-bins.py @@ -14,7 +14,7 @@ "online_config", "online_config_derive", "tidb_query_codegen", "panic_hook", "fuzz", "fuzzer_afl", "fuzzer_honggfuzz", "fuzzer_libfuzzer", "coprocessor_plugin_api", "example_coprocessor_plugin", "memory_trace_macros", "case_macros", - "tracker" + "tracker", "test_raftstore_macro" } JEMALLOC_SYMBOL = ["je_arena_boot", " malloc"] diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 1cc0e6bce87..96ee19e9bae 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -143,6 +143,7 @@ test_coprocessor = { workspace = true } test_pd = { workspace = true } test_pd_client = { workspace = true } test_raftstore = { workspace = true } +test_raftstore_macro = { workspace = true } test_sst_importer = { workspace = true } test_storage = { workspace = true } test_util = { workspace = true } diff --git a/tests/integrations/raftstore/test_single.rs b/tests/integrations/raftstore/test_single.rs index 73944428953..b7fcb6a7b34 100644 --- 
a/tests/integrations/raftstore/test_single.rs +++ b/tests/integrations/raftstore/test_single.rs @@ -6,11 +6,59 @@ use engine_traits::{CfName, CF_DEFAULT, CF_WRITE}; use raftstore::store::*; use rand::prelude::*; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::{config::*, time::Instant}; // TODO add epoch not match test cases. -fn test_put(cluster: &mut Cluster) { +fn test_delete_range(cluster: &mut Cluster, cf: CfName) { + let data_set: Vec<_> = (1..500) + .map(|i| { + ( + format!("key{:08}", i).into_bytes(), + format!("value{}", i).into_bytes(), + ) + }) + .collect(); + for kvs in data_set.chunks(50) { + let requests = kvs.iter().map(|(k, v)| new_put_cf_cmd(cf, k, v)).collect(); + // key9 is always the last region. + cluster.batch_put(b"key9", requests).unwrap(); + } + + // delete_range request with notify_only set should not actually delete data. + cluster.must_notify_delete_range_cf(cf, b"", b""); + + let mut rng = rand::thread_rng(); + for _ in 0..50 { + let (k, v) = data_set.choose(&mut rng).unwrap(); + assert_eq!(cluster.get_cf(cf, k).unwrap(), *v); + } + + // Empty keys means the whole range. 
+ cluster.must_delete_range_cf(cf, b"", b""); + + for _ in 0..50 { + let k = &data_set.choose(&mut rng).unwrap().0; + assert!(cluster.get_cf(cf, k).is_none()); + } +} + +fn test_put_large_entry(cluster: &mut Cluster) { + let max_size: usize = 1024; + cluster.cfg.raft_store.raft_entry_max_size = ReadableSize(max_size as u64); + + cluster.run(); + + let large_value = vec![b'v'; max_size + 1]; + let res = cluster.put(b"key", large_value.as_slice()); + assert!(res.as_ref().err().unwrap().has_raft_entry_too_large()); +} + +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +fn test_put() { + let mut cluster = new_cluster(0, 1); cluster.run(); let mut data_set: Vec<_> = (1..1000) @@ -53,7 +101,10 @@ fn test_put(cluster: &mut Cluster) { } } -fn test_delete(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +fn test_delete() { + let mut cluster = new_cluster(0, 1); cluster.run(); let data_set: Vec<_> = (1..1000) @@ -80,40 +131,30 @@ fn test_delete(cluster: &mut Cluster) { } } -fn test_delete_range(cluster: &mut Cluster, cf: CfName) { - let data_set: Vec<_> = (1..500) - .map(|i| { - ( - format!("key{:08}", i).into_bytes(), - format!("value{}", i).into_bytes(), - ) - }) - .collect(); - for kvs in data_set.chunks(50) { - let requests = kvs.iter().map(|(k, v)| new_put_cf_cmd(cf, k, v)).collect(); - // key9 is always the last region. - cluster.batch_put(b"key9", requests).unwrap(); - } - - // delete_range request with notify_only set should not actually delete data. - cluster.must_notify_delete_range_cf(cf, b"", b""); - - let mut rng = rand::thread_rng(); - for _ in 0..50 { - let (k, v) = data_set.choose(&mut rng).unwrap(); - assert_eq!(cluster.get_cf(cf, k).unwrap(), *v); - } - - // Empty keys means the whole range. 
- cluster.must_delete_range_cf(cf, b"", b""); +#[test] +fn test_node_use_delete_range() { + let mut cluster = new_node_cluster(0, 1); + cluster.cfg.raft_store.use_delete_range = true; + cluster.run(); + test_delete_range(&mut cluster, CF_DEFAULT); + // Prefix bloom filter is always enabled in the Write CF. + test_delete_range(&mut cluster, CF_WRITE); +} - for _ in 0..50 { - let k = &data_set.choose(&mut rng).unwrap().0; - assert!(cluster.get_cf(cf, k).is_none()); - } +#[test] +fn test_node_not_use_delete_range() { + let mut cluster = new_node_cluster(0, 1); + cluster.cfg.raft_store.use_delete_range = false; + cluster.run(); + test_delete_range(&mut cluster, CF_DEFAULT); + // Prefix bloom filter is always enabled in the Write CF. + test_delete_range(&mut cluster, CF_WRITE); } -fn test_wrong_store_id(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +fn test_wrong_store_id() { + let mut cluster = new_cluster(0, 1); cluster.run(); let (k, v) = (b"k", b"v"); @@ -137,73 +178,6 @@ fn test_wrong_store_id(cluster: &mut Cluster) { ); } -fn test_put_large_entry(cluster: &mut Cluster) { - let max_size: usize = 1024; - cluster.cfg.raft_store.raft_entry_max_size = ReadableSize(max_size as u64); - - cluster.run(); - - let large_value = vec![b'v'; max_size + 1]; - let res = cluster.put(b"key", large_value.as_slice()); - assert!(res.as_ref().err().unwrap().has_raft_entry_too_large()); -} - -#[test] -fn test_node_put() { - let mut cluster = new_node_cluster(0, 1); - test_put(&mut cluster); -} - -#[test] -fn test_node_delete() { - let mut cluster = new_node_cluster(0, 1); - test_delete(&mut cluster); -} - -#[test] -fn test_node_use_delete_range() { - let mut cluster = new_node_cluster(0, 1); - cluster.cfg.raft_store.use_delete_range = true; - cluster.run(); - test_delete_range(&mut cluster, CF_DEFAULT); - // Prefix bloom filter is always enabled in the Write CF. 
- test_delete_range(&mut cluster, CF_WRITE); -} - -#[test] -fn test_node_not_use_delete_range() { - let mut cluster = new_node_cluster(0, 1); - cluster.cfg.raft_store.use_delete_range = false; - cluster.run(); - test_delete_range(&mut cluster, CF_DEFAULT); - // Prefix bloom filter is always enabled in the Write CF. - test_delete_range(&mut cluster, CF_WRITE); -} - -#[test] -fn test_node_wrong_store_id() { - let mut cluster = new_node_cluster(0, 1); - test_wrong_store_id(&mut cluster); -} - -#[test] -fn test_server_put() { - let mut cluster = new_server_cluster(0, 1); - test_put(&mut cluster); -} - -#[test] -fn test_server_delete() { - let mut cluster = new_server_cluster(0, 1); - test_delete(&mut cluster); -} - -#[test] -fn test_server_wrong_store_id() { - let mut cluster = new_server_cluster(0, 1); - test_wrong_store_id(&mut cluster); -} - #[test] fn test_node_put_large_entry() { let mut cluster = new_node_cluster(0, 1); From 7c20add6cef90ac231db6d10374856f23d89e3f1 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Mon, 6 Feb 2023 16:31:57 +0800 Subject: [PATCH 0492/1149] raftstore, storage: return FlashbackNotPrepared error if the flashback commit check failed (#14145) close tikv/tikv#14143, ref tikv/tikv#14143 As https://github.com/tikv/tikv/issues/14143 mentioned, flashback should not return `TxnLockNotFound` error to the client if the flashback commit check failed, which will cause TiDB to retry the flashback forever. This PR changes this error to `FlashbackNotPrepared` to match the client handling logic. 
Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/error_code/src/storage.rs | 1 + components/raftstore/src/errors.rs | 7 ++++--- components/raftstore/src/store/fsm/peer.rs | 2 +- components/raftstore/src/store/util.rs | 2 +- components/raftstore/src/store/worker/read.rs | 2 +- etc/error_code.toml | 10 ++++++++++ src/storage/errors.rs | 9 +++++++++ src/storage/txn/actions/flashback_to_version.rs | 13 +++++++------ .../txn/commands/flashback_to_version_read_phase.rs | 1 + src/storage/txn/mod.rs | 7 +++++++ tests/integrations/server/kv_service.rs | 3 ++- 12 files changed, 45 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0872b28c827..485aeb43c52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2732,7 +2732,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#2b853bed812556901846f42820b63d8a0d9c8d24" +source = "git+https://github.com/pingcap/kvproto.git#eccad3776d7b076da68d6c51fb7506b8562b9802" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/error_code/src/storage.rs b/components/error_code/src/storage.rs index ff994032dea..e2cf34094c3 100644 --- a/components/error_code/src/storage.rs +++ b/components/error_code/src/storage.rs @@ -21,6 +21,7 @@ define_error_codes!( BAD_FORMAT_WRITE => ("BadFormatWrite", "",""), KEY_IS_LOCKED => ("KeyIsLocked", "", ""), MAX_TIMESTAMP_NOT_SYNCED => ("MaxTimestampNotSynced", "", ""), + FLASHBACK_NOT_PREPARED => ("FlashbackNotPrepared", "", ""), DEADLINE_EXCEEDED => ("DeadlineExceeded", "", ""), API_VERSION_NOT_MATCHED => ("ApiVersionNotMatched", "", ""), INVALID_KEY_MODE => ("InvalidKeyMode", "", ""), diff --git a/components/raftstore/src/errors.rs b/components/raftstore/src/errors.rs index 36fcec7f1f3..5deef832723 100644 --- a/components/raftstore/src/errors.rs +++ b/components/raftstore/src/errors.rs @@ -58,8 +58,8 @@ pub enum Error { #[error("region {0} is in the recovery progress")] 
RecoveryInProgress(u64), - #[error("region {0} is in the flashback progress")] - FlashbackInProgress(u64), + #[error("region {0} is in the flashback progress with start_ts {1}")] + FlashbackInProgress(u64, u64), #[error("region {0} not prepared the flashback")] FlashbackNotPrepared(u64), @@ -256,9 +256,10 @@ impl From for errorpb::Error { e.set_region_id(region_id); errorpb.set_recovery_in_progress(e); } - Error::FlashbackInProgress(region_id) => { + Error::FlashbackInProgress(region_id, flashback_start_ts) => { let mut e = errorpb::FlashbackInProgress::default(); e.set_region_id(region_id); + e.set_flashback_start_ts(flashback_start_ts); errorpb.set_flashback_in_progress(e); } Error::FlashbackNotPrepared(region_id) => { diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index d5b73e5f721..05b443be4eb 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -5234,7 +5234,7 @@ where true, ) { match e { - Error::FlashbackInProgress(_) => self + Error::FlashbackInProgress(..) => self .ctx .raft_metrics .invalid_proposal diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 0344adb2b92..0127cc5c7e6 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -364,7 +364,7 @@ pub fn check_flashback_state( return Ok(()); } } - return Err(Error::FlashbackInProgress(region_id)); + return Err(Error::FlashbackInProgress(region_id, flashback_start_ts)); } // If the region is not in the flashback state, the flashback request itself // should be rejected. 
diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 5d6835666b4..379af09eb2e 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -830,7 +830,7 @@ where Error::FlashbackNotPrepared(_) => { m.borrow_mut().reject_reason.flashback_not_prepared.inc() } - Error::FlashbackInProgress(_) => { + Error::FlashbackInProgress(..) => { m.borrow_mut().reject_reason.flashback_in_progress.inc() } _ => unreachable!(), diff --git a/etc/error_code.toml b/etc/error_code.toml index bb23c9b5e26..4fae4d9ea57 100644 --- a/etc/error_code.toml +++ b/etc/error_code.toml @@ -563,6 +563,11 @@ error = ''' KV:SstImporter:InvalidKeyMode ''' +["KV:SstImporter:ResourceNotEnough"] +error = ''' +KV:SstImporter:ResourceNotEnough +''' + ["KV:Storage:Timeout"] error = ''' KV:Storage:Timeout @@ -653,6 +658,11 @@ error = ''' KV:Storage:MaxTimestampNotSynced ''' +["KV:Storage:FlashbackNotPrepared"] +error = ''' +KV:Storage:FlashbackNotPrepared +''' + ["KV:Storage:DeadlineExceeded"] error = ''' KV:Storage:DeadlineExceeded diff --git a/src/storage/errors.rs b/src/storage/errors.rs index 2b41cf23ea2..92568d22e45 100644 --- a/src/storage/errors.rs +++ b/src/storage/errors.rs @@ -255,6 +255,15 @@ pub fn extract_region_error_from_error(e: &Error) -> Option { err.set_max_timestamp_not_synced(Default::default()); Some(err) } + Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::FlashbackNotPrepared( + region_id, + )))) => { + let mut err = errorpb::Error::default(); + let mut flashback_not_prepared_err = errorpb::FlashbackNotPrepared::default(); + flashback_not_prepared_err.set_region_id(*region_id); + err.set_flashback_not_prepared(flashback_not_prepared_err); + Some(err) + } Error(box ErrorInner::SchedTooBusy) => { let mut err = errorpb::Error::default(); let mut server_is_busy_err = errorpb::ServerIsBusy::default(); diff --git a/src/storage/txn/actions/flashback_to_version.rs 
b/src/storage/txn/actions/flashback_to_version.rs index f44854159c0..bb0c95eb935 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -233,6 +233,7 @@ pub fn check_flashback_commit( key_to_commit: &Key, flashback_start_ts: TimeStamp, flashback_commit_ts: TimeStamp, + region_id: u64, ) -> TxnResult { match reader.load_lock(key_to_commit)? { // If the lock exists, it means the flashback hasn't been finished. @@ -241,7 +242,7 @@ pub fn check_flashback_commit( return Ok(false); } error!( - "check flashback commit exception: lock not found"; + "check flashback commit exception: lock record mismatched"; "key_to_commit" => log_wrappers::Value::key(key_to_commit.as_encoded()), "flashback_start_ts" => flashback_start_ts, "flashback_commit_ts" => flashback_commit_ts, @@ -266,11 +267,11 @@ pub fn check_flashback_commit( ); } } - Err(txn::Error::from_mvcc(mvcc::ErrorInner::TxnLockNotFound { - start_ts: flashback_start_ts, - commit_ts: flashback_commit_ts, - key: key_to_commit.to_raw()?, - })) + // If both the flashback lock and commit records are mismatched, it means + // the current region is not in the flashback state. + Err(txn::Error::from(txn::ErrorInner::FlashbackNotPrepared( + region_id, + ))) } pub fn get_first_user_key( diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs index 7fdc86288c2..4be0239aad2 100644 --- a/src/storage/txn/commands/flashback_to_version_read_phase.rs +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -213,6 +213,7 @@ impl ReadCommand for FlashbackToVersionReadPhase { &start_key, self.start_ts, self.commit_ts, + self.ctx.get_region_id(), )? 
{ statistics.add(&reader.statistics); return Ok(ProcessResult::Res); diff --git a/src/storage/txn/mod.rs b/src/storage/txn/mod.rs index d3b199208cb..f43e309f503 100644 --- a/src/storage/txn/mod.rs +++ b/src/storage/txn/mod.rs @@ -141,6 +141,9 @@ pub enum ErrorInner { start_ts: {start_ts}, region_id: {region_id}" )] MaxTimestampNotSynced { region_id: u64, start_ts: TimeStamp }, + + #[error("region {0} not prepared the flashback")] + FlashbackNotPrepared(u64), } impl ErrorInner { @@ -174,6 +177,9 @@ impl ErrorInner { region_id, start_ts, }), + ErrorInner::FlashbackNotPrepared(region_id) => { + Some(ErrorInner::FlashbackNotPrepared(region_id)) + } ErrorInner::Other(_) | ErrorInner::ProtoBuf(_) | ErrorInner::Io(_) => None, } } @@ -224,6 +230,7 @@ impl ErrorCodeExt for Error { ErrorInner::MaxTimestampNotSynced { .. } => { error_code::storage::MAX_TIMESTAMP_NOT_SYNCED } + ErrorInner::FlashbackNotPrepared(_) => error_code::storage::FLASHBACK_NOT_PREPARED, } } } diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 61a3fb39097..30dd3b120ca 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -800,7 +800,8 @@ fn test_mvcc_flashback_unprepared() { req.set_start_key(b"a".to_vec()); req.set_end_key(b"z".to_vec()); let resp = client.kv_flashback_to_version(&req).unwrap(); - assert!(resp.get_error().contains("txn lock not found")); + assert!(resp.get_region_error().has_flashback_not_prepared()); + assert!(resp.get_error().is_empty()); must_kv_read_equal(&client, ctx.clone(), k.clone(), v, 6); // Flashback with preparing. must_flashback_to_version(&client, ctx.clone(), 0, 6, 7); From 2e7aede3d8d11b07b0c83d920f06238790861968 Mon Sep 17 00:00:00 2001 From: Lucas Date: Tue, 7 Feb 2023 11:33:57 +0800 Subject: [PATCH 0493/1149] raftstore: support dynamically resize the count of async ios. (#13965) close tikv/tikv#13964 Support dynamically modify the count of async-ios. 
Signed-off-by: Lucasliang --- .../raftstore/src/store/async_io/write.rs | 143 ++++++++++--- .../src/store/async_io/write_router.rs | 91 +++++++-- components/raftstore/src/store/config.rs | 28 ++- components/raftstore/src/store/fsm/store.rs | 21 +- .../src/store/worker/refresh_config.rs | 100 ++++++++- .../integrations/config/dynamic/raftstore.rs | 69 ++++++- .../integrations/raftstore/test_scale_pool.rs | 189 ++++++++++++++++++ 7 files changed, 572 insertions(+), 69 deletions(-) diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 4d8392edd55..9b25d7de806 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -21,10 +21,11 @@ use engine_traits::{ use error_code::ErrorCodeExt; use fail::fail_point; use kvproto::raft_serverpb::{RaftLocalState, RaftMessage}; +use parking_lot::Mutex; use protobuf::Message; use raft::eraftpb::Entry; use resource_control::{ - channel::{bounded, Receiver, Sender}, + channel::{bounded, Receiver}, ResourceController, ResourceMetered, }; use tikv_util::{ @@ -37,7 +38,7 @@ use tikv_util::{ warn, }; -use super::write_router::WriteSenders; +use super::write_router::{SharedSenders, WriteSenders}; use crate::{ store::{ config::Config, @@ -874,22 +875,41 @@ where } } +#[derive(Clone)] +pub struct StoreWritersContext +where + EK: KvEngine, + ER: RaftEngine, + T: Transport + 'static, + N: PersistedNotifier, +{ + pub store_id: u64, + pub raft_engine: ER, + pub kv_engine: Option, + pub transfer: T, + pub notifier: N, + pub cfg: Arc>, +} + +#[derive(Clone)] pub struct StoreWriters where EK: KvEngine, ER: RaftEngine, { resource_ctl: Option>, - writers: Vec>>, - handlers: Vec>, + /// Mailboxes for sending raft messages to async ios. + writers: Arc>>, + /// Background threads for handling asynchronous messages. 
+ handlers: Arc>>>, } impl StoreWriters { pub fn new(resource_ctl: Option>) -> Self { Self { resource_ctl, - writers: vec![], - handlers: vec![], + writers: Arc::new(VersionTrack::default()), + handlers: Arc::new(Mutex::new(vec![])), } } } @@ -913,42 +933,99 @@ where cfg: &Arc>, ) -> Result<()> { let pool_size = cfg.value().store_io_pool_size; - for i in 0..pool_size { - let tag = format!("store-writer-{}", i); - let (tx, rx) = bounded( - self.resource_ctl.clone(), - cfg.value().store_io_notify_capacity, - ); - let mut worker = Worker::new( - store_id, - tag.clone(), - raft_engine.clone(), - kv_engine.clone(), - rx, - notifier.clone(), - trans.clone(), - cfg, - ); - info!("starting store writer {}", i); - let t = thread::Builder::new() - .name(thd_name!(tag)) - .spawn_wrapper(move || { - worker.run(); - })?; - self.writers.push(tx); - self.handlers.push(t); + if pool_size > 0 { + self.increase_to( + pool_size, + StoreWritersContext { + store_id, + notifier: notifier.clone(), + raft_engine, + kv_engine, + transfer: trans.clone(), + cfg: cfg.clone(), + }, + )?; } Ok(()) } pub fn shutdown(&mut self) { - assert_eq!(self.writers.len(), self.handlers.len()); - for (i, handler) in self.handlers.drain(..).enumerate() { + let mut handlers = self.handlers.lock(); + let writers = self.writers.value().get(); + assert_eq!(writers.len(), handlers.len()); + for (i, handler) in handlers.drain(..).enumerate() { info!("stopping store writer {}", i); - self.writers[i].send(WriteMsg::Shutdown, 0).unwrap(); + writers[i].send(WriteMsg::Shutdown, 0).unwrap(); handler.join().unwrap(); } } + + #[inline] + /// Returns the valid size of store writers. + pub fn size(&self) -> usize { + self.writers.value().get().len() + } + + pub fn decrease_to(&mut self, size: usize) -> Result<()> { + // Only update logical version of writers but not destroying the workers, so + // that peers that are still using the writer_id (because there're + // unpersisted tasks) can proceed to finish their tasks. 
After the peer + // gets rescheduled, it will use a new writer_id within the new + // capacity, specified by refreshed `store-io-pool-size`. + // + // TODO: find an elegant way to effectively free workers. + assert_eq!(self.writers.value().get().len(), self.handlers.lock().len()); + self.writers + .update(move |writers: &mut SharedSenders| -> Result<()> { + assert!(writers.get().len() > size); + Ok(()) + })?; + Ok(()) + } + + pub fn increase_to( + &mut self, + size: usize, + writer_meta: StoreWritersContext, + ) -> Result<()> { + let mut handlers = self.handlers.lock(); + let current_size = self.writers.value().get().len(); + assert_eq!(current_size, handlers.len()); + let resource_ctl = self.resource_ctl.clone(); + self.writers + .update(move |writers: &mut SharedSenders| -> Result<()> { + let mut cached_senders = writers.get(); + for i in current_size..size { + let tag = format!("store-writer-{}", i); + let (tx, rx) = bounded( + resource_ctl.clone(), + writer_meta.cfg.value().store_io_notify_capacity, + ); + let mut worker = Worker::new( + writer_meta.store_id, + tag.clone(), + writer_meta.raft_engine.clone(), + writer_meta.kv_engine.clone(), + rx, + writer_meta.notifier.clone(), + writer_meta.transfer.clone(), + &writer_meta.cfg, + ); + info!("starting store writer {}", i); + let t = + thread::Builder::new() + .name(thd_name!(tag)) + .spawn_wrapper(move || { + worker.run(); + })?; + cached_senders.push(tx); + handlers.push(t); + } + writers.set(cached_senders); + Ok(()) + })?; + Ok(()) + } } /// Used for test to write task to kv db and raft db. 
diff --git a/components/raftstore/src/store/async_io/write_router.rs b/components/raftstore/src/store/async_io/write_router.rs index ead22f70b28..d00007a9485 100644 --- a/components/raftstore/src/store/async_io/write_router.rs +++ b/components/raftstore/src/store/async_io/write_router.rs @@ -16,7 +16,11 @@ use std::{ use crossbeam::channel::TrySendError; use engine_traits::{KvEngine, RaftEngine}; use resource_control::channel::Sender; -use tikv_util::{info, time::Instant}; +use tikv_util::{ + config::{Tracker, VersionTrack}, + error, info, safe_panic, + time::Instant, +}; use crate::store::{ async_io::write::WriteMsg, config::Config, fsm::store::PollContext, local_metrics::RaftMetrics, @@ -163,13 +167,14 @@ where if self.last_unpersisted.is_some() { return false; } - if ctx.config().store_io_pool_size <= 1 { - self.writer_id = 0; - return true; - } + // Local senders may not be updated when `store_io_pool_size()` has been + // increased by the `ctx.config().update()`, keep the real size until it's + // updated by `poller.begin()`. + let async_io_pool_size = + std::cmp::min(ctx.write_senders().size(), ctx.config().store_io_pool_size); if last_unpersisted.is_none() { // If no previous pending ready, we can randomly select a new writer worker. - self.writer_id = rand::random::() % ctx.config().store_io_pool_size; + self.writer_id = rand::random::() % async_io_pool_size; self.next_retry_time = Instant::now_coarse() + ctx.config().io_reschedule_hotpot_duration.0; self.next_writer_id = None; @@ -188,7 +193,7 @@ where // The hot write peers should not be rescheduled entirely. // So it will not be rescheduled if the random id is the same as the original // one. 
- let new_id = rand::random::() % ctx.config().store_io_pool_size; + let new_id = rand::random::() % async_io_pool_size; if new_id == self.writer_id { // Reset the time self.next_retry_time = now + ctx.config().io_reschedule_hotpot_duration.0; @@ -238,7 +243,7 @@ where let now = Instant::now(); if sender.send(msg, self.last_msg_priority).is_err() { // Write threads are destroyed after store threads during shutdown. - panic!("{} failed to send write msg, err: disconnected", self.tag); + safe_panic!("{} failed to send write msg, err: disconnected", self.tag); } ctx.raft_metrics() .write_block_wait @@ -246,31 +251,87 @@ where } Err(TrySendError::Disconnected(_)) => { // Write threads are destroyed after store threads during shutdown. - panic!("{} failed to send write msg, err: disconnected", self.tag); + safe_panic!("{} failed to send write msg, err: disconnected", self.tag); } } } } +/// Safefly shared senders among the controller and raftstore threads. +/// Senders in it can only be accessed by cloning method `senders()`. +/// +/// `Clone` is safe to race with concurrent `Sender.send()` because the +/// `RefCell` field `last_msg_group` in `Sender` is skipped. +#[derive(Clone)] +pub struct SharedSenders(Vec>>); + +impl Default for SharedSenders { + fn default() -> Self { + Self(vec![]) + } +} + +impl SharedSenders { + #[inline] + pub fn get(&self) -> Vec>> { + self.0.clone() + } + + #[inline] + pub fn set(&mut self, senders: Vec>>) { + self.0 = senders; + } +} + +/// All `Sender`s in `SharedSenders` are shared by the global controller +/// thread and raftstore threads. There won't exist concurrent `Sender.send()` +/// calling scenarios among threads on a same `Sender`. +/// On the one hand, th controller thread will not call `Sender.send()` to +/// consume resources to send messages, just updating the size of `Sender`s if +/// `store-io-pool-size` is resized. 
On the other hand, each raftstore thread +/// just use its local cloned `Sender`s for sending messages and update it at +/// `begin()`, the first stage for processing messages. +/// Therefore, it's safe to manually remain `Send` trait for +/// `SharedSenders`. +/// +/// TODO: use an elegant implementation, such as `Mutex`, to avoid this +/// hack for sharing `Sender`s among multi-threads. +unsafe impl Sync for SharedSenders {} + /// Senders for asynchronous writes. There can be multiple senders, generally /// you should use `WriteRouter` to decide which sender to be used. #[derive(Clone)] pub struct WriteSenders { - write_senders: Vec>>, + senders: Tracker>, + cached_senders: Vec>>, io_reschedule_concurrent_count: Arc, } impl WriteSenders { - pub fn new(write_senders: Vec>>) -> Self { + pub fn new(senders: Arc>>) -> Self { + let cached_senders = senders.value().get(); WriteSenders { - write_senders, + senders: senders.tracker("async writers' tracker".to_owned()), + cached_senders, io_reschedule_concurrent_count: Arc::default(), } } #[inline] pub fn is_empty(&self) -> bool { - self.write_senders.is_empty() + self.cached_senders.is_empty() + } + + #[inline] + pub fn size(&self) -> usize { + self.cached_senders.len() + } + + #[inline] + pub fn refresh(&mut self) { + if let Some(senders) = self.senders.any_new() { + self.cached_senders = senders.get(); + } } } @@ -279,7 +340,7 @@ impl Index for WriteSenders { #[inline] fn index(&self, index: usize) -> &Sender> { - &self.write_senders[index] + &self.cached_senders[index] } } @@ -329,7 +390,7 @@ pub(crate) mod tests { Self { receivers, ctx: TestContext { - senders: WriteSenders::new(senders), + senders: WriteSenders::new(Arc::new(VersionTrack::new(SharedSenders(senders)))), config, raft_metrics: RaftMetrics::new(true), }, diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index d6994a16ed4..6667a46c4e5 100644 --- a/components/raftstore/src/store/config.rs +++ 
b/components/raftstore/src/store/config.rs @@ -206,7 +206,6 @@ pub struct Config { pub store_batch_system: BatchSystemConfig, /// If it is 0, it means io tasks are handled in store threads. - #[online_config(skip)] pub store_io_pool_size: usize, #[online_config(skip)] @@ -1049,8 +1048,25 @@ impl ConfigManager for RaftstoreConfigManager { ) -> std::result::Result<(), Box> { { let change = change.clone(); - self.config - .update(move |cfg: &mut Config| cfg.update(change))?; + self.config.update(move |cfg: &mut Config| { + // Currently, it's forbidden to modify the write mode either from `async` to + // `sync` or from `sync` to `async`. + if let Some(ConfigValue::Usize(resized_io_size)) = change.get("store_io_pool_size") + { + if cfg.store_io_pool_size == 0 && *resized_io_size > 0 { + return Err( + "SYNC mode, not allowed to resize the size of store-io-pool-size" + .into(), + ); + } else if cfg.store_io_pool_size > 0 && *resized_io_size == 0 { + return Err( + "ASYNC mode, not allowed to be set to SYNC mode by resizing store-io-pool-size to 0" + .into(), + ); + } + } + cfg.update(change) + })?; } if let Some(ConfigValue::Module(raft_batch_system_change)) = change.get("store_batch_system") @@ -1062,6 +1078,12 @@ impl ConfigManager for RaftstoreConfigManager { { self.schedule_config_change(RaftStoreBatchComponent::Apply, apply_batch_system_change); } + if let Some(ConfigValue::Usize(resized_io_size)) = change.get("store_io_pool_size") { + let resize_io_task = RefreshConfigTask::ScaleWriters(*resized_io_size); + if let Err(e) = self.scheduler.schedule(resize_io_task) { + error!("raftstore configuration manager schedule to resize store-io-pool-size work task failed"; "err"=> ?e); + } + } info!( "raftstore config changed"; "change" => ?change, diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 66acd187215..4b9e69f9763 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ 
b/components/raftstore/src/store/fsm/store.rs @@ -71,7 +71,7 @@ use crate::{ store::{ async_io::{ read::{ReadRunner, ReadTask}, - write::{StoreWriters, Worker as WriteWorker, WriteMsg}, + write::{StoreWriters, StoreWritersContext, Worker as WriteWorker, WriteMsg}, write_router::WriteSenders, }, config::Config, @@ -907,6 +907,8 @@ impl PollHandler, St self.poll_ctx.update_ticks_timeout(); update_cfg(&incoming.store_batch_system); } + // update store writers if necessary + self.poll_ctx.write_senders.refresh(); } fn handle_control(&mut self, store: &mut StoreFsm) -> Option { @@ -1052,7 +1054,13 @@ impl PollHandler, St } } } else { - let writer_id = rand::random::() % self.poll_ctx.cfg.store_io_pool_size; + // Use the valid size of async-ios for generating `writer_id` when the local + // senders haven't been updated by `poller.begin(). + let writer_id = rand::random::() + % std::cmp::min( + self.poll_ctx.cfg.store_io_pool_size, + self.poll_ctx.write_senders.size(), + ); if let Err(err) = self.poll_ctx.write_senders[writer_id].try_send( WriteMsg::LatencyInspect { send_time: write_begin, @@ -1733,6 +1741,15 @@ impl RaftBatchSystem { .spawn("apply".to_owned(), apply_poller_builder); let refresh_config_runner = RefreshConfigRunner::new( + StoreWritersContext { + store_id: store.get_id(), + notifier: self.router.clone(), + raft_engine: raft_builder.engines.raft.clone(), + kv_engine: Some(raft_builder.engines.kv.clone()), + transfer: raft_builder.trans.clone(), + cfg: raft_builder.cfg.clone(), + }, + self.store_writers.clone(), self.apply_router.router.clone(), self.router.router.clone(), self.apply_system.build_pool_state(apply_builder), diff --git a/components/raftstore/src/store/worker/refresh_config.rs b/components/raftstore/src/store/worker/refresh_config.rs index ff34b9abb4e..7ba0476d381 100644 --- a/components/raftstore/src/store/worker/refresh_config.rs +++ b/components/raftstore/src/store/worker/refresh_config.rs @@ -11,10 +11,15 @@ use tikv_util::{ debug, error, 
info, safe_panic, sys::thread::StdThreadBuildWrapper, thd_name, worker::Runnable, }; -use crate::store::fsm::{ - apply::{ApplyFsm, ControlFsm}, - store::StoreFsm, - PeerFsm, +use crate::store::{ + async_io::write::{StoreWriters, StoreWritersContext}, + fsm::{ + apply::{ApplyFsm, ControlFsm}, + store::{RaftRouter, StoreFsm}, + PeerFsm, + }, + transport::Transport, + PersistedNotifier, }; pub struct PoolController> { @@ -110,6 +115,38 @@ where } } +struct WriterContoller +where + EK: engine_traits::KvEngine, + ER: engine_traits::RaftEngine, + T: Transport + 'static, + N: PersistedNotifier, +{ + writer_meta: StoreWritersContext, + store_writers: StoreWriters, + expected_writers_size: usize, +} + +impl WriterContoller +where + EK: engine_traits::KvEngine, + ER: engine_traits::RaftEngine, + T: Transport + 'static, + N: PersistedNotifier, +{ + pub fn new( + writer_meta: StoreWritersContext, + store_writers: StoreWriters, + ) -> Self { + let writers_size = store_writers.size(); + Self { + writer_meta, + store_writers, + expected_writers_size: writers_size, + } + } +} + #[derive(Debug, Clone, Copy)] pub enum BatchComponent { Store, @@ -133,6 +170,7 @@ impl Display for BatchComponent { pub enum Task { ScalePool(BatchComponent, usize), ScaleBatchSize(BatchComponent, usize), + ScaleWriters(usize), } impl Display for Task { @@ -144,38 +182,48 @@ impl Display for Task { Task::ScaleBatchSize(component, size) => { write!(f, "Scale max_batch_size adjusts {}: {} ", component, size) } + Task::ScaleWriters(size) => { + write!(f, "Scale store_io_pool_size adjusts {} ", size) + } } } } -pub struct Runner +pub struct Runner where EK: engine_traits::KvEngine, ER: engine_traits::RaftEngine, AH: HandlerBuilder, ControlFsm>, RH: HandlerBuilder, StoreFsm>, + T: Transport + 'static, { + writer_ctrl: WriterContoller>, apply_pool: PoolController, ControlFsm, AH>, raft_pool: PoolController, StoreFsm, RH>, } -impl Runner +impl Runner where EK: engine_traits::KvEngine, ER: 
engine_traits::RaftEngine, AH: HandlerBuilder, ControlFsm>, RH: HandlerBuilder, StoreFsm>, + T: Transport + 'static, { pub fn new( + writer_meta: StoreWritersContext>, + store_writers: StoreWriters, apply_router: BatchRouter, ControlFsm>, raft_router: BatchRouter, StoreFsm>, apply_pool_state: PoolState, ControlFsm, AH>, raft_pool_state: PoolState, StoreFsm, RH>, ) -> Self { + let writer_ctrl = WriterContoller::new(writer_meta, store_writers); let apply_pool = PoolController::new(apply_router, apply_pool_state); let raft_pool = PoolController::new(raft_router, raft_pool_state); Runner { + writer_ctrl, apply_pool, raft_pool, } @@ -187,7 +235,7 @@ where match current_pool_size.cmp(&size) { std::cmp::Ordering::Greater => self.raft_pool.decrease_by(current_pool_size - size), std::cmp::Ordering::Less => self.raft_pool.increase_by(size - current_pool_size), - std::cmp::Ordering::Equal => (), + std::cmp::Ordering::Equal => return, } self.raft_pool.cleanup_poller_threads(); info!( @@ -203,7 +251,7 @@ where match current_pool_size.cmp(&size) { std::cmp::Ordering::Greater => self.apply_pool.decrease_by(current_pool_size - size), std::cmp::Ordering::Less => self.apply_pool.increase_by(size - current_pool_size), - std::cmp::Ordering::Equal => (), + std::cmp::Ordering::Equal => return, } self.apply_pool.cleanup_poller_threads(); info!( @@ -212,14 +260,47 @@ where "to" => self.apply_pool.state.expected_pool_size ); } + + /// Resizes the count of background threads in store_writers. + fn resize_store_writers(&mut self, size: usize) { + // The resizing of store writers will not directly update the local cached + // store writers in each poller. Each poller will timely correct its local + // cached in its next `poller.begin()` after the resize operation completed. 
+ let current_size = self.writer_ctrl.expected_writers_size; + self.writer_ctrl.expected_writers_size = size; + match current_size.cmp(&size) { + std::cmp::Ordering::Greater => { + if let Err(e) = self.writer_ctrl.store_writers.decrease_to(size) { + error!("failed to decrease store writers size"; "err_msg" => ?e); + } + } + std::cmp::Ordering::Less => { + let writer_meta = self.writer_ctrl.writer_meta.clone(); + if let Err(e) = self + .writer_ctrl + .store_writers + .increase_to(size, writer_meta) + { + error!("failed to increase store writers size"; "err_msg" => ?e); + } + } + std::cmp::Ordering::Equal => return, + } + info!( + "resize store writers pool"; + "from" => current_size, + "to" => size + ); + } } -impl Runnable for Runner +impl Runnable for Runner where EK: engine_traits::KvEngine, ER: engine_traits::RaftEngine, AH: HandlerBuilder, ControlFsm> + std::marker::Send, RH: HandlerBuilder, StoreFsm> + std::marker::Send, + T: Transport + 'static, { type Task = Task; @@ -237,6 +318,7 @@ where self.apply_pool.state.max_batch_size = size; } }, + Task::ScaleWriters(size) => self.resize_store_writers(size), } } } diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index ff1babb7e1f..003d63d9a47 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -141,19 +141,19 @@ where rx.recv_timeout(Duration::from_secs(3)).unwrap(); } +fn new_changes(cfgs: Vec<(&str, &str)>) -> std::collections::HashMap { + std::collections::HashMap::from_iter( + cfgs.into_iter() + .map(|kv| (kv.0.to_owned(), kv.1.to_owned())), + ) +} + #[test] fn test_update_raftstore_config() { let (mut config, _dir) = TikvConfig::with_tmp().unwrap(); config.validate().unwrap(); let (cfg_controller, router, _, mut system) = start_raftstore(config.clone(), &_dir); - let new_changes = |cfgs: Vec<(&str, &str)>| { - std::collections::HashMap::from_iter( - cfgs.into_iter() - .map(|kv| 
(kv.0.to_owned(), kv.1.to_owned())), - ) - }; - // dispatch updated config let change = new_changes(vec![ ("raftstore.messages-per-tick", "12345"), @@ -224,3 +224,58 @@ fn test_update_raftstore_config() { system.shutdown(); } + +#[test] +fn test_update_raftstore_io_config() { + // Test update raftstore configurations on io settings. + // Start from SYNC mode. + { + let (mut resize_config, _dir) = TikvConfig::with_tmp().unwrap(); + resize_config.validate().unwrap(); + let (cfg_controller, _, _, mut system) = start_raftstore(resize_config, &_dir); + + // not allowed to resize from SYNC mode to ASYNC mode + let resize_store_writers_cfg = vec![("raftstore.store-io-pool-size", "2")]; + assert!( + cfg_controller + .update(new_changes(resize_store_writers_cfg)) + .is_err() + ); + system.shutdown(); + } + // Start from ASYNC mode. + { + let (mut resize_config, _dir) = TikvConfig::with_tmp().unwrap(); + resize_config.raft_store.store_io_pool_size = 2; + resize_config.validate().unwrap(); + let (cfg_controller, _, _, mut system) = start_raftstore(resize_config, &_dir); + + // not allowed to resize from ASYNC mode to SYNC mode + let resize_store_writers_cfg = vec![("raftstore.store-io-pool-size", "0")]; + assert!( + cfg_controller + .update(new_changes(resize_store_writers_cfg)) + .is_err() + ); + system.shutdown(); + } + // Modify the size of async-ios. + { + let (mut resize_config, _dir) = TikvConfig::with_tmp().unwrap(); + resize_config.raft_store.store_io_pool_size = 2; + resize_config.validate().unwrap(); + let (cfg_controller, _, _, mut system) = start_raftstore(resize_config, &_dir); + + // resize the count of ios to 1 by decreasing. + let resize_store_writers_cfg = vec![("raftstore.store-io-pool-size", "1")]; + cfg_controller + .update(new_changes(resize_store_writers_cfg)) + .unwrap(); + // resize the count of ios to 4 by increasing. 
+ let resize_store_writers_cfg = vec![("raftstore.store-io-pool-size", "4")]; + cfg_controller + .update(new_changes(resize_store_writers_cfg)) + .unwrap(); + system.shutdown(); + } +} diff --git a/tests/integrations/raftstore/test_scale_pool.rs b/tests/integrations/raftstore/test_scale_pool.rs index 1672e57ae02..794cf90f4cb 100644 --- a/tests/integrations/raftstore/test_scale_pool.rs +++ b/tests/integrations/raftstore/test_scale_pool.rs @@ -157,3 +157,192 @@ fn test_decrease_pool() { cluster.must_put(b"k2", b"v2"); must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); } + +fn get_async_writers_tids() -> Vec { + let prefix = "store-writer-"; + let mut writers_tids = vec![]; + let pid = thread::process_id(); + let all_tids: Vec<_> = thread::thread_ids(pid).unwrap(); + for tid in all_tids { + if let Ok(stat) = thread::full_thread_stat(pid, tid) { + if stat.command.starts_with(prefix) { + writers_tids.push(tid); + } + } + } + writers_tids +} + +#[test] +fn test_increase_async_ios() { + let mut cluster = new_node_cluster(0, 1); + cluster.cfg.raft_store.store_io_pool_size = 1; + cluster.pd_client.disable_default_operator(); + cluster.run(); + + // Save current async-io tids before shrinking + let org_writers_tids = get_async_writers_tids(); + assert_eq!(1, org_writers_tids.len()); + // Request can be handled as usual + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); + + // Update config, expand from 1 to 2 + { + let sim = cluster.sim.rl(); + let cfg_controller = sim.get_cfg_controller().unwrap(); + + let change = { + let mut change = HashMap::new(); + change.insert("raftstore.store-io-pool-size".to_owned(), "2".to_owned()); + change + }; + + cfg_controller.update(change).unwrap(); + assert_eq!( + cfg_controller.get_current().raft_store.store_io_pool_size, + 2 + ); + // Wait for the completion of increasing async-ios + std::thread::sleep(std::time::Duration::from_secs(1)); + } + // Save current async-io tids after scaling up, and 
compared with the + // orginial one before scaling up, the thread num should be added up to TWO. + let cur_writers_tids = get_async_writers_tids(); + assert_eq!(cur_writers_tids.len() - 1, org_writers_tids.len()); + + // Request can be handled as usual + cluster.must_put(b"k2", b"v2"); + must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); +} + +#[test] +fn test_decrease_async_ios() { + let mut cluster = new_node_cluster(0, 1); + cluster.cfg.raft_store.store_io_pool_size = 4; + cluster.pd_client.disable_default_operator(); + cluster.run(); + + // Save current async-io tids before shrinking + let org_writers_tids = get_async_writers_tids(); + assert_eq!(4, org_writers_tids.len()); + // Request can be handled as usual + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); + + // Update config, shrink from 4 to 1 + { + let sim = cluster.sim.rl(); + let cfg_controller = sim.get_cfg_controller().unwrap(); + let change = { + let mut change = HashMap::new(); + change.insert("raftstore.store-io-pool-size".to_owned(), "1".to_owned()); + change + }; + + cfg_controller.update(change).unwrap(); + assert_eq!( + cfg_controller.get_current().raft_store.store_io_pool_size, + 1 + ); + // Wait for the completion of decreasing async-ios + std::thread::sleep(std::time::Duration::from_secs(1)); + } + + // Save current async-io tids after scaling down, and compared with the + // orginial one before shrinking. As the decreasing of async-ios won't + // release asynchronous writers, the thread num should not be updated. 
+ let cur_writers_tids = get_async_writers_tids(); + assert_eq!(cur_writers_tids.len(), org_writers_tids.len()); + // After shrinking, all the left tids must be there before + for tid in cur_writers_tids { + assert!(org_writers_tids.contains(&tid)); + } + // Request can be handled as usual + cluster.must_put(b"k2", b"v2"); + must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); +} + +#[test] +fn test_resize_async_ios_failed_1() { + let mut cluster = new_node_cluster(0, 1); + cluster.cfg.raft_store.store_io_pool_size = 2; + cluster.pd_client.disable_default_operator(); + cluster.run(); + + // Save current async-io tids before shrinking + let org_writers_tids = get_async_writers_tids(); + assert_eq!(2, org_writers_tids.len()); + // Request can be handled as usual + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); + + // Update config, expand from async-mode(async-ios == 2) to + // sync-mode(async-ios == 0). + { + let sim = cluster.sim.rl(); + let cfg_controller = sim.get_cfg_controller().unwrap(); + + let change = { + let mut change = HashMap::new(); + change.insert("raftstore.store-io-pool-size".to_owned(), "0".to_owned()); + change + }; + + assert!(cfg_controller.update(change).is_err()); + assert_eq!( + cfg_controller.get_current().raft_store.store_io_pool_size, + 2 + ); + } + // Save current async-io tids after scaling up, and compared with the + // orginial one before scaling up, the thread num should be added up to TWO. 
+ let cur_writers_tids = get_async_writers_tids(); + assert_eq!(cur_writers_tids.len(), org_writers_tids.len()); + + // Request can be handled as usual + cluster.must_put(b"k2", b"v2"); + must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); +} + +#[test] +fn test_resize_async_ios_failed_2() { + let mut cluster = new_node_cluster(0, 1); + cluster.cfg.raft_store.store_io_pool_size = 0; + cluster.pd_client.disable_default_operator(); + let _ = cluster.run_conf_change(); + + // Save current async-io tids before shrinking + let org_writers_tids = get_async_writers_tids(); + assert_eq!(0, org_writers_tids.len()); + // Request can be handled as usual + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); + + // Update config, expand from sync-mode(async-ios == 0) to + // async-mode(async-ios == 2). + { + let sim = cluster.sim.rl(); + let cfg_controller = sim.get_cfg_controller().unwrap(); + + let change = { + let mut change = HashMap::new(); + change.insert("raftstore.store-io-pool-size".to_owned(), "2".to_owned()); + change + }; + + assert!(cfg_controller.update(change).is_err()); + assert_eq!( + cfg_controller.get_current().raft_store.store_io_pool_size, + 0 + ); + } + // Save current async-io tids after scaling up, and compared with the + // orginial one before scaling up, the thread num should be added up to TWO. 
+ let cur_writers_tids = get_async_writers_tids(); + assert_eq!(cur_writers_tids.len(), org_writers_tids.len()); + + // Request can be handled as usual + cluster.must_put(b"k2", b"v2"); + must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); +} From 26813ba8525f309160c6691893ef93e5ed1bf34e Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 7 Feb 2023 13:01:57 +0800 Subject: [PATCH 0494/1149] raftstore-v2: fix tablet gc issues (#14125) close tikv/tikv#14115 Signed-off-by: tabokie Co-authored-by: Ti Chi Robot --- Cargo.lock | 6 +- components/engine_panic/src/misc.rs | 4 + components/engine_rocks/src/misc.rs | 9 + components/engine_test/src/lib.rs | 4 +- components/engine_traits/src/misc.rs | 4 + .../raftstore-v2/src/worker/tablet_gc.rs | 164 +++++++++++++++--- 6 files changed, 158 insertions(+), 33 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 485aeb43c52..473058ffd9d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2861,7 +2861,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#14e4fe7f47054408cf3d2905beeca798c6656191" +source = "git+https://github.com/tikv/rust-rocksdb.git#b2cd42588ac62e40e297fea56a2286c0c389aade" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2880,7 +2880,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#14e4fe7f47054408cf3d2905beeca798c6656191" +source = "git+https://github.com/tikv/rust-rocksdb.git#b2cd42588ac62e40e297fea56a2286c0c389aade" dependencies = [ "bzip2-sys", "cc", @@ -4797,7 +4797,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#14e4fe7f47054408cf3d2905beeca798c6656191" +source = "git+https://github.com/tikv/rust-rocksdb.git#b2cd42588ac62e40e297fea56a2286c0c389aade" dependencies = [ "libc 0.2.139", "librocksdb_sys", diff --git a/components/engine_panic/src/misc.rs 
b/components/engine_panic/src/misc.rs index 93218767ec0..5603bf43c77 100644 --- a/components/engine_panic/src/misc.rs +++ b/components/engine_panic/src/misc.rs @@ -68,6 +68,10 @@ impl MiscExt for PanicEngine { panic!() } + fn continue_background_work(&self) -> Result<()> { + panic!() + } + fn exists(path: &str) -> bool { panic!() } diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index 3477226ae76..8d5bb3d43ef 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -274,10 +274,19 @@ impl MiscExt for RocksEngine { } fn pause_background_work(&self) -> Result<()> { + // This will make manual compaction return error instead of waiting. In practice + // we might want to identify this case by parsing error message. + self.as_inner().disable_manual_compaction(); self.as_inner().pause_bg_work(); Ok(()) } + fn continue_background_work(&self) -> Result<()> { + self.as_inner().enable_manual_compaction(); + self.as_inner().continue_bg_work(); + Ok(()) + } + fn exists(path: &str) -> bool { crate::util::db_exist(path) } diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index 2d89929a4b2..1b0dbfbddb6 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -129,8 +129,8 @@ pub mod kv { } fn destroy_tablet(&self, _ctx: TabletContext, path: &Path) -> Result<()> { - let tombstone_path = path.join(TOMBSTONE_SUFFIX); - std::fs::remove_dir_all(&tombstone_path)?; + let tombstone_path = path.with_extension(TOMBSTONE_SUFFIX); + let _ = std::fs::remove_dir_all(&tombstone_path); std::fs::rename(path, &tombstone_path)?; std::fs::remove_dir_all(tombstone_path)?; Ok(()) diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index 5bbcbb2de79..c2d317f529f 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -97,8 +97,12 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { 
fn sync_wal(&self) -> Result<()>; + /// Depending on the implementation, some on-going manual compactions may be + /// aborted. fn pause_background_work(&self) -> Result<()>; + fn continue_background_work(&self) -> Result<()>; + /// Check whether a database exists at a given path fn exists(path: &str) -> bool; diff --git a/components/raftstore-v2/src/worker/tablet_gc.rs b/components/raftstore-v2/src/worker/tablet_gc.rs index d6d19743b1e..dc5f3dad56d 100644 --- a/components/raftstore-v2/src/worker/tablet_gc.rs +++ b/components/raftstore-v2/src/worker/tablet_gc.rs @@ -9,8 +9,13 @@ use std::{ use collections::HashMap; use engine_traits::{DeleteStrategy, KvEngine, Range, TabletContext, TabletRegistry}; use kvproto::metapb::Region; -use slog::{debug, error, warn, Logger}; -use tikv_util::worker::{Runnable, RunnableWithTimer}; +use slog::{debug, error, info, warn, Logger}; +use tikv_util::{ + worker::{Runnable, RunnableWithTimer}, + yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, +}; + +const DEFAULT_BACKGROUND_POOL_SIZE: usize = 6; pub enum Task { Trim { @@ -98,6 +103,10 @@ pub struct Runner { // region_id -> [(tablet_path, wait_for_persisted)]. waiting_destroy_tasks: HashMap>, pending_destroy_tasks: Vec, + + // An independent pool to run tasks that are time-consuming but doesn't take CPU resources, + // such as waiting for RocksDB compaction. 
+ background_pool: FuturePool, } impl Runner { @@ -107,27 +116,72 @@ impl Runner { logger, waiting_destroy_tasks: HashMap::default(), pending_destroy_tasks: Vec::new(), + background_pool: YatpPoolBuilder::new(DefaultTicker::default()) + .name_prefix("tablet-gc-bg") + .thread_count( + 0, + DEFAULT_BACKGROUND_POOL_SIZE, + DEFAULT_BACKGROUND_POOL_SIZE, + ) + .build_future_pool(), } } - fn trim( - tablet: &EK, - start_key: &[u8], - end_key: &[u8], - cb: Box, - ) -> engine_traits::Result<()> { - let start_key = keys::data_key(start_key); - let end_key = keys::data_end_key(end_key); + fn trim(&self, tablet: EK, start: Box<[u8]>, end: Box<[u8]>, cb: Box) { + let start_key = keys::data_key(&start); + let end_key = keys::data_end_key(&end); let range1 = Range::new(&[], &start_key); let range2 = Range::new(&end_key, keys::DATA_MAX_KEY); - tablet.delete_ranges_cfs(DeleteStrategy::DeleteFiles, &[range1, range2])?; - // TODO: Avoid this after compaction filter is ready. - tablet.delete_ranges_cfs(DeleteStrategy::DeleteByRange, &[range1, range2])?; - for r in [range1, range2] { - tablet.compact_range(Some(r.start_key), Some(r.end_key), false, 1)?; + // TODO: Avoid `DeleteByRange` after compaction filter is ready. 
+ if let Err(e) = tablet + .delete_ranges_cfs(DeleteStrategy::DeleteFiles, &[range1, range2]) + .and_then(|_| { + tablet.delete_ranges_cfs(DeleteStrategy::DeleteByRange, &[range1, range2]) + }) + { + error!( + self.logger, + "failed to trim tablet"; + "start_key" => log_wrappers::Value::key(&start_key), + "end_key" => log_wrappers::Value::key(&end_key), + "err" => %e, + ); + return; } - cb(); - Ok(()) + let logger = self.logger.clone(); + self.background_pool + .spawn(async move { + let range1 = Range::new(&[], &start_key); + let range2 = Range::new(&end_key, keys::DATA_MAX_KEY); + for r in [range1, range2] { + if let Err(e) = + tablet.compact_range(Some(r.start_key), Some(r.end_key), false, 1) + { + if e.to_string().contains("Manual compaction paused") { + info!( + logger, + "tablet manual compaction is paused, skip trim"; + "start_key" => log_wrappers::Value::key(&start_key), + "end_key" => log_wrappers::Value::key(&end_key), + "err" => %e, + ); + } else { + error!( + logger, + "failed to trim tablet"; + "start_key" => log_wrappers::Value::key(&start_key), + "end_key" => log_wrappers::Value::key(&end_key), + "err" => %e, + ); + } + return; + } + } + // drop before callback. 
+ drop(tablet); + cb(); + }) + .unwrap(); } fn prepare_destroy(&mut self, region_id: u64, tablet: EK, wait_for_persisted: u64) { @@ -204,17 +258,7 @@ where start_key, end_key, cb, - } => { - if let Err(e) = Self::trim(&tablet, &start_key, &end_key, cb) { - error!( - self.logger, - "failed to trim tablet"; - "start_key" => log_wrappers::Value::key(&start_key), - "end_key" => log_wrappers::Value::key(&end_key), - "err" => %e, - ); - } - } + } => self.trim(tablet, start_key, end_key, cb), Task::PrepareDestroy { region_id, tablet, @@ -241,3 +285,67 @@ where Duration::from_secs(10) } } + +#[cfg(test)] +mod tests { + use engine_test::{ + ctor::{CfOptions, DbOptions}, + kv::TestTabletFactory, + }; + use engine_traits::{MiscExt, TabletContext, TabletRegistry}; + use tempfile::Builder; + + use super::*; + + #[test] + fn test_race_between_destroy_and_trim() { + let dir = Builder::new() + .prefix("test_race_between_destroy_and_trim") + .tempdir() + .unwrap(); + let factory = Box::new(TestTabletFactory::new( + DbOptions::default(), + vec![("default", CfOptions::default())], + )); + let registry = TabletRegistry::new(factory, dir.path()).unwrap(); + let logger = slog_global::borrow_global().new(slog::o!()); + let mut runner = Runner::new(registry.clone(), logger); + + let mut region = Region::default(); + let rid = 1; + region.set_id(rid); + region.set_start_key(b"a".to_vec()); + region.set_end_key(b"b".to_vec()); + let tablet = registry + .load(TabletContext::new(®ion, Some(1)), true) + .unwrap() + .latest() + .unwrap() + .clone(); + runner.run(Task::prepare_destroy(tablet.clone(), rid, 10)); + let (tx, rx) = std::sync::mpsc::channel(); + runner.run(Task::trim(tablet, ®ion, move || tx.send(()).unwrap())); + rx.recv().unwrap(); + + let rid = 2; + region.set_id(rid); + region.set_start_key(b"c".to_vec()); + region.set_end_key(b"d".to_vec()); + let tablet = registry + .load(TabletContext::new(®ion, Some(1)), true) + .unwrap() + .latest() + .unwrap() + .clone(); + 
registry.remove(rid); + runner.run(Task::prepare_destroy(tablet.clone(), rid, 10)); + runner.run(Task::destroy(rid, 100)); + let path = PathBuf::from(tablet.path()); + assert!(path.exists()); + let (tx, rx) = std::sync::mpsc::channel(); + runner.run(Task::trim(tablet, ®ion, move || tx.send(()).unwrap())); + rx.recv().unwrap(); + runner.on_timeout(); + assert!(!path.exists()); + } +} From d083fc92d5228fb4bfc74a98e5ae7982d8fef22a Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Tue, 7 Feb 2023 13:29:57 +0800 Subject: [PATCH 0495/1149] copr: reject request when estimated waiting duration exceeds threshold (#14077) ref tikv/tikv#14151 Add a read pool time slice inspector to predict the waiting time for read requests. Use the estimated duration to reject requests which have busy_threashold. Signed-off-by: Yilin Chen Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/server/src/server.rs | 13 +- components/server/src/server2.rs | 13 +- src/coprocessor/endpoint.rs | 17 ++- src/read_pool.rs | 175 ++++++++++++++++++++++++- src/storage/mod.rs | 150 ++++++++++----------- tests/failpoints/cases/test_storage.rs | 17 ++- 7 files changed, 295 insertions(+), 92 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 473058ffd9d..1b3c1452ebf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2732,7 +2732,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#eccad3776d7b076da68d6c51fb7506b8562b9802" +source = "git+https://github.com/pingcap/kvproto.git#0561adc3754362675cc08b5203d8b6444e645395" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 4fe397e9eb5..be516a84ae0 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -92,7 +92,9 @@ use tikv::{ coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, import::{ImportSstService, SstImporter}, - read_pool::{build_yatp_read_pool, 
ReadPool, ReadPoolConfigManager}, + read_pool::{ + build_yatp_read_pool, ReadPool, ReadPoolConfigManager, UPDATE_EWMA_TIME_SLICE_INTERVAL, + }, server::{ config::{Config as ServerConfig, ServerConfigManager}, gc_worker::{AutoGcConfig, GcWorker}, @@ -770,6 +772,15 @@ where } else { None }; + if let Some(unified_read_pool) = &unified_read_pool { + let handle = unified_read_pool.handle(); + self.background_worker.spawn_interval_task( + UPDATE_EWMA_TIME_SLICE_INTERVAL, + move || { + handle.update_ewma_time_slice(); + }, + ); + } // The `DebugService` and `DiagnosticsService` will share the same thread pool let props = tikv_util::thread_group::current_properties(); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 20d79e7cce5..0797b391d87 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -73,7 +73,9 @@ use tikv::{ config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, - read_pool::{build_yatp_read_pool, ReadPool, ReadPoolConfigManager}, + read_pool::{ + build_yatp_read_pool, ReadPool, ReadPoolConfigManager, UPDATE_EWMA_TIME_SLICE_INTERVAL, + }, server::{ config::{Config as ServerConfig, ServerConfigManager}, gc_worker::{AutoGcConfig, GcWorker}, @@ -666,6 +668,15 @@ where } else { None }; + if let Some(unified_read_pool) = &unified_read_pool { + let handle = unified_read_pool.handle(); + self.background_worker.spawn_interval_task( + UPDATE_EWMA_TIME_SLICE_INTERVAL, + move || { + handle.update_ewma_time_slice(); + }, + ); + } // The `DebugService` and `DiagnosticsService` will share the same thread pool let props = tikv_util::thread_group::current_properties(); diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 6ac1bebc541..3ba320149ac 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -11,7 +11,7 @@ use api_version::{dispatch_api_version, 
KvFormat}; use async_stream::try_stream; use concurrency_manager::ConcurrencyManager; use engine_traits::PerfLevel; -use futures::{channel::mpsc, prelude::*}; +use futures::{channel::mpsc, future::Either, prelude::*}; use kvproto::{coprocessor as coppb, errorpb, kvrpcpb}; use protobuf::{CodedInputStream, Message}; use resource_metering::{FutureExt, ResourceTagFactory, StreamExt}; @@ -516,6 +516,16 @@ impl Endpoint { mut req: coppb::Request, peer: Option, ) -> impl Future> { + // Check the load of the read pool. If it's too busy, generate and return + // error in the gRPC thread to avoid waiting in the queue of the read pool. + if let Err(busy_err) = self.read_pool.check_busy_threshold(Duration::from_millis( + req.get_context().get_busy_threshold_ms() as u64, + )) { + let mut resp = coppb::Response::default(); + resp.mut_region_error().set_server_is_busy(busy_err); + return Either::Left(async move { resp.into() }); + } + let tracker = GLOBAL_TRACKERS.insert(::tracker::Tracker::new(RequestInfo::new( req.get_context(), RequestType::Unknown, @@ -526,7 +536,7 @@ impl Endpoint { let result_of_future = self .parse_request_and_check_memory_locks(req, peer, false) .map(|(handler_builder, req_ctx)| self.handle_unary_request(req_ctx, handler_builder)); - async move { + let fut = async move { let res = match result_of_future { Err(e) => { let mut res = make_error_response(e); @@ -546,7 +556,8 @@ impl Endpoint { }; GLOBAL_TRACKERS.remove(tracker); res - } + }; + Either::Right(fut) } // process_batch_tasks process the input batched coprocessor tasks if any, diff --git a/src/read_pool.rs b/src/read_pool.rs index ea20b149a3d..2c56e205ef7 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -1,16 +1,21 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ + convert::TryFrom, future::Future, - sync::{mpsc::SyncSender, Arc, Mutex}, + sync::{ + atomic::{AtomicU64, Ordering}, + mpsc::SyncSender, + Arc, Mutex, + }, time::Duration, }; use file_system::{set_io_type, IoType}; use futures::{channel::oneshot, future::TryFutureExt}; -use kvproto::kvrpcpb::CommandPri; +use kvproto::{errorpb, kvrpcpb::CommandPri}; use online_config::{ConfigChange, ConfigManager, ConfigValue, Result as CfgResult}; -use prometheus::{IntCounter, IntGauge}; +use prometheus::{Histogram, IntCounter, IntGauge}; use resource_control::{ControlledFuture, ResourceController}; use thiserror::Error; use tikv_util::{ @@ -54,6 +59,7 @@ pub enum ReadPool { max_tasks: usize, pool_size: usize, resource_ctl: Option>, + time_slice_inspector: Arc, }, } @@ -76,6 +82,7 @@ impl ReadPool { max_tasks, pool_size, resource_ctl, + time_slice_inspector, } => ReadPoolHandle::Yatp { remote: pool.remote().clone(), running_tasks: running_tasks.clone(), @@ -83,6 +90,7 @@ impl ReadPool { max_tasks: *max_tasks, pool_size: *pool_size, resource_ctl: resource_ctl.clone(), + time_slice_inspector: time_slice_inspector.clone(), }, } } @@ -102,6 +110,7 @@ pub enum ReadPoolHandle { max_tasks: usize, pool_size: usize, resource_ctl: Option>, + time_slice_inspector: Arc, }, } @@ -251,6 +260,121 @@ impl ReadPoolHandle { } } } + + pub fn get_ewma_time_slice(&self) -> Option { + match self { + ReadPoolHandle::FuturePools { .. } => None, + ReadPoolHandle::Yatp { + time_slice_inspector, + .. + } => Some(time_slice_inspector.get_ewma_time_slice()), + } + } + + pub fn update_ewma_time_slice(&self) { + if let ReadPoolHandle::Yatp { + time_slice_inspector, + .. 
+ } = self + { + time_slice_inspector.update(); + } + } + + pub fn get_estimated_wait_duration(&self) -> Option { + self.get_ewma_time_slice() + .map(|s| s * (self.get_queue_size_per_worker() as u32)) + } + + pub fn check_busy_threshold( + &self, + busy_threshold: Duration, + ) -> Result<(), errorpb::ServerIsBusy> { + if busy_threshold.is_zero() { + return Ok(()); + } + let estimated_wait = match self.get_estimated_wait_duration() { + Some(estimated_wait) if estimated_wait > busy_threshold => estimated_wait, + _ => return Ok(()), + }; + // TODO: Get applied_index from the raftstore and check memory locks. Then, we + // can skip read index in replica read. But now the difficulty is that we don't + // have access to the the local reader in gRPC threads. + let mut busy_err = errorpb::ServerIsBusy::default(); + busy_err.set_reason("estimated wait time exceeds threshold".to_owned()); + busy_err.estimated_wait_ms = u32::try_from(estimated_wait.as_millis()).unwrap_or(u32::MAX); + Err(busy_err) + } +} + +pub const UPDATE_EWMA_TIME_SLICE_INTERVAL: Duration = Duration::from_millis(200); + +pub struct TimeSliceInspector { + // `atomic_ewma_nanos` is a mirror of `inner.ewma` provided for fast access. It is updated in + // the `update` method. 
+ atomic_ewma_nanos: AtomicU64, + inner: Mutex, +} + +struct TimeSliceInspectorInner { + time_slice_hist: [Histogram; 3], + ewma: Duration, + + last_sum: Duration, + last_count: u64, +} + +impl TimeSliceInspector { + pub fn new(name: &str) -> Self { + let time_slice_hist = [ + yatp::metrics::TASK_POLL_DURATION.with_label_values(&[name, "0"]), + yatp::metrics::TASK_POLL_DURATION.with_label_values(&[name, "1"]), + yatp::metrics::TASK_POLL_DURATION.with_label_values(&[name, "2"]), + ]; + let inner = TimeSliceInspectorInner { + time_slice_hist, + ewma: Duration::default(), + last_sum: Duration::default(), + last_count: 0, + }; + Self { + atomic_ewma_nanos: AtomicU64::default(), + inner: Mutex::new(inner), + } + } + + pub fn update(&self) { + // new_ewma = WEIGHT * new_val + (1 - WEIGHT) * old_ewma + const WEIGHT: f64 = 0.3; + // If the accumulated time slice is less than 100ms, the EWMA is not updated. + const MIN_TIME_DIFF: Duration = Duration::from_millis(100); + + let mut inner = self.inner.lock().unwrap(); + let mut new_sum = Duration::default(); + let mut new_count = 0; + // Now, we simplify the problem by merging samples from all levels. If we want + // more accurate answer in the future, calculate for each level separately. 
+ for hist in &inner.time_slice_hist { + new_sum += Duration::from_secs_f64(hist.get_sample_sum()); + new_count += hist.get_sample_count(); + } + let time_diff = new_sum - inner.last_sum; + if time_diff < MIN_TIME_DIFF { + return; + } + let new_val = time_diff / ((new_count - inner.last_count) as u32); + let new_ewma = new_val.mul_f64(WEIGHT) + inner.ewma.mul_f64(1.0 - WEIGHT); + inner.ewma = new_ewma; + inner.last_sum = new_sum; + inner.last_count = new_count; + + self.atomic_ewma_nanos + .store(new_ewma.as_nanos() as u64, Ordering::Release); + } + + pub fn get_ewma_time_slice(&self) -> Duration { + Duration::from_nanos(self.atomic_ewma_nanos.load(Ordering::Acquire)) + } } #[derive(Clone)] @@ -273,8 +397,6 @@ impl ReporterTicker { #[cfg(test)] fn get_unified_read_pool_name() -> String { - use std::sync::atomic::{AtomicU64, Ordering}; - static COUNTER: AtomicU64 = AtomicU64::new(0); format!( "unified-read-pool-test-{}", @@ -319,6 +441,7 @@ pub fn build_yatp_read_pool( } else { builder.build_multi_level_pool() }; + let time_slice_inspector = Arc::new(TimeSliceInspector::new(&unified_read_pool_name)); ReadPool::Yatp { pool, running_tasks: UNIFIED_READ_POOL_RUNNING_TASKS @@ -330,6 +453,7 @@ pub fn build_yatp_read_pool( .saturating_mul(config.max_thread_count), pool_size: config.max_thread_count, resource_ctl, + time_slice_inspector, } } @@ -765,4 +889,45 @@ mod tests { _ => panic!("should return full error"), } } + + #[test] + fn test_time_slice_inspector_ewma() { + const MARGIN: f64 = 1e-5; // 10us + + let name = "test_time_slice_inspector_ewma"; + let inspector = TimeSliceInspector::new(name); + let hist = yatp::metrics::TASK_POLL_DURATION.with_label_values(&[name, "0"]); + + // avg: 0.055, prev_ewma: 0 => new_ewma = 0.0165 + for i in 1..=10 { + hist.observe(i as f64 * 0.01); + } + inspector.update(); + let ewma = inspector.get_ewma_time_slice().as_secs_f64(); + assert!((ewma - 0.0165).abs() < MARGIN); + + // avg: 0.0125, prev_ewma: 0.0165 => new_ewma = 0.0153 + for 
i in 5..=20 { + hist.observe(i as f64 * 0.001); + } + inspector.update(); + let ewma = inspector.get_ewma_time_slice().as_secs_f64(); + assert!((ewma - 0.0153).abs() < MARGIN); + + // sum: 55ms, don't update ewma + for i in 1..=10 { + hist.observe(i as f64 * 0.001); + } + inspector.update(); + let ewma = inspector.get_ewma_time_slice().as_secs_f64(); + assert!((ewma - 0.0153).abs() < MARGIN); + + // avg: 0.00786, prev_ewma: 0.0153 => new_ewma = 0.01307 + for i in 5..=15 { + hist.observe(i as f64 * 0.001); + } + inspector.update(); + let ewma = inspector.get_ewma_time_slice().as_secs_f64(); + assert!((ewma - 0.01307).abs() < MARGIN); + } } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 6273bc3d54c..ca35018e01e 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -69,6 +69,7 @@ use std::{ atomic::{self, AtomicBool, AtomicU64, Ordering}, Arc, }, + time::Duration, }; use api_version::{ApiV1, ApiV2, KeyMode, KvFormat, RawValue}; @@ -78,7 +79,7 @@ use concurrency_manager::{ConcurrencyManager, KeyHandleGuard}; use engine_traits::{ raw_ttl::ttl_to_expire_ts, CfName, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, DATA_CFS_LEN, }; -use futures::prelude::*; +use futures::{future::Either, prelude::*}; use kvproto::{ kvrpcpb::{ ApiVersion, ChecksumAlgorithm, CommandPri, Context, GetRequest, IsolationLevel, KeyRange, @@ -605,11 +606,13 @@ impl Storage { ); let concurrency_manager = self.concurrency_manager.clone(); let api_version = self.api_version; + let busy_threshold = Duration::from_millis(ctx.busy_threshold_ms as u64); let quota_limiter = self.quota_limiter.clone(); let mut sample = quota_limiter.new_sample(true); - let res = self.read_pool.spawn_handle( + self.read_pool_spawn_with_busy_check( + busy_threshold, async move { let stage_scheduled_ts = Instant::now(); tls_collect_query( @@ -663,13 +666,13 @@ impl Storage { false, ); snap_store - .get(&key, &mut statistics) - // map storage::txn::Error -> storage::Error - .map_err(Error::from) - .map(|r| { - 
KV_COMMAND_KEYREAD_HISTOGRAM_STATIC.get(CMD).observe(1_f64); - r - }) + .get(&key, &mut statistics) + // map storage::txn::Error -> storage::Error + .map_err(Error::from) + .map(|r| { + KV_COMMAND_KEYREAD_HISTOGRAM_STATIC.get(CMD).observe(1_f64); + r + }) }); metrics::tls_collect_scan_details(CMD, &statistics); metrics::tls_collect_read_flow( @@ -732,11 +735,7 @@ impl Storage { priority, thread_rng().next_u64(), group_name, - ); - async move { - res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) - .await? - } + ) } /// Get values of a set of keys with separate context from a snapshot, @@ -762,6 +761,8 @@ impl Storage { .to_owned(); let concurrency_manager = self.concurrency_manager.clone(); let api_version = self.api_version; + let busy_threshold = + Duration::from_millis(requests[0].get_context().busy_threshold_ms as u64); // The resource tags of these batched requests are not the same, and it is quite // expensive to distinguish them, so we can find random one of them as a @@ -775,7 +776,8 @@ impl Storage { // Unset the TLS tracker because the future below does not belong to any // specific request clear_tls_tracker_token(); - let res = self.read_pool.spawn_handle( + self.read_pool_spawn_with_busy_check( + busy_threshold, async move { KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); KV_COMMAND_KEYREAD_HISTOGRAM_STATIC @@ -921,11 +923,7 @@ impl Storage { priority, thread_rng().next_u64(), group_name, - ); - async move { - res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) - .await? - } + ) } /// Get values of a set of keys in a batch from the snapshot. 
@@ -951,9 +949,11 @@ impl Storage { .new_tag_with_key_ranges(&ctx, key_ranges); let concurrency_manager = self.concurrency_manager.clone(); let api_version = self.api_version; + let busy_threshold = Duration::from_millis(ctx.busy_threshold_ms as u64); let quota_limiter = self.quota_limiter.clone(); let mut sample = quota_limiter.new_sample(true); - let res = self.read_pool.spawn_handle( + self.read_pool_spawn_with_busy_check( + busy_threshold, async move { let stage_scheduled_ts = Instant::now(); let mut key_ranges = vec![]; @@ -1095,12 +1095,7 @@ impl Storage { priority, thread_rng().next_u64(), group_name, - ); - - async move { - res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) - .await? - } + ) } /// Scan keys in [`start_key`, `end_key`) up to `limit` keys from the @@ -1136,8 +1131,10 @@ impl Storage { ); let concurrency_manager = self.concurrency_manager.clone(); let api_version = self.api_version; + let busy_threshold = Duration::from_millis(ctx.busy_threshold_ms as u64); - let res = self.read_pool.spawn_handle( + self.read_pool_spawn_with_busy_check( + busy_threshold, async move { { let end_key = match &end_key { @@ -1273,12 +1270,7 @@ impl Storage { priority, thread_rng().next_u64(), group_name, - ); - - async move { - res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) - .await? - } + ) } pub fn scan_lock( @@ -1605,8 +1597,10 @@ impl Storage { .resource_tag_factory .new_tag_with_key_ranges(&ctx, vec![(key.clone(), key.clone())]); let api_version = self.api_version; + let busy_threshold = Duration::from_millis(ctx.busy_threshold_ms as u64); - let res = self.read_pool.spawn_handle( + self.read_pool_spawn_with_busy_check( + busy_threshold, async move { KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); SCHED_COMMANDS_PRI_COUNTER_VEC_STATIC @@ -1663,12 +1657,7 @@ impl Storage { priority, thread_rng().next_u64(), group_name, - ); - - async move { - res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) - .await? 
- } + ) } /// Get the values of a set of raw keys, return a list of `Result`s. @@ -1688,6 +1677,7 @@ impl Storage { .to_owned(); let priority_tag = get_priority_tag(priority); let api_version = self.api_version; + let busy_threshold = Duration::from_millis(gets[0].get_context().busy_threshold_ms as u64); // The resource tags of these batched requests are not the same, and it is quite // expensive to distinguish them, so we can find random one of them as a @@ -1699,7 +1689,8 @@ impl Storage { .resource_tag_factory .new_tag_with_key_ranges(rand_ctx, vec![(rand_key.clone(), rand_key)]); - let res = self.read_pool.spawn_handle( + self.read_pool_spawn_with_busy_check( + busy_threshold, async move { KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); SCHED_COMMANDS_PRI_COUNTER_VEC_STATIC @@ -1800,11 +1791,7 @@ impl Storage { priority, thread_rng().next_u64(), group_name, - ); - async move { - res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) - .await? - } + ) } /// Get the values of some raw keys in a batch. @@ -1823,8 +1810,10 @@ impl Storage { .resource_tag_factory .new_tag_with_key_ranges(&ctx, key_ranges); let api_version = self.api_version; + let busy_threshold = Duration::from_millis(ctx.busy_threshold_ms as u64); - let res = self.read_pool.spawn_handle( + self.read_pool_spawn_with_busy_check( + busy_threshold, async move { let mut key_ranges = vec![]; KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); @@ -1898,12 +1887,7 @@ impl Storage { priority, thread_rng().next_u64(), group_name, - ); - - async move { - res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) - .await? 
- } + ) } async fn check_causal_ts_flushed(ctx: &mut Context, tag: CommandKind) -> Result<()> { @@ -2319,8 +2303,10 @@ impl Storage { let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag(&ctx); let api_version = self.api_version; + let busy_threshold = Duration::from_millis(ctx.busy_threshold_ms as u64); - let res = self.read_pool.spawn_handle( + self.read_pool_spawn_with_busy_check( + busy_threshold, async move { KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); SCHED_COMMANDS_PRI_COUNTER_VEC_STATIC @@ -2425,12 +2411,7 @@ impl Storage { priority, thread_rng().next_u64(), group_name, - ); - - async move { - res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) - .await? - } + ) } /// Scan raw keys in multiple ranges in a batch. @@ -2455,8 +2436,10 @@ impl Storage { .resource_tag_factory .new_tag_with_key_ranges(&ctx, key_ranges); let api_version = self.api_version; + let busy_threshold = Duration::from_millis(ctx.busy_threshold_ms as u64); - let res = self.read_pool.spawn_handle( + self.read_pool_spawn_with_busy_check( + busy_threshold, async move { KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); SCHED_COMMANDS_PRI_COUNTER_VEC_STATIC @@ -2583,12 +2566,7 @@ impl Storage { priority, thread_rng().next_u64(), group_name, - ); - - async move { - res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) - .await? - } + ) } /// Get the value of a raw key. 
@@ -2606,8 +2584,10 @@ impl Storage { .resource_tag_factory .new_tag_with_key_ranges(&ctx, vec![(key.clone(), key.clone())]); let api_version = self.api_version; + let busy_threshold = Duration::from_millis(ctx.busy_threshold_ms as u64); - let res = self.read_pool.spawn_handle( + self.read_pool_spawn_with_busy_check( + busy_threshold, async move { KV_COMMAND_COUNTER_VEC_STATIC.get(CMD).inc(); SCHED_COMMANDS_PRI_COUNTER_VEC_STATIC @@ -2664,12 +2644,7 @@ impl Storage { priority, thread_rng().next_u64(), group_name, - ); - - async move { - res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) - .await? - } + ) } pub fn raw_compare_and_swap_atomic( @@ -2857,6 +2832,31 @@ impl Storage { .await? } } + + fn read_pool_spawn_with_busy_check( + &self, + busy_threshold: Duration, + future: Fut, + priority: CommandPri, + task_id: u64, + group_meta: Vec, + ) -> impl Future> + where + Fut: Future> + Send + 'static, + T: Send + 'static, + { + if let Err(busy_err) = self.read_pool.check_busy_threshold(busy_threshold) { + let mut err = kvproto::errorpb::Error::default(); + err.set_server_is_busy(busy_err); + return Either::Left(future::err(Error::from(ErrorInner::Kv(err.into())))); + } + Either::Right( + self.read_pool + .spawn_handle(future, priority, task_id, group_meta) + .map_err(|_| Error::from(ErrorInner::SchedTooBusy)) + .and_then(|res| future::ready(res)), + ) + } } pub async fn get_raw_key_guard( diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 1a7d44db972..42cda54281e 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -1445,12 +1445,17 @@ fn test_before_propose_deadline() { }), ) .unwrap(); - assert!(matches!( - rx.recv().unwrap(), - Err(StorageError(box StorageErrorInner::Kv(KvError( - box KvErrorInner::Request(_), - )))) - )); + let res = rx.recv().unwrap(); + assert!( + matches!( + res, + Err(StorageError(box StorageErrorInner::Kv(KvError( + box KvErrorInner::Request(_), 
+ )))) + ), + "actual: {:?}", + res + ); } #[test] From 10e93a767162e74c15aa054f2d36939355e9052f Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Tue, 7 Feb 2023 13:47:57 +0800 Subject: [PATCH 0496/1149] raftstore-v2: fix peer destroy not clear in scale-in scene (#14112) close tikv/tikv#14128 1. release tablet in TableCache after peer destroyed 2. release tombstone tablet after applying snapshot Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Xinye Tao Co-authored-by: Ti Chi Robot --- components/batch-system/src/router.rs | 2 +- .../operation/command/admin/compact_log.rs | 7 +- components/raftstore-v2/src/operation/life.rs | 1 + .../src/operation/ready/snapshot.rs | 7 + .../tests/integrations/test_conf_change.rs | 247 ++++++++---------- 5 files changed, 131 insertions(+), 133 deletions(-) diff --git a/components/batch-system/src/router.rs b/components/batch-system/src/router.rs index ef937209531..4238929d1d4 100644 --- a/components/batch-system/src/router.rs +++ b/components/batch-system/src/router.rs @@ -337,7 +337,7 @@ where /// Close the mailbox of address. 
pub fn close(&self, addr: u64) { - info!("[region {}] shutdown mailbox", addr); + info!("shutdown mailbox"; "region_id" => addr); unsafe { &mut *self.caches.as_ptr() }.remove(&addr); let mut mailboxes = self.normals.lock().unwrap(); if let Some(mb) = mailboxes.map.remove(&addr) { diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 0f5fd9b392f..8e83387012e 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -275,6 +275,11 @@ impl Peer { old_tablet: EK, new_tablet_index: u64, ) { + info!(self.logger, + "record tombstone tablet"; + "prev_tablet_path" => old_tablet.path(), + "new_tablet_index" => new_tablet_index + ); let compact_log_context = self.compact_log_context_mut(); compact_log_context .tombstone_tablets_wait_index @@ -291,7 +296,7 @@ impl Peer { /// Returns if there's any tombstone being removed. 
#[inline] - fn remove_tombstone_tablets(&mut self, persisted: u64) -> bool { + pub fn remove_tombstone_tablets(&mut self, persisted: u64) -> bool { let compact_log_context = self.compact_log_context_mut(); let removed = compact_log_context .tombstone_tablets_wait_index diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index a407f6bc8ef..11969701c74 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -599,6 +599,7 @@ impl Peer { let mut meta = ctx.store_meta.lock().unwrap(); meta.remove_region(region_id); meta.readers.remove(®ion_id); + ctx.tablet_registry.remove(region_id); } if let Some(msg) = self.destroy_progress_mut().finish() { // The message will be dispatched to store fsm, which will create a diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index adf20bfce37..87a1496be15 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -49,6 +49,7 @@ use crate::{ operation::{command::temp_split_path, SharedReadTablet}, raft::{Apply, Peer, Storage}, router::ApplyTask, + worker::tablet_gc, Result, StoreContext, }; @@ -274,6 +275,12 @@ impl Peer { self.post_split_init(ctx, init); } self.schedule_apply_fsm(ctx); + if self.remove_tombstone_tablets(snapshot_index) { + let _ = ctx + .schedulers + .tablet_gc + .schedule(tablet_gc::Task::destroy(region_id, snapshot_index)); + } } } } diff --git a/components/raftstore-v2/tests/integrations/test_conf_change.rs b/components/raftstore-v2/tests/integrations/test_conf_change.rs index 7fa75a5a281..7ea49c02a6b 100644 --- a/components/raftstore-v2/tests/integrations/test_conf_change.rs +++ b/components/raftstore-v2/tests/integrations/test_conf_change.rs @@ -4,7 +4,10 @@ use std::{self, time::Duration}; use engine_traits::{Peekable, RaftEngineReadOnly, CF_DEFAULT}; 
use futures::executor::block_on; -use kvproto::{raft_cmdpb::AdminCmdType, raft_serverpb::PeerState}; +use kvproto::{ + raft_cmdpb::{AdminCmdType, RaftCmdRequest}, + raft_serverpb::PeerState, +}; use raft::prelude::ConfChangeType; use raftstore_v2::{ router::{PeerMsg, PeerTick}, @@ -17,58 +20,18 @@ use crate::cluster::{check_skip_wal, Cluster}; #[test] fn test_simple_change() { let mut cluster = Cluster::with_node_count(2, None); - let region_id = 2; - let mut req = cluster.routers[0].new_request_for(2); - let admin_req = req.mut_admin_request(); - admin_req.set_cmd_type(AdminCmdType::ChangePeer); - admin_req - .mut_change_peer() - .set_change_type(ConfChangeType::AddLearnerNode); - let store_id = cluster.node(1).id(); - let new_peer = new_learner_peer(store_id, 10); - admin_req.mut_change_peer().set_peer(new_peer.clone()); - let resp = cluster.routers[0].admin_command(2, req.clone()).unwrap(); - assert!(!resp.get_header().has_error(), "{:?}", resp); - let epoch = req.get_header().get_region_epoch(); - let new_conf_ver = epoch.get_conf_ver() + 1; - let leader_peer = req.get_header().get_peer().clone(); + let (region_id, peer_id, offset_id) = (2, 10, 1); + + // 1. add learner on store-2 + add_learner(&cluster, offset_id, region_id, peer_id); let meta = cluster.routers[0] - .must_query_debug_info(2, Duration::from_secs(3)) + .must_query_debug_info(region_id, Duration::from_secs(3)) .unwrap(); let match_index = meta.raft_apply.applied_index; - assert_eq!(meta.region_state.epoch.version, epoch.get_version()); - assert_eq!(meta.region_state.epoch.conf_ver, new_conf_ver); - assert_eq!(meta.region_state.peers, vec![leader_peer, new_peer.clone()]); - // So heartbeat will create a learner. 
- cluster.dispatch(2, vec![]); - let meta = cluster.routers[1] - .must_query_debug_info(2, Duration::from_secs(3)) - .unwrap(); - assert_eq!(meta.raft_status.id, 10, "{:?}", meta); - assert_eq!(meta.region_state.epoch.version, epoch.get_version()); - assert_eq!(meta.region_state.epoch.conf_ver, new_conf_ver); - assert_eq!( - meta.raft_status.soft_state.leader_id, - req.get_header().get_peer().get_id() - ); - // Trigger the raft tick to replica the log to the learner and execute the - // snapshot task. - cluster.routers[0] - .send(region_id, PeerMsg::Tick(PeerTick::Raft)) - .unwrap(); - cluster.dispatch(region_id, vec![]); - - // write one kv after snapshot + // 2. write one kv after snapshot let (key, val) = (b"key", b"value"); - let header = Box::new(cluster.routers[0].new_request_for(region_id).take_header()); - let mut put = SimpleWriteEncoder::with_capacity(64); - put.put(CF_DEFAULT, key, val); - let (msg, _) = PeerMsg::simple_write(header, put.encode()); - cluster.routers[0].send(region_id, msg).unwrap(); - std::thread::sleep(Duration::from_millis(1000)); - cluster.dispatch(region_id, vec![]); - + write_kv(&cluster, region_id, key, val); let meta = cluster.routers[1] .must_query_debug_info(region_id, Duration::from_secs(3)) .unwrap(); @@ -76,67 +39,29 @@ fn test_simple_change() { // read the new written kv. assert_eq!(match_index, meta.raft_apply.truncated_state.index); assert!(meta.raft_apply.applied_index >= match_index); - let snap = cluster.routers[1].stale_snapshot(2); + let snap = cluster.routers[offset_id].stale_snapshot(region_id); assert_eq!(snap.get_value(key).unwrap().unwrap(), val); + // 3. 
remove peer from store-2 + remove_peer(&cluster, offset_id, region_id, peer_id); - req.mut_header() - .mut_region_epoch() - .set_conf_ver(new_conf_ver); - req.mut_admin_request() - .mut_change_peer() - .set_change_type(ConfChangeType::RemoveNode); - let resp = cluster.routers[0].admin_command(2, req.clone()).unwrap(); - assert!(!resp.get_header().has_error(), "{:?}", resp); - let epoch = req.get_header().get_region_epoch(); - let new_conf_ver = epoch.get_conf_ver() + 1; - let leader_peer = req.get_header().get_peer().clone(); - let meta = cluster.routers[0] - .must_query_debug_info(2, Duration::from_secs(3)) - .unwrap(); - assert_eq!(meta.region_state.epoch.version, epoch.get_version()); - assert_eq!(meta.region_state.epoch.conf_ver, new_conf_ver); - assert_eq!(meta.region_state.peers, vec![leader_peer]); - cluster.routers[0].wait_flush(region_id, Duration::from_millis(300)); - let raft_engine = &cluster.node(0).running_state().unwrap().raft_engine; - let region_state = raft_engine - .get_region_state(region_id, u64::MAX) - .unwrap() - .unwrap(); - assert!( - region_state.get_removed_records().contains(&new_peer), - "{:?}", - region_state - ); + // To avaid that some status doesn't clear after destroying, it can support to + // create peer by many times. + let repeat = 3; + for i in 1..repeat { + add_learner(&cluster, offset_id, region_id, peer_id + i); + write_kv(&cluster, region_id, key, val); + remove_peer(&cluster, offset_id, region_id, peer_id + i); + } - // If adding a peer on the same store, removed_records should be cleaned. 
- req.mut_header() - .mut_region_epoch() - .set_conf_ver(new_conf_ver); - req.mut_admin_request() - .mut_change_peer() - .set_change_type(ConfChangeType::AddLearnerNode); - req.mut_admin_request() - .mut_change_peer() - .mut_peer() - .set_id(11); - let resp = cluster.routers[0].admin_command(2, req.clone()).unwrap(); - assert!(!resp.get_header().has_error(), "{:?}", resp); - cluster.routers[0].wait_flush(region_id, Duration::from_millis(300)); - let region_state = raft_engine - .get_region_state(region_id, u64::MAX) - .unwrap() - .unwrap(); - assert!( - region_state.get_removed_records().is_empty(), - "{:?}", - region_state - ); + add_learner(&cluster, offset_id, region_id, peer_id + repeat); + write_kv(&cluster, region_id, key, val); + let snap = cluster.routers[offset_id].stale_snapshot(region_id); + assert_eq!(snap.get_value(key).unwrap().unwrap(), val); // TODO: check if the peer is removed once life trace is implemented or // snapshot is implemented. - // Check if WAL is skipped for admin command. - let mut cached = cluster.node(0).tablet_registry().get(2).unwrap(); + let mut cached = cluster.node(0).tablet_registry().get(region_id).unwrap(); check_skip_wal(cached.latest().unwrap().as_inner().path()); } @@ -145,38 +70,12 @@ fn test_simple_change() { #[test] fn test_remove_by_conf_change() { let cluster = Cluster::with_node_count(2, None); - let region_id = 2; - let mut req = cluster.routers[0].new_request_for(2); - let admin_req = req.mut_admin_request(); - admin_req.set_cmd_type(AdminCmdType::ChangePeer); - admin_req - .mut_change_peer() - .set_change_type(ConfChangeType::AddLearnerNode); - let store_id = cluster.node(1).id(); - let new_peer = new_learner_peer(store_id, 10); - admin_req.mut_change_peer().set_peer(new_peer); - let resp = cluster.routers[0].admin_command(2, req.clone()).unwrap(); - assert!(!resp.get_header().has_error(), "{:?}", resp); - // So heartbeat will create a learner. 
- cluster.dispatch(2, vec![]); - // Trigger the raft tick to replica the log to the learner and execute the - // snapshot task. - cluster.routers[0] - .send(region_id, PeerMsg::Tick(PeerTick::Raft)) - .unwrap(); - cluster.dispatch(region_id, vec![]); - // Wait some time so snapshot can be generated. - std::thread::sleep(Duration::from_millis(100)); - cluster.dispatch(region_id, vec![]); + let (region_id, peer_id, offset_id) = (2, 10, 1); + let mut req = add_learner(&cluster, offset_id, region_id, peer_id); // write one kv to make flow control replicated. let (key, val) = (b"key", b"value"); - let header = Box::new(cluster.routers[0].new_request_for(region_id).take_header()); - let mut put = SimpleWriteEncoder::with_capacity(64); - put.put(CF_DEFAULT, key, val); - let (msg, _) = PeerMsg::simple_write(header, put.encode()); - cluster.routers[0].send(region_id, msg).unwrap(); - cluster.dispatch(region_id, vec![]); + write_kv(&cluster, region_id, key, val); let new_conf_ver = req.get_header().get_region_epoch().get_conf_ver() + 1; req.mut_header() @@ -214,3 +113,89 @@ fn test_remove_by_conf_change() { assert_eq!(region_state.get_state(), PeerState::Tombstone); assert_eq!(raft_engine.get_raft_state(region_id).unwrap(), None); } + +fn add_learner( + cluster: &Cluster, + offset_id: usize, + region_id: u64, + peer_id: u64, +) -> RaftCmdRequest { + let store_id = cluster.node(offset_id).id(); + let mut req = cluster.routers[0].new_request_for(region_id); + let admin_req = req.mut_admin_request(); + admin_req.set_cmd_type(AdminCmdType::ChangePeer); + admin_req + .mut_change_peer() + .set_change_type(ConfChangeType::AddLearnerNode); + let new_peer = new_learner_peer(store_id, peer_id); + admin_req.mut_change_peer().set_peer(new_peer.clone()); + let resp = cluster.routers[0] + .admin_command(region_id, req.clone()) + .unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + let epoch = req.get_header().get_region_epoch(); + let new_conf_ver = epoch.get_conf_ver() + 
1; + let leader_peer = req.get_header().get_peer().clone(); + let meta = cluster.routers[0] + .must_query_debug_info(region_id, Duration::from_secs(3)) + .unwrap(); + assert_eq!(meta.region_state.epoch.version, epoch.get_version()); + assert_eq!(meta.region_state.epoch.conf_ver, new_conf_ver); + assert_eq!(meta.region_state.peers, vec![leader_peer, new_peer]); + + // heartbeat will create a learner. + cluster.dispatch(region_id, vec![]); + cluster.routers[0] + .send(region_id, PeerMsg::Tick(PeerTick::Raft)) + .unwrap(); + let meta = cluster.routers[offset_id] + .must_query_debug_info(region_id, Duration::from_secs(3)) + .unwrap(); + assert_eq!(meta.raft_status.id, peer_id, "{:?}", meta); + + // Wait some time so snapshot can be generated. + std::thread::sleep(Duration::from_millis(100)); + cluster.dispatch(region_id, vec![]); + req +} + +fn write_kv(cluster: &Cluster, region_id: u64, key: &[u8], val: &[u8]) { + let header = Box::new(cluster.routers[0].new_request_for(region_id).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, key, val); + let (msg, _) = PeerMsg::simple_write(header, put.encode()); + cluster.routers[0].send(region_id, msg).unwrap(); + std::thread::sleep(Duration::from_millis(1000)); + cluster.dispatch(region_id, vec![]); +} + +fn remove_peer(cluster: &Cluster, offset_id: usize, region_id: u64, peer_id: u64) { + let store_id = cluster.node(offset_id).id(); + let mut req = cluster.routers[0].new_request_for(region_id); + let admin_req = req.mut_admin_request(); + admin_req.set_cmd_type(AdminCmdType::ChangePeer); + admin_req + .mut_change_peer() + .set_change_type(ConfChangeType::RemoveNode); + admin_req + .mut_change_peer() + .set_peer(new_learner_peer(store_id, peer_id)); + let resp = cluster.routers[0] + .admin_command(region_id, req.clone()) + .unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + + cluster.routers[offset_id] + .send(region_id, PeerMsg::Tick(PeerTick::Raft)) + .unwrap(); + 
cluster.dispatch(region_id, vec![]); + std::thread::sleep(Duration::from_millis(100)); + + let raft_engine = &cluster.node(offset_id).running_state().unwrap().raft_engine; + let region_state = raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); + assert_eq!(region_state.get_state(), PeerState::Tombstone); + assert_eq!(raft_engine.get_raft_state(region_id).unwrap(), None); +} From a45c7de539223d5c7aaa00aabd6b5f98dd471cbe Mon Sep 17 00:00:00 2001 From: Zwb Date: Tue, 7 Feb 2023 14:31:57 +0800 Subject: [PATCH 0497/1149] apply: ignore compute and verify hash when it's a witness (#14150) close tikv/tikv#14142 apply: ignore compute and verify hash when it's a witness Signed-off-by: Wenbo Zhang Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/fsm/apply.rs | 29 ++++++++----- tests/integrations/raftstore/test_witness.rs | 44 +++++++++++++++++++- 2 files changed, 61 insertions(+), 12 deletions(-) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 7f4e5497cb9..fba17db7391 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -3197,16 +3197,20 @@ where let resp = AdminResponse::default(); Ok(( resp, - ApplyResult::Res(ExecResult::ComputeHash { - region: self.region.clone(), - index: ctx.exec_log_index, - context: req.get_compute_hash().get_context().to_vec(), - // This snapshot may be held for a long time, which may cause too many - // open files in rocksdb. - // TODO: figure out another way to do consistency check without snapshot - // or short life snapshot. - snap: ctx.engine.snapshot(), - }), + if self.peer.is_witness { + ApplyResult::None + } else { + ApplyResult::Res(ExecResult::ComputeHash { + region: self.region.clone(), + index: ctx.exec_log_index, + context: req.get_compute_hash().get_context().to_vec(), + // This snapshot may be held for a long time, which may cause too many + // open files in rocksdb. 
+ // TODO: figure out another way to do consistency check without snapshot + // or short life snapshot. + snap: ctx.engine.snapshot(), + }) + }, )) } @@ -3215,11 +3219,14 @@ where _: &ApplyContext, req: &AdminRequest, ) -> Result<(AdminResponse, ApplyResult)> { + let resp = AdminResponse::default(); + if self.peer.is_witness { + return Ok((resp, ApplyResult::None)); + } let verify_req = req.get_verify_hash(); let index = verify_req.get_index(); let context = verify_req.get_context().to_vec(); let hash = verify_req.get_hash().to_vec(); - let resp = AdminResponse::default(); Ok(( resp, ApplyResult::Res(ExecResult::VerifyHash { diff --git a/tests/integrations/raftstore/test_witness.rs b/tests/integrations/raftstore/test_witness.rs index 907c49c03af..d5a9992bc3a 100644 --- a/tests/integrations/raftstore/test_witness.rs +++ b/tests/integrations/raftstore/test_witness.rs @@ -12,7 +12,7 @@ use kvproto::{ use pd_client::PdClient; use raft::eraftpb::ConfChangeType; use test_raftstore::*; -use tikv_util::store::find_peer; +use tikv_util::{config::ReadableDuration, store::find_peer}; // Test the case that region split or merge with witness peer #[test] @@ -556,3 +556,45 @@ fn test_witness_leader_down() { ); assert_eq!(cluster.must_get(b"k9"), Some(b"v9".to_vec())); } + +// Test the case that witness ignore consistency check as it has no data +#[test] +fn test_witness_ignore_consistency_check() { + let mut cluster = new_server_cluster(0, 3); + cluster.cfg.raft_store.raft_election_timeout_ticks = 50; + // disable compact log to make test more stable. 
+ cluster.cfg.raft_store.raft_log_gc_threshold = 1000; + cluster.cfg.raft_store.consistency_check_interval = ReadableDuration::secs(1); + cluster.run(); + + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.must_put(b"k1", b"v1"); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1.clone()); + + // nonwitness -> witness + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store3.get_id()], + vec![true], + ); + + // make sure the peer_on_store3 has completed applied to witness + std::thread::sleep(Duration::from_millis(200)); + + for i in 0..300 { + cluster.must_put( + format!("k{:06}", i).as_bytes(), + format!("k{:06}", i).as_bytes(), + ); + std::thread::sleep(Duration::from_millis(10)); + } +} From adff03cab87e78e2f7f542b1e2875254c7706f38 Mon Sep 17 00:00:00 2001 From: fengou1 <85682690+fengou1@users.noreply.github.com> Date: Tue, 7 Feb 2023 17:25:57 +0800 Subject: [PATCH 0498/1149] fix: ebs volume snapshot support tikv node equipped with 2 cpu or less configuration (#14153) close tikv/tikv#14017 Signed-off-by: fengou1 Co-authored-by: Ti Chi Robot --- components/snap_recovery/src/init_cluster.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/components/snap_recovery/src/init_cluster.rs b/components/snap_recovery/src/init_cluster.rs index d3a2ebade73..4e72a19d6a6 100644 --- a/components/snap_recovery/src/init_cluster.rs +++ b/components/snap_recovery/src/init_cluster.rs @@ -1,6 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{error::Error as StdError, result, sync::Arc, thread, time::Duration}; +use std::{cmp, error::Error as StdError, i32, result, sync::Arc, thread, time::Duration}; use encryption_export::data_key_manager_from_config; use engine_rocks::{util::new_engine_opt, RocksEngine}; @@ -14,7 +14,10 @@ use tikv::{ config::TikvConfig, server::{config::Config as ServerConfig, KvEngineFactoryBuilder}, }; -use tikv_util::config::{ReadableDuration, ReadableSize, VersionTrack}; +use tikv_util::{ + config::{ReadableDuration, ReadableSize, VersionTrack}, + sys::SysQuota, +}; const CLUSTER_BOOTSTRAPPED_MAX_RETRY: u64 = 60; const CLUSTER_BOOTSTRAPPED_RETRY_INTERVAL: Duration = Duration::from_secs(3); @@ -88,7 +91,9 @@ pub fn enter_snap_recovery_mode(config: &mut TikvConfig) { config.rocksdb.lockcf.disable_auto_compactions = true; config.rocksdb.raftcf.disable_auto_compactions = true; - config.rocksdb.max_background_jobs = 32; + // for cpu = 1, take a reasonable value min[32, maxValue]. + let limit = (SysQuota::cpu_cores_quota() * 10.0) as i32; + config.rocksdb.max_background_jobs = cmp::min(32, limit); // disable resolve ts during the recovery config.resolved_ts.enable = false; From db50ce6ad2f07854ae323a11e49d3d15a4e43b6b Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Tue, 7 Feb 2023 02:39:57 -0800 Subject: [PATCH 0499/1149] Increase read pool limit (#13766) close tikv/tikv#13690 * Read pool size limit is removed. It was previously hard-coded to be the total number of cores. Limiting the number of threads to be less than the number of cores reduces the chance of context switch, but also makes TiKV prone to starvation problem (since read pool is currently FIFO), even if all the queries require no IO or IO is async (imagining there are number_of_cores clients sending big queries, and another numbers of clients sending small queries concurrently to a single TiKV server, and all of them require no IO). 
Thread starvation causes high tail latency which is even worse than context switching. According to the feature requester, increasing the number of threads significantly improved the tail latency in their environment. Thus, we should remove the limit, and leave it to the users. Signed-off-by: Yang Zhang --- src/config/mod.rs | 8 ++------ src/read_pool.rs | 7 +++++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 7539fc13c63..4188d8409e3 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1960,7 +1960,7 @@ impl UnifiedReadPoolConfig { } let limit = cmp::max( UNIFIED_READPOOL_MIN_CONCURRENCY, - SysQuota::cpu_cores_quota() as usize, + SysQuota::cpu_cores_quota() as usize * 10, // at most 10 threads per core ); if self.max_thread_count > limit { return Err(format!( @@ -2050,11 +2050,7 @@ mod unified_read_pool_tests { }; invalid_cfg.validate().unwrap_err(); let invalid_cfg = UnifiedReadPoolConfig { - min_thread_count: 1, - max_thread_count: cmp::max( - UNIFIED_READPOOL_MIN_CONCURRENCY, - SysQuota::cpu_cores_quota() as usize, - ) + 1, + max_thread_count: SysQuota::cpu_cores_quota() as usize * 10 + 1, ..cfg }; invalid_cfg.validate().unwrap_err(); diff --git a/src/read_pool.rs b/src/read_pool.rs index 2c56e205ef7..8ef2c4a9b25 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -424,8 +424,11 @@ pub fn build_yatp_read_pool( config.min_thread_count, config.max_thread_count, std::cmp::max( - UNIFIED_READPOOL_MIN_CONCURRENCY, - SysQuota::cpu_cores_quota() as usize, + std::cmp::max( + UNIFIED_READPOOL_MIN_CONCURRENCY, + SysQuota::cpu_cores_quota() as usize, + ), + config.max_thread_count, ), ) .after_start(move || { From 1df793c27270157f1e479de67a1a3d4badba646f Mon Sep 17 00:00:00 2001 From: Shenghui Wu <793703860@qq.com> Date: Tue, 7 Feb 2023 19:37:57 +0800 Subject: [PATCH 0500/1149] copr: fix error when cast const Enum to any type (#14149) close tikv/tikv#14156, close pingcap/tidb#40341 copr: fix error when 
cast const Enum to any type Co-authored-by: Ti Chi Robot --- components/tidb_query_expr/src/types/expr_builder.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/components/tidb_query_expr/src/types/expr_builder.rs b/components/tidb_query_expr/src/types/expr_builder.rs index 5311a2c03d9..6ccfd35631e 100644 --- a/components/tidb_query_expr/src/types/expr_builder.rs +++ b/components/tidb_query_expr/src/types/expr_builder.rs @@ -68,7 +68,8 @@ impl RpnExpressionBuilder { | ExprType::MysqlTime | ExprType::MysqlDuration | ExprType::MysqlDecimal - | ExprType::MysqlJson => Ok(true), + | ExprType::MysqlJson + | ExprType::MysqlEnum => Ok(true), ExprType::ScalarFunc => Ok(false), ExprType::ColumnRef => Ok(false), _ => Err(other_err!("Unsupported expression type {:?}", c.get_tp())), From 98ac5d2ad17c76d13a0f7e34f0bd32507fc2b8dc Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Tue, 7 Feb 2023 19:23:59 -0800 Subject: [PATCH 0501/1149] Support backup replica read (#13975) close tikv/tikv#14060 Support replica read for backup Signed-off-by: Yang Zhang --- components/backup/src/endpoint.rs | 219 +++++++++++++++++++++++++----- 1 file changed, 185 insertions(+), 34 deletions(-) diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 0469ffa30a7..896020cf51a 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -71,6 +71,7 @@ struct Request { compression_type: CompressionType, compression_level: i32, cipher: CipherInfo, + replica_read: bool, } /// Backup Task. 
@@ -131,6 +132,7 @@ impl Task { cf, compression_type: req.get_compression_type(), compression_level: req.get_compression_level(), + replica_read: req.get_replica_read(), cipher: req.cipher_info.unwrap_or_else(|| { let mut cipher = CipherInfo::default(); cipher.set_cipher_type(EncryptionMethod::Plaintext); @@ -153,9 +155,10 @@ pub struct BackupRange { start_key: Option, end_key: Option, region: Region, - leader: Peer, + peer: Peer, codec: KeyValueCodec, cf: CfName, + uses_replica_read: bool, } /// The generic saveable writer. for generic `InMemBackupFiles`. @@ -304,35 +307,45 @@ impl BackupRange { let mut ctx = Context::default(); ctx.set_region_id(self.region.get_id()); ctx.set_region_epoch(self.region.get_region_epoch().to_owned()); - ctx.set_peer(self.leader.clone()); - - // Update max_ts and check the in-memory lock table before getting the snapshot - concurrency_manager.update_max_ts(backup_ts); - concurrency_manager - .read_range_check( - self.start_key.as_ref(), - self.end_key.as_ref(), - |key, lock| { - Lock::check_ts_conflict( - Cow::Borrowed(lock), - key, - backup_ts, - &Default::default(), - IsolationLevel::Si, - ) - }, - ) - .map_err(MvccError::from) - .map_err(TxnError::from)?; + ctx.set_peer(self.peer.clone()); + ctx.set_replica_read(self.uses_replica_read); + ctx.set_isolation_level(IsolationLevel::Si); - // Currently backup always happens on the leader, so we don't need - // to set key ranges and start ts to check. 
- assert!(!ctx.get_replica_read()); - let snap_ctx = SnapContext { + let mut snap_ctx = SnapContext { pb_ctx: &ctx, allowed_in_flashback: self.region.is_in_flashback, ..Default::default() }; + if self.uses_replica_read { + snap_ctx.start_ts = Some(backup_ts); + let mut key_range = KeyRange::default(); + if let Some(start_key) = self.start_key.as_ref() { + key_range.set_start_key(start_key.clone().into_encoded()); + } + if let Some(end_key) = self.end_key.as_ref() { + key_range.set_end_key(end_key.clone().into_encoded()); + } + snap_ctx.key_ranges = vec![key_range]; + } else { + // Update max_ts and check the in-memory lock table before getting the snapshot + concurrency_manager.update_max_ts(backup_ts); + concurrency_manager + .read_range_check( + self.start_key.as_ref(), + self.end_key.as_ref(), + |key, lock| { + Lock::check_ts_conflict( + Cow::Borrowed(lock), + key, + backup_ts, + &Default::default(), + IsolationLevel::Si, + ) + }, + ) + .map_err(MvccError::from) + .map_err(TxnError::from)?; + } let start_snapshot = Instant::now(); let snapshot = match engine.snapshot(snap_ctx) { @@ -540,7 +553,8 @@ impl BackupRange { let mut ctx = Context::default(); ctx.set_region_id(self.region.get_id()); ctx.set_region_epoch(self.region.get_region_epoch().to_owned()); - ctx.set_peer(self.leader.clone()); + ctx.set_peer(self.peer.clone()); + let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -739,7 +753,7 @@ impl Progress { /// Forward the progress by `ranges` BackupRanges /// /// The size of the returned BackupRanges should <= `ranges` - fn forward(&mut self, limit: usize) -> Vec { + fn forward(&mut self, limit: usize, replica_read: bool) -> Vec { if self.finished { return Vec::new(); } @@ -769,18 +783,20 @@ impl Progress { break; } } - if info.role == StateRole::Leader { + let peer = find_peer(region, store_id).unwrap().to_owned(); + // Raft peer role has to match the replica read flag. 
+ if replica_read || info.role == StateRole::Leader { let ekey = get_min_end_key(end_key.as_ref(), region); let skey = get_max_start_key(start_key.as_ref(), region); assert!(!(skey == ekey && ekey.is_some()), "{:?} {:?}", skey, ekey); - let leader = find_peer(region, store_id).unwrap().to_owned(); let backup_range = BackupRange { start_key: skey, end_key: ekey, region: region.clone(), - leader, + peer, codec, cf: cf_name, + uses_replica_read: info.role != StateRole::Leader, }; tx.send(backup_range).unwrap(); count += 1; @@ -907,7 +923,7 @@ impl Endpoint { // (See https://tokio.rs/tokio/tutorial/shared-state) // Use &mut and mark the type for making rust-analyzer happy. let progress: &mut Progress<_> = &mut prs.lock().unwrap(); - let batch = progress.forward(batch_size); + let batch = progress.forward(batch_size, request.replica_read); if batch.is_empty() { return; } @@ -1080,7 +1096,6 @@ impl Endpoint { let backend = Arc::::from(backend); let concurrency = self.config_manager.0.read().unwrap().num_threads; self.pool.borrow_mut().adjust_with(concurrency); - // make the buffer small enough to implement back pressure. 
let (tx, rx) = async_channel::bounded(1); for _ in 0..concurrency { self.spawn_backup_worker( @@ -1307,6 +1322,38 @@ pub mod tests { map.create_region(r, StateRole::Leader); } } + pub fn add_region( + &self, + id: u64, + mut start_key: Vec, + mut end_key: Vec, + peer_role: metapb::PeerRole, + state_role: StateRole, + ) { + let mut region = metapb::Region::default(); + region.set_id(id); + if !start_key.is_empty() { + if self.need_encode_key { + start_key = Key::from_raw(&start_key).into_encoded(); + } else { + start_key = Key::from_encoded(start_key).into_encoded(); + } + } + if !end_key.is_empty() { + if self.need_encode_key { + end_key = Key::from_raw(&end_key).into_encoded(); + } else { + end_key = Key::from_encoded(end_key).into_encoded(); + } + } + region.set_start_key(start_key); + region.set_end_key(end_key); + let mut new_peer = new_peer(1, 1); + new_peer.set_role(peer_role); + region.mut_peers().push(new_peer); + let mut map = self.regions.lock().unwrap(); + map.create_region(region, state_role); + } fn canecl_on_seek(&mut self, cancel: Arc) { self.cancel = Some(cancel); } @@ -1456,7 +1503,7 @@ pub mod tests { let mut ranges = Vec::with_capacity(expect.len()); while ranges.len() != expect.len() { let n = (rand::random::() % 3) + 1; - let mut r = prs.forward(n); + let mut r = prs.forward(n, false); // The returned backup ranges should <= n assert!(r.len() <= n); @@ -1508,6 +1555,7 @@ pub mod tests { compression_type: CompressionType::Unknown, compression_level: 0, cipher: CipherInfo::default(), + replica_read: false, }, resp: tx, }; @@ -1563,6 +1611,108 @@ pub mod tests { } } + #[test] + fn test_backup_replica_read() { + let (_tmp, endpoint) = new_endpoint(); + + endpoint.region_info.add_region( + 1, + b"".to_vec(), + b"1".to_vec(), + metapb::PeerRole::Voter, + StateRole::Leader, + ); + endpoint.region_info.add_region( + 2, + b"1".to_vec(), + b"2".to_vec(), + metapb::PeerRole::Voter, + StateRole::Follower, + ); + endpoint.region_info.add_region( + 3, + 
b"2".to_vec(), + b"3".to_vec(), + metapb::PeerRole::Learner, + StateRole::Follower, + ); + + let tmp = TempDir::new().unwrap(); + let backend = make_local_backend(tmp.path()); + + let (tx, rx) = unbounded(); + let mut ranges = vec![]; + let key_range = KeyRange { + start_key: b"".to_vec(), + end_key: b"3".to_vec(), + ..Default::default() + }; + ranges.push(key_range); + let read_leader_task = Task { + request: Request { + start_key: b"1".to_vec(), + end_key: b"2".to_vec(), + sub_ranges: ranges.clone(), + start_ts: 1.into(), + end_ts: 1.into(), + backend: backend.clone(), + limiter: Limiter::new(f64::INFINITY), + cancel: Arc::default(), + is_raw_kv: false, + dst_api_ver: ApiVersion::V1, + cf: engine_traits::CF_DEFAULT, + compression_type: CompressionType::Unknown, + compression_level: 0, + cipher: CipherInfo::default(), + replica_read: false, + }, + resp: tx, + }; + endpoint.handle_backup_task(read_leader_task); + let resps: Vec<_> = block_on(rx.collect()); + assert_eq!(resps.len(), 1); + for a in &resps { + assert_eq!(a.get_start_key(), b""); + assert_eq!(a.get_end_key(), b"1"); + } + + let (tx, rx) = unbounded(); + let replica_read_task = Task { + request: Request { + start_key: b"".to_vec(), + end_key: b"3".to_vec(), + sub_ranges: ranges.clone(), + start_ts: 1.into(), + end_ts: 1.into(), + backend, + limiter: Limiter::new(f64::INFINITY), + cancel: Arc::default(), + is_raw_kv: false, + dst_api_ver: ApiVersion::V1, + cf: engine_traits::CF_DEFAULT, + compression_type: CompressionType::Unknown, + compression_level: 0, + cipher: CipherInfo::default(), + replica_read: true, + }, + resp: tx, + }; + endpoint.handle_backup_task(replica_read_task); + let resps: Vec<_> = block_on(rx.collect()); + let expected: Vec<(&[u8], &[u8])> = vec![(b"", b"1"), (b"1", b"2"), (b"2", b"3")]; + assert_eq!(resps.len(), 3); + for a in &resps { + assert!( + expected + .iter() + .any(|b| { a.get_start_key() == b.0 && a.get_end_key() == b.1 }), + "{:?} {:?}", + resps, + expected + ); + } + } + 
#[test] fn test_seek_ranges() { let (_tmp, endpoint) = new_endpoint(); @@ -1594,7 +1744,7 @@ pub mod tests { let mut ranges = Vec::with_capacity(expect.len()); while ranges.len() != expect.len() { let n = (rand::random::() % 3) + 1; - let mut r = prs.forward(n); + let mut r = prs.forward(n, false); // The returned backup ranges should <= n assert!(r.len() <= n); @@ -1656,6 +1806,7 @@ pub mod tests { compression_type: CompressionType::Unknown, compression_level: 0, cipher: CipherInfo::default(), + replica_read: false, }, resp: tx, }; From 1d97f4d56b11195bc14ceb82878fd955adc0afb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 8 Feb 2023 11:37:59 +0800 Subject: [PATCH 0502/1149] log-backup: report when watch canceled (#14154) close tikv/tikv#14159 Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- components/backup-stream/src/endpoint.rs | 2 + components/backup-stream/src/errors.rs | 18 ++++++++- .../backup-stream/src/metadata/store/etcd.rs | 39 +++++++++++++------ 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index ff380551b90..dc053feff33 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -354,6 +354,7 @@ where continue; } }; + info!("start watching the task changes."; "from_rev" => %revision_new); loop { if let Some(event) = watcher.stream.next().await { @@ -403,6 +404,7 @@ where continue; } }; + info!("start watching the pausing events."; "from_rev" => %revision_new); loop { if let Some(event) = watcher.stream.next().await { diff --git a/components/backup-stream/src/errors.rs b/components/backup-stream/src/errors.rs index a3f76e0255f..2fecf0ac514 100644 --- a/components/backup-stream/src/errors.rs +++ b/components/backup-stream/src/errors.rs @@ -22,7 +22,7 @@ pub enum Error { #[error("gRPC meet error {0}")] Grpc(#[from] GrpcError), 
#[error("Etcd meet error {0}")] - Etcd(#[from] EtcdError), + Etcd(#[from] EtcdErrorExt), #[error("Protobuf meet error {0}")] Protobuf(#[from] ProtobufError), #[error("No such task {task_name:?}")] @@ -52,6 +52,22 @@ pub enum Error { Other(#[from] Box), } +impl From for Error { + fn from(value: EtcdError) -> Self { + Self::Etcd(value.into()) + } +} + +#[derive(ThisError, Debug)] +pub enum EtcdErrorExt { + #[error("{0}")] + Normal(#[from] EtcdError), + #[error("the watch canceled")] + WatchCanceled, + #[error("the required revision has been compacted, current is {current}")] + RevisionCompacted { current: i64 }, +} + impl ErrorCodeExt for Error { fn error_code(&self) -> error_code::ErrorCode { use error_code::backup_stream::*; diff --git a/components/backup-stream/src/metadata/store/etcd.rs b/components/backup-stream/src/metadata/store/etcd.rs index 556661700f9..e52cc4f92d9 100644 --- a/components/backup-stream/src/metadata/store/etcd.rs +++ b/components/backup-stream/src/metadata/store/etcd.rs @@ -23,7 +23,7 @@ use super::{ TransactionOp, }; use crate::{ - errors::Result, + errors::{Error, EtcdErrorExt, Result}, metadata::{ keys::{KeyValue, MetaKey}, metrics::METADATA_KEY_OPERATION, @@ -113,17 +113,32 @@ impl MetaStore for EtcdStore { |events| -> Pin> + Send>> { match events { Err(err) => Box::pin(tokio_stream::once(Err(err.into()))), - Ok(events) => Box::pin(tokio_stream::iter( - // TODO: remove the copy here via access the protobuf field directly. 
- #[allow(clippy::unnecessary_to_owned)] - events.events().to_owned().into_iter().filter_map(|event| { - let kv = event.kv()?; - Some(Ok(KvEvent { - kind: event.event_type().into(), - pair: kv.clone().into(), - })) - }), - )), + Ok(events) => { + if events.compact_revision() > 0 && events.canceled() { + return Box::pin(tokio_stream::once(Err(Error::Etcd( + EtcdErrorExt::RevisionCompacted { + current: events.compact_revision(), + }, + )))); + } + if events.canceled() { + return Box::pin(tokio_stream::once(Err(Error::Etcd( + EtcdErrorExt::WatchCanceled, + )))); + } + Box::pin(tokio_stream::iter( + // TODO: remove the copy here via access the protobuf field + // directly. + #[allow(clippy::unnecessary_to_owned)] + events.events().to_owned().into_iter().filter_map(|event| { + let kv = event.kv()?; + Some(Ok(KvEvent { + kind: event.event_type().into(), + pair: kv.clone().into(), + })) + }), + )) + } } }, )), From 5a8d477fdbd713faf1fc22dbb3eadb9167e388d9 Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 8 Feb 2023 14:27:59 +0800 Subject: [PATCH 0503/1149] engine_traits: allow chaos flush notification (#14160) close tikv/tikv#14113 `OnFlushComplete` can be called out of order. What we can assume is when a seqno finishes flush, all SSTs have smaller seqno must also finish flush. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/engine_rocks/src/event_listener.rs | 20 +++++-- components/engine_rocks/src/lib.rs | 1 + components/engine_traits/src/cf_defs.rs | 5 ++ components/engine_traits/src/flush.rs | 60 ++++++++++++++----- .../src/operation/command/write/mod.rs | 7 +-- components/raftstore-v2/src/operation/mod.rs | 3 +- .../src/operation/ready/apply_trace.rs | 15 ++--- .../raftstore-v2/src/operation/ready/mod.rs | 2 +- 8 files changed, 77 insertions(+), 36 deletions(-) diff --git a/components/engine_rocks/src/event_listener.rs b/components/engine_rocks/src/event_listener.rs index 3bbf03cb77f..23ff7cf5f50 100644 --- a/components/engine_rocks/src/event_listener.rs +++ b/components/engine_rocks/src/event_listener.rs @@ -194,8 +194,20 @@ impl rocksdb::EventListener for RocksPersistenceListener { } fn on_flush_completed(&self, job: &FlushJobInfo) { + let num = match job + .file_path() + .file_prefix() + .and_then(|n| n.to_str()) + .map(|n| n.parse()) + { + Some(Ok(n)) => n, + _ => { + slog_global::error!("failed to parse file number"; "path" => job.file_path().display()); + 0 + } + }; self.0 - .on_flush_completed(job.cf_name(), job.largest_seqno()); + .on_flush_completed(job.cf_name(), job.largest_seqno(), num); } } @@ -207,7 +219,7 @@ mod tests { }; use engine_traits::{ - FlushProgress, FlushState, MiscExt, StateStorage, SyncMutable, CF_DEFAULT, DATA_CFS, + ApplyProgress, FlushState, MiscExt, StateStorage, SyncMutable, CF_DEFAULT, DATA_CFS, }; use tempfile::Builder; @@ -221,7 +233,7 @@ mod tests { assert_eq!(filename, "/000398.sst"); } - type Record = (u64, u64, FlushProgress); + type Record = (u64, u64, ApplyProgress); #[derive(Default)] struct MemStorage { @@ -229,7 +241,7 @@ mod tests { } impl StateStorage for MemStorage { - fn persist_progress(&self, region_id: u64, tablet_index: u64, pr: FlushProgress) { + fn persist_progress(&self, region_id: u64, tablet_index: u64, pr: ApplyProgress) { self.records .lock() .unwrap() 
diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index 94a4c23a3c4..b5561b3de42 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -18,6 +18,7 @@ #![cfg_attr(test, feature(test))] #![feature(let_chains)] #![feature(option_get_or_insert_default)] +#![feature(path_file_prefix)] #[allow(unused_extern_crates)] extern crate tikv_alloc; diff --git a/components/engine_traits/src/cf_defs.rs b/components/engine_traits/src/cf_defs.rs index 1658f49053c..27546dfc1c1 100644 --- a/components/engine_traits/src/cf_defs.rs +++ b/components/engine_traits/src/cf_defs.rs @@ -11,6 +11,11 @@ pub const ALL_CFS: &[CfName] = &[CF_DEFAULT, CF_LOCK, CF_WRITE, CF_RAFT]; pub const DATA_CFS: &[CfName] = &[CF_DEFAULT, CF_LOCK, CF_WRITE]; pub const DATA_CFS_LEN: usize = DATA_CFS.len(); +pub fn data_cf_offset(cf: &str) -> usize { + let cf = if cf.is_empty() { CF_DEFAULT } else { cf }; + DATA_CFS.iter().position(|c| *c == cf).expect(cf) +} + pub fn name_to_cf(name: &str) -> Option { if name.is_empty() { return Some(CF_DEFAULT); diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index 8300348da8c..d35233bc310 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -20,17 +20,20 @@ use std::{ }, }; -use crate::{RaftEngine, RaftLogBatch}; +use slog_global::info; +use tikv_util::set_panic_mark; + +use crate::{data_cf_offset, RaftEngine, RaftLogBatch, DATA_CFS_LEN}; #[derive(Debug)] -pub struct FlushProgress { +pub struct ApplyProgress { cf: String, apply_index: u64, earliest_seqno: u64, } -impl FlushProgress { - fn merge(&mut self, pr: FlushProgress) { +impl ApplyProgress { + fn merge(&mut self, pr: ApplyProgress) { debug_assert_eq!(self.cf, pr.cf); debug_assert!(self.apply_index <= pr.apply_index); self.apply_index = pr.apply_index; @@ -45,6 +48,12 @@ impl FlushProgress { } } +#[derive(Default, Debug)] +struct FlushProgress { + prs: LinkedList, 
+ last_flushed: [u64; DATA_CFS_LEN], +} + /// A share state between raftstore and underlying engine. /// /// raftstore will update state changes and corresponding apply index, when @@ -77,7 +86,7 @@ impl FlushState { /// A helper trait to avoid exposing `RaftEngine` to `TabletFactory`. pub trait StateStorage: Sync + Send { - fn persist_progress(&self, region_id: u64, tablet_index: u64, pr: FlushProgress); + fn persist_progress(&self, region_id: u64, tablet_index: u64, pr: ApplyProgress); } /// A flush listener that maps memtable to apply index and persist the relation @@ -86,7 +95,7 @@ pub struct PersistenceListener { region_id: u64, tablet_index: u64, state: Arc, - progress: Mutex>, + progress: Mutex, storage: Arc, } @@ -101,7 +110,7 @@ impl PersistenceListener { region_id, tablet_index, state, - progress: Mutex::new(LinkedList::new()), + progress: Mutex::new(FlushProgress::default()), storage, } } @@ -120,8 +129,17 @@ impl PersistenceListener { // thread writting to the DB and increasing apply index. // Apply index will be set within DB lock, so it's correct even with manual // flush. + let offset = data_cf_offset(&cf); let apply_index = self.state.applied_index.load(Ordering::SeqCst); - self.progress.lock().unwrap().push_back(FlushProgress { + let mut prs = self.progress.lock().unwrap(); + let flushed = prs.last_flushed[offset]; + if flushed > earliest_seqno { + panic!( + "sealed seqno has been flushed {} {} {} <= {}", + cf, apply_index, earliest_seqno, flushed + ); + } + prs.prs.push_back(ApplyProgress { cf, apply_index, earliest_seqno, @@ -131,12 +149,21 @@ impl PersistenceListener { /// Called a memtable finished flushing. /// /// `largest_seqno` should be the largest seqno of the generated file. - pub fn on_flush_completed(&self, cf: &str, largest_seqno: u64) { + pub fn on_flush_completed(&self, cf: &str, largest_seqno: u64, file_no: u64) { // Maybe we should hook the compaction to avoid the file is compacted before // being recorded. 
+ let offset = data_cf_offset(cf); let pr = { let mut prs = self.progress.lock().unwrap(); - let mut cursor = prs.cursor_front_mut(); + let flushed = prs.last_flushed[offset]; + if flushed >= largest_seqno { + // According to facebook/rocksdb#11183, it's possible OnFlushCompleted can be + // called out of order. But it's guaranteed files are installed in order. + info!("flush complete reorder found"; "flushed" => flushed, "largest_seqno" => largest_seqno, "file_no" => file_no, "cf" => cf); + return; + } + prs.last_flushed[offset] = largest_seqno; + let mut cursor = prs.prs.cursor_front_mut(); let mut flushed_pr = None; while let Some(pr) = cursor.current() { if pr.cf != cf { @@ -157,10 +184,13 @@ impl PersistenceListener { } match flushed_pr { Some(pr) => pr, - None => panic!( - "[region_id={}] [tablet_index={}] {} not found in {:?}", - self.region_id, self.tablet_index, cf, prs - ), + None => { + set_panic_mark(); + panic!( + "[region_id={}] [tablet_index={}] {} {} {} not found in {:?}", + self.region_id, self.tablet_index, cf, largest_seqno, file_no, prs + ) + } } }; self.storage @@ -169,7 +199,7 @@ impl PersistenceListener { } impl StateStorage for R { - fn persist_progress(&self, region_id: u64, tablet_index: u64, pr: FlushProgress) { + fn persist_progress(&self, region_id: u64, tablet_index: u64, pr: ApplyProgress) { if pr.apply_index == 0 { return; } diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 14011d6fc1b..e958a3ec08f 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -1,6 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{KvEngine, Mutable, RaftEngine, CF_DEFAULT}; +use engine_traits::{data_cf_offset, KvEngine, Mutable, RaftEngine, CF_DEFAULT}; use kvproto::raft_cmdpb::RaftRequestHeader; use raftstore::{ store::{ @@ -15,7 +15,6 @@ use tikv_util::slog_panic; use crate::{ batch::StoreContext, - operation::cf_offset, raft::{Apply, Peer}, router::{ApplyTask, CmdResChannel}, }; @@ -129,7 +128,7 @@ impl Peer { impl Apply { #[inline] pub fn apply_put(&mut self, cf: &str, index: u64, key: &[u8], value: &[u8]) -> Result<()> { - let off = cf_offset(cf); + let off = data_cf_offset(cf); if self.should_skip(off, index) { return Ok(()); } @@ -172,7 +171,7 @@ impl Apply { #[inline] pub fn apply_delete(&mut self, cf: &str, index: u64, key: &[u8]) -> Result<()> { - let off = cf_offset(cf); + let off = data_cf_offset(cf); if self.should_skip(off, index) { return Ok(()); } diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 492595851e2..9cdd78dcb4c 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -14,8 +14,7 @@ pub use command::{ }; pub use life::{DestroyProgress, GcPeerContext}; pub use ready::{ - cf_offset, write_initial_states, ApplyTrace, AsyncWriter, DataTrace, GenSnapTask, SnapState, - StateStorage, + write_initial_states, ApplyTrace, AsyncWriter, DataTrace, GenSnapTask, SnapState, StateStorage, }; pub(crate) use self::{ diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index 67bbed5aa4b..71e282728f7 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -30,8 +30,8 @@ use std::{cmp, sync::Mutex}; use engine_traits::{ - FlushProgress, KvEngine, RaftEngine, RaftLogBatch, TabletRegistry, ALL_CFS, CF_DEFAULT, - CF_LOCK, CF_RAFT, CF_WRITE, DATA_CFS, DATA_CFS_LEN, + data_cf_offset, 
ApplyProgress, KvEngine, RaftEngine, RaftLogBatch, TabletRegistry, ALL_CFS, + CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, DATA_CFS, DATA_CFS_LEN, }; use kvproto::{ metapb::Region, @@ -111,7 +111,7 @@ impl StateStorage { } impl engine_traits::StateStorage for StateStorage { - fn persist_progress(&self, region_id: u64, tablet_index: u64, pr: FlushProgress) { + fn persist_progress(&self, region_id: u64, tablet_index: u64, pr: ApplyProgress) { let cf = to_static_cf(pr.cf()); let flushed_index = pr.applied_index(); self.raft_engine @@ -140,11 +140,6 @@ struct Progress { last_modified: u64, } -pub fn cf_offset(cf: &str) -> usize { - let cf = if cf.is_empty() { CF_DEFAULT } else { cf }; - DATA_CFS.iter().position(|c| *c == cf).expect(cf) -} - /// `ApplyTrace` is used to track the indexes of modifications and flushes. /// /// It has 3 core functionalities: @@ -204,7 +199,7 @@ impl ApplyTrace { } fn on_flush(&mut self, cf: &str, index: u64) { - let off = cf_offset(cf); + let off = data_cf_offset(cf); // Technically it should always be true. 
if index > self.data_cfs[off].flushed { self.data_cfs[off].flushed = index; @@ -212,7 +207,7 @@ impl ApplyTrace { } fn on_modify(&mut self, cf: &str, index: u64) { - let off = cf_offset(cf); + let off = data_cf_offset(cf); self.data_cfs[off].last_modified = index; } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index c77766f6ce5..a88df2245cc 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -47,7 +47,7 @@ use tikv_util::{ }; pub use self::{ - apply_trace::{cf_offset, write_initial_states, ApplyTrace, DataTrace, StateStorage}, + apply_trace::{write_initial_states, ApplyTrace, DataTrace, StateStorage}, async_writer::AsyncWriter, snapshot::{GenSnapTask, SnapState}, }; From 8d63c2714f0f00fc0a3fe8bb468cbe2aeb851144 Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 8 Feb 2023 14:53:59 +0800 Subject: [PATCH 0504/1149] raftstore-v2: gc all split tablets (#14169) close tikv/tikv#14162, close tikv/tikv#14163 Force gc all split tablets by checking finish event. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/batch/store.rs | 5 ++ .../src/operation/command/admin/mod.rs | 5 +- .../src/operation/command/admin/split.rs | 51 ++++++++++---- .../raftstore-v2/src/operation/command/mod.rs | 4 +- components/raftstore-v2/src/operation/life.rs | 69 ++++++++++++------- .../raftstore-v2/src/worker/tablet_gc.rs | 67 ++++++++++++++++-- 6 files changed, 155 insertions(+), 46 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 83d6b2e1f2a..1a507bb7f10 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -372,6 +372,11 @@ impl StorePollerBuilder { for entry in file_system::read_dir(self.tablet_registry.tablet_root())? 
{ let entry = entry?; let path = entry.path(); + if path.extension().map_or(false, |s| s == "tmp") { + // The directory may be generated by an aborted checkpoint. + file_system::remove_dir_all(&path)?; + continue; + } let Some((prefix, region_id, tablet_index)) = self.tablet_registry.parse_tablet_name(&path) else { continue }; let fsm = match peers.get(®ion_id) { Some((_, fsm)) => fsm, diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 1546983645f..a912cb7a3d5 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -14,7 +14,10 @@ use protobuf::Message; use raftstore::store::{cmd_resp, fsm::apply, msg::ErrorCallback}; use slog::info; use split::SplitResult; -pub use split::{temp_split_path, RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX}; +pub use split::{ + report_split_init_finish, temp_split_path, RequestSplit, SplitFlowControl, SplitInit, + SPLIT_PREFIX, +}; use tikv_util::{box_err, log::SlogFormat}; use txn_types::WriteBatchFlags; diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index f9e44286490..0fbe31277ed 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -53,7 +53,7 @@ use raftstore::{ }, Result, }; -use slog::info; +use slog::{error, info}; use tikv_util::{log::SlogFormat, slog_panic}; use crate::{ @@ -86,8 +86,8 @@ pub struct SplitInit { pub region: metapb::Region, pub check_split: bool, pub scheduled: bool, - pub source_leader: bool, - pub source_id: u64, + pub derived_leader: bool, + pub derived_region_id: u64, /// In-memory pessimistic locks that should be inherited from parent region pub locks: PeerPessimisticLocks, @@ -113,6 +113,35 @@ impl SplitInit { } } +pub fn report_split_init_finish( + 
ctx: &mut StoreContext, + derived_region_id: u64, + finish_region_id: u64, + cleanup: bool, +) where + EK: KvEngine, + ER: RaftEngine, +{ + let _ = ctx.router.force_send( + derived_region_id, + PeerMsg::SplitInitFinish(finish_region_id), + ); + if !cleanup { + return; + } + + if let Err(e) = ctx + .schedulers + .tablet_gc + .schedule(tablet_gc::Task::direct_destroy_path(temp_split_path( + &ctx.tablet_registry, + finish_region_id, + ))) + { + error!(ctx.logger, "failed to destroy split init temp"; "error" => ?e); + } +} + #[derive(Debug)] pub struct RequestSplit { pub epoch: RegionEpoch, @@ -527,8 +556,8 @@ impl Peer { new_ids.insert(new_region_id); let split_init = PeerMsg::SplitInit(Box::new(SplitInit { region: new_region, - source_leader: self.is_leader(), - source_id: region_id, + derived_leader: self.is_leader(), + derived_region_id: region_id, check_split: last_region_id == new_region_id, scheduled: false, approximate_size: estimated_size, @@ -574,10 +603,8 @@ impl Peer { let region_id = split_init.region.id; if self.storage().is_initialized() && self.persisted_index() >= RAFT_INIT_LOG_INDEX { // Race with split operation. The tablet created by split will eventually be - // deleted (TODO). We don't trim it. - let _ = store_ctx - .router - .force_send(split_init.source_id, PeerMsg::SplitInitFinish(region_id)); + // deleted. We don't trim it. 
+ report_split_init_finish(store_ctx, split_init.derived_region_id, region_id, true); return; } @@ -631,7 +658,7 @@ impl Peer { }, )); } - if split_init.source_leader + if split_init.derived_leader && self.leader_id() == INVALID_ID && self.term() == RAFT_INIT_LOG_TERM { @@ -650,9 +677,7 @@ impl Peer { if split_init.check_split { self.add_pending_tick(PeerTick::SplitRegionCheck); } - let _ = store_ctx - .router - .force_send(split_init.source_id, PeerMsg::SplitInitFinish(region_id)); + report_split_init_finish(store_ctx, split_init.derived_region_id, region_id, false); } pub fn on_split_init_finish(&mut self, region_id: u64) { diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 5434eca6b38..d887af7d6d6 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -59,8 +59,8 @@ mod control; mod write; pub use admin::{ - temp_split_path, AdminCmdResult, CompactLogContext, RequestSplit, SplitFlowControl, SplitInit, - SPLIT_PREFIX, + report_split_init_finish, temp_split_path, AdminCmdResult, CompactLogContext, RequestSplit, + SplitFlowControl, SplitInit, SPLIT_PREFIX, }; pub use control::ProposalControl; pub use write::{ diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 11969701c74..8b63f9aae89 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -44,6 +44,7 @@ use super::command::SplitInit; use crate::{ batch::StoreContext, fsm::{PeerFsm, Store}, + operation::command::report_split_init_finish, raft::{Peer, Storage}, router::{CmdResChannel, PeerMsg, PeerTick}, }; @@ -111,6 +112,46 @@ pub struct GcPeerContext { confirmed_ids: Vec, } +fn check_if_to_peer_destroyed( + engine: &ER, + msg: &RaftMessage, + store_id: u64, +) -> engine_traits::Result { + let region_id = msg.get_region_id(); + let to_peer = 
msg.get_to_peer(); + let local_state = match engine.get_region_state(region_id, u64::MAX)? { + Some(s) => s, + None => return Ok(false), + }; + // Split will not create peer in v2, so the state must be Tombstone. + if local_state.get_state() != PeerState::Tombstone { + panic!( + "[region {}] {} peer doesn't exist but has valid local state {:?}", + region_id, to_peer.id, local_state + ); + } + // Compared to v1, we rely on leader to confirm destroy actively, so here + // skip handling gc for simplicity. + let local_epoch = local_state.get_region().get_region_epoch(); + // The region in this peer is already destroyed + if util::is_epoch_stale(msg.get_region_epoch(), local_epoch) { + return Ok(true); + } + if let Some(local_peer) = find_peer(local_state.get_region(), store_id) && to_peer.id <= local_peer.get_id() { + return Ok(true); + } + // If the peer is destroyed by conf change, all above checks will pass. + if local_state + .get_removed_records() + .iter() + .find(|p| p.get_store_id() == store_id) + .map_or(false, |p| to_peer.id <= p.get_id()) + { + return Ok(true); + } + Ok(false) +} + impl Store { /// The method is called during split. 
/// The creation process is: @@ -126,6 +167,7 @@ impl Store { ER: RaftEngine, T: Transport, { + let derived_region_id = msg.derived_region_id; let region_id = msg.region.id; let mut raft_msg = Box::::default(); raft_msg.set_region_id(region_id); @@ -147,7 +189,8 @@ impl Store { self.logger(), "Split peer is destroyed before sending the intialization msg"; "split init msg" => ?m, - ) + ); + report_split_init_finish(ctx, derived_region_id, region_id, true); } } @@ -197,33 +240,13 @@ impl Store { ctx.raft_metrics.message_dropped.stale_msg.inc(); return; } - let mut destroyed = false; - let local_state = match ctx.engine.get_region_state(region_id, u64::MAX) { - Ok(s) => s, + let destroyed = match check_if_to_peer_destroyed(&ctx.engine, &msg, self.store_id()) { + Ok(d) => d, Err(e) => { error!(self.logger(), "failed to get region state"; "region_id" => region_id, "err" => ?e); return; } }; - if let Some(local_state) = local_state { - // Split will not create peer in v2, so the state must be Tombstone. - if local_state.get_state() != PeerState::Tombstone { - panic!( - "[region {}] {} peer doesn't exist but has valid local state {:?}", - region_id, to_peer.id, local_state - ); - } - // Compared to v1, we rely on leader to confirm destroy actively, so here - // skip handling gc for simplicity. 
- let local_epoch = local_state.get_region().get_region_epoch(); - // The region in this peer is already destroyed - if util::is_epoch_stale(msg.get_region_epoch(), local_epoch) { - destroyed = true; - } - if !destroyed && let Some(local_peer) = find_peer(local_state.get_region(), self.store_id()) && to_peer.id <= local_peer.get_id() { - destroyed = true; - } - } if destroyed { if msg.get_is_tombstone() { if let Some(msg) = build_peer_destroyed_report(&mut msg) { diff --git a/components/raftstore-v2/src/worker/tablet_gc.rs b/components/raftstore-v2/src/worker/tablet_gc.rs index dc5f3dad56d..d9bd03b326a 100644 --- a/components/raftstore-v2/src/worker/tablet_gc.rs +++ b/components/raftstore-v2/src/worker/tablet_gc.rs @@ -13,6 +13,7 @@ use slog::{debug, error, info, warn, Logger}; use tikv_util::{ worker::{Runnable, RunnableWithTimer}, yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, + Either, }; const DEFAULT_BACKGROUND_POOL_SIZE: usize = 6; @@ -25,7 +26,8 @@ pub enum Task { cb: Box, }, PrepareDestroy { - tablet: EK, + // A path is passed only when the db is never opened. + tablet: Either, region_id: u64, wait_for_persisted: u64, }, @@ -33,6 +35,8 @@ pub enum Task { region_id: u64, persisted_index: u64, }, + /// Sometimes we know for sure a tablet can be destroyed directly. + DirectDestroy { tablet: Either }, } impl Display for Task { @@ -63,6 +67,9 @@ impl Display for Task { "destroy tablet for region_id {} persisted_index {}", region_id, persisted_index, ), + Task::DirectDestroy { .. 
} => { + write!(f, "direct destroy tablet") + } } } } @@ -81,7 +88,16 @@ impl Task { #[inline] pub fn prepare_destroy(tablet: EK, region_id: u64, wait_for_persisted: u64) -> Self { Task::PrepareDestroy { - tablet, + tablet: Either::Left(tablet), + region_id, + wait_for_persisted, + } + } + + #[inline] + pub fn prepare_destroy_path(path: PathBuf, region_id: u64, wait_for_persisted: u64) -> Self { + Task::PrepareDestroy { + tablet: Either::Right(path), region_id, wait_for_persisted, } @@ -94,6 +110,20 @@ impl Task { persisted_index, } } + + #[inline] + pub fn direct_destroy(tablet: EK) -> Self { + Task::DirectDestroy { + tablet: Either::Left(tablet), + } + } + + #[inline] + pub fn direct_destroy_path(path: PathBuf) -> Self { + Task::DirectDestroy { + tablet: Either::Right(path), + } + } } pub struct Runner { @@ -184,14 +214,29 @@ impl Runner { .unwrap(); } - fn prepare_destroy(&mut self, region_id: u64, tablet: EK, wait_for_persisted: u64) { - // The tablet is about to be deleted, flush is a waste and will block destroy. - let _ = tablet.set_db_options(&[("avoid_flush_during_shutdown", "true")]); - let _ = tablet.pause_background_work(); + fn pause_background_work(&mut self, tablet: Either) -> PathBuf { + match tablet { + Either::Left(tablet) => { + // The tablet is about to be deleted, flush is a waste and will block destroy. 
+ let _ = tablet.set_db_options(&[("avoid_flush_during_shutdown", "true")]); + let _ = tablet.pause_background_work(); + PathBuf::from(tablet.path()) + } + Either::Right(path) => path, + } + } + + fn prepare_destroy( + &mut self, + region_id: u64, + tablet: Either, + wait_for_persisted: u64, + ) { + let path = self.pause_background_work(tablet); self.waiting_destroy_tasks .entry(region_id) .or_default() - .push((PathBuf::from(tablet.path()), wait_for_persisted)); + .push((path, wait_for_persisted)); } fn destroy(&mut self, region_id: u64, persisted: u64) { @@ -208,6 +253,13 @@ impl Runner { } } + fn direct_destroy(&mut self, tablet: Either) { + let path = self.pause_background_work(tablet); + if !Self::process_destroy_task(&self.logger, &self.tablet_registry, &path) { + self.pending_destroy_tasks.push(path); + } + } + /// Returns true if task is consumed. Failure is considered consumed. fn process_destroy_task(logger: &Logger, registry: &TabletRegistry, path: &Path) -> bool { match EK::locked(path.to_str().unwrap()) { @@ -268,6 +320,7 @@ where region_id, persisted_index, } => self.destroy(region_id, persisted_index), + Task::DirectDestroy { tablet, .. 
} => self.direct_destroy(tablet), } } } From eb2e41645fa6a8db25211e8e3831dbea89bb7e18 Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Wed, 8 Feb 2023 15:23:59 +0800 Subject: [PATCH 0505/1149] log-backup: retry for more time when failed to get snapshot(to adapt the feature witness) (#14155) ref tikv/tikv#14137 Signed-off-by: joccau Co-authored-by: Ti Chi Robot --- components/backup-stream/src/endpoint.rs | 8 +++++--- components/backup-stream/src/event_loader.rs | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index dc053feff33..a13c52c9212 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -349,7 +349,7 @@ where let mut watcher = match watcher { Ok(w) => w, Err(e) => { - e.report("failed to start watch pause"); + e.report("failed to start watch task"); tokio::time::sleep(Duration::from_secs(5)).await; continue; } @@ -358,11 +358,12 @@ where loop { if let Some(event) = watcher.stream.next().await { - info!("backup stream watch event from etcd"; "event" => ?event); + info!("backup stream watch task from etcd"; "event" => ?event); let revision = meta_client.get_reversion().await; if let Ok(r) = revision { revision_new = r; + info!("update the revision"; "revision" => revision_new); } match event { @@ -408,10 +409,11 @@ where loop { if let Some(event) = watcher.stream.next().await { - info!("backup stream watch event from etcd"; "event" => ?event); + info!("backup stream watch pause from etcd"; "event" => ?event); let revision = meta_client.get_reversion().await; if let Ok(r) = revision { revision_new = r; + info!("update the revision"; "revision" => revision_new); } match event { diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 6222f058cd4..13c958a499a 100644 --- a/components/backup-stream/src/event_loader.rs 
+++ b/components/backup-stream/src/event_loader.rs @@ -43,7 +43,7 @@ use crate::{ Task, }; -const MAX_GET_SNAPSHOT_RETRY: usize = 3; +const MAX_GET_SNAPSHOT_RETRY: usize = 5; #[derive(Clone)] pub struct PendingMemoryQuota(Arc); @@ -269,7 +269,7 @@ where if !can_retry { break; } - std::thread::sleep(Duration::from_millis(500)); + std::thread::sleep(Duration::from_secs(1)); continue; } } From dda37a457c116586992b6a5b758b807f1ca2c1fc Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 8 Feb 2023 15:53:59 +0800 Subject: [PATCH 0506/1149] server: disable `PersistStats` RocksDB task for v2 (#14111) ref tikv/tikv#12842 Signed-off-by: tabokie --- src/config/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/config/mod.rs b/src/config/mod.rs index 4188d8409e3..7247d426b21 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1324,6 +1324,10 @@ impl DbConfig { if let Some(r) = &shared.write_buffer_manager { opts.set_write_buffer_manager(r); } + if for_engine == EngineType::RaftKv2 { + // Historical stats are not used. 
+ opts.set_stats_persist_period_sec(0); + } opts } From f46f3866410b1698d306242f4168bae7c3366c48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 8 Feb 2023 16:23:59 +0800 Subject: [PATCH 0507/1149] log-backup: update the endpoints when etcd cluster config changes (#14127) close tikv/tikv#14165 Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/backup-stream/Cargo.toml | 2 +- .../backup-stream/src/metadata/store/etcd.rs | 317 +++++++++++++++++- .../src/metadata/store/lazy_etcd.rs | 109 +++++- 4 files changed, 417 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1b3c1452ebf..7750f729778 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1702,7 +1702,7 @@ dependencies = [ [[package]] name = "etcd-client" version = "0.10.2" -source = "git+https://github.com/pingcap/etcd-client?rev=14a6f8731f1890d5fd2f6e16a9f0d0a306b0599e#14a6f8731f1890d5fd2f6e16a9f0d0a306b0599e" +source = "git+https://github.com/pingcap/etcd-client?rev=41d393c32a7a7c728550cee1d9a138dafe6f3e27#41d393c32a7a7c728550cee1d9a138dafe6f3e27" dependencies = [ "http", "hyper", diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index b1a61580cb6..43bda42a088 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -32,7 +32,7 @@ engine_traits = { workspace = true } error_code = { workspace = true } # We cannot update the etcd-client to latest version because of the cyclic requirement. # Also we need wait until https://github.com/etcdv3/etcd-client/pull/43/files to be merged. 
-etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "14a6f8731f1890d5fd2f6e16a9f0d0a306b0599e", features = ["pub-response-field", "tls-openssl-vendored"] } +etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "41d393c32a7a7c728550cee1d9a138dafe6f3e27", features = ["pub-response-field", "tls-openssl-vendored"] } external_storage = { workspace = true } external_storage_export = { workspace = true } fail = "0.5" diff --git a/components/backup-stream/src/metadata/store/etcd.rs b/components/backup-stream/src/metadata/store/etcd.rs index e52cc4f92d9..62a246a08ef 100644 --- a/components/backup-stream/src/metadata/store/etcd.rs +++ b/components/backup-stream/src/metadata/store/etcd.rs @@ -4,17 +4,17 @@ use std::{ cmp::Ordering, collections::{HashMap, HashSet}, pin::Pin, - sync::Arc, + sync::{Arc, Weak}, time::Duration, }; use async_trait::async_trait; use etcd_client::{ - Client, Compare, CompareOp, DeleteOptions, EventType, GetOptions, PutOptions, SortOrder, - SortTarget, Txn, TxnOp, WatchOptions, + Client, Compare, CompareOp, DeleteOptions, EventType, GetOptions, Member, PutOptions, + SortOrder, SortTarget, Txn, TxnOp, WatchOptions, }; use futures::StreamExt; -use tikv_util::warn; +use tikv_util::{info, warn}; use tokio::sync::Mutex; use tokio_stream::Stream; @@ -23,6 +23,7 @@ use super::{ TransactionOp, }; use crate::{ + annotate, errors::{Error, EtcdErrorExt, Result}, metadata::{ keys::{KeyValue, MetaKey}, @@ -35,6 +36,187 @@ use crate::{ #[derive(Clone)] pub struct EtcdStore(Arc>); +#[derive(Default)] +pub(super) struct TopologyUpdater { + last_urls: HashSet, + client: Weak>, + + // back off configs + pub(super) loop_interval: Duration, + pub(super) loop_failure_back_off: Duration, +} + +impl std::fmt::Debug for TopologyUpdater { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TopologyUpdater") + .field("last_urls", &self.last_urls) + .finish() + } +} + +#[async_trait] +pub(super) trait 
ClusterInfoProvider { + async fn get_members(&mut self) -> Result>; + async fn add_endpoint(&mut self, endpoint: &str) -> Result<()>; + async fn remove_endpoint(&mut self, endpoint: &str) -> Result<()>; +} + +#[async_trait] +impl ClusterInfoProvider for Client { + async fn get_members(&mut self) -> Result> { + let result = self.member_list().await?; + Ok(result.members().to_vec()) + } + + async fn add_endpoint(&mut self, endpoint: &str) -> Result<()> { + Client::add_endpoint(self, endpoint) + .await + .map_err(|err| annotate!(err, "during adding the endpoint {}", endpoint))?; + Ok(()) + } + + async fn remove_endpoint(&mut self, endpoint: &str) -> Result<()> { + Client::remove_endpoint(self, endpoint) + .await + .map_err(|err| annotate!(err, "during removing the endpoint {}", endpoint))?; + Ok(()) + } +} + +#[derive(Debug, Clone, Copy)] +enum DiffType { + Add, + Remove, +} + +#[derive(Clone)] +struct Diff { + diff_type: DiffType, + url: String, +} + +impl std::fmt::Debug for Diff { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let syn = match self.diff_type { + DiffType::Add => "+", + DiffType::Remove => "-", + }; + write!(f, "{}{}", syn, self.url) + } +} + +impl TopologyUpdater { + // Note: we may require the initial endpoints from the arguments directly. + // So the internal map won't get inconsistent when the cluster config changed + // during initializing. + // But that is impossible for now because we cannot query the node ID before + // connecting. 
+ pub fn new(cluster_ref: Weak>) -> Self { + Self { + last_urls: Default::default(), + client: cluster_ref, + + loop_interval: Duration::from_secs(60), + loop_failure_back_off: Duration::from_secs(10), + } + } + + pub fn init(&mut self, members: impl Iterator) { + for mem in members { + self.last_urls.insert(mem); + } + } + + fn diff(&self, incoming: &[Member]) -> Vec { + let newer = incoming + .iter() + .flat_map(|mem| mem.client_urls().iter()) + .collect::>(); + let mut result = vec![]; + for url in &newer { + if !self.last_urls.contains(*url) { + result.push(Diff { + diff_type: DiffType::Add, + url: String::clone(url), + }) + } + } + for url in &self.last_urls { + if !newer.contains(url) { + result.push(Diff { + diff_type: DiffType::Remove, + url: String::clone(url), + }) + } + } + result + } + + fn apply(&mut self, diff: &Diff) -> Option { + match diff.diff_type { + DiffType::Add => match self.last_urls.insert(diff.url.clone()) { + true => None, + false => Some(format!( + "the member to adding with url {} overrides existing urls.", + diff.url + )), + }, + DiffType::Remove => match self.last_urls.remove(&diff.url) { + true => None, + false => Some(format!( + "the member to remove with url {} hasn't been added.", + diff.url + )), + }, + } + } + + async fn update_topology_by(&mut self, cli: &mut C, diff: &Diff) -> Result<()> { + match diff.diff_type { + DiffType::Add => cli.add_endpoint(&diff.url).await?, + DiffType::Remove => cli.remove_endpoint(&diff.url).await?, + } + Ok(()) + } + + async fn do_update(&mut self, cli: &mut C) -> Result<()> { + let cluster = cli.get_members().await?; + let diffs = self.diff(cluster.as_slice()); + if !diffs.is_empty() { + info!("log backup updating store topology."; "diffs" => ?diffs, "current_state" => ?self); + } + for diff in diffs { + match self.apply(&diff) { + Some(warning) => { + warn!("log backup meet some wrong status when updating PD clients, skipping this update."; "warn" => %warning); + } + None => 
self.update_topology_by(cli, &diff).await?, + } + } + Result::Ok(()) + } + + pub(super) async fn update_topology_loop(&mut self) { + while let Some(cli) = self.client.upgrade() { + let mut lock = cli.lock().await; + let result = self.do_update(&mut lock).await; + drop(lock); + match result { + Ok(_) => tokio::time::sleep(self.loop_interval).await, + Err(err) => { + err.report("during updating etcd topology"); + tokio::time::sleep(self.loop_failure_back_off).await; + } + } + } + } + + pub async fn main_loop(mut self) { + info!("log backup topology updater finish initialization."; "current_state" => ?self); + self.update_topology_loop().await + } +} + impl EtcdStore { pub fn connect, S: AsRef<[E]>>(endpoints: S) -> Self { // TODO remove block_on @@ -42,6 +224,10 @@ impl EtcdStore { futures::executor::block_on(etcd_client::Client::connect(&endpoints, None)).unwrap(); Self(Arc::new(Mutex::new(cli))) } + + pub fn inner(&self) -> &Arc> { + &self.0 + } } impl From for EtcdStore { @@ -316,3 +502,126 @@ impl Snapshot for EtcdSnapshot { self.revision } } + +#[cfg(test)] +mod test { + use std::{ + collections::{HashMap, HashSet}, + fmt::Display, + sync::Arc, + time::Duration, + }; + + use async_trait::async_trait; + use etcd_client::{proto::PbMember, Member}; + use tokio::{sync::Mutex, time::timeout}; + + use super::{ClusterInfoProvider, TopologyUpdater}; + use crate::errors::Result; + + #[derive(Default, Debug)] + struct FakeCluster { + id_alloc: u64, + members: HashMap, + endpoints: HashSet, + } + + #[async_trait] + impl ClusterInfoProvider for FakeCluster { + async fn get_members(&mut self) -> Result> { + let members = self.members.values().cloned().collect(); + Ok(members) + } + + async fn add_endpoint(&mut self, endpoint: &str) -> Result<()> { + self.endpoints.insert(endpoint.to_owned()); + Ok(()) + } + + async fn remove_endpoint(&mut self, endpoint: &str) -> Result<()> { + self.endpoints.remove(endpoint); + Ok(()) + } + } + + impl FakeCluster { + fn new_id(&mut self) -> 
u64 { + let i = self.id_alloc; + self.id_alloc += 1; + i + } + + fn init_with_member(&mut self, n: usize) -> Vec { + let mut endpoints = Vec::with_capacity(n); + for _ in 0..n { + let mem = self.add_member(); + let url = format!("fakestore://{}", mem); + self.endpoints.insert(url.clone()); + endpoints.push(url); + } + endpoints + } + + fn add_member(&mut self) -> u64 { + let id = self.new_id(); + let mut mem = PbMember::default(); + mem.id = id; + mem.client_ur_ls = vec![format!("fakestore://{}", id)]; + // Safety: `Member` is #[repr(transparent)]. + self.members.insert(id, unsafe { std::mem::transmute(mem) }); + id + } + + fn remove_member(&mut self, id: u64) -> bool { + self.members.remove(&id).is_some() + } + + fn check_consistency(&self, message: impl Display) { + let urls = self + .members + .values() + .flat_map(|mem| mem.client_urls().iter().cloned()) + .collect::>(); + assert_eq!( + urls, self.endpoints, + "{}: consistency check not passed.", + message + ); + } + } + + #[test] + fn test_topology_updater() { + let mut c = FakeCluster::default(); + let eps = c.init_with_member(3); + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + + let sc = Arc::new(Mutex::new(c)); + let mut tu = TopologyUpdater::new(Arc::downgrade(&sc)); + tu.loop_failure_back_off = Duration::ZERO; + tu.loop_interval = Duration::from_millis(100); + tu.init(eps.into_iter()); + + { + let mut sc = sc.blocking_lock(); + sc.check_consistency("after init"); + sc.add_member(); + rt.block_on(tu.do_update(&mut sc)).unwrap(); + sc.check_consistency("adding nodes"); + sc.add_member(); + sc.add_member(); + rt.block_on(tu.do_update(&mut sc)).unwrap(); + sc.check_consistency("adding more nodes"); + assert!(sc.remove_member(0), "{:?}", sc); + rt.block_on(tu.do_update(&mut sc)).unwrap(); + sc.check_consistency("removing nodes"); + } + + drop(sc); + rt.block_on(async { timeout(Duration::from_secs(1), tu.update_topology_loop()).await }) + .unwrap() + } +} diff 
--git a/components/backup-stream/src/metadata/store/lazy_etcd.rs b/components/backup-stream/src/metadata/store/lazy_etcd.rs index 7e8b7881070..3b697dae9b9 100644 --- a/components/backup-stream/src/metadata/store/lazy_etcd.rs +++ b/components/backup-stream/src/metadata/store/lazy_etcd.rs @@ -19,7 +19,10 @@ use tikv_util::{ }; use tokio::sync::Mutex as AsyncMutex; -use super::{etcd::EtcdSnapshot, EtcdStore, MetaStore}; +use super::{ + etcd::{EtcdSnapshot, TopologyUpdater}, + EtcdStore, MetaStore, +}; use crate::errors::{ContextualResultExt, Result}; const RPC_TIMEOUT: Duration = Duration::from_secs(30); @@ -34,6 +37,16 @@ pub struct ConnectionConfig { pub keep_alive_timeout: Duration, } +impl Default for ConnectionConfig { + fn default() -> Self { + Self { + tls: Default::default(), + keep_alive_interval: Duration::from_secs(10), + keep_alive_timeout: Duration::from_secs(3), + } + } +} + impl std::fmt::Debug for ConnectionConfig { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("ConnectionConfig") @@ -89,12 +102,15 @@ impl ConnectionConfig { impl LazyEtcdClient { pub fn new(endpoints: &[String], conf: ConnectionConfig) -> Self { - Self(Arc::new(AsyncMutex::new(LazyEtcdClientInner { - conf, - endpoints: endpoints.iter().map(ToString::to_string).collect(), - last_modified: None, - cli: None, - }))) + let mut inner = LazyEtcdClientInner::new(endpoints, conf); + inner.normalize_urls(); + Self(Arc::new(AsyncMutex::new(inner))) + } + + // For testing -- check whether the endpoints are properly normalized. 
+ #[cfg(test)] + pub(super) fn endpoints(&self) -> Vec { + self.0.blocking_lock().endpoints.clone() } async fn get_cli(&self) -> Result { @@ -112,6 +128,17 @@ pub struct LazyEtcdClientInner { cli: Option, } +impl LazyEtcdClientInner { + fn new(endpoints: &[String], conf: ConnectionConfig) -> Self { + LazyEtcdClientInner { + conf, + endpoints: endpoints.iter().map(ToString::to_string).collect(), + last_modified: None, + cli: None, + } + } +} + fn etcd_error_is_retryable(etcd_err: &EtcdError) -> bool { match etcd_err { EtcdError::InvalidArgs(_) @@ -164,6 +191,21 @@ where } impl LazyEtcdClientInner { + fn normalize_urls(&mut self) { + let enabled_tls = self.conf.tls.client_suite().is_ok(); + for endpoint in self.endpoints.iter_mut() { + // Don't touch them when the schemes already provided. + // Given etcd is based on gRPC (which relies on HTTP/2), + // there shouldn't be other schemes available (Hopefully...) + if endpoint.starts_with("http://") || endpoint.starts_with("https://") { + continue; + } + let expected_scheme = if enabled_tls { "https" } else { "http" }; + *endpoint = format!("{}://{}", expected_scheme, endpoint) + } + info!("log backup normalized etcd endpoints"; "endpoints" => ?self.endpoints); + } + async fn connect(&mut self) -> Result<&EtcdStore> { let store = retry(|| { // For now, the interface of the `etcd_client` doesn't us to control @@ -178,7 +220,10 @@ impl LazyEtcdClientInner { .await .context("during connecting to the etcd")?; let store = EtcdStore::from(store); + let mut updater = TopologyUpdater::new(Arc::downgrade(store.inner())); self.cli = Some(store); + updater.init(self.endpoints.iter().cloned()); + tokio::task::spawn(updater.main_loop()); Ok(self.cli.as_ref().unwrap()) } @@ -219,3 +264,53 @@ impl MetaStore for LazyEtcdClient { self.get_cli().await?.txn_cond(txn).await } } + +#[cfg(test)] +mod tests { + use std::{fs::File, io::Write, path::PathBuf, sync::Arc}; + + use security::{SecurityConfig, SecurityManager}; + use 
tempfile::TempDir; + + use super::LazyEtcdClient; + use crate::{errors::Result, metadata::ConnectionConfig}; + + #[test] + fn test_normalize_url() -> Result<()> { + let endpoints = ["http://pd-1".to_owned(), "pd-2".to_owned()]; + let le = LazyEtcdClient::new(&endpoints, Default::default()); + assert_eq!(le.endpoints(), &["http://pd-1", "http://pd-2"]); + + let tempdir = TempDir::new()?; + let write_all = |path: &PathBuf, content| { + let mut f = File::create(path)?; + f.write_all(content)?; + Result::Ok(()) + }; + let ca = tempdir.path().join("ca"); + let cert = tempdir.path().join("cert"); + let key = tempdir.path().join("key"); + write_all(&ca, b"CA :3")?; + write_all(&cert, b"Cert :D")?; + write_all(&key, b"Key X)")?; + + let cfg = SecurityConfig { + ca_path: ca.to_string_lossy().into_owned(), + cert_path: cert.to_string_lossy().into_owned(), + key_path: key.to_string_lossy().into_owned(), + + ..Default::default() + }; + let sm = SecurityManager::new(&cfg).unwrap(); + let endpoints = ["https://pd-1".to_owned(), "pd-2".to_owned()]; + let le = LazyEtcdClient::new( + &endpoints, + ConnectionConfig { + tls: Arc::new(sm), + ..Default::default() + }, + ); + assert_eq!(le.endpoints(), &["https://pd-1", "https://pd-2"]); + Result::Ok(()) + } +} From f6513edc265b6716f3b99ed9b10dd4ffacdf4de2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 8 Feb 2023 17:27:59 +0800 Subject: [PATCH 0508/1149] log-backup: edit checkpoint to 2 hours (#13894) ref tikv/tikv#13889 Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- components/backup-stream/src/checkpoint_manager.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/backup-stream/src/checkpoint_manager.rs b/components/backup-stream/src/checkpoint_manager.rs index 5cf4292faa3..47ec34d2113 100644 --- a/components/backup-stream/src/checkpoint_manager.rs +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -371,13 +371,13 @@ 
impl FlushObserver for BasicFlushObserver { .update_service_safe_point( format!("backup-stream-{}-{}", task, self.store_id), TimeStamp::new(rts.saturating_sub(1)), - // Add a service safe point for 24 hours. (the same as fatal error.) + // Add a service safe point for 2 hours. // We make it the same duration as we meet fatal errors because TiKV may be // SIGKILL'ed after it meets fatal error and before it successfully updated the // fatal error safepoint. // TODO: We'd better make the coordinator, who really // calculates the checkpoint to register service safepoint. - Duration::from_secs(60 * 60 * 24), + Duration::from_secs(60 * 60 * 2), ) .await { From ee00d70008562b45bafb0dcd88d70520a18fa742 Mon Sep 17 00:00:00 2001 From: Connor Date: Wed, 8 Feb 2023 18:23:59 +0800 Subject: [PATCH 0509/1149] grafana: add grpc resource group QPS panel (#14171) ref tikv/tikv#13730 Add grpc resource group QPS panel and fix datasource Signed-off-by: Connor1996 Co-authored-by: Ti Chi Robot --- metrics/grafana/tikv_details.json | 154 ++++++++++++++++++++++++------ 1 file changed, 126 insertions(+), 28 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index cff4b5f7742..357edac04a7 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -40,6 +40,12 @@ "name": "Singlestat", "version": "" }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, { "type": "panel", "id": "table", @@ -64,7 +70,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1651043540619, + "iteration": 1675760728538, "links": [], "panels": [ { @@ -4532,7 +4538,6 @@ }, "yaxes": [ { - "$$hashKey": "object:150", "format": "s", "label": null, "logBase": 2, @@ -4541,7 +4546,6 @@ "show": true }, { - "$$hashKey": "object:151", "format": "short", "label": null, "logBase": 2, @@ -4632,7 +4636,6 @@ }, "yaxes": [ { - "$$hashKey": "object:150", "format": "s", "label": null, "logBase": 2, @@ -4641,7 +4644,6 @@ 
"show": true }, { - "$$hashKey": "object:151", "format": "short", "label": null, "logBase": 2, @@ -4697,7 +4699,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:80", "alias": "/.*/", "stack": "A" } @@ -4746,7 +4747,6 @@ }, "yaxes": [ { - "$$hashKey": "object:264", "format": "ns", "label": null, "logBase": 1, @@ -4755,7 +4755,6 @@ "show": true }, { - "$$hashKey": "object:265", "format": "short", "label": null, "logBase": 1, @@ -4811,7 +4810,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:62", "alias": "/.*/", "stack": "A" } @@ -4858,7 +4856,6 @@ }, "yaxes": [ { - "$$hashKey": "object:264", "format": "binBps", "label": null, "logBase": 1, @@ -4867,7 +4864,6 @@ "show": true }, { - "$$hashKey": "object:265", "format": "short", "label": null, "logBase": 1, @@ -4915,7 +4911,7 @@ "h": 8, "w": 12, "x": 0, - "y": 4 + "y": 5 }, "hiddenSeries": false, "id": 95, @@ -4941,7 +4937,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -5019,7 +5015,7 @@ "h": 8, "w": 12, "x": 12, - "y": 4 + "y": 5 }, "hiddenSeries": false, "id": 107, @@ -5045,7 +5041,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -5123,7 +5119,7 @@ "h": 8, "w": 12, "x": 0, - "y": 12 + "y": 13 }, "hiddenSeries": false, "id": 98, @@ -5151,7 +5147,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -5228,7 +5224,7 @@ "h": 8, "w": 12, "x": 12, - "y": 12 + "y": 13 }, "hiddenSeries": false, "id": 2532, @@ -5256,7 +5252,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -5333,7 +5329,7 @@ "h": 8, "w": 12, "x": 0, - "y": 
20 + "y": 21 }, "hiddenSeries": false, "id": 2533, @@ -5361,7 +5357,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -5473,7 +5469,7 @@ "h": 8, "w": 12, "x": 12, - "y": 20 + "y": 21 }, "hiddenSeries": false, "id": 2534, @@ -5501,7 +5497,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -5653,7 +5649,6 @@ }, "yaxes": [ { - "$$hashKey": "object:69", "format": "ops", "label": null, "logBase": 1, @@ -5662,7 +5657,6 @@ "show": true }, { - "$$hashKey": "object:70", "format": "short", "label": null, "logBase": 1, @@ -5761,7 +5755,6 @@ }, "yaxes": [ { - "$$hashKey": "object:69", "format": "µs", "label": null, "logBase": 1, @@ -5770,7 +5763,112 @@ "show": true }, { - "$$hashKey": "object:70", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The QPS of different resource groups of gRPC request", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 37 + }, + "hiddenSeries": false, + "id": 23763573091, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_grpc_resource_group_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "metric": "tikv_grpc_msg_duration_seconds_bucket", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "gRPC resource group QPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { "format": "short", "label": null, "logBase": 1, @@ -15871,7 +15969,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "tidb-cluster", + "datasource": "${DS_TEST-CLUSTER}", "editable": true, "error": false, "fieldConfig": { From 2d7bf4c2089c0b9d8e2ef6d6417d2473f66f2892 Mon Sep 17 00:00:00 2001 From: glorv Date: Wed, 8 Feb 2023 20:31:59 +0800 Subject: [PATCH 0510/1149] resource_control: support return resource groups config via http (#14170) ref tikv/tikv#13730 Signed-off-by: glorv --- components/pd_client/src/client.rs | 8 +- .../resource_control/src/resource_group.rs | 55 ++++++ components/resource_control/src/service.rs | 182 ++++++++++-------- components/server/src/server.rs | 1 + components/server/src/server2.rs | 1 + components/test_pd/src/mocker/etcd.rs | 27 ++- src/server/status_server/mod.rs | 79 ++++++++ tests/integrations/server/status_server.rs | 1 + 8 files changed, 268 insertions(+), 86 deletions(-) diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index b0c21797a91..402192596b5 100644 --- 
a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -338,7 +338,13 @@ impl PdClient for RpcClient { Err(err) => Err(box_err!("{:?}", err)), } }) as PdFuture<_>, - Err(err) => Box::pin(async move { Err(box_err!("{:?}", err)) }) as PdFuture<_>, + Err(err) => Box::pin(async move { + Err(box_err!( + "load global config failed, path: '{}', err: {:?}", + req.get_config_path(), + err + )) + }) as PdFuture<_>, }; self.pd_client .request(req, executor, LEADER_CHANGE_RETRY) diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index c5112c13516..390214bc687 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -13,6 +13,7 @@ use kvproto::{ kvrpcpb::CommandPri, resource_manager::{GroupMode, ResourceGroup}, }; +use tikv_util::info; use yatp::queue::priority::TaskPriorityProvider; // a read task cost at least 50us. @@ -70,6 +71,7 @@ impl ResourceGroupManager { let ru_quota = Self::get_ru_setting(&rg, controller.is_read); controller.add_resource_group(group_name.clone().into_bytes(), ru_quota); }); + info!("add resource group"; "name"=> &rg.name, "ru" => rg.get_r_u_settings().get_r_u().get_settings().get_fill_rate()); self.resource_groups.insert(group_name, rg); } @@ -78,9 +80,28 @@ impl ResourceGroupManager { self.registry.lock().unwrap().iter().for_each(|controller| { controller.remove_resource_group(group_name.as_bytes()); }); + info!("remove resource group"; "name"=> name); self.resource_groups.remove(&group_name); } + pub fn retain(&self, mut f: impl FnMut(&String, &ResourceGroup) -> bool) { + let mut removed_names = vec![]; + self.resource_groups.retain(|k, v| { + let ret = f(k, v); + if !ret { + removed_names.push(k.clone()); + } + ret + }); + if !removed_names.is_empty() { + self.registry.lock().unwrap().iter().for_each(|controller| { + for name in &removed_names { + 
controller.remove_resource_group(name.as_bytes()); + } + }); + } + } + pub fn get_resource_group(&self, name: &str) -> Option> { self.resource_groups.get(&name.to_ascii_lowercase()) } @@ -173,6 +194,7 @@ impl ResourceController { virtual_time: AtomicU64::new(self.last_min_vt.load(Ordering::Acquire)), vt_delta_for_get, }; + // maybe update existed group self.resource_consumptions.insert(name, group); } @@ -192,6 +214,7 @@ impl ResourceController { // do not remove the default resource group, reset to default setting instead. if DEFAULT_RESOURCE_GROUP_NAME.as_bytes() == name { self.add_resource_group(DEFAULT_RESOURCE_GROUP_NAME.as_bytes().to_owned(), 0); + return; } self.resource_consumptions.remove(name); } @@ -487,4 +510,36 @@ pub(crate) mod tests { 10 ); } + + #[test] + fn test_retain_resource_groups() { + let resource_manager = ResourceGroupManager::default(); + let resource_ctl = resource_manager.derive_controller("test_read".into(), true); + let resource_ctl_write = resource_manager.derive_controller("test_write".into(), false); + + for i in 0..5 { + let group1 = new_resource_group_ru(format!("test{}", i), 100); + resource_manager.add_resource_group(group1); + // add a resource group with big ru + let group1 = new_resource_group_ru(format!("group{}", i), 100); + resource_manager.add_resource_group(group1); + } + assert_eq!(resource_manager.get_all_resource_groups().len(), 10); + assert_eq!(resource_ctl.resource_consumptions.len(), 11); // 10 + 1(default) + assert_eq!(resource_ctl_write.resource_consumptions.len(), 11); + + resource_manager.retain(|k, _v| k.starts_with("test")); + assert_eq!(resource_manager.get_all_resource_groups().len(), 5); + assert_eq!(resource_ctl.resource_consumptions.len(), 6); + assert_eq!(resource_ctl_write.resource_consumptions.len(), 6); + assert!(resource_manager.get_resource_group("group1").is_none()); + assert_eq!( + resource_ctl.resource_group("group2".as_bytes()).key(), + "default".as_bytes() + ); + assert_eq!( + 
resource_ctl_write.resource_group("group2".as_bytes()).key(), + "default".as_bytes() + ); + } } diff --git a/components/resource_control/src/service.rs b/components/resource_control/src/service.rs index fc24af4fdc4..a2d64f57c3b 100644 --- a/components/resource_control/src/service.rs +++ b/components/resource_control/src/service.rs @@ -1,6 +1,6 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use std::{sync::Arc, time::Duration}; +use std::{collections::HashSet, sync::Arc, time::Duration}; use futures::{compat::Future01CompatExt, StreamExt}; use kvproto::{pdpb::EventType, resource_manager::ResourceGroup}; @@ -35,71 +35,76 @@ const RETRY_INTERVAL: Duration = Duration::from_secs(1); // to consistent with p impl ResourceManagerService { pub async fn watch_resource_groups(&mut self) { - // Firstly, load all resource groups as of now. - let (groups, revision) = self.list_resource_groups().await; - self.revision = revision; - groups - .into_iter() - .for_each(|rg| self.manager.add_resource_group(rg)); - // Secondly, start watcher at loading revision. - loop { - match self - .pd_client - .watch_global_config(RESOURCE_CONTROL_CONFIG_PATH.to_string(), self.revision) - { - Ok(mut stream) => { - while let Some(grpc_response) = stream.next().await { - match grpc_response { - Ok(r) => { - self.revision = r.get_revision(); - r.get_changes() - .iter() - .for_each(|item| match item.get_kind() { - EventType::Put => { - if let Ok(group) = - protobuf::parse_from_bytes::( + 'outer: loop { + // Firstly, load all resource groups as of now. + self.reload_all_resource_groups().await; + // Secondly, start watcher at loading revision. 
+ loop { + match self + .pd_client + .watch_global_config(RESOURCE_CONTROL_CONFIG_PATH.to_string(), self.revision) + { + Ok(mut stream) => { + while let Some(grpc_response) = stream.next().await { + match grpc_response { + Ok(r) => { + self.revision = r.get_revision(); + r.get_changes() + .iter() + .for_each(|item| match item.get_kind() { + EventType::Put => { + match protobuf::parse_from_bytes::( item.get_payload(), - ) - { - self.manager.add_resource_group(group); + ) { + Ok(group) => { + self.manager.add_resource_group(group); + } + Err(e) => { + error!("parse put resource group event failed"; "name" => item.get_name(), "err" => ?e); + } + } } - } - EventType::Delete => { - self.manager.remove_resource_group(item.get_name()); - } - }); - } - Err(err) => { - error!("failed to get stream"; "err" => ?err); - let _ = GLOBAL_TIMER_HANDLE - .delay(std::time::Instant::now() + RETRY_INTERVAL) - .compat() - .await; + EventType::Delete => { + match protobuf::parse_from_bytes::( + item.get_payload(), + ) { + Ok(group) => { + self.manager.remove_resource_group(group.get_name()); + } + Err(e) => { + error!("parse delete resource group event failed"; "name" => item.get_name(), "err" => ?e); + } + } + } + }); + } + Err(err) => { + error!("failed to get stream"; "err" => ?err); + let _ = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + RETRY_INTERVAL) + .compat() + .await; + } } } } - } - Err(PdError::DataCompacted(msg)) => { - error!("required revision has been compacted"; "err" => ?msg); - // If the etcd revision is compacted, we need to reload all resouce groups. 
- let (groups, revision) = self.list_resource_groups().await; - self.revision = revision; - groups - .into_iter() - .for_each(|rg| self.manager.add_resource_group(rg)); - } - Err(err) => { - error!("failed to watch resource groups"; "err" => ?err); - let _ = GLOBAL_TIMER_HANDLE - .delay(std::time::Instant::now() + RETRY_INTERVAL) - .compat() - .await; + Err(PdError::DataCompacted(msg)) => { + error!("required revision has been compacted"; "err" => ?msg); + continue 'outer; + } + Err(err) => { + error!("failed to watch resource groups"; "err" => ?err); + let _ = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + RETRY_INTERVAL) + .compat() + .await; + } } } } } - async fn list_resource_groups(&mut self) -> (Vec, i64) { + async fn reload_all_resource_groups(&mut self) { loop { match self .pd_client @@ -107,11 +112,22 @@ impl ResourceManagerService { .await { Ok((items, revision)) => { - let groups = items - .into_iter() - .filter_map(|g| protobuf::parse_from_bytes(g.get_payload()).ok()) - .collect(); - return (groups, revision); + let mut vaild_groups = HashSet::with_capacity(items.len()); + items.iter().for_each(|g| { + match protobuf::parse_from_bytes::(g.get_payload()) { + Ok(rg) => { + vaild_groups.insert(rg.get_name().to_ascii_lowercase()); + self.manager.add_resource_group(rg); + } + Err(e) => { + error!("parse resource group failed"; "name" => g.get_name(), "err" => ?e); + } + } + }); + + self.manager.retain(|name, _g| vaild_groups.contains(name)); + self.revision = revision; + return; } Err(err) => { error!("failed to load global config"; "err" => ?err); @@ -185,14 +201,14 @@ pub mod tests { let mut s = ResourceManagerService::new(Arc::new(resource_manager), Arc::new(client)); let group = new_resource_group("TEST".into(), true, 100, 100); add_resource_group(s.pd_client.clone(), group); - let (res, revision) = block_on(s.list_resource_groups()); - assert_eq!(res.len(), 1); - assert_eq!(revision, 1); + block_on(s.reload_all_resource_groups()); + 
assert_eq!(s.manager.get_all_resource_groups().len(), 1); + assert_eq!(s.revision, 1); delete_resource_group(s.pd_client.clone(), "TEST"); - let (res, revision) = block_on(s.list_resource_groups()); - assert_eq!(res.len(), 0); - assert_eq!(revision, 2); + block_on(s.reload_all_resource_groups()); + assert_eq!(s.manager.get_all_resource_groups().len(), 0); + assert_eq!(s.revision, 2); server.stop(); } @@ -203,9 +219,24 @@ pub mod tests { let resource_manager = ResourceGroupManager::default(); let mut s = ResourceManagerService::new(Arc::new(resource_manager), Arc::new(client)); - let (res, revision) = block_on(s.list_resource_groups()); - assert_eq!(res.len(), 0); - assert_eq!(revision, 0); + block_on(s.reload_all_resource_groups()); + assert_eq!(s.manager.get_all_resource_groups().len(), 0); + assert_eq!(s.revision, 0); + + // TODO: find a better way to observe the watch is ready. + let wait_watch_ready = |s: &ResourceManagerService, count: usize| { + for _i in 0..100 { + if s.manager.get_all_resource_groups().len() == count { + return; + } + std::thread::sleep(Duration::from_millis(1)); + } + panic!( + "wait time out, expectd: {}, got: {}", + count, + s.manager.get_all_resource_groups().len() + ); + }; let background_worker = Builder::new("background").thread_count(1).create(); let mut s_clone = s.clone(); @@ -220,16 +251,13 @@ pub mod tests { // Mock modify let group2 = new_resource_group_ru("TEST2".into(), 50); add_resource_group(s.pd_client.clone(), group2); - let (res, revision) = block_on(s.list_resource_groups()); - assert_eq!(res.len(), 2); - assert_eq!(revision, 3); + wait_watch_ready(&s, 2); + // Mock delete delete_resource_group(s.pd_client.clone(), "TEST1"); - let (res, revision) = block_on(s.list_resource_groups()); - assert_eq!(res.len(), 1); - assert_eq!(revision, 4); + // Wait for watcher - std::thread::sleep(Duration::from_millis(100)); + wait_watch_ready(&s, 1); let groups = s.manager.get_all_resource_groups(); assert_eq!(groups.len(), 1); 
assert!(s.manager.get_resource_group("TEST1").is_none()); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index be516a84ae0..99d56ac10cd 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1697,6 +1697,7 @@ where Arc::new(self.config.security.clone()), self.engines.as_ref().unwrap().engine.raft_extension(), self.store_path.clone(), + self.resource_manager.clone(), ) { Ok(status_server) => Box::new(status_server), Err(e) => { diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 0797b391d87..2a67318439b 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1311,6 +1311,7 @@ where Arc::new(self.config.security.clone()), self.engines.as_ref().unwrap().engine.raft_extension(), self.store_path.clone(), + self.resource_manager.clone(), ) { Ok(status_server) => Box::new(status_server), Err(e) => { diff --git a/components/test_pd/src/mocker/etcd.rs b/components/test_pd/src/mocker/etcd.rs index 3939dfc9a72..d0fe3f43e68 100644 --- a/components/test_pd/src/mocker/etcd.rs +++ b/components/test_pd/src/mocker/etcd.rs @@ -50,7 +50,7 @@ impl Etcd { let (k, v) = group.last()?; match v { Value::Val(val) => Some(KeyValue(MetaKey(k.0.clone()), val.clone())), - Value::Del => None, + Value::Del(_) => None, } }) .fold(Vec::new(), |mut items, item| { @@ -88,13 +88,14 @@ impl Etcd { Bound::Included(Key(start_key, 0)), Bound::Excluded(Key(end_key, self.revision)), )) - .map(|(k, _)| Key::clone(k)) + .map(|(k, v)| (Key::clone(k), v.clone())) .collect::>(); v.dedup_by(|k1, k2| k1.0 == k2.0); - for mut victim in v { + for (victim, data) in v { let k = Key(victim.0.clone(), rev); - self.items.insert(k, Value::Del); + let data = data.take_data(); + self.items.insert(k, Value::Del(data.clone())); for sub in self.subs.values() { if victim.0.as_slice() < sub.end_key.as_slice() @@ -103,7 +104,7 @@ impl Etcd { sub.tx .send(KvEvent { kind: KvEventType::Delete, - pair: 
KeyValue(MetaKey(std::mem::take(&mut victim.0)), vec![]), + pair: KeyValue(MetaKey(victim.0.clone()), data.clone()), }) .await .unwrap(); @@ -135,9 +136,9 @@ impl Etcd { kind: KvEventType::Put, pair: KeyValue(MetaKey(k.0.clone()), val.clone()), }, - Value::Del => KvEvent { + Value::Del(val) => KvEvent { kind: KvEventType::Delete, - pair: KeyValue(MetaKey(k.0.clone()), vec![]), + pair: KeyValue(MetaKey(k.0.clone()), val.clone()), }, }; tx.send(event).await.expect("too many pending events"); @@ -259,7 +260,17 @@ impl std::fmt::Debug for Key { #[derive(Debug, PartialEq, Clone)] enum Value { Val(Vec), - Del, + // the value is the last put val. This is used for watch changes. + Del(Vec), +} + +impl Value { + fn take_data(self) -> Vec { + match self { + Value::Val(d) => d, + Value::Del(d) => d, + } + } } /// The key set for getting. diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 2f87c5d0264..ad7779b121c 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -32,6 +32,7 @@ use hyper::{ service::{make_service_fn, service_fn}, Body, Method, Request, Response, Server, StatusCode, }; +use kvproto::resource_manager::ResourceGroup; use online_config::OnlineConfig; use openssl::{ ssl::{Ssl, SslAcceptor, SslContext, SslFiletype, SslMethod, SslVerifyMode}, @@ -44,7 +45,9 @@ pub use profile::{ }; use prometheus::TEXT_FORMAT; use regex::Regex; +use resource_control::ResourceGroupManager; use security::{self, SecurityConfig}; +use serde::Serialize; use serde_json::Value; use tikv_kv::RaftExtension; use tikv_util::{ @@ -89,6 +92,7 @@ pub struct StatusServer { router: R, security_config: Arc, store_path: PathBuf, + resource_manager: Option>, } impl StatusServer @@ -101,6 +105,7 @@ where security_config: Arc, router: R, store_path: PathBuf, + resource_manager: Option>, ) -> Result { let thread_pool = Builder::new_multi_thread() .enable_all() @@ -120,6 +125,7 @@ where router, security_config, store_path, + 
resource_manager, }) } @@ -518,6 +524,7 @@ where let cfg_controller = self.cfg_controller.clone(); let router = self.router.clone(); let store_path = self.store_path.clone(); + let resource_manager = self.resource_manager.clone(); // Start to serve. let server = builder.serve(make_service_fn(move |conn: &C| { let x509 = conn.get_x509(); @@ -525,6 +532,7 @@ where let cfg_controller = cfg_controller.clone(); let router = router.clone(); let store_path = store_path.clone(); + let resource_manager = resource_manager.clone(); async move { // Create a status service. Ok::<_, hyper::Error>(service_fn(move |req: Request| { @@ -533,6 +541,7 @@ where let cfg_controller = cfg_controller.clone(); let router = router.clone(); let store_path = store_path.clone(); + let resource_manager = resource_manager.clone(); async move { let path = req.uri().path().to_owned(); let method = req.method().to_owned(); @@ -607,6 +616,9 @@ where (Method::PUT, path) if path.starts_with("/log-level") => { Self::change_log_level(req).await } + (Method::GET, "/resource_groups") => { + Self::handle_get_all_resource_groups(resource_manager.as_ref()) + } _ => Ok(make_response(StatusCode::NOT_FOUND, "path not found")), } } @@ -644,6 +656,63 @@ where } Ok(()) } + + pub fn handle_get_all_resource_groups( + mgr: Option<&Arc>, + ) -> hyper::Result> { + let groups = if let Some(mgr) = mgr { + mgr.get_all_resource_groups() + .into_iter() + .map(into_debug_request_group) + .collect() + } else { + vec![] + }; + let body = match serde_json::to_vec(&groups) { + Ok(body) => body, + Err(err) => { + return Ok(make_response( + StatusCode::INTERNAL_SERVER_ERROR, + format!("fails to json: {}", err), + )); + } + }; + match Response::builder() + .header("content-type", "application/json") + .body(hyper::Body::from(body)) + { + Ok(resp) => Ok(resp), + Err(err) => Ok(make_response( + StatusCode::INTERNAL_SERVER_ERROR, + format!("fails to build response: {}", err), + )), + } + } +} + +#[derive(Serialize)] +struct 
ResouceGroupSetting { + name: String, + ru: u64, + burst_limit: i64, +} + +fn into_debug_request_group(rg: ResourceGroup) -> ResouceGroupSetting { + ResouceGroupSetting { + name: rg.name, + ru: rg + .r_u_settings + .get_ref() + .get_r_u() + .get_settings() + .get_fill_rate(), + burst_limit: rg + .r_u_settings + .get_ref() + .get_r_u() + .get_settings() + .get_burst_limit(), + } } // To unify TLS/Plain connection usage in start_serve function @@ -957,6 +1026,7 @@ mod tests { Arc::new(SecurityConfig::default()), MockRouter, temp_dir.path().to_path_buf(), + None, ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1005,6 +1075,7 @@ mod tests { Arc::new(SecurityConfig::default()), MockRouter, temp_dir.path().to_path_buf(), + None, ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1050,6 +1121,7 @@ mod tests { Arc::new(SecurityConfig::default()), MockRouter, temp_dir.path().to_path_buf(), + None, ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1166,6 +1238,7 @@ mod tests { Arc::new(SecurityConfig::default()), MockRouter, temp_dir.path().to_path_buf(), + None, ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1210,6 +1283,7 @@ mod tests { Arc::new(SecurityConfig::default()), MockRouter, temp_dir.path().to_path_buf(), + None, ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1246,6 +1320,7 @@ mod tests { Arc::new(new_security_cfg(Some(allowed_cn))), MockRouter, temp_dir.path().to_path_buf(), + None, ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1319,6 +1394,7 @@ mod tests { Arc::new(SecurityConfig::default()), MockRouter, temp_dir.path().to_path_buf(), + None, ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1349,6 +1425,7 @@ mod tests { Arc::new(SecurityConfig::default()), MockRouter, temp_dir.path().to_path_buf(), + None, ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1382,6 +1459,7 @@ mod tests { Arc::new(SecurityConfig::default()), MockRouter, temp_dir.path().to_path_buf(), + None, ) .unwrap(); let addr = 
"127.0.0.1:0".to_owned(); @@ -1437,6 +1515,7 @@ mod tests { Arc::new(SecurityConfig::default()), MockRouter, temp_dir.path().to_path_buf(), + None, ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); diff --git a/tests/integrations/server/status_server.rs b/tests/integrations/server/status_server.rs index 929a7c286ae..1e3963ffdb7 100644 --- a/tests/integrations/server/status_server.rs +++ b/tests/integrations/server/status_server.rs @@ -45,6 +45,7 @@ fn test_region_meta_endpoint() { Arc::new(SecurityConfig::default()), router, std::env::temp_dir(), + None, ) .unwrap(); let addr = format!("127.0.0.1:{}", test_util::alloc_port()); From 2301dac437347b9e81823894c3ea2bbc96f2b0a9 Mon Sep 17 00:00:00 2001 From: Liu Cong Date: Wed, 8 Feb 2023 22:09:59 +0800 Subject: [PATCH 0511/1149] raftstore: new slow store detecting (#14000) ref tikv/tikv#14131 PD schedulers: new scheduler `evict-slow-trend-scheduler`, for new slow store detecting and leader evicting Signed-off-by: Liu Cong Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/config.rs | 9 + components/raftstore/src/store/metrics.rs | 70 ++ components/raftstore/src/store/worker/pd.rs | 97 ++- components/tikv_util/src/lib.rs | 1 + components/tikv_util/src/store/query_stats.rs | 8 + components/tikv_util/src/trend.rs | 734 ++++++++++++++++++ tests/integrations/config/mod.rs | 2 + 7 files changed, 920 insertions(+), 1 deletion(-) create mode 100644 components/tikv_util/src/trend.rs diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 6667a46c4e5..342ace1139e 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -291,6 +291,11 @@ pub struct Config { // Interval to inspect the latency of raftstore for slow store detection. 
pub inspect_interval: ReadableDuration, + // The unsensitive(increase it to reduce sensitiveness) of the cause-trend detection + pub slow_trend_unsensitive_cause: f64, + // The unsensitive(increase it to reduce sensitiveness) of the result-trend detection + pub slow_trend_unsensitive_result: f64, + // Interval to report min resolved ts, if it is zero, it means disabled. pub report_min_resolved_ts_interval: ReadableDuration, @@ -430,6 +435,10 @@ impl Default for Config { region_split_size: ReadableSize(0), clean_stale_peer_delay: ReadableDuration::minutes(0), inspect_interval: ReadableDuration::millis(500), + // The param `slow_trend_unsensitive_cause == 2.0` can yield good results, + // make it `10.0` to reduce a bit sensitiveness because SpikeFilter is disabled + slow_trend_unsensitive_cause: 10.0, + slow_trend_unsensitive_result: 0.5, report_min_resolved_ts_interval: ReadableDuration::secs(1), check_leader_lease_interval: ReadableDuration::secs(0), renew_leader_lease_advance_duration: ReadableDuration::secs(0), diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 6c6357d286c..7df8819c998 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -780,6 +780,76 @@ lazy_static! 
{ pub static ref STORE_SLOW_SCORE_GAUGE: Gauge = register_gauge!("tikv_raftstore_slow_score", "Slow score of the store.").unwrap(); + pub static ref STORE_SLOW_TREND_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend", "Slow trend changing rate").unwrap(); + + pub static ref STORE_SLOW_TREND_L0_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_l0", "Slow trend L0 window avg value.").unwrap(); + pub static ref STORE_SLOW_TREND_L1_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_l1", "Slow trend L1 window avg value.").unwrap(); + pub static ref STORE_SLOW_TREND_L2_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_l2", "Slow trend L2 window avg value.").unwrap(); + + pub static ref STORE_SLOW_TREND_L0_L1_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_l0_l1", "Slow trend changing rate: L0/L1.").unwrap(); + pub static ref STORE_SLOW_TREND_L1_L2_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_l1_l2", "Slow trend changing rate: L1/L2.").unwrap(); + + pub static ref STORE_SLOW_TREND_L1_MARGIN_ERROR_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_l1_margin_error", "Slow trend: L1 margin error range").unwrap(); + pub static ref STORE_SLOW_TREND_L2_MARGIN_ERROR_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_l2_margin_error", "Slow trend: L2 margin error range").unwrap(); + + pub static ref STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC: IntGaugeVec = + register_int_gauge_vec!( + "tikv_raftstore_slow_trend_margin_error_gap", + "Slow trend: the gap between margin window time and current sampling time", + &["window"] + ).unwrap(); + + pub static ref STORE_SLOW_TREND_MISC_GAUGE_VEC: IntGaugeVec = + register_int_gauge_vec!( + "tikv_raftstore_slow_trend_misc", + "Slow trend uncatelogued gauge(s)", + &["type"] + ).unwrap(); + + pub static ref STORE_SLOW_TREND_RESULT_VALUE_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_result_value", "Store slow trend result meantime 
value").unwrap(); + pub static ref STORE_SLOW_TREND_RESULT_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_result", "Store slow trend result changing rate").unwrap(); + + pub static ref STORE_SLOW_TREND_RESULT_L0_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_result_l0", "Slow trend result L0 window avg value.").unwrap(); + pub static ref STORE_SLOW_TREND_RESULT_L1_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_result_l1", "Slow trend result L1 window avg value.").unwrap(); + pub static ref STORE_SLOW_TREND_RESULT_L2_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_result_l2", "Slow trend result L2 window avg value.").unwrap(); + + pub static ref STORE_SLOW_TREND_RESULT_L0_L1_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_result_l0_l1", "Slow trend result changing rate: L0/L1.").unwrap(); + pub static ref STORE_SLOW_TREND_RESULT_L1_L2_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_result_l1_l2", "Slow trend result changing rate: L1/L2.").unwrap(); + + pub static ref STORE_SLOW_TREND_RESULT_L1_MARGIN_ERROR_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_result_l1_margin_error", "Slow trend result: L1 margin error range").unwrap(); + pub static ref STORE_SLOW_TREND_RESULT_L2_MARGIN_ERROR_GAUGE: Gauge = + register_gauge!("tikv_raftstore_slow_trend_result_l2_margin_error", "Slow trend result: L2 margin error range").unwrap(); + + pub static ref STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC: IntGaugeVec = + register_int_gauge_vec!( + "tikv_raftstore_slow_trend_result_margin_error_gap", + "Slow trend result: the gap between margin window time and current sampling time", + &["window"] + ).unwrap(); + + pub static ref STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC: IntGaugeVec = + register_int_gauge_vec!( + "tikv_raftstore_slow_trend_result_misc", + "Slow trend result uncatelogued gauge(s)", + &["type"] + ).unwrap(); + pub static ref RAFT_LOG_GC_SKIPPED_VEC: IntCounterVec = 
register_int_counter_vec!( "tikv_raftstore_raft_log_gc_skipped", "Total number of skipped raft log gc.", diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 18ecc77f599..f43e1ec33d5 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -45,6 +45,7 @@ use tikv_util::{ time::{Instant as TiInstant, UnixSecs}, timer::GLOBAL_TIMER_HANDLE, topn::TopN, + trend::{RequestPerSecRecorder, Trend}, warn, worker::{Runnable, RunnableWithTimer, ScheduleError, Scheduler}, }; @@ -921,6 +922,9 @@ where snap_mgr: SnapManager, remote: Remote, slow_score: SlowScore, + slow_trend_cause: Trend, + slow_trend_result: Trend, + slow_trend_result_recorder: RequestPerSecRecorder, // The health status of the store is updated by the slow score mechanism. health_service: Option, @@ -984,6 +988,39 @@ where snap_mgr, remote, slow_score: SlowScore::new(cfg.inspect_interval.0), + slow_trend_cause: Trend::new( + // Disable SpikeFilter for now + Duration::from_secs(0), + STORE_SLOW_TREND_MISC_GAUGE_VEC.with_label_values(&["spike_filter_value"]), + STORE_SLOW_TREND_MISC_GAUGE_VEC.with_label_values(&["spike_filter_count"]), + Duration::from_secs(180), + Duration::from_secs(30), + Duration::from_secs(120), + Duration::from_secs(600), + 1, + tikv_util::time::duration_to_us(Duration::from_micros(500)), + STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC.with_label_values(&["L1"]), + STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC.with_label_values(&["L2"]), + cfg.slow_trend_unsensitive_cause, + ), + slow_trend_result: Trend::new( + // Disable SpikeFilter for now + Duration::from_secs(0), + STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC.with_label_values(&["spike_filter_value"]), + STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC.with_label_values(&["spike_filter_count"]), + Duration::from_secs(120), + Duration::from_secs(15), + Duration::from_secs(60), + Duration::from_secs(300), + 1, + 2000, + 
STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC + .with_label_values(&["L1"]), + STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC + .with_label_values(&["L2"]), + cfg.slow_trend_unsensitive_result, + ), + slow_trend_result_recorder: RequestPerSecRecorder::new(), health_service, curr_health_status: ServingStatus::Serving, coprocessor_host, @@ -1254,6 +1291,9 @@ where .store_stat .engine_total_query_num .sub_query_stats(&self.store_stat.engine_last_query_num); + let total_query_num = self + .slow_trend_result_recorder + .record_and_get_current_rps(res.get_all_query_num(), Instant::now()); stats.set_query_stats(res.0); stats.set_cpu_usages(self.store_stat.store_cpu_usages.clone().into()); @@ -1293,6 +1333,7 @@ where let slow_score = self.slow_score.get(); stats.set_slow_score(slow_score as u64); + self.set_slow_trend_to_store_stats(&mut stats, total_query_num); let router = self.router.clone(); let resp = self @@ -1379,6 +1420,51 @@ where self.remote.spawn(f); } + fn set_slow_trend_to_store_stats( + &mut self, + stats: &mut pdpb::StoreStats, + total_query_num: Option, + ) { + let slow_trend_cause_rate = self.slow_trend_cause.increasing_rate(); + STORE_SLOW_TREND_GAUGE.set(slow_trend_cause_rate); + let mut slow_trend = pdpb::SlowTrend::default(); + slow_trend.set_cause_rate(slow_trend_cause_rate); + slow_trend.set_cause_value(self.slow_trend_cause.l0_avg()); + if let Some(total_query_num) = total_query_num { + self.slow_trend_result + .record(total_query_num as u64, Instant::now()); + slow_trend.set_result_value(self.slow_trend_result.l0_avg()); + let slow_trend_result_rate = self.slow_trend_result.increasing_rate(); + slow_trend.set_result_rate(slow_trend_result_rate); + STORE_SLOW_TREND_RESULT_GAUGE.set(slow_trend_result_rate); + STORE_SLOW_TREND_RESULT_VALUE_GAUGE.set(total_query_num); + } else { + // Just to mark the invalid range on the graphic + STORE_SLOW_TREND_RESULT_VALUE_GAUGE.set(-100.0); + } + stats.set_slow_trend(slow_trend); + 
self.write_slow_trend_metrics(); + } + + fn write_slow_trend_metrics(&mut self) { + STORE_SLOW_TREND_L0_GAUGE.set(self.slow_trend_cause.l0_avg()); + STORE_SLOW_TREND_L1_GAUGE.set(self.slow_trend_cause.l1_avg()); + STORE_SLOW_TREND_L2_GAUGE.set(self.slow_trend_cause.l2_avg()); + STORE_SLOW_TREND_L0_L1_GAUGE.set(self.slow_trend_cause.l0_l1_rate()); + STORE_SLOW_TREND_L1_L2_GAUGE.set(self.slow_trend_cause.l1_l2_rate()); + STORE_SLOW_TREND_L1_MARGIN_ERROR_GAUGE.set(self.slow_trend_cause.l1_margin_error_base()); + STORE_SLOW_TREND_L2_MARGIN_ERROR_GAUGE.set(self.slow_trend_cause.l2_margin_error_base()); + STORE_SLOW_TREND_RESULT_L0_GAUGE.set(self.slow_trend_result.l0_avg()); + STORE_SLOW_TREND_RESULT_L1_GAUGE.set(self.slow_trend_result.l1_avg()); + STORE_SLOW_TREND_RESULT_L2_GAUGE.set(self.slow_trend_result.l2_avg()); + STORE_SLOW_TREND_RESULT_L0_L1_GAUGE.set(self.slow_trend_result.l0_l1_rate()); + STORE_SLOW_TREND_RESULT_L1_L2_GAUGE.set(self.slow_trend_result.l1_l2_rate()); + STORE_SLOW_TREND_RESULT_L1_MARGIN_ERROR_GAUGE + .set(self.slow_trend_result.l1_margin_error_base()); + STORE_SLOW_TREND_RESULT_L2_MARGIN_ERROR_GAUGE + .set(self.slow_trend_result.l2_margin_error_base()); + } + fn handle_report_batch_split(&self, regions: Vec) { let resp = self.pd_client.report_batch_split(regions); let f = async move { @@ -2097,7 +2183,13 @@ where txn_ext, } => self.handle_update_max_timestamp(region_id, initial_status, txn_ext), Task::QueryRegionLeader { region_id } => self.handle_query_region_leader(region_id), - Task::UpdateSlowScore { id, duration } => self.slow_score.record(id, duration.sum()), + Task::UpdateSlowScore { id, duration } => { + self.slow_score.record(id, duration.sum()); + self.slow_trend_cause.record( + tikv_util::time::duration_to_us(duration.store_wait_duration.unwrap()), + Instant::now(), + ); + } Task::RegionCpuRecords(records) => self.handle_region_cpu_records(records), Task::ReportMinResolvedTs { store_id, @@ -2121,6 +2213,9 @@ where T: PdClient + 'static, 
{ fn on_timeout(&mut self) { + // Record a fairly great value when timeout + self.slow_trend_cause.record(500_000, Instant::now()); + // The health status is recovered to serving as long as any tick // does not timeout. if self.curr_health_status == ServingStatus::ServiceUnknown diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index 9b13250fe1e..fd294a08d34 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -61,6 +61,7 @@ pub mod thread_group; pub mod time; pub mod timer; pub mod topn; +pub mod trend; pub mod worker; pub mod yatp_pool; diff --git a/components/tikv_util/src/store/query_stats.rs b/components/tikv_util/src/store/query_stats.rs index 1c352cfc303..6cf461411aa 100644 --- a/components/tikv_util/src/store/query_stats.rs +++ b/components/tikv_util/src/store/query_stats.rs @@ -94,6 +94,14 @@ impl QueryStats { mem::swap(&mut self.0, &mut query_stats); query_stats } + + pub fn get_all_query_num(&self) -> u64 { + let mut sum: u64 = 0; + for kind in QUERY_KINDS { + sum += QueryStats::get_query_num(&self.0, *kind); + } + sum + } } pub fn is_read_query(kind: QueryKind) -> bool { diff --git a/components/tikv_util/src/trend.rs b/components/tikv_util/src/trend.rs new file mode 100644 index 00000000000..8ae3bb3d5aa --- /dev/null +++ b/components/tikv_util/src/trend.rs @@ -0,0 +1,734 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + collections::vec_deque::VecDeque, + time::{Duration, Instant}, +}; + +use prometheus::IntGauge; + +pub struct SampleValue { + value: u64, + time: Instant, +} + +pub struct SampleWindow { + sum: u64, + values: VecDeque, + duration: Duration, + overflow: bool, +} + +impl SampleWindow { + pub fn new(duration: Duration) -> Self { + Self { + sum: 0, + values: VecDeque::new(), + duration, + overflow: false, + } + } + + #[inline] + pub fn record(&mut self, value: u64, now: Instant) { + self.values.push_back(SampleValue { value, time: now }); + self.sum = self.sum.saturating_add(value); + while !self.values.is_empty() + && now.duration_since(self.values.front().unwrap().time) > self.duration + { + let front = self.values.pop_front().unwrap(); + self.sum = self.sum.saturating_sub(front.value); + self.overflow = true; + } + } + + #[inline] + pub fn is_overflow(&self) -> bool { + self.overflow + } + + #[inline] + pub fn drain(&mut self) -> (VecDeque, u64, bool) { + let result = ( + self.values.drain(..).collect::>(), + self.sum, + self.overflow, + ); + self.sum = 0; + self.overflow = false; + result + } + + #[inline] + // TODO: better memory operating? 
+ pub fn move_from(&mut self, source: &mut Self) { + (self.values, self.sum, self.overflow) = source.drain(); + } + + #[inline] + pub fn valid(&self) -> bool { + !self.values.is_empty() + } + + #[inline] + pub fn avg(&self) -> f64 { + if !self.values.is_empty() { + self.sum as f64 / self.values.len() as f64 + } else { + 0.0 + } + } + + #[inline] + pub fn std_ev(&self) -> f64 { + if self.values.len() <= 1 { + return 0.0; + } + let avg = self.avg(); + let mut delta_sq_sum = 0.0; + for v in self.values.iter() { + let delta = (v.value as f64) - avg; + delta_sq_sum += delta * delta; + } + // We use `self.values.len()` rather than `self.values.len() - 1` + f64::sqrt(delta_sq_sum / self.values.len() as f64) + } + + #[inline] + pub fn std_ev_ratio(&self) -> f64 { + if self.values.len() <= 1 { + 0.0 + } else { + self.std_ev() / self.avg() + } + } +} + +pub struct SampleWindows { + pub windows: Vec, +} + +impl SampleWindows { + pub fn new(windows_durations: Vec) -> Self { + let mut windows = vec![]; + for duration in windows_durations.iter() { + windows.push(SampleWindow::new(*duration)); + } + Self { windows } + } + + #[inline] + pub fn record(&mut self, value: u64, now: Instant) { + self.windows + .iter_mut() + .for_each(|window| window.record(value, now)) + } + + #[inline] + pub fn valid(&self) -> bool { + for window in self.windows.iter() { + if !window.valid() { + return false; + } + } + true + } +} + +// TODO: Generalize this module using SPOT(https://dl.acm.org/doi/10.1145/3097983.3098144) +// +// Without SPOT: +// - Margin errors calculating is based on sampling +// - `flip_margin_error_multiple` controls when to flip, hence control what to +// sample +// - `flip_margin_error_multiple` is a fixed value, can't fit all cases +// +// With SPOT: +// - `enter_threshold_multiple` will be insteaded of by `risk` +// - `risk` also a fixed value, but it's based on distribution, so it could +// fits all +struct HistoryWindow { + name: &'static str, + window_duration: Duration, 
+ sample_interval_duration: Duration, + current_window: SampleWindow, + previous_window: SampleWindow, + last_sampled_time: Instant, + last_flipped_time: Instant, + flipping_start_time: Option, + margin_error_base: f64, + flip_margin_error_multiple: f64, + gap_gauge: IntGauge, +} + +impl HistoryWindow { + pub fn new( + name: &'static str, + window_duration: Duration, + sample_interval_duration: Duration, + margin_error_base: f64, + gap_gauge: IntGauge, + flip_margin_error_multiple: f64, + ) -> Self { + let now = Instant::now(); + Self { + name, + window_duration, + sample_interval_duration, + current_window: SampleWindow::new(window_duration), + previous_window: SampleWindow::new(window_duration), + last_sampled_time: now, + last_flipped_time: now, + flipping_start_time: None, + margin_error_base, + gap_gauge, + flip_margin_error_multiple, + } + } + + #[inline] + pub fn record(&mut self, value: f64, now: Instant, increasing_rate: f64) { + let gap_secs = if self.current_window.is_overflow() { + now.saturating_duration_since(self.current_window.values.front().unwrap().time) + .as_secs() as i64 + } else if self.previous_window.is_overflow() { + now.saturating_duration_since(self.previous_window.values.front().unwrap().time) + .as_secs() as i64 + } else { + // Just to mark the invalid range on the graphic + -100 + }; + self.gap_gauge.set(gap_secs); + + if now.duration_since(self.last_sampled_time) <= self.sample_interval_duration { + return; + } + let should_skip = self.try_flip(value, now, increasing_rate); + if should_skip { + return; + } + self.current_window.record(value as u64, now); + self.last_sampled_time = now; + } + + #[inline] + pub fn valid(&self) -> bool { + self.current_window.is_overflow() || self.previous_window.is_overflow() + } + + #[inline] + pub fn margin_error(&self) -> f64 { + let margin_error = if self.flipping_start_time.is_none() { + if self.current_window.is_overflow() { + self.current_window.std_ev() + } else if 
self.previous_window.is_overflow() { + // We use the previous margin error in the duration: + // - After flipping ends + // - Yet before current window is overflow + self.previous_window.std_ev() + } else { + 0.0 + } + } else if self.previous_window.is_overflow() { + self.previous_window.std_ev() + } else { + 0.0 + }; + f64::max(margin_error, self.margin_error_base) + } + + #[inline] + // Return bool: shoud_skip_current_value + fn try_flip(&mut self, value: f64, now: Instant, increasing_rate: f64) -> bool { + if !self.current_window.is_overflow() { + return false; + } + let current_avg = self.current_window.avg(); + let margin_error = self.margin_error(); + + // The output margin_error multiple can up to `self.flip_margin_error_multiple + + // 1` without flipping (increasing_rate already minus a margin_error) + let flip_margin_error = margin_error * self.flip_margin_error_multiple; + let delta = f64::abs(value - current_avg); + + // Strict condition for exiting flipping (to do actual flipping) + if self.flipping_start_time.is_some() { + // Make sure not stuck at flipping phase by using `time_based_multiple`, + // increase by time + // - Expectation of time_based_multiple: starts at 0.0, to `margin_error * 5%` + // at 4min, to 10% at 12min, to 20% at 28min + // - f64::abs() is for preventing crash in case the server time is adjusted + let flipping_duration = now.duration_since(self.flipping_start_time.unwrap()); + let time_based_multiple = + (f64::abs(flipping_duration.as_secs() as f64) / 240.0 + 1.0).log2() / 20.0; + if f64::abs(increasing_rate) > margin_error * time_based_multiple { + // Keep flipping, skip the huge-changing phase, wait for stable + return true; + } else { + // The huge-changing phase ends, do flipping + self.flip(); + self.flipping_start_time = None; + self.last_flipped_time = now; + info!( + "history window flipping: end"; + "name" => self.name, + "delta" => delta, + "flip_margin_error" => margin_error, + "time_based_multiple" => 
time_based_multiple, + "increasing_rate" => increasing_rate, + "flipping_duration" => flipping_duration.as_secs(), + ); + return false; + } + } + + // Loose condition for entering flipping + if now.duration_since(self.last_flipped_time) > self.window_duration + && delta > flip_margin_error + { + // Enter flipping phase, may last for a while + self.flipping_start_time = Some(Instant::now()); + info!( + "history window flipping: enter"; + "name" => self.name, + "delta" => delta, + "flip_margin_error" => flip_margin_error, + "increasing_rate" => increasing_rate, + ); + } + false + } + + #[inline] + fn flip(&mut self) { + self.previous_window.move_from(&mut self.current_window); + } +} + +// TODO: Generalize this filter using SPOT(https://dl.acm.org/doi/10.1145/3097983.3098144) +// - `enter_threshold_multiple` is a fixed value, can't fit all cases +// - Using SPOT, `enter_threshold_multiple` will be insteaded of by `risk` +// - `risk` also a fixed value, but it's based on distribution, so it could +// fits all +pub struct SpikeFilter { + values: VecDeque, + duration: Duration, + filter_value_gauge: IntGauge, + filter_count_gauge: IntGauge, + exit_threshold_avg_multiple: f64, + exit_threshold_margin_error_multiple: f64, + enter_threshold_multiple: f64, +} + +impl SpikeFilter { + pub fn new( + duration: Duration, + filter_value_gauge: IntGauge, + filter_count_gauge: IntGauge, + exit_threshold_avg_multiple: f64, + exit_threshold_margin_error_multiple: f64, + enter_threshold_multiple: f64, + ) -> Self { + assert!(enter_threshold_multiple > 1.0); + Self { + values: VecDeque::new(), + duration, + filter_value_gauge, + filter_count_gauge, + exit_threshold_avg_multiple, + exit_threshold_margin_error_multiple, + enter_threshold_multiple, + } + } + + #[inline] + // TODO: better memory operating? 
+ pub fn record( + &mut self, + value: u64, + now: Instant, + history_avg: f64, + history_margin_error: f64, + ) -> Option> { + let exit_threshold = history_avg * self.exit_threshold_avg_multiple + + history_margin_error * self.exit_threshold_margin_error_multiple; + let enter_threshold = exit_threshold * self.enter_threshold_multiple; + let curr = SampleValue { value, time: now }; + + // Spike entering check + if (value as f64) > enter_threshold { + // Hold the very high values in the checking sequence + self.values.push_back(curr); + if now.duration_since(self.values.front().unwrap().time) > self.duration { + // The checking sequence is too long to be a spike, dump all and exit checking + let values: Vec = self.values.drain(..).collect(); + return Some(values); + } + // The curr value is on hold, return None + return None; + } + + // Not in a spike, nothing happen + if self.values.is_empty() { + return Some(vec![curr]); + } + + // In a spike + + // Spike ending check + if (value as f64) < exit_threshold { + if self.values.len() <= 2 { + // The checking sequence is too short to be a spike, dump all and exit checking + let mut values: Vec = self.values.drain(..).collect(); + values.push(curr); + return Some(values); + } + // The checking sequence is not long enough to be regular high, it's a spike, + // discard all but return curr + self.filter_value_gauge.set(self.avg() as i64); + self.filter_count_gauge.inc(); + self.values.drain(..); + return Some(vec![curr]); + } + + // Hold curr value to this spike + self.values.push_back(curr); + None + } + + #[inline] + fn avg(&self) -> f64 { + if self.values.is_empty() { + return 0.0; + } + let mut sum: f64 = 0.0; + for value in self.values.iter() { + sum += value.value as f64; + } + sum / (self.values.len() as f64) + } +} + +// Responsibilities of each window: +// +// L0: +// Eleminate very short time jitter, +// Consider its avg value as a point in data flow +// L1: +// `L0.avg/L1.avg` to trigger slow-event, not last long 
but high sensitive +// Sensitive could be tuned by `L0.duration` and `L1.duration` +// Include periodic fluctuations, so it's avg could be seen as baseline +// value Its duration is also the no-detectable duration after TiKV starting +// L2: +// `L1.avg/L2.avg` to trigger slow-event, last long but low sensitive +// Sensitive could be tuned by `L1.duration` and `L2.duration` +// +// L* History: +// Sample history values and calculate the margin error +// +// Spike Filter: +// Erase very high and short time spike-values +// +pub struct Trend { + sample_interval: usize, + sample_sequence_id: usize, + + spike_filter: SpikeFilter, + spike_filter_enabled: bool, + + data_flow: SampleWindows, + + l1_history: HistoryWindow, + l2_history: HistoryWindow, + + // When SPOT is being used, these should be `risk multiple` + l1_margin_error_multiple: f64, + l2_margin_error_multiple: f64, + + curves_composer: CurvesComposer, +} + +impl Trend { + pub fn new( + spike_filter_duration: Duration, + spike_filter_value_gauge: IntGauge, + spike_filter_count_gauge: IntGauge, + history_duration: Duration, + l0_duration: Duration, + l1_duration: Duration, + l2_duration: Duration, + sample_interval: usize, + tolerable_margin_error_value: u64, + l1_gap_gauge: IntGauge, + l2_gap_gauge: IntGauge, + unsensitive_multiple: f64, + ) -> Self { + let margin_error_base = tolerable_margin_error_value as f64; + Self { + sample_interval, + sample_sequence_id: 0, + data_flow: SampleWindows::new(vec![l0_duration, l1_duration, l2_duration]), + spike_filter_enabled: !spike_filter_duration.is_zero(), + spike_filter: SpikeFilter::new( + spike_filter_duration, + spike_filter_value_gauge, + spike_filter_count_gauge, + 1.0, + 5.0, + 2.0, + ), + l1_history: HistoryWindow::new( + "L1", + history_duration, + Duration::from_secs(1), + margin_error_base, + l1_gap_gauge, + 3.0, + ), + l2_history: HistoryWindow::new( + "L2", + history_duration, + Duration::from_secs(1), + margin_error_base, + l2_gap_gauge, + 2.0, + ), + 
l1_margin_error_multiple: 3.0 * unsensitive_multiple, + l2_margin_error_multiple: 2.0 * unsensitive_multiple, + curves_composer: CurvesComposer::new(l0_duration, l1_duration, l2_duration, 2.0), + } + } + + #[inline] + pub fn record(&mut self, value: u64, now: Instant) { + if !self.check_should_sample() { + return; + } + if !self.spike_filter_enabled || !self.data_flow.windows[1].is_overflow() { + self.record_unfiltered(value, now); + return; + } + if let Some(filtered) = + self.spike_filter + .record(value, now, self.l1_avg(), self.l1_margin_error()) + { + for sample in filtered.iter() { + self.record_unfiltered(sample.value, sample.time) + } + } + } + + #[inline] + pub fn increasing_rate(&self) -> f64 { + self.curves_composer + .compose(self.l0_l1_rate(), self.l1_l2_rate()) + } + + #[inline] + pub fn l0_avg(&self) -> f64 { + self.data_flow.windows[0].avg() + } + + #[inline] + pub fn l1_avg(&self) -> f64 { + self.data_flow.windows[1].avg() + } + + #[inline] + pub fn l2_avg(&self) -> f64 { + self.data_flow.windows[2].avg() + } + + #[inline] + pub fn l1_margin_error_base(&self) -> f64 { + self.l1_history.margin_error() + } + + #[inline] + pub fn l2_margin_error_base(&self) -> f64 { + self.l2_history.margin_error() + } + + #[inline] + pub fn l0_l1_rate(&self) -> f64 { + if !self.data_flow.windows[2].is_overflow() { + return 0.0; + } + if !self.l1_history.valid() { + return 0.0; + } + let l1_avg = self.l1_avg(); + Trend::la_lb_rate(self.l0_avg(), l1_avg, self.l1_margin_error()) + } + + #[inline] + pub fn l1_l2_rate(&self) -> f64 { + if !self.data_flow.windows[2].is_overflow() { + return 0.0; + } + if !self.l2_history.valid() { + return 0.0; + } + Trend::la_lb_rate(self.l1_avg(), self.l2_avg(), self.l2_margin_error()) + } + + #[inline] + fn check_should_sample(&mut self) -> bool { + if self.sample_interval <= 1 { + return true; + } + let should = self.sample_sequence_id % self.sample_interval == 0; + self.sample_sequence_id += 1; + should + } + + #[inline] + fn 
record_unfiltered(&mut self, value: u64, now: Instant) { + self.data_flow.record(value, now); + // TODO: Reduce the `increasing_rate()` calculating count? + let increasing_rate = self.increasing_rate(); + self.l1_history.record(self.l0_avg(), now, increasing_rate); + self.l2_history.record(self.l1_avg(), now, increasing_rate); + } + + #[inline] + fn l1_margin_error(&self) -> f64 { + self.l1_history.margin_error() * self.l1_margin_error_multiple + } + + #[inline] + fn l2_margin_error(&self) -> f64 { + self.l2_history.margin_error() * self.l2_margin_error_multiple + } + + #[inline] + fn la_lb_rate(la_avg: f64, lb_avg: f64, margin_error: f64) -> f64 { + if lb_avg < f64::EPSILON { + return 0.0; + } + let mut increased = la_avg - lb_avg; + if f64::abs(increased) < f64::EPSILON { + return 0.0; + } + increased = if la_avg < lb_avg { + if -increased > margin_error { + -increased - margin_error + } else { + 0.0 + } + } else if increased > margin_error { + increased - margin_error + } else { + 0.0 + }; + let mut inc_sq = increased * increased; + if la_avg < lb_avg { + inc_sq = -inc_sq; + }; + let res = la_avg * inc_sq / f64::sqrt(lb_avg); + if la_avg >= lb_avg { + f64::sqrt(res) + } else { + -f64::sqrt(-res) + } + } +} + +struct CurvesComposer { + l0_l1_vs_l1_l2: f64, +} + +impl CurvesComposer { + pub fn new( + l0_duration: Duration, + l1_duration: Duration, + l2_duration: Duration, + l1_l2_extra_weight: f64, + ) -> Self { + let l0_l1 = l0_duration.as_nanos() as f64 / l1_duration.as_nanos() as f64; + let l1_l2 = l1_duration.as_nanos() as f64 / l2_duration.as_nanos() as f64; + Self { + l0_l1_vs_l1_l2: l1_l2_extra_weight * l0_l1 / l1_l2, + } + } + + #[inline] + pub fn compose(&self, l0_l1_rate: f64, l1_l2_rate: f64) -> f64 { + l0_l1_rate + l1_l2_rate * self.l0_l1_vs_l1_l2 + } +} + +pub struct RequestPerSecRecorder { + previous_ts: Instant, + inited: bool, +} + +impl Default for RequestPerSecRecorder { + fn default() -> Self { + Self::new() + } +} + +impl RequestPerSecRecorder 
{ + pub fn new() -> Self { + Self { + previous_ts: Instant::now(), + inited: false, + } + } + + #[inline] + pub fn record_and_get_current_rps( + &mut self, + observed_request_count: u64, + now: Instant, + ) -> Option { + if !self.inited { + self.inited = true; + self.previous_ts = now; + None + } else { + self.inited = true; + let secs = now.saturating_duration_since(self.previous_ts).as_secs(); + self.previous_ts = now; + if secs == 0 { + None + } else { + Some(observed_request_count as f64 / secs as f64) + } + } + } +} + +#[cfg(test)] +mod tests { + use std::time::{Duration, Instant}; + + use super::*; + + #[test] + fn test_sample_window() { + let now = Instant::now(); + let mut window = SampleWindow::new(Duration::from_secs(4)); + assert_eq!(window.valid(), false); + assert_eq!(window.avg(), 0.0); + assert_eq!(window.std_ev_ratio(), 0.0); + window.record(10, now); + assert_eq!(window.valid(), true); + assert_eq!(window.avg(), 10.0); + assert_eq!(window.overflow, false); + assert_eq!(window.std_ev_ratio(), 0.0); + window.record(20, now + Duration::from_secs(1)); + assert_eq!(window.avg(), (10.0 + 20.0) / 2.0); + assert_eq!(window.overflow, false); + assert_eq!(window.std_ev_ratio(), 5.0 / 15.0); + window.record(30, now + Duration::from_secs(2)); + assert_eq!(window.avg(), (10.0 + 20.0 + 30.0) / 3.0); + assert_eq!(window.overflow, false); + assert_eq!(window.std_ev_ratio(), f64::sqrt(200.0 / 3.0) / 20.0); + window.record(40, now + Duration::from_secs(5)); + assert_eq!(window.avg(), (20.0 + 30.0 + 40.0) / 3.0); + assert_eq!(window.overflow, true); + assert_eq!(window.std_ev_ratio(), f64::sqrt(200.0 / 3.0) / 30.0); + } +} diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 61ec0d1f3f4..351e9d74ca0 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -256,6 +256,8 @@ fn test_serde_custom_tikv_config() { unreachable_backoff: ReadableDuration::secs(111), check_peers_availability_interval: 
ReadableDuration::secs(30), check_request_snapshot_interval: ReadableDuration::minutes(1), + slow_trend_unsensitive_cause: 10.0, + slow_trend_unsensitive_result: 0.5, }; value.pd = PdConfig::new(vec!["example.com:443".to_owned()]); let titan_cf_config = TitanCfConfig { From 6b6f9154b6507ad7036bc2df41d58864c9dece43 Mon Sep 17 00:00:00 2001 From: ekexium Date: Thu, 9 Feb 2023 12:12:00 +0800 Subject: [PATCH 0512/1149] metrics: add panels showing pessimistic lock queue lengths (#14158) ref tikv/tikv#14157 Signed-off-by: ekexium Co-authored-by: Ti Chi Robot --- metrics/grafana/tikv_details.json | 184 ++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 357edac04a7..334c3c119f7 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -36456,6 +36456,190 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "dashLength": 10, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "editable": true, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 75 + }, + "id": 23763573091, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "pluginVersion": "7.5.11", + "pointradius": 5, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "targets": [ + { + "expr": "sum(tikv_lock_wait_queue_entries_gauge_vec{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "legendFormat": "{{type}}", + "interval": "", + "exemplar": true, + "format": "time_series", + 
"intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Pessimistic lock activities", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + }, + "bars": false, + "dashes": false, + "error": false, + "fillGradient": 0, + "hiddenSeries": false, + "percentage": false, + "points": false, + "stack": false, + "steppedLine": false, + "timeFrom": null, + "timeShift": null, + "description": "The number of active keys and waiters." + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 83 + }, + "id": 23763573092, + "legend": { + "show": false + }, + "links": [], + "pluginVersion": "7.5.11", + "targets": [ + { + "expr": "sum(increase(tikv_lock_wait_queue_length_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "legendFormat": "{{le}}", + "interval": "", + "exemplar": true, + "format": "heatmap", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "title": "Lengths of lock wait queues when transaction enqueues", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "timeFrom": null, + "timeShift": null, + "description": "The length includes the entering transaction itself", + "heatmap": {}, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "mode": "spectrum", + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "exponent": 0.5, + "colorScheme": "interpolateOranges" + }, + 
"dataFormat": "tsbuckets", + "yBucketBound": "auto", + "reverseYBuckets": false, + "xAxis": { + "show": true + }, + "yAxis": { + "show": true, + "format": "short", + "decimals": null, + "logBase": 1, + "splitFactor": null, + "min": null, + "max": null + }, + "xBucketSize": null, + "xBucketNumber": null, + "yBucketSize": null, + "yBucketNumber": null, + "highlightCards": true, + "hideZeroBuckets": true } ], "title": "Pessimistic Locking", From fcc6829e41ea675b63290475ac1760664c905f62 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 9 Feb 2023 13:37:59 +0800 Subject: [PATCH 0513/1149] config: disallow encryption for v2 (#14190) ref tikv/tikv#12842 Signed-off-by: tabokie --- Cargo.lock | 1 + Cargo.toml | 6 ++++-- cmd/tikv-ctl/Cargo.toml | 4 ++-- components/backup-stream/Cargo.toml | 4 ++-- components/backup/Cargo.toml | 4 ++-- components/batch-system/Cargo.toml | 6 +++--- components/causal_ts/Cargo.toml | 4 ++-- components/cdc/Cargo.toml | 4 ++-- components/cloud/aws/Cargo.toml | 4 ++-- components/cloud/azure/Cargo.toml | 4 ++-- components/cloud/gcp/Cargo.toml | 4 ++-- components/encryption/Cargo.toml | 4 ++-- components/encryption/export/Cargo.toml | 4 ++-- components/engine_rocks/Cargo.toml | 4 ++-- components/engine_rocks_helper/Cargo.toml | 4 ++-- components/engine_tirocks/Cargo.toml | 4 ++-- components/engine_traits/Cargo.toml | 4 ++-- components/external_storage/Cargo.toml | 4 ++-- components/file_system/Cargo.toml | 4 ++-- components/pd_client/Cargo.toml | 4 ++-- components/raft_log_engine/Cargo.toml | 4 ++-- components/raftstore-v2/Cargo.toml | 2 +- components/raftstore/Cargo.toml | 4 ++-- components/resolved_ts/Cargo.toml | 4 ++-- components/resource_control/Cargo.toml | 6 +++--- components/resource_metering/Cargo.toml | 4 ++-- components/security/Cargo.toml | 1 + components/security/src/lib.rs | 16 +++++++++++----- components/server/Cargo.toml | 4 ++-- components/snap_recovery/Cargo.toml | 6 +++--- components/sst_importer/Cargo.toml | 4 ++-- 
components/test_pd/Cargo.toml | 4 ++-- components/test_pd_client/Cargo.toml | 4 ++-- components/test_raftstore/Cargo.toml | 4 ++-- components/test_util/Cargo.toml | 4 ++-- components/tidb_query_datatype/Cargo.toml | 4 ++-- components/tidb_query_executors/Cargo.toml | 4 ++-- components/tikv_kv/Cargo.toml | 4 ++-- components/tikv_util/Cargo.toml | 4 ++-- components/txn_types/Cargo.toml | 2 +- src/config/mod.rs | 3 ++- tests/Cargo.toml | 4 ++-- 42 files changed, 94 insertions(+), 83 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7750f729778..74701b0561f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5031,6 +5031,7 @@ dependencies = [ "collections", "encryption", "grpcio", + "kvproto", "serde", "serde_derive", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index f7d44c94866..29337b4a002 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -149,8 +149,8 @@ serde = { version = "1.0", features = ["derive"] } serde_derive = "1.0" serde_ignored = "0.1" serde_json = { version = "1.0", features = ["preserve_order"] } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } smallvec = "1.4" sst_importer = { workspace = true } strum = { version = "0.20", features = ["derive"] } @@ -380,6 +380,8 @@ tipb = { git = "https://github.com/pingcap/tipb.git" } kvproto = { git = "https://github.com/pingcap/kvproto.git" } yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } +slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } +slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } [profile.dev.package.grpcio-sys] debug = false diff --git 
a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index 1e0699f64cf..718d760e3d4 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -78,8 +78,8 @@ regex = "1" security = { workspace = true } serde_json = "1.0" server = { workspace = true } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } structopt = "0.3" tempfile = "3.0" tikv = { workspace = true } diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 43bda42a088..f3f1b482be0 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -56,8 +56,8 @@ raftstore = { workspace = true } regex = "1" resolved_ts = { workspace = true } security = { path = "../security" } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } thiserror = "1" tidb_query_datatype = { workspace = true } tikv = { workspace = true } diff --git a/components/backup/Cargo.toml b/components/backup/Cargo.toml index 27f7d68e8e3..4f12dd04c36 100644 --- a/components/backup/Cargo.toml +++ b/components/backup/Cargo.toml @@ -63,9 +63,9 @@ raftstore = { workspace = true } security = { workspace = true } serde = "1.0" serde_derive = "1.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } +slog = { workspace = true } # better to not use slog-global, but pass in the logger -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog-global = { workspace = true } thiserror = "1.0" 
tidb_query_common = { workspace = true } tikv = { workspace = true } diff --git a/components/batch-system/Cargo.toml b/components/batch-system/Cargo.toml index 75a0230c188..af57bbef930 100644 --- a/components/batch-system/Cargo.toml +++ b/components/batch-system/Cargo.toml @@ -13,15 +13,15 @@ crossbeam = "0.8" derive_more = { version = "0.99", optional = true } fail = "0.5" file_system = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" online_config = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly"] } resource_control = { workspace = true } serde = { version = "1.0", features = ["derive"] } serde_derive = "1.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } tikv_alloc = { workspace = true } tikv_util = { workspace = true } diff --git a/components/causal_ts/Cargo.toml b/components/causal_ts/Cargo.toml index a5dd62cd5d2..71af0419a68 100644 --- a/components/causal_ts/Cargo.toml +++ b/components/causal_ts/Cargo.toml @@ -26,8 +26,8 @@ prometheus-static-metric = "0.5" raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } serde = "1.0" serde_derive = "1.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } test_pd_client = { workspace = true } thiserror = "1.0" tikv_alloc = { workspace = true } diff --git a/components/cdc/Cargo.toml b/components/cdc/Cargo.toml index 94d80bf1d9f..3dfbb402d2e 100644 --- a/components/cdc/Cargo.toml 
+++ b/components/cdc/Cargo.toml @@ -55,8 +55,8 @@ raftstore = { workspace = true } resolved_ts = { workspace = true } security = { workspace = true } semver = "1.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } thiserror = "1.0" tikv = { workspace = true } tikv_kv = { workspace = true } diff --git a/components/cloud/aws/Cargo.toml b/components/cloud/aws/Cargo.toml index 5d28e09e8f4..24518515ea0 100644 --- a/components/cloud/aws/Cargo.toml +++ b/components/cloud/aws/Cargo.toml @@ -31,8 +31,8 @@ rusoto_credential = "0.46.0" rusoto_kms = { version = "0.46.0", features = ["serialize_structs"] } rusoto_s3 = { version = "0.46.0", features = ["serialize_structs"] } rusoto_sts = "0.46.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } thiserror = "1.0" tikv_util = { workspace = true } # better to not use slog-global, but pass in the logger diff --git a/components/cloud/azure/Cargo.toml b/components/cloud/azure/Cargo.toml index 57ea6c14aef..0a45ccc2c63 100644 --- a/components/cloud/azure/Cargo.toml +++ b/components/cloud/azure/Cargo.toml @@ -18,8 +18,8 @@ kvproto = { workspace = true } lazy_static = "1.4.0" oauth2 = { version = "4.0.0", default-features = false } regex = "1" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } tikv_util = { workspace = true } 
tokio = { version = "1.5", features = ["time"] } url = "2.0" diff --git a/components/cloud/gcp/Cargo.toml b/components/cloud/gcp/Cargo.toml index 5074a3c9da4..4c3b8994ffc 100644 --- a/components/cloud/gcp/Cargo.toml +++ b/components/cloud/gcp/Cargo.toml @@ -12,9 +12,9 @@ http = "0.2.0" hyper = "0.14" hyper-tls = "0.5" kvproto = { workspace = true } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } +slog = { workspace = true } # better to not use slog-global, but pass in the logger -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog-global = { workspace = true } tame-gcs = { version = "0.10", features = ["async-multipart"] } tame-oauth = "0.4.7" tikv_util = { workspace = true } diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index 18b6cb7305c..94ab0d39957 100644 --- a/components/encryption/Cargo.toml +++ b/components/encryption/Cargo.toml @@ -30,9 +30,9 @@ protobuf = { version = "2.8", features = ["bytes"] } rand = "0.8" serde = "1.0" serde_derive = "1.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } +slog = { workspace = true } # better to not use slog-global, but pass in the logger -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog-global = { workspace = true } thiserror = "1.0" tikv_alloc = { workspace = true } tikv_util = { workspace = true } diff --git a/components/encryption/export/Cargo.toml b/components/encryption/export/Cargo.toml index fc4fe59d3fb..164ea312e5d 100644 --- a/components/encryption/export/Cargo.toml +++ b/components/encryption/export/Cargo.toml @@ -21,9 +21,9 @@ file_system = { workspace = true } kvproto = { workspace = true } openssl = "0.10" protobuf = { version = "2.8", features = ["bytes"] } -slog = { version = "2.3", features = 
["max_level_trace", "release_max_level_debug"] } +slog = { workspace = true } # better to not use slog-global, but pass in the logger -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog-global = { workspace = true } tikv_util = { workspace = true } [dev-dependencies] diff --git a/components/engine_rocks/Cargo.toml b/components/engine_rocks/Cargo.toml index a0e3e878c54..6775705e3e1 100644 --- a/components/engine_rocks/Cargo.toml +++ b/components/engine_rocks/Cargo.toml @@ -44,8 +44,8 @@ raft = { version = "0.7.0", default-features = false, features = ["protobuf-code regex = "1" serde = "1.0" serde_derive = "1.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } slog_derive = "0.2" tempfile = "3.0" tikv_alloc = { workspace = true } diff --git a/components/engine_rocks_helper/Cargo.toml b/components/engine_rocks_helper/Cargo.toml index ec66aa474a9..b8847fa6ba8 100644 --- a/components/engine_rocks_helper/Cargo.toml +++ b/components/engine_rocks_helper/Cargo.toml @@ -18,8 +18,8 @@ pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } protobuf = "2.8" raftstore = { workspace = true } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } tikv_util = { workspace = true } [dev-dependencies] diff --git a/components/engine_tirocks/Cargo.toml b/components/engine_tirocks/Cargo.toml index 07c2a7ec42c..b3cac78b502 100644 --- a/components/engine_tirocks/Cargo.toml +++ 
b/components/engine_tirocks/Cargo.toml @@ -14,8 +14,8 @@ lazy_static = "1.4.0" log_wrappers = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } slog_derive = "0.2" tikv_alloc = { workspace = true } tikv_util = { workspace = true } diff --git a/components/engine_traits/Cargo.toml b/components/engine_traits/Cargo.toml index 2370f1c9e7e..00b3bb97b66 100644 --- a/components/engine_traits/Cargo.toml +++ b/components/engine_traits/Cargo.toml @@ -19,8 +19,8 @@ log_wrappers = { workspace = true } protobuf = "2" raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } serde = "1.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } thiserror = "1.0" tikv_alloc = { workspace = true } tikv_util = { workspace = true } diff --git a/components/external_storage/Cargo.toml b/components/external_storage/Cargo.toml index 839e34e3f22..4ff13e564ff 100644 --- a/components/external_storage/Cargo.toml +++ b/components/external_storage/Cargo.toml @@ -37,9 +37,9 @@ prometheus = { version = "0.13", default-features = false, features = ["nightly" protobuf = { optional = true, version = "2" } rand = "0.8" rusoto_core = "0.46.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } +slog = { workspace = true } # better to not use slog-global, but pass in the logger -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", 
rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog-global = { workspace = true } tikv_alloc = { workspace = true } tikv_util = { workspace = true } tokio = { version = "1.5", features = ["time", "fs", "process"] } diff --git a/components/file_system/Cargo.toml b/components/file_system/Cargo.toml index 033d31681c1..2252ebc3f1b 100644 --- a/components/file_system/Cargo.toml +++ b/components/file_system/Cargo.toml @@ -21,8 +21,8 @@ prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" rand = "0.8" serde = "1.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } strum = { version = "0.20", features = ["derive"] } tikv_alloc = { workspace = true } tikv_util = { workspace = true } diff --git a/components/pd_client/Cargo.toml b/components/pd_client/Cargo.toml index f46d6111c5d..976ad90432a 100644 --- a/components/pd_client/Cargo.toml +++ b/components/pd_client/Cargo.toml @@ -24,8 +24,8 @@ security = { workspace = true } semver = "0.10" serde = "1.0" serde_derive = "1.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } thiserror = "1.0" tikv_alloc = { workspace = true } tikv_util = { workspace = true } diff --git a/components/raft_log_engine/Cargo.toml b/components/raft_log_engine/Cargo.toml index 8a336177706..cbccea9dbe0 100644 --- a/components/raft_log_engine/Cargo.toml +++ b/components/raft_log_engine/Cargo.toml @@ -18,8 +18,8 @@ raft = { version = "0.7.0", default-features = false, features = ["protobuf-code raft-engine = { git = 
"https://github.com/tikv/raft-engine.git", features = ["swap"] } serde = "1.0" serde_derive = "1.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } tikv_util = { workspace = true } time = "0.1" tracker = { workspace = true } diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 5b917b9ddf7..c7e403afebe 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -65,7 +65,7 @@ yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } [dev-dependencies] engine_test = { workspace = true } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog-global = { workspace = true } tempfile = "3.0" test_pd = { workspace = true } test_util = { workspace = true } diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 8df501f279d..cbf943800ee 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -77,8 +77,8 @@ resource_metering = { workspace = true } serde = "1.0" serde_derive = "1.0" serde_with = "1.4" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } smallvec = "1.4" sst_importer = { workspace = true } tempfile = "3.0" diff --git a/components/resolved_ts/Cargo.toml b/components/resolved_ts/Cargo.toml index 10a555678c3..db3c0643cb7 100644 --- a/components/resolved_ts/Cargo.toml +++ b/components/resolved_ts/Cargo.toml @@ -41,8 +41,8 @@ protobuf = { version = "2.8", features = 
["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raftstore = { workspace = true } security = { workspace = true } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } thiserror = "1.0" tikv = { workspace = true } tikv_util = { workspace = true } diff --git a/components/resource_control/Cargo.toml b/components/resource_control/Cargo.toml index 2e1a0990d49..6cb7d547e6c 100644 --- a/components/resource_control/Cargo.toml +++ b/components/resource_control/Cargo.toml @@ -15,7 +15,7 @@ crossbeam-skiplist = "0.1" dashmap = "5.1" fail = "0.5" futures = { version = "0.3" } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.0" online_config = { workspace = true } pd_client = { workspace = true } @@ -23,8 +23,8 @@ pin-project = "1.0" prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } serde = { version = "1.0", features = ["derive"] } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } test_pd = { workspace = true } test_pd_client = { workspace = true } tikv_util = { workspace = true } diff --git a/components/resource_metering/Cargo.toml b/components/resource_metering/Cargo.toml index 20ed4ea2eda..f8e26e01c50 100644 --- a/components/resource_metering/Cargo.toml +++ b/components/resource_metering/Cargo.toml @@ -18,8 +18,8 @@ pin-project = "1.0" prometheus = { version = "0.13", features = ["nightly"] } serde = "1.0" serde_derive = "1.0" -slog = 
{ version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } tikv_util = { workspace = true } [target.'cfg(target_os = "linux")'.dependencies] diff --git a/components/security/Cargo.toml b/components/security/Cargo.toml index a9cdd620d12..fdf7ab8e29e 100644 --- a/components/security/Cargo.toml +++ b/components/security/Cargo.toml @@ -8,6 +8,7 @@ publish = false collections = { workspace = true } encryption = { workspace = true } grpcio = { workspace = true } +kvproto = { workspace = true } serde = "1.0" serde_derive = "1.0" serde_json = "1.0" diff --git a/components/security/src/lib.rs b/components/security/src/lib.rs index 68328c01ebe..bbd296ae1f7 100644 --- a/components/security/src/lib.rs +++ b/components/security/src/lib.rs @@ -87,7 +87,7 @@ pub struct ClientSuite { impl SecurityConfig { /// Validates ca, cert and private key. - pub fn validate(&self) -> Result<(), Box> { + pub fn validate(&self, raftstore_v2: bool) -> Result<(), Box> { check_key_file("ca key", &self.ca_path)?; check_key_file("cert key", &self.cert_path)?; check_key_file("private key", &self.key_path)?; @@ -97,6 +97,12 @@ impl SecurityConfig { { return Err("ca, cert and private key should be all configured.".into()); } + if raftstore_v2 + && self.encryption.data_encryption_method + != kvproto::encryptionpb::EncryptionMethod::Plaintext + { + return Err("encryption is not supported for partitioned-raft-kv".into()); + } Ok(()) } @@ -298,7 +304,7 @@ mod tests { fn test_security() { let cfg = SecurityConfig::default(); // default is disable secure connection. 
- cfg.validate().unwrap(); + cfg.validate(false).unwrap(); let mgr = SecurityManager::new(&cfg).unwrap(); assert!(mgr.cfg.ca_path.is_empty()); assert!(mgr.cfg.cert_path.is_empty()); @@ -307,7 +313,7 @@ mod tests { let assert_cfg = |c: fn(&mut SecurityConfig), valid: bool| { let mut invalid_cfg = cfg.clone(); c(&mut invalid_cfg); - assert_eq!(invalid_cfg.validate().is_ok(), valid); + assert_eq!(invalid_cfg.validate(false).is_ok(), valid); }; // invalid path should be rejected. @@ -335,11 +341,11 @@ mod tests { c.cert_path = format!("{}", example_cert.display()); c.key_path = format!("{}", example_key.display()); // incomplete configuration. - c.validate().unwrap_err(); + c.validate(false).unwrap_err(); // data should be loaded from file after validating. c.ca_path = format!("{}", example_ca.display()); - c.validate().unwrap(); + c.validate(false).unwrap(); let (ca, cert, key) = c.load_certs().unwrap_or_default(); assert_eq!(ca, vec![0]); diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index d5e2f177b5e..554dbaa63f9 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -73,8 +73,8 @@ resource_control = { workspace = true } resource_metering = { workspace = true } security = { workspace = true } serde_json = "1.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } snap_recovery = { workspace = true } tempfile = "3.0" tikv = { workspace = true } diff --git a/components/snap_recovery/Cargo.toml b/components/snap_recovery/Cargo.toml index 1b69d8ba150..4768759b852 100644 --- a/components/snap_recovery/Cargo.toml +++ b/components/snap_recovery/Cargo.toml @@ -14,14 +14,14 @@ engine_traits = { workspace = true } futures = { version = "0.3", features = ["executor"] } grpcio = { workspace = 
true } keys = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto" } +kvproto = { workspace = true } log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } pd_client = { workspace = true } protobuf = { version = "2.8", features = ["bytes"] } raft_log_engine = { workspace = true } raftstore = { workspace = true } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } structopt = "0.3" tempfile = "3.0" thiserror = "1.0" diff --git a/components/sst_importer/Cargo.toml b/components/sst_importer/Cargo.toml index d0e2ff7eca8..a21a58c0a6c 100644 --- a/components/sst_importer/Cargo.toml +++ b/components/sst_importer/Cargo.toml @@ -34,8 +34,8 @@ prometheus = { version = "0.13", default-features = false } rand = "0.8" serde = "1.0" serde_derive = "1.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } thiserror = "1.0" tikv_alloc = { workspace = true } tikv_util = { workspace = true } diff --git a/components/test_pd/Cargo.toml b/components/test_pd/Cargo.toml index 6277789b194..7747ac1bbc6 100644 --- a/components/test_pd/Cargo.toml +++ b/components/test_pd/Cargo.toml @@ -13,8 +13,8 @@ kvproto = { workspace = true } log_wrappers = { workspace = true } pd_client = { workspace = true } security = { workspace = true } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace 
= true } +slog-global = { workspace = true } tikv_util = { workspace = true } tokio = { version = "1.0", features = ["full"] } tokio-stream = "0.1" diff --git a/components/test_pd_client/Cargo.toml b/components/test_pd_client/Cargo.toml index 9f67752b4c5..3b002970236 100644 --- a/components/test_pd_client/Cargo.toml +++ b/components/test_pd_client/Cargo.toml @@ -14,8 +14,8 @@ kvproto = { workspace = true } log_wrappers = { workspace = true } pd_client = { workspace = true } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread"] } tokio-timer = { workspace = true } diff --git a/components/test_raftstore/Cargo.toml b/components/test_raftstore/Cargo.toml index 25a1224e261..1b87aeac11b 100644 --- a/components/test_raftstore/Cargo.toml +++ b/components/test_raftstore/Cargo.toml @@ -53,9 +53,9 @@ resource_control = { workspace = true } resource_metering = { workspace = true } security = { workspace = true } server = { workspace = true } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } +slog = { workspace = true } # better to not use slog-global, but pass in the logger -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog-global = { workspace = true } tempfile = "3.0" test_pd_client = { workspace = true } test_util = { workspace = true } diff --git a/components/test_util/Cargo.toml b/components/test_util/Cargo.toml index 740132353f3..64dbb2456ce 100644 --- a/components/test_util/Cargo.toml +++ b/components/test_util/Cargo.toml @@ -20,8 
+20,8 @@ kvproto = { workspace = true } rand = "0.8" rand_isaac = "0.3" security = { workspace = true } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } tempfile = "3.0" tikv_util = { workspace = true } time = "0.1" diff --git a/components/tidb_query_datatype/Cargo.toml b/components/tidb_query_datatype/Cargo.toml index e670674cdc6..c1be29a956d 100644 --- a/components/tidb_query_datatype/Cargo.toml +++ b/components/tidb_query_datatype/Cargo.toml @@ -32,8 +32,8 @@ protobuf = "2" regex = "1.1" serde = "1.0" serde_json = "1.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } static_assertions = { version = "1.0", features = ["nightly"] } thiserror = "1.0" tidb_query_common = { workspace = true } diff --git a/components/tidb_query_executors/Cargo.toml b/components/tidb_query_executors/Cargo.toml index 331634dbd04..3fb3fdca2bb 100644 --- a/components/tidb_query_executors/Cargo.toml +++ b/components/tidb_query_executors/Cargo.toml @@ -17,8 +17,8 @@ kvproto = { workspace = true } log_wrappers = { workspace = true } match-template = "0.0.1" protobuf = { version = "2.8", features = ["bytes"] } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } smallvec = "1.4" tidb_query_aggr = { workspace = true } tidb_query_common = { workspace = true } diff --git 
a/components/tikv_kv/Cargo.toml b/components/tikv_kv/Cargo.toml index 2911c7738c6..8197637243e 100644 --- a/components/tikv_kv/Cargo.toml +++ b/components/tikv_kv/Cargo.toml @@ -43,8 +43,8 @@ prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raftstore = { workspace = true } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } slog_derive = "0.2" tempfile = "3.0" thiserror = "1.0" diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 1193751b228..b501322e152 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -47,9 +47,9 @@ rand = "0.8" rusoto_core = "0.46.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } +slog = { workspace = true } slog-async = "2.3" -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog-global = { workspace = true } slog-json = "2.3" slog-term = "2.4" sysinfo = "0.26" diff --git a/components/txn_types/Cargo.toml b/components/txn_types/Cargo.toml index 0c357ef1dd6..987b7216d22 100644 --- a/components/txn_types/Cargo.toml +++ b/components/txn_types/Cargo.toml @@ -13,7 +13,7 @@ error_code = { workspace = true } farmhash = "1.1.5" kvproto = { workspace = true } log_wrappers = { workspace = true } -slog = "2.3" +slog = { workspace = true } thiserror = "1.0" tikv_alloc = { workspace = true } tikv_util = { workspace = true } diff --git a/src/config/mod.rs b/src/config/mod.rs index 7247d426b21..5e923023ca0 100644 --- a/src/config/mod.rs 
+++ b/src/config/mod.rs @@ -3244,7 +3244,8 @@ impl TikvConfig { self.coprocessor.enable_region_bucket, self.coprocessor.region_bucket_size, )?; - self.security.validate()?; + self.security + .validate(self.storage.engine == EngineType::RaftKv2)?; self.import.validate()?; self.backup.validate()?; self.backup_stream.validate()?; diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 96ee19e9bae..6fb05f19cd1 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -96,8 +96,8 @@ raft_log_engine = { workspace = true } raftstore = { workspace = true } rand = "0.8.3" resource_control = { workspace = true } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog = { workspace = true } +slog-global = { workspace = true } tempfile = "3.0" tidb_query_aggr = { workspace = true } tidb_query_common = { workspace = true } From 10c5813851cdcd399bde953248616f8717b19e60 Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 9 Feb 2023 14:03:59 +0800 Subject: [PATCH 0514/1149] resource_control: pass missing resource group name to request header (#14192) close tikv/tikv#14191 pass missing resource group name to request header Signed-off-by: Connor1996 Co-authored-by: Xinye Tao --- src/server/raftkv/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 751c07c6b65..9c4c59a4ae5 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -160,6 +160,7 @@ pub fn new_request_header(ctx: &Context) -> RaftRequestHeader { } header.set_sync_log(ctx.get_sync_log()); header.set_replica_read(ctx.get_replica_read()); + header.set_resource_group_name(ctx.get_resource_group_name().to_owned()); header } From 984d09a559e14d6a9a26e8162a2345e667d8f49d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: 
Thu, 9 Feb 2023 14:20:00 +0800 Subject: [PATCH 0515/1149] log-backup: allow observer hibernate when there isn't any task (#14018) close tikv/tikv#14012 Added a "hibernate mode" for the log backup observer: while there isn't any task, it won't emit leader drop or region change events. So some verbose logs can be omitted. Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- components/backup-stream/src/observer.rs | 32 +++++++++++++++++++-- components/backup-stream/src/utils.rs | 4 +++ components/sst_importer/src/sst_importer.rs | 8 ++++-- 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/components/backup-stream/src/observer.rs b/components/backup-stream/src/observer.rs index 36c310d3532..1a0a0f7cc9e 100644 --- a/components/backup-stream/src/observer.rs +++ b/components/backup-stream/src/observer.rs @@ -96,6 +96,13 @@ impl BackupStreamObserver { .rl() .is_overlapping((region.get_start_key(), end_key)) } + + /// Check whether there are any task range registered to the observer. + /// when there isn't any task, we can ignore the events, so we don't need to + /// handle useless events. (Also won't yield verbose logs.) 
+ fn is_hibernating(&self) -> bool { + self.ranges.rl().is_empty() + } } impl Coprocessor for BackupStreamObserver {} @@ -149,7 +156,7 @@ impl CmdObserver for BackupStreamObserver { impl RoleObserver for BackupStreamObserver { fn on_role_change(&self, ctx: &mut ObserverContext<'_>, r: &RoleChange) { - if r.state != StateRole::Leader { + if r.state != StateRole::Leader && !self.is_hibernating() { try_send!( self.scheduler, Task::ModifyObserve(ObserveOp::Stop { @@ -167,7 +174,7 @@ impl RegionChangeObserver for BackupStreamObserver { event: RegionChangeEvent, role: StateRole, ) { - if role != StateRole::Leader { + if role != StateRole::Leader || self.is_hibernating() { return; } match event { @@ -207,7 +214,7 @@ mod tests { use raft::StateRole; use raftstore::coprocessor::{ Cmd, CmdBatch, CmdObserveInfo, CmdObserver, ObserveHandle, ObserveLevel, ObserverContext, - RegionChangeEvent, RegionChangeObserver, RoleChange, RoleObserver, + RegionChangeEvent, RegionChangeObserver, RegionChangeReason, RoleChange, RoleObserver, }; use tikv_util::{worker::dummy_scheduler, HandyRwLock}; @@ -321,4 +328,23 @@ mod tests { Ok(Some(Task::ModifyObserve(ObserveOp::Stop { region, .. }))) if region.id == 42 ); } + + #[test] + fn test_hibernate() { + let (sched, mut rx) = dummy_scheduler(); + + // Prepare: assuming a task wants the range of [0001, 0010]. 
+ let o = BackupStreamObserver::new(sched); + let r = fake_region(43, b"0010", b"0042"); + let mut ctx = ObserverContext::new(&r); + o.on_region_changed(&mut ctx, RegionChangeEvent::Create, StateRole::Leader); + o.on_region_changed( + &mut ctx, + RegionChangeEvent::Update(RegionChangeReason::Split), + StateRole::Leader, + ); + o.on_role_change(&mut ctx, &RoleChange::new(StateRole::Leader)); + let task = rx.recv_timeout(Duration::from_millis(20)); + assert!(task.is_err(), "it is {:?}", task); + } } diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 1746882690f..a5d83e50328 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -315,6 +315,10 @@ impl SegmentMap { pub fn get_inner(&mut self) -> &mut BTreeMap> { &mut self.0 } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } } /// transform a [`RaftCmdRequest`] to `(key, value, cf)` triple. diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index fabe9e2a13a..384a48e96a8 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -438,7 +438,9 @@ impl SstImporter { if self.import_support_download() { let shrink_file_count = shrink_files.len(); - info!("shrink space by tick"; "shrink files count" => shrink_file_count, "retain files count" => retain_file_count); + if shrink_file_count > 0 || retain_file_count > 0 { + info!("shrink space by tick"; "shrink files count" => shrink_file_count, "retain files count" => retain_file_count); + } for f in shrink_files { if let Err(e) = file_system::remove_file(&f) { @@ -447,7 +449,9 @@ impl SstImporter { } shrink_file_count } else { - info!("shrink cache by tick"; "shrink size" => shrink_buff_size, "retain size" => retain_buff_size); + if shrink_buff_size > 0 || retain_buff_size > 0 { + info!("shrink cache by tick"; "shrink size" => shrink_buff_size, "retain size" => 
retain_buff_size); + } self.dec_mem(shrink_buff_size as _); shrink_buff_size } From c5e8704c701840267e4fc128be8b99a10836c717 Mon Sep 17 00:00:00 2001 From: Jay Date: Thu, 9 Feb 2023 15:40:00 +0800 Subject: [PATCH 0516/1149] raftkv: allow cancel error in snapshot (#14183) close tikv/tikv#13926 Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/test_raftstore/src/cluster.rs | 5 +++ src/server/raftkv/mod.rs | 8 ++-- tests/integrations/coprocessor/test_select.rs | 44 ++++++++++++++++++- 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 81e7129407e..9d6444904f2 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -389,12 +389,17 @@ impl Cluster { pub fn stop_node(&mut self, node_id: u64) { debug!("stopping node {}", node_id); self.group_props[&node_id].mark_shutdown(); + // Simulate shutdown behavior of server shutdown. It's not enough to just set + // the map above as current thread may also query properties during shutdown. + let previous_prop = tikv_util::thread_group::current_properties(); + tikv_util::thread_group::set_properties(Some(self.group_props[&node_id].clone())); match self.sim.write() { Ok(mut sim) => sim.stop_node(node_id), Err(_) => safe_panic!("failed to acquire write lock."), } self.pd_client.shutdown_store(node_id); debug!("node {} stopped", node_id); + tikv_util::thread_group::set_properties(previous_prop); } pub fn get_engine(&self, node_id: u64) -> RocksEngine { diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 9c4c59a4ae5..0f0d8fa5689 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -579,10 +579,12 @@ where .map_err(kv::Error::from); } async move { - // It's impossible to return cancel because the callback will be invoked if it's - // destroyed. 
let res = match res { - Ok(()) => f.await.unwrap(), + Ok(()) => match f.await { + Ok(r) => r, + // Canceled may be returned during shutdown. + Err(e) => Err(kv::Error::from(kv::ErrorInner::Other(box_err!(e)))), + }, Err(e) => Err(e), }; match res { diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index 056f24b5fee..fe545d07ec1 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -21,7 +21,11 @@ use tikv::{ server::Config, storage::TestEngineBuilder, }; -use tikv_util::{codec::number::*, config::ReadableSize}; +use tikv_util::{ + codec::number::*, + config::{ReadableDuration, ReadableSize}, + HandyRwLock, +}; use tipb::{ AnalyzeColumnsReq, AnalyzeReq, AnalyzeType, ChecksumRequest, Chunk, Expr, ExprType, ScalarFuncSig, SelectResponse, @@ -226,6 +230,44 @@ fn test_select_after_lease() { } } +/// If a failed read should not trigger panic. +#[test] +fn test_select_failed() { + let mut cluster = test_raftstore::new_server_cluster(0, 3); + cluster.cfg.raft_store.check_leader_lease_interval = ReadableDuration::hours(10); + cluster.run(); + // make sure leader has been elected. + assert_eq!(cluster.must_get(b""), None); + let region = cluster.get_region(b""); + let leader = cluster.leader_of_region(region.get_id()).unwrap(); + let engine = cluster.sim.rl().storages[&leader.get_id()].clone(); + let mut ctx = Context::default(); + ctx.set_region_id(region.get_id()); + ctx.set_region_epoch(region.get_region_epoch().clone()); + ctx.set_peer(leader); + + let product = ProductTable::new(); + let (_, endpoint, _) = + init_data_with_engine_and_commit(ctx.clone(), engine, &product, &[], true); + + // Sleep until the leader lease is expired. 
+ thread::sleep( + cluster.cfg.raft_store.raft_heartbeat_interval() + * cluster.cfg.raft_store.raft_election_timeout_ticks as u32 + * 2, + ); + for id in 1..=3 { + if id != ctx.get_peer().get_store_id() { + cluster.stop_node(id); + } + } + let req = DagSelect::from(&product).build_with(ctx.clone(), &[0]); + let f = endpoint.parse_and_handle_unary_request(req, None); + cluster.stop_node(ctx.get_peer().get_store_id()); + drop(cluster); + let _ = futures::executor::block_on(f); +} + #[test] fn test_scan_detail() { let data = vec![ From d513b789f0709520cdccc20bc4e6560761ad01a5 Mon Sep 17 00:00:00 2001 From: Zhi Qi <30543181+LittleFall@users.noreply.github.com> Date: Thu, 9 Feb 2023 17:31:59 +0800 Subject: [PATCH 0517/1149] copr: (feat) Implement operator PartitionTopN (#14116) ref tikv/tikv#13936 Signed-off-by: Zhi Qi Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/tidb_query_executors/src/lib.rs | 2 + .../src/partition_top_n_executor.rs | 2204 +++++++++++++++++ components/tidb_query_executors/src/runner.rs | 40 +- .../src/top_n_executor.rs | 2 +- .../src/util/top_n_heap.rs | 26 +- 6 files changed, 2258 insertions(+), 18 deletions(-) create mode 100644 components/tidb_query_executors/src/partition_top_n_executor.rs diff --git a/Cargo.lock b/Cargo.lock index 74701b0561f..046220980bc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6650,7 +6650,7 @@ dependencies = [ [[package]] name = "tipb" version = "0.0.1" -source = "git+https://github.com/pingcap/tipb.git#c6b7a5a1623bb2766a502301ecc3ac8f98cc7c79" +source = "git+https://github.com/pingcap/tipb.git#614f3ffd42ddc84b78ff59d65f105f2099a6f1b1" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/tidb_query_executors/src/lib.rs b/components/tidb_query_executors/src/lib.rs index ad86f94f9b8..2ce85f90111 100644 --- a/components/tidb_query_executors/src/lib.rs +++ b/components/tidb_query_executors/src/lib.rs @@ -29,6 +29,7 @@ mod fast_hash_aggr_executor; mod index_scan_executor; pub mod interface; mod 
limit_executor; +mod partition_top_n_executor; mod projection_executor; pub mod runner; mod selection_executor; @@ -42,6 +43,7 @@ mod util; pub use self::{ fast_hash_aggr_executor::BatchFastHashAggregationExecutor, index_scan_executor::BatchIndexScanExecutor, limit_executor::BatchLimitExecutor, + partition_top_n_executor::BatchPartitionTopNExecutor, projection_executor::BatchProjectionExecutor, selection_executor::BatchSelectionExecutor, simple_aggr_executor::BatchSimpleAggregationExecutor, slow_hash_aggr_executor::BatchSlowHashAggregationExecutor, diff --git a/components/tidb_query_executors/src/partition_top_n_executor.rs b/components/tidb_query_executors/src/partition_top_n_executor.rs new file mode 100644 index 00000000000..52cf2e85925 --- /dev/null +++ b/components/tidb_query_executors/src/partition_top_n_executor.rs @@ -0,0 +1,2204 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::sync::Arc; + +use async_trait::async_trait; +use tidb_query_common::{storage::IntervalRange, Result}; +use tidb_query_datatype::{ + codec::{batch::LazyBatchColumnVec, data_type::BATCH_MAX_SIZE}, + expr::{EvalConfig, EvalContext, EvalWarnings}, +}; +use tidb_query_expr::{RpnExpression, RpnExpressionBuilder, RpnStackNode}; +use tipb::{Expr, FieldType}; + +use crate::{ + interface::*, + util::{ + ensure_columns_decoded, eval_exprs_decoded_no_lifetime, + top_n_heap::{HeapItemSourceData, HeapItemUnsafe, TopNHeap}, + }, +}; + +pub struct BatchPartitionTopNExecutor { + heap: TopNHeap, + + /// See `BatchPartitionTopNExecutor::eval_columns_buffer_unsafe` for more + /// information. + #[allow(clippy::box_collection)] + eval_columns_buffer_unsafe: Box>>, + + /// The data should be sorted by the partition expression. + /// But if not, the result is still correct after the second-stage topn. + partition_exprs: Box<[RpnExpression]>, + partition_exprs_field_type: Box<[FieldType]>, + /// dummy value, just for convenience. 
+ partition_is_desc: Box<[bool]>, + + /// The partition key of the last row, i.e. all the heap items have the same + /// partition key. + /// The reason for make this a HeapItemUnsafe is to reuse of the existing + /// comparison logic, i.e. `cmp_sort_key`. + last_partition_key: Option, + + order_exprs: Box<[RpnExpression]>, + /// This field stores the field type of the results evaluated by the exprs + /// in `order_exprs`. + order_exprs_field_type: Box<[FieldType]>, + + /// Whether or not it is descending order for each order by column. + order_is_desc: Box<[bool]>, + + n: usize, + + context: EvalContext, + src: Src, +} + +impl BatchPartitionTopNExecutor { + #[cfg(test)] + pub fn new_for_test( + src: Src, + order_exprs: Vec, + order_is_desc: Vec, + partition_exprs: Vec, + n: usize, + ) -> Self { + assert_eq!(order_exprs.len(), order_is_desc.len()); + + let order_exprs_field_type: Vec = order_exprs + .iter() + .map(|expr| expr.ret_field_type(src.schema()).clone()) + .collect(); + + let partition_exprs_field_type: Vec = partition_exprs + .iter() + .map(|expr| expr.ret_field_type(src.schema()).clone()) + .collect(); + + Self { + heap: TopNHeap::new(n), + eval_columns_buffer_unsafe: Box::>::default(), + partition_is_desc: vec![false; partition_exprs.len()].into_boxed_slice(), + partition_exprs: partition_exprs.into_boxed_slice(), + partition_exprs_field_type: partition_exprs_field_type.into_boxed_slice(), + last_partition_key: None, + order_exprs: order_exprs.into_boxed_slice(), + order_exprs_field_type: order_exprs_field_type.into_boxed_slice(), + order_is_desc: order_is_desc.into_boxed_slice(), + n, + + context: EvalContext::default(), + src, + } + } + + #[cfg(test)] + pub fn new_for_test_with_config( + config: Arc, + src: Src, + order_exprs: Vec, + order_is_desc: Vec, + partition_exprs: Vec, + n: usize, + ) -> Self { + assert_eq!(order_exprs.len(), order_is_desc.len()); + + let order_exprs_field_type: Vec = order_exprs + .iter() + .map(|expr| 
expr.ret_field_type(src.schema()).clone()) + .collect(); + + let partition_exprs_field_type: Vec = partition_exprs + .iter() + .map(|expr| expr.ret_field_type(src.schema()).clone()) + .collect(); + + Self { + heap: TopNHeap::new(n), + eval_columns_buffer_unsafe: Box::>::default(), + partition_is_desc: vec![false; partition_exprs.len()].into_boxed_slice(), + partition_exprs: partition_exprs.into_boxed_slice(), + partition_exprs_field_type: partition_exprs_field_type.into_boxed_slice(), + last_partition_key: None, + order_exprs: order_exprs.into_boxed_slice(), + order_exprs_field_type: order_exprs_field_type.into_boxed_slice(), + order_is_desc: order_is_desc.into_boxed_slice(), + n, + + context: EvalContext::new(config), + src, + } + } + + pub fn new( + config: Arc, + src: Src, + partition_exprs_def: Vec, + order_exprs_def: Vec, + order_is_desc: Vec, + n: usize, + ) -> Result { + assert_eq!(order_exprs_def.len(), order_is_desc.len()); + + let mut ctx = EvalContext::new(config.clone()); + + let mut order_exprs: Vec = Vec::with_capacity(order_exprs_def.len()); + for def in order_exprs_def { + order_exprs.push(RpnExpressionBuilder::build_from_expr_tree( + def, + &mut ctx, + src.schema().len(), + )?); + } + let order_exprs_field_type: Vec = order_exprs + .iter() + .map(|expr| expr.ret_field_type(src.schema()).clone()) + .collect(); + + let mut partition_exprs: Vec = Vec::with_capacity(partition_exprs_def.len()); + for def in partition_exprs_def { + partition_exprs.push(RpnExpressionBuilder::build_from_expr_tree( + def, + &mut ctx, + src.schema().len(), + )?); + } + let partition_exprs_field_type: Vec = partition_exprs + .iter() + .map(|expr| expr.ret_field_type(src.schema()).clone()) + .collect(); + + Ok(Self { + // Simply large enough to avoid repeated allocations + heap: TopNHeap::new(n), + eval_columns_buffer_unsafe: Box::new(Vec::with_capacity(512)), + partition_is_desc: vec![false; partition_exprs.len()].into_boxed_slice(), + partition_exprs: 
partition_exprs.into_boxed_slice(), + partition_exprs_field_type: partition_exprs_field_type.into_boxed_slice(), + order_exprs: order_exprs.into_boxed_slice(), + order_exprs_field_type: order_exprs_field_type.into_boxed_slice(), + order_is_desc: order_is_desc.into_boxed_slice(), + n, + context: EvalContext::new(config), + src, + last_partition_key: None, + }) + } + + // Check whether the partition key of the this row is equal to the saved + // partition key. If yes, return true. Else, update saved partition key, + // and return false. + fn check_partition_equal_or_update(&mut self, current: HeapItemUnsafe) -> Result { + if let Some(last_partition_key) = &self.last_partition_key { + if last_partition_key == ¤t { + return Ok(true); + } + } + self.last_partition_key = Some(current); + Ok(false) + } + + #[inline] + async fn handle_next_batch(&mut self) -> Result<(LazyBatchColumnVec, bool)> { + let mut result = LazyBatchColumnVec::empty(); + let src_result = self.src.next_batch(BATCH_MAX_SIZE).await; + self.context.warnings = src_result.warnings; + let src_is_drained = src_result.is_drained?; + + let (mut physical_columns, logical_rows) = + (src_result.physical_columns, src_result.logical_rows); + + if !logical_rows.is_empty() { + ensure_columns_decoded( + &mut self.context, + &self.order_exprs, + self.src.schema(), + &mut physical_columns, + &logical_rows, + )?; + ensure_columns_decoded( + &mut self.context, + &self.partition_exprs, + self.src.schema(), + &mut physical_columns, + &logical_rows, + )?; + + let pinned_source_data = Arc::new(HeapItemSourceData { + physical_columns, + logical_rows, + }); + + let order_eval_offset = self.eval_columns_buffer_unsafe.len(); + unsafe { + eval_exprs_decoded_no_lifetime( + &mut self.context, + &self.order_exprs, + self.src.schema(), + &pinned_source_data.physical_columns, + &pinned_source_data.logical_rows, + &mut self.eval_columns_buffer_unsafe, + )?; + } + // todo: optimize memory use of this. 
+ let partition_eval_offset = self.eval_columns_buffer_unsafe.len(); + unsafe { + eval_exprs_decoded_no_lifetime( + &mut self.context, + &self.partition_exprs, + self.src.schema(), + &pinned_source_data.physical_columns, + &pinned_source_data.logical_rows, + &mut self.eval_columns_buffer_unsafe, + )?; + } + // todo: optimize the memory usage of this, don't need so many same information + // in items. Maybe we can import a Heap with customized comparator. + for logical_row_index in 0..pinned_source_data.logical_rows.len() { + let partition_key = HeapItemUnsafe { + // order_is_desc_ptr here is just a dummy value, todo: refactor the compare + // logic and eliminate this. + order_is_desc_ptr: (*self.partition_is_desc).into(), + order_exprs_field_type_ptr: (*self.partition_exprs_field_type).into(), + source_data: pinned_source_data.clone(), + eval_columns_buffer_ptr: self.eval_columns_buffer_unsafe.as_ref().into(), + eval_columns_offset: partition_eval_offset, + logical_row_index, + }; + + if !self.check_partition_equal_or_update(partition_key)? { + self.heap.take_all_append_to(&mut result); + self.heap = TopNHeap::new(self.n); + } + + let row = HeapItemUnsafe { + order_is_desc_ptr: (*self.order_is_desc).into(), + order_exprs_field_type_ptr: (*self.order_exprs_field_type).into(), + source_data: pinned_source_data.clone(), + eval_columns_buffer_ptr: self.eval_columns_buffer_unsafe.as_ref().into(), + eval_columns_offset: order_eval_offset, + logical_row_index, + }; + self.heap.add_row(row)?; + } + } + if src_is_drained { + self.heap.take_all_append_to(&mut result); + } + + Ok((result, src_is_drained)) + } +} + +/// todo: review this. +/// All `NonNull` pointers in `BatchPartitionTopNExecutor` cannot be accessed +/// out of the struct and `BatchPartitionTopNExecutor` doesn't leak the pointers +/// to other threads. Therefore, with those `NonNull` pointers, +/// BatchPartitionTopNExecutor still remains `Send`. 
+unsafe impl Send for BatchPartitionTopNExecutor {} + +#[async_trait] +impl BatchExecutor for BatchPartitionTopNExecutor { + type StorageStats = Src::StorageStats; + + #[inline] + fn schema(&self) -> &[FieldType] { + self.src.schema() + } + + /// Implementation of BatchExecutor::next_batch + /// Memory Control Analysis: + /// 1. if n > paging_size(1024), this operator won't do anything and just + /// return data to upstream. So we can think n is less than or equal to + /// paging_size. + /// 2. The worst case is that there is already n rows in heap, and first + /// row of src_result has different partition with rows in heap. So heap + /// will be flushed. And the last row of src_result has another different + /// partition with the first two. So heap will be flushed again. + /// In this case, there can be 2*n-1 rows in the result, which may be larger + /// than paging_size. + /// todo: find a good solution to limit it up to paging_size. + /// baseline: limit n up to paging_size/2 + #[inline] + async fn next_batch(&mut self, scan_rows: usize) -> BatchExecuteResult { + if self.n == 0 { + return BatchExecuteResult { + physical_columns: LazyBatchColumnVec::empty(), + logical_rows: Vec::new(), + warnings: EvalWarnings::default(), + is_drained: Ok(true), + }; + } + + // limit intermediate memory by paging_size. 
+ if let Some(paging_size) = self.context.cfg.paging_size { + if self.n * 2 > paging_size as usize { + return self.src.next_batch(scan_rows).await; + } + } + + let result = self.handle_next_batch().await; + + match result { + Err(e) => BatchExecuteResult { + physical_columns: LazyBatchColumnVec::empty(), + logical_rows: Vec::new(), + warnings: self.context.take_warnings(), + is_drained: Err(e), + }, + Ok((logical_columns, is_drained)) => { + let logical_rows = (0..logical_columns.rows_len()).collect(); + BatchExecuteResult { + physical_columns: logical_columns, + logical_rows, + warnings: self.context.take_warnings(), + is_drained: Ok(is_drained), + } + } + } + } + + #[inline] + fn collect_exec_stats(&mut self, dest: &mut ExecuteStats) { + self.src.collect_exec_stats(dest); + } + + #[inline] + fn collect_storage_stats(&mut self, dest: &mut Self::StorageStats) { + self.src.collect_storage_stats(dest); + } + + #[inline] + fn take_scanned_range(&mut self) -> IntervalRange { + self.src.take_scanned_range() + } + + #[inline] + fn can_be_cached(&self) -> bool { + self.src.can_be_cached() + } +} + +#[cfg(test)] +mod tests { + use futures::executor::block_on; + use tidb_query_datatype::{ + builder::FieldTypeBuilder, + codec::{batch::LazyBatchColumnVec, data_type::*}, + expr::EvalWarnings, + Collation, FieldTypeFlag, FieldTypeTp, + }; + use tidb_query_expr::{ + impl_arithmetic::{IntDivideInt, IntIntPlus}, + RpnExpressionBuilder, + }; + + use super::*; + use crate::{impl_arithmetic::arithmetic_fn_meta, util::mock_executor::MockExecutor}; + + #[test] + fn test_top_0() { + let src_exec = MockExecutor::new( + vec![FieldTypeTp::Double.into(), FieldTypeTp::Double.into()], + vec![BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Real(vec![None, Real::new(7.0).ok(), None, None].into()), + VectorValue::Real(vec![None, Real::new(7.0).ok(), None, None].into()), + ]), + logical_rows: (0..1).collect(), + warnings: EvalWarnings::default(), + 
is_drained: Ok(true), + }], + ); + + let mut exec = BatchPartitionTopNExecutor::new_for_test( + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_constant_for_test(1) + .build_for_test(), + ], + vec![false], + vec![ + RpnExpressionBuilder::new_for_test() + .push_constant_for_test(1) + .build_for_test(), + ], + 0, + ); + + let r = block_on(exec.next_batch(1)); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(r.is_drained.unwrap()); + } + + #[test] + fn test_constant_partition() { + let src_exec = MockExecutor::new( + vec![FieldTypeTp::Double.into(), FieldTypeTp::Double.into()], + vec![BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Real( + vec![ + Real::new(1.0).ok(), + Real::new(2.0).ok(), + Real::new(3.0).ok(), + Real::new(4.0).ok(), + ] + .into(), + ), + VectorValue::Real( + vec![ + Real::new(5.0).ok(), + Real::new(6.0).ok(), + Real::new(7.0).ok(), + Real::new(8.0).ok(), + ] + .into(), + ), + ]), + logical_rows: (0..4).collect(), + warnings: EvalWarnings::default(), + is_drained: Ok(true), + }], + ); + + let mut exec = BatchPartitionTopNExecutor::new_for_test( + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_constant_for_test(1) + .build_for_test(), + ], + vec![false], + vec![ + RpnExpressionBuilder::new_for_test() + .push_constant_for_test(1) + .build_for_test(), + ], + 2, + ); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1]); + assert_eq!(r.physical_columns.rows_len(), 2); + assert_eq!(r.physical_columns.columns_len(), 2); + assert_eq!( + r.physical_columns[0].decoded().to_real_vec(), + &[Real::new(2.0).ok(), Real::new(1.0).ok(),] + ); + assert_eq!( + r.physical_columns[1].decoded().to_real_vec(), + &[Real::new(6.0).ok(), Real::new(5.0).ok(),] + ); + assert!(r.is_drained.unwrap()); + } + + #[test] + fn test_multiple_and_null_part_key() { + let src_exec = MockExecutor::new( + vec![FieldTypeTp::Long.into(), FieldTypeTp::Long.into()], + 
vec![BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Int( + vec![ + Some(1), + Some(1), + Some(1), + None, + None, + None, + Some(2), + Some(2), + Some(2), + ] + .into(), + ), + VectorValue::Int( + vec![ + Some(1), + Some(1), + None, + None, + None, + Some(2), + Some(1), + Some(1), + None, + ] + .into(), + ), + ]), + logical_rows: (0..9).collect(), + warnings: EvalWarnings::default(), + is_drained: Ok(true), + }], + ); + + let mut exec = BatchPartitionTopNExecutor::new_for_test( + src_exec, + vec![], + vec![], + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .build_for_test(), + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(1) + .build_for_test(), + ], + 1, + ); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4, 5]); + assert_eq!(r.physical_columns.rows_len(), 6); + assert_eq!(r.physical_columns.columns_len(), 2); + assert_eq!( + r.physical_columns[0].decoded().to_int_vec(), + &[Some(1), Some(1), None, None, Some(2), Some(2)] + ); + assert_eq!( + r.physical_columns[1].decoded().to_int_vec(), + &[Some(1), None, None, Some(2), Some(1), None] + ); + assert!(r.is_drained.unwrap()); + } + + fn make_expr_case() -> MockExecutor { + MockExecutor::new( + vec![ + FieldTypeTp::Long.into(), + FieldTypeTp::Long.into(), + FieldTypeTp::Long.into(), + ], + vec![BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Int( + vec![ + Some(1), + Some(2), + Some(3), + Some(4), + None, + Some(6), + None, + Some(8), + Some(9), + ] + .into(), + ), + VectorValue::Int( + vec![ + Some(2), + Some(1), + Some(4), + Some(3), + Some(5), + None, + None, + Some(9), + Some(8), + ] + .into(), + ), + VectorValue::Int( + vec![ + Some(1), + Some(2), + Some(3), + Some(4), + Some(5), + Some(6), + Some(7), + Some(8), + Some(9), + ] + .into(), + ), + ]), + logical_rows: (0..9).collect(), + warnings: EvalWarnings::default(), + is_drained: Ok(true), + 
}], + ) + } + + /// partition by col2/2, order by col2 + #[test] + fn test_expr_key1() { + let mut exec = BatchPartitionTopNExecutor::new_for_test( + make_expr_case(), + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(2) + .build_for_test(), + ], + vec![false], + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(2) + .push_constant_for_test(2) + .push_fn_call_for_test( + arithmetic_fn_meta::(), + 2, + FieldTypeTp::Long, + ) + .build_for_test(), + ], + 1, + ); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); + assert_eq!(r.physical_columns.rows_len(), 5); + assert_eq!(r.physical_columns.columns_len(), 3); + assert_eq!( + r.physical_columns[2].decoded().to_int_vec(), + &[Some(1), Some(2), Some(4), Some(6), Some(8)] + ); + assert!(r.is_drained.unwrap()); + } + + /// partition by col0 + col1, order by col2 + #[test] + fn test_expr_key2() { + let mut exec = BatchPartitionTopNExecutor::new_for_test( + make_expr_case(), + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(2) + .build_for_test(), + ], + vec![true], + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .push_column_ref_for_test(1) + .push_fn_call_for_test(arithmetic_fn_meta::(), 2, FieldTypeTp::Long) + .build_for_test(), + ], + 1, + ); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3]); + assert_eq!(r.physical_columns.rows_len(), 4); + assert_eq!(r.physical_columns.columns_len(), 3); + assert_eq!( + r.physical_columns[2].decoded().to_int_vec(), + &[Some(2), Some(4), Some(7), Some(9)] + ); + assert!(r.is_drained.unwrap()); + } + + /// Currently, When the data is not ordered by partition key, e.g. 1 1 2 1, + /// it will treat discontinuous same key as different partition. 
+ #[test] + fn test_unordered_key() { + let src_exec = MockExecutor::new( + vec![FieldTypeTp::Long.into(), FieldTypeTp::Double.into()], + vec![BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Int(vec![Some(1), Some(1), Some(2), Some(1)].into()), + VectorValue::Real( + vec![ + Real::new(5.0).ok(), + None, + Real::new(7.0).ok(), + Real::new(4.0).ok(), + ] + .into(), + ), + ]), + logical_rows: (0..4).collect(), + warnings: EvalWarnings::default(), + is_drained: Ok(true), + }], + ); + + let mut exec = BatchPartitionTopNExecutor::new_for_test( + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(1) + .build_for_test(), + ], + vec![false], + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .build_for_test(), + ], + 1, + ); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2]); + assert_eq!(r.physical_columns.rows_len(), 3); + assert_eq!(r.physical_columns.columns_len(), 2); + assert_eq!( + r.physical_columns[0].decoded().to_int_vec(), + &[Some(1), Some(2), Some(1)] + ); + assert_eq!( + r.physical_columns[1].decoded().to_real_vec(), + &[None, Real::new(7.0).ok(), Real::new(4.0).ok()] + ); + assert!(r.is_drained.unwrap()); + } + + fn make_integrated_data() -> MockExecutor { + MockExecutor::new( + vec![ + FieldTypeBuilder::new() + .tp(FieldTypeTp::VarString) + .flag(FieldTypeFlag::BINARY) + .into(), // primary key + FieldTypeBuilder::new() + .tp(FieldTypeTp::VarString) + .flag(FieldTypeFlag::BINARY) + .into(), // secondary key + FieldTypeTp::LongLong.into(), // timestamp + FieldTypeTp::MediumBlob.into(), // value + ], + vec![BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Bytes( + vec![ + Some(b"1".to_vec()), + Some(b"1".to_vec()), + Some(b"1".to_vec()), + Some(b"1".to_vec()), + Some(b"1".to_vec()), + Some(b"1".to_vec()), + Some(b"1".to_vec()), + Some(b"1".to_vec()), + Some(b"2".to_vec()), + 
Some(b"2".to_vec()), + Some(b"2".to_vec()), + Some(b"2".to_vec()), + Some(b"2".to_vec()), + Some(b"2".to_vec()), + Some(b"2".to_vec()), + Some(b"2".to_vec()), + ] + .into(), + ), + VectorValue::Bytes( + vec![ + Some(b"1".to_vec()), + Some(b"1".to_vec()), + Some(b"1".to_vec()), + Some(b"1".to_vec()), + Some(b"2".to_vec()), + Some(b"2".to_vec()), + Some(b"2".to_vec()), + Some(b"2".to_vec()), + Some(b"1".to_vec()), + Some(b"1".to_vec()), + Some(b"1".to_vec()), + Some(b"1".to_vec()), + Some(b"2".to_vec()), + Some(b"2".to_vec()), + Some(b"2".to_vec()), + Some(b"2".to_vec()), + ] + .into(), + ), + VectorValue::Int( + vec![ + Some(1672736824585607000_i64), + Some(1672736824789029000_i64), + Some(1672736824850598000_i64), + Some(1672736824884993000_i64), + Some(1672736824918933000_i64), + Some(1672736824953241000_i64), + Some(1672736824987116000_i64), + Some(1672736825021485000_i64), + Some(1672736825208127000_i64), + Some(1672736825263135000_i64), + Some(1672736825296467000_i64), + Some(1672736825330420000_i64), + Some(1672736825363611000_i64), + Some(1672736825398155000_i64), + Some(1672736825432106000_i64), + Some(1672736825466432000_i64), + ] + .into(), + ), + VectorValue::Bytes( + vec![ + Some(b"01".to_vec()), + Some(b"02".to_vec()), + Some(b"03".to_vec()), + Some(b"04".to_vec()), + Some(b"05".to_vec()), + Some(b"06".to_vec()), + Some(b"07".to_vec()), + Some(b"08".to_vec()), + Some(b"09".to_vec()), + Some(b"10".to_vec()), + Some(b"11".to_vec()), + Some(b"12".to_vec()), + Some(b"13".to_vec()), + Some(b"14".to_vec()), + Some(b"15".to_vec()), + Some(b"16".to_vec()), + ] + .into(), + ), + ]), + logical_rows: (0..16).collect(), + warnings: EvalWarnings::default(), + is_drained: Ok(true), + }], + ) + } + + #[test] + fn test_integrated() { + let mut exec = BatchPartitionTopNExecutor::new_for_test( + make_integrated_data(), + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(2) + .build_for_test(), + ], + vec![true], + vec![ + 
RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .build_for_test(), + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(1) + .build_for_test(), + ], + 2, + ); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4, 5, 6, 7]); + assert_eq!(r.physical_columns.rows_len(), 8); + assert_eq!(r.physical_columns.columns_len(), 4); + assert!(r.is_drained.unwrap()); + + assert_eq!( + r.physical_columns[2].decoded().to_int_vec(), + &[ + Some(1672736824884993000), + Some(1672736824850598000), + Some(1672736825021485000), + Some(1672736824987116000), + Some(1672736825330420000), + Some(1672736825296467000), + Some(1672736825466432000), + Some(1672736825432106000) + ] + ); + assert_eq!( + r.physical_columns[3].decoded().to_bytes_vec(), + &[ + Some(b"04".to_vec()), + Some(b"03".to_vec()), + Some(b"08".to_vec()), + Some(b"07".to_vec()), + Some(b"12".to_vec()), + Some(b"11".to_vec()), + Some(b"16".to_vec()), + Some(b"15".to_vec()) + ] + ); + } + + /// Builds an executor that will return these data: + /// + /// ```text + /// == Schema == + /// Col0 (LongLong(Unsigned)) Col1(LongLong[UnSigned]) Col2(LongLong[Signed]) + /// == Call #1 == + /// 1 18,446,744,073,709,551,615 -3 + /// 1 NULL NULL + /// 1 18,446,744,073,709,551,613 -1 + /// 1 2023 2024 + /// 1 2000 2000 + /// == Call #2 == + /// == Call #3 == + /// 2 9,223,372,036,854,775,807 9,223,372,036,854,775,807 + /// 2 300 300 + /// 2 9,223,372,036,854,775,808 -9,223,372,036,854,775,808 + /// 2 NULL NULL + /// 3 NULL NULL + /// == Call #4 == + /// (drained) (drained) (drained) + fn make_full_batch() -> MockExecutor { + MockExecutor::new( + vec![ + FieldTypeBuilder::new() + .tp(FieldTypeTp::LongLong) + .flag(FieldTypeFlag::UNSIGNED) + .into(), + FieldTypeBuilder::new() + .tp(FieldTypeTp::LongLong) + .flag(FieldTypeFlag::UNSIGNED) + .into(), + FieldTypeTp::LongLong.into(), + ], + vec![ + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + 
VectorValue::Int(vec![Some(1), Some(1), Some(1), Some(1), Some(1)].into()), + VectorValue::Int( + vec![ + Some(18_446_744_073_709_551_615_u64 as i64), + None, + Some(18_446_744_073_709_551_613_u64 as i64), + Some(2023), + Some(2000), + ] + .into(), + ), + VectorValue::Int( + vec![Some(-3), None, Some(-1), Some(2024), Some(2000)].into(), + ), + ]), + logical_rows: vec![0, 1, 2, 3, 4], + warnings: EvalWarnings::default(), + is_drained: Ok(false), + }, + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::empty(), + logical_rows: Vec::new(), + warnings: EvalWarnings::default(), + is_drained: Ok(false), + }, + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Int(vec![Some(2), Some(2), Some(2), Some(2), Some(3)].into()), + VectorValue::Int( + vec![ + Some(9_223_372_036_854_775_807_u64 as i64), + Some(300), + Some(9_223_372_036_854_775_808_u64 as i64), + None, + None, + ] + .into(), + ), + VectorValue::Int( + vec![ + Some(9_223_372_036_854_775_807_u64 as i64), + Some(300), + Some(-9_223_372_036_854_775_808), + None, + None, + ] + .into(), + ), + ]), + logical_rows: vec![0, 1, 2, 3, 4], + warnings: EvalWarnings::default(), + is_drained: Ok(false), + }, + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::empty(), + logical_rows: Vec::new(), + warnings: EvalWarnings::default(), + is_drained: Ok(true), + }, + ], + ) + } + + #[test] + fn test_small_n() { + let mut config = EvalConfig::default(); + config.paging_size = Some(10); + let config = Arc::new(config); + let src_exec = make_full_batch(); + let mut exec = BatchPartitionTopNExecutor::new_for_test_with_config( + config, + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(1) + .build_for_test(), + ], + vec![false], + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .build_for_test(), + ], + 2, + ); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + 
assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3]); + assert_eq!(r.physical_columns.rows_len(), 4); + assert_eq!(r.physical_columns.columns_len(), 3); + assert!(!r.is_drained.unwrap()); + assert_eq!( + r.physical_columns[0].decoded().to_int_vec(), + &[Some(1), Some(1), Some(2), Some(2)] + ); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0]); + assert_eq!(r.physical_columns.rows_len(), 1); + assert!(r.is_drained.unwrap()); + assert_eq!(r.physical_columns[0].decoded().to_int_vec(), &[Some(3)]); + } + + #[test] + fn test_without_order_key() { + let mut config = EvalConfig::default(); + config.paging_size = Some(10); + let config = Arc::new(config); + let src_exec = make_full_batch(); + let mut exec = BatchPartitionTopNExecutor::new_for_test_with_config( + config, + src_exec, + vec![], + vec![], + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .build_for_test(), + ], + 2, + ); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3]); + assert_eq!(r.physical_columns.rows_len(), 4); + assert_eq!(r.physical_columns.columns_len(), 3); + assert!(!r.is_drained.unwrap()); + assert_eq!( + r.physical_columns[0].decoded().to_int_vec(), + &[Some(1), Some(1), Some(2), Some(2)] + ); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0]); + 
assert_eq!(r.physical_columns.rows_len(), 1); + assert!(r.is_drained.unwrap()); + assert_eq!(r.physical_columns[0].decoded().to_int_vec(), &[Some(3)]); + } + + #[test] + fn test_paging_limit_normal_n() { + let mut config = EvalConfig::default(); + config.paging_size = Some(10); + let config = Arc::new(config); + let src_exec = make_full_batch(); + let mut exec = BatchPartitionTopNExecutor::new_for_test_with_config( + config, + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(1) + .build_for_test(), + ], + vec![false], + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .build_for_test(), + ], + 5, + ); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4, 5, 6, 7, 8]); + assert_eq!(r.physical_columns.rows_len(), 9); + assert_eq!(r.physical_columns.columns_len(), 3); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0]); + assert_eq!(r.physical_columns.rows_len(), 1); + assert!(r.is_drained.unwrap()); + } + + #[test] + fn test_paging_limit_oversize_n() { + let mut config = EvalConfig::default(); + config.paging_size = Some(9); + let config = Arc::new(config); + let src_exec = make_full_batch(); + let mut exec = BatchPartitionTopNExecutor::new_for_test_with_config( + config, + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(1) + .build_for_test(), + ], + vec![false], + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .build_for_test(), + ], + 5, + ); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 
1, 2, 3, 4]); + assert_eq!(r.physical_columns.rows_len(), 5); + assert_eq!(r.physical_columns.columns_len(), 3); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); + assert_eq!(r.physical_columns.rows_len(), 5); + assert_eq!(r.physical_columns.columns_len(), 3); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(r.is_drained.unwrap()); + } + + /// The following tests are copied from `batch_top_n_executor.rs`. + #[test] + fn test_no_partition_top_0() { + let src_exec = MockExecutor::new( + vec![FieldTypeTp::Double.into()], + vec![BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![VectorValue::Real( + vec![None, Real::new(7.0).ok(), None, None].into(), + )]), + logical_rows: (0..1).collect(), + warnings: EvalWarnings::default(), + is_drained: Ok(true), + }], + ); + + let mut exec = BatchPartitionTopNExecutor::new_for_test( + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_constant_for_test(1) + .build_for_test(), + ], + vec![false], + vec![], + 0, + ); + + let r = block_on(exec.next_batch(1)); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(r.is_drained.unwrap()); + } + + #[test] + fn test_no_partition_no_row() { + let src_exec = MockExecutor::new( + vec![FieldTypeTp::LongLong.into()], + vec![ + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![VectorValue::Int( + vec![Some(5)].into(), + )]), + logical_rows: Vec::new(), + warnings: EvalWarnings::default(), + is_drained: Ok(false), + }, + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::empty(), + logical_rows: Vec::new(), + warnings: EvalWarnings::default(), + is_drained: Ok(true), + 
}, + ], + ); + + let mut exec = BatchPartitionTopNExecutor::new_for_test( + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .build_for_test(), + ], + vec![false], + vec![], + 10, + ); + + let r = block_on(exec.next_batch(1)); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(r.is_drained.unwrap()); + } + + /// Builds an executor that will return these data: + /// + /// == Schema == + /// Col0 (Int) Col1(Int) Col2(Real) + /// == Call #1 == + /// NULL -1 -1.0 + /// NULL NULL 2.0 + /// NULL 1 4.0 + /// == Call #2 == + /// == Call #3 == + /// -1 NULL NULL + /// -10 10 3.0 + /// -10 NULL -5.0 + /// -10 -10 0.0 + /// (drained) + fn make_src_executor() -> MockExecutor { + MockExecutor::new( + vec![ + FieldTypeTp::LongLong.into(), + FieldTypeTp::LongLong.into(), + FieldTypeTp::Double.into(), + ], + vec![ + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Int(vec![None, None, Some(5), None].into()), + VectorValue::Int(vec![None, Some(1), None, Some(-1)].into()), + VectorValue::Real( + vec![ + Real::new(2.0).ok(), + Real::new(4.0).ok(), + None, + Real::new(-1.0).ok(), + ] + .into(), + ), + ]), + logical_rows: vec![3, 0, 1], + warnings: EvalWarnings::default(), + is_drained: Ok(false), + }, + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Int(vec![Some(0)].into()), + VectorValue::Int(vec![Some(10)].into()), + VectorValue::Real(vec![Real::new(10.0).ok()].into()), + ]), + logical_rows: Vec::new(), + warnings: EvalWarnings::default(), + is_drained: Ok(false), + }, + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Int( + vec![Some(-10), Some(-1), Some(-10), None, Some(-10), None].into(), + ), + VectorValue::Int( + vec![None, None, Some(10), Some(-9), Some(-10), None].into(), + ), + 
VectorValue::Real( + vec![ + Real::new(-5.0).ok(), + None, + Real::new(3.0).ok(), + None, + Real::new(0.0).ok(), + Real::new(9.9).ok(), + ] + .into(), + ), + ]), + logical_rows: vec![1, 2, 0, 4], + warnings: EvalWarnings::default(), + is_drained: Ok(true), + }, + ], + ) + } + + #[test] + fn test_no_partition_integration_1() { + // Order by single column, data len < n. + // + // mysql> select * from t order by col2 limit 100; + // +------+------+------+ + // | col0 | col1 | col2 | + // +------+------+------+ + // | -1 | NULL | NULL | + // | -10 | NULL | -5 | + // | NULL | -1 | -1 | + // | -10 | -10 | 0 | + // | NULL | NULL | 2 | + // | -10 | 10 | 3 | + // | NULL | 1 | 4 | + // +------+------+------+ + // + // Note: ORDER BY does not use stable sort, so let's order by col2 to avoid + // duplicate records. + + let src_exec = make_src_executor(); + + let mut exec = BatchPartitionTopNExecutor::new_for_test( + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(2) + .build_for_test(), + ], + vec![false], + vec![], + 100, + ); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4, 5, 6]); + assert_eq!(r.physical_columns.rows_len(), 7); + assert_eq!(r.physical_columns.columns_len(), 3); + assert_eq!( + r.physical_columns[0].decoded().to_int_vec(), + &[Some(-1), Some(-10), None, Some(-10), None, Some(-10), None] + ); + assert_eq!( + r.physical_columns[1].decoded().to_int_vec(), + &[None, None, Some(-1), Some(-10), None, Some(10), Some(1)] + ); + assert_eq!( + r.physical_columns[2].decoded().to_real_vec(), + &[ + None, + Real::new(-5.0).ok(), + Real::new(-1.0).ok(), + Real::new(0.0).ok(), 
+ Real::new(2.0).ok(), + Real::new(3.0).ok(), + Real::new(4.0).ok() + ] + ); + assert!(r.is_drained.unwrap()); + } + + #[test] + fn test_no_partition_integration_2() { + // Order by multiple columns, data len == n. + // + // mysql> select * from t order by col0 desc, col1 limit 7; + // +------+------+------+ + // | col0 | col1 | col2 | + // +------+------+------+ + // | -1 | NULL | NULL | + // | -10 | NULL | -5 | + // | -10 | -10 | 0 | + // | -10 | 10 | 3 | + // | NULL | NULL | 2 | + // | NULL | -1 | -1 | + // | NULL | 1 | 4 | + // +------+------+------+ + + let src_exec = make_src_executor(); + + let mut exec = BatchPartitionTopNExecutor::new_for_test( + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .build_for_test(), + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(1) + .build_for_test(), + ], + vec![true, false], + vec![], + 7, + ); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4, 5, 6]); + assert_eq!(r.physical_columns.rows_len(), 7); + assert_eq!(r.physical_columns.columns_len(), 3); + assert_eq!( + r.physical_columns[0].decoded().to_int_vec(), + &[Some(-1), Some(-10), Some(-10), Some(-10), None, None, None] + ); + assert_eq!( + r.physical_columns[1].decoded().to_int_vec(), + &[None, None, Some(-10), Some(10), None, Some(-1), Some(1)] + ); + assert_eq!( + r.physical_columns[2].decoded().to_real_vec(), + &[ + None, + Real::new(-5.0).ok(), + Real::new(0.0).ok(), + Real::new(3.0).ok(), + Real::new(2.0).ok(), + Real::new(-1.0).ok(), + Real::new(4.0).ok() + ] + ); + assert!(r.is_drained.unwrap()); + } + + #[test] + fn 
test_no_partition_integration_3() { + use tidb_query_expr::{ + impl_arithmetic::{arithmetic_fn_meta, IntIntPlus}, + impl_op::is_null_fn_meta, + }; + + // Order by multiple expressions, data len > n. + // + // mysql> select * from t order by isnull(col0), col0, col1 + 1 desc limit 5; + // +------+------+------+ + // | col0 | col1 | col2 | + // +------+------+------+ + // | -10 | 10 | 3 | + // | -10 | -10 | 0 | + // | -10 | NULL | -5 | + // | -1 | NULL | NULL | + // | NULL | 1 | 4 | + // +------+------+------+ + + let src_exec = make_src_executor(); + + let mut exec = BatchPartitionTopNExecutor::new_for_test( + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .push_fn_call_for_test(is_null_fn_meta::(), 1, FieldTypeTp::LongLong) + .build_for_test(), + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .build_for_test(), + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(1) + .push_constant_for_test(1) + .push_fn_call_for_test( + arithmetic_fn_meta::(), + 2, + FieldTypeTp::LongLong, + ) + .build_for_test(), + ], + vec![false, false, true], + vec![], + 5, + ); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); + assert_eq!(r.physical_columns.rows_len(), 5); + assert_eq!(r.physical_columns.columns_len(), 3); + assert_eq!( + r.physical_columns[0].decoded().to_int_vec(), + &[Some(-10), Some(-10), Some(-10), Some(-1), None] + ); + assert_eq!( + r.physical_columns[1].decoded().to_int_vec(), + &[Some(10), Some(-10), None, None, Some(1)] + ); + assert_eq!( + r.physical_columns[2].decoded().to_real_vec(), + &[ + Real::new(3.0).ok(), + 
Real::new(0.0).ok(), + Real::new(-5.0).ok(), + None, + Real::new(4.0).ok() + ] + ); + assert!(r.is_drained.unwrap()); + } + + /// Builds an executor that will return these data: + /// + /// ```text + /// == Schema == + /// Col0 (Bytes[Utf8Mb4GeneralCi]) Col1(Bytes[Utf8Mb4Bin]) Col2(Bytes[Binary]) + /// == Call #1 == + /// "aa" "aaa" "áaA" + /// NULL NULL "Aa" + /// "aa" "aa" NULL + /// == Call #2 == + /// == Call #3 == + /// "áaA" "áa" NULL + /// "áa" "áaA" "aa" + /// "Aa" NULL "aaa" + /// "aaa" "Aa" "áa" + /// (drained) + /// ``` + fn make_bytes_src_executor() -> MockExecutor { + MockExecutor::new( + vec![ + FieldTypeBuilder::new() + .tp(FieldTypeTp::VarChar) + .collation(Collation::Utf8Mb4GeneralCi) + .into(), + FieldTypeBuilder::new() + .tp(FieldTypeTp::VarChar) + .collation(Collation::Utf8Mb4Bin) + .into(), + FieldTypeBuilder::new() + .tp(FieldTypeTp::VarChar) + .collation(Collation::Binary) + .into(), + ], + vec![ + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Bytes( + vec![Some(b"aa".to_vec()), None, Some(b"aa".to_vec())].into(), + ), + VectorValue::Bytes( + vec![Some(b"aa".to_vec()), None, Some(b"aaa".to_vec())].into(), + ), + VectorValue::Bytes( + vec![None, Some(b"Aa".to_vec()), Some("áaA".as_bytes().to_vec())] + .into(), + ), + ]), + logical_rows: vec![2, 1, 0], + warnings: EvalWarnings::default(), + is_drained: Ok(false), + }, + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::empty(), + logical_rows: Vec::new(), + warnings: EvalWarnings::default(), + is_drained: Ok(false), + }, + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Bytes( + vec![ + Some("áaA".as_bytes().to_vec()), + Some("áa".as_bytes().to_vec()), + Some(b"Aa".to_vec()), + Some(b"aaa".to_vec()), + ] + .into(), + ), + VectorValue::Bytes( + vec![ + Some("áa".as_bytes().to_vec()), + Some("áaA".as_bytes().to_vec()), + None, + Some(b"Aa".to_vec()), + ] + .into(), + ), + VectorValue::Bytes( + vec![ + None, 
+ Some(b"aa".to_vec()), + Some(b"aaa".to_vec()), + Some("áa".as_bytes().to_vec()), + ] + .into(), + ), + ]), + logical_rows: vec![0, 1, 2, 3], + warnings: EvalWarnings::default(), + is_drained: Ok(true), + }, + ], + ) + } + + #[test] + fn test_no_partition_bytes_1() { + // Order by multiple expressions with collation, data len > n. + // + // mysql> select * from t order by col1 desc, col3 desc, col2 limit 5; + // +------+--------+--------+ + // | col1 | col2 | col3 | + // +------+--------+--------+ + // | aaa | Aa | áa | + // | áaA | áa | | + // | aa | aaa | áaA | + // | Aa | | aaa | + // | áa | áaA | aa | + // +------+--------+--------+ + + let src_exec = make_bytes_src_executor(); + + let mut exec = BatchPartitionTopNExecutor::new_for_test( + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .build_for_test(), + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(2) + .build_for_test(), + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(1) + .build_for_test(), + ], + vec![true, true, false], + vec![], + 5, + ); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); + assert_eq!(r.physical_columns.rows_len(), 5); + assert_eq!(r.physical_columns.columns_len(), 3); + assert_eq!( + r.physical_columns[0].decoded().to_bytes_vec(), + &[ + Some(b"aaa".to_vec()), + Some("áaA".as_bytes().to_vec()), + Some(b"aa".to_vec()), + Some(b"Aa".to_vec()), + Some("áa".as_bytes().to_vec()), + ] + ); + assert_eq!( + r.physical_columns[1].decoded().to_bytes_vec(), + &[ + Some(b"Aa".to_vec()), + Some("áa".as_bytes().to_vec()), + Some(b"aaa".to_vec()), + None, + 
Some("áaA".as_bytes().to_vec()), + ] + ); + assert_eq!( + r.physical_columns[2].decoded().to_bytes_vec(), + &[ + Some("áa".as_bytes().to_vec()), + None, + Some("áaA".as_bytes().to_vec()), + Some(b"aaa".to_vec()), + Some(b"aa".to_vec()), + ] + ); + assert!(r.is_drained.unwrap()); + } + + #[test] + fn test_no_partition_bytes_2() { + // Order by multiple expressions with collation, data len > n. + // + // mysql> select * from test order by col1, col2, col3 limit 5; + // +--------+--------+--------+ + // | col1 | col2 | col3 | + // +--------+--------+--------+ + // | | | Aa | + // | Aa | | aaa | + // | aa | aa | | + // | aa | aaa | áaA | + // | áa | áaA | aa | + // +--------+--------+--------+ + + let src_exec = make_bytes_src_executor(); + + let mut exec = BatchPartitionTopNExecutor::new_for_test( + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .build_for_test(), + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(1) + .build_for_test(), + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(2) + .build_for_test(), + ], + vec![false, false, false], + vec![], + 5, + ); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); + assert_eq!(r.physical_columns.rows_len(), 5); + assert_eq!(r.physical_columns.columns_len(), 3); + assert_eq!( + r.physical_columns[0].decoded().to_bytes_vec(), + &[ + None, + Some(b"Aa".to_vec()), + Some(b"aa".to_vec()), + Some(b"aa".to_vec()), + Some("áa".as_bytes().to_vec()), + ] + ); + assert_eq!( + r.physical_columns[1].decoded().to_bytes_vec(), + &[ + None, + None, + Some(b"aa".to_vec()), + Some(b"aaa".to_vec()), + 
Some("áaA".as_bytes().to_vec()), + ] + ); + assert_eq!( + r.physical_columns[2].decoded().to_bytes_vec(), + &[ + Some(b"Aa".to_vec()), + Some(b"aaa".to_vec()), + None, + Some("áaA".as_bytes().to_vec()), + Some(b"aa".to_vec()), + ] + ); + assert!(r.is_drained.unwrap()); + } + + /// Builds an executor that will return these data: + /// + /// ```text + /// == Schema == + /// Col0 (LongLong(Unsigned)) Col1(LongLong[Signed]) Col2(Long[Unsigned]) + /// == Call #1 == + /// 18,446,744,073,709,551,615 -3 4,294,967,293 + /// NULL NULL NULL + /// 18,446,744,073,709,551,613 -1 4,294,967,295 + /// == Call #2 == + /// == Call #3 == + /// 2000 2000 2000 + /// 9,223,372,036,854,775,807 9,223,372,036,854,775,807 2,147,483,647 + /// 300 300 300 + /// 9,223,372,036,854,775,808 -9,223,372,036,854,775,808 2,147,483,648 + /// (drained) (drained) (drained) + /// ``` + fn make_src_executor_unsigned() -> MockExecutor { + MockExecutor::new( + vec![ + FieldTypeBuilder::new() + .tp(FieldTypeTp::LongLong) + .flag(FieldTypeFlag::UNSIGNED) + .into(), + FieldTypeTp::LongLong.into(), + FieldTypeBuilder::new() + .tp(FieldTypeTp::Long) + .flag(FieldTypeFlag::UNSIGNED) + .into(), + ], + vec![ + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Int( + vec![ + Some(18_446_744_073_709_551_613_u64 as i64), + None, + Some(18_446_744_073_709_551_615_u64 as i64), + ] + .into(), + ), + VectorValue::Int(vec![Some(-1), None, Some(-3)].into()), + VectorValue::Int( + vec![ + Some(4_294_967_295_u32 as i64), + None, + Some(4_294_967_295_u32 as i64), + ] + .into(), + ), + ]), + logical_rows: vec![2, 1, 0], + warnings: EvalWarnings::default(), + is_drained: Ok(false), + }, + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::empty(), + logical_rows: Vec::new(), + warnings: EvalWarnings::default(), + is_drained: Ok(false), + }, + BatchExecuteResult { + physical_columns: LazyBatchColumnVec::from(vec![ + VectorValue::Int( + vec![ + Some(300_u64 as i64), + 
Some(9_223_372_036_854_775_807_u64 as i64), + Some(2000_u64 as i64), + Some(9_223_372_036_854_775_808_u64 as i64), + ] + .into(), + ), + VectorValue::Int( + vec![ + Some(300), + Some(9_223_372_036_854_775_807), + Some(2000), + Some(-9_223_372_036_854_775_808), + ] + .into(), + ), + VectorValue::Int( + vec![ + Some(300_u32 as i64), + Some(2_147_483_647_u32 as i64), + Some(2000_u32 as i64), + Some(2_147_483_648_u32 as i64), + ] + .into(), + ), + ]), + logical_rows: vec![2, 1, 0, 3], + warnings: EvalWarnings::default(), + is_drained: Ok(true), + }, + ], + ) + } + + #[test] + fn test_no_partition_top_unsigned() { + let test_top5 = |col_index: usize, is_desc: bool, expected: &[Option]| { + let src_exec = make_src_executor_unsigned(); + let mut exec = BatchPartitionTopNExecutor::new_for_test( + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(col_index) + .build_for_test(), + ], + vec![is_desc], + vec![], + 5, + ); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); + assert_eq!(r.physical_columns.rows_len(), 5); + assert_eq!(r.physical_columns.columns_len(), 3); + assert_eq!( + r.physical_columns[col_index].decoded().to_int_vec(), + expected + ); + assert!(r.is_drained.unwrap()); + }; + + test_top5( + 0, + false, + &[ + None, + Some(300_u64 as i64), + Some(2000_u64 as i64), + Some(9_223_372_036_854_775_807_u64 as i64), + Some(9_223_372_036_854_775_808_u64 as i64), + ], + ); + + test_top5( + 0, + true, + &[ + Some(18_446_744_073_709_551_615_u64 as i64), + Some(18_446_744_073_709_551_613_u64 as i64), + Some(9_223_372_036_854_775_808_u64 as i64), + 
Some(9_223_372_036_854_775_807_u64 as i64), + Some(2000_u64 as i64), + ], + ); + + test_top5( + 1, + false, + &[ + None, + Some(-9_223_372_036_854_775_808), + Some(-3), + Some(-1), + Some(300), + ], + ); + + test_top5( + 1, + true, + &[ + Some(9_223_372_036_854_775_807), + Some(2000), + Some(300), + Some(-1), + Some(-3), + ], + ); + + test_top5( + 2, + false, + &[ + None, + Some(300_u32 as i64), + Some(2000_u32 as i64), + Some(2_147_483_647_u32 as i64), + Some(2_147_483_648_u32 as i64), + ], + ); + + test_top5( + 2, + true, + &[ + Some(4_294_967_295_u32 as i64), + Some(4_294_967_295_u32 as i64), + Some(2_147_483_648_u32 as i64), + Some(2_147_483_647_u32 as i64), + Some(2000_u32 as i64), + ], + ); + } + + #[test] + fn test_no_partition_top_paging() { + // Top N = 5 and PagingSize = 10, same with no-paging. + let test_top5_paging6 = |col_index: usize, is_desc: bool, expected: &[Option]| { + let mut config = EvalConfig::default(); + config.paging_size = Some(10); + let config = Arc::new(config); + let src_exec = make_src_executor_unsigned(); + let mut exec = BatchPartitionTopNExecutor::new_for_test_with_config( + config, + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(col_index) + .build_for_test(), + ], + vec![is_desc], + vec![], + 5, + ); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert!(r.logical_rows.is_empty()); + assert_eq!(r.physical_columns.rows_len(), 0); + assert!(!r.is_drained.unwrap()); + + let r = block_on(exec.next_batch(1)); + assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); + assert_eq!(r.physical_columns.rows_len(), 5); + assert_eq!(r.physical_columns.columns_len(), 3); + assert_eq!( + r.physical_columns[col_index].decoded().to_int_vec(), + expected + ); + assert!(r.is_drained.unwrap()); + }; + + test_top5_paging6( + 0, + false, + &[ + None, + 
Some(300_u64 as i64), + Some(2000_u64 as i64), + Some(9_223_372_036_854_775_807_u64 as i64), + Some(9_223_372_036_854_775_808_u64 as i64), + ], + ); + + test_top5_paging6( + 0, + true, + &[ + Some(18_446_744_073_709_551_615_u64 as i64), + Some(18_446_744_073_709_551_613_u64 as i64), + Some(9_223_372_036_854_775_808_u64 as i64), + Some(9_223_372_036_854_775_807_u64 as i64), + Some(2000_u64 as i64), + ], + ); + + test_top5_paging6( + 1, + false, + &[ + None, + Some(-9_223_372_036_854_775_808), + Some(-3), + Some(-1), + Some(300), + ], + ); + + test_top5_paging6( + 1, + true, + &[ + Some(9_223_372_036_854_775_807), + Some(2000), + Some(300), + Some(-1), + Some(-3), + ], + ); + + test_top5_paging6( + 2, + false, + &[ + None, + Some(300_u32 as i64), + Some(2000_u32 as i64), + Some(2_147_483_647_u32 as i64), + Some(2_147_483_648_u32 as i64), + ], + ); + + test_top5_paging6( + 2, + true, + &[ + Some(4_294_967_295_u32 as i64), + Some(4_294_967_295_u32 as i64), + Some(2_147_483_648_u32 as i64), + Some(2_147_483_647_u32 as i64), + Some(2000_u32 as i64), + ], + ); + + // Top N = 5 and PagingSize = 8, return all data and do nothing. 
+ let test_top5_paging4 = |build_src_executor: fn() -> MockExecutor| { + let mut config = EvalConfig::default(); + config.paging_size = Some(8); + let config = Arc::new(config); + let src_exec = build_src_executor(); + let mut exec = BatchPartitionTopNExecutor::new_for_test_with_config( + config, + src_exec, + vec![ + RpnExpressionBuilder::new_for_test() + .push_column_ref_for_test(0) + .build_for_test(), + ], + vec![false], + vec![], + 5, + ); + let mut exec2 = build_src_executor(); + + loop { + let r1 = block_on(exec.next_batch(1)); + let r2 = block_on(exec2.next_batch(1)); + assert_eq!(r1.logical_rows, r2.logical_rows); + assert_eq!( + r1.physical_columns.rows_len(), + r2.physical_columns.rows_len() + ); + assert_eq!( + r1.physical_columns.columns_len(), + r2.physical_columns.columns_len() + ); + let r1_is_drained = r1.is_drained.unwrap(); + assert_eq!(r1_is_drained, r2.is_drained.unwrap()); + if r1_is_drained { + break; + } + } + }; + + test_top5_paging4(make_src_executor_unsigned); + test_top5_paging4(make_src_executor); + test_top5_paging4(make_bytes_src_executor); + } +} diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 392b41ff165..f4a3ea8a2ad 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -4,6 +4,7 @@ use std::{convert::TryFrom, sync::Arc}; use api_version::KvFormat; use fail::fail_point; +use itertools::Itertools; use kvproto::coprocessor::KeyRange; use protobuf::Message; use tidb_query_common::{ @@ -348,17 +349,36 @@ pub fn build_executors( order_exprs_def.push(item.take_expr()); order_is_desc.push(item.get_desc()); } + let partition_by = d + .take_partition_by() + .into_iter() + .map(|mut item| item.take_expr()) + .collect_vec(); - Box::new( - BatchTopNExecutor::new( - config.clone(), - executor, - order_exprs_def, - order_is_desc, - d.get_limit() as usize, - )? 
- .collect_summary(summary_slot_index), - ) + if partition_by.is_empty() { + Box::new( + BatchTopNExecutor::new( + config.clone(), + executor, + order_exprs_def, + order_is_desc, + d.get_limit() as usize, + )? + .collect_summary(summary_slot_index), + ) + } else { + Box::new( + BatchPartitionTopNExecutor::new( + config.clone(), + executor, + partition_by, + order_exprs_def, + order_is_desc, + d.get_limit() as usize, + )? + .collect_summary(summary_slot_index), + ) + } } _ => { return Err(other_err!( diff --git a/components/tidb_query_executors/src/top_n_executor.rs b/components/tidb_query_executors/src/top_n_executor.rs index 5ebc65baa25..dd6b7be2dba 100644 --- a/components/tidb_query_executors/src/top_n_executor.rs +++ b/components/tidb_query_executors/src/top_n_executor.rs @@ -38,7 +38,7 @@ pub struct BatchTopNExecutor { /// 1. `BatchTopNExecutor` is valid (i.e. not dropped). /// /// 2. The referenced `LazyBatchColumnVec` of the element must be valid, - /// which only happens when at least one of the row is in the `heap`. + /// which only happens when at least one of the row is in the `heap`. /// Note that rows may be swapped out from `heap` at any time. 
/// /// This field is placed before `order_exprs` and `src` because it relies on diff --git a/components/tidb_query_executors/src/util/top_n_heap.rs b/components/tidb_query_executors/src/util/top_n_heap.rs index 0cbef103e4d..57bd4b63017 100644 --- a/components/tidb_query_executors/src/util/top_n_heap.rs +++ b/components/tidb_query_executors/src/util/top_n_heap.rs @@ -51,17 +51,25 @@ impl TopNHeap { } #[allow(clippy::clone_on_copy)] - pub fn take_all(&mut self) -> LazyBatchColumnVec { + pub fn take_all_append_to(&mut self, result: &mut LazyBatchColumnVec) { let heap = std::mem::take(&mut self.heap); let sorted_items = heap.into_sorted_vec(); if sorted_items.is_empty() { - return LazyBatchColumnVec::empty(); + return; } - let mut result = sorted_items[0] - .source_data - .physical_columns - .clone_empty(sorted_items.len()); + // If it is a pure empty LazyBatchColumnVec, we need create columns on it first. + if result.columns_len() == 0 { + *result = sorted_items[0] + .source_data + .physical_columns + .clone_empty(self.heap.len()); + } + // todo: check schema is equal + assert_eq!( + result.columns_len(), + sorted_items[0].source_data.physical_columns.columns_len(), + ); for (column_index, result_column) in result.as_mut_slice().iter_mut().enumerate() { match result_column { @@ -101,6 +109,12 @@ impl TopNHeap { } result.assert_columns_equal_length(); + } + + #[allow(clippy::clone_on_copy)] + pub fn take_all(&mut self) -> LazyBatchColumnVec { + let mut result = LazyBatchColumnVec::empty(); + self.take_all_append_to(&mut result); result } } From ef09d272b13136517044125e4da3d1577fadb327 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Thu, 9 Feb 2023 08:05:58 -0800 Subject: [PATCH 0518/1149] polish the config region_split_size (#14182) ref tikv/tikv#12842 materialize the region_split_size in optimize_for Signed-off-by: qi.xu Co-authored-by: qi.xu Co-authored-by: Ti Chi Robot --- components/raftstore/src/coprocessor/config.rs | 8 ++++++-- src/config/mod.rs | 2 -- 2 files 
changed, 6 insertions(+), 4 deletions(-) diff --git a/components/raftstore/src/coprocessor/config.rs b/components/raftstore/src/coprocessor/config.rs index 3014c5c2358..137de200b71 100644 --- a/components/raftstore/src/coprocessor/config.rs +++ b/components/raftstore/src/coprocessor/config.rs @@ -130,8 +130,12 @@ impl Config { pub fn optimize_for(&mut self, raftstore_v2: bool) { // overwrite the default region_split_size when it's multi-rocksdb - if raftstore_v2 && self.region_split_size.is_none() { - self.region_split_size = Some(ReadableSize::mb(RAFTSTORE_V2_SPLIT_SIZE_MB)); + if self.region_split_size.is_none() { + if raftstore_v2 { + self.region_split_size = Some(ReadableSize::mb(RAFTSTORE_V2_SPLIT_SIZE_MB)); + } else { + self.region_split_size = Some(self.region_split_size()); + } } } diff --git a/src/config/mod.rs b/src/config/mod.rs index 5e923023ca0..a8e15c38642 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -5544,8 +5544,6 @@ mod tests { default_cfg.security.redact_info_log = Some(false); default_cfg.coprocessor.region_max_size = Some(default_cfg.coprocessor.region_max_size()); default_cfg.coprocessor.region_max_keys = Some(default_cfg.coprocessor.region_max_keys()); - default_cfg.coprocessor.region_split_size = - Some(default_cfg.coprocessor.region_split_size()); default_cfg.coprocessor.region_split_keys = Some(default_cfg.coprocessor.region_split_keys()); default_cfg.raft_store.raft_log_gc_size_limit = From 91b7a49773978c3258e48f310afa48d1e1d5c1ea Mon Sep 17 00:00:00 2001 From: zzm Date: Sat, 11 Feb 2023 14:40:00 +0800 Subject: [PATCH 0519/1149] cdc: fix cdc integration test ` test_rawkv_sacn` (#14147) close tikv/tikv#14146 make cdc integration test `test_rawkv_sacn` stable Signed-off-by: zeminzhou --- components/cdc/tests/integrations/test_cdc.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/components/cdc/tests/integrations/test_cdc.rs b/components/cdc/tests/integrations/test_cdc.rs index 73f46fe6427..9de1a77a8ad 
100644 --- a/components/cdc/tests/integrations/test_cdc.rs +++ b/components/cdc/tests/integrations/test_cdc.rs @@ -613,16 +613,15 @@ fn test_cdc_scan_impl() { fn test_cdc_rawkv_scan() { let mut suite = TestSuite::new(3, ApiVersion::V2); - suite.set_tso(10); - suite.flush_causal_timestamp_for_region(1); let (k1, v1) = (b"rkey1".to_vec(), b"value1".to_vec()); suite.must_kv_put(1, k1, v1); let (k2, v2) = (b"rkey2".to_vec(), b"value2".to_vec()); suite.must_kv_put(1, k2, v2); - suite.set_tso(1000); + let ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); suite.flush_causal_timestamp_for_region(1); + let (k3, v3) = (b"rkey3".to_vec(), b"value3".to_vec()); suite.must_kv_put(1, k3.clone(), v3.clone()); @@ -631,7 +630,7 @@ fn test_cdc_rawkv_scan() { let mut req = suite.new_changedata_request(1); req.set_kv_api(ChangeDataRequestKvApi::RawKv); - req.set_checkpoint_ts(999); + req.set_checkpoint_ts(ts.into_inner()); let (mut req_tx, event_feed_wrap, receive_event) = new_event_feed(suite.get_region_cdc_client(1)); block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); From e05f8f3ac0083e3f663da3afb24bccb5a3c0007c Mon Sep 17 00:00:00 2001 From: Yilin Chen Date: Mon, 13 Feb 2023 10:46:01 +0800 Subject: [PATCH 0520/1149] read_pool: avoid get inconsistent sample sum and count from histogram (#14202) close tikv/tikv#14200 We may get inconsistent state when calling `get_sample_sum` and `get_sample_count` in a non-atomic fashion. This may cause unexpected calculation result. This PR calls `Histogram::metric` which uses a lock inside to guarantee consistency. And the PR also adds protective checks to avoid dividing by zero. 
Signed-off-by: Yilin Chen --- src/read_pool.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/read_pool.rs b/src/read_pool.rs index 8ef2c4a9b25..1488ffada15 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -15,7 +15,7 @@ use file_system::{set_io_type, IoType}; use futures::{channel::oneshot, future::TryFutureExt}; use kvproto::{errorpb, kvrpcpb::CommandPri}; use online_config::{ConfigChange, ConfigManager, ConfigValue, Result as CfgResult}; -use prometheus::{Histogram, IntCounter, IntGauge}; +use prometheus::{core::Metric, Histogram, IntCounter, IntGauge}; use resource_control::{ControlledFuture, ResourceController}; use thiserror::Error; use tikv_util::{ @@ -355,11 +355,15 @@ impl TimeSliceInspector { // Now, we simplify the problem by merging samples from all levels. If we want // more accurate answer in the future, calculate for each level separately. for hist in &inner.time_slice_hist { - new_sum += Duration::from_secs_f64(hist.get_sample_sum()); - new_count += hist.get_sample_count(); + // Call `metric` to get a consistent snapshot of sum and count. + let metric_proto = hist.metric(); + let hist_proto = metric_proto.get_histogram(); + new_sum += Duration::from_secs_f64(hist_proto.get_sample_sum()); + new_count += hist_proto.get_sample_count(); } - let time_diff = new_sum - inner.last_sum; - if time_diff < MIN_TIME_DIFF { + let time_diff = new_sum.saturating_sub(inner.last_sum); + let count_diff = new_count.saturating_sub(inner.last_count); + if time_diff < MIN_TIME_DIFF || count_diff == 0 { return; } let new_val = time_diff / ((new_count - inner.last_count) as u32); From 61c5f1caf74aaeec36e0fd46fc137af672b37451 Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 13 Feb 2023 13:52:00 +0800 Subject: [PATCH 0521/1149] *: fix tablet leak in flow control (#14197) close tikv/tikv#14196 Also add a tool to trace tablet leak. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + Cargo.toml | 1 + cmd/tikv-server/Cargo.toml | 1 + components/engine_rocks/Cargo.toml | 1 + components/engine_rocks/src/engine.rs | 148 +++++++++++++++++- components/engine_traits/Cargo.toml | 1 + components/engine_traits/src/tablet.rs | 5 +- src/server/status_server/mod.rs | 16 ++ .../singleton_flow_controller.rs | 86 ++++++---- .../flow_controller/tablet_flow_controller.rs | 72 ++++++--- 10 files changed, 267 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 046220980bc..6974e776935 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1616,6 +1616,7 @@ dependencies = [ "file_system", "keys", "kvproto", + "lazy_static", "log_wrappers", "protobuf", "raft", diff --git a/Cargo.toml b/Cargo.toml index 29337b4a002..bab7869a9d0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ publish = false [features] default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] +trace-tablet-lifetime = ["engine_rocks/trace-lifetime"] tcmalloc = ["tikv_alloc/tcmalloc"] jemalloc = ["tikv_alloc/jemalloc", "engine_rocks/jemalloc"] mimalloc = ["tikv_alloc/mimalloc"] diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index c5b5cb6403c..4bba926a68e 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -7,6 +7,7 @@ publish = false [features] default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] +trace-tablet-lifetime = ["tikv/trace-tablet-lifetime"] tcmalloc = ["server/tcmalloc"] jemalloc = ["server/jemalloc"] mimalloc = ["server/mimalloc"] diff --git a/components/engine_rocks/Cargo.toml b/components/engine_rocks/Cargo.toml index 6775705e3e1..4c2b7bf5a52 100644 --- a/components/engine_rocks/Cargo.toml +++ b/components/engine_rocks/Cargo.toml @@ -5,6 +5,7 @@ edition = "2018" publish = false [features] +trace-lifetime = [] jemalloc = ["rocksdb/jemalloc"] 
portable = ["rocksdb/portable"] sse = ["rocksdb/sse"] diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 0e73de357e5..de29e676277 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -10,21 +10,154 @@ use crate::{ RocksEngineIterator, RocksSnapshot, }; +#[cfg(feature = "trace-lifetime")] +mod trace { + //! Trace tools for tablets. + //! + //! It's hard to know who is holding the rocksdb reference when trying to + //! debug why the tablet is not deleted. The module will record the + //! backtrace and thread name when the tablet is created or clone. So + //! after print all the backtrace, we can easily figure out who is + //! leaking the tablet. + //! + //! To use the feature, you need to compile tikv-server with + //! trace-tabelt-lifetime feature. For example, `env + //! ENABLE_FEATURES=trace-tablet-lifetime make release`. And then query the trace information by `curl http://ip:status_port/region/id?trace-tablet=1`. + + use std::{ + backtrace::Backtrace, + collections::BTreeMap, + ops::Bound::Included, + sync::{ + atomic::{AtomicU64, Ordering}, + Mutex, + }, + }; + + use rocksdb::DB; + + static CNT: AtomicU64 = AtomicU64::new(0); + + fn inc_id() -> u64 { + CNT.fetch_add(1, Ordering::Relaxed) + } + + struct BacktraceInfo { + bt: Backtrace, + name: String, + } + + impl BacktraceInfo { + fn default() -> Self { + BacktraceInfo { + bt: Backtrace::force_capture(), + name: std::thread::current().name().unwrap_or("").to_string(), + } + } + } + + #[derive(PartialEq, PartialOrd, Eq, Ord, Clone, Copy, Default, Debug)] + struct TabletTraceKey { + region_id: u64, + suffix: u64, + addr: u64, + alloc_id: u64, + } + + lazy_static::lazy_static! 
{ + static ref TABLET_TRACE: Mutex> = Mutex::new(BTreeMap::default()); + } + + pub fn list(id: u64) -> Vec { + let min = TabletTraceKey { + region_id: id, + suffix: 0, + addr: 0, + alloc_id: 0, + }; + let max = TabletTraceKey { + region_id: id, + suffix: u64::MAX, + addr: u64::MAX, + alloc_id: u64::MAX, + }; + let traces = TABLET_TRACE.lock().unwrap(); + traces + .range((Included(min), Included(max))) + .map(|(k, v)| { + format!( + "{}_{} {} {} {}", + k.region_id, k.suffix, k.addr, v.name, v.bt + ) + }) + .collect() + } + + #[derive(Debug)] + pub struct TabletTraceId(TabletTraceKey); + + impl TabletTraceId { + pub fn new(path: &str, db: &DB) -> Self { + let mut name = path.split('/'); + let name = name.next_back().unwrap(); + let parts: Vec<_> = name.split('_').collect(); + if parts.len() == 2 { + let id: u64 = parts[0].parse().unwrap(); + let suffix: u64 = parts[1].parse().unwrap(); + let bt = BacktraceInfo::default(); + let key = TabletTraceKey { + region_id: id, + suffix, + addr: db as *const _ as u64, + alloc_id: inc_id(), + }; + TABLET_TRACE.lock().unwrap().insert(key, bt); + Self(key) + } else { + Self(Default::default()) + } + } + } + + impl Clone for TabletTraceId { + fn clone(&self) -> Self { + if self.0.region_id != 0 { + let bt = BacktraceInfo::default(); + let mut key = self.0; + key.alloc_id = inc_id(); + TABLET_TRACE.lock().unwrap().insert(key, bt); + Self(key) + } else { + Self(self.0) + } + } + } + + impl Drop for TabletTraceId { + fn drop(&mut self) { + if self.0.region_id != 0 { + TABLET_TRACE.lock().unwrap().remove(&self.0); + } + } + } +} + #[derive(Clone, Debug)] pub struct RocksEngine { db: Arc, support_multi_batch_write: bool, + #[cfg(feature = "trace-lifetime")] + _id: trace::TabletTraceId, } impl RocksEngine { pub(crate) fn new(db: DB) -> RocksEngine { - RocksEngine::from_db(Arc::new(db)) - } - - pub fn from_db(db: Arc) -> Self { + let db = Arc::new(db); RocksEngine { - db: db.clone(), support_multi_batch_write: 
db.get_db_options().is_enable_multi_batch_write(), + #[cfg(feature = "trace-lifetime")] + _id: trace::TabletTraceId::new(db.path(), &db), + db, } } @@ -39,6 +172,11 @@ impl RocksEngine { pub fn support_multi_batch_write(&self) -> bool { self.support_multi_batch_write } + + #[cfg(feature = "trace-lifetime")] + pub fn trace(region_id: u64) -> Vec { + trace::list(region_id) + } } impl KvEngine for RocksEngine { diff --git a/components/engine_traits/Cargo.toml b/components/engine_traits/Cargo.toml index 00b3bb97b66..f235a4d545e 100644 --- a/components/engine_traits/Cargo.toml +++ b/components/engine_traits/Cargo.toml @@ -15,6 +15,7 @@ fail = "0.5" file_system = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } +lazy_static = "1.0" log_wrappers = { workspace = true } protobuf = "2" raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index 6bdfa97a6e6..2c2eb290b0e 100644 --- a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -44,10 +44,11 @@ impl CachedTablet { CachedTablet { latest: Arc::new(LatestTablet { data: Mutex::new(data.clone()), - version: AtomicU64::new(0), + version: AtomicU64::new(1), }), cache: data, - version: 0, + // We use 0 in release, so it needs to be intialized to 1. 
+ version: 1, } } diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index ad7779b121c..2beed27de8b 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -476,6 +476,22 @@ where )); } }; + + #[cfg(feature = "trace-tablet-lifetime")] + let body = { + let query = req.uri().query().unwrap_or(""); + let query_pairs: HashMap<_, _> = + url::form_urlencoded::parse(query.as_bytes()).collect(); + + let mut body = body; + if query_pairs.contains_key("trace-tablet") { + for s in engine_rocks::RocksEngine::trace(id) { + body.push(b'\n'); + body.extend_from_slice(s.as_bytes()); + } + }; + body + }; match Response::builder() .header("content-type", "application/json") .body(hyper::Body::from(body)) diff --git a/src/storage/txn/flow_controller/singleton_flow_controller.rs b/src/storage/txn/flow_controller/singleton_flow_controller.rs index 2b36d6d8821..801d3d27280 100644 --- a/src/storage/txn/flow_controller/singleton_flow_controller.rs +++ b/src/storage/txn/flow_controller/singleton_flow_controller.rs @@ -442,8 +442,43 @@ impl Default for CfFlowChecker { } } +pub trait FlowControlFactorStore { + fn num_files_at_level(&self, region_id: u64, cf: &str, level: usize) -> u64; + fn num_immutable_mem_table(&self, region_id: u64, cf: &str) -> u64; + fn pending_compaction_bytes(&self, region_id: u64, cf: &str) -> u64; + fn cf_names(&self, region_id: u64) -> Vec; +} + +impl FlowControlFactorStore for E { + fn cf_names(&self, _region_id: u64) -> Vec { + CfNamesExt::cf_names(self) + .iter() + .map(|v| v.to_string()) + .collect() + } + + fn num_files_at_level(&self, _region_id: u64, cf: &str, level: usize) -> u64 { + match self.get_cf_num_files_at_level(cf, level) { + Ok(Some(n)) => n, + _ => 0, + } + } + fn num_immutable_mem_table(&self, _region_id: u64, cf: &str) -> u64 { + match self.get_cf_num_immutable_mem_table(cf) { + Ok(Some(n)) => n, + _ => 0, + } + } + fn pending_compaction_bytes(&self, _region_id: u64, cf: &str) -> u64 { 
+ match self.get_cf_pending_compaction_bytes(cf) { + Ok(Some(n)) => n, + _ => 0, + } + } +} + #[derive(CopyGetters, Setters)] -pub(super) struct FlowChecker { +pub(super) struct FlowChecker { pub soft_pending_compaction_bytes_limit: u64, hard_pending_compaction_bytes_limit: u64, memtables_threshold: u64, @@ -469,34 +504,38 @@ pub(super) struct FlowChecker FlowChecker { +impl FlowChecker { pub fn new( config: &FlowControlConfig, engine: E, discard_ratio: Arc, limiter: Arc, ) -> Self { - Self::new_with_tablet_suffix(config, engine, discard_ratio, limiter, 0) + Self::new_with_region_id(0, 0, config, engine, discard_ratio, limiter) } - pub fn new_with_tablet_suffix( + pub fn new_with_region_id( + region_id: u64, + tablet_suffix: u64, config: &FlowControlConfig, engine: E, discard_ratio: Arc, limiter: Arc, - tablet_suffix: u64, ) -> Self { let cf_checkers = engine - .cf_names() + .cf_names(region_id) .into_iter() - .map(|cf| (cf.to_owned(), CfFlowChecker::default())) + .map(|cf_name| (cf_name, CfFlowChecker::default())) .collect(); Self { + region_id, + tablet_suffix, soft_pending_compaction_bytes_limit: config.soft_pending_compaction_bytes_limit.0, hard_pending_compaction_bytes_limit: config.hard_pending_compaction_bytes_limit.0, memtables_threshold: config.memtables_threshold, @@ -510,7 +549,6 @@ impl FlowChecker { last_record_time: Instant::now_coarse(), last_speed: 0.0, wait_for_destroy_range_finish: false, - tablet_suffix, } } @@ -568,11 +606,8 @@ impl FlowChecker { for (cf, cf_checker) in &mut self.cf_checkers { if let Some(before) = cf_checker.pending_bytes_before_unsafe_destroy_range { let soft = (self.soft_pending_compaction_bytes_limit as f64).log2(); - let after = (self - .engine - .get_cf_pending_compaction_bytes(cf) - .unwrap_or(None) - .unwrap_or(0) as f64) + let after = (self.engine.pending_compaction_bytes(self.region_id, cf) + as f64) .log2(); assert!(before < soft); @@ -691,12 +726,7 @@ impl FlowChecker { // Because pending compaction bytes changes 
dramatically, take the // logarithm of pending compaction bytes to make the values fall into // a relative small range - let num = (self - .engine - .get_cf_pending_compaction_bytes(&cf) - .unwrap_or(None) - .unwrap_or(0) as f64) - .log2(); + let num = (self.engine.pending_compaction_bytes(self.region_id, &cf) as f64).log2(); let checker = self.cf_checkers.get_mut(&cf).unwrap(); checker.long_term_pending_bytes.observe(num); SCHED_PENDING_COMPACTION_BYTES_GAUGE @@ -756,11 +786,7 @@ impl FlowChecker { } fn on_memtable_change(&mut self, cf: &str) { - let num_memtables = self - .engine - .get_cf_num_immutable_mem_table(cf) - .unwrap_or(None) - .unwrap_or(0); + let num_memtables = self.engine.num_immutable_mem_table(self.region_id, cf); let checker = self.cf_checkers.get_mut(cf).unwrap(); SCHED_MEMTABLE_GAUGE .with_label_values(&[cf]) @@ -839,11 +865,7 @@ impl FlowChecker { } fn collect_l0_consumption_stats(&mut self, cf: &str, l0_bytes: u64) { - let num_l0_files = self - .engine - .get_cf_num_files_at_level(cf, 0) - .unwrap_or(None) - .unwrap_or(0); + let num_l0_files = self.engine.num_files_at_level(self.region_id, cf, 0); let checker = self.cf_checkers.get_mut(cf).unwrap(); checker.last_l0_bytes += l0_bytes; checker.long_term_num_l0_files.observe(num_l0_files); @@ -856,11 +878,7 @@ impl FlowChecker { } fn collect_l0_production_stats(&mut self, cf: &str, flush_bytes: u64) { - let num_l0_files = self - .engine - .get_cf_num_files_at_level(cf, 0) - .unwrap_or(None) - .unwrap_or(0); + let num_l0_files = self.engine.num_files_at_level(self.region_id, cf, 0); let checker = self.cf_checkers.get_mut(cf).unwrap(); checker.last_flush_bytes += flush_bytes; diff --git a/src/storage/txn/flow_controller/tablet_flow_controller.rs b/src/storage/txn/flow_controller/tablet_flow_controller.rs index 922e986874a..556b5f4a8fa 100644 --- a/src/storage/txn/flow_controller/tablet_flow_controller.rs +++ b/src/storage/txn/flow_controller/tablet_flow_controller.rs @@ -17,9 +17,48 @@ use 
engine_traits::{CfNamesExt, FlowControlFactorsExt, TabletRegistry}; use rand::Rng; use tikv_util::{sys::thread::StdThreadBuildWrapper, time::Limiter}; -use super::singleton_flow_controller::{FlowChecker, Msg, RATIO_SCALE_FACTOR, TICK_DURATION}; +use super::singleton_flow_controller::{ + FlowChecker, FlowControlFactorStore, Msg, RATIO_SCALE_FACTOR, TICK_DURATION, +}; use crate::storage::config::FlowControlConfig; +pub struct TabletFlowFactorStore { + registry: TabletRegistry, +} + +impl TabletFlowFactorStore { + pub fn new(registry: TabletRegistry) -> Self { + Self { registry } + } + + fn query(&self, region_id: u64, f: impl Fn(&EK) -> engine_traits::Result>) -> u64 { + self.registry + .get(region_id) + .and_then(|mut c| c.latest().and_then(|t| f(t).ok().flatten())) + .unwrap_or(0) + } +} + +impl FlowControlFactorStore + for TabletFlowFactorStore +{ + fn cf_names(&self, _region_id: u64) -> Vec { + engine_traits::DATA_CFS + .iter() + .map(|s| s.to_string()) + .collect() + } + fn num_files_at_level(&self, region_id: u64, cf: &str, level: usize) -> u64 { + self.query(region_id, |t| t.get_cf_num_files_at_level(cf, level)) + } + fn num_immutable_mem_table(&self, region_id: u64, cf: &str) -> u64 { + self.query(region_id, |t| t.get_cf_num_immutable_mem_table(cf)) + } + fn pending_compaction_bytes(&self, region_id: u64, cf: &str) -> u64 { + self.query(region_id, |t| t.get_cf_pending_compaction_bytes(cf)) + } +} + type Limiters = Arc, Arc)>>>; pub struct TabletFlowController { enabled: Arc, @@ -59,8 +98,7 @@ impl TabletFlowController { Msg::Disable }) .unwrap(); - let flow_checkers: Arc>>> = - Arc::new(RwLock::new(HashMap::default())); + let flow_checkers = Arc::new(RwLock::new(HashMap::default())); let limiters: Limiters = Arc::new(RwLock::new(HashMap::default())); Self { enabled: Arc::new(AtomicBool::new(config.enable)), @@ -90,7 +128,7 @@ impl FlowInfoDispatcher { rx: Receiver, flow_info_receiver: Receiver, registry: TabletRegistry, - flow_checkers: Arc>>>, + 
flow_checkers: Arc>>>>, limiters: Limiters, config: FlowControlConfig, ) -> JoinHandle<()> { @@ -139,14 +177,10 @@ impl FlowInfoDispatcher { } Ok(FlowInfo::Created(region_id, suffix)) => { let mut checkers = flow_checkers.as_ref().write().unwrap(); - let checker = match checkers.entry(region_id) { + match checkers.entry(region_id) { HashMapEntry::Occupied(e) => e.into_mut(), HashMapEntry::Vacant(e) => { - let engine = if let Some(mut c) = registry.get(region_id) && let Some(t) = c.latest() { - t.clone() - } else { - continue; - }; + let engine = TabletFlowFactorStore::new(registry.clone()); let mut v = limiters.as_ref().write().unwrap(); let discard_ratio = Arc::new(AtomicU32::new(0)); let limiter = v.entry(region_id).or_insert(( @@ -157,26 +191,16 @@ impl FlowInfoDispatcher { ), discard_ratio, )); - e.insert(FlowChecker::new_with_tablet_suffix( + e.insert(FlowChecker::new_with_region_id( + region_id, + suffix, &config, engine, limiter.1.clone(), limiter.0.clone(), - suffix, )) - }, - }; - // check if the checker's engine is exactly (region_id, suffix) - // if checker.suffix < suffix, it means its tablet is old and needs the - // refresh - if checker.tablet_suffix() < suffix { - let cached = registry.get(region_id); - // None means the region is destroyed. 
- if let Some(mut c) = cached && let Some(engine) = c.latest() { - checker.set_engine(engine.clone()); - checker.set_tablet_suffix(suffix); } - } + }; } Ok(FlowInfo::Destroyed(region_id, suffix)) => { let mut remove_limiter = false; From c9cebe6cd44255018202d2e56023df096038bc09 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 13 Feb 2023 16:20:01 +0800 Subject: [PATCH 0522/1149] raftstore-v2: integration test (#13989) ref tikv/tikv#12842 Implement integration test framework v2 Signed-off-by: SpadeA-Tang Co-authored-by: Xinye Tao --- Cargo.lock | 49 + Cargo.toml | 4 +- .../cdc/tests/failpoints/test_observe.rs | 2 +- .../tests/integrations/test_flow_control.rs | 2 +- components/cdc/tests/mod.rs | 2 +- components/engine_traits/Cargo.toml | 1 + components/engine_traits/src/tablet.rs | 7 + components/raftstore-v2/src/lib.rs | 2 +- components/raftstore-v2/src/router/imp.rs | 12 + components/raftstore/src/store/snap.rs | 5 + components/resolved_ts/tests/mod.rs | 2 +- components/test_backup/src/lib.rs | 2 +- components/test_raftstore-v2/Cargo.toml | 68 + components/test_raftstore-v2/src/cluster.rs | 1518 +++++++++++++++++ components/test_raftstore-v2/src/lib.rs | 9 + components/test_raftstore-v2/src/node.rs | 423 +++++ components/test_raftstore-v2/src/server.rs | 726 ++++++++ .../src/transport_simulate.rs | 128 ++ components/test_raftstore-v2/src/util.rs | 191 +++ components/test_raftstore/src/cluster.rs | 47 +- components/test_raftstore/src/lib.rs | 2 +- .../test_raftstore/src/transport_simulate.rs | 2 +- components/test_raftstore/src/util.rs | 96 +- components/test_raftstore_macro/src/lib.rs | 10 +- src/server/engine_factory.rs | 8 + src/server/service/kv.rs | 6 + src/server/tablet_snap.rs | 39 + tests/Cargo.toml | 1 + tests/failpoints/cases/test_async_io.rs | 4 +- .../cases/test_cmd_epoch_checker.rs | 4 +- tests/failpoints/cases/test_conf_change.rs | 2 +- tests/failpoints/cases/test_early_apply.rs | 2 +- 
tests/failpoints/cases/test_merge.rs | 38 +- tests/failpoints/cases/test_pending_peers.rs | 4 +- tests/failpoints/cases/test_rawkv.rs | 2 +- tests/failpoints/cases/test_replica_read.rs | 22 +- .../cases/test_replica_stale_read.rs | 4 +- tests/failpoints/cases/test_snap.rs | 18 +- tests/failpoints/cases/test_split_region.rs | 10 +- tests/failpoints/cases/test_stale_peer.rs | 2 +- tests/failpoints/cases/test_stale_read.rs | 12 +- .../raftstore/test_conf_change.rs | 311 ++-- .../integrations/raftstore/test_flashback.rs | 4 +- .../integrations/raftstore/test_lease_read.rs | 24 +- tests/integrations/raftstore/test_merge.rs | 95 +- tests/integrations/raftstore/test_prevote.rs | 2 +- .../raftstore/test_region_info_accessor.rs | 2 +- .../raftstore/test_replica_read.rs | 18 +- tests/integrations/raftstore/test_single.rs | 54 +- tests/integrations/raftstore/test_snap.rs | 6 +- .../raftstore/test_split_region.rs | 286 ++-- .../raftstore/test_transfer_leader.rs | 145 +- .../raftstore/test_unsafe_recovery.rs | 4 +- tests/integrations/storage/test_raftkv.rs | 2 +- 54 files changed, 3774 insertions(+), 667 deletions(-) create mode 100644 components/test_raftstore-v2/Cargo.toml create mode 100644 components/test_raftstore-v2/src/cluster.rs create mode 100644 components/test_raftstore-v2/src/lib.rs create mode 100644 components/test_raftstore-v2/src/node.rs create mode 100644 components/test_raftstore-v2/src/server.rs create mode 100644 components/test_raftstore-v2/src/transport_simulate.rs create mode 100644 components/test_raftstore-v2/src/util.rs diff --git a/Cargo.lock b/Cargo.lock index 6974e776935..6083f14bad7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5942,6 +5942,54 @@ dependencies = [ "txn_types", ] +[[package]] +name = "test_raftstore-v2" +version = "0.0.1" +dependencies = [ + "api_version", + "backtrace", + "causal_ts", + "collections", + "concurrency_manager", + "crossbeam", + "encryption_export", + "engine_rocks", + "engine_rocks_helper", + "engine_test", + 
"engine_traits", + "fail", + "file_system", + "futures 0.3.15", + "grpcio", + "grpcio-health", + "keys", + "kvproto", + "lazy_static", + "log_wrappers", + "pd_client", + "protobuf", + "raft", + "raftstore", + "raftstore-v2", + "rand 0.8.5", + "resolved_ts", + "resource_control", + "resource_metering", + "security", + "server", + "slog", + "slog-global", + "tempfile", + "test_pd_client", + "test_raftstore", + "test_util", + "tikv", + "tikv_util", + "tokio", + "tokio-timer", + "txn_types", +] + [[package]] name = "test_raftstore_macro" version = "0.0.1" @@ -6062,6 +6110,7 @@ dependencies = [ "test_pd", "test_pd_client", "test_raftstore", + "test_raftstore-v2", "test_raftstore_macro", "test_sst_importer", "test_storage", diff --git a/Cargo.toml b/Cargo.toml index bab7869a9d0..63be8944f5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,7 +39,7 @@ cloud-azure = [ "encryption_export/cloud-azure", "sst_importer/cloud-azure", ] -testexport = ["raftstore/testexport", "api_version/testexport", "causal_ts/testexport"] +testexport = ["raftstore/testexport", "api_version/testexport", "causal_ts/testexport", "engine_traits/testexport"] test-engine-kv-rocksdb = [ "engine_test/test-engine-kv-rocksdb" ] @@ -281,6 +281,7 @@ members = [ "components/test_pd", "components/test_pd_client", "components/test_raftstore", + "components/test_raftstore-v2", "components/test_raftstore_macro", "components/test_sst_importer", "components/test_storage", @@ -357,6 +358,7 @@ example_coprocessor_plugin = { path = "components/test_coprocessor_plugin/exampl test_pd = { path = "components/test_pd" } test_pd_client = { path = "components/test_pd_client" } test_raftstore = { path = "components/test_raftstore", default-features = false } +test_raftstore-v2 = { path = "components/test_raftstore-v2", default-features = false } test_raftstore_macro = { path = "components/test_raftstore_macro" } test_sst_importer = { path = "components/test_sst_importer" } test_storage = { path = "components/test_storage", 
default-features = false } diff --git a/components/cdc/tests/failpoints/test_observe.rs b/components/cdc/tests/failpoints/test_observe.rs index 8c418558dcc..480fcc4582f 100644 --- a/components/cdc/tests/failpoints/test_observe.rs +++ b/components/cdc/tests/failpoints/test_observe.rs @@ -130,7 +130,7 @@ fn test_observe_duplicate_cmd_impl() { #[allow(dead_code)] fn test_delayed_change_cmd() { let mut cluster = new_server_cluster(1, 3); - configure_for_lease_read(&mut cluster, Some(50), Some(20)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(20)); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(100); cluster.pd_client.disable_default_operator(); let mut suite = TestSuiteBuilder::new().cluster(cluster).build(); diff --git a/components/cdc/tests/integrations/test_flow_control.rs b/components/cdc/tests/integrations/test_flow_control.rs index 56cb43e06c4..fdfd136d9c7 100644 --- a/components/cdc/tests/integrations/test_flow_control.rs +++ b/components/cdc/tests/integrations/test_flow_control.rs @@ -15,7 +15,7 @@ use crate::{new_event_feed, TestSuiteBuilder}; fn test_cdc_congest() { let mut cluster = new_server_cluster(1, 1); // Increase the Raft tick interval to make this test case running reliably. - configure_for_lease_read(&mut cluster, Some(100), None); + configure_for_lease_read(&mut cluster.cfg, Some(100), None); let memory_quota = 1024; // 1KB let mut suite = TestSuiteBuilder::new() .cluster(cluster) diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index 77e50bb10b2..843b6b2f1d0 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -236,7 +236,7 @@ impl TestSuite { pub fn new(count: usize, api_version: ApiVersion) -> TestSuite { let mut cluster = new_server_cluster_with_api_ver(1, count, api_version); // Increase the Raft tick interval to make this test case running reliably. 
- configure_for_lease_read(&mut cluster, Some(100), None); + configure_for_lease_read(&mut cluster.cfg, Some(100), None); // Disable background renew to make timestamp predictable. configure_for_causal_ts(&mut cluster, "0s", 1); diff --git a/components/engine_traits/Cargo.toml b/components/engine_traits/Cargo.toml index f235a4d545e..664bc72afc5 100644 --- a/components/engine_traits/Cargo.toml +++ b/components/engine_traits/Cargo.toml @@ -6,6 +6,7 @@ publish = false [features] failpoints = ["fail/failpoints"] +testexport = [] [dependencies] case_macros = { workspace = true } diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index 2c2eb290b0e..79512a99f64 100644 --- a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -13,6 +13,8 @@ use collections::HashMap; use kvproto::metapb::Region; use tikv_util::box_err; +#[cfg(any(test, feature = "testexport"))] +use crate::StateStorage; use crate::{Error, FlushState, Result}; #[derive(Debug)] @@ -146,6 +148,11 @@ pub trait TabletFactory: Send + Sync { /// Check if the tablet with specified path exists fn exists(&self, path: &Path) -> bool; + + #[cfg(any(test, feature = "testexport"))] + fn set_state_storage(&self, _: Arc) { + unimplemented!() + } } pub struct SingletonFactory { diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index b82b6de3931..8af6b57e9bc 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -39,6 +39,6 @@ pub(crate) use batch::StoreContext; pub use batch::{create_store_batch_system, StoreRouter, StoreSystem}; pub use bootstrap::Bootstrap; pub use fsm::StoreMeta; -pub use operation::{SimpleWriteBinary, SimpleWriteEncoder, StateStorage}; +pub use operation::{write_initial_states, SimpleWriteBinary, SimpleWriteEncoder, StateStorage}; pub use raftstore::{store::Config, Error, Result}; pub use worker::pd::{PdReporter, Task as PdTask}; diff --git 
a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 315f8a0d8eb..a9a8b23b571 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -156,4 +156,16 @@ impl RaftRouter { { self.local_reader.snapshot(req) } + + #[cfg(any(test, feature = "testexport"))] + pub fn new_with_store_meta( + router: StoreRouter, + store_meta: Arc>>, + ) -> Self { + let logger = router.logger().clone(); + RaftRouter { + router: router.clone(), + local_reader: LocalReader::new(store_meta, router, logger), + } + } } diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 358ec716195..5f971818e9a 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -2021,6 +2021,11 @@ impl TabletSnapManager { } Ok(total_size) } + + #[inline] + pub fn root_path(&self) -> &Path { + self.base.as_path() + } } #[cfg(test)] diff --git a/components/resolved_ts/tests/mod.rs b/components/resolved_ts/tests/mod.rs index e8d2a6429ba..314a11db1a2 100644 --- a/components/resolved_ts/tests/mod.rs +++ b/components/resolved_ts/tests/mod.rs @@ -40,7 +40,7 @@ impl TestSuite { pub fn new(count: usize) -> Self { let mut cluster = new_server_cluster(1, count); // Increase the Raft tick interval to make this test case running reliably. - configure_for_lease_read(&mut cluster, Some(100), None); + configure_for_lease_read(&mut cluster.cfg, Some(100), None); Self::with_cluster(count, cluster) } diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index 3409a6ef366..cb669070b9e 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -73,7 +73,7 @@ impl TestSuite { pub fn new(count: usize, sst_max_size: u64, api_version: ApiVersion) -> TestSuite { let mut cluster = new_server_cluster_with_api_ver(1, count, api_version); // Increase the Raft tick interval to make this test case running reliably. 
- configure_for_lease_read(&mut cluster, Some(100), None); + configure_for_lease_read(&mut cluster.cfg, Some(100), None); cluster.run(); let mut endpoints = HashMap::default(); diff --git a/components/test_raftstore-v2/Cargo.toml b/components/test_raftstore-v2/Cargo.toml new file mode 100644 index 00000000000..9ccfdb93cfe --- /dev/null +++ b/components/test_raftstore-v2/Cargo.toml @@ -0,0 +1,68 @@ +[package] +name = "test_raftstore-v2" +version = "0.0.1" +edition = "2018" +publish = false + +[features] +default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] +cloud-aws = ["encryption_export/cloud-aws"] +cloud-gcp = ["encryption_export/cloud-gcp"] +cloud-azure = ["encryption_export/cloud-azure"] +test-engine-kv-rocksdb = [ + "raftstore/test-engine-kv-rocksdb" +] +test-engine-raft-raft-engine = [ + "raftstore/test-engine-raft-raft-engine" +] +test-engines-rocksdb = [ + "raftstore/test-engines-rocksdb", +] +test-engines-panic = [ + "raftstore/test-engines-panic", +] + +[dependencies] +api_version = { workspace = true } +backtrace = "0.3" +causal_ts = { workspace = true, features = ["testexport"] } +collections = { workspace = true } +concurrency_manager = { workspace = true } +crossbeam = "0.8" +encryption_export = { workspace = true } +engine_rocks = { workspace = true } +engine_rocks_helper = { workspace = true } +engine_test = { workspace = true } +engine_traits = { workspace = true } +fail = "0.5" +file_system = { workspace = true } +futures = "0.3" +grpcio = { workspace = true } +grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } +keys = { workspace = true } +kvproto = { git = "https://github.com/pingcap/kvproto.git" } +lazy_static = "1.3" +log_wrappers = { workspace = true } +pd_client = { workspace = true } +protobuf = { version = "2.8", features = ["bytes"] } +raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raftstore = { 
workspace = true, features = ["testexport"] } +raftstore-v2 = { workspace = true, features = ["testexport"] } +rand = "0.8" +resolved_ts = { workspace = true } +resource_control = { workspace = true } +resource_metering = { workspace = true } +security = { workspace = true } +server = { workspace = true } +slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } +# better to not use slog-global, but pass in the logger +slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +tempfile = "3.0" +test_pd_client = { workspace = true } +test_raftstore = { workspace = true } +test_util = { workspace = true } +tikv = { workspace = true } +tikv_util = { workspace = true } +tokio = { version = "1.5", features = ["rt-multi-thread"] } +tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } +txn_types = { workspace = true } diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs new file mode 100644 index 00000000000..c935040055f --- /dev/null +++ b/components/test_raftstore-v2/src/cluster.rs @@ -0,0 +1,1518 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + collections::hash_map::Entry as MapEntry, + result, + sync::{Arc, Mutex, RwLock}, + thread, + time::Duration, +}; + +use collections::{HashMap, HashSet}; +use encryption_export::DataKeyManager; +use engine_rocks::{RocksDbVector, RocksEngine, RocksSnapshot, RocksStatistics}; +use engine_test::raft::RaftTestEngine; +use engine_traits::{ + Iterable, KvEngine, MiscExt, Peekable, RaftEngine, RaftEngineReadOnly, RaftLogBatch, + ReadOptions, SyncMutable, TabletRegistry, CF_DEFAULT, +}; +use file_system::IoRateLimiter; +use futures::{compat::Future01CompatExt, executor::block_on, select, FutureExt}; +use keys::data_key; +use kvproto::{ + errorpb::Error as PbError, + kvrpcpb::ApiVersion, + metapb::{self, Buckets, PeerRole, RegionEpoch}, + raft_cmdpb::{ + AdminCmdType, CmdType, RaftCmdRequest, RaftCmdResponse, RegionDetailResponse, Request, + Response, StatusCmdType, + }, + raft_serverpb::{PeerState, RaftApplyState, RegionLocalState, StoreIdent}, +}; +use pd_client::PdClient; +use raftstore::{ + store::{ + cmd_resp, initial_region, util::check_key_in_region, Bucket, BucketRange, Callback, + RegionSnapshot, WriteResponse, INIT_EPOCH_CONF_VER, INIT_EPOCH_VER, + }, + Error, Result, +}; +use raftstore_v2::{ + router::{PeerMsg, QueryResult}, + write_initial_states, SimpleWriteEncoder, StoreMeta, StoreRouter, +}; +use resource_control::ResourceGroupManager; +use tempfile::TempDir; +use test_pd_client::TestPdClient; +use test_raftstore::{ + is_error_response, new_admin_request, new_delete_cmd, new_delete_range_cmd, new_get_cf_cmd, + new_peer, new_put_cf_cmd, new_region_detail_cmd, new_region_leader_cmd, new_request, + new_snap_cmd, new_status_request, new_store, new_tikv_config_with_api_ver, + new_transfer_leader_cmd, sleep_ms, Config, Filter, FilterFactory, PartitionFilterFactory, + RawEngine, +}; +use tikv::server::Result as ServerResult; +use tikv_util::{ + box_err, box_try, debug, error, safe_panic, thread_group::GroupProperties, time::Instant, + 
timer::GLOBAL_TIMER_HANDLE, warn, worker::LazyWorker, HandyRwLock, +}; + +use crate::create_test_engine; + +// We simulate 3 or 5 nodes, each has a store. +// Sometimes, we use fixed id to test, which means the id +// isn't allocated by pd, and node id, store id are same. +// E,g, for node 1, the node id and store id are both 1. +pub trait Simulator { + // Pass 0 to let pd allocate a node id if db is empty. + // If node id > 0, the node must be created in db already, + // and the node id must be the same as given argument. + // Return the node id. + // TODO: we will rename node name here because now we use store only. + fn run_node( + &mut self, + node_id: u64, + cfg: Config, + store_meta: Arc>>, + raft_engine: RaftTestEngine, + tablet_registry: TabletRegistry, + resource_manager: &Option>, + ) -> ServerResult; + + fn stop_node(&mut self, node_id: u64); + fn get_node_ids(&self) -> HashSet; + fn add_send_filter(&mut self, node_id: u64, filter: Box); + fn clear_send_filters(&mut self, node_id: u64); + fn get_router(&self, node_id: u64) -> Option>; + fn get_snap_dir(&self, node_id: u64) -> String; + + fn read(&mut self, request: RaftCmdRequest, timeout: Duration) -> Result { + let mut req_clone = request.clone(); + req_clone.clear_requests(); + req_clone.mut_requests().push(new_snap_cmd()); + match self.snapshot(req_clone, timeout) { + Ok(snap) => { + let requests = request.get_requests(); + let mut response = RaftCmdResponse::default(); + let mut responses = Vec::with_capacity(requests.len()); + for req in requests { + let cmd_type = req.get_cmd_type(); + match cmd_type { + CmdType::Get => { + let mut resp = Response::default(); + let key = req.get_get().get_key(); + let cf = req.get_get().get_cf(); + let region = snap.get_region(); + + if let Err(e) = check_key_in_region(key, region) { + return Ok(cmd_resp::new_error(e)); + } + + let res = if cf.is_empty() { + snap.get_value(key).unwrap_or_else(|e| { + panic!( + "[region {}] failed to get {} with cf {}: {:?}", + 
snap.get_region().get_id(), + log_wrappers::Value::key(key), + cf, + e + ) + }) + } else { + snap.get_value_cf(cf, key).unwrap_or_else(|e| { + panic!( + "[region {}] failed to get {}: {:?}", + snap.get_region().get_id(), + log_wrappers::Value::key(key), + e + ) + }) + }; + if let Some(res) = res { + resp.mut_get().set_value(res.to_vec()); + } + resp.set_cmd_type(cmd_type); + responses.push(resp); + } + _ => unimplemented!(), + } + } + response.set_responses(responses.into()); + + Ok(response) + } + Err(e) => Ok(e), + } + } + + fn snapshot( + &mut self, + request: RaftCmdRequest, + timeout: Duration, + ) -> std::result::Result::Snapshot>, RaftCmdResponse>; + + fn async_peer_msg_on_node(&self, node_id: u64, region_id: u64, msg: PeerMsg) -> Result<()>; + + fn call_query(&self, request: RaftCmdRequest, timeout: Duration) -> Result { + let node_id = request.get_header().get_peer().get_store_id(); + self.call_query_on_node(node_id, request, timeout) + } + + fn call_query_on_node( + &self, + node_id: u64, + request: RaftCmdRequest, + timeout: Duration, + ) -> Result { + let region_id = request.get_header().get_region_id(); + let (msg, sub) = PeerMsg::raft_query(request); + match self.async_peer_msg_on_node(node_id, region_id, msg) { + Ok(()) => {} + Err(e) => { + let mut resp = RaftCmdResponse::default(); + resp.mut_header().set_error(e.into()); + return Ok(resp); + } + } + + let timeout_f = GLOBAL_TIMER_HANDLE.delay(std::time::Instant::now() + timeout); + // todo: unwrap? + match block_on(async move { + select! 
{ + res = sub.result().fuse() => Ok(res.unwrap()), + _ = timeout_f.compat().fuse() => Err(Error::Timeout(format!("request timeout for {:?}", timeout))), + + } + }).unwrap() { + QueryResult::Read(_) => unreachable!(), + QueryResult::Response(resp) => Ok(resp), + } + } + + fn call_command(&self, request: RaftCmdRequest, timeout: Duration) -> Result { + let node_id = request.get_header().get_peer().get_store_id(); + self.call_command_on_node(node_id, request, timeout) + } + + fn call_command_on_node( + &self, + node_id: u64, + mut request: RaftCmdRequest, + timeout: Duration, + ) -> Result { + let region_id = request.get_header().get_region_id(); + + let (msg, sub) = if request.has_admin_request() { + PeerMsg::admin_command(request) + } else { + let requests = request.get_requests(); + let mut write_encoder = SimpleWriteEncoder::with_capacity(64); + for req in requests { + match req.get_cmd_type() { + CmdType::Put => { + let put = req.get_put(); + write_encoder.put(put.get_cf(), put.get_key(), put.get_value()); + } + CmdType::Delete => { + let delete = req.get_delete(); + write_encoder.delete(delete.get_cf(), delete.get_key()); + } + CmdType::DeleteRange => { + unimplemented!() + } + _ => unreachable!(), + } + } + PeerMsg::simple_write(Box::new(request.take_header()), write_encoder.encode()) + }; + + match self.async_peer_msg_on_node(node_id, region_id, msg) { + Ok(()) => {} + Err(e) => { + let mut resp = RaftCmdResponse::default(); + resp.mut_header().set_error(e.into()); + return Ok(resp); + } + } + + let timeout_f = GLOBAL_TIMER_HANDLE.delay(std::time::Instant::now() + timeout); + block_on(async move { + select! { + // todo: unwrap? 
+ res = sub.result().fuse() => Ok(res.unwrap()), + _ = timeout_f.compat().fuse() => Err(Error::Timeout(format!("request timeout for {:?}", timeout))), + + } + }) + } +} + +pub struct Cluster { + pub cfg: Config, + leaders: HashMap, + pub count: usize, + + pub paths: Vec, + pub engines: Vec<(TabletRegistry, RaftTestEngine)>, + pub tablet_registries: HashMap>, + pub raft_engines: HashMap, + pub store_metas: HashMap>>>, + key_managers: Vec>>, + pub io_rate_limiter: Option>, + key_managers_map: HashMap>>, + group_props: HashMap, + pub sst_workers: Vec>, + pub sst_workers_map: HashMap, + pub kv_statistics: Vec>, + pub raft_statistics: Vec>>, + pub sim: Arc>, + pub pd_client: Arc, + resource_manager: Option>, +} + +impl Cluster { + pub fn new( + id: u64, + count: usize, + sim: Arc>, + pd_client: Arc, + api_version: ApiVersion, + ) -> Cluster { + Cluster { + cfg: Config { + tikv: new_tikv_config_with_api_ver(id, api_version), + prefer_mem: true, + }, + count, + tablet_registries: HashMap::default(), + key_managers_map: HashMap::default(), + group_props: HashMap::default(), + raft_engines: HashMap::default(), + store_metas: HashMap::default(), + leaders: HashMap::default(), + kv_statistics: vec![], + raft_statistics: vec![], + sst_workers: vec![], + sst_workers_map: HashMap::default(), + paths: vec![], + engines: vec![], + key_managers: vec![], + io_rate_limiter: None, + resource_manager: Some(Arc::new(ResourceGroupManager::default())), + sim, + pd_client, + } + } + + pub fn id(&self) -> u64 { + self.cfg.server.cluster_id + } + + // Bootstrap the store with fixed ID (like 1, 2, .. 5) and + // initialize first region in all stores, then start the cluster. + pub fn run(&mut self) { + self.create_engines(); + self.bootstrap_region().unwrap(); + self.start().unwrap(); + } + + // Bootstrap the store with fixed ID (like 1, 2, .. 5) and + // initialize first region in store 1, then start the cluster. 
+ pub fn run_conf_change(&mut self) -> u64 { + self.create_engines(); + let region_id = self.bootstrap_conf_change(); + self.start().unwrap(); + region_id + } + + pub fn create_engines(&mut self) { + self.io_rate_limiter = Some(Arc::new( + self.cfg + .storage + .io_rate_limit + .build(true /* enable_statistics */), + )); + for id in 1..self.count + 1 { + self.create_engine(Some((self.id(), id as u64))); + } + } + + // id indicates cluster id store_id + fn create_engine(&mut self, id: Option<(u64, u64)>) { + let (reg, raft_engine, key_manager, dir, sst_worker, kv_statistics, raft_statistics) = + create_test_engine(id, self.io_rate_limiter.clone(), &self.cfg); + self.engines.push((reg, raft_engine)); + self.key_managers.push(key_manager); + self.paths.push(dir); + self.sst_workers.push(sst_worker); + self.kv_statistics.push(kv_statistics); + self.raft_statistics.push(raft_statistics); + } + + pub fn start(&mut self) -> ServerResult<()> { + if self.cfg.raft_store.store_io_pool_size == 0 { + // v2 always use async write. + self.cfg.raft_store.store_io_pool_size = 1; + } + + let node_ids: Vec = self.tablet_registries.iter().map(|(&id, _)| id).collect(); + for node_id in node_ids { + self.run_node(node_id)?; + } + + // Try start new nodes. 
+ for id in self.raft_engines.len()..self.count { + let id = id as u64 + 1; + self.create_engine(Some((self.id(), id))); + let (tablet_registry, raft_engine) = self.engines.last().unwrap().clone(); + + let key_mgr = self.key_managers.last().unwrap().clone(); + let store_meta = Arc::new(Mutex::new(StoreMeta::new(id))); + + let props = GroupProperties::default(); + tikv_util::thread_group::set_properties(Some(props.clone())); + + // todo: GroupProperties + let mut sim = self.sim.wl(); + let node_id = sim.run_node( + id, + self.cfg.clone(), + store_meta.clone(), + raft_engine.clone(), + tablet_registry.clone(), + &self.resource_manager, + )?; + assert_eq!(id, node_id); + self.group_props.insert(node_id, props); + self.raft_engines.insert(node_id, raft_engine.clone()); + self.tablet_registries + .insert(node_id, tablet_registry.clone()); + self.store_metas.insert(node_id, store_meta); + self.key_managers_map.insert(node_id, key_mgr); + } + + Ok(()) + } + + pub fn run_node(&mut self, node_id: u64) -> ServerResult<()> { + debug!("starting node {}", node_id); + let tablet_registry = self.tablet_registries[&node_id].clone(); + let raft_engine = self.raft_engines[&node_id].clone(); + let cfg = self.cfg.clone(); + + // if let Some(labels) = self.labels.get(&node_id) { + // cfg.server.labels = labels.to_owned(); + // } + let store_meta = match self.store_metas.entry(node_id) { + MapEntry::Occupied(o) => { + let mut meta = o.get().lock().unwrap(); + *meta = StoreMeta::new(node_id); + o.get().clone() + } + MapEntry::Vacant(v) => v + .insert(Arc::new(Mutex::new(StoreMeta::new(node_id)))) + .clone(), + }; + + let props = GroupProperties::default(); + self.group_props.insert(node_id, props.clone()); + tikv_util::thread_group::set_properties(Some(props)); + + debug!("calling run node"; "node_id" => node_id); + self.sim.wl().run_node( + node_id, + cfg, + store_meta, + raft_engine, + tablet_registry, + &self.resource_manager, + )?; + debug!("node {} started", node_id); + Ok(()) + } + 
+ pub fn stop_node(&mut self, node_id: u64) { + debug!("stopping node {}", node_id); + self.group_props[&node_id].mark_shutdown(); + + // Simulate shutdown behavior of server shutdown. It's not enough to just set + // the map above as current thread may also query properties during shutdown. + let previous_prop = tikv_util::thread_group::current_properties(); + tikv_util::thread_group::set_properties(Some(self.group_props[&node_id].clone())); + match self.sim.write() { + Ok(mut sim) => sim.stop_node(node_id), + Err(_) => safe_panic!("failed to acquire write lock."), + } + self.pd_client.shutdown_store(node_id); + + let mut regions = vec![]; + let reg = &self.tablet_registries[&node_id]; + reg.for_each_opened_tablet(|region_id, _| { + regions.push(region_id); + true + }); + for region_id in regions { + if let Some(mut tablet) = reg.get(region_id) { + if let Some(tablet) = tablet.latest() { + let mut tried = 0; + while tried < 10 { + if Arc::strong_count(tablet.as_inner()) <= 3 { + break; + } + thread::sleep(Duration::from_millis(10)); + tried += 1; + } + } + } + reg.remove(region_id); + } + + debug!("node {} stopped", node_id); + tikv_util::thread_group::set_properties(previous_prop); + } + + /// Multiple nodes with fixed node id, like node 1, 2, .. 5, + /// First region 1 is in all stores with peer 1, 2, .. 5. + /// Peer 1 is in node 1, store 1, etc. + /// + /// Must be called after `create_engines`. 
+ pub fn bootstrap_region(&mut self) -> Result<()> { + for (i, (tablet_registry, raft_engine)) in self.engines.iter().enumerate() { + let id = i as u64 + 1; + self.tablet_registries.insert(id, tablet_registry.clone()); + self.raft_engines.insert(id, raft_engine.clone()); + let store_meta = Arc::new(Mutex::new(StoreMeta::new(id))); + self.store_metas.insert(id, store_meta); + self.key_managers_map + .insert(id, self.key_managers[i].clone()); + self.sst_workers_map.insert(id, i); + } + + let mut region = metapb::Region::default(); + region.set_id(1); + region.set_start_key(keys::EMPTY_KEY.to_vec()); + region.set_end_key(keys::EMPTY_KEY.to_vec()); + region.mut_region_epoch().set_version(INIT_EPOCH_VER); + region.mut_region_epoch().set_conf_ver(INIT_EPOCH_CONF_VER); + + for &id in self.raft_engines.keys() { + let peer = new_peer(id, id); + region.mut_peers().push(peer.clone()); + } + + for raft_engine in self.raft_engines.values() { + let mut wb = raft_engine.log_batch(10); + wb.put_prepare_bootstrap_region(®ion)?; + write_initial_states(&mut wb, region.clone())?; + box_try!(raft_engine.consume(&mut wb, true)); + } + + self.bootstrap_cluster(region); + + Ok(()) + } + + pub fn bootstrap_conf_change(&mut self) -> u64 { + for (i, (tablet_registry, raft_engine)) in self.engines.iter().enumerate() { + let id = i as u64 + 1; + self.tablet_registries.insert(id, tablet_registry.clone()); + self.raft_engines.insert(id, raft_engine.clone()); + let store_meta = Arc::new(Mutex::new(StoreMeta::new(id))); + self.store_metas.insert(id, store_meta); + self.key_managers_map + .insert(id, self.key_managers[i].clone()); + self.sst_workers_map.insert(id, i); + } + + let node_id = 1; + let region_id = 1; + let peer_id = 1; + + let region = initial_region(node_id, region_id, peer_id); + let raft_engine = self.raft_engines[&node_id].clone(); + let mut wb = raft_engine.log_batch(10); + wb.put_prepare_bootstrap_region(®ion).unwrap(); + write_initial_states(&mut wb, region.clone()).unwrap(); + 
raft_engine.consume(&mut wb, true).unwrap(); + + self.bootstrap_cluster(region); + + region_id + } + + // This is only for fixed id test + fn bootstrap_cluster(&mut self, region: metapb::Region) { + self.pd_client + .bootstrap_cluster(new_store(1, "".to_owned()), region) + .unwrap(); + for id in self.raft_engines.keys() { + let store = new_store(*id, "".to_owned()); + // todo: labels + self.pd_client.put_store(store).unwrap(); + } + } + + pub fn get_engine(&self, node_id: u64) -> WrapFactory { + WrapFactory::new( + self.pd_client.clone(), + self.raft_engines[&node_id].clone(), + self.tablet_registries[&node_id].clone(), + ) + } + + // mixed read and write requests are not supportted + pub fn call_command( + &mut self, + request: RaftCmdRequest, + timeout: Duration, + ) -> Result { + let mut is_read = false; + let mut not_read = false; + for req in request.get_requests() { + match req.get_cmd_type() { + CmdType::Get | CmdType::Snap | CmdType::ReadIndex => { + is_read = true; + } + _ => { + not_read = true; + } + } + } + let ret = if is_read { + assert!(!not_read); + self.sim.wl().read(request.clone(), timeout) + } else if request.has_status_request() { + self.sim.wl().call_query(request.clone(), timeout) + } else { + self.sim.wl().call_command(request.clone(), timeout) + }; + match ret { + Err(e) => { + warn!("failed to call command {:?}: {:?}", request, e); + Err(e) + } + a => a, + } + } + + pub fn call_command_on_leader( + &mut self, + mut request: RaftCmdRequest, + timeout: Duration, + ) -> Result { + let timer = Instant::now(); + let region_id = request.get_header().get_region_id(); + loop { + let leader = match self.leader_of_region(region_id) { + None => return Err(Error::NotLeader(region_id, None)), + Some(l) => l, + }; + request.mut_header().set_peer(leader); + let resp = match self.call_command(request.clone(), timeout) { + e @ Err(_) => return e, + Ok(resp) => resp, + }; + if self.refresh_leader_if_needed(&resp, region_id) + && timer.saturating_elapsed() < 
timeout + { + warn!( + "{:?} is no longer leader, let's retry", + request.get_header().get_peer() + ); + continue; + } + return Ok(resp); + } + } + + pub fn call_command_on_node( + &self, + node_id: u64, + request: RaftCmdRequest, + timeout: Duration, + ) -> Result { + match self + .sim + .rl() + .call_command_on_node(node_id, request.clone(), timeout) + { + Err(e) => { + warn!("failed to call command {:?}: {:?}", request, e); + Err(e) + } + a => a, + } + } + + pub fn leader_of_region(&mut self, region_id: u64) -> Option { + let timer = Instant::now_coarse(); + let timeout = Duration::from_secs(5); + let mut store_ids = None; + while timer.saturating_elapsed() < timeout { + match self.voter_store_ids_of_region(region_id) { + None => thread::sleep(Duration::from_millis(10)), + Some(ids) => { + store_ids = Some(ids); + break; + } + } + } + let store_ids = store_ids?; + if let Some(l) = self.leaders.get(®ion_id) { + // leader may be stopped in some tests. + if self.valid_leader_id(region_id, l.get_store_id()) { + return Some(l.clone()); + } + } + self.reset_leader_of_region(region_id); + let mut leader = None; + let mut leaders = HashMap::default(); + + let node_ids = self.sim.rl().get_node_ids(); + // For some tests, we stop the node but pd still has this information, + // and we must skip this. + let alive_store_ids: Vec<_> = store_ids + .iter() + .filter(|id| node_ids.contains(id)) + .cloned() + .collect(); + while timer.saturating_elapsed() < timeout { + for store_id in &alive_store_ids { + let l = match self.query_leader(*store_id, region_id, Duration::from_secs(1)) { + None => continue, + Some(l) => l, + }; + leaders + .entry(l.get_id()) + .or_insert((l, vec![])) + .1 + .push(*store_id); + } + if let Some((_, (l, c))) = leaders.iter().max_by_key(|(_, (_, c))| c.len()) { + if c.contains(&l.get_store_id()) { + leader = Some(l.clone()); + // Technically, correct calculation should use two quorum when in joint + // state. Here just for simplicity. 
+ if c.len() > store_ids.len() / 2 { + break; + } + } + } + debug!("failed to detect leaders"; "leaders" => ?leaders, "store_ids" => ?store_ids); + sleep_ms(10); + leaders.clear(); + } + + if let Some(l) = leader { + self.leaders.insert(region_id, l); + } + + self.leaders.get(®ion_id).cloned() + } + + pub fn query_leader( + &mut self, + store_id: u64, + region_id: u64, + timeout: Duration, + ) -> Option { + // To get region leader, we don't care real peer id, so use 0 instead. + let peer = new_peer(store_id, 0); + let find_leader = new_status_request(region_id, peer, new_region_leader_cmd()); + let mut resp = match self.call_command(find_leader, timeout) { + Ok(resp) => resp, + Err(err) => { + error!( + "fail to get leader of region {} on store {}, error: {:?}", + region_id, store_id, err + ); + return None; + } + }; + let mut region_leader = resp.take_status_response().take_region_leader(); + // NOTE: node id can't be 0. + if self.valid_leader_id(region_id, region_leader.get_leader().get_store_id()) { + Some(region_leader.take_leader()) + } else { + None + } + } + + fn valid_leader_id(&self, region_id: u64, leader_store_id: u64) -> bool { + let store_ids = match self.voter_store_ids_of_region(region_id) { + None => return false, + Some(ids) => ids, + }; + let node_ids = self.sim.rl().get_node_ids(); + store_ids.contains(&leader_store_id) && node_ids.contains(&leader_store_id) + } + + fn voter_store_ids_of_region(&self, region_id: u64) -> Option> { + block_on(self.pd_client.get_region_by_id(region_id)) + .unwrap() + .map(|region| { + region + .get_peers() + .iter() + .flat_map(|p| { + if p.get_role() != PeerRole::Learner { + Some(p.get_store_id()) + } else { + None + } + }) + .collect() + }) + } + + pub fn reset_leader_of_region(&mut self, region_id: u64) { + self.leaders.remove(®ion_id); + } + + // If the resp is "not leader error", get the real leader. + // Otherwise reset or refresh leader if needed. + // Returns if the request should retry. 
+ fn refresh_leader_if_needed(&mut self, resp: &RaftCmdResponse, region_id: u64) -> bool { + if !is_error_response(resp) { + return false; + } + + let err = resp.get_header().get_error(); + if err + .get_message() + .contains("peer has not applied to current term") + { + // leader peer has not applied to current term + return true; + } + + // If command is stale, leadership may have changed. + // EpochNotMatch is not checked as leadership is checked first in raftstore. + if err.has_stale_command() { + self.reset_leader_of_region(region_id); + return true; + } + + if !err.has_not_leader() { + return false; + } + let err = err.get_not_leader(); + if !err.has_leader() { + self.reset_leader_of_region(region_id); + return true; + } + self.leaders.insert(region_id, err.get_leader().clone()); + true + } + + pub fn request( + &mut self, + key: &[u8], + reqs: Vec, + read_quorum: bool, + timeout: Duration, + ) -> RaftCmdResponse { + let timer = Instant::now(); + let mut tried_times = 0; + while tried_times < 2 || timer.saturating_elapsed() < timeout { + tried_times += 1; + let mut region = self.get_region(key); + let region_id = region.get_id(); + let req = new_request( + region_id, + region.take_region_epoch(), + reqs.clone(), + read_quorum, + ); + let result = self.call_command_on_leader(req, timeout); + + let resp = match result { + e @ Err(Error::Timeout(_)) + | e @ Err(Error::NotLeader(..)) + | e @ Err(Error::StaleCommand) => { + warn!("call command failed, retry it"; "err" => ?e); + sleep_ms(100); + continue; + } + Err(e) => panic!("call command failed {:?}", e), + Ok(resp) => resp, + }; + + if resp.get_header().get_error().has_epoch_not_match() { + warn!("seems split, let's retry"); + sleep_ms(100); + continue; + } + if resp + .get_header() + .get_error() + .get_message() + .contains("merging mode") + { + warn!("seems waiting for merge, let's retry"); + sleep_ms(100); + continue; + } + return resp; + } + panic!("request timeout"); + } + + pub fn get_region(&self, key: 
&[u8]) -> metapb::Region { + self.get_region_with(key, |_| true) + } + + pub fn get_region_id(&self, key: &[u8]) -> u64 { + self.get_region(key).get_id() + } + + // Get region ids of all opened tablets in a store + pub fn region_ids(&self, store_id: u64) -> Vec { + let mut ids = vec![]; + let registry = self.tablet_registries.get(&store_id).unwrap(); + registry.for_each_opened_tablet(|id, _| -> bool { + ids.push(id); + true + }); + ids + } + + pub fn scan( + &self, + store_id: u64, + cf: &str, + start_key: &[u8], + end_key: &[u8], + fill_cache: bool, + mut f: F, + ) -> engine_traits::Result<()> + where + F: FnMut(&[u8], &[u8]) -> engine_traits::Result, + { + let region_ids = self.region_ids(store_id); + for id in region_ids { + self.scan_region(store_id, id, cf, start_key, end_key, fill_cache, &mut f)?; + } + Ok(()) + } + + // start_key and end_key should be `data key` + fn scan_region( + &self, + store_id: u64, + region_id: u64, + cf: &str, + start_key: &[u8], + end_key: &[u8], + fill_cache: bool, + f: F, + ) -> engine_traits::Result<()> + where + F: FnMut(&[u8], &[u8]) -> engine_traits::Result, + { + let tablet_registry = self.tablet_registries.get(&store_id).unwrap(); + let tablet = tablet_registry + .get(region_id) + .unwrap() + .latest() + .unwrap() + .clone(); + + let region = block_on(self.pd_client.get_region_by_id(region_id)) + .unwrap() + .unwrap(); + let region_start_key: &[u8] = &data_key(region.get_start_key()); + let region_end_key: &[u8] = &data_key(region.get_end_key()); + + let amended_start_key = if start_key > region_start_key { + start_key + } else { + region_start_key + }; + let amended_end_key = if end_key < region_end_key || region_end_key.is_empty() { + end_key + } else { + region_end_key + }; + + tablet.scan(cf, amended_start_key, amended_end_key, fill_cache, f) + } + + pub fn get_raft_engine(&self, node_id: u64) -> RaftTestEngine { + self.raft_engines[&node_id].clone() + } + + pub fn get_region_epoch(&self, region_id: u64) -> RegionEpoch { 
+ block_on(self.pd_client.get_region_by_id(region_id)) + .unwrap() + .unwrap() + .take_region_epoch() + } + + pub fn region_detail(&mut self, region_id: u64, store_id: u64) -> RegionDetailResponse { + let status_cmd = new_region_detail_cmd(); + let peer = new_peer(store_id, 0); + let req = new_status_request(region_id, peer, status_cmd); + let resp = self.call_command(req, Duration::from_secs(5)); + assert!(resp.is_ok(), "{:?}", resp); + + let mut resp = resp.unwrap(); + assert!(resp.has_status_response()); + let mut status_resp = resp.take_status_response(); + assert_eq!(status_resp.get_cmd_type(), StatusCmdType::RegionDetail); + assert!(status_resp.has_region_detail()); + status_resp.take_region_detail() + } + + pub fn get(&mut self, key: &[u8]) -> Option> { + self.get_impl(CF_DEFAULT, key, false) + } + + pub fn get_cf(&mut self, cf: &str, key: &[u8]) -> Option> { + self.get_impl(cf, key, false) + } + + pub fn must_get(&mut self, key: &[u8]) -> Option> { + self.get_impl(CF_DEFAULT, key, true) + } + + fn get_impl(&mut self, cf: &str, key: &[u8], read_quorum: bool) -> Option> { + let mut resp = self.request( + key, + vec![new_get_cf_cmd(cf, key)], + read_quorum, + Duration::from_secs(5), + ); + if resp.get_header().has_error() { + panic!("response {:?} has error", resp); + } + assert_eq!(resp.get_responses().len(), 1); + assert_eq!(resp.get_responses()[0].get_cmd_type(), CmdType::Get); + if resp.get_responses()[0].has_get() { + Some(resp.mut_responses()[0].mut_get().take_value()) + } else { + None + } + } + + // Flush the cf of all opened tablets + pub fn must_flush_cf(&mut self, cf: &str, sync: bool) { + for registry in self.tablet_registries.values() { + registry.for_each_opened_tablet(|_id, cached_tablet| -> bool { + if let Some(db) = cached_tablet.latest() { + db.flush_cf(cf, sync).unwrap(); + } + true + }); + } + } + + // Get region when the `filter` returns true. 
+ pub fn get_region_with(&self, key: &[u8], filter: F) -> metapb::Region + where + F: Fn(&metapb::Region) -> bool, + { + for _ in 0..100 { + if let Ok(region) = self.pd_client.get_region(key) { + if filter(®ion) { + return region; + } + } + // We may meet range gap after split, so here we will + // retry to get the region again. + sleep_ms(20); + } + + panic!("find no region for {}", log_wrappers::hex_encode_upper(key)); + } + + pub fn must_put(&mut self, key: &[u8], value: &[u8]) { + self.must_put_cf(CF_DEFAULT, key, value); + } + + pub fn must_put_cf(&mut self, cf: &str, key: &[u8], value: &[u8]) { + if let Err(e) = self.batch_put(key, vec![new_put_cf_cmd(cf, key, value)]) { + panic!("has error: {:?}", e); + } + } + + pub fn put(&mut self, key: &[u8], value: &[u8]) -> result::Result<(), PbError> { + self.batch_put(key, vec![new_put_cf_cmd(CF_DEFAULT, key, value)]) + .map(|_| ()) + } + + pub fn batch_put( + &mut self, + region_key: &[u8], + reqs: Vec, + ) -> result::Result { + let resp = self.request(region_key, reqs, false, Duration::from_secs(5)); + if resp.get_header().has_error() { + Err(resp.get_header().get_error().clone()) + } else { + Ok(resp) + } + } + + pub fn must_delete(&mut self, key: &[u8]) { + self.must_delete_cf(CF_DEFAULT, key) + } + + pub fn must_delete_cf(&mut self, cf: &str, key: &[u8]) { + let resp = self.request( + key, + vec![new_delete_cmd(cf, key)], + false, + Duration::from_secs(5), + ); + if resp.get_header().has_error() { + panic!("response {:?} has error", resp); + } + } + + pub fn must_delete_range_cf(&mut self, cf: &str, start: &[u8], end: &[u8]) { + let resp = self.request( + start, + vec![new_delete_range_cmd(cf, start, end)], + false, + Duration::from_secs(5), + ); + if resp.get_header().has_error() { + panic!("response {:?} has error", resp); + } + } + + pub fn must_notify_delete_range_cf(&mut self, cf: &str, start: &[u8], end: &[u8]) { + let mut req = new_delete_range_cmd(cf, start, end); + 
req.mut_delete_range().set_notify_only(true); + let resp = self.request(start, vec![req], false, Duration::from_secs(5)); + if resp.get_header().has_error() { + panic!("response {:?} has error", resp); + } + } + + pub fn apply_state(&self, region_id: u64, store_id: u64) -> RaftApplyState { + self.get_engine(store_id) + .get_apply_state(region_id) + .unwrap() + .unwrap() + } + + pub fn add_send_filter_on_node(&mut self, node_id: u64, filter: Box) { + self.sim.wl().add_send_filter(node_id, filter); + } + + pub fn add_send_filter(&self, factory: F) { + let mut sim = self.sim.wl(); + for node_id in sim.get_node_ids() { + for filter in factory.generate(node_id) { + sim.add_send_filter(node_id, filter); + } + } + } + + pub fn clear_send_filters(&self) { + let mut sim = self.sim.wl(); + for node_id in sim.get_node_ids() { + sim.clear_send_filters(node_id); + } + } + + // it's so common that we provide an API for it + pub fn partition(&mut self, s1: Vec, s2: Vec) { + self.add_send_filter(PartitionFilterFactory::new(s1, s2)); + } + + pub fn transfer_leader(&mut self, region_id: u64, leader: metapb::Peer) { + let epoch = self.get_region_epoch(region_id); + let transfer_leader = new_admin_request(region_id, &epoch, new_transfer_leader_cmd(leader)); + // todo(SpadeA): modify + let resp = self + .call_command_on_leader(transfer_leader, Duration::from_secs(500)) + .unwrap(); + assert_eq!( + resp.get_admin_response().get_cmd_type(), + AdminCmdType::TransferLeader, + "{:?}", + resp + ); + } + + pub fn must_transfer_leader(&mut self, region_id: u64, leader: metapb::Peer) { + let timer = Instant::now(); + loop { + self.reset_leader_of_region(region_id); + let cur_leader = self.leader_of_region(region_id); + if let Some(ref cur_leader) = cur_leader { + if cur_leader.get_id() == leader.get_id() + && cur_leader.get_store_id() == leader.get_store_id() + { + return; + } + } + if timer.saturating_elapsed() > Duration::from_secs(5) { + panic!( + "failed to transfer leader to [{}] {:?}, 
current leader: {:?}", + region_id, leader, cur_leader + ); + } + self.transfer_leader(region_id, leader.clone()); + } + } + + pub fn try_transfer_leader(&mut self, region_id: u64, leader: metapb::Peer) -> RaftCmdResponse { + let epoch = self.get_region_epoch(region_id); + let transfer_leader = new_admin_request(region_id, &epoch, new_transfer_leader_cmd(leader)); + self.call_command_on_leader(transfer_leader, Duration::from_secs(5)) + .unwrap() + } + + // It's similar to `ask_split`, the difference is the msg, it sends, is + // `Msg::SplitRegion`, and `region` will not be embedded to that msg. + // Caller must ensure that the `split_key` is in the `region`. + pub fn split_region( + &mut self, + region: &metapb::Region, + split_key: &[u8], + mut cb: Callback, + ) { + let leader = self.leader_of_region(region.get_id()).unwrap(); + let router = self.sim.rl().get_router(leader.get_store_id()).unwrap(); + let split_key = split_key.to_vec(); + let (split_region_req, mut sub) = PeerMsg::request_split( + region.get_region_epoch().clone(), + vec![split_key], + "test".into(), + ); + + router + .check_send(region.get_id(), split_region_req) + .unwrap(); + + block_on(async { + sub.wait_proposed().await; + cb.invoke_proposed(); + sub.wait_committed().await; + cb.invoke_committed(); + let res = sub.result().await.unwrap(); + cb.invoke_with_response(res) + }); + } + + pub fn must_split(&mut self, region: &metapb::Region, split_key: &[u8]) { + let mut try_cnt = 0; + let split_count = self.pd_client.get_split_count(); + loop { + debug!("asking split"; "region" => ?region, "key" => ?split_key); + // In case ask split message is ignored, we should retry. 
+ if try_cnt % 50 == 0 { + self.reset_leader_of_region(region.get_id()); + let key = split_key.to_vec(); + let check = Box::new(move |write_resp: WriteResponse| { + let mut resp = write_resp.response; + if resp.get_header().has_error() { + let error = resp.get_header().get_error(); + if error.has_epoch_not_match() + || error.has_not_leader() + || error.has_stale_command() + || error + .get_message() + .contains("peer has not applied to current term") + { + warn!("fail to split: {:?}, ignore.", error); + return; + } + panic!("failed to split: {:?}", resp); + } + let admin_resp = resp.mut_admin_response(); + let split_resp = admin_resp.mut_splits(); + let regions = split_resp.get_regions(); + assert_eq!(regions.len(), 2); + assert_eq!(regions[0].get_end_key(), key.as_slice()); + assert_eq!(regions[0].get_end_key(), regions[1].get_start_key()); + }); + if self.leader_of_region(region.get_id()).is_some() { + self.split_region(region, split_key, Callback::write(check)); + } + } + + if self.pd_client.check_split(region, split_key) + && self.pd_client.get_split_count() > split_count + { + return; + } + + if try_cnt > 250 { + panic!( + "region {:?} has not been split by {}", + region, + log_wrappers::hex_encode_upper(split_key) + ); + } + try_cnt += 1; + sleep_ms(20); + } + } + + pub fn wait_region_split(&mut self, region: &metapb::Region) { + self.wait_region_split_max_cnt(region, 20, 250, true); + } + + pub fn wait_region_split_max_cnt( + &mut self, + region: &metapb::Region, + itvl_ms: u64, + max_try_cnt: u64, + is_panic: bool, + ) { + let mut try_cnt = 0; + let split_count = self.pd_client.get_split_count(); + loop { + if self.pd_client.get_split_count() > split_count { + match self.pd_client.get_region(region.get_start_key()) { + Err(_) => {} + Ok(left) => { + if left.get_end_key() != region.get_end_key() { + return; + } + } + } + } + + if try_cnt > max_try_cnt { + if is_panic { + panic!( + "region {:?} has not been split after {}ms", + region, + max_try_cnt * itvl_ms 
+ ); + } else { + return; + } + } + try_cnt += 1; + sleep_ms(itvl_ms); + } + } + + pub fn get_snap_dir(&self, node_id: u64) -> String { + self.sim.rl().get_snap_dir(node_id) + } + + pub fn refresh_region_bucket_keys( + &mut self, + _region: &metapb::Region, + _buckets: Vec, + _bucket_ranges: Option>, + _expect_buckets: Option, + ) -> u64 { + unimplemented!() + } + + pub fn send_half_split_region_message( + &mut self, + _region: &metapb::Region, + _expected_bucket_ranges: Option>, + ) { + unimplemented!() + } + + pub fn shutdown(&mut self) { + debug!("about to shutdown cluster"); + let keys = match self.sim.read() { + Ok(s) => s.get_node_ids(), + Err(_) => { + safe_panic!("failed to acquire read lock"); + // Leave the resource to avoid double panic. + return; + } + }; + for id in keys { + self.stop_node(id); + } + self.leaders.clear(); + for store_meta in self.store_metas.values() { + while Arc::strong_count(store_meta) != 1 { + std::thread::sleep(Duration::from_millis(10)); + } + } + self.store_metas.clear(); + for sst_worker in self.sst_workers.drain(..) { + sst_worker.stop_worker(); + } + + debug!("all nodes are shut down."); + } +} + +pub fn bootstrap_store( + raft_engine: &ER, + cluster_id: u64, + store_id: u64, +) -> Result<()> { + let mut ident = StoreIdent::default(); + + if !raft_engine.is_empty()? 
{ + return Err(box_err!("store is not empty and has already had data")); + } + + ident.set_cluster_id(cluster_id); + ident.set_store_id(store_id); + + let mut lb = raft_engine.log_batch(1); + lb.put_store_ident(&ident)?; + raft_engine.consume(&mut lb, true)?; + + Ok(()) +} + +impl Drop for Cluster { + fn drop(&mut self) { + test_util::clear_failpoints(); + self.shutdown(); + } +} + +pub struct WrapFactory { + pd_client: Arc, + raft_engine: RaftTestEngine, + tablet_registry: TabletRegistry, +} + +impl WrapFactory { + pub fn new( + pd_client: Arc, + raft_engine: RaftTestEngine, + tablet_registry: TabletRegistry, + ) -> Self { + Self { + raft_engine, + tablet_registry, + pd_client, + } + } + + fn region_id_of_key(&self, key: &[u8]) -> u64 { + self.pd_client.get_region(key).unwrap().get_id() + } + + fn get_tablet(&self, key: &[u8]) -> Option { + // todo: unwrap + let region_id = self.region_id_of_key(key); + self.tablet_registry.get(region_id)?.latest().cloned() + } + + pub fn get_region_state( + &self, + region_id: u64, + ) -> engine_traits::Result> { + self.raft_engine.get_region_state(region_id, u64::MAX) + } + + pub fn get_apply_state(&self, region_id: u64) -> engine_traits::Result> { + self.raft_engine.get_apply_state(region_id, u64::MAX) + } +} + +impl Peekable for WrapFactory { + type DbVector = RocksDbVector; + + fn get_value_opt( + &self, + opts: &ReadOptions, + key: &[u8], + ) -> engine_traits::Result> { + let region_id = self.region_id_of_key(key); + + if let Ok(Some(state)) = self.get_region_state(region_id) { + if state.state == PeerState::Tombstone { + return Ok(None); + } + } + + match self.get_tablet(key) { + Some(tablet) => tablet.get_value_opt(opts, key), + _ => Ok(None), + } + } + + fn get_value_cf_opt( + &self, + opts: &ReadOptions, + cf: &str, + key: &[u8], + ) -> engine_traits::Result> { + let region_id = self.region_id_of_key(key); + + if let Ok(Some(state)) = self.get_region_state(region_id) { + if state.state == PeerState::Tombstone { + return 
Ok(None); + } + } + + match self.get_tablet(key) { + Some(tablet) => tablet.get_value_cf_opt(opts, cf, key), + _ => Ok(None), + } + } + + fn get_msg_cf( + &self, + _cf: &str, + _key: &[u8], + ) -> engine_traits::Result> { + unimplemented!() + } +} + +impl SyncMutable for WrapFactory { + fn put(&self, key: &[u8], value: &[u8]) -> engine_traits::Result<()> { + match self.get_tablet(key) { + Some(tablet) => tablet.put(key, value), + _ => unimplemented!(), + } + } + + fn put_cf(&self, cf: &str, key: &[u8], value: &[u8]) -> engine_traits::Result<()> { + match self.get_tablet(key) { + Some(tablet) => tablet.put_cf(cf, key, value), + _ => unimplemented!(), + } + } + + fn delete(&self, key: &[u8]) -> engine_traits::Result<()> { + match self.get_tablet(key) { + Some(tablet) => tablet.delete(key), + _ => unimplemented!(), + } + } + + fn delete_cf(&self, cf: &str, key: &[u8]) -> engine_traits::Result<()> { + match self.get_tablet(key) { + Some(tablet) => tablet.delete_cf(cf, key), + _ => unimplemented!(), + } + } + + fn delete_range(&self, _begin_key: &[u8], _end_key: &[u8]) -> engine_traits::Result<()> { + unimplemented!() + } + + fn delete_range_cf( + &self, + _cf: &str, + _begin_key: &[u8], + _end_key: &[u8], + ) -> engine_traits::Result<()> { + unimplemented!() + } +} + +impl RawEngine for WrapFactory { + fn region_local_state( + &self, + region_id: u64, + ) -> engine_traits::Result> { + self.get_region_state(region_id) + } +} diff --git a/components/test_raftstore-v2/src/lib.rs b/components/test_raftstore-v2/src/lib.rs new file mode 100644 index 00000000000..101658ff57b --- /dev/null +++ b/components/test_raftstore-v2/src/lib.rs @@ -0,0 +1,9 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +mod cluster; +mod node; +mod server; +mod transport_simulate; +pub mod util; + +pub use crate::{cluster::*, node::*, server::*, transport_simulate::*, util::*}; diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs new file mode 100644 index 00000000000..96275cc8383 --- /dev/null +++ b/components/test_raftstore-v2/src/node.rs @@ -0,0 +1,423 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + sync::{Arc, Mutex, RwLock}, + time::Duration, +}; + +use collections::{HashMap, HashSet}; +use concurrency_manager::ConcurrencyManager; +use engine_rocks::RocksEngine; +use engine_test::raft::RaftTestEngine; +use engine_traits::{RaftEngineReadOnly, TabletRegistry}; +use kvproto::{ + kvrpcpb::ApiVersion, + raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, + raft_serverpb::RaftMessage, +}; +use raft::prelude::MessageType; +use raftstore::{ + coprocessor::CoprocessorHost, + errors::Error as RaftError, + store::{ + AutoSplitController, GlobalReplicationState, RegionSnapshot, SplitConfigManager, + TabletSnapKey, TabletSnapManager, Transport, + }, + Result, +}; +use raftstore_v2::{ + router::{PeerMsg, RaftRouter}, + StateStorage, StoreMeta, StoreRouter, +}; +use resource_control::ResourceGroupManager; +use resource_metering::CollectorRegHandle; +use tempfile::TempDir; +use test_pd_client::TestPdClient; +use test_raftstore::{Config, Filter}; +use tikv::{ + config::{ConfigController, Module}, + server::{ + raftkv::ReplicaReadLockChecker, tablet_snap::copy_tablet_snapshot, NodeV2, + Result as ServerResult, + }, +}; +use tikv_util::{ + box_err, + config::VersionTrack, + worker::{Builder as WorkerBuilder, LazyWorker}, +}; + +use crate::{Cluster, RaftStoreRouter, SimulateTransport, Simulator, SnapshotRouter}; + +#[derive(Clone)] +pub struct ChannelTransport { + core: Arc>, +} + +impl ChannelTransport { + pub fn new() -> ChannelTransport { + ChannelTransport { + core: Arc::new(Mutex::new(ChannelTransportCore { + 
snap_paths: HashMap::default(), + routers: HashMap::default(), + })), + } + } + + pub fn core(&self) -> &Arc> { + &self.core + } +} + +impl Transport for ChannelTransport { + fn send(&mut self, msg: RaftMessage) -> raftstore::Result<()> { + let from_store = msg.get_from_peer().get_store_id(); + let to_store = msg.get_to_peer().get_store_id(); + let is_snapshot = msg.get_message().get_msg_type() == MessageType::MsgSnapshot; + + if is_snapshot { + let snap = msg.get_message().get_snapshot(); + let key = TabletSnapKey::from_region_snap( + msg.get_region_id(), + msg.get_to_peer().get_id(), + snap, + ); + let sender_snap_mgr = match self.core.lock().unwrap().snap_paths.get(&from_store) { + Some(snap_mgr) => snap_mgr.0.clone(), + None => return Err(box_err!("missing snap manager for store {}", from_store)), + }; + let recver_snap_mgr = match self.core.lock().unwrap().snap_paths.get(&to_store) { + Some(snap_mgr) => snap_mgr.0.clone(), + None => return Err(box_err!("missing snap manager for store {}", to_store)), + }; + + if let Err(e) = + copy_tablet_snapshot(key, msg.clone(), &sender_snap_mgr, &recver_snap_mgr) + { + return Err(box_err!("copy tablet snapshot failed: {:?}", e)); + } + } + + let core = self.core.lock().unwrap(); + match core.routers.get(&to_store) { + Some(h) => { + h.send_raft_msg(msg)?; + // report snapshot status if needed + Ok(()) + } + _ => Err(box_err!("missing sender for store {}", to_store)), + } + } + + fn set_store_allowlist(&mut self, _allowlist: Vec) { + unimplemented!(); + } + + fn need_flush(&self) -> bool { + false + } + + fn flush(&mut self) {} +} + +pub struct ChannelTransportCore { + pub snap_paths: HashMap, + pub routers: HashMap>>, +} + +impl Default for ChannelTransport { + fn default() -> Self { + Self::new() + } +} + +type SimulateChannelTransport = SimulateTransport; + +pub struct NodeCluster { + trans: ChannelTransport, + pd_client: Arc, + nodes: HashMap>, + simulate_trans: HashMap, + concurrency_managers: HashMap, + // snap_mgrs: 
HashMap, +} + +impl NodeCluster { + pub fn new(pd_client: Arc) -> NodeCluster { + NodeCluster { + trans: ChannelTransport::new(), + pd_client, + nodes: HashMap::default(), + simulate_trans: HashMap::default(), + concurrency_managers: HashMap::default(), + // snap_mgrs: HashMap::default(), + } + } +} + +impl Simulator for NodeCluster { + fn get_node_ids(&self) -> HashSet { + self.nodes.keys().cloned().collect() + } + + fn add_send_filter(&mut self, node_id: u64, filter: Box) { + self.simulate_trans + .get_mut(&node_id) + .unwrap() + .add_filter(filter); + } + + fn clear_send_filters(&mut self, node_id: u64) { + self.simulate_trans + .get_mut(&node_id) + .unwrap() + .clear_filters(); + } + + fn run_node( + &mut self, + node_id: u64, + cfg: Config, + store_meta: Arc>>, + raft_engine: RaftTestEngine, + tablet_registry: TabletRegistry, + _resource_manager: &Option>, + ) -> ServerResult { + assert!(!self.nodes.contains_key(&node_id)); + let pd_worker = LazyWorker::new("test-pd-worker"); + + let simulate_trans = SimulateTransport::new(self.trans.clone()); + let mut raft_store = cfg.raft_store.clone(); + raft_store + .validate( + cfg.coprocessor.region_split_size.unwrap_or_default(), + cfg.coprocessor.enable_region_bucket, + cfg.coprocessor.region_bucket_size, + ) + .unwrap(); + + let mut node = NodeV2::new(&cfg.server, self.pd_client.clone(), None); + node.try_bootstrap_store(&raft_store, &raft_engine).unwrap(); + assert_eq!(node.id(), node_id); + + tablet_registry + .tablet_factory() + .set_state_storage(Arc::new(StateStorage::new( + raft_engine.clone(), + node.router().clone(), + ))); + + // todo: node id 0 + let (snap_mgr, snap_mgs_path) = if node_id == 0 + || !self + .trans + .core + .lock() + .unwrap() + .snap_paths + .contains_key(&node_id) + { + let tmp = test_util::temp_dir("test_cluster", cfg.prefer_mem); + let snap_path = tmp.path().to_str().unwrap().to_owned(); + (TabletSnapManager::new(snap_path)?, Some(tmp)) + } else { + let trans = 
self.trans.core.lock().unwrap(); + let &(ref snap_mgr, _) = &trans.snap_paths[&node_id]; + (snap_mgr.clone(), None) + }; + + let raft_router = RaftRouter::new_with_store_meta(node.router().clone(), store_meta); + // Create coprocessor. + let mut coprocessor_host = + CoprocessorHost::new(raft_router.store_router().clone(), cfg.coprocessor.clone()); + + // if let Some(f) = self.post_create_coprocessor_host.as_ref() { + // f(node_id, &mut coprocessor_host); + // } + + let cm = ConcurrencyManager::new(1.into()); + self.concurrency_managers.insert(node_id, cm.clone()); + + ReplicaReadLockChecker::new(cm.clone()).register(&mut coprocessor_host); + + let cfg_controller = ConfigController::new(cfg.tikv.clone()); + // cfg_controller.register( + // Module::Coprocessor, + // Box::new(SplitCheckConfigManager(split_scheduler.clone())), + // ); + + let split_config_manager = + SplitConfigManager::new(Arc::new(VersionTrack::new(cfg.tikv.split.clone()))); + cfg_controller.register(Module::Split, Box::new(split_config_manager.clone())); + + let auto_split_controller = AutoSplitController::new( + split_config_manager, + cfg.tikv.server.grpc_concurrency, + cfg.tikv.readpool.unified.max_thread_count, + // todo: Is None sufficient for test? 
+ None, + ); + + let bg_worker = WorkerBuilder::new("background").thread_count(2).create(); + let state: Arc> = Arc::default(); + node.start( + raft_engine.clone(), + tablet_registry, + &raft_router, + simulate_trans.clone(), + snap_mgr.clone(), + cm, + None, + coprocessor_host, + auto_split_controller, + CollectorRegHandle::new_for_test(), + bg_worker, + pd_worker, + Arc::new(VersionTrack::new(raft_store)), + &state, + )?; + assert!( + raft_engine + .get_prepare_bootstrap_region() + .unwrap() + .is_none() + ); + assert!(node_id == 0 || node_id == node.id()); + let node_id = node.id(); + + let region_split_size = cfg.coprocessor.region_split_size; + let enable_region_bucket = cfg.coprocessor.enable_region_bucket; + let region_bucket_size = cfg.coprocessor.region_bucket_size; + let mut raftstore_cfg = cfg.tikv.raft_store; + raftstore_cfg + .validate( + region_split_size.unwrap_or_default(), + enable_region_bucket, + region_bucket_size, + ) + .unwrap(); + + // let raft_store = Arc::new(VersionTrack::new(raftstore_cfg)); + // cfg_controller.register( + // Module::Raftstore, + // Box::new(RaftstoreConfigManager::new( + // node.refresh_config_scheduler(), + // raft_store, + // )), + // ); + + if let Some(tmp) = snap_mgs_path { + self.trans + .core + .lock() + .unwrap() + .snap_paths + .insert(node_id, (snap_mgr, tmp)); + } + + self.trans + .core + .lock() + .unwrap() + .routers + .insert(node_id, SimulateTransport::new(raft_router)); + + self.nodes.insert(node_id, node); + self.simulate_trans.insert(node_id, simulate_trans); + Ok(node_id) + } + + fn snapshot( + &mut self, + request: RaftCmdRequest, + timeout: Duration, + ) -> std::result::Result< + RegionSnapshot<::Snapshot>, + RaftCmdResponse, + > { + let node_id = request.get_header().get_peer().get_store_id(); + if !self + .trans + .core + .lock() + .unwrap() + .routers + .contains_key(&node_id) + { + let mut resp = RaftCmdResponse::default(); + let e: RaftError = box_err!("missing sender for store {}", node_id); + 
resp.mut_header().set_error(e.into()); + return Err(resp); + } + + let mut router = { + let mut guard = self.trans.core.lock().unwrap(); + guard.routers.get_mut(&node_id).unwrap().clone() + }; + + router.snapshot(request, timeout) + } + + fn async_peer_msg_on_node(&self, node_id: u64, region_id: u64, msg: PeerMsg) -> Result<()> { + if !self + .trans + .core + .lock() + .unwrap() + .routers + .contains_key(&node_id) + { + return Err(box_err!("missing sender for store {}", node_id)); + } + + let router = self + .trans + .core + .lock() + .unwrap() + .routers + .get(&node_id) + .cloned() + .unwrap(); + + router.send_peer_msg(region_id, msg) + } + + fn stop_node(&mut self, node_id: u64) { + if let Some(mut node) = self.nodes.remove(&node_id) { + node.stop(); + } + self.trans + .core + .lock() + .unwrap() + .routers + .remove(&node_id) + .unwrap(); + } + + fn get_router(&self, node_id: u64) -> Option> { + self.nodes.get(&node_id).map(|node| node.router().clone()) + } + + fn get_snap_dir(&self, node_id: u64) -> String { + self.trans.core.lock().unwrap().snap_paths[&node_id] + .0 + .root_path() + .to_str() + .unwrap() + .to_owned() + } +} + +pub fn new_node_cluster(id: u64, count: usize) -> Cluster { + let pd_client = Arc::new(TestPdClient::new(id, false)); + let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); + Cluster::new(id, count, sim, pd_client, ApiVersion::V1) +} + +pub fn new_incompatible_node_cluster(id: u64, count: usize) -> Cluster { + let pd_client = Arc::new(TestPdClient::new(id, true)); + let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); + Cluster::new(id, count, sim, pd_client, ApiVersion::V1) +} diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs new file mode 100644 index 00000000000..e64844bb490 --- /dev/null +++ b/components/test_raftstore-v2/src/server.rs @@ -0,0 +1,726 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + sync::{Arc, Mutex, RwLock}, + thread, + time::Duration, +}; + +use api_version::{dispatch_api_version, KvFormat}; +use causal_ts::CausalTsProviderImpl; +use collections::{HashMap, HashSet}; +use concurrency_manager::ConcurrencyManager; +use engine_rocks::{RocksEngine, RocksSnapshot}; +use engine_test::raft::RaftTestEngine; +use engine_traits::{KvEngine, TabletRegistry}; +use futures::executor::block_on; +use grpcio::{ChannelBuilder, EnvBuilder, Environment, Error as GrpcError, Service}; +use grpcio_health::HealthService; +use kvproto::{ + deadlock_grpc::create_deadlock, + debugpb_grpc::DebugClient, + diagnosticspb_grpc::create_diagnostics, + kvrpcpb::{ApiVersion, Context}, + metapb, + raft_cmdpb::RaftCmdResponse, + tikvpb_grpc::TikvClient, +}; +use pd_client::PdClient; +use raftstore::{ + coprocessor::CoprocessorHost, + errors::Error as RaftError, + store::{ + AutoSplitController, CheckLeaderRunner, FlowStatsReporter, ReadStats, RegionSnapshot, + TabletSnapManager, WriteStats, + }, + RegionInfoAccessor, +}; +use raftstore_v2::{router::RaftRouter, StateStorage, StoreMeta, StoreRouter}; +use resource_control::ResourceGroupManager; +use resource_metering::{CollectorRegHandle, ResourceTagFactory}; +use security::SecurityManager; +use slog_global::debug; +use tempfile::TempDir; +use test_pd_client::TestPdClient; +use test_raftstore::{AddressMap, Config}; +use tikv::{ + coprocessor, coprocessor_v2, + read_pool::ReadPool, + server::{ + gc_worker::GcWorker, load_statistics::ThreadLoadPool, lock_manager::LockManager, + raftkv::ReplicaReadLockChecker, resolve, service::DiagnosticsService, ConnectionBuilder, + Error, NodeV2, PdStoreAddrResolver, RaftClient, RaftKv2, Result as ServerResult, Server, + ServerTransport, + }, + storage::{ + self, + kv::{FakeExtension, SnapContext}, + txn::flow_controller::{EngineFlowController, FlowController}, + Engine, Storage, + }, +}; +use tikv_util::{ + box_err, + config::VersionTrack, + quota_limiter::QuotaLimiter, + 
sys::thread::ThreadBuildWrapper, + thd_name, + worker::{Builder as WorkerBuilder, LazyWorker}, + Either, HandyRwLock, +}; +use tokio::runtime::Builder as TokioBuilder; +use txn_types::TxnExtraScheduler; + +use crate::{Cluster, RaftStoreRouter, SimulateTransport, Simulator, SnapshotRouter}; + +#[derive(Clone)] +struct DummyReporter; + +impl FlowStatsReporter for DummyReporter { + fn report_read_stats(&self, _read_stats: ReadStats) {} + fn report_write_stats(&self, _write_stats: WriteStats) {} +} + +type SimulateRaftExtension = ::RaftExtension; +type SimulateStoreTransport = SimulateTransport>; +type SimulateServerTransport = + SimulateTransport>; + +pub type SimulateEngine = RaftKv2; + +pub struct ServerMeta { + node: NodeV2, + server: Server, + sim_router: SimulateStoreTransport, + sim_trans: SimulateServerTransport, + raw_router: StoreRouter, + rsmeter_cleanup: Box, +} + +type PendingServices = Vec Service>>; + +pub struct ServerCluster { + metas: HashMap, + addrs: AddressMap, + pub storages: HashMap, + pub region_info_accessors: HashMap, + snap_paths: HashMap, + snap_mgrs: HashMap, + pd_client: Arc, + // raft_client: RaftClient, + concurrency_managers: HashMap, + env: Arc, + pub pending_services: HashMap, + pub health_services: HashMap, + pub security_mgr: Arc, + pub txn_extra_schedulers: HashMap>, + pub causal_ts_providers: HashMap>, +} + +impl ServerCluster { + pub fn new(pd_client: Arc) -> ServerCluster { + let env = Arc::new( + EnvBuilder::new() + .cq_count(2) + .name_prefix(thd_name!("server-cluster")) + .build(), + ); + let security_mgr = Arc::new(SecurityManager::new(&Default::default()).unwrap()); + let map = AddressMap::default(); + // We don't actually need to handle snapshot message, just create a dead worker + // to make it compile. 
+ let worker = LazyWorker::new("snap-worker"); + let conn_builder = ConnectionBuilder::new( + env.clone(), + Arc::default(), + security_mgr.clone(), + map.clone(), + FakeExtension {}, + worker.scheduler(), + Arc::new(ThreadLoadPool::with_threshold(usize::MAX)), + ); + let _raft_client = RaftClient::new(conn_builder); + ServerCluster { + metas: HashMap::default(), + addrs: map, + pd_client, + security_mgr, + storages: HashMap::default(), + region_info_accessors: HashMap::default(), + snap_mgrs: HashMap::default(), + snap_paths: HashMap::default(), + pending_services: HashMap::default(), + health_services: HashMap::default(), + // raft_client, + concurrency_managers: HashMap::default(), + env, + txn_extra_schedulers: HashMap::default(), + causal_ts_providers: HashMap::default(), + } + } + + pub fn get_addr(&self, node_id: u64) -> String { + self.addrs.get(node_id).unwrap() + } + + pub fn run_node_impl( + &mut self, + node_id: u64, + mut cfg: Config, + store_meta: Arc>>, + raft_engine: RaftTestEngine, + tablet_registry: TabletRegistry, + resource_manager: &Option>, + ) -> ServerResult { + let (snap_mgr, snap_mgs_path) = if !self.snap_mgrs.contains_key(&node_id) { + let tmp = test_util::temp_dir("test_cluster", cfg.prefer_mem); + let snap_path = tmp.path().to_str().unwrap().to_owned(); + (TabletSnapManager::new(snap_path)?, Some(tmp)) + } else { + (self.snap_mgrs[&node_id].clone(), None) + }; + + let bg_worker = WorkerBuilder::new("background").thread_count(2).create(); + + if cfg.server.addr == "127.0.0.1:0" { + // Now we cache the store address, so here we should re-use last + // listening address for the same store. + if let Some(addr) = self.addrs.get(node_id) { + cfg.server.addr = addr; + } else { + cfg.server.addr = format!("127.0.0.1:{}", test_util::alloc_port()); + } + } + + // Create node. 
+ let mut raft_store = cfg.raft_store.clone(); + raft_store + .validate( + cfg.coprocessor.region_split_size.unwrap_or_default(), + cfg.coprocessor.enable_region_bucket, + cfg.coprocessor.region_bucket_size, + ) + .unwrap(); + + let mut node = NodeV2::new(&cfg.server, self.pd_client.clone(), None); + node.try_bootstrap_store(&raft_store, &raft_engine).unwrap(); + assert_eq!(node.id(), node_id); + + tablet_registry + .tablet_factory() + .set_state_storage(Arc::new(StateStorage::new( + raft_engine.clone(), + node.router().clone(), + ))); + + let server_cfg = Arc::new(VersionTrack::new(cfg.server.clone())); + + let raft_router = + RaftRouter::new_with_store_meta(node.router().clone(), store_meta.clone()); + + // Create coprocessor. + let mut coprocessor_host = + CoprocessorHost::new(raft_router.store_router().clone(), cfg.coprocessor.clone()); + + let region_info_accessor = RegionInfoAccessor::new(&mut coprocessor_host); + + let sim_router = SimulateTransport::new(raft_router.clone()); + // todo(SpadeA): simulate transport + let mut raft_kv_v2 = + RaftKv2::new(raft_router.clone(), region_info_accessor.region_leaders()); + + // Create storage. 
+ let pd_worker = LazyWorker::new("test-pd-worker"); + let pd_sender = raftstore_v2::PdReporter::new( + pd_worker.scheduler(), + slog_global::borrow_global().new(slog::o!()), + ); + let storage_read_pool = ReadPool::from(storage::build_read_pool( + &tikv::config::StorageReadPoolConfig::default_for_test(), + pd_sender, + raft_kv_v2.clone(), + )); + + if let Some(scheduler) = self.txn_extra_schedulers.remove(&node_id) { + raft_kv_v2.set_txn_extra_scheduler(scheduler); + } + + let latest_ts = + block_on(self.pd_client.get_tso()).expect("failed to get timestamp from PD"); + let concurrency_manager = ConcurrencyManager::new(latest_ts); + + let (tx, _rx) = std::sync::mpsc::channel(); + let mut gc_worker = GcWorker::new( + raft_kv_v2.clone(), + tx, + cfg.gc.clone(), + Default::default(), + Arc::new(region_info_accessor.clone()), + ); + gc_worker.start(node_id).unwrap(); + + // todo: resolved ts + + if ApiVersion::V2 == F::TAG { + let casual_ts_provider: Arc = Arc::new( + block_on(causal_ts::BatchTsoProvider::new_opt( + self.pd_client.clone(), + cfg.causal_ts.renew_interval.0, + cfg.causal_ts.alloc_ahead_buffer.0, + cfg.causal_ts.renew_batch_min_size, + cfg.causal_ts.renew_batch_max_size, + )) + .unwrap() + .into(), + ); + self.causal_ts_providers.insert(node_id, casual_ts_provider); + } + + // Start resource metering. 
+ let (res_tag_factory, collector_reg_handle, rsmeter_cleanup) = + self.init_resource_metering(&cfg.resource_metering); + + let check_leader_runner = CheckLeaderRunner::new(store_meta, coprocessor_host.clone()); + let check_leader_scheduler = bg_worker.start("check-leader", check_leader_runner); + + let mut lock_mgr = LockManager::new(&cfg.pessimistic_txn); + let quota_limiter = Arc::new(QuotaLimiter::new( + cfg.quota.foreground_cpu_time, + cfg.quota.foreground_write_bandwidth, + cfg.quota.foreground_read_bandwidth, + cfg.quota.background_cpu_time, + cfg.quota.background_write_bandwidth, + cfg.quota.background_read_bandwidth, + cfg.quota.max_delay_duration, + cfg.quota.enable_auto_tune, + )); + + let casual_ts_provider = self.get_causal_ts_provider(node_id); + let store = Storage::<_, _, F>::from_engine( + raft_kv_v2.clone(), + &cfg.storage, + storage_read_pool.handle(), + lock_mgr.clone(), + concurrency_manager.clone(), + lock_mgr.get_storage_dynamic_configs(), + Arc::new(FlowController::Singleton(EngineFlowController::empty())), + DummyReporter, + res_tag_factory.clone(), + quota_limiter.clone(), + self.pd_client.feature_gate().clone(), + casual_ts_provider.clone(), + resource_manager + .as_ref() + .map(|m| m.derive_controller("scheduler-worker-pool".to_owned(), true)), + )?; + self.storages.insert(node_id, raft_kv_v2); + + ReplicaReadLockChecker::new(concurrency_manager.clone()).register(&mut coprocessor_host); + + // todo: Import Sst Service + + // Create deadlock service. + let deadlock_service = lock_mgr.deadlock_service(); + + // Create pd client, snapshot manager, server. 
+ let (resolver, state) = resolve::new_resolver( + Arc::clone(&self.pd_client), + &bg_worker, + store.get_engine().raft_extension(), + ); + let security_mgr = Arc::new(SecurityManager::new(&cfg.security).unwrap()); + let cop_read_pool = ReadPool::from(coprocessor::readpool_impl::build_read_pool_for_test( + &tikv::config::CoprReadPoolConfig::default_for_test(), + store.get_engine(), + )); + let copr = coprocessor::Endpoint::new( + &server_cfg.value().clone(), + cop_read_pool.handle(), + concurrency_manager.clone(), + res_tag_factory, + quota_limiter, + ); + let copr_v2 = coprocessor_v2::Endpoint::new(&cfg.coprocessor_v2); + let mut server = None; + + // Create Debug service. + let debug_thread_pool = Arc::new( + TokioBuilder::new_multi_thread() + .thread_name(thd_name!("debugger")) + .worker_threads(1) + .after_start_wrapper(|| {}) + .before_stop_wrapper(|| {}) + .build() + .unwrap(), + ); + let debug_thread_handle = debug_thread_pool.handle().clone(); + let diag_service = DiagnosticsService::new( + debug_thread_handle, + cfg.log.file.filename.clone(), + cfg.slow_log_file.clone(), + ); + + let health_service = HealthService::default(); + + for _ in 0..100 { + let mut svr = Server::new( + node_id, + &server_cfg, + &security_mgr, + store.clone(), + copr.clone(), + copr_v2.clone(), + resolver.clone(), + Either::Right(snap_mgr.clone()), + gc_worker.clone(), + check_leader_scheduler.clone(), + self.env.clone(), + None, + debug_thread_pool.clone(), + health_service.clone(), + ) + .unwrap(); + svr.register_service(create_diagnostics(diag_service.clone())); + svr.register_service(create_deadlock(deadlock_service.clone())); + if let Some(svcs) = self.pending_services.get(&node_id) { + for fact in svcs { + svr.register_service(fact()); + } + } + match svr.build_and_bind() { + Ok(_) => { + server = Some(svr); + break; + } + Err(Error::Grpc(GrpcError::BindFail(ref addr, ref port))) => { + // Servers may meet the error, when we restart them. 
+ debug!("fail to create a server: bind fail {:?}", (addr, port)); + thread::sleep(Duration::from_millis(100)); + continue; + } + Err(ref e) => panic!("fail to create a server: {:?}", e), + } + } + let mut server = server.unwrap(); + let addr = server.listening_addr(); + assert_eq!(addr.clone().to_string(), node.store().address); + cfg.server.addr = format!("{}", addr); + let trans = server.transport(); + let simulate_trans = SimulateTransport::new(trans); + let server_cfg = Arc::new(VersionTrack::new(cfg.server.clone())); + + // Register the role change observer of the lock manager. + lock_mgr.register_detector_role_change_observer(&mut coprocessor_host); + + let pessimistic_txn_cfg = cfg.tikv.pessimistic_txn; + node.start( + raft_engine, + tablet_registry, + &raft_router, + simulate_trans.clone(), + snap_mgr.clone(), + concurrency_manager.clone(), + casual_ts_provider, + coprocessor_host, + AutoSplitController::default(), + collector_reg_handle, + bg_worker, + pd_worker, + Arc::new(VersionTrack::new(raft_store)), + &state, + )?; + assert!(node_id == 0 || node_id == node.id()); + let node_id = node.id(); + self.snap_mgrs.insert(node_id, snap_mgr); + if let Some(tmp) = snap_mgs_path { + self.snap_paths.insert(node_id, tmp); + } + self.region_info_accessors + .insert(node_id, region_info_accessor); + // todo: importer + self.health_services.insert(node_id, health_service); + + lock_mgr + .start( + node.id(), + Arc::clone(&self.pd_client), + resolver, + Arc::clone(&security_mgr), + &pessimistic_txn_cfg, + ) + .unwrap(); + + server.start(server_cfg, security_mgr).unwrap(); + + self.metas.insert( + node_id, + ServerMeta { + raw_router: raft_router.store_router().clone(), + node, + server, + sim_router, + sim_trans: simulate_trans, + rsmeter_cleanup, + }, + ); + self.addrs.insert(node_id, format!("{}", addr)); + self.concurrency_managers + .insert(node_id, concurrency_manager); + + Ok(node_id) + } + + pub fn get_causal_ts_provider(&self, node_id: u64) -> Option> { + 
self.causal_ts_providers.get(&node_id).cloned() + } + + fn init_resource_metering( + &self, + cfg: &resource_metering::Config, + ) -> (ResourceTagFactory, CollectorRegHandle, Box) { + let (_, collector_reg_handle, resource_tag_factory, recorder_worker) = + resource_metering::init_recorder(cfg.precision.as_millis()); + let (_, data_sink_reg_handle, reporter_worker) = + resource_metering::init_reporter(cfg.clone(), collector_reg_handle.clone()); + let (_, single_target_worker) = resource_metering::init_single_target( + cfg.receiver_address.clone(), + Arc::new(Environment::new(2)), + data_sink_reg_handle, + ); + + ( + resource_tag_factory, + collector_reg_handle, + Box::new(move || { + single_target_worker.stop_worker(); + reporter_worker.stop_worker(); + recorder_worker.stop_worker(); + }), + ) + } + + pub fn get_concurrency_manager(&self, node_id: u64) -> ConcurrencyManager { + self.concurrency_managers.get(&node_id).unwrap().clone() + } +} + +impl Simulator for ServerCluster { + fn get_node_ids(&self) -> HashSet { + self.metas.keys().cloned().collect() + } + + fn add_send_filter(&mut self, node_id: u64, filter: Box) { + self.metas + .get_mut(&node_id) + .unwrap() + .sim_trans + .add_filter(filter); + } + + fn clear_send_filters(&mut self, node_id: u64) { + self.metas + .get_mut(&node_id) + .unwrap() + .sim_trans + .clear_filters(); + } + + fn run_node( + &mut self, + node_id: u64, + cfg: Config, + store_meta: Arc>>, + raft_engine: RaftTestEngine, + tablet_registry: TabletRegistry, + resource_manager: &Option>, + ) -> ServerResult { + dispatch_api_version!( + cfg.storage.api_version(), + self.run_node_impl::( + node_id, + cfg, + store_meta, + raft_engine, + tablet_registry, + resource_manager + ) + ) + } + + fn stop_node(&mut self, node_id: u64) { + if let Some(mut meta) = self.metas.remove(&node_id) { + meta.server.stop().unwrap(); + meta.node.stop(); + // // resolved ts worker started, let's stop it + // if let Some(worker) = meta.rts_worker { + // 
worker.stop_worker(); + // } + (meta.rsmeter_cleanup)(); + } + self.storages.remove(&node_id); + } + + fn snapshot( + &mut self, + request: kvproto::raft_cmdpb::RaftCmdRequest, + timeout: Duration, + ) -> std::result::Result::Snapshot>, RaftCmdResponse> + { + let node_id = request.get_header().get_peer().get_store_id(); + let mut router = match self.metas.get(&node_id) { + None => { + let mut resp = RaftCmdResponse::default(); + let e: RaftError = box_err!("missing sender for store {}", node_id); + resp.mut_header().set_error(e.into()); + return Err(resp); + } + Some(meta) => meta.sim_router.clone(), + }; + + router.snapshot(request, timeout) + } + + fn async_peer_msg_on_node( + &self, + node_id: u64, + region_id: u64, + msg: raftstore_v2::router::PeerMsg, + ) -> raftstore::Result<()> { + let router = match self.metas.get(&node_id) { + None => return Err(box_err!("missing sender for store {}", node_id)), + Some(meta) => meta.sim_router.clone(), + }; + + router.send_peer_msg(region_id, msg) + } + + fn get_router(&self, node_id: u64) -> Option> { + self.metas.get(&node_id).map(|m| m.raw_router.clone()) + } + + fn get_snap_dir(&self, node_id: u64) -> String { + self.snap_mgrs[&node_id] + .root_path() + .to_str() + .unwrap() + .to_owned() + } +} + +impl Cluster { + pub fn must_get_snapshot_of_region(&mut self, region_id: u64) -> RegionSnapshot { + let mut try_snapshot = || -> Option> { + let leader = self.leader_of_region(region_id)?; + let store_id = leader.store_id; + let epoch = self.get_region_epoch(region_id); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_peer(leader); + ctx.set_region_epoch(epoch); + + let mut storage = self.sim.rl().storages.get(&store_id).unwrap().clone(); + let snap_ctx = SnapContext { + pb_ctx: &ctx, + ..Default::default() + }; + storage.snapshot(snap_ctx).ok() + }; + for _ in 0..10 { + if let Some(snapshot) = try_snapshot() { + return snapshot; + } + thread::sleep(Duration::from_millis(200)); + } + 
panic!("failed to get snapshot of region {}", region_id); + } +} + +pub fn new_server_cluster(id: u64, count: usize) -> Cluster { + let pd_client = Arc::new(TestPdClient::new(id, false)); + let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); + Cluster::new(id, count, sim, pd_client, ApiVersion::V1) +} + +pub fn new_incompatible_server_cluster(id: u64, count: usize) -> Cluster { + let pd_client = Arc::new(TestPdClient::new(id, true)); + let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); + Cluster::new(id, count, sim, pd_client, ApiVersion::V1) +} + +pub fn new_server_cluster_with_api_ver( + id: u64, + count: usize, + api_ver: ApiVersion, +) -> Cluster { + let pd_client = Arc::new(TestPdClient::new(id, false)); + let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); + Cluster::new(id, count, sim, pd_client, api_ver) +} + +pub fn must_new_cluster_and_kv_client() -> (Cluster, TikvClient, Context) { + must_new_cluster_and_kv_client_mul(1) +} + +pub fn must_new_cluster_and_kv_client_mul( + count: usize, +) -> (Cluster, TikvClient, Context) { + let (cluster, leader, ctx) = must_new_cluster_mul(count); + + let env = Arc::new(Environment::new(1)); + let channel = + ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); + let client = TikvClient::new(channel); + + (cluster, client, ctx) +} +pub fn must_new_cluster_mul(count: usize) -> (Cluster, metapb::Peer, Context) { + must_new_and_configure_cluster_mul(count, |_| ()) +} + +fn must_new_and_configure_cluster_mul( + count: usize, + mut configure: impl FnMut(&mut Cluster), +) -> (Cluster, metapb::Peer, Context) { + let mut cluster = new_server_cluster(0, count); + configure(&mut cluster); + cluster.run(); + let region_id = 1; + let leader = cluster.leader_of_region(region_id).unwrap(); + let epoch = cluster.get_region_epoch(region_id); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + 
ctx.set_peer(leader.clone()); + ctx.set_region_epoch(epoch); + + (cluster, leader, ctx) +} + +pub fn must_new_and_configure_cluster_and_kv_client( + configure: impl FnMut(&mut Cluster), +) -> (Cluster, TikvClient, Context) { + let (cluster, leader, ctx) = must_new_and_configure_cluster(configure); + + let env = Arc::new(Environment::new(1)); + let channel = + ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); + let client = TikvClient::new(channel); + + (cluster, client, ctx) +} + +pub fn must_new_and_configure_cluster( + configure: impl FnMut(&mut Cluster), +) -> (Cluster, metapb::Peer, Context) { + must_new_and_configure_cluster_mul(1, configure) +} + +pub fn must_new_cluster_and_debug_client() -> (Cluster, DebugClient, u64) { + let (cluster, leader, _) = must_new_cluster_mul(1); + + let env = Arc::new(Environment::new(1)); + let channel = + ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); + let client = DebugClient::new(channel); + + (cluster, client, leader.get_store_id()) +} diff --git a/components/test_raftstore-v2/src/transport_simulate.rs b/components/test_raftstore-v2/src/transport_simulate.rs new file mode 100644 index 00000000000..f42a891e60f --- /dev/null +++ b/components/test_raftstore-v2/src/transport_simulate.rs @@ -0,0 +1,128 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + sync::{Arc, RwLock}, + time::{Duration, Instant}, +}; + +use engine_traits::{KvEngine, RaftEngine}; +use futures::{compat::Future01CompatExt, FutureExt}; +use kvproto::{ + raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, + raft_serverpb::RaftMessage, +}; +use raftstore::{ + router::handle_send_error, + store::{cmd_resp, RegionSnapshot, Transport}, + Error, Result, Result as RaftStoreResult, +}; +use raftstore_v2::router::{PeerMsg, RaftRouter}; +use test_raftstore::{filter_send, Filter}; +use tikv_util::{timer::GLOBAL_TIMER_HANDLE, HandyRwLock}; + +#[derive(Clone)] +pub struct SimulateTransport { + filters: Arc>>>, + ch: C, +} + +impl SimulateTransport { + pub fn new(ch: C) -> SimulateTransport { + Self { + filters: Arc::new(RwLock::new(vec![])), + ch, + } + } + + pub fn clear_filters(&mut self) { + self.filters.wl().clear(); + } + + pub fn add_filter(&mut self, filter: Box) { + self.filters.wl().push(filter); + } +} + +impl Transport for SimulateTransport { + fn send(&mut self, m: RaftMessage) -> Result<()> { + let ch = &mut self.ch; + filter_send(&self.filters, m, |m| ch.send(m)) + } + + fn set_store_allowlist(&mut self, allowlist: Vec) { + self.ch.set_store_allowlist(allowlist); + } + + fn need_flush(&self) -> bool { + self.ch.need_flush() + } + + fn flush(&mut self) { + self.ch.flush(); + } +} + +pub trait SnapshotRouter { + fn snapshot( + &mut self, + req: RaftCmdRequest, + timeout: Duration, + ) -> std::result::Result, RaftCmdResponse>; +} + +impl SnapshotRouter for RaftRouter { + fn snapshot( + &mut self, + req: RaftCmdRequest, + timeout: Duration, + ) -> std::result::Result, RaftCmdResponse> { + let timeout_f = GLOBAL_TIMER_HANDLE.delay(Instant::now() + timeout).compat(); + futures::executor::block_on(async move { + futures::select! 
{ + res = self.snapshot(req).fuse() => res, + e = timeout_f.fuse() => { + Err(cmd_resp::new_error(Error::Timeout(format!("request timeout for {:?}: {:?}", timeout,e)))) + }, + } + }) + } +} + +impl> SnapshotRouter for SimulateTransport { + fn snapshot( + &mut self, + req: RaftCmdRequest, + timeout: Duration, + ) -> std::result::Result, RaftCmdResponse> { + self.ch.snapshot(req, timeout) + } +} + +pub trait RaftStoreRouter { + fn send_peer_msg(&self, region_id: u64, msg: PeerMsg) -> Result<()>; + + fn send_raft_msg(&self, msg: RaftMessage) -> RaftStoreResult<()>; +} + +impl RaftStoreRouter for RaftRouter { + fn send_peer_msg(&self, region_id: u64, msg: PeerMsg) -> RaftStoreResult<()> { + self.send(region_id, msg) + .map_err(|e| handle_send_error(region_id, e)) + } + + fn send_raft_msg(&self, msg: RaftMessage) -> RaftStoreResult<()> { + let region_id = msg.get_region_id(); + self.send_raft_message(Box::new(msg)) + .map_err(|e| handle_send_error(region_id, e)) + } +} + +impl RaftStoreRouter for SimulateTransport { + fn send_peer_msg(&self, region_id: u64, msg: PeerMsg) -> RaftStoreResult<()> { + self.ch.send_peer_msg(region_id, msg) + } + + fn send_raft_msg(&self, msg: RaftMessage) -> RaftStoreResult<()> { + filter_send(&self.filters, msg, |m| self.ch.send_raft_msg(m)) + } +} diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs new file mode 100644 index 00000000000..2bd9444b002 --- /dev/null +++ b/components/test_raftstore-v2/src/util.rs @@ -0,0 +1,191 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{fmt::Write, sync::Arc, thread, time::Duration}; + +use encryption_export::{data_key_manager_from_config, DataKeyManager}; +use engine_rocks::{RocksEngine, RocksStatistics}; +use engine_test::raft::RaftTestEngine; +use engine_traits::{TabletRegistry, CF_DEFAULT}; +use file_system::IoRateLimiter; +use kvproto::kvrpcpb::Context; +use rand::RngCore; +use server::server2::ConfiguredRaftEngine; +use tempfile::TempDir; +use test_raftstore::{new_put_cf_cmd, Config}; +use tikv::{ + server::KvEngineFactoryBuilder, + storage::{ + config::EngineType, + kv::{SnapContext, SnapshotExt}, + Engine, Snapshot, + }, +}; +use tikv_util::{config::ReadableDuration, worker::LazyWorker}; + +use crate::{bootstrap_store, cluster::Cluster, ServerCluster, Simulator}; + +pub fn create_test_engine( + // TODO: pass it in for all cases. + id: Option<(u64, u64)>, + limiter: Option>, + cfg: &Config, +) -> ( + TabletRegistry, + RaftTestEngine, + Option>, + TempDir, + LazyWorker, + Arc, + Option>, +) { + let dir = test_util::temp_dir("test_cluster", cfg.prefer_mem); + let mut cfg = cfg.clone(); + cfg.storage.data_dir = dir.path().to_str().unwrap().to_string(); + cfg.raft_store.raftdb_path = cfg.infer_raft_db_path(None).unwrap(); + cfg.raft_engine.mut_config().dir = cfg.infer_raft_engine_path(None).unwrap(); + let key_manager = + data_key_manager_from_config(&cfg.security.encryption, dir.path().to_str().unwrap()) + .unwrap() + .map(Arc::new); + let cache = cfg + .storage + .block_cache + .build_shared_cache(EngineType::RaftKv2); + let env = cfg + .build_shared_rocks_env(key_manager.clone(), limiter) + .unwrap(); + + let sst_worker = LazyWorker::new("sst-recovery"); + let scheduler = sst_worker.scheduler(); + + let (raft_engine, raft_statistics) = RaftTestEngine::build(&cfg, &env, &key_manager, &cache); + + if let Some((cluster_id, store_id)) = id { + assert_ne!(store_id, 0); + bootstrap_store(&raft_engine, cluster_id, store_id).unwrap(); + } + + let builder = + 
KvEngineFactoryBuilder::new(env, &cfg.tikv, cache).sst_recovery_sender(Some(scheduler)); + + let factory = Box::new(builder.build()); + let rocks_statistics = factory.rocks_statistics(); + let reg = TabletRegistry::new(factory, dir.path().join("tablet")).unwrap(); + + ( + reg, + raft_engine, + key_manager, + dir, + sst_worker, + rocks_statistics, + raft_statistics, + ) +} + +/// Keep putting random kvs until specified size limit is reached. +pub fn put_till_size( + cluster: &mut Cluster, + limit: u64, + range: &mut dyn Iterator, +) -> Vec { + put_cf_till_size(cluster, CF_DEFAULT, limit, range) +} + +pub fn put_cf_till_size( + cluster: &mut Cluster, + cf: &'static str, + limit: u64, + range: &mut dyn Iterator, +) -> Vec { + assert!(limit > 0); + let mut len = 0; + let mut rng = rand::thread_rng(); + let mut key = String::new(); + let mut value = vec![0; 64]; + while len < limit { + let batch_size = std::cmp::min(1024, limit - len); + let mut reqs = vec![]; + for _ in 0..batch_size / 74 + 1 { + key.clear(); + let key_id = range.next().unwrap(); + write!(key, "{:09}", key_id).unwrap(); + rng.fill_bytes(&mut value); + // plus 1 for the extra encoding prefix + len += key.len() as u64 + 1; + len += value.len() as u64; + reqs.push(new_put_cf_cmd(cf, key.as_bytes(), &value)); + } + cluster.batch_put(key.as_bytes(), reqs).unwrap(); + // Approximate size of memtable is inaccurate for small data, + // we flush it to SST so we can use the size properties instead. + cluster.must_flush_cf(cf, true); + } + key.into_bytes() +} + +pub fn configure_for_snapshot(cluster: &mut Cluster) { + // Truncate the log quickly so that we can force sending snapshot. 
+ cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(2); + cluster.cfg.raft_store.merge_max_log_gap = 1; + cluster.cfg.raft_store.snap_mgr_gc_tick_interval = ReadableDuration::millis(50); +} + +pub fn configure_for_lease_read_v2( + cluster: &mut Cluster, + base_tick_ms: Option, + election_ticks: Option, +) -> Duration { + if let Some(base_tick_ms) = base_tick_ms { + cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(base_tick_ms); + } + let base_tick_interval = cluster.cfg.raft_store.raft_base_tick_interval.0; + if let Some(election_ticks) = election_ticks { + cluster.cfg.raft_store.raft_election_timeout_ticks = election_ticks; + } + let election_ticks = cluster.cfg.raft_store.raft_election_timeout_ticks as u32; + let election_timeout = base_tick_interval * election_ticks; + // Adjust max leader lease. + cluster.cfg.raft_store.raft_store_max_leader_lease = + ReadableDuration(election_timeout - base_tick_interval); + // Use large peer check interval, abnormal and max leader missing duration to + // make a valid config, that is election timeout x 2 < peer stale state + // check < abnormal < max leader missing duration. 
+ cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration(election_timeout * 3); + cluster.cfg.raft_store.abnormal_leader_missing_duration = + ReadableDuration(election_timeout * 4); + cluster.cfg.raft_store.max_leader_missing_duration = ReadableDuration(election_timeout * 5); + + election_timeout +} + +pub fn wait_for_synced(cluster: &mut Cluster, node_id: u64, region_id: u64) { + let mut storage = cluster + .sim + .read() + .unwrap() + .storages + .get(&node_id) + .unwrap() + .clone(); + let leader = cluster.leader_of_region(region_id).unwrap(); + let epoch = cluster.get_region_epoch(region_id); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_peer(leader); + ctx.set_region_epoch(epoch); + let snap_ctx = SnapContext { + pb_ctx: &ctx, + ..Default::default() + }; + let snapshot = storage.snapshot(snap_ctx).unwrap(); + let txn_ext = snapshot.txn_ext.clone().unwrap(); + for retry in 0..10 { + if txn_ext.is_max_ts_synced() { + break; + } + thread::sleep(Duration::from_millis(1 << retry)); + } + assert!(snapshot.ext().is_max_ts_synced()); +} diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 9d6444904f2..d5842bf6659 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -12,11 +12,11 @@ use std::{ use collections::{HashMap, HashSet}; use crossbeam::channel::TrySendError; use encryption_export::DataKeyManager; -use engine_rocks::{RocksEngine, RocksSnapshot, RocksStatistics}; +use engine_rocks::{RocksDbVector, RocksEngine, RocksSnapshot, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{ - CompactExt, Engines, Iterable, MiscExt, Mutable, Peekable, RaftEngineReadOnly, WriteBatch, - WriteBatchExt, CF_DEFAULT, CF_RAFT, + CompactExt, Engines, Iterable, MiscExt, Mutable, Peekable, RaftEngineReadOnly, SyncMutable, + WriteBatch, WriteBatchExt, CF_DEFAULT, CF_RAFT, }; use file_system::IoRateLimiter; use 
futures::{self, channel::oneshot, executor::block_on}; @@ -1335,6 +1335,10 @@ impl Cluster { kv_wb.write().unwrap(); } + pub fn add_send_filter_on_node(&mut self, node_id: u64, filter: Box) { + self.sim.wl().add_send_filter(node_id, filter); + } + pub fn add_send_filter(&self, factory: F) { let mut sim = self.sim.wl(); for node_id in sim.get_node_ids() { @@ -1899,6 +1903,25 @@ impl Cluster { .unwrap(); rx.recv_timeout(Duration::from_secs(5)).unwrap(); } + + pub fn scan( + &self, + store_id: u64, + cf: &str, + start_key: &[u8], + end_key: &[u8], + fill_cache: bool, + f: F, + ) -> engine_traits::Result<()> + where + F: FnMut(&[u8], &[u8]) -> engine_traits::Result, + { + self.engines[&store_id] + .kv + .scan(cf, start_key, end_key, fill_cache, f)?; + + Ok(()) + } } impl Drop for Cluster { @@ -1907,3 +1930,21 @@ impl Drop for Cluster { self.shutdown(); } } + +pub trait RawEngine: Peekable + SyncMutable { + fn region_local_state(&self, region_id: u64) + -> engine_traits::Result>; + + fn raft_apply_state(&self, _region_id: u64) -> engine_traits::Result> { + unimplemented!() + } +} + +impl RawEngine for RocksEngine { + fn region_local_state( + &self, + region_id: u64, + ) -> engine_traits::Result> { + self.get_msg_cf(CF_RAFT, &keys::region_state_key(region_id)) + } +} diff --git a/components/test_raftstore/src/lib.rs b/components/test_raftstore/src/lib.rs index 8893d8a7ca4..950581a6ce8 100644 --- a/components/test_raftstore/src/lib.rs +++ b/components/test_raftstore/src/lib.rs @@ -11,7 +11,7 @@ mod node; mod router; mod server; mod transport_simulate; -mod util; +pub mod util; pub use crate::{ cluster::*, config::Config, node::*, router::*, server::*, transport_simulate::*, util::*, diff --git a/components/test_raftstore/src/transport_simulate.rs b/components/test_raftstore/src/transport_simulate.rs index 06ff550aa64..a49a41af4e3 100644 --- a/components/test_raftstore/src/transport_simulate.rs +++ b/components/test_raftstore/src/transport_simulate.rs @@ -162,7 +162,7 @@ 
impl SimulateTransport { } } -fn filter_send( +pub fn filter_send( filters: &Arc>>>, msg: RaftMessage, mut h: H, diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index e765cfb883f..b7a9ea6f1af 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -46,14 +46,21 @@ use rand::RngCore; use server::server::ConfiguredRaftEngine; use tempfile::TempDir; use test_pd_client::TestPdClient; -use tikv::{config::*, server::KvEngineFactoryBuilder, storage::point_key_range}; +use tikv::{ + config::*, + server::KvEngineFactoryBuilder, + storage::{ + kv::{SnapContext, SnapshotExt}, + point_key_range, Engine, Snapshot, + }, +}; pub use tikv_util::store::{find_peer, new_learner_peer, new_peer}; use tikv_util::{config::*, escape, time::ThreadReadId, worker::LazyWorker, HandyRwLock}; use txn_types::Key; -use crate::{Cluster, Config, ServerCluster, Simulator}; +use crate::{Cluster, Config, RawEngine, ServerCluster, Simulator}; -pub fn must_get(engine: &RocksEngine, cf: &str, key: &[u8], value: Option<&[u8]>) { +pub fn must_get(engine: &impl RawEngine, cf: &str, key: &[u8], value: Option<&[u8]>) { for _ in 1..300 { let res = engine.get_value_cf(cf, &keys::data_key(key)).unwrap(); if let (Some(value), Some(res)) = (value, res.as_ref()) { @@ -79,19 +86,19 @@ pub fn must_get(engine: &RocksEngine, cf: &str, key: &[u8], value: Option<&[u8]> ) } -pub fn must_get_equal(engine: &RocksEngine, key: &[u8], value: &[u8]) { +pub fn must_get_equal(engine: &impl RawEngine, key: &[u8], value: &[u8]) { must_get(engine, "default", key, Some(value)); } -pub fn must_get_none(engine: &RocksEngine, key: &[u8]) { +pub fn must_get_none(engine: &impl RawEngine, key: &[u8]) { must_get(engine, "default", key, None); } -pub fn must_get_cf_equal(engine: &RocksEngine, cf: &str, key: &[u8], value: &[u8]) { +pub fn must_get_cf_equal(engine: &impl RawEngine, cf: &str, key: &[u8], value: &[u8]) { must_get(engine, cf, key, Some(value)); } 
-pub fn must_get_cf_none(engine: &RocksEngine, cf: &str, key: &[u8]) { +pub fn must_get_cf_none(engine: &impl RawEngine, cf: &str, key: &[u8]) { must_get(engine, cf, key, None); } @@ -129,7 +136,7 @@ pub fn must_region_cleared(engine: &Engines, region } lazy_static! { - static ref TEST_CONFIG: TikvConfig = { + pub static ref TEST_CONFIG: TikvConfig = { let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); let common_test_cfg = manifest_dir.join("src/common-test.toml"); TikvConfig::from_file(&common_test_cfg, None).unwrap_or_else(|e| { @@ -635,24 +642,24 @@ pub fn configure_for_hibernate(cluster: &mut Cluster) { cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::secs(10); } -pub fn configure_for_snapshot(cluster: &mut Cluster) { +pub fn configure_for_snapshot(config: &mut Config) { // Truncate the log quickly so that we can force sending snapshot. - cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); - cluster.cfg.raft_store.raft_log_gc_count_limit = Some(2); - cluster.cfg.raft_store.merge_max_log_gap = 1; - cluster.cfg.raft_store.snap_mgr_gc_tick_interval = ReadableDuration::millis(50); + config.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); + config.raft_store.raft_log_gc_count_limit = Some(2); + config.raft_store.merge_max_log_gap = 1; + config.raft_store.snap_mgr_gc_tick_interval = ReadableDuration::millis(50); } -pub fn configure_for_merge(cluster: &mut Cluster) { +pub fn configure_for_merge(config: &mut Config) { // Avoid log compaction which will prevent merge. - cluster.cfg.raft_store.raft_log_gc_threshold = 1000; - cluster.cfg.raft_store.raft_log_gc_count_limit = Some(1000); - cluster.cfg.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(20)); + config.raft_store.raft_log_gc_threshold = 1000; + config.raft_store.raft_log_gc_count_limit = Some(1000); + config.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(20)); // Make merge check resume quickly. 
- cluster.cfg.raft_store.merge_check_tick_interval = ReadableDuration::millis(100); + config.raft_store.merge_check_tick_interval = ReadableDuration::millis(100); // When isolated, follower relies on stale check tick to detect failure leader, // choose a smaller number to make it recover faster. - cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(500); + config.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(500); } pub fn ignore_merge_target_integrity(cluster: &mut Cluster) { @@ -660,30 +667,29 @@ pub fn ignore_merge_target_integrity(cluster: &mut Cluster) { cluster.pd_client.ignore_merge_target_integrity(); } -pub fn configure_for_lease_read( - cluster: &mut Cluster, +pub fn configure_for_lease_read( + cfg: &mut Config, base_tick_ms: Option, election_ticks: Option, ) -> Duration { if let Some(base_tick_ms) = base_tick_ms { - cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(base_tick_ms); + cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(base_tick_ms); } - let base_tick_interval = cluster.cfg.raft_store.raft_base_tick_interval.0; + let base_tick_interval = cfg.raft_store.raft_base_tick_interval.0; if let Some(election_ticks) = election_ticks { - cluster.cfg.raft_store.raft_election_timeout_ticks = election_ticks; + cfg.raft_store.raft_election_timeout_ticks = election_ticks; } - let election_ticks = cluster.cfg.raft_store.raft_election_timeout_ticks as u32; + let election_ticks = cfg.raft_store.raft_election_timeout_ticks as u32; let election_timeout = base_tick_interval * election_ticks; // Adjust max leader lease. - cluster.cfg.raft_store.raft_store_max_leader_lease = + cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(election_timeout - base_tick_interval); // Use large peer check interval, abnormal and max leader missing duration to // make a valid config, that is election timeout x 2 < peer stale state // check < abnormal < max leader missing duration. 
- cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration(election_timeout * 3); - cluster.cfg.raft_store.abnormal_leader_missing_duration = - ReadableDuration(election_timeout * 4); - cluster.cfg.raft_store.max_leader_missing_duration = ReadableDuration(election_timeout * 5); + cfg.raft_store.peer_stale_state_check_interval = ReadableDuration(election_timeout * 3); + cfg.raft_store.abnormal_leader_missing_duration = ReadableDuration(election_timeout * 4); + cfg.raft_store.max_leader_missing_duration = ReadableDuration(election_timeout * 5); election_timeout } @@ -1384,3 +1390,33 @@ pub fn peer_on_store(region: &metapb::Region, store_id: u64) -> metapb::Peer { .unwrap() .clone() } + +pub fn wait_for_synced(cluster: &mut Cluster, node_id: u64, region_id: u64) { + let mut storage = cluster + .sim + .read() + .unwrap() + .storages + .get(&node_id) + .unwrap() + .clone(); + let leader = cluster.leader_of_region(region_id).unwrap(); + let epoch = cluster.get_region_epoch(region_id); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_peer(leader); + ctx.set_region_epoch(epoch); + let snap_ctx = SnapContext { + pb_ctx: &ctx, + ..Default::default() + }; + let snapshot = storage.snapshot(snap_ctx).unwrap(); + let txn_ext = snapshot.txn_ext.clone().unwrap(); + for retry in 0..10 { + if txn_ext.is_max_ts_synced() { + break; + } + thread::sleep(Duration::from_millis(1 << retry)); + } + assert!(snapshot.ext().is_max_ts_synced()); +} diff --git a/components/test_raftstore_macro/src/lib.rs b/components/test_raftstore_macro/src/lib.rs index 59a2c6f1273..3c8239d9f3b 100644 --- a/components/test_raftstore_macro/src/lib.rs +++ b/components/test_raftstore_macro/src/lib.rs @@ -6,6 +6,8 @@ use quote::{quote, ToTokens}; use syn::{parse_macro_input, parse_quote, Ident, ItemFn, Path}; /// test_case generate test cases using cluster creation method provided. 
+/// It also import the package related util module, which means we should locate +/// methods using Cluster in the related util modules. /// /// ex: /// #[test_case(test_raftstore::new_node_cluster)] @@ -21,19 +23,19 @@ use syn::{parse_macro_input, parse_quote, Ident, ItemFn, Path}; /// mod test_something { /// #[test] /// fn test_raftstore_new_node_cluster() { -/// use test_raftstore::new_node_cluster as new_cluster; +/// use test_raftstore::(util::*, new_node_cluster as new_cluster); /// let mut cluster = new_cluster(0, 1); /// } /// /// #[test] /// fn test_raftstore_new_server_cluster() { -/// use test_raftstore::new_server_cluster as new_cluster; +/// use test_raftstore::(util::*, new_server_cluster as new_cluster); /// let mut cluster = new_cluster(0, 1); /// } /// /// #[test] /// fn test_raftstore_v2_new_server_cluster() { -/// use test_raftstore::test_raftstore_v2 as new_cluster; +/// use test_raftstore::(util::*, test_raftstore_v2 as new_cluster); /// let mut cluster = new_cluster(0, 1); /// } /// } @@ -73,7 +75,7 @@ fn render_test_cases(test_cases: Vec, fn_item: ItemFn) -> TokenStr 0, syn::parse( quote! 
{ - use #package::#method as new_cluster; + use #package::{util::*, #method as new_cluster}; } .into(), ) diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 413adf0d415..eb49775e5c1 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -236,6 +236,14 @@ impl TabletFactory for KvEngineFactory { fn exists(&self, path: &Path) -> bool { RocksEngine::exists(path.to_str().unwrap()) } + + #[cfg(any(test, feature = "testexport"))] + fn set_state_storage(&self, state_storage: Arc) { + let inner = Arc::as_ptr(&self.inner) as *mut FactoryInner; + unsafe { + (*inner).state_storage = Some(state_storage); + } + } } #[cfg(test)] diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index da292eca17d..ce6971eb8fb 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -88,6 +88,12 @@ pub struct Service { reject_messages_on_memory_ratio: f64, } +impl Drop for Service { + fn drop(&mut self) { + self.check_leader_scheduler.stop(); + } +} + impl Clone for Service { fn clone(&self) -> Self { Service { diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index b5d989d5370..0fc836f36c2 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -1,5 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +#[cfg(any(test, feature = "testexport"))] +use std::io; use std::{ convert::{TryFrom, TryInto}, fs::{self, File}, @@ -461,6 +463,43 @@ impl Runnable for TabletRunner { } } +// A helper function to copy snapshot. +#[cfg(any(test, feature = "testexport"))] +pub fn copy_tablet_snapshot( + key: TabletSnapKey, + msg: RaftMessage, + sender_snap_mgr: &TabletSnapManager, + recver_snap_mgr: &TabletSnapManager, +) -> Result<()> { + let sender_path = sender_snap_mgr.tablet_gen_path(&key); + let files = fs::read_dir(sender_path)? 
+ .map(|f| Ok(f?.path())) + .filter(|f| f.is_ok() && f.as_ref().unwrap().is_file()) + .collect::>>()?; + + let mut head = SnapshotChunk::default(); + head.set_message(msg); + head.set_data(usize::to_ne_bytes(SNAP_CHUNK_LEN).to_vec()); + + let recv_context = RecvTabletSnapContext::new(head)?; + let recv_path = recver_snap_mgr.tmp_recv_path(&recv_context.key); + fs::create_dir_all(&recv_path)?; + + for path in files { + let sender_name = path.file_name().unwrap().to_str().unwrap(); + let mut sender_f = File::open(&path)?; + + let recv_p = recv_path.join(sender_name); + let mut recv_f = File::create(recv_p)?; + + while io::copy(&mut sender_f, &mut recv_f)? != 0 {} + } + + let final_path = recver_snap_mgr.final_recv_path(&recv_context.key); + fs::rename(&recv_path, final_path)?; + Ok(()) +} + #[cfg(test)] mod tests { use std::{ diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 6fb05f19cd1..331575339a5 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -143,6 +143,7 @@ test_coprocessor = { workspace = true } test_pd = { workspace = true } test_pd_client = { workspace = true } test_raftstore = { workspace = true } +test_raftstore-v2 = { workspace = true } test_raftstore_macro = { workspace = true } test_sst_importer = { workspace = true } test_storage = { workspace = true } diff --git a/tests/failpoints/cases/test_async_io.rs b/tests/failpoints/cases/test_async_io.rs index 43ed82d4cdd..3d53b9c5f14 100644 --- a/tests/failpoints/cases/test_async_io.rs +++ b/tests/failpoints/cases/test_async_io.rs @@ -97,7 +97,7 @@ fn test_async_io_delay_destroy_after_conf_change() { fn test_async_io_cannot_destroy_when_persist_snapshot() { let mut cluster = new_node_cluster(0, 3); cluster.cfg.raft_store.store_io_pool_size = 2; - configure_for_snapshot(&mut cluster); + configure_for_snapshot(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -180,7 +180,7 @@ fn test_async_io_cannot_destroy_when_persist_snapshot() { fn 
test_async_io_cannot_handle_ready_when_persist_snapshot() { let mut cluster = new_node_cluster(0, 3); cluster.cfg.raft_store.store_io_pool_size = 2; - configure_for_snapshot(&mut cluster); + configure_for_snapshot(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); diff --git a/tests/failpoints/cases/test_cmd_epoch_checker.rs b/tests/failpoints/cases/test_cmd_epoch_checker.rs index d96c467d487..feaa1af76ef 100644 --- a/tests/failpoints/cases/test_cmd_epoch_checker.rs +++ b/tests/failpoints/cases/test_cmd_epoch_checker.rs @@ -159,7 +159,7 @@ fn test_reject_proposal_during_region_split() { #[test] fn test_reject_proposal_during_region_merge() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = cluster.pd_client.clone(); pd_client.disable_default_operator(); cluster.run(); @@ -284,7 +284,7 @@ fn test_reject_proposal_during_region_merge() { #[test] fn test_reject_proposal_during_rollback_region_merge() { let mut cluster = new_node_cluster(0, 2); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = cluster.pd_client.clone(); pd_client.disable_default_operator(); cluster.run_conf_change(); diff --git a/tests/failpoints/cases/test_conf_change.rs b/tests/failpoints/cases/test_conf_change.rs index d4219808af0..7821c8be5df 100644 --- a/tests/failpoints/cases/test_conf_change.rs +++ b/tests/failpoints/cases/test_conf_change.rs @@ -19,7 +19,7 @@ fn test_destroy_local_reader() { let mut cluster = new_node_cluster(0, 3); // Set election timeout and max leader lease to 1s. - configure_for_lease_read(&mut cluster, Some(100), Some(10)); + configure_for_lease_read(&mut cluster.cfg, Some(100), Some(10)); let pd_client = cluster.pd_client.clone(); // Disable default max peer count check. 
diff --git a/tests/failpoints/cases/test_early_apply.rs b/tests/failpoints/cases/test_early_apply.rs index acac65cd397..a194ef74d8f 100644 --- a/tests/failpoints/cases/test_early_apply.rs +++ b/tests/failpoints/cases/test_early_apply.rs @@ -97,7 +97,7 @@ fn test_early_apply_yield_followed_with_many_entries() { let mut cluster = new_node_cluster(0, 3); cluster.pd_client.disable_default_operator(); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.run(); cluster.must_put(b"k1", b"v1"); diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index fa4f6e9cb42..c22136d04de 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -28,7 +28,7 @@ use txn_types::{Key, PessimisticLock}; #[test] fn test_node_merge_rollback() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -116,7 +116,7 @@ fn test_node_merge_rollback() { #[test] fn test_node_merge_restart() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); @@ -201,7 +201,7 @@ fn test_node_merge_restart() { #[test] fn test_node_merge_catch_up_logs_restart() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.run(); cluster.must_put(b"k1", b"v1"); @@ -242,7 +242,7 @@ fn test_node_merge_catch_up_logs_restart() { #[test] fn test_node_merge_catch_up_logs_leader_election() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(10); cluster.cfg.raft_store.raft_election_timeout_ticks = 25; 
cluster.cfg.raft_store.raft_log_gc_threshold = 12; @@ -296,7 +296,7 @@ fn test_node_merge_catch_up_logs_leader_election() { #[test] fn test_node_merge_catch_up_logs_no_need() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(10); cluster.cfg.raft_store.raft_election_timeout_ticks = 25; cluster.cfg.raft_store.raft_log_gc_threshold = 12; @@ -366,7 +366,7 @@ fn test_node_merge_catch_up_logs_no_need() { #[test] fn test_node_merge_recover_snapshot() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.raft_log_gc_threshold = 12; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(12); let pd_client = Arc::clone(&cluster.pd_client); @@ -424,7 +424,7 @@ fn test_node_merge_multiple_snapshots_not_together() { fn test_node_merge_multiple_snapshots(together: bool) { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); ignore_merge_target_integrity(&mut cluster); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -534,7 +534,7 @@ fn test_node_merge_multiple_snapshots(together: bool) { #[test] fn test_node_merge_restart_after_apply_premerge_before_apply_compact_log() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.merge_max_log_gap = 10; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(11); // Rely on this config to trigger a compact log @@ -617,7 +617,7 @@ fn test_node_merge_restart_after_apply_premerge_before_apply_compact_log() { #[test] fn test_node_failed_merge_before_succeed_merge() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); 
cluster.cfg.raft_store.merge_max_log_gap = 30; cluster.cfg.raft_store.store_batch_system.max_batch_size = Some(1); cluster.cfg.raft_store.store_batch_system.pool_size = 2; @@ -706,7 +706,7 @@ fn test_node_failed_merge_before_succeed_merge() { #[test] fn test_node_merge_transfer_leader() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.store_batch_system.max_batch_size = Some(1); cluster.cfg.raft_store.store_batch_system.pool_size = 2; let pd_client = Arc::clone(&cluster.pd_client); @@ -768,7 +768,7 @@ fn test_node_merge_transfer_leader() { #[test] fn test_node_merge_cascade_merge_with_apply_yield() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -807,7 +807,7 @@ fn test_node_merge_cascade_merge_with_apply_yield() { #[test] fn test_node_multiple_rollback_merge() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.right_derive_when_split = true; cluster.cfg.raft_store.merge_check_tick_interval = ReadableDuration::millis(20); let pd_client = Arc::clone(&cluster.pd_client); @@ -1208,7 +1208,7 @@ fn test_node_merge_crash_when_snapshot() { #[test] fn test_prewrite_before_max_ts_is_synced() { let mut cluster = new_server_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.run(); // Transfer leader to node 1 first to ensure all operations happen on node 1 @@ -1265,7 +1265,7 @@ fn test_prewrite_before_max_ts_is_synced() { #[test] fn test_source_peer_read_delegate_after_apply() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); 
@@ -1314,7 +1314,7 @@ fn test_source_peer_read_delegate_after_apply() { #[test] fn test_merge_with_concurrent_pessimistic_locking() { let mut cluster = new_server_cluster(0, 2); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.pessimistic_txn.pipelined = true; cluster.cfg.pessimistic_txn.in_memory = true; cluster.run(); @@ -1402,7 +1402,7 @@ fn test_merge_with_concurrent_pessimistic_locking() { #[test] fn test_merge_pessimistic_locks_with_concurrent_prewrite() { let mut cluster = new_server_cluster(0, 2); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.pessimistic_txn.pipelined = true; cluster.cfg.pessimistic_txn.in_memory = true; let pd_client = Arc::clone(&cluster.pd_client); @@ -1487,7 +1487,7 @@ fn test_merge_pessimistic_locks_with_concurrent_prewrite() { #[test] fn test_retry_pending_prepare_merge_fail() { let mut cluster = new_server_cluster(0, 2); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.pessimistic_txn.pipelined = true; cluster.cfg.pessimistic_txn.in_memory = true; let pd_client = Arc::clone(&cluster.pd_client); @@ -1564,7 +1564,7 @@ fn test_retry_pending_prepare_merge_fail() { #[test] fn test_merge_pessimistic_locks_propose_fail() { let mut cluster = new_server_cluster(0, 2); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.pessimistic_txn.pipelined = true; cluster.cfg.pessimistic_txn.in_memory = true; let pd_client = Arc::clone(&cluster.pd_client); @@ -1633,7 +1633,7 @@ fn test_merge_pessimistic_locks_propose_fail() { #[test] fn test_destroy_source_peer_while_merging() { let mut cluster = new_node_cluster(0, 5); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); diff --git a/tests/failpoints/cases/test_pending_peers.rs b/tests/failpoints/cases/test_pending_peers.rs index 
5618bc9ab8e..c41c97034b4 100644 --- a/tests/failpoints/cases/test_pending_peers.rs +++ b/tests/failpoints/cases/test_pending_peers.rs @@ -41,8 +41,8 @@ fn test_pending_peers() { #[test] fn test_pending_snapshot() { let mut cluster = new_node_cluster(0, 3); - configure_for_snapshot(&mut cluster); - let election_timeout = configure_for_lease_read(&mut cluster, None, Some(15)); + configure_for_snapshot(&mut cluster.cfg); + let election_timeout = configure_for_lease_read(&mut cluster.cfg, None, Some(15)); let gc_limit = cluster.cfg.raft_store.raft_log_gc_count_limit(); cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(100); diff --git a/tests/failpoints/cases/test_rawkv.rs b/tests/failpoints/cases/test_rawkv.rs index e228e82830c..a795422c120 100644 --- a/tests/failpoints/cases/test_rawkv.rs +++ b/tests/failpoints/cases/test_rawkv.rs @@ -24,7 +24,7 @@ impl TestSuite { // Disable background renew by setting `renew_interval` to 0, to make timestamp // allocation predictable. configure_for_causal_ts(&mut cluster, "0s", 100); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.run(); cluster.pd_client.disable_default_operator(); diff --git a/tests/failpoints/cases/test_replica_read.rs b/tests/failpoints/cases/test_replica_read.rs index 5fe71834e45..9f844f582e4 100644 --- a/tests/failpoints/cases/test_replica_read.rs +++ b/tests/failpoints/cases/test_replica_read.rs @@ -20,7 +20,7 @@ fn test_wait_for_apply_index() { let mut cluster = new_server_cluster(0, 3); // Increase the election tick to make this test case running reliably. 
- configure_for_lease_read(&mut cluster, Some(50), Some(10_000)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -76,7 +76,7 @@ fn test_wait_for_apply_index() { fn test_duplicate_read_index_ctx() { // Initialize cluster let mut cluster = new_node_cluster(0, 3); - configure_for_lease_read(&mut cluster, Some(50), Some(10_000)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); cluster.cfg.raft_store.raft_heartbeat_ticks = 1; let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -155,7 +155,7 @@ fn test_duplicate_read_index_ctx() { fn test_read_before_init() { // Initialize cluster let mut cluster = new_node_cluster(0, 3); - configure_for_lease_read(&mut cluster, Some(50), Some(10_000)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -206,7 +206,7 @@ fn test_read_before_init() { fn test_read_applying_snapshot() { // Initialize cluster let mut cluster = new_node_cluster(0, 3); - configure_for_lease_read(&mut cluster, Some(50), Some(10_000)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -270,8 +270,8 @@ fn test_read_applying_snapshot() { #[test] fn test_read_after_cleanup_range_for_snap() { let mut cluster = new_server_cluster(1, 3); - configure_for_snapshot(&mut cluster); - configure_for_lease_read(&mut cluster, Some(100), Some(10)); + configure_for_snapshot(&mut cluster.cfg); + configure_for_lease_read(&mut cluster.cfg, Some(100), Some(10)); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -365,7 +365,7 @@ fn test_read_after_cleanup_range_for_snap() { #[test] fn test_new_split_learner_can_not_find_leader() { let mut cluster = 
new_node_cluster(0, 4); - configure_for_lease_read(&mut cluster, Some(5000), None); + configure_for_lease_read(&mut cluster.cfg, Some(5000), None); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -411,7 +411,7 @@ fn test_new_split_learner_can_not_find_leader() { fn test_replica_read_after_transfer_leader() { let mut cluster = new_node_cluster(0, 3); - configure_for_lease_read(&mut cluster, Some(50), Some(100)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(100)); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -488,7 +488,7 @@ fn test_read_index_after_transfer_leader() { let mut cluster = new_node_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); - configure_for_lease_read(&mut cluster, Some(50), Some(100)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(100)); // Setup cluster and check all peers have data. let region_id = cluster.run_conf_change(); pd_client.must_add_peer(region_id, new_peer(2, 2)); @@ -579,7 +579,7 @@ fn test_read_index_after_transfer_leader() { #[test] fn test_batch_read_index_after_transfer_leader() { let mut cluster = new_node_cluster(0, 3); - configure_for_lease_read(&mut cluster, Some(50), Some(100)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(100)); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -730,7 +730,7 @@ fn test_read_index_lock_checking_on_follower() { fn test_read_index_lock_checking_on_false_leader() { let mut cluster = new_node_cluster(0, 5); // Use long election timeout and short lease. 
- configure_for_lease_read(&mut cluster, Some(50), Some(200)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(200)); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(Duration::from_millis(100)); diff --git a/tests/failpoints/cases/test_replica_stale_read.rs b/tests/failpoints/cases/test_replica_stale_read.rs index 3dc7223ae41..b7d436d92d7 100644 --- a/tests/failpoints/cases/test_replica_stale_read.rs +++ b/tests/failpoints/cases/test_replica_stale_read.rs @@ -14,14 +14,14 @@ fn prepare_for_stale_read(leader: Peer) -> (Cluster, Arc)>>, + before_run: Option>, ) -> (Cluster, Arc, PeerClient) { let mut cluster = new_server_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); if let Some(f) = before_run { - f(&mut cluster); + f(&mut cluster.cfg); }; cluster.cfg.resolved_ts.enable = true; cluster.run(); diff --git a/tests/failpoints/cases/test_snap.rs b/tests/failpoints/cases/test_snap.rs index a6a4a1824f3..ca329896df1 100644 --- a/tests/failpoints/cases/test_snap.rs +++ b/tests/failpoints/cases/test_snap.rs @@ -62,7 +62,7 @@ fn test_overlap_cleanup() { #[test] fn test_server_snapshot_on_resolve_failure() { let mut cluster = new_server_cluster(1, 2); - configure_for_snapshot(&mut cluster); + configure_for_snapshot(&mut cluster.cfg); let on_send_store_fp = "transport_on_send_snapshot"; @@ -195,7 +195,7 @@ fn assert_snapshot(snap_dir: &str, region_id: u64, exist: bool) { #[test] fn test_destroy_peer_on_pending_snapshot() { let mut cluster = new_server_cluster(0, 3); - configure_for_snapshot(&mut cluster); + configure_for_snapshot(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -261,7 +261,7 @@ fn test_destroy_peer_on_pending_snapshot() { #[test] fn test_destroy_peer_on_pending_snapshot_and_restart() { let mut cluster = new_server_cluster(0, 3); - configure_for_snapshot(&mut cluster); + configure_for_snapshot(&mut cluster.cfg); let 
pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -387,7 +387,7 @@ fn test_shutdown_when_snap_gc() { #[test] fn test_receive_old_snapshot() { let mut cluster = new_node_cluster(0, 3); - configure_for_snapshot(&mut cluster); + configure_for_snapshot(&mut cluster.cfg); cluster.cfg.raft_store.right_derive_when_split = true; let pd_client = Arc::clone(&cluster.pd_client); @@ -482,7 +482,7 @@ fn test_receive_old_snapshot() { #[test] fn test_gen_snapshot_with_no_committed_entries_ready() { let mut cluster = new_node_cluster(0, 3); - configure_for_snapshot(&mut cluster); + configure_for_snapshot(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -571,7 +571,7 @@ fn test_cancel_snapshot_generating() { #[test] fn test_snapshot_gc_after_failed() { let mut cluster = new_server_cluster(0, 3); - configure_for_snapshot(&mut cluster); + configure_for_snapshot(&mut cluster.cfg); cluster.cfg.raft_store.snap_gc_timeout = ReadableDuration::millis(300); let pd_client = Arc::clone(&cluster.pd_client); @@ -641,7 +641,7 @@ fn test_snapshot_gc_after_failed() { #[test] fn test_sending_fail_with_net_error() { let mut cluster = new_server_cluster(1, 2); - configure_for_snapshot(&mut cluster); + configure_for_snapshot(&mut cluster.cfg); cluster.cfg.raft_store.snap_gc_timeout = ReadableDuration::millis(300); let pd_client = Arc::clone(&cluster.pd_client); @@ -742,7 +742,7 @@ fn test_snapshot_clean_up_logs_with_unfinished_log_gc() { #[test] fn test_snapshot_recover_from_raft_write_failure() { let mut cluster = new_server_cluster(0, 3); - configure_for_snapshot(&mut cluster); + configure_for_snapshot(&mut cluster.cfg); // Avoid triggering snapshot at final step. 
cluster.cfg.raft_store.raft_log_gc_count_limit = Some(10); let pd_client = Arc::clone(&cluster.pd_client); @@ -800,7 +800,7 @@ fn test_snapshot_recover_from_raft_write_failure() { #[test] fn test_snapshot_recover_from_raft_write_failure_with_uncommitted_log() { let mut cluster = new_server_cluster(0, 3); - configure_for_snapshot(&mut cluster); + configure_for_snapshot(&mut cluster.cfg); // Avoid triggering snapshot at final step. cluster.cfg.raft_store.raft_log_gc_count_limit = Some(10); let pd_client = Arc::clone(&cluster.pd_client); diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 09e87bb8d4d..f3a052c8027 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -268,7 +268,7 @@ impl Filter for PrevoteRangeFilter { #[test] fn test_split_not_to_split_existing_region() { let mut cluster = new_node_cluster(0, 4); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.right_derive_when_split = true; cluster.cfg.raft_store.apply_batch_system.max_batch_size = Some(1); cluster.cfg.raft_store.apply_batch_system.pool_size = 2; @@ -341,7 +341,7 @@ fn test_split_not_to_split_existing_region() { #[test] fn test_split_not_to_split_existing_tombstone_region() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.right_derive_when_split = true; cluster.cfg.raft_store.store_batch_system.max_batch_size = Some(1); cluster.cfg.raft_store.store_batch_system.pool_size = 2; @@ -409,7 +409,7 @@ fn test_split_not_to_split_existing_tombstone_region() { #[test] fn test_split_continue_when_destroy_peer_after_mem_check() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.right_derive_when_split = true; 
cluster.cfg.raft_store.store_batch_system.max_batch_size = Some(1); cluster.cfg.raft_store.store_batch_system.pool_size = 2; @@ -496,7 +496,7 @@ fn test_split_continue_when_destroy_peer_after_mem_check() { #[test] fn test_split_should_split_existing_same_uninitialied_peer() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.right_derive_when_split = true; cluster.cfg.raft_store.store_batch_system.max_batch_size = Some(1); cluster.cfg.raft_store.store_batch_system.pool_size = 2; @@ -549,7 +549,7 @@ fn test_split_should_split_existing_same_uninitialied_peer() { #[test] fn test_split_not_to_split_existing_different_uninitialied_peer() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.right_derive_when_split = true; cluster.cfg.raft_store.store_batch_system.max_batch_size = Some(1); cluster.cfg.raft_store.store_batch_system.pool_size = 2; diff --git a/tests/failpoints/cases/test_stale_peer.rs b/tests/failpoints/cases/test_stale_peer.rs index 1a4ef0b0afc..b171cebd173 100644 --- a/tests/failpoints/cases/test_stale_peer.rs +++ b/tests/failpoints/cases/test_stale_peer.rs @@ -138,7 +138,7 @@ fn test_stale_learner_restart() { #[test] fn test_stale_peer_destroy_when_apply_snapshot() { let mut cluster = new_node_cluster(0, 3); - configure_for_snapshot(&mut cluster); + configure_for_snapshot(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); diff --git a/tests/failpoints/cases/test_stale_read.rs b/tests/failpoints/cases/test_stale_read.rs index 18ddb865fd9..475ed71a1b0 100644 --- a/tests/failpoints/cases/test_stale_read.rs +++ b/tests/failpoints/cases/test_stale_read.rs @@ -17,7 +17,7 @@ fn stale_read_during_splitting(right_derive: bool) { let count = 3; let mut cluster = new_node_cluster(0, count); 
cluster.cfg.raft_store.right_derive_when_split = right_derive; - let election_timeout = configure_for_lease_read(&mut cluster, None, None); + let election_timeout = configure_for_lease_read(&mut cluster.cfg, None, None); cluster.run(); // Write the initial values. @@ -215,8 +215,8 @@ fn test_node_stale_read_during_splitting_right_derive() { fn test_stale_read_during_merging() { let count = 3; let mut cluster = new_node_cluster(0, count); - configure_for_merge(&mut cluster); - let election_timeout = configure_for_lease_read(&mut cluster, None, None); + configure_for_merge(&mut cluster.cfg); + let election_timeout = configure_for_lease_read(&mut cluster.cfg, None, None); cluster.cfg.raft_store.right_derive_when_split = false; cluster.cfg.raft_store.pd_heartbeat_tick_interval = cluster.cfg.raft_store.raft_base_tick_interval; @@ -323,7 +323,7 @@ fn test_read_index_when_transfer_leader_2() { let mut cluster = new_node_cluster(0, 3); // Increase the election tick to make this test case running reliably. - configure_for_lease_read(&mut cluster, Some(50), Some(10_000)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); // Stop log compaction to transfer leader with filter easier. 
configure_for_request_snapshot(&mut cluster); let max_lease = Duration::from_secs(2); @@ -482,8 +482,8 @@ fn test_stale_read_during_merging_2() { let pd_client = cluster.pd_client.clone(); pd_client.disable_default_operator(); - configure_for_merge(&mut cluster); - configure_for_lease_read(&mut cluster, Some(50), Some(20)); + configure_for_merge(&mut cluster.cfg); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(20)); cluster.run(); diff --git a/tests/integrations/raftstore/test_conf_change.rs b/tests/integrations/raftstore/test_conf_change.rs index 63b3feff0e0..500a27ae266 100644 --- a/tests/integrations/raftstore/test_conf_change.rs +++ b/tests/integrations/raftstore/test_conf_change.rs @@ -9,11 +9,10 @@ use std::{ time::Duration, }; -use engine_traits::{Peekable, CF_RAFT}; +use engine_traits::Peekable; use futures::executor::block_on; use kvproto::{ metapb::{self, PeerRole}, - raft_cmdpb::{RaftCmdResponse, RaftResponseHeader}, raft_serverpb::*, }; use pd_client::PdClient; @@ -21,9 +20,27 @@ use raft::eraftpb::{ConfChangeType, MessageType}; use raftstore::Result; use test_pd_client::TestPdClient; use test_raftstore::*; -use tikv_util::{config::ReadableDuration, store::is_learner, time::Instant, HandyRwLock}; +use test_raftstore_macro::test_case; +use tikv_util::{config::ReadableDuration, store::is_learner, time::Instant}; + +macro_rules! 
call_conf_change { + ($cluster:expr, $region_id:expr, $conf_change_type:expr, $peer:expr) => {{ + let conf_change = new_change_peer_request($conf_change_type, $peer); + let epoch = $cluster.pd_client.get_region_epoch($region_id); + let admin_req = new_admin_request($region_id, &epoch, conf_change); + $cluster.call_command_on_leader(admin_req, Duration::from_secs(3)) + }}; +} + +fn new_conf_change_peer(store: &metapb::Store, pd_client: &Arc) -> metapb::Peer { + let peer_id = pd_client.alloc_id().unwrap(); + new_peer(store.get_id(), peer_id) +} -fn test_simple_conf_change(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_server_cluster)] +fn test_server_simple_conf_change() { + let count = 5; + let mut cluster = new_cluster(0, count); let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer count check. pd_client.disable_default_operator(); @@ -99,7 +116,7 @@ fn test_simple_conf_change(cluster: &mut Cluster) { assert_eq!(cluster.get(b"k4"), Some(b"v4".to_vec())); must_get_equal(&engine_2, b"k4", b"v4"); - let resp = call_conf_change(cluster, r1, ConfChangeType::AddNode, new_peer(2, 2)).unwrap(); + let resp = call_conf_change!(cluster, r1, ConfChangeType::AddNode, new_peer(2, 2)).unwrap(); let exec_res = resp .get_header() .get_error() @@ -138,12 +155,13 @@ fn test_simple_conf_change(cluster: &mut Cluster) { // TODO: add more tests. } -fn new_conf_change_peer(store: &metapb::Store, pd_client: &Arc) -> metapb::Peer { - let peer_id = pd_client.alloc_id().unwrap(); - new_peer(store.get_id(), peer_id) -} - -fn test_pd_conf_change(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_pd_conf_change() { + let count = 5; + let mut cluster = new_cluster(0, count); let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer count check. 
pd_client.disable_default_operator(); @@ -233,27 +251,6 @@ fn test_pd_conf_change(cluster: &mut Cluster) { // TODO: add more tests. } -#[test] -fn test_server_simple_conf_change() { - let count = 5; - let mut cluster = new_server_cluster(0, count); - test_simple_conf_change(&mut cluster); -} - -#[test] -fn test_node_pd_conf_change() { - let count = 5; - let mut cluster = new_node_cluster(0, count); - test_pd_conf_change(&mut cluster); -} - -#[test] -fn test_server_pd_conf_change() { - let count = 5; - let mut cluster = new_server_cluster(0, count); - test_pd_conf_change(&mut cluster); -} - fn wait_till_reach_count(pd_client: Arc, region_id: u64, c: usize) { let mut replica_count = 0; for _ in 0..1000 { @@ -273,7 +270,13 @@ fn wait_till_reach_count(pd_client: Arc, region_id: u64, c: usize) ); } -fn test_auto_adjust_replica(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_auto_adjust_replica() { + let count = 7; + let mut cluster = new_cluster(0, count); cluster.start().unwrap(); let pd_client = Arc::clone(&cluster.pd_client); @@ -333,21 +336,19 @@ fn test_auto_adjust_replica(cluster: &mut Cluster) { wait_till_reach_count(Arc::clone(&pd_client), region_id, 5); } -#[test] -fn test_node_auto_adjust_replica() { - let count = 7; - let mut cluster = new_node_cluster(0, count); - test_auto_adjust_replica(&mut cluster); -} - -#[test] -fn test_server_auto_adjust_replica() { - let count = 7; - let mut cluster = new_server_cluster(0, count); - test_auto_adjust_replica(&mut cluster); +macro_rules! 
find_leader_response_header { + ($cluster:expr, $region_id:expr, $peer:expr) => {{ + let find_leader = new_status_request($region_id, $peer, new_region_leader_cmd()); + let resp = $cluster.call_command(find_leader, Duration::from_secs(5)); + resp.unwrap().take_header() + }}; } -fn test_after_remove_itself(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +fn test_after_remove_itself() { + let count = 3; + let mut cluster = new_cluster(0, count); let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer count check. pd_client.disable_default_operator(); @@ -400,39 +401,25 @@ fn test_after_remove_itself(cluster: &mut Cluster) { cluster.run_node(3).unwrap(); for _ in 0..250 { - let region: RegionLocalState = engine1 - .get_msg_cf(CF_RAFT, &keys::region_state_key(r1)) - .unwrap() - .unwrap(); + let region: RegionLocalState = engine1.region_local_state(r1).unwrap().unwrap(); if region.get_state() == PeerState::Tombstone { return; } sleep_ms(20); } - let region: RegionLocalState = engine1 - .get_msg_cf(CF_RAFT, &keys::region_state_key(r1)) - .unwrap() - .unwrap(); + let region: RegionLocalState = engine1.region_local_state(r1).unwrap().unwrap(); assert_eq!(region.get_state(), PeerState::Tombstone); // TODO: add split after removing itself test later. 
} -#[test] -fn test_node_after_remove_itself() { - let count = 3; - let mut cluster = new_node_cluster(0, count); - test_after_remove_itself(&mut cluster); -} - -#[test] -fn test_server_after_remove_itself() { - let count = 3; - let mut cluster = new_server_cluster(0, count); - test_after_remove_itself(&mut cluster); -} - -fn test_split_brain(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_split_brain() { + let count = 6; + let mut cluster = new_cluster(0, count); let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer number check. pd_client.disable_default_operator(); @@ -488,7 +475,7 @@ fn test_split_brain(cluster: &mut Cluster) { // check whether a new cluster [1,2,3] is formed // if so, both [1,2,3] and [4,5,6] think they serve for region r1 // result in split brain - let header0 = find_leader_response_header(cluster, r1, new_peer(2, 2)); + let header0 = find_leader_response_header!(cluster, r1, new_peer(2, 2)); assert!(header0.get_error().has_region_not_found()); // at least wait for a round of election timeout and check again @@ -496,36 +483,17 @@ fn test_split_brain(cluster: &mut Cluster) { let election_timeout = base_tick * cluster.cfg.raft_store.raft_election_timeout_ticks as u32; thread::sleep(election_timeout * 2); - let header1 = find_leader_response_header(cluster, r1, new_peer(2, 2)); + let header1 = find_leader_response_header!(cluster, r1, new_peer(2, 2)); assert!(header1.get_error().has_region_not_found()); } -fn find_leader_response_header( - cluster: &mut Cluster, - region_id: u64, - peer: metapb::Peer, -) -> RaftResponseHeader { - let find_leader = new_status_request(region_id, peer, new_region_leader_cmd()); - let resp = cluster.call_command(find_leader, Duration::from_secs(5)); - resp.unwrap().take_header() -} - -#[test] -fn 
test_server_split_brain() { - let count = 6; - let mut cluster = new_server_cluster(0, count); - test_split_brain(&mut cluster); -} - -#[test] -fn test_node_split_brain() { - let count = 6; - let mut cluster = new_node_cluster(0, count); - test_split_brain(&mut cluster); -} - -/// A helper function for testing the conf change is safe. -fn test_conf_change_safe(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_conf_change_safe() { + let count = 5; + let mut cluster = new_cluster(0, count); let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer count check. pd_client.disable_default_operator(); @@ -589,7 +557,11 @@ fn test_conf_change_safe(cluster: &mut Cluster) { pd_client.must_remove_peer(region_id, new_peer(2, 2)); } -fn test_transfer_leader_safe(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_transfer_leader_safe() { + let count = 5; + let mut cluster = new_cluster(0, count); let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer count check. pd_client.disable_default_operator(); @@ -636,7 +608,40 @@ fn test_transfer_leader_safe(cluster: &mut Cluster) { } } -fn test_learner_conf_change(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_conf_change_remove_leader() { + let mut cluster = new_cluster(0, 3); + cluster.cfg.raft_store.allow_remove_leader = false; + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + let r1 = cluster.run_conf_change(); + pd_client.must_add_peer(r1, new_peer(2, 2)); + pd_client.must_add_peer(r1, new_peer(3, 3)); + + // Transfer leader to the first peer. 
+ cluster.must_transfer_leader(r1, new_peer(1, 1)); + // Put a new kv to ensure leader has applied to newest log, so that to avoid + // false warning about pending conf change. + cluster.must_put(b"k1", b"v1"); + + // Try to remove leader, which should be ignored. + let res = call_conf_change!(cluster, r1, ConfChangeType::RemoveNode, new_peer(1, 1)).unwrap(); + assert!( + res.get_header() + .get_error() + .get_message() + .contains("ignore remove leader"), + "{:?}", + res + ); +} + +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_learner_conf_change() { + let count = 5; + let mut cluster = new_cluster(0, count); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); let r1 = cluster.run_conf_change(); @@ -654,11 +659,11 @@ fn test_learner_conf_change(cluster: &mut Cluster) { must_get_equal(&engine_4, b"k2", b"v2"); // Can't add duplicate learner. - let resp = call_conf_change( + let resp = call_conf_change!( cluster, r1, ConfChangeType::AddLearnerNode, - new_learner_peer(4, 11), + new_learner_peer(4, 11) ) .unwrap(); let err_msg = resp.get_header().get_error().get_message(); @@ -702,7 +707,7 @@ fn test_learner_conf_change(cluster: &mut Cluster) { } else { ConfChangeType::AddNode }; - call_conf_change(cluster, r1, conf_type, peer).unwrap() + call_conf_change!(cluster, r1, conf_type, peer).unwrap() }; // Add learner on store which already has peer. 
@@ -726,67 +731,10 @@ fn test_learner_conf_change(cluster: &mut Cluster) { pd_client.must_none_peer(r1, new_peer(4, 15)); } -#[test] -fn test_node_conf_change_safe() { - let count = 5; - let mut cluster = new_node_cluster(0, count); - test_conf_change_safe(&mut cluster); -} - -#[test] -fn test_server_safe_conf_change() { - let count = 5; - let mut cluster = new_server_cluster(0, count); - test_conf_change_safe(&mut cluster); -} - -#[test] -fn test_server_transfer_leader_safe() { - let count = 5; - let mut cluster = new_server_cluster(0, count); - test_transfer_leader_safe(&mut cluster); -} - -#[test] -fn test_conf_change_remove_leader() { - let mut cluster = new_node_cluster(0, 3); - cluster.cfg.raft_store.allow_remove_leader = false; - let pd_client = Arc::clone(&cluster.pd_client); - pd_client.disable_default_operator(); - let r1 = cluster.run_conf_change(); - pd_client.must_add_peer(r1, new_peer(2, 2)); - pd_client.must_add_peer(r1, new_peer(3, 3)); - - // Transfer leader to the first peer. - cluster.must_transfer_leader(r1, new_peer(1, 1)); - // Put a new kv to ensure leader has applied to newest log, so that to avoid - // false warning about pending conf change. - cluster.must_put(b"k1", b"v1"); - - // Try to remove leader, which should be ignored. 
- let res = - call_conf_change(&mut cluster, r1, ConfChangeType::RemoveNode, new_peer(1, 1)).unwrap(); - assert!( - res.get_header() - .get_error() - .get_message() - .contains("ignore remove leader"), - "{:?}", - res - ); -} - -#[test] -fn test_node_learner_conf_change() { - let count = 5; - let mut cluster = new_node_cluster(0, count); - test_learner_conf_change(&mut cluster); -} - -#[test] +#[test_case(test_raftstore::new_server_cluster)] fn test_learner_with_slow_snapshot() { - let mut cluster = new_server_cluster(0, 3); - configure_for_snapshot(&mut cluster); + let mut cluster = new_cluster(0, 3); + configure_for_snapshot(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); let r1 = cluster.run_conf_change(); @@ -829,7 +777,7 @@ fn test_learner_with_slow_snapshot() { }); // New added learner should keep pending until snapshot is applied. - cluster.sim.wl().add_send_filter(1, snap_filter); + cluster.add_send_filter_on_node(1, snap_filter); pd_client.must_add_peer(r1, new_learner_peer(2, 2)); for _ in 0..500 { sleep_ms(10); @@ -864,7 +812,12 @@ fn test_learner_with_slow_snapshot() { assert!(count.load(Ordering::SeqCst) > 0); } -fn test_stale_peer(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_stale_peer() { + let mut cluster = new_cluster(0, 4); + // To avoid stale peers know they are stale from PD. + cluster.cfg.raft_store.max_leader_missing_duration = ReadableDuration::hours(2); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -888,37 +841,15 @@ fn test_stale_peer(cluster: &mut Cluster) { must_get_none(&cluster.get_engine(3), b"k1"); } -#[test] -fn test_node_stale_peer() { - let mut cluster = new_node_cluster(0, 4); - // To avoid stale peers know they are stale from PD. 
- cluster.cfg.raft_store.max_leader_missing_duration = ReadableDuration::hours(2); - test_stale_peer(&mut cluster); -} - -fn call_conf_change( - cluster: &mut Cluster, - region_id: u64, - conf_change_type: ConfChangeType, - peer: metapb::Peer, -) -> Result -where - T: Simulator, -{ - let conf_change = new_change_peer_request(conf_change_type, peer); - let epoch = cluster.pd_client.get_region_epoch(region_id); - let admin_req = new_admin_request(region_id, &epoch, conf_change); - cluster.call_command_on_leader(admin_req, Duration::from_secs(3)) -} - /// Tests if conf change relies on heartbeat. -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_conf_change_fast() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // Sets heartbeat timeout to more than 5 seconds. It also changes the election // timeout, but it's OK as the cluster starts with only one peer, it will // campaigns immediately. - configure_for_lease_read(&mut cluster, Some(5000), None); + configure_for_lease_read(&mut cluster.cfg, Some(5000), None); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); let r1 = cluster.run_conf_change(); diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index e50ca59fdff..eec5ea9b94c 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -285,7 +285,7 @@ fn test_flashback_for_read() { #[test] fn test_flashback_for_local_read() { let mut cluster = new_node_cluster(0, 3); - let election_timeout = configure_for_lease_read(&mut cluster, Some(50), None); + let election_timeout = configure_for_lease_read(&mut cluster.cfg, Some(50), None); // Avoid triggering the log compaction in this test case. 
cluster.cfg.raft_store.raft_log_gc_threshold = 100; cluster.run(); @@ -384,7 +384,7 @@ fn test_flashback_for_check_is_in_persist() { #[test] fn test_flashback_for_apply_snapshot() { let mut cluster = new_node_cluster(0, 3); - configure_for_snapshot(&mut cluster); + configure_for_snapshot(&mut cluster.cfg); cluster.run(); cluster.must_transfer_leader(1, new_peer(3, 3)); diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index 855063bae98..6d8319ebae6 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -37,7 +37,7 @@ fn test_renew_lease(cluster: &mut Cluster) { cluster.cfg.raft_store.raft_log_gc_threshold = 100; // Increase the Raft tick interval to make this test case running reliably. // Use large election timeout to make leadership stable. - configure_for_lease_read(cluster, Some(50), Some(10_000)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); // Override max leader lease to 2 seconds. let max_lease = Duration::from_secs(2); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(max_lease); @@ -132,7 +132,7 @@ fn test_lease_expired(cluster: &mut Cluster) { // Avoid triggering the log compaction in this test case. cluster.cfg.raft_store.raft_log_gc_threshold = 100; // Increase the Raft tick interval to make this test case running reliably. - let election_timeout = configure_for_lease_read(cluster, Some(50), None); + let election_timeout = configure_for_lease_read(&mut cluster.cfg, Some(50), None); let node_id = 3u64; let store_id = 3u64; @@ -174,7 +174,7 @@ fn test_lease_unsafe_during_leader_transfers(cluster: &mut Cluster // Avoid triggering the log compaction in this test case. cluster.cfg.raft_store.raft_log_gc_threshold = 100; // Increase the Raft tick interval to make this test case running reliably. 
- let election_timeout = configure_for_lease_read(cluster, Some(500), Some(5)); + let election_timeout = configure_for_lease_read(&mut cluster.cfg, Some(500), Some(5)); cluster.cfg.raft_store.check_leader_lease_interval = ReadableDuration::hours(10); cluster.cfg.raft_store.renew_leader_lease_advance_duration = ReadableDuration::secs(0); @@ -297,7 +297,7 @@ fn test_batch_id_in_lease(cluster: &mut Cluster) { cluster.cfg.raft_store.check_leader_lease_interval = ReadableDuration::hours(10); // Increase the Raft tick interval to make this test case running reliably. - let election_timeout = configure_for_lease_read(cluster, Some(100), None); + let election_timeout = configure_for_lease_read(&mut cluster.cfg, Some(100), None); cluster.run(); let (split_key1, split_key2) = (b"k22", b"k44"); @@ -402,7 +402,7 @@ fn test_node_callback_when_destroyed() { let count = 3; let mut cluster = new_node_cluster(0, count); // Increase the election tick to make this test case running reliably. - configure_for_lease_read(&mut cluster, None, Some(50)); + configure_for_lease_read(&mut cluster.cfg, None, Some(50)); cluster.run(); cluster.must_put(b"k1", b"v1"); let leader = cluster.leader_of_region(1).unwrap(); @@ -457,7 +457,7 @@ fn test_lease_read_callback_destroy() { // Only server cluster can fake sending message successfully in raftstore layer. let mut cluster = new_server_cluster(0, 3); // Increase the Raft tick interval to make this test case running reliably. - let election_timeout = configure_for_lease_read(&mut cluster, Some(50), None); + let election_timeout = configure_for_lease_read(&mut cluster.cfg, Some(50), None); cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); cluster.must_put(b"k1", b"v1"); @@ -480,7 +480,7 @@ fn test_read_index_stale_in_suspect_lease() { let mut cluster = new_node_cluster(0, 3); // Increase the election tick to make this test case running reliably. 
- configure_for_lease_read(&mut cluster, Some(50), Some(10_000)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); let max_lease = Duration::from_secs(2); // Stop log compaction to transfer leader with filter easier. configure_for_request_snapshot(&mut cluster); @@ -581,7 +581,7 @@ fn test_read_index_stale_in_suspect_lease() { #[test] fn test_local_read_cache() { let mut cluster = new_node_cluster(0, 3); - configure_for_lease_read(&mut cluster, Some(50), None); + configure_for_lease_read(&mut cluster.cfg, Some(50), None); cluster.pd_client.disable_default_operator(); cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); @@ -616,7 +616,7 @@ fn test_not_leader_read_lease() { // Avoid triggering the log compaction in this test case. cluster.cfg.raft_store.raft_log_gc_threshold = 100; // Increase the Raft tick interval to make this test case running reliably. - configure_for_lease_read(&mut cluster, Some(50), None); + configure_for_lease_read(&mut cluster.cfg, Some(50), None); let heartbeat_interval = cluster.cfg.raft_store.raft_heartbeat_interval(); cluster.run(); @@ -671,7 +671,7 @@ fn test_not_leader_read_lease() { #[test] fn test_read_index_after_write() { let mut cluster = new_node_cluster(0, 3); - configure_for_lease_read(&mut cluster, Some(50), Some(10)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10)); let heartbeat_interval = cluster.cfg.raft_store.raft_heartbeat_interval(); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -737,7 +737,7 @@ fn test_infinite_lease() { cluster.cfg.raft_store.raft_log_gc_threshold = 100; // Increase the Raft tick interval to make this test case running reliably. // Use large election timeout to make leadership stable. - configure_for_lease_read(&mut cluster, Some(50), Some(10_000)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); // Override max leader lease to 2 seconds. 
let max_lease = Duration::from_secs(2); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(max_lease); @@ -792,7 +792,7 @@ fn test_node_local_read_renew_lease() { let mut cluster = new_node_cluster(0, 3); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(500); let (base_tick_ms, election_ticks) = (50, 10); - configure_for_lease_read(&mut cluster, Some(50), Some(10)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10)); cluster.pd_client.disable_default_operator(); let region_id = cluster.run_conf_change(); diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index c72ba5ac595..de1187f35b1 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -5,7 +5,6 @@ use std::{iter::*, sync::*, thread, time::*}; use api_version::{test_kv_format_impl, KvFormat}; use engine_traits::{Peekable, CF_LOCK, CF_RAFT, CF_WRITE}; use kvproto::{ - kvrpcpb::Context, raft_cmdpb::CmdType, raft_serverpb::{PeerState, RaftMessage, RegionLocalState}, }; @@ -13,10 +12,7 @@ use pd_client::PdClient; use raft::eraftpb::{ConfChangeType, MessageType}; use raftstore::store::{Callback, LocksStatus}; use test_raftstore::*; -use tikv::storage::{ - kv::{SnapContext, SnapshotExt}, - Engine, Snapshot, -}; +use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{config::*, HandyRwLock}; use txn_types::{Key, PessimisticLock}; @@ -25,7 +21,7 @@ use txn_types::{Key, PessimisticLock}; fn test_node_base_merge() { let mut cluster = new_node_cluster(0, 3); cluster.cfg.rocksdb.titan.enabled = true; - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.run(); @@ -113,7 +109,7 @@ fn test_node_base_merge() { #[test] fn test_node_merge_with_slow_learner() { let mut cluster = new_node_cluster(0, 2); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.raft_log_gc_threshold = 
40; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(40); cluster.cfg.raft_store.merge_max_log_gap = 15; @@ -188,7 +184,7 @@ fn test_node_merge_with_slow_learner() { #[test] fn test_node_merge_prerequisites_check() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); cluster.run(); @@ -269,7 +265,7 @@ fn test_node_merge_prerequisites_check() { #[test] fn test_node_check_merged_message() { let mut cluster = new_node_cluster(0, 4); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); ignore_merge_target_integrity(&mut cluster); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -340,7 +336,7 @@ fn test_node_merge_slow_split_left() { // merge. fn test_node_merge_slow_split(is_right_derive: bool) { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); ignore_merge_target_integrity(&mut cluster); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -405,7 +401,7 @@ fn test_node_merge_slow_split(is_right_derive: bool) { #[test] fn test_node_merge_dist_isolation() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); ignore_merge_target_integrity(&mut cluster); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -481,7 +477,7 @@ fn test_node_merge_dist_isolation() { #[test] fn test_node_merge_brain_split() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); ignore_merge_target_integrity(&mut cluster); cluster.cfg.raft_store.raft_log_gc_threshold = 12; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(12); @@ -660,9 +656,9 @@ fn test_merge_approximate_size_and_keys() { #[test] fn test_node_merge_update_region() { 
let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); // Election timeout and max leader lease is 1s. - configure_for_lease_read(&mut cluster, Some(100), Some(10)); + configure_for_lease_read(&mut cluster.cfg, Some(100), Some(10)); cluster.run(); @@ -740,7 +736,7 @@ fn test_node_merge_update_region() { #[test] fn test_node_merge_catch_up_logs_empty_entries() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.run(); cluster.must_put(b"k1", b"v1"); @@ -795,7 +791,7 @@ fn test_node_merge_catch_up_logs_empty_entries() { #[test] fn test_merge_with_slow_promote() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -837,7 +833,7 @@ fn test_merge_with_slow_promote() { #[test] fn test_merge_isolated_store_with_no_target_peer() { let mut cluster = new_node_cluster(0, 4); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); ignore_merge_target_integrity(&mut cluster); cluster.cfg.raft_store.right_derive_when_split = true; let pd_client = Arc::clone(&cluster.pd_client); @@ -896,7 +892,7 @@ fn test_merge_isolated_store_with_no_target_peer() { #[test] fn test_merge_cascade_merge_isolated() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -947,7 +943,7 @@ fn test_merge_cascade_merge_isolated() { #[test] fn test_merge_isolated_not_in_merge_learner() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -993,7 +989,7 @@ fn 
test_merge_isolated_not_in_merge_learner() { #[test] fn test_merge_isolated_stale_learner() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.right_derive_when_split = true; // Do not rely on pd to remove stale peer cluster.cfg.raft_store.max_leader_missing_duration = ReadableDuration::hours(2); @@ -1044,7 +1040,7 @@ fn test_merge_isolated_stale_learner() { #[test] fn test_merge_isolated_not_in_merge_learner_2() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -1089,7 +1085,7 @@ fn test_merge_isolated_not_in_merge_learner_2() { #[test] fn test_merge_remove_target_peer_isolated() { let mut cluster = new_node_cluster(0, 4); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -1151,7 +1147,7 @@ fn test_sync_max_ts_after_region_merge() { fn test_sync_max_ts_after_region_merge_impl() { let mut cluster = new_server_cluster_with_api_ver(0, 3, F::TAG); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.run(); // Transfer leader to node 1 first to ensure all operations happen on node 1 @@ -1166,45 +1162,14 @@ fn test_sync_max_ts_after_region_merge_impl() { let right = cluster.get_region(b"k3"); let cm = cluster.sim.read().unwrap().get_concurrency_manager(1); - let mut storage = cluster - .sim - .read() - .unwrap() - .storages - .get(&1) - .unwrap() - .clone(); - let mut wait_for_synced = |cluster: &mut Cluster| { - let region_id = right.get_id(); - let leader = cluster.leader_of_region(region_id).unwrap(); - let epoch = cluster.get_region_epoch(region_id); - let mut ctx = Context::default(); - ctx.set_region_id(region_id); - ctx.set_peer(leader); - 
ctx.set_region_epoch(epoch); - let snap_ctx = SnapContext { - pb_ctx: &ctx, - ..Default::default() - }; - let snapshot = storage.snapshot(snap_ctx).unwrap(); - let txn_ext = snapshot.txn_ext.clone().unwrap(); - for retry in 0..10 { - if txn_ext.is_max_ts_synced() { - break; - } - thread::sleep(Duration::from_millis(1 << retry)); - } - assert!(snapshot.ext().is_max_ts_synced()); - }; - - wait_for_synced(&mut cluster); + wait_for_synced(&mut cluster, 1, 1); let max_ts = cm.max_ts(); cluster.pd_client.trigger_tso_failure(); // Merge left to right cluster.pd_client.must_merge(left.get_id(), right.get_id()); - wait_for_synced(&mut cluster); + wait_for_synced(&mut cluster, 1, 1); let new_max_ts = cm.max_ts(); assert!(new_max_ts > max_ts); } @@ -1214,8 +1179,8 @@ fn test_sync_max_ts_after_region_merge_impl() { #[test] fn test_merge_snapshot_demote() { let mut cluster = new_node_cluster(0, 4); - configure_for_merge(&mut cluster); - configure_for_snapshot(&mut cluster); + configure_for_merge(&mut cluster.cfg); + configure_for_snapshot(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -1270,7 +1235,7 @@ fn test_merge_snapshot_demote() { #[test] fn test_propose_in_memory_pessimistic_locks() { let mut cluster = new_server_cluster(0, 2); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -1349,7 +1314,7 @@ fn test_propose_in_memory_pessimistic_locks() { #[test] fn test_merge_pessimistic_locks_when_gap_is_too_large() { let mut cluster = new_server_cluster(0, 2); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.pessimistic_txn.pipelined = true; cluster.cfg.pessimistic_txn.in_memory = true; // Set raft_entry_max_size to 64 KiB. 
We will try to make the gap larger than @@ -1399,7 +1364,7 @@ fn test_merge_pessimistic_locks_when_gap_is_too_large() { #[test] fn test_merge_pessimistic_locks_repeated_merge() { let mut cluster = new_server_cluster(0, 2); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.cfg.pessimistic_txn.pipelined = true; cluster.cfg.pessimistic_txn.in_memory = true; let pd_client = Arc::clone(&cluster.pd_client); @@ -1466,7 +1431,7 @@ fn test_merge_pessimistic_locks_repeated_merge() { #[test] fn test_node_merge_long_isolated() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); ignore_merge_target_integrity(&mut cluster); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -1528,7 +1493,7 @@ fn test_node_merge_long_isolated() { #[test] fn test_stale_message_after_merge() { let mut cluster = new_server_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -1576,7 +1541,7 @@ fn test_stale_message_after_merge() { #[test] fn test_prepare_merge_with_reset_matched() { let mut cluster = new_server_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); let r = cluster.run_conf_change(); @@ -1625,7 +1590,7 @@ fn test_prepare_merge_with_reset_matched() { #[test] fn test_prepare_merge_with_5_nodes_snapshot() { let mut cluster = new_server_cluster(0, 5); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); cluster.run(); diff --git a/tests/integrations/raftstore/test_prevote.rs b/tests/integrations/raftstore/test_prevote.rs index a4336e9f3ed..c81b34f0435 100644 --- 
a/tests/integrations/raftstore/test_prevote.rs +++ b/tests/integrations/raftstore/test_prevote.rs @@ -50,7 +50,7 @@ fn test_prevote( cluster.cfg.raft_store.hibernate_regions = false; // To stable the test, we use a large election timeout to make // leader's readiness get handle within an election timeout - configure_for_lease_read(cluster, Some(20), Some(10)); + configure_for_lease_read(&mut cluster.cfg, Some(20), Some(10)); let leader_id = 1; let detect_during_failure = detect_during_failure.into(); diff --git a/tests/integrations/raftstore/test_region_info_accessor.rs b/tests/integrations/raftstore/test_region_info_accessor.rs index 9bff000194a..24d90b66327 100644 --- a/tests/integrations/raftstore/test_region_info_accessor.rs +++ b/tests/integrations/raftstore/test_region_info_accessor.rs @@ -172,7 +172,7 @@ fn test_region_info_accessor_impl(cluster: &mut Cluster, c: &Region #[test] fn test_node_cluster_region_info_accessor() { let mut cluster = new_node_cluster(1, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); diff --git a/tests/integrations/raftstore/test_replica_read.rs b/tests/integrations/raftstore/test_replica_read.rs index 6deccad3a5e..16fad00a59b 100644 --- a/tests/integrations/raftstore/test_replica_read.rs +++ b/tests/integrations/raftstore/test_replica_read.rs @@ -58,7 +58,7 @@ fn test_replica_read_not_applied() { let mut cluster = new_node_cluster(0, 3); // Increase the election tick to make this test case running reliably. 
- configure_for_lease_read(&mut cluster, Some(50), Some(30)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(30)); let max_lease = Duration::from_secs(1); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(max_lease); // After the leader has committed to its term, pending reads on followers can be @@ -129,7 +129,7 @@ fn test_replica_read_not_applied() { fn test_replica_read_on_hibernate() { let mut cluster = new_node_cluster(0, 3); - configure_for_lease_read(&mut cluster, Some(50), Some(20)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(20)); cluster.pd_client.disable_default_operator(); let r1 = cluster.run_conf_change(); @@ -191,7 +191,7 @@ fn test_replica_read_on_hibernate() { fn test_read_hibernated_region() { let mut cluster = new_node_cluster(0, 3); // Initialize the cluster. - configure_for_lease_read(&mut cluster, Some(100), Some(8)); + configure_for_lease_read(&mut cluster.cfg, Some(100), Some(8)); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(Duration::from_millis(1)); cluster.cfg.raft_store.check_leader_lease_interval = ReadableDuration::hours(10); cluster.pd_client.disable_default_operator(); @@ -254,7 +254,7 @@ fn test_read_hibernated_region() { fn test_replica_read_on_stale_peer() { let mut cluster = new_node_cluster(0, 3); - configure_for_lease_read(&mut cluster, Some(50), Some(30)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(30)); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -288,7 +288,7 @@ fn test_read_index_out_of_order() { let mut cluster = new_node_cluster(0, 2); // Use long election timeout and short lease. 
- configure_for_lease_read(&mut cluster, Some(1000), Some(10)); + configure_for_lease_read(&mut cluster.cfg, Some(1000), Some(10)); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(Duration::from_millis(100)); @@ -328,7 +328,7 @@ fn test_read_index_retry_lock_checking() { let mut cluster = new_node_cluster(0, 2); // Use long election timeout and short lease. - configure_for_lease_read(&mut cluster, Some(50), Some(20)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(20)); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(Duration::from_millis(100)); @@ -402,7 +402,7 @@ fn test_split_isolation() { let mut cluster = new_node_cluster(0, 2); // Use long election timeout and short lease. configure_for_hibernate(&mut cluster); - configure_for_lease_read(&mut cluster, Some(50), Some(20)); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(20)); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(11); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -461,7 +461,7 @@ fn test_split_isolation() { #[test] fn test_read_local_after_snapshpot_replace_peer() { let mut cluster = new_node_cluster(0, 3); - configure_for_lease_read(&mut cluster, Some(50), None); + configure_for_lease_read(&mut cluster.cfg, Some(50), None); cluster.cfg.raft_store.raft_log_gc_threshold = 12; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(12); @@ -527,7 +527,7 @@ fn test_read_local_after_snapshpot_replace_peer() { #[test] fn test_malformed_read_index() { let mut cluster = new_node_cluster(0, 3); - configure_for_lease_read(&mut cluster, Some(50), None); + configure_for_lease_read(&mut cluster.cfg, Some(50), None); cluster.cfg.raft_store.raft_log_gc_threshold = 12; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(12); cluster.cfg.raft_store.hibernate_regions = true; diff --git a/tests/integrations/raftstore/test_single.rs b/tests/integrations/raftstore/test_single.rs index 
b7fcb6a7b34..7fedc3c1cd4 100644 --- a/tests/integrations/raftstore/test_single.rs +++ b/tests/integrations/raftstore/test_single.rs @@ -3,15 +3,18 @@ use std::time::Duration; use engine_traits::{CfName, CF_DEFAULT, CF_WRITE}; -use raftstore::store::*; +use raftstore::store::RAFT_INIT_LOG_INDEX; use rand::prelude::*; -use test_raftstore::*; +use test_raftstore::{new_put_cf_cmd, new_put_cmd, new_request, sleep_ms}; use test_raftstore_macro::test_case; use tikv_util::{config::*, time::Instant}; // TODO add epoch not match test cases. -fn test_delete_range(cluster: &mut Cluster, cf: CfName) { +fn test_delete_range( + cluster: &mut test_raftstore::Cluster, + cf: CfName, +) { let data_set: Vec<_> = (1..500) .map(|i| { ( @@ -44,19 +47,10 @@ fn test_delete_range(cluster: &mut Cluster, cf: CfName) { } } -fn test_put_large_entry(cluster: &mut Cluster) { - let max_size: usize = 1024; - cluster.cfg.raft_store.raft_entry_max_size = ReadableSize(max_size as u64); - - cluster.run(); - - let large_value = vec![b'v'; max_size + 1]; - let res = cluster.put(b"key", large_value.as_slice()); - assert!(res.as_ref().err().unwrap().has_raft_entry_too_large()); -} - #[test_case(test_raftstore::new_node_cluster)] #[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_put() { let mut cluster = new_cluster(0, 1); cluster.run(); @@ -103,6 +97,8 @@ fn test_put() { #[test_case(test_raftstore::new_node_cluster)] #[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_delete() { let mut cluster = new_cluster(0, 1); cluster.run(); @@ -133,7 +129,7 @@ fn test_delete() { #[test] fn test_node_use_delete_range() { - let mut cluster = new_node_cluster(0, 1); + let mut cluster = test_raftstore::new_node_cluster(0, 1); cluster.cfg.raft_store.use_delete_range = true; cluster.run(); 
test_delete_range(&mut cluster, CF_DEFAULT); @@ -143,7 +139,7 @@ fn test_node_use_delete_range() { #[test] fn test_node_not_use_delete_range() { - let mut cluster = new_node_cluster(0, 1); + let mut cluster = test_raftstore::new_node_cluster(0, 1); cluster.cfg.raft_store.use_delete_range = false; cluster.run(); test_delete_range(&mut cluster, CF_DEFAULT); @@ -153,6 +149,8 @@ fn test_node_not_use_delete_range() { #[test_case(test_raftstore::new_node_cluster)] #[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_wrong_store_id() { let mut cluster = new_cluster(0, 1); cluster.run(); @@ -178,21 +176,25 @@ fn test_wrong_store_id() { ); } -#[test] -fn test_node_put_large_entry() { - let mut cluster = new_node_cluster(0, 1); - test_put_large_entry(&mut cluster); -} +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_put_large_entry() { + let mut cluster = new_cluster(0, 1); + let max_size: usize = 1024; + cluster.cfg.raft_store.raft_entry_max_size = ReadableSize(max_size as u64); -#[test] -fn test_server_put_large_entry() { - let mut cluster = new_server_cluster(0, 1); - test_put_large_entry(&mut cluster); + cluster.run(); + + let large_value = vec![b'v'; max_size + 1]; + let res = cluster.put(b"key", large_value.as_slice()); + assert!(res.as_ref().err().unwrap().has_raft_entry_too_large()); } #[test] fn test_node_apply_no_op() { - let mut cluster = new_node_cluster(0, 1); + let mut cluster = test_raftstore::new_node_cluster(0, 1); cluster.pd_client.disable_default_operator(); cluster.run(); diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index 8d3212ad4a6..d18f42ec8ca 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ 
b/tests/integrations/raftstore/test_snap.rs @@ -100,7 +100,7 @@ fn test_server_huge_snapshot_multi_files() { fn test_server_snap_gc_internal(version: &str) { let mut cluster = new_server_cluster(0, 3); - configure_for_snapshot(&mut cluster); + configure_for_snapshot(&mut cluster.cfg); cluster.pd_client.reset_version(version); cluster.cfg.raft_store.snap_gc_timeout = ReadableDuration::millis(300); cluster.cfg.raft_store.max_snapshot_file_raw_size = ReadableSize::mb(100); @@ -269,7 +269,7 @@ fn test_server_concurrent_snap() { } fn test_cf_snapshot(cluster: &mut Cluster) { - configure_for_snapshot(cluster); + configure_for_snapshot(&mut cluster.cfg); cluster.run(); let cf = "lock"; @@ -443,7 +443,7 @@ impl Filter for SnapshotAppendFilter { } fn test_snapshot_with_append(cluster: &mut Cluster) { - configure_for_snapshot(cluster); + configure_for_snapshot(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer count check. diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 23c3b0b41c2..4b7914f7324 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -17,6 +17,7 @@ use raftstore::{ Result, }; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::config::*; use txn_types::{Key, PessimisticLock}; @@ -24,88 +25,83 @@ use txn_types::{Key, PessimisticLock}; pub const REGION_MAX_SIZE: u64 = 50000; pub const REGION_SPLIT_SIZE: u64 = 30000; -fn test_base_split_region(cluster: &mut Cluster, split: F, right_derive: bool) -where - T: Simulator, - F: Fn(&mut Cluster, &metapb::Region, &[u8]), -{ - cluster.cfg.raft_store.right_derive_when_split = right_derive; - cluster.run(); - - let pd_client = Arc::clone(&cluster.pd_client); - - let tbls = vec![ - (b"k22", b"k11", b"k33"), - (b"k11", b"k00", b"k11"), - (b"k33", b"k22", b"k33"), - ]; - - 
for (split_key, left_key, right_key) in tbls { - cluster.must_put(left_key, b"v1"); - cluster.must_put(right_key, b"v3"); - - // Left and right key must be in same region before split. - let region = pd_client.get_region(left_key).unwrap(); - let region2 = pd_client.get_region(right_key).unwrap(); - assert_eq!(region.get_id(), region2.get_id()); - - // Split with split_key, so left_key must in left, and right_key in right. - split(cluster, ®ion, split_key); - - let left = pd_client.get_region(left_key).unwrap(); - let right = pd_client.get_region(right_key).unwrap(); - - assert_eq!( - region.get_id(), - if right_derive { - right.get_id() - } else { - left.get_id() - } - ); - assert_eq!(region.get_start_key(), left.get_start_key()); - assert_eq!(left.get_end_key(), right.get_start_key()); - assert_eq!(region.get_end_key(), right.get_end_key()); - - cluster.must_put(left_key, b"vv1"); - assert_eq!(cluster.get(left_key).unwrap(), b"vv1".to_vec()); - - cluster.must_put(right_key, b"vv3"); - assert_eq!(cluster.get(right_key).unwrap(), b"vv3".to_vec()); - - let epoch = left.get_region_epoch().clone(); - let get = new_request(left.get_id(), epoch, vec![new_get_cmd(right_key)], false); - debug!("requesting {:?}", get); - let resp = cluster - .call_command_on_leader(get, Duration::from_secs(5)) - .unwrap(); - assert!(resp.get_header().has_error(), "{:?}", resp); - assert!( - resp.get_header().get_error().has_key_not_in_region(), - "{:?}", - resp - ); - } -} - -#[test] -fn test_server_base_split_region_left_derive() { - let count = 5; - let mut cluster = new_server_cluster(0, count); - test_base_split_region(&mut cluster, Cluster::must_split, false); -} +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_server_base_split_region() { + let test_base_split_region = |right_derive| { + let count = 5; + let mut cluster = new_cluster(0, count); + + cluster.cfg.raft_store.right_derive_when_split = right_derive; + 
cluster.run(); + + let pd_client = Arc::clone(&cluster.pd_client); + + let tbls = vec![ + (b"k22", b"k11", b"k33"), + (b"k11", b"k00", b"k11"), + (b"k33", b"k22", b"k33"), + ]; + + for (split_key, left_key, right_key) in tbls { + cluster.must_put(left_key, b"v1"); + cluster.must_put(right_key, b"v3"); + + // Left and right key must be in same region before split. + let region = pd_client.get_region(left_key).unwrap(); + let region2 = pd_client.get_region(right_key).unwrap(); + assert_eq!(region.get_id(), region2.get_id()); + + // Split with split_key, so left_key must in left, and right_key in right. + cluster.must_split(®ion, split_key); + + let left = pd_client.get_region(left_key).unwrap(); + let right = pd_client.get_region(right_key).unwrap(); + + assert_eq!( + region.get_id(), + if right_derive { + right.get_id() + } else { + left.get_id() + } + ); + assert_eq!(region.get_start_key(), left.get_start_key()); + assert_eq!(left.get_end_key(), right.get_start_key()); + assert_eq!(region.get_end_key(), right.get_end_key()); + + cluster.must_put(left_key, b"vv1"); + assert_eq!(cluster.get(left_key).unwrap(), b"vv1".to_vec()); + + cluster.must_put(right_key, b"vv3"); + assert_eq!(cluster.get(right_key).unwrap(), b"vv3".to_vec()); + + let epoch = left.get_region_epoch().clone(); + let get = new_request(left.get_id(), epoch, vec![new_get_cmd(right_key)], false); + debug!("requesting {:?}", get); + let resp = cluster + .call_command_on_leader(get, Duration::from_secs(5)) + .unwrap(); + assert!(resp.get_header().has_error(), "{:?}", resp); + assert!( + resp.get_header().get_error().has_key_not_in_region(), + "{:?}", + resp + ); + } + }; -#[test] -fn test_server_base_split_region_right_derive() { - let count = 5; - let mut cluster = new_server_cluster(0, count); - test_base_split_region(&mut cluster, Cluster::must_split, true); + // left derive + test_base_split_region(false); + // right derive + test_base_split_region(true); } -#[test] 
+#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_server_split_region_twice() { let count = 5; - let mut cluster = new_server_cluster(0, count); + let mut cluster = new_cluster(0, count); cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); @@ -308,13 +304,13 @@ fn check_cluster(cluster: &mut Cluster, k: &[u8], v: &[u8], all_ /// sure broadcast commit is disabled when split. #[test] fn test_delay_split_region() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = test_raftstore::new_server_cluster(0, 3); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(500); cluster.cfg.raft_store.merge_max_log_gap = 100; cluster.cfg.raft_store.raft_log_gc_threshold = 500; // To stable the test, we use a large hearbeat timeout 200ms(100ms * 2). // And to elect leader quickly, set election timeout to 1s(100ms * 10). - configure_for_lease_read(&mut cluster, Some(100), Some(10)); + configure_for_lease_read(&mut cluster.cfg, Some(100), Some(10)); // We use three nodes for this test. cluster.run(); @@ -359,7 +355,12 @@ fn test_delay_split_region() { check_cluster(&mut cluster, b"k6", b"v6", false); } -fn test_split_overlap_snapshot(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_node_split_overlap_snapshot() { + let mut cluster = new_cluster(0, 3); // We use three nodes([1, 2, 3]) for this test. 
cluster.run(); @@ -410,19 +411,9 @@ fn test_split_overlap_snapshot(cluster: &mut Cluster) { must_get_equal(&engine3, b"k3", b"v3"); } -#[test] -fn test_node_split_overlap_snapshot() { - let mut cluster = new_node_cluster(0, 3); - test_split_overlap_snapshot(&mut cluster); -} - -#[test] -fn test_server_split_overlap_snapshot() { - let mut cluster = new_server_cluster(0, 3); - test_split_overlap_snapshot(&mut cluster); -} - -fn test_apply_new_version_snapshot(cluster: &mut Cluster) { +fn test_apply_new_version_snapshot( + cluster: &mut test_raftstore::Cluster, +) { // truncate the log quickly so that we can force sending snapshot. cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(5); @@ -477,19 +468,19 @@ fn test_apply_new_version_snapshot(cluster: &mut Cluster) { #[test] fn test_node_apply_new_version_snapshot() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = test_raftstore::new_node_cluster(0, 3); test_apply_new_version_snapshot(&mut cluster); } #[test] fn test_server_apply_new_version_snapshot() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = test_raftstore::new_server_cluster(0, 3); test_apply_new_version_snapshot(&mut cluster); } #[test] fn test_server_split_with_stale_peer() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = test_raftstore::new_server_cluster(0, 3); // disable raft log gc. 
cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(60); cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(500); @@ -557,7 +548,11 @@ fn test_server_split_with_stale_peer() { must_get_equal(&engine3, b"k3", b"v3"); } -fn test_split_region_diff_check(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +fn test_split_region_diff_check() { + let count = 1; + let mut cluster = new_cluster(0, count); let region_max_size = 2000; let region_split_size = 1000; cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(100); @@ -575,7 +570,7 @@ fn test_split_region_diff_check(cluster: &mut Cluster) { // The default size index distance is too large for small data, we flush // multiple times to generate more size index handles. for _ in 0..10 { - put_till_size(cluster, region_max_size, &mut range); + put_till_size(&mut cluster, region_max_size, &mut range); } // Peer will split when size of region meet region_max_size, so assume the last @@ -602,20 +597,6 @@ fn test_split_region_diff_check(cluster: &mut Cluster) { } } -#[test] -fn test_server_split_region_diff_check() { - let count = 1; - let mut cluster = new_server_cluster(0, count); - test_split_region_diff_check(&mut cluster); -} - -#[test] -fn test_node_split_region_diff_check() { - let count = 1; - let mut cluster = new_node_cluster(0, count); - test_split_region_diff_check(&mut cluster); -} - // Test steps // set max region size/split size 2000 and put data till 1000 // set max region size/split size < 1000 and reboot @@ -623,7 +604,7 @@ fn test_node_split_region_diff_check() { #[test] fn test_node_split_region_after_reboot_with_config_change() { let count = 1; - let mut cluster = new_server_cluster(0, count); + let mut cluster = test_raftstore::new_server_cluster(0, count); let region_max_size = 2000; let region_split_size = 2000; 
cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(50); @@ -664,7 +645,10 @@ fn test_node_split_region_after_reboot_with_config_change() { } } -fn test_split_epoch_not_match(cluster: &mut Cluster, right_derive: bool) { +fn test_split_epoch_not_match( + cluster: &mut test_raftstore::Cluster, + right_derive: bool, +) { cluster.cfg.raft_store.right_derive_when_split = right_derive; cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); @@ -736,33 +720,39 @@ fn test_split_epoch_not_match(cluster: &mut Cluster, right_deri #[test] fn test_server_split_epoch_not_match_left_derive() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = test_raftstore::new_server_cluster(0, 3); test_split_epoch_not_match(&mut cluster, false); } #[test] fn test_server_split_epoch_not_match_right_derive() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = test_raftstore::new_server_cluster(0, 3); test_split_epoch_not_match(&mut cluster, true); } #[test] fn test_node_split_epoch_not_match_left_derive() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = test_raftstore::new_node_cluster(0, 3); test_split_epoch_not_match(&mut cluster, false); } #[test] fn test_node_split_epoch_not_match_right_derive() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = test_raftstore::new_node_cluster(0, 3); test_split_epoch_not_match(&mut cluster, true); } -// For the peer which is the leader of the region before split, it should -// campaigns immediately. and then this peer may take the leadership -// earlier. `test_quick_election_after_split` is a helper function for testing -// this feature. 
-fn test_quick_election_after_split(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_node_quick_election_after_split() { + let mut cluster = new_cluster(0, 3); + + // For the peer which is the leader of the region before split, it should + // campaigns immediately. and then this peer may take the leadership + // earlier. `test_quick_election_after_split` is a helper function for testing + // this feature. // Calculate the reserved time before a new campaign after split. let reserved_time = Duration::from_millis(cluster.cfg.raft_store.raft_base_tick_interval.as_millis() * 2); @@ -790,33 +780,10 @@ fn test_quick_election_after_split(cluster: &mut Cluster) { assert!(new_leader.is_some()); } -#[test] -fn test_node_quick_election_after_split() { - let mut cluster = new_node_cluster(0, 3); - test_quick_election_after_split(&mut cluster); -} - -#[test] -fn test_server_quick_election_after_split() { - let mut cluster = new_server_cluster(0, 3); - test_quick_election_after_split(&mut cluster); -} - #[test] fn test_node_split_region() { let count = 5; - let mut cluster = new_node_cluster(0, count); - test_split_region(&mut cluster); -} - -#[test] -fn test_server_split_region() { - let count = 5; - let mut cluster = new_server_cluster(0, count); - test_split_region(&mut cluster); -} - -fn test_split_region(cluster: &mut Cluster) { + let mut cluster = test_raftstore::new_node_cluster(0, count); // length of each key+value let item_len = 74; // make bucket's size to item_len, which means one row one bucket @@ -825,8 +792,8 @@ fn test_split_region(cluster: &mut Cluster) { cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); let region = pd_client.get_region(b"").unwrap(); - let mid_key = put_till_size(cluster, 11 * item_len, &mut range); - let max_key = put_till_size(cluster, 9 * 
item_len, &mut range); + let mid_key = put_till_size(&mut cluster, 11 * item_len, &mut range); + let max_key = put_till_size(&mut cluster, 9 * item_len, &mut range); let target = pd_client.get_region(&max_key).unwrap(); assert_eq!(region, target); pd_client.must_split_region(target, pdpb::CheckPolicy::Scan, vec![]); @@ -852,11 +819,12 @@ fn test_split_region(cluster: &mut Cluster) { assert_eq!(y2.get_end_key(), b""); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_split_update_region_right_derive() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // Election timeout and max leader lease is 1s. - configure_for_lease_read(&mut cluster, Some(100), Some(10)); + configure_for_lease_read(&mut cluster.cfg, Some(100), Some(10)); cluster.run(); @@ -906,9 +874,10 @@ fn test_node_split_update_region_right_derive() { ); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_split_with_epoch_not_match() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -940,9 +909,10 @@ fn test_split_with_epoch_not_match() { assert!(resp.get_header().get_error().has_epoch_not_match()); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_split_with_in_memory_pessimistic_locks() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -1018,7 +988,7 @@ fn test_split_with_in_memory_pessimistic_locks() { #[test] fn test_refresh_region_bucket_keys() { let count = 5; - let mut cluster = new_server_cluster(0, count); + let mut cluster = test_raftstore::new_server_cluster(0, count); cluster.run(); let pd_client = 
Arc::clone(&cluster.pd_client); @@ -1204,7 +1174,7 @@ fn test_refresh_region_bucket_keys() { #[test] fn test_gen_split_check_bucket_ranges() { let count = 5; - let mut cluster = new_server_cluster(0, count); + let mut cluster = test_raftstore::new_server_cluster(0, count); cluster.cfg.coprocessor.region_bucket_size = ReadableSize(5); cluster.cfg.coprocessor.enable_region_bucket = true; // disable report buckets; as it will reset the user traffic stats to randomize diff --git a/tests/integrations/raftstore/test_transfer_leader.rs b/tests/integrations/raftstore/test_transfer_leader.rs index b4f8c33d54d..b97191d1a13 100644 --- a/tests/integrations/raftstore/test_transfer_leader.rs +++ b/tests/integrations/raftstore/test_transfer_leader.rs @@ -4,18 +4,18 @@ use std::{sync::Arc, thread, time::Duration}; use api_version::{test_kv_format_impl, KvFormat}; use engine_traits::CF_LOCK; -use kvproto::kvrpcpb::Context; use raft::eraftpb::MessageType; use raftstore::store::LocksStatus; use test_raftstore::*; -use tikv::storage::{ - kv::{SnapContext, SnapshotExt}, - Engine, Snapshot, -}; +use test_raftstore_macro::test_case; +use tikv::storage::Snapshot; use tikv_util::config::*; use txn_types::{Key, PessimisticLock}; -fn test_basic_transfer_leader(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_server_basic_transfer_leader() { + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.raft_heartbeat_ticks = 20; let reserved_time = Duration::from_millis( cluster.cfg.raft_store.raft_base_tick_interval.as_millis() @@ -64,13 +64,10 @@ fn test_basic_transfer_leader(cluster: &mut Cluster) { assert!(resp.get_header().get_error().has_not_leader()); } -#[test] -fn test_server_basic_transfer_leader() { - let mut cluster = new_server_cluster(0, 3); - test_basic_transfer_leader(&mut cluster); -} - -fn test_pd_transfer_leader(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_server_cluster)] 
+#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_server_pd_transfer_leader() { + let mut cluster = new_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -118,7 +115,10 @@ fn test_pd_transfer_leader(cluster: &mut Cluster) { } } -fn test_pd_transfer_leader_multi_target(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_server_pd_transfer_leader_multi_target() { + let mut cluster = new_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -155,6 +155,11 @@ fn test_pd_transfer_leader_multi_target(cluster: &mut Cluster) } } + // Give some time for leader to commit the first entry + // todo: It shouldn't need this, but for now and for v2, without it, the test is + // not stable. + thread::sleep(Duration::from_millis(100)); + // call command on this leader directly, must successfully. let mut req = new_request( region.get_id(), @@ -170,19 +175,9 @@ fn test_pd_transfer_leader_multi_target(cluster: &mut Cluster) assert_eq!(resp.get_responses()[0].get_get().get_value(), b"v1"); } -#[test] -fn test_server_pd_transfer_leader() { - let mut cluster = new_server_cluster(0, 3); - test_pd_transfer_leader(&mut cluster); -} - -#[test] -fn test_server_pd_transfer_leader_multi_target() { - let mut cluster = new_server_cluster(0, 3); - test_pd_transfer_leader_multi_target(&mut cluster); -} - -fn test_transfer_leader_during_snapshot(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_server_cluster)] +fn test_server_transfer_leader_during_snapshot() { + let mut cluster = new_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer count check. 
pd_client.disable_default_operator(); @@ -225,71 +220,42 @@ fn test_transfer_leader_during_snapshot(cluster: &mut Cluster) must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); } -#[test] -fn test_server_transfer_leader_during_snapshot() { - let mut cluster = new_server_cluster(0, 3); - test_transfer_leader_during_snapshot(&mut cluster); -} - -#[test] +#[test_case(test_raftstore::new_server_cluster_with_api_ver)] +#[test_case(test_raftstore_v2::new_server_cluster_with_api_ver)] fn test_sync_max_ts_after_leader_transfer() { - test_kv_format_impl!(test_sync_max_ts_after_leader_transfer_impl); -} - -fn test_sync_max_ts_after_leader_transfer_impl() { - let mut cluster = new_server_cluster_with_api_ver(0, 3, F::TAG); - cluster.cfg.raft_store.raft_heartbeat_ticks = 20; - cluster.run(); - - let cm = cluster.sim.read().unwrap().get_concurrency_manager(1); - let mut storage = cluster - .sim - .read() - .unwrap() - .storages - .get(&1) - .unwrap() - .clone(); - let mut wait_for_synced = |cluster: &mut Cluster| { - let region_id = 1; - let leader = cluster.leader_of_region(region_id).unwrap(); - let epoch = cluster.get_region_epoch(region_id); - let mut ctx = Context::default(); - ctx.set_region_id(region_id); - ctx.set_peer(leader); - ctx.set_region_epoch(epoch); - let snap_ctx = SnapContext { - pb_ctx: &ctx, - ..Default::default() - }; - let snapshot = storage.snapshot(snap_ctx).unwrap(); - let txn_ext = snapshot.txn_ext.clone().unwrap(); - for retry in 0..10 { - if txn_ext.is_max_ts_synced() { - break; - } - thread::sleep(Duration::from_millis(1 << retry)); - } - assert!(snapshot.ext().is_max_ts_synced()); - }; - - cluster.must_transfer_leader(1, new_peer(1, 1)); - wait_for_synced(&mut cluster); - let max_ts = cm.max_ts(); - - cluster.pd_client.trigger_tso_failure(); - // Transfer the leader out and back - cluster.must_transfer_leader(1, new_peer(2, 2)); - cluster.must_transfer_leader(1, new_peer(1, 1)); + // This method should be modified with + // 
`test_sync_max_ts_after_leader_transfer_impl_v2` simultaneously + fn test_sync_max_ts_after_leader_transfer_impl() { + let mut cluster = new_cluster(0, 3, F::TAG); + cluster.cfg.raft_store.raft_heartbeat_ticks = 20; + cluster.run(); + + let cm = cluster.sim.read().unwrap().get_concurrency_manager(1); + cluster.must_transfer_leader(1, new_peer(1, 1)); + // Give some time for leader to commit the first entry + // todo: It shouldn't need this, but for now and for v2, without it, the test is + // not stable. + thread::sleep(Duration::from_millis(100)); + wait_for_synced(&mut cluster, 1, 1); + let max_ts = cm.max_ts(); + + cluster.pd_client.trigger_tso_failure(); + // Transfer the leader out and back + cluster.must_transfer_leader(1, new_peer(2, 2)); + cluster.must_transfer_leader(1, new_peer(1, 1)); + + wait_for_synced(&mut cluster, 1, 1); + let new_max_ts = cm.max_ts(); + assert!(new_max_ts > max_ts); + } - wait_for_synced(&mut cluster); - let new_max_ts = cm.max_ts(); - assert!(new_max_ts > max_ts); + test_kv_format_impl!(test_sync_max_ts_after_leader_transfer_impl); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_propose_in_memory_pessimistic_locks() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.raft_heartbeat_ticks = 20; cluster.run(); @@ -328,9 +294,10 @@ fn test_propose_in_memory_pessimistic_locks() { assert_eq!(value, lock.into_lock().to_bytes()); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_memory_pessimistic_locks_status_after_transfer_leader_failure() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.raft_heartbeat_ticks = 20; cluster.cfg.raft_store.reactive_memory_lock_tick_interval = ReadableDuration::millis(200); cluster.cfg.raft_store.reactive_memory_lock_timeout_tick = 3; diff --git 
a/tests/integrations/raftstore/test_unsafe_recovery.rs b/tests/integrations/raftstore/test_unsafe_recovery.rs index a2c2ea75c64..e3f22afe6d9 100644 --- a/tests/integrations/raftstore/test_unsafe_recovery.rs +++ b/tests/integrations/raftstore/test_unsafe_recovery.rs @@ -1160,7 +1160,7 @@ fn test_force_leader_multiple_election_rounds() { #[test] fn test_unsafe_recovery_has_commit_merge() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.run(); @@ -1218,7 +1218,7 @@ fn test_unsafe_recovery_has_commit_merge() { #[test] fn test_unsafe_recovery_during_merge() { let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster); + configure_for_merge(&mut cluster.cfg); cluster.run(); diff --git a/tests/integrations/storage/test_raftkv.rs b/tests/integrations/storage/test_raftkv.rs index 01993fb89cd..3dcdab0cf6b 100644 --- a/tests/integrations/storage/test_raftkv.rs +++ b/tests/integrations/storage/test_raftkv.rs @@ -276,7 +276,7 @@ fn test_read_on_replica_check_memory_locks() { fn test_invalid_read_index_when_no_leader() { // Initialize cluster let mut cluster = new_node_cluster(0, 3); - configure_for_lease_read(&mut cluster, Some(10), Some(6)); + configure_for_lease_read(&mut cluster.cfg, Some(10), Some(6)); cluster.cfg.raft_store.raft_heartbeat_ticks = 1; cluster.cfg.raft_store.hibernate_regions = false; let pd_client = Arc::clone(&cluster.pd_client); From c4b38e8fa7db5392bb5b424856c9066f95a23886 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 14 Feb 2023 10:50:01 +0800 Subject: [PATCH 0523/1149] Raftstore-v2: support split operator (#14199) ref tikv/tikv#12842 Support split operator got from PD. 
Signed-off-by: SpadeA-Tang --- components/raftstore-v2/src/fsm/peer.rs | 4 ++ .../src/operation/command/admin/mod.rs | 4 +- .../src/operation/command/admin/split.rs | 60 ++++++++++++++++++- .../raftstore-v2/src/operation/command/mod.rs | 4 +- components/raftstore-v2/src/operation/mod.rs | 2 +- components/raftstore-v2/src/router/message.rs | 6 +- .../raftstore-v2/src/worker/pd/region.rs | 49 ++++++++++++++- .../raftstore/test_split_region.rs | 38 ++++++------ 8 files changed, 137 insertions(+), 30 deletions(-) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 47d23a67d1d..1b127e5851b 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -305,6 +305,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, .peer_mut() .on_request_split(self.store_ctx, request, ch) } + PeerMsg::RequestHalfSplit { request, ch } => self + .fsm + .peer_mut() + .on_request_half_split(self.store_ctx, request, ch), PeerMsg::UpdateRegionSize { size } => { self.fsm.peer_mut().on_update_region_size(size) } diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index a912cb7a3d5..ca91e597cb9 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -15,8 +15,8 @@ use raftstore::store::{cmd_resp, fsm::apply, msg::ErrorCallback}; use slog::info; use split::SplitResult; pub use split::{ - report_split_init_finish, temp_split_path, RequestSplit, SplitFlowControl, SplitInit, - SPLIT_PREFIX, + report_split_init_finish, temp_split_path, RequestHalfSplit, RequestSplit, SplitFlowControl, + SplitInit, SPLIT_PREFIX, }; use tikv_util::{box_err, log::SlogFormat}; use txn_types::WriteBatchFlags; diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs 
b/components/raftstore-v2/src/operation/command/admin/split.rs index 0fbe31277ed..86b0aab558e 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -53,7 +53,7 @@ use raftstore::{ }, Result, }; -use slog::{error, info}; +use slog::{error, info, warn}; use tikv_util::{log::SlogFormat, slog_panic}; use crate::{ @@ -149,6 +149,15 @@ pub struct RequestSplit { pub source: Cow<'static, str>, } +#[derive(Debug)] +pub struct RequestHalfSplit { + pub epoch: RegionEpoch, + pub start_key: Option>, + pub end_key: Option>, + pub policy: CheckPolicy, + pub source: Cow<'static, str>, +} + #[derive(Default, Debug)] pub struct SplitFlowControl { size_diff_hint: i64, @@ -280,6 +289,55 @@ impl Peer { self.ask_batch_split_pd(ctx, rs.split_keys, ch); } + pub fn on_request_half_split( + &mut self, + ctx: &mut StoreContext, + rhs: RequestHalfSplit, + _ch: CmdResChannel, + ) { + let is_key_range = rhs.start_key.is_some() && rhs.end_key.is_some(); + info!( + self.logger, + "on half split"; + "is_key_range" => is_key_range, + "policy" => ?rhs.policy, + "source" => ?rhs.source, + ); + if !self.is_leader() { + // region on this store is no longer leader, skipped. 
+ info!(self.logger, "not leader, skip."); + return; + } + + let region = self.region(); + if util::is_epoch_stale(&rhs.epoch, region.get_region_epoch()) { + warn!( + self.logger, + "receive a stale halfsplit message"; + "is_key_range" => is_key_range, + ); + return; + } + + let task = SplitCheckTask::split_check_key_range( + region.clone(), + rhs.start_key, + rhs.end_key, + false, + rhs.policy, + // todo: bucket range + None, + ); + if let Err(e) = ctx.schedulers.split_check.schedule(task) { + error!( + self.logger, + "failed to schedule split check"; + "is_key_range" => is_key_range, + "err" => %e, + ); + } + } + pub fn propose_split( &mut self, store_ctx: &mut StoreContext, diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index d887af7d6d6..d06e43c0303 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -59,8 +59,8 @@ mod control; mod write; pub use admin::{ - report_split_init_finish, temp_split_path, AdminCmdResult, CompactLogContext, RequestSplit, - SplitFlowControl, SplitInit, SPLIT_PREFIX, + report_split_init_finish, temp_split_path, AdminCmdResult, CompactLogContext, RequestHalfSplit, + RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX, }; pub use control::ProposalControl; pub use write::{ diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 9cdd78dcb4c..f022ab91109 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -9,7 +9,7 @@ mod txn_ext; pub use command::{ AdminCmdResult, ApplyFlowControl, CommittedEntries, CompactLogContext, ProposalControl, - RequestSplit, SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, + RequestHalfSplit, RequestSplit, SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, SplitFlowControl, SPLIT_PREFIX, }; pub use 
life::{DestroyProgress, GcPeerContext}; diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 91efc54c867..04bc5dbab10 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -17,7 +17,7 @@ use super::{ }, ApplyRes, }; -use crate::operation::{RequestSplit, SimpleWriteBinary, SplitInit}; +use crate::operation::{RequestHalfSplit, RequestSplit, SimpleWriteBinary, SplitInit}; #[derive(Debug, Clone, Copy, PartialEq, Hash)] #[repr(u8)] @@ -185,6 +185,10 @@ pub enum PeerMsg { request: RequestSplit, ch: CmdResChannel, }, + RequestHalfSplit { + request: RequestHalfSplit, + ch: CmdResChannel, + }, UpdateRegionSize { size: u64, }, diff --git a/components/raftstore-v2/src/worker/pd/region.rs b/components/raftstore-v2/src/worker/pd/region.rs index d282534329b..c862d1f208b 100644 --- a/components/raftstore-v2/src/worker/pd/region.rs +++ b/components/raftstore-v2/src/worker/pd/region.rs @@ -10,10 +10,14 @@ use pd_client::{ }; use raftstore::store::{ReadStats, WriteStats}; use resource_metering::RawRecords; -use slog::{debug, info}; +use slog::{debug, error, info}; use tikv_util::{store::QueryStats, time::UnixSecs}; use super::{requests::*, Runner}; +use crate::{ + operation::{RequestHalfSplit, RequestSplit}, + router::{CmdResChannel, PeerMsg}, +}; pub struct RegionHeartbeatTask { pub term: u64, @@ -276,8 +280,47 @@ where ); send_admin_request(&logger, &router, region_id, epoch, peer, req, None); } else if resp.has_split_region() { - // TODO - info!(logger, "pd asks for split but ignored"); + PD_HEARTBEAT_COUNTER_VEC + .with_label_values(&["split region"]) + .inc(); + + let mut split_region = resp.take_split_region(); + info!( + logger, + "try to split"; + "region_id" => region_id, + "region_epoch" => ?epoch, + ); + + let (ch, _) = CmdResChannel::pair(); + let msg = if split_region.get_policy() == pdpb::CheckPolicy::Usekey { + PeerMsg::RequestSplit { + request: 
RequestSplit { + epoch, + split_keys: split_region.take_keys().into(), + source: "pd".into(), + }, + ch, + } + } else { + PeerMsg::RequestHalfSplit { + request: RequestHalfSplit { + epoch, + start_key: None, + end_key: None, + policy: split_region.get_policy(), + source: "pd".into(), + }, + ch, + } + }; + if let Err(e) = router.send(region_id, msg) { + error!(logger, + "send split request failed"; + "region_id" => region_id, + "err" => ?e + ); + } } else if resp.has_merge() { // TODO info!(logger, "pd asks for merge but ignored"); diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 4b7914f7324..55cc642aca1 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -304,7 +304,7 @@ fn check_cluster(cluster: &mut Cluster, k: &[u8], v: &[u8], all_ /// sure broadcast commit is disabled when split. #[test] fn test_delay_split_region() { - let mut cluster = test_raftstore::new_server_cluster(0, 3); + let mut cluster = new_server_cluster(0, 3); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(500); cluster.cfg.raft_store.merge_max_log_gap = 100; cluster.cfg.raft_store.raft_log_gc_threshold = 500; @@ -411,9 +411,7 @@ fn test_node_split_overlap_snapshot() { must_get_equal(&engine3, b"k3", b"v3"); } -fn test_apply_new_version_snapshot( - cluster: &mut test_raftstore::Cluster, -) { +fn test_apply_new_version_snapshot(cluster: &mut Cluster) { // truncate the log quickly so that we can force sending snapshot. 
cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(5); @@ -468,19 +466,19 @@ fn test_apply_new_version_snapshot( #[test] fn test_node_apply_new_version_snapshot() { - let mut cluster = test_raftstore::new_node_cluster(0, 3); + let mut cluster = new_node_cluster(0, 3); test_apply_new_version_snapshot(&mut cluster); } #[test] fn test_server_apply_new_version_snapshot() { - let mut cluster = test_raftstore::new_server_cluster(0, 3); + let mut cluster = new_server_cluster(0, 3); test_apply_new_version_snapshot(&mut cluster); } #[test] fn test_server_split_with_stale_peer() { - let mut cluster = test_raftstore::new_server_cluster(0, 3); + let mut cluster = new_server_cluster(0, 3); // disable raft log gc. cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(60); cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(500); @@ -604,7 +602,7 @@ fn test_split_region_diff_check() { #[test] fn test_node_split_region_after_reboot_with_config_change() { let count = 1; - let mut cluster = test_raftstore::new_server_cluster(0, count); + let mut cluster = new_server_cluster(0, count); let region_max_size = 2000; let region_split_size = 2000; cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(50); @@ -645,10 +643,7 @@ fn test_node_split_region_after_reboot_with_config_change() { } } -fn test_split_epoch_not_match( - cluster: &mut test_raftstore::Cluster, - right_derive: bool, -) { +fn test_split_epoch_not_match(cluster: &mut Cluster, right_derive: bool) { cluster.cfg.raft_store.right_derive_when_split = right_derive; cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); @@ -720,25 +715,25 @@ fn test_split_epoch_not_match( #[test] fn test_server_split_epoch_not_match_left_derive() { - let mut cluster = test_raftstore::new_server_cluster(0, 3); + let mut cluster = new_server_cluster(0, 3); 
test_split_epoch_not_match(&mut cluster, false); } #[test] fn test_server_split_epoch_not_match_right_derive() { - let mut cluster = test_raftstore::new_server_cluster(0, 3); + let mut cluster = new_server_cluster(0, 3); test_split_epoch_not_match(&mut cluster, true); } #[test] fn test_node_split_epoch_not_match_left_derive() { - let mut cluster = test_raftstore::new_node_cluster(0, 3); + let mut cluster = new_node_cluster(0, 3); test_split_epoch_not_match(&mut cluster, false); } #[test] fn test_node_split_epoch_not_match_right_derive() { - let mut cluster = test_raftstore::new_node_cluster(0, 3); + let mut cluster = new_node_cluster(0, 3); test_split_epoch_not_match(&mut cluster, true); } @@ -780,10 +775,13 @@ fn test_node_quick_election_after_split() { assert!(new_leader.is_some()); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_node_split_region() { let count = 5; - let mut cluster = test_raftstore::new_node_cluster(0, count); + let mut cluster = new_cluster(0, count); // length of each key+value let item_len = 74; // make bucket's size to item_len, which means one row one bucket @@ -988,7 +986,7 @@ fn test_split_with_in_memory_pessimistic_locks() { #[test] fn test_refresh_region_bucket_keys() { let count = 5; - let mut cluster = test_raftstore::new_server_cluster(0, count); + let mut cluster = new_server_cluster(0, count); cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); @@ -1174,7 +1172,7 @@ fn test_refresh_region_bucket_keys() { #[test] fn test_gen_split_check_bucket_ranges() { let count = 5; - let mut cluster = test_raftstore::new_server_cluster(0, count); + let mut cluster = new_server_cluster(0, count); cluster.cfg.coprocessor.region_bucket_size = ReadableSize(5); cluster.cfg.coprocessor.enable_region_bucket = true; // disable report buckets; as it will reset the user 
traffic stats to randomize From 6d17e254c4a284ce26c9b0bf0e4883fde6e593a1 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 14 Feb 2023 11:24:01 +0800 Subject: [PATCH 0524/1149] Raftstore-v2: use appropriate default region split size when integration test suit start (#14210) ref tikv/tikv#12842 use appropriate default region split size when integration test suit start Signed-off-by: SpadeA-Tang Signed-off-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- components/test_raftstore-v2/src/node.rs | 10 +-- components/test_raftstore-v2/src/server.rs | 2 +- .../raftstore/test_split_region.rs | 73 ++++++++++--------- 3 files changed, 42 insertions(+), 43 deletions(-) diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 96275cc8383..f8c8d84bc9b 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -190,7 +190,7 @@ impl Simulator for NodeCluster { let mut raft_store = cfg.raft_store.clone(); raft_store .validate( - cfg.coprocessor.region_split_size.unwrap_or_default(), + cfg.coprocessor.region_split_size(), cfg.coprocessor.enable_region_bucket, cfg.coprocessor.region_bucket_size, ) @@ -285,16 +285,12 @@ impl Simulator for NodeCluster { assert!(node_id == 0 || node_id == node.id()); let node_id = node.id(); - let region_split_size = cfg.coprocessor.region_split_size; + let region_split_size = cfg.coprocessor.region_split_size(); let enable_region_bucket = cfg.coprocessor.enable_region_bucket; let region_bucket_size = cfg.coprocessor.region_bucket_size; let mut raftstore_cfg = cfg.tikv.raft_store; raftstore_cfg - .validate( - region_split_size.unwrap_or_default(), - enable_region_bucket, - region_bucket_size, - ) + .validate(region_split_size, enable_region_bucket, region_bucket_size) .unwrap(); // let raft_store = Arc::new(VersionTrack::new(raftstore_cfg)); diff --git a/components/test_raftstore-v2/src/server.rs 
b/components/test_raftstore-v2/src/server.rs index e64844bb490..64e05d6b766 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -196,7 +196,7 @@ impl ServerCluster { let mut raft_store = cfg.raft_store.clone(); raft_store .validate( - cfg.coprocessor.region_split_size.unwrap_or_default(), + cfg.coprocessor.region_split_size(), cfg.coprocessor.enable_region_bucket, cfg.coprocessor.region_bucket_size, ) diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 55cc642aca1..20a7c3f503a 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -266,45 +266,48 @@ impl Filter for EraseHeartbeatCommit { } } -fn check_cluster(cluster: &mut Cluster, k: &[u8], v: &[u8], all_committed: bool) { - let region = cluster.pd_client.get_region(k).unwrap(); - let mut tried_cnt = 0; - let leader = loop { - match cluster.leader_of_region(region.get_id()) { - None => { - tried_cnt += 1; - if tried_cnt >= 3 { - panic!("leader should be elected"); +macro_rules! check_cluster { + ($cluster:expr, $k:expr, $v:expr, $all_committed:expr) => { + let region = $cluster.pd_client.get_region($k).unwrap(); + let mut tried_cnt = 0; + let leader = loop { + match $cluster.leader_of_region(region.get_id()) { + None => { + tried_cnt += 1; + if tried_cnt >= 3 { + panic!("leader should be elected"); + } + continue; } - continue; + Some(l) => break l, } - Some(l) => break l, - } - }; - let mut missing_count = 0; - for i in 1..=region.get_peers().len() as u64 { - let engine = cluster.get_engine(i); - if all_committed || i == leader.get_store_id() { - must_get_equal(&engine, k, v); - } else { - // Note that a follower can still commit the log by an empty MsgAppend - // when bcast commit is disabled. A heartbeat response comes to leader - // before MsgAppendResponse will trigger MsgAppend. 
- match engine.get_value(&keys::data_key(k)).unwrap() { - Some(res) => assert_eq!(v, &res[..]), - None => missing_count += 1, + }; + let mut missing_count = 0; + for i in 1..=region.get_peers().len() as u64 { + let engine = $cluster.get_engine(i); + if $all_committed || i == leader.get_store_id() { + must_get_equal(&engine, $k, $v); + } else { + // Note that a follower can still commit the log by an empty MsgAppend + // when bcast commit is disabled. A heartbeat response comes to leader + // before MsgAppendResponse will trigger MsgAppend. + match engine.get_value(&keys::data_key($k)).unwrap() { + Some(res) => assert_eq!($v, &res[..]), + None => missing_count += 1, + } } } - } - assert!(all_committed || missing_count > 0); + assert!($all_committed || missing_count > 0); + }; } /// TiKV enables lazy broadcast commit optimization, which can delay split /// on follower node. So election of new region will delay. We need to make /// sure broadcast commit is disabled when split. -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_delay_split_region() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(500); cluster.cfg.raft_store.merge_max_log_gap = 100; cluster.cfg.raft_store.raft_log_gc_threshold = 500; @@ -323,8 +326,8 @@ fn test_delay_split_region() { cluster.must_put(b"k3", b"v3"); // Although skip bcast is enabled, but heartbeat will commit the log in period. 
- check_cluster(&mut cluster, b"k1", b"v1", true); - check_cluster(&mut cluster, b"k3", b"v3", true); + check_cluster!(cluster, b"k1", b"v1", true); + check_cluster!(cluster, b"k3", b"v3", true); cluster.must_transfer_leader(region.get_id(), new_peer(1, 1)); cluster.add_send_filter(CloneFilterFactory(EraseHeartbeatCommit)); @@ -333,14 +336,14 @@ fn test_delay_split_region() { sleep_ms(100); // skip bcast is enabled by default, so all followers should not commit // the log. - check_cluster(&mut cluster, b"k4", b"v4", false); + check_cluster!(cluster, b"k4", b"v4", false); cluster.must_transfer_leader(region.get_id(), new_peer(3, 3)); // New leader should flush old committed entries eagerly. - check_cluster(&mut cluster, b"k4", b"v4", true); + check_cluster!(cluster, b"k4", b"v4", true); cluster.must_put(b"k5", b"v5"); // New committed entries should be broadcast lazily. - check_cluster(&mut cluster, b"k5", b"v5", false); + check_cluster!(cluster, b"k5", b"v5", false); cluster.add_send_filter(CloneFilterFactory(EraseHeartbeatCommit)); let k2 = b"k2"; @@ -352,7 +355,7 @@ fn test_delay_split_region() { sleep_ms(100); // After split, skip bcast is enabled again, so all followers should not // commit the log. 
- check_cluster(&mut cluster, b"k6", b"v6", false); + check_cluster!(cluster, b"k6", b"v6", false); } #[test_case(test_raftstore::new_node_cluster)] From 8e5e5ea571411729bb2507c773ac1271d8d35fa1 Mon Sep 17 00:00:00 2001 From: lijie Date: Tue, 14 Feb 2023 14:01:32 +0800 Subject: [PATCH 0525/1149] chore: bump version to v6.7.0-alpha (#14216) Signed-off-by: lijie --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6083f14bad7..c8cf54fd4a9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6334,7 +6334,7 @@ dependencies = [ [[package]] name = "tikv" -version = "6.6.0-alpha" +version = "6.7.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index 63be8944f5f..cf66773b576 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tikv" -version = "6.6.0-alpha" +version = "6.7.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From 54c5ec7f557ebe9947b391ddd8874b9debd7e89a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 15 Feb 2023 10:46:01 +0800 Subject: [PATCH 0526/1149] log-backup: make the download more finer-grained (#14203) close tikv/tikv#14206 Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- components/sst_importer/src/metrics.rs | 10 + components/sst_importer/src/sst_importer.rs | 341 +++++++++++++++++--- 2 files changed, 306 insertions(+), 45 deletions(-) diff --git a/components/sst_importer/src/metrics.rs b/components/sst_importer/src/metrics.rs index e7eeefd3e82..6b4af299ba8 100644 --- a/components/sst_importer/src/metrics.rs +++ b/components/sst_importer/src/metrics.rs @@ -106,4 +106,14 @@ lazy_static! 
{ "The operations over storage cache", &["operation"] ).unwrap(); + + pub static ref CACHED_FILE_IN_MEM: IntGauge = register_int_gauge!( + "tikv_import_apply_cached_bytes", + "The files cached by the apply requests of importer." + ).unwrap(); + pub static ref CACHE_EVENT: IntCounterVec = register_int_counter_vec!( + "tikv_import_apply_cache_event", + "The events of caching. event = {add, remove, out-of-quota}", + &["type"] + ).unwrap(); } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 384a48e96a8..42a96e21652 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -9,12 +9,12 @@ use std::{ path::{Path, PathBuf}, sync::{ atomic::{AtomicU64, Ordering}, - Arc, + Arc, Condvar, Mutex, }, time::Duration, }; -use dashmap::DashMap; +use dashmap::{mapref::entry::Entry, DashMap}; use encryption::{to_engine_encryption_method, DataKeyManager}; use engine_rocks::{get_env, RocksSstReader}; use engine_traits::{ @@ -45,7 +45,7 @@ use tokio::runtime::{Handle, Runtime}; use txn_types::{Key, TimeStamp, WriteRef}; use crate::{ - caching::cache_map::CacheMap, + caching::cache_map::{CacheMap, ShareOwned}, import_file::{ImportDir, ImportFile}, import_mode::{ImportModeSwitcher, RocksDbMetricsFn}, metrics::*, @@ -53,6 +53,28 @@ use crate::{ util, Config, Error, Result, }; +pub struct LoadedFile { + permit: MemUsePermit, + content: Arc<[u8]>, +} + +impl std::fmt::Debug for LoadedFile { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("LoadedFileInner") + .field("permit", &self.permit) + .field("content.len()", &self.content.len()) + .finish() + } +} + +impl ShareOwned for LoadedFile { + type Shared = Arc<[u8]>; + + fn share_owned(&self) -> Self::Shared { + Arc::clone(&self.content) + } +} + #[derive(Default, Debug, Clone)] pub struct DownloadExt<'a> { cache_key: Option<&'a str>, @@ -71,17 +93,164 @@ impl<'a> DownloadExt<'a> { } } 
-#[derive(Clone, PartialEq, Debug)] +#[derive(Debug)] +struct MemUsePermit { + amount: u64, + statistic: Arc, +} + +impl Drop for MemUsePermit { + fn drop(&mut self) { + self.statistic.fetch_sub(self.amount, Ordering::SeqCst); + } +} + +#[derive(Clone, Debug)] pub enum CacheKvFile { - Mem(Arc>), + Mem(Remote), Fs(Arc), } +/// Remote presents a "remote" object which can be downloaded and then cached. +/// The remote object should generally implement the `ShareOwned` trait. +/// This structure doesn't manage how it is downloaded, it just manages the +/// state. You need to provide the manually downloaded data to the +/// [`DownloadPromise`]. +/// Below is the state transform of this: +/// ```text +/// DownloadPromise::fulfill +/// +-----------+ +-----------+ +/// |Downloading+-------->|Cached | +/// +--+--------+ +-----------+ +/// | ^ +/// | | +/// DownloadPromise | | Somebody takes +/// dropped | | over the duty. +/// v | +/// +--------+--+ +/// |Leaked | +/// +-----------+ +/// ``` +#[derive(Debug)] +pub struct Remote(Arc<(Mutex>, Condvar)>); + +impl Clone for Remote { + fn clone(&self) -> Self { + Self(Arc::clone(&self.0)) + } +} + +/// When holding this, the holder has promised to downloading the remote object +/// into local, then provide it to others waiting the object, by +/// [`Self::fulfill()`]. +pub struct DownloadPromise(Arc<(Mutex>, Condvar)>); + +impl DownloadPromise { + /// provide the downloaded data and make it cached. 
+ pub fn fulfill(self, item: T) -> Remote { + let mut l = self.0.as_ref().0.lock().unwrap(); + debug_assert!(matches!(*l, FileCacheInner::Downloading)); + *l = FileCacheInner::Cached(item); + self.0.as_ref().1.notify_all(); + drop(l); + Remote(Arc::clone(&self.0)) + } +} + +impl Drop for DownloadPromise { + fn drop(&mut self) { + let mut l = self.0.as_ref().0.lock().unwrap(); + if matches!(*l, FileCacheInner::Downloading) { + *l = FileCacheInner::Leaked; + self.0.as_ref().1.notify_one(); + } + } +} + +impl Remote { + /// create a downloading remote object. + /// it returns the handle to the remote object and a [`DownloadPromise`], + /// the latter can be used to fulfill the remote object. + /// + /// # Examples + /// ``` + /// # use sst_importer::sst_importer::Remote; + /// let (remote_obj, promise) = Remote::download(); + /// promise.fulfill(42); + /// assert_eq!(remote_obj.get(), Some(42)); + /// ``` + pub fn download() -> (Self, DownloadPromise) { + let inner = Arc::new((Mutex::new(FileCacheInner::Downloading), Condvar::new())); + (Self(Arc::clone(&inner)), DownloadPromise(inner)) + } + + /// Block and wait until the remote object is downloaded. + /// # Returns + /// If the remote object has been fulfilled, return `None`. + /// If the remote object hasn't been fulfilled, return a + /// [`DownloadPromise`]: it is time to take over the duty of downloading. 
+ /// + /// # Examples + /// ``` + /// # use sst_importer::sst_importer::Remote; + /// let (remote_obj, promise) = Remote::download(); + /// drop(promise); + /// let new_promise = remote_obj.wait_until_fill(); + /// new_promise + /// .expect("wait_until_fill should return new promise when old promise dropped") + /// .fulfill(42); + /// assert!(remote_obj.wait_until_fill().is_none()); + /// ``` + pub fn wait_until_fill(&self) -> Option> { + let mut l = self.0.as_ref().0.lock().unwrap(); + loop { + match *l { + FileCacheInner::Downloading => { + l = self.0.as_ref().1.wait(l).unwrap(); + } + FileCacheInner::Leaked => { + *l = FileCacheInner::Downloading; + return Some(DownloadPromise(Arc::clone(&self.0))); + } + FileCacheInner::Cached(_) => return None, + } + } + } +} + +impl Remote { + /// Fetch the internal object of the remote object. + pub fn get(&self) -> Option<::Shared> { + let l = self.0.as_ref().0.lock().unwrap(); + match *l { + FileCacheInner::Downloading | FileCacheInner::Leaked => None, + FileCacheInner::Cached(ref t) => Some(t.share_owned()), + } + } +} + +/// returns a error indices that we are going to panic in a invalid state. +/// (Rust panic information cannot be send to BR, hence client cannot know +/// what happens, so we pack it into a `Result`.) +fn bug(message: impl std::fmt::Display) -> Error { + Error::Io(std::io::Error::new( + std::io::ErrorKind::Other, + format!("BUG in TiKV: {}", message), + )) +} + +#[derive(Clone, Debug, PartialEq, Eq)] +enum FileCacheInner { + Downloading, + Leaked, + Cached(T), +} + impl CacheKvFile { // get the ref count of item. 
pub fn ref_count(&self) -> usize { match self { - CacheKvFile::Mem(buff) => Arc::strong_count(buff), + CacheKvFile::Mem(buff) => Arc::strong_count(&buff.0), CacheKvFile::Fs(path) => Arc::strong_count(path), } } @@ -109,7 +278,7 @@ pub struct SstImporter { cached_storage: CacheMap, download_rt: Runtime, file_locks: Arc>, - mem_use: AtomicU64, + mem_use: Arc, mem_limit: ReadableSize, } @@ -139,7 +308,7 @@ impl SstImporter { file_locks: Arc::new(DashMap::default()), cached_storage, download_rt, - mem_use: AtomicU64::new(0), + mem_use: Arc::new(AtomicU64::new(0)), mem_limit: ReadableSize(memory_limit as u64), }) } @@ -412,9 +581,10 @@ impl SstImporter { let mut need_retain = true; match c { CacheKvFile::Mem(buff) => { - let buflen = buff.len(); + let buflen = buff.get().map(|v| v.len()).unwrap_or_default(); // The term of recycle memeory is 60s. if c.ref_count() == 1 && c.is_expired(start) { + CACHE_EVENT.with_label_values(&["remove"]).inc(); need_retain = false; shrink_buff_size += buflen; } else { @@ -436,6 +606,8 @@ impl SstImporter { need_retain }); + CACHED_FILE_IN_MEM.set(self.mem_use.load(Ordering::SeqCst) as _); + if self.import_support_download() { let shrink_file_count = shrink_files.len(); if shrink_file_count > 0 || retain_file_count > 0 { @@ -452,7 +624,6 @@ impl SstImporter { if shrink_buff_size > 0 || retain_buff_size > 0 { info!("shrink cache by tick"; "shrink size" => shrink_buff_size, "retain size" => retain_buff_size); } - self.dec_mem(shrink_buff_size as _); shrink_buff_size } } @@ -463,23 +634,24 @@ impl SstImporter { self.mem_limit == ReadableSize(0) } - fn inc_mem_and_check(&self, meta: &KvMeta) -> bool { + fn request_memory(&self, meta: &KvMeta) -> Option { let size = meta.get_length(); let old = self.mem_use.fetch_add(size, Ordering::SeqCst); // If the memory is limited, roll backup the mem_use and return false. 
if old + size > self.mem_limit.0 { self.mem_use.fetch_sub(size, Ordering::SeqCst); - false + CACHE_EVENT.with_label_values(&["out-of-quota"]).inc(); + None } else { - true + CACHE_EVENT.with_label_values(&["add"]).inc(); + Some(MemUsePermit { + amount: size, + statistic: Arc::clone(&self.mem_use), + }) } } - fn dec_mem(&self, size: u64) { - self.mem_use.fetch_sub(size, Ordering::SeqCst); - } - pub fn do_read_kv_file( &self, meta: &KvMeta, @@ -490,21 +662,41 @@ impl SstImporter { let start = Instant::now(); let dst_name = format!("{}_{}", meta.get_name(), meta.get_range_offset()); - let mut lock = self - .file_locks - .entry(dst_name) - .or_insert((CacheKvFile::Mem(Arc::default()), Instant::now())); - - if let CacheKvFile::Mem(buff) = &lock.0 { - if !buff.is_empty() { - lock.1 = Instant::now(); - return Ok(lock.0.clone()); + let promise = { + let lock = self.file_locks.entry(dst_name); + IMPORTER_APPLY_DURATION + .with_label_values(&["download-get-lock"]) + .observe(start.saturating_elapsed().as_secs_f64()); + + match lock { + Entry::Occupied(mut ent) => match ent.get_mut() { + (CacheKvFile::Mem(buff), last_used) => { + *last_used = Instant::now(); + match buff.wait_until_fill() { + Some(handle) => handle, + None => return Ok(ent.get().0.clone()), + } + } + _ => { + return Err(bug(concat!( + "using both read-to-memory and download-to-file is unacceptable for now.", + "(If you think it is possible in the future you are reading this, ", + "please change this line to `return item.get.0.clone()`)", + "(Please also check the state transform is OK too.)", + ))); + } + }, + Entry::Vacant(ent) => { + let (cache, handle) = Remote::download(); + ent.insert((CacheKvFile::Mem(cache), Instant::now())); + handle + } } - } + }; - if !self.inc_mem_and_check(meta) { - return Err(Error::ResourceNotEnough(String::from("memory is limited"))); - } + let permit = self + .request_memory(meta) + .ok_or_else(|| Error::ResourceNotEnough(String::from("memory is limited")))?; let 
expected_sha256 = { let sha256 = meta.get_sha256().to_vec(); @@ -544,8 +736,10 @@ impl SstImporter { .observe(start.saturating_elapsed().as_secs_f64()); let rewrite_buff = self.rewrite_kv_file(buff, rewrite_rule)?; - *lock = (CacheKvFile::Mem(Arc::new(rewrite_buff)), Instant::now()); - Ok(lock.0.clone()) + Ok(CacheKvFile::Mem(promise.fulfill(LoadedFile { + content: Arc::from(rewrite_buff.into_boxed_slice()), + permit, + }))) } pub fn wrap_kms( @@ -619,7 +813,7 @@ impl SstImporter { ext_storage: Arc, backend: &StorageBackend, speed_limiter: &Limiter, - ) -> Result>> { + ) -> Result> { let c = if self.import_support_download() { self.do_download_kv_file(meta, backend, speed_limiter)? } else { @@ -627,7 +821,7 @@ impl SstImporter { }; match c { // If cache memroy, it has been rewrite, return buffer directly. - CacheKvFile::Mem(buff) => Ok(buff), + CacheKvFile::Mem(buff) => buff.get().ok_or_else(|| bug("invalid cache state")), // If cache file name, it need to read and rewrite. CacheKvFile::Fs(path) => { let file = File::open(path.as_ref())?; @@ -636,7 +830,7 @@ impl SstImporter { reader.read_to_end(&mut buffer)?; let rewrite_buff = self.rewrite_kv_file(buffer, rewrite_rule)?; - Ok(Arc::new(rewrite_buff)) + Ok(Arc::from(rewrite_buff.into_boxed_slice())) } } } @@ -774,10 +968,10 @@ impl SstImporter { end_key: &[u8], start_ts: u64, restore_ts: u64, - file_buff: Arc>, + file_buff: Arc<[u8]>, mut build_fn: impl FnMut(Vec, Vec), ) -> Result> { - let mut event_iter = EventIterator::new(file_buff.as_slice()); + let mut event_iter = EventIterator::new(file_buff.as_ref()); let mut smallest_key = None; let mut largest_key = None; let mut total_key = 0; @@ -1245,7 +1439,10 @@ mod tests { use tempfile::Builder; use test_sst_importer::*; use test_util::new_test_key_manager; - use tikv_util::{codec::stream_event::EventEncoder, stream::block_on_external_io}; + use tikv_util::{ + codec::stream_event::EventEncoder, stream::block_on_external_io, + sys::thread::StdThreadBuildWrapper, + 
}; use txn_types::{Value, WriteType}; use uuid::Uuid; @@ -1781,7 +1978,11 @@ mod tests { ) .unwrap(); - assert_eq!(CacheKvFile::Mem(Arc::new(buff.clone())), output); + assert!( + matches!(output.clone(), CacheKvFile::Mem(rc) if &*rc.get().unwrap() == buff.as_slice()), + "{:?}", + output + ); // Do not shrint nothing. let shrink_size = importer.shrink_by_tick(); @@ -2794,11 +2995,11 @@ mod tests { length: 100, ..Default::default() }; - let check = importer.inc_mem_and_check(&meta); - assert!(check); + let check = importer.request_memory(&meta); + assert!(check.is_some()); assert_eq!(importer.mem_use.load(Ordering::SeqCst), meta.get_length()); - importer.dec_mem(meta.get_length()); + drop(check); assert_eq!(importer.mem_use.load(Ordering::SeqCst), 0); // test inc_mem_and_check() failed. @@ -2806,8 +3007,8 @@ mod tests { length: u64::MAX, ..Default::default() }; - let check = importer.inc_mem_and_check(&meta); - assert!(!check); + let check = importer.request_memory(&meta); + assert!(check.is_none()); } #[test] @@ -2817,7 +3018,8 @@ mod tests { SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1).unwrap(); let key = "file1"; - let value = (CacheKvFile::Mem(Arc::default()), Instant::now()); + let (r, _) = Remote::download(); + let value = (CacheKvFile::Mem(r), Instant::now()); let lock = importer.file_locks.entry(key.to_string()).or_insert(value); // test locked by try_entry() @@ -2834,4 +3036,53 @@ mod tests { let _buff = v.0.clone(); assert_eq!(v.0.ref_count(), 2); } + + #[test] + fn test_remote_waiting() { + let (r, dl) = Remote::download(); + let r2 = r.clone(); + let js = (0..2) + .map(|_| { + let r = r.clone(); + std::thread::spawn(move || { + assert!(r.wait_until_fill().is_none()); + r.get() + }) + }) + .collect::>(); + dl.fulfill(42); + for j in js { + assert!(matches!(j.join(), Ok(Some(42)))); + } + assert_eq!(r2.get(), Some(42)); + } + + #[test] + fn test_remote_drop_in_one_thread() { + let (r, dl) = Remote::download(); + drop(dl); + let p = 
r.wait_until_fill(); + assert!(p.is_some()); + p.unwrap().fulfill("Kitty"); + assert_eq!(r.get(), Some("Kitty")); + } + + #[test] + fn test_remote_take_duty() { + let (r, dl) = Remote::download(); + let js = (0..4).map(|i| { + let r = r.clone(); + std::thread::Builder::new() + .name(format!("rd-{}", i)) + .spawn_wrapper(move || match r.wait_until_fill() { + Some(x) => x.fulfill(42).get(), + None => r.get(), + }) + .unwrap() + }); + drop(dl); + for j in js { + assert!(matches!(j.join(), Ok(Some(42)))); + } + } } From ff0a79bfa4c7f4a806d7e52ca107a6244afd0102 Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 15 Feb 2023 12:08:02 +0800 Subject: [PATCH 0527/1149] raftstore-v2: fix wrong peer cache (#14212) close tikv/tikv#14211 Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- .../raftstore-v2/src/operation/ready/mod.rs | 2 +- .../tests/integrations/test_conf_change.rs | 39 +++++++++++++++++-- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index a88df2245cc..fea60049b93 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -250,7 +250,7 @@ impl Peer { if self.is_leader() && from_peer.get_id() != INVALID_ID { self.add_peer_heartbeat(from_peer.get_id(), Instant::now()); } - self.insert_peer_cache(msg.take_from_peer()); + self.insert_peer_cache(from_peer); let pre_committed_index = self.raft_group().raft.raft_log.committed; if msg.get_message().get_msg_type() == MessageType::MsgTransferLeader { self.on_transfer_leader_msg(ctx, msg.get_message(), msg.disk_usage) diff --git a/components/raftstore-v2/tests/integrations/test_conf_change.rs b/components/raftstore-v2/tests/integrations/test_conf_change.rs index 7ea49c02a6b..c1c7861fd54 100644 --- a/components/raftstore-v2/tests/integrations/test_conf_change.rs +++ b/components/raftstore-v2/tests/integrations/test_conf_change.rs @@ -6,14 
+6,14 @@ use engine_traits::{Peekable, RaftEngineReadOnly, CF_DEFAULT}; use futures::executor::block_on; use kvproto::{ raft_cmdpb::{AdminCmdType, RaftCmdRequest}, - raft_serverpb::PeerState, + raft_serverpb::{PeerState, RaftMessage}, }; -use raft::prelude::ConfChangeType; +use raft::prelude::{ConfChangeType, MessageType}; use raftstore_v2::{ router::{PeerMsg, PeerTick}, SimpleWriteEncoder, }; -use tikv_util::store::new_learner_peer; +use tikv_util::store::{new_learner_peer, new_peer}; use crate::cluster::{check_skip_wal, Cluster}; @@ -199,3 +199,36 @@ fn remove_peer(cluster: &Cluster, offset_id: usize, region_id: u64, peer_id: u64 assert_eq!(region_state.get_state(), PeerState::Tombstone); assert_eq!(raft_engine.get_raft_state(region_id).unwrap(), None); } + +/// The peer should be able to respond an unknown sender, otherwise the +/// liveness of configuration change can't be guaranteed. +#[test] +fn test_unknown_peer() { + let cluster = Cluster::with_node_count(1, None); + + let router = &cluster.routers[0]; + let header = router.new_request_for(2).take_header(); + + // Create a fake message to see whether it's responded. + let from_peer = new_peer(10, 10); + let mut msg = Box::::default(); + msg.set_region_id(2); + msg.set_to_peer(header.get_peer().clone()); + msg.set_region_epoch(header.get_region_epoch().clone()); + msg.set_from_peer(from_peer.clone()); + let raft_message = msg.mut_message(); + raft_message.set_msg_type(raft::prelude::MessageType::MsgHeartbeat); + raft_message.set_from(10); + raft_message.set_term(10); + + router.send_raft_message(msg).unwrap(); + router.wait_flush(2, Duration::from_secs(3)); + // If peer cache is updated correctly, it should be able to respond. 
+ let msg = cluster.receiver(0).try_recv().unwrap(); + assert_eq!(*msg.get_to_peer(), from_peer); + assert_eq!(msg.get_from_peer(), header.get_peer()); + assert_eq!( + msg.get_message().get_msg_type(), + MessageType::MsgHeartbeatResponse + ); +} From c072027294b5f0ffa7e1b47c181d4d4bc40df958 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Wed, 15 Feb 2023 15:40:02 +0800 Subject: [PATCH 0528/1149] limit should await (#14222) close tikv/tikv#14221 Signed-off-by: bufferflies <1045931706@qq.com> --- src/server/tablet_snap.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index 0fc836f36c2..a54c5461e0d 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -60,7 +60,7 @@ impl RecvTabletSnapContext { } let chunk_size = match head.take_data().try_into() { - Ok(buff) => usize::from_ne_bytes(buff), + Ok(buff) => usize::from_le_bytes(buff), Err(_) => return Err(box_err!("failed to get chunk size")), }; let meta = head.take_message(); @@ -119,7 +119,7 @@ async fn send_snap_files( let mut total_sent = msg.compute_size() as u64; let mut chunk = SnapshotChunk::default(); chunk.set_message(msg); - chunk.set_data(usize::to_ne_bytes(SNAP_CHUNK_LEN).to_vec()); + chunk.set_data(usize::to_le_bytes(SNAP_CHUNK_LEN).to_vec()); sender .feed((chunk, WriteFlags::default().buffer_hint(true))) .await?; @@ -145,7 +145,7 @@ async fn send_snap_files( } off += readed; } - limiter.consume(off); + limiter.consume(off).await; total_sent += off as u64; let mut chunk = SnapshotChunk::default(); chunk.set_data(buffer); @@ -260,7 +260,7 @@ async fn recv_snap_files( None => return Err(box_err!("missing chunk")), }; f.write_all(&chunk[..])?; - limit.consume(chunk.len()); + limit.consume(chunk.len()).await; size += chunk.len(); } debug!("received snap file"; "file" => %p.display(), "size" => size); @@ -479,7 +479,7 @@ pub fn copy_tablet_snapshot( let mut head = SnapshotChunk::default(); 
head.set_message(msg); - head.set_data(usize::to_ne_bytes(SNAP_CHUNK_LEN).to_vec()); + head.set_data(usize::to_le_bytes(SNAP_CHUNK_LEN).to_vec()); let recv_context = RecvTabletSnapContext::new(head)?; let recv_path = recver_snap_mgr.tmp_recv_path(&recv_context.key); From 13c2e545a475d776a1202d9baf4029710c9043ca Mon Sep 17 00:00:00 2001 From: glorv Date: Wed, 15 Feb 2023 21:56:01 +0800 Subject: [PATCH 0529/1149] causal_ts: simplify the logic of pop ts (#14227) ref tikv/tikv#12794 Signed-off-by: glorv Co-authored-by: Ti Chi Robot --- components/causal_ts/src/tso.rs | 39 ++++++++++++++------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/components/causal_ts/src/tso.rs b/components/causal_ts/src/tso.rs index e63c3c2c3ba..2c99d8c068a 100644 --- a/components/causal_ts/src/tso.rs +++ b/components/causal_ts/src/tso.rs @@ -75,30 +75,25 @@ const MAX_TSO_BATCH_LIST_CAPACITY: u32 = 1024; /// TSO range: [(physical, logical_start), (physical, logical_end)) #[derive(Debug)] struct TsoBatch { - size: u32, physical: u64, + logical_start: u64, logical_end: u64, // exclusive - logical_start: AtomicU64, + // current valid logical_tso offset, alloc_offset >= logical_end means + // the batch is exhausted. + alloc_offset: AtomicU64, } impl TsoBatch { pub fn pop(&self) -> Option<(TimeStamp, bool /* is_used_up */)> { - let mut logical = self.logical_start.load(Ordering::Relaxed); - while logical < self.logical_end { - match self.logical_start.compare_exchange_weak( - logical, - logical + 1, - Ordering::Relaxed, - Ordering::Relaxed, - ) { - Ok(_) => { - return Some(( - TimeStamp::compose(self.physical, logical), - logical + 1 == self.logical_end, - )); - } - Err(x) => logical = x, - } + // alloc_offset might be far bigger than logical_end if the concurrency is + // *very* high, but it won't overflow in practice, so no need to do an + // extra load check here. 
+ let ts = self.alloc_offset.fetch_add(1, Ordering::Relaxed); + if ts < self.logical_end { + return Some(( + TimeStamp::compose(self.physical, ts), + ts + 1 == self.logical_end, + )); } None } @@ -109,22 +104,22 @@ impl TsoBatch { let logical_start = logical_end.checked_sub(batch_size as u64).unwrap(); Self { - size: batch_size, physical, + logical_start, logical_end, - logical_start: AtomicU64::new(logical_start), + alloc_offset: AtomicU64::new(logical_start), } } /// Number of remaining (available) TSO in the batch. pub fn remain(&self) -> u32 { self.logical_end - .saturating_sub(self.logical_start.load(Ordering::Relaxed)) as u32 + .saturating_sub(self.alloc_offset.load(Ordering::Relaxed)) as u32 } /// The original start timestamp in the batch. pub fn original_start(&self) -> TimeStamp { - TimeStamp::compose(self.physical, self.logical_end - self.size as u64) + TimeStamp::compose(self.physical, self.logical_start) } /// The excluded end timestamp after the last in batch. From 3df8a7a8aed9dc34498cc105af9b730bac336b18 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 16 Feb 2023 10:58:01 +0800 Subject: [PATCH 0530/1149] Raftstore-v2: snap status should be cleared when sending snapshot failed (#14230) ref tikv/tikv#12842 snap status should be cleared when sending snapshot failed Signed-off-by: SpadeA-Tang Co-authored-by: Xinye Tao --- .../src/operation/command/admin/mod.rs | 8 ++++- .../raftstore-v2/src/operation/ready/mod.rs | 6 +++- components/test_raftstore-v2/src/util.rs | 10 +++--- tests/integrations/raftstore/test_snap.rs | 32 ++++++++----------- 4 files changed, 31 insertions(+), 25 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index ca91e597cb9..0661d1c15dc 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -69,10 +69,16 @@ 
impl Peer { return; } + let pre_transfer_leader = cmd_type == AdminCmdType::TransferLeader + && !WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()) + .contains(WriteBatchFlags::TRANSFER_LEADER_PROPOSAL); + // The admin request is rejected because it may need to update epoch checker // which introduces an uncertainty and may breaks the correctness of epoch // checker. - if !self.applied_to_current_term() { + // As pre transfer leader is just a warmup phase, applying to the current term + // is not required. + if !self.applied_to_current_term() && !pre_transfer_leader { let e = box_err!( "{} peer has not applied to current term, applied_term {}, current_term {}", SlogFormat(&self.logger), diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index fea60049b93..943e3b6ba2f 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -30,7 +30,7 @@ use kvproto::{ raft_serverpb::{ExtraMessageType, RaftMessage}, }; use protobuf::Message as _; -use raft::{eraftpb, prelude::MessageType, Ready, StateRole, INVALID_ID}; +use raft::{eraftpb, prelude::MessageType, Ready, SnapshotStatus, StateRole, INVALID_ID}; use raftstore::{ coprocessor::{RegionChangeEvent, RoleChange}, store::{ @@ -362,6 +362,10 @@ impl Peer { ); // unreachable store self.raft_group_mut().report_unreachable(to_peer_id); + if msg_type == eraftpb::MessageType::MsgSnapshot { + self.raft_group_mut() + .report_snapshot(to_peer_id, SnapshotStatus::Failure); + } ctx.raft_metrics.send_message.add(msg_type, false); } } diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index 2bd9444b002..2f512982019 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -124,12 +124,12 @@ pub fn put_cf_till_size( key.into_bytes() } -pub fn configure_for_snapshot(cluster: &mut Cluster) { +pub fn 
configure_for_snapshot(config: &mut Config) { // Truncate the log quickly so that we can force sending snapshot. - cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); - cluster.cfg.raft_store.raft_log_gc_count_limit = Some(2); - cluster.cfg.raft_store.merge_max_log_gap = 1; - cluster.cfg.raft_store.snap_mgr_gc_tick_interval = ReadableDuration::millis(50); + config.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); + config.raft_store.raft_log_gc_count_limit = Some(2); + config.raft_store.merge_max_log_gap = 1; + config.raft_store.snap_mgr_gc_tick_interval = ReadableDuration::millis(50); } pub fn configure_for_lease_read_v2( diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index d18f42ec8ca..ddc4bb50406 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -21,6 +21,7 @@ use raftstore::{store::*, Result}; use rand::Rng; use security::SecurityManager; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv::server::snap::send_snap; use tikv_util::{config::*, time::Instant, HandyRwLock}; @@ -268,7 +269,12 @@ fn test_server_concurrent_snap() { test_concurrent_snap(&mut cluster); } -fn test_cf_snapshot(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_cf_snapshot() { + let mut cluster = new_cluster(0, 3); configure_for_snapshot(&mut cluster.cfg); cluster.run(); @@ -306,18 +312,6 @@ fn test_cf_snapshot(cluster: &mut Cluster) { must_get_cf_equal(&engine1, cf, b"k3", b"v3"); } -#[test] -fn test_node_cf_snapshot() { - let mut cluster = new_node_cluster(0, 3); - test_cf_snapshot(&mut cluster); -} - -#[test] -fn test_server_snapshot() { - let mut cluster = new_server_cluster(0, 3); - test_cf_snapshot(&mut cluster); -} - // replace 
content of all the snapshots with the first snapshot it received. #[derive(Clone)] struct StaleSnap { @@ -661,11 +655,12 @@ fn random_long_vec(length: usize) -> Vec { /// Snapshot is generated using apply term from apply thread, which should be /// set correctly otherwise lead to inconsistency. -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_correct_snapshot_term() { // Use five replicas so leader can send a snapshot to a new peer without // committing extra logs. - let mut cluster = new_server_cluster(0, 5); + let mut cluster = new_cluster(0, 5); let pd_client = cluster.pd_client.clone(); pd_client.disable_default_operator(); @@ -714,9 +709,10 @@ fn test_correct_snapshot_term() { } /// Test when applying a snapshot, old logs should be cleaned up. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_snapshot_clean_up_logs_with_log_gc() { - let mut cluster = new_node_cluster(0, 4); + let mut cluster = new_cluster(0, 4); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(50); cluster.cfg.raft_store.raft_log_gc_threshold = 50; // Speed up log gc. @@ -739,7 +735,7 @@ fn test_snapshot_clean_up_logs_with_log_gc() { // Peer (4, 4) must become leader at the end and send snapshot to 2. must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); - let raft_engine = cluster.engines[&2].raft.clone(); + let raft_engine = cluster.get_raft_engine(2); let mut dest = vec![]; raft_engine.get_all_entries_to(1, &mut dest).unwrap(); // No new log is proposed, so there should be no log at all. 
From c6aa5e35cd00dcd62262f9dd4531ac44e5bf8bd1 Mon Sep 17 00:00:00 2001 From: woofyzhao <490467089@qq.com> Date: Thu, 16 Feb 2023 16:40:02 +0800 Subject: [PATCH 0531/1149] Revise CONTRIBUTING.md contents (#14032) close tikv/tikv#14031 Signed-off-by: woofyzhao <490467089@qq.com> Co-authored-by: Ti Chi Robot --- CONTRIBUTING.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 711b2bdb192..41b2ef7a528 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -19,6 +19,7 @@ To build TiKV you'll need to at least have the following installed: * `make` - Build tool (run common workflows) * `cmake` - Build tool (required for gRPC) * `awk` - Pattern scanning/processing language +* [`protoc`](https://github.com/protocolbuffers/protobuf/releases) - Google protocol buffer compiler * C++ compiler - gcc 5+ (required for gRPC) If you are targeting platforms other than x86_64/aarch64 Linux or macOS, you'll also need: @@ -92,7 +93,7 @@ make format make clippy ``` -See the [style doc](https://github.com/rust-lang/rfcs/blob/master/style-guide/README.md) and the [API guidelines](https://rust-lang-nursery.github.io/api-guidelines/) for details on the conventions. +See the [style doc](https://github.com/rust-lang/fmt-rfcs/blob/master/guide/guide.md) and the [API guidelines](https://rust-lang-nursery.github.io/api-guidelines/) for details on the conventions. Please follow this style to make TiKV easy to review, maintain, and develop. @@ -115,13 +116,13 @@ To run TiKV as an actual key-value store, you will need to run it as a cluster ( Use [PD](https://github.com/tikv/pd) to manage the cluster (even if just one node on a single machine). -Instructions are in our [docs](https://tikv.org/docs/dev/tasks/deploy/binary/) (if you build TiKV from source, you could skip `1. Download package` and `tikv-server` is in directory `/target`). 
+Instructions are in our [docs](https://tikv.org/docs/latest/deploy/install/test/#install-binary-manually) (if you build TiKV from source, you could skip `1. Download package` and `tikv-server` is in directory `/target`). Tips: It's recommended to increase the open file limit above 82920. WSL2 users may refer to [the comment](https://github.com/Microsoft/WSL/issues/1688#issuecomment-532767317) if having difficulty in changing the `ulimit`. ### Configuration -Read our configuration guide to learn about various [configuration options](https://tikv.org/docs/dev/tasks/configure/introduction/). There is also a [configuration template](./etc/config-template.toml). +Read our configuration guide to learn about various [configuration options](https://tikv.org/docs/latest/deploy/configure/introduction/). There is also a [configuration template](./etc/config-template.toml). ## Contribution flow @@ -133,7 +134,7 @@ This is a rough outline of what a contributor's workflow looks like: - Write code, add test cases, and commit your work (see below for message format). - Run tests and make sure all tests pass. - Push your changes to a branch in your fork of the repository and submit a pull request. - * Make sure mention the issue, which is created at step 1, in the commit meesage. + * Make sure to mention the issue, which is created at step 1, in the commit message. - Your PR will be reviewed and may be requested some changes. * Once you've made changes, your PR must be re-reviewed and approved. * If the PR becomes out of date, you can use GitHub's 'update branch' button. 
From bbe06e97e43272be41de3e61fe4118607ec8055a Mon Sep 17 00:00:00 2001 From: Yifan Xu <30385241+xuyifangreeneyes@users.noreply.github.com> Date: Thu, 16 Feb 2023 17:20:03 +0800 Subject: [PATCH 0532/1149] coprocessor: use mur3 to calculate fmsketch (#14204) ref tikv/tikv#14231 Signed-off-by: xuyifan <675434007@qq.com> --- Cargo.lock | 8 +- Cargo.toml | 2 +- src/coprocessor/statistics/analyze.rs | 129 ++++++++++++++++++++++--- src/coprocessor/statistics/cmsketch.rs | 7 +- src/coprocessor/statistics/fmsketch.rs | 11 +-- 5 files changed, 125 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c8cf54fd4a9..d87014110fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3209,10 +3209,10 @@ dependencies = [ ] [[package]] -name = "murmur3" -version = "0.5.1" +name = "mur3" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ead5388e485d38e622630c6b05afd3761a6701ff15c55b279ea5b31dcb62cff" +checksum = "97af489e1e21b68de4c390ecca6703318bc1aa16e9733bcb62c089b73c6fbb1b" [[package]] name = "native-tls" @@ -6392,7 +6392,7 @@ dependencies = [ "memory_trace_macros", "mime", "more-asserts", - "murmur3", + "mur3", "nom 5.1.0", "notify", "num-traits", diff --git a/Cargo.toml b/Cargo.toml index cf66773b576..509f9514b10 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -118,7 +118,7 @@ match-template = "0.0.1" memory_trace_macros = { workspace = true } mime = "0.3.13" more-asserts = "0.2" -murmur3 = "0.5.1" +mur3 = "0.1" nom = { version = "5.1.0", default-features = false, features = ["std"] } notify = "4" num-traits = "0.2.14" diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index 25ecf95653d..85e0281064e 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -1,10 +1,13 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{cmp::Reverse, collections::BinaryHeap, marker::PhantomData, mem, sync::Arc}; +use std::{ + cmp::Reverse, collections::BinaryHeap, hash::Hasher, marker::PhantomData, mem, sync::Arc, +}; use api_version::{keyspace::KvPair, KvFormat}; use async_trait::async_trait; use kvproto::coprocessor::{KeyRange, Response}; +use mur3::Hasher128; use protobuf::Message; use rand::{rngs::StdRng, Rng}; use tidb_query_common::storage::{ @@ -411,7 +414,7 @@ impl RowSampleBuilder { } else { // Only if the `decoded_val` is Datum::Null, `decoded_val` is a Ok(None). // So it is safe the unwrap the Ok value. - let decoded_sorted_val = TT::sort_key(&decoded_val.as_string()?.unwrap().into_owned())?; + let decoded_sorted_val = TT::sort_key(&decoded_val.as_string()?.unwrap())?; decoded_sorted_val } } @@ -488,7 +491,6 @@ struct BaseRowSampleCollector { fm_sketches: Vec, rng: StdRng, total_sizes: Vec, - row_buf: Vec, memory_usage: usize, reported_memory_usage: usize, } @@ -501,7 +503,6 @@ impl Default for BaseRowSampleCollector { fm_sketches: vec![], rng: StdRng::from_entropy(), total_sizes: vec![], - row_buf: Vec::new(), memory_usage: 0, reported_memory_usage: 0, } @@ -516,11 +517,11 @@ impl BaseRowSampleCollector { fm_sketches: vec![FmSketch::new(max_fm_sketch_size); col_and_group_len], rng: StdRng::from_entropy(), total_sizes: vec![0; col_and_group_len], - row_buf: Vec::new(), memory_usage: 0, reported_memory_usage: 0, } } + pub fn collect_column_group( &mut self, columns_val: &[Vec], @@ -530,7 +531,6 @@ impl BaseRowSampleCollector { ) { let col_len = columns_val.len(); for i in 0..column_groups.len() { - self.row_buf.clear(); let offsets = column_groups[i].get_column_offsets(); let mut has_null = true; for j in offsets { @@ -545,23 +545,31 @@ impl BaseRowSampleCollector { self.null_count[col_len + i] += 1; continue; } - // Use a in place murmur3 to replace this memory copy. 
- for j in offsets { - if columns_info[*j as usize].as_accessor().is_string_like() { - self.row_buf - .extend_from_slice(&collation_keys_val[*j as usize]); + if offsets.len() == 1 { + let offset = offsets[0] as usize; + if columns_info[offset].as_accessor().is_string_like() { + self.fm_sketches[col_len + i].insert(&collation_keys_val[offset]); } else { - self.row_buf.extend_from_slice(&columns_val[*j as usize]); + self.fm_sketches[col_len + i].insert(&columns_val[offset]); + } + } else { + let mut hasher = Hasher128::with_seed(0); + for j in offsets { + if columns_info[*j as usize].as_accessor().is_string_like() { + hasher.write(&collation_keys_val[*j as usize]); + } else { + hasher.write(&columns_val[*j as usize]); + } } + self.fm_sketches[col_len + i].insert_hash_value(hasher.finish()); } - self.fm_sketches[col_len + i].insert(&self.row_buf); } } pub fn collect_column( &mut self, columns_val: &[Vec], - collation_keys_val: Vec>, + collation_keys_val: &[Vec], columns_info: &[tipb::ColumnInfo], ) { for i in 0..columns_val.len() { @@ -659,7 +667,7 @@ impl RowSampleCollector for BernoulliRowSampleCollector { columns_info: &[tipb::ColumnInfo], ) { self.base - .collect_column(&columns_val, collation_keys_val, columns_info); + .collect_column(&columns_val, &collation_keys_val, columns_info); self.sampling(columns_val); } fn sampling(&mut self, data: Vec>) { @@ -736,7 +744,7 @@ impl RowSampleCollector for ReservoirRowSampleCollector { columns_info: &[tipb::ColumnInfo], ) { self.base - .collect_column(&columns_val, collation_keys_val, columns_info); + .collect_column(&columns_val, &collation_keys_val, columns_info); self.sampling(columns_val); } @@ -1357,3 +1365,92 @@ mod tests { } } } + +#[cfg(test)] +mod benches { + use tidb_query_datatype::{ + codec::{ + batch::LazyBatchColumn, + collation::{collator::CollatorUtf8Mb4Bin, Collator}, + }, + EvalType, FieldTypeTp, + }; + + use super::*; + + fn prepare_arguments() -> ( + Vec>, + Vec>, + Vec, + Vec, + ) { + let mut 
columns_info = Vec::new(); + for i in 1..4 { + let mut col_info = tipb::ColumnInfo::default(); + col_info.set_column_id(i as i64); + col_info.as_mut_accessor().set_tp(FieldTypeTp::VarChar); + col_info + .as_mut_accessor() + .set_collation(Collation::Utf8Mb4Bin); + columns_info.push(col_info); + } + let mut columns_slice = Vec::new(); + for _ in 0..3 { + let mut col = LazyBatchColumn::decoded_with_capacity_and_tp(1, EvalType::Bytes); + col.mut_decoded().push_bytes(Some(b"abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789".to_vec())); + columns_slice.push(col) + } + let mut column_vals = Vec::new(); + let mut collation_key_vals = Vec::new(); + for i in 0..columns_info.len() { + let mut val = vec![]; + columns_slice[i] + .encode(0, &columns_info[i], &mut EvalContext::default(), &mut val) + .unwrap(); + if columns_info[i].as_accessor().is_string_like() { + let mut mut_val = &val[..]; + let decoded_val = table::decode_col_value( + &mut mut_val, + &mut EvalContext::default(), + &columns_info[i], + ) + .unwrap(); + let decoded_sorted_val = + CollatorUtf8Mb4Bin::sort_key(&decoded_val.as_string().unwrap().unwrap()) + .unwrap(); + collation_key_vals.push(decoded_sorted_val); + } else { + collation_key_vals.push(Vec::new()); + } + column_vals.push(val); + } + let mut column_group = tipb::AnalyzeColumnGroup::default(); + column_group.set_column_offsets(vec![0, 1, 2]); + column_group.set_prefix_lengths(vec![-1, -1, -1]); + let column_groups = vec![column_group]; + (column_vals, collation_key_vals, columns_info, column_groups) + } + + #[bench] + fn bench_collect_column(b: &mut test::Bencher) { + let mut collector = BaseRowSampleCollector::new(10000, 4); + let (column_vals, collation_key_vals, columns_info, _) = prepare_arguments(); + b.iter(|| { + collector.collect_column(&column_vals, &collation_key_vals, &columns_info); + }) + } + + #[bench] + fn 
bench_collect_column_group(b: &mut test::Bencher) { + let mut collector = BaseRowSampleCollector::new(10000, 4); + let (column_vals, collation_key_vals, columns_info, column_groups) = prepare_arguments(); + b.iter(|| { + collector.collect_column_group( + &column_vals, + &collation_key_vals, + &columns_info, + &column_groups, + ); + }) + } +} diff --git a/src/coprocessor/statistics/cmsketch.rs b/src/coprocessor/statistics/cmsketch.rs index 6a3042c8ee7..754a05b0bb2 100644 --- a/src/coprocessor/statistics/cmsketch.rs +++ b/src/coprocessor/statistics/cmsketch.rs @@ -1,6 +1,6 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. -use murmur3::murmur3_x64_128; +use mur3::murmurhash3_x64_128; /// `CmSketch` is used to estimate point queries. /// Refer:[Count-Min Sketch](https://en.wikipedia.org/wiki/Count-min_sketch) @@ -30,9 +30,8 @@ impl CmSketch { } // `hash` hashes the data into two u64 using murmur hash. - fn hash(mut bytes: &[u8]) -> (u64, u64) { - let out = murmur3_x64_128(&mut bytes, 0).unwrap(); - (out as u64, (out >> 64) as u64) + fn hash(bytes: &[u8]) -> (u64, u64) { + murmurhash3_x64_128(bytes, 0) } // `insert` inserts the data into cm sketch. For each row i, the position at diff --git a/src/coprocessor/statistics/fmsketch.rs b/src/coprocessor/statistics/fmsketch.rs index b52559434c7..341223215f3 100644 --- a/src/coprocessor/statistics/fmsketch.rs +++ b/src/coprocessor/statistics/fmsketch.rs @@ -1,7 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. use collections::HashSet; -use murmur3::murmur3_x64_128; +use mur3::murmurhash3_x64_128; /// `FmSketch` is used to count the approximate number of distinct /// elements in multiset. 
@@ -22,11 +22,8 @@ impl FmSketch { } } - pub fn insert(&mut self, mut bytes: &[u8]) { - let hash = { - let out = murmur3_x64_128(&mut bytes, 0).unwrap(); - out as u64 - }; + pub fn insert(&mut self, bytes: &[u8]) { + let hash = murmurhash3_x64_128(bytes, 0).0; self.insert_hash_value(hash); } @@ -38,7 +35,7 @@ impl FmSketch { proto } - fn insert_hash_value(&mut self, hash_val: u64) { + pub fn insert_hash_value(&mut self, hash_val: u64) { if (hash_val & self.mask) != 0 { return; } From 7a045008b28f7879b140a7960237d99d3a7d7381 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 16 Feb 2023 18:22:02 +0800 Subject: [PATCH 0533/1149] cdc: filter changes based on the range in request (#14213) close pingcap/tiflow#6346, close tikv/tikv#10073 cdc: filter changes based on the range in request Signed-off-by: Neil Shen Co-authored-by: Ti Chi Robot --- components/cdc/src/delegate.rs | 292 +++++++++++++++--- components/cdc/src/endpoint.rs | 23 +- components/cdc/src/initializer.rs | 24 +- components/cdc/src/service.rs | 26 +- components/cdc/tests/integrations/mod.rs | 2 + components/cdc/tests/integrations/test_cdc.rs | 133 ++++++++ 6 files changed, 444 insertions(+), 56 deletions(-) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index 120806588dc..c4212c426be 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -130,6 +130,7 @@ pub struct Downstream { state: Arc>, kv_api: ChangeDataRequestKvApi, filter_loop: bool, + pub(crate) observed_range: ObservedRange, } impl Downstream { @@ -144,6 +145,7 @@ impl Downstream { conn_id: ConnId, kv_api: ChangeDataRequestKvApi, filter_loop: bool, + observed_range: ObservedRange, ) -> Downstream { Downstream { id: DownstreamId::new(), @@ -155,6 +157,7 @@ impl Downstream { state: Arc::new(AtomicCell::new(DownstreamState::default())), kv_api, filter_loop, + observed_range, } } @@ -395,6 +398,11 @@ impl Delegate { self.region_id, ); + // Check observed key range in region. 
+ for downstream in self.downstreams_mut() { + downstream.observed_range.update_region_key_range(®ion); + } + // Mark the delegate as initialized. let mut pending = self.pending.take().unwrap(); self.region = Some(region); @@ -479,6 +487,7 @@ impl Delegate { request_id: u64, entries: Vec>, filter_loop: bool, + observed_range: &ObservedRange, ) -> Result> { let entries_len = entries.len(); let mut rows = vec![Vec::with_capacity(entries_len)]; @@ -496,6 +505,9 @@ impl Delegate { lock, old_value, })) => { + if !observed_range.contains_encoded_key(&lock.0) { + continue; + } let l = Lock::parse(&lock.1).unwrap(); if decode_lock(lock.0, l, &mut row, &mut _has_value) { continue; @@ -509,6 +521,9 @@ impl Delegate { write, old_value, })) => { + if !observed_range.contains_encoded_key(&write.0) { + continue; + } if decode_write(write.0, &write.1, &mut row, &mut _has_value, false) { continue; } @@ -640,51 +655,25 @@ impl Delegate { self.region_id ); - let mut need_filter = false; - for ds in downstreams { - if ds.filter_loop { - need_filter = true; - break; - } - } - // collect the change event cause by user write, which is `txn_source` = 0. // for changefeed which only need the user write, send the `filtered`, or else, // send them all. 
- let filtered = if need_filter { - let filtered = entries - .iter() - .filter(|x| x.txn_source == 0) - .cloned() - .collect::>(); - if filtered.is_empty() { - None - } else { - Some(Event { - region_id: self.region_id, - index, - event: Some(Event_oneof_event::Entries(EventEntries { - entries: filtered.into(), - ..Default::default() - })), - ..Default::default() - }) + let mut filtered_entries = None; + for downstream in downstreams { + if downstream.filter_loop { + let filtered = entries + .iter() + .filter(|x| x.txn_source == 0) + .cloned() + .collect::>(); + if !filtered.is_empty() { + filtered_entries = Some(filtered); + } + break; } - } else { - None - }; - - let event_entries = EventEntries { - entries: entries.into(), - ..Default::default() - }; - let change_data_event = Event { - region_id: self.region_id, - index, - event: Some(Event_oneof_event::Entries(event_entries)), - ..Default::default() - }; + } + let region_id = self.region_id; let send = move |downstream: &Downstream| { // No ready downstream or a downstream that does not match the kv_api type, will // be ignored. There will be one region that contains both Txn & Raw entries. 
@@ -692,15 +681,30 @@ impl Delegate { if !downstream.state.load().ready_for_change_events() || downstream.kv_api != kv_api { return Ok(()); } - if downstream.filter_loop && filtered.is_none() { + if downstream.filter_loop && filtered_entries.is_none() { return Ok(()); } - let event = if downstream.filter_loop { - filtered.clone().unwrap() + let entries_clone = if downstream.filter_loop { + downstream + .observed_range + .filter_entries(filtered_entries.clone().unwrap()) } else { - change_data_event.clone() + downstream.observed_range.filter_entries(entries.clone()) + }; + if entries_clone.is_empty() { + return Ok(()); + } + let event = Event { + region_id, + index, + event: Some(Event_oneof_event::Entries(EventEntries { + entries: entries_clone.into(), + ..Default::default() + })), + ..Default::default() }; + // Do not force send for real time change data events. let force_send = false; downstream.sink_event(event, force_send) @@ -1059,6 +1063,70 @@ fn decode_default(value: Vec, row: &mut EventRow, has_value: &mut bool) { *has_value = true; } +/// Observed key range. +#[derive(Clone, Default)] +pub struct ObservedRange { + start_key_encoded: Vec, + end_key_encoded: Vec, + start_key_raw: Vec, + end_key_raw: Vec, + pub(crate) all_key_covered: bool, +} + +impl ObservedRange { + pub fn new(start_key_encoded: Vec, end_key_encoded: Vec) -> Result { + let start_key_raw = Key::from_encoded(start_key_encoded.clone()) + .into_raw() + .map_err(|e| Error::Other(e.into()))?; + let end_key_raw = Key::from_encoded(end_key_encoded.clone()) + .into_raw() + .map_err(|e| Error::Other(e.into()))?; + Ok(ObservedRange { + start_key_encoded, + end_key_encoded, + start_key_raw, + end_key_raw, + all_key_covered: false, + }) + } + + #[allow(clippy::collapsible_if)] + pub fn update_region_key_range(&mut self, region: &Region) { + // Check observed key range in region. 
+ if self.start_key_encoded <= region.start_key { + if self.end_key_encoded.is_empty() + || (region.end_key <= self.end_key_encoded && !region.end_key.is_empty()) + { + // Observed range covers the region. + self.all_key_covered = true; + } + } + } + + fn is_key_in_range(&self, start_key: &[u8], end_key: &[u8], key: &[u8]) -> bool { + if self.all_key_covered { + return true; + } + if start_key <= key && (key < end_key || end_key.is_empty()) { + return true; + } + false + } + + pub fn contains_encoded_key(&self, key: &[u8]) -> bool { + self.is_key_in_range(&self.start_key_encoded, &self.end_key_encoded, key) + } + + pub fn filter_entries(&self, mut entries: Vec) -> Vec { + if self.all_key_covered { + return entries; + } + // Entry's key is in raw key format. + entries.retain(|e| self.is_key_in_range(&self.start_key_raw, &self.end_key_raw, &e.key)); + entries + } +} + #[cfg(test)] mod tests { use std::cell::Cell; @@ -1068,6 +1136,7 @@ mod tests { use kvproto::{errorpb::Error as ErrorHeader, metapb::Region}; use super::*; + use crate::channel::{channel, recv_timeout, MemoryQuota}; #[test] fn test_error() { @@ -1090,6 +1159,7 @@ mod tests { ConnId::new(), ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); downstream.set_sink(sink); let mut delegate = Delegate::new(region_id, Default::default()); @@ -1097,6 +1167,7 @@ mod tests { assert!(delegate.handle.is_observing()); let resolver = Resolver::new(region_id); assert!(delegate.on_region_ready(resolver, region).is_empty()); + assert!(delegate.downstreams()[0].observed_range.all_key_covered); let rx_wrap = Cell::new(Some(rx)); let receive_error = || { @@ -1214,6 +1285,7 @@ mod tests { ConnId::new(), ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ) }; @@ -1264,6 +1336,138 @@ mod tests { assert!(!delegate.handle.is_observing()); } + #[test] + fn test_observed_range() { + for case in vec![ + (b"".as_slice(), b"".as_slice(), false), + (b"a", b"", false), + (b"", b"b", false), + (b"a", b"b", 
true), + (b"a", b"bb", false), + (b"a", b"aa", true), + (b"aa", b"aaa", true), + ] { + let start_key = if !case.0.is_empty() { + Key::from_raw(case.0).into_encoded() + } else { + case.0.to_owned() + }; + let end_key = if !case.1.is_empty() { + Key::from_raw(case.1).into_encoded() + } else { + case.1.to_owned() + }; + let mut region = Region::default(); + region.start_key = start_key.to_owned(); + region.end_key = end_key.to_owned(); + + for k in 0..=0xff { + let mut observed_range = ObservedRange::default(); + observed_range.update_region_key_range(®ion); + assert!(observed_range.contains_encoded_key(&Key::from_raw(&[k]).into_encoded())); + } + let mut observed_range = ObservedRange::new( + Key::from_raw(b"a").into_encoded(), + Key::from_raw(b"b").into_encoded(), + ) + .unwrap(); + observed_range.update_region_key_range(®ion); + assert_eq!(observed_range.all_key_covered, case.2, "{:?}", case); + assert!( + observed_range.contains_encoded_key(&Key::from_raw(b"a").into_encoded()), + "{:?}", + case + ); + assert!( + observed_range.contains_encoded_key(&Key::from_raw(b"ab").into_encoded()), + "{:?}", + case + ); + if observed_range.all_key_covered { + assert!( + observed_range.contains_encoded_key(&Key::from_raw(b"b").into_encoded()), + "{:?}", + case + ); + } else { + assert!( + !observed_range.contains_encoded_key(&Key::from_raw(b"b").into_encoded()), + "{:?}", + case + ); + } + } + } + + #[test] + fn test_downstream_filter_entires() { + // Create a new delegate that observes [b, d). 
+ let observed_range = ObservedRange::new( + Key::from_raw(b"b").into_encoded(), + Key::from_raw(b"d").into_encoded(), + ) + .unwrap(); + let txn_extra_op = Arc::new(AtomicCell::new(TxnExtraOp::Noop)); + let mut delegate = Delegate::new(1, txn_extra_op); + assert!(delegate.handle.is_observing()); + + let mut map = HashMap::default(); + for k in b'a'..=b'e' { + let mut put = PutRequest::default(); + put.key = Key::from_raw(&[k]).into_encoded(); + put.cf = "lock".to_owned(); + put.value = Lock::new( + LockType::Put, + put.key.clone(), + 1.into(), + 10, + None, + TimeStamp::zero(), + 0, + TimeStamp::zero(), + ) + .to_bytes(); + delegate + .sink_txn_put( + put, + false, + &mut map, + |_: &mut EventRow, _: TimeStamp| Ok(()), + ) + .unwrap(); + } + assert_eq!(map.len(), 5); + + let (sink, mut drain) = channel(1, MemoryQuota::new(1024)); + let downstream = Downstream { + id: DownstreamId::new(), + req_id: 1, + conn_id: ConnId::new(), + peer: String::new(), + region_epoch: RegionEpoch::default(), + sink: Some(sink), + state: Arc::new(AtomicCell::new(DownstreamState::Normal)), + kv_api: ChangeDataRequestKvApi::TiDb, + filter_loop: false, + observed_range, + }; + delegate.add_downstream(downstream); + let entries = map.values().map(|(r, _)| r).cloned().collect(); + delegate + .sink_downstream(entries, 1, ChangeDataRequestKvApi::TiDb) + .unwrap(); + + let (mut tx, mut rx) = futures::channel::mpsc::unbounded(); + let runtime = tokio::runtime::Runtime::new().unwrap(); + runtime.spawn(async move { + drain.forward(&mut tx).await.unwrap(); + }); + let (e, _) = recv_timeout(&mut rx, std::time::Duration::from_secs(5)) + .unwrap() + .unwrap(); + assert_eq!(e.events[0].get_entries().get_entries().len(), 2, "{:?}", e); + } + #[test] fn test_decode_rawkv() { let cases = vec![ diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 2b4eb9ff226..b5e15ceee23 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -727,11 +727,12 @@ 
impl, E: KvEngine> Endpoint { }; let change_cmd = ChangeObserver::from_cdc(region_id, delegate.handle.clone()); - + let observed_range = downstream_.observed_range; let region_epoch = request.take_region_epoch(); let mut init = Initializer { engine: self.engine.clone(), sched, + observed_range, region_id, region_epoch, conn_id, @@ -1275,7 +1276,7 @@ mod tests { }; use super::*; - use crate::{channel, recv_timeout}; + use crate::{channel, delegate::ObservedRange, recv_timeout}; struct TestEndpointSuite { // The order must ensure `endpoint` be dropped before other fields. @@ -1426,6 +1427,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::RawKv, false, + ObservedRange::default(), ); req.set_kv_api(ChangeDataRequestKvApi::RawKv); suite.run(Task::Register { @@ -1462,6 +1464,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TxnKv, false, + ObservedRange::default(), ); req.set_kv_api(ChangeDataRequestKvApi::TxnKv); suite.run(Task::Register { @@ -1499,6 +1502,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TxnKv, false, + ObservedRange::default(), ); req.set_kv_api(ChangeDataRequestKvApi::TxnKv); suite.run(Task::Register { @@ -1678,6 +1682,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); suite.run(Task::Register { request: req, @@ -1725,6 +1730,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); // Enable batch resolved ts in the test. 
let version = FeatureGate::batch_resolved_ts(); @@ -1748,6 +1754,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); suite.run(Task::Register { request: req.clone(), @@ -1785,6 +1792,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); suite.run(Task::Register { request: req, @@ -1830,6 +1838,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); suite.add_local_reader(100); suite.run(Task::Register { @@ -1862,6 +1871,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); suite.run(Task::Register { request: req, @@ -1938,6 +1948,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); downstream.get_state().store(DownstreamState::Normal); // Enable batch resolved ts in the test. @@ -1975,6 +1986,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); downstream.get_state().store(DownstreamState::Normal); suite.add_region(2, 100); @@ -2021,6 +2033,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); downstream.get_state().store(DownstreamState::Normal); suite.add_region(3, 100); @@ -2092,6 +2105,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); let downstream_id = downstream.get_id(); suite.run(Task::Register { @@ -2135,6 +2149,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); let new_downstream_id = downstream.get_id(); suite.run(Task::Register { @@ -2187,6 +2202,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); suite.run(Task::Register { request: req, @@ -2242,6 +2258,7 @@ mod tests { conn_id, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); downstream.get_state().store(DownstreamState::Normal); suite.run(Task::Register { @@ -2360,6 +2377,7 @@ 
mod tests { conn_id_a, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); suite.run(Task::Register { request: req.clone(), @@ -2384,6 +2402,7 @@ mod tests { conn_id_b, ChangeDataRequestKvApi::TiDb, false, + ObservedRange::default(), ); suite.run(Task::Register { request: req.clone(), diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 38c8603900e..68850ac55ac 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -47,7 +47,7 @@ use txn_types::{Key, KvPair, Lock, LockType, OldValue, TimeStamp}; use crate::{ channel::CdcEvent, - delegate::{post_init_downstream, Delegate, DownstreamId, DownstreamState}, + delegate::{post_init_downstream, Delegate, DownstreamId, DownstreamState, ObservedRange}, endpoint::Deregister, metrics::*, old_value::{near_seek_old_value, new_old_value_cursor, OldValueCursors}, @@ -79,6 +79,7 @@ pub(crate) struct Initializer { pub(crate) sched: Scheduler, pub(crate) sink: crate::channel::Sink, + pub(crate) observed_range: ObservedRange, pub(crate) region_id: u64, pub(crate) region_epoch: RegionEpoch, pub(crate) observe_id: ObserveId, @@ -206,10 +207,12 @@ impl Initializer { let region_id = region.get_id(); let observe_id = self.observe_id; let kv_api = self.kv_api; + self.observed_range.update_region_key_range(®ion); debug!("cdc async incremental scan"; "region_id" => region_id, "downstream_id" => ?downstream_id, "observe_id" => ?self.observe_id, + "all_key_covered" => ?self.observed_range.all_key_covered, "start_key" => log_wrappers::Value::key(snap.lower_bound().unwrap_or_default()), "end_key" => log_wrappers::Value::key(snap.upper_bound().unwrap_or_default())); @@ -432,6 +435,7 @@ impl Initializer { self.request_id, entries, self.filter_loop, + &self.observed_range, )?; if done { let (cb, fut) = tikv_util::future::paired_future_callback(); @@ -641,7 +645,7 @@ mod tests { }), sched: receiver_worker.scheduler(), sink, - + observed_range: 
ObservedRange::default(), region_id: 1, region_epoch: RegionEpoch::default(), observe_id: ObserveId::new(), @@ -668,6 +672,12 @@ mod tests { let mut expected_locks = BTreeMap::>>::new(); + // Only observe ["", "b\0x90"] + let observed_range = ObservedRange::new( + Key::from_raw(&[]).into_encoded(), + Key::from_raw(&[b'k', 90]).into_encoded(), + ) + .unwrap(); let mut total_bytes = 0; // Pessimistic locks should not be tracked for i in 0..10 { @@ -700,6 +710,7 @@ mod tests { ChangeDataRequestKvApi::TiDb, false, ); + initializer.observed_range = observed_range.clone(); let check_result = || loop { let task = rx.recv().unwrap(); match task { @@ -713,7 +724,14 @@ mod tests { // To not block test by barrier. pool.spawn(async move { let mut d = drain.drain(); - while d.next().await.is_some() {} + while let Some((e, _)) = d.next().await { + if let CdcEvent::Event(e) = e { + for e in e.get_entries().get_entries() { + let key = Key::from_raw(&e.key).into_encoded(); + assert!(observed_range.contains_encoded_key(&key), "{:?}", e); + } + } + } }); block_on(initializer.async_incremental_scan(snap.clone(), region.clone())).unwrap(); diff --git a/components/cdc/src/service.rs b/components/cdc/src/service.rs index f9665283c45..215f2cdebca 100644 --- a/components/cdc/src/service.rs +++ b/components/cdc/src/service.rs @@ -26,7 +26,7 @@ use tikv_util::{error, info, warn, worker::*}; use crate::{ channel::{channel, MemoryQuota, Sink, CDC_CHANNLE_CAPACITY}, - delegate::{Downstream, DownstreamId, DownstreamState}, + delegate::{Downstream, DownstreamId, DownstreamState, ObservedRange}, endpoint::{Deregister, Task}, }; @@ -207,7 +207,7 @@ impl ChangeData for Service { let (event_sink, mut event_drain) = channel(CDC_CHANNLE_CAPACITY, self.memory_quota.clone()); let peer = ctx.peer(); - let conn = Conn::new(event_sink, peer); + let conn = Conn::new(event_sink, peer.clone()); let conn_id = conn.get_id(); if let Err(status) = self @@ -217,11 +217,12 @@ impl ChangeData for Service { 
RpcStatus::with_message(RpcStatusCode::INVALID_ARGUMENT, format!("{:?}", e)) }) { - error!("cdc connection initiate failed"; "error" => ?status); - ctx.spawn( - sink.fail(status) - .unwrap_or_else(|e| error!("cdc failed to send error"; "error" => ?e)), - ); + error!("cdc connection initiate failed"; + "downstream" => ?peer, "error" => ?status); + ctx.spawn(sink.fail(status).unwrap_or_else(move |e| { + error!("cdc failed to send error"; + "downstream" => ?peer, "error" => ?e) + })); return; } @@ -236,10 +237,20 @@ impl ChangeData for Service { Err(e) => { warn!("empty or invalid TiCDC version, please upgrading TiCDC"; "version" => request.get_header().get_ticdc_version(), + "downstream" => ?peer, "error" => ?e); semver::Version::new(0, 0, 0) } }; + let observed_range = + match ObservedRange::new(request.start_key.clone(), request.end_key.clone()) { + Ok(observed_range) => observed_range, + Err(e) => { + warn!("cdc invalid observed start key or end key version"; + "downstream" => ?peer, "error" => ?e); + ObservedRange::default() + } + }; let downstream = Downstream::new( peer.clone(), region_epoch, @@ -247,6 +258,7 @@ impl ChangeData for Service { conn_id, req_kvapi, request.filter_loop, + observed_range, ); let ret = scheduler .schedule(Task::Register { diff --git a/components/cdc/tests/integrations/mod.rs b/components/cdc/tests/integrations/mod.rs index 821e4ad186e..c60a1fe8cb9 100644 --- a/components/cdc/tests/integrations/mod.rs +++ b/components/cdc/tests/integrations/mod.rs @@ -1,5 +1,7 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
+#![feature(assert_matches)] + mod test_cdc; mod test_flow_control; diff --git a/components/cdc/tests/integrations/test_cdc.rs b/components/cdc/tests/integrations/test_cdc.rs index 9de1a77a8ad..3e5345e51f8 100644 --- a/components/cdc/tests/integrations/test_cdc.rs +++ b/components/cdc/tests/integrations/test_cdc.rs @@ -2597,3 +2597,136 @@ fn test_flashback() { } } } + +#[test] +fn test_cdc_filter_key_range() { + let mut suite = TestSuite::new(1, ApiVersion::V1); + + let req = suite.new_changedata_request(1); + + // Observe range [key1, key3). + let mut req_1_3 = req.clone(); + req_1_3.request_id = 13; + req_1_3.start_key = Key::from_raw(b"key1").into_encoded(); + req_1_3.end_key = Key::from_raw(b"key3").into_encoded(); + let (mut req_tx13, _event_feed_wrap13, receive_event13) = + new_event_feed(suite.get_region_cdc_client(1)); + block_on(req_tx13.send((req_1_3, WriteFlags::default()))).unwrap(); + let event = receive_event13(false); + event + .events + .into_iter() + .for_each(|e| match e.event.unwrap() { + Event_oneof_event::Entries(es) => { + assert!(es.entries.len() == 1, "{:?}", es); + let e = &es.entries[0]; + assert_eq!(e.get_type(), EventLogType::Initialized, "{:?}", es); + } + other => panic!("unknown event {:?}", other), + }); + + let (mut req_tx24, _event_feed_wrap24, receive_event24) = + new_event_feed(suite.get_region_cdc_client(1)); + let mut req_2_4 = req; + req_2_4.request_id = 24; + req_2_4.start_key = Key::from_raw(b"key2").into_encoded(); + req_2_4.end_key = Key::from_raw(b"key4").into_encoded(); + block_on(req_tx24.send((req_2_4, WriteFlags::default()))).unwrap(); + let event = receive_event24(false); + event + .events + .into_iter() + .for_each(|e| match e.event.unwrap() { + Event_oneof_event::Entries(es) => { + assert!(es.entries.len() == 1, "{:?}", es); + let e = &es.entries[0]; + assert_eq!(e.get_type(), EventLogType::Initialized, "{:?}", es); + } + other => panic!("unknown event {:?}", other), + }); + + // Sleep a while to make sure the 
stream is registered. + sleep_ms(1000); + + let receive_and_check_events = |is13: bool, is24: bool| -> Vec { + if is13 && is24 { + let mut events = receive_event13(false).events.to_vec(); + let mut events24 = receive_event24(false).events.to_vec(); + events.append(&mut events24); + events + } else if is13 { + let events = receive_event13(false).events.to_vec(); + let event = receive_event24(true); + assert!(event.resolved_ts.is_some(), "{:?}", event); + events + } else if is24 { + let events = receive_event24(false).events.to_vec(); + let event = receive_event13(true); + assert!(event.resolved_ts.is_some(), "{:?}", event); + events + } else { + let event = receive_event13(true); + assert!(event.resolved_ts.is_some(), "{:?}", event); + let event = receive_event24(true); + assert!(event.resolved_ts.is_some(), "{:?}", event); + vec![] + } + }; + for case in &[ + ("key1", true, false, true /* commit */), + ("key1", true, false, false /* rollback */), + ("key2", true, true, true), + ("key3", false, true, true), + ("key4", false, false, true), + ] { + let (k, v) = (case.0.to_owned(), "value".to_owned()); + // Prewrite + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.clone().into_bytes(); + mutation.value = v.into_bytes(); + suite.must_kv_prewrite(1, vec![mutation], k.clone().into_bytes(), start_ts); + let mut events = receive_and_check_events(case.1, case.2); + while let Some(event) = events.pop() { + match event.event.unwrap() { + Event_oneof_event::Entries(entries) => { + assert_eq!(entries.entries.len(), 1); + assert_eq!(entries.entries[0].get_type(), EventLogType::Prewrite); + } + other => panic!("unknown event {:?}", other), + } + } + + if case.3 { + // Commit + let commit_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + suite.must_kv_commit(1, vec![k.into_bytes()], start_ts, commit_ts); + let mut events = receive_and_check_events(case.1, 
case.2); + while let Some(event) = events.pop() { + match event.event.unwrap() { + Event_oneof_event::Entries(entries) => { + assert_eq!(entries.entries.len(), 1); + assert_eq!(entries.entries[0].get_type(), EventLogType::Commit); + } + other => panic!("unknown event {:?}", other), + } + } + } else { + // Rollback + suite.must_kv_rollback(1, vec![k.into_bytes()], start_ts); + let mut events = receive_and_check_events(case.1, case.2); + while let Some(event) = events.pop() { + match event.event.unwrap() { + Event_oneof_event::Entries(entries) => { + assert_eq!(entries.entries.len(), 1); + assert_eq!(entries.entries[0].get_type(), EventLogType::Rollback); + } + other => panic!("unknown event {:?}", other), + } + } + } + } + + suite.stop(); +} From c5ce165ad35121114892fb7b27aeeaae3c3e7afc Mon Sep 17 00:00:00 2001 From: Jay Date: Thu, 16 Feb 2023 18:48:02 +0800 Subject: [PATCH 0534/1149] raftstore-v2: fix destroy blocked by apply progress (#14223) close tikv/tikv#14215 If a peer is marked for destroy, it will skip all apply result, which will make it never apply to committed index. This PR relaxes the check to last_applying_index and always process apply result. It also fixes a bug that new peer created by large ID may not survive restart. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- .../raftstore-v2/src/operation/command/mod.rs | 9 +- components/raftstore-v2/src/operation/life.rs | 5 +- .../raftstore-v2/src/operation/ready/mod.rs | 14 +++ .../raftstore-v2/tests/failpoints/mod.rs | 1 + .../tests/failpoints/test_life.rs | 67 +++++++++++++++ .../tests/integrations/cluster.rs | 75 +++++++++++++++- .../tests/integrations/test_life.rs | 85 +++---------------- .../raftstore/src/store/worker/region.rs | 17 ++-- 8 files changed, 183 insertions(+), 90 deletions(-) create mode 100644 components/raftstore-v2/tests/failpoints/test_life.rs diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index d06e43c0303..9f24241b039 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -320,11 +320,10 @@ impl Peer { } pub fn on_apply_res(&mut self, ctx: &mut StoreContext, apply_res: ApplyRes) { - if !self.serving() { - return; + if !self.serving() || !apply_res.admin_result.is_empty() { + // TODO: remove following log once stable. + info!(self.logger, "on_apply_res"; "apply_res" => ?apply_res, "apply_trace" => ?self.storage().apply_trace()); } - // TODO: remove following log once stable. - info!(self.logger, "on_apply_res"; "apply_res" => ?apply_res, "apply_trace" => ?self.storage().apply_trace()); // It must just applied a snapshot. if apply_res.applied_index < self.entry_storage().first_index() { // Ignore admin command side effects, otherwise it may split incomplete @@ -391,7 +390,7 @@ impl Peer { } let last_applying_index = self.compact_log_context().last_applying_index(); let committed_index = self.entry_storage().commit_index(); - if last_applying_index < committed_index { + if last_applying_index < committed_index || !self.serving() { // We need to continue to apply after previous page is finished. 
self.set_has_ready(); } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 8b63f9aae89..fdba7efdf4d 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -570,12 +570,13 @@ impl Peer { /// tablet. #[inline] pub fn postponed_destroy(&self) -> bool { + let last_applying_index = self.compact_log_context().last_applying_index(); let entry_storage = self.storage().entry_storage(); // If it's marked as tombstone, then it must be changed by conf change. In // this case, all following entries are skipped so applied_index never equals - // to commit_index. + // to last_applying_index. (self.storage().region_state().get_state() != PeerState::Tombstone - && entry_storage.applied_index() != entry_storage.commit_index()) + && entry_storage.applied_index() != last_applying_index) // Wait for critical commands like split. || self.has_pending_tombstone_tablets() } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 943e3b6ba2f..adb0edf82e4 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -236,6 +236,7 @@ impl Peer { } cmp::Ordering::Greater => { // We need to create the target peer. + info!(self.logger, "mark for destroy for larger ID"; "larger_id" => to_peer.get_id()); self.mark_for_destroy(Some(msg)); return; } @@ -943,6 +944,19 @@ impl Storage { } // If snapshot initializes the peer, we don't need to write apply trace again. if !self.ever_persisted() { + let region_id = self.region().get_id(); + let entry_storage = self.entry_storage(); + let raft_engine = entry_storage.raft_engine(); + if write_task.raft_wb.is_none() { + write_task.raft_wb = Some(raft_engine.log_batch(64)); + } + let wb = write_task.raft_wb.as_mut().unwrap(); + // There may be tombstone key from last peer. 
+ raft_engine + .clean(region_id, 0, entry_storage.raft_state(), wb) + .unwrap_or_else(|e| { + slog_panic!(self.logger(), "failed to clean up region"; "error" => ?e); + }); self.init_apply_trace(write_task); self.set_ever_persisted(); } diff --git a/components/raftstore-v2/tests/failpoints/mod.rs b/components/raftstore-v2/tests/failpoints/mod.rs index d04ad2cafc2..e2f6884dd54 100644 --- a/components/raftstore-v2/tests/failpoints/mod.rs +++ b/components/raftstore-v2/tests/failpoints/mod.rs @@ -10,5 +10,6 @@ mod cluster; mod test_basic_write; mod test_bootstrap; +mod test_life; mod test_split; mod test_trace_apply; diff --git a/components/raftstore-v2/tests/failpoints/test_life.rs b/components/raftstore-v2/tests/failpoints/test_life.rs new file mode 100644 index 00000000000..ed05c1c6fad --- /dev/null +++ b/components/raftstore-v2/tests/failpoints/test_life.rs @@ -0,0 +1,67 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::time::Duration; + +use engine_traits::CF_DEFAULT; +use futures::executor::block_on; +use kvproto::raft_serverpb::RaftMessage; +use raft::prelude::MessageType; +use raftstore_v2::{router::PeerMsg, SimpleWriteEncoder}; +use tikv_util::store::new_peer; + +use crate::cluster::{life_helper::assert_peer_not_exist, Cluster}; + +/// Test if a peer can be destroyed when it's applying entries +#[test] +fn test_destroy_by_larger_id_while_applying() { + let fp = "APPLY_COMMITTED_ENTRIES"; + let mut cluster = Cluster::default(); + let router = &cluster.routers[0]; + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + + fail::cfg(fp, "pause").unwrap(); + + let header = Box::new(router.new_request_for(2).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, b"key", b"value"); + let (msg, mut sub) = PeerMsg::simple_write(header.clone(), put.clone().encode()); + router.send(2, msg).unwrap(); + assert!(block_on(sub.wait_committed())); + + let mut larger_id_msg = Box::::default(); + 
larger_id_msg.set_region_id(2); + let mut target_peer = header.get_peer().clone(); + target_peer.set_id(target_peer.get_id() + 1); + larger_id_msg.set_to_peer(target_peer.clone()); + larger_id_msg.set_region_epoch(header.get_region_epoch().clone()); + larger_id_msg + .mut_region_epoch() + .set_conf_ver(header.get_region_epoch().get_conf_ver() + 1); + larger_id_msg.set_from_peer(new_peer(2, 8)); + let raft_message = larger_id_msg.mut_message(); + raft_message.set_msg_type(MessageType::MsgHeartbeat); + raft_message.set_from(8); + raft_message.set_to(target_peer.get_id()); + raft_message.set_term(10); + + // Larger ID should trigger destroy. + router.send_raft_message(larger_id_msg).unwrap(); + fail::remove(fp); + assert_peer_not_exist(2, header.get_peer().get_id(), router); + let meta = router + .must_query_debug_info(2, Duration::from_secs(3)) + .unwrap(); + assert_eq!(meta.raft_status.id, target_peer.get_id()); + assert_eq!(meta.raft_status.hard_state.term, 10); + + std::thread::sleep(Duration::from_millis(10)); + + // New peer should survive restart. 
+ cluster.restart(0); + let router = &cluster.routers[0]; + let meta = router + .must_query_debug_info(2, Duration::from_secs(3)) + .unwrap(); + assert_eq!(meta.raft_status.id, target_peer.get_id()); + assert_eq!(meta.raft_status.hard_state.term, 10); +} diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index ac3f30c7107..4a14b85f616 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -98,7 +98,10 @@ impl TestRouter { thread::sleep(Duration::from_millis(10)); continue; } - return block_on(sub.result()); + let res = block_on(sub.result()); + if res.is_some() { + return res; + } } None } @@ -721,3 +724,73 @@ pub mod split_helper { (left, right) } } + +pub mod life_helper { + use std::assert_matches::assert_matches; + + use engine_traits::RaftEngine; + use kvproto::raft_serverpb::{ExtraMessageType, PeerState}; + + use super::*; + + pub fn assert_peer_not_exist(region_id: u64, peer_id: u64, router: &TestRouter) { + let timer = Instant::now(); + loop { + let (ch, sub) = DebugInfoChannel::pair(); + let msg = PeerMsg::QueryDebugInfo(ch); + match router.send(region_id, msg) { + Err(TrySendError::Disconnected(_)) => return, + Ok(()) => { + if let Some(m) = block_on(sub.result()) { + if m.raft_status.id != peer_id { + return; + } + } + } + Err(_) => (), + } + if timer.elapsed() < Duration::from_secs(3) { + thread::sleep(Duration::from_millis(10)); + } else { + panic!("peer of {} still exists", region_id); + } + } + } + + // TODO: make raft engine support more suitable way to verify range is empty. + /// Verify all states in raft engine are cleared. 
+ pub fn assert_tombstone(raft_engine: &impl RaftEngine, region_id: u64, peer: &metapb::Peer) { + let mut buf = vec![]; + raft_engine.get_all_entries_to(region_id, &mut buf).unwrap(); + assert!(buf.is_empty(), "{:?}", buf); + assert_matches!(raft_engine.get_raft_state(region_id), Ok(None)); + assert_matches!(raft_engine.get_apply_state(region_id, u64::MAX), Ok(None)); + let region_state = raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); + assert_matches!(region_state.get_state(), PeerState::Tombstone); + assert!( + region_state.get_region().get_peers().contains(peer), + "{:?}", + region_state + ); + } + + #[track_caller] + pub fn assert_valid_report(report: &RaftMessage, region_id: u64, peer_id: u64) { + assert_eq!( + report.get_extra_msg().get_type(), + ExtraMessageType::MsgGcPeerResponse + ); + assert_eq!(report.get_region_id(), region_id); + assert_eq!(report.get_from_peer().get_id(), peer_id); + } + + #[track_caller] + pub fn assert_tombstone_msg(msg: &RaftMessage, region_id: u64, peer_id: u64) { + assert_eq!(msg.get_region_id(), region_id); + assert_eq!(msg.get_to_peer().get_id(), peer_id); + assert!(msg.get_is_tombstone()); + } +} diff --git a/components/raftstore-v2/tests/integrations/test_life.rs b/components/raftstore-v2/tests/integrations/test_life.rs index 2a5dfafc509..373763a53ef 100644 --- a/components/raftstore-v2/tests/integrations/test_life.rs +++ b/components/raftstore-v2/tests/integrations/test_life.rs @@ -1,88 +1,23 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{ - assert_matches::assert_matches, - thread, - time::{Duration, Instant}, -}; +use std::time::Duration; -use crossbeam::channel::TrySendError; -use engine_traits::{RaftEngine, RaftEngineReadOnly, CF_DEFAULT}; +use engine_traits::{RaftEngineReadOnly, CF_DEFAULT}; use futures::executor::block_on; -use kvproto::{ - metapb, - raft_cmdpb::AdminCmdType, - raft_serverpb::{ExtraMessageType, PeerState, RaftMessage}, -}; +use kvproto::{raft_cmdpb::AdminCmdType, raft_serverpb::RaftMessage}; use raft::prelude::{ConfChangeType, MessageType}; use raftstore_v2::{ - router::{DebugInfoChannel, PeerMsg, PeerTick}, + router::{PeerMsg, PeerTick}, SimpleWriteEncoder, }; use tikv_util::store::{new_learner_peer, new_peer}; -use crate::cluster::{Cluster, TestRouter}; - -fn assert_peer_not_exist(region_id: u64, peer_id: u64, router: &TestRouter) { - let timer = Instant::now(); - loop { - let (ch, sub) = DebugInfoChannel::pair(); - let msg = PeerMsg::QueryDebugInfo(ch); - match router.send(region_id, msg) { - Err(TrySendError::Disconnected(_)) => return, - Ok(()) => { - if let Some(m) = block_on(sub.result()) { - if m.raft_status.id != peer_id { - return; - } - } - } - Err(_) => (), - } - if timer.elapsed() < Duration::from_secs(3) { - thread::sleep(Duration::from_millis(10)); - } else { - panic!("peer of {} still exists", region_id); - } - } -} - -// TODO: make raft engine support more suitable way to verify range is empty. -/// Verify all states in raft engine are cleared. 
-fn assert_tombstone(raft_engine: &impl RaftEngine, region_id: u64, peer: &metapb::Peer) { - let mut buf = vec![]; - raft_engine.get_all_entries_to(region_id, &mut buf).unwrap(); - assert!(buf.is_empty(), "{:?}", buf); - assert_matches!(raft_engine.get_raft_state(region_id), Ok(None)); - assert_matches!(raft_engine.get_apply_state(region_id, u64::MAX), Ok(None)); - let region_state = raft_engine - .get_region_state(region_id, u64::MAX) - .unwrap() - .unwrap(); - assert_matches!(region_state.get_state(), PeerState::Tombstone); - assert!( - region_state.get_region().get_peers().contains(peer), - "{:?}", - region_state - ); -} - -#[track_caller] -fn assert_valid_report(report: &RaftMessage, region_id: u64, peer_id: u64) { - assert_eq!( - report.get_extra_msg().get_type(), - ExtraMessageType::MsgGcPeerResponse - ); - assert_eq!(report.get_region_id(), region_id); - assert_eq!(report.get_from_peer().get_id(), peer_id); -} - -#[track_caller] -fn assert_tombstone_msg(msg: &RaftMessage, region_id: u64, peer_id: u64) { - assert_eq!(msg.get_region_id(), region_id); - assert_eq!(msg.get_to_peer().get_id(), peer_id); - assert!(msg.get_is_tombstone()); -} +use crate::cluster::{ + life_helper::{ + assert_peer_not_exist, assert_tombstone, assert_tombstone_msg, assert_valid_report, + }, + Cluster, +}; /// Test a peer can be created by general raft message and destroyed tombstone /// message. 
diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 694be1a2b8c..84bc3b27084 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -929,14 +929,13 @@ pub(crate) mod tests { }, }; - const PENDING_APPLY_CHECK_INTERVAL: u64 = 200; + const PENDING_APPLY_CHECK_INTERVAL: Duration = Duration::from_millis(200); const STALE_PEER_CHECK_TICK: usize = 1; pub fn make_raftstore_cfg(use_delete_range: bool) -> Arc> { let mut store_cfg = Config::default(); store_cfg.snap_apply_batch_size = ReadableSize(0); - store_cfg.region_worker_tick_interval = - ReadableDuration::millis(PENDING_APPLY_CHECK_INTERVAL); + store_cfg.region_worker_tick_interval = ReadableDuration(PENDING_APPLY_CHECK_INTERVAL); store_cfg.clean_stale_ranges_tick = STALE_PEER_CHECK_TICK; store_cfg.use_delete_range = use_delete_range; store_cfg.snap_generator_pool_size = 2; @@ -1349,7 +1348,7 @@ pub(crate) mod tests { ); gen_and_apply_snap(5); destroy_region(6); - thread::sleep(Duration::from_millis(PENDING_APPLY_CHECK_INTERVAL * 2)); + thread::sleep(PENDING_APPLY_CHECK_INTERVAL * 2); assert!(check_region_exist(6)); assert_eq!( engine @@ -1406,7 +1405,7 @@ pub(crate) mod tests { .unwrap(), 2 ); - thread::sleep(Duration::from_millis(PENDING_APPLY_CHECK_INTERVAL * 2)); + thread::sleep(PENDING_APPLY_CHECK_INTERVAL * 2); assert!(!check_region_exist(6)); #[cfg(feature = "failpoints")] @@ -1414,12 +1413,16 @@ pub(crate) mod tests { engine.kv.compact_files_in_range(None, None, None).unwrap(); fail::cfg("handle_new_pending_applies", "return").unwrap(); gen_and_apply_snap(7); - thread::sleep(Duration::from_millis(PENDING_APPLY_CHECK_INTERVAL * 2)); + thread::sleep(PENDING_APPLY_CHECK_INTERVAL * 2); must_not_finish(&[7]); fail::remove("handle_new_pending_applies"); - thread::sleep(Duration::from_millis(PENDING_APPLY_CHECK_INTERVAL * 2)); + thread::sleep(PENDING_APPLY_CHECK_INTERVAL * 2); 
wait_apply_finish(&[7]); } + bg_worker.stop(); + // Wait the timer fired. Otherwise deletion of directory may race with timer + // task. + thread::sleep(PENDING_APPLY_CHECK_INTERVAL * 2); } #[derive(Clone, Default)] From e784a50463b40250d273b0c21a6417bdb374379b Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Fri, 17 Feb 2023 15:50:02 +0800 Subject: [PATCH 0535/1149] raftstore-v2: impl report buckets (#14044) ref tikv/tikv#12842 1. implement bucket split and report to pd Signed-off-by: bufferflies <1045931706@qq.com> --- components/pd_client/src/client.rs | 137 ++++++----- components/pd_client/src/client_v2.rs | 90 ++++--- components/pd_client/src/lib.rs | 5 + components/raftstore-v2/src/fsm/peer.rs | 8 +- .../raftstore-v2/src/operation/bucket.rs | 223 ++++++++++++++++++ .../src/operation/command/admin/split.rs | 2 + components/raftstore-v2/src/operation/mod.rs | 1 + .../raftstore-v2/src/operation/ready/mod.rs | 1 + components/raftstore-v2/src/raft/peer.rs | 26 +- components/raftstore-v2/src/router/imp.rs | 24 +- components/raftstore-v2/src/router/message.rs | 6 + components/raftstore-v2/src/worker/pd/mod.rs | 3 + .../tests/integrations/cluster.rs | 37 ++- .../tests/integrations/test_pd_heartbeat.rs | 92 +++++++- components/test_pd/src/mocker/mod.rs | 4 + components/test_pd/src/mocker/service.rs | 17 +- components/test_pd/src/server.rs | 27 ++- 17 files changed, 560 insertions(+), 143 deletions(-) create mode 100644 components/raftstore-v2/src/operation/bucket.rs diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 402192596b5..917176b454e 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -269,6 +269,41 @@ impl RpcClient { } } +fn get_region_resp_by_id( + pd_client: Arc, + header: pdpb::RequestHeader, + region_id: u64, +) -> PdFuture { + let timer = Instant::now(); + let mut req = pdpb::GetRegionByIdRequest::default(); + req.set_header(header); + 
req.set_region_id(region_id); + + let executor = move |client: &Client, req: pdpb::GetRegionByIdRequest| { + let handler = { + let inner = client.inner.rl(); + inner + .client_stub + .get_region_by_id_async_opt(&req, call_option_inner(&inner)) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "get_region_by_id", e); + }) + }; + Box::pin(async move { + let resp = handler.await?; + PD_REQUEST_HISTOGRAM_VEC + .get_region_by_id + .observe(timer.saturating_elapsed_secs()); + check_resp_header(resp.get_header())?; + Ok(resp) + }) as PdFuture<_> + }; + + pd_client + .request(req, executor, LEADER_CHANGE_RETRY) + .execute() +} + impl fmt::Debug for RpcClient { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("RpcClient") @@ -532,82 +567,46 @@ impl PdClient for RpcClient { .boxed() } - fn get_region_by_id(&self, region_id: u64) -> PdFuture> { - let timer = Instant::now(); - - let mut req = pdpb::GetRegionByIdRequest::default(); - req.set_header(self.header()); - req.set_region_id(region_id); - - let executor = move |client: &Client, req: pdpb::GetRegionByIdRequest| { - let handler = { - let inner = client.inner.rl(); - inner - .client_stub - .get_region_by_id_async_opt(&req, call_option_inner(&inner)) - .unwrap_or_else(|e| { - panic!("fail to request PD {} err {:?}", "get_region_by_id", e); - }) - }; - Box::pin(async move { - let mut resp = handler.await?; - PD_REQUEST_HISTOGRAM_VEC - .get_region_by_id - .observe(timer.saturating_elapsed_secs()); - check_resp_header(resp.get_header())?; - if resp.has_region() { - Ok(Some(resp.take_region())) - } else { - Ok(None) - } - }) as PdFuture<_> - }; + fn get_buckets_by_id(&self, region_id: u64) -> PdFuture> { + let header = self.header(); + let pd_client = self.pd_client.clone(); + Box::pin(async move { + let mut resp = get_region_resp_by_id(pd_client, header, region_id).await?; + if resp.has_buckets() { + Ok(Some(resp.take_buckets())) + } else { + Ok(None) + } + }) as PdFuture> + } - 
self.pd_client - .request(req, executor, LEADER_CHANGE_RETRY) - .execute() + fn get_region_by_id(&self, region_id: u64) -> PdFuture> { + let header = self.header(); + let pd_client = self.pd_client.clone(); + Box::pin(async move { + let mut resp = get_region_resp_by_id(pd_client, header, region_id).await?; + if resp.has_region() { + Ok(Some(resp.take_region())) + } else { + Ok(None) + } + }) } fn get_region_leader_by_id( &self, region_id: u64, ) -> PdFuture> { - let timer = Instant::now(); - - let mut req = pdpb::GetRegionByIdRequest::default(); - req.set_header(self.header()); - req.set_region_id(region_id); - - let executor = move |client: &Client, req: pdpb::GetRegionByIdRequest| { - let handler = { - let inner = client.inner.rl(); - inner - .client_stub - .get_region_by_id_async_opt(&req, call_option_inner(&inner)) - .unwrap_or_else(|e| { - panic!( - "fail to request PD {} err {:?}", - "get_region_leader_by_id", e - ) - }) - }; - Box::pin(async move { - let mut resp = handler.await?; - PD_REQUEST_HISTOGRAM_VEC - .get_region_leader_by_id - .observe(timer.saturating_elapsed_secs()); - check_resp_header(resp.get_header())?; - if resp.has_region() && resp.has_leader() { - Ok(Some((resp.take_region(), resp.take_leader()))) - } else { - Ok(None) - } - }) as PdFuture<_> - }; - - self.pd_client - .request(req, executor, LEADER_CHANGE_RETRY) - .execute() + let header = self.header(); + let pd_client = self.pd_client.clone(); + Box::pin(async move { + let mut resp = get_region_resp_by_id(pd_client, header, region_id).await?; + if resp.has_region() && resp.has_leader() { + Ok(Some((resp.take_region(), resp.take_leader()))) + } else { + Ok(None) + } + }) } fn region_heartbeat( diff --git a/components/pd_client/src/client_v2.rs b/components/pd_client/src/client_v2.rs index cfa0d46303c..b583772bb72 100644 --- a/components/pd_client/src/client_v2.rs +++ b/components/pd_client/src/client_v2.rs @@ -515,6 +515,29 @@ impl RpcClient { } } +async fn get_region_resp_by_id( + mut 
raw_client: CachedRawClient, + region_id: u64, +) -> Result { + let timer = Instant::now_coarse(); + let mut req = pdpb::GetRegionByIdRequest::default(); + req.set_region_id(region_id); + raw_client.wait_for_ready().await?; + req.set_header(raw_client.header()); + let resp = raw_client + .stub() + .get_region_by_id_async_opt(&req, raw_client.call_option().timeout(request_timeout())) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "get_region_by_id", e); + }) + .await; + PD_REQUEST_HISTOGRAM_VEC + .get_region_by_id + .observe(timer.saturating_elapsed_secs()); + let resp = raw_client.check_resp(resp)?; + check_resp_header(resp.get_header())?; + Ok(resp) +} pub trait PdClient { type ResponseChannel: Stream>; @@ -584,6 +607,8 @@ pub trait PdClient { fn get_region_by_id(&mut self, region_id: u64) -> PdFuture>; + fn get_buckets_by_id(&self, region_id: u64) -> PdFuture>; + fn get_region_leader_by_id( &mut self, region_id: u64, @@ -1045,31 +1070,22 @@ impl PdClient for RpcClient { }) } - fn get_region_by_id(&mut self, region_id: u64) -> PdFuture> { - let timer = Instant::now_coarse(); - - let mut req = pdpb::GetRegionByIdRequest::default(); - req.set_region_id(region_id); + fn get_buckets_by_id(&self, region_id: u64) -> PdFuture> { + let pd_client = self.raw_client.clone(); + Box::pin(async move { + let mut resp = get_region_resp_by_id(pd_client, region_id).await?; + if resp.has_buckets() { + Ok(Some(resp.take_buckets())) + } else { + Ok(None) + } + }) + } - let mut raw_client = self.raw_client.clone(); + fn get_region_by_id(&mut self, region_id: u64) -> PdFuture> { + let pd_client = self.raw_client.clone(); Box::pin(async move { - raw_client.wait_for_ready().await?; - req.set_header(raw_client.header()); - let resp = raw_client - .stub() - .get_region_by_id_async_opt( - &req, - raw_client.call_option().timeout(request_timeout()), - ) - .unwrap_or_else(|e| { - panic!("fail to request PD {} err {:?}", "get_region_by_id", e); - }) - .await; - 
PD_REQUEST_HISTOGRAM_VEC - .get_region_by_id - .observe(timer.saturating_elapsed_secs()); - let mut resp = raw_client.check_resp(resp)?; - check_resp_header(resp.get_header())?; + let mut resp = get_region_resp_by_id(pd_client, region_id).await?; if resp.has_region() { Ok(Some(resp.take_region())) } else { @@ -1082,33 +1098,9 @@ impl PdClient for RpcClient { &mut self, region_id: u64, ) -> PdFuture> { - let timer = Instant::now_coarse(); - - let mut req = pdpb::GetRegionByIdRequest::default(); - req.set_region_id(region_id); - - let mut raw_client = self.raw_client.clone(); + let pd_client = self.raw_client.clone(); Box::pin(async move { - raw_client.wait_for_ready().await?; - req.set_header(raw_client.header()); - let resp = raw_client - .stub() - .get_region_by_id_async_opt( - &req, - raw_client.call_option().timeout(request_timeout()), - ) - .unwrap_or_else(|e| { - panic!( - "fail to request PD {} err {:?}", - "get_region_leader_by_id", e - ); - }) - .await; - PD_REQUEST_HISTOGRAM_VEC - .get_region_leader_by_id - .observe(timer.saturating_elapsed_secs()); - let mut resp = raw_client.check_resp(resp)?; - check_resp_header(resp.get_header())?; + let mut resp = get_region_resp_by_id(pd_client, region_id).await?; if resp.has_region() && resp.has_leader() { Ok(Some((resp.take_region(), resp.take_leader()))) } else { diff --git a/components/pd_client/src/lib.rs b/components/pd_client/src/lib.rs index b877750770d..00b5efff23b 100644 --- a/components/pd_client/src/lib.rs +++ b/components/pd_client/src/lib.rs @@ -345,6 +345,11 @@ pub trait PdClient: Send + Sync { unimplemented!(); } + // Gets Buckets by Region id. + fn get_buckets_by_id(&self, _region_id: u64) -> PdFuture> { + unimplemented!(); + } + /// Gets Region and its leader by Region id. 
fn get_region_leader_by_id( &self, diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 1b127e5851b..814a0b1311a 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -195,6 +195,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, if self.fsm.peer.storage().is_initialized() { self.fsm.peer.schedule_apply_fsm(self.store_ctx); } + self.fsm.peer.maybe_gen_approximate_buckets(self.store_ctx); // Speed up setup if there is only one peer. if self.fsm.peer.is_leader() { self.fsm.peer.set_has_ready(); @@ -223,7 +224,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerTick::ReactivateMemoryLock => { self.fsm.peer.on_reactivate_memory_lock_tick(self.store_ctx) } - PeerTick::ReportBuckets => unimplemented!(), + PeerTick::ReportBuckets => self.on_report_region_buckets_tick(), PeerTick::CheckLongUncommitted => self.on_check_long_uncommitted(), PeerTick::GcPeer => self.fsm.peer_mut().on_gc_peer_tick(self.store_ctx), } @@ -305,6 +306,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, .peer_mut() .on_request_split(self.store_ctx, request, ch) } + PeerMsg::RefreshRegionBuckets { + region_epoch, + buckets, + bucket_ranges, + } => self.on_refresh_region_buckets(region_epoch, buckets, bucket_ranges), PeerMsg::RequestHalfSplit { request, ch } => self .fsm .peer_mut() diff --git a/components/raftstore-v2/src/operation/bucket.rs b/components/raftstore-v2/src/operation/bucket.rs new file mode 100644 index 00000000000..2bc2d232b12 --- /dev/null +++ b/components/raftstore-v2/src/operation/bucket.rs @@ -0,0 +1,223 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module implements the interactions with bucket. 
+ +use std::sync::Arc; + +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::metapb::RegionEpoch; +use pd_client::{new_bucket_stats, BucketMeta, BucketStat}; +use raftstore::{ + coprocessor::RegionChangeEvent, + store::{util, Bucket, BucketRange, ReadProgress, SplitCheckTask, Transport}, +}; +use slog::{error, warn}; + +use crate::{batch::StoreContext, fsm::PeerFsmDelegate, raft::Peer, router::PeerTick, worker::pd}; + +impl Peer { + #[inline] + pub fn on_refresh_region_buckets( + &mut self, + store_ctx: &mut StoreContext, + region_epoch: RegionEpoch, + mut buckets: Vec, + bucket_ranges: Option>, + ) { + // bucket version layout + // term logical counter + // |-----------|-----------| + // high bits low bits + // term: given 10s election timeout, the 32 bit means 1362 year running time + let gen_bucket_version = |term, current_version| { + let current_version_term = current_version >> 32; + let bucket_version: u64 = if current_version_term == term { + current_version + 1 + } else { + if term > u32::MAX.into() { + error!( + self.logger, + "unexpected term {} more than u32::MAX. Bucket + version will be backward.", + term + ); + } + term << 32 + }; + bucket_version + }; + + let region = self.region(); + let current_version = self + .region_buckets() + .as_ref() + .or_else(|| self.last_region_buckets().as_ref()) + .map(|b| b.meta.version) + .unwrap_or_default(); + let mut region_buckets: BucketStat; + // The region buckets reset after this region happened split or merge. + // The message should be dropped if it's epoch is lower than the regions. + // The bucket ranges is none when the region buckets is also none. + // So this condition indicates that the region buckets needs to refresh not + // renew. 
+ if let (Some(bucket_ranges), Some(peer_region_buckets)) = + (bucket_ranges, self.region_buckets()) + { + assert_eq!(buckets.len(), bucket_ranges.len()); + let mut meta_idx = 0; + region_buckets = peer_region_buckets.clone(); + let mut meta = (*region_buckets.meta).clone(); + if !buckets.is_empty() { + meta.version = gen_bucket_version(self.term(), current_version); + } + meta.region_epoch = region_epoch; + for (bucket, bucket_range) in buckets.into_iter().zip(bucket_ranges) { + // the bucket ranges maybe need to split or merge not all the meta keys, so it + // needs to find the first keys. + while meta_idx < meta.keys.len() && meta.keys[meta_idx] != bucket_range.0 { + meta_idx += 1; + } + // meta_idx can't be not the last entry (which is end key) + if meta_idx >= meta.keys.len() - 1 { + warn!( + self.logger, + "can't find the bucket key"; + "bucket_range_key" => log_wrappers::Value::key(&bucket_range.0)); + break; + } + // the bucket size is small and does not have split keys, + // then it should be merged with its left neighbor + let region_bucket_merge_size = store_ctx + .coprocessor_host + .cfg + .region_bucket_merge_size_ratio + * (store_ctx.coprocessor_host.cfg.region_bucket_size.0 as f64); + if bucket.keys.is_empty() && bucket.size <= (region_bucket_merge_size as u64) { + meta.sizes[meta_idx] = bucket.size; + // the region has more than one bucket + // and the left neighbor + current bucket size is not very big + if meta.keys.len() > 2 + && meta_idx != 0 + && meta.sizes[meta_idx - 1] + bucket.size + < store_ctx.coprocessor_host.cfg.region_bucket_size.0 * 2 + { + // bucket is too small + region_buckets.left_merge(meta_idx); + meta.left_merge(meta_idx); + continue; + } + } else { + // update size + meta.sizes[meta_idx] = bucket.size / (bucket.keys.len() + 1) as u64; + // insert new bucket keys (split the original bucket) + for bucket_key in bucket.keys { + meta_idx += 1; + region_buckets.split(meta_idx); + meta.split(meta_idx, bucket_key); + } + } + meta_idx 
+= 1; + } + region_buckets.meta = Arc::new(meta); + } else { + // when the region buckets is none, the exclusive buckets includes all the + // bucket keys. + assert_eq!(buckets.len(), 1); + let bucket_keys = buckets.pop().unwrap().keys; + let bucket_count = bucket_keys.len() + 1; + let mut meta = BucketMeta { + region_id: self.region_id(), + region_epoch, + version: gen_bucket_version(self.term(), current_version), + keys: bucket_keys, + sizes: vec![store_ctx.coprocessor_host.cfg.region_bucket_size.0; bucket_count], + }; + // padding the boundary keys and initialize the flow. + meta.keys.insert(0, region.get_start_key().to_vec()); + meta.keys.push(region.get_end_key().to_vec()); + let stats = new_bucket_stats(&meta); + region_buckets = BucketStat::new(Arc::new(meta), stats); + } + + let buckets_count = region_buckets.meta.keys.len() - 1; + store_ctx.coprocessor_host.on_region_changed( + region, + RegionChangeEvent::UpdateBuckets(buckets_count), + self.state_role(), + ); + let meta = region_buckets.meta.clone(); + self.set_region_buckets(Some(region_buckets)); + let mut store_meta = store_ctx.store_meta.lock().unwrap(); + if let Some(reader) = store_meta.readers.get_mut(&self.region_id()) { + reader.0.update(ReadProgress::region_buckets(meta)); + } + } + + #[inline] + pub fn report_region_buckets_pd(&mut self, ctx: &StoreContext) { + let region_buckets = self.region_buckets().as_ref().unwrap(); + let task = pd::Task::ReportBuckets(region_buckets.clone()); + if let Err(e) = ctx.schedulers.pd.schedule(task) { + error!( + self.logger, + "failed to report buckets to pd"; + "err" => ?e, + ); + } + } + + pub fn maybe_gen_approximate_buckets(&self, ctx: &StoreContext) { + if ctx.coprocessor_host.cfg.enable_region_bucket && self.storage().is_initialized() { + if let Err(e) = ctx + .schedulers + .split_check + .schedule(SplitCheckTask::ApproximateBuckets(self.region().clone())) + { + error!( + self.logger, + "failed to schedule check approximate buckets"; + "err" => %e, + ); 
+ } + } + } +} + +impl<'a, EK, ER, T: Transport> PeerFsmDelegate<'a, EK, ER, T> +where + EK: KvEngine, + ER: RaftEngine, +{ + #[inline] + pub fn on_report_region_buckets_tick(&mut self) { + if !self.fsm.peer().is_leader() || self.fsm.peer().region_buckets().is_none() { + return; + } + self.fsm.peer_mut().report_region_buckets_pd(self.store_ctx); + self.schedule_tick(PeerTick::ReportBuckets); + } + + pub fn on_refresh_region_buckets( + &mut self, + region_epoch: RegionEpoch, + buckets: Vec, + bucket_ranges: Option>, + ) { + if util::is_epoch_stale(®ion_epoch, self.fsm.peer().region().get_region_epoch()) { + error!( + self.fsm.peer().logger, + "receive a stale refresh region bucket message"; + "epoch" => ?region_epoch, + "current_epoch" => ?self.fsm.peer().region().get_region_epoch(), + ); + return; + } + self.fsm.peer_mut().on_refresh_region_buckets( + self.store_ctx, + region_epoch, + buckets, + bucket_ranges, + ); + self.schedule_tick(PeerTick::ReportBuckets); + } +} diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 86b0aab558e..b4e2b4654e7 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -215,6 +215,8 @@ impl Peer { control.skip_split_count += 1; return false; } + // todo: the suspected buckets range should generated by the diff write bytes. + // it will be done in next pr. let task = SplitCheckTask::split_check(self.region().clone(), true, CheckPolicy::Scan, None); if let Err(e) = ctx.schedulers.split_check.schedule(task) { diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index f022ab91109..ee0680f7fbb 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -1,5 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+mod bucket; mod command; mod life; mod pd; diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index adb0edf82e4..ebff7ad44ce 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -790,6 +790,7 @@ impl Peer { self.add_pending_tick(PeerTick::CompactLog); self.add_pending_tick(PeerTick::SplitRegionCheck); self.add_pending_tick(PeerTick::CheckLongUncommitted); + self.add_pending_tick(PeerTick::ReportBuckets); self.maybe_schedule_gc_peer_tick(); } StateRole::Follower => { diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 814dc72e622..142b4e91943 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -210,6 +210,24 @@ impl Peer { Ok(peer) } + #[inline] + pub fn region_buckets(&self) -> &Option { + &self.region_buckets + } + + #[inline] + pub fn set_region_buckets(&mut self, buckets: Option) { + if let Some(b) = self.region_buckets.take() { + self.last_region_buckets = Some(b); + } + self.region_buckets = buckets; + } + + #[inline] + pub fn last_region_buckets(&self) -> &Option { + &self.last_region_buckets + } + #[inline] pub fn region(&self) -> &metapb::Region { self.raft_group.store().region() @@ -646,13 +664,7 @@ impl Peer { #[inline] pub fn post_split(&mut self) { - self.reset_region_buckets(); - } - - pub fn reset_region_buckets(&mut self) { - if self.region_buckets.is_some() { - self.last_region_buckets = self.region_buckets.take(); - } + self.set_region_buckets(None); } pub fn maybe_campaign(&mut self) -> bool { diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index a9a8b23b571..bcda7298bd4 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -72,12 +72,26 @@ impl raftstore::coprocessor::StoreHandle for Store fn 
refresh_region_buckets( &self, - _region_id: u64, - _region_epoch: kvproto::metapb::RegionEpoch, - _buckets: Vec, - _bucket_ranges: Option>, + region_id: u64, + region_epoch: kvproto::metapb::RegionEpoch, + buckets: Vec, + bucket_ranges: Option>, ) { - // TODO + let res = self.send( + region_id, + PeerMsg::RefreshRegionBuckets { + region_epoch, + buckets, + bucket_ranges, + }, + ); + if let Err(e) = res { + warn!( + self.logger(), + "failed to refresh region buckets"; + "err" => %e, + ); + } } fn update_compute_hash_result( diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 04bc5dbab10..317ba74d4d6 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -4,6 +4,7 @@ use kvproto::{ metapb, + metapb::RegionEpoch, raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}, raft_serverpb::RaftMessage, }; @@ -185,6 +186,11 @@ pub enum PeerMsg { request: RequestSplit, ch: CmdResChannel, }, + RefreshRegionBuckets { + region_epoch: RegionEpoch, + buckets: Vec, + bucket_ranges: Option>, + }, RequestHalfSplit { request: RequestHalfSplit, ch: CmdResChannel, diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index b23d1500914..e529f7dddee 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -79,6 +79,7 @@ pub enum Task { initial_status: u64, txn_ext: Arc, }, + ReportBuckets(BucketStat), ReportMinResolvedTs { store_id: u64, min_resolved_ts: u64, @@ -138,6 +139,7 @@ impl Display for Task { "update the max timestamp for region {} in the concurrency manager", region_id ), + Task::ReportBuckets(ref buckets) => write!(f, "report buckets: {:?}", buckets), Task::ReportMinResolvedTs { store_id, min_resolved_ts, @@ -282,6 +284,7 @@ where initial_status, txn_ext, } => self.handle_update_max_timestamp(region_id, initial_status, txn_ext), + Task::ReportBuckets(buckets) => 
self.handle_report_region_buckets(buckets), Task::ReportMinResolvedTs { store_id, min_resolved_ts, diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 4a14b85f616..065d032eaa2 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -30,7 +30,7 @@ use kvproto::{ use pd_client::RpcClient; use raft::eraftpb::MessageType; use raftstore::{ - coprocessor::CoprocessorHost, + coprocessor::{Config as CopConfig, CoprocessorHost}, store::{ region_meta::{RegionLocalState, RegionMeta}, AutoSplitController, Config, RegionSnapshot, TabletSnapKey, TabletSnapManager, Transport, @@ -237,6 +237,7 @@ pub struct RunningState { pub registry: TabletRegistry, pub system: StoreSystem, pub cfg: Arc>, + pub cop_cfg: Arc>, pub transport: TestTransport, snap_mgr: TabletSnapManager, background: Worker, @@ -247,6 +248,7 @@ impl RunningState { pd_client: &Arc, path: &Path, cfg: Arc>, + cop_cfg: Arc>, transport: TestTransport, concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, @@ -293,11 +295,9 @@ impl RunningState { let router = RaftRouter::new(store_id, router); let store_meta = router.store_meta().clone(); let snap_mgr = TabletSnapManager::new(path.join("tablets_snap").to_str().unwrap()).unwrap(); + let coprocessor_host = + CoprocessorHost::new(router.store_router().clone(), cop_cfg.value().clone()); - let coprocessor_host = CoprocessorHost::new( - router.store_router().clone(), - raftstore::coprocessor::Config::default(), - ); let background = Worker::new("background"); let pd_worker = LazyWorker::new("pd-worker"); system @@ -330,6 +330,7 @@ impl RunningState { transport, snap_mgr, background, + cop_cfg, }; (TestRouter(router), state) } @@ -361,11 +362,17 @@ impl TestNode { } } - fn start(&mut self, cfg: Arc>, trans: TestTransport) -> TestRouter { + fn start( + &mut self, + cfg: Arc>, + cop_cfg: Arc>, + trans: TestTransport, + 
) -> TestRouter { let (router, state) = RunningState::new( &self.pd_client, self.path.path(), cfg, + cop_cfg, trans, ConcurrencyManager::new(1.into()), None, @@ -392,8 +399,9 @@ impl TestNode { let state = self.running_state().unwrap(); let prev_transport = state.transport.clone(); let cfg = state.cfg.clone(); + let cop_cfg = state.cop_cfg.clone(); self.stop(); - self.start(cfg, prev_transport) + self.start(cfg, cop_cfg, prev_transport) } pub fn running_state(&self) -> Option<&RunningState> { @@ -492,6 +500,14 @@ impl Cluster { } pub fn with_node_count(count: usize, config: Option) -> Self { + Cluster::with_configs(count, config, None) + } + + pub fn with_cop_cfg(coprocessor_cfg: CopConfig) -> Cluster { + Cluster::with_configs(1, None, Some(coprocessor_cfg)) + } + + pub fn with_configs(count: usize, config: Option, cop_cfg: Option) -> Self { let pd_server = test_pd::Server::new(1); let logger = slog_global::borrow_global().new(o!()); let mut cluster = Cluster { @@ -507,10 +523,15 @@ impl Cluster { v2_default_config() }; disable_all_auto_ticks(&mut cfg); + let cop_cfg = cop_cfg.unwrap_or_default(); for _ in 1..=count { let mut node = TestNode::with_pd(&cluster.pd_server, cluster.logger.clone()); let (tx, rx) = new_test_transport(); - let router = node.start(Arc::new(VersionTrack::new(cfg.clone())), tx); + let router = node.start( + Arc::new(VersionTrack::new(cfg.clone())), + Arc::new(VersionTrack::new(cop_cfg.clone())), + tx, + ); cluster.nodes.push(node); cluster.receivers.push(rx); cluster.routers.push(router); diff --git a/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs b/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs index 09ead81c0c2..11ff6bd4d02 100644 --- a/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs +++ b/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs @@ -1,9 +1,17 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+use std::time::Duration; + +use engine_traits::{MiscExt, CF_DEFAULT}; use futures::executor::block_on; use kvproto::raft_cmdpb::{RaftCmdRequest, StatusCmdType}; use pd_client::PdClient; -use tikv_util::store::new_peer; +use raftstore::coprocessor::Config as CopConfig; +use raftstore_v2::{ + router::{PeerMsg, PeerTick}, + SimpleWriteEncoder, +}; +use tikv_util::{config::ReadableSize, store::new_peer}; use crate::cluster::Cluster; @@ -59,3 +67,85 @@ fn test_store_heartbeat() { } panic!("failed to get store stats"); } + +#[test] +fn test_report_buckets() { + let region_id = 2; + let mut cop_cfg = CopConfig::default(); + cop_cfg.enable_region_bucket = true; + cop_cfg.region_bucket_size = ReadableSize::kb(1); + let cluster = Cluster::with_cop_cfg(cop_cfg); + let store_id = cluster.node(0).id(); + let router = &cluster.routers[0]; + + // When there is only one peer, it should campaign immediately. + let mut req = RaftCmdRequest::default(); + req.mut_header().set_peer(new_peer(store_id, 3)); + req.mut_status_request() + .set_cmd_type(StatusCmdType::RegionLeader); + let res = router.query(region_id, req.clone()).unwrap(); + let status_resp = res.response().unwrap().get_status_response(); + assert_eq!( + *status_resp.get_region_leader().get_leader(), + new_peer(store_id, 3) + ); + router.wait_applied_to_current_term(region_id, Duration::from_secs(3)); + + // load data to split bucket. + let header = Box::new(router.new_request_for(region_id).take_header()); + let mut suffix = String::from(""); + for _ in 0..200 { + suffix.push_str("fake "); + } + for i in 0..10 { + let mut put = SimpleWriteEncoder::with_capacity(64); + let mut key = format!("key-{}", i); + key.push_str(&suffix); + put.put(CF_DEFAULT, key.as_bytes(), b"value"); + let (msg, sub) = PeerMsg::simple_write(header.clone(), put.clone().encode()); + router.send(region_id, msg).unwrap(); + let _resp = block_on(sub.result()).unwrap(); + } + // To find the split keys, it should flush memtable manually. 
+ let mut cached = cluster.node(0).tablet_registry().get(region_id).unwrap(); + cached.latest().unwrap().flush_cf(CF_DEFAULT, true).unwrap(); + // send split region check to split bucket. + router + .send(region_id, PeerMsg::Tick(PeerTick::SplitRegionCheck)) + .unwrap(); + std::thread::sleep(std::time::Duration::from_millis(50)); + // report buckets to pd. + router + .send(region_id, PeerMsg::Tick(PeerTick::ReportBuckets)) + .unwrap(); + std::thread::sleep(std::time::Duration::from_millis(50)); + + let resp = block_on(cluster.node(0).pd_client().get_buckets_by_id(region_id)).unwrap(); + let mut buckets_tmp = vec![]; + let mut bucket_ranges = vec![]; + if let Some(buckets) = resp { + assert!(buckets.get_keys().len() > 2); + assert_eq!(buckets.get_region_id(), region_id); + for i in 0..buckets.keys.len() - 1 { + buckets_tmp.push(raftstore::store::Bucket::default()); + let bucket_range = + raftstore::store::BucketRange(buckets.keys[i].clone(), buckets.keys[i + 1].clone()); + bucket_ranges.push(bucket_range); + } + } + + // send the same region buckets to refresh which needs to merge the last. 
+ let resp = block_on(cluster.node(0).pd_client().get_region_by_id(region_id)).unwrap(); + if let Some(region) = resp { + let region_epoch = region.get_region_epoch().clone(); + for _ in 0..2 { + let msg = PeerMsg::RefreshRegionBuckets { + region_epoch: region_epoch.clone(), + buckets: buckets_tmp.clone(), + bucket_ranges: Some(bucket_ranges.clone()), + }; + router.send(region_id, msg).unwrap(); + std::thread::sleep(std::time::Duration::from_millis(50)); + } + } +} diff --git a/components/test_pd/src/mocker/mod.rs b/components/test_pd/src/mocker/mod.rs index fc257b12a9f..d8282ca3df0 100644 --- a/components/test_pd/src/mocker/mod.rs +++ b/components/test_pd/src/mocker/mod.rs @@ -133,6 +133,10 @@ pub trait PdMocker { None } + fn report_buckets(&self, _: &ReportBucketsRequest) -> Option> { + None + } + fn get_region(&self, _: &GetRegionRequest) -> Option> { None } diff --git a/components/test_pd/src/mocker/service.rs b/components/test_pd/src/mocker/service.rs index 45dd6e5661d..330a5375fb2 100644 --- a/components/test_pd/src/mocker/service.rs +++ b/components/test_pd/src/mocker/service.rs @@ -8,7 +8,7 @@ use std::sync::{ use collections::HashMap; use fail::fail_point; use kvproto::{ - metapb::{Peer, Region, Store, StoreState}, + metapb::{Buckets, Peer, Region, Store, StoreState}, pdpb::*, }; @@ -21,6 +21,7 @@ pub struct Service { is_bootstrapped: AtomicBool, stores: Mutex>, regions: Mutex>, + buckets: Mutex>, leaders: Mutex>, feature_gate: Mutex, } @@ -35,6 +36,7 @@ impl Service { regions: Mutex::new(HashMap::default()), leaders: Mutex::new(HashMap::default()), feature_gate: Mutex::new(String::default()), + buckets: Mutex::new(HashMap::default()), } } @@ -210,6 +212,9 @@ impl PdMocker for Service { Some(region) => { resp.set_header(Service::header()); resp.set_region(region.clone()); + if let Some(bucket) = self.buckets.lock().unwrap().get(&req.get_region_id()) { + resp.set_buckets(bucket.clone()); + } if let Some(leader) = leaders.get(®ion.get_id()) { 
resp.set_leader(leader.clone()); } @@ -227,6 +232,16 @@ impl PdMocker for Service { } } + fn report_buckets(&self, req: &ReportBucketsRequest) -> Option> { + let buckets = req.get_buckets(); + let region_id = req.get_buckets().get_region_id(); + self.buckets + .lock() + .unwrap() + .insert(region_id, buckets.clone()); + None + } + fn region_heartbeat( &self, req: &RegionHeartbeatRequest, diff --git a/components/test_pd/src/server.rs b/components/test_pd/src/server.rs index 28d4077b674..b1909485ac8 100644 --- a/components/test_pd/src/server.rs +++ b/components/test_pd/src/server.rs @@ -13,8 +13,8 @@ use std::{ use fail::fail_point; use futures::{future, SinkExt, TryFutureExt, TryStreamExt}; use grpcio::{ - DuplexSink, EnvBuilder, RequestStream, RpcContext, RpcStatus, RpcStatusCode, - Server as GrpcServer, ServerBuilder, ServerStreamingSink, UnarySink, WriteFlags, + ClientStreamingSink, DuplexSink, EnvBuilder, RequestStream, RpcContext, RpcStatus, + RpcStatusCode, Server as GrpcServer, ServerBuilder, ServerStreamingSink, UnarySink, WriteFlags, }; use kvproto::pdpb::*; use pd_client::Error as PdError; @@ -360,6 +360,29 @@ impl Pd for PdMock { hijack_unary(self, ctx, sink, |c| c.store_heartbeat(&req)) } + fn report_buckets( + &mut self, + ctx: grpcio::RpcContext<'_>, + stream: RequestStream, + sink: ClientStreamingSink, + ) { + let mock = self.clone(); + ctx.spawn(async move { + let mut stream = stream.map_err(PdError::from); + while let Ok(Some(req)) = stream.try_next().await { + let resp = mock + .case + .as_ref() + .and_then(|case| case.report_buckets(&req)) + .or_else(|| mock.default_handler.report_buckets(&req)); + if let Some(Ok(resp)) = resp { + sink.success(resp); + break; + } + } + }); + } + fn region_heartbeat( &mut self, ctx: RpcContext<'_>, From 07b2bde11fe072bfe36a41b24758f103a7dcbecd Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 17 Feb 2023 17:07:04 +0800 Subject: [PATCH 0536/1149] raftstore,resolved_ts: advance resolved ts as needed (#14123) close 
tikv/tikv#13110, close tikv/tikv#14122, close pingcap/tidb#40903 Fix an issue that stale read fails when a leader is slowly. Signed-off-by: Neil Shen Co-authored-by: Ti Chi Robot Co-authored-by: Jay --- components/raftstore/src/store/peer.rs | 9 +- components/raftstore/src/store/util.rs | 14 +++ components/raftstore/src/store/worker/read.rs | 106 +++++++++++++++++- components/resolved_ts/src/advance.rs | 25 +++-- components/resolved_ts/src/endpoint.rs | 23 +++- components/resolved_ts/src/resolver.rs | 2 +- 6 files changed, 162 insertions(+), 17 deletions(-) diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index a6010a6761f..c788256799b 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -65,7 +65,7 @@ use tikv_util::{ }; use time::{Duration as TimeDuration, Timespec}; use tracker::GLOBAL_TRACKERS; -use txn_types::WriteBatchFlags; +use txn_types::{TimeStamp, WriteBatchFlags}; use uuid::Uuid; use super::{ @@ -109,8 +109,8 @@ use crate::{ }; const SHRINK_CACHE_CAPACITY: usize = 64; -const MIN_BCAST_WAKE_UP_INTERVAL: u64 = 1_000; // 1s +const MIN_BCAST_WAKE_UP_INTERVAL: u64 = 1_000; const REGION_READ_PROGRESS_CAP: usize = 128; #[doc(hidden)] pub const MAX_COMMITTED_SIZE_PER_READY: u64 = 16 * 1024 * 1024; @@ -4796,6 +4796,11 @@ where let read_ts = decode_u64(&mut req.get_header().get_flag_data()).unwrap(); let safe_ts = self.read_progress.safe_ts(); if safe_ts < read_ts { + // Advancing resolved ts may be expensive, only notify if read_ts - safe_ts > + // 200ms. 
+ if TimeStamp::from(read_ts).physical() > TimeStamp::from(safe_ts).physical() + 200 { + self.read_progress.notify_advance_resolved_ts(); + } warn!( "read rejected by safe timestamp"; "safe ts" => safe_ts, diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 0127cc5c7e6..d48c5e78e7c 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -39,6 +39,7 @@ use tikv_util::{ Either, }; use time::{Duration, Timespec}; +use tokio::sync::Notify; use txn_types::WriteBatchFlags; use super::{metrics::PEER_ADMIN_CMD_COUNTER_VEC, peer_storage, Config}; @@ -1234,6 +1235,16 @@ impl RegionReadProgress { } } + pub fn update_advance_resolved_ts_notify(&self, advance_notify: Arc) { + self.core.lock().unwrap().advance_notify = Some(advance_notify); + } + + pub fn notify_advance_resolved_ts(&self) { + if let Ok(core) = self.core.try_lock() && let Some(advance_notify) = &core.advance_notify { + advance_notify.notify_waiters(); + } + } + pub fn update_applied(&self, applied: u64, coprocessor: &CoprocessorHost) { let mut core = self.core.lock().unwrap(); if let Some(ts) = core.update_applied(applied) { @@ -1397,6 +1408,8 @@ pub struct RegionReadProgressCore { pause: bool, // Discard incoming `(idx, ts)` discard: bool, + // A notify to trigger advancing resolved ts immediately. 
+ advance_notify: Option>, } // A helpful wrapper of `(apply_index, safe_ts)` item @@ -1468,6 +1481,7 @@ impl RegionReadProgressCore { last_merge_index: 0, pause: is_witness, discard: is_witness, + advance_notify: None, } } diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 379af09eb2e..826537f4e44 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -29,6 +29,7 @@ use tikv_util::{ time::{monotonic_raw_now, ThreadReadId}, }; use time::Timespec; +use txn_types::TimeStamp; use super::metrics::*; use crate::{ @@ -563,11 +564,15 @@ impl ReadDelegate { if safe_ts >= read_ts { return Ok(()); } + // Advancing resolved ts may be expensive, only notify if read_ts - safe_ts > + // 200ms. + if TimeStamp::from(read_ts).physical() > TimeStamp::from(safe_ts).physical() + 200 { + self.read_progress.notify_advance_resolved_ts(); + } debug!( "reject stale read by safe ts"; "safe_ts" => safe_ts, "read_ts" => read_ts, - "region_id" => self.region.get_id(), "peer_id" => self.peer_id, ); @@ -2014,4 +2019,103 @@ mod tests { .is_none() ); } + + #[test] + fn test_stale_read_notify() { + let store_id = 2; + let store_meta = Arc::new(Mutex::new(StoreMeta::new(0))); + let (_tmp, mut reader, rx) = new_reader("test-local-reader", store_id, store_meta.clone()); + reader.kv_engine.put(b"key", b"value").unwrap(); + + let epoch13 = { + let mut ep = metapb::RegionEpoch::default(); + ep.set_conf_ver(1); + ep.set_version(3); + ep + }; + let term6 = 6; + + // Register region1 + let pr_ids1 = vec![2, 3, 4]; + let prs1 = new_peers(store_id, pr_ids1.clone()); + prepare_read_delegate( + store_id, + 1, + term6, + pr_ids1, + epoch13.clone(), + store_meta.clone(), + ); + let leader1 = prs1[0].clone(); + + // Local read + let mut cmd = RaftCmdRequest::default(); + let mut header = RaftRequestHeader::default(); + header.set_region_id(1); + header.set_peer(leader1); + 
header.set_region_epoch(epoch13); + header.set_term(term6); + header.set_flags(header.get_flags() | WriteBatchFlags::STALE_READ.bits()); + cmd.set_header(header.clone()); + let mut req = Request::default(); + req.set_cmd_type(CmdType::Snap); + cmd.set_requests(vec![req].into()); + + // A peer can serve read_ts < safe_ts. + let safe_ts = TimeStamp::compose(2, 0); + { + let mut meta = store_meta.lock().unwrap(); + let delegate = meta.readers.get_mut(&1).unwrap(); + delegate + .read_progress + .update_safe_ts(1, safe_ts.into_inner()); + assert_eq!(delegate.read_progress.safe_ts(), safe_ts.into_inner()); + } + let read_ts_1 = TimeStamp::compose(1, 0); + let mut data = [0u8; 8]; + (&mut data[..]).encode_u64(read_ts_1.into_inner()).unwrap(); + header.set_flag_data(data.into()); + cmd.set_header(header.clone()); + let (snap_tx, snap_rx) = channel(); + let task = RaftCommand::::new( + cmd.clone(), + Callback::read(Box::new(move |resp: ReadResponse| { + snap_tx.send(resp).unwrap(); + })), + ); + must_not_redirect(&mut reader, &rx, task); + snap_rx.recv().unwrap().snapshot.unwrap(); + + // A peer has to notify advancing resolved ts if read_ts >= safe_ts. + let notify = Arc::new(tokio::sync::Notify::new()); + { + let mut meta = store_meta.lock().unwrap(); + let delegate = meta.readers.get_mut(&1).unwrap(); + delegate + .read_progress + .update_advance_resolved_ts_notify(notify.clone()); + } + // 201ms larger than safe_ts. 
+ let read_ts_2 = TimeStamp::compose(safe_ts.physical() + 201, 0); + let mut data = [0u8; 8]; + (&mut data[..]).encode_u64(read_ts_2.into_inner()).unwrap(); + header.set_flag_data(data.into()); + cmd.set_header(header.clone()); + let task = RaftCommand::::new( + cmd.clone(), + Callback::read(Box::new(move |_: ReadResponse| {})), + ); + let (notify_tx, notify_rx) = channel(); + let (wait_spawn_tx, wait_spawn_rx) = channel(); + let runtime = tokio::runtime::Runtime::new().unwrap(); + let _ = runtime.spawn(async move { + wait_spawn_tx.send(()).unwrap(); + notify.notified().await; + notify_tx.send(()).unwrap(); + }); + wait_spawn_rx.recv().unwrap(); + thread::sleep(std::time::Duration::from_millis(500)); // Prevent lost notify. + must_not_redirect(&mut reader, &rx, task); + notify_rx.recv().unwrap(); + } } diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index fd58fac1601..611d8a84424 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -1,6 +1,7 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ + cmp, ffi::CString, sync::{ atomic::{AtomicI32, Ordering}, @@ -45,10 +46,11 @@ use txn_types::TimeStamp; use crate::{endpoint::Task, metrics::*}; -const DEFAULT_CHECK_LEADER_TIMEOUT_MILLISECONDS: u64 = 5_000; // 5s +const DEFAULT_CHECK_LEADER_TIMEOUT_DURATION: Duration = Duration::from_secs(5); // 5s pub struct AdvanceTsWorker { pd_client: Arc, + advance_ts_interval: Duration, timer: SteadyTimer, worker: Runtime, scheduler: Scheduler, @@ -59,6 +61,7 @@ pub struct AdvanceTsWorker { impl AdvanceTsWorker { pub fn new( + advance_ts_interval: Duration, pd_client: Arc, scheduler: Scheduler, concurrency_manager: ConcurrencyManager, @@ -75,6 +78,7 @@ impl AdvanceTsWorker { scheduler, pd_client, worker, + advance_ts_interval, timer: SteadyTimer::default(), concurrency_manager, } @@ -88,15 +92,19 @@ impl AdvanceTsWorker { regions: Vec, mut leader_resolver: LeadershipResolver, advance_ts_interval: Duration, - cfg_update_notify: Arc, + advance_notify: Arc, ) { let cm = self.concurrency_manager.clone(); let pd_client = self.pd_client.clone(); let scheduler = self.scheduler.clone(); let timeout = self.timer.delay(advance_ts_interval); + let min_timeout = self.timer.delay(cmp::min( + DEFAULT_CHECK_LEADER_TIMEOUT_DURATION, + self.advance_ts_interval, + )); let fut = async move { - // Ignore get tso errors since we will retry every `advance_ts_interval`. + // Ignore get tso errors since we will retry every `advdance_ts_interval`. let mut min_ts = pd_client.get_tso().await.unwrap_or_default(); // Sync with concurrency manager so that it can work correctly when @@ -122,9 +130,12 @@ impl AdvanceTsWorker { futures::select! { _ = timeout.compat().fuse() => (), - // Skip wait timeout if cfg is updated. - _ = cfg_update_notify.notified().fuse() => (), + // Skip wait timeout if a notify is arrived. + _ = advance_notify.notified().fuse() => (), }; + // Wait min timeout to prevent from overloading advancing resolved ts. 
+ let _ = min_timeout.compat().await; + // NB: We must schedule the leader resolver even if there is no region, // otherwise we can not advance resolved ts next time. if let Err(e) = scheduler.schedule(Task::AdvanceResolvedTs { leader_resolver }) { @@ -386,7 +397,7 @@ impl LeadershipResolver { PENDING_CHECK_LEADER_REQ_SENT_COUNT.inc(); defer!(PENDING_CHECK_LEADER_REQ_SENT_COUNT.dec()); - let timeout = Duration::from_millis(DEFAULT_CHECK_LEADER_TIMEOUT_MILLISECONDS); + let timeout = DEFAULT_CHECK_LEADER_TIMEOUT_DURATION; let resp = tokio::time::timeout(timeout, rpc) .map_err(|e| (to_store, true, format!("[timeout] {}", e))) .await? @@ -509,7 +520,7 @@ async fn get_tikv_client( return Ok(client); } } - let timeout = Duration::from_millis(DEFAULT_CHECK_LEADER_TIMEOUT_MILLISECONDS); + let timeout = DEFAULT_CHECK_LEADER_TIMEOUT_DURATION; let store = tokio::time::timeout(timeout, pd_client.get_store_async(store_id)) .await .map_err(|e| pd_client::Error::Other(Box::new(e))) diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index def3d512d3a..8d2ee1631b4 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -90,6 +90,10 @@ impl ObserveRegion { } } + fn read_progress(&self) -> &RegionReadProgress { + self.resolver.read_progress.as_ref().unwrap() + } + fn track_change_log(&mut self, change_logs: &[ChangeLog]) -> std::result::Result<(), String> { match &mut self.resolver_status { ResolverStatus::Pending { @@ -265,7 +269,7 @@ impl ObserveRegion { pub struct Endpoint { store_id: Option, cfg: ResolvedTsConfig, - cfg_update_notify: Arc, + advance_notify: Arc, store_meta: Arc>, region_read_progress: RegionReadProgressRegistry, regions: HashMap, @@ -294,8 +298,12 @@ where let meta = store_meta.lock().unwrap(); (meta.region_read_progress.clone(), meta.store_id) }; - let advance_worker = - AdvanceTsWorker::new(pd_client.clone(), scheduler.clone(), concurrency_manager); + let advance_worker = 
AdvanceTsWorker::new( + cfg.advance_ts_interval.0, + pd_client.clone(), + scheduler.clone(), + concurrency_manager, + ); let scanner_pool = ScannerPool::new(cfg.scan_lock_pool_size, raft_router); let store_resolver_gc_interval = Duration::from_secs(60); let leader_resolver = LeadershipResolver::new( @@ -309,7 +317,7 @@ where let ep = Self { store_id, cfg: cfg.clone(), - cfg_update_notify: Arc::new(Notify::new()), + advance_notify: Arc::new(Notify::new()), scheduler, store_meta, region_read_progress, @@ -345,6 +353,9 @@ where ResolverStatus::Pending { ref cancelled, .. } => cancelled.clone(), ResolverStatus::Ready => panic!("resolved ts illeagal created observe region"), }; + observe_region + .read_progress() + .update_advance_resolved_ts_notify(self.advance_notify.clone()); self.regions.insert(region_id, observe_region); let scan_task = self.build_scan_task(region, observe_handle, cancelled); @@ -560,7 +571,7 @@ where regions, leader_resolver, self.cfg.advance_ts_interval.0, - self.cfg_update_notify.clone(), + self.advance_notify.clone(), ); } @@ -569,7 +580,7 @@ where if let Err(e) = self.cfg.update(change) { warn!("resolved-ts config fails"; "error" => ?e); } else { - self.cfg_update_notify.notify_waiters(); + self.advance_notify.notify_waiters(); info!( "resolved-ts config changed"; "prev" => prev, diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 514f812665a..b341c546940 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -21,7 +21,7 @@ pub struct Resolver { // The highest index `Resolver` had been tracked tracked_index: u64, // The region read progress used to utilize `resolved_ts` to serve stale read request - read_progress: Option>, + pub(crate) read_progress: Option>, // The timestamps that advance the resolved_ts when there is no more write. 
min_ts: TimeStamp, // Whether the `Resolver` is stopped From 728511e8be17a50c03b6a82bc073008192533c7a Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Fri, 17 Feb 2023 23:25:04 -0800 Subject: [PATCH 0537/1149] rocksdb: reduce rocksdb block size to 32KB (#14053) (#14244) close tikv/tikv#14052 Because of memory fragment issue 16KB causes, we change it to 32KB and the result shows there's no significant memory fragment. Signed-off-by: qi.xu Co-authored-by: qi.xu --- etc/config-template.toml | 4 ++-- src/config/mod.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/etc/config-template.toml b/etc/config-template.toml index 59152570da1..38082367d40 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -679,7 +679,7 @@ ## The data block size. RocksDB compresses data based on the unit of block. ## Similar to page in other databases, block is the smallest unit cached in block-cache. Note that ## the block size specified here corresponds to uncompressed data. -# block-size = "16KB" +# block-size = "32KB" ## If you're doing point lookups you definitely want to turn bloom filters on. We use bloom filters ## to avoid unnecessary disk reads. Default bits_per_key is 10, which yields ~1% false positive @@ -915,7 +915,7 @@ [rocksdb.writecf] ## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`. # compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] -# block-size = "16KB" +# block-size = "32KB" ## Recommend to set it the same as `rocksdb.defaultcf.write-buffer-size`. 
# write-buffer-size = "128MB" diff --git a/src/config/mod.rs b/src/config/mod.rs index a8e15c38642..e633b76d2db 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -634,7 +634,7 @@ impl Default for DefaultCfConfig { let total_mem = SysQuota::memory_limit_in_bytes(); DefaultCfConfig { - block_size: ReadableSize::kb(16), + block_size: ReadableSize::kb(32), block_cache_size: memory_limit_for_cf(false, CF_DEFAULT, total_mem), disable_block_cache: false, cache_index_and_filter_blocks: true, @@ -759,7 +759,7 @@ impl Default for WriteCfConfig { }; WriteCfConfig { - block_size: ReadableSize::kb(16), + block_size: ReadableSize::kb(32), block_cache_size: memory_limit_for_cf(false, CF_WRITE, total_mem), disable_block_cache: false, cache_index_and_filter_blocks: true, From b82036eae1a4f01b607af79352e42480cb88f3e5 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Sat, 18 Feb 2023 15:47:04 +0800 Subject: [PATCH 0538/1149] config: increase resolved-ts.advance-ts-interval to 20s (#14136) close tikv/tikv#14100 Save network traffic by increasing resolved-ts.advance-ts-interval Signed-off-by: Neil Shen Co-authored-by: Ti Chi Robot --- src/config/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index e633b76d2db..4be54665443 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2745,7 +2745,7 @@ impl Default for ResolvedTsConfig { fn default() -> Self { Self { enable: true, - advance_ts_interval: ReadableDuration::secs(1), + advance_ts_interval: ReadableDuration::secs(20), scan_lock_pool_size: 2, } } @@ -4731,7 +4731,7 @@ mod tests { // Default value assert_eq!( resolved_ts_cfg.advance_ts_interval, - ReadableDuration::secs(1) + ReadableDuration::secs(20) ); // Update `advance-ts-interval` to 100ms From 1216d5efa99f512a5505998524013233c198363b Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Tue, 21 Feb 2023 17:33:06 +0800 Subject: [PATCH 0539/1149] pd_client: add some function to buckets (#14239) 
close tikv/tikv#14240 1. add `from_meta` constructor 2. add `merge` to merge delta flow 3. add `add_flow` to add flow for given key range Signed-off-by: bufferflies <1045931706@qq.com> --- components/pd_client/src/lib.rs | 23 ++++++++++++ .../raftstore-v2/src/operation/bucket.rs | 5 ++- .../raftstore-v2/src/worker/pd/region.rs | 24 +++---------- components/raftstore/src/store/fsm/apply.rs | 14 ++++---- components/raftstore/src/store/fsm/peer.rs | 14 +++----- components/raftstore/src/store/worker/pd.rs | 22 +++--------- .../src/store/worker/split_controller.rs | 36 ++++++++----------- components/test_pd_client/src/pd.rs | 8 +---- 8 files changed, 59 insertions(+), 87 deletions(-) diff --git a/components/pd_client/src/lib.rs b/components/pd_client/src/lib.rs index 00b5efff23b..05b5729e98c 100644 --- a/components/pd_client/src/lib.rs +++ b/components/pd_client/src/lib.rs @@ -152,6 +152,29 @@ impl BucketStat { } } + pub fn from_meta(meta: Arc) -> Self { + let stats = new_bucket_stats(&meta); + Self::new(meta, stats) + } + + pub fn set_meta(&mut self, meta: Arc) { + self.stats = new_bucket_stats(&meta); + self.meta = meta; + } + + pub fn merge(&mut self, delta: &BucketStat) { + merge_bucket_stats( + &self.meta.keys, + &mut self.stats, + &delta.meta.keys, + &delta.stats, + ); + } + + pub fn add_flows>(&mut self, incoming: &[I], delta_stats: &metapb::BucketStats) { + merge_bucket_stats(&self.meta.keys, &mut self.stats, incoming, delta_stats); + } + pub fn write_key(&mut self, key: &[u8], value_size: u64) { let idx = match util::find_bucket_index(key, &self.meta.keys) { Some(idx) => idx, diff --git a/components/raftstore-v2/src/operation/bucket.rs b/components/raftstore-v2/src/operation/bucket.rs index 2bc2d232b12..efff68fc453 100644 --- a/components/raftstore-v2/src/operation/bucket.rs +++ b/components/raftstore-v2/src/operation/bucket.rs @@ -6,7 +6,7 @@ use std::sync::Arc; use engine_traits::{KvEngine, RaftEngine}; use kvproto::metapb::RegionEpoch; -use 
pd_client::{new_bucket_stats, BucketMeta, BucketStat}; +use pd_client::{BucketMeta, BucketStat}; use raftstore::{ coprocessor::RegionChangeEvent, store::{util, Bucket, BucketRange, ReadProgress, SplitCheckTask, Transport}, @@ -135,8 +135,7 @@ impl Peer { // padding the boundary keys and initialize the flow. meta.keys.insert(0, region.get_start_key().to_vec()); meta.keys.push(region.get_end_key().to_vec()); - let stats = new_bucket_stats(&meta); - region_buckets = BucketStat::new(Arc::new(meta), stats); + region_buckets = BucketStat::from_meta(Arc::new(meta)); } let buckets_count = region_buckets.meta.keys.len() - 1; diff --git a/components/raftstore-v2/src/worker/pd/region.rs b/components/raftstore-v2/src/worker/pd/region.rs index c862d1f208b..bd4925e8563 100644 --- a/components/raftstore-v2/src/worker/pd/region.rs +++ b/components/raftstore-v2/src/worker/pd/region.rs @@ -5,9 +5,7 @@ use std::{sync::Arc, time::Duration}; use collections::HashMap; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{metapb, pdpb}; -use pd_client::{ - merge_bucket_stats, metrics::PD_HEARTBEAT_COUNTER_VEC, BucketStat, PdClient, RegionStat, -}; +use pd_client::{metrics::PD_HEARTBEAT_COUNTER_VEC, BucketStat, PdClient, RegionStat}; use raftstore::store::{ReadStats, WriteStats}; use resource_metering::RawRecords; use slog::{debug, error, info}; @@ -72,17 +70,9 @@ impl ReportBucket { self.last_report_ts = report_ts; match self.last_report_stat.replace(self.current_stat.clone()) { Some(last) => { - let mut delta = BucketStat::new( - self.current_stat.meta.clone(), - pd_client::new_bucket_stats(&self.current_stat.meta), - ); + let mut delta = BucketStat::from_meta(self.current_stat.meta.clone()); // Buckets may be changed, recalculate last stats according to current meta. 
- merge_bucket_stats( - &delta.meta.keys, - &mut delta.stats, - &last.meta.keys, - &last.stats, - ); + delta.merge(&last); for i in 0..delta.meta.keys.len() - 1 { delta.stats.write_bytes[i] = self.current_stat.stats.write_bytes[i] - delta.stats.write_bytes[i]; @@ -438,13 +428,7 @@ where if current.meta < buckets.meta { std::mem::swap(current, &mut buckets); } - - merge_bucket_stats( - ¤t.meta.keys, - &mut current.stats, - &buckets.meta.keys, - &buckets.stats, - ); + current.merge(&buckets); }) .or_insert_with(|| ReportBucket::new(buckets)); } diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index fba17db7391..7afb188a4b0 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -44,7 +44,7 @@ use kvproto::{ }, raft_serverpb::{MergeState, PeerState, RaftApplyState, RaftTruncatedState, RegionLocalState}, }; -use pd_client::{new_bucket_stats, BucketMeta, BucketStat}; +use pd_client::{BucketMeta, BucketStat}; use prometheus::local::LocalHistogram; use protobuf::{wire_format::WireType, CodedInputStream, Message}; use raft::eraftpb::{ @@ -3941,12 +3941,12 @@ where self.delegate.term = apply.term; if let Some(meta) = apply.bucket_meta.clone() { - let buckets = self - .delegate - .buckets - .get_or_insert_with(BucketStat::default); - buckets.stats = new_bucket_stats(&meta); - buckets.meta = meta; + if let Some(old) = &mut self.delegate.buckets { + old.set_meta(meta); + } else { + let new = BucketStat::from_meta(meta); + self.delegate.buckets.replace(new); + } } let prev_state = ( diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 05b443be4eb..30420668164 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -41,7 +41,7 @@ use kvproto::{ replication_modepb::{DrAutoSyncState, ReplicationMode}, }; use parking_lot::RwLockWriteGuard; -use 
pd_client::{merge_bucket_stats, new_bucket_stats, BucketMeta, BucketStat}; +use pd_client::{new_bucket_stats, BucketMeta, BucketStat}; use protobuf::Message; use raft::{ self, @@ -2273,12 +2273,7 @@ where let applied_index = res.apply_state.applied_index; let buckets = self.fsm.peer.region_buckets.as_mut(); if let (Some(delta), Some(buckets)) = (res.bucket_stat, buckets) { - merge_bucket_stats( - &buckets.meta.keys, - &mut buckets.stats, - &delta.meta.keys, - &delta.stats, - ); + buckets.merge(&delta); } self.fsm.has_ready |= self.fsm.peer.post_apply( self.ctx, @@ -5946,9 +5941,7 @@ where }; meta.keys.insert(0, region.get_start_key().to_vec()); meta.keys.push(region.get_end_key().to_vec()); - - let stats = new_bucket_stats(&meta); - region_buckets = BucketStat::new(Arc::new(meta), stats); + region_buckets = BucketStat::from_meta(Arc::new(meta)); } let buckets_count = region_buckets.meta.keys.len() - 1; @@ -6341,6 +6334,7 @@ where "err" => ?e, ); } + // todo: it will delete in next pr. region_buckets.stats = new_bucket_stats(®ion_buckets.meta); self.register_report_region_buckets_tick(); diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index f43e1ec33d5..74fa4d046f1 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -32,7 +32,7 @@ use kvproto::{ replication_modepb::{RegionReplicationStatus, StoreDrAutoSyncStatus}, }; use ordered_float::OrderedFloat; -use pd_client::{merge_bucket_stats, metrics::*, BucketStat, Error, PdClient, RegionStat}; +use pd_client::{metrics::*, BucketStat, Error, PdClient, RegionStat}; use prometheus::local::LocalHistogram; use raft::eraftpb::ConfChangeType; use resource_metering::{Collector, CollectorGuard, CollectorRegHandle, RawRecords}; @@ -287,17 +287,9 @@ impl ReportBucket { self.last_report_ts = report_ts; match self.last_report_stat.replace(self.current_stat.clone()) { Some(last) => { - let mut delta = BucketStat::new( - 
self.current_stat.meta.clone(), - pd_client::new_bucket_stats(&self.current_stat.meta), - ); + let mut delta = BucketStat::from_meta(self.current_stat.meta.clone()); // Buckets may be changed, recalculate last stats according to current meta. - merge_bucket_stats( - &delta.meta.keys, - &mut delta.stats, - &last.meta.keys, - &last.stats, - ); + delta.merge(&last); for i in 0..delta.meta.keys.len() - 1 { delta.stats.write_bytes[i] = self.current_stat.stats.write_bytes[i] - delta.stats.write_bytes[i]; @@ -1891,13 +1883,7 @@ where if current.meta < buckets.meta { mem::swap(current, &mut buckets); } - - merge_bucket_stats( - ¤t.meta.keys, - &mut current.stats, - &buckets.meta.keys, - &buckets.stats, - ); + current.merge(&buckets); }) .or_insert_with(|| ReportBucket::new(buckets)); } diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 7e00daa2764..6d556d1c283 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -13,7 +13,7 @@ use kvproto::{ metapb::{self, Peer}, pdpb::QueryKind, }; -use pd_client::{merge_bucket_stats, new_bucket_stats, BucketMeta, BucketStat}; +use pd_client::{BucketMeta, BucketStat}; use rand::Rng; use resource_metering::RawRecords; use tikv_util::{ @@ -451,30 +451,22 @@ impl ReadStats { region_info.flow.add(write); region_info.flow.add(data); if let Some(buckets) = buckets { - let bucket_stat = self.region_buckets.entry(region_id).or_insert_with(|| { - let stats = new_bucket_stats(buckets); - BucketStat::new(buckets.clone(), stats) - }); - if bucket_stat.meta < *buckets { - let stats = new_bucket_stats(buckets); - let mut new = BucketStat::new(buckets.clone(), stats); - merge_bucket_stats( - &new.meta.keys, - &mut new.stats, - &bucket_stat.meta.keys, - &bucket_stat.stats, - ); - *bucket_stat = new; - } + let bucket_stat = self + .region_buckets + .entry(region_id) + 
.and_modify(|current| { + if current.meta < *buckets { + let mut new = BucketStat::from_meta(buckets.clone()); + std::mem::swap(current, &mut new); + current.merge(&new); + } + }) + .or_insert_with(|| BucketStat::from_meta(buckets.clone())); let mut delta = metapb::BucketStats::default(); delta.set_read_bytes(vec![(write.read_bytes + data.read_bytes) as u64]); delta.set_read_keys(vec![(write.read_keys + data.read_keys) as u64]); - let start = start.unwrap_or_default(); - let end = end.unwrap_or_default(); - merge_bucket_stats( - &bucket_stat.meta.keys, - &mut bucket_stat.stats, - &[start, end], + bucket_stat.add_flows( + &[start.unwrap_or_default(), end.unwrap_or_default()], &delta, ); } diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index a76692c4a67..d3bbce685c0 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -1946,13 +1946,7 @@ impl PdClient for TestPdClient { if current.meta < buckets.meta { std::mem::swap(current, &mut buckets); } - - pd_client::merge_bucket_stats( - ¤t.meta.keys, - &mut current.stats, - &buckets.meta.keys, - &buckets.stats, - ); + current.merge(&buckets); }) .or_insert(buckets); ready(Ok(())).boxed() From 9a91e60b0677def44f35f5cc2b9ad8f5a4df8a0a Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 21 Feb 2023 20:53:05 +0800 Subject: [PATCH 0540/1149] Raftstore-v2: update peer state after persisting snapshot (#14248) ref tikv/tikv#12842 Update peer state after persisting snapshot Signed-off-by: SpadeA-Tang --- components/raftstore-v2/src/operation/ready/snapshot.rs | 4 ++++ tests/integrations/raftstore/test_conf_change.rs | 1 + 2 files changed, 5 insertions(+) diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 87a1496be15..29d94c955af 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ 
b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -259,6 +259,10 @@ impl Peer { meta.region_read_progress .insert(region_id, self.read_progress().clone()); } + + let region_state = self.raft_group().store().region_state().clone(); + self.storage_mut().set_region_state(region_state); + if let Some(tablet) = self.set_tablet(tablet) { self.record_tombstone_tablet(ctx, tablet, snapshot_index); } diff --git a/tests/integrations/raftstore/test_conf_change.rs b/tests/integrations/raftstore/test_conf_change.rs index 500a27ae266..79b3488d868 100644 --- a/tests/integrations/raftstore/test_conf_change.rs +++ b/tests/integrations/raftstore/test_conf_change.rs @@ -732,6 +732,7 @@ fn test_node_learner_conf_change() { } #[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_learner_with_slow_snapshot() { let mut cluster = new_cluster(0, 3); configure_for_snapshot(&mut cluster.cfg); From 061d874c297307a0c6184d827577c58378ae4b72 Mon Sep 17 00:00:00 2001 From: zyguan Date: Tue, 21 Feb 2023 21:07:05 +0800 Subject: [PATCH 0541/1149] read_pool: avoid tail latency of spawning (#14207) ref tikv/tikv#14118, close tikv/tikv#14188 Each multilevel/priority pool maintains an internal map for tracking the elapsed time of running tasks. Previously we try to cleanup the map every 10s on spawning new tasks, which leads to the tail latency issue described in #14118. This PR tries to resolve the issue by spawning a background task for cleaning up the map. 
Signed-off-by: zyguan Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/server/src/server.rs | 2 + components/server/src/server2.rs | 2 + components/tikv_util/src/yatp_pool/mod.rs | 221 ++++++++++++++++++++-- src/read_pool.rs | 13 +- 5 files changed, 224 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d87014110fd..1fa0937ce40 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7491,7 +7491,7 @@ checksum = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5" [[package]] name = "yatp" version = "0.0.1" -source = "git+https://github.com/tikv/yatp.git?branch=master#bcf431a2619c06ab7fa0c72073a0c775646c484f" +source = "git+https://github.com/tikv/yatp.git?branch=master#7ed25299d60a5338bea4ac0ed7470887ab74a010" dependencies = [ "crossbeam-deque", "crossbeam-skiplist", diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 99d56ac10cd..9576cb91423 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -128,6 +128,7 @@ use tikv_util::{ thread_group::GroupProperties, time::{Instant, Monitor}, worker::{Builder as WorkerBuilder, LazyWorker, Scheduler, Worker}, + yatp_pool::CleanupMethod, Either, }; use tokio::runtime::Builder; @@ -768,6 +769,7 @@ where pd_sender.clone(), engines.engine.clone(), resource_ctl, + CleanupMethod::Remote(self.background_worker.remote()), )) } else { None diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 2a67318439b..9a2a1a1e8e0 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -108,6 +108,7 @@ use tikv_util::{ thread_group::GroupProperties, time::{Instant, Monitor}, worker::{Builder as WorkerBuilder, LazyWorker, Scheduler, Worker}, + yatp_pool::CleanupMethod, Either, }; use tokio::runtime::Builder; @@ -664,6 +665,7 @@ where pd_sender.clone(), engines.engine.clone(), resource_ctl, + CleanupMethod::Remote(self.background_worker.remote()), )) } else { None diff --git 
a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index 305d2162482..05c245bd5a3 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -7,10 +7,11 @@ use std::sync::Arc; use fail::fail_point; pub use future_pool::{Full, FuturePool}; +use futures::{compat::Stream01CompatExt, StreamExt}; use prometheus::{local::LocalHistogram, Histogram}; use yatp::{ - pool::{CloneRunnerBuilder, Local, Runner}, - queue::{multilevel, priority, QueueType, TaskCell as _}, + pool::{CloneRunnerBuilder, Local, Remote, Runner}, + queue::{multilevel, priority, Extras, QueueType, TaskCell as _}, task::future::{Runner as FutureRunner, TaskCell}, ThreadPool, }; @@ -18,8 +19,77 @@ use yatp::{ use crate::{ thread_group::GroupProperties, time::{Duration, Instant}, + timer::GLOBAL_TIMER_HANDLE, }; +const DEFAULT_CLEANUP_INTERVAL: Duration = if cfg!(test) { + Duration::from_millis(100) +} else { + Duration::from_secs(10) +}; + +fn background_cleanup_task(cleanup: F) -> TaskCell +where + F: Fn() -> Option + Send + 'static, +{ + let mut interval = GLOBAL_TIMER_HANDLE + .interval( + std::time::Instant::now() + DEFAULT_CLEANUP_INTERVAL, + DEFAULT_CLEANUP_INTERVAL, + ) + .compat(); + TaskCell::new( + async move { + while let Some(Ok(_)) = interval.next().await { + cleanup(); + } + }, + Extras::multilevel_default(), + ) +} + +/// CleanupMethod describes how a pool cleanup its internal task-elapsed map. A +/// task-elapsed map is used for tracking how long each task has been running, +/// so that the pool can adjust the level of a task according to its running +/// time. To prevent a task-elapsed map from growing too large, the following +/// strategies are provided for cleaning up it periodically. +pub enum CleanupMethod { + /// Cleanup in place on spawning. + InPlace, + /// Cleanup in this pool (the one to be built) locally. + Local, + /// Cleanup in the given remote pool. 
+ Remote(Remote), +} + +impl CleanupMethod { + /// Returns the perferred cleanup interval used for creating a queue + /// builder. + fn preferred_interval(&self) -> Option { + match self { + Self::InPlace => Some(DEFAULT_CLEANUP_INTERVAL), + _ => None, + } + } + + /// Tries to create a task from the cleanup function and spawn it if + /// possible, returns Some(task) if there is a task shall be spawned but + /// hasn't been spawned (that is, need to be spawned locally later). + fn try_spawn(&self, cleanup: F) -> Option + where + F: Fn() -> Option + Send + 'static, + { + match self { + Self::InPlace => None, + Self::Local => Some(background_cleanup_task(cleanup)), + Self::Remote(remote) => { + remote.spawn(background_cleanup_task(cleanup)); + None + } + } + } +} + pub(crate) const TICK_INTERVAL: Duration = Duration::from_secs(1); fn tick_interval() -> Duration { @@ -180,6 +250,10 @@ pub struct YatpPoolBuilder { max_thread_count: usize, stack_size: usize, max_tasks: usize, + cleanup_method: CleanupMethod, + + #[cfg(test)] + background_cleanup_hook: Option>, } impl YatpPoolBuilder { @@ -195,6 +269,10 @@ impl YatpPoolBuilder { max_thread_count: 1, stack_size: 0, max_tasks: std::usize::MAX, + cleanup_method: CleanupMethod::InPlace, + + #[cfg(test)] + background_cleanup_hook: None, } } @@ -233,6 +311,11 @@ impl YatpPoolBuilder { self } + pub fn cleanup_method(mut self, method: CleanupMethod) -> Self { + self.cleanup_method = method; + self + } + pub fn before_stop(mut self, f: F) -> Self where F: Fn() + Send + Sync + 'static, @@ -295,13 +378,21 @@ impl YatpPoolBuilder { .name_prefix .clone() .unwrap_or_else(|| "yatp_pool".to_string()); + let multilevel_builder = multilevel::Builder::new( + multilevel::Config::default() + .name(Some(name)) + .cleanup_interval(self.cleanup_method.preferred_interval()), + ); + let pending_task = self.try_spawn_cleanup(multilevel_builder.cleanup_fn()); let (builder, read_pool_runner) = self.create_builder(); - let multilevel_builder = - 
multilevel::Builder::new(multilevel::Config::default().name(Some(name))); let runner_builder = multilevel_builder.runner_builder(CloneRunnerBuilder(read_pool_runner)); - builder - .build_with_queue_and_runner(QueueType::Multilevel(multilevel_builder), runner_builder) + let pool = builder + .build_with_queue_and_runner(QueueType::Multilevel(multilevel_builder), runner_builder); + if let Some(task) = pending_task { + pool.spawn(task); + } + pool } pub fn build_priority_pool( @@ -312,13 +403,54 @@ impl YatpPoolBuilder { .name_prefix .clone() .unwrap_or_else(|| "yatp_pool".to_string()); - let (builder, read_pool_runner) = self.create_builder(); let priority_builder = priority::Builder::new( - priority::Config::default().name(Some(name)), + priority::Config::default() + .name(Some(name)) + .cleanup_interval(self.cleanup_method.preferred_interval()), priority_provider, ); + let pending_task = self.try_spawn_cleanup(priority_builder.cleanup_fn()); + let (builder, read_pool_runner) = self.create_builder(); let runner_builder = priority_builder.runner_builder(CloneRunnerBuilder(read_pool_runner)); - builder.build_with_queue_and_runner(QueueType::Priority(priority_builder), runner_builder) + let pool = builder + .build_with_queue_and_runner(QueueType::Priority(priority_builder), runner_builder); + if let Some(task) = pending_task { + pool.spawn(task); + } + pool + } + + #[cfg(test)] + fn background_cleanup_hook(mut self, f: F) -> Self + where + F: Fn() + Send + Sync + 'static, + { + self.background_cleanup_hook = Some(Arc::new(f)); + self + } + + #[cfg(test)] + fn try_spawn_cleanup(&self, cleanup: F) -> Option + where + F: Fn() -> Option + Send + 'static, + { + if let Some(hook) = &self.background_cleanup_hook { + let on_cleanup = hook.clone(); + self.cleanup_method.try_spawn(move || { + on_cleanup(); + cleanup() + }) + } else { + self.cleanup_method.try_spawn(cleanup) + } + } + + #[cfg(not(test))] + fn try_spawn_cleanup(&self, cleanup: F) -> Option + where + F: Fn() -> 
Option + Send + 'static, + { + self.cleanup_method.try_spawn(cleanup) } fn create_builder(mut self) -> (yatp::Builder, YatpPoolRunner) { @@ -349,12 +481,15 @@ impl YatpPoolBuilder { #[cfg(test)] mod tests { - use std::sync::mpsc; + use std::{ + sync::{atomic, mpsc}, + thread, + }; use futures::compat::Future01CompatExt; use super::*; - use crate::timer::GLOBAL_TIMER_HANDLE; + use crate::{timer::GLOBAL_TIMER_HANDLE, worker}; #[test] fn test_record_schedule_wait_duration() { @@ -382,4 +517,68 @@ mod tests { let histogram = metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[name]); assert_eq!(histogram.get_sample_count() as u32, 6, "{:?}", histogram); } + + #[test] + fn test_cleanup_in_place_by_default() { + let name = "test_cleanup_default"; + let count = Arc::new(atomic::AtomicU32::new(0)); + let n = count.clone(); + let pool = YatpPoolBuilder::new(DefaultTicker::default()) + .name_prefix(name) + .background_cleanup_hook(move || { + n.fetch_add(1, atomic::Ordering::SeqCst); + }) + .build_multi_level_pool(); + + thread::sleep(3 * DEFAULT_CLEANUP_INTERVAL); + drop(pool); + assert_eq!(0, count.load(atomic::Ordering::SeqCst)); + } + + #[test] + fn test_cleanup_in_local_pool() { + let name = "test_cleanup_local"; + let count = Arc::new(atomic::AtomicU32::new(0)); + let n = count.clone(); + let pool = YatpPoolBuilder::new(DefaultTicker::default()) + .name_prefix(name) + .cleanup_method(CleanupMethod::Local) + .background_cleanup_hook(move || { + n.fetch_add(1, atomic::Ordering::SeqCst); + let t = thread::current(); + assert!(t.name().unwrap().starts_with(name)); + }) + .build_multi_level_pool(); + + thread::sleep(3 * DEFAULT_CLEANUP_INTERVAL + DEFAULT_CLEANUP_INTERVAL / 2); + drop(pool); + thread::sleep(2 * DEFAULT_CLEANUP_INTERVAL); + assert!(3 == count.load(atomic::Ordering::SeqCst)); + } + + #[test] + fn test_cleanup_in_remote_pool() { + let name = "test_cleanup_remote"; + let bg_name = "test_background"; + let bg_pool = 
worker::Builder::new(bg_name).create(); + let count = Arc::new(atomic::AtomicU32::new(0)); + let n = count.clone(); + let pool = YatpPoolBuilder::new(DefaultTicker::default()) + .name_prefix(name) + .cleanup_method(CleanupMethod::Remote(bg_pool.remote())) + .background_cleanup_hook(move || { + n.fetch_add(1, atomic::Ordering::SeqCst); + let t = thread::current(); + assert!(t.name().unwrap().starts_with(bg_name)); + }) + .build_multi_level_pool(); + + thread::sleep(3 * DEFAULT_CLEANUP_INTERVAL + DEFAULT_CLEANUP_INTERVAL / 2); + drop(pool); + thread::sleep(2 * DEFAULT_CLEANUP_INTERVAL); + assert!(5 == count.load(atomic::Ordering::SeqCst)); + drop(bg_pool); + thread::sleep(2 * DEFAULT_CLEANUP_INTERVAL); + assert!(5 == count.load(atomic::Ordering::SeqCst)); + } } diff --git a/src/read_pool.rs b/src/read_pool.rs index 1488ffada15..4852caa181b 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -22,7 +22,7 @@ use tikv_util::{ sys::{cpu_time::ProcessStat, SysQuota}, time::Instant, worker::{Runnable, RunnableWithTimer, Scheduler, Worker}, - yatp_pool::{self, FuturePool, PoolTicker, YatpPoolBuilder}, + yatp_pool::{self, CleanupMethod, FuturePool, PoolTicker, YatpPoolBuilder}, }; use tracker::TrackedFuture; use yatp::{ @@ -418,11 +418,13 @@ pub fn build_yatp_read_pool( reporter: R, engine: E, resource_ctl: Option>, + cleanup_method: CleanupMethod, ) -> ReadPool { let unified_read_pool_name = get_unified_read_pool_name(); let raftkv = Arc::new(Mutex::new(engine)); let builder = YatpPoolBuilder::new(ReporterTicker { reporter }) .name_prefix(&unified_read_pool_name) + .cleanup_method(cleanup_method) .stack_size(config.stack_size.0 as usize) .thread_count( config.min_thread_count, @@ -765,7 +767,8 @@ mod tests { // max running tasks number should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); - let pool = build_yatp_read_pool(&config, DummyReporter, engine, None); + let pool = + build_yatp_read_pool(&config, DummyReporter, engine, None, 
CleanupMethod::InPlace); let gen_task = || { let (tx, rx) = oneshot::channel::<()>(); @@ -806,7 +809,8 @@ mod tests { // max running tasks number should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); - let pool = build_yatp_read_pool(&config, DummyReporter, engine, None); + let pool = + build_yatp_read_pool(&config, DummyReporter, engine, None, CleanupMethod::InPlace); let gen_task = || { let (tx, rx) = oneshot::channel::<()>(); @@ -855,7 +859,8 @@ mod tests { // max running tasks number should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); - let pool = build_yatp_read_pool(&config, DummyReporter, engine, None); + let pool = + build_yatp_read_pool(&config, DummyReporter, engine, None, CleanupMethod::InPlace); let gen_task = || { let (tx, rx) = oneshot::channel::<()>(); From e247c7686dce08f7243d7ce286764250723d4a76 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 22 Feb 2023 14:25:06 +0800 Subject: [PATCH 0542/1149] integration test v2: strip off the data prefix when getting the region id when necessary (#14235) ref tikv/tikv#12842 strip off the data prefix when getting the region id when necessary Signed-off-by: SpadeA-Tang --- components/test_raftstore-v2/src/cluster.rs | 15 +++++- components/test_raftstore-v2/src/node.rs | 10 ++++ components/test_raftstore-v2/src/server.rs | 8 ++++ components/test_raftstore/src/cluster.rs | 4 ++ tests/integrations/raftstore/test_snap.rs | 46 +++++++++++++++++-- .../raftstore/test_split_region.rs | 24 ++++------ .../raftstore/test_transfer_leader.rs | 1 + 7 files changed, 88 insertions(+), 20 deletions(-) diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index c935040055f..b9d057d33c5 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -18,7 +18,7 @@ use engine_traits::{ }; use file_system::IoRateLimiter; use 
futures::{compat::Future01CompatExt, executor::block_on, select, FutureExt}; -use keys::data_key; +use keys::{data_key, validate_data_key, DATA_PREFIX_KEY}; use kvproto::{ errorpb::Error as PbError, kvrpcpb::ApiVersion, @@ -81,8 +81,13 @@ pub trait Simulator { fn stop_node(&mut self, node_id: u64); fn get_node_ids(&self) -> HashSet; + fn add_send_filter(&mut self, node_id: u64, filter: Box); fn clear_send_filters(&mut self, node_id: u64); + + fn add_recv_filter(&mut self, node_id: u64, filter: Box); + fn clear_recv_filters(&mut self, node_id: u64); + fn get_router(&self, node_id: u64) -> Option>; fn get_snap_dir(&self, node_id: u64) -> String; @@ -1102,6 +1107,10 @@ impl Cluster { self.sim.wl().add_send_filter(node_id, filter); } + pub fn add_recv_filter_on_node(&mut self, node_id: u64, filter: Box) { + self.sim.wl().add_recv_filter(node_id, filter); + } + pub fn add_send_filter(&self, factory: F) { let mut sim = self.sim.wl(); for node_id in sim.get_node_ids() { @@ -1392,7 +1401,9 @@ impl WrapFactory { } } - fn region_id_of_key(&self, key: &[u8]) -> u64 { + fn region_id_of_key(&self, mut key: &[u8]) -> u64 { + assert!(validate_data_key(key)); + key = &key[DATA_PREFIX_KEY.len()..]; self.pd_client.get_region(key).unwrap().get_id() } diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index f8c8d84bc9b..f6211c09748 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -404,6 +404,16 @@ impl Simulator for NodeCluster { .unwrap() .to_owned() } + + fn add_recv_filter(&mut self, node_id: u64, filter: Box) { + let mut trans = self.trans.core.lock().unwrap(); + trans.routers.get_mut(&node_id).unwrap().add_filter(filter); + } + + fn clear_recv_filters(&mut self, node_id: u64) { + let mut trans = self.trans.core.lock().unwrap(); + trans.routers.get_mut(&node_id).unwrap().clear_filters(); + } } pub fn new_node_cluster(id: u64, count: usize) -> Cluster { diff --git 
a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 64e05d6b766..8804f0c0f8c 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -525,6 +525,14 @@ impl Simulator for ServerCluster { .clear_filters(); } + fn add_recv_filter(&mut self, _node_id: u64, _filter: Box) { + unimplemented!() + } + + fn clear_recv_filters(&mut self, _node_id: u64) { + unimplemented!() + } + fn run_node( &mut self, node_id: u64, diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index d5842bf6659..d4668fe4928 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1339,6 +1339,10 @@ impl Cluster { self.sim.wl().add_send_filter(node_id, filter); } + pub fn add_recv_filter_on_node(&mut self, node_id: u64, filter: Box) { + self.sim.wl().add_recv_filter(node_id, filter); + } + pub fn add_send_filter(&self, factory: F) { let mut sim = self.sim.wl(); for node_id in sim.get_node_ids() { diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index ddc4bb50406..e8a0730488a 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -257,10 +257,50 @@ fn test_concurrent_snap(cluster: &mut Cluster) { must_get_equal(&cluster.get_engine(3), b"k4", b"v4"); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_concurrent_snap() { - let mut cluster = new_node_cluster(0, 3); - test_concurrent_snap(&mut cluster); + let mut cluster = new_cluster(0, 3); + // Test that the handling of snapshot is correct when there are multiple + // snapshots which have overlapped region ranges arrive at the same + // raftstore. + cluster.cfg.rocksdb.titan.enabled = true; + // Disable raft log gc in this test case. 
+ cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(60); + + let pd_client = Arc::clone(&cluster.pd_client); + // Disable default max peer count check. + pd_client.disable_default_operator(); + + let r1 = cluster.run_conf_change(); + cluster.must_put(b"k1", b"v1"); + pd_client.must_add_peer(r1, new_peer(2, 2)); + // Force peer 2 to be followers all the way. + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(r1, 2) + .msg_type(MessageType::MsgRequestVote) + .direction(Direction::Send), + )); + cluster.must_transfer_leader(r1, new_peer(1, 1)); + cluster.must_put(b"k3", b"v3"); + // Pile up snapshots of overlapped region ranges and deliver them all at once. + let (tx, rx) = mpsc::channel(); + cluster.add_recv_filter_on_node(3, Box::new(CollectSnapshotFilter::new(tx))); + pd_client.must_add_peer(r1, new_peer(3, 3)); + let region = cluster.get_region(b"k1"); + // Ensure the snapshot of range ("", "") is sent and piled in filter. + if let Err(e) = rx.recv_timeout(Duration::from_secs(1)) { + panic!("the snapshot is not sent before split, e: {:?}", e); + } + // Split the region range and then there should be another snapshot for the + // split ranges. + cluster.must_split(®ion, b"k2"); + must_get_equal(&cluster.get_engine(3), b"k3", b"v3"); + // Ensure the regions work after split. 
+ cluster.must_put(b"k11", b"v11"); + must_get_equal(&cluster.get_engine(3), b"k11", b"v11"); + cluster.must_put(b"k4", b"v4"); + must_get_equal(&cluster.get_engine(3), b"k4", b"v4"); } #[test] diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 20a7c3f503a..963424d8986 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -414,7 +414,12 @@ fn test_node_split_overlap_snapshot() { must_get_equal(&engine3, b"k3", b"v3"); } -fn test_apply_new_version_snapshot(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_apply_new_version_snapshot() { + let mut cluster = new_cluster(0, 3); // truncate the log quickly so that we can force sending snapshot. cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(5); @@ -467,21 +472,10 @@ fn test_apply_new_version_snapshot(cluster: &mut Cluster) { must_get_equal(&engine3, b"k2", b"v2"); } -#[test] -fn test_node_apply_new_version_snapshot() { - let mut cluster = new_node_cluster(0, 3); - test_apply_new_version_snapshot(&mut cluster); -} - -#[test] -fn test_server_apply_new_version_snapshot() { - let mut cluster = new_server_cluster(0, 3); - test_apply_new_version_snapshot(&mut cluster); -} - -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_server_split_with_stale_peer() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // disable raft log gc. 
cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(60); cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(500); diff --git a/tests/integrations/raftstore/test_transfer_leader.rs b/tests/integrations/raftstore/test_transfer_leader.rs index b97191d1a13..6ed9b3c487b 100644 --- a/tests/integrations/raftstore/test_transfer_leader.rs +++ b/tests/integrations/raftstore/test_transfer_leader.rs @@ -176,6 +176,7 @@ fn test_server_pd_transfer_leader_multi_target() { } #[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_server_transfer_leader_during_snapshot() { let mut cluster = new_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); From 13eb4f606bfe93ac1a33709729a11913689633a4 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 22 Feb 2023 15:53:06 +0800 Subject: [PATCH 0543/1149] raftstore-v2: prepare merge (#14226) ref tikv/tikv#12842, ref tikv/tikv#13818 Implement prepare merge for raftstore-v2 Signed-off-by: tabokie Signed-off-by: Xinye Tao --- components/raftstore-v2/src/lib.rs | 1 + .../operation/command/admin/compact_log.rs | 23 +- .../operation/command/admin/conf_change.rs | 2 +- .../src/operation/command/admin/merge/mod.rs | 112 ++++ .../operation/command/admin/merge/prepare.rs | 507 ++++++++++++++++++ .../src/operation/command/admin/mod.rs | 18 +- .../src/operation/command/admin/split.rs | 10 +- .../src/operation/command/control.rs | 35 +- .../raftstore-v2/src/operation/command/mod.rs | 26 +- .../src/operation/command/write/mod.rs | 13 +- components/raftstore-v2/src/operation/mod.rs | 6 +- .../raftstore-v2/src/operation/query/mod.rs | 2 +- .../raftstore-v2/src/operation/ready/mod.rs | 8 + .../raftstore-v2/src/operation/txn_ext.rs | 1 - components/raftstore-v2/src/raft/apply.rs | 10 + components/raftstore-v2/src/raft/peer.rs | 53 +- components/raftstore-v2/src/router/imp.rs | 22 +- components/raftstore-v2/src/worker/pd/mod.rs | 8 + 
.../raftstore-v2/src/worker/pd/region.rs | 8 +- components/raftstore/src/store/fsm/peer.rs | 2 +- 20 files changed, 799 insertions(+), 68 deletions(-) create mode 100644 components/raftstore-v2/src/operation/command/admin/merge/mod.rs create mode 100644 components/raftstore-v2/src/operation/command/admin/merge/prepare.rs diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 8af6b57e9bc..bbb73676ffb 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -26,6 +26,7 @@ #![feature(div_duration)] #![feature(box_into_inner)] #![feature(assert_matches)] +#![feature(option_get_or_insert_default)] mod batch; mod bootstrap; diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 8e83387012e..af61434041a 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -351,7 +351,7 @@ impl Peer { pub fn on_apply_res_compact_log( &mut self, store_ctx: &mut StoreContext, - res: CompactLogResult, + mut res: CompactLogResult, ) { let first_index = self.entry_storage().first_index(); if res.compact_index <= first_index { @@ -363,7 +363,17 @@ impl Peer { ); return; } - // TODO: check is_merging + if let Some(i) = self.merge_context().and_then(|c| c.max_compact_log_index()) + && res.compact_index > i + { + info!( + self.logger, + "in merging mode, adjust compact index"; + "old_index" => res.compact_index, + "new_index" => i, + ); + res.compact_index = i; + } // TODO: check entry_cache_warmup_state self.entry_storage_mut() .compact_entry_cache(res.compact_index); @@ -388,10 +398,10 @@ impl Peer { // All logs < perssited_apply will be deleted, so should check with +1. 
if old_truncated + 1 < self.storage().apply_trace().persisted_apply_index() - && let Some(index) = self.compact_log_index() { + && let Some(index) = self.compact_log_index() + { // Raft Engine doesn't care about first index. - if let Err(e) = - store_ctx + if let Err(e) = store_ctx .engine .gc(self.region_id(), 0, index, self.state_changes_mut()) { @@ -432,7 +442,8 @@ impl Peer { // If it's snapshot, logs are gc already. if !task.has_snapshot && old_persisted < self.entry_storage().truncated_index() + 1 - && let Some(index) = self.compact_log_index() { + && let Some(index) = self.compact_log_index() + { let batch = task.extra_write.ensure_v2(|| self.entry_storage().raft_engine().log_batch(0)); // Raft Engine doesn't care about first index. if let Err(e) = diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 1b8d29a7a54..7bc20068736 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -255,7 +255,7 @@ impl Apply { cc: ConfChangeV2, legacy: bool, ) -> Result<(AdminResponse, AdminCmdResult)> { - let region = self.region_state().get_region(); + let region = self.region(); let change_kind = ConfChangeKind::confchange_kind(changes.len()); info!(self.logger, "exec ConfChangeV2"; "kind" => ?change_kind, "legacy" => legacy, "epoch" => ?region.get_region_epoch(), "index" => index); let mut new_region = region.clone(); diff --git a/components/raftstore-v2/src/operation/command/admin/merge/mod.rs b/components/raftstore-v2/src/operation/command/admin/merge/mod.rs new file mode 100644 index 00000000000..a3895a1b435 --- /dev/null +++ b/components/raftstore-v2/src/operation/command/admin/merge/mod.rs @@ -0,0 +1,112 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +pub mod prepare; + +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::{ + raft_cmdpb::RaftCmdRequest, + raft_serverpb::{PeerState, RegionLocalState}, +}; +use prepare::PrepareStatus; +use raft::{ProgressState, INVALID_INDEX}; +use raftstore::Result; +use slog::{info, warn, Logger}; +use tikv_util::box_err; + +use crate::raft::Peer; + +#[derive(Default)] +pub struct MergeContext { + prepare_status: Option, +} + +impl MergeContext { + #[inline] + pub fn from_region_state(logger: &Logger, state: &RegionLocalState) -> Option { + if state.get_state() == PeerState::Merging { + info!(logger, "region is merging"; "region_state" => ?state); + let mut ctx = Self::default(); + ctx.prepare_status = Some(PrepareStatus::Applied(state.get_merge_state().clone())); + Some(ctx) + } else { + None + } + } + + #[inline] + pub fn maybe_take_pending_prepare(&mut self, applied: u64) -> Option { + if let Some(PrepareStatus::WaitForFence { + fence, + req, + .. + }) = self.prepare_status.as_mut() + && applied >= *fence + { + // The status will be updated during processing the proposal. + return req.take(); + } + None + } + + #[inline] + pub fn max_compact_log_index(&self) -> Option { + if let Some(PrepareStatus::WaitForFence { ctx, .. }) = self.prepare_status.as_ref() { + Some(ctx.min_matched) + } else { + None + } + } +} + +impl Peer { + #[inline] + pub fn update_merge_progress_on_became_follower(&mut self) { + if let Some(ctx) = self.merge_context() + && matches!(ctx.prepare_status, Some(PrepareStatus::WaitForFence { .. 
})) + { + self.take_merge_context(); + self.proposal_control_mut().set_pending_prepare_merge(false); + } + } + + /// Returns (minimal matched, minimal committed) + pub fn calculate_min_progress(&self) -> Result<(u64, u64)> { + let (mut min_m, mut min_c) = (None, None); + if let Some(progress) = self.raft_group().status().progress { + for (id, pr) in progress.iter() { + // Reject merge if there is any pending request snapshot, + // because a target region may merge a source region which is in + // an invalid state. + if pr.state == ProgressState::Snapshot + || pr.pending_request_snapshot != INVALID_INDEX + { + return Err(box_err!( + "there is a pending snapshot peer {} [{:?}], skip merge", + id, + pr + )); + } + if min_m.unwrap_or(u64::MAX) > pr.matched { + min_m = Some(pr.matched); + } + if min_c.unwrap_or(u64::MAX) > pr.committed_index { + min_c = Some(pr.committed_index); + } + } + } + let (mut min_m, min_c) = (min_m.unwrap_or(0), min_c.unwrap_or(0)); + if min_m < min_c { + warn!( + self.logger, + "min_matched < min_committed, raft progress is inaccurate"; + "min_matched" => min_m, + "min_committed" => min_c, + ); + // Reset `min_matched` to `min_committed`, since the raft log at `min_committed` + // is known to be committed in all peers, all of the peers should also have + // replicated it + min_m = min_c; + } + Ok((min_m, min_c)) + } +} diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs new file mode 100644 index 00000000000..f9df2d9ea1a --- /dev/null +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -0,0 +1,507 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +//! The handling of `PrepareMerge` command. +//! +//! ## Propose (`Peer::propose_prepare_merge`) +//! +//! Checks for these requirements: +//! +//! - Validate the request. (`Peer::validate_prepare_merge_command`) +//! 
- Log gap between source region leader and peers is not too large. This is +//! because these logs need to be embeded in the later `CommitMerge` command. +//! - Logs that aren't fully committed (to all peers) does not contains +//! `CompactLog` or certain admin commands. +//! +//! Then, transfer all in-memory pessimistic locks to the target region as a +//! Raft proposal. To guarantee the consistency of lock serialization, we might +//! need to wait for some in-flight logs to be applied. During the wait, all +//! incoming write proposals will be rejected. Read the comments of +//! `PrepareStatus::WaitForFence` for more details. +//! +//! ## Apply (`Apply::apply_prepare_merge`) +//! +//! Increase region epoch and write the merge state. +//! +//! ## On Apply Result (`Peer::on_apply_res_prepare_merge`) +//! +//! Start the tick (`Peer::on_check_merge`) to periodically check the +//! eligibility of merge. + +use std::mem; + +use engine_traits::{KvEngine, RaftEngine, RaftLogBatch, CF_LOCK}; +use kvproto::{ + raft_cmdpb::{ + AdminCmdType, AdminRequest, AdminResponse, CmdType, PrepareMergeRequest, PutRequest, + RaftCmdRequest, Request, + }, + raft_serverpb::{MergeState, PeerState, RegionLocalState}, +}; +use parking_lot::RwLockUpgradableReadGuard; +use protobuf::Message; +use raft::{eraftpb::EntryType, GetEntriesContext, NO_LIMIT}; +use raftstore::{ + coprocessor::RegionChangeReason, + store::{metrics::PEER_ADMIN_CMD_COUNTER, util, LocksStatus, ProposalContext, Transport}, + Error, Result, +}; +use slog::{debug, info}; +use tikv_util::{box_err, log::SlogFormat, store::region_on_same_stores}; + +use crate::{ + batch::StoreContext, + fsm::ApplyResReporter, + operation::AdminCmdResult, + raft::{Apply, Peer}, + router::CmdResChannel, +}; + +#[derive(Clone)] +pub struct PreProposeContext { + pub min_matched: u64, + lock_size_limit: usize, +} + +pub enum PrepareStatus { + /// When a fence is present, we (1) delay the PrepareMerge + /// command `cmd` until all writes before `idx` 
are applied (2) reject all + /// in-coming write proposals. + /// Before proposing `PrepareMerge`, we first serialize and propose the lock + /// table. Locks marked as deleted (but not removed yet) will be + /// serialized as normal locks. + /// Thanks to the fence, we can ensure at the time of lock transfer, locks + /// are either removed (when applying logs) or won't be removed before + /// merge (the proposals to remove them are rejected). + /// + /// The request can be `None` because we needs to take it out to redo the + /// propose. In the meantime the fence is needed to bypass the check. + WaitForFence { + fence: u64, + ctx: PreProposeContext, + req: Option, + }, + /// In this state, all write proposals except for `RollbackMerge` will be + /// rejected. + Applied(MergeState), +} + +#[derive(Debug)] +pub struct PrepareMergeResult { + region_state: RegionLocalState, + state: MergeState, +} + +impl Peer { + pub fn propose_prepare_merge( + &mut self, + store_ctx: &mut StoreContext, + mut req: RaftCmdRequest, + ) -> Result { + if self.storage().has_dirty_data() { + return Err(box_err!( + "{} source peer has dirty data, try again later", + SlogFormat(&self.logger) + )); + } + self.validate_prepare_merge_command( + store_ctx, + req.get_admin_request().get_prepare_merge(), + )?; + let pre_propose = if let Some(r) = self.already_checked_pessimistic_locks()? { + r + } else { + let r = self.check_logs_before_prepare_merge(store_ctx)?; + self.check_pessimistic_locks(r, &mut req)? 
+ }; + req.mut_admin_request() + .mut_prepare_merge() + .set_min_index(pre_propose.min_matched + 1); + let r = self + .propose_locks_before_prepare_merge(store_ctx, pre_propose.lock_size_limit) + .and_then(|_| { + let mut proposal_ctx = ProposalContext::empty(); + proposal_ctx.insert(ProposalContext::PREPARE_MERGE); + let data = req.write_to_bytes().unwrap(); + self.propose_with_ctx(store_ctx, data, proposal_ctx.to_vec()) + }); + if r.is_ok() { + self.proposal_control_mut().set_pending_prepare_merge(false); + } else { + // Match v1::post_propose_fail. + // If we just failed to propose PrepareMerge, the pessimistic locks status + // may become MergingRegion incorrectly. So, we have to revert it here. + // Note: The `is_merging` check from v1 is removed because proposed + // `PrepareMerge` rejects all writes (in `ProposalControl::check_conflict`). + assert!( + !self.proposal_control().is_merging(), + "{}", + SlogFormat(&self.logger) + ); + self.take_merge_context(); + self.proposal_control_mut().set_pending_prepare_merge(false); + let mut pessimistic_locks = self.txn_context().ext().pessimistic_locks.write(); + if pessimistic_locks.status == LocksStatus::MergingRegion { + pessimistic_locks.status = LocksStatus::Normal; + } + } + r + } + + /// Match v1::check_merge_proposal. + /// - Target region epoch as requested is identical with the local version. + /// - Target region is a sibling to the source region. + /// - Peers of both source and target region are aligned, i.e. located on + /// the same set of stores. 
+ fn validate_prepare_merge_command( + &mut self, + store_ctx: &mut StoreContext, + req: &PrepareMergeRequest, + ) -> Result<()> { + // Just for simplicity, do not start region merge while in joint state + if self.in_joint_state() { + return Err(box_err!( + "{} region in joint state, can not propose merge command, command: {:?}", + SlogFormat(&self.logger), + req + )); + } + let region = self.region(); + let target_region = req.get_target(); + { + let store_meta = store_ctx.store_meta.lock().unwrap(); + match store_meta.regions.get(&target_region.get_id()) { + Some((region, _)) if *region != *target_region => { + return Err(box_err!( + "target region not matched, skip proposing: {:?} != {:?}", + region, + target_region + )); + } + None => { + return Err(box_err!( + "target region {} doesn't exist.", + target_region.get_id() + )); + } + _ => {} + } + } + + if !util::is_sibling_regions(target_region, region) { + return Err(box_err!( + "{:?} and {:?} are not sibling, skip proposing.", + target_region, + region + )); + } + if !region_on_same_stores(target_region, region) { + return Err(box_err!( + "peers doesn't match {:?} != {:?}, reject merge", + region.get_peers(), + target_region.get_peers() + )); + } + Ok(()) + } + + // Match v1::pre_propose_prepare_merge. 
+ fn check_logs_before_prepare_merge( + &mut self, + store_ctx: &mut StoreContext, + ) -> Result { + let last_index = self.raft_group().raft.raft_log.last_index(); + let (min_matched, min_committed) = self.calculate_min_progress()?; + if min_matched == 0 + || min_committed == 0 + || last_index - min_matched > store_ctx.cfg.merge_max_log_gap + || last_index - min_committed > store_ctx.cfg.merge_max_log_gap * 2 + || min_matched < self.last_sent_snapshot_index() + { + return Err(box_err!( + "log gap too large, skip merge: matched: {}, committed: {}, last index: {}", + min_matched, + min_committed, + last_index + )); + } + let mut entry_size = 0; + for entry in self.raft_group().raft.raft_log.entries( + min_committed + 1, + NO_LIMIT, + GetEntriesContext::empty(false), + )? { + // commit merge only contains entries start from min_matched + 1 + if entry.index > min_matched { + entry_size += entry.get_data().len(); + } + if entry.get_entry_type() == EntryType::EntryConfChange + || entry.get_entry_type() == EntryType::EntryConfChangeV2 + { + return Err(box_err!( + "{} log gap contains conf change, skip merging.", + "tag" + )); + } + if entry.get_data().is_empty() { + continue; + } + let cmd: RaftCmdRequest = + util::parse_data_at(entry.get_data(), entry.get_index(), "tag"); + if !cmd.has_admin_request() { + continue; + } + let cmd_type = cmd.get_admin_request().get_cmd_type(); + match cmd_type { + AdminCmdType::TransferLeader + | AdminCmdType::ComputeHash + | AdminCmdType::VerifyHash + | AdminCmdType::InvalidAdmin => continue, + _ => {} + } + // Any command that can change epoch or log gap should be rejected. + return Err(box_err!( + "log gap contains admin request {:?}, skip merging.", + cmd_type + )); + } + let entry_size_limit = store_ctx.cfg.raft_entry_max_size.0 as usize * 9 / 10; + if entry_size > entry_size_limit { + return Err(box_err!( + "log gap size exceed entry size limit, skip merging." 
+ )); + }; + Ok(PreProposeContext { + min_matched, + lock_size_limit: entry_size_limit - entry_size, + }) + } + + fn check_pessimistic_locks( + &mut self, + ctx: PreProposeContext, + req: &mut RaftCmdRequest, + ) -> Result { + let has_locks = { + let pessimistic_locks = self.txn_context().ext().pessimistic_locks.read(); + if pessimistic_locks.status != LocksStatus::Normal { + // If `status` is not `Normal`, it means the in-memory pessimistic locks are + // being transferred, probably triggered by transferring leader. In this case, + // we abort merging to simplify the situation. + return Err(box_err!( + "pessimistic locks status is {:?}, skip merging.", + pessimistic_locks.status + )); + } + !pessimistic_locks.is_empty() + }; + let last_index = self.raft_group().raft.raft_log.last_index(); + if has_locks && self.entry_storage().applied_index() < last_index { + self.merge_context_mut().prepare_status = Some(PrepareStatus::WaitForFence { + fence: last_index, + ctx, + req: Some(mem::take(req)), + }); + self.proposal_control_mut().set_pending_prepare_merge(true); + info!( + self.logger, + "start rejecting new proposals before prepare merge"; + "prepare_merge_fence" => last_index + ); + return Err(Error::PendingPrepareMerge); + } + Ok(ctx) + } + + fn already_checked_pessimistic_locks(&mut self) -> Result> { + let applied_index = self.entry_storage().applied_index(); + match self + .merge_context() + .as_ref() + .and_then(|c| c.prepare_status.as_ref()) + { + Some(PrepareStatus::WaitForFence { fence, ctx, .. 
}) => { + if applied_index < *fence { + info!( + self.logger, + "reject PrepareMerge because applied_index has not reached prepare_merge_fence"; + "applied_index" => applied_index, + "prepare_merge_fence" => fence, + ); + Err(Error::PendingPrepareMerge) + } else { + Ok(Some(ctx.clone())) + } + } + Some(PrepareStatus::Applied(state)) => Err(box_err!( + "another merge is in-progress, merge_state: {:?}.", + state + )), + None => Ok(None), + } + } + + /// Called after some new entries have been applied and the fence can + /// probably be lifted. + pub fn retry_pending_prepare_merge( + &mut self, + store_ctx: &mut StoreContext, + applied_index: u64, + ) { + if let Some(req) = self + .merge_context_mut() + .maybe_take_pending_prepare(applied_index) + { + let (ch, _) = CmdResChannel::pair(); + self.on_admin_command(store_ctx, req, ch); + } + } + + fn propose_locks_before_prepare_merge( + &mut self, + store_ctx: &mut StoreContext, + size_limit: usize, + ) -> Result<()> { + let pessimistic_locks = self.txn_context().ext().pessimistic_locks.upgradable_read(); + if pessimistic_locks.is_empty() { + let mut pessimistic_locks = RwLockUpgradableReadGuard::upgrade(pessimistic_locks); + pessimistic_locks.status = LocksStatus::MergingRegion; + return Ok(()); + } + + // The proposed pessimistic locks here will also be carried in CommitMerge. + // Check the size to avoid CommitMerge exceeding the size limit of a raft entry. + // This check is a inaccurate check. We will check the size again accurately + // later using the protobuf encoding. 
+ if pessimistic_locks.memory_size > size_limit { + return Err(box_err!( + "pessimistic locks size {} exceed size limit {}, skip merging.", + pessimistic_locks.memory_size, + size_limit + )); + } + + let mut cmd = RaftCmdRequest::default(); + for (key, (lock, _deleted)) in &*pessimistic_locks { + let mut put = PutRequest::default(); + put.set_cf(CF_LOCK.to_string()); + put.set_key(key.as_encoded().to_owned()); + put.set_value(lock.to_lock().to_bytes()); + let mut req = Request::default(); + req.set_cmd_type(CmdType::Put); + req.set_put(put); + cmd.mut_requests().push(req); + } + cmd.mut_header().set_region_id(self.region_id()); + cmd.mut_header() + .set_region_epoch(self.region().get_region_epoch().clone()); + cmd.mut_header().set_peer(self.peer().clone()); + let proposal_size = cmd.compute_size(); + if proposal_size as usize > size_limit { + return Err(box_err!( + "pessimistic locks size {} exceed size limit {}, skip merging.", + proposal_size, + size_limit + )); + } + + { + let mut pessimistic_locks = RwLockUpgradableReadGuard::upgrade(pessimistic_locks); + pessimistic_locks.status = LocksStatus::MergingRegion; + } + debug!( + self.logger, + "propose {} pessimistic locks before prepare merge", + cmd.get_requests().len(); + ); + self.propose(store_ctx, cmd.write_to_bytes().unwrap())?; + Ok(()) + } +} + +impl Apply { + // Match v1::exec_prepare_merge. + pub fn apply_prepare_merge( + &mut self, + req: &AdminRequest, + log_index: u64, + ) -> Result<(AdminResponse, AdminCmdResult)> { + PEER_ADMIN_CMD_COUNTER.prepare_merge.all.inc(); + + let prepare_merge = req.get_prepare_merge(); + let index = prepare_merge.get_min_index(); + // Note: the check against first_index is removed in v2. + let mut region = self.region().clone(); + let region_version = region.get_region_epoch().get_version() + 1; + region.mut_region_epoch().set_version(region_version); + // In theory conf version should not be increased when executing prepare_merge. 
+ // However, we don't want to do conf change after prepare_merge is committed. + // This can also be done by iterating all proposal to find if prepare_merge is + // proposed before proposing conf change, but it make things complicated. + // Another way is make conf change also check region version, but this is not + // backward compatible. + let conf_version = region.get_region_epoch().get_conf_ver() + 1; + region.mut_region_epoch().set_conf_ver(conf_version); + let mut merging_state = MergeState::default(); + merging_state.set_min_index(index); + merging_state.set_target(prepare_merge.get_target().to_owned()); + merging_state.set_commit(log_index); + + self.region_state_mut().set_region(region.clone()); + self.region_state_mut().set_state(PeerState::Merging); + assert!( + !self.region_state().has_merge_state(), + "{:?}", + self.region_state() + ); + self.region_state_mut() + .set_merge_state(merging_state.clone()); + + PEER_ADMIN_CMD_COUNTER.prepare_merge.success.inc(); + + Ok(( + AdminResponse::default(), + AdminCmdResult::PrepareMerge(PrepareMergeResult { + region_state: self.region_state().clone(), + state: merging_state, + }), + )) + } +} + +impl Peer { + // Match v1::on_ready_prepare_merge. 
+ pub fn on_apply_res_prepare_merge( + &mut self, + store_ctx: &mut StoreContext, + res: PrepareMergeResult, + ) { + let region = res.region_state.get_region().clone(); + { + let mut meta = store_ctx.store_meta.lock().unwrap(); + meta.set_region(®ion, true, &self.logger); + let (reader, _) = meta.readers.get_mut(®ion.get_id()).unwrap(); + self.set_region( + &store_ctx.coprocessor_host, + reader, + region, + RegionChangeReason::PrepareMerge, + res.state.get_commit(), + ); + } + + self.storage_mut() + .set_region_state(res.region_state.clone()); + let region_id = self.region_id(); + self.state_changes_mut() + .put_region_state(region_id, res.state.get_commit(), &res.region_state) + .unwrap(); + self.set_has_extra_write(); + + self.proposal_control_mut() + .enter_prepare_merge(res.state.get_commit()); + self.merge_context_mut().prepare_status = Some(PrepareStatus::Applied(res.state)); + + // TODO: self. + // update_merge_progress_on_apply_res_prepare_merge(store_ctx); + } +} diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 0661d1c15dc..fe84413ff28 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -2,6 +2,7 @@ mod compact_log; mod conf_change; +mod merge; mod split; mod transfer_leader; @@ -10,8 +11,13 @@ use compact_log::CompactLogResult; use conf_change::{ConfChangeResult, UpdateGcPeersResult}; use engine_traits::{KvEngine, RaftEngine}; use kvproto::raft_cmdpb::{AdminCmdType, RaftCmdRequest}; +use merge::prepare::PrepareMergeResult; +pub use merge::MergeContext; use protobuf::Message; -use raftstore::store::{cmd_resp, fsm::apply, msg::ErrorCallback}; +use raftstore::{ + store::{cmd_resp, fsm::apply, msg::ErrorCallback}, + Error, +}; use slog::info; use split::SplitResult; pub use split::{ @@ -32,6 +38,7 @@ pub enum AdminCmdResult { TransferLeader(u64), CompactLog(CompactLogResult), 
UpdateGcPeers(UpdateGcPeersResult), + PrepareMerge(PrepareMergeResult), } impl Peer { @@ -93,6 +100,14 @@ impl Peer { conflict.delay_channel(ch); return; } + if self.proposal_control().has_pending_prepare_merge() + && cmd_type != AdminCmdType::PrepareMerge + || self.proposal_control().is_merging() && cmd_type != AdminCmdType::RollbackMerge + { + let resp = cmd_resp::new_error(Error::ProposalInMergingMode(self.region_id())); + ch.report_error(resp); + return; + } // To maintain propose order, we need to make pending proposal first. self.propose_pending_writes(ctx); let res = if apply::is_conf_change_cmd(&req) { @@ -124,6 +139,7 @@ impl Peer { let data = req.write_to_bytes().unwrap(); self.propose(ctx, data) } + AdminCmdType::PrepareMerge => self.propose_prepare_merge(ctx, req), _ => unimplemented!(), } }; diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index b4e2b4654e7..bbc6aac058e 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -187,7 +187,7 @@ pub fn temp_split_path(registry: &TabletRegistry, region_id: u64) -> Pat impl PeerFsmDelegate<'_, EK, ER, T> { pub fn on_split_region_check(&mut self) { if !self.fsm.peer_mut().on_split_region_check(self.store_ctx) { - self.schedule_tick(PeerTick::SplitRegionCheck) + self.schedule_tick(PeerTick::SplitRegionCheck); } } } @@ -382,16 +382,16 @@ impl Apply { ) -> Result<(AdminResponse, AdminCmdResult)> { PEER_ADMIN_CMD_COUNTER.batch_split.all.inc(); - let region = self.region_state().get_region(); + let region = self.region(); let region_id = region.get_id(); - validate_batch_split(req, self.region_state().get_region())?; + validate_batch_split(req, self.region())?; let mut boundaries: Vec<&[u8]> = Vec::default(); - boundaries.push(self.region_state().get_region().get_start_key()); + boundaries.push(self.region().get_start_key()); for req in 
req.get_splits().get_requests() { boundaries.push(req.get_split_key()); } - boundaries.push(self.region_state().get_region().get_end_key()); + boundaries.push(self.region().get_end_key()); info!( self.logger, diff --git a/components/raftstore-v2/src/operation/command/control.rs b/components/raftstore-v2/src/operation/command/control.rs index fd53090fd65..586d9f5c019 100644 --- a/components/raftstore-v2/src/operation/command/control.rs +++ b/components/raftstore-v2/src/operation/command/control.rs @@ -77,10 +77,12 @@ impl ProposedAdminCmd { /// Compared to `CmdEpochChecker`, `ProposalControl` also traces the whole /// lifetime of prepare merge. pub struct ProposalControl { + // Admin commands that are proposed but not applied. // Use `LinkedList` to reduce memory footprint. In most cases, the list // should be empty or 1 element. And access speed is not a concern. proposed_admin_cmd: LinkedList, - pending_merge_index: u64, + has_pending_prepare_merge: bool, + applied_prepare_merge_index: u64, term: u64, } @@ -88,7 +90,8 @@ impl ProposalControl { pub fn new(term: u64) -> ProposalControl { ProposalControl { proposed_admin_cmd: LinkedList::new(), - pending_merge_index: 0, + has_pending_prepare_merge: false, + applied_prepare_merge_index: 0, term, } } @@ -135,6 +138,7 @@ impl ProposalControl { self.proposed_admin_cmd.iter_mut().rev().find(|cmd| { (check_ver && cmd.epoch_state.change_ver) || (check_conf_ver && cmd.epoch_state.change_conf_ver) + || cmd.cmd_type == AdminCmdType::PrepareMerge }) } @@ -209,19 +213,34 @@ impl ProposalControl { } } + #[inline] + pub fn set_pending_prepare_merge(&mut self, v: bool) { + self.has_pending_prepare_merge = v; + } + + #[inline] + pub fn has_pending_prepare_merge(&self) -> bool { + self.has_pending_prepare_merge + } + #[inline] pub fn enter_prepare_merge(&mut self, prepare_merge_index: u64) { - self.pending_merge_index = prepare_merge_index; + self.applied_prepare_merge_index = prepare_merge_index; } #[inline] pub fn 
leave_prepare_merge(&mut self, prepare_merge_index: u64) { - if self.pending_merge_index != 0 { - assert_eq!(self.pending_merge_index, prepare_merge_index); - self.pending_merge_index = 0; + if self.applied_prepare_merge_index != 0 { + assert_eq!(self.applied_prepare_merge_index, prepare_merge_index); + self.applied_prepare_merge_index = 0; } } + #[inline] + pub fn has_applied_prepare_merge(&self) -> bool { + self.applied_prepare_merge_index != 0 + } + /// Check if there is an on-going split command on current term. /// /// The answer is reliable only when the peer is leader. @@ -242,8 +261,8 @@ impl ProposalControl { /// applied. #[inline] pub fn is_merging(&self) -> bool { - if self.proposed_admin_cmd.is_empty() { - return self.pending_merge_index != 0; + if self.applied_prepare_merge_index != 0 { + return true; } self.proposed_admin_cmd .iter() diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 9f24241b039..8bff64e66c9 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -35,7 +35,7 @@ use raftstore::{ local_metrics::RaftMetrics, metrics::{APPLY_TASK_WAIT_TIME_HISTOGRAM, APPLY_TIME_HISTOGRAM}, msg::ErrorCallback, - util, Config, WriteCallback, + util, Config, Transport, WriteCallback, }, Error, Result, }; @@ -59,8 +59,8 @@ mod control; mod write; pub use admin::{ - report_split_init_finish, temp_split_path, AdminCmdResult, CompactLogContext, RequestHalfSplit, - RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX, + report_split_init_finish, temp_split_path, AdminCmdResult, CompactLogContext, MergeContext, + RequestHalfSplit, RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX, }; pub use control::ProposalControl; pub use write::{ @@ -319,7 +319,11 @@ impl Peer { } } - pub fn on_apply_res(&mut self, ctx: &mut StoreContext, apply_res: ApplyRes) { + pub fn on_apply_res( + &mut self, + ctx: &mut StoreContext, 
+ apply_res: ApplyRes, + ) { if !self.serving() || !apply_res.admin_result.is_empty() { // TODO: remove following log once stable. info!(self.logger, "on_apply_res"; "apply_res" => ?apply_res, "apply_trace" => ?self.storage().apply_trace()); @@ -346,6 +350,7 @@ impl Peer { AdminCmdResult::TransferLeader(term) => self.on_transfer_leader(ctx, term), AdminCmdResult::CompactLog(res) => self.on_apply_res_compact_log(ctx, res), AdminCmdResult::UpdateGcPeers(state) => self.on_apply_res_update_gc_peers(state), + AdminCmdResult::PrepareMerge(res) => self.on_apply_res_prepare_merge(ctx, res), } } @@ -365,6 +370,9 @@ impl Peer { if !is_leader { entry_storage.compact_entry_cache(apply_res.applied_index + 1); } + if is_leader { + self.retry_pending_prepare_merge(ctx, apply_res.applied_index); + } self.on_data_modified(apply_res.modifications); self.handle_read_on_apply( ctx, @@ -482,7 +490,7 @@ impl Apply { .observe(duration_to_sec(ce.committed_time.saturating_elapsed())); for (e, ch) in ce.entry_and_proposals { if self.tombstone() { - apply::notify_req_region_removed(self.region_state().get_region().get_id(), ch); + apply::notify_req_region_removed(self.region_id(), ch); continue; } if !e.get_data().is_empty() { @@ -528,7 +536,7 @@ impl Apply { Ok(decoder) => { util::compare_region_epoch( decoder.header().get_region_epoch(), - self.region_state().get_region(), + self.region(), false, true, true, @@ -575,14 +583,14 @@ impl Apply { } }; - util::check_req_region_epoch(&req, self.region_state().get_region(), true)?; + util::check_req_region_epoch(&req, self.region(), true)?; if req.has_admin_request() { let admin_req = req.get_admin_request(); let (admin_resp, admin_result) = match req.get_admin_request().get_cmd_type() { - AdminCmdType::CompactLog => self.apply_compact_log(admin_req, entry.index)?, + AdminCmdType::CompactLog => self.apply_compact_log(admin_req, log_index)?, AdminCmdType::Split => self.apply_split(admin_req, log_index)?, AdminCmdType::BatchSplit => 
self.apply_batch_split(admin_req, log_index)?, - AdminCmdType::PrepareMerge => unimplemented!(), + AdminCmdType::PrepareMerge => self.apply_prepare_merge(admin_req, log_index)?, AdminCmdType::CommitMerge => unimplemented!(), AdminCmdType::RollbackMerge => unimplemented!(), AdminCmdType::TransferLeader => { diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index e958a3ec08f..988b7cf4b2d 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -9,7 +9,7 @@ use raftstore::{ msg::ErrorCallback, util::{self, NORMAL_REQ_CHECK_CONF_VER, NORMAL_REQ_CHECK_VER}, }, - Result, + Error, Result, }; use tikv_util::slog_panic; @@ -58,6 +58,13 @@ impl Peer { conflict.delay_channel(ch); return; } + if self.proposal_control().has_pending_prepare_merge() + || self.proposal_control().is_merging() + { + let resp = cmd_resp::new_error(Error::ProposalInMergingMode(self.region_id())); + ch.report_error(resp); + return; + } // ProposalControl is reliable only when applied to current term. let call_proposed_on_success = self.applied_to_current_term(); let mut encoder = SimpleWriteReqEncoder::new( @@ -132,7 +139,7 @@ impl Apply { if self.should_skip(off, index) { return Ok(()); } - util::check_key_in_region(key, self.region_state().get_region())?; + util::check_key_in_region(key, self.region())?; // Technically it's OK to remove prefix for raftstore v2. But rocksdb doesn't // support specifying infinite upper bound in various APIs. 
keys::data_key_with_buffer(key, &mut self.key_buffer); @@ -175,7 +182,7 @@ impl Apply { if self.should_skip(off, index) { return Ok(()); } - util::check_key_in_region(key, self.region_state().get_region())?; + util::check_key_in_region(key, self.region())?; keys::data_key_with_buffer(key, &mut self.key_buffer); self.ensure_write_buffer(); let res = if cf.is_empty() || cf == CF_DEFAULT { diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index ee0680f7fbb..0ba7de2c3e5 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -9,9 +9,9 @@ mod ready; mod txn_ext; pub use command::{ - AdminCmdResult, ApplyFlowControl, CommittedEntries, CompactLogContext, ProposalControl, - RequestHalfSplit, RequestSplit, SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, - SimpleWriteReqEncoder, SplitFlowControl, SPLIT_PREFIX, + AdminCmdResult, ApplyFlowControl, CommittedEntries, CompactLogContext, MergeContext, + ProposalControl, RequestHalfSplit, RequestSplit, SimpleWriteBinary, SimpleWriteEncoder, + SimpleWriteReqDecoder, SimpleWriteReqEncoder, SplitFlowControl, SPLIT_PREFIX, }; pub use life::{DestroyProgress, GcPeerContext}; pub use ready::{ diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 305cdb666cc..fc7cee35fa5 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -281,7 +281,7 @@ impl Peer { self.storage().apply_state().get_applied_index() >= read_index // If it is in pending merge state(i.e. applied PrepareMerge), the data may be stale. 
// TODO: Add a test to cover this case - && !self.has_pending_merge_state() + && self.proposal_control().has_applied_prepare_merge() } #[inline] diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index ebff7ad44ce..bf7b8ec8858 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -340,6 +340,10 @@ impl Peer { let msg_type = msg.get_message().get_msg_type(); let to_peer_id = msg.get_to_peer().get_id(); let to_store_id = msg.get_to_peer().get_store_id(); + if msg_type == MessageType::MsgSnapshot { + let index = msg.get_message().get_snapshot().get_metadata().get_index(); + self.update_last_sent_snapshot_index(index); + } trace!( self.logger, @@ -775,6 +779,9 @@ impl Peer { // current term to apply the read. So broadcast eagerly to avoid unexpected // latency. self.raft_group_mut().skip_bcast_commit(false); + self.update_last_sent_snapshot_index( + self.raft_group().raft.raft_log.last_index(), + ); self.txn_context().on_became_leader( ctx, @@ -798,6 +805,7 @@ impl Peer { self.storage_mut().cancel_generating_snap(None); self.txn_context() .on_became_follower(self.term(), self.region()); + self.update_merge_progress_on_became_follower(); } _ => {} } diff --git a/components/raftstore-v2/src/operation/txn_ext.rs b/components/raftstore-v2/src/operation/txn_ext.rs index 911c1eaab78..e30bc25eec4 100644 --- a/components/raftstore-v2/src/operation/txn_ext.rs +++ b/components/raftstore-v2/src/operation/txn_ext.rs @@ -88,7 +88,6 @@ impl TxnContext { &self.extra_op } - // TODO: find a better place to put all txn related stuff. 
fn require_updating_max_ts( &self, ctx: &StoreContext, diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 7a1a22a5a95..a7af3c470ae 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -162,6 +162,16 @@ impl Apply { &mut self.region_state } + #[inline] + pub fn region(&self) -> &metapb::Region { + self.region_state.get_region() + } + + #[inline] + pub fn region_id(&self) -> u64 { + self.region().get_id() + } + /// The tablet can't be public yet, otherwise content of latest tablet /// doesn't matches its epoch in both readers and peer fsm. #[inline] diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 142b4e91943..bcf92471ebe 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -11,7 +11,8 @@ use engine_traits::{ CachedTablet, FlushState, KvEngine, RaftEngine, TabletContext, TabletRegistry, }; use kvproto::{ - metapb, pdpb, + metapb::{self, PeerRole}, + pdpb, raft_serverpb::{RaftMessage, RegionLocalState}, }; use pd_client::BucketStat; @@ -31,8 +32,8 @@ use super::storage::Storage; use crate::{ fsm::ApplyScheduler, operation::{ - AsyncWriter, CompactLogContext, DestroyProgress, GcPeerContext, ProposalControl, - SimpleWriteReqEncoder, SplitFlowControl, TxnContext, + AsyncWriter, CompactLogContext, DestroyProgress, GcPeerContext, MergeContext, + ProposalControl, SimpleWriteReqEncoder, SplitFlowControl, TxnContext, }, router::{CmdResChannel, PeerTick, QueryResChannel}, Result, @@ -58,6 +59,9 @@ pub struct Peer { /// For raft log compaction. compact_log_context: CompactLogContext, + merge_context: Option>, + last_sent_snapshot_index: u64, + /// Encoder for batching proposals and encoding them in a more efficient way /// than protobuf. 
raw_write_encoder: Option, @@ -132,6 +136,7 @@ impl Peer { let region_id = storage.region().get_id(); let tablet_index = storage.region_state().get_tablet_index(); + let merge_context = MergeContext::from_region_state(&logger, storage.region_state()); let raft_group = RawNode::new(&raft_cfg, storage, &logger)?; let region = raft_group.store().region_state().get_region().clone(); @@ -156,6 +161,8 @@ impl Peer { peer_cache: vec![], peer_heartbeats: HashMap::default(), compact_log_context: CompactLogContext::new(applied_index), + merge_context: merge_context.map(|c| Box::new(c)), + last_sent_snapshot_index: 0, raw_write_encoder: None, proposals: ProposalQueue::new(region_id, raft_group.raft.id), async_writer: AsyncWriter::new(region_id, peer_id), @@ -378,6 +385,21 @@ impl Peer { &self.compact_log_context } + #[inline] + pub fn merge_context(&self) -> Option<&MergeContext> { + self.merge_context.as_deref() + } + + #[inline] + pub fn merge_context_mut(&mut self) -> &mut MergeContext { + self.merge_context.get_or_insert_default() + } + + #[inline] + pub fn take_merge_context(&mut self) -> Option> { + self.merge_context.take() + } + #[inline] pub fn raft_group(&self) -> &RawNode> { &self.raft_group @@ -578,12 +600,6 @@ impl Peer { false } - #[inline] - // TODO - pub fn has_pending_merge_state(&self) -> bool { - false - } - pub fn serving(&self) -> bool { matches!(self.destroy_progress, DestroyProgress::None) } @@ -722,6 +738,13 @@ impl Peer { .advance_apply(apply_index, term, region); } + #[inline] + pub fn in_joint_state(&self) -> bool { + self.region().get_peers().iter().any(|p| { + p.get_role() == PeerRole::IncomingVoter || p.get_role() == PeerRole::DemotingVoter + }) + } + #[inline] pub fn split_trace_mut(&mut self) -> &mut Vec<(u64, HashSet)> { &mut self.split_trace @@ -804,4 +827,16 @@ impl Peer { pub fn gc_peer_context_mut(&mut self) -> &mut GcPeerContext { &mut self.gc_peer_context } + + #[inline] + pub fn update_last_sent_snapshot_index(&mut self, i: u64) { + if 
i > self.last_sent_snapshot_index { + self.last_sent_snapshot_index = i; + } + } + + #[inline] + pub fn last_sent_snapshot_index(&self) -> u64 { + self.last_sent_snapshot_index + } } diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index bcda7298bd4..67b0a7adeb7 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -15,12 +15,8 @@ use kvproto::{ use raftstore::store::{AsyncReadNotifier, FetchedLogs, GenSnapRes, RegionSnapshot}; use slog::warn; -use super::{CmdResChannel, PeerMsg}; -use crate::{ - batch::StoreRouter, - operation::{LocalReader, RequestSplit}, - StoreMeta, -}; +use super::PeerMsg; +use crate::{batch::StoreRouter, operation::LocalReader, StoreMeta}; impl AsyncReadNotifier for StoreRouter { fn notify_logs_fetched(&self, region_id: u64, fetched_logs: FetchedLogs) { @@ -48,18 +44,8 @@ impl raftstore::coprocessor::StoreHandle for Store split_keys: Vec>, source: Cow<'static, str>, ) { - let (ch, _) = CmdResChannel::pair(); - let res = self.send( - region_id, - PeerMsg::RequestSplit { - request: RequestSplit { - epoch: region_epoch, - split_keys, - source, - }, - ch, - }, - ); + let (msg, _) = PeerMsg::request_split(region_epoch, split_keys, source.to_string()); + let res = self.send(region_id, msg); if let Err(e) = res { warn!( self.logger(), diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index e529f7dddee..e06d161fe08 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -455,4 +455,12 @@ mod requests { req.mut_transfer_leader().set_peers(peers.into()); req } + + pub fn new_merge_request(merge: pdpb::Merge) -> AdminRequest { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::PrepareMerge); + req.mut_prepare_merge() + .set_target(merge.get_target().to_owned()); + req + } } diff --git 
a/components/raftstore-v2/src/worker/pd/region.rs b/components/raftstore-v2/src/worker/pd/region.rs index bd4925e8563..bca48412aa6 100644 --- a/components/raftstore-v2/src/worker/pd/region.rs +++ b/components/raftstore-v2/src/worker/pd/region.rs @@ -312,8 +312,12 @@ where ); } } else if resp.has_merge() { - // TODO - info!(logger, "pd asks for merge but ignored"); + PD_HEARTBEAT_COUNTER_VEC.with_label_values(&["merge"]).inc(); + + let merge = resp.take_merge(); + info!(logger, "try to merge"; "region_id" => region_id, "merge" => ?merge); + let req = new_merge_request(merge); + send_admin_request(&logger, &router, region_id, epoch, peer, req, None); } else { PD_HEARTBEAT_COUNTER_VEC.with_label_values(&["noop"]).inc(); } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 30420668164..6d0801696cb 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -5627,7 +5627,7 @@ where } fn register_split_region_check_tick(&mut self) { - self.schedule_tick(PeerTick::SplitRegionCheck) + self.schedule_tick(PeerTick::SplitRegionCheck); } #[inline] From e99ebbc8148c1462453358dc43f2cd78265a8a5d Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 22 Feb 2023 16:21:06 +0800 Subject: [PATCH 0544/1149] *: enable bucket automatically if region size is large enough (#14255) ref tikv/tikv#12842 So that it's easier to use v2. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/engine_rocks/src/engine.rs | 2 +- .../raftstore-v2/src/operation/bucket.rs | 2 +- .../tests/integrations/test_pd_heartbeat.rs | 2 +- .../raftstore/src/coprocessor/config.rs | 83 ++++++++++--------- .../src/coprocessor/split_check/half.rs | 4 +- .../src/coprocessor/split_check/keys.rs | 2 +- .../src/coprocessor/split_check/mod.rs | 2 +- .../src/coprocessor/split_check/size.rs | 6 +- components/raftstore/src/store/config.rs | 2 +- components/raftstore/src/store/fsm/peer.rs | 2 +- components/raftstore/src/store/peer.rs | 3 +- .../raftstore/src/store/worker/split_check.rs | 2 +- components/server/src/server.rs | 2 +- components/server/src/server2.rs | 2 +- components/test_raftstore-v2/src/node.rs | 4 +- components/test_raftstore-v2/src/server.rs | 2 +- components/test_raftstore/src/node.rs | 4 +- components/test_raftstore/src/server.rs | 2 +- src/config/mod.rs | 24 ++---- tests/failpoints/cases/test_stats.rs | 2 +- tests/integrations/config/mod.rs | 2 +- .../raftstore/test_split_region.rs | 4 +- 22 files changed, 80 insertions(+), 80 deletions(-) diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index de29e676277..6499880490f 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -151,7 +151,7 @@ pub struct RocksEngine { } impl RocksEngine { - pub(crate) fn new(db: DB) -> RocksEngine { + pub fn new(db: DB) -> RocksEngine { let db = Arc::new(db); RocksEngine { support_multi_batch_write: db.get_db_options().is_enable_multi_batch_write(), diff --git a/components/raftstore-v2/src/operation/bucket.rs b/components/raftstore-v2/src/operation/bucket.rs index efff68fc453..05976d49d97 100644 --- a/components/raftstore-v2/src/operation/bucket.rs +++ b/components/raftstore-v2/src/operation/bucket.rs @@ -166,7 +166,7 @@ impl Peer { } pub fn maybe_gen_approximate_buckets(&self, ctx: &StoreContext) { - if 
ctx.coprocessor_host.cfg.enable_region_bucket && self.storage().is_initialized() { + if ctx.coprocessor_host.cfg.enable_region_bucket() && self.storage().is_initialized() { if let Err(e) = ctx .schedulers .split_check diff --git a/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs b/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs index 11ff6bd4d02..56159538836 100644 --- a/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs +++ b/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs @@ -72,7 +72,7 @@ fn test_store_heartbeat() { fn test_report_buckets() { let region_id = 2; let mut cop_cfg = CopConfig::default(); - cop_cfg.enable_region_bucket = true; + cop_cfg.enable_region_bucket = Some(true); cop_cfg.region_bucket_size = ReadableSize::kb(1); let cluster = Cluster::with_cop_cfg(cop_cfg); let store_id = cluster.node(0).id(); diff --git a/components/raftstore/src/coprocessor/config.rs b/components/raftstore/src/coprocessor/config.rs index 137de200b71..c05a8e89a41 100644 --- a/components/raftstore/src/coprocessor/config.rs +++ b/components/raftstore/src/coprocessor/config.rs @@ -46,7 +46,7 @@ pub struct Config { pub perf_level: PerfLevel, // enable subsplit ranges (aka bucket) within the region - pub enable_region_bucket: bool, + pub enable_region_bucket: Option, pub region_bucket_size: ReadableSize, // region size threshold for using approximate size instead of scan pub region_size_threshold_for_approximate: ReadableSize, @@ -70,9 +70,8 @@ pub enum ConsistencyCheckMethod { } /// Default region split size. -pub const SPLIT_SIZE_MB: u64 = 96; -pub const LARGE_REGION_SPLIT_SIZE_MB: u64 = 1024; -pub const RAFTSTORE_V2_SPLIT_SIZE_MB: u64 = 10240; +pub const SPLIT_SIZE: ReadableSize = ReadableSize::mb(96); +pub const RAFTSTORE_V2_SPLIT_SIZE: ReadableSize = ReadableSize::gb(10); /// Default batch split limit. 
pub const BATCH_SPLIT_LIMIT: u64 = 10; @@ -92,7 +91,7 @@ impl Default for Config { region_max_keys: None, consistency_check_method: ConsistencyCheckMethod::Mvcc, perf_level: PerfLevel::Uninitialized, - enable_region_bucket: false, + enable_region_bucket: None, region_bucket_size: DEFAULT_BUCKET_SIZE, region_size_threshold_for_approximate: DEFAULT_BUCKET_SIZE * BATCH_SPLIT_LIMIT / 2 * 3, region_bucket_merge_size_ratio: DEFAULT_REGION_BUCKET_MERGE_SIZE_RATIO, @@ -103,12 +102,7 @@ impl Default for Config { impl Config { pub fn region_split_size(&self) -> ReadableSize { - self.region_split_size - .unwrap_or(/* v1 only */ if self.enable_region_bucket { - ReadableSize::mb(LARGE_REGION_SPLIT_SIZE_MB) - } else { - ReadableSize::mb(SPLIT_SIZE_MB) - }) + self.region_split_size.unwrap_or(SPLIT_SIZE) } pub fn region_max_keys(&self) -> u64 { @@ -128,17 +122,48 @@ impl Config { .unwrap_or((self.region_split_size().as_mb_f64() * 10000.0) as u64) } + pub fn enable_region_bucket(&self) -> bool { + self.enable_region_bucket.unwrap_or(false) + } + pub fn optimize_for(&mut self, raftstore_v2: bool) { // overwrite the default region_split_size when it's multi-rocksdb if self.region_split_size.is_none() { if raftstore_v2 { - self.region_split_size = Some(ReadableSize::mb(RAFTSTORE_V2_SPLIT_SIZE_MB)); + self.region_split_size = Some(RAFTSTORE_V2_SPLIT_SIZE); } else { self.region_split_size = Some(self.region_split_size()); } } } + fn validate_bucket_size(&self) -> Result<()> { + if self.region_split_size().0 < self.region_bucket_size.0 { + return Err(box_err!( + "region split size {} must >= region bucket size {}", + self.region_split_size().0, + self.region_bucket_size.0 + )); + } + if self.region_size_threshold_for_approximate.0 < self.region_bucket_size.0 { + return Err(box_err!( + "large region threshold size {} must >= region bucket size {}", + self.region_size_threshold_for_approximate.0, + self.region_bucket_size.0 + )); + } + if self.region_bucket_size.0 == 0 { + return 
Err(box_err!("region_bucket size cannot be 0.")); + } + if self.region_bucket_merge_size_ratio <= 0.0 || self.region_bucket_merge_size_ratio >= 0.5 + { + return Err(box_err!( + "region-bucket-merge-size-ratio should be 0 to 0.5 (not include both ends)." + )); + } + Ok(()) + } + pub fn validate(&mut self) -> Result<()> { if self.region_split_keys.is_none() { self.region_split_keys = Some((self.region_split_size().as_mb_f64() * 10000.0) as u64); @@ -169,31 +194,13 @@ impl Config { } None => self.region_max_keys = Some(self.region_split_keys() / 2 * 3), } - if self.enable_region_bucket { - if self.region_split_size().0 < self.region_bucket_size.0 { - return Err(box_err!( - "region split size {} must >= region bucket size {}", - self.region_split_size().0, - self.region_bucket_size.0 - )); - } - if self.region_size_threshold_for_approximate.0 < self.region_bucket_size.0 { - return Err(box_err!( - "large region threshold size {} must >= region bucket size {}", - self.region_size_threshold_for_approximate.0, - self.region_bucket_size.0 - )); - } - if self.region_bucket_size.0 == 0 { - return Err(box_err!("region_bucket size cannot be 0.")); - } - if self.region_bucket_merge_size_ratio <= 0.0 - || self.region_bucket_merge_size_ratio >= 0.5 - { - return Err(box_err!( - "region-bucket-merge-size-ratio should be 0 to 0.5 (not include both ends)." - )); - } + let res = self.validate_bucket_size(); + // If it's OK to enable bucket, we will prefer to enable it if useful. 
+ if let Ok(()) = res && self.enable_region_bucket.is_none() { + let useful = self.region_split_size() >= self.region_bucket_size * 2; + self.enable_region_bucket = Some(useful); + } else if let Err(e) = res && self.enable_region_bucket() { + return Err(e); } Ok(()) } @@ -251,7 +258,7 @@ mod tests { assert_eq!(cfg.region_max_keys, Some(30)); cfg = Config::default(); - cfg.enable_region_bucket = false; + cfg.enable_region_bucket = Some(false); cfg.region_split_size = Some(ReadableSize(20)); cfg.region_bucket_size = ReadableSize(30); cfg.validate().unwrap(); diff --git a/components/raftstore/src/coprocessor/split_check/half.rs b/components/raftstore/src/coprocessor/split_check/half.rs index 259334d2f42..1f4527128d8 100644 --- a/components/raftstore/src/coprocessor/split_check/half.rs +++ b/components/raftstore/src/coprocessor/split_check/half.rs @@ -268,7 +268,7 @@ mod tests { let (tx, rx) = mpsc::sync_channel(100); let cfg = Config { region_split_size: Some(ReadableSize(130_u64)), - enable_region_bucket: true, + enable_region_bucket: Some(true), region_bucket_size: ReadableSize(20_u64), // so that each key below will form a bucket ..Default::default() }; @@ -392,7 +392,7 @@ mod tests { let (tx, rx) = mpsc::sync_channel(100); let cfg = Config { region_split_size: Some(ReadableSize(130_u64)), - enable_region_bucket: true, + enable_region_bucket: Some(true), region_bucket_size: ReadableSize(20_u64), // so that each key below will form a bucket ..Default::default() }; diff --git a/components/raftstore/src/coprocessor/split_check/keys.rs b/components/raftstore/src/coprocessor/split_check/keys.rs index 58c42d55513..2c0e71dd8cb 100644 --- a/components/raftstore/src/coprocessor/split_check/keys.rs +++ b/components/raftstore/src/coprocessor/split_check/keys.rs @@ -555,7 +555,7 @@ mod tests { region_max_keys: Some(159), region_split_keys: Some(80), batch_split_limit: 5, - enable_region_bucket: true, + enable_region_bucket: Some(true), // need check split region buckets, but 
region size does not exceed the split threshold region_bucket_size: ReadableSize(100), ..Default::default() diff --git a/components/raftstore/src/coprocessor/split_check/mod.rs b/components/raftstore/src/coprocessor/split_check/mod.rs index 3978789db91..e92000f2c95 100644 --- a/components/raftstore/src/coprocessor/split_check/mod.rs +++ b/components/raftstore/src/coprocessor/split_check/mod.rs @@ -120,7 +120,7 @@ impl<'a, E> Host<'a, E> { #[inline] pub fn enable_region_bucket(&self) -> bool { - self.cfg.enable_region_bucket + self.cfg.enable_region_bucket() } #[inline] diff --git a/components/raftstore/src/coprocessor/split_check/size.rs b/components/raftstore/src/coprocessor/split_check/size.rs index 8a1a5558c7d..4b320bef1b6 100644 --- a/components/raftstore/src/coprocessor/split_check/size.rs +++ b/components/raftstore/src/coprocessor/split_check/size.rs @@ -161,7 +161,7 @@ impl SplitCheckObserver for SizeCheckObserver self.router.update_approximate_size(region_id, region_size); let need_bucket_checker = - host.cfg.enable_region_bucket && region_size >= 2 * host.cfg.region_bucket_size.0; + host.cfg.enable_region_bucket() && region_size >= 2 * host.cfg.region_bucket_size.0; REGION_SIZE_HISTOGRAM.observe(region_size as f64); let need_split_region = region_size >= host.cfg.region_max_size().0; @@ -549,7 +549,7 @@ pub mod tests { region_max_keys: Some(1000000), region_split_keys: Some(1000000), batch_split_limit: 5, - enable_region_bucket: true, + enable_region_bucket: Some(true), region_bucket_size: ReadableSize(3000), region_size_threshold_for_approximate: ReadableSize(50000), ..Default::default() @@ -675,7 +675,7 @@ pub mod tests { region_max_keys: Some(1000000), region_split_keys: Some(1000000), batch_split_limit: 5, - enable_region_bucket: true, + enable_region_bucket: Some(true), region_bucket_size: ReadableSize(1), // minimal bucket size region_size_threshold_for_approximate: ReadableSize(500000000), // follow split region's check policy, not force to use 
approximate diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 342ace1139e..301f3cea0cc 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -1109,7 +1109,7 @@ mod tests { #[test] fn test_config_validate() { - let split_size = ReadableSize::mb(coprocessor::config::SPLIT_SIZE_MB); + let split_size = coprocessor::config::SPLIT_SIZE; let mut cfg = Config::new(); cfg.validate(split_size, false, ReadableSize(0)).unwrap(); assert_eq!( diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 6d0801696cb..6acddde2257 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -5982,7 +5982,7 @@ where // generate bucket range list to run split-check (to further split buckets) fn gen_bucket_range_for_update(&self) -> Option> { - if !self.ctx.coprocessor_host.cfg.enable_region_bucket { + if !self.ctx.coprocessor_host.cfg.enable_region_bucket() { return None; } let region_buckets = self.fsm.peer.region_buckets.as_ref()?; diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index c788256799b..a1817edd17b 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -5090,7 +5090,8 @@ where } pub fn maybe_gen_approximate_buckets(&self, ctx: &PollContext) { - if ctx.coprocessor_host.cfg.enable_region_bucket && !self.region().get_peers().is_empty() { + if ctx.coprocessor_host.cfg.enable_region_bucket() && !self.region().get_peers().is_empty() + { if let Err(e) = ctx .split_check_scheduler .schedule(SplitCheckTask::ApproximateBuckets(self.region().clone())) diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index b6bc5fca65f..1335ed5d5e8 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ 
b/components/raftstore/src/store/worker/split_check.rs @@ -667,7 +667,7 @@ where ), Task::ChangeConfig(c) => self.change_cfg(c), Task::ApproximateBuckets(region) => { - if self.coprocessor.cfg.enable_region_bucket { + if self.coprocessor.cfg.enable_region_bucket() { let mut cached; let tablet = match &self.engine { Either::Left(e) => e, diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 9576cb91423..5ba70b5db5a 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -969,7 +969,7 @@ where .raft_store .validate( self.config.coprocessor.region_split_size(), - self.config.coprocessor.enable_region_bucket, + self.config.coprocessor.enable_region_bucket(), self.config.coprocessor.region_bucket_size, ) .unwrap_or_else(|e| fatal!("failed to validate raftstore config {}", e)); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 9a2a1a1e8e0..7b391c20bb8 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -828,7 +828,7 @@ where .raft_store .validate( self.config.coprocessor.region_split_size(), - self.config.coprocessor.enable_region_bucket, + self.config.coprocessor.enable_region_bucket(), self.config.coprocessor.region_bucket_size, ) .unwrap_or_else(|e| fatal!("failed to validate raftstore config {}", e)); diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index f6211c09748..b9609ad2783 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -191,7 +191,7 @@ impl Simulator for NodeCluster { raft_store .validate( cfg.coprocessor.region_split_size(), - cfg.coprocessor.enable_region_bucket, + cfg.coprocessor.enable_region_bucket(), cfg.coprocessor.region_bucket_size, ) .unwrap(); @@ -286,7 +286,7 @@ impl Simulator for NodeCluster { let node_id = node.id(); let region_split_size = cfg.coprocessor.region_split_size(); - let enable_region_bucket = 
cfg.coprocessor.enable_region_bucket; + let enable_region_bucket = cfg.coprocessor.enable_region_bucket(); let region_bucket_size = cfg.coprocessor.region_bucket_size; let mut raftstore_cfg = cfg.tikv.raft_store; raftstore_cfg diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 8804f0c0f8c..d02dffa73fc 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -197,7 +197,7 @@ impl ServerCluster { raft_store .validate( cfg.coprocessor.region_split_size(), - cfg.coprocessor.enable_region_bucket, + cfg.coprocessor.enable_region_bucket(), cfg.coprocessor.region_bucket_size, ) .unwrap(); diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 78e1dbb36c3..618b760e29e 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -240,7 +240,7 @@ impl Simulator for NodeCluster { raft_store .validate( cfg.coprocessor.region_split_size(), - cfg.coprocessor.enable_region_bucket, + cfg.coprocessor.enable_region_bucket(), cfg.coprocessor.region_bucket_size, ) .unwrap(); @@ -348,7 +348,7 @@ impl Simulator for NodeCluster { ); let region_split_size = cfg.coprocessor.region_split_size(); - let enable_region_bucket = cfg.coprocessor.enable_region_bucket; + let enable_region_bucket = cfg.coprocessor.enable_region_bucket(); let region_bucket_size = cfg.coprocessor.region_bucket_size; let mut raftstore_cfg = cfg.tikv.raft_store; raftstore_cfg diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index a17c65b8aec..3f6b704687a 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -500,7 +500,7 @@ impl ServerCluster { raft_store .validate( cfg.coprocessor.region_split_size(), - cfg.coprocessor.enable_region_bucket, + cfg.coprocessor.enable_region_bucket(), cfg.coprocessor.region_bucket_size, ) .unwrap(); diff --git 
a/src/config/mod.rs b/src/config/mod.rs index 4be54665443..dff0fcb2436 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3241,7 +3241,7 @@ impl TikvConfig { self.coprocessor.validate()?; self.raft_store.validate( self.coprocessor.region_split_size(), - self.coprocessor.enable_region_bucket, + self.coprocessor.enable_region_bucket(), self.coprocessor.region_bucket_size, )?; self.security @@ -4177,7 +4177,7 @@ mod tests { use itertools::Itertools; use kvproto::kvrpcpb::CommandPri; use raftstore::coprocessor::{ - config::{LARGE_REGION_SPLIT_SIZE_MB, RAFTSTORE_V2_SPLIT_SIZE_MB, SPLIT_SIZE_MB}, + config::{RAFTSTORE_V2_SPLIT_SIZE, SPLIT_SIZE}, region_info_accessor::MockRegionInfoProvider, }; use slog::Level; @@ -5596,27 +5596,17 @@ mod tests { let mut default_cfg = TikvConfig::default(); default_cfg.coprocessor.optimize_for(false); default_cfg.coprocessor.validate().unwrap(); - assert_eq!( - default_cfg.coprocessor.region_split_size(), - ReadableSize::mb(SPLIT_SIZE_MB) - ); - - let mut default_cfg = TikvConfig::default(); - default_cfg.coprocessor.enable_region_bucket = true; - default_cfg.coprocessor.optimize_for(false); - default_cfg.coprocessor.validate().unwrap(); - assert_eq!( - default_cfg.coprocessor.region_split_size(), - ReadableSize::mb(LARGE_REGION_SPLIT_SIZE_MB) - ); + assert_eq!(default_cfg.coprocessor.region_split_size(), SPLIT_SIZE); + assert!(!default_cfg.coprocessor.enable_region_bucket()); let mut default_cfg = TikvConfig::default(); default_cfg.coprocessor.optimize_for(true); default_cfg.coprocessor.validate().unwrap(); assert_eq!( default_cfg.coprocessor.region_split_size(), - ReadableSize::mb(RAFTSTORE_V2_SPLIT_SIZE_MB) + RAFTSTORE_V2_SPLIT_SIZE ); + assert!(default_cfg.coprocessor.enable_region_bucket()); let mut default_cfg = TikvConfig::default(); default_cfg.coprocessor.region_split_size = Some(ReadableSize::mb(500)); @@ -5626,6 +5616,7 @@ mod tests { default_cfg.coprocessor.region_split_size(), ReadableSize::mb(500) ); + 
assert!(default_cfg.coprocessor.enable_region_bucket()); let mut default_cfg = TikvConfig::default(); default_cfg.coprocessor.region_split_size = Some(ReadableSize::mb(500)); @@ -5635,6 +5626,7 @@ mod tests { default_cfg.coprocessor.region_split_size(), ReadableSize::mb(500) ); + assert!(default_cfg.coprocessor.enable_region_bucket()); } #[test] diff --git a/tests/failpoints/cases/test_stats.rs b/tests/failpoints/cases/test_stats.rs index 37c87fa4547..7bc97edf759 100644 --- a/tests/failpoints/cases/test_stats.rs +++ b/tests/failpoints/cases/test_stats.rs @@ -7,7 +7,7 @@ use tikv_util::config::*; #[test] fn test_bucket_stats() { let (mut cluster, client, ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { - cluster.cfg.coprocessor.enable_region_bucket = true; + cluster.cfg.coprocessor.enable_region_bucket = Some(true); cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::days(1); cluster.cfg.raft_store.report_region_buckets_tick_interval = ReadableDuration::millis(100); }); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 351e9d74ca0..80cab3aca43 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -733,7 +733,7 @@ fn test_serde_custom_tikv_config() { region_split_keys: Some(100000), consistency_check_method: ConsistencyCheckMethod::Raw, perf_level: PerfLevel::Uninitialized, - enable_region_bucket: true, + enable_region_bucket: Some(true), region_bucket_size: ReadableSize::mb(1), region_size_threshold_for_approximate: ReadableSize::mb(3), prefer_approximate_bucket: false, diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 963424d8986..7f907970a72 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -604,7 +604,7 @@ fn test_node_split_region_after_reboot_with_config_change() { let region_split_size = 2000; 
cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(50); cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(20); - cluster.cfg.coprocessor.enable_region_bucket = true; + cluster.cfg.coprocessor.enable_region_bucket = Some(true); cluster.cfg.coprocessor.region_max_size = Some(ReadableSize(region_max_size)); cluster.cfg.coprocessor.region_split_size = Some(ReadableSize(region_split_size)); cluster.cfg.coprocessor.region_bucket_size = ReadableSize(region_split_size); @@ -1171,7 +1171,7 @@ fn test_gen_split_check_bucket_ranges() { let count = 5; let mut cluster = new_server_cluster(0, count); cluster.cfg.coprocessor.region_bucket_size = ReadableSize(5); - cluster.cfg.coprocessor.enable_region_bucket = true; + cluster.cfg.coprocessor.enable_region_bucket = Some(true); // disable report buckets; as it will reset the user traffic stats to randomize // the test result cluster.cfg.raft_store.check_leader_lease_interval = ReadableDuration::secs(5); From 6ea9c3af2c3d94b6f6e22311a7709a4b78872f62 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 22 Feb 2023 17:35:06 +0800 Subject: [PATCH 0545/1149] raftstore-v2: consider `None` when getting mailbox (#14234) ref tikv/tikv#12842, close tikv/tikv#14233 consider None when getting mailbox Signed-off-by: SpadeA-Tang --- .../src/operation/command/admin/split.rs | 28 +++++++++++-------- .../raftstore/test_split_region.rs | 7 +++-- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index bbc6aac058e..4e14c7e016d 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -706,17 +706,23 @@ impl Peer { let region_id = self.region_id(); if self.storage().has_dirty_data() { let tablet_index = self.storage().tablet_index(); 
- let mailbox = store_ctx.router.mailbox(region_id).unwrap(); - let _ = store_ctx - .schedulers - .tablet_gc - .schedule(tablet_gc::Task::trim( - self.tablet().unwrap().clone(), - self.region(), - move || { - let _ = mailbox.force_send(PeerMsg::TabletTrimmed { tablet_index }); - }, - )); + if let Some(mailbox) = store_ctx.router.mailbox(region_id) { + let _ = store_ctx + .schedulers + .tablet_gc + .schedule(tablet_gc::Task::trim( + self.tablet().unwrap().clone(), + self.region(), + move || { + let _ = mailbox.force_send(PeerMsg::TabletTrimmed { tablet_index }); + }, + )); + } else { + // None means the node is shutdown concurrently and thus the + // mailboxes in router have been cleared + assert!(store_ctx.router.is_shutdown()); + return; + } } if split_init.derived_leader && self.leader_id() == INVALID_ID diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 7f907970a72..f8d6ff9b468 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -545,6 +545,8 @@ fn test_server_split_with_stale_peer() { #[test_case(test_raftstore::new_node_cluster)] #[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_split_region_diff_check() { let count = 1; let mut cluster = new_cluster(0, count); @@ -596,10 +598,11 @@ fn test_split_region_diff_check() { // set max region size/split size 2000 and put data till 1000 // set max region size/split size < 1000 and reboot // verify the region is splitted. 
-#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_node_split_region_after_reboot_with_config_change() { let count = 1; - let mut cluster = new_server_cluster(0, count); + let mut cluster = new_cluster(0, count); let region_max_size = 2000; let region_split_size = 2000; cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(50); From a675ca8eacbdbe1a0e5e08d446af998f0691eb04 Mon Sep 17 00:00:00 2001 From: you06 Date: Wed, 22 Feb 2023 17:49:06 +0800 Subject: [PATCH 0546/1149] copr: early stop paging copr when resultset is drained. (#14209) close tikv/tikv#14254 When the result set is drained, it indicates that no more data is required in the range. This PR set the scanned range to None to avoid the following paging requests in the current range. Co-authored-by: Ti Chi Robot --- components/tidb_query_executors/src/runner.rs | 9 +- tests/failpoints/cases/test_coprocessor.rs | 172 ++++++++---------- 2 files changed, 77 insertions(+), 104 deletions(-) diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index f4a3ea8a2ad..3093b9bb24b 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -510,9 +510,12 @@ impl BatchExecutorsRunner { self.out_most_executor .collect_exec_stats(&mut self.exec_stats); - let range = self - .paging_size - .map(|_| self.out_most_executor.take_scanned_range()); + let range = if drained { + None + } else { + self.paging_size + .map(|_| self.out_most_executor.take_scanned_range()) + }; let mut sel_resp = SelectResponse::default(); sel_resp.set_chunks(chunks.into()); diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index c515b8d66cb..b3a6bf76c01 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -251,6 +251,16 @@ fn test_paging_scan() 
{ assert_ge!(res_end_key, end_key.get_start()); assert_le!(res_end_key, end_key.get_end()); } + + // test limit with early return + let req = DagSelect::from(&product) + .paging_size(2) + .limit(1) + .desc(desc) + .build(); + let resp = handle_request(&endpoint, req); + assert!(resp.range.is_none()); + assert!(resp.range.is_none()); } } @@ -271,113 +281,73 @@ fn test_paging_scan_multi_ranges() { fail::cfg("copr_batch_grow_size", "return(1)").unwrap(); // test multi ranges with gap - for desc in [true] { - let paging_size = 3; - let mut exp = [data[0], data[1], data[3], data[4]]; - if desc { - exp.reverse(); - } - - let builder = DagSelect::from(&product) - .paging_size(paging_size) - .desc(desc); - let mut range1 = builder.key_ranges[0].clone(); - range1.set_end(product.get_record_range_one(data[1].0).get_end().into()); - let mut range2 = builder.key_ranges[0].clone(); - range2.set_start(product.get_record_range_one(data[3].0).get_start().into()); - let key_ranges = vec![range1.clone(), range2.clone()]; + for desc in [true, false] { + for paging_size in [3, 5] { + let mut exp = [data[0], data[1], data[3], data[4]]; + if desc { + exp.reverse(); + } - let req = builder.key_ranges(key_ranges).build(); - let resp = handle_request(&endpoint, req); - let mut select_resp = SelectResponse::default(); - select_resp.merge_from_bytes(resp.get_data()).unwrap(); - - let mut row_count = 0; - let spliter = DagChunkSpliter::new(select_resp.take_chunks().into(), 3); - for (row, (id, name, cnt)) in spliter.zip(exp) { - let name_datum = name.unwrap().as_bytes().into(); - let expected_encoded = datum::encode_value( - &mut EvalContext::default(), - &[Datum::I64(id), name_datum, Datum::I64(cnt)], - ) - .unwrap(); - let result_encoded = datum::encode_value(&mut EvalContext::default(), &row).unwrap(); - assert_eq!(result_encoded, &*expected_encoded); - row_count += 1; - } - assert_eq!(row_count, paging_size); + let builder = DagSelect::from(&product) + .paging_size(paging_size) + 
.desc(desc); + let mut range1 = builder.key_ranges[0].clone(); + range1.set_end(product.get_record_range_one(data[1].0).get_end().into()); + let mut range2 = builder.key_ranges[0].clone(); + range2.set_start(product.get_record_range_one(data[3].0).get_start().into()); + let key_ranges = vec![range1.clone(), range2.clone()]; - let res_range = resp.get_range(); - let (res_start_key, res_end_key) = match desc { - true => (res_range.get_end(), res_range.get_start()), - false => (res_range.get_start(), res_range.get_end()), - }; - let start_key = match desc { - true => range2.get_end(), - false => range1.get_start(), - }; - let end_id = match desc { - true => data[1].0, - false => data[3].0, - }; - let end_key = product.get_record_range_one(end_id); - assert_eq!(res_start_key, start_key); - assert_ge!(res_end_key, end_key.get_start()); - assert_le!(res_end_key, end_key.get_end()); - } + let req = builder.key_ranges(key_ranges).build(); + let resp = handle_request(&endpoint, req); + let mut select_resp = SelectResponse::default(); + select_resp.merge_from_bytes(resp.get_data()).unwrap(); - // test drained - for desc in [false, true] { - let paging_size = 5; - let mut exp = [data[0], data[1], data[3], data[4]]; - if desc { - exp.reverse(); - } + let mut row_count = 0; + let spliter = DagChunkSpliter::new(select_resp.take_chunks().into(), 3); + for (row, (id, name, cnt)) in spliter.zip(exp) { + let name_datum = name.unwrap().as_bytes().into(); + let expected_encoded = datum::encode_value( + &mut EvalContext::default(), + &[Datum::I64(id), name_datum, Datum::I64(cnt)], + ) + .unwrap(); + let result_encoded = + datum::encode_value(&mut EvalContext::default(), &row).unwrap(); + assert_eq!(result_encoded, &*expected_encoded); + row_count += 1; + } + let exp_len = if paging_size <= 4 { + paging_size + } else { + exp.len() as u64 + }; + assert_eq!(row_count, exp_len); - let builder = DagSelect::from(&product) - .paging_size(paging_size) - .desc(desc); - let mut range1 = 
builder.key_ranges[0].clone(); - range1.set_end(product.get_record_range_one(data[1].0).get_end().into()); - let mut range2 = builder.key_ranges[0].clone(); - range2.set_start(product.get_record_range_one(data[3].0).get_start().into()); - let key_ranges = vec![range1.clone(), range2.clone()]; + let res_range = resp.get_range(); - let req = builder.key_ranges(key_ranges).build(); - let resp = handle_request(&endpoint, req); - let mut select_resp = SelectResponse::default(); - select_resp.merge_from_bytes(resp.get_data()).unwrap(); - - let mut row_count = 0; - let spliter = DagChunkSpliter::new(select_resp.take_chunks().into(), 3); - for (row, (id, name, cnt)) in spliter.zip(exp) { - let name_datum = name.unwrap().as_bytes().into(); - let expected_encoded = datum::encode_value( - &mut EvalContext::default(), - &[Datum::I64(id), name_datum, Datum::I64(cnt)], - ) - .unwrap(); - let result_encoded = datum::encode_value(&mut EvalContext::default(), &row).unwrap(); - assert_eq!(result_encoded, &*expected_encoded); - row_count += 1; + let (res_start_key, res_end_key) = match desc { + true => (res_range.get_end(), res_range.get_start()), + false => (res_range.get_start(), res_range.get_end()), + }; + if paging_size != 5 { + let start_key = match desc { + true => range2.get_end(), + false => range1.get_start(), + }; + let end_id = match desc { + true => data[1].0, + false => data[3].0, + }; + let end_key = product.get_record_range_one(end_id); + assert_eq!(res_start_key, start_key); + assert_ge!(res_end_key, end_key.get_start()); + assert_le!(res_end_key, end_key.get_end()); + } else { + // drained. 
+ assert!(res_start_key.is_empty()); + assert!(res_end_key.is_empty()); + } } - assert_eq!(row_count, exp.len()); - - let res_range = resp.get_range(); - let (res_start_key, res_end_key) = match desc { - true => (res_range.get_end(), res_range.get_start()), - false => (res_range.get_start(), res_range.get_end()), - }; - let start_key = match desc { - true => range2.get_end(), - false => range1.get_start(), - }; - let end_key = match desc { - true => product.get_record_range_one(i64::MIN), - false => product.get_record_range_one(i64::MAX), - }; - assert_eq!(res_start_key, start_key); - assert_eq!(res_end_key, end_key.get_start(), "{}", desc); } } From 0368d0a6e5416aea5cc16546a58d091e3bbc504f Mon Sep 17 00:00:00 2001 From: Shaowen Yin Date: Thu, 23 Feb 2023 09:29:07 +0800 Subject: [PATCH 0547/1149] *: update openssl-src version to fix CVE-2023-0286 (#14258) close tikv/tikv#14257 Upgrade openssl-src version to fix CVE-2023-0286. Signed-off-by: cosven Co-authored-by: Ti Chi Robot --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1fa0937ce40..7add84159b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3579,9 +3579,9 @@ checksum = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de" [[package]] name = "openssl-src" -version = "111.20.0+1.1.1o" +version = "111.25.0+1.1.1t" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92892c4f87d56e376e469ace79f1128fdaded07646ddf73aa0be4706ff712dec" +checksum = "3173cd3626c43e3854b1b727422a276e568d9ec5fe8cec197822cf52cfb743d6" dependencies = [ "cc", ] From f0af6ff1f5a01def628ddc6fe61bbb5d005cfc9d Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 23 Feb 2023 15:25:07 +0800 Subject: [PATCH 0548/1149] integration test v2: report snapshot status after sending (#14252) ref tikv/tikv#12842 report snapshot status after sending Signed-off-by: SpadeA-Tang Co-authored-by: Ti Chi Robot --- 
components/test_raftstore-v2/src/node.rs | 12 ++++++-- .../src/transport_simulate.rs | 28 +++++++++++++++++++ tests/integrations/raftstore/test_snap.rs | 28 +++++++++++++++++-- 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index b9609ad2783..6c71e2d9cdc 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -15,7 +15,7 @@ use kvproto::{ raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, raft_serverpb::RaftMessage, }; -use raft::prelude::MessageType; +use raft::{prelude::MessageType, SnapshotStatus}; use raftstore::{ coprocessor::CoprocessorHost, errors::Error as RaftError, @@ -73,6 +73,8 @@ impl Transport for ChannelTransport { fn send(&mut self, msg: RaftMessage) -> raftstore::Result<()> { let from_store = msg.get_from_peer().get_store_id(); let to_store = msg.get_to_peer().get_store_id(); + let to_peer_id = msg.get_to_peer().get_id(); + let region_id = msg.get_region_id(); let is_snapshot = msg.get_message().get_msg_type() == MessageType::MsgSnapshot; if is_snapshot { @@ -102,7 +104,13 @@ impl Transport for ChannelTransport { match core.routers.get(&to_store) { Some(h) => { h.send_raft_msg(msg)?; - // report snapshot status if needed + if is_snapshot { + let _ = core.routers[&from_store].report_snapshot_status( + region_id, + to_peer_id, + SnapshotStatus::Finish, + ); + } Ok(()) } _ => Err(box_err!("missing sender for store {}", to_store)), diff --git a/components/test_raftstore-v2/src/transport_simulate.rs b/components/test_raftstore-v2/src/transport_simulate.rs index f42a891e60f..b55c29dbd3a 100644 --- a/components/test_raftstore-v2/src/transport_simulate.rs +++ b/components/test_raftstore-v2/src/transport_simulate.rs @@ -11,6 +11,7 @@ use kvproto::{ raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, raft_serverpb::RaftMessage, }; +use raft::SnapshotStatus; use raftstore::{ router::handle_send_error, store::{cmd_resp, 
RegionSnapshot, Transport}, @@ -102,6 +103,14 @@ pub trait RaftStoreRouter { fn send_peer_msg(&self, region_id: u64, msg: PeerMsg) -> Result<()>; fn send_raft_msg(&self, msg: RaftMessage) -> RaftStoreResult<()>; + + /// Reports the sending snapshot status to the peer of the Region. + fn report_snapshot_status( + &self, + region_id: u64, + to_peer_id: u64, + status: SnapshotStatus, + ) -> RaftStoreResult<()>; } impl RaftStoreRouter for RaftRouter { @@ -115,6 +124,15 @@ impl RaftStoreRouter for RaftRouter { self.send_raft_message(Box::new(msg)) .map_err(|e| handle_send_error(region_id, e)) } + + fn report_snapshot_status( + &self, + region_id: u64, + to_peer_id: u64, + status: SnapshotStatus, + ) -> RaftStoreResult<()> { + self.send_peer_msg(region_id, PeerMsg::SnapshotSent { to_peer_id, status }) + } } impl RaftStoreRouter for SimulateTransport { @@ -125,4 +143,14 @@ impl RaftStoreRouter for SimulateTransport { fn send_raft_msg(&self, msg: RaftMessage) -> RaftStoreResult<()> { filter_send(&self.filters, msg, |m| self.ch.send_raft_msg(m)) } + + fn report_snapshot_status( + &self, + region_id: u64, + to_peer_id: u64, + status: SnapshotStatus, + ) -> RaftStoreResult<()> { + self.ch + .report_snapshot_status(region_id, to_peer_id, status) + } } diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index e8a0730488a..a69a2216cd4 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -476,6 +476,8 @@ impl Filter for SnapshotAppendFilter { } } +// todo(SpadeA): to be removed when receive filter is supported on ServerCluster +// V2 fn test_snapshot_with_append(cluster: &mut Cluster) { configure_for_snapshot(&mut cluster.cfg); @@ -502,10 +504,30 @@ fn test_snapshot_with_append(cluster: &mut Cluster) { must_get_equal(&engine4, b"k2", b"v2"); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn 
test_node_snapshot_with_append() { - let mut cluster = new_node_cluster(0, 4); - test_snapshot_with_append(&mut cluster); + let mut cluster = new_cluster(0, 4); + configure_for_snapshot(&mut cluster.cfg); + + let pd_client = Arc::clone(&cluster.pd_client); + // Disable default max peer count check. + pd_client.disable_default_operator(); + cluster.run(); + + // In case of removing leader, let's transfer leader to some node first. + cluster.must_transfer_leader(1, new_peer(1, 1)); + pd_client.must_remove_peer(1, new_peer(4, 4)); + + let (tx, rx) = mpsc::channel(); + cluster.add_recv_filter_on_node(4, Box::new(SnapshotAppendFilter::new(tx))); + pd_client.add_peer(1, new_peer(4, 5)); + rx.recv_timeout(Duration::from_secs(3)).unwrap(); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k2", b"v2"); + let engine4 = cluster.get_engine(4); + must_get_equal(&engine4, b"k1", b"v1"); + must_get_equal(&engine4, b"k2", b"v2"); } #[test] From 5f5bb766ea056f1eb1320e32364084818a3eca64 Mon Sep 17 00:00:00 2001 From: Yifan Xu <30385241+xuyifangreeneyes@users.noreply.github.com> Date: Mon, 27 Feb 2023 01:15:07 +0800 Subject: [PATCH 0549/1149] coprocessor: avoid unnecessary vec allocation in collect_column_stats (#14280) ref tikv/tikv#14231 When collect_column_stats handles each row, reuse column_vals and collation_key_vals to avoid allocating many small objects. 
Signed-off-by: xuyifan <675434007@qq.com> --- src/coprocessor/statistics/analyze.rs | 67 +++++++++++++-------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index 85e0281064e..f292b5220e3 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -391,41 +391,36 @@ impl RowSampleBuilder { is_drained = result.is_drained?; let columns_slice = result.physical_columns.as_slice(); - + let mut column_vals: Vec> = vec![vec![]; self.columns_info.len()]; + let mut collation_key_vals: Vec> = vec![vec![]; self.columns_info.len()]; for logical_row in &result.logical_rows { - let mut column_vals: Vec> = Vec::new(); - let mut collation_key_vals: Vec> = Vec::new(); for i in 0..self.columns_info.len() { - let mut val = vec![]; + column_vals[i].clear(); + collation_key_vals[i].clear(); columns_slice[i].encode( *logical_row, &self.columns_info[i], &mut EvalContext::default(), - &mut val, + &mut column_vals[i], )?; if self.columns_info[i].as_accessor().is_string_like() { - let sorted_val = match_template_collator! { + match_template_collator! { TT, match self.columns_info[i].as_accessor().collation()? { Collation::TT => { - let mut mut_val = &val[..]; + let mut mut_val = &column_vals[i][..]; let decoded_val = table::decode_col_value(&mut mut_val, &mut EvalContext::default(), &self.columns_info[i])?; if decoded_val == Datum::Null { - val.clone() + collation_key_vals[i].clone_from(&column_vals[i]); } else { // Only if the `decoded_val` is Datum::Null, `decoded_val` is a Ok(None). // So it is safe the unwrap the Ok value. 
- let decoded_sorted_val = TT::sort_key(&decoded_val.as_string()?.unwrap())?; - decoded_sorted_val + TT::write_sort_key(&mut collation_key_vals[i], &decoded_val.as_string()?.unwrap())?; } } } }; - collation_key_vals.push(sorted_val); - } else { - collation_key_vals.push(Vec::new()); } - read_size += val.len(); - column_vals.push(val); + read_size += column_vals[i].len(); } collector.mut_base().count += 1; collector.collect_column_group( @@ -434,7 +429,7 @@ impl RowSampleBuilder { &self.columns_info, &self.column_groups, ); - collector.collect_column(column_vals, collation_key_vals, &self.columns_info); + collector.collect_column(&column_vals, &collation_key_vals, &self.columns_info); } } @@ -470,11 +465,11 @@ trait RowSampleCollector: Send { ); fn collect_column( &mut self, - columns_val: Vec>, - collation_keys_val: Vec>, + columns_val: &[Vec], + collation_keys_val: &[Vec], columns_info: &[tipb::ColumnInfo], ); - fn sampling(&mut self, data: Vec>); + fn sampling(&mut self, data: &[Vec]); fn to_proto(&mut self) -> tipb::RowSampleCollector; fn get_reported_memory_usage(&mut self) -> usize { self.mut_base().reported_memory_usage @@ -662,22 +657,23 @@ impl RowSampleCollector for BernoulliRowSampleCollector { } fn collect_column( &mut self, - columns_val: Vec>, - collation_keys_val: Vec>, + columns_val: &[Vec], + collation_keys_val: &[Vec], columns_info: &[tipb::ColumnInfo], ) { self.base - .collect_column(&columns_val, &collation_keys_val, columns_info); + .collect_column(columns_val, collation_keys_val, columns_info); self.sampling(columns_val); } - fn sampling(&mut self, data: Vec>) { + fn sampling(&mut self, data: &[Vec]) { let cur_rng = self.base.rng.gen_range(0.0, 1.0); if cur_rng >= self.sample_rate { return; } - self.base.memory_usage += data.iter().map(|x| x.capacity()).sum::(); + let sample = data.to_vec(); + self.base.memory_usage += sample.iter().map(|x| x.capacity()).sum::(); self.base.report_memory_usage(false); - self.samples.push(data); + 
self.samples.push(sample); } fn to_proto(&mut self) -> tipb::RowSampleCollector { self.base.memory_usage = 0; @@ -739,16 +735,16 @@ impl RowSampleCollector for ReservoirRowSampleCollector { fn collect_column( &mut self, - columns_val: Vec>, - collation_keys_val: Vec>, + columns_val: &[Vec], + collation_keys_val: &[Vec], columns_info: &[tipb::ColumnInfo], ) { self.base - .collect_column(&columns_val, &collation_keys_val, columns_info); + .collect_column(columns_val, collation_keys_val, columns_info); self.sampling(columns_val); } - fn sampling(&mut self, data: Vec>) { + fn sampling(&mut self, data: &[Vec]) { // We should tolerate the abnormal case => `self.max_sample_size == 0`. if self.max_sample_size == 0 { return; @@ -764,9 +760,10 @@ impl RowSampleCollector for ReservoirRowSampleCollector { } if need_push { - self.base.memory_usage += data.iter().map(|x| x.capacity()).sum::(); - self.samples.push(Reverse((cur_rng, data))); + let sample = data.to_vec(); + self.base.memory_usage += sample.iter().map(|x| x.capacity()).sum::(); self.base.report_memory_usage(false); + self.samples.push(Reverse((cur_rng, sample))); } } @@ -1255,7 +1252,7 @@ mod tests { for loop_i in 0..loop_cnt { let mut collector = ReservoirRowSampleCollector::new(sample_num, 1000, 1); for row in &nums { - collector.sampling([row.clone()].to_vec()); + collector.sampling(&[row.clone()]); } assert_eq!(collector.samples.len(), sample_num); for sample in &collector.samples { @@ -1304,7 +1301,7 @@ mod tests { let mut collector = BernoulliRowSampleCollector::new(sample_num as f64 / row_num as f64, 1000, 1); for row in &nums { - collector.sampling([row.clone()].to_vec()); + collector.sampling(&[row.clone()]); } for sample in &collector.samples { *item_cnt.entry(sample[0].clone()).or_insert(0) += 1; @@ -1350,7 +1347,7 @@ mod tests { // Test for ReservoirRowSampleCollector let mut collector = ReservoirRowSampleCollector::new(sample_num, 1000, 1); for row in &nums { - 
collector.sampling([row.clone()].to_vec()); + collector.sampling(&[row.clone()]); } assert_eq!(collector.samples.len(), 0); } @@ -1359,7 +1356,7 @@ mod tests { let mut collector = BernoulliRowSampleCollector::new(sample_num as f64 / row_num as f64, 1000, 1); for row in &nums { - collector.sampling([row.clone()].to_vec()); + collector.sampling(&[row.clone()]); } assert_eq!(collector.samples.len(), 0); } From b4da741a49517553765b2e44709ed2e02a2c2324 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 27 Feb 2023 13:09:08 +0800 Subject: [PATCH 0550/1149] raftstore-v2: update region state in apply_snapshot (#14279) ref tikv/tikv#12842 update region state in apply_snapshot Signed-off-by: SpadeA-Tang Co-authored-by: Xinye Tao --- components/raftstore-v2/src/operation/ready/snapshot.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 29d94c955af..5eae3078a0a 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -259,10 +259,6 @@ impl Peer { meta.region_read_progress .insert(region_id, self.read_progress().clone()); } - - let region_state = self.raft_group().store().region_state().clone(); - self.storage_mut().set_region_state(region_state); - if let Some(tablet) = self.set_tablet(tablet) { self.record_tombstone_tablet(ctx, tablet, snapshot_index); } @@ -596,12 +592,15 @@ impl Storage { "{}", SlogFormat(self.logger()) ); - let region_state = self.region_state_mut(); + let mut region_state = self.region_state().clone(); region_state.set_state(PeerState::Normal); region_state.set_region(region); region_state.set_removed_records(removed_records); region_state.set_merged_records(merged_records); region_state.set_tablet_index(last_index); + // We need set_region_state here to update the peer. 
+ self.set_region_state(region_state); + let entry_storage = self.entry_storage_mut(); entry_storage.raft_state_mut().set_last_index(last_index); entry_storage.set_truncated_index(last_index); From 6a906dae15f62be7df7c707c43550e25c0599a9a Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Mon, 27 Feb 2023 13:27:08 +0800 Subject: [PATCH 0551/1149] impl buckets flow report (#14238) ref tikv/tikv#12842, ref tikv/tikv#14044 collect bucket flow: - write/read flow include keys and bytes not include qps Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Xinye Tao Co-authored-by: Ti Chi Robot --- components/pd_client/src/lib.rs | 4 + components/raftstore-v2/src/fsm/apply.rs | 6 + .../raftstore-v2/src/operation/bucket.rs | 160 ++++++++++++++++-- .../src/operation/command/admin/split.rs | 20 ++- .../raftstore-v2/src/operation/command/mod.rs | 24 ++- .../src/operation/command/write/mod.rs | 6 + components/raftstore-v2/src/operation/mod.rs | 1 + components/raftstore-v2/src/raft/apply.rs | 4 + components/raftstore-v2/src/raft/peer.rs | 37 ++-- components/raftstore-v2/src/raft/storage.rs | 1 + .../src/router/internal_message.rs | 3 + .../raftstore-v2/src/worker/pd/region.rs | 6 +- .../tests/integrations/cluster.rs | 4 +- .../tests/integrations/test_pd_heartbeat.rs | 73 ++++++-- components/raftstore/src/store/worker/read.rs | 14 +- 15 files changed, 299 insertions(+), 64 deletions(-) diff --git a/components/pd_client/src/lib.rs b/components/pd_client/src/lib.rs index 05b5729e98c..86e52eaf2a5 100644 --- a/components/pd_client/src/lib.rs +++ b/components/pd_client/src/lib.rs @@ -162,6 +162,10 @@ impl BucketStat { self.meta = meta; } + pub fn clear_stats(&mut self) { + self.stats = new_bucket_stats(&self.meta); + } + pub fn merge(&mut self, delta: &BucketStat) { merge_bucket_stats( &self.meta.keys, diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 1544a703c6d..67e8d557dd9 100644 --- 
a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -10,6 +10,7 @@ use crossbeam::channel::TryRecvError; use engine_traits::{FlushState, KvEngine, TabletRegistry}; use futures::{compat::Future01CompatExt, FutureExt, StreamExt}; use kvproto::{metapb, raft_serverpb::RegionLocalState}; +use pd_client::BucketStat; use raftstore::store::{Config, ReadTask}; use slog::Logger; use tikv_util::{ @@ -68,6 +69,7 @@ impl ApplyFsm { log_recovery: Option>, applied_term: u64, logger: Logger, + buckets: Option, ) -> (ApplyScheduler, Self) { let (tx, rx) = future::unbounded(WakePolicy::Immediately); let apply = Apply::new( @@ -81,6 +83,7 @@ impl ApplyFsm { log_recovery, applied_term, logger, + buckets, ); ( ApplyScheduler { sender: tx }, @@ -120,6 +123,9 @@ impl ApplyFsm { ApplyTask::Snapshot(snap_task) => self.apply.schedule_gen_snapshot(snap_task), ApplyTask::UnsafeWrite(raw_write) => self.apply.apply_unsafe_write(raw_write), ApplyTask::ManualFlush => self.apply.on_manual_flush().await, + ApplyTask::RefreshBucketStat(bucket_meta) => { + self.apply.on_refresh_buckets(bucket_meta) + } } self.apply.maybe_flush().await; diff --git a/components/raftstore-v2/src/operation/bucket.rs b/components/raftstore-v2/src/operation/bucket.rs index 05976d49d97..be4ca092d98 100644 --- a/components/raftstore-v2/src/operation/bucket.rs +++ b/components/raftstore-v2/src/operation/bucket.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use engine_traits::{KvEngine, RaftEngine}; -use kvproto::metapb::RegionEpoch; +use kvproto::metapb::{self, RegionEpoch}; use pd_client::{BucketMeta, BucketStat}; use raftstore::{ coprocessor::RegionChangeEvent, @@ -13,7 +13,119 @@ use raftstore::{ }; use slog::{error, warn}; -use crate::{batch::StoreContext, fsm::PeerFsmDelegate, raft::Peer, router::PeerTick, worker::pd}; +use crate::{ + batch::StoreContext, + fsm::PeerFsmDelegate, + raft::Peer, + router::{ApplyTask, PeerTick}, + worker::pd, +}; + +#[derive(Debug, Clone, Default)] +pub 
struct BucketStatsInfo { + bucket_stat: Option, + // the last buckets records the stats that the recently refreshed. + last_bucket_stat: Option, + // the report bucket stat records the increment stats after last report pd. + // it will be reset after report pd. + report_bucket_stat: Option, +} + +impl BucketStatsInfo { + /// returns all bucket ranges those's write_bytes exceed the given + /// diff_size_threshold. + pub fn gen_bucket_range_for_update( + &self, + diff_size_threshold: u64, + ) -> Option> { + let region_buckets = self.bucket_stat.as_ref()?; + let stats = ®ion_buckets.stats; + let keys = ®ion_buckets.meta.keys; + + let empty_last_keys = vec![]; + let empty_last_stats = metapb::BucketStats::default(); + let (last_keys, last_stats, stats_reset) = self + .last_bucket_stat + .as_ref() + .map(|b| { + ( + &b.meta.keys, + &b.stats, + region_buckets.create_time != b.create_time, + ) + }) + .unwrap_or((&empty_last_keys, &empty_last_stats, false)); + + let mut bucket_ranges = vec![]; + let mut j = 0; + assert_eq!(keys.len(), stats.write_bytes.len() + 1); + for i in 0..stats.write_bytes.len() { + let mut diff_in_bytes = stats.write_bytes[i]; + while j < last_keys.len() && keys[i] > last_keys[j] { + j += 1; + } + if j < last_keys.len() && keys[i] == last_keys[j] { + if !stats_reset { + diff_in_bytes -= last_stats.write_bytes[j]; + } + j += 1; + } + if diff_in_bytes >= diff_size_threshold { + bucket_ranges.push(BucketRange(keys[i].clone(), keys[i + 1].clone())); + } + } + Some(bucket_ranges) + } + + #[inline] + pub fn version(&self) -> u64 { + self.bucket_stat + .as_ref() + .or(self.last_bucket_stat.as_ref()) + .map(|b| b.meta.version) + .unwrap_or_default() + } + #[inline] + pub fn add_bucket_flow(&mut self, delta: &Option) { + if let (Some(buckets), Some(report_buckets), Some(delta)) = ( + self.bucket_stat.as_mut(), + self.report_bucket_stat.as_mut(), + delta, + ) { + buckets.merge(delta); + report_buckets.merge(delta); + } + } + + #[inline] + pub fn 
set_bucket_stat(&mut self, buckets: Option) { + if let Some(b) = self.bucket_stat.take() { + self.last_bucket_stat = Some(b); + } + self.report_bucket_stat = buckets.clone(); + self.bucket_stat = buckets; + } + + #[inline] + pub fn clear_bucket_stat(&mut self) { + if let Some(bucket) = self.report_bucket_stat.as_mut() { + bucket.clear_stats(); + } + } + + #[inline] + pub fn report_bucket_stat(&mut self) -> BucketStat { + let current = self.report_bucket_stat.as_mut().unwrap(); + let delta = current.clone(); + current.clear_stats(); + delta + } + + #[inline] + pub fn bucket_stat(&self) -> &Option { + &self.bucket_stat + } +} impl Peer { #[inline] @@ -48,12 +160,7 @@ impl Peer { }; let region = self.region(); - let current_version = self - .region_buckets() - .as_ref() - .or_else(|| self.last_region_buckets().as_ref()) - .map(|b| b.meta.version) - .unwrap_or_default(); + let current_version = self.region_buckets_info().version(); let mut region_buckets: BucketStat; // The region buckets reset after this region happened split or merge. // The message should be dropped if it's epoch is lower than the regions. @@ -61,7 +168,7 @@ impl Peer { // So this condition indicates that the region buckets needs to refresh not // renew. 
if let (Some(bucket_ranges), Some(peer_region_buckets)) = - (bucket_ranges, self.region_buckets()) + (bucket_ranges, self.region_buckets_info().bucket_stat()) { assert_eq!(buckets.len(), bucket_ranges.len()); let mut meta_idx = 0; @@ -145,17 +252,22 @@ impl Peer { self.state_role(), ); let meta = region_buckets.meta.clone(); - self.set_region_buckets(Some(region_buckets)); + self.region_buckets_info_mut() + .set_bucket_stat(Some(region_buckets.clone())); + let mut store_meta = store_ctx.store_meta.lock().unwrap(); if let Some(reader) = store_meta.readers.get_mut(&self.region_id()) { reader.0.update(ReadProgress::region_buckets(meta)); } + self.apply_scheduler() + .unwrap() + .send(ApplyTask::RefreshBucketStat(region_buckets.meta.clone())); } #[inline] pub fn report_region_buckets_pd(&mut self, ctx: &StoreContext) { - let region_buckets = self.region_buckets().as_ref().unwrap(); - let task = pd::Task::ReportBuckets(region_buckets.clone()); + let delta = self.region_buckets_info_mut().report_bucket_stat(); + let task = pd::Task::ReportBuckets(delta); if let Err(e) = ctx.schedulers.pd.schedule(task) { error!( self.logger, @@ -180,6 +292,21 @@ impl Peer { } } } + + // generate bucket range list to run split-check (to further split buckets) + // It will return the suspected bucket ranges whose write bytes exceed the + // threshold. 
+ pub fn gen_bucket_range_for_update( + &self, + ctx: &StoreContext, + ) -> Option> { + if !ctx.coprocessor_host.cfg.enable_region_bucket() { + return None; + } + let bucket_update_diff_size_threshold = ctx.coprocessor_host.cfg.region_bucket_size.0 / 2; + self.region_buckets_info() + .gen_bucket_range_for_update(bucket_update_diff_size_threshold) + } } impl<'a, EK, ER, T: Transport> PeerFsmDelegate<'a, EK, ER, T> @@ -189,7 +316,14 @@ where { #[inline] pub fn on_report_region_buckets_tick(&mut self) { - if !self.fsm.peer().is_leader() || self.fsm.peer().region_buckets().is_none() { + if !self.fsm.peer().is_leader() + || self + .fsm + .peer() + .region_buckets_info() + .bucket_stat() + .is_none() + { return; } self.fsm.peer_mut().report_region_buckets_pd(self.store_ctx); diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 4e14c7e016d..260fb8700b8 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -217,8 +217,12 @@ impl Peer { } // todo: the suspected buckets range should generated by the diff write bytes. // it will be done in next pr. - let task = - SplitCheckTask::split_check(self.region().clone(), true, CheckPolicy::Scan, None); + let task = SplitCheckTask::split_check( + self.region().clone(), + true, + CheckPolicy::Scan, + self.gen_bucket_range_for_update(ctx), + ); if let Err(e) = ctx.schedulers.split_check.schedule(task) { info!(self.logger, "failed to schedule split check"; "err" => ?e); } @@ -321,14 +325,21 @@ impl Peer { return; } + // Do not check the bucket ranges if we want to split the region with a given + // key range, this is to avoid compatibility issues. 
+ let split_check_bucket_ranges = if !is_key_range { + self.gen_bucket_range_for_update(ctx) + } else { + None + }; + let task = SplitCheckTask::split_check_key_range( region.clone(), rhs.start_key, rhs.end_key, false, rhs.policy, - // todo: bucket range - None, + split_check_bucket_ranges, ); if let Err(e) = ctx.schedulers.split_check.schedule(task) { error!( @@ -966,6 +977,7 @@ mod test { None, 5, logger.clone(), + None, ); let mut splits = BatchSplitRequest::default(); diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 8bff64e66c9..76a7741134e 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -16,7 +16,11 @@ //! - Applied result are sent back to peer fsm, and update memory state in //! `on_apply_res`. -use std::{mem, sync::atomic::Ordering, time::Duration}; +use std::{ + mem, + sync::{atomic::Ordering, Arc}, + time::Duration, +}; use engine_traits::{KvEngine, PerfContext, RaftEngine, WriteBatch, WriteOptions}; use kvproto::raft_cmdpb::{ @@ -63,6 +67,7 @@ pub use admin::{ RequestHalfSplit, RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX, }; pub use control::ProposalControl; +use pd_client::{BucketMeta, BucketStat}; pub use write::{ SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, }; @@ -122,6 +127,7 @@ impl Peer { }; let logger = self.logger.clone(); let read_scheduler = self.storage().read_scheduler(); + let buckets = self.region_buckets_info().bucket_stat().clone(); let (apply_scheduler, mut apply_fsm) = ApplyFsm::new( &store_ctx.cfg, self.peer().clone(), @@ -133,6 +139,7 @@ impl Peer { self.storage().apply_trace().log_recovery(), self.entry_storage().applied_term(), logger, + buckets, ); store_ctx @@ -353,7 +360,8 @@ impl Peer { AdminCmdResult::PrepareMerge(res) => self.on_apply_res_prepare_merge(ctx, res), } } - + self.region_buckets_info_mut() + 
.add_bucket_flow(&apply_res.bucket_stat); self.update_split_flow_control(&apply_res.metrics); self.update_stat(&apply_res.metrics); @@ -483,6 +491,14 @@ impl Apply { self.maybe_reschedule(written_bytes).await } + pub fn on_refresh_buckets(&mut self, meta: Arc) { + let mut new = BucketStat::from_meta(meta); + if let Some(origin) = self.buckets.as_ref() { + new.merge(origin); + } + self.buckets.replace(new); + } + #[inline] pub async fn apply_committed_entries(&mut self, ce: CommittedEntries) { fail::fail_point!("APPLY_COMMITTED_ENTRIES"); @@ -727,8 +743,12 @@ impl Apply { apply_res.admin_result = self.take_admin_result().into_boxed_slice(); apply_res.modifications = *self.modifications_mut(); apply_res.metrics = mem::take(&mut self.metrics); + apply_res.bucket_stat = self.buckets.clone(); let written_bytes = apply_res.metrics.written_bytes; self.res_reporter().report(apply_res); + if let Some(buckets) = &mut self.buckets { + buckets.clear_stats(); + } // Report result first and then invoking callbacks. This may delays callback a // little bit, but can make sure all following messages must see the side diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 988b7cf4b2d..ca4c7152364 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -140,6 +140,9 @@ impl Apply { return Ok(()); } util::check_key_in_region(key, self.region())?; + if let Some(s) = self.buckets.as_mut() { + s.write_key(key, value.len() as u64); + } // Technically it's OK to remove prefix for raftstore v2. But rocksdb doesn't // support specifying infinite upper bound in various APIs. 
keys::data_key_with_buffer(key, &mut self.key_buffer); @@ -183,6 +186,9 @@ impl Apply { return Ok(()); } util::check_key_in_region(key, self.region())?; + if let Some(s) = self.buckets.as_mut() { + s.write_key(key, 0); + } keys::data_key_with_buffer(key, &mut self.key_buffer); self.ensure_write_buffer(); let res = if cf.is_empty() || cf == CF_DEFAULT { diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 0ba7de2c3e5..68acac6668b 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -19,6 +19,7 @@ pub use ready::{ }; pub(crate) use self::{ + bucket::BucketStatsInfo, command::SplitInit, query::{LocalReader, ReadDelegatePair, SharedReadTablet}, txn_ext::TxnContext, diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index a7af3c470ae..b4109fd9de0 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -6,6 +6,7 @@ use engine_traits::{ FlushState, KvEngine, PerfContextKind, TabletRegistry, WriteBatch, DATA_CFS_LEN, }; use kvproto::{metapb, raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; +use pd_client::BucketStat; use raftstore::store::{ fsm::{apply::DEFAULT_APPLY_WB_SIZE, ApplyMetrics}, Config, ReadTask, @@ -58,6 +59,7 @@ pub struct Apply { read_scheduler: Scheduler>, pub(crate) metrics: ApplyMetrics, pub(crate) logger: Logger, + pub(crate) buckets: Option, } impl Apply { @@ -73,6 +75,7 @@ impl Apply { log_recovery: Option>, applied_term: u64, logger: Logger, + buckets: Option, ) -> Self { let mut remote_tablet = tablet_registry .get(region_state.get_region().get_id()) @@ -103,6 +106,7 @@ impl Apply { log_recovery, metrics: ApplyMetrics::default(), logger, + buckets, } } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index bcf92471ebe..e510c85cbf9 100644 --- 
a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -15,7 +15,6 @@ use kvproto::{ pdpb, raft_serverpb::{RaftMessage, RegionLocalState}, }; -use pd_client::BucketStat; use raft::{RawNode, StateRole}; use raftstore::{ coprocessor::{CoprocessorHost, RegionChangeEvent, RegionChangeReason}, @@ -32,8 +31,8 @@ use super::storage::Storage; use crate::{ fsm::ApplyScheduler, operation::{ - AsyncWriter, CompactLogContext, DestroyProgress, GcPeerContext, MergeContext, - ProposalControl, SimpleWriteReqEncoder, SplitFlowControl, TxnContext, + AsyncWriter, BucketStatsInfo, CompactLogContext, DestroyProgress, GcPeerContext, + MergeContext, ProposalControl, SimpleWriteReqEncoder, SplitFlowControl, TxnContext, }, router::{CmdResChannel, PeerTick, QueryResChannel}, Result, @@ -83,9 +82,7 @@ pub struct Peer { read_progress: Arc, leader_lease: Lease, - /// region buckets. - region_buckets: Option, - last_region_buckets: Option, + region_buckets_info: BucketStatsInfo, /// Transaction extensions related to this peer. 
txn_context: TxnContext, @@ -184,8 +181,7 @@ impl Peer { cfg.raft_store_max_leader_lease(), cfg.renew_leader_lease_advance_duration(), ), - region_buckets: None, - last_region_buckets: None, + region_buckets_info: BucketStatsInfo::default(), txn_context: TxnContext::default(), proposal_control: ProposalControl::new(0), pending_ticks: Vec::new(), @@ -217,22 +213,12 @@ impl Peer { Ok(peer) } - #[inline] - pub fn region_buckets(&self) -> &Option { - &self.region_buckets - } - - #[inline] - pub fn set_region_buckets(&mut self, buckets: Option) { - if let Some(b) = self.region_buckets.take() { - self.last_region_buckets = Some(b); - } - self.region_buckets = buckets; + pub fn region_buckets_info_mut(&mut self) -> &mut BucketStatsInfo { + &mut self.region_buckets_info } - #[inline] - pub fn last_region_buckets(&self) -> &Option { - &self.last_region_buckets + pub fn region_buckets_info(&self) -> &BucketStatsInfo { + &self.region_buckets_info } #[inline] @@ -680,7 +666,7 @@ impl Peer { #[inline] pub fn post_split(&mut self) { - self.set_region_buckets(None); + self.region_buckets_info_mut().set_bucket_stat(None); } pub fn maybe_campaign(&mut self) -> bool { @@ -716,7 +702,10 @@ impl Peer { self.txn_context.extra_op().clone(), self.txn_context.ext().clone(), self.read_progress().clone(), - self.region_buckets.as_ref().map(|b| b.meta.clone()), + self.region_buckets_info() + .bucket_stat() + .as_ref() + .map(|b| b.meta.clone()), ) } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index ce15ac20621..8f9fe2d8947 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -507,6 +507,7 @@ mod tests { None, 5, logger, + None, ); // Test get snapshot diff --git a/components/raftstore-v2/src/router/internal_message.rs b/components/raftstore-v2/src/router/internal_message.rs index 092e7e21b5f..764e8df7dfd 100644 --- a/components/raftstore-v2/src/router/internal_message.rs +++ 
b/components/raftstore-v2/src/router/internal_message.rs @@ -1,5 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +use pd_client::{BucketMeta, BucketStat}; use raftstore::store::fsm::ApplyMetrics; use crate::operation::{AdminCmdResult, CommittedEntries, DataTrace, GenSnapTask}; @@ -11,6 +12,7 @@ pub enum ApplyTask { /// Writes that doesn't care consistency. UnsafeWrite(Box<[u8]>), ManualFlush, + RefreshBucketStat(std::sync::Arc), } #[derive(Debug, Default)] @@ -20,4 +22,5 @@ pub struct ApplyRes { pub admin_result: Box<[AdminCmdResult]>, pub modifications: DataTrace, pub metrics: ApplyMetrics, + pub bucket_stat: Option, } diff --git a/components/raftstore-v2/src/worker/pd/region.rs b/components/raftstore-v2/src/worker/pd/region.rs index bca48412aa6..e825dd54c32 100644 --- a/components/raftstore-v2/src/worker/pd/region.rs +++ b/components/raftstore-v2/src/worker/pd/region.rs @@ -66,8 +66,7 @@ impl ReportBucket { } } - fn report(&mut self, report_ts: UnixSecs) -> BucketStat { - self.last_report_ts = report_ts; + fn report(&mut self) -> BucketStat { match self.last_report_stat.replace(self.current_stat.clone()) { Some(last) => { let mut delta = BucketStat::from_meta(self.current_stat.meta.clone()); @@ -350,7 +349,8 @@ where }; let now = UnixSecs::now(); let interval_second = now.into_inner() - last_report_ts.into_inner(); - let delta = report_buckets.report(now); + report_buckets.last_report_ts = now; + let delta = report_buckets.report(); let resp = self .pd_client .report_region_buckets(&delta, Duration::from_secs(interval_second)); diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 065d032eaa2..264d127cc8c 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -503,8 +503,8 @@ impl Cluster { Cluster::with_configs(count, config, None) } - pub fn with_cop_cfg(coprocessor_cfg: CopConfig) -> 
Cluster { - Cluster::with_configs(1, None, Some(coprocessor_cfg)) + pub fn with_cop_cfg(config: Option, coprocessor_cfg: CopConfig) -> Cluster { + Cluster::with_configs(1, config, Some(coprocessor_cfg)) } pub fn with_configs(count: usize, config: Option, cop_cfg: Option) -> Self { diff --git a/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs b/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs index 56159538836..b9dea63bbfe 100644 --- a/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs +++ b/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs @@ -13,7 +13,7 @@ use raftstore_v2::{ }; use tikv_util::{config::ReadableSize, store::new_peer}; -use crate::cluster::Cluster; +use crate::cluster::{v2_default_config, Cluster}; #[test] fn test_region_heartbeat() { @@ -74,7 +74,9 @@ fn test_report_buckets() { let mut cop_cfg = CopConfig::default(); cop_cfg.enable_region_bucket = Some(true); cop_cfg.region_bucket_size = ReadableSize::kb(1); - let cluster = Cluster::with_cop_cfg(cop_cfg); + let mut config = v2_default_config(); + config.region_split_check_diff = Some(ReadableSize::kb(1)); + let cluster = Cluster::with_cop_cfg(Some(config), cop_cfg); let store_id = cluster.node(0).id(); let router = &cluster.routers[0]; @@ -92,20 +94,13 @@ fn test_report_buckets() { router.wait_applied_to_current_term(region_id, Duration::from_secs(3)); // load data to split bucket. 
- let header = Box::new(router.new_request_for(region_id).take_header()); let mut suffix = String::from(""); for _ in 0..200 { suffix.push_str("fake "); } - for i in 0..10 { - let mut put = SimpleWriteEncoder::with_capacity(64); - let mut key = format!("key-{}", i); - key.push_str(&suffix); - put.put(CF_DEFAULT, key.as_bytes(), b"value"); - let (msg, sub) = PeerMsg::simple_write(header.clone(), put.clone().encode()); - router.send(region_id, msg).unwrap(); - let _resp = block_on(sub.result()).unwrap(); - } + + let repeat: u64 = 10; + let bytes = write_keys(&cluster, region_id, &suffix, repeat.try_into().unwrap()); // To find the split keys, it should flush memtable manually. let mut cached = cluster.node(0).tablet_registry().get(region_id).unwrap(); cached.latest().unwrap().flush_cf(CF_DEFAULT, true).unwrap(); @@ -126,6 +121,12 @@ fn test_report_buckets() { if let Some(buckets) = resp { assert!(buckets.get_keys().len() > 2); assert_eq!(buckets.get_region_id(), region_id); + let write_bytes = buckets.get_stats().get_write_bytes(); + let write_keys = buckets.get_stats().get_write_keys(); + for i in 0..buckets.keys.len() - 1 { + assert!(write_bytes[i] >= bytes); + assert!(write_keys[i] >= repeat); + } for i in 0..buckets.keys.len() - 1 { buckets_tmp.push(raftstore::store::Bucket::default()); let bucket_range = @@ -134,6 +135,23 @@ fn test_report_buckets() { } } + // report buckets to pd again, the write bytes and keys should be zero. 
+ router + .send(region_id, PeerMsg::Tick(PeerTick::ReportBuckets)) + .unwrap(); + std::thread::sleep(std::time::Duration::from_millis(50)); + + let resp = block_on(cluster.node(0).pd_client().get_buckets_by_id(region_id)).unwrap(); + if let Some(buckets) = resp { + assert_eq!(buckets.get_region_id(), region_id); + let write_bytes = buckets.get_stats().get_write_bytes(); + let write_keys = buckets.get_stats().get_write_keys(); + for i in 0..buckets.keys.len() - 1 { + assert!(write_bytes[i] == 0); + assert!(write_keys[i] == 0); + } + } + // send the same region buckets to refresh which needs to merge the last. let resp = block_on(cluster.node(0).pd_client().get_region_by_id(region_id)).unwrap(); if let Some(region) = resp { @@ -148,4 +166,35 @@ fn test_report_buckets() { std::thread::sleep(std::time::Duration::from_millis(50)); } } + // report buckets to pd again, the write bytes and keys should be zero. + router + .send(region_id, PeerMsg::Tick(PeerTick::ReportBuckets)) + .unwrap(); + std::thread::sleep(std::time::Duration::from_millis(50)); + + let resp = block_on(cluster.node(0).pd_client().get_buckets_by_id(region_id)).unwrap(); + if let Some(buckets) = resp { + assert_eq!(buckets.get_region_id(), region_id); + let write_bytes = buckets.get_stats().get_write_bytes(); + let write_keys = buckets.get_stats().get_write_keys(); + assert_eq!(write_bytes.len(), 1); + assert_eq!(write_keys.len(), 1); + } + + fn write_keys(cluster: &Cluster, region_id: u64, suffix: &str, repeat: usize) -> u64 { + let router = &cluster.routers[0]; + let header = Box::new(router.new_request_for(region_id).take_header()); + for i in 0..repeat { + let mut put = SimpleWriteEncoder::with_capacity(64); + let mut key = format!("key-{}", i); + key.push_str(suffix); + put.put(CF_DEFAULT, key.as_bytes(), b"value"); + let (msg, sub) = PeerMsg::simple_write(header.clone(), put.clone().encode()); + router.send(region_id, msg).unwrap(); + let _resp = block_on(sub.result()).unwrap(); + } + 
((suffix.as_bytes().len() + 10) * repeat) + .try_into() + .unwrap() + } } diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 826537f4e44..022bd457cd5 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -967,8 +967,11 @@ where } let region = Arc::clone(&delegate.region); - let response = delegate.execute(&req, ®ion, None, Some(local_read_ctx)); - + let mut response = + delegate.execute(&req, ®ion, None, Some(local_read_ctx)); + if let Some(snap) = response.snapshot.as_mut() { + snap.bucket_meta = delegate.bucket_meta.clone(); + } // Try renew lease in advance delegate.maybe_renew_lease_advance(&self.router, snapshot_ts); response @@ -992,8 +995,11 @@ where let region = Arc::clone(&delegate.region); // Getting the snapshot - let response = delegate.execute(&req, ®ion, None, Some(local_read_ctx)); - + let mut response = + delegate.execute(&req, ®ion, None, Some(local_read_ctx)); + if let Some(snap) = response.snapshot.as_mut() { + snap.bucket_meta = delegate.bucket_meta.clone(); + } // Double check in case `safe_ts` change after the first check and before // getting snapshot if let Err(resp) = delegate.check_stale_read_safe(read_ts) { From de78be91b59be3351e80c37380e16348108425de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 28 Feb 2023 10:27:08 +0800 Subject: [PATCH 0552/1149] log_backup: move all download request into the same runtime (#14286) ref hyperium/hyper#2112, close tikv/tikv#14285 Download tasks will executed in a tiny runtime for now. 
Signed-off-by: hillium --- components/file_system/src/io_stats/proc.rs | 5 +++ components/sst_importer/src/sst_importer.rs | 36 ++++++++++++++------- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/components/file_system/src/io_stats/proc.rs b/components/file_system/src/io_stats/proc.rs index 51c74ae56a8..652fe05c658 100644 --- a/components/file_system/src/io_stats/proc.rs +++ b/components/file_system/src/io_stats/proc.rs @@ -141,6 +141,11 @@ pub fn init() -> Result<(), String> { Ok(()) } +/// Bind I/O type for the current thread. +/// Following calls to the [`file_system`](crate) APIs would be throttled and +/// recorded via this information. +/// Generally, when you are creating new threads playing with the local disks, +/// you should call this before doing so. pub fn set_io_type(new_io_type: IoType) { IO_TYPE.with(|io_type| { if io_type.get() != new_io_type { diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 42a96e21652..0da45c195be 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -25,7 +25,7 @@ use engine_traits::{ use external_storage_export::{ compression_reader_dispatcher, encrypt_wrap_reader, ExternalStorage, RestoreConfig, }; -use file_system::{get_io_rate_limiter, OpenOptions}; +use file_system::{get_io_rate_limiter, IoType, OpenOptions}; use kvproto::{ brpb::{CipherInfo, StorageBackend}, import_sstpb::*, @@ -37,8 +37,7 @@ use tikv_util::{ stream_event::{EventEncoder, EventIterator, Iterator as EIterator}, }, config::ReadableSize, - stream::block_on_external_io, - sys::SysQuota, + sys::{thread::ThreadBuildWrapper, SysQuota}, time::{Instant, Limiter}, }; use tokio::runtime::{Handle, Runtime}; @@ -291,7 +290,20 @@ impl SstImporter { ) -> Result { let switcher = ImportModeSwitcher::new(cfg); let cached_storage = CacheMap::default(); - let download_rt = tokio::runtime::Builder::new_current_thread() + // We are going to run 
some background tasks here, (hyper needs to maintain the + // connection, the cache map needs gc intervally.) so we must create a + // multi-thread runtime, given there isn't blocking, a single thread runtime is + // enough. + let download_rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .thread_name("sst_import_misc") + .after_start_wrapper(|| { + tikv_alloc::add_thread_memory_accessor(); + file_system::set_io_type(IoType::Import); + }) + .before_stop_wrapper(|| { + tikv_alloc::remove_thread_memory_accessor(); + }) .enable_all() .build()?; download_rt.spawn(cached_storage.gc_loop()); @@ -788,13 +800,15 @@ impl SstImporter { encrypt_wrap_reader(file_crypter, inner)? }; - let r = block_on_external_io(external_storage_export::read_external_storage_info_buff( - &mut reader, - speed_limiter, - file_length, - expected_sha256, - external_storage_export::MIN_READ_SPEED, - )); + let r = + self.download_rt + .block_on(external_storage_export::read_external_storage_info_buff( + &mut reader, + speed_limiter, + file_length, + expected_sha256, + external_storage_export::MIN_READ_SPEED, + )); let url = ext_storage.url()?.to_string(); let buff = r.map_err(|e| Error::CannotReadExternalStorage { url: url.to_string(), From 192dff638d05724759e3dee642639a86b20e4565 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 28 Feb 2023 16:03:08 +0800 Subject: [PATCH 0553/1149] importer: use kv engine instead of raw API (#14294) ref tikv/tikv#12842 So that it can support both v1 and v2. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/raft_log_engine/src/engine.rs | 6 +- components/raftstore-v2/Cargo.toml | 1 + components/raftstore-v2/src/batch/store.rs | 8 + components/raftstore-v2/src/fsm/apply.rs | 7 +- .../src/operation/command/admin/split.rs | 9 +- .../raftstore-v2/src/operation/command/mod.rs | 14 +- .../src/operation/command/write/mod.rs | 42 +- .../operation/command/write/simple_write.rs | 101 +++- components/raftstore-v2/src/operation/mod.rs | 17 + components/raftstore-v2/src/raft/apply.rs | 13 +- components/raftstore-v2/src/raft/storage.rs | 9 +- .../tests/integrations/cluster.rs | 12 + components/server/src/server.rs | 2 +- components/server/src/server2.rs | 47 +- components/test_raftstore-v2/src/cluster.rs | 4 + components/test_raftstore-v2/src/node.rs | 13 +- components/test_raftstore-v2/src/server.rs | 25 +- components/test_raftstore/src/server.rs | 4 +- components/tikv_kv/src/btree_engine.rs | 1 + components/tikv_kv/src/lib.rs | 30 +- src/import/sst_service.rs | 439 +++++++----------- src/server/gc_worker/gc_worker.rs | 2 + src/server/raftkv/mod.rs | 6 + src/server/raftkv2/mod.rs | 16 +- src/server/raftkv2/node.rs | 5 + src/storage/mvcc/reader/reader.rs | 1 + tests/integrations/server/kv_service.rs | 2 +- 28 files changed, 536 insertions(+), 301 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7add84159b6..2bd382ee8f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4384,6 +4384,7 @@ dependencies = [ "slog", "slog-global", "smallvec", + "sst_importer", "tempfile", "test_pd", "test_util", diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 92d7a4f7353..a9e75ca9580 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -337,10 +337,6 @@ impl RaftLogEngine { ))) } - pub fn path(&self) -> &str { - self.0.path() - } - /// If path is not an empty directory, we say db exists. 
pub fn exists(path: &str) -> bool { let path = Path::new(path); @@ -780,7 +776,7 @@ impl RaftEngine for RaftLogEngine { } fn get_engine_path(&self) -> &str { - self.path() + self.0.path() } fn for_each_raft_group(&self, f: &mut F) -> std::result::Result<(), E> diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index c7e403afebe..3dfeb512980 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -56,6 +56,7 @@ resource_control = { workspace = true } resource_metering = { workspace = true } slog = "2.3" smallvec = "1.4" +sst_importer = { workspace = true } thiserror = "1.0" tikv_util = { workspace = true } time = "0.1" diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 1a507bb7f10..4693b0db369 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -32,6 +32,7 @@ use raftstore::{ }; use resource_metering::CollectorRegHandle; use slog::{warn, Logger}; +use sst_importer::SstImporter; use tikv_util::{ box_err, config::{Tracker, VersionTrack}, @@ -84,6 +85,7 @@ pub struct StoreContext { pub self_disk_usage: DiskUsage, pub snap_mgr: TabletSnapManager, + pub sst_importer: Arc, } impl StoreContext { @@ -277,6 +279,7 @@ struct StorePollerBuilder { store_meta: Arc>>, shutdown: Arc, snap_mgr: TabletSnapManager, + sst_importer: Arc, } impl StorePollerBuilder { @@ -293,6 +296,7 @@ impl StorePollerBuilder { shutdown: Arc, snap_mgr: TabletSnapManager, coprocessor_host: CoprocessorHost, + sst_importer: Arc, ) -> Self { let pool_size = cfg.value().apply_batch_system.pool_size; let max_pool_size = std::cmp::max( @@ -318,6 +322,7 @@ impl StorePollerBuilder { snap_mgr, shutdown, coprocessor_host, + sst_importer, } } @@ -435,6 +440,7 @@ where self_disk_usage: DiskUsage::Normal, snap_mgr: self.snap_mgr.clone(), coprocessor_host: self.coprocessor_host.clone(), + sst_importer: self.sst_importer.clone(), }; 
poll_ctx.update_ticks_timeout(); let cfg_tracker = self.cfg.clone().tracker("raftstore".to_string()); @@ -527,6 +533,7 @@ impl StoreSystem { collector_reg_handle: CollectorRegHandle, background: Worker, pd_worker: LazyWorker, + sst_importer: Arc, ) -> Result<()> where T: Transport + 'static, @@ -627,6 +634,7 @@ impl StoreSystem { self.shutdown.clone(), snap_mgr, coprocessor_host, + sst_importer, ); self.workers = Some(workers); self.schedulers = Some(schedulers); diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 67e8d557dd9..e1bf5169d55 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -13,6 +13,7 @@ use kvproto::{metapb, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; use raftstore::store::{Config, ReadTask}; use slog::Logger; +use sst_importer::SstImporter; use tikv_util::{ mpsc::future::{self, Receiver, Sender, WakePolicy}, timer::GLOBAL_TIMER_HANDLE, @@ -68,8 +69,9 @@ impl ApplyFsm { flush_state: Arc, log_recovery: Option>, applied_term: u64, - logger: Logger, buckets: Option, + sst_importer: Arc, + logger: Logger, ) -> (ApplyScheduler, Self) { let (tx, rx) = future::unbounded(WakePolicy::Immediately); let apply = Apply::new( @@ -82,8 +84,9 @@ impl ApplyFsm { flush_state, log_recovery, applied_term, - logger, buckets, + sst_importer, + logger, ); ( ApplyScheduler { sender: tx }, diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 260fb8700b8..b31fc7e7471 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -830,7 +830,10 @@ mod test { }; use super::*; - use crate::{fsm::ApplyResReporter, raft::Apply, router::ApplyRes}; + use crate::{ + fsm::ApplyResReporter, operation::test_util::create_tmp_importer, raft::Apply, + router::ApplyRes, + }; struct MockReporter { 
sender: Sender, @@ -961,6 +964,7 @@ mod test { let (read_scheduler, _rx) = dummy_scheduler(); let (reporter, _) = MockReporter::new(); + let (_tmp_dir, importer) = create_tmp_importer(); let mut apply = Apply::new( &Config::default(), region @@ -976,8 +980,9 @@ mod test { Arc::new(FlushState::new(5)), None, 5, - logger.clone(), None, + importer, + logger.clone(), ); let mut splits = BatchSplitRequest::default(); diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 76a7741134e..ea8c8c227d0 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -43,7 +43,7 @@ use raftstore::{ }, Error, Result, }; -use slog::{info, warn}; +use slog::{error, info, warn}; use tikv_util::{ box_err, log::SlogFormat, @@ -138,8 +138,9 @@ impl Peer { self.flush_state().clone(), self.storage().apply_trace().log_recovery(), self.entry_storage().applied_term(), - logger, buckets, + store_ctx.sst_importer.clone(), + logger, ); store_ctx @@ -478,6 +479,12 @@ impl Apply { dr.notify_only, ); } + SimpleWrite::Ingest(_) => { + error!( + self.logger, + "IngestSST is not supposed to be called on local engine" + ); + } } } self.apply_flow_control_mut().need_flush = true; @@ -575,6 +582,9 @@ impl Apply { dr.notify_only, )?; } + SimpleWrite::Ingest(ssts) => { + self.apply_ingest(ssts)?; + } } } return res; diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index ca4c7152364..a461420f75b 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -1,20 +1,23 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use engine_traits::{data_cf_offset, KvEngine, Mutable, RaftEngine, CF_DEFAULT}; -use kvproto::raft_cmdpb::RaftRequestHeader; +use kvproto::{import_sstpb::SstMeta, raft_cmdpb::RaftRequestHeader}; use raftstore::{ store::{ - cmd_resp, + check_sst_for_ingestion, cmd_resp, fsm::{apply, MAX_PROPOSAL_SIZE_RATIO}, + metrics::PEER_WRITE_CMD_COUNTER, msg::ErrorCallback, util::{self, NORMAL_REQ_CHECK_CONF_VER, NORMAL_REQ_CHECK_VER}, }, Error, Result, }; +use slog::error; use tikv_util::slog_panic; use crate::{ batch::StoreContext, + fsm::ApplyResReporter, raft::{Apply, Peer}, router::{ApplyTask, CmdResChannel}, }; @@ -132,9 +135,10 @@ impl Peer { } } -impl Apply { +impl Apply { #[inline] pub fn apply_put(&mut self, cf: &str, index: u64, key: &[u8], value: &[u8]) -> Result<()> { + PEER_WRITE_CMD_COUNTER.put.inc(); let off = data_cf_offset(cf); if self.should_skip(off, index) { return Ok(()); @@ -181,6 +185,7 @@ impl Apply { #[inline] pub fn apply_delete(&mut self, cf: &str, index: u64, key: &[u8]) -> Result<()> { + PEER_WRITE_CMD_COUNTER.delete.inc(); let off = data_cf_offset(cf); if self.should_skip(off, index) { return Ok(()); @@ -228,4 +233,35 @@ impl Apply { // TODO: reuse the same delete as split/merge. Ok(()) } + + #[inline] + pub fn apply_ingest(&mut self, ssts: Vec) -> Result<()> { + PEER_WRITE_CMD_COUNTER.ingest_sst.inc(); + let mut infos = Vec::with_capacity(ssts.len()); + for sst in &ssts { + if let Err(e) = check_sst_for_ingestion(sst, self.region()) { + error!( + self.logger, + "ingest fail"; + "sst" => ?sst, + "region" => ?self.region(), + "error" => ?e + ); + let _ = self.sst_importer().delete(sst); + return Err(e); + } + match self.sst_importer().validate(sst) { + Ok(meta_info) => infos.push(meta_info), + Err(e) => { + slog_panic!(self.logger, "corrupted sst"; "sst" => ?sst, "error" => ?e); + } + } + } + // Unlike v1, we can't batch ssts accross regions. 
+ self.flush(); + if let Err(e) = self.sst_importer().ingest(&infos, self.tablet()) { + slog_panic!(self.logger, "ingest fail"; "ssts" => ?ssts, "error" => ?e); + } + Ok(()) + } } diff --git a/components/raftstore-v2/src/operation/command/write/simple_write.rs b/components/raftstore-v2/src/operation/command/write/simple_write.rs index e6f81b20af1..cf267f854b7 100644 --- a/components/raftstore-v2/src/operation/command/write/simple_write.rs +++ b/components/raftstore-v2/src/operation/command/write/simple_write.rs @@ -1,7 +1,12 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +use std::assert_matches::debug_assert_matches; + use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; -use kvproto::raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}; +use kvproto::{ + import_sstpb::SstMeta, + raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}, +}; use protobuf::{CodedInputStream, Message}; use raftstore::store::WriteCallback; use slog::Logger; @@ -15,9 +20,16 @@ use crate::{operation::command::parse_at, router::CmdResChannel}; const MAGIC_PREFIX: u8 = 0x00; #[derive(Clone, Debug)] -#[repr(transparent)] pub struct SimpleWriteBinary { buf: Box<[u8]>, + write_type: WriteType, +} + +impl SimpleWriteBinary { + /// Freeze the binary will forbid further batching. + pub fn freeze(&mut self) { + self.write_type = WriteType::Unspecified; + } } /// We usually use `RaftCmdRequest` for read write request. But the codec is @@ -29,6 +41,7 @@ pub struct SimpleWriteReqEncoder { buf: Vec, channels: Vec, size_limit: usize, + write_type: WriteType, notify_proposed: bool, } @@ -53,19 +66,24 @@ impl SimpleWriteReqEncoder { buf, channels: vec![], size_limit, + write_type: bin.write_type, notify_proposed, } } - /// Encode the simple write into the buffer dispite header check. + /// Encode the simple write into the buffer. /// - /// Return false if the buffer limit is reached or the write can be amended. 
+ /// Return false if the buffer limit is reached or the binary type not + /// match. #[inline] pub fn amend(&mut self, header: &RaftRequestHeader, bin: &SimpleWriteBinary) -> bool { if *self.header != *header { return false; } - if self.buf.len() + bin.buf.len() < self.size_limit { + if self.write_type == bin.write_type + && bin.write_type != WriteType::Unspecified + && self.buf.len() + bin.buf.len() < self.size_limit + { self.buf.extend_from_slice(&bin.buf); true } else { @@ -128,11 +146,21 @@ pub enum SimpleWrite<'a> { Put(Put<'a>), Delete(Delete<'a>), DeleteRange(DeleteRange<'a>), + Ingest(Vec), +} + +#[derive(Clone, Copy, Debug, PartialEq)] +enum WriteType { + Unspecified, + PutDelete, + DeleteRange, + Ingest, } #[derive(Clone)] pub struct SimpleWriteEncoder { buf: Vec, + write_type: WriteType, } impl SimpleWriteEncoder { @@ -140,21 +168,36 @@ impl SimpleWriteEncoder { pub fn with_capacity(cap: usize) -> SimpleWriteEncoder { SimpleWriteEncoder { buf: Vec::with_capacity(cap), + write_type: WriteType::Unspecified, } } #[inline] pub fn put(&mut self, cf: &str, key: &[u8], value: &[u8]) { + debug_assert_matches!( + self.write_type, + WriteType::Unspecified | WriteType::PutDelete + ); encode(SimpleWrite::Put(Put { cf, key, value }), &mut self.buf); + self.write_type = WriteType::PutDelete; } #[inline] pub fn delete(&mut self, cf: &str, key: &[u8]) { + debug_assert_matches!( + self.write_type, + WriteType::Unspecified | WriteType::PutDelete + ); encode(SimpleWrite::Delete(Delete { cf, key }), &mut self.buf); + self.write_type = WriteType::PutDelete; } #[inline] pub fn delete_range(&mut self, cf: &str, start_key: &[u8], end_key: &[u8], notify_only: bool) { + debug_assert_matches!( + self.write_type, + WriteType::Unspecified | WriteType::DeleteRange + ); encode( SimpleWrite::DeleteRange(DeleteRange { cf, @@ -164,12 +207,21 @@ impl SimpleWriteEncoder { }), &mut self.buf, ); + self.write_type = WriteType::DeleteRange; + } + + #[inline] + pub fn ingest(&mut self, sst: 
Vec) { + debug_assert_matches!(self.write_type, WriteType::Unspecified | WriteType::Ingest); + encode(SimpleWrite::Ingest(sst), &mut self.buf); + self.write_type = WriteType::Ingest; } #[inline] pub fn encode(self) -> SimpleWriteBinary { SimpleWriteBinary { buf: self.buf.into_boxed_slice(), + write_type: self.write_type, } } } @@ -228,6 +280,7 @@ impl<'a> Iterator for SimpleWriteReqDecoder<'a> { const PUT_TAG: u8 = 0; const DELETE_TAG: u8 = 1; const DELETE_RANGE_TAG: u8 = 2; +const INGEST_TAG: u8 = 3; const DEFAULT_CF_TAG: u8 = 0; const WRITE_CF_TAG: u8 = 1; @@ -353,6 +406,14 @@ fn encode(simple_write: SimpleWrite<'_>, buf: &mut Vec) { encode_bytes(dr.end_key, buf); buf.push(dr.notify_only as u8); } + SimpleWrite::Ingest(ssts) => { + buf.push(INGEST_TAG); + encode_len(ssts.len() as u32, buf); + // IngestSST is not a frequent operation, use protobuf to reduce complexity. + for sst in ssts { + sst.write_length_delimited_to_vec(buf).unwrap(); + } + } } } @@ -386,12 +447,28 @@ fn decode<'a>(buf: &mut &'a [u8]) -> Option> { notify_only: *notify_only != 0, })) } + INGEST_TAG => { + let (len, left) = decode_len(left); + let mut ssts = Vec::with_capacity(len as usize); + let mut is = CodedInputStream::from_bytes(left); + for _ in 0..len { + let sst = match is.read_message() { + Ok(sst) => sst, + Err(e) => panic!("data corrupted {:?}", e), + }; + ssts.push(sst); + } + *buf = left; + Some(SimpleWrite::Ingest(ssts)) + } tag => panic!("corrupted data: invalid tag {}", tag), } } #[cfg(test)] mod tests { + use std::assert_matches::assert_matches; + use kvproto::raft_cmdpb::{CmdType, Request}; use slog::o; @@ -412,7 +489,9 @@ mod tests { let mut encoder = SimpleWriteEncoder::with_capacity(512); encoder.delete_range(CF_LOCK, b"key", b"key", true); encoder.delete_range("cf", b"key", b"key", false); - req_encoder.amend(&header, &encoder.encode()); + let bin = encoder.encode(); + assert!(!req_encoder.amend(&header, &bin)); + let req_encoder2 = 
SimpleWriteReqEncoder::new(header.clone(), bin, 0, false); let (bytes, _) = req_encoder.encode(); let logger = slog_global::borrow_global().new(o!()); @@ -428,7 +507,10 @@ mod tests { let SimpleWrite::Delete(delete) = write else { panic!("should be delete") }; assert_eq!(delete.cf, CF_WRITE); assert_eq!(delete.key, &delete_key); + assert_matches!(decoder.next(), None); + let (bytes, _) = req_encoder2.encode(); + decoder = SimpleWriteReqDecoder::new(&logger, &bytes, 0, 0).unwrap(); let write = decoder.next().unwrap(); let SimpleWrite::DeleteRange(dr) = write else { panic!("should be delete range") }; assert_eq!(dr.cf, CF_LOCK); @@ -500,6 +582,13 @@ mod tests { // Only simple write command with same header can be batched. assert!(!req_encoder.amend(&header2, &bin)); + let mut bin2 = bin.clone(); + bin2.freeze(); + // Frozen bin can't be merged with other bin. + assert!(!req_encoder.amend(&header, &bin2)); + let mut req_encoder2 = SimpleWriteReqEncoder::new(header.clone(), bin2.clone(), 512, false); + assert!(!req_encoder2.amend(&header, &bin)); + // Batch should not excceed max size limit. 
let large_value = vec![0; 512]; let mut encoder = SimpleWriteEncoder::with_capacity(512); diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 68acac6668b..5514d966cea 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -24,3 +24,20 @@ pub(crate) use self::{ query::{LocalReader, ReadDelegatePair, SharedReadTablet}, txn_ext::TxnContext, }; + +#[cfg(test)] +pub mod test_util { + use std::sync::Arc; + + use kvproto::kvrpcpb::ApiVersion; + use sst_importer::SstImporter; + use tempfile::TempDir; + + pub fn create_tmp_importer() -> (TempDir, Arc) { + let dir = TempDir::new().unwrap(); + let importer = Arc::new( + SstImporter::new(&Default::default(), dir.path(), None, ApiVersion::V1).unwrap(), + ); + (dir, importer) + } +} diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index b4109fd9de0..5e7c7e84f84 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -12,6 +12,7 @@ use raftstore::store::{ Config, ReadTask, }; use slog::Logger; +use sst_importer::SstImporter; use tikv_util::{log::SlogFormat, worker::Scheduler}; use crate::{ @@ -57,6 +58,7 @@ pub struct Apply { res_reporter: R, read_scheduler: Scheduler>, + sst_importer: Arc, pub(crate) metrics: ApplyMetrics, pub(crate) logger: Logger, pub(crate) buckets: Option, @@ -74,8 +76,9 @@ impl Apply { flush_state: Arc, log_recovery: Option>, applied_term: u64, - logger: Logger, buckets: Option, + sst_importer: Arc, + logger: Logger, ) -> Self { let mut remote_tablet = tablet_registry .get(region_state.get_region().get_id()) @@ -105,8 +108,9 @@ impl Apply { flush_state, log_recovery, metrics: ApplyMetrics::default(), - logger, buckets, + sst_importer, + logger, } } @@ -260,4 +264,9 @@ impl Apply { pub fn apply_flow_control(&self) -> &ApplyFlowControl { &self.flow_control } + + #[inline] + pub fn 
sst_importer(&self) -> &SstImporter { + &self.sst_importer + } } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 8f9fe2d8947..d386ed0acae 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -339,7 +339,10 @@ mod tests { use super::*; use crate::{ - fsm::ApplyResReporter, operation::write_initial_states, raft::Apply, router::ApplyRes, + fsm::ApplyResReporter, + operation::{test_util::create_tmp_importer, write_initial_states}, + raft::Apply, + router::ApplyRes, }; #[derive(Clone)] @@ -495,6 +498,7 @@ mod tests { worker.start(read_runner); let mut state = RegionLocalState::default(); state.set_region(region.clone()); + let (_tmp_dir, importer) = create_tmp_importer(); // setup peer applyer let mut apply = Apply::new( &Config::default(), @@ -506,8 +510,9 @@ mod tests { Arc::new(FlushState::new(5)), None, 5, - logger, None, + importer, + logger, ); // Test get snapshot diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 264d127cc8c..451f7131cc9 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -23,6 +23,7 @@ use engine_test::{ use engine_traits::{TabletContext, TabletRegistry, DATA_CFS}; use futures::executor::block_on; use kvproto::{ + kvrpcpb::ApiVersion, metapb::{self, RegionEpoch, Store}, raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse, RaftRequestHeader, Request}, raft_serverpb::RaftMessage, @@ -44,6 +45,7 @@ use raftstore_v2::{ }; use resource_metering::CollectorRegHandle; use slog::{debug, o, Logger}; +use sst_importer::SstImporter; use tempfile::TempDir; use test_pd::mocker::Service; use tikv_util::{ @@ -297,6 +299,15 @@ impl RunningState { let snap_mgr = TabletSnapManager::new(path.join("tablets_snap").to_str().unwrap()).unwrap(); let coprocessor_host = 
CoprocessorHost::new(router.store_router().clone(), cop_cfg.value().clone()); + let importer = Arc::new( + SstImporter::new( + &Default::default(), + path.join("importer"), + None, + ApiVersion::V1, + ) + .unwrap(), + ); let background = Worker::new("background"); let pd_worker = LazyWorker::new("pd-worker"); @@ -318,6 +329,7 @@ impl RunningState { CollectorRegHandle::new_for_test(), background.clone(), pd_worker, + importer, ) .unwrap(); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 5ba70b5db5a..2cde9e9cb78 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1245,7 +1245,7 @@ where let import_service = ImportSstService::new( self.config.import.clone(), self.config.raft_store.raft_entry_max_size, - self.router.clone(), + engines.engine.clone(), engines.engines.kv.clone(), servers.importer.clone(), ); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 7b391c20bb8..01a76dfffbc 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -31,7 +31,7 @@ use causal_ts::CausalTsProviderImpl; use concurrency_manager::ConcurrencyManager; use encryption_export::{data_key_manager_from_config, DataKeyManager}; use engine_rocks::{ - flush_engine_statistics, + flush_engine_statistics, from_rocks_compression_type, raw::{Cache, Env}, FlowInfo, RocksEngine, RocksStatistics, }; @@ -73,6 +73,7 @@ use tikv::{ config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, + import::SstImporter, read_pool::{ build_yatp_read_pool, ReadPool, ReadPoolConfigManager, UPDATE_EWMA_TIME_SLICE_INTERVAL, }, @@ -243,6 +244,7 @@ struct TikvEngines { struct Servers { lock_mgr: LockManager, server: LocalServer, + _importer: Arc, rsmeter_pubsub_service: resource_metering::PubSubService, } @@ -871,6 +873,30 @@ where )), ); + let import_path = self.store_path.join("import"); 
+ let mut importer = SstImporter::new( + &self.config.import, + import_path, + self.encryption_key_manager.clone(), + self.config.storage.api_version(), + ) + .unwrap(); + for (cf_name, compression_type) in &[ + ( + CF_DEFAULT, + self.config.rocksdb.defaultcf.bottommost_level_compression, + ), + ( + CF_WRITE, + self.config.rocksdb.writecf.bottommost_level_compression, + ), + ] { + importer.set_compression_type(cf_name, from_rocks_compression_type(*compression_type)); + } + let importer = Arc::new(importer); + + // V2 starts split-check worker within raftstore. + let split_config_manager = SplitConfigManager::new(Arc::new(VersionTrack::new(self.config.split.clone()))); cfg_controller.register( @@ -919,6 +945,7 @@ where pd_worker, raft_store, &state, + importer.clone(), ) .unwrap_or_else(|e| fatal!("failed to start node: {}", e)); @@ -942,6 +969,7 @@ where self.servers = Some(Servers { lock_mgr, server, + _importer: importer, rsmeter_pubsub_service, }); @@ -950,6 +978,23 @@ where fn register_services(&mut self) { let servers = self.servers.as_mut().unwrap(); + let _engines = self.engines.as_ref().unwrap(); + + // Import SST service. 
+ // let import_service = ImportSstService::new( + // self.config.import.clone(), + // self.config.raft_store.raft_entry_max_size, + // engines.engine.clone(), + // self.tablet_registry.as_ref().unwrap().clone(), + // servers.importer.clone(), + // ); + // if servers + // .server + // .register_service(create_import_sst(import_service)) + // .is_some() + // { + // fatal!("failed to register import service"); + // } // Create Diagnostics service let diag_service = DiagnosticsService::new( diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index b9d057d33c5..30d3456d652 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -74,6 +74,7 @@ pub trait Simulator { node_id: u64, cfg: Config, store_meta: Arc>>, + key_mgr: Option>, raft_engine: RaftTestEngine, tablet_registry: TabletRegistry, resource_manager: &Option>, @@ -383,6 +384,7 @@ impl Cluster { id, self.cfg.clone(), store_meta.clone(), + key_mgr.clone(), raft_engine.clone(), tablet_registry.clone(), &self.resource_manager, @@ -424,10 +426,12 @@ impl Cluster { tikv_util::thread_group::set_properties(Some(props)); debug!("calling run node"; "node_id" => node_id); + let key_mgr = self.key_managers_map.get(&node_id).unwrap().clone(); self.sim.wl().run_node( node_id, cfg, store_meta, + key_mgr, raft_engine, tablet_registry, &self.resource_manager, diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 6c71e2d9cdc..0fde6ba42c5 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -1,15 +1,17 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ + path::Path, sync::{Arc, Mutex, RwLock}, time::Duration, }; use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; +use encryption_export::DataKeyManager; use engine_rocks::RocksEngine; use engine_test::raft::RaftTestEngine; -use engine_traits::{RaftEngineReadOnly, TabletRegistry}; +use engine_traits::{RaftEngine, RaftEngineReadOnly, TabletRegistry}; use kvproto::{ kvrpcpb::ApiVersion, raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, @@ -36,6 +38,7 @@ use test_pd_client::TestPdClient; use test_raftstore::{Config, Filter}; use tikv::{ config::{ConfigController, Module}, + import::SstImporter, server::{ raftkv::ReplicaReadLockChecker, tablet_snap::copy_tablet_snapshot, NodeV2, Result as ServerResult, @@ -187,6 +190,7 @@ impl Simulator for NodeCluster { node_id: u64, cfg: Config, store_meta: Arc>>, + key_manager: Option>, raft_engine: RaftTestEngine, tablet_registry: TabletRegistry, _resource_manager: &Option>, @@ -265,6 +269,12 @@ impl Simulator for NodeCluster { // todo: Is None sufficient for test? None, ); + let importer = { + let dir = Path::new(raft_engine.get_engine_path()).join("../import-sst"); + Arc::new( + SstImporter::new(&cfg.import, dir, key_manager, cfg.storage.api_version()).unwrap(), + ) + }; let bg_worker = WorkerBuilder::new("background").thread_count(2).create(); let state: Arc> = Arc::default(); @@ -283,6 +293,7 @@ impl Simulator for NodeCluster { pd_worker, Arc::new(VersionTrack::new(raft_store)), &state, + importer, )?; assert!( raft_engine diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index d02dffa73fc..1aa3bfc47f8 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -1,6 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ + path::Path, sync::{Arc, Mutex, RwLock}, thread, time::Duration, @@ -10,9 +11,10 @@ use api_version::{dispatch_api_version, KvFormat}; use causal_ts::CausalTsProviderImpl; use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; +use encryption_export::DataKeyManager; use engine_rocks::{RocksEngine, RocksSnapshot}; use engine_test::raft::RaftTestEngine; -use engine_traits::{KvEngine, TabletRegistry}; +use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use futures::executor::block_on; use grpcio::{ChannelBuilder, EnvBuilder, Environment, Error as GrpcError, Service}; use grpcio_health::HealthService; @@ -45,6 +47,7 @@ use test_pd_client::TestPdClient; use test_raftstore::{AddressMap, Config}; use tikv::{ coprocessor, coprocessor_v2, + import::SstImporter, read_pool::ReadPool, server::{ gc_worker::GcWorker, load_statistics::ThreadLoadPool, lock_manager::LockManager, @@ -168,6 +171,7 @@ impl ServerCluster { node_id: u64, mut cfg: Config, store_meta: Arc>>, + key_manager: Option>, raft_engine: RaftTestEngine, tablet_registry: TabletRegistry, resource_manager: &Option>, @@ -317,7 +321,20 @@ impl ServerCluster { ReplicaReadLockChecker::new(concurrency_manager.clone()).register(&mut coprocessor_host); - // todo: Import Sst Service + // Create import service. + let importer = { + let dir = Path::new(raft_engine.get_engine_path()).join("../import-sst"); + Arc::new( + SstImporter::new(&cfg.import, dir, key_manager, cfg.storage.api_version()).unwrap(), + ) + }; + // let import_service = ImportSstService::new( + // cfg.import.clone(), + // cfg.raft_store.raft_entry_max_size, + // raft_kv_2.clone(), + // tablet_registry.clone(), + // Arc::clone(&importer), + // ); // Create deadlock service. 
let deadlock_service = lock_mgr.deadlock_service(); @@ -382,6 +399,7 @@ impl ServerCluster { .unwrap(); svr.register_service(create_diagnostics(diag_service.clone())); svr.register_service(create_deadlock(deadlock_service.clone())); + // svr.register_service(create_import_sst(import_service.clone())); if let Some(svcs) = self.pending_services.get(&node_id) { for fact in svcs { svr.register_service(fact()); @@ -428,6 +446,7 @@ impl ServerCluster { pd_worker, Arc::new(VersionTrack::new(raft_store)), &state, + importer, )?; assert!(node_id == 0 || node_id == node.id()); let node_id = node.id(); @@ -538,6 +557,7 @@ impl Simulator for ServerCluster { node_id: u64, cfg: Config, store_meta: Arc>>, + key_manager: Option>, raft_engine: RaftTestEngine, tablet_registry: TabletRegistry, resource_manager: &Option>, @@ -548,6 +568,7 @@ impl Simulator for ServerCluster { node_id, cfg, store_meta, + key_manager, raft_engine, tablet_registry, resource_manager diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 3f6b704687a..8c2297fbc45 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -404,7 +404,7 @@ impl ServerCluster { )); let extension = engine.raft_extension(); let store = Storage::<_, _, F>::from_engine( - engine, + engine.clone(), &cfg.storage, storage_read_pool.handle(), lock_mgr.clone(), @@ -440,7 +440,7 @@ impl ServerCluster { let import_service = ImportSstService::new( cfg.import.clone(), cfg.raft_store.raft_entry_max_size, - sim_router.clone(), + engine, engines.kv.clone(), Arc::clone(&importer), ); diff --git a/components/tikv_kv/src/btree_engine.rs b/components/tikv_kv/src/btree_engine.rs index 35f666896f3..336523dd60c 100644 --- a/components/tikv_kv/src/btree_engine.rs +++ b/components/tikv_kv/src/btree_engine.rs @@ -290,6 +290,7 @@ fn write_modifies(engine: &BTreeEngine, modifies: Vec) -> EngineResult<( cf_tree.write().unwrap().insert(k, v); } Modify::DeleteRange(_cf, 
_start_key, _end_key, _notify_only) => unimplemented!(), + Modify::Ingest(_) => unimplemented!(), }; } Ok(()) diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 02bfc1c9c55..22b11e425c5 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -43,6 +43,7 @@ use futures::{compat::Future01CompatExt, future::BoxFuture, prelude::*}; use into_other::IntoOther; use kvproto::{ errorpb::Error as ErrorHeader, + import_sstpb::SstMeta, kvrpcpb::{Context, DiskFullOpt, ExtraOp as TxnExtraOp, KeyRange}, raft_cmdpb, }; @@ -80,6 +81,7 @@ pub enum Modify { PessimisticLock(Key, PessimisticLock), // cf_name, start_key, end_key, notify_only DeleteRange(CfName, Key, Key, bool), + Ingest(Box), } impl Modify { @@ -88,7 +90,7 @@ impl Modify { Modify::Delete(cf, _) => cf, Modify::Put(cf, ..) => cf, Modify::PessimisticLock(..) => &CF_LOCK, - Modify::DeleteRange(..) => unreachable!(), + Modify::DeleteRange(..) | Modify::Ingest(_) => unreachable!(), }; let cf_size = if cf == &CF_DEFAULT { 0 } else { cf.len() }; @@ -96,7 +98,7 @@ impl Modify { Modify::Delete(_, k) => cf_size + k.as_encoded().len(), Modify::Put(_, k, v) => cf_size + k.as_encoded().len() + v.len(), Modify::PessimisticLock(k, _) => cf_size + k.as_encoded().len(), // FIXME: inaccurate - Modify::DeleteRange(..) => unreachable!(), + Modify::DeleteRange(..) | Modify::Ingest(_) => unreachable!(), } } @@ -105,7 +107,7 @@ impl Modify { Modify::Delete(_, ref k) => k, Modify::Put(_, ref k, _) => k, Modify::PessimisticLock(ref k, _) => k, - Modify::DeleteRange(..) => unreachable!(), + Modify::DeleteRange(..) 
| Modify::Ingest(_) => unreachable!(), } } } @@ -151,6 +153,10 @@ impl From for raft_cmdpb::Request { req.set_cmd_type(raft_cmdpb::CmdType::DeleteRange); req.set_delete_range(delete_range); } + Modify::Ingest(sst) => { + req.set_cmd_type(raft_cmdpb::CmdType::IngestSst); + req.mut_ingest_sst().set_sst(*sst); + } }; req } @@ -191,6 +197,10 @@ impl From for Modify { delete_range.get_notify_only(), ) } + raft_cmdpb::CmdType::IngestSst => { + let sst = req.mut_ingest_sst().take_sst(); + Modify::Ingest(Box::new(sst)) + } _ => { unimplemented!() } @@ -220,6 +230,7 @@ pub struct WriteData { pub extra: TxnExtra, pub deadline: Option, pub disk_full_opt: DiskFullOpt, + pub avoid_batch: bool, } impl WriteData { @@ -229,6 +240,7 @@ impl WriteData { extra, deadline: None, disk_full_opt: DiskFullOpt::NotAllowedOnFull, + avoid_batch: false, } } @@ -251,9 +263,18 @@ impl WriteData { pub fn set_disk_full_opt(&mut self, level: DiskFullOpt) { self.disk_full_opt = level } + + /// Underlying engine may batch up several requests to increase throughput. + /// + /// If external correctness depends on isolation of requests, you may need + /// to set this flag to true. + pub fn set_avoid_batch(&mut self, avoid_batch: bool) { + self.avoid_batch = avoid_batch + } } /// Events that can subscribed from the `WriteSubscriber`. +#[derive(Debug)] pub enum WriteEvent { Proposed, Committed, @@ -746,6 +767,9 @@ pub fn write_modifies(kv_engine: &impl LocalEngine, modifies: Vec) -> Re Ok(()) } } + Modify::Ingest(_) => { + unimplemented!("IngestSST is not implemented for local engine yet.") + } }; // TODO: turn the error into an engine error. 
if let Err(msg) = res { diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 08eabe32f0c..b589da50b76 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -9,9 +9,9 @@ use std::{ }; use collections::HashSet; -use engine_traits::{KvEngine, CF_DEFAULT, CF_WRITE}; +use engine_traits::{CompactExt, MiscExt, CF_DEFAULT, CF_WRITE}; use file_system::{set_io_type, IoType}; -use futures::{sink::SinkExt, stream::TryStreamExt, TryFutureExt}; +use futures::{sink::SinkExt, stream::TryStreamExt, Stream, StreamExt, TryFutureExt}; use futures_executor::{ThreadPool, ThreadPoolBuilder}; use grpcio::{ ClientStreamingSink, RequestStream, RpcContext, ServerStreamingSink, UnarySink, WriteFlags, @@ -24,20 +24,15 @@ use kvproto::{ SwitchMode, WriteRequest_oneof_chunk as Chunk, *, }, kvrpcpb::Context, - raft_cmdpb::{CmdType, DeleteRequest, PutRequest, RaftCmdRequest, RaftRequestHeader, Request}, -}; -use protobuf::Message; -use raftstore::{ - router::RaftStoreRouter, - store::{Callback, RaftCmdExtraOpts, RegionSnapshot}, }; use sst_importer::{ error_inc, metrics::*, sst_importer::DownloadExt, sst_meta_to_path, Config, Error, Result, SstImporter, }; +use tikv_kv::{Engine, Modify, SnapContext, Snapshot, SnapshotExt, WriteData, WriteEvent}; use tikv_util::{ config::ReadableSize, - future::{create_stream_with_buffer, paired_future_callback}, + future::create_stream_with_buffer, sys::thread::ThreadBuildWrapper, time::{Instant, Limiter}, }; @@ -45,22 +40,41 @@ use tokio::{runtime::Runtime, time::sleep}; use txn_types::{Key, WriteRef, WriteType}; use super::make_rpc_error; -use crate::{import::duplicate_detect::DuplicateDetector, server::CONFIG_ROCKSDB_GAUGE}; +use crate::{ + import::duplicate_detect::DuplicateDetector, + server::CONFIG_ROCKSDB_GAUGE, + storage::{self, errors::extract_region_error_from_error}, +}; const MAX_INFLIGHT_RAFT_MSGS: usize = 64; +fn transfer_error(err: storage::Error) -> ImportPbError { + let mut e = ImportPbError::default(); + if 
let Some(region_error) = extract_region_error_from_error(&err) { + e.set_store_error(region_error); + } + e.set_message(format!("failed to complete raft command: {:?}", err)); + e +} + +async fn wait_write(mut s: impl Stream + Send + Unpin) -> storage::Result<()> { + match s.next().await { + Some(WriteEvent::Finished(Ok(()))) => Ok(()), + Some(WriteEvent::Finished(Err(e))) => Err(e.into()), + Some(e) => Err(box_err!("unexpected event: {:?}", e)), + None => Err(box_err!("stream closed")), + } +} + /// ImportSstService provides tikv-server with the ability to ingest SST files. /// /// It saves the SST sent from client to a file and then sends a command to /// raftstore to trigger the ingest process. #[derive(Clone)] -pub struct ImportSstService -where - E: KvEngine, -{ +pub struct ImportSstService { cfg: Config, + tablet_registry: E::Local, engine: E, - router: Router, threads: Arc, // For now, PiTR cannot be executed in the tokio runtime because it is synchronous and may // blocks. (tokio is so strict... it panics if we do insane things like blocking in an async @@ -74,36 +88,29 @@ where raft_entry_max_size: ReadableSize, } -pub struct SnapshotResult { - snapshot: RegionSnapshot, - term: u64, -} - struct RequestCollector { - context: Context, max_raft_req_size: usize, /// Retain the last ts of each key in each request. /// This is used for write CF because resolved ts observer hates duplicated /// key in the same request. - write_reqs: HashMap, (Request, u64)>, + write_reqs: HashMap, (Modify, u64)>, /// Collector favor that simple collect all items, and it do not contains /// duplicated key-value. This is used for default CF. - default_reqs: HashMap, Request>, + default_reqs: HashMap, Modify>, /// Size of all `Request`s. 
unpacked_size: usize, - pending_raft_reqs: Vec, + pending_writes: Vec, } impl RequestCollector { - fn new(context: Context, max_raft_req_size: usize) -> Self { + fn new(max_raft_req_size: usize) -> Self { Self { - context, max_raft_req_size, write_reqs: HashMap::default(), default_reqs: HashMap::default(), unpacked_size: 0, - pending_raft_reqs: Vec::new(), + pending_writes: Vec::new(), } } @@ -113,41 +120,37 @@ impl RequestCollector { if k.is_empty() || (!is_delete && v.is_empty()) { return; } - let mut req = Request::default(); - if is_delete { - let mut del = DeleteRequest::default(); - del.set_key(k); - del.set_cf(cf.to_string()); - req.set_cmd_type(CmdType::Delete); - req.set_delete(del); + // Filter out not supported CF. + let cf = match cf { + CF_WRITE => CF_WRITE, + CF_DEFAULT => CF_DEFAULT, + _ => return, + }; + let m = if is_delete { + Modify::Delete(cf, Key::from_encoded(k)) } else { if cf == CF_WRITE && !write_needs_restore(&v) { return; } - let mut put = PutRequest::default(); - put.set_key(k); - put.set_value(v); - put.set_cf(cf.to_string()); - req.set_cmd_type(CmdType::Put); - req.set_put(put); - } - self.accept(cf, req); + Modify::Put(cf, Key::from_encoded(k), v) + }; + self.accept(cf, m); } // we need to remove duplicate keys in here, since // in https://github.com/tikv/tikv/blob/a401f78bc86f7e6ea6a55ad9f453ae31be835b55/components/resolved_ts/src/cmd.rs#L204 // will panic if found duplicated entry during Vec. 
- fn accept(&mut self, cf: &str, req: Request) { - let k = key_from_request(&req); + fn accept(&mut self, cf: &str, m: Modify) { + let k = m.key(); match cf { CF_WRITE => { - let (encoded_key, ts) = match Key::split_on_ts_for(k) { + let (encoded_key, ts) = match Key::split_on_ts_for(k.as_encoded()) { Ok(k) => k, Err(err) => { warn!( "key without ts, skipping"; - "key" => %log_wrappers::Value::key(k), + "key" => %k, "err" => %err ); return; @@ -159,19 +162,19 @@ impl RequestCollector { .map(|(_, old_ts)| *old_ts < ts.into_inner()) .unwrap_or(true) { - self.unpacked_size += req.compute_size() as usize; + self.unpacked_size += m.size(); if let Some((v, _)) = self .write_reqs - .insert(encoded_key.to_owned(), (req, ts.into_inner())) + .insert(encoded_key.to_owned(), (m, ts.into_inner())) { - self.unpacked_size -= v.get_cached_size() as usize; + self.unpacked_size -= v.size(); } } } CF_DEFAULT => { - self.unpacked_size += req.compute_size() as usize; - if let Some(v) = self.default_reqs.insert(k.to_owned(), req) { - self.unpacked_size -= v.get_cached_size() as usize; + self.unpacked_size += m.size(); + if let Some(v) = self.default_reqs.insert(k.as_encoded().clone(), m) { + self.unpacked_size -= v.size(); } } _ => unreachable!(), @@ -183,69 +186,61 @@ impl RequestCollector { } #[cfg(test)] - fn drain_unpacked_reqs(&mut self, cf: &str) -> Vec { - let res: Vec = if cf == CF_DEFAULT { - self.default_reqs.drain().map(|(_, req)| req).collect() + fn drain_unpacked_reqs(&mut self, cf: &str) -> Vec { + let res: Vec = if cf == CF_DEFAULT { + self.default_reqs.drain().map(|(_, m)| m).collect() } else { - self.write_reqs.drain().map(|(_, (req, _))| req).collect() + self.write_reqs.drain().map(|(_, (m, _))| m).collect() }; for r in &res { - self.unpacked_size -= r.get_cached_size() as usize; + self.unpacked_size -= r.size(); } res } #[inline] - fn drain_raft_reqs(&mut self, take_unpacked: bool) -> std::vec::Drain<'_, RaftCmdRequest> { + fn drain_pending_writes(&mut self, 
take_unpacked: bool) -> std::vec::Drain<'_, WriteData> { if take_unpacked { self.pack_all(); } - self.pending_raft_reqs.drain(..) + self.pending_writes.drain(..) } fn pack_all(&mut self) { if self.unpacked_size == 0 { return; } - let mut cmd = RaftCmdRequest::default(); - let mut header = make_request_header(self.context.clone()); // Set the UUID of header to prevent raftstore batching our requests. // The current `resolved_ts` observer assumes that each batch of request doesn't // has two writes to the same key. (Even with 2 different TS). That was true // for normal cases because the latches reject concurrency write to keys. // However we have bypassed the latch layer :( - header.set_uuid(uuid::Uuid::new_v4().as_bytes().to_vec()); - cmd.set_header(header); let mut reqs: Vec<_> = self.write_reqs.drain().map(|(_, (req, _))| req).collect(); reqs.append(&mut self.default_reqs.drain().map(|(_, req)| req).collect()); if reqs.is_empty() { debug_assert!(false, "attempt to pack an empty request"); return; } - cmd.set_requests(reqs.into()); - - self.pending_raft_reqs.push(cmd); + let mut data = WriteData::from_modifies(reqs); + data.set_avoid_batch(true); + self.pending_writes.push(data); self.unpacked_size = 0; } #[inline] fn is_empty(&self) -> bool { - self.pending_raft_reqs.is_empty() && self.unpacked_size == 0 + self.pending_writes.is_empty() && self.unpacked_size == 0 } } -impl ImportSstService -where - E: KvEngine, - Router: 'static + RaftStoreRouter, -{ +impl ImportSstService { pub fn new( cfg: Config, raft_entry_max_size: ReadableSize, - router: Router, engine: E, + tablet_registry: E::Local, importer: Arc, - ) -> ImportSstService { + ) -> Self { let props = tikv_util::thread_group::current_properties(); let threads = tokio::runtime::Builder::new_multi_thread() .worker_threads(cfg.num_threads) @@ -271,15 +266,15 @@ where .before_stop_wrapper(move || tikv_alloc::remove_thread_memory_accessor()) .create() .unwrap(); - 
importer.start_switch_mode_check(threads.handle(), engine.clone()); + importer.start_switch_mode_check(threads.handle(), tablet_registry.clone()); threads.spawn(Self::tick(importer.clone())); ImportSstService { cfg, - engine, + tablet_registry, threads: Arc::new(threads), block_threads: Arc::new(block_threads), - router, + engine, importer, limiter: Limiter::new(f64::INFINITY), task_slots: Arc::new(Mutex::new(HashSet::default())), @@ -306,46 +301,36 @@ where Ok(slots.remove(&p)) } - async fn async_snapshot( - router: Router, - header: RaftRequestHeader, - ) -> std::result::Result, errorpb::Error> { - let mut req = Request::default(); - req.set_cmd_type(CmdType::Snap); - let mut cmd = RaftCmdRequest::default(); - cmd.set_header(header); - cmd.set_requests(vec![req].into()); - let (cb, future) = paired_future_callback(); - if let Err(e) = router.send_command(cmd, Callback::read(cb), RaftCmdExtraOpts::default()) { - return Err(e.into()); - } - let mut res = future.await.map_err(|_| { - let mut err = errorpb::Error::default(); - let err_str = "too many sst files are ingesting"; - let mut server_is_busy_err = errorpb::ServerIsBusy::default(); - server_is_busy_err.set_reason(err_str.to_string()); - err.set_message(err_str.to_string()); - err.set_server_is_busy(server_is_busy_err); - err - })?; - let mut header = res.response.take_header(); - if header.has_error() { - return Err(header.take_error()); + fn async_snapshot( + engine: &mut E, + context: &Context, + ) -> impl Future> { + let res = engine.async_snapshot(SnapContext { + pb_ctx: context, + ..Default::default() + }); + async move { + res.await.map_err(|e| { + let err: storage::Error = e.into(); + if let Some(e) = extract_region_error_from_error(&err) { + e + } else { + let mut e = errorpb::Error::default(); + e.set_message(format!("{}", err)); + e + } + }) } - Ok(SnapshotResult { - snapshot: res.snapshot.unwrap(), - term: header.get_current_term(), - }) } fn check_write_stall(&self) -> Option { if 
self.importer.get_mode() == SwitchMode::Normal && self - .engine + .tablet_registry .ingest_maybe_slowdown_writes(CF_WRITE) .expect("cf") { - match self.engine.get_sst_key_ranges(CF_WRITE, 0) { + match self.tablet_registry.get_sst_key_ranges(CF_WRITE, 0) { Ok(l0_sst_ranges) => { warn!( "sst ingest is too slow"; @@ -368,14 +353,13 @@ where } fn ingest_files( - &self, - context: Context, + &mut self, + mut context: Context, label: &'static str, ssts: Vec, ) -> impl Future> { - let header = make_request_header(context); - let snapshot_res = Self::async_snapshot(self.router.clone(), header.clone()); - let router = self.router.clone(); + let snapshot_res = Self::async_snapshot(&mut self.engine, &context); + let engine = self.engine.clone(); let importer = self.importer.clone(); async move { // check api version @@ -394,17 +378,6 @@ where }; fail_point!("import::sst_service::ingest"); - // Make ingest command. - let mut cmd = RaftCmdRequest::default(); - cmd.set_header(header); - cmd.mut_header().set_term(res.term); - for sst in ssts.iter() { - let mut ingest = Request::default(); - ingest.set_cmd_type(CmdType::IngestSst); - ingest.mut_ingest_sst().set_sst(sst.clone()); - cmd.mut_requests().push(ingest); - } - // Here we shall check whether the file has been ingested before. This operation // must execute after geting a snapshot from raftstore to make sure that the // current leader has applied to current term. 
@@ -423,20 +396,31 @@ where return Ok(resp); } } + let modifies = ssts + .iter() + .map(|s| Modify::Ingest(Box::new(s.clone()))) + .collect(); + context.set_term(res.ext().get_term().unwrap().into()); + let region_id = context.get_region_id(); + let res = engine.async_write( + &context, + WriteData::from_modifies(modifies), + WriteEvent::BASIC_EVENT, + None, + ); - let (cb, future) = paired_future_callback(); - if let Err(e) = - router.send_command(cmd, Callback::write(cb), RaftCmdExtraOpts::default()) - { - resp.set_error(e.into()); - return Ok(resp); - } - - let mut res = future.await.map_err(Error::from)?; - let mut header = res.response.take_header(); - if header.has_error() { - pb_error_inc(label, header.get_error()); - resp.set_error(header.take_error()); + let mut resp = IngestResponse::default(); + if let Err(e) = wait_write(res).await { + if let Some(e) = extract_region_error_from_error(&e) { + pb_error_inc(label, &e); + resp.set_error(e); + } else { + IMPORTER_ERROR_VEC + .with_label_values(&[label, "unknown"]) + .inc(); + resp.mut_error() + .set_message(format!("[region {}] ingest failed: {:?}", region_id, e)); + } } Ok(resp) } @@ -445,33 +429,14 @@ where async fn apply_imp( mut req: ApplyRequest, importer: Arc, - router: Router, + engine: E, limiter: Limiter, max_raft_size: usize, ) -> std::result::Result, ImportPbError> { - type RaftWriteFuture = futures::channel::oneshot::Receiver; - async fn handle_raft_write(fut: RaftWriteFuture) -> std::result::Result<(), ImportPbError> { - match fut.await { - Err(e) => { - let msg = format!("failed to complete raft command: {}", e); - let mut e = ImportPbError::default(); - e.set_message(msg); - return Err(e); - } - Ok(mut r) if r.response.get_header().has_error() => { - let mut e = ImportPbError::default(); - e.set_message("failed to complete raft command".to_string()); - e.set_store_error(r.response.take_header().take_error()); - return Err(e); - } - _ => {} - } - Ok(()) - } - let mut range: Option = None; - let 
mut collector = RequestCollector::new(req.take_context(), max_raft_size * 7 / 8); + let mut collector = RequestCollector::new(max_raft_size * 7 / 8); + let context = req.take_context(); let mut metas = req.take_metas(); let mut rules = req.take_rewrite_rules(); // For compatibility with old requests. @@ -485,7 +450,7 @@ where false, ); - let mut inflight_futures: VecDeque = VecDeque::new(); + let mut inflight_futures = VecDeque::new(); let mut tasks = metas.iter().zip(rules.iter()).peekable(); while let Some((meta, rule)) = tasks.next() { @@ -513,25 +478,19 @@ where } let is_last_task = tasks.peek().is_none(); - for req in collector.drain_raft_reqs(is_last_task) { - while inflight_futures.len() >= MAX_INFLIGHT_RAFT_MSGS { - handle_raft_write(inflight_futures.pop_front().unwrap()).await?; - } - let (cb, future) = paired_future_callback(); - match router.send_command(req, Callback::write(cb), RaftCmdExtraOpts::default()) { - Ok(_) => inflight_futures.push_back(future), - Err(e) => { - let msg = format!("failed to send raft command: {}", e); - let mut e = ImportPbError::default(); - e.set_message(msg); - return Err(e); - } + for req in collector.drain_pending_writes(is_last_task) { + let f = engine.async_write(&context, req, WriteEvent::BASIC_EVENT, None); + inflight_futures.push_back(f); + if inflight_futures.len() >= MAX_INFLIGHT_RAFT_MSGS { + wait_write(inflight_futures.pop_front().unwrap()) + .await + .map_err(transfer_error)?; } } } assert!(collector.is_empty()); - for fut in inflight_futures { - handle_raft_write(fut).await?; + for f in inflight_futures { + wait_write(f).await.map_err(transfer_error)?; } Ok(range) @@ -548,7 +507,7 @@ macro_rules! 
impl_write { sink: ClientStreamingSink<$resp_ty>, ) { let import = self.importer.clone(); - let engine = self.engine.clone(); + let tablet_registry = self.tablet_registry.clone(); let (rx, buf_driver) = create_stream_with_buffer(stream, self.cfg.stream_channel_window); let mut rx = rx.map_err(Error::from); @@ -566,7 +525,7 @@ macro_rules! impl_write { _ => return Err(Error::InvalidChunk), }; - let writer = match import.$writer_fn(&engine, meta) { + let writer = match import.$writer_fn(&tablet_registry, meta) { Ok(w) => w, Err(e) => { error!("build writer failed {:?}", e); @@ -600,11 +559,7 @@ macro_rules! impl_write { }; } -impl ImportSst for ImportSstService -where - E: KvEngine, - Router: 'static + RaftStoreRouter, -{ +impl ImportSst for ImportSstService { fn switch_mode( &mut self, ctx: RpcContext<'_>, @@ -620,8 +575,12 @@ where } match req.get_mode() { - SwitchMode::Normal => self.importer.enter_normal_mode(self.engine.clone(), mf), - SwitchMode::Import => self.importer.enter_import_mode(self.engine.clone(), mf), + SwitchMode::Normal => self + .importer + .enter_normal_mode(self.tablet_registry.clone(), mf), + SwitchMode::Import => self + .importer + .enter_import_mode(self.tablet_registry.clone(), mf), } }; match res { @@ -721,7 +680,7 @@ where let label = "apply"; let start = Instant::now(); let importer = self.importer.clone(); - let router = self.router.clone(); + let engine = self.engine.clone(); let limiter = self.limiter.clone(); let max_raft_size = self.raft_entry_max_size.0 as usize; @@ -733,7 +692,7 @@ where let mut resp = ApplyResponse::default(); - match Self::apply_imp(req, importer, router, limiter, max_raft_size).await { + match Self::apply_imp(req, importer, engine, limiter, max_raft_size).await { Ok(Some(r)) => resp.set_range(r), Err(e) => resp.set_error(e), _ => {} @@ -756,7 +715,7 @@ where let timer = Instant::now_coarse(); let importer = Arc::clone(&self.importer); let limiter = self.limiter.clone(); - let engine = self.engine.clone(); + let 
tablet_registry = self.tablet_registry.clone(); let start = Instant::now(); let handle_task = async move { @@ -775,14 +734,14 @@ where .into_option() .filter(|c| c.cipher_type != EncryptionMethod::Plaintext); - let res = importer.download_ext::( + let res = importer.download_ext::( req.get_sst(), req.get_storage_backend(), req.get_name(), req.get_rewrite_rule(), cipher, limiter, - engine, + tablet_registry, DownloadExt::default() .cache_key(req.get_storage_cache_id()) .req_type(req.get_request_type()), @@ -906,7 +865,7 @@ where ) { let label = "compact"; let timer = Instant::now_coarse(); - let engine = self.engine.clone(); + let tablet_registry = self.tablet_registry.clone(); let handle_task = async move { let (start, end) = if !req.has_range() { @@ -923,7 +882,7 @@ where Some(req.get_output_level()) }; - let res = engine.compact_files_in_range(start, end, output_level); + let res = tablet_registry.compact_files_in_range(start, end, output_level); match res { Ok(_) => info!( "compact files in range"; @@ -984,7 +943,6 @@ where let label = "duplicate_detect"; let timer = Instant::now_coarse(); let context = request.take_context(); - let router = self.router.clone(); let start_key = request.take_start_key(); let min_commit_ts = request.get_min_commit_ts(); let end_key = if request.get_end_key().is_empty() { @@ -993,11 +951,11 @@ where Some(request.take_end_key()) }; let key_only = request.get_key_only(); - let snap_res = Self::async_snapshot(router, make_request_header(context)); + let snap_res = Self::async_snapshot(&mut self.engine, &context); let handle_task = async move { let res = snap_res.await; let snapshot = match res { - Ok(snap) => snap.snapshot, + Ok(snap) => snap, Err(e) => { let mut resp = DuplicateDetectResponse::default(); pb_error_inc(label, &e); @@ -1078,25 +1036,6 @@ fn pb_error_inc(type_: &str, e: &errorpb::Error) { IMPORTER_ERROR_VEC.with_label_values(&[type_, label]).inc(); } -fn key_from_request(req: &Request) -> &[u8] { - if req.has_put() { - 
return req.get_put().get_key(); - } - if req.has_delete() { - return req.get_delete().get_key(); - } - panic!("trying to extract key from request is neither put nor delete.") -} - -fn make_request_header(mut context: Context) -> RaftRequestHeader { - let region_id = context.get_region_id(); - let mut header = RaftRequestHeader::default(); - header.set_peer(context.take_peer()); - header.set_region_id(region_id); - header.set_region_epoch(context.take_region_epoch()); - header -} - fn write_needs_restore(write: &[u8]) -> bool { let w = WriteRef::parse(write); match w { @@ -1127,10 +1066,10 @@ mod test { use std::collections::HashMap; use engine_traits::{CF_DEFAULT, CF_WRITE}; - use kvproto::{kvrpcpb::Context, raft_cmdpb::*}; + use tikv_kv::Modify; use txn_types::{Key, TimeStamp, Write, WriteType}; - use crate::import::sst_service::{key_from_request, RequestCollector}; + use crate::import::sst_service::RequestCollector; fn write(key: &[u8], ty: WriteType, commit_ts: u64, start_ts: u64) -> (Vec, Vec) { let k = Key::from_raw(key).append_ts(TimeStamp::new(commit_ts)); @@ -1143,45 +1082,18 @@ mod test { (k.into_encoded(), val.to_owned()) } - fn default_req(key: &[u8], val: &[u8], start_ts: u64) -> Request { + fn default_req(key: &[u8], val: &[u8], start_ts: u64) -> Modify { let (k, v) = default(key, val, start_ts); - req(k, v, CF_DEFAULT, CmdType::Put) + Modify::Put(CF_DEFAULT, Key::from_encoded(k), v) } - fn write_req(key: &[u8], ty: WriteType, commit_ts: u64, start_ts: u64) -> Request { + fn write_req(key: &[u8], ty: WriteType, commit_ts: u64, start_ts: u64) -> Modify { let (k, v) = write(key, ty, commit_ts, start_ts); - let cmd_type = if ty == WriteType::Delete { - CmdType::Delete + if ty == WriteType::Delete { + Modify::Delete(CF_WRITE, Key::from_encoded(k)) } else { - CmdType::Put - }; - - req(k, v, CF_WRITE, cmd_type) - } - - fn req(k: Vec, v: Vec, cf: &str, cmd_type: CmdType) -> Request { - let mut req = Request::default(); - req.set_cmd_type(cmd_type); - - match 
cmd_type { - CmdType::Put => { - let mut put = PutRequest::default(); - put.set_key(k); - put.set_value(v); - put.set_cf(cf.to_string()); - - req.set_put(put) - } - CmdType::Delete => { - let mut del = DeleteRequest::default(); - del.set_cf(cf.to_string()); - del.set_key(k); - - req.set_delete(del); - } - _ => panic!("invalid input cmd_type"), + Modify::Put(CF_WRITE, Key::from_encoded(k), v) } - req } #[test] @@ -1191,27 +1103,30 @@ mod test { cf: &'static str, is_delete: bool, mutations: Vec<(Vec, Vec)>, - expected_reqs: Vec, + expected_reqs: Vec, } fn run_case(c: &Case) { - let mut collector = RequestCollector::new(Context::new(), 1024); + let mut collector = RequestCollector::new(1024); for (k, v) in c.mutations.clone() { collector.accept_kv(c.cf, c.is_delete, k, v); } - let reqs = collector.drain_raft_reqs(true); + let reqs = collector.drain_pending_writes(true); let mut req1: HashMap<_, _> = reqs .into_iter() - .flat_map(|mut x| x.take_requests().into_iter()) + .flat_map(|x| { + assert!(x.avoid_batch); + x.modifies.into_iter() + }) .map(|req| { - let key = key_from_request(&req).to_owned(); + let key = req.key().to_owned(); (key, req) }) .collect(); for req in c.expected_reqs.iter() { - let r = req1.remove(key_from_request(req)); + let r = req1.remove(req.key()); assert_eq!(r.as_ref(), Some(req), "{:?}", c); } assert!(req1.is_empty(), "{:?}\ncase = {:?}", req1, c); @@ -1284,7 +1199,7 @@ mod test { #[test] fn test_request_collector_with_write_cf() { - let mut request_collector = RequestCollector::new(Context::new(), 102400); + let mut request_collector = RequestCollector::new(102400); let reqs = vec![ write_req(b"foo", WriteType::Put, 40, 39), write_req(b"aar", WriteType::Put, 38, 37), @@ -1301,18 +1216,14 @@ mod test { request_collector.accept(CF_WRITE, req); } let mut reqs: Vec<_> = request_collector.drain_unpacked_reqs(CF_WRITE); - reqs.sort_by(|r1, r2| { - let k1 = key_from_request(r1); - let k2 = key_from_request(r2); - k1.cmp(k2) - }); + reqs.sort_by(|r1, 
r2| r1.key().cmp(r2.key())); assert_eq!(reqs, reqs_result); assert!(request_collector.is_empty()); } #[test] fn test_request_collector_with_default_cf() { - let mut request_collector = RequestCollector::new(Context::new(), 102400); + let mut request_collector = RequestCollector::new(102400); let reqs = vec![ default_req(b"foo", b"", 39), default_req(b"zzz", b"", 40), @@ -1330,10 +1241,8 @@ mod test { } let mut reqs: Vec<_> = request_collector.drain_unpacked_reqs(CF_DEFAULT); reqs.sort_by(|r1, r2| { - let k1 = key_from_request(r1); - let (k1, ts1) = Key::split_on_ts_for(k1).unwrap(); - let k2 = key_from_request(r2); - let (k2, ts2) = Key::split_on_ts_for(k2).unwrap(); + let (k1, ts1) = Key::split_on_ts_for(r1.key().as_encoded()).unwrap(); + let (k2, ts2) = Key::split_on_ts_for(r2.key().as_encoded()).unwrap(); k1.cmp(k2).then(ts1.cmp(&ts2)) }); diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 106b36f61ad..87ab5c10575 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -1361,6 +1361,7 @@ pub mod test_gc_worker { let bytes = keys::data_end_key(key2.as_encoded()); *key2 = Key::from_encoded(bytes); } + Modify::Ingest(_) => unimplemented!(), } } write_modifies(&self.kv_engine().unwrap(), modifies) @@ -1388,6 +1389,7 @@ pub mod test_gc_worker { *start_key = Key::from_encoded(keys::data_key(start_key.as_encoded())); *end_key = Key::from_encoded(keys::data_end_key(end_key.as_encoded())); } + Modify::Ingest(_) => unimplemented!(), }); self.0.async_write(ctx, batch, subscribed, on_applied) } diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 0f0d8fa5689..e175fa502f8 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -393,6 +393,9 @@ where let bytes = keys::data_end_key(key2.as_encoded()); *key2 = Key::from_encoded(bytes); } + Modify::Ingest(_) => { + return Err(box_err!("ingest sst is not supported in local engine")); + } } } } @@ -449,6 +452,9 @@ where let 
reqs: Vec = batch.modifies.into_iter().map(Into::into).collect(); let txn_extra = batch.extra; let mut header = new_request_header(ctx); + if batch.avoid_batch { + header.set_uuid(uuid::Uuid::new_v4().as_bytes().to_vec()); + } let mut flags = 0; if txn_extra.one_pc { flags |= WriteBatchFlags::ONE_PC.bits(); diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 28f2a1d5d25..9fb4ef70b03 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -69,6 +69,8 @@ impl Stream for Transform { fn modifies_to_simple_write(modifies: Vec) -> SimpleWriteBinary { let mut encoder = SimpleWriteEncoder::with_capacity(128); + let modifies_len = modifies.len(); + let mut ssts = vec![]; for m in modifies { match m { Modify::Put(cf, k, v) => encoder.put(cf, k.as_encoded(), &v), @@ -82,8 +84,17 @@ fn modifies_to_simple_write(modifies: Vec) -> SimpleWriteBinary { end_key.as_encoded(), notify_only, ), + Modify::Ingest(sst) => { + if ssts.capacity() == 0 { + ssts.reserve(modifies_len); + } + ssts.push(*sst); + } } } + if !ssts.is_empty() { + encoder.ingest(ssts); + } encoder.encode() } @@ -228,7 +239,10 @@ impl tikv_kv::Engine for RaftKv2 { header.set_flags(flags); self.schedule_txn_extra(batch.extra); - let data = modifies_to_simple_write(batch.modifies); + let mut data = modifies_to_simple_write(batch.modifies); + if batch.avoid_batch { + data.freeze(); + } let mut builder = CmdResChannelBuilder::default(); if WriteEvent::subscribed_proposed(subscribed) { builder.subscribe_proposed(); diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index 588e8ae9e9b..b9cc956d40e 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -17,6 +17,7 @@ use raftstore::{ use raftstore_v2::{router::RaftRouter, Bootstrap, PdTask, StoreRouter, StoreSystem}; use resource_metering::CollectorRegHandle; use slog::{info, o, Logger}; +use sst_importer::SstImporter; use tikv_util::{ config::VersionTrack, worker::{LazyWorker, Worker}, @@ -102,6 
+103,7 @@ where pd_worker: LazyWorker, store_cfg: Arc>, state: &Mutex, + sst_importer: Arc, ) -> Result<()> where T: Transport + 'static, @@ -140,6 +142,7 @@ where background, pd_worker, store_cfg, + sst_importer, )?; Ok(()) @@ -201,6 +204,7 @@ where background: Worker, pd_worker: LazyWorker, store_cfg: Arc>, + sst_importer: Arc, ) -> Result<()> where T: Transport + 'static, @@ -232,6 +236,7 @@ where collector_reg_handle, background, pd_worker, + sst_importer, )?; Ok(()) } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index e530cc56577..d8f31ba77a8 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -1015,6 +1015,7 @@ pub mod tests { wb.delete_range_cf(cf, &k1, &k2).unwrap(); } } + Modify::Ingest(_) => unimplemented!(), } } wb.write().unwrap(); diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 30dd3b120ca..4a981bdfa53 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -1207,7 +1207,7 @@ fn test_double_run_node() { let snap_mgr = SnapManager::new(tmp.path().to_str().unwrap()); let coprocessor_host = CoprocessorHost::new(router, raftstore::coprocessor::Config::default()); let importer = { - let dir = Path::new(engines.kv.path()).join("import-sst"); + let dir = Path::new(MiscExt::path(&engines.kv)).join("import-sst"); Arc::new(SstImporter::new(&ImportConfig::default(), dir, None, ApiVersion::V1).unwrap()) }; let (split_check_scheduler, _) = dummy_scheduler(); From 76bbf9504eabc80c142167b41a691560ae813938 Mon Sep 17 00:00:00 2001 From: Nathan Date: Wed, 1 Mar 2023 14:47:08 +0800 Subject: [PATCH 0554/1149] raft_client: report SnapshotStatus for witness (#14267) close tikv/tikv#14228 raft_client: report SnapshotStatus for witness Signed-off-by: Wenbo Zhang Co-authored-by: Ti Chi Robot --- src/server/raft_client.rs | 6 +- tests/integrations/raftstore/test_witness.rs | 60 +++++++++++++++++++- 2 
files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index fa12600bb98..17de1d3365d 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -461,10 +461,14 @@ where snapshot .merge_from_bytes(msg.get_message().get_snapshot().get_data()) .unwrap(); - // Witness's snapshot must be empty, no need to send snapshot files + // Witness's snapshot must be empty, no need to send snapshot files, report + // immediately if !snapshot.get_meta().get_for_witness() { self.send_snapshot_sock(msg); continue; + } else { + let rep = self.new_snapshot_reporter(&msg); + rep.report(SnapshotStatus::Finish); } } self.buffer.push(msg); diff --git a/tests/integrations/raftstore/test_witness.rs b/tests/integrations/raftstore/test_witness.rs index d5a9992bc3a..d4332403cea 100644 --- a/tests/integrations/raftstore/test_witness.rs +++ b/tests/integrations/raftstore/test_witness.rs @@ -1,6 +1,10 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{iter::FromIterator, sync::Arc, time::Duration}; +use std::{ + iter::FromIterator, + sync::{Arc, Mutex}, + time::Duration, +}; use collections::HashMap; use futures::executor::block_on; @@ -10,9 +14,13 @@ use kvproto::{ raft_serverpb::{PeerState, RaftApplyState}, }; use pd_client::PdClient; -use raft::eraftpb::ConfChangeType; +use raft::eraftpb::{ConfChangeType, MessageType}; use test_raftstore::*; -use tikv_util::{config::ReadableDuration, store::find_peer}; +use tikv_util::{ + config::ReadableDuration, + store::{find_peer, new_witness_peer}, + HandyRwLock, +}; // Test the case that region split or merge with witness peer #[test] @@ -598,3 +606,49 @@ fn test_witness_ignore_consistency_check() { std::thread::sleep(Duration::from_millis(10)); } } + +// Test the case that witness apply snapshot with network isolation +#[test] +fn test_witness_apply_snapshot_with_network_isolation() { + let mut cluster = new_server_cluster(0, 3); + configure_for_snapshot(&mut cluster.cfg); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + let r1 = cluster.run_conf_change(); + pd_client.must_add_peer(r1, new_peer(2, 2)); + pd_client.must_add_peer(r1, new_witness_peer(3, 3)); + // Ensure all peers are initialized. + std::thread::sleep(Duration::from_millis(100)); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + + cluster.add_send_filter(IsolationFilterFactory::new(3)); + + for i in 0..20 { + cluster.must_put(format!("k{}", i).as_bytes(), b"v1"); + } + sleep_ms(500); + + // Ignore witness's MsgAppendResponse, after applying snaphost + let dropped_msgs = Arc::new(Mutex::new(Vec::new())); + let recv_filter = Box::new( + RegionPacketFilter::new(r1, 1) + .direction(Direction::Recv) + .msg_type(MessageType::MsgAppendResponse) + .reserve_dropped(Arc::clone(&dropped_msgs)), + ); + cluster.sim.wl().add_recv_filter(1, recv_filter); + + cluster.clear_send_filters(); + // Wait for leader send snapshot. 
+ sleep_ms(500); + + cluster.sim.wl().clear_recv_filters(1); + + // Witness's ProgressState must have been changed to Probe + cluster.must_transfer_leader(1, new_peer(2, 2)); + + for i in 20..25 { + cluster.must_put(format!("k{}", i).as_bytes(), b"v1"); + } +} From f3cb8ed3e81a1cbc112126dd2806a0213a434eaf Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 28 Feb 2023 23:33:09 -0800 Subject: [PATCH 0555/1149] tablet_flow_control polish (#14232) ref tikv/tikv#12842 1) Remove the suffix parameter in tablet flow control code as it's not needed anymore by using the FlowControlFactorStore. 2) Before this change, it the tablet flow control may not work properly because of unnecessarily checking the suffix value. 3) Fix the scheduler flow's metrics for multi-rocksdb Signed-off-by: qi.xu Co-authored-by: qi.xu --- components/engine_rocks/src/flow_listener.rs | 27 +- src/server/engine_factory.rs | 12 +- .../singleton_flow_controller.rs | 329 +++++------------- .../flow_controller/tablet_flow_controller.rs | 160 +++++---- 4 files changed, 203 insertions(+), 325 deletions(-) diff --git a/components/engine_rocks/src/flow_listener.rs b/components/engine_rocks/src/flow_listener.rs index f36b5393f7a..4a4f80cc46f 100644 --- a/components/engine_rocks/src/flow_listener.rs +++ b/components/engine_rocks/src/flow_listener.rs @@ -7,21 +7,20 @@ use rocksdb::{CompactionJobInfo, EventListener, FlushJobInfo, IngestionInfo}; #[derive(Clone)] pub enum FlowInfo { - L0(String, u64, u64, u64), - L0Intra(String, u64, u64, u64), - Flush(String, u64, u64, u64), - Compaction(String, u64, u64), + L0(String, u64, u64), + L0Intra(String, u64, u64), + Flush(String, u64, u64), + Compaction(String, u64), BeforeUnsafeDestroyRange(u64), AfterUnsafeDestroyRange(u64), - Created(u64, u64), - Destroyed(u64, u64), + Created(u64), + Destroyed(u64), } #[derive(Clone)] pub struct FlowListener { flow_info_sender: Arc>>, region_id: u64, - suffix_id: u64, } impl FlowListener { @@ -29,15 +28,13 @@ impl FlowListener { 
Self { flow_info_sender: Arc::new(Mutex::new(flow_info_sender)), region_id: 0, - suffix_id: 0, } } - pub fn clone_with(&self, region_id: u64, suffix_id: u64) -> Self { + pub fn clone_with(&self, region_id: u64) -> Self { Self { flow_info_sender: self.flow_info_sender.clone(), region_id, - suffix_id, } } @@ -46,7 +43,7 @@ impl FlowListener { .flow_info_sender .lock() .unwrap() - .send(FlowInfo::Created(self.region_id, self.suffix_id)); + .send(FlowInfo::Created(self.region_id)); } pub fn on_destroyed(&self) { @@ -54,7 +51,7 @@ impl FlowListener { .flow_info_sender .lock() .unwrap() - .send(FlowInfo::Destroyed(self.region_id, self.suffix_id)); + .send(FlowInfo::Destroyed(self.region_id)); } } @@ -67,7 +64,6 @@ impl EventListener for FlowListener { info.cf_name().to_owned(), total, self.region_id, - self.suffix_id, )); } @@ -81,7 +77,6 @@ impl EventListener for FlowListener { info.cf_name().to_owned(), total, self.region_id, - self.suffix_id, )); } else { // ingestion may change the pending bytes. 
@@ -92,7 +87,6 @@ impl EventListener for FlowListener { .send(FlowInfo::Compaction( info.cf_name().to_owned(), self.region_id, - self.suffix_id, )); } } @@ -138,7 +132,6 @@ impl EventListener for FlowListener { info.cf_name().to_owned(), diff, self.region_id, - self.suffix_id, )); } else { let l0_input_file_at_input_level = @@ -162,7 +155,6 @@ impl EventListener for FlowListener { info.cf_name().to_owned(), read_bytes, self.region_id, - self.suffix_id, )); } } @@ -174,7 +166,6 @@ impl EventListener for FlowListener { .send(FlowInfo::Compaction( info.cf_name().to_owned(), self.region_id, - self.suffix_id, )); } } diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index eb49775e5c1..f50afe4bc44 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -192,8 +192,8 @@ impl TabletFactory for KvEngineFactory { let tablet_name = path.file_name().unwrap().to_str().unwrap().to_string(); db_opts.set_info_log(TabletLogger::new(tablet_name)); let cf_opts = self.cf_opts(EngineType::RaftKv2); - if let Some(listener) = &self.inner.flow_listener && let Some(suffix) = ctx.suffix { - db_opts.add_event_listener(listener.clone_with(ctx.id, suffix)); + if let Some(listener) = &self.inner.flow_listener { + db_opts.add_event_listener(listener.clone_with(ctx.id)); } if let Some(storage) = &self.inner.state_storage && let Some(flush_state) = ctx.flush_state { @@ -209,8 +209,8 @@ impl TabletFactory for KvEngineFactory { engine_rocks::util::new_engine_opt(path.to_str().unwrap(), db_opts, cf_opts); if let Err(e) = &kv_engine { error!("failed to create tablet"; "id" => ctx.id, "suffix" => ?ctx.suffix, "path" => %path.display(), "err" => ?e); - } else if let Some(listener) = &self.inner.flow_listener && let Some(suffix) = ctx.suffix { - listener.clone_with(ctx.id, suffix).on_created(); + } else if let Some(listener) = &self.inner.flow_listener { + listener.clone_with(ctx.id).on_created(); } kv_engine } @@ -227,8 +227,8 @@ impl TabletFactory for 
KvEngineFactory { // kv_cfs_opts, // )?; let _ = std::fs::remove_dir_all(path); - if let Some(listener) = &self.inner.flow_listener && let Some(suffix) = ctx.suffix { - listener.clone_with(ctx.id, suffix).on_destroyed(); + if let Some(listener) = &self.inner.flow_listener { + listener.clone_with(ctx.id).on_destroyed(); } Ok(()) } diff --git a/src/storage/txn/flow_controller/singleton_flow_controller.rs b/src/storage/txn/flow_controller/singleton_flow_controller.rs index 801d3d27280..f51249facfc 100644 --- a/src/storage/txn/flow_controller/singleton_flow_controller.rs +++ b/src/storage/txn/flow_controller/singleton_flow_controller.rs @@ -505,8 +505,7 @@ pub(super) struct FlowChecker { wait_for_destroy_range_finish: bool, region_id: u64, - #[getset(get_copy = "pub", set = "pub")] - tablet_suffix: u64, + rc: AtomicU32, } impl FlowChecker { @@ -516,12 +515,11 @@ impl FlowChecker { discard_ratio: Arc, limiter: Arc, ) -> Self { - Self::new_with_region_id(0, 0, config, engine, discard_ratio, limiter) + Self::new_with_region_id(0, config, engine, discard_ratio, limiter) } pub fn new_with_region_id( region_id: u64, - tablet_suffix: u64, config: &FlowControlConfig, engine: E, discard_ratio: Arc, @@ -535,7 +533,6 @@ impl FlowChecker { Self { region_id, - tablet_suffix, soft_pending_compaction_bytes_limit: config.soft_pending_compaction_bytes_limit.0, hard_pending_compaction_bytes_limit: config.hard_pending_compaction_bytes_limit.0, memtables_threshold: config.memtables_threshold, @@ -549,6 +546,7 @@ impl FlowChecker { last_record_time: Instant::now_coarse(), last_speed: 0.0, wait_for_destroy_range_finish: false, + rc: AtomicU32::new(1), } } @@ -653,7 +651,11 @@ impl FlowChecker { let msg = flow_info_receiver.recv_deadline(deadline); if let Err(RecvTimeoutError::Timeout) = msg { - checker.update_statistics(); + let (rate, cf_throttle_flags) = checker.update_statistics(); + for (cf, val) in cf_throttle_flags { + SCHED_THROTTLE_CF_GAUGE.with_label_values(&[cf]).set(val); + } + 
SCHED_WRITE_FLOW_GAUGE.set(rate as i64); deadline = std::time::Instant::now() + TICK_DURATION; } else { checker.on_flow_info_msg(enabled, msg); @@ -684,26 +686,25 @@ impl FlowChecker { self.discard_ratio.store(0, Ordering::Relaxed); } - pub fn update_statistics(&mut self) { + pub fn update_statistics(&mut self) -> (f64, HashMap<&str, i64>) { + let mut cf_throttle_flags = HashMap::default(); if let Some(throttle_cf) = self.throttle_cf.as_ref() { - SCHED_THROTTLE_CF_GAUGE - .with_label_values(&[throttle_cf]) - .set(1); + cf_throttle_flags.insert(throttle_cf.as_str(), 1); for cf in self.cf_checkers.keys() { if cf != throttle_cf { - SCHED_THROTTLE_CF_GAUGE.with_label_values(&[cf]).set(0); + cf_throttle_flags.insert(cf.as_str(), 0); } } } else { for cf in self.cf_checkers.keys() { - SCHED_THROTTLE_CF_GAUGE.with_label_values(&[cf]).set(0); + cf_throttle_flags.insert(cf.as_str(), 0); } } // calculate foreground write flow let dur = self.last_record_time.saturating_elapsed_secs(); if dur < f64::EPSILON { - return; + return (0.0, cf_throttle_flags); } let rate = self.limiter.total_bytes_consumed() as f64 / dur; // don't record those write rate of 0. 
@@ -713,10 +714,11 @@ impl FlowChecker { if self.limiter.total_bytes_consumed() != 0 { self.write_flow_recorder.observe(rate as u64); } - SCHED_WRITE_FLOW_GAUGE.set(rate as i64); + self.last_record_time = Instant::now_coarse(); self.limiter.reset_statistics(); + (rate, cf_throttle_flags) } fn on_pending_compaction_bytes_change(&mut self, cf: String) { @@ -1005,6 +1007,14 @@ impl FlowChecker { }); self.limiter.set_speed_limit(throttle) } + + pub fn inc(&self) -> u32 { + self.rc.fetch_add(1, Ordering::SeqCst) + } + + pub fn dec(&self) -> u32 { + self.rc.fetch_sub(1, Ordering::SeqCst) + } } #[cfg(test)] @@ -1101,7 +1111,6 @@ pub(super) mod tests { stub: &EngineStub, tx: &mpsc::SyncSender, region_id: u64, - tablet_suffix: u64, ) { assert_eq!(flow_controller.consume(0, 2000), Duration::ZERO); loop { @@ -1121,95 +1130,45 @@ pub(super) mod tests { // exceeds the threshold on start stub.0.num_memtables.store(8, Ordering::Relaxed); - tx.send(FlowInfo::Flush( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::Flush("default".to_string(), 0, region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); assert_eq!(flow_controller.should_drop(region_id), false); // on start check forbids flow control assert_eq!(flow_controller.is_unlimited(region_id), true); // once falls below the threshold, pass the on start check stub.0.num_memtables.store(1, Ordering::Relaxed); - tx.send(FlowInfo::Flush( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::Flush("default".to_string(), 0, region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); // not throttle when the average of the sliding window 
doesn't exceeds the // threshold stub.0.num_memtables.store(6, Ordering::Relaxed); - tx.send(FlowInfo::Flush( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::Flush("default".to_string(), 0, region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); assert_eq!(flow_controller.should_drop(region_id), false); assert_eq!(flow_controller.is_unlimited(region_id), true); // the average of sliding window exceeds the threshold stub.0.num_memtables.store(6, Ordering::Relaxed); - tx.send(FlowInfo::Flush( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::Flush("default".to_string(), 0, region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); assert_eq!(flow_controller.should_drop(region_id), false); assert_eq!(flow_controller.is_unlimited(region_id), false); assert_ne!(flow_controller.consume(region_id, 2000), Duration::ZERO); // not throttle once the number of memtables falls below the threshold stub.0.num_memtables.store(1, Ordering::Relaxed); - tx.send(FlowInfo::Flush( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::Flush("default".to_string(), 0, region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); assert_eq!(flow_controller.should_drop(region_id), false); assert_eq!(flow_controller.is_unlimited(region_id), true); } @@ -1220,7 +1179,7 @@ pub(super) mod tests { let flow_controller = EngineFlowController::new(&FlowControlConfig::default(), stub.clone(), rx); let 
flow_controller = FlowController::Singleton(flow_controller); - test_flow_controller_memtable_impl(&flow_controller, &stub, &tx, 0, 0); + test_flow_controller_memtable_impl(&flow_controller, &stub, &tx, 0); } pub fn test_flow_controller_l0_impl( @@ -1228,7 +1187,6 @@ pub(super) mod tests { stub: &EngineStub, tx: &mpsc::SyncSender, region_id: u64, - tablet_suffix: u64, ) { assert_eq!(flow_controller.consume(region_id, 2000), Duration::ZERO); loop { @@ -1240,56 +1198,26 @@ pub(super) mod tests { // exceeds the threshold stub.0.num_l0_files.store(30, Ordering::Relaxed); - tx.send(FlowInfo::L0( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::L0("default".to_string(), 0, region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); assert_eq!(flow_controller.should_drop(region_id), false); // on start check forbids flow control assert_eq!(flow_controller.is_unlimited(region_id), true); // once fall below the threshold, pass the on start check stub.0.num_l0_files.store(10, Ordering::Relaxed); - tx.send(FlowInfo::L0( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::L0("default".to_string(), 0, region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); // exceeds the threshold, throttle now stub.0.num_l0_files.store(30, Ordering::Relaxed); - tx.send(FlowInfo::L0( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::L0("default".to_string(), 0, region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, 
region_id)) + .unwrap(); assert_eq!(flow_controller.should_drop(region_id), false); assert_eq!(flow_controller.is_unlimited(region_id), false); assert_ne!(flow_controller.consume(region_id, 2000), Duration::ZERO); @@ -1302,7 +1230,7 @@ pub(super) mod tests { let flow_controller = EngineFlowController::new(&FlowControlConfig::default(), stub.clone(), rx); let flow_controller = FlowController::Singleton(flow_controller); - test_flow_controller_l0_impl(&flow_controller, &stub, &tx, 0, 0); + test_flow_controller_l0_impl(&flow_controller, &stub, &tx, 0); } pub fn test_flow_controller_pending_compaction_bytes_impl( @@ -1310,25 +1238,15 @@ pub(super) mod tests { stub: &EngineStub, tx: &mpsc::SyncSender, region_id: u64, - tablet_suffix: u64, ) { // exceeds the threshold stub.0 .pending_compaction_bytes .store(1000 * 1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction( - "default".to_string(), - region_id, - tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::Compaction("default".to_string(), region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); // on start check forbids flow control assert!( flow_controller.discard_ratio(region_id) < f64::EPSILON, @@ -1339,60 +1257,33 @@ pub(super) mod tests { stub.0 .pending_compaction_bytes .store(100 * 1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction( - "default".to_string(), - region_id, - tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::Compaction("default".to_string(), region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); stub.0 .pending_compaction_bytes .store(1000 * 1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction( - "default".to_string(), - region_id, - 
tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::Compaction("default".to_string(), region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); assert!(flow_controller.discard_ratio(region_id) > f64::EPSILON); stub.0 .pending_compaction_bytes .store(1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction( - "default".to_string(), - region_id, - tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::Compaction("default".to_string(), region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); // pending compaction bytes jump after unsafe destroy range tx.send(FlowInfo::BeforeUnsafeDestroyRange(region_id)) .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id, 0)) + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) .unwrap(); assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); @@ -1400,39 +1291,21 @@ pub(super) mod tests { stub.0 .pending_compaction_bytes .store(1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction( - "default".to_string(), - region_id, - tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::Compaction("default".to_string(), region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); stub.0 .pending_compaction_bytes .store(10000000 * 1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction( - "default".to_string(), - region_id, - tablet_suffix, - )) - .unwrap(); + 
tx.send(FlowInfo::Compaction("default".to_string(), region_id)) + .unwrap(); tx.send(FlowInfo::AfterUnsafeDestroyRange(region_id)) .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); assert!( flow_controller.discard_ratio(region_id) < f64::EPSILON, "discard_ratio {}", @@ -1443,37 +1316,19 @@ pub(super) mod tests { stub.0 .pending_compaction_bytes .store(1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction( - "default".to_string(), - region_id, - tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::Compaction("default".to_string(), region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); stub.0 .pending_compaction_bytes .store(1000000000 * 1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction( - "default".to_string(), - region_id, - tablet_suffix, - )) - .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + tx.send(FlowInfo::Compaction("default".to_string(), region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); assert!(flow_controller.discard_ratio(region_id) > f64::EPSILON); } @@ -1484,7 +1339,7 @@ pub(super) mod tests { let flow_controller = EngineFlowController::new(&FlowControlConfig::default(), stub.clone(), rx); let flow_controller = FlowController::Singleton(flow_controller); - test_flow_controller_pending_compaction_bytes_impl(&flow_controller, &stub, &tx, 0, 0); + test_flow_controller_pending_compaction_bytes_impl(&flow_controller, &stub, &tx, 0); } #[test] diff --git a/src/storage/txn/flow_controller/tablet_flow_controller.rs 
b/src/storage/txn/flow_controller/tablet_flow_controller.rs index 556b5f4a8fa..d4590b90acc 100644 --- a/src/storage/txn/flow_controller/tablet_flow_controller.rs +++ b/src/storage/txn/flow_controller/tablet_flow_controller.rs @@ -20,7 +20,7 @@ use tikv_util::{sys::thread::StdThreadBuildWrapper, time::Limiter}; use super::singleton_flow_controller::{ FlowChecker, FlowControlFactorStore, Msg, RATIO_SCALE_FACTOR, TICK_DURATION, }; -use crate::storage::config::FlowControlConfig; +use crate::storage::{config::FlowControlConfig, metrics::*}; pub struct TabletFlowFactorStore { registry: TabletRegistry, @@ -156,15 +156,12 @@ impl FlowInfoDispatcher { let msg = flow_info_receiver.recv_deadline(deadline); match msg.clone() { - Ok(FlowInfo::L0(_cf, _, region_id, suffix)) - | Ok(FlowInfo::L0Intra(_cf, _, region_id, suffix)) - | Ok(FlowInfo::Flush(_cf, _, region_id, suffix)) - | Ok(FlowInfo::Compaction(_cf, region_id, suffix)) => { + Ok(FlowInfo::L0(_cf, _, region_id)) + | Ok(FlowInfo::L0Intra(_cf, _, region_id)) + | Ok(FlowInfo::Flush(_cf, _, region_id)) + | Ok(FlowInfo::Compaction(_cf, region_id)) => { let mut checkers = flow_checkers.as_ref().write().unwrap(); if let Some(checker) = checkers.get_mut(®ion_id) { - if checker.tablet_suffix() != suffix { - continue; - } checker.on_flow_info_msg(enabled, msg); } } @@ -175,10 +172,14 @@ impl FlowInfoDispatcher { checker.on_flow_info_msg(enabled, msg); } } - Ok(FlowInfo::Created(region_id, suffix)) => { + Ok(FlowInfo::Created(region_id)) => { let mut checkers = flow_checkers.as_ref().write().unwrap(); match checkers.entry(region_id) { - HashMapEntry::Occupied(e) => e.into_mut(), + HashMapEntry::Occupied(e) => { + let val = e.into_mut(); + val.inc(); + val + } HashMapEntry::Vacant(e) => { let engine = TabletFlowFactorStore::new(registry.clone()); let mut v = limiters.as_ref().write().unwrap(); @@ -193,7 +194,6 @@ impl FlowInfoDispatcher { )); e.insert(FlowChecker::new_with_region_id( region_id, - suffix, &config, engine, 
limiter.1.clone(), @@ -202,12 +202,14 @@ impl FlowInfoDispatcher { } }; } - Ok(FlowInfo::Destroyed(region_id, suffix)) => { + Ok(FlowInfo::Destroyed(region_id)) => { let mut remove_limiter = false; { let mut checkers = flow_checkers.as_ref().write().unwrap(); - if let Some(checker) = checkers.get_mut(®ion_id) { - if checker.tablet_suffix() == suffix { + if let Some(checker) = checkers.get(®ion_id) { + // if the previous value is 1, then the updated reference count + // will be 0 + if checker.dec() == 1 { checkers.remove(®ion_id); remove_limiter = true; } @@ -219,8 +221,22 @@ impl FlowInfoDispatcher { } Err(RecvTimeoutError::Timeout) => { let mut checkers = flow_checkers.as_ref().write().unwrap(); + let mut total_rate = 0.0; + let mut cf_throttle_flags = HashMap::default(); for checker in (*checkers).values_mut() { - checker.update_statistics(); + let (rate, tablet_cf_throttle_flags) = checker.update_statistics(); + total_rate += rate; + for (key, val) in tablet_cf_throttle_flags { + if let Some(value) = cf_throttle_flags.get_mut(key) { + *value += val; + } else { + cf_throttle_flags.insert(key, val); + } + } + } + SCHED_WRITE_FLOW_GAUGE.set(total_rate as i64); + for (cf, val) in cf_throttle_flags { + SCHED_THROTTLE_CF_GAUGE.with_label_values(&[cf]).set(val); } deadline = std::time::Instant::now() + TICK_DURATION; } @@ -353,25 +369,65 @@ mod tests { let tablet_suffix = 5_u64; let tablet_context = TabletContext::with_infinite_region(region_id, Some(tablet_suffix)); reg.load(tablet_context, false).unwrap(); - tx.send(FlowInfo::Created(region_id, tablet_suffix)) + tx.send(FlowInfo::Created(region_id)).unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); test_flow_controller_basic_impl(&flow_controller, region_id); - tx.send(FlowInfo::Destroyed(region_id, tablet_suffix)) + tx.send(FlowInfo::Destroyed(region_id)).unwrap(); + 
tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); + } + + #[test] + fn test_tablet_flow_controller_life_cycle() { + const WAIT_TICK: Duration = Duration::from_millis(100); + let (_dir, flow_controller, tx, reg) = create_tablet_flow_controller(); + let region_id = 5_u64; + let tablet_suffix = 5_u64; + let tablet_context = TabletContext::with_infinite_region(region_id, Some(tablet_suffix)); + reg.load(tablet_context, false).unwrap(); + tx.send(FlowInfo::Created(region_id)).unwrap(); + for _ in 0..30 { + std::thread::sleep(WAIT_TICK); + flow_controller.set_speed_limit(region_id, 1000.0); + if !flow_controller.is_unlimited(region_id) { + break; + } + } + tx.send(FlowInfo::Destroyed(region_id)).unwrap(); + for _ in 0..30 { + std::thread::sleep(WAIT_TICK); + if flow_controller.is_unlimited(region_id) { + break; + } + } + // the region's limiter is removed so it's unlimited + assert!(flow_controller.is_unlimited(region_id)); + + tx.send(FlowInfo::Created(region_id)).unwrap(); + tx.send(FlowInfo::Created(region_id)).unwrap(); + for _ in 0..30 { + std::thread::sleep(WAIT_TICK); + flow_controller.set_speed_limit(region_id, 1000.0); + if !flow_controller.is_unlimited(region_id) { + break; + } + } + tx.send(FlowInfo::Destroyed(region_id)).unwrap(); + std::thread::sleep(TICK_DURATION); + // the region's limiter should not be removed as the reference count is still 1 + assert!(!flow_controller.is_unlimited(region_id)); + tx.send(FlowInfo::Destroyed(region_id)).unwrap(); + for _ in 0..30 { + std::thread::sleep(WAIT_TICK); + if flow_controller.is_unlimited(region_id) { + break; + } + } + // the region's limiter is removed so it's unlimited + assert!(flow_controller.is_unlimited(region_id)); + // no-op it should not crash + tx.send(FlowInfo::Destroyed(region_id)).unwrap(); } #[test] @@ -382,16 +438,10 @@ mod tests { let tablet_context = 
TabletContext::with_infinite_region(region_id, Some(tablet_suffix)); let mut cached = reg.load(tablet_context, false).unwrap(); let stub = cached.latest().unwrap().clone(); - tx.send(FlowInfo::Created(region_id, tablet_suffix)) + tx.send(FlowInfo::Created(region_id)).unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); - test_flow_controller_memtable_impl(&flow_controller, &stub, &tx, region_id, tablet_suffix); + test_flow_controller_memtable_impl(&flow_controller, &stub, &tx, region_id); } #[test] @@ -402,16 +452,10 @@ mod tests { let tablet_context = TabletContext::with_infinite_region(region_id, Some(tablet_suffix)); let mut cached = reg.load(tablet_context, false).unwrap(); let stub = cached.latest().unwrap().clone(); - tx.send(FlowInfo::Created(region_id, tablet_suffix)) + tx.send(FlowInfo::Created(region_id)).unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); - test_flow_controller_l0_impl(&flow_controller, &stub, &tx, region_id, tablet_suffix); + test_flow_controller_l0_impl(&flow_controller, &stub, &tx, region_id); } #[test] @@ -422,22 +466,10 @@ mod tests { let tablet_context = TabletContext::with_infinite_region(region_id, Some(tablet_suffix)); let mut cached = reg.load(tablet_context, false).unwrap(); let stub = cached.latest().unwrap().clone(); - tx.send(FlowInfo::Created(region_id, tablet_suffix)) + tx.send(FlowInfo::Created(region_id)).unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) .unwrap(); - tx.send(FlowInfo::L0Intra( - "default".to_string(), - 0, - region_id, - tablet_suffix, - )) - .unwrap(); - test_flow_controller_pending_compaction_bytes_impl( - &flow_controller, - &stub, - &tx, - region_id, - tablet_suffix, - ); + 
test_flow_controller_pending_compaction_bytes_impl(&flow_controller, &stub, &tx, region_id); } } From b5508681f09828d3c3e7336f592ffcfca0b58091 Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 1 Mar 2023 17:25:09 +0800 Subject: [PATCH 0556/1149] pd_client_v2: fix version race (#14310) close tikv/tikv#14309 Version should be updated before broadcast updates, otherwise the update will be just ignore. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- components/pd_client/src/client_v2.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/components/pd_client/src/client_v2.rs b/components/pd_client/src/client_v2.rs index b583772bb72..11224ad894e 100644 --- a/components/pd_client/src/client_v2.rs +++ b/components/pd_client/src/client_v2.rs @@ -245,8 +245,9 @@ impl CachedRawClient { let latest_version = { let mut latest = self.core.latest.lock().unwrap(); *latest = self.cache.clone(); + let v = self.core.version.fetch_add(1, Ordering::Relaxed) + 1; let _ = self.core.on_reconnect_tx.send(()); - self.core.version.fetch_add(1, Ordering::Relaxed) + 1 + v }; debug_assert!(self.cache_version < latest_version); self.cache_version = latest_version; From ddb4e729d530b6dfed06a5bde9aa58e3fc8ff11d Mon Sep 17 00:00:00 2001 From: you06 Date: Wed, 1 Mar 2023 22:03:10 +0800 Subject: [PATCH 0557/1149] copr: fix paging stop early unexpectedly with agg executors (#14292) ref tikv/tikv#14209, close tikv/tikv#14291 #14209 stop paging when the result set is drained. But when supporting agg paging, the executors will also return drained when there is enough data returned. This PR label the drain reasons and stop following paging when the resultset is really drained. 
Signed-off-by: you06 Co-authored-by: Liqi Geng --- .../src/fast_hash_aggr_executor.rs | 34 ++-- .../src/index_scan_executor.rs | 28 ++-- .../tidb_query_executors/src/interface.rs | 44 ++++- .../src/limit_executor.rs | 38 ++--- .../src/partition_top_n_executor.rs | 150 +++++++++--------- .../src/projection_executor.rs | 46 +++--- components/tidb_query_executors/src/runner.rs | 12 +- .../src/selection_executor.rs | 64 ++++---- .../src/simple_aggr_executor.rs | 24 +-- .../src/slow_hash_aggr_executor.rs | 8 +- .../src/stream_aggr_executor.rs | 22 +-- .../src/table_scan_executor.rs | 8 +- .../src/top_n_executor.rs | 82 +++++----- .../src/util/aggr_executor.rs | 41 ++--- .../src/util/mock_executor.rs | 6 +- .../src/util/scan_executor.rs | 13 +- src/coprocessor/statistics/analyze.rs | 4 +- .../coprocessor_executors/util/bencher.rs | 2 +- .../coprocessor_executors/util/fixture.rs | 6 +- tests/failpoints/cases/test_coprocessor.rs | 10 ++ 20 files changed, 350 insertions(+), 292 deletions(-) diff --git a/components/tidb_query_executors/src/fast_hash_aggr_executor.rs b/components/tidb_query_executors/src/fast_hash_aggr_executor.rs index 174912ca0b0..a878347fc68 100644 --- a/components/tidb_query_executors/src/fast_hash_aggr_executor.rs +++ b/components/tidb_query_executors/src/fast_hash_aggr_executor.rs @@ -361,10 +361,10 @@ impl AggregationExecutorImpl for FastHashAggregationImp fn iterate_available_groups( &mut self, entities: &mut Entities, - src_is_drained: bool, + src_is_drained: BatchExecIsDrain, mut iteratee: impl FnMut(&mut Entities, &[Box]) -> Result<()>, ) -> Result> { - assert!(src_is_drained); + assert!(src_is_drained.stop()); let aggr_fns_len = entities.each_aggr_fn.len(); let mut group_by_column = LazyBatchColumn::decoded_with_capacity_and_tp( @@ -545,12 +545,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + 
assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let mut r = block_on(exec.next_batch(1)); // col_0 + col_1 can result in [NULL, 9.0, 6.0], thus there will be three @@ -681,12 +681,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let mut r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); @@ -765,12 +765,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let mut r = block_on(exec.next_batch(1)); // col_4 can result in [NULL, "aa", "aaa"], thus there will be three groups. 
@@ -935,13 +935,13 @@ mod tests { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ); @@ -950,12 +950,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } } @@ -998,12 +998,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let mut r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2]); @@ -1069,12 +1069,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let mut r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); @@ -1135,7 +1135,7 @@ mod tests { )]), logical_rows: vec![6, 4, 5, 1, 3, 2, 0], warnings: 
EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }], ); let mut exec = exec_builder(src_exec); diff --git a/components/tidb_query_executors/src/index_scan_executor.rs b/components/tidb_query_executors/src/index_scan_executor.rs index 9e415918541..de59b843eb5 100644 --- a/components/tidb_query_executors/src/index_scan_executor.rs +++ b/components/tidb_query_executors/src/index_scan_executor.rs @@ -991,7 +991,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert!(result.is_drained.as_ref().unwrap()); + assert!(result.is_drained.as_ref().unwrap().stop()); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 3); assert!(result.physical_columns[0].is_raw()); @@ -1048,7 +1048,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert!(result.is_drained.as_ref().unwrap()); + assert!(result.is_drained.as_ref().unwrap().stop()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 3); assert!(result.physical_columns[0].is_raw()); @@ -1108,7 +1108,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert!(result.is_drained.as_ref().unwrap()); + assert!(result.is_drained.as_ref().unwrap().stop()); assert_eq!(result.physical_columns.columns_len(), 2); assert_eq!(result.physical_columns.rows_len(), 3); assert!(result.physical_columns[0].is_raw()); @@ -1153,7 +1153,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert!(result.is_drained.as_ref().unwrap()); + assert!(result.is_drained.as_ref().unwrap().stop()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 3); assert!(result.physical_columns[0].is_raw()); @@ -1205,7 +1205,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert!(result.is_drained.as_ref().unwrap()); + 
assert!(result.is_drained.as_ref().unwrap().stop()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 2); assert!(result.physical_columns[0].is_raw()); @@ -1282,7 +1282,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert!(result.is_drained.as_ref().unwrap()); + assert!(result.is_drained.as_ref().unwrap().stop()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 2); assert!(result.physical_columns[0].is_raw()); @@ -1339,7 +1339,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert!(result.is_drained.as_ref().unwrap()); + assert!(result.is_drained.as_ref().unwrap().stop()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); assert!(result.physical_columns[0].is_raw()); @@ -1449,7 +1449,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert!(result.is_drained.as_ref().unwrap()); + assert!(result.is_drained.as_ref().unwrap().stop()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); assert!(result.physical_columns[0].is_raw()); @@ -1492,7 +1492,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert!(result.is_drained.as_ref().unwrap()); + assert!(result.is_drained.as_ref().unwrap().stop()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); assert!(result.physical_columns[0].is_raw()); @@ -1588,7 +1588,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert!(result.is_drained.as_ref().unwrap()); + assert!(result.is_drained.as_ref().unwrap().stop()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); assert!(result.physical_columns[0].is_raw()); @@ -1688,7 +1688,7 @@ mod tests { 
.unwrap(); let mut result = block_on(executor.next_batch(10)); - assert!(result.is_drained.as_ref().unwrap()); + assert!(result.is_drained.as_ref().unwrap().stop()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); assert!(result.physical_columns[0].is_raw()); @@ -1782,7 +1782,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert!(result.is_drained.as_ref().unwrap()); + assert!(result.is_drained.as_ref().unwrap().stop()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); assert!(result.physical_columns[0].is_raw()); @@ -1875,7 +1875,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert!(result.is_drained.as_ref().unwrap()); + assert!(result.is_drained.as_ref().unwrap().stop()); assert_eq!(result.physical_columns.columns_len(), 3); assert_eq!(result.physical_columns.rows_len(), 1); assert!(result.physical_columns[0].is_raw()); @@ -2001,7 +2001,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert!(result.is_drained.as_ref().unwrap()); + assert!(result.is_drained.as_ref().unwrap().stop()); assert_eq!(result.physical_columns.columns_len(), 4); assert_eq!(result.physical_columns.rows_len(), 1); assert!(result.physical_columns[0].is_raw()); diff --git a/components/tidb_query_executors/src/interface.rs b/components/tidb_query_executors/src/interface.rs index 611516ab6bc..352fbab4720 100644 --- a/components/tidb_query_executors/src/interface.rs +++ b/components/tidb_query_executors/src/interface.rs @@ -174,17 +174,45 @@ pub struct BatchExecuteResult { /// Whether or not there is no more data. /// /// This structure is a `Result`. When it is: - /// - `Ok(false)`: The normal case, means that there could be more data. The - /// caller should continue calling `next_batch()` although for each call - /// the returned data may be empty. 
- /// - `Ok(true)`: Means that the executor is drained and no more data will - /// be returned in future. However there could be some (last) data in the - /// `data` field this time. The caller should NOT call `next_batch()` any - /// more. + /// - `Ok(batch_exec_is_drain)`: See the comment of `BatchExecIsDrain`. /// - `Err(_)`: Means that there is an error when trying to retrieve more /// data. In this case, the error is returned and the executor is also /// drained. Similar to `Ok(true)`, there could be some remaining data in /// the `data` field which is valid data and should be processed. The /// caller should NOT call `next_batch()` any more. - pub is_drained: Result, + pub is_drained: Result, +} + +/// The result of batch execution. +/// - `Drain`: The executor is completely drained and no more data will be +/// returned in the given range.However there could be some (last) data in +/// `data` field this time. The caller should NOT call `next_batch()` any +/// more. +/// - `PagingDrain`: The executor output enough rows of the paging request, +/// there may be following data in the next paging request, the paging request +/// should be returned with scanned range in this case. Only used in paging +/// mode, Also check the last data in `data` field. +/// - `Remain`: The normal case, means that there could be more data. The caller +/// should continue calling `next_batch()` although for each call the returned +/// data may be empty. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BatchExecIsDrain { + Remain, + Drain, + PagingDrain, +} + +impl BatchExecIsDrain { + #[inline] + pub fn is_remain(&self) -> bool { + *self == BatchExecIsDrain::Remain + } + + /// the batch execution need to stop when the result status is Drain or + /// PagingDrain, but only when we meet Drain, the resultset is really + /// drained. 
+ #[inline] + pub fn stop(&self) -> bool { + !self.is_remain() + } } diff --git a/components/tidb_query_executors/src/limit_executor.rs b/components/tidb_query_executors/src/limit_executor.rs index a9cd2cae482..bbbe5d576d0 100644 --- a/components/tidb_query_executors/src/limit_executor.rs +++ b/components/tidb_query_executors/src/limit_executor.rs @@ -46,7 +46,7 @@ impl BatchExecutor for BatchLimitExecutor { } else { // We don't need to touch the physical data. result.logical_rows.truncate(self.remaining_rows); - result.is_drained = Ok(true); + result.is_drained = Ok(BatchExecIsDrain::Drain); self.remaining_rows = 0; } @@ -96,7 +96,7 @@ mod tests { )]), logical_rows: vec![1, 2], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }], ); @@ -105,7 +105,7 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 3); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -141,7 +141,7 @@ mod tests { )]), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![VectorValue::Int( @@ -149,7 +149,7 @@ mod tests { )]), logical_rows: vec![1, 2], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ); @@ -159,12 +159,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 3); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[1, 2]); assert_eq!(r.physical_columns.rows_len(), 3); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -178,7 +178,7 @@ mod tests { )]), logical_rows: vec![1, 2], warnings: 
EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![VectorValue::Int( @@ -196,12 +196,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[1, 2]); assert_eq!(r.physical_columns.rows_len(), 3); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 2]); assert_eq!(r.physical_columns.rows_len(), 3); - assert!(r.is_drained.unwrap()); // No errors + assert!(r.is_drained.unwrap().stop()); // No errors } #[test] @@ -215,13 +215,13 @@ mod tests { )]), logical_rows: vec![1, 2], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![VectorValue::Int( @@ -229,7 +229,7 @@ mod tests { )]), logical_rows: vec![0, 4, 1, 3], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ); @@ -239,17 +239,17 @@ mod tests { let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[1, 2]); assert_eq!(r.physical_columns.rows_len(), 3); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 4]); assert_eq!(r.physical_columns.rows_len(), 5); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -262,7 +262,7 @@ mod tests { let r = 
block_on(exec.next_batch(100)); assert_eq!(r.logical_rows, &[0, 1, 2, 3, 4]); let r = block_on(exec.next_batch(2)); - assert_eq!(r.is_drained.unwrap(), true); + assert!(r.is_drained.unwrap().stop()); let schema = vec![FieldTypeTp::LongLong.into()]; let rows = (0..1024).collect(); @@ -270,9 +270,9 @@ mod tests { let mut exec = BatchLimitExecutor::new(src_exec, 1024, true).unwrap(); for _i in 0..1023 { let r = block_on(exec.next_batch(1)); - assert_eq!(r.is_drained.unwrap(), false); + assert!(r.is_drained.unwrap().is_remain()); } let r = block_on(exec.next_batch(1)); - assert_eq!(r.is_drained.unwrap(), true); + assert!(r.is_drained.unwrap().stop()); } } diff --git a/components/tidb_query_executors/src/partition_top_n_executor.rs b/components/tidb_query_executors/src/partition_top_n_executor.rs index 52cf2e85925..980adb3e459 100644 --- a/components/tidb_query_executors/src/partition_top_n_executor.rs +++ b/components/tidb_query_executors/src/partition_top_n_executor.rs @@ -199,7 +199,7 @@ impl BatchPartitionTopNExecutor { } #[inline] - async fn handle_next_batch(&mut self) -> Result<(LazyBatchColumnVec, bool)> { + async fn handle_next_batch(&mut self) -> Result<(LazyBatchColumnVec, BatchExecIsDrain)> { let mut result = LazyBatchColumnVec::empty(); let src_result = self.src.next_batch(BATCH_MAX_SIZE).await; self.context.warnings = src_result.warnings; @@ -282,7 +282,7 @@ impl BatchPartitionTopNExecutor { self.heap.add_row(row)?; } } - if src_is_drained { + if src_is_drained.stop() { self.heap.take_all_append_to(&mut result); } @@ -326,7 +326,7 @@ impl BatchExecutor for BatchPartitionTopNExecutor { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }; } @@ -407,7 +407,7 @@ mod tests { ]), logical_rows: (0..1).collect(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }], ); @@ -429,7 +429,7 @@ 
mod tests { let r = block_on(exec.next_batch(1)); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -459,7 +459,7 @@ mod tests { ]), logical_rows: (0..4).collect(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }], ); @@ -491,7 +491,7 @@ mod tests { r.physical_columns[1].decoded().to_real_vec(), &[Real::new(6.0).ok(), Real::new(5.0).ok(),] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -531,7 +531,7 @@ mod tests { ]), logical_rows: (0..9).collect(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }], ); @@ -562,7 +562,7 @@ mod tests { r.physical_columns[1].decoded().to_int_vec(), &[Some(1), None, None, Some(2), Some(1), None] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } fn make_expr_case() -> MockExecutor { @@ -619,7 +619,7 @@ mod tests { ]), logical_rows: (0..9).collect(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }], ) } @@ -657,7 +657,7 @@ mod tests { r.physical_columns[2].decoded().to_int_vec(), &[Some(1), Some(2), Some(4), Some(6), Some(8)] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } /// partition by col0 + col1, order by col2 @@ -689,7 +689,7 @@ mod tests { r.physical_columns[2].decoded().to_int_vec(), &[Some(2), Some(4), Some(7), Some(9)] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } /// Currently, When the data is not ordered by partition key, e.g. 
1 1 2 1, @@ -713,7 +713,7 @@ mod tests { ]), logical_rows: (0..4).collect(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }], ); @@ -745,7 +745,7 @@ mod tests { r.physical_columns[1].decoded().to_real_vec(), &[None, Real::new(7.0).ok(), Real::new(4.0).ok()] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } fn make_integrated_data() -> MockExecutor { @@ -851,7 +851,7 @@ mod tests { ]), logical_rows: (0..16).collect(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }], ) } @@ -881,7 +881,7 @@ mod tests { assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4, 5, 6, 7]); assert_eq!(r.physical_columns.rows_len(), 8); assert_eq!(r.physical_columns.columns_len(), 4); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); assert_eq!( r.physical_columns[2].decoded().to_int_vec(), @@ -927,8 +927,8 @@ mod tests { /// 2 9,223,372,036,854,775,807 9,223,372,036,854,775,807 /// 2 300 300 /// 2 9,223,372,036,854,775,808 -9,223,372,036,854,775,808 - /// 2 NULL NULL - /// 3 NULL NULL + /// 2 NULL NULL + /// 3 NULL NULL /// == Call #4 == /// (drained) (drained) (drained) fn make_full_batch() -> MockExecutor { @@ -964,13 +964,13 @@ mod tests { ]), logical_rows: vec![0, 1, 2, 3, 4], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -998,13 +998,13 @@ mod tests { ]), logical_rows: vec![0, 1, 2, 3, 4], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: 
EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ) @@ -1036,18 +1036,18 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3]); assert_eq!(r.physical_columns.rows_len(), 4); assert_eq!(r.physical_columns.columns_len(), 3); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); assert_eq!( r.physical_columns[0].decoded().to_int_vec(), &[Some(1), Some(1), Some(2), Some(2)] @@ -1056,7 +1056,7 @@ mod tests { let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); assert_eq!(r.physical_columns.rows_len(), 1); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); assert_eq!(r.physical_columns[0].decoded().to_int_vec(), &[Some(3)]); } @@ -1082,18 +1082,18 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3]); assert_eq!(r.physical_columns.rows_len(), 4); assert_eq!(r.physical_columns.columns_len(), 3); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); assert_eq!( r.physical_columns[0].decoded().to_int_vec(), &[Some(1), Some(1), Some(2), Some(2)] @@ -1102,7 
+1102,7 @@ mod tests { let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); assert_eq!(r.physical_columns.rows_len(), 1); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); assert_eq!(r.physical_columns[0].decoded().to_int_vec(), &[Some(3)]); } @@ -1132,23 +1132,23 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4, 5, 6, 7, 8]); assert_eq!(r.physical_columns.rows_len(), 9); assert_eq!(r.physical_columns.columns_len(), 3); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); assert_eq!(r.physical_columns.rows_len(), 1); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -1178,23 +1178,23 @@ mod tests { assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); assert_eq!(r.physical_columns.rows_len(), 5); assert_eq!(r.physical_columns.columns_len(), 3); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); assert_eq!(r.physical_columns.rows_len(), 5); assert_eq!(r.physical_columns.columns_len(), 3); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); 
assert_eq!(r.physical_columns.rows_len(), 0); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } /// The following tests are copied from `batch_top_n_executor.rs`. @@ -1208,7 +1208,7 @@ mod tests { )]), logical_rows: (0..1).collect(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }], ); @@ -1226,7 +1226,7 @@ mod tests { let r = block_on(exec.next_batch(1)); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -1240,13 +1240,13 @@ mod tests { )]), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ); @@ -1265,11 +1265,11 @@ mod tests { let r = block_on(exec.next_batch(1)); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } /// Builds an executor that will return these data: @@ -1311,7 +1311,7 @@ mod tests { ]), logical_rows: vec![3, 0, 1], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -1321,7 +1321,7 @@ mod tests { ]), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -1345,7 +1345,7 @@ mod tests { ]), logical_rows: vec![1, 2, 0, 4], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: 
Ok(BatchExecIsDrain::Drain), }, ], ) @@ -1388,12 +1388,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4, 5, 6]); @@ -1419,7 +1419,7 @@ mod tests { Real::new(4.0).ok() ] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -1459,12 +1459,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4, 5, 6]); @@ -1490,7 +1490,7 @@ mod tests { Real::new(4.0).ok() ] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -1543,12 +1543,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); @@ -1572,7 +1572,7 @@ mod tests { Real::new(4.0).ok() ] ); - assert!(r.is_drained.unwrap()); 
+ assert!(r.is_drained.unwrap().stop()); } /// Builds an executor that will return these data: @@ -1624,13 +1624,13 @@ mod tests { ]), logical_rows: vec![2, 1, 0], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -1664,7 +1664,7 @@ mod tests { ]), logical_rows: vec![0, 1, 2, 3], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ) @@ -1708,12 +1708,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); @@ -1749,7 +1749,7 @@ mod tests { Some(b"aa".to_vec()), ] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -1790,12 +1790,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); @@ -1831,7 +1831,7 @@ mod tests { Some(b"aa".to_vec()), ] ); - 
assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } /// Builds an executor that will return these data: @@ -1887,13 +1887,13 @@ mod tests { ]), logical_rows: vec![2, 1, 0], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -1927,7 +1927,7 @@ mod tests { ]), logical_rows: vec![2, 1, 0, 3], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ) @@ -1952,12 +1952,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); @@ -1967,7 +1967,7 @@ mod tests { r.physical_columns[col_index].decoded().to_int_vec(), expected ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); }; test_top5( @@ -2067,12 +2067,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); @@ -2082,7 
+2082,7 @@ mod tests { r.physical_columns[col_index].decoded().to_int_vec(), expected ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); }; test_top5_paging6( @@ -2191,7 +2191,7 @@ mod tests { ); let r1_is_drained = r1.is_drained.unwrap(); assert_eq!(r1_is_drained, r2.is_drained.unwrap()); - if r1_is_drained { + if r1_is_drained.stop() { break; } } diff --git a/components/tidb_query_executors/src/projection_executor.rs b/components/tidb_query_executors/src/projection_executor.rs index 962cd8698e5..2e88767ecbe 100644 --- a/components/tidb_query_executors/src/projection_executor.rs +++ b/components/tidb_query_executors/src/projection_executor.rs @@ -183,7 +183,7 @@ mod tests { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -192,13 +192,13 @@ mod tests { ]), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ); @@ -222,15 +222,15 @@ mod tests { // | assert_eq!(r.logical_rows.as_slice(), &[]); // | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ cannot infer type assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } /// Builds an executor that will return these logical data: @@ -258,7 +258,7 @@ mod tests 
{ ]), logical_rows: vec![2, 0], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -267,7 +267,7 @@ mod tests { ]), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -276,7 +276,7 @@ mod tests { ]), logical_rows: vec![1], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ) @@ -299,12 +299,12 @@ mod tests { r.physical_columns[0].decoded().to_int_vec(), vec![Some(1), Some(1), Some(1), Some(1), Some(1)] ); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.columns_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[1]); @@ -313,7 +313,7 @@ mod tests { r.physical_columns[0].decoded().to_int_vec(), vec![Some(1), Some(1)] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -340,12 +340,12 @@ mod tests { r.physical_columns[1].decoded().to_real_vec(), vec![Real::new(7.0).ok(), Real::new(-5.0).ok(), None, None, None] ); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.columns_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[1]); @@ -358,7 +358,7 @@ mod tests { r.physical_columns[1].decoded().to_real_vec(), vec![None, None] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } /// This function 
returns 1 when the value is even, 0 otherwise. @@ -406,13 +406,13 @@ mod tests { ]), logical_rows: vec![3, 4, 0, 2], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -422,7 +422,7 @@ mod tests { ]), logical_rows: vec![0], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ) @@ -452,17 +452,17 @@ mod tests { r.physical_columns[1].decoded().to_int_vec(), vec![Some(0), Some(1), Some(0), Some(1)] ); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(r.logical_rows, &[0]); assert_eq!(r.physical_columns[0].decoded().to_int_vec(), vec![None]); assert_eq!(r.physical_columns[1].decoded().to_int_vec(), vec![Some(1)]); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -497,7 +497,7 @@ mod tests { ]), logical_rows: vec![1, 3, 4, 0], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -506,7 +506,7 @@ mod tests { ]), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ); diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 3093b9bb24b..60857dda80d 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -28,7 
+28,7 @@ use tipb::{ }; use super::{ - interface::{BatchExecutor, ExecuteStats}, + interface::{BatchExecIsDrain, BatchExecutor, ExecuteStats}, *, }; @@ -506,13 +506,13 @@ impl BatchExecutorsRunner { record_all += record_len; } - if drained || self.paging_size.map_or(false, |p| record_all >= p as usize) { + if drained.stop() || self.paging_size.map_or(false, |p| record_all >= p as usize) { self.out_most_executor .collect_exec_stats(&mut self.exec_stats); - - let range = if drained { + let range = if drained == BatchExecIsDrain::Drain { None } else { + // It's not allowed to stop paging when BatchExecIsDrain::PagingDrain. self.paging_size .map(|_| self.out_most_executor.take_scanned_range()) }; @@ -583,7 +583,7 @@ impl BatchExecutorsRunner { .mut_rows_data() .extend_from_slice(current_chunk.get_rows_data()); record_len += len; - is_drained = drained; + is_drained = drained.stop(); } if !is_drained || record_len > 0 { @@ -617,7 +617,7 @@ impl BatchExecutorsRunner { chunk: &mut Chunk, warnings: &mut EvalWarnings, ctx: &mut EvalContext, - ) -> Result<(bool, usize)> { + ) -> Result<(BatchExecIsDrain, usize)> { let mut record_len = 0; self.deadline.check()?; diff --git a/components/tidb_query_executors/src/selection_executor.rs b/components/tidb_query_executors/src/selection_executor.rs index 60459229f4f..bd65547109d 100644 --- a/components/tidb_query_executors/src/selection_executor.rs +++ b/components/tidb_query_executors/src/selection_executor.rs @@ -237,7 +237,7 @@ mod tests { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -246,13 +246,13 @@ mod tests { ]), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), 
logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ); @@ -276,15 +276,15 @@ mod tests { // | assert_eq!(r.logical_rows.as_slice(), &[]); // | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ cannot infer type assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } /// Builds an executor that will return these logical data: @@ -312,7 +312,7 @@ mod tests { ]), logical_rows: vec![2, 0], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -321,7 +321,7 @@ mod tests { ]), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -330,7 +330,7 @@ mod tests { ]), logical_rows: vec![1], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ) @@ -364,15 +364,15 @@ mod tests { let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[2, 0]); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[1]); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } } @@ -390,15 +390,15 @@ mod tests { let r = block_on(exec.next_batch(1)); 
assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } /// This function returns 1 when the value is even, 0 otherwise. @@ -446,13 +446,13 @@ mod tests { ]), logical_rows: vec![3, 4, 0, 2], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -462,7 +462,7 @@ mod tests { ]), logical_rows: vec![0], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ) @@ -484,15 +484,15 @@ mod tests { let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[3, 0]); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -509,15 +509,15 @@ mod tests { let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 2]); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); 
assert!(r.logical_rows.is_empty()); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } /// Tests the scenario that there are multiple predicates. Only the row that @@ -547,15 +547,15 @@ mod tests { let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } } @@ -582,15 +582,15 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } } @@ -626,7 +626,7 @@ mod tests { ]), logical_rows: vec![1, 3, 4, 0], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -635,7 +635,7 @@ mod tests { ]), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ); diff --git a/components/tidb_query_executors/src/simple_aggr_executor.rs b/components/tidb_query_executors/src/simple_aggr_executor.rs index b6717a40fb5..e2138394d99 100644 --- a/components/tidb_query_executors/src/simple_aggr_executor.rs +++ b/components/tidb_query_executors/src/simple_aggr_executor.rs @@ -214,10 +214,10 @@ impl AggregationExecutorImpl for SimpleAggregationImpl fn 
iterate_available_groups( &mut self, entities: &mut Entities, - src_is_drained: bool, + src_is_drained: BatchExecIsDrain, mut iteratee: impl FnMut(&mut Entities, &[Box]) -> Result<()>, ) -> Result> { - assert!(src_is_drained); + assert!(src_is_drained.stop()); if self.has_input_rows { iteratee(entities, &self.states)?; } @@ -465,11 +465,11 @@ mod tests { // The scan rows parameter has no effect for mock executor. We don't care. let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); @@ -502,7 +502,7 @@ mod tests { r.physical_columns[11].decoded().to_real_vec(), &[Real::new(12.0).ok()] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -553,11 +553,11 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); @@ -586,7 +586,7 @@ mod tests { r.physical_columns[9].decoded().to_real_vec(), &[Real::new(8.5).ok()] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -629,13 +629,13 @@ mod tests { )]), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ); @@ 
-671,11 +671,11 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } } diff --git a/components/tidb_query_executors/src/slow_hash_aggr_executor.rs b/components/tidb_query_executors/src/slow_hash_aggr_executor.rs index ee076b652a7..a086e574506 100644 --- a/components/tidb_query_executors/src/slow_hash_aggr_executor.rs +++ b/components/tidb_query_executors/src/slow_hash_aggr_executor.rs @@ -435,10 +435,10 @@ impl AggregationExecutorImpl for SlowHashAggregationImp fn iterate_available_groups( &mut self, entities: &mut Entities, - src_is_drained: bool, + src_is_drained: BatchExecIsDrain, mut iteratee: impl FnMut(&mut Entities, &[Box]) -> Result<()>, ) -> Result> { - assert!(src_is_drained); + assert!(src_is_drained.stop()); let number_of_groups = self.groups.len(); let mut group_by_columns: Vec<_> = self @@ -577,12 +577,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let mut r = block_on(exec.next_batch(1)); // col_4 (sort_key), col_0 + 1 can result in: diff --git a/components/tidb_query_executors/src/stream_aggr_executor.rs b/components/tidb_query_executors/src/stream_aggr_executor.rs index d8a0599bf87..7ec683affa0 100644 --- a/components/tidb_query_executors/src/stream_aggr_executor.rs +++ 
b/components/tidb_query_executors/src/stream_aggr_executor.rs @@ -349,10 +349,10 @@ impl AggregationExecutorImpl for BatchStreamAggregation fn iterate_available_groups( &mut self, entities: &mut Entities, - src_is_drained: bool, + src_is_drained: BatchExecIsDrain, mut iteratee: impl FnMut(&mut Entities, &[Box]) -> Result<()>, ) -> Result> { - let number_of_groups = if src_is_drained { + let number_of_groups = if src_is_drained.stop() { AggregationExecutorImpl::::groups_len(self) } else { // don't include the partial group @@ -518,7 +518,7 @@ mod tests { assert_eq!(&r.logical_rows, &[0, 1]); assert_eq!(r.physical_columns.rows_len(), 2); assert_eq!(r.physical_columns.columns_len(), 5); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); // COUNT assert_eq!( r.physical_columns[0].decoded().to_int_vec(), @@ -548,13 +548,13 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0]); assert_eq!(r.physical_columns.rows_len(), 1); assert_eq!(r.physical_columns.columns_len(), 5); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); // COUNT assert_eq!(r.physical_columns[0].decoded().to_int_vec(), &[Some(5)]); // AVG_COUNT @@ -602,7 +602,7 @@ mod tests { assert_eq!(&r.logical_rows, &[0, 1]); assert_eq!(r.physical_columns.rows_len(), 2); assert_eq!(r.physical_columns.columns_len(), 2); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); // col_0 assert_eq!( r.physical_columns[0].decoded().to_bytes_vec(), @@ -617,13 +617,13 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); 
assert_eq!(&r.logical_rows, &[0]); assert_eq!(r.physical_columns.rows_len(), 1); assert_eq!(r.physical_columns.columns_len(), 2); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); // col_0 assert_eq!( r.physical_columns[0].decoded().to_bytes_vec(), @@ -691,7 +691,7 @@ mod tests { ]), logical_rows: vec![3, 1, 4, 2, 6], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -702,7 +702,7 @@ mod tests { ]), logical_rows: vec![2], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -713,7 +713,7 @@ mod tests { ]), logical_rows: (0..2).collect(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ) diff --git a/components/tidb_query_executors/src/table_scan_executor.rs b/components/tidb_query_executors/src/table_scan_executor.rs index 4397869fcaa..fa05071e8bd 100644 --- a/components/tidb_query_executors/src/table_scan_executor.rs +++ b/components/tidb_query_executors/src/table_scan_executor.rs @@ -723,7 +723,7 @@ mod tests { let expect_rows = *expect_rows; let expect_drained = start_row + expect_rows > total_rows; let result = block_on(executor.next_batch(expect_rows)); - assert_eq!(*result.is_drained.as_ref().unwrap(), expect_drained); + assert_eq!(result.is_drained.as_ref().unwrap().stop(), expect_drained); if expect_drained { // all remaining rows are fetched helper.expect_table_values( @@ -1286,7 +1286,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert_eq!(result.is_drained.unwrap(), true); + assert!(result.is_drained.unwrap().stop()); assert_eq!(result.logical_rows.len(), 1); assert_eq!(result.physical_columns.columns_len(), columns_is_pk.len()); for i in 0..columns_is_pk.len() { @@ -1394,7 +1394,7 
@@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert_eq!(result.is_drained.unwrap(), true); + assert!(result.is_drained.unwrap().stop()); assert_eq!(result.logical_rows.len(), 1); // We expect we fill the primary column with the value embedded in the common @@ -1575,7 +1575,7 @@ mod tests { .unwrap(); let mut result = block_on(executor.next_batch(10)); - assert_eq!(result.is_drained.unwrap(), true); + assert!(result.is_drained.unwrap().stop()); if !columns_info.is_empty() { assert_eq!(result.logical_rows.len(), 1); } diff --git a/components/tidb_query_executors/src/top_n_executor.rs b/components/tidb_query_executors/src/top_n_executor.rs index dd6b7be2dba..670b0e0a879 100644 --- a/components/tidb_query_executors/src/top_n_executor.rs +++ b/components/tidb_query_executors/src/top_n_executor.rs @@ -194,7 +194,7 @@ impl BatchTopNExecutor { self.process_batch_input(src_result.physical_columns, src_result.logical_rows)?; } - if src_is_drained { + if src_is_drained.stop() { Ok(Some(self.heap.take_all())) } else { Ok(None) @@ -268,7 +268,7 @@ impl BatchExecutor for BatchTopNExecutor { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }; } @@ -298,14 +298,14 @@ impl BatchExecutor for BatchTopNExecutor { physical_columns: logical_columns, logical_rows, warnings: self.context.take_warnings(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), } } Ok(None) => BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: self.context.take_warnings(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, } } @@ -352,7 +352,7 @@ mod tests { )]), logical_rows: (0..1).collect(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }], ); @@ -369,7 +369,7 @@ mod tests { let r = 
block_on(exec.next_batch(1)); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -383,13 +383,13 @@ mod tests { )]), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ); @@ -407,11 +407,11 @@ mod tests { let r = block_on(exec.next_batch(1)); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } /// Builds an executor that will return these data: @@ -453,7 +453,7 @@ mod tests { ]), logical_rows: vec![3, 0, 1], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -463,7 +463,7 @@ mod tests { ]), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -487,7 +487,7 @@ mod tests { ]), logical_rows: vec![1, 2, 0, 4], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ) @@ -529,12 +529,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); 
+ assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4, 5, 6]); @@ -560,7 +560,7 @@ mod tests { Real::new(4.0).ok() ] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -599,12 +599,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4, 5, 6]); @@ -630,7 +630,7 @@ mod tests { Real::new(4.0).ok() ] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -682,12 +682,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); @@ -711,7 +711,7 @@ mod tests { Real::new(4.0).ok() ] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } /// Builds an executor that will return these data: @@ -763,13 +763,13 @@ mod tests { ]), logical_rows: vec![2, 1, 0], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: 
Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -803,7 +803,7 @@ mod tests { ]), logical_rows: vec![0, 1, 2, 3], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ) @@ -846,12 +846,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); @@ -887,7 +887,7 @@ mod tests { Some(b"aa".to_vec()), ] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } #[test] @@ -927,12 +927,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); @@ -968,7 +968,7 @@ mod tests { Some(b"aa".to_vec()), ] ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } /// Builds an executor that will return these data: @@ -1024,13 +1024,13 @@ mod tests { ]), logical_rows: vec![2, 1, 0], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::empty(), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: 
Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -1064,7 +1064,7 @@ mod tests { ]), logical_rows: vec![2, 1, 0, 3], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ) @@ -1088,12 +1088,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); @@ -1103,7 +1103,7 @@ mod tests { r.physical_columns[col_index].decoded().to_int_vec(), expected ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); }; test_top5( @@ -1202,12 +1202,12 @@ mod tests { let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert!(r.logical_rows.is_empty()); assert_eq!(r.physical_columns.rows_len(), 0); - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); let r = block_on(exec.next_batch(1)); assert_eq!(&r.logical_rows, &[0, 1, 2, 3, 4]); @@ -1217,7 +1217,7 @@ mod tests { r.physical_columns[col_index].decoded().to_int_vec(), expected ); - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); }; test_top5_paging6( @@ -1325,7 +1325,7 @@ mod tests { ); let r1_is_drained = r1.is_drained.unwrap(); assert_eq!(r1_is_drained, r2.is_drained.unwrap()); - if r1_is_drained { + if r1_is_drained.stop() { break; } } diff --git a/components/tidb_query_executors/src/util/aggr_executor.rs 
b/components/tidb_query_executors/src/util/aggr_executor.rs index ceb9949f83b..0535e8dbd83 100644 --- a/components/tidb_query_executors/src/util/aggr_executor.rs +++ b/components/tidb_query_executors/src/util/aggr_executor.rs @@ -86,7 +86,7 @@ pub trait AggregationExecutorImpl: Send { fn iterate_available_groups( &mut self, entities: &mut Entities, - src_is_drained: bool, + src_is_drained: BatchExecIsDrain, iteratee: impl FnMut(&mut Entities, &[Box]) -> Result<()>, ) -> Result>; @@ -203,7 +203,9 @@ impl> AggregationExecutor Result<(Option, bool)> { + async fn handle_next_batch( + &mut self, + ) -> Result<(Option, BatchExecIsDrain)> { // Use max batch size from the beginning because aggregation // always needs to calculate over all data. let src_result = self @@ -231,16 +233,16 @@ impl> AggregationExecutor= required_row as usize { - src_is_drained = true + src_is_drained = BatchExecIsDrain::PagingDrain; } // StreamAgg will return groups_len - 1 rows immediately - if !src_is_drained && self.imp.is_partial_results_ready() { + if src_is_drained.is_remain() && self.imp.is_partial_results_ready() { self.required_row = Some(required_row + 1 - self.imp.groups_len() as u64) } } // aggregate result is always available when source is drained - let result = if src_is_drained || self.imp.is_partial_results_ready() { + let result = if src_is_drained.stop() || self.imp.is_partial_results_ready() { Some(self.aggregate_partial_results(src_is_drained)?) 
} else { None @@ -249,7 +251,10 @@ impl> AggregationExecutor Result { + fn aggregate_partial_results( + &mut self, + src_is_drained: BatchExecIsDrain, + ) -> Result { let groups_len = self.imp.groups_len(); let mut all_result_columns: Vec<_> = self .entities @@ -324,7 +329,7 @@ impl> BatchExecutor } } Ok((data, src_is_drained)) => { - self.is_ended = src_is_drained; + self.is_ended = src_is_drained.stop(); let logical_columns = data.unwrap_or_else(LazyBatchColumnVec::empty); let logical_rows = (0..logical_columns.rows_len()).collect(); BatchExecuteResult { @@ -464,7 +469,7 @@ pub mod tests { ]), logical_rows: vec![2, 4, 0, 1], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -476,7 +481,7 @@ pub mod tests { ]), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -494,7 +499,7 @@ pub mod tests { ]), logical_rows: vec![1], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ) @@ -540,7 +545,7 @@ pub mod tests { ]), logical_rows: vec![2, 4, 0, 1], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -549,7 +554,7 @@ pub mod tests { ]), logical_rows: Vec::new(), warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -567,7 +572,7 @@ pub mod tests { ]), logical_rows: vec![1, 2], warnings: EvalWarnings::default(), - is_drained: Ok(false), + is_drained: Ok(BatchExecIsDrain::Remain), }, BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![ @@ -576,7 +581,7 @@ pub mod tests { 
]), logical_rows: vec![1, 0], warnings: EvalWarnings::default(), - is_drained: Ok(true), + is_drained: Ok(BatchExecIsDrain::Drain), }, ], ) @@ -651,9 +656,9 @@ pub mod tests { for nth_call in 0..call_num { let r = block_on(exec.next_batch(1)); if nth_call == call_num - 1 { - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } else { - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); } assert_eq!(r.physical_columns.rows_len(), row_num[nth_call]); } @@ -681,9 +686,9 @@ pub mod tests { for nth_call in 0..call_num { let r = block_on(exec.next_batch(1)); if nth_call == call_num - 1 { - assert!(r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().stop()); } else { - assert!(!r.is_drained.unwrap()); + assert!(r.is_drained.unwrap().is_remain()); } assert_eq!(r.physical_columns.rows_len(), row_num[nth_call]); } diff --git a/components/tidb_query_executors/src/util/mock_executor.rs b/components/tidb_query_executors/src/util/mock_executor.rs index a6f11904b33..aee7e526425 100644 --- a/components/tidb_query_executors/src/util/mock_executor.rs +++ b/components/tidb_query_executors/src/util/mock_executor.rs @@ -95,7 +95,11 @@ impl BatchExecutor for MockScanExecutor { self.pos += 1; cur_row_idx += 1; } - let is_drained = self.pos >= self.rows.len(); + let is_drained = if self.pos >= self.rows.len() { + BatchExecIsDrain::Drain + } else { + BatchExecIsDrain::Remain + }; BatchExecuteResult { physical_columns: LazyBatchColumnVec::from(vec![VectorValue::Int(res_col.into())]), logical_rows: res_logical_rows, diff --git a/components/tidb_query_executors/src/util/scan_executor.rs b/components/tidb_query_executors/src/util/scan_executor.rs index 75c7cdc9fe3..be134725de6 100644 --- a/components/tidb_query_executors/src/util/scan_executor.rs +++ b/components/tidb_query_executors/src/util/scan_executor.rs @@ -188,10 +188,17 @@ impl BatchExecutor for ScanExecuto // *successfully* retrieving these rows. 
After that, if we only consumes // some of the rows (TopN / Limit), we should ignore this error. - match &is_drained { + let is_drained = match is_drained { // Note: `self.is_ended` is only used for assertion purpose. - Err(_) | Ok(true) => self.is_ended = true, - Ok(false) => {} + Err(e) => { + self.is_ended = true; + Err(e) + } + Ok(true) => { + self.is_ended = true; + Ok(BatchExecIsDrain::Drain) + } + Ok(false) => Ok(BatchExecIsDrain::Remain), }; BatchExecuteResult { diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index f292b5220e3..25fd67b9a99 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -388,7 +388,7 @@ impl RowSampleBuilder { res }; let _guard = sample.observe_cpu(); - is_drained = result.is_drained?; + is_drained = result.is_drained?.stop(); let columns_slice = result.physical_columns.as_slice(); let mut column_vals: Vec> = vec![vec![]; self.columns_info.len()]; @@ -887,7 +887,7 @@ impl SampleBuilder { let mut common_handle_fms = FmSketch::new(self.max_fm_sketch_size); while !is_drained { let result = self.data.next_batch(BATCH_MAX_SIZE).await; - is_drained = result.is_drained?; + is_drained = result.is_drained?.stop(); let mut columns_slice = result.physical_columns.as_slice(); let mut columns_info = &self.columns_info[..]; diff --git a/tests/benches/coprocessor_executors/util/bencher.rs b/tests/benches/coprocessor_executors/util/bencher.rs index 246510f991b..4b4734f3038 100644 --- a/tests/benches/coprocessor_executors/util/bencher.rs +++ b/tests/benches/coprocessor_executors/util/bencher.rs @@ -64,7 +64,7 @@ impl E> Bencher for BatchNextAllBencher { loop { let r = block_on(executor.next_batch(1024)); black_box(&r); - if r.is_drained.unwrap() { + if r.is_drained.unwrap().stop() { break; } } diff --git a/tests/benches/coprocessor_executors/util/fixture.rs b/tests/benches/coprocessor_executors/util/fixture.rs index 24062c7a2da..e3306d3e0ed 100644 --- 
a/tests/benches/coprocessor_executors/util/fixture.rs +++ b/tests/benches/coprocessor_executors/util/fixture.rs @@ -314,7 +314,11 @@ impl BatchExecutor for BatchFixtureExecutor { physical_columns, logical_rows, warnings: EvalWarnings::default(), - is_drained: Ok(self.columns[0].is_empty()), + is_drained: Ok(if self.columns[0].is_empty() { + BatchExecIsDrain::Drain + } else { + BatchExecIsDrain::Remain + }), } } diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index b3a6bf76c01..d7f6540a3c6 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -261,6 +261,16 @@ fn test_paging_scan() { let resp = handle_request(&endpoint, req); assert!(resp.range.is_none()); assert!(resp.range.is_none()); + + let agg_req = DagSelect::from(&product) + .count(&product["count"]) + .group_by(&[&product["name"]]) + .output_offsets(Some(vec![0, 1])) + .desc(desc) + .paging_size(2) + .build(); + let resp = handle_request(&endpoint, agg_req); + assert!(resp.range.is_some()); } } From 1208135d166ef91faa3997119604ca8290452380 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 2 Mar 2023 09:13:09 +0800 Subject: [PATCH 0558/1149] file_system: initialize io stats sentinel on thread start (#14319) ref tikv/tikv#10867 Fix thread I/O not monitored if I/O type isn't set. 
Signed-off-by: tabokie --- components/file_system/src/io_stats/proc.rs | 63 +++++++++++++++++++++ components/tikv_util/src/sys/thread.rs | 14 +++++ components/tikv_util/src/yatp_pool/mod.rs | 1 + 3 files changed, 78 insertions(+) diff --git a/components/file_system/src/io_stats/proc.rs b/components/file_system/src/io_stats/proc.rs index 652fe05c658..fca0f6a64b1 100644 --- a/components/file_system/src/io_stats/proc.rs +++ b/components/file_system/src/io_stats/proc.rs @@ -138,6 +138,12 @@ pub fn init() -> Result<(), String> { ThreadId::current() .fetch_io_bytes() .map_err(|e| format!("failed to fetch I/O bytes from proc: {}", e))?; + // Manually initialize the sentinel so that `fetch_io_bytes` doesn't miss any + // thread. + LOCAL_IO_STATS.get_or(|| CachePadded::new(Mutex::new(LocalIoStats::current()))); + tikv_util::sys::thread::hook_thread_start(Box::new(|| { + LOCAL_IO_STATS.get_or(|| CachePadded::new(Mutex::new(LocalIoStats::current()))); + })); Ok(()) } @@ -179,11 +185,13 @@ mod tests { use std::{ io::{Read, Write}, os::unix::fs::OpenOptionsExt, + sync::mpsc, }; use libc::O_DIRECT; use maligned::{AsBytes, AsBytesMut, A512}; use tempfile::{tempdir, tempdir_in}; + use tikv_util::sys::thread::StdThreadBuildWrapper; use super::*; use crate::{OpenOptions, WithIoType}; @@ -243,6 +251,61 @@ mod tests { } } + #[test] + fn test_fetch_all_io_bytes() { + let tmp = tempdir_in("/var/tmp").unwrap_or_else(|_| tempdir().unwrap()); + + init().unwrap(); + + let file_path = tmp.path().join("test_fetch_all_io_bytes_1.txt"); + let (tx1, rx1) = mpsc::sync_channel(0); + let t1 = std::thread::Builder::new() + .spawn_wrapper(move || { + set_io_type(IoType::ForegroundWrite); + let mut f = OpenOptions::new() + .write(true) + .create(true) + .custom_flags(O_DIRECT) + .open(file_path) + .unwrap(); + let w = vec![A512::default(); 8]; + f.write_all(w.as_bytes()).unwrap(); + f.sync_all().unwrap(); + tx1.send(()).unwrap(); + tx1.send(()).unwrap(); + }) + .unwrap(); + + let file_path = 
tmp.path().join("test_fetch_all_io_bytes_2.txt"); + let (tx2, rx2) = mpsc::sync_channel(0); + let t2 = std::thread::Builder::new() + .spawn_wrapper(move || { + let mut f = OpenOptions::new() + .write(true) + .create(true) + .custom_flags(O_DIRECT) + .open(file_path) + .unwrap(); + let w = vec![A512::default(); 8]; + f.write_all(w.as_bytes()).unwrap(); + f.sync_all().unwrap(); + tx2.send(()).unwrap(); + tx2.send(()).unwrap(); + }) + .unwrap(); + + rx1.recv().unwrap(); + rx2.recv().unwrap(); + let bytes = fetch_io_bytes(); + assert_eq!(bytes[IoType::ForegroundWrite as usize].write, 4096); + assert_eq!(bytes[IoType::Other as usize].write, 4096); + + rx1.recv().unwrap(); + rx2.recv().unwrap(); + t1.join().unwrap(); + t2.join().unwrap(); + } + #[bench] fn bench_fetch_thread_io_bytes(b: &mut test::Bencher) { let mut id = ThreadId::current(); diff --git a/components/tikv_util/src/sys/thread.rs b/components/tikv_util/src/sys/thread.rs index 60c420661d0..1f138669b96 100644 --- a/components/tikv_util/src/sys/thread.rs +++ b/components/tikv_util/src/sys/thread.rs @@ -384,6 +384,17 @@ pub trait ThreadBuildWrapper { lazy_static::lazy_static! 
{ pub static ref THREAD_NAME_HASHMAP: Mutex> = Mutex::new(HashMap::default()); + pub static ref THREAD_START_HOOKS: Mutex>> = Mutex::new(Vec::new()); +} + +pub fn hook_thread_start(f: Box) { + THREAD_START_HOOKS.lock().unwrap().push(f); +} + +pub(crate) fn call_thread_start_hooks() { + for f in THREAD_START_HOOKS.lock().unwrap().iter() { + f(); + } } pub(crate) fn add_thread_name_to_map() { @@ -411,6 +422,7 @@ impl StdThreadBuildWrapper for std::thread::Builder { { #[allow(clippy::disallowed_methods)] self.spawn(|| { + call_thread_start_hooks(); add_thread_name_to_map(); let res = f(); remove_thread_name_from_map(); @@ -426,6 +438,7 @@ impl ThreadBuildWrapper for tokio::runtime::Builder { { #[allow(clippy::disallowed_methods)] self.on_thread_start(move || { + call_thread_start_hooks(); add_thread_name_to_map(); f(); }) @@ -450,6 +463,7 @@ impl ThreadBuildWrapper for futures::executor::ThreadPoolBuilder { { #[allow(clippy::disallowed_methods)] self.after_start(move |_| { + call_thread_start_hooks(); add_thread_name_to_map(); f(); }) diff --git a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index 05c245bd5a3..930185a1440 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -172,6 +172,7 @@ impl Runner for YatpPoolRunner { type TaskCell = TaskCell; fn start(&mut self, local: &mut Local) { + crate::sys::thread::call_thread_start_hooks(); crate::sys::thread::add_thread_name_to_map(); if let Some(props) = self.props.take() { crate::thread_group::set_properties(Some(props)); From b050f07c403d6bf63ea54b56e8adc0c542acb1fd Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 2 Mar 2023 09:43:10 +0800 Subject: [PATCH 0559/1149] integration test v2: introduce TestRaftKv2 (#14300) ref tikv/tikv#12842 introduce TestRaftKv2 Signed-off-by: SpadeA-Tang Co-authored-by: Xinye Tao --- components/test_raftstore-v2/src/cluster.rs | 106 +++++++++- 
components/test_raftstore-v2/src/server.rs | 185 ++++++++++++++++-- .../src/transport_simulate.rs | 4 + components/test_raftstore-v2/src/util.rs | 24 ++- components/test_raftstore/src/cluster.rs | 12 ++ src/server/mod.rs | 2 +- src/server/raftkv2/mod.rs | 1 + tests/failpoints/cases/test_conf_change.rs | 32 +-- tests/failpoints/cases/test_snap.rs | 15 +- tests/integrations/raftstore/test_snap.rs | 97 +-------- 10 files changed, 348 insertions(+), 130 deletions(-) diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 30d3456d652..164794aca56 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -27,7 +27,7 @@ use kvproto::{ AdminCmdType, CmdType, RaftCmdRequest, RaftCmdResponse, RegionDetailResponse, Request, Response, StatusCmdType, }, - raft_serverpb::{PeerState, RaftApplyState, RegionLocalState, StoreIdent}, + raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent}, }; use pd_client::PdClient; use raftstore::{ @@ -53,8 +53,13 @@ use test_raftstore::{ }; use tikv::server::Result as ServerResult; use tikv_util::{ - box_err, box_try, debug, error, safe_panic, thread_group::GroupProperties, time::Instant, - timer::GLOBAL_TIMER_HANDLE, warn, worker::LazyWorker, HandyRwLock, + box_err, box_try, debug, error, safe_panic, + thread_group::GroupProperties, + time::{Instant, ThreadReadId}, + timer::GLOBAL_TIMER_HANDLE, + warn, + worker::LazyWorker, + HandyRwLock, }; use crate::create_test_engine; @@ -314,6 +319,17 @@ impl Cluster { self.cfg.server.cluster_id } + pub fn flush_data(&self) { + for reg in self.tablet_registries.values() { + reg.for_each_opened_tablet(|_, cached| -> bool { + if let Some(tablet) = cached.latest() { + tablet.flush_cf(CF_DEFAULT, true /* sync */).unwrap(); + } + true + }); + } + } + // Bootstrap the store with fixed ID (like 1, 2, .. 5) and // initialize first region in all stores, then start the cluster. 
pub fn run(&mut self) { @@ -569,6 +585,22 @@ impl Cluster { ) } + pub fn read( + &self, + // v2 does not need this + _batch_id: Option, + request: RaftCmdRequest, + timeout: Duration, + ) -> Result { + match self.sim.wl().read(request.clone(), timeout) { + Err(e) => { + warn!("failed to read {:?}: {:?}", request, e); + Err(e) + } + a => a, + } + } + // mixed read and write requests are not supportted pub fn call_command( &mut self, @@ -1111,10 +1143,18 @@ impl Cluster { self.sim.wl().add_send_filter(node_id, filter); } + pub fn clear_send_filter_on_node(&mut self, node_id: u64) { + self.sim.wl().clear_send_filters(node_id); + } + pub fn add_recv_filter_on_node(&mut self, node_id: u64, filter: Box) { self.sim.wl().add_recv_filter(node_id, filter); } + pub fn clear_recv_filter_on_node(&mut self, node_id: u64) { + self.sim.wl().clear_recv_filters(node_id); + } + pub fn add_send_filter(&self, factory: F) { let mut sim = self.sim.wl(); for node_id in sim.get_node_ids() { @@ -1312,6 +1352,10 @@ impl Cluster { self.sim.rl().get_snap_dir(node_id) } + pub fn get_router(&self, node_id: u64) -> Option> { + self.sim.rl().get_router(node_id) + } + pub fn refresh_region_bucket_keys( &mut self, _region: &metapb::Region, @@ -1330,6 +1374,58 @@ impl Cluster { unimplemented!() } + pub fn wait_tombstone(&self, region_id: u64, peer: metapb::Peer, check_exist: bool) { + let timer = Instant::now(); + let mut state; + loop { + state = self.region_local_state(region_id, peer.get_store_id()); + if state.get_state() == PeerState::Tombstone + && (!check_exist || state.get_region().get_peers().contains(&peer)) + { + return; + } + if timer.saturating_elapsed() > Duration::from_secs(5) { + break; + } + thread::sleep(Duration::from_millis(10)); + } + panic!( + "{:?} is still not gc in region {} {:?}", + peer, region_id, state + ); + } + + pub fn wait_destroy_and_clean(&self, region_id: u64, peer: metapb::Peer) { + let timer = Instant::now(); + self.wait_tombstone(region_id, peer.clone(), false); 
+ let mut state; + loop { + state = self.get_raft_local_state(region_id, peer.get_store_id()); + if state.is_none() { + return; + } + if timer.saturating_elapsed() > Duration::from_secs(5) { + break; + } + thread::sleep(Duration::from_millis(10)); + } + panic!( + "{:?} is still not cleaned in region {} {:?}", + peer, region_id, state + ); + } + + pub fn region_local_state(&self, region_id: u64, store_id: u64) -> RegionLocalState { + self.get_engine(store_id) + .get_region_state(region_id) + .unwrap() + .unwrap() + } + + pub fn get_raft_local_state(&self, region_id: u64, store_id: u64) -> Option { + self.get_engine(store_id).get_raft_local_state(region_id) + } + pub fn shutdown(&mut self) { debug!("about to shutdown cluster"); let keys = match self.sim.read() { @@ -1427,6 +1523,10 @@ impl WrapFactory { pub fn get_apply_state(&self, region_id: u64) -> engine_traits::Result> { self.raft_engine.get_apply_state(region_id, u64::MAX) } + + pub fn get_raft_local_state(&self, region_id: u64) -> Option { + self.raft_engine.get_raft_state(region_id).unwrap() + } } impl Peekable for WrapFactory { diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 1aa3bfc47f8..dbcede48a6a 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -25,6 +25,7 @@ use kvproto::{ kvrpcpb::{ApiVersion, Context}, metapb, raft_cmdpb::RaftCmdResponse, + raft_serverpb::RaftMessage, tikvpb_grpc::TikvClient, }; use pd_client::PdClient; @@ -32,8 +33,8 @@ use raftstore::{ coprocessor::CoprocessorHost, errors::Error as RaftError, store::{ - AutoSplitController, CheckLeaderRunner, FlowStatsReporter, ReadStats, RegionSnapshot, - TabletSnapManager, WriteStats, + region_meta, AutoSplitController, CheckLeaderRunner, FlowStatsReporter, ReadStats, + RegionSnapshot, TabletSnapManager, WriteStats, }, RegionInfoAccessor, }; @@ -44,7 +45,7 @@ use security::SecurityManager; use slog_global::debug; use 
tempfile::TempDir; use test_pd_client::TestPdClient; -use test_raftstore::{AddressMap, Config}; +use test_raftstore::{filter_send, AddressMap, Config, Filter}; use tikv::{ coprocessor, coprocessor_v2, import::SstImporter, @@ -52,12 +53,12 @@ use tikv::{ server::{ gc_worker::GcWorker, load_statistics::ThreadLoadPool, lock_manager::LockManager, raftkv::ReplicaReadLockChecker, resolve, service::DiagnosticsService, ConnectionBuilder, - Error, NodeV2, PdStoreAddrResolver, RaftClient, RaftKv2, Result as ServerResult, Server, - ServerTransport, + Error, Extension, NodeV2, PdStoreAddrResolver, RaftClient, RaftKv2, Result as ServerResult, + Server, ServerTransport, }, storage::{ self, - kv::{FakeExtension, SnapContext}, + kv::{FakeExtension, RaftExtension, SnapContext}, txn::flow_controller::{EngineFlowController, FlowController}, Engine, Storage, }, @@ -84,16 +85,159 @@ impl FlowStatsReporter for DummyReporter { fn report_write_stats(&self, _write_stats: WriteStats) {} } -type SimulateRaftExtension = ::RaftExtension; +type SimulateRaftExtension = ::RaftExtension; type SimulateStoreTransport = SimulateTransport>; type SimulateServerTransport = SimulateTransport>; pub type SimulateEngine = RaftKv2; +// TestRaftKvv2 behaves the same way with RaftKv2, except that it has filters +// that can mock various network conditions. 
+#[derive(Clone)] +pub struct TestRaftKv2 { + raftkv: SimulateEngine, + filters: Arc>>>, +} + +impl TestRaftKv2 { + pub fn new(raftkv: SimulateEngine, filters: Arc>>>) -> TestRaftKv2 { + TestRaftKv2 { raftkv, filters } + } + + pub fn set_txn_extra_scheduler(&mut self, txn_extra_scheduler: Arc) { + self.raftkv.set_txn_extra_scheduler(txn_extra_scheduler); + } +} + +impl Engine for TestRaftKv2 { + type Snap = RegionSnapshot<::Snapshot>; + type Local = RocksEngine; + + fn kv_engine(&self) -> Option { + self.raftkv.kv_engine() + } + + type RaftExtension = TestExtension; + fn raft_extension(&self) -> Self::RaftExtension { + TestExtension::new(self.raftkv.raft_extension(), self.filters.clone()) + } + + fn modify_on_kv_engine( + &self, + region_modifies: HashMap>, + ) -> storage::kv::Result<()> { + self.raftkv.modify_on_kv_engine(region_modifies) + } + + type SnapshotRes = ::SnapshotRes; + fn async_snapshot(&mut self, ctx: SnapContext<'_>) -> Self::SnapshotRes { + self.raftkv.async_snapshot(ctx) + } + + type WriteRes = ::WriteRes; + fn async_write( + &self, + ctx: &Context, + batch: storage::kv::WriteData, + subscribed: u8, + on_applied: Option, + ) -> Self::WriteRes { + self.raftkv.async_write(ctx, batch, subscribed, on_applied) + } + + #[inline] + fn precheck_write_with_ctx(&self, ctx: &Context) -> storage::kv::Result<()> { + self.raftkv.precheck_write_with_ctx(ctx) + } + + #[inline] + fn schedule_txn_extra(&self, txn_extra: txn_types::TxnExtra) { + self.raftkv.schedule_txn_extra(txn_extra) + } +} + +#[derive(Clone)] +pub struct TestExtension { + extension: Extension, + filters: Arc>>>, +} + +impl TestExtension { + pub fn new( + extension: Extension, + filters: Arc>>>, + ) -> Self { + TestExtension { extension, filters } + } +} + +impl RaftExtension for TestExtension { + fn feed(&self, msg: RaftMessage, key_message: bool) { + let send = |msg| -> raftstore::Result<()> { + self.extension.feed(msg, key_message); + Ok(()) + }; + + let _ = filter_send(&self.filters, msg, 
send); + } + + #[inline] + fn report_reject_message(&self, region_id: u64, from_peer_id: u64) { + self.extension + .report_reject_message(region_id, from_peer_id) + } + + #[inline] + fn report_peer_unreachable(&self, region_id: u64, to_peer_id: u64) { + self.extension + .report_peer_unreachable(region_id, to_peer_id) + } + + #[inline] + fn report_store_unreachable(&self, store_id: u64) { + self.extension.report_store_unreachable(store_id) + } + + #[inline] + fn report_snapshot_status( + &self, + region_id: u64, + to_peer_id: u64, + status: raft::SnapshotStatus, + ) { + self.extension + .report_snapshot_status(region_id, to_peer_id, status) + } + + #[inline] + fn report_resolved(&self, store_id: u64, group_id: u64) { + self.extension.report_resolved(store_id, group_id) + } + + #[inline] + fn split( + &self, + region_id: u64, + region_epoch: metapb::RegionEpoch, + split_keys: Vec>, + source: String, + ) -> futures::future::BoxFuture<'static, storage::kv::Result>> { + self.extension + .split(region_id, region_epoch, split_keys, source) + } + + fn query_region( + &self, + region_id: u64, + ) -> futures::future::BoxFuture<'static, storage::kv::Result> { + self.extension.query_region(region_id) + } +} + pub struct ServerMeta { node: NodeV2, - server: Server, + server: Server, sim_router: SimulateStoreTransport, sim_trans: SimulateServerTransport, raw_router: StoreRouter, @@ -105,7 +249,7 @@ type PendingServices = Vec Service>>; pub struct ServerCluster { metas: HashMap, addrs: AddressMap, - pub storages: HashMap, + pub storages: HashMap, pub region_info_accessors: HashMap, snap_paths: HashMap, snap_mgrs: HashMap, @@ -229,9 +373,10 @@ impl ServerCluster { let region_info_accessor = RegionInfoAccessor::new(&mut coprocessor_host); let sim_router = SimulateTransport::new(raft_router.clone()); - // todo(SpadeA): simulate transport - let mut raft_kv_v2 = - RaftKv2::new(raft_router.clone(), region_info_accessor.region_leaders()); + let mut raft_kv_v2 = TestRaftKv2::new( + 
RaftKv2::new(raft_router.clone(), region_info_accessor.region_leaders()), + sim_router.filters().clone(), + ); // Create storage. let pd_worker = LazyWorker::new("test-pd-worker"); @@ -544,12 +689,20 @@ impl Simulator for ServerCluster { .clear_filters(); } - fn add_recv_filter(&mut self, _node_id: u64, _filter: Box) { - unimplemented!() + fn add_recv_filter(&mut self, node_id: u64, filter: Box) { + self.metas + .get_mut(&node_id) + .unwrap() + .sim_router + .add_filter(filter); } - fn clear_recv_filters(&mut self, _node_id: u64) { - unimplemented!() + fn clear_recv_filters(&mut self, node_id: u64) { + self.metas + .get_mut(&node_id) + .unwrap() + .sim_router + .clear_filters(); } fn run_node( diff --git a/components/test_raftstore-v2/src/transport_simulate.rs b/components/test_raftstore-v2/src/transport_simulate.rs index b55c29dbd3a..9c11505d75f 100644 --- a/components/test_raftstore-v2/src/transport_simulate.rs +++ b/components/test_raftstore-v2/src/transport_simulate.rs @@ -42,6 +42,10 @@ impl SimulateTransport { pub fn add_filter(&mut self, filter: Box) { self.filters.wl().push(filter); } + + pub fn filters(&self) -> &Arc>>> { + &self.filters + } } impl Transport for SimulateTransport { diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index 2f512982019..d9a0377210b 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -7,11 +7,12 @@ use engine_rocks::{RocksEngine, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{TabletRegistry, CF_DEFAULT}; use file_system::IoRateLimiter; -use kvproto::kvrpcpb::Context; +use kvproto::{kvrpcpb::Context, metapb, raft_cmdpb::RaftCmdResponse}; +use raftstore::Result; use rand::RngCore; use server::server2::ConfiguredRaftEngine; use tempfile::TempDir; -use test_raftstore::{new_put_cf_cmd, Config}; +use test_raftstore::{new_get_cmd, new_put_cf_cmd, new_request, Config}; use tikv::{ 
server::KvEngineFactoryBuilder, storage::{ @@ -189,3 +190,22 @@ pub fn wait_for_synced(cluster: &mut Cluster, node_id: u64, regio } assert!(snapshot.ext().is_max_ts_synced()); } + +// Issue a read request on the specified peer. +pub fn read_on_peer( + cluster: &mut Cluster, + peer: metapb::Peer, + region: metapb::Region, + key: &[u8], + read_quorum: bool, + timeout: Duration, +) -> Result { + let mut request = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![new_get_cmd(key)], + read_quorum, + ); + request.mut_header().set_peer(peer); + cluster.read(None, request, timeout) +} diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index d4668fe4928..2a73f5e239c 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1339,6 +1339,10 @@ impl Cluster { self.sim.wl().add_send_filter(node_id, filter); } + pub fn clear_send_filter_on_node(&mut self, node_id: u64) { + self.sim.wl().clear_send_filters(node_id); + } + pub fn add_recv_filter_on_node(&mut self, node_id: u64, filter: Box) { self.sim.wl().add_recv_filter(node_id, filter); } @@ -1352,6 +1356,10 @@ impl Cluster { } } + pub fn clear_recv_filter_on_node(&mut self, node_id: u64) { + self.sim.wl().clear_recv_filters(node_id); + } + pub fn transfer_leader(&mut self, region_id: u64, leader: metapb::Peer) { let epoch = self.get_region_epoch(region_id); let transfer_leader = new_admin_request(region_id, &epoch, new_transfer_leader_cmd(leader)); @@ -1830,6 +1838,10 @@ impl Cluster { ctx } + pub fn get_router(&self, node_id: u64) -> Option> { + self.sim.rl().get_router(node_id) + } + pub fn refresh_region_bucket_keys( &mut self, region: &metapb::Region, diff --git a/src/server/mod.rs b/src/server/mod.rs index 0bb6da62ac7..773e2040f17 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -36,7 +36,7 @@ pub use self::{ proxy::{build_forward_option, get_target_address, Proxy}, 
raft_client::{ConnectionBuilder, RaftClient}, raftkv::RaftKv, - raftkv2::{NodeV2, RaftKv2}, + raftkv2::{Extension, NodeV2, RaftKv2}, resolve::{PdStoreAddrResolver, StoreAddrResolver}, server::{Server, GRPC_THREAD_PREFIX}, transport::ServerTransport, diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 9fb4ef70b03..60e0a53a20a 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -15,6 +15,7 @@ use engine_traits::{KvEngine, RaftEngine, CF_LOCK}; use futures::{Future, Stream, StreamExt}; use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, Request}; pub use node::NodeV2; +pub use raft_extension::Extension; use raftstore::store::{util::encode_start_ts_into_flag_data, RegionSnapshot}; use raftstore_v2::{ router::{ diff --git a/tests/failpoints/cases/test_conf_change.rs b/tests/failpoints/cases/test_conf_change.rs index 7821c8be5df..0a1be37cab6 100644 --- a/tests/failpoints/cases/test_conf_change.rs +++ b/tests/failpoints/cases/test_conf_change.rs @@ -11,12 +11,14 @@ use kvproto::raft_serverpb::RaftMessage; use pd_client::PdClient; use raft::eraftpb::{ConfChangeType, MessageType}; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, HandyRwLock}; -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_destroy_local_reader() { // 3 nodes cluster. - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // Set election timeout and max leader lease to 1s. configure_for_lease_read(&mut cluster.cfg, Some(100), Some(10)); @@ -141,10 +143,11 @@ fn test_write_after_destroy() { must_region_cleared(&engines_3, ®ion); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_tick_after_destroy() { // 3 nodes cluster. 
- let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(50); let pd_client = cluster.pd_client.clone(); @@ -186,10 +189,11 @@ fn test_tick_after_destroy() { must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_stale_peer_cache() { // 3 nodes cluster. - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.run(); // Now region 1 only has peer (1, 1); @@ -213,9 +217,10 @@ fn test_stale_peer_cache() { // 6. then peer 3 calling `Raft::apply_conf_change` to add peer 4; // 7. so the disk configuration `[1, 2, 3]` is different from memory // configuration `[1, 2, 3, 4]`. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_redundant_conf_change_by_snapshot() { - let mut cluster = new_node_cluster(0, 4); + let mut cluster = new_cluster(0, 4); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(5); cluster.cfg.raft_store.merge_max_log_gap = 4; cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); @@ -239,7 +244,7 @@ fn test_redundant_conf_change_by_snapshot() { .direction(Direction::Recv) .msg_type(MessageType::MsgAppend), ); - cluster.sim.wl().add_recv_filter(3, filter); + cluster.add_recv_filter_on_node(3, filter); // propose to remove peer 4, and append more entries to compact raft logs. cluster.pd_client.must_remove_peer(1, new_peer(4, 4)); @@ -247,7 +252,7 @@ fn test_redundant_conf_change_by_snapshot() { sleep_ms(50); // Clear filters on peer 3, so it can receive and restore a snapshot. - cluster.sim.wl().clear_recv_filters(3); + cluster.clear_recv_filter_on_node(3); sleep_ms(100); // Use a filter to capture messages sent from 3 to 4. 
@@ -264,7 +269,7 @@ fn test_redundant_conf_change_by_snapshot() { .when(Arc::new(AtomicBool::new(false))) .set_msg_callback(cb), ); - cluster.sim.wl().add_send_filter(3, filter); + cluster.add_recv_filter_on_node(3, filter); // Unpause the fail point, so peer 3 can apply the redundant conf change result. fail::cfg("apply_on_conf_change_3_1", "off").unwrap(); @@ -275,9 +280,10 @@ fn test_redundant_conf_change_by_snapshot() { fail::remove("apply_on_conf_change_3_1"); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_handle_conf_change_when_apply_fsm_resume_pending_state() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); diff --git a/tests/failpoints/cases/test_snap.rs b/tests/failpoints/cases/test_snap.rs index ca329896df1..64b03f6d0b3 100644 --- a/tests/failpoints/cases/test_snap.rs +++ b/tests/failpoints/cases/test_snap.rs @@ -15,6 +15,7 @@ use engine_traits::RaftEngineReadOnly; use kvproto::raft_serverpb::RaftMessage; use raft::eraftpb::MessageType; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::{config::*, time::Instant, HandyRwLock}; #[test] @@ -384,9 +385,10 @@ fn test_shutdown_when_snap_gc() { } // Test if a peer handle the old snapshot properly. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_receive_old_snapshot() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_snapshot(&mut cluster.cfg); cluster.cfg.raft_store.right_derive_when_split = true; @@ -420,7 +422,7 @@ fn test_receive_old_snapshot() { .msg_type(MessageType::MsgSnapshot) .reserve_dropped(Arc::clone(&dropped_msgs)), ); - cluster.sim.wl().add_recv_filter(2, recv_filter); + cluster.add_recv_filter_on_node(2, recv_filter); cluster.clear_send_filters(); for _ in 0..20 { @@ -440,17 +442,18 @@ fn test_receive_old_snapshot() { std::mem::take(guard.as_mut()) }; - cluster.sim.wl().clear_recv_filters(2); + cluster.clear_recv_filter_on_node(2); for i in 20..40 { cluster.must_put(format!("k{}", i).as_bytes(), b"v1"); } must_get_equal(&cluster.get_engine(2), b"k39", b"v1"); - let router = cluster.sim.wl().get_router(2).unwrap(); + let router = cluster.get_router(2).unwrap(); // Send the old snapshot for raft_msg in msgs { - router.send_raft_message(raft_msg).unwrap(); + #[allow(clippy::useless_conversion)] + router.send_raft_message(raft_msg.into()).unwrap(); } cluster.must_put(b"k40", b"v1"); diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index a69a2216cd4..4d9290b4eff 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -211,55 +211,11 @@ fn test_server_snap_gc() { test_server_snap_gc_internal("5.1.0"); } -/// A helper function for testing the handling of snapshot is correct -/// when there are multiple snapshots which have overlapped region ranges -/// arrive at the same raftstore. -fn test_concurrent_snap(cluster: &mut Cluster) { - cluster.cfg.rocksdb.titan.enabled = true; - // Disable raft log gc in this test case. 
- cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(60); - - let pd_client = Arc::clone(&cluster.pd_client); - // Disable default max peer count check. - pd_client.disable_default_operator(); - - let r1 = cluster.run_conf_change(); - cluster.must_put(b"k1", b"v1"); - pd_client.must_add_peer(r1, new_peer(2, 2)); - // Force peer 2 to be followers all the way. - cluster.add_send_filter(CloneFilterFactory( - RegionPacketFilter::new(r1, 2) - .msg_type(MessageType::MsgRequestVote) - .direction(Direction::Send), - )); - cluster.must_transfer_leader(r1, new_peer(1, 1)); - cluster.must_put(b"k3", b"v3"); - // Pile up snapshots of overlapped region ranges and deliver them all at once. - let (tx, rx) = mpsc::channel(); - cluster - .sim - .wl() - .add_recv_filter(3, Box::new(CollectSnapshotFilter::new(tx))); - pd_client.must_add_peer(r1, new_peer(3, 3)); - let region = cluster.get_region(b"k1"); - // Ensure the snapshot of range ("", "") is sent and piled in filter. - if let Err(e) = rx.recv_timeout(Duration::from_secs(1)) { - panic!("the snapshot is not sent before split, e: {:?}", e); - } - // Split the region range and then there should be another snapshot for the - // split ranges. - cluster.must_split(®ion, b"k2"); - must_get_equal(&cluster.get_engine(3), b"k3", b"v3"); - // Ensure the regions work after split. 
- cluster.must_put(b"k11", b"v11"); - must_get_equal(&cluster.get_engine(3), b"k11", b"v11"); - cluster.must_put(b"k4", b"v4"); - must_get_equal(&cluster.get_engine(3), b"k4", b"v4"); -} - #[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] #[test_case(test_raftstore_v2::new_node_cluster)] -fn test_node_concurrent_snap() { +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_concurrent_snap() { let mut cluster = new_cluster(0, 3); // Test that the handling of snapshot is correct when there are multiple // snapshots which have overlapped region ranges arrive at the same @@ -303,12 +259,6 @@ fn test_node_concurrent_snap() { must_get_equal(&cluster.get_engine(3), b"k4", b"v4"); } -#[test] -fn test_server_concurrent_snap() { - let mut cluster = new_server_cluster(0, 3); - test_concurrent_snap(&mut cluster); -} - #[test_case(test_raftstore::new_node_cluster)] #[test_case(test_raftstore::new_server_cluster)] #[test_case(test_raftstore_v2::new_node_cluster)] @@ -389,9 +339,10 @@ impl Filter for StaleSnap { } } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_stale_snap() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // disable compact log to make snapshot only be sent when peer is first added. cluster.cfg.raft_store.raft_log_gc_threshold = 1000; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(1000); @@ -476,36 +427,10 @@ impl Filter for SnapshotAppendFilter { } } -// todo(SpadeA): to be removed when receive filter is supported on ServerCluster -// V2 -fn test_snapshot_with_append(cluster: &mut Cluster) { - configure_for_snapshot(&mut cluster.cfg); - - let pd_client = Arc::clone(&cluster.pd_client); - // Disable default max peer count check. - pd_client.disable_default_operator(); - cluster.run(); - - // In case of removing leader, let's transfer leader to some node first. 
- cluster.must_transfer_leader(1, new_peer(1, 1)); - pd_client.must_remove_peer(1, new_peer(4, 4)); - - let (tx, rx) = mpsc::channel(); - cluster - .sim - .wl() - .add_recv_filter(4, Box::new(SnapshotAppendFilter::new(tx))); - pd_client.add_peer(1, new_peer(4, 5)); - rx.recv_timeout(Duration::from_secs(3)).unwrap(); - cluster.must_put(b"k1", b"v1"); - cluster.must_put(b"k2", b"v2"); - let engine4 = cluster.get_engine(4); - must_get_equal(&engine4, b"k1", b"v1"); - must_get_equal(&engine4, b"k2", b"v2"); -} - #[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] #[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_node_snapshot_with_append() { let mut cluster = new_cluster(0, 4); configure_for_snapshot(&mut cluster.cfg); @@ -530,12 +455,6 @@ fn test_node_snapshot_with_append() { must_get_equal(&engine4, b"k2", b"v2"); } -#[test] -fn test_server_snapshot_with_append() { - let mut cluster = new_server_cluster(0, 4); - test_snapshot_with_append(&mut cluster); -} - #[test] fn test_inspected_snapshot() { let mut cluster = new_server_cluster(1, 3); From 69dba51a41f1d24a7740e64ad1cb1725a93c29e1 Mon Sep 17 00:00:00 2001 From: Jay Date: Thu, 2 Mar 2023 13:49:10 +0800 Subject: [PATCH 0560/1149] raftstore-v2: reduce file count (#14318) close tikv/tikv#14306, close tikv/tikv#14316, close tikv/tikv#14324 Compaction guard is disabled in v2, which will use 8MiB for file size. We need set multiplier to reduce sst file count. This PR also fixes a race between region creation and destroy. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 6 +-- components/raftstore-v2/src/fsm/peer.rs | 33 +++++++++++- .../raftstore-v2/src/operation/command/mod.rs | 12 ++--- components/raftstore-v2/src/operation/life.rs | 5 +- components/test_util/src/lib.rs | 36 +++++++++++++ etc/config-template.toml | 7 ++- src/config/mod.rs | 50 ++++++++++++++----- tests/integrations/config/mod.rs | 46 ++++------------- tests/integrations/config/test-custom.toml | 5 ++ 9 files changed, 138 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2bd382ee8f0..ea1ebcfbb3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2862,7 +2862,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#b2cd42588ac62e40e297fea56a2286c0c389aade" +source = "git+https://github.com/tikv/rust-rocksdb.git#cd8b60758b46afbbde6fde52fa86a2776b401723" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2881,7 +2881,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#b2cd42588ac62e40e297fea56a2286c0c389aade" +source = "git+https://github.com/tikv/rust-rocksdb.git#cd8b60758b46afbbde6fde52fa86a2776b401723" dependencies = [ "bzip2-sys", "cc", @@ -4799,7 +4799,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#b2cd42588ac62e40e297fea56a2286c0c389aade" +source = "git+https://github.com/tikv/rust-rocksdb.git#cd8b60758b46afbbde6fde52fa86a2776b401723" dependencies = [ "libc 0.2.139", "librocksdb_sys", diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 814a0b1311a..2c47ab165f2 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -7,6 +7,7 @@ use std::borrow::Cow; use batch_system::{BasicMailbox, Fsm}; use crossbeam::channel::TryRecvError; use engine_traits::{KvEngine, 
RaftEngine, TabletRegistry}; +use kvproto::{errorpb, raft_cmdpb::RaftCmdResponse}; use raftstore::store::{Config, TabletSnapManager, Transport}; use slog::{debug, error, info, trace, Logger}; use tikv_util::{ @@ -18,7 +19,7 @@ use tikv_util::{ use crate::{ batch::StoreContext, raft::{Peer, Storage}, - router::{PeerMsg, PeerTick}, + router::{PeerMsg, PeerTick, QueryResult}, Result, }; @@ -335,3 +336,33 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, self.schedule_pending_ticks(); } } + +impl Drop for PeerFsm { + fn drop(&mut self) { + self.peer_mut().pending_reads_mut().clear_all(None); + + let region_id = self.peer().region_id(); + + let build_resp = || { + let mut err = errorpb::Error::default(); + err.set_message("region is not found".to_owned()); + err.mut_region_not_found().set_region_id(region_id); + let mut resp = RaftCmdResponse::default(); + resp.mut_header().set_error(err); + resp + }; + while let Ok(msg) = self.receiver.try_recv() { + match msg { + // Only these messages need to be responded explicitly as they rely on + // deterministic response. + PeerMsg::RaftQuery(query) => { + query.ch.set_result(QueryResult::Response(build_resp())); + } + PeerMsg::SimpleWrite(w) => { + w.ch.set_result(build_resp()); + } + _ => continue, + } + } + } +} diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index ea8c8c227d0..6cb4460428d 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -300,13 +300,13 @@ impl Peer { committed_time: Instant::now(), }; assert!( - self.apply_scheduler().is_some(), - "apply_scheduler should be something. 
region_id {}", - self.region_id() + self.apply_scheduler().is_some() || ctx.router.is_shutdown(), + "{} apply_scheduler should not be None", + SlogFormat(&self.logger) ); - self.apply_scheduler() - .unwrap() - .send(ApplyTask::CommittedEntries(apply)); + if let Some(scheduler) = self.apply_scheduler() { + scheduler.send(ApplyTask::CommittedEntries(apply)); + } } #[inline] diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index fdba7efdf4d..9e9cc2f5fc0 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -618,18 +618,21 @@ impl Peer { pub fn finish_destroy(&mut self, ctx: &mut StoreContext) { info!(self.logger, "peer destroyed"); let region_id = self.region_id(); - ctx.router.close(region_id); { let mut meta = ctx.store_meta.lock().unwrap(); meta.remove_region(region_id); meta.readers.remove(®ion_id); ctx.tablet_registry.remove(region_id); } + // Remove tablet first, otherwise in extreme cases, a new peer can be created + // and race on tablet record removal and creation. + ctx.router.close(region_id); if let Some(msg) = self.destroy_progress_mut().finish() { // The message will be dispatched to store fsm, which will create a // new peer. Ignore error as it's just a best effort. let _ = ctx.router.send_raft_message(msg); } + self.pending_reads_mut().clear_all(Some(region_id)); self.clear_apply_scheduler(); } } diff --git a/components/test_util/src/lib.rs b/components/test_util/src/lib.rs index d2096e74c82..453ed7fb7f1 100644 --- a/components/test_util/src/lib.rs +++ b/components/test_util/src/lib.rs @@ -15,6 +15,7 @@ mod security; use std::{ env, + fmt::Debug, sync::atomic::{AtomicU16, Ordering}, thread, }; @@ -118,3 +119,38 @@ pub fn temp_dir(prefix: impl Into>, prefer_mem: bool) -> te _ => builder.tempdir().unwrap(), } } + +/// Compare two structs and provide more helpful debug difference. 
+#[track_caller] +pub fn assert_eq_debug(lhs: &C, rhs: &C) { + if lhs == rhs { + return; + } + let lhs_str = format!("{:?}", lhs); + let rhs_str = format!("{:?}", rhs); + + fn find_index(l: impl Iterator) -> usize { + let it = l + .enumerate() + .take_while(|(_, (l, r))| l == r) + .filter(|(_, (l, _))| *l == b' '); + let mut last = None; + let mut second = None; + for a in it { + second = last; + last = Some(a); + } + second.map_or(0, |(i, _)| i) + } + let cpl = find_index(lhs_str.bytes().zip(rhs_str.bytes())); + let csl = find_index(lhs_str.bytes().rev().zip(rhs_str.bytes().rev())); + if cpl + csl > lhs_str.len() || cpl + csl > rhs_str.len() { + assert_eq!(lhs, rhs); + } + let lhs_diff = String::from_utf8_lossy(&lhs_str.as_bytes()[cpl..lhs_str.len() - csl]); + let rhs_diff = String::from_utf8_lossy(&rhs_str.as_bytes()[cpl..rhs_str.len() - csl]); + panic!( + "config not matched:\nlhs: ...{}...,\nrhs: ...{}...", + lhs_diff, rhs_diff + ); +} diff --git a/etc/config-template.toml b/etc/config-template.toml index 38082367d40..9b9a81d4106 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -739,8 +739,10 @@ ## Target file size for compaction. ## The SST file size of level-0 is influenced by the compaction algorithm of `write-buffer-size` ## and level0. `target-file-size-base` is used to control the size of a single SST file of level1 to -## level6. +## level6. Each level will have `target-file-size-base * (target-file-size-multiplier ^ (level - 1))`. # target-file-size-base = "8MB" +## In partitioned-raft-kv, the default value of target-file-size-multiplier is 2 for write and default cf. +# target-file-size-multiplier = 1 ## Max bytes for `compaction.max_compaction_bytes`. ## If it's necessary to enlarge value of this entry, it's better to also enlarge `reserve-space` @@ -925,6 +927,7 @@ ## Recommend to set it the same as `rocksdb.defaultcf.max-bytes-for-level-base`. 
# max-bytes-for-level-base = "512MB" # target-file-size-base = "8MB" +# target-file-size-multiplier = 1 # level0-file-num-compaction-trigger = 4 # level0-slowdown-writes-trigger = 20 @@ -953,6 +956,7 @@ # min-write-buffer-number-to-merge = 1 # max-bytes-for-level-base = "128MB" # target-file-size-base = "8MB" +# target-file-size-multiplier = 1 # level0-file-num-compaction-trigger = 1 # level0-slowdown-writes-trigger = 20 # level0-stop-writes-trigger = 20 @@ -1014,6 +1018,7 @@ ## Recommend to set it the same as `rocksdb.defaultcf.max-bytes-for-level-base`. # max-bytes-for-level-base = "512MB" # target-file-size-base = "8MB" +# target-file-size-multiplier = 1 # level0-file-num-compaction-trigger = 4 # level0-slowdown-writes-trigger = 20 diff --git a/src/config/mod.rs b/src/config/mod.rs index dff0fcb2436..0f97487edcf 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -321,6 +321,7 @@ macro_rules! cf_config { pub min_write_buffer_number_to_merge: i32, pub max_bytes_for_level_base: ReadableSize, pub target_file_size_base: ReadableSize, + pub target_file_size_multiplier: i32, pub level0_file_num_compaction_trigger: i32, pub level0_slowdown_writes_trigger: Option, pub level0_stop_writes_trigger: Option, @@ -572,6 +573,9 @@ macro_rules! 
build_cf_opt { cf_opts.set_min_write_buffer_number_to_merge($opt.min_write_buffer_number_to_merge); cf_opts.set_max_bytes_for_level_base($opt.max_bytes_for_level_base.0); cf_opts.set_target_file_size_base($opt.target_file_size_base.0); + if $opt.target_file_size_multiplier != 0 { + cf_opts.set_target_file_size_multiplier($opt.target_file_size_multiplier); + } cf_opts.set_level_zero_file_num_compaction_trigger($opt.level0_file_num_compaction_trigger); cf_opts.set_level_zero_slowdown_writes_trigger( $opt.level0_slowdown_writes_trigger.unwrap_or_default(), @@ -659,6 +663,7 @@ impl Default for DefaultCfConfig { min_write_buffer_number_to_merge: 1, max_bytes_for_level_base: ReadableSize::mb(512), target_file_size_base: ReadableSize::mb(8), + target_file_size_multiplier: 0, level0_file_num_compaction_trigger: 4, level0_slowdown_writes_trigger: None, level0_stop_writes_trigger: None, @@ -784,6 +789,7 @@ impl Default for WriteCfConfig { min_write_buffer_number_to_merge: 1, max_bytes_for_level_base: ReadableSize::mb(512), target_file_size_base: ReadableSize::mb(8), + target_file_size_multiplier: 0, level0_file_num_compaction_trigger: 4, level0_slowdown_writes_trigger: None, level0_stop_writes_trigger: None, @@ -890,6 +896,7 @@ impl Default for LockCfConfig { min_write_buffer_number_to_merge: 1, max_bytes_for_level_base: ReadableSize::mb(128), target_file_size_base: ReadableSize::mb(8), + target_file_size_multiplier: 0, level0_file_num_compaction_trigger: 1, level0_slowdown_writes_trigger: None, level0_stop_writes_trigger: None, @@ -973,6 +980,7 @@ impl Default for RaftCfConfig { min_write_buffer_number_to_merge: 1, max_bytes_for_level_base: ReadableSize::mb(128), target_file_size_base: ReadableSize::mb(8), + target_file_size_multiplier: 0, level0_file_num_compaction_trigger: 1, level0_slowdown_writes_trigger: None, level0_stop_writes_trigger: None, @@ -1233,6 +1241,16 @@ impl DbConfig { self.write_buffer_limit.get_or_insert(ReadableSize( (total_mem * 
WRITE_BUFFER_MEMORY_LIMIT_RATE) as u64, )); + if self.writecf.enable_compaction_guard != Some(true) + && self.writecf.target_file_size_multiplier == 0 + { + self.writecf.target_file_size_multiplier = 2; + } + if self.defaultcf.enable_compaction_guard != Some(true) + && self.defaultcf.target_file_size_multiplier == 0 + { + self.defaultcf.target_file_size_multiplier = 2; + } self.defaultcf.disable_write_stall = true; self.writecf.disable_write_stall = true; self.lockcf.disable_write_stall = true; @@ -1475,6 +1493,7 @@ impl Default for RaftDefaultCfConfig { min_write_buffer_number_to_merge: 1, max_bytes_for_level_base: ReadableSize::mb(512), target_file_size_base: ReadableSize::mb(8), + target_file_size_multiplier: 0, level0_file_num_compaction_trigger: 4, level0_slowdown_writes_trigger: None, level0_stop_writes_trigger: None, @@ -4182,6 +4201,7 @@ mod tests { }; use slog::Level; use tempfile::Builder; + use test_util::assert_eq_debug; use tikv_kv::RocksEngine as RocksDBEngine; use tikv_util::{ config::VersionTrack, @@ -5001,25 +5021,25 @@ mod tests { Module::Quota, Box::new(QuotaLimitConfigManager::new(Arc::clone("a_limiter))), ); - assert_eq!(cfg_controller.get_current(), cfg); + assert_eq_debug(&cfg_controller.get_current(), &cfg); // u64::MAX ns casts to 213503d. 
cfg_controller .update_config("quota.max-delay-duration", "213504d") .unwrap_err(); - assert_eq!(cfg_controller.get_current(), cfg); + assert_eq_debug(&cfg_controller.get_current(), &cfg); cfg_controller .update_config("quota.foreground-cpu-time", "2000") .unwrap(); cfg.quota.foreground_cpu_time = 2000; - assert_eq!(cfg_controller.get_current(), cfg); + assert_eq_debug(&cfg_controller.get_current(), &cfg); cfg_controller .update_config("quota.foreground-write-bandwidth", "256MB") .unwrap(); cfg.quota.foreground_write_bandwidth = ReadableSize::mb(256); - assert_eq!(cfg_controller.get_current(), cfg); + assert_eq_debug(&cfg_controller.get_current(), &cfg); let mut sample = quota_limiter.new_sample(true); sample.add_read_bytes(ReadableSize::mb(32).0 as usize); @@ -5040,13 +5060,13 @@ mod tests { .update_config("quota.background-cpu-time", "2000") .unwrap(); cfg.quota.background_cpu_time = 2000; - assert_eq!(cfg_controller.get_current(), cfg); + assert_eq_debug(&cfg_controller.get_current(), &cfg); cfg_controller .update_config("quota.background-write-bandwidth", "256MB") .unwrap(); cfg.quota.background_write_bandwidth = ReadableSize::mb(256); - assert_eq!(cfg_controller.get_current(), cfg); + assert_eq_debug(&cfg_controller.get_current(), &cfg); let mut sample = quota_limiter.new_sample(false); sample.add_read_bytes(ReadableSize::mb(32).0 as usize); @@ -5057,7 +5077,7 @@ mod tests { .update_config("quota.background-read-bandwidth", "512MB") .unwrap(); cfg.quota.background_read_bandwidth = ReadableSize::mb(512); - assert_eq!(cfg_controller.get_current(), cfg); + assert_eq_debug(&cfg_controller.get_current(), &cfg); let mut sample = quota_limiter.new_sample(false); sample.add_write_bytes(ReadableSize::mb(128).0 as usize); let should_delay = block_on(quota_limiter.consume_sample(sample, false)); @@ -5067,7 +5087,7 @@ mod tests { .update_config("quota.max-delay-duration", "50ms") .unwrap(); cfg.quota.max_delay_duration = ReadableDuration::millis(50); - 
assert_eq!(cfg_controller.get_current(), cfg); + assert_eq_debug(&cfg_controller.get_current(), &cfg); let mut sample = quota_limiter.new_sample(true); sample.add_write_bytes(ReadableSize::mb(128).0 as usize); let should_delay = block_on(quota_limiter.consume_sample(sample, true)); @@ -5083,7 +5103,7 @@ mod tests { .update_config("quota.enable-auto-tune", "true") .unwrap(); cfg.quota.enable_auto_tune = true; - assert_eq!(cfg_controller.get_current(), cfg); + assert_eq_debug(&cfg_controller.get_current(), &cfg); } #[test] @@ -5103,7 +5123,7 @@ mod tests { ); let check_cfg = |cfg: &TikvConfig| { - assert_eq!(&cfg_controller.get_current(), cfg); + assert_eq_debug(&cfg_controller.get_current(), cfg); assert_eq!(&*version_tracker.value(), &cfg.server); }; @@ -5117,7 +5137,7 @@ mod tests { .update_config("server.raft-msg-max-batch-size", "32") .unwrap(); cfg.server.raft_msg_max_batch_size = 32; - assert_eq!(cfg_controller.get_current(), cfg); + assert_eq_debug(&cfg_controller.get_current(), &cfg); check_cfg(&cfg); } @@ -5133,7 +5153,7 @@ mod tests { for _ in 0..10 { cfg.compatible_adjust(); cfg.validate().unwrap(); - assert_eq!(c, cfg); + assert_eq_debug(&c, &cfg); } } @@ -5552,6 +5572,10 @@ mod tests { Some(default_cfg.coprocessor.region_split_size() * 3 / 4 / ReadableSize::kb(1)); default_cfg.raft_store.region_split_check_diff = Some(default_cfg.coprocessor.region_split_size() / 16); + default_cfg.rocksdb.writecf.target_file_size_multiplier = 1; + default_cfg.rocksdb.defaultcf.target_file_size_multiplier = 1; + default_cfg.rocksdb.lockcf.target_file_size_multiplier = 1; + default_cfg.raftdb.defaultcf.target_file_size_multiplier = 1; // Other special cases. cfg.pd.retry_max_count = default_cfg.pd.retry_max_count; // Both -1 and isize::MAX are the same. 
@@ -5588,7 +5612,7 @@ mod tests { cfg.coprocessor .optimize_for(default_cfg.storage.engine == EngineType::RaftKv2); - assert_eq!(cfg, default_cfg); + assert_eq_debug(&cfg, &default_cfg); } #[test] diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 80cab3aca43..672fd79ee12 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -24,6 +24,7 @@ use raftstore::{ }; use security::SecurityConfig; use slog::Level; +use test_util::assert_eq_debug; use tikv::{ config::*, import::Config as ImportConfig, @@ -345,6 +346,7 @@ fn test_serde_custom_tikv_config() { min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), target_file_size_base: ReadableSize::kb(123), + target_file_size_multiplier: 3, level0_file_num_compaction_trigger: 123, level0_slowdown_writes_trigger: Some(123), level0_stop_writes_trigger: Some(123), @@ -400,6 +402,7 @@ fn test_serde_custom_tikv_config() { min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), target_file_size_base: ReadableSize::kb(123), + target_file_size_multiplier: 3, level0_file_num_compaction_trigger: 123, level0_slowdown_writes_trigger: Some(123), level0_stop_writes_trigger: Some(123), @@ -469,6 +472,7 @@ fn test_serde_custom_tikv_config() { min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), target_file_size_base: ReadableSize::kb(123), + target_file_size_multiplier: 3, level0_file_num_compaction_trigger: 123, level0_slowdown_writes_trigger: Some(123), level0_stop_writes_trigger: Some(123), @@ -538,6 +542,7 @@ fn test_serde_custom_tikv_config() { min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), target_file_size_base: ReadableSize::kb(123), + target_file_size_multiplier: 3, level0_file_num_compaction_trigger: 123, level0_slowdown_writes_trigger: Some(123), level0_stop_writes_trigger: Some(123), @@ -636,6 +641,7 @@ fn 
test_serde_custom_tikv_config() { min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), target_file_size_base: ReadableSize::kb(123), + target_file_size_multiplier: 3, level0_file_num_compaction_trigger: 123, level0_slowdown_writes_trigger: Some(123), level0_stop_writes_trigger: Some(123), @@ -827,45 +833,11 @@ fn test_serde_custom_tikv_config() { let custom = read_file_in_project_dir("integrations/config/test-custom.toml"); let load = toml::from_str(&custom).unwrap(); - if value != load { - diff_config(&value, &load); - } + assert_eq_debug(&value, &load); + let dump = toml::to_string_pretty(&load).unwrap(); let load_from_dump = toml::from_str(&dump).unwrap(); - if load != load_from_dump { - diff_config(&load, &load_from_dump); - } -} - -#[track_caller] -fn diff_config(lhs: &TikvConfig, rhs: &TikvConfig) { - let lhs_str = format!("{:?}", lhs); - let rhs_str = format!("{:?}", rhs); - - fn find_index(l: impl Iterator) -> usize { - let it = l - .enumerate() - .take_while(|(_, (l, r))| l == r) - .filter(|(_, (l, _))| *l == b' '); - let mut last = None; - let mut second = None; - for a in it { - second = last; - last = Some(a); - } - second.map_or(0, |(i, _)| i) - } - let cpl = find_index(lhs_str.bytes().zip(rhs_str.bytes())); - let csl = find_index(lhs_str.bytes().rev().zip(rhs_str.bytes().rev())); - if cpl + csl > lhs_str.len() || cpl + csl > rhs_str.len() { - assert_eq!(lhs, rhs); - } - let lhs_diff = String::from_utf8_lossy(&lhs_str.as_bytes()[cpl..lhs_str.len() - csl]); - let rhs_diff = String::from_utf8_lossy(&rhs_str.as_bytes()[cpl..rhs_str.len() - csl]); - panic!( - "config not matched:\nlhs: ...{}...,\nrhs: ...{}...", - lhs_diff, rhs_diff - ); + assert_eq_debug(&load, &load_from_dump); } #[test] diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index d79ec7899e2..f8931cbddac 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml 
@@ -309,6 +309,7 @@ max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 max-bytes-for-level-base = "12KB" target-file-size-base = "123KB" +target-file-size-multiplier = 3 level0-file-num-compaction-trigger = 123 level0-slowdown-writes-trigger = 123 level0-stop-writes-trigger = 123 @@ -373,6 +374,7 @@ max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 max-bytes-for-level-base = "12KB" target-file-size-base = "123KB" +target-file-size-multiplier = 3 level0-file-num-compaction-trigger = 123 level0-slowdown-writes-trigger = 123 level0-stop-writes-trigger = 123 @@ -423,6 +425,7 @@ max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 max-bytes-for-level-base = "12KB" target-file-size-base = "123KB" +target-file-size-multiplier = 3 level0-file-num-compaction-trigger = 123 level0-slowdown-writes-trigger = 123 level0-stop-writes-trigger = 123 @@ -473,6 +476,7 @@ max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 max-bytes-for-level-base = "12KB" target-file-size-base = "123KB" +target-file-size-multiplier = 3 level0-file-num-compaction-trigger = 123 level0-slowdown-writes-trigger = 123 level0-stop-writes-trigger = 123 @@ -555,6 +559,7 @@ max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 max-bytes-for-level-base = "12KB" target-file-size-base = "123KB" +target-file-size-multiplier = 3 level0-file-num-compaction-trigger = 123 level0-slowdown-writes-trigger = 123 level0-stop-writes-trigger = 123 From 4f2430d7265d126ced0402342de5d6f16e0bc158 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 2 Mar 2023 17:45:10 +0800 Subject: [PATCH 0561/1149] sst_importer: add wire extra bytes into the packed size (#14312) close tikv/tikv#14313 Signed-off-by: hillium Co-authored-by: Xinye Tao --- src/import/sst_service.rs | 59 +++++++++++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git 
a/src/import/sst_service.rs b/src/import/sst_service.rs index b589da50b76..02e7297bea8 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -47,6 +47,19 @@ use crate::{ }; const MAX_INFLIGHT_RAFT_MSGS: usize = 64; +/// The extra bytes required by the wire encoding. +/// Generally, a field (and a embedded message) would introduce 2 extra +/// bytes. In detail, they are: +/// - 2 bytes for the request type (Tag+Value). +/// - 2 bytes for every string or bytes field (Tag+Length), they are: +/// . + the key field +/// . + the value field +/// . + the CF field (None for CF_DEFAULT) +/// - 2 bytes for the embedded message field `PutRequest` (Tag+Length). +/// In fact, the length field is encoded by varint, which may grow when the +/// content length is greater than 128, however when the length is greater than +/// 128, the extra 1~4 bytes can be ignored. +const WIRE_EXTRA_BYTES: usize = 10; fn transfer_error(err: storage::Error) -> ImportPbError { let mut e = ImportPbError::default(); @@ -104,6 +117,20 @@ struct RequestCollector { } impl RequestCollector { + fn record_size_of_message(&mut self, size: usize) { + // We make a raft command entry when we unpacked size grows to 7/8 of the max + // raft entry size. + // + // Which means, if we don't add the extra bytes, when the amplification by the + // extra bytes is greater than 8/7 (i.e. the average size of entry is + // less than 70B), we may encounter the "raft entry is too large" error. 
+ self.unpacked_size += size + WIRE_EXTRA_BYTES; + } + + fn release_message_of_size(&mut self, size: usize) { + self.unpacked_size -= size + WIRE_EXTRA_BYTES; + } + fn new(max_raft_req_size: usize) -> Self { Self { max_raft_req_size, @@ -162,19 +189,19 @@ impl RequestCollector { .map(|(_, old_ts)| *old_ts < ts.into_inner()) .unwrap_or(true) { - self.unpacked_size += m.size(); + self.record_size_of_message(m.size()); if let Some((v, _)) = self .write_reqs .insert(encoded_key.to_owned(), (m, ts.into_inner())) { - self.unpacked_size -= v.size(); + self.release_message_of_size(v.size()) } } } CF_DEFAULT => { - self.unpacked_size += m.size(); + self.record_size_of_message(m.size()); if let Some(v) = self.default_reqs.insert(k.as_encoded().clone(), m) { - self.unpacked_size -= v.size(); + self.release_message_of_size(v.size()); } } _ => unreachable!(), @@ -193,7 +220,7 @@ impl RequestCollector { self.write_reqs.drain().map(|(_, (m, _))| m).collect() }; for r in &res { - self.unpacked_size -= r.size(); + self.release_message_of_size(r.size()); } res } @@ -1066,6 +1093,8 @@ mod test { use std::collections::HashMap; use engine_traits::{CF_DEFAULT, CF_WRITE}; + use kvproto::raft_cmdpb::Request; + use protobuf::Message; use tikv_kv::Modify; use txn_types::{Key, TimeStamp, Write, WriteType}; @@ -1249,4 +1278,24 @@ mod test { assert_eq!(reqs, reqs_result); assert!(request_collector.is_empty()); } + + #[test] + fn test_collector_size() { + let mut request_collector = RequestCollector::new(1024); + + for i in 0..100u64 { + request_collector.accept(CF_DEFAULT, default_req(&i.to_ne_bytes(), b"egg", i)); + } + + let pws = request_collector.pending_writes; + for w in pws { + let req_size = w + .modifies + .into_iter() + .map(Request::from) + .map(|x| x.compute_size()) + .sum::(); + assert!(req_size < 1024, "{}", req_size); + } + } } From d74fd1325280999b367424ee332c49e11bbf80b0 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Thu, 2 Mar 2023 20:05:10 +0800 Subject: 
[PATCH 0562/1149] raftstore-v2: store heartbeat supports write keys and bytes. (#14271) ref tikv/tikv#12842 1. store heartbeat supports write keys and bytes. Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/batch/store.rs | 13 ++++++- .../raftstore-v2/src/operation/command/mod.rs | 2 + .../operation/command/write/simple_write.rs | 5 +++ components/raftstore-v2/src/operation/pd.rs | 16 +++++++- .../tests/integrations/test_pd_heartbeat.rs | 37 ++++++++++++++----- components/raftstore/src/store/fsm/mod.rs | 1 + components/raftstore/src/store/fsm/store.rs | 12 +++--- 7 files changed, 67 insertions(+), 19 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 4693b0db369..83fa6b7a018 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -24,7 +24,10 @@ use raft::{StateRole, INVALID_ID}; use raftstore::{ coprocessor::{CoprocessorHost, RegionChangeEvent}, store::{ - fsm::store::{PeerTickBatch, ENTRY_CACHE_EVICT_TICK_DURATION}, + fsm::{ + store::{PeerTickBatch, ENTRY_CACHE_EVICT_TICK_DURATION}, + GlobalStoreStat, LocalStoreStat, + }, local_metrics::RaftMetrics, AutoSplitController, Config, ReadRunner, ReadTask, SplitCheckRunner, SplitCheckTask, StoreWriters, TabletSnapManager, Transport, WriteSenders, @@ -85,6 +88,8 @@ pub struct StoreContext { pub self_disk_usage: DiskUsage, pub snap_mgr: TabletSnapManager, + pub global_stat: GlobalStoreStat, + pub store_stat: LocalStoreStat, pub sst_importer: Arc, } @@ -162,6 +167,7 @@ impl StorePoller { fn flush_events(&mut self) { self.schedule_ticks(); self.poll_ctx.raft_metrics.maybe_flush(); + self.poll_ctx.store_stat.flush(); } fn schedule_ticks(&mut self) { @@ -279,6 +285,7 @@ struct StorePollerBuilder { store_meta: Arc>>, shutdown: Arc, snap_mgr: TabletSnapManager, + global_stat: GlobalStoreStat, sst_importer: Arc, } @@ -308,6 +315,7 @@ impl 
StorePollerBuilder { .after_start(move || set_io_type(IoType::ForegroundWrite)) .name_prefix("apply") .build_future_pool(); + let global_stat = GlobalStoreStat::default(); StorePollerBuilder { cfg, store_id, @@ -322,6 +330,7 @@ impl StorePollerBuilder { snap_mgr, shutdown, coprocessor_host, + global_stat, sst_importer, } } @@ -440,6 +449,8 @@ where self_disk_usage: DiskUsage::Normal, snap_mgr: self.snap_mgr.clone(), coprocessor_host: self.coprocessor_host.clone(), + global_stat: self.global_stat.clone(), + store_stat: self.global_stat.local(), sst_importer: self.sst_importer.clone(), }; poll_ctx.update_ticks_timeout(); diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 6cb4460428d..df289a26f4a 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -365,6 +365,8 @@ impl Peer { .add_bucket_flow(&apply_res.bucket_stat); self.update_split_flow_control(&apply_res.metrics); self.update_stat(&apply_res.metrics); + ctx.store_stat.engine_total_bytes_written += apply_res.metrics.written_bytes; + ctx.store_stat.engine_total_keys_written += apply_res.metrics.written_keys; self.raft_group_mut() .advance_apply_to(apply_res.applied_index); diff --git a/components/raftstore-v2/src/operation/command/write/simple_write.rs b/components/raftstore-v2/src/operation/command/write/simple_write.rs index cf267f854b7..a2c378cb04b 100644 --- a/components/raftstore-v2/src/operation/command/write/simple_write.rs +++ b/components/raftstore-v2/src/operation/command/write/simple_write.rs @@ -30,6 +30,11 @@ impl SimpleWriteBinary { pub fn freeze(&mut self) { self.write_type = WriteType::Unspecified; } + + #[inline] + pub fn data_size(&self) -> usize { + self.buf.len() + } } /// We usually use `RaftCmdRequest` for read write request. 
But the codec is diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 17abdd85cf0..4fd21a32488 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -2,6 +2,8 @@ //! This module implements the interactions with pd. +use std::sync::atomic::Ordering; + use engine_traits::{KvEngine, RaftEngine}; use fail::fail_point; use kvproto::{metapb, pdpb}; @@ -47,8 +49,18 @@ impl Store { stats.set_start_time(self.start_time().unwrap() as u32); - stats.set_bytes_written(0); - stats.set_keys_written(0); + stats.set_bytes_written( + ctx.global_stat + .stat + .engine_total_bytes_written + .swap(0, Ordering::Relaxed), + ); + stats.set_keys_written( + ctx.global_stat + .stat + .engine_total_keys_written + .swap(0, Ordering::Relaxed), + ); stats.set_is_busy(false); // TODO: add query stats let task = pd::Task::StoreHeartbeat { stats }; diff --git a/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs b/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs index b9dea63bbfe..679183735b6 100644 --- a/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs +++ b/components/raftstore-v2/tests/integrations/test_pd_heartbeat.rs @@ -8,7 +8,7 @@ use kvproto::raft_cmdpb::{RaftCmdRequest, StatusCmdType}; use pd_client::PdClient; use raftstore::coprocessor::Config as CopConfig; use raftstore_v2::{ - router::{PeerMsg, PeerTick}, + router::{PeerMsg, PeerTick, StoreMsg, StoreTick}, SimpleWriteEncoder, }; use tikv_util::{config::ReadableSize, store::new_peer}; @@ -54,18 +54,35 @@ fn test_region_heartbeat() { #[test] fn test_store_heartbeat() { + let region_id = 2; let cluster = Cluster::with_node_count(1, None); let store_id = cluster.node(0).id(); - for _ in 0..5 { - let stats = block_on(cluster.node(0).pd_client().get_store_stats_async(store_id)).unwrap(); - if stats.get_start_time() > 0 { - assert_ne!(stats.get_capacity(), 0); - 
assert_ne!(stats.get_used_size(), 0); - return; - } - std::thread::sleep(std::time::Duration::from_millis(50)); + let router = &cluster.routers[0]; + // load data to split bucket. + let header = Box::new(router.new_request_for(region_id).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, b"key", b"value"); + let data = put.encode(); + let write_bytes = data.data_size(); + let (msg, sub) = PeerMsg::simple_write(header, data); + router.send(region_id, msg).unwrap(); + let _resp = block_on(sub.result()).unwrap(); + + // report store heartbeat to pd. + std::thread::sleep(std::time::Duration::from_millis(50)); + router + .store_router() + .send_control(StoreMsg::Tick(StoreTick::PdStoreHeartbeat)) + .unwrap(); + std::thread::sleep(std::time::Duration::from_millis(50)); + + let stats = block_on(cluster.node(0).pd_client().get_store_stats_async(store_id)).unwrap(); + if stats.get_start_time() > 0 { + assert_ne!(stats.get_capacity(), 0); + assert_ne!(stats.get_used_size(), 0); + assert_eq!(stats.get_keys_written(), 1); + assert!(stats.get_bytes_written() > write_bytes.try_into().unwrap()); } - panic!("failed to get store stats"); } #[test] diff --git a/components/raftstore/src/store/fsm/mod.rs b/components/raftstore/src/store/fsm/mod.rs index b481caf4f74..ffba120056c 100644 --- a/components/raftstore/src/store/fsm/mod.rs +++ b/components/raftstore/src/store/fsm/mod.rs @@ -16,6 +16,7 @@ pub use self::{ ChangePeer, ExecResult, GenSnapTask, Msg as ApplyTask, Notifier as ApplyNotifier, Proposal, Registration, SwitchWitness, TaskRes as ApplyTaskRes, }, + metrics::{GlobalStoreStat, LocalStoreStat}, peer::{new_admin_request, DestroyPeerJob, PeerFsm, MAX_PROPOSAL_SIZE_RATIO}, store::{ create_raft_batch_system, RaftBatchSystem, RaftPollerBuilder, RaftRouter, StoreInfo, diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 4b9e69f9763..4fafc049bee 100644 --- 
a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -2479,14 +2479,14 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER .global_stat .stat .engine_total_bytes_written - .swap(0, Ordering::SeqCst), + .swap(0, Ordering::Relaxed), ); stats.set_keys_written( self.ctx .global_stat .stat .engine_total_keys_written - .swap(0, Ordering::SeqCst), + .swap(0, Ordering::Relaxed), ); stats.set_is_busy( @@ -2494,7 +2494,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER .global_stat .stat .is_busy - .swap(false, Ordering::SeqCst), + .swap(false, Ordering::Relaxed), ); let mut query_stats = QueryStats::default(); @@ -2503,21 +2503,21 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER .global_stat .stat .engine_total_query_put - .swap(0, Ordering::SeqCst), + .swap(0, Ordering::Relaxed), ); query_stats.set_delete( self.ctx .global_stat .stat .engine_total_query_delete - .swap(0, Ordering::SeqCst), + .swap(0, Ordering::Relaxed), ); query_stats.set_delete_range( self.ctx .global_stat .stat .engine_total_query_delete_range - .swap(0, Ordering::SeqCst), + .swap(0, Ordering::Relaxed), ); stats.set_query_stats(query_stats); From 8bfa12dea2d5bd0918b35e5ae041676aeeee77ab Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Sat, 4 Mar 2023 23:13:10 +0800 Subject: [PATCH 0563/1149] raftstore-v2: add some missing metrics (#14326) ref tikv/tikv#12842 None Signed-off-by: tabokie --- .../operation/command/admin/compact_log.rs | 28 +- .../raftstore/src/store/worker/metrics.rs | 6 - metrics/alertmanager/tikv.rules.yml | 4 +- metrics/grafana/performance_write.json | 2 +- metrics/grafana/tikv_details.json | 390 +----------------- metrics/grafana/tikv_summary.json | 2 +- 6 files changed, 36 insertions(+), 396 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs 
b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index af61434041a..1ce118a957f 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -18,7 +18,8 @@ use kvproto::raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse, RaftCmdRequ use protobuf::Message; use raftstore::{ store::{ - fsm::new_admin_request, needs_evict_entry_cache, Transport, WriteTask, RAFT_INIT_LOG_INDEX, + fsm::new_admin_request, metrics::REGION_MAX_LOG_LAG, needs_evict_entry_cache, Transport, + WriteTask, RAFT_INIT_LOG_INDEX, }, Result, }; @@ -167,6 +168,7 @@ impl Peer { last_idx, replicated_idx ); + REGION_MAX_LOG_LAG.observe((last_idx - replicated_idx) as f64); } // leader may call `get_term()` on the latest replicated index, so compact @@ -182,13 +184,19 @@ impl Peer { >= store_ctx.cfg.raft_log_gc_size_limit().0 { std::cmp::max(first_idx + (last_idx - first_idx) / 2, replicated_idx) - } else if replicated_idx < first_idx - || last_idx - first_idx < 3 - || replicated_idx - first_idx < store_ctx.cfg.raft_log_gc_threshold - && self - .compact_log_context_mut() - .maybe_skip_compact_log(store_ctx.cfg.raft_log_reserve_max_ticks) + } else if replicated_idx < first_idx || last_idx - first_idx < 3 { + store_ctx.raft_metrics.raft_log_gc_skipped.reserve_log.inc(); + return; + } else if replicated_idx - first_idx < store_ctx.cfg.raft_log_gc_threshold + && self + .compact_log_context_mut() + .maybe_skip_compact_log(store_ctx.cfg.raft_log_reserve_max_ticks) { + store_ctx + .raft_metrics + .raft_log_gc_skipped + .threshold_limit + .inc(); return; } else { replicated_idx @@ -197,6 +205,12 @@ impl Peer { // Have no idea why subtract 1 here, but original code did this by magic. compact_idx -= 1; if compact_idx < first_idx { + // In case compact_idx == first_idx before subtraction. 
+ store_ctx + .raft_metrics + .raft_log_gc_skipped + .compact_idx_too_small + .inc(); return; } diff --git a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index e6c3c505cdf..36a217be607 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -191,12 +191,6 @@ lazy_static! { "Total number of seek operations from raft log gc." ) .unwrap(); - pub static ref RAFT_LOG_GC_DELETED_KEYS_HISTOGRAM: Histogram = register_histogram!( - "tikv_raftstore_raft_log_gc_deleted_keys", - "Bucket of number of deleted keys from raft log gc.", - exponential_buckets(1.0, 2.0, 20).unwrap() - ) - .unwrap(); pub static ref RAFT_LOG_GC_FAILED: IntCounter = register_int_counter!( "tikv_raftstore_raft_log_gc_failed", "Total number of failed raft log gc." diff --git a/metrics/alertmanager/tikv.rules.yml b/metrics/alertmanager/tikv.rules.yml index 19f8085866e..e43ca401d42 100644 --- a/metrics/alertmanager/tikv.rules.yml +++ b/metrics/alertmanager/tikv.rules.yml @@ -110,12 +110,12 @@ groups: summary: TiKV coprocessor request wait seconds more than 10s - alert: TiKV_raftstore_thread_cpu_seconds_total - expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"raftstore_.*"}[1m])) by (instance) > 1.6 + expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"(raftstore|rs)_.*"}[1m])) by (instance) > 1.6 for: 1m labels: env: ENV_LABELS_ENV level: critical - expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"raftstore_.*"}[1m])) by (instance) > 1.6 + expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"(raftstore|rs)_.*"}[1m])) by (instance) > 1.6 annotations: description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ $value }}' value: '{{ $value }}' diff --git a/metrics/grafana/performance_write.json b/metrics/grafana/performance_write.json index ddb9621b97a..695e96725c3 100644 --- a/metrics/grafana/performance_write.json +++ b/metrics/grafana/performance_write.json @@ 
-3104,7 +3104,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"raftstore_.*\"}[1m])) by (instance)", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", name=~\"(raftstore|rs)_.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 334c3c119f7..ead1e842d0c 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -6000,7 +6000,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"raftstore_.*\"}[1m])) by (instance)", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"(raftstore|rs)_.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -16605,116 +16605,6 @@ "y": 24 }, "hiddenSeries": false, - "id": 12972, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_raft_log_gc_deleted_keys_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) 
by (instance)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Raft log GC deleted keys", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 - }, - "hiddenSeries": false, "id": 12884, "legend": { "alignAsTable": true, @@ -16801,264 +16691,6 @@ "alignLevel": null } }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "hiddenSeries": false, - "id": 12973, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": false, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 
1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_raft_log_gc_deleted_keys_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "99% - {{instance}}", - "refId": "A", - "step": 10 - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_raftstore_raft_log_gc_deleted_keys_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "95% - {{instance}}", - "refId": "B", - "step": 10 - }, - { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_raft_log_gc_deleted_keys_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance) / sum(rate(tikv_raftstore_raft_log_gc_deleted_keys_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "avg - {{instance}}", - "refId": "C", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Raft log GC write batch size", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": 
null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 40 - }, - "hiddenSeries": false, - "id": 12883, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": false, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_raft_log_gc_purge_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "99% - {{instance}}", - "refId": "A", - "step": 10 - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_raftstore_raft_log_gc_purge_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "95% - {{instance}}", - "refId": "B", - "step": 10 - }, - { - "exemplar": true, - "expr": 
"sum(rate(tikv_raftstore_raft_log_gc_purge_duration_secs_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance) / sum(rate(tikv_raftstore_raft_log_gc_purge_duration_secs_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "avg - {{instance}}", - "refId": "C", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Raft log GC purge duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 10, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, { "aliasColors": {}, "bars": false, @@ -17079,8 +16711,8 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 40 + "x": 0, + "y": 32 }, "hiddenSeries": false, "id": 12887, @@ -17189,8 +16821,8 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 48 + "x": 12, + "y": 32 }, "hiddenSeries": false, "id": 12975, @@ -17299,8 +16931,8 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 48 + "x": 0, + "y": 40 }, "hiddenSeries": false, "id": 12974, @@ -17409,8 +17041,8 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 56 + "x": 12, + "y": 40 }, "hiddenSeries": false, "id": 23763572229, @@ -17516,8 +17148,8 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 56 + "x": 0, + "y": 48 }, "hiddenSeries": false, "id": 23763572555, diff --git a/metrics/grafana/tikv_summary.json b/metrics/grafana/tikv_summary.json index 847ac5ef289..528fe04b7fb 100644 --- a/metrics/grafana/tikv_summary.json 
+++ b/metrics/grafana/tikv_summary.json @@ -3185,7 +3185,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"raftstore_.*\"}[1m])) by (instance)", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"(raftstore|rs)_.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", From 199e63f3eb036e9d20ec8003276d339c5eaa25b8 Mon Sep 17 00:00:00 2001 From: Yifan Xu <30385241+xuyifangreeneyes@users.noreply.github.com> Date: Mon, 6 Mar 2023 22:45:12 +0800 Subject: [PATCH 0564/1149] coprocessor: avoid fmsketch calculation for single-column index (#14345) ref tikv/tikv#14231 Signed-off-by: xuyifan <675434007@qq.com> Co-authored-by: Ti Chi Robot --- src/coprocessor/statistics/analyze.rs | 54 ++++++++++++++++----------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index 25fd67b9a99..6b486c3bb7e 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -450,6 +450,24 @@ impl RowSampleBuilder { .inc_by(quota_delay.as_micros() as u64); } } + for i in 0..self.column_groups.len() { + let offsets = self.column_groups[i].get_column_offsets(); + if offsets.len() != 1 { + continue; + } + // For the single-column group, its fm_sketch is the same as that of the + // corresponding column. Hence, we don't maintain its fm_sketch in + // collect_column_group. We just copy the corresponding column's fm_sketch after + // iterating all rows. Also, we can directly copy total_size and null_count. 
+ let col_pos = offsets[0] as usize; + let col_group_pos = self.columns_info.len() + i; + collector.mut_base().fm_sketches[col_group_pos] = + collector.mut_base().fm_sketches[col_pos].clone(); + collector.mut_base().null_count[col_group_pos] = + collector.mut_base().null_count[col_pos]; + collector.mut_base().total_sizes[col_group_pos] = + collector.mut_base().total_sizes[col_pos]; + } Ok(AnalyzeSamplingResult::new(collector)) } } @@ -527,37 +545,29 @@ impl BaseRowSampleCollector { let col_len = columns_val.len(); for i in 0..column_groups.len() { let offsets = column_groups[i].get_column_offsets(); - let mut has_null = true; + if offsets.len() == 1 { + // For the single-column group, its fm_sketch is the same as that of the + // corresponding column. Hence, we don't need to maintain its + // fm_sketch. We just copy the corresponding column's fm_sketch after iterating + // all rows. Also, we can directly copy total_size and null_count. + continue; + } + // We don't maintain the null count information for the multi-column group. for j in offsets { if columns_val[*j as usize][0] == NIL_FLAG { continue; } - has_null = false; self.total_sizes[col_len + i] += columns_val[*j as usize].len() as i64 - 1 } - // We only maintain the null count for single column case. 
- if has_null && offsets.len() == 1 { - self.null_count[col_len + i] += 1; - continue; - } - if offsets.len() == 1 { - let offset = offsets[0] as usize; - if columns_info[offset].as_accessor().is_string_like() { - self.fm_sketches[col_len + i].insert(&collation_keys_val[offset]); + let mut hasher = Hasher128::with_seed(0); + for j in offsets { + if columns_info[*j as usize].as_accessor().is_string_like() { + hasher.write(&collation_keys_val[*j as usize]); } else { - self.fm_sketches[col_len + i].insert(&columns_val[offset]); - } - } else { - let mut hasher = Hasher128::with_seed(0); - for j in offsets { - if columns_info[*j as usize].as_accessor().is_string_like() { - hasher.write(&collation_keys_val[*j as usize]); - } else { - hasher.write(&columns_val[*j as usize]); - } + hasher.write(&columns_val[*j as usize]); } - self.fm_sketches[col_len + i].insert_hash_value(hasher.finish()); } + self.fm_sketches[col_len + i].insert_hash_value(hasher.finish()); } } From 38568e46b27878222961f64e600d4be317bf288f Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Tue, 7 Mar 2023 12:15:12 +0800 Subject: [PATCH 0565/1149] metrics: add some metrics for snapshot-v2 (#14195) ref tikv/tikv#12842 1. add snapshot size metrics 2. 
add snapshot send/recv count metrics Signed-off-by: bufferflies <1045931706@qq.com> --- components/raftstore-v2/src/operation/pd.rs | 11 +++++++++-- components/raftstore/src/store/async_io/read.rs | 15 ++++++++++++++- src/server/engine_factory.rs | 2 +- src/server/tablet_snap.rs | 3 +++ 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 4fd21a32488..3b5e7d32f89 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -7,7 +7,7 @@ use std::sync::atomic::Ordering; use engine_traits::{KvEngine, RaftEngine}; use fail::fail_point; use kvproto::{metapb, pdpb}; -use raftstore::store::Transport; +use raftstore::store::{metrics::STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC, Transport}; use slog::error; use tikv_util::slog_panic; @@ -43,10 +43,17 @@ impl Store { let meta = ctx.store_meta.lock().unwrap(); stats.set_region_count(meta.readers.len() as u32); } - + // todo: imple snapshot status report stats.set_sending_snap_count(0); stats.set_receiving_snap_count(0); + STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC + .with_label_values(&["sending"]) + .set(stats.get_sending_snap_count() as i64); + STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC + .with_label_values(&["receiving"]) + .set(stats.get_receiving_snap_count() as i64); + stats.set_start_time(self.start_time().unwrap() as u32); stats.set_bytes_written( diff --git a/components/raftstore/src/store/async_io/read.rs b/components/raftstore/src/store/async_io/read.rs index 5b53ad499b5..b02992bbeb0 100644 --- a/components/raftstore/src/store/async_io/read.rs +++ b/components/raftstore/src/store/async_io/read.rs @@ -18,6 +18,7 @@ use raft::{eraftpb::Snapshot, GetEntriesContext}; use tikv_util::{error, info, time::Instant, worker::Runnable}; use crate::store::{ + metrics::{SNAPSHOT_KV_COUNT_HISTOGRAM, SNAPSHOT_SIZE_HISTOGRAM}, snap::TABLET_SNAPSHOT_VERSION, util, worker::metrics::{SNAP_COUNTER, 
SNAP_HISTOGRAM}, @@ -225,6 +226,8 @@ where // create checkpointer. let snap_key = TabletSnapKey::from_region_snap(region_id, to_peer, &snapshot); let mut res = None; + let total_size = tablet.get_engine_used_size().unwrap_or(0); + let total_keys = tablet.get_num_keys().unwrap_or(0); if let Err(e) = self.generate_snap(&snap_key, tablet) { error!("failed to create checkpointer"; "region_id" => region_id, "error" => %e); SNAP_COUNTER.generate.fail.inc(); @@ -232,7 +235,17 @@ where let elapsed = start.saturating_elapsed_secs(); SNAP_COUNTER.generate.success.inc(); SNAP_HISTOGRAM.generate.observe(elapsed); - info!("snapshot generated"; "region_id" => region_id, "elapsed" => elapsed, "key" => ?snap_key, "for_balance" => for_balance); + SNAPSHOT_SIZE_HISTOGRAM.observe(total_size as f64); + SNAPSHOT_KV_COUNT_HISTOGRAM.observe(total_keys as f64); + info!( + "snapshot generated"; + "region_id" => region_id, + "elapsed" => elapsed, + "key" => ?snap_key, + "for_balance" => for_balance, + "total_size" => total_size, + "total_keys" => total_keys, + ); res = Some(Box::new((snapshot, to_peer))) } diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index f50afe4bc44..e9b59141da2 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -216,7 +216,7 @@ impl TabletFactory for KvEngineFactory { } fn destroy_tablet(&self, ctx: TabletContext, path: &Path) -> Result<()> { - info!("destroy tablet"; "path" => %path.display(), "id" => ctx.id, "suffix" => ?ctx.suffix); + info!("destroy tablet"; "path" => %path.display(), "region_id" => ctx.id, "suffix" => ?ctx.suffix); // Create kv engine. 
let _db_opts = self.db_opts(EngineType::RaftKv2); let _cf_opts = self.cf_opts(EngineType::RaftKv2); diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index a54c5461e0d..c0ecf4db611 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -162,6 +162,9 @@ async fn send_snap_files( } } info!("sent all snap file finish"; "snap_key" => %key); + SNAP_LIMIT_TRANSPORT_BYTES_COUNTER_STATIC + .send + .inc_by(total_sent); sender.close().await?; Ok(total_sent) } From bb6f5e84762c63d2092bc4741b13db179423aced Mon Sep 17 00:00:00 2001 From: ShuNing Date: Tue, 7 Mar 2023 14:21:12 +0800 Subject: [PATCH 0566/1149] resource_control: enable by default (#14354) close tikv/tikv#14353 resource_control: enable resource control by default Signed-off-by: nolouch --- components/resource_control/src/lib.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index 1c4c93c82d2..99645688cf7 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -17,10 +17,16 @@ pub use service::ResourceManagerService; pub mod channel; pub use channel::ResourceMetered; -#[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig, Default)] +#[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] pub struct Config { #[online_config(skip)] pub enabled: bool, } + +impl Default for Config { + fn default() -> Self { + Self { enabled: true } + } +} From 3b56cfb2a8620894911e898ecc844ea361cc9ddc Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Tue, 7 Mar 2023 15:09:12 +0800 Subject: [PATCH 0567/1149] Metrics: add snapshot transport to grafana (#14337) ref tikv/tikv#13409 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Ti Chi Robot --- metrics/grafana/tikv_details.json | 104 ++++++++++++++++++++++++++++++ 1 file changed, 104 
insertions(+) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index ead1e842d0c..d4374fda369 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -23641,6 +23641,110 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The speed of sending or receiving snapshot", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 66 + }, + "id": 4201, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(tikv_snapshot_limit_transport_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}--{{type}}", + "metric": "tikv_snapshot_limit_transport_bytes", + "refId": "A", + "step": 40 + },{ + "exemplar": true, + "expr": "rate(tikv_snapshot_limit_generate_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "hide": true, + "interval": "", + "legendFormat": "{{instance}}--generate", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Snapshot transport speed", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] 
+ }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "repeat": null, From 02490222cd4bb770668f010bcf427d0604e71f96 Mon Sep 17 00:00:00 2001 From: Zhi Qi <30543181+LittleFall@users.noreply.github.com> Date: Tue, 7 Mar 2023 18:13:12 +0800 Subject: [PATCH 0568/1149] copr: (enhance) support executor limit with partition_by fields (#14359) ref tikv/tikv#13936 Signed-off-by: Zhi Qi --- Cargo.lock | 2 +- components/tidb_query_executors/src/runner.rs | 41 +++++++++++++++---- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea1ebcfbb3b..d939343c06a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6701,7 +6701,7 @@ dependencies = [ [[package]] name = "tipb" version = "0.0.1" -source = "git+https://github.com/pingcap/tipb.git#614f3ffd42ddc84b78ff59d65f105f2099a6f1b1" +source = "git+https://github.com/pingcap/tipb.git#955fbdc879517f16b7a2f5967f143b92a6ab03dd" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 60857dda80d..60359f22c55 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -329,14 +329,39 @@ pub fn build_executors( ExecType::TypeLimit => { EXECUTOR_COUNT_METRICS.batch_limit.inc(); - Box::new( - BatchLimitExecutor::new( - executor, - ed.get_limit().get_limit() as usize, - is_src_scan_executor, - )? - .collect_summary(summary_slot_index), - ) + let mut d = ed.take_limit(); + + // If there is partition_by field in Limit, we treat it as a + // partitionTopN without order_by. + // todo: refine those logics. 
+ let partition_by = d + .take_partition_by() + .into_iter() + .map(|mut item| item.take_expr()) + .collect_vec(); + + if partition_by.is_empty() { + Box::new( + BatchLimitExecutor::new( + executor, + d.get_limit() as usize, + is_src_scan_executor, + )? + .collect_summary(summary_slot_index), + ) + } else { + Box::new( + BatchPartitionTopNExecutor::new( + config.clone(), + executor, + partition_by, + vec![], + vec![], + d.get_limit() as usize, + )? + .collect_summary(summary_slot_index), + ) + } } ExecType::TypeTopN => { EXECUTOR_COUNT_METRICS.batch_top_n.inc(); From 32925ca564ddb801950942315818929265f58222 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Tue, 7 Mar 2023 21:43:13 +0800 Subject: [PATCH 0569/1149] txn: Well-defined behavior of allow_lock_with_conflict with should_not_exist or lock_if_exists (#14330) close tikv/tikv#14293 Signed-off-by: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Signed-off-by: MyonKeminta Co-authored-by: Ti Chi Robot Co-authored-by: cfzjywxk Co-authored-by: ekexium --- .../txn/actions/acquire_pessimistic_lock.rs | 568 +++++++++++++++++- src/storage/types.rs | 8 +- 2 files changed, 555 insertions(+), 21 deletions(-) diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index 86b9ddeab41..987af9fbed7 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -7,7 +7,7 @@ use txn_types::{Key, LockType, OldValue, PessimisticLock, TimeStamp, Value, Writ use crate::storage::{ mvcc::{ metrics::{MVCC_CONFLICT_COUNTER, MVCC_DUPLICATE_CMD_COUNTER_VEC}, - ErrorInner, MvccTxn, Result as MvccResult, SnapshotReader, + Error as MvccError, ErrorInner, MvccTxn, Result as MvccResult, SnapshotReader, }, txn::{ actions::check_data_constraint::check_data_constraint, sched_pool::tls_can_enable, @@ -117,13 +117,14 @@ pub fn acquire_pessimistic_lock( .into()); } + 
let requested_for_update_ts = for_update_ts; let locked_with_conflict_ts = if allow_lock_with_conflict && for_update_ts < lock.for_update_ts { // If the key is already locked by the same transaction with larger // for_update_ts, and the current request has // `allow_lock_with_conflict` set, we must consider // these possibilities: - // * If a previous request successfully locked the key with conflict, but the + // * A previous request successfully locked the key with conflict, but the // response is lost due to some errors such as RPC failures. In this case, we // return like the current request's result is locked_with_conflict, for // idempotency concern. @@ -147,11 +148,33 @@ pub fn acquire_pessimistic_lock( if let Some((write, commit_ts)) = write { // Here `get_write_with_commit_ts` returns only the latest PUT if it exists and // is not deleted. It's still ok to pass it into `check_data_constraint`. - // In case we are going to lock it with write conflict, we do not check it since - // the statement will then retry. - if locked_with_conflict_ts.is_none() { - check_data_constraint(reader, should_not_exist, &write, commit_ts, &key)?; - } + check_data_constraint(reader, should_not_exist, &write, commit_ts, &key).or_else( + |e| { + if is_already_exist(&e) && commit_ts > requested_for_update_ts { + // If `allow_lock_with_conflict` is set and there is write conflict, + // and the constraint check doesn't pass on the latest version, + // return a WriteConflict error instead of AlreadyExist, to inform the + // client to retry. + // Note the conflict_info may be not consistent with the + // `locked_with_conflict_ts` we got before. + // This is possible if the key is locked by a newer request with + // larger for_update_ts, in which case the result of this request + // doesn't matter at all. So we don't need + // to care about it. 
+ let conflict_info = ConflictInfo { + conflict_start_ts: write.start_ts, + conflict_commit_ts: commit_ts, + }; + return Err(conflict_info.into_write_conflict_error( + reader.start_ts, + primary.to_vec(), + key.to_raw()?, + )); + } + Err(e) + }, + )?; + if need_load_value { val = Some(reader.load_data(&key, write)?); } else if need_check_existence { @@ -159,7 +182,7 @@ pub fn acquire_pessimistic_lock( } } } - // Pervious write is not loaded. + // Previous write is not loaded. let (prev_write_loaded, prev_write) = (false, None); let old_value = load_old_value( need_old_value, @@ -200,7 +223,7 @@ pub fn acquire_pessimistic_lock( )); } - let mut locked_with_conflict_ts = None; + let mut conflict_info = None; // Following seek_write read the previous write. let (prev_write_loaded, mut prev_write) = (true, None); @@ -221,7 +244,10 @@ pub fn acquire_pessimistic_lock( .inc(); if allow_lock_with_conflict { // TODO: New metrics. - locked_with_conflict_ts = Some(commit_ts); + conflict_info = Some(ConflictInfo { + conflict_start_ts: write.start_ts, + conflict_commit_ts: commit_ts, + }); for_update_ts = commit_ts; need_load_value = true; } else { @@ -269,19 +295,30 @@ pub fn acquire_pessimistic_lock( } } - // Check data constraint when acquiring pessimistic lock. But in case we are - // going to lock it with write conflict, we do not check it since the - // statement will then retry. - if locked_with_conflict_ts.is_none() { - check_data_constraint(reader, should_not_exist, &write, commit_ts, &key)?; - } + // Check data constraint when acquiring pessimistic lock. + check_data_constraint(reader, should_not_exist, &write, commit_ts, &key).or_else(|e| { + if is_already_exist(&e) { + // If `allow_lock_with_conflict` is set and there is write conflict, + // and the constraint check doesn't pass on the latest version, + // return a WriteConflict error instead of AlreadyExist, to inform the + // client to retry. 
+ if let Some(conflict_info) = conflict_info { + return Err(conflict_info.into_write_conflict_error( + reader.start_ts, + primary.to_vec(), + key.to_raw()?, + )); + } + } + Err(e) + })?; (last_change_ts, versions_to_last_change) = write.next_last_change_info(commit_ts); // Load value if locked_with_conflict, so that when the client (TiDB) need to // read the value during statement retry, it will be possible to read the value // from cache instead of RPC. - if need_value || need_check_existence || locked_with_conflict_ts.is_some() { + if need_value || need_check_existence || conflict_info.is_some() { val = match write.write_type { // If it's a valid Write, no need to read again. WriteType::Put @@ -338,6 +375,12 @@ pub fn acquire_pessimistic_lock( // do it when val exists if !lock_only_if_exists || val.is_some() { txn.put_pessimistic_lock(key, lock, true); + } else if let Some(conflict_info) = conflict_info { + return Err(conflict_info.into_write_conflict_error( + reader.start_ts, + primary.to_vec(), + key.into_raw()?, + )); } // TODO don't we need to commit the modifies in txn? 
@@ -345,13 +388,46 @@ pub fn acquire_pessimistic_lock( PessimisticLockKeyResult::new_success( need_value, need_check_existence, - locked_with_conflict_ts, + conflict_info.map(ConflictInfo::into_locked_with_conflict_ts), val, ), old_value, )) } +#[derive(Clone, Copy)] +struct ConflictInfo { + conflict_start_ts: TimeStamp, + conflict_commit_ts: TimeStamp, +} + +impl ConflictInfo { + fn into_locked_with_conflict_ts(self) -> TimeStamp { + self.conflict_commit_ts + } + + fn into_write_conflict_error( + self, + start_ts: TimeStamp, + primary: Vec, + key: Vec, + ) -> MvccError { + ErrorInner::WriteConflict { + start_ts, + conflict_start_ts: self.conflict_start_ts, + conflict_commit_ts: self.conflict_commit_ts, + key, + primary, + reason: WriteConflictReason::PessimisticRetry, + } + .into() + } +} + +fn is_already_exist(res: &MvccError) -> bool { + matches!(res, MvccError(box ErrorInner::AlreadyExist { .. })) +} + pub mod tests { use concurrency_manager::ConcurrencyManager; use kvproto::kvrpcpb::Context; @@ -385,6 +461,8 @@ pub mod tests { for_update_ts: impl Into, need_value: bool, need_check_existence: bool, + should_not_exist: bool, + lock_only_if_exists: bool, ) -> MvccResult { let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -397,14 +475,14 @@ pub mod tests { &mut reader, Key::from_raw(key), pk, - false, + should_not_exist, 1, for_update_ts.into(), need_value, need_check_existence, 0.into(), false, - false, + lock_only_if_exists, true, ); if res.is_ok() { @@ -436,6 +514,8 @@ pub mod tests { for_update_ts, need_value, need_check_existence, + false, + false, ) .unwrap() } @@ -1826,6 +1906,8 @@ pub mod tests { 55, false, false, + false, + false, ) .unwrap_err(); assert!(matches!(err, MvccError(box ErrorInner::KeyIsLocked(_)))); @@ -1837,6 +1919,8 @@ pub mod tests { 9, false, false, + false, + false, ) .unwrap_err(); assert!(matches!(err, MvccError(box ErrorInner::KeyIsLocked(_)))); @@ -1974,4 +2058,448 @@ pub mod tests { 
must_commit(&mut engine, key, 60, 69); } } + + #[test] + fn test_lock_with_conflict_should_not_exist() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 20); + must_commit(&mut engine, b"k1", 20, 30); + + // Key already exists. + let e = acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k1", + b"k1", + 10, + 10, + false, + false, + true, + false, + ) + .unwrap_err(); + match e { + MvccError(box ErrorInner::WriteConflict { .. }) => (), + e => panic!("unexpected error: {:?}", e), + } + must_unlocked(&mut engine, b"k1"); + + // Key already exists and already locked by the same txn. + must_succeed(&mut engine, b"k1", b"k1", 10, 30); + must_pessimistic_locked(&mut engine, b"k1", 10, 30); + let e = acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k1", + b"k1", + 10, + 10, + false, + false, + true, + false, + ) + .unwrap_err(); + match e { + MvccError(box ErrorInner::WriteConflict { .. }) => (), + e => panic!("unexpected error: {:?}", e), + } + must_pessimistic_locked(&mut engine, b"k1", 10, 30); + + // Key already exists and already locked by a larger for_update_ts (stale + // request). + must_succeed(&mut engine, b"k1", b"k1", 10, 40); + must_pessimistic_locked(&mut engine, b"k1", 10, 40); + let e = acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k1", + b"k1", + 10, + 10, + false, + false, + true, + false, + ) + .unwrap_err(); + match e { + MvccError(box ErrorInner::WriteConflict { .. }) => (), + e => panic!("unexpected error: {:?}", e), + } + must_pessimistic_locked(&mut engine, b"k1", 10, 40); + + // Key not exist. 
+ must_pessimistic_prewrite_delete(&mut engine, b"k1", b"k1", 10, 40, DoPessimisticCheck); + must_commit(&mut engine, b"k1", 10, 60); + must_unlocked(&mut engine, b"k1"); + + acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k1", + b"k1", + 50, + 50, + false, + false, + true, + false, + ) + .unwrap() + .assert_locked_with_conflict(None, 60); + must_pessimistic_locked(&mut engine, b"k1", 50, 60); + // Key not exist and key is already locked (idempotency). + acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k1", + b"k1", + 50, + 50, + false, + false, + true, + false, + ) + .unwrap() + .assert_locked_with_conflict(None, 60); + must_pessimistic_locked(&mut engine, b"k1", 50, 60); + + // Key not exist and key is locked with a larger for_update_ts (stale request). + must_succeed(&mut engine, b"k1", b"k1", 50, 70); + acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k1", + b"k1", + 50, + 50, + false, + false, + true, + false, + ) + .unwrap() + .assert_locked_with_conflict(None, 70); + must_pessimistic_locked(&mut engine, b"k1", 50, 70); + + // The following test cases tests if `allow_lock_with_conflict` causes any + // problem when there's no write conflict. + + // Key not exist and no conflict. + acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k2", + b"k2", + 10, + 10, + false, + false, + true, + false, + ) + .unwrap() + .assert_empty(); + must_pessimistic_locked(&mut engine, b"k2", 10, 10); + + // Idempotency + acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k2", + b"k2", + 10, + 10, + false, + false, + true, + false, + ) + .unwrap() + .assert_empty(); + must_pessimistic_locked(&mut engine, b"k2", 10, 10); + + // Locked by a larger for_update_ts (stale request). + // Note that in this case, the client must have been requested a lock with + // larger for_update_ts, and the current request must be stale. + // Therefore it doesn't matter what result this request returns. 
It only + // need to guarantee the data won't be broken. + must_succeed(&mut engine, b"k2", b"k2", 10, 20); + must_pessimistic_locked(&mut engine, b"k2", 10, 20); + acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k2", + b"k2", + 10, + 10, + false, + false, + true, + false, + ) + .unwrap() + .assert_locked_with_conflict(None, 20); + must_pessimistic_locked(&mut engine, b"k2", 10, 20); + + // Locked by a smaller for_update_ts. + acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k2", + b"k2", + 10, + 25, + false, + false, + true, + false, + ) + .unwrap() + .assert_empty(); + must_pessimistic_locked(&mut engine, b"k2", 10, 25); + + // Key exists and no conflict. + must_pessimistic_prewrite_put(&mut engine, b"k2", b"v2", b"k2", 10, 20, DoPessimisticCheck); + must_commit(&mut engine, b"k2", 10, 30); + must_unlocked(&mut engine, b"k2"); + + let e = acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k2", + b"k2", + 40, + 40, + false, + false, + true, + false, + ) + .unwrap_err(); + match e { + MvccError(box ErrorInner::AlreadyExist { .. }) => (), + e => panic!("unexpected error: {:?}", e), + } + must_unlocked(&mut engine, b"k2"); + + // Key exists, no conflict, and key is already locked. + must_succeed(&mut engine, b"k2", b"k2", 40, 40); + must_pessimistic_locked(&mut engine, b"k2", 40, 40); + let e = acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k2", + b"k2", + 40, + 40, + false, + false, + true, + false, + ) + .unwrap_err(); + match e { + MvccError(box ErrorInner::AlreadyExist { .. }) => (), + e => panic!("unexpected error: {:?}", e), + } + must_pessimistic_locked(&mut engine, b"k2", 40, 40); + + // Key exists, no conflict, and key is locked with a larger for_update_ts (stale + // request). + // Note that in this case, the client must have been requested a lock with + // larger for_update_ts, and the current request must be stale. 
+ // Therefore it doesn't matter what result this request returns. It only + // need to guarantee the data won't be broken. + must_succeed(&mut engine, b"k2", b"k2", 40, 50); + must_pessimistic_locked(&mut engine, b"k2", 40, 50); + let e = acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k2", + b"k2", + 40, + 40, + false, + false, + true, + false, + ) + .unwrap_err(); + match e { + MvccError(box ErrorInner::AlreadyExist { .. }) => (), + e => panic!("unexpected error: {:?}", e), + } + must_pessimistic_locked(&mut engine, b"k2", 40, 50); + + // Key exists, no conflict, and key is locked with a smaller for_update_ts. + let e = acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k2", + b"k2", + 40, + 60, + false, + false, + true, + false, + ) + .unwrap_err(); + match e { + MvccError(box ErrorInner::AlreadyExist { .. }) => (), + e => panic!("unexpected error: {:?}", e), + } + must_pessimistic_locked(&mut engine, b"k2", 40, 50); + } + + #[test] + fn test_lock_with_conflict_lock_only_if_exists() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + + must_prewrite_put(&mut engine, b"k1", b"v1", b"k1", 20); + must_commit(&mut engine, b"k1", 20, 30); + + // Key exists. + acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k1", + b"k1", + 10, + 10, + true, + false, + false, + true, + ) + .unwrap() + .assert_locked_with_conflict(Some(b"v1"), 30); + must_pessimistic_locked(&mut engine, b"k1", 10, 30); + + // Key exists and already locked (idempotency). 
+ acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k1", + b"k1", + 10, + 10, + true, + false, + false, + true, + ) + .unwrap() + .assert_locked_with_conflict(Some(b"v1"), 30); + must_pessimistic_locked(&mut engine, b"k1", 10, 30); + + // Key exists and is locked with a larger for_update_ts (stale request) + must_succeed(&mut engine, b"k1", b"k1", 10, 40); + acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k1", + b"k1", + 10, + 10, + true, + false, + false, + true, + ) + .unwrap() + .assert_locked_with_conflict(Some(b"v1"), 40); + must_pessimistic_locked(&mut engine, b"k1", 10, 40); + + // Key not exist. + must_pessimistic_prewrite_delete(&mut engine, b"k1", b"k1", 10, 40, DoPessimisticCheck); + must_commit(&mut engine, b"k1", 10, 60); + must_unlocked(&mut engine, b"k1"); + + let e = acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k1", + b"k1", + 50, + 50, + true, + false, + false, + true, + ) + .unwrap_err(); + match e { + MvccError(box ErrorInner::WriteConflict { .. }) => (), + e => panic!("unexpected error: {:?}", e), + } + must_unlocked(&mut engine, b"k1"); + + // lock_only_if_exists didn't handle the case that the key doesn't exist but + // already locked. So do not test it in this case. + + // The following test cases tests if `allow_lock_with_conflict` causes any + // problem when there's no write conflict. + + // Key not exist and no conflict. + acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k2", + b"k2", + 10, + 10, + true, + false, + false, + true, + ) + .unwrap() + .assert_value(None); + must_unlocked(&mut engine, b"k2"); + + // Key exists and no conflict. 
+ must_prewrite_put(&mut engine, b"k2", b"v2", b"k2", 10); + must_commit(&mut engine, b"k2", 10, 30); + + acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k2", + b"k2", + 40, + 40, + true, + false, + false, + true, + ) + .unwrap() + .assert_value(Some(b"v2")); + must_pessimistic_locked(&mut engine, b"k2", 40, 40); + + // Key exists, no conflict and already locked (idempotency). + acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k2", + b"k2", + 40, + 40, + true, + false, + false, + true, + ) + .unwrap() + .assert_value(Some(b"v2")); + must_pessimistic_locked(&mut engine, b"k2", 40, 40); + + // Key exists, no conflict and locked with a larger for_update_ts (stale + // request). + // Note that in this case, the client must have been requested a lock with + // larger for_update_ts, and the current request must be stale. + // Therefore it doesn't matter what result this request returns. It only + // need to guarantee the data won't be broken. + must_succeed(&mut engine, b"k2", b"k2", 40, 50); + must_pessimistic_locked(&mut engine, b"k2", 40, 50); + acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k2", + b"k2", + 40, + 40, + true, + false, + false, + true, + ) + .unwrap() + .assert_locked_with_conflict(Some(b"v2"), 50); + must_pessimistic_locked(&mut engine, b"k2", 40, 50); + } } diff --git a/src/storage/types.rs b/src/storage/types.rs index b4e91811843..7774dcda9ec 100644 --- a/src/storage/types.rs +++ b/src/storage/types.rs @@ -223,7 +223,13 @@ impl PessimisticLockKeyResult { } pub fn assert_empty(&self) { - assert!(matches!(self, Self::Empty)); + match self { + Self::Empty => (), + x => panic!( + "pessimistic lock key result not match, expected Empty, got {:?}", + x + ), + } } #[cfg(test)] From 404c777e710e6fece2e917fdb99ce970d5a85dd1 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 8 Mar 2023 10:53:12 +0800 Subject: [PATCH 0570/1149] raftstore-v2: consider 
None when getting mailbox (#14348) close tikv/tikv#14347 consider None when getting mailbox Signed-off-by: SpadeA-Tang --- .../src/operation/command/admin/split.rs | 44 ++++++++++++++----- tests/failpoints/cases/test_split_region.rs | 34 +++++++++++++- 2 files changed, 67 insertions(+), 11 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index b31fc7e7471..e6cd7511801 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -603,7 +603,23 @@ impl Peer { self.add_pending_tick(PeerTick::SplitRegionCheck); } self.storage_mut().set_has_dirty_data(true); - let mailbox = store_ctx.router.mailbox(self.region_id()).unwrap(); + + fail_point!("before_cluster_shutdown1"); + let mailbox = { + match store_ctx.router.mailbox(self.region_id()) { + Some(mailbox) => mailbox, + None => { + // None means the node is shutdown concurrently and thus the + // mailboxes in router have been cleared + assert!( + store_ctx.router.is_shutdown(), + "{} router should have been closed", + SlogFormat(&self.logger) + ); + return; + } + } + }; let tablet_index = res.tablet_index; let _ = store_ctx .schedulers @@ -641,16 +657,20 @@ impl Peer { match store_ctx.router.force_send(new_region_id, split_init) { Ok(_) => {} Err(SendError(PeerMsg::SplitInit(msg))) => { - store_ctx + fail_point!("before_cluster_shutdown2", |_| {}); + if let Err(e) = store_ctx .router .force_send_control(StoreMsg::SplitInit(msg)) - .unwrap_or_else(|e| { - slog_panic!( - self.logger, - "fails to send split peer intialization msg to store"; - "error" => ?e, - ) - }); + { + if store_ctx.router.is_shutdown() { + return; + } + slog_panic!( + self.logger, + "fails to send split peer intialization msg to store"; + "error" => ?e, + ); + } } _ => unreachable!(), } @@ -731,7 +751,11 @@ impl Peer { } else { // None means the node is shutdown 
concurrently and thus the // mailboxes in router have been cleared - assert!(store_ctx.router.is_shutdown()); + assert!( + store_ctx.router.is_shutdown(), + "{} router should have been closed", + SlogFormat(&self.logger) + ); return; } } diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index f3a052c8027..94dfd1b5648 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -23,7 +23,9 @@ use kvproto::{ use pd_client::PdClient; use raft::eraftpb::MessageType; use raftstore::{ - store::{config::Config as RaftstoreConfig, util::is_vote_msg, Callback, PeerMsg}, + store::{ + config::Config as RaftstoreConfig, util::is_vote_msg, Callback, PeerMsg, WriteResponse, + }, Result, }; use test_raftstore::*; @@ -1104,3 +1106,33 @@ fn test_split_store_channel_full() { assert_ne!(region.id, 1); fail::remove(sender_fp); } + +#[test] +fn test_split_during_cluster_shutdown() { + // test case for raftstore-v2 + use test_raftstore_v2::*; + + let test_split = |split_fp| { + let count = 1; + let mut cluster = new_server_cluster(0, count); + cluster.run(); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k2", b"v2"); + cluster.must_put(b"k3", b"v3"); + fail::cfg_callback(split_fp, move || { + // After one second, mailboxes will be cleared in shutdown + thread::sleep(Duration::from_secs(1)); + }) + .unwrap(); + + let pd_client = cluster.pd_client.clone(); + let region = pd_client.get_region(b"k2").unwrap(); + let c = Box::new(move |_write_resp: WriteResponse| {}); + cluster.split_region(®ion, b"k2", Callback::write(c)); + + cluster.shutdown(); + }; + + test_split("before_cluster_shutdown1"); + test_split("before_cluster_shutdown2"); +} From e186703363e101934868f71d74a7978f5ce44b1d Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 8 Mar 2023 13:11:11 +0800 Subject: [PATCH 0571/1149] raftstore-v2: use compaction filter to trim tablet (#14350) ref tikv/tikv#12842 None 
Signed-off-by: tabokie --- Cargo.lock | 6 +- Cargo.toml | 1 + components/engine_rocks/src/raw.rs | 18 +- components/engine_rocks/src/util.rs | 180 +++++++++++++++++- components/engine_test/src/lib.rs | 12 +- components/engine_traits/src/tablet.rs | 2 +- .../raftstore-v2/src/worker/tablet_gc.rs | 9 +- components/tikv_util/src/sys/cgroup.rs | 1 - src/config/mod.rs | 145 ++++++++++---- src/server/engine_factory.rs | 64 ++++++- src/server/gc_worker/compaction_filter.rs | 27 +-- .../gc_worker/rawkv_compaction_filter.rs | 21 +- src/server/ttl/ttl_compaction_filter.rs | 19 +- src/storage/kv/test_engine_builder.rs | 16 +- src/storage/mod.rs | 7 +- tests/integrations/storage/test_titan.rs | 1 + 16 files changed, 423 insertions(+), 106 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d939343c06a..77d24e482d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2862,7 +2862,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#cd8b60758b46afbbde6fde52fa86a2776b401723" +source = "git+https://github.com/tikv/rust-rocksdb.git#9e4678857e5b4c738e95c7ee1a35ee962264f4e9" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2881,7 +2881,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#cd8b60758b46afbbde6fde52fa86a2776b401723" +source = "git+https://github.com/tikv/rust-rocksdb.git#9e4678857e5b4c738e95c7ee1a35ee962264f4e9" dependencies = [ "bzip2-sys", "cc", @@ -4799,7 +4799,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#cd8b60758b46afbbde6fde52fa86a2776b401723" +source = "git+https://github.com/tikv/rust-rocksdb.git#9e4678857e5b4c738e95c7ee1a35ee962264f4e9" dependencies = [ "libc 0.2.139", "librocksdb_sys", diff --git a/Cargo.toml b/Cargo.toml index 509f9514b10..a559fa22474 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -185,6 +185,7 @@ api_version = { 
workspace = true, features = ["testexport"] } example_coprocessor_plugin = { workspace = true } # should be a binary dependency hyper-openssl = "0.9" panic_hook = { workspace = true } +raftstore = { workspace = true, features = ["testexport"] } reqwest = { version = "0.11", features = ["blocking"] } test_sst_importer = { workspace = true } test_util = { workspace = true } diff --git a/components/engine_rocks/src/raw.rs b/components/engine_rocks/src/raw.rs index e940fdd2cd7..474137534f8 100644 --- a/components/engine_rocks/src/raw.rs +++ b/components/engine_rocks/src/raw.rs @@ -7,13 +7,13 @@ //! crate, but only until the engine interface is completely abstracted. pub use rocksdb::{ - new_compaction_filter_raw, run_ldb_tool, run_sst_dump_tool, BlockBasedOptions, Cache, - ChecksumType, CompactOptions, CompactionFilter, CompactionFilterContext, - CompactionFilterDecision, CompactionFilterFactory, CompactionFilterValueType, - CompactionJobInfo, CompactionOptions, CompactionPriority, ConcurrentTaskLimiter, - DBBottommostLevelCompaction, DBCompactionFilter, DBCompactionStyle, DBCompressionType, - DBEntryType, DBRateLimiterMode, DBRecoveryMode, DBStatisticsTickerType, DBTitanDBBlobRunMode, - Env, EventListener, IngestExternalFileOptions, LRUCacheOptions, MemoryAllocator, PerfContext, - PrepopulateBlockCache, Range, RateLimiter, SliceTransform, Statistics, - TablePropertiesCollector, TablePropertiesCollectorFactory, WriteBufferManager, + run_ldb_tool, run_sst_dump_tool, BlockBasedOptions, Cache, ChecksumType, CompactOptions, + CompactionFilter, CompactionFilterContext, CompactionFilterDecision, CompactionFilterFactory, + CompactionFilterValueType, CompactionJobInfo, CompactionOptions, CompactionPriority, + ConcurrentTaskLimiter, DBBottommostLevelCompaction, DBCompactionFilter, DBCompactionStyle, + DBCompressionType, DBEntryType, DBRateLimiterMode, DBRecoveryMode, DBStatisticsTickerType, + DBTableFileCreationReason, DBTitanDBBlobRunMode, Env, EventListener, 
IngestExternalFileOptions, + LRUCacheOptions, MemoryAllocator, PerfContext, PrepopulateBlockCache, Range, RateLimiter, + SliceTransform, Statistics, TablePropertiesCollector, TablePropertiesCollectorFactory, + WriteBufferManager, }; diff --git a/components/engine_rocks/src/util.rs b/components/engine_rocks/src/util.rs index 407cf8ee611..52b1364c3ce 100644 --- a/components/engine_rocks/src/util.rs +++ b/components/engine_rocks/src/util.rs @@ -1,11 +1,13 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::{fs, path::Path, str::FromStr, sync::Arc}; +use std::{ffi::CString, fs, path::Path, str::FromStr, sync::Arc}; use engine_traits::{Engines, Range, Result, CF_DEFAULT}; use rocksdb::{ - load_latest_options, CColumnFamilyDescriptor, CFHandle, ColumnFamilyOptions, Env, - Range as RocksRange, SliceTransform, DB, + load_latest_options, CColumnFamilyDescriptor, CFHandle, ColumnFamilyOptions, CompactionFilter, + CompactionFilterContext, CompactionFilterDecision, CompactionFilterFactory, + CompactionFilterValueType, DBTableFileCreationReason, Env, Range as RocksRange, SliceTransform, + DB, }; use slog_global::warn; @@ -331,6 +333,178 @@ pub fn from_raw_perf_level(level: rocksdb::PerfLevel) -> engine_traits::PerfLeve } } +struct OwnedRange { + start_key: Box<[u8]>, + end_key: Box<[u8]>, +} + +type FilterByReason = [bool; 4]; + +fn reason_to_index(reason: DBTableFileCreationReason) -> usize { + match reason { + DBTableFileCreationReason::Flush => 0, + DBTableFileCreationReason::Compaction => 1, + DBTableFileCreationReason::Recovery => 2, + DBTableFileCreationReason::Misc => 3, + } +} + +fn filter_by_reason(factory: &impl CompactionFilterFactory) -> FilterByReason { + let mut r = FilterByReason::default(); + r[reason_to_index(DBTableFileCreationReason::Flush)] = + factory.should_filter_table_file_creation(DBTableFileCreationReason::Flush); + r[reason_to_index(DBTableFileCreationReason::Compaction)] = + 
factory.should_filter_table_file_creation(DBTableFileCreationReason::Compaction); + r[reason_to_index(DBTableFileCreationReason::Recovery)] = + factory.should_filter_table_file_creation(DBTableFileCreationReason::Recovery); + r[reason_to_index(DBTableFileCreationReason::Misc)] = + factory.should_filter_table_file_creation(DBTableFileCreationReason::Misc); + r +} + +pub struct StackingCompactionFilterFactory { + outer_should_filter: FilterByReason, + outer: A, + inner_should_filter: FilterByReason, + inner: B, +} + +impl StackingCompactionFilterFactory { + /// Creates a factory of stacked filter with `outer` on top of `inner`. + /// Table keys will be filtered through `outer` first before reaching + /// `inner`. + pub fn new(outer: A, inner: B) -> Self { + let outer_should_filter = filter_by_reason(&outer); + let inner_should_filter = filter_by_reason(&inner); + Self { + outer_should_filter, + outer, + inner_should_filter, + inner, + } + } +} + +impl CompactionFilterFactory + for StackingCompactionFilterFactory +{ + type Filter = StackingCompactionFilter; + + fn create_compaction_filter( + &self, + context: &CompactionFilterContext, + ) -> Option<(CString, Self::Filter)> { + let i = reason_to_index(context.reason()); + let mut outer_filter = None; + let mut inner_filter = None; + let mut full_name = String::new(); + if self.outer_should_filter[i] + && let Some((name, filter)) = self.outer.create_compaction_filter(context) + { + outer_filter = Some(filter); + full_name = name.into_string().unwrap(); + } + if self.inner_should_filter[i] + && let Some((name, filter)) = self.inner.create_compaction_filter(context) + { + inner_filter = Some(filter); + if !full_name.is_empty() { + full_name += "."; + } + full_name += name.to_str().unwrap(); + } + if outer_filter.is_none() && inner_filter.is_none() { + None + } else { + let filter = StackingCompactionFilter { + outer: outer_filter, + inner: inner_filter, + }; + Some((CString::new(full_name).unwrap(), filter)) + } + } + + 
fn should_filter_table_file_creation(&self, reason: DBTableFileCreationReason) -> bool { + let i = reason_to_index(reason); + self.outer_should_filter[i] || self.inner_should_filter[i] + } +} + +pub struct StackingCompactionFilter { + outer: Option, + inner: Option, +} + +impl CompactionFilter for StackingCompactionFilter { + fn featured_filter( + &mut self, + level: usize, + key: &[u8], + seqno: u64, + value: &[u8], + value_type: CompactionFilterValueType, + ) -> CompactionFilterDecision { + if let Some(outer) = self.outer.as_mut() + && let r = outer.featured_filter(level, key, seqno, value, value_type) + && !matches!(r, CompactionFilterDecision::Keep) + { + r + } else if let Some(inner) = self.inner.as_mut() { + inner.featured_filter(level, key, seqno, value, value_type) + } else { + CompactionFilterDecision::Keep + } + } +} + +#[derive(Clone)] +pub struct RangeCompactionFilterFactory(Arc); + +impl RangeCompactionFilterFactory { + pub fn new(start_key: Box<[u8]>, end_key: Box<[u8]>) -> Self { + let range = OwnedRange { start_key, end_key }; + Self(Arc::new(range)) + } +} + +impl CompactionFilterFactory for RangeCompactionFilterFactory { + type Filter = RangeCompactionFilter; + + fn create_compaction_filter( + &self, + _context: &CompactionFilterContext, + ) -> Option<(CString, Self::Filter)> { + Some(( + CString::new("range_filter").unwrap(), + RangeCompactionFilter(self.0.clone()), + )) + } + + fn should_filter_table_file_creation(&self, _reason: DBTableFileCreationReason) -> bool { + true + } +} + +/// Filters out all keys outside the key range. 
+pub struct RangeCompactionFilter(Arc); + +impl CompactionFilter for RangeCompactionFilter { + fn featured_filter( + &mut self, + _level: usize, + key: &[u8], + _seqno: u64, + _value: &[u8], + _value_type: CompactionFilterValueType, + ) -> CompactionFilterDecision { + if key < self.0.start_key.as_ref() || key >= self.0.end_key.as_ref() { + CompactionFilterDecision::Remove + } else { + CompactionFilterDecision::Keep + } + } +} + #[cfg(test)] mod tests { use engine_traits::{CfOptionsExt, Peekable, SyncMutable, CF_DEFAULT}; diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index 1b0dbfbddb6..bc8b2f8baf2 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -365,7 +365,7 @@ pub mod ctor { use engine_rocks::{ get_env, properties::{MvccPropertiesCollectorFactory, RangePropertiesCollectorFactory}, - util::new_engine_opt as rocks_new_engine_opt, + util::{new_engine_opt as rocks_new_engine_opt, RangeCompactionFilterFactory}, RocksCfOptions, RocksDbOptions, RocksPersistenceListener, }; use engine_traits::{ @@ -425,9 +425,17 @@ pub mod ctor { ); rocks_db_opts.add_event_listener(RocksPersistenceListener::new(listener)); } + let factory = + RangeCompactionFilterFactory::new(ctx.start_key.clone(), ctx.end_key.clone()); let rocks_cfs_opts = cf_opts .iter() - .map(|(name, opt)| (*name, get_rocks_cf_opts(opt))) + .map(|(name, opt)| { + let mut opt = get_rocks_cf_opts(opt); + // We assume `get_rocks_cf_opts` didn't set a factory already. 
+ opt.set_compaction_filter_factory("range_filter_factory", factory.clone()) + .unwrap(); + (*name, opt) + }) .collect(); rocks_new_engine_opt(path, rocks_db_opts, rocks_cfs_opts) } diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index 79512a99f64..14f7d186f76 100644 --- a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -149,7 +149,7 @@ pub trait TabletFactory: Send + Sync { /// Check if the tablet with specified path exists fn exists(&self, path: &Path) -> bool; - #[cfg(any(test, feature = "testexport"))] + #[cfg(feature = "testexport")] fn set_state_storage(&self, _: Arc) { unimplemented!() } diff --git a/components/raftstore-v2/src/worker/tablet_gc.rs b/components/raftstore-v2/src/worker/tablet_gc.rs index d9bd03b326a..0be8fdaa901 100644 --- a/components/raftstore-v2/src/worker/tablet_gc.rs +++ b/components/raftstore-v2/src/worker/tablet_gc.rs @@ -162,13 +162,7 @@ impl Runner { let end_key = keys::data_end_key(&end); let range1 = Range::new(&[], &start_key); let range2 = Range::new(&end_key, keys::DATA_MAX_KEY); - // TODO: Avoid `DeleteByRange` after compaction filter is ready. - if let Err(e) = tablet - .delete_ranges_cfs(DeleteStrategy::DeleteFiles, &[range1, range2]) - .and_then(|_| { - tablet.delete_ranges_cfs(DeleteStrategy::DeleteByRange, &[range1, range2]) - }) - { + if let Err(e) = tablet.delete_ranges_cfs(DeleteStrategy::DeleteFiles, &[range1, range2]) { error!( self.logger, "failed to trim tablet"; @@ -184,6 +178,7 @@ impl Runner { let range1 = Range::new(&[], &start_key); let range2 = Range::new(&end_key, keys::DATA_MAX_KEY); for r in [range1, range2] { + // When compaction filter is present, trivial move is disallowed. 
if let Err(e) = tablet.compact_range(Some(r.start_key), Some(r.end_key), false, 1) { diff --git a/components/tikv_util/src/sys/cgroup.rs b/components/tikv_util/src/sys/cgroup.rs index 2cd420e5d51..371d51e0b70 100644 --- a/components/tikv_util/src/sys/cgroup.rs +++ b/components/tikv_util/src/sys/cgroup.rs @@ -560,7 +560,6 @@ mod tests { ("-18446744073709551610", None), // Raise InvalidDigit instead of NegOverflow. ("0.1", None), ]; - println!("{:?}", "-18446744073709551610".parse::()); for (content, expect) in cases.into_iter() { let limit = parse_memory_max(content); assert_eq!(limit, expect); diff --git a/src/config/mod.rs b/src/config/mod.rs index 0f97487edcf..b51883826c8 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -33,7 +33,10 @@ use engine_rocks::{ DBCompactionStyle, DBCompressionType, DBRateLimiterMode, DBRecoveryMode, Env, PrepopulateBlockCache, RateLimiter, WriteBufferManager, }, - util::{FixedPrefixSliceTransform, FixedSuffixSliceTransform, NoopSliceTransform}, + util::{ + FixedPrefixSliceTransform, FixedSuffixSliceTransform, NoopSliceTransform, + RangeCompactionFilterFactory, StackingCompactionFilterFactory, + }, RaftDbLogger, RangePropertiesCollectorFactory, RawMvccPropertiesCollectorFactory, RocksCfOptions, RocksDbOptions, RocksEngine, RocksEventListener, RocksStatistics, RocksTitanDbOptions, RocksdbLogger, TtlPropertiesCollectorFactory, @@ -702,6 +705,7 @@ impl DefaultCfConfig { shared: &CfResources, region_info_accessor: Option<&RegionInfoAccessor>, api_version: ApiVersion, + filter_factory: Option<&RangeCompactionFilterFactory>, for_engine: EngineType, ) -> RocksCfOptions { let mut cf_opts = build_cf_opt!( @@ -721,29 +725,67 @@ impl DefaultCfConfig { RawMvccPropertiesCollectorFactory::default(), ); cf_opts.add_table_properties_collector_factory("tikv.range-properties-collector", f); - match api_version { - ApiVersion::V1 => { - // nothing to do - } - ApiVersion::V1ttl => { - cf_opts.add_table_properties_collector_factory( - 
"tikv.ttl-properties-collector", - TtlPropertiesCollectorFactory::::default(), - ); - cf_opts - .set_compaction_filter_factory( - "ttl_compaction_filter_factory", + if let Some(factory) = filter_factory { + match api_version { + ApiVersion::V1 => { + cf_opts + .set_compaction_filter_factory("range_filter_factory", factory.clone()) + .unwrap(); + } + ApiVersion::V1ttl => { + cf_opts.add_table_properties_collector_factory( + "tikv.ttl-properties-collector", + TtlPropertiesCollectorFactory::::default(), + ); + let factory = StackingCompactionFilterFactory::new( + factory.clone(), TtlCompactionFilterFactory::::default(), - ) - .unwrap(); - } - ApiVersion::V2 => { - cf_opts - .set_compaction_filter_factory( - "apiv2_gc_compaction_filter_factory", + ); + cf_opts + .set_compaction_filter_factory( + "range_filter_factory.ttl_compaction_filter_factory", + factory, + ) + .unwrap(); + } + ApiVersion::V2 => { + let factory = StackingCompactionFilterFactory::new( + factory.clone(), RawCompactionFilterFactory, - ) - .unwrap(); + ); + cf_opts + .set_compaction_filter_factory( + "range_filter_factory.apiv2_gc_compaction_filter_factory", + factory, + ) + .unwrap(); + } + } + } else { + match api_version { + ApiVersion::V1 => { + // nothing to do + } + ApiVersion::V1ttl => { + cf_opts.add_table_properties_collector_factory( + "tikv.ttl-properties-collector", + TtlPropertiesCollectorFactory::::default(), + ); + cf_opts + .set_compaction_filter_factory( + "ttl_compaction_filter_factory", + TtlCompactionFilterFactory::::default(), + ) + .unwrap(); + } + ApiVersion::V2 => { + cf_opts + .set_compaction_filter_factory( + "apiv2_gc_compaction_filter_factory", + RawCompactionFilterFactory, + ) + .unwrap(); + } } } cf_opts.set_titan_cf_options(&self.titan.build_opts()); @@ -827,6 +869,7 @@ impl WriteCfConfig { &self, shared: &CfResources, region_info_accessor: Option<&RegionInfoAccessor>, + filter_factory: Option<&RangeCompactionFilterFactory>, for_engine: EngineType, ) -> RocksCfOptions { 
let mut cf_opts = build_cf_opt!( @@ -855,12 +898,23 @@ impl WriteCfConfig { prop_keys_index_distance: self.prop_keys_index_distance, }; cf_opts.add_table_properties_collector_factory("tikv.range-properties-collector", f); - cf_opts - .set_compaction_filter_factory( - "write_compaction_filter_factory", - WriteCompactionFilterFactory, - ) - .unwrap(); + if let Some(factory) = filter_factory { + let factory = + StackingCompactionFilterFactory::new(factory.clone(), WriteCompactionFilterFactory); + cf_opts + .set_compaction_filter_factory( + "range_filter_factory.write_compaction_filter_factory", + factory, + ) + .unwrap(); + } else { + cf_opts + .set_compaction_filter_factory( + "write_compaction_filter_factory", + WriteCompactionFilterFactory, + ) + .unwrap(); + } cf_opts.set_titan_cf_options(&self.titan.build_opts()); cf_opts } @@ -930,7 +984,12 @@ impl Default for LockCfConfig { } impl LockCfConfig { - pub fn build_opt(&self, shared: &CfResources, for_engine: EngineType) -> RocksCfOptions { + pub fn build_opt( + &self, + shared: &CfResources, + filter_factory: Option<&RangeCompactionFilterFactory>, + for_engine: EngineType, + ) -> RocksCfOptions { let no_region_info_accessor: Option<&RegionInfoAccessor> = None; let mut cf_opts = build_cf_opt!( self, @@ -948,6 +1007,11 @@ impl LockCfConfig { }; cf_opts.add_table_properties_collector_factory("tikv.range-properties-collector", f); cf_opts.set_memtable_prefix_bloom_size_ratio(bloom_filter_ratio(for_engine)); + if let Some(factory) = filter_factory { + cf_opts + .set_compaction_filter_factory("range_filter_factory", factory.clone()) + .unwrap(); + } cf_opts.set_titan_cf_options(&self.titan.build_opts()); cf_opts } @@ -1386,19 +1450,28 @@ impl DbConfig { shared: &CfResources, region_info_accessor: Option<&RegionInfoAccessor>, api_version: ApiVersion, + filter_factory: Option<&RangeCompactionFilterFactory>, for_engine: EngineType, ) -> Vec<(&'static str, RocksCfOptions)> { let mut cf_opts = Vec::with_capacity(4); 
cf_opts.push(( CF_DEFAULT, - self.defaultcf - .build_opt(shared, region_info_accessor, api_version, for_engine), + self.defaultcf.build_opt( + shared, + region_info_accessor, + api_version, + filter_factory, + for_engine, + ), + )); + cf_opts.push(( + CF_LOCK, + self.lockcf.build_opt(shared, filter_factory, for_engine), )); - cf_opts.push((CF_LOCK, self.lockcf.build_opt(shared, for_engine))); cf_opts.push(( CF_WRITE, self.writecf - .build_opt(shared, region_info_accessor, for_engine), + .build_opt(shared, region_info_accessor, filter_factory, for_engine), )); if for_engine == EngineType::RaftKv { cf_opts.push((CF_RAFT, self.raftcf.build_opt(shared))); @@ -3159,7 +3232,10 @@ impl TikvConfig { if self.storage.engine == EngineType::RaftKv2 { self.raft_store.store_io_pool_size = cmp::max(self.raft_store.store_io_pool_size, 1); if !self.raft_engine.enable { - panic!("partitioned-raft-kv only supports raft log engine."); + return Err("partitioned-raft-kv only supports raft log engine.".into()); + } + if self.rocksdb.titan.enabled { + return Err("partitioned-raft-kv doesn't support titan.".into()); } } @@ -4634,6 +4710,7 @@ mod tests { ), None, cfg.storage.api_version(), + None, cfg.storage.engine, ), None, diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index e9b59141da2..9d2c03998e6 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -4,6 +4,7 @@ use std::{path::Path, sync::Arc}; use engine_rocks::{ raw::{Cache, Env}, + util::RangeCompactionFilterFactory, CompactedEventSender, CompactionListener, FlowListener, RocksCfOptions, RocksCompactionJobInfo, RocksDbOptions, RocksEngine, RocksEventListener, RocksPersistenceListener, RocksStatistics, TabletLogger, @@ -153,11 +154,16 @@ impl KvEngineFactory { db_opts } - fn cf_opts(&self, for_engine: EngineType) -> Vec<(&str, RocksCfOptions)> { + fn cf_opts( + &self, + filter_factory: Option<&RangeCompactionFilterFactory>, + for_engine: EngineType, + ) -> Vec<(&str, 
RocksCfOptions)> { self.inner.rocksdb_config.build_cf_opts( &self.inner.cf_resources, self.inner.region_info_accessor.as_ref(), self.inner.api_version, + filter_factory, for_engine, ) } @@ -172,7 +178,7 @@ impl KvEngineFactory { pub fn create_shared_db(&self, path: impl AsRef) -> Result { let path = path.as_ref(); let mut db_opts = self.db_opts(EngineType::RaftKv); - let cf_opts = self.cf_opts(EngineType::RaftKv); + let cf_opts = self.cf_opts(None, EngineType::RaftKv); if let Some(listener) = &self.inner.flow_listener { db_opts.add_event_listener(listener.clone()); } @@ -191,7 +197,8 @@ impl TabletFactory for KvEngineFactory { let mut db_opts = self.db_opts(EngineType::RaftKv2); let tablet_name = path.file_name().unwrap().to_str().unwrap().to_string(); db_opts.set_info_log(TabletLogger::new(tablet_name)); - let cf_opts = self.cf_opts(EngineType::RaftKv2); + let factory = RangeCompactionFilterFactory::new(ctx.start_key.clone(), ctx.end_key.clone()); + let cf_opts = self.cf_opts(Some(&factory), EngineType::RaftKv2); if let Some(listener) = &self.inner.flow_listener { db_opts.add_event_listener(listener.clone_with(ctx.id)); } @@ -219,7 +226,7 @@ impl TabletFactory for KvEngineFactory { info!("destroy tablet"; "path" => %path.display(), "region_id" => ctx.id, "suffix" => ?ctx.suffix); // Create kv engine. 
let _db_opts = self.db_opts(EngineType::RaftKv2); - let _cf_opts = self.cf_opts(EngineType::RaftKv2); + let _cf_opts = self.cf_opts(None, EngineType::RaftKv2); // TODOTODO: call rust-rocks or tirocks to destroy_engine; // engine_rocks::util::destroy_engine( // path.to_str().unwrap(), @@ -237,7 +244,7 @@ impl TabletFactory for KvEngineFactory { RocksEngine::exists(path.to_str().unwrap()) } - #[cfg(any(test, feature = "testexport"))] + #[cfg(feature = "testexport")] fn set_state_storage(&self, state_storage: Arc) { let inner = Arc::as_ptr(&self.inner) as *mut FactoryInner; unsafe { @@ -250,13 +257,13 @@ impl TabletFactory for KvEngineFactory { mod tests { use std::path::Path; - use engine_traits::TabletRegistry; + use engine_traits::{Peekable, SyncMutable, TabletRegistry}; + use kvproto::metapb::Region; use super::*; use crate::config::TikvConfig; - #[test] - fn test_engine_factory() { + fn build_test(name: &'static str) -> (tempfile::TempDir, TabletRegistry) { let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); let common_test_cfg = manifest_dir.join("components/test_raftstore/src/common-test.toml"); let cfg = TikvConfig::from_file(&common_test_cfg, None).unwrap_or_else(|e| { @@ -270,11 +277,18 @@ mod tests { .storage .block_cache .build_shared_cache(cfg.storage.engine); - let dir = test_util::temp_dir("test-engine-factory", false); + let dir = test_util::temp_dir(name, false); let env = cfg.build_shared_rocks_env(None, None).unwrap(); let factory = KvEngineFactoryBuilder::new(env, &cfg, cache).build(); let reg = TabletRegistry::new(Box::new(factory), dir.path()).unwrap(); + (dir, reg) + } + + #[test] + fn test_engine_factory() { + let (_dir, reg) = build_test("test_engine_factory"); + let path = reg.tablet_path(1, 3); assert!(!reg.tablet_factory().exists(&path)); let mut tablet_ctx = TabletContext::with_infinite_region(1, Some(3)); @@ -294,4 +308,36 @@ mod tests { .unwrap(); assert!(!reg.tablet_factory().exists(&path)); } + + #[test] + fn 
test_engine_factory_compaction_filter() { + let (_dir, reg) = build_test("test_engine_factory_compaction_filter"); + + let region = Region { + id: 1, + start_key: b"k1".to_vec(), + end_key: b"k3".to_vec(), + ..Default::default() + }; + let tablet_ctx = TabletContext::new(®ion, Some(3)); + let path = reg.tablet_path(1, 3); + let engine = reg.tablet_factory().open_tablet(tablet_ctx, &path).unwrap(); + engine.put(&keys::data_key(b"k0"), b"v0").unwrap(); + engine.put(&keys::data_key(b"k1"), b"v1").unwrap(); + engine.put(&keys::data_key(b"k2"), b"v2").unwrap(); + engine.put(&keys::data_key(b"k3"), b"v3").unwrap(); + engine.put(&keys::data_key(b"k4"), b"v4").unwrap(); + engine.flush_cfs(&[], true).unwrap(); + assert!(engine.get_value(&keys::data_key(b"k0")).unwrap().is_none()); + assert_eq!( + engine.get_value(&keys::data_key(b"k1")).unwrap().unwrap(), + b"v1" + ); + assert_eq!( + engine.get_value(&keys::data_key(b"k2")).unwrap().unwrap(), + b"v2" + ); + assert!(engine.get_value(&keys::data_key(b"k3")).unwrap().is_none()); + assert!(engine.get_value(&keys::data_key(b"k4")).unwrap().is_none()); + } } diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index 5d33346a844..e6a5b923628 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -14,9 +14,8 @@ use std::{ use engine_rocks::{ raw::{ - new_compaction_filter_raw, CompactionFilter, CompactionFilterContext, - CompactionFilterDecision, CompactionFilterFactory, CompactionFilterValueType, - DBCompactionFilter, + CompactionFilter, CompactionFilterContext, CompactionFilterDecision, + CompactionFilterFactory, CompactionFilterValueType, }, RocksEngine, RocksMvccProperties, RocksWriteBatchVec, }; @@ -199,21 +198,23 @@ impl CompactionFilterInitializer for Option { pub struct WriteCompactionFilterFactory; impl CompactionFilterFactory for WriteCompactionFilterFactory { + type Filter = WriteCompactionFilter; + fn 
create_compaction_filter( &self, context: &CompactionFilterContext, - ) -> *mut DBCompactionFilter { + ) -> Option<(CString, Self::Filter)> { let gc_context_option = GC_CONTEXT.lock().unwrap(); let gc_context = match *gc_context_option { Some(ref ctx) => ctx, - None => return std::ptr::null_mut(), + None => return None, }; let safe_point = gc_context.safe_point.load(Ordering::Relaxed); if safe_point == 0 { // Safe point has not been initialized yet. debug!("skip gc in compaction filter because of no safe point"); - return std::ptr::null_mut(); + return None; } let (enable, skip_vcheck, ratio_threshold) = { @@ -241,12 +242,12 @@ impl CompactionFilterFactory for WriteCompactionFilterFactory { .map_or(false, RocksEngine::is_stalled_or_stopped) { debug!("skip gc in compaction filter because the DB is stalled"); - return std::ptr::null_mut(); + return None; } if !do_check_allowed(enable, skip_vcheck, &gc_context.feature_gate) { debug!("skip gc in compaction filter because it's not allowed"); - return std::ptr::null_mut(); + return None; } drop(gc_context_option); GC_COMPACTION_FILTER_PERFORM @@ -257,7 +258,7 @@ impl CompactionFilterFactory for WriteCompactionFilterFactory { GC_COMPACTION_FILTER_SKIP .with_label_values(&[STAT_TXN_KEYMODE]) .inc(); - return std::ptr::null_mut(); + return None; } debug!( @@ -275,7 +276,7 @@ impl CompactionFilterFactory for WriteCompactionFilterFactory { (store_id, region_info_provider), ); let name = CString::new("write_compaction_filter").unwrap(); - unsafe { new_compaction_filter_raw(name, filter) } + Some((name, filter)) } } @@ -326,7 +327,7 @@ impl DeleteBatch { } } -struct WriteCompactionFilter { +pub struct WriteCompactionFilter { safe_point: u64, engine: Option, is_bottommost_level: bool, @@ -1067,7 +1068,7 @@ pub mod tests { // Wait up to 1 second, and treat as no task if timeout. 
if let Ok(Some(task)) = gc_runner.gc_receiver.recv_timeout(Duration::new(1, 0)) { - assert!(expect_tasks, "a GC task is expected"); + assert!(expect_tasks, "unexpected GC task"); match task { GcTask::GcKeys { keys, .. } => { assert_eq!(keys.len(), 1); @@ -1079,7 +1080,7 @@ pub mod tests { } return; } - assert!(!expect_tasks, "no GC task is expected"); + assert!(!expect_tasks, "no GC task after 1 second"); }; // No key switch after the deletion mark. diff --git a/src/server/gc_worker/rawkv_compaction_filter.rs b/src/server/gc_worker/rawkv_compaction_filter.rs index 5e3913f4d40..b2af5b73118 100644 --- a/src/server/gc_worker/rawkv_compaction_filter.rs +++ b/src/server/gc_worker/rawkv_compaction_filter.rs @@ -9,9 +9,8 @@ use std::{ use api_version::{ApiV2, KeyMode, KvFormat}; use engine_rocks::{ raw::{ - new_compaction_filter_raw, CompactionFilter, CompactionFilterContext, - CompactionFilterDecision, CompactionFilterFactory, CompactionFilterValueType, - DBCompactionFilter, + CompactionFilter, CompactionFilterContext, CompactionFilterDecision, + CompactionFilterFactory, CompactionFilterValueType, }, RocksEngine, }; @@ -36,15 +35,17 @@ use crate::{ pub struct RawCompactionFilterFactory; impl CompactionFilterFactory for RawCompactionFilterFactory { + type Filter = RawCompactionFilter; + fn create_compaction_filter( &self, context: &CompactionFilterContext, - ) -> *mut DBCompactionFilter { + ) -> Option<(CString, Self::Filter)> { //---------------- GC context -------------- let gc_context_option = GC_CONTEXT.lock().unwrap(); let gc_context = match *gc_context_option { Some(ref ctx) => ctx, - None => return std::ptr::null_mut(), + None => return None, }; //---------------- GC context END -------------- @@ -57,7 +58,7 @@ impl CompactionFilterFactory for RawCompactionFilterFactory { if safe_point == 0 { // Safe point has not been initialized yet. 
debug!("skip gc in compaction filter because of no safe point"); - return std::ptr::null_mut(); + return None; } let ratio_threshold = { @@ -76,7 +77,7 @@ impl CompactionFilterFactory for RawCompactionFilterFactory { .map_or(false, RocksEngine::is_stalled_or_stopped) { debug!("skip gc in compaction filter because the DB is stalled"); - return std::ptr::null_mut(); + return None; } drop(gc_context_option); @@ -90,7 +91,7 @@ impl CompactionFilterFactory for RawCompactionFilterFactory { GC_COMPACTION_FILTER_SKIP .with_label_values(&[STAT_RAW_KEYMODE]) .inc(); - return std::ptr::null_mut(); + return None; } let filter = RawCompactionFilter::new( @@ -101,11 +102,11 @@ impl CompactionFilterFactory for RawCompactionFilterFactory { (store_id, region_info_provider), ); let name = CString::new("raw_compaction_filter").unwrap(); - unsafe { new_compaction_filter_raw(name, filter) } + Some((name, filter)) } } -struct RawCompactionFilter { +pub struct RawCompactionFilter { safe_point: u64, is_bottommost_level: bool, gc_scheduler: Scheduler>, diff --git a/src/server/ttl/ttl_compaction_filter.rs b/src/server/ttl/ttl_compaction_filter.rs index a53a766f235..7fdb3c686b7 100644 --- a/src/server/ttl/ttl_compaction_filter.rs +++ b/src/server/ttl/ttl_compaction_filter.rs @@ -5,9 +5,8 @@ use std::{ffi::CString, marker::PhantomData}; use api_version::{KeyMode, KvFormat, RawValue}; use engine_rocks::{ raw::{ - new_compaction_filter_raw, CompactionFilter, CompactionFilterContext, - CompactionFilterDecision, CompactionFilterFactory, CompactionFilterValueType, - DBCompactionFilter, + CompactionFilter, CompactionFilterContext, CompactionFilterDecision, + CompactionFilterFactory, CompactionFilterValueType, DBTableFileCreationReason, }, RocksTtlProperties, }; @@ -21,10 +20,12 @@ pub struct TtlCompactionFilterFactory { } impl CompactionFilterFactory for TtlCompactionFilterFactory { + type Filter = TtlCompactionFilter; + fn create_compaction_filter( &self, context: &CompactionFilterContext, - ) -> 
*mut DBCompactionFilter { + ) -> Option<(CString, Self::Filter)> { let current = ttl_current_ts(); let mut min_expire_ts = u64::MAX; @@ -38,7 +39,7 @@ impl CompactionFilterFactory for TtlCompactionFilterFactory { } } if min_expire_ts > current { - return std::ptr::null_mut(); + return None; } let name = CString::new("ttl_compaction_filter").unwrap(); @@ -46,11 +47,15 @@ impl CompactionFilterFactory for TtlCompactionFilterFactory { ts: current, _phantom: PhantomData, }; - unsafe { new_compaction_filter_raw(name, filter) } + Some((name, filter)) + } + + fn should_filter_table_file_creation(&self, _reason: DBTableFileCreationReason) -> bool { + true } } -struct TtlCompactionFilter { +pub struct TtlCompactionFilter { ts: u64, _phantom: PhantomData, } diff --git a/src/storage/kv/test_engine_builder.rs b/src/storage/kv/test_engine_builder.rs index aff54a41faa..c6a7cb7f20d 100644 --- a/src/storage/kv/test_engine_builder.rs +++ b/src/storage/kv/test_engine_builder.rs @@ -103,19 +103,25 @@ impl TestEngineBuilder { .map(|cf| match *cf { CF_DEFAULT => ( CF_DEFAULT, - cfg_rocksdb - .defaultcf - .build_opt(&shared, None, api_version, EngineType::RaftKv), + cfg_rocksdb.defaultcf.build_opt( + &shared, + None, + api_version, + None, + EngineType::RaftKv, + ), ), CF_LOCK => ( CF_LOCK, - cfg_rocksdb.lockcf.build_opt(&shared, EngineType::RaftKv), + cfg_rocksdb + .lockcf + .build_opt(&shared, None, EngineType::RaftKv), ), CF_WRITE => ( CF_WRITE, cfg_rocksdb .writecf - .build_opt(&shared, None, EngineType::RaftKv), + .build_opt(&shared, None, None, EngineType::RaftKv), ), CF_RAFT => (CF_RAFT, cfg_rocksdb.raftcf.build_opt(&shared)), _ => (*cf, RocksCfOptions::default()), diff --git a/src/storage/mod.rs b/src/storage/mod.rs index ca35018e01e..8c58274bc33 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -4205,18 +4205,21 @@ mod tests { &shared, None, ApiVersion::V1, + None, EngineType::RaftKv, ), ), ( CF_LOCK, - cfg_rocksdb.lockcf.build_opt(&shared, EngineType::RaftKv), + 
cfg_rocksdb + .lockcf + .build_opt(&shared, None, EngineType::RaftKv), ), ( CF_WRITE, cfg_rocksdb .writecf - .build_opt(&shared, None, EngineType::RaftKv), + .build_opt(&shared, None, None, EngineType::RaftKv), ), (CF_RAFT, cfg_rocksdb.raftcf.build_opt(&shared)), ]; diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 921dcf3615f..76eee9b1322 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -168,6 +168,7 @@ fn test_delete_files_in_range_for_titan() { &cfg.rocksdb.build_cf_resources(cache), None, cfg.storage.api_version(), + None, cfg.storage.engine, ); From c4bc6d9a4929d773660591fff5b6a21d5a07fc93 Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 8 Mar 2023 16:33:12 +0800 Subject: [PATCH 0572/1149] raftstore-v2: use larger target file size (#14361) ref tikv/tikv#14352 Use the same target file size as if compaction guard is enabled. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- etc/config-template.toml | 11 +--- src/config/mod.rs | 68 ++++++++++------------ tests/integrations/config/mod.rs | 15 ++--- tests/integrations/config/test-custom.toml | 5 -- 4 files changed, 39 insertions(+), 60 deletions(-) diff --git a/etc/config-template.toml b/etc/config-template.toml index 9b9a81d4106..ea73efdf59e 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -515,8 +515,8 @@ ## Value -1 means files opened are always kept open and RocksDB will prefetch index and filter ## blocks into block cache at startup. So if your database has a large working set, it will take ## several minutes to open the DB. You may need to increase this if your database has a large -## working set. You can estimate the number of files based on `target-file-size-base` and -## `target_file_size_multiplier` for level-based compaction. +## working set. You can estimate the number of files based on `target-file-size-base` for +## level-based compaction. 
# max-open-files = 40960 ## Max size of RocksDB's MANIFEST file. @@ -739,10 +739,8 @@ ## Target file size for compaction. ## The SST file size of level-0 is influenced by the compaction algorithm of `write-buffer-size` ## and level0. `target-file-size-base` is used to control the size of a single SST file of level1 to -## level6. Each level will have `target-file-size-base * (target-file-size-multiplier ^ (level - 1))`. +## level6. # target-file-size-base = "8MB" -## In partitioned-raft-kv, the default value of target-file-size-multiplier is 2 for write and default cf. -# target-file-size-multiplier = 1 ## Max bytes for `compaction.max_compaction_bytes`. ## If it's necessary to enlarge value of this entry, it's better to also enlarge `reserve-space` @@ -927,7 +925,6 @@ ## Recommend to set it the same as `rocksdb.defaultcf.max-bytes-for-level-base`. # max-bytes-for-level-base = "512MB" # target-file-size-base = "8MB" -# target-file-size-multiplier = 1 # level0-file-num-compaction-trigger = 4 # level0-slowdown-writes-trigger = 20 @@ -956,7 +953,6 @@ # min-write-buffer-number-to-merge = 1 # max-bytes-for-level-base = "128MB" # target-file-size-base = "8MB" -# target-file-size-multiplier = 1 # level0-file-num-compaction-trigger = 1 # level0-slowdown-writes-trigger = 20 # level0-stop-writes-trigger = 20 @@ -1018,7 +1014,6 @@ ## Recommend to set it the same as `rocksdb.defaultcf.max-bytes-for-level-base`. # max-bytes-for-level-base = "512MB" # target-file-size-base = "8MB" -# target-file-size-multiplier = 1 # level0-file-num-compaction-trigger = 4 # level0-slowdown-writes-trigger = 20 diff --git a/src/config/mod.rs b/src/config/mod.rs index b51883826c8..57c2e935d78 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -323,8 +323,7 @@ macro_rules! 
cf_config { #[online_config(skip)] pub min_write_buffer_number_to_merge: i32, pub max_bytes_for_level_base: ReadableSize, - pub target_file_size_base: ReadableSize, - pub target_file_size_multiplier: i32, + pub target_file_size_base: Option, pub level0_file_num_compaction_trigger: i32, pub level0_slowdown_writes_trigger: Option, pub level0_stop_writes_trigger: Option, @@ -380,6 +379,11 @@ macro_rules! cf_config { } impl $name { + #[inline] + fn target_file_size_base(&self) -> u64 { + self.target_file_size_base.unwrap_or(ReadableSize::mb(8)).0 + } + fn validate(&self) -> Result<(), Box> { if self.block_size.0 as usize > MAX_BLOCK_SIZE { return Err(format!( @@ -453,7 +457,7 @@ macro_rules! write_into_metrics { .set($cf.max_bytes_for_level_base.0 as f64); $metrics .with_label_values(&[$tag, "target_file_size_base"]) - .set($cf.target_file_size_base.0 as f64); + .set($cf.target_file_size_base() as f64); $metrics .with_label_values(&[$tag, "level0_file_num_compaction_trigger"]) .set($cf.level0_file_num_compaction_trigger.into()); @@ -575,10 +579,7 @@ macro_rules! 
build_cf_opt { cf_opts.set_max_write_buffer_number($opt.max_write_buffer_number); cf_opts.set_min_write_buffer_number_to_merge($opt.min_write_buffer_number_to_merge); cf_opts.set_max_bytes_for_level_base($opt.max_bytes_for_level_base.0); - cf_opts.set_target_file_size_base($opt.target_file_size_base.0); - if $opt.target_file_size_multiplier != 0 { - cf_opts.set_target_file_size_multiplier($opt.target_file_size_multiplier); - } + cf_opts.set_target_file_size_base($opt.target_file_size_base()); cf_opts.set_level_zero_file_num_compaction_trigger($opt.level0_file_num_compaction_trigger); cf_opts.set_level_zero_slowdown_writes_trigger( $opt.level0_slowdown_writes_trigger.unwrap_or_default(), @@ -665,8 +666,7 @@ impl Default for DefaultCfConfig { max_write_buffer_number: 5, min_write_buffer_number_to_merge: 1, max_bytes_for_level_base: ReadableSize::mb(512), - target_file_size_base: ReadableSize::mb(8), - target_file_size_multiplier: 0, + target_file_size_base: None, level0_file_num_compaction_trigger: 4, level0_slowdown_writes_trigger: None, level0_stop_writes_trigger: None, @@ -830,8 +830,7 @@ impl Default for WriteCfConfig { max_write_buffer_number: 5, min_write_buffer_number_to_merge: 1, max_bytes_for_level_base: ReadableSize::mb(512), - target_file_size_base: ReadableSize::mb(8), - target_file_size_multiplier: 0, + target_file_size_base: None, level0_file_num_compaction_trigger: 4, level0_slowdown_writes_trigger: None, level0_stop_writes_trigger: None, @@ -949,8 +948,7 @@ impl Default for LockCfConfig { max_write_buffer_number: 5, min_write_buffer_number_to_merge: 1, max_bytes_for_level_base: ReadableSize::mb(128), - target_file_size_base: ReadableSize::mb(8), - target_file_size_multiplier: 0, + target_file_size_base: None, level0_file_num_compaction_trigger: 1, level0_slowdown_writes_trigger: None, level0_stop_writes_trigger: None, @@ -1043,8 +1041,7 @@ impl Default for RaftCfConfig { max_write_buffer_number: 5, min_write_buffer_number_to_merge: 1, 
max_bytes_for_level_base: ReadableSize::mb(128), - target_file_size_base: ReadableSize::mb(8), - target_file_size_multiplier: 0, + target_file_size_base: None, level0_file_num_compaction_trigger: 1, level0_slowdown_writes_trigger: None, level0_stop_writes_trigger: None, @@ -1305,16 +1302,14 @@ impl DbConfig { self.write_buffer_limit.get_or_insert(ReadableSize( (total_mem * WRITE_BUFFER_MEMORY_LIMIT_RATE) as u64, )); - if self.writecf.enable_compaction_guard != Some(true) - && self.writecf.target_file_size_multiplier == 0 - { - self.writecf.target_file_size_multiplier = 2; - } - if self.defaultcf.enable_compaction_guard != Some(true) - && self.defaultcf.target_file_size_multiplier == 0 - { - self.defaultcf.target_file_size_multiplier = 2; - } + // In RaftKv2, every region uses its own rocksdb instance, it's actually the + // even stricter compaction guard, so use the same output file size base. + self.writecf + .target_file_size_base + .get_or_insert(self.writecf.compaction_guard_max_output_file_size); + self.defaultcf + .target_file_size_base + .get_or_insert(self.defaultcf.compaction_guard_max_output_file_size); self.defaultcf.disable_write_stall = true; self.writecf.disable_write_stall = true; self.lockcf.disable_write_stall = true; @@ -1565,8 +1560,7 @@ impl Default for RaftDefaultCfConfig { max_write_buffer_number: 5, min_write_buffer_number_to_merge: 1, max_bytes_for_level_base: ReadableSize::mb(512), - target_file_size_base: ReadableSize::mb(8), - target_file_size_multiplier: 0, + target_file_size_base: None, level0_file_num_compaction_trigger: 4, level0_slowdown_writes_trigger: None, level0_stop_writes_trigger: None, @@ -4867,7 +4861,7 @@ mod tests { cfg.rocksdb.max_background_jobs = 4; cfg.rocksdb.max_background_flushes = 2; cfg.rocksdb.defaultcf.disable_auto_compactions = false; - cfg.rocksdb.defaultcf.target_file_size_base = ReadableSize::mb(64); + cfg.rocksdb.defaultcf.target_file_size_base = Some(ReadableSize::mb(64)); 
cfg.rocksdb.defaultcf.block_cache_size = ReadableSize::mb(8); cfg.rocksdb.rate_bytes_per_sec = ReadableSize::mb(64); cfg.rocksdb.rate_limiter_auto_tuned = false; @@ -5315,33 +5309,33 @@ mod tests { let no_limiter: Option = None; // Test comopaction guard disabled. let config = DefaultCfConfig { - target_file_size_base: ReadableSize::mb(16), + target_file_size_base: Some(ReadableSize::mb(16)), enable_compaction_guard: Some(false), ..Default::default() }; let provider = Some(MockRegionInfoProvider::new(vec![])); let cf_opts = build_cf_opt!(config, CF_DEFAULT, &cache, no_limiter.as_ref(), provider); assert_eq!( - config.target_file_size_base.0, + config.target_file_size_base(), cf_opts.get_target_file_size_base() ); // Test compaction guard enabled but region info provider is missing. let config = DefaultCfConfig { - target_file_size_base: ReadableSize::mb(16), + target_file_size_base: Some(ReadableSize::mb(16)), enable_compaction_guard: Some(true), ..Default::default() }; let provider: Option = None; let cf_opts = build_cf_opt!(config, CF_DEFAULT, &cache, no_limiter.as_ref(), provider); assert_eq!( - config.target_file_size_base.0, + config.target_file_size_base(), cf_opts.get_target_file_size_base() ); // Test compaction guard enabled. 
let config = DefaultCfConfig { - target_file_size_base: ReadableSize::mb(16), + target_file_size_base: Some(ReadableSize::mb(16)), enable_compaction_guard: Some(true), compaction_guard_min_output_file_size: ReadableSize::mb(4), compaction_guard_max_output_file_size: ReadableSize::mb(64), @@ -5649,10 +5643,10 @@ mod tests { Some(default_cfg.coprocessor.region_split_size() * 3 / 4 / ReadableSize::kb(1)); default_cfg.raft_store.region_split_check_diff = Some(default_cfg.coprocessor.region_split_size() / 16); - default_cfg.rocksdb.writecf.target_file_size_multiplier = 1; - default_cfg.rocksdb.defaultcf.target_file_size_multiplier = 1; - default_cfg.rocksdb.lockcf.target_file_size_multiplier = 1; - default_cfg.raftdb.defaultcf.target_file_size_multiplier = 1; + default_cfg.rocksdb.writecf.target_file_size_base = Some(ReadableSize::mb(8)); + default_cfg.rocksdb.defaultcf.target_file_size_base = Some(ReadableSize::mb(8)); + default_cfg.rocksdb.lockcf.target_file_size_base = Some(ReadableSize::mb(8)); + default_cfg.raftdb.defaultcf.target_file_size_base = Some(ReadableSize::mb(8)); // Other special cases. cfg.pd.retry_max_count = default_cfg.pd.retry_max_count; // Both -1 and isize::MAX are the same. 
diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 672fd79ee12..a25a43ce6e1 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -345,8 +345,7 @@ fn test_serde_custom_tikv_config() { max_write_buffer_number: 12, min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), - target_file_size_base: ReadableSize::kb(123), - target_file_size_multiplier: 3, + target_file_size_base: Some(ReadableSize::kb(123)), level0_file_num_compaction_trigger: 123, level0_slowdown_writes_trigger: Some(123), level0_stop_writes_trigger: Some(123), @@ -401,8 +400,7 @@ fn test_serde_custom_tikv_config() { max_write_buffer_number: 12, min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), - target_file_size_base: ReadableSize::kb(123), - target_file_size_multiplier: 3, + target_file_size_base: Some(ReadableSize::kb(123)), level0_file_num_compaction_trigger: 123, level0_slowdown_writes_trigger: Some(123), level0_stop_writes_trigger: Some(123), @@ -471,8 +469,7 @@ fn test_serde_custom_tikv_config() { max_write_buffer_number: 12, min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), - target_file_size_base: ReadableSize::kb(123), - target_file_size_multiplier: 3, + target_file_size_base: Some(ReadableSize::kb(123)), level0_file_num_compaction_trigger: 123, level0_slowdown_writes_trigger: Some(123), level0_stop_writes_trigger: Some(123), @@ -541,8 +538,7 @@ fn test_serde_custom_tikv_config() { max_write_buffer_number: 12, min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), - target_file_size_base: ReadableSize::kb(123), - target_file_size_multiplier: 3, + target_file_size_base: Some(ReadableSize::kb(123)), level0_file_num_compaction_trigger: 123, level0_slowdown_writes_trigger: Some(123), level0_stop_writes_trigger: Some(123), @@ -640,8 +636,7 @@ fn test_serde_custom_tikv_config() { 
max_write_buffer_number: 12, min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), - target_file_size_base: ReadableSize::kb(123), - target_file_size_multiplier: 3, + target_file_size_base: Some(ReadableSize::kb(123)), level0_file_num_compaction_trigger: 123, level0_slowdown_writes_trigger: Some(123), level0_stop_writes_trigger: Some(123), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index f8931cbddac..d79ec7899e2 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -309,7 +309,6 @@ max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 max-bytes-for-level-base = "12KB" target-file-size-base = "123KB" -target-file-size-multiplier = 3 level0-file-num-compaction-trigger = 123 level0-slowdown-writes-trigger = 123 level0-stop-writes-trigger = 123 @@ -374,7 +373,6 @@ max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 max-bytes-for-level-base = "12KB" target-file-size-base = "123KB" -target-file-size-multiplier = 3 level0-file-num-compaction-trigger = 123 level0-slowdown-writes-trigger = 123 level0-stop-writes-trigger = 123 @@ -425,7 +423,6 @@ max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 max-bytes-for-level-base = "12KB" target-file-size-base = "123KB" -target-file-size-multiplier = 3 level0-file-num-compaction-trigger = 123 level0-slowdown-writes-trigger = 123 level0-stop-writes-trigger = 123 @@ -476,7 +473,6 @@ max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 max-bytes-for-level-base = "12KB" target-file-size-base = "123KB" -target-file-size-multiplier = 3 level0-file-num-compaction-trigger = 123 level0-slowdown-writes-trigger = 123 level0-stop-writes-trigger = 123 @@ -559,7 +555,6 @@ max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 max-bytes-for-level-base = "12KB" target-file-size-base = "123KB" -target-file-size-multiplier = 3 
level0-file-num-compaction-trigger = 123 level0-slowdown-writes-trigger = 123 level0-stop-writes-trigger = 123 From cc72dc9ba921ac3e1a6501e435aa754cd96dd543 Mon Sep 17 00:00:00 2001 From: Jay Date: Wed, 8 Mar 2023 17:31:12 +0800 Subject: [PATCH 0573/1149] importer: support raftstore v2 (#14305) ref tikv/tikv#12842 A few behavior changes: - In v2, normal mode is always used, trying to switch to import mode will get error response. - A context is added to compact range request. If not compact with region ID, the request will be rejected. - SSTs are cleaned up immediately if its corresponding regions doesn't exist on the store anymore. Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/raftstore-v2/src/batch/store.rs | 6 +- components/raftstore-v2/src/fsm/peer.rs | 4 + components/raftstore-v2/src/fsm/store.rs | 5 + .../src/operation/command/write/ingest.rs | 114 ++++++++++++++++++ .../src/operation/command/write/mod.rs | 37 +----- components/raftstore-v2/src/router/message.rs | 2 + .../raftstore-v2/src/worker/tablet_gc.rs | 30 ++++- components/server/src/server.rs | 4 +- components/server/src/server2.rs | 39 +++--- components/test_raftstore-v2/src/server.rs | 21 ++-- components/test_raftstore/src/server.rs | 4 +- src/import/mod.rs | 26 +++- src/import/sst_service.rs | 98 +++++++++++---- 14 files changed, 293 insertions(+), 99 deletions(-) create mode 100644 components/raftstore-v2/src/operation/command/write/ingest.rs diff --git a/Cargo.lock b/Cargo.lock index 77d24e482d7..90e77ce6e56 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2733,7 +2733,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#0561adc3754362675cc08b5203d8b6444e645395" +source = "git+https://github.com/pingcap/kvproto.git#02fc19e8abc41245e286d4a70f23e5139e3a33fe" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore-v2/src/batch/store.rs 
b/components/raftstore-v2/src/batch/store.rs index 83fa6b7a018..4833030fec3 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -621,7 +621,11 @@ impl StoreSystem { let tablet_gc_scheduler = workers.tablet_gc.start_with_timer( "tablet-gc-worker", - tablet_gc::Runner::new(tablet_registry.clone(), self.logger.clone()), + tablet_gc::Runner::new( + tablet_registry.clone(), + sst_importer.clone(), + self.logger.clone(), + ), ); let schedulers = Schedulers { diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 2c47ab165f2..388cdbbcce5 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -327,6 +327,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerMsg::TabletTrimmed { tablet_index } => { self.fsm.peer_mut().on_tablet_trimmed(tablet_index) } + PeerMsg::CleanupImportSst(ssts) => self + .fsm + .peer_mut() + .on_cleanup_import_sst(self.store_ctx, ssts), #[cfg(feature = "testexport")] PeerMsg::WaitFlush(ch) => self.fsm.peer_mut().on_wait_flush(ch), } diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index fef433f04f5..afb7aa5d0d8 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -230,6 +230,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { ); self.on_pd_store_heartbeat(); + self.schedule_tick( + StoreTick::CleanupImportSst, + self.store_ctx.cfg.cleanup_import_sst_interval.0, + ); } pub fn schedule_tick(&mut self, tick: StoreTick, timeout: Duration) { @@ -253,6 +257,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { fn on_tick(&mut self, tick: StoreTick) { match tick { StoreTick::PdStoreHeartbeat => self.on_pd_store_heartbeat(), + StoreTick::CleanupImportSst => self.on_cleanup_import_sst(), _ => unimplemented!(), } } diff 
--git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs new file mode 100644 index 00000000000..c39fc25a28b --- /dev/null +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -0,0 +1,114 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use collections::HashMap; +use crossbeam::channel::TrySendError; +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::import_sstpb::SstMeta; +use raftstore::{ + store::{check_sst_for_ingestion, metrics::PEER_WRITE_CMD_COUNTER, util}, + Result, +}; +use slog::error; +use tikv_util::{box_try, slog_panic}; + +use crate::{ + batch::StoreContext, + fsm::{ApplyResReporter, Store, StoreFsmDelegate}, + raft::{Apply, Peer}, + router::{PeerMsg, StoreTick}, + worker::tablet_gc, +}; + +impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { + #[inline] + pub fn on_cleanup_import_sst(&mut self) { + if let Err(e) = self.fsm.store.on_cleanup_import_sst(self.store_ctx) { + error!(self.fsm.store.logger(), "cleanup import sst failed"; "error" => ?e); + } + self.schedule_tick( + StoreTick::CleanupImportSst, + self.store_ctx.cfg.cleanup_import_sst_interval.0, + ); + } +} + +impl Store { + #[inline] + fn on_cleanup_import_sst( + &mut self, + ctx: &mut StoreContext, + ) -> Result<()> { + let ssts = box_try!(ctx.sst_importer.list_ssts()); + if ssts.is_empty() { + return Ok(()); + } + let mut region_ssts: HashMap<_, Vec<_>> = HashMap::default(); + for sst in ssts { + region_ssts + .entry(sst.get_region_id()) + .or_default() + .push(sst); + } + for (region_id, ssts) in region_ssts { + if let Err(TrySendError::Disconnected(msg)) = ctx.router.send(region_id, PeerMsg::CleanupImportSst(ssts.into())) + && !ctx.router.is_shutdown() { + let PeerMsg::CleanupImportSst(ssts) = msg else { unreachable!() }; + let _ = ctx.schedulers.tablet_gc.schedule(tablet_gc::Task::CleanupImportSst(ssts)); + } + } + + Ok(()) + } +} + +impl 
Peer { + pub fn on_cleanup_import_sst( + &mut self, + ctx: &mut StoreContext, + ssts: Box<[SstMeta]>, + ) { + let epoch = self.region().get_region_epoch(); + let mut stale_ssts = Vec::from(ssts); + stale_ssts.retain(|sst| util::is_epoch_stale(sst.get_region_epoch(), epoch)); + if stale_ssts.is_empty() { + return; + } + let _ = ctx + .schedulers + .tablet_gc + .schedule(tablet_gc::Task::CleanupImportSst(stale_ssts.into())); + } +} + +impl Apply { + #[inline] + pub fn apply_ingest(&mut self, ssts: Vec) -> Result<()> { + PEER_WRITE_CMD_COUNTER.ingest_sst.inc(); + let mut infos = Vec::with_capacity(ssts.len()); + for sst in &ssts { + if let Err(e) = check_sst_for_ingestion(sst, self.region()) { + error!( + self.logger, + "ingest fail"; + "sst" => ?sst, + "region" => ?self.region(), + "error" => ?e + ); + let _ = self.sst_importer().delete(sst); + return Err(e); + } + match self.sst_importer().validate(sst) { + Ok(meta_info) => infos.push(meta_info), + Err(e) => { + slog_panic!(self.logger, "corrupted sst"; "sst" => ?sst, "error" => ?e); + } + } + } + // Unlike v1, we can't batch ssts accross regions. + self.flush(); + if let Err(e) = self.sst_importer().ingest(&infos, self.tablet()) { + slog_panic!(self.logger, "ingest fail"; "ssts" => ?ssts, "error" => ?e); + } + Ok(()) + } +} diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index a461420f75b..b017a7b0ef7 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -1,10 +1,10 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use engine_traits::{data_cf_offset, KvEngine, Mutable, RaftEngine, CF_DEFAULT}; -use kvproto::{import_sstpb::SstMeta, raft_cmdpb::RaftRequestHeader}; +use kvproto::raft_cmdpb::RaftRequestHeader; use raftstore::{ store::{ - check_sst_for_ingestion, cmd_resp, + cmd_resp, fsm::{apply, MAX_PROPOSAL_SIZE_RATIO}, metrics::PEER_WRITE_CMD_COUNTER, msg::ErrorCallback, @@ -12,7 +12,6 @@ use raftstore::{ }, Error, Result, }; -use slog::error; use tikv_util::slog_panic; use crate::{ @@ -22,6 +21,7 @@ use crate::{ router::{ApplyTask, CmdResChannel}, }; +mod ingest; mod simple_write; pub use simple_write::{ @@ -233,35 +233,4 @@ impl Apply { // TODO: reuse the same delete as split/merge. Ok(()) } - - #[inline] - pub fn apply_ingest(&mut self, ssts: Vec) -> Result<()> { - PEER_WRITE_CMD_COUNTER.ingest_sst.inc(); - let mut infos = Vec::with_capacity(ssts.len()); - for sst in &ssts { - if let Err(e) = check_sst_for_ingestion(sst, self.region()) { - error!( - self.logger, - "ingest fail"; - "sst" => ?sst, - "region" => ?self.region(), - "error" => ?e - ); - let _ = self.sst_importer().delete(sst); - return Err(e); - } - match self.sst_importer().validate(sst) { - Ok(meta_info) => infos.push(meta_info), - Err(e) => { - slog_panic!(self.logger, "corrupted sst"; "sst" => ?sst, "error" => ?e); - } - } - } - // Unlike v1, we can't batch ssts accross regions. 
- self.flush(); - if let Err(e) = self.sst_importer().ingest(&infos, self.tablet()) { - slog_panic!(self.logger, "ingest fail"; "ssts" => ?ssts, "error" => ?e); - } - Ok(()) - } } diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 317ba74d4d6..88ac0ba7948 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -3,6 +3,7 @@ // #[PerformanceCriticalPath] use kvproto::{ + import_sstpb::SstMeta, metapb, metapb::RegionEpoch, raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}, @@ -206,6 +207,7 @@ pub enum PeerMsg { TabletTrimmed { tablet_index: u64, }, + CleanupImportSst(Box<[SstMeta]>), /// A message that used to check if a flush is happened. #[cfg(feature = "testexport")] WaitFlush(super::FlushChannel), diff --git a/components/raftstore-v2/src/worker/tablet_gc.rs b/components/raftstore-v2/src/worker/tablet_gc.rs index 0be8fdaa901..5799398c080 100644 --- a/components/raftstore-v2/src/worker/tablet_gc.rs +++ b/components/raftstore-v2/src/worker/tablet_gc.rs @@ -3,13 +3,15 @@ use std::{ fmt::{self, Display, Formatter}, path::{Path, PathBuf}, + sync::Arc, time::Duration, }; use collections::HashMap; use engine_traits::{DeleteStrategy, KvEngine, Range, TabletContext, TabletRegistry}; -use kvproto::metapb::Region; +use kvproto::{import_sstpb::SstMeta, metapb::Region}; use slog::{debug, error, info, warn, Logger}; +use sst_importer::SstImporter; use tikv_util::{ worker::{Runnable, RunnableWithTimer}, yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, @@ -37,6 +39,8 @@ pub enum Task { }, /// Sometimes we know for sure a tablet can be destroyed directly. DirectDestroy { tablet: Either }, + /// Cleanup ssts. + CleanupImportSst(Box<[SstMeta]>), } impl Display for Task { @@ -70,6 +74,9 @@ impl Display for Task { Task::DirectDestroy { .. 
} => { write!(f, "direct destroy tablet") } + Task::CleanupImportSst(ssts) => { + write!(f, "cleanup import ssts {:?}", ssts) + } } } } @@ -128,6 +135,7 @@ impl Task { pub struct Runner { tablet_registry: TabletRegistry, + sst_importer: Arc, logger: Logger, // region_id -> [(tablet_path, wait_for_persisted)]. @@ -140,9 +148,14 @@ pub struct Runner { } impl Runner { - pub fn new(tablet_registry: TabletRegistry, logger: Logger) -> Self { + pub fn new( + tablet_registry: TabletRegistry, + sst_importer: Arc, + logger: Logger, + ) -> Self { Self { tablet_registry, + sst_importer, logger, waiting_destroy_tasks: HashMap::default(), pending_destroy_tasks: Vec::new(), @@ -290,6 +303,14 @@ impl Runner { } false } + + fn cleanup_ssts(&self, ssts: Box<[SstMeta]>) { + for sst in Vec::from(ssts) { + if let Err(e) = self.sst_importer.delete(&sst) { + warn!(self.logger, "failed to cleanup sst"; "err" => ?e, "sst" => ?sst); + } + } + } } impl Runnable for Runner @@ -316,6 +337,7 @@ where persisted_index, } => self.destroy(region_id, persisted_index), Task::DirectDestroy { tablet, .. 
} => self.direct_destroy(tablet), + Task::CleanupImportSst(ssts) => self.cleanup_ssts(ssts), } } } @@ -344,6 +366,7 @@ mod tests { use tempfile::Builder; use super::*; + use crate::operation::test_util::create_tmp_importer; #[test] fn test_race_between_destroy_and_trim() { @@ -357,7 +380,8 @@ mod tests { )); let registry = TabletRegistry::new(factory, dir.path()).unwrap(); let logger = slog_global::borrow_global().new(slog::o!()); - let mut runner = Runner::new(registry.clone(), logger); + let (_dir, importer) = create_tmp_importer(); + let mut runner = Runner::new(registry.clone(), importer, logger); let mut region = Region::default(); let rid = 1; diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 2cde9e9cb78..ae6b86bbbd3 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -91,7 +91,7 @@ use tikv::{ config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, - import::{ImportSstService, SstImporter}, + import::{ImportSstService, LocalTablets, SstImporter}, read_pool::{ build_yatp_read_pool, ReadPool, ReadPoolConfigManager, UPDATE_EWMA_TIME_SLICE_INTERVAL, }, @@ -1246,7 +1246,7 @@ where self.config.import.clone(), self.config.raft_store.raft_entry_max_size, engines.engine.clone(), - engines.engines.kv.clone(), + LocalTablets::Singleton(engines.engines.kv.clone()), servers.importer.clone(), ); if servers diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 01a76dfffbc..50b75f27c23 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -48,7 +48,8 @@ use futures::executor::block_on; use grpcio::{EnvBuilder, Environment}; use grpcio_health::HealthService; use kvproto::{ - deadlock::create_deadlock, diagnosticspb::create_diagnostics, kvrpcpb::ApiVersion, + deadlock::create_deadlock, diagnosticspb::create_diagnostics, + 
import_sstpb_grpc::create_import_sst, kvrpcpb::ApiVersion, resource_usage_agent::create_resource_metering_pub_sub, }; use pd_client::{PdClient, RpcClient}; @@ -73,7 +74,7 @@ use tikv::{ config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, - import::SstImporter, + import::{ImportSstService, LocalTablets, SstImporter}, read_pool::{ build_yatp_read_pool, ReadPool, ReadPoolConfigManager, UPDATE_EWMA_TIME_SLICE_INTERVAL, }, @@ -244,7 +245,7 @@ struct TikvEngines { struct Servers { lock_mgr: LockManager, server: LocalServer, - _importer: Arc, + importer: Arc, rsmeter_pubsub_service: resource_metering::PubSubService, } @@ -969,7 +970,7 @@ where self.servers = Some(Servers { lock_mgr, server, - _importer: importer, + importer, rsmeter_pubsub_service, }); @@ -978,23 +979,23 @@ where fn register_services(&mut self) { let servers = self.servers.as_mut().unwrap(); - let _engines = self.engines.as_ref().unwrap(); + let engines = self.engines.as_ref().unwrap(); // Import SST service. 
- // let import_service = ImportSstService::new( - // self.config.import.clone(), - // self.config.raft_store.raft_entry_max_size, - // engines.engine.clone(), - // self.tablet_registry.as_ref().unwrap().clone(), - // servers.importer.clone(), - // ); - // if servers - // .server - // .register_service(create_import_sst(import_service)) - // .is_some() - // { - // fatal!("failed to register import service"); - // } + let import_service = ImportSstService::new( + self.config.import.clone(), + self.config.raft_store.raft_entry_max_size, + engines.engine.clone(), + LocalTablets::Registry(self.tablet_registry.as_ref().unwrap().clone()), + servers.importer.clone(), + ); + if servers + .server + .register_service(create_import_sst(import_service)) + .is_some() + { + fatal!("failed to register import service"); + } // Create Diagnostics service let diag_service = DiagnosticsService::new( diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index dbcede48a6a..347b6010669 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -22,6 +22,7 @@ use kvproto::{ deadlock_grpc::create_deadlock, debugpb_grpc::DebugClient, diagnosticspb_grpc::create_diagnostics, + import_sstpb_grpc::create_import_sst, kvrpcpb::{ApiVersion, Context}, metapb, raft_cmdpb::RaftCmdResponse, @@ -48,7 +49,7 @@ use test_pd_client::TestPdClient; use test_raftstore::{filter_send, AddressMap, Config, Filter}; use tikv::{ coprocessor, coprocessor_v2, - import::SstImporter, + import::{ImportSstService, LocalTablets, SstImporter}, read_pool::ReadPool, server::{ gc_worker::GcWorker, load_statistics::ThreadLoadPool, lock_manager::LockManager, @@ -462,7 +463,7 @@ impl ServerCluster { .as_ref() .map(|m| m.derive_controller("scheduler-worker-pool".to_owned(), true)), )?; - self.storages.insert(node_id, raft_kv_v2); + self.storages.insert(node_id, raft_kv_v2.clone()); 
ReplicaReadLockChecker::new(concurrency_manager.clone()).register(&mut coprocessor_host); @@ -473,13 +474,13 @@ impl ServerCluster { SstImporter::new(&cfg.import, dir, key_manager, cfg.storage.api_version()).unwrap(), ) }; - // let import_service = ImportSstService::new( - // cfg.import.clone(), - // cfg.raft_store.raft_entry_max_size, - // raft_kv_2.clone(), - // tablet_registry.clone(), - // Arc::clone(&importer), - // ); + let import_service = ImportSstService::new( + cfg.import.clone(), + cfg.raft_store.raft_entry_max_size, + raft_kv_v2, + LocalTablets::Registry(tablet_registry.clone()), + Arc::clone(&importer), + ); // Create deadlock service. let deadlock_service = lock_mgr.deadlock_service(); @@ -544,7 +545,7 @@ impl ServerCluster { .unwrap(); svr.register_service(create_diagnostics(diag_service.clone())); svr.register_service(create_deadlock(deadlock_service.clone())); - // svr.register_service(create_import_sst(import_service.clone())); + svr.register_service(create_import_sst(import_service.clone())); if let Some(svcs) = self.pending_services.get(&node_id) { for fact in svcs { svr.register_service(fact()); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 8c2297fbc45..9fd1229e6e5 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -50,7 +50,7 @@ use test_pd_client::TestPdClient; use tikv::{ config::ConfigController, coprocessor, coprocessor_v2, - import::{ImportSstService, SstImporter}, + import::{ImportSstService, LocalTablets, SstImporter}, read_pool::ReadPool, server::{ gc_worker::GcWorker, @@ -441,7 +441,7 @@ impl ServerCluster { cfg.import.clone(), cfg.raft_store.raft_entry_max_size, engine, - engines.kv.clone(), + LocalTablets::Singleton(engines.kv.clone()), Arc::clone(&importer), ); diff --git a/src/import/mod.rs b/src/import/mod.rs index e2fa3729e52..7ee5647f723 100644 --- a/src/import/mod.rs +++ b/src/import/mod.rs @@ -15,8 +15,9 @@ mod 
duplicate_detect; mod sst_service; -use std::fmt::Debug; +use std::{borrow::Cow, fmt::Debug}; +use engine_traits::TabletRegistry; use grpcio::{RpcStatus, RpcStatusCode}; pub use sst_importer::{Config, Error, Result, SstImporter, TxnSstWriter}; @@ -48,3 +49,26 @@ macro_rules! send_rpc_response { let _ = res.map_err(|e| warn!("send rpc response"; "err" => %e)).await; }}; } + +#[derive(Clone)] +pub enum LocalTablets { + Singleton(EK), + Registry(TabletRegistry), +} + +impl LocalTablets { + /// Get the tablet of the given region. + /// + /// If `None` is returned, the region may not exist or may not initialized. + /// If there are multiple versions of tablet, the latest one is returned + /// with best effort. + fn get(&self, region_id: u64) -> Option> { + match self { + LocalTablets::Singleton(tablet) => Some(Cow::Borrowed(tablet)), + LocalTablets::Registry(registry) => { + let mut cached = registry.get(region_id)?; + cached.latest().cloned().map(Cow::Owned) + } + } + } +} diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 02e7297bea8..291841facde 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -39,7 +39,7 @@ use tikv_util::{ use tokio::{runtime::Runtime, time::sleep}; use txn_types::{Key, WriteRef, WriteType}; -use super::make_rpc_error; +use super::{make_rpc_error, LocalTablets}; use crate::{ import::duplicate_detect::DuplicateDetector, server::CONFIG_ROCKSDB_GAUGE, @@ -86,7 +86,7 @@ async fn wait_write(mut s: impl Stream + Send + Unpin) -> sto #[derive(Clone)] pub struct ImportSstService { cfg: Config, - tablet_registry: E::Local, + tablets: LocalTablets, engine: E, threads: Arc, // For now, PiTR cannot be executed in the tokio runtime because it is synchronous and may @@ -265,7 +265,7 @@ impl ImportSstService { cfg: Config, raft_entry_max_size: ReadableSize, engine: E, - tablet_registry: E::Local, + tablets: LocalTablets, importer: Arc, ) -> Self { let props = tikv_util::thread_group::current_properties(); @@ -293,12 
+293,14 @@ impl ImportSstService { .before_stop_wrapper(move || tikv_alloc::remove_thread_memory_accessor()) .create() .unwrap(); - importer.start_switch_mode_check(threads.handle(), tablet_registry.clone()); + if let LocalTablets::Singleton(tablet) = &tablets { + importer.start_switch_mode_check(threads.handle(), tablet.clone()); + } threads.spawn(Self::tick(importer.clone())); ImportSstService { cfg, - tablet_registry, + tablets, threads: Arc::new(threads), block_threads: Arc::new(block_threads), engine, @@ -350,14 +352,20 @@ impl ImportSstService { } } - fn check_write_stall(&self) -> Option { + fn check_write_stall(&self, region_id: u64) -> Option { + let tablet = match self.tablets.get(region_id) { + Some(tablet) => tablet, + None => { + let mut errorpb = errorpb::Error::default(); + errorpb.set_message(format!("region {} not found", region_id)); + errorpb.mut_region_not_found().set_region_id(region_id); + return Some(errorpb); + } + }; if self.importer.get_mode() == SwitchMode::Normal - && self - .tablet_registry - .ingest_maybe_slowdown_writes(CF_WRITE) - .expect("cf") + && tablet.ingest_maybe_slowdown_writes(CF_WRITE).expect("cf") { - match self.tablet_registry.get_sst_key_ranges(CF_WRITE, 0) { + match tablet.get_sst_key_ranges(CF_WRITE, 0) { Ok(l0_sst_ranges) => { warn!( "sst ingest is too slow"; @@ -534,7 +542,7 @@ macro_rules! impl_write { sink: ClientStreamingSink<$resp_ty>, ) { let import = self.importer.clone(); - let tablet_registry = self.tablet_registry.clone(); + let tablets = self.tablets.clone(); let (rx, buf_driver) = create_stream_with_buffer(stream, self.cfg.stream_channel_window); let mut rx = rx.map_err(Error::from); @@ -551,8 +559,17 @@ macro_rules! 
impl_write { }, _ => return Err(Error::InvalidChunk), }; + let region_id = meta.get_region_id(); + let tablet = match tablets.get(region_id) { + Some(t) => t, + None => { + return Err(Error::Engine( + format!("region {} not found", region_id).into(), + )); + } + }; - let writer = match import.$writer_fn(&tablet_registry, meta) { + let writer = match import.$writer_fn(&*tablet, meta) { Ok(w) => w, Err(e) => { error!("build writer failed {:?}", e); @@ -601,13 +618,17 @@ impl ImportSst for ImportSstService { CONFIG_ROCKSDB_GAUGE.with_label_values(&[cf, name]).set(v); } - match req.get_mode() { - SwitchMode::Normal => self - .importer - .enter_normal_mode(self.tablet_registry.clone(), mf), - SwitchMode::Import => self - .importer - .enter_import_mode(self.tablet_registry.clone(), mf), + if let LocalTablets::Singleton(tablet) = &self.tablets { + match req.get_mode() { + SwitchMode::Normal => self.importer.enter_normal_mode(tablet.clone(), mf), + SwitchMode::Import => self.importer.enter_import_mode(tablet.clone(), mf), + } + } else if req.get_mode() != SwitchMode::Normal { + Err(sst_importer::Error::Engine( + "partitioned-raft-kv doesn't support import mode".into(), + )) + } else { + Ok(false) } }; match res { @@ -742,7 +763,8 @@ impl ImportSst for ImportSstService { let timer = Instant::now_coarse(); let importer = Arc::clone(&self.importer); let limiter = self.limiter.clone(); - let tablet_registry = self.tablet_registry.clone(); + let region_id = req.get_sst().get_region_id(); + let tablets = self.tablets.clone(); let start = Instant::now(); let handle_task = async move { @@ -761,6 +783,19 @@ impl ImportSst for ImportSstService { .into_option() .filter(|c| c.cipher_type != EncryptionMethod::Plaintext); + let tablet = match tablets.get(region_id) { + Some(tablet) => tablet, + None => { + let error = sst_importer::Error::Engine(box_err!( + "region {} not found, maybe it's not a replica of this store", + region_id + )); + let mut resp = DownloadResponse::default(); + 
resp.set_error(error.into()); + return crate::send_rpc_response!(Ok(resp), sink, label, timer); + } + }; + let res = importer.download_ext::( req.get_sst(), req.get_storage_backend(), @@ -768,7 +803,7 @@ impl ImportSst for ImportSstService { req.get_rewrite_rule(), cipher, limiter, - tablet_registry, + tablet.into_owned(), DownloadExt::default() .cache_key(req.get_storage_cache_id()) .req_type(req.get_request_type()), @@ -802,7 +837,8 @@ impl ImportSst for ImportSstService { let timer = Instant::now_coarse(); let mut resp = IngestResponse::default(); - if let Some(errorpb) = self.check_write_stall() { + let region_id = req.get_context().get_region_id(); + if let Some(errorpb) = self.check_write_stall(region_id) { resp.set_error(errorpb); ctx.spawn( sink.success(resp) @@ -844,7 +880,7 @@ impl ImportSst for ImportSstService { let timer = Instant::now_coarse(); let mut resp = IngestResponse::default(); - if let Some(errorpb) = self.check_write_stall() { + if let Some(errorpb) = self.check_write_stall(req.get_context().get_region_id()) { resp.set_error(errorpb); ctx.spawn( sink.success(resp) @@ -892,7 +928,7 @@ impl ImportSst for ImportSstService { ) { let label = "compact"; let timer = Instant::now_coarse(); - let tablet_registry = self.tablet_registry.clone(); + let tablets = self.tablets.clone(); let handle_task = async move { let (start, end) = if !req.has_range() { @@ -909,7 +945,17 @@ impl ImportSst for ImportSstService { Some(req.get_output_level()) }; - let res = tablet_registry.compact_files_in_range(start, end, output_level); + let region_id = req.get_context().get_region_id(); + let tablet = match tablets.get(region_id) { + Some(tablet) => tablet, + None => { + let e = Error::Engine(format!("region {} not found", region_id).into()); + crate::send_rpc_response!(Err(e), sink, label, timer); + return; + } + }; + + let res = tablet.compact_files_in_range(start, end, output_level); match res { Ok(_) => info!( "compact files in range"; From 
0aa9e14962db2a3c1bfdc591b5e97b014eb8b16a Mon Sep 17 00:00:00 2001 From: Nathan Date: Thu, 9 Mar 2023 10:41:12 +0800 Subject: [PATCH 0574/1149] raftstore: fix incorrect cond judgment leading to rejection of pre_proposal (#14283) close tikv/tikv#14219 raftstore: fix incorrect cond judgment leading to rejection of pre_proposal Signed-off-by: Wenbo Zhang Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/fsm/apply.rs | 5 + components/raftstore/src/store/fsm/peer.rs | 31 ++- components/raftstore/src/store/peer.rs | 15 ++ .../raftstore/src/store/peer_storage.rs | 7 +- tests/failpoints/cases/test_witness.rs | 205 +++++++++++++++++- 5 files changed, 258 insertions(+), 5 deletions(-) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 7afb188a4b0..b9f737158fc 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -3242,6 +3242,11 @@ where ctx: &mut ApplyContext, request: &AdminRequest, ) -> Result<(AdminResponse, ApplyResult)> { + fail_point!( + "before_exec_batch_switch_witness", + self.id() == 2, + |_| unimplemented!() + ); assert!(request.has_switch_witnesses()); let switches = request .get_switch_witnesses() diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 6acddde2257..3eca179d770 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2652,6 +2652,7 @@ where return; } if !msg.wait_data { + let original_remains_nr = self.fsm.peer.wait_data_peers.len(); self.fsm .peer .wait_data_peers @@ -2660,6 +2661,15 @@ where "receive peer ready info"; "peer_id" => self.fsm.peer.peer.get_id(), ); + if original_remains_nr != self.fsm.peer.wait_data_peers.len() { + info!( + "notify pd with change peer region"; + "region_id" => self.fsm.region_id(), + "peer_id" => from.get_id(), + "region" => ?self.fsm.peer.region(), + ); + 
self.fsm.peer.heartbeat_pd(self.ctx); + } return; } self.register_check_peers_availability_tick(); @@ -5149,6 +5159,8 @@ where return Err(Error::IsWitness(self.region_id())); } + fail_point!("ignore_forbid_leader_to_be_witness", |_| Ok(None)); + // Forbid requests to switch it into a witness when it's a leader if self.fsm.peer.is_leader() && msg.has_admin_request() @@ -5567,7 +5579,14 @@ where fail_point!("ignore request snapshot", |_| { self.schedule_tick(PeerTick::RequestSnapshot); }); - if !self.fsm.peer.wait_data || self.fsm.peer.is_leader() { + if !self.fsm.peer.wait_data { + return; + } + if self.fsm.peer.is_leader() + || self.fsm.peer.is_handling_snapshot() + || self.fsm.peer.has_pending_snapshot() + { + self.schedule_tick(PeerTick::RequestSnapshot); return; } self.fsm.peer.request_index = self.fsm.peer.raft_group.raft.raft_log.last_index(); @@ -6455,9 +6474,15 @@ where for s in sw.switches { let (peer_id, is_witness) = (s.get_peer_id(), s.get_is_witness()); if self.fsm.peer_id() == peer_id { - if is_witness && !self.fsm.peer.is_leader() { - let _ = self.fsm.peer.get_store().clear_data(); + if is_witness { self.fsm.peer.raft_group.set_priority(-1); + if !self.fsm.peer.is_leader() { + let _ = self.fsm.peer.get_store().clear_data(); + } else { + // Avoid calling `clear_data` as the region worker may be scanning snapshot, + // to avoid problems (although no problems were found by testing). + self.fsm.peer.delay_clean_data = true; + } } else { self.fsm .peer diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index a1817edd17b..8dc69a0def4 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -899,6 +899,13 @@ where /// the request index for retrying. 
pub request_index: u64, + /// It's used to identify the situation where the region worker is + /// generating and sending snapshots when the newly elected leader by Raft + /// applies the switch witness cmd which commited before the election. This + /// flag will prevent immediate data clearing and will be cleared after + /// the successful transfer of leadership. + pub delay_clean_data: bool, + /// When the witness becomes non-witness, it need to actively request a /// snapshot from the leader, In order to avoid log lag, we need to reject /// the leader's `MsgAppend` request unless the `term` of the `last index` @@ -1133,6 +1140,7 @@ where pending_remove: false, wait_data, request_index: last_index, + delay_clean_data: false, should_reject_msgappend: false, should_wake_up: false, force_leader: None, @@ -2323,6 +2331,10 @@ where self.mut_store().cancel_generating_snap(None); self.clear_disk_full_peers(ctx); self.clear_in_memory_pessimistic_locks(); + if self.peer.is_witness && self.delay_clean_data { + let _ = self.get_store().clear_data(); + self.delay_clean_data = false; + } } _ => {} } @@ -2614,6 +2626,7 @@ where ctx.apply_router .schedule_task(self.region_id, ApplyTask::Recover(self.region_id)); self.wait_data = false; + self.should_reject_msgappend = false; return false; } } @@ -5730,6 +5743,7 @@ fn is_request_urgent(req: &RaftCmdRequest) -> bool { | AdminCmdType::PrepareMerge | AdminCmdType::CommitMerge | AdminCmdType::RollbackMerge + | AdminCmdType::BatchSwitchWitness ) } @@ -5828,6 +5842,7 @@ mod tests { AdminCmdType::PrepareMerge, AdminCmdType::CommitMerge, AdminCmdType::RollbackMerge, + AdminCmdType::BatchSwitchWitness, ]; for tp in AdminCmdType::values() { let mut req = RaftCmdRequest::default(); diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 8dc8a18906c..470cdfee998 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -524,7 
+524,12 @@ where panic!("{} unexpected state: {:?}", self.tag, *snap_state); } - if *tried_cnt >= MAX_SNAP_TRY_CNT { + let max_snap_try_cnt = (|| { + fail_point!("ignore_snap_try_cnt", |_| usize::MAX); + MAX_SNAP_TRY_CNT + })(); + + if *tried_cnt >= max_snap_try_cnt { let cnt = *tried_cnt; *tried_cnt = 0; return Err(raft::Error::Store(box_err!( diff --git a/tests/failpoints/cases/test_witness.rs b/tests/failpoints/cases/test_witness.rs index ef178ee8aa0..02411ba1b76 100644 --- a/tests/failpoints/cases/test_witness.rs +++ b/tests/failpoints/cases/test_witness.rs @@ -4,7 +4,7 @@ use std::{iter::FromIterator, sync::Arc, time::Duration}; use collections::HashMap; use futures::executor::block_on; -use kvproto::raft_serverpb::RaftApplyState; +use kvproto::{metapb, raft_serverpb::RaftApplyState}; use pd_client::PdClient; use test_raftstore::*; use tikv_util::{config::ReadableDuration, store::find_peer}; @@ -473,3 +473,206 @@ fn test_non_witness_replica_read() { .unwrap(); assert_eq!(resp.get_header().has_error(), false); } + +fn must_get_error_is_witness( + cluster: &mut Cluster, + region: &metapb::Region, + cmd: kvproto::raft_cmdpb::Request, +) { + let req = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![cmd], + true, + ); + let resp = cluster + .call_command_on_leader(req, Duration::from_millis(100)) + .unwrap(); + assert_eq!( + resp.get_header().get_error().get_is_witness(), + &kvproto::errorpb::IsWitness { + region_id: region.get_id(), + ..Default::default() + }, + "{:?}", + resp + ); +} + +// Test the case that once a Raft election elects a voter as the leader, and +// then this voter applies the switch witness cmd, it becomes a witness and can +// correctly transfer the leader identity. 
+#[test] +fn test_witness_leader_transfer_out() { + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.must_put(b"k0", b"v0"); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1); + + // prevent this peer from applying the switch witness command until it's elected + // as the Raft leader + fail::cfg("before_exec_batch_switch_witness", "pause").unwrap(); + let peer_on_store2 = find_peer(®ion, nodes[1]).unwrap().clone(); + // nonwitness -> witness + cluster + .pd_client + .switch_witnesses(region.get_id(), vec![peer_on_store2.get_id()], vec![true]); + // make sure the left peers have applied switch witness cmd + std::thread::sleep(Duration::from_millis(500)); + + // the other follower is isolated + cluster.add_send_filter(IsolationFilterFactory::new(3)); + for i in 1..10 { + cluster.must_put(format!("k{}", i).as_bytes(), format!("v{}", i).as_bytes()); + } + // the leader is down + cluster.stop_node(1); + + // new leader would help to replicate the logs + cluster.clear_send_filters(); + std::thread::sleep(Duration::from_millis(1000)); + // make sure the new leader has became to the witness + fail::remove("before_exec_batch_switch_witness"); + std::thread::sleep(Duration::from_millis(500)); + + // forbid writes + let put = new_put_cmd(b"k3", b"v3"); + must_get_error_is_witness(&mut cluster, ®ion, put); + // forbid reads + let get = new_get_cmd(b"k1"); + must_get_error_is_witness(&mut cluster, ®ion, get); + // forbid read index + let read_index = new_read_index_cmd(); + must_get_error_is_witness(&mut cluster, ®ion, read_index); + + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + + cluster.must_transfer_leader(region.get_id(), 
peer_on_store3); + cluster.must_put(b"k1", b"v1"); + assert_eq!( + cluster.leader_of_region(region.get_id()).unwrap().store_id, + nodes[2], + ); + assert_eq!(cluster.must_get(b"k9"), Some(b"v9".to_vec())); +} + +// Test the case that once a Raft election elects a voter as the leader, +// and is currently generating a snapshot for another peer, then applies the +// switch witness cmd to be a witness, the generated snapshot will be checked as +// invalidated and will not be regenerated +#[test] +fn test_witness_leader_ignore_gen_snapshot() { + let mut cluster = new_server_cluster(0, 3); + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); + configure_for_snapshot(&mut cluster.cfg); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.must_put(b"k0", b"v0"); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1.clone()); + + // the other follower is isolated + cluster.add_send_filter(IsolationFilterFactory::new(3)); + + // make sure raft log gc is triggered + std::thread::sleep(Duration::from_millis(200)); + let mut before_states = HashMap::default(); + for (&id, engines) in &cluster.engines { + let mut state: RaftApplyState = get_raft_msg_or_default(engines, &keys::apply_state_key(1)); + before_states.insert(id, state.take_truncated_state()); + } + + // write some data to make log gap exceeds the gc limit + for i in 1..1000 { + let (k, v) = (format!("k{}", i), format!("v{}", i)); + let key = k.as_bytes(); + let value = v.as_bytes(); + cluster.must_put(key, value); + } + + std::thread::sleep(Duration::from_millis(200)); + + // the truncated index is advanced + for (&id, engines) in &cluster.engines { + let state: RaftApplyState = 
get_raft_msg_or_default(engines, &keys::apply_state_key(1)); + let diff = state.get_truncated_state().get_index() - before_states[&id].get_index(); + error!("EEEEE"; + "id" => &id, + "diff" => diff, + "state.get_truncated_state().get_index()" => state.get_truncated_state().get_index(), + "before_states[&id].get_index()" => before_states[&id].get_index() + ); + assert_ne!( + 900, + state.get_truncated_state().get_index() - before_states[&id].get_index() + ); + } + + // ingore raft log gc to avoid canceling snapshots + fail::cfg("on_raft_gc_log_tick", "return").unwrap(); + // wait for leader applied switch to witness + fail::cfg("before_region_gen_snap", "pause").unwrap(); + fail::cfg("ignore_snap_try_cnt", "return").unwrap(); + // After the snapshot is generated, it will be checked as invalidated and will + // not be regenerated (handle_snapshot will not generate a snapshot for + // witness) + cluster.clear_send_filters(); + std::thread::sleep(Duration::from_millis(500)); + + // non-witness -> witness + fail::cfg("ignore_forbid_leader_to_be_witness", "return").unwrap(); + cluster.pd_client.must_switch_witnesses( + region.get_id(), + vec![peer_on_store1.get_id()], + vec![true], + ); + fail::remove("before_region_gen_snap"); + + std::thread::sleep(Duration::from_millis(500)); + + // forbid writes + let put = new_put_cmd(b"k3", b"v3"); + must_get_error_is_witness(&mut cluster, ®ion, put); + // forbid reads + let get = new_get_cmd(b"k1"); + must_get_error_is_witness(&mut cluster, ®ion, get); + // forbid read index + let read_index = new_read_index_cmd(); + must_get_error_is_witness(&mut cluster, ®ion, read_index); + + // reject to transfer, as can't send snapshot to peer_on_store3, there's a log + // gap + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + let _ = cluster.try_transfer_leader(region.get_id(), peer_on_store3); + std::thread::sleep(Duration::from_secs(5)); + assert_eq!(cluster.leader_of_region(1).unwrap(), peer_on_store1); + + // should be 
enable to transfer leader to peer_on_store2 + let peer_on_store2 = find_peer(®ion, nodes[1]).unwrap().clone(); + cluster.must_transfer_leader(1, peer_on_store2); + cluster.must_put(b"k1", b"v1"); + assert_eq!( + cluster.leader_of_region(region.get_id()).unwrap().store_id, + nodes[1], + ); + assert_eq!(cluster.must_get(b"k9"), Some(b"v9".to_vec())); + + fail::remove("on_raft_gc_log_tick"); + fail::remove("ignore_snap_try_cnt"); + fail::remove("ignore_forbid_leader_to_be_witness"); +} From 82ac84bb485d4cd2f7a2e6714e7170485efb6296 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 9 Mar 2023 10:55:12 +0800 Subject: [PATCH 0575/1149] integration test v2: mvcc resolve lock gc test (#14360) ref tikv/tikv#12842 mvcc resolve lock gc test for v2 Signed-off-by: Spade A Co-authored-by: Xinye Tao Co-authored-by: Ti Chi Robot --- .../raftstore-v2/src/operation/ready/mod.rs | 19 ++++++++++++------- components/test_raftstore-v2/src/server.rs | 6 ++++++ tests/integrations/server/kv_service.rs | 6 ++++-- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index bf7b8ec8858..4c0bf9cbe88 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -673,14 +673,14 @@ impl Peer { fn report_persist_log_duration( &self, ctx: &mut StoreContext, - from: u64, - to: u64, + old_index: u64, + new_index: u64, ) { - if !ctx.cfg.waterfall_metrics || self.proposals().is_empty() || from >= to { + if !ctx.cfg.waterfall_metrics || self.proposals().is_empty() || old_index >= new_index { return; } let now = Instant::now(); - for i in from + 1..to { + for i in old_index + 1..=new_index { if let Some((term, trackers)) = self.proposals().find_trackers(i) { if self.entry_storage().term(i).map_or(false, |t| t == term) { for tracker in trackers { @@ -694,12 +694,17 @@ impl Peer { } 
#[inline] - fn report_commit_log_duration(&self, ctx: &mut StoreContext, from: u64, to: u64) { - if !ctx.cfg.waterfall_metrics || self.proposals().is_empty() || from >= to { + fn report_commit_log_duration( + &self, + ctx: &mut StoreContext, + old_index: u64, + new_index: u64, + ) { + if !ctx.cfg.waterfall_metrics || self.proposals().is_empty() || old_index >= new_index { return; } let now = Instant::now(); - for i in from + 1..to { + for i in old_index + 1..=new_index { if let Some((term, trackers)) = self.proposals().find_trackers(i) { if self.entry_storage().term(i).map_or(false, |t| t == term) { let commit_persisted = i <= self.persisted_index(); diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 347b6010669..b105f52be39 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -242,6 +242,7 @@ pub struct ServerMeta { sim_router: SimulateStoreTransport, sim_trans: SimulateServerTransport, raw_router: StoreRouter, + gc_worker: GcWorker, rsmeter_cleanup: Box, } @@ -624,6 +625,7 @@ impl ServerCluster { node, server, sim_router, + gc_worker, sim_trans: simulate_trans, rsmeter_cleanup, }, @@ -635,6 +637,10 @@ impl ServerCluster { Ok(node_id) } + pub fn get_gc_worker(&self, node_id: u64) -> &GcWorker { + &self.metas.get(&node_id).unwrap().gc_worker + } + pub fn get_causal_ts_provider(&self, node_id: u64) -> Option> { self.causal_ts_providers.get(&node_id).cloned() } diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 4a981bdfa53..44d16961f7d 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -34,6 +34,7 @@ use raftstore::{ use resource_metering::CollectorRegHandle; use tempfile::Builder; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv::{ config::QuotaConfig, coprocessor::REQ_TYPE_DAG, @@ -415,11 +416,12 @@ fn test_mvcc_rollback_and_cleanup() { 
assert_eq!(scan_lock_resp.locks.len(), 0); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_mvcc_resolve_lock_gc_and_delete() { use kvproto::kvrpcpb::*; - let (cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (cluster, client, ctx) = new_cluster(); let (k, v) = (b"key".to_vec(), b"value".to_vec()); let mut ts = 0; From 6342e0efbd6ea08ca95118a4bac53b1dea90ec52 Mon Sep 17 00:00:00 2001 From: Jay Date: Thu, 9 Mar 2023 16:37:12 +0800 Subject: [PATCH 0576/1149] server: support sending incremental snapshot (#14275) ref tikv/tikv#14256 This PR tries to reduce the bandwidth consumption by utilizing local tablet. If a file already exists in the local tablet, it will be skipped. This PR also fixes a race of receiving snapshot. Signed-off-by: Jay Lee --- Cargo.lock | 2 +- components/batch-system/src/router.rs | 10 +- components/file_system/src/lib.rs | 66 ++ components/raftstore-v2/src/fsm/peer.rs | 13 +- .../raftstore/src/store/async_io/read.rs | 2 +- components/raftstore/src/store/snap.rs | 37 +- components/server/src/server.rs | 7 +- components/server/src/server2.rs | 6 +- components/test_raftstore-v2/src/server.rs | 6 +- components/test_raftstore/src/node.rs | 2 +- components/test_raftstore/src/server.rs | 7 +- etc/config-template.toml | 8 +- src/lib.rs | 1 + src/server/config.rs | 5 +- src/server/engine_factory.rs | 2 +- src/server/server.rs | 7 +- src/server/service/kv.rs | 18 + src/server/snap.rs | 27 +- src/server/tablet_snap.rs | 825 ++++++++++++------ tests/integrations/config/dynamic/snap.rs | 4 +- tests/integrations/config/mod.rs | 2 +- tests/integrations/config/test-custom.toml | 2 +- tests/integrations/raftstore/test_snap.rs | 2 +- 23 files changed, 764 insertions(+), 297 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 90e77ce6e56..5fb51b4fcdf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2733,7 +2733,7 @@ dependencies = [ [[package]] name = 
"kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#02fc19e8abc41245e286d4a70f23e5139e3a33fe" +source = "git+https://github.com/pingcap/kvproto.git#60b33e619c70d8abe151f086a19a82895965f28f" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/batch-system/src/router.rs b/components/batch-system/src/router.rs index 4238929d1d4..119b7875506 100644 --- a/components/batch-system/src/router.rs +++ b/components/batch-system/src/router.rs @@ -183,10 +183,18 @@ where mailbox: BasicMailbox, msg: N::Message, ) -> Result<(), (BasicMailbox, N::Message)> { + let mut normals = self.normals.lock().unwrap(); + // Send has to be done within lock, otherwise the message may be handled + // before the mailbox is register. if let Err(SendError(m)) = mailbox.force_send(msg, &self.normal_scheduler) { return Err((mailbox, m)); } - self.register(addr, mailbox); + if let Some(mailbox) = normals.map.insert(addr, mailbox) { + mailbox.close(); + } + normals + .alive_cnt + .store(normals.map.len(), Ordering::Relaxed); Ok(()) } diff --git a/components/file_system/src/lib.rs b/components/file_system/src/lib.rs index 058b2a3a5f9..0b6213094af 100644 --- a/components/file_system/src/lib.rs +++ b/components/file_system/src/lib.rs @@ -442,6 +442,42 @@ pub fn reserve_space_for_recover>(data_dir: P, file_size: u64) -> } } +const TRASH_PREFIX: &str = "TRASH-"; + +/// Remove a directory. +/// +/// Rename it before actually removal. 
+#[inline] +pub fn trash_dir_all(path: impl AsRef) -> io::Result<()> { + let path = path.as_ref(); + let name = match path.file_name() { + Some(n) => n, + None => return Err(io::Error::new(ErrorKind::InvalidInput, "path is invalid")), + }; + let trash_path = path.with_file_name(format!("{}{}", TRASH_PREFIX, name.to_string_lossy())); + if let Err(e) = rename(path, &trash_path) { + if e.kind() == ErrorKind::NotFound { + return Ok(()); + } + return Err(e); + } + remove_dir_all(trash_path) +} + +/// When using `trash_dir_all`, it's possible the directory is marked as trash +/// but not being actually deleted after a restart. This function can be used +/// to resume all those removal in the given directory. +#[inline] +pub fn clean_up_trash(path: impl AsRef) -> io::Result<()> { + for e in read_dir(path)? { + let e = e?; + if e.file_name().to_string_lossy().starts_with(TRASH_PREFIX) { + remove_dir_all(e.path())?; + } + } + Ok(()) +} + #[cfg(test)] mod tests { use std::{io::Write, iter}; @@ -608,4 +644,34 @@ mod tests { reserve_space_for_recover(data_path, 0).unwrap(); assert!(!file.exists()); } + + #[test] + fn test_trash_dir_all() { + let tmp_dir = Builder::new() + .prefix("test_reserve_space_for_recover") + .tempdir() + .unwrap(); + let data_path = tmp_dir.path(); + let sub_dir0 = data_path.join("sub_dir0"); + let trash_sub_dir0 = data_path.join(format!("{}sub_dir0", TRASH_PREFIX)); + create_dir_all(&sub_dir0).unwrap(); + assert!(sub_dir0.exists()); + + trash_dir_all(&sub_dir0).unwrap(); + assert!(!sub_dir0.exists()); + assert!(!trash_sub_dir0.exists()); + + create_dir_all(&sub_dir0).unwrap(); + create_dir_all(&trash_sub_dir0).unwrap(); + trash_dir_all(&sub_dir0).unwrap(); + assert!(!sub_dir0.exists()); + assert!(!trash_sub_dir0.exists()); + + clean_up_trash(data_path).unwrap(); + + create_dir_all(&trash_sub_dir0).unwrap(); + assert!(trash_sub_dir0.exists()); + clean_up_trash(data_path).unwrap(); + assert!(!trash_sub_dir0.exists()); + } } diff --git 
a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 388cdbbcce5..77860b0ff49 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -9,10 +9,11 @@ use crossbeam::channel::TryRecvError; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use kvproto::{errorpb, raft_cmdpb::RaftCmdResponse}; use raftstore::store::{Config, TabletSnapManager, Transport}; -use slog::{debug, error, info, trace, Logger}; +use slog::{debug, info, trace, Logger}; use tikv_util::{ is_zero_duration, mpsc::{self, LooseBoundedSender, Receiver}, + slog_panic, time::{duration_to_sec, Instant}, }; @@ -159,12 +160,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, let mb = match self.store_ctx.router.mailbox(region_id) { Some(mb) => mb, None => { - error!( - self.fsm.logger(), - "failed to get mailbox"; - "tick" => ?tick, - ); - return; + if !self.fsm.peer.serving() || self.store_ctx.router.is_shutdown() { + return; + } + slog_panic!(self.fsm.logger(), "failed to get mailbox"; "tick" => ?tick); } }; self.fsm.tick_registry[idx] = true; diff --git a/components/raftstore/src/store/async_io/read.rs b/components/raftstore/src/store/async_io/read.rs index b02992bbeb0..985134048dd 100644 --- a/components/raftstore/src/store/async_io/read.rs +++ b/components/raftstore/src/store/async_io/read.rs @@ -124,7 +124,7 @@ impl ReadRunner { let checkpointer_path = self.snap_mgr().tablet_gen_path(snap_key); if checkpointer_path.as_path().exists() { // Remove the old checkpoint directly. - std::fs::remove_dir_all(checkpointer_path.as_path())?; + file_system::trash_dir_all(&checkpointer_path)?; } // Here not checkpoint to a temporary directory first, the temporary directory // logic already implemented in rocksdb. 
diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 5f971818e9a..37189d2e52b 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -4,13 +4,12 @@ use std::{ cmp::{self, Ordering as CmpOrdering, Reverse}, error::Error as StdError, fmt::{self, Display, Formatter}, - fs, io::{self, ErrorKind, Read, Write}, path::{Path, PathBuf}, result, str, sync::{ atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, - Arc, RwLock, + Arc, Mutex, RwLock, }, thread, time, u64, }; @@ -1932,6 +1931,19 @@ impl Display for TabletSnapKey { } } +pub struct ReceivingGuard<'a> { + receiving: &'a Mutex>, + key: TabletSnapKey, +} + +impl Drop for ReceivingGuard<'_> { + fn drop(&mut self) { + let mut receiving = self.receiving.lock().unwrap(); + let pos = receiving.iter().position(|k| k == &self.key).unwrap(); + receiving.swap_remove(pos); + } +} + /// `TabletSnapManager` manager tablet snapshot and shared between raftstore v2. /// It's similar `SnapManager`, but simpler in tablet version. /// @@ -1941,6 +1953,7 @@ impl Display for TabletSnapKey { pub struct TabletSnapManager { // directory to store snapfile. 
base: PathBuf, + receiving: Arc>>, } impl TabletSnapManager { @@ -1956,7 +1969,11 @@ impl TabletSnapManager { format!("{} should be a directory", path.display()), )); } - Ok(Self { base: path }) + file_system::clean_up_trash(&path)?; + Ok(Self { + base: path, + receiving: Arc::default(), + }) } pub fn tablet_gen_path(&self, key: &TabletSnapKey) -> PathBuf { @@ -1976,7 +1993,7 @@ impl TabletSnapManager { pub fn delete_snapshot(&self, key: &TabletSnapKey) -> bool { let path = self.tablet_gen_path(key); - if path.exists() && let Err(e) = fs::remove_dir_all(path.as_path()) { + if path.exists() && let Err(e) = file_system::trash_dir_all(&path) { error!( "delete snapshot failed"; "path" => %path.display(), @@ -2026,6 +2043,18 @@ impl TabletSnapManager { pub fn root_path(&self) -> &Path { self.base.as_path() } + + pub fn start_receive(&self, key: TabletSnapKey) -> Option> { + let mut receiving = self.receiving.lock().unwrap(); + if receiving.iter().any(|k| k == &key) { + return None; + } + receiving.push(key.clone()); + Some(ReceivingGuard { + receiving: &self.receiving, + key, + }) + } } #[cfg(test)] diff --git a/components/server/src/server.rs b/components/server/src/server.rs index ae6b86bbbd3..e77197a7737 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -103,6 +103,7 @@ use tikv::{ resolve, service::{DebugService, DiagnosticsService}, status_server::StatusServer, + tablet_snap::NoSnapshotCache, ttl::TtlChecker, KvEngineFactoryBuilder, Node, RaftKv, Server, CPU_CORES_QUOTA_GAUGE, DEFAULT_CLUSTER_ID, GRPC_THREAD_PREFIX, @@ -885,8 +886,8 @@ where .unwrap() .to_owned(); - let bps = i64::try_from(self.config.server.snap_max_write_bytes_per_sec.0) - .unwrap_or_else(|_| fatal!("snap_max_write_bytes_per_sec > i64::max_value")); + let bps = i64::try_from(self.config.server.snap_io_max_bytes_per_sec.0) + .unwrap_or_else(|_| fatal!("snap_io_max_bytes_per_sec > i64::max_value")); let snap_mgr = SnapManagerBuilder::default() 
.max_write_bytes_per_sec(bps) @@ -1685,7 +1686,7 @@ where .unwrap_or_else(|e| fatal!("failed to build server: {}", e)); server .server - .start(server_config, self.security_mgr.clone()) + .start(server_config, self.security_mgr.clone(), NoSnapshotCache) .unwrap_or_else(|e| fatal!("failed to start server: {}", e)); } diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 50b75f27c23..9ae032dca7a 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1345,7 +1345,11 @@ where .unwrap_or_else(|e| fatal!("failed to build server: {}", e)); server .server - .start(server_config, self.security_mgr.clone()) + .start( + server_config, + self.security_mgr.clone(), + self.tablet_registry.clone().unwrap(), + ) .unwrap_or_else(|e| fatal!("failed to start server: {}", e)); } diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index b105f52be39..1c6d956d1a8 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -580,7 +580,7 @@ impl ServerCluster { let pessimistic_txn_cfg = cfg.tikv.pessimistic_txn; node.start( raft_engine, - tablet_registry, + tablet_registry.clone(), &raft_router, simulate_trans.clone(), snap_mgr.clone(), @@ -616,7 +616,9 @@ impl ServerCluster { ) .unwrap(); - server.start(server_cfg, security_mgr).unwrap(); + server + .start(server_cfg, security_mgr, tablet_registry) + .unwrap(); self.metas.insert( node_id, diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 618b760e29e..c75adf33645 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -268,7 +268,7 @@ impl Simulator for NodeCluster { { let tmp = test_util::temp_dir("test_cluster", cfg.prefer_mem); let snap_mgr = SnapManagerBuilder::default() - .max_write_bytes_per_sec(cfg.server.snap_max_write_bytes_per_sec.0 as i64) + 
.max_write_bytes_per_sec(cfg.server.snap_io_max_bytes_per_sec.0 as i64) .max_total_size(cfg.server.snap_max_total_size.0) .encryption_key_manager(key_manager) .max_per_file_size(cfg.raft_store.max_snapshot_file_raw_size.0) diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 9fd1229e6e5..54da33fa3dd 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -59,6 +59,7 @@ use tikv::{ raftkv::ReplicaReadLockChecker, resolve::{self, StoreAddrResolver}, service::DebugService, + tablet_snap::NoSnapshotCache, ConnectionBuilder, Error, Node, PdStoreAddrResolver, RaftClient, RaftKv, Result as ServerResult, Server, ServerTransport, }, @@ -452,7 +453,7 @@ impl ServerCluster { let (resolver, state) = resolve::new_resolver(Arc::clone(&self.pd_client), &bg_worker, extension.clone()); let snap_mgr = SnapManagerBuilder::default() - .max_write_bytes_per_sec(cfg.server.snap_max_write_bytes_per_sec.0 as i64) + .max_write_bytes_per_sec(cfg.server.snap_io_max_bytes_per_sec.0 as i64) .max_total_size(cfg.server.snap_max_total_size.0) .encryption_key_manager(key_manager) .max_per_file_size(cfg.raft_store.max_snapshot_file_raw_size.0) @@ -620,7 +621,9 @@ impl ServerCluster { ) .unwrap(); - server.start(server_cfg, security_mgr).unwrap(); + server + .start(server_cfg, security_mgr, NoSnapshotCache) + .unwrap(); self.metas.insert( node_id, diff --git a/etc/config-template.toml b/etc/config-template.toml index ea73efdf59e..3930a247374 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -205,9 +205,11 @@ ## Max time to handle Coprocessor requests before timeout. # end-point-request-max-handle-duration = "60s" -## Max bytes that snapshot can be written to disk in one second. -## It should be set based on your disk performance. -# snap-max-write-bytes-per-sec = "100MB" +## Max bytes that snapshot can interact with disk in one second. 
It should be +## set based on your disk performance. Only write flow is considered, if +## partiioned-raft-kv is used, read flow is also considered and it will be estimated +## as read_size * 0.5 to get around errors from page cache. +# snap-io-max-bytes-per-sec = "100MB" ## Whether to enable request batch. # enable-request-batch = true diff --git a/src/lib.rs b/src/lib.rs index 43d5db81458..4da16ee0e74 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,6 +26,7 @@ #![feature(drain_filter)] #![feature(deadline_api)] #![feature(let_chains)] +#![feature(read_buf)] #![feature(type_alias_impl_trait)] #[macro_use(fail_point)] diff --git a/src/server/config.rs b/src/server/config.rs index ae5c70abe1d..5f15e72ae2f 100644 --- a/src/server/config.rs +++ b/src/server/config.rs @@ -146,7 +146,8 @@ pub struct Config { #[serde(with = "perf_level_serde")] #[online_config(skip)] pub end_point_perf_level: PerfLevel, - pub snap_max_write_bytes_per_sec: ReadableSize, + #[serde(alias = "snap_max_write_bytes_per_sec")] + pub snap_io_max_bytes_per_sec: ReadableSize, pub snap_max_total_size: ReadableSize, #[online_config(skip)] pub stats_concurrency: usize, @@ -251,7 +252,7 @@ impl Default for Config { ), end_point_max_concurrency: cmp::max(cpu_num as usize, MIN_ENDPOINT_MAX_CONCURRENCY), end_point_perf_level: PerfLevel::Uninitialized, - snap_max_write_bytes_per_sec: ReadableSize(DEFAULT_SNAP_MAX_BYTES_PER_SEC), + snap_io_max_bytes_per_sec: ReadableSize(DEFAULT_SNAP_MAX_BYTES_PER_SEC), snap_max_total_size: ReadableSize(0), stats_concurrency: 1, // 75 means a gRPC thread is under heavy load if its total CPU usage diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 9d2c03998e6..c3976b8eeac 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -233,7 +233,7 @@ impl TabletFactory for KvEngineFactory { // kv_db_opts, // kv_cfs_opts, // )?; - let _ = std::fs::remove_dir_all(path); + let _ = file_system::trash_dir_all(path); if let 
Some(listener) = &self.inner.flow_listener { listener.clone_with(ctx.id).on_destroyed(); } diff --git a/src/server/server.rs b/src/server/server.rs index 4c1f5e7ef69..15de7f0d4e7 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -32,6 +32,7 @@ use super::{ resolve::StoreAddrResolver, service::*, snap::{Runner as SnapHandler, Task as SnapTask}, + tablet_snap::SnapCacheBuilder, transport::ServerTransport, Config, Error, Result, }; @@ -251,6 +252,7 @@ where &mut self, cfg: Arc>, security_mgr: Arc, + snap_cache_builder: impl SnapCacheBuilder + Clone + 'static, ) -> Result<()> { match self.snap_mgr.clone() { Either::Left(mgr) => { @@ -267,6 +269,7 @@ where let snap_runner = TabletRunner::new( self.env.clone(), mgr, + snap_cache_builder, self.raft_router.clone(), security_mgr, cfg, @@ -458,7 +461,7 @@ mod tests { use crate::{ config::CoprReadPoolConfig, coprocessor::{self, readpool_impl}, - server::{raftkv::RaftRouterWrap, TestRaftStoreRouter}, + server::{raftkv::RaftRouterWrap, tablet_snap::NoSnapshotCache, TestRaftStoreRouter}, storage::{lock_manager::MockLockManager, TestEngineBuilder, TestStorageBuilderApiV1}, }; @@ -589,7 +592,7 @@ mod tests { .unwrap(); server.build_and_bind().unwrap(); - server.start(cfg, security_mgr).unwrap(); + server.start(cfg, security_mgr, NoSnapshotCache).unwrap(); let mut trans = server.transport(); router.report_unreachable(0, 0).unwrap(); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index ce6971eb8fb..f0d0009b8e6 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -702,6 +702,24 @@ impl Tikv for Service { } } + fn tablet_snapshot( + &mut self, + ctx: RpcContext<'_>, + stream: RequestStream, + sink: DuplexSink, + ) { + let task = SnapTask::RecvTablet { stream, sink }; + if let Err(e) = self.snap_scheduler.schedule(task) { + let err_msg = format!("{}", e); + let sink = match e.into_inner() { + SnapTask::Recv { sink, .. 
} => sink, + _ => unreachable!(), + }; + let status = RpcStatus::with_message(RpcStatusCode::RESOURCE_EXHAUSTED, err_msg); + ctx.spawn(sink.fail(status).map(|_| ())); + } + } + #[allow(clippy::collapsible_else_if)] fn split_region( &mut self, diff --git a/src/server/snap.rs b/src/server/snap.rs index 8fe737c2e60..bae0587c505 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -13,17 +13,20 @@ use std::{ use file_system::{IoType, WithIoType}; use futures::{ - future::{Future, TryFutureExt}, + future::{Future, FutureExt, TryFutureExt}, sink::SinkExt, stream::{Stream, StreamExt, TryStreamExt}, task::{Context, Poll}, }; use grpcio::{ - ChannelBuilder, ClientStreamingSink, Environment, RequestStream, RpcStatus, RpcStatusCode, - WriteFlags, + ChannelBuilder, ClientStreamingSink, DuplexSink, Environment, RequestStream, RpcStatus, + RpcStatusCode, WriteFlags, }; use kvproto::{ - raft_serverpb::{Done, RaftMessage, RaftSnapshotData, SnapshotChunk}, + raft_serverpb::{ + Done, RaftMessage, RaftSnapshotData, SnapshotChunk, TabletSnapshotRequest, + TabletSnapshotResponse, + }, tikvpb::TikvClient, }; use protobuf::Message; @@ -51,6 +54,10 @@ pub enum Task { stream: RequestStream, sink: ClientStreamingSink, }, + RecvTablet { + stream: RequestStream, + sink: DuplexSink, + }, Send { addr: String, msg: RaftMessage, @@ -64,6 +71,7 @@ impl Display for Task { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { Task::Recv { .. } => write!(f, "Recv"), + Task::RecvTablet { .. } => write!(f, "RecvTablet"), Task::Send { ref addr, ref msg, .. 
} => write!(f, "Send Snap[to: {}, snap: {:?}]", addr, msg), @@ -368,8 +376,8 @@ impl Runner { fn refresh_cfg(&mut self) { if let Some(incoming) = self.cfg_tracker.any_new() { - let limit = if incoming.snap_max_write_bytes_per_sec.0 > 0 { - incoming.snap_max_write_bytes_per_sec.0 as f64 + let limit = if incoming.snap_io_max_bytes_per_sec.0 > 0 { + incoming.snap_io_max_bytes_per_sec.0 as f64 } else { f64::INFINITY }; @@ -422,6 +430,13 @@ impl Runnable for Runner { }; self.pool.spawn(task); } + Task::RecvTablet { sink, .. } => { + let status = RpcStatus::with_message( + RpcStatusCode::UNIMPLEMENTED, + "tablet snap is not supported".to_string(), + ); + self.pool.spawn(sink.fail(status).map(|_| ())); + } Task::Send { addr, msg, cb } => { fail_point!("send_snapshot"); let region_id = msg.get_region_id(); diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index c0ecf4db611..a5a8b24d10b 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -1,11 +1,30 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +//! This file contains the implementation of sending and receiving tablet +//! snapshot. +//! +//! Different from v1, tablet snapshot always tries to use cache to speed up +//! transfering speed. The protocol is quite simple: +//! +//! sender receiver +//! send snapshot meta ----> receive snapshot meta +//! extra snapshot preview collect cache meta +//! send all preview ----> receive preview and clean up miss cache +//! files receive files list <----- send missing file list +//! send missing files ----> receive missing files +//! close sender ----> persist snapshot and report to raftstore +//! wait for receiver <----- close sender +//! 
finish + #[cfg(any(test, feature = "testexport"))] use std::io; use std::{ - convert::{TryFrom, TryInto}, + cmp, + convert::TryFrom, + fmt::Debug, fs::{self, File}, - io::{Read, Write}, + io::{BorrowedBuf, Read, Seek, SeekFrom, Write}, + path::Path, sync::{ atomic::{AtomicUsize, Ordering}, Arc, @@ -13,26 +32,32 @@ use std::{ time::Duration, }; -use file_system::{IoType, WithIoType}; +use collections::HashMap; +use crc64fast::Digest; +use engine_traits::{Checkpointer, KvEngine, TabletRegistry}; +use file_system::{IoType, OpenOptions, WithIoType}; use futures::{ - future::{Future, TryFutureExt}, + future::{Future, FutureExt}, sink::{Sink, SinkExt}, stream::{Stream, StreamExt, TryStreamExt}, }; use grpcio::{ - self, ChannelBuilder, ClientStreamingSink, Environment, RequestStream, RpcStatus, - RpcStatusCode, WriteFlags, + self, ChannelBuilder, DuplexSink, Environment, RequestStream, RpcStatus, RpcStatusCode, + WriteFlags, }; use kvproto::{ - raft_serverpb::{Done, RaftMessage, RaftSnapshotData, SnapshotChunk}, + raft_serverpb::{ + RaftMessage, RaftSnapshotData, TabletSnapshotFileChunk, TabletSnapshotFileMeta, + TabletSnapshotPreview, TabletSnapshotRequest, TabletSnapshotResponse, + }, tikvpb::TikvClient, }; use protobuf::Message; -use raftstore::store::snap::{TabletSnapKey, TabletSnapManager}; +use raftstore::store::snap::{ReceivingGuard, TabletSnapKey, TabletSnapManager}; use security::SecurityManager; use tikv_kv::RaftExtension; use tikv_util::{ - config::{Tracker, VersionTrack}, + config::{ReadableSize, Tracker, VersionTrack}, time::Instant, worker::Runnable, }; @@ -40,29 +65,75 @@ use tokio::runtime::{Builder as RuntimeBuilder, Runtime}; use super::{ metrics::*, - snap::{Task, DEFAULT_POOL_SIZE, SNAP_CHUNK_LEN}, + snap::{Task, DEFAULT_POOL_SIZE}, Config, Error, Result, }; use crate::tikv_util::{sys::thread::ThreadBuildWrapper, time::Limiter}; -struct RecvTabletSnapContext { +const PREVIEW_CHUNK_LEN: usize = ReadableSize::kb(1).0 as usize; +const PREVIEW_BATCH_SIZE: 
usize = 256; +const FILE_CHUNK_LEN: usize = ReadableSize::mb(1).0 as usize; +const USE_CACHE_THRESHOLD: u64 = ReadableSize::mb(4).0; + +fn is_sst(file_name: &str) -> bool { + file_name.ends_with(".sst") +} + +async fn read_to(f: &mut File, to: &mut Vec, size: usize, limiter: &Limiter) -> Result<()> { + // It's likely in page cache already. + limiter.consume(size / 2).await; + to.clear(); + to.reserve_exact(size); + let mut buf: BorrowedBuf<'_> = to.spare_capacity_mut().into(); + f.read_buf_exact(buf.unfilled())?; + unsafe { + to.set_len(size); + } + Ok(()) +} + +pub trait SnapCacheBuilder: Send + Sync { + fn build(&self, region_id: u64, path: &Path) -> Result<()>; +} + +impl SnapCacheBuilder for TabletRegistry { + fn build(&self, region_id: u64, path: &Path) -> Result<()> { + if let Some(mut c) = self.get(region_id) && let Some(db) = c.latest() { + let mut checkpointer = db.new_checkpointer()?; + // Avoid flush. + checkpointer.create_at(path, None, u64::MAX)?; + Ok(()) + } else { + Err(Error::Other(format!("region {} not found", region_id).into())) + } + } +} + +#[derive(Clone)] +pub struct NoSnapshotCache; + +impl SnapCacheBuilder for NoSnapshotCache { + fn build(&self, _: u64, _: &Path) -> Result<()> { + Err(Error::Other("cache is disabled".into())) + } +} + +struct RecvTabletSnapContext<'a> { key: TabletSnapKey, raft_msg: RaftMessage, + use_cache: bool, io_type: IoType, + // Lock to avoid receive the same snapshot concurrently. 
+ _receiving_guard: ReceivingGuard<'a>, start: Instant, - chunk_size: usize, } -impl RecvTabletSnapContext { - fn new(mut head: SnapshotChunk) -> Result { - if !head.has_message() { +impl<'a> RecvTabletSnapContext<'a> { + fn new(mut head: TabletSnapshotRequest, mgr: &'a TabletSnapManager) -> Result { + if !head.has_head() { return Err(box_err!("no raft message in the first chunk")); } - - let chunk_size = match head.take_data().try_into() { - Ok(buff) => usize::from_le_bytes(buff), - Err(_) => return Err(box_err!("failed to get chunk size")), - }; + let mut head = head.take_head(); let meta = head.take_message(); let key = TabletSnapKey::from_region_snap( meta.get_region_id(), @@ -70,13 +141,18 @@ impl RecvTabletSnapContext { meta.get_message().get_snapshot(), ); let io_type = io_type_from_raft_message(&meta)?; + let receiving_guard = match mgr.start_receive(key.clone()) { + Some(g) => g, + None => return Err(box_err!("failed to start receive snapshot")), + }; Ok(RecvTabletSnapContext { key, raft_msg: meta, + use_cache: head.use_cache, io_type, + _receiving_guard: receiving_guard, start: Instant::now(), - chunk_size, }) } @@ -101,70 +177,437 @@ fn io_type_from_raft_message(msg: &RaftMessage) -> Result { } } -async fn send_snap_files( - mgr: &TabletSnapManager, - mut sender: impl Sink<(SnapshotChunk, WriteFlags), Error = Error> + Unpin, - msg: RaftMessage, - key: TabletSnapKey, - limiter: Limiter, -) -> Result { - let path = mgr.tablet_gen_path(&key); - info!("begin to send snapshot file";"snap_key" => %key); - let files = fs::read_dir(&path)? 
- .map(|f| Ok(f?.path())) - .filter(|f| f.is_ok() && f.as_ref().unwrap().is_file()) - .collect::>>()?; - let io_type = io_type_from_raft_message(&msg)?; - let _with_io_type = WithIoType::new(io_type); - let mut total_sent = msg.compute_size() as u64; - let mut chunk = SnapshotChunk::default(); - chunk.set_message(msg); - chunk.set_data(usize::to_le_bytes(SNAP_CHUNK_LEN).to_vec()); - sender - .feed((chunk, WriteFlags::default().buffer_hint(true))) - .await?; - for path in files { - let name = path.file_name().unwrap().to_str().unwrap(); - let mut buffer = Vec::with_capacity(SNAP_CHUNK_LEN); - buffer.push(name.len() as u8); - buffer.extend_from_slice(name.as_bytes()); - let mut f = File::open(&path)?; - let mut off = buffer.len(); - loop { - unsafe { - buffer.set_len(SNAP_CHUNK_LEN); +fn protocol_error(exp: &str, act: impl Debug) -> Error { + Error::Other(format!("protocol error: expect {exp}, but got {act:?}").into()) +} + +/// Check if a local SST file matches the preview meta. +/// +/// It's considered matched when: +/// 1. Have the same file size; +/// 2. The first `PREVIEW_CHUNK_LEN` bytes are the same, this contains the +/// actual data of an SST; +/// 3. The last `PREVIEW_CHUNK_LEN` bytes are the same, this contains checksum, +/// properties and other medata of an SST. 
+async fn is_sst_match_preview( + preview_meta: &TabletSnapshotFileMeta, + target: &Path, + buffer: &mut Vec, + limiter: &Limiter, +) -> Result { + let mut f = File::open(target)?; + let exist_len = f.metadata()?.len(); + if exist_len != preview_meta.file_size { + return Ok(false); + } + + let head_len = preview_meta.head_chunk.len(); + let trailing_len = preview_meta.trailing_chunk.len(); + if head_len as u64 > preview_meta.file_size || trailing_len as u64 > preview_meta.file_size { + return Err(Error::Other( + format!( + "invalid chunk length {} {} {}", + preview_meta.file_size, head_len, trailing_len + ) + .into(), + )); + } + read_to(&mut f, buffer, head_len, limiter).await?; + if *buffer != preview_meta.head_chunk { + return Ok(false); + } + + if preview_meta.trailing_chunk.is_empty() { + // A safet check to detect wrong protocol implementation. Only head chunk + // contains all the data can trailing chunk be empty. + return Ok(head_len as u64 == preview_meta.file_size); + } + + f.seek(SeekFrom::End(-(trailing_len as i64)))?; + read_to(&mut f, buffer, trailing_len, limiter).await?; + Ok(*buffer == preview_meta.trailing_chunk) +} + +async fn cleanup_cache( + path: &Path, + stream: &mut (impl Stream> + Unpin), + sink: &mut (impl Sink<(TabletSnapshotResponse, WriteFlags), Error = grpcio::Error> + Unpin), + limiter: &Limiter, +) -> Result<(u64, Vec)> { + let mut reused = 0; + let mut exists = HashMap::default(); + for entry in fs::read_dir(path)? { + let entry = entry?; + let ft = entry.file_type()?; + if ft.is_dir() { + fs::remove_dir_all(entry.path())?; + continue; + } + if ft.is_file() { + let os_name = entry.file_name(); + let name = os_name.to_str().unwrap(); + if is_sst(name) { + // Collect length requires another IO, delay till we are sure + // it's probably be reused. + exists.insert(name.to_string(), entry.path()); + continue; } - // it should break if readed len is zero or the buffer is full. 
- while off < SNAP_CHUNK_LEN { - let readed = f.read(&mut buffer[off..])?; - if readed == 0 { - unsafe { - buffer.set_len(off); - } - break; + } + fs::remove_file(entry.path())?; + } + let mut missing = vec![]; + loop { + let mut preview = match stream.next().await { + Some(Ok(mut req)) if req.has_preview() => req.take_preview(), + res => return Err(protocol_error("preview", res)), + }; + let mut buffer = Vec::with_capacity(PREVIEW_CHUNK_LEN); + for meta in preview.take_metas().into_vec() { + if is_sst(&meta.file_name) && let Some(p) = exists.remove(&meta.file_name) { + if is_sst_match_preview(&meta, &p, &mut buffer, limiter).await? { + reused += meta.file_size; + continue; + } + // We should not write to the file directly as it's hard linked. + fs::remove_file(p)?; + } + missing.push(meta.file_name); + } + if preview.end { + break; + } + } + for (_, p) in exists { + fs::remove_file(p)?; + } + let mut resp = TabletSnapshotResponse::default(); + resp.mut_files().set_file_name(missing.clone().into()); + sink.send((resp, WriteFlags::default())).await?; + Ok((reused, missing)) +} + +async fn accept_one_file( + path: &Path, + mut chunk: TabletSnapshotFileChunk, + stream: &mut (impl Stream> + Unpin), + limiter: &Limiter, + digest: &mut Digest, +) -> Result { + let name = chunk.file_name; + digest.write(name.as_bytes()); + let mut f = OpenOptions::new() + .write(true) + .create_new(true) + .open(path.join(&name))?; + let exp_size = chunk.file_size; + let mut file_size = 0; + loop { + let chunk_len = chunk.data.len(); + file_size += chunk_len as u64; + if file_size > exp_size { + return Err(Error::Other( + format!("file {} too long {} {}", name, file_size, exp_size).into(), + )); + } + limiter.consume(chunk_len).await; + digest.write(&chunk.data); + f.write_all(&chunk.data)?; + if exp_size == file_size { + f.sync_data()?; + return Ok(exp_size); + } + chunk = match stream.next().await { + Some(Ok(mut req)) if req.has_chunk() => req.take_chunk(), + res => return 
Err(protocol_error("chunk", res)), + }; + if !chunk.file_name.is_empty() { + return Err(protocol_error(&name, &chunk.file_name)); + } + } +} + +async fn accept_missing( + path: &Path, + missing_ssts: Vec, + stream: &mut (impl Stream> + Unpin), + limiter: &Limiter, +) -> Result { + let mut digest = Digest::default(); + let mut received_bytes: u64 = 0; + for name in missing_ssts { + let chunk = match stream.next().await { + Some(Ok(mut req)) if req.has_chunk() => req.take_chunk(), + res => return Err(protocol_error("chunk", res)), + }; + if chunk.file_name != name { + return Err(protocol_error(&name, &chunk.file_name)); + } + received_bytes += accept_one_file(path, chunk, stream, limiter, &mut digest).await?; + } + // Now receive other files. + loop { + let chunk = match stream.next().await { + Some(Ok(mut req)) if req.has_chunk() => req.take_chunk(), + Some(Ok(req)) if req.has_end() => { + let checksum = req.get_end().get_checksum(); + if checksum != digest.sum64() { + return Err(Error::Other( + format!("checksum mismatch {} {}", checksum, digest.sum64()).into(), + )); } - off += readed; + File::open(path)?.sync_data()?; + let res = stream.next().await; + return if res.is_none() { + Ok(received_bytes) + } else { + Err(protocol_error("None", res)) + }; } - limiter.consume(off).await; - total_sent += off as u64; - let mut chunk = SnapshotChunk::default(); - chunk.set_data(buffer); + res => return Err(protocol_error("chunk", res)), + }; + if chunk.file_name.is_empty() { + return Err(protocol_error("file_name", &chunk.file_name)); + } + received_bytes += accept_one_file(path, chunk, stream, limiter, &mut digest).await?; + } +} + +async fn recv_snap_files<'a>( + snap_mgr: &'a TabletSnapManager, + cache_builder: impl SnapCacheBuilder, + mut stream: impl Stream> + Unpin, + sink: &mut (impl Sink<(TabletSnapshotResponse, WriteFlags), Error = grpcio::Error> + Unpin), + limiter: Limiter, +) -> Result> { + let head = stream + .next() + .await + .transpose()? 
+ .ok_or_else(|| Error::Other("empty gRPC stream".into()))?; + let context = RecvTabletSnapContext::new(head, snap_mgr)?; + let _with_io_type = WithIoType::new(context.io_type); + let region_id = context.key.region_id; + let final_path = snap_mgr.final_recv_path(&context.key); + if final_path.exists() { + // The snapshot is received already, should wait for peer to apply. If the + // snapshot is corrupted, the peer should destroy it first then request again. + return Err(Error::Other( + format!("snapshot {} already exists", final_path.display()).into(), + )); + } + let path = snap_mgr.tmp_recv_path(&context.key); + info!("begin to receive tablet snapshot files"; "file" => %path.display(), "region_id" => region_id); + if path.exists() { + fs::remove_dir_all(&path)?; + } + let (reused, missing_ssts) = if context.use_cache { + if let Err(e) = cache_builder.build(region_id, &path) { + info!("not using cache"; "region_id" => region_id, "err" => ?e); + fs::create_dir_all(&path)?; + } + cleanup_cache(&path, &mut stream, sink, &limiter).await? 
+ } else { + info!("not using cache"; "region_id" => region_id); + fs::create_dir_all(&path)?; + (0, vec![]) + }; + let received = accept_missing(&path, missing_ssts, &mut stream, &limiter).await?; + info!("received all tablet snapshot file"; "snap_key" => %context.key, "region_id" => region_id, "received" => received, "reused" => reused); + let final_path = snap_mgr.final_recv_path(&context.key); + fs::rename(&path, final_path)?; + Ok(context) +} + +async fn recv_snap( + stream: RequestStream, + sink: DuplexSink, + snap_mgr: TabletSnapManager, + raft_router: R, + cache_builder: impl SnapCacheBuilder, + limiter: Limiter, +) -> Result<()> { + let stream = stream.map_err(Error::from); + let mut sink = sink; + let res = recv_snap_files(&snap_mgr, cache_builder, stream, &mut sink, limiter) + .await + .and_then(|context| context.finish(raft_router)); + match res { + Ok(()) => sink.close().await.map_err(Error::from), + Err(e) => { + let status = RpcStatus::with_message(RpcStatusCode::UNKNOWN, format!("{:?}", e)); + sink.fail(status).await.map_err(Error::from) + } + } +} + +async fn build_one_preview( + path: &Path, + iter: &mut impl Iterator, + limiter: &Limiter, +) -> Result { + let mut preview = TabletSnapshotPreview::default(); + for _ in 0..PREVIEW_BATCH_SIZE { + let (name, size) = match iter.next() { + Some((name, size)) => (name, *size), + None => break, + }; + let mut meta = TabletSnapshotFileMeta::default(); + meta.file_name = name.clone(); + meta.file_size = size; + let mut f = File::open(path.join(name))?; + let to_read = cmp::min(size as usize, PREVIEW_CHUNK_LEN); + read_to(&mut f, &mut meta.head_chunk, to_read, limiter).await?; + if size > PREVIEW_CHUNK_LEN as u64 { + f.seek(SeekFrom::End(-(to_read as i64)))?; + read_to(&mut f, &mut meta.trailing_chunk, to_read, limiter).await?; + } + preview.mut_metas().push(meta); + } + let mut req = TabletSnapshotRequest::default(); + req.set_preview(preview); + Ok(req) +} + +async fn find_missing( + path: &Path, + mut 
head: TabletSnapshotRequest, + sender: &mut (impl Sink<(TabletSnapshotRequest, WriteFlags), Error = Error> + Unpin), + receiver: &mut (impl Stream> + Unpin), + limiter: &Limiter, +) -> Result> { + let mut sst_sizes = 0; + let mut ssts = HashMap::default(); + let mut other_files = vec![]; + for f in fs::read_dir(path)? { + let entry = f?; + let ft = entry.file_type()?; + // What if it's titan? + if !ft.is_file() { + continue; + } + let os_name = entry.file_name(); + let name = os_name.to_str().unwrap().to_string(); + let file_size = entry.metadata()?.len(); + if is_sst(&name) { + sst_sizes += file_size; + ssts.insert(name, file_size); + } else { + other_files.push((name, file_size)); + } + } + if sst_sizes < USE_CACHE_THRESHOLD { + sender + .send((head, WriteFlags::default().buffer_hint(true))) + .await?; + other_files.extend(ssts); + return Ok(other_files); + } + + head.mut_head().set_use_cache(true); + // Send immediately to make receiver collect cache earlier. + sender.send((head, WriteFlags::default())).await?; + let sst_count = ssts.len(); + // PREVIEW_BATCH_SIZE -> 1, PREVIEW_BATCH_SIZE + 1 = 2. sst_count can't be 0. 
+ let batch_count = (sst_count - 1) / PREVIEW_BATCH_SIZE + 1; + let mut ssts_iter = ssts.iter(); + for _ in 0..batch_count { + let req = build_one_preview(path, &mut ssts_iter, limiter).await?; + sender + .send((req, WriteFlags::default().buffer_hint(true))) + .await?; + } + let mut req = build_one_preview(path, &mut ssts_iter, limiter).await?; + req.mut_preview().end = true; + sender.send((req, WriteFlags::default())).await?; + + let accepted = match receiver.next().await { + Some(Ok(mut req)) if req.has_files() => req.take_files().take_file_name(), + res => return Err(protocol_error("missing files", res)), + }; + let mut missing = Vec::with_capacity(accepted.len()); + for name in &accepted { + let s = match ssts.remove_entry(name) { + Some(s) => s, + None => return Err(Error::Other(format!("missing file {}", name).into())), + }; + missing.push(s); + } + missing.extend(other_files); + Ok(missing) +} + +async fn send_missing( + path: &Path, + missing: Vec<(String, u64)>, + sender: &mut (impl Sink<(TabletSnapshotRequest, WriteFlags), Error = Error> + Unpin), + limiter: &Limiter, +) -> Result<(u64, u64)> { + let mut total_sent = 0; + let mut digest = Digest::default(); + for (name, mut file_size) in missing { + let mut chunk = TabletSnapshotFileChunk::default(); + chunk.file_name = name; + digest.write(chunk.file_name.as_bytes()); + chunk.file_size = file_size; + total_sent += file_size; + if file_size == 0 { + let mut req = TabletSnapshotRequest::default(); + req.set_chunk(chunk); sender - .feed((chunk, WriteFlags::default().buffer_hint(true))) + .send((req, WriteFlags::default().buffer_hint(true))) .await?; - // It should switch the next file if the read buffer len is less than the - // SNAP_CHUNK_LEN. 
- if off < SNAP_CHUNK_LEN { + continue; + } + + let mut f = File::open(path.join(&chunk.file_name))?; + loop { + let to_read = cmp::min(FILE_CHUNK_LEN as u64, file_size) as usize; + read_to(&mut f, &mut chunk.data, to_read, limiter).await?; + digest.write(&chunk.data); + let mut req = TabletSnapshotRequest::default(); + req.set_chunk(chunk); + sender + .send((req, WriteFlags::default().buffer_hint(true))) + .await?; + if file_size == to_read as u64 { break; } - buffer = Vec::with_capacity(SNAP_CHUNK_LEN); - off = 0 + chunk = TabletSnapshotFileChunk::default(); + file_size -= to_read as u64; } } - info!("sent all snap file finish"; "snap_key" => %key); + Ok((total_sent, digest.sum64())) +} + +async fn send_snap_files( + mgr: &TabletSnapManager, + mut sender: impl Sink<(TabletSnapshotRequest, WriteFlags), Error = Error> + Unpin, + receiver: &mut (impl Stream> + Unpin), + msg: RaftMessage, + key: TabletSnapKey, + limiter: Limiter, +) -> Result { + let region_id = key.region_id; + let to_peer = key.to_peer; + let path = mgr.tablet_gen_path(&key); + info!("begin to send snapshot file"; "snap_key" => %key, "region_id" => region_id, "to_peer" => to_peer); + let io_type = io_type_from_raft_message(&msg)?; + let _with_io_type = WithIoType::new(io_type); + let mut head = TabletSnapshotRequest::default(); + head.mut_head().set_message(msg); + let missing = find_missing(&path, head, &mut sender, receiver, &limiter).await?; + let (total_sent, checksum) = send_missing(&path, missing, &mut sender, &limiter).await?; + // In gRPC, stream in serverside can finish without error (when the connection + // is closed). So we need to use an explicit `Done` to indicate all messages + // are sent. In V1, we have checksum and meta list, so this is not a + // problem. 
+ let mut req = TabletSnapshotRequest::default(); + req.mut_end().set_checksum(checksum); + sender.send((req, WriteFlags::default())).await?; SNAP_LIMIT_TRANSPORT_BYTES_COUNTER_STATIC .send .inc_by(total_sent); + info!("sent all snap file finish"; "snap_key" => %key, "region_id" => region_id, "to_peer" => to_peer); sender.close().await?; Ok(total_sent) } @@ -201,107 +644,31 @@ pub fn send_snap( let channel = security_mgr.connect(cb, addr); let client = TikvClient::new(channel); - let (sink, receiver) = client.snapshot()?; + let (sink, mut receiver) = client.tablet_snapshot()?; let send_task = async move { let sink = sink.sink_map_err(Error::from); - let total_size = send_snap_files(&mgr, sink, msg, key.clone(), limiter).await?; - let recv_result = receiver.map_err(Error::from).await; + let total_size = + send_snap_files(&mgr, sink, &mut receiver, msg, key.clone(), limiter).await?; + let recv_result = receiver.next().await; send_timer.observe_duration(); drop(client); + mgr.delete_snapshot(&key); match recv_result { - Ok(_) => { - mgr.delete_snapshot(&key); - Ok(SendStat { - key, - total_size, - elapsed: timer.saturating_elapsed(), - }) - } - Err(e) => Err(e), + None => Ok(SendStat { + key, + total_size, + elapsed: timer.saturating_elapsed(), + }), + Some(Err(e)) => Err(e.into()), + Some(Ok(resp)) => Err(Error::Other( + format!("receive unexpected response {:?}", resp).into(), + )), } }; Ok(send_task) } -async fn recv_snap_files( - snap_mgr: TabletSnapManager, - mut stream: impl Stream> + Unpin, - limit: Limiter, -) -> Result { - let head = stream - .next() - .await - .transpose()? 
- .ok_or_else(|| Error::Other("empty gRPC stream".into()))?; - let context = RecvTabletSnapContext::new(head)?; - let chunk_size = context.chunk_size; - let path = snap_mgr.tmp_recv_path(&context.key); - info!("begin to receive tablet snapshot files"; "file" => %path.display()); - fs::create_dir_all(&path)?; - let _with_io_type = WithIoType::new(context.io_type); - loop { - let mut chunk = match stream.next().await { - Some(Ok(mut c)) if !c.has_message() => c.take_data(), - Some(_) => { - return Err(box_err!("duplicated metadata")); - } - None => break, - }; - // the format of chunk: - // |--name_len--|--name--|--content--| - let len = chunk[0] as usize; - let file_name = box_try!(std::str::from_utf8(&chunk[1..len + 1])); - let p = path.join(file_name); - let mut f = File::create(&p)?; - let mut size = chunk.len() - len - 1; - f.write_all(&chunk[len + 1..])?; - // It should switch next file if the chunk size is less than the SNAP_CHUNK_LEN. - while chunk.len() >= chunk_size { - chunk = match stream.next().await { - Some(Ok(mut c)) if !c.has_message() => c.take_data(), - Some(_) => return Err(box_err!("duplicated metadata")), - None => return Err(box_err!("missing chunk")), - }; - f.write_all(&chunk[..])?; - limit.consume(chunk.len()).await; - size += chunk.len(); - } - debug!("received snap file"; "file" => %p.display(), "size" => size); - SNAP_LIMIT_TRANSPORT_BYTES_COUNTER_STATIC - .recv - .inc_by(size as u64); - f.sync_data()?; - } - info!("received all tablet snapshot file"; "snap_key" => %context.key); - let final_path = snap_mgr.final_recv_path(&context.key); - fs::rename(&path, final_path)?; - Ok(context) -} - -fn recv_snap( - stream: RequestStream, - sink: ClientStreamingSink, - snap_mgr: TabletSnapManager, - raft_router: R, - limit: Limiter, -) -> impl Future> { - let recv_task = async move { - let stream = stream.map_err(Error::from); - let context = recv_snap_files(snap_mgr, stream, limit).await?; - context.finish(raft_router) - }; - async move { - match 
recv_task.await { - Ok(()) => sink.success(Done::default()).await.map_err(Error::from), - Err(e) => { - let status = RpcStatus::with_message(RpcStatusCode::UNKNOWN, format!("{:?}", e)); - sink.fail(status).await.map_err(Error::from) - } - } - } -} - -pub struct TabletRunner { +pub struct TabletRunner { env: Arc, snap_mgr: TabletSnapManager, security_mgr: Arc, @@ -311,21 +678,23 @@ pub struct TabletRunner { cfg: Config, sending_count: Arc, recving_count: Arc, + cache_builder: B, limiter: Limiter, } -impl TabletRunner { +impl TabletRunner { pub fn new( env: Arc, snap_mgr: TabletSnapManager, + cache_builder: B, r: R, security_mgr: Arc, cfg: Arc>, ) -> Self { let config = cfg.value().clone(); let cfg_tracker = cfg.tracker("tablet-sender".to_owned()); - let limit = i64::try_from(config.snap_max_write_bytes_per_sec.0) - .unwrap_or_else(|_| panic!("snap_max_write_bytes_per_sec > i64::max_value")); + let limit = i64::try_from(config.snap_io_max_bytes_per_sec.0) + .unwrap_or_else(|_| panic!("snap_io_max_bytes_per_sec > i64::max_value")); let limiter = Limiter::new(if limit > 0 { limit as f64 } else { @@ -348,6 +717,7 @@ impl TabletRunner { cfg: config, sending_count: Arc::new(AtomicUsize::new(0)), recving_count: Arc::new(AtomicUsize::new(0)), + cache_builder, limiter, }; snap_worker @@ -355,8 +725,8 @@ impl TabletRunner { fn refresh_cfg(&mut self) { if let Some(incoming) = self.cfg_tracker.any_new() { - let limit = if incoming.snap_max_write_bytes_per_sec.0 > 0 { - incoming.snap_max_write_bytes_per_sec.0 as f64 + let limit = if incoming.snap_io_max_bytes_per_sec.0 > 0 { + incoming.snap_io_max_bytes_per_sec.0 as f64 } else { f64::INFINITY }; @@ -374,12 +744,23 @@ pub struct SendStat { elapsed: Duration, } -impl Runnable for TabletRunner { +impl Runnable for TabletRunner +where + B: SnapCacheBuilder + Clone + 'static, + R: RaftExtension, +{ type Task = Task; fn run(&mut self, task: Task) { match task { - Task::Recv { stream, sink } => { + Task::Recv { sink, .. 
} => { + let status = RpcStatus::with_message( + RpcStatusCode::UNIMPLEMENTED, + "tablet snap is not supported".to_string(), + ); + self.pool.spawn(sink.fail(status).map(|_| ())); + } + Task::RecvTablet { stream, sink } => { let task_num = self.recving_count.load(Ordering::SeqCst); if task_num >= self.cfg.concurrent_recv_snap_limit { warn!("too many recving snapshot tasks, ignore"); @@ -399,9 +780,12 @@ impl Runnable for TabletRunner { let raft_router = self.raft_router.clone(); let recving_count = self.recving_count.clone(); recving_count.fetch_add(1, Ordering::SeqCst); - let limit = self.limiter.clone(); + let limiter = self.limiter.clone(); + let cache_builder = self.cache_builder.clone(); let task = async move { - let result = recv_snap(stream, sink, snap_mgr, raft_router, limit).await; + let result = + recv_snap(stream, sink, snap_mgr, raft_router, cache_builder, limiter) + .await; recving_count.fetch_sub(1, Ordering::SeqCst); if let Err(e) = result { error!("failed to recv snapshot"; "err" => %e); @@ -427,9 +811,16 @@ impl Runnable for TabletRunner { let security_mgr = Arc::clone(&self.security_mgr); let sending_count = Arc::clone(&self.sending_count); sending_count.fetch_add(1, Ordering::SeqCst); - let limit = self.limiter.clone(); - let send_task = - send_snap(env, mgr, security_mgr, &self.cfg.clone(), &addr, msg, limit); + let limiter = self.limiter.clone(); + let send_task = send_snap( + env, + mgr, + security_mgr, + &self.cfg.clone(), + &addr, + msg, + limiter, + ); let task = async move { let res = match send_task { Err(e) => Err(e), @@ -480,11 +871,10 @@ pub fn copy_tablet_snapshot( .filter(|f| f.is_ok() && f.as_ref().unwrap().is_file()) .collect::>>()?; - let mut head = SnapshotChunk::default(); - head.set_message(msg); - head.set_data(usize::to_le_bytes(SNAP_CHUNK_LEN).to_vec()); + let mut head = TabletSnapshotRequest::default(); + head.mut_head().set_message(msg); - let recv_context = RecvTabletSnapContext::new(head)?; + let recv_context = 
RecvTabletSnapContext::new(head, recver_snap_mgr)?; let recv_path = recver_snap_mgr.tmp_recv_path(&recv_context.key); fs::create_dir_all(&recv_path)?; @@ -502,78 +892,3 @@ pub fn copy_tablet_snapshot( fs::rename(&recv_path, final_path)?; Ok(()) } - -#[cfg(test)] -mod tests { - use std::{ - fs::{create_dir_all, File}, - io::Write, - }; - - use futures::{ - channel::mpsc::{self}, - executor::block_on, - sink::SinkExt, - }; - use futures_util::StreamExt; - use grpcio::WriteFlags; - use kvproto::raft_serverpb::{RaftMessage, SnapshotChunk}; - use raftstore::store::snap::{TabletSnapKey, TabletSnapManager}; - use tempfile::TempDir; - use tikv_util::{store::new_peer, time::Limiter}; - - use super::{super::Error, recv_snap_files, send_snap_files, SNAP_CHUNK_LEN}; - - #[test] - fn test_send_tablet() { - let limiter = Limiter::new(f64::INFINITY); - let snap_key = TabletSnapKey::new(1, 1, 1, 1); - let mut msg = RaftMessage::default(); - msg.set_region_id(1); - msg.set_to_peer(new_peer(1, 1)); - msg.mut_message().mut_snapshot().mut_metadata().set_index(1); - msg.mut_message().mut_snapshot().mut_metadata().set_term(1); - let send_path = TempDir::new().unwrap(); - let send_snap_mgr = - TabletSnapManager::new(send_path.path().join("snap_dir").to_str().unwrap()).unwrap(); - let snap_path = send_snap_mgr.tablet_gen_path(&snap_key); - create_dir_all(snap_path.as_path()).unwrap(); - // send file should skip directory - create_dir_all(snap_path.join("dir")).unwrap(); - for i in 0..2 { - let mut f = File::create(snap_path.join(i.to_string())).unwrap(); - let count = SNAP_CHUNK_LEN - 2; - let mut data = std::iter::repeat("a".as_bytes()) - .take(count) - .collect::>(); - for buffer in data.iter_mut() { - f.write_all(buffer).unwrap(); - } - f.sync_data().unwrap(); - } - - let recv_path = TempDir::new().unwrap(); - let recv_snap_manager = - TabletSnapManager::new(recv_path.path().join("snap_dir").to_str().unwrap()).unwrap(); - let (tx, rx) = mpsc::unbounded(); - let sink = 
tx.sink_map_err(Error::from); - block_on(send_snap_files( - &send_snap_mgr, - sink, - msg, - snap_key.clone(), - limiter.clone(), - )) - .unwrap(); - - let stream = rx.map(|x: (SnapshotChunk, WriteFlags)| Ok(x.0)); - let final_path = recv_snap_manager.final_recv_path(&snap_key); - let r = block_on(recv_snap_files(recv_snap_manager, stream, limiter)).unwrap(); - assert_eq!(r.key, snap_key); - std::thread::sleep(std::time::Duration::from_secs(1)); - let dir = std::fs::read_dir(final_path).unwrap(); - assert_eq!(2, dir.count()); - send_snap_mgr.delete_snapshot(&snap_key); - assert!(!snap_path.exists()); - } -} diff --git a/tests/integrations/config/dynamic/snap.rs b/tests/integrations/config/dynamic/snap.rs index af03246acf4..bb91d0d62eb 100644 --- a/tests/integrations/config/dynamic/snap.rs +++ b/tests/integrations/config/dynamic/snap.rs @@ -95,7 +95,7 @@ fn test_update_server_config() { let change = { let mut m = std::collections::HashMap::new(); m.insert( - "server.snap-max-write-bytes-per-sec".to_owned(), + "server.snap-io-max-bytes-per-sec".to_owned(), "512MB".to_owned(), ); m.insert( @@ -106,7 +106,7 @@ fn test_update_server_config() { }; cfg_controller.update(change).unwrap(); - svr_cfg.snap_max_write_bytes_per_sec = ReadableSize::mb(512); + svr_cfg.snap_io_max_bytes_per_sec = ReadableSize::mb(512); svr_cfg.concurrent_send_snap_limit = 100; // config should be updated assert_eq!(snap_mgr.get_speed_limit() as u64, 536870912); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index a25a43ce6e1..102d695b2de 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -113,7 +113,7 @@ fn test_serde_custom_tikv_config() { end_point_request_max_handle_duration: ReadableDuration::secs(12), end_point_max_concurrency: 10, end_point_perf_level: PerfLevel::EnableTime, - snap_max_write_bytes_per_sec: ReadableSize::mb(10), + snap_io_max_bytes_per_sec: ReadableSize::mb(10), snap_max_total_size: ReadableSize::gb(10), 
stats_concurrency: 10, heavy_load_threshold: 25, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index d79ec7899e2..722bdf0c56b 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -74,7 +74,7 @@ end-point-enable-batch-if-possible = true end-point-request-max-handle-duration = "12s" end-point-max-concurrency = 10 end-point-perf-level = 5 -snap-max-write-bytes-per-sec = "10MB" +snap-io-max-bytes-per-sec = "10MB" snap-max-total-size = "10GB" stats-concurrency = 10 heavy-load-threshold = 25 diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index 4d9290b4eff..0ca576e5e9a 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -508,7 +508,7 @@ fn test_inspected_snapshot() { #[test] fn test_gen_during_heavy_recv() { let mut cluster = new_server_cluster(0, 3); - cluster.cfg.server.snap_max_write_bytes_per_sec = ReadableSize(5 * 1024 * 1024); + cluster.cfg.server.snap_io_max_bytes_per_sec = ReadableSize(5 * 1024 * 1024); cluster.cfg.raft_store.snap_mgr_gc_tick_interval = ReadableDuration(Duration::from_secs(100)); let pd_client = Arc::clone(&cluster.pd_client); From 64a41f042283255e43a285357b999b26853a3e8f Mon Sep 17 00:00:00 2001 From: Yifan Xu <30385241+xuyifangreeneyes@users.noreply.github.com> Date: Fri, 10 Mar 2023 17:59:13 +0800 Subject: [PATCH 0577/1149] coprocessor: reuse EvalContext in collect_column_stats (#14376) ref tikv/tikv#14231 Signed-off-by: xuyifan <675434007@qq.com> --- src/coprocessor/statistics/analyze.rs | 60 ++++++++++----------------- 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index 6b486c3bb7e..a49ac72398e 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -376,6 +376,7 @@ impl RowSampleBuilder { let 
mut is_drained = false; let mut collector = self.new_collector(); + let mut ctx = EvalContext::default(); while !is_drained { let mut sample = self.quota_limiter.new_sample(!self.is_auto_analyze); let mut read_size: usize = 0; @@ -400,7 +401,7 @@ impl RowSampleBuilder { columns_slice[i].encode( *logical_row, &self.columns_info[i], - &mut EvalContext::default(), + &mut ctx, &mut column_vals[i], )?; if self.columns_info[i].as_accessor().is_string_like() { @@ -408,7 +409,7 @@ impl RowSampleBuilder { TT, match self.columns_info[i].as_accessor().collation()? { Collation::TT => { let mut mut_val = &column_vals[i][..]; - let decoded_val = table::decode_col_value(&mut mut_val, &mut EvalContext::default(), &self.columns_info[i])?; + let decoded_val = table::decode_col_value(&mut mut_val, &mut ctx, &self.columns_info[i])?; if decoded_val == Datum::Null { collation_key_vals[i].clone_from(&column_vals[i]); } else { @@ -895,6 +896,7 @@ impl SampleBuilder { let mut common_handle_hist = Histogram::new(self.max_bucket_size); let mut common_handle_cms = CmSketch::new(self.cm_sketch_depth, self.cm_sketch_width); let mut common_handle_fms = FmSketch::new(self.max_fm_sketch_size); + let mut ctx = EvalContext::default(); while !is_drained { let result = self.data.next_batch(BATCH_MAX_SIZE).await; is_drained = result.is_drained?.stop(); @@ -904,12 +906,7 @@ impl SampleBuilder { if columns_without_handle_len + 1 == columns_slice.len() { for logical_row in &result.logical_rows { let mut data = vec![]; - columns_slice[0].encode( - *logical_row, - &columns_info[0], - &mut EvalContext::default(), - &mut data, - )?; + columns_slice[0].encode(*logical_row, &columns_info[0], &mut ctx, &mut data)?; pk_builder.append(&data, false); } columns_slice = &columns_slice[1..]; @@ -929,7 +926,7 @@ impl SampleBuilder { columns_slice[i].encode( *logical_row, &columns_info[i], - &mut EvalContext::default(), + &mut ctx, &mut handle_col_val, )?; data.extend_from_slice(&handle_col_val); @@ -974,12 +971,7 @@ 
impl SampleBuilder { for (i, collector) in collectors.iter_mut().enumerate() { for logical_row in &result.logical_rows { let mut val = vec![]; - columns_slice[i].encode( - *logical_row, - &columns_info[i], - &mut EvalContext::default(), - &mut val, - )?; + columns_slice[i].encode(*logical_row, &columns_info[i], &mut ctx, &mut val)?; // This is a workaround for different encoding methods used by TiDB and TiKV for // CM Sketch. We need this because we must ensure we are using the same encoding @@ -1000,9 +992,8 @@ impl SampleBuilder { INT_FLAG | UINT_FLAG | DURATION_FLAG => { let mut mut_val = &val[..]; let decoded_val = mut_val.read_datum()?; - let flattened = - table::flatten(&mut EvalContext::default(), decoded_val)?; - encode_value(&mut EvalContext::default(), &[flattened])? + let flattened = table::flatten(&mut ctx, decoded_val)?; + encode_value(&mut ctx, &[flattened])? } _ => val, }; @@ -1012,14 +1003,14 @@ impl SampleBuilder { TT, match columns_info[i].as_accessor().collation()? { Collation::TT => { let mut mut_val = &val[..]; - let decoded_val = table::decode_col_value(&mut mut_val, &mut EvalContext::default(), &columns_info[i])?; + let decoded_val = table::decode_col_value(&mut mut_val, &mut ctx, &columns_info[i])?; if decoded_val == Datum::Null { val } else { // Only if the `decoded_val` is Datum::Null, `decoded_val` is a Ok(None). // So it is safe the unwrap the Ok value. let decoded_sorted_val = TT::sort_key(&decoded_val.as_string()?.unwrap().into_owned())?; - encode_value(&mut EvalContext::default(), &[Datum::Bytes(decoded_sorted_val)])? + encode_value(&mut ctx, &[Datum::Bytes(decoded_sorted_val)])? 
} } } @@ -1237,8 +1228,9 @@ mod tests { ); let cases = vec![Datum::I64(1), Datum::Null, Datum::I64(2), Datum::I64(5)]; + let mut ctx = EvalContext::default(); for data in cases { - sample.collect(datum::encode_value(&mut EvalContext::default(), &[data]).unwrap()); + sample.collect(datum::encode_value(&mut ctx, &[data]).unwrap()); } assert_eq!(sample.samples.len(), max_sample_size); assert_eq!(sample.null_count, 1); @@ -1254,10 +1246,9 @@ mod tests { let loop_cnt = 1000; let mut item_cnt: HashMap, usize> = HashMap::new(); let mut nums: Vec> = Vec::with_capacity(row_num); + let mut ctx = EvalContext::default(); for i in 0..row_num { - nums.push( - datum::encode_value(&mut EvalContext::default(), &[Datum::I64(i as i64)]).unwrap(), - ); + nums.push(datum::encode_value(&mut ctx, &[Datum::I64(i as i64)]).unwrap()); } for loop_i in 0..loop_cnt { let mut collector = ReservoirRowSampleCollector::new(sample_num, 1000, 1); @@ -1302,10 +1293,9 @@ mod tests { let loop_cnt = 1000; let mut item_cnt: HashMap, usize> = HashMap::new(); let mut nums: Vec> = Vec::with_capacity(row_num); + let mut ctx = EvalContext::default(); for i in 0..row_num { - nums.push( - datum::encode_value(&mut EvalContext::default(), &[Datum::I64(i as i64)]).unwrap(), - ); + nums.push(datum::encode_value(&mut ctx, &[Datum::I64(i as i64)]).unwrap()); } for loop_i in 0..loop_cnt { let mut collector = @@ -1348,10 +1338,9 @@ mod tests { let sample_num = 0; // abnormal. 
let row_num = 100; let mut nums: Vec> = Vec::with_capacity(row_num); + let mut ctx = EvalContext::default(); for i in 0..row_num { - nums.push( - datum::encode_value(&mut EvalContext::default(), &[Datum::I64(i as i64)]).unwrap(), - ); + nums.push(datum::encode_value(&mut ctx, &[Datum::I64(i as i64)]).unwrap()); } { // Test for ReservoirRowSampleCollector @@ -1409,19 +1398,16 @@ mod benches { } let mut column_vals = Vec::new(); let mut collation_key_vals = Vec::new(); + let mut ctx = EvalContext::default(); for i in 0..columns_info.len() { let mut val = vec![]; columns_slice[i] - .encode(0, &columns_info[i], &mut EvalContext::default(), &mut val) + .encode(0, &columns_info[i], &mut ctx, &mut val) .unwrap(); if columns_info[i].as_accessor().is_string_like() { let mut mut_val = &val[..]; - let decoded_val = table::decode_col_value( - &mut mut_val, - &mut EvalContext::default(), - &columns_info[i], - ) - .unwrap(); + let decoded_val = + table::decode_col_value(&mut mut_val, &mut ctx, &columns_info[i]).unwrap(); let decoded_sorted_val = CollatorUtf8Mb4Bin::sort_key(&decoded_val.as_string().unwrap().unwrap()) .unwrap(); From fc9cf096b3eddc48e2f1df09dd6d716198067410 Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 10 Mar 2023 18:15:13 +0800 Subject: [PATCH 0578/1149] *: more accurate time detail in kv response (#14358) close tikv/tikv#14262 Co-authored-by: Ti Chi Robot --- components/tikv_kv/src/stats.rs | 8 +- src/coprocessor/endpoint.rs | 348 +++++++++++------- src/coprocessor/tracker.rs | 28 +- src/server/service/kv.rs | 54 ++- src/storage/mod.rs | 16 +- tests/integrations/config/mod.rs | 2 + tests/integrations/config/test-custom.toml | 3 + tests/integrations/coprocessor/test_select.rs | 3 + tests/integrations/server/kv_service.rs | 17 +- 9 files changed, 303 insertions(+), 176 deletions(-) diff --git a/components/tikv_kv/src/stats.rs b/components/tikv_kv/src/stats.rs index 4362f5d57ca..d38c97397ee 100644 --- a/components/tikv_kv/src/stats.rs +++ 
b/components/tikv_kv/src/stats.rs @@ -283,8 +283,8 @@ impl StatisticsSummary { /// ``` #[derive(Debug, Default, Copy, Clone)] pub struct StageLatencyStats { - pub schedule_wait_time_ms: u64, - pub snapshot_wait_time_ms: u64, - pub wait_wall_time_ms: u64, - pub process_wall_time_ms: u64, + pub schedule_wait_time_ns: u64, + pub snapshot_wait_time_ns: u64, + pub wait_wall_time_ns: u64, + pub process_wall_time_ns: u64, } diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 3ba320149ac..43bf20f582b 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -878,7 +878,7 @@ mod tests { /// A unary `RequestHandler` that always produces a fixture. struct UnaryFixture { - handle_duration_millis: u64, + handle_duration: Duration, yieldable: bool, result: Option>, } @@ -886,7 +886,7 @@ mod tests { impl UnaryFixture { pub fn new(result: Result) -> UnaryFixture { UnaryFixture { - handle_duration_millis: 0, + handle_duration: Default::default(), yieldable: false, result: Some(result), } @@ -894,10 +894,10 @@ mod tests { pub fn new_with_duration( result: Result, - handle_duration_millis: u64, + handle_duration: Duration, ) -> UnaryFixture { UnaryFixture { - handle_duration_millis, + handle_duration, yieldable: false, result: Some(result), } @@ -905,10 +905,10 @@ mod tests { pub fn new_with_duration_yieldable( result: Result, - handle_duration_millis: u64, + handle_duration: Duration, ) -> UnaryFixture { UnaryFixture { - handle_duration_millis, + handle_duration, yieldable: true, result: Some(result), } @@ -920,13 +920,15 @@ mod tests { async fn handle_request(&mut self) -> Result> { if self.yieldable { // We split the task into small executions of 100 milliseconds. 
- for _ in 0..self.handle_duration_millis / 100 { + for _ in 0..self.handle_duration.as_millis() as u64 / 100 { thread::sleep(Duration::from_millis(100)); yatp::task::future::reschedule().await; } - thread::sleep(Duration::from_millis(self.handle_duration_millis % 100)); + thread::sleep(Duration::from_millis( + self.handle_duration.as_millis() as u64 % 100, + )); } else { - thread::sleep(Duration::from_millis(self.handle_duration_millis)); + thread::sleep(self.handle_duration); } self.result.take().unwrap().map(|x| x.into()) @@ -937,7 +939,7 @@ mod tests { struct StreamFixture { result_len: usize, result_iter: vec::IntoIter>, - handle_durations_millis: vec::IntoIter, + handle_durations: vec::IntoIter, nth: usize, } @@ -947,20 +949,20 @@ mod tests { StreamFixture { result_len: len, result_iter: result.into_iter(), - handle_durations_millis: vec![0; len].into_iter(), + handle_durations: vec![Duration::default(); len].into_iter(), nth: 0, } } pub fn new_with_duration( result: Vec>, - handle_durations_millis: Vec, + handle_durations: Vec, ) -> StreamFixture { - assert_eq!(result.len(), handle_durations_millis.len()); + assert_eq!(result.len(), handle_durations.len()); StreamFixture { result_len: result.len(), result_iter: result.into_iter(), - handle_durations_millis: handle_durations_millis.into_iter(), + handle_durations: handle_durations.into_iter(), nth: 0, } } @@ -980,8 +982,8 @@ mod tests { Ok((None, is_finished)) } Some(val) => { - let handle_duration_ms = self.handle_durations_millis.next().unwrap(); - thread::sleep(Duration::from_millis(handle_duration_ms)); + let handle_duration = self.handle_durations.next().unwrap(); + thread::sleep(handle_duration); match val { Ok(resp) => Ok((Some(resp), is_finished)), Err(e) => Err(e), @@ -1198,7 +1200,10 @@ mod tests { context.set_priority(kvrpcpb::CommandPri::Normal); let handler_builder = Box::new(|_, _: &_| { - Ok(UnaryFixture::new_with_duration(Ok(response), 1000).into_boxed()) + Ok( + 
UnaryFixture::new_with_duration(Ok(response), Duration::from_millis(1000)) + .into_boxed(), + ) }); let future = copr.handle_unary_request(ReqContext::default_for_test(), handler_builder); let tx = tx.clone(); @@ -1466,20 +1471,20 @@ mod tests { use tikv_util::config::ReadableDuration; /// Asserted that the snapshot can be retrieved in 500ms. - const SNAPSHOT_DURATION_MS: u64 = 500; + const SNAPSHOT_DURATION: Duration = Duration::from_millis(500); /// Asserted that the delay caused by OS scheduling other tasks is /// smaller than 200ms. This is mostly for CI. - const HANDLE_ERROR_MS: u64 = 200; + const HANDLE_ERROR: Duration = Duration::from_millis(200); /// The acceptable error range for a coarse timer. Note that we use /// CLOCK_MONOTONIC_COARSE which can be slewed by time /// adjustment code (e.g., NTP, PTP). - const COARSE_ERROR_MS: u64 = 50; + const COARSE_ERROR: Duration = Duration::from_millis(50); /// The duration that payload executes. - const PAYLOAD_SMALL: u64 = 3000; - const PAYLOAD_LARGE: u64 = 6000; + const PAYLOAD_SMALL: Duration = Duration::from_millis(3000); + const PAYLOAD_LARGE: Duration = Duration::from_millis(6000); let engine = TestEngineBuilder::new().build().unwrap(); @@ -1494,7 +1499,7 @@ mod tests { )); let config = Config { - end_point_request_max_handle_duration: ReadableDuration::millis( + end_point_request_max_handle_duration: ReadableDuration( (PAYLOAD_SMALL + PAYLOAD_LARGE) * 2, ), ..Default::default() @@ -1516,7 +1521,7 @@ mod tests { req_with_exec_detail.context.set_record_time_stat(true); { - let mut wait_time: u64 = 0; + let mut wait_time: Duration = Duration::default(); // Request 1: Unary, success response. let handler_builder = Box::new(|_, _: &_| { @@ -1530,7 +1535,7 @@ mod tests { let sender = tx.clone(); thread::spawn(move || sender.send(vec![block_on(resp_future_1).unwrap()]).unwrap()); // Sleep a while to make sure that thread is spawn and snapshot is taken. 
- thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS)); + thread::sleep(SNAPSHOT_DURATION); // Request 2: Unary, error response. let handler_builder = Box::new(|_, _: &_| { @@ -1543,63 +1548,95 @@ mod tests { copr.handle_unary_request(req_with_exec_detail.clone(), handler_builder); let sender = tx.clone(); thread::spawn(move || sender.send(vec![block_on(resp_future_2).unwrap()]).unwrap()); - thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS)); + thread::sleep(SNAPSHOT_DURATION); // Response 1 let resp = &rx.recv().unwrap()[0]; assert!(resp.get_other_error().is_empty()); assert_ge!( - resp.get_exec_details() - .get_time_detail() - .get_process_wall_time_ms(), - PAYLOAD_SMALL.saturating_sub(COARSE_ERROR_MS) + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_process_wall_time_ns() + ), + PAYLOAD_SMALL.saturating_sub(COARSE_ERROR) ); assert_lt!( - resp.get_exec_details() - .get_time_detail() - .get_process_wall_time_ms(), - PAYLOAD_SMALL + HANDLE_ERROR_MS + COARSE_ERROR_MS + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_process_wall_time_ns() + ), + PAYLOAD_SMALL + HANDLE_ERROR + COARSE_ERROR ); assert_ge!( - resp.get_exec_details() - .get_time_detail() - .get_wait_wall_time_ms(), - wait_time.saturating_sub(HANDLE_ERROR_MS + COARSE_ERROR_MS) + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_wait_wall_time_ns() + ), + wait_time.saturating_sub(HANDLE_ERROR + COARSE_ERROR) ); assert_lt!( - resp.get_exec_details() - .get_time_detail() - .get_wait_wall_time_ms(), - wait_time + HANDLE_ERROR_MS + COARSE_ERROR_MS + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_wait_wall_time_ns() + ), + wait_time + HANDLE_ERROR + COARSE_ERROR ); - wait_time += PAYLOAD_SMALL - SNAPSHOT_DURATION_MS; + wait_time += PAYLOAD_SMALL - SNAPSHOT_DURATION; // Response 2 let resp = &rx.recv().unwrap()[0]; assert!(!resp.get_other_error().is_empty()); 
assert_ge!( - resp.get_exec_details() - .get_time_detail() - .get_process_wall_time_ms(), - PAYLOAD_LARGE.saturating_sub(COARSE_ERROR_MS) + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_process_wall_time_ns() + ), + PAYLOAD_LARGE.saturating_sub(COARSE_ERROR) ); assert_lt!( - resp.get_exec_details() - .get_time_detail() - .get_process_wall_time_ms(), - PAYLOAD_LARGE + HANDLE_ERROR_MS + COARSE_ERROR_MS + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_process_wall_time_ns() + ), + PAYLOAD_LARGE + HANDLE_ERROR + COARSE_ERROR ); assert_ge!( - resp.get_exec_details() - .get_time_detail() - .get_wait_wall_time_ms(), - wait_time.saturating_sub(HANDLE_ERROR_MS + COARSE_ERROR_MS) + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_wait_wall_time_ns() + ), + wait_time.saturating_sub(HANDLE_ERROR + COARSE_ERROR) ); assert_lt!( - resp.get_exec_details() - .get_time_detail() - .get_wait_wall_time_ms(), - wait_time + HANDLE_ERROR_MS + COARSE_ERROR_MS + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_wait_wall_time_ns() + ), + wait_time + HANDLE_ERROR + COARSE_ERROR + ); + + // check TimeDetail and TimeDetailV2 has the same value. + let time_detail = resp.get_exec_details_v2().get_time_detail(); + let time_detail_v2 = resp.get_exec_details_v2().get_time_detail_v2(); + assert_eq!( + time_detail.get_process_wall_time_ms(), + time_detail_v2.get_process_wall_time_ns() / 1_000_000, + ); + assert_eq!( + time_detail.get_wait_wall_time_ms(), + time_detail_v2.get_wait_wall_time_ns() / 1_000_000, + ); + assert_eq!( + time_detail.get_kv_read_wall_time_ms(), + time_detail_v2.get_kv_read_wall_time_ns() / 1_000_000, ); } @@ -1618,7 +1655,7 @@ mod tests { let sender = tx.clone(); thread::spawn(move || sender.send(vec![block_on(resp_future_1).unwrap()]).unwrap()); // Sleep a while to make sure that thread is spawn and snapshot is taken. 
- thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS)); + thread::sleep(SNAPSHOT_DURATION); // Request 2: Unary, error response. let handler_builder = Box::new(|_, _: &_| { @@ -1631,7 +1668,7 @@ mod tests { copr.handle_unary_request(req_with_exec_detail.clone(), handler_builder); let sender = tx.clone(); thread::spawn(move || sender.send(vec![block_on(resp_future_2).unwrap()]).unwrap()); - thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS)); + thread::sleep(SNAPSHOT_DURATION); // Response 1 // @@ -1644,16 +1681,20 @@ mod tests { let resp = &rx.recv().unwrap()[0]; assert!(resp.get_other_error().is_empty()); assert_ge!( - resp.get_exec_details() - .get_time_detail() - .get_process_wall_time_ms(), - PAYLOAD_SMALL.saturating_sub(COARSE_ERROR_MS) + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_process_wall_time_ns() + ), + PAYLOAD_SMALL.saturating_sub(COARSE_ERROR) ); assert_lt!( - resp.get_exec_details() - .get_time_detail() - .get_process_wall_time_ms(), - PAYLOAD_SMALL + PAYLOAD_LARGE + HANDLE_ERROR_MS + COARSE_ERROR_MS + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_process_wall_time_ns() + ), + PAYLOAD_SMALL + PAYLOAD_LARGE + HANDLE_ERROR + COARSE_ERROR ); // Response 2 @@ -1667,21 +1708,25 @@ mod tests { let resp = &rx.recv().unwrap()[0]; assert!(!resp.get_other_error().is_empty()); assert_ge!( - resp.get_exec_details() - .get_time_detail() - .get_process_wall_time_ms(), - PAYLOAD_LARGE.saturating_sub(COARSE_ERROR_MS) + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_process_wall_time_ns() + ), + PAYLOAD_LARGE.saturating_sub(COARSE_ERROR) ); assert_lt!( - resp.get_exec_details() - .get_time_detail() - .get_process_wall_time_ms(), - PAYLOAD_SMALL + PAYLOAD_LARGE + HANDLE_ERROR_MS + COARSE_ERROR_MS + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_process_wall_time_ns() + ), + PAYLOAD_SMALL + PAYLOAD_LARGE + 
HANDLE_ERROR + COARSE_ERROR ); } { - let mut wait_time: u64 = 0; + let mut wait_time = Duration::default(); // Request 1: Unary, success response. let handler_builder = Box::new(|_, _: &_| { @@ -1695,7 +1740,7 @@ mod tests { let sender = tx.clone(); thread::spawn(move || sender.send(vec![block_on(resp_future_1).unwrap()]).unwrap()); // Sleep a while to make sure that thread is spawn and snapshot is taken. - thread::sleep(Duration::from_millis(SNAPSHOT_DURATION_MS)); + thread::sleep(SNAPSHOT_DURATION); // Request 2: Stream. let handler_builder = Box::new(|_, _: &_| { @@ -1726,92 +1771,116 @@ mod tests { let resp = &rx.recv().unwrap()[0]; assert!(resp.get_other_error().is_empty()); assert_ge!( - resp.get_exec_details() - .get_time_detail() - .get_process_wall_time_ms(), - PAYLOAD_LARGE.saturating_sub(COARSE_ERROR_MS) + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_process_wall_time_ns() + ), + PAYLOAD_LARGE.saturating_sub(COARSE_ERROR) ); assert_lt!( - resp.get_exec_details() - .get_time_detail() - .get_process_wall_time_ms(), - PAYLOAD_LARGE + HANDLE_ERROR_MS + COARSE_ERROR_MS + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_process_wall_time_ns() + ), + PAYLOAD_LARGE + HANDLE_ERROR + COARSE_ERROR ); assert_ge!( - resp.get_exec_details() - .get_time_detail() - .get_wait_wall_time_ms(), - wait_time.saturating_sub(HANDLE_ERROR_MS + COARSE_ERROR_MS) + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_wait_wall_time_ns() + ), + wait_time.saturating_sub(HANDLE_ERROR + COARSE_ERROR) ); assert_lt!( - resp.get_exec_details() - .get_time_detail() - .get_wait_wall_time_ms(), - wait_time + HANDLE_ERROR_MS + COARSE_ERROR_MS + Duration::from_nanos( + resp.get_exec_details_v2() + .get_time_detail_v2() + .get_wait_wall_time_ns() + ), + wait_time + HANDLE_ERROR + COARSE_ERROR ); - wait_time += PAYLOAD_LARGE - SNAPSHOT_DURATION_MS; + wait_time += PAYLOAD_LARGE - SNAPSHOT_DURATION; 
// Response 2 let resp = &rx.recv().unwrap(); assert_eq!(resp.len(), 2); assert!(resp[0].get_other_error().is_empty()); assert_ge!( - resp[0] - .get_exec_details() - .get_time_detail() - .get_process_wall_time_ms(), - PAYLOAD_SMALL.saturating_sub(COARSE_ERROR_MS) + Duration::from_nanos( + resp[0] + .get_exec_details_v2() + .get_time_detail_v2() + .get_process_wall_time_ns() + ), + PAYLOAD_SMALL.saturating_sub(COARSE_ERROR) ); assert_lt!( - resp[0] - .get_exec_details() - .get_time_detail() - .get_process_wall_time_ms(), - PAYLOAD_SMALL + HANDLE_ERROR_MS + COARSE_ERROR_MS + Duration::from_nanos( + resp[0] + .get_exec_details_v2() + .get_time_detail_v2() + .get_process_wall_time_ns() + ), + PAYLOAD_SMALL + HANDLE_ERROR + COARSE_ERROR ); assert_ge!( - resp[0] - .get_exec_details() - .get_time_detail() - .get_wait_wall_time_ms(), - wait_time.saturating_sub(HANDLE_ERROR_MS + COARSE_ERROR_MS) + Duration::from_nanos( + resp[0] + .get_exec_details_v2() + .get_time_detail_v2() + .get_wait_wall_time_ns() + ), + wait_time.saturating_sub(HANDLE_ERROR + COARSE_ERROR) ); assert_lt!( - resp[0] - .get_exec_details() - .get_time_detail() - .get_wait_wall_time_ms(), - wait_time + HANDLE_ERROR_MS + COARSE_ERROR_MS + Duration::from_nanos( + resp[0] + .get_exec_details_v2() + .get_time_detail_v2() + .get_wait_wall_time_ns() + ), + wait_time + HANDLE_ERROR + COARSE_ERROR ); assert!(!resp[1].get_other_error().is_empty()); assert_ge!( - resp[1] - .get_exec_details() - .get_time_detail() - .get_process_wall_time_ms(), - PAYLOAD_LARGE.saturating_sub(COARSE_ERROR_MS) + Duration::from_nanos( + resp[1] + .get_exec_details_v2() + .get_time_detail_v2() + .get_process_wall_time_ns() + ), + PAYLOAD_LARGE.saturating_sub(COARSE_ERROR) ); assert_lt!( - resp[1] - .get_exec_details() - .get_time_detail() - .get_process_wall_time_ms(), - PAYLOAD_LARGE + HANDLE_ERROR_MS + COARSE_ERROR_MS + Duration::from_nanos( + resp[1] + .get_exec_details_v2() + .get_time_detail_v2() + .get_process_wall_time_ns() + ), 
+ PAYLOAD_LARGE + HANDLE_ERROR + COARSE_ERROR ); assert_ge!( - resp[1] - .get_exec_details() - .get_time_detail() - .get_wait_wall_time_ms(), - wait_time.saturating_sub(HANDLE_ERROR_MS + COARSE_ERROR_MS) + Duration::from_nanos( + resp[1] + .get_exec_details_v2() + .get_time_detail_v2() + .get_wait_wall_time_ns() + ), + wait_time.saturating_sub(HANDLE_ERROR + COARSE_ERROR) ); assert_lt!( - resp[1] - .get_exec_details() - .get_time_detail() - .get_wait_wall_time_ms(), - wait_time + HANDLE_ERROR_MS + COARSE_ERROR_MS + Duration::from_nanos( + resp[1] + .get_exec_details_v2() + .get_time_detail_v2() + .get_wait_wall_time_ns() + ), + wait_time + HANDLE_ERROR + COARSE_ERROR ); } } @@ -1848,10 +1917,11 @@ mod tests { { let handler_builder = Box::new(|_, _: &_| { - Ok( - UnaryFixture::new_with_duration_yieldable(Ok(coppb::Response::default()), 1500) - .into_boxed(), + Ok(UnaryFixture::new_with_duration_yieldable( + Ok(coppb::Response::default()), + Duration::from_millis(1500), ) + .into_boxed()) }); let mut config = ReqContext::default_for_test(); diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index 9c0b79ff8b8..ca726be9a43 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -67,7 +67,7 @@ pub struct Tracker { total_process_time: Duration, total_storage_stats: Statistics, slow_log_threshold: Duration, - scan_process_time_ms: u64, + scan_process_time_ns: u64, pub buckets: Option>, @@ -96,7 +96,7 @@ impl Tracker { total_suspend_time: Duration::default(), total_process_time: Duration::default(), total_storage_stats: Statistics::default(), - scan_process_time_ms: 0, + scan_process_time_ns: 0, slow_log_threshold, req_ctx, buckets: None, @@ -175,7 +175,7 @@ impl Tracker { } pub fn collect_scan_process_time(&mut self, exec_summary: ExecSummary) { - self.scan_process_time_ms = (exec_summary.time_processed_ns / 1000000) as u64; + self.scan_process_time_ns = exec_summary.time_processed_ns as u64; } /// Get current item's ExecDetail 
according to previous collected metrics. @@ -183,7 +183,7 @@ impl Tracker { /// WARN: TRY BEST NOT TO USE THIS FUNCTION. pub fn get_item_exec_details(&self) -> (kvrpcpb::ExecDetails, kvrpcpb::ExecDetailsV2) { if let TrackerState::ItemFinished(_) = self.current_stage { - self.exec_details(self.item_process_time) + self.exec_details(self.item_process_time, self.item_suspend_time) } else { unreachable!() } @@ -194,27 +194,39 @@ impl Tracker { pub fn get_exec_details(&self) -> (kvrpcpb::ExecDetails, kvrpcpb::ExecDetailsV2) { if let TrackerState::ItemFinished(_) = self.current_stage { // TODO: Separate process time and suspend time - self.exec_details(self.total_process_time + self.total_suspend_time) + self.exec_details(self.total_process_time, self.total_suspend_time) } else { unreachable!() } } - fn exec_details(&self, measure: Duration) -> (kvrpcpb::ExecDetails, kvrpcpb::ExecDetailsV2) { + fn exec_details( + &self, + process_time: Duration, + suspend_time: Duration, + ) -> (kvrpcpb::ExecDetails, kvrpcpb::ExecDetailsV2) { // For compatibility, ExecDetails field is still filled. let mut exec_details = kvrpcpb::ExecDetails::default(); + // TimeDetail is deprecated, we only keep it for backward compatibility. 
let mut td = kvrpcpb::TimeDetail::default(); - td.set_process_wall_time_ms(time::duration_to_ms(measure)); + td.set_process_wall_time_ms(time::duration_to_ms(process_time)); td.set_wait_wall_time_ms(time::duration_to_ms(self.wait_time)); - td.set_kv_read_wall_time_ms(self.scan_process_time_ms); + td.set_kv_read_wall_time_ms(self.scan_process_time_ns / 1_000_000); exec_details.set_time_detail(td.clone()); let detail = self.total_storage_stats.scan_detail(); exec_details.set_scan_detail(detail); + let mut td_v2 = kvrpcpb::TimeDetailV2::default(); + td_v2.set_process_wall_time_ns(process_time.as_nanos() as u64); + td_v2.set_process_suspend_wall_time_ns(suspend_time.as_nanos() as u64); + td_v2.set_wait_wall_time_ns(self.wait_time.as_nanos() as u64); + td_v2.set_kv_read_wall_time_ns(self.scan_process_time_ns); + let mut exec_details_v2 = kvrpcpb::ExecDetailsV2::default(); exec_details_v2.set_time_detail(td); + exec_details_v2.set_time_detail_v2(td_v2); let mut detail_v2 = ScanDetailV2::default(); detail_v2.set_processed_versions(self.total_storage_stats.write.processed_keys as u64); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index f0d0009b8e6..6fc3a3ebd76 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1,7 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
// #[PerformanceCriticalPath]: TiKV gRPC APIs implementation -use std::{mem, sync::Arc}; +use std::{mem, sync::Arc, time::Duration}; use api_version::KvFormat; use fail::fail_point; @@ -27,12 +27,12 @@ use raftstore::{ Error as RaftStoreError, Result as RaftStoreResult, }; use tikv_alloc::trace::MemoryTraceGuard; -use tikv_kv::RaftExtension; +use tikv_kv::{RaftExtension, StageLatencyStats}; use tikv_util::{ future::{paired_future_callback, poll_future_notify}, mpsc::future::{unbounded, BatchReceiver, Sender, WakePolicy}, sys::memory_usage_reaches_high_water, - time::{duration_to_ms, duration_to_sec, Instant}, + time::Instant, worker::Scheduler, }; use tracker::{set_tls_tracker_token, RequestInfo, RequestType, Tracker, GLOBAL_TRACKERS}; @@ -214,6 +214,10 @@ macro_rules! set_total_time { .mut_exec_details_v2() .mut_time_detail() .set_total_rpc_wall_time_ns($duration.as_nanos() as u64); + $resp + .mut_exec_details_v2() + .mut_time_detail_v2() + .set_total_rpc_wall_time_ns($duration.as_nanos() as u64); }; } @@ -578,7 +582,7 @@ impl Tikv for Service { Ok(_) => { GRPC_MSG_HISTOGRAM_STATIC .coprocessor_stream - .observe(duration_to_sec(begin_instant.saturating_elapsed())); + .observe(begin_instant.saturating_elapsed().as_secs_f64()); let _ = sink.close().await; } Err(e) => { @@ -795,7 +799,7 @@ impl Tikv for Service { sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .split_region - .observe(duration_to_sec(begin_instant.saturating_elapsed())); + .observe(begin_instant.saturating_elapsed().as_secs_f64()); ServerResult::Ok(()) } .map_err(|e| { @@ -1220,6 +1224,9 @@ fn handle_measures_for_batch_commands(measures: &mut MeasuredBatchResponse) { exec_details .mut_time_detail() .set_total_rpc_wall_time_ns(elapsed.as_nanos() as u64); + exec_details + .mut_time_detail_v2() + .set_total_rpc_wall_time_ns(elapsed.as_nanos() as u64); } } } @@ -1262,7 +1269,7 @@ fn future_get( async move { let v = v.await; - let duration_ms = duration_to_ms(start.saturating_elapsed()); + let 
duration = start.saturating_elapsed(); let mut resp = GetResponse::default(); if let Some(err) = extract_region_error(&v) { resp.set_region_error(err); @@ -1275,10 +1282,7 @@ fn future_get( GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { tracker.write_scan_detail(scan_detail_v2); }); - let time_detail = exec_detail_v2.mut_time_detail(); - time_detail.set_kv_read_wall_time_ms(duration_ms); - time_detail.set_wait_wall_time_ms(stats.latency_stats.wait_wall_time_ms); - time_detail.set_process_wall_time_ms(stats.latency_stats.process_wall_time_ms); + set_time_detail(exec_detail_v2, duration, &stats.latency_stats); match val { Some(val) => resp.set_value(val), None => resp.set_not_found(true), @@ -1292,6 +1296,29 @@ fn future_get( } } +fn set_time_detail( + exec_detail_v2: &mut ExecDetailsV2, + total_dur: Duration, + stats: &StageLatencyStats, +) { + let duration_ns = total_dur.as_nanos() as u64; + // deprecated. we will remove the `time_detail` field in future version. + { + let time_detail = exec_detail_v2.mut_time_detail(); + time_detail.set_kv_read_wall_time_ms(duration_ns / 1_000_000); + time_detail.set_wait_wall_time_ms(stats.wait_wall_time_ns / 1_000_000); + time_detail.set_process_wall_time_ms(stats.process_wall_time_ns / 1_000_000); + } + + let time_detail_v2 = exec_detail_v2.mut_time_detail_v2(); + time_detail_v2.set_kv_read_wall_time_ns(duration_ns); + time_detail_v2.set_wait_wall_time_ns(stats.wait_wall_time_ns); + time_detail_v2.set_process_wall_time_ns(stats.process_wall_time_ns); + // currently, the schedule suspend_wall_time is always 0 for get and + // batch_get. TODO: once we support aync-io, we may also count the + // schedule suspend duration here. 
+} + fn future_scan( storage: &Storage, mut req: ScanRequest, @@ -1356,7 +1383,7 @@ fn future_batch_get( async move { let v = v.await; - let duration_ms = duration_to_ms(start.saturating_elapsed()); + let duration = start.saturating_elapsed(); let mut resp = BatchGetResponse::default(); if let Some(err) = extract_region_error(&v) { resp.set_region_error(err); @@ -1370,10 +1397,7 @@ fn future_batch_get( GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { tracker.write_scan_detail(scan_detail_v2); }); - let time_detail = exec_detail_v2.mut_time_detail(); - time_detail.set_kv_read_wall_time_ms(duration_ms); - time_detail.set_wait_wall_time_ms(stats.latency_stats.wait_wall_time_ms); - time_detail.set_process_wall_time_ms(stats.latency_stats.process_wall_time_ms); + set_time_detail(exec_detail_v2, duration, &stats.latency_stats); resp.set_pairs(pairs.into()); } Err(e) => { diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 8c58274bc33..8f955f3850d 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -713,10 +713,10 @@ impl Storage { let process_wall_time = stage_finished_ts.saturating_duration_since(stage_snap_recv_ts); let latency_stats = StageLatencyStats { - schedule_wait_time_ms: duration_to_ms(schedule_wait_time), - snapshot_wait_time_ms: duration_to_ms(snapshot_wait_time), - wait_wall_time_ms: duration_to_ms(wait_wall_time), - process_wall_time_ms: duration_to_ms(process_wall_time), + schedule_wait_time_ns: schedule_wait_time.as_nanos() as u64, + snapshot_wait_time_ns: snapshot_wait_time.as_nanos() as u64, + wait_wall_time_ns: wait_wall_time.as_nanos() as u64, + process_wall_time_ns: process_wall_time.as_nanos() as u64, }; with_tls_tracker(|tracker| { tracker.metrics.read_pool_schedule_wait_nanos = @@ -1077,10 +1077,10 @@ impl Storage { schedule_wait_time.as_nanos() as u64; }); let latency_stats = StageLatencyStats { - schedule_wait_time_ms: duration_to_ms(schedule_wait_time), - snapshot_wait_time_ms: duration_to_ms(snapshot_wait_time), - 
wait_wall_time_ms: duration_to_ms(wait_wall_time), - process_wall_time_ms: duration_to_ms(process_wall_time), + schedule_wait_time_ns: duration_to_ms(schedule_wait_time), + snapshot_wait_time_ns: duration_to_ms(snapshot_wait_time), + wait_wall_time_ns: duration_to_ms(wait_wall_time), + process_wall_time_ns: duration_to_ms(process_wall_time), }; Ok(( result?, diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 102d695b2de..8ff9e2f93af 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -22,6 +22,7 @@ use raftstore::{ coprocessor::{Config as CopConfig, ConsistencyCheckMethod}, store::Config as RaftstoreConfig, }; +use resource_control::Config as ResourceControlConfig; use security::SecurityConfig; use slog::Level; use test_util::assert_eq_debug; @@ -825,6 +826,7 @@ fn test_serde_custom_tikv_config() { renew_batch_max_size: 8192, alloc_ahead_buffer: ReadableDuration::millis(3000), }; + value.resource_control = ResourceControlConfig { enabled: false }; let custom = read_file_in_project_dir("integrations/config/test-custom.toml"); let load = toml::from_str(&custom).unwrap(); diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 722bdf0c56b..ecab04350b6 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -688,3 +688,6 @@ sample-threshold = 100 byte-threshold = 31457280 split.split-balance-score = 0.25 split.split-contained-score = 0.5 + +[resource-control] +enabled = false diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index fe545d07ec1..d5f8d55e320 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -303,6 +303,7 @@ fn test_scan_detail() { assert_eq!(scan_detail.get_lock().get_total(), 1); assert!(resp.get_exec_details_v2().has_time_detail()); + 
assert!(resp.get_exec_details_v2().has_time_detail_v2()); let scan_detail_v2 = resp.get_exec_details_v2().get_scan_detail_v2(); assert_eq!(scan_detail_v2.get_total_versions(), 5); assert_eq!(scan_detail_v2.get_processed_versions(), 4); @@ -1017,6 +1018,7 @@ fn test_del_select() { assert_eq!(row_count, 5); assert!(resp.get_exec_details_v2().has_time_detail()); + assert!(resp.get_exec_details_v2().has_time_detail_v2()); let scan_detail_v2 = resp.get_exec_details_v2().get_scan_detail_v2(); assert_eq!(scan_detail_v2.get_total_versions(), 8); assert_eq!(scan_detail_v2.get_processed_versions(), 5); @@ -1722,6 +1724,7 @@ fn test_exec_details() { assert!(resp.has_exec_details_v2()); let exec_details = resp.get_exec_details_v2(); assert!(exec_details.has_time_detail()); + assert!(exec_details.has_time_detail_v2()); assert!(exec_details.has_scan_detail_v2()); } diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 44d16961f7d..284a3f1cb89 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -290,6 +290,7 @@ fn test_mvcc_basic() { assert!(!get_resp.has_region_error()); assert!(!get_resp.has_error()); assert!(get_resp.get_exec_details_v2().has_time_detail()); + assert!(get_resp.get_exec_details_v2().has_time_detail_v2()); let scan_detail_v2 = get_resp.get_exec_details_v2().get_scan_detail_v2(); assert_eq!(scan_detail_v2.get_total_versions(), 1); assert_eq!(scan_detail_v2.get_processed_versions(), 1); @@ -322,6 +323,7 @@ fn test_mvcc_basic() { batch_get_req.version = batch_get_version; let batch_get_resp = client.kv_batch_get(&batch_get_req).unwrap(); assert!(batch_get_resp.get_exec_details_v2().has_time_detail()); + assert!(batch_get_resp.get_exec_details_v2().has_time_detail_v2()); let scan_detail_v2 = batch_get_resp.get_exec_details_v2().get_scan_detail_v2(); assert_eq!(scan_detail_v2.get_total_versions(), 1); assert_eq!(scan_detail_v2.get_processed_versions(), 1); @@ -2312,6 
+2314,7 @@ fn test_txn_api_version() { assert!(!get_resp.has_region_error()); assert!(!get_resp.has_error()); assert!(get_resp.get_exec_details_v2().has_time_detail()); + assert!(get_resp.get_exec_details_v2().has_time_detail_v2()); } { // Pessimistic Lock @@ -2491,10 +2494,20 @@ fn test_rpc_wall_time() { assert!( get_resp .get_exec_details_v2() - .get_time_detail() + .get_time_detail_v2() .get_total_rpc_wall_time_ns() > 0 ); + assert_eq!( + get_resp + .get_exec_details_v2() + .get_time_detail_v2() + .get_total_rpc_wall_time_ns(), + get_resp + .get_exec_details_v2() + .get_time_detail() + .get_total_rpc_wall_time_ns() + ); let (mut sender, receiver) = client.batch_commands().unwrap(); let mut batch_req = BatchCommandsRequest::default(); @@ -2525,7 +2538,7 @@ fn test_rpc_wall_time() { assert!( resp.get_get() .get_exec_details_v2() - .get_time_detail() + .get_time_detail_v2() .get_total_rpc_wall_time_ns() > 0 ); From ee57a81270f2def812336a6c5f3395c0cafa7ded Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 13 Mar 2023 12:08:37 +0800 Subject: [PATCH 0579/1149] raftstore-v2: fix ingest codec (#14373) ref tikv/tikv#12842 Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- .../raftstore-v2/src/operation/command/mod.rs | 2 +- .../src/operation/command/write/ingest.rs | 19 +++++++++++------ .../operation/command/write/simple_write.rs | 21 ++++++++++++++++++- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index df289a26f4a..f14c2c905a3 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -585,7 +585,7 @@ impl Apply { )?; } SimpleWrite::Ingest(ssts) => { - self.apply_ingest(ssts)?; + self.apply_ingest(log_index, ssts)?; } } } diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index 
c39fc25a28b..73459740393 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -2,7 +2,7 @@ use collections::HashMap; use crossbeam::channel::TrySendError; -use engine_traits::{KvEngine, RaftEngine}; +use engine_traits::{data_cf_offset, KvEngine, RaftEngine}; use kvproto::import_sstpb::SstMeta; use raftstore::{ store::{check_sst_for_ingestion, metrics::PEER_WRITE_CMD_COUNTER, util}, @@ -82,10 +82,15 @@ impl Peer { impl Apply { #[inline] - pub fn apply_ingest(&mut self, ssts: Vec) -> Result<()> { + pub fn apply_ingest(&mut self, index: u64, ssts: Vec) -> Result<()> { PEER_WRITE_CMD_COUNTER.ingest_sst.inc(); let mut infos = Vec::with_capacity(ssts.len()); for sst in &ssts { + // This may not be enough as ingest sst may not trigger flush at all. + let off = data_cf_offset(sst.get_cf_name()); + if self.should_skip(off, index) { + continue; + } if let Err(e) = check_sst_for_ingestion(sst, self.region()) { error!( self.logger, @@ -104,10 +109,12 @@ impl Apply { } } } - // Unlike v1, we can't batch ssts accross regions. - self.flush(); - if let Err(e) = self.sst_importer().ingest(&infos, self.tablet()) { - slog_panic!(self.logger, "ingest fail"; "ssts" => ?ssts, "error" => ?e); + if !infos.is_empty() { + // Unlike v1, we can't batch ssts accross regions. 
+ self.flush(); + if let Err(e) = self.sst_importer().ingest(&infos, self.tablet()) { + slog_panic!(self.logger, "ingest fail"; "ssts" => ?ssts, "error" => ?e); + } } Ok(()) } diff --git a/components/raftstore-v2/src/operation/command/write/simple_write.rs b/components/raftstore-v2/src/operation/command/write/simple_write.rs index a2c378cb04b..5f72fa62738 100644 --- a/components/raftstore-v2/src/operation/command/write/simple_write.rs +++ b/components/raftstore-v2/src/operation/command/write/simple_write.rs @@ -463,7 +463,8 @@ fn decode<'a>(buf: &mut &'a [u8]) -> Option> { }; ssts.push(sst); } - *buf = left; + let read = is.pos(); + *buf = &left[read as usize..]; Some(SimpleWrite::Ingest(ssts)) } tag => panic!("corrupted data: invalid tag {}", tag), @@ -532,6 +533,24 @@ mod tests { let res = decoder.next(); assert!(res.is_none(), "{:?}", res); + + let mut encoder = SimpleWriteEncoder::with_capacity(512); + let exp: Vec<_> = (0..10) + .map(|id| { + let mut meta = SstMeta::default(); + meta.set_region_id(id); + meta + }) + .collect(); + encoder.ingest(exp.clone()); + let bin = encoder.encode(); + let req_encoder = SimpleWriteReqEncoder::new(header, bin, 0, false); + let (bytes, _) = req_encoder.encode(); + let mut decoder = SimpleWriteReqDecoder::new(&logger, &bytes, 0, 0).unwrap(); + let write = decoder.next().unwrap(); + let SimpleWrite::Ingest(ssts) = write else { panic!("should be ingest") }; + assert_eq!(exp, ssts); + assert_matches!(decoder.next(), None); } #[test] From 571e513d6c2089ba4ceaf50051dbf81fd221db8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 13 Mar 2023 16:22:40 +0800 Subject: [PATCH 0580/1149] log-backup: added intervally resolve regions (#14180) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ref tikv/tikv#13638 This PR added a “two phase” flush to log backup for reducing checkpoint lag. 
Generally, we added a `MinTs` task, where resolve the regions and advance the `resolved_ts` in the checkpoint manager. then, once we are doing flush, we would make current `resolved_ts` become `checkpoint_ts`. This allows us to advance checkpoint_ts even the leader has gone. When the leader changes frequently, this can greatly reduce checkpoint lag. Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- .../backup-stream/src/checkpoint_manager.rs | 235 +++++++++++------- components/backup-stream/src/endpoint.rs | 93 ++++++- components/backup-stream/src/metrics.rs | 6 + .../backup-stream/src/subscription_manager.rs | 22 +- components/backup-stream/tests/mod.rs | 61 ++++- src/config/mod.rs | 17 ++ tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 8 files changed, 326 insertions(+), 110 deletions(-) diff --git a/components/backup-stream/src/checkpoint_manager.rs b/components/backup-stream/src/checkpoint_manager.rs index 47ec34d2113..50a6ac27864 100644 --- a/components/backup-stream/src/checkpoint_manager.rs +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -1,10 +1,11 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{collections::HashMap, sync::Arc, time::Duration}; +use std::{cell::RefCell, collections::HashMap, sync::Arc, time::Duration}; use futures::{ channel::mpsc::{self as async_mpsc, Receiver, Sender}, - SinkExt, StreamExt, + future::BoxFuture, + FutureExt, SinkExt, StreamExt, }; use grpcio::{RpcStatus, RpcStatusCode, ServerStreamingSink, WriteFlags}; use kvproto::{ @@ -13,7 +14,7 @@ use kvproto::{ metapb::Region, }; use pd_client::PdClient; -use tikv_util::{box_err, defer, info, warn, worker::Scheduler}; +use tikv_util::{box_err, defer, info, time::Instant, warn, worker::Scheduler}; use txn_types::TimeStamp; use uuid::Uuid; @@ -22,7 +23,9 @@ use crate::{ errors::{Error, ReportableResult, Result}, future, metadata::{store::MetaStore, Checkpoint, CheckpointProvider, MetadataClient}, - metrics, try_send, RegionCheckpointOperation, Task, + metrics, + subscription_track::ResolveResult, + try_send, RegionCheckpointOperation, Task, }; /// A manager for maintaining the last flush ts. @@ -31,14 +34,16 @@ use crate::{ /// checkpoint then advancing the global checkpoint. 
#[derive(Default)] pub struct CheckpointManager { - items: HashMap, + checkpoint_ts: HashMap, + resolved_ts: HashMap, manager_handle: Option>, } impl std::fmt::Debug for CheckpointManager { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("CheckpointManager") - .field("items", &self.items) + .field("checkpoints", &self.checkpoint_ts) + .field("resolved-ts", &self.resolved_ts) .finish() } } @@ -60,49 +65,59 @@ impl SubscriptionManager { while let Some(msg) = self.input.next().await { match msg { SubscriptionOp::Add(sub) => { - self.subscribers.insert(Uuid::new_v4(), sub); + let uid = Uuid::new_v4(); + info!("log backup adding new subscriber"; "id" => %uid); + self.subscribers.insert(uid, sub); } SubscriptionOp::Emit(events) => { - let mut canceled = vec![]; - for (id, sub) in &mut self.subscribers { - let send_all = async { - for es in events.chunks(1024) { - let mut resp = SubscribeFlushEventResponse::new(); - resp.set_events(es.to_vec().into()); - sub.feed((resp, WriteFlags::default())).await?; - } - sub.flush().await - }; - - match send_all.await { - Err(grpcio::Error::RemoteStopped) => { - canceled.push(*id); - } - Err(err) => { - Error::from(err).report("sending subscription"); - } - _ => {} - } - } - - for c in canceled { - match self.subscribers.remove(&c) { - Some(mut sub) => { - info!("client is gone, removing subscription"; "id" => %c); - sub.close().await.report_if_err(format_args!( - "during removing subscription {}", - c - )) - } - None => { - warn!("BUG: the subscriber has been removed before we are going to remove it."; "id" => %c); - } - } - } + self.emit_events(events).await; } } } } + + async fn emit_events(&mut self, events: Box<[FlushEvent]>) { + let mut canceled = vec![]; + info!("log backup sending events"; "event_len" => %events.len(), "downstream" => %self.subscribers.len()); + for (id, sub) in &mut self.subscribers { + let send_all = async { + for es in events.chunks(1024) { + let mut resp = 
SubscribeFlushEventResponse::new(); + resp.set_events(es.to_vec().into()); + sub.feed((resp, WriteFlags::default())).await?; + } + sub.flush().await + }; + + match send_all.await { + Err(grpcio::Error::RemoteStopped) => { + canceled.push(*id); + } + Err(err) => { + Error::from(err).report("sending subscription"); + } + _ => {} + } + } + + for c in canceled { + self.remove_subscription(&c).await; + } + } + + async fn remove_subscription(&mut self, id: &Uuid) { + match self.subscribers.remove(id) { + Some(mut sub) => { + info!("client is gone, removing subscription"; "id" => %id); + sub.close() + .await + .report_if_err(format_args!("during removing subscription {}", id)) + } + None => { + warn!("BUG: the subscriber has been removed before we are going to remove it."; "id" => %id); + } + } + } } // Note: can we make it more generic...? @@ -154,11 +169,6 @@ impl GetCheckpointResult { } impl CheckpointManager { - /// clear the manager. - pub fn clear(&mut self) { - self.items.clear(); - } - pub fn spawn_subscription_mgr(&mut self) -> future![()] { let (tx, rx) = async_mpsc::channel(1024); let sub = SubscriptionManager { @@ -169,25 +179,67 @@ impl CheckpointManager { sub.main_loop() } - pub fn update_region_checkpoints(&mut self, region_and_checkpoint: Vec<(Region, TimeStamp)>) { - for (region, checkpoint) in ®ion_and_checkpoint { - self.do_update(region, *checkpoint); + pub fn resolve_regions(&mut self, region_and_checkpoint: Vec) { + for res in region_and_checkpoint { + self.do_update(res.region, res.checkpoint); } + } - self.notify(region_and_checkpoint.into_iter()); + pub fn flush(&mut self) { + info!("log backup checkpoint manager flushing."; "resolved_ts_len" => %self.resolved_ts.len(), "resolved_ts" => ?self.get_resolved_ts()); + self.checkpoint_ts = std::mem::take(&mut self.resolved_ts); + // Clippy doesn't know this iterator borrows `self.checkpoint_ts` :( + #[allow(clippy::needless_collect)] + let items = self + .checkpoint_ts + .values() + .cloned() + 
.map(|x| (x.region, x.checkpoint)) + .collect::>(); + self.notify(items.into_iter()); } /// update a region checkpoint in need. #[cfg(test)] pub fn update_region_checkpoint(&mut self, region: &Region, checkpoint: TimeStamp) { - self.do_update(region, checkpoint); - self.notify(std::iter::once((region.clone(), checkpoint))); + Self::update_ts(&mut self.checkpoint_ts, region.clone(), checkpoint) + } + + fn update_ts( + container: &mut HashMap, + region: Region, + checkpoint: TimeStamp, + ) { + let e = container.entry(region.get_id()); + let ver = region.get_region_epoch().get_version(); + // A hacky way to allow the two closures move out the region. + // It is safe given the two closures would only be called once. + let r = RefCell::new(Some(region)); + e.and_modify(|old_cp| { + let old_ver = old_cp.region.get_region_epoch().get_version(); + let checkpoint_is_newer = old_cp.checkpoint < checkpoint; + if old_ver < ver || (old_ver == ver && checkpoint_is_newer) { + *old_cp = LastFlushTsOfRegion { + checkpoint, + region: r.borrow_mut().take().expect( + "unreachable: `and_modify` and `or_insert_with` called at the same time.", + ), + }; + } + }) + .or_insert_with(|| LastFlushTsOfRegion { + checkpoint, + region: r + .borrow_mut() + .take() + .expect("unreachable: `and_modify` and `or_insert_with` called at the same time."), + }); } - pub fn add_subscriber(&mut self, sub: Subscription) -> future![Result<()>] { + pub fn add_subscriber(&mut self, sub: Subscription) -> BoxFuture<'static, Result<()>> { let mgr = self.manager_handle.as_ref().cloned(); let initial_data = self - .items + .checkpoint_ts .values() .map(|v| FlushEvent { start_key: v.region.start_key.clone(), @@ -225,6 +277,7 @@ impl CheckpointManager { })?; Ok(()) } + .boxed() } fn notify(&mut self, items: impl Iterator) { @@ -248,28 +301,13 @@ impl CheckpointManager { } } - fn do_update(&mut self, region: &Region, checkpoint: TimeStamp) { - let e = self.items.entry(region.get_id()); - e.and_modify(|old_cp| { - if 
old_cp.checkpoint < checkpoint - && old_cp.region.get_region_epoch().get_version() - <= region.get_region_epoch().get_version() - { - *old_cp = LastFlushTsOfRegion { - checkpoint, - region: region.clone(), - }; - } - }) - .or_insert_with(|| LastFlushTsOfRegion { - checkpoint, - region: region.clone(), - }); + fn do_update(&mut self, region: Region, checkpoint: TimeStamp) { + Self::update_ts(&mut self.resolved_ts, region, checkpoint) } /// get checkpoint from a region. pub fn get_from_region(&self, region: RegionIdWithVersion) -> GetCheckpointResult { - let checkpoint = self.items.get(®ion.region_id); + let checkpoint = self.checkpoint_ts.get(®ion.region_id); if checkpoint.is_none() { return GetCheckpointResult::not_found(region); } @@ -282,7 +320,11 @@ impl CheckpointManager { /// get all checkpoints stored. pub fn get_all(&self) -> Vec { - self.items.values().cloned().collect() + self.checkpoint_ts.values().cloned().collect() + } + + pub fn get_resolved_ts(&self) -> Option { + self.resolved_ts.values().map(|x| x.checkpoint).min() } } @@ -333,7 +375,7 @@ pub struct LastFlushTsOfRegion { #[async_trait::async_trait] pub trait FlushObserver: Send + 'static { /// The callback when the flush has advanced the resolver. - async fn before(&mut self, checkpoints: Vec<(Region, TimeStamp)>); + async fn before(&mut self, checkpoints: Vec); /// The callback when the flush is done. (Files are fully written to /// external storage.) 
async fn after(&mut self, task: &str, rts: u64) -> Result<()>; @@ -363,7 +405,7 @@ impl BasicFlushObserver { #[async_trait::async_trait] impl FlushObserver for BasicFlushObserver { - async fn before(&mut self, _checkpoints: Vec<(Region, TimeStamp)>) {} + async fn before(&mut self, _checkpoints: Vec) {} async fn after(&mut self, task: &str, rts: u64) -> Result<()> { if let Err(err) = self @@ -401,8 +443,9 @@ pub struct CheckpointV3FlushObserver { sched: Scheduler, meta_cli: MetadataClient, - checkpoints: Vec<(Region, TimeStamp)>, + checkpoints: Vec, global_checkpoint_cache: HashMap, + start_time: Instant, } impl CheckpointV3FlushObserver { @@ -414,6 +457,7 @@ impl CheckpointV3FlushObserver { // We almost always have only one entry. global_checkpoint_cache: HashMap::with_capacity(1), baseline, + start_time: Instant::now(), } } } @@ -443,15 +487,19 @@ where S: MetaStore + 'static, O: FlushObserver + Send, { - async fn before(&mut self, checkpoints: Vec<(Region, TimeStamp)>) { + async fn before(&mut self, checkpoints: Vec) { self.checkpoints = checkpoints; } async fn after(&mut self, task: &str, _rts: u64) -> Result<()> { - let t = Task::RegionCheckpointsOp(RegionCheckpointOperation::Update(std::mem::take( - &mut self.checkpoints, - ))); - try_send!(self.sched, t); + let resolve_task = Task::RegionCheckpointsOp(RegionCheckpointOperation::Resolved { + checkpoints: std::mem::take(&mut self.checkpoints), + start_time: self.start_time, + }); + let flush_task = Task::RegionCheckpointsOp(RegionCheckpointOperation::Flush); + try_send!(self.sched, resolve_task); + try_send!(self.sched, flush_task); + let global_checkpoint = self.get_checkpoint(task).await?; info!("getting global checkpoint from cache for updating."; "checkpoint" => ?global_checkpoint); self.baseline @@ -499,6 +547,26 @@ pub mod tests { r } + #[test] + fn test_flush() { + let mut mgr = super::CheckpointManager::default(); + mgr.do_update(region(1, 32, 8), TimeStamp::new(8)); + mgr.do_update(region(2, 34, 8), 
TimeStamp::new(15)); + mgr.do_update(region(2, 35, 8), TimeStamp::new(16)); + mgr.do_update(region(2, 35, 8), TimeStamp::new(14)); + let r = mgr.get_from_region(RegionIdWithVersion::new(1, 32)); + assert_matches::assert_matches!(r, GetCheckpointResult::NotFound { .. }); + + mgr.flush(); + let r = mgr.get_from_region(RegionIdWithVersion::new(1, 32)); + assert_matches::assert_matches!(r, GetCheckpointResult::Ok { checkpoint , .. } if checkpoint.into_inner() == 8); + let r = mgr.get_from_region(RegionIdWithVersion::new(2, 35)); + assert_matches::assert_matches!(r, GetCheckpointResult::Ok { checkpoint , .. } if checkpoint.into_inner() == 16); + mgr.flush(); + let r = mgr.get_from_region(RegionIdWithVersion::new(1, 32)); + assert_matches::assert_matches!(r, GetCheckpointResult::NotFound { .. }); + } + #[test] fn test_mgr() { let mut mgr = super::CheckpointManager::default(); @@ -510,6 +578,7 @@ pub mod tests { assert_matches::assert_matches!(r, GetCheckpointResult::EpochNotMatch { .. }); let r = mgr.get_from_region(RegionIdWithVersion::new(3, 44)); assert_matches::assert_matches!(r, GetCheckpointResult::NotFound { .. 
}); + mgr.update_region_checkpoint(®ion(1, 30, 8), TimeStamp::new(16)); let r = mgr.get_from_region(RegionIdWithVersion::new(1, 32)); assert_matches::assert_matches!(r, GetCheckpointResult::Ok{checkpoint, ..} if checkpoint.into_inner() == 8); diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index a13c52c9212..49ca811285b 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -56,7 +56,7 @@ use crate::{ observer::BackupStreamObserver, router::{ApplyEvents, Router, TaskSelector}, subscription_manager::{RegionSubscriptionManager, ResolvedRegions}, - subscription_track::SubscriptionTracer, + subscription_track::{ResolveResult, SubscriptionTracer}, try_send, utils::{self, CallbackWaitGroup, StopWatch, Work}, }; @@ -93,7 +93,6 @@ pub struct Endpoint { failover_time: Option, // We holds the config before, even it is useless for now, // however probably it would be useful in the future. - #[allow(dead_code)] config: BackupStreamConfig, checkpoint_mgr: CheckpointManager, } @@ -184,7 +183,7 @@ where pool.spawn(op_loop); let mut checkpoint_mgr = CheckpointManager::default(); pool.spawn(checkpoint_mgr.spawn_subscription_mgr()); - Endpoint { + let ep = Endpoint { meta_client, range_router, scheduler, @@ -203,7 +202,9 @@ where failover_time: None, config, checkpoint_mgr, - } + }; + ep.pool.spawn(ep.min_ts_worker()); + ep } } @@ -763,7 +764,7 @@ where let mut resolved = get_rts.await?; let mut new_rts = resolved.global_checkpoint(); fail::fail_point!("delay_on_flush"); - flush_ob.before(resolved.take_region_checkpoints()).await; + flush_ob.before(resolved.take_resolve_result()).await; if let Some(rewritten_rts) = flush_ob.rewrite_resolved_ts(&task).await { info!("rewriting resolved ts"; "old" => %new_rts, "new" => %rewritten_rts); new_rts = rewritten_rts.min(new_rts); @@ -919,13 +920,31 @@ where } } + fn min_ts_worker(&self) -> future![()] { + let sched = self.scheduler.clone(); + let 
interval = self.config.min_ts_interval.0; + async move { + loop { + tokio::time::sleep(interval).await; + try_send!( + sched, + Task::RegionCheckpointsOp(RegionCheckpointOperation::PrepareMinTsForResolve) + ); + } + } + } + pub fn handle_region_checkpoints_op(&mut self, op: RegionCheckpointOperation) { match op { - RegionCheckpointOperation::Update(u) => { - // Let's clear all stale checkpoints first. - // Or they may slow down the global checkpoint. - self.checkpoint_mgr.clear(); - self.checkpoint_mgr.update_region_checkpoints(u); + RegionCheckpointOperation::Resolved { + checkpoints, + start_time, + } => { + self.checkpoint_mgr.resolve_regions(checkpoints); + metrics::MIN_TS_RESOLVE_DURATION.observe(start_time.saturating_elapsed_secs()); + } + RegionCheckpointOperation::Flush => { + self.checkpoint_mgr.flush(); } RegionCheckpointOperation::Get(g, cb) => { let _guard = self.pool.handle().enter(); @@ -953,6 +972,37 @@ where } }); } + RegionCheckpointOperation::PrepareMinTsForResolve => { + let min_ts = self.pool.block_on(self.prepare_min_ts()); + let start_time = Instant::now(); + // We need to reschedule the `Resolve` task to queue, because the subscription + // is asynchronous -- there may be transactions committed before + // the min_ts we prepared but haven't been observed yet. 
+ try_send!( + self.scheduler, + Task::RegionCheckpointsOp(RegionCheckpointOperation::Resolve { + min_ts, + start_time + }) + ); + } + RegionCheckpointOperation::Resolve { min_ts, start_time } => { + let sched = self.scheduler.clone(); + try_send!( + self.scheduler, + Task::ModifyObserve(ObserveOp::ResolveRegions { + callback: Box::new(move |mut resolved| { + let t = + Task::RegionCheckpointsOp(RegionCheckpointOperation::Resolved { + checkpoints: resolved.take_resolve_result(), + start_time, + }); + try_send!(sched, t); + }), + min_ts + }) + ); + } } } @@ -997,7 +1047,16 @@ pub enum RegionSet { } pub enum RegionCheckpointOperation { - Update(Vec<(Region, TimeStamp)>), + Flush, + PrepareMinTsForResolve, + Resolve { + min_ts: TimeStamp, + start_time: Instant, + }, + Resolved { + checkpoints: Vec, + start_time: Instant, + }, Get(RegionSet, Box) + Send>), Subscribe(Subscription), } @@ -1005,9 +1064,17 @@ pub enum RegionCheckpointOperation { impl fmt::Debug for RegionCheckpointOperation { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::Update(arg0) => f.debug_tuple("Update").field(arg0).finish(), + Self::Flush => f.debug_tuple("Flush").finish(), Self::Get(arg0, _) => f.debug_tuple("Get").field(arg0).finish(), + Self::Subscribe(_) => f.debug_tuple("Subscription").finish(), + Self::Resolved { checkpoints, .. } => { + f.debug_tuple("Resolved").field(checkpoints).finish() + } + Self::PrepareMinTsForResolve => f.debug_tuple("PrepareMinTsForResolve").finish(), + Self::Resolve { min_ts, .. } => { + f.debug_struct("Resolve").field("min_ts", min_ts).finish() + } } } } @@ -1185,7 +1252,7 @@ impl Task { ObserveOp::NotifyFailToStartObserve { .. } => "modify_observe.retry", ObserveOp::ResolveRegions { .. } => "modify_observe.resolve", }, - Task::ForceFlush(_) => "force_flush", + Task::ForceFlush(..) => "force_flush", Task::FatalError(..) => "fatal_error", Task::Sync(..) 
=> "sync", Task::MarkFailover(_) => "mark_failover", diff --git a/components/backup-stream/src/metrics.rs b/components/backup-stream/src/metrics.rs index 0805dae5f77..d7f836833b0 100644 --- a/components/backup-stream/src/metrics.rs +++ b/components/backup-stream/src/metrics.rs @@ -160,4 +160,10 @@ lazy_static! { "The regions that lost leadership during resolving" ) .unwrap(); + pub static ref MIN_TS_RESOLVE_DURATION: Histogram = register_histogram!( + "tikv_log_backup_resolve_duration_sec", + "The duration of resolving.", + exponential_buckets(0.001, 2.0, 16).unwrap() + ) + .unwrap(); } diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index a31a43980b5..4f75423a241 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -38,7 +38,7 @@ use crate::{ metrics, observer::BackupStreamObserver, router::{Router, TaskSelector}, - subscription_track::SubscriptionTracer, + subscription_track::{ResolveResult, SubscriptionTracer}, try_send, utils::{self, CallbackWaitGroup, Work}, Task, @@ -58,7 +58,7 @@ struct ScanCmd { /// The response of requesting resolve the new checkpoint of regions. pub struct ResolvedRegions { - items: Vec<(Region, TimeStamp)>, + items: Vec, checkpoint: TimeStamp, } @@ -67,7 +67,7 @@ impl ResolvedRegions { /// Note: Maybe we can compute the global checkpoint internal and getting /// the interface clear. However we must take the `min_ts` or we cannot /// provide valid global checkpoint if there isn't any region checkpoint. - pub fn new(checkpoint: TimeStamp, checkpoints: Vec<(Region, TimeStamp)>) -> Self { + pub fn new(checkpoint: TimeStamp, checkpoints: Vec) -> Self { Self { items: checkpoints, checkpoint, @@ -75,7 +75,16 @@ impl ResolvedRegions { } /// take the region checkpoints from the structure. 
+ #[deprecated = "please use `take_resolve_result` instead."] pub fn take_region_checkpoints(&mut self) -> Vec<(Region, TimeStamp)> { + std::mem::take(&mut self.items) + .into_iter() + .map(|x| (x.region, x.checkpoint)) + .collect() + } + + /// take the resolve result from this struct. + pub fn take_resolve_result(&mut self) -> Vec { std::mem::take(&mut self.items) } @@ -455,7 +464,7 @@ where } ObserveOp::ResolveRegions { callback, min_ts } => { let now = Instant::now(); - let timedout = self.wait(Duration::from_secs(30)).await; + let timedout = self.wait(Duration::from_secs(5)).await; if timedout { warn!("waiting for initial scanning done timed out, forcing progress!"; "take" => ?now.saturating_elapsed(), "timedout" => %timedout); @@ -470,10 +479,7 @@ where let rts = min_region.map(|rs| rs.checkpoint).unwrap_or(min_ts); info!("getting checkpoint"; "defined_by_region" => ?min_region); self.subs.warn_if_gap_too_huge(rts); - callback(ResolvedRegions::new( - rts, - cps.into_iter().map(|r| (r.region, r.checkpoint)).collect(), - )); + callback(ResolvedRegions::new(rts, cps)); } } } diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index b7afcd1441f..d6dfb2b2839 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -21,7 +21,7 @@ use backup_stream::{ router::Router, Endpoint, GetCheckpointResult, RegionCheckpointOperation, RegionSet, Service, Task, }; -use futures::{executor::block_on, AsyncWriteExt, Future, Stream, StreamExt, TryStreamExt}; +use futures::{executor::block_on, AsyncWriteExt, Future, Stream, StreamExt}; use grpcio::{ChannelBuilder, Server, ServerBuilder}; use kvproto::{ brpb::{CompressionType, Local, Metadata, StorageBackend}, @@ -275,7 +275,10 @@ impl Suite { /// create a subscription stream. this has simply asserted no error, because /// in theory observing flushing should not emit error. change that if /// needed. 
- fn flush_stream(&self) -> impl Stream { + fn flush_stream( + &self, + panic_while_fail: bool, + ) -> impl Stream { let streams = self .log_backup_cli .iter() @@ -288,8 +291,18 @@ impl Suite { }) .unwrap_or_else(|err| panic!("failed to subscribe on {} because {}", id, err)); let id = *id; - stream.map_ok(move |x| (id, x)).map(move |x| { - x.unwrap_or_else(move |err| panic!("failed to rec from {} because {}", id, err)) + stream.filter_map(move |x| { + futures::future::ready(match x { + Ok(x) => Some((id, x)), + Err(err) => { + if panic_while_fail { + panic!("failed to rec from {} because {}", id, err) + } else { + println!("[WARN] failed to rec from {} because {}", id, err); + None + } + } + }) }) }) .collect::>(); @@ -463,6 +476,7 @@ impl Suite { } fn force_flush_files(&self, task: &str) { + // TODO: use the callback to make the test more stable. self.run(|| Task::ForceFlush(task.to_owned())); self.sync(); } @@ -1264,7 +1278,7 @@ mod test { #[test] fn subscribe_flushing() { let mut suite = super::SuiteBuilder::new_named("sub_flush").build(); - let stream = suite.flush_stream(); + let stream = suite.flush_stream(true); for i in 1..10 { let split_key = make_split_key_at_record(1, i * 20); suite.must_split(&split_key); @@ -1306,12 +1320,47 @@ mod test { )); } + #[test] + fn resolved_follower() { + let mut suite = super::SuiteBuilder::new_named("r").build(); + let round1 = run_async_test(suite.write_records(0, 128, 1)); + suite.must_register_task(1, "r"); + suite.run(|| Task::RegionCheckpointsOp(RegionCheckpointOperation::PrepareMinTsForResolve)); + suite.sync(); + std::thread::sleep(Duration::from_secs(1)); + + let leader = suite.cluster.leader_of_region(1).unwrap(); + suite.must_shuffle_leader(1); + let round2 = run_async_test(suite.write_records(256, 128, 1)); + suite + .endpoints + .get(&leader.store_id) + .unwrap() + .scheduler() + .schedule(Task::ForceFlush("r".to_owned())) + .unwrap(); + suite.sync(); + std::thread::sleep(Duration::from_secs(1)); + 
run_async_test(suite.check_for_write_records( + suite.flushed_files.path(), + round1.iter().map(|x| x.as_slice()), + )); + assert!(suite.global_checkpoint() > 256); + suite.force_flush_files("r"); + suite.wait_for_flush(); + assert!(suite.global_checkpoint() > 512); + run_async_test(suite.check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(|x| x.as_slice()), + )); + } + #[test] fn network_partition() { let mut suite = super::SuiteBuilder::new_named("network_partition") .nodes(3) .build(); - let stream = suite.flush_stream(); + let stream = suite.flush_stream(true); suite.must_register_task(1, "network_partition"); let leader = suite.cluster.leader_of_region(1).unwrap(); let round1 = run_async_test(suite.write_records(0, 64, 1)); diff --git a/src/config/mod.rs b/src/config/mod.rs index 57c2e935d78..689e0330a2b 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2654,6 +2654,8 @@ impl Default for BackupConfig { #[serde(default)] #[serde(rename_all = "kebab-case")] pub struct BackupStreamConfig { + #[online_config(skip)] + pub min_ts_interval: ReadableDuration, #[online_config(skip)] pub max_flush_interval: ReadableDuration, #[online_config(skip)] @@ -2681,6 +2683,20 @@ impl BackupStreamConfig { ); self.num_threads = default_cfg.num_threads; } + if self.max_flush_interval < ReadableDuration::secs(10) { + return Err(format!( + "the max_flush_interval is too small, it is {}, and should be greater than 10s.", + self.max_flush_interval + ) + .into()); + } + if self.min_ts_interval < ReadableDuration::secs(1) { + return Err(format!( + "the min_ts_interval is too small, it is {}, and should be greater than 1s.", + self.min_ts_interval + ) + .into()); + } Ok(()) } } @@ -2691,6 +2707,7 @@ impl Default for BackupStreamConfig { let total_mem = SysQuota::memory_limit_in_bytes(); let quota_size = (total_mem as f64 * 0.1).min(ReadableSize::mb(512).0 as _); Self { + min_ts_interval: ReadableDuration::secs(10), max_flush_interval: 
ReadableDuration::minutes(3), // use at most 50% of vCPU by default num_threads: (cpu_num * 0.5).clamp(2.0, 12.0) as usize, diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 8ff9e2f93af..ff6807fa6a1 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -782,6 +782,7 @@ fn test_serde_custom_tikv_config() { file_size_limit: ReadableSize::gb(5), initial_scan_pending_memory_quota: ReadableSize::kb(2), initial_scan_rate_limit: ReadableSize::mb(3), + min_ts_interval: ReadableDuration::secs(2), }; value.import = ImportConfig { num_threads: 123, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index ecab04350b6..7f5dbfa1db7 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -632,6 +632,7 @@ s3-multi-part-size = "15MB" sst-max-size = "789MB" [log-backup] +min-ts-interval = "2s" max-flush-interval = "11s" num-threads = 7 enable = true From dcd15aee8fd5f3a11b9438eac2ed3cd7935a4e04 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Tue, 14 Mar 2023 12:26:40 +0800 Subject: [PATCH 0581/1149] server: Record the duration of executing the snapshot and feedback to PD Server (#13410) close tikv/tikv#13409 1. collect the duration of generating and sending snapshot 2. records the the total duration between receiving the snapshot task and finish to sending all snapshot . 3. report the metrics to the pd server. 
Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Ping Yu --- components/raftstore/src/store/fsm/store.rs | 2 + .../raftstore/src/store/peer_storage.rs | 7 +- components/raftstore/src/store/snap.rs | 71 +++++++++++++++---- .../raftstore/src/store/worker/region.rs | 5 +- src/server/snap.rs | 28 ++++++-- tests/integrations/raftstore/test_snap.rs | 19 +++-- 6 files changed, 108 insertions(+), 24 deletions(-) diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 4fafc049bee..09d6db62764 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -2464,6 +2464,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER let snap_stats = self.ctx.snap_mgr.stats(); stats.set_sending_snap_count(snap_stats.sending_count as u32); stats.set_receiving_snap_count(snap_stats.receiving_count as u32); + stats.set_snapshot_stats(snap_stats.stats.into()); + STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC .with_label_values(&["sending"]) .set(snap_stats.sending_count as i64); diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 470cdfee998..6ac38b60dfe 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -30,7 +30,10 @@ use raft::{ Error as RaftError, GetEntriesContext, RaftState, Ready, Storage, StorageError, }; use tikv_util::{ - box_err, box_try, debug, defer, error, info, store::find_peer_by_id, time::Instant, warn, + box_err, box_try, debug, defer, error, info, + store::find_peer_by_id, + time::{Instant, UnixSecs}, + warn, worker::Scheduler, }; @@ -1060,6 +1063,7 @@ pub fn do_snapshot( last_applied_state: RaftApplyState, for_balance: bool, allow_multi_files_snapshot: bool, + start: UnixSecs, ) -> raft::Result where E: KvEngine, @@ -1117,6 +1121,7 @@ where region_state.get_region(), allow_multi_files_snapshot, for_balance, + start, 
)?; snapshot.set_data(snap_data.write_to_bytes()?.into()); diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 37189d2e52b..57cdbd2a75c 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -27,6 +27,7 @@ use keys::{enc_end_key, enc_start_key}; use kvproto::{ encryptionpb::EncryptionMethod, metapb::Region, + pdpb::SnapshotStat, raft_serverpb::{RaftSnapshotData, SnapshotCfFile, SnapshotMeta}, }; use openssl::symm::{Cipher, Crypter, Mode}; @@ -35,7 +36,7 @@ use raft::eraftpb::Snapshot as RaftSnapshot; use thiserror::Error; use tikv_util::{ box_err, box_try, debug, error, info, - time::{duration_to_sec, Instant, Limiter}, + time::{duration_to_sec, Instant, Limiter, UnixSecs}, warn, HandyRwLock, }; @@ -145,7 +146,6 @@ impl SnapKey { if let Err(e) = snap_data.merge_from_bytes(snap.get_data()) { return Err(io::Error::new(ErrorKind::Other, e)); } - Ok(SnapKey::from_region_snap( snap_data.get_region().get_id(), snap, @@ -1032,6 +1032,7 @@ impl Snapshot { region: &Region, allow_multi_files_snapshot: bool, for_balance: bool, + start: UnixSecs, ) -> RaftStoreResult { let mut snap_data = RaftSnapshotData::default(); snap_data.set_region(region.clone()); @@ -1050,7 +1051,10 @@ impl Snapshot { // set snapshot meta data snap_data.set_file_size(total_size); snap_data.set_version(SNAPSHOT_VERSION); - snap_data.set_meta(self.meta_file.meta.as_ref().unwrap().clone()); + let meta = self.meta_file.meta.as_mut().unwrap(); + meta.set_start(start.into_inner()); + meta.set_generate_duration_sec(t.saturating_elapsed().as_secs()); + snap_data.set_meta(meta.clone()); SNAPSHOT_BUILD_TIME_HISTOGRAM.observe(duration_to_sec(t.saturating_elapsed())); SNAPSHOT_KV_COUNT_HISTOGRAM.observe(total_count as f64); @@ -1362,6 +1366,7 @@ pub enum SnapEntry { pub struct SnapStats { pub sending_count: usize, pub receiving_count: usize, + pub stats: Vec, } #[derive(Clone)] @@ -1375,6 +1380,7 @@ struct SnapManagerCore { 
encryption_key_manager: Option>, max_per_file_size: Arc, enable_multi_snapshot_files: Arc, + stats: Arc>>, } /// `SnapManagerCore` trace all current processing snapshots. @@ -1656,6 +1662,18 @@ impl SnapManager { self.core.limiter.speed_limit() } + pub fn collect_stat(&self, snap: SnapshotStat) { + debug!( + "collect snapshot stat"; + "region_id" => snap.region_id, + "total_size" => snap.get_transport_size(), + "total_duration_sec" => snap.get_total_duration_sec(), + "generate_duration_sec" => snap.get_generate_duration_sec(), + "send_duration_sec" => snap.get_generate_duration_sec(), + ); + self.core.stats.lock().unwrap().push(snap); + } + pub fn register(&self, key: SnapKey, entry: SnapEntry) { debug!( "register snapshot"; @@ -1726,9 +1744,11 @@ impl SnapManager { } } + let stats = std::mem::take(self.core.stats.lock().unwrap().as_mut()); SnapStats { sending_count: sending_cnt, receiving_count: receiving_cnt, + stats, } } @@ -1887,6 +1907,7 @@ impl SnapManagerBuilder { enable_multi_snapshot_files: Arc::new(AtomicBool::new( self.enable_multi_snapshot_files, )), + stats: Default::default(), }, max_total_size: Arc::new(AtomicU64::new(max_total_size)), }; @@ -2269,6 +2290,7 @@ pub mod tests { encryption_key_manager: None, max_per_file_size: Arc::new(AtomicU64::new(max_per_file_size)), enable_multi_snapshot_files: Arc::new(AtomicBool::new(true)), + stats: Default::default(), } } @@ -2405,7 +2427,9 @@ pub mod tests { assert!(!s1.exists()); assert_eq!(mgr_core.get_total_snap_size().unwrap(), 0); - let mut snap_data = s1.build(&db, &snapshot, ®ion, true, false).unwrap(); + let mut snap_data = s1 + .build(&db, &snapshot, ®ion, true, false, UnixSecs::now()) + .unwrap(); // Ensure that this snapshot file does exist after being built. 
assert!(s1.exists()); @@ -2505,13 +2529,17 @@ pub mod tests { let mut s1 = Snapshot::new_for_building(dir.path(), &key, &mgr_core).unwrap(); assert!(!s1.exists()); - let _ = s1.build(&db, &snapshot, ®ion, true, false).unwrap(); + let _ = s1 + .build(&db, &snapshot, ®ion, true, false, UnixSecs::now()) + .unwrap(); assert!(s1.exists()); let mut s2 = Snapshot::new_for_building(dir.path(), &key, &mgr_core).unwrap(); assert!(s2.exists()); - let _ = s2.build(&db, &snapshot, ®ion, true, false).unwrap(); + let _ = s2 + .build(&db, &snapshot, ®ion, true, false, UnixSecs::now()) + .unwrap(); assert!(s2.exists()); } @@ -2654,7 +2682,9 @@ pub mod tests { let mut s1 = Snapshot::new_for_building(dir.path(), &key, &mgr_core).unwrap(); assert!(!s1.exists()); - let _ = s1.build(&db, &snapshot, ®ion, true, false).unwrap(); + let _ = s1 + .build(&db, &snapshot, ®ion, true, false, UnixSecs::now()) + .unwrap(); assert!(s1.exists()); corrupt_snapshot_size_in(dir.path()); @@ -2663,7 +2693,9 @@ pub mod tests { let mut s2 = Snapshot::new_for_building(dir.path(), &key, &mgr_core).unwrap(); assert!(!s2.exists()); - let snap_data = s2.build(&db, &snapshot, ®ion, true, false).unwrap(); + let snap_data = s2 + .build(&db, &snapshot, ®ion, true, false, UnixSecs::now()) + .unwrap(); assert!(s2.exists()); let dst_dir = Builder::new() @@ -2724,7 +2756,9 @@ pub mod tests { let mut s1 = Snapshot::new_for_building(dir.path(), &key, &mgr_core).unwrap(); assert!(!s1.exists()); - let _ = s1.build(&db, &snapshot, ®ion, true, false).unwrap(); + let _ = s1 + .build(&db, &snapshot, ®ion, true, false, UnixSecs::now()) + .unwrap(); assert!(s1.exists()); assert_eq!(1, corrupt_snapshot_meta_file(dir.path())); @@ -2733,7 +2767,9 @@ pub mod tests { let mut s2 = Snapshot::new_for_building(dir.path(), &key, &mgr_core).unwrap(); assert!(!s2.exists()); - let mut snap_data = s2.build(&db, &snapshot, ®ion, true, false).unwrap(); + let mut snap_data = s2 + .build(&db, &snapshot, ®ion, true, false, UnixSecs::now()) + 
.unwrap(); assert!(s2.exists()); let dst_dir = Builder::new() @@ -2795,7 +2831,9 @@ pub mod tests { let mgr_core = create_manager_core(&path, u64::MAX); let mut s1 = Snapshot::new_for_building(&path, &key1, &mgr_core).unwrap(); let mut region = gen_test_region(1, 1, 1); - let mut snap_data = s1.build(&db, &snapshot, ®ion, true, false).unwrap(); + let mut snap_data = s1 + .build(&db, &snapshot, ®ion, true, false, UnixSecs::now()) + .unwrap(); let mut s = Snapshot::new_for_sending(&path, &key1, &mgr_core).unwrap(); let expected_size = s.total_size(); let mut s2 = @@ -2867,7 +2905,9 @@ pub mod tests { // Ensure the snapshot being built will not be deleted on GC. src_mgr.register(key.clone(), SnapEntry::Generating); let mut s1 = src_mgr.get_snapshot_for_building(&key).unwrap(); - let mut snap_data = s1.build(&db, &snapshot, ®ion, true, false).unwrap(); + let mut snap_data = s1 + .build(&db, &snapshot, ®ion, true, false, UnixSecs::now()) + .unwrap(); check_registry_around_deregister(&src_mgr, &key, &SnapEntry::Generating); @@ -2950,6 +2990,7 @@ pub mod tests { &gen_test_region(100, 1, 1), true, false, + UnixSecs::now(), ) .unwrap() }; @@ -2973,7 +3014,7 @@ pub mod tests { let region = gen_test_region(region_id, 1, 1); let mut s = snap_mgr.get_snapshot_for_building(&key).unwrap(); let _ = s - .build(&engine.kv, &snapshot, ®ion, true, false) + .build(&engine.kv, &snapshot, ®ion, true, false, UnixSecs::now()) .unwrap(); // The first snap_size is for region 100. @@ -3043,7 +3084,9 @@ pub mod tests { // correctly. 
for _ in 0..2 { let mut s1 = snap_mgr.get_snapshot_for_building(&key).unwrap(); - let _ = s1.build(&db, &snapshot, ®ion, true, false).unwrap(); + let _ = s1 + .build(&db, &snapshot, ®ion, true, false, UnixSecs::now()) + .unwrap(); assert!(snap_mgr.delete_snapshot(&key, &s1, false)); } } diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 84bc3b27084..7dc894204ec 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -26,7 +26,7 @@ use tikv_util::{ box_err, box_try, config::VersionTrack, defer, error, info, thd_name, - time::Instant, + time::{Instant, UnixSecs}, warn, worker::{Runnable, RunnableWithTimer}, }; @@ -241,6 +241,7 @@ struct SnapGenContext { engine: EK, mgr: SnapManager, router: R, + start: UnixSecs, } impl SnapGenContext @@ -269,6 +270,7 @@ where last_applied_state, for_balance, allow_multi_files_snapshot, + self.start )); // Only enable the fail point when the region id is equal to 1, which is // the id of bootstrapped region in tests. 
@@ -821,6 +823,7 @@ where engine: self.engine.clone(), mgr: self.mgr.clone(), router: self.router.clone(), + start: UnixSecs::now(), }; self.pool.spawn(async move { tikv_alloc::add_thread_memory_accessor(); diff --git a/src/server/snap.rs b/src/server/snap.rs index bae0587c505..afce0e8a2fd 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -2,7 +2,7 @@ use std::{ fmt::{self, Display, Formatter}, - io::{Read, Write}, + io::{Error as IoError, ErrorKind, Read, Write}, pin::Pin, sync::{ atomic::{AtomicUsize, Ordering}, @@ -23,6 +23,7 @@ use grpcio::{ RpcStatusCode, WriteFlags, }; use kvproto::{ + pdpb::SnapshotStat, raft_serverpb::{ Done, RaftMessage, RaftSnapshotData, SnapshotChunk, TabletSnapshotRequest, TabletSnapshotResponse, @@ -35,7 +36,7 @@ use security::SecurityManager; use tikv_kv::RaftExtension; use tikv_util::{ config::{Tracker, VersionTrack}, - time::Instant, + time::{Instant, UnixSecs}, worker::Runnable, DeferContext, }; @@ -139,9 +140,16 @@ pub fn send_snap( let send_timer = SEND_SNAP_HISTOGRAM.start_coarse_timer(); - let key = { + let (key, snap_start, generate_duration_sec) = { let snap = msg.get_message().get_snapshot(); - SnapKey::from_snap(snap)? + let mut snap_data = RaftSnapshotData::default(); + if let Err(e) = snap_data.merge_from_bytes(snap.get_data()) { + return Err(Error::Io(IoError::new(ErrorKind::Other, e))); + } + let key = SnapKey::from_region_snap(snap_data.get_region().get_id(), snap); + let snap_start = snap_data.get_meta().get_start(); + let generate_duration_sec = snap_data.get_meta().get_generate_duration_sec(); + (key, snap_start, generate_duration_sec) }; mgr.register(key.clone(), SnapEntry::Sending); @@ -193,6 +201,18 @@ pub fn send_snap( Ok(_) => { fail_point!("snapshot_delete_after_send"); mgr.delete_snapshot(&key, &chunks.snap, true); + let cost = UnixSecs::now().into_inner().saturating_sub(snap_start); + // it should ignore if the duration of snapshot is less than 1s to decrease the + // grpc data size. 
+ if cost >= 1 { + let mut stat = SnapshotStat::default(); + stat.set_region_id(key.region_id); + stat.set_transport_size(total_size); + stat.set_generate_duration_sec(generate_duration_sec); + stat.set_send_duration_sec(timer.saturating_elapsed().as_secs()); + stat.set_total_duration_sec(cost); + mgr.collect_stat(stat); + } // TODO: improve it after rustc resolves the bug. // Call `info` in the closure directly will cause rustc // panic with `Cannot create local mono-item for DefId`. diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index 0ca576e5e9a..fc0364c13b0 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -23,7 +23,11 @@ use security::SecurityManager; use test_raftstore::*; use test_raftstore_macro::test_case; use tikv::server::snap::send_snap; -use tikv_util::{config::*, time::Instant, HandyRwLock}; +use tikv_util::{ + config::*, + time::{Instant, UnixSecs}, + HandyRwLock, +}; fn test_huge_snapshot(cluster: &mut Cluster, max_snapshot_file_size: u64) { cluster.cfg.rocksdb.titan.enabled = true; @@ -508,7 +512,7 @@ fn test_inspected_snapshot() { #[test] fn test_gen_during_heavy_recv() { let mut cluster = new_server_cluster(0, 3); - cluster.cfg.server.snap_io_max_bytes_per_sec = ReadableSize(5 * 1024 * 1024); + cluster.cfg.server.snap_io_max_bytes_per_sec = ReadableSize(1024 * 1024); cluster.cfg.raft_store.snap_mgr_gc_tick_interval = ReadableDuration(Duration::from_secs(100)); let pd_client = Arc::clone(&cluster.pd_client); @@ -554,6 +558,7 @@ fn test_gen_during_heavy_recv() { snap_apply_state, true, true, + UnixSecs::now(), ) .unwrap(); @@ -593,8 +598,14 @@ fn test_gen_during_heavy_recv() { pd_client.must_add_peer(r1, new_learner_peer(3, 3)); sleep_ms(500); must_get_equal(&cluster.get_engine(3), b"zzz-0000", b"value"); - assert_eq!(cluster.get_snap_mgr(1).stats().sending_count, 0); - assert_eq!(cluster.get_snap_mgr(2).stats().receiving_count, 0); + + // 
store 1 and store 2 must send snapshot, so stats should record the snapshot. + let send_stats = cluster.get_snap_mgr(1).stats(); + let recv_stats = cluster.get_snap_mgr(2).stats(); + assert_eq!(send_stats.sending_count, 0); + assert_eq!(recv_stats.receiving_count, 0); + assert_ne!(send_stats.stats.len(), 0); + assert_ne!(recv_stats.stats.len(), 0); drop(cluster); let _ = th.join(); } From 852af464cd48a97ec2b88c6a183a4f1ec4a84938 Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 14 Mar 2023 14:10:39 +0800 Subject: [PATCH 0582/1149] raftstore: optimize write priority scheduling path (#14335) ref tikv/tikv#14353 Optimize write priority scheduling path including: - replace `DashMap` with `RwLock` in the resource controller - use visit pattern for consuming msg resource to avoid constructing hashmap - introduce `ParsedEntry` to avoid parsing raft command from entry data repeatedly in different places Signed-off-by: Connor1996 Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/batch-system/src/test_runner.rs | 10 +- .../operation/command/admin/merge/prepare.rs | 3 +- .../raftstore/src/store/async_io/write.rs | 19 +- .../raftstore/src/store/entry_storage.rs | 32 ++- components/raftstore/src/store/fsm/apply.rs | 159 +++--------- components/raftstore/src/store/peer.rs | 3 +- components/raftstore/src/store/util.rs | 235 +++++++++++++++++- components/resource_control/Cargo.toml | 1 + components/resource_control/src/channel.rs | 76 ++++-- components/resource_control/src/lib.rs | 4 + .../resource_control/src/resource_group.rs | 110 ++++---- 12 files changed, 443 insertions(+), 210 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5fb51b4fcdf..94c562c5c6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4702,6 +4702,7 @@ dependencies = [ "kvproto", "lazy_static", "online_config", + "parking_lot 0.12.1", "pd_client", "pin-project", "prometheus", diff --git a/components/batch-system/src/test_runner.rs b/components/batch-system/src/test_runner.rs index 
9a84a5fe545..ad9c3f54d04 100644 --- a/components/batch-system/src/test_runner.rs +++ b/components/batch-system/src/test_runner.rs @@ -11,9 +11,8 @@ use std::{ }, }; -use collections::HashMap; use derive_more::{Add, AddAssign}; -use resource_control::ResourceMetered; +use resource_control::{ResourceConsumeType, ResourceController, ResourceMetered}; use tikv_util::mpsc; use crate::*; @@ -29,12 +28,11 @@ pub enum Message { } impl ResourceMetered for Message { - fn get_resource_consumptions(&self) -> Option> { + fn consume_resource(&self, resource_ctl: &Arc) -> Option { match self { Message::Resource(group_name, bytes) => { - let mut map = HashMap::default(); - map.insert(group_name.to_owned(), *bytes); - Some(map) + resource_ctl.consume(group_name.as_bytes(), ResourceConsumeType::IoBytes(*bytes)); + Some(group_name.to_owned()) } _ => None, } diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index f9df2d9ea1a..378e3d2e7c8 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -245,8 +245,7 @@ impl Peer { if entry.get_data().is_empty() { continue; } - let cmd: RaftCmdRequest = - util::parse_data_at(entry.get_data(), entry.get_index(), "tag"); + let cmd: RaftCmdRequest = util::parse_data_at(entry.get_data(), entry.get_index()); if !cmd.has_admin_request() { continue; } diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 9b25d7de806..d20b9d0bec0 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -26,7 +26,7 @@ use protobuf::Message; use raft::eraftpb::Entry; use resource_control::{ channel::{bounded, Receiver}, - ResourceController, ResourceMetered, + ResourceConsumeType, ResourceController, ResourceMetered, }; use tikv_util::{ 
box_err, @@ -283,16 +283,25 @@ where EK: KvEngine, ER: RaftEngine, { - fn get_resource_consumptions(&self) -> Option> { + fn consume_resource(&self, resource_ctl: &Arc) -> Option { match self { WriteMsg::WriteTask(t) => { - let mut map = HashMap::default(); + let mut dominant_group = "".to_owned(); + let mut max_write_bytes = 0; for entry in &t.entries { let header = util::get_entry_header(entry); let group_name = header.get_resource_group_name().to_owned(); - *map.entry(group_name).or_default() += entry.compute_size() as u64; + let write_bytes = entry.compute_size() as u64; + resource_ctl.consume( + group_name.as_bytes(), + ResourceConsumeType::IoBytes(write_bytes), + ); + if write_bytes > max_write_bytes { + dominant_group = group_name; + max_write_bytes = write_bytes; + } } - Some(map) + Some(dominant_group) } _ => None, } diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index afa13730ccf..1e2e40b2da6 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -30,7 +30,11 @@ use super::{ metrics::*, peer_storage::storage_error, WriteTask, MEMTRACE_ENTRY_CACHE, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }; -use crate::{bytes_capacity, store::ReadTask, Result}; +use crate::{ + bytes_capacity, + store::{util::ParsedEntry, ReadTask}, + Result, +}; const MAX_ASYNC_FETCH_TRY_CNT: usize = 3; const SHRINK_CACHE_CAPACITY: usize = 64; @@ -54,7 +58,7 @@ pub fn last_index(state: &RaftLocalState) -> u64 { pub struct CachedEntries { pub range: Range, // Entries and dangle size for them. `dangle` means not in entry cache. 
- entries: Arc, usize)>>, + entries: Arc, usize)>>, } impl CachedEntries { @@ -64,21 +68,24 @@ impl CachedEntries { let end = entries.last().map(|x| x.index).unwrap() + 1; let range = Range { start, end }; CachedEntries { - entries: Arc::new(Mutex::new((entries, 0))), + entries: Arc::new(Mutex::new(( + entries.into_iter().map(|e| ParsedEntry::new(e)).collect(), + 0, + ))), range, } } - pub fn iter_entries(&self, mut f: impl FnMut(&Entry)) { - let entries = self.entries.lock().unwrap(); - for entry in &entries.0 { + pub fn iter_entries_mut(&self, mut f: impl FnMut(&mut ParsedEntry)) { + let mut entries = self.entries.lock().unwrap(); + for entry in &mut entries.0 { f(entry); } } /// Take cached entries and dangle size for them. `dangle` means not in /// entry cache. - pub fn take_entries(&self) -> (Vec, usize) { + pub fn take_entries(&self) -> (Vec, usize) { mem::take(&mut *self.entries.lock().unwrap()) } } @@ -325,8 +332,8 @@ impl EntryCache { let dangle_size = { let mut guard = entries.entries.lock().unwrap(); - let last_idx = guard.0.last().map(|e| e.index).unwrap(); - let cache_front = match self.cache.front().map(|e| e.index) { + let last_idx = guard.0.last().map(|e| e.get_index()).unwrap(); + let cache_front = match self.cache.front().map(|e| e.get_index()) { Some(i) => i, None => u64::MAX, }; @@ -334,7 +341,10 @@ impl EntryCache { let dangle_range = if last_idx < cache_front { // All entries are not in entry cache. 0..guard.0.len() - } else if let Ok(i) = guard.0.binary_search_by(|e| e.index.cmp(&cache_front)) { + } else if let Ok(i) = guard + .0 + .binary_search_by(|e| e.get_index().cmp(&cache_front)) + { // Some entries are in entry cache. 
0..i } else { @@ -344,7 +354,7 @@ impl EntryCache { let mut size = 0; for e in &guard.0[dangle_range] { - size += bytes_capacity(&e.data) + bytes_capacity(&e.context); + size += e.bytes_capacity(); } guard.1 = size; size diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index b9f737158fc..181ff207c0b 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -9,7 +9,6 @@ use std::{ cmp::{Ord, Ordering as CmpOrdering}, collections::VecDeque, fmt::{self, Debug, Formatter}, - io::BufRead, mem, ops::{Deref, DerefMut, Range as StdRange}, sync::{ @@ -46,12 +45,8 @@ use kvproto::{ }; use pd_client::{BucketMeta, BucketStat}; use prometheus::local::LocalHistogram; -use protobuf::{wire_format::WireType, CodedInputStream, Message}; -use raft::eraftpb::{ - ConfChange, ConfChangeType, ConfChangeV2, Entry, EntryType, Snapshot as RaftSnapshot, -}; -use raft_proto::ConfChangeI; -use resource_control::{ResourceController, ResourceMetered}; +use raft::eraftpb::{ConfChangeType, ConfChangeV2, Entry, EntryType, Snapshot as RaftSnapshot}; +use resource_control::{ResourceConsumeType, ResourceController, ResourceMetered}; use smallvec::{smallvec, SmallVec}; use sst_importer::SstImporter; use tikv_alloc::trace::TraceEvent; @@ -93,6 +88,7 @@ use crate::{ util::{ self, admin_cmd_epoch_lookup, check_flashback_state, check_req_region_epoch, compare_region_epoch, ChangePeerI, ConfChangeKind, KeysInfoFormatter, LatencyInspector, + ParsedEntry, }, Config, RegionSnapshot, RegionTask, WriteCallback, }, @@ -852,43 +848,6 @@ fn should_sync_log(cmd: &RaftCmdRequest) -> bool { false } -fn can_witness_skip(entry: &Entry) -> bool { - // need to handle ConfChange entry type - if entry.get_entry_type() != EntryType::EntryNormal { - return false; - } - - // HACK: check admin request field in serialized data from `RaftCmdRequest` - // without deserializing all. 
It's done by checking the existence of the - // field number of `admin_request`. - // See the encoding in `write_to_with_cached_sizes()` of `RaftCmdRequest` in - // `raft_cmdpb.rs` for reference. - let mut is = CodedInputStream::from_bytes(entry.get_data()); - if is.eof().unwrap() { - return true; - } - let (mut field_number, wire_type) = is.read_tag_unpack().unwrap(); - // Header field is of number 1 - if field_number == 1 { - if wire_type != WireType::WireTypeLengthDelimited { - panic!("unexpected wire type"); - } - let len = is.read_raw_varint32().unwrap(); - // skip parsing the content of `Header` - is.consume(len as usize); - // read next field number - (field_number, _) = is.read_tag_unpack().unwrap(); - } - - // `Requests` field is of number 2 and `AdminRequest` field is of number 3. - // - If the next field is 2, there must be no admin request as in one - // `RaftCmdRequest`, either requests or admin_request is filled. - // - If the next field is 3, it's exactly an admin request. - // - If the next field is others, neither requests nor admin_request is filled, - // so there is no admin request. - field_number != 3 -} - /// A struct that stores the state related to Merge. /// /// When executing a `CommitMerge`, the source peer may have not applied @@ -911,7 +870,7 @@ where { /// All of the entries that need to continue to be applied after /// the source peer has applied its logs. - pending_entries: Vec, + pending_entries: Vec, /// All of messages that need to continue to be handled after /// the source peer has applied its logs and pending entries /// are all handled. @@ -1091,7 +1050,7 @@ where fn handle_raft_committed_entries( &mut self, apply_ctx: &mut ApplyContext, - mut committed_entries_drainer: Drain<'_, Entry>, + mut committed_entries_drainer: Drain<'_, ParsedEntry>, ) { if committed_entries_drainer.len() == 0 { return; @@ -1102,7 +1061,7 @@ where // must re-propose these commands again. 
apply_ctx.committed_count += committed_entries_drainer.len(); let mut results = VecDeque::new(); - while let Some(entry) = committed_entries_drainer.next() { + while let Some(mut entry) = committed_entries_drainer.next() { if self.pending_remove { // This peer is about to be destroyed, skip everything. break; @@ -1124,9 +1083,9 @@ where // running on data written by new version tikv), but PD will reject old version // tikv join the cluster, so this should not happen. let res = match entry.get_entry_type() { - EntryType::EntryNormal => self.handle_raft_entry_normal(apply_ctx, &entry), + EntryType::EntryNormal => self.handle_raft_entry_normal(apply_ctx, &mut entry), EntryType::EntryConfChange | EntryType::EntryConfChangeV2 => { - self.handle_raft_entry_conf_change(apply_ctx, &entry) + self.handle_raft_entry_conf_change(apply_ctx, &mut entry) } }; @@ -1196,7 +1155,7 @@ where fn handle_raft_entry_normal( &mut self, apply_ctx: &mut ApplyContext, - entry: &Entry, + entry: &mut ParsedEntry, ) -> ApplyResult { fail_point!( "yield_apply_first_region", @@ -1206,11 +1165,10 @@ where let index = entry.get_index(); let term = entry.get_term(); - let data = entry.get_data(); - if !data.is_empty() { - if !self.peer.is_witness || !can_witness_skip(entry) { - let cmd = util::parse_data_at(data, index, &self.tag); + if !entry.is_empty() { + if !self.peer.is_witness || !entry.can_witness_skip() { + let cmd = entry.take_cmd(); if apply_ctx.yield_high_latency_operation && has_high_latency_operation(&cmd) { self.priority = Priority::Low; } @@ -1269,7 +1227,7 @@ where fn handle_raft_entry_conf_change( &mut self, apply_ctx: &mut ApplyContext, - entry: &Entry, + entry: &mut ParsedEntry, ) -> ApplyResult { // Although conf change can't yield in normal case, it is convenient to // simulate yield before applying a conf change log. 
@@ -1277,16 +1235,7 @@ where ApplyResult::Yield }); let (index, term) = (entry.get_index(), entry.get_term()); - let conf_change: ConfChangeV2 = match entry.get_entry_type() { - EntryType::EntryConfChange => { - let conf_change: ConfChange = - util::parse_data_at(entry.get_data(), index, &self.tag); - conf_change.into_v2() - } - EntryType::EntryConfChangeV2 => util::parse_data_at(entry.get_data(), index, &self.tag), - _ => unreachable!(), - }; - let cmd = util::parse_data_at(conf_change.get_context(), index, &self.tag); + let (conf_change, cmd) = entry.take_conf_change(); match self.process_raft_cmd(apply_ctx, index, term, cmd) { ApplyResult::None => { // If failed, tell Raft that the `ConfChange` was aborted. @@ -3726,19 +3675,29 @@ where } impl ResourceMetered for Msg { - fn get_resource_consumptions(&self) -> Option> { + fn consume_resource(&self, resource_ctl: &Arc) -> Option { match self { Msg::Apply { apply, .. } => { - let mut map = HashMap::default(); + let mut dominant_group = "".to_owned(); + let mut max_write_bytes = 0; for cached_entries in &apply.entries { - cached_entries.iter_entries(|entry| { - // TODO: maybe use a more efficient way to get the resource group name. 
- let header = util::get_entry_header(entry); - let group_name = header.get_resource_group_name().to_owned(); - *map.entry(group_name).or_default() += entry.compute_size() as u64; + cached_entries.iter_entries_mut(|entry| { + if entry.is_empty() { + return; + } + let write_bytes = entry.compute_size() as u64; + let group_name = entry.get_cmd().get_header().get_resource_group_name(); + resource_ctl.consume( + group_name.as_bytes(), + ResourceConsumeType::IoBytes(write_bytes), + ); + if write_bytes > max_write_bytes { + dominant_group = group_name.to_owned(); + max_write_bytes = write_bytes; + } }); } - Some(map) + Some(dominant_group) } _ => None, } @@ -3924,19 +3883,21 @@ where let mut dangle_size = 0; for cached_entries in apply.entries { - let (e, sz) = cached_entries.take_entries(); + let (ents, sz) = cached_entries.take_entries(); dangle_size += sz; - if e.is_empty() { + if ents.is_empty() { let rid = self.delegate.region_id(); let StdRange { start, end } = cached_entries.range; + let mut tmp_ents = Vec::new(); self.delegate .raft_engine - .fetch_entries_to(rid, start, end, None, &mut entries) + .fetch_entries_to(rid, start, end, None, &mut tmp_ents) .unwrap(); + entries.extend(tmp_ents.into_iter().map(|e| ParsedEntry::new(e))); } else if entries.is_empty() { - entries = e; + entries = ents; } else { - entries.extend(e); + entries.extend(ents); } } if dangle_size > 0 { @@ -4908,9 +4869,9 @@ mod memtrace { EK: KvEngine, { fn heap_size(&self) -> usize { - let mut size = self.pending_entries.capacity() * mem::size_of::(); + let mut size = self.pending_entries.capacity() * mem::size_of::(); for e in &self.pending_entries { - size += bytes_capacity(&e.data) + bytes_capacity(&e.context); + size += e.bytes_capacity(); } size += self.pending_msgs.capacity() * mem::size_of::>(); @@ -4967,7 +4928,6 @@ mod tests { time::*, }; - use bytes::Bytes; use engine_panic::PanicEngine; use engine_test::kv::{new_engine, KvTestEngine, KvTestSnapshot}; use engine_traits::{Peekable as 
PeekableTrait, SyncMutable, WriteBatchExt}; @@ -4977,7 +4937,6 @@ mod tests { raft_cmdpb::*, }; use protobuf::Message; - use raft::eraftpb::{ConfChange, ConfChangeV2}; use sst_importer::Config as ImportConfig; use tempfile::{Builder, TempDir}; use test_sst_importer::*; @@ -5084,42 +5043,6 @@ mod tests { } } - #[test] - fn test_can_witness_skip() { - let mut entry = Entry::new(); - let mut req = RaftCmdRequest::default(); - entry.set_entry_type(EntryType::EntryNormal); - let data = req.write_to_bytes().unwrap(); - entry.set_data(Bytes::copy_from_slice(&data)); - assert!(can_witness_skip(&entry)); - - req.mut_admin_request() - .set_cmd_type(AdminCmdType::CompactLog); - let data = req.write_to_bytes().unwrap(); - entry.set_data(Bytes::copy_from_slice(&data)); - assert!(!can_witness_skip(&entry)); - - let mut req = RaftCmdRequest::default(); - let mut request = Request::default(); - request.set_cmd_type(CmdType::Put); - req.set_requests(vec![request].into()); - let data = req.write_to_bytes().unwrap(); - entry.set_data(Bytes::copy_from_slice(&data)); - assert!(can_witness_skip(&entry)); - - entry.set_entry_type(EntryType::EntryConfChange); - let conf_change = ConfChange::new(); - let data = conf_change.write_to_bytes().unwrap(); - entry.set_data(Bytes::copy_from_slice(&data)); - assert!(!can_witness_skip(&entry)); - - entry.set_entry_type(EntryType::EntryConfChangeV2); - let conf_change_v2 = ConfChangeV2::new(); - let data = conf_change_v2.write_to_bytes().unwrap(); - entry.set_data(Bytes::copy_from_slice(&data)); - assert!(!can_witness_skip(&entry)); - } - #[test] fn test_should_sync_log() { // Admin command diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 8dc69a0def4..a0b28e44f07 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -4211,8 +4211,7 @@ where if entry.get_data().is_empty() { continue; } - let cmd: RaftCmdRequest = - util::parse_data_at(entry.get_data(), 
entry.get_index(), &self.tag); + let cmd: RaftCmdRequest = util::parse_data_at(entry.get_data(), entry.get_index()); if !cmd.has_admin_request() { continue; } diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index d48c5e78e7c..7408b540285 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -6,6 +6,7 @@ use std::{ collections::{HashMap, VecDeque}, fmt, fmt::Display, + io::BufRead, option::Option, sync::{ atomic::{AtomicBool, AtomicU64, Ordering as AtomicOrdering}, @@ -24,12 +25,15 @@ use kvproto::{ }, raft_serverpb::{RaftMessage, RaftSnapshotData}, }; -use protobuf::{self, CodedInputStream, Message}; +use protobuf::{self, wire_format::WireType, CodedInputStream, Message}; use raft::{ eraftpb::{self, ConfChangeType, ConfState, Entry, EntryType, MessageType, Snapshot}, Changer, RawNode, INVALID_INDEX, }; -use raft_proto::ConfChangeI; +use raft_proto::{ + eraftpb::{ConfChange, ConfChangeV2}, + ConfChangeI, +}; use tikv_util::{ box_err, codec::number::{decode_u64, NumberEncoder}, @@ -43,7 +47,9 @@ use tokio::sync::Notify; use txn_types::WriteBatchFlags; use super::{metrics::PEER_ADMIN_CMD_COUNTER_VEC, peer_storage, Config}; -use crate::{coprocessor::CoprocessorHost, store::snap::SNAPSHOT_VERSION, Error, Result}; +use crate::{ + bytes_capacity, coprocessor::CoprocessorHost, store::snap::SNAPSHOT_VERSION, Error, Result, +}; const INVALID_TIMESTAMP: u64 = u64::MAX; @@ -745,6 +751,139 @@ pub(crate) fn u64_to_timespec(u: u64) -> Timespec { Timespec::new(sec as i64, nsec as i32) } +// ParsedEntry wraps raft-proto `Entry` and used to avoid parsing raft command +// from entry's data repeatedly. The parsed command may be used in multiple +// places, so cache it at the first place. 
+pub struct ParsedEntry { + entry: Entry, + cmd: Option, + conf_change: Option, + parsed: bool, +} + +impl ParsedEntry { + pub fn new(entry: Entry) -> ParsedEntry { + ParsedEntry { + entry, + cmd: None, + conf_change: None, + parsed: false, + } + } + + pub fn get_entry_type(&self) -> EntryType { + self.entry.get_entry_type() + } + + pub fn get_index(&self) -> u64 { + self.entry.get_index() + } + + pub fn get_term(&self) -> u64 { + self.entry.get_term() + } + + pub fn compute_size(&self) -> u32 { + self.entry.compute_size() + } + + pub fn is_empty(&self) -> bool { + self.entry.get_data().is_empty() + } + + pub fn bytes_capacity(&self) -> usize { + bytes_capacity(&self.entry.data) + bytes_capacity(&self.entry.context) + } + + fn parse(&mut self) { + assert!(!self.is_empty()); + + let data = self.entry.get_data(); + let index = self.entry.get_index(); + // lazy parse the cmd from entry context + let conf_change = match self.entry.get_entry_type() { + EntryType::EntryConfChange => { + let conf_change: ConfChange = parse_data_at(data, index); + Some(conf_change.into_v2()) + } + EntryType::EntryConfChangeV2 => Some(parse_data_at(data, index)), + EntryType::EntryNormal => { + self.cmd = Some(parse_data_at(data, index)); + None + } + }; + if let Some(conf_change) = conf_change { + self.cmd = Some(parse_data_at(conf_change.get_context(), index)); + self.conf_change = Some(conf_change); + } + self.parsed = true; + } + + pub fn get_cmd(&mut self) -> &RaftCmdRequest { + if !self.parsed { + self.parse(); + } + self.cmd.as_ref().unwrap() + } + + pub fn take_cmd(&mut self) -> RaftCmdRequest { + if !self.parsed { + self.parse(); + } + self.parsed = false; + self.cmd.take().unwrap() + } + + pub fn take_conf_change(&mut self) -> (ConfChangeV2, RaftCmdRequest) { + if !self.parsed { + self.parse(); + } + self.parsed = false; + (self.conf_change.take().unwrap(), self.cmd.take().unwrap()) + } + + pub fn can_witness_skip(&self) -> bool { + !has_admin_request(&self.entry) + } +} + +fn 
has_admin_request(entry: &Entry) -> bool { + // need to handle ConfChange entry type + if entry.get_entry_type() != EntryType::EntryNormal { + return true; + } + + // HACK: check admin request field in serialized data from `RaftCmdRequest` + // without deserializing all. It's done by checking the existence of the + // field number of `admin_request`. + // See the encoding in `write_to_with_cached_sizes()` of `RaftCmdRequest` in + // `raft_cmdpb.rs` for reference. + let mut is = CodedInputStream::from_bytes(entry.get_data()); + if is.eof().unwrap() { + return false; + } + let (mut field_number, wire_type) = is.read_tag_unpack().unwrap(); + // Header field is of number 1 + if field_number == 1 { + if wire_type != WireType::WireTypeLengthDelimited { + panic!("unexpected wire type"); + } + let len = is.read_raw_varint32().unwrap(); + // skip parsing the content of `Header` + is.consume(len as usize); + // read next field number + (field_number, _) = is.read_tag_unpack().unwrap(); + } + + // `Requests` field is of number 2 and `AdminRequest` field is of number 3. + // - If the next field is 2, there must be no admin request as in one + // `RaftCmdRequest`, either requests or admin_request is filled. + // - If the next field is 3, it's exactly an admin request. + // - If the next field is others, neither requests nor admin_request is filled, + // so there is no admin request. + field_number == 3 +} + pub fn get_entry_header(entry: &Entry) -> RaftRequestHeader { if entry.get_entry_type() != EntryType::EntryNormal { return RaftRequestHeader::default(); @@ -770,10 +909,10 @@ pub fn get_entry_header(entry: &Entry) -> RaftRequestHeader { /// If `data` is corrupted, this function will panic. 
// TODO: make sure received entries are not corrupted #[inline] -pub fn parse_data_at(data: &[u8], index: u64, tag: &str) -> T { +pub fn parse_data_at(data: &[u8], index: u64) -> T { let mut result = T::default(); result.merge_from_bytes(data).unwrap_or_else(|e| { - panic!("{} data is corrupted at {}: {:?}", tag, index, e); + panic!("{} data is corrupted : {:?}", index, e); }); result } @@ -1717,10 +1856,11 @@ pub fn validate_split_region( mod tests { use std::thread; + use bytes::Bytes; use engine_test::kv::KvTestEngine; use kvproto::{ metapb::{self, RegionEpoch}, - raft_cmdpb::AdminRequest, + raft_cmdpb::{AdminRequest, CmdType, Request}, }; use protobuf::Message as _; use raft::eraftpb::{ConfChangeType, Entry, Message, MessageType}; @@ -1801,6 +1941,53 @@ mod tests { assert_eq!(m1.inspect(Some(monotonic_raw_now())), LeaseState::Valid); } + #[test] + fn test_parsed_entry() { + let mut req = RaftCmdRequest::default(); + let mut header = RaftRequestHeader::default(); + header.set_resource_group_name("test".to_owned()); + req.set_header(header); + + let mut entry = Entry::new(); + entry.set_term(1); + entry.set_index(2); + entry.set_entry_type(raft::eraftpb::EntryType::EntryNormal); + entry.set_data(req.write_to_bytes().unwrap().into()); + + let mut parsed = ParsedEntry::new(entry); + assert_eq!(parsed.get_term(), 1); + assert_eq!(parsed.get_index(), 2); + assert_eq!( + parsed.get_cmd().get_header().get_resource_group_name(), + "test" + ); + + let mut entry = Entry::new(); + entry.set_term(1); + entry.set_index(2); + entry.set_entry_type(raft::eraftpb::EntryType::EntryConfChangeV2); + let mut cc = ConfChangeV2::new(); + let mut ccs = eraftpb::ConfChangeSingle::default(); + ccs.set_change_type(ConfChangeType::AddNode); + ccs.set_node_id(3); + cc.set_changes(vec![ccs].into()); + cc.set_context(req.write_to_bytes().unwrap().into()); + entry.set_data(cc.write_to_bytes().unwrap().into()); + + let mut parsed = ParsedEntry::new(entry); + let (conf_change, cmd) = 
parsed.take_conf_change(); + assert_eq!( + conf_change.get_changes()[0].get_change_type(), + ConfChangeType::AddNode + ); + assert_eq!(conf_change.get_changes()[0].get_node_id(), 3); + assert_eq!(cmd.get_header().get_resource_group_name(), "test"); + assert_eq!( + parsed.get_cmd().get_header().get_resource_group_name(), + "test" + ); + } + #[test] fn test_get_entry_header() { let mut req = RaftCmdRequest::default(); @@ -2151,6 +2338,42 @@ mod tests { check_term(&header, 10).unwrap_err(); } + #[test] + fn test_has_admin_request() { + let mut entry = Entry::new(); + let mut req = RaftCmdRequest::default(); + entry.set_entry_type(EntryType::EntryNormal); + let data = req.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(!has_admin_request(&entry)); + + req.mut_admin_request() + .set_cmd_type(AdminCmdType::CompactLog); + let data = req.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(has_admin_request(&entry)); + + let mut req = RaftCmdRequest::default(); + let mut request = Request::default(); + request.set_cmd_type(CmdType::Put); + req.set_requests(vec![request].into()); + let data = req.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(!has_admin_request(&entry)); + + entry.set_entry_type(EntryType::EntryConfChange); + let conf_change = ConfChange::new(); + let data = conf_change.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(has_admin_request(&entry)); + + entry.set_entry_type(EntryType::EntryConfChangeV2); + let conf_change_v2 = ConfChangeV2::new(); + let data = conf_change_v2.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(has_admin_request(&entry)); + } + #[test] fn test_check_req_region_epoch() { let mut epoch = RegionEpoch::default(); diff --git a/components/resource_control/Cargo.toml b/components/resource_control/Cargo.toml index 6cb7d547e6c..9a488b06d77 100644 --- 
a/components/resource_control/Cargo.toml +++ b/components/resource_control/Cargo.toml @@ -18,6 +18,7 @@ futures = { version = "0.3" } kvproto = { workspace = true } lazy_static = "1.0" online_config = { workspace = true } +parking_lot = "0.12" pd_client = { workspace = true } pin-project = "1.0" prometheus = { version = "0.13", features = ["nightly"] } diff --git a/components/resource_control/src/channel.rs b/components/resource_control/src/channel.rs index 55bc2ed33b9..a62b9636f83 100644 --- a/components/resource_control/src/channel.rs +++ b/components/resource_control/src/channel.rs @@ -1,17 +1,15 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. - use std::{cell::RefCell, sync::Arc}; -use collections::HashMap; use crossbeam::channel::{self, RecvError, SendError, TryRecvError, TrySendError}; use kvproto::kvrpcpb::CommandPri; use tikv_util::mpsc::priority_queue; -use crate::{ResourceConsumeType, ResourceController}; +use crate::ResourceController; pub trait ResourceMetered { // returns the msg consumption of each hash map - fn get_resource_consumptions(&self) -> Option> { + fn consume_resource(&self, _: &Arc) -> Option { None } } @@ -132,19 +130,7 @@ impl Sender { last_msg_group, .. 
} => { - if let Some(mut groups) = msg.get_resource_consumptions() { - let mut dominant_group = "".to_owned(); - let mut max_write_bytes = 0; - for (group_name, write_bytes) in groups.drain() { - resource_ctl.consume( - group_name.as_bytes(), - ResourceConsumeType::IoBytes(write_bytes), - ); - if write_bytes > max_write_bytes { - dominant_group = group_name; - max_write_bytes = write_bytes; - } - } + if let Some(dominant_group) = msg.consume_resource(resource_ctl) { *last_msg_group.borrow_mut() = dominant_group; } } @@ -181,3 +167,59 @@ impl Receiver { } } } + +#[cfg(test)] +mod tests { + use std::{thread, usize}; + + use test::Bencher; + + use super::*; + use crate::ResourceConsumeType; + + struct Msg(usize); + + impl ResourceMetered for Msg { + fn consume_resource(&self, resource_ctl: &Arc) -> Option { + // None + let write_bytes = self.0 as u64; + let group_name = "test".to_owned(); + resource_ctl.consume( + group_name.as_bytes(), + ResourceConsumeType::IoBytes(write_bytes), + ); + Some(group_name) + } + } + + #[bench] + fn bench_channel(b: &mut Bencher) { + let (tx, rx) = unbounded(Some(Arc::new(ResourceController::new( + "test".to_owned(), + false, + )))); + + let t = thread::spawn(move || { + let mut n2: usize = 0; + loop { + if let Ok(Msg(n)) = rx.recv() { + n2 += n; + } else { + return n2; + } + } + }); + + let mut n1 = 0; + b.iter(|| { + n1 += 1; + let msg = Msg(1); + tx.consume_msg_resource(&msg); + tx.send(msg, 0).unwrap(); + }); + + drop(tx); + let n2 = t.join().unwrap(); + assert_eq!(n1, n2); + } +} diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index 99645688cf7..b186cb8a0c7 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -1,4 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+#![feature(test)] use online_config::OnlineConfig; use serde::{Deserialize, Serialize}; @@ -11,6 +12,9 @@ pub use resource_group::{ mod future; pub use future::ControlledFuture; +#[cfg(test)] +extern crate test; + mod service; pub use service::ResourceManagerService; diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 390214bc687..cea045dbf1a 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -8,11 +8,13 @@ use std::{ time::Duration, }; +use collections::HashMap; use dashmap::{mapref::one::Ref, DashMap}; use kvproto::{ kvrpcpb::CommandPri, resource_manager::{GroupMode, ResourceGroup}, }; +use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard}; use tikv_util::info; use yatp::queue::priority::TaskPriorityProvider; @@ -144,7 +146,7 @@ pub struct ResourceController { // groups' factors, it can't be changed concurrently. max_ru_quota: Mutex, // record consumption of each resource group, name --> resource_group - resource_consumptions: DashMap, GroupPriorityTracker>, + resource_consumptions: RwLock, GroupPriorityTracker>>, last_min_vt: AtomicU64, } @@ -155,7 +157,7 @@ impl ResourceController { name, is_read, max_ru_quota: Mutex::new(DEFAULT_MAX_RU_QUOTA), - resource_consumptions: DashMap::new(), + resource_consumptions: RwLock::new(HashMap::default()), last_min_vt: AtomicU64::new(0), }; // add the "default" resource group @@ -196,7 +198,7 @@ impl ResourceController { }; // maybe update existed group - self.resource_consumptions.insert(name, group); + self.resource_consumptions.write().insert(name, group); } // we calculate the weight of each resource group based on the currently maximum @@ -205,9 +207,12 @@ impl ResourceController { // often, and iterate 10k entry cost less than 5ms, so the performance is // acceptable. 
fn adjust_all_resource_group_factors(&self, max_ru_quota: u64) { - self.resource_consumptions.iter_mut().for_each(|mut g| { - g.value_mut().weight = Self::calculate_factor(max_ru_quota, g.ru_quota); - }); + self.resource_consumptions + .write() + .iter_mut() + .for_each(|(_, tracker)| { + tracker.weight = Self::calculate_factor(max_ru_quota, tracker.ru_quota); + }); } fn remove_resource_group(&self, name: &[u8]) { @@ -216,18 +221,19 @@ impl ResourceController { self.add_resource_group(DEFAULT_RESOURCE_GROUP_NAME.as_bytes().to_owned(), 0); return; } - self.resource_consumptions.remove(name); + self.resource_consumptions.write().remove(name); } #[inline] - fn resource_group(&self, name: &[u8]) -> Ref<'_, Vec, GroupPriorityTracker> { - if let Some(g) = self.resource_consumptions.get(name) { - g - } else { - self.resource_consumptions - .get(DEFAULT_RESOURCE_GROUP_NAME.as_bytes()) - .unwrap() - } + fn resource_group(&self, name: &[u8]) -> MappedRwLockReadGuard<'_, GroupPriorityTracker> { + let guard = self.resource_consumptions.read(); + RwLockReadGuard::map(guard, |m| { + if let Some(g) = m.get(name) { + g + } else { + m.get(DEFAULT_RESOURCE_GROUP_NAME.as_bytes()).unwrap() + } + }) } pub fn consume(&self, name: &[u8], delta: ResourceConsumeType) { @@ -237,15 +243,18 @@ impl ResourceController { pub fn update_min_virtual_time(&self) { let mut min_vt = u64::MAX; let mut max_vt = 0; - self.resource_consumptions.iter().for_each(|g| { - let vt = g.current_vt(); - if min_vt > vt { - min_vt = vt; - } - if max_vt < vt { - max_vt = vt; - } - }); + self.resource_consumptions + .read() + .iter() + .for_each(|(_, tracker)| { + let vt = tracker.current_vt(); + if min_vt > vt { + min_vt = vt; + } + if max_vt < vt { + max_vt = vt; + } + }); // TODO: use different threshold for different resource type // needn't do update if the virtual different is less than 100ms/100KB. 
@@ -253,13 +262,16 @@ impl ResourceController { return; } - self.resource_consumptions.iter().for_each(|g| { - let vt = g.current_vt(); - if vt < max_vt { - // TODO: is increase by half is a good choice. - g.increase_vt((max_vt - vt) / 2); - } - }); + self.resource_consumptions + .read() + .iter() + .for_each(|(_, tracker)| { + let vt = tracker.current_vt(); + if vt < max_vt { + // TODO: is increase by half is a good choice. + tracker.increase_vt((max_vt - vt) / 2); + } + }); // max_vt is actually a little bigger than the current min vt, but we don't // need totally accurate here. self.last_min_vt.store(max_vt, Ordering::Relaxed); @@ -414,7 +426,7 @@ pub(crate) mod tests { assert_eq!(resource_manager.resource_groups.len(), 2); let resource_ctl = resource_manager.derive_controller("test_read".into(), true); - assert_eq!(resource_ctl.resource_consumptions.len(), 3); + assert_eq!(resource_ctl.resource_consumptions.read().len(), 3); let group1 = resource_ctl.resource_group("test".as_bytes()); assert_eq!(group1.weight, 500); @@ -473,7 +485,7 @@ pub(crate) mod tests { let new_group = new_resource_group_ru("new_group".into(), 500); resource_manager.add_resource_group(new_group); - assert_eq!(resource_ctl.resource_consumptions.len(), 4); + assert_eq!(resource_ctl.resource_consumptions.read().len(), 4); let group3 = resource_ctl.resource_group("new_group".as_bytes()); assert_eq!(group3.weight, 200); assert!(group3.current_vt() >= group1_vt / 2); @@ -524,22 +536,34 @@ pub(crate) mod tests { let group1 = new_resource_group_ru(format!("group{}", i), 100); resource_manager.add_resource_group(group1); } + // consume for default group + resource_ctl.consume( + b"default", + ResourceConsumeType::CpuTime(Duration::from_micros(10000)), + ); + resource_ctl_write.consume(b"default", ResourceConsumeType::IoBytes(10000)); + assert_eq!(resource_manager.get_all_resource_groups().len(), 10); - assert_eq!(resource_ctl.resource_consumptions.len(), 11); // 10 + 1(default) - 
assert_eq!(resource_ctl_write.resource_consumptions.len(), 11); + assert_eq!(resource_ctl.resource_consumptions.read().len(), 11); // 10 + 1(default) + assert_eq!(resource_ctl_write.resource_consumptions.read().len(), 11); resource_manager.retain(|k, _v| k.starts_with("test")); assert_eq!(resource_manager.get_all_resource_groups().len(), 5); - assert_eq!(resource_ctl.resource_consumptions.len(), 6); - assert_eq!(resource_ctl_write.resource_consumptions.len(), 6); + assert_eq!(resource_ctl.resource_consumptions.read().len(), 6); + assert_eq!(resource_ctl_write.resource_consumptions.read().len(), 6); assert!(resource_manager.get_resource_group("group1").is_none()); - assert_eq!( - resource_ctl.resource_group("group2".as_bytes()).key(), - "default".as_bytes() + // should use the virtual time of default group for non-exist group + assert_ne!( + resource_ctl + .resource_group("group2".as_bytes()) + .current_vt(), + 0 ); - assert_eq!( - resource_ctl_write.resource_group("group2".as_bytes()).key(), - "default".as_bytes() + assert_ne!( + resource_ctl_write + .resource_group("group2".as_bytes()) + .current_vt(), + 0 ); } } From 6f85355d0b115d0cf59c7a37f587c96ad0cfa232 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 14 Mar 2023 17:34:39 +0800 Subject: [PATCH 0583/1149] txn: add more logs for panic (#14394) ref tikv/tikv#14390 Signed-off-by: Jay Lee --- components/tikv_kv/src/lib.rs | 4 ++++ components/tikv_kv/src/raftstore_impls.rs | 6 ++++++ src/storage/mvcc/reader/mod.rs | 8 ++++++-- src/storage/mvcc/reader/reader.rs | 8 ++++++-- src/storage/txn/actions/check_txn_status.rs | 14 ++++++++++++-- src/storage/txn/commands/check_secondary_locks.rs | 4 +++- 6 files changed, 37 insertions(+), 7 deletions(-) diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 22b11e425c5..c5313620995 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -514,6 +514,10 @@ pub trait SnapshotExt { None } + fn get_region_id(&self) -> Option { + 
None + } + fn get_txn_extra_op(&self) -> TxnExtraOp { TxnExtraOp::Noop } diff --git a/components/tikv_kv/src/raftstore_impls.rs b/components/tikv_kv/src/raftstore_impls.rs index c1384bdcd45..e89087e565f 100644 --- a/components/tikv_kv/src/raftstore_impls.rs +++ b/components/tikv_kv/src/raftstore_impls.rs @@ -40,10 +40,16 @@ impl<'a, S: Snapshot> SnapshotExt for RegionSnapshotExt<'a, S> { .unwrap_or(false) } + #[inline] fn get_term(&self) -> Option { self.snapshot.term } + #[inline] + fn get_region_id(&self) -> Option { + Some(self.snapshot.get_region().id) + } + fn get_txn_extra_op(&self) -> TxnExtraOp { self.snapshot.txn_extra_op } diff --git a/src/storage/mvcc/reader/mod.rs b/src/storage/mvcc/reader/mod.rs index 2e7d20ccf2b..949d8094e72 100644 --- a/src/storage/mvcc/reader/mod.rs +++ b/src/storage/mvcc/reader/mod.rs @@ -83,10 +83,14 @@ impl TxnCommitRecord { } } - pub fn unwrap_none(self) -> Option { + #[inline] + pub fn unwrap_none(self, region_id: u64) -> Option { match self { Self::None { overlapped_write } => overlapped_write, - _ => panic!("txn record found but not expected: {:?}", self), + _ => panic!( + "txn record found but not expected: {:?} [region_id={}]", + self, region_id + ), } } } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index d8f31ba77a8..36e8816ad25 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -755,6 +755,10 @@ impl MvccReader { pub fn set_hint_min_ts(&mut self, ts_bound: Option>) { self.hint_min_ts = ts_bound; } + + pub fn snapshot_ext(&self) -> S::Ext<'_> { + self.snapshot.ext() + } } #[cfg(test)] @@ -1222,7 +1226,7 @@ pub mod tests { let overlapped_write = reader .get_txn_commit_record(&key, 55.into()) .unwrap() - .unwrap_none(); + .unwrap_none(0); assert!(overlapped_write.is_none()); // When no such record is found but a record of another txn has a write record @@ -1230,7 +1234,7 @@ pub mod tests { let overlapped_write = reader .get_txn_commit_record(&key, 
50.into()) .unwrap() - .unwrap_none() + .unwrap_none(0) .unwrap(); assert_eq!(overlapped_write.write.start_ts, 45.into()); assert_eq!(overlapped_write.write.write_type, WriteType::Put); diff --git a/src/storage/txn/actions/check_txn_status.rs b/src/storage/txn/actions/check_txn_status.rs index 88982d6da72..a3cd3253201 100644 --- a/src/storage/txn/actions/check_txn_status.rs +++ b/src/storage/txn/actions/check_txn_status.rs @@ -1,5 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. +use tikv_kv::SnapshotExt; // #[PerformanceCriticalPath] use txn_types::{Key, Lock, TimeStamp, Write, WriteType}; @@ -154,8 +155,17 @@ pub fn rollback_lock( ) -> Result> { let overlapped_write = match reader.get_txn_commit_record(&key)? { TxnCommitRecord::None { overlapped_write } => overlapped_write, - TxnCommitRecord::SingleRecord { write, .. } if write.write_type != WriteType::Rollback => { - panic!("txn record found but not expected: {:?}", txn) + TxnCommitRecord::SingleRecord { write, commit_ts } + if write.write_type != WriteType::Rollback => + { + panic!( + "txn record found but not expected: {:?} {} {:?} {:?} [region_id={}]", + write, + commit_ts, + txn, + lock, + reader.reader.snapshot_ext().get_region_id().unwrap_or(0) + ) } _ => return Ok(txn.unlock_key(key, is_pessimistic_txn, TimeStamp::zero())), }; diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index a19a5d82bb6..d21d47871d4 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -58,6 +58,7 @@ impl WriteCommand for CheckSecondaryLocks { fn process_write(self, snapshot: S, context: WriteContext<'_, L>) -> Result { // It is not allowed for commit to overwrite a protected rollback. So we update // max_ts to prevent this case from happening. 
+ let region_id = self.ctx.get_region_id(); context.concurrency_manager.update_max_ts(self.start_ts); let mut txn = MvccTxn::new(self.start_ts, context.concurrency_manager); @@ -77,7 +78,8 @@ impl WriteCommand for CheckSecondaryLocks { Some(lock) if lock.ts == self.start_ts => { if lock.lock_type == LockType::Pessimistic { released_lock = txn.unlock_key(key.clone(), true, TimeStamp::zero()); - let overlapped_write = reader.get_txn_commit_record(&key)?.unwrap_none(); + let overlapped_write = + reader.get_txn_commit_record(&key)?.unwrap_none(region_id); (SecondaryLockStatus::RolledBack, true, overlapped_write) } else { (SecondaryLockStatus::Locked(lock), false, None) From c3e1cfb04046fe8ee1bc4a7ce453f273490e697a Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 14 Mar 2023 18:18:39 +0800 Subject: [PATCH 0584/1149] storage: Fix flow controller pending compaction bytes always be zero (#14393) close tikv/tikv#14392 Fix the issue that flow control may not work when pending compaction bytes is high. If the pending compaction bytes is 0, then 0.log2() is -INF which would cause the later average always be zero even if the pending compaction bytes is already high. 
Signed-off-by: Connor1996 Co-authored-by: Ti Chi Robot --- .../singleton_flow_controller.rs | 121 ++++++++---------- 1 file changed, 55 insertions(+), 66 deletions(-) diff --git a/src/storage/txn/flow_controller/singleton_flow_controller.rs b/src/storage/txn/flow_controller/singleton_flow_controller.rs index f51249facfc..abf0689f1fc 100644 --- a/src/storage/txn/flow_controller/singleton_flow_controller.rs +++ b/src/storage/txn/flow_controller/singleton_flow_controller.rs @@ -728,7 +728,11 @@ impl FlowChecker { // Because pending compaction bytes changes dramatically, take the // logarithm of pending compaction bytes to make the values fall into // a relative small range - let num = (self.engine.pending_compaction_bytes(self.region_id, &cf) as f64).log2(); + let mut num = (self.engine.pending_compaction_bytes(self.region_id, &cf) as f64).log2(); + if !num.is_finite() { + // 0.log2() == -inf, which is not expected and may lead to sum always be NaN + num = 0.0; + } let checker = self.cf_checkers.get_mut(&cf).unwrap(); checker.long_term_pending_bytes.observe(num); SCHED_PENDING_COMPACTION_BYTES_GAUGE @@ -1078,6 +1082,15 @@ pub(super) mod tests { } } + fn send_flow_info(tx: &mpsc::SyncSender, region_id: u64) { + tx.send(FlowInfo::Flush("default".to_string(), 0, region_id)) + .unwrap(); + tx.send(FlowInfo::Compaction("default".to_string(), region_id)) + .unwrap(); + tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) + .unwrap(); + } + pub fn test_flow_controller_basic_impl(flow_controller: &FlowController, region_id: u64) { // enable flow controller assert_eq!(flow_controller.enabled(), true); @@ -1130,48 +1143,34 @@ pub(super) mod tests { // exceeds the threshold on start stub.0.num_memtables.store(8, Ordering::Relaxed); - tx.send(FlowInfo::Flush("default".to_string(), 0, region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, region_id); assert_eq!(flow_controller.should_drop(region_id), 
false); // on start check forbids flow control assert_eq!(flow_controller.is_unlimited(region_id), true); // once falls below the threshold, pass the on start check stub.0.num_memtables.store(1, Ordering::Relaxed); - tx.send(FlowInfo::Flush("default".to_string(), 0, region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, region_id); // not throttle when the average of the sliding window doesn't exceeds the // threshold stub.0.num_memtables.store(6, Ordering::Relaxed); - tx.send(FlowInfo::Flush("default".to_string(), 0, region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, region_id); assert_eq!(flow_controller.should_drop(region_id), false); assert_eq!(flow_controller.is_unlimited(region_id), true); // the average of sliding window exceeds the threshold stub.0.num_memtables.store(6, Ordering::Relaxed); - tx.send(FlowInfo::Flush("default".to_string(), 0, region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, region_id); assert_eq!(flow_controller.should_drop(region_id), false); assert_eq!(flow_controller.is_unlimited(region_id), false); assert_ne!(flow_controller.consume(region_id, 2000), Duration::ZERO); // not throttle once the number of memtables falls below the threshold stub.0.num_memtables.store(1, Ordering::Relaxed); - tx.send(FlowInfo::Flush("default".to_string(), 0, region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, region_id); assert_eq!(flow_controller.should_drop(region_id), false); assert_eq!(flow_controller.is_unlimited(region_id), true); } + #[test] fn test_flow_controller_memtable() { let stub = EngineStub::new(); @@ -1198,26 +1197,17 @@ pub(super) mod tests { // exceeds the threshold stub.0.num_l0_files.store(30, Ordering::Relaxed); - 
tx.send(FlowInfo::L0("default".to_string(), 0, region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, region_id); assert_eq!(flow_controller.should_drop(region_id), false); // on start check forbids flow control assert_eq!(flow_controller.is_unlimited(region_id), true); // once fall below the threshold, pass the on start check stub.0.num_l0_files.store(10, Ordering::Relaxed); - tx.send(FlowInfo::L0("default".to_string(), 0, region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, region_id); // exceeds the threshold, throttle now stub.0.num_l0_files.store(30, Ordering::Relaxed); - tx.send(FlowInfo::L0("default".to_string(), 0, region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, region_id); assert_eq!(flow_controller.should_drop(region_id), false); assert_eq!(flow_controller.is_unlimited(region_id), false); assert_ne!(flow_controller.consume(region_id, 2000), Duration::ZERO); @@ -1243,41 +1233,25 @@ pub(super) mod tests { stub.0 .pending_compaction_bytes .store(1000 * 1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string(), region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, region_id); // on start check forbids flow control - assert!( - flow_controller.discard_ratio(region_id) < f64::EPSILON, - "discard_ratio {}", - flow_controller.discard_ratio(region_id) - ); + assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); // once fall below the threshold, pass the on start check stub.0 .pending_compaction_bytes .store(100 * 1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string(), region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, 
region_id); stub.0 .pending_compaction_bytes .store(1000 * 1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string(), region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, region_id); assert!(flow_controller.discard_ratio(region_id) > f64::EPSILON); stub.0 .pending_compaction_bytes .store(1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string(), region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, region_id); assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); // pending compaction bytes jump after unsafe destroy range @@ -1291,10 +1265,7 @@ pub(super) mod tests { stub.0 .pending_compaction_bytes .store(1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string(), region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, region_id); assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); stub.0 @@ -1316,19 +1287,13 @@ pub(super) mod tests { stub.0 .pending_compaction_bytes .store(1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string(), region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, region_id); assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); stub.0 .pending_compaction_bytes .store(1000000000 * 1024 * 1024 * 1024, Ordering::Relaxed); - tx.send(FlowInfo::Compaction("default".to_string(), region_id)) - .unwrap(); - tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) - .unwrap(); + send_flow_info(tx, region_id); assert!(flow_controller.discard_ratio(region_id) > f64::EPSILON); } @@ -1342,6 +1307,30 @@ pub(super) mod tests { test_flow_controller_pending_compaction_bytes_impl(&flow_controller, &stub, &tx, 
0); } + #[test] + fn test_flow_controller_pending_compaction_bytes_of_zero() { + let region_id = 0; + let stub = EngineStub::new(); + let (tx, rx) = mpsc::sync_channel(0); + let flow_controller = + EngineFlowController::new(&FlowControlConfig::default(), stub.clone(), rx); + let flow_controller = FlowController::Singleton(flow_controller); + + // should handle zero pending compaction bytes properly + stub.0.pending_compaction_bytes.store(0, Ordering::Relaxed); + send_flow_info(&tx, region_id); + assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); + stub.0 + .pending_compaction_bytes + .store(10000000000 * 1024 * 1024 * 1024, Ordering::Relaxed); + send_flow_info(&tx, region_id); + stub.0 + .pending_compaction_bytes + .store(10000000000 * 1024 * 1024 * 1024, Ordering::Relaxed); + send_flow_info(&tx, region_id); + assert!(flow_controller.discard_ratio(region_id) > f64::EPSILON); + } + #[test] fn test_smoother() { let mut smoother = Smoother::::default(); From bff6695aef5200da447eab96838a199e63bed50d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 16 Mar 2023 14:46:39 +0800 Subject: [PATCH 0585/1149] log-backup: Fix sub tracking (#14185) close tikv/tikv#14184 Signed-off-by: hillium --- components/backup-stream/src/endpoint.rs | 4 +- components/backup-stream/src/event_loader.rs | 2 +- components/backup-stream/src/lib.rs | 4 +- components/backup-stream/src/router.rs | 2 +- .../backup-stream/src/subscription_manager.rs | 8 +- .../backup-stream/src/subscription_track.rs | 263 +++++++++++++----- components/backup-stream/src/utils.rs | 2 +- components/backup-stream/tests/mod.rs | 34 ++- 8 files changed, 246 insertions(+), 73 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 49ca811285b..d8c0e09744f 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -56,7 +56,7 @@ use crate::{ 
observer::BackupStreamObserver, router::{ApplyEvents, Router, TaskSelector}, subscription_manager::{RegionSubscriptionManager, ResolvedRegions}, - subscription_track::{ResolveResult, SubscriptionTracer}, + subscription_track::{Ref, RefMut, ResolveResult, SubscriptionTracer}, try_send, utils::{self, CallbackWaitGroup, StopWatch, Work}, }; @@ -477,7 +477,7 @@ where } fn backup_batch(&self, batch: CmdBatch, work: Work) { - let mut sw = StopWatch::new(); + let mut sw = StopWatch::by_now(); let router = self.range_router.clone(); let sched = self.scheduler.clone(); diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 13c958a499a..8b808a16cca 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -37,7 +37,7 @@ use crate::{ errors::{ContextualResultExt, Error, Result}, metrics, router::{ApplyEvent, ApplyEvents, Router}, - subscription_track::{SubscriptionTracer, TwoPhaseResolver}, + subscription_track::{Ref, RefMut, SubscriptionTracer, TwoPhaseResolver}, try_send, utils::{self, RegionPager}, Task, diff --git a/components/backup-stream/src/lib.rs b/components/backup-stream/src/lib.rs index 34dbfa33e4c..a36b42c227d 100644 --- a/components/backup-stream/src/lib.rs +++ b/components/backup-stream/src/lib.rs @@ -16,7 +16,9 @@ pub mod router; mod service; mod subscription_manager; mod subscription_track; -mod utils; +// Publish it for integration test. +// Perhaps we'd better move some of then into `tikv_util`. 
+pub mod utils; pub use checkpoint_manager::GetCheckpointResult; pub use endpoint::{Endpoint, ObserveOp, RegionCheckpointOperation, RegionSet, Task}; diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index ead124c103a..5b862f732a2 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -1174,7 +1174,7 @@ impl StreamTaskInfo { return Ok(None); } let begin = Instant::now_coarse(); - let mut sw = StopWatch::new(); + let mut sw = StopWatch::by_now(); // generate meta data and prepare to flush to storage let mut metadata_info = self diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index 4f75423a241..6e72d66a98b 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -478,7 +478,6 @@ where // safely. let rts = min_region.map(|rs| rs.checkpoint).unwrap_or(min_ts); info!("getting checkpoint"; "defined_by_region" => ?min_region); - self.subs.warn_if_gap_too_huge(rts); callback(ResolvedRegions::new(rts, cps)); } } @@ -497,6 +496,7 @@ where .with_label_values(&["region-changed"]) .inc(); let r = async { + self.subs.add_pending_region(region); self.observe_over_with_initial_data_from_checkpoint( region, self.get_last_checkpoint_of(&for_task, region).await?, @@ -518,7 +518,7 @@ where } else { warn!( "BUG: the region {:?} is register to no task but being observed", - ®ion + utils::debug_region(region) ); } } @@ -538,6 +538,9 @@ where } Some(for_task) => { + // the extra failpoint is used to pause the thread. + // once it triggered "pause" it cannot trigger early return then. 
+ fail::fail_point!("try_start_observe0"); fail::fail_point!("try_start_observe", |_| { Err(Error::Other(box_err!("Nature is boring"))) }); @@ -550,6 +553,7 @@ where async fn start_observe(&self, region: Region) { let handle = ObserveHandle::new(); + self.subs.add_pending_region(®ion); if let Err(err) = self.try_start_observe(®ion, handle.clone()).await { warn!("failed to start observe, retrying"; "err" => %err); try_send!( diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 1f823130d3b..c13339d1c29 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -1,9 +1,9 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{collections::HashSet, sync::Arc, time::Duration}; +use std::{collections::HashSet, sync::Arc}; use dashmap::{ - mapref::{entry::Entry, one::RefMut}, + mapref::{entry::Entry, one::RefMut as DashRefMut}, DashMap, }; use kvproto::metapb::Region; @@ -16,15 +16,50 @@ use crate::{debug, metrics::TRACK_REGION, utils}; /// A utility to tracing the regions being subscripted. #[derive(Clone, Default, Debug)] -pub struct SubscriptionTracer(Arc>); +pub struct SubscriptionTracer(Arc>); + +/// The state of the subscription state machine: +/// Initial state is `ABSENT`, the subscription isn't in the tracer. +/// Once it becomes the leader, it would be in `PENDING` state, where we would +/// prepare the information needed for doing initial scanning. +/// When we are able to start execute initial scanning, it would be in `RUNNING` +/// state, where it starts to handle events. +/// You may notice there are also some state transforms in the +/// [`TwoPhaseResolver`] struct, states there are sub-states of the `RUNNING` +/// stage here. +enum SubscribeState { + // NOTE: shall we add `SubscriptionHandle` here? + // (So we can check this when calling `remove_if`.) 
+ Pending(Region), + Running(ActiveSubscription), +} + +impl SubscribeState { + /// check whether the current state is pending. + fn is_pending(&self) -> bool { + matches!(self, SubscribeState::Pending(_)) + } +} -pub struct RegionSubscription { +impl std::fmt::Debug for SubscribeState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Pending(arg0) => f + .debug_tuple("Pending") + .field(&utils::debug_region(arg0)) + .finish(), + Self::Running(arg0) => f.debug_tuple("Running").field(arg0).finish(), + } + } +} + +pub struct ActiveSubscription { pub meta: Region, pub(crate) handle: ObserveHandle, pub(crate) resolver: TwoPhaseResolver, } -impl std::fmt::Debug for RegionSubscription { +impl std::fmt::Debug for ActiveSubscription { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_tuple("RegionSubscription") .field(&self.meta.get_id()) @@ -33,7 +68,7 @@ impl std::fmt::Debug for RegionSubscription { } } -impl RegionSubscription { +impl ActiveSubscription { pub fn new(region: Region, handle: ObserveHandle, start_ts: Option) -> Self { let resolver = TwoPhaseResolver::new(region.get_id(), start_ts); Self { @@ -100,7 +135,7 @@ impl std::fmt::Debug for ResolveResult { } impl ResolveResult { - fn resolve(sub: &mut RegionSubscription, min_ts: TimeStamp) -> Self { + fn resolve(sub: &mut ActiveSubscription, min_ts: TimeStamp) -> Self { let ts = sub.resolver.resolve(min_ts); let ty = if ts == min_ts { CheckpointType::MinTs @@ -121,12 +156,46 @@ impl SubscriptionTracer { /// clear the current `SubscriptionTracer`. pub fn clear(&self) { self.0.retain(|_, v| { - v.stop(); - TRACK_REGION.dec(); + if let SubscribeState::Running(s) = v { + s.stop(); + TRACK_REGION.dec(); + } false }); } + /// Add a pending region into the tracker. + /// A `PENDING` region is a region we are going to start subscribe however + /// there are still tiny impure things need to do. (e.g. getting the + /// checkpoint of this region.) 
+ /// + /// This state is a placeholder for those regions: once they failed in the + /// impure operations, this would be the evidence proofing they were here. + /// + /// So we can do better when we are doing refreshing, say: + /// ```no_run + /// match task { + /// Task::RefreshObserve(r) if is_pending(r) => { /* Execute the refresh. */ } + /// Task::RefreshObserve(r) if is_absent(r) => { /* Do nothing. Maybe stale. */ } + /// } + /// ``` + /// + /// We should execute the refresh when it is pending, because the start may + /// fail and then a refresh fires. + /// We should skip when we are going to refresh absent regions because there + /// may be some stale commands. + pub fn add_pending_region(&self, region: &Region) { + let r = self + .0 + .insert(region.get_id(), SubscribeState::Pending(region.clone())); + if let Some(s) = r { + warn!( + "excepted state transform: running | pending -> pending"; + "old" => ?s, utils::slog_region(region), + ) + } + } + // Register a region as tracing. // The `start_ts` is used to tracking the progress of initial scanning. // Note: the `None` case of `start_ts` is for testing / refresh region status @@ -138,14 +207,25 @@ impl SubscriptionTracer { handle: ObserveHandle, start_ts: Option, ) { - info!("start listen stream from store"; "observer" => ?handle, "region_id" => %region.get_id()); + info!("start listen stream from store"; "observer" => ?handle); TRACK_REGION.inc(); - if let Some(mut o) = self.0.insert( - region.get_id(), - RegionSubscription::new(region.clone(), handle, start_ts), - ) { - TRACK_REGION.dec(); - o.stop(); + let e = self.0.entry(region.id); + match e { + Entry::Occupied(o) => { + let sub = ActiveSubscription::new(region.clone(), handle, start_ts); + let (_, s) = o.replace_entry(SubscribeState::Running(sub)); + if !s.is_pending() { + // If there is another subscription already (perhaps repeated Start), + // don't add the counter. 
+ warn!("excepted state transform: running -> running"; "old" => ?s, utils::slog_region(region)); + TRACK_REGION.dec(); + } + } + Entry::Vacant(e) => { + warn!("excepted state transform: absent -> running"; utils::slog_region(region)); + let sub = ActiveSubscription::new(region.clone(), handle, start_ts); + e.insert(SubscribeState::Running(sub)); + } } } @@ -163,59 +243,54 @@ impl SubscriptionTracer { let rs = regions.into_iter().collect::>(); self.0 .iter_mut() - .filter(|s| { - let contains = rs.contains(s.key()); - if !contains { - crate::metrics::LOST_LEADER_REGION.inc(); + // Don't advance the checkpoint ts of pending region. + .filter_map(|mut s| { + let region_id = *s.key(); + match s.value_mut() { + SubscribeState::Running(sub) => { + let contains = rs.contains(®ion_id); + if !contains { + crate::metrics::LOST_LEADER_REGION.inc(); + } + contains.then(|| ResolveResult::resolve(sub, min_ts)) } - contains + SubscribeState::Pending(r) => {warn!("pending region, skip resolving"; utils::slog_region(r)); None}, + } }) - .map(|mut s| ResolveResult::resolve(s.value_mut(), min_ts)) .collect() } - #[inline(always)] - pub fn warn_if_gap_too_huge(&self, ts: TimeStamp) { - let gap = TimeStamp::physical_now() - ts.physical(); - if gap >= 10 * 60 * 1000 - // 10 mins - { - let far_resolver = self - .0 - .iter() - .min_by_key(|r| r.value().resolver.resolved_ts()); - warn!("log backup resolver ts advancing too slow"; - "far_resolver" => %{match far_resolver { - Some(r) => format!("{:?}", r.value().resolver), - None => "BUG[NoResolverButResolvedTSDoesNotAdvance]".to_owned() - }}, - "gap" => ?Duration::from_millis(gap), - ); - } - } - /// try to mark a region no longer be tracked by this observer. /// returns whether success (it failed if the region hasn't been observed /// when calling this.) 
pub fn deregister_region_if( &self, region: &Region, - if_cond: impl FnOnce(&RegionSubscription, &Region) -> bool, + if_cond: impl FnOnce(&ActiveSubscription, &Region) -> bool, ) -> bool { let region_id = region.get_id(); let remove_result = self.0.entry(region_id); match remove_result { - Entry::Occupied(mut x) => { - if if_cond(x.get(), region) { - TRACK_REGION.dec(); - x.get_mut().stop(); - let v = x.remove(); - info!("stop listen stream from store"; "observer" => ?v, "region_id"=> %region_id); - return true; - } - false - } Entry::Vacant(_) => false, + Entry::Occupied(mut o) => match o.get_mut() { + SubscribeState::Pending(r) => { + info!("remove pending subscription"; "region_id"=> %region_id, utils::slog_region(r)); + + o.remove(); + true + } + SubscribeState::Running(s) => { + if if_cond(s, region) { + TRACK_REGION.dec(); + s.stop(); + info!("stop listen stream from store"; "observer" => ?s, "region_id"=> %region_id); + + o.remove(); + return true; + } + false + } + }, } } @@ -229,8 +304,8 @@ impl SubscriptionTracer { let mut sub = match self.get_subscription_of(new_region.get_id()) { Some(sub) => sub, None => { - warn!("backup stream observer refreshing void subscription."; utils::slog_region(new_region)); - return true; + warn!("backup stream observer refreshing pending / absent subscription."; utils::slog_region(new_region)); + return false; } }; @@ -250,11 +325,10 @@ impl SubscriptionTracer { pub fn is_observing(&self, region_id: u64) -> bool { let sub = self.0.get_mut(®ion_id); match sub { - Some(mut sub) if !sub.is_observing() => { - sub.value_mut().stop(); - false - } - Some(_) => true, + Some(mut s) => match s.value_mut() { + SubscribeState::Pending(_) => false, + SubscribeState::Running(s) => s.is_observing(), + }, None => false, } } @@ -262,8 +336,68 @@ impl SubscriptionTracer { pub fn get_subscription_of( &self, region_id: u64, - ) -> Option> { - self.0.get_mut(®ion_id) + ) -> Option + '_> { + self.0 + .get_mut(®ion_id) + .and_then(|x| 
SubscriptionRef::try_from_dash(x)) + } +} + +pub trait Ref { + type Key; + type Value; + + fn key(&self) -> &Self::Key; + fn value(&self) -> &Self::Value; +} + +pub trait RefMut: Ref { + fn value_mut(&mut self) -> &mut ::Value; +} + +impl<'a> Ref for SubscriptionRef<'a> { + type Key = u64; + type Value = ActiveSubscription; + + fn key(&self) -> &Self::Key { + DashRefMut::key(&self.0) + } + + fn value(&self) -> &Self::Value { + self.sub() + } +} + +impl<'a> RefMut for SubscriptionRef<'a> { + fn value_mut(&mut self) -> &mut ::Value { + self.sub_mut() + } +} + +struct SubscriptionRef<'a>(DashRefMut<'a, u64, SubscribeState>); + +impl<'a> SubscriptionRef<'a> { + fn try_from_dash(mut d: DashRefMut<'a, u64, SubscribeState>) -> Option { + match d.value_mut() { + SubscribeState::Pending(_) => None, + SubscribeState::Running(_) => Some(Self(d)), + } + } + + fn sub(&self) -> &ActiveSubscription { + match self.0.value() { + // Panic Safety: the constructor would prevent us from creating pending subscription + // ref. 
+ SubscribeState::Pending(_) => unreachable!(), + SubscribeState::Running(s) => s, + } + } + + fn sub_mut(&mut self) -> &mut ActiveSubscription { + match self.0.value_mut() { + SubscribeState::Pending(_) => unreachable!(), + SubscribeState::Running(s) => s, + } } } @@ -434,6 +568,7 @@ mod test { use txn_types::TimeStamp; use super::{SubscriptionTracer, TwoPhaseResolver}; + use crate::subscription_track::RefMut; #[test] fn test_two_phase_resolver() { @@ -498,6 +633,7 @@ mod test { ); subs.get_subscription_of(3) .unwrap() + .value_mut() .resolver .phase_one_done(); subs.register_region( @@ -506,8 +642,9 @@ mod test { Some(TimeStamp::new(92)), ); let mut region4_sub = subs.get_subscription_of(4).unwrap(); - region4_sub.resolver.phase_one_done(); + region4_sub.value_mut().resolver.phase_one_done(); region4_sub + .value_mut() .resolver .track_lock(TimeStamp::new(128), b"Alpi".to_vec()); subs.register_region(®ion(5, 8, 1), ObserveHandle::new(), None); diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index a5d83e50328..77c689da70d 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -144,7 +144,7 @@ pub struct StopWatch(Instant); impl StopWatch { /// Create a new stopwatch via current time. 
- pub fn new() -> Self { + pub fn by_now() -> Self { Self(Instant::now_coarse()) } diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index d6dfb2b2839..db4f84924b0 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -19,7 +19,7 @@ use backup_stream::{ }, observer::BackupStreamObserver, router::Router, - Endpoint, GetCheckpointResult, RegionCheckpointOperation, RegionSet, Service, Task, + utils, Endpoint, GetCheckpointResult, RegionCheckpointOperation, RegionSet, Service, Task, }; use futures::{executor::block_on, AsyncWriteExt, Future, Stream, StreamExt}; use grpcio::{ChannelBuilder, Server, ServerBuilder}; @@ -403,7 +403,7 @@ impl Suite { rx.into_iter() .map(|r| match r { GetCheckpointResult::Ok { checkpoint, region } => { - info!("getting checkpoint"; "checkpoint" => %checkpoint, "region" => ?region); + info!("getting checkpoint"; "checkpoint" => %checkpoint, utils::slog_region(®ion)); checkpoint.into_inner() } GetCheckpointResult::NotFound { .. } @@ -1320,6 +1320,36 @@ mod test { )); } + #[test] + fn failure_and_split() { + let mut suite = super::SuiteBuilder::new_named("failure_and_split") + .nodes(1) + .build(); + fail::cfg("try_start_observe0", "pause").unwrap(); + + // write data before the task starting, for testing incremental scanning. 
+ let round1 = run_async_test(suite.write_records(0, 128, 1)); + suite.must_register_task(1, "failure_and_split"); + suite.sync(); + + suite.must_split(&make_split_key_at_record(1, 42)); + suite.sync(); + std::thread::sleep(Duration::from_millis(200)); + fail::cfg("try_start_observe", "2*return").unwrap(); + fail::cfg("try_start_observe0", "off").unwrap(); + + let round2 = run_async_test(suite.write_records(256, 128, 1)); + suite.force_flush_files("failure_and_split"); + suite.wait_for_flush(); + run_async_test(suite.check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(Vec::as_slice), + )); + let cp = suite.global_checkpoint(); + assert!(cp > 512, "it is {}", cp); + suite.cluster.shutdown(); + } + #[test] fn resolved_follower() { let mut suite = super::SuiteBuilder::new_named("r").build(); From 09cd29f3f2eafbd835896db9e1f1e4a2e03123aa Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 16 Mar 2023 15:04:40 +0800 Subject: [PATCH 0586/1149] raftstore-v2: add some logs and fix possible race between clean and tick (#14399) ref tikv/tikv#14386 None Signed-off-by: tabokie Co-authored-by: Ti Chi Robot --- .../operation/command/admin/compact_log.rs | 17 +++++++++++++--- .../raftstore-v2/src/operation/ready/mod.rs | 20 +++++++++++-------- .../raftstore/src/store/async_io/write.rs | 3 ++- .../raftstore/src/store/entry_storage.rs | 7 ++++++- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 1ce118a957f..ed4d22a59b4 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -289,7 +289,8 @@ impl Peer { old_tablet: EK, new_tablet_index: u64, ) { - info!(self.logger, + info!( + self.logger, "record tombstone tablet"; "prev_tablet_path" => old_tablet.path(), "new_tablet_index" => new_tablet_index @@ 
-490,11 +491,21 @@ impl Peer { // There is no logs at RAFT_INIT_LOG_INDEX, nothing to delete. return None; } + assert!( + compact_index <= self.raft_group().raft.raft_log.committed, + "{}: compact_index={}, committed={}", + SlogFormat(&self.logger), + compact_index, + self.raft_group().raft.raft_log.committed, + ); // TODO: make this debug when stable. - info!(self.logger, "compact log"; + info!( + self.logger, + "compact log"; "index" => compact_index, "apply_trace" => ?self.storage().apply_trace(), - "truncated" => ?self.entry_storage().apply_state()); + "truncated" => ?self.entry_storage().apply_state() + ); Some(compact_index) } } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 4c0bf9cbe88..3755d92b587 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -154,7 +154,7 @@ impl Peer { // When it's handling snapshot, it's pointless to tick as all the side // affects have to wait till snapshot is applied. On the other hand, ticking // will bring other corner cases like elections. 
- !self.is_handling_snapshot() && self.raft_group_mut().tick() + !self.is_handling_snapshot() && self.serving() && self.raft_group_mut().tick() } pub fn on_peer_unreachable(&mut self, to_peer_id: u64) { @@ -932,7 +932,7 @@ impl Storage { write_task: &mut WriteTask, ) { let prev_raft_state = self.entry_storage().raft_state().clone(); - let ever_persisted = self.ever_persisted(); + let prev_ever_persisted = self.ever_persisted(); if !ready.snapshot().is_empty() { if let Err(e) = self.apply_snapshot( @@ -946,20 +946,24 @@ impl Storage { } } - let entry_storage = self.entry_storage_mut(); if !ready.entries().is_empty() { - entry_storage.append(ready.take_entries(), write_task); + assert!(self.ever_persisted(), "{}", SlogFormat(self.logger())); + self.entry_storage_mut() + .append(ready.take_entries(), write_task); } if let Some(hs) = ready.hs() { - entry_storage.raft_state_mut().set_hard_state(hs.clone()); + self.entry_storage_mut() + .raft_state_mut() + .set_hard_state(hs.clone()); } - if !ever_persisted || prev_raft_state != *entry_storage.raft_state() { + let entry_storage = self.entry_storage(); + if !prev_ever_persisted || prev_raft_state != *entry_storage.raft_state() { write_task.raft_state = Some(entry_storage.raft_state().clone()); } - // If snapshot initializes the peer, we don't need to write apply trace again. + // If snapshot initializes the peer (in `apply_snapshot`), we don't need to + // write apply trace again. 
if !self.ever_persisted() { let region_id = self.region().get_id(); - let entry_storage = self.entry_storage(); let raft_engine = entry_storage.raft_engine(); if write_task.raft_wb.is_none() { write_task.raft_wb = Some(raft_engine.log_batch(64)); diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index d20b9d0bec0..b58d2601d95 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -445,7 +445,8 @@ where .unwrap(); if let Some(raft_state) = task.raft_state.take() - && self.raft_states.insert(task.region_id, raft_state).is_none() { + && self.raft_states.insert(task.region_id, raft_state).is_none() + { self.state_size += std::mem::size_of::(); } self.extra_batch_write.merge(&mut task.extra_write); diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index 1e2e40b2da6..4d3f487a499 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -970,7 +970,12 @@ impl EntryStorage { .raft_engine .get_entry(self.region_id, idx) .unwrap() - .unwrap() + .unwrap_or_else(|| { + panic!( + "region_id={}, peer_id={}, idx={idx}", + self.region_id, self.peer_id + ) + }) .get_term()) } } From 21ef364077bdefbd306a26019879c75a3687a27b Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 16 Mar 2023 18:36:40 +0800 Subject: [PATCH 0587/1149] raftstore-v2: filter read index msg when sending from self (#14396) close tikv/tikv#14388 filter read index msg when sending from self Signed-off-by: Spade A --- .../raftstore-v2/src/operation/ready/mod.rs | 21 ++- components/test_raftstore-v2/src/cluster.rs | 136 +++++++++++------- components/test_raftstore-v2/src/lib.rs | 3 + components/test_raftstore-v2/src/node.rs | 20 +-- components/test_raftstore-v2/src/server.rs | 19 ++- .../src/transport_simulate.rs | 36 ++--- 
components/test_raftstore-v2/src/util.rs | 22 ++- .../raftstore/test_replica_read.rs | 80 ++++++++++- 8 files changed, 236 insertions(+), 101 deletions(-) diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 3755d92b587..3591a17d989 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -255,11 +255,24 @@ impl Peer { let pre_committed_index = self.raft_group().raft.raft_log.committed; if msg.get_message().get_msg_type() == MessageType::MsgTransferLeader { self.on_transfer_leader_msg(ctx, msg.get_message(), msg.disk_usage) - } else if let Err(e) = self.raft_group_mut().step(msg.take_message()) { - error!(self.logger, "raft step error"; "err" => ?e); } else { - let committed_index = self.raft_group().raft.raft_log.committed; - self.report_commit_log_duration(ctx, pre_committed_index, committed_index); + // This can be a message that sent when it's still a follower. Nevertheleast, + // it's meaningless to continue to handle the request as callbacks are cleared. 
+ if msg.get_message().get_msg_type() == MessageType::MsgReadIndex + && self.is_leader() + && (msg.get_message().get_from() == raft::INVALID_ID + || msg.get_message().get_from() == self.peer_id()) + { + ctx.raft_metrics.message_dropped.stale_msg.inc(); + return; + } + + if let Err(e) = self.raft_group_mut().step(msg.take_message()) { + error!(self.logger, "raft step error"; "err" => ?e); + } else { + let committed_index = self.raft_group().raft.raft_log.committed; + self.report_commit_log_duration(ctx, pre_committed_index, committed_index); + } } self.set_has_ready(); diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 164794aca56..44ce6a69358 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -13,11 +13,11 @@ use encryption_export::DataKeyManager; use engine_rocks::{RocksDbVector, RocksEngine, RocksSnapshot, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{ - Iterable, KvEngine, MiscExt, Peekable, RaftEngine, RaftEngineReadOnly, RaftLogBatch, - ReadOptions, SyncMutable, TabletRegistry, CF_DEFAULT, + Iterable, MiscExt, Peekable, RaftEngine, RaftEngineReadOnly, RaftLogBatch, ReadOptions, + SyncMutable, TabletRegistry, CF_DEFAULT, }; use file_system::IoRateLimiter; -use futures::{compat::Future01CompatExt, executor::block_on, select, FutureExt}; +use futures::{compat::Future01CompatExt, executor::block_on, select, Future, FutureExt}; use keys::{data_key, validate_data_key, DATA_PREFIX_KEY}; use kvproto::{ errorpb::Error as PbError, @@ -27,7 +27,9 @@ use kvproto::{ AdminCmdType, CmdType, RaftCmdRequest, RaftCmdResponse, RegionDetailResponse, Request, Response, StatusCmdType, }, - raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent}, + raft_serverpb::{ + PeerState, RaftApplyState, RaftLocalState, RaftMessage, RegionLocalState, StoreIdent, + }, }; use pd_client::PdClient; use raftstore::{ @@ 
-96,71 +98,93 @@ pub trait Simulator { fn get_router(&self, node_id: u64) -> Option>; fn get_snap_dir(&self, node_id: u64) -> String; + fn send_raft_msg(&mut self, msg: RaftMessage) -> Result<()>; fn read(&mut self, request: RaftCmdRequest, timeout: Duration) -> Result { + let timeout_f = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + timeout) + .compat(); + futures::executor::block_on(async move { + futures::select! { + res = self.async_read(request).fuse() => res, + e = timeout_f.fuse() => { + Err(Error::Timeout(format!("request timeout for {:?}: {:?}", timeout,e))) + }, + } + }) + } + + fn async_read( + &mut self, + request: RaftCmdRequest, + ) -> impl Future> + Send { let mut req_clone = request.clone(); req_clone.clear_requests(); req_clone.mut_requests().push(new_snap_cmd()); - match self.snapshot(req_clone, timeout) { - Ok(snap) => { - let requests = request.get_requests(); - let mut response = RaftCmdResponse::default(); - let mut responses = Vec::with_capacity(requests.len()); - for req in requests { - let cmd_type = req.get_cmd_type(); - match cmd_type { - CmdType::Get => { - let mut resp = Response::default(); - let key = req.get_get().get_key(); - let cf = req.get_get().get_cf(); - let region = snap.get_region(); - - if let Err(e) = check_key_in_region(key, region) { - return Ok(cmd_resp::new_error(e)); + let snap = self.async_snapshot(req_clone); + async move { + match snap.await { + Ok(snap) => { + let requests = request.get_requests(); + let mut response = RaftCmdResponse::default(); + let mut responses = Vec::with_capacity(requests.len()); + for req in requests { + let cmd_type = req.get_cmd_type(); + match cmd_type { + CmdType::Get => { + let mut resp = Response::default(); + let key = req.get_get().get_key(); + let cf = req.get_get().get_cf(); + let region = snap.get_region(); + + if let Err(e) = check_key_in_region(key, region) { + return Ok(cmd_resp::new_error(e)); + } + + let res = if cf.is_empty() { + 
snap.get_value(key).unwrap_or_else(|e| { + panic!( + "[region {}] failed to get {} with cf {}: {:?}", + snap.get_region().get_id(), + log_wrappers::Value::key(key), + cf, + e + ) + }) + } else { + snap.get_value_cf(cf, key).unwrap_or_else(|e| { + panic!( + "[region {}] failed to get {}: {:?}", + snap.get_region().get_id(), + log_wrappers::Value::key(key), + e + ) + }) + }; + if let Some(res) = res { + resp.mut_get().set_value(res.to_vec()); + } + resp.set_cmd_type(cmd_type); + responses.push(resp); } - - let res = if cf.is_empty() { - snap.get_value(key).unwrap_or_else(|e| { - panic!( - "[region {}] failed to get {} with cf {}: {:?}", - snap.get_region().get_id(), - log_wrappers::Value::key(key), - cf, - e - ) - }) - } else { - snap.get_value_cf(cf, key).unwrap_or_else(|e| { - panic!( - "[region {}] failed to get {}: {:?}", - snap.get_region().get_id(), - log_wrappers::Value::key(key), - e - ) - }) - }; - if let Some(res) = res { - resp.mut_get().set_value(res.to_vec()); - } - resp.set_cmd_type(cmd_type); - responses.push(resp); + _ => unimplemented!(), } - _ => unimplemented!(), } - } - response.set_responses(responses.into()); + response.set_responses(responses.into()); - Ok(response) + Ok(response) + } + Err(e) => Ok(e), } - Err(e) => Ok(e), } } - fn snapshot( + fn async_snapshot( &mut self, request: RaftCmdRequest, - timeout: Duration, - ) -> std::result::Result::Snapshot>, RaftCmdResponse>; + ) -> impl Future< + Output = std::result::Result, RaftCmdResponse>, + > + Send; fn async_peer_msg_on_node(&self, node_id: u64, region_id: u64, msg: PeerMsg) -> Result<()>; @@ -666,6 +690,10 @@ impl Cluster { } } + pub fn send_raft_msg(&mut self, msg: RaftMessage) -> Result<()> { + self.sim.wl().send_raft_msg(msg) + } + pub fn call_command_on_node( &self, node_id: u64, diff --git a/components/test_raftstore-v2/src/lib.rs b/components/test_raftstore-v2/src/lib.rs index 101658ff57b..ea7e9f6f6e9 100644 --- a/components/test_raftstore-v2/src/lib.rs +++ 
b/components/test_raftstore-v2/src/lib.rs @@ -1,4 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +#![allow(incomplete_features)] +#![feature(type_alias_impl_trait)] +#![feature(return_position_impl_trait_in_trait)] mod cluster; mod node; diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 0fde6ba42c5..5617787bb70 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -3,7 +3,6 @@ use std::{ path::Path, sync::{Arc, Mutex, RwLock}, - time::Duration, }; use collections::{HashMap, HashSet}; @@ -12,6 +11,7 @@ use encryption_export::DataKeyManager; use engine_rocks::RocksEngine; use engine_test::raft::RaftTestEngine; use engine_traits::{RaftEngine, RaftEngineReadOnly, TabletRegistry}; +use futures::Future; use kvproto::{ kvrpcpb::ApiVersion, raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, @@ -342,14 +342,12 @@ impl Simulator for NodeCluster { Ok(node_id) } - fn snapshot( + fn async_snapshot( &mut self, request: RaftCmdRequest, - timeout: Duration, - ) -> std::result::Result< - RegionSnapshot<::Snapshot>, - RaftCmdResponse, - > { + ) -> impl Future< + Output = std::result::Result, RaftCmdResponse>, + > + Send { let node_id = request.get_header().get_peer().get_store_id(); if !self .trans @@ -362,7 +360,7 @@ impl Simulator for NodeCluster { let mut resp = RaftCmdResponse::default(); let e: RaftError = box_err!("missing sender for store {}", node_id); resp.mut_header().set_error(e.into()); - return Err(resp); + // return async move {Err(resp)}; } let mut router = { @@ -370,7 +368,7 @@ impl Simulator for NodeCluster { guard.routers.get_mut(&node_id).unwrap().clone() }; - router.snapshot(request, timeout) + router.snapshot(request) } fn async_peer_msg_on_node(&self, node_id: u64, region_id: u64, msg: PeerMsg) -> Result<()> { @@ -433,6 +431,10 @@ impl Simulator for NodeCluster { let mut trans = self.trans.core.lock().unwrap(); 
trans.routers.get_mut(&node_id).unwrap().clear_filters(); } + + fn send_raft_msg(&mut self, msg: RaftMessage) -> Result<()> { + self.trans.send(msg) + } } pub fn new_node_cluster(id: u64, count: usize) -> Cluster { diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 1c6d956d1a8..ec8e3fe2635 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -15,7 +15,7 @@ use encryption_export::DataKeyManager; use engine_rocks::{RocksEngine, RocksSnapshot}; use engine_test::raft::RaftTestEngine; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; -use futures::executor::block_on; +use futures::{executor::block_on, Future}; use grpcio::{ChannelBuilder, EnvBuilder, Environment, Error as GrpcError, Service}; use grpcio_health::HealthService; use kvproto::{ @@ -751,24 +751,25 @@ impl Simulator for ServerCluster { self.storages.remove(&node_id); } - fn snapshot( + fn async_snapshot( &mut self, request: kvproto::raft_cmdpb::RaftCmdRequest, - timeout: Duration, - ) -> std::result::Result::Snapshot>, RaftCmdResponse> - { + ) -> impl Future< + Output = std::result::Result, RaftCmdResponse>, + > + Send { let node_id = request.get_header().get_peer().get_store_id(); let mut router = match self.metas.get(&node_id) { None => { let mut resp = RaftCmdResponse::default(); let e: RaftError = box_err!("missing sender for store {}", node_id); resp.mut_header().set_error(e.into()); - return Err(resp); + // return async move {Err(resp)}; + unreachable!() } Some(meta) => meta.sim_router.clone(), }; - router.snapshot(request, timeout) + router.snapshot(request) } fn async_peer_msg_on_node( @@ -796,6 +797,10 @@ impl Simulator for ServerCluster { .unwrap() .to_owned() } + + fn send_raft_msg(&mut self, _msg: RaftMessage) -> raftstore::Result<()> { + unimplemented!() + } } impl Cluster { diff --git a/components/test_raftstore-v2/src/transport_simulate.rs 
b/components/test_raftstore-v2/src/transport_simulate.rs index 9c11505d75f..7b9333aae83 100644 --- a/components/test_raftstore-v2/src/transport_simulate.rs +++ b/components/test_raftstore-v2/src/transport_simulate.rs @@ -1,12 +1,9 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{ - sync::{Arc, RwLock}, - time::{Duration, Instant}, -}; +use std::sync::{Arc, RwLock}; use engine_traits::{KvEngine, RaftEngine}; -use futures::{compat::Future01CompatExt, FutureExt}; +use futures::Future; use kvproto::{ raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, raft_serverpb::RaftMessage, @@ -14,12 +11,12 @@ use kvproto::{ use raft::SnapshotStatus; use raftstore::{ router::handle_send_error, - store::{cmd_resp, RegionSnapshot, Transport}, - Error, Result, Result as RaftStoreResult, + store::{RegionSnapshot, Transport}, + Result, Result as RaftStoreResult, }; use raftstore_v2::router::{PeerMsg, RaftRouter}; use test_raftstore::{filter_send, Filter}; -use tikv_util::{timer::GLOBAL_TIMER_HANDLE, HandyRwLock}; +use tikv_util::HandyRwLock; #[derive(Clone)] pub struct SimulateTransport { @@ -71,25 +68,16 @@ pub trait SnapshotRouter { fn snapshot( &mut self, req: RaftCmdRequest, - timeout: Duration, - ) -> std::result::Result, RaftCmdResponse>; + ) -> impl Future, RaftCmdResponse>> + Send; } impl SnapshotRouter for RaftRouter { fn snapshot( &mut self, req: RaftCmdRequest, - timeout: Duration, - ) -> std::result::Result, RaftCmdResponse> { - let timeout_f = GLOBAL_TIMER_HANDLE.delay(Instant::now() + timeout).compat(); - futures::executor::block_on(async move { - futures::select! 
{ - res = self.snapshot(req).fuse() => res, - e = timeout_f.fuse() => { - Err(cmd_resp::new_error(Error::Timeout(format!("request timeout for {:?}: {:?}", timeout,e)))) - }, - } - }) + ) -> impl Future, RaftCmdResponse>> + Send + { + self.snapshot(req) } } @@ -97,9 +85,9 @@ impl> SnapshotRouter for SimulateTransport< fn snapshot( &mut self, req: RaftCmdRequest, - timeout: Duration, - ) -> std::result::Result, RaftCmdResponse> { - self.ch.snapshot(req, timeout) + ) -> impl Future, RaftCmdResponse>> + Send + { + self.ch.snapshot(req) } } diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index d9a0377210b..e2cc88c569c 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -7,6 +7,7 @@ use engine_rocks::{RocksEngine, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{TabletRegistry, CF_DEFAULT}; use file_system::IoRateLimiter; +use futures::Future; use kvproto::{kvrpcpb::Context, metapb, raft_cmdpb::RaftCmdResponse}; use raftstore::Result; use rand::RngCore; @@ -21,7 +22,7 @@ use tikv::{ Engine, Snapshot, }, }; -use tikv_util::{config::ReadableDuration, worker::LazyWorker}; +use tikv_util::{config::ReadableDuration, worker::LazyWorker, HandyRwLock}; use crate::{bootstrap_store, cluster::Cluster, ServerCluster, Simulator}; @@ -209,3 +210,22 @@ pub fn read_on_peer( request.mut_header().set_peer(peer); cluster.read(None, request, timeout) } + +pub fn async_read_on_peer( + cluster: &mut Cluster, + peer: metapb::Peer, + region: metapb::Region, + key: &[u8], + read_quorum: bool, + replica_read: bool, +) -> impl Future> { + let mut request = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![new_get_cmd(key)], + read_quorum, + ); + request.mut_header().set_peer(peer); + request.mut_header().set_replica_read(replica_read); + cluster.sim.wl().async_read(request) +} diff --git a/tests/integrations/raftstore/test_replica_read.rs 
b/tests/integrations/raftstore/test_replica_read.rs index 16fad00a59b..0359bacf436 100644 --- a/tests/integrations/raftstore/test_replica_read.rs +++ b/tests/integrations/raftstore/test_replica_read.rs @@ -12,13 +12,13 @@ use std::{ time::Duration, }; -use futures::executor::block_on; +use futures::{compat::Future01CompatExt, executor::block_on, FutureExt}; use kvproto::raft_serverpb::RaftMessage; use pd_client::PdClient; use raft::eraftpb::MessageType; use raftstore::{store::ReadIndexContext, Result}; use test_raftstore::*; -use tikv_util::{config::*, time::Instant, HandyRwLock}; +use tikv_util::{config::*, time::Instant, timer::GLOBAL_TIMER_HANDLE, HandyRwLock}; use txn_types::{Key, Lock, LockType}; use uuid::Uuid; @@ -583,3 +583,79 @@ fn test_malformed_read_index() { let resp = resp.recv_timeout(Duration::from_secs(10)).unwrap(); assert_eq!(resp.get_responses()[0].get_get().get_value(), b"v1"); } + +/// The case checks if a malformed request should not corrupt the leader's read +/// queue. +#[test] +fn test_malformed_read_index_v2() { + use test_raftstore_v2::*; + + let mut cluster = new_node_cluster(0, 3); + configure_for_lease_read(&mut cluster.cfg, Some(50), None); + cluster.cfg.raft_store.raft_log_gc_threshold = 12; + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(12); + cluster.cfg.raft_store.hibernate_regions = true; + cluster.cfg.raft_store.check_leader_lease_interval = ReadableDuration::hours(10); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + let region_id = cluster.run_conf_change(); + pd_client.must_add_peer(region_id, new_peer(2, 2)); + pd_client.must_add_peer(region_id, new_peer(3, 3)); + cluster.must_transfer_leader(1, new_peer(1, 1)); + cluster.must_put(b"k1", b"v1"); + for i in 1..=3 { + must_get_equal(&cluster.get_engine(i), b"k1", b"v1"); + } + + // Wait till lease expires. 
+ std::thread::sleep( + cluster + .cfg + .raft_store + .raft_store_max_leader_lease() + .to_std() + .unwrap(), + ); + let region = cluster.get_region(b"k1"); + // Send a malformed request to leader + let mut raft_msg = raft::eraftpb::Message::default(); + raft_msg.set_msg_type(MessageType::MsgReadIndex); + let rctx = ReadIndexContext { + id: Uuid::new_v4(), + request: None, + locked: None, + }; + let mut e = raft::eraftpb::Entry::default(); + e.set_data(rctx.to_bytes().into()); + raft_msg.mut_entries().push(e); + raft_msg.from = 1; + raft_msg.to = 1; + let mut message = RaftMessage::default(); + message.set_region_id(region_id); + message.set_from_peer(new_peer(1, 1)); + message.set_to_peer(new_peer(1, 1)); + message.set_region_epoch(region.get_region_epoch().clone()); + message.set_message(raft_msg); + // So the read won't be handled soon. + cluster.add_send_filter(IsolationFilterFactory::new(1)); + cluster.send_raft_msg(message).unwrap(); + // Also send a correct request. If the malformed request doesn't corrupt + // the read queue, the correct request should be responded. + let resp = async_read_on_peer(&mut cluster, new_peer(1, 1), region, b"k1", true, false); + cluster.clear_send_filters(); + + let timeout = Duration::from_secs(10); + let timeout_f = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + timeout) + .compat(); + let resp = futures::executor::block_on(async move { + futures::select! { + res = resp.fuse() => res.unwrap(), + e = timeout_f.fuse() => { + panic!("request timeout for {:?}: {:?}", timeout,e); + }, + } + }); + assert_eq!(resp.get_responses()[0].get_get().get_value(), b"v1"); +} From eb4ad726a34ab522f0dec6e94b20e67725a2fdc3 Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 17 Mar 2023 10:24:40 +0800 Subject: [PATCH 0588/1149] raft-log-engine: supply `prefill-for-recycle` configuration to enable starting engine in cold state. 
(#14372) close tikv/tikv#14371 Adds a new configuration `raft-engine.prefill-for-recycle` for supporting to enable log recycling when starting TiKV in cold state. --- Cargo.lock | 4 ++-- etc/config-template.toml | 8 ++++++++ metrics/grafana/tikv_details.json | 10 +++++++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 94c562c5c6d..4123bc6377c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4204,7 +4204,7 @@ dependencies = [ [[package]] name = "raft-engine" version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#33530112c3a4acaf8c50ca9d0470284109926296" +source = "git+https://github.com/tikv/raft-engine.git#404e3fefaeeb4da6b7650268d500cfd3fbd29cae" dependencies = [ "byteorder", "crc32fast", @@ -4238,7 +4238,7 @@ dependencies = [ [[package]] name = "raft-engine-ctl" version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#33530112c3a4acaf8c50ca9d0470284109926296" +source = "git+https://github.com/tikv/raft-engine.git#404e3fefaeeb4da6b7650268d500cfd3fbd29cae" dependencies = [ "clap 3.1.6", "env_logger", diff --git a/etc/config-template.toml b/etc/config-template.toml index 3930a247374..80d9bc8a4d6 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -1104,6 +1104,14 @@ ## Default: false. # enable-log-recycle = false +## Whether to prepare log files for recycling when start. +## If `true`, batch empty log files will be prepared for recycling when +## starting engine. +## Only available for `enable-log-reycle` is true. +## +## Default: false +# prefill-for-recycle = false + [security] ## The path for TLS certificates. Empty string means disabling secure connections. 
# ca-path = "" diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index d4374fda369..f404ebc5376 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -23695,7 +23695,8 @@ "metric": "tikv_snapshot_limit_transport_bytes", "refId": "A", "step": 40 - },{ + }, + { "exemplar": true, "expr": "rate(tikv_snapshot_limit_generate_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", "hide": true, @@ -31976,6 +31977,13 @@ "intervalFactor": 1, "legendFormat": "swap", "refId": "B" + }, + { + "exemplar": true, + "expr": "avg(raft_engine_recycled_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"})", + "intervalFactor": 1, + "legendFormat": "recycle", + "refId": "C" } ], "thresholds": [], From 315d402780bf1ba8155aafaf57e1608809cedf1b Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 17 Mar 2023 13:30:39 +0800 Subject: [PATCH 0589/1149] Revert changes that may cause performance regression introduced by write prioirty scheduling (#14412) close tikv/tikv#14375 Revert the parsed entry that may lead to performance regression and disable priority pool for sched worker Signed-off-by: Connor1996 --- .../operation/command/admin/merge/prepare.rs | 3 +- .../raftstore/src/store/entry_storage.rs | 32 +-- components/raftstore/src/store/fsm/apply.rs | 141 ++++++++--- components/raftstore/src/store/peer.rs | 3 +- components/raftstore/src/store/util.rs | 235 +----------------- src/storage/txn/sched_pool.rs | 27 +- 6 files changed, 148 insertions(+), 293 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index 378e3d2e7c8..f9df2d9ea1a 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -245,7 +245,8 @@ impl Peer { if entry.get_data().is_empty() { 
continue; } - let cmd: RaftCmdRequest = util::parse_data_at(entry.get_data(), entry.get_index()); + let cmd: RaftCmdRequest = + util::parse_data_at(entry.get_data(), entry.get_index(), "tag"); if !cmd.has_admin_request() { continue; } diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index 4d3f487a499..f5226961a6c 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -30,11 +30,7 @@ use super::{ metrics::*, peer_storage::storage_error, WriteTask, MEMTRACE_ENTRY_CACHE, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }; -use crate::{ - bytes_capacity, - store::{util::ParsedEntry, ReadTask}, - Result, -}; +use crate::{bytes_capacity, store::ReadTask, Result}; const MAX_ASYNC_FETCH_TRY_CNT: usize = 3; const SHRINK_CACHE_CAPACITY: usize = 64; @@ -58,7 +54,7 @@ pub fn last_index(state: &RaftLocalState) -> u64 { pub struct CachedEntries { pub range: Range, // Entries and dangle size for them. `dangle` means not in entry cache. - entries: Arc, usize)>>, + entries: Arc, usize)>>, } impl CachedEntries { @@ -68,24 +64,21 @@ impl CachedEntries { let end = entries.last().map(|x| x.index).unwrap() + 1; let range = Range { start, end }; CachedEntries { - entries: Arc::new(Mutex::new(( - entries.into_iter().map(|e| ParsedEntry::new(e)).collect(), - 0, - ))), + entries: Arc::new(Mutex::new((entries, 0))), range, } } - pub fn iter_entries_mut(&self, mut f: impl FnMut(&mut ParsedEntry)) { - let mut entries = self.entries.lock().unwrap(); - for entry in &mut entries.0 { + pub fn iter_entries(&self, mut f: impl FnMut(&Entry)) { + let entries = self.entries.lock().unwrap(); + for entry in &entries.0 { f(entry); } } /// Take cached entries and dangle size for them. `dangle` means not in /// entry cache. 
- pub fn take_entries(&self) -> (Vec, usize) { + pub fn take_entries(&self) -> (Vec, usize) { mem::take(&mut *self.entries.lock().unwrap()) } } @@ -332,8 +325,8 @@ impl EntryCache { let dangle_size = { let mut guard = entries.entries.lock().unwrap(); - let last_idx = guard.0.last().map(|e| e.get_index()).unwrap(); - let cache_front = match self.cache.front().map(|e| e.get_index()) { + let last_idx = guard.0.last().map(|e| e.index).unwrap(); + let cache_front = match self.cache.front().map(|e| e.index) { Some(i) => i, None => u64::MAX, }; @@ -341,10 +334,7 @@ impl EntryCache { let dangle_range = if last_idx < cache_front { // All entries are not in entry cache. 0..guard.0.len() - } else if let Ok(i) = guard - .0 - .binary_search_by(|e| e.get_index().cmp(&cache_front)) - { + } else if let Ok(i) = guard.0.binary_search_by(|e| e.index.cmp(&cache_front)) { // Some entries are in entry cache. 0..i } else { @@ -354,7 +344,7 @@ impl EntryCache { let mut size = 0; for e in &guard.0[dangle_range] { - size += e.bytes_capacity(); + size += bytes_capacity(&e.data) + bytes_capacity(&e.context); } guard.1 = size; size diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 181ff207c0b..16a8bacbced 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -9,6 +9,7 @@ use std::{ cmp::{Ord, Ordering as CmpOrdering}, collections::VecDeque, fmt::{self, Debug, Formatter}, + io::BufRead, mem, ops::{Deref, DerefMut, Range as StdRange}, sync::{ @@ -45,7 +46,11 @@ use kvproto::{ }; use pd_client::{BucketMeta, BucketStat}; use prometheus::local::LocalHistogram; -use raft::eraftpb::{ConfChangeType, ConfChangeV2, Entry, EntryType, Snapshot as RaftSnapshot}; +use protobuf::{wire_format::WireType, CodedInputStream, Message}; +use raft::eraftpb::{ + ConfChange, ConfChangeType, ConfChangeV2, Entry, EntryType, Snapshot as RaftSnapshot, +}; +use raft_proto::ConfChangeI; use 
resource_control::{ResourceConsumeType, ResourceController, ResourceMetered}; use smallvec::{smallvec, SmallVec}; use sst_importer::SstImporter; @@ -88,7 +93,6 @@ use crate::{ util::{ self, admin_cmd_epoch_lookup, check_flashback_state, check_req_region_epoch, compare_region_epoch, ChangePeerI, ConfChangeKind, KeysInfoFormatter, LatencyInspector, - ParsedEntry, }, Config, RegionSnapshot, RegionTask, WriteCallback, }, @@ -848,6 +852,43 @@ fn should_sync_log(cmd: &RaftCmdRequest) -> bool { false } +fn can_witness_skip(entry: &Entry) -> bool { + // need to handle ConfChange entry type + if entry.get_entry_type() != EntryType::EntryNormal { + return false; + } + + // HACK: check admin request field in serialized data from `RaftCmdRequest` + // without deserializing all. It's done by checking the existence of the + // field number of `admin_request`. + // See the encoding in `write_to_with_cached_sizes()` of `RaftCmdRequest` in + // `raft_cmdpb.rs` for reference. + let mut is = CodedInputStream::from_bytes(entry.get_data()); + if is.eof().unwrap() { + return true; + } + let (mut field_number, wire_type) = is.read_tag_unpack().unwrap(); + // Header field is of number 1 + if field_number == 1 { + if wire_type != WireType::WireTypeLengthDelimited { + panic!("unexpected wire type"); + } + let len = is.read_raw_varint32().unwrap(); + // skip parsing the content of `Header` + is.consume(len as usize); + // read next field number + (field_number, _) = is.read_tag_unpack().unwrap(); + } + + // `Requests` field is of number 2 and `AdminRequest` field is of number 3. + // - If the next field is 2, there must be no admin request as in one + // `RaftCmdRequest`, either requests or admin_request is filled. + // - If the next field is 3, it's exactly an admin request. + // - If the next field is others, neither requests nor admin_request is filled, + // so there is no admin request. + field_number != 3 +} + /// A struct that stores the state related to Merge. 
/// /// When executing a `CommitMerge`, the source peer may have not applied @@ -870,7 +911,7 @@ where { /// All of the entries that need to continue to be applied after /// the source peer has applied its logs. - pending_entries: Vec, + pending_entries: Vec, /// All of messages that need to continue to be handled after /// the source peer has applied its logs and pending entries /// are all handled. @@ -1050,7 +1091,7 @@ where fn handle_raft_committed_entries( &mut self, apply_ctx: &mut ApplyContext, - mut committed_entries_drainer: Drain<'_, ParsedEntry>, + mut committed_entries_drainer: Drain<'_, Entry>, ) { if committed_entries_drainer.len() == 0 { return; @@ -1061,7 +1102,7 @@ where // must re-propose these commands again. apply_ctx.committed_count += committed_entries_drainer.len(); let mut results = VecDeque::new(); - while let Some(mut entry) = committed_entries_drainer.next() { + while let Some(entry) = committed_entries_drainer.next() { if self.pending_remove { // This peer is about to be destroyed, skip everything. break; @@ -1083,9 +1124,9 @@ where // running on data written by new version tikv), but PD will reject old version // tikv join the cluster, so this should not happen. 
let res = match entry.get_entry_type() { - EntryType::EntryNormal => self.handle_raft_entry_normal(apply_ctx, &mut entry), + EntryType::EntryNormal => self.handle_raft_entry_normal(apply_ctx, &entry), EntryType::EntryConfChange | EntryType::EntryConfChangeV2 => { - self.handle_raft_entry_conf_change(apply_ctx, &mut entry) + self.handle_raft_entry_conf_change(apply_ctx, &entry) } }; @@ -1155,7 +1196,7 @@ where fn handle_raft_entry_normal( &mut self, apply_ctx: &mut ApplyContext, - entry: &mut ParsedEntry, + entry: &Entry, ) -> ApplyResult { fail_point!( "yield_apply_first_region", @@ -1165,10 +1206,11 @@ where let index = entry.get_index(); let term = entry.get_term(); + let data = entry.get_data(); - if !entry.is_empty() { - if !self.peer.is_witness || !entry.can_witness_skip() { - let cmd = entry.take_cmd(); + if !data.is_empty() { + if !self.peer.is_witness || !can_witness_skip(entry) { + let cmd = util::parse_data_at(data, index, &self.tag); if apply_ctx.yield_high_latency_operation && has_high_latency_operation(&cmd) { self.priority = Priority::Low; } @@ -1227,7 +1269,7 @@ where fn handle_raft_entry_conf_change( &mut self, apply_ctx: &mut ApplyContext, - entry: &mut ParsedEntry, + entry: &Entry, ) -> ApplyResult { // Although conf change can't yield in normal case, it is convenient to // simulate yield before applying a conf change log. 
@@ -1235,7 +1277,16 @@ where ApplyResult::Yield }); let (index, term) = (entry.get_index(), entry.get_term()); - let (conf_change, cmd) = entry.take_conf_change(); + let conf_change: ConfChangeV2 = match entry.get_entry_type() { + EntryType::EntryConfChange => { + let conf_change: ConfChange = + util::parse_data_at(entry.get_data(), index, &self.tag); + conf_change.into_v2() + } + EntryType::EntryConfChangeV2 => util::parse_data_at(entry.get_data(), index, &self.tag), + _ => unreachable!(), + }; + let cmd = util::parse_data_at(conf_change.get_context(), index, &self.tag); match self.process_raft_cmd(apply_ctx, index, term, cmd) { ApplyResult::None => { // If failed, tell Raft that the `ConfChange` was aborted. @@ -3681,18 +3732,16 @@ impl ResourceMetered for Msg { let mut dominant_group = "".to_owned(); let mut max_write_bytes = 0; for cached_entries in &apply.entries { - cached_entries.iter_entries_mut(|entry| { - if entry.is_empty() { - return; - } + cached_entries.iter_entries(|entry| { + let header = util::get_entry_header(entry); + let group_name = header.get_resource_group_name().to_owned(); let write_bytes = entry.compute_size() as u64; - let group_name = entry.get_cmd().get_header().get_resource_group_name(); resource_ctl.consume( group_name.as_bytes(), ResourceConsumeType::IoBytes(write_bytes), ); if write_bytes > max_write_bytes { - dominant_group = group_name.to_owned(); + dominant_group = group_name; max_write_bytes = write_bytes; } }); @@ -3883,21 +3932,19 @@ where let mut dangle_size = 0; for cached_entries in apply.entries { - let (ents, sz) = cached_entries.take_entries(); + let (e, sz) = cached_entries.take_entries(); dangle_size += sz; - if ents.is_empty() { + if e.is_empty() { let rid = self.delegate.region_id(); let StdRange { start, end } = cached_entries.range; - let mut tmp_ents = Vec::new(); self.delegate .raft_engine - .fetch_entries_to(rid, start, end, None, &mut tmp_ents) + .fetch_entries_to(rid, start, end, None, &mut entries) .unwrap(); 
- entries.extend(tmp_ents.into_iter().map(|e| ParsedEntry::new(e))); } else if entries.is_empty() { - entries = ents; + entries = e; } else { - entries.extend(ents); + entries.extend(e); } } if dangle_size > 0 { @@ -4869,9 +4916,9 @@ mod memtrace { EK: KvEngine, { fn heap_size(&self) -> usize { - let mut size = self.pending_entries.capacity() * mem::size_of::(); + let mut size = self.pending_entries.capacity() * mem::size_of::(); for e in &self.pending_entries { - size += e.bytes_capacity(); + size += bytes_capacity(&e.data) + bytes_capacity(&e.context); } size += self.pending_msgs.capacity() * mem::size_of::>(); @@ -4928,6 +4975,7 @@ mod tests { time::*, }; + use bytes::Bytes; use engine_panic::PanicEngine; use engine_test::kv::{new_engine, KvTestEngine, KvTestSnapshot}; use engine_traits::{Peekable as PeekableTrait, SyncMutable, WriteBatchExt}; @@ -4937,6 +4985,7 @@ mod tests { raft_cmdpb::*, }; use protobuf::Message; + use raft::eraftpb::{ConfChange, ConfChangeV2}; use sst_importer::Config as ImportConfig; use tempfile::{Builder, TempDir}; use test_sst_importer::*; @@ -5043,6 +5092,42 @@ mod tests { } } + #[test] + fn test_can_witness_skip() { + let mut entry = Entry::new(); + let mut req = RaftCmdRequest::default(); + entry.set_entry_type(EntryType::EntryNormal); + let data = req.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(can_witness_skip(&entry)); + + req.mut_admin_request() + .set_cmd_type(AdminCmdType::CompactLog); + let data = req.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(!can_witness_skip(&entry)); + + let mut req = RaftCmdRequest::default(); + let mut request = Request::default(); + request.set_cmd_type(CmdType::Put); + req.set_requests(vec![request].into()); + let data = req.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(can_witness_skip(&entry)); + + entry.set_entry_type(EntryType::EntryConfChange); + let conf_change = 
ConfChange::new(); + let data = conf_change.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(!can_witness_skip(&entry)); + + entry.set_entry_type(EntryType::EntryConfChangeV2); + let conf_change_v2 = ConfChangeV2::new(); + let data = conf_change_v2.write_to_bytes().unwrap(); + entry.set_data(Bytes::copy_from_slice(&data)); + assert!(!can_witness_skip(&entry)); + } + #[test] fn test_should_sync_log() { // Admin command diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index a0b28e44f07..8dc69a0def4 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -4211,7 +4211,8 @@ where if entry.get_data().is_empty() { continue; } - let cmd: RaftCmdRequest = util::parse_data_at(entry.get_data(), entry.get_index()); + let cmd: RaftCmdRequest = + util::parse_data_at(entry.get_data(), entry.get_index(), &self.tag); if !cmd.has_admin_request() { continue; } diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 7408b540285..d48c5e78e7c 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -6,7 +6,6 @@ use std::{ collections::{HashMap, VecDeque}, fmt, fmt::Display, - io::BufRead, option::Option, sync::{ atomic::{AtomicBool, AtomicU64, Ordering as AtomicOrdering}, @@ -25,15 +24,12 @@ use kvproto::{ }, raft_serverpb::{RaftMessage, RaftSnapshotData}, }; -use protobuf::{self, wire_format::WireType, CodedInputStream, Message}; +use protobuf::{self, CodedInputStream, Message}; use raft::{ eraftpb::{self, ConfChangeType, ConfState, Entry, EntryType, MessageType, Snapshot}, Changer, RawNode, INVALID_INDEX, }; -use raft_proto::{ - eraftpb::{ConfChange, ConfChangeV2}, - ConfChangeI, -}; +use raft_proto::ConfChangeI; use tikv_util::{ box_err, codec::number::{decode_u64, NumberEncoder}, @@ -47,9 +43,7 @@ use tokio::sync::Notify; use txn_types::WriteBatchFlags; use 
super::{metrics::PEER_ADMIN_CMD_COUNTER_VEC, peer_storage, Config}; -use crate::{ - bytes_capacity, coprocessor::CoprocessorHost, store::snap::SNAPSHOT_VERSION, Error, Result, -}; +use crate::{coprocessor::CoprocessorHost, store::snap::SNAPSHOT_VERSION, Error, Result}; const INVALID_TIMESTAMP: u64 = u64::MAX; @@ -751,139 +745,6 @@ pub(crate) fn u64_to_timespec(u: u64) -> Timespec { Timespec::new(sec as i64, nsec as i32) } -// ParsedEntry wraps raft-proto `Entry` and used to avoid parsing raft command -// from entry's data repeatedly. The parsed command may be used in multiple -// places, so cache it at the first place. -pub struct ParsedEntry { - entry: Entry, - cmd: Option, - conf_change: Option, - parsed: bool, -} - -impl ParsedEntry { - pub fn new(entry: Entry) -> ParsedEntry { - ParsedEntry { - entry, - cmd: None, - conf_change: None, - parsed: false, - } - } - - pub fn get_entry_type(&self) -> EntryType { - self.entry.get_entry_type() - } - - pub fn get_index(&self) -> u64 { - self.entry.get_index() - } - - pub fn get_term(&self) -> u64 { - self.entry.get_term() - } - - pub fn compute_size(&self) -> u32 { - self.entry.compute_size() - } - - pub fn is_empty(&self) -> bool { - self.entry.get_data().is_empty() - } - - pub fn bytes_capacity(&self) -> usize { - bytes_capacity(&self.entry.data) + bytes_capacity(&self.entry.context) - } - - fn parse(&mut self) { - assert!(!self.is_empty()); - - let data = self.entry.get_data(); - let index = self.entry.get_index(); - // lazy parse the cmd from entry context - let conf_change = match self.entry.get_entry_type() { - EntryType::EntryConfChange => { - let conf_change: ConfChange = parse_data_at(data, index); - Some(conf_change.into_v2()) - } - EntryType::EntryConfChangeV2 => Some(parse_data_at(data, index)), - EntryType::EntryNormal => { - self.cmd = Some(parse_data_at(data, index)); - None - } - }; - if let Some(conf_change) = conf_change { - self.cmd = Some(parse_data_at(conf_change.get_context(), index)); - 
self.conf_change = Some(conf_change); - } - self.parsed = true; - } - - pub fn get_cmd(&mut self) -> &RaftCmdRequest { - if !self.parsed { - self.parse(); - } - self.cmd.as_ref().unwrap() - } - - pub fn take_cmd(&mut self) -> RaftCmdRequest { - if !self.parsed { - self.parse(); - } - self.parsed = false; - self.cmd.take().unwrap() - } - - pub fn take_conf_change(&mut self) -> (ConfChangeV2, RaftCmdRequest) { - if !self.parsed { - self.parse(); - } - self.parsed = false; - (self.conf_change.take().unwrap(), self.cmd.take().unwrap()) - } - - pub fn can_witness_skip(&self) -> bool { - !has_admin_request(&self.entry) - } -} - -fn has_admin_request(entry: &Entry) -> bool { - // need to handle ConfChange entry type - if entry.get_entry_type() != EntryType::EntryNormal { - return true; - } - - // HACK: check admin request field in serialized data from `RaftCmdRequest` - // without deserializing all. It's done by checking the existence of the - // field number of `admin_request`. - // See the encoding in `write_to_with_cached_sizes()` of `RaftCmdRequest` in - // `raft_cmdpb.rs` for reference. - let mut is = CodedInputStream::from_bytes(entry.get_data()); - if is.eof().unwrap() { - return false; - } - let (mut field_number, wire_type) = is.read_tag_unpack().unwrap(); - // Header field is of number 1 - if field_number == 1 { - if wire_type != WireType::WireTypeLengthDelimited { - panic!("unexpected wire type"); - } - let len = is.read_raw_varint32().unwrap(); - // skip parsing the content of `Header` - is.consume(len as usize); - // read next field number - (field_number, _) = is.read_tag_unpack().unwrap(); - } - - // `Requests` field is of number 2 and `AdminRequest` field is of number 3. - // - If the next field is 2, there must be no admin request as in one - // `RaftCmdRequest`, either requests or admin_request is filled. - // - If the next field is 3, it's exactly an admin request. 
- // - If the next field is others, neither requests nor admin_request is filled, - // so there is no admin request. - field_number == 3 -} - pub fn get_entry_header(entry: &Entry) -> RaftRequestHeader { if entry.get_entry_type() != EntryType::EntryNormal { return RaftRequestHeader::default(); @@ -909,10 +770,10 @@ pub fn get_entry_header(entry: &Entry) -> RaftRequestHeader { /// If `data` is corrupted, this function will panic. // TODO: make sure received entries are not corrupted #[inline] -pub fn parse_data_at(data: &[u8], index: u64) -> T { +pub fn parse_data_at(data: &[u8], index: u64, tag: &str) -> T { let mut result = T::default(); result.merge_from_bytes(data).unwrap_or_else(|e| { - panic!("{} data is corrupted : {:?}", index, e); + panic!("{} data is corrupted at {}: {:?}", tag, index, e); }); result } @@ -1856,11 +1717,10 @@ pub fn validate_split_region( mod tests { use std::thread; - use bytes::Bytes; use engine_test::kv::KvTestEngine; use kvproto::{ metapb::{self, RegionEpoch}, - raft_cmdpb::{AdminRequest, CmdType, Request}, + raft_cmdpb::AdminRequest, }; use protobuf::Message as _; use raft::eraftpb::{ConfChangeType, Entry, Message, MessageType}; @@ -1941,53 +1801,6 @@ mod tests { assert_eq!(m1.inspect(Some(monotonic_raw_now())), LeaseState::Valid); } - #[test] - fn test_parsed_entry() { - let mut req = RaftCmdRequest::default(); - let mut header = RaftRequestHeader::default(); - header.set_resource_group_name("test".to_owned()); - req.set_header(header); - - let mut entry = Entry::new(); - entry.set_term(1); - entry.set_index(2); - entry.set_entry_type(raft::eraftpb::EntryType::EntryNormal); - entry.set_data(req.write_to_bytes().unwrap().into()); - - let mut parsed = ParsedEntry::new(entry); - assert_eq!(parsed.get_term(), 1); - assert_eq!(parsed.get_index(), 2); - assert_eq!( - parsed.get_cmd().get_header().get_resource_group_name(), - "test" - ); - - let mut entry = Entry::new(); - entry.set_term(1); - entry.set_index(2); - 
entry.set_entry_type(raft::eraftpb::EntryType::EntryConfChangeV2); - let mut cc = ConfChangeV2::new(); - let mut ccs = eraftpb::ConfChangeSingle::default(); - ccs.set_change_type(ConfChangeType::AddNode); - ccs.set_node_id(3); - cc.set_changes(vec![ccs].into()); - cc.set_context(req.write_to_bytes().unwrap().into()); - entry.set_data(cc.write_to_bytes().unwrap().into()); - - let mut parsed = ParsedEntry::new(entry); - let (conf_change, cmd) = parsed.take_conf_change(); - assert_eq!( - conf_change.get_changes()[0].get_change_type(), - ConfChangeType::AddNode - ); - assert_eq!(conf_change.get_changes()[0].get_node_id(), 3); - assert_eq!(cmd.get_header().get_resource_group_name(), "test"); - assert_eq!( - parsed.get_cmd().get_header().get_resource_group_name(), - "test" - ); - } - #[test] fn test_get_entry_header() { let mut req = RaftCmdRequest::default(); @@ -2338,42 +2151,6 @@ mod tests { check_term(&header, 10).unwrap_err(); } - #[test] - fn test_has_admin_request() { - let mut entry = Entry::new(); - let mut req = RaftCmdRequest::default(); - entry.set_entry_type(EntryType::EntryNormal); - let data = req.write_to_bytes().unwrap(); - entry.set_data(Bytes::copy_from_slice(&data)); - assert!(!has_admin_request(&entry)); - - req.mut_admin_request() - .set_cmd_type(AdminCmdType::CompactLog); - let data = req.write_to_bytes().unwrap(); - entry.set_data(Bytes::copy_from_slice(&data)); - assert!(has_admin_request(&entry)); - - let mut req = RaftCmdRequest::default(); - let mut request = Request::default(); - request.set_cmd_type(CmdType::Put); - req.set_requests(vec![request].into()); - let data = req.write_to_bytes().unwrap(); - entry.set_data(Bytes::copy_from_slice(&data)); - assert!(!has_admin_request(&entry)); - - entry.set_entry_type(EntryType::EntryConfChange); - let conf_change = ConfChange::new(); - let data = conf_change.write_to_bytes().unwrap(); - entry.set_data(Bytes::copy_from_slice(&data)); - assert!(has_admin_request(&entry)); - - 
entry.set_entry_type(EntryType::EntryConfChangeV2); - let conf_change_v2 = ConfChangeV2::new(); - let data = conf_change_v2.write_to_bytes().unwrap(); - entry.set_data(Bytes::copy_from_slice(&data)); - assert!(has_admin_request(&entry)); - } - #[test] fn test_check_req_region_epoch() { let mut epoch = RegionEpoch::default(); diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index 0cff9d51d41..49539d51d8c 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -74,7 +74,7 @@ impl SchedPool { pool_size: usize, reporter: R, feature_gate: FeatureGate, - resource_ctl: Option>, + _resource_ctl: Option>, ) -> Self { let builder = |pool_size: usize, name_prefix: &str| { let engine = Arc::new(Mutex::new(engine.clone())); @@ -102,19 +102,20 @@ impl SchedPool { tls_flush(&reporter); }) }; - if let Some(ref r) = resource_ctl { - SchedPool::Priority { - worker_pool: builder(pool_size, "sched-worker-pool") - .build_priority_future_pool(r.clone()), - resource_ctl: r.clone(), - } - } else { - SchedPool::Vanilla { - worker_pool: builder(pool_size, "sched-worker-pool").build_future_pool(), - high_worker_pool: builder(std::cmp::max(1, pool_size / 2), "sched-high-pri-pool") - .build_future_pool(), - } + // FIXME: for performance issue, disable priority pool temporarily + // if let Some(ref r) = resource_ctl { + // SchedPool::Priority { + // worker_pool: builder(pool_size, "sched-worker-pool") + // .build_priority_future_pool(r.clone()), + // resource_ctl: r.clone(), + // } + // } else { + SchedPool::Vanilla { + worker_pool: builder(pool_size, "sched-worker-pool").build_future_pool(), + high_worker_pool: builder(std::cmp::max(1, pool_size / 2), "sched-high-pri-pool") + .build_future_pool(), } + // } } pub fn spawn( From b9bc478913da5bda779d9e59e10e649fe86f89d4 Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 17 Mar 2023 13:46:39 +0800 Subject: [PATCH 0590/1149] engine: enable log recycling by default (#14380) close tikv/tikv#14379 
Enable log recycling in `RaftLogEngine` by default Signed-off-by: Lucasliang Co-authored-by: Ti Chi Robot --- etc/config-template.toml | 4 ++-- src/config/mod.rs | 8 +++++++- tests/integrations/config/test-custom.toml | 1 + tests/integrations/config/test-default.toml | 1 + 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/etc/config-template.toml b/etc/config-template.toml index 80d9bc8a4d6..aec5e108949 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -1101,8 +1101,8 @@ ## Only available for `format-version` >= 2. This option is only ## available when TiKV >= 6.3.x. ## -## Default: false. -# enable-log-recycle = false +## Default: true. +# enable-log-recycle = true ## Whether to prepare log files for recycling when start. ## If `true`, batch empty log files will be prepared for recycling when diff --git a/src/config/mod.rs b/src/config/mod.rs index 689e0330a2b..c1c38e39d77 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1800,7 +1800,13 @@ impl Default for RaftEngineConfig { fn default() -> Self { Self { enable: true, - config: RawRaftEngineConfig::default(), + config: RawRaftEngineConfig { + // TODO: after update the dependency to `raft-engine` lib, revokes the + // following unelegant settings. + // Enable log recycling by default. 
+ enable_log_recycle: true, + ..RawRaftEngineConfig::default() + }, } } } diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 7f5dbfa1db7..416505a7318 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -602,6 +602,7 @@ recovery-mode = "tolerate-tail-corruption" recovery-read-block-size = "1KB" recovery-threads = 2 memory-limit = "1GB" +enable-log-recycle = true # enable by default [security] ca-path = "invalid path" diff --git a/tests/integrations/config/test-default.toml b/tests/integrations/config/test-default.toml index 23e53b9daf3..ef3c83c00df 100644 --- a/tests/integrations/config/test-default.toml +++ b/tests/integrations/config/test-default.toml @@ -39,6 +39,7 @@ [raftdb.defaultcf] [raft-engine] +enable-log-recycle = true # enable by default [security] From fd2db9a796b16d7665927340d9ebdc83022ea0e2 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 17 Mar 2023 14:10:39 +0800 Subject: [PATCH 0591/1149] raftstore-v2: split init may be out of dated when conf change ocurred (#14407) close tikv/tikv#14389 split init may be out of dated Signed-off-by: Spade A Co-authored-by: Xinye Tao --- .../src/operation/command/admin/split.rs | 20 +++++++- tests/failpoints/cases/test_split_region.rs | 51 +++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index e6cd7511801..82bae03f062 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -391,6 +391,11 @@ impl Apply { req: &AdminRequest, log_index: u64, ) -> Result<(AdminResponse, AdminCmdResult)> { + fail_point!( + "on_apply_batch_split", + self.peer().get_store_id() == 3, + |_| { unreachable!() } + ); 
PEER_ADMIN_CMD_COUNTER.batch_split.all.inc(); let region = self.region(); @@ -692,7 +697,20 @@ impl Peer { mut split_init: Box, ) { let region_id = split_init.region.id; - if self.storage().is_initialized() && self.persisted_index() >= RAFT_INIT_LOG_INDEX { + let peer_id = split_init + .region + .get_peers() + .iter() + .find(|p| p.get_store_id() == self.peer().get_store_id()) + .unwrap() + .get_id(); + + // If peer_id in `split_init` is less than the current peer_id, the conf change + // for the peer should have occurred and we should just report finish to + // the source region of this out of dated peer initialization. + if self.storage().is_initialized() && self.persisted_index() >= RAFT_INIT_LOG_INDEX + || peer_id < self.peer().get_id() + { // Race with split operation. The tablet created by split will eventually be // deleted. We don't trim it. report_split_init_finish(store_ctx, split_init.derived_region_id, region_id, true); diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 94dfd1b5648..792a21217ad 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -1136,3 +1136,54 @@ fn test_split_during_cluster_shutdown() { test_split("before_cluster_shutdown1"); test_split("before_cluster_shutdown2"); } + +// Test that split is handled pretty slow in one node, say node 2. Before node 2 +// handles the split, the peer of the new split region on node 2 has been +// removed and added back sooner. So, when the new split region on node 2 +// receives a heartbeat from it's leader, it creates a peer with higher peer id +// than the peer created due to the split on this node. 
+#[test] +fn test_split_race_with_conf_change() { + // test case for raftstore-v2 + use test_raftstore_v2::*; + + let mut cluster = new_node_cluster(0, 3); + configure_for_snapshot(&mut cluster.cfg); + cluster.cfg.raft_store.right_derive_when_split = false; + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.run(); + + let split_key1 = b"k05"; + let region = cluster.get_region(split_key1); + cluster.must_transfer_leader(region.get_id(), new_peer(1, 1)); + + fail::cfg("on_apply_batch_split", "pause").unwrap(); + cluster.must_split(®ion, split_key1); + + let region = pd_client.get_region(b"k10").unwrap(); + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(region.get_id(), 3) + .msg_type(MessageType::MsgSnapshot) + .msg_type(MessageType::MsgAppend) + .direction(Direction::Recv), + )); + + let mut peer3 = region + .get_peers() + .iter() + .find(|p| p.get_store_id() == 3) + .unwrap() + .clone(); + pd_client.must_remove_peer(region.get_id(), peer3.clone()); + peer3.set_id(2000); + pd_client.must_add_peer(region.get_id(), peer3.clone()); + + fail::remove("on_apply_batch_split"); + std::thread::sleep(Duration::from_millis(200)); + cluster.clear_send_filters(); + + cluster.stop_node(2); + cluster.must_put(b"k06", b"val"); + assert_eq!(cluster.must_get(b"k06").unwrap(), b"val".to_vec()); +} From 138e1cd3c819ef8e9388ab7dc06e7d43fd9a5896 Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 17 Mar 2023 17:04:40 +0800 Subject: [PATCH 0592/1149] raftstore-v2: remove flashback context (#14404) ref tikv/tikv#12842, ref tikv/tikv#14405 Flashback is not fully implemented for raftkv2, setting fields may lead to request failure in normal cases due to https://github.com/tikv/tikv/issues/14405. 
Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- src/server/raftkv2/mod.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 60e0a53a20a..5434da9ce91 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -169,9 +169,10 @@ impl tikv_kv::Engine for RaftKv2 { if ctx.pb_ctx.get_stale_read() && need_encoded_start_ts { flags |= WriteBatchFlags::STALE_READ.bits(); } - if ctx.allowed_in_flashback { - flags |= WriteBatchFlags::FLASHBACK.bits(); - } + // TODO: flashback is not supported yet. + // if ctx.allowed_in_flashback { + // flags |= WriteBatchFlags::FLASHBACK.bits(); + // } header.set_flags(flags); // Encode `start_ts` in `flag_data` for the check of stale read and flashback. if need_encoded_start_ts { @@ -234,9 +235,10 @@ impl tikv_kv::Engine for RaftKv2 { if batch.extra.one_pc { flags |= WriteBatchFlags::ONE_PC.bits(); } - if batch.extra.allowed_in_flashback { - flags |= WriteBatchFlags::FLASHBACK.bits(); - } + // TODO: flashback is not supported yet. 
+ // if batch.extra.allowed_in_flashback { + // flags |= WriteBatchFlags::FLASHBACK.bits(); + // } header.set_flags(flags); self.schedule_txn_extra(batch.extra); From 4baf9e72b97b20d8199b25f97376f5204560ce46 Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 17 Mar 2023 19:12:40 +0800 Subject: [PATCH 0593/1149] tikv-ctl,raftstore: add a log to output corrupted raft msg (#13669) ref tikv/tikv#13668 Signed-off-by: glorv Co-authored-by: Ti Chi Robot --- cmd/tikv-ctl/src/cmd.rs | 2 ++ cmd/tikv-ctl/src/executor.rs | 7 ++++++- cmd/tikv-ctl/src/main.rs | 9 +++++++-- components/raftstore/src/store/util.rs | 8 +++++++- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/cmd/tikv-ctl/src/cmd.rs b/cmd/tikv-ctl/src/cmd.rs index 657d296109c..42678386f5a 100644 --- a/cmd/tikv-ctl/src/cmd.rs +++ b/cmd/tikv-ctl/src/cmd.rs @@ -586,6 +586,8 @@ pub enum RaftCmd { help = RAW_KEY_HINT, )] key: Option, + #[structopt(short = "b")] + binary: bool, }, /// print region info Region { diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 7dd00a1d29c..df095e44425 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -233,7 +233,7 @@ pub trait DebugExecutor { ); } - fn dump_raft_log(&self, region: u64, index: u64) { + fn dump_raft_log(&self, region: u64, index: u64, binary: bool) { let idx_key = keys::raft_log_key(region, index); println!("idx_key: {}", escape(&idx_key)); println!("region: {}", region); @@ -248,6 +248,11 @@ pub trait DebugExecutor { return; } + if binary { + println!("data: \n{}", hex::encode_upper(&data)); + return; + } + match entry.get_entry_type() { EntryType::EntryNormal => { let mut msg = RaftCmdRequest::default(); diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index e4c7be98dba..f547a2cee3a 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -258,7 +258,12 @@ fn main() { debug_executor.dump_value(&cf, key); } Cmd::Raft { cmd: subcmd } => match subcmd { - RaftCmd::Log { 
region, index, key } => { + RaftCmd::Log { + region, + index, + key, + binary, + } => { let (id, index) = if let Some(key) = key.as_deref() { keys::decode_raft_log_key(&unescape(key)).unwrap() } else { @@ -266,7 +271,7 @@ fn main() { let index = index.unwrap(); (id, index) }; - debug_executor.dump_raft_log(id, index); + debug_executor.dump_raft_log(id, index, binary); } RaftCmd::Region { regions, diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index d48c5e78e7c..82a04ec6f4b 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -773,7 +773,13 @@ pub fn get_entry_header(entry: &Entry) -> RaftRequestHeader { pub fn parse_data_at(data: &[u8], index: u64, tag: &str) -> T { let mut result = T::default(); result.merge_from_bytes(data).unwrap_or_else(|e| { - panic!("{} data is corrupted at {}: {:?}", tag, index, e); + panic!( + "{} data is corrupted at {}: {:?}. hex value: {}", + tag, + index, + e, + log_wrappers::Value::value(data) + ); }); result } From bec40346e641f480b35c6a83bfe6e4fd169ddc01 Mon Sep 17 00:00:00 2001 From: you06 Date: Sun, 19 Mar 2023 16:46:39 +0800 Subject: [PATCH 0594/1149] readpool: fix missing metric `tikv_yatp_task_poll_duration` (#14423) close tikv/tikv#14424 Fix the missing metric `tikv_yatp_task_poll_duration` by upgrading yatp. 
Signed-off-by: you06 --- Cargo.lock | 37 +++++---------------- Cargo.toml | 4 --- src/read_pool.rs | 85 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 33 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4123bc6377c..f3ee64d058b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1184,7 +1184,7 @@ dependencies = [ "cfg-if 1.0.0", "crossbeam-channel", "crossbeam-deque", - "crossbeam-epoch 0.9.8", + "crossbeam-epoch", "crossbeam-queue", "crossbeam-utils 0.8.8", ] @@ -1201,12 +1201,13 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.2" -source = "git+https://github.com/crossbeam-rs/crossbeam?rev=41ed3d948720f26149b2ebeaf58fe8a193134056#41ed3d948720f26149b2ebeaf58fe8a193134056" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ "cfg-if 1.0.0", - "crossbeam-epoch 0.9.10", - "crossbeam-utils 0.8.11", + "crossbeam-epoch", + "crossbeam-utils 0.8.8", ] [[package]] @@ -1223,19 +1224,6 @@ dependencies = [ "scopeguard", ] -[[package]] -name = "crossbeam-epoch" -version = "0.9.10" -source = "git+https://github.com/crossbeam-rs/crossbeam?rev=41ed3d948720f26149b2ebeaf58fe8a193134056#41ed3d948720f26149b2ebeaf58fe8a193134056" -dependencies = [ - "autocfg", - "cfg-if 1.0.0", - "crossbeam-utils 0.8.11", - "memoffset 0.6.4", - "once_cell", - "scopeguard", -] - [[package]] name = "crossbeam-queue" version = "0.3.5" @@ -1253,7 +1241,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "883a5821d7d079fcf34ac55f27a833ee61678110f6b97637cc74513c0d0b42fc" dependencies = [ "cfg-if 1.0.0", - "crossbeam-epoch 0.9.8", + "crossbeam-epoch", "crossbeam-utils 0.8.8", "scopeguard", ] @@ -1279,15 +1267,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = "crossbeam-utils" -version = "0.8.11" -source = 
"git+https://github.com/crossbeam-rs/crossbeam?rev=41ed3d948720f26149b2ebeaf58fe8a193134056#41ed3d948720f26149b2ebeaf58fe8a193134056" -dependencies = [ - "cfg-if 1.0.0", - "once_cell", -] - [[package]] name = "crypto-mac" version = "0.10.0" @@ -7493,7 +7472,7 @@ checksum = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5" [[package]] name = "yatp" version = "0.0.1" -source = "git+https://github.com/tikv/yatp.git?branch=master#7ed25299d60a5338bea4ac0ed7470887ab74a010" +source = "git+https://github.com/tikv/yatp.git?branch=master#5523a9a6a4d0d6242bdb02b0a344f7ee1477b39b" dependencies = [ "crossbeam-deque", "crossbeam-skiplist", diff --git a/Cargo.toml b/Cargo.toml index a559fa22474..57a2ab4eced 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -215,10 +215,6 @@ fs2 = { git = "https://github.com/tabokie/fs2-rs", branch = "tikv" } # Remove this when a new version is release. We need to solve rust-lang/cmake-rs#143. cmake = { git = "https://github.com/rust-lang/cmake-rs" } -# TODO: remove this after crossbeam-deque is updated to the next release version. -# This is a workaround for cargo can't resolving the this patch in yatp. 
-crossbeam-deque = { git = "https://github.com/crossbeam-rs/crossbeam", rev = "41ed3d948720f26149b2ebeaf58fe8a193134056" } - [target.'cfg(target_os = "linux")'.dependencies] procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229b2c1fcc44118bef7eff127128" } # When you modify TiKV cooperatively with kvproto, this will be useful to submit the PR to TiKV and the PR to diff --git a/src/read_pool.rs b/src/read_pool.rs index 4852caa181b..16d1a7091b7 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -413,6 +413,7 @@ fn get_unified_read_pool_name() -> String { "unified-read-pool".to_string() } +#[inline] pub fn build_yatp_read_pool( config: &UnifiedReadPoolConfig, reporter: R, @@ -421,6 +422,24 @@ pub fn build_yatp_read_pool( cleanup_method: CleanupMethod, ) -> ReadPool { let unified_read_pool_name = get_unified_read_pool_name(); + build_yatp_read_pool_with_name( + config, + reporter, + engine, + resource_ctl, + cleanup_method, + unified_read_pool_name, + ) +} + +pub fn build_yatp_read_pool_with_name( + config: &UnifiedReadPoolConfig, + reporter: R, + engine: E, + resource_ctl: Option>, + cleanup_method: CleanupMethod, + unified_read_pool_name: String, +) -> ReadPool { let raftkv = Arc::new(Mutex::new(engine)); let builder = YatpPoolBuilder::new(ReporterTicker { reporter }) .name_prefix(&unified_read_pool_name) @@ -744,6 +763,7 @@ mod tests { use futures::channel::oneshot; use raftstore::store::{ReadStats, WriteStats}; + use resource_control::ResourceGroupManager; use super::*; use crate::storage::TestEngineBuilder; @@ -942,4 +962,69 @@ mod tests { let ewma = inspector.get_ewma_time_slice().as_secs_f64(); assert!((ewma - 0.01307).abs() < MARGIN); } + + #[test] + fn test_yatp_task_poll_duration_metric() { + let count_metric = |name: &str| -> u64 { + let mut sum = 0; + for i in 0..=2 { + let hist = + yatp::metrics::TASK_POLL_DURATION.with_label_values(&[name, &format!("{}", i)]); + sum += hist.get_sample_count(); + } + sum + }; + + for control 
in [false, true] { + let name = format!("test_yatp_task_poll_duration_metric_{}", control); + let resource_manager = if control { + let resource_manager = ResourceGroupManager::default(); + let resource_ctl = resource_manager.derive_controller(name.clone(), true); + Some(resource_ctl) + } else { + None + }; + let config = UnifiedReadPoolConfig { + min_thread_count: 1, + max_thread_count: 2, + max_tasks_per_worker: 1, + ..Default::default() + }; + + let engine = TestEngineBuilder::new().build().unwrap(); + + let pool = build_yatp_read_pool_with_name( + &config, + DummyReporter, + engine, + resource_manager, + CleanupMethod::InPlace, + name.clone(), + ); + + let gen_task = || { + let (tx, rx) = oneshot::channel::<()>(); + let task = async move { + // sleep the thread 100ms to trigger flushing the metrics. + std::thread::sleep(std::time::Duration::from_millis(100)); + let _ = rx.await; + }; + (task, tx) + }; + + let handle = pool.handle(); + let (task1, tx1) = gen_task(); + let (task2, tx2) = gen_task(); + + handle.spawn(task1, CommandPri::Normal, 1, vec![]).unwrap(); + handle.spawn(task2, CommandPri::Normal, 2, vec![]).unwrap(); + + tx1.send(()).unwrap(); + tx2.send(()).unwrap(); + + thread::sleep(Duration::from_millis(300)); + assert_eq!(count_metric(&name), 2); + drop(pool); + } + } } From 63d82f3404e3219f355f37d8360d33a0e303ab3c Mon Sep 17 00:00:00 2001 From: Connor Date: Mon, 20 Mar 2023 11:52:41 +0800 Subject: [PATCH 0595/1149] resource_control: Introduce resource group priority (#14414) ref tikv/tikv#13730 Introduce resource group priority, tasks are scheduled based on the order of (priority, virtual_time) Signed-off-by: Connor1996 --- Cargo.lock | 2 +- components/batch-system/src/scheduler.rs | 10 +- components/raftstore/src/coprocessor/mod.rs | 2 +- .../raftstore/src/store/async_io/write.rs | 2 +- .../src/store/async_io/write_router.rs | 9 +- .../src/store/async_io/write_tests.rs | 14 +-- components/raftstore/src/store/fsm/store.rs | 2 +- 
.../src/store/worker/refresh_config.rs | 2 +- components/resource_control/src/channel.rs | 35 ++++--- .../resource_control/src/resource_group.rs | 98 +++++++++++++++---- components/resource_control/src/service.rs | 12 +-- src/server/status_server/mod.rs | 8 +- 12 files changed, 130 insertions(+), 66 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f3ee64d058b..f313d747187 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2712,7 +2712,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#60b33e619c70d8abe151f086a19a82895965f28f" +source = "git+https://github.com/pingcap/kvproto.git#b47a4830141f7c8d2719db0f0184652e692eb672" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/batch-system/src/scheduler.rs b/components/batch-system/src/scheduler.rs index 9eadb125f78..723863249fb 100644 --- a/components/batch-system/src/scheduler.rs +++ b/components/batch-system/src/scheduler.rs @@ -44,7 +44,7 @@ where Priority::Low => &self.low_sender, }; - match sender.send(FsmTypes::Normal(fsm), 0) { + match sender.send(FsmTypes::Normal(fsm), None) { Ok(_) => {} Err(SendError(FsmTypes::Normal(fsm))) => warn!("failed to schedule fsm {:p}", fsm), _ => unreachable!(), @@ -55,8 +55,8 @@ where // TODO: close it explicitly once it's supported. // Magic number, actually any number greater than poll pool size works. for _ in 0..256 { - let _ = self.sender.send(FsmTypes::Empty, 0); - let _ = self.low_sender.send(FsmTypes::Empty, 0); + let _ = self.sender.send(FsmTypes::Empty, None); + let _ = self.low_sender.send(FsmTypes::Empty, None); } } } @@ -88,7 +88,7 @@ where #[inline] fn schedule(&self, fsm: Box) { - match self.sender.send(FsmTypes::Control(fsm), 0) { + match self.sender.send(FsmTypes::Control(fsm), None) { Ok(_) => {} Err(SendError(FsmTypes::Control(fsm))) => warn!("failed to schedule fsm {:p}", fsm), _ => unreachable!(), @@ -99,7 +99,7 @@ where // TODO: close it explicitly once it's supported. 
// Magic number, actually any number greater than poll pool size works. for _ in 0..256 { - let _ = self.sender.send(FsmTypes::Empty, 0); + let _ = self.sender.send(FsmTypes::Empty, None); } } } diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 98b045dbed8..82b6dce17ee 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -275,7 +275,7 @@ pub struct RoleChange { } impl RoleChange { - #[cfg(feature = "testexport")] + #[cfg(any(test, feature = "testexport"))] pub fn new(state: StateRole) -> Self { RoleChange { state, diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index b58d2601d95..e94f7360c23 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -965,7 +965,7 @@ where assert_eq!(writers.len(), handlers.len()); for (i, handler) in handlers.drain(..).enumerate() { info!("stopping store writer {}", i); - writers[i].send(WriteMsg::Shutdown, 0).unwrap(); + writers[i].send(WriteMsg::Shutdown, None).unwrap(); handler.join().unwrap(); } } diff --git a/components/raftstore/src/store/async_io/write_router.rs b/components/raftstore/src/store/async_io/write_router.rs index d00007a9485..3669fddd613 100644 --- a/components/raftstore/src/store/async_io/write_router.rs +++ b/components/raftstore/src/store/async_io/write_router.rs @@ -75,7 +75,7 @@ where pending_write_msgs: Vec>, /// The scheduling priority of the last msg, only valid when priority /// scheduling is enabled - last_msg_priority: u64, + last_msg_priority: Option, } impl WriteRouter @@ -91,7 +91,7 @@ where next_writer_id: None, last_unpersisted: None, pending_write_msgs: vec![], - last_msg_priority: 0, + last_msg_priority: None, } } @@ -103,6 +103,10 @@ where last_unpersisted: Option, msg: WriteMsg, ) { + if last_unpersisted.is_none() { + // reset when there is no pending 
write + self.last_msg_priority = None; + } if self.should_send(ctx, last_unpersisted) { self.send(ctx, msg); } else { @@ -238,6 +242,7 @@ where // pass the priority of last msg as low bound to make sure all messages of one // peer are handled sequentially. match sender.try_send(msg, self.last_msg_priority) { + // TODO: handle last msg priority properly Ok(priority) => self.last_msg_priority = priority, Err(TrySendError::Full(msg)) => { let now = Instant::now(); diff --git a/components/raftstore/src/store/async_io/write_tests.rs b/components/raftstore/src/store/async_io/write_tests.rs index cae5842c8b8..24abf24c4fd 100644 --- a/components/raftstore/src/store/async_io/write_tests.rs +++ b/components/raftstore/src/store/async_io/write_tests.rs @@ -499,7 +499,7 @@ fn test_basic_flow() { .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); t.write_sender(0) - .send(WriteMsg::WriteTask(task_1), 0) + .send(WriteMsg::WriteTask(task_1), None) .unwrap(); let mut task_2 = WriteTask::::new(2, 2, 20); @@ -515,7 +515,7 @@ fn test_basic_flow() { .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); t.write_sender(1) - .send(WriteMsg::WriteTask(task_2), 0) + .send(WriteMsg::WriteTask(task_2), None) .unwrap(); let mut task_3 = WriteTask::::new(region_1, 1, 15); @@ -531,7 +531,7 @@ fn test_basic_flow() { .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); t.write_sender(0) - .send(WriteMsg::WriteTask(task_3), 0) + .send(WriteMsg::WriteTask(task_3), None) .unwrap(); must_wait_same_notifies(vec![(region_1, (1, 15)), (region_2, (2, 20))], &t.notify_rx); @@ -601,7 +601,7 @@ fn test_basic_flow_with_states() { .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); t.write_sender(0) - .send(WriteMsg::WriteTask(task_1), 0) + .send(WriteMsg::WriteTask(task_1), None) .unwrap(); let mut task_2 = WriteTask::::new(2, 2, 20); @@ -620,7 +620,7 @@ fn test_basic_flow_with_states() { .append(&mut vec![RaftMessage::default(), 
RaftMessage::default()]); t.write_sender(1) - .send(WriteMsg::WriteTask(task_2), 0) + .send(WriteMsg::WriteTask(task_2), None) .unwrap(); let mut task_3 = WriteTask::::new(region_1, 1, 15); @@ -638,7 +638,7 @@ fn test_basic_flow_with_states() { .append(&mut vec![RaftMessage::default(), RaftMessage::default()]); t.write_sender(0) - .send(WriteMsg::WriteTask(task_3), 0) + .send(WriteMsg::WriteTask(task_3), None) .unwrap(); must_wait_same_notifies(vec![(region_1, (1, 15)), (region_2, (2, 20))], &t.notify_rx); @@ -714,7 +714,7 @@ fn test_resource_group() { let mut t = TestWriters::new(cfg, &engines, Some(resource_manager)); let (tx, rx) = mpsc::sync_channel(0); - t.write_sender(0).send(WriteMsg::Pause(rx), 0).unwrap(); + t.write_sender(0).send(WriteMsg::Pause(rx), None).unwrap(); let mut r = WriteRouter::new("1".to_string()); let mut task_1 = WriteTask::::new(region_1, 1, 10); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 09d6db62764..a546b286a68 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -1066,7 +1066,7 @@ impl PollHandler, St send_time: write_begin, inspector: latency_inspect, }, - 0, + None, ) { warn!("send latency inspecting to write workers failed"; "err" => ?err); } diff --git a/components/raftstore/src/store/worker/refresh_config.rs b/components/raftstore/src/store/worker/refresh_config.rs index 7ba0476d381..6fcbd6a93e7 100644 --- a/components/raftstore/src/store/worker/refresh_config.rs +++ b/components/raftstore/src/store/worker/refresh_config.rs @@ -46,7 +46,7 @@ where { pub fn decrease_by(&mut self, size: usize) { for _ in 0..size { - if let Err(e) = self.state.fsm_sender.send(FsmTypes::Empty, 0) { + if let Err(e) = self.state.fsm_sender.send(FsmTypes::Empty, None) { error!( "failed to decrease thread pool"; "decrease to" => size, diff --git a/components/resource_control/src/channel.rs b/components/resource_control/src/channel.rs 
index a62b9636f83..ccad4aba4bb 100644 --- a/components/resource_control/src/channel.rs +++ b/components/resource_control/src/channel.rs @@ -85,39 +85,38 @@ impl Sender { // It's used to make sure messages from one peer are sent in order. // The returned value is the priority that the message sent with. It is // calculated by resource controller and compared with `low_bound`. - pub fn send(&self, m: T, low_bound: u64) -> Result> { + pub fn send(&self, m: T, low_bound: Option) -> Result, SendError> { match self { - Sender::Vanilla(sender) => sender.send(m).map(|_| 0), + Sender::Vanilla(sender) => sender.send(m).map(|_| None), Sender::Priority { resource_ctl, sender, last_msg_group, } => { - // TODO: pass different command priority - let priority = std::cmp::max( - resource_ctl - .get_priority(last_msg_group.borrow().as_bytes(), CommandPri::Normal), - low_bound, - ); - sender.send(m, priority).map(|_| priority) + let p = resource_ctl + .get_priority(last_msg_group.borrow().as_bytes(), CommandPri::Normal); + let priority = if let Some(low_bound) = low_bound { + std::cmp::max(p, low_bound) + } else { + p + }; + sender.send(m, priority).map(|_| Some(priority)) } } } - pub fn try_send(&self, m: T, low_bound: u64) -> Result> { + pub fn try_send(&self, m: T, low_bound: Option) -> Result, TrySendError> { match self { - Sender::Vanilla(sender) => sender.try_send(m).map(|_| 0), + Sender::Vanilla(sender) => sender.try_send(m).map(|_| None), Sender::Priority { resource_ctl, sender, last_msg_group, } => { - let priority = std::cmp::max( - resource_ctl - .get_priority(last_msg_group.borrow().as_bytes(), CommandPri::Normal), - low_bound, - ); - sender.try_send(m, priority).map(|_| priority) + let p = resource_ctl + .get_priority(last_msg_group.borrow().as_bytes(), CommandPri::Normal); + let priority = std::cmp::max(p, low_bound.unwrap_or(0)); + sender.try_send(m, priority).map(|_| Some(priority)) } } } @@ -215,7 +214,7 @@ mod tests { n1 += 1; let msg = Msg(1); 
tx.consume_msg_resource(&msg); - tx.send(msg, 0).unwrap(); + tx.send(msg, None).unwrap(); }); drop(tx); diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index cea045dbf1a..690a3e3812f 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -29,6 +29,12 @@ const DEFAULT_RESOURCE_GROUP_NAME: &str = "default"; /// default value of max RU quota. const DEFAULT_MAX_RU_QUOTA: u64 = 10_000; +#[cfg(test)] +const LOW_PRIORITY: u32 = 1; +const MEDIUM_PRIORITY: u32 = 8; +#[cfg(test)] +const HIGH_PRIORITY: u32 = 16; + pub enum ResourceConsumeType { CpuTime(Duration), IoBytes(u64), @@ -71,7 +77,7 @@ impl ResourceGroupManager { let group_name = rg.get_name().to_ascii_lowercase(); self.registry.lock().unwrap().iter().for_each(|controller| { let ru_quota = Self::get_ru_setting(&rg, controller.is_read); - controller.add_resource_group(group_name.clone().into_bytes(), ru_quota); + controller.add_resource_group(group_name.clone().into_bytes(), ru_quota, rg.priority); }); info!("add resource group"; "name"=> &rg.name, "ru" => rg.get_r_u_settings().get_r_u().get_settings().get_fill_rate()); self.resource_groups.insert(group_name, rg); @@ -117,7 +123,7 @@ impl ResourceGroupManager { self.registry.lock().unwrap().push(controller.clone()); for g in &self.resource_groups { let ru_quota = Self::get_ru_setting(g.value(), controller.is_read); - controller.add_resource_group(g.key().clone().into_bytes(), ru_quota); + controller.add_resource_group(g.key().clone().into_bytes(), ru_quota, g.priority); } controller } @@ -161,7 +167,11 @@ impl ResourceController { last_min_vt: AtomicU64::new(0), }; // add the "default" resource group - controller.add_resource_group(DEFAULT_RESOURCE_GROUP_NAME.as_bytes().to_owned(), 0); + controller.add_resource_group( + DEFAULT_RESOURCE_GROUP_NAME.as_bytes().to_owned(), + 0, + MEDIUM_PRIORITY, + ); controller } @@ -176,7 +186,11 @@ impl 
ResourceController { } } - fn add_resource_group(&self, name: Vec, ru_quota: u64) { + fn add_resource_group(&self, name: Vec, ru_quota: u64, mut group_priority: u32) { + if group_priority == 0 { + // map 0 to medium priority(default priority) + group_priority = MEDIUM_PRIORITY; + } let mut max_ru_quota = self.max_ru_quota.lock().unwrap(); if ru_quota > *max_ru_quota { *max_ru_quota = ru_quota; @@ -192,6 +206,7 @@ impl ResourceController { }; let group = GroupPriorityTracker { ru_quota, + group_priority, weight, virtual_time: AtomicU64::new(self.last_min_vt.load(Ordering::Acquire)), vt_delta_for_get, @@ -218,7 +233,11 @@ impl ResourceController { fn remove_resource_group(&self, name: &[u8]) { // do not remove the default resource group, reset to default setting instead. if DEFAULT_RESOURCE_GROUP_NAME.as_bytes() == name { - self.add_resource_group(DEFAULT_RESOURCE_GROUP_NAME.as_bytes().to_owned(), 0); + self.add_resource_group( + DEFAULT_RESOURCE_GROUP_NAME.as_bytes().to_owned(), + 0, + MEDIUM_PRIORITY, + ); return; } self.resource_consumptions.write().remove(name); @@ -294,9 +313,19 @@ impl TaskPriorityProvider for ResourceController { } } +fn concat_priority_vt(group_priority: u32, vt: u64) -> u64 { + assert!((1..=16).contains(&group_priority)); + + // map group_priority from [1, 16] to [0, 15] to limit it 4 bits and get bitwise + // negation to replace leading 4 bits of vt. So that the priority is ordered in + // the descending order by group_priority first, then by vt in ascending order. + vt | (!((group_priority - 1) as u64) << 60) +} + struct GroupPriorityTracker { // the ru setting of this group. 
ru_quota: u64, + group_priority: u32, weight: u64, virtual_time: AtomicU64, // the constant delta value for each `get_priority` call, @@ -306,13 +335,14 @@ struct GroupPriorityTracker { impl GroupPriorityTracker { fn get_priority(&self, level: usize) -> u64 { let task_extra_priority = TASK_EXTRA_FACTOR_BY_LEVEL[level] * 1000 * self.weight; - (if self.vt_delta_for_get > 0 { + let vt = (if self.vt_delta_for_get > 0 { self.virtual_time .fetch_add(self.vt_delta_for_get, Ordering::Relaxed) + self.vt_delta_for_get } else { self.virtual_time.load(Ordering::Relaxed) - }) + task_extra_priority + }) + task_extra_priority; + concat_priority_vt(self.group_priority, vt) } #[inline] @@ -342,8 +372,8 @@ pub(crate) mod tests { use super::*; - pub fn new_resource_group_ru(name: String, ru: u64) -> ResourceGroup { - new_resource_group(name, true, ru, ru) + pub fn new_resource_group_ru(name: String, ru: u64, group_priority: u32) -> ResourceGroup { + new_resource_group(name, true, ru, ru, group_priority) } pub fn new_resource_group( @@ -351,6 +381,7 @@ pub(crate) mod tests { is_ru_mode: bool, read_tokens: u64, write_tokens: u64, + group_priority: u32, ) -> ResourceGroup { use kvproto::resource_manager::{GroupRawResourceSettings, GroupRequestUnitSettings}; @@ -362,6 +393,7 @@ pub(crate) mod tests { GroupMode::RawMode }; group.set_mode(mode); + group.set_priority(group_priority); if is_ru_mode { assert!(read_tokens == write_tokens); let mut ru_setting = GroupRequestUnitSettings::new(); @@ -389,7 +421,7 @@ pub(crate) mod tests { fn test_resource_group() { let resource_manager = ResourceGroupManager::default(); - let group1 = new_resource_group_ru("TEST".into(), 100); + let group1 = new_resource_group_ru("TEST".into(), 100, 0); resource_manager.add_resource_group(group1); assert!(resource_manager.get_resource_group("test1").is_none()); @@ -406,7 +438,7 @@ pub(crate) mod tests { drop(group); assert_eq!(resource_manager.resource_groups.len(), 1); - let group1 = 
new_resource_group_ru("Test".into(), 200); + let group1 = new_resource_group_ru("Test".into(), 200, LOW_PRIORITY); resource_manager.add_resource_group(group1); let group = resource_manager.get_resource_group("test").unwrap(); assert_eq!( @@ -418,10 +450,11 @@ pub(crate) mod tests { .get_fill_rate(), 200 ); + assert_eq!(group.value().get_priority(), 1); drop(group); assert_eq!(resource_manager.resource_groups.len(), 1); - let group2 = new_resource_group_ru("test2".into(), 400); + let group2 = new_resource_group_ru("test2".into(), 400, 0); resource_manager.add_resource_group(group2); assert_eq!(resource_manager.resource_groups.len(), 2); @@ -436,17 +469,26 @@ pub(crate) mod tests { let mut extras1 = Extras::single_level(); extras1.set_metadata("test".as_bytes().to_owned()); - assert_eq!(resource_ctl.priority_of(&extras1), 25_000); + assert_eq!( + resource_ctl.priority_of(&extras1), + concat_priority_vt(LOW_PRIORITY, 25_000) + ); assert_eq!(group1.current_vt(), 25_000); let mut extras2 = Extras::single_level(); extras2.set_metadata("test2".as_bytes().to_owned()); - assert_eq!(resource_ctl.priority_of(&extras2), 12_500); + assert_eq!( + resource_ctl.priority_of(&extras2), + concat_priority_vt(MEDIUM_PRIORITY, 12_500) + ); assert_eq!(group2.current_vt(), 12_500); let mut extras3 = Extras::single_level(); extras3.set_metadata("unknown_group".as_bytes().to_owned()); - assert_eq!(resource_ctl.priority_of(&extras3), 50); + assert_eq!( + resource_ctl.priority_of(&extras3), + concat_priority_vt(MEDIUM_PRIORITY, 50) + ); assert_eq!( resource_ctl .resource_group("default".as_bytes()) @@ -482,7 +524,7 @@ pub(crate) mod tests { drop(group2); // test add 1 new resource group - let new_group = new_resource_group_ru("new_group".into(), 500); + let new_group = new_resource_group_ru("new_group".into(), 500, HIGH_PRIORITY); resource_manager.add_resource_group(new_group); assert_eq!(resource_ctl.resource_consumptions.read().len(), 4); @@ -497,7 +539,7 @@ pub(crate) mod tests { let 
resource_ctl = resource_manager.derive_controller("test_read".into(), true); let resource_ctl_write = resource_manager.derive_controller("test_write".into(), false); - let group1 = new_resource_group_ru("test1".into(), 5000); + let group1 = new_resource_group_ru("test1".into(), 5000, 0); resource_manager.add_resource_group(group1); assert_eq!(resource_ctl.resource_group("test1".as_bytes()).weight, 20); assert_eq!( @@ -506,7 +548,7 @@ pub(crate) mod tests { ); // add a resource group with big ru - let group1 = new_resource_group_ru("test2".into(), 50000); + let group1 = new_resource_group_ru("test2".into(), 50000, 0); resource_manager.add_resource_group(group1); assert_eq!(*resource_ctl.max_ru_quota.lock().unwrap(), 50000); assert_eq!(resource_ctl.resource_group("test1".as_bytes()).weight, 100); @@ -530,10 +572,10 @@ pub(crate) mod tests { let resource_ctl_write = resource_manager.derive_controller("test_write".into(), false); for i in 0..5 { - let group1 = new_resource_group_ru(format!("test{}", i), 100); + let group1 = new_resource_group_ru(format!("test{}", i), 100, 0); resource_manager.add_resource_group(group1); // add a resource group with big ru - let group1 = new_resource_group_ru(format!("group{}", i), 100); + let group1 = new_resource_group_ru(format!("group{}", i), 100, 0); resource_manager.add_resource_group(group1); } // consume for default group @@ -566,4 +608,20 @@ pub(crate) mod tests { 0 ); } + + #[test] + fn test_concat_priority_vt() { + let v1 = concat_priority_vt(MEDIUM_PRIORITY, 1000); + let v2 = concat_priority_vt(MEDIUM_PRIORITY, 1111); + assert!(v1 < v2); + + let v3 = concat_priority_vt(LOW_PRIORITY, 1000); + assert!(v1 < v3); + + let v4 = concat_priority_vt(MEDIUM_PRIORITY, 1111); + assert_eq!(v2, v4); + + let v5 = concat_priority_vt(HIGH_PRIORITY, 10); + assert!(v5 < v1); + } } diff --git a/components/resource_control/src/service.rs b/components/resource_control/src/service.rs index a2d64f57c3b..82c01eae398 100644 --- 
a/components/resource_control/src/service.rs +++ b/components/resource_control/src/service.rs @@ -199,7 +199,7 @@ pub mod tests { let resource_manager = ResourceGroupManager::default(); let mut s = ResourceManagerService::new(Arc::new(resource_manager), Arc::new(client)); - let group = new_resource_group("TEST".into(), true, 100, 100); + let group = new_resource_group("TEST".into(), true, 100, 100, 0); add_resource_group(s.pd_client.clone(), group); block_on(s.reload_all_resource_groups()); assert_eq!(s.manager.get_all_resource_groups().len(), 1); @@ -244,12 +244,12 @@ pub mod tests { s_clone.watch_resource_groups().await; }); // Mock add - let group1 = new_resource_group_ru("TEST1".into(), 100); + let group1 = new_resource_group_ru("TEST1".into(), 100, 0); add_resource_group(s.pd_client.clone(), group1); - let group2 = new_resource_group_ru("TEST2".into(), 100); + let group2 = new_resource_group_ru("TEST2".into(), 100, 0); add_resource_group(s.pd_client.clone(), group2); // Mock modify - let group2 = new_resource_group_ru("TEST2".into(), 50); + let group2 = new_resource_group_ru("TEST2".into(), 50, 0); add_resource_group(s.pd_client.clone(), group2); wait_watch_ready(&s, 2); @@ -286,7 +286,7 @@ pub mod tests { s_clone.watch_resource_groups().await; }); // Mock add - let group1 = new_resource_group_ru("TEST1".into(), 100); + let group1 = new_resource_group_ru("TEST1".into(), 100, 0); add_resource_group(s.pd_client.clone(), group1); // Mock reboot watch server let watch_global_config_fp = "watch_global_config_return"; @@ -294,7 +294,7 @@ pub mod tests { std::thread::sleep(Duration::from_millis(100)); fail::remove(watch_global_config_fp); // Mock add after rebooting will success - let group1 = new_resource_group_ru("TEST2".into(), 100); + let group1 = new_resource_group_ru("TEST2".into(), 100, 0); add_resource_group(s.pd_client.clone(), group1); // Wait watcher update std::thread::sleep(Duration::from_secs(1)); diff --git a/src/server/status_server/mod.rs 
b/src/server/status_server/mod.rs index 2beed27de8b..2ce7a8714c0 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -707,14 +707,15 @@ where } #[derive(Serialize)] -struct ResouceGroupSetting { +struct ResourceGroupSetting { name: String, ru: u64, + priority: u32, burst_limit: i64, } -fn into_debug_request_group(rg: ResourceGroup) -> ResouceGroupSetting { - ResouceGroupSetting { +fn into_debug_request_group(rg: ResourceGroup) -> ResourceGroupSetting { + ResourceGroupSetting { name: rg.name, ru: rg .r_u_settings @@ -722,6 +723,7 @@ fn into_debug_request_group(rg: ResourceGroup) -> ResouceGroupSetting { .get_r_u() .get_settings() .get_fill_rate(), + priority: rg.priority, burst_limit: rg .r_u_settings .get_ref() From 7b1fe9df07e6e58c759231b668f2765bdeddb583 Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Mon, 20 Mar 2023 12:20:40 +0800 Subject: [PATCH 0596/1149] PITR: support modifying the config tikv.import.memory-use-ratio online when restore point. 
(#14408) close tikv/tikv#14409 Signed-off-by: joccau Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/server/src/server.rs | 7 ++ components/server/src/server2.rs | 7 ++ components/sst_importer/Cargo.toml | 1 + components/sst_importer/src/config.rs | 53 +++++++++++-- components/sst_importer/src/lib.rs | 2 +- components/sst_importer/src/sst_importer.rs | 85 +++++++++++++++++++-- src/config/mod.rs | 2 +- src/import/sst_service.rs | 26 +++++-- 9 files changed, 160 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f313d747187..4265565e353 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5499,6 +5499,7 @@ dependencies = [ "kvproto", "lazy_static", "log_wrappers", + "online_config", "openssl", "prometheus", "rand 0.8.5", diff --git a/components/server/src/server.rs b/components/server/src/server.rs index e77197a7737..b9563f295b5 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1250,6 +1250,8 @@ where LocalTablets::Singleton(engines.engines.kv.clone()), servers.importer.clone(), ); + let import_cfg_mgr = import_service.get_config_manager(); + if servers .server .register_service(create_import_sst(import_service)) @@ -1258,6 +1260,11 @@ where fatal!("failed to register import service"); } + self.cfg_controller + .as_mut() + .unwrap() + .register(tikv::config::Module::Import, Box::new(import_cfg_mgr)); + // Debug service. 
let debug_service = DebugService::new( engines.engines.clone(), diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 9ae032dca7a..ef38c3e2286 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -989,6 +989,8 @@ where LocalTablets::Registry(self.tablet_registry.as_ref().unwrap().clone()), servers.importer.clone(), ); + let import_cfg_mgr = import_service.get_config_manager(); + if servers .server .register_service(create_import_sst(import_service)) @@ -997,6 +999,11 @@ where fatal!("failed to register import service"); } + self.cfg_controller + .as_mut() + .unwrap() + .register(tikv::config::Module::Import, Box::new(import_cfg_mgr)); + // Create Diagnostics service let diag_service = DiagnosticsService::new( servers.server.get_debug_thread_pool().clone(), diff --git a/components/sst_importer/Cargo.toml b/components/sst_importer/Cargo.toml index a21a58c0a6c..8e2799b7437 100644 --- a/components/sst_importer/Cargo.toml +++ b/components/sst_importer/Cargo.toml @@ -29,6 +29,7 @@ keys = { workspace = true } kvproto = { workspace = true } lazy_static = "1.3" log_wrappers = { workspace = true } +online_config = { workspace = true } openssl = "0.10" prometheus = { version = "0.13", default-features = false } rand = "0.8" diff --git a/components/sst_importer/src/config.rs b/components/sst_importer/src/config.rs index ac789e2f4ae..7434c5cf0cd 100644 --- a/components/sst_importer/src/config.rs +++ b/components/sst_importer/src/config.rs @@ -1,10 +1,15 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{error::Error, result::Result}; +use std::{ + error::Error, + result::Result, + sync::{Arc, RwLock}, +}; -use tikv_util::config::ReadableDuration; +use online_config::{self, OnlineConfig}; +use tikv_util::{config::ReadableDuration, HandyRwLock}; -#[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] +#[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] pub struct Config { @@ -47,12 +52,48 @@ impl Config { self.stream_channel_window = default_cfg.stream_channel_window; } if self.memory_use_ratio > 0.5 || self.memory_use_ratio < 0.0 { + return Err("import.mem_ratio should belong to [0.0, 0.5].".into()); + } + Ok(()) + } +} + +#[derive(Clone)] +pub struct ConfigManager(pub Arc>); + +impl ConfigManager { + pub fn new(cfg: Config) -> Self { + ConfigManager(Arc::new(RwLock::new(cfg))) + } +} + +impl online_config::ConfigManager for ConfigManager { + fn dispatch(&mut self, change: online_config::ConfigChange) -> online_config::Result<()> { + info!( + "import config changed"; + "change" => ?change, + ); + + let mut cfg = self.rl().clone(); + cfg.update(change)?; + + if let Err(e) = cfg.validate() { warn!( - "import.mem_ratio should belong to [0.0, 0.5], change it to {}", - default_cfg.memory_use_ratio, + "import config changed"; + "change" => ?cfg, ); - self.memory_use_ratio = default_cfg.memory_use_ratio; + return Err(e); } + + *self.wl() = cfg; Ok(()) } } + +impl std::ops::Deref for ConfigManager { + type Target = RwLock; + + fn deref(&self) -> &Self::Target { + self.0.as_ref() + } +} diff --git a/components/sst_importer/src/lib.rs b/components/sst_importer/src/lib.rs index 4d25201253a..e073ff941ae 100644 --- a/components/sst_importer/src/lib.rs +++ b/components/sst_importer/src/lib.rs @@ -24,7 +24,7 @@ pub mod metrics; pub mod sst_importer; pub use self::{ - config::Config, + config::{Config, ConfigManager}, errors::{error_inc, Error, Result}, import_file::sst_meta_to_path, 
sst_importer::SstImporter, diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 0da45c195be..5b55974dff3 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -36,9 +36,9 @@ use tikv_util::{ bytes::{decode_bytes_in_place, encode_bytes}, stream_event::{EventEncoder, EventIterator, Iterator as EIterator}, }, - config::ReadableSize, sys::{thread::ThreadBuildWrapper, SysQuota}, time::{Instant, Limiter}, + HandyRwLock, }; use tokio::runtime::{Handle, Runtime}; use txn_types::{Key, TimeStamp, WriteRef}; @@ -49,7 +49,7 @@ use crate::{ import_mode::{ImportModeSwitcher, RocksDbMetricsFn}, metrics::*, sst_writer::{RawSstWriter, TxnSstWriter}, - util, Config, Error, Result, + util, Config, ConfigManager as ImportConfigManager, Error, Result, }; pub struct LoadedFile { @@ -278,7 +278,7 @@ pub struct SstImporter { download_rt: Runtime, file_locks: Arc>, mem_use: Arc, - mem_limit: ReadableSize, + mem_limit: Arc, } impl SstImporter { @@ -308,8 +308,12 @@ impl SstImporter { .build()?; download_rt.spawn(cached_storage.gc_loop()); - let memory_limit = (SysQuota::memory_limit_in_bytes() as f64) * cfg.memory_use_ratio; - info!("sst importer memory limit when apply"; "size" => ?memory_limit); + let memory_limit = Self::calcualte_usage_mem(cfg.memory_use_ratio); + info!( + "sst importer memory limit when apply"; + "ratio" => cfg.memory_use_ratio, + "size" => ?memory_limit, + ); Ok(SstImporter { dir: ImportDir::new(root)?, @@ -321,10 +325,14 @@ impl SstImporter { cached_storage, download_rt, mem_use: Arc::new(AtomicU64::new(0)), - mem_limit: ReadableSize(memory_limit as u64), + mem_limit: Arc::new(AtomicU64::new(memory_limit)), }) } + fn calcualte_usage_mem(mem_ratio: f64) -> u64 { + ((SysQuota::memory_limit_in_bytes() as f64) * mem_ratio) as u64 + } + pub fn set_compression_type( &mut self, cf_name: CfName, @@ -583,6 +591,19 @@ impl SstImporter { Ok(()) } + pub fn 
update_config_memory_use_ratio(&self, cfg_mgr: &ImportConfigManager) { + let mem_ratio = cfg_mgr.rl().memory_use_ratio; + let memory_limit = Self::calcualte_usage_mem(mem_ratio); + + if self.mem_limit.load(Ordering::SeqCst) != memory_limit { + self.mem_limit.store(memory_limit, Ordering::SeqCst); + info!("update importer config"; + "memory-use-ratio" => mem_ratio, + "size" => memory_limit, + ) + } + } + pub fn shrink_by_tick(&self) -> usize { let mut shrink_buff_size: usize = 0; let mut retain_buff_size: usize = 0; @@ -643,7 +664,7 @@ impl SstImporter { // If mem_limit is 0, which represent download kv-file when import. // Or read kv-file into buffer directly. pub fn import_support_download(&self) -> bool { - self.mem_limit == ReadableSize(0) + self.mem_limit.load(Ordering::SeqCst) == 0 } fn request_memory(&self, meta: &KvMeta) -> Option { @@ -651,7 +672,7 @@ impl SstImporter { let old = self.mem_use.fetch_add(size, Ordering::SeqCst); // If the memory is limited, roll backup the mem_use and return false. - if old + size > self.mem_limit.0 { + if old + size > self.mem_limit.load(Ordering::SeqCst) { self.mem_use.fetch_sub(size, Ordering::SeqCst); CACHE_EVENT.with_label_values(&["out-of-quota"]).inc(); None @@ -1449,6 +1470,7 @@ mod tests { }; use external_storage_export::read_external_storage_info_buff; use file_system::File; + use online_config::{ConfigManager, OnlineConfig}; use openssl::hash::{Hasher, MessageDigest}; use tempfile::Builder; use test_sst_importer::*; @@ -1958,6 +1980,53 @@ mod tests { assert_eq!(err.kind(), io::ErrorKind::TimedOut); } + #[test] + fn test_update_config_memory_use_ratio() { + // create SstImpoter with default. + let cfg = Config { + memory_use_ratio: 0.3, + ..Default::default() + }; + let import_dir = tempfile::tempdir().unwrap(); + let importer = SstImporter::new(&cfg, import_dir, None, ApiVersion::V1).unwrap(); + let mem_limit_old = importer.mem_limit.load(Ordering::SeqCst); + + // create new config and get the diff config. 
+ let cfg_new = Config { + memory_use_ratio: 0.1, + ..Default::default() + }; + let change = cfg.diff(&cfg_new); + + // create config manager and update config. + let mut cfg_mgr = ImportConfigManager::new(cfg); + cfg_mgr.dispatch(change).unwrap(); + importer.update_config_memory_use_ratio(&cfg_mgr); + + let mem_limit_new = importer.mem_limit.load(Ordering::SeqCst); + assert!(mem_limit_old > mem_limit_new); + assert_eq!( + mem_limit_old / 3, + mem_limit_new, + "mem_limit_old / 3 = {} mem_limit_new = {}", + mem_limit_old / 3, + mem_limit_new + ); + } + + #[test] + fn test_update_config_with_invalid_conifg() { + let cfg = Config::default(); + let cfg_new = Config { + memory_use_ratio: -0.1, + ..Default::default() + }; + let change = cfg.diff(&cfg_new); + let mut cfg_mgr = ImportConfigManager::new(cfg); + let r = cfg_mgr.dispatch(change); + assert!(r.is_err()); + } + #[test] fn test_do_read_kv_file() { // create a sample kv file. diff --git a/src/config/mod.rs b/src/config/mod.rs index c1c38e39d77..3eb15ba8ace 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3127,7 +3127,7 @@ pub struct TikvConfig { #[online_config(skip)] pub security: SecurityConfig, - #[online_config(skip)] + #[online_config(submodule)] pub import: ImportConfig, #[online_config(submodule)] diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 291841facde..b23046bfe4b 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -26,8 +26,8 @@ use kvproto::{ kvrpcpb::Context, }; use sst_importer::{ - error_inc, metrics::*, sst_importer::DownloadExt, sst_meta_to_path, Config, Error, Result, - SstImporter, + error_inc, metrics::*, sst_importer::DownloadExt, sst_meta_to_path, Config, ConfigManager, + Error, Result, SstImporter, }; use tikv_kv::{Engine, Modify, SnapContext, Snapshot, SnapshotExt, WriteData, WriteEvent}; use tikv_util::{ @@ -35,6 +35,7 @@ use tikv_util::{ future::create_stream_with_buffer, sys::thread::ThreadBuildWrapper, time::{Instant, Limiter}, 
+ HandyRwLock, }; use tokio::{runtime::Runtime, time::sleep}; use txn_types::{Key, WriteRef, WriteType}; @@ -85,7 +86,7 @@ async fn wait_write(mut s: impl Stream + Send + Unpin) -> sto /// raftstore to trigger the ingest process. #[derive(Clone)] pub struct ImportSstService { - cfg: Config, + cfg: ConfigManager, tablets: LocalTablets, engine: E, threads: Arc, @@ -296,10 +297,12 @@ impl ImportSstService { if let LocalTablets::Singleton(tablet) = &tablets { importer.start_switch_mode_check(threads.handle(), tablet.clone()); } - threads.spawn(Self::tick(importer.clone())); + + let cfg_mgr = ConfigManager::new(cfg); + threads.spawn(Self::tick(importer.clone(), cfg_mgr.clone())); ImportSstService { - cfg, + cfg: cfg_mgr, tablets, threads: Arc::new(threads), block_threads: Arc::new(block_threads), @@ -311,9 +314,15 @@ impl ImportSstService { } } - async fn tick(importer: Arc) { + pub fn get_config_manager(&self) -> ConfigManager { + self.cfg.clone() + } + + async fn tick(importer: Arc, cfg: ConfigManager) { loop { sleep(Duration::from_secs(10)).await; + + importer.update_config_memory_use_ratio(&cfg); importer.shrink_by_tick(); } } @@ -544,7 +553,7 @@ macro_rules! 
impl_write { let import = self.importer.clone(); let tablets = self.tablets.clone(); let (rx, buf_driver) = - create_stream_with_buffer(stream, self.cfg.stream_channel_window); + create_stream_with_buffer(stream, self.cfg.rl().stream_channel_window); let mut rx = rx.map_err(Error::from); let timer = Instant::now_coarse(); @@ -652,7 +661,8 @@ impl ImportSst for ImportSstService { let label = "upload"; let timer = Instant::now_coarse(); let import = self.importer.clone(); - let (rx, buf_driver) = create_stream_with_buffer(stream, self.cfg.stream_channel_window); + let (rx, buf_driver) = + create_stream_with_buffer(stream, self.cfg.rl().stream_channel_window); let mut map_rx = rx.map_err(Error::from); let handle_task = async move { From 4dc1a5a94b0f88257a11c0733937a4892c70518d Mon Sep 17 00:00:00 2001 From: Connor Date: Mon, 20 Mar 2023 13:40:40 +0800 Subject: [PATCH 0597/1149] grafana: fix grafana display anomaly (#14428) close tikv/tikv#14427 Fix grafana display anomaly. The `pessimistic lock activities` panel's id is the same as that of `gRPC resource group QPS` panel which makes grafana display anomaly. So change the duplicated id. 
Signed-off-by: Connor1996 --- metrics/grafana/tikv_details.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index f404ebc5376..9600222547e 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -5797,7 +5797,7 @@ "y": 37 }, "hiddenSeries": false, - "id": 23763573091, + "id": 23763573090, "legend": { "alignAsTable": true, "avg": false, From 5a8f970d313a6dc640ee02a9fc71020b1215a31a Mon Sep 17 00:00:00 2001 From: lijie Date: Mon, 20 Mar 2023 14:48:07 +0800 Subject: [PATCH 0598/1149] feat: bump version to 7.1.0-alpha (#14431) Signed-off-by: lijie --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4265565e353..14e351effba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6316,7 +6316,7 @@ dependencies = [ [[package]] name = "tikv" -version = "6.7.0-alpha" +version = "7.1.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index 57a2ab4eced..f8e67d70c04 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tikv" -version = "6.7.0-alpha" +version = "7.1.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From 27f4d8c9fa86ee7c1e7631c42c869632db418d85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 22 Mar 2023 16:36:42 +0800 Subject: [PATCH 0599/1149] backup-stream: don't close the server stream when encountered errors (#14432) close tikv/tikv#14426 Signed-off-by: hillium Co-authored-by: Ti Chi Robot --- .../backup-stream/src/checkpoint_manager.rs | 206 ++++++++++++++++-- components/backup-stream/src/service.rs | 7 +- 2 files changed, 191 insertions(+), 22 deletions(-) diff --git a/components/backup-stream/src/checkpoint_manager.rs 
b/components/backup-stream/src/checkpoint_manager.rs index 50a6ac27864..d32c2ea7c00 100644 --- a/components/backup-stream/src/checkpoint_manager.rs +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -7,7 +7,7 @@ use futures::{ future::BoxFuture, FutureExt, SinkExt, StreamExt, }; -use grpcio::{RpcStatus, RpcStatusCode, ServerStreamingSink, WriteFlags}; +use grpcio::{RpcStatus, RpcStatusCode, WriteFlags}; use kvproto::{ errorpb::{Error as PbError, *}, logbackuppb::{FlushEvent, SubscribeFlushEventResponse}, @@ -20,7 +20,7 @@ use uuid::Uuid; use crate::{ annotate, - errors::{Error, ReportableResult, Result}, + errors::{Error, Result}, future, metadata::{store::MetaStore, Checkpoint, CheckpointProvider, MetadataClient}, metrics, @@ -51,9 +51,11 @@ impl std::fmt::Debug for CheckpointManager { enum SubscriptionOp { Add(Subscription), Emit(Box<[FlushEvent]>), + #[cfg(test)] + Inspect(Box), } -struct SubscriptionManager { +pub struct SubscriptionManager { subscribers: HashMap, input: Receiver, } @@ -72,8 +74,13 @@ impl SubscriptionManager { SubscriptionOp::Emit(events) => { self.emit_events(events).await; } + #[cfg(test)] + SubscriptionOp::Inspect(f) => { + f(&self); + } } } + // NOTE: Maybe close all subscription streams here. 
} async fn emit_events(&mut self, events: Box<[FlushEvent]>) { @@ -89,14 +96,9 @@ impl SubscriptionManager { sub.flush().await }; - match send_all.await { - Err(grpcio::Error::RemoteStopped) => { - canceled.push(*id); - } - Err(err) => { - Error::from(err).report("sending subscription"); - } - _ => {} + if let Err(err) = send_all.await { + canceled.push(*id); + Error::from(err).report("sending subscription"); } } @@ -107,11 +109,10 @@ impl SubscriptionManager { async fn remove_subscription(&mut self, id: &Uuid) { match self.subscribers.remove(id) { - Some(mut sub) => { + Some(sub) => { info!("client is gone, removing subscription"; "id" => %id); - sub.close() - .await - .report_if_err(format_args!("during removing subscription {}", id)) + // The stream is an endless stream -- we don't need to close it. + drop(sub); } None => { warn!("BUG: the subscriber has been removed before we are going to remove it."; "id" => %id); @@ -121,7 +122,12 @@ impl SubscriptionManager { } // Note: can we make it more generic...? -pub type Subscription = ServerStreamingSink; +#[cfg(not(test))] +pub type Subscription = + grpcio::ServerStreamingSink; + +#[cfg(test)] +pub type Subscription = tests::MockSink; /// The result of getting a checkpoint. /// The possibility of failed to getting checkpoint is pretty high: @@ -201,7 +207,7 @@ impl CheckpointManager { /// update a region checkpoint in need. 
#[cfg(test)] - pub fn update_region_checkpoint(&mut self, region: &Region, checkpoint: TimeStamp) { + fn update_region_checkpoint(&mut self, region: &Region, checkpoint: TimeStamp) { Self::update_ts(&mut self.checkpoint_ts, region.clone(), checkpoint) } @@ -326,6 +332,29 @@ impl CheckpointManager { pub fn get_resolved_ts(&self) -> Option { self.resolved_ts.values().map(|x| x.checkpoint).min() } + + #[cfg(test)] + fn sync_with_subs_mgr( + &mut self, + f: impl FnOnce(&SubscriptionManager) -> T + Send + 'static, + ) -> T { + use std::sync::Mutex; + + let (tx, rx) = std::sync::mpsc::sync_channel(1); + let t = Arc::new(Mutex::new(None)); + let tr = Arc::clone(&t); + self.manager_handle + .as_mut() + .unwrap() + .try_send(SubscriptionOp::Inspect(Box::new(move |x| { + *tr.lock().unwrap() = Some(f(x)); + tx.send(()).unwrap(); + }))) + .unwrap(); + rx.recv().unwrap(); + let mut t = t.lock().unwrap(); + t.take().unwrap() + } } fn not_leader(r: u64) -> PbError { @@ -525,17 +554,21 @@ pub mod tests { use std::{ assert_matches, collections::HashMap, - sync::{Arc, RwLock}, + sync::{Arc, Mutex, RwLock}, time::Duration, }; - use futures::future::ok; - use kvproto::metapb::*; + use futures::{future::ok, Sink}; + use grpcio::{RpcStatus, RpcStatusCode}; + use kvproto::{logbackuppb::SubscribeFlushEventResponse, metapb::*}; use pd_client::{PdClient, PdFuture}; use txn_types::TimeStamp; use super::{BasicFlushObserver, FlushObserver, RegionIdWithVersion}; - use crate::GetCheckpointResult; + use crate::{ + subscription_track::{CheckpointType, ResolveResult}, + GetCheckpointResult, + }; fn region(id: u64, version: u64, conf_version: u64) -> Region { let mut r = Region::new(); @@ -547,6 +580,137 @@ pub mod tests { r } + #[derive(Clone)] + pub struct MockSink(Arc>); + + impl MockSink { + fn with_fail_once(code: RpcStatusCode) -> Self { + let mut failed = false; + let inner = MockSinkInner { + items: Vec::default(), + closed: false, + on_error: Box::new(move || { + if failed { + 
RpcStatusCode::OK + } else { + failed = true; + code + } + }), + }; + Self(Arc::new(Mutex::new(inner))) + } + + fn trivial() -> Self { + let inner = MockSinkInner { + items: Vec::default(), + closed: false, + on_error: Box::new(|| RpcStatusCode::OK), + }; + Self(Arc::new(Mutex::new(inner))) + } + + pub async fn fail(&self, status: RpcStatus) -> crate::errors::Result<()> { + panic!("failed in a case should never fail: {}", status); + } + } + + struct MockSinkInner { + items: Vec, + closed: bool, + on_error: Box grpcio::RpcStatusCode + Send>, + } + + impl Sink<(SubscribeFlushEventResponse, grpcio::WriteFlags)> for MockSink { + type Error = grpcio::Error; + + fn poll_ready( + self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + Ok(()).into() + } + + fn start_send( + self: std::pin::Pin<&mut Self>, + item: (SubscribeFlushEventResponse, grpcio::WriteFlags), + ) -> Result<(), Self::Error> { + let mut guard = self.0.lock().unwrap(); + let code = (guard.on_error)(); + if code != RpcStatusCode::OK { + return Err(grpcio::Error::RpcFailure(RpcStatus::new(code))); + } + guard.items.push(item.0); + Ok(()) + } + + fn poll_flush( + self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + Ok(()).into() + } + + fn poll_close( + self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + let mut guard = self.0.lock().unwrap(); + guard.closed = true; + Ok(()).into() + } + } + + fn simple_resolve_result() -> ResolveResult { + let mut region = Region::new(); + region.set_id(42); + ResolveResult { + region, + checkpoint: 42.into(), + checkpoint_type: CheckpointType::MinTs, + } + } + + #[test] + fn test_rpc_sub() { + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .build() + .unwrap(); + let mut mgr = super::CheckpointManager::default(); + rt.spawn(mgr.spawn_subscription_mgr()); + + let trivial_sink = MockSink::trivial(); + 
rt.block_on(mgr.add_subscriber(trivial_sink.clone())) + .unwrap(); + + mgr.resolve_regions(vec![simple_resolve_result()]); + mgr.flush(); + mgr.sync_with_subs_mgr(|_| {}); + assert_eq!(trivial_sink.0.lock().unwrap().items.len(), 1); + } + + #[test] + fn test_rpc_failure() { + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .build() + .unwrap(); + let mut mgr = super::CheckpointManager::default(); + rt.spawn(mgr.spawn_subscription_mgr()); + + let error_sink = MockSink::with_fail_once(RpcStatusCode::INTERNAL); + rt.block_on(mgr.add_subscriber(error_sink.clone())).unwrap(); + + mgr.resolve_regions(vec![simple_resolve_result()]); + mgr.flush(); + assert_eq!(mgr.sync_with_subs_mgr(|item| { item.subscribers.len() }), 0); + let sink = error_sink.0.lock().unwrap(); + assert_eq!(sink.items.len(), 0); + // The stream shouldn't be closed when exit by a failure. + assert_eq!(sink.closed, false); + } + #[test] fn test_flush() { let mut mgr = super::CheckpointManager::default(); diff --git a/components/backup-stream/src/service.rs b/components/backup-stream/src/service.rs index 9d312a984d1..43d4ede2f27 100644 --- a/components/backup-stream/src/service.rs +++ b/components/backup-stream/src/service.rs @@ -94,8 +94,13 @@ impl LogBackup for Service { &mut self, _ctx: grpcio::RpcContext<'_>, _req: kvproto::logbackuppb::SubscribeFlushEventRequest, - sink: grpcio::ServerStreamingSink, + #[allow(unused_variables)] sink: grpcio::ServerStreamingSink< + kvproto::logbackuppb::SubscribeFlushEventResponse, + >, ) { + #[cfg(test)] + panic!("Service should not be used in an unit test"); + #[cfg(not(test))] try_send!( self.endpoint, Task::RegionCheckpointsOp(RegionCheckpointOperation::Subscribe(sink)) From 4b2dda4823b5b329798a2aff1109167534560313 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Thu, 23 Mar 2023 18:28:43 +0800 Subject: [PATCH 0600/1149] storage: fix the apply write wal tracking time (#14444) ref tikv/tikv#12362 Fix the returned apply write wal tracking 
time. Signed-off-by: cfzjywxk --- components/tracker/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index 56ce2aa3280..35ae0fc15f2 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -68,7 +68,7 @@ impl Tracker { detail.set_apply_log_nanos(self.metrics.apply_time_nanos - self.metrics.apply_wait_nanos); detail.set_apply_mutex_lock_nanos(self.metrics.apply_mutex_lock_nanos); detail.set_apply_write_leader_wait_nanos(self.metrics.apply_thread_wait_nanos); - detail.set_apply_write_wal_nanos(self.metrics.apply_wait_nanos); + detail.set_apply_write_wal_nanos(self.metrics.apply_write_wal_nanos); detail.set_apply_write_memtable_nanos(self.metrics.apply_write_memtable_nanos); } } From ffaf4862c2fedd1eaf154f1cdf057b00210670b1 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Fri, 24 Mar 2023 09:18:43 +0800 Subject: [PATCH 0601/1149] raftstore-v2: commit merge (#14253) ref tikv/tikv#12842, ref tikv/tikv#13818 Implement commit merge for raftstore-v2 Signed-off-by: tabokie Co-authored-by: tonyxuqqi --- Cargo.lock | 14 +- components/engine_panic/src/checkpoint.rs | 4 + components/engine_rocks/src/checkpoint.rs | 8 + components/engine_rocks/src/event_listener.rs | 4 +- components/engine_traits/src/checkpoint.rs | 2 + components/engine_traits/src/flush.rs | 15 +- components/raftstore-v2/src/batch/store.rs | 17 +- components/raftstore-v2/src/fsm/apply.rs | 9 +- components/raftstore-v2/src/fsm/peer.rs | 16 +- components/raftstore-v2/src/fsm/store.rs | 3 + .../operation/command/admin/compact_log.rs | 54 +- .../operation/command/admin/conf_change.rs | 4 + .../operation/command/admin/merge/commit.rs | 792 ++++++++++++++++++ .../src/operation/command/admin/merge/mod.rs | 37 +- .../operation/command/admin/merge/prepare.rs | 45 +- .../operation/command/admin/merge/rollback.rs | 12 + .../src/operation/command/admin/mod.rs | 9 +- .../src/operation/command/admin/split.rs | 12 
+- .../raftstore-v2/src/operation/command/mod.rs | 8 +- components/raftstore-v2/src/operation/life.rs | 44 +- components/raftstore-v2/src/operation/mod.rs | 7 +- .../raftstore-v2/src/operation/ready/mod.rs | 7 + .../src/operation/ready/snapshot.rs | 4 + .../raftstore-v2/src/operation/txn_ext.rs | 14 + components/raftstore-v2/src/raft/peer.rs | 9 + components/raftstore-v2/src/raft/storage.rs | 3 +- components/raftstore-v2/src/router/message.rs | 15 +- components/raftstore-v2/src/router/mod.rs | 5 +- .../raftstore-v2/tests/failpoints/mod.rs | 1 + .../tests/failpoints/test_merge.rs | 109 +++ .../tests/integrations/cluster.rs | 63 +- .../raftstore-v2/tests/integrations/mod.rs | 1 + .../tests/integrations/test_merge.rs | 113 +++ .../tests/integrations/test_split.rs | 59 +- components/test_raftstore-v2/Cargo.toml | 2 +- components/test_raftstore-v2/src/cluster.rs | 147 +++- components/test_raftstore-v2/src/lib.rs | 1 + components/test_raftstore-v2/src/server.rs | 16 +- components/test_raftstore/src/util.rs | 6 +- tests/failpoints/cases/test_merge.rs | 2 +- tests/integrations/raftstore/test_merge.rs | 431 ++++++---- 41 files changed, 1852 insertions(+), 272 deletions(-) create mode 100644 components/raftstore-v2/src/operation/command/admin/merge/commit.rs create mode 100644 components/raftstore-v2/src/operation/command/admin/merge/rollback.rs create mode 100644 components/raftstore-v2/tests/failpoints/test_merge.rs create mode 100644 components/raftstore-v2/tests/integrations/test_merge.rs diff --git a/Cargo.lock b/Cargo.lock index 14e351effba..e12ee05562d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1792,12 +1792,12 @@ dependencies = [ [[package]] name = "fail" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3245a0ca564e7f3c797d20d833a6870f57a728ac967d5225b3ffdef4465011" +checksum = "fe5e43d0f78a42ad591453aedb1d7ae631ce7ee445c7643691055a9ed8d3b01c" dependencies = [ - "lazy_static", "log", + 
"once_cell", "rand 0.8.5", ] @@ -2712,7 +2712,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#b47a4830141f7c8d2719db0f0184652e692eb672" +source = "git+https://github.com/pingcap/kvproto.git#df1ae63d0cfe2f5e01d2016a1839a7e88ef2da38" dependencies = [ "futures 0.3.15", "grpcio", @@ -2841,7 +2841,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#9e4678857e5b4c738e95c7ee1a35ee962264f4e9" +source = "git+https://github.com/tikv/rust-rocksdb.git#a9fbe325939c166ffc5f80e63066f5d8594a1fff" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2860,7 +2860,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#9e4678857e5b4c738e95c7ee1a35ee962264f4e9" +source = "git+https://github.com/tikv/rust-rocksdb.git#a9fbe325939c166ffc5f80e63066f5d8594a1fff" dependencies = [ "bzip2-sys", "cc", @@ -4779,7 +4779,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#9e4678857e5b4c738e95c7ee1a35ee962264f4e9" +source = "git+https://github.com/tikv/rust-rocksdb.git#a9fbe325939c166ffc5f80e63066f5d8594a1fff" dependencies = [ "libc 0.2.139", "librocksdb_sys", diff --git a/components/engine_panic/src/checkpoint.rs b/components/engine_panic/src/checkpoint.rs index 6743810eb90..bed49c8e55b 100644 --- a/components/engine_panic/src/checkpoint.rs +++ b/components/engine_panic/src/checkpoint.rs @@ -15,6 +15,10 @@ impl Checkpointable for PanicEngine { fn new_checkpointer(&self) -> Result { panic!() } + + fn merge(&self, dbs: &[&Self]) -> Result<()> { + panic!() + } } impl Checkpointer for PanicCheckpointer { diff --git a/components/engine_rocks/src/checkpoint.rs b/components/engine_rocks/src/checkpoint.rs index 8b82043a392..0f86aa29945 100644 --- a/components/engine_rocks/src/checkpoint.rs +++ 
b/components/engine_rocks/src/checkpoint.rs @@ -15,6 +15,14 @@ impl Checkpointable for RocksEngine { Err(e) => Err(r2e(e)), } } + + fn merge(&self, dbs: &[&Self]) -> Result<()> { + let mut mopts = rocksdb::MergeInstanceOptions::default(); + mopts.merge_memtable = false; + mopts.allow_source_write = true; + let inner: Vec<_> = dbs.iter().map(|e| e.as_inner().as_ref()).collect(); + self.as_inner().merge_instances(&mopts, &inner).map_err(r2e) + } } pub struct RocksEngineCheckpointer(rocksdb::Checkpointer); diff --git a/components/engine_rocks/src/event_listener.rs b/components/engine_rocks/src/event_listener.rs index 23ff7cf5f50..1cbef379e3c 100644 --- a/components/engine_rocks/src/event_listener.rs +++ b/components/engine_rocks/src/event_listener.rs @@ -189,8 +189,10 @@ impl RocksPersistenceListener { impl rocksdb::EventListener for RocksPersistenceListener { fn on_memtable_sealed(&self, info: &MemTableInfo) { + // Note: first_seqno is effectively the smallest seqno of memtable. + // earliest_seqno has ambiguous semantics. 
self.0 - .on_memtable_sealed(info.cf_name().to_string(), info.earliest_seqno()); + .on_memtable_sealed(info.cf_name().to_string(), info.first_seqno()); } fn on_flush_completed(&self, job: &FlushJobInfo) { diff --git a/components/engine_traits/src/checkpoint.rs b/components/engine_traits/src/checkpoint.rs index 6ea3556938f..6b966d806fe 100644 --- a/components/engine_traits/src/checkpoint.rs +++ b/components/engine_traits/src/checkpoint.rs @@ -8,6 +8,8 @@ pub trait Checkpointable { type Checkpointer: Checkpointer; fn new_checkpointer(&self) -> Result; + + fn merge(&self, dbs: &[&Self]) -> Result<()>; } pub trait Checkpointer { diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index d35233bc310..8b0566f2cfb 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -29,7 +29,7 @@ use crate::{data_cf_offset, RaftEngine, RaftLogBatch, DATA_CFS_LEN}; pub struct ApplyProgress { cf: String, apply_index: u64, - earliest_seqno: u64, + smallest_seqno: u64, } impl ApplyProgress { @@ -123,8 +123,8 @@ impl PersistenceListener { /// Called when memtable is frozen. /// - /// `earliest_seqno` should be the smallest seqno of the memtable. - pub fn on_memtable_sealed(&self, cf: String, earliest_seqno: u64) { + /// `smallest_seqno` should be the smallest seqno of the memtable. + pub fn on_memtable_sealed(&self, cf: String, smallest_seqno: u64) { // The correctness relies on the assumption that there will be only one // thread writting to the DB and increasing apply index. 
// Apply index will be set within DB lock, so it's correct even with manual @@ -133,16 +133,16 @@ impl PersistenceListener { let apply_index = self.state.applied_index.load(Ordering::SeqCst); let mut prs = self.progress.lock().unwrap(); let flushed = prs.last_flushed[offset]; - if flushed > earliest_seqno { + if flushed > smallest_seqno { panic!( "sealed seqno has been flushed {} {} {} <= {}", - cf, apply_index, earliest_seqno, flushed + cf, apply_index, smallest_seqno, flushed ); } prs.prs.push_back(ApplyProgress { cf, apply_index, - earliest_seqno, + smallest_seqno, }); } @@ -170,8 +170,7 @@ impl PersistenceListener { cursor.move_next(); continue; } - // Note flushed largest_seqno equals to earliest_seqno of next memtable. - if pr.earliest_seqno < largest_seqno { + if pr.smallest_seqno <= largest_seqno { match &mut flushed_pr { None => flushed_pr = cursor.remove_current(), Some(flushed_pr) => { diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 4833030fec3..3f7bf408aa8 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -51,7 +51,7 @@ use time::Timespec; use crate::{ fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate, StoreMeta}, - operation::{SharedReadTablet, SPLIT_PREFIX}, + operation::{SharedReadTablet, MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX}, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, worker::{pd, tablet_gc}, @@ -62,6 +62,7 @@ use crate::{ pub struct StoreContext { /// A logger without any KV. It's clean for creating new PeerFSM. pub logger: Logger, + pub store_id: u64, pub coprocessor_host: CoprocessorHost, /// The transport for sending messages to peers on other stores. 
pub trans: T, @@ -392,6 +393,10 @@ impl StorePollerBuilder { continue; } let Some((prefix, region_id, tablet_index)) = self.tablet_registry.parse_tablet_name(&path) else { continue }; + // Keep the checkpoint even if source is destroyed. + if prefix == MERGE_SOURCE_PREFIX { + continue; + } let fsm = match peers.get(®ion_id) { Some((_, fsm)) => fsm, None => { @@ -405,14 +410,17 @@ impl StorePollerBuilder { if prefix == SPLIT_PREFIX { file_system::remove_dir_all(&path)?; continue; - } - if prefix.is_empty() { + } else if prefix == MERGE_IN_PROGRESS_PREFIX { + continue; + } else if prefix.is_empty() { // Stale split data can be deleted. if fsm.peer().storage().tablet_index() > tablet_index { file_system::remove_dir_all(&path)?; } + } else { + debug_assert!(false, "unexpected tablet prefix: {}", path.display()); + warn!(self.logger, "unexpected tablet prefix"; "path" => %path.display()); } - // TODO: handle other prefix } // TODO: list all available tablets and destroy those which are not in the // peers. @@ -432,6 +440,7 @@ where let cfg = self.cfg.value().clone(); let mut poll_ctx = StoreContext { logger: self.logger.clone(), + store_id: self.store_id, trans: self.trans.clone(), current_time: None, has_ready: false, diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index e1bf5169d55..2afd8fbf773 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -21,7 +21,7 @@ use tikv_util::{ }; use crate::{ - operation::DataTrace, + operation::{CatchUpLogs, DataTrace}, raft::Apply, router::{ApplyRes, ApplyTask, PeerMsg}, }; @@ -31,6 +31,8 @@ use crate::{ /// Using a trait to make signiture simpler. pub trait ApplyResReporter { fn report(&self, apply_res: ApplyRes); + + fn redirect_catch_up_logs(&self, c: CatchUpLogs); } impl, S: FsmScheduler> ApplyResReporter for Mailbox { @@ -38,6 +40,11 @@ impl, S: FsmScheduler> ApplyResReporter for M // TODO: check shutdown. 
let _ = self.force_send(PeerMsg::ApplyRes(apply_res)); } + + fn redirect_catch_up_logs(&self, c: CatchUpLogs) { + let msg = PeerMsg::RedirectCatchUpLogs(c); + let _ = self.force_send(msg); + } } /// Schedule task to `ApplyFsm`. diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 77860b0ff49..5e10aa0ef72 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -217,7 +217,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerTick::PdHeartbeat => self.on_pd_heartbeat(), PeerTick::CompactLog => self.on_compact_log_tick(false), PeerTick::SplitRegionCheck => self.on_split_region_check(), - PeerTick::CheckMerge => unimplemented!(), + PeerTick::CheckMerge => self.fsm.peer_mut().on_check_merge(self.store_ctx), PeerTick::CheckPeerStaleState => unimplemented!(), PeerTick::EntryCacheEvict => self.on_entry_cache_evict(), PeerTick::CheckLeaderLease => unimplemented!(), @@ -330,6 +330,20 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, .fsm .peer_mut() .on_cleanup_import_sst(self.store_ctx, ssts), + PeerMsg::AskCommitMerge(req) => { + self.fsm.peer_mut().on_ask_commit_merge(self.store_ctx, req) + } + PeerMsg::AckCommitMerge { index, target_id } => { + self.fsm.peer_mut().on_ack_commit_merge(index, target_id) + } + PeerMsg::RejectCommitMerge { index } => { + self.fsm.peer_mut().on_reject_commit_merge(index) + } + PeerMsg::RedirectCatchUpLogs(c) => self + .fsm + .peer_mut() + .on_redirect_catch_up_logs(self.store_ctx, c), + PeerMsg::CatchUpLogs(c) => self.fsm.peer_mut().on_catch_up_logs(self.store_ctx, c), #[cfg(feature = "testexport")] PeerMsg::WaitFlush(ch) => self.fsm.peer_mut().on_wait_flush(ch), } diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index afb7aa5d0d8..4b4255b3d3e 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ 
b/components/raftstore-v2/src/fsm/store.rs @@ -276,6 +276,9 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { .fsm .store .on_store_unreachable(self.store_ctx, to_store_id), + StoreMsg::AskCommitMerge(req) => { + self.fsm.store.on_ask_commit_merge(self.store_ctx, req) + } #[cfg(feature = "testexport")] StoreMsg::WaitFlush { region_id, ch } => { self.fsm.store.on_wait_flush(self.store_ctx, region_id, ch) diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index ed4d22a59b4..8ae195539b2 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -13,6 +13,8 @@ //! Updates truncated index, and compacts logs if the corresponding changes have //! been persisted in kvdb. +use std::path::PathBuf; + use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use kvproto::raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse, RaftCmdRequest}; use protobuf::Message; @@ -309,6 +311,33 @@ impl Peer { )); } + #[inline] + pub fn record_tombstone_tablet_path( + &mut self, + ctx: &StoreContext, + old_tablet: PathBuf, + new_tablet_index: u64, + ) { + info!( + self.logger, + "record tombstone tablet"; + "prev_tablet_path" => old_tablet.display(), + "new_tablet_index" => new_tablet_index + ); + let compact_log_context = self.compact_log_context_mut(); + compact_log_context + .tombstone_tablets_wait_index + .push(new_tablet_index); + let _ = ctx + .schedulers + .tablet_gc + .schedule(tablet_gc::Task::prepare_destroy_path( + old_tablet, + self.region_id(), + new_tablet_index, + )); + } + /// Returns if there's any tombstone being removed. 
#[inline] pub fn remove_tombstone_tablets(&mut self, persisted: u64) -> bool { @@ -369,15 +398,6 @@ impl Peer { mut res: CompactLogResult, ) { let first_index = self.entry_storage().first_index(); - if res.compact_index <= first_index { - debug!( - self.logger, - "compact index <= first index, no need to compact"; - "compact_index" => res.compact_index, - "first_index" => first_index, - ); - return; - } if let Some(i) = self.merge_context().and_then(|c| c.max_compact_log_index()) && res.compact_index > i { @@ -389,6 +409,22 @@ impl Peer { ); res.compact_index = i; } + if res.compact_index <= first_index { + debug!( + self.logger, + "compact index <= first index, no need to compact"; + "compact_index" => res.compact_index, + "first_index" => first_index, + ); + return; + } + assert!( + res.compact_index < self.compact_log_context().last_applying_index, + "{}: {}, {}", + SlogFormat(&self.logger), + res.compact_index, + self.compact_log_context().last_applying_index + ); // TODO: check entry_cache_warmup_state self.entry_storage_mut() .compact_entry_cache(res.compact_index); diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 7bc20068736..b2bea379299 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -192,6 +192,10 @@ impl Peer { self.maybe_schedule_gc_peer_tick(); } } + ctx.store_meta + .lock() + .unwrap() + .set_region(self.region(), true, &self.logger); ctx.coprocessor_host.on_region_changed( self.region(), RegionChangeEvent::Update(RegionChangeReason::ChangePeer), diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs new file mode 100644 index 00000000000..876ba5b1a95 --- /dev/null +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -0,0 
+1,792 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module contains merge related processing logic. +//! +//! ## Propose +//! +//! The proposal is initiated by the source region. After `PrepareMerge` is +//! applied, the source peer will send an `AskCommitMerge` message to the target +//! peer. (For simplicity, we send this message regardless of whether the target +//! peer is leader.) The message will also carry some source region logs that +//! may not be committed by some source peers. +//! +//! The source region cannot serve any writes until the merge is committed or +//! rollback-ed. This is guaranteed by `MergeContext::prepare_status`. +//! +//! ## Apply (`Apply::apply_commit_merge`) +//! +//! At first, target region will not apply the `CommitMerge` command. Instead +//! the apply progress will be paused and it redirects the log entries from +//! source region, as a `CatchUpLogs` message, to the local source region peer. +//! When the source region peer has applied all logs up to the prior +//! `PrepareMerge` command, it will signal the target peer. Here we use a +//! temporary channel instead of directly sending message between apply FSMs +//! like in v1. +//! +//! Here is a complete view of the process: +//! +//! ```text +//! | Store 1 | Store 2 | +//! | Source Peer | Target Leader | Source Peer | Target Peer | +//! | +//! apply PrepareMerge +//! \ +//! +--------------+ +//! `AskCommitMerge`\ +//! \ +//! propose CommitMerge ---------------> append CommitMerge +//! apply CommitMerge apply CommitMerge +//! on apply res /| +//! /| +------------+ | +//! +---------------+ | / `CatchUpLogs` | +//! / `AckCommitMerge` | / | +//! / (complete) append logs (pause) +//! destroy self | . +//! apply PrepareMerge . +//! | . +//! +-----------> (continue) +//! | | +//! destroy self (complete) +//! 
``` + +use std::{ + any::Any, + cmp, fs, io, + path::{Path, PathBuf}, +}; + +use crossbeam::channel::SendError; +use engine_traits::{KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry}; +use futures::channel::oneshot; +use kvproto::{ + metapb::Region, + raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse, CommitMergeRequest, RaftCmdRequest}, + raft_serverpb::{MergedRecord, PeerState, RegionLocalState}, +}; +use protobuf::Message; +use raft::{GetEntriesContext, Storage, INVALID_ID, NO_LIMIT}; +use raftstore::{ + coprocessor::RegionChangeReason, + store::{ + fsm::new_admin_request, metrics::PEER_ADMIN_CMD_COUNTER, util, ProposalContext, Transport, + }, + Result, +}; +use slog::{debug, error, info, Logger}; +use tikv_util::{ + config::ReadableDuration, + log::SlogFormat, + slog_panic, + store::{find_peer, region_on_same_stores}, + time::Instant, +}; + +use super::merge_source_path; +use crate::{ + batch::StoreContext, + fsm::ApplyResReporter, + operation::{AdminCmdResult, SharedReadTablet}, + raft::{Apply, Peer}, + router::{CmdResChannel, PeerMsg, PeerTick, StoreMsg}, +}; + +#[derive(Debug)] +pub struct CommitMergeResult { + pub index: u64, + // Only used to respond `CatchUpLogs` to source peer. + prepare_merge_index: u64, + source_path: PathBuf, + region_state: RegionLocalState, + source: Region, + source_safe_ts: u64, + tablet: Box, +} + +#[derive(Debug)] +pub struct CatchUpLogs { + target_region_id: u64, + merge: CommitMergeRequest, + // safe_ts. 
+ tx: oneshot::Sender, +} + +pub const MERGE_IN_PROGRESS_PREFIX: &str = "merge-in-progress"; + +struct MergeInProgressGuard(PathBuf); + +impl MergeInProgressGuard { + // `index` is the commit index of `CommitMergeRequest` + fn new( + logger: &Logger, + registry: &TabletRegistry, + target_region_id: u64, + index: u64, + tablet_path: &Path, + ) -> io::Result> { + let name = registry.tablet_name(MERGE_IN_PROGRESS_PREFIX, target_region_id, index); + let marker_path = registry.tablet_root().join(name); + if !marker_path.exists() { + if tablet_path.exists() { + return Ok(None); + } else { + fs::create_dir(&marker_path)?; + file_system::sync_dir(marker_path.parent().unwrap())?; + } + } else if tablet_path.exists() { + info!(logger, "remove incomplete merged tablet"; "path" => %tablet_path.display()); + fs::remove_dir_all(tablet_path)?; + } + Ok(Some(Self(marker_path))) + } + + fn defuse(self) -> io::Result<()> { + fs::remove_dir(&self.0)?; + file_system::sync_dir(self.0.parent().unwrap()) + } +} + +fn commit_of_merge(r: &CommitMergeRequest) -> u64 { + r.get_source_state().get_merge_state().get_commit() +} + +// Source peer initiates commit merge on target peer. +impl Peer { + // Called after applying `PrepareMerge`. + pub fn start_commit_merge(&mut self, store_ctx: &mut StoreContext) { + assert!(self.applied_merge_state().is_some()); + // Target already committed `CommitMerge`. + if let Some(c) = &self.merge_context().unwrap().catch_up_logs { + if self.catch_up_logs_ready(c) { + let c = self.merge_context_mut().catch_up_logs.take().unwrap(); + self.finish_catch_up_logs(store_ctx, c); + } + } else { + self.on_check_merge(store_ctx); + } + } + + // Match v1::on_check_merge. + pub fn on_check_merge(&mut self, store_ctx: &mut StoreContext) { + if !self.serving() || self.applied_merge_state().is_none() { + return; + } + self.add_pending_tick(PeerTick::CheckMerge); + self.ask_target_peer_to_commit_merge(store_ctx); + } + + // Match v1::schedule_merge. 
+ fn ask_target_peer_to_commit_merge(&mut self, store_ctx: &mut StoreContext) { + let state = self.applied_merge_state().unwrap(); + let target = state.get_target(); + let target_id = target.get_id(); + + let (min_index, _) = self.calculate_min_progress().unwrap(); + let low = cmp::max(min_index + 1, state.get_min_index()); + // TODO: move this into raft module. + // > over >= to include the PrepareMerge proposal. + let entries = if low > state.get_commit() { + Vec::new() + } else { + // TODO: fetch entries in async way + match self.storage().entries( + low, + state.get_commit() + 1, + NO_LIMIT, + GetEntriesContext::empty(false), + ) { + Ok(ents) => ents, + Err(e) => slog_panic!( + self.logger, + "failed to get merge entires"; + "err" => ?e, + "low" => low, + "commit" => state.get_commit() + ), + } + }; + + let target_peer = find_peer(target, store_ctx.store_id).unwrap(); + let mut request = new_admin_request(target.get_id(), target_peer.clone()); + request + .mut_header() + .set_region_epoch(target.get_region_epoch().clone()); + let mut admin = AdminRequest::default(); + admin.set_cmd_type(AdminCmdType::CommitMerge); + admin.mut_commit_merge().set_entries(entries.into()); + admin + .mut_commit_merge() + .set_source_state(self.storage().region_state().clone()); + request.set_admin_request(admin); + // Please note that, here assumes that the unit of network isolation is store + // rather than peer. So a quorum stores of source region should also be the + // quorum stores of target region. Otherwise we need to enable proposal + // forwarding. + let msg = PeerMsg::AskCommitMerge(request); + // If target peer is destroyed, life.rs is responsible for telling us to + // rollback. 
+ match store_ctx.router.force_send(target_id, msg) { + Ok(_) => (), + Err(SendError(PeerMsg::AskCommitMerge(msg))) => { + if let Err(e) = store_ctx + .router + .force_send_control(StoreMsg::AskCommitMerge(msg)) + { + if store_ctx.router.is_shutdown() { + return; + } + slog_panic!( + self.logger, + "fails to send `AskCommitMerge` msg to store"; + "error" => ?e, + ); + } + } + _ => unreachable!(), + } + } +} + +// Target peer handles the commit merge request. +impl Peer { + pub fn on_ask_commit_merge( + &mut self, + store_ctx: &mut StoreContext, + req: RaftCmdRequest, + ) { + match self.validate_commit_merge(&req) { + Some(true) if self.is_leader() => { + let (ch, _) = CmdResChannel::pair(); + self.on_admin_command(store_ctx, req, ch); + } + Some(false) => { + let commit_merge = req.get_admin_request().get_commit_merge(); + let source_id = commit_merge.get_source_state().get_region().get_id(); + let _ = store_ctx.router.force_send( + source_id, + PeerMsg::RejectCommitMerge { + index: commit_of_merge(commit_merge), + }, + ); + } + _ => (), + } + } + + fn validate_commit_merge(&self, req: &RaftCmdRequest) -> Option { + let expected_epoch = req.get_header().get_region_epoch(); + let merge = req.get_admin_request().get_commit_merge(); + assert!(merge.has_source_state() && merge.get_source_state().has_merge_state()); + let source_region = merge.get_source_state().get_region(); + let region = self.region(); + if self + .storage() + .region_state() + .get_merged_records() + .iter() + .any(|p| p.get_source_region_id() == source_region.get_id()) + { + info!( + self.logger, + "ignore commit merge because peer is already in merged_records"; + "source" => ?source_region, + ); + None + } else if util::is_epoch_stale(expected_epoch, region.get_region_epoch()) { + info!( + self.logger, + "reject commit merge because of stale"; + "current_epoch" => ?region.get_region_epoch(), + "expected_epoch" => ?expected_epoch, + ); + Some(false) + } else if expected_epoch == 
region.get_region_epoch() { + assert!( + util::is_sibling_regions(source_region, region), + "{}: {:?}, {:?}", + SlogFormat(&self.logger), + source_region, + region + ); + assert!( + region_on_same_stores(source_region, region), + "{:?}, {:?}", + source_region, + region + ); + // Best effort. Remove when trim check is implemented. + if self.storage().has_dirty_data() { + info!(self.logger, "ignore commit merge because of dirty data"); + None + } else { + Some(true) + } + } else { + info!( + self.logger, + "ignore commit merge because self epoch is stale"; + "source" => ?source_region, + ); + None + } + } + + pub fn propose_commit_merge( + &mut self, + store_ctx: &mut StoreContext, + req: RaftCmdRequest, + ) -> Result { + let mut proposal_ctx = ProposalContext::empty(); + proposal_ctx.insert(ProposalContext::COMMIT_MERGE); + let data = req.write_to_bytes().unwrap(); + self.propose_with_ctx(store_ctx, data, proposal_ctx.to_vec()) + } +} + +impl Apply { + // Match v1::exec_commit_merge. + pub async fn apply_commit_merge( + &mut self, + req: &AdminRequest, + index: u64, + ) -> Result<(AdminResponse, AdminCmdResult)> { + PEER_ADMIN_CMD_COUNTER.commit_merge.all.inc(); + + self.flush(); + + // Note: compared to v1, doesn't validate region state from kvdb any more. 
+ let reg = self.tablet_registry(); + let merge = req.get_commit_merge(); + let merge_commit = commit_of_merge(merge); + let source_state = merge.get_source_state(); + let source_region = source_state.get_region(); + let source_path = merge_source_path(reg, source_region.get_id(), merge_commit); + let mut source_safe_ts = 0; + + let mut start_time = Instant::now_coarse(); + let mut wait_duration = None; + let force_send = (|| { + fail::fail_point!("force_send_catch_up_logs", |_| true); + false + })(); + if !source_path.exists() || force_send { + let (tx, rx) = oneshot::channel(); + self.res_reporter().redirect_catch_up_logs(CatchUpLogs { + target_region_id: self.region_id(), + merge: merge.clone(), + tx, + }); + match rx.await { + Ok(ts) => { + source_safe_ts = ts; + } + Err(_) => { + if tikv_util::thread_group::is_shutdown(!cfg!(test)) { + return futures::future::pending().await; + } else { + slog_panic!( + self.logger, + "source peer is missing when getting checkpoint for merge" + ); + } + } + } + let now = Instant::now_coarse(); + wait_duration = Some(now.saturating_duration_since(start_time)); + start_time = now; + }; + fail::fail_point!("after_acquire_source_checkpoint", |_| Err( + tikv_util::box_err!("fp") + )); + + info!( + self.logger, + "execute CommitMerge"; + "commit" => merge_commit, + "entries" => merge.get_entries().len(), + "index" => index, + "source_region" => ?source_region, + ); + + let ctx = TabletContext::new(source_region, None); + let source_tablet = reg + .tablet_factory() + .open_tablet(ctx, &source_path) + .unwrap_or_else(|e| { + slog_panic!(self.logger, "failed to open source checkpoint"; "err" => ?e); + }); + let open_time = Instant::now_coarse(); + + let mut region = self.region().clone(); + // Use a max value so that pd can ensure overlapped region has a priority. 
+ let version = cmp::max( + source_region.get_region_epoch().get_version(), + region.get_region_epoch().get_version(), + ) + 1; + region.mut_region_epoch().set_version(version); + if keys::enc_end_key(®ion) == keys::enc_start_key(source_region) { + region.set_end_key(source_region.get_end_key().to_vec()); + } else { + region.set_start_key(source_region.get_start_key().to_vec()); + } + + let path = reg.tablet_path(self.region_id(), index); + + // Avoid seqno jump back between self.tablet and the newly created tablet. + // If we are recovering, this flush would just be a noop. + self.tablet().flush_cfs(&[], true).unwrap(); + let flush_time = Instant::now_coarse(); + + let mut ctx = TabletContext::new(®ion, Some(index)); + ctx.flush_state = Some(self.flush_state().clone()); + let guard = MergeInProgressGuard::new(&self.logger, reg, self.region_id(), index, &path) + .unwrap_or_else(|e| { + slog_panic!( + self.logger, + "fails to create MergeInProgressGuard"; + "path" => %path.display(), + "error" => ?e + ) + }); + let tablet = reg.tablet_factory().open_tablet(ctx, &path).unwrap(); + if let Some(guard) = guard { + tablet + .merge(&[&source_tablet, self.tablet()]) + .unwrap_or_else(|e| { + slog_panic!( + self.logger, + "fails to merge tablet"; + "path" => %path.display(), + "error" => ?e + ) + }); + guard.defuse().unwrap_or_else(|e| { + slog_panic!( + self.logger, + "fails to defuse MergeInProgressGuard"; + "path" => %path.display(), + "error" => ?e + ) + }); + } else { + info!(self.logger, "reuse merged tablet"); + } + let merge_time = Instant::now_coarse(); + fail::fail_point!("after_merge_source_checkpoint", |_| Err( + tikv_util::box_err!("fp") + )); + + info!( + self.logger, + "applied CommitMerge"; + "source_region" => ?source_region, + "wait" => ?wait_duration.map(|d| format!("{}", ReadableDuration(d))), + "open" => %ReadableDuration(open_time.saturating_duration_since(start_time)), + "merge" => %ReadableDuration(flush_time.saturating_duration_since(open_time)), + 
"flush" => %ReadableDuration(merge_time.saturating_duration_since(flush_time)), + ); + + self.set_tablet(tablet.clone()); + + let state = self.region_state_mut(); + state.set_region(region.clone()); + state.set_state(PeerState::Normal); + assert!(!state.has_merge_state()); + state.set_tablet_index(index); + let mut removed_records: Vec<_> = state.take_removed_records().into(); + removed_records.append(&mut source_state.get_removed_records().into()); + state.set_removed_records(removed_records.into()); + let mut merged_records: Vec<_> = state.take_merged_records().into(); + merged_records.append(&mut source_state.get_merged_records().into()); + state.set_merged_records(merged_records.into()); + let mut merged_record = MergedRecord::default(); + merged_record.set_source_region_id(source_region.get_id()); + merged_record.set_source_epoch(source_region.get_region_epoch().clone()); + merged_record.set_source_peers(source_region.get_peers().into()); + merged_record.set_target_region_id(region.get_id()); + merged_record.set_target_epoch(region.get_region_epoch().clone()); + merged_record.set_target_peers(region.get_peers().into()); + merged_record.set_index(index); + state.mut_merged_records().push(merged_record); + + PEER_ADMIN_CMD_COUNTER.commit_merge.success.inc(); + + Ok(( + AdminResponse::default(), + AdminCmdResult::CommitMerge(CommitMergeResult { + index, + prepare_merge_index: merge_commit, + source_path, + region_state: self.region_state().clone(), + source: source_region.to_owned(), + source_safe_ts, + tablet: Box::new(tablet), + }), + )) + } +} + +// Source peer catches up logs (optionally), and destroy itself. +impl Peer { + // Target peer. 
+ #[inline] + pub fn on_redirect_catch_up_logs( + &mut self, + store_ctx: &mut StoreContext, + catch_up_logs: CatchUpLogs, + ) { + let source_id = catch_up_logs.merge.get_source_state().get_region().get_id(); + assert_eq!(catch_up_logs.target_region_id, self.region_id()); + let _ = store_ctx + .router + .force_send(source_id, PeerMsg::CatchUpLogs(catch_up_logs)); + } + + // Match v1::on_catch_up_logs_for_merge. + pub fn on_catch_up_logs( + &mut self, + store_ctx: &mut StoreContext, + mut catch_up_logs: CatchUpLogs, + ) { + let source_id = catch_up_logs.merge.get_source_state().get_region().get_id(); + if source_id != self.region_id() { + slog_panic!( + self.logger, + "get unexpected catch_up_logs"; + "merge" => ?catch_up_logs.merge, + ); + } + + // Context would be empty if this peer hasn't applied PrepareMerge. + if let Some(cul) = self.merge_context().and_then(|c| c.catch_up_logs.as_ref()) { + slog_panic!( + self.logger, + "get conflicting catch_up_logs"; + "new" => ?catch_up_logs.merge, + "current" => ?cul.merge, + ); + } + if !self.catch_up_logs_ready(&catch_up_logs) { + // Directly append these logs to raft log and then commit them. 
+ match self.maybe_append_merge_entries(&catch_up_logs.merge) { + Some(last_index) => { + info!( + self.logger, + "append and commit entries to source region"; + "last_index" => last_index, + ); + self.set_has_ready(); + } + None => { + info!(self.logger, "no need to catch up logs"); + } + } + catch_up_logs.merge.clear_entries(); + self.merge_context_mut().catch_up_logs = Some(catch_up_logs); + } else { + self.finish_catch_up_logs(store_ctx, catch_up_logs); + } + } + + #[inline] + fn catch_up_logs_ready(&self, catch_up_logs: &CatchUpLogs) -> bool { + if let Some(state) = self.applied_merge_state() + && state.get_commit() == commit_of_merge(&catch_up_logs.merge) + { + assert_eq!( + state.get_target().get_id(), + catch_up_logs.target_region_id + ); + true + } else { + false + } + } + + fn maybe_append_merge_entries(&mut self, merge: &CommitMergeRequest) -> Option { + let mut entries = merge.get_entries(); + let merge_commit = commit_of_merge(merge); + if entries.is_empty() { + // Though the entries are empty, it is possible that one source peer has caught + // up the logs but its commit index is not updated. If other source peers are + // already destroyed, the raft group will not make any progress, meaning the + // source peer cannot get the latest commit index anymore. + // Here update the commit index to let source apply rest uncommitted entries. 
+ return if merge_commit > self.raft_group().raft.raft_log.committed { + self.raft_group_mut().raft.raft_log.commit_to(merge_commit); + Some(merge_commit) + } else { + None + }; + } + let first = entries.first().unwrap(); + // make sure message should be with index not smaller than committed + let mut log_idx = first.get_index() - 1; + debug!( + self.logger, + "append merge entries"; + "log_index" => log_idx, + "merge_commit" => merge_commit, + "commit_index" => self.raft_group().raft.raft_log.committed, + ); + if log_idx < self.raft_group().raft.raft_log.committed { + // There may be some logs not included in CommitMergeRequest's entries, like + // CompactLog, so the commit index may exceed the last index of the entries from + // CommitMergeRequest. If so, no need to append + if self.raft_group().raft.raft_log.committed - log_idx >= entries.len() as u64 { + return None; + } + entries = &entries[(self.raft_group().raft.raft_log.committed - log_idx) as usize..]; + log_idx = self.raft_group().raft.raft_log.committed; + } + let log_term = self.index_term(log_idx); + + let last_log = entries.last().unwrap(); + if last_log.term > self.term() { + // Hack: In normal flow, when leader sends the entries, it will use a term + // that's not less than the last log term. And follower will update its states + // correctly. For merge, we append the log without raft, so we have to take care + // of term explicitly to get correct metadata. 
+ info!( + self.logger, + "become follower for new logs"; + "new_log_term" => last_log.term, + "new_log_index" => last_log.index, + "term" => self.term(), + ); + self.raft_group_mut() + .raft + .become_follower(last_log.term, INVALID_ID); + } + + self.raft_group_mut() + .raft + .raft_log + .maybe_append(log_idx, log_term, merge_commit, entries) + .map(|(_, last_index)| last_index) + } + + #[inline] + fn finish_catch_up_logs(&mut self, store_ctx: &mut StoreContext, c: CatchUpLogs) { + let safe_ts = store_ctx + .store_meta + .lock() + .unwrap() + .region_read_progress + .get(&self.region_id()) + .unwrap() + .safe_ts(); + if c.tx.send(safe_ts).is_err() { + error!( + self.logger, + "failed to respond to merge target, are we shutting down?" + ); + } + self.take_merge_context(); + self.mark_for_destroy(None); + } +} + +impl Peer { + // Match v1::on_ready_commit_merge. + pub fn on_apply_res_commit_merge( + &mut self, + store_ctx: &mut StoreContext, + mut res: CommitMergeResult, + ) { + let region = res.region_state.get_region(); + assert!( + res.source.get_end_key() == region.get_end_key() + || res.source.get_start_key() == region.get_start_key() + ); + let tablet: EK = match res.tablet.downcast() { + Ok(t) => *t, + Err(t) => unreachable!("tablet type should be the same: {:?}", t), + }; + let acquired_source_safe_ts_before = res.source_safe_ts > 0; + + { + let mut meta = store_ctx.store_meta.lock().unwrap(); + if let Some(p) = meta.region_read_progress.get(&res.source.get_id()) { + res.source_safe_ts = p.safe_ts(); + } + meta.set_region(region, true, &self.logger); + let (reader, read_tablet) = meta.readers.get_mut(®ion.get_id()).unwrap(); + self.set_region( + &store_ctx.coprocessor_host, + reader, + region.clone(), + RegionChangeReason::CommitMerge, + res.index, + ); + + // Tablet should be updated in lock to match the epoch. 
+ *read_tablet = SharedReadTablet::new(tablet.clone()); + + // After the region commit merged, the region's key range is extended and the + // region's `safe_ts` should reset to `min(source_safe_ts, target_safe_ts)` + self.read_progress_mut().merge_safe_ts( + res.source_safe_ts, + res.index, + &store_ctx.coprocessor_host, + ); + self.txn_context() + .after_commit_merge(store_ctx, self.term(), region, &self.logger); + } + + // We could only have gotten safe ts by sending `CatchUpLogs` earlier. If we + // haven't, need to acknowledge that we have committed the merge, so that the + // source peer can destroy itself. Note that the timing is deliberately + // delayed after reading `store_ctx.meta` to get the source safe ts + // before its meta gets cleaned up. + if !acquired_source_safe_ts_before { + let _ = store_ctx.router.force_send( + res.source.get_id(), + PeerMsg::AckCommitMerge { + index: res.prepare_merge_index, + target_id: self.region_id(), + }, + ); + } + + if let Some(tablet) = self.set_tablet(tablet) { + self.record_tombstone_tablet(store_ctx, tablet, res.index); + } + self.record_tombstone_tablet_path(store_ctx, res.source_path, res.index); + + // make approximate size and keys updated in time. + // the reason why follower need to update is that there is a issue that after + // merge and then transfer leader, the new leader may have stale size and keys. 
+ self.force_split_check(store_ctx); + self.region_buckets_info_mut().set_bucket_stat(None); + + let region_id = self.region_id(); + self.state_changes_mut() + .put_region_state(region_id, res.index, &res.region_state) + .unwrap(); + self.storage_mut().set_region_state(res.region_state); + self.storage_mut() + .apply_trace_mut() + .on_admin_flush(res.index); + self.set_has_extra_write(); + + if self.is_leader() { + self.region_heartbeat_pd(store_ctx); + info!( + self.logger, + "notify pd with merge"; + "source_region" => ?res.source, + "target_region" => ?self.region(), + ); + self.add_pending_tick(PeerTick::SplitRegionCheck); + } + } + + // Called on source peer. + pub fn on_ack_commit_merge(&mut self, index: u64, target_id: u64) { + // We don't check it against merge state because source peer might just restart + // and haven't replayed `PrepareMerge` yet. + info!(self.logger, "destroy self on AckCommitMerge"; "index" => index, "target_id" => target_id); + self.take_merge_context(); + self.mark_for_destroy(None); + } +} diff --git a/components/raftstore-v2/src/operation/command/admin/merge/mod.rs b/components/raftstore-v2/src/operation/command/admin/merge/mod.rs index a3895a1b435..0b198eec2a6 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/mod.rs @@ -1,11 +1,16 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+pub mod commit; pub mod prepare; +pub mod rollback; -use engine_traits::{KvEngine, RaftEngine}; +use std::path::PathBuf; + +use commit::CatchUpLogs; +use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use kvproto::{ raft_cmdpb::RaftCmdRequest, - raft_serverpb::{PeerState, RegionLocalState}, + raft_serverpb::{MergeState, PeerState, RegionLocalState}, }; use prepare::PrepareStatus; use raft::{ProgressState, INVALID_INDEX}; @@ -15,9 +20,24 @@ use tikv_util::box_err; use crate::raft::Peer; +pub const MERGE_SOURCE_PREFIX: &str = "merge-source"; + +// `index` is the commit index of `PrepareMergeRequest`, `commit` field of +// `CommitMergeRequest`. +fn merge_source_path( + registry: &TabletRegistry, + source_region_id: u64, + index: u64, +) -> PathBuf { + let tablet_name = registry.tablet_name(MERGE_SOURCE_PREFIX, source_region_id, index); + registry.tablet_root().join(tablet_name) +} + +/// This context is only used at source region. #[derive(Default)] pub struct MergeContext { prepare_status: Option, + catch_up_logs: Option, } impl MergeContext { @@ -70,7 +90,7 @@ impl Peer { } /// Returns (minimal matched, minimal committed) - pub fn calculate_min_progress(&self) -> Result<(u64, u64)> { + fn calculate_min_progress(&self) -> Result<(u64, u64)> { let (mut min_m, mut min_c) = (None, None); if let Some(progress) = self.raft_group().status().progress { for (id, pr) in progress.iter() { @@ -109,4 +129,15 @@ impl Peer { } Ok((min_m, min_c)) } + + #[inline] + fn applied_merge_state(&self) -> Option<&MergeState> { + self.merge_context().and_then(|ctx| { + if let Some(PrepareStatus::Applied(state)) = ctx.prepare_status.as_ref() { + Some(state) + } else { + None + } + }) + } } diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index f9df2d9ea1a..f031ac5d20e 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ 
b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -29,7 +29,7 @@ use std::mem; -use engine_traits::{KvEngine, RaftEngine, RaftLogBatch, CF_LOCK}; +use engine_traits::{Checkpointer, KvEngine, RaftEngine, RaftLogBatch, CF_LOCK}; use kvproto::{ raft_cmdpb::{ AdminCmdType, AdminRequest, AdminResponse, CmdType, PrepareMergeRequest, PutRequest, @@ -46,12 +46,13 @@ use raftstore::{ Error, Result, }; use slog::{debug, info}; -use tikv_util::{box_err, log::SlogFormat, store::region_on_same_stores}; +use tikv_util::{box_err, log::SlogFormat, slog_panic, store::region_on_same_stores}; +use super::merge_source_path; use crate::{ batch::StoreContext, fsm::ApplyResReporter, - operation::AdminCmdResult, + operation::{AdminCmdResult, SimpleWriteReqDecoder}, raft::{Apply, Peer}, router::CmdResChannel, }; @@ -97,6 +98,7 @@ impl Peer { store_ctx: &mut StoreContext, mut req: RaftCmdRequest, ) -> Result { + // Best effort. Remove when trim check is implemented. if self.storage().has_dirty_data() { return Err(box_err!( "{} source peer has dirty data, try again later", @@ -245,11 +247,12 @@ impl Peer { if entry.get_data().is_empty() { continue; } - let cmd: RaftCmdRequest = - util::parse_data_at(entry.get_data(), entry.get_index(), "tag"); - if !cmd.has_admin_request() { - continue; - } + let Err(cmd) = SimpleWriteReqDecoder::new( + &self.logger, + entry.get_data(), + entry.get_index(), + entry.get_term(), + ) else { continue }; let cmd_type = cmd.get_admin_request().get_cmd_type(); match cmd_type { AdminCmdType::TransferLeader @@ -458,6 +461,29 @@ impl Apply { PEER_ADMIN_CMD_COUNTER.prepare_merge.success.inc(); + let _ = self.flush(); + let tablet = self.tablet().clone(); + let mut checkpointer = tablet.new_checkpointer().unwrap_or_else(|e| { + slog_panic!( + self.logger, + "fails to create checkpoint object"; + "error" => ?e + ) + }); + let reg = self.tablet_registry(); + let path = merge_source_path(reg, self.region_id(), log_index); + // We might be replaying 
this command. + if !path.exists() { + checkpointer.create_at(&path, None, 0).unwrap_or_else(|e| { + slog_panic!( + self.logger, + "fails to create checkpoint"; + "path" => %path.display(), + "error" => ?e + ) + }); + } + Ok(( AdminResponse::default(), AdminCmdResult::PrepareMerge(PrepareMergeResult { @@ -501,7 +527,6 @@ impl Peer { .enter_prepare_merge(res.state.get_commit()); self.merge_context_mut().prepare_status = Some(PrepareStatus::Applied(res.state)); - // TODO: self. - // update_merge_progress_on_apply_res_prepare_merge(store_ctx); + self.start_commit_merge(store_ctx); } } diff --git a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs new file mode 100644 index 00000000000..ab571298bb0 --- /dev/null +++ b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs @@ -0,0 +1,12 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, RaftEngine}; +use slog::warn; + +use crate::raft::Peer; + +impl Peer { + pub fn on_reject_commit_merge(&mut self, index: u64) { + warn!(self.logger, "target peer rejected commit merge"; "index" => index); + } +} diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index fe84413ff28..f59a5e6e0f2 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -11,8 +11,11 @@ use compact_log::CompactLogResult; use conf_change::{ConfChangeResult, UpdateGcPeersResult}; use engine_traits::{KvEngine, RaftEngine}; use kvproto::raft_cmdpb::{AdminCmdType, RaftCmdRequest}; -use merge::prepare::PrepareMergeResult; -pub use merge::MergeContext; +use merge::{commit::CommitMergeResult, prepare::PrepareMergeResult}; +pub use merge::{ + commit::{CatchUpLogs, MERGE_IN_PROGRESS_PREFIX}, + MergeContext, MERGE_SOURCE_PREFIX, +}; use 
protobuf::Message; use raftstore::{ store::{cmd_resp, fsm::apply, msg::ErrorCallback}, @@ -39,6 +42,7 @@ pub enum AdminCmdResult { CompactLog(CompactLogResult), UpdateGcPeers(UpdateGcPeersResult), PrepareMerge(PrepareMergeResult), + CommitMerge(CommitMergeResult), } impl Peer { @@ -140,6 +144,7 @@ impl Peer { self.propose(ctx, data) } AdminCmdType::PrepareMerge => self.propose_prepare_merge(ctx, req), + AdminCmdType::CommitMerge => self.propose_commit_merge(ctx, req), _ => unimplemented!(), } }; diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 82bae03f062..4560fa93689 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -260,6 +260,12 @@ impl Peer { } } + pub fn force_split_check(&mut self, ctx: &mut StoreContext) { + let control = self.split_flow_control_mut(); + control.size_diff_hint = ctx.cfg.region_split_check_diff().0 as i64; + self.add_pending_tick(PeerTick::SplitRegionCheck); + } + pub fn on_request_split( &mut self, ctx: &mut StoreContext, @@ -873,7 +879,9 @@ mod test { use super::*; use crate::{ - fsm::ApplyResReporter, operation::test_util::create_tmp_importer, raft::Apply, + fsm::ApplyResReporter, + operation::{test_util::create_tmp_importer, CatchUpLogs}, + raft::Apply, router::ApplyRes, }; @@ -892,6 +900,8 @@ mod test { fn report(&self, apply_res: ApplyRes) { let _ = self.sender.send(apply_res); } + + fn redirect_catch_up_logs(&self, _c: CatchUpLogs) {} } fn new_split_req(key: &[u8], id: u64, children: Vec) -> SplitRequest { diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index f14c2c905a3..0337c0cf32a 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -63,8 +63,9 @@ mod control; mod write; pub use admin::{ - 
report_split_init_finish, temp_split_path, AdminCmdResult, CompactLogContext, MergeContext, - RequestHalfSplit, RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX, + report_split_init_finish, temp_split_path, AdminCmdResult, CatchUpLogs, CompactLogContext, + MergeContext, RequestHalfSplit, RequestSplit, SplitFlowControl, SplitInit, + MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX, }; pub use control::ProposalControl; use pd_client::{BucketMeta, BucketStat}; @@ -359,6 +360,7 @@ impl Peer { AdminCmdResult::CompactLog(res) => self.on_apply_res_compact_log(ctx, res), AdminCmdResult::UpdateGcPeers(state) => self.on_apply_res_update_gc_peers(state), AdminCmdResult::PrepareMerge(res) => self.on_apply_res_prepare_merge(ctx, res), + AdminCmdResult::CommitMerge(res) => self.on_apply_res_commit_merge(ctx, res), } } self.region_buckets_info_mut() @@ -619,7 +621,7 @@ impl Apply { AdminCmdType::Split => self.apply_split(admin_req, log_index)?, AdminCmdType::BatchSplit => self.apply_batch_split(admin_req, log_index)?, AdminCmdType::PrepareMerge => self.apply_prepare_merge(admin_req, log_index)?, - AdminCmdType::CommitMerge => unimplemented!(), + AdminCmdType::CommitMerge => self.apply_commit_merge(admin_req, log_index).await?, AdminCmdType::RollbackMerge => unimplemented!(), AdminCmdType::TransferLeader => { self.apply_transfer_leader(admin_req, entry.term)? 
diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 9e9cc2f5fc0..525be1991bd 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -194,6 +194,40 @@ impl Store { } } + #[inline] + pub fn on_ask_commit_merge( + &mut self, + ctx: &mut StoreContext, + req: RaftCmdRequest, + ) where + EK: KvEngine, + ER: RaftEngine, + T: Transport, + { + let region_id = req.get_header().get_region_id(); + let mut raft_msg = Box::::default(); + raft_msg.set_region_id(region_id); + raft_msg.set_region_epoch(req.get_header().get_region_epoch().clone()); + raft_msg.set_to_peer(req.get_header().get_peer().clone()); + + // It will create the peer if it does not exist + self.on_raft_message(ctx, raft_msg); + + if let Err(SendError(PeerMsg::AskCommitMerge(req))) = ctx + .router + .force_send(region_id, PeerMsg::AskCommitMerge(req)) + { + let commit_merge = req.get_admin_request().get_commit_merge(); + let source_id = commit_merge.get_source().get_id(); + let _ = ctx.router.force_send( + source_id, + PeerMsg::RejectCommitMerge { + index: commit_merge.get_commit(), + }, + ); + } + } + /// When a message's recipient doesn't exist, it will be redirected to /// store. Store is responsible for checking if it's neccessary to create /// a peer to handle the message. @@ -256,10 +290,12 @@ impl Store { } if msg.has_extra_msg() { let extra_msg = msg.get_extra_msg(); + // Only the direct request has `is_tombstone` set to false. We are certain this + // message needs to be forwarded. 
if extra_msg.get_type() == ExtraMessageType::MsgGcPeerRequest && extra_msg.has_check_gc_peer() { - forward_destroy_source_peer(ctx, &msg); + forward_destroy_to_source_peer(ctx, &msg); return; } } @@ -356,7 +392,7 @@ fn build_peer_destroyed_report(tombstone_msg: &mut RaftMessage) -> Option(ctx: &mut StoreContext, msg: &RaftMessage) +fn forward_destroy_to_source_peer(ctx: &mut StoreContext, msg: &RaftMessage) where EK: KvEngine, ER: RaftEngine, @@ -373,6 +409,8 @@ where tombstone_msg.set_region_epoch(check_gc_peer.get_check_region_epoch().clone()); tombstone_msg.set_is_tombstone(true); // No need to set epoch as we don't know what it is. + // This message will not be handled by `on_gc_peer_request` due to + // `is_tombstone` being true. tombstone_msg .mut_extra_msg() .set_type(ExtraMessageType::MsgGcPeerRequest); @@ -455,7 +493,7 @@ impl Peer { return; } - forward_destroy_source_peer(ctx, msg); + forward_destroy_to_source_peer(ctx, msg); } /// A peer confirms it's destroyed. diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 5514d966cea..3511a432c15 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -9,9 +9,10 @@ mod ready; mod txn_ext; pub use command::{ - AdminCmdResult, ApplyFlowControl, CommittedEntries, CompactLogContext, MergeContext, - ProposalControl, RequestHalfSplit, RequestSplit, SimpleWriteBinary, SimpleWriteEncoder, - SimpleWriteReqDecoder, SimpleWriteReqEncoder, SplitFlowControl, SPLIT_PREFIX, + AdminCmdResult, ApplyFlowControl, CatchUpLogs, CommittedEntries, CompactLogContext, + MergeContext, ProposalControl, RequestHalfSplit, RequestSplit, SimpleWriteBinary, + SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, SplitFlowControl, + MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX, }; pub use life::{DestroyProgress, GcPeerContext}; pub use ready::{ diff --git 
a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 3591a17d989..d93502a734d 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -213,6 +213,13 @@ impl Peer { self.on_gc_peer_request(ctx, &msg); return; } + ExtraMessageType::MsgWantRollbackMerge => { + if self.is_leader() { + // TODO: + // self.merge_context_mut().maybe_add_rollback_peer(); + return; + } + } _ => (), } } diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 5eae3078a0a..3db8590d7ed 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -250,6 +250,10 @@ impl Peer { self.storage_mut().on_applied_snapshot(); self.raft_group_mut().advance_apply_to(snapshot_index); + if self.proposal_control().is_merging() { + // After applying a snapshot, merge is rolled back implicitly. + // TODO: self.rollback_merge(ctx); + } let read_tablet = SharedReadTablet::new(tablet.clone()); { let mut meta = ctx.store_meta.lock().unwrap(); diff --git a/components/raftstore-v2/src/operation/txn_ext.rs b/components/raftstore-v2/src/operation/txn_ext.rs index e30bc25eec4..272b2526b39 100644 --- a/components/raftstore-v2/src/operation/txn_ext.rs +++ b/components/raftstore-v2/src/operation/txn_ext.rs @@ -69,6 +69,20 @@ impl TxnContext { pessimistic_locks.version = region.get_region_epoch().get_version(); } + #[inline] + pub fn after_commit_merge( + &self, + ctx: &StoreContext, + term: u64, + region: &Region, + logger: &Logger, + ) { + // If a follower merges into a leader, a more recent read may happen + // on the leader of the follower. So max ts should be updated after + // a region merge. 
+ self.require_updating_max_ts(ctx, term, region, logger); + } + #[inline] pub fn on_became_follower(&self, term: u64, region: &Region) { let mut pessimistic_locks = self.ext.pessimistic_locks.write(); diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index e510c85cbf9..8ee311401a9 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -26,6 +26,7 @@ use raftstore::{ }, }; use slog::Logger; +use tikv_util::slog_panic; use super::storage::Storage; use crate::{ @@ -828,4 +829,12 @@ impl Peer { pub fn last_sent_snapshot_index(&self) -> u64 { self.last_sent_snapshot_index } + + #[inline] + pub fn index_term(&self, idx: u64) -> u64 { + match self.raft_group.raft.raft_log.term(idx) { + Ok(t) => t, + Err(e) => slog_panic!(self.logger, "failed to load term"; "index" => idx, "err" => ?e), + } + } } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index d386ed0acae..cff915fd248 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -340,7 +340,7 @@ mod tests { use super::*; use crate::{ fsm::ApplyResReporter, - operation::{test_util::create_tmp_importer, write_initial_states}, + operation::{test_util::create_tmp_importer, write_initial_states, CatchUpLogs}, raft::Apply, router::ApplyRes, }; @@ -369,6 +369,7 @@ mod tests { impl ApplyResReporter for TestRouter { fn report(&self, _res: ApplyRes) {} + fn redirect_catch_up_logs(&self, _c: CatchUpLogs) {} } fn new_region() -> Region { diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 88ac0ba7948..26fbde3644a 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -19,7 +19,7 @@ use super::{ }, ApplyRes, }; -use crate::operation::{RequestHalfSplit, RequestSplit, SimpleWriteBinary, SplitInit}; +use 
crate::operation::{CatchUpLogs, RequestHalfSplit, RequestSplit, SimpleWriteBinary, SplitInit}; #[derive(Debug, Clone, Copy, PartialEq, Hash)] #[repr(u8)] @@ -208,6 +208,18 @@ pub enum PeerMsg { tablet_index: u64, }, CleanupImportSst(Box<[SstMeta]>), + AskCommitMerge(RaftCmdRequest), + AckCommitMerge { + index: u64, + target_id: u64, + }, + RejectCommitMerge { + index: u64, + }, + // From target [`Apply`] to target [`Peer`]. + RedirectCatchUpLogs(CatchUpLogs), + // From target [`Peer`] to source [`Peer`]. + CatchUpLogs(CatchUpLogs), /// A message that used to check if a flush is happened. #[cfg(feature = "testexport")] WaitFlush(super::FlushChannel), @@ -278,6 +290,7 @@ pub enum StoreMsg { StoreUnreachable { to_store_id: u64, }, + AskCommitMerge(RaftCmdRequest), /// A message that used to check if a flush is happened. #[cfg(feature = "testexport")] WaitFlush { diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs index d6846f61e4b..703f38c3516 100644 --- a/components/raftstore-v2/src/router/mod.rs +++ b/components/raftstore-v2/src/router/mod.rs @@ -15,7 +15,8 @@ pub use self::{ internal_message::ApplyRes, message::{PeerMsg, PeerTick, RaftRequest, StoreMsg, StoreTick}, response_channel::{ - CmdResChannel, CmdResChannelBuilder, CmdResEvent, CmdResStream, CmdResSubscriber, - DebugInfoChannel, DebugInfoSubscriber, QueryResChannel, QueryResult, ReadResponse, + BaseSubscriber, CmdResChannel, CmdResChannelBuilder, CmdResEvent, CmdResStream, + CmdResSubscriber, DebugInfoChannel, DebugInfoSubscriber, QueryResChannel, QueryResult, + ReadResponse, }, }; diff --git a/components/raftstore-v2/tests/failpoints/mod.rs b/components/raftstore-v2/tests/failpoints/mod.rs index e2f6884dd54..f73b9398df6 100644 --- a/components/raftstore-v2/tests/failpoints/mod.rs +++ b/components/raftstore-v2/tests/failpoints/mod.rs @@ -11,5 +11,6 @@ mod cluster; mod test_basic_write; mod test_bootstrap; mod test_life; +mod test_merge; mod test_split; mod 
test_trace_apply; diff --git a/components/raftstore-v2/tests/failpoints/test_merge.rs b/components/raftstore-v2/tests/failpoints/test_merge.rs new file mode 100644 index 00000000000..3979d61743a --- /dev/null +++ b/components/raftstore-v2/tests/failpoints/test_merge.rs @@ -0,0 +1,109 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::time::Duration; + +use engine_traits::Peekable; +use tikv_util::store::new_peer; + +use crate::cluster::{ + life_helper::assert_peer_not_exist, merge_helper::merge_region, split_helper::split_region, + Cluster, +}; + +#[test] +fn test_source_and_target_both_replay() { + let mut cluster = Cluster::default(); + let store_id = cluster.node(0).id(); + let router = &mut cluster.routers[0]; + + let region_1 = router.region_detail(2); + let peer_1 = region_1.get_peers()[0].clone(); + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + let peer_2 = new_peer(store_id, peer_1.get_id() + 1); + let region_1_id = region_1.get_id(); + let region_2_id = region_1_id + 1; + let (region_1, region_2) = split_region( + router, + region_1, + peer_1.clone(), + region_2_id, + peer_2, + Some(format!("k{}k", region_1_id).as_bytes()), + Some(format!("k{}k", region_2_id).as_bytes()), + format!("k{}", region_2_id).as_bytes(), + format!("k{}", region_2_id).as_bytes(), + false, + ); + + { + let _fp = fail::FailGuard::new("after_acquire_source_checkpoint", "1*return->off"); + merge_region(router, region_1, peer_1, region_2, false); + } + + cluster.restart(0); + let router = &mut cluster.routers[0]; + // Wait for replay. + let mut retry = 0; + while retry < 50 { + // Read region 1 data from region 2. 
+ let snapshot = router.stale_snapshot(region_2_id); + let key = format!("k{region_1_id}k"); + if let Ok(Some(_)) = snapshot.get_value(key.as_bytes()) { + return; + } + retry += 1; + std::thread::sleep(Duration::from_millis(100)); + } + panic!("merge not replayed after 5s"); +} + +#[test] +fn test_source_destroy_before_target_apply() { + let mut cluster = Cluster::default(); + let store_id = cluster.node(0).id(); + let router = &mut cluster.routers[0]; + + let region_1 = router.region_detail(2); + let peer_1 = region_1.get_peers()[0].clone(); + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + let peer_2 = new_peer(store_id, peer_1.get_id() + 1); + let region_1_id = region_1.get_id(); + let region_2_id = region_1_id + 1; + let (region_1, region_2) = split_region( + router, + region_1, + peer_1.clone(), + region_2_id, + peer_2, + Some(format!("k{}k", region_1_id).as_bytes()), + Some(format!("k{}k", region_2_id).as_bytes()), + format!("k{}", region_2_id).as_bytes(), + format!("k{}", region_2_id).as_bytes(), + false, + ); + + { + // Sending CatchUpLogs will make source destroy early (without waiting for + // AckCommitMerge). + let _fp1 = fail::FailGuard::new("force_send_catch_up_logs", "1*return->off"); + let _fp2 = fail::FailGuard::new("after_acquire_source_checkpoint", "1*return->off"); + merge_region(router, region_1, peer_1.clone(), region_2, false); + } + assert_peer_not_exist(region_1_id, peer_1.get_id(), router); + + cluster.restart(0); + let router = &mut cluster.routers[0]; + // Wait for replay. + let mut retry = 0; + while retry < 50 { + // Read region 1 data from region 2. 
+ let snapshot = router.stale_snapshot(region_2_id); + let key = format!("k{region_1_id}k"); + if let Ok(Some(_)) = snapshot.get_value(key.as_bytes()) { + return; + } + retry += 1; + std::thread::sleep(Duration::from_millis(100)); + } + panic!("merge not replayed after 5s"); +} diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 451f7131cc9..1685b5154e7 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -49,7 +49,7 @@ use sst_importer::SstImporter; use tempfile::TempDir; use test_pd::mocker::Service; use tikv_util::{ - config::{ReadableDuration, VersionTrack}, + config::{ReadableDuration, ReadableSize, VersionTrack}, store::new_peer, worker::{LazyWorker, Worker}, }; @@ -67,6 +67,7 @@ pub fn check_skip_wal(path: &str) { assert!(found, "no WAL found in {}", path); } +#[derive(Clone)] pub struct TestRouter(RaftRouter); impl Deref for TestRouter { @@ -464,6 +465,9 @@ impl Transport for TestTransport { pub fn v2_default_config() -> Config { let mut config = Config::default(); config.store_io_pool_size = 1; + if config.region_split_check_diff.is_none() { + config.region_split_check_diff = Some(ReadableSize::mb(96 / 16)); + } config } @@ -758,6 +762,63 @@ pub mod split_helper { } } +pub mod merge_helper { + use std::{thread, time::Duration}; + + use futures::executor::block_on; + use kvproto::{ + metapb, + raft_cmdpb::{AdminCmdType, AdminRequest, RaftCmdRequest}, + }; + use raftstore_v2::router::PeerMsg; + + use super::TestRouter; + + pub fn merge_region( + router: &mut TestRouter, + source: metapb::Region, + source_peer: metapb::Peer, + target: metapb::Region, + check: bool, + ) -> metapb::Region { + let region_id = source.id; + let mut req = RaftCmdRequest::default(); + req.mut_header().set_region_id(region_id); + req.mut_header() + .set_region_epoch(source.get_region_epoch().clone()); + 
req.mut_header().set_peer(source_peer); + + let mut admin_req = AdminRequest::default(); + admin_req.set_cmd_type(AdminCmdType::PrepareMerge); + admin_req.mut_prepare_merge().set_target(target.clone()); + req.set_admin_request(admin_req); + + let (msg, sub) = PeerMsg::admin_command(req); + router.send(region_id, msg).unwrap(); + let resp = block_on(sub.result()).unwrap(); + if check { + assert!(!resp.get_header().has_error(), "{:?}", resp); + } + + // TODO: when persistent implementation is ready, we can use tablet index of + // the parent to check whether the merge is done. Now, just sleep a second. + thread::sleep(Duration::from_secs(1)); + + let new_target = router.region_detail(target.id); + if check { + if new_target.get_start_key() == source.get_start_key() { + // [source, target] => new_target + assert_eq!(new_target.get_end_key(), target.get_end_key()); + } else { + // [target, source] => new_target + assert_eq!(new_target.get_start_key(), target.get_start_key()); + assert_eq!(new_target.get_end_key(), source.get_end_key()); + } + } + new_target + } +} + +pub mod life_helper { + use std::assert_matches::assert_matches; diff --git a/components/raftstore-v2/tests/integrations/mod.rs b/components/raftstore-v2/tests/integrations/mod.rs index fbf54eaa243..12fe47ec48a 100644 --- a/components/raftstore-v2/tests/integrations/mod.rs +++ b/components/raftstore-v2/tests/integrations/mod.rs @@ -11,6 +11,7 @@ mod cluster; mod test_basic_write; mod test_conf_change; mod test_life; +mod test_merge; mod test_pd_heartbeat; mod test_read; mod test_split; diff --git a/components/raftstore-v2/tests/integrations/test_merge.rs b/components/raftstore-v2/tests/integrations/test_merge.rs new file mode 100644 index 00000000000..c08c2bde484 --- /dev/null +++ b/components/raftstore-v2/tests/integrations/test_merge.rs @@ -0,0 +1,113 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::time::Duration; + +use engine_traits::{Peekable, RaftEngineReadOnly}; +use kvproto::metapb::{Peer, Region}; +use raftstore::store::RAFT_INIT_LOG_INDEX; +use tikv_util::store::new_peer; + +use crate::cluster::{merge_helper::merge_region, split_helper::split_region, Cluster, TestRouter}; + +#[test] +fn test_merge() { + let mut cluster = Cluster::default(); + let store_id = cluster.node(0).id(); + let raft_engine = cluster.node(0).running_state().unwrap().raft_engine.clone(); + let router = &mut cluster.routers[0]; + + let do_split = + |r: &mut TestRouter, region: Region, peer: &Peer, v: u64| -> (Region, Region, Peer) { + let rid = region.get_id(); + let old_region_state = raft_engine + .get_region_state(rid, u64::MAX) + .unwrap() + .unwrap(); + let new_peer = new_peer(store_id, peer.get_id() + 1); + let (lhs, rhs) = split_region( + r, + region, + peer.clone(), + rid + 1, + new_peer.clone(), + Some(format!("k{}{}", rid, v).as_bytes()), + Some(format!("k{}{}", rid + 1, v).as_bytes()), + format!("k{}", rid + 1).as_bytes(), + format!("k{}", rid + 1).as_bytes(), + false, + ); + let region_state = raft_engine + .get_region_state(rid, u64::MAX) + .unwrap() + .unwrap(); + assert!(region_state.get_tablet_index() > old_region_state.get_tablet_index()); + assert_eq!( + region_state.get_region().get_region_epoch().get_version(), + old_region_state + .get_region() + .get_region_epoch() + .get_version() + + 1, + ); + let region_state = raft_engine + .get_region_state(rid + 1, u64::MAX) + .unwrap() + .unwrap(); + assert_eq!(region_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); + (lhs, rhs, new_peer) + }; + + let region_1 = router.region_detail(2); + let peer_1 = region_1.get_peers()[0].clone(); + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + + // Split into 6. 
+ let (region_1, region_2, peer_2) = do_split(router, region_1, &peer_1, 1); + let (region_2, region_3, peer_3) = do_split(router, region_2, &peer_2, 2); + let (region_3, region_4, peer_4) = do_split(router, region_3, &peer_3, 3); + let (region_4, region_5, peer_5) = do_split(router, region_4, &peer_4, 4); + let (region_5, region_6, peer_6) = do_split(router, region_5, &peer_5, 5); + drop(raft_engine); + // The last region version is smaller. + for (i, v) in [1, 2, 3, 4, 5, 5].iter().enumerate() { + let rid = region_1.get_id() + i as u64; + let snapshot = router.stale_snapshot(rid); + let key = format!("k{rid}{v}"); + assert!( + snapshot.get_value(key.as_bytes()).unwrap().is_some(), + "{} {:?}", + rid, + key + ); + } + + let region_2 = merge_region(router, region_1.clone(), peer_1, region_2, true); + { + let snapshot = router.stale_snapshot(region_2.get_id()); + let key = format!("k{}1", region_1.get_id()); + assert!(snapshot.get_value(key.as_bytes()).unwrap().is_some()); + } + let region_5 = merge_region(router, region_6.clone(), peer_6, region_5, true); + { + let snapshot = router.stale_snapshot(region_5.get_id()); + let key = format!("k{}5", region_6.get_id()); + assert!(snapshot.get_value(key.as_bytes()).unwrap().is_some()); + } + let region_3 = merge_region(router, region_2, peer_2, region_3, true); + let region_4 = merge_region(router, region_3, peer_3, region_4, true); + let region_5 = merge_region(router, region_4, peer_4, region_5, true); + + cluster.restart(0); + let router = &mut cluster.routers[0]; + let snapshot = router.stale_snapshot(region_5.get_id()); + for (i, v) in [1, 2, 3, 4, 5, 5].iter().enumerate() { + let rid = region_1.get_id() + i as u64; + let key = format!("k{rid}{v}"); + assert!( + snapshot.get_value(key.as_bytes()).unwrap().is_some(), + "{} {:?}", + rid, + key + ); + } +} diff --git a/components/raftstore-v2/tests/integrations/test_split.rs b/components/raftstore-v2/tests/integrations/test_split.rs index 7cea980beac..9dab98be598 100644 
--- a/components/raftstore-v2/tests/integrations/test_split.rs +++ b/components/raftstore-v2/tests/integrations/test_split.rs @@ -15,17 +15,19 @@ fn test_split() { let store_id = cluster.node(0).id(); let raft_engine = cluster.node(0).running_state().unwrap().raft_engine.clone(); let router = &mut cluster.routers[0]; - // let factory = cluster.node(0).tablet_factory(); - let region_id = 2; - let peer = new_peer(store_id, 3); - let region = router.region_detail(region_id); - router.wait_applied_to_current_term(2, Duration::from_secs(3)); + let region_2 = 2; + let region = router.region_detail(region_2); + let peer = region.get_peers()[0].clone(); + router.wait_applied_to_current_term(region_2, Duration::from_secs(3)); - // Region 2 ["", ""] peer(1, 3) - // -> Region 2 ["", "k22"] peer(1, 3) + // Region 2 ["", ""] + // -> Region 2 ["", "k22"] // Region 1000 ["k22", ""] peer(1, 10) - let region_state = raft_engine.get_region_state(2, u64::MAX).unwrap().unwrap(); + let region_state = raft_engine + .get_region_state(region_2, u64::MAX) + .unwrap() + .unwrap(); assert_eq!(region_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); let (left, mut right) = split_region( router, @@ -39,26 +41,32 @@ fn test_split() { b"k22", false, ); - let region_state = raft_engine.get_region_state(2, u64::MAX).unwrap().unwrap(); + let region_state = raft_engine + .get_region_state(region_2, u64::MAX) + .unwrap() + .unwrap(); assert_ne!(region_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); assert_eq!( region_state.get_region().get_region_epoch().get_version(), INIT_EPOCH_VER + 1 ); let region_state0 = raft_engine - .get_region_state(2, region_state.get_tablet_index()) + .get_region_state(region_2, region_state.get_tablet_index()) .unwrap() .unwrap(); assert_eq!(region_state, region_state0); - let flushed_index = raft_engine.get_flushed_index(2, CF_RAFT).unwrap().unwrap(); + let flushed_index = raft_engine + .get_flushed_index(region_2, CF_RAFT) + .unwrap() + .unwrap(); assert!( flushed_index 
>= region_state.get_tablet_index(), "{flushed_index} >= {}", region_state.get_tablet_index() ); - // Region 2 ["", "k22"] peer(1, 3) - // -> Region 2 ["", "k11"] peer(1, 3) + // Region 2 ["", "k22"] + // -> Region 2 ["", "k11"] // Region 1001 ["k11", "k22"] peer(1, 11) let _ = split_region( router, @@ -72,7 +80,10 @@ fn test_split() { b"k11", false, ); - let region_state = raft_engine.get_region_state(2, u64::MAX).unwrap().unwrap(); + let region_state = raft_engine + .get_region_state(region_2, u64::MAX) + .unwrap() + .unwrap(); assert_ne!( region_state.get_tablet_index(), region_state0.get_tablet_index() @@ -82,11 +93,14 @@ fn test_split() { INIT_EPOCH_VER + 2 ); let region_state1 = raft_engine - .get_region_state(2, region_state.get_tablet_index()) + .get_region_state(region_2, region_state.get_tablet_index()) .unwrap() .unwrap(); assert_eq!(region_state, region_state1); - let flushed_index = raft_engine.get_flushed_index(2, CF_RAFT).unwrap().unwrap(); + let flushed_index = raft_engine + .get_flushed_index(region_2, CF_RAFT) + .unwrap() + .unwrap(); assert!( flushed_index >= region_state.get_tablet_index(), "{flushed_index} >= {}", @@ -96,8 +110,9 @@ fn test_split() { // Region 1000 ["k22", ""] peer(1, 10) // -> Region 1000 ["k22", "k33"] peer(1, 10) // Region 1002 ["k33", ""] peer(1, 12) + let region_1000 = 1000; let region_state = raft_engine - .get_region_state(1000, u64::MAX) + .get_region_state(region_1000, u64::MAX) .unwrap() .unwrap(); assert_eq!(region_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); @@ -115,7 +130,7 @@ fn test_split() { ) .1; let region_state = raft_engine - .get_region_state(1000, u64::MAX) + .get_region_state(region_1000, u64::MAX) .unwrap() .unwrap(); assert_ne!(region_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); @@ -124,17 +139,21 @@ fn test_split() { INIT_EPOCH_VER + 2 ); let region_state2 = raft_engine - .get_region_state(1000, region_state.get_tablet_index()) + .get_region_state(region_1000, region_state.get_tablet_index()) 
.unwrap() .unwrap(); assert_eq!(region_state, region_state2); - let flushed_index = raft_engine.get_flushed_index(2, CF_RAFT).unwrap().unwrap(); + let flushed_index = raft_engine + .get_flushed_index(region_1000, CF_RAFT) + .unwrap() + .unwrap(); assert!( flushed_index >= region_state.get_tablet_index(), "{flushed_index} >= {}", region_state.get_tablet_index() ); + // 1002 -> 1002, 1003 let split_key = Key::from_raw(b"k44").append_ts(TimeStamp::zero()); let actual_split_key = split_key.clone().truncate_ts().unwrap(); split_region( diff --git a/components/test_raftstore-v2/Cargo.toml b/components/test_raftstore-v2/Cargo.toml index 9ccfdb93cfe..5c6297c124d 100644 --- a/components/test_raftstore-v2/Cargo.toml +++ b/components/test_raftstore-v2/Cargo.toml @@ -40,7 +40,7 @@ futures = "0.3" grpcio = { workspace = true } grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } keys = { workspace = true } -kvproto = { git = "https://github.com/pingcap/kvproto.git" } +kvproto = { workspace = true } lazy_static = "1.3" log_wrappers = { workspace = true } pd_client = { workspace = true } diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 44ce6a69358..6a953ed9ca2 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -28,7 +28,8 @@ use kvproto::{ Response, StatusCmdType, }, raft_serverpb::{ - PeerState, RaftApplyState, RaftLocalState, RaftMessage, RegionLocalState, StoreIdent, + PeerState, RaftApplyState, RaftLocalState, RaftMessage, RaftTruncatedState, + RegionLocalState, StoreIdent, }, }; use pd_client::PdClient; @@ -48,8 +49,8 @@ use tempfile::TempDir; use test_pd_client::TestPdClient; use test_raftstore::{ is_error_response, new_admin_request, new_delete_cmd, new_delete_range_cmd, new_get_cf_cmd, - new_peer, new_put_cf_cmd, new_region_detail_cmd, new_region_leader_cmd, new_request, - new_snap_cmd, new_status_request, 
new_store, new_tikv_config_with_api_ver, + new_peer, new_prepare_merge, new_put_cf_cmd, new_region_detail_cmd, new_region_leader_cmd, + new_request, new_snap_cmd, new_status_request, new_store, new_tikv_config_with_api_ver, new_transfer_leader_cmd, sleep_ms, Config, Filter, FilterFactory, PartitionFilterFactory, RawEngine, }; @@ -276,10 +277,40 @@ pub trait Simulator { // todo: unwrap? res = sub.result().fuse() => Ok(res.unwrap()), _ = timeout_f.compat().fuse() => Err(Error::Timeout(format!("request timeout for {:?}", timeout))), - } }) } + + fn async_command_on_node(&self, node_id: u64, mut request: RaftCmdRequest) { + let region_id = request.get_header().get_region_id(); + + let (msg, _sub) = if request.has_admin_request() { + PeerMsg::admin_command(request) + } else { + let requests = request.get_requests(); + let mut write_encoder = SimpleWriteEncoder::with_capacity(64); + for req in requests { + match req.get_cmd_type() { + CmdType::Put => { + let put = req.get_put(); + write_encoder.put(put.get_cf(), put.get_key(), put.get_value()); + } + CmdType::Delete => { + let delete = req.get_delete(); + write_encoder.delete(delete.get_cf(), delete.get_key()); + } + CmdType::DeleteRange => { + unimplemented!() + } + _ => unreachable!(), + } + } + PeerMsg::simple_write(Box::new(request.take_header()), write_encoder.encode()) + }; + + self.async_peer_msg_on_node(node_id, region_id, msg) + .unwrap(); + } } pub struct Cluster { @@ -1033,6 +1064,27 @@ impl Cluster { status_resp.take_region_detail() } + pub fn truncated_state(&self, region_id: u64, store_id: u64) -> RaftTruncatedState { + self.apply_state(region_id, store_id).take_truncated_state() + } + + pub fn wait_log_truncated(&self, region_id: u64, store_id: u64, index: u64) { + let timer = Instant::now(); + loop { + let truncated_state = self.truncated_state(region_id, store_id); + if truncated_state.get_index() >= index { + return; + } + if timer.saturating_elapsed() >= Duration::from_secs(5) { + panic!( + "[region 
{}] log is still not truncated to {}: {:?} on store {}", + region_id, index, truncated_state, store_id, + ); + } + thread::sleep(Duration::from_millis(10)); + } + } + pub fn get(&mut self, key: &[u8]) -> Option> { self.get_impl(CF_DEFAULT, key, false) } @@ -1376,6 +1428,73 @@ impl Cluster { } } + fn new_prepare_merge(&self, source: u64, target: u64) -> RaftCmdRequest { + let region = block_on(self.pd_client.get_region_by_id(target)) + .unwrap() + .unwrap(); + let prepare_merge = new_prepare_merge(region); + let source_region = block_on(self.pd_client.get_region_by_id(source)) + .unwrap() + .unwrap(); + new_admin_request( + source_region.get_id(), + source_region.get_region_epoch(), + prepare_merge, + ) + } + + pub fn merge_region(&mut self, source: u64, target: u64, _cb: Callback) { + // FIXME: callback is ignored. + let mut req = self.new_prepare_merge(source, target); + let leader = self.leader_of_region(source).unwrap(); + req.mut_header().set_peer(leader.clone()); + self.sim + .rl() + .async_command_on_node(leader.get_store_id(), req); + } + + pub fn try_merge(&mut self, source: u64, target: u64) -> RaftCmdResponse { + self.call_command_on_leader( + self.new_prepare_merge(source, target), + Duration::from_secs(5), + ) + .unwrap() + } + + pub fn must_try_merge(&mut self, source: u64, target: u64) { + let resp = self.try_merge(source, target); + if is_error_response(&resp) { + panic!( + "{} failed to try merge to {}, resp {:?}", + source, target, resp + ); + } + } + + /// Make sure region not exists on that store. 
+ pub fn must_region_not_exist(&mut self, region_id: u64, store_id: u64) { + let mut try_cnt = 0; + loop { + let status_cmd = new_region_detail_cmd(); + let peer = new_peer(store_id, 0); + let req = new_status_request(region_id, peer, status_cmd); + let resp = self.call_command(req, Duration::from_secs(5)).unwrap(); + if resp.get_header().has_error() && resp.get_header().get_error().has_region_not_found() + { + return; + } + + if try_cnt > 250 { + panic!( + "region {} still exists on store {} after {} tries: {:?}", + region_id, store_id, try_cnt, resp + ); + } + try_cnt += 1; + sleep_ms(20); + } + } + pub fn get_snap_dir(&self, node_id: u64) -> String { self.sim.rl().get_snap_dir(node_id) } @@ -1454,6 +1573,10 @@ impl Cluster { self.get_engine(store_id).get_raft_local_state(region_id) } + pub fn raft_local_state(&self, region_id: u64, store_id: u64) -> RaftLocalState { + self.get_raft_local_state(region_id, store_id).unwrap() + } + pub fn shutdown(&mut self) { debug!("about to shutdown cluster"); let keys = match self.sim.read() { @@ -1567,10 +1690,10 @@ impl Peekable for WrapFactory { ) -> engine_traits::Result> { let region_id = self.region_id_of_key(key); - if let Ok(Some(state)) = self.get_region_state(region_id) { - if state.state == PeerState::Tombstone { - return Ok(None); - } + if let Ok(Some(state)) = self.get_region_state(region_id) + && state.state == PeerState::Tombstone + { + return Ok(None); } match self.get_tablet(key) { @@ -1587,10 +1710,10 @@ impl Peekable for WrapFactory { ) -> engine_traits::Result> { let region_id = self.region_id_of_key(key); - if let Ok(Some(state)) = self.get_region_state(region_id) { - if state.state == PeerState::Tombstone { - return Ok(None); - } + if let Ok(Some(state)) = self.get_region_state(region_id) + && state.state == PeerState::Tombstone + { + return Ok(None); } match self.get_tablet(key) { diff --git a/components/test_raftstore-v2/src/lib.rs b/components/test_raftstore-v2/src/lib.rs index ea7e9f6f6e9..685affe45d0 
100644 --- a/components/test_raftstore-v2/src/lib.rs +++ b/components/test_raftstore-v2/src/lib.rs @@ -2,6 +2,7 @@ #![allow(incomplete_features)] #![feature(type_alias_impl_trait)] #![feature(return_position_impl_trait_in_trait)] +#![feature(let_chains)] mod cluster; mod node; diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index ec8e3fe2635..cc09dd09c4c 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -256,7 +256,7 @@ pub struct ServerCluster { snap_paths: HashMap, snap_mgrs: HashMap, pd_client: Arc, - // raft_client: RaftClient, + raft_client: RaftClient, concurrency_managers: HashMap, env: Arc, pub pending_services: HashMap, @@ -288,7 +288,7 @@ impl ServerCluster { worker.scheduler(), Arc::new(ThreadLoadPool::with_threshold(usize::MAX)), ); - let _raft_client = RaftClient::new(conn_builder); + let raft_client = RaftClient::new(conn_builder); ServerCluster { metas: HashMap::default(), addrs: map, @@ -300,7 +300,7 @@ impl ServerCluster { snap_paths: HashMap::default(), pending_services: HashMap::default(), health_services: HashMap::default(), - // raft_client, + raft_client, concurrency_managers: HashMap::default(), env, txn_extra_schedulers: HashMap::default(), @@ -786,6 +786,12 @@ impl Simulator for ServerCluster { router.send_peer_msg(region_id, msg) } + fn send_raft_msg(&mut self, msg: RaftMessage) -> raftstore::Result<()> { + self.raft_client.send(msg).unwrap(); + self.raft_client.flush(); + Ok(()) + } + fn get_router(&self, node_id: u64) -> Option> { self.metas.get(&node_id).map(|m| m.raw_router.clone()) } @@ -797,10 +803,6 @@ impl Simulator for ServerCluster { .unwrap() .to_owned() } - - fn send_raft_msg(&mut self, _msg: RaftMessage) -> raftstore::Result<()> { - unimplemented!() - } } impl Cluster { diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index b7a9ea6f1af..5c9d9ac5d54 100644 --- 
a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -662,9 +662,9 @@ pub fn configure_for_merge(config: &mut Config) { config.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(500); } -pub fn ignore_merge_target_integrity(cluster: &mut Cluster) { - cluster.cfg.raft_store.dev_assert = false; - cluster.pd_client.ignore_merge_target_integrity(); +pub fn ignore_merge_target_integrity(config: &mut Config, pd_client: &TestPdClient) { + config.raft_store.dev_assert = false; + pd_client.ignore_merge_target_integrity(); } pub fn configure_for_lease_read( diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index c22136d04de..1a733be5d8c 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -425,7 +425,7 @@ fn test_node_merge_multiple_snapshots_not_together() { fn test_node_merge_multiple_snapshots(together: bool) { let mut cluster = new_node_cluster(0, 3); configure_for_merge(&mut cluster.cfg); - ignore_merge_target_integrity(&mut cluster); + ignore_merge_target_integrity(&mut cluster.cfg, &cluster.pd_client); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); // make it gc quickly to trigger snapshot easily diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index de1187f35b1..151e278d0d1 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -3,7 +3,7 @@ use std::{iter::*, sync::*, thread, time::*}; use api_version::{test_kv_format_impl, KvFormat}; -use engine_traits::{Peekable, CF_LOCK, CF_RAFT, CF_WRITE}; +use engine_traits::{CF_LOCK, CF_WRITE}; use kvproto::{ raft_cmdpb::CmdType, raft_serverpb::{PeerState, RaftMessage, RegionLocalState}, @@ -12,14 +12,16 @@ use pd_client::PdClient; use raft::eraftpb::{ConfChangeType, MessageType}; use raftstore::store::{Callback, LocksStatus}; use test_raftstore::*; +use 
test_raftstore_macro::test_case; +use test_raftstore_v2::Simulator as _; use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{config::*, HandyRwLock}; use txn_types::{Key, PessimisticLock}; /// Test if merge is working as expected in a general condition. -#[test] +#[test_case(test_raftstore::new_node_cluster)] fn test_node_base_merge() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.rocksdb.titan.enabled = true; configure_for_merge(&mut cluster.cfg); @@ -84,14 +86,9 @@ fn test_node_base_merge() { let version = left.get_region_epoch().get_version(); let conf_ver = left.get_region_epoch().get_conf_ver(); 'outer: for i in 1..4 { - let state_key = keys::region_state_key(left.get_id()); let mut state = RegionLocalState::default(); for _ in 0..3 { - state = cluster - .get_engine(i) - .get_msg_cf(CF_RAFT, &state_key) - .unwrap() - .unwrap(); + state = cluster.region_local_state(left.get_id(), i); if state.get_state() == PeerState::Tombstone { let epoch = state.get_region().get_region_epoch(); assert_eq!(epoch.get_version(), version + 1); @@ -106,9 +103,95 @@ fn test_node_base_merge() { cluster.must_put(b"k4", b"v4"); } -#[test] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_base_merge_v2() { + let mut cluster = new_cluster(0, 3); + // TODO: v2 doesn't support titan yet. 
+ // cluster.cfg.rocksdb.titan.enabled = true; + configure_for_merge(&mut cluster.cfg); + + cluster.run(); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + for i in 0..3 { + must_get_equal(&cluster.get_engine(i + 1), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(i + 1), b"k3", b"v3"); + } + + let pd_client = Arc::clone(&cluster.pd_client); + let region = pd_client.get_region(b"k1").unwrap(); + cluster.must_split(®ion, b"k2"); + let left = pd_client.get_region(b"k1").unwrap(); + let right = pd_client.get_region(b"k2").unwrap(); + assert_eq!(region.get_id(), right.get_id()); + assert_eq!(left.get_end_key(), right.get_start_key()); + assert_eq!(right.get_start_key(), b"k2"); + let get = new_request( + right.get_id(), + right.get_region_epoch().clone(), + vec![new_get_cmd(b"k1")], + false, + ); + debug!("requesting {:?}", get); + let resp = cluster + .call_command_on_leader(get, Duration::from_secs(5)) + .unwrap(); + assert!(resp.get_header().has_error(), "{:?}", resp); + assert!( + resp.get_header().get_error().has_key_not_in_region(), + "{:?}", + resp + ); + + pd_client.must_merge(left.get_id(), right.get_id()); + + let region = pd_client.get_region(b"k1").unwrap(); + assert_eq!(region.get_id(), right.get_id()); + assert_eq!(region.get_start_key(), left.get_start_key()); + assert_eq!(region.get_end_key(), right.get_end_key()); + let origin_epoch = left.get_region_epoch(); + let new_epoch = region.get_region_epoch(); + // PrepareMerge + CommitMerge, so it should be 2. 
+ assert_eq!(new_epoch.get_version(), origin_epoch.get_version() + 2); + assert_eq!(new_epoch.get_conf_ver(), origin_epoch.get_conf_ver()); + let get = new_request( + region.get_id(), + new_epoch.to_owned(), + vec![new_get_cmd(b"k1")], + false, + ); + debug!("requesting {:?}", get); + let resp = cluster + .call_command_on_leader(get, Duration::from_secs(5)) + .unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + assert_eq!(resp.get_responses()[0].get_get().get_value(), b"v1"); + + let version = left.get_region_epoch().get_version(); + let conf_ver = left.get_region_epoch().get_conf_ver(); + 'outer: for i in 1..4 { + let mut state = RegionLocalState::default(); + for _ in 0..3 { + state = cluster.region_local_state(left.get_id(), i); + if state.get_state() == PeerState::Tombstone { + let epoch = state.get_region().get_region_epoch(); + assert_eq!(epoch.get_version(), version + 1); + assert_eq!(epoch.get_conf_ver(), conf_ver + 1); + continue 'outer; + } + thread::sleep(Duration::from_millis(500)); + } + panic!("store {} is still not merged: {:?}", i, state); + } + + cluster.must_put(b"k4", b"v4"); +} + +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_merge_with_slow_learner() { - let mut cluster = new_node_cluster(0, 2); + let mut cluster = new_cluster(0, 2); configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.raft_log_gc_threshold = 40; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(40); @@ -181,9 +264,10 @@ fn test_node_merge_with_slow_learner() { } /// Test whether merge will be aborted if prerequisites is not met. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_merge_prerequisites_check() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); @@ -229,13 +313,14 @@ fn test_node_merge_prerequisites_check() { 3, ))); // It doesn't matter if the index and term is correct. - let compact_log = new_compact_log_request(100, 10); + let compact_log = new_compact_log_request(0, 10); let req = new_admin_request(right.get_id(), right.get_region_epoch(), compact_log); debug!("requesting {:?}", req); - let res = cluster + let _res = cluster .call_command_on_leader(req, Duration::from_secs(3)) .unwrap(); - assert!(res.get_header().has_error(), "{:?}", res); + // v2 doesn't respond error. + // assert!(res.get_header().has_error(), "{:?}", res); let res = cluster.try_merge(right.get_id(), left.get_id()); // log gap (min_matched, last_index] contains admin entries. assert!(res.get_header().has_error(), "{:?}", res); @@ -262,11 +347,12 @@ fn test_node_merge_prerequisites_check() { } /// Test if stale peer will be handled properly after merge. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_check_merged_message() { - let mut cluster = new_node_cluster(0, 4); + let mut cluster = new_cluster(0, 4); configure_for_merge(&mut cluster.cfg); - ignore_merge_target_integrity(&mut cluster); + ignore_merge_target_integrity(&mut cluster.cfg, &cluster.pd_client); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -322,87 +408,84 @@ fn test_node_check_merged_message() { must_get_none(&engine3, b"v5"); } -#[test] -fn test_node_merge_slow_split_right() { - test_node_merge_slow_split(true); -} - -#[test] -fn test_node_merge_slow_split_left() { - test_node_merge_slow_split(false); -} - // Test if a merge handled properly when there is a unfinished slow split before // merge. -fn test_node_merge_slow_split(is_right_derive: bool) { - let mut cluster = new_node_cluster(0, 3); - configure_for_merge(&mut cluster.cfg); - ignore_merge_target_integrity(&mut cluster); - let pd_client = Arc::clone(&cluster.pd_client); - pd_client.disable_default_operator(); - cluster.cfg.raft_store.right_derive_when_split = is_right_derive; - - cluster.run(); - - cluster.must_put(b"k1", b"v1"); - cluster.must_put(b"k3", b"v3"); - - let region = pd_client.get_region(b"k1").unwrap(); - cluster.must_split(®ion, b"k2"); - let left = pd_client.get_region(b"k1").unwrap(); - let right = pd_client.get_region(b"k3").unwrap(); - - let target_leader = right - .get_peers() - .iter() - .find(|p| p.get_store_id() == 1) - .unwrap() - .clone(); - cluster.must_transfer_leader(right.get_id(), target_leader); - let target_leader = left - .get_peers() - .iter() - .find(|p| p.get_store_id() == 2) - .unwrap() - .clone(); - cluster.must_transfer_leader(left.get_id(), target_leader); - must_get_equal(&cluster.get_engine(1), b"k3", b"v3"); - - // So cluster becomes: - // left region: 1 2(leader) I 3 - // right region: 1(leader) 2 I 3 - // I means 
isolation.(here just means 3 can not receive append log) - cluster.add_send_filter(CloneFilterFactory( - RegionPacketFilter::new(left.get_id(), 3) - .direction(Direction::Recv) - .msg_type(MessageType::MsgAppend), - )); - cluster.add_send_filter(CloneFilterFactory( - RegionPacketFilter::new(right.get_id(), 3) - .direction(Direction::Recv) - .msg_type(MessageType::MsgAppend), - )); - cluster.must_split(&right, b"k3"); - - // left region and right region on store 3 fall behind - // so after split, the new generated region is not on store 3 now - let right1 = pd_client.get_region(b"k2").unwrap(); - let right2 = pd_client.get_region(b"k3").unwrap(); - assert_ne!(right1.get_id(), right2.get_id()); - pd_client.must_merge(left.get_id(), right1.get_id()); - // after merge, the left region still exists on store 3 - - cluster.must_put(b"k0", b"v0"); - cluster.clear_send_filters(); - must_get_equal(&cluster.get_engine(3), b"k0", b"v0"); +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_merge_slow_split() { + fn imp(is_right_derive: bool) { + let mut cluster = new_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + ignore_merge_target_integrity(&mut cluster.cfg, &cluster.pd_client); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.cfg.raft_store.right_derive_when_split = is_right_derive; + + cluster.run(); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + + let region = pd_client.get_region(b"k1").unwrap(); + cluster.must_split(®ion, b"k2"); + let left = pd_client.get_region(b"k1").unwrap(); + let right = pd_client.get_region(b"k3").unwrap(); + + let target_leader = right + .get_peers() + .iter() + .find(|p| p.get_store_id() == 1) + .unwrap() + .clone(); + cluster.must_transfer_leader(right.get_id(), target_leader); + let target_leader = left + .get_peers() + .iter() + .find(|p| p.get_store_id() == 2) + .unwrap() + .clone(); + 
cluster.must_transfer_leader(left.get_id(), target_leader); + must_get_equal(&cluster.get_engine(1), b"k3", b"v3"); + + // So cluster becomes: + // left region: 1 2(leader) I 3 + // right region: 1(leader) 2 I 3 + // I means isolation.(here just means 3 can not receive append log) + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(left.get_id(), 3) + .direction(Direction::Recv) + .msg_type(MessageType::MsgAppend), + )); + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(right.get_id(), 3) + .direction(Direction::Recv) + .msg_type(MessageType::MsgAppend), + )); + cluster.must_split(&right, b"k3"); + + // left region and right region on store 3 fall behind + // so after split, the new generated region is not on store 3 now + let right1 = pd_client.get_region(b"k2").unwrap(); + let right2 = pd_client.get_region(b"k3").unwrap(); + assert_ne!(right1.get_id(), right2.get_id()); + pd_client.must_merge(left.get_id(), right1.get_id()); + // after merge, the left region still exists on store 3 + + cluster.must_put(b"k0", b"v0"); + cluster.clear_send_filters(); + must_get_equal(&cluster.get_engine(3), b"k0", b"v0"); + } + imp(true); + imp(false); } /// Test various cases that a store is isolated during merge. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_merge_dist_isolation() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); - ignore_merge_target_integrity(&mut cluster); + ignore_merge_target_integrity(&mut cluster.cfg, &cluster.pd_client); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -474,11 +557,12 @@ fn test_node_merge_dist_isolation() { /// Similar to `test_node_merge_dist_isolation`, but make the isolated store /// way behind others so others have to send it a snapshot. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_merge_brain_split() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); - ignore_merge_target_integrity(&mut cluster); + ignore_merge_target_integrity(&mut cluster.cfg, &cluster.pd_client); cluster.cfg.raft_store.raft_log_gc_threshold = 12; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(12); @@ -528,12 +612,7 @@ fn test_node_merge_brain_split() { cluster.must_put(b"k40", b"v5"); // Make sure the two regions are already merged on store 3. - let state_key = keys::region_state_key(left.get_id()); - let state: RegionLocalState = cluster - .get_engine(3) - .get_msg_cf(CF_RAFT, &state_key) - .unwrap() - .unwrap(); + let state = cluster.region_local_state(left.get_id(), 3); assert_eq!(state.get_state(), PeerState::Tombstone); must_get_equal(&cluster.get_engine(3), b"k40", b"v5"); for i in 1..100 { @@ -577,9 +656,10 @@ fn test_node_merge_brain_split() { } /// Test whether approximate size and keys are updated after merge -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_merge_approximate_size_and_keys() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(20); cluster.run(); @@ -653,9 +733,10 @@ fn test_merge_approximate_size_and_keys() { ); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_merge_update_region() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); // Election timeout and max leader lease is 1s. 
configure_for_lease_read(&mut cluster.cfg, Some(100), Some(10)); @@ -733,9 +814,10 @@ fn test_node_merge_update_region() { /// Test if merge is working properly when merge entries is empty but commit /// index is not updated. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_merge_catch_up_logs_empty_entries() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); cluster.run(); @@ -788,9 +870,10 @@ fn test_node_merge_catch_up_logs_empty_entries() { cluster.must_region_not_exist(left.get_id(), 3); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_merge_with_slow_promote() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -830,11 +913,12 @@ fn test_merge_with_slow_promote() { /// logically) /// - A split => C (-∞, k3), A [k3, +∞) /// - Then network recovery -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_merge_isolated_store_with_no_target_peer() { - let mut cluster = new_node_cluster(0, 4); + let mut cluster = new_cluster(0, 4); configure_for_merge(&mut cluster.cfg); - ignore_merge_target_integrity(&mut cluster); + ignore_merge_target_integrity(&mut cluster.cfg, &cluster.pd_client); cluster.cfg.raft_store.right_derive_when_split = true; let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -889,9 +973,10 @@ fn test_merge_isolated_store_with_no_target_peer() { /// Test whether a isolated peer can recover when two other regions merge to its /// region -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_merge_cascade_merge_isolated() { - 
let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -940,9 +1025,10 @@ fn test_merge_cascade_merge_isolated() { // Test if a learner can be destroyed properly when it's isolated and removed by // conf change before its region merge to another region -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_merge_isolated_not_in_merge_learner() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -986,9 +1072,10 @@ fn test_merge_isolated_not_in_merge_learner() { // Test if a learner can be destroyed properly when it's isolated and removed by // conf change before another region merge to its region -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_merge_isolated_stale_learner() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.right_derive_when_split = true; // Do not rely on pd to remove stale peer @@ -1037,9 +1124,10 @@ fn test_merge_isolated_stale_learner() { /// 2. Be the last removed peer in its peer list /// 3. Then its region merges to another region. /// 4. 
Isolation disappears -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_merge_isolated_not_in_merge_learner_2() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -1082,9 +1170,10 @@ fn test_merge_isolated_not_in_merge_learner_2() { /// Test if a peer can be removed if its target peer has been removed and /// doesn't apply the CommitMerge log. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] fn test_merge_remove_target_peer_isolated() { - let mut cluster = new_node_cluster(0, 4); + let mut cluster = new_cluster(0, 4); configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -1140,45 +1229,46 @@ fn test_merge_remove_target_peer_isolated() { } } -#[test] +#[test_case(test_raftstore::new_server_cluster_with_api_ver)] +#[test_case(test_raftstore_v2::new_server_cluster_with_api_ver)] fn test_sync_max_ts_after_region_merge() { - test_kv_format_impl!(test_sync_max_ts_after_region_merge_impl); -} + fn imp() { + let mut cluster = new_cluster(0, 3, F::TAG); + configure_for_merge(&mut cluster.cfg); + cluster.run(); -fn test_sync_max_ts_after_region_merge_impl() { - let mut cluster = new_server_cluster_with_api_ver(0, 3, F::TAG); - configure_for_merge(&mut cluster.cfg); - cluster.run(); + // Transfer leader to node 1 first to ensure all operations happen on node 1 + cluster.must_transfer_leader(1, new_peer(1, 1)); - // Transfer leader to node 1 first to ensure all operations happen on node 1 - cluster.must_transfer_leader(1, new_peer(1, 1)); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); - cluster.must_put(b"k1", b"v1"); - cluster.must_put(b"k3", b"v3"); - - let region = cluster.get_region(b"k1"); - 
cluster.must_split(®ion, b"k2"); - let left = cluster.get_region(b"k1"); - let right = cluster.get_region(b"k3"); + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k3"); - let cm = cluster.sim.read().unwrap().get_concurrency_manager(1); - wait_for_synced(&mut cluster, 1, 1); - let max_ts = cm.max_ts(); + let cm = cluster.sim.read().unwrap().get_concurrency_manager(1); + wait_for_synced(&mut cluster, 1, 1); + let max_ts = cm.max_ts(); - cluster.pd_client.trigger_tso_failure(); - // Merge left to right - cluster.pd_client.must_merge(left.get_id(), right.get_id()); + cluster.pd_client.trigger_tso_failure(); + // Merge left to right + cluster.pd_client.must_merge(left.get_id(), right.get_id()); - wait_for_synced(&mut cluster, 1, 1); - let new_max_ts = cm.max_ts(); - assert!(new_max_ts > max_ts); + wait_for_synced(&mut cluster, 1, 1); + let new_max_ts = cm.max_ts(); + assert!(new_max_ts > max_ts); + } + test_kv_format_impl!(imp); } /// If a follower is demoted by a snapshot, its meta will be changed. The case /// is to ensure asserts in code can tolerate the change. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_merge_snapshot_demote() { - let mut cluster = new_node_cluster(0, 4); + let mut cluster = new_cluster(0, 4); configure_for_merge(&mut cluster.cfg); configure_for_snapshot(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); @@ -1232,9 +1322,10 @@ fn test_merge_snapshot_demote() { must_get_equal(&cluster.get_engine(3), b"k4", b"v4"); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_propose_in_memory_pessimistic_locks() { - let mut cluster = new_server_cluster(0, 2); + let mut cluster = new_cluster(0, 2); configure_for_merge(&mut cluster.cfg); cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); @@ -1311,9 +1402,10 @@ fn test_propose_in_memory_pessimistic_locks() { ); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +// #[test_case(test_raftstore_v2::new_server_cluster)] fn test_merge_pessimistic_locks_when_gap_is_too_large() { - let mut cluster = new_server_cluster(0, 2); + let mut cluster = new_cluster(0, 2); configure_for_merge(&mut cluster.cfg); cluster.cfg.pessimistic_txn.pipelined = true; cluster.cfg.pessimistic_txn.in_memory = true; @@ -1361,9 +1453,10 @@ fn test_merge_pessimistic_locks_when_gap_is_too_large() { assert_eq!(cluster.must_get(b"k1").unwrap(), b"new_val"); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_merge_pessimistic_locks_repeated_merge() { - let mut cluster = new_server_cluster(0, 2); + let mut cluster = new_cluster(0, 2); configure_for_merge(&mut cluster.cfg); cluster.cfg.pessimistic_txn.pipelined = true; cluster.cfg.pessimistic_txn.in_memory = true; @@ -1428,11 +1521,12 @@ fn test_merge_pessimistic_locks_repeated_merge() { /// Check if merge is cleaned up if the merge target is destroyed several times /// before it's ever scheduled. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_merge_long_isolated() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); - ignore_merge_target_integrity(&mut cluster); + ignore_merge_target_integrity(&mut cluster.cfg, &cluster.pd_client); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -1447,8 +1541,8 @@ fn test_node_merge_long_isolated() { let right = pd_client.get_region(b"k3").unwrap(); cluster.must_transfer_leader(right.get_id(), new_peer(3, 3)); - let target_leader = peer_on_store(&left, 3); - cluster.must_transfer_leader(left.get_id(), target_leader); + let left_leader = peer_on_store(&left, 3); + cluster.must_transfer_leader(left.get_id(), left_leader); must_get_equal(&cluster.get_engine(1), b"k3", b"v3"); // So cluster becomes: @@ -1490,9 +1584,10 @@ fn test_node_merge_long_isolated() { must_get_none(&cluster.get_engine(1), b"k1"); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_stale_message_after_merge() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); @@ -1538,9 +1633,10 @@ fn test_stale_message_after_merge() { /// Check whether merge should be prevented if follower may not have enough /// logs. 
-#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_prepare_merge_with_reset_matched() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -1587,9 +1683,10 @@ fn test_prepare_merge_with_reset_matched() { /// Check if prepare merge min index is chosen correctly even if all match /// indexes are correct. -#[test] +#[test_case(test_raftstore::new_server_cluster)] +// #[test_case(test_raftstore_v2::new_server_cluster)] fn test_prepare_merge_with_5_nodes_snapshot() { - let mut cluster = new_server_cluster(0, 5); + let mut cluster = new_cluster(0, 5); configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); From 7ab1702fd84e500a428aa3d02baae24aa3ba46c4 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 24 Mar 2023 12:18:42 +0800 Subject: [PATCH 0602/1149] raftstore-v2: flush memtable before proposing split (#14437) close tikv/tikv#14447 flush memtable before proposing split Signed-off-by: SpadeA-Tang --- Cargo.lock | 2 +- components/raftstore-v2/src/batch/store.rs | 17 ++- components/raftstore-v2/src/lib.rs | 5 +- .../operation/command/admin/compact_log.rs | 2 +- .../operation/command/admin/merge/commit.rs | 2 +- .../src/operation/command/admin/mod.rs | 96 ++++++++++++++- .../src/operation/command/admin/split.rs | 13 +- .../command/admin/transfer_leader.rs | 4 +- .../raftstore-v2/src/operation/ready/mod.rs | 18 ++- components/raftstore-v2/src/raft/peer.rs | 12 ++ components/raftstore-v2/src/worker/mod.rs | 1 + .../raftstore-v2/src/worker/tablet_flush.rs | 115 ++++++++++++++++++ components/raftstore/src/store/fsm/peer.rs | 1 + components/test_raftstore-v2/src/cluster.rs | 4 + components/test_raftstore-v2/src/node.rs | 4 + 
components/test_raftstore/src/node.rs | 4 + components/txn_types/src/types.rs | 2 + .../raftstore/test_split_region.rs | 47 ++----- 18 files changed, 299 insertions(+), 50 deletions(-) create mode 100644 components/raftstore-v2/src/worker/tablet_flush.rs diff --git a/Cargo.lock b/Cargo.lock index e12ee05562d..1cb40d842cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2712,7 +2712,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#df1ae63d0cfe2f5e01d2016a1839a7e88ef2da38" +source = "git+https://github.com/pingcap/kvproto.git#af969693ce8a7884e5bdc5d81c728f657d33065a" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 3f7bf408aa8..fe152bb3990 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -43,7 +43,7 @@ use tikv_util::{ sys::SysQuota, time::{duration_to_sec, Instant as TiInstant}, timer::SteadyTimer, - worker::{LazyWorker, Scheduler, Worker}, + worker::{Builder as WorkerBuilder, LazyWorker, Scheduler, Worker}, yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, Either, }; @@ -54,7 +54,7 @@ use crate::{ operation::{SharedReadTablet, MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX}, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, - worker::{pd, tablet_gc}, + worker::{pd, tablet_flush, tablet_gc}, Error, Result, }; @@ -474,6 +474,7 @@ pub struct Schedulers { pub pd: Scheduler, pub tablet_gc: Scheduler>, pub write: WriteSenders, + pub tablet_flush: Scheduler, // Following is not maintained by raftstore itself. pub split_check: Scheduler, @@ -497,6 +498,7 @@ struct Workers { tablet_gc: Worker, async_write: StoreWriters, purge: Option, + tablet_flush: Worker, // Following is not maintained by raftstore itself. 
background: Worker, @@ -504,12 +506,16 @@ struct Workers { impl Workers { fn new(background: Worker, pd: LazyWorker, purge: Option) -> Self { + let tablet_flush = WorkerBuilder::new("tablet_flush-worker") + .thread_count(2) + .create(); Self { async_read: Worker::new("async-read-worker"), pd, tablet_gc: Worker::new("tablet-gc-worker"), async_write: StoreWriters::new(None), purge, + tablet_flush, background, } } @@ -519,6 +525,7 @@ impl Workers { self.async_read.stop(); self.pd.stop(); self.tablet_gc.stop(); + self.tablet_flush.stop(); if let Some(w) = self.purge { w.stop(); } @@ -637,12 +644,18 @@ impl StoreSystem { ), ); + let tablet_flush_scheduler = workers.tablet_flush.start( + "tablet-flush-worker", + tablet_flush::Runner::new(router.clone(), tablet_registry.clone(), self.logger.clone()), + ); + let schedulers = Schedulers { read: read_scheduler, pd: workers.pd.scheduler(), tablet_gc: tablet_gc_scheduler, write: workers.async_write.senders(), split_check: split_check_scheduler, + tablet_flush: tablet_flush_scheduler, }; let builder = StorePollerBuilder::new( diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index bbb73676ffb..04745d01fbe 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -42,4 +42,7 @@ pub use bootstrap::Bootstrap; pub use fsm::StoreMeta; pub use operation::{write_initial_states, SimpleWriteBinary, SimpleWriteEncoder, StateStorage}; pub use raftstore::{store::Config, Error, Result}; -pub use worker::pd::{PdReporter, Task as PdTask}; +pub use worker::{ + pd::{PdReporter, Task as PdTask}, + tablet_flush::Task as TabletFlushTask, +}; diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 8ae195539b2..1cc9ccbb1c3 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -115,7 
+115,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, impl Peer { // Mirrors v1::on_raft_gc_log_tick. - fn maybe_propose_compact_log( + fn maybe_propose_compact_log( &mut self, store_ctx: &mut StoreContext, force: bool, diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index 876ba5b1a95..2756d0174dd 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -248,7 +248,7 @@ impl Peer { // Target peer handles the commit merge request. impl Peer { - pub fn on_ask_commit_merge( + pub fn on_ask_commit_merge( &mut self, store_ctx: &mut StoreContext, req: RaftCmdRequest, diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index f59a5e6e0f2..28fceb2d95b 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -10,7 +10,11 @@ pub use compact_log::CompactLogContext; use compact_log::CompactLogResult; use conf_change::{ConfChangeResult, UpdateGcPeersResult}; use engine_traits::{KvEngine, RaftEngine}; -use kvproto::raft_cmdpb::{AdminCmdType, RaftCmdRequest}; +use kvproto::{ + metapb::PeerRole, + raft_cmdpb::{AdminCmdType, RaftCmdRequest}, + raft_serverpb::{ExtraMessageType, FlushMemtable, RaftMessage}, +}; use merge::{commit::CommitMergeResult, prepare::PrepareMergeResult}; pub use merge::{ commit::{CatchUpLogs, MERGE_IN_PROGRESS_PREFIX}, @@ -18,10 +22,15 @@ pub use merge::{ }; use protobuf::Message; use raftstore::{ - store::{cmd_resp, fsm::apply, msg::ErrorCallback}, + store::{ + cmd_resp, + fsm::{apply, apply::validate_batch_split}, + msg::ErrorCallback, + Transport, + }, Error, }; -use slog::info; +use slog::{error, info}; use split::SplitResult; pub use split::{ 
report_split_init_finish, temp_split_path, RequestHalfSplit, RequestSplit, SplitFlowControl, @@ -47,7 +56,7 @@ pub enum AdminCmdResult { impl Peer { #[inline] - pub fn on_admin_command( + pub fn on_admin_command( &mut self, ctx: &mut StoreContext, mut req: RaftCmdRequest, @@ -122,7 +131,84 @@ impl Peer { AdminCmdType::Split => Err(box_err!( "Split is deprecated. Please use BatchSplit instead." )), - AdminCmdType::BatchSplit => self.propose_split(ctx, req), + AdminCmdType::BatchSplit => { + #[allow(clippy::question_mark)] + if let Err(err) = validate_batch_split(req.get_admin_request(), self.region()) { + Err(err) + } else { + // To reduce the impact of the expensive operation of `checkpoint` (it will + // flush memtables of the rocksdb) in applying batch split, we split the + // BatchSplit cmd into two phases: + // + // 1. Schedule flush memtable task so that the memtables of the rocksdb can + // be flushed in advance in a way that will not block the normal raft + // operations (`checkpoint` will still cause flush but it will be + // significantly lightweight). At the same time, send flush memtable msgs to + // the follower so that they can flush memtalbes in advance too. + // + // 2. When the task finishes, it will propose a batch split with + // `SPLIT_SECOND_PHASE` flag. 
+ if !WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()) + .contains(WriteBatchFlags::SPLIT_SECOND_PHASE) + { + if self.tablet_being_flushed() { + return; + } + + let region_id = self.region().get_id(); + self.set_tablet_being_flushed(true); + info!( + self.logger, + "Schedule flush tablet"; + ); + if let Err(e) = ctx.schedulers.tablet_flush.schedule( + crate::TabletFlushTask::TabletFlush { + region_id, + req: Some(req), + is_leader: true, + ch: Some(ch), + }, + ) { + error!( + self.logger, + "Fail to schedule flush task"; + "err" => ?e, + ) + } + + let peers = self.region().get_peers().to_vec(); + for p in peers { + if p == *self.peer() + || p.get_role() != PeerRole::Voter + || p.is_witness + { + continue; + } + let mut msg = RaftMessage::default(); + msg.set_region_id(region_id); + msg.set_from_peer(self.peer().clone()); + msg.set_to_peer(p.clone()); + msg.set_region_epoch(self.region().get_region_epoch().clone()); + let extra_msg = msg.mut_extra_msg(); + extra_msg.set_type(ExtraMessageType::MsgFlushMemtable); + let mut flush_memtable = FlushMemtable::new(); + flush_memtable.set_region_id(region_id); + extra_msg.set_flush_memtable(flush_memtable); + + self.send_raft_message(ctx, msg); + } + + return; + } + + info!( + self.logger, + "Propose split"; + ); + self.set_tablet_being_flushed(false); + self.propose_split(ctx, req) + } + } AdminCmdType::TransferLeader => { // Containing TRANSFER_LEADER_PROPOSAL flag means the this transfer leader // request should be proposed to the raft group diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 4560fa93689..e1577830d25 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -54,7 +54,7 @@ use raftstore::{ Result, }; use slog::{error, info, warn}; -use tikv_util::{log::SlogFormat, slog_panic}; +use tikv_util::{log::SlogFormat, 
slog_panic, time::Instant}; use crate::{ batch::StoreContext, @@ -362,7 +362,6 @@ impl Peer { store_ctx: &mut StoreContext, req: RaftCmdRequest, ) -> Result { - validate_batch_split(req.get_admin_request(), self.region())?; // We rely on ConflictChecker to detect conflicts, so no need to set proposal // context. let data = req.write_to_bytes().unwrap(); @@ -484,6 +483,7 @@ impl Apply { ) }); + let now = Instant::now(); let reg = self.tablet_registry(); for new_region in ®ions { let new_region_id = new_region.id; @@ -521,6 +521,15 @@ impl Apply { ) }); } + let elapsed = now.saturating_elapsed(); + // to be removed after when it's stable + info!( + self.logger, + "create checkpoint time consumes"; + "region" => ?self.region(), + "duration" => ?elapsed + ); + let reg = self.tablet_registry(); let path = reg.tablet_path(region_id, log_index); let mut ctx = TabletContext::new(®ions[derived_index], Some(log_index)); diff --git a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs index 54aa9845e17..e7bd84c973c 100644 --- a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs +++ b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs @@ -15,7 +15,7 @@ use raft::{eraftpb, ProgressState, Storage}; use raftstore::{ store::{ fsm::new_admin_request, make_transfer_leader_response, metrics::PEER_ADMIN_CMD_COUNTER, - TRANSFER_LEADER_COMMAND_REPLY_CTX, + Transport, TRANSFER_LEADER_COMMAND_REPLY_CTX, }, Result, }; @@ -146,7 +146,7 @@ impl Peer { true } - pub fn on_transfer_leader_msg( + pub fn on_transfer_leader_msg( &mut self, ctx: &mut StoreContext, msg: &eraftpb::Message, diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index d93502a734d..68da61cf45e 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -213,6 +213,22 
@@ impl Peer { self.on_gc_peer_request(ctx, &msg); return; } + ExtraMessageType::MsgFlushMemtable => { + let region_epoch = msg.as_ref().get_region_epoch(); + if util::is_epoch_stale(region_epoch, self.region().get_region_epoch()) { + return; + } + let _ = + ctx.schedulers + .tablet_flush + .schedule(crate::TabletFlushTask::TabletFlush { + region_id: self.region().get_id(), + req: None, + is_leader: false, + ch: None, + }); + return; + } ExtraMessageType::MsgWantRollbackMerge => { if self.is_leader() { // TODO: @@ -352,7 +368,7 @@ impl Peer { /// /// The message is pushed into the send buffer, it may not be sent out until /// transport is flushed explicitly. - fn send_raft_message( + pub(crate) fn send_raft_message( &mut self, ctx: &mut StoreContext, msg: RaftMessage, diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 8ee311401a9..b93fc0f5047 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -45,6 +45,7 @@ const REGION_READ_PROGRESS_CAP: usize = 128; pub struct Peer { raft_group: RawNode>, tablet: CachedTablet, + tablet_being_flushed: bool, /// Statistics for self. 
self_stat: PeerStat, @@ -155,6 +156,7 @@ impl Peer { let tag = format!("[region {}] {}", region.get_id(), peer_id); let mut peer = Peer { tablet: cached_tablet, + tablet_being_flushed: false, self_stat: PeerStat::default(), peer_cache: vec![], peer_heartbeats: HashMap::default(), @@ -302,6 +304,16 @@ impl Peer { self.peer().get_id() } + #[inline] + pub fn tablet_being_flushed(&self) -> bool { + self.tablet_being_flushed + } + + #[inline] + pub fn set_tablet_being_flushed(&mut self, v: bool) { + self.tablet_being_flushed = v; + } + #[inline] pub fn storage(&self) -> &Storage { self.raft_group.store() diff --git a/components/raftstore-v2/src/worker/mod.rs b/components/raftstore-v2/src/worker/mod.rs index 6fafd01df85..121c41906d7 100644 --- a/components/raftstore-v2/src/worker/mod.rs +++ b/components/raftstore-v2/src/worker/mod.rs @@ -1,4 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. pub mod pd; +pub mod tablet_flush; pub mod tablet_gc; diff --git a/components/raftstore-v2/src/worker/tablet_flush.rs b/components/raftstore-v2/src/worker/tablet_flush.rs new file mode 100644 index 00000000000..c53296a5cb6 --- /dev/null +++ b/components/raftstore-v2/src/worker/tablet_flush.rs @@ -0,0 +1,115 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::fmt::{Display, Formatter}; + +use engine_traits::{KvEngine, RaftEngine, TabletRegistry, DATA_CFS}; +use kvproto::raft_cmdpb::RaftCmdRequest; +use slog::{error, info, Logger}; +use tikv_util::{time::Instant, worker::Runnable}; +use txn_types::WriteBatchFlags; + +use crate::{ + router::{CmdResChannel, PeerMsg, RaftRequest}, + StoreRouter, +}; + +pub enum Task { + TabletFlush { + region_id: u64, + req: Option, + is_leader: bool, + ch: Option, + }, +} + +impl Display for Task { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Task::TabletFlush { region_id, .. 
} => { + write!(f, "Flush tablet before split for region {}", region_id) + } + } + } +} + +pub struct Runner { + router: StoreRouter, + tablet_registry: TabletRegistry, + logger: Logger, +} + +impl Runner { + pub fn new( + router: StoreRouter, + tablet_registry: TabletRegistry, + logger: Logger, + ) -> Self { + Self { + router, + tablet_registry, + logger, + } + } + + fn flush_tablet( + &mut self, + region_id: u64, + req: Option, + is_leader: bool, + ch: Option, + ) { + let Some(Some(tablet)) = self + .tablet_registry + .get(region_id) + .map(|mut cache| cache.latest().cloned()) else {return}; + let now = Instant::now(); + tablet.flush_cfs(DATA_CFS, true).unwrap(); + let elapsed = now.saturating_elapsed(); + // to be removed after when it's stable + info!( + self.logger, + "flush memtable time consumes"; + "region_id" => region_id, + "duration" => ?elapsed, + "is_leader" => is_leader, + ); + + if !is_leader { + return; + } + + let mut req = req.unwrap(); + req.mut_header() + .set_flags(WriteBatchFlags::SPLIT_SECOND_PHASE.bits()); + if let Err(e) = self.router.send( + region_id, + PeerMsg::AdminCommand(RaftRequest::new(req, ch.unwrap())), + ) { + error!( + self.logger, + "send split request fail in the second phase"; + "region_id" => region_id, + "err" => ?e, + ); + } + } +} + +impl Runnable for Runner +where + EK: KvEngine, + ER: RaftEngine, +{ + type Task = Task; + + fn run(&mut self, task: Self::Task) { + match task { + Task::TabletFlush { + region_id, + req, + is_leader, + ch, + } => self.flush_tablet(region_id, req, is_leader, ch), + } + } +} diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 3eca179d770..67054d5bd11 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2797,6 +2797,7 @@ where } // It's v2 only message and ignore does no harm. 
ExtraMessageType::MsgGcPeerRequest | ExtraMessageType::MsgGcPeerResponse => (), + ExtraMessageType::MsgFlushMemtable => (), } } diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 6a953ed9ca2..0f352ebc5bf 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -1035,6 +1035,10 @@ impl Cluster { region_end_key }; + if amended_start_key > amended_end_key { + return Ok(()); + } + tablet.scan(cf, amended_start_key, amended_end_key, fill_cache, f) } diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 5617787bb70..058a9caf186 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -437,12 +437,16 @@ impl Simulator for NodeCluster { } } +// Compare to server cluster, node cluster does not have server layer and +// storage layer. pub fn new_node_cluster(id: u64, count: usize) -> Cluster { let pd_client = Arc::new(TestPdClient::new(id, false)); let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); Cluster::new(id, count, sim, pd_client, ApiVersion::V1) } +// This cluster does not support batch split, we expect it to transfer the +// `BatchSplit` request to `split` request pub fn new_incompatible_node_cluster(id: u64, count: usize) -> Cluster { let pd_client = Arc::new(TestPdClient::new(id, true)); let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index c75adf33645..c4c516fb7f9 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -505,12 +505,16 @@ impl Simulator for NodeCluster { } } +// Compare to server cluster, node cluster does not have server layer and +// storage layer. 
pub fn new_node_cluster(id: u64, count: usize) -> Cluster { let pd_client = Arc::new(TestPdClient::new(id, false)); let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); Cluster::new(id, count, sim, pd_client, ApiVersion::V1) } +// This cluster does not support batch split, we expect it to transfer the +// `BatchSplit` request to `split` request pub fn new_incompatible_node_cluster(id: u64, count: usize) -> Cluster { let pd_client = Arc::new(TestPdClient::new(id, true)); let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 15779df426a..23df1a89940 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -570,6 +570,8 @@ bitflags! { const TRANSFER_LEADER_PROPOSAL = 0b00000100; /// Indicates this request is a flashback transaction. const FLASHBACK = 0b00001000; + /// Indicates the relevant tablet has been flushed, and we can propose split now. 
+ const SPLIT_SECOND_PHASE = 0b00010000; } } diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index f8d6ff9b468..2673a34b0d2 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -7,7 +7,7 @@ use std::{ time::Duration, }; -use engine_traits::{Iterable, Peekable, CF_DEFAULT, CF_WRITE}; +use engine_traits::{Peekable, CF_DEFAULT, CF_WRITE}; use keys::data_key; use kvproto::{metapb, pdpb, raft_cmdpb::*, raft_serverpb::RaftMessage}; use pd_client::PdClient; @@ -145,7 +145,14 @@ fn test_server_split_region_twice() { rx1.recv_timeout(Duration::from_secs(5)).unwrap(); } -fn test_auto_split_region(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore::new_incompatible_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_auto_split_region() { + let count = 5; + let mut cluster = new_cluster(0, count); cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(100); cluster.cfg.coprocessor.region_max_size = Some(ReadableSize(REGION_MAX_SIZE)); cluster.cfg.coprocessor.region_split_size = Some(ReadableSize(REGION_SPLIT_SIZE)); @@ -159,7 +166,7 @@ fn test_auto_split_region(cluster: &mut Cluster) { let region = pd_client.get_region(b"").unwrap(); - let last_key = put_till_size(cluster, REGION_SPLIT_SIZE, &mut range); + let last_key = put_till_size(&mut cluster, REGION_SPLIT_SIZE, &mut range); // it should be finished in millis if split. 
thread::sleep(Duration::from_millis(300)); @@ -169,7 +176,7 @@ fn test_auto_split_region(cluster: &mut Cluster) { assert_eq!(region, target); let max_key = put_cf_till_size( - cluster, + &mut cluster, CF_WRITE, REGION_MAX_SIZE - REGION_SPLIT_SIZE + check_size_diff, &mut range, @@ -195,9 +202,9 @@ fn test_auto_split_region(cluster: &mut Cluster) { let leader = cluster.leader_of_region(left.get_id()).unwrap(); let store_id = leader.get_store_id(); let mut size = 0; - cluster.engines[&store_id] - .kv + cluster .scan( + store_id, CF_DEFAULT, &data_key(b""), &data_key(middle_key), @@ -223,34 +230,6 @@ fn test_auto_split_region(cluster: &mut Cluster) { assert!(resp.get_header().get_error().has_key_not_in_region()); } -#[test] -fn test_node_auto_split_region() { - let count = 5; - let mut cluster = new_node_cluster(0, count); - test_auto_split_region(&mut cluster); -} - -#[test] -fn test_incompatible_node_auto_split_region() { - let count = 5; - let mut cluster = new_incompatible_node_cluster(0, count); - test_auto_split_region(&mut cluster); -} - -#[test] -fn test_server_auto_split_region() { - let count = 5; - let mut cluster = new_server_cluster(0, count); - test_auto_split_region(&mut cluster); -} - -#[test] -fn test_incompatible_server_auto_split_region() { - let count = 5; - let mut cluster = new_incompatible_server_cluster(0, count); - test_auto_split_region(&mut cluster); -} - // A filter that disable commitment by heartbeat. 
#[derive(Clone)] struct EraseHeartbeatCommit; From 6d4f40c0a5d2d9f42edb738b0ba22d2ce1a9491e Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Mon, 27 Mar 2023 14:26:44 +0800 Subject: [PATCH 0603/1149] snapshot: feedback multi rocksdb (#14400) close tikv/tikv#14436 Signed-off-by: bufferflies <1045931706@qq.com> --- components/raftstore-v2/src/operation/pd.rs | 3 + .../raftstore/src/store/async_io/read.rs | 13 ++-- components/raftstore/src/store/snap.rs | 61 +++++++++++++++++++ src/server/snap.rs | 5 +- src/server/tablet_snap.rs | 18 ++++-- 5 files changed, 89 insertions(+), 11 deletions(-) diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 3b5e7d32f89..4bb6a06c162 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -43,9 +43,12 @@ impl Store { let meta = ctx.store_meta.lock().unwrap(); stats.set_region_count(meta.readers.len() as u32); } + + let snap_stats = ctx.snap_mgr.stats(); // todo: imple snapshot status report stats.set_sending_snap_count(0); stats.set_receiving_snap_count(0); + stats.set_snapshot_stats(snap_stats.stats.into()); STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC .with_label_values(&["sending"]) diff --git a/components/raftstore/src/store/async_io/read.rs b/components/raftstore/src/store/async_io/read.rs index 985134048dd..cee6373c5bd 100644 --- a/components/raftstore/src/store/async_io/read.rs +++ b/components/raftstore/src/store/async_io/read.rs @@ -214,6 +214,7 @@ where snapshot.mut_metadata().set_index(last_applied_index); let conf_state = util::conf_state_from_region(region_state.get_region()); snapshot.mut_metadata().set_conf_state(conf_state); + // Set snapshot data. 
let mut snap_data = RaftSnapshotData::default(); snap_data.set_region(region_state.get_region().clone()); @@ -222,7 +223,6 @@ where snap_data.set_removed_records(region_state.get_removed_records().into()); snap_data.set_merged_records(region_state.get_merged_records().into()); snapshot.set_data(snap_data.write_to_bytes().unwrap().into()); - // create checkpointer. let snap_key = TabletSnapKey::from_region_snap(region_id, to_peer, &snapshot); let mut res = None; @@ -232,11 +232,8 @@ where error!("failed to create checkpointer"; "region_id" => region_id, "error" => %e); SNAP_COUNTER.generate.fail.inc(); } else { + let generate_duration_secs = start.saturating_elapsed().as_secs(); let elapsed = start.saturating_elapsed_secs(); - SNAP_COUNTER.generate.success.inc(); - SNAP_HISTOGRAM.generate.observe(elapsed); - SNAPSHOT_SIZE_HISTOGRAM.observe(total_size as f64); - SNAPSHOT_KV_COUNT_HISTOGRAM.observe(total_keys as f64); info!( "snapshot generated"; "region_id" => region_id, @@ -246,6 +243,12 @@ where "total_size" => total_size, "total_keys" => total_keys, ); + self.snap_mgr() + .begin_snapshot(snap_key, start, generate_duration_secs); + SNAP_COUNTER.generate.success.inc(); + SNAP_HISTOGRAM.generate.observe(elapsed); + SNAPSHOT_SIZE_HISTOGRAM.observe(total_size as f64); + SNAPSHOT_KV_COUNT_HISTOGRAM.observe(total_keys as f64); res = Some(Box::new((snapshot, to_peer))) } diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 57cdbd2a75c..d0c55c144ed 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1975,6 +1975,7 @@ pub struct TabletSnapManager { // directory to store snapfile. 
base: PathBuf, receiving: Arc>>, + stats: Arc>>, } impl TabletSnapManager { @@ -1994,9 +1995,45 @@ impl TabletSnapManager { Ok(Self { base: path, receiving: Arc::default(), + stats: Arc::default(), }) } + pub fn begin_snapshot(&self, key: TabletSnapKey, start: Instant, generate_duration_sec: u64) { + let mut stat = SnapshotStat::default(); + stat.set_generate_duration_sec(generate_duration_sec); + self.stats.lock().unwrap().insert(key, (start, stat)); + } + + pub fn finish_snapshot(&self, key: TabletSnapKey, send: Instant) { + let region_id = key.region_id; + self.stats + .lock() + .unwrap() + .entry(key) + .and_modify(|(start, stat)| { + stat.set_send_duration_sec(send.saturating_elapsed().as_secs()); + stat.set_total_duration_sec(start.saturating_elapsed().as_secs()); + stat.set_region_id(region_id); + }); + } + + pub fn stats(&self) -> SnapStats { + let stats: Vec = self + .stats + .lock() + .unwrap() + .drain_filter(|_, (_, stat)| stat.get_region_id() > 0) + .map(|(_, (_, stat))| stat) + .filter(|stat| stat.get_total_duration_sec() > 1) + .collect(); + SnapStats { + sending_count: 0, + receiving_count: 0, + stats, + } + } + pub fn tablet_gen_path(&self, key: &TabletSnapKey) -> PathBuf { let prefix = format!("{}_{}", SNAP_GEN_PREFIX, key); PathBuf::from(&self.base).join(prefix) @@ -3056,6 +3093,30 @@ pub mod tests { assert!(!file_system::file_exists(&sst_path)); } + #[test] + fn test_snapshot_stats() { + let snap_dir = Builder::new() + .prefix("test_snapshot_stats") + .tempdir() + .unwrap(); + let start = Instant::now(); + let mgr = TabletSnapManager::new(snap_dir.path()).unwrap(); + let key = TabletSnapKey::new(1, 1, 1, 1); + mgr.begin_snapshot(key.clone(), start - time::Duration::from_secs(2), 1); + // filter out the snapshot that is not finished + assert!(mgr.stats().stats.is_empty()); + mgr.finish_snapshot(key.clone(), start - time::Duration::from_secs(1)); + let stats = mgr.stats().stats; + assert_eq!(stats.len(), 1); + 
assert_eq!(stats[0].get_total_duration_sec(), 2); + assert!(mgr.stats().stats.is_empty()); + + // filter out the total duration seconds less than one sencond. + mgr.begin_snapshot(key.clone(), start, 1); + mgr.finish_snapshot(key, start); + assert_eq!(mgr.stats().stats.len(), 0); + } + #[test] fn test_build_with_encryption() { let (_enc_dir, key_manager) = diff --git a/src/server/snap.rs b/src/server/snap.rs index afce0e8a2fd..d06e49ab7a8 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -146,7 +146,7 @@ pub fn send_snap( if let Err(e) = snap_data.merge_from_bytes(snap.get_data()) { return Err(Error::Io(IoError::new(ErrorKind::Other, e))); } - let key = SnapKey::from_region_snap(snap_data.get_region().get_id(), snap); + let key = SnapKey::from_region_snap(msg.get_region_id(), snap); let snap_start = snap_data.get_meta().get_start(); let generate_duration_sec = snap_data.get_meta().get_generate_duration_sec(); (key, snap_start, generate_duration_sec) @@ -202,6 +202,7 @@ pub fn send_snap( fail_point!("snapshot_delete_after_send"); mgr.delete_snapshot(&key, &chunks.snap, true); let cost = UnixSecs::now().into_inner().saturating_sub(snap_start); + let send_duration_sec = timer.saturating_elapsed().as_secs(); // it should ignore if the duration of snapshot is less than 1s to decrease the // grpc data size. 
if cost >= 1 { @@ -209,7 +210,7 @@ pub fn send_snap( stat.set_region_id(key.region_id); stat.set_transport_size(total_size); stat.set_generate_duration_sec(generate_duration_sec); - stat.set_send_duration_sec(timer.saturating_elapsed().as_secs()); + stat.set_send_duration_sec(send_duration_sec); stat.set_total_duration_sec(cost); mgr.collect_stat(stat); } diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index a5a8b24d10b..4524b8645ff 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -60,6 +60,7 @@ use tikv_util::{ config::{ReadableSize, Tracker, VersionTrack}, time::Instant, worker::Runnable, + DeferContext, }; use tokio::runtime::{Builder as RuntimeBuilder, Runtime}; @@ -81,7 +82,11 @@ fn is_sst(file_name: &str) -> bool { async fn read_to(f: &mut File, to: &mut Vec, size: usize, limiter: &Limiter) -> Result<()> { // It's likely in page cache already. - limiter.consume(size / 2).await; + let cost = size / 2; + limiter.consume(cost).await; + SNAP_LIMIT_TRANSPORT_BYTES_COUNTER_STATIC + .send + .inc_by(cost as u64); to.clear(); to.reserve_exact(size); let mut buf: BorrowedBuf<'_> = to.spare_capacity_mut().into(); @@ -310,6 +315,9 @@ async fn accept_one_file( )); } limiter.consume(chunk_len).await; + SNAP_LIMIT_TRANSPORT_BYTES_COUNTER_STATIC + .recv + .inc_by(chunk_len as u64); digest.write(&chunk.data); f.write_all(&chunk.data)?; if exp_size == file_size { @@ -604,9 +612,6 @@ async fn send_snap_files( let mut req = TabletSnapshotRequest::default(); req.mut_end().set_checksum(checksum); sender.send((req, WriteFlags::default())).await?; - SNAP_LIMIT_TRANSPORT_BYTES_COUNTER_STATIC - .send - .inc_by(total_sent); info!("sent all snap file finish"; "snap_key" => %key, "region_id" => region_id, "to_peer" => to_peer); sender.close().await?; Ok(total_sent) @@ -633,6 +638,10 @@ pub fn send_snap( msg.get_to_peer().get_id(), msg.get_message().get_snapshot(), ); + let deregister = { + let (mgr, key) = (mgr.clone(), key.clone()); + 
DeferContext::new(move || mgr.finish_snapshot(key.clone(), timer)) + }; let cb = ChannelBuilder::new(env) .stream_initial_window_size(cfg.grpc_stream_initial_window_size.0 as i32) @@ -652,6 +661,7 @@ pub fn send_snap( let recv_result = receiver.next().await; send_timer.observe_duration(); drop(client); + drop(deregister); mgr.delete_snapshot(&key); match recv_result { None => Ok(SendStat { From 88eb52d38431bdc4b2ff4fc30d30c085f7c72596 Mon Sep 17 00:00:00 2001 From: glorv Date: Mon, 27 Mar 2023 21:18:06 +0800 Subject: [PATCH 0604/1149] resource_control: reset resource group virtual time when it is about to overflow (#14464) ref tikv/tikv#14353, ref pingcap/tidb#42595, ref pingcap/tidb#42596 Signed-off-by: glorv --- .../resource_control/src/resource_group.rs | 155 +++++++++--------- 1 file changed, 75 insertions(+), 80 deletions(-) diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 690a3e3812f..0b0f24e8f62 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -1,6 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. use std::{ + cmp::{max, min}, sync::{ atomic::{AtomicU64, Ordering}, Arc, Mutex, @@ -26,8 +27,8 @@ const TASK_EXTRA_FACTOR_BY_LEVEL: [u64; 3] = [0, 20, 100]; pub const MIN_PRIORITY_UPDATE_INTERVAL: Duration = Duration::from_secs(1); /// default resource group name const DEFAULT_RESOURCE_GROUP_NAME: &str = "default"; -/// default value of max RU quota. -const DEFAULT_MAX_RU_QUOTA: u64 = 10_000; +/// The maximum RU quota that can be configured. 
+const MAX_RU_QUOTA: u64 = i32::MAX as u64; #[cfg(test)] const LOW_PRIORITY: u32 = 1; @@ -35,6 +36,11 @@ const MEDIUM_PRIORITY: u32 = 8; #[cfg(test)] const HIGH_PRIORITY: u32 = 16; +// the global maxinum of virtual time is u64::MAX / 16, so when the virtual +// time of all groups are bigger than half of this value, we rest them to avoid +// virtual time overflow. +const RESET_VT_THRESHOLD: u64 = (u64::MAX >> 4) / 2; + pub enum ResourceConsumeType { CpuTime(Duration), IoBytes(u64), @@ -146,11 +152,6 @@ pub struct ResourceController { // increase the real cost after task is executed; but don't increase it at write because // the cost is known so we just pre-consume it. is_read: bool, - // Track the maximum ru quota used to calculate the factor of each resource group. - // factor = max_ru_quota / group_ru_quota * 10.0 - // We use mutex here to ensure when we need to change this value and do adjust all resource - // groups' factors, it can't be changed concurrently. - max_ru_quota: Mutex, // record consumption of each resource group, name --> resource_group resource_consumptions: RwLock, GroupPriorityTracker>>, @@ -162,7 +163,6 @@ impl ResourceController { let controller = Self { name, is_read, - max_ru_quota: Mutex::new(DEFAULT_MAX_RU_QUOTA), resource_consumptions: RwLock::new(HashMap::default()), last_min_vt: AtomicU64::new(0), }; @@ -175,12 +175,12 @@ impl ResourceController { controller } - fn calculate_factor(max_quota: u64, quota: u64) -> u64 { + fn calculate_factor(mut quota: u64) -> u64 { + quota = min(quota, MAX_RU_QUOTA); if quota > 0 { - // we use max_quota / quota as the resource group factor, but because we need to - // cast the value to integer, so we times it by 10 to ensure the accuracy is - // enough. - (max_quota as f64 / quota as f64 * 10.0).round() as u64 + // the maxinum ru quota is very big, so the precision lost due to + // integer division is very small. 
+ MAX_RU_QUOTA / quota } else { 1 } @@ -191,13 +191,8 @@ impl ResourceController { // map 0 to medium priority(default priority) group_priority = MEDIUM_PRIORITY; } - let mut max_ru_quota = self.max_ru_quota.lock().unwrap(); - if ru_quota > *max_ru_quota { - *max_ru_quota = ru_quota; - // adjust all group weight because the current value is too small. - self.adjust_all_resource_group_factors(ru_quota); - } - let weight = Self::calculate_factor(*max_ru_quota, ru_quota); + + let weight = Self::calculate_factor(ru_quota); let vt_delta_for_get = if self.is_read { DEFAULT_PRIORITY_PER_READ_TASK * weight @@ -205,7 +200,6 @@ impl ResourceController { 0 }; let group = GroupPriorityTracker { - ru_quota, group_priority, weight, virtual_time: AtomicU64::new(self.last_min_vt.load(Ordering::Acquire)), @@ -216,20 +210,6 @@ impl ResourceController { self.resource_consumptions.write().insert(name, group); } - // we calculate the weight of each resource group based on the currently maximum - // ru quota, if a incoming resource group has a bigger quota, we need to - // adjust all the existing groups. As we expect this won't happen very - // often, and iterate 10k entry cost less than 5ms, so the performance is - // acceptable. - fn adjust_all_resource_group_factors(&self, max_ru_quota: u64) { - self.resource_consumptions - .write() - .iter_mut() - .for_each(|(_, tracker)| { - tracker.weight = Self::calculate_factor(max_ru_quota, tracker.ru_quota); - }); - } - fn remove_resource_group(&self, name: &[u8]) { // do not remove the default resource group, reset to default setting instead. 
if DEFAULT_RESOURCE_GROUP_NAME.as_bytes() == name { @@ -267,30 +247,36 @@ impl ResourceController { .iter() .for_each(|(_, tracker)| { let vt = tracker.current_vt(); - if min_vt > vt { - min_vt = vt; - } - if max_vt < vt { - max_vt = vt; - } + min_vt = min(min_vt, vt); + max_vt = max(max_vt, vt); }); // TODO: use different threshold for different resource type // needn't do update if the virtual different is less than 100ms/100KB. - if min_vt + 100_000 >= max_vt { + if min_vt + 100_000 >= max_vt && max_vt < RESET_VT_THRESHOLD { return; } + let near_overflow = min_vt > RESET_VT_THRESHOLD; self.resource_consumptions .read() .iter() .for_each(|(_, tracker)| { let vt = tracker.current_vt(); - if vt < max_vt { + // NOTE: this decrease vt is not atomic across all resource groups, + // but it should be ok as this operation should be extremely rare + // and the impact is not big. + if near_overflow { + tracker.decrease_vt(RESET_VT_THRESHOLD - (max_vt - vt) / 2); + } else if vt < max_vt { // TODO: is increase by half is a good choice. tracker.increase_vt((max_vt - vt) / 2); } }); + if near_overflow { + info!("all reset groups' virtual time are near overflow, do reset"); + max_vt -= RESET_VT_THRESHOLD; + } // max_vt is actually a little bigger than the current min vt, but we don't // need totally accurate here. self.last_min_vt.store(max_vt, Ordering::Relaxed); @@ -323,8 +309,6 @@ fn concat_priority_vt(group_priority: u32, vt: u64) -> u64 { } struct GroupPriorityTracker { - // the ru setting of this group. - ru_quota: u64, group_priority: u32, weight: u64, virtual_time: AtomicU64, @@ -355,6 +339,11 @@ impl GroupPriorityTracker { self.virtual_time.fetch_add(vt_delta, Ordering::Relaxed); } + #[inline] + fn decrease_vt(&self, vt_delta: u64) { + self.virtual_time.fetch_sub(vt_delta, Ordering::Relaxed); + } + // TODO: make it delta type as generic to avoid mixed consume different types. 
#[inline] fn consume(&self, delta: ResourceConsumeType) { @@ -462,26 +451,25 @@ pub(crate) mod tests { assert_eq!(resource_ctl.resource_consumptions.read().len(), 3); let group1 = resource_ctl.resource_group("test".as_bytes()); - assert_eq!(group1.weight, 500); let group2 = resource_ctl.resource_group("test2".as_bytes()); - assert_eq!(group2.weight, 250); + assert_eq!(group1.weight, group2.weight * 2); assert_eq!(group1.current_vt(), 0); let mut extras1 = Extras::single_level(); extras1.set_metadata("test".as_bytes().to_owned()); assert_eq!( resource_ctl.priority_of(&extras1), - concat_priority_vt(LOW_PRIORITY, 25_000) + concat_priority_vt(LOW_PRIORITY, group1.weight * 50) ); - assert_eq!(group1.current_vt(), 25_000); + assert_eq!(group1.current_vt(), group1.weight * 50); let mut extras2 = Extras::single_level(); extras2.set_metadata("test2".as_bytes().to_owned()); assert_eq!( resource_ctl.priority_of(&extras2), - concat_priority_vt(MEDIUM_PRIORITY, 12_500) + concat_priority_vt(MEDIUM_PRIORITY, group2.weight * 50) ); - assert_eq!(group2.current_vt(), 12_500); + assert_eq!(group2.current_vt(), group2.weight * 50); let mut extras3 = Extras::single_level(); extras3.set_metadata("unknown_group".as_bytes().to_owned()); @@ -505,13 +493,14 @@ pub(crate) mod tests { ResourceConsumeType::CpuTime(Duration::from_micros(10000)), ); - assert_eq!(group1.current_vt(), 5_025_000); + assert_eq!(group1.current_vt(), group1.weight * 10050); assert_eq!(group1.current_vt(), group2.current_vt() * 2); // test update all group vts resource_manager.advance_min_virtual_time(); let group1_vt = group1.current_vt(); - assert_eq!(group1_vt, 5_025_000); + let group1_weight = group1.weight; + assert_eq!(group1_vt, group1.weight * 10050); assert!(group2.current_vt() >= group1.current_vt() * 3 / 4); assert!( resource_ctl @@ -524,45 +513,51 @@ pub(crate) mod tests { drop(group2); // test add 1 new resource group - let new_group = new_resource_group_ru("new_group".into(), 500, HIGH_PRIORITY); + let 
new_group = new_resource_group_ru("new_group".into(), 600, HIGH_PRIORITY); resource_manager.add_resource_group(new_group); assert_eq!(resource_ctl.resource_consumptions.read().len(), 4); let group3 = resource_ctl.resource_group("new_group".as_bytes()); - assert_eq!(group3.weight, 200); + assert!(group1_weight - 10 <= group3.weight * 3 && group3.weight * 3 <= group1_weight + 10); assert!(group3.current_vt() >= group1_vt / 2); } #[test] - fn test_adjust_resource_group_weight() { + fn test_reset_resource_group_vt() { let resource_manager = ResourceGroupManager::default(); - let resource_ctl = resource_manager.derive_controller("test_read".into(), true); - let resource_ctl_write = resource_manager.derive_controller("test_write".into(), false); + let resource_ctl = resource_manager.derive_controller("test_write".into(), false); - let group1 = new_resource_group_ru("test1".into(), 5000, 0); + let group1 = new_resource_group_ru("g1".into(), i32::MAX as u64, 1); resource_manager.add_resource_group(group1); - assert_eq!(resource_ctl.resource_group("test1".as_bytes()).weight, 20); - assert_eq!( - resource_ctl_write.resource_group("test1".as_bytes()).weight, - 20 - ); + let group2 = new_resource_group_ru("g2".into(), 1, 16); + resource_manager.add_resource_group(group2); - // add a resource group with big ru - let group1 = new_resource_group_ru("test2".into(), 50000, 0); - resource_manager.add_resource_group(group1); - assert_eq!(*resource_ctl.max_ru_quota.lock().unwrap(), 50000); - assert_eq!(resource_ctl.resource_group("test1".as_bytes()).weight, 100); - assert_eq!(resource_ctl.resource_group("test2".as_bytes()).weight, 10); - // resource_ctl_write should be unchanged. 
- assert_eq!(*resource_ctl_write.max_ru_quota.lock().unwrap(), 50000); - assert_eq!( - resource_ctl_write.resource_group("test1".as_bytes()).weight, - 100 - ); - assert_eq!( - resource_ctl_write.resource_group("test2".as_bytes()).weight, - 10 - ); + let g1 = resource_ctl.resource_group("g1".as_bytes()); + let g2 = resource_ctl.resource_group("g2".as_bytes()); + let threshold = 1 << 59; + let mut last_g2_vt = 0; + for i in 0..8 { + resource_ctl.consume("g2".as_bytes(), ResourceConsumeType::IoBytes(1 << 25)); + resource_manager.advance_min_virtual_time(); + if i < 7 { + assert!(g2.current_vt() < threshold); + } + // after 8 round, g1's vt still under the threshold and is still increasing. + assert!(g1.current_vt() < threshold && g1.current_vt() > last_g2_vt); + last_g2_vt = g2.current_vt(); + } + + resource_ctl.consume("g2".as_bytes(), ResourceConsumeType::IoBytes(1 << 25)); + resource_manager.advance_min_virtual_time(); + assert!(g1.current_vt() > threshold); + + // adjust again, the virtual time of each group should decrease + resource_manager.advance_min_virtual_time(); + let g1_vt = g1.current_vt(); + let g2_vt = g2.current_vt(); + assert!(g2_vt < threshold / 2); + assert!(g1_vt < threshold / 2 && g1_vt < g2_vt); + assert_eq!(resource_ctl.last_min_vt.load(Ordering::Relaxed), g2_vt); } #[test] From c930237abab7f125c9d516e3e40d829d423e7a49 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Tue, 28 Mar 2023 13:26:53 +0800 Subject: [PATCH 0605/1149] raftstore-v2: implement snapshot backup for raftstore v2 (#14438) ref tikv/tikv#12842 Signed-off-by: 3pointer Co-authored-by: Ti Chi Robot --- components/backup/src/endpoint.rs | 67 ++++++++-------- components/backup/src/service.rs | 90 ++++++++++++---------- components/backup/src/writer.rs | 57 +++++++------- components/engine_traits/src/sst.rs | 2 +- components/server/src/server.rs | 11 ++- components/server/src/server2.rs | 33 +++++++- components/test_backup/src/lib.rs | 4 +- components/test_raftstore-v2/src/server.rs | 4 
+- components/test_raftstore/src/server.rs | 4 +- components/tikv_kv/src/lib.rs | 28 ++++++- src/import/mod.rs | 26 +------ src/import/sst_service.rs | 6 +- 12 files changed, 189 insertions(+), 143 deletions(-) diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 896020cf51a..4fb1705ebab 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -11,8 +11,7 @@ use std::{ use async_channel::SendError; use causal_ts::{CausalTsProvider, CausalTsProviderImpl}; use concurrency_manager::ConcurrencyManager; -use engine_rocks::RocksEngine; -use engine_traits::{name_to_cf, raw_ttl::ttl_current_ts, CfName, SstCompressionType}; +use engine_traits::{name_to_cf, raw_ttl::ttl_current_ts, CfName, KvEngine, SstCompressionType}; use external_storage::{BackendConfig, HdfsConfig}; use external_storage_export::{create_storage, ExternalStorage}; use futures::{channel::mpsc::*, executor::block_on}; @@ -28,7 +27,7 @@ use raftstore::coprocessor::RegionInfoProvider; use tikv::{ config::BackupConfig, storage::{ - kv::{CursorBuilder, Engine, ScanMode, SnapContext}, + kv::{CursorBuilder, Engine, LocalTablets, ScanMode, SnapContext}, mvcc::Error as MvccError, raw::raw_mvcc::RawMvccSnapshot, txn::{EntryBatch, Error as TxnError, SnapshotStore, TxnEntryScanner, TxnEntryStore}, @@ -163,12 +162,12 @@ pub struct BackupRange { /// The generic saveable writer. for generic `InMemBackupFiles`. /// Maybe what we really need is make Writer a trait... 
-enum KvWriter { - Txn(BackupWriter), - Raw(BackupRawKvWriter), +enum KvWriter { + Txn(BackupWriter), + Raw(BackupRawKvWriter), } -impl std::fmt::Debug for KvWriter { +impl std::fmt::Debug for KvWriter { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Txn(_) => f.debug_tuple("Txn").finish(), @@ -177,7 +176,7 @@ impl std::fmt::Debug for KvWriter { } } -impl KvWriter { +impl KvWriter { async fn save(self, storage: &dyn ExternalStorage) -> Result> { match self { Self::Txn(writer) => writer.save(storage).await, @@ -194,8 +193,8 @@ impl KvWriter { } #[derive(Debug)] -struct InMemBackupFiles { - files: KvWriter, +struct InMemBackupFiles { + files: KvWriter, start_key: Vec, end_key: Vec, start_version: TimeStamp, @@ -203,8 +202,8 @@ struct InMemBackupFiles { region: Region, } -async fn save_backup_file_worker( - rx: async_channel::Receiver, +async fn save_backup_file_worker( + rx: async_channel::Receiver>, tx: UnboundedSender, storage: Arc, codec: KeyValueCodec, @@ -276,10 +275,10 @@ async fn save_backup_file_worker( /// Send the save task to the save worker. /// Record the wait time at the same time. 
-async fn send_to_worker_with_metrics( - tx: &async_channel::Sender, - files: InMemBackupFiles, -) -> std::result::Result<(), SendError> { +async fn send_to_worker_with_metrics( + tx: &async_channel::Sender>, + files: InMemBackupFiles, +) -> std::result::Result<(), SendError>> { let files = match tx.try_send(files) { Ok(_) => return Ok(()), Err(e) => e.into_inner(), @@ -294,12 +293,12 @@ impl BackupRange { /// Get entries from the scanner and save them to storage async fn backup( &self, - writer_builder: BackupWriterBuilder, + writer_builder: BackupWriterBuilder, mut engine: E, concurrency_manager: ConcurrencyManager, backup_ts: TimeStamp, begin_ts: TimeStamp, - saver: async_channel::Sender, + saver: async_channel::Sender>, storage_name: &str, ) -> Result { assert!(!self.codec.is_raw_kv); @@ -460,9 +459,9 @@ impl BackupRange { Ok(stat) } - fn backup_raw( + fn backup_raw( &self, - writer: &mut BackupRawKvWriter, + writer: &mut BackupRawKvWriter, snapshot: &S, ) -> Result { assert!(self.codec.is_raw_kv); @@ -524,14 +523,14 @@ impl BackupRange { async fn backup_raw_kv_to_file( &self, mut engine: E, - db: RocksEngine, + db: E::Local, limiter: &Limiter, file_name: String, cf: CfNameWrap, compression_type: Option, compression_level: i32, cipher: CipherInfo, - saver_tx: async_channel::Sender, + saver_tx: async_channel::Sender>, ) -> Result { let mut writer = match BackupRawKvWriter::new( db, @@ -679,7 +678,7 @@ pub struct Endpoint { store_id: u64, pool: RefCell, io_pool: Runtime, - db: RocksEngine, + tablets: LocalTablets, config_manager: ConfigManager, concurrency_manager: ConcurrencyManager, softlimit: SoftLimitKeeper, @@ -834,7 +833,7 @@ impl Endpoint { store_id: u64, engine: E, region_info: R, - db: RocksEngine, + tablets: LocalTablets, config: BackupConfig, concurrency_manager: ConcurrencyManager, api_version: ApiVersion, @@ -850,7 +849,7 @@ impl Endpoint { engine, region_info, pool: RefCell::new(pool), - db, + tablets, io_pool: rt, softlimit, config_manager, @@ 
-885,14 +884,14 @@ impl Endpoint { &self, prs: Arc>>, request: Request, - saver_tx: async_channel::Sender, + saver_tx: async_channel::Sender>, resp_tx: UnboundedSender, _backend: Arc, ) { let start_ts = request.start_ts; let backup_ts = request.end_ts; let engine = self.engine.clone(); - let db = self.db.clone(); + let tablets = self.tablets.clone(); let store_id = self.store_id; let concurrency_manager = self.concurrency_manager.clone(); let batch_size = self.config_manager.0.read().unwrap().batch_size; @@ -947,12 +946,19 @@ impl Endpoint { }); let name = backup_file_name(store_id, &brange.region, key, _backend.name()); let ct = to_sst_compression_type(request.compression_type); + let db = match tablets.get(brange.region.id) { + Some(t) => t, + None => { + warn!("backup region not found"; "region" => ?brange.region.id); + return; + } + }; let stat = if is_raw_kv { brange .backup_raw_kv_to_file( engine, - db.clone(), + db.into_owned(), &request.limiter, name, cf.into(), @@ -967,7 +973,7 @@ impl Endpoint { store_id, request.limiter.clone(), brange.region.clone(), - db.clone(), + db.into_owned(), ct, request.compression_level, sst_max_size, @@ -1270,6 +1276,7 @@ pub mod tests { use tikv::{ coprocessor::checksum_crc64_xor, storage::{ + kv::LocalTablets, txn::tests::{must_commit, must_prewrite_put}, RocksEngine, TestEngineBuilder, }, @@ -1402,7 +1409,7 @@ pub mod tests { 1, rocks, MockRegionInfoProvider::new(need_encode_key), - db, + LocalTablets::Singleton(db), BackupConfig { num_threads: 4, batch_size: 8, diff --git a/components/backup/src/service.rs b/components/backup/src/service.rs index dd3355b1e92..237234c061e 100644 --- a/components/backup/src/service.rs +++ b/components/backup/src/service.rs @@ -1,47 +1,53 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{marker::PhantomData, sync::atomic::*}; +use std::sync::atomic::*; -use engine_traits::KvEngine; +use engine_traits::{KvEngine, RaftEngine}; use futures::{channel::mpsc, FutureExt, SinkExt, StreamExt, TryFutureExt}; use grpcio::{self, *}; use kvproto::brpb::*; -use raftstore::{ - router::RaftStoreRouter, - store::msg::{PeerMsg, SignificantMsg}, +use raftstore::store::{ + fsm::store::RaftRouter, + msg::{PeerMsg, SignificantMsg}, }; use tikv_util::{error, info, worker::*}; use super::Task; /// Service handles the RPC messages for the `Backup` service. - #[derive(Clone)] -pub struct Service { +pub struct Service { scheduler: Scheduler, - router: RR, - _phantom: PhantomData, + router: Option>, } -impl Service +impl Service where - E: KvEngine, - RR: RaftStoreRouter, + EK: KvEngine, + ER: RaftEngine, { - /// Create a new backup service. - pub fn new(scheduler: Scheduler, router: RR) -> Self { + // Create a new backup service without router, this used for raftstore v2. + // because we don't have RaftStoreRouter any more. + pub fn new(scheduler: Scheduler) -> Self { + Service { + scheduler, + router: None, + } + } + + // Create a new backup service with router, this used for raftstore v1. 
+ pub fn with_router(scheduler: Scheduler, router: RaftRouter) -> Self { Service { scheduler, - router, - _phantom: PhantomData, + router: Some(router), } } } -impl Backup for Service +impl Backup for Service where - E: KvEngine, - RR: RaftStoreRouter, + EK: KvEngine, + ER: RaftEngine, { fn check_pending_admin_op( &mut self, @@ -50,25 +56,33 @@ where mut sink: ServerStreamingSink, ) { let (tx, rx) = mpsc::unbounded(); - self.router.broadcast_normal(|| { - PeerMsg::SignificantMsg(SignificantMsg::CheckPendingAdmin(tx.clone())) - }); - - let send_task = async move { - let mut s = rx.map(|resp| Ok((resp, WriteFlags::default()))); - sink.send_all(&mut s).await?; - sink.close().await?; - Ok(()) - } - .map(|res: Result<()>| match res { - Ok(_) => { - info!("check admin closed"); + match &self.router { + Some(router) => { + router.broadcast_normal(|| { + PeerMsg::SignificantMsg(SignificantMsg::CheckPendingAdmin(tx.clone())) + }); + let send_task = async move { + let mut s = rx.map(|resp| Ok((resp, WriteFlags::default()))); + sink.send_all(&mut s).await?; + sink.close().await?; + Ok(()) + } + .map(|res: Result<()>| match res { + Ok(_) => { + info!("check admin closed"); + } + Err(e) => { + error!("check admin canceled"; "error" => ?e); + } + }); + ctx.spawn(send_task); } - Err(e) => { - error!("check admin canceled"; "error" => ?e); + None => { + // check pending admin reqeust is used for EBS Backup. + // for raftstore v2. we don't need it for now. 
so just return unimplemented + unimplemented_call!(ctx, sink) } - }); - ctx.spawn(send_task); + } } fn backup( @@ -131,7 +145,6 @@ mod tests { use engine_rocks::RocksEngine; use external_storage_export::make_local_backend; - use raftstore::router::RaftStoreBlackHole; use tikv::storage::txn::tests::{must_commit, must_prewrite_put}; use tikv_util::worker::{dummy_scheduler, ReceiverWrapper}; use txn_types::TimeStamp; @@ -142,8 +155,7 @@ mod tests { fn new_rpc_suite() -> (Server, BackupClient, ReceiverWrapper) { let env = Arc::new(EnvBuilder::new().build()); let (scheduler, rx) = dummy_scheduler(); - let backup_service = - super::Service::::new(scheduler, RaftStoreBlackHole); + let backup_service = super::Service::::new(scheduler); let builder = ServerBuilder::new(env.clone()).register_service(create_backup(backup_service)); let mut server = builder.bind("127.0.0.1", 0).build().unwrap(); diff --git a/components/backup/src/writer.rs b/components/backup/src/writer.rs index 7a853fe485f..715c4f68291 100644 --- a/components/backup/src/writer.rs +++ b/components/backup/src/writer.rs @@ -3,10 +3,9 @@ use std::{fmt::Display, io::Read}; use encryption::{EncrypterReader, Iv}; -use engine_rocks::{RocksEngine, RocksSstWriter, RocksSstWriterBuilder}; use engine_traits::{ - CfName, ExternalSstFileInfo, SstCompressionType, SstWriter, SstWriterBuilder, CF_DEFAULT, - CF_WRITE, + CfName, ExternalSstFileInfo, KvEngine, SstCompressionType, SstExt, SstWriter, SstWriterBuilder, + CF_DEFAULT, CF_WRITE, }; use external_storage_export::{ExternalStorage, UnpinReader}; use file_system::Sha256Reader; @@ -49,16 +48,16 @@ impl From for CfName { } } -struct Writer { - writer: RocksSstWriter, +struct Writer { + writer: W, total_kvs: u64, total_bytes: u64, checksum: u64, digest: crc64fast::Digest, } -impl Writer { - fn new(writer: RocksSstWriter) -> Self { +impl Writer { + fn new(writer: W) -> Self { Writer { writer, total_kvs: 0, @@ -98,9 +97,7 @@ impl Writer { Ok(()) } - // FIXME: we cannot get 
sst_info in [save_and_build_file], which may cause the - // !Send type [RocksEnternalSstFileInfo] sent between threads. - fn finish_read(writer: RocksSstWriter) -> Result<(u64, impl Read)> { + fn finish_read(writer: W) -> Result<(u64, impl Read)> { let (sst_info, sst_reader) = writer.finish_read()?; Ok((sst_info.file_size(), sst_reader)) } @@ -163,28 +160,28 @@ impl Writer { } } -pub struct BackupWriterBuilder { +pub struct BackupWriterBuilder { store_id: u64, limiter: Limiter, region: Region, - db: RocksEngine, + db: EK, compression_type: Option, compression_level: i32, sst_max_size: u64, cipher: CipherInfo, } -impl BackupWriterBuilder { +impl BackupWriterBuilder { pub fn new( store_id: u64, limiter: Limiter, region: Region, - db: RocksEngine, + db: EK, compression_type: Option, compression_level: i32, sst_max_size: u64, cipher: CipherInfo, - ) -> BackupWriterBuilder { + ) -> BackupWriterBuilder { Self { store_id, limiter, @@ -197,7 +194,7 @@ impl BackupWriterBuilder { } } - pub fn build(&self, start_key: Vec, storage_name: &str) -> Result { + pub fn build(&self, start_key: Vec, storage_name: &str) -> Result> { let key = file_system::sha256(&start_key).ok().map(hex::encode); let store_id = self.store_id; let name = backup_file_name(store_id, &self.region, key, storage_name); @@ -214,34 +211,34 @@ impl BackupWriterBuilder { } /// A writer writes txn entries into SST files. -pub struct BackupWriter { +pub struct BackupWriter { name: String, - default: Writer, - write: Writer, + default: Writer<::SstWriter>, + write: Writer<::SstWriter>, limiter: Limiter, sst_max_size: u64, cipher: CipherInfo, } -impl BackupWriter { +impl BackupWriter { /// Create a new BackupWriter. 
pub fn new( - db: RocksEngine, + db: EK, name: &str, compression_type: Option, compression_level: i32, limiter: Limiter, sst_max_size: u64, cipher: CipherInfo, - ) -> Result { - let default = RocksSstWriterBuilder::new() + ) -> Result> { + let default = ::SstWriterBuilder::new() .set_in_memory(true) .set_cf(CF_DEFAULT) .set_db(&db) .set_compression_type(compression_type) .set_compression_level(compression_level) .build(name)?; - let write = RocksSstWriterBuilder::new() + let write = ::SstWriterBuilder::new() .set_in_memory(true) .set_cf(CF_WRITE) .set_db(&db) @@ -338,19 +335,19 @@ impl BackupWriter { } /// A writer writes Raw kv into SST files. -pub struct BackupRawKvWriter { +pub struct BackupRawKvWriter { name: String, cf: CfName, - writer: Writer, + writer: Writer<::SstWriter>, limiter: Limiter, cipher: CipherInfo, codec: KeyValueCodec, } -impl BackupRawKvWriter { +impl BackupRawKvWriter { /// Create a new BackupRawKvWriter. pub fn new( - db: RocksEngine, + db: EK, name: &str, cf: CfNameWrap, limiter: Limiter, @@ -358,8 +355,8 @@ impl BackupRawKvWriter { compression_level: i32, cipher: CipherInfo, codec: KeyValueCodec, - ) -> Result { - let writer = RocksSstWriterBuilder::new() + ) -> Result> { + let writer = ::SstWriterBuilder::new() .set_in_memory(true) .set_cf(cf.into()) .set_db(&db) diff --git a/components/engine_traits/src/sst.rs b/components/engine_traits/src/sst.rs index a97fe7a8b87..ea08df3bb50 100644 --- a/components/engine_traits/src/sst.rs +++ b/components/engine_traits/src/sst.rs @@ -28,7 +28,7 @@ pub trait SstReader: RefIterable + Sized { /// SstWriter is used to create sst files that can be added to database later. pub trait SstWriter: Send { type ExternalSstFileInfo: ExternalSstFileInfo; - type ExternalSstFileReader: std::io::Read; + type ExternalSstFileReader: std::io::Read + Send; /// Add key, value to currently opened file /// REQUIRES: key is after any previously added key according to comparator. 
diff --git a/components/server/src/server.rs b/components/server/src/server.rs index b9563f295b5..e37c6f9fe3b 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -91,7 +91,7 @@ use tikv::{ config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, - import::{ImportSstService, LocalTablets, SstImporter}, + import::{ImportSstService, SstImporter}, read_pool::{ build_yatp_read_pool, ReadPool, ReadPoolConfigManager, UPDATE_EWMA_TIME_SLICE_INTERVAL, }, @@ -111,6 +111,7 @@ use tikv::{ storage::{ self, config_manager::StorageConfigManger, + kv::LocalTablets, mvcc::MvccConsistencyCheckObserver, txn::flow_controller::{EngineFlowController, FlowController}, Engine, Storage, @@ -1319,10 +1320,8 @@ where // Backup service. let mut backup_worker = Box::new(self.background_worker.lazy_build("backup-endpoint")); let backup_scheduler = backup_worker.scheduler(); - let backup_service = backup::Service::>::new( - backup_scheduler, - self.router.clone(), - ); + let backup_service = + backup::Service::::with_router(backup_scheduler, self.router.clone()); if servers .server .register_service(create_backup(backup_service)) @@ -1335,7 +1334,7 @@ where servers.node.id(), engines.engine.clone(), self.region_info_accessor.clone(), - engines.engines.kv.clone(), + LocalTablets::Singleton(engines.engines.kv.clone()), self.config.backup.clone(), self.concurrency_manager.clone(), self.config.storage.api_version(), diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index ef38c3e2286..6c96ce62ffb 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -48,7 +48,7 @@ use futures::executor::block_on; use grpcio::{EnvBuilder, Environment}; use grpcio_health::HealthService; use kvproto::{ - deadlock::create_deadlock, diagnosticspb::create_diagnostics, + brpb::create_backup, deadlock::create_deadlock, 
diagnosticspb::create_diagnostics, import_sstpb_grpc::create_import_sst, kvrpcpb::ApiVersion, resource_usage_agent::create_resource_metering_pub_sub, }; @@ -74,7 +74,7 @@ use tikv::{ config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, - import::{ImportSstService, LocalTablets, SstImporter}, + import::{ImportSstService, SstImporter}, read_pool::{ build_yatp_read_pool, ReadPool, ReadPoolConfigManager, UPDATE_EWMA_TIME_SLICE_INTERVAL, }, @@ -92,6 +92,7 @@ use tikv::{ storage::{ self, config_manager::StorageConfigManger, + kv::LocalTablets, mvcc::MvccConsistencyCheckObserver, txn::flow_controller::{FlowController, TabletFlowController}, Engine, Storage, @@ -981,6 +982,34 @@ where let servers = self.servers.as_mut().unwrap(); let engines = self.engines.as_ref().unwrap(); + // Backup service. + let mut backup_worker = Box::new(self.background_worker.lazy_build("backup-endpoint")); + let backup_scheduler = backup_worker.scheduler(); + let backup_service = backup::Service::::new(backup_scheduler); + if servers + .server + .register_service(create_backup(backup_service)) + .is_some() + { + fatal!("failed to register backup service"); + } + + let backup_endpoint = backup::Endpoint::new( + self.node.as_ref().unwrap().id(), + engines.engine.clone(), + self.region_info_accessor.clone().unwrap(), + LocalTablets::Registry(self.tablet_registry.as_ref().unwrap().clone()), + self.config.backup.clone(), + self.concurrency_manager.clone(), + self.config.storage.api_version(), + self.causal_ts_provider.clone(), + ); + self.cfg_controller.as_mut().unwrap().register( + tikv::config::Module::Backup, + Box::new(backup_endpoint.get_config_manager()), + ); + backup_worker.start(backup_endpoint); + // Import SST service. 
let import_service = ImportSstService::new( self.config.import.clone(), diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index cb669070b9e..34eb6e8aa9e 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -26,7 +26,7 @@ use tikv::{ config::BackupConfig, coprocessor::{checksum_crc64_xor, dag::TikvStorage}, storage::{ - kv::{Engine, SnapContext}, + kv::{Engine, LocalTablets, SnapContext}, SnapshotStore, }, }; @@ -85,7 +85,7 @@ impl TestSuite { *id, sim.storages[id].clone(), sim.region_info_accessors[id].clone(), - engines.kv.clone(), + LocalTablets::Singleton(engines.kv.clone()), BackupConfig { num_threads: 4, batch_size: 8, diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index cc09dd09c4c..ed2a44d80fa 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -49,7 +49,7 @@ use test_pd_client::TestPdClient; use test_raftstore::{filter_send, AddressMap, Config, Filter}; use tikv::{ coprocessor, coprocessor_v2, - import::{ImportSstService, LocalTablets, SstImporter}, + import::{ImportSstService, SstImporter}, read_pool::ReadPool, server::{ gc_worker::GcWorker, load_statistics::ThreadLoadPool, lock_manager::LockManager, @@ -59,7 +59,7 @@ use tikv::{ }, storage::{ self, - kv::{FakeExtension, RaftExtension, SnapContext}, + kv::{FakeExtension, LocalTablets, RaftExtension, SnapContext}, txn::flow_controller::{EngineFlowController, FlowController}, Engine, Storage, }, diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 54da33fa3dd..e7b43850e27 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -50,7 +50,7 @@ use test_pd_client::TestPdClient; use tikv::{ config::ConfigController, coprocessor, coprocessor_v2, - import::{ImportSstService, LocalTablets, SstImporter}, + import::{ImportSstService, SstImporter}, 
read_pool::ReadPool, server::{ gc_worker::GcWorker, @@ -65,7 +65,7 @@ use tikv::{ }, storage::{ self, - kv::{FakeExtension, SnapContext}, + kv::{FakeExtension, LocalTablets, SnapContext}, txn::flow_controller::{EngineFlowController, FlowController}, Engine, Storage, }, diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index c5313620995..05d039d2690 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -25,6 +25,7 @@ mod rocksdb_engine; mod stats; use std::{ + borrow::Cow, cell::UnsafeCell, error, num::NonZeroU64, @@ -35,8 +36,8 @@ use std::{ use collections::HashMap; use engine_traits::{ - CfName, IterOptions, KvEngine as LocalEngine, Mutable, MvccProperties, ReadOptions, WriteBatch, - CF_DEFAULT, CF_LOCK, + CfName, IterOptions, KvEngine as LocalEngine, Mutable, MvccProperties, ReadOptions, + TabletRegistry, WriteBatch, CF_DEFAULT, CF_LOCK, }; use error_code::{self, ErrorCode, ErrorCodeExt}; use futures::{compat::Future01CompatExt, future::BoxFuture, prelude::*}; @@ -784,6 +785,29 @@ pub fn write_modifies(kv_engine: &impl LocalEngine, modifies: Vec) -> Re Ok(()) } +#[derive(Clone)] +pub enum LocalTablets { + Singleton(EK), + Registry(TabletRegistry), +} + +impl LocalTablets { + /// Get the tablet of the given region. + /// + /// If `None` is returned, the region may not exist or may not initialized. + /// If there are multiple versions of tablet, the latest one is returned + /// with best effort. 
+ pub fn get(&self, region_id: u64) -> Option> { + match self { + LocalTablets::Singleton(tablet) => Some(Cow::Borrowed(tablet)), + LocalTablets::Registry(registry) => { + let mut cached = registry.get(region_id)?; + cached.latest().cloned().map(Cow::Owned) + } + } + } +} + pub const TEST_ENGINE_CFS: &[CfName] = &[CF_DEFAULT, "cf"]; pub mod tests { diff --git a/src/import/mod.rs b/src/import/mod.rs index 7ee5647f723..e2fa3729e52 100644 --- a/src/import/mod.rs +++ b/src/import/mod.rs @@ -15,9 +15,8 @@ mod duplicate_detect; mod sst_service; -use std::{borrow::Cow, fmt::Debug}; +use std::fmt::Debug; -use engine_traits::TabletRegistry; use grpcio::{RpcStatus, RpcStatusCode}; pub use sst_importer::{Config, Error, Result, SstImporter, TxnSstWriter}; @@ -49,26 +48,3 @@ macro_rules! send_rpc_response { let _ = res.map_err(|e| warn!("send rpc response"; "err" => %e)).await; }}; } - -#[derive(Clone)] -pub enum LocalTablets { - Singleton(EK), - Registry(TabletRegistry), -} - -impl LocalTablets { - /// Get the tablet of the given region. - /// - /// If `None` is returned, the region may not exist or may not initialized. - /// If there are multiple versions of tablet, the latest one is returned - /// with best effort. 
- fn get(&self, region_id: u64) -> Option> { - match self { - LocalTablets::Singleton(tablet) => Some(Cow::Borrowed(tablet)), - LocalTablets::Registry(registry) => { - let mut cached = registry.get(region_id)?; - cached.latest().cloned().map(Cow::Owned) - } - } - } -} diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index b23046bfe4b..12cb0ca892b 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -29,7 +29,9 @@ use sst_importer::{ error_inc, metrics::*, sst_importer::DownloadExt, sst_meta_to_path, Config, ConfigManager, Error, Result, SstImporter, }; -use tikv_kv::{Engine, Modify, SnapContext, Snapshot, SnapshotExt, WriteData, WriteEvent}; +use tikv_kv::{ + Engine, LocalTablets, Modify, SnapContext, Snapshot, SnapshotExt, WriteData, WriteEvent, +}; use tikv_util::{ config::ReadableSize, future::create_stream_with_buffer, @@ -40,7 +42,7 @@ use tikv_util::{ use tokio::{runtime::Runtime, time::sleep}; use txn_types::{Key, WriteRef, WriteType}; -use super::{make_rpc_error, LocalTablets}; +use super::make_rpc_error; use crate::{ import::duplicate_detect::DuplicateDetector, server::CONFIG_ROCKSDB_GAUGE, From 5a2ff323d6fae82c624ad802e8100d3154d01ba1 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 28 Mar 2023 16:26:54 +0800 Subject: [PATCH 0606/1149] config: fix alias name snap-max-write-bytes-per-sec (#14463) close tikv/tikv#14455 Signed-off-by: Jay Lee Co-authored-by: Ti Chi Robot --- src/server/config.rs | 2 +- tests/integrations/config/mod.rs | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/server/config.rs b/src/server/config.rs index 5f15e72ae2f..d954ebac36f 100644 --- a/src/server/config.rs +++ b/src/server/config.rs @@ -146,7 +146,7 @@ pub struct Config { #[serde(with = "perf_level_serde")] #[online_config(skip)] pub end_point_perf_level: PerfLevel, - #[serde(alias = "snap_max_write_bytes_per_sec")] + #[serde(alias = "snap-max-write-bytes-per-sec")] pub snap_io_max_bytes_per_sec: 
ReadableSize, pub snap_max_total_size: ReadableSize, #[online_config(skip)] diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index ff6807fa6a1..02b5c711e96 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -903,3 +903,24 @@ fn test_log_backward_compatible() { assert_eq!(cfg.log.format, LogFormat::Json); assert_eq!(cfg.log.file.max_size, 1024); } + +#[test] +fn test_rename_compatibility() { + let old_content = r#" +[server] +snap-max-write-bytes-per-sec = "10MiB" + +[storage] +engine = "raft-kv2" + "#; + let new_content = r#" +[server] +snap-io-max-bytes-per-sec = "10MiB" + +[storage] +engine = "partitioned-raft-kv" + "#; + let old_cfg: TikvConfig = toml::from_str(old_content).unwrap(); + let new_cfg: TikvConfig = toml::from_str(new_content).unwrap(); + assert_eq_debug(&old_cfg, &new_cfg); +} From 9eeda1416ff050cc6468c9ded18b9719545a6691 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 28 Mar 2023 17:12:54 -0700 Subject: [PATCH 0607/1149] fix io breakdown for foreground write (#14456) ref tikv/tikv#12842 async io thread's write should be foreground write Signed-off-by: qi.xu Co-authored-by: qi.xu --- components/raftstore/src/store/async_io/write.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index e94f7360c23..0da8d1546b5 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -20,6 +20,7 @@ use engine_traits::{ }; use error_code::ErrorCodeExt; use fail::fail_point; +use file_system::{set_io_type, IoType}; use kvproto::raft_serverpb::{RaftLocalState, RaftMessage}; use parking_lot::Mutex; use protobuf::Message; @@ -1026,6 +1027,7 @@ where thread::Builder::new() .name(thd_name!(tag)) .spawn_wrapper(move || { + set_io_type(IoType::ForegroundWrite); worker.run(); })?; cached_senders.push(tx); From 
f8bf08c567ada625db421c949b0de3757e16589b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 29 Mar 2023 11:52:55 +0800 Subject: [PATCH 0608/1149] log-backup: Using PD as metastore (#14278) close tikv/tikv#13867 This also makes `etcd-client` and `tonic` optional requirements, you can enable them by `metastore-etcd`. Signed-off-by: hillium Signed-off-by: Yu Juncen Co-authored-by: Ti Chi Robot --- Cargo.lock | 3 + components/backup-stream/Cargo.toml | 11 +- components/backup-stream/src/endpoint.rs | 1 - components/backup-stream/src/errors.rs | 5 + .../backup-stream/src/metadata/client.rs | 165 +++------ components/backup-stream/src/metadata/keys.rs | 30 +- components/backup-stream/src/metadata/mod.rs | 1 + .../backup-stream/src/metadata/store/mod.rs | 35 +- .../backup-stream/src/metadata/store/pd.rs | 324 ++++++++++++++++++ components/backup-stream/src/metadata/test.rs | 16 +- components/pd_client/src/client.rs | 98 +++++- components/pd_client/src/lib.rs | 1 + components/pd_client/src/meta_storage.rs | 302 ++++++++++++++++ components/pd_client/src/metrics.rs | 4 + components/pd_client/src/util.rs | 6 + components/server/src/server.rs | 22 +- components/test_pd/src/mocker/meta_storage.rs | 113 ++++++ components/test_pd/src/mocker/mod.rs | 21 +- components/test_pd/src/server.rs | 46 ++- components/tikv_util/src/codec/mod.rs | 28 ++ 20 files changed, 1058 insertions(+), 174 deletions(-) create mode 100644 components/backup-stream/src/metadata/store/pd.rs create mode 100644 components/pd_client/src/meta_storage.rs create mode 100644 components/test_pd/src/mocker/meta_storage.rs diff --git a/Cargo.lock b/Cargo.lock index 1cb40d842cd..62746ba6bcb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -511,6 +511,7 @@ dependencies = [ "async-compression", "async-trait", "bytes", + "cfg-if 1.0.0", "chrono", "concurrency_manager", "crossbeam", @@ -536,6 +537,7 @@ dependencies = [ "online_config", "openssl", "pd_client", + 
"pin-project", "prometheus", "protobuf", "raft", @@ -548,6 +550,7 @@ dependencies = [ "slog-global", "tempdir", "tempfile", + "test_pd", "test_raftstore", "test_util", "thiserror", diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index f3f1b482be0..d6d6f7a6fc4 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -11,6 +11,8 @@ test-engines-rocksdb = ["tikv/test-engines-rocksdb"] failpoints = ["tikv/failpoints", "fail/failpoints"] backup-stream-debug = [] +metastore-etcd = ["tonic", "etcd-client"] + [[test]] name = "integration" path = "tests/mod.rs" @@ -22,6 +24,7 @@ harness = true async-compression = { version = "0.3.14", features = ["tokio", "zstd"] } async-trait = { version = "0.1" } bytes = "1" +cfg-if = "1" chrono = "0.4" concurrency_manager = { workspace = true } crossbeam = "0.8" @@ -32,7 +35,7 @@ engine_traits = { workspace = true } error_code = { workspace = true } # We cannot update the etcd-client to latest version because of the cyclic requirement. # Also we need wait until https://github.com/etcdv3/etcd-client/pull/43/files to be merged. 
-etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "41d393c32a7a7c728550cee1d9a138dafe6f3e27", features = ["pub-response-field", "tls-openssl-vendored"] } +etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "41d393c32a7a7c728550cee1d9a138dafe6f3e27", features = ["pub-response-field", "tls-openssl-vendored"], optional = true } external_storage = { workspace = true } external_storage_export = { workspace = true } fail = "0.5" @@ -49,10 +52,12 @@ log_wrappers = { workspace = true } online_config = { workspace = true } openssl = "0.10" pd_client = { workspace = true } +pin-project = "1.0" prometheus = { version = "0.13", default-features = false, features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raftstore = { workspace = true } +rand = "0.8.0" regex = "1" resolved_ts = { workspace = true } security = { path = "../security" } @@ -67,7 +72,7 @@ tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread", "macros", "time", "sync"] } tokio-stream = "0.1" tokio-util = { version = "0.7", features = ["compat"] } -tonic = "0.8" +tonic = { version = "0.8", optional = true } txn_types = { workspace = true } uuid = "0.8" yatp = { workspace = true } @@ -78,9 +83,9 @@ engine_panic = { workspace = true } grpcio = { workspace = true } hex = "0.4" protobuf = { version = "2.8", features = ["bytes"] } -rand = "0.8.0" tempdir = "0.3" tempfile = "3.0" +test_pd = { workspace = true } test_raftstore = { workspace = true } test_util = { workspace = true } url = "2" diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index d8c0e09744f..68f040217ea 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -620,7 +620,6 @@ where let task_clone = task.clone(); let run = async move { let task_name = task.info.get_name(); - 
cli.init_task(&task.info).await?; let ranges = cli.ranges_of_task(task_name).await?; info!( "register backup stream ranges"; diff --git a/components/backup-stream/src/errors.rs b/components/backup-stream/src/errors.rs index 2fecf0ac514..c3cc91da9ff 100644 --- a/components/backup-stream/src/errors.rs +++ b/components/backup-stream/src/errors.rs @@ -5,6 +5,7 @@ use std::{ }; use error_code::ErrorCodeExt; +#[cfg(feature = "metastore-etcd")] use etcd_client::Error as EtcdError; use grpcio::Error as GrpcError; use kvproto::{errorpb::Error as StoreError, metapb::*}; @@ -21,6 +22,7 @@ use crate::{endpoint::Task, metrics}; pub enum Error { #[error("gRPC meet error {0}")] Grpc(#[from] GrpcError), + #[cfg(feature = "metasotre-etcd")] #[error("Etcd meet error {0}")] Etcd(#[from] EtcdErrorExt), #[error("Protobuf meet error {0}")] @@ -52,12 +54,14 @@ pub enum Error { Other(#[from] Box), } +#[cfg(feature = "metastore-etcd")] impl From for Error { fn from(value: EtcdError) -> Self { Self::Etcd(value.into()) } } +#[cfg(feature = "metastore-etcd")] #[derive(ThisError, Debug)] pub enum EtcdErrorExt { #[error("{0}")] @@ -72,6 +76,7 @@ impl ErrorCodeExt for Error { fn error_code(&self) -> error_code::ErrorCode { use error_code::backup_stream::*; match self { + #[cfg(feature = "metastore-etcd")] Error::Etcd(_) => ETCD, Error::Protobuf(_) => PROTO, Error::NoSuchTask { .. 
} => NO_SUCH_TASK, diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index 97e8d2140b5..fca8a07b654 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -15,8 +15,8 @@ use super::{ checkpoint_cache::CheckpointCache, keys::{self, KeyValue, MetaKey}, store::{ - CondTransaction, Condition, GetExtra, Keys, KvEvent, KvEventType, MetaStore, Snapshot, - Subscription, Transaction, WithRevision, + CondTransaction, Condition, Keys, KvEvent, KvEventType, MetaStore, Snapshot, Subscription, + Transaction, WithRevision, }, }; use crate::{ @@ -48,6 +48,7 @@ impl Debug for StreamTask { .field("table_filter", &self.info.table_filter) .field("start_ts", &self.info.start_ts) .field("end_ts", &self.info.end_ts) + .field("is_paused", &self.is_paused) .finish() } } @@ -292,8 +293,7 @@ impl MetadataClient { ) -> Result> { let key = MetaKey::last_error_of(name, store_id); - let s = self.meta_store.snapshot().await?; - let r = s.get(Keys::Key(key)).await?; + let r = self.meta_store.get_latest(Keys::Key(key)).await?.inner; if r.is_empty() { return Ok(None); } @@ -304,8 +304,11 @@ impl MetadataClient { /// check whether the task is paused. pub async fn check_task_paused(&self, name: &str) -> Result { - let snap = self.meta_store.snapshot().await?; - let kvs = snap.get(Keys::Key(MetaKey::pause_of(name))).await?; + let kvs = self + .meta_store + .get_latest(Keys::Key(MetaKey::pause_of(name))) + .await? + .inner; Ok(!kvs.is_empty()) } @@ -317,8 +320,11 @@ impl MetadataClient { } pub async fn get_tasks_pause_status(&self) -> Result, bool>> { - let snap = self.meta_store.snapshot().await?; - let kvs = snap.get(Keys::Prefix(MetaKey::pause_prefix())).await?; + let kvs = self + .meta_store + .get_latest(Keys::Prefix(MetaKey::pause_prefix())) + .await? 
+ .inner; let mut pause_hash = HashMap::new(); let prefix_len = MetaKey::pause_prefix_len(); @@ -338,10 +344,9 @@ impl MetadataClient { } let items = self .meta_store - .snapshot() + .get_latest(Keys::Key(MetaKey::task_of(name))) .await? - .get(Keys::Key(MetaKey::task_of(name))) - .await?; + .inner; if items.is_empty() { return Ok(None); } @@ -362,11 +367,13 @@ impl MetadataClient { "faild to connect etcd client".to_string(), )) }); - let snap = self.meta_store.snapshot().await?; - let kvs = snap.get(Keys::Prefix(MetaKey::tasks())).await?; + let kvs = self + .meta_store + .get_latest(Keys::Prefix(MetaKey::tasks())) + .await?; - let mut tasks = Vec::with_capacity(kvs.len()); - for kv in kvs { + let mut tasks = Vec::with_capacity(kvs.inner.len()); + for kv in kvs.inner { let t = protobuf::parse_from_bytes::(kv.value())?; let paused = self.check_task_paused(t.get_name()).await?; tasks.push(StreamTask { @@ -376,7 +383,7 @@ impl MetadataClient { } Ok(WithRevision { inner: tasks, - revision: snap.revision(), + revision: kvs.revision, }) } @@ -455,13 +462,14 @@ impl MetadataClient { defer! { super::metrics::METADATA_OPERATION_LATENCY.with_label_values(&["task_step"]).observe(now.saturating_elapsed().as_secs_f64()) } - let snap = self.meta_store.snapshot().await?; - let ts = snap - .get(Keys::Key(MetaKey::storage_checkpoint_of( + let ts = self + .meta_store + .get_latest(Keys::Key(MetaKey::storage_checkpoint_of( task_name, self.store_id, ))) - .await?; + .await? + .inner; match ts.as_slice() { [ts, ..] => Ok(TimeStamp::new(parse_ts_from_bytes(ts.value())?)), @@ -488,13 +496,14 @@ impl MetadataClient { defer! { super::metrics::METADATA_OPERATION_LATENCY.with_label_values(&["task_step"]).observe(now.saturating_elapsed().as_secs_f64()) } - let snap = self.meta_store.snapshot().await?; - let ts = snap - .get(Keys::Key(MetaKey::next_backup_ts_of( + let ts = self + .meta_store + .get_latest(Keys::Key(MetaKey::next_backup_ts_of( task_name, self.store_id, ))) - .await?; + .await? 
+ .inner; match ts.as_slice() { [ts, ..] => Ok(TimeStamp::new(parse_ts_from_bytes(ts.value())?)), @@ -507,96 +516,16 @@ impl MetadataClient { &self, task_name: &str, ) -> Result, Vec)>>> { - let snap = self.meta_store.snapshot().await?; - let ranges = snap - .get(Keys::Prefix(MetaKey::ranges_of(task_name))) + let ranges = self + .meta_store + .get_latest(Keys::Prefix(MetaKey::ranges_of(task_name))) .await?; - Ok(WithRevision { - revision: snap.revision(), - inner: ranges - .into_iter() + Ok(ranges.map(|rs| { + rs.into_iter() .map(|mut kv: KeyValue| kv.take_range(task_name)) - .collect(), - }) - } - - /// Perform a two-phase bisection search algorithm for the intersection of - /// all ranges and the specificated range (usually region range.) - /// TODO: explain the algorithm? - pub async fn range_overlap_of_task( - &self, - task_name: &str, - (start_key, end_key): (Vec, Vec), - ) -> Result, Vec)>>> { - let now = Instant::now(); - defer! { - super::metrics::METADATA_OPERATION_LATENCY.with_label_values(&["task_range_search"]).observe(now.saturating_elapsed().as_secs_f64()) - } - let snap = self.meta_store.snapshot().await?; - - let mut prev = snap - .get_extra( - Keys::Range( - MetaKey::ranges_of(task_name), - MetaKey::range_of(task_name, &start_key), - ), - GetExtra { - desc_order: true, - limit: 1, - ..Default::default() - }, - ) - .await?; - let all = snap - .get(Keys::Range( - MetaKey::range_of(task_name, &start_key), - MetaKey::range_of(task_name, &end_key), - )) - .await?; - - let mut result = Vec::with_capacity(all.len() + 1); - if !prev.kvs.is_empty() { - let kv = &mut prev.kvs[0]; - if kv.value() > start_key.as_slice() { - result.push(kv.take_range(task_name)); - } - } - for mut kv in all { - result.push(kv.take_range(task_name)); - } - Ok(WithRevision { - revision: snap.revision(), - inner: result, - }) - } - - /// access the next backup ts of some task and some region. 
- pub async fn progress_of_task(&self, task_name: &str) -> Result { - let now = Instant::now(); - defer! { - super::metrics::METADATA_OPERATION_LATENCY.with_label_values(&["task_progress_get"]).observe(now.saturating_elapsed().as_secs_f64()) - } - let task = self.get_task(task_name).await?; - if task.is_none() { - return Err(Error::NoSuchTask { - task_name: task_name.to_owned(), - }); - } - - let timestamp = self.meta_store.snapshot().await?; - let items = timestamp - .get(Keys::Key(MetaKey::next_backup_ts_of( - task_name, - self.store_id, - ))) - .await?; - if items.is_empty() { - Ok(task.unwrap().info.start_ts) - } else { - assert_eq!(items.len(), 1); - parse_ts_from_bytes(items[0].1.as_slice()) - } + .collect() + })) } pub async fn checkpoints_of(&self, task_name: &str) -> Result> { @@ -604,10 +533,10 @@ impl MetadataClient { defer! { super::metrics::METADATA_OPERATION_LATENCY.with_label_values(&["checkpoints_of"]).observe(now.saturating_elapsed().as_secs_f64()) } - let snap = self.meta_store.snapshot().await?; - let checkpoints = snap - .get(Keys::Prefix(MetaKey::next_backup_ts(task_name))) + let checkpoints = self.meta_store + .get_latest(Keys::Prefix(MetaKey::next_backup_ts(task_name))) .await? + .inner .iter() .filter_map(|kv| { Checkpoint::from_kv(kv) @@ -674,6 +603,7 @@ impl MetadataClient { /// remove some task, without the ranges. /// only for testing. + #[cfg(test)] pub async fn remove_task(&self, name: &str) -> Result<()> { self.meta_store .delete(Keys::Key(MetaKey::task_of(name))) @@ -722,8 +652,11 @@ impl MetadataClient { return Ok(c); } let key = MetaKey::next_bakcup_ts_of_region(task, region); - let s = self.meta_store.snapshot().await?; - let r = s.get(Keys::Key(key.clone())).await?; + let r = self + .meta_store + .get_latest(Keys::Key(key.clone())) + .await? 
+ .inner; let cp = match r.len() { 0 => { let global_cp = self.global_checkpoint_of(task).await?; diff --git a/components/backup-stream/src/metadata/keys.rs b/components/backup-stream/src/metadata/keys.rs index f7a2c960ec4..26b04abe16f 100644 --- a/components/backup-stream/src/metadata/keys.rs +++ b/components/backup-stream/src/metadata/keys.rs @@ -2,7 +2,7 @@ use kvproto::metapb::Region; -const PREFIX: &str = "/tidb/br-stream"; +pub(super) const PREFIX: &str = "/tidb/br-stream"; const PATH_INFO: &str = "/info"; const PATH_NEXT_BACKUP_TS: &str = "/checkpoint"; const PATH_STORAGE_CHECKPOINT: &str = "/storage-checkpoint"; @@ -28,17 +28,26 @@ const TASKS_PREFIX: &str = "/tidb/br-stream/info/"; /// For the storage checkpoint ts of tasks: /// /storage-checkpoint// -> /// ``` -#[derive(Clone)] +#[derive(Clone, Eq, PartialEq)] pub struct MetaKey(pub Vec); /// A simple key value pair of metadata. -#[derive(Clone, Debug)] +#[derive(Clone, Eq, PartialEq)] pub struct KeyValue(pub MetaKey, pub Vec); +impl std::fmt::Debug for KeyValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("KV") + .field(&self.0) + .field(&format_args!("{}", self.1.escape_ascii())) + .finish() + } +} + impl std::fmt::Debug for MetaKey { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_tuple("MetaKey") - .field(&self.0.escape_ascii()) + f.debug_tuple("K") + .field(&format_args!("{}", self.0.escape_ascii())) .finish() } } @@ -177,16 +186,7 @@ impl MetaKey { /// return the key that keeps the range [self, self.next_prefix()) contains /// all keys with the prefix `self`. 
pub fn next_prefix(&self) -> Self { - let mut next_prefix = self.clone(); - for i in (0..next_prefix.0.len()).rev() { - if next_prefix.0[i] == u8::MAX { - next_prefix.0.pop(); - } else { - next_prefix.0[i] += 1; - break; - } - } - next_prefix + Self(tikv_util::codec::next_prefix_of(self.0.clone())) } } diff --git a/components/backup-stream/src/metadata/mod.rs b/components/backup-stream/src/metadata/mod.rs index 20887a24b02..a96e2f9bcb6 100644 --- a/components/backup-stream/src/metadata/mod.rs +++ b/components/backup-stream/src/metadata/mod.rs @@ -8,4 +8,5 @@ pub mod store; pub mod test; pub use client::{Checkpoint, CheckpointProvider, MetadataClient, MetadataEvent, StreamTask}; +#[cfg(feature = "metastore-etcd")] pub use store::lazy_etcd::{ConnectionConfig, LazyEtcdClient}; diff --git a/components/backup-stream/src/metadata/store/mod.rs b/components/backup-stream/src/metadata/store/mod.rs index e5d1f03e715..7cecda9720e 100644 --- a/components/backup-stream/src/metadata/store/mod.rs +++ b/components/backup-stream/src/metadata/store/mod.rs @@ -1,6 +1,12 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -pub mod lazy_etcd; +cfg_if::cfg_if! { + if #[cfg(feature = "metastore-etcd")] { + pub mod etcd; + pub mod lazy_etcd; + pub use etcd::EtcdStore; + } +} // Note: these mods also used for integration tests, // so we cannot compile them only when `#[cfg(test)]`. 
@@ -9,11 +15,11 @@ pub mod lazy_etcd; pub mod slash_etc; pub use slash_etc::SlashEtcStore; -pub mod etcd; +pub mod pd; + use std::{cmp::Ordering, future::Future, pin::Pin, time::Duration}; use async_trait::async_trait; -pub use etcd::EtcdStore; use tokio_stream::Stream; // ==== Generic interface definition ==== @@ -22,6 +28,7 @@ use crate::errors::Result; pub type BoxStream = Pin + Send>>; pub type BoxFuture = Pin + Send>>; +pub use pd::PdStore; #[derive(Debug, Default)] pub struct Transaction { @@ -108,10 +115,19 @@ pub struct WithRevision { pub inner: T, } +impl WithRevision { + pub fn map(self, f: impl FnOnce(T) -> R) -> WithRevision { + WithRevision { + revision: self.revision, + inner: f(self.inner), + } + } +} + /// The key set for getting. /// I guess there should be a `&[u8]` in meta key, /// but the etcd client requires Into> :( -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum Keys { Prefix(MetaKey), Range(MetaKey, MetaKey), @@ -160,7 +176,7 @@ pub trait Snapshot: Send + Sync + 'static { } } -#[derive(Debug)] +#[derive(Debug, Eq, PartialEq, Clone, Copy)] pub enum KvEventType { Put, Delete, @@ -207,4 +223,13 @@ pub trait MetaStore: Clone + Send + Sync { async fn delete(&self, keys: Keys) -> Result<()> { self.txn(Transaction::default().delete(keys)).await } + /// Get the latest version of some keys. + async fn get_latest(&self, keys: Keys) -> Result>> { + let s = self.snapshot().await?; + let keys = s.get(keys).await?; + Ok(WithRevision { + revision: s.revision(), + inner: keys, + }) + } } diff --git a/components/backup-stream/src/metadata/store/pd.rs b/components/backup-stream/src/metadata/store/pd.rs new file mode 100644 index 00000000000..5b2e2b466e5 --- /dev/null +++ b/components/backup-stream/src/metadata/store/pd.rs @@ -0,0 +1,324 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{collections::VecDeque, fmt::Display, pin::Pin, task::ready}; + +use async_trait::async_trait; +use futures::{stream, Stream}; +use kvproto::meta_storagepb::{self as mpb, WatchResponse}; +use pd_client::meta_storage::{Get, MetaStorageClient, Put, Watch}; +use pin_project::pin_project; +use tikv_util::{box_err, info}; + +use super::{ + GetResponse, Keys, KvChangeSubscription, KvEvent, KvEventType, MetaStore, Snapshot, + WithRevision, +}; +use crate::{ + debug, + errors::{Error, Result}, + metadata::keys::{KeyValue, MetaKey, PREFIX}, +}; + +fn convert_kv(mut kv: mpb::KeyValue) -> KeyValue { + let k = kv.take_key(); + let v = kv.take_value(); + KeyValue(MetaKey(k), v) +} + +#[derive(Clone)] +pub struct PdStore { + client: M, +} + +impl PdStore { + pub fn new(s: M) -> Self { + Self { client: s } + } +} + +fn unimplemented(name: impl Display) -> Error { + Error::Io(std::io::Error::new( + std::io::ErrorKind::Unsupported, + format!("the behavior {} hasn't been implemented yet.", name), + )) +} + +#[pin_project] +struct PdWatchStream { + #[pin] + inner: S, + buf: VecDeque, +} + +impl PdWatchStream { + /// Create a new Watch Stream from PD, with a function to cancel the stream. 
+ fn new(inner: S) -> Self { + Self { + inner, + buf: Default::default(), + } + } +} + +impl>> Stream for PdWatchStream { + type Item = Result; + + fn poll_next( + mut self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + loop { + let this = self.as_mut().project(); + let buf = this.buf; + if let Some(x) = buf.pop_front() { + return Some(Ok(x)).into(); + } + let resp = ready!(this.inner.poll_next(cx)); + match resp { + None => return None.into(), + Some(Err(err)) => return Some(Err(Error::Pd(err))).into(), + Some(Ok(mut x)) => { + if x.get_header().has_error() { + return Some(Err(Error::Other(box_err!( + "watch stream returns error: {:?}", + x.get_header().get_error() + )))) + .into(); + } + assert!(buf.is_empty()); + for mut e in x.take_events().into_iter() { + let ty = match e.get_type() { + kvproto::meta_storagepb::EventEventType::Put => KvEventType::Put, + kvproto::meta_storagepb::EventEventType::Delete => KvEventType::Delete, + }; + let kv = KvEvent { + kind: ty, + pair: convert_kv(e.take_kv()), + }; + buf.push_back(kv); + } + } + } + } + } +} + +#[async_trait] +impl Snapshot for RevOnly { + async fn get_extra(&self, _keys: Keys, _extra: super::GetExtra) -> Result { + Err(unimplemented("PdStore::snapshot::get")) + } + + fn revision(&self) -> i64 { + self.0 + } +} + +pub struct RevOnly(i64); + +#[async_trait] +impl< + St: Stream> + Send + 'static, + PD: MetaStorageClient + Clone, +> MetaStore for PdStore +{ + type Snap = RevOnly; + + async fn snapshot(&self) -> Result { + // hacking here: when we are doing point querying, the server won't return + // revision. So we are going to query a non-exist prefix here. + let rev = self + .client + .get(Get::of(PREFIX.as_bytes().to_vec()).prefixed().limit(0)) + .await? 
+ .get_header() + .get_revision(); + info!("pd meta client getting snapshot."; "rev" => %rev); + Ok(RevOnly(rev)) + } + + async fn watch( + &self, + keys: super::Keys, + start_rev: i64, + ) -> Result { + info!("pd meta client creating watch stream."; "keys" => ?keys, "rev" => %start_rev); + match keys { + Keys::Prefix(k) => { + use futures::stream::StreamExt; + let stream = self + .client + .watch(Watch::of(k).prefixed().from_rev(start_rev)); + let (stream, cancel) = stream::abortable(PdWatchStream::new(stream)); + Ok(KvChangeSubscription { + stream: stream.boxed(), + cancel: Box::pin(async move { cancel.abort() }), + }) + } + _ => Err(unimplemented("watch distinct keys or range of keys")), + } + } + + async fn txn(&self, _txn: super::Transaction) -> Result<()> { + Err(unimplemented("PdStore::txn")) + } + + async fn txn_cond(&self, _txn: super::CondTransaction) -> Result<()> { + Err(unimplemented("PdStore::txn_cond")) + } + + async fn set(&self, mut kv: KeyValue) -> Result<()> { + debug!("pd meta client setting."; "pair" => ?kv); + self.client + .put(Put::of(kv.take_key(), kv.take_value())) + .await?; + Ok(()) + } + + async fn get_latest(&self, keys: Keys) -> Result>> { + let spec = match keys.clone() { + Keys::Prefix(p) => Get::of(p).prefixed(), + Keys::Key(k) => Get::of(k), + Keys::Range(s, e) => Get::of(s).range_to(e), + }; + // Note: we skipped check `more` here, because we haven't make pager. 
+ let mut resp = self.client.get(spec).await?; + let inner = resp + .take_kvs() + .into_iter() + .map(convert_kv) + .collect::>(); + let revision = resp.get_header().get_revision(); + debug!("pd meta client getting."; "range" => ?keys, "rev" => %revision, "result" => ?inner); + Ok(WithRevision { inner, revision }) + } +} + +#[cfg(test)] +mod tests { + use std::{sync::Arc, time::Duration}; + + use futures::{Future, StreamExt}; + use pd_client::{ + meta_storage::{Checked, Source, Sourced}, + RpcClient, + }; + use test_pd::{mocker::MetaStorage, util::*, Server as PdServer}; + use tikv_util::config::ReadableDuration; + + use super::PdStore; + use crate::metadata::{ + keys::{KeyValue, MetaKey}, + store::{Keys, MetaStore}, + }; + + fn new_test_server_and_client( + factory: impl FnOnce(RpcClient) -> C, + ) -> (PdServer, PdStore) { + let server = PdServer::with_case(1, Arc::::default()); + let eps = server.bind_addrs(); + let client = + new_client_with_update_interval(eps, None, ReadableDuration(Duration::from_secs(99))); + (server, PdStore::new(factory(client))) + } + + fn w(f: impl Future) -> T { + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap() + .block_on(f) + } + + #[test] + fn test_query() { + let (_s, c) = new_test_server_and_client(|c| Sourced::new(Arc::new(c), Source::LogBackup)); + + let kv = |k, v: &str| KeyValue(MetaKey::task_of(k), v.as_bytes().to_vec()); + let insert = |k, v| w(c.set(kv(k, v))).unwrap(); + insert("a", "the signpost of flowers"); + insert("b", "the milky hills"); + insert("c", "the rusty sky"); + + let k = w(c.get_latest(Keys::Key(MetaKey::task_of("a")))).unwrap(); + assert_eq!( + k.inner.as_slice(), + [kv("a", "the signpost of flowers")].as_slice() + ); + let k = w(c.get_latest(Keys::Key(MetaKey::task_of("d")))).unwrap(); + assert_eq!(k.inner.as_slice(), [].as_slice()); + + let k = w(c.get_latest(Keys::Prefix(MetaKey::tasks()))).unwrap(); + assert_eq!( + k.inner.as_slice(), + [ + kv("a", "the signpost of 
flowers"), + kv("b", "the milky hills"), + kv("c", "the rusty sky"), + ] + .as_slice() + ) + } + + #[test] + fn test_watch() { + let (_s, c) = new_test_server_and_client(|c| Sourced::new(Arc::new(c), Source::LogBackup)); + let kv = |k, v: &str| KeyValue(MetaKey::task_of(k), v.as_bytes().to_vec()); + let insert = |k, v| w(c.set(kv(k, v))).unwrap(); + + insert("a", "the guest in vermilion"); + let res = w(c.get_latest(Keys::Prefix(MetaKey::tasks()))).unwrap(); + assert_eq!(res.inner.as_slice(), &[kv("a", "the guest in vermilion")]); + let mut ws = w(c.watch(Keys::Prefix(MetaKey::tasks()), res.revision + 1)).unwrap(); + let mut items = vec![]; + insert("a", "looking up at the ocean"); + items.push(w(ws.stream.next()).unwrap().unwrap()); + insert("b", "a folktale in the polar day"); + items.push(w(ws.stream.next()).unwrap().unwrap()); + w(ws.cancel); + assert!(w(ws.stream.next()).is_none()); + + assert_eq!(items[0].pair, kv("a", "looking up at the ocean")); + assert_eq!(items[1].pair, kv("b", "a folktale in the polar day")); + } + + #[test] + fn test_check_error() { + // Without AutoHeader, it will fail due to the source is empty. + let (_s, c) = new_test_server_and_client(|c| Checked::new(Arc::new(c))); + let kv = |k, v: &str| KeyValue(MetaKey::task_of(k), v.as_bytes().to_vec()); + let insert = |k, v| w(c.set(kv(k, v))); + + insert("c", "the rainbow-like summer").unwrap_err(); + w(c.get_latest(Keys::Key(MetaKey(vec![42u8])))).unwrap_err(); + assert!(w(c.watch(Keys::Key(MetaKey(vec![42u8])), 42)).is_err()); + } + + #[test] + fn test_retry() { + use tikv_util::defer; + + defer! 
{{ + fail::remove("meta_storage_get"); + }}; + let (_s, c) = new_test_server_and_client(|c| Sourced::new(Arc::new(c), Source::LogBackup)); + + let kv = |k, v: &str| KeyValue(MetaKey::task_of(k), v.as_bytes().to_vec()); + let insert = |k, v| w(c.set(kv(k, v))).unwrap(); + insert("rejectme", "this key would be rejected by the failpoint."); + + fail::cfg("meta_storage_get", "4*return").unwrap(); + let res = w(c.get_latest(Keys::Key(MetaKey::task_of("rejectme")))) + .expect("should success when temporary failing"); + assert_eq!(res.inner.len(), 1); + assert_eq!( + res.inner[0], + kv("rejectme", "this key would be rejected by the failpoint.") + ); + + // FIXME: this would take about 10s to run and influences unit tests run... + fail::cfg("meta_storage_get", "return").unwrap(); + w(c.get_latest(Keys::Key(MetaKey::task_of("rejectme")))) + .expect_err("should fail when ever failing"); + } +} diff --git a/components/backup-stream/src/metadata/test.rs b/components/backup-stream/src/metadata/test.rs index a57722089bf..bb2b7fe1577 100644 --- a/components/backup-stream/src/metadata/test.rs +++ b/components/backup-stream/src/metadata/test.rs @@ -54,21 +54,7 @@ async fn test_basic() -> Result<()> { cli.insert_task_with_range(&task, ranges).await?; let remote_ranges = cli.ranges_of_task(name).await?.inner; assert_range_matches(remote_ranges, ranges); - let overlap_ranges = cli - .range_overlap_of_task(name, (b"7".to_vec(), b"9".to_vec())) - .await? - .inner; - assert_range_matches(overlap_ranges, &[(b"6", b"8"), (b"8", b"9")]); - let overlap_ranges = cli - .range_overlap_of_task(name, (b"1".to_vec(), b"5".to_vec())) - .await? - .inner; - assert_range_matches(overlap_ranges, &[(b"1", b"2"), (b"4", b"5")]); - let overlap_ranges = cli - .range_overlap_of_task(name, (b"1".to_vec(), b"4".to_vec())) - .await? 
- .inner; - assert_range_matches(overlap_ranges, &[(b"1", b"2")]); + Ok(()) } diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 917176b454e..36f7aaa983b 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -14,12 +14,16 @@ use futures::{ channel::mpsc, compat::{Compat, Future01CompatExt}, executor::block_on, - future::{self, BoxFuture, FutureExt, TryFutureExt}, + future::{self, BoxFuture, FutureExt, TryFlattenStream, TryFutureExt}, sink::SinkExt, - stream::StreamExt, + stream::{ErrInto, StreamExt}, + TryStreamExt, }; use grpcio::{EnvBuilder, Environment, WriteFlags}; use kvproto::{ + meta_storagepb::{ + self as mpb, GetRequest, GetResponse, PutRequest, WatchRequest, WatchResponse, + }, metapb, pdpb::{self, Member}, replication_modepb::{RegionReplicationStatus, ReplicationStatus, StoreDrAutoSyncStatus}, @@ -33,6 +37,7 @@ use txn_types::TimeStamp; use yatp::{task::future::TaskCell, ThreadPool}; use super::{ + meta_storage::{Get, MetaStorageClient, Put, Watch}, metrics::*, util::{call_option_inner, check_resp_header, sync_request, Client, PdConnector}, BucketStat, Config, Error, FeatureGate, PdClient, PdFuture, RegionInfo, RegionStat, Result, @@ -42,6 +47,7 @@ use super::{ pub const CQ_COUNT: usize = 1; pub const CLIENT_PREFIX: &str = "pd"; +#[derive(Clone)] pub struct RpcClient { cluster_id: u64, pd_client: Arc, @@ -1117,3 +1123,91 @@ impl PdClient for RpcClient { .execute() } } + +impl RpcClient { + fn fill_cluster_id_for(&self, header: &mut mpb::RequestHeader) { + header.cluster_id = self.cluster_id; + } +} + +impl MetaStorageClient for RpcClient { + fn get(&self, mut req: Get) -> PdFuture { + let timer = Instant::now(); + self.fill_cluster_id_for(req.inner.mut_header()); + let executor = move |client: &Client, req: GetRequest| { + let handler = { + let inner = client.inner.rl(); + let r = inner + .meta_storage + .get_async_opt(&req, call_option_inner(&inner)); + 
futures::future::ready(r).err_into().try_flatten() + }; + Box::pin(async move { + fail::fail_point!("meta_storage_get", req.key.ends_with(b"rejectme"), |_| { + Err(super::Error::Grpc(grpcio::Error::RemoteStopped)) + }); + let resp = handler.await?; + PD_REQUEST_HISTOGRAM_VEC + .meta_storage_get + .observe(timer.saturating_elapsed_secs()); + Ok(resp) + }) as _ + }; + + self.pd_client + .request(req.into(), executor, LEADER_CHANGE_RETRY) + .execute() + } + + fn put(&self, mut req: Put) -> PdFuture { + let timer = Instant::now(); + self.fill_cluster_id_for(req.inner.mut_header()); + let executor = move |client: &Client, req: PutRequest| { + let handler = { + let inner = client.inner.rl(); + let r = inner + .meta_storage + .put_async_opt(&req, call_option_inner(&inner)); + futures::future::ready(r).err_into().try_flatten() + }; + Box::pin(async move { + let resp = handler.await?; + PD_REQUEST_HISTOGRAM_VEC + .meta_storage_put + .observe(timer.saturating_elapsed_secs()); + Ok(resp) + }) as _ + }; + + self.pd_client + .request(req.into(), executor, LEADER_CHANGE_RETRY) + .execute() + } + + fn watch(&self, mut req: Watch) -> Self::WatchStream { + let timer = Instant::now(); + self.fill_cluster_id_for(req.inner.mut_header()); + let executor = move |client: &Client, req: WatchRequest| { + let handler = { + let inner = client.inner.rl(); + inner.meta_storage.watch(&req) + }; + Box::pin(async move { + let resp = handler?; + PD_REQUEST_HISTOGRAM_VEC + .meta_storage_watch + .observe(timer.saturating_elapsed_secs()); + Ok(resp.err_into()) + }) as _ + }; + + self.pd_client + .request(req.into(), executor, LEADER_CHANGE_RETRY) + .execute() + .try_flatten_stream() + } + + type WatchStream = TryFlattenStream< + PdFuture, crate::Error>>, + >; +} diff --git a/components/pd_client/src/lib.rs b/components/pd_client/src/lib.rs index 86e52eaf2a5..ba287621272 100644 --- a/components/pd_client/src/lib.rs +++ b/components/pd_client/src/lib.rs @@ -14,6 +14,7 @@ mod util; mod config; pub mod 
errors; +pub mod meta_storage; use std::{cmp::Ordering, ops::Deref, sync::Arc, time::Duration}; use futures::future::BoxFuture; diff --git a/components/pd_client/src/meta_storage.rs b/components/pd_client/src/meta_storage.rs new file mode 100644 index 00000000000..109986665bd --- /dev/null +++ b/components/pd_client/src/meta_storage.rs @@ -0,0 +1,302 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +//! `meta_storage` is the API set for storing generic KV pairs. +//! It is a trimmed version of the KV service of etcd, along with some metrics. + +use std::{pin::Pin, sync::Arc, task::ready}; + +use futures::{FutureExt, Stream}; +use kvproto::meta_storagepb as pb; +use tikv_util::{box_err, codec}; + +use crate::{Error, PdFuture, Result}; + +/// The etcd INF end key. +/// Unlike TiKV, they have chosen the slice `[0u8]` as the infinity. +const INF: [u8; 1] = [0u8]; + +/// A Get request to the meta storage. +#[derive(Clone, Debug)] +pub struct Get { + pub(crate) inner: pb::GetRequest, +} + +impl From for pb::GetRequest { + fn from(value: Get) -> Self { + value.inner + } +} + +impl Get { + /// Create a new get request, querying for exactly one key. + pub fn of(key: impl Into>) -> Self { + let mut inner = pb::GetRequest::default(); + inner.set_key(key.into()); + Self { inner } + } + + /// Enhance the query, make it be able to query the prefix of keys. + /// The prefix is the key passed to the method [`of`](Get::of). + pub fn prefixed(mut self) -> Self { + let mut next = codec::next_prefix_of(self.inner.key.clone()); + if next.is_empty() { + next = INF.to_vec(); + } + self.inner.set_range_end(next); + self + } + + /// Enhance the query, make it be able to query a range of keys. + /// The prefix is the key passed to the method [`of`](Get::of). + pub fn range_to(mut self, to: impl Into>) -> Self { + self.inner.set_range_end(to.into()); + self + } + + /// Specify the revision of the query. 
+ pub fn rev(mut self, rev: i64) -> Self { + self.inner.set_revision(rev); + self + } + + pub fn limit(mut self, limit: i64) -> Self { + self.inner.set_limit(limit); + self + } +} + +/// A Put request to the meta store. +#[derive(Clone, Debug)] +pub struct Put { + pub(crate) inner: pb::PutRequest, +} + +impl Put { + /// Create a put request of the key value. + pub fn of(key: impl Into>, value: impl Into>) -> Self { + let mut inner = pb::PutRequest::default(); + inner.set_key(key.into()); + inner.set_value(value.into()); + Self { inner } + } + + /// Enhance the put request, allow it to return the previous kv pair. + pub fn fetch_prev_kv(mut self) -> Self { + self.inner.prev_kv = true; + self + } +} + +impl From for pb::PutRequest { + fn from(value: Put) -> Self { + value.inner + } +} + +#[derive(Clone, Debug)] +pub struct Watch { + pub(crate) inner: pb::WatchRequest, +} + +impl Watch { + /// Create a watch request for a key. + pub fn of(key: impl Into>) -> Self { + let mut inner = pb::WatchRequest::default(); + inner.set_key(key.into()); + + Self { inner } + } + + /// Enhance the request to allow it watch keys with the same prefix. + pub fn prefixed(mut self) -> Self { + let mut next = codec::next_prefix_of(self.inner.key.clone()); + if next.is_empty() { + next = INF.to_vec(); + } + self.inner.set_range_end(next); + self + } + + /// Enhance the request to allow it watch keys until the range end. + pub fn range_to(mut self, to: impl Into>) -> Self { + self.inner.set_range_end(to.into()); + self + } + + /// Enhance the request to make it watch from a specified revision. + pub fn from_rev(mut self, rev: i64) -> Self { + self.inner.set_start_revision(rev); + self + } +} + +impl From for pb::WatchRequest { + fn from(value: Watch) -> Self { + value.inner + } +} + +/// The descriptor of source (caller) of the requests. 
+#[derive(Clone, Copy)] +pub enum Source { + LogBackup = 0, +} + +impl std::fmt::Display for Source { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Source::LogBackup => f.write_str("log_backup"), + } + } +} + +/// A wrapper over client which would fill the source field in the header for +/// all requests. +#[derive(Clone)] +pub struct Sourced { + inner: S, + source: Source, +} + +impl Sourced { + pub fn new(inner: S, source: Source) -> Self { + Self { inner, source } + } + + fn prepare_header(&self, h: &mut pb::RequestHeader) { + h.set_source(self.source.to_string()); + } +} + +impl MetaStorageClient for Sourced { + type WatchStream = S::WatchStream; + + fn get(&self, mut req: Get) -> PdFuture { + self.prepare_header(req.inner.mut_header()); + self.inner.get(req) + } + + fn put(&self, mut req: Put) -> PdFuture { + self.prepare_header(req.inner.mut_header()); + self.inner.put(req) + } + + fn watch(&self, mut req: Watch) -> Self::WatchStream { + self.prepare_header(req.inner.mut_header()); + self.inner.watch(req) + } +} + +/// A wrapper that makes every response and stream event get checked. +/// When there is an error in the header, this client would return a [`Err`] +/// variant directly. +#[derive(Clone)] +pub struct Checked(S); + +impl Checked { + pub fn new(client: S) -> Self { + Self(client) + } +} + +/// A wrapper that checks every event in the stream and returns an error +/// variant when there is error in the header. 
+pub struct CheckedStream(S); + +fn check_resp_header(header: &pb::ResponseHeader) -> Result<()> { + if header.has_error() { + match header.get_error().get_type() { + pb::ErrorType::Ok => Ok(()), + pb::ErrorType::Unknown => Err(Error::Other(box_err!( + "{}", + header.get_error().get_message() + ))), + pb::ErrorType::DataCompacted => Err(Error::DataCompacted( + header.get_error().get_message().to_owned(), + )), + }?; + } + Ok(()) +} + +impl>> Stream for CheckedStream { + type Item = Result; + + fn poll_next( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + // SAFETY: trivial projection. + let inner = unsafe { Pin::new_unchecked(&mut self.get_unchecked_mut().0) }; + let item = ready!(inner.poll_next(cx)); + item.map(|r| { + r.and_then(|resp| { + check_resp_header(resp.get_header())?; + Ok(resp) + }) + }) + .into() + } +} + +impl MetaStorageClient for Checked { + type WatchStream = CheckedStream; + + fn get(&self, req: Get) -> PdFuture { + self.0 + .get(req) + .map(|resp| { + resp.and_then(|r| { + check_resp_header(r.get_header())?; + Ok(r) + }) + }) + .boxed() + } + + fn put(&self, req: Put) -> PdFuture { + self.0 + .put(req) + .map(|resp| { + resp.and_then(|r| { + check_resp_header(r.get_header())?; + Ok(r) + }) + }) + .boxed() + } + + fn watch(&self, req: Watch) -> Self::WatchStream { + CheckedStream(self.0.watch(req)) + } +} + +impl MetaStorageClient for Arc { + type WatchStream = S::WatchStream; + + fn get(&self, req: Get) -> PdFuture { + Arc::as_ref(self).get(req) + } + + fn put(&self, req: Put) -> PdFuture { + Arc::as_ref(self).put(req) + } + + fn watch(&self, req: Watch) -> Self::WatchStream { + Arc::as_ref(self).watch(req) + } +} + +/// A client which is able to play with the `meta_storage` service. +pub trait MetaStorageClient: Send + Sync + 'static { + // Note: Perhaps we'd better make it generic over response here, however that + // would make `CheckedStream` impossible(How can we check ALL types? 
Or we may + // make traits like `MetaStorageResponse` and constraint over the T), thankfully + // there is only one streaming RPC in this service. + /// The stream that yielded by the watch RPC. + type WatchStream: Stream>; + + fn get(&self, req: Get) -> PdFuture; + fn put(&self, req: Put) -> PdFuture; + fn watch(&self, req: Watch) -> Self::WatchStream; +} diff --git a/components/pd_client/src/metrics.rs b/components/pd_client/src/metrics.rs index a4ef9c5ce4e..e1f1100444a 100644 --- a/components/pd_client/src/metrics.rs +++ b/components/pd_client/src/metrics.rs @@ -32,6 +32,10 @@ make_static_metric! { is_recovering_marked, store_heartbeat, tso, + + meta_storage_put, + meta_storage_get, + meta_storage_watch, } pub struct PDRequestEventHistogramVec: Histogram { diff --git a/components/pd_client/src/util.rs b/components/pd_client/src/util.rs index fd58cd921d8..f3a8451f321 100644 --- a/components/pd_client/src/util.rs +++ b/components/pd_client/src/util.rs @@ -22,6 +22,7 @@ use grpcio::{ Environment, Error::RpcFailure, MetadataBuilder, Result as GrpcResult, RpcStatusCode, }; use kvproto::{ + meta_storagepb::MetaStorageClient as MetaStorageStub, metapb::BucketStats, pdpb::{ ErrorType, GetMembersRequest, GetMembersResponse, Member, PdClient as PdClientStub, @@ -104,6 +105,7 @@ pub struct Inner { pub pending_heartbeat: Arc, pub pending_buckets: Arc, pub tso: TimestampOracle, + pub meta_storage: MetaStorageStub, last_try_reconnect: Instant, } @@ -181,6 +183,8 @@ impl Client { let (buckets_tx, buckets_resp) = client_stub .report_buckets_opt(target.call_option()) .unwrap_or_else(|e| panic!("fail to request PD {} err {:?}", "report_buckets", e)); + let meta_storage = + kvproto::meta_storagepb::MetaStorageClient::new(client_stub.client.channel().clone()); Client { timer: GLOBAL_TIMER_HANDLE.clone(), inner: RwLock::new(Inner { @@ -198,6 +202,7 @@ impl Client { pending_buckets: Arc::default(), last_try_reconnect: Instant::now(), tso, + meta_storage, }), feature_gate: 
FeatureGate::default(), enable_forwarding, @@ -238,6 +243,7 @@ impl Client { inner.buckets_sender = Either::Left(Some(buckets_tx)); inner.buckets_resp = Some(buckets_resp); + inner.meta_storage = MetaStorageStub::new(client_stub.client.channel().clone()); inner.client_stub = client_stub; inner.members = members; inner.tso = tso; diff --git a/components/server/src/server.rs b/components/server/src/server.rs index e37c6f9fe3b..f721097a514 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -29,9 +29,7 @@ use std::{ use api_version::{dispatch_api_version, KvFormat}; use backup_stream::{ - config::BackupStreamConfigManager, - metadata::{ConnectionConfig, LazyEtcdClient}, - observer::BackupStreamObserver, + config::BackupStreamConfigManager, metadata::store::PdStore, observer::BackupStreamObserver, }; use causal_ts::CausalTsProviderImpl; use cdc::{CdcConfigManager, MemoryQuota}; @@ -62,7 +60,10 @@ use kvproto::{ kvrpcpb::ApiVersion, logbackuppb::create_log_backup, recoverdatapb::create_recover_data, resource_usage_agent::create_resource_metering_pub_sub, }; -use pd_client::{PdClient, RpcClient}; +use pd_client::{ + meta_storage::{Checked, Sourced}, + PdClient, RpcClient, +}; use raft_log_engine::RaftLogEngine; use raftstore::{ coprocessor::{ @@ -1040,17 +1041,12 @@ where Box::new(BackupStreamConfigManager(backup_stream_worker.scheduler())), ); - let etcd_cli = LazyEtcdClient::new( - self.config.pd.endpoints.as_slice(), - ConnectionConfig { - keep_alive_interval: self.config.server.grpc_keepalive_time.0, - keep_alive_timeout: self.config.server.grpc_keepalive_timeout.0, - tls: Arc::clone(&self.security_mgr), - }, - ); let backup_stream_endpoint = backup_stream::Endpoint::new( node.id(), - etcd_cli, + PdStore::new(Checked::new(Sourced::new( + Arc::clone(&self.pd_client), + pd_client::meta_storage::Source::LogBackup, + ))), self.config.backup_stream.clone(), backup_stream_scheduler.clone(), backup_stream_ob, diff --git 
a/components/test_pd/src/mocker/meta_storage.rs b/components/test_pd/src/mocker/meta_storage.rs new file mode 100644 index 00000000000..311c3884722 --- /dev/null +++ b/components/test_pd/src/mocker/meta_storage.rs @@ -0,0 +1,113 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::sync::{Arc, Mutex}; + +use futures::{executor::block_on, SinkExt, StreamExt}; +use grpcio::{RpcStatus, RpcStatusCode}; +use kvproto::meta_storagepb as mpb; + +use super::etcd::{Etcd, KeyValue, Keys, KvEventType, MetaKey}; +use crate::PdMocker; + +#[derive(Default)] +pub struct MetaStorage { + store: Arc>, +} + +fn convert_kv(from: KeyValue) -> mpb::KeyValue { + let mut kv = mpb::KeyValue::default(); + kv.set_key(from.0.0); + kv.set_value(from.1); + kv +} + +fn check_header(h: &mpb::RequestHeader) -> super::Result<()> { + if h.get_source().is_empty() { + return Err(format!("Please provide header.source; req = {:?}", h)); + } + Ok(()) +} + +fn header_of_revision(r: i64) -> mpb::ResponseHeader { + let mut h = mpb::ResponseHeader::default(); + h.set_revision(r); + h +} + +impl PdMocker for MetaStorage { + fn meta_store_get(&self, req: mpb::GetRequest) -> Option> { + if let Err(err) = check_header(req.get_header()) { + return Some(Err(err)); + } + + let store = self.store.lock().unwrap(); + let key = if req.get_range_end().is_empty() { + Keys::Key(MetaKey(req.get_key().to_vec())) + } else { + Keys::Range( + MetaKey(req.get_key().to_vec()), + MetaKey(req.get_range_end().to_vec()), + ) + }; + let (items, rev) = store.get_key(key); + let mut resp = mpb::GetResponse::new(); + resp.set_kvs(items.into_iter().map(convert_kv).collect()); + resp.set_header(header_of_revision(rev)); + Some(Ok(resp)) + } + + fn meta_store_put(&self, mut req: mpb::PutRequest) -> Option> { + if let Err(err) = check_header(req.get_header()) { + return Some(Err(err)); + } + + let mut store = self.store.lock().unwrap(); + block_on(store.set(KeyValue(MetaKey(req.take_key()), 
req.take_value()))).unwrap(); + Some(Ok(Default::default())) + } + + fn meta_store_watch( + &self, + req: mpb::WatchRequest, + mut sink: grpcio::ServerStreamingSink, + ctx: &grpcio::RpcContext<'_>, + ) -> bool { + if let Err(err) = check_header(req.get_header()) { + ctx.spawn(async move { + sink.fail(RpcStatus::with_message( + RpcStatusCode::INVALID_ARGUMENT, + err, + )) + .await + .unwrap() + }); + return true; + } + + let mut store = self.store.lock().unwrap(); + let key = if req.get_range_end().is_empty() { + Keys::Key(MetaKey(req.get_key().to_vec())) + } else { + Keys::Range( + MetaKey(req.get_key().to_vec()), + MetaKey(req.get_range_end().to_vec()), + ) + }; + let mut watcher = + block_on(store.watch(key, req.get_start_revision())).expect("should be infallible"); + ctx.spawn(async move { + while let Some(x) = watcher.next().await { + let mut event = mpb::Event::new(); + event.set_kv(convert_kv(x.pair)); + event.set_type(match x.kind { + KvEventType::Put => mpb::EventEventType::Put, + KvEventType::Delete => mpb::EventEventType::Delete, + }); + let mut resp = mpb::WatchResponse::default(); + resp.set_events(vec![event].into()); + sink.send((resp, Default::default())).await.unwrap(); + } + }); + true + } +} diff --git a/components/test_pd/src/mocker/mod.rs b/components/test_pd/src/mocker/mod.rs index d8282ca3df0..f4b6dafb6b6 100644 --- a/components/test_pd/src/mocker/mod.rs +++ b/components/test_pd/src/mocker/mod.rs @@ -3,12 +3,13 @@ use std::result; use futures::executor::block_on; -use kvproto::pdpb::*; +use kvproto::{meta_storagepb as mpb, pdpb::*}; mod bootstrap; pub mod etcd; mod incompatible; mod leader_change; +mod meta_storage; mod retry; mod service; mod split; @@ -18,6 +19,7 @@ pub use self::{ bootstrap::AlreadyBootstrapped, incompatible::Incompatible, leader_change::LeaderChange, + meta_storage::MetaStorage, retry::{NotRetry, Retry}, service::Service, split::Split, @@ -28,6 +30,23 @@ pub const DEFAULT_CLUSTER_ID: u64 = 42; pub type Result = 
result::Result; pub trait PdMocker { + fn meta_store_get(&self, _req: mpb::GetRequest) -> Option> { + None + } + + fn meta_store_put(&self, _req: mpb::PutRequest) -> Option> { + None + } + + fn meta_store_watch( + &self, + _req: mpb::WatchRequest, + _sink: grpcio::ServerStreamingSink, + _ctx: &grpcio::RpcContext<'_>, + ) -> bool { + false + } + fn load_global_config( &self, _req: &LoadGlobalConfigRequest, diff --git a/components/test_pd/src/server.rs b/components/test_pd/src/server.rs index b1909485ac8..1662e27f00f 100644 --- a/components/test_pd/src/server.rs +++ b/components/test_pd/src/server.rs @@ -16,7 +16,10 @@ use grpcio::{ ClientStreamingSink, DuplexSink, EnvBuilder, RequestStream, RpcContext, RpcStatus, RpcStatusCode, Server as GrpcServer, ServerBuilder, ServerStreamingSink, UnarySink, WriteFlags, }; -use kvproto::pdpb::*; +use kvproto::{ + meta_storagepb_grpc::{create_meta_storage, MetaStorage}, + pdpb::*, +}; use pd_client::Error as PdError; use security::*; @@ -70,14 +73,17 @@ impl Server { } pub fn start(&mut self, mgr: &SecurityManager, eps: Vec<(String, u16)>) { - let service = create_pd(self.mocker.clone()); + let pd = create_pd(self.mocker.clone()); + let meta_store = create_meta_storage(self.mocker.clone()); let env = Arc::new( EnvBuilder::new() .cq_count(1) .name_prefix(thd_name!("mock-server")) .build(), ); - let mut sb = ServerBuilder::new(env).register_service(service); + let mut sb = ServerBuilder::new(env) + .register_service(pd) + .register_service(meta_store); for (host, port) in eps { sb = mgr.bind(sb, &host, port); } @@ -187,6 +193,40 @@ impl Clone for PdMock { } } +impl MetaStorage for PdMock { + fn watch( + &mut self, + ctx: grpcio::RpcContext<'_>, + req: kvproto::meta_storagepb::WatchRequest, + sink: grpcio::ServerStreamingSink, + ) { + match &self.case { + Some(x) => { + x.meta_store_watch(req, sink, &ctx); + } + None => grpcio::unimplemented_call!(ctx, sink), + } + } + + fn get( + &mut self, + ctx: grpcio::RpcContext<'_>, + req: 
kvproto::meta_storagepb::GetRequest, + sink: grpcio::UnarySink, + ) { + hijack_unary(self, ctx, sink, |m| m.meta_store_get(req.clone())) + } + + fn put( + &mut self, + ctx: grpcio::RpcContext<'_>, + req: kvproto::meta_storagepb::PutRequest, + sink: grpcio::UnarySink, + ) { + hijack_unary(self, ctx, sink, |m| m.meta_store_put(req.clone())) + } +} + impl Pd for PdMock { fn load_global_config( &mut self, diff --git a/components/tikv_util/src/codec/mod.rs b/components/tikv_util/src/codec/mod.rs index fa0ec4d7d16..0e1e7aa6fdb 100644 --- a/components/tikv_util/src/codec/mod.rs +++ b/components/tikv_util/src/codec/mod.rs @@ -22,6 +22,34 @@ pub fn read_slice<'a>(data: &mut BytesSlice<'a>, size: usize) -> Result) -> Vec { + let mut next_prefix = key; + for i in (0..next_prefix.len()).rev() { + if next_prefix[i] == u8::MAX { + next_prefix.pop(); + } else { + next_prefix[i] += 1; + break; + } + } + // By definition, the empty key means infinity. + // When we have meet keys like [0xff], return empty slice here is expected. + next_prefix +} + #[derive(Debug, Error)] pub enum Error { #[error("{0}")] From 6f0f814b3981185105014405a2102bd849b0af06 Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Wed, 29 Mar 2023 15:38:54 +0800 Subject: [PATCH 0609/1149] pitr: support modifying the config tikv.log-backup.max-flush-interval online. 
(#14425) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#14433 Signed-off-by: joccau Signed-off-by: Zak Zhao <57036248+joccau@users.noreply.github.com> Co-authored-by: 山岚 <36239017+YuJuncen@users.noreply.github.com> --- components/backup-stream/src/config.rs | 38 +++++++--- components/backup-stream/src/endpoint.rs | 16 +++- components/backup-stream/src/router.rs | 94 +++++++++++++++++++----- components/server/src/server.rs | 9 ++- src/config/mod.rs | 21 +++--- tests/integrations/config/mod.rs | 2 +- 6 files changed, 131 insertions(+), 49 deletions(-) diff --git a/components/backup-stream/src/config.rs b/components/backup-stream/src/config.rs index dfee838c333..03afa47dd97 100644 --- a/components/backup-stream/src/config.rs +++ b/components/backup-stream/src/config.rs @@ -1,26 +1,40 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use online_config::{ConfigChange, ConfigManager}; -use tikv_util::worker::Scheduler; +use std::sync::{Arc, RwLock}; + +use online_config::{ConfigChange, ConfigManager, OnlineConfig}; +use tikv::config::BackupStreamConfig; +use tikv_util::{info, worker::Scheduler}; use crate::endpoint::Task; -pub struct BackupStreamConfigManager(pub Scheduler); +#[derive(Clone)] +pub struct BackupStreamConfigManager { + pub scheduler: Scheduler, + pub config: Arc>, +} + +impl BackupStreamConfigManager { + pub fn new(scheduler: Scheduler, cfg: BackupStreamConfig) -> Self { + let config = Arc::new(RwLock::new(cfg)); + Self { scheduler, config } + } +} impl ConfigManager for BackupStreamConfigManager { fn dispatch( &mut self, change: ConfigChange, ) -> std::result::Result<(), Box> { - self.0.schedule(Task::ChangeConfig(change))?; - Ok(()) - } -} + info!( + "log backup config changed"; + "change" => ?change, + ); + let mut cfg = self.config.as_ref().write().unwrap(); + cfg.update(change)?; + cfg.validate()?; -impl std::ops::Deref for BackupStreamConfigManager { - type Target = 
Scheduler; - - fn deref(&self) -> &Self::Target { - &self.0 + self.scheduler.schedule(Task::ChangeConfig(cfg.clone()))?; + Ok(()) } } diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 68f040217ea..c8302f6dd9e 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -13,7 +13,6 @@ use kvproto::{ brpb::{StreamBackupError, StreamBackupTaskInfo}, metapb::Region, }; -use online_config::ConfigChange; use pd_client::PdClient; use raftstore::{ coprocessor::{CmdBatch, ObserveHandle, RegionInfoProvider}, @@ -877,6 +876,15 @@ where } } + fn on_update_change_config(&mut self, cfg: BackupStreamConfig) { + info!( + "update log backup config"; + "config" => ?cfg, + ); + self.range_router.udpate_config(&cfg); + self.config = cfg; + } + /// Modify observe over some region. /// This would register the region to the RaftStore. pub fn on_modify_observe(&self, op: ObserveOp) { @@ -898,8 +906,8 @@ where Task::ModifyObserve(op) => self.on_modify_observe(op), Task::ForceFlush(task) => self.on_force_flush(task), Task::FatalError(task, err) => self.on_fatal_error(task, err), - Task::ChangeConfig(_) => { - warn!("change config online isn't supported for now.") + Task::ChangeConfig(cfg) => { + self.on_update_change_config(cfg); } Task::Sync(cb, mut cond) => { if cond(&self.range_router) { @@ -1081,7 +1089,7 @@ impl fmt::Debug for RegionCheckpointOperation { pub enum Task { WatchTask(TaskOp), BatchEvent(Vec), - ChangeConfig(ConfigChange), + ChangeConfig(BackupStreamConfig), /// Change the observe status of some region. ModifyObserve(ObserveOp), /// Convert status of some task into `flushing` and do flush then. 
diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 5b862f732a2..4b1022e7b39 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -31,6 +31,7 @@ use protobuf::Message; use raftstore::coprocessor::CmdBatch; use slog_global::debug; use tidb_query_datatype::codec::table::decode_table_id; +use tikv::config::BackupStreamConfig; use tikv_util::{ box_err, codec::stream_event::EventEncoder, @@ -341,9 +342,9 @@ pub struct RouterInner { /// too many temporary files. scheduler: Scheduler, /// The size limit of temporary file per task. - temp_file_size_limit: u64, + temp_file_size_limit: AtomicU64, /// The max duration the local data can be pending. - max_flush_interval: Duration, + max_flush_interval: SyncRwLock, } impl std::fmt::Debug for RouterInner { @@ -368,11 +369,17 @@ impl RouterInner { tasks: Mutex::new(HashMap::default()), prefix, scheduler, - temp_file_size_limit, - max_flush_interval, + temp_file_size_limit: AtomicU64::new(temp_file_size_limit), + max_flush_interval: SyncRwLock::new(max_flush_interval), } } + pub fn udpate_config(&self, config: &BackupStreamConfig) { + *self.max_flush_interval.write().unwrap() = config.max_flush_interval.0; + self.temp_file_size_limit + .store(config.file_size_limit.0, Ordering::SeqCst); + } + /// Find the task for a region. If `end_key` is empty, search from start_key /// to +inf. It simply search for a random possible overlapping range and /// get its task. 
@@ -430,7 +437,6 @@ impl RouterInner { let stream_task = StreamTaskInfo::new( prefix_path, task, - self.max_flush_interval, ranges.clone(), merged_file_size_limit, compression_type, @@ -507,6 +513,7 @@ impl RouterInner { async fn on_event(&self, task: String, events: ApplyEvents) -> Result<()> { let task_info = self.get_task_info(&task).await?; task_info.on_events(events).await?; + let file_size_limit = self.temp_file_size_limit.load(Ordering::SeqCst); // When this event make the size of temporary files exceeds the size limit, make // a flush. Note that we only flush if the size is less than the limit before @@ -515,10 +522,10 @@ impl RouterInner { "backup stream statics size"; "task" => ?task, "next_size" => task_info.total_size(), - "size_limit" => self.temp_file_size_limit, + "size_limit" => file_size_limit, ); let cur_size = task_info.total_size(); - if cur_size > self.temp_file_size_limit && !task_info.is_flushing() { + if cur_size > file_size_limit && !task_info.is_flushing() { info!("try flushing task"; "task" => %task, "size" => %cur_size); if task_info.set_flushing_status_cas(false, true).is_ok() { if let Err(e) = self.scheduler.schedule(Task::Flush(task)) { @@ -592,6 +599,8 @@ impl RouterInner { /// tick aims to flush log/meta to extern storage periodically. pub async fn tick(&self) { + let max_flush_interval = self.max_flush_interval.rl().to_owned(); + for (name, task_info) in self.tasks.lock().await.iter() { if let Err(e) = self .scheduler @@ -602,7 +611,9 @@ impl RouterInner { // if stream task need flush this time, schedule Task::Flush, or update time // justly. 
- if task_info.should_flush() && task_info.set_flushing_status_cas(false, true).is_ok() { + if task_info.should_flush(&max_flush_interval) + && task_info.set_flushing_status_cas(false, true).is_ok() + { info!( "backup stream trigger flush task by tick"; "task" => ?task_info, @@ -763,8 +774,6 @@ pub struct StreamTaskInfo { flushing_meta_files: RwLock>, /// last_flush_ts represents last time this task flushed to storage. last_flush_time: AtomicPtr, - /// flush_interval represents the tick interval of flush, setting by users. - flush_interval: Duration, /// The min resolved TS of all regions involved. min_resolved_ts: TimeStamp, /// Total size of all temporary files in byte. @@ -825,7 +834,6 @@ impl StreamTaskInfo { pub async fn new( temp_dir: PathBuf, task: StreamTask, - flush_interval: Duration, ranges: Vec<(Vec, Vec)>, merged_file_size_limit: u64, compression_type: CompressionType, @@ -846,7 +854,6 @@ impl StreamTaskInfo { flushing_files: RwLock::default(), flushing_meta_files: RwLock::default(), last_flush_time: AtomicPtr::new(Box::into_raw(Box::new(Instant::now()))), - flush_interval, total_size: AtomicUsize::new(0), flushing: AtomicBool::new(false), flush_fail_count: AtomicUsize::new(0), @@ -946,12 +953,11 @@ impl StreamTaskInfo { unsafe { Box::from_raw(ptr) }; } - pub fn should_flush(&self) -> bool { + pub fn should_flush(&self, flush_interval: &Duration) -> bool { // When it doesn't flush since 0.8x of auto-flush interval, we get ready to // start flushing. So that we will get a buffer for the cost of actual // flushing. 
- self.get_last_flush_time().saturating_elapsed_secs() - >= self.flush_interval.as_secs_f64() * 0.8 + self.get_last_flush_time().saturating_elapsed_secs() >= flush_interval.as_secs_f64() * 0.8 } pub fn is_flushing(&self) -> bool { @@ -1511,15 +1517,17 @@ mod tests { use external_storage::{ExternalData, NoopStorage}; use futures::AsyncReadExt; use kvproto::brpb::{Local, Noop, StorageBackend, StreamBackupTaskInfo}; + use online_config::{ConfigManager, OnlineConfig}; use tikv_util::{ codec::number::NumberEncoder, + config::ReadableDuration, worker::{dummy_scheduler, ReceiverWrapper}, }; use tokio::fs::File; use txn_types::{Write, WriteType}; use super::*; - use crate::utils; + use crate::{config::BackupStreamConfigManager, utils}; #[derive(Debug)] struct KvEventsBuilder { @@ -1835,7 +1843,6 @@ mod tests { let task = StreamTaskInfo::new( tmp_dir.path().to_path_buf(), stream_task, - Duration::from_secs(300), vec![(vec![], vec![])], merged_file_size_limit, CompressionType::Zstd, @@ -2194,7 +2201,6 @@ mod tests { let task = StreamTaskInfo::new( tmp_dir.path().to_path_buf(), stream_task, - Duration::from_secs(300), vec![(vec![], vec![])], 0x100000, CompressionType::Zstd, @@ -2308,4 +2314,56 @@ mod tests { assert_eq!(result.is_ok(), true); Ok(()) } + + #[test] + fn test_update_config() { + let (sched, rx) = dummy_scheduler(); + let cfg = BackupStreamConfig::default(); + let router = Arc::new(RouterInner::new( + PathBuf::new(), + sched.clone(), + 1, + cfg.max_flush_interval.0, + )); + + let mut cfg_manager = BackupStreamConfigManager::new(sched, cfg.clone()); + + let _new_cfg = BackupStreamConfig { + max_flush_interval: ReadableDuration::minutes(2), + ..Default::default() + }; + + let changed = cfg.diff(&_new_cfg); + cfg_manager.dispatch(changed).unwrap(); + + let cmds = collect_recv(rx); + assert_eq!(cmds.len(), 1); + match &cmds[0] { + Task::ChangeConfig(cfg) => { + assert!(matches!(cfg, _new_cfg)); + router.udpate_config(cfg); + assert_eq!( + 
router.max_flush_interval.rl().to_owned(), + _new_cfg.max_flush_interval.0 + ); + } + _ => panic!("unexpected cmd!"), + } + } + + #[test] + fn test_udpate_invalid_config() { + let cfg = BackupStreamConfig::default(); + let (sched, _) = dummy_scheduler(); + let mut cfg_manager = BackupStreamConfigManager::new(sched, cfg.clone()); + + let new_cfg = BackupStreamConfig { + max_flush_interval: ReadableDuration::secs(0), + ..Default::default() + }; + + let changed = cfg.diff(&new_cfg); + let r = cfg_manager.dispatch(changed); + assert!(r.is_err()); + } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index f721097a514..06df19da1d6 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1027,7 +1027,7 @@ where ); // Start backup stream - let backup_stream_scheduler = if self.config.backup_stream.enable { + let backup_stream_scheduler = if self.config.log_backup.enable { // Create backup stream. let mut backup_stream_worker = Box::new(LazyWorker::new("backup-stream")); let backup_stream_scheduler = backup_stream_worker.scheduler(); @@ -1038,7 +1038,10 @@ where // Register config manager. 
cfg_controller.register( tikv::config::Module::BackupStream, - Box::new(BackupStreamConfigManager(backup_stream_worker.scheduler())), + Box::new(BackupStreamConfigManager::new( + backup_stream_worker.scheduler(), + self.config.log_backup.clone(), + )), ); let backup_stream_endpoint = backup_stream::Endpoint::new( @@ -1047,7 +1050,7 @@ where Arc::clone(&self.pd_client), pd_client::meta_storage::Source::LogBackup, ))), - self.config.backup_stream.clone(), + self.config.log_backup.clone(), backup_stream_scheduler.clone(), backup_stream_ob, self.region_info_accessor.clone(), diff --git a/src/config/mod.rs b/src/config/mod.rs index 3eb15ba8ace..f8bbd1be9f5 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2662,7 +2662,7 @@ impl Default for BackupConfig { pub struct BackupStreamConfig { #[online_config(skip)] pub min_ts_interval: ReadableDuration, - #[online_config(skip)] + pub max_flush_interval: ReadableDuration, #[online_config(skip)] pub num_threads: usize, @@ -2670,7 +2670,7 @@ pub struct BackupStreamConfig { pub enable: bool, #[online_config(skip)] pub temp_path: String, - #[online_config(skip)] + pub file_size_limit: ReadableSize, #[online_config(skip)] pub initial_scan_pending_memory_quota: ReadableSize, @@ -3136,8 +3136,7 @@ pub struct TikvConfig { #[online_config(submodule)] // The term "log backup" and "backup stream" are identity. // The "log backup" should be the only product name exposed to the user. 
- #[serde(rename = "log-backup")] - pub backup_stream: BackupStreamConfig, + pub log_backup: BackupStreamConfig, #[online_config(submodule)] pub pessimistic_txn: PessimisticTxnConfig, @@ -3202,7 +3201,7 @@ impl Default for TikvConfig { cdc: CdcConfig::default(), resolved_ts: ResolvedTsConfig::default(), resource_metering: ResourceMeteringConfig::default(), - backup_stream: BackupStreamConfig::default(), + log_backup: BackupStreamConfig::default(), causal_ts: CausalTsConfig::default(), resource_control: ResourceControlConfig::default(), } @@ -3333,8 +3332,8 @@ impl TikvConfig { ); } - if self.backup_stream.temp_path.is_empty() { - self.backup_stream.temp_path = + if self.log_backup.temp_path.is_empty() { + self.log_backup.temp_path = config::canonicalize_sub_path(&self.storage.data_dir, "log-backup-temp")?; } @@ -3360,7 +3359,7 @@ impl TikvConfig { .validate(self.storage.engine == EngineType::RaftKv2)?; self.import.validate()?; self.backup.validate()?; - self.backup_stream.validate()?; + self.log_backup.validate()?; self.cdc.validate()?; self.pessimistic_txn.validate()?; self.gc.validate()?; @@ -4149,7 +4148,7 @@ impl From<&str> for Module { "security" => Module::Security, "import" => Module::Import, "backup" => Module::Backup, - "backup_stream" => Module::BackupStream, + "log_backup" => Module::BackupStream, "pessimistic_txn" => Module::PessimisticTxn, "gc" => Module::Gc, "cdc" => Module::Cdc, @@ -5645,7 +5644,7 @@ mod tests { cfg.raftdb.max_sub_compactions = default_cfg.raftdb.max_sub_compactions; cfg.raftdb.titan.max_background_gc = default_cfg.raftdb.titan.max_background_gc; cfg.backup.num_threads = default_cfg.backup.num_threads; - cfg.backup_stream.num_threads = default_cfg.backup_stream.num_threads; + cfg.log_backup.num_threads = default_cfg.log_backup.num_threads; // There is another set of config values that we can't directly compare: // When the default values are `None`, but are then resolved to `Some(_)` later @@ -5835,7 +5834,7 @@ mod tests { 
("security", Module::Security), ("import", Module::Import), ("backup", Module::Backup), - ("backup_stream", Module::BackupStream), + ("log_backup", Module::BackupStream), ("pessimistic_txn", Module::PessimisticTxn), ("gc", Module::Gc), ("cdc", Module::Cdc), diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 02b5c711e96..7d40cde87d5 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -774,7 +774,7 @@ fn test_serde_custom_tikv_config() { }, ..Default::default() }; - value.backup_stream = BackupStreamConfig { + value.log_backup = BackupStreamConfig { max_flush_interval: ReadableDuration::secs(11), num_threads: 7, enable: true, From d269912fa7ede76f66ca80830701d9fc260bb5c4 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Wed, 29 Mar 2023 17:38:54 +0800 Subject: [PATCH 0610/1149] txn: record the latch wait, flow control throttle, quota delay and scheduler process duration (#14476) ref tikv/tikv#12362 Signed-off-by: cfzjywxk Co-authored-by: Ti Chi Robot --- components/tracker/src/lib.rs | 6 ++ src/storage/txn/scheduler.rs | 95 ++++++++++++++++++++----- tests/integrations/server/kv_service.rs | 1 + 3 files changed, 86 insertions(+), 16 deletions(-) diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index 35ae0fc15f2..fafd8415039 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -43,6 +43,9 @@ impl Tracker { } pub fn write_write_detail(&self, detail: &mut pb::WriteDetail) { + detail.set_latch_wait_nanos(self.metrics.latch_wait_nanos); + detail.set_process_nanos(self.metrics.scheduler_process_nanos); + detail.set_throttle_nanos(self.metrics.scheduler_throttle_nanos); detail.set_pessimistic_lock_wait_nanos(self.metrics.pessimistic_lock_wait_nanos); detail.set_store_batch_wait_nanos(self.metrics.wf_batch_wait_nanos); detail.set_propose_send_wait_nanos( @@ -132,6 +135,9 @@ pub struct RequestMetrics { pub internal_key_skipped_count: u64, pub 
deleted_key_skipped_count: u64, pub pessimistic_lock_wait_nanos: u64, + pub latch_wait_nanos: u64, + pub scheduler_process_nanos: u64, + pub scheduler_throttle_nanos: u64, // temp instant used in raftstore metrics, first be the instant when creating the write // callback, then reset when it is ready to apply pub write_instant: Option, diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 17110a07e7b..85c41124b89 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -52,7 +52,7 @@ use resource_metering::{FutureExt, ResourceTagFactory}; use smallvec::{smallvec, SmallVec}; use tikv_kv::{Modify, Snapshot, SnapshotExt, WriteData, WriteEvent}; use tikv_util::{quota_limiter::QuotaLimiter, time::Instant, timer::GLOBAL_TIMER_HANDLE}; -use tracker::{get_tls_tracker_token, set_tls_tracker_token, TrackerToken}; +use tracker::{get_tls_tracker_token, set_tls_tracker_token, TrackerToken, GLOBAL_TRACKERS}; use txn_types::TimeStamp; use crate::{ @@ -188,9 +188,15 @@ impl TaskContext { } fn on_schedule(&mut self) { + let elapsed = self.latch_timer.saturating_elapsed(); + if let Some(task) = &self.task.as_ref() { + GLOBAL_TRACKERS.with_tracker(task.tracker, |tracker| { + tracker.metrics.latch_wait_nanos = elapsed.as_nanos() as u64; + }); + } SCHED_LATCH_HISTOGRAM_VEC .get(self.tag) - .observe(self.latch_timer.saturating_elapsed_secs()); + .observe(elapsed.as_secs_f64()); } // Try to own this TaskContext by setting `owned` from false to true. @@ -779,6 +785,7 @@ impl TxnScheduler { new_acquired_locks: Vec, tag: CommandKind, group_name: &str, + sched_details: &SchedulerDetails, ) { // TODO: Does async apply prewrite worth a special metric here? 
if pipelined { @@ -820,6 +827,15 @@ impl TxnScheduler { SCHED_STAGE_COUNTER_VEC.get(tag).next_cmd.inc(); self.schedule_command(None, cmd, cb, None); } else { + GLOBAL_TRACKERS.with_tracker(sched_details.tracker, |tracker| { + tracker.metrics.scheduler_process_nanos = sched_details + .start_process_instant + .saturating_elapsed() + .as_nanos() + as u64; + tracker.metrics.scheduler_throttle_nanos = + sched_details.flow_control_nanos + sched_details.quota_limit_delay_nanos; + }); cb.execute(pr); } } else { @@ -1073,7 +1089,7 @@ impl TxnScheduler { let region_id = task.cmd.ctx().get_region_id(); let ts = task.cmd.ts(); - let mut statistics = Statistics::default(); + let mut sched_details = SchedulerDetails::new(task.tracker, timer); match &task.cmd { Command::Prewrite(_) | Command::PrewritePessimistic(_) => { tls_collect_query(region_id, QueryKind::Prewrite); @@ -1092,18 +1108,19 @@ impl TxnScheduler { fail_point!("scheduler_process"); if task.cmd.readonly() { - self.process_read(snapshot, task, &mut statistics); + self.process_read(snapshot, task, &mut sched_details); } else { - self.process_write(snapshot, task, &mut statistics).await; + self.process_write(snapshot, task, &mut sched_details).await; }; - tls_collect_scan_details(tag.get_str(), &statistics); + tls_collect_scan_details(tag.get_str(), &sched_details.stat); let elapsed = timer.saturating_elapsed(); slow_log!( elapsed, - "[region {}] scheduler handle command: {}, ts: {}", + "[region {}] scheduler handle command: {}, ts: {}, details: {:?}", region_id, tag, - ts + ts, + sched_details, ); } .in_resource_metering_tag(resource_tag) @@ -1112,7 +1129,7 @@ impl TxnScheduler { /// Processes a read command within a worker thread, then posts /// `ReadFinished` message back to the `TxnScheduler`. 
- fn process_read(self, snapshot: E::Snap, task: Task, statistics: &mut Statistics) { + fn process_read(self, snapshot: E::Snap, task: Task, sched_details: &mut SchedulerDetails) { fail_point!("txn_before_process_read"); debug!("process read cmd in worker pool"; "cid" => task.cid); @@ -1122,7 +1139,7 @@ impl TxnScheduler { let cmd = task.cmd; let pr = unsafe { with_perf_context::(tag, || { - cmd.process_read(snapshot, statistics) + cmd.process_read(snapshot, &mut sched_details.stat) .unwrap_or_else(|e| ProcessResult::Failed { err: e.into() }) }) }; @@ -1135,7 +1152,12 @@ impl TxnScheduler { /// Processes a write command within a worker thread, then posts either a /// `WriteFinished` message if successful or a `FinishedWithErr` message /// back to the `TxnScheduler`. - async fn process_write(self, snapshot: E::Snap, task: Task, statistics: &mut Statistics) { + async fn process_write( + self, + snapshot: E::Snap, + task: Task, + sched_details: &mut SchedulerDetails, + ) { fail_point!("txn_before_process_write"); let write_bytes = task.cmd.write_bytes(); let tag = task.cmd.tag(); @@ -1174,7 +1196,7 @@ impl TxnScheduler { lock_mgr: &self.inner.lock_mgr, concurrency_manager, extra_op: task.extra_op, - statistics, + statistics: &mut sched_details.stat, async_apply_prewrite: self.inner.enable_async_apply_prewrite, raw_ext, }; @@ -1192,17 +1214,32 @@ impl TxnScheduler { res }; + let process_end = Instant::now(); if write_result.is_ok() { // TODO: write bytes can be a bit inaccurate due to error requests or in-memory // pessimistic locks. 
sample.add_write_bytes(write_bytes); } - let read_bytes = statistics.cf_statistics(CF_DEFAULT).flow_stats.read_bytes - + statistics.cf_statistics(CF_LOCK).flow_stats.read_bytes - + statistics.cf_statistics(CF_WRITE).flow_stats.read_bytes; + let read_bytes = sched_details + .stat + .cf_statistics(CF_DEFAULT) + .flow_stats + .read_bytes + + sched_details + .stat + .cf_statistics(CF_LOCK) + .flow_stats + .read_bytes + + sched_details + .stat + .cf_statistics(CF_WRITE) + .flow_stats + .read_bytes; sample.add_read_bytes(read_bytes); let quota_delay = quota_limiter.consume_sample(sample, true).await; if !quota_delay.is_zero() { + let actual_quota_delay = process_end.saturating_elapsed(); + sched_details.quota_limit_delay_nanos = actual_quota_delay.as_nanos() as u64; TXN_COMMAND_THROTTLE_TIME_COUNTER_VEC_STATIC .get(tag) .inc_by(quota_delay.as_micros() as u64); @@ -1298,6 +1335,7 @@ impl TxnScheduler { new_acquired_locks, tag, &group_name, + sched_details, ); return; } @@ -1329,6 +1367,7 @@ impl TxnScheduler { new_acquired_locks, tag, &group_name, + sched_details, ); return; } @@ -1383,7 +1422,9 @@ impl TxnScheduler { .await .unwrap(); } - SCHED_THROTTLE_TIME.observe(start.saturating_elapsed_secs()); + let elapsed = start.saturating_elapsed(); + SCHED_THROTTLE_TIME.observe(elapsed.as_secs_f64()); + sched_details.flow_control_nanos = elapsed.as_nanos() as u64; } } @@ -1516,6 +1557,7 @@ impl TxnScheduler { new_acquired_locks, tag, &group_name, + sched_details, ); KV_COMMAND_KEYWRITE_HISTOGRAM_VEC .get(tag) @@ -1788,6 +1830,27 @@ enum PessimisticLockMode { InMemory, } +#[derive(Debug)] +struct SchedulerDetails { + tracker: TrackerToken, + stat: Statistics, + start_process_instant: Instant, + quota_limit_delay_nanos: u64, + flow_control_nanos: u64, +} + +impl SchedulerDetails { + fn new(tracker: TrackerToken, start_process_instant: Instant) -> Self { + SchedulerDetails { + tracker, + stat: Default::default(), + start_process_instant, + quota_limit_delay_nanos: 0, + 
flow_control_nanos: 0, + } + } +} + #[cfg(test)] mod tests { use std::thread; diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 284a3f1cb89..5e47ad4745b 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -2430,6 +2430,7 @@ fn test_commands_write_detail() { // assert!(wd.get_apply_mutex_lock_nanos() > 0); assert!(wd.get_apply_write_wal_nanos() > 0); assert!(wd.get_apply_write_memtable_nanos() > 0); + assert!(wd.get_process_nanos() > 0); }; let mut mutation = Mutation::default(); From 7e6dac46bfdca76b4b40c55845c4250e163b6405 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 30 Mar 2023 14:36:55 +0800 Subject: [PATCH 0611/1149] sst_import: make apply asynchronous (#14363) ref tikv/tikv#13848 Signed-off-by: hillium Signed-off-by: Yu Juncen --- components/backup-stream/src/utils.rs | 18 +- components/external_storage/src/lib.rs | 2 +- components/sst_importer/src/metrics.rs | 17 +- components/sst_importer/src/sst_importer.rs | 430 ++++++------------- components/tikv_kv/src/lib.rs | 2 +- src/import/mod.rs | 1 + src/import/raft_writer.rs | 451 ++++++++++++++++++++ src/import/sst_service.rs | 128 +++--- 8 files changed, 695 insertions(+), 354 deletions(-) create mode 100644 src/import/raft_writer.rs diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 77c689da70d..d94ba59b2d5 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -445,15 +445,6 @@ pub struct CallbackWaitGroup { on_finish_all: std::sync::Mutex>>, } -/// A shortcut for making an opaque future type for return type or argument -/// type, which is sendable and not borrowing any variables. -/// -/// `fut![T]` == `impl Future + Send + 'static` -#[macro_export(crate)] -macro_rules! 
future { - ($t:ty) => { impl core::future::Future + Send + 'static }; -} - impl CallbackWaitGroup { pub fn new() -> Arc { Arc::new(Self { @@ -831,6 +822,15 @@ impl<'a> slog::KV for SlogRegion<'a> { } } +/// A shortcut for making an opaque future type for return type or argument +/// type, which is sendable and not borrowing any variables. +/// +/// `future![T]` == `impl Future + Send + 'static` +#[macro_export] +macro_rules! future { + ($t:ty) => { impl core::future::Future + Send + 'static }; +} + pub fn debug_iter(t: impl Iterator) -> impl std::fmt::Debug { DebugIter(RefCell::new(t)) } diff --git a/components/external_storage/src/lib.rs b/components/external_storage/src/lib.rs index c344f09968b..211a1b52ad6 100644 --- a/components/external_storage/src/lib.rs +++ b/components/external_storage/src/lib.rs @@ -345,7 +345,7 @@ where pub const MIN_READ_SPEED: usize = 8192; pub async fn read_external_storage_info_buff( - reader: &mut (dyn AsyncRead + Unpin), + reader: &mut (dyn AsyncRead + Unpin + Send), speed_limiter: &Limiter, expected_length: u64, expected_sha256: Option>, diff --git a/components/sst_importer/src/metrics.rs b/components/sst_importer/src/metrics.rs index 6b4af299ba8..2737d592fc0 100644 --- a/components/sst_importer/src/metrics.rs +++ b/components/sst_importer/src/metrics.rs @@ -55,12 +55,12 @@ lazy_static! { pub static ref IMPORTER_DOWNLOAD_BYTES: Histogram = register_histogram!( "tikv_import_download_bytes", "Bucketed histogram of importer download bytes", - exponential_buckets(1024.0, 2.0, 20).unwrap() + exponential_buckets(16.0, 2.0, 20).unwrap() ).unwrap(); pub static ref IMPORTER_APPLY_BYTES: Histogram = register_histogram!( "tikv_import_apply_bytes", "Bucketed histogram of importer apply bytes", - exponential_buckets(1024.0, 2.0, 20).unwrap() + exponential_buckets(16.0, 2.0, 20).unwrap() ) .unwrap(); pub static ref IMPORTER_INGEST_DURATION: HistogramVec = register_histogram_vec!( @@ -113,7 +113,18 @@ lazy_static! 
{ ).unwrap(); pub static ref CACHE_EVENT: IntCounterVec = register_int_counter_vec!( "tikv_import_apply_cache_event", - "The events of caching. event = {add, remove, out-of-quota}", + "The events of caching. event = {add, remove, out-of-quota, hit}", + &["type"] + ).unwrap(); + pub static ref APPLIER_EVENT: IntCounterVec = register_int_counter_vec!( + "tikv_import_applier_event", + "The events of applier event.", &["type"] ).unwrap(); + pub static ref APPLIER_ENGINE_REQUEST_DURATION: HistogramVec = register_histogram_vec!( + "tikv_import_engine_request", + "The request lifetime track of requesting the RaftKv.", + &["type"], + exponential_buckets(0.01, 4.0, 8).unwrap() + ).unwrap(); } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 5b55974dff3..907874c6928 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -9,7 +9,7 @@ use std::{ path::{Path, PathBuf}, sync::{ atomic::{AtomicU64, Ordering}, - Arc, Condvar, Mutex, + Arc, }, time::Duration, }; @@ -40,7 +40,10 @@ use tikv_util::{ time::{Instant, Limiter}, HandyRwLock, }; -use tokio::runtime::{Handle, Runtime}; +use tokio::{ + runtime::{Handle, Runtime}, + sync::OnceCell, +}; use txn_types::{Key, TimeStamp, WriteRef}; use crate::{ @@ -106,128 +109,10 @@ impl Drop for MemUsePermit { #[derive(Clone, Debug)] pub enum CacheKvFile { - Mem(Remote), + Mem(Arc>), Fs(Arc), } -/// Remote presents a "remote" object which can be downloaded and then cached. -/// The remote object should generally implement the `ShareOwned` trait. -/// This structure doesn't manage how it is downloaded, it just manages the -/// state. You need to provide the manually downloaded data to the -/// [`DownloadPromise`]. 
-/// Below is the state transform of this: -/// ```text -/// DownloadPromise::fulfill -/// +-----------+ +-----------+ -/// |Downloading+-------->|Cached | -/// +--+--------+ +-----------+ -/// | ^ -/// | | -/// DownloadPromise | | Somebody takes -/// dropped | | over the duty. -/// v | -/// +--------+--+ -/// |Leaked | -/// +-----------+ -/// ``` -#[derive(Debug)] -pub struct Remote(Arc<(Mutex>, Condvar)>); - -impl Clone for Remote { - fn clone(&self) -> Self { - Self(Arc::clone(&self.0)) - } -} - -/// When holding this, the holder has promised to downloading the remote object -/// into local, then provide it to others waiting the object, by -/// [`Self::fulfill()`]. -pub struct DownloadPromise(Arc<(Mutex>, Condvar)>); - -impl DownloadPromise { - /// provide the downloaded data and make it cached. - pub fn fulfill(self, item: T) -> Remote { - let mut l = self.0.as_ref().0.lock().unwrap(); - debug_assert!(matches!(*l, FileCacheInner::Downloading)); - *l = FileCacheInner::Cached(item); - self.0.as_ref().1.notify_all(); - drop(l); - Remote(Arc::clone(&self.0)) - } -} - -impl Drop for DownloadPromise { - fn drop(&mut self) { - let mut l = self.0.as_ref().0.lock().unwrap(); - if matches!(*l, FileCacheInner::Downloading) { - *l = FileCacheInner::Leaked; - self.0.as_ref().1.notify_one(); - } - } -} - -impl Remote { - /// create a downloading remote object. - /// it returns the handle to the remote object and a [`DownloadPromise`], - /// the latter can be used to fulfill the remote object. - /// - /// # Examples - /// ``` - /// # use sst_importer::sst_importer::Remote; - /// let (remote_obj, promise) = Remote::download(); - /// promise.fulfill(42); - /// assert_eq!(remote_obj.get(), Some(42)); - /// ``` - pub fn download() -> (Self, DownloadPromise) { - let inner = Arc::new((Mutex::new(FileCacheInner::Downloading), Condvar::new())); - (Self(Arc::clone(&inner)), DownloadPromise(inner)) - } - - /// Block and wait until the remote object is downloaded. 
- /// # Returns - /// If the remote object has been fulfilled, return `None`. - /// If the remote object hasn't been fulfilled, return a - /// [`DownloadPromise`]: it is time to take over the duty of downloading. - /// - /// # Examples - /// ``` - /// # use sst_importer::sst_importer::Remote; - /// let (remote_obj, promise) = Remote::download(); - /// drop(promise); - /// let new_promise = remote_obj.wait_until_fill(); - /// new_promise - /// .expect("wait_until_fill should return new promise when old promise dropped") - /// .fulfill(42); - /// assert!(remote_obj.wait_until_fill().is_none()); - /// ``` - pub fn wait_until_fill(&self) -> Option> { - let mut l = self.0.as_ref().0.lock().unwrap(); - loop { - match *l { - FileCacheInner::Downloading => { - l = self.0.as_ref().1.wait(l).unwrap(); - } - FileCacheInner::Leaked => { - *l = FileCacheInner::Downloading; - return Some(DownloadPromise(Arc::clone(&self.0))); - } - FileCacheInner::Cached(_) => return None, - } - } - } -} - -impl Remote { - /// Fetch the internal object of the remote object. - pub fn get(&self) -> Option<::Shared> { - let l = self.0.as_ref().0.lock().unwrap(); - match *l { - FileCacheInner::Downloading | FileCacheInner::Leaked => None, - FileCacheInner::Cached(ref t) => Some(t.share_owned()), - } - } -} - /// returns a error indices that we are going to panic in a invalid state. /// (Rust panic information cannot be send to BR, hence client cannot know /// what happens, so we pack it into a `Result`.) @@ -238,18 +123,16 @@ fn bug(message: impl std::fmt::Display) -> Error { )) } -#[derive(Clone, Debug, PartialEq, Eq)] -enum FileCacheInner { - Downloading, - Leaked, - Cached(T), -} - impl CacheKvFile { // get the ref count of item. 
pub fn ref_count(&self) -> usize { match self { - CacheKvFile::Mem(buff) => Arc::strong_count(&buff.0), + CacheKvFile::Mem(buff) => { + if let Some(a) = buff.get() { + return Arc::strong_count(&a.content); + } + Arc::strong_count(buff) + } CacheKvFile::Fs(path) => Arc::strong_count(path), } } @@ -257,7 +140,7 @@ impl CacheKvFile { // check the item is expired. pub fn is_expired(&self, start: &Instant) -> bool { match self { - // The expired duration for memeory is 60s. + // The expired duration for memory is 60s. CacheKvFile::Mem(_) => start.saturating_elapsed() >= Duration::from_secs(60), // The expired duration for local file is 10min. CacheKvFile::Fs(_) => start.saturating_elapsed() >= Duration::from_secs(600), @@ -275,7 +158,8 @@ pub struct SstImporter { compression_types: HashMap, cached_storage: CacheMap, - download_rt: Runtime, + // We need to keep reference to the runtime so background tasks won't be dropped. + _download_rt: Runtime, file_locks: Arc>, mem_use: Arc, mem_limit: Arc, @@ -323,7 +207,7 @@ impl SstImporter { compression_types: HashMap::with_capacity(2), file_locks: Arc::new(DashMap::default()), cached_storage, - download_rt, + _download_rt: download_rt, mem_use: Arc::new(AtomicU64::new(0)), mem_limit: Arc::new(AtomicU64::new(memory_limit)), }) @@ -491,6 +375,7 @@ impl SstImporter { self.switcher.get_mode() } + #[cfg(test)] fn download_file_from_external_storage( &self, file_length: u64, @@ -501,7 +386,7 @@ impl SstImporter { speed_limiter: &Limiter, restore_config: external_storage_export::RestoreConfig, ) -> Result<()> { - self.download_rt + self._download_rt .block_on(self.async_download_file_from_external_storage( file_length, src_file_name, @@ -614,7 +499,7 @@ impl SstImporter { let mut need_retain = true; match c { CacheKvFile::Mem(buff) => { - let buflen = buff.get().map(|v| v.len()).unwrap_or_default(); + let buflen = buff.get().map(|v| v.content.len()).unwrap_or_default(); // The term of recycle memeory is 60s. 
if c.ref_count() == 1 && c.is_expired(start) { CACHE_EVENT.with_label_values(&["remove"]).inc(); @@ -685,48 +570,14 @@ impl SstImporter { } } - pub fn do_read_kv_file( + async fn exec_download( &self, meta: &KvMeta, rewrite_rule: &RewriteRule, ext_storage: Arc, speed_limiter: &Limiter, - ) -> Result { + ) -> Result { let start = Instant::now(); - let dst_name = format!("{}_{}", meta.get_name(), meta.get_range_offset()); - - let promise = { - let lock = self.file_locks.entry(dst_name); - IMPORTER_APPLY_DURATION - .with_label_values(&["download-get-lock"]) - .observe(start.saturating_elapsed().as_secs_f64()); - - match lock { - Entry::Occupied(mut ent) => match ent.get_mut() { - (CacheKvFile::Mem(buff), last_used) => { - *last_used = Instant::now(); - match buff.wait_until_fill() { - Some(handle) => handle, - None => return Ok(ent.get().0.clone()), - } - } - _ => { - return Err(bug(concat!( - "using both read-to-memory and download-to-file is unacceptable for now.", - "(If you think it is possible in the future you are reading this, ", - "please change this line to `return item.get.0.clone()`)", - "(Please also check the state transform is OK too.)", - ))); - } - }, - Entry::Vacant(ent) => { - let (cache, handle) = Remote::download(); - ent.insert((CacheKvFile::Mem(cache), Instant::now())); - handle - } - } - }; - let permit = self .request_memory(meta) .ok_or_else(|| Error::ResourceNotEnough(String::from("memory is limited")))?; @@ -755,24 +606,75 @@ impl SstImporter { file_crypter: None, }; - let buff = self.read_kv_files_from_external_storage( - file_length, - meta.get_name(), - ext_storage, - speed_limiter, - restore_config, - )?; + let buff = self + .read_kv_files_from_external_storage( + file_length, + meta.get_name(), + ext_storage, + speed_limiter, + restore_config, + ) + .await?; IMPORTER_DOWNLOAD_BYTES.observe(file_length as _); IMPORTER_APPLY_DURATION - .with_label_values(&["download"]) + .with_label_values(&["exec_download"]) 
.observe(start.saturating_elapsed().as_secs_f64()); let rewrite_buff = self.rewrite_kv_file(buff, rewrite_rule)?; - Ok(CacheKvFile::Mem(promise.fulfill(LoadedFile { + Ok(LoadedFile { content: Arc::from(rewrite_buff.into_boxed_slice()), permit, - }))) + }) + } + + pub async fn do_read_kv_file( + &self, + meta: &KvMeta, + rewrite_rule: &RewriteRule, + ext_storage: Arc, + speed_limiter: &Limiter, + ) -> Result { + let start = Instant::now(); + let dst_name = format!("{}_{}", meta.get_name(), meta.get_range_offset()); + + let cache = { + let lock = self.file_locks.entry(dst_name); + IMPORTER_APPLY_DURATION + .with_label_values(&["download-get-lock"]) + .observe(start.saturating_elapsed().as_secs_f64()); + + match lock { + Entry::Occupied(mut ent) => match ent.get_mut() { + (CacheKvFile::Mem(buff), last_used) => { + *last_used = Instant::now(); + Arc::clone(buff) + } + _ => { + return Err(bug(concat!( + "using both read-to-memory and download-to-file is unacceptable for now.", + "(If you think it is possible in the future you are reading this, ", + "please change this line to `return item.get.0.clone()`)", + "(Please also check the state transform is OK too.)", + ))); + } + }, + Entry::Vacant(ent) => { + let cache = Arc::new(OnceCell::new()); + ent.insert((CacheKvFile::Mem(Arc::clone(&cache)), Instant::now())); + cache + } + } + }; + + if cache.initialized() { + CACHE_EVENT.with_label_values(&["hit"]).inc(); + } + + cache + .get_or_try_init(|| self.exec_download(meta, rewrite_rule, ext_storage, speed_limiter)) + .await?; + Ok(CacheKvFile::Mem(cache)) } pub fn wrap_kms( @@ -795,7 +697,7 @@ impl SstImporter { } } - fn read_kv_files_from_external_storage( + async fn read_kv_files_from_external_storage( &self, file_length: u64, file_name: &str, @@ -821,15 +723,14 @@ impl SstImporter { encrypt_wrap_reader(file_crypter, inner)? 
}; - let r = - self.download_rt - .block_on(external_storage_export::read_external_storage_info_buff( - &mut reader, - speed_limiter, - file_length, - expected_sha256, - external_storage_export::MIN_READ_SPEED, - )); + let r = external_storage_export::read_external_storage_info_buff( + &mut reader, + speed_limiter, + file_length, + expected_sha256, + external_storage_export::MIN_READ_SPEED, + ) + .await; let url = ext_storage.url()?.to_string(); let buff = r.map_err(|e| Error::CannotReadExternalStorage { url: url.to_string(), @@ -841,7 +742,7 @@ impl SstImporter { Ok(buff) } - pub fn read_from_kv_file( + pub async fn read_from_kv_file( &self, meta: &KvMeta, rewrite_rule: &RewriteRule, @@ -850,13 +751,20 @@ impl SstImporter { speed_limiter: &Limiter, ) -> Result> { let c = if self.import_support_download() { - self.do_download_kv_file(meta, backend, speed_limiter)? + self.do_download_kv_file(meta, backend, speed_limiter) + .await? } else { - self.do_read_kv_file(meta, rewrite_rule, ext_storage, speed_limiter)? + self.do_read_kv_file(meta, rewrite_rule, ext_storage, speed_limiter) + .await? }; match c { // If cache memroy, it has been rewrite, return buffer directly. - CacheKvFile::Mem(buff) => buff.get().ok_or_else(|| bug("invalid cache state")), + CacheKvFile::Mem(buff) => Ok(Arc::clone( + &buff + .get() + .ok_or_else(|| bug("invalid cache state"))? + .content, + )), // If cache file name, it need to read and rewrite. CacheKvFile::Fs(path) => { let file = File::open(path.as_ref())?; @@ -870,7 +778,7 @@ impl SstImporter { } } - pub fn do_download_kv_file( + pub async fn do_download_kv_file( &self, meta: &KvMeta, backend: &StorageBackend, @@ -910,7 +818,7 @@ impl SstImporter { expected_sha256, file_crypter: None, }; - self.download_file_from_external_storage( + self.async_download_file_from_external_storage( meta.get_length(), src_name, path.temp.clone(), @@ -918,8 +826,10 @@ impl SstImporter { false, // don't support encrypt for now. 
speed_limiter, + "", restore_config, - )?; + ) + .await?; info!( "download file finished {}, offset {}, length {}", src_name, @@ -1082,7 +992,7 @@ impl SstImporter { speed_limiter: Limiter, engine: E, ) -> Result> { - self.download_rt.block_on(self.download_ext( + self._download_rt.block_on(self.download_ext( meta, backend, name, @@ -1475,10 +1385,7 @@ mod tests { use tempfile::Builder; use test_sst_importer::*; use test_util::new_test_key_manager; - use tikv_util::{ - codec::stream_event::EventEncoder, stream::block_on_external_io, - sys::thread::StdThreadBuildWrapper, - }; + use tikv_util::{codec::stream_event::EventEncoder, stream::block_on_external_io}; use txn_types::{Value, WriteType}; use uuid::Uuid; @@ -2052,17 +1959,16 @@ mod tests { // test do_read_kv_file() let rewrite_rule = &new_rewrite_rule(b"", b"", 12345); - let output = importer - .do_read_kv_file( - &kv_meta, - rewrite_rule, - ext_storage, - &Limiter::new(f64::INFINITY), - ) - .unwrap(); + let output = block_on_external_io(importer.do_read_kv_file( + &kv_meta, + rewrite_rule, + ext_storage, + &Limiter::new(f64::INFINITY), + )) + .unwrap(); assert!( - matches!(output.clone(), CacheKvFile::Mem(rc) if &*rc.get().unwrap() == buff.as_slice()), + matches!(output.clone(), CacheKvFile::Mem(rc) if &*rc.get().unwrap().content == buff.as_slice()), "{:?}", output ); @@ -2116,15 +2022,14 @@ mod tests { ..Default::default() }; - let output = importer - .read_kv_files_from_external_storage( - kv_meta.get_length(), - kv_meta.get_name(), - ext_storage.clone(), - &Limiter::new(f64::INFINITY), - restore_config, - ) - .unwrap(); + let output = block_on_external_io(importer.read_kv_files_from_external_storage( + kv_meta.get_length(), + kv_meta.get_name(), + ext_storage.clone(), + &Limiter::new(f64::INFINITY), + restore_config, + )) + .unwrap(); assert_eq!( buff, output, @@ -2140,15 +2045,14 @@ mod tests { ..Default::default() }; - let output = importer - .read_kv_files_from_external_storage( - len, - 
kv_meta.get_name(), - ext_storage, - &Limiter::new(f64::INFINITY), - restore_config, - ) - .unwrap(); + let output = block_on_external_io(importer.read_kv_files_from_external_storage( + len, + kv_meta.get_name(), + ext_storage, + &Limiter::new(f64::INFINITY), + restore_config, + )) + .unwrap(); assert_eq!(&buff[offset as _..(offset + len) as _], &output[..]); } @@ -2182,15 +2086,14 @@ mod tests { // test do_download_kv_file(). assert!(importer.import_support_download()); - let output = importer - .read_from_kv_file( - &kv_meta, - rewrite_rule, - ext_storage, - &backend, - &Limiter::new(f64::INFINITY), - ) - .unwrap(); + let output = block_on_external_io(importer.read_from_kv_file( + &kv_meta, + rewrite_rule, + ext_storage, + &backend, + &Limiter::new(f64::INFINITY), + )) + .unwrap(); assert_eq!(*output, buff); check_file_exists(&path.save, None); @@ -3101,7 +3004,7 @@ mod tests { SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1).unwrap(); let key = "file1"; - let (r, _) = Remote::download(); + let r = Arc::new(OnceCell::new()); let value = (CacheKvFile::Mem(r), Instant::now()); let lock = importer.file_locks.entry(key.to_string()).or_insert(value); @@ -3119,53 +3022,4 @@ mod tests { let _buff = v.0.clone(); assert_eq!(v.0.ref_count(), 2); } - - #[test] - fn test_remote_waiting() { - let (r, dl) = Remote::download(); - let r2 = r.clone(); - let js = (0..2) - .map(|_| { - let r = r.clone(); - std::thread::spawn(move || { - assert!(r.wait_until_fill().is_none()); - r.get() - }) - }) - .collect::>(); - dl.fulfill(42); - for j in js { - assert!(matches!(j.join(), Ok(Some(42)))); - } - assert_eq!(r2.get(), Some(42)); - } - - #[test] - fn test_remote_drop_in_one_thread() { - let (r, dl) = Remote::download(); - drop(dl); - let p = r.wait_until_fill(); - assert!(p.is_some()); - p.unwrap().fulfill("Kitty"); - assert_eq!(r.get(), Some("Kitty")); - } - - #[test] - fn test_remote_take_duty() { - let (r, dl) = Remote::download(); - let js = (0..4).map(|i| { 
- let r = r.clone(); - std::thread::Builder::new() - .name(format!("rd-{}", i)) - .spawn_wrapper(move || match r.wait_until_fill() { - Some(x) => x.fulfill(42).get(), - None => r.get(), - }) - .unwrap() - }); - drop(dl); - for j in js { - assert!(matches!(j.join(), Ok(Some(42)))); - } - } } diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 05d039d2690..9e6c1b9ca3a 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -225,7 +225,7 @@ impl PessimisticLockPair for Modify { } } -#[derive(Default)] +#[derive(Default, Debug)] pub struct WriteData { pub modifies: Vec, pub extra: TxnExtra, diff --git a/src/import/mod.rs b/src/import/mod.rs index e2fa3729e52..6fe43b9aa32 100644 --- a/src/import/mod.rs +++ b/src/import/mod.rs @@ -13,6 +13,7 @@ //! inside TiKV because it needs to interact with raftstore. mod duplicate_detect; +mod raft_writer; mod sst_service; use std::fmt::Debug; diff --git a/src/import/raft_writer.rs b/src/import/raft_writer.rs new file mode 100644 index 00000000000..a40297b932e --- /dev/null +++ b/src/import/raft_writer.rs @@ -0,0 +1,451 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. +//! This module contains types for asynchronously applying the write batches +//! into the storage. 
+ +use std::{ + collections::HashMap, + sync::{Arc, Mutex}, +}; + +use futures::{Future, Stream, StreamExt}; +use kvproto::kvrpcpb::Context; +use sst_importer::metrics::{APPLIER_ENGINE_REQUEST_DURATION, APPLIER_EVENT, IMPORTER_APPLY_BYTES}; +use tikv_kv::{with_tls_engine, Engine, WriteData, WriteEvent}; +use tikv_util::time::Instant; +use tokio::sync::{Semaphore, SemaphorePermit}; + +use crate::storage; + +pub async fn wait_write( + mut s: impl Stream + Send + Unpin, +) -> storage::Result<()> { + match s.next().await { + Some(WriteEvent::Finished(Ok(()))) => Ok(()), + Some(WriteEvent::Finished(Err(e))) => Err(e.into()), + Some(e) => Err(box_err!("unexpected event: {:?}", e)), + None => Err(box_err!("stream closed")), + } +} + +const MAX_CONCURRENCY_PER_REGION: usize = 16; + +async fn acquire_semaphore(smp: &Arc) -> Option> { + if let Ok(pmt) = smp.try_acquire() { + return Some(pmt); + } + APPLIER_EVENT.with_label_values(&["raft-throttled"]).inc(); + smp.acquire().await.ok() +} + +#[derive(Clone, Default)] +/// A structure for throttling write throughput by region. +/// It uses the [`Engine`] stored in the thread local storage to write data. +/// Check the method [`tikv_kv::set_tls_engine`] for more details about the +/// thread local engine. +pub(crate) struct ThrottledTlsEngineWriter(Arc>); + +impl ThrottledTlsEngineWriter { + /// Write into the thread local storage engine. + /// + /// # Safety + /// + /// Before polling the future this returns, make sure the carrier thread's + /// `TLS_ENGINE_ANY` is an engine typed `E`, or at least has the same + /// memory layout of `E`. 
+ pub unsafe fn write( + &self, + wd: WriteData, + ctx: Context, + ) -> impl Future> + Send + 'static { + let mut this = self.0.lock().unwrap(); + let max_permit = this.max_permit; + let start = Instant::now_coarse(); + let sem = this + .sems + .entry(ctx.get_region_id()) + .or_insert_with(|| { + APPLIER_EVENT.with_label_values(&["new-writer"]).inc(); + Arc::new(Semaphore::new(max_permit)) + }) + .clone(); + async move { + APPLIER_ENGINE_REQUEST_DURATION + .with_label_values(&["queuing"]) + .observe(start.saturating_elapsed_secs()); + let start = Instant::now_coarse(); + let _prm = match acquire_semaphore(&sem).await { + Some(prm) => prm, + // When the permit has been closed. (Maybe tikv is shutting down?) + None => { + return Err(box_err!( + "the semaphore bind to region {} has been closed", + ctx.get_region_id() + )); + } + }; + + APPLIER_ENGINE_REQUEST_DURATION + .with_label_values(&["get_permit"]) + .observe(start.saturating_elapsed_secs()); + let start = Instant::now_coarse(); + let size = wd.size(); + let fut = with_tls_engine::(move |engine| { + engine.async_write(&ctx, wd, WriteEvent::BASIC_EVENT, None) + }); + let res = wait_write(fut).await; + + APPLIER_ENGINE_REQUEST_DURATION + .with_label_values(&["apply"]) + .observe(start.saturating_elapsed_secs()); + IMPORTER_APPLY_BYTES.observe(size as _); + res + } + } + + /// try to trigger a run of GC. + /// + /// # Returns + /// + /// If we still need to do keep doing GC (there are other references to the + /// handle), return `true`, otherwise `false`. 
+ pub fn try_gc(&self) -> bool { + if Arc::strong_count(&self.0) == 1 { + return false; + } + + let mut this = self.0.lock().unwrap(); + + let before_count = this.sems.len(); + this.sems.retain(|_, v| Arc::strong_count(v) > 1); + let after_count = this.sems.len(); + + APPLIER_EVENT + .with_label_values(&["gc-writer"]) + .inc_by((before_count.saturating_sub(after_count)) as _); + true + } + + #[cfg(test)] + pub fn with_max_concurrency_per_region(conc: usize) -> Self { + let mut inner = Inner::default(); + inner.max_permit = conc; + Self(Arc::new(Mutex::new(inner))) + } + + #[cfg(test)] + pub fn inspect_inflight(&self) -> HashMap { + let this = self.0.lock().unwrap(); + let max_permit = this.max_permit; + this.sems + .iter() + .map(|(rid, sem)| (*rid, max_permit - sem.available_permits())) + .collect() + } + + #[cfg(test)] + pub fn inspect_worker(&self) -> usize { + let this = self.0.lock().unwrap(); + this.sems.len() + } +} + +struct Inner { + sems: HashMap>, + max_permit: usize, +} + +impl Default for Inner { + fn default() -> Self { + Self { + sems: Default::default(), + max_permit: MAX_CONCURRENCY_PER_REGION, + } + } +} + +#[cfg(test)] +mod test { + use std::{convert::identity, iter::IntoIterator, sync::Mutex, time::Duration}; + + use engine_rocks::RocksEngineIterator; + use engine_traits::{Iterator, ALL_CFS, CF_DEFAULT, CF_WRITE}; + use futures::{future::join_all, Future}; + use kvproto::kvrpcpb::Context; + use tempfile::TempDir; + use tikv_kv::{Engine, Modify, RocksEngine, SnapContext, Snapshot, WriteData, WriteEvent}; + use tikv_util::sys::thread::ThreadBuildWrapper; + use tokio::runtime::{Builder, Runtime}; + use txn_types::{Key, TimeStamp, Write, WriteType}; + + use super::ThrottledTlsEngineWriter; + use crate::storage::TestEngineBuilder; + + struct Suite { + handle: ThrottledTlsEngineWriter, + rt: Runtime, + eng: RocksEngine, + + tso: u64, + mirror: RocksEngine, + + _temp_dirs: [TempDir; 2], + } + + impl Suite { + fn wait(&self, fut: impl Future) -> T { + 
self.rt.block_on(fut) + } + + fn batch<'a, 'b, 'this: 'a + 'b>( + &mut self, + region_id: u64, + f: impl FnOnce(&mut dyn FnMut(&'a str, &'b str)) + 'this, + ) -> (WriteData, Context) { + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + let mut b = vec![]; + let mut t = |key, value| txn(key, value, &mut self.tso, &mut b); + f(&mut t); + let batch = WriteData::new(b.clone(), Default::default()); + let batch2 = WriteData::new(b, Default::default()); + self.wait(write_to_engine(&ctx, &self.mirror, batch)); + (batch2, ctx) + } + + fn send_to_applier( + &self, + args: impl std::iter::Iterator, + ) -> impl Future { + let fut = args + .map(|arg| { + self.rt.spawn( + // SAFETY: we have already register the engine. + unsafe { self.handle.write::(arg.0, arg.1) }, + ) + }) + .collect::>(); + async { + join_all( + fut.into_iter() + .map(|fut| async move { fut.await.unwrap().unwrap() }), + ) + .await; + } + } + + fn check(&mut self, name: &str) { + for cf in ALL_CFS { + let the_mirror = iterate_over(&mut self.mirror, cf); + let real_world = iterate_over(&mut self.eng, cf); + compare_iter(the_mirror, real_world) + .map_err(|err| format!("case {name}: {err}")) + .unwrap(); + } + } + } + + fn create_applier(max_pending_raft_cmd: usize) -> Suite { + let temp_dirs = [TempDir::new().unwrap(), TempDir::new().unwrap()]; + let engine = TestEngineBuilder::new() + .path(temp_dirs[0].path()) + .build() + .unwrap(); + let eng = engine.clone(); + let engine = Mutex::new(engine); + let mirror = TestEngineBuilder::new() + .path(temp_dirs[1].path()) + .build() + .unwrap(); + let rt = Builder::new_multi_thread() + .enable_all() + .worker_threads(1) + .after_start_wrapper(move || tikv_kv::set_tls_engine(engine.lock().unwrap().clone())) + // SAFETY: see the line above. 
+ .before_stop_wrapper(|| unsafe { tikv_kv::destroy_tls_engine::() }) + .build() + .unwrap(); + let handle = + ThrottledTlsEngineWriter::with_max_concurrency_per_region(max_pending_raft_cmd); + Suite { + handle, + rt, + eng, + tso: 1u64, + mirror, + _temp_dirs: temp_dirs, + } + } + + async fn write_to_engine(ctx: &Context, e: &RocksEngine, batch: WriteData) { + use futures_util::StreamExt; + e.async_write(ctx, batch, WriteEvent::BASIC_EVENT, None) + .next() + .await + .unwrap(); + } + + fn iterate_over(e: &mut RocksEngine, cf: &'static str) -> RocksEngineIterator { + let snap = e.snapshot(SnapContext::default()).unwrap(); + let mut iter = snap.iter(cf, Default::default()).unwrap(); + iter.seek_to_first().unwrap(); + iter + } + + fn check_eq( + a: T, + b: T, + tag: &str, + show: impl Fn(T) -> D, + ) -> Result<(), String> { + if a != b { + return Err(format!("{} not match: {} vs {}", tag, show(a), show(b))); + } + Ok(()) + } + + fn compare_iter(mut i1: impl Iterator, mut i2: impl Iterator) -> Result<(), String> { + while i1.valid().unwrap() && i2.valid().unwrap() { + check_eq(i1.key(), i2.key(), "key", <[u8]>::escape_ascii)?; + check_eq(i1.value(), i2.value(), "value", <[u8]>::escape_ascii)?; + i1.next().unwrap(); + i2.next().unwrap(); + } + check_eq(i1.valid().unwrap(), i2.valid().unwrap(), "length", identity)?; + Ok(()) + } + + fn write(key: &[u8], ty: WriteType, commit_ts: u64, start_ts: u64) -> (Vec, Vec) { + let k = Key::from_raw(key).append_ts(TimeStamp::new(commit_ts)); + let v = Write::new(ty, TimeStamp::new(start_ts), None); + (k.into_encoded(), v.as_ref().to_bytes()) + } + + fn default(key: &[u8], val: &[u8], start_ts: u64) -> (Vec, Vec) { + let k = Key::from_raw(key).append_ts(TimeStamp::new(start_ts)); + (k.into_encoded(), val.to_owned()) + } + + fn default_req(key: &[u8], val: &[u8], start_ts: u64) -> Modify { + let (k, v) = default(key, val, start_ts); + Modify::Put(CF_DEFAULT, Key::from_encoded(k), v) + } + + fn write_req(key: &[u8], ty: WriteType, 
commit_ts: u64, start_ts: u64) -> Modify { + let (k, v) = write(key, ty, commit_ts, start_ts); + if ty == WriteType::Delete { + Modify::Delete(CF_WRITE, Key::from_encoded(k)) + } else { + Modify::Put(CF_WRITE, Key::from_encoded(k), v) + } + } + + fn txn(key: &str, value: &str, tso: &mut u64, append_to: &mut Vec) { + let start = *tso; + let commit = *tso + 1; + *tso += 2; + append_to.extend([ + default_req(key.as_bytes(), value.as_bytes(), start), + write_req(key.as_bytes(), WriteType::Put, start, commit), + ]) + } + + #[test] + fn test_basic() { + let mut suite = create_applier(16); + let b1 = suite.batch(1, |t| { + t("1", "amazing world in my dream"); + t("2", "gazing at the abyss"); + }); + let b2 = suite.batch(2, |t| { + t("3", "the forest leaves drop"); + t("4", "the meaningless words in a test case"); + }); + let fut = suite.send_to_applier(vec![b1, b2].into_iter()); + suite.wait(fut); + + suite.check("basic"); + } + + #[test] + // Clippy doesn't know about the romantic relationship between lazy evaluation and + // side-effective ;) + #[allow(clippy::needless_collect)] + fn test_inflight_max() { + let mut suite = create_applier(3); + + let b1 = (1..6) + .map(|_| { + suite.batch(1, move |t| { + t("al-kīmiyā", "following the light of the moon and stars, the guide of the sun and winds."); + }) + }) + .collect::>(); + let b2 = (1..3) + .map(|_| { + suite.batch(2, move |t| { + t( + "sole key to this mystery", + "fib this n = if n < 2 then n else this (n-1) + this (n-2)", + ); + }) + }) + .collect::>(); + fail::cfg("rockskv_write_modifies", "sleep(5000)").unwrap(); + let fut = suite.send_to_applier(b1.into_iter().chain(b2)); + std::thread::sleep(Duration::from_secs(1)); + let pending_requests = suite.handle.inspect_inflight(); + assert_eq!(*pending_requests.get(&1).unwrap(), 3usize); + assert_eq!(*pending_requests.get(&2).unwrap(), 2usize); + fail::cfg("rockskv_write_modifies", "off").unwrap(); + suite.wait(fut); + + suite.check("inflight_max"); + } + + #[test] + fn 
test_gc() { + let mut suite = create_applier(16); + let b1 = suite.batch(1, |t| { + t("where is the sun", "it is in the clear sky"); + t("where are the words", "they are in some language model"); + t( + "where is the language model", + "I dunno, these sentences are generated by a human.", + ); + }); + let b2 = suite.batch(2, |t| { + t("...and this case needs two batches", "why?"); + t( + "It is by... tradition.", + "If a case is TOO short, who will believe it is effective?", + ); + t( + "Perhaps we should make the `RocksEngine` be able to distinguish requests.", + "So...", + ); + t( + "We can block `b2` but not for `b1`", + "then we can check there should be only one running worker.", + ); + }); + assert_eq!(suite.handle.inspect_worker(), 0); + fail::cfg("rockskv_async_write", "sleep(5000)").unwrap(); + let fut = suite.send_to_applier(std::iter::once(b1)); + assert_eq!(suite.handle.inspect_worker(), 1); + let fut2 = suite.send_to_applier(std::iter::once(b2)); + assert_eq!(suite.handle.inspect_worker(), 2); + + fail::cfg("rockskv_async_write", "off").unwrap(); + suite.wait(async move { + fut.await; + fut2.await; + }); + + let hnd = suite.handle.clone(); + assert!(hnd.try_gc()); + assert_eq!(suite.handle.inspect_worker(), 0); + + drop(suite); + assert!(!hnd.try_gc()); + } +} diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 12cb0ca892b..4707b348bc5 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -2,6 +2,7 @@ use std::{ collections::{HashMap, VecDeque}, + convert::identity, future::Future, path::PathBuf, sync::{Arc, Mutex}, @@ -11,8 +12,7 @@ use std::{ use collections::HashSet; use engine_traits::{CompactExt, MiscExt, CF_DEFAULT, CF_WRITE}; use file_system::{set_io_type, IoType}; -use futures::{sink::SinkExt, stream::TryStreamExt, Stream, StreamExt, TryFutureExt}; -use futures_executor::{ThreadPool, ThreadPoolBuilder}; +use futures::{sink::SinkExt, stream::TryStreamExt, FutureExt, TryFutureExt}; use grpcio::{ 
ClientStreamingSink, RequestStream, RpcContext, ServerStreamingSink, UnarySink, WriteFlags, }; @@ -42,14 +42,24 @@ use tikv_util::{ use tokio::{runtime::Runtime, time::sleep}; use txn_types::{Key, WriteRef, WriteType}; -use super::make_rpc_error; +use super::{ + make_rpc_error, + raft_writer::{self, wait_write}, +}; use crate::{ import::duplicate_detect::DuplicateDetector, server::CONFIG_ROCKSDB_GAUGE, storage::{self, errors::extract_region_error_from_error}, }; -const MAX_INFLIGHT_RAFT_MSGS: usize = 64; +/// The concurrency of sending raft request for every `apply` requests. +/// This value `16` would mainly influence the speed of applying a huge file: +/// when we downloading the files into disk, loading all of them into memory may +/// lead to OOM. This would be able to back-pressure them. +/// (only log files greater than 16 * 7M = 112M would be throttled by this.) +/// NOTE: Perhaps add a memory quota for download to disk mode and get rid of +/// this value? +const REQUEST_WRITE_CONCURRENCY: usize = 16; /// The extra bytes required by the wire encoding. /// Generally, a field (and a embedded message) would introduce 2 extra /// bytes. In detail, they are: @@ -63,6 +73,10 @@ const MAX_INFLIGHT_RAFT_MSGS: usize = 64; /// content length is greater than 128, however when the length is greater than /// 128, the extra 1~4 bytes can be ignored. const WIRE_EXTRA_BYTES: usize = 10; +/// The interval of running the GC for +/// [`raft_writer::ThrottledTlsEngineWriter`]. There aren't too many items held +/// in the writer. So we can run the GC less frequently. 
+const WRITER_GC_INTERVAL: Duration = Duration::from_secs(300); fn transfer_error(err: storage::Error) -> ImportPbError { let mut e = ImportPbError::default(); @@ -73,13 +87,15 @@ fn transfer_error(err: storage::Error) -> ImportPbError { e } -async fn wait_write(mut s: impl Stream + Send + Unpin) -> storage::Result<()> { - match s.next().await { - Some(WriteEvent::Finished(Ok(()))) => Ok(()), - Some(WriteEvent::Finished(Err(e))) => Err(e.into()), - Some(e) => Err(box_err!("unexpected event: {:?}", e)), - None => Err(box_err!("stream closed")), +fn convert_join_error(err: tokio::task::JoinError) -> ImportPbError { + let mut e = ImportPbError::default(); + if err.is_cancelled() { + e.set_message("task canceled, probably runtime is shutting down.".to_owned()); } + if err.is_panic() { + e.set_message(format!("panicked! {}", err)) + } + e } /// ImportSstService provides tikv-server with the ability to ingest SST files. @@ -92,16 +108,12 @@ pub struct ImportSstService { tablets: LocalTablets, engine: E, threads: Arc, - // For now, PiTR cannot be executed in the tokio runtime because it is synchronous and may - // blocks. (tokio is so strict... it panics if we do insane things like blocking in an async - // context.) - // We need to execute these code in a context which allows blocking. - // FIXME: Make PiTR restore asynchronous. Get rid of this pool. 
- block_threads: Arc, importer: Arc, limiter: Limiter, task_slots: Arc>>, raft_entry_max_size: ReadableSize, + + writer: raft_writer::ThrottledTlsEngineWriter, } struct RequestCollector { @@ -272,6 +284,7 @@ impl ImportSstService { importer: Arc, ) -> Self { let props = tikv_util::thread_group::current_properties(); + let eng = Mutex::new(engine.clone()); let threads = tokio::runtime::Builder::new_multi_thread() .worker_threads(cfg.num_threads) .enable_all() @@ -280,26 +293,27 @@ impl ImportSstService { tikv_util::thread_group::set_properties(props.clone()); tikv_alloc::add_thread_memory_accessor(); set_io_type(IoType::Import); + tikv_kv::set_tls_engine(eng.lock().unwrap().clone()); }) - .before_stop_wrapper(move || tikv_alloc::remove_thread_memory_accessor()) - .build() - .unwrap(); - let props = tikv_util::thread_group::current_properties(); - let block_threads = ThreadPoolBuilder::new() - .pool_size(cfg.num_threads) - .name_prefix("sst-importer") - .after_start_wrapper(move || { - tikv_util::thread_group::set_properties(props.clone()); - tikv_alloc::add_thread_memory_accessor(); - set_io_type(IoType::Import); + .before_stop_wrapper(move || { + tikv_alloc::remove_thread_memory_accessor(); + // SAFETY: we have set the engine at some lines above with type `E`. 
+ unsafe { tikv_kv::destroy_tls_engine::() }; }) - .before_stop_wrapper(move || tikv_alloc::remove_thread_memory_accessor()) - .create() + .build() .unwrap(); if let LocalTablets::Singleton(tablet) = &tablets { importer.start_switch_mode_check(threads.handle(), tablet.clone()); } + let writer = raft_writer::ThrottledTlsEngineWriter::default(); + let gc_handle = writer.clone(); + threads.spawn(async move { + while gc_handle.try_gc() { + tokio::time::sleep(WRITER_GC_INTERVAL).await; + } + }); + let cfg_mgr = ConfigManager::new(cfg); threads.spawn(Self::tick(importer.clone(), cfg_mgr.clone())); @@ -307,12 +321,12 @@ impl ImportSstService { cfg: cfg_mgr, tablets, threads: Arc::new(threads), - block_threads: Arc::new(block_threads), engine, importer, limiter: Limiter::new(f64::INFINITY), task_slots: Arc::new(Mutex::new(HashSet::default())), raft_entry_max_size, + writer, } } @@ -475,7 +489,7 @@ impl ImportSstService { async fn apply_imp( mut req: ApplyRequest, importer: Arc, - engine: E, + writer: raft_writer::ThrottledTlsEngineWriter, limiter: Limiter, max_raft_size: usize, ) -> std::result::Result, ImportPbError> { @@ -500,13 +514,15 @@ impl ImportSstService { let mut tasks = metas.iter().zip(rules.iter()).peekable(); while let Some((meta, rule)) = tasks.next() { - let buff = importer.read_from_kv_file( - meta, - rule, - ext_storage.clone(), - req.get_storage_backend(), - &limiter, - )?; + let buff = importer + .read_from_kv_file( + meta, + rule, + ext_storage.clone(), + req.get_storage_backend(), + &limiter, + ) + .await?; if let Some(mut r) = importer.do_apply_kv_file( meta.get_start_key(), meta.get_end_key(), @@ -524,20 +540,29 @@ impl ImportSstService { } let is_last_task = tasks.peek().is_none(); - for req in collector.drain_pending_writes(is_last_task) { - let f = engine.async_write(&context, req, WriteEvent::BASIC_EVENT, None); - inflight_futures.push_back(f); - if inflight_futures.len() >= MAX_INFLIGHT_RAFT_MSGS { - 
wait_write(inflight_futures.pop_front().unwrap()) - .await - .map_err(transfer_error)?; + for w in collector.drain_pending_writes(is_last_task) { + // Record the start of a task would greatly help us to inspect pending + // tasks. + APPLIER_EVENT.with_label_values(&["begin_req"]).inc(); + // SAFETY: we have registered the thread local storage engine into the thread + // when creating them. + let task = unsafe { + writer + .write::(w, context.clone()) + .map_err(transfer_error) + }; + inflight_futures.push_back( + tokio::spawn(task) + .map_err(convert_join_error) + .map(|x| x.and_then(identity)), + ); + if inflight_futures.len() >= REQUEST_WRITE_CONCURRENCY { + inflight_futures.pop_front().unwrap().await?; } } } assert!(collector.is_empty()); - for f in inflight_futures { - wait_write(f).await.map_err(transfer_error)?; - } + futures::future::try_join_all(inflight_futures).await?; Ok(range) } @@ -728,7 +753,6 @@ impl ImportSst for ImportSstService { sst_importer::metrics::IMPORTER_APPLY_DURATION .with_label_values(&[label]) .observe(start.saturating_elapsed().as_secs_f64()); - crate::send_rpc_response!(Ok(resp), sink, label, timer); }; self.threads.spawn(handle_task); @@ -740,9 +764,9 @@ impl ImportSst for ImportSstService { let label = "apply"; let start = Instant::now(); let importer = self.importer.clone(); - let engine = self.engine.clone(); let limiter = self.limiter.clone(); let max_raft_size = self.raft_entry_max_size.0 as usize; + let applier = self.writer.clone(); let handle_task = async move { // Records how long the apply task waits to be scheduled. 
@@ -752,7 +776,7 @@ impl ImportSst for ImportSstService { let mut resp = ApplyResponse::default(); - match Self::apply_imp(req, importer, engine, limiter, max_raft_size).await { + match Self::apply_imp(req, importer, applier, limiter, max_raft_size).await { Ok(Some(r)) => resp.set_range(r), Err(e) => resp.set_error(e), _ => {} @@ -761,7 +785,7 @@ impl ImportSst for ImportSstService { debug!("finished apply kv file with {:?}", resp); crate::send_rpc_response!(Ok(resp), sink, label, start); }; - self.block_threads.spawn_ok(handle_task); + self.threads.spawn(handle_task); } /// Downloads the file and performs key-rewrite for later ingesting. From e0d25f90fff0bda07976cdee2c81ec1cba0029b1 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Thu, 30 Mar 2023 16:08:55 +0800 Subject: [PATCH 0612/1149] server: Introduce a common layer between server and server2 (#14395) ref tikv/tikv#14401 server: Introduce a common layer between server and server2 Signed-off-by: CalvinNeo Co-authored-by: Ti Chi Robot --- components/server/src/common.rs | 238 +++++++++++ components/server/src/lib.rs | 1 + components/server/src/server.rs | 441 ++++++-------------- components/server/src/server2.rs | 403 +++++------------- components/test_raftstore-v2/src/cluster.rs | 49 +-- components/test_raftstore/src/cluster.rs | 14 +- 6 files changed, 499 insertions(+), 647 deletions(-) create mode 100644 components/server/src/common.rs diff --git a/components/server/src/common.rs b/components/server/src/common.rs new file mode 100644 index 00000000000..5c6dfa16120 --- /dev/null +++ b/components/server/src/common.rs @@ -0,0 +1,238 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+use std::{ + cmp, env, + net::SocketAddr, + path::{Path, PathBuf}, + sync::{mpsc, Arc}, + u64, +}; + +use encryption_export::{data_key_manager_from_config, DataKeyManager}; +use engine_rocks::FlowInfo; +use error_code::ErrorCodeExt; +use file_system::{set_io_rate_limiter, BytesFetcher, File}; +use tikv::config::TikvConfig; +use tikv_util::sys::{disk, path_in_diff_mount_point}; + +/// This is the common layer of TiKV-like servers. By holding it in its own +/// TikvServer implementation, one can easily access the common ability of a +/// TiKV server. +pub struct TikvServerCore { + pub config: TikvConfig, + pub store_path: PathBuf, + pub lock_files: Vec, + pub encryption_key_manager: Option>, + pub flow_info_sender: Option>, + pub flow_info_receiver: Option>, +} + +impl TikvServerCore { + pub fn check_conflict_addr(&mut self) { + let cur_addr: SocketAddr = self + .config + .server + .addr + .parse() + .expect("failed to parse into a socket address"); + let cur_ip = cur_addr.ip(); + let cur_port = cur_addr.port(); + let lock_dir = get_lock_dir(); + + let search_base = env::temp_dir().join(lock_dir); + file_system::create_dir_all(&search_base) + .unwrap_or_else(|_| panic!("create {} failed", search_base.display())); + + for entry in file_system::read_dir(&search_base).unwrap().flatten() { + if !entry.file_type().unwrap().is_file() { + continue; + } + let file_path = entry.path(); + let file_name = file_path.file_name().unwrap().to_str().unwrap(); + if let Ok(addr) = file_name.replace('_', ":").parse::() { + let ip = addr.ip(); + let port = addr.port(); + if cur_port == port + && (cur_ip == ip || cur_ip.is_unspecified() || ip.is_unspecified()) + { + let _ = try_lock_conflict_addr(file_path); + } + } + } + + let cur_path = search_base.join(cur_addr.to_string().replace(':', "_")); + let cur_file = try_lock_conflict_addr(cur_path); + self.lock_files.push(cur_file); + } + + pub fn init_fs(&mut self) { + let lock_path = self.store_path.join(Path::new("LOCK")); + + let f = 
File::create(lock_path.as_path()) + .unwrap_or_else(|e| fatal!("failed to create lock at {}: {}", lock_path.display(), e)); + if f.try_lock_exclusive().is_err() { + fatal!( + "lock {} failed, maybe another instance is using this directory.", + self.store_path.display() + ); + } + self.lock_files.push(f); + + if tikv_util::panic_mark_file_exists(&self.config.storage.data_dir) { + fatal!( + "panic_mark_file {} exists, there must be something wrong with the db. \ + Do not remove the panic_mark_file and force the TiKV node to restart. \ + Please contact TiKV maintainers to investigate the issue. \ + If needed, use scale in and scale out to replace the TiKV node. \ + https://docs.pingcap.com/tidb/stable/scale-tidb-using-tiup", + tikv_util::panic_mark_file_path(&self.config.storage.data_dir).display() + ); + } + + // Allocate a big file to make sure that TiKV have enough space to + // recover from disk full errors. This file is created in data_dir rather than + // db_path, because we must not increase store size of db_path. + fn calculate_reserved_space(capacity: u64, reserved_size_from_config: u64) -> u64 { + let mut reserved_size = reserved_size_from_config; + if reserved_size_from_config != 0 { + reserved_size = + cmp::max((capacity as f64 * 0.05) as u64, reserved_size_from_config); + } + reserved_size + } + fn reserve_physical_space(data_dir: &String, available: u64, reserved_size: u64) { + let path = Path::new(data_dir).join(file_system::SPACE_PLACEHOLDER_FILE); + if let Err(e) = file_system::remove_file(path) { + warn!("failed to remove space holder on starting: {}", e); + } + + // place holder file size is 20% of total reserved space. 
+ if available > reserved_size { + file_system::reserve_space_for_recover(data_dir, reserved_size / 5) + .map_err(|e| panic!("Failed to reserve space for recovery: {}.", e)) + .unwrap(); + } else { + warn!("no enough disk space left to create the place holder file"); + } + } + + let disk_stats = fs2::statvfs(&self.config.storage.data_dir).unwrap(); + let mut capacity = disk_stats.total_space(); + if self.config.raft_store.capacity.0 > 0 { + capacity = cmp::min(capacity, self.config.raft_store.capacity.0); + } + // reserve space for kv engine + let kv_reserved_size = + calculate_reserved_space(capacity, self.config.storage.reserve_space.0); + disk::set_disk_reserved_space(kv_reserved_size); + reserve_physical_space( + &self.config.storage.data_dir, + disk_stats.available_space(), + kv_reserved_size, + ); + + let raft_data_dir = if self.config.raft_engine.enable { + self.config.raft_engine.config().dir + } else { + self.config.raft_store.raftdb_path.clone() + }; + + let separated_raft_mount_path = + path_in_diff_mount_point(&self.config.storage.data_dir, &raft_data_dir); + if separated_raft_mount_path { + let raft_disk_stats = fs2::statvfs(&raft_data_dir).unwrap(); + // reserve space for raft engine if raft engine is deployed separately + let raft_reserved_size = calculate_reserved_space( + raft_disk_stats.total_space(), + self.config.storage.reserve_raft_space.0, + ); + disk::set_raft_disk_reserved_space(raft_reserved_size); + reserve_physical_space( + &raft_data_dir, + raft_disk_stats.available_space(), + raft_reserved_size, + ); + } + } + + pub fn init_yatp(&self) { + yatp::metrics::set_namespace(Some("tikv")); + prometheus::register(Box::new(yatp::metrics::MULTILEVEL_LEVEL0_CHANCE.clone())).unwrap(); + prometheus::register(Box::new(yatp::metrics::MULTILEVEL_LEVEL_ELAPSED.clone())).unwrap(); + prometheus::register(Box::new(yatp::metrics::TASK_EXEC_DURATION.clone())).unwrap(); + prometheus::register(Box::new(yatp::metrics::TASK_POLL_DURATION.clone())).unwrap(); + 
prometheus::register(Box::new(yatp::metrics::TASK_EXEC_TIMES.clone())).unwrap(); + } + + pub fn init_encryption(&mut self) { + self.encryption_key_manager = data_key_manager_from_config( + &self.config.security.encryption, + &self.config.storage.data_dir, + ) + .map_err(|e| { + panic!( + "Encryption failed to initialize: {}. code: {}", + e, + e.error_code() + ) + }) + .unwrap() + .map(Arc::new); + } + + pub fn init_io_utility(&mut self) -> BytesFetcher { + let stats_collector_enabled = file_system::init_io_stats_collector() + .map_err(|e| warn!("failed to init I/O stats collector: {}", e)) + .is_ok(); + + let limiter = Arc::new( + self.config + .storage + .io_rate_limit + .build(!stats_collector_enabled /* enable_statistics */), + ); + let fetcher = if stats_collector_enabled { + BytesFetcher::FromIoStatsCollector() + } else { + BytesFetcher::FromRateLimiter(limiter.statistics().unwrap()) + }; + // Set up IO limiter even when rate limit is disabled, so that rate limits can + // be dynamically applied later on. 
+ set_io_rate_limiter(Some(limiter)); + fetcher + } + + pub fn init_flow_receiver(&mut self) -> engine_rocks::FlowListener { + let (tx, rx) = mpsc::channel(); + self.flow_info_sender = Some(tx.clone()); + self.flow_info_receiver = Some(rx); + engine_rocks::FlowListener::new(tx) + } +} + +#[cfg(unix)] +fn get_lock_dir() -> String { + format!("{}_TIKV_LOCK_FILES", unsafe { libc::getuid() }) +} + +#[cfg(not(unix))] +fn get_lock_dir() -> String { + "TIKV_LOCK_FILES".to_owned() +} + +fn try_lock_conflict_addr>(path: P) -> File { + let f = File::create(path.as_ref()).unwrap_or_else(|e| { + fatal!( + "failed to create lock at {}: {}", + path.as_ref().display(), + e + ) + }); + + if f.try_lock_exclusive().is_err() { + fatal!( + "{} already in use, maybe another instance is binding with this address.", + path.as_ref().file_name().unwrap().to_str().unwrap() + ); + } + f +} diff --git a/components/server/src/lib.rs b/components/server/src/lib.rs index d5c8e352a88..144cc1885d5 100644 --- a/components/server/src/lib.rs +++ b/components/server/src/lib.rs @@ -9,6 +9,7 @@ extern crate tikv_util; #[macro_use] pub mod setup; +pub mod common; pub mod memory; pub mod raft_engine_switch; pub mod server; diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 06df19da1d6..35fc96a3460 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -15,8 +15,7 @@ use std::{ cmp, collections::HashMap, convert::TryFrom, - env, fmt, - net::SocketAddr, + fmt, path::{Path, PathBuf}, str::FromStr, sync::{ @@ -34,11 +33,11 @@ use backup_stream::{ use causal_ts::CausalTsProviderImpl; use cdc::{CdcConfigManager, MemoryQuota}; use concurrency_manager::ConcurrencyManager; -use encryption_export::{data_key_manager_from_config, DataKeyManager}; +use encryption_export::DataKeyManager; use engine_rocks::{ flush_engine_statistics, from_rocks_compression_type, raw::{Cache, Env}, - FlowInfo, RocksEngine, RocksStatistics, + RocksEngine, RocksStatistics, }; 
use engine_rocks_helper::sst_recovery::{RecoveryRunner, DEFAULT_CHECK_INTERVAL}; use engine_traits::{ @@ -46,10 +45,8 @@ use engine_traits::{ RaftEngine, SingletonFactory, StatisticsReporter, TabletContext, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, }; -use error_code::ErrorCodeExt; use file_system::{ - get_io_rate_limiter, set_io_rate_limiter, BytesFetcher, File, IoBudgetAdjustor, - MetricsManager as IoMetricsManager, + get_io_rate_limiter, BytesFetcher, IoBudgetAdjustor, MetricsManager as IoMetricsManager, }; use futures::executor::block_on; use grpcio::{EnvBuilder, Environment}; @@ -137,7 +134,7 @@ use tikv_util::{ use tokio::runtime::Builder; use crate::{ - memory::*, raft_engine_switch::*, setup::*, signal_handler, + common::TikvServerCore, memory::*, raft_engine_switch::*, setup::*, signal_handler, tikv_util::sys::thread::ThreadBuildWrapper, }; @@ -159,16 +156,16 @@ fn run_impl(config: TikvConfig) { let mut tikv = TikvServer::::init::(config); // Must be called after `TikvServer::init`. 
- let memory_limit = tikv.config.memory_usage_limit.unwrap().0; - let high_water = (tikv.config.memory_usage_high_water * memory_limit as f64) as u64; + let memory_limit = tikv.core.config.memory_usage_limit.unwrap().0; + let high_water = (tikv.core.config.memory_usage_high_water * memory_limit as f64) as u64; register_memory_usage_high_water(high_water); - tikv.check_conflict_addr(); - tikv.init_fs(); - tikv.init_yatp(); - tikv.init_encryption(); - let fetcher = tikv.init_io_utility(); - let listener = tikv.init_flow_receiver(); + tikv.core.check_conflict_addr(); + tikv.core.init_fs(); + tikv.core.init_yatp(); + tikv.core.init_encryption(); + let fetcher = tikv.core.init_io_utility(); + let listener = tikv.core.init_flow_receiver(); let (engines, engines_info) = tikv.init_raw_engines(listener); tikv.init_engines(engines.clone()); let server_config = tikv.init_servers::(); @@ -227,18 +224,14 @@ const DEFAULT_QUOTA_LIMITER_TUNE_INTERVAL: Duration = Duration::from_secs(5); /// A complete TiKV server. struct TikvServer { - config: TikvConfig, + core: TikvServerCore, cfg_controller: Option, security_mgr: Arc, pd_client: Arc, router: RaftRouter, - flow_info_sender: Option>, - flow_info_receiver: Option>, system: Option>, resolver: Option, - store_path: PathBuf, snap_mgr: Option, // Will be filled in `init_servers`. 
- encryption_key_manager: Option>, engines: Option>, kv_statistics: Option>, raft_statistics: Option>, @@ -246,7 +239,6 @@ struct TikvServer { region_info_accessor: RegionInfoAccessor, coprocessor_host: Option>, to_stop: Vec>, - lock_files: Vec, concurrency_manager: ConcurrencyManager, env: Arc, background_worker: Worker, @@ -401,16 +393,21 @@ where let check_leader_worker = WorkerBuilder::new("check_leader").thread_count(1).create(); TikvServer { - config, + core: TikvServerCore { + config, + store_path, + lock_files: vec![], + encryption_key_manager: None, + flow_info_sender: None, + flow_info_receiver: None, + }, cfg_controller: Some(cfg_controller), security_mgr, pd_client, router, system: Some(system), resolver: None, - store_path, snap_mgr: None, - encryption_key_manager: None, engines: None, kv_statistics: None, raft_statistics: None, @@ -418,13 +415,10 @@ where region_info_accessor, coprocessor_host, to_stop: vec![], - lock_files: vec![], concurrency_manager, env, background_worker, check_leader_worker, - flow_info_sender: None, - flow_info_receiver: None, sst_worker: None, quota_limiter, resource_manager, @@ -505,166 +499,6 @@ where pd_client } - fn check_conflict_addr(&mut self) { - let cur_addr: SocketAddr = self - .config - .server - .addr - .parse() - .expect("failed to parse into a socket address"); - let cur_ip = cur_addr.ip(); - let cur_port = cur_addr.port(); - let lock_dir = get_lock_dir(); - - let search_base = env::temp_dir().join(lock_dir); - file_system::create_dir_all(&search_base) - .unwrap_or_else(|_| panic!("create {} failed", search_base.display())); - - for entry in file_system::read_dir(&search_base).unwrap().flatten() { - if !entry.file_type().unwrap().is_file() { - continue; - } - let file_path = entry.path(); - let file_name = file_path.file_name().unwrap().to_str().unwrap(); - if let Ok(addr) = file_name.replace('_', ":").parse::() { - let ip = addr.ip(); - let port = addr.port(); - if cur_port == port - && (cur_ip == ip || 
cur_ip.is_unspecified() || ip.is_unspecified()) - { - let _ = try_lock_conflict_addr(file_path); - } - } - } - - let cur_path = search_base.join(cur_addr.to_string().replace(':', "_")); - let cur_file = try_lock_conflict_addr(cur_path); - self.lock_files.push(cur_file); - } - - fn init_fs(&mut self) { - let lock_path = self.store_path.join(Path::new("LOCK")); - - let f = File::create(lock_path.as_path()) - .unwrap_or_else(|e| fatal!("failed to create lock at {}: {}", lock_path.display(), e)); - if f.try_lock_exclusive().is_err() { - fatal!( - "lock {} failed, maybe another instance is using this directory.", - self.store_path.display() - ); - } - self.lock_files.push(f); - - if tikv_util::panic_mark_file_exists(&self.config.storage.data_dir) { - fatal!( - "panic_mark_file {} exists, there must be something wrong with the db. \ - Do not remove the panic_mark_file and force the TiKV node to restart. \ - Please contact TiKV maintainers to investigate the issue. \ - If needed, use scale in and scale out to replace the TiKV node. \ - https://docs.pingcap.com/tidb/stable/scale-tidb-using-tiup", - tikv_util::panic_mark_file_path(&self.config.storage.data_dir).display() - ); - } - - // We truncate a big file to make sure that both raftdb and kvdb of TiKV have - // enough space to do compaction and region migration when TiKV recover. - // This file is created in data_dir rather than db_path, because we must not - // increase store size of db_path. 
- fn calculate_reserved_space(capacity: u64, reserved_size_from_config: u64) -> u64 { - let mut reserved_size = reserved_size_from_config; - if reserved_size_from_config != 0 { - reserved_size = - cmp::max((capacity as f64 * 0.05) as u64, reserved_size_from_config); - } - reserved_size - } - fn reserve_physical_space(data_dir: &String, available: u64, reserved_size: u64) { - let path = Path::new(data_dir).join(file_system::SPACE_PLACEHOLDER_FILE); - if let Err(e) = file_system::remove_file(path) { - warn!("failed to remove space holder on starting: {}", e); - } - - // place holder file size is 20% of total reserved space. - if available > reserved_size { - file_system::reserve_space_for_recover(data_dir, reserved_size / 5) - .map_err(|e| panic!("Failed to reserve space for recovery: {}.", e)) - .unwrap(); - } else { - warn!("no enough disk space left to create the place holder file"); - } - } - - let disk_stats = fs2::statvfs(&self.config.storage.data_dir).unwrap(); - let mut capacity = disk_stats.total_space(); - if self.config.raft_store.capacity.0 > 0 { - capacity = cmp::min(capacity, self.config.raft_store.capacity.0); - } - // reserve space for kv engine - let kv_reserved_size = - calculate_reserved_space(capacity, self.config.storage.reserve_space.0); - disk::set_disk_reserved_space(kv_reserved_size); - reserve_physical_space( - &self.config.storage.data_dir, - disk_stats.available_space(), - kv_reserved_size, - ); - - let raft_data_dir = if self.config.raft_engine.enable { - self.config.raft_engine.config().dir - } else { - self.config.raft_store.raftdb_path.clone() - }; - - let separated_raft_mount_path = - path_in_diff_mount_point(&self.config.storage.data_dir, &raft_data_dir); - if separated_raft_mount_path { - let raft_disk_stats = fs2::statvfs(&raft_data_dir).unwrap(); - // reserve space for raft engine if raft engine is deployed separately - let raft_reserved_size = calculate_reserved_space( - raft_disk_stats.total_space(), - 
self.config.storage.reserve_raft_space.0, - ); - disk::set_raft_disk_reserved_space(raft_reserved_size); - reserve_physical_space( - &raft_data_dir, - raft_disk_stats.available_space(), - raft_reserved_size, - ); - } - } - - fn init_yatp(&self) { - yatp::metrics::set_namespace(Some("tikv")); - prometheus::register(Box::new(yatp::metrics::MULTILEVEL_LEVEL0_CHANCE.clone())).unwrap(); - prometheus::register(Box::new(yatp::metrics::MULTILEVEL_LEVEL_ELAPSED.clone())).unwrap(); - prometheus::register(Box::new(yatp::metrics::TASK_EXEC_DURATION.clone())).unwrap(); - prometheus::register(Box::new(yatp::metrics::TASK_POLL_DURATION.clone())).unwrap(); - prometheus::register(Box::new(yatp::metrics::TASK_EXEC_TIMES.clone())).unwrap(); - } - - fn init_encryption(&mut self) { - self.encryption_key_manager = data_key_manager_from_config( - &self.config.security.encryption, - &self.config.storage.data_dir, - ) - .map_err(|e| { - panic!( - "Encryption failed to initialize: {}. code: {}", - e, - e.error_code() - ) - }) - .unwrap() - .map(Arc::new); - } - - fn init_flow_receiver(&mut self) -> engine_rocks::FlowListener { - let (tx, rx) = mpsc::channel(); - self.flow_info_sender = Some(tx.clone()); - self.flow_info_receiver = Some(rx); - engine_rocks::FlowListener::new(tx) - } - fn init_engines(&mut self, engines: Engines) { let store_meta = Arc::new(Mutex::new(StoreMeta::new(PENDING_MSG_CAP))); let engine = RaftKv::new( @@ -693,8 +527,8 @@ where let engines = self.engines.as_ref().unwrap(); let gc_worker = GcWorker::new( engines.engine.clone(), - self.flow_info_sender.take().unwrap(), - self.config.gc.clone(), + self.core.flow_info_sender.take().unwrap(), + self.core.config.gc.clone(), self.pd_client.feature_gate().clone(), Arc::new(self.region_info_accessor.clone()), ); @@ -710,9 +544,9 @@ where fn init_servers(&mut self) -> Arc> { let flow_controller = Arc::new(FlowController::Singleton(EngineFlowController::new( - &self.config.storage.flow_control, + 
&self.core.config.storage.flow_control, self.engines.as_ref().unwrap().engine.kv_engine().unwrap(), - self.flow_info_receiver.take().unwrap(), + self.core.flow_info_receiver.take().unwrap(), ))); let mut gc_worker = self.init_gc_worker(); let mut ttl_checker = Box::new(LazyWorker::new("ttl-checker")); @@ -740,7 +574,7 @@ where .engine .set_txn_extra_scheduler(Arc::new(txn_extra_scheduler)); - let lock_mgr = LockManager::new(&self.config.pessimistic_txn); + let lock_mgr = LockManager::new(&self.core.config.pessimistic_txn); cfg_controller.register( tikv::config::Module::PessimisticTxn, Box::new(lock_mgr.config_manager()), @@ -756,19 +590,23 @@ where let sst_runner = RecoveryRunner::new( engines.engines.kv.clone(), engines.store_meta.clone(), - self.config.storage.background_error_recovery_window.into(), + self.core + .config + .storage + .background_error_recovery_window + .into(), DEFAULT_CHECK_INTERVAL, ); sst_worker.start_with_timer(sst_runner); } - let unified_read_pool = if self.config.readpool.is_unified_pool_enabled() { + let unified_read_pool = if self.core.config.readpool.is_unified_pool_enabled() { let resource_ctl = self .resource_manager .as_ref() .map(|m| m.derive_controller("unified-read-pool".into(), true)); Some(build_yatp_read_pool( - &self.config.readpool.unified, + &self.core.config.readpool.unified, pd_sender.clone(), engines.engine.clone(), resource_ctl, @@ -804,16 +642,18 @@ where // Start resource metering. 
let (recorder_notifier, collector_reg_handle, resource_tag_factory, recorder_worker) = - resource_metering::init_recorder(self.config.resource_metering.precision.as_millis()); + resource_metering::init_recorder( + self.core.config.resource_metering.precision.as_millis(), + ); self.to_stop.push(recorder_worker); let (reporter_notifier, data_sink_reg_handle, reporter_worker) = resource_metering::init_reporter( - self.config.resource_metering.clone(), + self.core.config.resource_metering.clone(), collector_reg_handle.clone(), ); self.to_stop.push(reporter_worker); let (address_change_notifier, single_target_worker) = resource_metering::init_single_target( - self.config.resource_metering.receiver_address.clone(), + self.core.config.resource_metering.receiver_address.clone(), self.env.clone(), data_sink_reg_handle.clone(), ); @@ -821,7 +661,7 @@ where let rsmeter_pubsub_service = resource_metering::PubSubService::new(data_sink_reg_handle); let cfg_manager = resource_metering::ConfigManager::new( - self.config.resource_metering.clone(), + self.core.config.resource_metering.clone(), recorder_notifier, reporter_notifier, address_change_notifier, @@ -831,11 +671,11 @@ where Box::new(cfg_manager), ); - let storage_read_pool_handle = if self.config.readpool.storage.use_unified_pool() { + let storage_read_pool_handle = if self.core.config.readpool.storage.use_unified_pool() { unified_read_pool.as_ref().unwrap().handle() } else { let storage_read_pools = ReadPool::from(storage::build_read_pool( - &self.config.readpool.storage, + &self.core.config.readpool.storage, pd_sender.clone(), engines.engine.clone(), )); @@ -844,7 +684,7 @@ where let storage = Storage::<_, _, F>::from_engine( engines.engine.clone(), - &self.config.storage, + &self.core.config.storage, storage_read_pool_handle, lock_mgr.clone(), self.concurrency_manager.clone(), @@ -882,20 +722,21 @@ where // Create snapshot manager, server. 
let snap_path = self + .core .store_path .join(Path::new("snap")) .to_str() .unwrap() .to_owned(); - let bps = i64::try_from(self.config.server.snap_io_max_bytes_per_sec.0) + let bps = i64::try_from(self.core.config.server.snap_io_max_bytes_per_sec.0) .unwrap_or_else(|_| fatal!("snap_io_max_bytes_per_sec > i64::max_value")); let snap_mgr = SnapManagerBuilder::default() .max_write_bytes_per_sec(bps) - .max_total_size(self.config.server.snap_max_total_size.0) - .encryption_key_manager(self.encryption_key_manager.clone()) - .max_per_file_size(self.config.raft_store.max_snapshot_file_raw_size.0) + .max_total_size(self.core.config.server.snap_max_total_size.0) + .encryption_key_manager(self.core.encryption_key_manager.clone()) + .max_per_file_size(self.core.config.raft_store.max_snapshot_file_raw_size.0) .enable_multi_snapshot_files( self.pd_client .feature_gate() @@ -904,11 +745,11 @@ where .build(snap_path); // Create coprocessor endpoint. - let cop_read_pool_handle = if self.config.readpool.coprocessor.use_unified_pool() { + let cop_read_pool_handle = if self.core.config.readpool.coprocessor.use_unified_pool() { unified_read_pool.as_ref().unwrap().handle() } else { let cop_read_pools = ReadPool::from(coprocessor::readpool_impl::build_read_pool( - &self.config.readpool.coprocessor, + &self.core.config.readpool.coprocessor, pd_sender, engines.engine.clone(), )); @@ -916,7 +757,7 @@ where }; let mut unified_read_pool_scale_receiver = None; - if self.config.readpool.is_unified_pool_enabled() { + if self.core.config.readpool.is_unified_pool_enabled() { let (unified_read_pool_scale_notifier, rx) = mpsc::sync_channel(10); cfg_controller.register( tikv::config::Module::Readpool, @@ -924,8 +765,8 @@ where unified_read_pool.as_ref().unwrap().handle(), unified_read_pool_scale_notifier, &self.background_worker, - self.config.readpool.unified.max_thread_count, - self.config.readpool.unified.auto_adjust_pool_size, + self.core.config.readpool.unified.max_thread_count, + 
self.core.config.readpool.unified.auto_adjust_pool_size, )), ); unified_read_pool_scale_receiver = Some(rx); @@ -941,7 +782,7 @@ where ); // Create resolved ts worker - let rts_worker = if self.config.resolved_ts.enable { + let rts_worker = if self.core.config.resolved_ts.enable { let worker = Box::new(LazyWorker::new("resolved-ts")); // Register the resolved ts observer let resolved_ts_ob = resolved_ts::Observer::new(worker.scheduler()); @@ -966,23 +807,24 @@ where .check_leader_worker .start("check-leader", check_leader_runner); - let server_config = Arc::new(VersionTrack::new(self.config.server.clone())); + let server_config = Arc::new(VersionTrack::new(self.core.config.server.clone())); - self.config + self.core + .config .raft_store .validate( - self.config.coprocessor.region_split_size(), - self.config.coprocessor.enable_region_bucket(), - self.config.coprocessor.region_bucket_size, + self.core.config.coprocessor.region_split_size(), + self.core.config.coprocessor.enable_region_bucket(), + self.core.config.coprocessor.region_bucket_size, ) .unwrap_or_else(|e| fatal!("failed to validate raftstore config {}", e)); - let raft_store = Arc::new(VersionTrack::new(self.config.raft_store.clone())); + let raft_store = Arc::new(VersionTrack::new(self.core.config.raft_store.clone())); let health_service = HealthService::default(); let mut node = Node::new( self.system.take().unwrap(), &server_config.value().clone(), raft_store.clone(), - self.config.storage.api_version(), + self.core.config.storage.api_version(), self.pd_client.clone(), state, self.background_worker.clone(), @@ -1006,7 +848,7 @@ where resource_tag_factory, Arc::clone(&self.quota_limiter), ), - coprocessor_v2::Endpoint::new(&self.config.coprocessor_v2), + coprocessor_v2::Endpoint::new(&self.core.config.coprocessor_v2), self.resolver.clone().unwrap(), Either::Left(snap_mgr.clone()), gc_worker.clone(), @@ -1027,7 +869,7 @@ where ); // Start backup stream - let backup_stream_scheduler = if 
self.config.log_backup.enable { + let backup_stream_scheduler = if self.core.config.log_backup.enable { // Create backup stream. let mut backup_stream_worker = Box::new(LazyWorker::new("backup-stream")); let backup_stream_scheduler = backup_stream_worker.scheduler(); @@ -1040,7 +882,7 @@ where tikv::config::Module::BackupStream, Box::new(BackupStreamConfigManager::new( backup_stream_worker.scheduler(), - self.config.log_backup.clone(), + self.core.config.log_backup.clone(), )), ); @@ -1050,7 +892,7 @@ where Arc::clone(&self.pd_client), pd_client::meta_storage::Source::LogBackup, ))), - self.config.log_backup.clone(), + self.core.config.log_backup.clone(), backup_stream_scheduler.clone(), backup_stream_ob, self.region_info_accessor.clone(), @@ -1073,22 +915,30 @@ where None }; - let import_path = self.store_path.join("import"); + let import_path = self.core.store_path.join("import"); let mut importer = SstImporter::new( - &self.config.import, + &self.core.config.import, import_path, - self.encryption_key_manager.clone(), - self.config.storage.api_version(), + self.core.encryption_key_manager.clone(), + self.core.config.storage.api_version(), ) .unwrap(); for (cf_name, compression_type) in &[ ( CF_DEFAULT, - self.config.rocksdb.defaultcf.bottommost_level_compression, + self.core + .config + .rocksdb + .defaultcf + .bottommost_level_compression, ), ( CF_WRITE, - self.config.rocksdb.writecf.bottommost_level_compression, + self.core + .config + .rocksdb + .writecf + .bottommost_level_compression, ), ] { importer.set_compression_type(cf_name, from_rocks_compression_type(*compression_type)); @@ -1109,7 +959,7 @@ where ); let split_config_manager = - SplitConfigManager::new(Arc::new(VersionTrack::new(self.config.split.clone()))); + SplitConfigManager::new(Arc::new(VersionTrack::new(self.core.config.split.clone()))); cfg_controller.register( tikv::config::Module::Split, Box::new(split_config_manager.clone()), @@ -1117,14 +967,14 @@ where let auto_split_controller = 
AutoSplitController::new( split_config_manager, - self.config.server.grpc_concurrency, - self.config.readpool.unified.max_thread_count, + self.core.config.server.grpc_concurrency, + self.core.config.readpool.unified.max_thread_count, unified_read_pool_scale_receiver, ); // `ConsistencyCheckObserver` must be registered before `Node::start`. let safe_point = Arc::new(AtomicU64::new(0)); - let observer = match self.config.coprocessor.consistency_check_method { + let observer = match self.core.config.coprocessor.consistency_check_method { ConsistencyCheckMethod::Mvcc => BoxConsistencyCheckObserver::new( MvccConsistencyCheckObserver::new(safe_point.clone()), ), @@ -1169,22 +1019,22 @@ where fatal!("failed to start auto_gc on storage, error: {}", e); } - initial_metric(&self.config.metric); - if self.config.storage.enable_ttl { + initial_metric(&self.core.config.metric); + if self.core.config.storage.enable_ttl { ttl_checker.start_with_timer(TtlChecker::new( self.engines.as_ref().unwrap().engine.kv_engine().unwrap(), self.region_info_accessor.clone(), - self.config.storage.ttl_check_poll_interval.into(), + self.core.config.storage.ttl_check_poll_interval.into(), )); self.to_stop.push(ttl_checker); } // Start CDC. 
- let cdc_memory_quota = MemoryQuota::new(self.config.cdc.sink_memory_quota.0 as _); + let cdc_memory_quota = MemoryQuota::new(self.core.config.cdc.sink_memory_quota.0 as _); let cdc_endpoint = cdc::Endpoint::new( - self.config.server.cluster_id, - &self.config.cdc, - self.config.storage.api_version(), + self.core.config.server.cluster_id, + &self.core.config.cdc, + self.core.config.storage.api_version(), self.pd_client.clone(), cdc_scheduler.clone(), self.router.clone(), @@ -1203,7 +1053,7 @@ where // Start resolved ts if let Some(mut rts_worker) = rts_worker { let rts_endpoint = resolved_ts::Endpoint::new( - &self.config.resolved_ts, + &self.core.config.resolved_ts, rts_worker.scheduler(), self.router.clone(), engines.store_meta.clone(), @@ -1244,8 +1094,8 @@ where // Import SST service. let import_service = ImportSstService::new( - self.config.import.clone(), - self.config.raft_store.raft_entry_max_size, + self.core.config.import.clone(), + self.core.config.raft_store.raft_entry_max_size, engines.engine.clone(), LocalTablets::Singleton(engines.engines.kv.clone()), servers.importer.clone(), @@ -1285,8 +1135,8 @@ where // Create Diagnostics service let diag_service = DiagnosticsService::new( servers.server.get_debug_thread_pool().clone(), - self.config.log.file.filename.clone(), - self.config.slow_log_file.clone(), + self.core.config.log.file.filename.clone(), + self.core.config.slow_log_file.clone(), ); if servers .server @@ -1312,7 +1162,7 @@ where self.pd_client.clone(), self.resolver.clone().unwrap(), self.security_mgr.clone(), - &self.config.pessimistic_txn, + &self.core.config.pessimistic_txn, ) .unwrap_or_else(|e| fatal!("failed to start lock manager: {}", e)); @@ -1334,9 +1184,9 @@ where engines.engine.clone(), self.region_info_accessor.clone(), LocalTablets::Singleton(engines.engines.kv.clone()), - self.config.backup.clone(), + self.core.config.backup.clone(), self.concurrency_manager.clone(), - self.config.storage.api_version(), + 
self.core.config.storage.api_version(), self.causal_ts_provider.clone(), ); self.cfg_controller.as_mut().unwrap().register( @@ -1392,28 +1242,6 @@ where } } - fn init_io_utility(&mut self) -> BytesFetcher { - let stats_collector_enabled = file_system::init_io_stats_collector() - .map_err(|e| warn!("failed to init I/O stats collector: {}", e)) - .is_ok(); - - let limiter = Arc::new( - self.config - .storage - .io_rate_limit - .build(!stats_collector_enabled /* enable_statistics */), - ); - let fetcher = if stats_collector_enabled { - BytesFetcher::FromIoStatsCollector() - } else { - BytesFetcher::FromRateLimiter(limiter.statistics().unwrap()) - }; - // Set up IO limiter even when rate limit is disabled, so that rate limits can - // be dynamically applied later on. - set_io_rate_limiter(Some(limiter)); - fetcher - } - fn init_metrics_flusher( &mut self, fetcher: BytesFetcher, @@ -1422,7 +1250,7 @@ where let mut engine_metrics = EngineMetricsManager::::new( self.tablet_registry.clone().unwrap(), self.kv_statistics.clone(), - self.config.rocksdb.titan.enabled, + self.core.config.rocksdb.titan.enabled, self.engines.as_ref().unwrap().engines.raft.clone(), self.raft_statistics.clone(), ); @@ -1538,9 +1366,9 @@ where } fn init_storage_stats_task(&self, engines: Engines) { - let config_disk_capacity: u64 = self.config.raft_store.capacity.0; - let data_dir = self.config.storage.data_dir.clone(); - let store_path = self.store_path.clone(); + let config_disk_capacity: u64 = self.core.config.raft_store.capacity.0; + let data_dir = self.core.config.storage.data_dir.clone(); + let store_path = self.core.store_path.clone(); let snap_mgr = self.snap_mgr.clone().unwrap(); let reserve_space = disk::get_disk_reserved_space(); let reserve_raft_space = disk::get_raft_disk_reserved_space(); @@ -1669,6 +1497,7 @@ where fn init_sst_recovery_sender(&mut self) -> Option> { if !self + .core .config .storage .background_error_recovery_window @@ -1697,14 +1526,14 @@ where fn 
run_status_server(&mut self) { // Create a status server. - let status_enabled = !self.config.server.status_addr.is_empty(); + let status_enabled = !self.core.config.server.status_addr.is_empty(); if status_enabled { let mut status_server = match StatusServer::new( - self.config.server.status_thread_pool_size, + self.core.config.server.status_thread_pool_size, self.cfg_controller.take().unwrap(), - Arc::new(self.config.security.clone()), + Arc::new(self.core.config.security.clone()), self.engines.as_ref().unwrap().engine.raft_extension(), - self.store_path.clone(), + self.core.store_path.clone(), self.resource_manager.clone(), ) { Ok(status_server) => Box::new(status_server), @@ -1714,7 +1543,7 @@ where } }; // Start the status server. - if let Err(e) = status_server.start(self.config.server.status_addr.clone()) { + if let Err(e) = status_server.start(self.core.config.server.status_addr.clone()) { error_unknown!(%e; "failed to bind addr for status service"); } else { self.to_stop.push(status_server); @@ -1859,26 +1688,31 @@ impl TikvServer { flow_listener: engine_rocks::FlowListener, ) -> (Engines, Arc) { let block_cache = self + .core .config .storage .block_cache - .build_shared_cache(self.config.storage.engine); + .build_shared_cache(self.core.config.storage.engine); let env = self + .core .config - .build_shared_rocks_env(self.encryption_key_manager.clone(), get_io_rate_limiter()) + .build_shared_rocks_env( + self.core.encryption_key_manager.clone(), + get_io_rate_limiter(), + ) .unwrap(); // Create raft engine let (raft_engine, raft_statistics) = CER::build( - &self.config, + &self.core.config, &env, - &self.encryption_key_manager, + &self.core.encryption_key_manager, &block_cache, ); self.raft_statistics = raft_statistics; // Create kv engine. 
- let builder = KvEngineFactoryBuilder::new(env, &self.config, block_cache) + let builder = KvEngineFactoryBuilder::new(env, &self.core.config, block_cache) .compaction_event_sender(Arc::new(RaftRouterCompactedEventSender { router: Mutex::new(self.router.clone()), })) @@ -1887,7 +1721,7 @@ impl TikvServer { .flow_listener(flow_listener); let factory = Box::new(builder.build()); let kv_engine = factory - .create_shared_db(&self.store_path) + .create_shared_db(&self.core.store_path) .unwrap_or_else(|s| fatal!("failed to create kv engine: {}", s)); self.kv_statistics = Some(factory.rocks_statistics()); let engines = Engines::new(kv_engine.clone(), raft_engine); @@ -1897,8 +1731,11 @@ impl TikvServer { tikv::config::Module::Rocksdb, Box::new(DbConfigManger::new(kv_engine.clone(), DbType::Kv)), ); - let reg = TabletRegistry::new(Box::new(SingletonFactory::new(kv_engine)), &self.store_path) - .unwrap(); + let reg = TabletRegistry::new( + Box::new(SingletonFactory::new(kv_engine)), + &self.core.store_path, + ) + .unwrap(); // It always use the singleton kv_engine, use arbitrary id and suffix. let ctx = TabletContext::with_infinite_region(0, Some(0)); reg.load(ctx, false).unwrap(); @@ -1977,34 +1814,6 @@ fn check_system_config(config: &TikvConfig) { } } -fn try_lock_conflict_addr>(path: P) -> File { - let f = File::create(path.as_ref()).unwrap_or_else(|e| { - fatal!( - "failed to create lock at {}: {}", - path.as_ref().display(), - e - ) - }); - - if f.try_lock_exclusive().is_err() { - fatal!( - "{} already in use, maybe another instance is binding with this address.", - path.as_ref().file_name().unwrap().to_str().unwrap() - ); - } - f -} - -#[cfg(unix)] -fn get_lock_dir() -> String { - format!("{}_TIKV_LOCK_FILES", unsafe { libc::getuid() }) -} - -#[cfg(not(unix))] -fn get_lock_dir() -> String { - "TIKV_LOCK_FILES".to_owned() -} - /// A small trait for components which can be trivially stopped. 
Lets us keep /// a list of these in `TiKV`, rather than storing each component individually. pub(crate) trait Stop { diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 6c96ce62ffb..8bc898d50b4 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -14,8 +14,6 @@ use std::{ cmp, collections::HashMap, - env, - net::SocketAddr, path::{Path, PathBuf}, str::FromStr, sync::{ @@ -29,20 +27,18 @@ use std::{ use api_version::{dispatch_api_version, KvFormat}; use causal_ts::CausalTsProviderImpl; use concurrency_manager::ConcurrencyManager; -use encryption_export::{data_key_manager_from_config, DataKeyManager}; +use encryption_export::DataKeyManager; use engine_rocks::{ flush_engine_statistics, from_rocks_compression_type, raw::{Cache, Env}, - FlowInfo, RocksEngine, RocksStatistics, + RocksEngine, RocksStatistics, }; use engine_traits::{ CachedTablet, CfOptions, CfOptionsExt, Engines, FlowControlFactorsExt, KvEngine, MiscExt, RaftEngine, StatisticsReporter, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, }; -use error_code::ErrorCodeExt; use file_system::{ - get_io_rate_limiter, set_io_rate_limiter, BytesFetcher, File, IoBudgetAdjustor, - MetricsManager as IoMetricsManager, + get_io_rate_limiter, BytesFetcher, IoBudgetAdjustor, MetricsManager as IoMetricsManager, }; use futures::executor::block_on; use grpcio::{EnvBuilder, Environment}; @@ -117,8 +113,8 @@ use tikv_util::{ use tokio::runtime::Builder; use crate::{ - memory::*, raft_engine_switch::*, server::Stop, setup::*, signal_handler, - tikv_util::sys::thread::ThreadBuildWrapper, + common::TikvServerCore, memory::*, raft_engine_switch::*, server::Stop, setup::*, + signal_handler, tikv_util::sys::thread::ThreadBuildWrapper, }; // minimum number of core kept for background requests @@ -139,16 +135,16 @@ fn run_impl(config: TikvConfig) { let mut tikv = TikvServer::::init::(config); // Must be called after `TikvServer::init`. 
- let memory_limit = tikv.config.memory_usage_limit.unwrap().0; - let high_water = (tikv.config.memory_usage_high_water * memory_limit as f64) as u64; + let memory_limit = tikv.core.config.memory_usage_limit.unwrap().0; + let high_water = (tikv.core.config.memory_usage_high_water * memory_limit as f64) as u64; register_memory_usage_high_water(high_water); - tikv.check_conflict_addr(); - tikv.init_fs(); - tikv.init_yatp(); - tikv.init_encryption(); - let fetcher = tikv.init_io_utility(); - let listener = tikv.init_flow_receiver(); + tikv.core.check_conflict_addr(); + tikv.core.init_fs(); + tikv.core.init_yatp(); + tikv.core.init_encryption(); + let fetcher = tikv.core.init_io_utility(); + let listener = tikv.core.init_flow_receiver(); let engines_info = tikv.init_engines(listener); let server_config = tikv.init_servers::(); tikv.register_services(); @@ -207,18 +203,14 @@ const DEFAULT_QUOTA_LIMITER_TUNE_INTERVAL: Duration = Duration::from_secs(5); /// A complete TiKV server. struct TikvServer { - config: TikvConfig, + core: TikvServerCore, cfg_controller: Option, security_mgr: Arc, pd_client: Arc, - flow_info_sender: Option>, - flow_info_receiver: Option>, router: Option>, node: Option>, resolver: Option, - store_path: PathBuf, snap_mgr: Option, // Will be filled in `init_servers`. 
- encryption_key_manager: Option>, engines: Option>, kv_statistics: Option>, raft_statistics: Option>, @@ -226,7 +218,6 @@ struct TikvServer { region_info_accessor: Option, coprocessor_host: Option>, to_stop: Vec>, - lock_files: Vec, concurrency_manager: ConcurrencyManager, env: Arc, background_worker: Worker, @@ -341,16 +332,21 @@ where let check_leader_worker = WorkerBuilder::new("check_leader").thread_count(1).create(); TikvServer { - config, + core: TikvServerCore { + config, + store_path, + lock_files: vec![], + encryption_key_manager: None, + flow_info_sender: None, + flow_info_receiver: None, + }, cfg_controller: Some(cfg_controller), security_mgr, pd_client, router: None, node: None, resolver: None, - store_path, snap_mgr: None, - encryption_key_manager: None, engines: None, kv_statistics: None, raft_statistics: None, @@ -358,13 +354,10 @@ where region_info_accessor: None, coprocessor_host: None, to_stop: vec![], - lock_files: vec![], concurrency_manager, env, background_worker, check_leader_worker, - flow_info_sender: None, - flow_info_receiver: None, sst_worker: None, quota_limiter, resource_manager, @@ -444,172 +437,12 @@ where pd_client } - fn check_conflict_addr(&mut self) { - let cur_addr: SocketAddr = self - .config - .server - .addr - .parse() - .expect("failed to parse into a socket address"); - let cur_ip = cur_addr.ip(); - let cur_port = cur_addr.port(); - let lock_dir = get_lock_dir(); - - let search_base = env::temp_dir().join(lock_dir); - file_system::create_dir_all(&search_base) - .unwrap_or_else(|_| panic!("create {} failed", search_base.display())); - - for entry in file_system::read_dir(&search_base).unwrap().flatten() { - if !entry.file_type().unwrap().is_file() { - continue; - } - let file_path = entry.path(); - let file_name = file_path.file_name().unwrap().to_str().unwrap(); - if let Ok(addr) = file_name.replace('_', ":").parse::() { - let ip = addr.ip(); - let port = addr.port(); - if cur_port == port - && (cur_ip == ip || 
cur_ip.is_unspecified() || ip.is_unspecified()) - { - let _ = try_lock_conflict_addr(file_path); - } - } - } - - let cur_path = search_base.join(cur_addr.to_string().replace(':', "_")); - let cur_file = try_lock_conflict_addr(cur_path); - self.lock_files.push(cur_file); - } - - fn init_fs(&mut self) { - let lock_path = self.store_path.join(Path::new("LOCK")); - - let f = File::create(lock_path.as_path()) - .unwrap_or_else(|e| fatal!("failed to create lock at {}: {}", lock_path.display(), e)); - if f.try_lock_exclusive().is_err() { - fatal!( - "lock {} failed, maybe another instance is using this directory.", - self.store_path.display() - ); - } - self.lock_files.push(f); - - if tikv_util::panic_mark_file_exists(&self.config.storage.data_dir) { - fatal!( - "panic_mark_file {} exists, there must be something wrong with the db. \ - Do not remove the panic_mark_file and force the TiKV node to restart. \ - Please contact TiKV maintainers to investigate the issue. \ - If needed, use scale in and scale out to replace the TiKV node. \ - https://docs.pingcap.com/tidb/stable/scale-tidb-using-tiup", - tikv_util::panic_mark_file_path(&self.config.storage.data_dir).display() - ); - } - - // We truncate a big file to make sure that both raftdb and kvdb of TiKV have - // enough space to do compaction and region migration when TiKV recover. - // This file is created in data_dir rather than db_path, because we must not - // increase store size of db_path. 
- fn calculate_reserved_space(capacity: u64, reserved_size_from_config: u64) -> u64 { - let mut reserved_size = reserved_size_from_config; - if reserved_size_from_config != 0 { - reserved_size = - cmp::max((capacity as f64 * 0.05) as u64, reserved_size_from_config); - } - reserved_size - } - fn reserve_physical_space(data_dir: &String, available: u64, reserved_size: u64) { - let path = Path::new(data_dir).join(file_system::SPACE_PLACEHOLDER_FILE); - if let Err(e) = file_system::remove_file(path) { - warn!("failed to remove space holder on starting: {}", e); - } - - // place holder file size is 20% of total reserved space. - if available > reserved_size { - file_system::reserve_space_for_recover(data_dir, reserved_size / 5) - .map_err(|e| panic!("Failed to reserve space for recovery: {}.", e)) - .unwrap(); - } else { - warn!("no enough disk space left to create the place holder file"); - } - } - - let disk_stats = fs2::statvfs(&self.config.storage.data_dir).unwrap(); - let mut capacity = disk_stats.total_space(); - if self.config.raft_store.capacity.0 > 0 { - capacity = cmp::min(capacity, self.config.raft_store.capacity.0); - } - // reserve space for kv engine - let kv_reserved_size = - calculate_reserved_space(capacity, self.config.storage.reserve_space.0); - disk::set_disk_reserved_space(kv_reserved_size); - reserve_physical_space( - &self.config.storage.data_dir, - disk_stats.available_space(), - kv_reserved_size, - ); - - let raft_data_dir = if self.config.raft_engine.enable { - self.config.raft_engine.config().dir - } else { - self.config.raft_store.raftdb_path.clone() - }; - - let separated_raft_mount_path = - path_in_diff_mount_point(&self.config.storage.data_dir, &raft_data_dir); - if separated_raft_mount_path { - let raft_disk_stats = fs2::statvfs(&raft_data_dir).unwrap(); - // reserve space for raft engine if raft engine is deployed separately - let raft_reserved_size = calculate_reserved_space( - raft_disk_stats.total_space(), - 
self.config.storage.reserve_raft_space.0, - ); - disk::set_raft_disk_reserved_space(raft_reserved_size); - reserve_physical_space( - &raft_data_dir, - raft_disk_stats.available_space(), - raft_reserved_size, - ); - } - } - - fn init_yatp(&self) { - yatp::metrics::set_namespace(Some("tikv")); - prometheus::register(Box::new(yatp::metrics::MULTILEVEL_LEVEL0_CHANCE.clone())).unwrap(); - prometheus::register(Box::new(yatp::metrics::MULTILEVEL_LEVEL_ELAPSED.clone())).unwrap(); - prometheus::register(Box::new(yatp::metrics::TASK_EXEC_DURATION.clone())).unwrap(); - prometheus::register(Box::new(yatp::metrics::TASK_POLL_DURATION.clone())).unwrap(); - prometheus::register(Box::new(yatp::metrics::TASK_EXEC_TIMES.clone())).unwrap(); - } - - fn init_encryption(&mut self) { - self.encryption_key_manager = data_key_manager_from_config( - &self.config.security.encryption, - &self.config.storage.data_dir, - ) - .map_err(|e| { - panic!( - "Encryption failed to initialize: {}. code: {}", - e, - e.error_code() - ) - }) - .unwrap() - .map(Arc::new); - } - - fn init_flow_receiver(&mut self) -> engine_rocks::FlowListener { - let (tx, rx) = mpsc::channel(); - self.flow_info_sender = Some(tx.clone()); - self.flow_info_receiver = Some(rx); - engine_rocks::FlowListener::new(tx) - } - fn init_gc_worker(&mut self) -> GcWorker> { let engines = self.engines.as_ref().unwrap(); let gc_worker = GcWorker::new( engines.engine.clone(), - self.flow_info_sender.take().unwrap(), - self.config.gc.clone(), + self.core.flow_info_sender.take().unwrap(), + self.core.config.gc.clone(), self.pd_client.feature_gate().clone(), Arc::new(self.region_info_accessor.clone().unwrap()), ); @@ -625,9 +458,9 @@ where fn init_servers(&mut self) -> Arc> { let flow_controller = Arc::new(FlowController::Tablet(TabletFlowController::new( - &self.config.storage.flow_control, + &self.core.config.storage.flow_control, self.tablet_registry.clone().unwrap(), - self.flow_info_receiver.take().unwrap(), + 
self.core.flow_info_receiver.take().unwrap(), ))); let mut gc_worker = self.init_gc_worker(); let ttl_checker = Box::new(LazyWorker::new("ttl-checker")); @@ -644,7 +477,7 @@ where cfg_controller.register(tikv::config::Module::Log, Box::new(LogConfigManager)); - let lock_mgr = LockManager::new(&self.config.pessimistic_txn); + let lock_mgr = LockManager::new(&self.core.config.pessimistic_txn); cfg_controller.register( tikv::config::Module::PessimisticTxn, Box::new(lock_mgr.config_manager()), @@ -659,13 +492,13 @@ where slog_global::borrow_global().new(slog::o!()), ); - let unified_read_pool = if self.config.readpool.is_unified_pool_enabled() { + let unified_read_pool = if self.core.config.readpool.is_unified_pool_enabled() { let resource_ctl = self .resource_manager .as_ref() .map(|m| m.derive_controller("unified-read-pool".into(), true)); Some(build_yatp_read_pool( - &self.config.readpool.unified, + &self.core.config.readpool.unified, pd_sender.clone(), engines.engine.clone(), resource_ctl, @@ -701,16 +534,18 @@ where // Start resource metering. 
let (recorder_notifier, collector_reg_handle, resource_tag_factory, recorder_worker) = - resource_metering::init_recorder(self.config.resource_metering.precision.as_millis()); + resource_metering::init_recorder( + self.core.config.resource_metering.precision.as_millis(), + ); self.to_stop.push(recorder_worker); let (reporter_notifier, data_sink_reg_handle, reporter_worker) = resource_metering::init_reporter( - self.config.resource_metering.clone(), + self.core.config.resource_metering.clone(), collector_reg_handle.clone(), ); self.to_stop.push(reporter_worker); let (address_change_notifier, single_target_worker) = resource_metering::init_single_target( - self.config.resource_metering.receiver_address.clone(), + self.core.config.resource_metering.receiver_address.clone(), self.env.clone(), data_sink_reg_handle.clone(), ); @@ -718,7 +553,7 @@ where let rsmeter_pubsub_service = resource_metering::PubSubService::new(data_sink_reg_handle); let cfg_manager = resource_metering::ConfigManager::new( - self.config.resource_metering.clone(), + self.core.config.resource_metering.clone(), recorder_notifier, reporter_notifier, address_change_notifier, @@ -728,11 +563,11 @@ where Box::new(cfg_manager), ); - let storage_read_pool_handle = if self.config.readpool.storage.use_unified_pool() { + let storage_read_pool_handle = if self.core.config.readpool.storage.use_unified_pool() { unified_read_pool.as_ref().unwrap().handle() } else { let storage_read_pools = ReadPool::from(storage::build_read_pool( - &self.config.readpool.storage, + &self.core.config.readpool.storage, pd_sender.clone(), engines.engine.clone(), )); @@ -741,7 +576,7 @@ where let storage = Storage::<_, _, F>::from_engine( engines.engine.clone(), - &self.config.storage, + &self.core.config.storage, storage_read_pool_handle, lock_mgr.clone(), self.concurrency_manager.clone(), @@ -779,6 +614,7 @@ where // Create snapshot manager, server. 
let snap_path = self + .core .store_path .join(Path::new("tablet_snap")) .to_str() @@ -791,11 +627,11 @@ where }; // Create coprocessor endpoint. - let cop_read_pool_handle = if self.config.readpool.coprocessor.use_unified_pool() { + let cop_read_pool_handle = if self.core.config.readpool.coprocessor.use_unified_pool() { unified_read_pool.as_ref().unwrap().handle() } else { let cop_read_pools = ReadPool::from(coprocessor::readpool_impl::build_read_pool( - &self.config.readpool.coprocessor, + &self.core.config.readpool.coprocessor, pd_sender, engines.engine.clone(), )); @@ -803,7 +639,7 @@ where }; let mut unified_read_pool_scale_receiver = None; - if self.config.readpool.is_unified_pool_enabled() { + if self.core.config.readpool.is_unified_pool_enabled() { let (unified_read_pool_scale_notifier, rx) = mpsc::sync_channel(10); cfg_controller.register( tikv::config::Module::Readpool, @@ -811,8 +647,8 @@ where unified_read_pool.as_ref().unwrap().handle(), unified_read_pool_scale_notifier, &self.background_worker, - self.config.readpool.unified.max_thread_count, - self.config.readpool.unified.auto_adjust_pool_size, + self.core.config.readpool.unified.max_thread_count, + self.core.config.readpool.unified.auto_adjust_pool_size, )), ); unified_read_pool_scale_receiver = Some(rx); @@ -826,17 +662,18 @@ where .check_leader_worker .start("check-leader", check_leader_runner); - let server_config = Arc::new(VersionTrack::new(self.config.server.clone())); + let server_config = Arc::new(VersionTrack::new(self.core.config.server.clone())); - self.config + self.core + .config .raft_store .validate( - self.config.coprocessor.region_split_size(), - self.config.coprocessor.enable_region_bucket(), - self.config.coprocessor.region_bucket_size, + self.core.config.coprocessor.region_split_size(), + self.core.config.coprocessor.enable_region_bucket(), + self.core.config.coprocessor.region_bucket_size, ) .unwrap_or_else(|e| fatal!("failed to validate raftstore config {}", e)); - let 
raft_store = Arc::new(VersionTrack::new(self.config.raft_store.clone())); + let raft_store = Arc::new(VersionTrack::new(self.core.config.raft_store.clone())); let health_service = HealthService::default(); let node = self.node.as_ref().unwrap(); @@ -855,7 +692,7 @@ where resource_tag_factory, Arc::clone(&self.quota_limiter), ), - coprocessor_v2::Endpoint::new(&self.config.coprocessor_v2), + coprocessor_v2::Endpoint::new(&self.core.config.coprocessor_v2), self.resolver.clone().unwrap(), Either::Right(snap_mgr.clone()), gc_worker.clone(), @@ -875,22 +712,30 @@ where )), ); - let import_path = self.store_path.join("import"); + let import_path = self.core.store_path.join("import"); let mut importer = SstImporter::new( - &self.config.import, + &self.core.config.import, import_path, - self.encryption_key_manager.clone(), - self.config.storage.api_version(), + self.core.encryption_key_manager.clone(), + self.core.config.storage.api_version(), ) .unwrap(); for (cf_name, compression_type) in &[ ( CF_DEFAULT, - self.config.rocksdb.defaultcf.bottommost_level_compression, + self.core + .config + .rocksdb + .defaultcf + .bottommost_level_compression, ), ( CF_WRITE, - self.config.rocksdb.writecf.bottommost_level_compression, + self.core + .config + .rocksdb + .writecf + .bottommost_level_compression, ), ] { importer.set_compression_type(cf_name, from_rocks_compression_type(*compression_type)); @@ -900,7 +745,7 @@ where // V2 starts split-check worker within raftstore. 
let split_config_manager = - SplitConfigManager::new(Arc::new(VersionTrack::new(self.config.split.clone()))); + SplitConfigManager::new(Arc::new(VersionTrack::new(self.core.config.split.clone()))); cfg_controller.register( tikv::config::Module::Split, Box::new(split_config_manager.clone()), @@ -908,14 +753,14 @@ where let auto_split_controller = AutoSplitController::new( split_config_manager, - self.config.server.grpc_concurrency, - self.config.readpool.unified.max_thread_count, + self.core.config.server.grpc_concurrency, + self.core.config.readpool.unified.max_thread_count, unified_read_pool_scale_receiver, ); // `ConsistencyCheckObserver` must be registered before `Node::start`. let safe_point = Arc::new(AtomicU64::new(0)); - let observer = match self.config.coprocessor.consistency_check_method { + let observer = match self.core.config.coprocessor.consistency_check_method { ConsistencyCheckMethod::Mvcc => BoxConsistencyCheckObserver::new( MvccConsistencyCheckObserver::new(safe_point.clone()), ), @@ -966,7 +811,7 @@ where fatal!("failed to start auto_gc on storage, error: {}", e); } - initial_metric(&self.config.metric); + initial_metric(&self.core.config.metric); self.servers = Some(Servers { lock_mgr, @@ -999,9 +844,9 @@ where engines.engine.clone(), self.region_info_accessor.clone().unwrap(), LocalTablets::Registry(self.tablet_registry.as_ref().unwrap().clone()), - self.config.backup.clone(), + self.core.config.backup.clone(), self.concurrency_manager.clone(), - self.config.storage.api_version(), + self.core.config.storage.api_version(), self.causal_ts_provider.clone(), ); self.cfg_controller.as_mut().unwrap().register( @@ -1012,8 +857,8 @@ where // Import SST service. 
let import_service = ImportSstService::new( - self.config.import.clone(), - self.config.raft_store.raft_entry_max_size, + self.core.config.import.clone(), + self.core.config.raft_store.raft_entry_max_size, engines.engine.clone(), LocalTablets::Registry(self.tablet_registry.as_ref().unwrap().clone()), servers.importer.clone(), @@ -1036,8 +881,8 @@ where // Create Diagnostics service let diag_service = DiagnosticsService::new( servers.server.get_debug_thread_pool().clone(), - self.config.log.file.filename.clone(), - self.config.slow_log_file.clone(), + self.core.config.log.file.filename.clone(), + self.core.config.slow_log_file.clone(), ); if servers .server @@ -1063,7 +908,7 @@ where self.pd_client.clone(), self.resolver.clone().unwrap(), self.security_mgr.clone(), - &self.config.pessimistic_txn, + &self.core.config.pessimistic_txn, ) .unwrap_or_else(|e| fatal!("failed to start lock manager: {}", e)); @@ -1078,28 +923,6 @@ where } } - fn init_io_utility(&mut self) -> BytesFetcher { - let stats_collector_enabled = file_system::init_io_stats_collector() - .map_err(|e| warn!("failed to init I/O stats collector: {}", e)) - .is_ok(); - - let limiter = Arc::new( - self.config - .storage - .io_rate_limit - .build(!stats_collector_enabled /* enable_statistics */), - ); - let fetcher = if stats_collector_enabled { - BytesFetcher::FromIoStatsCollector() - } else { - BytesFetcher::FromRateLimiter(limiter.statistics().unwrap()) - }; - // Set up IO limiter even when rate limit is disabled, so that rate limits can - // be dynamically applied later on. 
- set_io_rate_limiter(Some(limiter)); - fetcher - } - fn init_metrics_flusher( &mut self, fetcher: BytesFetcher, @@ -1108,7 +931,7 @@ where let mut engine_metrics = EngineMetricsManager::::new( self.tablet_registry.clone().unwrap(), self.kv_statistics.clone(), - self.config.rocksdb.titan.enabled, + self.core.config.rocksdb.titan.enabled, self.engines.as_ref().unwrap().raft_engine.clone(), self.raft_statistics.clone(), ); @@ -1224,9 +1047,9 @@ where } fn init_storage_stats_task(&self) { - let config_disk_capacity: u64 = self.config.raft_store.capacity.0; - let data_dir = self.config.storage.data_dir.clone(); - let store_path = self.store_path.clone(); + let config_disk_capacity: u64 = self.core.config.raft_store.capacity.0; + let data_dir = self.core.config.storage.data_dir.clone(); + let store_path = self.core.store_path.clone(); let snap_mgr = self.snap_mgr.clone().unwrap(); let reserve_space = disk::get_disk_reserved_space(); let reserve_raft_space = disk::get_raft_disk_reserved_space(); @@ -1359,6 +1182,7 @@ where fn init_sst_recovery_sender(&mut self) -> Option> { if !self + .core .config .storage .background_error_recovery_window @@ -1391,14 +1215,14 @@ where fn run_status_server(&mut self) { // Create a status server. - let status_enabled = !self.config.server.status_addr.is_empty(); + let status_enabled = !self.core.config.server.status_addr.is_empty(); if status_enabled { let mut status_server = match StatusServer::new( - self.config.server.status_thread_pool_size, + self.core.config.server.status_thread_pool_size, self.cfg_controller.take().unwrap(), - Arc::new(self.config.security.clone()), + Arc::new(self.core.config.security.clone()), self.engines.as_ref().unwrap().engine.raft_extension(), - self.store_path.clone(), + self.core.store_path.clone(), self.resource_manager.clone(), ) { Ok(status_server) => Box::new(status_server), @@ -1408,7 +1232,7 @@ where } }; // Start the status server. 
- if let Err(e) = status_server.start(self.config.server.status_addr.clone()) { + if let Err(e) = status_server.start(self.core.config.server.status_addr.clone()) { error_unknown!(%e; "failed to bind addr for status service"); } else { self.to_stop.push(status_server); @@ -1553,31 +1377,36 @@ impl TikvServer { flow_listener: engine_rocks::FlowListener, ) -> Arc { let block_cache = self + .core .config .storage .block_cache - .build_shared_cache(self.config.storage.engine); + .build_shared_cache(self.core.config.storage.engine); let env = self + .core .config - .build_shared_rocks_env(self.encryption_key_manager.clone(), get_io_rate_limiter()) + .build_shared_rocks_env( + self.core.encryption_key_manager.clone(), + get_io_rate_limiter(), + ) .unwrap(); // Create raft engine let (raft_engine, raft_statistics) = CER::build( - &self.config, + &self.core.config, &env, - &self.encryption_key_manager, + &self.core.encryption_key_manager, &block_cache, ); self.raft_statistics = raft_statistics; // Create kv engine. 
- let builder = KvEngineFactoryBuilder::new(env, &self.config, block_cache) + let builder = KvEngineFactoryBuilder::new(env, &self.core.config, block_cache) .sst_recovery_sender(self.init_sst_recovery_sender()) .flow_listener(flow_listener); - let mut node = NodeV2::new(&self.config.server, self.pd_client.clone(), None); - node.try_bootstrap_store(&self.config.raft_store, &raft_engine) + let mut node = NodeV2::new(&self.core.config.server, self.pd_client.clone(), None); + node.try_bootstrap_store(&self.core.config.raft_store, &raft_engine) .unwrap_or_else(|e| fatal!("failed to bootstrap store: {:?}", e)); assert_ne!(node.id(), 0); @@ -1590,7 +1419,7 @@ impl TikvServer { ))); let factory = Box::new(builder.build()); self.kv_statistics = Some(factory.rocks_statistics()); - let registry = TabletRegistry::new(factory, self.store_path.join("tablets")) + let registry = TabletRegistry::new(factory, self.core.store_path.join("tablets")) .unwrap_or_else(|e| fatal!("failed to create tablet registry {:?}", e)); let cfg_controller = self.cfg_controller.as_mut().unwrap(); cfg_controller.register( @@ -1609,7 +1438,7 @@ impl TikvServer { let router = RaftRouter::new(node.id(), router); let mut coprocessor_host: CoprocessorHost = CoprocessorHost::new( router.store_router().clone(), - self.config.coprocessor.clone(), + self.core.config.coprocessor.clone(), ); let region_info_accessor = RegionInfoAccessor::new(&mut coprocessor_host); @@ -1690,34 +1519,6 @@ fn check_system_config(config: &TikvConfig) { } } -fn try_lock_conflict_addr>(path: P) -> File { - let f = File::create(path.as_ref()).unwrap_or_else(|e| { - fatal!( - "failed to create lock at {}: {}", - path.as_ref().display(), - e - ) - }); - - if f.try_lock_exclusive().is_err() { - fatal!( - "{} already in use, maybe another instance is binding with this address.", - path.as_ref().file_name().unwrap().to_str().unwrap() - ); - } - f -} - -#[cfg(unix)] -fn get_lock_dir() -> String { - format!("{}_TIKV_LOCK_FILES", unsafe { 
libc::getuid() }) -} - -#[cfg(not(unix))] -fn get_lock_dir() -> String { - "TIKV_LOCK_FILES".to_owned() -} - pub struct EngineMetricsManager { tablet_registry: TabletRegistry, kv_statistics: Option>, diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 0f352ebc5bf..307b399b29e 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -1218,7 +1218,7 @@ impl Cluster { pub fn apply_state(&self, region_id: u64, store_id: u64) -> RaftApplyState { self.get_engine(store_id) - .get_apply_state(region_id) + .raft_apply_state(region_id) .unwrap() .unwrap() } @@ -1568,13 +1568,15 @@ impl Cluster { pub fn region_local_state(&self, region_id: u64, store_id: u64) -> RegionLocalState { self.get_engine(store_id) - .get_region_state(region_id) + .region_local_state(region_id) .unwrap() .unwrap() } pub fn get_raft_local_state(&self, region_id: u64, store_id: u64) -> Option { - self.get_engine(store_id).get_raft_local_state(region_id) + self.get_engine(store_id) + .raft_local_state(region_id) + .unwrap() } pub fn raft_local_state(&self, region_id: u64, store_id: u64) -> RaftLocalState { @@ -1667,21 +1669,6 @@ impl WrapFactory { let region_id = self.region_id_of_key(key); self.tablet_registry.get(region_id)?.latest().cloned() } - - pub fn get_region_state( - &self, - region_id: u64, - ) -> engine_traits::Result> { - self.raft_engine.get_region_state(region_id, u64::MAX) - } - - pub fn get_apply_state(&self, region_id: u64) -> engine_traits::Result> { - self.raft_engine.get_apply_state(region_id, u64::MAX) - } - - pub fn get_raft_local_state(&self, region_id: u64) -> Option { - self.raft_engine.get_raft_state(region_id).unwrap() - } } impl Peekable for WrapFactory { @@ -1694,10 +1681,10 @@ impl Peekable for WrapFactory { ) -> engine_traits::Result> { let region_id = self.region_id_of_key(key); - if let Ok(Some(state)) = self.get_region_state(region_id) - && state.state == 
PeerState::Tombstone - { - return Ok(None); + if let Ok(Some(state)) = self.region_local_state(region_id) { + if state.state == PeerState::Tombstone { + return Ok(None); + } } match self.get_tablet(key) { @@ -1714,10 +1701,10 @@ impl Peekable for WrapFactory { ) -> engine_traits::Result> { let region_id = self.region_id_of_key(key); - if let Ok(Some(state)) = self.get_region_state(region_id) - && state.state == PeerState::Tombstone - { - return Ok(None); + if let Ok(Some(state)) = self.region_local_state(region_id) { + if state.state == PeerState::Tombstone { + return Ok(None); + } } match self.get_tablet(key) { @@ -1783,6 +1770,14 @@ impl RawEngine for WrapFactory { &self, region_id: u64, ) -> engine_traits::Result> { - self.get_region_state(region_id) + self.raft_engine.get_region_state(region_id, u64::MAX) + } + + fn raft_apply_state(&self, region_id: u64) -> engine_traits::Result> { + self.raft_engine.get_apply_state(region_id, u64::MAX) + } + + fn raft_local_state(&self, region_id: u64) -> engine_traits::Result> { + self.raft_engine.get_raft_state(region_id) } } diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 2a73f5e239c..14bf1d280d5 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1951,9 +1951,9 @@ pub trait RawEngine: Peekable + SyncMutable { fn region_local_state(&self, region_id: u64) -> engine_traits::Result>; - fn raft_apply_state(&self, _region_id: u64) -> engine_traits::Result> { - unimplemented!() - } + fn raft_apply_state(&self, _region_id: u64) -> engine_traits::Result>; + + fn raft_local_state(&self, _region_id: u64) -> engine_traits::Result>; } impl RawEngine for RocksEngine { @@ -1963,4 +1963,12 @@ impl RawEngine for RocksEngine { ) -> engine_traits::Result> { self.get_msg_cf(CF_RAFT, &keys::region_state_key(region_id)) } + + fn raft_apply_state(&self, region_id: u64) -> engine_traits::Result> { + self.get_msg_cf(CF_RAFT, 
&keys::apply_state_key(region_id)) + } + + fn raft_local_state(&self, region_id: u64) -> engine_traits::Result> { + self.get_msg_cf(CF_RAFT, &keys::raft_state_key(region_id)) + } } From ee13695952073c51e6a036c03aef427c3c46538f Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 31 Mar 2023 15:28:55 +0800 Subject: [PATCH 0613/1149] raftstore-v2: change log level (#14500) ref tikv/tikv#12842 Signed-off-by: Spade A --- components/raftstore-v2/src/operation/command/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 0337c0cf32a..0ae2f1741c3 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -43,7 +43,7 @@ use raftstore::{ }, Error, Result, }; -use slog::{error, info, warn}; +use slog::{debug, error, info, warn}; use tikv_util::{ box_err, log::SlogFormat, @@ -335,7 +335,7 @@ impl Peer { ) { if !self.serving() || !apply_res.admin_result.is_empty() { // TODO: remove following log once stable. - info!(self.logger, "on_apply_res"; "apply_res" => ?apply_res, "apply_trace" => ?self.storage().apply_trace()); + debug!(self.logger, "on_apply_res"; "apply_res" => ?apply_res, "apply_trace" => ?self.storage().apply_trace()); } // It must just applied a snapshot. if apply_res.applied_index < self.entry_storage().first_index() { From d42aa0b47b8a4a39fa91529b350111cea9441b35 Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 31 Mar 2023 17:24:55 +0800 Subject: [PATCH 0614/1149] engine: update raft-engine (#14495) close tikv/tikv#14468 Update the dependency to `raft-engine` lib, to fix the bug that the size of `prefill-for-recycle` is not adaptive to dynamic regions. 
Signed-off-by: Lucasliang --- Cargo.lock | 118 ++++++++++++++++++++--- components/raft_log_engine/src/engine.rs | 6 +- metrics/grafana/tikv_details.json | 4 +- 3 files changed, 112 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 62746ba6bcb..e8162267354 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -52,6 +52,17 @@ dependencies = [ "version_check 0.9.4", ] +[[package]] +name = "ahash" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +dependencies = [ + "cfg-if 1.0.0", + "once_cell", + "version_check 0.9.4", +] + [[package]] name = "aho-corasick" version = "0.7.18" @@ -648,7 +659,7 @@ dependencies = [ "cexpr 0.6.0", "clang-sys", "clap 2.33.0", - "env_logger", + "env_logger 0.9.0", "lazy_static", "lazycell", "log", @@ -1650,6 +1661,40 @@ dependencies = [ "termcolor", ] +[[package]] +name = "env_logger" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85cdab6a89accf66733ad5a1693a4dcced6aeff64602b634530dd73c1f3ee9f0" +dependencies = [ + "humantime", + "is-terminal", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc 0.2.139", + "winapi 0.3.9", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc 0.2.139", +] + [[package]] name = "error-chain" version = "0.12.1" @@ -2349,11 +2394,11 @@ checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" [[package]] name = "hashbrown" -version = "0.12.0" +version = "0.13.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c21d40587b92fa6a6c6e3c1bdbf87d75511db5672f9c93175574b3a00df1758" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" dependencies = [ - "ahash", + "ahash 0.8.3", ] [[package]] @@ -2380,6 +2425,15 @@ dependencies = [ "libc 0.2.139", ] +[[package]] +name = "hermit-abi" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" +dependencies = [ + "libc 0.2.139", +] + [[package]] name = "hex" version = "0.3.2" @@ -2565,7 +2619,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16d4bde3a7105e59c66a4104cfe9606453af1c7a0eac78cb7d5bc263eb762a70" dependencies = [ - "ahash", + "ahash 0.7.4", "atty", "indexmap", "itoa 1.0.1", @@ -2615,6 +2669,16 @@ dependencies = [ "raft", ] +[[package]] +name = "io-lifetimes" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e" +dependencies = [ + "libc 0.2.139", + "windows-sys 0.42.0", +] + [[package]] name = "iovec" version = "0.1.4" @@ -2639,6 +2703,18 @@ dependencies = [ "serde", ] +[[package]] +name = "is-terminal" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28dfb6c8100ccc63462345b67d1bbc3679177c75ee4bf59bf29c8b1d110b8189" +dependencies = [ + "hermit-abi 0.2.6", + "io-lifetimes", + "rustix", + "windows-sys 0.42.0", +] + [[package]] name = "itertools" version = "0.10.0" @@ -2902,6 +2978,12 @@ dependencies = [ "linked-hash-map", ] +[[package]] +name = "linux-raw-sys" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" + [[package]] name = "lock_api" version = "0.4.6" @@ -3457,7 +3539,7 @@ version = "1.13.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.3", "libc 0.2.139", ] @@ -4186,14 +4268,14 @@ dependencies = [ [[package]] name = "raft-engine" version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#404e3fefaeeb4da6b7650268d500cfd3fbd29cae" +source = "git+https://github.com/tikv/raft-engine.git#39f4db451295dbd8b30db4f94f220182c2c65be9" dependencies = [ "byteorder", "crc32fast", "crossbeam", "fail", "fs2", - "hashbrown 0.12.0", + "hashbrown 0.13.2", "hex 0.4.2", "if_chain", "lazy_static", @@ -4220,10 +4302,10 @@ dependencies = [ [[package]] name = "raft-engine-ctl" version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#404e3fefaeeb4da6b7650268d500cfd3fbd29cae" +source = "git+https://github.com/tikv/raft-engine.git#39f4db451295dbd8b30db4f94f220182c2c65be9" dependencies = [ "clap 3.1.6", - "env_logger", + "env_logger 0.10.0", "raft-engine", ] @@ -4744,7 +4826,7 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f06953bb8b9e4307cb7ccc0d9d018e2ddd25a30d32831f631ce4fe8f17671f7" dependencies = [ - "ahash", + "ahash 0.7.4", "bitflags", "instant", "num-traits", @@ -4960,6 +5042,20 @@ dependencies = [ "semver 1.0.4", ] +[[package]] +name = "rustix" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fdebc4b395b7fbb9ab11e462e20ed9051e7b16e42d24042c776eca0ac81b03" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc 0.2.139", + "linux-raw-sys", + "windows-sys 0.42.0", +] + [[package]] name = "rustversion" version = "1.0.4" diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index a9e75ca9580..7b107bc0cc9 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -23,7 +23,7 @@ use kvproto::{ }; use 
raft::eraftpb::Entry; use raft_engine::{ - env::{DefaultFileSystem, FileSystem, Handle, WriteExt}, + env::{DefaultFileSystem, FileSystem, Handle, Permission, WriteExt}, Command, Engine as RawRaftEngine, Error as RaftEngineError, LogBatch, MessageExt, }; pub use raft_engine::{Config as RaftEngineConfig, ReadableSize, RecoveryMode}; @@ -180,10 +180,10 @@ impl FileSystem for ManagedFileSystem { }) } - fn open>(&self, path: P) -> IoResult { + fn open>(&self, path: P, perm: Permission) -> IoResult { Ok(ManagedHandle { path: path.as_ref().to_path_buf(), - base: Arc::new(self.base_file_system.open(path.as_ref())?), + base: Arc::new(self.base_file_system.open(path.as_ref(), perm)?), }) } diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 9600222547e..184ad7a756b 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -31980,9 +31980,9 @@ }, { "exemplar": true, - "expr": "avg(raft_engine_recycled_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"})", + "expr": "avg(raft_engine_recycled_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", "intervalFactor": 1, - "legendFormat": "recycle", + "legendFormat": "{{type}} - recycle", "refId": "C" } ], From 503174b571788f01aef154157cafd53bacd6b860 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Tue, 4 Apr 2023 11:56:56 +0800 Subject: [PATCH 0615/1149] Parameterize Simulator and Cluster in test_raftstore-v2 (#14493) ref tikv/tikv#14401 Simulator and Cluster in test_raftstore-v2 Signed-off-by: CalvinNeo Co-authored-by: Xinye Tao --- Cargo.toml | 2 +- components/engine_panic/Cargo.toml | 3 + components/engine_panic/src/engine.rs | 4 + components/engine_rocks/Cargo.toml | 1 + components/engine_rocks/src/engine.rs | 5 ++ components/engine_traits/src/engine.rs | 5 ++ components/server/src/common.rs | 34 ++++++++ components/server/src/server.rs | 41 ++-------- 
components/server/src/server2.rs | 45 ++-------- components/test_raftstore-v2/src/cluster.rs | 91 +++++++++++++-------- components/test_raftstore-v2/src/node.rs | 24 ++++-- components/test_raftstore-v2/src/server.rs | 64 +++++++++++---- components/test_raftstore-v2/src/util.rs | 28 ++++--- components/test_raftstore/src/cluster.rs | 8 +- components/test_raftstore/src/util.rs | 24 ++++-- 15 files changed, 227 insertions(+), 152 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f8e67d70c04..1bd9377d5f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,7 +39,7 @@ cloud-azure = [ "encryption_export/cloud-azure", "sst_importer/cloud-azure", ] -testexport = ["raftstore/testexport", "api_version/testexport", "causal_ts/testexport", "engine_traits/testexport"] +testexport = ["raftstore/testexport", "api_version/testexport", "causal_ts/testexport", "engine_traits/testexport", "engine_rocks/testexport", "engine_panic/testexport"] test-engine-kv-rocksdb = [ "engine_test/test-engine-kv-rocksdb" ] diff --git a/components/engine_panic/Cargo.toml b/components/engine_panic/Cargo.toml index 55e42f2595f..ec77e2b715f 100644 --- a/components/engine_panic/Cargo.toml +++ b/components/engine_panic/Cargo.toml @@ -5,6 +5,9 @@ description = "An example TiKV storage engine that does nothing but panic" edition = "2018" publish = false +[features] +testexport = [] + [dependencies] engine_traits = { workspace = true } kvproto = { workspace = true } diff --git a/components/engine_panic/src/engine.rs b/components/engine_panic/src/engine.rs index 6bca7d46485..d8faf8fee01 100644 --- a/components/engine_panic/src/engine.rs +++ b/components/engine_panic/src/engine.rs @@ -22,6 +22,10 @@ impl KvEngine for PanicEngine { fn bad_downcast(&self) -> &T { panic!() } + #[cfg(any(test, feature = "testexport"))] + fn inner_refcount(&self) -> usize { + panic!() + } } impl Peekable for PanicEngine { diff --git a/components/engine_rocks/Cargo.toml b/components/engine_rocks/Cargo.toml index 
4c2b7bf5a52..d31ed947520 100644 --- a/components/engine_rocks/Cargo.toml +++ b/components/engine_rocks/Cargo.toml @@ -10,6 +10,7 @@ jemalloc = ["rocksdb/jemalloc"] portable = ["rocksdb/portable"] sse = ["rocksdb/sse"] failpoints = ["fail/failpoints"] +testexport = [] # Disables runtime checks of invariants required by RocksDB that are redundant # with assertions inside RocksDB itself. This makes it possible to test those diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 6499880490f..6c6231ca42f 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -194,6 +194,11 @@ impl KvEngine for RocksEngine { let e: &dyn Any = &self.db; e.downcast_ref().expect("bad engine downcast") } + + #[cfg(any(test, feature = "testexport"))] + fn inner_refcount(&self) -> usize { + Arc::strong_count(&self.db) + } } impl Iterable for RocksEngine { diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index e76765e2ed6..aa90c23b429 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -66,4 +66,9 @@ pub trait KvEngine: fn can_apply_snapshot(&self, _is_timeout: bool, _new_batch: bool, _region_id: u64) -> bool { true } + + /// A method for test to expose inner db refcount in order to make sure a + /// full release of engine. 
+ #[cfg(any(test, feature = "testexport"))] + fn inner_refcount(&self) -> usize; } diff --git a/components/server/src/common.rs b/components/server/src/common.rs index 5c6dfa16120..67044dafc00 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -236,3 +236,37 @@ fn try_lock_conflict_addr>(path: P) -> File { } f } + +const RESERVED_OPEN_FDS: u64 = 1000; +pub fn check_system_config(config: &TikvConfig) { + info!("beginning system configuration check"); + let mut rocksdb_max_open_files = config.rocksdb.max_open_files; + if config.rocksdb.titan.enabled { + // Titan engine maintains yet another pool of blob files and uses the same max + // number of open files setup as rocksdb does. So we double the max required + // open files here + rocksdb_max_open_files *= 2; + } + if let Err(e) = tikv_util::config::check_max_open_fds( + RESERVED_OPEN_FDS + (rocksdb_max_open_files + config.raftdb.max_open_files) as u64, + ) { + fatal!("{}", e); + } + + // Check RocksDB data dir + if let Err(e) = tikv_util::config::check_data_dir(&config.storage.data_dir) { + warn!( + "check: rocksdb-data-dir"; + "path" => &config.storage.data_dir, + "err" => %e + ); + } + // Check raft data dir + if let Err(e) = tikv_util::config::check_data_dir(&config.raft_store.raftdb_path) { + warn!( + "check: raftdb-path"; + "path" => &config.raft_store.raftdb_path, + "err" => %e + ); + } +} diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 35fc96a3460..718ebbc0b3b 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -134,7 +134,11 @@ use tikv_util::{ use tokio::runtime::Builder; use crate::{ - common::TikvServerCore, memory::*, raft_engine_switch::*, setup::*, signal_handler, + common::{check_system_config, TikvServerCore}, + memory::*, + raft_engine_switch::*, + setup::*, + signal_handler, tikv_util::sys::thread::ThreadBuildWrapper, }; @@ -214,8 +218,6 @@ pub fn run_tikv(config: TikvConfig) { }) } -const 
RESERVED_OPEN_FDS: u64 = 1000; - const DEFAULT_METRICS_FLUSH_INTERVAL: Duration = Duration::from_millis(10_000); const DEFAULT_MEMTRACE_FLUSH_INTERVAL: Duration = Duration::from_millis(1_000); const DEFAULT_ENGINE_METRICS_RESET_INTERVAL: Duration = Duration::from_millis(60_000); @@ -1781,39 +1783,6 @@ fn pre_start() { } } -fn check_system_config(config: &TikvConfig) { - info!("beginning system configuration check"); - let mut rocksdb_max_open_files = config.rocksdb.max_open_files; - if config.rocksdb.titan.enabled { - // Titan engine maintains yet another pool of blob files and uses the same max - // number of open files setup as rocksdb does. So we double the max required - // open files here - rocksdb_max_open_files *= 2; - } - if let Err(e) = tikv_util::config::check_max_open_fds( - RESERVED_OPEN_FDS + (rocksdb_max_open_files + config.raftdb.max_open_files) as u64, - ) { - fatal!("{}", e); - } - - // Check RocksDB data dir - if let Err(e) = tikv_util::config::check_data_dir(&config.storage.data_dir) { - warn!( - "check: rocksdb-data-dir"; - "path" => &config.storage.data_dir, - "err" => %e - ); - } - // Check raft data dir - if let Err(e) = tikv_util::config::check_data_dir(&config.raft_store.raftdb_path) { - warn!( - "check: raftdb-path"; - "path" => &config.raft_store.raftdb_path, - "err" => %e - ); - } -} - /// A small trait for components which can be trivially stopped. Lets us keep /// a list of these in `TiKV`, rather than storing each component individually. 
pub(crate) trait Stop { diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 8bc898d50b4..31ced8547ea 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -113,8 +113,13 @@ use tikv_util::{ use tokio::runtime::Builder; use crate::{ - common::TikvServerCore, memory::*, raft_engine_switch::*, server::Stop, setup::*, - signal_handler, tikv_util::sys::thread::ThreadBuildWrapper, + common::{check_system_config, TikvServerCore}, + memory::*, + raft_engine_switch::*, + server::Stop, + setup::*, + signal_handler, + tikv_util::sys::thread::ThreadBuildWrapper, }; // minimum number of core kept for background requests @@ -193,8 +198,6 @@ pub fn run_tikv(config: TikvConfig) { }) } -const RESERVED_OPEN_FDS: u64 = 1000; - const DEFAULT_METRICS_FLUSH_INTERVAL: Duration = Duration::from_millis(10_000); const DEFAULT_MEMTRACE_FLUSH_INTERVAL: Duration = Duration::from_millis(1_000); const DEFAULT_ENGINE_METRICS_RESET_INTERVAL: Duration = Duration::from_millis(60_000); @@ -1485,40 +1488,6 @@ fn pre_start() { ); } } - -fn check_system_config(config: &TikvConfig) { - info!("beginning system configuration check"); - let mut rocksdb_max_open_files = config.rocksdb.max_open_files; - if config.rocksdb.titan.enabled { - // Titan engine maintains yet another pool of blob files and uses the same max - // number of open files setup as rocksdb does. 
So we double the max required - // open files here - rocksdb_max_open_files *= 2; - } - if let Err(e) = tikv_util::config::check_max_open_fds( - RESERVED_OPEN_FDS + (rocksdb_max_open_files + config.raftdb.max_open_files) as u64, - ) { - fatal!("{}", e); - } - - // Check RocksDB data dir - if let Err(e) = tikv_util::config::check_data_dir(&config.storage.data_dir) { - warn!( - "check: rocksdb-data-dir"; - "path" => &config.storage.data_dir, - "err" => %e - ); - } - // Check raft data dir - if let Err(e) = tikv_util::config::check_data_dir(&config.raft_store.raftdb_path) { - warn!( - "check: raftdb-path"; - "path" => &config.raft_store.raftdb_path, - "err" => %e - ); - } -} - pub struct EngineMetricsManager { tablet_registry: TabletRegistry, kv_statistics: Option>, diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 307b399b29e..015062534e4 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -10,11 +10,11 @@ use std::{ use collections::{HashMap, HashSet}; use encryption_export::DataKeyManager; -use engine_rocks::{RocksDbVector, RocksEngine, RocksSnapshot, RocksStatistics}; +use engine_rocks::{RocksSnapshot, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{ - Iterable, MiscExt, Peekable, RaftEngine, RaftEngineReadOnly, RaftLogBatch, ReadOptions, - SyncMutable, TabletRegistry, CF_DEFAULT, + KvEngine, Peekable, RaftEngine, RaftEngineReadOnly, RaftLogBatch, ReadOptions, SyncMutable, + TabletRegistry, CF_DEFAULT, }; use file_system::IoRateLimiter; use futures::{compat::Future01CompatExt, executor::block_on, select, Future, FutureExt}; @@ -65,13 +65,11 @@ use tikv_util::{ HandyRwLock, }; -use crate::create_test_engine; - // We simulate 3 or 5 nodes, each has a store. // Sometimes, we use fixed id to test, which means the id // isn't allocated by pd, and node id, store id are same. 
// E,g, for node 1, the node id and store id are both 1. -pub trait Simulator { +pub trait Simulator { // Pass 0 to let pd allocate a node id if db is empty. // If node id > 0, the node must be created in db already, // and the node id must be the same as given argument. @@ -81,10 +79,10 @@ pub trait Simulator { &mut self, node_id: u64, cfg: Config, - store_meta: Arc>>, + store_meta: Arc>>, key_mgr: Option>, raft_engine: RaftTestEngine, - tablet_registry: TabletRegistry, + tablet_registry: TabletRegistry, resource_manager: &Option>, ) -> ServerResult; @@ -97,7 +95,7 @@ pub trait Simulator { fn add_recv_filter(&mut self, node_id: u64, filter: Box); fn clear_recv_filters(&mut self, node_id: u64); - fn get_router(&self, node_id: u64) -> Option>; + fn get_router(&self, node_id: u64) -> Option>; fn get_snap_dir(&self, node_id: u64) -> String; fn send_raft_msg(&mut self, msg: RaftMessage) -> Result<()>; @@ -183,9 +181,7 @@ pub trait Simulator { fn async_snapshot( &mut self, request: RaftCmdRequest, - ) -> impl Future< - Output = std::result::Result, RaftCmdResponse>, - > + Send; + ) -> impl Future, RaftCmdResponse>> + Send; fn async_peer_msg_on_node(&self, node_id: u64, region_id: u64, msg: PeerMsg) -> Result<()>; @@ -313,16 +309,16 @@ pub trait Simulator { } } -pub struct Cluster { +pub struct Cluster, EK: KvEngine> { pub cfg: Config, leaders: HashMap, pub count: usize, pub paths: Vec, - pub engines: Vec<(TabletRegistry, RaftTestEngine)>, - pub tablet_registries: HashMap>, + pub engines: Vec<(TabletRegistry, RaftTestEngine)>, + pub tablet_registries: HashMap>, pub raft_engines: HashMap, - pub store_metas: HashMap>>>, + pub store_metas: HashMap>>>, key_managers: Vec>>, pub io_rate_limiter: Option>, key_managers_map: HashMap>>, @@ -334,16 +330,46 @@ pub struct Cluster { pub sim: Arc>, pub pd_client: Arc, resource_manager: Option>, + pub engine_creator: Box< + dyn Fn( + Option<(u64, u64)>, + Option>, + &Config, + ) -> ( + TabletRegistry, + RaftTestEngine, + Option>, + 
TempDir, + LazyWorker, + Arc, + Option>, + ), + >, } -impl Cluster { +impl, EK: KvEngine> Cluster { pub fn new( id: u64, count: usize, sim: Arc>, pd_client: Arc, api_version: ApiVersion, - ) -> Cluster { + engine_creator: Box< + dyn Fn( + Option<(u64, u64)>, + Option>, + &Config, + ) -> ( + TabletRegistry, + RaftTestEngine, + Option>, + TempDir, + LazyWorker, + Arc, + Option>, + ), + >, + ) -> Cluster { Cluster { cfg: Config { tikv: new_tikv_config_with_api_ver(id, api_version), @@ -367,6 +393,7 @@ impl Cluster { resource_manager: Some(Arc::new(ResourceGroupManager::default())), sim, pd_client, + engine_creator, } } @@ -417,7 +444,7 @@ impl Cluster { // id indicates cluster id store_id fn create_engine(&mut self, id: Option<(u64, u64)>) { let (reg, raft_engine, key_manager, dir, sst_worker, kv_statistics, raft_statistics) = - create_test_engine(id, self.io_rate_limiter.clone(), &self.cfg); + (self.engine_creator)(id, self.io_rate_limiter.clone(), &self.cfg); self.engines.push((reg, raft_engine)); self.key_managers.push(key_manager); self.paths.push(dir); @@ -536,7 +563,7 @@ impl Cluster { if let Some(tablet) = tablet.latest() { let mut tried = 0; while tried < 10 { - if Arc::strong_count(tablet.as_inner()) <= 3 { + if tablet.inner_refcount() <= 3 { break; } thread::sleep(Duration::from_millis(10)); @@ -632,7 +659,7 @@ impl Cluster { } } - pub fn get_engine(&self, node_id: u64) -> WrapFactory { + pub fn get_engine(&self, node_id: u64) -> WrapFactory { WrapFactory::new( self.pd_client.clone(), self.raft_engines[&node_id].clone(), @@ -1503,7 +1530,7 @@ impl Cluster { self.sim.rl().get_snap_dir(node_id) } - pub fn get_router(&self, node_id: u64) -> Option> { + pub fn get_router(&self, node_id: u64) -> Option> { self.sim.rl().get_router(node_id) } @@ -1632,24 +1659,24 @@ pub fn bootstrap_store( Ok(()) } -impl Drop for Cluster { +impl, EK: KvEngine> Drop for Cluster { fn drop(&mut self) { test_util::clear_failpoints(); self.shutdown(); } } -pub struct WrapFactory { +pub 
struct WrapFactory { pd_client: Arc, raft_engine: RaftTestEngine, - tablet_registry: TabletRegistry, + tablet_registry: TabletRegistry, } -impl WrapFactory { +impl WrapFactory { pub fn new( pd_client: Arc, raft_engine: RaftTestEngine, - tablet_registry: TabletRegistry, + tablet_registry: TabletRegistry, ) -> Self { Self { raft_engine, @@ -1664,15 +1691,15 @@ impl WrapFactory { self.pd_client.get_region(key).unwrap().get_id() } - fn get_tablet(&self, key: &[u8]) -> Option { + fn get_tablet(&self, key: &[u8]) -> Option { // todo: unwrap let region_id = self.region_id_of_key(key); self.tablet_registry.get(region_id)?.latest().cloned() } } -impl Peekable for WrapFactory { - type DbVector = RocksDbVector; +impl Peekable for WrapFactory { + type DbVector = EK::DbVector; fn get_value_opt( &self, @@ -1722,7 +1749,7 @@ impl Peekable for WrapFactory { } } -impl SyncMutable for WrapFactory { +impl SyncMutable for WrapFactory { fn put(&self, key: &[u8], value: &[u8]) -> engine_traits::Result<()> { match self.get_tablet(key) { Some(tablet) => tablet.put(key, value), @@ -1765,7 +1792,7 @@ impl SyncMutable for WrapFactory { } } -impl RawEngine for WrapFactory { +impl RawEngine for WrapFactory { fn region_local_state( &self, region_id: u64, diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 058a9caf186..a02af6ad177 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -166,7 +166,7 @@ impl NodeCluster { } } -impl Simulator for NodeCluster { +impl Simulator for NodeCluster { fn get_node_ids(&self) -> HashSet { self.nodes.keys().cloned().collect() } @@ -439,16 +439,30 @@ impl Simulator for NodeCluster { // Compare to server cluster, node cluster does not have server layer and // storage layer. 
-pub fn new_node_cluster(id: u64, count: usize) -> Cluster { +pub fn new_node_cluster(id: u64, count: usize) -> Cluster { let pd_client = Arc::new(TestPdClient::new(id, false)); let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); - Cluster::new(id, count, sim, pd_client, ApiVersion::V1) + Cluster::new( + id, + count, + sim, + pd_client, + ApiVersion::V1, + Box::new(&crate::create_test_engine), + ) } // This cluster does not support batch split, we expect it to transfer the // `BatchSplit` request to `split` request -pub fn new_incompatible_node_cluster(id: u64, count: usize) -> Cluster { +pub fn new_incompatible_node_cluster(id: u64, count: usize) -> Cluster { let pd_client = Arc::new(TestPdClient::new(id, true)); let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); - Cluster::new(id, count, sim, pd_client, ApiVersion::V1) + Cluster::new( + id, + count, + sim, + pd_client, + ApiVersion::V1, + Box::new(&crate::create_test_engine), + ) } diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index ed2a44d80fa..921d3b991ab 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -677,7 +677,7 @@ impl ServerCluster { } } -impl Simulator for ServerCluster { +impl Simulator for ServerCluster { fn get_node_ids(&self) -> HashSet { self.metas.keys().cloned().collect() } @@ -805,7 +805,7 @@ impl Simulator for ServerCluster { } } -impl Cluster { +impl Cluster { pub fn must_get_snapshot_of_region(&mut self, region_id: u64) -> RegionSnapshot { let mut try_snapshot = || -> Option> { let leader = self.leader_of_region(region_id)?; @@ -833,35 +833,60 @@ impl Cluster { } } -pub fn new_server_cluster(id: u64, count: usize) -> Cluster { +pub fn new_server_cluster(id: u64, count: usize) -> Cluster { let pd_client = Arc::new(TestPdClient::new(id, false)); let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); - 
Cluster::new(id, count, sim, pd_client, ApiVersion::V1) + Cluster::new( + id, + count, + sim, + pd_client, + ApiVersion::V1, + Box::new(crate::create_test_engine), + ) } -pub fn new_incompatible_server_cluster(id: u64, count: usize) -> Cluster { +pub fn new_incompatible_server_cluster( + id: u64, + count: usize, +) -> Cluster { let pd_client = Arc::new(TestPdClient::new(id, true)); let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); - Cluster::new(id, count, sim, pd_client, ApiVersion::V1) + Cluster::new( + id, + count, + sim, + pd_client, + ApiVersion::V1, + Box::new(crate::create_test_engine), + ) } pub fn new_server_cluster_with_api_ver( id: u64, count: usize, api_ver: ApiVersion, -) -> Cluster { +) -> Cluster { let pd_client = Arc::new(TestPdClient::new(id, false)); let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); - Cluster::new(id, count, sim, pd_client, api_ver) + Cluster::new( + id, + count, + sim, + pd_client, + api_ver, + Box::new(crate::create_test_engine), + ) } -pub fn must_new_cluster_and_kv_client() -> (Cluster, TikvClient, Context) { +pub fn must_new_cluster_and_kv_client() -> (Cluster, TikvClient, Context) +{ must_new_cluster_and_kv_client_mul(1) } pub fn must_new_cluster_and_kv_client_mul( count: usize, -) -> (Cluster, TikvClient, Context) { +) -> (Cluster, TikvClient, Context) { let (cluster, leader, ctx) = must_new_cluster_mul(count); let env = Arc::new(Environment::new(1)); @@ -871,14 +896,16 @@ pub fn must_new_cluster_and_kv_client_mul( (cluster, client, ctx) } -pub fn must_new_cluster_mul(count: usize) -> (Cluster, metapb::Peer, Context) { +pub fn must_new_cluster_mul( + count: usize, +) -> (Cluster, metapb::Peer, Context) { must_new_and_configure_cluster_mul(count, |_| ()) } fn must_new_and_configure_cluster_mul( count: usize, - mut configure: impl FnMut(&mut Cluster), -) -> (Cluster, metapb::Peer, Context) { + mut configure: impl FnMut(&mut Cluster), +) -> (Cluster, metapb::Peer, Context) 
{ let mut cluster = new_server_cluster(0, count); configure(&mut cluster); cluster.run(); @@ -894,8 +921,8 @@ fn must_new_and_configure_cluster_mul( } pub fn must_new_and_configure_cluster_and_kv_client( - configure: impl FnMut(&mut Cluster), -) -> (Cluster, TikvClient, Context) { + configure: impl FnMut(&mut Cluster), +) -> (Cluster, TikvClient, Context) { let (cluster, leader, ctx) = must_new_and_configure_cluster(configure); let env = Arc::new(Environment::new(1)); @@ -907,12 +934,13 @@ pub fn must_new_and_configure_cluster_and_kv_client( } pub fn must_new_and_configure_cluster( - configure: impl FnMut(&mut Cluster), -) -> (Cluster, metapb::Peer, Context) { + configure: impl FnMut(&mut Cluster), +) -> (Cluster, metapb::Peer, Context) { must_new_and_configure_cluster_mul(1, configure) } -pub fn must_new_cluster_and_debug_client() -> (Cluster, DebugClient, u64) { +pub fn must_new_cluster_and_debug_client() -> (Cluster, DebugClient, u64) +{ let (cluster, leader, _) = must_new_cluster_mul(1); let env = Arc::new(Environment::new(1)); diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index e2cc88c569c..9f68beaad35 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -5,7 +5,7 @@ use std::{fmt::Write, sync::Arc, thread, time::Duration}; use encryption_export::{data_key_manager_from_config, DataKeyManager}; use engine_rocks::{RocksEngine, RocksStatistics}; use engine_test::raft::RaftTestEngine; -use engine_traits::{TabletRegistry, CF_DEFAULT}; +use engine_traits::{KvEngine, TabletRegistry, CF_DEFAULT}; use file_system::IoRateLimiter; use futures::Future; use kvproto::{kvrpcpb::Context, metapb, raft_cmdpb::RaftCmdResponse}; @@ -86,16 +86,16 @@ pub fn create_test_engine( } /// Keep putting random kvs until specified size limit is reached. 
-pub fn put_till_size( - cluster: &mut Cluster, +pub fn put_till_size, EK: KvEngine>( + cluster: &mut Cluster, limit: u64, range: &mut dyn Iterator, ) -> Vec { put_cf_till_size(cluster, CF_DEFAULT, limit, range) } -pub fn put_cf_till_size( - cluster: &mut Cluster, +pub fn put_cf_till_size, EK: KvEngine>( + cluster: &mut Cluster, cf: &'static str, limit: u64, range: &mut dyn Iterator, @@ -134,8 +134,8 @@ pub fn configure_for_snapshot(config: &mut Config) { config.raft_store.snap_mgr_gc_tick_interval = ReadableDuration::millis(50); } -pub fn configure_for_lease_read_v2( - cluster: &mut Cluster, +pub fn configure_for_lease_read_v2, EK: KvEngine>( + cluster: &mut Cluster, base_tick_ms: Option, election_ticks: Option, ) -> Duration { @@ -162,7 +162,11 @@ pub fn configure_for_lease_read_v2( election_timeout } -pub fn wait_for_synced(cluster: &mut Cluster, node_id: u64, region_id: u64) { +pub fn wait_for_synced( + cluster: &mut Cluster, + node_id: u64, + region_id: u64, +) { let mut storage = cluster .sim .read() @@ -193,8 +197,8 @@ pub fn wait_for_synced(cluster: &mut Cluster, node_id: u64, regio } // Issue a read request on the specified peer. 
-pub fn read_on_peer( - cluster: &mut Cluster, +pub fn read_on_peer, EK: KvEngine>( + cluster: &mut Cluster, peer: metapb::Peer, region: metapb::Region, key: &[u8], @@ -211,8 +215,8 @@ pub fn read_on_peer( cluster.read(None, request, timeout) } -pub fn async_read_on_peer( - cluster: &mut Cluster, +pub fn async_read_on_peer, EK: KvEngine>( + cluster: &mut Cluster, peer: metapb::Peer, region: metapb::Region, key: &[u8], diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 14bf1d280d5..988625d3750 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -12,7 +12,7 @@ use std::{ use collections::{HashMap, HashSet}; use crossbeam::channel::TrySendError; use encryption_export::DataKeyManager; -use engine_rocks::{RocksDbVector, RocksEngine, RocksSnapshot, RocksStatistics}; +use engine_rocks::{RocksEngine, RocksSnapshot, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{ CompactExt, Engines, Iterable, MiscExt, Mutable, Peekable, RaftEngineReadOnly, SyncMutable, @@ -1947,7 +1947,9 @@ impl Drop for Cluster { } } -pub trait RawEngine: Peekable + SyncMutable { +pub trait RawEngine: + Peekable + SyncMutable +{ fn region_local_state(&self, region_id: u64) -> engine_traits::Result>; @@ -1956,7 +1958,7 @@ pub trait RawEngine: Peekable + SyncMutable { fn raft_local_state(&self, _region_id: u64) -> engine_traits::Result>; } -impl RawEngine for RocksEngine { +impl RawEngine for RocksEngine { fn region_local_state( &self, region_id: u64, diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 5c9d9ac5d54..81753d49600 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -16,8 +16,8 @@ use encryption_export::{ use engine_rocks::{config::BlobRunMode, RocksEngine, RocksSnapshot, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{ - CfNamesExt, Engines, 
Iterable, Peekable, RaftEngineDebug, RaftEngineReadOnly, CF_DEFAULT, - CF_RAFT, + CfNamesExt, Engines, Iterable, KvEngine, Peekable, RaftEngineDebug, RaftEngineReadOnly, + CF_DEFAULT, CF_RAFT, }; use file_system::IoRateLimiter; use futures::executor::block_on; @@ -60,7 +60,12 @@ use txn_types::Key; use crate::{Cluster, Config, RawEngine, ServerCluster, Simulator}; -pub fn must_get(engine: &impl RawEngine, cf: &str, key: &[u8], value: Option<&[u8]>) { +pub fn must_get( + engine: &impl RawEngine, + cf: &str, + key: &[u8], + value: Option<&[u8]>, +) { for _ in 1..300 { let res = engine.get_value_cf(cf, &keys::data_key(key)).unwrap(); if let (Some(value), Some(res)) = (value, res.as_ref()) { @@ -86,19 +91,24 @@ pub fn must_get(engine: &impl RawEngine, cf: &str, key: &[u8], value: Option<&[u ) } -pub fn must_get_equal(engine: &impl RawEngine, key: &[u8], value: &[u8]) { +pub fn must_get_equal(engine: &impl RawEngine, key: &[u8], value: &[u8]) { must_get(engine, "default", key, Some(value)); } -pub fn must_get_none(engine: &impl RawEngine, key: &[u8]) { +pub fn must_get_none(engine: &impl RawEngine, key: &[u8]) { must_get(engine, "default", key, None); } -pub fn must_get_cf_equal(engine: &impl RawEngine, cf: &str, key: &[u8], value: &[u8]) { +pub fn must_get_cf_equal( + engine: &impl RawEngine, + cf: &str, + key: &[u8], + value: &[u8], +) { must_get(engine, cf, key, Some(value)); } -pub fn must_get_cf_none(engine: &impl RawEngine, cf: &str, key: &[u8]) { +pub fn must_get_cf_none(engine: &impl RawEngine, cf: &str, key: &[u8]) { must_get(engine, cf, key, None); } From b778db2e4766a71edd24023d409bd778961120f0 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 7 Apr 2023 05:24:57 +0800 Subject: [PATCH 0616/1149] raftstore-v2: thread name fix (#14461) ref tikv/tikv#12842 Signed-off-by: Spade A --- components/causal_ts/src/tso.rs | 2 +- components/raftstore-v2/src/batch/store.rs | 2 +- 
components/raftstore-v2/src/worker/tablet_flush.rs | 3 ++- components/server/src/server.rs | 2 +- components/server/src/server2.rs | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/components/causal_ts/src/tso.rs b/components/causal_ts/src/tso.rs index 2c99d8c068a..51f1824f7a6 100644 --- a/components/causal_ts/src/tso.rs +++ b/components/causal_ts/src/tso.rs @@ -355,7 +355,7 @@ impl BatchTsoProvider { let s = Self { pd_client: pd_client.clone(), batch_list: Arc::new(TsoBatchList::new(cache_multiplier)), - causal_ts_worker: WorkerBuilder::new("causal_ts_batch_tso_worker").create(), + causal_ts_worker: WorkerBuilder::new("causal-ts-batch-tso-worker").create(), renew_interval, renew_parameter, renew_request_tx, diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index fe152bb3990..66b0414b7c3 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -506,7 +506,7 @@ struct Workers { impl Workers { fn new(background: Worker, pd: LazyWorker, purge: Option) -> Self { - let tablet_flush = WorkerBuilder::new("tablet_flush-worker") + let tablet_flush = WorkerBuilder::new("tablet-flush-worker") .thread_count(2) .create(); Self { diff --git a/components/raftstore-v2/src/worker/tablet_flush.rs b/components/raftstore-v2/src/worker/tablet_flush.rs index c53296a5cb6..e7d2c534f80 100644 --- a/components/raftstore-v2/src/worker/tablet_flush.rs +++ b/components/raftstore-v2/src/worker/tablet_flush.rs @@ -3,7 +3,7 @@ use std::fmt::{Display, Formatter}; use engine_traits::{KvEngine, RaftEngine, TabletRegistry, DATA_CFS}; -use kvproto::raft_cmdpb::RaftCmdRequest; +use kvproto::raft_cmdpb::{AdminCmdType, RaftCmdRequest}; use slog::{error, info, Logger}; use tikv_util::{time::Instant, worker::Runnable}; use txn_types::WriteBatchFlags; @@ -79,6 +79,7 @@ impl Runner { } let mut req = req.unwrap(); + assert!(req.get_admin_request().get_cmd_type() == 
AdminCmdType::BatchSplit); req.mut_header() .set_flags(WriteBatchFlags::SPLIT_SECOND_PHASE.bits()); if let Err(e) = self.router.send( diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 718ebbc0b3b..be5edf0cf41 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -392,7 +392,7 @@ where // Run check leader in a dedicate thread, because it is time sensitive // and crucial to TiCDC replication lag. - let check_leader_worker = WorkerBuilder::new("check_leader").thread_count(1).create(); + let check_leader_worker = WorkerBuilder::new("check-leader").thread_count(1).create(); TikvServer { core: TikvServerCore { diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 31ced8547ea..81ec94207a9 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -332,7 +332,7 @@ where // Run check leader in a dedicate thread, because it is time sensitive // and crucial to TiCDC replication lag. 
- let check_leader_worker = WorkerBuilder::new("check_leader").thread_count(1).create(); + let check_leader_worker = WorkerBuilder::new("check-leader").thread_count(1).create(); TikvServer { core: TikvServerCore { From f1d2de3580ec1d74a8d9b107b8729dffb953afee Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Fri, 7 Apr 2023 13:58:58 +0800 Subject: [PATCH 0617/1149] server, test: Refactor NodeCluster and ServerCluster (#14512) ref tikv/tikv#14401 Parameterize NodeCluster and ServerCluster Signed-off-by: CalvinNeo Co-authored-by: Xinye Tao --- components/server/src/common.rs | 525 ++++++++++++++++++- components/server/src/server.rs | 571 ++------------------- components/server/src/server2.rs | 534 ++----------------- components/test_raftstore-v2/src/node.rs | 54 +- components/test_raftstore-v2/src/server.rs | 153 +++--- components/test_raftstore-v2/src/util.rs | 4 +- components/test_raftstore/src/util.rs | 2 +- 7 files changed, 723 insertions(+), 1120 deletions(-) diff --git a/components/server/src/common.rs b/components/server/src/common.rs index 67044dafc00..2d2ae7bd398 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -1,22 +1,73 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. +//! This mod is exported to make convenience for creating TiKV-like servers. 
+ use std::{ - cmp, env, + cmp, + collections::HashMap, + env, fmt, net::SocketAddr, path::{Path, PathBuf}, - sync::{mpsc, Arc}, + sync::{ + atomic::{AtomicU32, Ordering}, + mpsc, Arc, + }, + time::Duration, u64, }; use encryption_export::{data_key_manager_from_config, DataKeyManager}; -use engine_rocks::FlowInfo; +use engine_rocks::{ + flush_engine_statistics, + raw::{Cache, Env}, + FlowInfo, RocksEngine, RocksStatistics, +}; +use engine_traits::{ + CachedTablet, CfOptionsExt, FlowControlFactorsExt, KvEngine, RaftEngine, StatisticsReporter, + TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, +}; use error_code::ErrorCodeExt; -use file_system::{set_io_rate_limiter, BytesFetcher, File}; -use tikv::config::TikvConfig; -use tikv_util::sys::{disk, path_in_diff_mount_point}; +use file_system::{get_io_rate_limiter, set_io_rate_limiter, BytesFetcher, File, IoBudgetAdjustor}; +use grpcio::Environment; +use pd_client::{PdClient, RpcClient}; +use raft_log_engine::RaftLogEngine; +use security::SecurityManager; +use tikv::{ + config::{ConfigController, DbConfigManger, DbType, TikvConfig}, + server::{status_server::StatusServer, DEFAULT_CLUSTER_ID}, +}; +use tikv_util::{ + config::{ensure_dir_exist, RaftDataStateMachine}, + math::MovingAvgU32, + metrics::INSTANCE_BACKEND_CPU_QUOTA, + quota_limiter::QuotaLimiter, + sys::{cpu_time::ProcessStat, disk, path_in_diff_mount_point, SysQuota}, + time::Instant, + worker::{LazyWorker, Worker}, +}; + +use crate::{raft_engine_switch::*, setup::validate_and_persist_config}; -/// This is the common layer of TiKV-like servers. By holding it in its own -/// TikvServer implementation, one can easily access the common ability of a -/// TiKV server. 
+// minimum number of core kept for background requests +const BACKGROUND_REQUEST_CORE_LOWER_BOUND: f64 = 1.0; +// max ratio of core quota for background requests +const BACKGROUND_REQUEST_CORE_MAX_RATIO: f64 = 0.95; +// default ratio of core quota for background requests = core_number * 0.5 +const BACKGROUND_REQUEST_CORE_DEFAULT_RATIO: f64 = 0.5; +// indication of TiKV instance is short of cpu +const SYSTEM_BUSY_THRESHOLD: f64 = 0.80; +// indication of TiKV instance in healthy state when cpu usage is in [0.5, 0.80) +const SYSTEM_HEALTHY_THRESHOLD: f64 = 0.50; +// pace of cpu quota adjustment +const CPU_QUOTA_ADJUSTMENT_PACE: f64 = 200.0; // 0.2 vcpu +const DEFAULT_QUOTA_LIMITER_TUNE_INTERVAL: Duration = Duration::from_secs(5); + +/// This is the common part of TiKV-like servers. It is a collection of all +/// capabilities a TikvServer should have or may take advantage of. By holding +/// it in its own TikvServer implementation, one can easily access the common +/// ability of a TiKV server. +// Fields in this struct are all public since they are open for other TikvServer +// to use, e.g. a custom TikvServer may alter some fields in `config` or push +// some services into `to_stop`. pub struct TikvServerCore { pub config: TikvConfig, pub store_path: PathBuf, @@ -24,9 +75,57 @@ pub struct TikvServerCore { pub encryption_key_manager: Option>, pub flow_info_sender: Option>, pub flow_info_receiver: Option>, + pub to_stop: Vec>, + pub background_worker: Worker, } impl TikvServerCore { + /// Initialize and check the config + /// + /// Warnings are logged and fatal errors exist. + /// + /// # Fatal errors + /// + /// - If `dynamic config` feature is enabled and failed to register config + /// to PD + /// - If some critical configs (like data dir) are differrent from last run + /// - If the config can't pass `validate()` + /// - If the max open file descriptor limit is not high enough to support + /// the main database and the raft database. 
+ pub fn init_config(mut config: TikvConfig) -> ConfigController { + validate_and_persist_config(&mut config, true); + + ensure_dir_exist(&config.storage.data_dir).unwrap(); + if !config.rocksdb.wal_dir.is_empty() { + ensure_dir_exist(&config.rocksdb.wal_dir).unwrap(); + } + if config.raft_engine.enable { + ensure_dir_exist(&config.raft_engine.config().dir).unwrap(); + } else { + ensure_dir_exist(&config.raft_store.raftdb_path).unwrap(); + if !config.raftdb.wal_dir.is_empty() { + ensure_dir_exist(&config.raftdb.wal_dir).unwrap(); + } + } + + check_system_config(&config); + + tikv_util::set_panic_hook(config.abort_on_panic, &config.storage.data_dir); + + info!( + "using config"; + "config" => serde_json::to_string(&config).unwrap(), + ); + if config.panic_when_unexpected_key_or_data { + info!("panic-when-unexpected-key-or-data is on"); + tikv_util::set_panic_when_unexpected_key_or_data(true); + } + + config.write_into_metrics(); + + ConfigController::new(config) + } + pub fn check_conflict_addr(&mut self) { let cur_addr: SocketAddr = self .config @@ -207,6 +306,112 @@ impl TikvServerCore { self.flow_info_receiver = Some(rx); engine_rocks::FlowListener::new(tx) } + + pub fn connect_to_pd_cluster( + config: &mut TikvConfig, + env: Arc, + security_mgr: Arc, + ) -> Arc { + let pd_client = Arc::new( + RpcClient::new(&config.pd, Some(env), security_mgr) + .unwrap_or_else(|e| fatal!("failed to create rpc client: {}", e)), + ); + + let cluster_id = pd_client + .get_cluster_id() + .unwrap_or_else(|e| fatal!("failed to get cluster id: {}", e)); + if cluster_id == DEFAULT_CLUSTER_ID { + fatal!("cluster id can't be {}", DEFAULT_CLUSTER_ID); + } + config.server.cluster_id = cluster_id; + info!( + "connect to PD cluster"; + "cluster_id" => cluster_id + ); + + pd_client + } + + // Only background cpu quota tuning is implemented at present. 
iops and frontend + // quota tuning is on the way + pub fn init_quota_tuning_task(&self, quota_limiter: Arc) { + // No need to do auto tune when capacity is really low + if SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_MAX_RATIO + < BACKGROUND_REQUEST_CORE_LOWER_BOUND + { + return; + }; + + // Determine the base cpu quota + let base_cpu_quota = + // if cpu quota is not specified, start from optimistic case + if quota_limiter.cputime_limiter(false).is_infinite() { + 1000_f64 + * f64::max( + BACKGROUND_REQUEST_CORE_LOWER_BOUND, + SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_DEFAULT_RATIO, + ) + } else { + quota_limiter.cputime_limiter(false) / 1000_f64 + }; + + // Calculate the celling and floor quota + let celling_quota = f64::min( + base_cpu_quota * 2.0, + 1_000_f64 * SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_MAX_RATIO, + ); + let floor_quota = f64::max( + base_cpu_quota * 0.5, + 1_000_f64 * BACKGROUND_REQUEST_CORE_LOWER_BOUND, + ); + + let mut proc_stats: ProcessStat = ProcessStat::cur_proc_stat().unwrap(); + self.background_worker.spawn_interval_task( + DEFAULT_QUOTA_LIMITER_TUNE_INTERVAL, + move || { + if quota_limiter.auto_tune_enabled() { + let cputime_limit = quota_limiter.cputime_limiter(false); + let old_quota = if cputime_limit.is_infinite() { + base_cpu_quota + } else { + cputime_limit / 1000_f64 + }; + let cpu_usage = match proc_stats.cpu_usage() { + Ok(r) => r, + Err(_e) => 0.0, + }; + // Try tuning quota when cpu_usage is correctly collected. + // rule based tuning: + // - if instance is busy, shrink cpu quota for analyze by one quota pace until + // lower bound is hit; + // - if instance cpu usage is healthy, no op; + // - if instance is idle, increase cpu quota by one quota pace until upper + // bound is hit. 
+ if cpu_usage > 0.0f64 { + let mut target_quota = old_quota; + + let cpu_util = cpu_usage / SysQuota::cpu_cores_quota(); + if cpu_util >= SYSTEM_BUSY_THRESHOLD { + target_quota = + f64::max(target_quota - CPU_QUOTA_ADJUSTMENT_PACE, floor_quota); + } else if cpu_util < SYSTEM_HEALTHY_THRESHOLD { + target_quota = + f64::min(target_quota + CPU_QUOTA_ADJUSTMENT_PACE, celling_quota); + } + + if old_quota != target_quota { + quota_limiter.set_cpu_time_limit(target_quota as usize, false); + debug!( + "cpu_time_limiter tuned for backend request"; + "cpu_util" => ?cpu_util, + "new quota" => ?target_quota); + INSTANCE_BACKEND_CPU_QUOTA.set(target_quota as i64); + } + } + } + }, + ); + } } #[cfg(unix)] @@ -270,3 +475,305 @@ pub fn check_system_config(config: &TikvConfig) { ); } } + +pub struct EnginesResourceInfo { + tablet_registry: TabletRegistry, + raft_engine: Option, + latest_normalized_pending_bytes: AtomicU32, + normalized_pending_bytes_collector: MovingAvgU32, +} + +impl EnginesResourceInfo { + const SCALE_FACTOR: u64 = 100; + + pub fn new( + tablet_registry: TabletRegistry, + raft_engine: Option, + max_samples_to_preserve: usize, + ) -> Self { + EnginesResourceInfo { + tablet_registry, + raft_engine, + latest_normalized_pending_bytes: AtomicU32::new(0), + normalized_pending_bytes_collector: MovingAvgU32::new(max_samples_to_preserve), + } + } + + pub fn update( + &self, + _now: Instant, + cached_latest_tablets: &mut HashMap>, + ) { + let mut normalized_pending_bytes = 0; + + fn fetch_engine_cf(engine: &RocksEngine, cf: &str, normalized_pending_bytes: &mut u32) { + if let Ok(cf_opts) = engine.get_options_cf(cf) { + if let Ok(Some(b)) = engine.get_cf_pending_compaction_bytes(cf) { + if cf_opts.get_soft_pending_compaction_bytes_limit() > 0 { + *normalized_pending_bytes = std::cmp::max( + *normalized_pending_bytes, + (b * EnginesResourceInfo::SCALE_FACTOR + / cf_opts.get_soft_pending_compaction_bytes_limit()) + as u32, + ); + } + } + } + } + + if let Some(raft_engine) = 
&self.raft_engine { + fetch_engine_cf(raft_engine, CF_DEFAULT, &mut normalized_pending_bytes); + } + + self.tablet_registry + .for_each_opened_tablet(|id, db: &mut CachedTablet| { + cached_latest_tablets.insert(id, db.clone()); + true + }); + + // todo(SpadeA): Now, there's a potential race condition problem where the + // tablet could be destroyed after the clone and before the fetching + // which could result in programme panic. It's okay now as the single global + // kv_engine will not be destroyed in normal operation and v2 is not + // ready for operation. Furthermore, this race condition is general to v2 as + // tablet clone is not a case exclusively happened here. We should + // propose another PR to tackle it such as destory tablet lazily in a GC + // thread. + + for (_, cache) in cached_latest_tablets.iter_mut() { + let Some(tablet) = cache.latest() else { continue }; + for cf in &[CF_DEFAULT, CF_WRITE, CF_LOCK] { + fetch_engine_cf(tablet, cf, &mut normalized_pending_bytes); + } + } + + // Clear ensures that these tablets are not hold forever. 
+ cached_latest_tablets.clear(); + + let (_, avg) = self + .normalized_pending_bytes_collector + .add(normalized_pending_bytes); + self.latest_normalized_pending_bytes.store( + std::cmp::max(normalized_pending_bytes, avg), + Ordering::Relaxed, + ); + } + + #[cfg(any(test, feature = "testexport"))] + pub fn latest_normalized_pending_bytes(&self) -> u32 { + self.latest_normalized_pending_bytes.load(Ordering::Relaxed) + } +} + +impl IoBudgetAdjustor for EnginesResourceInfo { + fn adjust(&self, total_budgets: usize) -> usize { + let score = self.latest_normalized_pending_bytes.load(Ordering::Relaxed) as f32 + / Self::SCALE_FACTOR as f32; + // Two reasons for adding `sqrt` on top: + // 1) In theory the convergence point is independent of the value of pending + // bytes (as long as backlog generating rate equals consuming rate, which is + // determined by compaction budgets), a convex helps reach that point while + // maintaining low level of pending bytes. + // 2) Variance of compaction pending bytes grows with its magnitude, a filter + // with decreasing derivative can help balance such trend. + let score = score.sqrt(); + // The target global write flow slides between Bandwidth / 2 and Bandwidth. + let score = 0.5 + score / 2.0; + (total_budgets as f32 * score) as usize + } +} + +/// A small trait for components which can be trivially stopped. Lets us keep +/// a list of these in `TiKV`, rather than storing each component individually. 
+pub trait Stop { + fn stop(self: Box); +} + +impl Stop for StatusServer +where + R: 'static + Send, +{ + fn stop(self: Box) { + (*self).stop() + } +} + +impl Stop for Worker { + fn stop(self: Box) { + Worker::stop(&self); + } +} + +impl Stop for LazyWorker { + fn stop(self: Box) { + self.stop_worker(); + } +} + +pub trait ConfiguredRaftEngine: RaftEngine { + fn build( + _: &TikvConfig, + _: &Arc, + _: &Option>, + _: &Cache, + ) -> (Self, Option>); + fn as_rocks_engine(&self) -> Option<&RocksEngine>; + fn register_config(&self, _cfg_controller: &mut ConfigController); +} + +impl ConfiguredRaftEngine for T { + default fn build( + _: &TikvConfig, + _: &Arc, + _: &Option>, + _: &Cache, + ) -> (Self, Option>) { + unimplemented!() + } + default fn as_rocks_engine(&self) -> Option<&RocksEngine> { + None + } + default fn register_config(&self, _cfg_controller: &mut ConfigController) {} +} + +impl ConfiguredRaftEngine for RocksEngine { + fn build( + config: &TikvConfig, + env: &Arc, + key_manager: &Option>, + block_cache: &Cache, + ) -> (Self, Option>) { + let mut raft_data_state_machine = RaftDataStateMachine::new( + &config.storage.data_dir, + &config.raft_engine.config().dir, + &config.raft_store.raftdb_path, + ); + let should_dump = raft_data_state_machine.before_open_target(); + + let raft_db_path = &config.raft_store.raftdb_path; + let config_raftdb = &config.raftdb; + let statistics = Arc::new(RocksStatistics::new_titan()); + let raft_db_opts = config_raftdb.build_opt(env.clone(), Some(&statistics)); + let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); + let raftdb = engine_rocks::util::new_engine_opt(raft_db_path, raft_db_opts, raft_cf_opts) + .expect("failed to open raftdb"); + + if should_dump { + let raft_engine = + RaftLogEngine::new(config.raft_engine.config(), key_manager.clone(), None) + .expect("failed to open raft engine for migration"); + dump_raft_engine_to_raftdb(&raft_engine, &raftdb, 8 /* threads */); + raft_engine.stop(); + 
drop(raft_engine); + raft_data_state_machine.after_dump_data(); + } + (raftdb, Some(statistics)) + } + + fn as_rocks_engine(&self) -> Option<&RocksEngine> { + Some(self) + } + + fn register_config(&self, cfg_controller: &mut ConfigController) { + cfg_controller.register( + tikv::config::Module::Raftdb, + Box::new(DbConfigManger::new(self.clone(), DbType::Raft)), + ); + } +} + +impl ConfiguredRaftEngine for RaftLogEngine { + fn build( + config: &TikvConfig, + env: &Arc, + key_manager: &Option>, + block_cache: &Cache, + ) -> (Self, Option>) { + let mut raft_data_state_machine = RaftDataStateMachine::new( + &config.storage.data_dir, + &config.raft_store.raftdb_path, + &config.raft_engine.config().dir, + ); + let should_dump = raft_data_state_machine.before_open_target(); + + let raft_config = config.raft_engine.config(); + let raft_engine = + RaftLogEngine::new(raft_config, key_manager.clone(), get_io_rate_limiter()) + .expect("failed to open raft engine"); + + if should_dump { + let config_raftdb = &config.raftdb; + let raft_db_opts = config_raftdb.build_opt(env.clone(), None); + let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); + let raftdb = engine_rocks::util::new_engine_opt( + &config.raft_store.raftdb_path, + raft_db_opts, + raft_cf_opts, + ) + .expect("failed to open raftdb for migration"); + dump_raftdb_to_raft_engine(&raftdb, &raft_engine, 8 /* threads */); + raftdb.stop(); + drop(raftdb); + raft_data_state_machine.after_dump_data(); + } + (raft_engine, None) + } +} + +const DEFAULT_ENGINE_METRICS_RESET_INTERVAL: Duration = Duration::from_millis(60_000); +pub struct EngineMetricsManager { + tablet_registry: TabletRegistry, + kv_statistics: Option>, + kv_is_titan: bool, + raft_engine: ER, + raft_statistics: Option>, + last_reset: Instant, +} + +impl EngineMetricsManager { + pub fn new( + tablet_registry: TabletRegistry, + kv_statistics: Option>, + kv_is_titan: bool, + raft_engine: ER, + raft_statistics: Option>, + ) -> Self { + EngineMetricsManager 
{ + tablet_registry, + kv_statistics, + kv_is_titan, + raft_engine, + raft_statistics, + last_reset: Instant::now(), + } + } + + pub fn flush(&mut self, now: Instant) { + let mut reporter = EK::StatisticsReporter::new("kv"); + self.tablet_registry + .for_each_opened_tablet(|_, db: &mut CachedTablet| { + if let Some(db) = db.latest() { + reporter.collect(db); + } + true + }); + reporter.flush(); + self.raft_engine.flush_metrics("raft"); + + if let Some(s) = self.kv_statistics.as_ref() { + flush_engine_statistics(s, "kv", self.kv_is_titan); + } + if let Some(s) = self.raft_statistics.as_ref() { + flush_engine_statistics(s, "raft", false); + } + if now.saturating_duration_since(self.last_reset) >= DEFAULT_ENGINE_METRICS_RESET_INTERVAL { + if let Some(s) = self.kv_statistics.as_ref() { + s.reset(); + } + if let Some(s) = self.raft_statistics.as_ref() { + s.reset(); + } + self.last_reset = now; + } + } +} diff --git a/components/server/src/server.rs b/components/server/src/server.rs index be5edf0cf41..cc07ff85471 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -15,13 +15,9 @@ use std::{ cmp, collections::HashMap, convert::TryFrom, - fmt, path::{Path, PathBuf}, str::FromStr, - sync::{ - atomic::{AtomicU32, AtomicU64, Ordering}, - mpsc, Arc, Mutex, - }, + sync::{atomic::AtomicU64, mpsc, Arc, Mutex}, time::Duration, u64, }; @@ -33,21 +29,13 @@ use backup_stream::{ use causal_ts::CausalTsProviderImpl; use cdc::{CdcConfigManager, MemoryQuota}; use concurrency_manager::ConcurrencyManager; -use encryption_export::DataKeyManager; -use engine_rocks::{ - flush_engine_statistics, from_rocks_compression_type, - raw::{Cache, Env}, - RocksEngine, RocksStatistics, -}; +use engine_rocks::{from_rocks_compression_type, RocksEngine, RocksStatistics}; use engine_rocks_helper::sst_recovery::{RecoveryRunner, DEFAULT_CHECK_INTERVAL}; use engine_traits::{ - CachedTablet, CfOptions, CfOptionsExt, Engines, FlowControlFactorsExt, KvEngine, MiscExt, - 
RaftEngine, SingletonFactory, StatisticsReporter, TabletContext, TabletRegistry, CF_DEFAULT, - CF_LOCK, CF_WRITE, -}; -use file_system::{ - get_io_rate_limiter, BytesFetcher, IoBudgetAdjustor, MetricsManager as IoMetricsManager, + Engines, KvEngine, MiscExt, RaftEngine, SingletonFactory, TabletContext, TabletRegistry, + CF_DEFAULT, CF_WRITE, }; +use file_system::{get_io_rate_limiter, BytesFetcher, MetricsManager as IoMetricsManager}; use futures::executor::block_on; use grpcio::{EnvBuilder, Environment}; use grpcio_health::HealthService; @@ -103,8 +91,7 @@ use tikv::{ status_server::StatusServer, tablet_snap::NoSnapshotCache, ttl::TtlChecker, - KvEngineFactoryBuilder, Node, RaftKv, Server, CPU_CORES_QUOTA_GAUGE, DEFAULT_CLUSTER_ID, - GRPC_THREAD_PREFIX, + KvEngineFactoryBuilder, Node, RaftKv, Server, CPU_CORES_QUOTA_GAUGE, GRPC_THREAD_PREFIX, }, storage::{ self, @@ -117,14 +104,9 @@ use tikv::{ }; use tikv_util::{ check_environment_variables, - config::{ensure_dir_exist, RaftDataStateMachine, VersionTrack}, - math::MovingAvgU32, - metrics::INSTANCE_BACKEND_CPU_QUOTA, + config::VersionTrack, quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, - sys::{ - cpu_time::ProcessStat, disk, path_in_diff_mount_point, register_memory_usage_high_water, - SysQuota, - }, + sys::{disk, path_in_diff_mount_point, register_memory_usage_high_water, SysQuota}, thread_group::GroupProperties, time::{Instant, Monitor}, worker::{Builder as WorkerBuilder, LazyWorker, Scheduler, Worker}, @@ -134,27 +116,13 @@ use tikv_util::{ use tokio::runtime::Builder; use crate::{ - common::{check_system_config, TikvServerCore}, + common::{ConfiguredRaftEngine, EngineMetricsManager, EnginesResourceInfo, TikvServerCore}, memory::*, - raft_engine_switch::*, setup::*, signal_handler, tikv_util::sys::thread::ThreadBuildWrapper, }; -// minimum number of core kept for background requests -const BACKGROUND_REQUEST_CORE_LOWER_BOUND: f64 = 1.0; -// max ratio of core quota for background requests -const 
BACKGROUND_REQUEST_CORE_MAX_RATIO: f64 = 0.95; -// default ratio of core quota for background requests = core_number * 0.5 -const BACKGROUND_REQUEST_CORE_DEFAULT_RATIO: f64 = 0.5; -// indication of TiKV instance is short of cpu -const SYSTEM_BUSY_THRESHOLD: f64 = 0.80; -// indication of TiKV instance in healthy state when cpu usage is in [0.5, 0.80) -const SYSTEM_HEALTHY_THRESHOLD: f64 = 0.50; -// pace of cpu quota adjustment -const CPU_QUOTA_ADJUSTMENT_PACE: f64 = 200.0; // 0.2 vcpu - #[inline] fn run_impl(config: TikvConfig) { let mut tikv = TikvServer::::init::(config); @@ -178,7 +146,7 @@ fn run_impl(config: TikvConfig) { tikv.init_storage_stats_task(engines); tikv.run_server(server_config); tikv.run_status_server(); - tikv.init_quota_tuning_task(tikv.quota_limiter.clone()); + tikv.core.init_quota_tuning_task(tikv.quota_limiter.clone()); signal_handler::wait_for_signal( Some(tikv.engines.take().unwrap().engines), @@ -220,9 +188,7 @@ pub fn run_tikv(config: TikvConfig) { const DEFAULT_METRICS_FLUSH_INTERVAL: Duration = Duration::from_millis(10_000); const DEFAULT_MEMTRACE_FLUSH_INTERVAL: Duration = Duration::from_millis(1_000); -const DEFAULT_ENGINE_METRICS_RESET_INTERVAL: Duration = Duration::from_millis(60_000); const DEFAULT_STORAGE_STATS_INTERVAL: Duration = Duration::from_secs(1); -const DEFAULT_QUOTA_LIMITER_TUNE_INTERVAL: Duration = Duration::from_secs(5); /// A complete TiKV server. 
struct TikvServer { @@ -240,10 +206,8 @@ struct TikvServer { servers: Option>, region_info_accessor: RegionInfoAccessor, coprocessor_host: Option>, - to_stop: Vec>, concurrency_manager: ConcurrencyManager, env: Arc, - background_worker: Worker, check_leader_worker: Worker, sst_worker: Option>>, quota_limiter: Arc, @@ -292,8 +256,11 @@ where .name_prefix(thd_name!(GRPC_THREAD_PREFIX)) .build(), ); - let pd_client = - Self::connect_to_pd_cluster(&mut config, env.clone(), Arc::clone(&security_mgr)); + let pd_client = TikvServerCore::connect_to_pd_cluster( + &mut config, + env.clone(), + Arc::clone(&security_mgr), + ); // check if TiKV need to run in snapshot recovery mode let is_recovering_marked = match pd_client.is_recovering_marked() { Err(e) => { @@ -320,7 +287,7 @@ where } // Initialize and check config - let cfg_controller = Self::init_config(config); + let cfg_controller = TikvServerCore::init_config(config); let config = cfg_controller.get_current(); let store_path = Path::new(&config.storage.data_dir).to_owned(); @@ -402,6 +369,8 @@ where encryption_key_manager: None, flow_info_sender: None, flow_info_receiver: None, + to_stop: vec![], + background_worker, }, cfg_controller: Some(cfg_controller), security_mgr, @@ -416,10 +385,8 @@ where servers: None, region_info_accessor, coprocessor_host, - to_stop: vec![], concurrency_manager, env, - background_worker, check_leader_worker, sst_worker: None, quota_limiter, @@ -430,77 +397,6 @@ where } } - /// Initialize and check the config - /// - /// Warnings are logged and fatal errors exist. - /// - /// # Fatal errors - /// - /// - If `dynamic config` feature is enabled and failed to register config - /// to PD - /// - If some critical configs (like data dir) are differrent from last run - /// - If the config can't pass `validate()` - /// - If the max open file descriptor limit is not high enough to support - /// the main database and the raft database. 
- fn init_config(mut config: TikvConfig) -> ConfigController { - validate_and_persist_config(&mut config, true); - - ensure_dir_exist(&config.storage.data_dir).unwrap(); - if !config.rocksdb.wal_dir.is_empty() { - ensure_dir_exist(&config.rocksdb.wal_dir).unwrap(); - } - if config.raft_engine.enable { - ensure_dir_exist(&config.raft_engine.config().dir).unwrap(); - } else { - ensure_dir_exist(&config.raft_store.raftdb_path).unwrap(); - if !config.raftdb.wal_dir.is_empty() { - ensure_dir_exist(&config.raftdb.wal_dir).unwrap(); - } - } - - check_system_config(&config); - - tikv_util::set_panic_hook(config.abort_on_panic, &config.storage.data_dir); - - info!( - "using config"; - "config" => serde_json::to_string(&config).unwrap(), - ); - if config.panic_when_unexpected_key_or_data { - info!("panic-when-unexpected-key-or-data is on"); - tikv_util::set_panic_when_unexpected_key_or_data(true); - } - - config.write_into_metrics(); - - ConfigController::new(config) - } - - fn connect_to_pd_cluster( - config: &mut TikvConfig, - env: Arc, - security_mgr: Arc, - ) -> Arc { - let pd_client = Arc::new( - RpcClient::new(&config.pd, Some(env), security_mgr) - .unwrap_or_else(|e| fatal!("failed to create rpc client: {}", e)), - ); - - let cluster_id = pd_client - .get_cluster_id() - .unwrap_or_else(|e| fatal!("failed to get cluster id: {}", e)); - if cluster_id == DEFAULT_CLUSTER_ID { - fatal!("cluster id can't be {}", DEFAULT_CLUSTER_ID); - } - config.server.cluster_id = cluster_id; - info!( - "connect to PD cluster"; - "cluster_id" => cluster_id - ); - - pd_client - } - fn init_engines(&mut self, engines: Engines) { let store_meta = Arc::new(Mutex::new(StoreMeta::new(PENDING_MSG_CAP))); let engine = RaftKv::new( @@ -612,14 +508,14 @@ where pd_sender.clone(), engines.engine.clone(), resource_ctl, - CleanupMethod::Remote(self.background_worker.remote()), + CleanupMethod::Remote(self.core.background_worker.remote()), )) } else { None }; if let Some(unified_read_pool) = 
&unified_read_pool { let handle = unified_read_pool.handle(); - self.background_worker.spawn_interval_task( + self.core.background_worker.spawn_interval_task( UPDATE_EWMA_TIME_SLICE_INTERVAL, move || { handle.update_ewma_time_slice(); @@ -647,19 +543,19 @@ where resource_metering::init_recorder( self.core.config.resource_metering.precision.as_millis(), ); - self.to_stop.push(recorder_worker); + self.core.to_stop.push(recorder_worker); let (reporter_notifier, data_sink_reg_handle, reporter_worker) = resource_metering::init_reporter( self.core.config.resource_metering.clone(), collector_reg_handle.clone(), ); - self.to_stop.push(reporter_worker); + self.core.to_stop.push(reporter_worker); let (address_change_notifier, single_target_worker) = resource_metering::init_single_target( self.core.config.resource_metering.receiver_address.clone(), self.env.clone(), data_sink_reg_handle.clone(), ); - self.to_stop.push(single_target_worker); + self.core.to_stop.push(single_target_worker); let rsmeter_pubsub_service = resource_metering::PubSubService::new(data_sink_reg_handle); let cfg_manager = resource_metering::ConfigManager::new( @@ -714,7 +610,7 @@ where let (resolver, state) = resolve::new_resolver( self.pd_client.clone(), - &self.background_worker, + &self.core.background_worker, storage.get_engine().raft_extension(), ); self.resolver = Some(resolver); @@ -766,7 +662,7 @@ where Box::new(ReadPoolConfigManager::new( unified_read_pool.as_ref().unwrap().handle(), unified_read_pool_scale_notifier, - &self.background_worker, + &self.core.background_worker, self.core.config.readpool.unified.max_thread_count, self.core.config.readpool.unified.auto_adjust_pool_size, )), @@ -829,7 +725,7 @@ where self.core.config.storage.api_version(), self.pd_client.clone(), state, - self.background_worker.clone(), + self.core.background_worker.clone(), Some(health_service.clone()), None, ); @@ -911,7 +807,7 @@ where Arc::clone(&self.security_mgr), ); 
backup_stream_worker.start(backup_stream_endpoint); - self.to_stop.push(backup_stream_worker); + self.core.to_stop.push(backup_stream_worker); Some(backup_stream_scheduler) } else { None @@ -953,6 +849,7 @@ where self.coprocessor_host.clone().unwrap(), ); let split_check_scheduler = self + .core .background_worker .start("split-check", split_check_runner); cfg_controller.register( @@ -1028,7 +925,7 @@ where self.region_info_accessor.clone(), self.core.config.storage.ttl_check_poll_interval.into(), )); - self.to_stop.push(ttl_checker); + self.core.to_stop.push(ttl_checker); } // Start CDC. @@ -1050,7 +947,7 @@ where self.causal_ts_provider.clone(), ); cdc_worker.start_with_timer(cdc_endpoint); - self.to_stop.push(cdc_worker); + self.core.to_stop.push(cdc_worker); // Start resolved ts if let Some(mut rts_worker) = rts_worker { @@ -1065,7 +962,7 @@ where self.security_mgr.clone(), ); rts_worker.start_with_timer(rts_endpoint); - self.to_stop.push(rts_worker); + self.core.to_stop.push(rts_worker); } cfg_controller.register( @@ -1169,7 +1066,7 @@ where .unwrap_or_else(|e| fatal!("failed to start lock manager: {}", e)); // Backup service. - let mut backup_worker = Box::new(self.background_worker.lazy_build("backup-endpoint")); + let mut backup_worker = Box::new(self.core.background_worker.lazy_build("backup-endpoint")); let backup_scheduler = backup_worker.scheduler(); let backup_service = backup::Service::::with_router(backup_scheduler, self.router.clone()); @@ -1265,13 +1162,15 @@ where // `cached_latest_tablets` is passed to `update` to avoid memory // allocation each time when calling `update`. 
let mut cached_latest_tablets = HashMap::default(); - self.background_worker - .spawn_interval_task(DEFAULT_METRICS_FLUSH_INTERVAL, move || { + self.core.background_worker.spawn_interval_task( + DEFAULT_METRICS_FLUSH_INTERVAL, + move || { let now = Instant::now(); engine_metrics.flush(now); io_metrics.flush(now); engines_info_clone.update(now, &mut cached_latest_tablets); - }); + }, + ); if let Some(limiter) = get_io_rate_limiter() { limiter.set_low_priority_io_adjustor_if_needed(Some(engines_info)); } @@ -1279,90 +1178,11 @@ where let mut mem_trace_metrics = MemoryTraceManager::default(); mem_trace_metrics.register_provider(MEMTRACE_RAFTSTORE.clone()); mem_trace_metrics.register_provider(MEMTRACE_COPROCESSOR.clone()); - self.background_worker - .spawn_interval_task(DEFAULT_MEMTRACE_FLUSH_INTERVAL, move || { + self.core.background_worker.spawn_interval_task( + DEFAULT_MEMTRACE_FLUSH_INTERVAL, + move || { let now = Instant::now(); mem_trace_metrics.flush(now); - }); - } - - // Only background cpu quota tuning is implemented at present. 
iops and frontend - // quota tuning is on the way - fn init_quota_tuning_task(&self, quota_limiter: Arc) { - // No need to do auto tune when capacity is really low - if SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_MAX_RATIO - < BACKGROUND_REQUEST_CORE_LOWER_BOUND - { - return; - }; - - // Determine the base cpu quota - let base_cpu_quota = - // if cpu quota is not specified, start from optimistic case - if quota_limiter.cputime_limiter(false).is_infinite() { - 1000_f64 - * f64::max( - BACKGROUND_REQUEST_CORE_LOWER_BOUND, - SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_DEFAULT_RATIO, - ) - } else { - quota_limiter.cputime_limiter(false) / 1000_f64 - }; - - // Calculate the celling and floor quota - let celling_quota = f64::min( - base_cpu_quota * 2.0, - 1_000_f64 * SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_MAX_RATIO, - ); - let floor_quota = f64::max( - base_cpu_quota * 0.5, - 1_000_f64 * BACKGROUND_REQUEST_CORE_LOWER_BOUND, - ); - - let mut proc_stats: ProcessStat = ProcessStat::cur_proc_stat().unwrap(); - self.background_worker.spawn_interval_task( - DEFAULT_QUOTA_LIMITER_TUNE_INTERVAL, - move || { - if quota_limiter.auto_tune_enabled() { - let cputime_limit = quota_limiter.cputime_limiter(false); - let old_quota = if cputime_limit.is_infinite() { - base_cpu_quota - } else { - cputime_limit / 1000_f64 - }; - let cpu_usage = match proc_stats.cpu_usage() { - Ok(r) => r, - Err(_e) => 0.0, - }; - // Try tuning quota when cpu_usage is correctly collected. - // rule based tuning: - // - if instance is busy, shrink cpu quota for analyze by one quota pace until - // lower bound is hit; - // - if instance cpu usage is healthy, no op; - // - if instance is idle, increase cpu quota by one quota pace until upper - // bound is hit. 
- if cpu_usage > 0.0f64 { - let mut target_quota = old_quota; - - let cpu_util = cpu_usage / SysQuota::cpu_cores_quota(); - if cpu_util >= SYSTEM_BUSY_THRESHOLD { - target_quota = - f64::max(target_quota - CPU_QUOTA_ADJUSTMENT_PACE, floor_quota); - } else if cpu_util < SYSTEM_HEALTHY_THRESHOLD { - target_quota = - f64::min(target_quota + CPU_QUOTA_ADJUSTMENT_PACE, celling_quota); - } - - if old_quota != target_quota { - quota_limiter.set_cpu_time_limit(target_quota as usize, false); - debug!( - "cpu_time_limiter tuned for backend request"; - "cpu_util" => ?cpu_util, - "new quota" => ?target_quota); - INSTANCE_BACKEND_CPU_QUOTA.set(target_quota as i64); - } - } - } }, ); } @@ -1395,7 +1215,7 @@ where (disk::DiskUsage::Normal, disk::DiskUsage::Normal) => disk::DiskUsage::Normal, } } - self.background_worker + self.core.background_worker .spawn_interval_task(DEFAULT_STORAGE_STATS_INTERVAL, move || { let disk_stats = match fs2::statvfs(&store_path) { Err(e) => { @@ -1548,7 +1368,7 @@ where if let Err(e) = status_server.start(self.core.config.server.status_addr.clone()) { error_unknown!(%e; "failed to bind addr for status service"); } else { - self.to_stop.push(status_server); + self.core.to_stop.push(status_server); } } } @@ -1570,117 +1390,7 @@ where sst_worker.stop_worker(); } - self.to_stop.into_iter().for_each(|s| s.stop()); - } -} - -pub trait ConfiguredRaftEngine: RaftEngine { - fn build( - _: &TikvConfig, - _: &Arc, - _: &Option>, - _: &Cache, - ) -> (Self, Option>); - fn as_rocks_engine(&self) -> Option<&RocksEngine>; - fn register_config(&self, _cfg_controller: &mut ConfigController); -} - -impl ConfiguredRaftEngine for T { - default fn build( - _: &TikvConfig, - _: &Arc, - _: &Option>, - _: &Cache, - ) -> (Self, Option>) { - unimplemented!() - } - default fn as_rocks_engine(&self) -> Option<&RocksEngine> { - None - } - default fn register_config(&self, _cfg_controller: &mut ConfigController) {} -} - -impl ConfiguredRaftEngine for RocksEngine { - fn build( - 
config: &TikvConfig, - env: &Arc, - key_manager: &Option>, - block_cache: &Cache, - ) -> (Self, Option>) { - let mut raft_data_state_machine = RaftDataStateMachine::new( - &config.storage.data_dir, - &config.raft_engine.config().dir, - &config.raft_store.raftdb_path, - ); - let should_dump = raft_data_state_machine.before_open_target(); - - let raft_db_path = &config.raft_store.raftdb_path; - let config_raftdb = &config.raftdb; - let statistics = Arc::new(RocksStatistics::new_titan()); - let raft_db_opts = config_raftdb.build_opt(env.clone(), Some(&statistics)); - let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); - let raftdb = engine_rocks::util::new_engine_opt(raft_db_path, raft_db_opts, raft_cf_opts) - .expect("failed to open raftdb"); - - if should_dump { - let raft_engine = - RaftLogEngine::new(config.raft_engine.config(), key_manager.clone(), None) - .expect("failed to open raft engine for migration"); - dump_raft_engine_to_raftdb(&raft_engine, &raftdb, 8 /* threads */); - raft_engine.stop(); - drop(raft_engine); - raft_data_state_machine.after_dump_data(); - } - (raftdb, Some(statistics)) - } - - fn as_rocks_engine(&self) -> Option<&RocksEngine> { - Some(self) - } - - fn register_config(&self, cfg_controller: &mut ConfigController) { - cfg_controller.register( - tikv::config::Module::Raftdb, - Box::new(DbConfigManger::new(self.clone(), DbType::Raft)), - ); - } -} - -impl ConfiguredRaftEngine for RaftLogEngine { - fn build( - config: &TikvConfig, - env: &Arc, - key_manager: &Option>, - block_cache: &Cache, - ) -> (Self, Option>) { - let mut raft_data_state_machine = RaftDataStateMachine::new( - &config.storage.data_dir, - &config.raft_store.raftdb_path, - &config.raft_engine.config().dir, - ); - let should_dump = raft_data_state_machine.before_open_target(); - - let raft_config = config.raft_engine.config(); - let raft_engine = - RaftLogEngine::new(raft_config, key_manager.clone(), get_io_rate_limiter()) - .expect("failed to open raft engine"); - - 
if should_dump { - let config_raftdb = &config.raftdb; - let raft_db_opts = config_raftdb.build_opt(env.clone(), None); - let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); - let raftdb = engine_rocks::util::new_engine_opt( - &config.raft_store.raftdb_path, - raft_db_opts, - raft_cf_opts, - ) - .expect("failed to open raftdb for migration"); - dump_raftdb_to_raft_engine(&raftdb, &raft_engine, 8 /* threads */); - raftdb.stop(); - drop(raftdb); - raft_data_state_machine.after_dump_data(); - } - (raft_engine, None) + self.core.to_stop.into_iter().for_each(|s| s.stop()); } } @@ -1783,198 +1493,9 @@ fn pre_start() { } } -/// A small trait for components which can be trivially stopped. Lets us keep -/// a list of these in `TiKV`, rather than storing each component individually. -pub(crate) trait Stop { - fn stop(self: Box); -} - -impl Stop for StatusServer -where - R: 'static + Send, -{ - fn stop(self: Box) { - (*self).stop() - } -} - -impl Stop for Worker { - fn stop(self: Box) { - Worker::stop(&self); - } -} - -impl Stop for LazyWorker { - fn stop(self: Box) { - self.stop_worker(); - } -} - -pub struct EngineMetricsManager { - tablet_registry: TabletRegistry, - kv_statistics: Option>, - kv_is_titan: bool, - raft_engine: ER, - raft_statistics: Option>, - last_reset: Instant, -} - -impl EngineMetricsManager { - pub fn new( - tablet_registry: TabletRegistry, - kv_statistics: Option>, - kv_is_titan: bool, - raft_engine: ER, - raft_statistics: Option>, - ) -> Self { - EngineMetricsManager { - tablet_registry, - kv_statistics, - kv_is_titan, - raft_engine, - raft_statistics, - last_reset: Instant::now(), - } - } - - pub fn flush(&mut self, now: Instant) { - let mut reporter = EK::StatisticsReporter::new("kv"); - self.tablet_registry - .for_each_opened_tablet(|_, db: &mut CachedTablet| { - if let Some(db) = db.latest() { - reporter.collect(db); - } - true - }); - reporter.flush(); - self.raft_engine.flush_metrics("raft"); - - if let Some(s) = 
self.kv_statistics.as_ref() { - flush_engine_statistics(s, "kv", self.kv_is_titan); - } - if let Some(s) = self.raft_statistics.as_ref() { - flush_engine_statistics(s, "raft", false); - } - if now.saturating_duration_since(self.last_reset) >= DEFAULT_ENGINE_METRICS_RESET_INTERVAL { - if let Some(s) = self.kv_statistics.as_ref() { - s.reset(); - } - if let Some(s) = self.raft_statistics.as_ref() { - s.reset(); - } - self.last_reset = now; - } - } -} - -pub struct EnginesResourceInfo { - tablet_registry: TabletRegistry, - raft_engine: Option, - latest_normalized_pending_bytes: AtomicU32, - normalized_pending_bytes_collector: MovingAvgU32, -} - -impl EnginesResourceInfo { - const SCALE_FACTOR: u64 = 100; - - fn new( - tablet_registry: TabletRegistry, - raft_engine: Option, - max_samples_to_preserve: usize, - ) -> Self { - EnginesResourceInfo { - tablet_registry, - raft_engine, - latest_normalized_pending_bytes: AtomicU32::new(0), - normalized_pending_bytes_collector: MovingAvgU32::new(max_samples_to_preserve), - } - } - - pub fn update( - &self, - _now: Instant, - cached_latest_tablets: &mut HashMap>, - ) { - let mut normalized_pending_bytes = 0; - - fn fetch_engine_cf(engine: &RocksEngine, cf: &str, normalized_pending_bytes: &mut u32) { - if let Ok(cf_opts) = engine.get_options_cf(cf) { - if let Ok(Some(b)) = engine.get_cf_pending_compaction_bytes(cf) { - if cf_opts.get_soft_pending_compaction_bytes_limit() > 0 { - *normalized_pending_bytes = std::cmp::max( - *normalized_pending_bytes, - (b * EnginesResourceInfo::SCALE_FACTOR - / cf_opts.get_soft_pending_compaction_bytes_limit()) - as u32, - ); - } - } - } - } - - if let Some(raft_engine) = &self.raft_engine { - fetch_engine_cf(raft_engine, CF_DEFAULT, &mut normalized_pending_bytes); - } - - self.tablet_registry - .for_each_opened_tablet(|id, db: &mut CachedTablet| { - cached_latest_tablets.insert(id, db.clone()); - true - }); - - // todo(SpadeA): Now, there's a potential race condition problem where the - // tablet 
could be destroyed after the clone and before the fetching - // which could result in programme panic. It's okay now as the single global - // kv_engine will not be destroyed in normal operation and v2 is not - // ready for operation. Furthermore, this race condition is general to v2 as - // tablet clone is not a case exclusively happened here. We should - // propose another PR to tackle it such as destory tablet lazily in a GC - // thread. - - for (_, cache) in cached_latest_tablets.iter_mut() { - let Some(tablet) = cache.latest() else { continue }; - for cf in &[CF_DEFAULT, CF_WRITE, CF_LOCK] { - fetch_engine_cf(tablet, cf, &mut normalized_pending_bytes); - } - } - - // Clear ensures that these tablets are not hold forever. - cached_latest_tablets.clear(); - - let (_, avg) = self - .normalized_pending_bytes_collector - .add(normalized_pending_bytes); - self.latest_normalized_pending_bytes.store( - std::cmp::max(normalized_pending_bytes, avg), - Ordering::Relaxed, - ); - } -} - -impl IoBudgetAdjustor for EnginesResourceInfo { - fn adjust(&self, total_budgets: usize) -> usize { - let score = self.latest_normalized_pending_bytes.load(Ordering::Relaxed) as f32 - / Self::SCALE_FACTOR as f32; - // Two reasons for adding `sqrt` on top: - // 1) In theory the convergence point is independent of the value of pending - // bytes (as long as backlog generating rate equals consuming rate, which is - // determined by compaction budgets), a convex helps reach that point while - // maintaining low level of pending bytes. - // 2) Variance of compaction pending bytes grows with its magnitude, a filter - // with decreasing derivative can help balance such trend. - let score = score.sqrt(); - // The target global write flow slides between Bandwidth / 2 and Bandwidth. 
- let score = 0.5 + score / 2.0; - (total_budgets as f32 * score) as usize - } -} - #[cfg(test)] mod test { - use std::{ - collections::HashMap, - sync::{atomic::Ordering, Arc}, - }; + use std::{collections::HashMap, sync::Arc}; use engine_rocks::raw::Env; use engine_traits::{ @@ -2054,9 +1575,7 @@ mod test { // bytes of tablet_1_20 assert_eq!( (new_pending_compaction_bytes * 100) as u32, - engines_info - .latest_normalized_pending_bytes - .load(Ordering::Relaxed) + engines_info.latest_normalized_pending_bytes() ); } } diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 81ec94207a9..a29c344884f 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -16,10 +16,7 @@ use std::{ collections::HashMap, path::{Path, PathBuf}, str::FromStr, - sync::{ - atomic::{AtomicU32, AtomicU64, Ordering}, - mpsc, Arc, - }, + sync::{atomic::AtomicU64, mpsc, Arc}, time::Duration, u64, }; @@ -27,19 +24,9 @@ use std::{ use api_version::{dispatch_api_version, KvFormat}; use causal_ts::CausalTsProviderImpl; use concurrency_manager::ConcurrencyManager; -use encryption_export::DataKeyManager; -use engine_rocks::{ - flush_engine_statistics, from_rocks_compression_type, - raw::{Cache, Env}, - RocksEngine, RocksStatistics, -}; -use engine_traits::{ - CachedTablet, CfOptions, CfOptionsExt, Engines, FlowControlFactorsExt, KvEngine, MiscExt, - RaftEngine, StatisticsReporter, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, -}; -use file_system::{ - get_io_rate_limiter, BytesFetcher, IoBudgetAdjustor, MetricsManager as IoMetricsManager, -}; +use engine_rocks::{from_rocks_compression_type, RocksEngine, RocksStatistics}; +use engine_traits::{Engines, KvEngine, MiscExt, RaftEngine, TabletRegistry, CF_DEFAULT, CF_WRITE}; +use file_system::{get_io_rate_limiter, BytesFetcher, MetricsManager as IoMetricsManager}; use futures::executor::block_on; use grpcio::{EnvBuilder, Environment}; use grpcio_health::HealthService; @@ -82,8 +69,7 @@ use 
tikv::{ resolve, service::DiagnosticsService, status_server::StatusServer, - KvEngineFactoryBuilder, NodeV2, RaftKv2, Server, CPU_CORES_QUOTA_GAUGE, DEFAULT_CLUSTER_ID, - GRPC_THREAD_PREFIX, + KvEngineFactoryBuilder, NodeV2, RaftKv2, Server, CPU_CORES_QUOTA_GAUGE, GRPC_THREAD_PREFIX, }, storage::{ self, @@ -96,14 +82,9 @@ use tikv::{ }; use tikv_util::{ check_environment_variables, - config::{ensure_dir_exist, RaftDataStateMachine, VersionTrack}, - math::MovingAvgU32, - metrics::INSTANCE_BACKEND_CPU_QUOTA, + config::VersionTrack, quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, - sys::{ - cpu_time::ProcessStat, disk, path_in_diff_mount_point, register_memory_usage_high_water, - SysQuota, - }, + sys::{disk, path_in_diff_mount_point, register_memory_usage_high_water, SysQuota}, thread_group::GroupProperties, time::{Instant, Monitor}, worker::{Builder as WorkerBuilder, LazyWorker, Scheduler, Worker}, @@ -113,28 +94,13 @@ use tikv_util::{ use tokio::runtime::Builder; use crate::{ - common::{check_system_config, TikvServerCore}, + common::{ConfiguredRaftEngine, EngineMetricsManager, EnginesResourceInfo, TikvServerCore}, memory::*, - raft_engine_switch::*, - server::Stop, setup::*, signal_handler, tikv_util::sys::thread::ThreadBuildWrapper, }; -// minimum number of core kept for background requests -const BACKGROUND_REQUEST_CORE_LOWER_BOUND: f64 = 1.0; -// max ratio of core quota for background requests -const BACKGROUND_REQUEST_CORE_MAX_RATIO: f64 = 0.95; -// default ratio of core quota for background requests = core_number * 0.5 -const BACKGROUND_REQUEST_CORE_DEFAULT_RATIO: f64 = 0.5; -// indication of TiKV instance is short of cpu -const SYSTEM_BUSY_THRESHOLD: f64 = 0.80; -// indication of TiKV instance in healthy state when cpu usage is in [0.5, 0.80) -const SYSTEM_HEALTHY_THRESHOLD: f64 = 0.50; -// pace of cpu quota adjustment -const CPU_QUOTA_ADJUSTMENT_PACE: f64 = 200.0; // 0.2 vcpu - #[inline] fn run_impl(config: TikvConfig) { let mut tikv = 
TikvServer::::init::(config); @@ -157,7 +123,7 @@ fn run_impl(config: TikvConfig) { tikv.init_storage_stats_task(); tikv.run_server(server_config); tikv.run_status_server(); - tikv.init_quota_tuning_task(tikv.quota_limiter.clone()); + tikv.core.init_quota_tuning_task(tikv.quota_limiter.clone()); // TODO: support signal dump stats signal_handler::wait_for_signal( @@ -200,9 +166,7 @@ pub fn run_tikv(config: TikvConfig) { const DEFAULT_METRICS_FLUSH_INTERVAL: Duration = Duration::from_millis(10_000); const DEFAULT_MEMTRACE_FLUSH_INTERVAL: Duration = Duration::from_millis(1_000); -const DEFAULT_ENGINE_METRICS_RESET_INTERVAL: Duration = Duration::from_millis(60_000); const DEFAULT_STORAGE_STATS_INTERVAL: Duration = Duration::from_secs(1); -const DEFAULT_QUOTA_LIMITER_TUNE_INTERVAL: Duration = Duration::from_secs(5); /// A complete TiKV server. struct TikvServer { @@ -220,10 +184,8 @@ struct TikvServer { servers: Option>, region_info_accessor: Option, coprocessor_host: Option>, - to_stop: Vec>, concurrency_manager: ConcurrencyManager, env: Arc, - background_worker: Worker, check_leader_worker: Worker, sst_worker: Option>>, quota_limiter: Arc, @@ -265,11 +227,14 @@ where .name_prefix(thd_name!(GRPC_THREAD_PREFIX)) .build(), ); - let pd_client = - Self::connect_to_pd_cluster(&mut config, env.clone(), Arc::clone(&security_mgr)); + let pd_client = TikvServerCore::connect_to_pd_cluster( + &mut config, + env.clone(), + Arc::clone(&security_mgr), + ); // Initialize and check config - let cfg_controller = Self::init_config(config); + let cfg_controller = TikvServerCore::init_config(config); let config = cfg_controller.get_current(); let store_path = Path::new(&config.storage.data_dir).to_owned(); @@ -342,6 +307,8 @@ where encryption_key_manager: None, flow_info_sender: None, flow_info_receiver: None, + to_stop: vec![], + background_worker, }, cfg_controller: Some(cfg_controller), security_mgr, @@ -356,10 +323,8 @@ where servers: None, region_info_accessor: None, 
coprocessor_host: None, - to_stop: vec![], concurrency_manager, env, - background_worker, check_leader_worker, sst_worker: None, quota_limiter, @@ -369,77 +334,6 @@ where } } - /// Initialize and check the config - /// - /// Warnings are logged and fatal errors exist. - /// - /// # Fatal errors - /// - /// - If `dynamic config` feature is enabled and failed to register config - /// to PD - /// - If some critical configs (like data dir) are differrent from last run - /// - If the config can't pass `validate()` - /// - If the max open file descriptor limit is not high enough to support - /// the main database and the raft database. - fn init_config(mut config: TikvConfig) -> ConfigController { - validate_and_persist_config(&mut config, true); - - ensure_dir_exist(&config.storage.data_dir).unwrap(); - if !config.rocksdb.wal_dir.is_empty() { - ensure_dir_exist(&config.rocksdb.wal_dir).unwrap(); - } - if config.raft_engine.enable { - ensure_dir_exist(&config.raft_engine.config().dir).unwrap(); - } else { - ensure_dir_exist(&config.raft_store.raftdb_path).unwrap(); - if !config.raftdb.wal_dir.is_empty() { - ensure_dir_exist(&config.raftdb.wal_dir).unwrap(); - } - } - - check_system_config(&config); - - tikv_util::set_panic_hook(config.abort_on_panic, &config.storage.data_dir); - - info!( - "using config"; - "config" => serde_json::to_string(&config).unwrap(), - ); - if config.panic_when_unexpected_key_or_data { - info!("panic-when-unexpected-key-or-data is on"); - tikv_util::set_panic_when_unexpected_key_or_data(true); - } - - config.write_into_metrics(); - - ConfigController::new(config) - } - - fn connect_to_pd_cluster( - config: &mut TikvConfig, - env: Arc, - security_mgr: Arc, - ) -> Arc { - let pd_client = Arc::new( - RpcClient::new(&config.pd, Some(env), security_mgr) - .unwrap_or_else(|e| fatal!("failed to create rpc client: {}", e)), - ); - - let cluster_id = pd_client - .get_cluster_id() - .unwrap_or_else(|e| fatal!("failed to get cluster id: {}", e)); - if 
cluster_id == DEFAULT_CLUSTER_ID { - fatal!("cluster id can't be {}", DEFAULT_CLUSTER_ID); - } - config.server.cluster_id = cluster_id; - info!( - "connect to PD cluster"; - "cluster_id" => cluster_id - ); - - pd_client - } - fn init_gc_worker(&mut self) -> GcWorker> { let engines = self.engines.as_ref().unwrap(); let gc_worker = GcWorker::new( @@ -505,14 +399,14 @@ where pd_sender.clone(), engines.engine.clone(), resource_ctl, - CleanupMethod::Remote(self.background_worker.remote()), + CleanupMethod::Remote(self.core.background_worker.remote()), )) } else { None }; if let Some(unified_read_pool) = &unified_read_pool { let handle = unified_read_pool.handle(); - self.background_worker.spawn_interval_task( + self.core.background_worker.spawn_interval_task( UPDATE_EWMA_TIME_SLICE_INTERVAL, move || { handle.update_ewma_time_slice(); @@ -540,19 +434,19 @@ where resource_metering::init_recorder( self.core.config.resource_metering.precision.as_millis(), ); - self.to_stop.push(recorder_worker); + self.core.to_stop.push(recorder_worker); let (reporter_notifier, data_sink_reg_handle, reporter_worker) = resource_metering::init_reporter( self.core.config.resource_metering.clone(), collector_reg_handle.clone(), ); - self.to_stop.push(reporter_worker); + self.core.to_stop.push(reporter_worker); let (address_change_notifier, single_target_worker) = resource_metering::init_single_target( self.core.config.resource_metering.receiver_address.clone(), self.env.clone(), data_sink_reg_handle.clone(), ); - self.to_stop.push(single_target_worker); + self.core.to_stop.push(single_target_worker); let rsmeter_pubsub_service = resource_metering::PubSubService::new(data_sink_reg_handle); let cfg_manager = resource_metering::ConfigManager::new( @@ -607,7 +501,7 @@ where let (resolver, state) = resolve::new_resolver( self.pd_client.clone(), - &self.background_worker, + &self.core.background_worker, storage.get_engine().raft_extension(), ); self.resolver = Some(resolver); @@ -649,7 +543,7 @@ 
where Box::new(ReadPoolConfigManager::new( unified_read_pool.as_ref().unwrap().handle(), unified_read_pool_scale_notifier, - &self.background_worker, + &self.core.background_worker, self.core.config.readpool.unified.max_thread_count, self.core.config.readpool.unified.auto_adjust_pool_size, )), @@ -791,7 +685,7 @@ where self.coprocessor_host.clone().unwrap(), auto_split_controller, collector_reg_handle, - self.background_worker.clone(), + self.core.background_worker.clone(), pd_worker, raft_store, &state, @@ -831,7 +725,7 @@ where let engines = self.engines.as_ref().unwrap(); // Backup service. - let mut backup_worker = Box::new(self.background_worker.lazy_build("backup-endpoint")); + let mut backup_worker = Box::new(self.core.background_worker.lazy_build("backup-endpoint")); let backup_scheduler = backup_worker.scheduler(); let backup_service = backup::Service::::new(backup_scheduler); if servers @@ -947,13 +841,15 @@ where // `cached_latest_tablets` is passed to `update` to avoid memory // allocation each time when calling `update`. 
let mut cached_latest_tablets = HashMap::default(); - self.background_worker - .spawn_interval_task(DEFAULT_METRICS_FLUSH_INTERVAL, move || { + self.core.background_worker.spawn_interval_task( + DEFAULT_METRICS_FLUSH_INTERVAL, + move || { let now = Instant::now(); engine_metrics.flush(now); io_metrics.flush(now); engines_info_clone.update(now, &mut cached_latest_tablets); - }); + }, + ); if let Some(limiter) = get_io_rate_limiter() { limiter.set_low_priority_io_adjustor_if_needed(Some(engines_info)); } @@ -961,90 +857,11 @@ where let mut mem_trace_metrics = MemoryTraceManager::default(); mem_trace_metrics.register_provider(MEMTRACE_RAFTSTORE.clone()); mem_trace_metrics.register_provider(MEMTRACE_COPROCESSOR.clone()); - self.background_worker - .spawn_interval_task(DEFAULT_MEMTRACE_FLUSH_INTERVAL, move || { + self.core.background_worker.spawn_interval_task( + DEFAULT_MEMTRACE_FLUSH_INTERVAL, + move || { let now = Instant::now(); mem_trace_metrics.flush(now); - }); - } - - // Only background cpu quota tuning is implemented at present. 
iops and frontend - // quota tuning is on the way - fn init_quota_tuning_task(&self, quota_limiter: Arc) { - // No need to do auto tune when capacity is really low - if SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_MAX_RATIO - < BACKGROUND_REQUEST_CORE_LOWER_BOUND - { - return; - }; - - // Determine the base cpu quota - let base_cpu_quota = - // if cpu quota is not specified, start from optimistic case - if quota_limiter.cputime_limiter(false).is_infinite() { - 1000_f64 - * f64::max( - BACKGROUND_REQUEST_CORE_LOWER_BOUND, - SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_DEFAULT_RATIO, - ) - } else { - quota_limiter.cputime_limiter(false) / 1000_f64 - }; - - // Calculate the celling and floor quota - let celling_quota = f64::min( - base_cpu_quota * 2.0, - 1_000_f64 * SysQuota::cpu_cores_quota() * BACKGROUND_REQUEST_CORE_MAX_RATIO, - ); - let floor_quota = f64::max( - base_cpu_quota * 0.5, - 1_000_f64 * BACKGROUND_REQUEST_CORE_LOWER_BOUND, - ); - - let mut proc_stats: ProcessStat = ProcessStat::cur_proc_stat().unwrap(); - self.background_worker.spawn_interval_task( - DEFAULT_QUOTA_LIMITER_TUNE_INTERVAL, - move || { - if quota_limiter.auto_tune_enabled() { - let cputime_limit = quota_limiter.cputime_limiter(false); - let old_quota = if cputime_limit.is_infinite() { - base_cpu_quota - } else { - cputime_limit / 1000_f64 - }; - let cpu_usage = match proc_stats.cpu_usage() { - Ok(r) => r, - Err(_e) => 0.0, - }; - // Try tuning quota when cpu_usage is correctly collected. - // rule based tuning: - // - if instance is busy, shrink cpu quota for analyze by one quota pace until - // lower bound is hit; - // - if instance cpu usage is healthy, no op; - // - if instance is idle, increase cpu quota by one quota pace until upper - // bound is hit. 
- if cpu_usage > 0.0f64 { - let mut target_quota = old_quota; - - let cpu_util = cpu_usage / SysQuota::cpu_cores_quota(); - if cpu_util >= SYSTEM_BUSY_THRESHOLD { - target_quota = - f64::max(target_quota - CPU_QUOTA_ADJUSTMENT_PACE, floor_quota); - } else if cpu_util < SYSTEM_HEALTHY_THRESHOLD { - target_quota = - f64::min(target_quota + CPU_QUOTA_ADJUSTMENT_PACE, celling_quota); - } - - if old_quota != target_quota { - quota_limiter.set_cpu_time_limit(target_quota as usize, false); - debug!( - "cpu_time_limiter tuned for backend request"; - "cpu_util" => ?cpu_util, - "new quota" => ?target_quota); - INSTANCE_BACKEND_CPU_QUOTA.set(target_quota as i64); - } - } - } }, ); } @@ -1079,7 +896,7 @@ where (disk::DiskUsage::Normal, disk::DiskUsage::Normal) => disk::DiskUsage::Normal, } } - self.background_worker + self.core.background_worker .spawn_interval_task(DEFAULT_STORAGE_STATS_INTERVAL, move || { let disk_stats = match fs2::statvfs(&store_path) { Err(e) => { @@ -1238,7 +1055,7 @@ where if let Err(e) = status_server.start(self.core.config.server.status_addr.clone()) { error_unknown!(%e; "failed to bind addr for status service"); } else { - self.to_stop.push(status_server); + self.core.to_stop.push(status_server); } } } @@ -1260,117 +1077,7 @@ where sst_worker.stop_worker(); } - self.to_stop.into_iter().for_each(|s| s.stop()); - } -} - -pub trait ConfiguredRaftEngine: RaftEngine { - fn build( - _: &TikvConfig, - _: &Arc, - _: &Option>, - _: &Cache, - ) -> (Self, Option>); - fn as_rocks_engine(&self) -> Option<&RocksEngine>; - fn register_config(&self, _cfg_controller: &mut ConfigController); -} - -impl ConfiguredRaftEngine for T { - default fn build( - _: &TikvConfig, - _: &Arc, - _: &Option>, - _: &Cache, - ) -> (Self, Option>) { - unimplemented!() - } - default fn as_rocks_engine(&self) -> Option<&RocksEngine> { - None - } - default fn register_config(&self, _cfg_controller: &mut ConfigController) {} -} - -impl ConfiguredRaftEngine for RocksEngine { - fn build( - 
config: &TikvConfig, - env: &Arc, - key_manager: &Option>, - block_cache: &Cache, - ) -> (Self, Option>) { - let mut raft_data_state_machine = RaftDataStateMachine::new( - &config.storage.data_dir, - &config.raft_engine.config().dir, - &config.raft_store.raftdb_path, - ); - let should_dump = raft_data_state_machine.before_open_target(); - - let raft_db_path = &config.raft_store.raftdb_path; - let config_raftdb = &config.raftdb; - let statistics = Arc::new(RocksStatistics::new_titan()); - let raft_db_opts = config_raftdb.build_opt(env.clone(), Some(&statistics)); - let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); - let raftdb = engine_rocks::util::new_engine_opt(raft_db_path, raft_db_opts, raft_cf_opts) - .expect("failed to open raftdb"); - - if should_dump { - let raft_engine = - RaftLogEngine::new(config.raft_engine.config(), key_manager.clone(), None) - .expect("failed to open raft engine for migration"); - dump_raft_engine_to_raftdb(&raft_engine, &raftdb, 8 /* threads */); - raft_engine.stop(); - drop(raft_engine); - raft_data_state_machine.after_dump_data(); - } - (raftdb, Some(statistics)) - } - - fn as_rocks_engine(&self) -> Option<&RocksEngine> { - Some(self) - } - - fn register_config(&self, cfg_controller: &mut ConfigController) { - cfg_controller.register( - tikv::config::Module::Raftdb, - Box::new(DbConfigManger::new(self.clone(), DbType::Raft)), - ); - } -} - -impl ConfiguredRaftEngine for RaftLogEngine { - fn build( - config: &TikvConfig, - env: &Arc, - key_manager: &Option>, - block_cache: &Cache, - ) -> (Self, Option>) { - let mut raft_data_state_machine = RaftDataStateMachine::new( - &config.storage.data_dir, - &config.raft_store.raftdb_path, - &config.raft_engine.config().dir, - ); - let should_dump = raft_data_state_machine.before_open_target(); - - let raft_config = config.raft_engine.config(); - let raft_engine = - RaftLogEngine::new(raft_config, key_manager.clone(), get_io_rate_limiter()) - .expect("failed to open raft engine"); - - 
if should_dump { - let config_raftdb = &config.raftdb; - let raft_db_opts = config_raftdb.build_opt(env.clone(), None); - let raft_cf_opts = config_raftdb.build_cf_opts(block_cache); - let raftdb = engine_rocks::util::new_engine_opt( - &config.raft_store.raftdb_path, - raft_db_opts, - raft_cf_opts, - ) - .expect("failed to open raftdb for migration"); - dump_raftdb_to_raft_engine(&raftdb, &raft_engine, 8 /* threads */); - raftdb.stop(); - drop(raftdb); - raft_data_state_machine.after_dump_data(); - } - (raft_engine, None) + self.core.to_stop.into_iter().for_each(|s| s.stop()); } } @@ -1488,171 +1195,10 @@ fn pre_start() { ); } } -pub struct EngineMetricsManager { - tablet_registry: TabletRegistry, - kv_statistics: Option>, - kv_is_titan: bool, - raft_engine: ER, - raft_statistics: Option>, - last_reset: Instant, -} - -impl EngineMetricsManager { - pub fn new( - tablet_registry: TabletRegistry, - kv_statistics: Option>, - kv_is_titan: bool, - raft_engine: ER, - raft_statistics: Option>, - ) -> Self { - EngineMetricsManager { - tablet_registry, - kv_statistics, - kv_is_titan, - raft_engine, - raft_statistics, - last_reset: Instant::now(), - } - } - - pub fn flush(&mut self, now: Instant) { - let mut reporter = EK::StatisticsReporter::new("kv"); - self.tablet_registry - .for_each_opened_tablet(|_, db: &mut CachedTablet| { - if let Some(db) = db.latest() { - reporter.collect(db); - } - true - }); - reporter.flush(); - self.raft_engine.flush_metrics("raft"); - - if let Some(s) = self.kv_statistics.as_ref() { - flush_engine_statistics(s, "kv", self.kv_is_titan); - } - if let Some(s) = self.raft_statistics.as_ref() { - flush_engine_statistics(s, "raft", false); - } - if now.saturating_duration_since(self.last_reset) >= DEFAULT_ENGINE_METRICS_RESET_INTERVAL { - if let Some(s) = self.kv_statistics.as_ref() { - s.reset(); - } - if let Some(s) = self.raft_statistics.as_ref() { - s.reset(); - } - self.last_reset = now; - } - } -} - -pub struct EnginesResourceInfo { - 
tablet_registry: TabletRegistry, - raft_engine: Option, - latest_normalized_pending_bytes: AtomicU32, - normalized_pending_bytes_collector: MovingAvgU32, -} - -impl EnginesResourceInfo { - const SCALE_FACTOR: u64 = 100; - - fn new( - tablet_registry: TabletRegistry, - raft_engine: Option, - max_samples_to_preserve: usize, - ) -> Self { - EnginesResourceInfo { - tablet_registry, - raft_engine, - latest_normalized_pending_bytes: AtomicU32::new(0), - normalized_pending_bytes_collector: MovingAvgU32::new(max_samples_to_preserve), - } - } - - pub fn update( - &self, - _now: Instant, - cached_latest_tablets: &mut HashMap>, - ) { - let mut normalized_pending_bytes = 0; - - fn fetch_engine_cf(engine: &RocksEngine, cf: &str, normalized_pending_bytes: &mut u32) { - if let Ok(cf_opts) = engine.get_options_cf(cf) { - if let Ok(Some(b)) = engine.get_cf_pending_compaction_bytes(cf) { - if cf_opts.get_soft_pending_compaction_bytes_limit() > 0 { - *normalized_pending_bytes = std::cmp::max( - *normalized_pending_bytes, - (b * EnginesResourceInfo::SCALE_FACTOR - / cf_opts.get_soft_pending_compaction_bytes_limit()) - as u32, - ); - } - } - } - } - - if let Some(raft_engine) = &self.raft_engine { - fetch_engine_cf(raft_engine, CF_DEFAULT, &mut normalized_pending_bytes); - } - - self.tablet_registry - .for_each_opened_tablet(|id, db: &mut CachedTablet| { - cached_latest_tablets.insert(id, db.clone()); - true - }); - - // todo(SpadeA): Now, there's a potential race condition problem where the - // tablet could be destroyed after the clone and before the fetching - // which could result in programme panic. It's okay now as the single global - // kv_engine will not be destroyed in normal operation and v2 is not - // ready for operation. Furthermore, this race condition is general to v2 as - // tablet clone is not a case exclusively happened here. We should - // propose another PR to tackle it such as destory tablet lazily in a GC - // thread. 
- - for (_, cache) in cached_latest_tablets.iter_mut() { - let Some(tablet) = cache.latest() else { continue }; - for cf in &[CF_DEFAULT, CF_WRITE, CF_LOCK] { - fetch_engine_cf(tablet, cf, &mut normalized_pending_bytes); - } - } - - // Clear ensures that these tablets are not hold forever. - cached_latest_tablets.clear(); - - let (_, avg) = self - .normalized_pending_bytes_collector - .add(normalized_pending_bytes); - self.latest_normalized_pending_bytes.store( - std::cmp::max(normalized_pending_bytes, avg), - Ordering::Relaxed, - ); - } -} - -impl IoBudgetAdjustor for EnginesResourceInfo { - fn adjust(&self, total_budgets: usize) -> usize { - let score = self.latest_normalized_pending_bytes.load(Ordering::Relaxed) as f32 - / Self::SCALE_FACTOR as f32; - // Two reasons for adding `sqrt` on top: - // 1) In theory the convergence point is independent of the value of pending - // bytes (as long as backlog generating rate equals consuming rate, which is - // determined by compaction budgets), a convex helps reach that point while - // maintaining low level of pending bytes. - // 2) Variance of compaction pending bytes grows with its magnitude, a filter - // with decreasing derivative can help balance such trend. - let score = score.sqrt(); - // The target global write flow slides between Bandwidth / 2 and Bandwidth. 
- let score = 0.5 + score / 2.0; - (total_budgets as f32 * score) as usize - } -} #[cfg(test)] mod test { - use std::{ - collections::HashMap, - sync::{atomic::Ordering, Arc}, - }; + use std::{collections::HashMap, sync::Arc}; use engine_rocks::raw::Env; use engine_traits::{ @@ -1732,9 +1278,7 @@ mod test { // bytes of tablet_1_20 assert_eq!( (new_pending_compaction_bytes * 100) as u32, - engines_info - .latest_normalized_pending_bytes - .load(Ordering::Relaxed) + engines_info.latest_normalized_pending_bytes() ); } } diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index a02af6ad177..0e96d976449 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -10,7 +10,7 @@ use concurrency_manager::ConcurrencyManager; use encryption_export::DataKeyManager; use engine_rocks::RocksEngine; use engine_test::raft::RaftTestEngine; -use engine_traits::{RaftEngine, RaftEngineReadOnly, TabletRegistry}; +use engine_traits::{KvEngine, RaftEngine, RaftEngineReadOnly, TabletRegistry}; use futures::Future; use kvproto::{ kvrpcpb::ApiVersion, @@ -53,12 +53,12 @@ use tikv_util::{ use crate::{Cluster, RaftStoreRouter, SimulateTransport, Simulator, SnapshotRouter}; #[derive(Clone)] -pub struct ChannelTransport { - core: Arc>, +pub struct ChannelTransport { + core: Arc>>, } -impl ChannelTransport { - pub fn new() -> ChannelTransport { +impl ChannelTransport { + pub fn new() -> Self { ChannelTransport { core: Arc::new(Mutex::new(ChannelTransportCore { snap_paths: HashMap::default(), @@ -67,12 +67,12 @@ impl ChannelTransport { } } - pub fn core(&self) -> &Arc> { + pub fn core(&self) -> &Arc>> { &self.core } } -impl Transport for ChannelTransport { +impl Transport for ChannelTransport { fn send(&mut self, msg: RaftMessage) -> raftstore::Result<()> { let from_store = msg.get_from_peer().get_store_id(); let to_store = msg.get_to_peer().get_store_id(); @@ -131,30 +131,30 @@ impl Transport for 
ChannelTransport { fn flush(&mut self) {} } -pub struct ChannelTransportCore { +pub struct ChannelTransportCore { pub snap_paths: HashMap, - pub routers: HashMap>>, + pub routers: HashMap>>, } -impl Default for ChannelTransport { +impl Default for ChannelTransport { fn default() -> Self { Self::new() } } -type SimulateChannelTransport = SimulateTransport; +type SimulateChannelTransport = SimulateTransport>; -pub struct NodeCluster { - trans: ChannelTransport, +pub struct NodeCluster { + trans: ChannelTransport, pd_client: Arc, - nodes: HashMap>, - simulate_trans: HashMap, + nodes: HashMap>, + simulate_trans: HashMap>, concurrency_managers: HashMap, // snap_mgrs: HashMap, } -impl NodeCluster { - pub fn new(pd_client: Arc) -> NodeCluster { +impl NodeCluster { + pub fn new(pd_client: Arc) -> Self { NodeCluster { trans: ChannelTransport::new(), pd_client, @@ -166,7 +166,7 @@ impl NodeCluster { } } -impl Simulator for NodeCluster { +impl Simulator for NodeCluster { fn get_node_ids(&self) -> HashSet { self.nodes.keys().cloned().collect() } @@ -189,10 +189,10 @@ impl Simulator for NodeCluster { &mut self, node_id: u64, cfg: Config, - store_meta: Arc>>, + store_meta: Arc>>, key_manager: Option>, raft_engine: RaftTestEngine, - tablet_registry: TabletRegistry, + tablet_registry: TabletRegistry, _resource_manager: &Option>, ) -> ServerResult { assert!(!self.nodes.contains_key(&node_id)); @@ -345,9 +345,8 @@ impl Simulator for NodeCluster { fn async_snapshot( &mut self, request: RaftCmdRequest, - ) -> impl Future< - Output = std::result::Result, RaftCmdResponse>, - > + Send { + ) -> impl Future, RaftCmdResponse>> + Send + { let node_id = request.get_header().get_peer().get_store_id(); if !self .trans @@ -409,7 +408,7 @@ impl Simulator for NodeCluster { .unwrap(); } - fn get_router(&self, node_id: u64) -> Option> { + fn get_router(&self, node_id: u64) -> Option> { self.nodes.get(&node_id).map(|node| node.router().clone()) } @@ -439,7 +438,7 @@ impl Simulator for NodeCluster { 
// Compare to server cluster, node cluster does not have server layer and // storage layer. -pub fn new_node_cluster(id: u64, count: usize) -> Cluster { +pub fn new_node_cluster(id: u64, count: usize) -> Cluster, RocksEngine> { let pd_client = Arc::new(TestPdClient::new(id, false)); let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); Cluster::new( @@ -454,7 +453,10 @@ pub fn new_node_cluster(id: u64, count: usize) -> Cluster Cluster { +pub fn new_incompatible_node_cluster( + id: u64, + count: usize, +) -> Cluster, RocksEngine> { let pd_client = Arc::new(TestPdClient::new(id, true)); let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); Cluster::new( diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 921d3b991ab..804a5e4a22f 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -12,7 +12,7 @@ use causal_ts::CausalTsProviderImpl; use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; use encryption_export::DataKeyManager; -use engine_rocks::{RocksEngine, RocksSnapshot}; +use engine_rocks::RocksEngine; use engine_test::raft::RaftTestEngine; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use futures::{executor::block_on, Future}; @@ -86,23 +86,26 @@ impl FlowStatsReporter for DummyReporter { fn report_write_stats(&self, _write_stats: WriteStats) {} } -type SimulateRaftExtension = ::RaftExtension; -type SimulateStoreTransport = SimulateTransport>; -type SimulateServerTransport = - SimulateTransport>; +type SimulateRaftExtension = as Engine>::RaftExtension; +type SimulateStoreTransport = SimulateTransport>; +type SimulateServerTransport = + SimulateTransport, PdStoreAddrResolver>>; -pub type SimulateEngine = RaftKv2; +pub type SimulateEngine = RaftKv2; // TestRaftKvv2 behaves the same way with RaftKv2, except that it has filters // that can mock various network conditions. 
#[derive(Clone)] -pub struct TestRaftKv2 { - raftkv: SimulateEngine, +pub struct TestRaftKv2 { + raftkv: SimulateEngine, filters: Arc>>>, } -impl TestRaftKv2 { - pub fn new(raftkv: SimulateEngine, filters: Arc>>>) -> TestRaftKv2 { +impl TestRaftKv2 { + pub fn new( + raftkv: SimulateEngine, + filters: Arc>>>, + ) -> TestRaftKv2 { TestRaftKv2 { raftkv, filters } } @@ -111,15 +114,15 @@ impl TestRaftKv2 { } } -impl Engine for TestRaftKv2 { - type Snap = RegionSnapshot<::Snapshot>; - type Local = RocksEngine; +impl Engine for TestRaftKv2 { + type Snap = RegionSnapshot; + type Local = EK; fn kv_engine(&self) -> Option { self.raftkv.kv_engine() } - type RaftExtension = TestExtension; + type RaftExtension = TestExtension; fn raft_extension(&self) -> Self::RaftExtension { TestExtension::new(self.raftkv.raft_extension(), self.filters.clone()) } @@ -131,12 +134,12 @@ impl Engine for TestRaftKv2 { self.raftkv.modify_on_kv_engine(region_modifies) } - type SnapshotRes = ::SnapshotRes; + type SnapshotRes = as Engine>::SnapshotRes; fn async_snapshot(&mut self, ctx: SnapContext<'_>) -> Self::SnapshotRes { self.raftkv.async_snapshot(ctx) } - type WriteRes = ::WriteRes; + type WriteRes = as Engine>::WriteRes; fn async_write( &self, ctx: &Context, @@ -159,21 +162,21 @@ impl Engine for TestRaftKv2 { } #[derive(Clone)] -pub struct TestExtension { - extension: Extension, +pub struct TestExtension { + extension: Extension, filters: Arc>>>, } -impl TestExtension { +impl TestExtension { pub fn new( - extension: Extension, + extension: Extension, filters: Arc>>>, ) -> Self { TestExtension { extension, filters } } } -impl RaftExtension for TestExtension { +impl RaftExtension for TestExtension { fn feed(&self, msg: RaftMessage, key_message: bool) { let send = |msg| -> raftstore::Result<()> { self.extension.feed(msg, key_message); @@ -236,22 +239,22 @@ impl RaftExtension for TestExtension { } } -pub struct ServerMeta { - node: NodeV2, - server: Server, - sim_router: SimulateStoreTransport, - 
sim_trans: SimulateServerTransport, - raw_router: StoreRouter, - gc_worker: GcWorker, +pub struct ServerMeta { + node: NodeV2, + server: Server>, + sim_router: SimulateStoreTransport, + sim_trans: SimulateServerTransport, + raw_router: StoreRouter, + gc_worker: GcWorker>, rsmeter_cleanup: Box, } type PendingServices = Vec Service>>; -pub struct ServerCluster { - metas: HashMap, +pub struct ServerCluster { + metas: HashMap>, addrs: AddressMap, - pub storages: HashMap, + pub storages: HashMap>, pub region_info_accessors: HashMap, snap_paths: HashMap, snap_mgrs: HashMap, @@ -266,8 +269,8 @@ pub struct ServerCluster { pub causal_ts_providers: HashMap>, } -impl ServerCluster { - pub fn new(pd_client: Arc) -> ServerCluster { +impl ServerCluster { + pub fn new(pd_client: Arc) -> Self { let env = Arc::new( EnvBuilder::new() .cq_count(2) @@ -316,10 +319,10 @@ impl ServerCluster { &mut self, node_id: u64, mut cfg: Config, - store_meta: Arc>>, + store_meta: Arc>>, key_manager: Option>, raft_engine: RaftTestEngine, - tablet_registry: TabletRegistry, + tablet_registry: TabletRegistry, resource_manager: &Option>, ) -> ServerResult { let (snap_mgr, snap_mgs_path) = if !self.snap_mgrs.contains_key(&node_id) { @@ -639,7 +642,7 @@ impl ServerCluster { Ok(node_id) } - pub fn get_gc_worker(&self, node_id: u64) -> &GcWorker { + pub fn get_gc_worker(&self, node_id: u64) -> &GcWorker> { &self.metas.get(&node_id).unwrap().gc_worker } @@ -677,7 +680,7 @@ impl ServerCluster { } } -impl Simulator for ServerCluster { +impl Simulator for ServerCluster { fn get_node_ids(&self) -> HashSet { self.metas.keys().cloned().collect() } @@ -718,10 +721,10 @@ impl Simulator for ServerCluster { &mut self, node_id: u64, cfg: Config, - store_meta: Arc>>, + store_meta: Arc>>, key_manager: Option>, raft_engine: RaftTestEngine, - tablet_registry: TabletRegistry, + tablet_registry: TabletRegistry, resource_manager: &Option>, ) -> ServerResult { dispatch_api_version!( @@ -754,9 +757,8 @@ impl Simulator for 
ServerCluster { fn async_snapshot( &mut self, request: kvproto::raft_cmdpb::RaftCmdRequest, - ) -> impl Future< - Output = std::result::Result, RaftCmdResponse>, - > + Send { + ) -> impl Future, RaftCmdResponse>> + Send + { let node_id = request.get_header().get_peer().get_store_id(); let mut router = match self.metas.get(&node_id) { None => { @@ -792,7 +794,7 @@ impl Simulator for ServerCluster { Ok(()) } - fn get_router(&self, node_id: u64) -> Option> { + fn get_router(&self, node_id: u64) -> Option> { self.metas.get(&node_id).map(|m| m.raw_router.clone()) } @@ -805,9 +807,9 @@ impl Simulator for ServerCluster { } } -impl Cluster { - pub fn must_get_snapshot_of_region(&mut self, region_id: u64) -> RegionSnapshot { - let mut try_snapshot = || -> Option> { +impl Cluster, EK> { + pub fn must_get_snapshot_of_region(&mut self, region_id: u64) -> RegionSnapshot { + let mut try_snapshot = || -> Option> { let leader = self.leader_of_region(region_id)?; let store_id = leader.store_id; let epoch = self.get_region_epoch(region_id); @@ -833,7 +835,10 @@ impl Cluster { } } -pub fn new_server_cluster(id: u64, count: usize) -> Cluster { +pub fn new_server_cluster( + id: u64, + count: usize, +) -> Cluster, RocksEngine> { let pd_client = Arc::new(TestPdClient::new(id, false)); let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); Cluster::new( @@ -849,7 +854,7 @@ pub fn new_server_cluster(id: u64, count: usize) -> Cluster Cluster { +) -> Cluster, RocksEngine> { let pd_client = Arc::new(TestPdClient::new(id, true)); let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); Cluster::new( @@ -866,7 +871,7 @@ pub fn new_server_cluster_with_api_ver( id: u64, count: usize, api_ver: ApiVersion, -) -> Cluster { +) -> Cluster, RocksEngine> { let pd_client = Arc::new(TestPdClient::new(id, false)); let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); Cluster::new( @@ -879,14 +884,21 @@ pub fn new_server_cluster_with_api_ver( 
) } -pub fn must_new_cluster_and_kv_client() -> (Cluster, TikvClient, Context) -{ +pub fn must_new_cluster_and_kv_client() -> ( + Cluster, RocksEngine>, + TikvClient, + Context, +) { must_new_cluster_and_kv_client_mul(1) } pub fn must_new_cluster_and_kv_client_mul( count: usize, -) -> (Cluster, TikvClient, Context) { +) -> ( + Cluster, RocksEngine>, + TikvClient, + Context, +) { let (cluster, leader, ctx) = must_new_cluster_mul(count); let env = Arc::new(Environment::new(1)); @@ -898,14 +910,22 @@ pub fn must_new_cluster_and_kv_client_mul( } pub fn must_new_cluster_mul( count: usize, -) -> (Cluster, metapb::Peer, Context) { +) -> ( + Cluster, RocksEngine>, + metapb::Peer, + Context, +) { must_new_and_configure_cluster_mul(count, |_| ()) } fn must_new_and_configure_cluster_mul( count: usize, - mut configure: impl FnMut(&mut Cluster), -) -> (Cluster, metapb::Peer, Context) { + mut configure: impl FnMut(&mut Cluster, RocksEngine>), +) -> ( + Cluster, RocksEngine>, + metapb::Peer, + Context, +) { let mut cluster = new_server_cluster(0, count); configure(&mut cluster); cluster.run(); @@ -921,8 +941,12 @@ fn must_new_and_configure_cluster_mul( } pub fn must_new_and_configure_cluster_and_kv_client( - configure: impl FnMut(&mut Cluster), -) -> (Cluster, TikvClient, Context) { + configure: impl FnMut(&mut Cluster, RocksEngine>), +) -> ( + Cluster, RocksEngine>, + TikvClient, + Context, +) { let (cluster, leader, ctx) = must_new_and_configure_cluster(configure); let env = Arc::new(Environment::new(1)); @@ -934,13 +958,20 @@ pub fn must_new_and_configure_cluster_and_kv_client( } pub fn must_new_and_configure_cluster( - configure: impl FnMut(&mut Cluster), -) -> (Cluster, metapb::Peer, Context) { + configure: impl FnMut(&mut Cluster, RocksEngine>), +) -> ( + Cluster, RocksEngine>, + metapb::Peer, + Context, +) { must_new_and_configure_cluster_mul(1, configure) } -pub fn must_new_cluster_and_debug_client() -> (Cluster, DebugClient, u64) -{ +pub fn 
must_new_cluster_and_debug_client() -> ( + Cluster, RocksEngine>, + DebugClient, + u64, +) { let (cluster, leader, _) = must_new_cluster_mul(1); let env = Arc::new(Environment::new(1)); diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index 9f68beaad35..b9e6464c5d8 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -11,7 +11,7 @@ use futures::Future; use kvproto::{kvrpcpb::Context, metapb, raft_cmdpb::RaftCmdResponse}; use raftstore::Result; use rand::RngCore; -use server::server2::ConfiguredRaftEngine; +use server::common::ConfiguredRaftEngine; use tempfile::TempDir; use test_raftstore::{new_get_cmd, new_put_cf_cmd, new_request, Config}; use tikv::{ @@ -163,7 +163,7 @@ pub fn configure_for_lease_read_v2, EK: KvEngine>( } pub fn wait_for_synced( - cluster: &mut Cluster, + cluster: &mut Cluster, RocksEngine>, node_id: u64, region_id: u64, ) { diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 81753d49600..cdfe5c8f475 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -43,7 +43,7 @@ use raftstore::{ RaftRouterCompactedEventSender, Result, }; use rand::RngCore; -use server::server::ConfiguredRaftEngine; +use server::common::ConfiguredRaftEngine; use tempfile::TempDir; use test_pd_client::TestPdClient; use tikv::{ From 83ce09188780c40be9b780d4995f1ae26f32995d Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 7 Apr 2023 16:20:58 +0800 Subject: [PATCH 0618/1149] cdc: batch send resolved ts exponentially to speed up TiCDC resolve lock (#14465) close pingcap/tiflow#8561, ref tikv/tikv#11993 cdc: batch send resolved ts exponentially to speed up TiCDC resolve lock Signed-off-by: Neil Shen Co-authored-by: Ti Chi Robot --- components/cdc/src/endpoint.rs | 144 +++++++++++++++++++++++++-------- 1 file changed, 110 insertions(+), 34 deletions(-) diff --git 
a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index b5e15ceee23..efc82e27d6c 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -1,6 +1,7 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. use std::{ + cell::RefCell, cmp::{Ord, Ordering as CmpOrdering, PartialOrd, Reverse}, collections::BinaryHeap, fmt, @@ -297,16 +298,8 @@ impl ResolvedRegionHeap { (min_resolved_ts, outliers) } - fn to_hash_set(&self) -> (TimeStamp, HashSet) { - let mut min_resolved_ts = TimeStamp::max(); - let mut regions = HashSet::with_capacity_and_hasher(self.heap.len(), Default::default()); - for resolved_region in &self.heap { - regions.insert(resolved_region.0.region_id); - if min_resolved_ts > resolved_region.0.resolved_ts { - min_resolved_ts = resolved_region.0.resolved_ts; - } - } - (min_resolved_ts, regions) + fn is_empty(&self) -> bool { + self.heap.is_empty() } fn clear(&mut self) { @@ -349,7 +342,7 @@ pub struct Endpoint { sink_memory_quota: MemoryQuota, old_value_cache: OldValueCache, - resolved_region_heap: ResolvedRegionHeap, + resolved_region_heap: RefCell, causal_ts_provider: Option>, @@ -444,9 +437,9 @@ impl, E: KvEngine> Endpoint { concurrency_manager, min_resolved_ts: TimeStamp::max(), min_ts_region_id: 0, - resolved_region_heap: ResolvedRegionHeap { + resolved_region_heap: RefCell::new(ResolvedRegionHeap { heap: BinaryHeap::new(), - }, + }), old_value_cache, resolved_region_count: 0, unresolved_region_count: 0, @@ -837,7 +830,7 @@ impl, E: KvEngine> Endpoint { fn on_min_ts(&mut self, regions: Vec, min_ts: TimeStamp, current_ts: TimeStamp) { // Reset resolved_regions to empty. 
- let resolved_regions = &mut self.resolved_region_heap; + let mut resolved_regions = self.resolved_region_heap.borrow_mut(); resolved_regions.clear(); let total_region_count = regions.len(); @@ -883,6 +876,7 @@ impl, E: KvEngine> Endpoint { "min_resolved_ts" => self.min_resolved_ts, "min_ts_region_id" => self.min_ts_region_id, "min_ts" => min_ts, + "lag" => ?Duration::from_millis(lag_millis), "ok" => advance_ok, "none" => advance_failed_none, "stale" => advance_failed_stale, @@ -896,13 +890,14 @@ impl, E: KvEngine> Endpoint { // so 1) downstreams know where they should send resolve lock requests, // and 2) resolved ts of normal regions does not fallback. // - // Max number of outliers, in most cases, only a few regions are outliers. - // TODO: figure out how to avoid create hashset every time, saving some CPU. - let max_outlier_count = 32; - let (outlier_min_resolved_ts, outlier_regions) = resolved_regions.pop(max_outlier_count); - let (normal_min_resolved_ts, normal_regions) = resolved_regions.to_hash_set(); - self.broadcast_resolved_ts(outlier_min_resolved_ts, outlier_regions); - self.broadcast_resolved_ts(normal_min_resolved_ts, normal_regions); + // Regions are separated exponentially to reduce resolved ts events and + // save CPU for both TiKV and TiCDC. + let mut batch_count = 8; + while !resolved_regions.is_empty() { + let (outlier_min_resolved_ts, outlier_regions) = resolved_regions.pop(batch_count); + self.broadcast_resolved_ts(outlier_min_resolved_ts, outlier_regions); + batch_count *= 4; + } } fn broadcast_resolved_ts(&self, min_resolved_ts: TimeStamp, regions: HashSet) { @@ -1194,6 +1189,7 @@ impl, E: KvEngine> RunnableWithTimer for Endpoin // Reclaim resolved_region_heap memory. 
self.resolved_region_heap + .borrow_mut() .reset_and_shrink_to(self.capture_regions.len()); CDC_CAPTURED_REGION_COUNT.set(self.capture_regions.len() as i64); @@ -1276,7 +1272,11 @@ mod tests { }; use super::*; - use crate::{channel, delegate::ObservedRange, recv_timeout}; + use crate::{ + channel, + delegate::{post_init_downstream, ObservedRange}, + recv_timeout, + }; struct TestEndpointSuite { // The order must ensure `endpoint` be dropped before other fields. @@ -2477,11 +2477,6 @@ mod tests { assert!(regions.contains(&5)); assert!(regions.contains(&6)); - // Empty regions - let (ts, regions) = heap.to_hash_set(); - assert_eq!(ts, TimeStamp::max()); - assert!(regions.is_empty()); - let mut heap1 = ResolvedRegionHeap { heap: BinaryHeap::new(), }; @@ -2495,13 +2490,6 @@ mod tests { assert_eq!(regions.len(), 1); assert!(regions.contains(&3)); - let (ts, regions) = heap1.to_hash_set(); - assert_eq!(ts, 4.into()); - assert_eq!(regions.len(), 3); - assert!(regions.contains(&4)); - assert!(regions.contains(&5)); - assert!(regions.contains(&6)); - heap1.reset_and_shrink_to(3); assert_eq!(3, heap1.heap.capacity()); assert!(heap1.heap.is_empty()); @@ -2510,4 +2498,92 @@ mod tests { heap1.clear(); assert!(heap1.heap.is_empty()); } + + #[test] + fn test_on_min_ts() { + let cfg = CdcConfig { + // Disable automatic advance resolved ts during test. 
+ min_ts_interval: ReadableDuration(Duration::from_secs(1000)), + ..Default::default() + }; + let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); + let quota = crate::channel::MemoryQuota::new(usize::MAX); + let (tx, mut rx) = channel::channel(1, quota); + let mut rx = rx.drain(); + + let conn = Conn::new(tx, String::new()); + let conn_id = conn.get_id(); + suite.run(Task::OpenConn { conn }); + let mut req_header = Header::default(); + req_header.set_cluster_id(0); + + let mut regions = vec![]; + for id in 1..4097 { + regions.push(id); + suite.add_region(id, 100); + + let mut req = ChangeDataRequest::default(); + req.set_region_id(id); + let region_epoch = req.get_region_epoch().clone(); + let downstream = Downstream::new( + "".to_string(), + region_epoch.clone(), + id, + conn_id, + ChangeDataRequestKvApi::TiDb, + false, + ObservedRange::default(), + ); + on_init_downstream(&downstream.get_state()); + post_init_downstream(&downstream.get_state()); + // Enable batch resolved ts in the test. + let version = FeatureGate::batch_resolved_ts(); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + version: version.clone(), + }); + + let mut resolver = Resolver::new(id); + resolver.track_lock(TimeStamp::compose(0, id), vec![], None); + let mut region = Region::default(); + region.id = id; + region.set_region_epoch(region_epoch); + let failed = suite + .capture_regions + .get_mut(&id) + .unwrap() + .on_region_ready(resolver, region); + assert!(failed.is_empty()); + } + suite + .task_rx + .recv_timeout(Duration::from_millis(100)) + .unwrap_err(); + + suite.run(Task::MinTs { + regions, + min_ts: TimeStamp::compose(0, 4096), + current_ts: TimeStamp::compose(0, 4096), + }); + + // There should be at least 3 resolved ts events. 
+ let mut last_resolved_ts = 0; + let mut last_batch_count = 0; + for _ in 0..3 { + let event = recv_timeout(&mut rx, Duration::from_millis(100)) + .unwrap() + .unwrap() + .0; + assert!(last_resolved_ts < event.resolved_ts().ts, "{:?}", event); + assert!( + last_batch_count < event.resolved_ts().regions.len(), + "{:?}", + event + ); + last_resolved_ts = event.resolved_ts().ts; + last_batch_count = event.resolved_ts().regions.len(); + } + } } From 4199ed9ddd307d74656ac25d7c2c1692fdac9f8b Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 7 Apr 2023 17:46:58 +0800 Subject: [PATCH 0619/1149] tikv_util: cgroup path parsing fix (#14537) close tikv/tikv#14538 Signed-off-by: Spade A --- components/tikv_util/src/sys/cgroup.rs | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/components/tikv_util/src/sys/cgroup.rs b/components/tikv_util/src/sys/cgroup.rs index 371d51e0b70..052a607a8c9 100644 --- a/components/tikv_util/src/sys/cgroup.rs +++ b/components/tikv_util/src/sys/cgroup.rs @@ -183,15 +183,19 @@ fn is_cgroup2_unified_mode() -> Result { // // The format is "::". For example, // "10:cpuset:/test-cpuset". +// +// Note: path may contains ":" in some envrionment. 
fn parse_proc_cgroup_v1(lines: &str) -> HashMap { let mut subsystems = HashMap::new(); for line in lines.lines().map(|s| s.trim()).filter(|s| !s.is_empty()) { let mut iter = line.split(':'); if let Some(_id) = iter.next() { if let Some(systems) = iter.next() { - if let Some(path) = iter.next() { + // If the path itself contains ":", we need to concat them + let path = iter.collect::>().join(":"); + if !path.is_empty() { for system in systems.split(',') { - subsystems.insert(system.to_owned(), path.to_owned()); + subsystems.insert(system.to_owned(), path.clone()); } continue; } @@ -697,4 +701,19 @@ mod tests { .unwrap(); assert!(child.wait().unwrap().success()); } + + #[test] + fn test_cgroup_path_with_semicolon() { + let id = "1"; + let devices = "test_device"; + let path = "/dir1:dir2:dir3"; + let mut lines = String::new(); + lines.push_str(id); + lines.push(':'); + lines.push_str(devices); + lines.push(':'); + lines.push_str(path); + let ret = parse_proc_cgroup_v1(&lines); + assert_eq!(ret.get(devices).unwrap(), path); + } } From abb672b8218307e3811281c22643e0eb2e13cc2c Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Fri, 7 Apr 2023 17:02:58 -0700 Subject: [PATCH 0620/1149] [raftstore-v2]: check apply_scheduler before using in on_refresh_region_buckets (#14526) close tikv/tikv#14506 check apply_scheduler before using it in on_refresh_region_buckets. This is to solve the race condition when the peer is just created by split meanwhile a refresh bucket is called immediately. 
Signed-off-by: tonyxuqqi Co-authored-by: buffer --- .../raftstore-v2/src/operation/bucket.rs | 16 ++--- .../raftstore-v2/src/operation/command/mod.rs | 1 + .../raftstore-v2/src/operation/query/mod.rs | 3 + .../raftstore-v2/tests/failpoints/mod.rs | 1 + .../tests/failpoints/test_bucket.rs | 58 ++++++++++++++++++ .../tests/integrations/cluster.rs | 60 ++++++++++++++++++- .../raftstore-v2/tests/integrations/mod.rs | 1 + components/raftstore/src/store/region_meta.rs | 2 + 8 files changed, 132 insertions(+), 10 deletions(-) create mode 100644 components/raftstore-v2/tests/failpoints/test_bucket.rs diff --git a/components/raftstore-v2/src/operation/bucket.rs b/components/raftstore-v2/src/operation/bucket.rs index be4ca092d98..317ed89ef8d 100644 --- a/components/raftstore-v2/src/operation/bucket.rs +++ b/components/raftstore-v2/src/operation/bucket.rs @@ -254,14 +254,16 @@ impl Peer { let meta = region_buckets.meta.clone(); self.region_buckets_info_mut() .set_bucket_stat(Some(region_buckets.clone())); - - let mut store_meta = store_ctx.store_meta.lock().unwrap(); - if let Some(reader) = store_meta.readers.get_mut(&self.region_id()) { - reader.0.update(ReadProgress::region_buckets(meta)); + { + let mut store_meta = store_ctx.store_meta.lock().unwrap(); + if let Some(reader) = store_meta.readers.get_mut(&self.region_id()) { + reader.0.update(ReadProgress::region_buckets(meta)); + } + } + // it's possible that apply_scheduler is not initialized yet + if let Some(apply_scheduler) = self.apply_scheduler() { + apply_scheduler.send(ApplyTask::RefreshBucketStat(region_buckets.meta.clone())); } - self.apply_scheduler() - .unwrap() - .send(ApplyTask::RefreshBucketStat(region_buckets.meta.clone())); } #[inline] diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 0ae2f1741c3..9ef5592c64e 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs 
@@ -148,6 +148,7 @@ impl Peer { .apply_pool .spawn(async move { apply_fsm.handle_all_tasks().await }) .unwrap(); + fail::fail_point!("delay_set_apply_scheduler", |_| {}); self.set_apply_scheduler(apply_scheduler); } diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index fc7cee35fa5..55bc100dec2 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -401,6 +401,9 @@ impl Peer { .raft_log .term(meta.raft_apply.commit_index) .unwrap(); + if let Some(bucket_stats) = self.region_buckets_info().bucket_stat() { + meta.bucket_keys = bucket_stats.meta.keys.clone(); + } debug!(self.logger, "on query debug info"; "tick" => self.raft_group().raft.election_elapsed, "election_timeout" => self.raft_group().raft.randomized_election_timeout(), diff --git a/components/raftstore-v2/tests/failpoints/mod.rs b/components/raftstore-v2/tests/failpoints/mod.rs index f73b9398df6..6148cb4eae1 100644 --- a/components/raftstore-v2/tests/failpoints/mod.rs +++ b/components/raftstore-v2/tests/failpoints/mod.rs @@ -10,6 +10,7 @@ mod cluster; mod test_basic_write; mod test_bootstrap; +mod test_bucket; mod test_life; mod test_merge; mod test_split; diff --git a/components/raftstore-v2/tests/failpoints/test_bucket.rs b/components/raftstore-v2/tests/failpoints/test_bucket.rs new file mode 100644 index 00000000000..f136cf6dc53 --- /dev/null +++ b/components/raftstore-v2/tests/failpoints/test_bucket.rs @@ -0,0 +1,58 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::time::Duration; + +use engine_traits::RaftEngineReadOnly; +use raftstore::store::RAFT_INIT_LOG_INDEX; +use tikv_util::store::new_peer; + +use crate::cluster::{split_helper::split_region_and_refresh_bucket, Cluster}; + +/// Test refresh bucket. 
+#[test] +fn test_refresh_bucket() { + let mut cluster = Cluster::default(); + let store_id = cluster.node(0).id(); + let raft_engine = cluster.node(0).running_state().unwrap().raft_engine.clone(); + let router = &mut cluster.routers[0]; + + let region_2 = 2; + let region = router.region_detail(region_2); + let peer = region.get_peers()[0].clone(); + router.wait_applied_to_current_term(region_2, Duration::from_secs(3)); + + // Region 2 ["", ""] + // -> Region 2 ["", "k22"] + // Region 1000 ["k22", ""] peer(1, 10) + let region_state = raft_engine + .get_region_state(region_2, u64::MAX) + .unwrap() + .unwrap(); + assert_eq!(region_state.get_tablet_index(), RAFT_INIT_LOG_INDEX); + + // to simulate the delay of set_apply_scheduler + fail::cfg("delay_set_apply_scheduler", "sleep(1000)").unwrap(); + split_region_and_refresh_bucket( + router, + region, + peer, + 1000, + new_peer(store_id, 10), + b"k22", + false, + ); + + for _i in 1..100 { + std::thread::sleep(Duration::from_millis(50)); + let meta = router + .must_query_debug_info(1000, Duration::from_secs(1)) + .unwrap(); + if !meta.bucket_keys.is_empty() { + assert_eq!(meta.bucket_keys.len(), 4); // include region start/end keys + assert_eq!(meta.bucket_keys[1], b"1".to_vec()); + assert_eq!(meta.bucket_keys[2], b"2".to_vec()); + return; + } + } + panic!("timeout for updating buckets"); // timeout +} diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 1685b5154e7..9c81f9545a3 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -31,11 +31,11 @@ use kvproto::{ use pd_client::RpcClient; use raft::eraftpb::MessageType; use raftstore::{ - coprocessor::{Config as CopConfig, CoprocessorHost}, + coprocessor::{Config as CopConfig, CoprocessorHost, StoreHandle}, store::{ region_meta::{RegionLocalState, RegionMeta}, - AutoSplitController, Config, RegionSnapshot, TabletSnapKey, 
TabletSnapManager, Transport, - RAFT_INIT_LOG_INDEX, + AutoSplitController, Bucket, Config, RegionSnapshot, TabletSnapKey, TabletSnapManager, + Transport, RAFT_INIT_LOG_INDEX, }, }; use raftstore_v2::{ @@ -232,6 +232,11 @@ impl TestRouter { } region } + + pub fn refresh_bucket(&self, region_id: u64, region_epoch: RegionEpoch, buckets: Vec) { + self.store_router() + .refresh_region_buckets(region_id, region_epoch, buckets, None); + } } pub struct RunningState { @@ -653,6 +658,7 @@ pub mod split_helper { metapb, pdpb, raft_cmdpb::{AdminCmdType, AdminRequest, RaftCmdRequest, RaftCmdResponse, SplitRequest}, }; + use raftstore::store::Bucket; use raftstore_v2::{router::PeerMsg, SimpleWriteEncoder}; use super::TestRouter; @@ -760,6 +766,54 @@ pub mod split_helper { (left, right) } + + // Split the region and refresh bucket immediately + // This is to simulate the case when the splitted peer's storage is not + // initialized yet when refresh bucket happens + pub fn split_region_and_refresh_bucket( + router: &mut TestRouter, + region: metapb::Region, + peer: metapb::Peer, + split_region_id: u64, + split_peer: metapb::Peer, + propose_key: &[u8], + right_derive: bool, + ) { + let region_id = region.id; + let mut req = RaftCmdRequest::default(); + req.mut_header().set_region_id(region_id); + req.mut_header() + .set_region_epoch(region.get_region_epoch().clone()); + req.mut_header().set_peer(peer); + + let mut split_id = pdpb::SplitId::new(); + split_id.new_region_id = split_region_id; + split_id.new_peer_ids = vec![split_peer.id]; + let admin_req = new_batch_split_region_request( + vec![propose_key.to_vec()], + vec![split_id], + right_derive, + ); + req.mut_requests().clear(); + req.set_admin_request(admin_req); + + let (msg, sub) = PeerMsg::admin_command(req); + router.send(region_id, msg).unwrap(); + block_on(sub.result()).unwrap(); + + let meta = router + .must_query_debug_info(split_region_id, Duration::from_secs(1)) + .unwrap(); + let epoch = &meta.region_state.epoch; + 
let buckets = vec![Bucket { + keys: vec![b"1".to_vec(), b"2".to_vec()], + size: 100, + }]; + let mut region_epoch = kvproto::metapb::RegionEpoch::default(); + region_epoch.set_conf_ver(epoch.conf_ver); + region_epoch.set_version(epoch.version); + router.refresh_bucket(split_region_id, region_epoch, buckets); + } } pub mod merge_helper { diff --git a/components/raftstore-v2/tests/integrations/mod.rs b/components/raftstore-v2/tests/integrations/mod.rs index 12fe47ec48a..a4cdfda9179 100644 --- a/components/raftstore-v2/tests/integrations/mod.rs +++ b/components/raftstore-v2/tests/integrations/mod.rs @@ -7,6 +7,7 @@ // TODO: test conflict control in integration tests after split is supported. +#[allow(dead_code)] mod cluster; mod test_basic_write; mod test_conf_change; diff --git a/components/raftstore/src/store/region_meta.rs b/components/raftstore/src/store/region_meta.rs index 4d44673e057..30239be528c 100644 --- a/components/raftstore/src/store/region_meta.rs +++ b/components/raftstore/src/store/region_meta.rs @@ -246,6 +246,7 @@ pub struct RegionMeta { pub raft_status: RaftStatus, pub raft_apply: RaftApplyState, pub region_state: RegionLocalState, + pub bucket_keys: Vec>, } impl RegionMeta { @@ -308,6 +309,7 @@ impl RegionMeta { }), tablet_index: local_state.get_tablet_index(), }, + bucket_keys: vec![], } } } From 68298d834be1844eaf254d5237eed4856605833c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 10 Apr 2023 15:36:59 +0800 Subject: [PATCH 0621/1149] log-backup: use conservativer batch strategy (#14490) close tikv/tikv#14313 Signed-off-by: hillium Co-authored-by: Xinye Tao --- src/import/sst_service.rs | 142 ++++++++++++++++++++++++++++++++------ 1 file changed, 122 insertions(+), 20 deletions(-) diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 4707b348bc5..c235c60a4e6 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -61,7 +61,7 @@ use crate::{ 
/// this value? const REQUEST_WRITE_CONCURRENCY: usize = 16; /// The extra bytes required by the wire encoding. -/// Generally, a field (and a embedded message) would introduce 2 extra +/// Generally, a field (and an embedded message) would introduce some extra /// bytes. In detail, they are: /// - 2 bytes for the request type (Tag+Value). /// - 2 bytes for every string or bytes field (Tag+Length), they are: @@ -69,10 +69,12 @@ const REQUEST_WRITE_CONCURRENCY: usize = 16; /// . + the value field /// . + the CF field (None for CF_DEFAULT) /// - 2 bytes for the embedded message field `PutRequest` (Tag+Length). +/// - 2 bytes for the request itself (which would be embedded into a +/// [`RaftCmdRequest`].) /// In fact, the length field is encoded by varint, which may grow when the /// content length is greater than 128, however when the length is greater than /// 128, the extra 1~4 bytes can be ignored. -const WIRE_EXTRA_BYTES: usize = 10; +const WIRE_EXTRA_BYTES: usize = 12; /// The interval of running the GC for /// [`raft_writer::ThrottledTlsEngineWriter`]. There aren't too many items held /// in the writer. So we can run the GC less frequently. @@ -118,6 +120,7 @@ pub struct ImportSstService { struct RequestCollector { max_raft_req_size: usize, + /// Retain the last ts of each key in each request. /// This is used for write CF because resolved ts observer hates duplicated /// key in the same request. @@ -180,10 +183,25 @@ impl RequestCollector { self.accept(cf, m); } + /// check whether the unpacked size would exceed the max_raft_req_size after + /// accepting the modify. + fn should_send_batch_before_adding(&self, m: &Modify) -> bool { + let message_size = m.size() + WIRE_EXTRA_BYTES; + // If there aren't any records in the collector, and there is a huge modify, we + // should give it a chance to enter the collector. Or we may generate empty + // batch.
+ self.unpacked_size != 0 /* batched */ + && message_size + self.unpacked_size > self.max_raft_req_size /* exceed the max_raft_req_size */ + } + // we need to remove duplicate keys in here, since // in https://github.com/tikv/tikv/blob/a401f78bc86f7e6ea6a55ad9f453ae31be835b55/components/resolved_ts/src/cmd.rs#L204 // will panic if found duplicated entry during Vec. fn accept(&mut self, cf: &str, m: Modify) { + if self.should_send_batch_before_adding(&m) { + self.pack_all(); + } + let k = m.key(); match cf { CF_WRITE => { @@ -221,10 +239,6 @@ impl RequestCollector { } _ => unreachable!(), } - - if self.unpacked_size >= self.max_raft_req_size { - self.pack_all(); - } } #[cfg(test)] @@ -495,7 +509,7 @@ impl ImportSstService { ) -> std::result::Result, ImportPbError> { let mut range: Option = None; - let mut collector = RequestCollector::new(max_raft_size * 7 / 8); + let mut collector = RequestCollector::new(max_raft_size / 2); let context = req.take_context(); let mut metas = req.take_metas(); let mut rules = req.take_rewrite_rules(); @@ -1175,12 +1189,16 @@ mod test { use std::collections::HashMap; use engine_traits::{CF_DEFAULT, CF_WRITE}; - use kvproto::raft_cmdpb::Request; + use kvproto::{ + kvrpcpb::Context, + metapb::RegionEpoch, + raft_cmdpb::{RaftCmdRequest, Request}, + }; use protobuf::Message; - use tikv_kv::Modify; - use txn_types::{Key, TimeStamp, Write, WriteType}; + use tikv_kv::{Modify, WriteData}; + use txn_types::{Key, TimeStamp, Write, WriteBatchFlags, WriteType}; - use crate::import::sst_service::RequestCollector; + use crate::{import::sst_service::RequestCollector, server::raftkv}; fn write(key: &[u8], ty: WriteType, commit_ts: u64, start_ts: u64) -> (Vec, Vec) { let k = Key::from_raw(key).append_ts(TimeStamp::new(commit_ts)); @@ -1361,23 +1379,107 @@ mod test { assert!(request_collector.is_empty()); } + fn convert_write_batch_to_request_raftkv1(ctx: &Context, batch: WriteData) -> RaftCmdRequest { + let reqs: Vec = 
batch.modifies.into_iter().map(Into::into).collect(); + let txn_extra = batch.extra; + let mut header = raftkv::new_request_header(ctx); + if batch.avoid_batch { + header.set_uuid(uuid::Uuid::new_v4().as_bytes().to_vec()); + } + let mut flags = 0; + if txn_extra.one_pc { + flags |= WriteBatchFlags::ONE_PC.bits(); + } + if txn_extra.allowed_in_flashback { + flags |= WriteBatchFlags::FLASHBACK.bits(); + } + header.set_flags(flags); + + let mut cmd = RaftCmdRequest::default(); + cmd.set_header(header); + cmd.set_requests(reqs.into()); + cmd + } + + fn fake_ctx() -> Context { + let mut fake_ctx = Context::new(); + fake_ctx.set_region_id(42); + fake_ctx.set_region_epoch({ + let mut e = RegionEpoch::new(); + e.set_version(1024); + e.set_conf_ver(56); + e + }); + fake_ctx + } + #[test] fn test_collector_size() { let mut request_collector = RequestCollector::new(1024); - for i in 0..100u64 { - request_collector.accept(CF_DEFAULT, default_req(&i.to_ne_bytes(), b"egg", i)); + for i in 0..100u8 { + request_collector.accept(CF_DEFAULT, default_req(&i.to_ne_bytes(), b"egg", i as _)); } - let pws = request_collector.pending_writes; + let pws = request_collector.drain_pending_writes(true); for w in pws { - let req_size = w - .modifies - .into_iter() - .map(Request::from) - .map(|x| x.compute_size()) - .sum::(); + let req_size = convert_write_batch_to_request_raftkv1(&fake_ctx(), w).compute_size(); + assert!(req_size < 1024, "{}", req_size); + } + } + + #[test] + fn test_collector_huge_write_liveness() { + let mut request_collector = RequestCollector::new(1024); + for i in 0..100u8 { + if i % 10 == 2 { + // Inject some huge requests. 
+ request_collector.accept( + CF_DEFAULT, + default_req(&i.to_ne_bytes(), &[42u8; 1025], i as _), + ); + } else { + request_collector.accept(CF_DEFAULT, default_req(&i.to_ne_bytes(), b"egg", i as _)); + } + } + let pws = request_collector.drain_pending_writes(true); + let mut total = 0; + for w in pws { + let req = convert_write_batch_to_request_raftkv1(&fake_ctx(), w); + let req_size = req.compute_size(); + total += req.get_requests().len(); + assert!(req_size < 2048, "{}", req_size); + } + assert_eq!(total, 100); + } + + #[test] + fn test_collector_mid_size_write_no_exceed_max() { + let mut request_collector = RequestCollector::new(1024); + for i in 0..100u8 { + if i % 10 == 2 { + let huge_req = default_req(&i.to_ne_bytes(), &[42u8; 960], i as _); + // Inject some huge requests. + request_collector.accept(CF_DEFAULT, huge_req); + } else { + request_collector.accept( + CF_DEFAULT, + default_req( + &i.to_ne_bytes(), + b"noodles with beef, egg, bacon and spinach; in chicken soup", + i as _, + ), + ); + } + } + let pws = request_collector.drain_pending_writes(true); + let mut total = 0; + for w in pws { + let req = convert_write_batch_to_request_raftkv1(&fake_ctx(), w); + let req_size = req.compute_size(); + total += req.get_requests().len(); assert!(req_size < 1024, "{}", req_size); } + assert_eq!(total, 100); } } From 6433784b557c99518dea89612cf699b490c8f35f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 11 Apr 2023 11:57:00 +0800 Subject: [PATCH 0622/1149] log-backup: eliminate some verbose logs (#14454) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#14453, ref tikv/tikv#14453 Signed-off-by: hillium Signed-off-by: 山岚 <36239017+YuJuncen@users.noreply.github.com> Co-authored-by: qupeng Co-authored-by: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> --- Cargo.lock | 1 + components/backup-stream/Cargo.toml | 1 + 
components/backup-stream/src/endpoint.rs | 4 ++++ components/backup-stream/src/metrics.rs | 23 ++++++++++++++++--- components/backup-stream/src/observer.rs | 2 +- .../backup-stream/src/subscription_manager.rs | 14 ++++++++--- .../backup-stream/src/subscription_track.rs | 2 +- 7 files changed, 39 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e8162267354..a508216a0e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -550,6 +550,7 @@ dependencies = [ "pd_client", "pin-project", "prometheus", + "prometheus-static-metric", "protobuf", "raft", "raftstore", diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index d6d6f7a6fc4..005849391e9 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -54,6 +54,7 @@ openssl = "0.10" pd_client = { workspace = true } pin-project = "1.0" prometheus = { version = "0.13", default-features = false, features = ["nightly"] } +prometheus-static-metric = "0.5" protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raftstore = { workspace = true } diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index c8302f6dd9e..c5ab6352b31 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -980,6 +980,10 @@ where }); } RegionCheckpointOperation::PrepareMinTsForResolve => { + if self.observer.is_hibernating() { + metrics::MISC_EVENTS.skip_resolve_no_subscription.inc(); + return; + } let min_ts = self.pool.block_on(self.prepare_min_ts()); let start_time = Instant::now(); // We need to reschedule the `Resolve` task to queue, because the subscription diff --git a/components/backup-stream/src/metrics.rs b/components/backup-stream/src/metrics.rs index d7f836833b0..225d583ca5c 100644 --- a/components/backup-stream/src/metrics.rs +++ b/components/backup-stream/src/metrics.rs @@ -2,6 +2,7 
@@ use lazy_static::lazy_static; use prometheus::*; +use prometheus_static_metric::*; /// The status of a task. /// The ordering of this imples the priority for presenting to the user. @@ -155,9 +156,11 @@ lazy_static! { &["stage"] ) .unwrap(); - pub static ref LOST_LEADER_REGION: IntCounter = register_int_counter!( - "tikv_log_backup_lost_leader_region", - "The regions that lost leadership during resolving" + pub static ref MISC_EVENTS: MiscEvents = register_static_int_counter_vec!( + MiscEvents, + "tikv_log_backup_misc_events", + "Events counter, including 'plain' events(i.e. events without extra information).", + &["name"] ) .unwrap(); pub static ref MIN_TS_RESOLVE_DURATION: Histogram = register_histogram!( @@ -167,3 +170,17 @@ lazy_static! { ) .unwrap(); } + +make_static_metric! { + pub label_enum MiscEventsName { + skip_resolve_non_leader, + skip_resolve_no_subscription, + } + + pub struct MiscEvents: IntCounter { + "name" => { + skip_resolve_non_leader, + skip_resolve_no_subscription, + } + } +} diff --git a/components/backup-stream/src/observer.rs b/components/backup-stream/src/observer.rs index 1a0a0f7cc9e..92ab6bc757e 100644 --- a/components/backup-stream/src/observer.rs +++ b/components/backup-stream/src/observer.rs @@ -100,7 +100,7 @@ impl BackupStreamObserver { /// Check whether there are any task range registered to the observer. /// when there isn't any task, we can ignore the events, so we don't need to /// handle useless events. (Also won't yield verbose logs.) 
- fn is_hibernating(&self) -> bool { + pub fn is_hibernating(&self) -> bool { self.ranges.rl().is_empty() } } diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index 6e72d66a98b..e4ce02c9e27 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -38,7 +38,7 @@ use crate::{ metrics, observer::BackupStreamObserver, router::{Router, TaskSelector}, - subscription_track::{ResolveResult, SubscriptionTracer}, + subscription_track::{CheckpointType, ResolveResult, SubscriptionTracer}, try_send, utils::{self, CallbackWaitGroup, Work}, Task, @@ -407,7 +407,10 @@ where mut leader_checker: LeadershipResolver, ) { while let Some(op) = message_box.recv().await { - info!("backup stream: on_modify_observe"; "op" => ?op); + // Skip some trivial resolve commands. + if !matches!(op, ObserveOp::ResolveRegions { .. }) { + info!("backup stream: on_modify_observe"; "op" => ?op); + } match op { ObserveOp::Start { region } => { fail::fail_point!("delay_on_start_observe"); @@ -477,7 +480,12 @@ where // If there isn't any region observed, the `min_ts` can be used as resolved ts // safely. 
let rts = min_region.map(|rs| rs.checkpoint).unwrap_or(min_ts); - info!("getting checkpoint"; "defined_by_region" => ?min_region); + if min_region + .map(|mr| mr.checkpoint_type != CheckpointType::MinTs) + .unwrap_or(false) + { + info!("getting non-trivial checkpoint"; "defined_by_region" => ?min_region); + } callback(ResolvedRegions::new(rts, cps)); } } diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index c13339d1c29..7fee1b1b438 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -250,7 +250,7 @@ impl SubscriptionTracer { SubscribeState::Running(sub) => { let contains = rs.contains(®ion_id); if !contains { - crate::metrics::LOST_LEADER_REGION.inc(); + crate::metrics::MISC_EVENTS.skip_resolve_non_leader.inc(); } contains.then(|| ResolveResult::resolve(sub, min_ts)) } From e61b51df06539ee50eeaea18dff81c6d72f1fdd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 11 Apr 2023 13:51:00 +0800 Subject: [PATCH 0623/1149] log-backup: make initial scanning more robust (#14403) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#14451 Signed-off-by: hillium Signed-off-by: 山岚 <36239017+YuJuncen@users.noreply.github.com> Co-authored-by: Ti Chi Robot --- components/backup-stream/src/endpoint.rs | 3 + components/backup-stream/src/event_loader.rs | 11 +- .../backup-stream/src/metadata/client.rs | 5 +- components/backup-stream/src/metadata/keys.rs | 4 + .../src/metadata/store/slash_etc.rs | 6 +- components/backup-stream/src/observer.rs | 24 +--- .../backup-stream/src/subscription_manager.rs | 110 +++++++++++++++--- components/backup-stream/tests/mod.rs | 63 +++++++++- 8 files changed, 171 insertions(+), 55 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 
c5ab6352b31..45d132b001b 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -1159,6 +1159,7 @@ pub enum ObserveOp { region: Region, handle: ObserveHandle, err: Box, + has_failed_for: u8, }, ResolveRegions { callback: ResolveRegionsCallback, @@ -1189,11 +1190,13 @@ impl std::fmt::Debug for ObserveOp { region, handle, err, + has_failed_for, } => f .debug_struct("NotifyFailToStartObserve") .field("region", &utils::debug_region(region)) .field("handle", handle) .field("err", err) + .field("has_failed_for", has_failed_for) .finish(), Self::ResolveRegions { min_ts, .. } => f .debug_struct("ResolveRegions") diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 8b808a16cca..6c825bf30c5 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -1,10 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{ - marker::PhantomData, - sync::{atomic::Ordering, Arc}, - time::Duration, -}; +use std::{marker::PhantomData, sync::Arc, time::Duration}; use engine_traits::{KvEngine, CF_DEFAULT, CF_WRITE}; use futures::executor::block_on; @@ -488,13 +484,10 @@ where // is still little chance to lost data: For example, if a region cannot elect // the leader for long time. (say, net work partition) At that time, we have // nowhere to record the lock status of this region. 
- let success = try_send!( + try_send!( self.scheduler, Task::ModifyObserve(ObserveOp::Start { region: r.region }) ); - if success { - crate::observer::IN_FLIGHT_START_OBSERVE_MESSAGE.fetch_add(1, Ordering::SeqCst); - } } } Ok(()) diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index fca8a07b654..1fdc1b3b1e8 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -426,7 +426,10 @@ impl MetadataClient { let stream = watcher .stream .filter_map(|item| match item { - Ok(kv_event) => MetadataEvent::from_watch_pause_event(&kv_event), + Ok(kv_event) => { + debug!("watch pause event"; "raw" => ?kv_event); + MetadataEvent::from_watch_pause_event(&kv_event) + } Err(err) => Some(MetadataEvent::Error { err }), }) .map(|event| { diff --git a/components/backup-stream/src/metadata/keys.rs b/components/backup-stream/src/metadata/keys.rs index 26b04abe16f..87c0e036172 100644 --- a/components/backup-stream/src/metadata/keys.rs +++ b/components/backup-stream/src/metadata/keys.rs @@ -167,6 +167,10 @@ impl MetaKey { Self(format!("{}{}/{}", PREFIX, PATH_PAUSE, name).into_bytes()) } + pub fn last_errors_of(name: &str) -> Self { + Self(format!("{}{}/{}", PREFIX, PATH_LAST_ERROR, name).into_bytes()) + } + pub fn last_error_of(name: &str, store: u64) -> Self { Self(format!("{}{}/{}/{}", PREFIX, PATH_LAST_ERROR, name, store).into_bytes()) } diff --git a/components/backup-stream/src/metadata/store/slash_etc.rs b/components/backup-stream/src/metadata/store/slash_etc.rs index 0d6484b0c1e..a564d069d14 100644 --- a/components/backup-stream/src/metadata/store/slash_etc.rs +++ b/components/backup-stream/src/metadata/store/slash_etc.rs @@ -39,11 +39,7 @@ struct Key(Vec, i64); impl std::fmt::Debug for Key { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_tuple("Key") - .field(&format_args!( - "{}@{}", - log_wrappers::Value::key(&self.0), - self.1 - 
)) + .field(&format_args!("{}@{}", self.0.escape_ascii(), self.1)) .finish() } } diff --git a/components/backup-stream/src/observer.rs b/components/backup-stream/src/observer.rs index 92ab6bc757e..169c3b72268 100644 --- a/components/backup-stream/src/observer.rs +++ b/components/backup-stream/src/observer.rs @@ -1,9 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, RwLock, -}; +use std::sync::{Arc, RwLock}; use engine_traits::KvEngine; use kvproto::metapb::Region; @@ -18,20 +15,6 @@ use crate::{ utils::SegmentSet, }; -/// The inflight `StartObserve` message count. -/// Currently, we handle the `StartObserve` message in the main loop(endpoint -/// thread), which may take longer time than expected. So when we are starting -/// to observe many region (e.g. failover), there may be many pending messages, -/// those messages won't block the advancing of checkpoint ts. So the checkpoint -/// ts may be too late and losing some data. -/// -/// This is a temporary solution for this problem: If this greater than (1), -/// then it implies that there are some inflight wait-for-initialized regions, -/// we should block the resolved ts from advancing in that condition. -/// -/// FIXME: Move handler of `ModifyObserve` to another thread, and remove this :( -pub static IN_FLIGHT_START_OBSERVE_MESSAGE: AtomicUsize = AtomicUsize::new(0); - /// An Observer for Backup Stream. 
/// /// It observes raftstore internal events, such as: @@ -141,15 +124,12 @@ impl CmdObserver for BackupStreamObserver { fn on_applied_current_term(&self, role: StateRole, region: &Region) { if role == StateRole::Leader && self.should_register_region(region) { - let success = try_send!( + try_send!( self.scheduler, Task::ModifyObserve(ObserveOp::Start { region: region.clone(), }) ); - if success { - IN_FLIGHT_START_OBSERVE_MESSAGE.fetch_add(1, Ordering::SeqCst); - } } } } diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index e4ce02c9e27..316f0d9fb53 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -48,6 +48,21 @@ type ScanPool = yatp::ThreadPool; const INITIAL_SCAN_FAILURE_MAX_RETRY_TIME: usize = 10; +// The retry parameters for failed to get last checkpoint ts. +// When PD is temporarily disconnected, we may need this retry. +// The total duration of retrying is about 345s ( 20 * 16 + 15 ), +// which is longer than the RPO promise. +const TRY_START_OBSERVE_MAX_RETRY_TIME: u8 = 24; +const RETRY_AWAIT_BASIC_DURATION: Duration = Duration::from_secs(1); +const RETRY_AWAIT_MAX_DURATION: Duration = Duration::from_secs(16); + +fn backoff_for_start_observe(failed_for: u8) -> Duration { + Ord::min( + RETRY_AWAIT_BASIC_DURATION * (1 << failed_for), + RETRY_AWAIT_MAX_DURATION, + ) +} + /// a request for doing initial scanning. struct ScanCmd { region: Region, @@ -418,7 +433,6 @@ where metrics::INITIAL_SCAN_REASON .with_label_values(&["leader-changed"]) .inc(); - crate::observer::IN_FLIGHT_START_OBSERVE_MESSAGE.fetch_sub(1, Ordering::SeqCst); } ObserveOp::Stop { ref region } => { self.subs.deregister_region_if(region, |_, _| true); @@ -441,6 +455,7 @@ where region, handle, err, + has_failed_for, } => { info!("retry observe region"; "region" => %region.get_id(), "err" => %err); // No need for retrying observe canceled. 
@@ -451,7 +466,7 @@ where region.get_start_key().to_owned(), region.get_end_key().to_owned(), ); - match self.retry_observe(region, handle).await { + match self.retry_observe(region, handle, has_failed_for).await { Ok(()) => {} Err(e) => { let msg = Task::FatalError( @@ -519,7 +534,8 @@ where Task::ModifyObserve(ObserveOp::NotifyFailToStartObserve { region: region.clone(), handle, - err: Box::new(e) + err: Box::new(e), + has_failed_for: 0, }) ); } @@ -560,22 +576,59 @@ where } async fn start_observe(&self, region: Region) { + self.start_observe_with_failure_count(region, 0).await + } + + async fn start_observe_with_failure_count(&self, region: Region, has_failed_for: u8) { let handle = ObserveHandle::new(); + let schd = self.scheduler.clone(); self.subs.add_pending_region(®ion); if let Err(err) = self.try_start_observe(®ion, handle.clone()).await { - warn!("failed to start observe, retrying"; "err" => %err); - try_send!( - self.scheduler, - Task::ModifyObserve(ObserveOp::NotifyFailToStartObserve { - region, - handle, - err: Box::new(err) - }) - ); + warn!("failed to start observe, would retry"; "err" => %err, utils::slog_region(®ion)); + tokio::spawn(async move { + #[cfg(not(feature = "failpoints"))] + let delay = backoff_for_start_observe(has_failed_for); + #[cfg(feature = "failpoints")] + let delay = (|| { + fail::fail_point!("subscribe_mgr_retry_start_observe_delay", |v| { + let dur = v + .expect("should provide delay time (in ms)") + .parse::() + .expect("should be number (in ms)"); + Duration::from_millis(dur) + }); + backoff_for_start_observe(has_failed_for) + })(); + tokio::time::sleep(delay).await; + try_send!( + schd, + Task::ModifyObserve(ObserveOp::NotifyFailToStartObserve { + region, + handle, + err: Box::new(err), + has_failed_for: has_failed_for + 1 + }) + ) + }); } } - async fn retry_observe(&self, region: Region, handle: ObserveHandle) -> Result<()> { + async fn retry_observe( + &self, + region: Region, + handle: ObserveHandle, + failure_count: u8, 
+ ) -> Result<()> { + if failure_count > TRY_START_OBSERVE_MAX_RETRY_TIME { + return Err(Error::Other( + format!( + "retry time exceeds for region {:?}", + utils::debug_region(®ion) + ) + .into(), + )); + } + let (tx, rx) = crossbeam::channel::bounded(1); self.regions .find_region_by_id( @@ -626,7 +679,8 @@ where metrics::INITIAL_SCAN_REASON .with_label_values(&["retry"]) .inc(); - self.start_observe(region).await; + self.start_observe_with_failure_count(region, failure_count) + .await; Ok(()) } @@ -750,4 +804,32 @@ mod test { should_finish_in(move || drop(pool), Duration::from_secs(5)); } + + #[test] + fn test_backoff_for_start_observe() { + assert_eq!( + super::backoff_for_start_observe(0), + super::RETRY_AWAIT_BASIC_DURATION + ); + assert_eq!( + super::backoff_for_start_observe(1), + super::RETRY_AWAIT_BASIC_DURATION * 2 + ); + assert_eq!( + super::backoff_for_start_observe(2), + super::RETRY_AWAIT_BASIC_DURATION * 4 + ); + assert_eq!( + super::backoff_for_start_observe(3), + super::RETRY_AWAIT_BASIC_DURATION * 8 + ); + assert_eq!( + super::backoff_for_start_observe(4), + super::RETRY_AWAIT_MAX_DURATION + ); + assert_eq!( + super::backoff_for_start_observe(5), + super::RETRY_AWAIT_MAX_DURATION + ); + } } diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index db4f84924b0..7b2fe88b8a1 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -819,10 +819,15 @@ mod test { use std::time::{Duration, Instant}; use backup_stream::{ - errors::Error, router::TaskSelector, GetCheckpointResult, RegionCheckpointOperation, - RegionSet, Task, + errors::Error, + metadata::{ + keys::MetaKey, + store::{Keys, MetaStore}, + }, + router::TaskSelector, + GetCheckpointResult, RegionCheckpointOperation, RegionSet, Task, }; - use futures::{Stream, StreamExt}; + use futures::{executor::block_on, Stream, StreamExt}; use pd_client::PdClient; use test_raftstore::IsolationFilterFactory; use 
tikv_util::{box_err, defer, info, HandyRwLock}; @@ -1370,7 +1375,7 @@ mod test { .schedule(Task::ForceFlush("r".to_owned())) .unwrap(); suite.sync(); - std::thread::sleep(Duration::from_secs(1)); + std::thread::sleep(Duration::from_secs(2)); run_async_test(suite.check_for_write_records( suite.flushed_files.path(), round1.iter().map(|x| x.as_slice()), @@ -1429,4 +1434,54 @@ mod test { round1.iter().map(|k| k.as_slice()), )) } + + #[test] + fn test_retry_abort() { + let mut suite = super::SuiteBuilder::new_named("retry_abort") + .nodes(1) + .build(); + defer! { + fail::list().into_iter().for_each(|(name, _)| fail::remove(name)) + }; + + suite.must_register_task(1, "retry_abort"); + fail::cfg("subscribe_mgr_retry_start_observe_delay", "return(10)").unwrap(); + fail::cfg("try_start_observe", "return()").unwrap(); + + suite.must_split(&make_split_key_at_record(1, 42)); + std::thread::sleep(Duration::from_secs(2)); + + let error = run_async_test(suite.get_meta_cli().get_last_error("retry_abort", 1)).unwrap(); + let error = error.expect("no error uploaded"); + error + .get_error_message() + .find("retry") + .expect("error doesn't contain retry"); + fail::cfg("try_start_observe", "10*return()").unwrap(); + // Resume the task manually... 
+ run_async_test(async { + suite + .meta_store + .delete(Keys::Key(MetaKey::pause_of("retry_abort"))) + .await?; + suite + .meta_store + .delete(Keys::Prefix(MetaKey::last_errors_of("retry_abort"))) + .await?; + backup_stream::errors::Result::Ok(()) + }) + .unwrap(); + + suite.sync(); + suite.wait_with(move |r| block_on(r.get_task_info("retry_abort")).is_ok()); + let items = run_async_test(suite.write_records(0, 128, 1)); + suite.force_flush_files("retry_abort"); + suite.wait_for_flush(); + run_async_test( + suite.check_for_write_records( + suite.flushed_files.path(), + items.iter().map(Vec::as_slice), + ), + ); + } } From 9a073f274735052c846b2f66c20fa4b155d8ac6f Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 11 Apr 2023 15:59:01 +0800 Subject: [PATCH 0624/1149] *: register cdc/resolved_ts endpoint in server2 (#14543) ref tikv/tikv#14542 Register cdc/resolved_ts endpoint in server2 Signed-off-by: Neil Shen Co-authored-by: Ti Chi Robot --- components/cdc/src/endpoint.rs | 78 +++++++------- components/cdc/src/initializer.rs | 57 +++++------ components/cdc/tests/mod.rs | 8 +- components/raftstore-v2/src/fsm/store.rs | 7 +- components/raftstore-v2/src/router/imp.rs | 28 +++++- components/raftstore/src/router.rs | 67 ++++++++++++- components/raftstore/src/store/fsm/store.rs | 6 ++ components/resolved_ts/src/advance.rs | 19 ++-- components/resolved_ts/src/endpoint.rs | 43 ++++---- components/resolved_ts/src/scanner.rs | 32 +++--- components/resolved_ts/tests/mod.rs | 4 +- components/server/src/server.rs | 8 +- components/server/src/server2.rs | 106 +++++++++++++++++--- components/test_raftstore/src/server.rs | 4 +- 14 files changed, 326 insertions(+), 141 deletions(-) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index efc82e27d6c..68650130211 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -30,12 +30,15 @@ use online_config::{ConfigChange, OnlineConfig}; use pd_client::{Feature, PdClient}; use 
raftstore::{ coprocessor::{CmdBatch, ObserveId}, - router::RaftStoreRouter, - store::fsm::{ChangeObserver, StoreMeta}, + router::CdcHandle, + store::fsm::{store::StoreRegionMeta, ChangeObserver}, }; use resolved_ts::{LeadershipResolver, Resolver}; use security::SecurityManager; -use tikv::{config::CdcConfig, storage::Statistics}; +use tikv::{ + config::CdcConfig, + storage::{kv::LocalTablets, Statistics}, +}; use tikv_util::{ debug, defer, error, impl_display_as_debug, info, mpsc::bounded, @@ -312,20 +315,20 @@ impl ResolvedRegionHeap { } } -pub struct Endpoint { +pub struct Endpoint { cluster_id: u64, capture_regions: HashMap, connections: HashMap, scheduler: Scheduler, - raft_router: T, - engine: E, + cdc_handle: T, + tablets: LocalTablets, observer: CdcObserver, pd_client: Arc, timer: SteadyTimer, tso_worker: Runtime, - store_meta: Arc>, + store_meta: Arc>, /// The concurrency manager for transactions. It's needed for CDC to check /// locks when calculating resolved_ts. concurrency_manager: ConcurrencyManager, @@ -355,23 +358,23 @@ pub struct Endpoint { warn_resolved_ts_repeat_count: usize, } -impl, E: KvEngine> Endpoint { +impl, E: KvEngine, S: StoreRegionMeta> Endpoint { pub fn new( cluster_id: u64, config: &CdcConfig, api_version: ApiVersion, pd_client: Arc, scheduler: Scheduler, - raft_router: T, - engine: E, + cdc_handle: T, + tablets: LocalTablets, observer: CdcObserver, - store_meta: Arc>, + store_meta: Arc>, concurrency_manager: ConcurrencyManager, env: Arc, security_mgr: Arc, sink_memory_quota: MemoryQuota, causal_ts_provider: Option>, - ) -> Endpoint { + ) -> Endpoint { let workers = Builder::new_multi_thread() .thread_name("cdcwkr") .worker_threads(config.incremental_scan_threads) @@ -405,10 +408,10 @@ impl, E: KvEngine> Endpoint { // Assume 1KB per entry. 
let max_scan_batch_size = 1024; - let region_read_progress = store_meta.lock().unwrap().region_read_progress.clone(); + let region_read_progress = store_meta.lock().unwrap().region_read_progress().clone(); let store_resolver_gc_interval = Duration::from_secs(60); let leader_resolver = LeadershipResolver::new( - store_meta.lock().unwrap().store_id.unwrap(), + store_meta.lock().unwrap().store_id(), pd_client.clone(), env, security_mgr, @@ -430,8 +433,8 @@ impl, E: KvEngine> Endpoint { api_version, workers, scan_concurrency_semaphore, - raft_router, - engine, + cdc_handle, + tablets, observer, store_meta, concurrency_manager, @@ -643,7 +646,7 @@ impl, E: KvEngine> Endpoint { return; } - let txn_extra_op = match self.store_meta.lock().unwrap().readers.get(®ion_id) { + let txn_extra_op = match self.store_meta.lock().unwrap().reader(region_id) { Some(reader) => reader.txn_extra_op.clone(), None => { error!("cdc register for a not found region"; "region_id" => region_id); @@ -723,7 +726,7 @@ impl, E: KvEngine> Endpoint { let observed_range = downstream_.observed_range; let region_epoch = request.take_region_epoch(); let mut init = Initializer { - engine: self.engine.clone(), + tablet: self.tablets.get(region_id).map(|t| t.into_owned()), sched, observed_range, region_id, @@ -744,12 +747,12 @@ impl, E: KvEngine> Endpoint { filter_loop, }; - let raft_router = self.raft_router.clone(); + let cdc_handle = self.cdc_handle.clone(); let concurrency_semaphore = self.scan_concurrency_semaphore.clone(); self.workers.spawn(async move { CDC_SCAN_TASKS.with_label_values(&["total"]).inc(); match init - .initialize(change_cmd, raft_router, concurrency_semaphore) + .initialize(change_cmd, cdc_handle, concurrency_semaphore) .await { Ok(()) => { @@ -1009,7 +1012,7 @@ impl, E: KvEngine> Endpoint { let timeout = self.timer.delay(interval.unwrap_or_default()); let pd_client = self.pd_client.clone(); let scheduler = self.scheduler.clone(); - let raft_router = self.raft_router.clone(); + let 
cdc_handle = self.cdc_handle.clone(); let regions: Vec = self.capture_regions.keys().copied().collect(); let cm: ConcurrencyManager = self.concurrency_manager.clone(); let hibernate_regions_compatible = self.config.hibernate_regions_compatible; @@ -1074,7 +1077,7 @@ impl, E: KvEngine> Endpoint { } else { CDC_RESOLVED_TS_ADVANCE_METHOD.set(0); leader_resolver - .resolve_by_raft(regions, min_ts, raft_router) + .resolve_by_raft(regions, min_ts, cdc_handle) .await }; leader_resolver_tx.send(leader_resolver).unwrap(); @@ -1107,7 +1110,9 @@ impl, E: KvEngine> Endpoint { } } -impl, E: KvEngine> Runnable for Endpoint { +impl, E: KvEngine, S: StoreRegionMeta + Send> Runnable + for Endpoint +{ type Task = Task; fn run(&mut self, task: Task) { @@ -1183,7 +1188,9 @@ impl, E: KvEngine> Runnable for Endpoint { } } -impl, E: KvEngine> RunnableWithTimer for Endpoint { +impl, E: KvEngine, S: StoreRegionMeta + Send> RunnableWithTimer + for Endpoint +{ fn on_timeout(&mut self) { CDC_ENDPOINT_PENDING_TASKS.set(self.scheduler.pending_tasks() as _); @@ -1258,7 +1265,8 @@ mod tests { }; use raftstore::{ errors::{DiscardReason, Error as RaftStoreError}, - store::{msg::CasualMessage, PeerMsg, ReadDelegate}, + router::{CdcRaftRouter, RaftStoreRouter}, + store::{fsm::StoreMeta, msg::CasualMessage, PeerMsg, ReadDelegate}, }; use test_pd_client::TestPdClient; use test_raftstore::MockRaftStoreRouter; @@ -1280,8 +1288,8 @@ mod tests { struct TestEndpointSuite { // The order must ensure `endpoint` be dropped before other fields. - endpoint: Endpoint, - raft_router: MockRaftStoreRouter, + endpoint: Endpoint, RocksEngine, StoreMeta>, + cdc_handle: CdcRaftRouter, task_rx: ReceiverWrapper, raft_rxs: HashMap>>, leader_resolver: Option, @@ -1291,7 +1299,7 @@ mod tests { // It's important to matain raft receivers in `raft_rxs`, otherwise all cases // need to drop `endpoint` and `rx` in order manually. 
fn add_region(&mut self, region_id: u64, cap: usize) { - let rx = self.raft_router.add_region(region_id, cap); + let rx = self.cdc_handle.add_region(region_id, cap); self.raft_rxs.insert(region_id, rx); self.add_local_reader(region_id); } @@ -1305,7 +1313,7 @@ mod tests { } fn fill_raft_rx(&self, region_id: u64) { - let router = &self.raft_router; + let router = &self.cdc_handle; loop { match router.send_casual_msg(region_id, CasualMessage::ClearRegionSize) { Ok(_) => continue, @@ -1321,7 +1329,7 @@ mod tests { } impl Deref for TestEndpointSuite { - type Target = Endpoint; + type Target = Endpoint, RocksEngine, StoreMeta>; fn deref(&self) -> &Self::Target { &self.endpoint } @@ -1348,7 +1356,7 @@ mod tests { causal_ts_provider: Option>, ) -> TestEndpointSuite { let (task_sched, task_rx) = dummy_scheduler(); - let raft_router = MockRaftStoreRouter::new(); + let cdc_handle = CdcRaftRouter(MockRaftStoreRouter::new()); let mut store_meta = StoreMeta::new(0); store_meta.store_id = Some(1); let region_read_progress = store_meta.region_read_progress.clone(); @@ -1370,14 +1378,14 @@ mod tests { api_version, pd_client, task_sched.clone(), - raft_router.clone(), - engine.unwrap_or_else(|| { + cdc_handle.clone(), + LocalTablets::Singleton(engine.unwrap_or_else(|| { TestEngineBuilder::new() .build_without_cache() .unwrap() .kv_engine() .unwrap() - }), + })), CdcObserver::new(task_sched), Arc::new(StdMutex::new(store_meta)), ConcurrencyManager::new(1.into()), @@ -1389,7 +1397,7 @@ mod tests { TestEndpointSuite { endpoint: ep, - raft_router, + cdc_handle, task_rx, raft_rxs: HashMap::default(), leader_resolver: Some(leader_resolver), diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 68850ac55ac..8f6f8ed38a7 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -17,10 +17,10 @@ use kvproto::{ }; use raftstore::{ coprocessor::ObserveId, - router::RaftStoreRouter, + router::CdcHandle, store::{ 
fsm::ChangeObserver, - msg::{Callback, ReadResponse, SignificantMsg}, + msg::{Callback, ReadResponse}, }, }; use resolved_ts::Resolver; @@ -75,7 +75,7 @@ pub(crate) enum Scanner { } pub(crate) struct Initializer { - pub(crate) engine: E, + pub(crate) tablet: Option, pub(crate) sched: Scheduler, pub(crate) sink: crate::channel::Sink, @@ -102,10 +102,10 @@ pub(crate) struct Initializer { } impl Initializer { - pub(crate) async fn initialize>( + pub(crate) async fn initialize>( &mut self, - change_cmd: ChangeObserver, - raft_router: T, + change_observer: ChangeObserver, + cdc_handle: T, concurrency_semaphore: Arc, ) -> Result<()> { fail_point!("cdc_before_initialize"); @@ -142,24 +142,22 @@ impl Initializer { let (incremental_scan_barrier_cb, incremental_scan_barrier_fut) = tikv_util::future::paired_future_callback(); let barrier = CdcEvent::Barrier(Some(incremental_scan_barrier_cb)); - if let Err(e) = raft_router.significant_send( + if let Err(e) = cdc_handle.capture_change( self.region_id, - SignificantMsg::CaptureChange { - cmd: change_cmd, - region_epoch, - callback: Callback::read(Box::new(move |resp| { - if let Err(e) = sched.schedule(Task::InitDownstream { - region_id, - downstream_id, - downstream_state, - sink, - incremental_scan_barrier: barrier, - cb: Box::new(move || cb(resp)), - }) { - error!("cdc schedule cdc task failed"; "error" => ?e); - } - })), - }, + region_epoch, + change_observer, + Callback::read(Box::new(move |resp| { + if let Err(e) = sched.schedule(Task::InitDownstream { + region_id, + downstream_id, + downstream_state, + sink, + incremental_scan_barrier: barrier, + cb: Box::new(move || cb(resp)), + }) { + error!("cdc schedule cdc task failed"; "error" => ?e); + } + })), ) { warn!("cdc send capture change cmd failed"; "region_id" => self.region_id, "error" => ?e); @@ -515,7 +513,11 @@ impl Initializer { let start_key = data_key(snap.lower_bound().unwrap_or_default()); let end_key = data_end_key(snap.upper_bound().unwrap_or_default()); let 
range = Range::new(&start_key, &end_key); - let collection = match self.engine.table_properties_collection(CF_WRITE, &[range]) { + let tablet = match self.tablet.as_ref() { + Some(t) => t, + None => return false, + }; + let collection = match tablet.table_properties_collection(CF_WRITE, &[range]) { Ok(collection) => collection, Err(_) => return false, }; @@ -572,7 +574,7 @@ mod tests { cdcpb::{EventLogType, Event_oneof_event}, errorpb::Error as ErrorHeader, }; - use raftstore::{coprocessor::ObserveHandle, store::RegionSnapshot}; + use raftstore::{coprocessor::ObserveHandle, router::CdcRaftRouter, store::RegionSnapshot}; use test_raftstore::MockRaftStoreRouter; use tikv::storage::{ kv::Engine, @@ -636,12 +638,11 @@ mod tests { .unwrap(); let downstream_state = Arc::new(AtomicCell::new(DownstreamState::Initializing)); let initializer = Initializer { - engine: engine.unwrap_or_else(|| { + tablet: engine.or_else(|| { TestEngineBuilder::new() .build_without_cache() .unwrap() .kv_engine() - .unwrap() }), sched: receiver_worker.scheduler(), sink, @@ -978,7 +979,7 @@ mod tests { mock_initializer(total_bytes, buffer, None, kv_api, false); let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); - let raft_router = MockRaftStoreRouter::new(); + let raft_router = CdcRaftRouter(MockRaftStoreRouter::new()); let concurrency_semaphore = Arc::new(Semaphore::new(1)); initializer.downstream_state.store(DownstreamState::Stopped); diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index 843b6b2f1d0..89ed4e6dbb1 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -20,9 +20,9 @@ use kvproto::{ tikvpb::TikvClient, }; use online_config::OnlineConfig; -use raftstore::coprocessor::CoprocessorHost; +use raftstore::{coprocessor::CoprocessorHost, router::CdcRaftRouter}; use test_raftstore::*; -use tikv::{config::CdcConfig, server::DEFAULT_CLUSTER_ID}; +use tikv::{config::CdcConfig, server::DEFAULT_CLUSTER_ID, storage::kv::LocalTablets}; 
use tikv_util::{ config::ReadableDuration, worker::{LazyWorker, Runnable}, @@ -185,8 +185,8 @@ impl TestSuiteBuilder { cluster.cfg.storage.api_version(), pd_cli.clone(), worker.scheduler(), - raft_router, - cluster.engines[id].kv.clone(), + CdcRaftRouter(raft_router), + LocalTablets::Singleton(cluster.engines[id].kv.clone()), cdc_ob, cluster.store_metas[id].clone(), cm.clone(), diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 4b4255b3d3e..e9b224b7375 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -13,7 +13,7 @@ use futures::{compat::Future01CompatExt, FutureExt}; use keys::{data_end_key, data_key}; use kvproto::metapb::Region; use raftstore::store::{ - fsm::store::StoreRegionMeta, Config, RegionReadProgressRegistry, Transport, + fsm::store::StoreRegionMeta, Config, ReadDelegate, RegionReadProgressRegistry, Transport, }; use slog::{info, o, Logger}; use tikv_util::{ @@ -133,6 +133,11 @@ impl StoreRegionMeta for StoreMeta { } } } + + #[inline] + fn reader(&self, region_id: u64) -> Option<&ReadDelegate> { + self.readers.get(®ion_id).map(|e| &e.0) + } } pub struct Store { diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 67b0a7adeb7..9bffe2b7983 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -9,10 +9,16 @@ use crossbeam::channel::TrySendError; use engine_traits::{KvEngine, RaftEngine}; use futures::Future; use kvproto::{ + metapb::RegionEpoch, raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, raft_serverpb::RaftMessage, }; -use raftstore::store::{AsyncReadNotifier, FetchedLogs, GenSnapRes, RegionSnapshot}; +use raftstore::{ + router::CdcHandle, + store::{ + fsm::ChangeObserver, AsyncReadNotifier, Callback, FetchedLogs, GenSnapRes, RegionSnapshot, + }, +}; use slog::warn; use super::PeerMsg; @@ -169,3 +175,23 @@ impl RaftRouter { } } } + +impl 
CdcHandle for RaftRouter { + fn capture_change( + &self, + _region_id: u64, + _region_epoch: RegionEpoch, + _change_observer: ChangeObserver, + _callback: Callback, + ) -> crate::Result<()> { + unimplemented!() + } + + fn check_leadership( + &self, + _region_id: u64, + _callback: Callback, + ) -> crate::Result<()> { + unimplemented!() + } +} diff --git a/components/raftstore/src/router.rs b/components/raftstore/src/router.rs index 0f22eb483a0..3a76a5ad26f 100644 --- a/components/raftstore/src/router.rs +++ b/components/raftstore/src/router.rs @@ -13,7 +13,7 @@ use tikv_util::time::ThreadReadId; use crate::{ store::{ - fsm::RaftRouter, + fsm::{ChangeObserver, RaftRouter}, transport::{CasualRouter, ProposalRouter, SignificantRouter}, Callback, CasualMessage, LocalReader, PeerMsg, RaftCmdExtraOpts, RaftCommand, SignificantMsg, StoreMsg, StoreRouter, @@ -384,3 +384,68 @@ impl crate::coprocessor::StoreHandle for RaftRoute ); } } + +/// A handle for cdc and pitr to schedule some command back to raftstore. +pub trait CdcHandle: Clone + Send +where + EK: KvEngine, +{ + fn capture_change( + &self, + region_id: u64, + region_epoch: metapb::RegionEpoch, + change_observer: ChangeObserver, + callback: Callback, + ) -> RaftStoreResult<()>; + + fn check_leadership( + &self, + region_id: u64, + callback: Callback, + ) -> RaftStoreResult<()>; +} + +/// A wrapper of SignificantRouter that is specialized for implementing +/// CdcHandle. 
+#[derive(Clone)] +pub struct CdcRaftRouter(pub T); + +impl std::ops::Deref for CdcRaftRouter { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl CdcHandle for CdcRaftRouter +where + EK: KvEngine, + T: SignificantRouter + Send + Clone, +{ + fn capture_change( + &self, + region_id: u64, + region_epoch: metapb::RegionEpoch, + change_observer: ChangeObserver, + callback: Callback, + ) -> RaftStoreResult<()> { + self.0.significant_send( + region_id, + SignificantMsg::CaptureChange { + cmd: change_observer, + region_epoch, + callback, + }, + ) + } + + fn check_leadership( + &self, + region_id: u64, + callback: Callback, + ) -> RaftStoreResult<()> { + self.0 + .significant_send(region_id, SignificantMsg::LeaderCallback(callback)) + } +} diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index a546b286a68..7c71dc3825e 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -120,6 +120,7 @@ pub struct StoreInfo { /// of raftstore. 
pub trait StoreRegionMeta: Send { fn store_id(&self) -> u64; + fn reader(&self, region_id: u64) -> Option<&ReadDelegate>; fn region_read_progress(&self) -> &RegionReadProgressRegistry; fn search_region(&self, start_key: &[u8], end_key: &[u8], visitor: impl FnMut(&Region)); } @@ -189,6 +190,11 @@ impl StoreRegionMeta for StoreMeta { fn region_read_progress(&self) -> &RegionReadProgressRegistry { &self.region_read_progress } + + #[inline] + fn reader(&self, region_id: u64) -> Option<&ReadDelegate> { + self.readers.get(®ion_id) + } } impl StoreMeta { diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index 611d8a84424..4739b679393 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -24,11 +24,8 @@ use kvproto::{ use pd_client::PdClient; use protobuf::Message; use raftstore::{ - router::RaftStoreRouter, - store::{ - msg::{Callback, SignificantMsg}, - util::RegionReadProgressRegistry, - }, + router::CdcHandle, + store::{msg::Callback, util::RegionReadProgressRegistry}, }; use security::SecurityManager; use tikv_util::{ @@ -225,18 +222,18 @@ impl LeadershipResolver { &self, regions: Vec, min_ts: TimeStamp, - raft_router: T, + cdc_handle: T, ) -> Vec where - T: 'static + RaftStoreRouter, + T: 'static + CdcHandle, E: KvEngine, { let mut reqs = Vec::with_capacity(regions.len()); for region_id in regions { - let raft_router_clone = raft_router.clone(); + let cdc_handle_clone = cdc_handle.clone(); let req = async move { let (tx, rx) = tokio::sync::oneshot::channel(); - let msg = SignificantMsg::LeaderCallback(Callback::read(Box::new(move |resp| { + let callback = Callback::read(Box::new(move |resp| { let resp = if resp.response.get_header().has_error() { None } else { @@ -245,8 +242,8 @@ impl LeadershipResolver { if tx.send(resp).is_err() { error!("cdc send tso response failed"; "region_id" => region_id); } - }))); - if let Err(e) = raft_router_clone.significant_send(region_id, msg) { + })); 
+ if let Err(e) = cdc_handle_clone.check_leadership(region_id, callback) { warn!("cdc send LeaderCallback failed"; "err" => ?e, "min_ts" => min_ts); return None; } diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 8d2ee1631b4..23be4a62fc5 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -19,9 +19,9 @@ use online_config::{self, ConfigChange, ConfigManager, OnlineConfig}; use pd_client::PdClient; use raftstore::{ coprocessor::{CmdBatch, ObserveHandle, ObserveId}, - router::RaftStoreRouter, + router::CdcHandle, store::{ - fsm::StoreMeta, + fsm::store::StoreRegionMeta, util::{self, RegionReadProgress, RegionReadProgressRegistry}, }, }; @@ -266,11 +266,11 @@ impl ObserveRegion { } } -pub struct Endpoint { +pub struct Endpoint { store_id: Option, cfg: ResolvedTsConfig, advance_notify: Arc, - store_meta: Arc>, + store_meta: Arc>, region_read_progress: RegionReadProgressRegistry, regions: HashMap, scanner_pool: ScannerPool, @@ -279,16 +279,17 @@ pub struct Endpoint { _phantom: PhantomData<(T, E)>, } -impl Endpoint +impl Endpoint where - T: 'static + RaftStoreRouter, + T: 'static + CdcHandle, E: KvEngine, + S: StoreRegionMeta, { pub fn new( cfg: &ResolvedTsConfig, scheduler: Scheduler, - raft_router: T, - store_meta: Arc>, + cdc_handle: T, + store_meta: Arc>, pd_client: Arc, concurrency_manager: ConcurrencyManager, env: Arc, @@ -296,7 +297,7 @@ where ) -> Self { let (region_read_progress, store_id) = { let meta = store_meta.lock().unwrap(); - (meta.region_read_progress.clone(), meta.store_id) + (meta.region_read_progress().clone(), meta.store_id()) }; let advance_worker = AdvanceTsWorker::new( cfg.advance_ts_interval.0, @@ -304,10 +305,10 @@ where scheduler.clone(), concurrency_manager, ); - let scanner_pool = ScannerPool::new(cfg.scan_lock_pool_size, raft_router); + let scanner_pool = ScannerPool::new(cfg.scan_lock_pool_size, cdc_handle); let store_resolver_gc_interval = 
Duration::from_secs(60); let leader_resolver = LeadershipResolver::new( - store_id.unwrap(), + store_id, pd_client.clone(), env, security_mgr, @@ -315,7 +316,7 @@ where store_resolver_gc_interval, ); let ep = Self { - store_id, + store_id: Some(store_id), cfg: cfg.clone(), advance_notify: Arc::new(Notify::new()), scheduler, @@ -492,8 +493,8 @@ where let region; { let meta = self.store_meta.lock().unwrap(); - match meta.regions.get(®ion_id) { - Some(r) => region = r.clone(), + match meta.reader(region_id) { + Some(r) => region = r.region.as_ref().clone(), None => return, } } @@ -592,8 +593,8 @@ where fn get_or_init_store_id(&mut self) -> Option { self.store_id.or_else(|| { let meta = self.store_meta.lock().unwrap(); - self.store_id = meta.store_id; - meta.store_id + self.store_id = Some(meta.store_id()); + self.store_id }) } } @@ -698,10 +699,11 @@ impl fmt::Display for Task { } } -impl Runnable for Endpoint +impl Runnable for Endpoint where - T: 'static + RaftStoreRouter, + T: 'static + CdcHandle, E: KvEngine, + S: StoreRegionMeta, { type Task = Task; @@ -754,10 +756,11 @@ impl ConfigManager for ResolvedTsConfigManager { const METRICS_FLUSH_INTERVAL: u64 = 10_000; // 10s -impl RunnableWithTimer for Endpoint +impl RunnableWithTimer for Endpoint where - T: 'static + RaftStoreRouter, + T: 'static + CdcHandle, E: KvEngine, + S: StoreRegionMeta, { fn on_timeout(&mut self) { let store_id = self.get_or_init_store_id(); diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index 7877de718ba..a8c4e5bb44f 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -7,12 +7,8 @@ use futures::compat::Future01CompatExt; use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb::Region}; use raftstore::{ coprocessor::{ObserveHandle, ObserveId}, - router::RaftStoreRouter, - store::{ - fsm::ChangeObserver, - msg::{Callback, SignificantMsg}, - RegionSnapshot, - }, + router::CdcHandle, + 
store::{fsm::ChangeObserver, msg::Callback, RegionSnapshot}, }; use tikv::storage::{ kv::{ScanMode as MvccScanMode, Snapshot}, @@ -64,12 +60,12 @@ pub enum ScanEntry { #[derive(Clone)] pub struct ScannerPool { workers: Arc, - raft_router: T, + cdc_handle: T, _phantom: PhantomData, } -impl, E: KvEngine> ScannerPool { - pub fn new(count: usize, raft_router: T) -> Self { +impl, E: KvEngine> ScannerPool { + pub fn new(count: usize, cdc_handle: T) -> Self { let workers = Arc::new( Builder::new_multi_thread() .thread_name("inc-scan") @@ -81,15 +77,15 @@ impl, E: KvEngine> ScannerPool { ); Self { workers, - raft_router, + cdc_handle, _phantom: PhantomData::default(), } } pub fn spawn_task(&self, mut task: ScanTask) { - let raft_router = self.raft_router.clone(); + let cdc_handle = self.cdc_handle.clone(); let fut = async move { - let snap = match Self::get_snapshot(&mut task, raft_router).await { + let snap = match Self::get_snapshot(&mut task, cdc_handle).await { Ok(snap) => snap, Err(e) => { warn!("resolved_ts scan get snapshot failed"; "err" => ?e); @@ -181,7 +177,7 @@ impl, E: KvEngine> ScannerPool { async fn get_snapshot( task: &mut ScanTask, - raft_router: T, + cdc_handle: T, ) -> Result> { let mut last_err = None; for retry_times in 0..=GET_SNAPSHOT_RETRY_TIME { @@ -201,13 +197,11 @@ impl, E: KvEngine> ScannerPool { } let (cb, fut) = tikv_util::future::paired_future_callback(); let change_cmd = ChangeObserver::from_rts(task.region.id, task.handle.clone()); - raft_router.significant_send( + cdc_handle.capture_change( task.region.id, - SignificantMsg::CaptureChange { - cmd: change_cmd, - region_epoch: task.region.get_region_epoch().clone(), - callback: Callback::read(Box::new(cb)), - }, + task.region.get_region_epoch().clone(), + change_cmd, + Callback::read(Box::new(cb)), )?; let mut resp = box_try!(fut.await); if resp.response.get_header().has_error() { diff --git a/components/resolved_ts/tests/mod.rs b/components/resolved_ts/tests/mod.rs index 
314a11db1a2..36705f9c015 100644 --- a/components/resolved_ts/tests/mod.rs +++ b/components/resolved_ts/tests/mod.rs @@ -13,7 +13,7 @@ use kvproto::{ tikvpb::TikvClient, }; use online_config::ConfigValue; -use raftstore::coprocessor::CoprocessorHost; +use raftstore::{coprocessor::CoprocessorHost, router::CdcRaftRouter}; use resolved_ts::{Observer, Task}; use test_raftstore::*; use tikv::config::ResolvedTsConfig; @@ -81,7 +81,7 @@ impl TestSuite { let rts_endpoint = resolved_ts::Endpoint::new( &cfg, worker.scheduler(), - raft_router, + CdcRaftRouter(raft_router), cluster.store_metas[id].clone(), pd_cli.clone(), cm.clone(), diff --git a/components/server/src/server.rs b/components/server/src/server.rs index cc07ff85471..3243b207aca 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -55,7 +55,7 @@ use raftstore::{ config::SplitCheckConfigManager, BoxConsistencyCheckObserver, ConsistencyCheckMethod, CoprocessorHost, RawConsistencyCheckObserver, RegionInfoAccessor, }, - router::ServerRaftStoreRouter, + router::{CdcRaftRouter, ServerRaftStoreRouter}, store::{ config::RaftstoreConfigManager, fsm, @@ -936,8 +936,8 @@ where self.core.config.storage.api_version(), self.pd_client.clone(), cdc_scheduler.clone(), - self.router.clone(), - self.engines.as_ref().unwrap().engines.kv.clone(), + CdcRaftRouter(self.router.clone()), + LocalTablets::Singleton(self.engines.as_ref().unwrap().engines.kv.clone()), cdc_ob, engines.store_meta.clone(), self.concurrency_manager.clone(), @@ -954,7 +954,7 @@ where let rts_endpoint = resolved_ts::Endpoint::new( &self.core.config.resolved_ts, rts_worker.scheduler(), - self.router.clone(), + CdcRaftRouter(self.router.clone()), engines.store_meta.clone(), self.pd_client.clone(), self.concurrency_manager.clone(), diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index a29c344884f..83b83ad190e 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -23,6 
+23,7 @@ use std::{ use api_version::{dispatch_api_version, KvFormat}; use causal_ts::CausalTsProviderImpl; +use cdc::{CdcConfigManager, MemoryQuota}; use concurrency_manager::ConcurrencyManager; use engine_rocks::{from_rocks_compression_type, RocksEngine, RocksStatistics}; use engine_traits::{Engines, KvEngine, MiscExt, RaftEngine, TabletRegistry, CF_DEFAULT, CF_WRITE}; @@ -31,8 +32,8 @@ use futures::executor::block_on; use grpcio::{EnvBuilder, Environment}; use grpcio_health::HealthService; use kvproto::{ - brpb::create_backup, deadlock::create_deadlock, diagnosticspb::create_diagnostics, - import_sstpb_grpc::create_import_sst, kvrpcpb::ApiVersion, + brpb::create_backup, cdcpb_grpc::create_change_data, deadlock::create_deadlock, + diagnosticspb::create_diagnostics, import_sstpb_grpc::create_import_sst, kvrpcpb::ApiVersion, resource_usage_agent::create_resource_metering_pub_sub, }; use pd_client::{PdClient, RpcClient}; @@ -87,7 +88,7 @@ use tikv_util::{ sys::{disk, path_in_diff_mount_point, register_memory_usage_high_water, SysQuota}, thread_group::GroupProperties, time::{Instant, Monitor}, - worker::{Builder as WorkerBuilder, LazyWorker, Scheduler, Worker}, + worker::{Builder as WorkerBuilder, LazyWorker, Scheduler}, yatp_pool::CleanupMethod, Either, }; @@ -186,7 +187,8 @@ struct TikvServer { coprocessor_host: Option>, concurrency_manager: ConcurrencyManager, env: Arc, - check_leader_worker: Worker, + cdc_scheduler: Option>, + cdc_memory_quota: Option, sst_worker: Option>>, quota_limiter: Arc, resource_manager: Option>, @@ -295,10 +297,6 @@ where info!("Causal timestamp provider startup."); } - // Run check leader in a dedicate thread, because it is time sensitive - // and crucial to TiCDC replication lag. 
- let check_leader_worker = WorkerBuilder::new("check-leader").thread_count(1).create(); - TikvServer { core: TikvServerCore { config, @@ -325,7 +323,8 @@ where coprocessor_host: None, concurrency_manager, env, - check_leader_worker, + cdc_scheduler: None, + cdc_memory_quota: None, sst_worker: None, quota_limiter, resource_manager, @@ -381,7 +380,7 @@ where ); lock_mgr.register_detector_role_change_observer(self.coprocessor_host.as_mut().unwrap()); - let engines = self.engines.as_ref().unwrap(); + let engines = self.engines.as_mut().unwrap(); let pd_worker = LazyWorker::new("pd-worker"); let pd_sender = raftstore_v2::PdReporter::new( @@ -551,13 +550,82 @@ where unified_read_pool_scale_receiver = Some(rx); } + // Run check leader in a dedicate thread, because it is time sensitive + // and crucial to TiCDC replication lag. + let check_leader_worker = + Box::new(WorkerBuilder::new("check-leader").thread_count(1).create()); + // Create check leader runer. let check_leader_runner = CheckLeaderRunner::new( self.router.as_ref().unwrap().store_meta().clone(), self.coprocessor_host.clone().unwrap(), ); - let check_leader_scheduler = self - .check_leader_worker - .start("check-leader", check_leader_runner); + let check_leader_scheduler = check_leader_worker.start("check-leader", check_leader_runner); + self.core.to_stop.push(check_leader_worker); + + // Create cdc worker. + let mut cdc_worker = Box::new(LazyWorker::new("cdc")); + let cdc_scheduler = cdc_worker.scheduler(); + let txn_extra_scheduler = cdc::CdcTxnExtraScheduler::new(cdc_scheduler.clone()); + engines + .engine + .set_txn_extra_scheduler(Arc::new(txn_extra_scheduler)); + // Register cdc observer. + let cdc_ob = cdc::CdcObserver::new(cdc_scheduler.clone()); + cdc_ob.register_to(self.coprocessor_host.as_mut().unwrap()); + // Register cdc config manager. + cfg_controller.register( + tikv::config::Module::Cdc, + Box::new(CdcConfigManager(cdc_worker.scheduler())), + ); + // Start cdc endpoint. 
+ let cdc_memory_quota = MemoryQuota::new(self.core.config.cdc.sink_memory_quota.0 as _); + let cdc_endpoint = cdc::Endpoint::new( + self.core.config.server.cluster_id, + &self.core.config.cdc, + self.core.config.storage.api_version(), + self.pd_client.clone(), + cdc_scheduler.clone(), + self.router.clone().unwrap(), + LocalTablets::Registry(self.tablet_registry.as_ref().unwrap().clone()), + cdc_ob, + self.router.as_ref().unwrap().store_meta().clone(), + self.concurrency_manager.clone(), + self.env.clone(), + self.security_mgr.clone(), + cdc_memory_quota.clone(), + self.causal_ts_provider.clone(), + ); + cdc_worker.start_with_timer(cdc_endpoint); + self.core.to_stop.push(cdc_worker); + self.cdc_scheduler = Some(cdc_scheduler); + self.cdc_memory_quota = Some(cdc_memory_quota); + + // Create resolved ts. + if self.core.config.resolved_ts.enable { + let mut rts_worker = Box::new(LazyWorker::new("resolved-ts")); + // Register the resolved ts observer + let resolved_ts_ob = resolved_ts::Observer::new(rts_worker.scheduler()); + resolved_ts_ob.register_to(self.coprocessor_host.as_mut().unwrap()); + // Register config manager for resolved ts worker + cfg_controller.register( + tikv::config::Module::ResolvedTs, + Box::new(resolved_ts::ResolvedTsConfigManager::new( + rts_worker.scheduler(), + )), + ); + let rts_endpoint = resolved_ts::Endpoint::new( + &self.core.config.resolved_ts, + rts_worker.scheduler(), + self.router.clone().unwrap(), + self.router.as_ref().unwrap().store_meta().clone(), + self.pd_client.clone(), + self.concurrency_manager.clone(), + self.env.clone(), + self.security_mgr.clone(), + ); + rts_worker.start_with_timer(rts_endpoint); + self.core.to_stop.push(rts_worker); + } let server_config = Arc::new(VersionTrack::new(self.core.config.server.clone())); @@ -775,6 +843,18 @@ where .unwrap() .register(tikv::config::Module::Import, Box::new(import_cfg_mgr)); + let cdc_service = cdc::Service::new( + self.cdc_scheduler.as_ref().unwrap().clone(), + 
self.cdc_memory_quota.as_ref().unwrap().clone(), + ); + if servers + .server + .register_service(create_change_data(cdc_service)) + .is_some() + { + fatal!("failed to register cdc service"); + } + // Create Diagnostics service let diag_service = DiagnosticsService::new( servers.server.get_debug_thread_pool().clone(), diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index e7b43850e27..967ae4b980c 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -33,7 +33,7 @@ use pd_client::PdClient; use raftstore::{ coprocessor::{CoprocessorHost, RegionInfoAccessor}, errors::Error as RaftError, - router::{LocalReadRouter, RaftStoreRouter, ServerRaftStoreRouter}, + router::{CdcRaftRouter, LocalReadRouter, RaftStoreRouter, ServerRaftStoreRouter}, store::{ fsm::{store::StoreMeta, ApplyRouter, RaftBatchSystem, RaftRouter}, msg::RaftCmdExtraOpts, @@ -355,7 +355,7 @@ impl ServerCluster { let rts_endpoint = resolved_ts::Endpoint::new( &cfg.resolved_ts, rts_worker.scheduler(), - raft_router, + CdcRaftRouter(raft_router), store_meta.clone(), self.pd_client.clone(), concurrency_manager.clone(), From ba805983dbadacee514bc01918d6c544f3426f4f Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Wed, 12 Apr 2023 15:09:01 +0800 Subject: [PATCH 0625/1149] storage: implement the row value checksum encode logic in tikv side (#14529) ref tikv/tikv#14528 Signed-off-by: cfzjywxk Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/tidb_query_datatype/Cargo.toml | 1 + .../src/codec/row/v2/encoder_for_test.rs | 186 ++++++++++++++++++ 3 files changed, 188 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index a508216a0e9..ff47b828c17 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6321,6 +6321,7 @@ dependencies = [ "chrono-tz", "codec", "collections", + "crc32fast", "encoding_rs 0.8.29 (git+https://github.com/xiongjiwei/encoding_rs.git?rev=68e0bc5a72a37a78228d80cd98047326559cf43c)", "error_code", "hex 
0.4.2", diff --git a/components/tidb_query_datatype/Cargo.toml b/components/tidb_query_datatype/Cargo.toml index c1be29a956d..97fb2d101b6 100644 --- a/components/tidb_query_datatype/Cargo.toml +++ b/components/tidb_query_datatype/Cargo.toml @@ -16,6 +16,7 @@ chrono = "0.4" chrono-tz = "0.5.1" codec = { workspace = true } collections = { workspace = true } +crc32fast = "1.2" encoding_rs = { git = "https://github.com/xiongjiwei/encoding_rs.git", rev = "68e0bc5a72a37a78228d80cd98047326559cf43c" } error_code = { workspace = true } hex = "0.4" diff --git a/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs b/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs index bedbc7324ce..343f2520230 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs @@ -47,6 +47,7 @@ const MAX_U8: u64 = u8::MAX as u64; const MAX_U16: u64 = u16::MAX as u64; const MAX_U32: u64 = u32::MAX as u64; +#[derive(Clone)] pub struct Column { id: i64, value: ScalarValue, @@ -89,8 +90,107 @@ impl Column { } } +/// Checksum +/// - HEADER(1 byte) +/// - VER: version(3 bit) +/// - E: has extra checksum +/// - CHECKSUM(4 bytes) +/// - little-endian CRC32(IEEE) when hdr.ver = 0 (default) +pub trait ChecksumHandler { + // update_col updates the checksum with the encoded value of the column. + fn checksum(&mut self, buf: &[u8]) -> Result<()>; + + // header_value returns the checksum header value. + fn header_value(&self) -> u8; + + // value returns the checksum value. 
+ fn value(&self) -> u32; +} + +pub struct Crc32RowChecksumHandler { + header: ChecksumHeader, + hasher: crc32fast::Hasher, +} + +impl ChecksumHandler for Crc32RowChecksumHandler { + fn checksum(&mut self, buf: &[u8]) -> Result<()> { + self.hasher.update(buf); + Ok(()) + } + + fn header_value(&self) -> u8 { + self.header.value() + } + + fn value(&self) -> u32 { + self.hasher.clone().finalize() + } +} + +pub struct ChecksumHeader(u8); + +impl ChecksumHeader { + fn new() -> Self { + ChecksumHeader(0) + } + + #[cfg(test)] + fn set_version(&mut self, ver: u8) { + self.0 &= !0b111; + self.0 |= ver & 0b111; + } + + fn set_extra_checksum(&mut self) { + self.0 |= 0b1000; + } + + fn value(&self) -> u8 { + self.0 + } +} + +impl Crc32RowChecksumHandler { + pub fn new(has_extra_checksum: bool) -> Self { + let mut res = Crc32RowChecksumHandler { + header: ChecksumHeader::new(), + hasher: crc32fast::Hasher::new(), + }; + if has_extra_checksum { + res.header.set_extra_checksum(); + } + + res + } +} + +impl Default for Crc32RowChecksumHandler { + fn default() -> Self { + Self::new(false) + } +} + pub trait RowEncoder: NumberEncoder { fn write_row(&mut self, ctx: &mut EvalContext, columns: Vec) -> Result<()> { + self.write_row_impl(ctx, columns, None, None) + } + + fn write_row_with_checksum( + &mut self, + ctx: &mut EvalContext, + columns: Vec, + extra_checksum: Option, + ) -> Result<()> { + let mut handler = Crc32RowChecksumHandler::new(extra_checksum.is_some()); + self.write_row_impl(ctx, columns, Some(&mut handler), extra_checksum) + } + + fn write_row_impl( + &mut self, + ctx: &mut EvalContext, + columns: Vec, + mut checksum_handler: Option<&mut dyn ChecksumHandler>, + extra_checksum: Option, + ) -> Result<()> { let mut is_big = false; let mut null_ids = Vec::with_capacity(columns.len()); let mut non_null_ids = Vec::with_capacity(columns.len()); @@ -140,6 +240,18 @@ pub trait RowEncoder: NumberEncoder { } self.write_bytes(&offset_wtr)?; self.write_bytes(&value_wtr)?; + + if 
let Some(checksum_handler) = checksum_handler.as_mut() { + let header_val = checksum_handler.header_value(); + checksum_handler.checksum(value_wtr.as_slice())?; + let val = checksum_handler.value(); + self.write_u8(header_val)?; + self.write_u32_le(val)?; + if let Some(extra) = extra_checksum { + self.write_u32_le(extra)?; + } + } + Ok(()) } @@ -226,11 +338,16 @@ impl ScalarValueEncoder for T {} mod tests { use std::str::FromStr; + use codec::number::NumberDecoder; + use super::{Column, RowEncoder}; use crate::{ codec::{ data_type::ScalarValue, mysql::{duration::NANOS_PER_SEC, Decimal, Duration, Json, Time}, + row::v2::encoder_for_test::{ + ChecksumHandler, Crc32RowChecksumHandler, ScalarValueEncoder, + }, }, expr::EvalContext, }; @@ -303,4 +420,73 @@ mod tests { assert_eq!(exp, buf); } + + #[test] + fn test_encode_checksum() { + let encode_col_values = |ctx: &mut EvalContext, non_null_cols: Vec| -> Vec { + let mut res = vec![]; + for col in non_null_cols { + res.write_value(ctx, &col).unwrap(); + } + res + }; + let get_non_null_columns = |cols: &Vec| -> Vec { + let mut res = vec![]; + for col in cols { + if col.value.is_some() { + res.push(col.clone()); + } + } + res.sort_by_key(|c| c.id); + res + }; + let cols = vec![ + Column::new(1, 1000), + Column::new(12, 2), + Column::new(335, ScalarValue::Int(None)), + Column::new(3, 3), + Column::new(8, 32767), + ]; + + let mut buf = vec![]; + let mut handler = Crc32RowChecksumHandler::new(false); + handler.header.set_version(0); + buf.write_row_impl( + &mut EvalContext::default(), + cols.clone(), + Some(&mut handler), + None, + ) + .unwrap(); + + let exp = { + let mut hasher = crc32fast::Hasher::new(); + hasher.update( + encode_col_values(&mut EvalContext::default(), get_non_null_columns(&cols)) + .as_slice(), + ); + hasher.finalize() + }; + let mut val_slice = &buf[buf.len() - 4..]; + assert_eq!(exp, handler.value()); + assert_eq!(exp, val_slice.read_u32_le().unwrap()); + assert_eq!(0, handler.header_value()); + + 
buf.clear(); + let mut handler = Crc32RowChecksumHandler::new(true); + handler.header.set_version(1); + buf.write_row_impl( + &mut EvalContext::default(), + cols, + Some(&mut handler), + Some(exp), + ) + .unwrap(); + let mut val_slice = &buf[buf.len() - 4..]; + let mut extra_val_slice = &buf[buf.len() - 8..buf.len() - 4]; + assert_eq!(exp, handler.value()); + assert_eq!(exp, val_slice.read_u32_le().unwrap()); + assert_eq!(exp, extra_val_slice.read_u32_le().unwrap()); + assert_eq!(9, handler.header_value()); + } } From a4f995fb138c4aaa51475a625fa06795e92525ce Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 12 Apr 2023 16:31:01 +0800 Subject: [PATCH 0626/1149] server2: disable cdc and resolved_ts as they are not fully implemented (#14560) ref tikv/tikv#14542 Disable cdc and resolved_ts as they are not fully implemented Signed-off-by: Neil Shen --- components/server/src/server2.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 83b83ad190e..86d3a9a696f 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -563,7 +563,7 @@ where self.core.to_stop.push(check_leader_worker); // Create cdc worker. - let mut cdc_worker = Box::new(LazyWorker::new("cdc")); + let cdc_worker = Box::new(LazyWorker::new("cdc")); let cdc_scheduler = cdc_worker.scheduler(); let txn_extra_scheduler = cdc::CdcTxnExtraScheduler::new(cdc_scheduler.clone()); engines @@ -579,7 +579,7 @@ where ); // Start cdc endpoint. 
let cdc_memory_quota = MemoryQuota::new(self.core.config.cdc.sink_memory_quota.0 as _); - let cdc_endpoint = cdc::Endpoint::new( + let _cdc_endpoint = cdc::Endpoint::new( self.core.config.server.cluster_id, &self.core.config.cdc, self.core.config.storage.api_version(), @@ -595,14 +595,15 @@ where cdc_memory_quota.clone(), self.causal_ts_provider.clone(), ); - cdc_worker.start_with_timer(cdc_endpoint); - self.core.to_stop.push(cdc_worker); + // TODO: enable cdc. + // cdc_worker.start_with_timer(cdc_endpoint); + // self.core.to_stop.push(cdc_worker); self.cdc_scheduler = Some(cdc_scheduler); self.cdc_memory_quota = Some(cdc_memory_quota); // Create resolved ts. if self.core.config.resolved_ts.enable { - let mut rts_worker = Box::new(LazyWorker::new("resolved-ts")); + let rts_worker = Box::new(LazyWorker::new("resolved-ts")); // Register the resolved ts observer let resolved_ts_ob = resolved_ts::Observer::new(rts_worker.scheduler()); resolved_ts_ob.register_to(self.coprocessor_host.as_mut().unwrap()); @@ -613,7 +614,7 @@ where rts_worker.scheduler(), )), ); - let rts_endpoint = resolved_ts::Endpoint::new( + let _rts_endpoint = resolved_ts::Endpoint::new( &self.core.config.resolved_ts, rts_worker.scheduler(), self.router.clone().unwrap(), @@ -623,8 +624,9 @@ where self.env.clone(), self.security_mgr.clone(), ); - rts_worker.start_with_timer(rts_endpoint); - self.core.to_stop.push(rts_worker); + // TODO: enable resolved_ts. 
+ // rts_worker.start_with_timer(rts_endpoint); + // self.core.to_stop.push(rts_worker); } let server_config = Arc::new(VersionTrack::new(self.core.config.server.clone())); From 3630ba96c37092f7309eb7e858500dbd76d8614d Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Wed, 12 Apr 2023 19:11:02 +0800 Subject: [PATCH 0627/1149] txn: Support check for_update_ts when prewriting (#14492) ref tikv/tikv#14311 Supports checking for_update_ts for specific keys during prewrite to avoid potential lost update that might be caused by allowing locking with conflict. Signed-off-by: MyonKeminta Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/resolved_ts/src/cmd.rs | 1 + src/storage/mod.rs | 6 + src/storage/mvcc/mod.rs | 34 ++- src/storage/mvcc/reader/reader.rs | 2 + src/storage/mvcc/txn.rs | 9 +- .../txn/actions/acquire_pessimistic_lock.rs | 2 - src/storage/txn/actions/prewrite.rs | 196 ++++++++++++- src/storage/txn/actions/tests.rs | 90 +++++- src/storage/txn/commands/mod.rs | 23 ++ src/storage/txn/commands/prewrite.rs | 263 ++++++++++++++++-- src/storage/txn/mod.rs | 1 + src/storage/txn/store.rs | 1 + tests/benches/hierarchy/mvcc/mod.rs | 2 + tests/benches/hierarchy/txn/mod.rs | 2 + tests/failpoints/cases/test_storage.rs | 4 + 16 files changed, 594 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ff47b828c17..b654e34fb77 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2792,7 +2792,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#af969693ce8a7884e5bdc5d81c728f657d33065a" +source = "git+https://github.com/pingcap/kvproto.git#ce835ae20dfcb5f69f0aea04236070932c815b6a" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/resolved_ts/src/cmd.rs b/components/resolved_ts/src/cmd.rs index d3bda563a4f..47d14304112 100644 --- a/components/resolved_ts/src/cmd.rs +++ b/components/resolved_ts/src/cmd.rs @@ -424,6 +424,7 @@ 
mod tests { Mutation::make_put(k1.clone(), b"v4".to_vec()), &None, SkipPessimisticCheck, + None, ) .unwrap(); one_pc_commit(true, &mut txn, 10.into()); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 8f955f3850d..11740bcc2bf 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -8254,6 +8254,7 @@ mod tests { None, false, AssertionLevel::Off, + vec![], Context::default(), ), expect_ok_callback(tx.clone(), 0), @@ -9620,6 +9621,7 @@ mod tests { Some(vec![b"e".to_vec()]), false, AssertionLevel::Off, + vec![], Context::default(), ), Box::new(move |res| { @@ -9718,6 +9720,7 @@ mod tests { None, false, AssertionLevel::Off, + vec![], Default::default(), ), expect_ok_callback(tx.clone(), 0), @@ -9768,6 +9771,7 @@ mod tests { Some(vec![k2.to_vec()]), false, AssertionLevel::Off, + vec![], Default::default(), ), expect_ok_callback(tx.clone(), 0), @@ -10604,6 +10608,7 @@ mod tests { None, false, AssertionLevel::Off, + vec![], Context::default(), ), Box::new(move |res| { @@ -10662,6 +10667,7 @@ mod tests { None, false, AssertionLevel::Off, + vec![], Context::default(), ), Box::new(move |res| { diff --git a/src/storage/mvcc/mod.rs b/src/storage/mvcc/mod.rs index 3dca7a219f9..0f133b99941 100644 --- a/src/storage/mvcc/mod.rs +++ b/src/storage/mvcc/mod.rs @@ -132,10 +132,14 @@ pub enum ErrorInner { KeyVersion, #[error( - "pessimistic lock not found, start_ts:{}, key:{}", - .start_ts, log_wrappers::Value::key(.key) + "pessimistic lock not found, start_ts:{}, key:{}, reason: {:?}", + .start_ts, log_wrappers::Value::key(.key), .reason )] - PessimisticLockNotFound { start_ts: TimeStamp, key: Vec }, + PessimisticLockNotFound { + start_ts: TimeStamp, + key: Vec, + reason: PessimisticLockNotFoundReason, + }, #[error( "min_commit_ts {} is larger than max_commit_ts {}, start_ts: {}", @@ -257,12 +261,15 @@ impl ErrorInner { key: key.to_owned(), }) } - ErrorInner::PessimisticLockNotFound { start_ts, key } => { - Some(ErrorInner::PessimisticLockNotFound { - start_ts: *start_ts, 
- key: key.to_owned(), - }) - } + ErrorInner::PessimisticLockNotFound { + start_ts, + key, + reason, + } => Some(ErrorInner::PessimisticLockNotFound { + start_ts: *start_ts, + key: key.to_owned(), + reason: *reason, + }), ErrorInner::CommitTsTooLarge { start_ts, min_commit_ts, @@ -421,6 +428,15 @@ pub fn default_not_found_error(key: Vec, hint: &str) -> Error { } } +#[derive(Debug, Clone, Copy)] +pub enum PessimisticLockNotFoundReason { + LockTsMismatch, + LockMissingAmendFail, + LockForUpdateTsMismatch, + NonLockKeyConflict, + FailpointInjected, +} + pub mod tests { use std::borrow::Cow; diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 36e8816ad25..7c15c6d7735 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -886,6 +886,7 @@ pub mod tests { m, &None, SkipPessimisticCheck, + None, ) .unwrap(); self.write(txn.into_modifies()); @@ -910,6 +911,7 @@ pub mod tests { m, &None, DoPessimisticCheck, + None, ) .unwrap(); self.write(txn.into_modifies()); diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index 9e87bf748b7..f395b07e7f8 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -9,7 +9,7 @@ use kvproto::kvrpcpb::LockInfo; use txn_types::{Key, Lock, PessimisticLock, TimeStamp, Value}; use super::metrics::{GC_DELETE_VERSIONS_HISTOGRAM, MVCC_VERSIONS_HISTOGRAM}; -use crate::storage::kv::Modify; +use crate::storage::{kv::Modify, mvcc::PessimisticLockNotFoundReason}; pub const MAX_TXN_WRITE_SIZE: usize = 32 * 1024; @@ -306,6 +306,7 @@ pub(crate) fn make_txn_error( "pessimisticlocknotfound" => ErrorInner::PessimisticLockNotFound { start_ts, key: key.to_raw().unwrap(), + reason: PessimisticLockNotFoundReason::FailpointInjected, }, _ => ErrorInner::Other(box_err!("unexpected error string")), } @@ -815,6 +816,7 @@ pub(crate) mod tests { Mutation::make_put(key.clone(), v.to_vec()), &None, SkipPessimisticCheck, + None, ) .unwrap(); assert!(txn.write_size() > 0); @@ 
-859,6 +861,7 @@ pub(crate) mod tests { Mutation::make_put(Key::from_raw(key), value.to_vec()), &None, SkipPessimisticCheck, + None, ) .unwrap_err(); @@ -872,6 +875,7 @@ pub(crate) mod tests { Mutation::make_put(Key::from_raw(key), value.to_vec()), &None, SkipPessimisticCheck, + None, ) .unwrap(); } @@ -1312,6 +1316,7 @@ pub(crate) mod tests { mutation, &Some(vec![b"key1".to_vec(), b"key2".to_vec(), b"key3".to_vec()]), SkipPessimisticCheck, + None, ) .unwrap(); let modifies = txn.into_modifies(); @@ -1370,6 +1375,7 @@ pub(crate) mod tests { mutation, &Some(vec![b"key1".to_vec(), b"key2".to_vec(), b"key3".to_vec()]), DoPessimisticCheck, + None, ) .unwrap(); let modifies = txn.into_modifies(); @@ -1439,6 +1445,7 @@ pub(crate) mod tests { mutation, &Some(vec![b"key1".to_vec(), b"key2".to_vec(), b"key3".to_vec()]), DoPessimisticCheck, + None, ) .unwrap(); assert_eq!(min_commit_ts.into_inner(), 100); diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index 987af9fbed7..afdbace9e7a 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -452,7 +452,6 @@ pub mod tests { TestEngineBuilder, }; - #[cfg(test)] pub fn acquire_pessimistic_lock_allow_lock_with_conflict( engine: &mut E, key: &[u8], @@ -496,7 +495,6 @@ pub mod tests { res.map(|r| r.0) } - #[cfg(test)] pub fn must_succeed_allow_lock_with_conflict( engine: &mut E, key: &[u8], diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 1e655846d08..69cf8b32578 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -19,7 +19,8 @@ use crate::storage::{ MVCC_CONFLICT_COUNTER, MVCC_DUPLICATE_CMD_COUNTER_VEC, MVCC_PREWRITE_ASSERTION_PERF_COUNTER_VEC, }, - Error, ErrorInner, Lock, LockType, MvccTxn, Result, SnapshotReader, + Error, ErrorInner, Lock, LockType, MvccTxn, PessimisticLockNotFoundReason, Result, + 
SnapshotReader, }, txn::{ actions::check_data_constraint::check_data_constraint, sched_pool::tls_can_enable, @@ -36,6 +37,7 @@ pub fn prewrite( mutation: Mutation, secondary_keys: &Option>>, pessimistic_action: PrewriteRequestPessimisticAction, + expected_for_update_ts: Option, ) -> Result<(TimeStamp, OldValue)> { let mut mutation = PrewriteMutation::from_mutation(mutation, secondary_keys, pessimistic_action, txn_props)?; @@ -63,7 +65,7 @@ pub fn prewrite( let mut lock_amended = false; let lock_status = match reader.load_lock(&mutation.key)? { - Some(lock) => mutation.check_lock(lock, pessimistic_action)?, + Some(lock) => mutation.check_lock(lock, pessimistic_action, expected_for_update_ts)?, None if matches!(pessimistic_action, DoPessimisticCheck) => { amend_pessimistic_lock(&mut mutation, reader)?; lock_amended = true; @@ -218,16 +220,18 @@ pub enum TransactionKind { Pessimistic(TimeStamp), } +#[derive(Clone, Copy)] enum LockStatus { // Lock has already been locked; min_commit_ts of lock. Locked(TimeStamp), - Pessimistic, + // Key is pessimistic-locked; for_update_ts of lock. + Pessimistic(TimeStamp), None, } impl LockStatus { fn has_pessimistic_lock(&self) -> bool { - matches!(self, LockStatus::Pessimistic) + matches!(self, LockStatus::Pessimistic(_)) } } @@ -309,6 +313,7 @@ impl<'a> PrewriteMutation<'a> { &mut self, lock: Lock, pessimistic_action: PrewriteRequestPessimisticAction, + expected_for_update_ts: Option, ) -> Result { if lock.ts != self.txn_props.start_ts { // Abort on lock belonging to other transaction if @@ -323,6 +328,7 @@ impl<'a> PrewriteMutation<'a> { return Err(ErrorInner::PessimisticLockNotFound { start_ts: self.txn_props.start_ts, key: self.key.to_raw()?, + reason: PessimisticLockNotFoundReason::LockTsMismatch, } .into()); } @@ -344,12 +350,59 @@ impl<'a> PrewriteMutation<'a> { .into()); } + if let Some(ts) = expected_for_update_ts && lock.for_update_ts != ts { + // The constraint on for_update_ts of the pessimistic lock is violated. 
+ // Consider the following case: + // + // 1. A pessimistic lock of transaction `T1` succeeded with`WakeUpModeForceLock` + // enabled, then it returns to the client and the client continues its + // execution. + // 2. The lock is lost for some reason such as pipelined locking or in-memory + // pessimistic lock. + // 3. Another transaction `T2` writes the key and committed. + // 4. The key then receives a stale pessimistic lock request of `T1` that has + // been received in step 1 (maybe because of retrying due to network issue + // in step 1). Since it allows locking with conflict, though there's a newer + // version that's later than the request's `for_update_ts`, the request can + // still acquire the lock. However no one will check the response, which + // tells the latest commit_ts it met. + // 5. The transaction `T1` commits. When it prewrites it checks if each key is + // pessimistic-locked. + // + // Transaction `T1` won't notice anything wrong without this check since it + // does have a pessimistic lock of the same transaction. However, actually + // one of the key is locked in a larger version than that the client would + // expect. As a result, the conflict between transaction `T1` and `T2` is + // missed. + // To avoid this problem, we check the for_update_ts written on the + // pessimistic locks that's acquired in force-locking mode. If it doesn't match + // the one known by the client, the lock that we expected to have will be + // regarded as missing. + // + // It's actually theoretically safe to allow `lock.for_update_ts` < + // `expected_for_update_ts`, but the possibility to encounter this case is very + // low. For simplicity, we don't consider that case and only allow + // `lock.for_update_ts` to exactly match that we expect. + warn!("pessimistic lock have different for_update_ts than expected. 
the expected lock must have been lost"; + "key" => %self.key, + "start_ts" => self.txn_props.start_ts, + "expected_for_update_ts" => ts, + "lock" => ?lock); + + return Err(ErrorInner::PessimisticLockNotFound { + start_ts: self.txn_props.start_ts, + key: self.key.to_raw()?, + reason: PessimisticLockNotFoundReason::LockForUpdateTsMismatch, + } + .into()); + } + // The lock is pessimistic and owned by this txn, go through to overwrite it. // The ttl and min_commit_ts of the lock may have been pushed forward. self.lock_ttl = std::cmp::max(self.lock_ttl, lock.ttl); self.min_commit_ts = std::cmp::max(self.min_commit_ts, lock.min_commit_ts); - return Ok(LockStatus::Pessimistic); + return Ok(LockStatus::Pessimistic(lock.for_update_ts)); } // Duplicated command. No need to overwrite the lock and data. @@ -430,6 +483,7 @@ impl<'a> PrewriteMutation<'a> { return Err(ErrorInner::PessimisticLockNotFound { start_ts: self.txn_props.start_ts, key: self.key.clone().into_raw()?, + reason: PessimisticLockNotFoundReason::NonLockKeyConflict, } .into()); } @@ -458,13 +512,20 @@ impl<'a> PrewriteMutation<'a> { ) -> Result { let mut try_one_pc = self.try_one_pc(); + let for_update_ts_to_write = match (self.txn_props.for_update_ts(), lock_status) { + (from_prewrite_req, LockStatus::Pessimistic(from_pessimistic_lock)) => { + std::cmp::max(from_prewrite_req, from_pessimistic_lock) + } + (for_update_ts_from_req, _) => for_update_ts_from_req, + }; + let mut lock = Lock::new( self.lock_type.unwrap(), self.txn_props.primary.to_vec(), self.txn_props.start_ts, self.lock_ttl, None, - self.txn_props.for_update_ts(), + for_update_ts_to_write, self.txn_props.txn_size, self.min_commit_ts, ) @@ -758,6 +819,7 @@ fn amend_pessimistic_lock( return Err(ErrorInner::PessimisticLockNotFound { start_ts: reader.start_ts, key: mutation.key.clone().into_raw()?, + reason: PessimisticLockNotFoundReason::LockMissingAmendFail, } .into()); } @@ -798,7 +860,10 @@ pub mod tests { #[cfg(test)] use crate::storage::{ 
kv::RocksSnapshot, - txn::{commands::prewrite::fallback_1pc_locks, tests::*}, + txn::{ + commands::pessimistic_rollback::tests::must_success as must_pessimistic_rollback, + commands::prewrite::fallback_1pc_locks, tests::*, + }, }; use crate::storage::{mvcc::tests::*, Engine}; @@ -869,6 +934,7 @@ pub mod tests { Mutation::make_insert(Key::from_raw(key), value.to_vec()), &None, SkipPessimisticCheck, + None, )?; // Insert must be None if the key is not lock, or be Unspecified if the // key is already locked. @@ -900,6 +966,7 @@ pub mod tests { Mutation::make_check_not_exists(Key::from_raw(key)), &None, DoPessimisticCheck, + None, )?; assert_eq!(old_value, OldValue::Unspecified); Ok(()) @@ -922,6 +989,7 @@ pub mod tests { Mutation::make_put(Key::from_raw(b"k1"), b"v1".to_vec()), &Some(vec![b"k2".to_vec()]), SkipPessimisticCheck, + None, ) .unwrap(); assert_eq!(old_value, OldValue::None); @@ -935,6 +1003,7 @@ pub mod tests { Mutation::make_put(Key::from_raw(b"k2"), b"v2".to_vec()), &Some(vec![]), SkipPessimisticCheck, + None, ) .unwrap_err(); assert!(matches!( @@ -970,6 +1039,7 @@ pub mod tests { Mutation::make_check_not_exists(Key::from_raw(b"k0")), &Some(vec![]), SkipPessimisticCheck, + None, ) .unwrap(); assert!(min_ts > props.start_ts); @@ -990,6 +1060,7 @@ pub mod tests { Mutation::make_check_not_exists(Key::from_raw(b"k0")), &Some(vec![]), SkipPessimisticCheck, + None, ) .unwrap(); assert_eq!(cm.max_ts(), props.start_ts); @@ -1005,6 +1076,7 @@ pub mod tests { Mutation::make_put(Key::from_raw(b"k1"), b"v1".to_vec()), &Some(vec![b"k2".to_vec()]), SkipPessimisticCheck, + None, ) .unwrap(); assert!(min_ts > 42.into()); @@ -1028,6 +1100,7 @@ pub mod tests { mutation.clone(), &Some(vec![b"k4".to_vec()]), SkipPessimisticCheck, + None, ) .unwrap(); assert!(min_ts > 44.into()); @@ -1050,6 +1123,7 @@ pub mod tests { mutation.clone(), &Some(vec![b"k6".to_vec()]), SkipPessimisticCheck, + None, ) .unwrap(); assert!(min_ts > 45.into()); @@ -1069,6 +1143,7 @@ pub mod tests { 
mutation.clone(), &Some(vec![b"k8".to_vec()]), SkipPessimisticCheck, + None, ) .unwrap(); assert!(min_ts >= 46.into()); @@ -1099,6 +1174,7 @@ pub mod tests { Mutation::make_put(Key::from_raw(b"k1"), b"v1".to_vec()), &None, SkipPessimisticCheck, + None, ) .unwrap(); assert_eq!(old_value, OldValue::None); @@ -1112,6 +1188,7 @@ pub mod tests { Mutation::make_put(Key::from_raw(b"k2"), b"v2".to_vec()), &None, SkipPessimisticCheck, + None, ) .unwrap_err(); assert!(matches!( @@ -1159,6 +1236,7 @@ pub mod tests { Mutation::make_check_not_exists(Key::from_raw(key)), &None, SkipPessimisticCheck, + None, )?; assert_eq!(old_value, OldValue::Unspecified); Ok(()) @@ -1197,6 +1275,7 @@ pub mod tests { Mutation::make_put(Key::from_raw(b"k1"), b"v1".to_vec()), &Some(vec![b"k2".to_vec()]), DoPessimisticCheck, + None, ) .unwrap(); // Pessimistic txn skips constraint check, does not read previous write. @@ -1211,6 +1290,7 @@ pub mod tests { Mutation::make_put(Key::from_raw(b"k2"), b"v2".to_vec()), &Some(vec![]), DoPessimisticCheck, + None, ) .unwrap_err(); } @@ -1248,6 +1328,7 @@ pub mod tests { Mutation::make_put(Key::from_raw(b"k1"), b"v1".to_vec()), &None, DoPessimisticCheck, + None, ) .unwrap(); // Pessimistic txn skips constraint check, does not read previous write. 
@@ -1262,6 +1343,7 @@ pub mod tests { Mutation::make_put(Key::from_raw(b"k2"), b"v2".to_vec()), &None, DoPessimisticCheck, + None, ) .unwrap_err(); } @@ -1369,6 +1451,7 @@ pub mod tests { Mutation::make_check_not_exists(Key::from_raw(key)), &None, SkipPessimisticCheck, + None, ); if success { let res = res.unwrap(); @@ -1384,6 +1467,7 @@ pub mod tests { Mutation::make_insert(Key::from_raw(key), b"value".to_vec()), &None, SkipPessimisticCheck, + None, ); if success { let res = res.unwrap(); @@ -1440,6 +1524,7 @@ pub mod tests { Mutation::make_put(key.clone(), b"value".to_vec()), &None, SkipPessimisticCheck, + None, ) .unwrap(); assert_eq!(&old_value, expected_value, "key: {}", key); @@ -1694,6 +1779,7 @@ pub mod tests { Mutation::make_put(Key::from_raw(b"k1"), b"value".to_vec()), &None, SkipPessimisticCheck, + None, ) .unwrap(); assert_eq!( @@ -1749,6 +1835,7 @@ pub mod tests { Mutation::make_insert(Key::from_raw(b"k1"), b"v2".to_vec()), &None, SkipPessimisticCheck, + None, ) .unwrap(); assert_eq!(old_value, OldValue::None); @@ -1887,6 +1974,7 @@ pub mod tests { Mutation::make_put(Key::from_raw(key), b"v2".to_vec()), &None, SkipPessimisticCheck, + None, )?; Ok(old_value) })], @@ -1924,6 +2012,7 @@ pub mod tests { Mutation::make_insert(Key::from_raw(key), b"v2".to_vec()), &None, SkipPessimisticCheck, + None, )?; Ok(old_value) })], @@ -2507,4 +2596,97 @@ pub mod tests { assert_eq!(lock.versions_to_last_change, 0); must_rollback(&mut engine, key, 40, false); } + + #[test] + fn test_pessimistic_prewrite_check_for_update_ts() { + let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); + let key = b"k"; + let value = b"v"; + + let prewrite = &must_pessimistic_prewrite_put_check_for_update_ts; + let prewrite_err = &must_pessimistic_prewrite_put_check_for_update_ts_err; + + let mut test_normal = |start_ts: u64, + lock_for_update_ts: u64, + prewrite_req_for_update_ts: u64, + expected_for_update_ts: u64, + success: bool, + commit_ts: u64| { + // In actual 
cases these kinds of pessimistic locks should be locked in + // `allow_locking_with_conflict` mode. For simplicity, we pass a large + // for_update_ts to the pessimistic lock to simulate that case. + must_acquire_pessimistic_lock(&mut engine, key, key, start_ts, lock_for_update_ts); + must_pessimistic_locked(&mut engine, key, start_ts, lock_for_update_ts); + if success { + prewrite( + &mut engine, + key, + value, + key, + start_ts, + prewrite_req_for_update_ts, + Some(expected_for_update_ts), + ); + must_locked(&mut engine, key, start_ts); + // Test idempotency. + prewrite( + &mut engine, + key, + value, + key, + start_ts, + prewrite_req_for_update_ts, + Some(expected_for_update_ts), + ); + let prewrite_lock = must_locked(&mut engine, key, start_ts); + assert_le!( + TimeStamp::from(lock_for_update_ts), + prewrite_lock.for_update_ts + ); + must_commit(&mut engine, key, start_ts, commit_ts); + must_unlocked(&mut engine, key); + } else { + let e = prewrite_err( + &mut engine, + key, + value, + key, + start_ts, + prewrite_req_for_update_ts, + Some(expected_for_update_ts), + ); + match e { + Error(box ErrorInner::PessimisticLockNotFound { .. }) => (), + e => panic!("unexpected error: {:?}", e), + } + must_pessimistic_locked(&mut engine, key, start_ts, lock_for_update_ts); + must_pessimistic_rollback(&mut engine, key, start_ts, lock_for_update_ts); + must_unlocked(&mut engine, key); + } + }; + + test_normal(10, 10, 10, 10, true, 19); + // Note that the `for_update_ts` field in prewrite request is not guaranteed to + // be greater or equal to the max for_update_ts that has been written to + // a pessimistic lock during the transaction. + test_normal(20, 20, 20, 24, false, 0); + test_normal(30, 35, 30, 35, true, 39); + test_normal(40, 45, 40, 40, false, 0); + test_normal(50, 55, 56, 51, false, 0); + + // Amend pessimistic lock cases. Once amend-lock is passed, it can be guaranteed + // there are no conflict, so the check won't fail. + // Amending succeeds. 
+ must_unlocked(&mut engine, key); + prewrite(&mut engine, key, value, key, 100, 105, Some(102)); + must_locked(&mut engine, key, 100); + must_commit(&mut engine, key, 100, 125); + + // Amending fails. + must_unlocked(&mut engine, key); + prewrite_err(&mut engine, key, value, key, 120, 120, Some(120)); + must_unlocked(&mut engine, key); + prewrite_err(&mut engine, key, value, key, 120, 130, Some(130)); + must_unlocked(&mut engine, key); + } } diff --git a/src/storage/txn/actions/tests.rs b/src/storage/txn/actions/tests.rs index e6872ef493f..0fc73804aff 100644 --- a/src/storage/txn/actions/tests.rs +++ b/src/storage/txn/actions/tests.rs @@ -33,7 +33,7 @@ pub fn must_prewrite_put_impl( is_retry_request: bool, assertion: Assertion, assertion_level: AssertionLevel, -) { +) -> TimeStamp { must_prewrite_put_impl_with_should_not_exist( engine, key, @@ -42,6 +42,7 @@ pub fn must_prewrite_put_impl( secondary_keys, ts, pessimistic_action, + None, lock_ttl, for_update_ts, txn_size, @@ -53,7 +54,7 @@ pub fn must_prewrite_put_impl( false, None, 0, - ); + ) } pub fn must_prewrite_insert_impl( @@ -81,6 +82,7 @@ pub fn must_prewrite_insert_impl( secondary_keys, ts, pessimistic_action, + None, lock_ttl, for_update_ts, txn_size, @@ -103,6 +105,7 @@ pub fn must_prewrite_put_impl_with_should_not_exist( secondary_keys: &Option>>, ts: TimeStamp, pessimistic_action: PrewriteRequestPessimisticAction, + expected_for_update_ts: Option, lock_ttl: u64, for_update_ts: TimeStamp, txn_size: u64, @@ -114,7 +117,7 @@ pub fn must_prewrite_put_impl_with_should_not_exist( should_not_exist: bool, region_id: Option, txn_source: u64, -) { +) -> TimeStamp { let mut ctx = Context::default(); ctx.set_txn_source(txn_source); if let Some(region_id) = region_id { @@ -144,7 +147,7 @@ pub fn must_prewrite_put_impl_with_should_not_exist( } else { CommitKind::TwoPc }; - prewrite( + let (min_commit_ts, _) = prewrite( &mut txn, &mut reader, &TransactionProperties { @@ -163,9 +166,11 @@ pub fn 
must_prewrite_put_impl_with_should_not_exist( mutation, secondary_keys, pessimistic_action, + expected_for_update_ts, ) .unwrap(); write(engine, &ctx, txn.into_modifies()); + min_commit_ts } pub fn must_prewrite_put( @@ -210,6 +215,7 @@ pub fn must_prewrite_put_on_region( &None, ts.into(), SkipPessimisticCheck, + None, 0, TimeStamp::default(), 0, @@ -240,6 +246,7 @@ pub fn must_prewrite_put_with_txn_soucre( &None, ts.into(), SkipPessimisticCheck, + None, 0, TimeStamp::default(), 0, @@ -415,7 +422,7 @@ pub fn must_pessimistic_prewrite_put_async_commit( for_update_ts: impl Into, pessimistic_action: PrewriteRequestPessimisticAction, min_commit_ts: impl Into, -) { +) -> TimeStamp { assert!(secondary_keys.is_some()); must_prewrite_put_impl( engine, @@ -433,6 +440,38 @@ pub fn must_pessimistic_prewrite_put_async_commit( false, Assertion::None, AssertionLevel::Off, + ) +} + +pub fn must_pessimistic_prewrite_put_check_for_update_ts( + engine: &mut E, + key: &[u8], + value: &[u8], + pk: &[u8], + ts: impl Into, + for_update_ts: impl Into, + expected_for_update_ts: Option, +) { + must_prewrite_put_impl_with_should_not_exist( + engine, + key, + value, + pk, + &None, + ts.into(), + DoPessimisticCheck, + expected_for_update_ts.map(Into::into), + 0, + for_update_ts.into(), + 0, + TimeStamp::default(), + TimeStamp::default(), + false, + Assertion::None, + AssertionLevel::Off, + false, + None, + 0, ); } @@ -485,6 +524,8 @@ pub fn must_prewrite_put_err_impl( ts.into(), for_update_ts.into(), pessimistic_action, + None, + 0, max_commit_ts.into(), is_retry_request, assertion, @@ -516,6 +557,8 @@ pub fn must_prewrite_insert_err_impl( ts.into(), for_update_ts.into(), pessimistic_action, + None, + 0, max_commit_ts.into(), is_retry_request, assertion, @@ -533,6 +576,8 @@ pub fn must_prewrite_put_err_impl_with_should_not_exist( ts: impl Into, for_update_ts: impl Into, pessimistic_action: PrewriteRequestPessimisticAction, + expected_for_update_ts: Option, + min_commit_ts: impl Into, 
max_commit_ts: impl Into, is_retry_request: bool, assertion: Assertion, @@ -559,14 +604,16 @@ pub fn must_prewrite_put_err_impl_with_should_not_exist( props.is_retry_request = is_retry_request; props.commit_kind = commit_kind; props.assertion_level = assertion_level; + props.min_commit_ts = min_commit_ts.into(); prewrite( &mut txn, &mut reader, &props, mutation, - &None, + secondary_keys, pessimistic_action, + expected_for_update_ts, ) .unwrap_err() } @@ -644,6 +691,34 @@ pub fn must_pessimistic_prewrite_insert_err( ) } +pub fn must_pessimistic_prewrite_put_check_for_update_ts_err( + engine: &mut E, + key: &[u8], + value: &[u8], + pk: &[u8], + ts: impl Into, + for_update_ts: impl Into, + expected_for_update_ts: Option, +) -> Error { + must_prewrite_put_err_impl_with_should_not_exist( + engine, + key, + value, + pk, + &None, + ts, + for_update_ts, + DoPessimisticCheck, + expected_for_update_ts.map(Into::into), + 0, + 0, + false, + Assertion::None, + AssertionLevel::Off, + false, + ) +} + pub fn must_retry_pessimistic_prewrite_put_err( engine: &mut E, key: &[u8], @@ -703,6 +778,7 @@ fn must_prewrite_delete_impl( mutation, &None, pessimistic_action, + None, ) .unwrap(); @@ -781,6 +857,7 @@ fn must_prewrite_lock_impl( mutation, &None, pessimistic_action, + None, ) .unwrap(); @@ -817,6 +894,7 @@ pub fn must_prewrite_lock_err( Mutation::make_lock(Key::from_raw(key)), &None, SkipPessimisticCheck, + None, ) .unwrap_err(); } diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 5b94ea5bd85..54f5029bd6c 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -197,6 +197,7 @@ impl From for TypedCommand { secondary_keys, req.get_try_one_pc(), req.get_assertion_level(), + req.take_for_update_ts_constraints().into(), req.take_context(), ) } @@ -932,6 +933,28 @@ pub mod test_util { prewrite_command(engine, cm, statistics, cmd) } + pub fn pessimistic_prewrite_check_for_update_ts( + engine: &mut E, + statistics: &mut 
Statistics, + mutations: Vec<(Mutation, PrewriteRequestPessimisticAction)>, + primary: Vec, + start_ts: u64, + for_update_ts: u64, + for_update_ts_constraints: impl IntoIterator, + ) -> Result { + let cmd = PrewritePessimistic::with_for_update_ts_constraints( + mutations, + primary, + start_ts.into(), + for_update_ts.into(), + for_update_ts_constraints + .into_iter() + .map(|(size, ts)| (size, TimeStamp::from(ts))), + ); + let cm = ConcurrencyManager::new(start_ts.into()); + prewrite_command(engine, cm, statistics, cmd) + } + pub fn commit( engine: &mut E, statistics: &mut Statistics, diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index fbd4bf5984a..feaa641300f 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -10,7 +10,7 @@ use std::mem; use engine_traits::CF_WRITE; use kvproto::kvrpcpb::{ - AssertionLevel, ExtraOp, + AssertionLevel, ExtraOp, PrewriteRequestForUpdateTsConstraint, PrewriteRequestPessimisticAction::{self, *}, }; use tikv_kv::SnapshotExt; @@ -283,6 +283,8 @@ command! { /// Assertions is a mechanism to check the constraint on the previous version of data /// that must be satisfied as long as data is consistent. assertion_level: AssertionLevel, + /// Constraints on the pessimistic locks that have to be checked when prewriting. 
+ for_update_ts_constraints: Vec, } } @@ -290,7 +292,7 @@ impl std::fmt::Display for PrewritePessimistic { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, - "kv::command::pessimistic_prewrite mutations({:?}) primary({:?}) secondary_len({:?})@ {} {} {} {} {} {} {:?}| {:?}", + "kv::command::pessimistic_prewrite mutations({:?}) primary({:?}) secondary_len({:?})@ {} {} {} {} {} {} {:?} (for_update_ts constraints: {:?}) | {:?}", self.mutations, log_wrappers::Value::key(self.primary.as_slice()), self.secondary_keys.as_ref().map(|sk| sk.len()), @@ -301,6 +303,7 @@ impl std::fmt::Display for PrewritePessimistic { self.max_commit_ts, self.try_one_pc, self.assertion_level, + self.for_update_ts_constraints, self.ctx, ) } @@ -331,6 +334,7 @@ impl PrewritePessimistic { None, false, AssertionLevel::Off, + vec![], Context::default(), ) } @@ -355,19 +359,62 @@ impl PrewritePessimistic { None, true, AssertionLevel::Off, + vec![], Context::default(), ) } - fn into_prewriter(self) -> Prewriter { - Prewriter { + #[cfg(test)] + pub fn with_for_update_ts_constraints( + mutations: Vec<(Mutation, PrewriteRequestPessimisticAction)>, + primary: Vec, + start_ts: TimeStamp, + for_update_ts: TimeStamp, + for_update_ts_constraints: impl IntoIterator, + ) -> TypedCommand { + PrewritePessimistic::new( + mutations, + primary, + start_ts, + 0, + for_update_ts, + 0, + TimeStamp::default(), + TimeStamp::default(), + None, + false, + AssertionLevel::Off, + for_update_ts_constraints + .into_iter() + .map(|(index, expected_for_update_ts)| { + let mut constraint = PrewriteRequestForUpdateTsConstraint::default(); + constraint.set_index(index as u32); + constraint.set_expected_for_update_ts(expected_for_update_ts.into_inner()); + constraint + }) + .collect(), + Context::default(), + ) + } + + fn into_prewriter(self) -> Result> { + let mut mutations: Vec = + self.mutations.into_iter().map(Into::into).collect(); + for item in self.for_update_ts_constraints { + let index = 
item.index as usize; + if index >= mutations.len() { + return Err(ErrorInner::Other(box_err!("prewrite request invalid: for_update_ts constraint set for index {} while {} mutations were given", index, mutations.len())).into()); + } + mutations[index].expected_for_update_ts = Some(item.expected_for_update_ts.into()); + } + Ok(Prewriter { kind: Pessimistic { for_update_ts: self.for_update_ts, }, start_ts: self.start_ts, txn_size: self.txn_size, primary: self.primary, - mutations: self.mutations, + mutations, try_one_pc: self.try_one_pc, secondary_keys: self.secondary_keys, @@ -379,7 +426,7 @@ impl PrewritePessimistic { ctx: self.ctx, old_values: OldValues::default(), - } + }) } } @@ -392,7 +439,7 @@ impl CommandExt for PrewritePessimistic { fn write_bytes(&self) -> usize { let mut bytes = 0; for (m, _) in &self.mutations { - match *m { + match m { Mutation::Put((ref key, ref value), _) | Mutation::Insert((ref key, ref value), _) => { bytes += key.as_encoded().len(); @@ -412,7 +459,7 @@ impl CommandExt for PrewritePessimistic { impl WriteCommand for PrewritePessimistic { fn process_write(self, snapshot: S, context: WriteContext<'_, L>) -> Result { - self.into_prewriter().process_write(snapshot, context) + self.into_prewriter()?.process_write(snapshot, context) } } @@ -556,6 +603,7 @@ impl Prewriter { for m in mem::take(&mut self.mutations) { let pessimistic_action = m.pessimistic_action(); + let expected_for_update_ts = m.pessimistic_expected_for_update_ts(); let m = m.into_mutation(); let key = m.key().clone(); let mutation_type = m.mutation_type(); @@ -566,7 +614,15 @@ impl Prewriter { } let need_min_commit_ts = secondaries.is_some() || self.try_one_pc; - let prewrite_result = prewrite(txn, reader, &props, m, secondaries, pessimistic_action); + let prewrite_result = prewrite( + txn, + reader, + &props, + m, + secondaries, + pessimistic_action, + expected_for_update_ts, + ); match prewrite_result { Ok((ts, old_value)) if !(need_min_commit_ts && ts.is_zero()) => { if 
need_min_commit_ts && final_min_commit_ts < ts { @@ -791,7 +847,7 @@ struct Pessimistic { } impl PrewriteKind for Pessimistic { - type Mutation = (Mutation, PrewriteRequestPessimisticAction); + type Mutation = PessimisticMutation; fn txn_kind(&self) -> TransactionKind { TransactionKind::Pessimistic(self.for_update_ts) @@ -801,11 +857,11 @@ impl PrewriteKind for Pessimistic { /// The type of mutation and, optionally, its extra information, differing for /// the optimistic and pessimistic transaction. /// For optimistic txns, this is `Mutation`. -/// For pessimistic txns, this is `(Mutation, PessimisticAction)`, where the -/// action indicates what kind of operations(checks) need to be performed. -/// The action also implies the type of the lock status. +/// For pessimistic txns, this is `PessimisticMutation` which contains a +/// `Mutation` and some other extra information necessary for pessimistic txns. trait MutationLock { fn pessimistic_action(&self) -> PrewriteRequestPessimisticAction; + fn pessimistic_expected_for_update_ts(&self) -> Option; fn into_mutation(self) -> Mutation; } @@ -814,18 +870,55 @@ impl MutationLock for Mutation { SkipPessimisticCheck } + fn pessimistic_expected_for_update_ts(&self) -> Option { + None + } + fn into_mutation(self) -> Mutation { self } } -impl MutationLock for (Mutation, PrewriteRequestPessimisticAction) { +#[derive(Debug)] +pub struct PessimisticMutation { + pub mutation: Mutation, + /// Indicates what kind of operations(checks) need to be performed, and also + /// implies the type of the lock status. + pub pessimistic_action: PrewriteRequestPessimisticAction, + /// Specifies whether it needs to check the `for_update_ts` field in the + /// pessimistic lock during prewrite. If any, the check only passes if the + /// `for_update_ts` field in pessimistic lock is not greater than the + /// expected value. 
+ pub expected_for_update_ts: Option, +} + +impl MutationLock for PessimisticMutation { fn pessimistic_action(&self) -> PrewriteRequestPessimisticAction { - self.1 + self.pessimistic_action + } + + fn pessimistic_expected_for_update_ts(&self) -> Option { + self.expected_for_update_ts } fn into_mutation(self) -> Mutation { - self.0 + self.mutation + } +} + +impl PessimisticMutation { + pub fn new(mutation: Mutation, pessimistic_action: PrewriteRequestPessimisticAction) -> Self { + Self { + mutation, + pessimistic_action, + expected_for_update_ts: None, + } + } +} + +impl From<(Mutation, PrewriteRequestPessimisticAction)> for PessimisticMutation { + fn from(value: (Mutation, PrewriteRequestPessimisticAction)) -> Self { + PessimisticMutation::new(value.0, value.1) } } @@ -901,8 +994,8 @@ mod tests { commands::{ check_txn_status::tests::must_success as must_check_txn_status, test_util::{ - commit, pessimistic_prewrite_with_cm, prewrite, prewrite_command, - prewrite_with_cm, rollback, + commit, pessimistic_prewrite_check_for_update_ts, pessimistic_prewrite_with_cm, + prewrite, prewrite_command, prewrite_with_cm, rollback, }, }, tests::{ @@ -1451,6 +1544,7 @@ mod tests { Some(vec![]), false, AssertionLevel::Off, + vec![], Context::default(), ); @@ -1491,6 +1585,7 @@ mod tests { Some(vec![k2.to_vec()]), false, AssertionLevel::Off, + vec![], Context::default(), ); @@ -1697,6 +1792,7 @@ mod tests { secondary_keys, case.one_pc, AssertionLevel::Off, + vec![], Context::default(), ) } else { @@ -1937,6 +2033,7 @@ mod tests { Some(vec![]), false, AssertionLevel::Off, + vec![], Context::default(), ); let context = WriteContext { @@ -2076,6 +2173,7 @@ mod tests { secondary_keys, false, AssertionLevel::Off, + vec![], ctx, ); prewrite_command(engine, cm.clone(), statistics, cmd) @@ -2546,6 +2644,7 @@ mod tests { Some(vec![]), false, AssertionLevel::Off, + vec![], Context::default(), ); let res = prewrite_command(&mut engine, cm, &mut statistics, cmd).unwrap(); @@ -2736,4 +2835,132 
@@ mod tests { assert_eq!(write.last_change_ts, TimeStamp::zero()); assert_eq!(write.versions_to_last_change, 0); } + + #[test] + fn test_pessimistic_prewrite_check_for_update_ts() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + let mut statistics = Statistics::default(); + + let k1 = b"k1"; + let k2 = b"k2"; + let k3 = b"k3"; + + // In actual cases these kinds of pessimistic locks should be locked in + // `allow_locking_with_conflict` mode. For simplicity, we pass a large + // for_update_ts to the pessimistic lock to simulate that case. + must_acquire_pessimistic_lock(&mut engine, k1, k1, 10, 10); + must_acquire_pessimistic_lock(&mut engine, k2, k1, 10, 20); + must_acquire_pessimistic_lock(&mut engine, k3, k1, 10, 20); + + let check_lock_unchanged = |engine: &mut _| { + must_pessimistic_locked(engine, k1, 10, 10); + must_pessimistic_locked(engine, k2, 10, 20); + must_pessimistic_locked(engine, k3, 10, 20); + }; + + let must_be_pessimistic_lock_not_found = |e| match e { + Error(box ErrorInner::Mvcc(MvccError( + box MvccErrorInner::PessimisticLockNotFound { .. 
}, + ))) => (), + e => panic!( + "error type not match: expected PessimisticLockNotFound, got {:?}", + e + ), + }; + + let mutations = vec![ + ( + Mutation::make_put(Key::from_raw(k1), b"v1".to_vec()), + DoPessimisticCheck, + ), + ( + Mutation::make_put(Key::from_raw(k2), b"v2".to_vec()), + DoPessimisticCheck, + ), + ( + Mutation::make_put(Key::from_raw(k3), b"v3".to_vec()), + DoPessimisticCheck, + ), + ]; + + let e = pessimistic_prewrite_check_for_update_ts( + &mut engine, + &mut statistics, + mutations.clone(), + k1.to_vec(), + 10, + 15, + vec![(1, 15)], + ) + .unwrap_err(); + must_be_pessimistic_lock_not_found(e); + check_lock_unchanged(&mut engine); + + let e = pessimistic_prewrite_check_for_update_ts( + &mut engine, + &mut statistics, + mutations.clone(), + k1.to_vec(), + 10, + 15, + vec![(0, 15), (1, 15), (2, 15)], + ) + .unwrap_err(); + must_be_pessimistic_lock_not_found(e); + check_lock_unchanged(&mut engine); + + let e = pessimistic_prewrite_check_for_update_ts( + &mut engine, + &mut statistics, + mutations.clone(), + k1.to_vec(), + 10, + 15, + vec![(2, 15), (0, 20)], + ) + .unwrap_err(); + must_be_pessimistic_lock_not_found(e); + check_lock_unchanged(&mut engine); + + // lock.for_update_ts < expected is disallowed too. + let e = pessimistic_prewrite_check_for_update_ts( + &mut engine, + &mut statistics, + mutations.clone(), + k1.to_vec(), + 10, + 15, + vec![(0, 15), (2, 20)], + ) + .unwrap_err(); + must_be_pessimistic_lock_not_found(e); + check_lock_unchanged(&mut engine); + + // Index out of bound (invalid request). 
+ pessimistic_prewrite_check_for_update_ts( + &mut engine, + &mut statistics, + mutations.clone(), + k1.to_vec(), + 10, + 15, + vec![(3, 30)], + ) + .unwrap_err(); + check_lock_unchanged(&mut engine); + + pessimistic_prewrite_check_for_update_ts( + &mut engine, + &mut statistics, + mutations, + k1.to_vec(), + 10, + 15, + vec![(0, 10), (2, 20)], + ) + .unwrap(); + must_locked(&mut engine, k1, 10); + must_locked(&mut engine, k2, 10); + must_locked(&mut engine, k3, 10); + } } diff --git a/src/storage/txn/mod.rs b/src/storage/txn/mod.rs index f43e309f503..640c534fc86 100644 --- a/src/storage/txn/mod.rs +++ b/src/storage/txn/mod.rs @@ -241,6 +241,7 @@ pub mod tests { must_err as must_acquire_pessimistic_lock_err, must_err_return_value as must_acquire_pessimistic_lock_return_value_err, must_pessimistic_locked, must_succeed as must_acquire_pessimistic_lock, + must_succeed_allow_lock_with_conflict as must_acquire_pessimistic_lock_allow_lock_with_conflict, must_succeed_for_large_txn as must_acquire_pessimistic_lock_for_large_txn, must_succeed_impl as must_acquire_pessimistic_lock_impl, must_succeed_return_value as must_acquire_pessimistic_lock_return_value, diff --git a/src/storage/txn/store.rs b/src/storage/txn/store.rs index 46879d38e9f..1b4a7d5624c 100644 --- a/src/storage/txn/store.rs +++ b/src/storage/txn/store.rs @@ -731,6 +731,7 @@ mod tests { Mutation::make_put(Key::from_raw(key), key.to_vec()), &None, SkipPessimisticCheck, + None, ) .unwrap(); } diff --git a/tests/benches/hierarchy/mvcc/mod.rs b/tests/benches/hierarchy/mvcc/mod.rs index 7a79b984aaf..92dacfe6dc9 100644 --- a/tests/benches/hierarchy/mvcc/mod.rs +++ b/tests/benches/hierarchy/mvcc/mod.rs @@ -56,6 +56,7 @@ where Mutation::make_put(Key::from_raw(k), v.clone()), &None, SkipPessimisticCheck, + None, ) .unwrap(); } @@ -107,6 +108,7 @@ fn mvcc_prewrite>(b: &mut Bencher<'_>, config: &B mutation, &None, SkipPessimisticCheck, + None, ) .unwrap(); } diff --git a/tests/benches/hierarchy/txn/mod.rs 
b/tests/benches/hierarchy/txn/mod.rs index 404266e2c6f..1a4d047562d 100644 --- a/tests/benches/hierarchy/txn/mod.rs +++ b/tests/benches/hierarchy/txn/mod.rs @@ -52,6 +52,7 @@ where Mutation::make_put(Key::from_raw(k), v.clone()), &None, SkipPessimisticCheck, + None, ) .unwrap(); } @@ -100,6 +101,7 @@ fn txn_prewrite>(b: &mut Bencher<'_>, config: &Be mutation, &None, SkipPessimisticCheck, + None, ) .unwrap(); let write_data = WriteData::from_modifies(txn.into_modifies()); diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 42cda54281e..ba6339b666d 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -408,6 +408,7 @@ fn test_pipelined_pessimistic_lock() { None, false, AssertionLevel::Off, + vec![], Context::default(), ), expect_ok_callback(tx.clone(), 0), @@ -759,6 +760,7 @@ fn test_async_commit_prewrite_with_stale_max_ts_impl() { Some(vec![b"xk2".to_vec()]), false, AssertionLevel::Off, + vec![], ctx.clone(), ), Box::new(move |res: storage::Result<_>| { @@ -898,6 +900,7 @@ fn test_async_apply_prewrite_impl( secondaries, false, AssertionLevel::Off, + vec![], ctx.clone(), ), Box::new(move |r| tx.send(r).unwrap()), @@ -1232,6 +1235,7 @@ fn test_async_apply_prewrite_1pc_impl( None, true, AssertionLevel::Off, + vec![], ctx.clone(), ), Box::new(move |r| tx.send(r).unwrap()), From 61380e35cad161576342ea895b63a99dfabb97da Mon Sep 17 00:00:00 2001 From: glorv Date: Wed, 12 Apr 2023 20:31:01 +0800 Subject: [PATCH 0628/1149] resource_control: fix virtual time overflow (#14509) close tikv/tikv#14507 Signed-off-by: glorv Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/resource_control/Cargo.toml | 3 + .../resource_control/src/resource_group.rs | 203 ++++++++++++++++-- 3 files changed, 194 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b654e34fb77..10d3a7f37eb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4772,6 +4772,7 @@ dependencies = [ 
"pin-project", "prometheus", "protobuf", + "rand 0.8.5", "serde", "slog", "slog-global", diff --git a/components/resource_control/Cargo.toml b/components/resource_control/Cargo.toml index 9a488b06d77..ec13d9cdbdb 100644 --- a/components/resource_control/Cargo.toml +++ b/components/resource_control/Cargo.toml @@ -30,3 +30,6 @@ test_pd = { workspace = true } test_pd_client = { workspace = true } tikv_util = { workspace = true } yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } + +[dev-dependencies] +rand = "0.8" diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 0b0f24e8f62..7435fc17d01 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -1,6 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. use std::{ + cell::Cell, cmp::{max, min}, sync::{ atomic::{AtomicU64, Ordering}, @@ -11,12 +12,13 @@ use std::{ use collections::HashMap; use dashmap::{mapref::one::Ref, DashMap}; +use fail::fail_point; use kvproto::{ kvrpcpb::CommandPri, resource_manager::{GroupMode, ResourceGroup}, }; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard}; -use tikv_util::info; +use tikv_util::{info, time::Instant}; use yatp::queue::priority::TaskPriorityProvider; // a read task cost at least 50us. @@ -27,6 +29,8 @@ const TASK_EXTRA_FACTOR_BY_LEVEL: [u64; 3] = [0, 20, 100]; pub const MIN_PRIORITY_UPDATE_INTERVAL: Duration = Duration::from_secs(1); /// default resource group name const DEFAULT_RESOURCE_GROUP_NAME: &str = "default"; +/// default value of max RU quota. +const DEFAULT_MAX_RU_QUOTA: u64 = 10_000; /// The maximum RU quota that can be configured. const MAX_RU_QUOTA: u64 = i32::MAX as u64; @@ -152,12 +156,26 @@ pub struct ResourceController { // increase the real cost after task is executed; but don't increase it at write because // the cost is known so we just pre-consume it. 
is_read: bool, + // Track the maximum ru quota used to calculate the factor of each resource group. + // factor = max_ru_quota / group_ru_quota * 10.0 + // We use mutex here to ensure when we need to change this value and do adjust all resource + // groups' factors, it can't be changed concurrently. + // NOTE: becuase the ru config for "default" group is very large and it can cause very big + // group weight, we will not count this value by default. + max_ru_quota: Mutex, // record consumption of each resource group, name --> resource_group resource_consumptions: RwLock, GroupPriorityTracker>>, - + // the latest min vt, this value is used to init new added group vt last_min_vt: AtomicU64, + // the last time min vt is overflow + last_rest_vt_time: Cell, } +// we are ensure to visit the `last_rest_vt_time` by only 1 thread so it's +// thread safe. +unsafe impl Send for ResourceController {} +unsafe impl Sync for ResourceController {} + impl ResourceController { pub fn new(name: String, is_read: bool) -> Self { let controller = Self { @@ -165,6 +183,8 @@ impl ResourceController { is_read, resource_consumptions: RwLock::new(HashMap::default()), last_min_vt: AtomicU64::new(0), + max_ru_quota: Mutex::new(DEFAULT_MAX_RU_QUOTA), + last_rest_vt_time: Cell::new(Instant::now_coarse()), }; // add the "default" resource group controller.add_resource_group( @@ -175,24 +195,39 @@ impl ResourceController { controller } - fn calculate_factor(mut quota: u64) -> u64 { - quota = min(quota, MAX_RU_QUOTA); - if quota > 0 { - // the maxinum ru quota is very big, so the precision lost due to - // integer division is very small. 
- MAX_RU_QUOTA / quota - } else { + fn calculate_factor(max_quota: u64, quota: u64) -> u64 { + // we don't adjust the max_quota if it's the "default" group's default + // value(u32::MAX), so here it is possible that the quota is bigger than + // the max quota + if quota == 0 || quota > max_quota { 1 + } else { + // we use max_quota / quota as the resource group factor, but because we need to + // cast the value to integer, so we times it by 10 to ensure the accuracy is + // enough. + let max_quota = min(max_quota * 10, MAX_RU_QUOTA); + (max_quota as f64 / quota as f64).round() as u64 } } - fn add_resource_group(&self, name: Vec, ru_quota: u64, mut group_priority: u32) { + fn add_resource_group(&self, name: Vec, mut ru_quota: u64, mut group_priority: u32) { if group_priority == 0 { // map 0 to medium priority(default priority) group_priority = MEDIUM_PRIORITY; } + if ru_quota > MAX_RU_QUOTA { + ru_quota = MAX_RU_QUOTA; + } - let weight = Self::calculate_factor(ru_quota); + let mut max_ru_quota = self.max_ru_quota.lock().unwrap(); + // skip to adjust max ru if it is the "default" group and the ru config eq + // MAX_RU_QUOTA + if ru_quota > *max_ru_quota && (name != "default".as_bytes() || ru_quota < MAX_RU_QUOTA) { + *max_ru_quota = ru_quota; + // adjust all group weight because the current value is too small. 
+ self.adjust_all_resource_group_factors(ru_quota); + } + let weight = Self::calculate_factor(*max_ru_quota, ru_quota); let vt_delta_for_get = if self.is_read { DEFAULT_PRIORITY_PER_READ_TASK * weight @@ -200,6 +235,7 @@ impl ResourceController { 0 }; let group = GroupPriorityTracker { + ru_quota, group_priority, weight, virtual_time: AtomicU64::new(self.last_min_vt.load(Ordering::Acquire)), @@ -210,6 +246,20 @@ impl ResourceController { self.resource_consumptions.write().insert(name, group); } + // we calculate the weight of each resource group based on the currently maximum + // ru quota, if a incoming resource group has a bigger quota, we need to + // adjust all the existing groups. As we expect this won't happen very + // often, and iterate 10k entry cost less than 5ms, so the performance is + // acceptable. + fn adjust_all_resource_group_factors(&self, max_ru_quota: u64) { + self.resource_consumptions + .write() + .iter_mut() + .for_each(|(_, tracker)| { + tracker.weight = Self::calculate_factor(max_ru_quota, tracker.ru_quota); + }); + } + fn remove_resource_group(&self, name: &[u8]) { // do not remove the default resource group, reset to default setting instead. if DEFAULT_RESOURCE_GROUP_NAME.as_bytes() == name { @@ -240,6 +290,7 @@ impl ResourceController { } pub fn update_min_virtual_time(&self) { + let start = Instant::now_coarse(); let mut min_vt = u64::MAX; let mut max_vt = 0; self.resource_consumptions @@ -257,6 +308,8 @@ impl ResourceController { return; } + fail_point!("increase_vt_duration_update_min_vt"); + let near_overflow = min_vt > RESET_VT_THRESHOLD; self.resource_consumptions .read() @@ -267,15 +320,19 @@ impl ResourceController { // but it should be ok as this operation should be extremely rare // and the impact is not big. if near_overflow { - tracker.decrease_vt(RESET_VT_THRESHOLD - (max_vt - vt) / 2); + tracker.decrease_vt(RESET_VT_THRESHOLD); } else if vt < max_vt { // TODO: is increase by half is a good choice. 
tracker.increase_vt((max_vt - vt) / 2); } }); if near_overflow { - info!("all reset groups' virtual time are near overflow, do reset"); + let end = Instant::now_coarse(); + info!("all resource groups' virtual time are near overflow, do reset"; + "min" => min_vt, "max" => max_vt, "dur" => ?end.duration_since(start), + "reset_dur" => ?end.duration_since(self.last_rest_vt_time.get())); max_vt -= RESET_VT_THRESHOLD; + self.last_rest_vt_time.set(end); } // max_vt is actually a little bigger than the current min vt, but we don't // need totally accurate here. @@ -309,6 +366,8 @@ fn concat_priority_vt(group_priority: u32, vt: u64) -> u64 { } struct GroupPriorityTracker { + // the ru setting of this group. + ru_quota: u64, group_priority: u32, weight: u64, virtual_time: AtomicU64, @@ -357,6 +416,7 @@ impl GroupPriorityTracker { #[cfg(test)] pub(crate) mod tests { + use rand::{thread_rng, RngCore}; use yatp::queue::Extras; use super::*; @@ -560,6 +620,123 @@ pub(crate) mod tests { assert_eq!(resource_ctl.last_min_vt.load(Ordering::Relaxed), g2_vt); } + #[test] + fn test_adjust_resource_group_weight() { + let resource_manager = ResourceGroupManager::default(); + let resource_ctl = resource_manager.derive_controller("test_read".into(), true); + let resource_ctl_write = resource_manager.derive_controller("test_write".into(), false); + + let group1 = new_resource_group_ru("test1".into(), 5000, 0); + resource_manager.add_resource_group(group1); + assert_eq!(resource_ctl.resource_group("test1".as_bytes()).weight, 20); + assert_eq!( + resource_ctl_write.resource_group("test1".as_bytes()).weight, + 20 + ); + + // add a resource group with big ru + let group1 = new_resource_group_ru("test2".into(), 50000, 0); + resource_manager.add_resource_group(group1); + assert_eq!(*resource_ctl.max_ru_quota.lock().unwrap(), 50000); + assert_eq!(resource_ctl.resource_group("test1".as_bytes()).weight, 100); + assert_eq!(resource_ctl.resource_group("test2".as_bytes()).weight, 10); + // 
resource_ctl_write should be unchanged. + assert_eq!(*resource_ctl_write.max_ru_quota.lock().unwrap(), 50000); + assert_eq!( + resource_ctl_write.resource_group("test1".as_bytes()).weight, + 100 + ); + assert_eq!( + resource_ctl_write.resource_group("test2".as_bytes()).weight, + 10 + ); + + // add the default "default" group, the ru weight should not change. + // add a resource group with big ru + let group = new_resource_group_ru("default".into(), u32::MAX as u64, 0); + resource_manager.add_resource_group(group); + assert_eq!( + resource_ctl_write.resource_group("test1".as_bytes()).weight, + 100 + ); + assert_eq!( + resource_ctl_write + .resource_group("default".as_bytes()) + .weight, + 1 + ); + + // change the default group to another value, it can impact the ru then. + let group = new_resource_group_ru("default".into(), 100000, 0); + resource_manager.add_resource_group(group); + assert_eq!( + resource_ctl_write.resource_group("test1".as_bytes()).weight, + 200 + ); + assert_eq!( + resource_ctl_write + .resource_group("default".as_bytes()) + .weight, + 10 + ); + } + + #[test] + fn test_reset_resource_group_vt_overflow() { + let resource_manager = ResourceGroupManager::default(); + let resource_ctl = resource_manager.derive_controller("test_write".into(), false); + let mut rng = thread_rng(); + + let mut min_delta = u64::MAX; + let mut max_delta = 0; + for i in 0..10 { + let name = format!("g{}", i); + let g = new_resource_group_ru(name.clone(), 100, 1); + resource_manager.add_resource_group(g); + let delta = rng.next_u64() % 10000 + 1; + min_delta = delta.min(min_delta); + max_delta = delta.max(max_delta); + resource_ctl + .resource_group(name.as_bytes()) + .increase_vt(RESET_VT_THRESHOLD + delta); + } + resource_ctl + .resource_group("default".as_bytes()) + .increase_vt(RESET_VT_THRESHOLD + 1); + + let old_max_vt = resource_ctl + .resource_consumptions + .read() + .iter() + .fold(0, |v, (_, g)| v.max(g.current_vt())); + let resource_ctl_cloned = 
resource_ctl.clone(); + fail::cfg_callback("increase_vt_duration_update_min_vt", move || { + resource_ctl_cloned + .resource_consumptions + .read() + .iter() + .enumerate() + .for_each(|(i, (_, tracker))| { + if i % 2 == 0 { + tracker.increase_vt(max_delta - min_delta); + } + }); + }) + .unwrap(); + resource_ctl.update_min_virtual_time(); + fail::remove("increase_vt_duration_update_min_vt"); + + let new_max_vt = resource_ctl + .resource_consumptions + .read() + .iter() + .fold(0, |v, (_, g)| v.max(g.current_vt())); + // check all vt has decreased by RESET_VT_THRESHOLD. + assert!(new_max_vt < max_delta * 2); + // check fail-point takes effect, the `new_max_vt` has increased. + assert!(old_max_vt - RESET_VT_THRESHOLD < new_max_vt); + } + #[test] fn test_retain_resource_groups() { let resource_manager = ResourceGroupManager::default(); From 515bebb405788442a56296429508e01e8d4bcb11 Mon Sep 17 00:00:00 2001 From: ekexium Date: Wed, 12 Apr 2023 21:35:01 +0800 Subject: [PATCH 0629/1149] txn: return duration_to_last_update when lock wait timeout (#14499) close tikv/tikv#14497 When a timeout occurs during waiting for a lock, provide the duration of time that has passed since the last update of the lock wait. Let the client decide whether it is necessary to resolve locks based on this info. 
Signed-off-by: ekexium Co-authored-by: Ti Chi Robot --- src/server/lock_manager/waiter_manager.rs | 95 +++++++++++++++++++++-- 1 file changed, 88 insertions(+), 7 deletions(-) diff --git a/src/server/lock_manager/waiter_manager.rs b/src/server/lock_manager/waiter_manager.rs index d8271998653..5f433571431 100644 --- a/src/server/lock_manager/waiter_manager.rs +++ b/src/server/lock_manager/waiter_manager.rs @@ -203,6 +203,7 @@ pub(crate) struct Waiter { pub diag_ctx: DiagnosticContext, delay: Delay, start_waiting_time: Instant, + last_updated_time: Option, } impl Waiter { @@ -224,6 +225,7 @@ impl Waiter { delay: Delay::new(deadline), diag_ctx, start_waiting_time, + last_updated_time: None, } } @@ -264,8 +266,13 @@ impl Waiter { self.cancel(None) } - fn cancel_for_timeout(self, _skip_resolving_lock: bool) -> KeyLockWaitInfo { - let lock_info = self.wait_info.lock_info.clone(); + fn cancel_for_timeout(self) -> KeyLockWaitInfo { + let mut lock_info = self.wait_info.lock_info.clone(); + lock_info.set_duration_to_last_update_ms( + self.last_updated_time + .map(|t| t.elapsed().as_millis() as u64) + .unwrap_or_default(), + ); // lock_info.set_skip_resolving_lock(skip_resolving_lock); let error = MvccError::from(MvccErrorInner::KeyIsLocked(lock_info)); self.cancel(Some(StorageError::from(TxnError::from(error)))) @@ -343,8 +350,10 @@ impl WaitTable { fn update_waiter( &mut self, update_event: &UpdateWaitForEvent, + now: Instant, ) -> Option<(KeyLockWaitInfo, DiagnosticContext)> { let waiter = self.waiter_pool.get_mut(&update_event.token)?; + waiter.last_updated_time = Some(now); assert_eq!(waiter.wait_info.key, update_event.wait_info.key); @@ -511,7 +520,7 @@ impl WaiterManager { let mut wait_table = wait_table.borrow_mut(); if let Some(waiter) = wait_table.take_waiter(token) { let start_ts = waiter.start_ts; - let wait_info = waiter.cancel_for_timeout(false); + let wait_info = waiter.cancel_for_timeout(); detector_scheduler.clean_up_wait_for(start_ts, wait_info); } }); @@ 
-537,8 +546,9 @@ impl WaiterManager { fn handle_update_wait_for(&mut self, events: Vec) { let mut wait_table = self.wait_table.borrow_mut(); + let now = Instant::now(); for event in events { - let previous_wait_info = wait_table.update_waiter(&event); + let previous_wait_info = wait_table.update_waiter(&event, now); if event.is_first_lock { continue; @@ -647,7 +657,7 @@ impl FutureRunnable for WaiterManager { #[cfg(test)] pub mod tests { - use std::{sync::mpsc, time::Duration}; + use std::{sync::mpsc, thread::sleep, time::Duration}; use futures::{executor::block_on, future::FutureExt}; use kvproto::kvrpcpb::LockInfo; @@ -673,6 +683,7 @@ pub mod tests { diag_ctx: DiagnosticContext::default(), delay: Delay::new(Instant::now()), start_waiting_time: Instant::now(), + last_updated_time: None, } } @@ -869,7 +880,7 @@ pub mod tests { #[test] fn test_waiter_notify() { let (waiter, lock_info, f) = new_test_waiter(10.into(), 20.into(), 20); - waiter.cancel_for_timeout(false); + waiter.cancel_for_timeout(); expect_key_is_locked(block_on(f).unwrap(), lock_info); // Deadlock @@ -902,7 +913,7 @@ pub mod tests { waiter.reset_timeout(Instant::now() + Duration::from_millis(100)); let (tx, rx) = mpsc::sync_channel(1); let f = waiter.on_timeout(move || tx.send(1).unwrap()); - waiter.cancel_for_timeout(false); + waiter.cancel_for_timeout(); assert_elapsed(|| block_on(f), 0, 200); rx.try_recv().unwrap_err(); } @@ -1140,4 +1151,74 @@ pub mod tests { ); worker.stop().unwrap(); } + + #[test] + fn test_duration_to_last_update() { + let (mut worker, scheduler) = start_waiter_manager(1000, 100); + let key = Key::from_raw(b"foo"); + let (waiter_ts, lock) = ( + 10.into(), + LockDigest { + ts: 20.into(), + hash: key.gen_hash(), + }, + ); + // waiter1 is updated when waiting, while waiter2(f2) is not. + let (waiter1, ..) 
= new_test_waiter_with_key(waiter_ts, lock.ts, &key.to_raw().unwrap()); + let (waiter2, _, f2) = new_test_waiter_with_key(100.into(), 100.into(), "foo".as_bytes()); + scheduler.wait_for( + LockWaitToken(Some(1)), + 1, + RegionEpoch::default(), + 1, + waiter1.start_ts, + waiter1.wait_info, + WaitTimeout::Millis(1000), + waiter1.cancel_callback, + DiagnosticContext::default(), + ); + scheduler.wait_for( + LockWaitToken(Some(2)), + 1, + RegionEpoch::default(), + 1, + waiter2.start_ts, + waiter2.wait_info, + WaitTimeout::Millis(1000), + waiter2.cancel_callback, + DiagnosticContext::default(), + ); + + // then update waiter + sleep(Duration::from_millis(500)); + let event = UpdateWaitForEvent { + token: LockWaitToken(Some(1)), + start_ts: waiter1.start_ts, + is_first_lock: false, + wait_info: KeyLockWaitInfo { + key: key.clone(), + lock_digest: Default::default(), + lock_info: LockInfo { + key: key.to_raw().unwrap(), + ..Default::default() + }, + }, + }; + scheduler.update_wait_for(vec![event]); + + assert_elapsed( + || match block_on(f2).unwrap() { + StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc( + MvccError(box MvccErrorInner::KeyIsLocked(res)), + )))) => { + assert_eq!(res.duration_to_last_update_ms, 0); + } + e => panic!("unexpected error: {:?}", e), + }, + 400, + 600, + ); + + worker.stop().unwrap(); + } } From b9ca84e61ae8ad1794fec440bb20cc00c6ae7912 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 14 Apr 2023 15:45:01 +0800 Subject: [PATCH 0630/1149] raftstore-v2: implement CaptureChange and LeaderCallback (#14558) ref tikv/tikv#14542 raftstore-v2: implement CaptureChange and LeaderCallback Signed-off-by: Neil Shen Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/fsm/apply.rs | 4 + components/raftstore-v2/src/fsm/peer.rs | 2 + .../src/operation/query/capture.rs | 124 ++++++++++++++++++ .../raftstore-v2/src/operation/query/mod.rs | 1 + components/raftstore-v2/src/raft/apply.rs | 22 +++- 
components/raftstore-v2/src/router/imp.rs | 74 +++++++++-- .../src/router/internal_message.rs | 2 + components/raftstore-v2/src/router/message.rs | 18 ++- components/raftstore-v2/src/router/mod.rs | 6 +- .../src/router/response_channel.rs | 41 ++++++ components/raftstore/src/store/fsm/apply.rs | 8 +- components/raftstore/src/store/fsm/mod.rs | 4 +- components/raftstore/src/store/msg.rs | 2 +- 13 files changed, 284 insertions(+), 24 deletions(-) create mode 100644 components/raftstore-v2/src/operation/query/capture.rs diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 2afd8fbf773..6c0989e72ae 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -48,6 +48,7 @@ impl, S: FsmScheduler> ApplyResReporter for M } /// Schedule task to `ApplyFsm`. +#[derive(Clone)] pub struct ApplyScheduler { sender: Sender, } @@ -136,6 +137,9 @@ impl ApplyFsm { ApplyTask::RefreshBucketStat(bucket_meta) => { self.apply.on_refresh_buckets(bucket_meta) } + ApplyTask::CaptureApply(capture_change) => { + self.apply.on_capture_apply(capture_change) + } } self.apply.maybe_flush().await; diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 5e10aa0ef72..d2506d0dd21 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -344,6 +344,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, .peer_mut() .on_redirect_catch_up_logs(self.store_ctx, c), PeerMsg::CatchUpLogs(c) => self.fsm.peer_mut().on_catch_up_logs(self.store_ctx, c), + PeerMsg::CaptureChange(capture_change) => self.on_capture_change(capture_change), + PeerMsg::LeaderCallback(ch) => self.on_leader_callback(ch), #[cfg(feature = "testexport")] PeerMsg::WaitFlush(ch) => self.fsm.peer_mut().on_wait_flush(ch), } diff --git a/components/raftstore-v2/src/operation/query/capture.rs 
b/components/raftstore-v2/src/operation/query/capture.rs new file mode 100644 index 00000000000..03014644261 --- /dev/null +++ b/components/raftstore-v2/src/operation/query/capture.rs @@ -0,0 +1,124 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::sync::Arc; + +use engine_traits::{KvEngine, RaftEngine}; +use fail::fail_point; +use kvproto::raft_cmdpb::RaftCmdResponse; +use raftstore::{ + coprocessor::ObserveHandle, + store::{ + cmd_resp, + fsm::{ + apply::{notify_stale_req_with_msg, ObserverType}, + new_read_index_request, ChangeObserver, + }, + msg::ErrorCallback, + util::compare_region_epoch, + RegionSnapshot, + }, +}; + +use crate::{ + fsm::{ApplyResReporter, PeerFsmDelegate}, + raft::Apply, + router::{message::CaptureChange, ApplyTask, QueryResChannel, QueryResult}, +}; + +impl<'a, EK: KvEngine, ER: RaftEngine, T: raftstore::store::Transport> + PeerFsmDelegate<'a, EK, ER, T> +{ + pub fn on_leader_callback(&mut self, ch: QueryResChannel) { + let peer = self.fsm.peer(); + let msg = new_read_index_request( + peer.region_id(), + peer.region().get_region_epoch().clone(), + peer.peer().clone(), + ); + self.on_query(msg, ch); + } + + pub fn on_capture_change(&mut self, capture_change: CaptureChange) { + fail_point!("raft_on_capture_change"); + + // TODO: Allow to capture change even is in flashback state. + // TODO: add a test case for this kind of situation. 
+ + let apply_router = self.fsm.peer().apply_scheduler().unwrap().clone(); + let (ch, _) = QueryResChannel::with_callback(Box::new(move |res| { + if let QueryResult::Response(resp) = res && resp.get_header().has_error() { + // Return error + capture_change.snap_cb.report_error(resp.clone()); + return; + } + apply_router.send(ApplyTask::CaptureApply(capture_change)) + })); + self.on_leader_callback(ch); + } +} + +impl Apply { + pub fn on_capture_apply(&mut self, capture_change: CaptureChange) { + let CaptureChange { + observer, + region_epoch, + snap_cb, + } = capture_change; + let ChangeObserver { region_id, ty } = observer; + + let is_stale_cmd = match ty { + ObserverType::Cdc(ObserveHandle { id, .. }) => self.observe_info_mut().cdc_id.id > id, + ObserverType::Rts(ObserveHandle { id, .. }) => self.observe_info_mut().rts_id.id > id, + ObserverType::Pitr(ObserveHandle { id, .. }) => self.observe_info_mut().pitr_id.id > id, + }; + if is_stale_cmd { + notify_stale_req_with_msg( + self.term(), + format!( + "stale observe id {:?}, current id: {:?}", + ty.handle().id, + self.observe_info_mut().pitr_id.id + ), + snap_cb, + ); + return; + } + + assert_eq!(self.region_id(), region_id); + let snapshot = match compare_region_epoch( + ®ion_epoch, + self.region(), + false, // check_conf_ver + true, // check_ver + true, // include_region + ) { + Ok(()) => { + // Commit the writebatch for ensuring the following snapshot can get all + // previous writes. 
+ self.flush(); + RegionSnapshot::from_snapshot( + Arc::new(self.tablet().snapshot()), + Arc::new(self.region().clone()), + ) + } + Err(e) => { + // Return error if epoch not match + snap_cb.report_error(cmd_resp::new_error(e)); + return; + } + }; + + match ty { + ObserverType::Cdc(id) => { + self.observe_info_mut().cdc_id = id; + } + ObserverType::Rts(id) => { + self.observe_info_mut().rts_id = id; + } + ObserverType::Pitr(id) => { + self.observe_info_mut().pitr_id = id; + } + } + snap_cb.set_result((RaftCmdResponse::default(), Some(Box::new(snapshot)))); + } +} diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 55bc100dec2..81fb4e5e9de 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -42,6 +42,7 @@ use crate::{ }, }; +mod capture; mod lease; mod local; mod replica; diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 5e7c7e84f84..d5ecb8c3026 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -7,9 +7,12 @@ use engine_traits::{ }; use kvproto::{metapb, raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; -use raftstore::store::{ - fsm::{apply::DEFAULT_APPLY_WB_SIZE, ApplyMetrics}, - Config, ReadTask, +use raftstore::{ + coprocessor::CmdObserveInfo, + store::{ + fsm::{apply::DEFAULT_APPLY_WB_SIZE, ApplyMetrics}, + Config, ReadTask, + }, }; use slog::Logger; use sst_importer::SstImporter; @@ -59,6 +62,8 @@ pub struct Apply { res_reporter: R, read_scheduler: Scheduler>, sst_importer: Arc, + observe_info: CmdObserveInfo, + pub(crate) metrics: ApplyMetrics, pub(crate) logger: Logger, pub(crate) buckets: Option, @@ -110,6 +115,7 @@ impl Apply { metrics: ApplyMetrics::default(), buckets, sst_importer, + observe_info: CmdObserveInfo::default(), logger, } } @@ -269,4 +275,14 @@ impl Apply 
{ pub fn sst_importer(&self) -> &SstImporter { &self.sst_importer } + + #[inline] + pub fn observe_info_mut(&mut self) -> &mut CmdObserveInfo { + &mut self.observe_info + } + + #[inline] + pub fn term(&self) -> u64 { + self.applied_term + } } diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 9bffe2b7983..b28dc95aa35 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -5,10 +5,11 @@ use std::{ sync::{Arc, Mutex}, }; -use crossbeam::channel::TrySendError; +use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{KvEngine, RaftEngine}; use futures::Future; use kvproto::{ + kvrpcpb::ExtraOp, metapb::RegionEpoch, raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, raft_serverpb::RaftMessage, @@ -21,7 +22,7 @@ use raftstore::{ }; use slog::warn; -use super::PeerMsg; +use super::{build_any_channel, message::CaptureChange, PeerMsg, QueryResChannel, QueryResult}; use crate::{batch::StoreRouter, operation::LocalReader, StoreMeta}; impl AsyncReadNotifier for StoreRouter { @@ -179,19 +180,72 @@ impl RaftRouter { impl CdcHandle for RaftRouter { fn capture_change( &self, - _region_id: u64, - _region_epoch: RegionEpoch, - _change_observer: ChangeObserver, - _callback: Callback, + region_id: u64, + region_epoch: RegionEpoch, + observer: ChangeObserver, + callback: Callback, ) -> crate::Result<()> { - unimplemented!() + let (snap_cb, _) = build_any_channel(Box::new(move |args| { + let (resp, snap) = (&args.0, args.1.take()); + if let Some(snap) = snap { + let snapshot: RegionSnapshot = match snap.downcast() { + Ok(s) => *s, + Err(t) => unreachable!("snapshot type should be the same: {:?}", t), + }; + callback.invoke_read(raftstore::store::ReadResponse { + response: Default::default(), + snapshot: Some(snapshot), + txn_extra_op: ExtraOp::Noop, + }) + } else { + callback.invoke_read(raftstore::store::ReadResponse { + response: resp.clone(), + snapshot: None, + 
txn_extra_op: ExtraOp::Noop, + }); + } + })); + if let Err(SendError(msg)) = self.router.force_send( + region_id, + PeerMsg::CaptureChange(CaptureChange { + observer, + region_epoch, + snap_cb, + }), + ) { + warn!(self.router.logger(), "failed to send capture change msg"; "msg" => ?msg); + return Err(crate::Error::RegionNotFound(region_id)); + } + Ok(()) } fn check_leadership( &self, - _region_id: u64, - _callback: Callback, + region_id: u64, + callback: Callback, ) -> crate::Result<()> { - unimplemented!() + let (ch, _) = QueryResChannel::with_callback(Box::new(|res| { + let resp = match res { + QueryResult::Read(_) => raftstore::store::ReadResponse { + response: Default::default(), + snapshot: None, + txn_extra_op: ExtraOp::Noop, + }, + QueryResult::Response(resp) => raftstore::store::ReadResponse { + response: resp.clone(), + snapshot: None, + txn_extra_op: ExtraOp::Noop, + }, + }; + callback.invoke_read(resp); + })); + if let Err(SendError(msg)) = self + .router + .force_send(region_id, PeerMsg::LeaderCallback(ch)) + { + warn!(self.router.logger(), "failed to send capture change msg"; "msg" => ?msg); + return Err(crate::Error::RegionNotFound(region_id)); + } + Ok(()) } } diff --git a/components/raftstore-v2/src/router/internal_message.rs b/components/raftstore-v2/src/router/internal_message.rs index 764e8df7dfd..6c8d1136b3a 100644 --- a/components/raftstore-v2/src/router/internal_message.rs +++ b/components/raftstore-v2/src/router/internal_message.rs @@ -3,6 +3,7 @@ use pd_client::{BucketMeta, BucketStat}; use raftstore::store::fsm::ApplyMetrics; +use super::message::CaptureChange; use crate::operation::{AdminCmdResult, CommittedEntries, DataTrace, GenSnapTask}; #[derive(Debug)] @@ -13,6 +14,7 @@ pub enum ApplyTask { UnsafeWrite(Box<[u8]>), ManualFlush, RefreshBucketStat(std::sync::Arc), + CaptureApply(CaptureChange), } #[derive(Debug, Default)] diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 
26fbde3644a..43dfab3ba98 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -9,13 +9,16 @@ use kvproto::{ raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}, raft_serverpb::RaftMessage, }; -use raftstore::store::{metrics::RaftEventDurationType, FetchedLogs, GenSnapRes}; +use raftstore::store::{ + fsm::ChangeObserver, metrics::RaftEventDurationType, FetchedLogs, GenSnapRes, +}; use resource_control::ResourceMetered; use tikv_util::time::Instant; use super::{ response_channel::{ - CmdResChannel, CmdResSubscriber, DebugInfoChannel, QueryResChannel, QueryResSubscriber, + AnyResChannel, CmdResChannel, CmdResSubscriber, DebugInfoChannel, QueryResChannel, + QueryResSubscriber, }, ApplyRes, }; @@ -131,6 +134,14 @@ pub struct UnsafeWrite { pub data: SimpleWriteBinary, } +#[derive(Debug)] +pub struct CaptureChange { + pub observer: ChangeObserver, + pub region_epoch: RegionEpoch, + // A callback accpets a snapshot. + pub snap_cb: AnyResChannel, +} + /// Message that can be sent to a peer. #[derive(Debug)] pub enum PeerMsg { @@ -220,6 +231,9 @@ pub enum PeerMsg { RedirectCatchUpLogs(CatchUpLogs), // From target [`Peer`] to source [`Peer`]. CatchUpLogs(CatchUpLogs), + /// Capture changes of a region. + CaptureChange(CaptureChange), + LeaderCallback(QueryResChannel), /// A message that used to check if a flush is happened. 
#[cfg(feature = "testexport")] WaitFlush(super::FlushChannel), diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs index 703f38c3516..2d0011c1ef0 100644 --- a/components/raftstore-v2/src/router/mod.rs +++ b/components/raftstore-v2/src/router/mod.rs @@ -15,8 +15,8 @@ pub use self::{ internal_message::ApplyRes, message::{PeerMsg, PeerTick, RaftRequest, StoreMsg, StoreTick}, response_channel::{ - BaseSubscriber, CmdResChannel, CmdResChannelBuilder, CmdResEvent, CmdResStream, - CmdResSubscriber, DebugInfoChannel, DebugInfoSubscriber, QueryResChannel, QueryResult, - ReadResponse, + build_any_channel, AnyResChannel, AnyResSubscriber, BaseSubscriber, CmdResChannel, + CmdResChannelBuilder, CmdResEvent, CmdResStream, CmdResSubscriber, DebugInfoChannel, + DebugInfoSubscriber, QueryResChannel, QueryResult, ReadResponse, }, }; diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index f70b6635982..97321aae9d1 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -13,6 +13,7 @@ //! 4. there will be no callback leak. 
use std::{ + any::Any, cell::UnsafeCell, fmt::{self, Debug, Formatter}, future::Future, @@ -471,6 +472,36 @@ impl CmdResChannelBuilder { } } +pub type AnyResChannel = BaseChannel<(RaftCmdResponse, Option>)>; + +impl Debug for AnyResChannel { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "AnyResChannel") + } +} + +impl ErrorCallback for AnyResChannel { + fn report_error(self, err: RaftCmdResponse) { + self.set_result((err, None)); + } + + fn is_none(&self) -> bool { + false + } +} + +pub type AnyResSubscriber = BaseSubscriber<(RaftCmdResponse, Option>)>; + +pub fn build_any_channel( + f: Box>)) + Send>, +) -> (AnyResChannel, AnyResSubscriber) { + let (c, s) = pair(); + unsafe { + *c.core.before_set.get() = Some(f); + } + (c, s) +} + impl CmdResChannel { // Valid range is [1, 30] const PROPOSED_EVENT: u64 = 1; @@ -585,6 +616,16 @@ impl QueryResChannel { pub fn pair() -> (Self, QueryResSubscriber) { pair() } + + pub fn with_callback( + f: Box, + ) -> (Self, QueryResSubscriber) { + let (c, s) = pair(); + unsafe { + *c.core.before_set.get() = Some(f); + } + (c, s) + } } impl ErrorCallback for QueryResChannel { diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 16a8bacbced..54ca2274162 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -3651,14 +3651,14 @@ impl Debug for GenSnapTask { } #[derive(Debug)] -enum ObserverType { +pub enum ObserverType { Cdc(ObserveHandle), Rts(ObserveHandle), Pitr(ObserveHandle), } impl ObserverType { - fn handle(&self) -> &ObserveHandle { + pub fn handle(&self) -> &ObserveHandle { match self { ObserverType::Cdc(h) => h, ObserverType::Rts(h) => h, @@ -3669,8 +3669,8 @@ impl ObserverType { #[derive(Debug)] pub struct ChangeObserver { - ty: ObserverType, - region_id: u64, + pub ty: ObserverType, + pub region_id: u64, } impl ChangeObserver { diff --git a/components/raftstore/src/store/fsm/mod.rs 
b/components/raftstore/src/store/fsm/mod.rs index ffba120056c..6f51c97c0d5 100644 --- a/components/raftstore/src/store/fsm/mod.rs +++ b/components/raftstore/src/store/fsm/mod.rs @@ -17,7 +17,9 @@ pub use self::{ Registration, SwitchWitness, TaskRes as ApplyTaskRes, }, metrics::{GlobalStoreStat, LocalStoreStat}, - peer::{new_admin_request, DestroyPeerJob, PeerFsm, MAX_PROPOSAL_SIZE_RATIO}, + peer::{ + new_admin_request, new_read_index_request, DestroyPeerJob, PeerFsm, MAX_PROPOSAL_SIZE_RATIO, + }, store::{ create_raft_batch_system, RaftBatchSystem, RaftPollerBuilder, RaftRouter, StoreInfo, StoreMeta, diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 935210951f0..c36e9880694 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -507,7 +507,7 @@ where store_id: u64, group_id: u64, }, - /// Capture the changes of the region. + /// Capture changes of a region. CaptureChange { cmd: ChangeObserver, region_epoch: RegionEpoch, From 51b56135ed3be0f827e54564a9e621b34fb45938 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Fri, 14 Apr 2023 16:45:01 +0800 Subject: [PATCH 0631/1149] Move simple write into raftstore (#14544) ref tikv/tikv#14575 Move simple write into raftstore Signed-off-by: CalvinNeo --- .../operation/command/admin/merge/prepare.rs | 3 +- .../raftstore-v2/src/operation/command/mod.rs | 17 +++-- .../src/operation/command/write/mod.rs | 8 +-- components/raftstore-v2/src/router/message.rs | 17 ++--- components/raftstore/src/store/mod.rs | 1 + .../src/store}/simple_write.rs | 71 +++++++++++++------ 6 files changed, 77 insertions(+), 40 deletions(-) rename components/{raftstore-v2/src/operation/command/write => raftstore/src/store}/simple_write.rs (89%) diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index f031ac5d20e..16a8382cfad 100644 --- 
a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -52,7 +52,7 @@ use super::merge_source_path; use crate::{ batch::StoreContext, fsm::ApplyResReporter, - operation::{AdminCmdResult, SimpleWriteReqDecoder}, + operation::{command::parse_at, AdminCmdResult, SimpleWriteReqDecoder}, raft::{Apply, Peer}, router::CmdResChannel, }; @@ -248,6 +248,7 @@ impl Peer { continue; } let Err(cmd) = SimpleWriteReqDecoder::new( + |buf, index, term| parse_at(&self.logger, buf, index, term), &self.logger, entry.get_data(), entry.get_index(), diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 9ef5592c64e..b45ad23a1b1 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -26,7 +26,6 @@ use engine_traits::{KvEngine, PerfContext, RaftEngine, WriteBatch, WriteOptions} use kvproto::raft_cmdpb::{ AdminCmdType, CmdType, RaftCmdRequest, RaftCmdResponse, RaftRequestHeader, }; -use protobuf::Message; use raft::eraftpb::{ConfChange, ConfChangeV2, Entry, EntryType}; use raft_proto::ConfChangeI; use raftstore::{ @@ -69,9 +68,10 @@ pub use admin::{ }; pub use control::ProposalControl; use pd_client::{BucketMeta, BucketStat}; -pub use write::{ - SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, -}; +use protobuf::Message; +pub use write::{SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder}; +pub type SimpleWriteReqEncoder = + raftstore::store::simple_write::SimpleWriteReqEncoder; use self::write::SimpleWrite; @@ -463,7 +463,13 @@ impl Apply { impl Apply { pub fn apply_unsafe_write(&mut self, data: Box<[u8]>) { - let decoder = match SimpleWriteReqDecoder::new(&self.logger, &data, u64::MAX, u64::MAX) { + let decoder = match SimpleWriteReqDecoder::new( + |buf, index, term| parse_at(&self.logger, buf, index, term), + 
&self.logger, + &data, + u64::MAX, + u64::MAX, + ) { Ok(decoder) => decoder, Err(req) => unreachable!("unexpected request: {:?}", req), }; @@ -556,6 +562,7 @@ impl Apply { let log_index = entry.get_index(); let req = match entry.get_entry_type() { EntryType::EntryNormal => match SimpleWriteReqDecoder::new( + |buf, index, term| parse_at(&self.logger, buf, index, term), &self.logger, entry.get_data(), log_index, diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index b017a7b0ef7..9f4afec9ad6 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -17,19 +17,17 @@ use tikv_util::slog_panic; use crate::{ batch::StoreContext, fsm::ApplyResReporter, + operation::SimpleWriteReqEncoder, raft::{Apply, Peer}, router::{ApplyTask, CmdResChannel}, }; mod ingest; -mod simple_write; -pub use simple_write::{ - SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, +pub use raftstore::store::simple_write::{ + SimpleWrite, SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, }; -pub use self::simple_write::SimpleWrite; - impl Peer { #[inline] pub fn on_simple_write( diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 43dfab3ba98..3f761c74f94 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -10,19 +10,20 @@ use kvproto::{ raft_serverpb::RaftMessage, }; use raftstore::store::{ - fsm::ChangeObserver, metrics::RaftEventDurationType, FetchedLogs, GenSnapRes, + fsm::ChangeObserver, metrics::RaftEventDurationType, simple_write::SimpleWriteBinary, + FetchedLogs, GenSnapRes, }; use resource_control::ResourceMetered; use tikv_util::time::Instant; -use super::{ - response_channel::{ - AnyResChannel, CmdResChannel, CmdResSubscriber, DebugInfoChannel, QueryResChannel, - 
QueryResSubscriber, - }, - ApplyRes, +use super::response_channel::{ + AnyResChannel, CmdResChannel, CmdResSubscriber, DebugInfoChannel, QueryResChannel, + QueryResSubscriber, +}; +use crate::{ + operation::{CatchUpLogs, RequestHalfSplit, RequestSplit, SplitInit}, + router::ApplyRes, }; -use crate::operation::{CatchUpLogs, RequestHalfSplit, RequestSplit, SimpleWriteBinary, SplitInit}; #[derive(Debug, Clone, Copy, PartialEq, Hash)] #[repr(u8)] diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index fe3c12427bd..c007b622ee1 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -22,6 +22,7 @@ mod hibernate_state; mod peer_storage; mod region_snapshot; mod replication_mode; +pub mod simple_write; pub mod snap; mod txn_ext; mod worker; diff --git a/components/raftstore-v2/src/operation/command/write/simple_write.rs b/components/raftstore/src/store/simple_write.rs similarity index 89% rename from components/raftstore-v2/src/operation/command/write/simple_write.rs rename to components/raftstore/src/store/simple_write.rs index 5f72fa62738..cdae8f18c97 100644 --- a/components/raftstore-v2/src/operation/command/write/simple_write.rs +++ b/components/raftstore/src/store/simple_write.rs @@ -8,11 +8,10 @@ use kvproto::{ raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}, }; use protobuf::{CodedInputStream, Message}; -use raftstore::store::WriteCallback; use slog::Logger; use tikv_util::slog_panic; -use crate::{operation::command::parse_at, router::CmdResChannel}; +use crate::store::{msg::ErrorCallback, WriteCallback}; // MAGIC number to hint simple write codec is used. If it's a protobuf message, // the first one or several bytes are for field tag, which can't be zero. @@ -38,19 +37,25 @@ impl SimpleWriteBinary { } /// We usually use `RaftCmdRequest` for read write request. But the codec is -/// not efficient enough for simple request. 
`SimpleWrite` is introduce to make -/// codec alloc less and fast. +/// not efficient enough for simple request. `SimpleWrite` is introduce to +/// make codec alloc less and fast. #[derive(Debug)] -pub struct SimpleWriteReqEncoder { +pub struct SimpleWriteReqEncoder +where + C: ErrorCallback + WriteCallback, +{ header: Box, buf: Vec, - channels: Vec, + channels: Vec, size_limit: usize, write_type: WriteType, notify_proposed: bool, } -impl SimpleWriteReqEncoder { +impl SimpleWriteReqEncoder +where + C: ErrorCallback + WriteCallback, +{ /// Create a request encoder. /// /// If `notify_proposed` is true, channels will be called `notify_proposed` @@ -60,7 +65,7 @@ impl SimpleWriteReqEncoder { bin: SimpleWriteBinary, size_limit: usize, notify_proposed: bool, - ) -> SimpleWriteReqEncoder { + ) -> SimpleWriteReqEncoder { let mut buf = Vec::with_capacity(256); buf.push(MAGIC_PREFIX); header.write_length_delimited_to_vec(&mut buf).unwrap(); @@ -102,12 +107,12 @@ impl SimpleWriteReqEncoder { } #[inline] - pub fn encode(self) -> (Vec, Vec) { + pub fn encode(self) -> (Vec, Vec) { (self.buf, self.channels) } #[inline] - pub fn add_response_channel(&mut self, mut ch: CmdResChannel) { + pub fn add_response_channel(&mut self, mut ch: C) { if self.notify_proposed { ch.notify_proposed(); } @@ -239,6 +244,7 @@ pub struct SimpleWriteReqDecoder<'a> { impl<'a> SimpleWriteReqDecoder<'a> { pub fn new( + fallback: impl FnOnce(&'a [u8], u64, u64) -> RaftCmdRequest, logger: &Logger, buf: &'a [u8], index: u64, @@ -263,7 +269,7 @@ impl<'a> SimpleWriteReqDecoder<'a> { buf: &buf[1 + read as usize..], }) } - _ => Err(parse_at(logger, buf, index, term)), + _ => Err(fallback(buf, index, term)), } } @@ -479,6 +485,11 @@ mod tests { use slog::o; use super::*; + use crate::store::Callback; + + fn decoder_fallback(data: &[u8], index: u64, _: u64) -> RaftCmdRequest { + crate::store::util::parse_data_at(data, index, "") + } #[test] fn test_codec() { @@ -490,18 +501,29 @@ mod tests { let mut header = 
Box::::default(); header.set_term(2); - let mut req_encoder = SimpleWriteReqEncoder::new(header.clone(), bin, usize::MAX, false); + let mut req_encoder = SimpleWriteReqEncoder::>::new( + header.clone(), + bin, + usize::MAX, + false, + ); let mut encoder = SimpleWriteEncoder::with_capacity(512); encoder.delete_range(CF_LOCK, b"key", b"key", true); encoder.delete_range("cf", b"key", b"key", false); let bin = encoder.encode(); assert!(!req_encoder.amend(&header, &bin)); - let req_encoder2 = SimpleWriteReqEncoder::new(header.clone(), bin, 0, false); + let req_encoder2 = SimpleWriteReqEncoder::>::new( + header.clone(), + bin, + 0, + false, + ); let (bytes, _) = req_encoder.encode(); let logger = slog_global::borrow_global().new(o!()); - let mut decoder = SimpleWriteReqDecoder::new(&logger, &bytes, 0, 0).unwrap(); + let mut decoder = + SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); assert_eq!(*decoder.header(), *header); let write = decoder.next().unwrap(); let SimpleWrite::Put(put) = write else { panic!("should be put") }; @@ -516,7 +538,7 @@ mod tests { assert_matches!(decoder.next(), None); let (bytes, _) = req_encoder2.encode(); - decoder = SimpleWriteReqDecoder::new(&logger, &bytes, 0, 0).unwrap(); + decoder = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); let write = decoder.next().unwrap(); let SimpleWrite::DeleteRange(dr) = write else { panic!("should be delete range") }; assert_eq!(dr.cf, CF_LOCK); @@ -544,9 +566,12 @@ mod tests { .collect(); encoder.ingest(exp.clone()); let bin = encoder.encode(); - let req_encoder = SimpleWriteReqEncoder::new(header, bin, 0, false); + let req_encoder = SimpleWriteReqEncoder::>::new( + header, bin, 0, false, + ); let (bytes, _) = req_encoder.encode(); - let mut decoder = SimpleWriteReqDecoder::new(&logger, &bytes, 0, 0).unwrap(); + let mut decoder = + SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); let write = decoder.next().unwrap(); 
let SimpleWrite::Ingest(ssts) = write else { panic!("should be ingest") }; assert_eq!(exp, ssts); @@ -589,7 +614,8 @@ mod tests { raft_cmd.mut_requests().push(req); let bytes = raft_cmd.write_to_bytes().unwrap(); let logger = slog_global::borrow_global().new(o!()); - let decoded = SimpleWriteReqDecoder::new(&logger, &bytes, 0, 0).unwrap_err(); + let decoded = + SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap_err(); // SimpleWriteReqDecoder should be able to decode naive RaftCmdRequest. assert_eq!(decoded, raft_cmd); @@ -599,7 +625,8 @@ mod tests { let mut header = Box::::default(); header.set_term(2); - let mut req_encoder = SimpleWriteReqEncoder::new(header.clone(), bin.clone(), 512, false); + let mut req_encoder: SimpleWriteReqEncoder> = + SimpleWriteReqEncoder::new(header.clone(), bin.clone(), 512, false); let mut header2 = Box::::default(); header2.set_term(4); @@ -610,7 +637,8 @@ mod tests { bin2.freeze(); // Frozen bin can't be merged with other bin. assert!(!req_encoder.amend(&header, &bin2)); - let mut req_encoder2 = SimpleWriteReqEncoder::new(header.clone(), bin2.clone(), 512, false); + let mut req_encoder2: SimpleWriteReqEncoder> = + SimpleWriteReqEncoder::new(header.clone(), bin2.clone(), 512, false); assert!(!req_encoder2.amend(&header, &bin)); // Batch should not excceed max size limit. 
@@ -620,7 +648,8 @@ mod tests { assert!(!req_encoder.amend(&header, &encoder.encode())); let (bytes, _) = req_encoder.encode(); - let mut decoder = SimpleWriteReqDecoder::new(&logger, &bytes, 0, 0).unwrap(); + let mut decoder = + SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); assert_eq!(*decoder.header(), *header); let req = decoder.next().unwrap(); let SimpleWrite::Put(put) = req else { panic!("should be put") }; From a693d6305a87d22c67c661607d9004cceba14e13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 17 Apr 2023 10:03:17 +0800 Subject: [PATCH 0632/1149] importer: added grafana for point in time restore. (#14564) close tikv/tikv#14573 Signed-off-by: Yu Juncen Co-authored-by: Ti Chi Robot --- metrics/grafana/tikv_details.json | 1449 ++++++++++++++++++++++++++++- 1 file changed, 1448 insertions(+), 1 deletion(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 184ad7a756b..adb4aa34dcd 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -40951,6 +40951,1453 @@ "title": "Backup & Import", "type": "row" }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 23763573235, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "percentunit" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 45 + }, + "hiddenSeries": false, + "id": 23763573350, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + 
"pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "interval": "", + "legendFormat": "{{instance}}", + "queryType": "randomWalk", + "refId": "A" + }, + { + "hide": false, + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "(AP)apply-99": "#88509f", + "(AP)get_permit-99": "#922870", + "(AP)queuing-99": "#9d0041", + "(DL)exec_download-99": "#73a0fe", + "(DL)queue-99": "#7d78ce", + "exec_download-99": "light-orange", + "get_permit-99": "red", + "queuing-99": "blue", + "total-99": "rgb(252, 252, 252)" + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 9, + "x": 6, + "y": 45 + }, + "hiddenSeries": false, + "id": 23763573351, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + 
"alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2473", + "alias": "total-99", + "bars": false, + "fill": 2, + "lines": true, + "linewidth": 0, + "stack": false, + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, avg(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[1m])) by (le, request))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "total-99", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, avg(rate(tikv_import_apply_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"queue|exec_download\"}[1m])) by (le, type))", + "hide": false, + "interval": "", + "legendFormat": "(DL){{type}}-99", + "refId": "C" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, avg(rate(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "hide": false, + "interval": "", + "legendFormat": "(AP){{type}}-99", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P99 RPC Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2453", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:2454", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + 
"show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 9, + "x": 15, + "y": 45 + }, + "hiddenSeries": false, + "id": 23763573352, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[$__rate_interval])) by (instance, request)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}} :: {{request}}", + "metric": "tikv_grpc_msg_duration_seconds_bucket", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request!=\"switch_mode\"}[30s])) by (request)", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "total - {{request}}", + "metric": "tikv_grpc_msg_duration_seconds_bucket", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Import RPC Ops", + "tooltip": { + "shared": 
true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "cps" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 52 + }, + "hiddenSeries": false, + "id": 23763573032, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_import_apply_cache_event{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (instance, type)", + "interval": "", + "legendFormat": "{{instance}} :: {{type}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Cache Events", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "cps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + 
"show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": 2 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 52 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763573348, + "legend": { + "show": false + }, + "pluginVersion": "7.5.11", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[$__rate_interval])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Overall RPC Duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": 2 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 52 + }, + "heatmap": {}, + "hideZeroBuckets": 
true, + "highlightCards": true, + "id": 23763573558, + "legend": { + "show": false + }, + "pluginVersion": "7.5.11", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_import_apply_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"exec_download\"}[$__rate_interval])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Read File into Memory Duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": 2 + }, + "color": { + "cardColor": "#37872D", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 52 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763573229, + "legend": { + "show": false + }, + "pluginVersion": "7.5.11", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"queuing\"}[$__rate_interval])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Queuing Time", + "tooltip": { + "show": true, + "showHistogram": true + }, + 
"type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "bytes" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 60 + }, + "hiddenSeries": false, + "id": 23763573349, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "rate(tikv_import_apply_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])", + "interval": "", + "legendFormat": "{{instance}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Apply Request Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1486", + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1487", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + 
"show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": 2 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateBlues", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 60 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763573344, + "legend": { + "show": false + }, + "pluginVersion": "7.5.11", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_import_download_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Downloaded File Size", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "decbytes", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": 2 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolatePurples", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 60 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763573233, + "legend": { + "show": false + }, + "pluginVersion": "7.5.11", + 
"reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_import_apply_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Apply Batch Size", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": null, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "decbytes", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": 2 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 60 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763573230, + "legend": { + "show": false + }, + "pluginVersion": "7.5.11", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"get_permit\"}[$__rate_interval])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Blocked by Concurrency Time", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + 
"yAxis": { + "decimals": null, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "ops" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 66 + }, + "hiddenSeries": false, + "id": 23763573118, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "rate(tikv_import_applier_event{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"begin_req\"}[$__rate_interval])", + "hide": false, + "interval": "", + "legendFormat": "{{instance}} :: {{type}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Apply Request Speed", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2886", + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:2887", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 
null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "decbytes" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 68 + }, + "hiddenSeries": false, + "id": 23763573346, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "tikv_import_apply_cached_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "interval": "", + "legendFormat": "{{instance}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Cached File in Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 68 + }, + "hiddenSeries": false, + 
"id": 23763573119, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "increase(tikv_import_applier_event{instance=~\"$instance\", type!=\"begin_req\"}[$__rate_interval])", + "interval": "", + "intervalFactor": 3, + "legendFormat": "{{instance}} :: {{type}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Engine Requests Unfinished", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:304", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:305", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": 2 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 68 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763573231, + "legend": { + "show": false + }, + "pluginVersion": "7.5.11", + "reverseYBuckets": false, + "targets": 
[ + { + "exemplar": true, + "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"apply\"}[$__rate_interval])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Apply Time", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "", + "fieldConfig": { + "defaults": { + "unit": "bytes" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 71 + }, + "hiddenSeries": false, + "id": 23763573449, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(tikv_server_mem_trace_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"raftstore-.*\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "{{instance}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": 
"Raft Store Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2886", + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:2887", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Point In Time Restore", + "type": "row" + }, { "collapsed": true, "datasource": null, @@ -44799,7 +46246,7 @@ "h": 1, "w": 24, "x": 0, - "y": 48 + "y": 49 }, "id": 4466, "panels": [ From 58986c7725efda7276bd796e9c04a5c79ead84c9 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Mon, 17 Apr 2023 10:19:17 +0800 Subject: [PATCH 0633/1149] txn: Let commit rollback pessimistic lock instead of committing as WriteType::Lock (#14557) close tikv/tikv#14551 Changes the behavior of `commit` meeting pessimistic lock, from committing as WriteType::Lock to rolling-back. It's correct considering that the key is nolonger part of that transaction. This change fixes the problem that stale pessimistic lock requests with force-locking enabled (which is used by TiDB in fair-locking mode) may overwrite the commit record of another transaction and cause data loss. 
Signed-off-by: MyonKeminta Co-authored-by: Ti Chi Robot --- src/storage/mvcc/txn.rs | 2 +- src/storage/txn/actions/commit.rs | 70 ++++++++++++++++++++++++------- 2 files changed, 55 insertions(+), 17 deletions(-) diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index f395b07e7f8..d5e55e251ae 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -1238,7 +1238,7 @@ pub(crate) mod tests { must_acquire_pessimistic_lock(&mut engine, k, k, 10, 10); must_commit_err(&mut engine, k, 20, 30); must_commit(&mut engine, k, 10, 20); - must_seek_write(&mut engine, k, 30, 10, 20, WriteType::Lock); + must_seek_write_none(&mut engine, k, 30); } #[test] diff --git a/src/storage/txn/actions/commit.rs b/src/storage/txn/actions/commit.rs index bfb1d39f768..8259991dde6 100644 --- a/src/storage/txn/actions/commit.rs +++ b/src/storage/txn/actions/commit.rs @@ -21,8 +21,8 @@ pub fn commit( crate::storage::mvcc::txn::make_txn_error(err, &key, reader.start_ts,).into() )); - let mut lock = match reader.load_lock(&key)? { - Some(mut lock) if lock.ts == reader.start_ts => { + let (mut lock, commit) = match reader.load_lock(&key)? { + Some(lock) if lock.ts == reader.start_ts => { // A lock with larger min_commit_ts than current commit_ts can't be committed if commit_ts < lock.min_commit_ts { info!( @@ -43,20 +43,21 @@ pub fn commit( // It's an abnormal routine since pessimistic locks shouldn't be committed in // our transaction model. But a pessimistic lock will be left if the pessimistic - // rollback request fails to send and the transaction need not to acquire this - // lock again(due to WriteConflict). If the transaction is committed, we should - // commit this pessimistic lock too. + // rollback request fails to send or TiKV receives duplicated stale pessimistic + // lock request, and the transaction need not to acquire this lock again(due to + // WriteConflict). 
If the transaction is committed, we should remove the + // pessimistic lock (like pessimistic_rollback) instead of committing. if lock.lock_type == LockType::Pessimistic { warn!( - "commit a pessimistic lock with Lock type"; + "rollback a pessimistic lock when trying to commit"; "key" => %key, "start_ts" => reader.start_ts, "commit_ts" => commit_ts, ); - // Commit with WriteType::Lock. - lock.lock_type = LockType::Lock; + (lock, false) + } else { + (lock, true) } - lock } _ => { return match reader.get_txn_commit_record(&key)?.info() { @@ -87,6 +88,14 @@ pub fn commit( }; } }; + + if !commit { + // Rollback a stale pessimistic lock. This function must be called by + // resolve-lock in this case. + assert_eq!(lock.lock_type, LockType::Pessimistic); + return Ok(txn.unlock_key(key, lock.is_pessimistic_txn(), TimeStamp::zero())); + } + let mut write = Write::new( WriteType::from_lock_type(lock.lock_type).unwrap(), reader.start_ts, @@ -123,7 +132,10 @@ pub mod tests { }; #[cfg(test)] use crate::storage::{ - mvcc::SHORT_VALUE_MAX_LEN, txn::commands::check_txn_status, TestEngineBuilder, TxnStatus, + mvcc::SHORT_VALUE_MAX_LEN, + txn::commands::check_txn_status, + txn::tests::{must_acquire_pessimistic_lock, must_pessimistic_prewrite_put}, + TestEngineBuilder, TxnStatus, }; use crate::storage::{ mvcc::{tests::*, MvccTxn}, @@ -135,8 +147,8 @@ pub mod tests { key: &[u8], start_ts: impl Into, commit_ts: impl Into, - ) { - must_succeed_impl(engine, key, start_ts, commit_ts, None); + ) -> Option { + must_succeed_impl(engine, key, start_ts, commit_ts, None) } pub fn must_succeed_on_region( @@ -145,8 +157,8 @@ pub mod tests { key: &[u8], start_ts: impl Into, commit_ts: impl Into, - ) { - must_succeed_impl(engine, key, start_ts, commit_ts, Some(region_id)); + ) -> Option { + must_succeed_impl(engine, key, start_ts, commit_ts, Some(region_id)) } fn must_succeed_impl( @@ -155,7 +167,7 @@ pub mod tests { start_ts: impl Into, commit_ts: impl Into, region_id: Option, - ) { + ) -> Option 
{ let mut ctx = Context::default(); if let Some(region_id) = region_id { ctx.region_id = region_id; @@ -169,8 +181,9 @@ pub mod tests { let cm = ConcurrencyManager::new(start_ts); let mut txn = MvccTxn::new(start_ts, cm); let mut reader = SnapshotReader::new(start_ts, snapshot, true); - commit(&mut txn, &mut reader, Key::from_raw(key), commit_ts.into()).unwrap(); + let res = commit(&mut txn, &mut reader, Key::from_raw(key), commit_ts.into()).unwrap(); write(engine, &ctx, txn.into_modifies()); + res } pub fn must_err( @@ -368,4 +381,29 @@ pub mod tests { assert_eq!(write.txn_source, source); } } + + #[test] + fn test_commit_rollback_pessimistic_lock() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + + let k1 = b"k1"; + let k2 = b"k2"; + + must_acquire_pessimistic_lock(&mut engine, k1, k1, 10, 10); + must_acquire_pessimistic_lock(&mut engine, k2, k1, 10, 10); + must_pessimistic_prewrite_put(&mut engine, k1, b"v1", k1, 10, 10, DoPessimisticCheck); + let res = must_succeed(&mut engine, k1, 10, 20).unwrap(); + assert_eq!(res.key, Key::from_raw(k1)); + assert_eq!(res.start_ts, 10.into()); + assert_eq!(res.commit_ts, 20.into()); + + let res = must_succeed(&mut engine, k2, 10, 20).unwrap(); + assert_eq!(res.key, Key::from_raw(k2)); + assert_eq!(res.start_ts, 10.into()); + assert_eq!(res.commit_ts, 0.into()); + + must_written(&mut engine, k1, 10, 20, WriteType::Put); + must_not_have_write(&mut engine, k2, 20); + must_not_have_write(&mut engine, k2, 10); + } } From 3bf312166e3e47d5c0755a0ba141aa252df5b7ff Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 17 Apr 2023 13:31:18 +0800 Subject: [PATCH 0634/1149] raftstore: enable v1 to receive snapshot from v2 (#14559) ref tikv/tikv#14579 enable v1 to receive snapshot from v2 Signed-off-by: Spade A Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/mod.rs | 4 +- .../raftstore/src/store/peer_storage.rs | 4 + components/raftstore/src/store/snap.rs | 50 
+++++++- components/raftstore/src/store/worker/mod.rs | 2 + .../raftstore/src/store/worker/region.rs | 5 +- components/test_raftstore-v2/src/cluster.rs | 11 +- components/test_raftstore-v2/src/node.rs | 9 +- components/test_raftstore-v2/src/server.rs | 12 ++ components/test_raftstore/src/server.rs | 4 + src/server/server.rs | 11 +- src/server/snap.rs | 87 ++++++++++--- src/server/tablet_snap.rs | 20 +-- tests/integrations/config/dynamic/snap.rs | 1 + tests/integrations/raftstore/test_snap.rs | 120 +++++++++++++++++- 14 files changed, 296 insertions(+), 44 deletions(-) diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index c007b622ee1..ed97c58ab86 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -83,7 +83,7 @@ pub use self::{ LocalReadContext, LocalReader, LocalReaderCore, PdStatsMonitor, PdTask, ReadDelegate, ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, SplitInfo, - StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, - NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, + StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, ENGINE, + NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, TIFLASH, }, }; diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 6ac38b60dfe..d89eafc3a46 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -1629,6 +1629,7 @@ pub mod tests { let td = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); let snap_dir = Builder::new().prefix("snap_dir").tempdir().unwrap(); let mgr = SnapManager::new(snap_dir.path().to_str().unwrap()); + mgr.init().unwrap(); let mut worker = Worker::new("region-worker").lazy_build("region-worker"); let sched = worker.scheduler(); let (dummy_scheduler, _) = dummy_scheduler(); @@ -1765,6 
+1766,7 @@ pub mod tests { let td = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); let snap_dir = Builder::new().prefix("snap_dir").tempdir().unwrap(); let mut mgr = SnapManager::new(snap_dir.path().to_str().unwrap()); + mgr.init().unwrap(); mgr.set_enable_multi_snapshot_files(true); mgr.set_max_per_file_size(500); let mut worker = Worker::new("region-worker").lazy_build("region-worker"); @@ -1836,6 +1838,7 @@ pub mod tests { let td = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); let snap_dir = Builder::new().prefix("snap_dir").tempdir().unwrap(); let mgr = SnapManager::new(snap_dir.path().to_str().unwrap()); + mgr.init().unwrap(); let mut worker = Worker::new("region-worker").lazy_build("region-worker"); let sched = worker.scheduler(); let (dummy_scheduler, _) = dummy_scheduler(); @@ -1915,6 +1918,7 @@ pub mod tests { let td1 = Builder::new().prefix("tikv-store-test").tempdir().unwrap(); let snap_dir = Builder::new().prefix("snap").tempdir().unwrap(); let mgr = SnapManager::new(snap_dir.path().to_str().unwrap()); + mgr.init().unwrap(); let mut worker = LazyWorker::new("snap-manager"); let sched = worker.scheduler(); let (dummy_scheduler, _) = dummy_scheduler(); diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index d0c55c144ed..bdf96126dd2 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1387,6 +1387,9 @@ struct SnapManagerCore { pub struct SnapManager { core: SnapManagerCore, max_total_size: Arc, + + // only used to receive snapshot from v2 + tablet_snap_manager: TabletSnapManager, } impl Clone for SnapManager { @@ -1394,6 +1397,7 @@ impl Clone for SnapManager { SnapManager { core: self.core.clone(), max_total_size: self.max_total_size.clone(), + tablet_snap_manager: self.tablet_snap_manager.clone(), } } } @@ -1433,6 +1437,8 @@ impl SnapManager { } } } + + self.tablet_snap_manager.init()?; Ok(()) } @@ -1620,7 +1626,9 @@ impl SnapManager { 
/// /// NOTE: don't call it in raftstore thread. pub fn get_total_snap_size(&self) -> Result { - self.core.get_total_snap_size() + let size_v1 = self.core.get_total_snap_size()?; + let size_v2 = self.tablet_snap_manager.total_snap_size()?; + Ok(size_v1 + size_v2) } pub fn max_total_snap_size(&self) -> u64 { @@ -1755,6 +1763,14 @@ impl SnapManager { pub fn delete_snapshot(&self, key: &SnapKey, snap: &Snapshot, check_entry: bool) -> bool { self.core.delete_snapshot(key, snap, check_entry) } + + pub fn tablet_snap_manager(&self) -> &TabletSnapManager { + &self.tablet_snap_manager + } + + pub fn limiter(&self) -> &Limiter { + &self.core.limiter + } } impl SnapManagerCore { @@ -1896,9 +1912,14 @@ impl SnapManagerBuilder { } else { u64::MAX }; + let path = path.into(); + let mut path_v2 = path.clone(); + // the path for tablet snap manager, it will be empty if the cluster is not + // to receive snapshot from cluster of raftstore-v2 + path_v2.push_str("_v2"); let mut snapshot = SnapManager { core: SnapManagerCore { - base: path.into(), + base: path, registry: Default::default(), limiter, temp_sst_id: Arc::new(AtomicU64::new(0)), @@ -1910,6 +1931,7 @@ impl SnapManagerBuilder { stats: Default::default(), }, max_total_size: Arc::new(AtomicU64::new(max_total_size)), + tablet_snap_manager: TabletSnapManager::new_without_init(&path_v2), }; snapshot.set_max_per_file_size(self.max_per_file_size); // set actual max_per_file_size snapshot @@ -1999,6 +2021,29 @@ impl TabletSnapManager { }) } + pub fn new_without_init>(path: T) -> Self { + let path = path.into(); + Self { + base: path, + receiving: Arc::default(), + stats: Arc::default(), + } + } + + pub fn init(&self) -> io::Result<()> { + if !self.base.exists() { + file_system::create_dir_all(&self.base)?; + } + if !self.base.is_dir() { + return Err(io::Error::new( + ErrorKind::Other, + format!("{} should be a directory", self.base.display()), + )); + } + file_system::clean_up_trash(&self.base)?; + Ok(()) + } + pub fn 
begin_snapshot(&self, key: TabletSnapKey, start: Instant, generate_duration_sec: u64) { let mut stat = SnapshotStat::default(); stat.set_generate_duration_sec(generate_duration_sec); @@ -3015,6 +3060,7 @@ pub mod tests { let snap_mgr = SnapManagerBuilder::default() .max_total_size(max_total_size) .build::<_>(snapfiles_path.path().to_str().unwrap()); + snap_mgr.init().unwrap(); let snapshot = engine.kv.snapshot(); // Add an oldest snapshot for receiving. diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index ac23f4e58d5..eddcfe1757a 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -16,6 +16,8 @@ mod split_check; mod split_config; mod split_controller; +pub use region::{ENGINE, TIFLASH}; + #[cfg(test)] pub use self::region::tests::make_raftstore_cfg as make_region_worker_raftstore_cfg; pub use self::{ diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 7dc894204ec..d6d9d0272d3 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -52,8 +52,8 @@ use crate::{ const CLEANUP_MAX_REGION_COUNT: usize = 64; -const TIFLASH: &str = "tiflash"; -const ENGINE: &str = "engine"; +pub const TIFLASH: &str = "tiflash"; +pub const ENGINE: &str = "engine"; /// Region related task #[derive(Debug)] @@ -1143,6 +1143,7 @@ pub(crate) mod tests { let snap_dir = Builder::new().prefix("snap_dir").tempdir().unwrap(); let mgr = SnapManager::new(snap_dir.path().to_str().unwrap()); + mgr.init().unwrap(); let bg_worker = Worker::new("snap-manager"); let mut worker = bg_worker.lazy_build("snap-manager"); let sched = worker.scheduler(); diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 015062534e4..eafa7a45403 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ 
b/components/test_raftstore-v2/src/cluster.rs @@ -36,7 +36,7 @@ use pd_client::PdClient; use raftstore::{ store::{ cmd_resp, initial_region, util::check_key_in_region, Bucket, BucketRange, Callback, - RegionSnapshot, WriteResponse, INIT_EPOCH_CONF_VER, INIT_EPOCH_VER, + RegionSnapshot, TabletSnapManager, WriteResponse, INIT_EPOCH_CONF_VER, INIT_EPOCH_VER, }, Error, Result, }; @@ -97,6 +97,7 @@ pub trait Simulator { fn get_router(&self, node_id: u64) -> Option>; fn get_snap_dir(&self, node_id: u64) -> String; + fn get_snap_mgr(&self, node_id: u64) -> &TabletSnapManager; fn send_raft_msg(&mut self, msg: RaftMessage) -> Result<()>; fn read(&mut self, request: RaftCmdRequest, timeout: Duration) -> Result { @@ -1530,6 +1531,10 @@ impl, EK: KvEngine> Cluster { self.sim.rl().get_snap_dir(node_id) } + pub fn get_snap_mgr(&self, node_id: u64) -> TabletSnapManager { + self.sim.rl().get_snap_mgr(node_id).clone() + } + pub fn get_router(&self, node_id: u64) -> Option> { self.sim.rl().get_router(node_id) } @@ -1696,6 +1701,10 @@ impl WrapFactory { let region_id = self.region_id_of_key(key); self.tablet_registry.get(region_id)?.latest().cloned() } + + pub fn get_tablet_by_id(&self, id: u64) -> Option { + self.tablet_registry.get(id)?.latest().cloned() + } } impl Peekable for WrapFactory { diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 0e96d976449..c770a6144bd 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -150,7 +150,7 @@ pub struct NodeCluster { nodes: HashMap>, simulate_trans: HashMap>, concurrency_managers: HashMap, - // snap_mgrs: HashMap, + snap_mgrs: HashMap, } impl NodeCluster { @@ -161,7 +161,7 @@ impl NodeCluster { nodes: HashMap::default(), simulate_trans: HashMap::default(), concurrency_managers: HashMap::default(), - // snap_mgrs: HashMap::default(), + snap_mgrs: HashMap::default(), } } } @@ -237,6 +237,7 @@ impl Simulator for NodeCluster { let &(ref 
snap_mgr, _) = &trans.snap_paths[&node_id]; (snap_mgr.clone(), None) }; + self.snap_mgrs.insert(node_id, snap_mgr.clone()); let raft_router = RaftRouter::new_with_store_meta(node.router().clone(), store_meta); // Create coprocessor. @@ -421,6 +422,10 @@ impl Simulator for NodeCluster { .to_owned() } + fn get_snap_mgr(&self, node_id: u64) -> &TabletSnapManager { + self.snap_mgrs.get(&node_id).unwrap() + } + fn add_recv_filter(&mut self, node_id: u64, filter: Box) { let mut trans = self.trans.core.lock().unwrap(); trans.routers.get_mut(&node_id).unwrap().add_filter(filter); diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 804a5e4a22f..3de9e5aa956 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -805,6 +805,10 @@ impl Simulator for ServerCluster { .unwrap() .to_owned() } + + fn get_snap_mgr(&self, node_id: u64) -> &TabletSnapManager { + self.snap_mgrs.get(&node_id).unwrap() + } } impl Cluster, EK> { @@ -833,6 +837,14 @@ impl Cluster, EK> { } panic!("failed to get snapshot of region {}", region_id); } + + pub fn get_addr(&self, node_id: u64) -> String { + self.sim.rl().get_addr(node_id) + } + + pub fn get_security_mgr(&self) -> Arc { + self.sim.rl().security_mgr.clone() + } } pub fn new_server_cluster( diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 967ae4b980c..a77fc5d3dd2 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -809,6 +809,10 @@ impl Cluster { pub fn raft_extension(&self, node_id: u64) -> SimulateRaftExtension { self.sim.rl().storages[&node_id].raft_extension() } + + pub fn get_addr(&self, node_id: u64) -> String { + self.sim.rl().get_addr(node_id) + } } pub fn new_server_cluster(id: u64, count: usize) -> Cluster { diff --git a/src/server/server.rs b/src/server/server.rs index 15de7f0d4e7..8e1a33880d6 100644 --- a/src/server/server.rs +++ 
b/src/server/server.rs @@ -13,7 +13,7 @@ use futures::{compat::Stream01CompatExt, stream::StreamExt}; use grpcio::{ChannelBuilder, Environment, ResourceQuota, Server as GrpcServer, ServerBuilder}; use grpcio_health::{create_health, HealthService, ServingStatus}; use kvproto::tikvpb::*; -use raftstore::store::{CheckLeaderTask, SnapManager, TabletSnapManager}; +use raftstore::store::{CheckLeaderTask, SnapManager, TabletSnapManager, ENGINE, TIFLASH}; use security::SecurityManager; use tikv_util::{ config::VersionTrack, @@ -70,6 +70,7 @@ pub struct Server { // For sending/receiving snapshots. snap_mgr: Either, snap_worker: LazyWorker, + tiflash_engine: bool, // Currently load statistics is done in the thread. stats_pool: Option, @@ -178,6 +179,12 @@ where let trans = ServerTransport::new(raft_client); health_service.set_serving_status("", ServingStatus::NotServing); + let tiflash_engine = cfg + .value() + .labels + .iter() + .any(|entry| entry.0 == ENGINE && entry.1 == TIFLASH); + let svr = Server { env: Arc::clone(&env), builder_or_server: Some(builder), @@ -193,6 +200,7 @@ where debug_thread_pool, health_service, timer: GLOBAL_TIMER_HANDLE.clone(), + tiflash_engine, }; Ok(svr) @@ -262,6 +270,7 @@ where self.raft_router.clone(), security_mgr, cfg, + self.tiflash_engine, ); self.snap_worker.start(snap_runner); } diff --git a/src/server/snap.rs b/src/server/snap.rs index d06e49ab7a8..0512a75214a 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -13,11 +13,12 @@ use std::{ use file_system::{IoType, WithIoType}; use futures::{ - future::{Future, FutureExt, TryFutureExt}, + future::{Future, TryFutureExt}, sink::SinkExt, stream::{Stream, StreamExt, TryStreamExt}, task::{Context, Poll}, }; +use futures_util::FutureExt; use grpcio::{ ChannelBuilder, ClientStreamingSink, DuplexSink, Environment, RequestStream, RpcStatus, RpcStatusCode, WriteFlags, @@ -43,7 +44,7 @@ use tikv_util::{ use tokio::runtime::{Builder as RuntimeBuilder, Runtime}; use super::{metrics::*, 
Config, Error, Result}; -use crate::tikv_util::sys::thread::ThreadBuildWrapper; +use crate::{server::tablet_snap::NoSnapshotCache, tikv_util::sys::thread::ThreadBuildWrapper}; pub type Callback = Box) + Send>; @@ -342,7 +343,6 @@ fn recv_snap( } context.finish(raft_router) }; - async move { match recv_task.await { Ok(()) => sink.success(Done::default()).await.map_err(Error::from), @@ -364,17 +364,23 @@ pub struct Runner { cfg: Config, sending_count: Arc, recving_count: Arc, + can_receive_tablet_snapshot: bool, } impl Runner { + // `can_receive_tablet_snapshot` being true means we are using tiflash engine + // within a raft group with raftstore-v2. It is set be true to enable runner + // to receive tablet snapshot from v2. pub fn new( env: Arc, snap_mgr: SnapManager, r: R, security_mgr: Arc, cfg: Arc>, + can_receive_tablet_snapshot: bool, ) -> Self { let cfg_tracker = cfg.clone().tracker("snap-sender".to_owned()); + let config = cfg.value().clone(); let snap_worker = Runner { env, snap_mgr, @@ -388,9 +394,10 @@ impl Runner { raft_router: r, security_mgr, cfg_tracker, - cfg: cfg.value().clone(), + cfg: config, sending_count: Arc::new(AtomicUsize::new(0)), recving_count: Arc::new(AtomicUsize::new(0)), + can_receive_tablet_snapshot, }; snap_worker } @@ -415,6 +422,22 @@ impl Runner { self.cfg = incoming.clone(); } } + + fn receiving_busy(&self) -> Option { + let task_num = self.recving_count.load(Ordering::SeqCst); + if task_num >= self.cfg.concurrent_recv_snap_limit { + warn!("too many recving snapshot tasks, ignore"); + return Some(RpcStatus::with_message( + RpcStatusCode::RESOURCE_EXHAUSTED, + format!( + "the number of received snapshot tasks {} exceeded the limitation {}", + task_num, self.cfg.concurrent_recv_snap_limit + ), + )); + } + + None + } } impl Runnable for Runner { @@ -423,19 +446,11 @@ impl Runnable for Runner { fn run(&mut self, task: Task) { match task { Task::Recv { stream, sink } => { - let task_num = self.recving_count.load(Ordering::SeqCst); - if 
task_num >= self.cfg.concurrent_recv_snap_limit { - warn!("too many recving snapshot tasks, ignore"); - let status = RpcStatus::with_message( - RpcStatusCode::RESOURCE_EXHAUSTED, - format!( - "the number of received snapshot tasks {} exceeded the limitation {}", - task_num, self.cfg.concurrent_recv_snap_limit - ), - ); + if let Some(status) = self.receiving_busy() { self.pool.spawn(sink.fail(status)); return; } + SNAP_TASK_COUNTER_STATIC.recv.inc(); let snap_mgr = self.snap_mgr.clone(); @@ -451,12 +466,44 @@ impl Runnable for Runner { }; self.pool.spawn(task); } - Task::RecvTablet { sink, .. } => { - let status = RpcStatus::with_message( - RpcStatusCode::UNIMPLEMENTED, - "tablet snap is not supported".to_string(), - ); - self.pool.spawn(sink.fail(status).map(|_| ())); + Task::RecvTablet { stream, sink } => { + if !self.can_receive_tablet_snapshot { + let status = RpcStatus::with_message( + RpcStatusCode::UNIMPLEMENTED, + "tablet snap is not supported".to_string(), + ); + self.pool.spawn(sink.fail(status).map(|_| ())); + return; + } + + if let Some(status) = self.receiving_busy() { + self.pool.spawn(sink.fail(status)); + return; + } + + SNAP_TASK_COUNTER_STATIC.recv.inc(); + + let snap_mgr = self.snap_mgr.tablet_snap_manager().clone(); + let raft_router = self.raft_router.clone(); + let recving_count = self.recving_count.clone(); + recving_count.fetch_add(1, Ordering::SeqCst); + let limiter = self.snap_mgr.limiter().clone(); + let task = async move { + let result = crate::server::tablet_snap::recv_snap( + stream, + sink, + snap_mgr, + raft_router, + NoSnapshotCache, // do not use cache in v1 + limiter, + ) + .await; + recving_count.fetch_sub(1, Ordering::SeqCst); + if let Err(e) = result { + error!("failed to recv snapshot"; "err" => %e); + } + }; + self.pool.spawn(task); } Task::Send { addr, msg, cb } => { fail_point!("send_snapshot"); diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index 4524b8645ff..cbcd1a228f8 100644 --- 
a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -123,7 +123,7 @@ impl SnapCacheBuilder for NoSnapshotCache { } } -struct RecvTabletSnapContext<'a> { +pub(crate) struct RecvTabletSnapContext<'a> { key: TabletSnapKey, raft_msg: RaftMessage, use_cache: bool, @@ -134,7 +134,7 @@ struct RecvTabletSnapContext<'a> { } impl<'a> RecvTabletSnapContext<'a> { - fn new(mut head: TabletSnapshotRequest, mgr: &'a TabletSnapManager) -> Result { + pub(crate) fn new(mut head: TabletSnapshotRequest, mgr: &'a TabletSnapManager) -> Result { if !head.has_head() { return Err(box_err!("no raft message in the first chunk")); } @@ -161,7 +161,7 @@ impl<'a> RecvTabletSnapContext<'a> { }) } - fn finish(self, raft_router: R) -> Result<()> { + pub fn finish(self, raft_router: R) -> Result<()> { let key = self.key; raft_router.feed(self.raft_msg, true); info!("saving all snapshot files"; "snap_key" => %key, "takes" => ?self.start.saturating_elapsed()); @@ -169,7 +169,7 @@ impl<'a> RecvTabletSnapContext<'a> { } } -fn io_type_from_raft_message(msg: &RaftMessage) -> Result { +pub(crate) fn io_type_from_raft_message(msg: &RaftMessage) -> Result { let snapshot = msg.get_message().get_snapshot(); let data = snapshot.get_data(); let mut snapshot_data = RaftSnapshotData::default(); @@ -194,7 +194,7 @@ fn protocol_error(exp: &str, act: impl Debug) -> Error { /// actual data of an SST; /// 3. The last `PREVIEW_CHUNK_LEN` bytes are the same, this contains checksum, /// properties and other medata of an SST. 
-async fn is_sst_match_preview( +pub(crate) async fn is_sst_match_preview( preview_meta: &TabletSnapshotFileMeta, target: &Path, buffer: &mut Vec, @@ -233,7 +233,7 @@ async fn is_sst_match_preview( Ok(*buffer == preview_meta.trailing_chunk) } -async fn cleanup_cache( +pub(crate) async fn cleanup_cache( path: &Path, stream: &mut (impl Stream> + Unpin), sink: &mut (impl Sink<(TabletSnapshotResponse, WriteFlags), Error = grpcio::Error> + Unpin), @@ -291,7 +291,7 @@ async fn cleanup_cache( Ok((reused, missing)) } -async fn accept_one_file( +pub(crate) async fn accept_one_file( path: &Path, mut chunk: TabletSnapshotFileChunk, stream: &mut (impl Stream> + Unpin), @@ -334,7 +334,7 @@ async fn accept_one_file( } } -async fn accept_missing( +pub(crate) async fn accept_missing( path: &Path, missing_ssts: Vec, stream: &mut (impl Stream> + Unpin), @@ -380,7 +380,7 @@ async fn accept_missing( } } -async fn recv_snap_files<'a>( +pub(crate) async fn recv_snap_files<'a>( snap_mgr: &'a TabletSnapManager, cache_builder: impl SnapCacheBuilder, mut stream: impl Stream> + Unpin, @@ -426,7 +426,7 @@ async fn recv_snap_files<'a>( Ok(context) } -async fn recv_snap( +pub(crate) async fn recv_snap( stream: RequestStream, sink: DuplexSink, snap_mgr: TabletSnapManager, diff --git a/tests/integrations/config/dynamic/snap.rs b/tests/integrations/config/dynamic/snap.rs index bb91d0d62eb..fa1d6a6fe52 100644 --- a/tests/integrations/config/dynamic/snap.rs +++ b/tests/integrations/config/dynamic/snap.rs @@ -65,6 +65,7 @@ fn start_server( RaftRouterWrap::new(raft_router), security_mgr, Arc::clone(&server_config), + false, ); snap_worker.start(snap_runner); diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index fc0364c13b0..f3bd7583ab3 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -11,21 +11,26 @@ use std::{ time::Duration, }; -use engine_traits::{KvEngine, RaftEngineReadOnly}; +use 
engine_rocks::{RocksCfOptions, RocksDbOptions}; +use engine_traits::{Checkpointer, KvEngine, Peekable, RaftEngineReadOnly, SyncMutable, LARGE_CFS}; use file_system::{IoOp, IoType}; use futures::executor::block_on; use grpcio::Environment; use kvproto::raft_serverpb::*; use raft::eraftpb::{Message, MessageType, Snapshot}; -use raftstore::{store::*, Result}; +use raftstore::{ + store::{snap::TABLET_SNAPSHOT_VERSION, *}, + Result, +}; use rand::Rng; use security::SecurityManager; use test_raftstore::*; use test_raftstore_macro::test_case; -use tikv::server::snap::send_snap; +use test_raftstore_v2::WrapFactory; +use tikv::server::{snap::send_snap, tablet_snap::send_snap as send_snap_v2}; use tikv_util::{ config::*, - time::{Instant, UnixSecs}, + time::{Instant, Limiter, UnixSecs}, HandyRwLock, }; @@ -733,3 +738,110 @@ fn test_snapshot_clean_up_logs_with_log_gc() { // No new log is proposed, so there should be no log at all. assert!(dest.is_empty(), "{:?}", dest); } + +fn generate_snap( + engine: &WrapFactory, + region_id: u64, + snap_mgr: &TabletSnapManager, +) -> (RaftMessage, TabletSnapKey) { + let tablet = engine.get_tablet_by_id(region_id).unwrap(); + let region_state = engine.region_local_state(region_id).unwrap().unwrap(); + let apply_state = engine.raft_apply_state(region_id).unwrap().unwrap(); + + // Construct snapshot by hand + let mut snapshot = Snapshot::default(); + snapshot.mut_metadata().set_term(apply_state.commit_term); + snapshot.mut_metadata().set_index(apply_state.applied_index); + let conf_state = raftstore::store::util::conf_state_from_region(region_state.get_region()); + snapshot.mut_metadata().set_conf_state(conf_state); + + let mut snap_data = RaftSnapshotData::default(); + snap_data.set_region(region_state.get_region().clone()); + snap_data.set_version(TABLET_SNAPSHOT_VERSION); + use protobuf::Message; + snapshot.set_data(snap_data.write_to_bytes().unwrap().into()); + let snap_key = TabletSnapKey::from_region_snap(region_id, 1, &snapshot); + 
let checkpointer_path = snap_mgr.tablet_gen_path(&snap_key); + let mut checkpointer = tablet.new_checkpointer().unwrap(); + checkpointer + .create_at(checkpointer_path.as_path(), None, 0) + .unwrap(); + + let mut msg = RaftMessage::default(); + msg.region_id = region_id; + msg.set_to_peer(new_peer(1, 1)); + msg.mut_message().set_snapshot(snapshot); + msg.mut_message().set_msg_type(MessageType::MsgSnapshot); + msg.set_region_epoch(region_state.get_region().get_region_epoch().clone()); + + (msg, snap_key) +} + +#[test] +fn test_v1_receive_snap_from_v2() { + let test_receive_snap = |key_num| { + let mut cluster_v1 = test_raftstore::new_server_cluster(1, 1); + let mut cluster_v2 = test_raftstore_v2::new_server_cluster(1, 1); + + cluster_v1 + .cfg + .server + .labels + .insert(String::from("engine"), String::from("tiflash")); + + cluster_v1.run(); + cluster_v2.run(); + + let s1_addr = cluster_v1.get_addr(1); + let region = cluster_v2.get_region(b""); + let region_id = region.get_id(); + let engine = cluster_v2.get_engine(1); + let tablet = engine.get_tablet_by_id(region_id).unwrap(); + + for i in 0..key_num { + let k = format!("zk{:04}", i); + tablet.put(k.as_bytes(), &random_long_vec(1024)).unwrap(); + } + + let snap_mgr = cluster_v2.get_snap_mgr(1); + let security_mgr = cluster_v2.get_security_mgr(); + let (msg, snap_key) = generate_snap(&engine, region_id, &snap_mgr); + let cfg = tikv::server::Config::default(); + let limit = Limiter::new(f64::INFINITY); + let env = Arc::new(Environment::new(1)); + let _ = block_on(async { + send_snap_v2(env, snap_mgr, security_mgr, &cfg, &s1_addr, msg, limit) + .unwrap() + .await + }); + + // The snapshot has been received by cluster v1, so check it's completeness + let snap_mgr = cluster_v1.get_snap_mgr(1); + let path = snap_mgr.tablet_snap_manager().final_recv_path(&snap_key); + let rocksdb = engine_rocks::util::new_engine_opt( + path.as_path().to_str().unwrap(), + RocksDbOptions::default(), + LARGE_CFS + .iter() + .map(|&cf| (cf, 
RocksCfOptions::default())) + .collect(), + ) + .unwrap(); + + for i in 0..key_num { + let k = format!("zk{:04}", i); + assert!( + rocksdb + .get_value_cf("default", k.as_bytes()) + .unwrap() + .is_some() + ); + } + }; + + // test small snapshot + test_receive_snap(20); + + // test large snapshot + test_receive_snap(5000); +} From 90477057e5b396de1593e8fb6d738469d9f987b4 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 18 Apr 2023 10:13:18 +0800 Subject: [PATCH 0635/1149] raftstore: fix snap manager init (#14591) ref tikv/tikv#14579 fix snap manager init Signed-off-by: Spade A --- components/raftstore/src/store/snap.rs | 51 +++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index bdf96126dd2..091609cf63e 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1408,6 +1408,12 @@ impl SnapManager { } pub fn init(&self) -> io::Result<()> { + self.init_core()?; + self.tablet_snap_manager.init()?; + Ok(()) + } + + fn init_core(&self) -> io::Result<()> { let enc_enabled = self.core.encryption_key_manager.is_some(); info!( "Initializing SnapManager, encryption is enabled: {}", @@ -1438,7 +1444,6 @@ impl SnapManager { } } - self.tablet_snap_manager.init()?; Ok(()) } @@ -2163,7 +2168,7 @@ impl TabletSnapManager { #[cfg(test)] pub mod tests { use std::{ - cmp, + cmp, fs, io::{self, Read, Seek, SeekFrom, Write}, path::{Path, PathBuf}, sync::{ @@ -3197,4 +3202,46 @@ pub mod tests { assert!(snap_mgr.delete_snapshot(&key, &s1, false)); } } + + #[test] + fn test_init() { + let builder = SnapManagerBuilder::default(); + let snap_dir = Builder::new() + .prefix("test_snap_path_does_not_exist") + .tempdir() + .unwrap(); + let path = snap_dir.path().join("snap"); + let snap_mgr = builder.build(path.as_path().to_str().unwrap()); + snap_mgr.init().unwrap(); + + 
assert!(path.exists()); + let mut path = path.as_path().to_str().unwrap().to_string(); + path.push_str("_v2"); + assert!(Path::new(&path).exists()); + + let builder = SnapManagerBuilder::default(); + let snap_dir = Builder::new() + .prefix("test_snap_path_exist") + .tempdir() + .unwrap(); + let path = snap_dir.path(); + let snap_mgr = builder.build(path.to_str().unwrap()); + snap_mgr.init().unwrap(); + + let mut path = path.to_str().unwrap().to_string(); + path.push_str("_v2"); + assert!(Path::new(&path).exists()); + + let builder = SnapManagerBuilder::default(); + let snap_dir = Builder::new() + .prefix("test_tablet_snap_path_exist") + .tempdir() + .unwrap(); + let path = snap_dir.path().join("snap/v2"); + fs::create_dir_all(path).unwrap(); + let path = snap_dir.path().join("snap"); + let snap_mgr = builder.build(path.to_str().unwrap()); + snap_mgr.init().unwrap(); + assert!(path.exists()); + } } From dee46499a6f288dc8222817fc1755aa4d667cefa Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 18 Apr 2023 13:51:19 +0800 Subject: [PATCH 0636/1149] *: support observe apply in raftstore v2 (#14562) ref tikv/tikv#14542 *: support observe apply in raftstore v2 Signed-off-by: Neil Shen Co-authored-by: Ti Chi Robot --- components/cdc/src/endpoint.rs | 6 +- components/cdc/tests/mod.rs | 1 + components/raftstore-v2/src/fsm/apply.rs | 7 ++- .../src/operation/command/admin/split.rs | 7 ++- .../raftstore-v2/src/operation/command/mod.rs | 35 ++++++++--- .../src/operation/query/capture.rs | 61 +++++++++++++++---- components/raftstore-v2/src/raft/apply.rs | 33 ++++++++-- components/raftstore-v2/src/raft/storage.rs | 15 +++-- components/raftstore/src/coprocessor/mod.rs | 10 ++- .../raftstore/src/store/region_snapshot.rs | 5 ++ .../raftstore/src/store/simple_write.rs | 55 ++++++++++++++++- components/server/src/server.rs | 2 + components/server/src/server2.rs | 42 +++++++------ src/config/mod.rs | 12 +++- 14 files changed, 234 insertions(+), 57 deletions(-) diff --git 
a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 68650130211..dfeb4f78045 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -333,6 +333,7 @@ pub struct Endpoint { /// locks when calculating resolved_ts. concurrency_manager: ConcurrencyManager, + raftstore_v2: bool, config: CdcConfig, api_version: ApiVersion, @@ -362,6 +363,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, scheduler: Scheduler, @@ -430,6 +432,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, E: KvEngine, S: StoreRegionMeta> Endpoint ?e); return; } - if let Err(e) = validate_cfg.validate() { + if let Err(e) = validate_cfg.validate(self.raftstore_v2) { warn!("cdc config update failed"; "error" => ?e); return; } @@ -1375,6 +1378,7 @@ mod tests { let ep = Endpoint::new( DEFAULT_CLUSTER_ID, cfg, + false, api_version, pd_client, task_sched.clone(), diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index 89ed4e6dbb1..f2663c79287 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -182,6 +182,7 @@ impl TestSuiteBuilder { let mut cdc_endpoint = cdc::Endpoint::new( DEFAULT_CLUSTER_ID, &cfg, + false, cluster.cfg.storage.api_version(), pd_cli.clone(), worker.scheduler(), diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 6c0989e72ae..08d7f7946ec 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -11,7 +11,10 @@ use engine_traits::{FlushState, KvEngine, TabletRegistry}; use futures::{compat::Future01CompatExt, FutureExt, StreamExt}; use kvproto::{metapb, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; -use raftstore::store::{Config, ReadTask}; +use raftstore::{ + coprocessor::CoprocessorHost, + store::{Config, ReadTask}, +}; use slog::Logger; use sst_importer::SstImporter; use tikv_util::{ @@ -79,6 +82,7 @@ impl ApplyFsm { applied_term: u64, buckets: Option, sst_importer: Arc, + 
coprocessor_host: CoprocessorHost, logger: Logger, ) -> (ApplyScheduler, Self) { let (tx, rx) = future::unbounded(WakePolicy::Immediately); @@ -94,6 +98,7 @@ impl ApplyFsm { applied_term, buckets, sst_importer, + coprocessor_host, logger, ); ( diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index e1577830d25..0b53476273f 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -878,7 +878,10 @@ mod test { raft_cmdpb::{BatchSplitRequest, SplitRequest}, raft_serverpb::{PeerState, RegionLocalState}, }; - use raftstore::store::{cmd_resp::new_error, Config}; + use raftstore::{ + coprocessor::CoprocessorHost, + store::{cmd_resp::new_error, Config}, + }; use slog::o; use tempfile::TempDir; use tikv_util::{ @@ -1026,6 +1029,7 @@ mod test { let (read_scheduler, _rx) = dummy_scheduler(); let (reporter, _) = MockReporter::new(); let (_tmp_dir, importer) = create_tmp_importer(); + let host = CoprocessorHost::::default(); let mut apply = Apply::new( &Config::default(), region @@ -1043,6 +1047,7 @@ mod test { 5, None, importer, + host, logger.clone(), ); diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index b45ad23a1b1..af31dc5a397 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -29,6 +29,7 @@ use kvproto::raft_cmdpb::{ use raft::eraftpb::{ConfChange, ConfChangeV2, Entry, EntryType}; use raft_proto::ConfChangeI; use raftstore::{ + coprocessor::ObserveLevel, store::{ cmd_resp, fsm::{ @@ -75,7 +76,12 @@ pub type SimpleWriteReqEncoder = use self::write::SimpleWrite; -fn parse_at(logger: &slog::Logger, buf: &[u8], index: u64, term: u64) -> M { +pub(crate) fn parse_at( + logger: &slog::Logger, + buf: &[u8], + index: u64, + term: u64, +) -> M { let mut m = 
M::default(); match m.merge_from_bytes(buf) { Ok(()) => m, @@ -141,6 +147,7 @@ impl Peer { self.entry_storage().applied_term(), buckets, store_ctx.sst_importer.clone(), + store_ctx.coprocessor_host.clone(), logger, ); @@ -533,8 +540,8 @@ impl Apply { wb.set_save_point(); set_save_point = true; } - let resp = match self.apply_entry(&e).await { - Ok(resp) => resp, + let (req, resp) = match self.apply_entry(&e).await { + Ok(req_resp) => req_resp, Err(e) => { if let Some(wb) = &mut self.write_batch { if set_save_point { @@ -543,9 +550,10 @@ impl Apply { wb.clear(); } } - cmd_resp::new_error(e) + (RaftCmdRequest::default(), cmd_resp::new_error(e)) } }; + self.observe_apply(e.get_index(), e.get_term(), req, &resp); self.callbacks_mut().push((ch, resp)); } else { assert!(ch.is_empty()); @@ -557,7 +565,7 @@ impl Apply { } #[inline] - async fn apply_entry(&mut self, entry: &Entry) -> Result { + async fn apply_entry(&mut self, entry: &Entry) -> Result<(RaftCmdRequest, RaftCmdResponse)> { let mut conf_change = None; let log_index = entry.get_index(); let req = match entry.get_entry_type() { @@ -576,7 +584,11 @@ impl Apply { true, true, )?; - let res = Ok(new_response(decoder.header())); + let mut req = RaftCmdRequest::default(); + if self.observe().level != ObserveLevel::None { + req = decoder.to_raft_cmd_request(); + } + let resp = new_response(decoder.header()); for req in decoder { match req { SimpleWrite::Put(put) => { @@ -599,7 +611,7 @@ impl Apply { } } } - return res; + return Ok((req, resp)); } Err(req) => req, }, @@ -657,7 +669,7 @@ impl Apply { } let mut resp = new_response(req.get_header()); resp.set_admin_response(admin_resp); - Ok(resp) + Ok((req, resp)) } else { for r in req.get_requests() { match r.get_cmd_type() { @@ -684,7 +696,8 @@ impl Apply { _ => unimplemented!(), } } - Ok(new_response(req.get_header())) + let resp = new_response(req.get_header()); + Ok((req, resp)) } } @@ -772,6 +785,10 @@ impl Apply { buckets.clear_stats(); } + // Call it before 
invoking callback for preventing Commit is executed before + // Prewrite is observed. + self.flush_observed_apply(); + // Report result first and then invoking callbacks. This may delays callback a // little bit, but can make sure all following messages must see the side // effect of admin commands. diff --git a/components/raftstore-v2/src/operation/query/capture.rs b/components/raftstore-v2/src/operation/query/capture.rs index 03014644261..94b58f41809 100644 --- a/components/raftstore-v2/src/operation/query/capture.rs +++ b/components/raftstore-v2/src/operation/query/capture.rs @@ -1,12 +1,12 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use std::sync::Arc; +use std::{mem, sync::Arc}; use engine_traits::{KvEngine, RaftEngine}; use fail::fail_point; -use kvproto::raft_cmdpb::RaftCmdResponse; +use kvproto::raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}; use raftstore::{ - coprocessor::ObserveHandle, + coprocessor::{Cmd, CmdBatch, ObserveHandle, ObserveLevel}, store::{ cmd_resp, fsm::{ @@ -18,6 +18,7 @@ use raftstore::{ RegionSnapshot, }, }; +use slog::info; use crate::{ fsm::{ApplyResReporter, PeerFsmDelegate}, @@ -67,9 +68,9 @@ impl Apply { let ChangeObserver { region_id, ty } = observer; let is_stale_cmd = match ty { - ObserverType::Cdc(ObserveHandle { id, .. }) => self.observe_info_mut().cdc_id.id > id, - ObserverType::Rts(ObserveHandle { id, .. }) => self.observe_info_mut().rts_id.id > id, - ObserverType::Pitr(ObserveHandle { id, .. }) => self.observe_info_mut().pitr_id.id > id, + ObserverType::Cdc(ObserveHandle { id, .. }) => self.observe().info.cdc_id.id > id, + ObserverType::Rts(ObserveHandle { id, .. }) => self.observe().info.rts_id.id > id, + ObserverType::Pitr(ObserveHandle { id, .. 
}) => self.observe().info.pitr_id.id > id, }; if is_stale_cmd { notify_stale_req_with_msg( @@ -77,7 +78,7 @@ impl Apply { format!( "stale observe id {:?}, current id: {:?}", ty.handle().id, - self.observe_info_mut().pitr_id.id + self.observe().info, ), snap_cb, ); @@ -96,10 +97,13 @@ impl Apply { // Commit the writebatch for ensuring the following snapshot can get all // previous writes. self.flush(); - RegionSnapshot::from_snapshot( + let (applied_index, _) = self.apply_progress(); + let snap = RegionSnapshot::from_snapshot( Arc::new(self.tablet().snapshot()), Arc::new(self.region().clone()), - ) + ); + snap.set_apply_index(applied_index); + snap } Err(e) => { // Return error if epoch not match @@ -108,17 +112,50 @@ impl Apply { } }; + let observe = self.observe_mut(); match ty { ObserverType::Cdc(id) => { - self.observe_info_mut().cdc_id = id; + observe.info.cdc_id = id; } ObserverType::Rts(id) => { - self.observe_info_mut().rts_id = id; + observe.info.rts_id = id; } ObserverType::Pitr(id) => { - self.observe_info_mut().pitr_id = id; + observe.info.pitr_id = id; } } + let level = observe.info.observe_level(); + observe.level = level; + info!(self.logger, "capture update observe level"; "level" => ?level); snap_cb.set_result((RaftCmdResponse::default(), Some(Box::new(snapshot)))); } + + pub fn observe_apply( + &mut self, + index: u64, + term: u64, + req: RaftCmdRequest, + resp: &RaftCmdResponse, + ) { + if self.observe().level == ObserveLevel::None { + return; + } + + let cmd = Cmd::new(index, term, req, resp.clone()); + self.observe_mut().cmds.push(cmd); + } + + pub fn flush_observed_apply(&mut self) { + let level = self.observe().level; + if level == ObserveLevel::None { + return; + } + + let region_id = self.region_id(); + let mut cmd_batch = CmdBatch::new(&self.observe().info, region_id); + let cmds = mem::take(&mut self.observe_mut().cmds); + cmd_batch.extend(&self.observe().info, region_id, cmds); + self.coprocessor_host() + 
.on_flush_applied_cmd_batch(level, vec![cmd_batch], self.tablet()); + } } diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index d5ecb8c3026..d32b8bdbb80 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -8,7 +8,7 @@ use engine_traits::{ use kvproto::{metapb, raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; use raftstore::{ - coprocessor::CmdObserveInfo, + coprocessor::{Cmd, CmdObserveInfo, CoprocessorHost, ObserveLevel}, store::{ fsm::{apply::DEFAULT_APPLY_WB_SIZE, ApplyMetrics}, Config, ReadTask, @@ -23,6 +23,12 @@ use crate::{ router::CmdResChannel, }; +pub(crate) struct Observe { + pub info: CmdObserveInfo, + pub level: ObserveLevel, + pub cmds: Vec, +} + /// Apply applies all the committed commands to kv db. pub struct Apply { peer: metapb::Peer, @@ -62,7 +68,8 @@ pub struct Apply { res_reporter: R, read_scheduler: Scheduler>, sst_importer: Arc, - observe_info: CmdObserveInfo, + observe: Observe, + coprocessor_host: CoprocessorHost, pub(crate) metrics: ApplyMetrics, pub(crate) logger: Logger, @@ -83,6 +90,7 @@ impl Apply { applied_term: u64, buckets: Option, sst_importer: Arc, + coprocessor_host: CoprocessorHost, logger: Logger, ) -> Self { let mut remote_tablet = tablet_registry @@ -115,7 +123,12 @@ impl Apply { metrics: ApplyMetrics::default(), buckets, sst_importer, - observe_info: CmdObserveInfo::default(), + observe: Observe { + info: CmdObserveInfo::default(), + level: ObserveLevel::None, + cmds: vec![], + }, + coprocessor_host, logger, } } @@ -277,12 +290,22 @@ impl Apply { } #[inline] - pub fn observe_info_mut(&mut self) -> &mut CmdObserveInfo { - &mut self.observe_info + pub(crate) fn observe(&mut self) -> &Observe { + &self.observe + } + + #[inline] + pub(crate) fn observe_mut(&mut self) -> &mut Observe { + &mut self.observe } #[inline] pub fn term(&self) -> u64 { self.applied_term } + + #[inline] + pub fn 
coprocessor_host(&self) -> &CoprocessorHost { + &self.coprocessor_host + } } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index cff915fd248..ee9be348c89 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -318,7 +318,7 @@ mod tests { use engine_test::{ ctor::{CfOptions, DbOptions}, - kv::TestTabletFactory, + kv::{KvTestEngine, TestTabletFactory}, }; use engine_traits::{ FlushState, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, DATA_CFS, @@ -328,10 +328,13 @@ mod tests { raft_serverpb::PeerState, }; use raft::{Error as RaftError, StorageError}; - use raftstore::store::{ - util::new_empty_snapshot, write_to_db_for_test, AsyncReadNotifier, Config, FetchedLogs, - GenSnapRes, ReadRunner, TabletSnapKey, TabletSnapManager, WriteTask, RAFT_INIT_LOG_INDEX, - RAFT_INIT_LOG_TERM, + use raftstore::{ + coprocessor::CoprocessorHost, + store::{ + util::new_empty_snapshot, write_to_db_for_test, AsyncReadNotifier, Config, FetchedLogs, + GenSnapRes, ReadRunner, TabletSnapKey, TabletSnapManager, WriteTask, + RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, + }, }; use slog::o; use tempfile::TempDir; @@ -500,6 +503,7 @@ mod tests { let mut state = RegionLocalState::default(); state.set_region(region.clone()); let (_tmp_dir, importer) = create_tmp_importer(); + let host = CoprocessorHost::::default(); // setup peer applyer let mut apply = Apply::new( &Config::default(), @@ -513,6 +517,7 @@ mod tests { 5, None, importer, + host, logger, ); diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 82b6dce17ee..7dc5142e734 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -438,7 +438,7 @@ impl CmdObserveInfo { /// PiTR: Observer supports the `backup-log` function. /// RTS: Observer supports the `resolved-ts` advancing (and follower read, /// etc.). 
- fn observe_level(&self) -> ObserveLevel { + pub fn observe_level(&self) -> ObserveLevel { let cdc = if self.cdc_id.is_observing() { // `cdc` observe all data ObserveLevel::All @@ -512,6 +512,14 @@ impl CmdBatch { self.cmds.push(cmd) } + pub fn extend(&mut self, observe_info: &CmdObserveInfo, region_id: u64, cmds: Vec) { + assert_eq!(region_id, self.region_id); + assert_eq!(observe_info.cdc_id.id, self.cdc_id); + assert_eq!(observe_info.rts_id.id, self.rts_id); + assert_eq!(observe_info.pitr_id.id, self.pitr_id); + self.cmds.extend(cmds) + } + pub fn into_iter(self, region_id: u64) -> IntoIter { assert_eq!(region_id, self.region_id); self.cmds.into_iter() diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index ccf5f94e39e..4073b71c60d 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -85,6 +85,11 @@ where self.snap.as_ref() } + #[inline] + pub fn set_apply_index(&self, apply_index: u64) { + self.apply_index.store(apply_index, Ordering::SeqCst); + } + #[inline] pub fn get_apply_index(&self) -> Result { let apply_index = self.apply_index.load(Ordering::SeqCst); diff --git a/components/raftstore/src/store/simple_write.rs b/components/raftstore/src/store/simple_write.rs index cdae8f18c97..57056f984bd 100644 --- a/components/raftstore/src/store/simple_write.rs +++ b/components/raftstore/src/store/simple_write.rs @@ -5,7 +5,7 @@ use std::assert_matches::debug_assert_matches; use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; use kvproto::{ import_sstpb::SstMeta, - raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}, + raft_cmdpb::{CmdType, RaftCmdRequest, RaftRequestHeader, Request}, }; use protobuf::{CodedInputStream, Message}; use slog::Logger; @@ -277,6 +277,59 @@ impl<'a> SimpleWriteReqDecoder<'a> { pub fn header(&self) -> &RaftRequestHeader { &self.header } + + pub fn to_raft_cmd_request(&self) -> RaftCmdRequest { + let mut req = 
RaftCmdRequest::default(); + req.set_header(self.header().clone()); + let decoder = Self { + header: Default::default(), + buf: self.buf, + }; + for s in decoder { + match s { + SimpleWrite::Put(Put { cf, key, value }) => { + let mut request = Request::default(); + request.set_cmd_type(CmdType::Put); + request.mut_put().set_cf(cf.to_owned()); + request.mut_put().set_key(key.to_owned()); + request.mut_put().set_value(value.to_owned()); + req.mut_requests().push(request); + } + SimpleWrite::Delete(Delete { cf, key }) => { + let mut request = Request::default(); + request.set_cmd_type(CmdType::Delete); + request.mut_delete().set_cf(cf.to_owned()); + request.mut_delete().set_key(key.to_owned()); + req.mut_requests().push(request); + } + SimpleWrite::DeleteRange(DeleteRange { + cf, + start_key, + end_key, + notify_only, + }) => { + let mut request = Request::default(); + request.set_cmd_type(CmdType::DeleteRange); + request.mut_delete_range().set_cf(cf.to_owned()); + request + .mut_delete_range() + .set_start_key(start_key.to_owned()); + request.mut_delete_range().set_end_key(end_key.to_owned()); + request.mut_delete_range().set_notify_only(notify_only); + req.mut_requests().push(request); + } + SimpleWrite::Ingest(ssts) => { + for sst in ssts { + let mut request = Request::default(); + request.set_cmd_type(CmdType::IngestSst); + request.mut_ingest_sst().set_sst(sst); + req.mut_requests().push(request); + } + } + } + } + req + } } impl<'a> Iterator for SimpleWriteReqDecoder<'a> { diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 3243b207aca..d1c8e09ef96 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -95,6 +95,7 @@ use tikv::{ }, storage::{ self, + config::EngineType, config_manager::StorageConfigManger, kv::LocalTablets, mvcc::MvccConsistencyCheckObserver, @@ -933,6 +934,7 @@ where let cdc_endpoint = cdc::Endpoint::new( self.core.config.server.cluster_id, &self.core.config.cdc, + 
self.core.config.storage.engine == EngineType::RaftKv2, self.core.config.storage.api_version(), self.pd_client.clone(), cdc_scheduler.clone(), diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 86d3a9a696f..83bcc2a55fe 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -74,6 +74,7 @@ use tikv::{ }, storage::{ self, + config::EngineType, config_manager::StorageConfigManger, kv::LocalTablets, mvcc::MvccConsistencyCheckObserver, @@ -187,6 +188,7 @@ struct TikvServer { coprocessor_host: Option>, concurrency_manager: ConcurrencyManager, env: Arc, + cdc_worker: Option>>, cdc_scheduler: Option>, cdc_memory_quota: Option, sst_worker: Option>>, @@ -323,6 +325,7 @@ where coprocessor_host: None, concurrency_manager, env, + cdc_worker: None, cdc_scheduler: None, cdc_memory_quota: None, sst_worker: None, @@ -563,28 +566,25 @@ where self.core.to_stop.push(check_leader_worker); // Create cdc worker. - let cdc_worker = Box::new(LazyWorker::new("cdc")); - let cdc_scheduler = cdc_worker.scheduler(); - let txn_extra_scheduler = cdc::CdcTxnExtraScheduler::new(cdc_scheduler.clone()); - engines - .engine - .set_txn_extra_scheduler(Arc::new(txn_extra_scheduler)); + let mut cdc_worker = self.cdc_worker.take().unwrap(); + let cdc_scheduler = self.cdc_scheduler.clone().unwrap(); // Register cdc observer. let cdc_ob = cdc::CdcObserver::new(cdc_scheduler.clone()); cdc_ob.register_to(self.coprocessor_host.as_mut().unwrap()); // Register cdc config manager. cfg_controller.register( tikv::config::Module::Cdc, - Box::new(CdcConfigManager(cdc_worker.scheduler())), + Box::new(CdcConfigManager(cdc_scheduler.clone())), ); // Start cdc endpoint. 
let cdc_memory_quota = MemoryQuota::new(self.core.config.cdc.sink_memory_quota.0 as _); - let _cdc_endpoint = cdc::Endpoint::new( + let cdc_endpoint = cdc::Endpoint::new( self.core.config.server.cluster_id, &self.core.config.cdc, + self.core.config.storage.engine == EngineType::RaftKv2, self.core.config.storage.api_version(), self.pd_client.clone(), - cdc_scheduler.clone(), + cdc_scheduler, self.router.clone().unwrap(), LocalTablets::Registry(self.tablet_registry.as_ref().unwrap().clone()), cdc_ob, @@ -595,15 +595,13 @@ where cdc_memory_quota.clone(), self.causal_ts_provider.clone(), ); - // TODO: enable cdc. - // cdc_worker.start_with_timer(cdc_endpoint); - // self.core.to_stop.push(cdc_worker); - self.cdc_scheduler = Some(cdc_scheduler); + cdc_worker.start_with_timer(cdc_endpoint); + self.core.to_stop.push(cdc_worker); self.cdc_memory_quota = Some(cdc_memory_quota); // Create resolved ts. if self.core.config.resolved_ts.enable { - let rts_worker = Box::new(LazyWorker::new("resolved-ts")); + let mut rts_worker = Box::new(LazyWorker::new("resolved-ts")); // Register the resolved ts observer let resolved_ts_ob = resolved_ts::Observer::new(rts_worker.scheduler()); resolved_ts_ob.register_to(self.coprocessor_host.as_mut().unwrap()); @@ -614,7 +612,7 @@ where rts_worker.scheduler(), )), ); - let _rts_endpoint = resolved_ts::Endpoint::new( + let rts_endpoint = resolved_ts::Endpoint::new( &self.core.config.resolved_ts, rts_worker.scheduler(), self.router.clone().unwrap(), @@ -624,9 +622,8 @@ where self.env.clone(), self.security_mgr.clone(), ); - // TODO: enable resolved_ts. 
- // rts_worker.start_with_timer(rts_endpoint); - // self.core.to_stop.push(rts_worker); + rts_worker.start_with_timer(rts_endpoint); + self.core.to_stop.push(rts_worker); } let server_config = Arc::new(VersionTrack::new(self.core.config.server.clone())); @@ -1234,7 +1231,12 @@ impl TikvServer { ); let region_info_accessor = RegionInfoAccessor::new(&mut coprocessor_host); - let engine = RaftKv2::new(router.clone(), region_info_accessor.region_leaders()); + let cdc_worker = Box::new(LazyWorker::new("cdc")); + let cdc_scheduler = cdc_worker.scheduler(); + let txn_extra_scheduler = cdc::CdcTxnExtraScheduler::new(cdc_scheduler.clone()); + + let mut engine = RaftKv2::new(router.clone(), region_info_accessor.region_leaders()); + engine.set_txn_extra_scheduler(Arc::new(txn_extra_scheduler)); self.engines = Some(TikvEngines { raft_engine, @@ -1244,6 +1246,8 @@ impl TikvServer { self.node = Some(node); self.coprocessor_host = Some(coprocessor_host); self.region_info_accessor = Some(region_info_accessor); + self.cdc_worker = Some(cdc_worker); + self.cdc_scheduler = Some(cdc_scheduler); engines_info } diff --git a/src/config/mod.rs b/src/config/mod.rs index f8bbd1be9f5..2e81de6d829 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2789,7 +2789,7 @@ impl Default for CdcConfig { } impl CdcConfig { - pub fn validate(&mut self) -> Result<(), Box> { + pub fn validate(&mut self, raftstore_v2: bool) -> Result<(), Box> { let default_cfg = CdcConfig::default(); if self.min_ts_interval.is_zero() { warn!( @@ -2823,6 +2823,13 @@ impl CdcConfig { ); self.incremental_scan_ts_filter_ratio = default_cfg.incremental_scan_ts_filter_ratio; } + if raftstore_v2 && self.hibernate_regions_compatible { + warn!( + "cdc.hibernate_regions_compatible is overwritten to false for partitioned-raft-kv" + ); + self.hibernate_regions_compatible = false; + } + Ok(()) } } @@ -3360,7 +3367,8 @@ impl TikvConfig { self.import.validate()?; self.backup.validate()?; self.log_backup.validate()?; - 
self.cdc.validate()?; + self.cdc + .validate(self.storage.engine == EngineType::RaftKv2)?; self.pessimistic_txn.validate()?; self.gc.validate()?; self.resolved_ts.validate()?; From e024556874a44a281cd9a50d454e905e03bd0eef Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 18 Apr 2023 14:51:18 +0800 Subject: [PATCH 0637/1149] raftstore-v2: schedule raft tick after apply snapshot (#14550) ref tikv/tikv#14532, close tikv/tikv#14548 schedule raft tick after apply snapshot Signed-off-by: Spade A --- .../raftstore-v2/src/operation/command/mod.rs | 16 ++------- .../src/operation/ready/apply_trace.rs | 8 +++++ .../raftstore-v2/src/operation/ready/mod.rs | 1 + components/raftstore-v2/src/raft/peer.rs | 27 +++++++++++++-- tests/failpoints/cases/test_snap.rs | 33 +++++++++++++++++++ 5 files changed, 70 insertions(+), 15 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index af31dc5a397..ce4a415cf00 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -43,7 +43,7 @@ use raftstore::{ }, Error, Result, }; -use slog::{debug, error, info, warn}; +use slog::{debug, error, warn}; use tikv_util::{ box_err, log::SlogFormat, @@ -55,7 +55,7 @@ use crate::{ batch::StoreContext, fsm::{ApplyFsm, ApplyResReporter}, raft::{Apply, Peer}, - router::{ApplyRes, ApplyTask, CmdResChannel, PeerTick}, + router::{ApplyRes, ApplyTask, CmdResChannel}, }; mod admin; @@ -401,17 +401,7 @@ impl Peer { apply_res.applied_index, progress_to_be_updated, ); - if self.pause_for_recovery() - && self.storage().entry_storage().commit_index() <= apply_res.applied_index - { - info!(self.logger, "recovery completed"; "apply_index" => apply_res.applied_index); - self.set_pause_for_recovery(false); - // Flush to avoid recover again and again. 
- if let Some(scheduler) = self.apply_scheduler() { - scheduler.send(ApplyTask::ManualFlush); - } - self.add_pending_tick(PeerTick::Raft); - } + self.try_compelete_recovery(); if !self.pause_for_recovery() && self.storage_mut().apply_trace_mut().should_flush() { if let Some(scheduler) = self.apply_scheduler() { scheduler.send(ApplyTask::ManualFlush); diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index 71e282728f7..90b7930c368 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -33,6 +33,7 @@ use engine_traits::{ data_cf_offset, ApplyProgress, KvEngine, RaftEngine, RaftLogBatch, TabletRegistry, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, DATA_CFS, DATA_CFS_LEN, }; +use fail::fail_point; use kvproto::{ metapb::Region, raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState}, @@ -404,6 +405,13 @@ impl Storage { } }; apply_state.set_applied_index(applied_index); + let mut reset_apply_index = || { + // Make node reply from start. 
+ fail_point!("RESET_APPLY_INDEX_WHEN_RESTART", |_| { + apply_state.set_applied_index(5); + }); + }; + reset_apply_index(); Self::create( store_id, diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 68da61cf45e..9419549e580 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -595,6 +595,7 @@ impl Peer { self.merge_state_changes_to(&mut write_task); self.storage_mut() .handle_raft_ready(ctx, &mut ready, &mut write_task); + self.try_compelete_recovery(); self.on_advance_persisted_apply_index(ctx, prev_persisted, &mut write_task); if !ready.persisted_messages().is_empty() { diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index b93fc0f5047..6b5898b6297 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -25,7 +25,7 @@ use raftstore::{ TabletSnapManager, WriteTask, }, }; -use slog::Logger; +use slog::{info, Logger}; use tikv_util::slog_panic; use super::storage::Storage; @@ -35,7 +35,7 @@ use crate::{ AsyncWriter, BucketStatsInfo, CompactLogContext, DestroyProgress, GcPeerContext, MergeContext, ProposalControl, SimpleWriteReqEncoder, SplitFlowControl, TxnContext, }, - router::{CmdResChannel, PeerTick, QueryResChannel}, + router::{ApplyTask, CmdResChannel, PeerTick, QueryResChannel}, Result, }; @@ -464,6 +464,29 @@ impl Peer { self.pause_for_recovery } + #[inline] + // we may have skipped scheduling raft tick when start due to noticable gap + // between commit index and apply index. We should scheduling it when raft log + // apply catches up. 
+ pub fn try_compelete_recovery(&mut self) { + if self.pause_for_recovery() + && self.storage().entry_storage().commit_index() + <= self.storage().entry_storage().applied_index() + { + info!( + self.logger, + "recovery completed"; + "apply_index" => self.storage().entry_storage().applied_index() + ); + self.set_pause_for_recovery(false); + // Flush to avoid recover again and again. + if let Some(scheduler) = self.apply_scheduler() { + scheduler.send(ApplyTask::ManualFlush); + } + self.add_pending_tick(PeerTick::Raft); + } + } + #[inline] pub fn insert_peer_cache(&mut self, peer: metapb::Peer) { for p in self.raft_group.store().region().get_peers() { diff --git a/tests/failpoints/cases/test_snap.rs b/tests/failpoints/cases/test_snap.rs index 64b03f6d0b3..4ca18dcd716 100644 --- a/tests/failpoints/cases/test_snap.rs +++ b/tests/failpoints/cases/test_snap.rs @@ -900,3 +900,36 @@ fn test_snapshot_recover_from_raft_write_failure_with_uncommitted_log() { cluster.must_put(format!("k1{}", i).as_bytes(), b"v1"); } } + +#[test] +fn test_snapshot_complete_recover_raft_tick() { + // https://github.com/tikv/tikv/issues/14548 gives the description of what the following tests. 
+ let mut cluster = test_raftstore_v2::new_node_cluster(0, 3); + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(50); + cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(10); + + cluster.run(); + + let region = cluster.get_region(b"k"); + cluster.must_transfer_leader(region.get_id(), new_peer(1, 1)); + for i in 0..200 { + let k = format!("k{:04}", i); + cluster.must_put(k.as_bytes(), b"val"); + } + + cluster.stop_node(2); + for i in 200..300 { + let k = format!("k{:04}", i); + cluster.must_put(k.as_bytes(), b"val"); + } + + fail::cfg("APPLY_COMMITTED_ENTRIES", "pause").unwrap(); + fail::cfg("RESET_APPLY_INDEX_WHEN_RESTART", "return").unwrap(); + cluster.run_node(2).unwrap(); + std::thread::sleep(Duration::from_millis(100)); + fail::remove("APPLY_COMMITTED_ENTRIES"); + cluster.stop_node(1); + + cluster.must_put(b"k0500", b"val"); + assert_eq!(cluster.must_get(b"k0500").unwrap(), b"val".to_vec()); +} From 8dd75f8693a99e33681a13939ac7e11d5ccff29f Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 18 Apr 2023 00:35:19 -0700 Subject: [PATCH 0638/1149] update storage.engine config to actual engine type used in existing cluster (#14541) ref tikv/tikv#12842 update storage.engine config to actual engine type used in existing cluster Signed-off-by: tonyxuqqi --- src/config/mod.rs | 5 ++- src/storage/config.rs | 74 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 76 insertions(+), 3 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 2e81de6d829..dcbfdc4e441 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -89,6 +89,7 @@ use crate::{ }; pub const DEFAULT_ROCKSDB_SUB_DIR: &str = "db"; +pub const DEFAULT_TABLET_SUB_DIR: &str = "tablets"; /// By default, block cache size will be set to 45% of system memory. 
pub const BLOCK_CACHE_RATE: f64 = 0.45; @@ -3291,7 +3292,9 @@ impl TikvConfig { let kv_data_exists = if self.storage.engine == EngineType::RaftKv { RocksEngine::exists(&kv_db_path) } else { - Path::new(&self.storage.data_dir).join("tablets").exists() + Path::new(&self.storage.data_dir) + .join(DEFAULT_TABLET_SUB_DIR) + .exists() }; RaftDataStateMachine::new( diff --git a/src/storage/config.rs b/src/storage/config.rs index f65ed15cece..d301849528d 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -2,7 +2,7 @@ //! Storage configuration. -use std::{cmp::max, error::Error}; +use std::{cmp::max, error::Error, path::Path}; use engine_rocks::raw::{Cache, LRUCacheOptions, MemoryAllocator}; use file_system::{IoPriority, IoRateLimitMode, IoRateLimiter, IoType}; @@ -14,7 +14,10 @@ use tikv_util::{ sys::SysQuota, }; -use crate::config::{BLOCK_CACHE_RATE, MIN_BLOCK_CACHE_SHARD_SIZE, RAFTSTORE_V2_BLOCK_CACHE_RATE}; +use crate::config::{ + BLOCK_CACHE_RATE, DEFAULT_ROCKSDB_SUB_DIR, DEFAULT_TABLET_SUB_DIR, MIN_BLOCK_CACHE_SHARD_SIZE, + RAFTSTORE_V2_BLOCK_CACHE_RATE, +}; pub const DEFAULT_DATA_DIR: &str = "./"; const DEFAULT_GC_RATIO_THRESHOLD: f64 = 1.1; @@ -110,10 +113,43 @@ impl Default for Config { } impl Config { + fn validate_engine_type(&mut self) -> Result<(), Box> { + let v1_kv_db_path = + config::canonicalize_sub_path(&self.data_dir, DEFAULT_ROCKSDB_SUB_DIR).unwrap(); + let v2_tablet_path = + config::canonicalize_sub_path(&self.data_dir, DEFAULT_TABLET_SUB_DIR).unwrap(); + + let kv_data_exists = Path::new(&v1_kv_db_path).exists(); + let v2_tablet_exists = Path::new(&v2_tablet_path).exists(); + if kv_data_exists && v2_tablet_exists { + return Err("Both raft-kv and partitioned-raft-kv's data folders exist".into()); + } + + // v1's data exists, but the engine type is v2 + if kv_data_exists && self.engine == EngineType::RaftKv2 { + info!( + "TiKV has data for raft-kv engine but the engine type in config is partitioned-raft-kv. 
Ignore the config and keep raft-kv instead" + ); + self.engine = EngineType::RaftKv; + } + + // if v2's data exists, but the engine type is v1 + if v2_tablet_exists && self.engine == EngineType::RaftKv { + info!( + "TiKV has data for partitioned-raft-kv engine but the engine type in config is raft-kv. Ignore the config and keep partitioned-raft-kv instead" + ); + self.engine = EngineType::RaftKv2; + } + Ok(()) + } + pub fn validate(&mut self) -> Result<(), Box> { if self.data_dir != DEFAULT_DATA_DIR { self.data_dir = config::canonicalize_path(&self.data_dir)? } + + self.validate_engine_type()?; + if self.scheduler_concurrency > MAX_SCHED_CONCURRENCY { warn!( "TiKV has optimized latch since v4.0, so it is not necessary to set large schedule \ @@ -393,6 +429,8 @@ impl IoRateLimitConfig { #[cfg(test)] mod tests { + use std::fs; + use super::*; #[test] @@ -411,6 +449,38 @@ mod tests { cfg.validate().unwrap_err(); } + #[test] + fn test_validate_engine_type_config() { + let mut cfg = Config::default(); + cfg.engine = EngineType::RaftKv; + cfg.validate().unwrap(); + assert_eq!(cfg.engine, EngineType::RaftKv); + + cfg.engine = EngineType::RaftKv2; + cfg.validate().unwrap(); + assert_eq!(cfg.engine, EngineType::RaftKv2); + + let v1_kv_db_path = + config::canonicalize_sub_path(&cfg.data_dir, DEFAULT_ROCKSDB_SUB_DIR).unwrap(); + fs::create_dir_all(&v1_kv_db_path).unwrap(); + cfg.validate().unwrap(); + assert_eq!(cfg.engine, EngineType::RaftKv); + fs::remove_dir_all(&v1_kv_db_path).unwrap(); + + let v2_tablet_path = + config::canonicalize_sub_path(&cfg.data_dir, DEFAULT_TABLET_SUB_DIR).unwrap(); + fs::create_dir_all(&v2_tablet_path).unwrap(); + cfg.engine = EngineType::RaftKv; + cfg.validate().unwrap(); + assert_eq!(cfg.engine, EngineType::RaftKv2); + + // both v1 and v2 data exists, throw error + fs::create_dir_all(&v1_kv_db_path).unwrap(); + cfg.validate().unwrap_err(); + fs::remove_dir_all(&v1_kv_db_path).unwrap(); + fs::remove_dir_all(&v2_tablet_path).unwrap(); + } + 
#[test] fn test_adjust_shard_bits() { let config = BlockCacheConfig::default(); From 0c113a6370b205f13241890903dd166a839b7347 Mon Sep 17 00:00:00 2001 From: ekexium Date: Tue, 18 Apr 2023 15:59:20 +0800 Subject: [PATCH 0639/1149] txn: round up last_update_duration_ms (#14571) ref tikv/tikv#14497 Round up last_update_duration_ms, so that duration in (0, 1ms] won't be treated as 0. Co-authored-by: Ti Chi Robot --- src/server/lock_manager/waiter_manager.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server/lock_manager/waiter_manager.rs b/src/server/lock_manager/waiter_manager.rs index 5f433571431..c0e97e25e3a 100644 --- a/src/server/lock_manager/waiter_manager.rs +++ b/src/server/lock_manager/waiter_manager.rs @@ -270,10 +270,10 @@ impl Waiter { let mut lock_info = self.wait_info.lock_info.clone(); lock_info.set_duration_to_last_update_ms( self.last_updated_time - .map(|t| t.elapsed().as_millis() as u64) + // round up, so that duration in (0, 1ms] won't be treated as 0. 
+ .map(|t| (t.elapsed().as_millis() as u64).max(1)) .unwrap_or_default(), ); - // lock_info.set_skip_resolving_lock(skip_resolving_lock); let error = MvccError::from(MvccErrorInner::KeyIsLocked(lock_info)); self.cancel(Some(StorageError::from(TxnError::from(error)))) } From 9fc86635ba57a79ea1ae25c393d847027e15824b Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 18 Apr 2023 16:35:20 +0800 Subject: [PATCH 0640/1149] raftstore-v2: check peer trim status before prepare merge (#14374) ref tikv/tikv#12842 None Signed-off-by: tabokie --- .../operation/command/admin/merge/prepare.rs | 169 ++++++++++++++++-- components/raftstore-v2/src/operation/life.rs | 33 +++- .../raftstore-v2/src/operation/ready/mod.rs | 19 ++ components/raftstore-v2/src/raft/peer.rs | 1 + .../tests/failpoints/test_merge.rs | 6 +- .../tests/integrations/cluster.rs | 41 +++-- .../tests/integrations/test_merge.rs | 17 +- components/raftstore/src/store/fsm/peer.rs | 5 +- tests/integrations/raftstore/test_merge.rs | 8 +- 9 files changed, 249 insertions(+), 50 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index 16a8382cfad..601b4568866 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -27,15 +27,19 @@ //! Start the tick (`Peer::on_check_merge`) to periodically check the //! eligibility of merge. 
-use std::mem; +use std::{mem, time::Duration}; +use collections::HashMap; use engine_traits::{Checkpointer, KvEngine, RaftEngine, RaftLogBatch, CF_LOCK}; use kvproto::{ + metapb::RegionEpoch, raft_cmdpb::{ AdminCmdType, AdminRequest, AdminResponse, CmdType, PrepareMergeRequest, PutRequest, RaftCmdRequest, Request, }, - raft_serverpb::{MergeState, PeerState, RegionLocalState}, + raft_serverpb::{ + ExtraMessage, ExtraMessageType, MergeState, PeerState, RaftMessage, RegionLocalState, + }, }; use parking_lot::RwLockUpgradableReadGuard; use protobuf::Message; @@ -46,7 +50,9 @@ use raftstore::{ Error, Result, }; use slog::{debug, info}; -use tikv_util::{box_err, log::SlogFormat, slog_panic, store::region_on_same_stores}; +use tikv_util::{ + box_err, log::SlogFormat, slog_panic, store::region_on_same_stores, time::Instant, +}; use super::merge_source_path; use crate::{ @@ -57,6 +63,8 @@ use crate::{ router::CmdResChannel, }; +const TRIM_CHECK_TIMEOUT: Duration = Duration::from_secs(10); + #[derive(Clone)] pub struct PreProposeContext { pub min_matched: u64, @@ -64,6 +72,12 @@ pub struct PreProposeContext { } pub enum PrepareStatus { + WaitForTrimStatus { + start_time: Instant, + // Peers that we are not sure if trimmed. + pending_peers: HashMap, + req: Option, + }, /// When a fence is present, we (1) delay the PrepareMerge /// command `cmd` until all writes before `idx` are applied (2) reject all /// in-coming write proposals. @@ -93,27 +107,28 @@ pub struct PrepareMergeResult { } impl Peer { - pub fn propose_prepare_merge( + pub fn propose_prepare_merge( &mut self, store_ctx: &mut StoreContext, mut req: RaftCmdRequest, ) -> Result { - // Best effort. Remove when trim check is implemented. 
- if self.storage().has_dirty_data() { - return Err(box_err!( - "{} source peer has dirty data, try again later", - SlogFormat(&self.logger) - )); - } self.validate_prepare_merge_command( store_ctx, req.get_admin_request().get_prepare_merge(), )?; + // We need to check three things in order: + // (1) `start_check_trim_status` + // (2) `check_logs_before_prepare_merge` + // (3) `check_pessimistic_locks` + // Check 1 and 3 are async, they yield by returning + // `Error::PendingPrepareMerge`. let pre_propose = if let Some(r) = self.already_checked_pessimistic_locks()? { r - } else { + } else if self.already_checked_trim_status()? { let r = self.check_logs_before_prepare_merge(store_ctx)?; self.check_pessimistic_locks(r, &mut req)? + } else { + return self.start_check_trim_status(store_ctx, &mut req); }; req.mut_admin_request() .mut_prepare_merge() @@ -280,6 +295,110 @@ impl Peer { }) } + fn start_check_trim_status( + &mut self, + store_ctx: &mut StoreContext, + req: &mut RaftCmdRequest, + ) -> Result { + if self.storage().has_dirty_data() { + return Err(box_err!( + "source peer {} not trimmed, skip merging.", + self.peer_id() + )); + } + let target = req.get_admin_request().get_prepare_merge().get_target(); + let mut pending_peers = HashMap::default(); + for region in [self.region(), target] { + for p in region.get_peers() { + if p.get_id() == self.peer_id() { + continue; + } + let mut msg = RaftMessage::default(); + msg.set_region_id(region.get_id()); + msg.set_from_peer(self.peer().clone()); + msg.set_to_peer(p.clone()); + msg.set_region_epoch(region.get_region_epoch().clone()); + msg.mut_extra_msg() + .set_type(ExtraMessageType::MsgAvailabilityRequest); + msg.mut_extra_msg() + .mut_availability_context() + .set_from_region_id(self.region_id()); + store_ctx.trans.send(msg)?; + pending_peers.insert(p.get_id(), region.get_region_epoch().clone()); + } + } + + let status = &mut self.merge_context_mut().prepare_status; + // Shouldn't enter this call if trim check is 
already underway. + assert!(status.is_none()); + *status = Some(PrepareStatus::WaitForTrimStatus { + start_time: Instant::now_coarse(), + pending_peers, + req: Some(mem::take(req)), + }); + Err(Error::PendingPrepareMerge) + } + + pub fn merge_on_availability_response( + &mut self, + store_ctx: &mut StoreContext, + from_peer: u64, + resp: &ExtraMessage, + ) { + if self.merge_context().is_some() + && let Some(PrepareStatus::WaitForTrimStatus { pending_peers, req, .. }) = self + .merge_context_mut() + .prepare_status + .as_mut() + && req.is_some() + { + assert!(resp.has_availability_context()); + let from_region = resp.get_availability_context().get_from_region_id(); + let from_epoch = resp.get_availability_context().get_from_region_epoch(); + let trimmed = resp.get_availability_context().get_trimmed(); + if let Some(epoch) = pending_peers.get(&from_peer) + && util::is_region_epoch_equal(from_epoch, epoch) + { + if !trimmed { + info!( + self.logger, + "cancel merge because source peer is not trimmed"; + "region_id" => from_region, + "peer_id" => from_peer, + ); + self.take_merge_context(); + return; + } else { + pending_peers.remove(&from_peer); + } + } + if pending_peers.is_empty() { + let (ch, _) = CmdResChannel::pair(); + let req = req.take().unwrap(); + self.on_admin_command(store_ctx, req, ch); + } + } + } + + fn already_checked_trim_status(&mut self) -> Result { + match self + .merge_context() + .as_ref() + .and_then(|c| c.prepare_status.as_ref()) + { + Some(PrepareStatus::WaitForTrimStatus { pending_peers, .. }) => { + if pending_peers.is_empty() { + Ok(true) + } else { + Err(Error::PendingPrepareMerge) + } + } + None => Ok(false), + // Shouldn't reach here after calling `already_checked_pessimistic_locks` first. 
+ _ => unreachable!(), + } + } + fn check_pessimistic_locks( &mut self, ctx: PreProposeContext, @@ -327,7 +446,7 @@ impl Peer { if applied_index < *fence { info!( self.logger, - "reject PrepareMerge because applied_index has not reached prepare_merge_fence"; + "suspend PrepareMerge because applied_index has not reached prepare_merge_fence"; "applied_index" => applied_index, "prepare_merge_fence" => fence, ); @@ -340,7 +459,25 @@ impl Peer { "another merge is in-progress, merge_state: {:?}.", state )), - None => Ok(None), + _ => Ok(None), + } + } + + #[inline] + pub fn maybe_clean_up_stale_merge_context(&mut self) { + // Check if there's a stale trim check. Ideally this should be implemented as a + // tick. But this is simpler. + if let Some(PrepareStatus::WaitForTrimStatus { + start_time, req, .. + }) = self + .merge_context() + .as_ref() + .and_then(|c| c.prepare_status.as_ref()) + && req.is_some() + && start_time.saturating_elapsed() > TRIM_CHECK_TIMEOUT + { + info!(self.logger, "cancel merge because trim check timed out"); + self.take_merge_context(); } } @@ -351,6 +488,10 @@ impl Peer { store_ctx: &mut StoreContext, applied_index: u64, ) { + if self.merge_context().is_none() { + return; + } + // Check the fence. 
if let Some(req) = self .merge_context_mut() .maybe_take_pending_prepare(applied_index) diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 525be1991bd..7c7d1f37275 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -34,7 +34,7 @@ use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use kvproto::{ metapb::{self, Region}, raft_cmdpb::{AdminCmdType, RaftCmdRequest}, - raft_serverpb::{ExtraMessageType, PeerState, RaftMessage}, + raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, RaftMessage}, }; use raftstore::store::{util, Transport, WriteTask}; use slog::{debug, error, info, warn}; @@ -422,6 +422,35 @@ where } impl Peer { + pub fn on_availability_request( + &mut self, + ctx: &mut StoreContext, + from_region_id: u64, + from_peer: &metapb::Peer, + ) { + let mut msg = RaftMessage::default(); + msg.set_region_id(from_region_id); + msg.set_from_peer(self.peer().clone()); + msg.set_to_peer(from_peer.clone()); + msg.mut_extra_msg() + .set_type(ExtraMessageType::MsgAvailabilityResponse); + let report = msg.mut_extra_msg().mut_availability_context(); + report.set_from_region_id(self.region_id()); + report.set_from_region_epoch(self.region().get_region_epoch().clone()); + report.set_trimmed(!self.storage().has_dirty_data()); + let _ = ctx.trans.send(msg); + } + + #[inline] + pub fn on_availability_response( + &mut self, + ctx: &mut StoreContext, + from_peer: u64, + resp: &ExtraMessage, + ) { + self.merge_on_availability_response(ctx, from_peer, resp); + } + pub fn maybe_schedule_gc_peer_tick(&mut self) { let region_state = self.storage().region_state(); if !region_state.get_removed_records().is_empty() @@ -441,7 +470,6 @@ impl Peer { { let tombstone_msg = self.tombstone_message_for_same_region(peer.clone()); self.add_message(tombstone_msg); - self.set_has_ready(); true } else { false @@ -464,7 +492,6 @@ impl Peer { cmp::Ordering::Less => { if 
let Some(msg) = build_peer_destroyed_report(msg) { self.add_message(msg); - self.set_has_ready(); } } // No matter it's greater or equal, the current peer must be destroyed. diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 9419549e580..a9e72f02f8e 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -95,6 +95,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, if self.fsm.peer_mut().tick() { self.fsm.peer_mut().set_has_ready(); } + self.fsm.peer_mut().maybe_clean_up_stale_merge_context(); self.schedule_tick(PeerTick::Raft); } @@ -236,6 +237,24 @@ impl Peer { return; } } + ExtraMessageType::MsgAvailabilityRequest => { + self.on_availability_request( + ctx, + msg.get_extra_msg() + .get_availability_context() + .get_from_region_id(), + msg.get_from_peer(), + ); + return; + } + ExtraMessageType::MsgAvailabilityResponse => { + self.on_availability_response( + ctx, + msg.get_from_peer().get_id(), + msg.get_extra_msg(), + ); + return; + } _ => (), } } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 6b5898b6297..494ae183da6 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -831,6 +831,7 @@ impl Peer { #[inline] pub fn add_message(&mut self, msg: RaftMessage) { self.pending_messages.push(msg); + self.set_has_ready(); } #[inline] diff --git a/components/raftstore-v2/tests/failpoints/test_merge.rs b/components/raftstore-v2/tests/failpoints/test_merge.rs index 3979d61743a..d660221d5ee 100644 --- a/components/raftstore-v2/tests/failpoints/test_merge.rs +++ b/components/raftstore-v2/tests/failpoints/test_merge.rs @@ -37,7 +37,7 @@ fn test_source_and_target_both_replay() { { let _fp = fail::FailGuard::new("after_acquire_source_checkpoint", "1*return->off"); - merge_region(router, region_1, peer_1, 
region_2, false); + merge_region(&cluster, 0, region_1, peer_1, region_2, false); } cluster.restart(0); @@ -87,9 +87,9 @@ fn test_source_destroy_before_target_apply() { // AckCommitMerge). let _fp1 = fail::FailGuard::new("force_send_catch_up_logs", "1*return->off"); let _fp2 = fail::FailGuard::new("after_acquire_source_checkpoint", "1*return->off"); - merge_region(router, region_1, peer_1.clone(), region_2, false); + merge_region(&cluster, 0, region_1, peer_1.clone(), region_2, false); } - assert_peer_not_exist(region_1_id, peer_1.get_id(), router); + assert_peer_not_exist(region_1_id, peer_1.get_id(), &cluster.routers[0]); cluster.restart(0); let router = &mut cluster.routers[0]; diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 9c81f9545a3..4bd0cef8846 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -826,10 +826,11 @@ pub mod merge_helper { }; use raftstore_v2::router::PeerMsg; - use super::TestRouter; + use super::Cluster; pub fn merge_region( - router: &mut TestRouter, + cluster: &Cluster, + store_offset: usize, source: metapb::Region, source_peer: metapb::Peer, target: metapb::Region, @@ -848,25 +849,35 @@ pub mod merge_helper { req.set_admin_request(admin_req); let (msg, sub) = PeerMsg::admin_command(req); - router.send(region_id, msg).unwrap(); - let resp = block_on(sub.result()).unwrap(); - if check { - assert!(!resp.get_header().has_error(), "{:?}", resp); - } + cluster.routers[store_offset].send(region_id, msg).unwrap(); + // They may communicate about trimmed status. + cluster.dispatch(region_id, vec![]); + let _ = block_on(sub.result()).unwrap(); + // We don't check the response because it needs to do a lot of checks async + // before actually proposing the command. // TODO: when persistent implementation is ready, we can use tablet index of // the parent to check whether the split is done. 
Now, just sleep a second. thread::sleep(Duration::from_secs(1)); - let new_target = router.region_detail(target.id); + let mut new_target = cluster.routers[store_offset].region_detail(target.id); if check { - if new_target.get_start_key() == source.get_start_key() { - // [source, target] => new_target - assert_eq!(new_target.get_end_key(), target.get_end_key()); - } else { - // [target, source] => new_target - assert_eq!(new_target.get_start_key(), target.get_start_key()); - assert_eq!(new_target.get_end_key(), source.get_end_key()); + for i in 1..=100 { + let r1 = new_target.get_start_key() == source.get_start_key() + && new_target.get_end_key() == target.get_end_key(); + let r2 = new_target.get_start_key() == target.get_start_key() + && new_target.get_end_key() == source.get_end_key(); + if r1 || r2 { + break; + } else if i == 100 { + panic!( + "still not merged after 5s: {:?} + {:?} != {:?}", + source, target, new_target + ); + } else { + thread::sleep(Duration::from_millis(50)); + new_target = cluster.routers[store_offset].region_detail(target.id); + } } } new_target diff --git a/components/raftstore-v2/tests/integrations/test_merge.rs b/components/raftstore-v2/tests/integrations/test_merge.rs index c08c2bde484..7d9dbef720e 100644 --- a/components/raftstore-v2/tests/integrations/test_merge.rs +++ b/components/raftstore-v2/tests/integrations/test_merge.rs @@ -81,25 +81,24 @@ fn test_merge() { ); } - let region_2 = merge_region(router, region_1.clone(), peer_1, region_2, true); + let region_2 = merge_region(&cluster, 0, region_1.clone(), peer_1, region_2, true); { - let snapshot = router.stale_snapshot(region_2.get_id()); + let snapshot = cluster.routers[0].stale_snapshot(region_2.get_id()); let key = format!("k{}1", region_1.get_id()); assert!(snapshot.get_value(key.as_bytes()).unwrap().is_some()); } - let region_5 = merge_region(router, region_6.clone(), peer_6, region_5, true); + let region_5 = merge_region(&cluster, 0, region_6.clone(), peer_6, region_5, 
true); { - let snapshot = router.stale_snapshot(region_5.get_id()); + let snapshot = cluster.routers[0].stale_snapshot(region_5.get_id()); let key = format!("k{}5", region_6.get_id()); assert!(snapshot.get_value(key.as_bytes()).unwrap().is_some()); } - let region_3 = merge_region(router, region_2, peer_2, region_3, true); - let region_4 = merge_region(router, region_3, peer_3, region_4, true); - let region_5 = merge_region(router, region_4, peer_4, region_5, true); + let region_3 = merge_region(&cluster, 0, region_2, peer_2, region_3, true); + let region_4 = merge_region(&cluster, 0, region_3, peer_3, region_4, true); + let region_5 = merge_region(&cluster, 0, region_4, peer_4, region_5, true); cluster.restart(0); - let router = &mut cluster.routers[0]; - let snapshot = router.stale_snapshot(region_5.get_id()); + let snapshot = cluster.routers[0].stale_snapshot(region_5.get_id()); for (i, v) in [1, 2, 3, 4, 5, 5].iter().enumerate() { let rid = region_1.get_id() + i as u64; let key = format!("k{rid}{v}"); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 67054d5bd11..f2d1c7ffc0e 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2796,8 +2796,9 @@ where self.on_voter_replicated_index_response(msg.get_extra_msg()); } // It's v2 only message and ignore does no harm. 
- ExtraMessageType::MsgGcPeerRequest | ExtraMessageType::MsgGcPeerResponse => (), - ExtraMessageType::MsgFlushMemtable => (), + ExtraMessageType::MsgGcPeerRequest + | ExtraMessageType::MsgGcPeerResponse + | ExtraMessageType::MsgFlushMemtable => (), } } diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 151e278d0d1..404cb418d33 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -189,7 +189,7 @@ fn test_node_base_merge_v2() { } #[test_case(test_raftstore::new_node_cluster)] -#[test_case(test_raftstore_v2::new_node_cluster)] +// No v2, it requires all peers to be available to check trim status. fn test_node_merge_with_slow_learner() { let mut cluster = new_cluster(0, 2); configure_for_merge(&mut cluster.cfg); @@ -410,8 +410,8 @@ fn test_node_check_merged_message() { // Test if a merge handled properly when there is a unfinished slow split before // merge. +// No v2, it requires all peers to be available to check trim status. #[test_case(test_raftstore::new_node_cluster)] -#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_merge_slow_split() { fn imp(is_right_derive: bool) { let mut cluster = new_cluster(0, 3); @@ -913,8 +913,8 @@ fn test_merge_with_slow_promote() { /// logically) /// - A split => C (-∞, k3), A [k3, +∞) /// - Then network recovery +// No v2, it requires all peers to be available to check trim status. #[test_case(test_raftstore::new_node_cluster)] -#[test_case(test_raftstore_v2::new_node_cluster)] fn test_merge_isolated_store_with_no_target_peer() { let mut cluster = new_cluster(0, 4); configure_for_merge(&mut cluster.cfg); @@ -973,8 +973,8 @@ fn test_merge_isolated_store_with_no_target_peer() { /// Test whether a isolated peer can recover when two other regions merge to its /// region +// No v2, it requires all peers to be available to check trim status. 
#[test_case(test_raftstore::new_node_cluster)] -#[test_case(test_raftstore_v2::new_node_cluster)] fn test_merge_cascade_merge_isolated() { let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); From 58ed39a1e3009f0be39cb5607d8b0dae2632d2a5 Mon Sep 17 00:00:00 2001 From: Jay Date: Tue, 18 Apr 2023 16:59:20 +0800 Subject: [PATCH 0641/1149] *: correct io type for raft engine write and purge (#14578) ref tikv/tikv#14462 so background purge can be rate limited. Co-authored-by: Ti Chi Robot --- components/file_system/src/lib.rs | 11 ++++- components/file_system/src/rate_limiter.rs | 10 ++--- components/raft_log_engine/src/engine.rs | 12 ++++-- components/raftstore-v2/src/batch/store.rs | 3 +- .../raftstore/src/store/async_io/read.rs | 1 + components/raftstore/src/store/fsm/store.rs | 2 + components/server/src/common.rs | 42 ++++++++++++------- src/storage/config.rs | 1 + 8 files changed, 54 insertions(+), 28 deletions(-) diff --git a/components/file_system/src/lib.rs b/components/file_system/src/lib.rs index 0b6213094af..413a4ef827e 100644 --- a/components/file_system/src/lib.rs +++ b/components/file_system/src/lib.rs @@ -72,6 +72,7 @@ pub enum IoType { Gc = 8, Import = 9, Export = 10, + RewriteLog = 11, } impl IoType { @@ -88,6 +89,7 @@ impl IoType { IoType::Gc => "gc", IoType::Import => "import", IoType::Export => "export", + IoType::RewriteLog => "log_rewrite", } } } @@ -145,8 +147,13 @@ impl IoPriority { } } - fn unsafe_from_u32(i: u32) -> Self { - unsafe { std::mem::transmute(i) } + fn from_u32(i: u32) -> Self { + match i { + 0 => IoPriority::Low, + 1 => IoPriority::Medium, + 2 => IoPriority::High, + _ => panic!("unknown io priority {}", i), + } } } diff --git a/components/file_system/src/rate_limiter.rs b/components/file_system/src/rate_limiter.rs index feffb6dcf14..79c7094b186 100644 --- a/components/file_system/src/rate_limiter.rs +++ b/components/file_system/src/rate_limiter.rs @@ -497,9 +497,7 @@ impl IoRateLimiter { pub fn request(&self, 
io_type: IoType, io_op: IoOp, mut bytes: usize) -> usize { if self.mode.contains(io_op) { bytes = self.throughput_limiter.request( - IoPriority::unsafe_from_u32( - self.priority_map[io_type as usize].load(Ordering::Relaxed), - ), + IoPriority::from_u32(self.priority_map[io_type as usize].load(Ordering::Relaxed)), bytes, ); } @@ -518,7 +516,7 @@ impl IoRateLimiter { bytes = self .throughput_limiter .async_request( - IoPriority::unsafe_from_u32( + IoPriority::from_u32( self.priority_map[io_type as usize].load(Ordering::Relaxed), ), bytes, @@ -535,9 +533,7 @@ impl IoRateLimiter { fn request_with_skewed_clock(&self, io_type: IoType, io_op: IoOp, mut bytes: usize) -> usize { if self.mode.contains(io_op) { bytes = self.throughput_limiter.request_with_skewed_clock( - IoPriority::unsafe_from_u32( - self.priority_map[io_type as usize].load(Ordering::Relaxed), - ), + IoPriority::from_u32(self.priority_map[io_type as usize].load(Ordering::Relaxed)), bytes, ); } diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 7b107bc0cc9..621d708b057 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -14,7 +14,7 @@ use engine_traits::{ RaftEngine, RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch as RaftLogBatchTrait, Result, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; -use file_system::{IoOp, IoRateLimiter, IoType}; +use file_system::{IoOp, IoRateLimiter, IoType, WithIoType}; use kvproto::{ metapb::Region, raft_serverpb::{ @@ -66,7 +66,8 @@ impl Read for ManagedReader { fn read(&mut self, buf: &mut [u8]) -> IoResult { let mut size = buf.len(); if let Some(ref mut limiter) = self.rate_limiter { - size = limiter.request(IoType::ForegroundRead, IoOp::Read, size); + let io_type = file_system::get_io_type(); + size = limiter.request(io_type, IoOp::Read, size); } match self.inner.as_mut() { Either::Left(reader) => reader.read(&mut buf[..size]), @@ -96,7 +97,8 @@ impl Write for ManagedWriter { 
fn write(&mut self, buf: &[u8]) -> IoResult { let mut size = buf.len(); if let Some(ref mut limiter) = self.rate_limiter { - size = limiter.request(IoType::ForegroundWrite, IoOp::Write, size); + let io_type = file_system::get_io_type(); + size = limiter.request(io_type, IoOp::Write, size); } match self.inner.as_mut() { Either::Left(writer) => writer.write(&buf[..size]), @@ -653,6 +655,8 @@ impl RaftEngine for RaftLogEngine { } fn consume(&self, batch: &mut Self::LogBatch, sync: bool) -> Result { + // Always use ForegroundWrite as all `consume` calls share the same write queue. + let _guard = WithIoType::new(IoType::ForegroundWrite); self.0.write(&mut batch.0, sync).map_err(transfer_error) } @@ -663,6 +667,8 @@ impl RaftEngine for RaftLogEngine { _: usize, _: usize, ) -> Result { + // Always use ForegroundWrite as all `consume` calls share the same write queue. + let _guard = WithIoType::new(IoType::ForegroundWrite); self.0.write(&mut batch.0, sync).map_err(transfer_error) } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 66b0414b7c3..14282cc09f9 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -17,7 +17,7 @@ use collections::HashMap; use concurrency_manager::ConcurrencyManager; use crossbeam::channel::TrySendError; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; -use file_system::{set_io_type, IoType}; +use file_system::{set_io_type, IoType, WithIoType}; use kvproto::{disk_usage::DiskUsage, raft_serverpb::RaftMessage}; use pd_client::PdClient; use raft::{StateRole, INVALID_ID}; @@ -582,6 +582,7 @@ impl StoreSystem { let logger = self.logger.clone(); let router = router.clone(); worker.spawn_interval_task(cfg.value().raft_engine_purge_interval.0, move || { + let _guard = WithIoType::new(IoType::RewriteLog); match raft_clone.manual_purge() { Ok(regions) => { for r in regions { diff --git a/components/raftstore/src/store/async_io/read.rs 
b/components/raftstore/src/store/async_io/read.rs index cee6373c5bd..ced7b0f4418 100644 --- a/components/raftstore/src/store/async_io/read.rs +++ b/components/raftstore/src/store/async_io/read.rs @@ -153,6 +153,7 @@ where tried_cnt, term, } => { + let _guard = WithIoType::new(IoType::Replication); let mut ents = Vec::with_capacity(std::cmp::min((high - low) as usize, MAX_INIT_ENTRY_COUNT)); let res = self.raft_engine.fetch_entries_to( diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 7c71dc3825e..f28c4170459 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -28,6 +28,7 @@ use engine_traits::{ RaftLogBatch, Range, WriteBatch, WriteOptions, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use fail::fail_point; +use file_system::{IoType, WithIoType}; use futures::{compat::Future01CompatExt, FutureExt}; use grpcio_health::HealthService; use keys::{self, data_end_key, data_key, enc_end_key, enc_start_key}; @@ -1546,6 +1547,7 @@ impl RaftBatchSystem { let raft_clone = engines.raft.clone(); let router_clone = self.router(); worker.spawn_interval_task(cfg.value().raft_engine_purge_interval.0, move || { + let _guard = WithIoType::new(IoType::RewriteLog); match raft_clone.manual_purge() { Ok(regions) => { for region_id in regions { diff --git a/components/server/src/common.rs b/components/server/src/common.rs index 2d2ae7bd398..10da6ec9c74 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -22,8 +22,8 @@ use engine_rocks::{ FlowInfo, RocksEngine, RocksStatistics, }; use engine_traits::{ - CachedTablet, CfOptionsExt, FlowControlFactorsExt, KvEngine, RaftEngine, StatisticsReporter, - TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, + data_cf_offset, CachedTablet, CfOptionsExt, FlowControlFactorsExt, KvEngine, RaftEngine, + StatisticsReporter, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, }; use error_code::ErrorCodeExt; 
use file_system::{get_io_rate_limiter, set_io_rate_limiter, BytesFetcher, File, IoBudgetAdjustor}; @@ -504,25 +504,24 @@ impl EnginesResourceInfo { _now: Instant, cached_latest_tablets: &mut HashMap>, ) { - let mut normalized_pending_bytes = 0; + let mut compaction_pending_bytes = [0; DATA_CFS.len()]; + let mut soft_pending_compaction_bytes_limit = [0; DATA_CFS.len()]; - fn fetch_engine_cf(engine: &RocksEngine, cf: &str, normalized_pending_bytes: &mut u32) { + let mut fetch_engine_cf = |engine: &RocksEngine, cf: &str| { if let Ok(cf_opts) = engine.get_options_cf(cf) { if let Ok(Some(b)) = engine.get_cf_pending_compaction_bytes(cf) { - if cf_opts.get_soft_pending_compaction_bytes_limit() > 0 { - *normalized_pending_bytes = std::cmp::max( - *normalized_pending_bytes, - (b * EnginesResourceInfo::SCALE_FACTOR - / cf_opts.get_soft_pending_compaction_bytes_limit()) - as u32, - ); - } + let offset = data_cf_offset(cf); + compaction_pending_bytes[offset] += b; + soft_pending_compaction_bytes_limit[offset] = cmp::max( + cf_opts.get_soft_pending_compaction_bytes_limit(), + soft_pending_compaction_bytes_limit[offset], + ); } } - } + }; if let Some(raft_engine) = &self.raft_engine { - fetch_engine_cf(raft_engine, CF_DEFAULT, &mut normalized_pending_bytes); + fetch_engine_cf(raft_engine, CF_DEFAULT); } self.tablet_registry @@ -543,13 +542,26 @@ impl EnginesResourceInfo { for (_, cache) in cached_latest_tablets.iter_mut() { let Some(tablet) = cache.latest() else { continue }; for cf in &[CF_DEFAULT, CF_WRITE, CF_LOCK] { - fetch_engine_cf(tablet, cf, &mut normalized_pending_bytes); + fetch_engine_cf(tablet, cf); } } // Clear ensures that these tablets are not hold forever. 
cached_latest_tablets.clear(); + let mut normalized_pending_bytes = 0; + for (pending, limit) in compaction_pending_bytes + .iter() + .zip(soft_pending_compaction_bytes_limit) + { + if limit > 0 { + normalized_pending_bytes = cmp::max( + normalized_pending_bytes, + (*pending * EnginesResourceInfo::SCALE_FACTOR / limit) as u32, + ) + } + } + let (_, avg) = self .normalized_pending_bytes_collector .add(normalized_pending_bytes); diff --git a/src/storage/config.rs b/src/storage/config.rs index d301849528d..63250176694 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -394,6 +394,7 @@ impl IoRateLimitConfig { limiter.set_io_priority(IoType::Gc, self.gc_priority); limiter.set_io_priority(IoType::Import, self.import_priority); limiter.set_io_priority(IoType::Export, self.export_priority); + limiter.set_io_priority(IoType::RewriteLog, self.compaction_priority); limiter.set_io_priority(IoType::Other, self.other_priority); limiter } From a4a287980c2b9b3cbcd6ef05a7ec5ac0e655146d Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 18 Apr 2023 17:33:20 +0800 Subject: [PATCH 0642/1149] encryption: fix key collision handling (#14586) close tikv/tikv#14585 Fix a bug that a newly generated encryption key might erase an old key and make data unreadable Signed-off-by: tabokie --- components/encryption/src/manager/mod.rs | 44 +++++++++++++++++------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index 0f3233d7819..a367ad44df2 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -1,6 +1,7 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ + collections::hash_map::Entry, io::{Error as IoError, ErrorKind, Result as IoResult}, path::{Path, PathBuf}, sync::{ @@ -287,11 +288,15 @@ impl Dicts { Ok(Some(())) } - fn rotate_key(&self, key_id: u64, key: DataKey, master_key: &dyn Backend) -> Result<()> { + fn rotate_key(&self, key_id: u64, key: DataKey, master_key: &dyn Backend) -> Result { info!("encryption: rotate data key."; "key_id" => key_id); { let mut key_dict = self.key_dict.lock().unwrap(); - key_dict.keys.insert(key_id, key); + match key_dict.keys.entry(key_id) { + // key id collides + Entry::Occupied(_) => return Ok(false), + Entry::Vacant(e) => e.insert(key), + }; key_dict.current_key_id = key_id; }; @@ -299,7 +304,7 @@ impl Dicts { self.save_key_dict(master_key)?; // Update current data key id. self.current_key_id.store(key_id, Ordering::SeqCst); - Ok(()) + Ok(true) } fn maybe_rotate_data_key( @@ -337,15 +342,30 @@ impl Dicts { let duration = now.duration_since(UNIX_EPOCH).unwrap(); let creation_time = duration.as_secs(); - let (key_id, key) = generate_data_key(method); - let data_key = DataKey { - key, - method, - creation_time, - was_exposed: false, - ..Default::default() - }; - self.rotate_key(key_id, data_key, master_key) + // Generate new data key. 
+ let generate_limit = 10; + for _ in 0..generate_limit { + let (key_id, key) = generate_data_key(method); + if key_id == 0 { + // 0 is invalid + continue; + } + let data_key = DataKey { + key, + method, + creation_time, + was_exposed: false, + ..Default::default() + }; + + let ok = self.rotate_key(key_id, data_key, master_key)?; + if !ok { + // key id collides, retry + continue; + } + return Ok(()); + } + Err(box_err!("key id collides {} times!", generate_limit)) } } From a473cb3e5ae3cd5a606db82e53b67b636eecfbf5 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Wed, 19 Apr 2023 11:15:20 +0800 Subject: [PATCH 0643/1149] sched_pool: auto switch between the single queue and priority queue (#14582) ref tikv/tikv#14353, ref tikv/tikv#14375 Addresses performance regression in the priority queue caused by design. The scheduler worker will now automatically switch between the `single-queue pool` and `priority-queue pool` based on the resource group settings. - Once the group is reserved, use the `single-queue pool` - Once the group is customized, use the `priority-queue pool` Signed-off-by: nolouch --- .../resource_control/src/resource_group.rs | 26 ++- metrics/grafana/tikv_fast_tune.json | 2 +- src/storage/mod.rs | 34 +++ src/storage/mvcc/txn.rs | 4 +- src/storage/txn/sched_pool.rs | 215 ++++++++++++------ tests/failpoints/cases/test_storage.rs | 110 ++++++++- 6 files changed, 311 insertions(+), 80 deletions(-) diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 7435fc17d01..9a7a2e7b3cc 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -4,7 +4,7 @@ use std::{ cell::Cell, cmp::{max, min}, sync::{ - atomic::{AtomicU64, Ordering}, + atomic::{AtomicBool, AtomicU64, Ordering}, Arc, Mutex, }, time::Duration, @@ -169,6 +169,8 @@ pub struct ResourceController { last_min_vt: AtomicU64, // the last time min vt is overflow last_rest_vt_time: Cell, 
+ // whether the settings is customized by user + customized: AtomicBool, } // we are ensure to visit the `last_rest_vt_time` by only 1 thread so it's @@ -185,6 +187,7 @@ impl ResourceController { last_min_vt: AtomicU64::new(0), max_ru_quota: Mutex::new(DEFAULT_MAX_RU_QUOTA), last_rest_vt_time: Cell::new(Instant::now_coarse()), + customized: AtomicBool::new(false), }; // add the "default" resource group controller.add_resource_group( @@ -244,6 +247,16 @@ impl ResourceController { // maybe update existed group self.resource_consumptions.write().insert(name, group); + self.check_customized(); + } + + fn check_customized(&self) { + let groups = self.resource_consumptions.read(); + if groups.len() == 1 && groups.get(DEFAULT_RESOURCE_GROUP_NAME.as_bytes()).is_some() { + self.customized.store(false, Ordering::Release); + return; + } + self.customized.store(true, Ordering::Release); } // we calculate the weight of each resource group based on the currently maximum @@ -268,9 +281,15 @@ impl ResourceController { 0, MEDIUM_PRIORITY, ); + self.check_customized(); return; } self.resource_consumptions.write().remove(name); + self.check_customized(); + } + + pub fn is_customized(&self) -> bool { + self.customized.load(Ordering::Acquire) } #[inline] @@ -625,7 +644,8 @@ pub(crate) mod tests { let resource_manager = ResourceGroupManager::default(); let resource_ctl = resource_manager.derive_controller("test_read".into(), true); let resource_ctl_write = resource_manager.derive_controller("test_write".into(), false); - + assert_eq!(resource_ctl.is_customized(), false); + assert_eq!(resource_ctl_write.is_customized(), false); let group1 = new_resource_group_ru("test1".into(), 5000, 0); resource_manager.add_resource_group(group1); assert_eq!(resource_ctl.resource_group("test1".as_bytes()).weight, 20); @@ -633,6 +653,8 @@ pub(crate) mod tests { resource_ctl_write.resource_group("test1".as_bytes()).weight, 20 ); + assert_eq!(resource_ctl.is_customized(), true); + 
assert_eq!(resource_ctl_write.is_customized(), true); // add a resource group with big ru let group1 = new_resource_group_ru("test2".into(), 50000, 0); diff --git a/metrics/grafana/tikv_fast_tune.json b/metrics/grafana/tikv_fast_tune.json index b096bb418fe..85e9d5c7f02 100644 --- a/metrics/grafana/tikv_fast_tune.json +++ b/metrics/grafana/tikv_fast_tune.json @@ -2712,7 +2712,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sched-worker-pool\"}[1m]))", + "expr": "sum(rate(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sched-worker-.*\"}[1m]))", "format": "time_series", "hide": false, "intervalFactor": 1, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 11740bcc2bf..faacc4cf4cb 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3242,6 +3242,40 @@ impl TestStorageBuilder { Some(Arc::new(ResourceController::new("test".to_owned(), false))), ) } + + pub fn build_for_resource_controller( + self, + resource_controller: Arc, + ) -> Result, L, F>> { + let engine = TxnTestEngine { + engine: self.engine, + txn_ext: Arc::new(TxnExt::default()), + }; + let read_pool = build_read_pool_for_test( + &crate::config::StorageReadPoolConfig::default_for_test(), + engine.clone(), + ); + + Storage::from_engine( + engine, + &self.config, + ReadPool::from(read_pool).handle(), + self.lock_mgr, + ConcurrencyManager::new(1.into()), + DynamicConfigs { + pipelined_pessimistic_lock: self.pipelined_pessimistic_lock, + in_memory_pessimistic_lock: self.in_memory_pessimistic_lock, + wake_up_delay_duration_ms: self.wake_up_delay_duration_ms, + }, + Arc::new(FlowController::Singleton(EngineFlowController::empty())), + DummyReporter, + ResourceTagFactory::new_for_test(), + Arc::new(QuotaLimiter::default()), + latest_feature_gate(), + None, + 
Some(resource_controller), + ) + } } pub trait ResponseBatchConsumer: Send { diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index d5e55e251ae..0eaca54f226 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -9,7 +9,7 @@ use kvproto::kvrpcpb::LockInfo; use txn_types::{Key, Lock, PessimisticLock, TimeStamp, Value}; use super::metrics::{GC_DELETE_VERSIONS_HISTOGRAM, MVCC_VERSIONS_HISTOGRAM}; -use crate::storage::{kv::Modify, mvcc::PessimisticLockNotFoundReason}; +use crate::storage::kv::Modify; pub const MAX_TXN_WRITE_SIZE: usize = 32 * 1024; @@ -246,7 +246,7 @@ pub(crate) fn make_txn_error( ) -> crate::storage::mvcc::ErrorInner { use kvproto::kvrpcpb::WriteConflictReason; - use crate::storage::mvcc::ErrorInner; + use crate::storage::mvcc::{ErrorInner, PessimisticLockNotFoundReason}; if let Some(s) = s { match s.to_ascii_lowercase().as_str() { "keyislocked" => { diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index 49539d51d8c..4036de7a8b2 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -55,17 +55,96 @@ impl PoolTicker for SchedTicker { } #[derive(Clone)] -pub enum SchedPool { +pub enum QueueType { // separated thread pools for different priority commands - Vanilla { - high_worker_pool: FuturePool, - worker_pool: FuturePool, - }, - // one priority based thread pool to handle all commands - Priority { - worker_pool: FuturePool, - resource_ctl: Arc, - }, + Vanilla, + // automatically switch between the `single-queue pool` and `priority-queue pool` based on the + // resource group settings, only used when the resource control feature is enabled. 
+ Dynamic, +} + +#[derive(Clone)] +struct VanillaQueue { + high_worker_pool: FuturePool, + worker_pool: FuturePool, +} + +impl VanillaQueue { + fn spawn( + &self, + priority_level: CommandPri, + f: impl futures::Future + Send + 'static, + ) -> Result<(), Full> { + if priority_level == CommandPri::High { + self.high_worker_pool.spawn(f) + } else { + self.worker_pool.spawn(f) + } + } + + fn scale_pool_size(&self, pool_size: usize) { + self.high_worker_pool + .scale_pool_size(std::cmp::max(1, pool_size / 2)); + self.worker_pool.scale_pool_size(pool_size); + } + + fn get_pool_size(&self, priority_level: CommandPri) -> usize { + if priority_level == CommandPri::High { + self.high_worker_pool.get_pool_size() + } else { + self.worker_pool.get_pool_size() + } + } +} + +#[derive(Clone)] +struct PriorityQueue { + worker_pool: FuturePool, + resource_ctl: Arc, +} + +impl PriorityQueue { + fn spawn( + &self, + group_name: &str, + priority_level: CommandPri, + f: impl futures::Future + Send + 'static, + ) -> Result<(), Full> { + let fixed_level = match priority_level { + CommandPri::High => Some(0), + CommandPri::Normal => None, + CommandPri::Low => Some(2), + }; + // TODO: maybe use a better way to generate task_id + let task_id = rand::random::(); + let mut extras = Extras::new_multilevel(task_id, fixed_level); + extras.set_metadata(group_name.as_bytes().to_owned()); + self.worker_pool.spawn_with_extras( + ControlledFuture::new( + async move { + f.await; + }, + self.resource_ctl.clone(), + group_name.as_bytes().to_owned(), + ), + extras, + ) + } + + fn scale_pool_size(&self, pool_size: usize) { + self.worker_pool.scale_pool_size(pool_size); + } + + fn get_pool_size(&self) -> usize { + self.worker_pool.get_pool_size() + } +} + +#[derive(Clone)] +pub struct SchedPool { + vanilla: VanillaQueue, + priority: Option, + queue_type: QueueType, } impl SchedPool { @@ -74,7 +153,7 @@ impl SchedPool { pool_size: usize, reporter: R, feature_gate: FeatureGate, - _resource_ctl: Option>, + 
resource_ctl: Option>, ) -> Self { let builder = |pool_size: usize, name_prefix: &str| { let engine = Arc::new(Mutex::new(engine.clone())); @@ -102,94 +181,82 @@ impl SchedPool { tls_flush(&reporter); }) }; - // FIXME: for performance issue, disable priority pool temporarily - // if let Some(ref r) = resource_ctl { - // SchedPool::Priority { - // worker_pool: builder(pool_size, "sched-worker-pool") - // .build_priority_future_pool(r.clone()), - // resource_ctl: r.clone(), - // } - // } else { - SchedPool::Vanilla { + let vanilla = VanillaQueue { worker_pool: builder(pool_size, "sched-worker-pool").build_future_pool(), - high_worker_pool: builder(std::cmp::max(1, pool_size / 2), "sched-high-pri-pool") + high_worker_pool: builder(std::cmp::max(1, pool_size / 2), "sched-worker-high") .build_future_pool(), + }; + let priority = resource_ctl.as_ref().map(|r| PriorityQueue { + worker_pool: builder(pool_size, "sched-worker-priority") + .build_priority_future_pool(r.clone()), + resource_ctl: r.clone(), + }); + let queue_type = if resource_ctl.is_some() { + QueueType::Dynamic + } else { + QueueType::Vanilla + }; + + SchedPool { + vanilla, + priority, + queue_type, } - // } } pub fn spawn( &self, group_name: &str, - priority: CommandPri, + priority_level: CommandPri, f: impl futures::Future + Send + 'static, ) -> Result<(), Full> { - match self { - SchedPool::Vanilla { - high_worker_pool, - worker_pool, - } => { - if priority == CommandPri::High { - high_worker_pool.spawn(f) + match self.queue_type { + QueueType::Vanilla => self.vanilla.spawn(priority_level, f), + QueueType::Dynamic => { + if self.can_use_priority() { + fail_point!("priority_pool_task"); + self.priority + .as_ref() + .unwrap() + .spawn(group_name, priority_level, f) } else { - worker_pool.spawn(f) + fail_point!("single_queue_pool_task"); + self.vanilla.spawn(priority_level, f) } } - SchedPool::Priority { - worker_pool, - resource_ctl, - } => { - let fixed_level = match priority { - CommandPri::High => 
Some(0), - CommandPri::Normal => None, - CommandPri::Low => Some(2), - }; - // TODO: maybe use a better way to generate task_id - let task_id = rand::random::(); - let mut extras = Extras::new_multilevel(task_id, fixed_level); - extras.set_metadata(group_name.as_bytes().to_owned()); - worker_pool.spawn_with_extras( - ControlledFuture::new( - async move { - f.await; - }, - resource_ctl.clone(), - group_name.as_bytes().to_owned(), - ), - extras, - ) - } } } pub fn scale_pool_size(&self, pool_size: usize) { - match self { - SchedPool::Vanilla { - high_worker_pool, - worker_pool, - } => { - high_worker_pool.scale_pool_size(std::cmp::max(1, pool_size / 2)); - worker_pool.scale_pool_size(pool_size); + match self.queue_type { + QueueType::Vanilla => { + self.vanilla.scale_pool_size(pool_size); } - SchedPool::Priority { worker_pool, .. } => { - worker_pool.scale_pool_size(pool_size); + QueueType::Dynamic => { + let priority = self.priority.as_ref().unwrap(); + priority.scale_pool_size(pool_size); + self.vanilla.scale_pool_size(pool_size); } } } - pub fn get_pool_size(&self, priority: CommandPri) -> usize { - match self { - SchedPool::Vanilla { - high_worker_pool, - worker_pool, - } => { - if priority == CommandPri::High { - high_worker_pool.get_pool_size() + fn can_use_priority(&self) -> bool { + match self.queue_type { + QueueType::Vanilla => false, + QueueType::Dynamic => self.priority.as_ref().unwrap().resource_ctl.is_customized(), + } + } + + pub fn get_pool_size(&self, priority_level: CommandPri) -> usize { + match self.queue_type { + QueueType::Vanilla => self.vanilla.get_pool_size(priority_level), + QueueType::Dynamic => { + if self.can_use_priority() { + self.priority.as_ref().unwrap().get_pool_size() } else { - worker_pool.get_pool_size() + self.vanilla.get_pool_size(priority_level) } } - SchedPool::Priority { worker_pool, .. 
} => worker_pool.get_pool_size(), } } } diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index ba6339b666d..dd57f28ab94 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -22,6 +22,7 @@ use kvproto::{ }, tikvpb::TikvClient, }; +use resource_control::ResourceGroupManager; use test_raftstore::*; use tikv::{ config::{ConfigController, Module}, @@ -279,7 +280,6 @@ fn test_scale_scheduler_pool() { ctx.set_region_id(region.id); ctx.set_region_epoch(region.get_region_epoch().clone()); ctx.set_peer(cluster.leader_of_region(region.id).unwrap()); - let do_prewrite = |key: &[u8], val: &[u8]| { // prewrite let (prewrite_tx, prewrite_rx) = channel(); @@ -332,6 +332,114 @@ fn test_scale_scheduler_pool() { fail::remove(snapshot_fp); } +#[test] +fn test_scheduler_pool_auto_switch_for_resource_ctl() { + let mut cluster = new_server_cluster(0, 1); + cluster.run(); + + let engine = cluster + .sim + .read() + .unwrap() + .storages + .get(&1) + .unwrap() + .clone(); + let resource_manager = ResourceGroupManager::default(); + let resource_ctl = resource_manager.derive_controller("test".to_string(), true); + + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) + .config(cluster.cfg.tikv.storage.clone()) + .build_for_resource_controller(resource_ctl) + .unwrap(); + + let region = cluster.get_region(b"k1"); + let mut ctx = Context::default(); + ctx.set_region_id(region.id); + ctx.set_region_epoch(region.get_region_epoch().clone()); + ctx.set_peer(cluster.leader_of_region(region.id).unwrap()); + + let do_prewrite = |key: &[u8], val: &[u8]| { + // prewrite + let (prewrite_tx, prewrite_rx) = channel(); + storage + .sched_txn_command( + commands::Prewrite::new( + vec![Mutation::make_put(Key::from_raw(key), val.to_vec())], + key.to_vec(), + 10.into(), + 100, + false, + 2, + TimeStamp::default(), + TimeStamp::default(), + None, + false, + AssertionLevel::Off, 
+ ctx.clone(), + ), + Box::new(move |res: storage::Result<_>| { + let _ = prewrite_tx.send(res); + }), + ) + .unwrap(); + prewrite_rx.recv_timeout(Duration::from_secs(2)) + }; + + let (sender, receiver) = channel(); + let priority_queue_sender = Mutex::new(sender.clone()); + let single_queue_sender = Mutex::new(sender); + fail::cfg_callback("priority_pool_task", move || { + let sender = priority_queue_sender.lock().unwrap(); + sender.send("priority_queue").unwrap(); + }) + .unwrap(); + fail::cfg_callback("single_queue_pool_task", move || { + let sender = single_queue_sender.lock().unwrap(); + sender.send("single_queue").unwrap(); + }) + .unwrap(); + + // Default is use single queue + assert_eq!(do_prewrite(b"k1", b"v1").is_ok(), true); + assert_eq!( + receiver.recv_timeout(Duration::from_millis(500)).unwrap(), + "single_queue" + ); + + // Add group use priority queue + use kvproto::resource_manager::{GroupMode, GroupRequestUnitSettings, ResourceGroup}; + let mut group = ResourceGroup::new(); + group.set_name("rg1".to_string()); + group.set_mode(GroupMode::RuMode); + let mut ru_setting = GroupRequestUnitSettings::new(); + ru_setting.mut_r_u().mut_settings().set_fill_rate(100000); + group.set_r_u_settings(ru_setting); + resource_manager.add_resource_group(group); + thread::sleep(Duration::from_millis(200)); + assert_eq!(do_prewrite(b"k2", b"v2").is_ok(), true); + assert_eq!( + receiver.recv_timeout(Duration::from_millis(500)).unwrap(), + "priority_queue" + ); + + // Delete group use single queue + resource_manager.remove_resource_group("rg1"); + thread::sleep(Duration::from_millis(200)); + assert_eq!(do_prewrite(b"k3", b"v3").is_ok(), true); + assert_eq!( + receiver.recv_timeout(Duration::from_millis(500)).unwrap(), + "single_queue" + ); + + // Scale pool size + let scheduler = storage.get_scheduler(); + let pool = scheduler.get_sched_pool(); + assert_eq!(pool.get_pool_size(CommandPri::Normal), 1); + pool.scale_pool_size(2); + 
assert_eq!(pool.get_pool_size(CommandPri::Normal), 2); +} + #[test] fn test_pipelined_pessimistic_lock() { let rockskv_async_write_fp = "rockskv_async_write"; From 7d7786356a2e3cae03793542a6a3772fa8a8a348 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 18 Apr 2023 23:15:19 -0700 Subject: [PATCH 0644/1149] fix engine type config on existing cluster (#14603) ref tikv/tikv#12842 Adjust engine type config before running tikv server. Otherwise it will be too late. Fix some other compile warnings. Signed-off-by: qi.xu Signed-off-by: tonyxuqqi Co-authored-by: qi.xu --- cmd/tikv-server/src/main.rs | 7 +++++++ src/storage/config.rs | 10 +++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/cmd/tikv-server/src/main.rs b/cmd/tikv-server/src/main.rs index 1d846d72bdb..e64afdf1868 100644 --- a/cmd/tikv-server/src/main.rs +++ b/cmd/tikv-server/src/main.rs @@ -210,6 +210,13 @@ fn main() { process::exit(0); } + // engine config needs to be validated + // so that it can adjust the engine type before too late + if let Err(e) = config.storage.validate_engine_type() { + println!("invalid storage.engine configuration: {}", e); + process::exit(1) + } + match config.storage.engine { EngineType::RaftKv => server::server::run_tikv(config), EngineType::RaftKv2 => server::server2::run_tikv(config), diff --git a/src/storage/config.rs b/src/storage/config.rs index 63250176694..9fc052e0ee0 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -113,7 +113,11 @@ impl Default for Config { } impl Config { - fn validate_engine_type(&mut self) -> Result<(), Box> { + pub fn validate_engine_type(&mut self) -> Result<(), Box> { + if self.data_dir != DEFAULT_DATA_DIR { + self.data_dir = config::canonicalize_path(&self.data_dir)? 
+ } + let v1_kv_db_path = config::canonicalize_sub_path(&self.data_dir, DEFAULT_ROCKSDB_SUB_DIR).unwrap(); let v2_tablet_path = @@ -144,10 +148,6 @@ impl Config { } pub fn validate(&mut self) -> Result<(), Box> { - if self.data_dir != DEFAULT_DATA_DIR { - self.data_dir = config::canonicalize_path(&self.data_dir)? - } - self.validate_engine_type()?; if self.scheduler_concurrency > MAX_SCHED_CONCURRENCY { From dd8322ce7d1a6119780ca2f67e84ede0db9040fd Mon Sep 17 00:00:00 2001 From: 3pointer Date: Wed, 19 Apr 2023 15:21:19 +0800 Subject: [PATCH 0645/1149] raftstore-v2: adapt catch up new peer for pending peers after split (#14549) close tikv/tikv#14572 Signed-off-by: 3pointer Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/operation/life.rs | 65 +++++++++++++++++- components/raftstore-v2/src/operation/mod.rs | 2 +- components/raftstore-v2/src/operation/pd.rs | 27 ++++++-- .../raftstore-v2/src/operation/ready/mod.rs | 9 +++ components/raftstore-v2/src/raft/peer.rs | 68 +++++++++++++++++-- components/test_pd_client/src/pd.rs | 1 + .../raftstore/test_split_region.rs | 34 ++++++++++ 7 files changed, 194 insertions(+), 12 deletions(-) diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 7c7d1f37275..65e7ab7906a 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -36,9 +36,12 @@ use kvproto::{ raft_cmdpb::{AdminCmdType, RaftCmdRequest}, raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, RaftMessage}, }; -use raftstore::store::{util, Transport, WriteTask}; +use raftstore::store::{metrics::RAFT_PEER_PENDING_DURATION, util, Transport, WriteTask}; use slog::{debug, error, info, warn}; -use tikv_util::store::find_peer; +use tikv_util::{ + store::find_peer, + time::{duration_to_sec, Instant}, +}; use super::command::SplitInit; use crate::{ @@ -107,6 +110,64 @@ impl DestroyProgress { } } +#[derive(Default)] +pub struct AbnormalPeerContext { 
+ /// Record the instants of peers being added into the configuration. + /// Remove them after they are not pending any more. + /// (u64, Instant) represents (peer id, time when peer starts pending) + pending_peers: Vec<(u64, Instant)>, + /// A inaccurate cache about which peer is marked as down. + down_peers: Vec, +} + +impl AbnormalPeerContext { + #[inline] + pub fn is_empty(&self) -> bool { + self.pending_peers.is_empty() && self.down_peers.is_empty() + } + + #[inline] + pub fn reset(&mut self) { + self.pending_peers.clear(); + self.down_peers.clear(); + } + + #[inline] + pub fn down_peers(&self) -> &[u64] { + &self.down_peers + } + + #[inline] + pub fn down_peers_mut(&mut self) -> &mut Vec { + &mut self.down_peers + } + + #[inline] + pub fn pending_peers(&self) -> &[(u64, Instant)] { + &self.pending_peers + } + + #[inline] + pub fn pending_peers_mut(&mut self) -> &mut Vec<(u64, Instant)> { + &mut self.pending_peers + } + + #[inline] + pub fn retain_pending_peers(&mut self, f: impl FnMut(&mut (u64, Instant)) -> bool) -> bool { + let len = self.pending_peers.len(); + self.pending_peers.retain_mut(f); + len != self.pending_peers.len() + } + + #[inline] + pub fn flush_metrics(&self) { + let _ = self.pending_peers.iter().map(|(_, pending_after)| { + let elapsed = duration_to_sec(pending_after.saturating_elapsed()); + RAFT_PEER_PENDING_DURATION.observe(elapsed); + }); + } +} + #[derive(Default)] pub struct GcPeerContext { confirmed_ids: Vec, diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 3511a432c15..f5eb4ebdb6f 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -14,7 +14,7 @@ pub use command::{ SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, SplitFlowControl, MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX, }; -pub use life::{DestroyProgress, GcPeerContext}; +pub use life::{AbnormalPeerContext, DestroyProgress, 
GcPeerContext}; pub use ready::{ write_initial_states, ApplyTrace, AsyncWriter, DataTrace, GenSnapTask, SnapState, StateStorage, }; diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 4bb6a06c162..f45cae390da 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -8,8 +8,8 @@ use engine_traits::{KvEngine, RaftEngine}; use fail::fail_point; use kvproto::{metapb, pdpb}; use raftstore::store::{metrics::STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC, Transport}; -use slog::error; -use tikv_util::slog_panic; +use slog::{debug, error}; +use tikv_util::{slog_panic, time::Instant}; use crate::{ batch::StoreContext, @@ -122,7 +122,7 @@ impl Peer { } /// Collects all pending peers and update `peers_start_pending_time`. - fn collect_pending_peers(&self, ctx: &StoreContext) -> Vec { + fn collect_pending_peers(&mut self, ctx: &StoreContext) -> Vec { let mut pending_peers = Vec::with_capacity(self.region().get_peers().len()); let status = self.raft_group().status(); let truncated_idx = self @@ -135,9 +135,10 @@ impl Peer { return pending_peers; } - // TODO: update `peers_start_pending_time`. 
+ self.abnormal_peer_context().flush_metrics(); let progresses = status.progress.unwrap().iter(); + let mut peers_start_pending_time = Vec::with_capacity(self.region().get_peers().len()); for (&id, progress) in progresses { if id == self.peer_id() { continue; @@ -156,6 +157,21 @@ impl Peer { if progress.matched < truncated_idx { if let Some(p) = self.peer_from_cache(id) { pending_peers.push(p); + if !self + .abnormal_peer_context() + .pending_peers() + .iter() + .any(|p| p.0 == id) + { + let now = Instant::now(); + peers_start_pending_time.push((id, now)); + debug!( + self.logger, + "peer start pending"; + "get_peer_id" => id, + "time" => ?now, + ); + } } else { if ctx.cfg.dev_assert { slog_panic!( @@ -172,6 +188,9 @@ impl Peer { } } } + self.abnormal_peer_context_mut() + .pending_peers_mut() + .append(&mut peers_start_pending_time); pending_peers } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index a9e72f02f8e..1222310d9a6 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -290,6 +290,7 @@ impl Peer { // TODO: drop all msg append when the peer is uninitialized and has conflict // ranges with other peers. let from_peer = msg.take_from_peer(); + let from_peer_id = from_peer.get_id(); if self.is_leader() && from_peer.get_id() != INVALID_ID { self.add_peer_heartbeat(from_peer.get_id(), Instant::now()); } @@ -317,6 +318,14 @@ impl Peer { } } + // There are two different cases to check peers can be bring back. + // 1. If the peer is pending, then only AppendResponse can bring it back to up. + // 2. If the peer is down, then HeartbeatResponse and AppendResponse can bring + // it back to up. 
+ if self.any_new_peer_catch_up(from_peer_id) { + self.region_heartbeat_pd(ctx) + } + self.set_has_ready(); } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 494ae183da6..d35dfe22184 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -20,20 +20,22 @@ use raftstore::{ coprocessor::{CoprocessorHost, RegionChangeEvent, RegionChangeReason}, store::{ fsm::ApplyMetrics, + metrics::RAFT_PEER_PENDING_DURATION, util::{Lease, RegionReadProgress}, Config, EntryStorage, PeerStat, ProposalQueue, ReadDelegate, ReadIndexQueue, ReadProgress, TabletSnapManager, WriteTask, }, }; -use slog::{info, Logger}; -use tikv_util::slog_panic; +use slog::{debug, info, Logger}; +use tikv_util::{slog_panic, time::duration_to_sec}; use super::storage::Storage; use crate::{ fsm::ApplyScheduler, operation::{ - AsyncWriter, BucketStatsInfo, CompactLogContext, DestroyProgress, GcPeerContext, - MergeContext, ProposalControl, SimpleWriteReqEncoder, SplitFlowControl, TxnContext, + AbnormalPeerContext, AsyncWriter, BucketStatsInfo, CompactLogContext, DestroyProgress, + GcPeerContext, MergeContext, ProposalControl, SimpleWriteReqEncoder, SplitFlowControl, + TxnContext, }, router::{ApplyTask, CmdResChannel, PeerTick, QueryResChannel}, Result, @@ -115,6 +117,8 @@ pub struct Peer { pending_messages: Vec, gc_peer_context: GcPeerContext, + + abnormal_peer_context: AbnormalPeerContext, } impl Peer { @@ -199,6 +203,7 @@ impl Peer { ), pending_messages: vec![], gc_peer_context: GcPeerContext::default(), + abnormal_peer_context: AbnormalPeerContext::default(), }; // If this region has only one peer and I am the one, campaign directly. 
@@ -561,8 +566,9 @@ impl Peer { ) } - pub fn collect_down_peers(&self, max_duration: Duration) -> Vec { + pub fn collect_down_peers(&mut self, max_duration: Duration) -> Vec { let mut down_peers = Vec::new(); + let mut down_peer_ids = Vec::new(); let now = Instant::now(); for p in self.region().get_peers() { if p.get_id() == self.peer_id() { @@ -575,9 +581,11 @@ impl Peer { stats.set_peer(p.clone()); stats.set_down_seconds(elapsed.as_secs()); down_peers.push(stats); + down_peer_ids.push(p.get_id()); } } } + *self.abnormal_peer_context_mut().down_peers_mut() = down_peer_ids; // TODO: `refill_disk_full_peers` down_peers } @@ -873,4 +881,54 @@ impl Peer { Err(e) => slog_panic!(self.logger, "failed to load term"; "index" => idx, "err" => ?e), } } + + #[inline] + pub fn abnormal_peer_context_mut(&mut self) -> &mut AbnormalPeerContext { + &mut self.abnormal_peer_context + } + + #[inline] + pub fn abnormal_peer_context(&self) -> &AbnormalPeerContext { + &self.abnormal_peer_context + } + + pub fn any_new_peer_catch_up(&mut self, from_peer_id: u64) -> bool { + // no pending or down peers + if self.abnormal_peer_context.is_empty() { + return false; + } + if !self.is_leader() { + self.abnormal_peer_context.reset(); + return false; + } + + if self + .abnormal_peer_context + .down_peers() + .contains(&from_peer_id) + { + return true; + } + + let logger = self.logger.clone(); + self.abnormal_peer_context + .retain_pending_peers(|(peer_id, pending_after)| { + // TODO check wait data peers here + let truncated_idx = self.raft_group.store().entry_storage().truncated_index(); + if let Some(progress) = self.raft_group.raft.prs().get(*peer_id) { + if progress.matched >= truncated_idx { + let elapsed = duration_to_sec(pending_after.saturating_elapsed()); + RAFT_PEER_PENDING_DURATION.observe(elapsed); + debug!( + logger, + "peer has caught up logs"; + "from_peer_id" => %from_peer_id, + "takes" => %elapsed, + ); + return false; + } + } + true + }) + } } diff --git 
a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index d3bbce685c0..1c2cc573eb9 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -439,6 +439,7 @@ struct PdCluster { // region id -> leader leaders: HashMap, down_peers: HashMap, + // peer id -> peer pending_peers: HashMap, is_bootstraped: bool, diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 2673a34b0d2..48b226ba40e 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -1225,3 +1225,37 @@ fn test_gen_split_check_bucket_ranges() { // the bucket_ranges should be None to refresh the bucket cluster.send_half_split_region_message(®ion, None); } + +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_catch_up_peers_after_split() { + let mut cluster = new_cluster(0, 3); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.run(); + + let left_key = b"k1"; + let right_key = b"k3"; + let split_key = b"k2"; + cluster.must_put(left_key, b"v1"); + cluster.must_put(right_key, b"v3"); + + // Left and right key must be in same region before split. + let region = pd_client.get_region(left_key).unwrap(); + let region2 = pd_client.get_region(right_key).unwrap(); + assert_eq!(region.get_id(), region2.get_id()); + + // Split with split_key, so left_key must in left, and right_key in right. + cluster.must_split(®ion, split_key); + + // Get new split region by right_key because default right_derive is false. + let right_region = pd_client.get_region(right_key).unwrap(); + + let pending_peers = pd_client.get_pending_peers(); + + // Ensure new split region has no pending peers. 
+ for p in right_region.get_peers() { + assert!(!pending_peers.contains_key(&p.id)) + } +} From a4000f655fcfca9d2fbdf6932b46e3995c420bb9 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Wed, 19 Apr 2023 17:15:19 +0800 Subject: [PATCH 0646/1149] snapshot-v2: delete idle snapshot file (#14590) close tikv/tikv#14581 1. change `delete_snapshot` to delete some snapshot directory those has same region_id and to_peer_id 2. the delete_snapshot will be happened in peer destory or send_snapshot finished. Signed-off-by: bufferflies <1045931706@qq.com> --- components/file_system/src/lib.rs | 12 +++++- components/raftstore/src/store/snap.rs | 47 ++++++----------------- src/server/server.rs | 3 +- src/server/tablet_snap.rs | 12 +++++- tests/integrations/raftstore/test_snap.rs | 24 +++++++++++- 5 files changed, 57 insertions(+), 41 deletions(-) diff --git a/components/file_system/src/lib.rs b/components/file_system/src/lib.rs index 413a4ef827e..a3701c6ecac 100644 --- a/components/file_system/src/lib.rs +++ b/components/file_system/src/lib.rs @@ -476,9 +476,14 @@ pub fn trash_dir_all(path: impl AsRef) -> io::Result<()> { /// to resume all those removal in the given directory. #[inline] pub fn clean_up_trash(path: impl AsRef) -> io::Result<()> { + clean_up_dir(path, TRASH_PREFIX) +} + +/// clean up all files starts with the given prefix in the given directory. +pub fn clean_up_dir(path: impl AsRef, prefix: &str) -> io::Result<()> { for e in read_dir(path)? 
{ let e = e?; - if e.file_name().to_string_lossy().starts_with(TRASH_PREFIX) { + if e.file_name().to_string_lossy().starts_with(prefix) { remove_dir_all(e.path())?; } } @@ -680,5 +685,10 @@ mod tests { assert!(trash_sub_dir0.exists()); clean_up_trash(data_path).unwrap(); assert!(!trash_sub_dir0.exists()); + + create_dir_all(&sub_dir0).unwrap(); + assert!(sub_dir0.exists()); + clean_up_dir(data_path, "sub").unwrap(); + assert!(!sub_dir0.exists()); } } diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 091609cf63e..68d3c7fba51 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1408,12 +1408,6 @@ impl SnapManager { } pub fn init(&self) -> io::Result<()> { - self.init_core()?; - self.tablet_snap_manager.init()?; - Ok(()) - } - - fn init_core(&self) -> io::Result<()> { let enc_enabled = self.core.encryption_key_manager.is_some(); info!( "Initializing SnapManager, encryption is enabled: {}", @@ -1632,7 +1626,7 @@ impl SnapManager { /// NOTE: don't call it in raftstore thread. 
pub fn get_total_snap_size(&self) -> Result { let size_v1 = self.core.get_total_snap_size()?; - let size_v2 = self.tablet_snap_manager.total_snap_size()?; + let size_v2 = self.tablet_snap_manager.total_snap_size().unwrap_or(0); Ok(size_v1 + size_v2) } @@ -1918,10 +1912,11 @@ impl SnapManagerBuilder { u64::MAX }; let path = path.into(); + assert!(!path.is_empty()); let mut path_v2 = path.clone(); - // the path for tablet snap manager, it will be empty if the cluster is not - // to receive snapshot from cluster of raftstore-v2 path_v2.push_str("_v2"); + let tablet_snap_mgr = TabletSnapManager::new(&path_v2).unwrap(); + let mut snapshot = SnapManager { core: SnapManagerCore { base: path, @@ -1936,7 +1931,7 @@ impl SnapManagerBuilder { stats: Default::default(), }, max_total_size: Arc::new(AtomicU64::new(max_total_size)), - tablet_snap_manager: TabletSnapManager::new_without_init(&path_v2), + tablet_snap_manager: tablet_snap_mgr, }; snapshot.set_max_per_file_size(self.max_per_file_size); // set actual max_per_file_size snapshot @@ -2007,7 +2002,6 @@ pub struct TabletSnapManager { impl TabletSnapManager { pub fn new>(path: T) -> io::Result { - // Initialize the directory if it doesn't exist. 
let path = path.into(); if !path.exists() { file_system::create_dir_all(&path)?; @@ -2018,6 +2012,7 @@ impl TabletSnapManager { format!("{} should be a directory", path.display()), )); } + file_system::clean_up_dir(&path, SNAP_GEN_PREFIX)?; file_system::clean_up_trash(&path)?; Ok(Self { base: path, @@ -2026,29 +2021,6 @@ impl TabletSnapManager { }) } - pub fn new_without_init>(path: T) -> Self { - let path = path.into(); - Self { - base: path, - receiving: Arc::default(), - stats: Arc::default(), - } - } - - pub fn init(&self) -> io::Result<()> { - if !self.base.exists() { - file_system::create_dir_all(&self.base)?; - } - if !self.base.is_dir() { - return Err(io::Error::new( - ErrorKind::Other, - format!("{} should be a directory", self.base.display()), - )); - } - file_system::clean_up_trash(&self.base)?; - Ok(()) - } - pub fn begin_snapshot(&self, key: TabletSnapKey, start: Instant, generate_duration_sec: u64) { let mut stat = SnapshotStat::default(); stat.set_generate_duration_sec(generate_duration_sec); @@ -3163,9 +3135,12 @@ pub mod tests { assert!(mgr.stats().stats.is_empty()); // filter out the total duration seconds less than one sencond. 
- mgr.begin_snapshot(key.clone(), start, 1); - mgr.finish_snapshot(key, start); + let path = mgr.tablet_gen_path(&key); + std::fs::create_dir_all(&path).unwrap(); + assert!(path.exists()); + mgr.delete_snapshot(&key); assert_eq!(mgr.stats().stats.len(), 0); + assert!(!path.exists()); } #[test] diff --git a/src/server/server.rs b/src/server/server.rs index 8e1a33880d6..6e294eda45e 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -579,6 +579,7 @@ mod tests { ); let addr = Arc::new(Mutex::new(None)); let (check_leader_scheduler, _) = tikv_util::worker::dummy_scheduler(); + let path = tempfile::TempDir::new().unwrap(); let mut server = Server::new( mock_store_id, &cfg, @@ -590,7 +591,7 @@ mod tests { quick_fail: Arc::clone(&quick_fail), addr: Arc::clone(&addr), }, - Either::Left(SnapManager::new("")), + Either::Left(SnapManager::new(path.path().to_str().unwrap())), gc_worker, check_leader_scheduler, env, diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index cbcd1a228f8..07a85109006 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -640,7 +640,10 @@ pub fn send_snap( ); let deregister = { let (mgr, key) = (mgr.clone(), key.clone()); - DeferContext::new(move || mgr.finish_snapshot(key.clone(), timer)) + DeferContext::new(move || { + mgr.finish_snapshot(key.clone(), timer); + mgr.delete_snapshot(&key); + }) }; let cb = ChannelBuilder::new(env) @@ -662,7 +665,6 @@ pub fn send_snap( send_timer.observe_duration(); drop(client); drop(deregister); - mgr.delete_snapshot(&key); match recv_result { None => Ok(SendStat { key, @@ -807,6 +809,12 @@ where let region_id = msg.get_region_id(); if self.sending_count.load(Ordering::SeqCst) >= self.cfg.concurrent_send_snap_limit { + let key = TabletSnapKey::from_region_snap( + msg.get_region_id(), + msg.get_to_peer().get_id(), + msg.get_message().get_snapshot(), + ); + self.snap_mgr.delete_snapshot(&key); warn!( "too many sending snapshot tasks, drop Send Snap[to: {}, snap: {:?}]", 
addr, msg diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index f3bd7583ab3..3171aaa1a9e 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -782,6 +782,7 @@ fn test_v1_receive_snap_from_v2() { let test_receive_snap = |key_num| { let mut cluster_v1 = test_raftstore::new_server_cluster(1, 1); let mut cluster_v2 = test_raftstore_v2::new_server_cluster(1, 1); + let mut cluster_v1_tikv = test_raftstore::new_server_cluster(1, 1); cluster_v1 .cfg @@ -791,8 +792,10 @@ fn test_v1_receive_snap_from_v2() { cluster_v1.run(); cluster_v2.run(); + cluster_v1_tikv.run(); let s1_addr = cluster_v1.get_addr(1); + let s2_addr = cluster_v1_tikv.get_addr(1); let region = cluster_v2.get_region(b""); let region_id = region.get_id(); let engine = cluster_v2.get_engine(1); @@ -810,10 +813,29 @@ fn test_v1_receive_snap_from_v2() { let limit = Limiter::new(f64::INFINITY); let env = Arc::new(Environment::new(1)); let _ = block_on(async { - send_snap_v2(env, snap_mgr, security_mgr, &cfg, &s1_addr, msg, limit) + send_snap_v2( + env.clone(), + snap_mgr.clone(), + security_mgr.clone(), + &cfg, + &s1_addr, + msg.clone(), + limit.clone(), + ) + .unwrap() + .await + }); + let send_result = block_on(async { + send_snap_v2(env, snap_mgr, security_mgr, &cfg, &s2_addr, msg, limit) .unwrap() .await }); + // snapshot should be rejected by cluster v1 tikv, and the snapshot should be + // deleted. 
+ assert!(send_result.is_err()); + let dir = cluster_v2.get_snap_dir(1); + let read_dir = std::fs::read_dir(dir).unwrap(); + assert_eq!(0, read_dir.count()); // The snapshot has been received by cluster v1, so check it's completeness let snap_mgr = cluster_v1.get_snap_mgr(1); From fdc01d17d43e6ce6848f661cb55180c022ddfde6 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 19 Apr 2023 17:35:19 +0800 Subject: [PATCH 0647/1149] raftstore-v2: memtable pre-flush should be async (#14567) ref tikv/tikv#14566 memtable pre-flush should be async Signed-off-by: Spade A Co-authored-by: Xinye Tao Co-authored-by: Ti Chi Robot --- components/raftstore-v2/src/batch/store.rs | 35 ++---- components/raftstore-v2/src/lib.rs | 2 +- .../operation/command/admin/compact_log.rs | 24 ++-- .../src/operation/command/admin/mod.rs | 53 ++++++-- .../src/operation/command/admin/split.rs | 40 +++--- .../src/operation/command/write/ingest.rs | 8 +- .../raftstore-v2/src/operation/ready/mod.rs | 35 +++--- .../src/operation/ready/snapshot.rs | 6 +- components/raftstore-v2/src/worker/mod.rs | 3 +- .../src/worker/{tablet_gc.rs => tablet.rs} | 65 +++++++++- .../raftstore-v2/src/worker/tablet_flush.rs | 116 ------------------ components/txn_types/src/types.rs | 2 +- 12 files changed, 171 insertions(+), 218 deletions(-) rename components/raftstore-v2/src/worker/{tablet_gc.rs => tablet.rs} (85%) delete mode 100644 components/raftstore-v2/src/worker/tablet_flush.rs diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 14282cc09f9..1e72341d651 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -43,7 +43,7 @@ use tikv_util::{ sys::SysQuota, time::{duration_to_sec, Instant as TiInstant}, timer::SteadyTimer, - worker::{Builder as WorkerBuilder, LazyWorker, Scheduler, Worker}, + worker::{LazyWorker, Scheduler, Worker}, yatp_pool::{DefaultTicker, FuturePool, 
YatpPoolBuilder}, Either, }; @@ -54,7 +54,7 @@ use crate::{ operation::{SharedReadTablet, MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX}, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, - worker::{pd, tablet_flush, tablet_gc}, + worker::{pd, tablet}, Error, Result, }; @@ -472,9 +472,8 @@ where pub struct Schedulers { pub read: Scheduler>, pub pd: Scheduler, - pub tablet_gc: Scheduler>, + pub tablet: Scheduler>, pub write: WriteSenders, - pub tablet_flush: Scheduler, // Following is not maintained by raftstore itself. pub split_check: Scheduler, @@ -484,7 +483,7 @@ impl Schedulers { fn stop(&self) { self.read.stop(); self.pd.stop(); - self.tablet_gc.stop(); + self.tablet.stop(); self.split_check.stop(); } } @@ -495,10 +494,9 @@ struct Workers { /// Worker for fetching raft logs asynchronously async_read: Worker, pd: LazyWorker, - tablet_gc: Worker, + tablet: Worker, async_write: StoreWriters, purge: Option, - tablet_flush: Worker, // Following is not maintained by raftstore itself. 
background: Worker, @@ -506,16 +504,12 @@ struct Workers { impl Workers { fn new(background: Worker, pd: LazyWorker, purge: Option) -> Self { - let tablet_flush = WorkerBuilder::new("tablet-flush-worker") - .thread_count(2) - .create(); Self { async_read: Worker::new("async-read-worker"), pd, - tablet_gc: Worker::new("tablet-gc-worker"), + tablet: Worker::new("tablet-worker"), async_write: StoreWriters::new(None), purge, - tablet_flush, background, } } @@ -524,8 +518,7 @@ impl Workers { self.async_write.shutdown(); self.async_read.stop(); self.pd.stop(); - self.tablet_gc.stop(); - self.tablet_flush.stop(); + self.tablet.stop(); if let Some(w) = self.purge { w.stop(); } @@ -636,27 +629,21 @@ impl StoreSystem { ), ); - let tablet_gc_scheduler = workers.tablet_gc.start_with_timer( - "tablet-gc-worker", - tablet_gc::Runner::new( + let tablet_gc_scheduler = workers.tablet.start_with_timer( + "tablet-worker", + tablet::Runner::new( tablet_registry.clone(), sst_importer.clone(), self.logger.clone(), ), ); - let tablet_flush_scheduler = workers.tablet_flush.start( - "tablet-flush-worker", - tablet_flush::Runner::new(router.clone(), tablet_registry.clone(), self.logger.clone()), - ); - let schedulers = Schedulers { read: read_scheduler, pd: workers.pd.scheduler(), - tablet_gc: tablet_gc_scheduler, + tablet: tablet_gc_scheduler, write: workers.async_write.senders(), split_check: split_check_scheduler, - tablet_flush: tablet_flush_scheduler, }; let builder = StorePollerBuilder::new( diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 04745d01fbe..bcfaf383024 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -44,5 +44,5 @@ pub use operation::{write_initial_states, SimpleWriteBinary, SimpleWriteEncoder, pub use raftstore::{store::Config, Error, Result}; pub use worker::{ pd::{PdReporter, Task as PdTask}, - tablet_flush::Task as TabletFlushTask, + tablet::Task as TabletTask, }; diff --git 
a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 1cc9ccbb1c3..383b54aa3b4 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -34,7 +34,7 @@ use crate::{ operation::AdminCmdResult, raft::{Apply, Peer}, router::{CmdResChannel, PeerTick}, - worker::tablet_gc, + worker::tablet, }; #[derive(Debug)] @@ -44,7 +44,7 @@ pub struct CompactLogContext { last_applying_index: u64, /// Tombstone tablets can only be destroyed when the tablet that replaces it /// is persisted. This is a list of tablet index that awaits to be - /// persisted. When persisted_apply is advanced, we need to notify tablet_gc + /// persisted. When persisted_apply is advanced, we need to notify tablet /// worker to destroy them. tombstone_tablets_wait_index: Vec, } @@ -303,8 +303,8 @@ impl Peer { .push(new_tablet_index); let _ = ctx .schedulers - .tablet_gc - .schedule(tablet_gc::Task::prepare_destroy( + .tablet + .schedule(tablet::Task::prepare_destroy( old_tablet, self.region_id(), new_tablet_index, @@ -330,8 +330,8 @@ impl Peer { .push(new_tablet_index); let _ = ctx .schedulers - .tablet_gc - .schedule(tablet_gc::Task::prepare_destroy_path( + .tablet + .schedule(tablet::Task::prepare_destroy_path( old_tablet, self.region_id(), new_tablet_index, @@ -381,14 +381,14 @@ impl Peer { }; let region_id = self.region_id(); let applied_index = self.entry_storage().applied_index(); - let sched = ctx.schedulers.tablet_gc.clone(); - let _ = sched.schedule(tablet_gc::Task::prepare_destroy( + let sched = ctx.schedulers.tablet.clone(); + let _ = sched.schedule(tablet::Task::prepare_destroy( tablet, self.region_id(), applied_index, )); task.persisted_cbs.push(Box::new(move || { - let _ = sched.schedule(tablet_gc::Task::destroy(region_id, applied_index)); + let _ = sched.schedule(tablet::Task::destroy(region_id, 
applied_index)); })); } @@ -506,14 +506,14 @@ impl Peer { } } if self.remove_tombstone_tablets(new_persisted) { - let sched = store_ctx.schedulers.tablet_gc.clone(); + let sched = store_ctx.schedulers.tablet.clone(); if !task.has_snapshot { task.persisted_cbs.push(Box::new(move || { - let _ = sched.schedule(tablet_gc::Task::destroy(region_id, new_persisted)); + let _ = sched.schedule(tablet::Task::destroy(region_id, new_persisted)); })); } else { // In snapshot, the index is persisted, tablet can be destroyed directly. - let _ = sched.schedule(tablet_gc::Task::destroy(region_id, new_persisted)); + let _ = sched.schedule(tablet::Task::destroy(region_id, new_persisted)); } } } diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 28fceb2d95b..69c9b39aaa2 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -39,7 +39,11 @@ pub use split::{ use tikv_util::{box_err, log::SlogFormat}; use txn_types::WriteBatchFlags; -use crate::{batch::StoreContext, raft::Peer, router::CmdResChannel}; +use crate::{ + batch::StoreContext, + raft::Peer, + router::{CmdResChannel, PeerMsg, RaftRequest}, +}; #[derive(Debug)] pub enum AdminCmdResult { @@ -147,9 +151,9 @@ impl Peer { // the follower so that they can flush memtalbes in advance too. // // 2. When the task finishes, it will propose a batch split with - // `SPLIT_SECOND_PHASE` flag. + // `PRE_FLUSH_FINISHED` flag. 
if !WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()) - .contains(WriteBatchFlags::SPLIT_SECOND_PHASE) + .contains(WriteBatchFlags::PRE_FLUSH_FINISHED) { if self.tablet_being_flushed() { return; @@ -161,14 +165,42 @@ impl Peer { self.logger, "Schedule flush tablet"; ); - if let Err(e) = ctx.schedulers.tablet_flush.schedule( - crate::TabletFlushTask::TabletFlush { + + let mailbox = match ctx.router.mailbox(region_id) { + Some(mailbox) => mailbox, + None => { + // None means the node is shutdown concurrently and thus the + // mailboxes in router have been cleared + assert!( + ctx.router.is_shutdown(), + "{} router should have been closed", + SlogFormat(&self.logger) + ); + return; + } + }; + + let logger = self.logger.clone(); + let on_flush_finish = move || { + req.mut_header() + .set_flags(WriteBatchFlags::PRE_FLUSH_FINISHED.bits()); + if let Err(e) = mailbox + .try_send(PeerMsg::AdminCommand(RaftRequest::new(req, ch))) + { + error!( + logger, + "send split request fail after pre-flush finished"; + "err" => ?e, + ); + } + }; + + if let Err(e) = + ctx.schedulers.tablet.schedule(crate::TabletTask::Flush { region_id, - req: Some(req), - is_leader: true, - ch: Some(ch), - }, - ) { + cb: Some(Box::new(on_flush_finish)), + }) + { error!( self.logger, "Fail to schedule flush task"; @@ -176,6 +208,7 @@ impl Peer { ) } + // Notify followers to flush their relevant memtables let peers = self.region().get_peers().to_vec(); for p in peers { if p == *self.peer() diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 0b53476273f..4c6fdad3aa2 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -62,7 +62,7 @@ use crate::{ operation::{AdminCmdResult, SharedReadTablet}, raft::{Apply, Peer}, router::{CmdResChannel, PeerMsg, PeerTick, StoreMsg}, - worker::tablet_gc, + worker::tablet, Error, 
}; @@ -132,8 +132,8 @@ pub fn report_split_init_finish( if let Err(e) = ctx .schedulers - .tablet_gc - .schedule(tablet_gc::Task::direct_destroy_path(temp_split_path( + .tablet + .schedule(tablet::Task::direct_destroy_path(temp_split_path( &ctx.tablet_registry, finish_region_id, ))) @@ -641,16 +641,13 @@ impl Peer { } }; let tablet_index = res.tablet_index; - let _ = store_ctx - .schedulers - .tablet_gc - .schedule(tablet_gc::Task::trim( - self.tablet().unwrap().clone(), - derived, - move || { - let _ = mailbox.force_send(PeerMsg::TabletTrimmed { tablet_index }); - }, - )); + let _ = store_ctx.schedulers.tablet.schedule(tablet::Task::trim( + self.tablet().unwrap().clone(), + derived, + move || { + let _ = mailbox.force_send(PeerMsg::TabletTrimmed { tablet_index }); + }, + )); let last_region_id = res.regions.last().unwrap().get_id(); let mut new_ids = HashSet::default(); @@ -771,16 +768,13 @@ impl Peer { if self.storage().has_dirty_data() { let tablet_index = self.storage().tablet_index(); if let Some(mailbox) = store_ctx.router.mailbox(region_id) { - let _ = store_ctx - .schedulers - .tablet_gc - .schedule(tablet_gc::Task::trim( - self.tablet().unwrap().clone(), - self.region(), - move || { - let _ = mailbox.force_send(PeerMsg::TabletTrimmed { tablet_index }); - }, - )); + let _ = store_ctx.schedulers.tablet.schedule(tablet::Task::trim( + self.tablet().unwrap().clone(), + self.region(), + move || { + let _ = mailbox.force_send(PeerMsg::TabletTrimmed { tablet_index }); + }, + )); } else { // None means the node is shutdown concurrently and thus the // mailboxes in router have been cleared diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index 73459740393..bc15765437f 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -16,7 +16,7 @@ use crate::{ fsm::{ApplyResReporter, Store, 
StoreFsmDelegate}, raft::{Apply, Peer}, router::{PeerMsg, StoreTick}, - worker::tablet_gc, + worker::tablet, }; impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { @@ -53,7 +53,7 @@ impl Store { if let Err(TrySendError::Disconnected(msg)) = ctx.router.send(region_id, PeerMsg::CleanupImportSst(ssts.into())) && !ctx.router.is_shutdown() { let PeerMsg::CleanupImportSst(ssts) = msg else { unreachable!() }; - let _ = ctx.schedulers.tablet_gc.schedule(tablet_gc::Task::CleanupImportSst(ssts)); + let _ = ctx.schedulers.tablet.schedule(tablet::Task::CleanupImportSst(ssts)); } } @@ -75,8 +75,8 @@ impl Peer { } let _ = ctx .schedulers - .tablet_gc - .schedule(tablet_gc::Task::CleanupImportSst(stale_ssts.into())); + .tablet + .schedule(tablet::Task::CleanupImportSst(stale_ssts.into())); } } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 1222310d9a6..9a29c705aff 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -56,7 +56,7 @@ use crate::{ fsm::{PeerFsmDelegate, Store}, raft::{Peer, Storage}, router::{PeerMsg, PeerTick}, - worker::tablet_gc, + worker::tablet, }; const PAUSE_FOR_RECOVERY_GAP: u64 = 128; @@ -119,16 +119,13 @@ impl Peer { let region_id = self.region_id(); let mailbox = store_ctx.router.mailbox(region_id).unwrap(); let tablet_index = self.storage().tablet_index(); - let _ = store_ctx - .schedulers - .tablet_gc - .schedule(tablet_gc::Task::trim( - self.tablet().unwrap().clone(), - self.region(), - move || { - let _ = mailbox.force_send(PeerMsg::TabletTrimmed { tablet_index }); - }, - )); + let _ = store_ctx.schedulers.tablet.schedule(tablet::Task::trim( + self.tablet().unwrap().clone(), + self.region(), + move || { + let _ = mailbox.force_send(PeerMsg::TabletTrimmed { tablet_index }); + }, + )); } let entry_storage = self.storage().entry_storage(); let committed_index = 
entry_storage.commit_index(); @@ -219,15 +216,13 @@ impl Peer { if util::is_epoch_stale(region_epoch, self.region().get_region_epoch()) { return; } - let _ = - ctx.schedulers - .tablet_flush - .schedule(crate::TabletFlushTask::TabletFlush { - region_id: self.region().get_id(), - req: None, - is_leader: false, - ch: None, - }); + let _ = ctx + .schedulers + .tablet + .schedule(crate::worker::tablet::Task::Flush { + region_id: self.region().get_id(), + cb: None, + }); return; } ExtraMessageType::MsgWantRollbackMerge => { diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 3db8590d7ed..12b4a97e710 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -49,7 +49,7 @@ use crate::{ operation::{command::temp_split_path, SharedReadTablet}, raft::{Apply, Peer, Storage}, router::ApplyTask, - worker::tablet_gc, + worker::tablet, Result, StoreContext, }; @@ -282,8 +282,8 @@ impl Peer { if self.remove_tombstone_tablets(snapshot_index) { let _ = ctx .schedulers - .tablet_gc - .schedule(tablet_gc::Task::destroy(region_id, snapshot_index)); + .tablet + .schedule(tablet::Task::destroy(region_id, snapshot_index)); } } } diff --git a/components/raftstore-v2/src/worker/mod.rs b/components/raftstore-v2/src/worker/mod.rs index 121c41906d7..2fa7255afd3 100644 --- a/components/raftstore-v2/src/worker/mod.rs +++ b/components/raftstore-v2/src/worker/mod.rs @@ -1,5 +1,4 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
pub mod pd; -pub mod tablet_flush; -pub mod tablet_gc; +pub mod tablet; diff --git a/components/raftstore-v2/src/worker/tablet_gc.rs b/components/raftstore-v2/src/worker/tablet.rs similarity index 85% rename from components/raftstore-v2/src/worker/tablet_gc.rs rename to components/raftstore-v2/src/worker/tablet.rs index 5799398c080..db09c4ba3be 100644 --- a/components/raftstore-v2/src/worker/tablet_gc.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -8,11 +8,12 @@ use std::{ }; use collections::HashMap; -use engine_traits::{DeleteStrategy, KvEngine, Range, TabletContext, TabletRegistry}; +use engine_traits::{DeleteStrategy, KvEngine, Range, TabletContext, TabletRegistry, DATA_CFS}; use kvproto::{import_sstpb::SstMeta, metapb::Region}; use slog::{debug, error, info, warn, Logger}; use sst_importer::SstImporter; use tikv_util::{ + time::Instant, worker::{Runnable, RunnableWithTimer}, yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, Either, @@ -41,6 +42,14 @@ pub enum Task { DirectDestroy { tablet: Either }, /// Cleanup ssts. 
CleanupImportSst(Box<[SstMeta]>), + /// Flush memtable before split + /// + /// cb is some iff the task is sent from leader, it is used to real propose + /// split when flush finishes + Flush { + region_id: u64, + cb: Option>, + }, } impl Display for Task { @@ -77,6 +86,17 @@ impl Display for Task { Task::CleanupImportSst(ssts) => { write!(f, "cleanup import ssts {:?}", ssts) } + Task::Flush { + region_id, + cb: on_flush_finish, + } => { + write!( + f, + "flush tablet for region_id {}, is leader {}", + region_id, + on_flush_finish.is_some() + ) + } } } } @@ -160,7 +180,7 @@ impl Runner { waiting_destroy_tasks: HashMap::default(), pending_destroy_tasks: Vec::new(), background_pool: YatpPoolBuilder::new(DefaultTicker::default()) - .name_prefix("tablet-gc-bg") + .name_prefix("tablet-bg") .thread_count( 0, DEFAULT_BACKGROUND_POOL_SIZE, @@ -311,6 +331,46 @@ impl Runner { } } } + + fn flush_tablet(&self, region_id: u64, cb: Option>) { + let Some(Some(tablet)) = self + .tablet_registry + .get(region_id) + .map(|mut cache| cache.latest().cloned()) else {return}; + + // The callback `cb` being some means it's the task sent from + // leader, we should sync flush memtables and call it after the flush complete + // where the split will be proposed again with extra flag. + if let Some(cb) = cb { + let logger = self.logger.clone(); + let now = Instant::now(); + self.background_pool + .spawn(async move { + // sync flush for leader to let the flush happend before later checkpoint. 
+ tablet.flush_cfs(DATA_CFS, true).unwrap(); + let elapsed = now.saturating_elapsed(); + // to be removed after when it's stable + info!( + logger, + "flush memtable for leader"; + "region_id" => region_id, + "duration" => ?elapsed, + ); + + drop(tablet); + cb(); + }) + .unwrap(); + } else { + info!( + self.logger, + "flush memtable for follower"; + "region_id" => region_id, + ); + + tablet.flush_cfs(DATA_CFS, false).unwrap(); + } + } } impl Runnable for Runner @@ -338,6 +398,7 @@ where } => self.destroy(region_id, persisted_index), Task::DirectDestroy { tablet, .. } => self.direct_destroy(tablet), Task::CleanupImportSst(ssts) => self.cleanup_ssts(ssts), + Task::Flush { region_id, cb } => self.flush_tablet(region_id, cb), } } } diff --git a/components/raftstore-v2/src/worker/tablet_flush.rs b/components/raftstore-v2/src/worker/tablet_flush.rs deleted file mode 100644 index e7d2c534f80..00000000000 --- a/components/raftstore-v2/src/worker/tablet_flush.rs +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. - -use std::fmt::{Display, Formatter}; - -use engine_traits::{KvEngine, RaftEngine, TabletRegistry, DATA_CFS}; -use kvproto::raft_cmdpb::{AdminCmdType, RaftCmdRequest}; -use slog::{error, info, Logger}; -use tikv_util::{time::Instant, worker::Runnable}; -use txn_types::WriteBatchFlags; - -use crate::{ - router::{CmdResChannel, PeerMsg, RaftRequest}, - StoreRouter, -}; - -pub enum Task { - TabletFlush { - region_id: u64, - req: Option, - is_leader: bool, - ch: Option, - }, -} - -impl Display for Task { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Task::TabletFlush { region_id, .. 
} => { - write!(f, "Flush tablet before split for region {}", region_id) - } - } - } -} - -pub struct Runner { - router: StoreRouter, - tablet_registry: TabletRegistry, - logger: Logger, -} - -impl Runner { - pub fn new( - router: StoreRouter, - tablet_registry: TabletRegistry, - logger: Logger, - ) -> Self { - Self { - router, - tablet_registry, - logger, - } - } - - fn flush_tablet( - &mut self, - region_id: u64, - req: Option, - is_leader: bool, - ch: Option, - ) { - let Some(Some(tablet)) = self - .tablet_registry - .get(region_id) - .map(|mut cache| cache.latest().cloned()) else {return}; - let now = Instant::now(); - tablet.flush_cfs(DATA_CFS, true).unwrap(); - let elapsed = now.saturating_elapsed(); - // to be removed after when it's stable - info!( - self.logger, - "flush memtable time consumes"; - "region_id" => region_id, - "duration" => ?elapsed, - "is_leader" => is_leader, - ); - - if !is_leader { - return; - } - - let mut req = req.unwrap(); - assert!(req.get_admin_request().get_cmd_type() == AdminCmdType::BatchSplit); - req.mut_header() - .set_flags(WriteBatchFlags::SPLIT_SECOND_PHASE.bits()); - if let Err(e) = self.router.send( - region_id, - PeerMsg::AdminCommand(RaftRequest::new(req, ch.unwrap())), - ) { - error!( - self.logger, - "send split request fail in the second phase"; - "region_id" => region_id, - "err" => ?e, - ); - } - } -} - -impl Runnable for Runner -where - EK: KvEngine, - ER: RaftEngine, -{ - type Task = Task; - - fn run(&mut self, task: Self::Task) { - match task { - Task::TabletFlush { - region_id, - req, - is_leader, - ch, - } => self.flush_tablet(region_id, req, is_leader, ch), - } - } -} diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 23df1a89940..a83a68c7ba6 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -571,7 +571,7 @@ bitflags! { /// Indicates this request is a flashback transaction. 
const FLASHBACK = 0b00001000; /// Indicates the relevant tablet has been flushed, and we can propose split now. - const SPLIT_SECOND_PHASE = 0b00010000; + const PRE_FLUSH_FINISHED = 0b00010000; } } From 56f5d93e5b2654a55d437bef26aa0b7c367baf68 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 19 Apr 2023 22:55:20 +0800 Subject: [PATCH 0648/1149] raftstore-v2: add capture tests (#14587) ref tikv/tikv#14542 raftstore-v2: add capture tests Signed-off-by: Neil Shen Co-authored-by: Ti Chi Robot --- Cargo.lock | 1 + components/raftstore-v2/Cargo.toml | 1 + .../raftstore-v2/src/operation/command/mod.rs | 4 +- .../src/operation/query/capture.rs | 264 +++++++++++++++++- components/raftstore/src/coprocessor/mod.rs | 7 +- .../raftstore/src/store/simple_write.rs | 117 +++++++- components/server/src/server2.rs | 2 + src/config/mod.rs | 10 + 8 files changed, 396 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 10d3a7f37eb..1f0011894b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4426,6 +4426,7 @@ dependencies = [ "collections", "concurrency_manager", "crossbeam", + "engine_rocks", "engine_test", "engine_traits", "error_code", diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 3dfeb512980..ad13ea5ab74 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -65,6 +65,7 @@ txn_types = { workspace = true } yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } [dev-dependencies] +engine_rocks = { workspace = true } engine_test = { workspace = true } slog-global = { workspace = true } tempfile = "3.0" diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index ce4a415cf00..2f2df5a0333 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -99,8 +99,8 @@ pub(crate) fn parse_at( pub struct CommittedEntries { /// Entries need to be 
applied. Note some entries may not be included for /// flow control. - entry_and_proposals: Vec<(Entry, Vec)>, - committed_time: Instant, + pub entry_and_proposals: Vec<(Entry, Vec)>, + pub committed_time: Instant, } fn new_response(header: &RaftRequestHeader) -> RaftCmdResponse { diff --git a/components/raftstore-v2/src/operation/query/capture.rs b/components/raftstore-v2/src/operation/query/capture.rs index 94b58f41809..5fdbde187e4 100644 --- a/components/raftstore-v2/src/operation/query/capture.rs +++ b/components/raftstore-v2/src/operation/query/capture.rs @@ -1,6 +1,6 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use std::{mem, sync::Arc}; +use std::sync::Arc; use engine_traits::{KvEngine, RaftEngine}; use fail::fail_point; @@ -10,7 +10,7 @@ use raftstore::{ store::{ cmd_resp, fsm::{ - apply::{notify_stale_req_with_msg, ObserverType}, + apply::{notify_stale_req_with_msg, ObserverType, SHRINK_PENDING_CMD_QUEUE_CAP}, new_read_index_request, ChangeObserver, }, msg::ErrorCallback, @@ -152,10 +152,264 @@ impl Apply { } let region_id = self.region_id(); - let mut cmd_batch = CmdBatch::new(&self.observe().info, region_id); - let cmds = mem::take(&mut self.observe_mut().cmds); - cmd_batch.extend(&self.observe().info, region_id, cmds); + let observe = self.observe_mut(); + let mut cmd_batch = CmdBatch::new(&observe.info, region_id); + cmd_batch.extend(&observe.info, region_id, observe.cmds.drain(..)); + if observe.cmds.capacity() > SHRINK_PENDING_CMD_QUEUE_CAP { + observe.cmds.shrink_to(SHRINK_PENDING_CMD_QUEUE_CAP); + } self.coprocessor_host() .on_flush_applied_cmd_batch(level, vec![cmd_batch], self.tablet()); } } + +#[cfg(test)] +mod test { + use std::sync::{ + mpsc::{channel, Receiver, Sender}, + Arc, Mutex, + }; + + use engine_test::{ + ctor::{CfOptions, DbOptions}, + kv::{KvTestEngine, TestTabletFactory}, + }; + use engine_traits::{ + FlushState, Peekable, TabletContext, TabletRegistry, CF_DEFAULT, DATA_CFS, + }; + use 
futures::executor::block_on; + use kvproto::{ + metapb::{Region, RegionEpoch}, + raft_cmdpb::RaftRequestHeader, + raft_serverpb::{PeerState, RegionLocalState}, + }; + use raft::{ + prelude::{Entry, EntryType}, + StateRole, + }; + use raftstore::{ + coprocessor::{BoxCmdObserver, CmdObserver, CoprocessorHost}, + store::Config, + }; + use slog::o; + use tempfile::TempDir; + use tikv_util::{store::new_peer, time::Instant, worker::dummy_scheduler}; + + use super::*; + use crate::{ + fsm::ApplyResReporter, + operation::{ + test_util::create_tmp_importer, CatchUpLogs, CommittedEntries, SimpleWriteReqEncoder, + }, + raft::Apply, + router::{build_any_channel, ApplyRes}, + SimpleWriteEncoder, + }; + + struct MockReporter { + sender: Sender, + } + + impl MockReporter { + fn new() -> (Self, Receiver) { + let (tx, rx) = channel(); + (MockReporter { sender: tx }, rx) + } + } + + impl ApplyResReporter for MockReporter { + fn report(&self, apply_res: ApplyRes) { + let _ = self.sender.send(apply_res); + } + + fn redirect_catch_up_logs(&self, _c: CatchUpLogs) {} + } + + #[derive(Clone)] + struct TestObserver { + sender: Sender>, + } + + impl TestObserver { + fn new() -> (Self, Receiver>) { + let (tx, rx) = channel(); + (TestObserver { sender: tx }, rx) + } + } + + impl raftstore::coprocessor::Coprocessor for TestObserver {} + impl CmdObserver for TestObserver { + fn on_flush_applied_cmd_batch( + &self, + _max_level: ObserveLevel, + cmd_batches: &mut Vec, + _engine: &E, + ) { + self.sender.send(cmd_batches.clone()).unwrap(); + } + + fn on_applied_current_term(&self, _: StateRole, _: &Region) {} + } + + fn new_put_entry( + region_id: u64, + region_epoch: RegionEpoch, + k: &[u8], + v: &[u8], + term: u64, + index: u64, + ) -> Entry { + let mut encoder = SimpleWriteEncoder::with_capacity(512); + encoder.put(CF_DEFAULT, k, v); + let mut header = Box::::default(); + header.set_region_id(region_id); + header.set_region_epoch(region_epoch); + let req_encoder = 
SimpleWriteReqEncoder::new(header, encoder.encode(), 512, false); + let (bin, _) = req_encoder.encode(); + let mut e = Entry::default(); + e.set_entry_type(EntryType::EntryNormal); + e.set_term(term); + e.set_index(index); + e.set_data(bin.into()); + e + } + + #[test] + fn test_capture_apply() { + let store_id = 2; + + let mut region = Region::default(); + region.set_id(1); + region.set_end_key(b"k20".to_vec()); + region.mut_region_epoch().set_version(3); + let peers = vec![new_peer(2, 3)]; + region.set_peers(peers.into()); + + let logger = slog_global::borrow_global().new(o!()); + let path = TempDir::new().unwrap(); + let cf_opts = DATA_CFS + .iter() + .copied() + .map(|cf| (cf, CfOptions::default())) + .collect(); + let factory = Box::new(TestTabletFactory::new(DbOptions::default(), cf_opts)); + let reg = TabletRegistry::new(factory, path.path()).unwrap(); + let ctx = TabletContext::new(®ion, Some(5)); + reg.load(ctx, true).unwrap(); + + let mut region_state = RegionLocalState::default(); + region_state.set_state(PeerState::Normal); + region_state.set_region(region.clone()); + region_state.set_tablet_index(5); + + let (read_scheduler, _rx) = dummy_scheduler(); + let (reporter, _) = MockReporter::new(); + let (_tmp_dir, importer) = create_tmp_importer(); + let (ob, cmds_rx) = TestObserver::new(); + let mut host = CoprocessorHost::::default(); + host.registry + .register_cmd_observer(0, BoxCmdObserver::new(ob)); + let mut apply = Apply::new( + &Config::default(), + region + .get_peers() + .iter() + .find(|p| p.store_id == store_id) + .unwrap() + .clone(), + region_state, + reporter, + reg, + read_scheduler, + Arc::new(FlushState::new(5)), + None, + 5, + None, + importer, + host, + logger.clone(), + ); + + let snap = Arc::new(Mutex::new(None)); + let snap_ = snap.clone(); + let (snap_cb, _) = build_any_channel(Box::new(move |args| { + let snap = args.1.take().unwrap(); + let snapshot: RegionSnapshot = match snap.downcast() { + Ok(s) => *s, + Err(t) => 
unreachable!("snapshot type should be the same: {:?}", t), + }; + *snap_.lock().unwrap() = Some(snapshot); + })); + + // put (k1, v1); + // capture_apply; + // put (k2, v2); + let apply_tasks = vec![ + ApplyTask::CommittedEntries(CommittedEntries { + entry_and_proposals: vec![( + new_put_entry( + region.id, + region.get_region_epoch().clone(), + b"k1", + b"v1", + 5, + 6, + ), + vec![], + )], + committed_time: Instant::now(), + }), + ApplyTask::CaptureApply(CaptureChange { + observer: ChangeObserver::from_cdc(region.id, ObserveHandle::new()), + region_epoch: region.get_region_epoch().clone(), + snap_cb, + }), + ApplyTask::CommittedEntries(CommittedEntries { + entry_and_proposals: vec![( + new_put_entry( + region.id, + region.get_region_epoch().clone(), + b"k2", + b"v2", + 5, + 7, + ), + vec![], + )], + committed_time: Instant::now(), + }), + ]; + + for task in apply_tasks { + match task { + ApplyTask::CommittedEntries(ce) => { + block_on(async { apply.apply_committed_entries(ce).await }); + } + ApplyTask::CaptureApply(capture_change) => { + apply.on_capture_apply(capture_change); + } + _ => unreachable!(), + } + } + apply.flush(); + + // must read (k1, v1) from snapshot and capture (k2, v2) + let snap = snap.lock().unwrap().take().unwrap(); + let v1 = snap.get_value_cf(CF_DEFAULT, b"k1").unwrap().unwrap(); + assert_eq!(v1, b"v1"); + let v2 = snap.get_value_cf(CF_DEFAULT, b"k2").unwrap(); + assert!(v2.is_none()); + + let cmds = cmds_rx.try_recv().unwrap(); + assert_eq!(cmds[0].len(), 1); + let put2 = &cmds[0].cmds[0]; + assert_eq!(put2.term, 5); + assert_eq!(put2.index, 7); + let request = &put2.request.requests[0]; + assert_eq!(request.get_put().get_cf(), CF_DEFAULT); + assert_eq!(request.get_put().get_key(), b"k2"); + assert_eq!(request.get_put().get_value(), b"v2"); + let response = &put2.response; + assert!(!response.get_header().has_error()); + } +} diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 
7dc5142e734..f5bdd8664e6 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -512,7 +512,12 @@ impl CmdBatch { self.cmds.push(cmd) } - pub fn extend(&mut self, observe_info: &CmdObserveInfo, region_id: u64, cmds: Vec) { + pub fn extend>( + &mut self, + observe_info: &CmdObserveInfo, + region_id: u64, + cmds: I, + ) { assert_eq!(region_id, self.region_id); assert_eq!(observe_info.cdc_id.id, self.cdc_id); assert_eq!(observe_info.rts_id.id, self.rts_id); diff --git a/components/raftstore/src/store/simple_write.rs b/components/raftstore/src/store/simple_write.rs index 57056f984bd..a303a586935 100644 --- a/components/raftstore/src/store/simple_write.rs +++ b/components/raftstore/src/store/simple_write.rs @@ -679,7 +679,12 @@ mod tests { let mut header = Box::::default(); header.set_term(2); let mut req_encoder: SimpleWriteReqEncoder> = - SimpleWriteReqEncoder::new(header.clone(), bin.clone(), 512, false); + SimpleWriteReqEncoder::>::new( + header.clone(), + bin.clone(), + 512, + false, + ); let mut header2 = Box::::default(); header2.set_term(4); @@ -691,7 +696,12 @@ mod tests { // Frozen bin can't be merged with other bin. assert!(!req_encoder.amend(&header, &bin2)); let mut req_encoder2: SimpleWriteReqEncoder> = - SimpleWriteReqEncoder::new(header.clone(), bin2.clone(), 512, false); + SimpleWriteReqEncoder::>::new( + header.clone(), + bin2.clone(), + 512, + false, + ); assert!(!req_encoder2.amend(&header, &bin)); // Batch should not excceed max size limit. @@ -713,4 +723,107 @@ mod tests { let res = decoder.next(); assert!(res.is_none(), "{:?}", res); } + + #[test] + fn test_to_raft_cmd_request() { + let logger = slog_global::borrow_global().new(o!()); + + // Test header. 
+ let mut header = Box::::default(); + header.set_term(2); + let req_encoder = SimpleWriteReqEncoder::>::new( + header.clone(), + SimpleWriteEncoder::with_capacity(512).encode(), + 512, + false, + ); + let (bin, _) = req_encoder.encode(); + assert_eq!( + header.as_ref(), + SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bin, 0, 0) + .unwrap() + .to_raft_cmd_request() + .get_header(), + ); + + // Test put. + let mut encoder = SimpleWriteEncoder::with_capacity(512); + encoder.put(CF_WRITE, b"write", b"value"); + let req_encoder = SimpleWriteReqEncoder::>::new( + header.clone(), + encoder.encode(), + 512, + false, + ); + let (bin, _) = req_encoder.encode(); + let req = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bin, 0, 0) + .unwrap() + .to_raft_cmd_request(); + assert_eq!(req.get_requests().len(), 1); + assert_eq!(req.get_requests()[0].get_put().get_cf(), CF_WRITE); + assert_eq!(req.get_requests()[0].get_put().get_key(), b"write"); + assert_eq!(req.get_requests()[0].get_put().get_value(), b"value"); + + // Test delete. + let mut encoder = SimpleWriteEncoder::with_capacity(512); + encoder.delete(CF_DEFAULT, b"write"); + let req_encoder = SimpleWriteReqEncoder::>::new( + header.clone(), + encoder.encode(), + 512, + false, + ); + let (bin, _) = req_encoder.encode(); + let req = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bin, 0, 0) + .unwrap() + .to_raft_cmd_request(); + assert_eq!(req.get_requests().len(), 1); + assert_eq!(req.get_requests()[0].get_delete().get_cf(), CF_DEFAULT); + assert_eq!(req.get_requests()[0].get_delete().get_key(), b"write"); + + // Test delete range. 
+ let mut encoder = SimpleWriteEncoder::with_capacity(512); + encoder.delete_range(CF_LOCK, b"start", b"end", true); + let req_encoder = SimpleWriteReqEncoder::>::new( + header.clone(), + encoder.encode(), + 512, + false, + ); + let (bin, _) = req_encoder.encode(); + let req = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bin, 0, 0) + .unwrap() + .to_raft_cmd_request(); + assert_eq!(req.get_requests().len(), 1); + assert_eq!(req.get_requests()[0].get_delete_range().get_cf(), CF_LOCK); + assert_eq!( + req.get_requests()[0].get_delete_range().get_start_key(), + b"start" + ); + assert_eq!( + req.get_requests()[0].get_delete_range().get_end_key(), + b"end" + ); + assert_eq!( + req.get_requests()[0].get_delete_range().get_notify_only(), + true + ); + + // Test ingest. + let mut encoder = SimpleWriteEncoder::with_capacity(512); + encoder.ingest(vec![SstMeta::default(); 5]); + let req_encoder = SimpleWriteReqEncoder::>::new( + header, + encoder.encode(), + 512, + false, + ); + let (bin, _) = req_encoder.encode(); + let req = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bin, 0, 0) + .unwrap() + .to_raft_cmd_request(); + assert_eq!(req.get_requests().len(), 5); + assert!(req.get_requests()[0].has_ingest_sst()); + assert!(req.get_requests()[4].has_ingest_sst()); + } } diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 83bcc2a55fe..0e11049c395 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1236,6 +1236,8 @@ impl TikvServer { let txn_extra_scheduler = cdc::CdcTxnExtraScheduler::new(cdc_scheduler.clone()); let mut engine = RaftKv2::new(router.clone(), region_info_accessor.region_leaders()); + // Set txn extra scheduler immediately to make sure every clone has the + // scheduler. 
engine.set_txn_extra_scheduler(Arc::new(txn_extra_scheduler)); self.engines = Some(TikvEngines { diff --git a/src/config/mod.rs b/src/config/mod.rs index dcbfdc4e441..2115236ed71 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -5827,6 +5827,16 @@ mod tests { "#; let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.validate().unwrap(); + + let content = r#" + [storage] + engine = "partitioned-raft-kv" + [cdc] + hibernate-regions-compatible = true + "#; + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert!(!cfg.cdc.hibernate_regions_compatible); } #[test] From 5ef9d8abedf331def22f1ab9ad635ba12e4fbe4f Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 19 Apr 2023 23:53:19 +0800 Subject: [PATCH 0649/1149] rafstore-v2: fill start and end keys for initial messages (#14607) close tikv/tikv#14606 rafstore-v2: fill start and key for initial messages Signed-off-by: Neil Shen Co-authored-by: Ti Chi Robot --- .../raftstore-v2/src/operation/ready/mod.rs | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 9a29c705aff..f37791638d5 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -26,6 +26,7 @@ use std::{cmp, time::Instant}; use engine_traits::{KvEngine, RaftEngine}; use error_code::ErrorCodeExt; use kvproto::{ + metapb, raft_cmdpb::AdminCmdType, raft_serverpb::{ExtraMessageType, RaftMessage}, }; @@ -34,8 +35,10 @@ use raft::{eraftpb, prelude::MessageType, Ready, SnapshotStatus, StateRole, INVA use raftstore::{ coprocessor::{RegionChangeEvent, RoleChange}, store::{ - needs_evict_entry_cache, util, worker_metrics::SNAP_COUNTER, FetchedLogs, ReadProgress, - Transport, WriteCallback, WriteTask, + needs_evict_entry_cache, + util::{self, is_initial_msg}, + worker_metrics::SNAP_COUNTER, + FetchedLogs, 
ReadProgress, Transport, WriteCallback, WriteTask, }, }; use slog::{debug, error, info, trace, warn}; @@ -370,6 +373,7 @@ impl Peer { return None; } }; + let to_peer_is_learner = to_peer.get_role() == metapb::PeerRole::Learner; let mut raft_msg = self.prepare_raft_message(); @@ -383,6 +387,25 @@ impl Peer { "to" => msg.get_to(), ); } + + // Filling start and end key is only needed for being compatible with + // raftstore v1 tiflash engine. + // + // There could be two cases: + // - Target peer already exists but has not established communication with + // leader yet + // - Target peer is added newly due to member change or region split, but it's + // not created yet + // For both cases the region start key and end key are attached in RequestVote + // and Heartbeat message for the store of that peer to check whether to create a + // new peer when receiving these messages, or just to wait for a pending region + // split to perform later. + if self.storage().is_initialized() && is_initial_msg(&msg) && to_peer_is_learner { + let region = self.region(); + raft_msg.set_start_key(region.get_start_key().to_vec()); + raft_msg.set_end_key(region.get_end_key().to_vec()); + } + raft_msg.set_message(msg); Some(raft_msg) } From 32b2a88e4b8388b1d687e4837837863437a3122d Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 20 Apr 2023 09:15:19 +0800 Subject: [PATCH 0650/1149] resource_control: take global resource consumption into consideration (#14605) close tikv/tikv#14604 resource control takes global resource consumption into consideration Signed-off-by: Connor1996 Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- .../resource_control/src/resource_group.rs | 41 +++-- components/server/src/server.rs | 3 +- components/server/src/server2.rs | 3 +- components/test_raftstore-v2/src/server.rs | 1 + components/test_raftstore/src/server.rs | 1 + src/coprocessor/endpoint.rs | 8 +- src/server/raftkv/mod.rs | 6 +- src/server/server.rs | 4 + src/server/service/kv.rs | 77 ++++++++-- 
src/storage/mod.rs | 142 +++++++++++++----- src/storage/txn/commands/mod.rs | 1 + 12 files changed, 218 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1f0011894b6..32dfbbfc072 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2792,7 +2792,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#ce835ae20dfcb5f69f0aea04236070932c815b6a" +source = "git+https://github.com/pingcap/kvproto.git#dc3cd8784a19bc7f058dbeb19cd8cc4672ee9aad" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 9a7a2e7b3cc..0a808811217 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -14,7 +14,7 @@ use collections::HashMap; use dashmap::{mapref::one::Ref, DashMap}; use fail::fail_point; use kvproto::{ - kvrpcpb::CommandPri, + kvrpcpb::{CommandPri, ResourceControlContext}, resource_manager::{GroupMode, ResourceGroup}, }; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard}; @@ -54,7 +54,7 @@ pub enum ResourceConsumeType { #[derive(Default)] pub struct ResourceGroupManager { resource_groups: DashMap, - registry: Mutex>>, + registry: RwLock>>, } impl ResourceGroupManager { @@ -85,7 +85,7 @@ impl ResourceGroupManager { pub fn add_resource_group(&self, rg: ResourceGroup) { let group_name = rg.get_name().to_ascii_lowercase(); - self.registry.lock().unwrap().iter().for_each(|controller| { + self.registry.read().iter().for_each(|controller| { let ru_quota = Self::get_ru_setting(&rg, controller.is_read); controller.add_resource_group(group_name.clone().into_bytes(), ru_quota, rg.priority); }); @@ -95,7 +95,7 @@ impl ResourceGroupManager { pub fn remove_resource_group(&self, name: &str) { let group_name = name.to_ascii_lowercase(); - self.registry.lock().unwrap().iter().for_each(|controller| { + 
self.registry.read().iter().for_each(|controller| { controller.remove_resource_group(group_name.as_bytes()); }); info!("remove resource group"; "name"=> name); @@ -112,7 +112,7 @@ impl ResourceGroupManager { ret }); if !removed_names.is_empty() { - self.registry.lock().unwrap().iter().for_each(|controller| { + self.registry.read().iter().for_each(|controller| { for name in &removed_names { controller.remove_resource_group(name.as_bytes()); } @@ -130,7 +130,7 @@ impl ResourceGroupManager { pub fn derive_controller(&self, name: String, is_read: bool) -> Arc { let controller = Arc::new(ResourceController::new(name, is_read)); - self.registry.lock().unwrap().push(controller.clone()); + self.registry.write().push(controller.clone()); for g in &self.resource_groups { let ru_quota = Self::get_ru_setting(g.value(), controller.is_read); controller.add_resource_group(g.key().clone().into_bytes(), ru_quota, g.priority); @@ -139,10 +139,29 @@ impl ResourceGroupManager { } pub fn advance_min_virtual_time(&self) { - for controller in self.registry.lock().unwrap().iter() { + for controller in self.registry.read().iter() { controller.update_min_virtual_time(); } } + + pub fn consume_penalty(&self, ctx: &ResourceControlContext) { + for controller in self.registry.read().iter() { + // FIXME: Should consume CPU time for read controller and write bytes for write + // controller, once CPU process time of scheduler worker is tracked. Currently, + // we consume write bytes for read controller as the + // order of magnitude of CPU time and write bytes is similar. 
+ controller.consume( + ctx.resource_group_name.as_bytes(), + ResourceConsumeType::CpuTime(Duration::from_nanos( + (ctx.get_penalty().total_cpu_time_ms * 1_000_000.0) as u64, + )), + ); + controller.consume( + ctx.resource_group_name.as_bytes(), + ResourceConsumeType::IoBytes(ctx.get_penalty().write_bytes as u64), + ); + } + } } pub struct ResourceController { @@ -304,8 +323,8 @@ impl ResourceController { }) } - pub fn consume(&self, name: &[u8], delta: ResourceConsumeType) { - self.resource_group(name).consume(delta) + pub fn consume(&self, name: &[u8], resource: ResourceConsumeType) { + self.resource_group(name).consume(resource) } pub fn update_min_virtual_time(&self) { @@ -424,8 +443,8 @@ impl GroupPriorityTracker { // TODO: make it delta type as generic to avoid mixed consume different types. #[inline] - fn consume(&self, delta: ResourceConsumeType) { - let vt_delta = match delta { + fn consume(&self, resource: ResourceConsumeType) { + let vt_delta = match resource { ResourceConsumeType::CpuTime(dur) => dur.as_micros() as u64, ResourceConsumeType::IoBytes(bytes) => bytes, } * self.weight; diff --git a/components/server/src/server.rs b/components/server/src/server.rs index d1c8e09ef96..890089a6950 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -745,7 +745,7 @@ where cop_read_pool_handle, self.concurrency_manager.clone(), resource_tag_factory, - Arc::clone(&self.quota_limiter), + self.quota_limiter.clone(), ), coprocessor_v2::Endpoint::new(&self.core.config.coprocessor_v2), self.resolver.clone().unwrap(), @@ -756,6 +756,7 @@ where unified_read_pool, debug_thread_pool, health_service, + self.resource_manager.clone(), ) .unwrap_or_else(|e| fatal!("failed to create server: {}", e)); cfg_controller.register( diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 0e11049c395..81575b8cbf6 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -654,7 +654,7 @@ where 
cop_read_pool_handle, self.concurrency_manager.clone(), resource_tag_factory, - Arc::clone(&self.quota_limiter), + self.quota_limiter.clone(), ), coprocessor_v2::Endpoint::new(&self.core.config.coprocessor_v2), self.resolver.clone().unwrap(), @@ -665,6 +665,7 @@ where unified_read_pool, debug_thread_pool, health_service, + self.resource_manager.clone(), ) .unwrap_or_else(|e| fatal!("failed to create server: {}", e)); cfg_controller.register( diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 3de9e5aa956..f110578784f 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -545,6 +545,7 @@ impl ServerCluster { None, debug_thread_pool.clone(), health_service.clone(), + resource_manager.clone(), ) .unwrap(); svr.register_service(create_diagnostics(diag_service.clone())); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index a77fc5d3dd2..4c060cef2ce 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -536,6 +536,7 @@ impl ServerCluster { None, debug_thread_pool.clone(), health_service.clone(), + resource_manager.clone(), ) .unwrap(); svr.register_service(create_import_sst(import_service.clone())); diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 43bf20f582b..71c3d5548a9 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -68,9 +68,9 @@ pub struct Endpoint { slow_log_threshold: Duration, - _phantom: PhantomData, - quota_limiter: Arc, + + _phantom: PhantomData, } impl tikv_util::AssertSend for Endpoint {} @@ -104,8 +104,8 @@ impl Endpoint { stream_channel_size: cfg.end_point_stream_channel_size, max_handle_duration: cfg.end_point_request_max_handle_duration.0, slow_log_threshold: cfg.end_point_slow_log_threshold.0, - _phantom: Default::default(), quota_limiter, + _phantom: Default::default(), } } @@ -488,6 +488,7 @@ 
impl Endpoint { .new_tag_with_key_ranges(&req_ctx.context, key_ranges); let group_name = req_ctx .context + .get_resource_control_context() .get_resource_group_name() .as_bytes() .to_owned(); @@ -727,6 +728,7 @@ impl Endpoint { let priority = req_ctx.context.get_priority(); let group_name = req_ctx .context + .get_resource_control_context() .get_resource_group_name() .as_bytes() .to_owned(); diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index e175fa502f8..697a4b39d63 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -160,7 +160,11 @@ pub fn new_request_header(ctx: &Context) -> RaftRequestHeader { } header.set_sync_log(ctx.get_sync_log()); header.set_replica_read(ctx.get_replica_read()); - header.set_resource_group_name(ctx.get_resource_group_name().to_owned()); + header.set_resource_group_name( + ctx.get_resource_control_context() + .get_resource_group_name() + .to_owned(), + ); header } diff --git a/src/server/server.rs b/src/server/server.rs index 6e294eda45e..b3db4b4b57f 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -14,6 +14,7 @@ use grpcio::{ChannelBuilder, Environment, ResourceQuota, Server as GrpcServer, S use grpcio_health::{create_health, HealthService, ServingStatus}; use kvproto::tikvpb::*; use raftstore::store::{CheckLeaderTask, SnapManager, TabletSnapManager, ENGINE, TIFLASH}; +use resource_control::ResourceGroupManager; use security::SecurityManager; use tikv_util::{ config::VersionTrack, @@ -103,6 +104,7 @@ where yatp_read_pool: Option, debug_thread_pool: Arc, health_service: HealthService, + resource_manager: Option>, ) -> Result { // A helper thread (or pool) for transport layer. 
let stats_pool = if cfg.value().stats_concurrency > 0 { @@ -139,6 +141,7 @@ where cfg.value().enable_request_batch, proxy, cfg.value().reject_messages_on_memory_ratio, + resource_manager, ); let addr = SocketAddr::from_str(&cfg.value().addr)?; @@ -598,6 +601,7 @@ mod tests { None, debug_thread_pool, HealthService::default(), + None, ) .unwrap(); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 6fc3a3ebd76..2c77ee4e0bd 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -26,6 +26,7 @@ use raftstore::{ }, Error as RaftStoreError, Result as RaftStoreResult, }; +use resource_control::ResourceGroupManager; use tikv_alloc::trace::MemoryTraceGuard; use tikv_kv::{RaftExtension, StageLatencyStats}; use tikv_util::{ @@ -86,6 +87,8 @@ pub struct Service { // Go `server::Config` to get more details. reject_messages_on_memory_ratio: f64, + + resource_manager: Option>, } impl Drop for Service { @@ -108,6 +111,7 @@ impl Clone for Service Service { enable_req_batch: bool, proxy: Proxy, reject_messages_on_memory_ratio: f64, + resource_manager: Option>, ) -> Self { Service { store_id, @@ -139,6 +144,7 @@ impl Service { grpc_thread_load, proxy, reject_messages_on_memory_ratio, + resource_manager, } } @@ -177,9 +183,12 @@ macro_rules! 
handle_request { let begin_instant = Instant::now(); let source = req.mut_context().take_request_source(); - let resource_group_name = req.get_context().get_resource_group_name(); + let resource_control_ctx = req.get_context().get_resource_control_context(); + if let Some(resource_manager) = &self.resource_manager { + resource_manager.consume_penalty(resource_control_ctx); + } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_group_name]) + .with_label_values(&[resource_control_ctx.get_resource_group_name()]) .inc(); let resp = $future_name(&self.storage, req); let task = async move { @@ -456,6 +465,14 @@ impl Tikv for Service { fn coprocessor(&mut self, ctx: RpcContext<'_>, mut req: Request, sink: UnarySink) { forward_unary!(self.proxy, coprocessor, ctx, req, sink); let source = req.mut_context().take_request_source(); + let resource_control_ctx = req.get_context().get_resource_control_context(); + if let Some(resource_manager) = &self.resource_manager { + resource_manager.consume_penalty(resource_control_ctx); + } + GRPC_RESOURCE_GROUP_COUNTER_VEC + .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .inc(); + let begin_instant = Instant::now(); let future = future_copr(&self.copr, Some(ctx.peer()), req); let task = async move { @@ -486,6 +503,14 @@ impl Tikv for Service { sink: UnarySink, ) { let source = req.mut_context().take_request_source(); + let resource_control_ctx = req.get_context().get_resource_control_context(); + if let Some(resource_manager) = &self.resource_manager { + resource_manager.consume_penalty(resource_control_ctx); + } + GRPC_RESOURCE_GROUP_COUNTER_VEC + .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .inc(); + let begin_instant = Instant::now(); let future = future_raw_coprocessor(&self.copr_v2, &self.storage, req); let task = async move { @@ -567,6 +592,13 @@ impl Tikv for Service { mut sink: ServerStreamingSink, ) { let begin_instant = Instant::now(); + let resource_control_ctx = 
req.get_context().get_resource_control_context(); + if let Some(resource_manager) = &self.resource_manager { + resource_manager.consume_penalty(resource_control_ctx); + } + GRPC_RESOURCE_GROUP_COUNTER_VEC + .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .inc(); let mut stream = self .copr @@ -820,8 +852,8 @@ impl Tikv for Service { mut sink: DuplexSink, ) { forward_duplex!(self.proxy, batch_commands, ctx, stream, sink); - let (tx, rx) = unbounded(WakePolicy::TillReach(GRPC_MSG_NOTIFY_SIZE)); + let (tx, rx) = unbounded(WakePolicy::TillReach(GRPC_MSG_NOTIFY_SIZE)); let ctx = Arc::new(ctx); let peer = ctx.peer(); let storage = self.storage.clone(); @@ -829,6 +861,7 @@ impl Tikv for Service { let copr_v2 = self.copr_v2.clone(); let pool_size = storage.get_normal_pool_size(); let batch_builder = BatcherBuilder::new(self.enable_req_batch, pool_size); + let resource_manager = self.resource_manager.clone(); let request_handler = stream.try_for_each(move |mut req| { let request_ids = req.take_request_ids(); let requests: Vec<_> = req.take_requests().into(); @@ -845,6 +878,7 @@ impl Tikv for Service { id, req, &tx, + &resource_manager, ); if let Some(batch) = batcher.as_mut() { batch.maybe_commit(&storage, &tx); @@ -1054,6 +1088,7 @@ fn handle_batch_commands_request( id: u64, req: batch_commands_request::Request, tx: &Sender, + resource_manager: &Option>, ) { // To simplify code and make the logic more clear. macro_rules! 
oneof { @@ -1075,10 +1110,13 @@ fn handle_batch_commands_request( response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::invalid, String::default()); }, Some(batch_commands_request::request::Cmd::Get(mut req)) => { - let resource_group_name = req.get_context().get_resource_group_name(); + let resource_control_ctx = req.get_context().get_resource_control_context(); + if let Some(resource_manager) = resource_manager { + resource_manager.consume_penalty(resource_control_ctx); + } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_group_name]) - .inc(); + .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .inc(); if batcher.as_mut().map_or(false, |req_batch| { req_batch.can_batch_get(&req) }) { @@ -1093,10 +1131,13 @@ fn handle_batch_commands_request( } }, Some(batch_commands_request::request::Cmd::RawGet(mut req)) => { - let resource_group_name = req.get_context().get_resource_group_name(); + let resource_control_ctx = req.get_context().get_resource_control_context(); + if let Some(resource_manager) = resource_manager { + resource_manager.consume_penalty(resource_control_ctx); + } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_group_name]) - .inc(); + .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .inc(); if batcher.as_mut().map_or(false, |req_batch| { req_batch.can_batch_raw_get(&req) }) { @@ -1111,10 +1152,13 @@ fn handle_batch_commands_request( } }, Some(batch_commands_request::request::Cmd::Coprocessor(mut req)) => { - let resource_group_name = req.get_context().get_resource_group_name(); + let resource_control_ctx = req.get_context().get_resource_control_context(); + if let Some(resource_manager) = resource_manager { + resource_manager.consume_penalty(resource_control_ctx); + } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_group_name]) - .inc(); + .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .inc(); let begin_instant 
= Instant::now(); let source = req.mut_context().take_request_source(); let resp = future_copr(copr, Some(peer.to_string()), req) @@ -1142,10 +1186,13 @@ fn handle_batch_commands_request( ); } $(Some(batch_commands_request::request::Cmd::$cmd(mut req)) => { - let resource_group_name = req.get_context().get_resource_group_name(); + let resource_control_ctx = req.get_context().get_resource_control_context(); + if let Some(resource_manager) = resource_manager { + resource_manager.consume_penalty(resource_control_ctx); + } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_group_name]) - .inc(); + .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .inc(); let begin_instant = Instant::now(); let source = req.mut_context().take_request_source(); let resp = $future_fn($($arg,)* req) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index faacc4cf4cb..37263ce9a12 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -141,6 +141,25 @@ use crate::{ pub type Result = std::result::Result; pub type Callback = Box) + Send>; +macro_rules! check_key_size { + ($key_iter:expr, $max_key_size:expr, $callback:ident) => { + for k in $key_iter { + let key_size = k.len(); + if key_size > $max_key_size { + $callback(Err(Error::from(ErrorInner::KeyTooLarge { + size: key_size, + limit: $max_key_size, + }))); + return Ok(()); + } + } + }; +} + +/// Storage for Api V1 +/// To be convenience for test cases unrelated to RawKV. +pub type StorageApiV1 = Storage; + /// [`Storage`](Storage) implements transactional KV APIs and raw KV APIs on a /// given [`Engine`]. An [`Engine`] provides low level KV functionality. /// [`Engine`] has multiple implementations. When a TiKV server is running, a @@ -196,10 +215,6 @@ pub struct Storage { _phantom: PhantomData, } -/// Storage for Api V1 -/// To be convenience for test cases unrelated to RawKV. 
-pub type StorageApiV1 = Storage; - impl Clone for Storage { #[inline] fn clone(&self) -> Self { @@ -219,7 +234,7 @@ impl Clone for Storage { api_version: self.api_version, causal_ts_provider: self.causal_ts_provider.clone(), resource_tag_factory: self.resource_tag_factory.clone(), - quota_limiter: Arc::clone(&self.quota_limiter), + quota_limiter: self.quota_limiter.clone(), _phantom: PhantomData, } } @@ -242,21 +257,6 @@ impl Drop for Storage { } } -macro_rules! check_key_size { - ($key_iter:expr, $max_key_size:expr, $callback:ident) => { - for k in $key_iter { - let key_size = k.len(); - if key_size > $max_key_size { - $callback(Err(Error::from(ErrorInner::KeyTooLarge { - size: key_size, - limit: $max_key_size, - }))); - return Ok(()); - } - } - }; -} - impl Storage { /// Create a `Storage` from given engine. pub fn from_engine( @@ -598,7 +598,11 @@ impl Storage { let stage_begin_ts = Instant::now(); const CMD: CommandKind = CommandKind::get; let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .as_bytes() + .to_owned(); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag_with_key_ranges( &ctx, @@ -756,6 +760,7 @@ impl Storage { let priority = requests[0].get_context().get_priority(); let group_name = requests[0] .get_context() + .get_resource_control_context() .get_resource_group_name() .as_bytes() .to_owned(); @@ -938,7 +943,11 @@ impl Storage { let stage_begin_ts = Instant::now(); const CMD: CommandKind = CommandKind::batch_get; let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .as_bytes() + .to_owned(); let priority_tag = get_priority_tag(priority); let key_ranges = keys .iter() @@ -1117,7 +1126,11 @@ impl Storage { ) -> impl 
Future>>> { const CMD: CommandKind = CommandKind::scan; let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .as_bytes() + .to_owned(); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag_with_key_ranges( &ctx, @@ -1283,7 +1296,11 @@ impl Storage { ) -> impl Future>> { const CMD: CommandKind = CommandKind::scan_lock; let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .as_bytes() + .to_owned(); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag_with_key_ranges( &ctx, @@ -1591,7 +1608,11 @@ impl Storage { ) -> impl Future>>> { const CMD: CommandKind = CommandKind::raw_get; let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .as_bytes() + .to_owned(); let priority_tag = get_priority_tag(priority); let resource_tag = self .resource_tag_factory @@ -1672,6 +1693,7 @@ impl Storage { let priority = gets[0].get_context().get_priority(); let group_name = gets[0] .get_context() + .get_resource_control_context() .get_resource_group_name() .as_bytes() .to_owned(); @@ -1803,7 +1825,11 @@ impl Storage { ) -> impl Future>>> { const CMD: CommandKind = CommandKind::raw_batch_get; let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .as_bytes() + .to_owned(); let priority_tag = get_priority_tag(priority); let key_ranges = keys.iter().map(|k| (k.clone(), k.clone())).collect(); let resource_tag = self @@ -1949,7 +1975,10 @@ impl 
Storage { let concurrency_manager = self.concurrency_manager.clone(); let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .to_owned(); self.sched_raw_command(&group_name, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); @@ -2061,7 +2090,10 @@ impl Storage { let concurrency_manager = self.concurrency_manager.clone(); let deadline = Self::get_deadline(&ctx); let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .to_owned(); self.sched_raw_command(&group_name, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); @@ -2126,7 +2158,10 @@ impl Storage { let concurrency_manager = self.concurrency_manager.clone(); let deadline = Self::get_deadline(&ctx); let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .to_owned(); self.sched_raw_command(&group_name, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); @@ -2187,7 +2222,10 @@ impl Storage { let engine = self.engine.clone(); let deadline = Self::get_deadline(&ctx); let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .to_owned(); self.sched_raw_command(&group_name, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); @@ -2235,7 +2273,10 @@ impl Storage { let concurrency_manager = self.concurrency_manager.clone(); let deadline = Self::get_deadline(&ctx); let priority = ctx.get_priority(); - let group_name = 
ctx.get_resource_group_name().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .to_owned(); self.sched_raw_command(&group_name, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); @@ -2299,7 +2340,11 @@ impl Storage { ) -> impl Future>>> { const CMD: CommandKind = CommandKind::raw_scan; let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .as_bytes() + .to_owned(); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag(&ctx); let api_version = self.api_version; @@ -2426,7 +2471,11 @@ impl Storage { ) -> impl Future>>> { const CMD: CommandKind = CommandKind::raw_batch_scan; let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .as_bytes() + .to_owned(); let priority_tag = get_priority_tag(priority); let key_ranges = ranges .iter() @@ -2578,7 +2627,11 @@ impl Storage { ) -> impl Future>> { const CMD: CommandKind = CommandKind::raw_get_key_ttl; let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .as_bytes() + .to_owned(); let priority_tag = get_priority_tag(priority); let resource_tag = self .resource_tag_factory @@ -2667,7 +2720,10 @@ impl Storage { } let sched = self.get_scheduler(); let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .to_owned(); self.sched_raw_command(&group_name, priority, CMD, async move { let key = F::encode_raw_key_owned(key, None); let cmd = 
RawCompareAndSwap::new(cf, key, previous_value, value, ttl, api_version, ctx); @@ -2700,7 +2756,10 @@ impl Storage { let sched = self.get_scheduler(); let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .to_owned(); self.sched_raw_command(&group_name, priority, CMD, async move { let modifies = Self::raw_batch_put_requests_to_modifies(cf, pairs, ttls, None); let cmd = RawAtomicStore::new(cf, modifies, ctx); @@ -2725,7 +2784,10 @@ impl Storage { let cf = Self::rawkv_cf(&cf, self.api_version)?; let sched = self.get_scheduler(); let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .to_owned(); self.sched_raw_command(&group_name, priority, CMD, async move { // Do NOT encode ts here as RawAtomicStore use key to gen lock let modifies = keys @@ -2749,7 +2811,11 @@ impl Storage { ) -> impl Future> { const CMD: CommandKind = CommandKind::raw_checksum; let priority = ctx.get_priority(); - let group_name = ctx.get_resource_group_name().as_bytes().to_owned(); + let group_name = ctx + .get_resource_control_context() + .get_resource_group_name() + .as_bytes() + .to_owned(); let priority_tag = get_priority_tag(priority); let key_ranges = ranges .iter() diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 54f5029bd6c..4c01629ef48 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -719,6 +719,7 @@ impl Command { pub fn group_name(&self) -> String { self.command_ext() .get_ctx() + .get_resource_control_context() .get_resource_group_name() .to_owned() } From 21a98d84312219997a1d34bc5c52e696ea40427f Mon Sep 17 00:00:00 2001 From: lijie Date: Thu, 20 Apr 2023 12:06:46 +0800 Subject: [PATCH 0651/1149] chore: bump version to v7.2.0-alpha (#14615) Signed-off-by: 
lijie --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 32dfbbfc072..7541dd2666c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6419,7 +6419,7 @@ dependencies = [ [[package]] name = "tikv" -version = "7.1.0-alpha" +version = "7.2.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index 1bd9377d5f0..5363de8bd59 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tikv" -version = "7.1.0-alpha" +version = "7.2.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From 4b5846f85b7f6a58c3d44f764d4fe722fccaa64b Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 20 Apr 2023 16:23:19 +0800 Subject: [PATCH 0652/1149] engine: add configurations for filter enhancements (#14527) ref tikv/tikv#12842 Add some configurations for RocksDB filter enhancements Signed-off-by: tabokie --- Cargo.lock | 6 +- etc/config-template.toml | 10 +++ src/config/mod.rs | 82 ++++++++++++++++------ tests/integrations/config/mod.rs | 10 +++ tests/integrations/config/test-custom.toml | 10 +++ 5 files changed, 95 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7541dd2666c..da49ade1d6b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2921,7 +2921,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#a9fbe325939c166ffc5f80e63066f5d8594a1fff" +source = "git+https://github.com/tikv/rust-rocksdb.git#ce788e498f1d70ab7cbf44dcaca5049bbc05a943" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2940,7 +2940,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#a9fbe325939c166ffc5f80e63066f5d8594a1fff" +source = "git+https://github.com/tikv/rust-rocksdb.git#ce788e498f1d70ab7cbf44dcaca5049bbc05a943" 
dependencies = [ "bzip2-sys", "cc", @@ -4867,7 +4867,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#a9fbe325939c166ffc5f80e63066f5d8594a1fff" +source = "git+https://github.com/tikv/rust-rocksdb.git#ce788e498f1d70ab7cbf44dcaca5049bbc05a943" dependencies = [ "libc 0.2.139", "librocksdb_sys", diff --git a/etc/config-template.toml b/etc/config-template.toml index aec5e108949..89f39be79ca 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -693,6 +693,12 @@ ## filter. # block-based-bloom-filter = false +## Use Ribbon filter for levels higher or equal to this value. Use non-block-based bloom filter for +## lower levels. When this is set, `block-based-bloom-filter` will be ignored. +## Only effective for `format-version` >= 5. +## Disabled by default. +# ribbon-filter-above-level = 0 + # level0-file-num-compaction-trigger = 4 ## Soft limit on number of level-0 files. @@ -789,6 +795,10 @@ ## while using `Raw` mode. # optimize-filters-for-hits = true +## Option to generate Bloom/Ribbon filters that minimize memory internal fragmentation. +## Only effective for `format-version` >= 5. +# optimize-filters-for-memory = false + ## Enable compaction guard, which is an optimization to split SST files at TiKV region boundaries. ## The optimization can help reduce compaction IO, and allow us to use larger SST file size ## (thus less SST files overall) while making sure we can still efficiently cleanup stale data on diff --git a/src/config/mod.rs b/src/config/mod.rs index 2115236ed71..2efe9ea4c9b 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -309,12 +309,16 @@ macro_rules! 
cf_config { #[online_config(skip)] pub optimize_filters_for_hits: bool, #[online_config(skip)] + pub optimize_filters_for_memory: bool, + #[online_config(skip)] pub whole_key_filtering: bool, #[online_config(skip)] pub bloom_filter_bits_per_key: i32, #[online_config(skip)] pub block_based_bloom_filter: bool, #[online_config(skip)] + pub ribbon_filter_above_level: Option, + #[online_config(skip)] pub read_amp_bytes_per_bit: u32, #[serde(with = "rocks_config::compression_type_level_serde")] #[online_config(skip)] @@ -431,6 +435,9 @@ macro_rules! write_into_metrics { $metrics .with_label_values(&[$tag, "optimize_filters_for_hits"]) .set(($cf.optimize_filters_for_hits as i32).into()); + $metrics + .with_label_values(&[$tag, "optimize_filters_for_memory"]) + .set(($cf.optimize_filters_for_memory as i32).into()); $metrics .with_label_values(&[$tag, "whole_key_filtering"]) .set(($cf.whole_key_filtering as i32).into()); @@ -440,6 +447,11 @@ macro_rules! write_into_metrics { $metrics .with_label_values(&[$tag, "block_based_bloom_filter"]) .set(($cf.block_based_bloom_filter as i32).into()); + if let Some(level) = $cf.ribbon_filter_above_level { + $metrics + .with_label_values(&[$tag, "ribbon_filter_above_level"]) + .set((level as i32).into()); + } $metrics .with_label_values(&[$tag, "read_amp_bytes_per_bit"]) @@ -548,16 +560,24 @@ macro_rules! 
build_cf_opt { block_base_opts .set_pin_l0_filter_and_index_blocks_in_cache($opt.pin_l0_filter_and_index_blocks); if $opt.use_bloom_filter { - block_base_opts.set_bloom_filter( - $opt.bloom_filter_bits_per_key as f64, - $opt.block_based_bloom_filter, - ); + if let Some(level) = $opt.ribbon_filter_above_level { + block_base_opts.set_ribbon_filter( + $opt.bloom_filter_bits_per_key as f64, + level as i32 - 1, // bloom_before_level + ); + } else { + block_base_opts.set_bloom_filter( + $opt.bloom_filter_bits_per_key as f64, + $opt.block_based_bloom_filter, + ); + } block_base_opts.set_whole_key_filtering($opt.whole_key_filtering); } block_base_opts.set_read_amp_bytes_per_bit($opt.read_amp_bytes_per_bit); block_base_opts.set_prepopulate_block_cache($opt.prepopulate_block_cache); block_base_opts.set_format_version($opt.format_version); block_base_opts.set_checksum($opt.checksum); + block_base_opts.set_optimize_filters_for_memory($opt.optimize_filters_for_memory); let mut cf_opts = RocksCfOptions::default(); cf_opts.set_block_based_table_factory(&block_base_opts); cf_opts.set_num_levels($opt.num_levels); @@ -650,9 +670,11 @@ impl Default for DefaultCfConfig { pin_l0_filter_and_index_blocks: true, use_bloom_filter: true, optimize_filters_for_hits: true, + optimize_filters_for_memory: false, whole_key_filtering: true, bloom_filter_bits_per_key: 10, block_based_bloom_filter: false, + ribbon_filter_above_level: None, read_amp_bytes_per_bit: 0, compression_per_level: [ DBCompressionType::No, @@ -814,9 +836,11 @@ impl Default for WriteCfConfig { pin_l0_filter_and_index_blocks: true, use_bloom_filter: true, optimize_filters_for_hits: false, + optimize_filters_for_memory: false, whole_key_filtering: false, bloom_filter_bits_per_key: 10, block_based_bloom_filter: false, + ribbon_filter_above_level: None, read_amp_bytes_per_bit: 0, compression_per_level: [ DBCompressionType::No, @@ -940,9 +964,11 @@ impl Default for LockCfConfig { pin_l0_filter_and_index_blocks: true, 
use_bloom_filter: true, optimize_filters_for_hits: false, + optimize_filters_for_memory: false, whole_key_filtering: true, bloom_filter_bits_per_key: 10, block_based_bloom_filter: false, + ribbon_filter_above_level: None, read_amp_bytes_per_bit: 0, compression_per_level: [DBCompressionType::No; 7], write_buffer_size: ReadableSize::mb(32), @@ -1033,9 +1059,11 @@ impl Default for RaftCfConfig { pin_l0_filter_and_index_blocks: true, use_bloom_filter: true, optimize_filters_for_hits: true, + optimize_filters_for_memory: false, whole_key_filtering: true, bloom_filter_bits_per_key: 10, block_based_bloom_filter: false, + ribbon_filter_above_level: None, read_amp_bytes_per_bit: 0, compression_per_level: [DBCompressionType::No; 7], write_buffer_size: ReadableSize::mb(128), @@ -1544,9 +1572,11 @@ impl Default for RaftDefaultCfConfig { pin_l0_filter_and_index_blocks: true, use_bloom_filter: false, optimize_filters_for_hits: true, + optimize_filters_for_memory: false, whole_key_filtering: true, bloom_filter_bits_per_key: 10, block_based_bloom_filter: false, + ribbon_filter_above_level: None, read_amp_bytes_per_bit: 0, compression_per_level: [ DBCompressionType::No, @@ -5688,31 +5718,43 @@ mod tests { cfg.raft_engine.mut_config().memory_limit = None; cfg.coprocessor_v2.coprocessor_plugin_directory = None; // Default is `None`, which is represented by not setting the key. 
cfg.rocksdb.write_buffer_limit = None; + // cfg.rocksdb.defaultcf.enable_compaction_guard = None; - cfg.rocksdb.defaultcf.level0_slowdown_writes_trigger = None; - cfg.rocksdb.defaultcf.level0_stop_writes_trigger = None; - cfg.rocksdb.defaultcf.soft_pending_compaction_bytes_limit = None; - cfg.rocksdb.defaultcf.hard_pending_compaction_bytes_limit = None; cfg.rocksdb.writecf.enable_compaction_guard = None; - cfg.rocksdb.writecf.level0_slowdown_writes_trigger = None; - cfg.rocksdb.writecf.level0_stop_writes_trigger = None; - cfg.rocksdb.writecf.soft_pending_compaction_bytes_limit = None; - cfg.rocksdb.writecf.hard_pending_compaction_bytes_limit = None; cfg.rocksdb.lockcf.enable_compaction_guard = None; - cfg.rocksdb.lockcf.level0_slowdown_writes_trigger = None; - cfg.rocksdb.lockcf.level0_stop_writes_trigger = None; - cfg.rocksdb.lockcf.soft_pending_compaction_bytes_limit = None; - cfg.rocksdb.lockcf.hard_pending_compaction_bytes_limit = None; cfg.rocksdb.raftcf.enable_compaction_guard = None; - cfg.rocksdb.raftcf.level0_slowdown_writes_trigger = None; - cfg.rocksdb.raftcf.level0_stop_writes_trigger = None; - cfg.rocksdb.raftcf.soft_pending_compaction_bytes_limit = None; - cfg.rocksdb.raftcf.hard_pending_compaction_bytes_limit = None; cfg.raftdb.defaultcf.enable_compaction_guard = None; + // + cfg.rocksdb.defaultcf.level0_slowdown_writes_trigger = None; + cfg.rocksdb.writecf.level0_slowdown_writes_trigger = None; + cfg.rocksdb.lockcf.level0_slowdown_writes_trigger = None; + cfg.rocksdb.raftcf.level0_slowdown_writes_trigger = None; cfg.raftdb.defaultcf.level0_slowdown_writes_trigger = None; + // + cfg.rocksdb.defaultcf.level0_stop_writes_trigger = None; + cfg.rocksdb.writecf.level0_stop_writes_trigger = None; + cfg.rocksdb.lockcf.level0_stop_writes_trigger = None; + cfg.rocksdb.raftcf.level0_stop_writes_trigger = None; cfg.raftdb.defaultcf.level0_stop_writes_trigger = None; + // + cfg.rocksdb.defaultcf.soft_pending_compaction_bytes_limit = None; + 
cfg.rocksdb.writecf.soft_pending_compaction_bytes_limit = None; + cfg.rocksdb.lockcf.soft_pending_compaction_bytes_limit = None; + cfg.rocksdb.raftcf.soft_pending_compaction_bytes_limit = None; cfg.raftdb.defaultcf.soft_pending_compaction_bytes_limit = None; + // + cfg.rocksdb.defaultcf.hard_pending_compaction_bytes_limit = None; + cfg.rocksdb.writecf.hard_pending_compaction_bytes_limit = None; + cfg.rocksdb.lockcf.hard_pending_compaction_bytes_limit = None; + cfg.rocksdb.raftcf.hard_pending_compaction_bytes_limit = None; cfg.raftdb.defaultcf.hard_pending_compaction_bytes_limit = None; + // + cfg.rocksdb.defaultcf.ribbon_filter_above_level = None; + cfg.rocksdb.writecf.ribbon_filter_above_level = None; + cfg.rocksdb.lockcf.ribbon_filter_above_level = None; + cfg.rocksdb.raftcf.ribbon_filter_above_level = None; + cfg.raftdb.defaultcf.ribbon_filter_above_level = None; + cfg.coprocessor .optimize_for(default_cfg.storage.engine == EngineType::RaftKv2); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 7d40cde87d5..34b558f39c0 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -329,9 +329,11 @@ fn test_serde_custom_tikv_config() { pin_l0_filter_and_index_blocks: false, use_bloom_filter: false, optimize_filters_for_hits: false, + optimize_filters_for_memory: true, whole_key_filtering: true, bloom_filter_bits_per_key: 123, block_based_bloom_filter: true, + ribbon_filter_above_level: Some(1), read_amp_bytes_per_bit: 0, compression_per_level: [ DBCompressionType::No, @@ -384,9 +386,11 @@ fn test_serde_custom_tikv_config() { pin_l0_filter_and_index_blocks: false, use_bloom_filter: false, optimize_filters_for_hits: true, + optimize_filters_for_memory: true, whole_key_filtering: true, bloom_filter_bits_per_key: 123, block_based_bloom_filter: true, + ribbon_filter_above_level: Some(1), read_amp_bytes_per_bit: 0, compression_per_level: [ DBCompressionType::No, @@ -453,9 +457,11 @@ fn 
test_serde_custom_tikv_config() { pin_l0_filter_and_index_blocks: false, use_bloom_filter: false, optimize_filters_for_hits: true, + optimize_filters_for_memory: true, whole_key_filtering: true, bloom_filter_bits_per_key: 123, block_based_bloom_filter: true, + ribbon_filter_above_level: Some(1), read_amp_bytes_per_bit: 0, compression_per_level: [ DBCompressionType::No, @@ -522,9 +528,11 @@ fn test_serde_custom_tikv_config() { pin_l0_filter_and_index_blocks: false, use_bloom_filter: false, optimize_filters_for_hits: false, + optimize_filters_for_memory: true, whole_key_filtering: true, bloom_filter_bits_per_key: 123, block_based_bloom_filter: true, + ribbon_filter_above_level: Some(1), read_amp_bytes_per_bit: 0, compression_per_level: [ DBCompressionType::No, @@ -620,9 +628,11 @@ fn test_serde_custom_tikv_config() { pin_l0_filter_and_index_blocks: false, use_bloom_filter: false, optimize_filters_for_hits: false, + optimize_filters_for_memory: true, whole_key_filtering: true, bloom_filter_bits_per_key: 123, block_based_bloom_filter: true, + ribbon_filter_above_level: Some(1), read_amp_bytes_per_bit: 0, compression_per_level: [ DBCompressionType::No, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 416505a7318..28a30fcec04 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -288,9 +288,11 @@ cache-index-and-filter-blocks = false pin-l0-filter-and-index-blocks = false use-bloom-filter = false optimize-filters-for-hits = false +optimize-filters-for-memory = true whole-key-filtering = true bloom-filter-bits-per-key = 123 block-based-bloom-filter = true +ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 compression-per-level = [ "no", @@ -355,9 +357,11 @@ cache-index-and-filter-blocks = false pin-l0-filter-and-index-blocks = false use-bloom-filter = false optimize-filters-for-hits = true +optimize-filters-for-memory = true whole-key-filtering = true 
bloom-filter-bits-per-key = 123 block-based-bloom-filter = true +ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 compression-per-level = [ "no", @@ -405,9 +409,11 @@ cache-index-and-filter-blocks = false pin-l0-filter-and-index-blocks = false use-bloom-filter = false optimize-filters-for-hits = true +optimize-filters-for-memory = true whole-key-filtering = true bloom-filter-bits-per-key = 123 block-based-bloom-filter = true +ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 compression-per-level = [ "no", @@ -455,9 +461,11 @@ cache-index-and-filter-blocks = false pin-l0-filter-and-index-blocks = false use-bloom-filter = false optimize-filters-for-hits = false +optimize-filters-for-memory = true whole-key-filtering = true bloom-filter-bits-per-key = 123 block-based-bloom-filter = true +ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 compression-per-level = [ "no", @@ -537,9 +545,11 @@ cache-index-and-filter-blocks = false pin-l0-filter-and-index-blocks = false use-bloom-filter = false optimize-filters-for-hits = false +optimize-filters-for-memory = true whole-key-filtering = true bloom-filter-bits-per-key = 123 block-based-bloom-filter = true +ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 compression-per-level = [ "no", From 3867b954fff137f24c26af9350252a6b5cdca6e1 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 20 Apr 2023 17:35:20 +0800 Subject: [PATCH 0653/1149] raftstore-v2: support renaming encrypted dir (inefficiently) and batch importing data keys (#14583) ref tikv/tikv#12842, ref tikv/tikv#14095, ref tikv/tikv#14097 support renaming encrypted dir (inefficiently) and batch importing data keys Signed-off-by: tabokie --- Cargo.lock | 12 +- components/encryption/Cargo.toml | 1 + components/encryption/export/src/lib.rs | 5 +- components/encryption/src/file_dict_file.rs | 56 ++- components/encryption/src/lib.rs | 119 ++++++ components/encryption/src/manager/mod.rs | 386 ++++++++++++++++-- components/engine_rocks/src/sst.rs | 
4 +- components/engine_test/src/lib.rs | 5 +- components/engine_traits_tests/Cargo.toml | 4 + .../engine_traits_tests/src/checkpoint.rs | 49 +++ components/engine_traits_tests/src/ctor.rs | 40 +- components/engine_traits_tests/src/lib.rs | 1 + components/file_system/src/lib.rs | 76 ---- components/raftstore-v2/Cargo.toml | 1 + components/raftstore-v2/src/batch/store.rs | 35 +- components/raftstore-v2/src/fsm/peer.rs | 4 +- components/raftstore-v2/src/operation/life.rs | 11 +- .../src/operation/ready/apply_trace.rs | 12 +- .../raftstore-v2/src/operation/ready/mod.rs | 5 +- .../src/operation/ready/snapshot.rs | 31 +- components/raftstore-v2/src/raft/peer.rs | 6 +- components/raftstore-v2/src/raft/storage.rs | 8 +- .../tests/integrations/cluster.rs | 23 +- .../raftstore/src/store/async_io/read.rs | 9 +- components/raftstore/src/store/snap.rs | 39 +- components/server/src/server2.rs | 10 +- components/test_raftstore-v2/src/node.rs | 14 +- components/test_raftstore-v2/src/server.rs | 14 +- components/test_util/src/encryption.rs | 12 +- src/server/engine_factory.rs | 12 +- src/server/raftkv2/node.rs | 5 + src/server/tablet_snap.rs | 3 + tests/failpoints/cases/test_encryption.rs | 8 +- tests/integrations/import/util.rs | 2 +- 34 files changed, 828 insertions(+), 194 deletions(-) create mode 100644 components/engine_traits_tests/src/checkpoint.rs diff --git a/Cargo.lock b/Cargo.lock index da49ade1d6b..7f2a1e91650 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1486,6 +1486,7 @@ dependencies = [ "tikv_util", "tokio", "toml", + "walkdir", ] [[package]] @@ -1630,10 +1631,14 @@ dependencies = [ name = "engine_traits_tests" version = "0.0.1" dependencies = [ + "encryption", + "encryption_export", "engine_test", "engine_traits", + "kvproto", "panic_hook", "tempfile", + "test_util", "tikv_alloc", ] @@ -2921,7 +2926,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = 
"git+https://github.com/tikv/rust-rocksdb.git#ce788e498f1d70ab7cbf44dcaca5049bbc05a943" +source = "git+https://github.com/tikv/rust-rocksdb.git#062638a741adcd9074659eb28cbe7f6a676938d5" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -2940,7 +2945,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#ce788e498f1d70ab7cbf44dcaca5049bbc05a943" +source = "git+https://github.com/tikv/rust-rocksdb.git#062638a741adcd9074659eb28cbe7f6a676938d5" dependencies = [ "bzip2-sys", "cc", @@ -4426,6 +4431,7 @@ dependencies = [ "collections", "concurrency_manager", "crossbeam", + "encryption_export", "engine_rocks", "engine_test", "engine_traits", @@ -4867,7 +4873,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#ce788e498f1d70ab7cbf44dcaca5049bbc05a943" +source = "git+https://github.com/tikv/rust-rocksdb.git#062638a741adcd9074659eb28cbe7f6a676938d5" dependencies = [ "libc 0.2.139", "librocksdb_sys", diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index 94ab0d39957..deac60223a7 100644 --- a/components/encryption/Cargo.toml +++ b/components/encryption/Cargo.toml @@ -37,6 +37,7 @@ thiserror = "1.0" tikv_alloc = { workspace = true } tikv_util = { workspace = true } tokio = { version = "1.5", features = ["time", "rt"] } +walkdir = "2" [dev-dependencies] matches = "0.1.8" diff --git a/components/encryption/export/src/lib.rs b/components/encryption/export/src/lib.rs index e29a41cd07e..be86db83082 100644 --- a/components/encryption/export/src/lib.rs +++ b/components/encryption/export/src/lib.rs @@ -14,8 +14,9 @@ use derive_more::Deref; #[cfg(feature = "cloud-aws")] pub use encryption::KmsBackend; pub use encryption::{ - from_engine_encryption_method, Backend, DataKeyManager, DataKeyManagerArgs, DecrypterReader, - EncryptionConfig, Error, FileConfig, Iv, KmsConfig, MasterKeyConfig, Result, + clean_up_dir, 
clean_up_trash, from_engine_encryption_method, trash_dir_all, Backend, + DataKeyManager, DataKeyManagerArgs, DecrypterReader, EncryptionConfig, Error, FileConfig, Iv, + KmsConfig, MasterKeyConfig, Result, }; use encryption::{ DataKeyPair, EncryptedKey, FileBackend, KmsProvider, PlainKey, PlaintextBackend, diff --git a/components/encryption/src/file_dict_file.rs b/components/encryption/src/file_dict_file.rs index 4a2609cacb5..cfa945a5cd7 100644 --- a/components/encryption/src/file_dict_file.rs +++ b/components/encryption/src/file_dict_file.rs @@ -217,10 +217,11 @@ impl FileDictionaryFile { Ok(file_dict) } - /// Append an insert operation to the log file. + /// Append an insert operation to the log file. The record is guaranteed to + /// be persisted if `sync` is set. /// /// Warning: `self.write(file_dict)` must be called before. - pub fn insert(&mut self, name: &str, info: &FileInfo) -> Result<()> { + pub fn insert(&mut self, name: &str, info: &FileInfo, sync: bool) -> Result<()> { self.file_dict.files.insert(name.to_owned(), info.clone()); if self.enable_log { let file = self.append_file.as_mut().unwrap(); @@ -231,12 +232,16 @@ impl FileDictionaryFile { let truncate_num: usize = truncate_num.map_or(0, |c| c.parse().unwrap()); bytes.truncate(truncate_num); file.write_all(&bytes)?; - file.sync_all()?; + if sync { + file.sync_all()?; + } Ok(()) }); file.write_all(&bytes)?; - file.sync_all()?; + if sync { + file.sync_all()?; + } self.file_size += bytes.len(); self.check_compact()?; @@ -250,13 +255,15 @@ impl FileDictionaryFile { /// Append a remove operation to the log file. /// /// Warning: `self.write(file_dict)` must be called before. 
- pub fn remove(&mut self, name: &str) -> Result<()> { + pub fn remove(&mut self, name: &str, sync: bool) -> Result<()> { self.file_dict.files.remove(name); if self.enable_log { let file = self.append_file.as_mut().unwrap(); let bytes = Self::convert_record_to_bytes(name, LogRecord::Remove)?; file.write_all(&bytes)?; - file.sync_all()?; + if sync { + file.sync_all()?; + } self.removed += 1; self.file_size += bytes.len(); @@ -268,6 +275,13 @@ impl FileDictionaryFile { Ok(()) } + pub fn sync(&mut self) -> Result<()> { + if self.enable_log { + self.append_file.as_mut().unwrap().sync_all()?; + } + Ok(()) + } + /// This function needs to be called after each append operation to check /// if compact is needed. fn check_compact(&mut self) -> Result<()> { @@ -407,9 +421,9 @@ mod tests { let info4 = create_file_info(4, EncryptionMethod::Aes128Ctr); let info5 = create_file_info(3, EncryptionMethod::Aes128Ctr); - file_dict_file.insert("info1", &info1).unwrap(); - file_dict_file.insert("info2", &info2).unwrap(); - file_dict_file.insert("info3", &info3).unwrap(); + file_dict_file.insert("info1", &info1, true).unwrap(); + file_dict_file.insert("info2", &info2, true).unwrap(); + file_dict_file.insert("info3", &info3, true).unwrap(); let file_dict = file_dict_file.recovery().unwrap(); @@ -418,9 +432,9 @@ mod tests { assert_eq!(*file_dict.files.get("info3").unwrap(), info3); assert_eq!(file_dict.files.len(), 3); - file_dict_file.remove("info2").unwrap(); - file_dict_file.remove("info1").unwrap(); - file_dict_file.insert("info2", &info4).unwrap(); + file_dict_file.remove("info2", true).unwrap(); + file_dict_file.remove("info1", true).unwrap(); + file_dict_file.insert("info2", &info4, true).unwrap(); let file_dict = file_dict_file.recovery().unwrap(); assert_eq!(file_dict.files.get("info1"), None); @@ -428,8 +442,8 @@ mod tests { assert_eq!(*file_dict.files.get("info3").unwrap(), info3); assert_eq!(file_dict.files.len(), 2); - file_dict_file.insert("info5", &info5).unwrap(); - 
file_dict_file.remove("info3").unwrap(); + file_dict_file.insert("info5", &info5, true).unwrap(); + file_dict_file.remove("info3", true).unwrap(); let file_dict = file_dict_file.recovery().unwrap(); assert_eq!(file_dict.files.get("info1"), None); @@ -460,7 +474,7 @@ mod tests { .unwrap(); let info = create_file_info(1, EncryptionMethod::Aes256Ctr); - file_dict_file.insert("info", &info).unwrap(); + file_dict_file.insert("info", &info, true).unwrap(); let (_, file_dict) = FileDictionaryFile::open( tempdir.path(), @@ -550,14 +564,14 @@ mod tests { ) .unwrap(); - file_dict.insert("f1", &info1).unwrap(); - file_dict.insert("f2", &info2).unwrap(); - file_dict.insert("f3", &info3).unwrap(); + file_dict.insert("f1", &info1, true).unwrap(); + file_dict.insert("f2", &info2, true).unwrap(); + file_dict.insert("f3", &info3, true).unwrap(); - file_dict.insert("f4", &info4).unwrap(); - file_dict.remove("f3").unwrap(); + file_dict.insert("f4", &info4, true).unwrap(); + file_dict.remove("f3", true).unwrap(); - file_dict.remove("f2").unwrap(); + file_dict.remove("f2", true).unwrap(); } // Try open as v1 file. Should fail. { diff --git a/components/encryption/src/lib.rs b/components/encryption/src/lib.rs index 7f9079ed030..c16142eb30b 100644 --- a/components/encryption/src/lib.rs +++ b/components/encryption/src/lib.rs @@ -10,6 +10,8 @@ mod manager; mod master_key; mod metrics; +use std::{io::ErrorKind, path::Path}; + pub use self::{ config::*, crypter::{ @@ -27,3 +29,120 @@ pub use self::{ Backend, DataKeyPair, EncryptedKey, FileBackend, KmsBackend, KmsProvider, PlaintextBackend, }, }; + +const TRASH_PREFIX: &str = "TRASH-"; + +/// Remove a directory. +/// +/// Rename it before actually removal. 
+#[inline] +pub fn trash_dir_all( + path: impl AsRef, + key_manager: Option<&DataKeyManager>, +) -> std::io::Result<()> { + let path = path.as_ref(); + let name = match path.file_name() { + Some(n) => n, + None => { + return Err(std::io::Error::new( + ErrorKind::InvalidInput, + "path is invalid", + )); + } + }; + let trash_path = path.with_file_name(format!("{}{}", TRASH_PREFIX, name.to_string_lossy())); + if let Err(e) = file_system::rename(path, &trash_path) { + if e.kind() == ErrorKind::NotFound { + return Ok(()); + } + return Err(e); + } else if let Some(m) = key_manager { + m.remove_dir(path, Some(&trash_path))?; + } + file_system::remove_dir_all(trash_path) +} + +/// When using `trash_dir_all`, it's possible the directory is marked as trash +/// but not being actually deleted after a restart. This function can be used +/// to resume all those removal in the given directory. +#[inline] +pub fn clean_up_trash( + path: impl AsRef, + key_manager: Option<&DataKeyManager>, +) -> std::io::Result<()> { + for e in file_system::read_dir(path)? { + let e = e?; + let os_fname = e.file_name(); + let fname = os_fname.to_str().unwrap(); + if let Some(original) = fname.strip_prefix(TRASH_PREFIX) { + let original = e.path().with_file_name(original); + if let Some(m) = &key_manager { + m.remove_dir(&original, Some(&e.path()))?; + } + file_system::remove_dir_all(e.path())?; + } + } + Ok(()) +} + +/// Removes all directories with the given prefix. +#[inline] +pub fn clean_up_dir( + path: impl AsRef, + prefix: &str, + key_manager: Option<&DataKeyManager>, +) -> std::io::Result<()> { + for e in file_system::read_dir(path)? 
{ + let e = e?; + let fname = e.file_name().to_str().unwrap().to_owned(); + if fname.starts_with(prefix) { + if let Some(m) = &key_manager { + m.remove_dir(&e.path(), None)?; + } + file_system::remove_dir_all(e.path())?; + } + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use tempfile::Builder; + + use super::*; + + #[test] + fn test_trash_dir_all() { + let tmp_dir = Builder::new() + .prefix("test_reserve_space_for_recover") + .tempdir() + .unwrap(); + let data_path = tmp_dir.path(); + let sub_dir0 = data_path.join("sub_dir0"); + let trash_sub_dir0 = data_path.join(format!("{}sub_dir0", TRASH_PREFIX)); + file_system::create_dir_all(&sub_dir0).unwrap(); + assert!(sub_dir0.exists()); + + trash_dir_all(&sub_dir0, None).unwrap(); + assert!(!sub_dir0.exists()); + assert!(!trash_sub_dir0.exists()); + + file_system::create_dir_all(&sub_dir0).unwrap(); + file_system::create_dir_all(&trash_sub_dir0).unwrap(); + trash_dir_all(&sub_dir0, None).unwrap(); + assert!(!sub_dir0.exists()); + assert!(!trash_sub_dir0.exists()); + + clean_up_trash(data_path, None).unwrap(); + + file_system::create_dir_all(&trash_sub_dir0).unwrap(); + assert!(trash_sub_dir0.exists()); + clean_up_trash(data_path, None).unwrap(); + assert!(!trash_sub_dir0.exists()); + + file_system::create_dir_all(&sub_dir0).unwrap(); + assert!(sub_dir0.exists()); + clean_up_dir(data_path, "sub", None).unwrap(); + assert!(!sub_dir0.exists()); + } +} diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index a367ad44df2..be7008a33ae 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -2,7 +2,7 @@ use std::{ collections::hash_map::Entry, - io::{Error as IoError, ErrorKind, Result as IoResult}, + io::{self, Error as IoError, ErrorKind, Result as IoResult}, path::{Path, PathBuf}, sync::{ atomic::{AtomicU64, Ordering}, @@ -21,6 +21,7 @@ use file_system::File; use kvproto::encryptionpb::{DataKey, EncryptionMethod, FileDictionary, 
FileInfo, KeyDictionary}; use protobuf::Message; use tikv_util::{box_err, debug, error, info, sys::thread::StdThreadBuildWrapper, thd_name, warn}; +use tokio::sync::oneshot; use crate::{ config::EncryptionConfig, @@ -36,6 +37,7 @@ use crate::{ const KEY_DICT_NAME: &str = "key.dict"; const FILE_DICT_NAME: &str = "file.dict"; const ROTATE_CHECK_PERIOD: u64 = 600; // 10min +const GENERATE_DATA_KEY_LIMIT: usize = 10; struct Dicts { // Maps data file paths to key id and metadata. This file is stored as plaintext. @@ -193,7 +195,7 @@ impl Dicts { dict.files.get(fname).cloned() } - fn new_file(&self, fname: &str, method: EncryptionMethod) -> Result { + fn new_file(&self, fname: &str, method: EncryptionMethod, sync: bool) -> Result { let mut file_dict_file = self.file_dict_file.lock().unwrap(); let iv = if method != EncryptionMethod::Plaintext { Iv::new_ctr() @@ -212,7 +214,7 @@ impl Dicts { file_dict.files.len() as _ }; - file_dict_file.insert(fname, &file)?; + file_dict_file.insert(fname, &file, sync)?; ENCRYPTION_FILE_NUM_GAUGE.set(file_num); if method != EncryptionMethod::Plaintext { @@ -228,7 +230,7 @@ impl Dicts { // If the file does not exist, return Ok(()) // In either case the intent that the file not exist is achieved. 
- fn delete_file(&self, fname: &str) -> Result<()> { + fn delete_file(&self, fname: &str, sync: bool) -> Result<()> { let mut file_dict_file = self.file_dict_file.lock().unwrap(); let (file, file_num) = { let mut file_dict = self.file_dict.lock().unwrap(); @@ -246,7 +248,7 @@ impl Dicts { } }; - file_dict_file.remove(fname)?; + file_dict_file.remove(fname, sync)?; ENCRYPTION_FILE_NUM_GAUGE.set(file_num); if file.method != EncryptionMethod::Plaintext { debug!("delete encrypted file"; "fname" => fname); @@ -256,7 +258,7 @@ impl Dicts { Ok(()) } - fn link_file(&self, src_fname: &str, dst_fname: &str) -> Result> { + fn link_file(&self, src_fname: &str, dst_fname: &str, sync: bool) -> Result> { let mut file_dict_file = self.file_dict_file.lock().unwrap(); let (method, file, file_num) = { let mut file_dict = self.file_dict.lock().unwrap(); @@ -277,7 +279,7 @@ impl Dicts { let file_num = file_dict.files.len() as _; (method, file, file_num) }; - file_dict_file.insert(dst_fname, &file)?; + file_dict_file.insert(dst_fname, &file, sync)?; ENCRYPTION_FILE_NUM_GAUGE.set(file_num); if method != EncryptionMethod::Plaintext { @@ -343,8 +345,7 @@ impl Dicts { let creation_time = duration.as_secs(); // Generate new data key. 
- let generate_limit = 10; - for _ in 0..generate_limit { + for _ in 0..GENERATE_DATA_KEY_LIMIT { let (key_id, key) = generate_data_key(method); if key_id == 0 { // 0 is invalid @@ -365,7 +366,10 @@ impl Dicts { } return Ok(()); } - Err(box_err!("key id collides {} times!", generate_limit)) + Err(box_err!( + "key id collides {} times!", + GENERATE_DATA_KEY_LIMIT + )) } } @@ -385,17 +389,22 @@ fn check_stale_file_exist( "Clean stale file information in file dictionary: {:?}", fname ); - file_dict_file.remove(fname)?; + file_dict_file.remove(fname, true)?; let _ = file_dict.files.remove(fname); } Ok(()) } +enum RotateTask { + Terminate, + Save(oneshot::Sender<()>), +} + fn run_background_rotate_work( dict: Arc, method: EncryptionMethod, master_key: &dyn Backend, - terminal_recv: channel::Receiver<()>, + rx: channel::Receiver, ) { let check_period = std::cmp::min( Duration::from_secs(ROTATE_CHECK_PERIOD), @@ -409,9 +418,17 @@ fn run_background_rotate_work( dict.maybe_rotate_data_key(method, master_key) .expect("Rotating key operation encountered error in the background worker"); }, - recv(terminal_recv) -> _ => { - info!("Key rotate worker has been cancelled."); - break + recv(rx) -> r => { + match r { + Err(_) | Ok(RotateTask::Terminate) => { + info!("Key rotate worker has been cancelled."); + return; + } + Ok(RotateTask::Save(tx)) => { + dict.save_key_dict(master_key).expect("Saving key dict encountered error in the background worker"); + tx.send(()).unwrap(); + } + } }, } } @@ -430,7 +447,7 @@ fn generate_data_key(method: EncryptionMethod) -> (u64, Vec) { pub struct DataKeyManager { dicts: Arc, method: EncryptionMethod, - rotate_terminal: channel::Sender<()>, + rotate_tx: channel::Sender, background_worker: Option>, } @@ -499,7 +516,7 @@ impl DataKeyManager { if info.method != EncryptionMethod::Plaintext { let retain = f(fname); if !retain { - file_dict_file.remove(fname).unwrap(); + file_dict_file.remove(fname, true).unwrap(); } retain } else { @@ -598,7 +615,7 @@ 
impl DataKeyManager { dicts.maybe_rotate_data_key(method, &*master_key)?; let dicts = Arc::new(dicts); let dict_clone = dicts.clone(); - let (rotate_terminal, rx) = channel::bounded(1); + let (rotate_tx, rx) = channel::bounded(1); let background_worker = std::thread::Builder::new() .name(thd_name!("enc:key")) .spawn_wrapper(move || { @@ -610,7 +627,7 @@ impl DataKeyManager { Ok(DataKeyManager { dicts, method, - rotate_terminal, + rotate_tx, background_worker: Some(background_worker), }) } @@ -753,6 +770,50 @@ impl DataKeyManager { Ok(Some(encrypted_file)) } + /// Removes data keys under the directory `logical`. If `physical` is + /// present, if means the `logical` directory is already physically renamed + /// to `physical`. + /// There're two uses of this function: + /// + /// (1) without `physical`: `remove_dir` is called before + /// `fs::remove_dir_all`. User must guarantee that this directory won't be + /// read again even if the removal fails or panics. + /// + /// (2) with `physical`: Use `fs::rename` to rename the directory to trash. + /// Then `remove_dir` with `physical` set to the trash directory name. + /// Finally remove the trash directory. This is the safest way to delete a + /// directory. 
+ pub fn remove_dir(&self, logical: &Path, physical: Option<&Path>) -> IoResult<()> { + let scan = physical.unwrap_or(logical); + debug_assert!(scan.is_dir()); + if !scan.exists() { + return Ok(()); + } + let mut iter = walkdir::WalkDir::new(scan).into_iter().peekable(); + while let Some(e) = iter.next() { + let e = e?; + if e.path_is_symlink() { + return Err(io::Error::new( + io::ErrorKind::Other, + format!("unexpected symbolic link: {}", e.path().display()), + )); + } + let fname = e.path().to_str().unwrap(); + let sync = iter.peek().is_none(); + if let Some(p) = physical { + let sub = fname + .strip_prefix(p.to_str().unwrap()) + .unwrap() + .trim_start_matches('/'); + self.dicts + .delete_file(logical.join(sub).to_str().unwrap(), sync)?; + } else { + self.dicts.delete_file(fname, sync)?; + } + } + Ok(()) + } + /// Return which method this manager is using. pub fn encryption_method(&self) -> engine_traits::EncryptionMethod { crypter::to_engine_encryption_method(self.method) @@ -761,7 +822,7 @@ impl DataKeyManager { impl Drop for DataKeyManager { fn drop(&mut self) { - if let Err(e) = self.rotate_terminal.send(()) { + if let Err(e) = self.rotate_tx.send(RotateTask::Terminate) { info!("failed to terminate background rotation, are we shutting down?"; "err" => %e); } if let Some(Err(e)) = self.background_worker.take().map(|w| w.join()) { @@ -793,7 +854,7 @@ impl EncryptionKeyManager for DataKeyManager { fn new_file(&self, fname: &str) -> IoResult { let (_, data_key) = self.dicts.current_data_key(); let key = data_key.get_key().to_owned(); - let file = self.dicts.new_file(fname, self.method)?; + let file = self.dicts.new_file(fname, self.method, true)?; let encrypted_file = FileEncryptionInfo { key, method: crypter::to_engine_encryption_method(file.method), @@ -806,16 +867,166 @@ impl EncryptionKeyManager for DataKeyManager { fail_point!("key_manager_fails_before_delete_file", |_| IoResult::Err( std::io::ErrorKind::Other.into() )); - self.dicts.delete_file(fname)?; + 
// `RemoveDir` is not managed, but RocksDB may use `RenameFile` on a directory, + // which internally calls `LinkFile` and `DeleteFile`. + let path = Path::new(fname); + if path.is_dir() { + let mut iter = walkdir::WalkDir::new(path).into_iter().peekable(); + while let Some(e) = iter.next() { + self.dicts + .delete_file(e?.path().to_str().unwrap(), iter.peek().is_none())?; + } + } else { + self.dicts.delete_file(fname, true)?; + } Ok(()) } fn link_file(&self, src_fname: &str, dst_fname: &str) -> IoResult<()> { - self.dicts.link_file(src_fname, dst_fname)?; + let src_path = Path::new(src_fname); + let dst_path = Path::new(dst_fname); + if src_path.is_dir() { + let mut iter = walkdir::WalkDir::new(src_path) + .into_iter() + .filter(|e| e.as_ref().map_or(true, |e| !e.path().is_dir())) + .peekable(); + while let Some(e) = iter.next() { + let e = e?; + if e.path_is_symlink() { + return Err(io::Error::new( + io::ErrorKind::Other, + format!("unexpected symbolic link: {}", e.path().display()), + )); + } + let sub_path = e.path().strip_prefix(src_path).unwrap(); + let src = e.path().to_str().unwrap(); + let dst_path = dst_path.join(sub_path); + let dst = dst_path.to_str().unwrap(); + self.dicts.link_file(src, dst, iter.peek().is_none())?; + } + } else { + self.dicts.link_file(src_fname, dst_fname, true)?; + } Ok(()) } } +/// An RAII-style importer of data keys. It automatically creates data key that +/// doesn't exist locally. It synchronizes log file in batch. It automatically +/// reverts changes if caller aborts. +pub struct DataKeyImporter<'a> { + manager: &'a DataKeyManager, + // Added file names. + file_additions: Vec, + // Added key ids. 
+ key_additions: Vec, + committed: bool, +} + +#[allow(dead_code)] +impl<'a> DataKeyImporter<'a> { + pub fn new(manager: &'a DataKeyManager) -> Self { + Self { + manager, + file_additions: Vec::new(), + key_additions: Vec::new(), + committed: false, + } + } + + pub fn add(&mut self, fname: &str, iv: Vec, new_key: DataKey) -> Result<()> { + let method = new_key.method; + let mut key_id = None; + { + let mut key_dict = self.manager.dicts.key_dict.lock().unwrap(); + for (id, data_key) in &key_dict.keys { + if data_key.key == new_key.key { + key_id = Some(*id); + } + } + if key_id.is_none() { + for _ in 0..GENERATE_DATA_KEY_LIMIT { + // Match `generate_data_key`. + use rand::{rngs::OsRng, RngCore}; + let id = OsRng.next_u64(); + if let Entry::Vacant(e) = key_dict.keys.entry(id) { + key_id = Some(id); + e.insert(new_key); + self.key_additions.push(id); + break; + } + } + if key_id.is_none() { + return Err(box_err!( + "key id collides {} times!", + GENERATE_DATA_KEY_LIMIT + )); + } + } + } + + let file = FileInfo { + iv, + key_id: key_id.unwrap(), + method, + ..Default::default() + }; + let mut file_dict_file = self.manager.dicts.file_dict_file.lock().unwrap(); + let file_num = { + let mut file_dict = self.manager.dicts.file_dict.lock().unwrap(); + if let Entry::Vacant(e) = file_dict.files.entry(fname.to_owned()) { + e.insert(file.clone()); + } else { + return Err(box_err!("file name collides with existing file: {}", fname)); + } + file_dict.files.len() as _ + }; + file_dict_file.insert(fname, &file, false)?; + self.file_additions.push(fname.to_owned()); + ENCRYPTION_FILE_NUM_GAUGE.set(file_num); + Ok(()) + } + + pub fn commit(mut self) -> Result<()> { + let (tx, rx) = oneshot::channel(); + if !self.key_additions.is_empty() { + self.manager.rotate_tx.send(RotateTask::Save(tx)).unwrap(); + rx.blocking_recv().unwrap(); + } + if !self.file_additions.is_empty() { + self.manager.dicts.file_dict_file.lock().unwrap().sync()?; + } + self.committed = true; + Ok(()) + } + + pub fn 
rollback(&mut self) -> Result<()> { + assert!(!self.committed); + let mut iter = self.file_additions.drain(..).peekable(); + while let Some(f) = iter.next() { + self.manager.dicts.delete_file(&f, iter.peek().is_none())?; + } + for key_id in self.key_additions.drain(..) { + let mut key_dict = self.manager.dicts.key_dict.lock().unwrap(); + key_dict.keys.remove(&key_id); + } + let (tx, rx) = oneshot::channel(); + self.manager.rotate_tx.send(RotateTask::Save(tx)).unwrap(); + rx.blocking_recv().unwrap(); + Ok(()) + } +} + +impl<'a> Drop for DataKeyImporter<'a> { + fn drop(&mut self) { + if !self.committed { + if let Err(e) = self.rollback() { + warn!("failed to rollback imported data keys"; "err" => ?e); + } + } + } +} + #[cfg(test)] mod tests { use engine_traits::EncryptionMethod as EtEncryptionMethod; @@ -864,7 +1075,7 @@ mod tests { rotation_period: Duration::from_secs(60), enable_file_dictionary_log: true, file_dictionary_rewrite_threshold: 2, - dict_path: tmp_dir.path().as_os_str().to_str().unwrap().to_string(), + dict_path: tmp_dir.path().to_str().unwrap().to_string(), } } @@ -1473,4 +1684,131 @@ mod tests { } } } + + #[test] + fn test_rename_dir() { + let _guard = LOCK_FOR_GAUGE.lock().unwrap(); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let manager = new_key_manager_def(&tmp_dir, Some(EncryptionMethod::Aes192Ctr)).unwrap(); + let subdir = tmp_dir.path().join("foo"); + std::fs::create_dir(&subdir).unwrap(); + let file_a = manager + .new_file(subdir.join("a").to_str().unwrap()) + .unwrap(); + File::create(subdir.join("a")).unwrap(); + let file_b = manager + .new_file(subdir.join("b").to_str().unwrap()) + .unwrap(); + File::create(subdir.join("b")).unwrap(); + + let dstdir = tmp_dir.path().join("bar"); + manager + .link_file(subdir.to_str().unwrap(), dstdir.to_str().unwrap()) + .unwrap(); + manager.delete_file(subdir.to_str().unwrap()).unwrap(); + + assert_eq!( + manager + .get_file(dstdir.join("a").to_str().unwrap()) + .unwrap(), + file_a + ); + 
assert_eq!( + manager + .get_file_exists(subdir.join("a").to_str().unwrap()) + .unwrap(), + None + ); + + assert_eq!( + manager + .get_file(dstdir.join("b").to_str().unwrap()) + .unwrap(), + file_b + ); + assert_eq!( + manager + .get_file_exists(subdir.join("b").to_str().unwrap()) + .unwrap(), + None + ); + } + + #[test] + fn test_import_keys() { + let _guard = LOCK_FOR_GAUGE.lock().unwrap(); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let manager = new_key_manager_def(&tmp_dir, Some(EncryptionMethod::Aes192Ctr)).unwrap(); + + let mut importer = DataKeyImporter::new(&manager); + let file0 = manager.new_file("0").unwrap(); + + // conflict + importer + .add("0", file0.iv.clone(), DataKey::default()) + .unwrap_err(); + // same key + importer + .add( + "1", + file0.iv.clone(), + DataKey { + key: file0.key.clone(), + method: EncryptionMethod::Aes192Ctr, + ..Default::default() + }, + ) + .unwrap(); + // different key + let (_, key2) = generate_data_key(EncryptionMethod::Aes192Ctr); + importer + .add( + "2", + Iv::new_ctr().as_slice().to_owned(), + DataKey { + key: key2.clone(), + method: EncryptionMethod::Aes192Ctr, + ..Default::default() + }, + ) + .unwrap(); + + assert_eq!(manager.get_file("0").unwrap(), file0); + assert_eq!(manager.get_file("1").unwrap(), file0); + assert_eq!(manager.get_file("2").unwrap().key, key2); + + drop(importer); + assert_eq!(manager.get_file_exists("1").unwrap(), None); + assert_eq!(manager.get_file_exists("2").unwrap(), None); + + let mut importer = DataKeyImporter::new(&manager); + // same key + importer + .add( + "1", + file0.iv.clone(), + DataKey { + key: file0.key.clone(), + method: EncryptionMethod::Aes192Ctr, + ..Default::default() + }, + ) + .unwrap(); + // different key + importer + .add( + "2", + Iv::new_ctr().as_slice().to_owned(), + DataKey { + key: key2.clone(), + method: EncryptionMethod::Aes192Ctr, + ..Default::default() + }, + ) + .unwrap(); + // importer is dropped here. 
+ importer.commit().unwrap(); + assert_eq!(manager.get_file("1").unwrap(), file0); + assert_eq!(manager.get_file("2").unwrap().key, key2); + } } diff --git a/components/engine_rocks/src/sst.rs b/components/engine_rocks/src/sst.rs index 0518dd7feb5..85c30d74a87 100644 --- a/components/engine_rocks/src/sst.rs +++ b/components/engine_rocks/src/sst.rs @@ -376,7 +376,7 @@ mod tests { let mut writer = RocksSstWriterBuilder::new() .set_cf(CF_DEFAULT) .set_db(&engine) - .build(p.as_os_str().to_str().unwrap()) + .build(p.to_str().unwrap()) .unwrap(); writer.put(k, v).unwrap(); let sst_file = writer.finish().unwrap(); @@ -391,7 +391,7 @@ mod tests { .set_in_memory(true) .set_cf(CF_DEFAULT) .set_db(&engine) - .build(p.as_os_str().to_str().unwrap()) + .build(p.to_str().unwrap()) .unwrap(); writer.put(k, v).unwrap(); let mut buf = vec![]; diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index bc8b2f8baf2..932a1bcb51a 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -132,6 +132,9 @@ pub mod kv { let tombstone_path = path.with_extension(TOMBSTONE_SUFFIX); let _ = std::fs::remove_dir_all(&tombstone_path); std::fs::rename(path, &tombstone_path)?; + if let Some(m) = &self.db_opt.key_manager { + m.remove_dir(path, Some(&tombstone_path))?; + } std::fs::remove_dir_all(tombstone_path)?; Ok(()) } @@ -207,7 +210,7 @@ pub mod ctor { #[derive(Clone, Default)] pub struct DbOptions { - key_manager: Option>, + pub(crate) key_manager: Option>, rate_limiter: Option>, state_storage: Option>, enable_multi_batch_write: bool, diff --git a/components/engine_traits_tests/Cargo.toml b/components/engine_traits_tests/Cargo.toml index 301a7ee5d76..516135a86d2 100644 --- a/components/engine_traits_tests/Cargo.toml +++ b/components/engine_traits_tests/Cargo.toml @@ -25,8 +25,12 @@ test-engines-panic = [ ] [dependencies] +encryption = { workspace = true } +encryption_export = { workspace = true } engine_test = { workspace = true } 
engine_traits = { workspace = true } +kvproto = { workspace = true } panic_hook = { workspace = true } tempfile = "3.0" tikv_alloc = { workspace = true } +test_util = { workspace = true } diff --git a/components/engine_traits_tests/src/checkpoint.rs b/components/engine_traits_tests/src/checkpoint.rs new file mode 100644 index 00000000000..ad85b8f85ed --- /dev/null +++ b/components/engine_traits_tests/src/checkpoint.rs @@ -0,0 +1,49 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +//! Checkpoint tests + +use std::sync::Arc; + +use encryption_export::data_key_manager_from_config; +use engine_test::{ + ctor::{CfOptions, DbOptions, KvEngineConstructorExt}, + kv::KvTestEngine, +}; +use engine_traits::{ + Checkpointable, Checkpointer, KvEngine, Peekable, SyncMutable, ALL_CFS, CF_DEFAULT, +}; + +use super::tempdir; + +#[test] +fn test_encrypted_checkpoint() { + let dir = tempdir(); + let root_path = dir.path(); + + let encryption_cfg = test_util::new_file_security_config(root_path); + let key_manager = Arc::new( + data_key_manager_from_config(&encryption_cfg, root_path.to_str().unwrap()) + .unwrap() + .unwrap(), + ); + + let mut db_opts = DbOptions::default(); + db_opts.set_key_manager(Some(key_manager)); + let cf_opts: Vec<_> = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); + + let path1 = root_path.join("1").to_str().unwrap().to_owned(); + let db1 = KvTestEngine::new_kv_engine_opt(&path1, db_opts.clone(), cf_opts.clone()).unwrap(); + db1.put(b"foo", b"bar").unwrap(); + db1.sync().unwrap(); + + let path2 = root_path.join("2"); + let mut checkpointer = db1.new_checkpointer().unwrap(); + checkpointer.create_at(&path2, None, 0).unwrap(); + let db2 = + KvTestEngine::new_kv_engine_opt(path2.to_str().unwrap(), db_opts.clone(), cf_opts.clone()) + .unwrap(); + assert_eq!( + db2.get_value_cf(CF_DEFAULT, b"foo").unwrap().unwrap(), + b"bar" + ); +} diff --git a/components/engine_traits_tests/src/ctor.rs 
b/components/engine_traits_tests/src/ctor.rs index ab1eea4d958..dce6a64dff2 100644 --- a/components/engine_traits_tests/src/ctor.rs +++ b/components/engine_traits_tests/src/ctor.rs @@ -4,11 +4,12 @@ use std::fs; +use encryption_export::data_key_manager_from_config; use engine_test::{ ctor::{CfOptions, DbOptions, KvEngineConstructorExt}, kv::KvTestEngine, }; -use engine_traits::{KvEngine, SyncMutable, ALL_CFS}; +use engine_traits::{EncryptionKeyManager, KvEngine, Peekable, SyncMutable, ALL_CFS, CF_DEFAULT}; use super::tempdir; @@ -90,3 +91,40 @@ fn new_engine_opt_readonly_dir() { err.unwrap_err(); } + +#[test] +fn new_engine_opt_renamed_dir() { + use std::sync::Arc; + let dir = tempdir(); + let root_path = dir.path(); + + let encryption_cfg = test_util::new_file_security_config(root_path); + let key_manager = Arc::new( + data_key_manager_from_config(&encryption_cfg, root_path.to_str().unwrap()) + .unwrap() + .unwrap(), + ); + + let mut db_opts = DbOptions::default(); + db_opts.set_key_manager(Some(key_manager.clone())); + let cf_opts: Vec<_> = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); + + let path = root_path.join("missing").to_str().unwrap().to_owned(); + { + let db = KvTestEngine::new_kv_engine_opt(&path, db_opts.clone(), cf_opts.clone()).unwrap(); + db.put(b"foo", b"bar").unwrap(); + db.sync().unwrap(); + } + let new_path = root_path.join("new").to_str().unwrap().to_owned(); + key_manager.link_file(&path, &new_path).unwrap(); + fs::rename(&path, &new_path).unwrap(); + key_manager.delete_file(&path).unwrap(); + { + let db = + KvTestEngine::new_kv_engine_opt(&new_path, db_opts.clone(), cf_opts.clone()).unwrap(); + assert_eq!( + db.get_value_cf(CF_DEFAULT, b"foo").unwrap().unwrap(), + b"bar" + ); + } +} diff --git a/components/engine_traits_tests/src/lib.rs b/components/engine_traits_tests/src/lib.rs index d9b6af12f09..1d9b6b4fa53 100644 --- a/components/engine_traits_tests/src/lib.rs +++ b/components/engine_traits_tests/src/lib.rs @@ -40,6 +40,7 
@@ mod basic_read_write; mod cf_names; +mod checkpoint; mod ctor; mod delete_range; mod iterator; diff --git a/components/file_system/src/lib.rs b/components/file_system/src/lib.rs index a3701c6ecac..91e0a35da80 100644 --- a/components/file_system/src/lib.rs +++ b/components/file_system/src/lib.rs @@ -449,47 +449,6 @@ pub fn reserve_space_for_recover>(data_dir: P, file_size: u64) -> } } -const TRASH_PREFIX: &str = "TRASH-"; - -/// Remove a directory. -/// -/// Rename it before actually removal. -#[inline] -pub fn trash_dir_all(path: impl AsRef) -> io::Result<()> { - let path = path.as_ref(); - let name = match path.file_name() { - Some(n) => n, - None => return Err(io::Error::new(ErrorKind::InvalidInput, "path is invalid")), - }; - let trash_path = path.with_file_name(format!("{}{}", TRASH_PREFIX, name.to_string_lossy())); - if let Err(e) = rename(path, &trash_path) { - if e.kind() == ErrorKind::NotFound { - return Ok(()); - } - return Err(e); - } - remove_dir_all(trash_path) -} - -/// When using `trash_dir_all`, it's possible the directory is marked as trash -/// but not being actually deleted after a restart. This function can be used -/// to resume all those removal in the given directory. -#[inline] -pub fn clean_up_trash(path: impl AsRef) -> io::Result<()> { - clean_up_dir(path, TRASH_PREFIX) -} - -/// clean up all files starts with the given prefix in the given directory. -pub fn clean_up_dir(path: impl AsRef, prefix: &str) -> io::Result<()> { - for e in read_dir(path)? 
{ - let e = e?; - if e.file_name().to_string_lossy().starts_with(prefix) { - remove_dir_all(e.path())?; - } - } - Ok(()) -} - #[cfg(test)] mod tests { use std::{io::Write, iter}; @@ -656,39 +615,4 @@ mod tests { reserve_space_for_recover(data_path, 0).unwrap(); assert!(!file.exists()); } - - #[test] - fn test_trash_dir_all() { - let tmp_dir = Builder::new() - .prefix("test_reserve_space_for_recover") - .tempdir() - .unwrap(); - let data_path = tmp_dir.path(); - let sub_dir0 = data_path.join("sub_dir0"); - let trash_sub_dir0 = data_path.join(format!("{}sub_dir0", TRASH_PREFIX)); - create_dir_all(&sub_dir0).unwrap(); - assert!(sub_dir0.exists()); - - trash_dir_all(&sub_dir0).unwrap(); - assert!(!sub_dir0.exists()); - assert!(!trash_sub_dir0.exists()); - - create_dir_all(&sub_dir0).unwrap(); - create_dir_all(&trash_sub_dir0).unwrap(); - trash_dir_all(&sub_dir0).unwrap(); - assert!(!sub_dir0.exists()); - assert!(!trash_sub_dir0.exists()); - - clean_up_trash(data_path).unwrap(); - - create_dir_all(&trash_sub_dir0).unwrap(); - assert!(trash_sub_dir0.exists()); - clean_up_trash(data_path).unwrap(); - assert!(!trash_sub_dir0.exists()); - - create_dir_all(&sub_dir0).unwrap(); - assert!(sub_dir0.exists()); - clean_up_dir(data_path, "sub").unwrap(); - assert!(!sub_dir0.exists()); - } } diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index ad13ea5ab74..84daa4c40b5 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -35,6 +35,7 @@ causal_ts = { workspace = true } collections = { workspace = true } concurrency_manager = { workspace = true } crossbeam = "0.8" +encryption_export = { workspace = true } engine_traits = { workspace = true } error_code = { workspace = true } fail = "0.5" diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 1e72341d651..a9e3c223943 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ 
b/components/raftstore-v2/src/batch/store.rs @@ -2,6 +2,7 @@ use std::{ ops::{Deref, DerefMut}, + path::Path, sync::{ atomic::{AtomicBool, Ordering}, Arc, Mutex, @@ -16,6 +17,7 @@ use causal_ts::CausalTsProviderImpl; use collections::HashMap; use concurrency_manager::ConcurrencyManager; use crossbeam::channel::TrySendError; +use encryption_export::DataKeyManager; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use file_system::{set_io_type, IoType, WithIoType}; use kvproto::{disk_usage::DiskUsage, raft_serverpb::RaftMessage}; @@ -92,6 +94,7 @@ pub struct StoreContext { pub global_stat: GlobalStoreStat, pub store_stat: LocalStoreStat, pub sst_importer: Arc, + pub key_manager: Option>, } impl StoreContext { @@ -288,6 +291,7 @@ struct StorePollerBuilder { snap_mgr: TabletSnapManager, global_stat: GlobalStoreStat, sst_importer: Arc, + key_manager: Option>, } impl StorePollerBuilder { @@ -305,6 +309,7 @@ impl StorePollerBuilder { snap_mgr: TabletSnapManager, coprocessor_host: CoprocessorHost, sst_importer: Arc, + key_manager: Option>, ) -> Self { let pool_size = cfg.value().apply_batch_system.pool_size; let max_pool_size = std::cmp::max( @@ -333,6 +338,7 @@ impl StorePollerBuilder { coprocessor_host, global_stat, sst_importer, + key_manager, } } @@ -364,8 +370,13 @@ impl StorePollerBuilder { } meta.set_region(storage.region(), storage.is_initialized(), &self.logger); - let (sender, peer_fsm) = - PeerFsm::new(&cfg, &self.tablet_registry, &self.snap_mgr, storage)?; + let (sender, peer_fsm) = PeerFsm::new( + &cfg, + &self.tablet_registry, + self.key_manager.as_deref(), + &self.snap_mgr, + storage, + )?; meta.region_read_progress .insert(region_id, peer_fsm.as_ref().peer().read_progress().clone()); @@ -383,13 +394,22 @@ impl StorePollerBuilder { Ok(regions) } + #[inline] + fn remove_dir(&self, p: &Path) -> Result<()> { + if let Some(m) = &self.key_manager { + m.remove_dir(p, None)?; + } + file_system::remove_dir_all(p)?; + Ok(()) + } + fn clean_up_tablets(&self, 
peers: &HashMap>) -> Result<()> { for entry in file_system::read_dir(self.tablet_registry.tablet_root())? { let entry = entry?; let path = entry.path(); if path.extension().map_or(false, |s| s == "tmp") { // The directory may be generated by an aborted checkpoint. - file_system::remove_dir_all(&path)?; + self.remove_dir(&path)?; continue; } let Some((prefix, region_id, tablet_index)) = self.tablet_registry.parse_tablet_name(&path) else { continue }; @@ -402,20 +422,20 @@ impl StorePollerBuilder { None => { // The peer is either destroyed or not created yet. It will be // recovered by leader heartbeats. - file_system::remove_dir_all(&path)?; + self.remove_dir(&path)?; continue; } }; // Valid split tablet should be installed during recovery. if prefix == SPLIT_PREFIX { - file_system::remove_dir_all(&path)?; + self.remove_dir(&path)?; continue; } else if prefix == MERGE_IN_PROGRESS_PREFIX { continue; } else if prefix.is_empty() { // Stale split data can be deleted. if fsm.peer().storage().tablet_index() > tablet_index { - file_system::remove_dir_all(&path)?; + self.remove_dir(&path)?; } } else { debug_assert!(false, "unexpected tablet prefix: {}", path.display()); @@ -461,6 +481,7 @@ where global_stat: self.global_stat.clone(), store_stat: self.global_stat.local(), sst_importer: self.sst_importer.clone(), + key_manager: self.key_manager.clone(), }; poll_ctx.update_ticks_timeout(); let cfg_tracker = self.cfg.clone().tracker("raftstore".to_string()); @@ -554,6 +575,7 @@ impl StoreSystem { background: Worker, pd_worker: LazyWorker, sst_importer: Arc, + key_manager: Option>, ) -> Result<()> where T: Transport + 'static, @@ -660,6 +682,7 @@ impl StoreSystem { snap_mgr, coprocessor_host, sst_importer, + key_manager, ); self.workers = Some(workers); self.schedulers = Some(schedulers); diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index d2506d0dd21..3af66c4f81c 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ 
b/components/raftstore-v2/src/fsm/peer.rs @@ -6,6 +6,7 @@ use std::borrow::Cow; use batch_system::{BasicMailbox, Fsm}; use crossbeam::channel::TryRecvError; +use encryption_export::DataKeyManager; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use kvproto::{errorpb, raft_cmdpb::RaftCmdResponse}; use raftstore::store::{Config, TabletSnapManager, Transport}; @@ -40,10 +41,11 @@ impl PeerFsm { pub fn new( cfg: &Config, tablet_registry: &TabletRegistry, + key_manager: Option<&DataKeyManager>, snap_mgr: &TabletSnapManager, storage: Storage, ) -> Result> { - let peer = Peer::new(cfg, tablet_registry, snap_mgr, storage)?; + let peer = Peer::new(cfg, tablet_registry, key_manager, snap_mgr, storage)?; info!(peer.logger, "create peer"; "raft_state" => ?peer.storage().raft_state(), "apply_state" => ?peer.storage().apply_state(), diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 65e7ab7906a..9d3a32f8f72 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -393,8 +393,15 @@ impl Store { ctx.schedulers.read.clone(), &ctx.logger, ) - .and_then(|s| PeerFsm::new(&ctx.cfg, &ctx.tablet_registry, &ctx.snap_mgr, s)) - { + .and_then(|s| { + PeerFsm::new( + &ctx.cfg, + &ctx.tablet_registry, + ctx.key_manager.as_deref(), + &ctx.snap_mgr, + s, + ) + }) { Ok(p) => p, res => { error!(self.logger(), "failed to create peer"; "region_id" => region_id, "peer_id" => to_peer.id, "err" => ?res.err()); diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index 90b7930c368..f1a65fc1768 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -29,6 +29,7 @@ use std::{cmp, sync::Mutex}; +use encryption_export::DataKeyManager; use engine_traits::{ data_cf_offset, ApplyProgress, KvEngine, RaftEngine, 
RaftLogBatch, TabletRegistry, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, DATA_CFS, DATA_CFS_LEN, @@ -430,7 +431,12 @@ impl Storage { /// Region state is written before actually moving data. It's possible that /// the tablet is missing after restart. We need to move the data again /// after being restarted. - pub fn recover_tablet(&self, registry: &TabletRegistry, snap_mgr: &TabletSnapManager) { + pub fn recover_tablet( + &self, + registry: &TabletRegistry, + key_manager: Option<&DataKeyManager>, + snap_mgr: &TabletSnapManager, + ) { let tablet_index = self.region_state().get_tablet_index(); if tablet_index == 0 { // It's an uninitialized peer, nothing to recover. @@ -445,7 +451,7 @@ impl Storage { if tablet_index == RAFT_INIT_LOG_INDEX { // Its data may come from split or snapshot. Try split first. let split_path = temp_split_path(registry, region_id); - if install_tablet(registry, &split_path, region_id, tablet_index) { + if install_tablet(registry, key_manager, &split_path, region_id, tablet_index) { return; } } @@ -460,7 +466,7 @@ impl Storage { self.entry_storage().truncated_term(), tablet_index, ); - if install_tablet(registry, &snap_path, region_id, tablet_index) { + if install_tablet(registry, key_manager, &snap_path, region_id, tablet_index) { return; } } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index f37791638d5..f63d9c97b86 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -1021,8 +1021,9 @@ impl Storage { if let Err(e) = self.apply_snapshot( ready.snapshot(), write_task, - ctx.snap_mgr.clone(), - ctx.tablet_registry.clone(), + &ctx.snap_mgr, + &ctx.tablet_registry, + ctx.key_manager.as_ref(), ) { SNAP_COUNTER.apply.fail.inc(); error!(self.logger(),"failed to apply snapshot";"error" => ?e) diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs 
b/components/raftstore-v2/src/operation/ready/snapshot.rs index 12b4a97e710..5547df7d580 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -29,7 +29,11 @@ use std::{ }, }; -use engine_traits::{KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, ALL_CFS}; +use encryption_export::DataKeyManager; +use engine_traits::{ + EncryptionKeyManager, KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, + ALL_CFS, +}; use kvproto::raft_serverpb::{PeerState, RaftSnapshotData}; use protobuf::Message; use raft::{eraftpb::Snapshot, StateRole}; @@ -136,6 +140,7 @@ pub fn recv_snap_path( /// Returns false if `source` doesn't exist. pub fn install_tablet( registry: &TabletRegistry, + key_manager: Option<&DataKeyManager>, source: &Path, region_id: u64, tablet_index: u64, @@ -151,7 +156,14 @@ pub fn install_tablet( source.display(), target_path.display() ); + if let Some(m) = &key_manager { + m.link_file(source.to_str().unwrap(), target_path.to_str().unwrap()) + .unwrap(); + } if let Err(e) = fs::rename(source, &target_path) { + if let Some(m) = &key_manager { + m.delete_file(target_path.to_str().unwrap()).unwrap(); + } panic!( "failed to rename tablet {} => {}: {:?}", source.display(), @@ -159,6 +171,9 @@ pub fn install_tablet( e ); } + if let Some(m) = &key_manager { + m.delete_file(source.to_str().unwrap()).unwrap(); + } true } @@ -544,8 +559,9 @@ impl Storage { &mut self, snap: &Snapshot, task: &mut WriteTask, - snap_mgr: TabletSnapManager, - reg: TabletRegistry, + snap_mgr: &TabletSnapManager, + reg: &TabletRegistry, + key_manager: Option<&Arc>, ) -> Result<()> { let region_id = self.region().get_id(); let peer_id = self.peer().get_id(); @@ -632,10 +648,10 @@ impl Storage { Some(init) if init.scheduled && last_index == RAFT_INIT_LOG_INDEX => { lb.put_dirty_mark(region_id, last_index, true).unwrap(); self.set_has_dirty_data(true); - (temp_split_path(®, region_id), false) 
+ (temp_split_path(reg, region_id), false) } si => ( - recv_snap_path(&snap_mgr, region_id, peer_id, last_term, last_index), + recv_snap_path(snap_mgr, region_id, peer_id, last_term, last_index), si.is_some(), ), }; @@ -643,8 +659,10 @@ impl Storage { let logger = self.logger().clone(); // The snapshot require no additional processing such as ingest them to DB, but // it should load it into the factory after it persisted. + let reg = reg.clone(); + let key_manager = key_manager.cloned(); let hook = move || { - if !install_tablet(®, &path, region_id, last_index) { + if !install_tablet(®, key_manager.as_deref(), &path, region_id, last_index) { slog_panic!( logger, "failed to install tablet"; @@ -654,6 +672,7 @@ impl Storage { } if clean_split { let path = temp_split_path(®, region_id); + // TODO(tabokie) let _ = fs::remove_dir_all(path); } }; diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index d35dfe22184..e11c96922cd 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -7,6 +7,7 @@ use std::{ }; use collections::{HashMap, HashSet}; +use encryption_export::DataKeyManager; use engine_traits::{ CachedTablet, FlushState, KvEngine, RaftEngine, TabletContext, TabletRegistry, }; @@ -128,6 +129,7 @@ impl Peer { pub fn new( cfg: &Config, tablet_registry: &TabletRegistry, + key_manager: Option<&DataKeyManager>, snap_mgr: &TabletSnapManager, storage: Storage, ) -> Result { @@ -149,7 +151,9 @@ impl Peer { // old tablet and create new peer. We also can't get the correct range of the // region, which is required for kv data gc. if tablet_index != 0 { - raft_group.store().recover_tablet(tablet_registry, snap_mgr); + raft_group + .store() + .recover_tablet(tablet_registry, key_manager, snap_mgr); let mut ctx = TabletContext::new(®ion, Some(tablet_index)); ctx.flush_state = Some(flush_state.clone()); // TODO: Perhaps we should stop create the tablet automatically. 
diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index ee9be348c89..7edf8c02f09 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -398,7 +398,8 @@ mod tests { fn test_apply_snapshot() { let region = new_region(); let path = TempDir::new().unwrap(); - let mgr = TabletSnapManager::new(path.path().join("snap_dir").to_str().unwrap()).unwrap(); + let mgr = + TabletSnapManager::new(path.path().join("snap_dir").to_str().unwrap(), None).unwrap(); let engines = engine_test::new_temp_engine(&path); let raft_engine = engines.raft.clone(); let mut wb = raft_engine.log_batch(10); @@ -437,7 +438,7 @@ mod tests { .unwrap(); let snapshot = new_empty_snapshot(region.clone(), snap_index, snap_term, false); let mut task = WriteTask::new(region.get_id(), 5, 1); - s.apply_snapshot(&snapshot, &mut task, mgr, reg.clone()) + s.apply_snapshot(&snapshot, &mut task, &mgr, ®, None) .unwrap(); // Add more entries to check if old entries are cleared. If not, it should panic // with memtable hole when using raft engine. 
@@ -481,7 +482,8 @@ mod tests { write_initial_states(&mut wb, region.clone()).unwrap(); assert!(!wb.is_empty()); raft_engine.consume(&mut wb, true).unwrap(); - let mgr = TabletSnapManager::new(path.path().join("snap_dir").to_str().unwrap()).unwrap(); + let mgr = + TabletSnapManager::new(path.path().join("snap_dir").to_str().unwrap(), None).unwrap(); // building a tablet factory let ops = DbOptions::default(); let cf_opts = DATA_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 4bd0cef8846..83cf3646b9b 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -262,8 +262,19 @@ impl RunningState { causal_ts_provider: Option>, logger: &Logger, ) -> (TestRouter, Self) { + // TODO(tabokie): Enable encryption by default. (after snapshot encryption) + // let encryption_cfg = test_util::new_file_security_config(path); + // let key_manager = Some(Arc::new( + // data_key_manager_from_config(&encryption_cfg, path.to_str().unwrap()) + // .unwrap() + // .unwrap(), + // )); + let key_manager = None; + + let mut opts = engine_test::ctor::RaftDbOptions::default(); + opts.set_key_manager(key_manager.clone()); let raft_engine = - engine_test::raft::new_engine(&format!("{}", path.join("raft").display()), None) + engine_test::raft::new_engine(&format!("{}", path.join("raft").display()), Some(opts)) .unwrap(); let mut bootstrap = Bootstrap::new(&raft_engine, 0, pd_client.as_ref(), logger.clone()); @@ -286,6 +297,7 @@ impl RunningState { raft_engine.clone(), router.clone(), ))); + db_opt.set_key_manager(key_manager.clone()); let factory = Box::new(TestTabletFactory::new(db_opt, cf_opts)); let registry = TabletRegistry::new(factory, path.join("tablets")).unwrap(); if let Some(region) = bootstrap.bootstrap_first_region(&store, store_id).unwrap() { @@ -302,14 +314,18 @@ impl 
RunningState { let router = RaftRouter::new(store_id, router); let store_meta = router.store_meta().clone(); - let snap_mgr = TabletSnapManager::new(path.join("tablets_snap").to_str().unwrap()).unwrap(); + let snap_mgr = TabletSnapManager::new( + path.join("tablets_snap").to_str().unwrap(), + key_manager.clone(), + ) + .unwrap(); let coprocessor_host = CoprocessorHost::new(router.store_router().clone(), cop_cfg.value().clone()); let importer = Arc::new( SstImporter::new( &Default::default(), path.join("importer"), - None, + key_manager.clone(), ApiVersion::V1, ) .unwrap(), @@ -336,6 +352,7 @@ impl RunningState { background.clone(), pd_worker, importer, + key_manager, ) .unwrap(); diff --git a/components/raftstore/src/store/async_io/read.rs b/components/raftstore/src/store/async_io/read.rs index ced7b0f4418..006fe0eb24c 100644 --- a/components/raftstore/src/store/async_io/read.rs +++ b/components/raftstore/src/store/async_io/read.rs @@ -122,14 +122,17 @@ impl ReadRunner { fn generate_snap(&self, snap_key: &TabletSnapKey, tablet: EK) -> crate::Result<()> { let checkpointer_path = self.snap_mgr().tablet_gen_path(snap_key); - if checkpointer_path.as_path().exists() { + if checkpointer_path.exists() { + // TODO: make `delete_snapshot` return error so we can use it here. // Remove the old checkpoint directly. - file_system::trash_dir_all(&checkpointer_path)?; + encryption::trash_dir_all( + &checkpointer_path, + self.snap_mgr().key_manager().as_deref(), + )?; } // Here not checkpoint to a temporary directory first, the temporary directory // logic already implemented in rocksdb. 
let mut checkpointer = tablet.new_checkpointer()?; - checkpointer.create_at(checkpointer_path.as_path(), None, 0)?; Ok(()) } diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 68d3c7fba51..69d948e3ae4 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1915,7 +1915,7 @@ impl SnapManagerBuilder { assert!(!path.is_empty()); let mut path_v2 = path.clone(); path_v2.push_str("_v2"); - let tablet_snap_mgr = TabletSnapManager::new(&path_v2).unwrap(); + let tablet_snap_mgr = TabletSnapManager::new(&path_v2, self.key_manager.clone()).unwrap(); let mut snapshot = SnapManager { core: SnapManagerCore { @@ -1996,12 +1996,16 @@ impl Drop for ReceivingGuard<'_> { pub struct TabletSnapManager { // directory to store snapfile. base: PathBuf, + key_manager: Option>, receiving: Arc>>, stats: Arc>>, } impl TabletSnapManager { - pub fn new>(path: T) -> io::Result { + pub fn new>( + path: T, + key_manager: Option>, + ) -> io::Result { let path = path.into(); if !path.exists() { file_system::create_dir_all(&path)?; @@ -2012,10 +2016,11 @@ impl TabletSnapManager { format!("{} should be a directory", path.display()), )); } - file_system::clean_up_dir(&path, SNAP_GEN_PREFIX)?; - file_system::clean_up_trash(&path)?; + encryption::clean_up_dir(&path, SNAP_GEN_PREFIX, key_manager.as_deref())?; + encryption::clean_up_trash(&path, key_manager.as_deref())?; Ok(Self { base: path, + key_manager, receiving: Arc::default(), stats: Arc::default(), }) @@ -2073,16 +2078,17 @@ impl TabletSnapManager { pub fn delete_snapshot(&self, key: &TabletSnapKey) -> bool { let path = self.tablet_gen_path(key); - if path.exists() && let Err(e) = file_system::trash_dir_all(&path) { - error!( - "delete snapshot failed"; - "path" => %path.display(), - "err" => ?e, - ); - false - } else { - true + if path.exists() { + if let Err(e) = encryption::trash_dir_all(&path, self.key_manager.as_deref()) { + error!( + "delete snapshot 
failed"; + "path" => %path.display(), + "err" => ?e, + ); + return false; + } } + true } pub fn total_snap_size(&self) -> Result { @@ -2135,6 +2141,11 @@ impl TabletSnapManager { key, }) } + + #[inline] + pub fn key_manager(&self) -> &Option> { + &self.key_manager + } } #[cfg(test)] @@ -3123,7 +3134,7 @@ pub mod tests { .tempdir() .unwrap(); let start = Instant::now(); - let mgr = TabletSnapManager::new(snap_dir.path()).unwrap(); + let mgr = TabletSnapManager::new(snap_dir.path(), None).unwrap(); let key = TabletSnapKey::new(1, 1, 1, 1); mgr.begin_snapshot(key.clone(), start - time::Duration::from_secs(2), 1); // filter out the snapshot that is not finished diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 81575b8cbf6..202307f7767 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -520,10 +520,11 @@ where .unwrap() .to_owned(); - let snap_mgr = match TabletSnapManager::new(&snap_path) { - Ok(mgr) => mgr, - Err(e) => fatal!("failed to create snapshot manager at {}: {}", snap_path, e), - }; + let snap_mgr = + match TabletSnapManager::new(&snap_path, self.core.encryption_key_manager.clone()) { + Ok(mgr) => mgr, + Err(e) => fatal!("failed to create snapshot manager at {}: {}", snap_path, e), + }; // Create coprocessor endpoint. 
let cop_read_pool_handle = if self.core.config.readpool.coprocessor.use_unified_pool() { @@ -758,6 +759,7 @@ where raft_store, &state, importer.clone(), + self.core.encryption_key_manager.clone(), ) .unwrap_or_else(|e| fatal!("failed to start node: {}", e)); diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index c770a6144bd..ffa38b51796 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -231,7 +231,10 @@ impl Simulator for NodeCluster { { let tmp = test_util::temp_dir("test_cluster", cfg.prefer_mem); let snap_path = tmp.path().to_str().unwrap().to_owned(); - (TabletSnapManager::new(snap_path)?, Some(tmp)) + ( + TabletSnapManager::new(snap_path, key_manager.clone())?, + Some(tmp), + ) } else { let trans = self.trans.core.lock().unwrap(); let &(ref snap_mgr, _) = &trans.snap_paths[&node_id]; @@ -273,7 +276,13 @@ impl Simulator for NodeCluster { let importer = { let dir = Path::new(raft_engine.get_engine_path()).join("../import-sst"); Arc::new( - SstImporter::new(&cfg.import, dir, key_manager, cfg.storage.api_version()).unwrap(), + SstImporter::new( + &cfg.import, + dir, + key_manager.clone(), + cfg.storage.api_version(), + ) + .unwrap(), ) }; @@ -295,6 +304,7 @@ impl Simulator for NodeCluster { Arc::new(VersionTrack::new(raft_store)), &state, importer, + key_manager, )?; assert!( raft_engine diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index f110578784f..9bdd8568418 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -328,7 +328,10 @@ impl ServerCluster { let (snap_mgr, snap_mgs_path) = if !self.snap_mgrs.contains_key(&node_id) { let tmp = test_util::temp_dir("test_cluster", cfg.prefer_mem); let snap_path = tmp.path().to_str().unwrap().to_owned(); - (TabletSnapManager::new(snap_path)?, Some(tmp)) + ( + TabletSnapManager::new(snap_path, key_manager.clone())?, 
+ Some(tmp), + ) } else { (self.snap_mgrs[&node_id].clone(), None) }; @@ -475,7 +478,13 @@ impl ServerCluster { let importer = { let dir = Path::new(raft_engine.get_engine_path()).join("../import-sst"); Arc::new( - SstImporter::new(&cfg.import, dir, key_manager, cfg.storage.api_version()).unwrap(), + SstImporter::new( + &cfg.import, + dir, + key_manager.clone(), + cfg.storage.api_version(), + ) + .unwrap(), ) }; let import_service = ImportSstService::new( @@ -598,6 +607,7 @@ impl ServerCluster { Arc::new(VersionTrack::new(raft_store)), &state, importer, + key_manager, )?; assert!(node_id == 0 || node_id == node.id()); let node_id = node.id(); diff --git a/components/test_util/src/encryption.rs b/components/test_util/src/encryption.rs index ba6ab56cc52..e09c0ce7cbb 100644 --- a/components/test_util/src/encryption.rs +++ b/components/test_util/src/encryption.rs @@ -1,6 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use std::{fs::File, io::Write, time::Duration}; +use std::{fs::File, io::Write, path::Path, time::Duration}; use encryption_export::{ create_backend, DataKeyManager, DataKeyManagerArgs, EncryptionConfig, FileConfig, @@ -15,15 +15,15 @@ pub fn create_test_key_file(path: &str) { .unwrap(); } -fn new_test_file_master_key(tmp: &tempfile::TempDir) -> MasterKeyConfig { - let key_path = tmp.path().join("test_key").to_str().unwrap().to_owned(); +fn new_test_file_master_key(tmp: &Path) -> MasterKeyConfig { + let key_path = tmp.join("test_key").to_str().unwrap().to_owned(); create_test_key_file(&key_path); MasterKeyConfig::File { config: FileConfig { path: key_path }, } } -pub fn new_file_security_config(dir: &tempfile::TempDir) -> EncryptionConfig { +pub fn new_file_security_config(dir: &Path) -> EncryptionConfig { let master_key_cfg = new_test_file_master_key(dir); EncryptionConfig { data_encryption_method: EncryptionMethod::Aes256Ctr, @@ -41,7 +41,7 @@ pub fn new_test_key_manager( master_key: Option, previous_master_key: Option, ) -> 
Result> { - let default_config = new_test_file_master_key(tmp_dir); + let default_config = new_test_file_master_key(tmp_dir.path()); let master_key = master_key.unwrap_or_else(|| default_config.clone()); let previous_master_key = previous_master_key.unwrap_or(default_config); DataKeyManager::new( @@ -52,7 +52,7 @@ pub fn new_test_key_manager( rotation_period: Duration::from_secs(60), enable_file_dictionary_log: true, file_dictionary_rewrite_threshold: 2, - dict_path: tmp_dir.path().as_os_str().to_str().unwrap().to_string(), + dict_path: tmp_dir.path().to_str().unwrap().to_string(), }, ) } diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index c3976b8eeac..bf70a63acdb 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -2,6 +2,7 @@ use std::{path::Path, sync::Arc}; +use encryption_export::DataKeyManager; use engine_rocks::{ raw::{Cache, Env}, util::RangeCompactionFilterFactory, @@ -28,6 +29,7 @@ struct FactoryInner { api_version: ApiVersion, flow_listener: Option, sst_recovery_sender: Option>, + encryption_key_manager: Option>, db_resources: DbResources, cf_resources: CfResources, state_storage: Option>, @@ -48,6 +50,7 @@ impl KvEngineFactoryBuilder { api_version: config.storage.api_version(), flow_listener: None, sst_recovery_sender: None, + encryption_key_manager: None, db_resources: config.rocksdb.build_resources(env), cf_resources: config.rocksdb.build_cf_resources(cache), state_storage: None, @@ -80,6 +83,11 @@ impl KvEngineFactoryBuilder { self } + pub fn encryption_key_manager(mut self, m: Option>) -> Self { + self.inner.encryption_key_manager = m; + self + } + /// Set whether enable lite mode. /// /// In lite mode, most listener/filters will not be installed. @@ -233,7 +241,9 @@ impl TabletFactory for KvEngineFactory { // kv_db_opts, // kv_cfs_opts, // )?; - let _ = file_system::trash_dir_all(path); + // TODO: use RocksDB::DestroyDB. 
+ let _ = + encryption_export::trash_dir_all(path, self.inner.encryption_key_manager.as_deref()); if let Some(listener) = &self.inner.flow_listener { listener.clone_with(ctx.id).on_destroyed(); } diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index b9cc956d40e..f95e4a89848 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -4,6 +4,7 @@ use std::sync::{Arc, Mutex}; use causal_ts::CausalTsProviderImpl; use concurrency_manager::ConcurrencyManager; +use encryption_export::DataKeyManager; use engine_traits::{KvEngine, RaftEngine, TabletContext, TabletRegistry}; use kvproto::{metapb, replication_modepb::ReplicationStatus}; use pd_client::PdClient; @@ -104,6 +105,7 @@ where store_cfg: Arc>, state: &Mutex, sst_importer: Arc, + key_manager: Option>, ) -> Result<()> where T: Transport + 'static, @@ -143,6 +145,7 @@ where pd_worker, store_cfg, sst_importer, + key_manager, )?; Ok(()) @@ -205,6 +208,7 @@ where pd_worker: LazyWorker, store_cfg: Arc>, sst_importer: Arc, + key_manager: Option>, ) -> Result<()> where T: Transport + 'static, @@ -237,6 +241,7 @@ where background, pd_worker, sst_importer, + key_manager, )?; Ok(()) } diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index 07a85109006..f1044031d9f 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -245,6 +245,7 @@ pub(crate) async fn cleanup_cache( let entry = entry?; let ft = entry.file_type()?; if ft.is_dir() { + // TODO(tabokie) fs::remove_dir_all(entry.path())?; continue; } @@ -406,6 +407,7 @@ pub(crate) async fn recv_snap_files<'a>( let path = snap_mgr.tmp_recv_path(&context.key); info!("begin to receive tablet snapshot files"; "file" => %path.display(), "region_id" => region_id); if path.exists() { + // TODO(tabokie) fs::remove_dir_all(&path)?; } let (reused, missing_ssts) = if context.use_cache { @@ -422,6 +424,7 @@ pub(crate) async fn recv_snap_files<'a>( let received = accept_missing(&path, missing_ssts, &mut stream, 
&limiter).await?; info!("received all tablet snapshot file"; "snap_key" => %context.key, "region_id" => region_id, "received" => received, "reused" => reused); let final_path = snap_mgr.final_recv_path(&context.key); + // TODO(tabokie) fs::rename(&path, final_path)?; Ok(context) } diff --git a/tests/failpoints/cases/test_encryption.rs b/tests/failpoints/cases/test_encryption.rs index 8b73188e569..eba0a515893 100644 --- a/tests/failpoints/cases/test_encryption.rs +++ b/tests/failpoints/cases/test_encryption.rs @@ -19,9 +19,9 @@ fn test_file_dict_file_record_corrupted() { // Crc32 (4 bytes) + File name length (2 bytes) + FileInfo length (2 bytes) + // Log type (1 bytes) fail::cfg("file_dict_log_append_incomplete", "return(9)").unwrap(); - file_dict_file.insert("info1", &info1).unwrap(); + file_dict_file.insert("info1", &info1, true).unwrap(); fail::remove("file_dict_log_append_incomplete"); - file_dict_file.insert("info2", &info2).unwrap(); + file_dict_file.insert("info2", &info2, true).unwrap(); // Intermediate record damage is not allowed. file_dict_file.recovery().unwrap_err(); @@ -34,9 +34,9 @@ fn test_file_dict_file_record_corrupted() { .unwrap(); let info1 = create_file_info(1, EncryptionMethod::Aes256Ctr); let info2 = create_file_info(2, EncryptionMethod::Unknown); - file_dict_file.insert("info1", &info1).unwrap(); + file_dict_file.insert("info1", &info1, true).unwrap(); fail::cfg("file_dict_log_append_incomplete", "return(9)").unwrap(); - file_dict_file.insert("info2", &info2).unwrap(); + file_dict_file.insert("info2", &info2, true).unwrap(); fail::remove("file_dict_log_append_incomplete"); // The ending record can be discarded. 
let file_dict = file_dict_file.recovery().unwrap(); diff --git a/tests/integrations/import/util.rs b/tests/integrations/import/util.rs index e757e7685ba..e6e2121a479 100644 --- a/tests/integrations/import/util.rs +++ b/tests/integrations/import/util.rs @@ -81,7 +81,7 @@ pub fn new_cluster_and_tikv_import_client_tde() -> ( ImportSstClient, ) { let tmp_dir = tempfile::TempDir::new().unwrap(); - let encryption_cfg = test_util::new_file_security_config(&tmp_dir); + let encryption_cfg = test_util::new_file_security_config(tmp_dir.path()); let mut security = test_util::new_security_cfg(None); security.encryption = encryption_cfg; let mut config = TikvConfig::default(); From ac7f14819744c714de1b21952b3efddab883006c Mon Sep 17 00:00:00 2001 From: 3pointer Date: Thu, 20 Apr 2023 20:01:20 +0800 Subject: [PATCH 0654/1149] raftstore-v2: adapt backup stream for raftstore-v2 (#14589) ref tikv/tikv#14614 Signed-off-by: 3pointer Co-authored-by: Ti Chi Robot --- components/backup-stream/src/endpoint.rs | 66 +++++++++++------ components/backup-stream/src/event_loader.rs | 48 ++++++------- components/backup-stream/src/lib.rs | 4 +- .../backup-stream/src/subscription_manager.rs | 26 +++---- components/backup-stream/tests/mod.rs | 26 ++++--- components/cdc/src/endpoint.rs | 6 +- components/resolved_ts/src/advance.rs | 71 +++++++++---------- components/server/src/server.rs | 28 +++++--- components/server/src/server2.rs | 65 ++++++++++++++++- 9 files changed, 218 insertions(+), 122 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 45d132b001b..c88b36da8db 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -8,7 +8,6 @@ use concurrency_manager::ConcurrencyManager; use engine_traits::KvEngine; use error_code::ErrorCodeExt; use futures::FutureExt; -use grpcio::Environment; use kvproto::{ brpb::{StreamBackupError, StreamBackupTaskInfo}, metapb::Region, @@ -16,11 +15,9 @@ use 
kvproto::{ use pd_client::PdClient; use raftstore::{ coprocessor::{CmdBatch, ObserveHandle, RegionInfoProvider}, - router::RaftStoreRouter, - store::RegionReadProgressRegistry, + router::CdcHandle, }; -use resolved_ts::LeadershipResolver; -use security::SecurityManager; +use resolved_ts::{resolve_by_raft, LeadershipResolver}; use tikv::config::BackupStreamConfig; use tikv_util::{ box_err, @@ -100,7 +97,7 @@ impl Endpoint where R: RegionInfoProvider + 'static + Clone, E: KvEngine, - RT: RaftStoreRouter + 'static, + RT: CdcHandle + 'static, PDC: PdClient + 'static, S: MetaStore + 'static, { @@ -114,10 +111,7 @@ where router: RT, pd_client: Arc, concurrency_manager: ConcurrencyManager, - // Required by Leadership Resolver. - env: Arc, - region_read_progress: RegionReadProgressRegistry, - security_mgr: Arc, + resolver: BackupStreamResolver, ) -> Self { crate::metrics::STREAM_ENABLED.inc(); let pool = create_tokio_runtime((config.num_threads / 2).max(1), "backup-stream") @@ -154,14 +148,7 @@ where let initial_scan_throughput_quota = Limiter::new(limit); info!("the endpoint of stream backup started"; "path" => %config.temp_path); let subs = SubscriptionTracer::default(); - let leadership_resolver = LeadershipResolver::new( - store_id, - Arc::clone(&pd_client) as _, - env, - security_mgr, - region_read_progress, - Duration::from_secs(60), - ); + let (region_operator, op_loop) = RegionSubscriptionManager::start( InitialDataLoader::new( router.clone(), @@ -177,7 +164,7 @@ where meta_client.clone(), pd_client.clone(), ((config.num_threads + 1) / 2).max(1), - leadership_resolver, + resolver, ); pool.spawn(op_loop); let mut checkpoint_mgr = CheckpointManager::default(); @@ -212,7 +199,7 @@ where S: MetaStore + 'static, R: RegionInfoProvider + Clone + 'static, E: KvEngine, - RT: RaftStoreRouter + 'static, + RT: CdcHandle + 'static, PDC: PdClient + 'static, { fn get_meta_client(&self) -> MetadataClient { @@ -1049,6 +1036,29 @@ fn create_tokio_runtime(thread_count: usize, 
thread_name: &str) -> TokioResult { + // for raftstore-v1, we use LeadershipResolver to check leadership of a region. + V1(LeadershipResolver), + // for raftstore-v2, it has less regions. we use CDCHandler to check leadership of a region. + V2(RT, PhantomData), +} + +impl BackupStreamResolver +where + RT: CdcHandle + 'static, + EK: KvEngine, +{ + pub async fn resolve(&mut self, regions: Vec, min_ts: TimeStamp) -> Vec { + match self { + BackupStreamResolver::V1(x) => x.resolve(regions, min_ts).await, + BackupStreamResolver::V2(x, _) => { + let x = x.clone(); + resolve_by_raft(regions, min_ts, x).await + } + } + } +} + #[derive(Debug)] pub enum RegionSet { /// The universal set. @@ -1282,7 +1292,7 @@ where S: MetaStore + 'static, R: RegionInfoProvider + Clone + 'static, E: KvEngine, - RT: RaftStoreRouter + 'static, + RT: CdcHandle + 'static, PDC: PdClient + 'static, { type Task = Task; @@ -1295,7 +1305,9 @@ where #[cfg(test)] mod test { use engine_rocks::RocksEngine; - use raftstore::coprocessor::region_info_accessor::MockRegionInfoProvider; + use raftstore::{ + coprocessor::region_info_accessor::MockRegionInfoProvider, router::CdcRaftRouter, + }; use test_raftstore::MockRaftStoreRouter; use tikv_util::worker::dummy_scheduler; @@ -1311,7 +1323,15 @@ mod test { cli.insert_task_with_range(&task, &[]).await.unwrap(); fail::cfg("failed_to_get_tasks", "1*return").unwrap(); - Endpoint::<_, MockRegionInfoProvider, RocksEngine, MockRaftStoreRouter, MockPdClient>::start_and_watch_tasks(cli, sched).await.unwrap(); + Endpoint::< + _, + MockRegionInfoProvider, + RocksEngine, + CdcRaftRouter, + MockPdClient, + >::start_and_watch_tasks(cli, sched) + .await + .unwrap(); fail::remove("failed_to_get_tasks"); let _t1 = rx.recv().unwrap(); diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 6c825bf30c5..1b663c0e982 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ 
-7,8 +7,8 @@ use futures::executor::block_on; use kvproto::{kvrpcpb::ExtraOp, metapb::Region, raft_cmdpb::CmdType}; use raftstore::{ coprocessor::{ObserveHandle, RegionInfoProvider}, - router::RaftStoreRouter, - store::{fsm::ChangeObserver, Callback, SignificantMsg}, + router::CdcHandle, + store::{fsm::ChangeObserver, Callback}, }; use tikv::storage::{ kv::StatisticsSummary, @@ -200,7 +200,7 @@ impl InitialDataLoader where E: KvEngine, R: RegionInfoProvider + Clone + 'static, - RT: RaftStoreRouter, + RT: CdcHandle, { pub fn new( router: RT, @@ -288,33 +288,33 @@ where let (callback, fut) = tikv_util::future::paired_future_callback::>(); + self.router - .significant_send( - region.id, - SignificantMsg::CaptureChange { - cmd, - region_epoch: region.get_region_epoch().clone(), - callback: Callback::read(Box::new(|snapshot| { - if snapshot.response.get_header().has_error() { - callback(Err(Error::RaftRequest( - snapshot.response.get_header().get_error().clone(), - ))); - return; - } - if let Some(snap) = snapshot.snapshot { - callback(Ok(snap)); - return; - } - callback(Err(Error::Other(box_err!( - "PROBABLY BUG: the response contains neither error nor snapshot" - )))) - })), - }, + .capture_change( + region.get_id(), + region.get_region_epoch().clone(), + cmd, + Callback::read(Box::new(|snapshot| { + if snapshot.response.get_header().has_error() { + callback(Err(Error::RaftRequest( + snapshot.response.get_header().get_error().clone(), + ))); + return; + } + if let Some(snap) = snapshot.snapshot { + callback(Ok(snap)); + return; + } + callback(Err(Error::Other(box_err!( + "PROBABLY BUG: the response contains neither error nor snapshot" + )))) + })), ) .context(format_args!( "failed to register the observer to region {}", region.get_id() ))?; + let snap = block_on(fut) .map_err(|err| { annotate!( diff --git a/components/backup-stream/src/lib.rs b/components/backup-stream/src/lib.rs index a36b42c227d..ac7ab1f718f 100644 --- a/components/backup-stream/src/lib.rs +++ 
b/components/backup-stream/src/lib.rs @@ -21,5 +21,7 @@ mod subscription_track; pub mod utils; pub use checkpoint_manager::GetCheckpointResult; -pub use endpoint::{Endpoint, ObserveOp, RegionCheckpointOperation, RegionSet, Task}; +pub use endpoint::{ + BackupStreamResolver, Endpoint, ObserveOp, RegionCheckpointOperation, RegionSet, Task, +}; pub use service::Service; diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index 316f0d9fb53..bf1a5552f71 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -18,10 +18,9 @@ use pd_client::PdClient; use raft::StateRole; use raftstore::{ coprocessor::{ObserveHandle, RegionInfoProvider}, - router::RaftStoreRouter, + router::CdcHandle, store::fsm::ChangeObserver, }; -use resolved_ts::LeadershipResolver; use tikv::storage::Statistics; use tikv_util::{box_err, debug, info, time::Instant, warn, worker::Scheduler}; use tokio::sync::mpsc::{channel, Receiver, Sender}; @@ -30,7 +29,7 @@ use yatp::task::callback::Handle as YatpHandle; use crate::{ annotate, - endpoint::ObserveOp, + endpoint::{BackupStreamResolver, ObserveOp}, errors::{Error, Result}, event_loader::InitialDataLoader, future, @@ -144,7 +143,7 @@ impl InitialScan for InitialDataLoader where E: KvEngine, R: RegionInfoProvider + Clone + 'static, - RT: RaftStoreRouter, + RT: CdcHandle, { fn do_initial_scan( &self, @@ -376,11 +375,11 @@ where meta_cli: MetadataClient, pd_client: Arc, scan_pool_size: usize, - leader_checker: LeadershipResolver, + resolver: BackupStreamResolver, ) -> (Self, future![()]) where E: KvEngine, - RT: RaftStoreRouter + 'static, + RT: CdcHandle + 'static, { let (tx, rx) = channel(MESSAGE_BUFFER_SIZE); let scan_pool_handle = spawn_executors(initial_loader.clone(), scan_pool_size); @@ -396,7 +395,7 @@ where scan_pool_handle: Arc::new(scan_pool_handle), scans: CallbackWaitGroup::new(), }; - let fut = 
op.clone().region_operator_loop(rx, leader_checker); + let fut = op.clone().region_operator_loop(rx, resolver); (op, fut) } @@ -416,11 +415,14 @@ where } /// the handler loop. - async fn region_operator_loop( + async fn region_operator_loop( self, mut message_box: Receiver, - mut leader_checker: LeadershipResolver, - ) { + mut resolver: BackupStreamResolver, + ) where + E: KvEngine, + RT: CdcHandle + 'static, + { while let Some(op) = message_box.recv().await { // Skip some trivial resolve commands. if !matches!(op, ObserveOp::ResolveRegions { .. }) { @@ -487,9 +489,7 @@ where warn!("waiting for initial scanning done timed out, forcing progress!"; "take" => ?now.saturating_elapsed(), "timedout" => %timedout); } - let regions = leader_checker - .resolve(self.subs.current_regions(), min_ts) - .await; + let regions = resolver.resolve(self.subs.current_regions(), min_ts).await; let cps = self.subs.resolve_with(min_ts, regions); let min_region = cps.iter().min_by_key(|rs| rs.checkpoint); // If there isn't any region observed, the `min_ts` can be used as resolved ts diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/mod.rs index 7b2fe88b8a1..9dc38e36320 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/mod.rs @@ -19,7 +19,8 @@ use backup_stream::{ }, observer::BackupStreamObserver, router::Router, - utils, Endpoint, GetCheckpointResult, RegionCheckpointOperation, RegionSet, Service, Task, + utils, BackupStreamResolver, Endpoint, GetCheckpointResult, RegionCheckpointOperation, + RegionSet, Service, Task, }; use futures::{executor::block_on, AsyncWriteExt, Future, Stream, StreamExt}; use grpcio::{ChannelBuilder, Server, ServerBuilder}; @@ -32,6 +33,8 @@ use kvproto::{ }; use pd_client::PdClient; use protobuf::parse_from_bytes; +use raftstore::router::CdcRaftRouter; +use resolved_ts::LeadershipResolver; use tempdir::TempDir; use test_raftstore::{new_server_cluster, Cluster, ServerCluster}; use 
test_util::retry; @@ -335,11 +338,24 @@ impl Suite { let worker = self.endpoints.get_mut(&id).unwrap(); let sim = cluster.sim.wl(); let raft_router = sim.get_server_router(id); + let raft_router = CdcRaftRouter(raft_router); let cm = sim.get_concurrency_manager(id); let regions = sim.region_info_accessors.get(&id).unwrap().clone(); let ob = self.obs.get(&id).unwrap().clone(); cfg.enable = true; cfg.temp_path = format!("/{}/{}", self.temp_files.path().display(), id); + let resolver = LeadershipResolver::new( + id, + cluster.pd_client.clone(), + Arc::clone(&self.env), + Arc::clone(&sim.security_mgr), + cluster.store_metas[&id] + .lock() + .unwrap() + .region_read_progress + .clone(), + Duration::from_secs(60), + ); let endpoint = Endpoint::new( id, self.meta_store.clone(), @@ -350,13 +366,7 @@ impl Suite { raft_router, cluster.pd_client.clone(), cm, - Arc::clone(&self.env), - cluster.store_metas[&id] - .lock() - .unwrap() - .region_read_progress - .clone(), - Arc::clone(&sim.security_mgr), + BackupStreamResolver::V1(resolver), ); worker.start(endpoint); } diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index dfeb4f78045..fd4580d4aea 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -33,7 +33,7 @@ use raftstore::{ router::CdcHandle, store::fsm::{store::StoreRegionMeta, ChangeObserver}, }; -use resolved_ts::{LeadershipResolver, Resolver}; +use resolved_ts::{resolve_by_raft, LeadershipResolver, Resolver}; use security::SecurityManager; use tikv::{ config::CdcConfig, @@ -1079,9 +1079,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint( - &self, - regions: Vec, - min_ts: TimeStamp, - cdc_handle: T, - ) -> Vec - where - T: 'static + CdcHandle, - E: KvEngine, - { - let mut reqs = Vec::with_capacity(regions.len()); - for region_id in regions { - let cdc_handle_clone = cdc_handle.clone(); - let req = async move { - let (tx, rx) = tokio::sync::oneshot::channel(); - let callback = Callback::read(Box::new(move 
|resp| { - let resp = if resp.response.get_header().has_error() { - None - } else { - Some(region_id) - }; - if tx.send(resp).is_err() { - error!("cdc send tso response failed"; "region_id" => region_id); - } - })); - if let Err(e) = cdc_handle_clone.check_leadership(region_id, callback) { - warn!("cdc send LeaderCallback failed"; "err" => ?e, "min_ts" => min_ts); - return None; - } - rx.await.unwrap_or(None) - }; - reqs.push(req); - } - - let resps = futures::future::join_all(reqs).await; - resps.into_iter().flatten().collect::>() - } - // Confirms leadership of region peer before trying to advance resolved ts. // This function broadcasts a special message to all stores, gets the leader id // of them to confirm whether current peer has a quorum which accepts its @@ -454,6 +416,39 @@ impl LeadershipResolver { } } +pub async fn resolve_by_raft(regions: Vec, min_ts: TimeStamp, cdc_handle: T) -> Vec +where + T: 'static + CdcHandle, + E: KvEngine, +{ + let mut reqs = Vec::with_capacity(regions.len()); + for region_id in regions { + let cdc_handle_clone = cdc_handle.clone(); + let req = async move { + let (tx, rx) = tokio::sync::oneshot::channel(); + let callback = Callback::read(Box::new(move |resp| { + let resp = if resp.response.get_header().has_error() { + None + } else { + Some(region_id) + }; + if tx.send(resp).is_err() { + error!("cdc send tso response failed"; "region_id" => region_id); + } + })); + if let Err(e) = cdc_handle_clone.check_leadership(region_id, callback) { + warn!("cdc send LeaderCallback failed"; "err" => ?e, "min_ts" => min_ts); + return None; + } + rx.await.unwrap_or(None) + }; + reqs.push(req); + } + + let resps = futures::future::join_all(reqs).await; + resps.into_iter().flatten().collect::>() +} + fn region_has_quorum(peers: &[Peer], stores: &[u64]) -> bool { let mut voters = 0; let mut incoming_voters = 0; diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 890089a6950..ec3468c6c68 100644 --- 
a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -25,6 +25,7 @@ use std::{ use api_version::{dispatch_api_version, KvFormat}; use backup_stream::{ config::BackupStreamConfigManager, metadata::store::PdStore, observer::BackupStreamObserver, + BackupStreamResolver, }; use causal_ts::CausalTsProviderImpl; use cdc::{CdcConfigManager, MemoryQuota}; @@ -68,6 +69,7 @@ use raftstore::{ }, RaftRouterCompactedEventSender, }; +use resolved_ts::LeadershipResolver; use resource_control::{ ResourceGroupManager, ResourceManagerService, MIN_PRIORITY_UPDATE_INTERVAL, }; @@ -786,6 +788,21 @@ where )), ); + let region_read_progress = engines + .store_meta + .lock() + .unwrap() + .region_read_progress + .clone(); + let leadership_resolver = LeadershipResolver::new( + node.id(), + self.pd_client.clone(), + self.env.clone(), + self.security_mgr.clone(), + region_read_progress, + Duration::from_secs(60), + ); + let backup_stream_endpoint = backup_stream::Endpoint::new( node.id(), PdStore::new(Checked::new(Sourced::new( @@ -796,17 +813,10 @@ where backup_stream_scheduler.clone(), backup_stream_ob, self.region_info_accessor.clone(), - self.router.clone(), + CdcRaftRouter(self.router.clone()), self.pd_client.clone(), self.concurrency_manager.clone(), - Arc::clone(&self.env), - engines - .store_meta - .lock() - .unwrap() - .region_read_progress - .clone(), - Arc::clone(&self.security_mgr), + BackupStreamResolver::V1(leadership_resolver), ); backup_stream_worker.start(backup_stream_endpoint); self.core.to_stop.push(backup_stream_worker); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 202307f7767..4d1a9f2daf6 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -14,6 +14,7 @@ use std::{ cmp, collections::HashMap, + marker::PhantomData, path::{Path, PathBuf}, str::FromStr, sync::{atomic::AtomicU64, mpsc, Arc}, @@ -22,6 +23,10 @@ use std::{ }; use api_version::{dispatch_api_version, KvFormat}; 
+use backup_stream::{ + config::BackupStreamConfigManager, metadata::store::PdStore, observer::BackupStreamObserver, + BackupStreamResolver, +}; use causal_ts::CausalTsProviderImpl; use cdc::{CdcConfigManager, MemoryQuota}; use concurrency_manager::ConcurrencyManager; @@ -34,9 +39,12 @@ use grpcio_health::HealthService; use kvproto::{ brpb::create_backup, cdcpb_grpc::create_change_data, deadlock::create_deadlock, diagnosticspb::create_diagnostics, import_sstpb_grpc::create_import_sst, kvrpcpb::ApiVersion, - resource_usage_agent::create_resource_metering_pub_sub, + logbackuppb::create_log_backup, resource_usage_agent::create_resource_metering_pub_sub, +}; +use pd_client::{ + meta_storage::{Checked, Sourced}, + PdClient, RpcClient, }; -use pd_client::{PdClient, RpcClient}; use raft_log_engine::RaftLogEngine; use raftstore::{ coprocessor::{ @@ -191,6 +199,7 @@ struct TikvServer { cdc_worker: Option>>, cdc_scheduler: Option>, cdc_memory_quota: Option, + backup_stream_scheduler: Option>, sst_worker: Option>>, quota_limiter: Arc, resource_manager: Option>, @@ -328,6 +337,7 @@ where cdc_worker: None, cdc_scheduler: None, cdc_memory_quota: None, + backup_stream_scheduler: None, sst_worker: None, quota_limiter, resource_manager, @@ -627,6 +637,46 @@ where self.core.to_stop.push(rts_worker); } + // Start backup stream + self.backup_stream_scheduler = if self.core.config.log_backup.enable { + // Create backup stream. + let mut backup_stream_worker = Box::new(LazyWorker::new("backup-stream")); + let backup_stream_scheduler = backup_stream_worker.scheduler(); + + // Register backup-stream observer. + let backup_stream_ob = BackupStreamObserver::new(backup_stream_scheduler.clone()); + backup_stream_ob.register_to(self.coprocessor_host.as_mut().unwrap()); + // Register config manager. 
+ cfg_controller.register( + tikv::config::Module::BackupStream, + Box::new(BackupStreamConfigManager::new( + backup_stream_worker.scheduler(), + self.core.config.log_backup.clone(), + )), + ); + + let backup_stream_endpoint = backup_stream::Endpoint::new( + self.node.as_ref().unwrap().id(), + PdStore::new(Checked::new(Sourced::new( + Arc::clone(&self.pd_client), + pd_client::meta_storage::Source::LogBackup, + ))), + self.core.config.log_backup.clone(), + backup_stream_scheduler.clone(), + backup_stream_ob, + self.region_info_accessor.as_ref().unwrap().clone(), + self.router.clone().unwrap(), + self.pd_client.clone(), + self.concurrency_manager.clone(), + BackupStreamResolver::V2(self.router.clone().unwrap(), PhantomData), + ); + backup_stream_worker.start(backup_stream_endpoint); + self.core.to_stop.push(backup_stream_worker); + Some(backup_stream_scheduler) + } else { + None + }; + let server_config = Arc::new(VersionTrack::new(self.core.config.server.clone())); self.core @@ -840,6 +890,17 @@ where fatal!("failed to register import service"); } + if let Some(sched) = self.backup_stream_scheduler.take() { + let pitr_service = backup_stream::Service::new(sched); + if servers + .server + .register_service(create_log_backup(pitr_service)) + .is_some() + { + fatal!("failed to register log backup service"); + } + } + self.cfg_controller .as_mut() .unwrap() From dcf7f055f4ae869849161bd55f76dace8997be70 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Fri, 21 Apr 2023 10:05:20 +0800 Subject: [PATCH 0655/1149] raftstore-v2: report sending/recving count (#14617) close tikv/tikv#14581 store heartbeat will report sending/recving count to the pd . 
Signed-off-by: bufferflies <1045931706@qq.com> --- components/raftstore-v2/src/operation/pd.rs | 4 +-- components/raftstore/src/store/snap.rs | 16 +++++++-- src/server/tablet_snap.rs | 21 +++++------- tests/failpoints/cases/test_snap.rs | 38 ++++++++++----------- 4 files changed, 42 insertions(+), 37 deletions(-) diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index f45cae390da..7ad82959fa8 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -46,8 +46,8 @@ impl Store { let snap_stats = ctx.snap_mgr.stats(); // todo: imple snapshot status report - stats.set_sending_snap_count(0); - stats.set_receiving_snap_count(0); + stats.set_sending_snap_count(snap_stats.sending_count as u32); + stats.set_receiving_snap_count(snap_stats.receiving_count as u32); stats.set_snapshot_stats(snap_stats.stats.into()); STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 69d948e3ae4..eb407b8d2bf 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1999,6 +1999,8 @@ pub struct TabletSnapManager { key_manager: Option>, receiving: Arc>>, stats: Arc>>, + sending_count: Arc, + recving_count: Arc, } impl TabletSnapManager { @@ -2023,6 +2025,8 @@ impl TabletSnapManager { key_manager, receiving: Arc::default(), stats: Arc::default(), + sending_count: Arc::default(), + recving_count: Arc::default(), }) } @@ -2055,8 +2059,8 @@ impl TabletSnapManager { .filter(|stat| stat.get_total_duration_sec() > 1) .collect(); SnapStats { - sending_count: 0, - receiving_count: 0, + sending_count: self.sending_count.load(Ordering::SeqCst), + receiving_count: self.recving_count.load(Ordering::SeqCst), stats, } } @@ -2142,6 +2146,14 @@ impl TabletSnapManager { }) } + pub fn sending_count(&self) -> &Arc { + &self.sending_count + } + + pub fn recving_count(&self) -> &Arc { + 
&self.recving_count + } + #[inline] pub fn key_manager(&self) -> &Option> { &self.key_manager diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index f1044031d9f..8e5a3293909 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -25,10 +25,7 @@ use std::{ fs::{self, File}, io::{BorrowedBuf, Read, Seek, SeekFrom, Write}, path::Path, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, - }, + sync::{atomic::Ordering, Arc}, time::Duration, }; @@ -355,6 +352,9 @@ pub(crate) async fn accept_missing( } // Now receive other files. loop { + fail_point!("receiving_snapshot_net_error", |_| { + Err(box_err!("failed to receive snapshot")) + }); let chunk = match stream.next().await { Some(Ok(mut req)) if req.has_chunk() => req.take_chunk(), Some(Ok(req)) if req.has_end() => { @@ -691,8 +691,6 @@ pub struct TabletRunner { raft_router: R, cfg_tracker: Tracker, cfg: Config, - sending_count: Arc, - recving_count: Arc, cache_builder: B, limiter: Limiter, } @@ -730,8 +728,6 @@ impl TabletRunner { security_mgr, cfg_tracker, cfg: config, - sending_count: Arc::new(AtomicUsize::new(0)), - recving_count: Arc::new(AtomicUsize::new(0)), cache_builder, limiter, }; @@ -776,7 +772,8 @@ where self.pool.spawn(sink.fail(status).map(|_| ())); } Task::RecvTablet { stream, sink } => { - let task_num = self.recving_count.load(Ordering::SeqCst); + let recving_count = self.snap_mgr.recving_count().clone(); + let task_num = recving_count.load(Ordering::SeqCst); if task_num >= self.cfg.concurrent_recv_snap_limit { warn!("too many recving snapshot tasks, ignore"); let status = RpcStatus::with_message( @@ -793,7 +790,6 @@ where let snap_mgr = self.snap_mgr.clone(); let raft_router = self.raft_router.clone(); - let recving_count = self.recving_count.clone(); recving_count.fetch_add(1, Ordering::SeqCst); let limiter = self.limiter.clone(); let cache_builder = self.cache_builder.clone(); @@ -810,8 +806,8 @@ where } Task::Send { addr, msg, cb } => { let region_id = 
msg.get_region_id(); - if self.sending_count.load(Ordering::SeqCst) >= self.cfg.concurrent_send_snap_limit - { + let sending_count = self.snap_mgr.sending_count().clone(); + if sending_count.load(Ordering::SeqCst) >= self.cfg.concurrent_send_snap_limit { let key = TabletSnapKey::from_region_snap( msg.get_region_id(), msg.get_to_peer().get_id(), @@ -830,7 +826,6 @@ where let env = Arc::clone(&self.env); let mgr = self.snap_mgr.clone(); let security_mgr = Arc::clone(&self.security_mgr); - let sending_count = Arc::clone(&self.sending_count); sending_count.fetch_add(1, Ordering::SeqCst); let limiter = self.limiter.clone(); let send_task = send_snap( diff --git a/tests/failpoints/cases/test_snap.rs b/tests/failpoints/cases/test_snap.rs index 4ca18dcd716..a090ba8530c 100644 --- a/tests/failpoints/cases/test_snap.rs +++ b/tests/failpoints/cases/test_snap.rs @@ -641,33 +641,31 @@ fn test_snapshot_gc_after_failed() { cluster.sim.wl().clear_recv_filters(3); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_sending_fail_with_net_error() { - let mut cluster = new_server_cluster(1, 2); + let mut cluster = new_cluster(1, 2); configure_for_snapshot(&mut cluster.cfg); cluster.cfg.raft_store.snap_gc_timeout = ReadableDuration::millis(300); - let pd_client = Arc::clone(&cluster.pd_client); - // Disable default max peer count check. + let pd_client = cluster.pd_client.clone(); + // Disable default max peer number check. 
pd_client.disable_default_operator(); let r1 = cluster.run_conf_change(); cluster.must_put(b"k1", b"v1"); let (send_tx, send_rx) = mpsc::sync_channel(1); // only send one MessageType::MsgSnapshot message - cluster.sim.wl().add_send_filter( - 1, - Box::new( - RegionPacketFilter::new(r1, 1) - .allow(1) - .direction(Direction::Send) - .msg_type(MessageType::MsgSnapshot) - .set_msg_callback(Arc::new(move |m: &RaftMessage| { - if m.get_message().get_msg_type() == MessageType::MsgSnapshot { - let _ = send_tx.send(()); - } - })), - ), - ); + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(r1, 1) + .allow(1) + .direction(Direction::Send) + .msg_type(MessageType::MsgSnapshot) + .set_msg_callback(Arc::new(move |m: &RaftMessage| { + if m.get_message().get_msg_type() == MessageType::MsgSnapshot { + let _ = send_tx.send(()); + } + })), + )); // peer2 will interrupt in receiving snapshot fail::cfg("receiving_snapshot_net_error", "return()").unwrap(); @@ -678,8 +676,8 @@ fn test_sending_fail_with_net_error() { // need to wait receiver handle the snapshot request sleep_ms(100); - // peer2 will not become learner so ti will has k1 key and receiving count will - // zero + // peer2 can't receive any snapshot, so it doesn't have any key valuse. + // but the receiving_count should be zero if receiving snapshot is failed. 
let engine2 = cluster.get_engine(2); must_get_none(&engine2, b"k1"); assert_eq!(cluster.get_snap_mgr(2).stats().receiving_count, 0); From 2984dd18a36833084cbc65509717c64d81944873 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 21 Apr 2023 13:05:20 +0800 Subject: [PATCH 0656/1149] raftstore,server: add enable_v2_compatible_learner config (#14616) ref tikv/tikv#14579 raftstore,server: add enable_v2_compatible_learner config The new config is added to clean up hard code tiflash check Signed-off-by: Neil Shen --- components/raftstore/src/store/config.rs | 9 +++++ components/raftstore/src/store/mod.rs | 4 +-- components/raftstore/src/store/snap.rs | 33 +++++++++++++------ components/raftstore/src/store/worker/mod.rs | 2 -- .../raftstore/src/store/worker/region.rs | 4 +-- components/server/src/server.rs | 3 ++ components/test_raftstore/src/node.rs | 1 + components/test_raftstore/src/server.rs | 1 + src/config/mod.rs | 16 +++++++++ src/server/server.rs | 11 +------ src/server/snap.rs | 25 +++++++------- tests/integrations/config/dynamic/snap.rs | 1 - tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + tests/integrations/raftstore/test_snap.rs | 11 +++---- 15 files changed, 77 insertions(+), 46 deletions(-) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 301f3cea0cc..aabf173e674 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -334,6 +334,14 @@ pub struct Config { #[online_config(hidden)] // Interval to check if need to request snapshot. pub check_request_snapshot_interval: ReadableDuration, + + /// Make raftstore v1 learners compatible with raftstore v2 by: + /// * Recving tablet snapshot from v2. + /// * Responsing GcPeerRequest from v2. 
+ #[doc(hidden)] + #[online_config(hidden)] + #[serde(alias = "enable-partitioned-raft-kv-compatible-learner")] + pub enable_v2_compatible_learner: bool, } impl Default for Config { @@ -449,6 +457,7 @@ impl Default for Config { check_peers_availability_interval: ReadableDuration::secs(30), // TODO: make its value reasonable check_request_snapshot_interval: ReadableDuration::minutes(1), + enable_v2_compatible_learner: false, } } } diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index ed97c58ab86..c007b622ee1 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -83,7 +83,7 @@ pub use self::{ LocalReadContext, LocalReader, LocalReaderCore, PdStatsMonitor, PdTask, ReadDelegate, ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, SplitInfo, - StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, ENGINE, - NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, TIFLASH, + StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, + NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, }, }; diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index eb407b8d2bf..ee488bbc5aa 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1389,7 +1389,7 @@ pub struct SnapManager { max_total_size: Arc, // only used to receive snapshot from v2 - tablet_snap_manager: TabletSnapManager, + tablet_snap_manager: Option, } impl Clone for SnapManager { @@ -1626,7 +1626,11 @@ impl SnapManager { /// NOTE: don't call it in raftstore thread. 
pub fn get_total_snap_size(&self) -> Result { let size_v1 = self.core.get_total_snap_size()?; - let size_v2 = self.tablet_snap_manager.total_snap_size().unwrap_or(0); + let size_v2 = self + .tablet_snap_manager + .as_ref() + .map(|s| s.total_snap_size().unwrap_or(0)) + .unwrap_or(0); Ok(size_v1 + size_v2) } @@ -1763,8 +1767,8 @@ impl SnapManager { self.core.delete_snapshot(key, snap, check_entry) } - pub fn tablet_snap_manager(&self) -> &TabletSnapManager { - &self.tablet_snap_manager + pub fn tablet_snap_manager(&self) -> Option<&TabletSnapManager> { + self.tablet_snap_manager.as_ref() } pub fn limiter(&self) -> &Limiter { @@ -1873,6 +1877,7 @@ pub struct SnapManagerBuilder { max_total_size: u64, max_per_file_size: u64, enable_multi_snapshot_files: bool, + enable_receive_tablet_snapshot: bool, key_manager: Option>, } @@ -1895,6 +1900,10 @@ impl SnapManagerBuilder { self.enable_multi_snapshot_files = enabled; self } + pub fn enable_receive_tablet_snapshot(mut self, enabled: bool) -> SnapManagerBuilder { + self.enable_receive_tablet_snapshot = enabled; + self + } #[must_use] pub fn encryption_key_manager(mut self, m: Option>) -> SnapManagerBuilder { self.key_manager = m; @@ -1915,7 +1924,11 @@ impl SnapManagerBuilder { assert!(!path.is_empty()); let mut path_v2 = path.clone(); path_v2.push_str("_v2"); - let tablet_snap_mgr = TabletSnapManager::new(&path_v2, self.key_manager.clone()).unwrap(); + let tablet_snap_manager = if self.enable_receive_tablet_snapshot { + Some(TabletSnapManager::new(&path_v2, self.key_manager.clone()).unwrap()) + } else { + None + }; let mut snapshot = SnapManager { core: SnapManagerCore { @@ -1931,7 +1944,7 @@ impl SnapManagerBuilder { stats: Default::default(), }, max_total_size: Arc::new(AtomicU64::new(max_total_size)), - tablet_snap_manager: tablet_snap_mgr, + tablet_snap_manager, }; snapshot.set_max_per_file_size(self.max_per_file_size); // set actual max_per_file_size snapshot @@ -3202,8 +3215,8 @@ pub mod tests { } #[test] - fn 
test_init() { - let builder = SnapManagerBuilder::default(); + fn test_init_enable_receive_tablet_snapshot() { + let builder = SnapManagerBuilder::default().enable_receive_tablet_snapshot(true); let snap_dir = Builder::new() .prefix("test_snap_path_does_not_exist") .tempdir() @@ -3217,7 +3230,7 @@ pub mod tests { path.push_str("_v2"); assert!(Path::new(&path).exists()); - let builder = SnapManagerBuilder::default(); + let builder = SnapManagerBuilder::default().enable_receive_tablet_snapshot(true); let snap_dir = Builder::new() .prefix("test_snap_path_exist") .tempdir() @@ -3230,7 +3243,7 @@ pub mod tests { path.push_str("_v2"); assert!(Path::new(&path).exists()); - let builder = SnapManagerBuilder::default(); + let builder = SnapManagerBuilder::default().enable_receive_tablet_snapshot(true); let snap_dir = Builder::new() .prefix("test_tablet_snap_path_exist") .tempdir() diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index eddcfe1757a..ac23f4e58d5 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -16,8 +16,6 @@ mod split_check; mod split_config; mod split_controller; -pub use region::{ENGINE, TIFLASH}; - #[cfg(test)] pub use self::region::tests::make_raftstore_cfg as make_region_worker_raftstore_cfg; pub use self::{ diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index d6d9d0272d3..d889047a0f9 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -52,8 +52,8 @@ use crate::{ const CLEANUP_MAX_REGION_COUNT: usize = 64; -pub const TIFLASH: &str = "tiflash"; -pub const ENGINE: &str = "engine"; +const TIFLASH: &str = "tiflash"; +const ENGINE: &str = "engine"; /// Region related task #[derive(Debug)] diff --git a/components/server/src/server.rs b/components/server/src/server.rs index ec3468c6c68..625e8d8a31b 100644 --- 
a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -643,6 +643,9 @@ where .feature_gate() .can_enable(MULTI_FILES_SNAPSHOT_FEATURE), ) + .enable_receive_tablet_snapshot( + self.core.config.raft_store.enable_v2_compatible_learner, + ) .build(snap_path); // Create coprocessor endpoint. diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index c4c516fb7f9..75ab0064a17 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -273,6 +273,7 @@ impl Simulator for NodeCluster { .encryption_key_manager(key_manager) .max_per_file_size(cfg.raft_store.max_snapshot_file_raw_size.0) .enable_multi_snapshot_files(true) + .enable_receive_tablet_snapshot(cfg.raft_store.enable_v2_compatible_learner) .build(tmp.path().to_str().unwrap()); (snap_mgr, Some(tmp)) } else { diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 4c060cef2ce..da97b31ab3a 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -458,6 +458,7 @@ impl ServerCluster { .encryption_key_manager(key_manager) .max_per_file_size(cfg.raft_store.max_snapshot_file_raw_size.0) .enable_multi_snapshot_files(true) + .enable_receive_tablet_snapshot(cfg.raft_store.enable_v2_compatible_learner) .build(tmp_str); self.snap_mgrs.insert(node_id, snap_mgr.clone()); let server_cfg = Arc::new(VersionTrack::new(cfg.server.clone())); diff --git a/src/config/mod.rs b/src/config/mod.rs index 2efe9ea4c9b..5d20b027c4e 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3291,6 +3291,15 @@ impl TikvConfig { if self.rocksdb.titan.enabled { return Err("partitioned-raft-kv doesn't support titan.".into()); } + + if self.raft_store.enable_v2_compatible_learner { + self.raft_store.enable_v2_compatible_learner = false; + warn!( + "raftstore.enable-partitioned-raft-kv-compatible-learner was true but \ + storage.engine was partitioned-raft-kv, no need to 
enable \ + enable-partitioned-raft-kv-compatible-learner, overwrite to false" + ); + } } self.raft_store.raftdb_path = self.infer_raft_db_path(None)?; @@ -5442,6 +5451,13 @@ mod tests { cfg.storage.block_cache.capacity = Some(ReadableSize(system * 3 / 4)); cfg.validate().unwrap(); assert_eq!(cfg.memory_usage_limit.unwrap(), ReadableSize(system)); + + // Test raftstore.enable-partitioned-raft-kv-compatible-learner. + let mut cfg = TikvConfig::default(); + cfg.raft_store.enable_v2_compatible_learner = true; + cfg.storage.engine = EngineType::RaftKv2; + cfg.validate().unwrap(); + assert!(!cfg.raft_store.enable_v2_compatible_learner); } #[test] diff --git a/src/server/server.rs b/src/server/server.rs index b3db4b4b57f..45778835d29 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -13,7 +13,7 @@ use futures::{compat::Stream01CompatExt, stream::StreamExt}; use grpcio::{ChannelBuilder, Environment, ResourceQuota, Server as GrpcServer, ServerBuilder}; use grpcio_health::{create_health, HealthService, ServingStatus}; use kvproto::tikvpb::*; -use raftstore::store::{CheckLeaderTask, SnapManager, TabletSnapManager, ENGINE, TIFLASH}; +use raftstore::store::{CheckLeaderTask, SnapManager, TabletSnapManager}; use resource_control::ResourceGroupManager; use security::SecurityManager; use tikv_util::{ @@ -71,7 +71,6 @@ pub struct Server { // For sending/receiving snapshots. snap_mgr: Either, snap_worker: LazyWorker, - tiflash_engine: bool, // Currently load statistics is done in the thread. 
stats_pool: Option, @@ -182,12 +181,6 @@ where let trans = ServerTransport::new(raft_client); health_service.set_serving_status("", ServingStatus::NotServing); - let tiflash_engine = cfg - .value() - .labels - .iter() - .any(|entry| entry.0 == ENGINE && entry.1 == TIFLASH); - let svr = Server { env: Arc::clone(&env), builder_or_server: Some(builder), @@ -203,7 +196,6 @@ where debug_thread_pool, health_service, timer: GLOBAL_TIMER_HANDLE.clone(), - tiflash_engine, }; Ok(svr) @@ -273,7 +265,6 @@ where self.raft_router.clone(), security_mgr, cfg, - self.tiflash_engine, ); self.snap_worker.start(snap_runner); } diff --git a/src/server/snap.rs b/src/server/snap.rs index 0512a75214a..00883094471 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -364,7 +364,6 @@ pub struct Runner { cfg: Config, sending_count: Arc, recving_count: Arc, - can_receive_tablet_snapshot: bool, } impl Runner { @@ -377,7 +376,6 @@ impl Runner { r: R, security_mgr: Arc, cfg: Arc>, - can_receive_tablet_snapshot: bool, ) -> Self { let cfg_tracker = cfg.clone().tracker("snap-sender".to_owned()); let config = cfg.value().clone(); @@ -397,7 +395,6 @@ impl Runner { cfg: config, sending_count: Arc::new(AtomicUsize::new(0)), recving_count: Arc::new(AtomicUsize::new(0)), - can_receive_tablet_snapshot, }; snap_worker } @@ -467,14 +464,17 @@ impl Runnable for Runner { self.pool.spawn(task); } Task::RecvTablet { stream, sink } => { - if !self.can_receive_tablet_snapshot { - let status = RpcStatus::with_message( - RpcStatusCode::UNIMPLEMENTED, - "tablet snap is not supported".to_string(), - ); - self.pool.spawn(sink.fail(status).map(|_| ())); - return; - } + let tablet_snap_mgr = match self.snap_mgr.tablet_snap_manager() { + Some(s) => s.clone(), + None => { + let status = RpcStatus::with_message( + RpcStatusCode::UNIMPLEMENTED, + "tablet snap is not supported".to_string(), + ); + self.pool.spawn(sink.fail(status).map(|_| ())); + return; + } + }; if let Some(status) = self.receiving_busy() { 
self.pool.spawn(sink.fail(status)); @@ -483,7 +483,6 @@ impl Runnable for Runner { SNAP_TASK_COUNTER_STATIC.recv.inc(); - let snap_mgr = self.snap_mgr.tablet_snap_manager().clone(); let raft_router = self.raft_router.clone(); let recving_count = self.recving_count.clone(); recving_count.fetch_add(1, Ordering::SeqCst); @@ -492,7 +491,7 @@ impl Runnable for Runner { let result = crate::server::tablet_snap::recv_snap( stream, sink, - snap_mgr, + tablet_snap_mgr, raft_router, NoSnapshotCache, // do not use cache in v1 limiter, diff --git a/tests/integrations/config/dynamic/snap.rs b/tests/integrations/config/dynamic/snap.rs index fa1d6a6fe52..bb91d0d62eb 100644 --- a/tests/integrations/config/dynamic/snap.rs +++ b/tests/integrations/config/dynamic/snap.rs @@ -65,7 +65,6 @@ fn start_server( RaftRouterWrap::new(raft_router), security_mgr, Arc::clone(&server_config), - false, ); snap_worker.start(snap_runner); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 34b558f39c0..cd7680e8147 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -260,6 +260,7 @@ fn test_serde_custom_tikv_config() { check_request_snapshot_interval: ReadableDuration::minutes(1), slow_trend_unsensitive_cause: 10.0, slow_trend_unsensitive_result: 0.5, + enable_v2_compatible_learner: false, }; value.pd = PdConfig::new(vec!["example.com:443".to_owned()]); let titan_cf_config = TitanCfConfig { diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 28a30fcec04..e3940cc7067 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -226,6 +226,7 @@ report-region-buckets-tick-interval = "1234s" max-snapshot-file-raw-size = "10GB" unreachable-backoff = "111s" max-entry-cache-warmup-duration = "2s" +enable-partitioned-raft-kv-compatible-learner = false [coprocessor] split-region-on-table = false diff --git 
a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index 3171aaa1a9e..f9a124a4395 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -784,11 +784,7 @@ fn test_v1_receive_snap_from_v2() { let mut cluster_v2 = test_raftstore_v2::new_server_cluster(1, 1); let mut cluster_v1_tikv = test_raftstore::new_server_cluster(1, 1); - cluster_v1 - .cfg - .server - .labels - .insert(String::from("engine"), String::from("tiflash")); + cluster_v1.cfg.raft_store.enable_v2_compatible_learner = true; cluster_v1.run(); cluster_v2.run(); @@ -839,7 +835,10 @@ fn test_v1_receive_snap_from_v2() { // The snapshot has been received by cluster v1, so check it's completeness let snap_mgr = cluster_v1.get_snap_mgr(1); - let path = snap_mgr.tablet_snap_manager().final_recv_path(&snap_key); + let path = snap_mgr + .tablet_snap_manager() + .unwrap() + .final_recv_path(&snap_key); let rocksdb = engine_rocks::util::new_engine_opt( path.as_path().to_str().unwrap(), RocksDbOptions::default(), From b11c299ff1f4f6051b6e534398e1f2aa1cc2fca1 Mon Sep 17 00:00:00 2001 From: Hangjie Mo Date: Fri, 21 Apr 2023 16:05:20 +0800 Subject: [PATCH 0657/1149] copr: fix extral physical table id when idx key < `MAX_OLD_ENCODED_VALUE_LEN` (#14618) close tikv/tikv#14619 fix a bug with `process_old_collation_kv` function. 
related with https://github.com/tikv/tikv/pull/11931, forget process `physical_table_id_column_cnt` in process_old_collation_kv function Signed-off-by: Jason Mo Co-authored-by: Ti Chi Robot --- .../src/index_scan_executor.rs | 86 +++++++++++++++++-- 1 file changed, 81 insertions(+), 5 deletions(-) diff --git a/components/tidb_query_executors/src/index_scan_executor.rs b/components/tidb_query_executors/src/index_scan_executor.rs index de59b843eb5..3a5c53a4d09 100644 --- a/components/tidb_query_executors/src/index_scan_executor.rs +++ b/components/tidb_query_executors/src/index_scan_executor.rs @@ -444,10 +444,12 @@ impl IndexScanExecutorImpl { Ok(()) } - // Process index values that are in old collation. - // NOTE: We should extract the index columns from the key first, and extract the - // handles from value if there is no handle in the key. Otherwise, extract the - // handles from the key. + // Process index values that are in old collation, when + // `new_collations_enabled_on_first_bootstrap` = true also will access this + // function. + // NOTE: We should extract the index columns from the key first, + // and extract the handles from value if there is no handle in the key. + // Otherwise, extract the handles from the key. fn process_old_collation_kv( &mut self, mut key_payload: &[u8], @@ -479,9 +481,11 @@ impl IndexScanExecutorImpl { } DecodeCommonHandle => { // Otherwise, if the handle is common handle, we extract it from the key. 
+ let end_index = + columns.columns_len() - self.pid_column_cnt - self.physical_table_id_column_cnt; Self::extract_columns_from_datum_format( &mut key_payload, - &mut columns[self.columns_id_without_handle.len()..], + &mut columns[self.columns_id_without_handle.len()..end_index], )?; } } @@ -3295,6 +3299,78 @@ mod tests { ); } + #[test] + fn test_common_handle_with_physical_table_id() { + // CREATE TABLE `tcommonhash` ( + // `a` int(11) NOT NULL, + // `b` int(11) DEFAULT NULL, + // `c` int(11) NOT NULL, + // `d` int(11) NOT NUL, + // PRIMARY KEY (`a`,`c`,`d`) /*T![clustered_index] CLUSTERED */, + // KEY `idx_bc` (`b`,`c`) + // ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin + // insert into tcommonhash values (1, 2, 3, 1); + + // idx_bc + let mut idx_exe = IndexScanExecutorImpl { + context: Default::default(), + schema: vec![ + FieldTypeTp::Long.into(), + FieldTypeTp::Long.into(), + FieldTypeTp::Long.into(), + FieldTypeTp::Long.into(), + FieldTypeTp::Long.into(), + // EXTRA_PHYSICAL_TABLE_ID_COL + FieldTypeTp::Long.into(), + ], + columns_id_without_handle: vec![2, 3], + columns_id_for_common_handle: vec![1, 3, 4], + decode_handle_strategy: DecodeHandleStrategy::DecodeCommonHandle, + pid_column_cnt: 0, + physical_table_id_column_cnt: 1, + index_version: -1, + }; + let mut columns = idx_exe.build_column_vec(10); + idx_exe + .process_kv_pair( + &[ + 0x74, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5c, 0x5f, 0x69, 0x80, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x2, 0x3, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x3, + 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x3, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x1, 0x3, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x3, 0x80, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x1, + ], + &[0x0, 0x7d, 0x1], + &mut columns, + ) + .unwrap(); + assert_eq!( + columns[0].raw().last().unwrap().read_datum().unwrap(), + Datum::I64(2) + ); + assert_eq!( + columns[1].raw().last().unwrap().read_datum().unwrap(), + Datum::I64(3) + ); + assert_eq!( + 
columns[2].raw().last().unwrap().read_datum().unwrap(), + Datum::I64(1) + ); + assert_eq!( + columns[3].raw().last().unwrap().read_datum().unwrap(), + Datum::I64(3) + ); + assert_eq!( + columns[4].raw().last().unwrap().read_datum().unwrap(), + Datum::I64(1) + ); + assert_eq!( + // physical table id + columns[5].mut_decoded().to_int_vec()[0].unwrap(), + 92 + ); + } + #[test] fn test_common_handle_index_latin1_bin() { use tidb_query_datatype::builder::FieldTypeBuilder; From 666edeedaef2b326a7b5f2f96fac2473fcdd08fd Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 21 Apr 2023 16:47:20 +0800 Subject: [PATCH 0658/1149] raftstore: enable raftstore-v1 apply tablet snapshot sent from raftstore-v2 (#14584) ref tikv/tikv#14579 enable raftstore-v1 apply tablet snapshot sent from raftstore-v2 Signed-off-by: Spade A Co-authored-by: Ti Chi Robot --- Cargo.lock | 2 +- components/raftstore/src/store/snap.rs | 76 ++++++++++ components/test_raftstore/src/server.rs | 13 ++ src/server/metrics.rs | 1 + src/server/snap.rs | 4 +- src/server/tablet_snap.rs | 33 ++++- tests/integrations/raftstore/test_snap.rs | 168 +++++++++++++++++++++- 7 files changed, 289 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7f2a1e91650..bda2a12187d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2797,7 +2797,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#dc3cd8784a19bc7f058dbeb19cd8cc4672ee9aad" +source = "git+https://github.com/pingcap/kvproto.git#10e7620a630db63d769503ba99c7389f19fb6516" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index ee488bbc5aa..12440abb5d0 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1013,6 +1013,28 @@ impl Snapshot { delete_file_if_exist(&self.meta_file.tmp_path).unwrap(); } } + + // This is 
only used for v2 compatibility. + fn new_for_tablet_snapshot>( + dir: T, + key: &SnapKey, + mgr: &SnapManagerCore, + tablet_snapshot_path: &str, + for_balance: bool, + ) -> RaftStoreResult { + let mut s = Self::new(dir, key, false, CheckPolicy::ErrNotAllowed, mgr)?; + s.init_for_building()?; + let mut meta = gen_snapshot_meta(&s.cf_files[..], for_balance)?; + meta.tablet_snap_path = tablet_snapshot_path.to_string(); + s.meta_file.meta = Some(meta); + s.save_meta_file()?; + Ok(s) + } + + #[cfg(any(test, feature = "testexport"))] + pub fn tablet_snap_path(&self) -> Option { + Some(self.meta_file.meta.as_ref()?.tablet_snap_path.clone()) + } } impl fmt::Debug for Snapshot { @@ -1606,6 +1628,38 @@ impl SnapManager { Ok(Box::new(f)) } + // Tablet snapshot is the snapshot sent from raftstore-v2. + // We enable v1 to receive it to enable tiflash node to receive and apply + // snapshot from raftstore-v2. + // To make it easy, we maintain an empty `store::snapshot` with tablet snapshot + // path storing in it. So tiflash node can detect it and apply properly. 
+ pub fn gen_empty_snapshot_for_tablet_snapshot( + &self, + tablet_snap_key: &TabletSnapKey, + for_balance: bool, + ) -> RaftStoreResult<()> { + let _lock = self.core.registry.rl(); + let base = &self.core.base; + let tablet_snap_path = self + .tablet_snap_manager + .as_ref() + .unwrap() + .final_recv_path(tablet_snap_key); + let snap_key = SnapKey::new( + tablet_snap_key.region_id, + tablet_snap_key.term, + tablet_snap_key.idx, + ); + let _ = Snapshot::new_for_tablet_snapshot( + base, + &snap_key, + &self.core, + tablet_snap_path.to_str().unwrap(), + for_balance, + )?; + Ok(()) + } + pub fn get_snapshot_for_applying(&self, key: &SnapKey) -> RaftStoreResult> { let _lock = self.core.registry.rl(); let base = &self.core.base; @@ -3214,6 +3268,28 @@ pub mod tests { } } + #[test] + fn test_generate_snap_for_tablet_snapshot() { + let snap_dir = Builder::new().prefix("test_snapshot").tempdir().unwrap(); + let snap_mgr = SnapManagerBuilder::default() + .enable_receive_tablet_snapshot(true) + .build(snap_dir.path().to_str().unwrap()); + snap_mgr.init().unwrap(); + let tablet_snap_key = TabletSnapKey::new(1, 2, 3, 4); + snap_mgr + .gen_empty_snapshot_for_tablet_snapshot(&tablet_snap_key, false) + .unwrap(); + + let snap_key = SnapKey::new(1, 3, 4); + let s = snap_mgr.get_snapshot_for_applying(&snap_key).unwrap(); + let expect_path = snap_mgr + .tablet_snap_manager() + .as_ref() + .unwrap() + .final_recv_path(&tablet_snap_key); + assert_eq!(expect_path.to_str().unwrap(), s.tablet_snap_path().unwrap()); + } + #[test] fn test_init_enable_receive_tablet_snapshot() { let builder = SnapManagerBuilder::default().enable_receive_tablet_snapshot(true); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index da97b31ab3a..a59dafd4504 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -815,6 +815,19 @@ impl Cluster { pub fn get_addr(&self, node_id: u64) -> String { 
self.sim.rl().get_addr(node_id) } + + pub fn register_hook( + &self, + node_id: u64, + register: Box)>, + ) { + self.sim + .wl() + .coprocessor_hooks + .entry(node_id) + .or_default() + .push(register); + } } pub fn new_server_cluster(id: u64, count: usize) -> Cluster { diff --git a/src/server/metrics.rs b/src/server/metrics.rs index d35c58cbf34..37c3ce1048f 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -77,6 +77,7 @@ make_auto_flush_static_metric! { pub label_enum SnapTask { send, recv, + recv_v2, } pub label_enum ResolveStore { diff --git a/src/server/snap.rs b/src/server/snap.rs index 00883094471..4324f17459e 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -481,12 +481,13 @@ impl Runnable for Runner { return; } - SNAP_TASK_COUNTER_STATIC.recv.inc(); + SNAP_TASK_COUNTER_STATIC.recv_v2.inc(); let raft_router = self.raft_router.clone(); let recving_count = self.recving_count.clone(); recving_count.fetch_add(1, Ordering::SeqCst); let limiter = self.snap_mgr.limiter().clone(); + let snap_mgr_v1 = self.snap_mgr.clone(); let task = async move { let result = crate::server::tablet_snap::recv_snap( stream, @@ -495,6 +496,7 @@ impl Runnable for Runner { raft_router, NoSnapshotCache, // do not use cache in v1 limiter, + Some(snap_mgr_v1), ) .await; recving_count.fetch_sub(1, Ordering::SeqCst); diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index 8e5a3293909..cb7ec7c988a 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -50,7 +50,10 @@ use kvproto::{ tikvpb::TikvClient, }; use protobuf::Message; -use raftstore::store::snap::{ReceivingGuard, TabletSnapKey, TabletSnapManager}; +use raftstore::store::{ + snap::{ReceivingGuard, TabletSnapKey, TabletSnapManager}, + SnapManager, +}; use security::SecurityManager; use tikv_kv::RaftExtension; use tikv_util::{ @@ -426,6 +429,7 @@ pub(crate) async fn recv_snap_files<'a>( let final_path = snap_mgr.final_recv_path(&context.key); // TODO(tabokie) 
fs::rename(&path, final_path)?; + Ok(context) } @@ -436,12 +440,24 @@ pub(crate) async fn recv_snap( raft_router: R, cache_builder: impl SnapCacheBuilder, limiter: Limiter, + snap_mgr_v1: Option, ) -> Result<()> { let stream = stream.map_err(Error::from); let mut sink = sink; let res = recv_snap_files(&snap_mgr, cache_builder, stream, &mut sink, limiter) .await - .and_then(|context| context.finish(raft_router)); + .and_then(|context| { + // some means we are in raftstore-v1 config and received a tablet snapshot from + // raftstore-v2. Now, it can only happen in tiflash node within a raftstore-v2 + // cluster. + if let Some(snap_mgr_v1) = snap_mgr_v1 { + snap_mgr_v1.gen_empty_snapshot_for_tablet_snapshot( + &context.key, + context.io_type == IoType::LoadBalance, + )?; + } + context.finish(raft_router) + }); match res { Ok(()) => sink.close().await.map_err(Error::from), Err(e) => { @@ -794,9 +810,16 @@ where let limiter = self.limiter.clone(); let cache_builder = self.cache_builder.clone(); let task = async move { - let result = - recv_snap(stream, sink, snap_mgr, raft_router, cache_builder, limiter) - .await; + let result = recv_snap( + stream, + sink, + snap_mgr, + raft_router, + cache_builder, + limiter, + None, + ) + .await; recving_count.fetch_sub(1, Ordering::SeqCst); if let Err(e) = result { error!("failed to recv snapshot"; "err" => %e); diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index f9a124a4395..f474b5cdb8e 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -11,6 +11,7 @@ use std::{ time::Duration, }; +use collections::HashMap; use engine_rocks::{RocksCfOptions, RocksDbOptions}; use engine_traits::{Checkpointer, KvEngine, Peekable, RaftEngineReadOnly, SyncMutable, LARGE_CFS}; use file_system::{IoOp, IoType}; @@ -19,6 +20,7 @@ use grpcio::Environment; use kvproto::raft_serverpb::*; use raft::eraftpb::{Message, MessageType, Snapshot}; use 
raftstore::{ + coprocessor::{ApplySnapshotObserver, BoxApplySnapshotObserver, Coprocessor, CoprocessorHost}, store::{snap::TABLET_SNAPSHOT_VERSION, *}, Result, }; @@ -747,10 +749,14 @@ fn generate_snap( let tablet = engine.get_tablet_by_id(region_id).unwrap(); let region_state = engine.region_local_state(region_id).unwrap().unwrap(); let apply_state = engine.raft_apply_state(region_id).unwrap().unwrap(); + let raft_state = engine.raft_local_state(region_id).unwrap().unwrap(); // Construct snapshot by hand let mut snapshot = Snapshot::default(); - snapshot.mut_metadata().set_term(apply_state.commit_term); + // use commit term for simplicity + snapshot + .mut_metadata() + .set_term(raft_state.get_hard_state().term + 1); snapshot.mut_metadata().set_index(apply_state.applied_index); let conf_state = raftstore::store::util::conf_state_from_region(region_state.get_region()); snapshot.mut_metadata().set_conf_state(conf_state); @@ -771,6 +777,8 @@ fn generate_snap( msg.region_id = region_id; msg.set_to_peer(new_peer(1, 1)); msg.mut_message().set_snapshot(snapshot); + msg.mut_message() + .set_term(raft_state.get_hard_state().commit + 1); msg.mut_message().set_msg_type(MessageType::MsgSnapshot); msg.set_region_epoch(region_state.get_region().get_region_epoch().clone()); @@ -866,3 +874,161 @@ fn test_v1_receive_snap_from_v2() { // test large snapshot test_receive_snap(5000); } + +#[derive(Clone)] +struct MockApplySnapshotObserver { + tablet_snap_paths: Arc>>, +} + +impl Coprocessor for MockApplySnapshotObserver {} + +impl ApplySnapshotObserver for MockApplySnapshotObserver { + fn should_pre_apply_snapshot(&self) -> bool { + true + } + + fn pre_apply_snapshot( + &self, + _: &mut raftstore::coprocessor::ObserverContext<'_>, + peer_id: u64, + _: &raftstore::store::SnapKey, + snap: Option<&raftstore::store::Snapshot>, + ) { + let tablet_path = snap.unwrap().tablet_snap_path().as_ref().unwrap().clone(); + self.tablet_snap_paths + .lock() + .unwrap() + .insert(peer_id, (false, 
tablet_path)); + } + + fn post_apply_snapshot( + &self, + _: &mut raftstore::coprocessor::ObserverContext<'_>, + peer_id: u64, + _: &raftstore::store::SnapKey, + snap: Option<&raftstore::store::Snapshot>, + ) { + let tablet_path = snap.unwrap().tablet_snap_path().as_ref().unwrap().clone(); + match self.tablet_snap_paths.lock().unwrap().entry(peer_id) { + collections::HashMapEntry::Occupied(mut entry) => { + if entry.get_mut().1 == tablet_path { + entry.get_mut().0 = true; + } + } + collections::HashMapEntry::Vacant(_) => {} + } + } +} + +#[test] +fn test_v1_apply_snap_from_v2() { + let mut cluster_v1 = test_raftstore::new_server_cluster(1, 1); + let mut cluster_v2 = test_raftstore_v2::new_server_cluster(1, 1); + cluster_v1.cfg.raft_store.enable_v2_compatible_learner = true; + + let observer = MockApplySnapshotObserver { + tablet_snap_paths: Arc::default(), + }; + let observer_clone = observer.clone(); + cluster_v1.register_hook( + 1, + Box::new(move |host: &mut CoprocessorHost<_>| { + host.registry.register_apply_snapshot_observer( + 1, + BoxApplySnapshotObserver::new(observer_clone.clone()), + ); + }), + ); + + cluster_v1.run(); + cluster_v2.run(); + + let region = cluster_v2.get_region(b""); + cluster_v2.must_split(®ion, b"k0010"); + + let s1_addr = cluster_v1.get_addr(1); + let region_id = region.get_id(); + let engine = cluster_v2.get_engine(1); + + for i in 0..50 { + let k = format!("k{:04}", i); + cluster_v2.must_put(k.as_bytes(), b"val"); + } + cluster_v2.flush_data(); + + let tablet_snap_mgr = cluster_v2.get_snap_mgr(1); + let security_mgr = cluster_v2.get_security_mgr(); + let (msg, snap_key) = generate_snap(&engine, region_id, &tablet_snap_mgr); + let cfg = tikv::server::Config::default(); + let limit = Limiter::new(f64::INFINITY); + let env = Arc::new(Environment::new(1)); + let _ = block_on(async { + send_snap_v2( + env.clone(), + tablet_snap_mgr.clone(), + security_mgr.clone(), + &cfg, + &s1_addr, + msg, + limit.clone(), + ) + .unwrap() + .await + }); 
+ + let snap_mgr = cluster_v1.get_snap_mgr(region_id); + let path = snap_mgr + .tablet_snap_manager() + .as_ref() + .unwrap() + .final_recv_path(&snap_key); + let path_str = path.as_path().to_str().unwrap(); + + check_observer(&observer, region_id, path_str); + + let region = cluster_v2.get_region(b"k0011"); + let region_id = region.get_id(); + let (msg, snap_key) = generate_snap(&engine, region_id, &tablet_snap_mgr); + let _ = block_on(async { + send_snap_v2( + env, + tablet_snap_mgr, + security_mgr, + &cfg, + &s1_addr, + msg, + limit, + ) + .unwrap() + .await + }); + + let snap_mgr = cluster_v1.get_snap_mgr(region_id); + let path = snap_mgr + .tablet_snap_manager() + .as_ref() + .unwrap() + .final_recv_path(&snap_key); + let path_str = path.as_path().to_str().unwrap(); + + check_observer(&observer, region_id, path_str); +} + +fn check_observer(observer: &MockApplySnapshotObserver, region_id: u64, snap_path: &str) { + for _ in 0..10 { + if let Some(pair) = observer + .tablet_snap_paths + .as_ref() + .lock() + .unwrap() + .get(®ion_id) + { + if pair.0 && pair.1 == snap_path { + return; + } + } + std::thread::sleep(Duration::from_millis(200)); + } + + panic!("cannot find {:?} in observer", snap_path); +} From 20b75dc4436dc19f1d41acadde9704041dbb7c0c Mon Sep 17 00:00:00 2001 From: qupeng Date: Fri, 21 Apr 2023 19:09:20 +0800 Subject: [PATCH 0659/1149] raft: peers shouldn't hibernate incorrectly when one node fails (#14574) ref tikv/tikv#14547 raft: peers shouldn't hibernate incorrectly when one node fails Signed-off-by: qupeng Co-authored-by: Ti Chi Robot --- components/raftstore/src/store/fsm/store.rs | 44 +++++++++----- components/raftstore/src/store/metrics.rs | 7 +++ components/test_raftstore-v2/src/server.rs | 18 ++++-- components/test_raftstore/src/server.rs | 18 ++++-- src/server/mod.rs | 2 +- src/server/raft_client.rs | 56 +++++++++++++----- src/server/server.rs | 2 +- src/server/service/kv.rs | 41 ++++++++++++- tests/failpoints/cases/test_hibernate.rs | 51 
++++++++++++++++ .../integrations/raftstore/test_tombstone.rs | 2 +- tests/integrations/server/raft_client.rs | 58 +------------------ 11 files changed, 198 insertions(+), 101 deletions(-) diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index f28c4170459..c64b2a53c37 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -678,7 +678,12 @@ struct Store { stopped: bool, start_time: Option, consistency_check_time: HashMap, - last_unreachable_report: HashMap, + store_reachability: HashMap, +} + +struct StoreReachability { + last_broadcast: Instant, + received_message_count: u64, } pub struct StoreFsm @@ -702,7 +707,7 @@ where stopped: false, start_time: None, consistency_check_time: HashMap::default(), - last_unreachable_report: HashMap::default(), + store_reachability: HashMap::default(), }, receiver: rx, }); @@ -2876,22 +2881,35 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER fn on_store_unreachable(&mut self, store_id: u64) { let now = Instant::now(); let unreachable_backoff = self.ctx.cfg.unreachable_backoff.0; - if self - .fsm - .store - .last_unreachable_report - .get(&store_id) - .map_or(unreachable_backoff, |t| now.saturating_duration_since(*t)) - < unreachable_backoff - { - return; - } + let new_messages = MESSAGE_RECV_BY_STORE + .with_label_values(&[&format!("{}", store_id)]) + .get(); + match self.fsm.store.store_reachability.entry(store_id) { + HashMapEntry::Vacant(x) => { + x.insert(StoreReachability { + last_broadcast: now, + received_message_count: new_messages, + }); + } + HashMapEntry::Occupied(x) => { + let ob = x.into_mut(); + if now.saturating_duration_since(ob.last_broadcast) < unreachable_backoff + // If there are no new messages come from `store_id`, it's not + // necessary to do redundant broadcasts. 
+ || (new_messages <= ob.received_message_count && new_messages > 0) + { + return; + } + ob.last_broadcast = now; + ob.received_message_count = new_messages; + } + }; + info!( "broadcasting unreachable"; "store_id" => self.fsm.store.id, "unreachable_store_id" => store_id, ); - self.fsm.store.last_unreachable_report.insert(store_id, now); // It's possible to acquire the lock and only send notification to // involved regions. However loop over all the regions can take a // lot of time, which may block other operations. diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 7df8819c998..c69875ae998 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -868,4 +868,11 @@ lazy_static! { "Total snapshot generate limit used", ) .unwrap(); + + pub static ref MESSAGE_RECV_BY_STORE: IntCounterVec = register_int_counter_vec!( + "tikv_raftstore_message_recv_by_store", + "Messages received by store", + &["store"] + ) + .unwrap(); } diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 9bdd8568418..85941088e2e 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -259,7 +259,8 @@ pub struct ServerCluster { snap_paths: HashMap, snap_mgrs: HashMap, pd_client: Arc, - raft_client: RaftClient, + raft_clients: HashMap>, + conn_builder: ConnectionBuilder, concurrency_managers: HashMap, env: Arc, pub pending_services: HashMap, @@ -291,7 +292,6 @@ impl ServerCluster { worker.scheduler(), Arc::new(ThreadLoadPool::with_threshold(usize::MAX)), ); - let raft_client = RaftClient::new(conn_builder); ServerCluster { metas: HashMap::default(), addrs: map, @@ -303,7 +303,8 @@ impl ServerCluster { snap_paths: HashMap::default(), pending_services: HashMap::default(), health_services: HashMap::default(), - raft_client, + raft_clients: HashMap::default(), + conn_builder, concurrency_managers: 
HashMap::default(), env, txn_extra_schedulers: HashMap::default(), @@ -650,6 +651,8 @@ impl ServerCluster { self.concurrency_managers .insert(node_id, concurrency_manager); + let client = RaftClient::new(node_id, self.conn_builder.clone()); + self.raft_clients.insert(node_id, client); Ok(node_id) } @@ -763,6 +766,7 @@ impl Simulator for ServerCluster { (meta.rsmeter_cleanup)(); } self.storages.remove(&node_id); + let _ = self.raft_clients.remove(&node_id); } fn async_snapshot( @@ -800,8 +804,12 @@ impl Simulator for ServerCluster { } fn send_raft_msg(&mut self, msg: RaftMessage) -> raftstore::Result<()> { - self.raft_client.send(msg).unwrap(); - self.raft_client.flush(); + let from_store = msg.get_from_peer().store_id; + assert_ne!(from_store, 0); + if let Some(client) = self.raft_clients.get_mut(&from_store) { + client.send(msg).unwrap(); + client.flush(); + } Ok(()) } diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index a59dafd4504..ec6cb0a235c 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -154,7 +154,8 @@ pub struct ServerCluster { snap_paths: HashMap, snap_mgrs: HashMap, pd_client: Arc, - raft_client: RaftClient, + raft_clients: HashMap>, + conn_builder: ConnectionBuilder, concurrency_managers: HashMap, env: Arc, pub causal_ts_providers: HashMap>, @@ -182,7 +183,6 @@ impl ServerCluster { worker.scheduler(), Arc::new(ThreadLoadPool::with_threshold(usize::MAX)), ); - let raft_client = RaftClient::new(conn_builder); ServerCluster { metas: HashMap::default(), addrs: map, @@ -196,7 +196,8 @@ impl ServerCluster { pending_services: HashMap::default(), coprocessor_hooks: HashMap::default(), health_services: HashMap::default(), - raft_client, + raft_clients: HashMap::default(), + conn_builder, concurrency_managers: HashMap::default(), env, txn_extra_schedulers: HashMap::default(), @@ -645,6 +646,8 @@ impl ServerCluster { self.concurrency_managers .insert(node_id, 
concurrency_manager); + let client = RaftClient::new(node_id, self.conn_builder.clone()); + self.raft_clients.insert(node_id, client); Ok(node_id) } } @@ -698,6 +701,7 @@ impl Simulator for ServerCluster { } (meta.rsmeter_cleanup)(); } + let _ = self.raft_clients.remove(&node_id); } fn get_node_ids(&self) -> HashSet { @@ -739,8 +743,12 @@ impl Simulator for ServerCluster { } fn send_raft_msg(&mut self, raft_msg: raft_serverpb::RaftMessage) -> Result<()> { - self.raft_client.send(raft_msg).unwrap(); - self.raft_client.flush(); + let from_store = raft_msg.get_from_peer().store_id; + assert_ne!(from_store, 0); + if let Some(client) = self.raft_clients.get_mut(&from_store) { + client.send(raft_msg).unwrap(); + client.flush(); + } Ok(()) } diff --git a/src/server/mod.rs b/src/server/mod.rs index 773e2040f17..e432b3aa51b 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -34,7 +34,7 @@ pub use self::{ metrics::{CONFIG_ROCKSDB_GAUGE, CPU_CORES_QUOTA_GAUGE, MEM_TRACE_SUM_GAUGE}, node::Node, proxy::{build_forward_option, get_target_address, Proxy}, - raft_client::{ConnectionBuilder, RaftClient}, + raft_client::{ConnectionBuilder, MetadataSourceStoreId, RaftClient}, raftkv::RaftKv, raftkv2::{Extension, NodeV2, RaftKv2}, resolve::{PdStoreAddrResolver, StoreAddrResolver}, diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index 17de1d3365d..f30e5b36045 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -25,8 +25,8 @@ use futures::{ }; use futures_timer::Delay; use grpcio::{ - Channel, ChannelBuilder, ClientCStreamReceiver, ClientCStreamSender, Environment, - RpcStatusCode, WriteFlags, + CallOption, Channel, ChannelBuilder, ClientCStreamReceiver, ClientCStreamSender, Environment, + MetadataBuilder, RpcStatusCode, WriteFlags, }; use kvproto::{ raft_serverpb::{Done, RaftMessage, RaftSnapshotData}, @@ -50,6 +50,21 @@ use crate::server::{ StoreAddrResolver, }; +pub struct MetadataSourceStoreId {} + +impl MetadataSourceStoreId { + pub 
const KEY: &str = "source_store_id"; + + pub fn parse(value: &[u8]) -> u64 { + let value = std::str::from_utf8(value).unwrap(); + value.parse::().unwrap() + } + + pub fn format(id: u64) -> String { + format!("{}", id) + } +} + static CONN_ID: AtomicI32 = AtomicI32::new(0); const _ON_RESOLVE_FP: &str = "transport_snapshot_on_resolve"; @@ -616,6 +631,7 @@ impl ConnectionBuilder { /// StreamBackEnd watches lifetime of a connection and handles reconnecting, /// spawn new RPC. struct StreamBackEnd { + self_store_id: u64, store_id: u64, queue: Arc, builder: ConnectionBuilder, @@ -697,7 +713,8 @@ where } fn batch_call(&self, client: &TikvClient, addr: String) -> oneshot::Receiver { - let (batch_sink, batch_stream) = client.batch_raft().unwrap(); + let (batch_sink, batch_stream) = client.batch_raft_opt(self.get_call_option()).unwrap(); + let (tx, rx) = oneshot::channel(); let mut call = RaftCall { sender: AsyncRaftSender { @@ -721,7 +738,8 @@ where } fn call(&self, client: &TikvClient, addr: String) -> oneshot::Receiver { - let (sink, stream) = client.raft().unwrap(); + let (sink, stream) = client.raft_opt(self.get_call_option()).unwrap(); + let (tx, rx) = oneshot::channel(); let mut call = RaftCall { sender: AsyncRaftSender { @@ -742,6 +760,15 @@ where }); rx } + + fn get_call_option(&self) -> CallOption { + let mut metadata = MetadataBuilder::with_capacity(1); + let value = MetadataSourceStoreId::format(self.self_store_id); + metadata + .add_str(MetadataSourceStoreId::KEY, &value) + .unwrap(); + CallOption::default().headers(metadata.build()) + } } async fn maybe_backoff(backoff: Duration, last_wake_time: &mut Option) { @@ -782,7 +809,6 @@ async fn start( R: RaftExtension + Unpin + Send + 'static, { let mut last_wake_time = None; - let mut first_time = true; let backoff_duration = back_end.builder.cfg.value().raft_client_max_backoff.0; let mut addr_channel = None; loop { @@ -828,15 +854,10 @@ async fn start( // shutdown. 
back_end.clear_pending_message("unreachable"); - // broadcast is time consuming operation which would blocks raftstore, so report - // unreachable only once until being connected again. - if first_time { - first_time = false; - back_end - .builder - .router - .report_store_unreachable(back_end.store_id); - } + back_end + .builder + .router + .report_store_unreachable(back_end.store_id); continue; } else { debug!("connection established"; "store_id" => back_end.store_id, "addr" => %addr); @@ -868,7 +889,6 @@ async fn start( .router .report_store_unreachable(back_end.store_id); addr_channel = None; - first_time = false; } } } @@ -926,6 +946,7 @@ struct CachedQueue { /// raft_client.flush(); /// ``` pub struct RaftClient { + self_store_id: u64, pool: Arc>, cache: LruCache<(u64, usize), CachedQueue>, need_flush: Vec<(u64, usize)>, @@ -940,13 +961,14 @@ where S: StoreAddrResolver + Send + 'static, R: RaftExtension + Unpin + Send + 'static, { - pub fn new(builder: ConnectionBuilder) -> Self { + pub fn new(self_store_id: u64, builder: ConnectionBuilder) -> Self { let future_pool = Arc::new( yatp::Builder::new(thd_name!("raft-stream")) .max_thread_count(1) .build_future_pool(), ); RaftClient { + self_store_id, pool: Arc::default(), cache: LruCache::with_capacity_and_sample(0, 7), need_flush: vec![], @@ -982,6 +1004,7 @@ where queue.set_conn_state(ConnState::Paused); } let back_end = StreamBackEnd { + self_store_id: self.self_store_id, store_id, queue: queue.clone(), builder: self.builder.clone(), @@ -1143,6 +1166,7 @@ where { fn clone(&self) -> Self { RaftClient { + self_store_id: self.self_store_id, pool: self.pool.clone(), cache: LruCache::with_capacity_and_sample(0, 7), need_flush: vec![], diff --git a/src/server/server.rs b/src/server/server.rs index 45778835d29..8a50f44f363 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -176,7 +176,7 @@ where lazy_worker.scheduler(), grpc_thread_load.clone(), ); - let raft_client = RaftClient::new(conn_builder); + let 
raft_client = RaftClient::new(store_id, conn_builder); let trans = ServerTransport::new(raft_client); health_service.set_serving_status("", ServingStatus::NotServing); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 2c77ee4e0bd..9895067fcb3 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -21,7 +21,7 @@ use raft::eraftpb::MessageType; use raftstore::{ store::{ memory::{MEMTRACE_APPLYS, MEMTRACE_RAFT_ENTRIES, MEMTRACE_RAFT_MESSAGES}, - metrics::RAFT_ENTRIES_CACHES_GAUGE, + metrics::{MESSAGE_RECV_BY_STORE, RAFT_ENTRIES_CACHES_GAUGE}, CheckLeaderTask, }, Error as RaftStoreError, Result as RaftStoreResult, @@ -45,7 +45,7 @@ use crate::{ coprocessor_v2, forward_duplex, forward_unary, log_net_error, server::{ gc_worker::GcWorker, load_statistics::ThreadLoadPool, metrics::*, snap::Task as SnapTask, - Error, Proxy, Result as ServerResult, + Error, MetadataSourceStoreId, Proxy, Result as ServerResult, }, storage::{ self, @@ -168,9 +168,23 @@ impl Service { ch.report_reject_message(id, peer_id); return Ok(()); } + + fail_point!("receive_raft_message_from_outside"); ch.feed(msg, false); Ok(()) } + + fn get_store_id_from_metadata(ctx: &RpcContext<'_>) -> Option { + let metadata = ctx.request_headers(); + for i in 0..metadata.len() { + let (key, value) = metadata.get(i).unwrap(); + if key == MetadataSourceStoreId::KEY { + let store_id = MetadataSourceStoreId::parse(value); + return Some(store_id); + } + } + None + } } macro_rules! 
handle_request { @@ -636,6 +650,14 @@ impl Tikv for Service { stream: RequestStream, sink: ClientStreamingSink, ) { + let source_store_id = Self::get_store_id_from_metadata(&ctx); + let message_received = + source_store_id.map(|x| MESSAGE_RECV_BY_STORE.with_label_values(&[&format!("{}", x)])); + info!( + "raft RPC is called, new gRPC stream established"; + "source_store_id" => ?source_store_id, + ); + let store_id = self.store_id; let ch = self.storage.get_engine().raft_extension(); let reject_messages_on_memory_ratio = self.reject_messages_on_memory_ratio; @@ -652,6 +674,9 @@ impl Tikv for Service { // `StoreNotMatch` to let tikv to resolve a correct address from PD return Err(Error::from(err)); } + if let Some(ref counter) = message_received { + counter.inc(); + } } Ok::<(), Error>(()) }; @@ -678,7 +703,14 @@ impl Tikv for Service { stream: RequestStream, sink: ClientStreamingSink, ) { - info!("batch_raft RPC is called, new gRPC stream established"); + let source_store_id = Self::get_store_id_from_metadata(&ctx); + let message_received = + source_store_id.map(|x| MESSAGE_RECV_BY_STORE.with_label_values(&[&format!("{}", x)])); + info!( + "batch_raft RPC is called, new gRPC stream established"; + "source_store_id" => ?source_store_id, + ); + let store_id = self.store_id; let ch = self.storage.get_engine().raft_extension(); let reject_messages_on_memory_ratio = self.reject_messages_on_memory_ratio; @@ -699,6 +731,9 @@ impl Tikv for Service { return Err(Error::from(err)); } } + if let Some(ref counter) = message_received { + counter.inc_by(len as u64); + } } Ok::<(), Error>(()) }; diff --git a/tests/failpoints/cases/test_hibernate.rs b/tests/failpoints/cases/test_hibernate.rs index 6bbed4ac641..4dc404e58b8 100644 --- a/tests/failpoints/cases/test_hibernate.rs +++ b/tests/failpoints/cases/test_hibernate.rs @@ -81,3 +81,54 @@ fn test_break_leadership_on_restart() { // incorrectly. 
rx.recv_timeout(Duration::from_secs(2)).unwrap_err(); } + +// This case creates a cluster with 3 TiKV instances, and then wait all peers +// hibernate. +// +// After that, propose a command and stop the leader node immediately. +// With failpoint `receive_raft_message_from_outside`, we can make the proposal +// reach 2 followers *after* `StoreUnreachable` is broadcasted. +// +// 2 followers may become GroupState::Chaos after `StoreUnreachable` is +// received, and become `GroupState::Ordered` after the proposal is received. +// But they should keep wakeful for a while. +#[test] +fn test_store_disconnect_with_hibernate() { + let mut cluster = new_server_cluster(0, 3); + let base_tick_ms = 50; + cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(base_tick_ms); + cluster.cfg.raft_store.raft_heartbeat_ticks = 2; + cluster.cfg.raft_store.raft_election_timeout_ticks = 10; + cluster.cfg.raft_store.unreachable_backoff = ReadableDuration::millis(500); + cluster.cfg.server.raft_client_max_backoff = ReadableDuration::millis(200); + // So the random election timeout will always be 10, which makes the case more + // stable. + cluster.cfg.raft_store.raft_min_election_timeout_ticks = 10; + cluster.cfg.raft_store.raft_max_election_timeout_ticks = 11; + configure_for_hibernate(&mut cluster); + cluster.pd_client.disable_default_operator(); + let r = cluster.run_conf_change(); + cluster.pd_client.must_add_peer(r, new_peer(2, 2)); + cluster.pd_client.must_add_peer(r, new_peer(3, 3)); + + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + + // Wait until all peers of region 1 hibernate. + thread::sleep(Duration::from_millis(base_tick_ms * 30)); + + // Stop the region leader. 
+ fail::cfg("receive_raft_message_from_outside", "pause").unwrap(); + let _ = cluster.async_put(b"k2", b"v2").unwrap(); + cluster.stop_node(1); + + // Wait for a while so that the failpoint can be triggered on followers. + thread::sleep(Duration::from_millis(100)); + fail::remove("receive_raft_message_from_outside"); + + // Wait for a while. Peers of region 1 shouldn't hibernate. + thread::sleep(Duration::from_millis(base_tick_ms * 30)); + must_get_equal(&cluster.get_engine(2), b"k2", b"v2"); + must_get_equal(&cluster.get_engine(3), b"k2", b"v2"); +} diff --git a/tests/integrations/raftstore/test_tombstone.rs b/tests/integrations/raftstore/test_tombstone.rs index 3d7fc235cad..972a75212b4 100644 --- a/tests/integrations/raftstore/test_tombstone.rs +++ b/tests/integrations/raftstore/test_tombstone.rs @@ -80,7 +80,7 @@ fn test_tombstone(cluster: &mut Cluster) { raft_msg.set_region_id(r1); // Use an invalid from peer to ignore gc peer message. - raft_msg.set_from_peer(new_peer(0, 0)); + raft_msg.set_from_peer(new_peer(100, 100)); raft_msg.set_to_peer(new_peer(2, 2)); raft_msg.mut_region_epoch().set_conf_ver(0); raft_msg.mut_region_epoch().set_version(0); diff --git a/tests/integrations/server/raft_client.rs b/tests/integrations/server/raft_client.rs index fa7a86f12c4..aad9ab7ceb1 100644 --- a/tests/integrations/server/raft_client.rs +++ b/tests/integrations/server/raft_client.rs @@ -19,7 +19,7 @@ use kvproto::{ tikvpb::BatchRaftMessage, }; use raft::eraftpb::Entry; -use raftstore::{errors::DiscardReason, store::StoreMsg}; +use raftstore::errors::DiscardReason; use tikv::server::{ self, load_statistics::ThreadLoadPool, raftkv::RaftRouterWrap, resolve, resolve::Callback, Config, ConnectionBuilder, RaftClient, StoreAddrResolver, TestRaftStoreRouter, @@ -28,7 +28,6 @@ use tikv_kv::{FakeExtension, RaftExtension}; use tikv_util::{ config::{ReadableDuration, VersionTrack}, worker::{Builder as WorkerBuilder, LazyWorker}, - Either, }; use super::*; @@ -73,7 +72,7 @@ where 
worker.scheduler(), loads, ); - RaftClient::new(builder) + RaftClient::new(0, builder) } fn get_raft_client_by_port(port: u16) -> RaftClient { @@ -206,59 +205,6 @@ fn test_raft_client_reconnect() { drop(mock_server); } -#[test] -// Test raft_client reports store unreachable only once until being connected -// again -fn test_raft_client_report_unreachable() { - let msg_count = Arc::new(AtomicUsize::new(0)); - let batch_msg_count = Arc::new(AtomicUsize::new(0)); - let service = MockKvForRaft::new(Arc::clone(&msg_count), Arc::clone(&batch_msg_count), true); - let (mut mock_server, port) = create_mock_server(service, 60100, 60200).unwrap(); - - let (tx, rx) = mpsc::channel(); - let (significant_msg_sender, _significant_msg_receiver) = mpsc::channel(); - let router = TestRaftStoreRouter::new(tx, significant_msg_sender); - let wrap = RaftRouterWrap::new(router); - let mut raft_client = get_raft_client(wrap, StaticResolver::new(port)); - - // server is disconnected - mock_server.shutdown(); - drop(mock_server); - - raft_client.send(RaftMessage::default()).unwrap(); - let msg = rx.recv_timeout(Duration::from_millis(200)).unwrap(); - if let Either::Right(StoreMsg::StoreUnreachable { store_id }) = msg { - assert_eq!(store_id, 0); - } else { - panic!("expect StoreUnreachable"); - } - // no more unreachable message is sent until it's connected again. - rx.recv_timeout(Duration::from_millis(200)).unwrap_err(); - - // restart the mock server. 
- let service = MockKvForRaft::new(Arc::clone(&msg_count), batch_msg_count, true); - let mut mock_server = create_mock_server_on(service, port); - - // make sure the connection is connected, otherwise the following sent messages - // may be dropped - std::thread::sleep(Duration::from_millis(200)); - (0..50).for_each(|_| raft_client.send(RaftMessage::default()).unwrap()); - raft_client.flush(); - check_msg_count(500, &msg_count, 50); - - // server is disconnected - mock_server.take().unwrap().shutdown(); - - let msg = rx.recv_timeout(Duration::from_millis(200)).unwrap(); - if let Either::Right(StoreMsg::StoreUnreachable { store_id }) = msg { - assert_eq!(store_id, 0); - } else { - panic!("expect StoreUnreachable"); - } - // no more unreachable message is sent until it's connected again. - rx.recv_timeout(Duration::from_millis(200)).unwrap_err(); -} - #[test] fn test_batch_size_limit() { let msg_count = Arc::new(AtomicUsize::new(0)); From 0de1123800389db278b666a4180b1984d3407338 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 24 Apr 2023 16:46:45 +0800 Subject: [PATCH 0660/1149] raftstore-v2: prevent resolving store 0 (#14645) * raftstore-v2: prevent resolving store 0 Do not cache invaild peer otherwise it may send raft message to store 0 during region split. 
Signed-off-by: Neil Shen * address comments Signed-off-by: Neil Shen --------- Signed-off-by: Neil Shen --- components/raftstore-v2/src/operation/life.rs | 56 +++++++++--- .../src/operation/ready/apply_trace.rs | 5 +- .../raftstore-v2/src/operation/ready/mod.rs | 16 +++- .../test_raftstore/src/transport_simulate.rs | 8 +- tests/failpoints/cases/test_split_region.rs | 90 ++++++++++++++++++- .../integrations/raftstore/test_hibernate.rs | 6 +- 6 files changed, 155 insertions(+), 26 deletions(-) diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 9d3a32f8f72..c9145e909d1 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -213,6 +213,26 @@ fn check_if_to_peer_destroyed( Ok(false) } +// An empty raft message for creating peer fsm. +fn empty_split_message(store_id: u64, region: &Region) -> Box { + let mut raft_msg = Box::::default(); + raft_msg.set_region_id(region.get_id()); + raft_msg.set_region_epoch(region.get_region_epoch().clone()); + raft_msg.set_to_peer( + region + .get_peers() + .iter() + .find(|p| p.get_store_id() == store_id) + .unwrap() + .clone(), + ); + raft_msg +} + +pub fn is_empty_split_message(msg: &RaftMessage) -> bool { + !msg.has_from_peer() && msg.has_to_peer() && msg.has_region_epoch() && !msg.has_message() +} + impl Store { /// The method is called during split. 
/// The creation process is: @@ -230,17 +250,31 @@ impl Store { { let derived_region_id = msg.derived_region_id; let region_id = msg.region.id; - let mut raft_msg = Box::::default(); - raft_msg.set_region_id(region_id); - raft_msg.set_region_epoch(msg.region.get_region_epoch().clone()); - raft_msg.set_to_peer( - msg.region - .get_peers() - .iter() - .find(|p| p.get_store_id() == self.store_id()) - .unwrap() - .clone(), - ); + let raft_msg = empty_split_message(self.store_id(), &msg.region); + + (|| { + fail::fail_point!( + "on_store_2_split_init_race_with_initial_message", + self.store_id() == 2, + |_| { + let mut initial_msg = raft_msg.clone(); + initial_msg.set_from_peer( + msg.region + .get_peers() + .iter() + .find(|p| p.get_store_id() != self.store_id()) + .unwrap() + .clone(), + ); + let m = initial_msg.mut_message(); + m.set_msg_type(raft::prelude::MessageType::MsgRequestPreVote); + m.set_term(raftstore::store::RAFT_INIT_LOG_TERM); + m.set_index(raftstore::store::RAFT_INIT_LOG_INDEX); + assert!(util::is_initial_msg(initial_msg.get_message())); + self.on_raft_message(ctx, initial_msg); + } + ) + })(); // It will create the peer if it does not exist self.on_raft_message(ctx, raft_msg); diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index f1a65fc1768..6c9c73479ba 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -406,13 +406,12 @@ impl Storage { } }; apply_state.set_applied_index(applied_index); - let mut reset_apply_index = || { + (|| { // Make node reply from start. 
fail_point!("RESET_APPLY_INDEX_WHEN_RESTART", |_| { apply_state.set_applied_index(5); }); - }; - reset_apply_index(); + })(); Self::create( store_id, diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index f63d9c97b86..009b31921b3 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -57,6 +57,7 @@ pub use self::{ use crate::{ batch::StoreContext, fsm::{PeerFsmDelegate, Store}, + operation::life::is_empty_split_message, raft::{Peer, Storage}, router::{PeerMsg, PeerTick}, worker::tablet, @@ -289,10 +290,14 @@ impl Peer { // ranges with other peers. let from_peer = msg.take_from_peer(); let from_peer_id = from_peer.get_id(); - if self.is_leader() && from_peer.get_id() != INVALID_ID { - self.add_peer_heartbeat(from_peer.get_id(), Instant::now()); + if from_peer_id != INVALID_ID { + if self.is_leader() { + self.add_peer_heartbeat(from_peer.get_id(), Instant::now()); + } + // We only cache peer with an vaild ID. + // It prevents cache peer(0,0) which is sent by region split. + self.insert_peer_cache(from_peer); } - self.insert_peer_cache(from_peer); let pre_committed_index = self.raft_group().raft.raft_log.committed; if msg.get_message().get_msg_type() == MessageType::MsgTransferLeader { self.on_transfer_leader_msg(ctx, msg.get_message(), msg.disk_usage) @@ -307,6 +312,11 @@ impl Peer { ctx.raft_metrics.message_dropped.stale_msg.inc(); return; } + // As this peer is already created, the empty split message is meaningless. 
+ if is_empty_split_message(&msg) { + ctx.raft_metrics.message_dropped.stale_msg.inc(); + return; + } if let Err(e) = self.raft_group_mut().step(msg.take_message()) { error!(self.logger, "raft step error"; "err" => ?e); diff --git a/components/test_raftstore/src/transport_simulate.rs b/components/test_raftstore/src/transport_simulate.rs index a49a41af4e3..ef569e3987a 100644 --- a/components/test_raftstore/src/transport_simulate.rs +++ b/components/test_raftstore/src/transport_simulate.rs @@ -831,18 +831,18 @@ impl Filter for LeaseReadFilter { #[derive(Clone)] pub struct DropMessageFilter { - ty: MessageType, + retain: Arc bool + Sync + Send>, } impl DropMessageFilter { - pub fn new(ty: MessageType) -> DropMessageFilter { - DropMessageFilter { ty } + pub fn new(retain: Arc bool + Sync + Send>) -> DropMessageFilter { + DropMessageFilter { retain } } } impl Filter for DropMessageFilter { fn before(&self, msgs: &mut Vec) -> Result<()> { - msgs.retain(|m| m.get_message().get_msg_type() != self.ty); + msgs.retain(|m| (self.retain)(m)); Ok(()) } } diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 792a21217ad..096bbc12ed8 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -17,14 +17,16 @@ use kvproto::{ Mutation, Op, PessimisticLockRequest, PrewriteRequest, PrewriteRequestPessimisticAction::*, }, metapb::Region, - raft_serverpb::RaftMessage, + raft_serverpb::{PeerState, RaftMessage}, tikvpb::TikvClient, }; use pd_client::PdClient; use raft::eraftpb::MessageType; use raftstore::{ store::{ - config::Config as RaftstoreConfig, util::is_vote_msg, Callback, PeerMsg, WriteResponse, + config::Config as RaftstoreConfig, + util::{is_initial_msg, is_vote_msg}, + Callback, PeerMsg, WriteResponse, }, Result, }; @@ -32,6 +34,8 @@ use test_raftstore::*; use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{ config::{ReadableDuration, ReadableSize}, + 
mpsc::{unbounded, Sender}, + time::Instant, HandyRwLock, }; use txn_types::{Key, PessimisticLock}; @@ -1187,3 +1191,85 @@ fn test_split_race_with_conf_change() { cluster.must_put(b"k06", b"val"); assert_eq!(cluster.must_get(b"k06").unwrap(), b"val".to_vec()); } + +// split init races with request prevote should not send messages to store 0. +// +// 1. split region. +// 2. send split init to store because peer is no exist. +// 3. store receives request prevote from normal peer. +// 4. store receives split init. +// 5. store creates peer via request prevote. +// 6. store sends empty raft message to peer. +// 7. store sends split init to peer. +// 7. peer inserts peer(0,0) to cache and step the empty meassge. +// 8. peer handles split snapshot from split init and response to peer(0,0). +// 9. transport tries to resolve store 0. +// +// We must prevent peer incorrectly inserting peer(0,0) to cache and send +// messages to store 0. +#[test] +fn test_split_init_race_with_initial_msg_v2() { + // test case for raftstore-v2 + use test_raftstore_v2::*; + + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + + let split_key1 = b"k01"; + let region = cluster.get_region(split_key1); + cluster.must_transfer_leader( + region.get_id(), + region + .get_peers() + .iter() + .find(|p| p.get_store_id() == 1) + .unwrap() + .to_owned(), + ); + + // Drop initial messages to store 2. + cluster.add_recv_filter_on_node( + 2, + Box::new(DropMessageFilter::new(Arc::new(|m| { + !is_initial_msg(m.get_message()) + }))), + ); + let (tx, rx) = unbounded(); + cluster.add_send_filter_on_node(2, Box::new(TeeFilter { pipe: tx })); + + fail::cfg("on_store_2_split_init_race_with_initial_message", "return").unwrap(); + cluster.must_split(®ion, split_key1); + + // Wait for store 2 split. 
+ let new_region = cluster.get_region(b"k00"); + let start = Instant::now(); + loop { + sleep_ms(500); + let region_state = cluster.region_local_state(new_region.get_id(), 2); + if region_state.get_state() == PeerState::Normal { + break; + } + if start.saturating_elapsed() > Duration::from_secs(5) { + panic!("timeout"); + } + } + cluster.clear_send_filter_on_node(2); + while let Ok(msg) = rx.recv_timeout(Duration::from_millis(500)) { + if msg.get_to_peer().get_store_id() == 0 { + panic!("must not send messages to store 0"); + } + } +} + +struct TeeFilter { + pipe: Sender, +} + +impl Filter for TeeFilter { + fn before(&self, msgs: &mut Vec) -> Result<()> { + for msg in msgs { + let _ = self.pipe.send(msg.clone()); + } + Ok(()) + } +} diff --git a/tests/integrations/raftstore/test_hibernate.rs b/tests/integrations/raftstore/test_hibernate.rs index 23c859a21bd..73156becb0d 100644 --- a/tests/integrations/raftstore/test_hibernate.rs +++ b/tests/integrations/raftstore/test_hibernate.rs @@ -199,9 +199,9 @@ fn test_transfer_leader_delay() { ); cluster.clear_send_filters(); - cluster.add_send_filter(CloneFilterFactory(DropMessageFilter::new( - MessageType::MsgTimeoutNow, - ))); + cluster.add_send_filter(CloneFilterFactory(DropMessageFilter::new(Arc::new(|m| { + m.get_message().get_msg_type() != MessageType::MsgTimeoutNow + })))); let router = cluster.sim.wl().get_router(1).unwrap(); router .send_raft_message(messages.lock().unwrap().pop().unwrap()) From 1674d3c487063425cfe865e047036daa22ec07de Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 24 Apr 2023 18:18:47 +0800 Subject: [PATCH 0661/1149] raftstore: delete tablet snap if exists (#14647) * done Signed-off-by: Spade A * add panic Signed-off-by: Spade A --------- Signed-off-by: Spade A Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/snap.rs | 9 +++++++-- tests/integrations/raftstore/test_snap.rs | 11 
+++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 12440abb5d0..4f347002f67 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -20,8 +20,8 @@ use engine_traits::{CfName, EncryptionKeyManager, KvEngine, CF_DEFAULT, CF_LOCK, use error_code::{self, ErrorCode, ErrorCodeExt}; use fail::fail_point; use file_system::{ - calc_crc32, calc_crc32_and_size, delete_file_if_exist, file_exists, get_file_size, sync_dir, - File, Metadata, OpenOptions, + calc_crc32, calc_crc32_and_size, delete_dir_if_exist, delete_file_if_exist, file_exists, + get_file_size, sync_dir, File, Metadata, OpenOptions, }; use keys::{enc_end_key, enc_start_key}; use kvproto::{ @@ -1008,6 +1008,11 @@ impl Snapshot { } } } + if let Some(ref meta) = self.meta_file.meta { + if !meta.tablet_snap_path.is_empty() { + delete_dir_if_exist(&meta.tablet_snap_path).unwrap(); + } + } delete_file_if_exist(&self.meta_file.path).unwrap(); if self.hold_tmp_files { delete_file_if_exist(&self.meta_file.tmp_path).unwrap(); diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index f474b5cdb8e..a620bb3a990 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -925,6 +925,7 @@ fn test_v1_apply_snap_from_v2() { let mut cluster_v1 = test_raftstore::new_server_cluster(1, 1); let mut cluster_v2 = test_raftstore_v2::new_server_cluster(1, 1); cluster_v1.cfg.raft_store.enable_v2_compatible_learner = true; + cluster_v1.cfg.raft_store.snap_mgr_gc_tick_interval = ReadableDuration::millis(200); let observer = MockApplySnapshotObserver { tablet_snap_paths: Arc::default(), @@ -1012,6 +1013,16 @@ fn test_v1_apply_snap_from_v2() { let path_str = path.as_path().to_str().unwrap(); check_observer(&observer, region_id, path_str); + + // Verify that the tablet snap will be gced + for _ in 0..10 
{ + if !path.exists() { + return; + } + std::thread::sleep(Duration::from_millis(200)); + } + + panic!("tablet snap {:?} still exists", path_str); } fn check_observer(observer: &MockApplySnapshotObserver, region_id: u64, snap_path: &str) { From c44266730fb856145c46bc59ff5e0912d18744bb Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Mon, 24 Apr 2023 21:19:51 -0700 Subject: [PATCH 0662/1149] [raftstore-v2]: add the missed apply log duration and fix apply wait time (#14530) ref tikv/tikv#14321 Add the apply log duration metrics. Signed-off-by: tonyxuqqi --- .../raftstore-v2/src/operation/command/mod.rs | 22 ++++++++++++++----- .../src/operation/query/capture.rs | 4 +--- .../raftstore/src/store/local_metrics.rs | 5 +++-- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 2f2df5a0333..b9256f031fe 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -37,7 +37,9 @@ use raftstore::{ Proposal, }, local_metrics::RaftMetrics, - metrics::{APPLY_TASK_WAIT_TIME_HISTOGRAM, APPLY_TIME_HISTOGRAM}, + metrics::{ + APPLY_TASK_WAIT_TIME_HISTOGRAM, APPLY_TIME_HISTOGRAM, STORE_APPLY_LOG_HISTOGRAM, + }, msg::ErrorCallback, util, Config, Transport, WriteCallback, }, @@ -100,7 +102,6 @@ pub struct CommittedEntries { /// Entries need to be applied. Note some entries may not be included for /// flow control. pub entry_and_proposals: Vec<(Entry, Vec)>, - pub committed_time: Instant, } fn new_response(header: &RaftRequestHeader) -> RaftCmdResponse { @@ -306,7 +307,6 @@ impl Peer { // memtables in kv engine is flushed. 
let apply = CommittedEntries { entry_and_proposals, - committed_time: Instant::now(), }; assert!( self.apply_scheduler().is_some() || ctx.router.is_shutdown(), @@ -517,14 +517,17 @@ impl Apply { #[inline] pub async fn apply_committed_entries(&mut self, ce: CommittedEntries) { fail::fail_point!("APPLY_COMMITTED_ENTRIES"); - APPLY_TASK_WAIT_TIME_HISTOGRAM - .observe(duration_to_sec(ce.committed_time.saturating_elapsed())); + let now = std::time::Instant::now(); + let apply_wait_time = APPLY_TASK_WAIT_TIME_HISTOGRAM.local(); for (e, ch) in ce.entry_and_proposals { if self.tombstone() { apply::notify_req_region_removed(self.region_id(), ch); continue; } if !e.get_data().is_empty() { + for tracker in ch.write_trackers() { + tracker.observe(now, &apply_wait_time, |t| &mut t.metrics.apply_wait_nanos); + } let mut set_save_point = false; if let Some(wb) = &mut self.write_batch { wb.set_save_point(); @@ -787,7 +790,14 @@ impl Apply { let apply_time = APPLY_TIME_HISTOGRAM.local(); for (ch, resp) in callbacks.drain(..) 
{ for tracker in ch.write_trackers() { - tracker.observe(now, &apply_time, |t| &mut t.metrics.apply_time_nanos); + let mut apply_wait_nanos = 0_u64; + let apply_time_nanos = tracker.observe(now, &apply_time, |t| { + apply_wait_nanos = t.metrics.apply_wait_nanos; + &mut t.metrics.apply_time_nanos + }); + STORE_APPLY_LOG_HISTOGRAM.observe(duration_to_sec(Duration::from_nanos( + apply_time_nanos - apply_wait_nanos, + ))); } ch.set_result(resp); } diff --git a/components/raftstore-v2/src/operation/query/capture.rs b/components/raftstore-v2/src/operation/query/capture.rs index 5fdbde187e4..5393dfacc98 100644 --- a/components/raftstore-v2/src/operation/query/capture.rs +++ b/components/raftstore-v2/src/operation/query/capture.rs @@ -193,7 +193,7 @@ mod test { }; use slog::o; use tempfile::TempDir; - use tikv_util::{store::new_peer, time::Instant, worker::dummy_scheduler}; + use tikv_util::{store::new_peer, worker::dummy_scheduler}; use super::*; use crate::{ @@ -357,7 +357,6 @@ mod test { ), vec![], )], - committed_time: Instant::now(), }), ApplyTask::CaptureApply(CaptureChange { observer: ChangeObserver::from_cdc(region.id, ObserveHandle::new()), @@ -376,7 +375,6 @@ mod test { ), vec![], )], - committed_time: Instant::now(), }), ]; diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index 0e6a09cbf0b..baf63814416 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -246,11 +246,11 @@ impl TimeTracker { now: std::time::Instant, local_metric: &LocalHistogram, tracker_metric: impl FnOnce(&mut Tracker) -> &mut u64, - ) { + ) -> u64 { let dur = now.saturating_duration_since(self.start); local_metric.observe(dur.as_secs_f64()); if self.token == INVALID_TRACKER_TOKEN { - return; + return 0; } GLOBAL_TRACKERS.with_tracker(self.token, |tracker| { let metric = tracker_metric(tracker); @@ -258,6 +258,7 @@ impl TimeTracker { *metric = dur.as_nanos() as u64; } 
}); + dur.as_nanos() as u64 } #[inline] From c7cf0c667bd5cbe453c7571760be199196f7ef13 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 25 Apr 2023 15:07:51 +0800 Subject: [PATCH 0663/1149] raftstore: make v1 learner compatible with gc peer (#14601) close tikv/tikv#14595 Make tiflash engine compatible with gc peer Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/batch/store.rs | 4 +- components/raftstore-v2/src/operation/life.rs | 70 ++-------- .../raftstore-v2/src/operation/ready/mod.rs | 2 +- .../src/router/response_channel.rs | 18 ++- components/raftstore/src/store/fsm/life.rs | 92 +++++++++++++ components/raftstore/src/store/fsm/mod.rs | 1 + components/raftstore/src/store/fsm/peer.rs | 31 ++++- components/raftstore/src/store/fsm/store.rs | 18 +++ .../raftstore/src/store/worker/region.rs | 15 +-- tests/integrations/raftstore/mod.rs | 1 + tests/integrations/raftstore/test_life.rs | 126 ++++++++++++++++++ 11 files changed, 295 insertions(+), 83 deletions(-) create mode 100644 components/raftstore/src/store/fsm/life.rs create mode 100644 tests/integrations/raftstore/test_life.rs diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index a9e3c223943..1f6245cc010 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -1,6 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ + cmp, ops::{Deref, DerefMut}, path::Path, sync::{ @@ -120,7 +121,8 @@ impl StoreContext { self.cfg.report_region_buckets_tick_interval.0; self.tick_batch[PeerTick::CheckLongUncommitted as usize].wait_duration = self.cfg.check_long_uncommitted_interval.0; - self.tick_batch[PeerTick::GcPeer as usize].wait_duration = Duration::from_secs(60); + self.tick_batch[PeerTick::GcPeer as usize].wait_duration = + 60 * cmp::min(Duration::from_secs(1), self.cfg.raft_base_tick_interval.0); } } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index c9145e909d1..8b431ad3a98 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -36,7 +36,11 @@ use kvproto::{ raft_cmdpb::{AdminCmdType, RaftCmdRequest}, raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, RaftMessage}, }; -use raftstore::store::{metrics::RAFT_PEER_PENDING_DURATION, util, Transport, WriteTask}; +use raftstore::store::{ + fsm::life::{build_peer_destroyed_report, forward_destroy_to_source_peer}, + metrics::RAFT_PEER_PENDING_DURATION, + util, Transport, WriteTask, +}; use slog::{debug, error, info, warn}; use tikv_util::{ store::find_peer, @@ -390,7 +394,9 @@ impl Store { if extra_msg.get_type() == ExtraMessageType::MsgGcPeerRequest && extra_msg.has_check_gc_peer() { - forward_destroy_to_source_peer(ctx, &msg); + forward_destroy_to_source_peer(&msg, |m| { + let _ = ctx.router.send_raft_message(m.into()); + }); return; } } @@ -467,62 +473,6 @@ impl Store { } } -/// Tell leader that `to_peer` from `tombstone_msg` is destroyed. 
-fn build_peer_destroyed_report(tombstone_msg: &mut RaftMessage) -> Option { - let to_region_id = if tombstone_msg.has_extra_msg() { - assert_eq!( - tombstone_msg.get_extra_msg().get_type(), - ExtraMessageType::MsgGcPeerRequest - ); - tombstone_msg - .get_extra_msg() - .get_check_gc_peer() - .get_from_region_id() - } else { - tombstone_msg.get_region_id() - }; - if to_region_id == 0 || tombstone_msg.get_from_peer().get_id() == 0 { - return None; - } - let mut msg = RaftMessage::default(); - msg.set_region_id(to_region_id); - msg.set_from_peer(tombstone_msg.take_to_peer()); - msg.set_to_peer(tombstone_msg.take_from_peer()); - msg.mut_extra_msg() - .set_type(ExtraMessageType::MsgGcPeerResponse); - Some(msg) -} - -/// Forward the destroy request from target peer to merged source peer. -fn forward_destroy_to_source_peer(ctx: &mut StoreContext, msg: &RaftMessage) -where - EK: KvEngine, - ER: RaftEngine, - T: Transport, -{ - let extra_msg = msg.get_extra_msg(); - // Instead of respond leader directly, send a message to target region to - // double check it's really destroyed. - let check_gc_peer = extra_msg.get_check_gc_peer(); - let mut tombstone_msg = Box::::default(); - tombstone_msg.set_region_id(check_gc_peer.get_check_region_id()); - tombstone_msg.set_from_peer(msg.get_from_peer().clone()); - tombstone_msg.set_to_peer(check_gc_peer.get_check_peer().clone()); - tombstone_msg.set_region_epoch(check_gc_peer.get_check_region_epoch().clone()); - tombstone_msg.set_is_tombstone(true); - // No need to set epoch as we don't know what it is. - // This message will not be handled by `on_gc_peer_request` due to - // `is_tombstone` being true. 
- tombstone_msg - .mut_extra_msg() - .set_type(ExtraMessageType::MsgGcPeerRequest); - tombstone_msg - .mut_extra_msg() - .mut_check_gc_peer() - .set_from_region_id(check_gc_peer.get_from_region_id()); - let _ = ctx.router.send_raft_message(tombstone_msg); -} - impl Peer { pub fn on_availability_request( &mut self, @@ -622,7 +572,9 @@ impl Peer { return; } - forward_destroy_to_source_peer(ctx, msg); + forward_destroy_to_source_peer(msg, |m| { + let _ = ctx.router.send_raft_message(m.into()); + }); } /// A peer confirms it's destroyed. diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 009b31921b3..5f294d7e5b6 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -399,7 +399,7 @@ impl Peer { } // Filling start and end key is only needed for being compatible with - // raftstore v1 tiflash engine. + // raftstore v1 learners (e.g. tiflash engine). // // There could be two cases: // - Target peer already exists but has not established communication with diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index 97321aae9d1..c300b6d8726 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -273,6 +273,14 @@ impl BaseChannel { pub fn set_result(self, res: Res) { self.core.set_result(res); } + + pub fn with_callback(f: Box) -> (Self, BaseSubscriber) { + let (c, s) = pair(); + unsafe { + *c.core.before_set.get() = Some(f); + } + (c, s) + } } impl Drop for BaseChannel { @@ -616,16 +624,6 @@ impl QueryResChannel { pub fn pair() -> (Self, QueryResSubscriber) { pair() } - - pub fn with_callback( - f: Box, - ) -> (Self, QueryResSubscriber) { - let (c, s) = pair(); - unsafe { - *c.core.before_set.get() = Some(f); - } - (c, s) - } } impl ErrorCallback for QueryResChannel { diff --git 
a/components/raftstore/src/store/fsm/life.rs b/components/raftstore/src/store/fsm/life.rs new file mode 100644 index 00000000000..59aa8b316f0 --- /dev/null +++ b/components/raftstore/src/store/fsm/life.rs @@ -0,0 +1,92 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module contains functions that relates to peer liftime management and +//! are shared with raftstore and raftstore v2. + +use engine_traits::{KvEngine, CF_RAFT}; +use kvproto::raft_serverpb::{ExtraMessageType, PeerState, RaftMessage, RegionLocalState}; + +use crate::store::util::is_epoch_stale; + +/// Tell leader that `to_peer` from `tombstone_msg` is destroyed. +pub fn build_peer_destroyed_report(tombstone_msg: &mut RaftMessage) -> Option { + let to_region_id = if tombstone_msg.has_extra_msg() { + assert_eq!( + tombstone_msg.get_extra_msg().get_type(), + ExtraMessageType::MsgGcPeerRequest + ); + tombstone_msg + .get_extra_msg() + .get_check_gc_peer() + .get_from_region_id() + } else { + tombstone_msg.get_region_id() + }; + if to_region_id == 0 || tombstone_msg.get_from_peer().get_id() == 0 { + return None; + } + let mut msg = RaftMessage::default(); + msg.set_region_id(to_region_id); + msg.set_from_peer(tombstone_msg.take_to_peer()); + msg.set_to_peer(tombstone_msg.take_from_peer()); + msg.mut_extra_msg() + .set_type(ExtraMessageType::MsgGcPeerResponse); + Some(msg) +} + +/// Forward the destroy request from target peer to merged source peer. +pub fn forward_destroy_to_source_peer(msg: &RaftMessage, forward: T) { + let extra_msg = msg.get_extra_msg(); + // Instead of respond leader directly, send a message to target region to + // double check it's really destroyed. 
+ let check_gc_peer = extra_msg.get_check_gc_peer(); + let mut tombstone_msg = RaftMessage::default(); + tombstone_msg.set_region_id(check_gc_peer.get_check_region_id()); + tombstone_msg.set_from_peer(msg.get_from_peer().clone()); + tombstone_msg.set_to_peer(check_gc_peer.get_check_peer().clone()); + tombstone_msg.set_region_epoch(check_gc_peer.get_check_region_epoch().clone()); + tombstone_msg.set_is_tombstone(true); + // No need to set epoch as we don't know what it is. + // This message will not be handled by `on_gc_peer_request` due to + // `is_tombstone` being true. + tombstone_msg + .mut_extra_msg() + .set_type(ExtraMessageType::MsgGcPeerRequest); + tombstone_msg + .mut_extra_msg() + .mut_check_gc_peer() + .set_from_region_id(check_gc_peer.get_from_region_id()); + forward(tombstone_msg); +} + +pub fn handle_tombstone_message_on_learner( + engine: &EK, + store_id: u64, + mut msg: RaftMessage, +) -> Option { + let region_id = msg.get_region_id(); + let region_state_key = keys::region_state_key(region_id); + let local_state: RegionLocalState = match engine.get_msg_cf(CF_RAFT, ®ion_state_key) { + Ok(Some(s)) => s, + e => panic!( + "[store {}] failed to get regions state of {:?}: {:?}", + store_id, + msg.get_region_id(), + e + ), + }; + + if local_state.get_state() != PeerState::Tombstone { + return None; + } + + // In v2, we rely on leader to confirm destroy actively. + let local_epoch = local_state.get_region().get_region_epoch(); + // The region in this peer is already destroyed + if msg.get_region_epoch() == local_epoch || is_epoch_stale(msg.get_region_epoch(), local_epoch) + { + return build_peer_destroyed_report(&mut msg); + } + + None +} diff --git a/components/raftstore/src/store/fsm/mod.rs b/components/raftstore/src/store/fsm/mod.rs index 6f51c97c0d5..f342c1ec733 100644 --- a/components/raftstore/src/store/fsm/mod.rs +++ b/components/raftstore/src/store/fsm/mod.rs @@ -5,6 +5,7 @@ //! stores. They are mixed for now, will be separated in the future. 
pub mod apply; +pub mod life; mod metrics; mod peer; pub mod store; diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index f2d1c7ffc0e..72eb3c59753 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -64,6 +64,7 @@ use tracker::GLOBAL_TRACKERS; use txn_types::WriteBatchFlags; use self::memtrace::*; +use super::life::forward_destroy_to_source_peer; #[cfg(any(test, feature = "testexport"))] use crate::store::PeerInternalStat; use crate::{ @@ -2740,6 +2741,25 @@ where } } + // In v1, gc_peer_request is handled to be compatible with v2. + // Note: it needs to be consistent with Peer::on_gc_peer_request in v2. + fn on_gc_peer_request(&mut self, msg: RaftMessage) { + let extra_msg = msg.get_extra_msg(); + + if !extra_msg.has_check_gc_peer() || extra_msg.get_index() == 0 { + // Corrupted message. + return; + } + if self.fsm.peer.get_store().applied_index() < extra_msg.get_index() { + // Merge not finish. + return; + } + + forward_destroy_to_source_peer(&msg, |m| { + let _ = self.ctx.router.send_raft_message(m); + }); + } + fn on_extra_message(&mut self, mut msg: RaftMessage) { match msg.get_extra_msg().get_type() { ExtraMessageType::MsgRegionWakeUp | ExtraMessageType::MsgCheckStalePeer => { @@ -2795,10 +2815,15 @@ where ExtraMessageType::MsgVoterReplicatedIndexResponse => { self.on_voter_replicated_index_response(msg.get_extra_msg()); } + ExtraMessageType::MsgGcPeerRequest => { + // To make learner (e.g. tiflash engine) compatiable with raftstore v2, + // it needs to response GcPeerResponse. + if self.ctx.cfg.enable_v2_compatible_learner { + self.on_gc_peer_request(msg); + } + } // It's v2 only message and ignore does no harm. 
- ExtraMessageType::MsgGcPeerRequest - | ExtraMessageType::MsgGcPeerResponse - | ExtraMessageType::MsgFlushMemtable => (), + ExtraMessageType::MsgGcPeerResponse | ExtraMessageType::MsgFlushMemtable => (), } } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index c64b2a53c37..03c0688e8f2 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -78,6 +78,7 @@ use crate::{ config::Config, fsm::{ create_apply_batch_system, + life::handle_tombstone_message_on_learner, metrics::*, peer::{ maybe_destroy_source, new_admin_request, PeerFsm, PeerFsmDelegate, SenderFsmPair, @@ -2072,6 +2073,23 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER .inc(); return Ok(()); } + + // To make learner (e.g. tiflash engine) compatiable with raftstore v2, + // it needs to response GcPeerResponse. + if msg.get_is_tombstone() && self.ctx.cfg.enable_v2_compatible_learner { + if let Some(msg) = + handle_tombstone_message_on_learner(&self.ctx.engines.kv, self.fsm.store.id, msg) + { + let _ = self.ctx.trans.send(msg); + } + // else { + // TODO: we should create the peer and destroy immediately to leave + // a tombstone record, otherwise it leaks removed_record + // and merged_record. + // } + return Ok(()); + } + if msg.get_is_tombstone() || msg.has_merge_target() { // Target tombstone peer doesn't exist, so ignore it. 
return Ok(()); diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index d889047a0f9..0696e70b766 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -4,7 +4,7 @@ use std::{ collections::{ BTreeMap, Bound::{Excluded, Included, Unbounded}, - HashMap, VecDeque, + VecDeque, }, fmt::{self, Display, Formatter}, sync::{ @@ -16,6 +16,7 @@ use std::{ u64, }; +use collections::HashMap; use engine_traits::{DeleteStrategy, KvEngine, Mutable, Range, WriteBatch, CF_LOCK, CF_RAFT}; use fail::fail_point; use file_system::{IoType, WithIoType}; @@ -803,14 +804,10 @@ where } else { let is_tiflash = self.pd_client.as_ref().map_or(false, |pd_client| { if let Ok(s) = pd_client.get_store(to_store_id) { - if let Some(_l) = s.get_labels().iter().find(|l| { - l.key.to_lowercase() == ENGINE - && l.value.to_lowercase() == TIFLASH - }) { - return true; - } else { - return false; - } + return s.get_labels().iter().any(|label| { + label.get_key().to_lowercase() == ENGINE + && label.get_value().to_lowercase() == TIFLASH + }); } true }); diff --git a/tests/integrations/raftstore/mod.rs b/tests/integrations/raftstore/mod.rs index 08657f7e75a..5f6703afe05 100644 --- a/tests/integrations/raftstore/mod.rs +++ b/tests/integrations/raftstore/mod.rs @@ -11,6 +11,7 @@ mod test_flashback; mod test_hibernate; mod test_joint_consensus; mod test_lease_read; +mod test_life; mod test_merge; mod test_multi; mod test_prevote; diff --git a/tests/integrations/raftstore/test_life.rs b/tests/integrations/raftstore/test_life.rs new file mode 100644 index 00000000000..de394325f08 --- /dev/null +++ b/tests/integrations/raftstore/test_life.rs @@ -0,0 +1,126 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + sync::{Arc, Mutex}, + time::Duration, +}; + +use kvproto::raft_serverpb::{PeerState, RaftMessage}; +use raftstore::errors::Result; +use test_raftstore::{new_learner_peer, sleep_ms, Filter, FilterFactory, Simulator as S1}; +use test_raftstore_v2::Simulator as S2; +use tikv_util::time::Instant; + +struct ForwardFactory { + node_id: u64, + chain_send: Arc, +} + +impl FilterFactory for ForwardFactory { + fn generate(&self, _: u64) -> Vec> { + vec![Box::new(ForwardFilter { + node_id: self.node_id, + chain_send: self.chain_send.clone(), + })] + } +} + +struct ForwardFilter { + node_id: u64, + chain_send: Arc, +} + +impl Filter for ForwardFilter { + fn before(&self, msgs: &mut Vec) -> Result<()> { + for m in msgs.drain(..) { + if self.node_id == m.get_to_peer().get_store_id() { + (self.chain_send)(m); + } + } + Ok(()) + } +} + +// Create two clusters in v1 and v2, mock tiflash engine by adding tiflash +// labels to v1 cluster. Forwards v2 leader messages to v1 learner, and v1 +// learner messages to v2 leaders. +// Make sure when removing learner, v2 leader can clean up removed_record and +// merged_record eventually. +#[test] +fn test_gc_peer_tiflash_engine() { + let mut cluster_v1 = test_raftstore::new_node_cluster(1, 2); + let mut cluster_v2 = test_raftstore_v2::new_node_cluster(1, 2); + cluster_v1.cfg.raft_store.enable_v2_compatible_learner = true; + cluster_v1.pd_client.disable_default_operator(); + cluster_v2.pd_client.disable_default_operator(); + let r11 = cluster_v1.run_conf_change(); + let r21 = cluster_v2.run_conf_change(); + + // Add learner (2, 10). + cluster_v1 + .pd_client + .must_add_peer(r11, new_learner_peer(2, 10)); + cluster_v2 + .pd_client + .must_add_peer(r21, new_learner_peer(2, 10)); + // Make sure learner states are match. 
+ let start = Instant::now(); + loop { + if cluster_v1.get_raft_local_state(r11, 2).is_some() + && cluster_v1.get_raft_local_state(r11, 2) == cluster_v2.get_raft_local_state(r21, 2) + && cluster_v1.region_local_state(r11, 2).state == PeerState::Normal + && cluster_v2.region_local_state(r21, 2).state == PeerState::Normal + && cluster_v1.apply_state(r11, 2).truncated_state + == cluster_v2.apply_state(r21, 2).truncated_state + { + break; + } + if start.saturating_elapsed() > Duration::from_secs(5) { + panic!("timeout"); + } + } + + let trans1 = Mutex::new(cluster_v1.sim.read().unwrap().get_router(2).unwrap()); + let trans2 = Mutex::new(cluster_v2.sim.read().unwrap().get_router(1).unwrap()); + + // For cluster 1, it intercepts msgs sent to leader node, and then + // forwards to cluster 2 leader node. + let factory1 = ForwardFactory { + node_id: 1, + chain_send: Arc::new(move |m| { + info!("send to trans2"; "msg" => ?m); + let _ = trans2.lock().unwrap().send_raft_message(Box::new(m)); + }), + }; + cluster_v1.add_send_filter(factory1); + // For cluster 2, it intercepts msgs sent to learner node, and then + // forwards to cluster 1 learner node. + let factory2 = ForwardFactory { + node_id: 2, + chain_send: Arc::new(move |m| { + info!("send to trans1"; "msg" => ?m); + let _ = trans1.lock().unwrap().send_raft_message(m); + }), + }; + cluster_v2.add_send_filter(factory2); + + cluster_v2 + .pd_client + .must_remove_peer(r21, new_learner_peer(2, 10)); + + // Make sure leader cleans up removed_records. 
+ let start = Instant::now(); + loop { + sleep_ms(500); + if cluster_v2 + .region_local_state(r21, 1) + .get_removed_records() + .is_empty() + { + break; + } + if start.saturating_elapsed() > Duration::from_secs(5) { + panic!("timeout"); + } + } +} From 5dc8360d9f778ba52e409fcee966a838257d38c2 Mon Sep 17 00:00:00 2001 From: Rustin Date: Tue, 25 Apr 2023 15:23:51 +0800 Subject: [PATCH 0664/1149] cdc: support filter lossy DDL changes (#14629) close tikv/tikv#14630 cdc: support filter lossy DDL changes. We don't need to send those changes downstream. Signed-off-by: hi-rustin Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/cdc/src/delegate.rs | 126 ++++++++++++++++++++++++++++-- components/cdc/src/initializer.rs | 37 +++++++-- components/cdc/src/lib.rs | 1 + components/cdc/src/txn_source.rs | 116 +++++++++++++++++++++++++++ 4 files changed, 269 insertions(+), 11 deletions(-) create mode 100644 components/cdc/src/txn_source.rs diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index c4212c426be..adca54dace0 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -39,6 +39,7 @@ use crate::{ metrics::*, old_value::{OldValueCache, OldValueCallback}, service::ConnId, + txn_source::TxnSource, Error, Result, }; @@ -550,8 +551,10 @@ impl Delegate { row_size = 0; } } - // if the `txn_source` is not 0 and we should filter it out, skip this event. - if row.txn_source != 0 && filter_loop { + let lossy_ddl_filter = TxnSource::is_lossy_ddl_reorg_source_set(row.txn_source); + let cdc_write_filter = + TxnSource::is_cdc_write_source_set(row.txn_source) && filter_loop; + if lossy_ddl_filter || cdc_write_filter { continue; } if current_rows_size + row_size >= CDC_EVENT_MAX_BYTES { @@ -648,6 +651,14 @@ impl Delegate { return Ok(()); } + // Filter the entries which are lossy DDL events. + // We don't need to send them to downstream. 
+ let entries = entries + .iter() + .filter(|x| !TxnSource::is_lossy_ddl_reorg_source_set(x.txn_source)) + .cloned() + .collect::>(); + let downstreams = self.downstreams(); assert!( !downstreams.is_empty(), @@ -655,15 +666,15 @@ impl Delegate { self.region_id ); - // collect the change event cause by user write, which is `txn_source` = 0. - // for changefeed which only need the user write, send the `filtered`, or else, - // send them all. + // Collect the change event cause by user write, which cdc write source is not + // set. For changefeed which only need the user write, + // send the `filtered_entries`, or else, send them all. let mut filtered_entries = None; for downstream in downstreams { if downstream.filter_loop { let filtered = entries .iter() - .filter(|x| x.txn_source == 0) + .filter(|x| !TxnSource::is_cdc_write_source_set(x.txn_source)) .cloned() .collect::>(); if !filtered.is_empty() { @@ -692,9 +703,11 @@ impl Delegate { } else { downstream.observed_range.filter_entries(entries.clone()) }; + if entries_clone.is_empty() { return Ok(()); } + let event = Event { region_id, index, @@ -1468,6 +1481,107 @@ mod tests { assert_eq!(e.events[0].get_entries().get_entries().len(), 2, "{:?}", e); } + fn test_downstream_txn_source_filter(txn_source: TxnSource, filter_loop: bool) { + // Create a new delegate that observes [a, f). 
+ let observed_range = ObservedRange::new( + Key::from_raw(b"a").into_encoded(), + Key::from_raw(b"f").into_encoded(), + ) + .unwrap(); + let txn_extra_op = Arc::new(AtomicCell::new(TxnExtraOp::Noop)); + let mut delegate = Delegate::new(1, txn_extra_op); + assert!(delegate.handle.is_observing()); + + let mut map = HashMap::default(); + for k in b'a'..=b'e' { + let mut put = PutRequest::default(); + put.key = Key::from_raw(&[k]).into_encoded(); + put.cf = "lock".to_owned(); + let mut lock = Lock::new( + LockType::Put, + put.key.clone(), + 1.into(), + 10, + None, + TimeStamp::zero(), + 0, + TimeStamp::zero(), + ); + // Only the key `a` is a normal write. + if k != b'a' { + lock = lock.set_txn_source(txn_source.into()); + } + put.value = lock.to_bytes(); + delegate + .sink_txn_put( + put, + false, + &mut map, + |_: &mut EventRow, _: TimeStamp| Ok(()), + ) + .unwrap(); + } + assert_eq!(map.len(), 5); + + let (sink, mut drain) = channel(1, MemoryQuota::new(1024)); + let downstream = Downstream { + id: DownstreamId::new(), + req_id: 1, + conn_id: ConnId::new(), + peer: String::new(), + region_epoch: RegionEpoch::default(), + sink: Some(sink), + state: Arc::new(AtomicCell::new(DownstreamState::Normal)), + kv_api: ChangeDataRequestKvApi::TiDb, + filter_loop, + observed_range, + }; + delegate.add_downstream(downstream); + let entries = map.values().map(|(r, _)| r).cloned().collect(); + delegate + .sink_downstream(entries, 1, ChangeDataRequestKvApi::TiDb) + .unwrap(); + + let (mut tx, mut rx) = futures::channel::mpsc::unbounded(); + let runtime = tokio::runtime::Runtime::new().unwrap(); + runtime.spawn(async move { + drain.forward(&mut tx).await.unwrap(); + }); + let (e, _) = recv_timeout(&mut rx, std::time::Duration::from_secs(5)) + .unwrap() + .unwrap(); + assert_eq!(e.events[0].get_entries().get_entries().len(), 1, "{:?}", e); + } + + #[test] + fn test_downstream_filter_cdc_write_entires() { + let mut txn_source = TxnSource::default(); + 
txn_source.set_cdc_write_source(1); + + test_downstream_txn_source_filter(txn_source, true); + } + + #[test] + fn test_downstream_filter_lossy_ddl_entires() { + let mut txn_source = TxnSource::default(); + txn_source.set_lossy_ddl_reorg_source(1); + test_downstream_txn_source_filter(txn_source, false); + + // With cdr write source and filter loop is false, we should still ignore lossy + // ddl changes. + let mut txn_source = TxnSource::default(); + txn_source.set_cdc_write_source(1); + txn_source.set_lossy_ddl_reorg_source(1); + test_downstream_txn_source_filter(txn_source, false); + + // With cdr write source and filter loop is true, we should still ignore some + // events. + let mut txn_source = TxnSource::default(); + txn_source.set_cdc_write_source(1); + txn_source.set_lossy_ddl_reorg_source(1); + test_downstream_txn_source_filter(txn_source, true); + } + #[test] fn test_decode_rawkv() { let cases = vec![ diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 8f6f8ed38a7..c06b13424ba 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -591,6 +591,7 @@ mod tests { use tokio::runtime::{Builder, Runtime}; use super::*; + use crate::txn_source::TxnSource; struct ReceiverRunnable { tx: Sender, @@ -786,18 +787,16 @@ mod tests { worker.stop(); } - #[test] - fn test_initializer_filter_loop() { + fn test_initializer_txn_source_filter(txn_source: TxnSource, filter_loop: bool) { let mut engine = TestEngineBuilder::new().build_without_cache().unwrap(); let mut total_bytes = 0; - for i in 10..100 { let (k, v) = (&[b'k', i], &[b'v', i]); total_bytes += k.len(); total_bytes += v.len(); let ts = TimeStamp::new(i as _); - must_prewrite_put_with_txn_soucre(&mut engine, k, v, k, ts, 1); + must_prewrite_put_with_txn_soucre(&mut engine, k, v, k, ts, txn_source.into()); } let snap = engine.snapshot(Default::default()).unwrap(); @@ -808,7 +807,7 @@ mod tests { buffer, engine.kv_engine(), 
ChangeDataRequestKvApi::TiDb, - true, + filter_loop, ); let th = pool.spawn(async move { initializer @@ -833,6 +832,34 @@ mod tests { worker.stop(); } + #[test] + fn test_initializer_cdc_write_filter() { + let mut txn_source = TxnSource::default(); + txn_source.set_cdc_write_source(1); + test_initializer_txn_source_filter(txn_source, true); + } + + #[test] + fn test_initializer_lossy_ddl_filter() { + let mut txn_source = TxnSource::default(); + txn_source.set_lossy_ddl_reorg_source(1); + test_initializer_txn_source_filter(txn_source, false); + + // With cdr write source and filter loop is false, we should still ignore lossy + // ddl changes. + let mut txn_source = TxnSource::default(); + txn_source.set_cdc_write_source(1); + txn_source.set_lossy_ddl_reorg_source(1); + test_initializer_txn_source_filter(txn_source, false); + + // With cdr write source and filter loop is true, we should still ignore all + // events. + let mut txn_source = TxnSource::default(); + txn_source.set_cdc_write_source(1); + txn_source.set_lossy_ddl_reorg_source(1); + test_initializer_txn_source_filter(txn_source, true); + } + // Test `hint_min_ts` works fine with `ExtraOp::ReadOldValue`. // Whether `DeltaScanner` emits correct old values or not is already tested by // another case `test_old_value_with_hint_min_ts`, so here we only care about diff --git a/components/cdc/src/lib.rs b/components/cdc/src/lib.rs index 7d63bf5c115..c913cefb92e 100644 --- a/components/cdc/src/lib.rs +++ b/components/cdc/src/lib.rs @@ -13,6 +13,7 @@ pub mod metrics; mod observer; mod old_value; mod service; +mod txn_source; pub use channel::{recv_timeout, CdcEvent, MemoryQuota}; pub use config::CdcConfigManager; diff --git a/components/cdc/src/txn_source.rs b/components/cdc/src/txn_source.rs new file mode 100644 index 00000000000..81dc9f95096 --- /dev/null +++ b/components/cdc/src/txn_source.rs @@ -0,0 +1,116 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +// The bitmap: +// |RESERVED|LOSSY_DDL_REORG_SOURCE_BITS|CDC_WRITE_SOURCE_BITS| +// | 48 | 8 | 4(RESERVED) | 4 | +// +// TiCDC uses 1 - 255 to indicate the source of TiDB. +// For now, 1 - 15 are reserved for TiCDC to implement BDR synchronization. +// 16 - 255 are reserved for extendability. +const CDC_WRITE_SOURCE_BITS: u64 = 8; +const CDC_WRITE_SOURCE_MAX: u64 = (1 << CDC_WRITE_SOURCE_BITS) - 1; + +// TiCDC uses 1-255 to indicate the change from a lossy DDL reorg Backfill job. +// For now, we only use 1 for column reorg backfill job. +#[cfg(test)] +const LOSSY_DDL_REORG_SOURCE_BITS: u64 = 8; +#[cfg(test)] +const LOSSY_DDL_COLUMN_REORG_SOURCE: u64 = 1; +#[cfg(test)] +const LOSSY_DDL_REORG_SOURCE_MAX: u64 = (1 << LOSSY_DDL_REORG_SOURCE_BITS) - 1; +const LOSSY_DDL_REORG_SOURCE_SHIFT: u64 = CDC_WRITE_SOURCE_BITS; + +/// For kv.TxnSource +/// We use an uint64 to represent the source of a transaction. +/// The first 8 bits are reserved for TiCDC, and the next 8 bits are reserved +/// for Lossy DDL reorg Backfill job. The remaining 48 bits are reserved for +/// extendability. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] +pub(crate) struct TxnSource(u64); + +impl TxnSource { + #[cfg(test)] + pub(crate) fn set_cdc_write_source(&mut self, value: u64) { + if value > CDC_WRITE_SOURCE_MAX { + unreachable!("Only use it in tests") + } + self.0 |= value; + } + + #[cfg(test)] + pub(crate) fn get_cdc_write_source(&self) -> u64 { + self.0 & CDC_WRITE_SOURCE_MAX + } + + pub(crate) fn is_cdc_write_source_set(txn_source: u64) -> bool { + (txn_source & CDC_WRITE_SOURCE_MAX) != 0 + } + + #[cfg(test)] + pub(crate) fn set_lossy_ddl_reorg_source(&mut self, value: u64) { + if value > LOSSY_DDL_REORG_SOURCE_MAX { + unreachable!("Only use it in tests") + } + self.0 |= value << LOSSY_DDL_REORG_SOURCE_SHIFT; + } + + #[cfg(test)] + pub(crate) fn get_lossy_ddl_reorg_source(&self) -> u64 { + (self.0 >> LOSSY_DDL_REORG_SOURCE_SHIFT) & LOSSY_DDL_REORG_SOURCE_MAX + } + + pub(crate) fn is_lossy_ddl_reorg_source_set(txn_source: u64) -> bool { + (txn_source >> LOSSY_DDL_REORG_SOURCE_SHIFT) != 0 + } +} + +impl From for u64 { + fn from(val: TxnSource) -> Self { + val.0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_cdc_write_source() { + let mut txn_source = TxnSource::default(); + txn_source.set_cdc_write_source(1); + assert_eq!(txn_source.get_cdc_write_source(), 1); + } + + #[test] + fn test_is_cdc_write_source_set() { + let mut txn_source = TxnSource::default(); + txn_source.set_cdc_write_source(1); + assert_eq!(TxnSource::is_cdc_write_source_set(txn_source.0), true); + + let txn_source = TxnSource::default(); + assert_eq!(TxnSource::is_cdc_write_source_set(txn_source.0), false); + } + + #[test] + fn test_get_lossy_ddl_reorg_source() { + let mut txn_source = TxnSource::default(); + txn_source.set_lossy_ddl_reorg_source(LOSSY_DDL_COLUMN_REORG_SOURCE); + assert_eq!( + txn_source.get_lossy_ddl_reorg_source(), + LOSSY_DDL_COLUMN_REORG_SOURCE + ); + } + + #[test] + fn test_is_lossy_ddl_reorg_source_set() { + let mut txn_source = 
TxnSource::default(); + txn_source.set_lossy_ddl_reorg_source(LOSSY_DDL_COLUMN_REORG_SOURCE); + assert_eq!(TxnSource::is_lossy_ddl_reorg_source_set(txn_source.0), true); + + let txn_source = TxnSource::default(); + assert_eq!( + TxnSource::is_lossy_ddl_reorg_source_set(txn_source.0), + false + ); + } +} From eb2ad9865a6f321a3612b07ca723436b99ea0255 Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 26 Apr 2023 13:07:51 +0800 Subject: [PATCH 0665/1149] cloud: update dependencies on Azure. (#14610) close tikv/tikv#14609 Update the Azure SDK to latest version to support later developments. Signed-off-by: LykxSassinator --- Cargo.lock | 427 +++++++++++++++++++-------- components/cloud/azure/Cargo.toml | 13 +- components/cloud/azure/src/azblob.rs | 142 +++++---- 3 files changed, 413 insertions(+), 169 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bda2a12187d..48360c51100 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -175,6 +175,15 @@ dependencies = [ "zstd-safe", ] +[[package]] +name = "async-lock" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa24f727524730b077666307f2734b4a1a1c57acb79193127dcc8914d5242dd7" +dependencies = [ + "event-listener", +] + [[package]] name = "async-speed-limit" version = "0.4.0" @@ -229,18 +238,6 @@ dependencies = [ "syn", ] -[[package]] -name = "async-timer" -version = "1.0.0-beta.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d962799a5863fdf06fbf594e04102130582d010379137e9a98a7e2e693a5885" -dependencies = [ - "error-code", - "libc 0.2.139", - "wasm-bindgen", - "winapi 0.3.9", -] - [[package]] name = "async-trait" version = "0.1.58" @@ -282,7 +279,7 @@ name = "aws" version = "0.0.1" dependencies = [ "async-trait", - "base64", + "base64 0.13.0", "bytes", "cloud", "fail", @@ -364,91 +361,113 @@ dependencies = [ "azure_core", "azure_identity", "azure_storage", - "base64", - "chrono", + "azure_storage_blobs", + "base64 0.13.0", "cloud", "futures 0.3.15", 
"futures-util", "kvproto", "lazy_static", "oauth2", + "openssl", "regex", + "serde", + "serde_json", "slog", "slog-global", "tikv_util", + "time 0.3.20", "tokio", "url", + "uuid 1.2.1", ] [[package]] name = "azure_core" -version = "0.1.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#b3c53f4cec4a6b541e49388b51e696dc892f18a3" +version = "0.11.0" +source = "git+https://github.com/Azure/azure-sdk-for-rust#e21e2ec6bae784a717ac7b3cf1123d3a9596f074" dependencies = [ "async-trait", - "base64", + "base64 0.21.0", "bytes", - "chrono", "dyn-clone", "futures 0.3.15", "getrandom 0.2.3", - "http", + "http-types", "log", - "oauth2", + "paste", + "pin-project", + "quick-xml 0.28.2", "rand 0.8.5", "reqwest", "rustc_version 0.4.0", "serde", - "serde_derive", "serde_json", - "thiserror", + "time 0.3.20", "url", - "uuid 0.8.2", + "uuid 1.2.1", ] [[package]] name = "azure_identity" -version = "0.1.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#b3c53f4cec4a6b541e49388b51e696dc892f18a3" +version = "0.11.0" +source = "git+https://github.com/Azure/azure-sdk-for-rust#e21e2ec6bae784a717ac7b3cf1123d3a9596f074" dependencies = [ - "async-timer", + "async-lock", "async-trait", "azure_core", - "chrono", + "fix-hidden-lifetime-bug", "futures 0.3.15", "log", "oauth2", - "reqwest", + "pin-project", "serde", "serde_json", - "thiserror", + "time 0.3.20", "url", + "uuid 1.2.1", ] [[package]] name = "azure_storage" -version = "0.1.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#b3c53f4cec4a6b541e49388b51e696dc892f18a3" +version = "0.11.0" +source = "git+https://github.com/Azure/azure-sdk-for-rust#e21e2ec6bae784a717ac7b3cf1123d3a9596f074" dependencies = [ "RustyXML", "async-trait", "azure_core", - "base64", "bytes", - "chrono", "futures 0.3.15", - "http", + "hmac 0.12.1", "log", - "md5", "once_cell", - "ring", "serde", - "serde-xml-rs", "serde_derive", "serde_json", - "thiserror", + "sha2 0.10.6", + "time 0.3.20", "url", - "uuid 0.8.2", + "uuid 1.2.1", +] + 
+[[package]] +name = "azure_storage_blobs" +version = "0.11.0" +source = "git+https://github.com/Azure/azure-sdk-for-rust#e21e2ec6bae784a717ac7b3cf1123d3a9596f074" +dependencies = [ + "RustyXML", + "azure_core", + "azure_storage", + "bytes", + "futures 0.3.15", + "log", + "md5", + "serde", + "serde_derive", + "serde_json", + "time 0.3.20", + "url", + "uuid 1.2.1", ] [[package]] @@ -588,6 +607,12 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "base64" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" + [[package]] name = "batch-system" version = "0.1.0" @@ -700,6 +725,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "boolinator" version = "2.4.0" @@ -931,7 +965,7 @@ dependencies = [ "num-integer", "num-traits", "serde", - "time", + "time 0.1.42", ] [[package]] @@ -1113,6 +1147,15 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "cpufeatures" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" +dependencies = [ + "libc 0.2.139", +] + [[package]] name = "cpuid-bool" version = "0.1.2" @@ -1282,6 +1325,16 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "crypto-mac" version = "0.10.0" @@ 
-1400,6 +1453,17 @@ dependencies = [ "generic-array", ] +[[package]] +name = "digest" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" +dependencies = [ + "block-buffer 0.10.4", + "crypto-common", + "subtle", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -1556,7 +1620,7 @@ dependencies = [ "tempfile", "tikv_alloc", "tikv_util", - "time", + "time 0.1.42", "toml", "tracker", "txn_types", @@ -1711,16 +1775,6 @@ dependencies = [ "version_check 0.1.5", ] -[[package]] -name = "error-code" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5115567ac25674e0043e472be13d14e537f37ea8aa4bdc4aef0c89add1db1ff" -dependencies = [ - "libc 0.2.139", - "str-buf", -] - [[package]] name = "error_code" version = "0.0.1" @@ -1861,6 +1915,15 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f35ce9c8fb9891c75ceadbc330752951a4e369b50af10775955aeb9af3eee34b" +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + [[package]] name = "ffi-support" version = "0.4.2" @@ -1935,6 +1998,26 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "fix-hidden-lifetime-bug" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4ae9c2016a663983d4e40a9ff967d6dcac59819672f0b47f2b17574e99c33c8" +dependencies = [ + "fix-hidden-lifetime-bug-proc_macros", +] + +[[package]] +name = "fix-hidden-lifetime-bug-proc_macros" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4c81935e123ab0741c4c4f0d9b8377e5fb21d3de7e062fa4b1263b1fbcba1ea" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "fixedbitset" 
version = "0.4.2" @@ -2106,6 +2189,21 @@ version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acc499defb3b348f8d8f3f66415835a9131856ff7714bf10dadfc4ec4bdb29a1" +[[package]] +name = "futures-lite" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" +dependencies = [ + "fastrand", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + "waker-fn", +] + [[package]] name = "futures-macro" version = "0.3.15" @@ -2459,7 +2557,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1441c6b1e930e2817404b5046f1f989899143a12bf92de603b69f4e0aee1e15" dependencies = [ "crypto-mac", - "digest", + "digest 0.9.0", +] + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest 0.10.6", ] [[package]] @@ -2501,6 +2608,26 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bfe8eed0a9285ef776bb792479ea3834e8b94e13d615c2f66d03dd50a435a29" +[[package]] +name = "http-types" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e9b187a72d63adbfba487f48095306ac823049cb504ee195541e91c7775f5ad" +dependencies = [ + "anyhow", + "async-channel", + "base64 0.13.0", + "futures-lite", + "infer", + "pin-project-lite", + "rand 0.7.3", + "serde", + "serde_json", + "serde_qs", + "serde_urlencoded", + "url", +] + [[package]] name = "httparse" version = "1.8.0" @@ -2619,6 +2746,12 @@ dependencies = [ "hashbrown 0.9.1", ] +[[package]] +name = "infer" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" + [[package]] name = "inferno" 
version = "0.11.3" @@ -2632,7 +2765,7 @@ dependencies = [ "lazy_static", "log", "num-format", - "quick-xml", + "quick-xml 0.22.0", "rgb", "str_stack", ] @@ -3064,8 +3197,8 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b5a279bb9607f9f53c22d496eade00d138d1bdcccd07d74650387cf94942a15" dependencies = [ - "block-buffer", - "digest", + "block-buffer 0.9.0", + "digest 0.9.0", "opaque-debug", ] @@ -3549,22 +3682,30 @@ dependencies = [ "libc 0.2.139", ] +[[package]] +name = "num_threads" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +dependencies = [ + "libc 0.2.139", +] + [[package]] name = "oauth2" version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80e47cfc4c0a1a519d9a025ebfbac3a2439d1b5cdf397d72dcb79b11d9920dab" dependencies = [ - "base64", + "base64 0.13.0", "chrono", "getrandom 0.2.3", "http", "rand 0.8.5", - "reqwest", "serde", "serde_json", "serde_path_to_error", - "sha2", + "sha2 0.9.1", "thiserror", "url", ] @@ -3617,9 +3758,9 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" [[package]] name = "openssl" -version = "0.10.41" +version = "0.10.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "618febf65336490dfcf20b73f885f5651a0c89c64c2d4a8c3662585a70bf5bd0" +checksum = "7e30d8bc91859781f0a943411186324d580f2bbeb71b452fe91ae344806af3f1" dependencies = [ "bitflags", "cfg-if 1.0.0", @@ -3658,11 +3799,10 @@ dependencies = [ [[package]] name = "openssl-sys" -version = "0.9.75" +version = "0.9.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5f9bd0c2710541a3cda73d6f9ac4f1b240de4ae261065d309dbe73d9dceb42f" +checksum = "0d3d193fb1488ad46ffe3aaabc912cc931d02ee8518fe2959aea8ef52718b0c0" dependencies = [ - "autocfg", "cc", "libc 0.2.139", "openssl-src", @@ -3702,6 +3842,12 
@@ dependencies = [ name = "panic_hook" version = "0.0.1" +[[package]] +name = "parking" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14f2252c834a40ed9bb5422029649578e63aa341ac401f74e719dd1afda8394e" + [[package]] name = "parking_lot" version = "0.11.1" @@ -4247,6 +4393,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "quick-xml" +version = "0.28.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce5e73202a820a31f8a0ee32ada5e21029c81fd9e3ebf668a40832e4219d9d1" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quote" version = "1.0.18" @@ -4346,7 +4502,7 @@ dependencies = [ "slog-global", "tempfile", "tikv_util", - "time", + "time 0.1.42", "tracker", ] @@ -4413,7 +4569,7 @@ dependencies = [ "tidb_query_datatype", "tikv_alloc", "tikv_util", - "time", + "time 0.1.42", "tokio", "tracker", "txn_types", @@ -4462,7 +4618,7 @@ dependencies = [ "test_util", "thiserror", "tikv_util", - "time", + "time 0.1.42", "tracker", "txn_types", "yatp", @@ -4693,7 +4849,7 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0460542b551950620a3648c6aa23318ac6b3cd779114bd873209e6e8b5eb1c34" dependencies = [ - "base64", + "base64 0.13.0", "bytes", "encoding_rs 0.8.29 (registry+https://github.com/rust-lang/crates.io-index)", "futures-core", @@ -4711,7 +4867,6 @@ dependencies = [ "percent-encoding", "pin-project-lite", "serde", - "serde_json", "serde_urlencoded", "tokio", "tokio-native-tls", @@ -4885,7 +5040,7 @@ version = "0.46.0" source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" dependencies = [ "async-trait", - "base64", + "base64 0.13.0", "bytes", "crc32fast", "futures 0.3.15", @@ -4966,13 +5121,13 @@ name = "rusoto_signature" version = "0.46.0" source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" dependencies 
= [ - "base64", + "base64 0.13.0", "bytes", "chrono", - "digest", + "digest 0.9.0", "futures 0.3.15", "hex 0.4.2", - "hmac", + "hmac 0.10.1", "http", "hyper", "log", @@ -4982,7 +5137,7 @@ dependencies = [ "rusoto_credential", "rustc_version 0.3.3", "serde", - "sha2", + "sha2 0.9.1", "tokio", ] @@ -5203,25 +5358,13 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.106" +version = "1.0.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36df6ac6412072f67cf767ebbde4133a5b2e88e76dc6187fa7104cd16f783399" +checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" dependencies = [ "serde_derive", ] -[[package]] -name = "serde-xml-rs" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0bf1ba0696ccf0872866277143ff1fd14d22eec235d2b23702f95e6660f7dfa" -dependencies = [ - "log", - "serde", - "thiserror", - "xml-rs", -] - [[package]] name = "serde_cbor" version = "0.11.1" @@ -5234,9 +5377,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.106" +version = "1.0.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e549e3abf4fb8621bd1609f11dfc9f5e50320802273b12f3811a67e6716ea6c" +checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" dependencies = [ "proc-macro2", "quote", @@ -5273,6 +5416,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_qs" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7715380eec75f029a4ef7de39a9200e0a63823176b759d055b613f5a87df6a6" +dependencies = [ + "percent-encoding", + "serde", + "thiserror", +] + [[package]] name = "serde_repr" version = "0.1.9" @@ -5381,13 +5535,24 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2933378ddfeda7ea26f48c555bdad8bb446bf8a3d17832dc83e380d444cfb8c1" dependencies = [ - "block-buffer", + "block-buffer 0.9.0", "cfg-if 0.1.10", 
"cpuid-bool", - "digest", + "digest 0.9.0", "opaque-debug", ] +[[package]] +name = "sha2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" +dependencies = [ + "cfg-if 1.0.0", + "cpufeatures", + "digest 0.10.6", +] + [[package]] name = "shlex" version = "0.1.1" @@ -5638,12 +5803,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "str-buf" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d44a3643b4ff9caf57abcee9c2c621d6c03d9135e0d8b589bd9afb5992cb176a" - [[package]] name = "str_stack" version = "0.1.0" @@ -5737,9 +5896,9 @@ dependencies = [ [[package]] name = "subtle" -version = "2.3.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "343f3f510c2915908f155e94f17220b19ccfacf2a64a2a5d8004f2c3e311e7fd" +checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "symbolic-common" @@ -5807,7 +5966,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d20ec2d6525a66afebdff9e1d8ef143c9deae9a3b040c61d3cfa9ae6fda80060" dependencies = [ - "base64", + "base64 0.13.0", "bytes", "chrono", "futures-util", @@ -5827,7 +5986,7 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9435c9348e480fad0f2215d5602e2dfad03df8a6398c4e7ceaeaa42758f26a8a" dependencies = [ - "base64", + "base64 0.13.0", "chrono", "http", "lock_api", @@ -6135,7 +6294,7 @@ dependencies = [ "slog-global", "tempfile", "tikv_util", - "time", + "time 0.1.42", ] [[package]] @@ -6213,7 +6372,7 @@ dependencies = [ "tikv", "tikv_kv", "tikv_util", - "time", + "time 0.1.42", "tipb", "tipb_helper", "tokio", @@ -6311,7 +6470,7 @@ dependencies = [ 
"serde_json", "thiserror", "tikv_util", - "time", + "time 0.1.42", "yatp", ] @@ -6320,7 +6479,7 @@ name = "tidb_query_datatype" version = "0.0.1" dependencies = [ "api_version", - "base64", + "base64 0.13.0", "bitfield", "bitflags", "boolinator", @@ -6390,7 +6549,7 @@ dependencies = [ name = "tidb_query_expr" version = "0.0.1" dependencies = [ - "base64", + "base64 0.13.0", "bstr", "byteorder", "chrono", @@ -6416,7 +6575,7 @@ dependencies = [ "tidb_query_common", "tidb_query_datatype", "tikv_util", - "time", + "time 0.1.42", "tipb", "tipb_helper", "twoway", @@ -6537,7 +6696,7 @@ dependencies = [ "tikv_alloc", "tikv_kv", "tikv_util", - "time", + "time 0.1.42", "tipb", "tokio", "tokio-openssl", @@ -6598,7 +6757,7 @@ dependencies = [ "tikv", "tikv_alloc", "tikv_util", - "time", + "time 0.1.42", "tokio", "toml", "txn_types", @@ -6645,7 +6804,7 @@ dependencies = [ "serde_json", "server", "tikv", - "time", + "time 0.1.42", "toml", ] @@ -6756,7 +6915,7 @@ dependencies = [ "tempfile", "thiserror", "tikv_alloc", - "time", + "time 0.1.42", "tokio", "tokio-executor", "tokio-timer", @@ -6778,6 +6937,35 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "time" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" +dependencies = [ + "itoa 1.0.1", + "libc 0.2.139", + "num_threads", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" + +[[package]] +name = "time-macros" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" +dependencies = [ + "time-core", +] + [[package]] name = "tinytemplate" version = "1.2.0" @@ -6936,7 +7124,7 @@ dependencies = [ "async-stream 
0.3.3", "async-trait", "axum", - "base64", + "base64 0.13.0", "bytes", "futures-core", "futures-util", @@ -7124,9 +7312,9 @@ dependencies = [ [[package]] name = "typenum" -version = "1.12.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "ucd-trie" @@ -7221,6 +7409,9 @@ name = "uuid" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "feb41e78f93363bb2df8b0e86a2ca30eed7806ea16ea0c790d757cf93f79be83" +dependencies = [ + "getrandom 0.2.3", +] [[package]] name = "valgrind_request" @@ -7262,6 +7453,12 @@ dependencies = [ "syn", ] +[[package]] +name = "waker-fn" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca" + [[package]] name = "walkdir" version = "2.3.1" diff --git a/components/cloud/azure/Cargo.toml b/components/cloud/azure/Cargo.toml index 0a45ccc2c63..b9ba7732e9e 100644 --- a/components/cloud/azure/Cargo.toml +++ b/components/cloud/azure/Cargo.toml @@ -6,20 +6,25 @@ publish = false [dependencies] async-trait = "0.1" -azure_core = { version = "0.1.0", git = "https://github.com/Azure/azure-sdk-for-rust" } -azure_identity = { version = "0.1.0", git = "https://github.com/Azure/azure-sdk-for-rust" } -azure_storage = { version = "0.1.0", git = "https://github.com/Azure/azure-sdk-for-rust", default-features = false, features = ["account", "blob"] } +azure_core = { version = "0.11.0", git = "https://github.com/Azure/azure-sdk-for-rust" } +azure_identity = { version = "0.11.0", git = "https://github.com/Azure/azure-sdk-for-rust" } +azure_storage = { version = "0.11.0", git = "https://github.com/Azure/azure-sdk-for-rust", default-features = false } +azure_storage_blobs = { version = "0.11.0", git = 
"https://github.com/Azure/azure-sdk-for-rust" } base64 = "0.13" -chrono = "0.4" cloud = { workspace = true } futures = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } kvproto = { workspace = true } lazy_static = "1.4.0" oauth2 = { version = "4.0.0", default-features = false } +openssl = { version = "0.10.50" } regex = "1" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" slog = { workspace = true } slog-global = { workspace = true } tikv_util = { workspace = true } +time = { version = "0.3", features = ["local-offset"] } tokio = { version = "1.5", features = ["time"] } url = "2.0" +uuid = { version = "1.0", features = ["v4"] } diff --git a/components/cloud/azure/src/azblob.rs b/components/cloud/azure/src/azblob.rs index 47d2d731da8..7f7483a3e8a 100644 --- a/components/cloud/azure/src/azblob.rs +++ b/components/cloud/azure/src/azblob.rs @@ -8,17 +8,15 @@ use std::{ use async_trait::async_trait; use azure_core::{ auth::{TokenCredential, TokenResponse}, - prelude::*, + new_http_client, }; -use azure_identity::token_credentials::{ClientSecretCredential, TokenCredentialOptions}; -use azure_storage::{ - blob::prelude::*, - core::{prelude::*, ConnectionStringBuilder}, -}; -use chrono::{Duration as ChronoDuration, Utc}; +use azure_identity::{ClientSecretCredential, TokenCredentialOptions}; +use azure_storage::{prelude::*, ConnectionString, ConnectionStringBuilder}; +use azure_storage_blobs::prelude::*; use cloud::blob::{ none_to_empty, BlobConfig, BlobStorage, BucketConf, PutResource, StringNonEmpty, }; +use futures::TryFutureExt; use futures_util::{ io::{AsyncRead, AsyncReadExt}, stream, @@ -33,6 +31,7 @@ use tikv_util::{ debug, stream::{retry, RetryError}, }; +use time::OffsetDateTime; use tokio::{ sync::Mutex, time::{timeout, Duration}, @@ -310,10 +309,9 @@ impl AzureUploader { .get_client() .await .map_err(|e| e.to_string())? 
- .as_blob_client(&self.name) + .blob_client(&self.name) .put_block_blob(data.to_vec()) .access_tier(self.storage_class) - .execute() .await?; Ok(()) }) @@ -414,13 +412,13 @@ impl ContainerBuilder for TokenCredContainerBuilder { { let token_response = self.token_cache.read().unwrap(); if let Some(ref t) = *token_response { - let interval = t.0.expires_on - Utc::now(); + let interval = (t.0.expires_on - OffsetDateTime::now_utc()).whole_minutes(); // keep token updated 5 minutes before it expires - if interval > ChronoDuration::minutes(TOKEN_UPDATE_LEFT_TIME_MINS) { + if interval > TOKEN_UPDATE_LEFT_TIME_MINS { return Ok(t.1.clone()); } - if interval > ChronoDuration::minutes(TOKEN_EXPIRE_LEFT_TIME_MINS) { + if interval > TOKEN_EXPIRE_LEFT_TIME_MINS { // there still have time to use the token, // and only need one thread to update token. if let Ok(l) = self.modify_place.try_lock() { @@ -443,9 +441,9 @@ impl ContainerBuilder for TokenCredContainerBuilder { { let token_response = self.token_cache.read().unwrap(); if let Some(ref t) = *token_response { - let interval = t.0.expires_on - Utc::now(); + let interval = (t.0.expires_on - OffsetDateTime::now_utc()).whole_minutes(); // token is already updated - if interval > ChronoDuration::minutes(TOKEN_UPDATE_LEFT_TIME_MINS) { + if interval > TOKEN_UPDATE_LEFT_TIME_MINS { return Ok(t.1.clone()); } } @@ -457,14 +455,12 @@ impl ContainerBuilder for TokenCredContainerBuilder { .get_token(&self.token_resource) .await .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, format!("{}", &e)))?; - let http_client = new_http_client(); - let storage_client = StorageAccountClient::new_bearer_token( - http_client, + let blob_service = BlobServiceClient::new( self.account_name.clone(), - token.token.secret(), - ) - .as_storage_client() - .as_container_client(self.container_name.clone()); + StorageCredentials::BearerToken(token.token.secret().into()), + ); + let storage_client = + 
Arc::new(blob_service.container_client(self.container_name.clone())); { let mut token_response = self.token_cache.write().unwrap(); @@ -493,22 +489,54 @@ impl AzureStorage { Self::new(Config::from_input(input)?) } + /// Mock a dummpy AzureStorage with a shared key Config for + /// testing by Azurite tool. + /// + /// This function should only be used for testing Blob with a + /// local Azurite server. + #[cfg(test)] + #[allow(dead_code)] + fn from_dummy_input(input: InputConfig) -> io::Result { + let config = Config::from_input(input)?; + let bucket = (*config.bucket.bucket).to_owned(); + Ok(AzureStorage { + config, + client_builder: Arc::new(SharedKeyContainerBuilder { + container_client: Arc::new( + ClientBuilder::emulator() + .blob_service_client() + .container_client(bucket), + ), + }), + }) + } + pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { Self::new(Config::from_cloud_dynamic(cloud_dynamic)?) } pub fn new(config: Config) -> io::Result { + let bucket = (*config.bucket.bucket).to_owned(); // priority: explicit shared key > env Azure AD > env shared key if let Some(connection_string) = config.parse_plaintext_account_url() { - let bucket = (*config.bucket.bucket).to_owned(); - let http_client = new_http_client(); - let container_client = StorageAccountClient::new_connection_string( - http_client.clone(), - connection_string.as_str(), - ) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, format!("{}", &e)))? - .as_storage_client() - .as_container_client(bucket); + let account_name = config.get_account_name()?; + let storage_credentials = ConnectionString::new(&connection_string) + .map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidInput, + format!("invalid configurations for SharedKey, err: {}", e), + ) + })? 
+ .storage_credentials() + .map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidInput, + format!("invalid credentials for blob, err: {}", e), + ) + })?; + let container_client = Arc::new( + BlobServiceClient::new(account_name, storage_credentials).container_client(bucket), + ); let client_builder = Arc::new(SharedKeyContainerBuilder { container_client }); Ok(AzureStorage { @@ -516,10 +544,10 @@ impl AzureStorage { client_builder, }) } else if let Some(credential_info) = config.credential_info.as_ref() { - let bucket = (*config.bucket.bucket).to_owned(); let account_name = config.get_account_name()?; let token_resource = format!("https://{}.blob.core.windows.net", &account_name); let cred = ClientSecretCredential::new( + new_http_client(), credential_info.tenant_id.clone(), credential_info.client_id.to_string(), credential_info.client_secret.secret().clone(), @@ -538,15 +566,24 @@ impl AzureStorage { client_builder, }) } else if let Some(connection_string) = config.parse_env_plaintext_account_url() { - let bucket = (*config.bucket.bucket).to_owned(); - let http_client = new_http_client(); - let container_client = StorageAccountClient::new_connection_string( - http_client.clone(), - connection_string.as_str(), - ) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, format!("{}", &e)))? - .as_storage_client() - .as_container_client(bucket); + let account_name = config.get_account_name()?; + let storage_credentials = ConnectionString::new(&connection_string) + .map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidInput, + format!("invald configurations for SharedKey from ENV, err: {}", e), + ) + })? 
+ .storage_credentials() + .map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidInput, + format!("invalid credentials for blob, err: {}", e), + ) + })?; + let container_client = Arc::new( + BlobServiceClient::new(account_name, storage_credentials).container_client(bucket), + ); let client_builder = Arc::new(SharedKeyContainerBuilder { container_client }); Ok(AzureStorage { @@ -576,7 +613,7 @@ impl AzureStorage { let name = self.maybe_prefix_key(name); debug!("read file from Azure storage"; "key" => %name); let t = async move { - let blob_client = self.client_builder.get_client().await?.as_blob_client(name); + let blob_client = self.client_builder.get_client().await?.blob_client(name); let builder = if let Some(r) = range { blob_client.get().range(r) @@ -584,15 +621,20 @@ impl AzureStorage { blob_client.get() }; - builder - .execute() - .await - .map(|res| res.data) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, format!("{}", e))) + let mut chunk: Vec = vec![]; + let mut stream = builder.into_stream(); + while let Some(value) = stream.next().await { + let value = value?.data.collect().await?; + chunk.extend(&value); + } + azure_core::Result::Ok(chunk) }; - let k = stream::once(t); - let t = k.boxed().into_async_read(); - Box::new(t) + let stream = stream::once( + t.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, format!("{}", e))), + ) + .boxed() + .into_async_read(); + Box::new(stream) } } @@ -716,7 +758,7 @@ mod tests { input.set_endpoint("http://127.0.0.1:10000/devstoreaccount1".to_owned()); input.set_prefix("backup 01/prefix/".to_owned()); - let storage = AzureStorage::from_input(input).unwrap(); + let storage = AzureStorage::from_dummy_input(input).unwrap(); assert_eq!(storage.maybe_prefix_key("t"), "backup 01/prefix/t"); let mut magic_contents = String::new(); for _ in 0..4096 { From b96fe4de8c028e9731eb7ee9c1a158cccd3ee8ea Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 26 Apr 2023 13:37:51 +0800 Subject: [PATCH 0666/1149] update 
pprof (#14635) close tikv/tikv#14224 Fix fd leak caused by continuous profiling Signed-off-by: tabokie --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 48360c51100..269b749145f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4129,9 +4129,9 @@ dependencies = [ [[package]] name = "pprof" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e20150f965e0e4c925982b9356da71c84bcd56cb66ef4e894825837cbcf6613e" +checksum = "196ded5d4be535690899a4631cc9f18cdc41b7ebf24a79400f46f48e49a11059" dependencies = [ "backtrace", "cfg-if 1.0.0", @@ -4139,7 +4139,7 @@ dependencies = [ "inferno", "libc 0.2.139", "log", - "nix 0.24.1", + "nix 0.26.2", "once_cell", "parking_lot 0.12.1", "protobuf", From 8656623b8b9c9a590b9f61aedcce23ee38ed5023 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Wed, 26 Apr 2023 14:21:52 +0800 Subject: [PATCH 0667/1149] txn: Check whether the primary matches when handling check_txn_status requests (#14637) close tikv/tikv#14636, ref pingcap/tidb#42937 Makes TiKV support checking whether the lock is primary when handling check_txn_status. 
Signed-off-by: MyonKeminta Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 2 +- components/error_code/src/storage.rs | 2 + etc/error_code.toml | 5 ++ src/storage/errors.rs | 7 ++ src/storage/mod.rs | 10 ++- src/storage/mvcc/mod.rs | 5 ++ src/storage/txn/actions/check_txn_status.rs | 8 ++ src/storage/txn/commands/check_txn_status.rs | 88 ++++++++++++++++---- src/storage/txn/commands/mod.rs | 1 + 9 files changed, 110 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 269b749145f..7e5ea1bc862 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2930,7 +2930,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#10e7620a630db63d769503ba99c7389f19fb6516" +source = "git+https://github.com/pingcap/kvproto.git#14ac513b9eff75028da1a56f54d36bfb082ac54f" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/error_code/src/storage.rs b/components/error_code/src/storage.rs index e2cf34094c3..8b41e7a797e 100644 --- a/components/error_code/src/storage.rs +++ b/components/error_code/src/storage.rs @@ -43,5 +43,7 @@ define_error_codes!( ASSERTION_FAILED => ("AssertionFailed", "", ""), LOCK_IF_EXISTS_FAILED => ("LockIfExistsFailed", "", ""), + PRIMARY_MISMATCH => ("PrimaryMismatch", "", ""), + UNKNOWN => ("Unknown", "", "") ); diff --git a/etc/error_code.toml b/etc/error_code.toml index 4fae4d9ea57..839c4f33f32 100644 --- a/etc/error_code.toml +++ b/etc/error_code.toml @@ -753,6 +753,11 @@ error = ''' KV:Storage:LockIfExistsFailed ''' +["KV:Storage:PrimaryMismatch"] +error = ''' +KV:Storage:PrimaryMismatch +''' + ["KV:Storage:Unknown"] error = ''' KV:Storage:Unknown diff --git a/src/storage/errors.rs b/src/storage/errors.rs index 92568d22e45..07ea4b5589e 100644 --- a/src/storage/errors.rs +++ b/src/storage/errors.rs @@ -424,6 +424,13 @@ pub fn extract_key_error(err: &Error) -> kvrpcpb::KeyError { 
assertion_failed.set_existing_commit_ts(existing_commit_ts.into_inner()); key_error.set_assertion_failed(assertion_failed); } + Error(box ErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( + box MvccErrorInner::PrimaryMismatch(lock_info), + ))))) => { + let mut primary_mismatch = kvrpcpb::PrimaryMismatch::default(); + primary_mismatch.set_lock_info(lock_info.clone()); + key_error.set_primary_mismatch(primary_mismatch); + } _ => { error!(?*err; "txn aborts"); key_error.set_abort(format!("{:?}", err)); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 37263ce9a12..897968ef671 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -7910,6 +7910,7 @@ mod tests { false, false, false, + true, Context::default(), ), expect_fail_callback(tx.clone(), 0, |e| match e { @@ -7936,6 +7937,7 @@ mod tests { true, false, false, + true, Context::default(), ), expect_value_callback(tx.clone(), 0, LockNotExist), @@ -7993,6 +7995,7 @@ mod tests { true, false, false, + true, Context::default(), ), expect_value_callback( @@ -8038,6 +8041,7 @@ mod tests { true, false, false, + true, Context::default(), ), expect_value_callback(tx.clone(), 0, committed(ts(20, 0))), @@ -8049,7 +8053,7 @@ mod tests { .sched_txn_command( commands::Prewrite::with_lock_ttl( vec![Mutation::make_put(k.clone(), v)], - k.as_encoded().to_vec(), + k.to_raw().unwrap(), ts(25, 0), 100, ), @@ -8069,6 +8073,7 @@ mod tests { true, false, false, + true, Context::default(), ), expect_value_callback(tx.clone(), 0, TtlExpire), @@ -9411,6 +9416,7 @@ mod tests { false, false, false, + true, Context::default(), ), expect_value_callback( @@ -9447,6 +9453,7 @@ mod tests { false, false, false, + true, Context::default(), ), expect_value_callback(tx.clone(), 0, TxnStatus::TtlExpire), @@ -9840,6 +9847,7 @@ mod tests { true, false, false, + true, Default::default(), ), expect_ok_callback(tx.clone(), 0), diff --git a/src/storage/mvcc/mod.rs b/src/storage/mvcc/mod.rs index 0f133b99941..2f9a75b2a03 100644 --- 
a/src/storage/mvcc/mod.rs +++ b/src/storage/mvcc/mod.rs @@ -169,6 +169,9 @@ pub enum ErrorInner { )] LockIfExistsFailed { start_ts: TimeStamp, key: Vec }, + #[error("check_txn_status sent to secondary lock, current lock: {0:?}")] + PrimaryMismatch(kvproto::kvrpcpb::LockInfo), + #[error("{0:?}")] Other(#[from] Box), } @@ -298,6 +301,7 @@ impl ErrorInner { key: key.clone(), }) } + ErrorInner::PrimaryMismatch(l) => Some(ErrorInner::PrimaryMismatch(l.clone())), ErrorInner::Io(_) | ErrorInner::Other(_) => None, } } @@ -400,6 +404,7 @@ impl ErrorCodeExt for Error { ErrorInner::CommitTsTooLarge { .. } => error_code::storage::COMMIT_TS_TOO_LARGE, ErrorInner::AssertionFailed { .. } => error_code::storage::ASSERTION_FAILED, ErrorInner::LockIfExistsFailed { .. } => error_code::storage::LOCK_IF_EXISTS_FAILED, + ErrorInner::PrimaryMismatch(_) => error_code::storage::PRIMARY_MISMATCH, ErrorInner::Other(_) => error_code::storage::UNKNOWN, } } diff --git a/src/storage/txn/actions/check_txn_status.rs b/src/storage/txn/actions/check_txn_status.rs index a3cd3253201..b0e1ff66232 100644 --- a/src/storage/txn/actions/check_txn_status.rs +++ b/src/storage/txn/actions/check_txn_status.rs @@ -24,7 +24,15 @@ pub fn check_txn_status_lock_exists( caller_start_ts: TimeStamp, force_sync_commit: bool, resolving_pessimistic_lock: bool, + verify_is_primary: bool, ) -> Result<(TxnStatus, Option)> { + if verify_is_primary && !primary_key.is_encoded_from(&lock.primary) { + // Return the current lock info to tell the client what the actual primary is. + return Err( + ErrorInner::PrimaryMismatch(lock.into_lock_info(primary_key.into_raw()?)).into(), + ); + } + // Never rollback or push forward min_commit_ts in check_txn_status if it's // using async commit. Rollback of async-commit locks are done during // ResolveLock. 
diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index 895c753b160..e915c0357d4 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -51,6 +51,11 @@ command! { // lock, the transaction status could not be decided if the primary lock is pessimistic too and // it's still uncertain. resolving_pessimistic_lock: bool, + // Whether it's needed to check wheter the lock on the key (if any) is the primary lock. + // This is for handling some corner cases when pessimistic transactions changes its primary + // (see https://github.com/pingcap/tidb/issues/42937 for details). + // Must be set to true, unless the client is old version that doesn't support this behavior. + verify_is_primary: bool, } } @@ -107,6 +112,7 @@ impl WriteCommand for CheckTxnStatus { self.caller_start_ts, self.force_sync_commit, self.resolving_pessimistic_lock, + self.verify_is_primary, )?, l => ( check_txn_status_missing_lock( @@ -145,7 +151,7 @@ impl WriteCommand for CheckTxnStatus { #[cfg(test)] pub mod tests { use concurrency_manager::ConcurrencyManager; - use kvproto::kvrpcpb::{Context, PrewriteRequestPessimisticAction::*}; + use kvproto::kvrpcpb::{self, Context, LockInfo, PrewriteRequestPessimisticAction::*}; use tikv_util::deadline::Deadline; use txn_types::{Key, WriteType}; @@ -153,8 +159,10 @@ pub mod tests { use crate::storage::{ kv::Engine, lock_manager::MockLockManager, + mvcc, mvcc::tests::*, txn::{ + self, commands::{pessimistic_rollback, WriteCommand, WriteContext}, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*, @@ -188,6 +196,7 @@ pub mod tests { rollback_if_not_exist, force_sync_commit, resolving_pessimistic_lock, + verify_is_primary: true, deadline: Deadline::from_now(DEFAULT_EXECUTION_DURATION_LIMIT), }; let result = command @@ -220,7 +229,7 @@ pub mod tests { rollback_if_not_exist: bool, force_sync_commit: bool, resolving_pessimistic_lock: bool, - ) { + ) -> 
txn::Error { let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); let current_ts = current_ts.into(); @@ -235,23 +244,28 @@ pub mod tests { rollback_if_not_exist, force_sync_commit, resolving_pessimistic_lock, + verify_is_primary: true, deadline: Deadline::from_now(DEFAULT_EXECUTION_DURATION_LIMIT), }; - assert!( - command - .process_write( - snapshot, - WriteContext { - lock_mgr: &MockLockManager::new(), - concurrency_manager: cm, - extra_op: Default::default(), - statistics: &mut Default::default(), - async_apply_prewrite: false, - raw_ext: None, - }, + command + .process_write( + snapshot, + WriteContext { + lock_mgr: &MockLockManager::new(), + concurrency_manager: cm, + extra_op: Default::default(), + statistics: &mut Default::default(), + async_apply_prewrite: false, + raw_ext: None, + }, + ) + .map(|r| { + panic!( + "expected check_txn_status fail but succeeded with result: {:?}", + r.pr ) - .is_err() - ); + }) + .unwrap_err() } fn committed(commit_ts: impl Into) -> impl FnOnce(TxnStatus) -> bool { @@ -1188,4 +1202,46 @@ pub mod tests { assert!(rollback.last_change_ts.is_zero()); assert_eq!(rollback.versions_to_last_change, 0); } + + #[test] + fn test_verify_is_primary() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + + let check_lock = |l: LockInfo, key: &'_ [u8], primary: &'_ [u8], lock_type| { + assert_eq!(&l.key, key); + assert_eq!(l.lock_type, lock_type); + assert_eq!(&l.primary_lock, primary); + }; + + let check_error = |e, key: &'_ [u8], primary: &'_ [u8], lock_type| match e { + txn::Error(box txn::ErrorInner::Mvcc(mvcc::Error( + box mvcc::ErrorInner::PrimaryMismatch(lock_info), + ))) => { + check_lock(lock_info, key, primary, lock_type); + } + e => panic!("unexpected error: {:?}", e), + }; + + must_acquire_pessimistic_lock(&mut engine, b"k1", b"k2", 1, 1); + let e = must_err(&mut engine, b"k1", 1, 1, 0, true, false, true); + check_error(e, b"k1", b"k2", kvrpcpb::Op::PessimisticLock); + let lock = 
must_pessimistic_locked(&mut engine, b"k1", 1, 1); + check_lock( + lock.into_lock_info(b"k1".to_vec()), + b"k1", + b"k2", + kvrpcpb::Op::PessimisticLock, + ); + + must_pessimistic_prewrite_put(&mut engine, b"k1", b"v1", b"k2", 1, 1, DoPessimisticCheck); + let e = must_err(&mut engine, b"k1", 1, 1, 0, true, false, true); + check_error(e, b"k1", b"k2", kvrpcpb::Op::Put); + let lock = must_locked(&mut engine, b"k1", 1); + check_lock( + lock.into_lock_info(b"k1".to_vec()), + b"k1", + b"k2", + kvrpcpb::Op::Put, + ); + } } diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 4c01629ef48..5e484d385f2 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -306,6 +306,7 @@ impl From for TypedCommand { req.get_rollback_if_not_exist(), req.get_force_sync_commit(), req.get_resolving_pessimistic_lock(), + req.get_verify_is_primary(), req.take_context(), ) } From 0f3013ed10d72ecc059cfe099316dfa17a49c47c Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 27 Apr 2023 11:01:52 +0800 Subject: [PATCH 0668/1149] raftstore-v2: fix stale read by correct updating peers (#14665) close tikv/tikv#14664 Fix stale read by correct updating peers Signed-off-by: Neil Shen Co-authored-by: tonyxuqqi --- .../raftstore-v2/src/operation/ready/mod.rs | 2 + components/raftstore/src/store/util.rs | 22 ++- components/test_raftstore-v2/src/server.rs | 35 ++++- tests/integrations/raftstore/mod.rs | 1 + .../integrations/raftstore/test_lease_read.rs | 46 +----- .../integrations/raftstore/test_stale_read.rs | 133 ++++++++++++++++++ 6 files changed, 188 insertions(+), 51 deletions(-) create mode 100644 tests/integrations/raftstore/test_stale_read.rs diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 5f294d7e5b6..58c7e904037 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -907,6 +907,8 @@ impl Peer { } 
_ => {} } + self.read_progress() + .update_leader_info(ss.leader_id, term, self.region()); let target = self.refresh_leader_transferee(); ctx.coprocessor_host.on_role_change( self.region(), diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 82a04ec6f4b..c3a553c89c1 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1347,6 +1347,10 @@ impl RegionReadProgress { core.leader_info.leader_term = term; if !is_region_epoch_equal(region.get_region_epoch(), &core.leader_info.epoch) { core.leader_info.epoch = region.get_region_epoch().clone(); + } + if core.leader_info.peers != region.get_peers() { + // In v2, we check peers and region epoch independently, because + // peers are incomplete but epoch is set correctly during split. core.leader_info.peers = region.get_peers().to_vec(); } core.leader_info.leader_store_id = @@ -2275,7 +2279,8 @@ mod tests { } let cap = 10; - let rrp = RegionReadProgress::new(&Default::default(), 10, cap, 1); + let mut region = Region::default(); + let rrp = RegionReadProgress::new(®ion, 10, cap, 1); for i in 1..=20 { rrp.update_safe_ts(i, i); } @@ -2322,5 +2327,20 @@ mod tests { rrp.update_safe_ts(400, 0); rrp.update_safe_ts(0, 700); assert_eq!(pending_items_num(&rrp), 0); + + // update leader info, epoch + region.mut_region_epoch().version += 1; + rrp.update_leader_info(1, 5, ®ion); + assert_eq!( + rrp.core.lock().unwrap().get_local_leader_info().epoch, + *region.get_region_epoch(), + ); + // update leader info, peers + region.mut_peers().push(new_peer(1, 2)); + rrp.update_leader_info(1, 5, ®ion); + assert_eq!( + rrp.core.lock().unwrap().get_local_leader_info().peers, + *region.get_peers(), + ); } } diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 85941088e2e..35671c227f4 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -246,6 +246,7 
@@ pub struct ServerMeta { sim_trans: SimulateServerTransport, raw_router: StoreRouter, gc_worker: GcWorker>, + rts_worker: Option>, rsmeter_cleanup: Box, } @@ -417,7 +418,30 @@ impl ServerCluster { ); gc_worker.start(node_id).unwrap(); - // todo: resolved ts + let rts_worker = if cfg.resolved_ts.enable { + // Resolved ts worker + let mut rts_worker = LazyWorker::new("resolved-ts"); + let rts_ob = resolved_ts::Observer::new(rts_worker.scheduler()); + rts_ob.register_to(&mut coprocessor_host); + // resolved ts endpoint needs store id. + store_meta.lock().unwrap().store_id = node_id; + // Resolved ts endpoint + let rts_endpoint = resolved_ts::Endpoint::new( + &cfg.resolved_ts, + rts_worker.scheduler(), + raft_router.clone(), + store_meta.clone(), + self.pd_client.clone(), + concurrency_manager.clone(), + self.env.clone(), + self.security_mgr.clone(), + ); + // Start the worker + rts_worker.start(rts_endpoint); + Some(rts_worker) + } else { + None + }; if ApiVersion::V2 == F::TAG { let casual_ts_provider: Arc = Arc::new( @@ -644,6 +668,7 @@ impl ServerCluster { sim_router, gc_worker, sim_trans: simulate_trans, + rts_worker, rsmeter_cleanup, }, ); @@ -759,10 +784,10 @@ impl Simulator for ServerCluster { if let Some(mut meta) = self.metas.remove(&node_id) { meta.server.stop().unwrap(); meta.node.stop(); - // // resolved ts worker started, let's stop it - // if let Some(worker) = meta.rts_worker { - // worker.stop_worker(); - // } + // resolved ts worker started, let's stop it + if let Some(worker) = meta.rts_worker { + worker.stop_worker(); + } (meta.rsmeter_cleanup)(); } self.storages.remove(&node_id); diff --git a/tests/integrations/raftstore/mod.rs b/tests/integrations/raftstore/mod.rs index 5f6703afe05..ce19c56e067 100644 --- a/tests/integrations/raftstore/mod.rs +++ b/tests/integrations/raftstore/mod.rs @@ -26,6 +26,7 @@ mod test_snap; mod test_snap_recovery; mod test_split_region; mod test_stale_peer; +mod test_stale_read; mod test_stats; mod test_status_command; 
mod test_tombstone; diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index 6d8319ebae6..8ac364faae9 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -10,7 +10,7 @@ use std::{ }; use engine_rocks::RocksSnapshot; -use kvproto::{kvrpcpb::Op, metapb}; +use kvproto::metapb; use more_asserts::assert_le; use pd_client::PdClient; use raft::eraftpb::{ConfChangeType, MessageType}; @@ -828,47 +828,3 @@ fn test_node_local_read_renew_lease() { thread::sleep(request_wait); } } - -#[test] -fn test_stale_read_with_ts0() { - let mut cluster = new_server_cluster(0, 3); - let pd_client = Arc::clone(&cluster.pd_client); - pd_client.disable_default_operator(); - cluster.cfg.resolved_ts.enable = true; - cluster.run(); - - let leader = new_peer(1, 1); - cluster.must_transfer_leader(1, leader.clone()); - let mut leader_client = PeerClient::new(&cluster, 1, leader); - - let mut follower_client2 = PeerClient::new(&cluster, 1, new_peer(2, 2)); - - // Set the `stale_read` flag - leader_client.ctx.set_stale_read(true); - follower_client2.ctx.set_stale_read(true); - - let commit_ts1 = leader_client.must_kv_write( - &pd_client, - vec![new_mutation(Op::Put, &b"key1"[..], &b"value1"[..])], - b"key1".to_vec(), - ); - - let commit_ts2 = leader_client.must_kv_write( - &pd_client, - vec![new_mutation(Op::Put, &b"key1"[..], &b"value2"[..])], - b"key1".to_vec(), - ); - - follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), commit_ts1); - follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value2".to_vec(), commit_ts2); - assert!( - follower_client2 - .kv_read(b"key1".to_vec(), 0) - .region_error - .into_option() - .unwrap() - .not_leader - .is_some() - ); - assert!(leader_client.kv_read(b"key1".to_vec(), 0).not_found); -} diff --git a/tests/integrations/raftstore/test_stale_read.rs b/tests/integrations/raftstore/test_stale_read.rs new file mode 
100644 index 00000000000..9cbbc6ca8ba --- /dev/null +++ b/tests/integrations/raftstore/test_stale_read.rs @@ -0,0 +1,133 @@ +// Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{cell::RefCell, sync::Arc, time::Duration}; + +use grpcio::{ChannelBuilder, Environment}; +use kvproto::{ + kvrpcpb::{Context, Op}, + metapb::{Peer, Region}, + tikvpb_grpc::TikvClient, +}; +use test_raftstore::{new_mutation, new_peer, new_server_cluster, PeerClient}; +use test_raftstore_macro::test_case; +use tikv_util::{config::ReadableDuration, time::Instant}; + +use crate::tikv_util::HandyRwLock; + +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_stale_read_with_ts0() { + let mut cluster = new_cluster(0, 3); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.cfg.resolved_ts.enable = true; + cluster.cfg.resolved_ts.advance_ts_interval = ReadableDuration::millis(200); + cluster.run(); + + let region_id = 1; + let env = Arc::new(Environment::new(1)); + let new_client = |peer: Peer| { + let cli = TikvClient::new( + ChannelBuilder::new(env.clone()) + .connect(&cluster.sim.rl().get_addr(peer.get_store_id())), + ); + let epoch = cluster.get_region_epoch(region_id); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_peer(peer); + ctx.set_region_epoch(epoch); + PeerClient { cli, ctx } + }; + let leader = new_peer(1, 1); + let mut leader_client = new_client(leader.clone()); + let follower = new_peer(2, 2); + let mut follower_client2 = new_client(follower); + + cluster.must_transfer_leader(1, leader); + + // Set the `stale_read` flag + leader_client.ctx.set_stale_read(true); + follower_client2.ctx.set_stale_read(true); + + let commit_ts1 = leader_client.must_kv_write( + &pd_client, + vec![new_mutation(Op::Put, &b"key1"[..], &b"value1"[..])], + b"key1".to_vec(), + ); + + let commit_ts2 = leader_client.must_kv_write( + &pd_client, + 
vec![new_mutation(Op::Put, &b"key1"[..], &b"value2"[..])], + b"key1".to_vec(), + ); + + follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), commit_ts1); + follower_client2.must_kv_read_equal(b"key1".to_vec(), b"value2".to_vec(), commit_ts2); + assert!( + follower_client2 + .kv_read(b"key1".to_vec(), 0) + .region_error + .into_option() + .unwrap() + .not_leader + .is_some() + ); + assert!(leader_client.kv_read(b"key1".to_vec(), 0).not_found); +} + +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_stale_read_resolved_ts_advance() { + let mut cluster = new_server_cluster(0, 3); + cluster.cfg.resolved_ts.enable = true; + cluster.cfg.resolved_ts.advance_ts_interval = ReadableDuration::millis(200); + + cluster.run(); + let cluster = RefCell::new(cluster); + + let must_resolved_ts_advance = |region: &Region| { + let cluster = cluster.borrow_mut(); + let ts = cluster.store_metas[®ion.get_peers()[0].get_store_id()] + .lock() + .unwrap() + .region_read_progress + .get_resolved_ts(®ion.get_id()) + .unwrap(); + let now = Instant::now(); + for peer in region.get_peers() { + loop { + let new_ts = cluster.store_metas[&peer.get_store_id()] + .lock() + .unwrap() + .region_read_progress + .get_resolved_ts(®ion.get_id()) + .unwrap(); + if new_ts <= ts { + if now.saturating_elapsed() > Duration::from_secs(5) { + panic!("timeout"); + } + continue; + } + break; + } + } + }; + + // Make sure resolved ts advances. + let region = cluster.borrow().get_region(&[]); + must_resolved_ts_advance(®ion); + + // Test transfer leader. + cluster + .borrow_mut() + .must_transfer_leader(region.get_id(), region.get_peers()[1].clone()); + must_resolved_ts_advance(®ion); + + // Test split. 
+ let split_key = b"k1"; + cluster.borrow_mut().must_split(®ion, split_key); + let left = cluster.borrow().get_region(&[]); + let right = cluster.borrow().get_region(split_key); + must_resolved_ts_advance(&left); + must_resolved_ts_advance(&right); +} From 6b18e8f72ef36246b9e8aba8c4cd2983eff0e460 Mon Sep 17 00:00:00 2001 From: you06 Date: Thu, 27 Apr 2023 13:31:52 +0800 Subject: [PATCH 0669/1149] metrics: add missing `check_leader` gRPC metrics (#14662) close tikv/tikv#14658 Record the missing check_leader gRPC metrics. Signed-off-by: you06 --- src/server/service/kv.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 9895067fcb3..a1feb0f7b60 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1004,6 +1004,7 @@ impl Tikv for Service { mut request: CheckLeaderRequest, sink: UnarySink, ) { + let begin_instant = Instant::now(); let addr = ctx.peer(); let ts = request.get_ts(); let leaders = request.take_regions().into(); @@ -1025,6 +1026,10 @@ impl Tikv for Service { } return Err(Error::from(e)); } + let elapsed = begin_instant.saturating_elapsed(); + GRPC_MSG_HISTOGRAM_STATIC + .check_leader + .observe(elapsed.as_secs_f64()); ServerResult::Ok(()) } .map_err(move |e| { From 38462f242e9026710514139fa5ccf42429bec929 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Wed, 26 Apr 2023 23:39:51 -0700 Subject: [PATCH 0670/1149] [raftstore-v2]: optimize the load based split config based on region size (#14625) ref tikv/tikv#12842 1) optimize the load based split config based on region size 2) polish a log message when it cannot find a target peer of the message. 
Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../raftstore-v2/src/operation/ready/mod.rs | 2 +- components/raftstore/src/store/mod.rs | 4 +- components/raftstore/src/store/worker/mod.rs | 6 ++- .../src/store/worker/split_config.rs | 23 ++++++++-- src/config/mod.rs | 42 +++++++++++++++---- 5 files changed, 63 insertions(+), 14 deletions(-) diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 58c7e904037..62e8fda7ba0 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -379,7 +379,7 @@ impl Peer { let to_peer = match self.peer_from_cache(msg.to) { Some(p) => p, None => { - warn!(self.logger, "failed to look up recipient peer"; "to_peer" => msg.to); + warn!(self.logger, "failed to look up recipient peer"; "to_peer" => msg.to, "message_type" => ?msg.msg_type); return None; } }; diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index c007b622ee1..7a2c04e2450 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -84,6 +84,8 @@ pub use self::{ ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, SplitInfo, StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, - NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, + BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, DEFAULT_BIG_REGION_BYTE_THRESHOLD, + DEFAULT_BIG_REGION_QPS_THRESHOLD, DEFAULT_BYTE_THRESHOLD, DEFAULT_QPS_THRESHOLD, + NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, REGION_CPU_OVERLOAD_THRESHOLD_RATIO, }, }; diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index ac23f4e58d5..62d27b2e88b 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -44,6 +44,10 
@@ pub use self::{ split_check::{ Bucket, BucketRange, KeyEntry, Runner as SplitCheckRunner, Task as SplitCheckTask, }, - split_config::{SplitConfig, SplitConfigManager}, + split_config::{ + SplitConfig, SplitConfigManager, BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, + DEFAULT_BIG_REGION_BYTE_THRESHOLD, DEFAULT_BIG_REGION_QPS_THRESHOLD, + DEFAULT_BYTE_THRESHOLD, DEFAULT_QPS_THRESHOLD, REGION_CPU_OVERLOAD_THRESHOLD_RATIO, + }, split_controller::{AutoSplitController, ReadStats, SplitConfigChange, SplitInfo, WriteStats}, }; diff --git a/components/raftstore/src/store/worker/split_config.rs b/components/raftstore/src/store/worker/split_config.rs index 7857ae10d8e..8fec853bb00 100644 --- a/components/raftstore/src/store/worker/split_config.rs +++ b/components/raftstore/src/store/worker/split_config.rs @@ -6,13 +6,18 @@ use lazy_static::lazy_static; use online_config::{ConfigChange, ConfigManager, OnlineConfig}; use parking_lot::Mutex; use serde::{Deserialize, Serialize}; -use tikv_util::{config::VersionTrack, info}; +use tikv_util::{ + config::{ReadableSize, VersionTrack}, + info, +}; const DEFAULT_DETECT_TIMES: u64 = 10; const DEFAULT_SAMPLE_THRESHOLD: u64 = 100; pub(crate) const DEFAULT_SAMPLE_NUM: usize = 20; -const DEFAULT_QPS_THRESHOLD: usize = 3000; -const DEFAULT_BYTE_THRESHOLD: usize = 30 * 1024 * 1024; +pub const DEFAULT_QPS_THRESHOLD: usize = 3000; +pub const DEFAULT_BIG_REGION_QPS_THRESHOLD: usize = 7000; +pub const DEFAULT_BYTE_THRESHOLD: usize = 30 * 1024 * 1024; +pub const DEFAULT_BIG_REGION_BYTE_THRESHOLD: usize = 100 * 1024 * 1024; // We get balance score by // abs(sample.left-sample.right)/(sample.right+sample.left). It will be used to @@ -43,7 +48,8 @@ const DEFAULT_UNIFIED_READ_POOL_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO: f64 = 0.8; // `REGION_CPU_OVERLOAD_THRESHOLD_RATIO` as a percentage of the Unified Read // Poll, it will be added into the hot region list and may be split later as the // top hot CPU region. 
-pub(crate) const REGION_CPU_OVERLOAD_THRESHOLD_RATIO: f64 = 0.25; +pub const REGION_CPU_OVERLOAD_THRESHOLD_RATIO: f64 = 0.25; +pub const BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO: f64 = 0.75; lazy_static! { static ref SPLIT_CONFIG: Mutex>>> = Mutex::new(None); @@ -134,6 +140,15 @@ impl SplitConfig { } Ok(()) } + + pub fn optimize_for(&mut self, region_size: ReadableSize) { + const LARGE_REGION_SIZE_IN_MB: u64 = 4096; + if region_size.as_mb() >= LARGE_REGION_SIZE_IN_MB { + self.qps_threshold = DEFAULT_BIG_REGION_QPS_THRESHOLD; + self.region_cpu_overload_threshold_ratio = BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO; + self.byte_threshold = DEFAULT_BIG_REGION_BYTE_THRESHOLD; + } + } } #[derive(Clone)] diff --git a/src/config/mod.rs b/src/config/mod.rs index 5d20b027c4e..62a7de89130 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3399,6 +3399,8 @@ impl TikvConfig { self.coprocessor .optimize_for(self.storage.engine == EngineType::RaftKv2); self.coprocessor.validate()?; + self.split + .optimize_for(self.coprocessor.region_split_size()); self.raft_store.validate( self.coprocessor.region_split_size(), self.coprocessor.enable_region_bucket(), @@ -4337,9 +4339,16 @@ mod tests { use grpcio::ResourceQuota; use itertools::Itertools; use kvproto::kvrpcpb::CommandPri; - use raftstore::coprocessor::{ - config::{RAFTSTORE_V2_SPLIT_SIZE, SPLIT_SIZE}, - region_info_accessor::MockRegionInfoProvider, + use raftstore::{ + coprocessor::{ + config::{RAFTSTORE_V2_SPLIT_SIZE, SPLIT_SIZE}, + region_info_accessor::MockRegionInfoProvider, + }, + store::{ + BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, DEFAULT_BIG_REGION_BYTE_THRESHOLD, + DEFAULT_BIG_REGION_QPS_THRESHOLD, DEFAULT_BYTE_THRESHOLD, DEFAULT_QPS_THRESHOLD, + REGION_CPU_OVERLOAD_THRESHOLD_RATIO, + }, }; use slog::Level; use tempfile::Builder; @@ -5780,18 +5789,37 @@ mod tests { #[test] fn test_region_size_config() { let mut default_cfg = TikvConfig::default(); - default_cfg.coprocessor.optimize_for(false); - 
default_cfg.coprocessor.validate().unwrap(); + default_cfg.storage.engine = EngineType::RaftKv; + default_cfg.validate().unwrap(); assert_eq!(default_cfg.coprocessor.region_split_size(), SPLIT_SIZE); assert!(!default_cfg.coprocessor.enable_region_bucket()); + assert_eq!(default_cfg.split.qps_threshold, DEFAULT_QPS_THRESHOLD); + assert_eq!( + default_cfg.split.region_cpu_overload_threshold_ratio, + REGION_CPU_OVERLOAD_THRESHOLD_RATIO + ); + assert_eq!(default_cfg.split.byte_threshold, DEFAULT_BYTE_THRESHOLD); + let mut default_cfg = TikvConfig::default(); - default_cfg.coprocessor.optimize_for(true); - default_cfg.coprocessor.validate().unwrap(); + default_cfg.storage.engine = EngineType::RaftKv2; + default_cfg.validate().unwrap(); assert_eq!( default_cfg.coprocessor.region_split_size(), RAFTSTORE_V2_SPLIT_SIZE ); + assert_eq!( + default_cfg.split.qps_threshold, + DEFAULT_BIG_REGION_QPS_THRESHOLD + ); + assert_eq!( + default_cfg.split.region_cpu_overload_threshold_ratio, + BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO + ); + assert_eq!( + default_cfg.split.byte_threshold, + DEFAULT_BIG_REGION_BYTE_THRESHOLD + ); assert!(default_cfg.coprocessor.enable_region_bucket()); let mut default_cfg = TikvConfig::default(); From 9e73fed6351ad8f4a2ab8aa7ad4ac76a344cb604 Mon Sep 17 00:00:00 2001 From: lidezhu <47731263+lidezhu@users.noreply.github.com> Date: Thu, 27 Apr 2023 15:35:51 +0800 Subject: [PATCH 0671/1149] raftstore: support decode simple write request in v1 (#14638) ref tikv/tikv#14575 Support decode simple write request in v1 Signed-off-by: lidezhu Co-authored-by: Xinye Tao --- components/raftstore/src/store/fsm/apply.rs | 455 +++++++++++++++++- components/raftstore/src/store/util.rs | 59 ++- tests/integrations/raftstore/mod.rs | 1 + tests/integrations/raftstore/test_snap.rs | 93 +--- .../raftstore/test_v1_v2_mixed.rs | 268 +++++++++++ 5 files changed, 770 insertions(+), 106 deletions(-) create mode 100644 tests/integrations/raftstore/test_v1_v2_mixed.rs diff --git 
a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 54ca2274162..d1ba6d4e774 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -463,6 +463,8 @@ where /// `ApplyRes` uncommitted. Data will finally be written to kvdb in /// `flush`. uncommitted_res_count: usize, + + enable_v2_compatible_learner: bool, } impl ApplyContext @@ -519,6 +521,7 @@ where key_buffer: Vec::with_capacity(1024), disable_wal: false, uncommitted_res_count: 0, + enable_v2_compatible_learner: cfg.enable_v2_compatible_learner, } } @@ -1210,7 +1213,18 @@ where if !data.is_empty() { if !self.peer.is_witness || !can_witness_skip(entry) { - let cmd = util::parse_data_at(data, index, &self.tag); + let cmd = match util::parse_raft_cmd_request(data, index, term, &self.tag) { + util::RaftCmd::V1(cmd) => cmd, + util::RaftCmd::V2(simple_write_decoder) => { + if !apply_ctx.enable_v2_compatible_learner { + panic!( + "{} can not handle v2 command when enable_v2_compatible_learner is false", + self.tag + ); + } + simple_write_decoder.to_raft_cmd_request() + } + }; if apply_ctx.yield_high_latency_operation && has_high_latency_operation(&cmd) { self.priority = Priority::Low; } @@ -5000,7 +5014,12 @@ mod tests { use super::*; use crate::{ coprocessor::*, - store::{msg::WriteResponse, peer_storage::RAFT_INIT_LOG_INDEX, Config, RegionTask}, + store::{ + msg::WriteResponse, + peer_storage::RAFT_INIT_LOG_INDEX, + simple_write::{SimpleWriteEncoder, SimpleWriteReqEncoder}, + Config, RegionTask, + }, }; impl GenSnapTask { @@ -5611,6 +5630,93 @@ mod tests { } } + struct EntryBuilderUsingSimpleWrite { + entry: Entry, + header: Box, + encoder: SimpleWriteEncoder, + } + + impl EntryBuilderUsingSimpleWrite { + fn new(index: u64, term: u64) -> EntryBuilderUsingSimpleWrite { + let encoder = SimpleWriteEncoder::with_capacity(64); + let header = Box::::default(); + let mut entry = Entry::default(); + entry.set_index(index); + 
entry.set_term(term); + EntryBuilderUsingSimpleWrite { + entry, + header, + encoder, + } + } + + fn epoch(mut self, conf_ver: u64, version: u64) -> EntryBuilderUsingSimpleWrite { + let mut epoch = RegionEpoch::default(); + epoch.set_version(version); + epoch.set_conf_ver(conf_ver); + self.header.set_region_epoch(epoch); + self + } + + fn put(mut self, key: &[u8], value: &[u8]) -> EntryBuilderUsingSimpleWrite { + self.encoder.put(CF_DEFAULT, key, value); + self + } + + fn put_cf(mut self, cf: &str, key: &[u8], value: &[u8]) -> EntryBuilderUsingSimpleWrite { + self.encoder.put(cf, key, value); + self + } + + fn delete(mut self, key: &[u8]) -> EntryBuilderUsingSimpleWrite { + self.encoder.delete(CF_DEFAULT, key); + self + } + + fn delete_cf(mut self, cf: &str, key: &[u8]) -> EntryBuilderUsingSimpleWrite { + self.encoder.delete(cf, key); + self + } + + fn delete_range( + mut self, + start_key: &[u8], + end_key: &[u8], + ) -> EntryBuilderUsingSimpleWrite { + self.encoder + .delete_range(CF_DEFAULT, start_key, end_key, false); + self + } + + fn delete_range_cf( + mut self, + cf: &str, + start_key: &[u8], + end_key: &[u8], + ) -> EntryBuilderUsingSimpleWrite { + self.encoder.delete_range(cf, start_key, end_key, false); + self + } + + fn ingest_sst(mut self, meta: &SstMeta) -> EntryBuilderUsingSimpleWrite { + self.encoder.ingest(vec![meta.clone()]); + self + } + + fn build(mut self) -> Entry { + let bin = self.encoder.encode(); + let req_encoder = SimpleWriteReqEncoder::>::new( + self.header.clone(), + bin, + 1000, + false, + ); + let (bytes, _) = req_encoder.encode(); + self.entry.set_data(bytes.into()); + self.entry + } + } + #[derive(Clone, Default)] struct ApplyObserver { pre_query_count: Arc, @@ -6093,6 +6199,351 @@ mod tests { system.shutdown(); } + #[test] + fn test_handle_raft_committed_entries_from_v2() { + let (_path, engine) = create_tmp_engine("test-delegate"); + let (import_dir, importer) = create_tmp_importer("test-delegate"); + let obs = 
ApplyObserver::default(); + let mut host = CoprocessorHost::::default(); + host.registry + .register_query_observer(1, BoxQueryObserver::new(obs.clone())); + + let (tx, rx) = mpsc::channel(); + let (region_scheduler, _) = dummy_scheduler(); + let sender = Box::new(TestNotifier { tx }); + let mut config = Config::default(); + config.enable_v2_compatible_learner = true; + let cfg = Arc::new(VersionTrack::new(config)); + let (router, mut system) = create_apply_batch_system(&cfg.value(), None); + let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); + let builder = super::Builder:: { + tag: "test-store".to_owned(), + cfg, + sender, + region_scheduler, + coprocessor_host: host, + importer: importer.clone(), + engine: engine.clone(), + router: router.clone(), + store_id: 1, + pending_create_peers, + }; + system.spawn("test-handle-raft".to_owned(), builder); + + let peer_id = 3; + let mut reg = Registration { + id: peer_id, + ..Default::default() + }; + reg.region.set_id(1); + reg.region.mut_peers().push(new_peer(2, 3)); + reg.region.set_end_key(b"k5".to_vec()); + reg.region.mut_region_epoch().set_conf_ver(1); + reg.region.mut_region_epoch().set_version(3); + router.schedule_task(1, Msg::Registration(reg)); + + let (capture_tx, capture_rx) = mpsc::channel(); + let put_entry = EntryBuilderUsingSimpleWrite::new(1, 1) + .put(b"k1", b"v1") + .put(b"k2", b"v1") + .put(b"k3", b"v1") + .epoch(1, 3) + .build(); + router.schedule_task( + 1, + Msg::apply(apply( + peer_id, + 1, + 1, + vec![put_entry], + vec![cb(1, 1, capture_tx.clone())], + )), + ); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + let dk_k1 = keys::data_key(b"k1"); + let dk_k2 = keys::data_key(b"k2"); + let dk_k3 = keys::data_key(b"k3"); + assert_eq!(engine.get_value(&dk_k1).unwrap().unwrap(), b"v1"); + assert_eq!(engine.get_value(&dk_k2).unwrap().unwrap(), b"v1"); + assert_eq!(engine.get_value(&dk_k3).unwrap().unwrap(), 
b"v1"); + validate(&router, 1, |delegate| { + assert_eq!(delegate.applied_term, 1); + assert_eq!(delegate.apply_state.get_applied_index(), 1); + }); + fetch_apply_res(&rx); + + let put_entry = EntryBuilderUsingSimpleWrite::new(2, 2) + .put_cf(CF_LOCK, b"k1", b"v1") + .epoch(1, 3) + .build(); + router.schedule_task(1, Msg::apply(apply(peer_id, 1, 2, vec![put_entry], vec![]))); + let apply_res = fetch_apply_res(&rx); + assert_eq!(apply_res.region_id, 1); + assert_eq!(apply_res.apply_state.get_applied_index(), 2); + assert_eq!(apply_res.applied_term, 2); + assert!(apply_res.exec_res.is_empty()); + assert!(apply_res.metrics.written_bytes >= 5); + assert_eq!(apply_res.metrics.written_keys, 2); + assert_eq!(apply_res.metrics.size_diff_hint, 5); + assert_eq!(apply_res.metrics.lock_cf_written_bytes, 5); + assert_eq!( + engine.get_value_cf(CF_LOCK, &dk_k1).unwrap().unwrap(), + b"v1" + ); + + let put_entry = EntryBuilderUsingSimpleWrite::new(3, 2) + .put(b"k2", b"v2") + .epoch(1, 1) + .build(); + router.schedule_task( + 1, + Msg::apply(apply( + peer_id, + 1, + 2, + vec![put_entry], + vec![cb(3, 2, capture_tx.clone())], + )), + ); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(resp.get_header().get_error().has_epoch_not_match()); + let apply_res = fetch_apply_res(&rx); + assert_eq!(apply_res.applied_term, 2); + assert_eq!(apply_res.apply_state.get_applied_index(), 3); + + let put_entry = EntryBuilderUsingSimpleWrite::new(4, 2) + .put(b"k3", b"v3") + .put(b"k5", b"v5") + .epoch(1, 3) + .build(); + router.schedule_task( + 1, + Msg::apply(apply( + peer_id, + 1, + 2, + vec![put_entry], + vec![cb(4, 2, capture_tx.clone())], + )), + ); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(resp.get_header().get_error().has_key_not_in_region()); + let apply_res = fetch_apply_res(&rx); + assert_eq!(apply_res.applied_term, 2); + assert_eq!(apply_res.apply_state.get_applied_index(), 4); + // a writebatch should be atomic. 
+ assert_eq!(engine.get_value(&dk_k3).unwrap().unwrap(), b"v1"); + + let put_entry = EntryBuilderUsingSimpleWrite::new(5, 3) + .delete(b"k1") + .delete_cf(CF_LOCK, b"k1") + .delete_cf(CF_WRITE, b"k1") + .epoch(1, 3) + .build(); + router.schedule_task( + 1, + Msg::apply(apply( + peer_id, + 1, + 3, + vec![put_entry], + vec![cb(5, 2, capture_tx.clone()), cb(5, 3, capture_tx.clone())], + )), + ); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + // stale command should be cleared. + assert!(resp.get_header().get_error().has_stale_command()); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + assert!(engine.get_value(&dk_k1).unwrap().is_none()); + let apply_res = fetch_apply_res(&rx); + assert_eq!(apply_res.metrics.lock_cf_written_bytes, 3); + assert_eq!(apply_res.metrics.delete_keys_hint, 2); + assert_eq!(apply_res.metrics.size_diff_hint, -9); + + let delete_entry = EntryBuilderUsingSimpleWrite::new(6, 3) + .delete(b"k5") + .epoch(1, 3) + .build(); + router.schedule_task( + 1, + Msg::apply(apply( + peer_id, + 1, + 3, + vec![delete_entry], + vec![cb(6, 3, capture_tx.clone())], + )), + ); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(resp.get_header().get_error().has_key_not_in_region()); + fetch_apply_res(&rx); + + let delete_range_entry = EntryBuilderUsingSimpleWrite::new(7, 3) + .delete_range(b"", b"") + .epoch(1, 3) + .build(); + router.schedule_task( + 1, + Msg::apply(apply( + peer_id, + 1, + 3, + vec![delete_range_entry], + vec![cb(7, 3, capture_tx.clone())], + )), + ); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(resp.get_header().get_error().has_key_not_in_region()); + assert_eq!(engine.get_value(&dk_k3).unwrap().unwrap(), b"v1"); + fetch_apply_res(&rx); + + let delete_range_entry = EntryBuilderUsingSimpleWrite::new(8, 3) + .delete_range_cf(CF_DEFAULT, b"", b"k5") + .delete_range_cf(CF_LOCK, 
b"", b"k5") + .delete_range_cf(CF_WRITE, b"", b"k5") + .epoch(1, 3) + .build(); + router.schedule_task( + 1, + Msg::apply(apply( + peer_id, + 1, + 3, + vec![delete_range_entry], + vec![cb(8, 3, capture_tx.clone())], + )), + ); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + assert!(engine.get_value(&dk_k1).unwrap().is_none()); + assert!(engine.get_value(&dk_k2).unwrap().is_none()); + assert!(engine.get_value(&dk_k3).unwrap().is_none()); + + // The region was rescheduled from normal-priority handler to + // low-priority handler, so the first apple_res.exec_res should be empty. + let apply_res = fetch_apply_res(&rx); + assert!(apply_res.exec_res.is_empty()); + // The entry should be applied now. + let apply_res = fetch_apply_res(&rx); + assert_eq!(apply_res.applied_term, 3); + assert_eq!(apply_res.apply_state.get_applied_index(), 8); + + // UploadSST + let sst_path = import_dir.path().join("test.sst"); + let mut sst_epoch = RegionEpoch::default(); + sst_epoch.set_conf_ver(1); + sst_epoch.set_version(3); + let sst_range = (0, 100); + let (mut meta1, data1) = gen_sst_file(&sst_path, sst_range); + meta1.set_region_id(1); + meta1.set_region_epoch(sst_epoch); + let mut file1 = importer.create(&meta1).unwrap(); + file1.append(&data1).unwrap(); + file1.finish().unwrap(); + let (mut meta2, data2) = gen_sst_file(&sst_path, sst_range); + meta2.set_region_id(1); + meta2.mut_region_epoch().set_conf_ver(1); + meta2.mut_region_epoch().set_version(1234); + let mut file2 = importer.create(&meta2).unwrap(); + file2.append(&data2).unwrap(); + file2.finish().unwrap(); + + // IngestSst + let put_ok = EntryBuilderUsingSimpleWrite::new(9, 3) + .put(&[sst_range.0], &[sst_range.1]) + .epoch(0, 3) + .build(); + // Add a put above to test flush before ingestion. 
+ let capture_tx_clone = capture_tx.clone(); + let ingest_ok = EntryBuilderUsingSimpleWrite::new(10, 3) + .ingest_sst(&meta1) + .epoch(0, 3) + .build(); + let ingest_epoch_not_match = EntryBuilderUsingSimpleWrite::new(11, 3) + .ingest_sst(&meta2) + .epoch(0, 3) + .build(); + let entries = vec![put_ok, ingest_ok, ingest_epoch_not_match]; + router.schedule_task( + 1, + Msg::apply(apply( + peer_id, + 1, + 3, + entries, + vec![ + cb(9, 3, capture_tx.clone()), + proposal( + false, + 10, + 3, + Callback::write(Box::new(move |resp: WriteResponse| { + // Sleep until yield timeout. + thread::sleep(Duration::from_millis(500)); + capture_tx_clone.send(resp.response).unwrap(); + })), + ), + cb(11, 3, capture_tx.clone()), + ], + )), + ); + + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + check_db_range(&engine, sst_range); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(resp.get_header().has_error()); + + // The region was rescheduled to normal-priority handler because of + // nomral put command, so the first apple_res.exec_res should be empty. + let apply_res = fetch_apply_res(&rx); + assert!(apply_res.exec_res.is_empty()); + // The region was rescheduled low-priority becasuee of ingest command, + // only put entry has been applied; + let apply_res = fetch_apply_res(&rx); + assert_eq!(apply_res.applied_term, 3); + assert_eq!(apply_res.apply_state.get_applied_index(), 9); + // The region will yield after timeout. + let apply_res = fetch_apply_res(&rx); + assert_eq!(apply_res.applied_term, 3); + assert_eq!(apply_res.apply_state.get_applied_index(), 10); + // The third entry should be applied now. 
+ let apply_res = fetch_apply_res(&rx); + assert_eq!(apply_res.applied_term, 3); + assert_eq!(apply_res.apply_state.get_applied_index(), 11); + + let write_batch_max_keys = ::WRITE_BATCH_MAX_KEYS; + + let mut props = vec![]; + let mut entries = vec![]; + for i in 0..write_batch_max_keys { + let put_entry = EntryBuilder::new(i as u64 + 12, 3) + .put(b"k", b"v") + .epoch(1, 3) + .build(); + entries.push(put_entry); + props.push(cb(i as u64 + 12, 3, capture_tx.clone())); + } + router.schedule_task(1, Msg::apply(apply(peer_id, 1, 3, entries, props))); + for _ in 0..write_batch_max_keys { + capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + } + let index = write_batch_max_keys + 11; + // The region was rescheduled to normal-priority handler. Discard the first + // apply_res. + fetch_apply_res(&rx); + let apply_res = fetch_apply_res(&rx); + assert_eq!(apply_res.apply_state.get_applied_index(), index as u64); + assert_eq!(obs.pre_query_count.load(Ordering::SeqCst), index); + assert_eq!(obs.post_query_count.load(Ordering::SeqCst), index); + + system.shutdown(); + } + #[test] fn test_apply_yield_with_msg_size() { let (_path, engine) = create_tmp_engine("test-apply-yield"); diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index c3a553c89c1..f5a23538ad5 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -43,7 +43,11 @@ use tokio::sync::Notify; use txn_types::WriteBatchFlags; use super::{metrics::PEER_ADMIN_CMD_COUNTER_VEC, peer_storage, Config}; -use crate::{coprocessor::CoprocessorHost, store::snap::SNAPSHOT_VERSION, Error, Result}; +use crate::{ + coprocessor::CoprocessorHost, + store::{simple_write::SimpleWriteReqDecoder, snap::SNAPSHOT_VERSION}, + Error, Result, +}; const INVALID_TIMESTAMP: u64 = u64::MAX; @@ -749,18 +753,30 @@ pub fn get_entry_header(entry: &Entry) -> RaftRequestHeader { if entry.get_entry_type() != EntryType::EntryNormal { return 
RaftRequestHeader::default(); } - // request header is encoded into data - let mut is = CodedInputStream::from_bytes(entry.get_data()); - if is.eof().unwrap() { - return RaftRequestHeader::default(); - } - let (field_number, _) = is.read_tag_unpack().unwrap(); - let t = is.read_message().unwrap(); - // Header field is of number 1 - if field_number != 1 { - panic!("unexpected field number: {} {:?}", field_number, t); + let logger = slog_global::get_global().new(slog::o!()); + match SimpleWriteReqDecoder::new( + |_, _, _| RaftCmdRequest::default(), + &logger, + entry.get_data(), + entry.get_index(), + entry.get_term(), + ) { + Ok(decoder) => decoder.header().clone(), + Err(_) => { + // request header is encoded into data + let mut is = CodedInputStream::from_bytes(entry.get_data()); + if is.eof().unwrap() { + return RaftRequestHeader::default(); + } + let (field_number, _) = is.read_tag_unpack().unwrap(); + let t = is.read_message().unwrap(); + // Header field is of number 1 + if field_number != 1 { + panic!("unexpected field number: {} {:?}", field_number, t); + } + t + } } - t } /// Parse data of entry `index`. @@ -784,6 +800,25 @@ pub fn parse_data_at(data: &[u8], index: u64, tag: &str) - result } +pub enum RaftCmd<'a> { + V1(RaftCmdRequest), + V2(SimpleWriteReqDecoder<'a>), +} + +pub fn parse_raft_cmd_request<'a>(data: &'a [u8], index: u64, term: u64, tag: &str) -> RaftCmd<'a> { + let logger = slog_global::get_global().new(slog::o!()); + match SimpleWriteReqDecoder::new( + |_, _, _| parse_data_at(data, index, tag), + &logger, + data, + index, + term, + ) { + Ok(simple_write_decoder) => RaftCmd::V2(simple_write_decoder), + Err(cmd) => RaftCmd::V1(cmd), + } +} + /// Check if two regions are sibling. /// /// They are sibling only when they share borders and don't overlap. 
diff --git a/tests/integrations/raftstore/mod.rs b/tests/integrations/raftstore/mod.rs index ce19c56e067..3bb93f6809b 100644 --- a/tests/integrations/raftstore/mod.rs +++ b/tests/integrations/raftstore/mod.rs @@ -34,4 +34,5 @@ mod test_transfer_leader; mod test_transport; mod test_unsafe_recovery; mod test_update_region_size; +mod test_v1_v2_mixed; mod test_witness; diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index a620bb3a990..c790d10be45 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -12,8 +12,7 @@ use std::{ }; use collections::HashMap; -use engine_rocks::{RocksCfOptions, RocksDbOptions}; -use engine_traits::{Checkpointer, KvEngine, Peekable, RaftEngineReadOnly, SyncMutable, LARGE_CFS}; +use engine_traits::{Checkpointer, KvEngine, RaftEngineReadOnly}; use file_system::{IoOp, IoType}; use futures::executor::block_on; use grpcio::Environment; @@ -785,96 +784,6 @@ fn generate_snap( (msg, snap_key) } -#[test] -fn test_v1_receive_snap_from_v2() { - let test_receive_snap = |key_num| { - let mut cluster_v1 = test_raftstore::new_server_cluster(1, 1); - let mut cluster_v2 = test_raftstore_v2::new_server_cluster(1, 1); - let mut cluster_v1_tikv = test_raftstore::new_server_cluster(1, 1); - - cluster_v1.cfg.raft_store.enable_v2_compatible_learner = true; - - cluster_v1.run(); - cluster_v2.run(); - cluster_v1_tikv.run(); - - let s1_addr = cluster_v1.get_addr(1); - let s2_addr = cluster_v1_tikv.get_addr(1); - let region = cluster_v2.get_region(b""); - let region_id = region.get_id(); - let engine = cluster_v2.get_engine(1); - let tablet = engine.get_tablet_by_id(region_id).unwrap(); - - for i in 0..key_num { - let k = format!("zk{:04}", i); - tablet.put(k.as_bytes(), &random_long_vec(1024)).unwrap(); - } - - let snap_mgr = cluster_v2.get_snap_mgr(1); - let security_mgr = cluster_v2.get_security_mgr(); - let (msg, snap_key) = generate_snap(&engine, region_id, 
&snap_mgr); - let cfg = tikv::server::Config::default(); - let limit = Limiter::new(f64::INFINITY); - let env = Arc::new(Environment::new(1)); - let _ = block_on(async { - send_snap_v2( - env.clone(), - snap_mgr.clone(), - security_mgr.clone(), - &cfg, - &s1_addr, - msg.clone(), - limit.clone(), - ) - .unwrap() - .await - }); - let send_result = block_on(async { - send_snap_v2(env, snap_mgr, security_mgr, &cfg, &s2_addr, msg, limit) - .unwrap() - .await - }); - // snapshot should be rejected by cluster v1 tikv, and the snapshot should be - // deleted. - assert!(send_result.is_err()); - let dir = cluster_v2.get_snap_dir(1); - let read_dir = std::fs::read_dir(dir).unwrap(); - assert_eq!(0, read_dir.count()); - - // The snapshot has been received by cluster v1, so check it's completeness - let snap_mgr = cluster_v1.get_snap_mgr(1); - let path = snap_mgr - .tablet_snap_manager() - .unwrap() - .final_recv_path(&snap_key); - let rocksdb = engine_rocks::util::new_engine_opt( - path.as_path().to_str().unwrap(), - RocksDbOptions::default(), - LARGE_CFS - .iter() - .map(|&cf| (cf, RocksCfOptions::default())) - .collect(), - ) - .unwrap(); - - for i in 0..key_num { - let k = format!("zk{:04}", i); - assert!( - rocksdb - .get_value_cf("default", k.as_bytes()) - .unwrap() - .is_some() - ); - } - }; - - // test small snapshot - test_receive_snap(20); - - // test large snapshot - test_receive_snap(5000); -} - #[derive(Clone)] struct MockApplySnapshotObserver { tablet_snap_paths: Arc>>, diff --git a/tests/integrations/raftstore/test_v1_v2_mixed.rs b/tests/integrations/raftstore/test_v1_v2_mixed.rs new file mode 100644 index 00000000000..1514529b209 --- /dev/null +++ b/tests/integrations/raftstore/test_v1_v2_mixed.rs @@ -0,0 +1,268 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + sync::{Arc, Mutex}, + time::Duration, +}; + +use engine_rocks::{RocksCfOptions, RocksDbOptions}; +use engine_traits::{Checkpointer, KvEngine, Peekable, SyncMutable, LARGE_CFS}; +use futures::executor::block_on; +use grpcio::Environment; +use kvproto::raft_serverpb::{RaftMessage, *}; +use raft::eraftpb::{MessageType, Snapshot}; +use raftstore::{ + errors::Result, + store::{snap::TABLET_SNAPSHOT_VERSION, TabletSnapKey, TabletSnapManager}, +}; +use rand::Rng; +use test_raftstore::{ + new_learner_peer, Direction, Filter, FilterFactory, RegionPacketFilter, Simulator as S1, *, +}; +use test_raftstore_v2::{Simulator as S2, WrapFactory}; +use tikv::server::tablet_snap::send_snap as send_snap_v2; +use tikv_util::time::Limiter; + +struct ForwardFactory { + node_id: u64, + chain_send: Arc, +} + +impl FilterFactory for ForwardFactory { + fn generate(&self, _: u64) -> Vec> { + vec![Box::new(ForwardFilter { + node_id: self.node_id, + chain_send: self.chain_send.clone(), + })] + } +} + +struct ForwardFilter { + node_id: u64, + chain_send: Arc, +} + +impl Filter for ForwardFilter { + fn before(&self, msgs: &mut Vec) -> Result<()> { + for m in msgs.drain(..) 
{ + if self.node_id == m.get_to_peer().get_store_id() { + (self.chain_send)(m); + } + } + Ok(()) + } +} + +fn generate_snap( + engine: &WrapFactory, + region_id: u64, + snap_mgr: &TabletSnapManager, +) -> (RaftMessage, TabletSnapKey) { + let tablet = engine.get_tablet_by_id(region_id).unwrap(); + let region_state = engine.region_local_state(region_id).unwrap().unwrap(); + let apply_state = engine.raft_apply_state(region_id).unwrap().unwrap(); + let raft_state = engine.raft_local_state(region_id).unwrap().unwrap(); + + // Construct snapshot by hand + let mut snapshot = Snapshot::default(); + // use commit term for simplicity + snapshot + .mut_metadata() + .set_term(raft_state.get_hard_state().term + 1); + snapshot.mut_metadata().set_index(apply_state.applied_index); + let conf_state = raftstore::store::util::conf_state_from_region(region_state.get_region()); + snapshot.mut_metadata().set_conf_state(conf_state); + + let mut snap_data = RaftSnapshotData::default(); + snap_data.set_region(region_state.get_region().clone()); + snap_data.set_version(TABLET_SNAPSHOT_VERSION); + use protobuf::Message; + snapshot.set_data(snap_data.write_to_bytes().unwrap().into()); + let snap_key = TabletSnapKey::from_region_snap(region_id, 1, &snapshot); + let checkpointer_path = snap_mgr.tablet_gen_path(&snap_key); + let mut checkpointer = tablet.new_checkpointer().unwrap(); + checkpointer + .create_at(checkpointer_path.as_path(), None, 0) + .unwrap(); + + let mut msg = RaftMessage::default(); + msg.region_id = region_id; + msg.set_to_peer(new_peer(1, 1)); + msg.mut_message().set_snapshot(snapshot); + msg.mut_message() + .set_term(raft_state.get_hard_state().commit + 1); + msg.mut_message().set_msg_type(MessageType::MsgSnapshot); + msg.set_region_epoch(region_state.get_region().get_region_epoch().clone()); + + (msg, snap_key) +} + +fn random_long_vec(length: usize) -> Vec { + let mut rng = rand::thread_rng(); + let mut value = Vec::with_capacity(1024); + (0..length).for_each(|_| 
value.push(rng.gen::())); + value +} + +#[test] +fn test_v1_receive_snap_from_v2() { + let test_receive_snap = |key_num| { + let mut cluster_v1 = test_raftstore::new_server_cluster(1, 1); + let mut cluster_v2 = test_raftstore_v2::new_server_cluster(1, 1); + let mut cluster_v1_tikv = test_raftstore::new_server_cluster(1, 1); + + cluster_v1.cfg.raft_store.enable_v2_compatible_learner = true; + + cluster_v1.run(); + cluster_v2.run(); + cluster_v1_tikv.run(); + + let s1_addr = cluster_v1.get_addr(1); + let s2_addr = cluster_v1_tikv.get_addr(1); + let region = cluster_v2.get_region(b""); + let region_id = region.get_id(); + let engine = cluster_v2.get_engine(1); + let tablet = engine.get_tablet_by_id(region_id).unwrap(); + + for i in 0..key_num { + let k = format!("zk{:04}", i); + tablet.put(k.as_bytes(), &random_long_vec(1024)).unwrap(); + } + + let snap_mgr = cluster_v2.get_snap_mgr(1); + let security_mgr = cluster_v2.get_security_mgr(); + let (msg, snap_key) = generate_snap(&engine, region_id, &snap_mgr); + let cfg = tikv::server::Config::default(); + let limit = Limiter::new(f64::INFINITY); + let env = Arc::new(Environment::new(1)); + let _ = block_on(async { + send_snap_v2( + env.clone(), + snap_mgr.clone(), + security_mgr.clone(), + &cfg, + &s1_addr, + msg.clone(), + limit.clone(), + ) + .unwrap() + .await + }); + let send_result = block_on(async { + send_snap_v2(env, snap_mgr, security_mgr, &cfg, &s2_addr, msg, limit) + .unwrap() + .await + }); + // snapshot should be rejected by cluster v1 tikv, and the snapshot should be + // deleted. 
+ assert!(send_result.is_err()); + let dir = cluster_v2.get_snap_dir(1); + let read_dir = std::fs::read_dir(dir).unwrap(); + assert_eq!(0, read_dir.count()); + + // The snapshot has been received by cluster v1, so check it's completeness + let snap_mgr = cluster_v1.get_snap_mgr(1); + let path = snap_mgr + .tablet_snap_manager() + .unwrap() + .final_recv_path(&snap_key); + let rocksdb = engine_rocks::util::new_engine_opt( + path.as_path().to_str().unwrap(), + RocksDbOptions::default(), + LARGE_CFS + .iter() + .map(|&cf| (cf, RocksCfOptions::default())) + .collect(), + ) + .unwrap(); + + for i in 0..key_num { + let k = format!("zk{:04}", i); + assert!( + rocksdb + .get_value_cf("default", k.as_bytes()) + .unwrap() + .is_some() + ); + } + }; + + // test small snapshot + test_receive_snap(20); + + // test large snapshot + test_receive_snap(5000); +} + +#[test] +fn test_v1_simple_write() { + let mut cluster_v2 = test_raftstore_v2::new_node_cluster(1, 2); + let mut cluster_v1 = test_raftstore::new_node_cluster(1, 2); + cluster_v1.cfg.tikv.raft_store.enable_v2_compatible_learner = true; + cluster_v1.pd_client.disable_default_operator(); + cluster_v2.pd_client.disable_default_operator(); + let r11 = cluster_v1.run_conf_change(); + let r21 = cluster_v2.run_conf_change(); + + cluster_v1.must_put(b"k0", b"v0"); + cluster_v2.must_put(b"k0", b"v0"); + cluster_v1 + .pd_client + .must_add_peer(r11, new_learner_peer(2, 10)); + cluster_v2 + .pd_client + .must_add_peer(r21, new_learner_peer(2, 10)); + check_key_in_engine(&cluster_v1.get_engine(2), b"zk0", b"v0"); + check_key_in_engine(&cluster_v2.get_engine(2), b"zk0", b"v0"); + let trans1 = Mutex::new(cluster_v1.sim.read().unwrap().get_router(2).unwrap()); + let trans2 = Mutex::new(cluster_v2.sim.read().unwrap().get_router(1).unwrap()); + + let factory1 = ForwardFactory { + node_id: 1, + chain_send: Arc::new(move |m| { + info!("send to trans2"; "msg" => ?m); + let _ = trans2.lock().unwrap().send_raft_message(Box::new(m)); + }), + 
}; + cluster_v1.add_send_filter(factory1); + let factory2 = ForwardFactory { + node_id: 2, + chain_send: Arc::new(move |m| { + info!("send to trans1"; "msg" => ?m); + let _ = trans1.lock().unwrap().send_raft_message(m); + }), + }; + cluster_v2.add_send_filter(factory2); + let filter11 = Box::new( + RegionPacketFilter::new(r11, 2) + .direction(Direction::Recv) + .msg_type(MessageType::MsgAppend) + .msg_type(MessageType::MsgAppendResponse) + .msg_type(MessageType::MsgSnapshot) + .msg_type(MessageType::MsgHeartbeat) + .msg_type(MessageType::MsgHeartbeatResponse), + ); + cluster_v1.add_recv_filter_on_node(2, filter11); + + cluster_v2.must_put(b"k1", b"v1"); + assert_eq!( + cluster_v2.must_get(b"k1").unwrap(), + "v1".as_bytes().to_vec() + ); + check_key_in_engine(&cluster_v1.get_engine(2), b"zk1", b"v1"); + + cluster_v1.shutdown(); + cluster_v2.shutdown(); +} + +fn check_key_in_engine(engine: &T, key: &[u8], value: &[u8]) { + for _ in 0..10 { + if let Ok(Some(vec)) = engine.get_value(key) { + assert_eq!(vec.to_vec(), value.to_vec()); + return; + } + std::thread::sleep(Duration::from_millis(200)); + } + + panic!("cannot find key {:?} in engine", key); +} From 53a5f095e179f7fbaaaa0775871e1ce457dad251 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Fri, 28 Apr 2023 15:11:53 +0800 Subject: [PATCH 0672/1149] storage: add checksum logic in row slice, add cop and get test cases (#14611) ref tikv/tikv#14528 Signed-off-by: cfzjywxk --- components/test_coprocessor/src/fixture.rs | 93 ++++++- components/test_coprocessor/src/store.rs | 68 ++++- .../src/codec/data_type/scalar.rs | 36 +++ .../src/codec/row/v2/encoder_for_test.rs | 260 ++++++++++++++---- .../src/codec/row/v2/mod.rs | 1 + .../src/codec/row/v2/row_slice.rs | 160 ++++++++++- .../tidb_query_datatype/src/def/field_type.rs | 2 +- src/storage/mvcc/reader/point_getter.rs | 27 ++ tests/integrations/coprocessor/test_select.rs | 33 +++ 9 files changed, 618 insertions(+), 62 deletions(-) diff --git 
a/components/test_coprocessor/src/fixture.rs b/components/test_coprocessor/src/fixture.rs index a53ba4500bc..5e94d3e47fe 100644 --- a/components/test_coprocessor/src/fixture.rs +++ b/components/test_coprocessor/src/fixture.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use concurrency_manager::ConcurrencyManager; use kvproto::kvrpcpb::Context; use resource_metering::ResourceTagFactory; -use tidb_query_datatype::codec::Datum; +use tidb_query_datatype::codec::{row::v2::CODEC_VERSION, Datum}; use tikv::{ config::CoprReadPoolConfig, coprocessor::{readpool_impl, Endpoint}, @@ -71,6 +71,27 @@ pub fn init_data_with_engine_and_commit( init_data_with_details(ctx, engine, tbl, vals, commit, &Config::default()) } +pub fn init_data_with_engine_and_commit_v2_checksum( + ctx: Context, + engine: E, + tbl: &ProductTable, + vals: &[(i64, Option<&str>, i64)], + commit: bool, + with_checksum: bool, + extra_checksum: Option, +) -> (Store, Endpoint, Arc) { + init_data_with_details_v2_checksum( + ctx, + engine, + tbl, + vals, + commit, + &Config::default(), + with_checksum, + extra_checksum, + ) +} + pub fn init_data_with_details( ctx: Context, engine: E, @@ -78,6 +99,43 @@ pub fn init_data_with_details( vals: &[(i64, Option<&str>, i64)], commit: bool, cfg: &Config, +) -> (Store, Endpoint, Arc) { + init_data_with_details_impl(ctx, engine, tbl, vals, commit, cfg, 0, false, None) +} + +pub fn init_data_with_details_v2_checksum( + ctx: Context, + engine: E, + tbl: &ProductTable, + vals: &[(i64, Option<&str>, i64)], + commit: bool, + cfg: &Config, + with_checksum: bool, + extra_checksum: Option, +) -> (Store, Endpoint, Arc) { + init_data_with_details_impl( + ctx, + engine, + tbl, + vals, + commit, + cfg, + CODEC_VERSION, + with_checksum, + extra_checksum, + ) +} + +fn init_data_with_details_impl( + ctx: Context, + engine: E, + tbl: &ProductTable, + vals: &[(i64, Option<&str>, i64)], + commit: bool, + cfg: &Config, + codec_ver: u8, + with_checksum: bool, + extra_checksum: Option, ) -> (Store, 
Endpoint, Arc) { let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .build() @@ -86,12 +144,20 @@ pub fn init_data_with_details( store.begin(); for &(id, name, count) in vals { - store + let mut inserts = store .insert_into(tbl) .set(&tbl["id"], Datum::I64(id)) .set(&tbl["name"], name.map(str::as_bytes).into()) - .set(&tbl["count"], Datum::I64(count)) - .execute_with_ctx(ctx.clone()); + .set(&tbl["count"], Datum::I64(count)); + if codec_ver == CODEC_VERSION { + inserts = inserts + .set_v2(&tbl["id"], id.into()) + .set_v2(&tbl["name"], name.unwrap().into()) + .set_v2(&tbl["count"], count.into()); + inserts.execute_with_v2_checksum(ctx.clone(), with_checksum, extra_checksum); + } else { + inserts.execute_with_ctx(ctx.clone()); + } } if commit { store.commit_with_ctx(ctx); @@ -140,3 +206,22 @@ pub fn init_with_data_ext( ) -> (Store, Endpoint, Arc) { init_data_with_commit(tbl, vals, true) } + +pub fn init_data_with_commit_v2_checksum( + tbl: &ProductTable, + vals: &[(i64, Option<&str>, i64)], + with_checksum: bool, + extra_checksum: Option, +) -> (Store, Endpoint) { + let engine = TestEngineBuilder::new().build().unwrap(); + let (store, endpoint, _) = init_data_with_engine_and_commit_v2_checksum( + Context::default(), + engine, + tbl, + vals, + true, + with_checksum, + extra_checksum, + ); + (store, endpoint) +} diff --git a/components/test_coprocessor/src/store.rs b/components/test_coprocessor/src/store.rs index 278e210bc98..96f405d8f39 100644 --- a/components/test_coprocessor/src/store.rs +++ b/components/test_coprocessor/src/store.rs @@ -6,7 +6,12 @@ use collections::HashMap; use kvproto::kvrpcpb::{Context, IsolationLevel}; use test_storage::SyncTestStorageApiV1; use tidb_query_datatype::{ - codec::{datum, table, Datum}, + codec::{ + data_type::ScalarValue, + datum, + row::v2::encoder_for_test::{Column as ColumnV2, RowEncoder}, + table, Datum, + }, expr::EvalContext, }; use tikv::{ @@ -26,6 +31,7 @@ pub struct Insert<'a, 
E: Engine> { store: &'a mut Store, table: &'a Table, values: BTreeMap, + values_v2: BTreeMap, } impl<'a, E: Engine> Insert<'a, E> { @@ -34,6 +40,7 @@ impl<'a, E: Engine> Insert<'a, E> { store, table, values: BTreeMap::new(), + values_v2: BTreeMap::new(), } } @@ -44,10 +51,26 @@ impl<'a, E: Engine> Insert<'a, E> { self } + pub fn set_v2(mut self, col: &Column, value: ScalarValue) -> Self { + assert!(self.table.column_by_id(col.id).is_some()); + self.values_v2.insert(col.id, value); + self + } + pub fn execute(self) -> i64 { self.execute_with_ctx(Context::default()) } + fn prepare_index_kv(&self, handle: &Datum, buf: &mut Vec<(Vec, Vec)>) { + for (&id, idxs) in &self.table.idxs { + let mut v: Vec<_> = idxs.iter().map(|id| self.values[id].clone()).collect(); + v.push(handle.clone()); + let encoded = datum::encode_key(&mut EvalContext::default(), &v).unwrap(); + let idx_key = table::encode_index_seek_key(self.table.id, id, &encoded); + buf.push((idx_key, vec![0])); + } + } + pub fn execute_with_ctx(self, ctx: Context) -> i64 { let handle = self .values @@ -59,13 +82,44 @@ impl<'a, E: Engine> Insert<'a, E> { let values: Vec<_> = self.values.values().cloned().collect(); let value = table::encode_row(&mut EvalContext::default(), values, &ids).unwrap(); let mut kvs = vec![(key, value)]; - for (&id, idxs) in &self.table.idxs { - let mut v: Vec<_> = idxs.iter().map(|id| self.values[id].clone()).collect(); - v.push(handle.clone()); - let encoded = datum::encode_key(&mut EvalContext::default(), &v).unwrap(); - let idx_key = table::encode_index_seek_key(self.table.id, id, &encoded); - kvs.push((idx_key, vec![0])); + self.prepare_index_kv(&handle, &mut kvs); + self.store.put(ctx, kvs); + handle.i64() + } + + pub fn execute_with_v2_checksum( + self, + ctx: Context, + with_checksum: bool, + extra_checksum: Option, + ) -> i64 { + let handle = self + .values + .get(&self.table.handle_id) + .cloned() + .unwrap_or_else(|| Datum::I64(next_id())); + let key = 
table::encode_row_key(self.table.id, handle.i64()); + let mut columns: Vec = Vec::new(); + for (id, value) in self.values_v2.iter() { + let col_info = self.table.column_by_id(*id).unwrap(); + columns.push(ColumnV2::new_with_ft( + *id, + col_info.as_field_type(), + value.to_owned(), + )); + } + let mut val_buf = Vec::new(); + if with_checksum { + val_buf + .write_row_with_checksum(&mut EvalContext::default(), columns, extra_checksum) + .unwrap(); + } else { + val_buf + .write_row(&mut EvalContext::default(), columns) + .unwrap(); } + let mut kvs = vec![(key, val_buf)]; + self.prepare_index_kv(&handle, &mut kvs); self.store.put(ctx, kvs); handle.i64() } diff --git a/components/tidb_query_datatype/src/codec/data_type/scalar.rs b/components/tidb_query_datatype/src/codec/data_type/scalar.rs index d476fd2d370..c74423107e4 100644 --- a/components/tidb_query_datatype/src/codec/data_type/scalar.rs +++ b/components/tidb_query_datatype/src/codec/data_type/scalar.rs @@ -162,6 +162,14 @@ impl From for ScalarValue { } } +impl From<&str> for ScalarValue { + #[inline] + fn from(s: &str) -> ScalarValue { + let bytes = Bytes::from(s); + ScalarValue::Bytes(Some(bytes)) + } +} + impl From for Option { #[inline] fn from(s: ScalarValue) -> Option { @@ -401,6 +409,34 @@ impl_as_ref! { Decimal, as_decimal } impl_as_ref! { DateTime, as_date_time } impl_as_ref! 
{ Duration, as_duration } +impl ScalarValue { + #[inline] + pub fn as_enum(&self) -> Option> { + match self { + ScalarValue::Enum(x) => x.as_ref().map(|x| x.as_ref()), + other => panic!( + "Cannot cast {} scalar value into {}", + other.eval_type(), + stringify!(Int), + ), + } + } +} + +impl ScalarValue { + #[inline] + pub fn as_set(&self) -> Option> { + match self { + ScalarValue::Set(x) => x.as_ref().map(|x| x.as_ref()), + other => panic!( + "Cannot cast {} scalar value into {}", + other.eval_type(), + stringify!(Int), + ), + } + } +} + impl ScalarValue { #[inline] pub fn as_json(&self) -> Option> { diff --git a/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs b/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs index 343f2520230..5ac1cad3b32 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/encoder_for_test.rs @@ -24,12 +24,13 @@ use std::{i16, i32, i8, u16, u32, u8}; use codec::prelude::*; +use num_traits::Zero; use tipb::FieldType; use crate::{ codec::{ data_type::ScalarValue, - mysql::{decimal::DecimalEncoder, json::JsonEncoder}, + mysql::{decimal::DecimalEncoder, json::JsonEncoder, Duration}, Error, Result, }, expr::EvalContext, @@ -63,6 +64,14 @@ impl Column { } } + pub fn new_with_ft(id: i64, ft: FieldType, value: impl Into) -> Self { + Column { + id, + ft, + value: value.into(), + } + } + pub fn ft(&self) -> &FieldType { &self.ft } @@ -88,6 +97,142 @@ impl Column { self.ft.as_mut_accessor().set_decimal(decimal); self } + + // The encode rule follows https://github.com/pingcap/tidb/pull/43141. + // It's different from the other encoding rules and used for verification + // test cases in tikv, the actual checksum encoding would be done on the + // tidb side with row value generation. 
+ pub fn encode_for_checksum(&self, buf: &mut Vec) -> Result<()> { + match self.ft.as_accessor().tp() { + FieldTypeTp::Tiny + | FieldTypeTp::Short + | FieldTypeTp::Long + | FieldTypeTp::LongLong + | FieldTypeTp::Int24 + | FieldTypeTp::Year => { + let res = self.value.as_int().ok_or(Error::InvalidDataType(format!( + "invalid type: {:?}", + self.ft, + )))?; + buf.write_u64_le(*res as u64)?; + } + FieldTypeTp::VarChar + | FieldTypeTp::VarString + | FieldTypeTp::String + | FieldTypeTp::TinyBlob + | FieldTypeTp::MediumBlob + | FieldTypeTp::LongBlob + | FieldTypeTp::Blob => { + let res = self.value.as_bytes().ok_or(Error::InvalidDataType(format!( + "invalid type: {:?}", + self.ft, + )))?; + buf.write_u32_le(res.len() as u32)?; + buf.write_bytes(res)?; + } + FieldTypeTp::Timestamp + | FieldTypeTp::DateTime + | FieldTypeTp::Date + | FieldTypeTp::NewDate => { + let time = self + .value + .as_date_time() + .ok_or(Error::InvalidDataType(format!( + "invalid type: {:?}", + self.ft, + )))? + .to_numeric_string(); + buf.write_u32_le(time.len() as u32)?; + buf.write_bytes(time.as_bytes())?; + } + FieldTypeTp::Duration => { + let dur = self + .value + .as_duration() + .ok_or(Error::InvalidDataType(format!( + "invalid type: {:?}", + self.ft, + )))? + .to_numeric_string(); + buf.write_u32_le(dur.len() as u32)?; + buf.write_bytes(dur.as_bytes())?; + } + FieldTypeTp::Float | FieldTypeTp::Double => { + let mut val = self + .value + .as_real() + .ok_or(Error::InvalidDataType(format!( + "invalid type: {:?}", + self.ft, + )))? + .to_owned(); + if val.is_infinite() || val.is_nan() { + // Because ticdc has such a transform. + val.set_zero(); + } + buf.write_u64_le(val.to_bits())?; + } + FieldTypeTp::NewDecimal => { + let dec = self + .value + .as_decimal() + .ok_or(Error::InvalidDataType(format!( + "invalid type: {:?}", + self.ft, + )))? 
+ .to_string(); + buf.write_u32_le(dec.len() as u32)?; + buf.write_bytes(dec.as_bytes())?; + } + FieldTypeTp::Enum => { + let res = self + .value + .as_enum() + .ok_or(Error::InvalidDataType(format!( + "invalid type: {:?}", + self.ft + )))? + .value(); + buf.write_u64_le(res)?; + } + FieldTypeTp::Set => { + let res = self + .value + .as_set() + .ok_or(Error::InvalidDataType(format!( + "invalid type: {:?}", + self.ft + )))? + .value(); + buf.write_u64_le(res)?; + } + FieldTypeTp::Bit => { + // TODO: it's not supported yet. In current test only `INT` and `Varchar` + // types would be used. + buf.write_u64_le(u64::MAX)?; + } + FieldTypeTp::Json => { + let res = self + .value + .as_json() + .ok_or(Error::InvalidDataType(format!( + "invalid type: {:?}", + self.ft, + )))? + .to_string(); + buf.write_u32_le(res.len() as u32)?; + buf.write_bytes(res.as_bytes())?; + } + FieldTypeTp::Null | FieldTypeTp::Geometry => {} + _ => { + return Err(Error::Other(box_err!( + "unsupported type {:?}", + self.ft.as_accessor().tp() + ))); + } + }; + Ok(()) + } } /// Checksum @@ -97,8 +242,8 @@ impl Column { /// - CHECKSUM(4 bytes) /// - little-endian CRC32(IEEE) when hdr.ver = 0 (default) pub trait ChecksumHandler { - // update_col updates the checksum with the encoded value of the column. - fn checksum(&mut self, buf: &[u8]) -> Result<()>; + // checksum calculates the checksum value according to the input column values. + fn checksum(&mut self, cols: &[Column]) -> Result<()>; // header_value returns the checksum header value. 
fn header_value(&self) -> u8; @@ -110,11 +255,31 @@ pub trait ChecksumHandler { pub struct Crc32RowChecksumHandler { header: ChecksumHeader, hasher: crc32fast::Hasher, + buf: Vec, +} + +fn get_non_null_columns(cols: &[Column]) -> Vec { + let mut res = vec![]; + for col in cols { + if col.value.is_some() { + res.push(col.clone()); + } + } + res.sort_by_key(|c| c.id); + res } impl ChecksumHandler for Crc32RowChecksumHandler { - fn checksum(&mut self, buf: &[u8]) -> Result<()> { - self.hasher.update(buf); + fn checksum(&mut self, cols: &[Column]) -> Result<()> { + // For testing purposes, the DDL compatibility was not fully considered for + // checksum calculation, using all non-null columns regardless of the column's + // DDL status, such as write-reorg. + // Reference: https://github.com/pingcap/tidb/pull/43141. + for col in get_non_null_columns(cols) { + self.buf.clear(); + col.encode_for_checksum(&mut self.buf)?; + self.hasher.update(self.buf.as_slice()); + } Ok(()) } @@ -154,6 +319,7 @@ impl Crc32RowChecksumHandler { let mut res = Crc32RowChecksumHandler { header: ChecksumHeader::new(), hasher: crc32fast::Hasher::new(), + buf: Vec::new(), }; if has_extra_checksum { res.header.set_extra_checksum(); @@ -181,14 +347,15 @@ pub trait RowEncoder: NumberEncoder { extra_checksum: Option, ) -> Result<()> { let mut handler = Crc32RowChecksumHandler::new(extra_checksum.is_some()); - self.write_row_impl(ctx, columns, Some(&mut handler), extra_checksum) + handler.checksum(&columns)?; + self.write_row_impl(ctx, columns, Some(&handler), extra_checksum) } fn write_row_impl( &mut self, ctx: &mut EvalContext, columns: Vec, - mut checksum_handler: Option<&mut dyn ChecksumHandler>, + checksum_handler: Option<&dyn ChecksumHandler>, extra_checksum: Option, ) -> Result<()> { let mut is_big = false; @@ -225,7 +392,7 @@ pub trait RowEncoder: NumberEncoder { // encode begins self.write_u8(super::CODEC_VERSION)?; - self.write_flag(is_big)?; + self.write_flag(is_big, 
checksum_handler.is_some())?; self.write_u16_le(non_null_ids.len() as u16)?; self.write_u16_le(null_ids.len() as u16)?; @@ -241,9 +408,8 @@ pub trait RowEncoder: NumberEncoder { self.write_bytes(&offset_wtr)?; self.write_bytes(&value_wtr)?; - if let Some(checksum_handler) = checksum_handler.as_mut() { + if let Some(checksum_handler) = checksum_handler { let header_val = checksum_handler.header_value(); - checksum_handler.checksum(value_wtr.as_slice())?; let val = checksum_handler.value(); self.write_u8(header_val)?; self.write_u32_le(val)?; @@ -256,11 +422,12 @@ pub trait RowEncoder: NumberEncoder { } #[inline] - fn write_flag(&mut self, is_big: bool) -> codec::Result<()> { - let flag = if is_big { - super::Flags::BIG - } else { - super::Flags::default() + fn write_flag(&mut self, is_big: bool, has_checksum: bool) -> codec::Result<()> { + let flag = match (is_big, has_checksum) { + (true, true) => super::Flags::BIG | super::Flags::WITH_CHECKSUM, + (true, false) => super::Flags::BIG, + (false, true) => super::Flags::WITH_CHECKSUM, + (false, false) => super::Flags::default(), }; self.write_u8(flag.bits) } @@ -334,6 +501,26 @@ pub trait ScalarValueEncoder: NumberEncoder + DecimalEncoder + JsonEncoder { } impl ScalarValueEncoder for T {} +// This is a helper function for test. 
+pub fn prepare_cols_for_test() -> Vec { + vec![ + Column::new_with_ft(1, FieldType::from(FieldTypeTp::Short), 1000), + Column::new_with_ft(12, FieldType::from(FieldTypeTp::Long), 2), + Column::new_with_ft( + 335, + FieldType::from(FieldTypeTp::Short), + ScalarValue::Int(None), + ), + Column::new_with_ft(3, FieldType::from(FieldTypeTp::Float), 3.55), + Column::new_with_ft(8, FieldType::from(FieldTypeTp::VarChar), b"abc".to_vec()), + Column::new_with_ft( + 17, + FieldType::from(FieldTypeTp::Duration), + Duration::from_millis(34, 2).unwrap(), + ), + ] +} + #[cfg(test)] mod tests { use std::str::FromStr; @@ -346,7 +533,8 @@ mod tests { data_type::ScalarValue, mysql::{duration::NANOS_PER_SEC, Decimal, Duration, Json, Time}, row::v2::encoder_for_test::{ - ChecksumHandler, Crc32RowChecksumHandler, ScalarValueEncoder, + get_non_null_columns, prepare_cols_for_test, ChecksumHandler, + Crc32RowChecksumHandler, }, }, expr::EvalContext, @@ -423,48 +611,30 @@ mod tests { #[test] fn test_encode_checksum() { - let encode_col_values = |ctx: &mut EvalContext, non_null_cols: Vec| -> Vec { + let encode_col_values = |non_null_cols: Vec| -> Vec { let mut res = vec![]; for col in non_null_cols { - res.write_value(ctx, &col).unwrap(); - } - res - }; - let get_non_null_columns = |cols: &Vec| -> Vec { - let mut res = vec![]; - for col in cols { - if col.value.is_some() { - res.push(col.clone()); - } + col.encode_for_checksum(&mut res).unwrap(); } - res.sort_by_key(|c| c.id); res }; - let cols = vec![ - Column::new(1, 1000), - Column::new(12, 2), - Column::new(335, ScalarValue::Int(None)), - Column::new(3, 3), - Column::new(8, 32767), - ]; + let cols = prepare_cols_for_test(); let mut buf = vec![]; let mut handler = Crc32RowChecksumHandler::new(false); handler.header.set_version(0); + handler.checksum(&cols).unwrap(); buf.write_row_impl( &mut EvalContext::default(), cols.clone(), - Some(&mut handler), + Some(&handler), None, ) .unwrap(); let exp = { let mut hasher = 
crc32fast::Hasher::new(); - hasher.update( - encode_col_values(&mut EvalContext::default(), get_non_null_columns(&cols)) - .as_slice(), - ); + hasher.update(encode_col_values(get_non_null_columns(&cols)).as_slice()); hasher.finalize() }; let mut val_slice = &buf[buf.len() - 4..]; @@ -475,13 +645,9 @@ mod tests { buf.clear(); let mut handler = Crc32RowChecksumHandler::new(true); handler.header.set_version(1); - buf.write_row_impl( - &mut EvalContext::default(), - cols, - Some(&mut handler), - Some(exp), - ) - .unwrap(); + handler.checksum(&cols).unwrap(); + buf.write_row_impl(&mut EvalContext::default(), cols, Some(&handler), Some(exp)) + .unwrap(); let mut val_slice = &buf[buf.len() - 4..]; let mut extra_val_slice = &buf[buf.len() - 8..buf.len() - 4]; assert_eq!(exp, handler.value()); diff --git a/components/tidb_query_datatype/src/codec/row/v2/mod.rs b/components/tidb_query_datatype/src/codec/row/v2/mod.rs index b0cec291410..d7a6578f74d 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/mod.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/mod.rs @@ -11,6 +11,7 @@ bitflags! 
{ #[derive(Default)] struct Flags: u8 { const BIG = 1; + const WITH_CHECKSUM = 2; } } diff --git a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs index 5d0c7329d54..da117c96e2c 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs @@ -10,6 +10,7 @@ use num_traits::PrimInt; use crate::codec::{Error, Result}; +#[derive(Debug)] pub enum RowSlice<'a> { Small { origin: &'a [u8], @@ -17,6 +18,7 @@ pub enum RowSlice<'a> { null_ids: LeBytes<'a, u8>, offsets: LeBytes<'a, u16>, values: LeBytes<'a, u8>, + checksum: Option, }, Big { origin: &'a [u8], @@ -24,9 +26,49 @@ pub enum RowSlice<'a> { null_ids: LeBytes<'a, u32>, offsets: LeBytes<'a, u32>, values: LeBytes<'a, u8>, + checksum: Option, }, } +/// Checksum +/// - HEADER(1 byte) +/// - VER: version(3 bit) +/// - E: has extra checksum +/// - CHECKSUM(4 bytes) +/// - little-endian CRC32(IEEE) when hdr.ver = 0 (default) +#[derive(Copy, Clone, Debug)] +pub struct Checksum { + header: u8, + val: u32, + extra_val: u32, +} + +impl Checksum { + fn new(header: u8, val: u32) -> Self { + Self { + header, + val, + extra_val: 0, + } + } + + pub fn get_checksum_val(&self) -> u32 { + self.val + } + + pub fn has_extra_checksum(&self) -> bool { + (self.header & 0b1000) > 0 + } + + fn set_extra_checksum(&mut self, extra_val: u32) { + self.extra_val = extra_val; + } + + pub fn get_extra_checksum_val(&self) -> u32 { + self.extra_val + } +} + impl RowSlice<'_> { /// # Panics /// @@ -34,18 +76,21 @@ impl RowSlice<'_> { pub fn from_bytes(mut data: &[u8]) -> Result> { let origin = data; assert_eq!(data.read_u8()?, super::CODEC_VERSION); - let is_big = super::Flags::from_bits_truncate(data.read_u8()?) 
== super::Flags::BIG; + let flags = super::Flags::from_bits_truncate(data.read_u8()?); + let is_big = flags.contains(super::Flags::BIG); + let with_checksum = flags.contains(super::Flags::WITH_CHECKSUM); // read ids count let non_null_cnt = data.read_u16_le()? as usize; let null_cnt = data.read_u16_le()? as usize; - let row = if is_big { + let mut row = if is_big { RowSlice::Big { origin, non_null_ids: read_le_bytes(&mut data, non_null_cnt)?, null_ids: read_le_bytes(&mut data, null_cnt)?, offsets: read_le_bytes(&mut data, non_null_cnt)?, values: LeBytes::new(data), + checksum: None, } } else { RowSlice::Small { @@ -54,7 +99,20 @@ impl RowSlice<'_> { null_ids: read_le_bytes(&mut data, null_cnt)?, offsets: read_le_bytes(&mut data, non_null_cnt)?, values: LeBytes::new(data), + checksum: None, + } + }; + if with_checksum { + let mut checksum_bytes = row.cut_checksum_bytes(non_null_cnt); + assert!(checksum_bytes.len() == 5 || checksum_bytes.len() == 9); + let header = checksum_bytes.read_u8()?; + let val = checksum_bytes.read_u32_le()?; + let mut checksum = Checksum::new(header, val); + if checksum.has_extra_checksum() { + let extra_val = checksum_bytes.read_u32_le()?; + checksum.set_extra_checksum(extra_val); } + row.set_checksum(Some(checksum)); }; Ok(row) } @@ -166,6 +224,46 @@ impl RowSlice<'_> { Ok(None) } } + + #[inline] + // Return the checksum byte slice, remove it from the `values` field of + // `RowSlice`. + pub fn cut_checksum_bytes(&mut self, non_null_col_num: usize) -> &[u8] { + match self { + RowSlice::Big { + offsets, values, .. + } => { + let last_slice_idx = offsets.get(non_null_col_num - 1).unwrap() as usize; + let slice = values.slice; + *values = LeBytes::new(&slice[..last_slice_idx]); + &slice[last_slice_idx..] + } + RowSlice::Small { + offsets, values, .. + } => { + let last_slice_idx = offsets.get(non_null_col_num - 1).unwrap() as usize; + let slice = values.slice; + *values = LeBytes::new(&slice[..last_slice_idx]); + &slice[last_slice_idx..] 
+ } + } + } + + #[inline] + pub fn get_checksum(&self) -> Option { + match self { + RowSlice::Big { checksum, .. } => *checksum, + RowSlice::Small { checksum, .. } => *checksum, + } + } + + #[inline] + fn set_checksum(&mut self, checksum_input: Option) { + match self { + RowSlice::Big { checksum, .. } => *checksum = checksum_input, + RowSlice::Small { checksum, .. } => *checksum = checksum_input, + } + } } /// Decodes `len` number of ints from `buf` in little endian @@ -189,6 +287,7 @@ where } #[cfg(target_endian = "little")] +#[derive(Debug)] pub struct LeBytes<'a, T: PrimInt> { slice: &'a [u8], _marker: PhantomData, @@ -255,12 +354,17 @@ mod tests { use std::u16; use codec::prelude::NumberEncoder; + use tipb::FieldType; use super::{ super::encoder_for_test::{Column, RowEncoder}, read_le_bytes, RowSlice, }; - use crate::{codec::data_type::ScalarValue, expr::EvalContext}; + use crate::{ + codec::data_type::{Duration, ScalarValue}, + expr::EvalContext, + FieldTypeTp, + }; #[test] fn test_read_le_bytes() { @@ -354,6 +458,56 @@ mod tests { assert!(!row.search_in_null_ids(0xFF0021)); assert!(!row.search_in_null_ids(0xFF00000021)); } + + fn encoded_data_with_checksum(extra_checksum: Option, null_row_id: i64) -> Vec { + let cols = vec![ + Column::new_with_ft(1, FieldType::from(FieldTypeTp::Short), 1000), + Column::new_with_ft(12, FieldType::from(FieldTypeTp::Long), 2), + Column::new_with_ft( + null_row_id, + FieldType::from(FieldTypeTp::Short), + ScalarValue::Int(None), + ), + Column::new_with_ft(3, FieldType::from(FieldTypeTp::Float), 3.55), + Column::new_with_ft(8, FieldType::from(FieldTypeTp::VarChar), b"abc".to_vec()), + Column::new_with_ft( + 17, + FieldType::from(FieldTypeTp::Duration), + Duration::from_millis(34, 2).unwrap(), + ), + ]; + let mut buf = vec![]; + buf.write_row_with_checksum(&mut EvalContext::default(), cols, extra_checksum) + .unwrap(); + buf + } + + #[test] + fn test_decode_with_checksum() { + for null_row_id in [235, 355] { + for extra_checksum in 
[None, Some(37217)] { + let data = encoded_data_with_checksum(extra_checksum, null_row_id); + let row = RowSlice::from_bytes(&data).unwrap(); + assert_eq!(null_row_id > 255, row.is_big()); + assert_eq!(Some((0, 2)), row.search_in_non_null_ids(1).unwrap()); + assert_eq!(Some((2, 10)), row.search_in_non_null_ids(3).unwrap()); + assert_eq!(Some((10, 13)), row.search_in_non_null_ids(8).unwrap()); + assert_eq!(Some((13, 14)), row.search_in_non_null_ids(12).unwrap()); + assert_eq!(Some((14, 18)), row.search_in_non_null_ids(17).unwrap()); + assert_eq!(None, row.search_in_non_null_ids(235).unwrap()); + assert!(row.search_in_null_ids(null_row_id)); + assert!(!row.search_in_null_ids(8)); + + let checksum = row.get_checksum().unwrap(); + assert!(checksum.get_checksum_val() > 0); + assert_eq!(extra_checksum.is_some(), checksum.has_extra_checksum()); + assert_eq!( + extra_checksum.unwrap_or(0), + checksum.get_extra_checksum_val() + ); + } + } + } } #[cfg(test)] diff --git a/components/tidb_query_datatype/src/def/field_type.rs b/components/tidb_query_datatype/src/def/field_type.rs index 903ec738e89..e8debe626f7 100644 --- a/components/tidb_query_datatype/src/def/field_type.rs +++ b/components/tidb_query_datatype/src/def/field_type.rs @@ -50,7 +50,7 @@ pub enum FieldTypeTp { } impl FieldTypeTp { - fn from_i32(i: i32) -> Option { + pub fn from_i32(i: i32) -> Option { if (i >= FieldTypeTp::Unspecified as i32 && i <= FieldTypeTp::Bit as i32) || (i >= FieldTypeTp::Json as i32 && i <= FieldTypeTp::Geometry as i32) { diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index 2f215986ca9..8b9399b7d05 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -415,6 +415,13 @@ impl PointGetter { mod tests { use engine_rocks::ReadPerfInstant; use kvproto::kvrpcpb::{Assertion, AssertionLevel, PrewriteRequestPessimisticAction::*}; + use tidb_query_datatype::{ + codec::row::v2::{ + 
encoder_for_test::{prepare_cols_for_test, RowEncoder}, + RowSlice, + }, + expr::EvalContext, + }; use txn_types::SHORT_VALUE_MAX_LEN; use super::*; @@ -1289,4 +1296,24 @@ mod tests { assert_eq!(s.write.next, 0); assert_eq!(s.write.get, 0); } + + #[test] + fn test_point_get_with_checksum() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + let k = b"k"; + let mut val_buf = Vec::new(); + let columns = prepare_cols_for_test(); + val_buf + .write_row_with_checksum(&mut EvalContext::default(), columns, Some(123)) + .unwrap(); + + must_prewrite_put(&mut engine, k, val_buf.as_slice(), k, 1); + must_commit(&mut engine, k, 1, 2); + + let mut getter = new_point_getter(&mut engine, 40.into()); + let val = getter.get(&Key::from_raw(k)).unwrap().unwrap(); + assert_eq!(val, val_buf.as_slice()); + let row_slice = RowSlice::from_bytes(val.as_slice()).unwrap(); + assert!(row_slice.get_checksum().unwrap().get_checksum_val() > 0); + } } diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index d5f8d55e320..8c29ea8490d 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -2054,6 +2054,39 @@ fn test_buckets() { wait_refresh_buckets(0); } +#[test] +fn test_select_v2_format_with_checksum() { + let data = vec![ + (1, Some("name:0"), 2), + (2, Some("name:4"), 3), + (4, Some("name:3"), 1), + (5, Some("name:1"), 4), + (9, Some("name:8"), 7), + (10, Some("name:6"), 8), + ]; + + let product = ProductTable::new(); + for extra_checksum in [None, Some(132423)] { + // The row value encoded with checksum bytes should have no impact on cop task + // processing and related result chunk filling. 
+ let (_, endpoint) = + init_data_with_commit_v2_checksum(&product, &data, true, extra_checksum); + let req = DagSelect::from(&product).build(); + let mut resp = handle_select(&endpoint, req); + let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); + for (row, (id, name, cnt)) in spliter.zip(data.clone()) { + let name_datum = name.map(|s| s.as_bytes()).into(); + let expected_encoded = datum::encode_value( + &mut EvalContext::default(), + &[Datum::I64(id), name_datum, cnt.into()], + ) + .unwrap(); + let result_encoded = datum::encode_value(&mut EvalContext::default(), &row).unwrap(); + assert_eq!(result_encoded, &*expected_encoded); + } + } +} + #[test] fn test_batch_request() { let data = vec![ From 63d79d3cec8175dad0164ad6f7b390b573a54652 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Fri, 28 Apr 2023 18:13:52 +0800 Subject: [PATCH 0673/1149] raftstore-v2: support dynamic config write buffer settings (#14565) ref tikv/tikv#12842 support dynamically adjusting write buffer settings Signed-off-by: tabokie Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/engine_panic/src/db_options.rs | 8 ++++ components/engine_rocks/src/db_options.rs | 18 +++++++++ components/engine_traits/src/db_options.rs | 2 + components/tikv_util/src/config.rs | 2 +- src/config/configurable.rs | 46 ++++++++++++++++++++-- src/config/mod.rs | 16 +++++++- 6 files changed, 85 insertions(+), 7 deletions(-) diff --git a/components/engine_panic/src/db_options.rs b/components/engine_panic/src/db_options.rs index 47ce356deac..c081a5c1d12 100644 --- a/components/engine_panic/src/db_options.rs +++ b/components/engine_panic/src/db_options.rs @@ -44,6 +44,14 @@ impl DbOptions for PanicDbOptions { panic!() } + fn set_flush_size(&mut self, f: usize) -> Result<()> { + panic!() + } + + fn set_flush_oldest_first(&mut self, f: bool) -> Result<()> { + panic!() + } + fn set_titandb_options(&mut self, opts: &Self::TitanDbOptions) { panic!() } diff --git 
a/components/engine_rocks/src/db_options.rs b/components/engine_rocks/src/db_options.rs index f437cc7b433..c9ef2cfda98 100644 --- a/components/engine_rocks/src/db_options.rs +++ b/components/engine_rocks/src/db_options.rs @@ -91,6 +91,24 @@ impl DbOptions for RocksDbOptions { Ok(()) } + fn set_flush_size(&mut self, f: usize) -> Result<()> { + if let Some(m) = self.0.get_write_buffer_manager() { + m.set_flush_size(f); + } else { + return Err(box_err!("write buffer manager not found")); + } + Ok(()) + } + + fn set_flush_oldest_first(&mut self, f: bool) -> Result<()> { + if let Some(m) = self.0.get_write_buffer_manager() { + m.set_flush_oldest_first(f); + } else { + return Err(box_err!("write buffer manager not found")); + } + Ok(()) + } + fn set_titandb_options(&mut self, opts: &Self::TitanDbOptions) { self.0.set_titandb_options(opts.as_raw()) } diff --git a/components/engine_traits/src/db_options.rs b/components/engine_traits/src/db_options.rs index fcfc17ea78f..2c6e9c3d4e8 100644 --- a/components/engine_traits/src/db_options.rs +++ b/components/engine_traits/src/db_options.rs @@ -20,6 +20,8 @@ pub trait DbOptions { fn set_rate_bytes_per_sec(&mut self, rate_bytes_per_sec: i64) -> Result<()>; fn get_rate_limiter_auto_tuned(&self) -> Option; fn set_rate_limiter_auto_tuned(&mut self, rate_limiter_auto_tuned: bool) -> Result<()>; + fn set_flush_size(&mut self, f: usize) -> Result<()>; + fn set_flush_oldest_first(&mut self, f: bool) -> Result<()>; fn set_titandb_options(&mut self, opts: &Self::TitanDbOptions); } diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index c55cebea0ff..c3ace2a5dfe 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -255,7 +255,7 @@ impl<'de> Deserialize<'de> for ReadableSize { } } -#[derive(Clone, Copy, Debug, PartialEq, Eq, Ord, PartialOrd)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Ord, PartialOrd, Default)] pub struct ReadableDuration(pub Duration); impl Add for 
ReadableDuration { diff --git a/src/config/configurable.rs b/src/config/configurable.rs index 7cbcc731eb6..142d14a0304 100644 --- a/src/config/configurable.rs +++ b/src/config/configurable.rs @@ -14,6 +14,8 @@ pub trait ConfigurableDb { fn set_cf_config(&self, cf: &str, opts: &[(&str, &str)]) -> ConfigRes; fn set_rate_bytes_per_sec(&self, rate_bytes_per_sec: i64) -> ConfigRes; fn set_rate_limiter_auto_tuned(&self, auto_tuned: bool) -> ConfigRes; + fn set_flush_size(&self, f: usize) -> ConfigRes; + fn set_flush_oldest_first(&self, f: bool) -> ConfigRes; fn set_shared_block_cache_capacity(&self, capacity: usize) -> ConfigRes; } @@ -49,6 +51,16 @@ impl ConfigurableDb for RocksEngine { } } + fn set_flush_size(&self, f: usize) -> ConfigRes { + let mut opt = self.get_db_options(); + opt.set_flush_size(f).map_err(Box::from) + } + + fn set_flush_oldest_first(&self, f: bool) -> ConfigRes { + let mut opt = self.get_db_options(); + opt.set_flush_oldest_first(f).map_err(Box::from) + } + fn set_shared_block_cache_capacity(&self, capacity: usize) -> ConfigRes { let opt = self.get_options_cf(CF_DEFAULT).unwrap(); // FIXME unwrap opt.set_block_cache_capacity(capacity as u64) @@ -113,18 +125,44 @@ impl ConfigurableDb for TabletRegistry { fn set_rate_bytes_per_sec(&self, rate_bytes_per_sec: i64) -> ConfigRes { loop_registry(self, |cache| { if let Some(latest) = cache.latest() { - latest.set_rate_bytes_per_sec(rate_bytes_per_sec)? + latest.set_rate_bytes_per_sec(rate_bytes_per_sec)?; + Ok(false) + } else { + Ok(true) } - Ok(true) }) } fn set_rate_limiter_auto_tuned(&self, auto_tuned: bool) -> ConfigRes { loop_registry(self, |cache| { if let Some(latest) = cache.latest() { - latest.set_rate_limiter_auto_tuned(auto_tuned)? 
+ latest.set_rate_limiter_auto_tuned(auto_tuned)?; + Ok(false) + } else { + Ok(true) + } + }) + } + + fn set_flush_size(&self, f: usize) -> ConfigRes { + loop_registry(self, |cache| { + if let Some(latest) = cache.latest() { + latest.set_flush_size(f)?; + Ok(false) + } else { + Ok(true) + } + }) + } + + fn set_flush_oldest_first(&self, f: bool) -> ConfigRes { + loop_registry(self, |cache| { + if let Some(latest) = cache.latest() { + latest.set_flush_oldest_first(f)?; + Ok(false) + } else { + Ok(true) } - Ok(true) }) } diff --git a/src/config/mod.rs b/src/config/mod.rs index 62a7de89130..7284fef25db 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1228,13 +1228,11 @@ pub struct DbConfig { pub enable_unordered_write: bool, #[online_config(skip)] pub allow_concurrent_memtable_write: Option, - #[online_config(skip)] pub write_buffer_limit: Option, #[online_config(skip)] #[doc(hidden)] #[serde(skip_serializing)] pub write_buffer_stall_ratio: f32, - #[online_config(skip)] #[doc(hidden)] #[serde(skip_serializing)] pub write_buffer_flush_oldest_first: bool, @@ -1958,6 +1956,20 @@ impl ConfigManager for DbConfigManger { .set_rate_limiter_auto_tuned(rate_limiter_auto_tuned)?; } + if let Some(size) = change + .drain_filter(|(name, _)| name == "write_buffer_limit") + .next() + { + self.db.set_flush_size(size.1.into())?; + } + + if let Some(f) = change + .drain_filter(|(name, _)| name == "write_buffer_flush_oldest_first") + .next() + { + self.db.set_flush_oldest_first(f.1.into())?; + } + if let Some(background_jobs_config) = change .drain_filter(|(name, _)| name == "max_background_jobs") .next() From d5c01113daa5a25d969020d852c9d098eb9ce749 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Fri, 5 May 2023 10:35:55 +0800 Subject: [PATCH 0674/1149] raftstore: pub snapshot_meta (#14674) ref tikv/tikv#14575 Make snapshot_meta accessible Signed-off-by: CalvinNeo --- components/raftstore/src/store/snap.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 4f347002f67..62744501195 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1040,6 +1040,10 @@ impl Snapshot { pub fn tablet_snap_path(&self) -> Option { Some(self.meta_file.meta.as_ref()?.tablet_snap_path.clone()) } + + pub fn snapshot_meta(&self) -> &Option { + &self.meta_file.meta + } } impl fmt::Debug for Snapshot { From 28b8c670d878ebf8c61bb7524367c94934c583a3 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Thu, 4 May 2023 22:45:55 -0700 Subject: [PATCH 0675/1149] status_api: add get_engine_type api (#14651) ref tikv/tikv#12842 Add engine_type API in status server. Returns "raft-kv" if it's v1 and "partitioned-raft-kv" if it's v2. Signed-off-by: tonyxuqqi --- src/config/mod.rs | 7 +++++ src/server/status_server/mod.rs | 53 +++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/src/config/mod.rs b/src/config/mod.rs index 7284fef25db..a29dcf5b9f1 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -4337,6 +4337,13 @@ impl ConfigController { pub fn get_current(&self) -> TikvConfig { self.inner.read().unwrap().current.clone() } + + pub fn get_engine_type(&self) -> &'static str { + if self.get_current().storage.engine == EngineType::RaftKv2 { + return "partitioned-raft-kv"; + } + "raft-kv" + } } #[cfg(test)] diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 2ce7a8714c0..1b689138f11 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -411,6 +411,16 @@ where } } + async fn get_engine_type(cfg_controller: &ConfigController) -> hyper::Result> { + let engine_type = cfg_controller.get_engine_type(); + let response = Response::builder() + .header("Content-Type", mime::TEXT_PLAIN.to_string()) + .header("Content-Length", engine_type.len()) + .body(engine_type.into()) + .unwrap(); + Ok(response) + } + pub fn stop(self) { let _ = 
self.tx.send(()); self.thread_pool.shutdown_timeout(Duration::from_secs(3)); @@ -609,6 +619,9 @@ where (Method::POST, "/config") => { Self::update_config(cfg_controller.clone(), req).await } + (Method::GET, "/engine_type") => { + Self::get_engine_type(&cfg_controller).await + } // This interface is used for configuration file hosting scenarios, // TiKV will not update configuration files, and this interface will // silently ignore configration items that cannot be updated online, @@ -1024,6 +1037,7 @@ mod tests { use crate::{ config::{ConfigController, TikvConfig}, server::status_server::{profile::TEST_PROFILE_MUTEX, LogLevelRequest, StatusServer}, + storage::config::EngineType, }; #[derive(Clone)] @@ -1573,4 +1587,43 @@ mod tests { block_on(handle).unwrap(); status_server.stop(); } + + #[test] + fn test_get_engine_type() { + let mut multi_rocks_cfg = TikvConfig::default(); + multi_rocks_cfg.storage.engine = EngineType::RaftKv2; + let cfgs = [TikvConfig::default(), multi_rocks_cfg]; + let resp_strs = ["raft-kv", "partitioned-raft-kv"]; + for (cfg, resp_str) in IntoIterator::into_iter(cfgs).zip(resp_strs) { + let temp_dir = tempfile::TempDir::new().unwrap(); + let mut status_server = StatusServer::new( + 1, + ConfigController::new(cfg), + Arc::new(SecurityConfig::default()), + MockRouter, + temp_dir.path().to_path_buf(), + None, + ) + .unwrap(); + let addr = "127.0.0.1:0".to_owned(); + let _ = status_server.start(addr); + let client = Client::new(); + let uri = Uri::builder() + .scheme("http") + .authority(status_server.listening_addr().to_string().as_str()) + .path_and_query("/engine_type") + .build() + .unwrap(); + + let handle = status_server.thread_pool.spawn(async move { + let res = client.get(uri).await.unwrap(); + assert_eq!(res.status(), StatusCode::OK); + let body_bytes = hyper::body::to_bytes(res.into_body()).await.unwrap(); + let engine_type = String::from_utf8(body_bytes.as_ref().to_owned()).unwrap(); + assert_eq!(engine_type, resp_str); + }); + 
block_on(handle).unwrap(); + status_server.stop(); + } + } } From 487212c7ea4521fb3b43aa60e008f8e307cd7b02 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Sat, 6 May 2023 14:40:56 +0800 Subject: [PATCH 0676/1149] tikv control for raftstore v2 (#14402) ref tikv/tikv#14654 implement tikv control for raftstore v2 Signed-off-by: Spade A Signed-off-by: SpadeA-Tang --- cmd/tikv-ctl/src/executor.rs | 167 ++++- src/server/debug.rs | 2 +- src/server/debug2.rs | 992 ++++++++++++++++++++++++++ src/server/mod.rs | 1 + src/storage/mvcc/consistency_check.rs | 4 +- src/storage/mvcc/mod.rs | 4 +- tests/integrations/server/debugger.rs | 163 +++++ tests/integrations/server/mod.rs | 1 + 8 files changed, 1322 insertions(+), 12 deletions(-) create mode 100644 src/server/debug2.rs create mode 100644 tests/integrations/server/debugger.rs diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index df095e44425..69ac2dc1058 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -1,13 +1,15 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - borrow::ToOwned, cmp::Ordering, pin::Pin, str, string::ToString, sync::Arc, time::Duration, u64, + borrow::ToOwned, cmp::Ordering, path::Path, pin::Pin, str, string::ToString, sync::Arc, + time::Duration, u64, }; use encryption_export::data_key_manager_from_config; use engine_rocks::util::{db_exist, new_engine_opt}; use engine_traits::{ - Engines, Error as EngineError, RaftEngine, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, + Engines, Error as EngineError, RaftEngine, TabletRegistry, ALL_CFS, CF_DEFAULT, CF_LOCK, + CF_WRITE, DATA_CFS, }; use futures::{executor::block_on, future, stream, Stream, StreamExt, TryStreamExt}; use grpcio::{ChannelBuilder, Environment}; @@ -25,12 +27,16 @@ use raft_log_engine::RaftLogEngine; use raftstore::store::{util::build_key_range, INIT_EPOCH_CONF_VER}; use security::SecurityManager; use serde_json::json; +use server::fatal; +use slog_global::crit; use tikv::{ config::{ConfigController, TikvConfig}, server::{ debug::{BottommostLevelCompaction, Debugger, RegionInfo}, + debug2::DebuggerV2, KvEngineFactoryBuilder, }, + storage::config::EngineType, }; use tikv_util::escape; @@ -72,13 +78,10 @@ pub fn new_debug_executor( let factory = KvEngineFactoryBuilder::new(env.clone(), cfg, cache) .lite(true) .build(); - let kv_db = match factory.create_shared_db(data_dir) { - Ok(db) => db, - Err(e) => handle_engine_error(e), - }; let cfg_controller = ConfigController::default(); if !cfg.raft_engine.enable { + assert_eq!(EngineType::RaftKv, cfg.storage.engine); let raft_db_opts = cfg.raftdb.build_opt(env, None); let raft_db_cf_opts = cfg.raftdb.build_cf_opts(factory.block_cache()); let raft_path = cfg.infer_raft_db_path(Some(data_dir)).unwrap(); @@ -90,6 +93,12 @@ pub fn new_debug_executor( Ok(db) => db, Err(e) => handle_engine_error(e), }; + + let kv_db = match factory.create_shared_db(data_dir) { + Ok(db) => db, + Err(e) => handle_engine_error(e), + }; + let debugger = Debugger::new(Engines::new(kv_db, raft_db), cfg_controller); 
Box::new(debugger) as Box } else { @@ -100,8 +109,24 @@ pub fn new_debug_executor( tikv_util::logger::exit_process_gracefully(-1); } let raft_db = RaftLogEngine::new(config, key_manager, None /* io_rate_limiter */).unwrap(); - let debugger = Debugger::new(Engines::new(kv_db, raft_db), cfg_controller); - Box::new(debugger) as Box + match cfg.storage.engine { + EngineType::RaftKv => { + let kv_db = match factory.create_shared_db(data_dir) { + Ok(db) => db, + Err(e) => handle_engine_error(e), + }; + + let debugger = Debugger::new(Engines::new(kv_db, raft_db), cfg_controller); + Box::new(debugger) as Box + } + EngineType::RaftKv2 => { + let registry = + TabletRegistry::new(Box::new(factory), Path::new(data_dir).join("tablets")) + .unwrap_or_else(|e| fatal!("failed to create tablet registry {:?}", e)); + let debugger = DebuggerV2::new(registry, raft_db, cfg_controller); + Box::new(debugger) as Box + } + } } } @@ -1117,3 +1142,129 @@ fn handle_engine_error(err: EngineError) -> ! { tikv_util::logger::exit_process_gracefully(-1); } + +impl DebugExecutor for DebuggerV2 { + fn check_local_mode(&self) {} + + fn get_all_regions_in_store(&self) -> Vec { + self.get_all_regions_in_store() + .unwrap_or_else(|e| perror_and_exit("Debugger::get_all_regions_in_store", e)) + } + + fn get_value_by_key(&self, cf: &str, key: Vec) -> Vec { + self.get(DbType::Kv, cf, &key) + .unwrap_or_else(|e| perror_and_exit("Debugger::get", e)) + } + + fn get_region_size(&self, region: u64, cfs: Vec<&str>) -> Vec<(String, usize)> { + self.region_size(region, cfs) + .unwrap_or_else(|e| perror_and_exit("Debugger::region_size", e)) + .into_iter() + .map(|(cf, size)| (cf.to_owned(), size)) + .collect() + } + + fn get_region_info(&self, region: u64) -> RegionInfo { + self.region_info(region) + .unwrap_or_else(|e| perror_and_exit("Debugger::region_info", e)) + } + + fn get_raft_log(&self, region: u64, index: u64) -> Entry { + self.raft_log(region, index) + .unwrap_or_else(|e| 
perror_and_exit("Debugger::raft_log", e)) + } + + fn get_mvcc_infos(&self, from: Vec, to: Vec, limit: u64) -> MvccInfoStream { + let iter = self + .scan_mvcc(&from, &to, limit) + .unwrap_or_else(|e| perror_and_exit("Debugger::scan_mvcc", e)); + let stream = stream::iter(iter).map_err(|e| e.to_string()); + Box::pin(stream) + } + + fn raw_scan_impl(&self, _from_key: &[u8], _end_key: &[u8], _limit: usize, _cf: &str) { + unimplemented!() + } + + fn do_compaction( + &self, + db: DbType, + cf: &str, + from: &[u8], + to: &[u8], + threads: u32, + bottommost: BottommostLevelCompaction, + ) { + self.compact(db, cf, from, to, threads, bottommost) + .unwrap_or_else(|e| perror_and_exit("Debugger::compact", e)); + } + + fn set_region_tombstone(&self, _regions: Vec) { + unimplemented!() + } + + fn set_region_tombstone_by_id(&self, _regions: Vec) { + unimplemented!() + } + + fn recover_regions(&self, _regions: Vec, _read_only: bool) { + unimplemented!() + } + + fn recover_all(&self, _threads: usize, _read_only: bool) { + unimplemented!() + } + + fn print_bad_regions(&self) { + unimplemented!() + } + + fn remove_fail_stores( + &self, + _store_ids: Vec, + _region_ids: Option>, + _promote_learner: bool, + ) { + unimplemented!() + } + + fn drop_unapplied_raftlog(&self, _region_ids: Option>) { + unimplemented!() + } + + fn recreate_region(&self, _sec_mgr: Arc, _pd_cfg: &PdConfig, _region_id: u64) { + unimplemented!() + } + + fn dump_metrics(&self, _tags: Vec<&str>) { + unimplemented!() + } + + fn check_region_consistency(&self, _: u64) { + unimplemented!() + } + + fn modify_tikv_config(&self, _config_name: &str, _config_value: &str) { + unimplemented!() + } + + fn dump_region_properties(&self, _region_id: u64) { + unimplemented!() + } + + fn dump_range_properties(&self, _start: Vec, _end: Vec) { + unimplemented!() + } + + fn dump_store_info(&self) { + unimplemented!() + } + + fn dump_cluster_info(&self) { + unimplemented!() + } + + fn reset_to_version(&self, _version: u64) { + 
unimplemented!() + } +} diff --git a/src/server/debug.rs b/src/server/debug.rs index c16621f4d85..2e2b34970b8 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -68,7 +68,7 @@ pub struct RegionInfo { } impl RegionInfo { - fn new( + pub fn new( raft_local: Option, raft_apply: Option, region_local: Option, diff --git a/src/server/debug2.rs b/src/server/debug2.rs new file mode 100644 index 00000000000..caa1e01d77e --- /dev/null +++ b/src/server/debug2.rs @@ -0,0 +1,992 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_rocks::{raw::CompactOptions, util::get_cf_handle, RocksEngine, RocksEngineIterator}; +use engine_traits::{ + CachedTablet, Iterable, Peekable, RaftEngine, TabletContext, TabletRegistry, CF_DEFAULT, + CF_LOCK, CF_WRITE, +}; +use keys::{data_key, DATA_MAX_KEY, DATA_PREFIX_KEY}; +use kvproto::{ + debugpb::Db as DbType, + kvrpcpb::MvccInfo, + metapb, + raft_serverpb::{PeerState, RegionLocalState}, +}; +use nom::AsBytes; +use raft::prelude::Entry; +use raftstore::store::util::check_key_in_region; + +use super::debug::{BottommostLevelCompaction, RegionInfo}; +use crate::{ + config::ConfigController, + server::debug::{Error, Result}, + storage::mvcc::{MvccInfoCollector, MvccInfoScanner}, +}; + +// return the region containing the seek_key or the next region if not existed +fn seek_region( + seek_key: &[u8], + sorted_region_states: &[RegionLocalState], +) -> Option { + if sorted_region_states.is_empty() { + return None; + } + + let idx = match sorted_region_states + .binary_search_by(|state| state.get_region().get_start_key().cmp(seek_key)) + { + Ok(idx) => return Some(sorted_region_states[idx].clone()), + Err(idx) => idx, + }; + + // idx == 0 means seek_key is less than the first region's start key + if idx == 0 { + return Some(sorted_region_states[idx].clone()); + } + + let region_state = &sorted_region_states[idx - 1]; + if check_key_in_region(seek_key, region_state.get_region()).is_err() { + return 
sorted_region_states.get(idx).cloned(); + } + + Some(region_state.clone()) +} + +pub struct MvccInfoIteratorV2 { + scanner: Option>, + tablet_reg: TabletRegistry, + sorted_region_states: Vec, + cur_region: metapb::Region, + start: Vec, + end: Vec, + limit: usize, + count: usize, +} + +impl MvccInfoIteratorV2 { + pub fn new( + sorted_region_states: Vec, + tablet_reg: TabletRegistry, + start: &[u8], + end: &[u8], + limit: usize, + ) -> Result { + let seek_key = if start.is_empty() { + start + } else { + &start[DATA_PREFIX_KEY.len()..] + }; + + if let Some(mut first_region_state) = seek_region(seek_key, &sorted_region_states) { + let mut tablet_cache = get_tablet_cache( + &tablet_reg, + first_region_state.get_region().get_id(), + Some(first_region_state.clone()), + )?; + + let tablet = tablet_cache.latest().unwrap(); + let scanner = Some( + MvccInfoScanner::new( + |cf, opts| tablet.iterator_opt(cf, opts).map_err(|e| box_err!(e)), + if start.is_empty() { None } else { Some(start) }, + if end.is_empty() { None } else { Some(end) }, + MvccInfoCollector::default(), + ) + .map_err(|e| -> Error { box_err!(e) })?, + ); + + Ok(MvccInfoIteratorV2 { + scanner, + tablet_reg, + sorted_region_states, + cur_region: first_region_state.take_region(), + start: start.to_vec(), + end: end.to_vec(), + limit, + count: 0, + }) + } else { + Ok(MvccInfoIteratorV2 { + scanner: None, + tablet_reg, + sorted_region_states, + cur_region: metapb::Region::default(), + start: start.to_vec(), + end: end.to_vec(), + limit, + count: 0, + }) + } + } +} + +impl Iterator for MvccInfoIteratorV2 { + type Item = raftstore::Result<(Vec, MvccInfo)>; + + fn next(&mut self) -> Option, MvccInfo)>> { + if self.scanner.is_none() || (self.limit != 0 && self.count >= self.limit) { + return None; + } + + loop { + match self.scanner.as_mut().unwrap().next_item() { + Ok(Some(item)) => { + self.count += 1; + return Some(Ok(item)); + } + Ok(None) => { + let cur_end_key = self.cur_region.get_end_key(); + if 
cur_end_key.is_empty() { + return None; + } + + let next_region_state = seek_region(cur_end_key, &self.sorted_region_states); + if next_region_state.is_none() { + self.scanner = None; + return None; + } + + let next_region_state = next_region_state.unwrap(); + if &self.cur_region == next_region_state.get_region() { + return None; + } + self.cur_region = next_region_state.get_region().clone(); + let mut tablet_cache = get_tablet_cache( + &self.tablet_reg, + next_region_state.get_region().get_id(), + Some(next_region_state.clone()), + ) + .unwrap(); + let tablet = tablet_cache.latest().unwrap(); + self.scanner = Some( + MvccInfoScanner::new( + |cf, opts| tablet.iterator_opt(cf, opts).map_err(|e| box_err!(e)), + if self.start.is_empty() { + None + } else { + Some(self.start.as_bytes()) + }, + if self.end.is_empty() { + None + } else { + Some(self.end.as_bytes()) + }, + MvccInfoCollector::default(), + ) + .unwrap(), + ); + } + Err(e) => return Some(Err(e)), + } + } + } +} + +// Debugger for raftstore-v2 +#[derive(Clone)] +pub struct DebuggerV2 { + tablet_reg: TabletRegistry, + raft_engine: ER, + _cfg_controller: ConfigController, +} + +impl DebuggerV2 { + pub fn new( + tablet_reg: TabletRegistry, + raft_engine: ER, + cfg_controller: ConfigController, + ) -> Self { + println!("Debugger for raftstore-v2 is used"); + DebuggerV2 { + tablet_reg, + raft_engine, + _cfg_controller: cfg_controller, + } + } + + pub fn get_all_regions_in_store(&self) -> Result> { + let mut region_ids = vec![]; + self.raft_engine + .for_each_raft_group::(&mut |region_id| { + region_ids.push(region_id); + Ok(()) + }) + .unwrap(); + Ok(region_ids) + } + + pub fn get(&self, db: DbType, cf: &str, key: &[u8]) -> Result> { + validate_db_and_cf(db, cf)?; + let region_state = + find_region_state_by_key(&self.raft_engine, &key[DATA_PREFIX_KEY.len()..])?; + let mut tablet_cache = get_tablet_cache( + &self.tablet_reg, + region_state.get_region().get_id(), + Some(region_state), + )?; + let tablet = 
tablet_cache.latest().unwrap(); + match tablet.get_value_cf(cf, key) { + Ok(Some(v)) => Ok(v.to_vec()), + Ok(None) => Err(Error::NotFound(format!( + "value for key {:?} in db {:?}", + key, db + ))), + Err(e) => Err(box_err!(e)), + } + } + + pub fn raft_log(&self, region_id: u64, log_index: u64) -> Result { + if let Some(log) = box_try!(self.raft_engine.get_entry(region_id, log_index)) { + return Ok(log); + } + Err(Error::NotFound(format!( + "raft log for region {} at index {}", + region_id, log_index + ))) + } + + pub fn region_info(&self, region_id: u64) -> Result { + let raft_state = box_try!(self.raft_engine.get_raft_state(region_id)); + let apply_state = box_try!(self.raft_engine.get_apply_state(region_id, u64::MAX)); + let region_state = box_try!(self.raft_engine.get_region_state(region_id, u64::MAX)); + + match (raft_state, apply_state, region_state) { + (None, None, None) => Err(Error::NotFound(format!("info for region {}", region_id))), + (raft_state, apply_state, region_state) => { + Ok(RegionInfo::new(raft_state, apply_state, region_state)) + } + } + } + + pub fn region_size>( + &self, + region_id: u64, + cfs: Vec, + ) -> Result> { + match self.raft_engine.get_region_state(region_id, u64::MAX) { + Ok(Some(region_state)) => { + if region_state.get_state() != PeerState::Normal { + return Err(Error::NotFound(format!( + "region {:?} has been deleted", + region_id + ))); + } + let region = region_state.get_region(); + let start_key = &keys::data_key(region.get_start_key()); + let end_key = &keys::data_end_key(region.get_end_key()); + let mut sizes = vec![]; + let mut tablet_cache = + get_tablet_cache(&self.tablet_reg, region.id, Some(region_state))?; + let tablet = tablet_cache.latest().unwrap(); + for cf in cfs { + let mut size = 0; + box_try!(tablet.scan(cf.as_ref(), start_key, end_key, false, |k, v| { + size += k.len() + v.len(); + Ok(true) + })); + sizes.push((cf, size)); + } + Ok(sizes) + } + Ok(None) => Err(Error::NotFound(format!("none region {:?}", 
region_id))), + Err(e) => Err(box_err!(e)), + } + } + + /// Scan MVCC Infos for given range `[start, end)`. + pub fn scan_mvcc(&self, start: &[u8], end: &[u8], limit: u64) -> Result { + if end.is_empty() && limit == 0 { + return Err(Error::InvalidArgument("no limit and to_key".to_owned())); + } + if !end.is_empty() && start > end { + return Err(Error::InvalidArgument( + "start key should not be larger than end key".to_owned(), + )); + } + + let mut region_states = vec![]; + self.raft_engine + .for_each_raft_group::(&mut |region_id| { + let region_state = self + .raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); + if region_state.state == PeerState::Normal { + region_states.push(region_state); + } + Ok(()) + }) + .unwrap(); + + region_states.sort_by(|r1, r2| { + r1.get_region() + .get_start_key() + .cmp(r2.get_region().get_start_key()) + }); + + MvccInfoIteratorV2::new( + region_states, + self.tablet_reg.clone(), + start, + end, + limit as usize, + ) + } + + /// Compact the cf[start..end) in the db. 
+ pub fn compact( + &self, + db: DbType, + cf: &str, + start: &[u8], + end: &[u8], + threads: u32, + bottommost: BottommostLevelCompaction, + ) -> Result<()> { + validate_db_and_cf(db, cf)?; + if db == DbType::Raft { + return Err(box_err!("Get raft db is not allowed")); + } + let mut compactions = vec![]; + self.raft_engine + .for_each_raft_group::(&mut |region_id| { + let region_state = self + .raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); + if region_state.state != PeerState::Normal { + return Ok(()); + } + + if let Some((start_key, end_key)) = + range_in_region((start, end), region_state.get_region()) + { + let start = if start_key.is_empty() { + None + } else { + Some(data_key(start_key)) + }; + let end = if end_key.is_empty() { + None + } else { + Some(data_key(end_key)) + }; + compactions.push((region_id, start, end, region_state)); + }; + + Ok(()) + }) + .unwrap(); + + for (region_id, start_key, end_key, region_state) in compactions { + let mut tablet_cache = + get_tablet_cache(&self.tablet_reg, region_id, Some(region_state))?; + let talbet = tablet_cache.latest().unwrap(); + info!("Debugger starts manual compact"; "talbet" => ?talbet, "cf" => cf); + let mut opts = CompactOptions::new(); + opts.set_max_subcompactions(threads as i32); + opts.set_exclusive_manual_compaction(false); + opts.set_bottommost_level_compaction(bottommost.0); + let handle = box_try!(get_cf_handle(talbet.as_inner(), cf)); + talbet.as_inner().compact_range_cf_opt( + handle, + &opts, + start_key.as_ref().map(|k| k.as_bytes()), + end_key.as_ref().map(|k| k.as_bytes()), + ); + info!("Debugger finishes manual compact"; "db" => ?db, "cf" => cf); + } + + Ok(()) + } +} + +fn validate_db_and_cf(db: DbType, cf: &str) -> Result<()> { + match (db, cf) { + (DbType::Kv, CF_DEFAULT) + | (DbType::Kv, CF_WRITE) + | (DbType::Kv, CF_LOCK) + | (DbType::Raft, CF_DEFAULT) => Ok(()), + _ => Err(Error::InvalidArgument(format!( + "invalid cf {:?} for db {:?}", + cf, db + ))), + 
} +} + +// Return the overlap range (without data prefix) of the `range` in region or +// None if they are exclusive +// Note: generally, range should start with `DATA_PREFIX_KEY`, but they can also +// be empty in case of compacting whole cluster for example. +// Note: the range end being `DATA_PREFIX_KEY` and `DATA_MAX_KEY` both means the +// largest key +fn range_in_region<'a>( + range: (&'a [u8], &'a [u8]), + region: &'a metapb::Region, +) -> Option<(&'a [u8], &'a [u8])> { + let range_start = if !range.0.is_empty() { + range.0 + } else { + DATA_PREFIX_KEY + }; + + let range_end = if !range.1.is_empty() && range.1 != DATA_MAX_KEY { + range.1 + } else { + DATA_PREFIX_KEY + }; + + if range_start == DATA_PREFIX_KEY && range_end == DATA_PREFIX_KEY { + return Some((region.get_start_key(), region.get_end_key())); + } else if range_start == DATA_PREFIX_KEY { + assert!(range_end.starts_with(DATA_PREFIX_KEY)); + if region.get_start_key() < &range_end[DATA_PREFIX_KEY.len()..] { + return Some(( + region.get_start_key(), + smaller_key( + &range_end[DATA_PREFIX_KEY.len()..], + region.get_end_key(), + true, + ), + )); + } + None + } else if range_end == DATA_PREFIX_KEY { + assert!(range_start.starts_with(DATA_PREFIX_KEY)); + if &range_start[DATA_PREFIX_KEY.len()..] 
< region.get_end_key() + || region.get_end_key().is_empty() + { + return Some(( + larger_key( + &range_start[DATA_PREFIX_KEY.len()..], + region.get_start_key(), + false, + ), + region.get_end_key(), + )); + } + None + } else { + assert!(range_start.starts_with(DATA_PREFIX_KEY)); + assert!(range_end.starts_with(DATA_PREFIX_KEY)); + let start_key = larger_key( + &range_start[DATA_PREFIX_KEY.len()..], + region.get_start_key(), + false, + ); + let end_key = smaller_key( + &range_end[DATA_PREFIX_KEY.len()..], + region.get_end_key(), + true, + ); + if start_key < end_key { + return Some((start_key, end_key)); + } + None + } +} + +fn find_region_state_by_key( + raft_engine: &ER, + key: &[u8], +) -> Result { + let mut region_ids = vec![]; + raft_engine + .for_each_raft_group::(&mut |region_id| { + region_ids.push(region_id); + Ok(()) + }) + .unwrap(); + + for region_id in region_ids { + if let Ok(Some(region_state)) = raft_engine.get_region_state(region_id, u64::MAX) { + let region = region_state.get_region(); + if check_key_in_region(key, region).is_ok() { + if region_state.get_state() != PeerState::Normal { + break; + } + return Ok(region_state); + } + } + } + + Err(Error::NotFound(format!( + "Not found region containing {:?}", + key + ))) +} + +fn get_tablet_cache( + tablet_reg: &TabletRegistry, + region_id: u64, + state: Option, +) -> Result> { + if let Some(tablet_cache) = tablet_reg.get(region_id) { + Ok(tablet_cache) + } else { + let region_state = state.unwrap(); + let ctx = TabletContext::new(region_state.get_region(), Some(region_state.tablet_index)); + match tablet_reg.load(ctx, false) { + Ok(tablet_cache) => Ok(tablet_cache), + Err(e) => { + println!( + "tablet load failed, region_state {:?}", + region_state.get_state() + ); + return Err(box_err!(e)); + } + } + } +} + +// `key1` and `key2` should both be start_key or end_key. 
+fn smaller_key<'a>(key1: &'a [u8], key2: &'a [u8], end_key: bool) -> &'a [u8] { + if end_key && key1.is_empty() { + return key2; + } + if end_key && key2.is_empty() { + return key1; + } + if key1 < key2 { + return key1; + } + key2 +} + +// `key1` and `key2` should both be start_key or end_key. +fn larger_key<'a>(key1: &'a [u8], key2: &'a [u8], end_key: bool) -> &'a [u8] { + if end_key && key1.is_empty() { + return key1; + } + if end_key && key2.is_empty() { + return key2; + } + if key1 < key2 { + return key2; + } + key1 +} + +#[cfg(test)] +mod tests { + use std::path::Path; + + use engine_traits::{RaftLogBatch, SyncMutable, CF_DEFAULT, CF_LOCK, CF_WRITE}; + use kvproto::{metapb, raft_serverpb::*}; + use raft::prelude::EntryType; + use raft_log_engine::RaftLogEngine; + + use super::*; + use crate::{ + config::TikvConfig, + server::KvEngineFactoryBuilder, + storage::{txn::tests::must_prewrite_put, TestEngineBuilder}, + }; + + const INITIAL_TABLET_INDEX: u64 = 5; + const INITIAL_APPLY_INDEX: u64 = 5; + + fn new_debugger(path: &Path) -> DebuggerV2 { + let mut cfg = TikvConfig::default(); + cfg.storage.data_dir = path.to_str().unwrap().to_string(); + cfg.raft_store.raftdb_path = cfg.infer_raft_db_path(None).unwrap(); + cfg.raft_engine.mut_config().dir = cfg.infer_raft_engine_path(None).unwrap(); + let cache = cfg + .storage + .block_cache + .build_shared_cache(cfg.storage.engine); + let env = cfg.build_shared_rocks_env(None, None).unwrap(); + + let factory = KvEngineFactoryBuilder::new(env, &cfg, cache).build(); + let reg = TabletRegistry::new(Box::new(factory), path).unwrap(); + + let raft_engine = RaftLogEngine::new(cfg.raft_engine.config(), None, None).unwrap(); + + DebuggerV2::new(reg, raft_engine, ConfigController::default()) + } + + #[test] + fn test_get() { + let dir = test_util::temp_dir("test-debugger", false); + let debugger = new_debugger(dir.path()); + let raft_engine = &debugger.raft_engine; + let region_id = 1; + + let mut region = 
metapb::Region::default(); + region.set_id(region_id); + region.set_start_key(b"k10".to_vec()); + region.set_end_key(b"k20".to_vec()); + let mut state = RegionLocalState::default(); + state.set_region(region.clone()); + state.set_tablet_index(5); + + let ctx = TabletContext::new(®ion, Some(5)); + let mut tablet_cache = debugger.tablet_reg.load(ctx, true).unwrap(); + let tablet = tablet_cache.latest().unwrap(); + + let mut wb = raft_engine.log_batch(10); + wb.put_region_state(region_id, 10, &state).unwrap(); + raft_engine.consume(&mut wb, true).unwrap(); + + let cfs = vec![CF_DEFAULT, CF_LOCK, CF_WRITE]; + let (k, v) = (keys::data_key(b"k15"), b"v"); + for cf in &cfs { + tablet.put_cf(cf, k.as_slice(), v).unwrap(); + } + + for cf in &cfs { + let got = debugger.get(DbType::Kv, cf, &k).unwrap(); + assert_eq!(&got, v); + } + + match debugger.get(DbType::Kv, CF_DEFAULT, b"k15") { + Err(Error::NotFound(_)) => (), + _ => panic!("expect Error::NotFound(_)"), + } + + let mut wb = raft_engine.log_batch(10); + state.set_state(PeerState::Tombstone); + wb.put_region_state(region_id, 10, &state).unwrap(); + raft_engine.consume(&mut wb, true).unwrap(); + for cf in &cfs { + debugger.get(DbType::Kv, cf, &k).unwrap_err(); + } + } + + #[test] + fn test_raft_log() { + let dir = test_util::temp_dir("test-debugger", false); + let debugger = new_debugger(dir.path()); + let raft_engine = &debugger.raft_engine; + let (region_id, log_index) = (1, 1); + + let mut entry = Entry::default(); + entry.set_term(1); + entry.set_index(1); + entry.set_entry_type(EntryType::EntryNormal); + entry.set_data(vec![42].into()); + let mut wb = raft_engine.log_batch(10); + RaftLogBatch::append(&mut wb, region_id, None, vec![entry.clone()]).unwrap(); + raft_engine.consume(&mut wb, true).unwrap(); + + assert_eq!(debugger.raft_log(region_id, log_index).unwrap(), entry); + match debugger.raft_log(region_id + 1, log_index + 1) { + Err(Error::NotFound(_)) => (), + _ => panic!("expect Error::NotFound(_)"), + } + } + 
+ #[test] + fn test_region_info() { + let dir = test_util::temp_dir("test-debugger", false); + let debugger = new_debugger(dir.path()); + let raft_engine = &debugger.raft_engine; + let region_id = 1; + + let mut wb = raft_engine.log_batch(10); + let mut raft_state = RaftLocalState::default(); + raft_state.set_last_index(42); + RaftLogBatch::put_raft_state(&mut wb, region_id, &raft_state).unwrap(); + + let mut apply_state = RaftApplyState::default(); + apply_state.set_applied_index(42); + RaftLogBatch::put_apply_state(&mut wb, region_id, 42, &apply_state).unwrap(); + + let mut region_state = RegionLocalState::default(); + region_state.set_state(PeerState::Tombstone); + RaftLogBatch::put_region_state(&mut wb, region_id, 42, ®ion_state).unwrap(); + + raft_engine.consume(&mut wb, true).unwrap(); + + assert_eq!( + debugger.region_info(region_id).unwrap(), + RegionInfo::new(Some(raft_state), Some(apply_state), Some(region_state)) + ); + match debugger.region_info(region_id + 1) { + Err(Error::NotFound(_)) => (), + _ => panic!("expect Error::NotFound(_)"), + } + } + + #[test] + fn test_region_size() { + let dir = test_util::temp_dir("test-debugger", false); + let debugger = new_debugger(dir.path()); + let raft_engine = &debugger.raft_engine; + let region_id = 1; + + let mut region = metapb::Region::default(); + region.set_id(region_id); + region.set_start_key(b"k10".to_vec()); + region.set_end_key(b"k20".to_vec()); + let mut state = RegionLocalState::default(); + state.set_region(region.clone()); + state.set_tablet_index(5); + + let ctx = TabletContext::new(®ion, Some(5)); + let mut tablet_cache = debugger.tablet_reg.load(ctx, true).unwrap(); + let tablet = tablet_cache.latest().unwrap(); + + let mut wb = raft_engine.log_batch(10); + wb.put_region_state(region_id, 10, &state).unwrap(); + raft_engine.consume(&mut wb, true).unwrap(); + + let cfs = vec![CF_DEFAULT, CF_LOCK, CF_WRITE]; + let (k, v) = (keys::data_key(b"k15"), b"v"); + for cf in &cfs { + tablet.put_cf(cf, 
k.as_slice(), v).unwrap(); + } + + let sizes = debugger.region_size(region_id, cfs.clone()).unwrap(); + assert_eq!(sizes.len(), 3); + for (cf, size) in sizes { + cfs.iter().find(|&&c| c == cf).unwrap(); + assert_eq!(size, k.len() + v.len()); + } + + // test for region that has not been trimmed + let (k, v) = (keys::data_key(b"k05"), b"v"); + let k1 = keys::data_key(b"k25"); + for cf in &cfs { + tablet.put_cf(cf, k.as_slice(), v).unwrap(); + tablet.put_cf(cf, k1.as_slice(), v).unwrap(); + } + + let sizes = debugger.region_size(region_id, cfs.clone()).unwrap(); + assert_eq!(sizes.len(), 3); + for (cf, size) in sizes { + cfs.iter().find(|&&c| c == cf).unwrap(); + assert_eq!(size, k.len() + v.len()); + } + + state.set_state(PeerState::Tombstone); + let mut wb = raft_engine.log_batch(10); + wb.put_region_state(region_id, 10, &state).unwrap(); + raft_engine.consume(&mut wb, true).unwrap(); + debugger.region_size(region_id, cfs.clone()).unwrap_err(); + } + + // For simplicity, the format of the key is inline with data in + // prepare_data_on_disk + fn extract_key(key: &[u8]) -> &[u8] { + &key[1..4] + } + + // Prepare some data + // Data for each region: + // Region 1: k00 .. k04 + // Region 2: k05 .. k09 + // Region 3: k10 .. k14 + // Region 4: k15 .. k19 + // Region 5: k20 .. k24 + // Region 6: k26 .. 
k28 + fn prepare_data_on_disk(path: &Path) { + let mut cfg = TikvConfig::default(); + cfg.storage.data_dir = path.to_str().unwrap().to_string(); + cfg.raft_store.raftdb_path = cfg.infer_raft_db_path(None).unwrap(); + cfg.raft_engine.mut_config().dir = cfg.infer_raft_engine_path(None).unwrap(); + cfg.gc.enable_compaction_filter = false; + let cache = cfg + .storage + .block_cache + .build_shared_cache(cfg.storage.engine); + let env = cfg.build_shared_rocks_env(None, None).unwrap(); + + let factory = KvEngineFactoryBuilder::new(env, &cfg, cache).build(); + let reg = TabletRegistry::new(Box::new(factory), path).unwrap(); + + let raft_engine = RaftLogEngine::new(cfg.raft_engine.config(), None, None).unwrap(); + let mut wb = raft_engine.log_batch(5); + for i in 0..6 { + let mut region = metapb::Region::default(); + let start_key = format!("k{:02}", i * 5); + let end_key = format!("k{:02}", (i + 1) * 5); + region.set_id(i + 1); + region.set_start_key(start_key.into_bytes()); + region.set_end_key(end_key.into_bytes()); + let mut region_state = RegionLocalState::default(); + region_state.set_tablet_index(INITIAL_TABLET_INDEX); + if region.get_id() == 4 { + region_state.set_state(PeerState::Tombstone); + } else if region.get_id() == 6 { + region.set_start_key(b"k26".to_vec()); + region.set_end_key(b"k28".to_vec()); + } + region_state.set_region(region); + + let tablet_path = reg.tablet_path(i + 1, INITIAL_TABLET_INDEX); + // Use tikv_kv::RocksEngine instead of loading tablet from registry in order to + // use prewrite method to prepare mvcc data + let mut engine = TestEngineBuilder::new().path(tablet_path).build().unwrap(); + for i in i * 5..(i + 1) * 5 { + let key = format!("zk{:02}", i); + let val = format!("val{:02}", i); + // Use prewrite only is enough for preparing mvcc data + must_prewrite_put( + &mut engine, + key.as_bytes(), + val.as_bytes(), + key.as_bytes(), + 10, + ); + } + + wb.put_region_state(i + 1, INITIAL_APPLY_INDEX, ®ion_state) + .unwrap(); + } + 
raft_engine.consume(&mut wb, true).unwrap(); + } + + #[test] + fn test_scan_mvcc() { + let dir = test_util::temp_dir("test-debugger", false); + prepare_data_on_disk(dir.path()); + let debugger = new_debugger(dir.path()); + // Test scan with bad start, end or limit. + assert!(debugger.scan_mvcc(b"z", b"", 0).is_err()); + assert!(debugger.scan_mvcc(b"z", b"x", 3).is_err()); + + let verify_scanner = |range, scanner: &mut MvccInfoIteratorV2| { + for i in range { + let key = format!("k{:02}", i).into_bytes(); + assert_eq!(key, extract_key(&scanner.next().unwrap().unwrap().0)); + } + }; + + // full scann + let mut scanner = debugger.scan_mvcc(b"", b"", 100).unwrap(); + verify_scanner(0..15, &mut scanner); + verify_scanner(20..25, &mut scanner); + verify_scanner(26..28, &mut scanner); + assert!(scanner.next().is_none()); + + // Range has more elements than limit + let mut scanner = debugger.scan_mvcc(b"zk01", b"zk09", 5).unwrap(); + verify_scanner(1..6, &mut scanner); + assert!(scanner.next().is_none()); + + // Range has less elements than limit + let mut scanner = debugger.scan_mvcc(b"zk07", b"zk10", 10).unwrap(); + verify_scanner(7..10, &mut scanner); + assert!(scanner.next().is_none()); + + // Start from the key where no region contains it + let mut scanner = debugger.scan_mvcc(b"zk16", b"", 100).unwrap(); + verify_scanner(20..25, &mut scanner); + verify_scanner(26..28, &mut scanner); + assert!(scanner.next().is_none()); + + // Scan a range not existed in the cluster + let mut scanner = debugger.scan_mvcc(b"zk16", b"zk19", 100).unwrap(); + assert!(scanner.next().is_none()); + + // The end key is less than the start_key of the first region + let mut scanner = debugger.scan_mvcc(b"", b"zj", 100).unwrap(); + assert!(scanner.next().is_none()); + } + + #[test] + fn test_compact() { + let dir = test_util::temp_dir("test-debugger", false); + let debugger = new_debugger(dir.path()); + let compact = |db, cf| debugger.compact(db, cf, &[0], &[0xFF], 1, Some("skip").into()); + 
compact(DbType::Kv, CF_DEFAULT).unwrap(); + compact(DbType::Kv, CF_LOCK).unwrap(); + compact(DbType::Kv, CF_WRITE).unwrap(); + compact(DbType::Raft, CF_DEFAULT).unwrap_err(); + } + + #[test] + fn test_range_in_region() { + let mut region = metapb::Region::default(); + region.set_start_key(b"k01".to_vec()); + region.set_end_key(b"k10".to_vec()); + + let ranges = vec![ + ("", "", "k01", "k10"), + ("z", "z", "k01", "k10"), + ("zk00", "", "k01", "k10"), + ("zk00", "z", "k01", "k10"), + ("", "zk11", "k01", "k10"), + ("z", "zk11", "k01", "k10"), + ("zk02", "zk07", "k02", "k07"), + ("zk00", "zk07", "k01", "k07"), + ("zk02", "zk11", "k02", "k10"), + ("zk02", "{", "k02", "k10"), + ]; + + for (range_start, range_end, expect_start, expect_end) in ranges { + assert_eq!( + (expect_start.as_bytes(), expect_end.as_bytes()), + range_in_region((range_start.as_bytes(), range_end.as_bytes()), ®ion).unwrap() + ); + } + + let ranges = vec![("zk05", "zk02"), ("zk11", ""), ("", "zk00")]; + for (range_start, range_end) in ranges { + assert!( + range_in_region((range_start.as_bytes(), range_end.as_bytes()), ®ion).is_none() + ); + } + + region.set_start_key(b"".to_vec()); + region.set_end_key(b"k10".to_vec()); + + let ranges = vec![ + ("", "", "", "k10"), + ("z", "z", "", "k10"), + ("zk00", "", "k00", "k10"), + ("zk00", "z", "k00", "k10"), + ("", "zk11", "", "k10"), + ("z", "zk11", "", "k10"), + ("zk02", "zk07", "k02", "k07"), + ("zk02", "zk11", "k02", "k10"), + ("zk02", "{", "k02", "k10"), + ]; + + for (range_start, range_end, expect_start, expect_end) in ranges { + assert_eq!( + (expect_start.as_bytes(), expect_end.as_bytes()), + range_in_region((range_start.as_bytes(), range_end.as_bytes()), ®ion).unwrap() + ); + } + + let ranges = vec![("zk05", "zk02"), ("zk11", "")]; + for (range_start, range_end) in ranges { + assert!( + range_in_region((range_start.as_bytes(), range_end.as_bytes()), ®ion).is_none() + ); + } + + region.set_start_key(b"k01".to_vec()); + 
region.set_end_key(b"".to_vec()); + + let ranges = vec![ + ("", "", "k01", ""), + ("z", "z", "k01", ""), + ("zk00", "", "k01", ""), + ("zk00", "z", "k01", ""), + ("", "zk11", "k01", "k11"), + ("z", "zk11", "k01", "k11"), + ("zk02", "zk07", "k02", "k07"), + ("zk02", "zk11", "k02", "k11"), + ("zk02", "{", "k02", ""), + ]; + + for (range_start, range_end, expect_start, expect_end) in ranges { + assert_eq!( + (expect_start.as_bytes(), expect_end.as_bytes()), + range_in_region((range_start.as_bytes(), range_end.as_bytes()), ®ion).unwrap() + ); + } + + let ranges = vec![("zk05", "zk02"), ("", "zk00")]; + for (range_start, range_end) in ranges { + assert!( + range_in_region((range_start.as_bytes(), range_end.as_bytes()), ®ion).is_none() + ); + } + } +} diff --git a/src/server/mod.rs b/src/server/mod.rs index e432b3aa51b..00d9fe70d4f 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -5,6 +5,7 @@ mod raft_client; pub mod config; pub mod debug; +pub mod debug2; mod engine_factory; pub mod errors; pub mod gc_worker; diff --git a/src/storage/mvcc/consistency_check.rs b/src/storage/mvcc/consistency_check.rs index 487ae61d5e8..311447601f8 100644 --- a/src/storage/mvcc/consistency_check.rs +++ b/src/storage/mvcc/consistency_check.rs @@ -175,7 +175,7 @@ impl MvccInfoScanner { }) } - fn next_item(&mut self) -> Result> { + pub fn next_item(&mut self) -> Result> { let mut lock_ok = box_try!(self.lock_iter.valid()); let mut writes_ok = box_try!(self.write_iter.valid()); @@ -221,7 +221,7 @@ impl MvccInfoScanner { } #[derive(Clone, Default)] -struct MvccInfoCollector { +pub struct MvccInfoCollector { current_item: Vec, mvcc_info: MvccInfo, } diff --git a/src/storage/mvcc/mod.rs b/src/storage/mvcc/mod.rs index 2f9a75b2a03..1779c116ccd 100644 --- a/src/storage/mvcc/mod.rs +++ b/src/storage/mvcc/mod.rs @@ -20,7 +20,9 @@ pub use txn_types::{ }; pub use self::{ - consistency_check::{Mvcc as MvccConsistencyCheckObserver, MvccInfoIterator}, + consistency_check::{ + Mvcc as 
MvccConsistencyCheckObserver, MvccInfoCollector, MvccInfoIterator, MvccInfoScanner, + }, metrics::{GC_DELETE_VERSIONS_HISTOGRAM, MVCC_VERSIONS_HISTOGRAM}, reader::*, txn::{GcInfo, MvccTxn, ReleasedLock, MAX_TXN_WRITE_SIZE}, diff --git a/tests/integrations/server/debugger.rs b/tests/integrations/server/debugger.rs new file mode 100644 index 00000000000..dbd862ba633 --- /dev/null +++ b/tests/integrations/server/debugger.rs @@ -0,0 +1,163 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use collections::{HashMap, HashSet}; +use engine_rocks::{raw::Range, util::get_cf_handle}; +use engine_traits::{CachedTablet, MiscExt, CF_WRITE}; +use keys::{data_key, DATA_MAX_KEY}; +use kvproto::debugpb::Db; +use tikv::{ + config::ConfigController, + server::debug2::DebuggerV2, + storage::mvcc::{TimeStamp, Write, WriteType}, +}; +use txn_types::Key; + +fn gen_mvcc_put_kv( + k: &[u8], + v: &[u8], + start_ts: TimeStamp, + commit_ts: TimeStamp, +) -> (Vec, Vec) { + let k = Key::from_encoded(data_key(k)); + let k = k.append_ts(commit_ts); + let w = Write::new(WriteType::Put, start_ts, Some(v.to_vec())); + (k.as_encoded().clone(), w.as_ref().to_bytes()) +} + +fn gen_delete_k(k: &[u8], commit_ts: TimeStamp) -> Vec { + let k = Key::from_encoded(data_key(k)); + let k = k.append_ts(commit_ts); + k.as_encoded().clone() +} + +#[test] +fn test_compact() { + let (split_key, _) = gen_mvcc_put_kv(b"k10", b"", 1.into(), 2.into()); + let (split_key2, _) = gen_mvcc_put_kv(b"k20", b"", 1.into(), 2.into()); + let regions = vec![ + (1, b"".to_vec(), split_key.clone()), + (1000, split_key.clone(), split_key2.clone()), + (1002, split_key2.clone(), b"".to_vec()), + ]; + + let check_compact = |from: Vec, to: Vec, regions_compacted: HashSet| { + let count = 1; + let mut cluster = test_raftstore_v2::new_node_cluster(0, count); + cluster.cfg.raft_store.right_derive_when_split = false; + cluster.run(); + + let region = cluster.get_region(b""); + cluster.must_split(®ion, &split_key); + 
let region = cluster.get_region(&split_key); + cluster.must_split(®ion, &split_key2); + + for i in 0..30 { + let (k, v) = (format!("k{:02}", i), format!("value{}", i)); + let (k, v) = gen_mvcc_put_kv(k.as_bytes(), v.as_bytes(), 1.into(), 2.into()); + cluster.must_put_cf(CF_WRITE, &k, &v); + } + for (registry, _) in &cluster.engines { + registry.for_each_opened_tablet(|_, db: &mut CachedTablet<_>| { + if let Some(db) = db.latest() { + db.flush_cf(CF_WRITE, true).unwrap(); + } + true + }) + } + + for i in 0..30 { + let k = format!("k{:02}", i); + let k = gen_delete_k(k.as_bytes(), 2.into()); + cluster.must_delete_cf(CF_WRITE, &k); + } + for (registry, _) in &cluster.engines { + registry.for_each_opened_tablet(|_, db: &mut CachedTablet<_>| { + if let Some(db) = db.latest() { + db.flush_cf(CF_WRITE, true).unwrap(); + } + true + }) + } + + let mut tablet_size_before_compact = HashMap::default(); + for (registry, _) in &cluster.engines { + registry.for_each_opened_tablet(|region_id, db: &mut CachedTablet<_>| { + if let Some(db) = db.latest() { + let cf_handle = get_cf_handle(db.as_inner(), CF_WRITE).unwrap(); + let approximate_size = db + .as_inner() + .get_approximate_sizes_cf(cf_handle, &[Range::new(b"", DATA_MAX_KEY)])[0]; + tablet_size_before_compact.insert(region_id, approximate_size); + } + true + }) + } + + let debugger = DebuggerV2::new( + cluster.engines[0].0.clone(), + cluster.raft_engines.get(&1).unwrap().clone(), + ConfigController::default(), + ); + + debugger + .compact(Db::Kv, CF_WRITE, &from, &to, 1, Some("skip").into()) + .unwrap(); + + let mut tablet_size_after_compact = HashMap::default(); + for (registry, _) in &cluster.engines { + registry.for_each_opened_tablet(|region_id, db: &mut CachedTablet<_>| { + if let Some(db) = db.latest() { + let cf_handle = get_cf_handle(db.as_inner(), CF_WRITE).unwrap(); + let approximate_size = db + .as_inner() + .get_approximate_sizes_cf(cf_handle, &[Range::new(b"", DATA_MAX_KEY)])[0]; + 
tablet_size_after_compact.insert(region_id, approximate_size); + } + true + }) + } + for (id, &size) in &tablet_size_after_compact { + if regions_compacted.contains(id) { + assert!(size == 0); + continue; + } + + assert_eq!(tablet_size_before_compact[id], size); + } + }; + + // compact the middle region + let region = regions[1].clone(); + let mut regions_compacted = HashSet::default(); + regions_compacted.insert(region.0); + let from = keys::data_key(®ion.1); + let to = keys::data_end_key(®ion.2); + check_compact(from, to, regions_compacted); + + // compact first two regions + let region1 = regions[0].clone(); + let region2 = regions[1].clone(); + let mut regions_compacted = HashSet::default(); + regions_compacted.insert(region1.0); + regions_compacted.insert(region2.0); + let from = keys::data_key(®ion1.1); + let to = keys::data_end_key(®ion2.2); + check_compact(from, to, regions_compacted); + + // compact all regions by specifying specific keys + let region1 = regions[0].clone(); + let region2 = regions[2].clone(); + let mut regions_compacted = HashSet::default(); + let _ = regions + .iter() + .map(|(id, ..)| regions_compacted.insert(*id)) + .collect::>(); + let from = keys::data_key(®ion1.1); + let to = keys::data_end_key(®ion2.2); + check_compact(from, to, regions_compacted.clone()); + + // compact all regions + check_compact(b"".to_vec(), b"".to_vec(), regions_compacted.clone()); + check_compact(b"z".to_vec(), b"z".to_vec(), regions_compacted.clone()); + check_compact(b"z".to_vec(), b"{".to_vec(), regions_compacted); +} diff --git a/tests/integrations/server/mod.rs b/tests/integrations/server/mod.rs index dc89eb63fc8..fb813106cce 100644 --- a/tests/integrations/server/mod.rs +++ b/tests/integrations/server/mod.rs @@ -1,5 +1,6 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
+mod debugger; mod gc_worker; mod kv_service; mod lock_manager; From 1b01a18f7f72f4b322943410c37adf02e0be5496 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 8 May 2023 15:20:57 +0800 Subject: [PATCH 0677/1149] raftstore-v2: fix split removes records unexpectedly (#14690) close tikv/tikv#14689 raftstore-v2: fix split removes records unexpectedly Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/raft/peer.rs | 13 +++--- .../raftstore/test_split_region.rs | 42 ++++++++++++++++++- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index e11c96922cd..8d2360d2695 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -14,7 +14,7 @@ use engine_traits::{ use kvproto::{ metapb::{self, PeerRole}, pdpb, - raft_serverpb::{RaftMessage, RegionLocalState}, + raft_serverpb::RaftMessage, }; use raft::{RawNode, StateRole}; use raftstore::{ @@ -261,11 +261,12 @@ impl Peer { self.leader_lease.expire_remote_lease(); } - let mut region_state = RegionLocalState::default(); - region_state.set_region(region.clone()); - region_state.set_tablet_index(tablet_index); - region_state.set_state(self.storage().region_state().get_state()); - self.storage_mut().set_region_state(region_state); + self.storage_mut() + .region_state_mut() + .set_region(region.clone()); + self.storage_mut() + .region_state_mut() + .set_tablet_index(tablet_index); let progress = ReadProgress::region(region); // Always update read delegate's region to avoid stale region info after a diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 48b226ba40e..071856cbd29 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -9,7 +9,11 @@ use std::{ use 
engine_traits::{Peekable, CF_DEFAULT, CF_WRITE}; use keys::data_key; -use kvproto::{metapb, pdpb, raft_cmdpb::*, raft_serverpb::RaftMessage}; +use kvproto::{ + metapb, pdpb, + raft_cmdpb::*, + raft_serverpb::{ExtraMessageType, RaftMessage}, +}; use pd_client::PdClient; use raft::eraftpb::MessageType; use raftstore::{ @@ -1259,3 +1263,39 @@ fn test_catch_up_peers_after_split() { assert!(!pending_peers.contains_key(&p.id)) } } + +#[test] +fn test_split_region_keep_records() { + let mut cluster = test_raftstore_v2::new_node_cluster(0, 3); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + let r1 = cluster.run_conf_change(); + cluster.must_put(b"k1", b"v1"); + pd_client.must_add_peer(r1, new_peer(2, 2)); + must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); + pd_client.must_remove_peer(r1, new_peer(2, 2)); + + let leader = cluster.leader_of_region(r1).unwrap(); + cluster.add_send_filter_on_node( + leader.get_store_id(), + Box::new(DropMessageFilter::new(Arc::new(|m: &RaftMessage| { + // Drop all gc peer requests and responses. + !(m.has_extra_msg() + && (m.get_extra_msg().get_type() == ExtraMessageType::MsgGcPeerRequest + || m.get_extra_msg().get_type() == ExtraMessageType::MsgGcPeerResponse)) + }))), + ); + + // Make sure split has applied. 
+ let region = pd_client.get_region(b"").unwrap(); + cluster.must_split(®ion, b"k1"); + cluster.must_put(b"k2", b"v2"); + cluster.must_put(b"k0", b"v0"); + + let region_state = cluster.region_local_state(r1, leader.get_store_id()); + assert!( + !region_state.get_removed_records().is_empty(), + "{:?}", + region_state + ); +} From 39de299d9a91da49642cf5f1aa056fc5564cdc1a Mon Sep 17 00:00:00 2001 From: Connor Date: Mon, 8 May 2023 15:38:57 +0800 Subject: [PATCH 0678/1149] storage: set write detail for all txn write commands (#14640) close tikv/tikv#14639 Set write detail for all txn write commands Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/coprocessor/endpoint.rs | 8 --- src/server/service/kv.rs | 78 +++++++++++++------------ src/storage/txn/scheduler.rs | 44 ++++++++++---- tests/integrations/server/kv_service.rs | 44 +++++++++++++- 4 files changed, 116 insertions(+), 58 deletions(-) diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 71c3d5548a9..6b360fa4538 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -1674,10 +1674,6 @@ mod tests { // Response 1 // - // Note: `process_wall_time_ms` includes `total_process_time` and - // `total_suspend_time`. Someday it will be separated, but for now, - // let's just consider the combination. - // // In the worst case, `total_suspend_time` could be totally req2 payload. // So here: req1 payload <= process time <= (req1 payload + req2 payload) let resp = &rx.recv().unwrap()[0]; @@ -1701,10 +1697,6 @@ mod tests { // Response 2 // - // Note: `process_wall_time_ms` includes `total_process_time` and - // `total_suspend_time`. Someday it will be separated, but for now, - // let's just consider the combination. - // // In the worst case, `total_suspend_time` could be totally req1 payload. 
// So here: req2 payload <= process time <= (req1 payload + req2 payload) let resp = &rx.recv().unwrap()[0]; diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index a1feb0f7b60..67d367dc351 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1155,8 +1155,8 @@ fn handle_batch_commands_request( resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) - .inc(); + .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .inc(); if batcher.as_mut().map_or(false, |req_batch| { req_batch.can_batch_get(&req) }) { @@ -1197,8 +1197,8 @@ fn handle_batch_commands_request( resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) - .inc(); + .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .inc(); let begin_instant = Instant::now(); let source = req.mut_context().take_request_source(); let resp = future_copr(copr, Some(peer.to_string()), req) @@ -1231,8 +1231,8 @@ fn handle_batch_commands_request( resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) - .inc(); + .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .inc(); let begin_instant = Instant::now(); let source = req.mut_context().take_request_source(); let resp = $future_fn($($arg,)* req) @@ -2022,7 +2022,31 @@ fn future_raw_coprocessor( } macro_rules! 
txn_command_future { - ($fn_name: ident, $req_ty: ident, $resp_ty: ident, ($req: ident) {$($prelude: stmt)*}; ($v: ident, $resp: ident, $tracker: ident) { $else_branch: expr }) => { + ($fn_name: ident, $req_ty: ident, $resp_ty: ident, ($req: ident) {$($prelude: stmt)*}; ($v: ident, $resp: ident, $tracker: ident) $else_branch: block) => { + txn_command_future!(inner $fn_name, $req_ty, $resp_ty, ($req) {$($prelude)*}; ($v, $resp, $tracker) { + $else_branch + GLOBAL_TRACKERS.with_tracker($tracker, |tracker| { + tracker.write_scan_detail($resp.mut_exec_details_v2().mut_scan_detail_v2()); + tracker.write_write_detail($resp.mut_exec_details_v2().mut_write_detail()); + }); + }); + }; + + ($fn_name: ident, $req_ty: ident, $resp_ty: ident, ($v: ident, $resp: ident, $tracker: ident) $else_branch: block ) => { + txn_command_future!(inner $fn_name, $req_ty, $resp_ty, (req) {}; ($v, $resp, $tracker) { + $else_branch + GLOBAL_TRACKERS.with_tracker($tracker, |tracker| { + tracker.write_scan_detail($resp.mut_exec_details_v2().mut_scan_detail_v2()); + tracker.write_write_detail($resp.mut_exec_details_v2().mut_write_detail()); + }); + }); + }; + + ($fn_name: ident, $req_ty: ident, $resp_ty: ident, ($v: ident, $resp: ident) $else_branch: block ) => { + txn_command_future!(inner $fn_name, $req_ty, $resp_ty, (req) {}; ($v, $resp, tracker) { $else_branch }); + }; + + (inner $fn_name: ident, $req_ty: ident, $resp_ty: ident, ($req: ident) {$($prelude: stmt)*}; ($v: ident, $resp: ident, $tracker: ident) $else_branch: block) => { fn $fn_name( storage: &Storage, $req: $req_ty, @@ -2049,31 +2073,21 @@ macro_rules! 
txn_command_future { if let Some(err) = extract_region_error(&$v) { $resp.set_region_error(err); } else { - $else_branch; + $else_branch } Ok($resp) } } }; - ($fn_name: ident, $req_ty: ident, $resp_ty: ident, ($v: ident, $resp: ident, $tracker: ident) { $else_branch: expr }) => { - txn_command_future!($fn_name, $req_ty, $resp_ty, (req) {}; ($v, $resp, $tracker) { $else_branch }); - }; - ($fn_name: ident, $req_ty: ident, $resp_ty: ident, ($v: ident, $resp: ident) { $else_branch: expr }) => { - txn_command_future!($fn_name, $req_ty, $resp_ty, (req) {}; ($v, $resp, tracker) { $else_branch }); - }; } -txn_command_future!(future_prewrite, PrewriteRequest, PrewriteResponse, (v, resp, tracker) {{ +txn_command_future!(future_prewrite, PrewriteRequest, PrewriteResponse, (v, resp, tracker) { if let Ok(v) = &v { resp.set_min_commit_ts(v.min_commit_ts.into_inner()); resp.set_one_pc_commit_ts(v.one_pc_commit_ts.into_inner()); - GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { - tracker.write_scan_detail(resp.mut_exec_details_v2().mut_scan_detail_v2()); - tracker.write_write_detail(resp.mut_exec_details_v2().mut_write_detail()); - }); } resp.set_errors(extract_key_errors(v.map(|v| v.locks)).into()); -}}); +}); txn_command_future!(future_acquire_pessimistic_lock, PessimisticLockRequest, PessimisticLockResponse, (req) { let mode = req.get_wake_up_mode() @@ -2104,21 +2118,17 @@ txn_command_future!(future_acquire_pessimistic_lock, PessimisticLockRequest, Pes resp.set_errors(vec![extract_key_error(&e)].into()) }, } - GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { - tracker.write_scan_detail(resp.mut_exec_details_v2().mut_scan_detail_v2()); - tracker.write_write_detail(resp.mut_exec_details_v2().mut_write_detail()); - }); }} ); -txn_command_future!(future_pessimistic_rollback, PessimisticRollbackRequest, PessimisticRollbackResponse, (v, resp) { +txn_command_future!(future_pessimistic_rollback, PessimisticRollbackRequest, PessimisticRollbackResponse, (v, resp, tracker) { 
resp.set_errors(extract_key_errors(v).into()) }); -txn_command_future!(future_batch_rollback, BatchRollbackRequest, BatchRollbackResponse, (v, resp) { +txn_command_future!(future_batch_rollback, BatchRollbackRequest, BatchRollbackResponse, (v, resp, tracker) { if let Err(e) = v { resp.set_error(extract_key_error(&e)); - } + }; }); -txn_command_future!(future_resolve_lock, ResolveLockRequest, ResolveLockResponse, (v, resp) { +txn_command_future!(future_resolve_lock, ResolveLockRequest, ResolveLockResponse, (v, resp, tracker) { if let Err(e) = v { resp.set_error(extract_key_error(&e)); } @@ -2127,11 +2137,7 @@ txn_command_future!(future_commit, CommitRequest, CommitResponse, (v, resp, trac match v { Ok(TxnStatus::Committed { commit_ts }) => { resp.set_commit_version(commit_ts.into_inner()); - GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { - tracker.write_scan_detail(resp.mut_exec_details_v2().mut_scan_detail_v2()); - tracker.write_write_detail(resp.mut_exec_details_v2().mut_write_detail()); - }); - } + }, Ok(_) => unreachable!(), Err(e) => resp.set_error(extract_key_error(&e)), } @@ -2145,7 +2151,7 @@ txn_command_future!(future_cleanup, CleanupRequest, CleanupResponse, (v, resp) { } } }); -txn_command_future!(future_txn_heart_beat, TxnHeartBeatRequest, TxnHeartBeatResponse, (v, resp) { +txn_command_future!(future_txn_heart_beat, TxnHeartBeatRequest, TxnHeartBeatResponse, (v, resp, tracker) { match v { Ok(txn_status) => { if let TxnStatus::Uncommitted { lock, .. 
} = txn_status { @@ -2158,7 +2164,7 @@ txn_command_future!(future_txn_heart_beat, TxnHeartBeatRequest, TxnHeartBeatResp } }); txn_command_future!(future_check_txn_status, CheckTxnStatusRequest, CheckTxnStatusResponse, - (v, resp) { + (v, resp, tracker) { match v { Ok(txn_status) => match txn_status { TxnStatus::RolledBack => resp.set_action(Action::NoAction), @@ -2181,7 +2187,7 @@ txn_command_future!(future_check_txn_status, CheckTxnStatusRequest, CheckTxnStat Err(e) => resp.set_error(extract_key_error(&e)), } }); -txn_command_future!(future_check_secondary_locks, CheckSecondaryLocksRequest, CheckSecondaryLocksResponse, (status, resp) { +txn_command_future!(future_check_secondary_locks, CheckSecondaryLocksRequest, CheckSecondaryLocksResponse, (status, resp, tracker) { match status { Ok(SecondaryLocksStatus::Locked(locks)) => { resp.set_locks(locks.into()); diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 85c41124b89..cd3b711baa8 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -628,7 +628,7 @@ impl TxnScheduler { let this = self.clone(); self.get_sched_pool() .spawn(&group_name, pri, async move { - this.finish_with_err(cid, err); + this.finish_with_err(cid, err, None); }) .unwrap(); } @@ -670,7 +670,7 @@ impl TxnScheduler { self.get_sched_pool() .spawn(&task.cmd.group_name(), task.cmd.priority(), async move { fail_point!("scheduler_start_execute"); - if sched.check_task_deadline_exceeded(&task) { + if sched.check_task_deadline_exceeded(&task, None) { return; } @@ -704,7 +704,11 @@ impl TxnScheduler { .unwrap() .try_own() { - sched.finish_with_err(task.cid, StorageErrorInner::DeadlineExceeded); + sched.finish_with_err( + task.cid, + StorageErrorInner::DeadlineExceeded, + None, + ); return; } @@ -724,7 +728,7 @@ impl TxnScheduler { SCHED_STAGE_COUNTER_VEC.get(tag).snapshot_err.inc(); info!("get snapshot failed"; "cid" => task.cid, "err" => ?err); - sched.finish_with_err(task.cid, Error::from(err)); + 
sched.finish_with_err(task.cid, Error::from(err), None); } } }) @@ -732,7 +736,7 @@ impl TxnScheduler { } /// Calls the callback with an error. - fn finish_with_err(&self, cid: u64, err: ER) + fn finish_with_err(&self, cid: u64, err: ER, sched_details: Option<&SchedulerDetails>) where StorageError: From, { @@ -744,6 +748,16 @@ impl TxnScheduler { let pr = ProcessResult::Failed { err: StorageError::from(err), }; + if let Some(details) = sched_details { + GLOBAL_TRACKERS.with_tracker(details.tracker, |tracker| { + tracker.metrics.scheduler_process_nanos = details + .start_process_instant + .saturating_elapsed() + .as_nanos() as u64; + tracker.metrics.scheduler_throttle_nanos = + details.flow_control_nanos + details.quota_limit_delay_nanos; + }); + } if let Some(cb) = tctx.cb { cb.execute(pr); } @@ -1075,7 +1089,7 @@ impl TxnScheduler { /// Process the task in the current thread. async fn process(self, snapshot: E::Snap, task: Task) { - if self.check_task_deadline_exceeded(&task) { + if self.check_task_deadline_exceeded(&task, None) { return; } @@ -1184,7 +1198,7 @@ impl TxnScheduler { .await; if let Err(err) = raw_ext { info!("get_raw_ext failed"; "cid" => cid, "err" => ?err); - scheduler.finish_with_err(cid, err); + scheduler.finish_with_err(cid, err, Some(sched_details)); return; } let raw_ext = raw_ext.unwrap(); @@ -1265,7 +1279,7 @@ impl TxnScheduler { Err(err) => { SCHED_STAGE_COUNTER_VEC.get(tag).prepare_write_err.inc(); debug!("write command failed"; "cid" => cid, "err" => ?err); - scheduler.finish_with_err(cid, err); + scheduler.finish_with_err(cid, err, Some(sched_details)); return; } // Initiates an async write operation on the storage engine, there'll be a @@ -1411,7 +1425,11 @@ impl TxnScheduler { break; } if now >= deadline.inner() { - scheduler.finish_with_err(cid, StorageErrorInner::DeadlineExceeded); + scheduler.finish_with_err( + cid, + StorageErrorInner::DeadlineExceeded, + Some(sched_details), + ); self.inner.flow_controller.unconsume(region_id, 
write_size); SCHED_THROTTLE_TIME.observe(start.saturating_elapsed_secs()); return; @@ -1621,9 +1639,13 @@ impl TxnScheduler { /// If the task has expired, return `true` and call the callback of /// the task with a `DeadlineExceeded` error. #[inline] - fn check_task_deadline_exceeded(&self, task: &Task) -> bool { + fn check_task_deadline_exceeded( + &self, + task: &Task, + sched_details: Option<&SchedulerDetails>, + ) -> bool { if let Err(e) = task.cmd.deadline().check() { - self.finish_with_err(task.cid, e); + self.finish_with_err(task.cid, e, sched_details); true } else { false diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 5e47ad4745b..4f14c06ad4a 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -2407,6 +2407,7 @@ fn test_storage_with_quota_limiter_disable() { #[test] fn test_commands_write_detail() { + test_util::init_log_for_test(); let (_cluster, client, ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { cluster.cfg.pessimistic_txn.pipelined = false; cluster.cfg.pessimistic_txn.in_memory = false; @@ -2417,7 +2418,6 @@ fn test_commands_write_detail() { assert!(sc.get_get_snapshot_nanos() > 0); }; let check_write_detail = |wd: &WriteDetail| { - assert!(wd.get_store_batch_wait_nanos() > 0); assert!(wd.get_persist_log_nanos() > 0); assert!(wd.get_raft_db_write_leader_wait_nanos() > 0); assert!(wd.get_raft_db_sync_log_nanos() > 0); @@ -2471,13 +2471,51 @@ fn test_commands_write_detail() { check_write_detail(prewrite_resp.get_exec_details_v2().get_write_detail()); let mut commit_req = CommitRequest::default(); - commit_req.set_context(ctx); - commit_req.set_keys(vec![k].into()); + commit_req.set_context(ctx.clone()); + commit_req.set_keys(vec![k.clone()].into()); commit_req.set_start_version(20); commit_req.set_commit_version(30); let commit_resp = client.kv_commit(&commit_req).unwrap(); 
check_scan_detail(commit_resp.get_exec_details_v2().get_scan_detail_v2()); check_write_detail(commit_resp.get_exec_details_v2().get_write_detail()); + + let mut txn_heartbeat_req = TxnHeartBeatRequest::default(); + txn_heartbeat_req.set_context(ctx.clone()); + txn_heartbeat_req.set_primary_lock(k.clone()); + txn_heartbeat_req.set_start_version(20); + txn_heartbeat_req.set_advise_lock_ttl(1000); + let txn_heartbeat_resp = client.kv_txn_heart_beat(&txn_heartbeat_req).unwrap(); + check_scan_detail( + txn_heartbeat_resp + .get_exec_details_v2() + .get_scan_detail_v2(), + ); + assert!( + txn_heartbeat_resp + .get_exec_details_v2() + .get_write_detail() + .get_process_nanos() + > 0 + ); + + let mut check_txn_status_req = CheckTxnStatusRequest::default(); + check_txn_status_req.set_context(ctx); + check_txn_status_req.set_primary_key(k); + check_txn_status_req.set_lock_ts(20); + check_txn_status_req.set_rollback_if_not_exist(true); + let check_txn_status_resp = client.kv_check_txn_status(&check_txn_status_req).unwrap(); + check_scan_detail( + check_txn_status_resp + .get_exec_details_v2() + .get_scan_detail_v2(), + ); + assert!( + check_txn_status_resp + .get_exec_details_v2() + .get_write_detail() + .get_process_nanos() + > 0 + ); } #[test] From 5ce3a6b78dbf498e4decbfcb5b0b7d1d7ea61dba Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 9 May 2023 14:30:58 +0800 Subject: [PATCH 0679/1149] raftstore-v2: offload checkpoint during split (#14646) close tikv/tikv#14711 offload checkpoint during split Signed-off-by: SpadeA-Tang --- components/raftstore-v2/src/batch/store.rs | 19 ++- components/raftstore-v2/src/fsm/apply.rs | 3 + .../src/operation/command/admin/split.rs | 155 ++++++++++-------- .../raftstore-v2/src/operation/command/mod.rs | 5 +- .../src/operation/query/capture.rs | 3 + components/raftstore-v2/src/raft/apply.rs | 10 ++ components/raftstore-v2/src/raft/storage.rs | 5 +- .../raftstore-v2/src/worker/checkpoint.rs | 145 
++++++++++++++++ components/raftstore-v2/src/worker/mod.rs | 1 + 9 files changed, 269 insertions(+), 77 deletions(-) create mode 100644 components/raftstore-v2/src/worker/checkpoint.rs diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 1f6245cc010..9b01501ddd9 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -46,7 +46,7 @@ use tikv_util::{ sys::SysQuota, time::{duration_to_sec, Instant as TiInstant}, timer::SteadyTimer, - worker::{LazyWorker, Scheduler, Worker}, + worker::{Builder, LazyWorker, Scheduler, Worker}, yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, Either, }; @@ -57,7 +57,7 @@ use crate::{ operation::{SharedReadTablet, MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX}, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, - worker::{pd, tablet}, + worker::{checkpoint, pd, tablet}, Error, Result, }; @@ -496,6 +496,7 @@ pub struct Schedulers { pub read: Scheduler>, pub pd: Scheduler, pub tablet: Scheduler>, + pub checkpoint: Scheduler, pub write: WriteSenders, // Following is not maintained by raftstore itself. 
@@ -518,6 +519,7 @@ struct Workers { async_read: Worker, pd: LazyWorker, tablet: Worker, + checkpoint: Worker, async_write: StoreWriters, purge: Option, @@ -527,10 +529,12 @@ struct Workers { impl Workers { fn new(background: Worker, pd: LazyWorker, purge: Option) -> Self { + let checkpoint = Builder::new("checkpoint-worker").thread_count(2).create(); Self { async_read: Worker::new("async-read-worker"), pd, tablet: Worker::new("tablet-worker"), + checkpoint, async_write: StoreWriters::new(None), purge, background, @@ -542,6 +546,7 @@ impl Workers { self.async_read.stop(); self.pd.stop(); self.tablet.stop(); + self.checkpoint.stop(); if let Some(w) = self.purge { w.stop(); } @@ -653,7 +658,7 @@ impl StoreSystem { ), ); - let tablet_gc_scheduler = workers.tablet.start_with_timer( + let tablet_scheduler = workers.tablet.start_with_timer( "tablet-worker", tablet::Runner::new( tablet_registry.clone(), @@ -662,10 +667,16 @@ impl StoreSystem { ), ); + let checkpoint_scheduler = workers.checkpoint.start( + "checkpoint-worker", + checkpoint::Runner::new(self.logger.clone(), tablet_registry.clone()), + ); + let schedulers = Schedulers { read: read_scheduler, pd: workers.pd.scheduler(), - tablet: tablet_gc_scheduler, + tablet: tablet_scheduler, + checkpoint: checkpoint_scheduler, write: workers.async_write.senders(), split_check: split_check_scheduler, }; diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 08d7f7946ec..9a3bc753810 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -27,6 +27,7 @@ use crate::{ operation::{CatchUpLogs, DataTrace}, raft::Apply, router::{ApplyRes, ApplyTask, PeerMsg}, + worker::checkpoint, }; /// A trait for reporting apply result. 
@@ -77,6 +78,7 @@ impl ApplyFsm { res_reporter: R, tablet_registry: TabletRegistry, read_scheduler: Scheduler>, + checkpoint_scheduler: Scheduler, flush_state: Arc, log_recovery: Option>, applied_term: u64, @@ -99,6 +101,7 @@ impl ApplyFsm { buckets, sst_importer, coprocessor_host, + checkpoint_scheduler, logger, ); ( diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 4c6fdad3aa2..cd2c8428a46 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -25,14 +25,19 @@ //! created by the store, and here init it using the data sent from the parent //! peer. -use std::{any::Any, borrow::Cow, cmp, path::PathBuf}; +use std::{ + any::Any, + borrow::Cow, + cmp, + path::{Path, PathBuf}, + time::Duration, +}; use collections::HashSet; use crossbeam::channel::SendError; -use engine_traits::{ - Checkpointer, KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, -}; +use engine_traits::{KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry}; use fail::fail_point; +use futures::channel::oneshot; use kvproto::{ metapb::{self, Region, RegionEpoch}, pdpb::CheckPolicy, @@ -54,7 +59,7 @@ use raftstore::{ Result, }; use slog::{error, info, warn}; -use tikv_util::{log::SlogFormat, slog_panic, time::Instant}; +use tikv_util::{log::SlogFormat, slog_panic, time::Instant, worker::Scheduler}; use crate::{ batch::StoreContext, @@ -62,7 +67,7 @@ use crate::{ operation::{AdminCmdResult, SharedReadTablet}, raft::{Apply, Peer}, router::{CmdResChannel, PeerMsg, PeerTick, StoreMsg}, - worker::tablet, + worker::{checkpoint, tablet}, Error, }; @@ -370,7 +375,7 @@ impl Peer { } impl Apply { - pub fn apply_split( + pub async fn apply_split( &mut self, req: &AdminRequest, log_index: u64, @@ -388,10 +393,10 @@ impl Apply { // This method is executed only when there are unapplied entries after being // 
restarted. So there will be no callback, it's OK to return a response // that does not matched with its request. - self.apply_batch_split(req, log_index) + self.apply_batch_split(req, log_index).await } - pub fn apply_batch_split( + pub async fn apply_batch_split( &mut self, req: &AdminRequest, log_index: u64, @@ -469,65 +474,36 @@ impl Apply { // write batch self.flush(); - // todo(SpadeA): Here: we use a temporary solution that we use checkpoint API to - // clone new tablets. It may cause large jitter as we need to flush the - // memtable. And more what is more important is that after removing WAL, the API - // will never flush. - // We will freeze the memtable rather than flush it in the following PR. - let tablet = self.tablet().clone(); - let mut checkpointer = tablet.new_checkpointer().unwrap_or_else(|e| { - slog_panic!( - self.logger, - "fails to create checkpoint object"; - "error" => ?e - ) - }); - let now = Instant::now(); - let reg = self.tablet_registry(); - for new_region in ®ions { - let new_region_id = new_region.id; - if new_region_id == region_id { - continue; - } - - let split_temp_path = temp_split_path(reg, new_region_id); - checkpointer - .create_at(&split_temp_path, None, 0) - .unwrap_or_else(|e| { - slog_panic!( - self.logger, - "fails to create checkpoint"; - "path" => %split_temp_path.display(), - "error" => ?e - ) - }); - } + let split_region_ids = regions + .iter() + .map(|r| r.get_id()) + .filter(|id| id != ®ion_id) + .collect::>(); + let (_, _, cur_suffix) = self + .tablet_registry() + .parse_tablet_name(Path::new(self.tablet().path())) + .unwrap(); + let scheduler: _ = self.checkpoint_scheduler().clone(); + let checkpoint_duration = async_checkpoint( + &scheduler, + region_id, + split_region_ids, + cur_suffix, + log_index, + ) + .await; - let derived_path = self.tablet_registry().tablet_path(region_id, log_index); - // If it's recovered from restart, it's possible the target path exists already. 
- // And because checkpoint is atomic, so we don't need to worry about corruption. - // And it's also wrong to delete it and remake as it may has applied and flushed - // some data to the new checkpoint before being restarted. - if !derived_path.exists() { - checkpointer - .create_at(&derived_path, None, 0) - .unwrap_or_else(|e| { - slog_panic!( - self.logger, - "fails to create checkpoint"; - "path" => %derived_path.display(), - "error" => ?e - ) - }); - } + // It should equal to checkpoint_duration + the duration of rescheduling current + // apply peer let elapsed = now.saturating_elapsed(); // to be removed after when it's stable info!( self.logger, - "create checkpoint time consumes"; + "checkpoint done and resume batch split execution"; "region" => ?self.region(), - "duration" => ?elapsed + "checkpoint_duration" => ?checkpoint_duration, + "total_duration" => ?elapsed, ); let reg = self.tablet_registry(); @@ -560,6 +536,27 @@ impl Apply { } } +// asynchronously execute the checkpoint creation and return the duration spent +// by it +async fn async_checkpoint( + scheduler: &Scheduler, + parent_region: u64, + split_regions: Vec, + cur_suffix: u64, + log_index: u64, +) -> Duration { + let (tx, rx) = oneshot::channel(); + let task = checkpoint::Task::Checkpoint { + cur_suffix, + log_index, + parent_region, + split_regions, + sender: tx, + }; + scheduler.schedule_force(task).unwrap(); + rx.await.unwrap() +} + impl Peer { pub fn on_apply_res_split( &mut self, @@ -867,6 +864,7 @@ mod test { use engine_traits::{ FlushState, Peekable, TabletContext, TabletRegistry, WriteBatch, CF_DEFAULT, DATA_CFS, }; + use futures::executor::block_on; use kvproto::{ metapb::RegionEpoch, raft_cmdpb::{BatchSplitRequest, SplitRequest}, @@ -879,8 +877,9 @@ mod test { use slog::o; use tempfile::TempDir; use tikv_util::{ + defer, store::{new_learner_peer, new_peer}, - worker::dummy_scheduler, + worker::{dummy_scheduler, Worker}, }; use super::*; @@ -947,7 +946,8 @@ mod test { 
req.set_splits(splits); // Exec batch split - let (resp, apply_res) = apply.apply_batch_split(&req, log_index).unwrap(); + let (resp, apply_res) = + block_on(async { apply.apply_batch_split(&req, log_index).await }).unwrap(); let regions = resp.get_splits().get_regions(); assert!(regions.len() == region_boundries.len()); @@ -990,6 +990,11 @@ mod test { assert!(reg.tablet_factory().exists(&path)); } } + + let AdminCmdResult::SplitRegion(SplitResult { tablet, .. }) = apply_res else { panic!() }; + // update cache + let mut cache = apply.tablet_registry().get(parent_id).unwrap(); + cache.set(*tablet.downcast().unwrap()); } #[test] @@ -1020,6 +1025,13 @@ mod test { region_state.set_region(region.clone()); region_state.set_tablet_index(5); + let checkpoint_worker = Worker::new("checkpoint-worker"); + let checkpoint_scheduler = checkpoint_worker.start( + "checkpoint-worker", + checkpoint::Runner::new(logger.clone(), reg.clone()), + ); + defer!(checkpoint_worker.stop()); + let (read_scheduler, _rx) = dummy_scheduler(); let (reporter, _) = MockReporter::new(); let (_tmp_dir, importer) = create_tmp_importer(); @@ -1042,6 +1054,7 @@ mod test { None, importer, host, + checkpoint_scheduler, logger.clone(), ); @@ -1050,13 +1063,13 @@ mod test { splits.mut_requests().push(new_split_req(b"k1", 1, vec![])); let mut req = AdminRequest::default(); req.set_splits(splits.clone()); - let err = apply.apply_batch_split(&req, 0).unwrap_err(); + let err = block_on(async { apply.apply_batch_split(&req, 0).await }).unwrap_err(); // 3 followers are required. assert!(err.to_string().contains("invalid new peer id count")); splits.mut_requests().clear(); req.set_splits(splits.clone()); - let err = apply.apply_batch_split(&req, 6).unwrap_err(); + let err = block_on(async { apply.apply_batch_split(&req, 6).await }).unwrap_err(); // Empty requests should be rejected. 
assert!(err.to_string().contains("missing split requests")); @@ -1064,7 +1077,9 @@ mod test { .mut_requests() .push(new_split_req(b"k11", 1, vec![11, 12, 13])); req.set_splits(splits.clone()); - let resp = new_error(apply.apply_batch_split(&req, 0).unwrap_err()); + let resp = + new_error(block_on(async { apply.apply_batch_split(&req, 0).await }).unwrap_err()); + // Out of range keys should be rejected. assert!( resp.get_header().get_error().has_key_not_in_region(), @@ -1077,7 +1092,7 @@ mod test { .mut_requests() .push(new_split_req(b"", 1, vec![11, 12, 13])); req.set_splits(splits.clone()); - let err = apply.apply_batch_split(&req, 7).unwrap_err(); + let err = block_on(async { apply.apply_batch_split(&req, 7).await }).unwrap_err(); // Empty key will not in any region exclusively. assert!(err.to_string().contains("missing split key"), "{:?}", err); @@ -1089,7 +1104,7 @@ mod test { .mut_requests() .push(new_split_req(b"k1", 1, vec![11, 12, 13])); req.set_splits(splits.clone()); - let err = apply.apply_batch_split(&req, 8).unwrap_err(); + let err = block_on(async { apply.apply_batch_split(&req, 8).await }).unwrap_err(); // keys should be in ascend order. assert!( err.to_string().contains("invalid split request"), @@ -1105,7 +1120,7 @@ mod test { .mut_requests() .push(new_split_req(b"k2", 1, vec![11, 12])); req.set_splits(splits.clone()); - let err = apply.apply_batch_split(&req, 9).unwrap_err(); + let err = block_on(async { apply.apply_batch_split(&req, 9).await }).unwrap_err(); // All requests should be checked. 
assert!(err.to_string().contains("id count"), "{:?}", err); @@ -1223,7 +1238,7 @@ mod test { .mut_requests() .push(new_split_req(b"k05", 70, vec![71, 72, 73])); req.set_splits(splits); - apply.apply_batch_split(&req, 51).unwrap(); + block_on(async { apply.apply_batch_split(&req, 51).await }).unwrap(); assert!(apply.write_batch.is_none()); assert_eq!( apply diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index b9256f031fe..11ada3697c0 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -143,6 +143,7 @@ impl Peer { mailbox, store_ctx.tablet_registry.clone(), read_scheduler, + store_ctx.schedulers.checkpoint.clone(), self.flush_state().clone(), self.storage().apply_trace().log_recovery(), self.entry_storage().applied_term(), @@ -631,8 +632,8 @@ impl Apply { let admin_req = req.get_admin_request(); let (admin_resp, admin_result) = match req.get_admin_request().get_cmd_type() { AdminCmdType::CompactLog => self.apply_compact_log(admin_req, log_index)?, - AdminCmdType::Split => self.apply_split(admin_req, log_index)?, - AdminCmdType::BatchSplit => self.apply_batch_split(admin_req, log_index)?, + AdminCmdType::Split => self.apply_split(admin_req, log_index).await?, + AdminCmdType::BatchSplit => self.apply_batch_split(admin_req, log_index).await?, AdminCmdType::PrepareMerge => self.apply_prepare_merge(admin_req, log_index)?, AdminCmdType::CommitMerge => self.apply_commit_merge(admin_req, log_index).await?, AdminCmdType::RollbackMerge => unimplemented!(), diff --git a/components/raftstore-v2/src/operation/query/capture.rs b/components/raftstore-v2/src/operation/query/capture.rs index 5393dfacc98..c1a622cd1f9 100644 --- a/components/raftstore-v2/src/operation/query/capture.rs +++ b/components/raftstore-v2/src/operation/query/capture.rs @@ -309,6 +309,8 @@ mod test { let mut host = CoprocessorHost::::default(); host.registry 
.register_cmd_observer(0, BoxCmdObserver::new(ob)); + + let (dummy_scheduler, _) = dummy_scheduler(); let mut apply = Apply::new( &Config::default(), region @@ -327,6 +329,7 @@ mod test { None, importer, host, + dummy_scheduler, logger.clone(), ); diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index d32b8bdbb80..bf1c81e88c8 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -21,6 +21,7 @@ use tikv_util::{log::SlogFormat, worker::Scheduler}; use crate::{ operation::{AdminCmdResult, ApplyFlowControl, DataTrace}, router::CmdResChannel, + worker::checkpoint, }; pub(crate) struct Observe { @@ -71,6 +72,8 @@ pub struct Apply { observe: Observe, coprocessor_host: CoprocessorHost, + checkpoint_scheduler: Scheduler, + pub(crate) metrics: ApplyMetrics, pub(crate) logger: Logger, pub(crate) buckets: Option, @@ -91,6 +94,7 @@ impl Apply { buckets: Option, sst_importer: Arc, coprocessor_host: CoprocessorHost, + checkpoint_scheduler: Scheduler, logger: Logger, ) -> Self { let mut remote_tablet = tablet_registry @@ -123,6 +127,7 @@ impl Apply { metrics: ApplyMetrics::default(), buckets, sst_importer, + checkpoint_scheduler, observe: Observe { info: CmdObserveInfo::default(), level: ObserveLevel::None, @@ -308,4 +313,9 @@ impl Apply { pub fn coprocessor_host(&self) -> &CoprocessorHost { &self.coprocessor_host } + + #[inline] + pub fn checkpoint_scheduler(&self) -> &Scheduler { + &self.checkpoint_scheduler + } } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 7edf8c02f09..0572a933fd5 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -338,7 +338,7 @@ mod tests { }; use slog::o; use tempfile::TempDir; - use tikv_util::worker::Worker; + use tikv_util::worker::{dummy_scheduler, Worker}; use super::*; use crate::{ @@ -506,6 +506,8 @@ mod tests { 
state.set_region(region.clone()); let (_tmp_dir, importer) = create_tmp_importer(); let host = CoprocessorHost::::default(); + + let (dummy_scheduler, _) = dummy_scheduler(); // setup peer applyer let mut apply = Apply::new( &Config::default(), @@ -520,6 +522,7 @@ mod tests { None, importer, host, + dummy_scheduler, logger, ); diff --git a/components/raftstore-v2/src/worker/checkpoint.rs b/components/raftstore-v2/src/worker/checkpoint.rs new file mode 100644 index 00000000000..0cbc9a11a42 --- /dev/null +++ b/components/raftstore-v2/src/worker/checkpoint.rs @@ -0,0 +1,145 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + fmt::Display, + path::{Path, PathBuf}, + time::Duration, +}; + +use engine_traits::{Checkpointer, KvEngine, TabletRegistry}; +use futures::channel::oneshot::Sender; +use raftstore::store::RAFT_INIT_LOG_INDEX; +use slog::Logger; +use tikv_util::{slog_panic, time::Instant, worker::Runnable}; + +use crate::operation::SPLIT_PREFIX; + +pub enum Task { + Checkpoint { + // it is only used to assert + cur_suffix: u64, + log_index: u64, + parent_region: u64, + split_regions: Vec, + sender: Sender, + }, +} + +impl Display for Task { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Task::Checkpoint { + log_index, + parent_region, + split_regions, + .. 
+ } => write!( + f, + "create checkpoint for batch split, parent region_id {}, source region_ids {:?}, log_index {}", + parent_region, split_regions, log_index, + ), + } + } +} + +pub struct Runner { + logger: Logger, + tablet_registry: TabletRegistry, +} + +pub fn temp_split_path(registry: &TabletRegistry, region_id: u64) -> PathBuf { + let tablet_name = registry.tablet_name(SPLIT_PREFIX, region_id, RAFT_INIT_LOG_INDEX); + registry.tablet_root().join(tablet_name) +} + +impl Runner { + pub fn new(logger: Logger, tablet_registry: TabletRegistry) -> Self { + Self { + logger, + tablet_registry, + } + } + + fn checkpoint( + &self, + parent_region: u64, + split_regions: Vec, + cur_suffix: u64, + log_index: u64, + sender: Sender, + ) { + let now = Instant::now(); + + let mut cache = self.tablet_registry.get(parent_region).unwrap(); + let tablet = cache.latest().unwrap(); + let (_, _, suffix) = self + .tablet_registry + .parse_tablet_name(Path::new(tablet.path())) + .unwrap(); + assert_eq!(cur_suffix, suffix); + + let mut checkpointer = tablet.new_checkpointer().unwrap_or_else(|e| { + slog_panic!( + self.logger, + "fails to create checkpoint object"; + "region_id" => parent_region, + "error" => ?e + ) + }); + + for id in split_regions { + let split_temp_path = temp_split_path(&self.tablet_registry, id); + checkpointer + .create_at(&split_temp_path, None, 0) + .unwrap_or_else(|e| { + slog_panic!( + self.logger, + "fails to create checkpoint"; + "region_id" => parent_region, + "path" => %split_temp_path.display(), + "error" => ?e + ) + }); + } + + let derived_path = self.tablet_registry.tablet_path(parent_region, log_index); + + // If it's recovered from restart, it's possible the target path exists already. + // And because checkpoint is atomic, so we don't need to worry about corruption. + // And it's also wrong to delete it and remake as it may has applied and flushed + // some data to the new checkpoint before being restarted. 
+ if !derived_path.exists() { + checkpointer + .create_at(&derived_path, None, 0) + .unwrap_or_else(|e| { + slog_panic!( + self.logger, + "fails to create checkpoint"; + "region_id" => parent_region, + "path" => %derived_path.display(), + "error" => ?e + ) + }); + } + + sender.send(now.saturating_elapsed()).unwrap(); + } +} + +impl Runnable for Runner { + type Task = Task; + + fn run(&mut self, task: Self::Task) { + match task { + Task::Checkpoint { + cur_suffix, + log_index, + parent_region, + split_regions, + sender, + } => { + self.checkpoint(parent_region, split_regions, cur_suffix, log_index, sender); + } + } + } +} diff --git a/components/raftstore-v2/src/worker/mod.rs b/components/raftstore-v2/src/worker/mod.rs index 2fa7255afd3..b75525018d6 100644 --- a/components/raftstore-v2/src/worker/mod.rs +++ b/components/raftstore-v2/src/worker/mod.rs @@ -1,4 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +pub mod checkpoint; pub mod pd; pub mod tablet; From 15fd61ff88c96047b8ffada0c4993a65a57d2f3d Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 9 May 2023 09:40:07 -0700 Subject: [PATCH 0680/1149] add make docker_test to run tikv test in docker environment (#14678) close tikv/tikv#14677 Add a make option "make docker_test" to run test in docker. Also make necessary changes to enable running test in docker. Signed-off-by: qi.xu Co-authored-by: qi.xu --- CONTRIBUTING.md | 14 ++++++++++++ Dockerfile.test | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ Makefile | 9 ++++++++ scripts/test | 1 + 4 files changed, 81 insertions(+) create mode 100644 Dockerfile.test diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 41b2ef7a528..b2e1c37bc44 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -97,6 +97,20 @@ See the [style doc](https://github.com/rust-lang/fmt-rfcs/blob/master/guide/guid Please follow this style to make TiKV easy to review, maintain, and develop. 
+### Run test in docker + +Alternatively, you can run test in a docker environment. Simply running the following command, it will build the pingcap/tikv_dev image and run the tikv unittests. And you may re-use the pingcap/tikv_dev image directly for ad-hoc test. + +```bash +make docker_test +``` + +Note that you may find many messages below, which in fact are not errors. They're emitted by rustc or cargo. + +```bash +: Invalid conf pair: prof:true +``` + ### Build issues To reduce compilation time and disk usage, TiKV builds do not include full debugging information by default — only tests package will have line debug info enabled. To change debuginfo, just precede build commands with `RUSTFLAGS=-Cdebuginfo=1` (for line numbers), or `RUSTFLAGS=-Cdebuginfo=2` (for full debuginfo). For example, diff --git a/Dockerfile.test b/Dockerfile.test new file mode 100644 index 00000000000..da23a7a30b6 --- /dev/null +++ b/Dockerfile.test @@ -0,0 +1,57 @@ +# This Docker image contains a minimal build environment for TiKV +# +# It contains all the tools necessary to reproduce official production builds of TiKV + +# We need to use CentOS 7 because many of our users choose this as their deploy machine. +# Since the glibc it uses (2.17) is from 2012 (https://sourceware.org/glibc/wiki/Glibc%20Timeline) +# it is our lowest common denominator in terms of distro support. + +# Some commands in this script are structured in order to reduce the number of layers Docker +# generates. Unfortunately Docker is limited to only 125 layers: +# https://github.com/moby/moby/blob/a9507c6f76627fdc092edc542d5a7ef4a6df5eec/layer/layer.go#L50-L53 + +# We require epel packages, so enable the fedora EPEL repo then install dependencies. 
+# Install the system dependencies +# Attempt to clean and rebuild the cache to avoid 404s + +# To avoid rebuilds we first install all Cargo dependencies + + +# The prepare image avoid ruining the cache of the builder +FROM centos:7.6.1810 as builder + +RUN yum install -y epel-release && \ + yum clean all && \ + yum makecache + +RUN yum install -y centos-release-scl && \ + yum install -y \ + devtoolset-8 \ + perl cmake3 && \ + yum clean all + +# CentOS gives cmake 3 a weird binary name, so we link it to something more normal +# This is required by many build scripts, including ours. +RUN ln -s /usr/bin/cmake3 /usr/bin/cmake +ENV LIBRARY_PATH /usr/local/lib:$LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/lib:$LD_LIBRARY_PATH + +# Install protoc +RUN curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip" +RUN unzip protoc-3.15.8-linux-x86_64.zip -d /usr/local/ +ENV PATH /usr/local/bin/:$PATH + +# Install Rustup +RUN curl https://sh.rustup.rs -sSf | sh -s -- --no-modify-path --default-toolchain none -y +ENV PATH /root/.cargo/bin/:$PATH + +# Install the Rust toolchain +WORKDIR /tikv +COPY rust-toolchain ./ +RUN rustup self update \ + && rustup set profile minimal \ + && rustup default $(cat "rust-toolchain") + +RUN cargo install cargo-nextest --locked + +ENTRYPOINT ["sh", "-c", "source /opt/rh/devtoolset-8/enable && \"$@\"", "-s"] diff --git a/Makefile b/Makefile index 6e8cada8b6f..30209caa3d9 100644 --- a/Makefile +++ b/Makefile @@ -137,6 +137,7 @@ export TIKV_BUILD_GIT_BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD 2> /dev/ export DOCKER_IMAGE_NAME ?= "pingcap/tikv" export DOCKER_IMAGE_TAG ?= "latest" +export DEV_DOCKER_IMAGE_NAME ?= "pingcap/tikv_dev" # Turn on cargo pipelining to add more build parallelism. This has shown decent # speedups in TiKV. @@ -396,6 +397,14 @@ docker: --build-arg GIT_BRANCH=${TIKV_BUILD_GIT_BRANCH} \ . 
+docker_test: + docker build -f Dockerfile.test \ + -t ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ + . + docker run -i -v $(shell pwd):/tikv \ + ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ + make test + ## The driver for script/run-cargo.sh ## ---------------------------------- diff --git a/scripts/test b/scripts/test index e4c46c6a620..d98f627dcf1 100755 --- a/scripts/test +++ b/scripts/test @@ -28,6 +28,7 @@ export DYLD_LIBRARY_PATH="${DYLD_LIBRARY_PATH}:${LOCAL_DIR}/lib" export LOG_LEVEL=DEBUG export RUST_BACKTRACE=full +echo ${TIKV_ENABLE_FEATURES} cargo $CUSTOM_TEST_COMMAND --workspace \ --exclude fuzz --exclude fuzzer-afl --exclude fuzzer-honggfuzz \ --exclude fuzzer-libfuzzer --exclude fuzz-targets \ From 8aa8f5480d5c75091f4ac0f0d10738fae5fe4148 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 9 May 2023 23:48:08 -0700 Subject: [PATCH 0681/1149] raftstore-v2: add raftstore-v2's test cases under kv_service (#14719) ref tikv/tikv#12842 Add integration test cases for raftstore-v2 (reuse v1's test cases). 
Flashback test cases are not included because the function PR is not merged yet Signed-off-by: qi.xu Co-authored-by: qi.xu --- components/test_raftstore-v2/src/cluster.rs | 6 +- components/test_raftstore-v2/src/server.rs | 43 ++++ components/test_raftstore/src/server.rs | 38 +++ tests/failpoints/cases/test_coprocessor.rs | 10 +- tests/integrations/server/kv_service.rs | 251 +++++++++----------- 5 files changed, 196 insertions(+), 152 deletions(-) diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index eafa7a45403..9f30070fea7 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -54,7 +54,7 @@ use test_raftstore::{ new_transfer_leader_cmd, sleep_ms, Config, Filter, FilterFactory, PartitionFilterFactory, RawEngine, }; -use tikv::server::Result as ServerResult; +use tikv::{server::Result as ServerResult, storage::config::EngineType}; use tikv_util::{ box_err, box_try, debug, error, safe_panic, thread_group::GroupProperties, @@ -371,9 +371,11 @@ impl, EK: KvEngine> Cluster { ), >, ) -> Cluster { + let mut tikv_cfg = new_tikv_config_with_api_ver(id, api_version); + tikv_cfg.storage.engine = EngineType::RaftKv2; Cluster { cfg: Config { - tikv: new_tikv_config_with_api_ver(id, api_version), + tikv: tikv_cfg, prefer_mem: true, }, count, diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 35671c227f4..28ded8d5371 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -1037,3 +1037,46 @@ pub fn must_new_cluster_and_debug_client() -> ( (cluster, client, leader.get_store_id()) } + +pub fn setup_cluster() -> ( + Cluster, RocksEngine>, + TikvClient, + String, + Context, +) { + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + + let region_id = 1; + let leader = cluster.leader_of_region(region_id).unwrap(); + let leader_addr = 
cluster.sim.rl().get_addr(leader.get_store_id()); + let region = cluster.get_region(b"k1"); + let follower = region + .get_peers() + .iter() + .find(|p| **p != leader) + .unwrap() + .clone(); + let follower_addr = cluster.sim.rl().get_addr(follower.get_store_id()); + let epoch = cluster.get_region_epoch(region_id); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_peer(leader); + ctx.set_region_epoch(epoch); + + let env = Arc::new(Environment::new(1)); + let channel = ChannelBuilder::new(env).connect(&follower_addr); + let client = TikvClient::new(channel); + + // Verify not setting forwarding header will result in store not match. + let mut put_req = kvproto::kvrpcpb::RawPutRequest::default(); + put_req.set_context(ctx.clone()); + let put_resp = client.raw_put(&put_req).unwrap(); + assert!( + put_resp.get_region_error().has_store_not_match(), + "{:?}", + put_resp + ); + assert!(put_resp.error.is_empty(), "{:?}", put_resp); + (cluster, client, leader_addr, ctx) +} diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index ec6cb0a235c..f8e8f594554 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -928,3 +928,41 @@ pub fn must_new_and_configure_cluster_and_kv_client( (cluster, client, ctx) } + +pub fn setup_cluster() -> (Cluster, TikvClient, String, Context) { + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + + let region_id = 1; + let leader = cluster.leader_of_region(region_id).unwrap(); + let leader_addr = cluster.sim.rl().get_addr(leader.get_store_id()); + let region = cluster.get_region(b"k1"); + let follower = region + .get_peers() + .iter() + .find(|p| **p != leader) + .unwrap() + .clone(); + let follower_addr = cluster.sim.rl().get_addr(follower.get_store_id()); + let epoch = cluster.get_region_epoch(region_id); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_peer(leader); + 
ctx.set_region_epoch(epoch); + + let env = Arc::new(Environment::new(1)); + let channel = ChannelBuilder::new(env).connect(&follower_addr); + let client = TikvClient::new(channel); + + // Verify not setting forwarding header will result in store not match. + let mut put_req = kvproto::kvrpcpb::RawPutRequest::default(); + put_req.set_context(ctx.clone()); + let put_resp = client.raw_put(&put_req).unwrap(); + assert!( + put_resp.get_region_error().has_store_not_match(), + "{:?}", + put_resp + ); + assert!(put_resp.error.is_empty(), "{:?}", put_resp); + (cluster, client, leader_addr, ctx) +} diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index d7f6540a3c6..d397d602d84 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -11,7 +11,7 @@ use kvproto::{ use more_asserts::{assert_ge, assert_le}; use protobuf::Message; use test_coprocessor::*; -use test_raftstore::{must_get_equal, new_peer, new_server_cluster}; +use test_raftstore_macro::test_case; use test_storage::*; use tidb_query_datatype::{ codec::{datum, Datum}, @@ -361,14 +361,14 @@ fn test_paging_scan_multi_ranges() { } } -#[test] +// TODO: #[test_case(test_raftstore_v2::must_new_cluster_and_kv_client_mul)] +#[test_case(test_raftstore::must_new_cluster_and_kv_client_mul)] fn test_read_index_lock_checking_on_follower() { - let mut cluster = new_server_cluster(0, 2); - + let (mut cluster, _client, _ctx) = new_cluster(2); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); - let rid = cluster.run_conf_change(); + let rid = 1; cluster.must_put(b"k1", b"v1"); pd_client.must_add_peer(rid, new_peer(2, 2)); must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 4f14c06ad4a..530a963ee0c 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ 
-44,7 +44,7 @@ use tikv::{ gc_worker::sync_gc, service::{batch_commands_request, batch_commands_response}, }, - storage::txn::FLASHBACK_BATCH_SIZE, + storage::{config::EngineType, txn::FLASHBACK_BATCH_SIZE}, }; use tikv_util::{ config::ReadableSize, @@ -53,9 +53,10 @@ use tikv_util::{ }; use txn_types::{Key, Lock, LockType, TimeStamp}; -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_rawkv() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let v0 = b"v0".to_vec(); let v1 = b"v1".to_vec(); let (k, v) = (b"key".to_vec(), b"v2".to_vec()); @@ -123,9 +124,10 @@ fn test_rawkv() { assert!(delete_resp.error.is_empty()); } -#[test] +#[test_case(test_raftstore::must_new_and_configure_cluster)] +#[test_case(test_raftstore_v2::must_new_and_configure_cluster)] fn test_rawkv_ttl() { - let (cluster, leader, ctx) = must_new_and_configure_cluster(|cluster| { + let (cluster, leader, ctx) = new_cluster(|cluster| { cluster.cfg.storage.enable_ttl = true; }); @@ -271,9 +273,10 @@ fn test_rawkv_ttl() { assert!(!prewrite_resp.get_errors().is_empty()); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_mvcc_basic() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let (k, v) = (b"key".to_vec(), b"value".to_vec()); let mut ts = 0; @@ -336,9 +339,10 @@ fn test_mvcc_basic() { } } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_mvcc_rollback_and_cleanup() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let (k, v) = (b"key".to_vec(), b"value".to_vec()); let mut ts = 0; @@ -551,10 +555,10 @@ fn 
test_mvcc_resolve_lock_gc_and_delete() { assert!(del_resp.error.is_empty()); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] #[cfg(feature = "failpoints")] fn test_mvcc_flashback_failed_after_first_batch() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let mut ts = 0; for i in 0..FLASHBACK_BATCH_SIZE * 2 { // Meet the constraints of the alphabetical order for test @@ -672,9 +676,9 @@ fn test_mvcc_flashback_failed_after_first_batch() { ); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] fn test_mvcc_flashback() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let mut ts = 0; // Need to write many batches. for i in 0..2000 { @@ -714,9 +718,9 @@ fn test_mvcc_flashback() { must_kv_read_equal(&client, ctx, b"key@1".to_vec(), b"value@1".to_vec(), ts); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] fn test_mvcc_flashback_block_rw() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); // Prepare the flashback. must_prepare_flashback(&client, ctx.clone(), 1, 2); // Try to read version 3 (after flashback, FORBIDDEN). @@ -772,9 +776,9 @@ fn test_mvcc_flashback_block_rw() { must_finish_flashback(&client, ctx, 1, 2, 3); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] fn test_mvcc_flashback_block_scheduling() { - let (mut cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (mut cluster, client, ctx) = new_cluster(); // Prepare the flashback. must_prepare_flashback(&client, ctx.clone(), 0, 1); // Try to transfer leader. 
@@ -789,9 +793,9 @@ fn test_mvcc_flashback_block_scheduling() { must_finish_flashback(&client, ctx, 0, 1, 2); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] fn test_mvcc_flashback_unprepared() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let (k, v) = (b"key".to_vec(), b"value".to_vec()); let mut ts = 0; write_and_read_key(&client, &ctx, &mut ts, k.clone(), v.clone()); @@ -825,9 +829,9 @@ fn test_mvcc_flashback_unprepared() { assert_eq!(get_resp.value, b"".to_vec()); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] fn test_mvcc_flashback_with_unlimited_range() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let (k, v) = (b"key".to_vec(), b"value".to_vec()); let mut ts = 0; write_and_read_key(&client, &ctx, &mut ts, k.clone(), v.clone()); @@ -865,9 +869,10 @@ fn test_mvcc_flashback_with_unlimited_range() { // raft related RPC is tested as parts of test_snapshot.rs, so skip here. 
-#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_coprocessor() { - let (_cluster, client, _) = must_new_cluster_and_kv_client(); + let (_cluster, client, _) = new_cluster(); // SQL push down commands let mut req = Request::default(); req.set_tp(REQ_TYPE_DAG); @@ -1238,9 +1243,10 @@ fn test_double_run_node() { cluster.shutdown(); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_pessimistic_lock() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let (k, v) = (b"key".to_vec(), b"value".to_vec()); // Prewrite @@ -1294,9 +1300,10 @@ fn test_pessimistic_lock() { } } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_pessimistic_lock_resumable() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); // Resumable pessimistic lock request with multi-key is not supported yet. 
let resp = kv_pessimistic_lock_resumable( @@ -1516,9 +1523,10 @@ fn test_pessimistic_lock_resumable() { } } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_check_txn_status_with_max_ts() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let (k, v) = (b"key".to_vec(), b"value".to_vec()); let lock_ts = 10; @@ -1538,29 +1546,10 @@ fn test_check_txn_status_with_max_ts() { must_kv_commit(&client, ctx, vec![k], lock_ts, lock_ts + 1, lock_ts + 1); } -fn build_client(cluster: &Cluster) -> (TikvClient, Context) { - let region = cluster.get_region(b""); - let leader = region.get_peers()[0].clone(); - let addr = cluster.sim.rl().get_addr(leader.get_store_id()); - - let env = Arc::new(Environment::new(1)); - let channel = ChannelBuilder::new(env).connect(&addr); - let client = TikvClient::new(channel); - - let mut ctx = Context::default(); - ctx.set_region_id(leader.get_id()); - ctx.set_region_epoch(region.get_region_epoch().clone()); - ctx.set_peer(leader); - - (client, ctx) -} - -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_batch_commands() { - let mut cluster = new_server_cluster(0, 1); - cluster.run(); - - let (client, _) = build_client(&cluster); + let (_cluster, client, _ctx) = new_cluster(); let (mut sender, receiver) = client.batch_commands().unwrap(); for _ in 0..1000 { let mut batch_req = BatchCommandsRequest::default(); @@ -1591,12 +1580,10 @@ fn test_batch_commands() { rx.recv_timeout(Duration::from_secs(1)).unwrap(); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_empty_commands() { - let mut cluster = new_server_cluster(0, 1); - cluster.run(); - - let (client, _) = build_client(&cluster); + let (_cluster, client, 
_ctx) = new_cluster(); let (mut sender, receiver) = client.batch_commands().unwrap(); for _ in 0..1000 { let mut batch_req = BatchCommandsRequest::default(); @@ -1631,12 +1618,10 @@ fn test_empty_commands() { rx.recv_timeout(Duration::from_secs(5)).unwrap(); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_async_commit_check_txn_status() { - let mut cluster = new_server_cluster(0, 1); - cluster.run(); - - let (client, ctx) = build_client(&cluster); + let (cluster, client, ctx) = new_cluster(); let start_ts = block_on(cluster.pd_client.get_tso()).unwrap(); let mut req = PrewriteRequest::default(); @@ -1661,16 +1646,14 @@ fn test_async_commit_check_txn_status() { assert_ne!(resp.get_action(), Action::MinCommitTsPushed); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_prewrite_check_max_commit_ts() { - let mut cluster = new_server_cluster(0, 1); - cluster.run(); + let (cluster, client, ctx) = new_cluster(); let cm = cluster.sim.read().unwrap().get_concurrency_manager(1); cm.update_max_ts(100.into()); - let (client, ctx) = build_client(&cluster); - let mut req = PrewriteRequest::default(); req.set_context(ctx.clone()); req.set_primary_lock(b"k1".to_vec()); @@ -1732,9 +1715,10 @@ fn test_prewrite_check_max_commit_ts() { cm.read_range_check(None, None, |_, _| Err(())).unwrap(); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_txn_heart_beat() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let mut req = TxnHeartBeatRequest::default(); let k = b"k".to_vec(); let start_ts = 10; @@ -1754,9 +1738,11 @@ fn test_txn_heart_beat() { ); } -fn test_with_memory_lock_cluster(f: impl FnOnce(TikvClient, Context, /* raw_key */ Vec, 
Lock)) { - let (cluster, client, ctx) = must_new_cluster_and_kv_client(); - let cm = cluster.sim.read().unwrap().get_concurrency_manager(1); +fn test_with_memory_lock_cluster( + cm: ConcurrencyManager, + client: TikvClient, + f: impl FnOnce(TikvClient, /* raw_key */ Vec, Lock), +) { let raw_key = b"key".to_vec(); let key = Key::from_raw(&raw_key); let guard = block_on(cm.lock_key(&key)); @@ -1774,12 +1760,15 @@ fn test_with_memory_lock_cluster(f: impl FnOnce(TikvClient, Context, /* raw_key guard.with_lock(|l| { *l = Some(lock.clone()); }); - f(client, ctx, raw_key, lock); + f(client, raw_key, lock); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_batch_get_memory_lock() { - test_with_memory_lock_cluster(|client, ctx, raw_key, lock| { + let (cluster, client, ctx) = new_cluster(); + let cm = cluster.sim.read().unwrap().get_concurrency_manager(1); + test_with_memory_lock_cluster(cm, client, |client, raw_key, lock| { let mut req = BatchGetRequest::default(); req.set_context(ctx); req.set_keys(vec![b"unlocked".to_vec(), raw_key.clone()].into()); @@ -1791,9 +1780,12 @@ fn test_batch_get_memory_lock() { }); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_kv_scan_memory_lock() { - test_with_memory_lock_cluster(|client, ctx, raw_key, lock| { + let (cluster, client, ctx) = new_cluster(); + let cm = cluster.sim.read().unwrap().get_concurrency_manager(1); + test_with_memory_lock_cluster(cm, client, |client, raw_key, lock| { let mut req = ScanRequest::default(); req.set_context(ctx); req.set_start_key(b"a".to_vec()); @@ -1847,51 +1839,12 @@ macro_rules! 
test_func_init { }}; } -fn setup_cluster() -> (Cluster, TikvClient, CallOption, Context) { - let mut cluster = new_server_cluster(0, 3); - cluster.run(); - - let region_id = 1; - let leader = cluster.leader_of_region(region_id).unwrap(); - let leader_addr = cluster.sim.rl().get_addr(leader.get_store_id()); - let region = cluster.get_region(b"k1"); - let follower = region - .get_peers() - .iter() - .find(|p| **p != leader) - .unwrap() - .clone(); - let follower_addr = cluster.sim.rl().get_addr(follower.get_store_id()); - let epoch = cluster.get_region_epoch(region_id); - let mut ctx = Context::default(); - ctx.set_region_id(region_id); - ctx.set_peer(leader); - ctx.set_region_epoch(epoch); - - let env = Arc::new(Environment::new(1)); - let channel = ChannelBuilder::new(env).connect(&follower_addr); - let client = TikvClient::new(channel); - - // Verify not setting forwarding header will result in store not match. - let mut put_req = RawPutRequest::default(); - put_req.set_context(ctx.clone()); - let put_resp = client.raw_put(&put_req).unwrap(); - assert!( - put_resp.get_region_error().has_store_not_match(), - "{:?}", - put_resp - ); - assert!(put_resp.error.is_empty(), "{:?}", put_resp); - - let call_opt = server::build_forward_option(&leader_addr).timeout(Duration::from_secs(3)); - (cluster, client, call_opt, ctx) -} - /// Check all supported requests can go through proxy correctly. -#[test] +#[test_case(test_raftstore::setup_cluster)] +#[test_case(test_raftstore_v2::setup_cluster)] fn test_tikv_forwarding() { - let (_cluster, client, call_opt, ctx) = setup_cluster(); - + let (_cluster, client, leader_addr, ctx) = new_cluster(); + let call_opt = server::build_forward_option(&leader_addr).timeout(Duration::from_secs(3)); // Verify not setting forwarding header will result in store not match. 
let mut put_req = RawPutRequest::default(); put_req.set_context(ctx.clone()); @@ -2049,9 +2002,11 @@ fn test_tikv_forwarding() { /// Test if forwarding works correctly if the target node is shutdown and /// restarted. -#[test] +#[test_case(test_raftstore::setup_cluster)] +#[test_case(test_raftstore_v2::setup_cluster)] fn test_forwarding_reconnect() { - let (mut cluster, client, call_opt, ctx) = setup_cluster(); + let (mut cluster, client, leader_addr, ctx) = new_cluster(); + let call_opt = server::build_forward_option(&leader_addr).timeout(Duration::from_secs(3)); let leader = cluster.leader_of_region(1).unwrap(); cluster.stop_node(leader.get_store_id()); @@ -2074,11 +2029,10 @@ fn test_forwarding_reconnect() { assert!(!resp.get_region_error().has_store_not_match(), "{:?}", resp); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_health_check() { - let mut cluster = new_server_cluster(0, 1); - cluster.run(); - + let (mut cluster, _client, _ctx) = new_cluster(); let addr = cluster.sim.rl().get_addr(1); let env = Arc::new(Environment::new(1)); @@ -2095,9 +2049,10 @@ fn test_health_check() { client.check(&req).unwrap_err(); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_get_lock_wait_info_api() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let client2 = client.clone(); let mut ctx1 = ctx.clone(); @@ -2139,7 +2094,8 @@ fn test_get_lock_wait_info_api() { // * rfc: https://github.com/tikv/rfcs/blob/master/text/0069-api-v2.md. // * proto: https://github.com/pingcap/kvproto/blob/master/proto/kvrpcpb.proto, // enum APIVersion. 
-#[test] +#[test_case(test_raftstore::must_new_and_configure_cluster)] +#[test_case(test_raftstore_v2::must_new_and_configure_cluster)] fn test_txn_api_version() { const TIDB_KEY_CASE: &[u8] = b"t_a"; const TXN_KEY_CASE: &[u8] = b"x\0a"; @@ -2198,9 +2154,8 @@ fn test_txn_api_version() { for (i, (storage_api_version, req_api_version, key, errcode)) in test_data.into_iter().enumerate() { - let (cluster, leader, mut ctx) = must_new_and_configure_cluster(|cluster| { - cluster.cfg.storage.set_api_version(storage_api_version) - }); + let (cluster, leader, mut ctx) = + new_cluster(|cluster| cluster.cfg.storage.set_api_version(storage_api_version)); let env = Arc::new(Environment::new(1)); let channel = ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); @@ -2339,9 +2294,10 @@ fn test_txn_api_version() { } } -#[test] +#[test_case(test_raftstore::must_new_and_configure_cluster)] +#[test_case(test_raftstore_v2::must_new_and_configure_cluster)] fn test_storage_with_quota_limiter_enable() { - let (cluster, leader, ctx) = must_new_and_configure_cluster(|cluster| { + let (cluster, leader, ctx) = new_cluster(|cluster| { // write_bandwidth is limited to 1, which means that every write request will // trigger the limit. let quota_config = QuotaConfig { @@ -2375,9 +2331,10 @@ fn test_storage_with_quota_limiter_enable() { assert!(begin.elapsed() > Duration::from_millis(500)); } -#[test] +#[test_case(test_raftstore::must_new_and_configure_cluster)] +#[test_case(test_raftstore_v2::must_new_and_configure_cluster)] fn test_storage_with_quota_limiter_disable() { - let (cluster, leader, ctx) = must_new_and_configure_cluster(|cluster| { + let (cluster, leader, ctx) = new_cluster(|cluster| { // all limit set to 0, which means quota limiter not work. 
let quota_config = QuotaConfig::default(); cluster.cfg.quota = quota_config; @@ -2405,10 +2362,11 @@ fn test_storage_with_quota_limiter_disable() { assert!(begin.elapsed() < Duration::from_millis(500)); } -#[test] +#[test_case(test_raftstore::must_new_and_configure_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_and_configure_cluster_and_kv_client)] fn test_commands_write_detail() { test_util::init_log_for_test(); - let (_cluster, client, ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { + let (cluster, client, ctx) = new_cluster(|cluster| { cluster.cfg.pessimistic_txn.pipelined = false; cluster.cfg.pessimistic_txn.in_memory = false; }); @@ -2428,7 +2386,11 @@ fn test_commands_write_detail() { // Mutex has been removed from write path. // Ref https://github.com/facebook/rocksdb/pull/7516 // assert!(wd.get_apply_mutex_lock_nanos() > 0); - assert!(wd.get_apply_write_wal_nanos() > 0); + + // MultiRocksDB does not have wal + if cluster.cfg.storage.engine == EngineType::RaftKv { + assert!(wd.get_apply_write_wal_nanos() > 0); + } assert!(wd.get_apply_write_memtable_nanos() > 0); assert!(wd.get_process_nanos() > 0); }; @@ -2518,12 +2480,10 @@ fn test_commands_write_detail() { ); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_rpc_wall_time() { - let mut cluster = new_server_cluster(0, 1); - cluster.run(); - - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let k = b"key".to_vec(); let mut get_req = GetRequest::default(); get_req.set_context(ctx); @@ -2584,9 +2544,10 @@ fn test_rpc_wall_time() { } } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_pessimistic_lock_execution_tracking() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = 
new_cluster(); let (k, v) = (b"k1".to_vec(), b"k2".to_vec()); // Add a prewrite lock. From aa82f08c5a7fb42aa71273a6cd11eb085d845b98 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 10 May 2023 17:12:09 +0800 Subject: [PATCH 0682/1149] raftstore-v2: implement delete range (#14714) close tikv/tikv#14723 implement delete range for raftstore-v2 Signed-off-by: SpadeA-Tang --- .../raftstore-v2/src/operation/command/mod.rs | 3 + .../src/operation/command/write/mod.rs | 101 ++++++++++++++++-- components/raftstore-v2/src/raft/apply.rs | 9 ++ components/test_raftstore-v2/src/cluster.rs | 8 +- components/test_raftstore-v2/src/util.rs | 37 ++++++- components/test_raftstore/src/util.rs | 37 ++++++- tests/integrations/raftstore/test_single.rs | 50 ++------- 7 files changed, 190 insertions(+), 55 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 11ada3697c0..293afdcc49b 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -486,6 +486,7 @@ impl Apply { dr.start_key, dr.end_key, dr.notify_only, + self.use_delete_range(), ); } SimpleWrite::Ingest(_) => { @@ -598,6 +599,7 @@ impl Apply { dr.start_key, dr.end_key, dr.notify_only, + self.use_delete_range(), )?; } SimpleWrite::Ingest(ssts) => { @@ -685,6 +687,7 @@ impl Apply { dr.get_start_key(), dr.get_end_key(), dr.get_notify_only(), + self.use_delete_range(), )?; } _ => unimplemented!(), diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 9f4afec9ad6..a12d3e68f45 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -1,6 +1,9 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{data_cf_offset, KvEngine, Mutable, RaftEngine, CF_DEFAULT}; +use engine_traits::{ + data_cf_offset, DeleteStrategy, KvEngine, Mutable, RaftEngine, Range as EngineRange, ALL_CFS, + CF_DEFAULT, +}; use kvproto::raft_cmdpb::RaftRequestHeader; use raftstore::{ store::{ @@ -12,7 +15,8 @@ use raftstore::{ }, Error, Result, }; -use tikv_util::slog_panic; +use slog::info; +use tikv_util::{box_err, slog_panic}; use crate::{ batch::StoreContext, @@ -222,13 +226,94 @@ impl Apply { #[inline] pub fn apply_delete_range( &mut self, - _cf: &str, - _index: u64, - _start_key: &[u8], - _end_key: &[u8], - _notify_only: bool, + mut cf: &str, + index: u64, + start_key: &[u8], + end_key: &[u8], + notify_only: bool, + use_delete_range: bool, ) -> Result<()> { - // TODO: reuse the same delete as split/merge. + PEER_WRITE_CMD_COUNTER.delete_range.inc(); + let off = data_cf_offset(cf); + if self.should_skip(off, index) { + return Ok(()); + } + if !end_key.is_empty() && start_key >= end_key { + return Err(box_err!( + "invalid delete range command, start_key: {:?}, end_key: {:?}", + start_key, + end_key + )); + } + // region key range has no data prefix, so we must use origin key to check. 
+ util::check_key_in_region(start_key, self.region())?; + let end_key = keys::data_end_key(end_key); + let region_end_key = keys::data_end_key(self.region().get_end_key()); + if end_key > region_end_key { + return Err(Error::KeyNotInRegion( + end_key.to_vec(), + self.region().clone(), + )); + } + + if cf.is_empty() { + cf = CF_DEFAULT; + } + + if !ALL_CFS.iter().any(|x| *x == cf) { + return Err(box_err!("invalid delete range command, cf: {:?}", cf)); + } + + let start_key = keys::data_key(start_key); + + info!( + self.logger, + "execute delete range"; + "range_start" => log_wrappers::Value::key(&start_key), + "range_end" => log_wrappers::Value::key(&end_key), + "notify_only" => notify_only, + "use_delete_range" => use_delete_range, + ); + + // Use delete_files_in_range to drop as many sst files as possible, this + // is a way to reclaim disk space quickly after drop a table/index. + if !notify_only { + let range = vec![EngineRange::new(&start_key, &end_key)]; + let fail_f = |e: engine_traits::Error, strategy: DeleteStrategy| { + slog_panic!( + self.logger, + "failed to delete"; + "strategy" => ?strategy, + "range_start" => log_wrappers::Value::key(&start_key), + "range_end" => log_wrappers::Value::key(&end_key), + "error" => ?e, + ) + }; + let tablet = self.tablet(); + tablet + .delete_ranges_cf(cf, DeleteStrategy::DeleteFiles, &range) + .unwrap_or_else(|e| fail_f(e, DeleteStrategy::DeleteFiles)); + + let strategy = if use_delete_range { + DeleteStrategy::DeleteByRange + } else { + DeleteStrategy::DeleteByKey + }; + // Delete all remaining keys. + tablet + .delete_ranges_cf(cf, strategy.clone(), &range) + .unwrap_or_else(move |e| fail_f(e, strategy)); + + // to do: support titan? 
+ // tablet + // .delete_ranges_cf(cf, DeleteStrategy::DeleteBlobs, &range) + // .unwrap_or_else(move |e| fail_f(e, + // DeleteStrategy::DeleteBlobs)); + } + if index != u64::MAX { + self.modifications_mut()[off] = index; + } + Ok(()) } } diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index bf1c81e88c8..986c841233e 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -74,6 +74,9 @@ pub struct Apply { checkpoint_scheduler: Scheduler, + // Whether to use the delete range API instead of deleting one by one. + use_delete_range: bool, + pub(crate) metrics: ApplyMetrics, pub(crate) logger: Logger, pub(crate) buckets: Option, @@ -128,6 +131,7 @@ impl Apply { buckets, sst_importer, checkpoint_scheduler, + use_delete_range: cfg.use_delete_range, observe: Observe { info: CmdObserveInfo::default(), level: ObserveLevel::None, @@ -318,4 +322,9 @@ impl Apply { pub fn checkpoint_scheduler(&self) -> &Scheduler { &self.checkpoint_scheduler } + + #[inline] + pub fn use_delete_range(&self) -> bool { + self.use_delete_range + } } diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 9f30070fea7..1a3c69c1bb5 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -251,7 +251,13 @@ pub trait Simulator { write_encoder.delete(delete.get_cf(), delete.get_key()); } CmdType::DeleteRange => { - unimplemented!() + let delete_range = req.get_delete_range(); + write_encoder.delete_range( + delete_range.get_cf(), + delete_range.get_start_key(), + delete_range.get_end_key(), + delete_range.get_notify_only(), + ); } _ => unreachable!(), } diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index b9e6464c5d8..6e942e45e75 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -5,12 +5,12 @@ use 
std::{fmt::Write, sync::Arc, thread, time::Duration}; use encryption_export::{data_key_manager_from_config, DataKeyManager}; use engine_rocks::{RocksEngine, RocksStatistics}; use engine_test::raft::RaftTestEngine; -use engine_traits::{KvEngine, TabletRegistry, CF_DEFAULT}; +use engine_traits::{CfName, KvEngine, TabletRegistry, CF_DEFAULT}; use file_system::IoRateLimiter; use futures::Future; use kvproto::{kvrpcpb::Context, metapb, raft_cmdpb::RaftCmdResponse}; use raftstore::Result; -use rand::RngCore; +use rand::{prelude::SliceRandom, RngCore}; use server::common::ConfiguredRaftEngine; use tempfile::TempDir; use test_raftstore::{new_get_cmd, new_put_cf_cmd, new_request, Config}; @@ -233,3 +233,36 @@ pub fn async_read_on_peer, EK: KvEngine>( request.mut_header().set_replica_read(replica_read); cluster.sim.wl().async_read(request) } + +pub fn test_delete_range, EK: KvEngine>(cluster: &mut Cluster, cf: CfName) { + let data_set: Vec<_> = (1..500) + .map(|i| { + ( + format!("key{:08}", i).into_bytes(), + format!("value{}", i).into_bytes(), + ) + }) + .collect(); + for kvs in data_set.chunks(50) { + let requests = kvs.iter().map(|(k, v)| new_put_cf_cmd(cf, k, v)).collect(); + // key9 is always the last region. + cluster.batch_put(b"key9", requests).unwrap(); + } + + // delete_range request with notify_only set should not actually delete data. + cluster.must_notify_delete_range_cf(cf, b"", b""); + + let mut rng = rand::thread_rng(); + for _ in 0..50 { + let (k, v) = data_set.choose(&mut rng).unwrap(); + assert_eq!(cluster.get_cf(cf, k).unwrap(), *v); + } + + // Empty keys means the whole range. 
+ cluster.must_delete_range_cf(cf, b"", b""); + + for _ in 0..50 { + let k = &data_set.choose(&mut rng).unwrap().0; + assert!(cluster.get_cf(cf, k).is_none()); + } +} diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index cdfe5c8f475..2ed565745a7 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -16,7 +16,7 @@ use encryption_export::{ use engine_rocks::{config::BlobRunMode, RocksEngine, RocksSnapshot, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{ - CfNamesExt, Engines, Iterable, KvEngine, Peekable, RaftEngineDebug, RaftEngineReadOnly, + CfName, CfNamesExt, Engines, Iterable, KvEngine, Peekable, RaftEngineDebug, RaftEngineReadOnly, CF_DEFAULT, CF_RAFT, }; use file_system::IoRateLimiter; @@ -42,7 +42,7 @@ use raftstore::{ store::{fsm::RaftRouter, *}, RaftRouterCompactedEventSender, Result, }; -use rand::RngCore; +use rand::{seq::SliceRandom, RngCore}; use server::common::ConfiguredRaftEngine; use tempfile::TempDir; use test_pd_client::TestPdClient; @@ -1430,3 +1430,36 @@ pub fn wait_for_synced(cluster: &mut Cluster, node_id: u64, regio } assert!(snapshot.ext().is_max_ts_synced()); } + +pub fn test_delete_range(cluster: &mut Cluster, cf: CfName) { + let data_set: Vec<_> = (1..500) + .map(|i| { + ( + format!("key{:08}", i).into_bytes(), + format!("value{}", i).into_bytes(), + ) + }) + .collect(); + for kvs in data_set.chunks(50) { + let requests = kvs.iter().map(|(k, v)| new_put_cf_cmd(cf, k, v)).collect(); + // key9 is always the last region. + cluster.batch_put(b"key9", requests).unwrap(); + } + + // delete_range request with notify_only set should not actually delete data. + cluster.must_notify_delete_range_cf(cf, b"", b""); + + let mut rng = rand::thread_rng(); + for _ in 0..50 { + let (k, v) = data_set.choose(&mut rng).unwrap(); + assert_eq!(cluster.get_cf(cf, k).unwrap(), *v); + } + + // Empty keys means the whole range. 
+ cluster.must_delete_range_cf(cf, b"", b""); + + for _ in 0..50 { + let k = &data_set.choose(&mut rng).unwrap().0; + assert!(cluster.get_cf(cf, k).is_none()); + } +} diff --git a/tests/integrations/raftstore/test_single.rs b/tests/integrations/raftstore/test_single.rs index 7fedc3c1cd4..d21a134a0c3 100644 --- a/tests/integrations/raftstore/test_single.rs +++ b/tests/integrations/raftstore/test_single.rs @@ -2,51 +2,15 @@ use std::time::Duration; -use engine_traits::{CfName, CF_DEFAULT, CF_WRITE}; +use engine_traits::{CF_DEFAULT, CF_WRITE}; use raftstore::store::RAFT_INIT_LOG_INDEX; use rand::prelude::*; -use test_raftstore::{new_put_cf_cmd, new_put_cmd, new_request, sleep_ms}; +use test_raftstore::{new_put_cmd, new_request, sleep_ms}; use test_raftstore_macro::test_case; use tikv_util::{config::*, time::Instant}; // TODO add epoch not match test cases. -fn test_delete_range( - cluster: &mut test_raftstore::Cluster, - cf: CfName, -) { - let data_set: Vec<_> = (1..500) - .map(|i| { - ( - format!("key{:08}", i).into_bytes(), - format!("value{}", i).into_bytes(), - ) - }) - .collect(); - for kvs in data_set.chunks(50) { - let requests = kvs.iter().map(|(k, v)| new_put_cf_cmd(cf, k, v)).collect(); - // key9 is always the last region. - cluster.batch_put(b"key9", requests).unwrap(); - } - - // delete_range request with notify_only set should not actually delete data. - cluster.must_notify_delete_range_cf(cf, b"", b""); - - let mut rng = rand::thread_rng(); - for _ in 0..50 { - let (k, v) = data_set.choose(&mut rng).unwrap(); - assert_eq!(cluster.get_cf(cf, k).unwrap(), *v); - } - - // Empty keys means the whole range. 
- cluster.must_delete_range_cf(cf, b"", b""); - - for _ in 0..50 { - let k = &data_set.choose(&mut rng).unwrap().0; - assert!(cluster.get_cf(cf, k).is_none()); - } -} - #[test_case(test_raftstore::new_node_cluster)] #[test_case(test_raftstore::new_server_cluster)] #[test_case(test_raftstore_v2::new_node_cluster)] @@ -127,9 +91,10 @@ fn test_delete() { } } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_use_delete_range() { - let mut cluster = test_raftstore::new_node_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.cfg.raft_store.use_delete_range = true; cluster.run(); test_delete_range(&mut cluster, CF_DEFAULT); @@ -137,9 +102,10 @@ fn test_node_use_delete_range() { test_delete_range(&mut cluster, CF_WRITE); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_not_use_delete_range() { - let mut cluster = test_raftstore::new_node_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.cfg.raft_store.use_delete_range = false; cluster.run(); test_delete_range(&mut cluster, CF_DEFAULT); From 657f70c0cc15855d950d666b9fd1db423561f2b9 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 10 May 2023 17:38:08 +0800 Subject: [PATCH 0683/1149] raftstore-v2: send tablet when offload checkpoint (#14720) close tikv/tikv#14711 Signed-off-by: SpadeA-Tang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/batch/store.rs | 2 +- components/raftstore-v2/src/fsm/apply.rs | 2 +- .../src/operation/command/admin/split.rs | 31 +++++-------------- components/raftstore-v2/src/raft/apply.rs | 6 ++-- .../raftstore-v2/src/worker/checkpoint.rs | 29 +++++------------ 5 files changed, 21 insertions(+), 49 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 
9b01501ddd9..d77786c9e6f 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -496,7 +496,7 @@ pub struct Schedulers { pub read: Scheduler>, pub pd: Scheduler, pub tablet: Scheduler>, - pub checkpoint: Scheduler, + pub checkpoint: Scheduler>, pub write: WriteSenders, // Following is not maintained by raftstore itself. diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 9a3bc753810..99ce483f35e 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -78,7 +78,7 @@ impl ApplyFsm { res_reporter: R, tablet_registry: TabletRegistry, read_scheduler: Scheduler>, - checkpoint_scheduler: Scheduler, + checkpoint_scheduler: Scheduler>, flush_state: Arc, log_recovery: Option>, applied_term: u64, diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index cd2c8428a46..9ac4de5e3c0 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -25,13 +25,7 @@ //! created by the store, and here init it using the data sent from the parent //! peer. 
-use std::{ - any::Any, - borrow::Cow, - cmp, - path::{Path, PathBuf}, - time::Duration, -}; +use std::{any::Any, borrow::Cow, cmp, path::PathBuf, time::Duration}; use collections::HashSet; use crossbeam::channel::SendError; @@ -480,19 +474,10 @@ impl Apply { .map(|r| r.get_id()) .filter(|id| id != ®ion_id) .collect::>(); - let (_, _, cur_suffix) = self - .tablet_registry() - .parse_tablet_name(Path::new(self.tablet().path())) - .unwrap(); let scheduler: _ = self.checkpoint_scheduler().clone(); - let checkpoint_duration = async_checkpoint( - &scheduler, - region_id, - split_region_ids, - cur_suffix, - log_index, - ) - .await; + let tablet = self.tablet().clone(); + let checkpoint_duration = + async_checkpoint(tablet, &scheduler, region_id, split_region_ids, log_index).await; // It should equal to checkpoint_duration + the duration of rescheduling current // apply peer @@ -538,16 +523,16 @@ impl Apply { // asynchronously execute the checkpoint creation and return the duration spent // by it -async fn async_checkpoint( - scheduler: &Scheduler, +async fn async_checkpoint( + tablet: EK, + scheduler: &Scheduler>, parent_region: u64, split_regions: Vec, - cur_suffix: u64, log_index: u64, ) -> Duration { let (tx, rx) = oneshot::channel(); let task = checkpoint::Task::Checkpoint { - cur_suffix, + tablet, log_index, parent_region, split_regions, diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 986c841233e..80ff21ebfbd 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -72,7 +72,7 @@ pub struct Apply { observe: Observe, coprocessor_host: CoprocessorHost, - checkpoint_scheduler: Scheduler, + checkpoint_scheduler: Scheduler>, // Whether to use the delete range API instead of deleting one by one. 
use_delete_range: bool, @@ -97,7 +97,7 @@ impl Apply { buckets: Option, sst_importer: Arc, coprocessor_host: CoprocessorHost, - checkpoint_scheduler: Scheduler, + checkpoint_scheduler: Scheduler>, logger: Logger, ) -> Self { let mut remote_tablet = tablet_registry @@ -319,7 +319,7 @@ impl Apply { } #[inline] - pub fn checkpoint_scheduler(&self) -> &Scheduler { + pub fn checkpoint_scheduler(&self) -> &Scheduler> { &self.checkpoint_scheduler } diff --git a/components/raftstore-v2/src/worker/checkpoint.rs b/components/raftstore-v2/src/worker/checkpoint.rs index 0cbc9a11a42..e10f62584d5 100644 --- a/components/raftstore-v2/src/worker/checkpoint.rs +++ b/components/raftstore-v2/src/worker/checkpoint.rs @@ -1,10 +1,6 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use std::{ - fmt::Display, - path::{Path, PathBuf}, - time::Duration, -}; +use std::{fmt::Display, path::PathBuf, time::Duration}; use engine_traits::{Checkpointer, KvEngine, TabletRegistry}; use futures::channel::oneshot::Sender; @@ -14,18 +10,18 @@ use tikv_util::{slog_panic, time::Instant, worker::Runnable}; use crate::operation::SPLIT_PREFIX; -pub enum Task { +pub enum Task { Checkpoint { // it is only used to assert - cur_suffix: u64, log_index: u64, parent_region: u64, split_regions: Vec, + tablet: EK, sender: Sender, }, } -impl Display for Task { +impl Display for Task { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Task::Checkpoint { @@ -64,20 +60,11 @@ impl Runner { &self, parent_region: u64, split_regions: Vec, - cur_suffix: u64, log_index: u64, + tablet: EK, sender: Sender, ) { let now = Instant::now(); - - let mut cache = self.tablet_registry.get(parent_region).unwrap(); - let tablet = cache.latest().unwrap(); - let (_, _, suffix) = self - .tablet_registry - .parse_tablet_name(Path::new(tablet.path())) - .unwrap(); - assert_eq!(cur_suffix, suffix); - let mut checkpointer = tablet.new_checkpointer().unwrap_or_else(|e| { slog_panic!( 
self.logger, @@ -127,18 +114,18 @@ impl Runner { } impl Runnable for Runner { - type Task = Task; + type Task = Task; fn run(&mut self, task: Self::Task) { match task { Task::Checkpoint { - cur_suffix, log_index, parent_region, split_regions, + tablet, sender, } => { - self.checkpoint(parent_region, split_regions, cur_suffix, log_index, sender); + self.checkpoint(parent_region, split_regions, log_index, tablet, sender); } } } From 288d173385a653e2ff08bee8a71237fbf157cb56 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 10 May 2023 17:58:08 +0800 Subject: [PATCH 0684/1149] tikv control (tikv-client) for raftstore-v2 (#14698) ref tikv/tikv#14654 implement tikv-client for raftstore-v2 Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- cmd/tikv-ctl/src/executor.rs | 24 +- components/server/src/server.rs | 13 +- components/server/src/server2.rs | 31 +- components/test_raftstore-v2/src/server.rs | 53 ++- components/test_raftstore/src/server.rs | 12 +- src/lib.rs | 2 + src/server/debug.rs | 477 +++++++++++---------- src/server/debug2.rs | 116 +++-- src/server/service/debug.rs | 41 +- tests/integrations/server/debugger.rs | 4 +- tests/integrations/server/kv_service.rs | 128 +++++- 11 files changed, 582 insertions(+), 319 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 69ac2dc1058..a0d4a039d2c 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -32,8 +32,8 @@ use slog_global::crit; use tikv::{ config::{ConfigController, TikvConfig}, server::{ - debug::{BottommostLevelCompaction, Debugger, RegionInfo}, - debug2::DebuggerV2, + debug::{BottommostLevelCompaction, Debugger, DebuggerImpl, RegionInfo}, + debug2::DebuggerImplV2, KvEngineFactoryBuilder, }, storage::config::EngineType, @@ -99,7 +99,7 @@ pub fn new_debug_executor( Err(e) => handle_engine_error(e), }; - let debugger = 
Debugger::new(Engines::new(kv_db, raft_db), cfg_controller); + let debugger = DebuggerImpl::new(Engines::new(kv_db, raft_db), cfg_controller); Box::new(debugger) as Box } else { let mut config = cfg.raft_engine.config(); @@ -116,14 +116,14 @@ pub fn new_debug_executor( Err(e) => handle_engine_error(e), }; - let debugger = Debugger::new(Engines::new(kv_db, raft_db), cfg_controller); + let debugger = DebuggerImpl::new(Engines::new(kv_db, raft_db), cfg_controller); Box::new(debugger) as Box } EngineType::RaftKv2 => { let registry = TabletRegistry::new(Box::new(factory), Path::new(data_dir).join("tablets")) .unwrap_or_else(|e| fatal!("failed to create tablet registry {:?}", e)); - let debugger = DebuggerV2::new(registry, raft_db, cfg_controller); + let debugger = DebuggerImplV2::new(registry, raft_db, cfg_controller); Box::new(debugger) as Box } } @@ -894,11 +894,11 @@ impl DebugExecutor for DebugClient { } } -impl DebugExecutor for Debugger { +impl DebugExecutor for DebuggerImpl { fn check_local_mode(&self) {} fn get_all_regions_in_store(&self) -> Vec { - self.get_all_regions_in_store() + Debugger::get_all_regions_in_store(self) .unwrap_or_else(|e| perror_and_exit("Debugger::get_all_regions_in_store", e)) } @@ -954,7 +954,7 @@ impl DebugExecutor for Debugger { threads: u32, bottommost: BottommostLevelCompaction, ) { - self.compact(db, cf, from, to, threads, bottommost) + Debugger::compact(self, db, cf, from, to, threads, bottommost) .unwrap_or_else(|e| perror_and_exit("Debugger::compact", e)); } @@ -998,7 +998,7 @@ impl DebugExecutor for Debugger { } fn recover_all(&self, threads: usize, read_only: bool) { - Debugger::recover_all(self, threads, read_only) + DebuggerImpl::recover_all(self, threads, read_only) .unwrap_or_else(|e| perror_and_exit("Debugger::recover all", e)); } @@ -1143,11 +1143,11 @@ fn handle_engine_error(err: EngineError) -> ! 
{ tikv_util::logger::exit_process_gracefully(-1); } -impl DebugExecutor for DebuggerV2 { +impl DebugExecutor for DebuggerImplV2 { fn check_local_mode(&self) {} fn get_all_regions_in_store(&self) -> Vec { - self.get_all_regions_in_store() + Debugger::get_all_regions_in_store(self) .unwrap_or_else(|e| perror_and_exit("Debugger::get_all_regions_in_store", e)) } @@ -1195,7 +1195,7 @@ impl DebugExecutor for DebuggerV2 { threads: u32, bottommost: BottommostLevelCompaction, ) { - self.compact(db, cf, from, to, threads, bottommost) + Debugger::compact(self, db, cf, from, to, threads, bottommost) .unwrap_or_else(|e| perror_and_exit("Debugger::compact", e)); } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 625e8d8a31b..3f9c27ab645 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -85,6 +85,7 @@ use tikv::{ }, server::{ config::{Config as ServerConfig, ServerConfigManager}, + debug::{Debugger, DebuggerImpl}, gc_worker::{AutoGcConfig, GcWorker}, lock_manager::LockManager, raftkv::ReplicaReadLockChecker, @@ -1030,14 +1031,18 @@ where .unwrap() .register(tikv::config::Module::Import, Box::new(import_cfg_mgr)); + let mut debugger = DebuggerImpl::new( + engines.engines.clone(), + self.cfg_controller.as_ref().unwrap().clone(), + ); + debugger.set_kv_statistics(self.kv_statistics.clone()); + debugger.set_raft_statistics(self.raft_statistics.clone()); + // Debug service. 
let debug_service = DebugService::new( - engines.engines.clone(), - self.kv_statistics.clone(), - self.raft_statistics.clone(), + debugger, servers.server.get_debug_thread_pool().clone(), engines.engine.raft_extension(), - self.cfg_controller.as_ref().unwrap().clone(), ); if servers .server diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 4d1a9f2daf6..6207b778691 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -38,8 +38,9 @@ use grpcio::{EnvBuilder, Environment}; use grpcio_health::HealthService; use kvproto::{ brpb::create_backup, cdcpb_grpc::create_change_data, deadlock::create_deadlock, - diagnosticspb::create_diagnostics, import_sstpb_grpc::create_import_sst, kvrpcpb::ApiVersion, - logbackuppb::create_log_backup, resource_usage_agent::create_resource_metering_pub_sub, + debugpb_grpc::create_debug, diagnosticspb::create_diagnostics, + import_sstpb_grpc::create_import_sst, kvrpcpb::ApiVersion, logbackuppb::create_log_backup, + resource_usage_agent::create_resource_metering_pub_sub, }; use pd_client::{ meta_storage::{Checked, Sourced}, @@ -72,11 +73,13 @@ use tikv::{ }, server::{ config::{Config as ServerConfig, ServerConfigManager}, + debug::Debugger, + debug2::DebuggerImplV2, gc_worker::{AutoGcConfig, GcWorker}, lock_manager::LockManager, raftkv::ReplicaReadLockChecker, resolve, - service::DiagnosticsService, + service::{DebugService, DiagnosticsService}, status_server::StatusServer, KvEngineFactoryBuilder, NodeV2, RaftKv2, Server, CPU_CORES_QUOTA_GAUGE, GRPC_THREAD_PREFIX, }, @@ -906,6 +909,28 @@ where .unwrap() .register(tikv::config::Module::Import, Box::new(import_cfg_mgr)); + let mut debugger = DebuggerImplV2::new( + self.tablet_registry.clone().unwrap(), + self.engines.as_ref().unwrap().raft_engine.clone(), + self.cfg_controller.as_ref().unwrap().clone(), + ); + debugger.set_kv_statistics(self.kv_statistics.clone()); + debugger.set_raft_statistics(self.raft_statistics.clone()); + 
+ // Debug service. + let debug_service = DebugService::new( + debugger, + servers.server.get_debug_thread_pool().clone(), + engines.engine.raft_extension(), + ); + if servers + .server + .register_service(create_debug(debug_service)) + .is_some() + { + fatal!("failed to register debug service"); + } + let cdc_service = cdc::Service::new( self.cdc_scheduler.as_ref().unwrap().clone(), self.cdc_memory_quota.as_ref().unwrap().clone(), diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 28ded8d5371..b95313e8cdd 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -20,7 +20,7 @@ use grpcio::{ChannelBuilder, EnvBuilder, Environment, Error as GrpcError, Servic use grpcio_health::HealthService; use kvproto::{ deadlock_grpc::create_deadlock, - debugpb_grpc::DebugClient, + debugpb_grpc::{create_debug, DebugClient}, diagnosticspb_grpc::create_diagnostics, import_sstpb_grpc::create_import_sst, kvrpcpb::{ApiVersion, Context}, @@ -48,14 +48,20 @@ use tempfile::TempDir; use test_pd_client::TestPdClient; use test_raftstore::{filter_send, AddressMap, Config, Filter}; use tikv::{ + config::ConfigController, coprocessor, coprocessor_v2, import::{ImportSstService, SstImporter}, read_pool::ReadPool, server::{ - gc_worker::GcWorker, load_statistics::ThreadLoadPool, lock_manager::LockManager, - raftkv::ReplicaReadLockChecker, resolve, service::DiagnosticsService, ConnectionBuilder, - Error, Extension, NodeV2, PdStoreAddrResolver, RaftClient, RaftKv2, Result as ServerResult, - Server, ServerTransport, + debug2::DebuggerImplV2, + gc_worker::GcWorker, + load_statistics::ThreadLoadPool, + lock_manager::LockManager, + raftkv::ReplicaReadLockChecker, + resolve, + service::{DebugService, DiagnosticsService}, + ConnectionBuilder, Error, Extension, NodeV2, PdStoreAddrResolver, RaftClient, RaftKv2, + Result as ServerResult, Server, ServerTransport, }, storage::{ self, @@ -73,7 +79,7 @@ use 
tikv_util::{ worker::{Builder as WorkerBuilder, LazyWorker}, Either, HandyRwLock, }; -use tokio::runtime::Builder as TokioBuilder; +use tokio::runtime::{Builder as TokioBuilder, Handle}; use txn_types::TxnExtraScheduler; use crate::{Cluster, RaftStoreRouter, SimulateTransport, Simulator, SnapshotRouter}; @@ -251,6 +257,7 @@ pub struct ServerMeta { } type PendingServices = Vec Service>>; +type PendingDebugService = Box, Handle) -> Service>; pub struct ServerCluster { metas: HashMap>, @@ -265,6 +272,9 @@ pub struct ServerCluster { concurrency_managers: HashMap, env: Arc, pub pending_services: HashMap, + // This is used to work around that server cluster is generic over KvEngine while the debug + // service implementation is specific overal RocksDB. + pub pending_debug_service: Option>, pub health_services: HashMap, pub security_mgr: Arc, pub txn_extra_schedulers: HashMap>, @@ -303,6 +313,7 @@ impl ServerCluster { snap_mgrs: HashMap::default(), snap_paths: HashMap::default(), pending_services: HashMap::default(), + pending_debug_service: None::>, health_services: HashMap::default(), raft_clients: HashMap::default(), conn_builder, @@ -556,7 +567,7 @@ impl ServerCluster { ); let debug_thread_handle = debug_thread_pool.handle().clone(); let diag_service = DiagnosticsService::new( - debug_thread_handle, + debug_thread_handle.clone(), cfg.log.file.filename.clone(), cfg.slow_log_file.clone(), ); @@ -590,6 +601,9 @@ impl ServerCluster { svr.register_service(fact()); } } + if let Some(debug_service) = &self.pending_debug_service { + svr.register_service(debug_service(self, debug_thread_handle.clone())); + } match svr.build_and_bind() { Ok(_) => { server = Some(svr); @@ -1028,7 +1042,30 @@ pub fn must_new_cluster_and_debug_client() -> ( DebugClient, u64, ) { - let (cluster, leader, _) = must_new_cluster_mul(1); + let mut cluster = new_server_cluster(0, 1); + cluster.create_engines(); + let region_id = cluster.bootstrap_conf_change(); + + { + let mut sim = cluster.sim.wl(); + 
let tablet_registry = cluster.tablet_registries.get(&1).unwrap().clone(); + let raft_engine = cluster.raft_engines.get(&1).unwrap().clone(); + let debugger = + DebuggerImplV2::new(tablet_registry, raft_engine, ConfigController::default()); + + sim.pending_debug_service = Some(Box::new(move |cluster, debug_thread_handle| { + let raft_extension = cluster.storages.get(&1).unwrap().raft_extension(); + + create_debug(DebugService::new( + debugger.clone(), + debug_thread_handle, + raft_extension, + )) + })); + } + + cluster.start().unwrap(); + let leader = cluster.leader_of_region(region_id).unwrap(); let env = Arc::new(Environment::new(1)); let channel = diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index f8e8f594554..8d4d4deea69 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -53,6 +53,7 @@ use tikv::{ import::{ImportSstService, SstImporter}, read_pool::ReadPool, server::{ + debug::DebuggerImpl, gc_worker::GcWorker, load_statistics::ThreadLoadPool, lock_manager::LockManager, @@ -487,15 +488,10 @@ impl ServerCluster { .build() .unwrap(), ); + + let debugger = DebuggerImpl::new(engines.clone(), ConfigController::default()); let debug_thread_handle = debug_thread_pool.handle().clone(); - let debug_service = DebugService::new( - engines.clone(), - None, - None, - debug_thread_handle, - extension, - ConfigController::default(), - ); + let debug_service = DebugService::new(debugger, debug_thread_handle, extension); let apply_router = system.apply_router(); // Create node. 
diff --git a/src/lib.rs b/src/lib.rs index 4da16ee0e74..b3e9ebaf8e8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,6 +28,8 @@ #![feature(let_chains)] #![feature(read_buf)] #![feature(type_alias_impl_trait)] +#![allow(incomplete_features)] +#![feature(return_position_impl_trait_in_trait)] #[macro_use(fail_point)] extern crate fail; diff --git a/src/server/debug.rs b/src/server/debug.rs index 2e2b34970b8..7ce7c832f48 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -22,6 +22,7 @@ use engine_traits::{ }; use kvproto::{ debugpb::{self, Db as DbType}, + kvrpcpb::MvccInfo, metapb::{PeerRole, Region}, raft_serverpb::*, }; @@ -125,8 +126,56 @@ trait InnerRocksEngineExtractor { fn get_db_from_type(&self, db: DbType) -> Result<&RocksEngine>; } +pub trait Debugger { + fn get(&self, db: DbType, cf: &str, key: &[u8]) -> Result>; + + fn raft_log(&self, region_id: u64, log_index: u64) -> Result; + + fn region_info(&self, region_id: u64) -> Result; + + fn region_size>(&self, region_id: u64, cfs: Vec) -> Result>; + + /// Scan MVCC Infos for given range `[start, end)`. + fn scan_mvcc( + &self, + start: &[u8], + end: &[u8], + limit: u64, + ) -> Result, MvccInfo)>> + Send>; + + /// Compact the cf[start..end) in the db. + fn compact( + &self, + db: DbType, + cf: &str, + start: &[u8], + end: &[u8], + threads: u32, + bottommost: BottommostLevelCompaction, + ) -> Result<()>; + + /// Get all regions holding region meta data from raft CF in KV storage. 
+ fn get_all_regions_in_store(&self) -> Result>; + + fn get_store_ident(&self) -> Result; + + fn dump_kv_stats(&self) -> Result; + + fn dump_raft_stats(&self) -> Result; + + fn modify_tikv_config(&self, config_name: &str, config_value: &str) -> Result<()>; + + fn get_region_properties(&self, region_id: u64) -> Result>; + + fn reset_to_version(&self, version: u64); + + fn set_kv_statistics(&mut self, s: Option>); + + fn set_raft_statistics(&mut self, s: Option>); +} + #[derive(Clone)] -pub struct Debugger { +pub struct DebuggerImpl { engines: Engines, kv_statistics: Option>, raft_statistics: Option>, @@ -134,7 +183,7 @@ pub struct Debugger { cfg_controller: ConfigController, } -impl InnerRocksEngineExtractor for Debugger { +impl InnerRocksEngineExtractor for DebuggerImpl { default fn get_db_from_type(&self, db: DbType) -> Result<&RocksEngine> { match db { DbType::Kv => Ok(&self.engines.kv), @@ -144,7 +193,7 @@ impl InnerRocksEngineExtractor for Debugger { } } -impl InnerRocksEngineExtractor for Debugger { +impl InnerRocksEngineExtractor for DebuggerImpl { fn get_db_from_type(&self, db: DbType) -> Result<&RocksEngine> { match db { DbType::Kv => Ok(&self.engines.kv), @@ -154,13 +203,13 @@ impl InnerRocksEngineExtractor for Debugger { } } -impl Debugger { +impl DebuggerImpl { pub fn new( engines: Engines, cfg_controller: ConfigController, - ) -> Debugger { + ) -> DebuggerImpl { let reset_to_version_manager = ResetToVersionManager::new(engines.kv.clone()); - Debugger { + DebuggerImpl { engines, kv_statistics: None, raft_statistics: None, @@ -169,160 +218,10 @@ impl Debugger { } } - pub fn set_kv_statistics(&mut self, s: Option>) { - self.kv_statistics = s; - } - - pub fn set_raft_statistics(&mut self, s: Option>) { - self.raft_statistics = s; - } - pub fn get_engine(&self) -> &Engines { &self.engines } - pub fn dump_kv_stats(&self) -> Result { - let mut kv_str = box_try!(MiscExt::dump_stats(&self.engines.kv)); - if let Some(s) = self.kv_statistics.as_ref() && let 
Some(s) = s.to_string() { - kv_str.push_str(&s); - } - Ok(kv_str) - } - - pub fn dump_raft_stats(&self) -> Result { - let mut raft_str = box_try!(RaftEngine::dump_stats(&self.engines.raft)); - if let Some(s) = self.raft_statistics.as_ref() && let Some(s) = s.to_string() { - raft_str.push_str(&s); - } - Ok(raft_str) - } - - /// Get all regions holding region meta data from raft CF in KV storage. - pub fn get_all_regions_in_store(&self) -> Result> { - let db = &self.engines.kv; - let cf = CF_RAFT; - let start_key = keys::REGION_META_MIN_KEY; - let end_key = keys::REGION_META_MAX_KEY; - let mut regions = Vec::with_capacity(128); - box_try!(db.scan(cf, start_key, end_key, false, |key, _| { - let (id, suffix) = box_try!(keys::decode_region_meta_key(key)); - if suffix != keys::REGION_STATE_SUFFIX { - return Ok(true); - } - regions.push(id); - Ok(true) - })); - regions.sort_unstable(); - Ok(regions) - } - - pub fn get(&self, db: DbType, cf: &str, key: &[u8]) -> Result> { - validate_db_and_cf(db, cf)?; - let db = self.get_db_from_type(db)?; - match db.get_value_cf(cf, key) { - Ok(Some(v)) => Ok(v.to_vec()), - Ok(None) => Err(Error::NotFound(format!( - "value for key {:?} in db {:?}", - key, db - ))), - Err(e) => Err(box_err!(e)), - } - } - - pub fn raft_log(&self, region_id: u64, log_index: u64) -> Result { - if let Some(e) = box_try!(self.engines.raft.get_entry(region_id, log_index)) { - return Ok(e); - } - Err(Error::NotFound(format!( - "raft log for region {} at index {}", - region_id, log_index - ))) - } - - pub fn region_info(&self, region_id: u64) -> Result { - let raft_state = box_try!(self.engines.raft.get_raft_state(region_id)); - - let apply_state_key = keys::apply_state_key(region_id); - let apply_state = box_try!( - self.engines - .kv - .get_msg_cf::(CF_RAFT, &apply_state_key) - ); - - let region_state_key = keys::region_state_key(region_id); - let region_state = box_try!( - self.engines - .kv - .get_msg_cf::(CF_RAFT, ®ion_state_key) - ); - - match (raft_state, 
apply_state, region_state) { - (None, None, None) => Err(Error::NotFound(format!("info for region {}", region_id))), - (raft_state, apply_state, region_state) => { - Ok(RegionInfo::new(raft_state, apply_state, region_state)) - } - } - } - - pub fn region_size>( - &self, - region_id: u64, - cfs: Vec, - ) -> Result> { - let region_state_key = keys::region_state_key(region_id); - match self - .engines - .kv - .get_msg_cf::(CF_RAFT, ®ion_state_key) - { - Ok(Some(region_state)) => { - let region = region_state.get_region(); - let start_key = &keys::data_key(region.get_start_key()); - let end_key = &keys::data_end_key(region.get_end_key()); - let mut sizes = vec![]; - for cf in cfs { - let mut size = 0; - box_try!(self.engines.kv.scan( - cf.as_ref(), - start_key, - end_key, - false, - |k, v| { - size += k.len() + v.len(); - Ok(true) - } - )); - sizes.push((cf, size)); - } - Ok(sizes) - } - Ok(None) => Err(Error::NotFound(format!("none region {:?}", region_id))), - Err(e) => Err(box_err!(e)), - } - } - - /// Scan MVCC Infos for given range `[start, end)`. - pub fn scan_mvcc( - &self, - start: &[u8], - end: &[u8], - limit: u64, - ) -> Result> { - if end.is_empty() && limit == 0 { - return Err(Error::InvalidArgument("no limit and to_key".to_owned())); - } - MvccInfoIterator::new( - |cf, opts| { - let kv = &self.engines.kv; - kv.iterator_opt(cf, opts).map_err(|e| box_err!(e)) - }, - if start.is_empty() { None } else { Some(start) }, - if end.is_empty() { None } else { Some(end) }, - limit as usize, - ) - .map_err(|e| box_err!(e)) - } - /// Scan raw keys for given range `[start, end)` in given cf. pub fn raw_scan( &self, @@ -352,32 +251,6 @@ impl Debugger { Ok(res) } - /// Compact the cf[start..end) in the db. 
- pub fn compact( - &self, - db: DbType, - cf: &str, - start: &[u8], - end: &[u8], - threads: u32, - bottommost: BottommostLevelCompaction, - ) -> Result<()> { - validate_db_and_cf(db, cf)?; - let db = self.get_db_from_type(db)?; - let handle = box_try!(get_cf_handle(db.as_inner(), cf)); - let start = if start.is_empty() { None } else { Some(start) }; - let end = if end.is_empty() { None } else { Some(end) }; - info!("Debugger starts manual compact"; "db" => ?db, "cf" => cf); - let mut opts = CompactOptions::new(); - opts.set_max_subcompactions(threads as i32); - opts.set_exclusive_manual_compaction(false); - opts.set_bottommost_level_compaction(bottommost.0); - db.as_inner() - .compact_range_cf_opt(handle, &opts, start, end); - info!("Debugger finishes manual compact"; "db" => ?db, "cf" => cf); - Ok(()) - } - /// Set regions to tombstone by manual, and apply other status(such as /// peers, version, and key range) from `region` which comes from PD /// normally. @@ -859,7 +732,183 @@ impl Debugger { Ok(()) } - pub fn get_store_ident(&self) -> Result { + fn get_region_state(&self, region_id: u64) -> Result { + let region_state_key = keys::region_state_key(region_id); + let region_state = box_try!( + self.engines + .kv + .get_msg_cf::(CF_RAFT, ®ion_state_key) + ); + match region_state { + Some(v) => Ok(v), + None => Err(Error::NotFound(format!("region {}", region_id))), + } + } + + pub fn get_range_properties(&self, start: &[u8], end: &[u8]) -> Result> { + let mut props = dump_write_cf_properties( + &self.engines.kv, + &keys::data_key(start), + &keys::data_end_key(end), + )?; + let mut props1 = dump_default_cf_properties( + &self.engines.kv, + &keys::data_key(start), + &keys::data_end_key(end), + )?; + props.append(&mut props1); + Ok(props) + } +} + +impl Debugger for DebuggerImpl { + fn get(&self, db: DbType, cf: &str, key: &[u8]) -> Result> { + validate_db_and_cf(db, cf)?; + let db = self.get_db_from_type(db)?; + match db.get_value_cf(cf, key) { + Ok(Some(v)) => 
Ok(v.to_vec()), + Ok(None) => Err(Error::NotFound(format!( + "value for key {:?} in db {:?}", + key, db + ))), + Err(e) => Err(box_err!(e)), + } + } + + fn raft_log(&self, region_id: u64, log_index: u64) -> Result { + if let Some(e) = box_try!(self.engines.raft.get_entry(region_id, log_index)) { + return Ok(e); + } + Err(Error::NotFound(format!( + "raft log for region {} at index {}", + region_id, log_index + ))) + } + + fn region_info(&self, region_id: u64) -> Result { + let raft_state = box_try!(self.engines.raft.get_raft_state(region_id)); + + let apply_state_key = keys::apply_state_key(region_id); + let apply_state = box_try!( + self.engines + .kv + .get_msg_cf::(CF_RAFT, &apply_state_key) + ); + + let region_state_key = keys::region_state_key(region_id); + let region_state = box_try!( + self.engines + .kv + .get_msg_cf::(CF_RAFT, ®ion_state_key) + ); + + match (raft_state, apply_state, region_state) { + (None, None, None) => Err(Error::NotFound(format!("info for region {}", region_id))), + (raft_state, apply_state, region_state) => { + Ok(RegionInfo::new(raft_state, apply_state, region_state)) + } + } + } + + fn region_size>(&self, region_id: u64, cfs: Vec) -> Result> { + let region_state_key = keys::region_state_key(region_id); + match self + .engines + .kv + .get_msg_cf::(CF_RAFT, ®ion_state_key) + { + Ok(Some(region_state)) => { + let region = region_state.get_region(); + let start_key = &keys::data_key(region.get_start_key()); + let end_key = &keys::data_end_key(region.get_end_key()); + let mut sizes = vec![]; + for cf in cfs { + let mut size = 0; + box_try!(self.engines.kv.scan( + cf.as_ref(), + start_key, + end_key, + false, + |k, v| { + size += k.len() + v.len(); + Ok(true) + } + )); + sizes.push((cf, size)); + } + Ok(sizes) + } + Ok(None) => Err(Error::NotFound(format!("none region {:?}", region_id))), + Err(e) => Err(box_err!(e)), + } + } + + fn scan_mvcc( + &self, + start: &[u8], + end: &[u8], + limit: u64, + ) -> Result, MvccInfo)>> + Send> { + if 
end.is_empty() && limit == 0 { + return Err(Error::InvalidArgument("no limit and to_key".to_owned())); + } + MvccInfoIterator::new( + |cf, opts| { + let kv = &self.engines.kv; + kv.iterator_opt(cf, opts).map_err(|e| box_err!(e)) + }, + if start.is_empty() { None } else { Some(start) }, + if end.is_empty() { None } else { Some(end) }, + limit as usize, + ) + .map_err(|e| box_err!(e)) + } + + /// Compact the cf[start..end) in the db. + fn compact( + &self, + db: DbType, + cf: &str, + start: &[u8], + end: &[u8], + threads: u32, + bottommost: BottommostLevelCompaction, + ) -> Result<()> { + validate_db_and_cf(db, cf)?; + let db = self.get_db_from_type(db)?; + let handle = box_try!(get_cf_handle(db.as_inner(), cf)); + let start = if start.is_empty() { None } else { Some(start) }; + let end = if end.is_empty() { None } else { Some(end) }; + info!("Debugger starts manual compact"; "db" => ?db, "cf" => cf); + let mut opts = CompactOptions::new(); + opts.set_max_subcompactions(threads as i32); + opts.set_exclusive_manual_compaction(false); + opts.set_bottommost_level_compaction(bottommost.0); + db.as_inner() + .compact_range_cf_opt(handle, &opts, start, end); + info!("Debugger finishes manual compact"; "db" => ?db, "cf" => cf); + Ok(()) + } + + fn get_all_regions_in_store(&self) -> Result> { + let db = &self.engines.kv; + let cf = CF_RAFT; + let start_key = keys::REGION_META_MIN_KEY; + let end_key = keys::REGION_META_MAX_KEY; + let mut regions = Vec::with_capacity(128); + box_try!(db.scan(cf, start_key, end_key, false, |key, _| { + let (id, suffix) = box_try!(keys::decode_region_meta_key(key)); + if suffix != keys::REGION_STATE_SUFFIX { + return Ok(true); + } + regions.push(id); + Ok(true) + })); + regions.sort_unstable(); + Ok(regions) + } + + fn get_store_ident(&self) -> Result { let db = &self.engines.kv; db.get_msg::(keys::STORE_IDENT_KEY) .map_err(|e| box_err!(e)) @@ -869,7 +918,23 @@ impl Debugger { }) } - pub fn modify_tikv_config(&self, config_name: &str, 
config_value: &str) -> Result<()> { + fn dump_kv_stats(&self) -> Result { + let mut kv_str = box_try!(MiscExt::dump_stats(&self.engines.kv)); + if let Some(s) = self.kv_statistics.as_ref() && let Some(s) = s.to_string() { + kv_str.push_str(&s); + } + Ok(kv_str) + } + + fn dump_raft_stats(&self) -> Result { + let mut raft_str = box_try!(RaftEngine::dump_stats(&self.engines.raft)); + if let Some(s) = self.raft_statistics.as_ref() && let Some(s) = s.to_string() { + raft_str.push_str(&s); + } + Ok(raft_str) + } + + fn modify_tikv_config(&self, config_name: &str, config_value: &str) -> Result<()> { if let Err(e) = self.cfg_controller.update_config(config_name, config_value) { return Err(Error::Other( format!("failed to update config, err: {:?}", e).into(), @@ -878,20 +943,7 @@ impl Debugger { Ok(()) } - fn get_region_state(&self, region_id: u64) -> Result { - let region_state_key = keys::region_state_key(region_id); - let region_state = box_try!( - self.engines - .kv - .get_msg_cf::(CF_RAFT, ®ion_state_key) - ); - match region_state { - Some(v) => Ok(v), - None => Err(Error::NotFound(format!("region {}", region_id))), - } - } - - pub fn get_region_properties(&self, region_id: u64) -> Result> { + fn get_region_properties(&self, region_id: u64) -> Result> { let region_state = self.get_region_state(region_id)?; let region = region_state.get_region(); let start = keys::enc_start_key(region); @@ -919,23 +971,16 @@ impl Debugger { Ok(res) } - pub fn get_range_properties(&self, start: &[u8], end: &[u8]) -> Result> { - let mut props = dump_write_cf_properties( - &self.engines.kv, - &keys::data_key(start), - &keys::data_end_key(end), - )?; - let mut props1 = dump_default_cf_properties( - &self.engines.kv, - &keys::data_key(start), - &keys::data_end_key(end), - )?; - props.append(&mut props1); - Ok(props) + fn reset_to_version(&self, version: u64) { + self.reset_to_version_manager.start(version.into()); } - pub fn reset_to_version(&self, version: u64) { - 
self.reset_to_version_manager.start(version.into()); + fn set_kv_statistics(&mut self, s: Option>) { + self.kv_statistics = s; + } + + fn set_raft_statistics(&mut self, s: Option>) { + self.raft_statistics = s; } } @@ -1560,16 +1605,16 @@ mod tests { } } - fn new_debugger() -> Debugger { + fn new_debugger() -> DebuggerImpl { let tmp = Builder::new().prefix("test_debug").tempdir().unwrap(); let path = tmp.path().to_str().unwrap(); let engine = engine_rocks::util::new_engine(path, ALL_CFS).unwrap(); let engines = Engines::new(engine.clone(), engine); - Debugger::new(engines, ConfigController::default()) + DebuggerImpl::new(engines, ConfigController::default()) } - impl Debugger { + impl DebuggerImpl { fn set_store_id(&self, store_id: u64) { let mut ident = self.get_store_ident().unwrap_or_default(); ident.set_store_id(store_id); diff --git a/src/server/debug2.rs b/src/server/debug2.rs index caa1e01d77e..bea3da7ca4a 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -1,6 +1,10 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_rocks::{raw::CompactOptions, util::get_cf_handle, RocksEngine, RocksEngineIterator}; +use std::sync::Arc; + +use engine_rocks::{ + raw::CompactOptions, util::get_cf_handle, RocksEngine, RocksEngineIterator, RocksStatistics, +}; use engine_traits::{ CachedTablet, Iterable, Peekable, RaftEngine, TabletContext, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, @@ -10,13 +14,13 @@ use kvproto::{ debugpb::Db as DbType, kvrpcpb::MvccInfo, metapb, - raft_serverpb::{PeerState, RegionLocalState}, + raft_serverpb::{PeerState, RegionLocalState, StoreIdent}, }; use nom::AsBytes; use raft::prelude::Entry; use raftstore::store::util::check_key_in_region; -use super::debug::{BottommostLevelCompaction, RegionInfo}; +use super::debug::{BottommostLevelCompaction, Debugger, RegionInfo}; use crate::{ config::ConfigController, server::debug::{Error, Result}, @@ -184,38 +188,33 @@ impl Iterator for MvccInfoIteratorV2 { // Debugger for raftstore-v2 #[derive(Clone)] -pub struct DebuggerV2 { +pub struct DebuggerImplV2 { tablet_reg: TabletRegistry, raft_engine: ER, + kv_statistics: Option>, + raft_statistics: Option>, _cfg_controller: ConfigController, } -impl DebuggerV2 { +impl DebuggerImplV2 { pub fn new( tablet_reg: TabletRegistry, raft_engine: ER, cfg_controller: ConfigController, ) -> Self { println!("Debugger for raftstore-v2 is used"); - DebuggerV2 { + DebuggerImplV2 { tablet_reg, raft_engine, _cfg_controller: cfg_controller, + kv_statistics: None, + raft_statistics: None, } } +} - pub fn get_all_regions_in_store(&self) -> Result> { - let mut region_ids = vec![]; - self.raft_engine - .for_each_raft_group::(&mut |region_id| { - region_ids.push(region_id); - Ok(()) - }) - .unwrap(); - Ok(region_ids) - } - - pub fn get(&self, db: DbType, cf: &str, key: &[u8]) -> Result> { +impl Debugger for DebuggerImplV2 { + fn get(&self, db: DbType, cf: &str, key: &[u8]) -> Result> { validate_db_and_cf(db, cf)?; let region_state = find_region_state_by_key(&self.raft_engine, 
&key[DATA_PREFIX_KEY.len()..])?; @@ -235,7 +234,7 @@ impl DebuggerV2 { } } - pub fn raft_log(&self, region_id: u64, log_index: u64) -> Result { + fn raft_log(&self, region_id: u64, log_index: u64) -> Result { if let Some(log) = box_try!(self.raft_engine.get_entry(region_id, log_index)) { return Ok(log); } @@ -245,7 +244,7 @@ impl DebuggerV2 { ))) } - pub fn region_info(&self, region_id: u64) -> Result { + fn region_info(&self, region_id: u64) -> Result { let raft_state = box_try!(self.raft_engine.get_raft_state(region_id)); let apply_state = box_try!(self.raft_engine.get_apply_state(region_id, u64::MAX)); let region_state = box_try!(self.raft_engine.get_region_state(region_id, u64::MAX)); @@ -258,11 +257,7 @@ impl DebuggerV2 { } } - pub fn region_size>( - &self, - region_id: u64, - cfs: Vec, - ) -> Result> { + fn region_size>(&self, region_id: u64, cfs: Vec) -> Result> { match self.raft_engine.get_region_state(region_id, u64::MAX) { Ok(Some(region_state)) => { if region_state.get_state() != PeerState::Normal { @@ -293,8 +288,12 @@ impl DebuggerV2 { } } - /// Scan MVCC Infos for given range `[start, end)`. - pub fn scan_mvcc(&self, start: &[u8], end: &[u8], limit: u64) -> Result { + fn scan_mvcc( + &self, + start: &[u8], + end: &[u8], + limit: u64, + ) -> Result, MvccInfo)>> + Send> { if end.is_empty() && limit == 0 { return Err(Error::InvalidArgument("no limit and to_key".to_owned())); } @@ -334,8 +333,7 @@ impl DebuggerV2 { ) } - /// Compact the cf[start..end) in the db. 
- pub fn compact( + fn compact( &self, db: DbType, cf: &str, @@ -401,6 +399,49 @@ impl DebuggerV2 { Ok(()) } + + fn get_all_regions_in_store(&self) -> Result> { + let mut region_ids = vec![]; + self.raft_engine + .for_each_raft_group::(&mut |region_id| { + region_ids.push(region_id); + Ok(()) + }) + .unwrap(); + Ok(region_ids) + } + + fn dump_kv_stats(&self) -> Result { + unimplemented!() + } + + fn dump_raft_stats(&self) -> Result { + unimplemented!() + } + + fn modify_tikv_config(&self, _config_name: &str, _config_value: &str) -> Result<()> { + unimplemented!() + } + + fn get_store_ident(&self) -> Result { + unimplemented!() + } + + fn get_region_properties(&self, _region_id: u64) -> Result> { + unimplemented!() + } + + fn reset_to_version(&self, _version: u64) { + unimplemented!() + } + + fn set_kv_statistics(&mut self, s: Option>) { + self.kv_statistics = s; + } + + fn set_raft_statistics(&mut self, s: Option>) { + self.raft_statistics = s; + } } fn validate_db_and_cf(db: DbType, cf: &str) -> Result<()> { @@ -588,7 +629,7 @@ mod tests { const INITIAL_TABLET_INDEX: u64 = 5; const INITIAL_APPLY_INDEX: u64 = 5; - fn new_debugger(path: &Path) -> DebuggerV2 { + fn new_debugger(path: &Path) -> DebuggerImplV2 { let mut cfg = TikvConfig::default(); cfg.storage.data_dir = path.to_str().unwrap().to_string(); cfg.raft_store.raftdb_path = cfg.infer_raft_db_path(None).unwrap(); @@ -604,7 +645,7 @@ mod tests { let raft_engine = RaftLogEngine::new(cfg.raft_engine.config(), None, None).unwrap(); - DebuggerV2::new(reg, raft_engine, ConfigController::default()) + DebuggerImplV2::new(reg, raft_engine, ConfigController::default()) } #[test] @@ -848,12 +889,13 @@ mod tests { assert!(debugger.scan_mvcc(b"z", b"", 0).is_err()); assert!(debugger.scan_mvcc(b"z", b"x", 3).is_err()); - let verify_scanner = |range, scanner: &mut MvccInfoIteratorV2| { - for i in range { - let key = format!("k{:02}", i).into_bytes(); - assert_eq!(key, extract_key(&scanner.next().unwrap().unwrap().0)); - } - 
}; + let verify_scanner = + |range, scanner: &mut dyn Iterator, MvccInfo)>>| { + for i in range { + let key = format!("k{:02}", i).into_bytes(); + assert_eq!(key, extract_key(&scanner.next().unwrap().unwrap().0)); + } + }; // full scann let mut scanner = debugger.scan_mvcc(b"", b"", 100).unwrap(); diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index e0ec9173ad5..7b2a694c99a 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -1,9 +1,5 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. -use std::sync::Arc; - -use engine_rocks::{RocksEngine, RocksStatistics}; -use engine_traits::{Engines, RaftEngine}; use futures::{ future::{Future, FutureExt, TryFutureExt}, sink::SinkExt, @@ -18,10 +14,7 @@ use tikv_kv::RaftExtension; use tikv_util::metrics; use tokio::runtime::Handle; -use crate::{ - config::ConfigController, - server::debug::{Debugger, Error, Result}, -}; +use crate::server::debug::{Debugger, Error, Result}; fn error_to_status(e: Error) -> RpcStatus { let (code, msg) = match e { @@ -45,26 +38,24 @@ fn error_to_grpc_error(tag: &'static str, e: Error) -> GrpcError { /// Service handles the RPC messages for the `Debug` service. #[derive(Clone)] -pub struct Service { +pub struct Service +where + T: RaftExtension, + D: Debugger, +{ pool: Handle, - debugger: Debugger, + debugger: D, raft_router: T, } -impl Service { +impl Service +where + T: RaftExtension, + D: Debugger, +{ /// Constructs a new `Service` with `Engines`, a `RaftExtension` and a /// `GcWorker`. 
- pub fn new( - engines: Engines, - kv_statistics: Option>, - raft_statistics: Option>, - pool: Handle, - raft_router: T, - cfg_controller: ConfigController, - ) -> Self { - let mut debugger = Debugger::new(engines, cfg_controller); - debugger.set_kv_statistics(kv_statistics); - debugger.set_raft_statistics(raft_statistics); + pub fn new(debugger: D, pool: Handle, raft_router: T) -> Self { Service { pool, debugger, @@ -93,7 +84,11 @@ impl Service { } } -impl debugpb::Debug for Service { +impl debugpb::Debug for Service +where + T: RaftExtension + 'static, + D: Debugger + Clone + Send + 'static, +{ fn get(&mut self, ctx: RpcContext<'_>, mut req: GetRequest, sink: UnarySink) { const TAG: &str = "debug_get"; diff --git a/tests/integrations/server/debugger.rs b/tests/integrations/server/debugger.rs index dbd862ba633..e8a7bccb052 100644 --- a/tests/integrations/server/debugger.rs +++ b/tests/integrations/server/debugger.rs @@ -7,7 +7,7 @@ use keys::{data_key, DATA_MAX_KEY}; use kvproto::debugpb::Db; use tikv::{ config::ConfigController, - server::debug2::DebuggerV2, + server::{debug::Debugger, debug2::DebuggerImplV2}, storage::mvcc::{TimeStamp, Write, WriteType}, }; use txn_types::Key; @@ -93,7 +93,7 @@ fn test_compact() { }) } - let debugger = DebuggerV2::new( + let debugger = DebuggerImplV2::new( cluster.engines[0].0.clone(), cluster.raft_engines.get(&1).unwrap().clone(), ConfigController::default(), diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 530a963ee0c..0866fbcba75 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -945,9 +945,10 @@ fn test_split_region_impl(is_raw_kv: bool) { ); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_debug_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_debug_client)] fn test_debug_get() { - let (cluster, debug_client, store_id) = must_new_cluster_and_debug_client(); + let (cluster, debug_client, store_id) 
= new_cluster(); let (k, v) = (b"key", b"value"); // Put some data. @@ -973,9 +974,10 @@ fn test_debug_get() { } } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_debug_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_debug_client)] fn test_debug_raft_log() { - let (cluster, debug_client, store_id) = must_new_cluster_and_debug_client(); + let (cluster, debug_client, store_id) = new_cluster(); // Put some data. let engine = cluster.get_raft_engine(store_id); @@ -1011,6 +1013,8 @@ fn test_debug_raft_log() { } } +// Note: if modified in the future, should be sync with +// `test_debug_region_info_v2` #[test] fn test_debug_region_info() { let (cluster, debug_client, store_id) = must_new_cluster_and_debug_client(); @@ -1074,6 +1078,67 @@ fn test_debug_region_info() { } } +// Note: if modified in the future, should be sync with `test_debug_region_info` +#[test] +fn test_debug_region_info_v2() { + let (cluster, debug_client, store_id) = test_raftstore_v2::must_new_cluster_and_debug_client(); + + let raft_engine = cluster.get_raft_engine(store_id); + let region_id = 100; + let mut raft_state = raft_serverpb::RaftLocalState::default(); + raft_state.set_last_index(42); + let mut lb = raft_engine.log_batch(10); + lb.put_raft_state(region_id, &raft_state).unwrap(); + + let mut apply_state = raft_serverpb::RaftApplyState::default(); + apply_state.set_applied_index(42); + lb.put_apply_state(region_id, 42, &apply_state).unwrap(); + + let mut region_state = raft_serverpb::RegionLocalState::default(); + region_state.set_state(raft_serverpb::PeerState::Tombstone); + lb.put_region_state(region_id, 42, ®ion_state).unwrap(); + + raft_engine.consume(&mut lb, false).unwrap(); + assert_eq!( + raft_engine.get_raft_state(region_id).unwrap().unwrap(), + raft_state + ); + + assert_eq!( + raft_engine + .get_apply_state(region_id, u64::MAX) + .unwrap() + .unwrap(), + apply_state + ); + + assert_eq!( + raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + 
.unwrap(), + region_state + ); + + // Debug region_info + let mut req = debugpb::RegionInfoRequest::default(); + req.set_region_id(region_id); + let mut resp = debug_client.region_info(&req).unwrap(); + assert_eq!(resp.take_raft_local_state(), raft_state); + assert_eq!(resp.take_raft_apply_state(), apply_state); + assert_eq!(resp.take_region_local_state(), region_state); + + req.set_region_id(region_id + 1); + match debug_client.region_info(&req).unwrap_err() { + Error::RpcFailure(status) => { + assert_eq!(status.code(), RpcStatusCode::NOT_FOUND); + } + _ => panic!("expect NotFound"), + } +} + +// Note: if modified in the future, should be sync with +// `test_debug_region_size_v2` #[test] fn test_debug_region_size() { let (cluster, debug_client, store_id) = must_new_cluster_and_debug_client(); @@ -1122,6 +1187,56 @@ fn test_debug_region_size() { } } +// Note: if modified in the future, should be sync with `test_debug_region_size` +#[test] +fn test_debug_region_size_v2() { + let (cluster, debug_client, store_id) = test_raftstore_v2::must_new_cluster_and_debug_client(); + let raft_engine = cluster.get_raft_engine(store_id); + let engine = cluster.get_engine(store_id); + + let mut lb = raft_engine.log_batch(10); + // Put some data. + let region_id = 1; + let mut region = metapb::Region::default(); + region.set_id(region_id); + region.set_start_key(b"a".to_vec()); + region.set_end_key(b"z".to_vec()); + let mut state = RegionLocalState::default(); + state.set_region(region); + state.set_tablet_index(5); + lb.put_region_state(region_id, 5, &state).unwrap(); + raft_engine.consume(&mut lb, false).unwrap(); + + let cfs = vec![CF_DEFAULT, CF_LOCK, CF_WRITE]; + // At lease 8 bytes for the WRITE cf. 
+ let (k, v) = (keys::data_key(b"kkkk_kkkk"), b"v"); + for cf in &cfs { + engine.put_cf(cf, k.as_slice(), v).unwrap(); + } + + let mut req = debugpb::RegionSizeRequest::default(); + req.set_region_id(region_id); + req.set_cfs(cfs.iter().map(|s| s.to_string()).collect()); + let entries: Vec<_> = debug_client + .region_size(&req) + .unwrap() + .take_entries() + .into(); + assert_eq!(entries.len(), 3); + for e in entries { + cfs.iter().find(|&&c| c == e.cf).unwrap(); + assert!(e.size > 0); + } + + req.set_region_id(region_id + 1); + match debug_client.region_size(&req).unwrap_err() { + Error::RpcFailure(status) => { + assert_eq!(status.code(), RpcStatusCode::NOT_FOUND); + } + _ => panic!("expect NotFound"), + } +} + #[test] #[cfg(feature = "failpoints")] fn test_debug_fail_point() { @@ -1159,9 +1274,10 @@ fn test_debug_fail_point() { ); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_debug_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_debug_client)] fn test_debug_scan_mvcc() { - let (cluster, debug_client, store_id) = must_new_cluster_and_debug_client(); + let (cluster, debug_client, store_id) = new_cluster(); let engine = cluster.get_engine(store_id); // Put some data. From 358564185d38d0570345dcf907e0f80c2619f2e2 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Wed, 10 May 2023 18:58:08 +0800 Subject: [PATCH 0685/1149] sst_import: keep import sst if the ingest apply index has not flush the rocksdb. (#14669) close tikv/tikv#14663 record the sst apply index and delete them when the flushed index is higher than the apply index. 
Signed-off-by: bufferflies <1045931706@qq.com> --- components/engine_traits/src/flush.rs | 45 +++++++++++++++- components/raftstore-v2/src/fsm/apply.rs | 4 +- .../src/operation/command/admin/split.rs | 4 +- .../raftstore-v2/src/operation/command/mod.rs | 2 + .../src/operation/command/write/ingest.rs | 31 ++++++++++- .../src/operation/query/capture.rs | 3 +- .../src/operation/ready/apply_trace.rs | 15 ++++-- components/raftstore-v2/src/raft/apply.rs | 10 +++- components/raftstore-v2/src/raft/peer.rs | 10 +++- components/raftstore-v2/src/raft/storage.rs | 4 +- tests/failpoints/cases/test_import_service.rs | 45 ++++++++++++++++ tests/integrations/import/util.rs | 52 +++++++++++++++++++ 12 files changed, 211 insertions(+), 14 deletions(-) diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index 8b0566f2cfb..d79ee9631ca 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -13,10 +13,10 @@ //! be used as the start state. use std::{ - collections::LinkedList, + collections::{HashMap, LinkedList}, sync::{ atomic::{AtomicU64, Ordering}, - Arc, Mutex, + Arc, Mutex, RwLock, }, }; @@ -54,6 +54,47 @@ struct FlushProgress { last_flushed: [u64; DATA_CFS_LEN], } +/// A share state between raftstore and underlying engine. +/// +/// raftstore will update state changes and corresponding sst apply index, when +/// apply ingest sst request, it should ensure the sst can be deleted +/// if the flushed index greater than it . +#[derive(Debug, Clone)] +pub struct SstApplyState { + sst_map: Arc, u64>>>, +} + +impl Default for SstApplyState { + fn default() -> Self { + Self { + sst_map: Arc::new(RwLock::new(HashMap::new())), + } + } +} + +impl SstApplyState { + #[inline] + pub fn registe_ssts(&self, uuids: Vec>, sst_applied_index: u64) { + let mut map = self.sst_map.write().unwrap(); + for uuid in uuids { + map.insert(uuid, sst_applied_index); + } + } + + /// Query the sst applied index. 
+ #[inline] + pub fn sst_applied_index(&self, uuid: &Vec) -> Option { + self.sst_map.read().unwrap().get(uuid).copied() + } + + pub fn delete_ssts(&self, uuids: Vec>) { + let mut map = self.sst_map.write().unwrap(); + for uuid in uuids { + map.remove(&uuid); + } + } +} + /// A share state between raftstore and underlying engine. /// /// raftstore will update state changes and corresponding apply index, when diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index 99ce483f35e..ff1c5414de3 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -7,7 +7,7 @@ use std::{ use batch_system::{Fsm, FsmScheduler, Mailbox}; use crossbeam::channel::TryRecvError; -use engine_traits::{FlushState, KvEngine, TabletRegistry}; +use engine_traits::{FlushState, KvEngine, SstApplyState, TabletRegistry}; use futures::{compat::Future01CompatExt, FutureExt, StreamExt}; use kvproto::{metapb, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; @@ -80,6 +80,7 @@ impl ApplyFsm { read_scheduler: Scheduler>, checkpoint_scheduler: Scheduler>, flush_state: Arc, + sst_apply_state: SstApplyState, log_recovery: Option>, applied_term: u64, buckets: Option, @@ -96,6 +97,7 @@ impl ApplyFsm { tablet_registry, read_scheduler, flush_state, + sst_apply_state, log_recovery, applied_term, buckets, diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 9ac4de5e3c0..0bb3abcec67 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -847,7 +847,8 @@ mod test { kv::{KvTestEngine, TestTabletFactory}, }; use engine_traits::{ - FlushState, Peekable, TabletContext, TabletRegistry, WriteBatch, CF_DEFAULT, DATA_CFS, + FlushState, Peekable, SstApplyState, TabletContext, TabletRegistry, WriteBatch, CF_DEFAULT, + DATA_CFS, }; use 
futures::executor::block_on; use kvproto::{ @@ -1034,6 +1035,7 @@ mod test { reg, read_scheduler, Arc::new(FlushState::new(5)), + SstApplyState::default(), None, 5, None, diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 293afdcc49b..4b0e25caa8b 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -136,6 +136,7 @@ impl Peer { let logger = self.logger.clone(); let read_scheduler = self.storage().read_scheduler(); let buckets = self.region_buckets_info().bucket_stat().clone(); + let sst_apply_state = self.sst_apply_state().clone(); let (apply_scheduler, mut apply_fsm) = ApplyFsm::new( &store_ctx.cfg, self.peer().clone(), @@ -145,6 +146,7 @@ impl Peer { read_scheduler, store_ctx.schedulers.checkpoint.clone(), self.flush_state().clone(), + sst_apply_state, self.storage().apply_trace().log_recovery(), self.entry_storage().applied_term(), buckets, diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index bc15765437f..90382de24aa 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -67,12 +67,34 @@ impl Peer { ctx: &mut StoreContext, ssts: Box<[SstMeta]>, ) { - let epoch = self.region().get_region_epoch(); let mut stale_ssts = Vec::from(ssts); - stale_ssts.retain(|sst| util::is_epoch_stale(sst.get_region_epoch(), epoch)); + let epoch = self.region().get_region_epoch(); + stale_ssts.retain(|sst| { + fail::fail_point!("on_cleanup_import_sst", |_| true); + util::is_epoch_stale(sst.get_region_epoch(), epoch) + }); + + // some sst needs to be kept if the log didn't flush the disk. 
+ let flushed_indexes = self.storage().apply_trace().flushed_indexes(); + stale_ssts.retain(|sst| { + let off = data_cf_offset(sst.get_cf_name()); + let uuid = sst.get_uuid().to_vec(); + let sst_index = self.sst_apply_state().sst_applied_index(&uuid); + if let Some(index) = sst_index { + return flushed_indexes.as_ref()[off] >= index; + } + true + }); + + fail::fail_point!("on_cleanup_import_sst_schedule"); if stale_ssts.is_empty() { return; } + let uuids = stale_ssts + .iter() + .map(|sst| sst.get_uuid().to_vec()) + .collect(); + self.sst_apply_state().delete_ssts(uuids); let _ = ctx .schedulers .tablet @@ -116,6 +138,11 @@ impl Apply { slog_panic!(self.logger, "ingest fail"; "ssts" => ?ssts, "error" => ?e); } } + let uuids = infos + .iter() + .map(|info| info.meta.get_uuid().to_vec()) + .collect::>(); + self.set_sst_applied_index(uuids, index); Ok(()) } } diff --git a/components/raftstore-v2/src/operation/query/capture.rs b/components/raftstore-v2/src/operation/query/capture.rs index c1a622cd1f9..4f3d58424b9 100644 --- a/components/raftstore-v2/src/operation/query/capture.rs +++ b/components/raftstore-v2/src/operation/query/capture.rs @@ -175,7 +175,7 @@ mod test { kv::{KvTestEngine, TestTabletFactory}, }; use engine_traits::{ - FlushState, Peekable, TabletContext, TabletRegistry, CF_DEFAULT, DATA_CFS, + FlushState, Peekable, SstApplyState, TabletContext, TabletRegistry, CF_DEFAULT, DATA_CFS, }; use futures::executor::block_on; use kvproto::{ @@ -324,6 +324,7 @@ mod test { reg, read_scheduler, Arc::new(FlushState::new(5)), + SstApplyState::default(), None, 5, None, diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index 6c9c73479ba..9d7cae00e9d 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -298,10 +298,7 @@ impl ApplyTrace { /// `None` is returned. 
#[inline] pub fn log_recovery(&self) -> Option> { - let mut flushed_indexes = [0; DATA_CFS_LEN]; - for (off, pr) in self.data_cfs.iter().enumerate() { - flushed_indexes[off] = pr.flushed; - } + let flushed_indexes = self.flushed_indexes(); for i in flushed_indexes { if i > self.admin.flushed { return Some(Box::new(flushed_indexes)); @@ -310,6 +307,16 @@ impl ApplyTrace { None } + /// Get the flushed indexes of all data CF that is needed when recoverying + /// logs. It does not check the admin cf. + pub fn flushed_indexes(&self) -> DataTrace { + let mut flushed_indexes = [0; DATA_CFS_LEN]; + for (off, pr) in self.data_cfs.iter().enumerate() { + flushed_indexes[off] = pr.flushed; + } + flushed_indexes + } + pub fn restore_snapshot(&mut self, index: u64) { for pr in self.data_cfs.iter_mut() { pr.last_modified = index; diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 80ff21ebfbd..b3d74109c8d 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -3,7 +3,7 @@ use std::{mem, sync::Arc}; use engine_traits::{ - FlushState, KvEngine, PerfContextKind, TabletRegistry, WriteBatch, DATA_CFS_LEN, + FlushState, KvEngine, PerfContextKind, SstApplyState, TabletRegistry, WriteBatch, DATA_CFS_LEN, }; use kvproto::{metapb, raft_cmdpb::RaftCmdResponse, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; @@ -57,6 +57,7 @@ pub struct Apply { modifications: DataTrace, admin_cmd_result: Vec, flush_state: Arc, + sst_apply_state: SstApplyState, /// The flushed indexes of each column family before being restarted. 
/// /// If an apply index is less than the flushed index, the log can be @@ -92,6 +93,7 @@ impl Apply { tablet_registry: TabletRegistry, read_scheduler: Scheduler>, flush_state: Arc, + sst_apply_state: SstApplyState, log_recovery: Option>, applied_term: u64, buckets: Option, @@ -126,6 +128,7 @@ impl Apply { key_buffer: vec![], res_reporter, flush_state, + sst_apply_state, log_recovery, metrics: ApplyMetrics::default(), buckets, @@ -279,6 +282,11 @@ impl Apply { &self.flush_state } + #[inline] + pub fn set_sst_applied_index(&mut self, uuid: Vec>, apply_index: u64) { + self.sst_apply_state.registe_ssts(uuid, apply_index); + } + #[inline] pub fn log_recovery(&self) -> &Option> { &self.log_recovery diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 8d2360d2695..ae6d6f5bf81 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -9,7 +9,7 @@ use std::{ use collections::{HashMap, HashSet}; use encryption_export::DataKeyManager; use engine_traits::{ - CachedTablet, FlushState, KvEngine, RaftEngine, TabletContext, TabletRegistry, + CachedTablet, FlushState, KvEngine, RaftEngine, SstApplyState, TabletContext, TabletRegistry, }; use kvproto::{ metapb::{self, PeerRole}, @@ -107,6 +107,7 @@ pub struct Peer { /// advancing apply index. state_changes: Option>, flush_state: Arc, + sst_apply_state: SstApplyState, /// lead_transferee if this peer(leader) is in a leadership transferring. leader_transferee: u64, @@ -147,6 +148,7 @@ impl Peer { let region = raft_group.store().region_state().get_region().clone(); let flush_state: Arc = Arc::new(FlushState::new(applied_index)); + let sst_apply_state = SstApplyState::default(); // We can't create tablet if tablet index is 0. It can introduce race when gc // old tablet and create new peer. We also can't get the correct range of the // region, which is required for kv data gc. 
@@ -199,6 +201,7 @@ impl Peer { split_trace: vec![], state_changes: None, flush_state, + sst_apply_state, split_flow_control: SplitFlowControl::default(), leader_transferee: raft::INVALID_ID, long_uncommitted_threshold: cmp::max( @@ -793,6 +796,11 @@ impl Peer { &self.flush_state } + #[inline] + pub fn sst_apply_state(&self) -> &SstApplyState { + &self.sst_apply_state + } + pub fn reset_flush_state(&mut self, index: u64) { self.flush_state = Arc::new(FlushState::new(index)); } diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 0572a933fd5..298ba5d451f 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -321,7 +321,8 @@ mod tests { kv::{KvTestEngine, TestTabletFactory}, }; use engine_traits::{ - FlushState, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, DATA_CFS, + FlushState, RaftEngine, RaftLogBatch, SstApplyState, TabletContext, TabletRegistry, + DATA_CFS, }; use kvproto::{ metapb::{Peer, Region}, @@ -517,6 +518,7 @@ mod tests { reg, sched, Arc::new(FlushState::new(5)), + SstApplyState::default(), None, 5, None, diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index 3fdb464c718..475acbe9f3c 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ b/tests/failpoints/cases/test_import_service.rs @@ -19,6 +19,7 @@ use tikv_util::HandyRwLock; mod util; use self::util::{ check_ingested_kvs, new_cluster_and_tikv_import_client, new_cluster_and_tikv_import_client_tde, + open_cluster_and_tikv_import_client_v2, send_upload_sst, }; // Opening sst writer involves IO operation, it may block threads for a while. 
@@ -248,3 +249,47 @@ fn test_ingest_file_twice_and_conflict() { resp.get_error().get_message() ); } + +#[test] +fn test_ingest_sst_v2() { + let mut cluster = test_raftstore_v2::new_server_cluster(1, 1); + let (ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(None, &mut cluster); + let temp_dir = Builder::new().prefix("test_ingest_sst").tempdir().unwrap(); + let sst_path = temp_dir.path().join("test.sst"); + let sst_range = (0, 100); + let (mut meta, data) = gen_sst_file(sst_path, sst_range); + + // No region id and epoch. + send_upload_sst(&import, &meta, &data).unwrap(); + let mut ingest = IngestRequest::default(); + ingest.set_context(ctx.clone()); + ingest.set_sst(meta.clone()); + meta.set_region_id(ctx.get_region_id()); + meta.set_region_epoch(ctx.get_region_epoch().clone()); + send_upload_sst(&import, &meta, &data).unwrap(); + ingest.set_sst(meta); + let resp = import.ingest(&ingest).unwrap(); + assert!(!resp.has_error(), "{:?}", resp.get_error()); + fail::cfg("on_cleanup_import_sst", "return").unwrap(); + let (tx, rx) = channel::<()>(); + let tx = Arc::new(Mutex::new(tx)); + fail::cfg_callback("on_cleanup_import_sst_schedule", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + + rx.recv_timeout(std::time::Duration::from_secs(20)).unwrap(); + let mut count = 0; + for path in &cluster.paths { + let sst_dir = path.path().join("import-sst"); + for entry in std::fs::read_dir(sst_dir).unwrap() { + let entry = entry.unwrap(); + if entry.file_type().unwrap().is_file() { + count += 1; + } + } + } + fail::remove("on_cleanup_import_sst"); + fail::remove("on_cleanup_import_sst_schedule"); + assert_ne!(0, count); +} diff --git a/tests/integrations/import/util.rs b/tests/integrations/import/util.rs index e6e2121a479..cb1e0e336be 100644 --- a/tests/integrations/import/util.rs +++ b/tests/integrations/import/util.rs @@ -2,6 +2,7 @@ use std::{sync::Arc, thread, time::Duration}; +use engine_rocks::RocksEngine; use futures::{executor::block_on, 
stream, SinkExt}; use grpcio::{ChannelBuilder, Environment, Result, WriteFlags}; use kvproto::{import_sstpb::*, kvrpcpb::*, tikvpb::*}; @@ -68,6 +69,57 @@ pub fn open_cluster_and_tikv_import_client( (cluster, ctx, tikv, import) } +#[allow(dead_code)] +pub fn open_cluster_and_tikv_import_client_v2( + cfg: Option, + cluster: &mut test_raftstore_v2::Cluster< + test_raftstore_v2::ServerCluster, + RocksEngine, + >, +) -> (Context, TikvClient, ImportSstClient) { + let cfg = cfg.unwrap_or_else(|| { + let mut config = TikvConfig::default(); + config.server.addr = "127.0.0.1:0".to_owned(); + let cleanup_interval = Duration::from_millis(10); + config.raft_store.cleanup_import_sst_interval.0 = cleanup_interval; + config.server.grpc_concurrency = 1; + config + }); + cluster.cfg = Config { + tikv: cfg.clone(), + prefer_mem: true, + }; + cluster.run(); + + let region_id = 1; + let leader = cluster.leader_of_region(region_id).unwrap(); + let epoch = cluster.get_region_epoch(region_id); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_peer(leader); + ctx.set_region_epoch(epoch); + + let ch = { + let env = Arc::new(Environment::new(1)); + let node = ctx.get_peer().get_store_id(); + let builder = ChannelBuilder::new(env) + .http2_max_ping_strikes(i32::MAX) // For pings without data from clients. 
+ .keepalive_time(cluster.cfg.server.grpc_keepalive_time.into()) + .keepalive_timeout(cluster.cfg.server.grpc_keepalive_timeout.into()); + + if cfg.security != SecurityConfig::default() { + let creds = test_util::new_channel_cred(); + builder.secure_connect(&cluster.sim.rl().get_addr(node), creds) + } else { + builder.connect(&cluster.sim.rl().get_addr(node)) + } + }; + let tikv = TikvClient::new(ch.clone()); + let import = ImportSstClient::new(ch); + + (ctx, tikv, import) +} + pub fn new_cluster_and_tikv_import_client() -> (Cluster, Context, TikvClient, ImportSstClient) { open_cluster_and_tikv_import_client(None) From 90abbca4d7323b2628e318d9f0d84e8ed2e55581 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Wed, 10 May 2023 23:53:20 +0800 Subject: [PATCH 0686/1149] txn: check memory lock for max ts replica read (#14716) close tikv/tikv#14715 Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tracker/src/lib.rs | 7 +- components/txn_types/src/lock.rs | 27 +++++++- src/server/raftkv/mod.rs | 2 +- tests/failpoints/cases/test_transaction.rs | 75 +++++++++++++++++++++- tests/integrations/storage/test_raftkv.rs | 35 ++++++---- 5 files changed, 126 insertions(+), 20 deletions(-) diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index fafd8415039..0682439bb45 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -68,7 +68,12 @@ impl Tracker { self.metrics.wf_commit_log_nanos - self.metrics.wf_batch_wait_nanos, ); detail.set_apply_batch_wait_nanos(self.metrics.apply_wait_nanos); - detail.set_apply_log_nanos(self.metrics.apply_time_nanos - self.metrics.apply_wait_nanos); + // When async_prewrite_apply is set, the `apply_time_nanos` could be less than + // apply_wait_nanos. 
+ if self.metrics.apply_time_nanos > self.metrics.apply_wait_nanos { + detail + .set_apply_log_nanos(self.metrics.apply_time_nanos - self.metrics.apply_wait_nanos); + } detail.set_apply_mutex_lock_nanos(self.metrics.apply_mutex_lock_nanos); detail.set_apply_write_leader_wait_nanos(self.metrics.apply_thread_wait_nanos); detail.set_apply_write_wal_nanos(self.metrics.apply_write_wal_nanos); diff --git a/components/txn_types/src/lock.rs b/components/txn_types/src/lock.rs index c8e37823bc4..103353318e0 100644 --- a/components/txn_types/src/lock.rs +++ b/components/txn_types/src/lock.rs @@ -409,6 +409,7 @@ impl Lock { key: &Key, ts: TimeStamp, bypass_locks: &TsSet, + is_replica_read: bool, ) -> Result<()> { if lock.ts > ts || lock.lock_type == LockType::Lock @@ -429,6 +430,14 @@ impl Lock { let raw_key = key.to_raw()?; + // Disable replica read for autocommit max ts read, to avoid breaking + // linearizability. See https://github.com/pingcap/tidb/issues/43583 for details. + if ts == TimeStamp::max() && is_replica_read { + return Err(Error::from(ErrorInner::KeyIsLocked( + lock.into_owned().into_lock_info(raw_key), + ))); + } + if ts == TimeStamp::max() && raw_key == lock.primary && !lock.use_async_commit { // When `ts == TimeStamp::max()` (which means to get latest committed version // for primary key), and current key is the primary key, we ignore @@ -478,7 +487,7 @@ impl Lock { iso_level: IsolationLevel, ) -> Result<()> { match iso_level { - IsolationLevel::Si => Lock::check_ts_conflict_si(lock, key, ts, bypass_locks), + IsolationLevel::Si => Lock::check_ts_conflict_si(lock, key, ts, bypass_locks, false), IsolationLevel::RcCheckTs => { Lock::check_ts_conflict_rc_check_ts(lock, key, ts, bypass_locks) } @@ -486,6 +495,22 @@ impl Lock { } } + pub fn check_ts_conflict_for_replica_read( + lock: Cow<'_, Self>, + key: &Key, + ts: TimeStamp, + bypass_locks: &TsSet, + iso_level: IsolationLevel, + ) -> Result<()> { + match iso_level { + IsolationLevel::Si => 
Lock::check_ts_conflict_si(lock, key, ts, bypass_locks, true), + IsolationLevel::RcCheckTs => { + unreachable!() + } + _ => Ok(()), + } + } + pub fn is_pessimistic_txn(&self) -> bool { !self.for_update_ts.is_zero() } diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 697a4b39d63..8cd9c2eed9e 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -735,7 +735,7 @@ impl ReadIndexObserver for ReplicaReadLockChecker { start_key.as_ref(), end_key.as_ref(), |key, lock| { - txn_types::Lock::check_ts_conflict( + txn_types::Lock::check_ts_conflict_for_replica_read( Cow::Borrowed(lock), key, start_ts, diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 564b5f393ec..a0e69108125 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -13,8 +13,8 @@ use futures::executor::block_on; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ kvrpcpb::{ - self as pb, AssertionLevel, Context, Op, PessimisticLockRequest, PrewriteRequest, - PrewriteRequestPessimisticAction::*, + self as pb, AssertionLevel, Context, GetRequest, Op, PessimisticLockRequest, + PrewriteRequest, PrewriteRequestPessimisticAction::*, }, tikvpb::TikvClient, }; @@ -26,7 +26,7 @@ use storage::{ }, txn::{self, commands}, }; -use test_raftstore::new_server_cluster; +use test_raftstore::{configure_for_lease_read, new_server_cluster}; use tikv::storage::{ self, kv::SnapshotExt, @@ -609,3 +609,72 @@ fn test_concurrent_write_after_transfer_leader_invalidates_locks() { &lock.into_lock().into_lock_info(b"key".to_vec()) ); } + +#[test] +fn test_read_index_with_max_ts() { + let mut cluster = new_server_cluster(0, 3); + // Increase the election tick to make this test case running reliably. + // Use async apply prewrite to let tikv response before applying on the leader + // peer. 
+ configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); + cluster.cfg.storage.enable_async_apply_prewrite = true; + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + let k0 = b"k0"; + let v0 = b"v0"; + let r1 = cluster.run_conf_change(); + let p2 = new_peer(2, 2); + cluster.pd_client.must_add_peer(r1, p2.clone()); + let p3 = new_peer(3, 3); + cluster.pd_client.must_add_peer(r1, p3.clone()); + cluster.must_put(k0, v0); + cluster.pd_client.must_none_pending_peer(p2.clone()); + cluster.pd_client.must_none_pending_peer(p3.clone()); + + let region = cluster.get_region(k0); + cluster.must_transfer_leader(region.get_id(), p3.clone()); + + // Block all write cmd applying of Peer 3(leader), then start to write to it. + let k1 = b"k1"; + let v1 = b"v1"; + let mut ctx_p3 = Context::default(); + ctx_p3.set_region_id(region.get_id()); + ctx_p3.set_region_epoch(region.get_region_epoch().clone()); + ctx_p3.set_peer(p3.clone()); + let mut ctx_p2 = ctx_p3.clone(); + ctx_p2.set_peer(p2.clone()); + + let start_ts = 10; + let mut mutation = pb::Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k1.to_vec(); + mutation.value = v1.to_vec(); + let mut req = PrewriteRequest::default(); + req.set_context(ctx_p3); + req.set_mutations(vec![mutation].into()); + req.set_start_version(start_ts); + req.try_one_pc = true; + req.set_primary_lock(k1.to_vec()); + + let env = Arc::new(Environment::new(1)); + let channel = + ChannelBuilder::new(env.clone()).connect(&cluster.sim.rl().get_addr(p3.get_store_id())); + let client_p3 = TikvClient::new(channel); + fail::cfg("on_apply_write_cmd", "sleep(2000)").unwrap(); + client_p3.kv_prewrite(&req).unwrap(); + + // The apply is blocked on leader, so the read index request with max ts should + // see the memory lock as it would be dropped after finishing apply. 
+ let channel = ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(p2.get_store_id())); + let client_p2 = TikvClient::new(channel); + let mut req = GetRequest::new(); + req.key = k1.to_vec(); + req.version = u64::MAX; + ctx_p2.replica_read = true; + req.set_context(ctx_p2); + let resp = client_p2.kv_get(&req).unwrap(); + assert!(resp.region_error.is_none()); + assert_eq!(resp.error.unwrap().locked.unwrap().lock_version, start_ts); + fail::remove("on_apply_write_cmd"); +} diff --git a/tests/integrations/storage/test_raftkv.rs b/tests/integrations/storage/test_raftkv.rs index 3dcdab0cf6b..5f7594a3672 100644 --- a/tests/integrations/storage/test_raftkv.rs +++ b/tests/integrations/storage/test_raftkv.rs @@ -11,7 +11,7 @@ use raft::eraftpb::MessageType; use test_raftstore::*; use tikv::storage::{kv::*, CfStatistics}; use tikv_util::{codec::bytes, HandyRwLock}; -use txn_types::{Key, Lock, LockType}; +use txn_types::{Key, Lock, LockType, TimeStamp}; #[test] fn test_raftkv() { @@ -255,20 +255,27 @@ fn test_read_on_replica_check_memory_locks() { follower_ctx.set_region_epoch(region.get_region_epoch().clone()); follower_ctx.set_peer(follower_peer.as_ref().unwrap().clone()); follower_ctx.set_replica_read(true); - let mut range = KeyRange::default(); - range.set_start_key(encoded_key.as_encoded().to_vec()); - let follower_snap_ctx = SnapContext { - pb_ctx: &follower_ctx, - start_ts: Some(100.into()), - key_ranges: vec![range], - ..Default::default() - }; - let mut follower_storage = cluster.sim.rl().storages[&follower_id].clone(); - match follower_storage.snapshot(follower_snap_ctx) { - Err(Error(box ErrorInner::KeyIsLocked(lock_info))) => { - assert_eq!(lock_info, lock.into_lock_info(raw_key.to_vec())) + for use_max_ts in [false, true] { + let mut range = KeyRange::default(); + range.set_start_key(encoded_key.as_encoded().to_vec()); + let ts = if use_max_ts { + Some(TimeStamp::max()) + } else { + Some(100.into()) + }; + let follower_snap_ctx = SnapContext { + pb_ctx: 
&follower_ctx, + start_ts: ts, + key_ranges: vec![range], + ..Default::default() + }; + let mut follower_storage = cluster.sim.rl().storages[&follower_id].clone(); + match follower_storage.snapshot(follower_snap_ctx) { + Err(Error(box ErrorInner::KeyIsLocked(lock_info))) => { + assert_eq!(lock_info, lock.clone().into_lock_info(raw_key.to_vec())) + } + other => panic!("unexpected result: {:?}", other), } - other => panic!("unexpected result: {:?}", other), } } From bd665121a4297f8a7b67912a3adafcf429ca2cd4 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 11 May 2023 15:59:21 +0800 Subject: [PATCH 0687/1149] raftstore-v2: implement replica read (#14706) close tikv/tikv#14701 raftstore-v2: implement replica read Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../src/operation/command/control.rs | 1 + .../raftstore-v2/src/operation/query/lease.rs | 40 +++- .../raftstore-v2/src/operation/query/local.rs | 45 ++++- .../raftstore-v2/src/operation/query/mod.rs | 18 +- .../src/operation/query/replica.rs | 26 ++- .../raftstore-v2/src/operation/ready/mod.rs | 10 +- components/raftstore-v2/src/router/imp.rs | 5 +- .../raftstore/src/store/worker/metrics.rs | 8 + components/test_raftstore-v2/src/cluster.rs | 11 +- components/test_raftstore-v2/src/node.rs | 5 +- components/test_raftstore-v2/src/util.rs | 7 +- components/test_raftstore/src/util.rs | 17 +- components/tikv_util/src/future.rs | 25 ++- components/tikv_util/src/macros.rs | 2 +- src/server/raftkv2/mod.rs | 2 +- tests/failpoints/cases/test_disk_full.rs | 6 +- tests/failpoints/cases/test_hibernate.rs | 4 +- tests/failpoints/cases/test_replica_read.rs | 10 +- .../integrations/raftstore/test_hibernate.rs | 16 +- .../integrations/raftstore/test_lease_read.rs | 8 +- .../raftstore/test_replica_read.rs | 187 +++++++----------- 21 files changed, 278 insertions(+), 175 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/control.rs 
b/components/raftstore-v2/src/operation/command/control.rs index 586d9f5c019..f05c9ca5297 100644 --- a/components/raftstore-v2/src/operation/command/control.rs +++ b/components/raftstore-v2/src/operation/command/control.rs @@ -82,6 +82,7 @@ pub struct ProposalControl { // should be empty or 1 element. And access speed is not a concern. proposed_admin_cmd: LinkedList, has_pending_prepare_merge: bool, + // Commit index of prepare merge. applied_prepare_merge_index: u64, term: u64, } diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index 3185f1bd24b..d0bd7b9e7ac 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -4,10 +4,13 @@ use std::sync::Mutex; use engine_traits::{KvEngine, RaftEngine}; use kvproto::raft_cmdpb::RaftCmdRequest; -use raftstore::store::{ - can_amend_read, fsm::apply::notify_stale_req, metrics::RAFT_READ_INDEX_PENDING_COUNT, - msg::ReadCallback, propose_read_index, should_renew_lease, util::LeaseState, ReadDelegate, - ReadIndexRequest, ReadProgress, Transport, +use raftstore::{ + store::{ + can_amend_read, fsm::apply::notify_stale_req, metrics::RAFT_READ_INDEX_PENDING_COUNT, + msg::ReadCallback, propose_read_index, should_renew_lease, util::LeaseState, ReadDelegate, + ReadIndexRequest, ReadProgress, Transport, + }, + Error, Result, }; use slog::debug; use tikv_util::time::monotonic_raw_now; @@ -22,6 +25,35 @@ use crate::{ }; impl Peer { + pub fn pre_read_index(&self) -> Result<()> { + fail::fail_point!("before_propose_readindex", |s| if s + .map_or(true, |s| s.parse().unwrap_or(true)) + { + Ok(()) + } else { + Err(tikv_util::box_err!( + "[{}] {} can not read due to injected failure", + self.region_id(), + self.peer_id() + )) + }); + + // See more in ready_to_handle_read(). 
+ if self.proposal_control().is_splitting() { + return Err(Error::ReadIndexNotReady { + reason: "can not read index due to split", + region_id: self.region_id(), + }); + } + if self.proposal_control().is_merging() { + return Err(Error::ReadIndexNotReady { + reason: "can not read index due to merge", + region_id: self.region_id(), + }); + } + Ok(()) + } + pub(crate) fn read_index_leader( &mut self, ctx: &mut StoreContext, diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index f574571f790..32a8960f18e 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -36,7 +36,7 @@ use crate::{ StoreRouter, }; -pub trait MsgRouter: Clone + Send { +pub trait MsgRouter: Clone + Send + 'static { fn send(&self, addr: u64, msg: PeerMsg) -> std::result::Result<(), TrySendError>; } @@ -184,10 +184,15 @@ where Ok(ReadRequestPolicy::StaleRead) => { ReadResult::Ok((delegate, ReadRequestPolicy::StaleRead)) } - // It can not handle other policies. // TODO: we should only abort when lease expires. For other cases we should retry // infinitely. - Ok(ReadRequestPolicy::ReadIndex) => ReadResult::Redirect, + Ok(ReadRequestPolicy::ReadIndex) => { + if req.get_header().get_replica_read() { + ReadResult::Ok((delegate, ReadRequestPolicy::ReadIndex)) + } else { + ReadResult::Redirect + } + } Err(e) => ReadResult::Err(e), } } @@ -195,6 +200,7 @@ where fn try_get_snapshot( &mut self, req: &RaftCmdRequest, + after_read_index: bool, ) -> ReadResult, RaftCmdResponse> { match self.pre_propose_raft_command(req) { ReadResult::Ok((mut delegate, policy)) => { @@ -243,7 +249,26 @@ where .with(|m| m.borrow_mut().local_executed_stale_read_requests.inc()); snap } - _ => unreachable!(), + ReadRequestPolicy::ReadIndex => { + // ReadIndex is returned only for replica read. + if !after_read_index { + // It needs to read index before getting snapshot. 
+ return ReadResult::Redirect; + } + + let region = Arc::clone(&delegate.region); + let snap = RegionSnapshot::from_snapshot( + Arc::new(delegate.cached_tablet.cache().snapshot()), + region, + ); + + TLS_LOCAL_READ_METRICS.with(|m| { + m.borrow_mut().local_executed_requests.inc(); + m.borrow_mut().local_executed_replica_read_requests.inc() + }); + + snap + } }; snap.txn_ext = Some(delegate.txn_ext.clone()); @@ -274,12 +299,13 @@ where pub fn snapshot( &mut self, mut req: RaftCmdRequest, - ) -> impl Future, RaftCmdResponse>> + Send - { + ) -> impl Future, RaftCmdResponse>> + + Send + + 'static { let region_id = req.header.get_ref().region_id; let mut tried_cnt = 0; let res = loop { - let res = self.try_get_snapshot(&req); + let res = self.try_get_snapshot(&req, false /* after_read_index */); match res { ReadResult::Ok(snap) => break Either::Left(Ok(snap)), ReadResult::Err(e) => break Either::Left(Err(e)), @@ -329,7 +355,7 @@ where // If query successful, try again. req.mut_header().set_read_quorum(false); loop { - let r = reader.try_get_snapshot(&req); + let r = reader.try_get_snapshot(&req, true /* after_read_index */); match r { ReadResult::Ok(snap) => return Ok(snap), ReadResult::Err(e) => return Err(e), @@ -366,7 +392,8 @@ where &self, region_id: u64, req: &RaftCmdRequest, - ) -> impl Future, RaftCmdResponse>> { + ) -> impl Future, RaftCmdResponse>> + 'static + { let mut req = req.clone(); // Remote lease is updated step by step. It's possible local reader expires // while the raftstore doesn't. 
So we need to trigger an update diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 81fb4e5e9de..5c42d4dfa8e 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -186,7 +186,21 @@ impl Peer { req: RaftCmdRequest, ch: QueryResChannel, ) { - // TODO: add pre_read_index to handle splitting or merging + if let Err(e) = self.pre_read_index() { + debug!( + self.logger, + "prevents unsafe read index"; + "err" => ?e, + ); + ctx.raft_metrics.propose.unsafe_read_index.inc(); + let mut resp = RaftCmdResponse::default(); + let term = self.term(); + cmd_resp::bind_term(&mut resp, term); + cmd_resp::bind_error(&mut resp, e); + ch.report_error(resp); + return; + } + if self.is_leader() { self.read_index_leader(ctx, req, ch); } else { @@ -282,7 +296,7 @@ impl Peer { self.storage().apply_state().get_applied_index() >= read_index // If it is in pending merge state(i.e. applied PrepareMerge), the data may be stale. 
// TODO: Add a test to cover this case - && self.proposal_control().has_applied_prepare_merge() + && !self.proposal_control().has_applied_prepare_merge() } #[inline] diff --git a/components/raftstore-v2/src/operation/query/replica.rs b/components/raftstore-v2/src/operation/query/replica.rs index 901fd9726f6..0565d985925 100644 --- a/components/raftstore-v2/src/operation/query/replica.rs +++ b/components/raftstore-v2/src/operation/query/replica.rs @@ -9,7 +9,7 @@ use raftstore::{ fsm::apply::notify_stale_req, metrics::RAFT_READ_INDEX_PENDING_COUNT, msg::{ErrorCallback, ReadCallback}, - propose_read_index, ReadIndexRequest, Transport, + propose_read_index, Config, ReadIndexContext, ReadIndexRequest, Transport, }, Error, }; @@ -23,6 +23,29 @@ use crate::{ router::{QueryResChannel, QueryResult, ReadResponse}, }; impl Peer { + /// `ReadIndex` requests could be lost in network, so on followers commands + /// could queue in `pending_reads` forever. Sending a new `ReadIndex` + /// periodically can resolve this. + pub fn retry_pending_reads(&mut self, cfg: &Config) { + if self.is_leader() + || !self.pending_reads_mut().check_needs_retry(cfg) + || self.pre_read_index().is_err() + { + return; + } + + let read = self.pending_reads().back().unwrap(); + debug!( + self.logger, + "request to get a read index from follower, retry"; + "request_id" => ?read.id, + ); + let ctx = + ReadIndexContext::fields_to_bytes(read.id, read.addition_request.as_deref(), None); + debug_assert!(read.read_index.is_none()); + self.raft_group_mut().read_index(ctx); + } + /// read index on follower /// /// call set_has_ready if it's proposed. @@ -49,6 +72,7 @@ impl Peer { .get_mut(0) .filter(|req| req.has_read_index()) .map(|req| req.take_read_index()); + // No need to check `dropped` as it only meaningful for leader. 
let (id, _dropped) = propose_read_index(self.raft_group_mut(), request.as_ref(), None); let now = monotonic_raw_now(); let mut read = ReadIndexRequest::with_command(id, req, ch, now); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 62e8fda7ba0..242ce55842d 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -96,7 +96,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, /// Raft relies on periodic ticks to keep the state machine sync with other /// peers. pub fn on_raft_tick(&mut self) { - if self.fsm.peer_mut().tick() { + if self.fsm.peer_mut().tick(self.store_ctx) { self.fsm.peer_mut().set_has_ready(); } self.fsm.peer_mut().maybe_clean_up_stale_merge_context(); @@ -152,11 +152,15 @@ impl Peer { } #[inline] - fn tick(&mut self) -> bool { + fn tick(&mut self, store_ctx: &mut StoreContext) -> bool { // When it's handling snapshot, it's pointless to tick as all the side // affects have to wait till snapshot is applied. On the other hand, ticking // will bring other corner cases like elections. 
- !self.is_handling_snapshot() && self.serving() && self.raft_group_mut().tick() + if self.is_handling_snapshot() || !self.serving() { + return false; + } + self.retry_pending_reads(&store_ctx.cfg); + self.raft_group_mut().tick() } pub fn on_peer_unreachable(&mut self, to_peer_id: u64) { diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index b28dc95aa35..325e4ee4a1b 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -159,8 +159,9 @@ impl RaftRouter { pub fn snapshot( &mut self, req: RaftCmdRequest, - ) -> impl Future, RaftCmdResponse>> + Send - { + ) -> impl Future, RaftCmdResponse>> + + Send + + 'static { self.local_reader.snapshot(req) } diff --git a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index 36a217be607..2ad06d9c69d 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -70,6 +70,7 @@ make_static_metric! { pub struct LocalReadMetrics { pub local_executed_requests: LocalIntCounter, pub local_executed_stale_read_requests: LocalIntCounter, + pub local_executed_replica_read_requests: LocalIntCounter, pub local_executed_snapshot_cache_hit: LocalIntCounter, pub reject_reason: LocalReadRejectCounter, pub renew_lease_advance: LocalIntCounter, @@ -81,6 +82,7 @@ thread_local! 
{ LocalReadMetrics { local_executed_requests: LOCAL_READ_EXECUTED_REQUESTS.local(), local_executed_stale_read_requests: LOCAL_READ_EXECUTED_STALE_READ_REQUESTS.local(), + local_executed_replica_read_requests: LOCAL_READ_EXECUTED_REPLICA_READ_REQUESTS.local(), local_executed_snapshot_cache_hit: LOCAL_READ_EXECUTED_CACHE_REQUESTS.local(), reject_reason: LocalReadRejectCounter::from(&LOCAL_READ_REJECT_VEC), renew_lease_advance: LOCAL_READ_RENEW_LEASE_ADVANCE_COUNTER.local(), @@ -98,6 +100,7 @@ pub fn maybe_tls_local_read_metrics_flush() { if m.last_flush_time.saturating_elapsed() >= Duration::from_millis(METRICS_FLUSH_INTERVAL) { m.local_executed_requests.flush(); m.local_executed_stale_read_requests.flush(); + m.local_executed_replica_read_requests.flush(); m.local_executed_snapshot_cache_hit.flush(); m.reject_reason.flush(); m.renew_lease_advance.flush(); @@ -180,6 +183,11 @@ lazy_static! { "Total number of stale read requests directly executed by local reader." ) .unwrap(); + pub static ref LOCAL_READ_EXECUTED_REPLICA_READ_REQUESTS: IntCounter = register_int_counter!( + "tikv_raftstore_local_read_executed_replica_read_requests", + "Total number of stale read requests directly executed by local reader." 
+ ) + .unwrap(); pub static ref RAFT_LOG_GC_WRITE_DURATION_HISTOGRAM: Histogram = register_histogram!( "tikv_raftstore_raft_log_gc_write_duration_secs", "Bucketed histogram of write duration of raft log gc.", diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 1a3c69c1bb5..efbbea96173 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -117,7 +117,7 @@ pub trait Simulator { fn async_read( &mut self, request: RaftCmdRequest, - ) -> impl Future> + Send { + ) -> impl Future> + Send + 'static { let mut req_clone = request.clone(); req_clone.clear_requests(); req_clone.mut_requests().push(new_snap_cmd()); @@ -174,7 +174,10 @@ pub trait Simulator { Ok(response) } - Err(e) => Ok(e), + Err(e) => { + error!("cluster.async_read fails"; "error" => ?e); + Ok(e) + } } } } @@ -182,7 +185,9 @@ pub trait Simulator { fn async_snapshot( &mut self, request: RaftCmdRequest, - ) -> impl Future, RaftCmdResponse>> + Send; + ) -> impl Future, RaftCmdResponse>> + + Send + + 'static; fn async_peer_msg_on_node(&self, node_id: u64, region_id: u64, msg: PeerMsg) -> Result<()>; diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index ffa38b51796..86b48715301 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -356,8 +356,9 @@ impl Simulator for NodeCluster { fn async_snapshot( &mut self, request: RaftCmdRequest, - ) -> impl Future, RaftCmdResponse>> + Send - { + ) -> impl Future, RaftCmdResponse>> + + Send + + 'static { let node_id = request.get_header().get_peer().get_store_id(); if !self .trans diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index 6e942e45e75..780b89fd032 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -7,7 +7,7 @@ use engine_rocks::{RocksEngine, RocksStatistics}; 
use engine_test::raft::RaftTestEngine; use engine_traits::{CfName, KvEngine, TabletRegistry, CF_DEFAULT}; use file_system::IoRateLimiter; -use futures::Future; +use futures::future::BoxFuture; use kvproto::{kvrpcpb::Context, metapb, raft_cmdpb::RaftCmdResponse}; use raftstore::Result; use rand::{prelude::SliceRandom, RngCore}; @@ -222,7 +222,7 @@ pub fn async_read_on_peer, EK: KvEngine>( key: &[u8], read_quorum: bool, replica_read: bool, -) -> impl Future> { +) -> BoxFuture<'static, Result> { let mut request = new_request( region.get_id(), region.get_region_epoch().clone(), @@ -231,7 +231,8 @@ pub fn async_read_on_peer, EK: KvEngine>( ); request.mut_header().set_peer(peer); request.mut_header().set_replica_read(replica_read); - cluster.sim.wl().async_read(request) + let f = cluster.sim.wl().async_read(request); + Box::pin(async move { f.await }) } pub fn test_delete_range, EK: KvEngine>(cluster: &mut Cluster, cf: CfName) { diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 2ed565745a7..7f21338be1a 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -3,6 +3,7 @@ use std::{ fmt::Write, path::Path, + result::Result as StdResult, str::FromStr, sync::{mpsc, Arc, Mutex}, thread, @@ -20,7 +21,7 @@ use engine_traits::{ CF_DEFAULT, CF_RAFT, }; use file_system::IoRateLimiter; -use futures::executor::block_on; +use futures::{channel::oneshot, executor::block_on, future::BoxFuture}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ encryptionpb::EncryptionMethod, @@ -435,7 +436,7 @@ pub fn async_read_on_peer( key: &[u8], read_quorum: bool, replica_read: bool, -) -> mpsc::Receiver { +) -> BoxFuture<'static, StdResult> { let node_id = peer.get_store_id(); let mut request = new_request( region.get_id(), @@ -445,10 +446,10 @@ pub fn async_read_on_peer( ); request.mut_header().set_peer(peer); request.mut_header().set_replica_read(replica_read); - let (tx, rx) = mpsc::sync_channel(1); 
+ let (tx, rx) = oneshot::channel(); let cb = Callback::read(Box::new(move |resp| drop(tx.send(resp.response)))); cluster.sim.wl().async_read(node_id, None, request, cb); - rx + Box::pin(async move { rx.await }) } pub fn batch_read_on_peer( @@ -645,11 +646,11 @@ pub fn configure_for_request_snapshot(cluster: &mut Cluster) { cluster.cfg.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(20)); } -pub fn configure_for_hibernate(cluster: &mut Cluster) { +pub fn configure_for_hibernate(config: &mut Config) { // Uses long check interval to make leader keep sleeping during tests. - cluster.cfg.raft_store.abnormal_leader_missing_duration = ReadableDuration::secs(20); - cluster.cfg.raft_store.max_leader_missing_duration = ReadableDuration::secs(40); - cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::secs(10); + config.raft_store.abnormal_leader_missing_duration = ReadableDuration::secs(20); + config.raft_store.max_leader_missing_duration = ReadableDuration::secs(40); + config.raft_store.peer_stale_state_check_interval = ReadableDuration::secs(10); } pub fn configure_for_snapshot(config: &mut Config) { diff --git a/components/tikv_util/src/future.rs b/components/tikv_util/src/future.rs index 7b22bebb482..875a8d97811 100644 --- a/components/tikv_util/src/future.rs +++ b/components/tikv_util/src/future.rs @@ -1,6 +1,7 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. use std::{ + borrow::BorrowMut, cell::UnsafeCell, sync::{ atomic::{AtomicU8, Ordering}, @@ -15,7 +16,7 @@ use futures::{ task::{self, ArcWake, Context, Poll}, }; -use crate::callback::must_call; +use crate::{callback::must_call, timer::GLOBAL_TIMER_HANDLE}; /// Generates a paired future and callback so that when callback is being /// called, its result is automatically passed as a future result. @@ -209,6 +210,28 @@ pub fn try_poll(f: impl Future) -> Option { }) } +// Run a future with a timeout on the current thread. Returns Err if times out. 
+#[allow(clippy::result_unit_err)] +pub fn block_on_timeout(mut fut: B, dur: std::time::Duration) -> Result +where + F: std::future::Future + Unpin, + B: BorrowMut, +{ + use futures_util::compat::Future01CompatExt; + + let mut timeout = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + dur) + .compat() + .fuse(); + let mut f = fut.borrow_mut().fuse(); + futures::executor::block_on(async { + futures::select! { + _ = timeout => Err(()), + item = f => Ok(item), + } + }) +} + #[cfg(test)] mod tests { use std::sync::atomic::AtomicUsize; diff --git a/components/tikv_util/src/macros.rs b/components/tikv_util/src/macros.rs index 10889046a3b..75323426f70 100644 --- a/components/tikv_util/src/macros.rs +++ b/components/tikv_util/src/macros.rs @@ -11,7 +11,7 @@ macro_rules! box_err { e.into() }); ($f:tt, $($arg:expr),+) => ({ - box_err!(format!($f, $($arg),+)) + $crate::box_err!(format!($f, $($arg),+)) }); } diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 5434da9ce91..6e92420e622 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -150,7 +150,7 @@ impl tikv_kv::Engine for RaftKv2 { Ok(()) } - type SnapshotRes = impl Future> + Send; + type SnapshotRes = impl Future> + Send + 'static; fn async_snapshot(&mut self, mut ctx: tikv_kv::SnapContext<'_>) -> Self::SnapshotRes { let mut req = Request::default(); req.set_cmd_type(CmdType::Snap); diff --git a/tests/failpoints/cases/test_disk_full.rs b/tests/failpoints/cases/test_disk_full.rs index f1b135ef86a..aba59d8c239 100644 --- a/tests/failpoints/cases/test_disk_full.rs +++ b/tests/failpoints/cases/test_disk_full.rs @@ -11,7 +11,7 @@ use kvproto::{ use raft::eraftpb::MessageType; use raftstore::store::msg::*; use test_raftstore::*; -use tikv_util::{config::ReadableDuration, time::Instant}; +use tikv_util::{config::ReadableDuration, future::block_on_timeout, time::Instant}; fn assert_disk_full(resp: &RaftCmdResponse) { assert!(resp.get_header().get_error().has_disk_full()); @@ 
-67,7 +67,9 @@ fn ensure_disk_usage_is_reported( let peer = new_peer(store_id, peer_id); let key = region.get_start_key(); let ch = async_read_on_peer(cluster, peer, region.clone(), key, true, true); - ch.recv_timeout(Duration::from_secs(1)).unwrap(); + block_on_timeout(ch, Duration::from_secs(1)) + .unwrap() + .unwrap(); } fn test_disk_full_leader_behaviors(usage: DiskUsage) { diff --git a/tests/failpoints/cases/test_hibernate.rs b/tests/failpoints/cases/test_hibernate.rs index 4dc404e58b8..d8670d9a21f 100644 --- a/tests/failpoints/cases/test_hibernate.rs +++ b/tests/failpoints/cases/test_hibernate.rs @@ -23,7 +23,7 @@ fn test_break_leadership_on_restart() { // stable. cluster.cfg.raft_store.raft_min_election_timeout_ticks = 10; cluster.cfg.raft_store.raft_max_election_timeout_ticks = 11; - configure_for_hibernate(&mut cluster); + configure_for_hibernate(&mut cluster.cfg); cluster.pd_client.disable_default_operator(); let r = cluster.run_conf_change(); cluster.pd_client.must_add_peer(r, new_peer(2, 2)); @@ -105,7 +105,7 @@ fn test_store_disconnect_with_hibernate() { // stable. 
cluster.cfg.raft_store.raft_min_election_timeout_ticks = 10; cluster.cfg.raft_store.raft_max_election_timeout_ticks = 11; - configure_for_hibernate(&mut cluster); + configure_for_hibernate(&mut cluster.cfg); cluster.pd_client.disable_default_operator(); let r = cluster.run_conf_change(); cluster.pd_client.must_add_peer(r, new_peer(2, 2)); diff --git a/tests/failpoints/cases/test_replica_read.rs b/tests/failpoints/cases/test_replica_read.rs index 9f844f582e4..82e1e255d5e 100644 --- a/tests/failpoints/cases/test_replica_read.rs +++ b/tests/failpoints/cases/test_replica_read.rs @@ -12,7 +12,7 @@ use futures::executor::block_on; use kvproto::raft_serverpb::{PeerState, RaftMessage, RegionLocalState}; use raft::eraftpb::MessageType; use test_raftstore::*; -use tikv_util::{config::ReadableDuration, HandyRwLock}; +use tikv_util::{config::ReadableDuration, future::block_on_timeout, HandyRwLock}; use txn_types::{Key, Lock, LockType}; #[test] @@ -400,7 +400,9 @@ fn test_new_split_learner_can_not_find_leader() { let new_region = cluster.get_region(b"k2"); let learner_peer = find_peer(&new_region, 3).unwrap().clone(); let resp_ch = async_read_on_peer(&mut cluster, learner_peer, new_region, b"k2", true, true); - let resp = resp_ch.recv_timeout(Duration::from_secs(3)).unwrap(); + let resp = block_on_timeout(resp_ch, Duration::from_secs(3)) + .unwrap() + .unwrap(); let exp_value = resp.get_responses()[0].get_get().get_value(); assert_eq!(exp_value, b"v2"); } @@ -476,7 +478,9 @@ fn test_replica_read_after_transfer_leader() { fail::remove(on_peer_collect_message_2); - let resp = resp_ch.recv_timeout(Duration::from_secs(3)).unwrap(); + let resp = block_on_timeout(resp_ch, Duration::from_secs(3)) + .unwrap() + .unwrap(); let exp_value = resp.get_responses()[0].get_get().get_value(); assert_eq!(exp_value, b"v2"); } diff --git a/tests/integrations/raftstore/test_hibernate.rs b/tests/integrations/raftstore/test_hibernate.rs index 73156becb0d..b6408f9ce91 100644 --- 
a/tests/integrations/raftstore/test_hibernate.rs +++ b/tests/integrations/raftstore/test_hibernate.rs @@ -15,7 +15,7 @@ use tikv_util::{time::Instant, HandyRwLock}; #[test] fn test_proposal_prevent_sleep() { let mut cluster = new_node_cluster(0, 3); - configure_for_hibernate(&mut cluster); + configure_for_hibernate(&mut cluster.cfg); cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); cluster.must_put(b"k1", b"v1"); @@ -108,7 +108,7 @@ fn test_proposal_prevent_sleep() { #[test] fn test_single_voter_restart() { let mut cluster = new_server_cluster(0, 2); - configure_for_hibernate(&mut cluster); + configure_for_hibernate(&mut cluster.cfg); cluster.pd_client.disable_default_operator(); cluster.run_conf_change(); cluster.pd_client.must_add_peer(1, new_learner_peer(2, 2)); @@ -127,7 +127,7 @@ fn test_single_voter_restart() { #[test] fn test_prompt_learner() { let mut cluster = new_server_cluster(0, 4); - configure_for_hibernate(&mut cluster); + configure_for_hibernate(&mut cluster.cfg); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(20); cluster.pd_client.disable_default_operator(); cluster.run_conf_change(); @@ -169,7 +169,7 @@ fn test_prompt_learner() { #[test] fn test_transfer_leader_delay() { let mut cluster = new_node_cluster(0, 3); - configure_for_hibernate(&mut cluster); + configure_for_hibernate(&mut cluster.cfg); cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); cluster.must_put(b"k1", b"v1"); @@ -237,7 +237,7 @@ fn test_transfer_leader_delay() { #[test] fn test_split_delay() { let mut cluster = new_server_cluster(0, 4); - configure_for_hibernate(&mut cluster); + configure_for_hibernate(&mut cluster.cfg); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(20); cluster.pd_client.disable_default_operator(); cluster.run_conf_change(); @@ -277,7 +277,7 @@ fn test_split_delay() { #[test] fn test_inconsistent_configuration() { let mut cluster = new_node_cluster(0, 3); - configure_for_hibernate(&mut cluster); + 
configure_for_hibernate(&mut cluster.cfg); cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); cluster.must_put(b"k1", b"v1"); @@ -362,7 +362,7 @@ fn test_inconsistent_configuration() { fn test_hibernate_feature_gate() { let mut cluster = new_node_cluster(0, 3); cluster.pd_client.reset_version("4.0.0"); - configure_for_hibernate(&mut cluster); + configure_for_hibernate(&mut cluster.cfg); cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); cluster.must_put(b"k1", b"v1"); @@ -411,7 +411,7 @@ fn test_hibernate_feature_gate() { #[test] fn test_leader_demoted_when_hibernated() { let mut cluster = new_node_cluster(0, 4); - configure_for_hibernate(&mut cluster); + configure_for_hibernate(&mut cluster.cfg); cluster.pd_client.disable_default_operator(); let r = cluster.run_conf_change(); cluster.pd_client.must_add_peer(r, new_peer(2, 2)); diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index 8ac364faae9..7b07b281236 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -16,7 +16,7 @@ use pd_client::PdClient; use raft::eraftpb::{ConfChangeType, MessageType}; use raftstore::store::{Callback, RegionSnapshot}; use test_raftstore::*; -use tikv_util::{config::*, time::Instant, HandyRwLock}; +use tikv_util::{config::*, future::block_on_timeout, time::Instant, HandyRwLock}; // A helper function for testing the lease reads and lease renewing. // The leader keeps a record of its leader lease, and uses the system's @@ -500,8 +500,10 @@ fn test_read_index_stale_in_suspect_lease() { cluster.must_put(b"k2", b"v2"); must_get_equal(&cluster.get_engine(3), b"k2", b"v2"); // Ensure peer 3 is ready to become leader. 
- let rx = async_read_on_peer(&mut cluster, new_peer(3, 3), r1.clone(), b"k2", true, true); - let resp = rx.recv_timeout(Duration::from_secs(3)).unwrap(); + let resp_ch = async_read_on_peer(&mut cluster, new_peer(3, 3), r1.clone(), b"k2", true, true); + let resp = block_on_timeout(resp_ch, Duration::from_secs(3)) + .unwrap() + .unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); assert_eq!( resp.get_responses()[0].get_get().get_value(), diff --git a/tests/integrations/raftstore/test_replica_read.rs b/tests/integrations/raftstore/test_replica_read.rs index 0359bacf436..10bc86e0b2b 100644 --- a/tests/integrations/raftstore/test_replica_read.rs +++ b/tests/integrations/raftstore/test_replica_read.rs @@ -12,13 +12,15 @@ use std::{ time::Duration, }; -use futures::{compat::Future01CompatExt, executor::block_on, FutureExt}; +use futures::executor::block_on; use kvproto::raft_serverpb::RaftMessage; use pd_client::PdClient; use raft::eraftpb::MessageType; use raftstore::{store::ReadIndexContext, Result}; -use test_raftstore::*; -use tikv_util::{config::*, time::Instant, timer::GLOBAL_TIMER_HANDLE, HandyRwLock}; +use test_raftstore::{Simulator as S1, *}; +use test_raftstore_macro::test_case; +use test_raftstore_v2::Simulator as S2; +use tikv_util::{config::*, future::block_on_timeout, time::Instant, HandyRwLock}; use txn_types::{Key, Lock, LockType}; use uuid::Uuid; @@ -53,9 +55,10 @@ impl Filter for CommitToFilter { } } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_replica_read_not_applied() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // Increase the election tick to make this test case running reliably. configure_for_lease_read(&mut cluster.cfg, Some(50), Some(30)); @@ -100,27 +103,33 @@ fn test_replica_read_not_applied() { let r1 = cluster.get_region(b"k1"); // Read index on follower should be blocked instead of get an old value. 
- let resp1_ch = async_read_on_peer(&mut cluster, new_peer(3, 3), r1.clone(), b"k1", true, true); - resp1_ch.recv_timeout(Duration::from_secs(1)).unwrap_err(); + let mut resp1_ch = + async_read_on_peer(&mut cluster, new_peer(3, 3), r1.clone(), b"k1", true, true); + block_on_timeout(resp1_ch.as_mut(), Duration::from_secs(1)).unwrap_err(); // Unpark all append responses so that the new leader can commit its first // entry. let router = cluster.sim.wl().get_router(2).unwrap(); for raft_msg in mem::take::>(dropped_msgs.lock().unwrap().as_mut()) { - router.send_raft_message(raft_msg).unwrap(); + #[allow(clippy::useless_conversion)] + router.send_raft_message(raft_msg.into()).unwrap(); } // The old read index request won't be blocked forever as it's retried // internally. cluster.sim.wl().clear_send_filters(1); cluster.sim.wl().clear_recv_filters(2); - let resp1 = resp1_ch.recv_timeout(Duration::from_secs(6)).unwrap(); + let resp1 = block_on_timeout(resp1_ch, Duration::from_secs(6)) + .unwrap() + .unwrap(); let exp_value = resp1.get_responses()[0].get_get().get_value(); assert_eq!(exp_value, b"v2"); // New read index requests can be resolved quickly. let resp2_ch = async_read_on_peer(&mut cluster, new_peer(3, 3), r1, b"k1", true, true); - let resp2 = resp2_ch.recv_timeout(Duration::from_secs(3)).unwrap(); + let resp2 = block_on_timeout(resp2_ch, Duration::from_secs(3)) + .unwrap() + .unwrap(); let exp_value = resp2.get_responses()[0].get_get().get_value(); assert_eq!(exp_value, b"v2"); } @@ -150,8 +159,8 @@ fn test_replica_read_on_hibernate() { let r1 = cluster.get_region(b"k1"); // Read index on follower should be blocked. 
- let resp1_ch = async_read_on_peer(&mut cluster, new_peer(1, 1), r1, b"k1", true, true); - resp1_ch.recv_timeout(Duration::from_secs(1)).unwrap_err(); + let mut resp1_ch = async_read_on_peer(&mut cluster, new_peer(1, 1), r1, b"k1", true, true); + block_on_timeout(resp1_ch.as_mut(), Duration::from_secs(1)).unwrap_err(); let (tx, rx) = mpsc::sync_channel(1024); let cb = Arc::new(move |msg: &RaftMessage| { @@ -220,7 +229,9 @@ fn test_read_hibernated_region() { cluster.pd_client.trigger_leader_info_loss(); // This request will fail because no valid leader. let resp1_ch = async_read_on_peer(&mut cluster, p2.clone(), region.clone(), b"k1", true, true); - let resp1 = resp1_ch.recv_timeout(Duration::from_secs(5)).unwrap(); + let resp1 = block_on_timeout(resp1_ch, Duration::from_secs(5)) + .unwrap() + .unwrap(); assert!( resp1.get_header().get_error().has_not_leader(), "{:?}", @@ -243,16 +254,19 @@ fn test_read_hibernated_region() { // Wait for the leader is woken up. thread::sleep(Duration::from_millis(500)); let resp2_ch = async_read_on_peer(&mut cluster, p2, region, b"k1", true, true); - let resp2 = resp2_ch.recv_timeout(Duration::from_secs(5)).unwrap(); + let resp2 = block_on_timeout(resp2_ch, Duration::from_secs(5)) + .unwrap() + .unwrap(); assert!(!resp2.get_header().has_error(), "{:?}", resp2); } /// The read index response can advance the commit index. -/// But in previous implemtation, we forget to set term in read index response +/// But in previous implementation, we forget to set term in read index response /// which causes panic in raft-rs. This test is to reproduce the case. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_replica_read_on_stale_peer() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_lease_read(&mut cluster.cfg, Some(50), Some(30)); let pd_client = Arc::clone(&cluster.pd_client); @@ -278,14 +292,13 @@ fn test_replica_read_on_stale_peer() { cluster.must_put(b"k2", b"v2"); let resp1_ch = async_read_on_peer(&mut cluster, peer_on_store3, region, b"k2", true, true); // must be timeout - resp1_ch - .recv_timeout(Duration::from_micros(100)) - .unwrap_err(); + block_on_timeout(resp1_ch, Duration::from_micros(100)).unwrap_err(); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_read_index_out_of_order() { - let mut cluster = new_node_cluster(0, 2); + let mut cluster = new_cluster(0, 2); // Use long election timeout and short lease. configure_for_lease_read(&mut cluster.cfg, Some(1000), Some(10)); @@ -312,20 +325,25 @@ fn test_read_index_out_of_order() { // Can't get read resonse because heartbeat responses are blocked. let r1 = cluster.get_region(b"k1"); - let resp1 = async_read_on_peer(&mut cluster, new_peer(1, 1), r1.clone(), b"k1", true, true); - resp1.recv_timeout(Duration::from_secs(2)).unwrap_err(); + let mut resp1 = async_read_on_peer(&mut cluster, new_peer(1, 1), r1.clone(), b"k1", true, true); + block_on_timeout(resp1.as_mut(), Duration::from_secs(2)).unwrap_err(); pd_client.must_remove_peer(rid, new_peer(2, 2)); // After peer 2 is removed, we can get 2 read responses. 
let resp2 = async_read_on_peer(&mut cluster, new_peer(1, 1), r1, b"k1", true, true); - resp2.recv_timeout(Duration::from_secs(1)).unwrap(); - resp1.recv_timeout(Duration::from_secs(1)).unwrap(); + block_on_timeout(resp2, Duration::from_secs(1)) + .unwrap() + .unwrap(); + block_on_timeout(resp1, Duration::from_secs(1)) + .unwrap() + .unwrap(); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] fn test_read_index_retry_lock_checking() { - let mut cluster = new_node_cluster(0, 2); + let mut cluster = new_cluster(0, 2); // Use long election timeout and short lease. configure_for_lease_read(&mut cluster.cfg, Some(50), Some(20)); @@ -397,11 +415,12 @@ fn test_read_index_retry_lock_checking() { ); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_split_isolation() { - let mut cluster = new_node_cluster(0, 2); + let mut cluster = new_cluster(0, 2); // Use long election timeout and short lease. - configure_for_hibernate(&mut cluster); + configure_for_hibernate(&mut cluster.cfg); configure_for_lease_read(&mut cluster.cfg, Some(50), Some(20)); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(11); let pd_client = Arc::clone(&cluster.pd_client); @@ -446,7 +465,9 @@ fn test_split_isolation() { // cannot be created. 
for _ in 0..10 { let resp = async_read_on_peer(&mut cluster, peer.clone(), r2.clone(), b"k1", true, true); - let resp = resp.recv_timeout(Duration::from_secs(1)).unwrap(); + let resp = block_on_timeout(resp, Duration::from_secs(1)) + .unwrap() + .unwrap(); if !resp.get_header().has_error() { return; } @@ -458,9 +479,10 @@ fn test_split_isolation() { /// Testing after applying snapshot, the `ReadDelegate` stored at `StoreMeta` /// will be replace with the new `ReadDelegate`, and the `ReadDelegate` stored /// at `LocalReader` should also be updated -#[test] -fn test_read_local_after_snapshpot_replace_peer() { - let mut cluster = new_node_cluster(0, 3); +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_read_local_after_snapshot_replace_peer() { + let mut cluster = new_cluster(0, 3); configure_for_lease_read(&mut cluster.cfg, Some(50), None); cluster.cfg.raft_store.raft_log_gc_threshold = 12; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(12); @@ -484,7 +506,9 @@ fn test_read_local_after_snapshpot_replace_peer() { // wait applying snapshot finish sleep_ms(100); let resp = async_read_on_peer(&mut cluster, new_peer(3, 3), r, b"k1", true, true); - let resp = resp.recv_timeout(Duration::from_secs(1)).unwrap(); + let resp = block_on_timeout(resp, Duration::from_secs(1)) + .unwrap() + .unwrap(); assert_eq!(resp.get_responses()[0].get_get().get_value(), b"v1"); // trigger leader send snapshot to peer 3 @@ -513,10 +537,12 @@ fn test_read_local_after_snapshpot_replace_peer() { let r = cluster.get_region(b"k1"); let resp = async_read_on_peer(&mut cluster, new_peer(3, 1003), r, b"k3", true, true); - let resp = resp.recv_timeout(Duration::from_secs(1)).unwrap(); + let resp = block_on_timeout(resp, Duration::from_secs(1)) + .unwrap() + .unwrap(); // should not have `mismatch peer id` error if resp.get_header().has_error() { - panic!("unexpect err: {:?}", resp.get_header().get_error()); + panic!("unexpected err: {:?}", 
resp.get_header().get_error()); } let exp_value = resp.get_responses()[0].get_get().get_value(); assert_eq!(exp_value, b"v3"); @@ -524,73 +550,10 @@ fn test_read_local_after_snapshpot_replace_peer() { /// The case checks if a malformed request should not corrupt the leader's read /// queue. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_malformed_read_index() { - let mut cluster = new_node_cluster(0, 3); - configure_for_lease_read(&mut cluster.cfg, Some(50), None); - cluster.cfg.raft_store.raft_log_gc_threshold = 12; - cluster.cfg.raft_store.raft_log_gc_count_limit = Some(12); - cluster.cfg.raft_store.hibernate_regions = true; - cluster.cfg.raft_store.check_leader_lease_interval = ReadableDuration::hours(10); - let pd_client = Arc::clone(&cluster.pd_client); - pd_client.disable_default_operator(); - - let region_id = cluster.run_conf_change(); - pd_client.must_add_peer(region_id, new_peer(2, 2)); - pd_client.must_add_peer(region_id, new_peer(3, 3)); - cluster.must_transfer_leader(1, new_peer(1, 1)); - cluster.must_put(b"k1", b"v1"); - for i in 1..=3 { - must_get_equal(&cluster.get_engine(i), b"k1", b"v1"); - } - - // Wait till lease expires. 
- std::thread::sleep( - cluster - .cfg - .raft_store - .raft_store_max_leader_lease() - .to_std() - .unwrap(), - ); - let region = cluster.get_region(b"k1"); - // Send a malformed request to leader - let mut raft_msg = raft::eraftpb::Message::default(); - raft_msg.set_msg_type(MessageType::MsgReadIndex); - let rctx = ReadIndexContext { - id: Uuid::new_v4(), - request: None, - locked: None, - }; - let mut e = raft::eraftpb::Entry::default(); - e.set_data(rctx.to_bytes().into()); - raft_msg.mut_entries().push(e); - raft_msg.from = 1; - raft_msg.to = 1; - let mut message = RaftMessage::default(); - message.set_region_id(region_id); - message.set_from_peer(new_peer(1, 1)); - message.set_to_peer(new_peer(1, 1)); - message.set_region_epoch(region.get_region_epoch().clone()); - message.set_message(raft_msg); - // So the read won't be handled soon. - cluster.add_send_filter(IsolationFilterFactory::new(1)); - cluster.send_raft_msg(message).unwrap(); - // Also send a correct request. If the malformed request doesn't corrupt - // the read queue, the correct request should be responded. - let resp = async_read_on_peer(&mut cluster, new_peer(1, 1), region, b"k1", true, false); - cluster.clear_send_filters(); - let resp = resp.recv_timeout(Duration::from_secs(10)).unwrap(); - assert_eq!(resp.get_responses()[0].get_get().get_value(), b"v1"); -} - -/// The case checks if a malformed request should not corrupt the leader's read -/// queue. -#[test] -fn test_malformed_read_index_v2() { - use test_raftstore_v2::*; - - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_lease_read(&mut cluster.cfg, Some(50), None); cluster.cfg.raft_store.raft_log_gc_threshold = 12; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(12); @@ -644,18 +607,8 @@ fn test_malformed_read_index_v2() { // the read queue, the correct request should be responded. 
let resp = async_read_on_peer(&mut cluster, new_peer(1, 1), region, b"k1", true, false); cluster.clear_send_filters(); - - let timeout = Duration::from_secs(10); - let timeout_f = GLOBAL_TIMER_HANDLE - .delay(std::time::Instant::now() + timeout) - .compat(); - let resp = futures::executor::block_on(async move { - futures::select! { - res = resp.fuse() => res.unwrap(), - e = timeout_f.fuse() => { - panic!("request timeout for {:?}: {:?}", timeout,e); - }, - } - }); + let resp = block_on_timeout(resp, Duration::from_secs(10)) + .unwrap() + .unwrap(); assert_eq!(resp.get_responses()[0].get_get().get_value(), b"v1"); } From 761de11487fc19558b700c63e41cd9fcb32a6995 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 11 May 2023 17:11:21 +0800 Subject: [PATCH 0688/1149] server: pass encryption key with tablet snapshot (#14097) ref tikv/tikv#12842 Signed-off-by: tabokie --- Cargo.lock | 3 +- components/encryption/export/src/lib.rs | 4 +- components/encryption/src/io.rs | 4 + components/encryption/src/lib.rs | 2 +- components/encryption/src/manager/mod.rs | 58 ++- components/raftstore-v2/Cargo.toml | 1 + .../raftstore-v2/src/operation/ready/mod.rs | 1 - .../src/operation/ready/snapshot.rs | 7 +- components/raftstore-v2/src/raft/storage.rs | 3 +- .../tests/integrations/cluster.rs | 36 +- components/test_raftstore-v2/src/cluster.rs | 4 +- components/test_raftstore-v2/src/util.rs | 16 +- components/test_util/src/encryption.rs | 2 +- src/server/tablet_snap.rs | 403 +++++++++++------- tests/integrations/raftstore/test_snap.rs | 40 +- .../raftstore/test_v1_v2_mixed.rs | 30 +- 16 files changed, 387 insertions(+), 227 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7e5ea1bc862..a570532e20c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2930,7 +2930,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#14ac513b9eff75028da1a56f54d36bfb082ac54f" +source = 
"git+https://github.com/pingcap/kvproto.git#2475f5bed8c358e9e9942546007b962f4530a1a6" dependencies = [ "futures 0.3.15", "grpcio", @@ -4621,6 +4621,7 @@ dependencies = [ "time 0.1.42", "tracker", "txn_types", + "walkdir", "yatp", ] diff --git a/components/encryption/export/src/lib.rs b/components/encryption/export/src/lib.rs index be86db83082..31730f162c6 100644 --- a/components/encryption/export/src/lib.rs +++ b/components/encryption/export/src/lib.rs @@ -15,8 +15,8 @@ use derive_more::Deref; pub use encryption::KmsBackend; pub use encryption::{ clean_up_dir, clean_up_trash, from_engine_encryption_method, trash_dir_all, Backend, - DataKeyManager, DataKeyManagerArgs, DecrypterReader, EncryptionConfig, Error, FileConfig, Iv, - KmsConfig, MasterKeyConfig, Result, + DataKeyImporter, DataKeyManager, DataKeyManagerArgs, DecrypterReader, EncryptionConfig, Error, + FileConfig, Iv, KmsConfig, MasterKeyConfig, Result, }; use encryption::{ DataKeyPair, EncryptedKey, FileBackend, KmsProvider, PlainKey, PlaintextBackend, diff --git a/components/encryption/src/io.rs b/components/encryption/src/io.rs index e02aafabe88..4e4baf516cb 100644 --- a/components/encryption/src/io.rs +++ b/components/encryption/src/io.rs @@ -82,6 +82,10 @@ impl DecrypterReader { iv, )?)) } + + pub fn inner(&self) -> &R { + &self.0.reader + } } impl Read for DecrypterReader { diff --git a/components/encryption/src/lib.rs b/components/encryption/src/lib.rs index c16142eb30b..b5d5c5571cc 100644 --- a/components/encryption/src/lib.rs +++ b/components/encryption/src/lib.rs @@ -24,7 +24,7 @@ pub use self::{ io::{ create_aes_ctr_crypter, DecrypterReader, DecrypterWriter, EncrypterReader, EncrypterWriter, }, - manager::{DataKeyManager, DataKeyManagerArgs}, + manager::{DataKeyImporter, DataKeyManager, DataKeyManagerArgs}, master_key::{ Backend, DataKeyPair, EncryptedKey, FileBackend, KmsBackend, KmsProvider, PlaintextBackend, }, diff --git a/components/encryption/src/manager/mod.rs 
b/components/encryption/src/manager/mod.rs index be7008a33ae..e340bebcc5a 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -21,7 +21,6 @@ use file_system::File; use kvproto::encryptionpb::{DataKey, EncryptionMethod, FileDictionary, FileInfo, KeyDictionary}; use protobuf::Message; use tikv_util::{box_err, debug, error, info, sys::thread::StdThreadBuildWrapper, thd_name, warn}; -use tokio::sync::oneshot; use crate::{ config::EncryptionConfig, @@ -397,7 +396,7 @@ fn check_stale_file_exist( enum RotateTask { Terminate, - Save(oneshot::Sender<()>), + Save(std::sync::mpsc::Sender<()>), } fn run_background_rotate_work( @@ -770,6 +769,27 @@ impl DataKeyManager { Ok(Some(encrypted_file)) } + /// Returns initial vector and data key. + pub fn get_file_internal(&self, fname: &str) -> IoResult, DataKey)>> { + let (key_id, iv) = { + match self.dicts.get_file(fname) { + Some(file) if file.method != EncryptionMethod::Plaintext => (file.key_id, file.iv), + _ => return Ok(None), + } + }; + // Fail if key is specified but not found. + let k = match self.dicts.key_dict.lock().unwrap().keys.get(&key_id) { + Some(k) => k.clone(), + None => { + return Err(IoError::new( + ErrorKind::NotFound, + format!("key not found for id {}", key_id), + )); + } + }; + Ok(Some((iv, k))) + } + /// Removes data keys under the directory `logical`. If `physical` is /// present, if means the `logical` directory is already physically renamed /// to `physical`. 
@@ -988,10 +1008,17 @@ impl<'a> DataKeyImporter<'a> { } pub fn commit(mut self) -> Result<()> { - let (tx, rx) = oneshot::channel(); if !self.key_additions.is_empty() { - self.manager.rotate_tx.send(RotateTask::Save(tx)).unwrap(); - rx.blocking_recv().unwrap(); + let (tx, rx) = std::sync::mpsc::channel(); + self.manager + .rotate_tx + .send(RotateTask::Save(tx)) + .map_err(|_| { + Error::Other(box_err!("Failed to request background key dict rotation")) + })?; + rx.recv().map_err(|_| { + Error::Other(box_err!("Failed to wait for background key dict rotation")) + })?; } if !self.file_additions.is_empty() { self.manager.dicts.file_dict_file.lock().unwrap().sync()?; @@ -1006,13 +1033,22 @@ impl<'a> DataKeyImporter<'a> { while let Some(f) = iter.next() { self.manager.dicts.delete_file(&f, iter.peek().is_none())?; } - for key_id in self.key_additions.drain(..) { - let mut key_dict = self.manager.dicts.key_dict.lock().unwrap(); - key_dict.keys.remove(&key_id); + if !self.key_additions.is_empty() { + for key_id in self.key_additions.drain(..) 
{ + let mut key_dict = self.manager.dicts.key_dict.lock().unwrap(); + key_dict.keys.remove(&key_id); + } + let (tx, rx) = std::sync::mpsc::channel(); + self.manager + .rotate_tx + .send(RotateTask::Save(tx)) + .map_err(|_| { + Error::Other(box_err!("Failed to request background key dict rotation")) + })?; + rx.recv().map_err(|_| { + Error::Other(box_err!("Failed to wait for background key dict rotation")) + })?; } - let (tx, rx) = oneshot::channel(); - self.manager.rotate_tx.send(RotateTask::Save(tx)).unwrap(); - rx.blocking_recv().unwrap(); Ok(()) } } diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 84daa4c40b5..15be69624e3 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -72,6 +72,7 @@ slog-global = { workspace = true } tempfile = "3.0" test_pd = { workspace = true } test_util = { workspace = true } +walkdir = "2" [[test]] name = "raftstore-v2-failpoints" diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 242ce55842d..b874fd773b7 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -1039,7 +1039,6 @@ impl Storage { write_task, &ctx.snap_mgr, &ctx.tablet_registry, - ctx.key_manager.as_ref(), ) { SNAP_COUNTER.apply.fail.inc(); error!(self.logger(),"failed to apply snapshot";"error" => ?e) diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 5547df7d580..d592ddea423 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -561,7 +561,6 @@ impl Storage { task: &mut WriteTask, snap_mgr: &TabletSnapManager, reg: &TabletRegistry, - key_manager: Option<&Arc>, ) -> Result<()> { let region_id = self.region().get_id(); let peer_id = self.peer().get_id(); @@ -660,7 +659,7 @@ impl Storage { // The 
snapshot require no additional processing such as ingest them to DB, but // it should load it into the factory after it persisted. let reg = reg.clone(); - let key_manager = key_manager.cloned(); + let key_manager = snap_mgr.key_manager().clone(); let hook = move || { if !install_tablet(®, key_manager.as_deref(), &path, region_id, last_index) { slog_panic!( @@ -672,7 +671,9 @@ impl Storage { } if clean_split { let path = temp_split_path(®, region_id); - // TODO(tabokie) + if let Some(m) = key_manager { + let _ = m.remove_dir(&path, None); + } let _ = fs::remove_dir_all(path); } }; diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 298ba5d451f..747fd035fd8 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -439,8 +439,7 @@ mod tests { .unwrap(); let snapshot = new_empty_snapshot(region.clone(), snap_index, snap_term, false); let mut task = WriteTask::new(region.get_id(), 5, 1); - s.apply_snapshot(&snapshot, &mut task, &mgr, ®, None) - .unwrap(); + s.apply_snapshot(&snapshot, &mut task, &mgr, ®).unwrap(); // Add more entries to check if old entries are cleared. If not, it should panic // with memtable hole when using raft engine. 
let entries = (snap_index + 1..=snap_index + 10) diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 83cf3646b9b..5a52c0809db 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -15,12 +15,13 @@ use causal_ts::CausalTsProviderImpl; use collections::HashSet; use concurrency_manager::ConcurrencyManager; use crossbeam::channel::{self, Receiver, Sender, TrySendError}; +use encryption_export::{data_key_manager_from_config, DataKeyImporter}; use engine_test::{ ctor::{CfOptions, DbOptions}, kv::{KvTestEngine, KvTestSnapshot, TestTabletFactory}, raft::RaftTestEngine, }; -use engine_traits::{TabletContext, TabletRegistry, DATA_CFS}; +use engine_traits::{EncryptionKeyManager, TabletContext, TabletRegistry, DATA_CFS}; use futures::executor::block_on; use kvproto::{ kvrpcpb::ApiVersion, @@ -262,14 +263,12 @@ impl RunningState { causal_ts_provider: Option>, logger: &Logger, ) -> (TestRouter, Self) { - // TODO(tabokie): Enable encryption by default. 
(after snapshot encryption) - // let encryption_cfg = test_util::new_file_security_config(path); - // let key_manager = Some(Arc::new( - // data_key_manager_from_config(&encryption_cfg, path.to_str().unwrap()) - // .unwrap() - // .unwrap(), - // )); - let key_manager = None; + let encryption_cfg = test_util::new_file_security_config(path); + let key_manager = Some(Arc::new( + data_key_manager_from_config(&encryption_cfg, path.to_str().unwrap()) + .unwrap() + .unwrap(), + )); let mut opts = engine_test::ctor::RaftDbOptions::default(); opts.set_key_manager(key_manager.clone()); @@ -633,7 +632,24 @@ impl Cluster { let gen_path = from_snap_mgr.tablet_gen_path(&key); let recv_path = to_snap_mgr.final_recv_path(&key); assert!(gen_path.exists()); - std::fs::rename(gen_path, recv_path.clone()).unwrap(); + if let Some(m) = from_snap_mgr.key_manager() { + let mut importer = + DataKeyImporter::new(to_snap_mgr.key_manager().as_deref().unwrap()); + for e in walkdir::WalkDir::new(&gen_path).into_iter() { + let e = e.unwrap(); + let new_path = recv_path.join(e.path().file_name().unwrap()); + if let Some((iv, key)) = + m.get_file_internal(e.path().to_str().unwrap()).unwrap() + { + importer.add(new_path.to_str().unwrap(), iv, key).unwrap(); + } + } + importer.commit().unwrap(); + } + std::fs::rename(&gen_path, &recv_path).unwrap(); + if let Some(m) = from_snap_mgr.key_manager() { + m.delete_file(gen_path.to_str().unwrap()).unwrap(); + } assert!(recv_path.exists()); } regions.insert(msg.get_region_id()); diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index efbbea96173..41d3e563cbf 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -80,7 +80,7 @@ pub trait Simulator { node_id: u64, cfg: Config, store_meta: Arc>>, - key_mgr: Option>, + key_manager: Option>, raft_engine: RaftTestEngine, tablet_registry: TabletRegistry, resource_manager: &Option>, @@ -517,6 +517,7 @@ impl, 
EK: KvEngine> Cluster { debug!("starting node {}", node_id); let tablet_registry = self.tablet_registries[&node_id].clone(); let raft_engine = self.raft_engines[&node_id].clone(); + let key_mgr = self.key_managers_map[&node_id].clone(); let cfg = self.cfg.clone(); // if let Some(labels) = self.labels.get(&node_id) { @@ -538,7 +539,6 @@ impl, EK: KvEngine> Cluster { tikv_util::thread_group::set_properties(Some(props)); debug!("calling run node"; "node_id" => node_id); - let key_mgr = self.key_managers_map.get(&node_id).unwrap().clone(); self.sim.wl().run_node( node_id, cfg, diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index 780b89fd032..312d9be99c4 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -1,6 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{fmt::Write, sync::Arc, thread, time::Duration}; +use std::{fmt::Write, path::Path, sync::Arc, thread, time::Duration}; use encryption_export::{data_key_manager_from_config, DataKeyManager}; use engine_rocks::{RocksEngine, RocksStatistics}; @@ -8,7 +8,9 @@ use engine_test::raft::RaftTestEngine; use engine_traits::{CfName, KvEngine, TabletRegistry, CF_DEFAULT}; use file_system::IoRateLimiter; use futures::future::BoxFuture; -use kvproto::{kvrpcpb::Context, metapb, raft_cmdpb::RaftCmdResponse}; +use kvproto::{ + encryptionpb::EncryptionMethod, kvrpcpb::Context, metapb, raft_cmdpb::RaftCmdResponse, +}; use raftstore::Result; use rand::{prelude::SliceRandom, RngCore}; use server::common::ConfiguredRaftEngine; @@ -126,12 +128,22 @@ pub fn put_cf_till_size, EK: KvEngine>( key.into_bytes() } +pub fn configure_for_encryption(config: &mut Config) { + let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); + + let cfg = &mut config.security.encryption; + cfg.data_encryption_method = EncryptionMethod::Aes128Ctr; + cfg.data_key_rotation_period = ReadableDuration(Duration::from_millis(100)); 
+ cfg.master_key = test_util::new_test_file_master_key(manifest_dir); +} + pub fn configure_for_snapshot(config: &mut Config) { // Truncate the log quickly so that we can force sending snapshot. config.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); config.raft_store.raft_log_gc_count_limit = Some(2); config.raft_store.merge_max_log_gap = 1; config.raft_store.snap_mgr_gc_tick_interval = ReadableDuration::millis(50); + configure_for_encryption(config); } pub fn configure_for_lease_read_v2, EK: KvEngine>( diff --git a/components/test_util/src/encryption.rs b/components/test_util/src/encryption.rs index e09c0ce7cbb..3f1691b3d21 100644 --- a/components/test_util/src/encryption.rs +++ b/components/test_util/src/encryption.rs @@ -15,7 +15,7 @@ pub fn create_test_key_file(path: &str) { .unwrap(); } -fn new_test_file_master_key(tmp: &Path) -> MasterKeyConfig { +pub fn new_test_file_master_key(tmp: &Path) -> MasterKeyConfig { let key_path = tmp.join("test_key").to_str().unwrap().to_owned(); create_test_key_file(&key_path); MasterKeyConfig::File { diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index cb7ec7c988a..4b5bd81e243 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -3,8 +3,13 @@ //! This file contains the implementation of sending and receiving tablet //! snapshot. //! -//! Different from v1, tablet snapshot always tries to use cache to speed up -//! transfering speed. The protocol is quite simple: +//! v2 snapshot transfers engine data in its original form, instead of creating +//! new files like in v1. It's possible that receiver and sender share some data +//! files because they might derive from the same snapshot. To optimize transfer +//! speed, we first compare their file list and only send files missing from +//! receiver's "cache". +//! +//! # Protocol //! //! sender receiver //! send snapshot meta ----> receive snapshot meta @@ -16,14 +21,12 @@ //! wait for receiver <----- close sender //! 
finish -#[cfg(any(test, feature = "testexport"))] -use std::io; use std::{ cmp, convert::TryFrom, fmt::Debug, fs::{self, File}, - io::{BorrowedBuf, Read, Seek, SeekFrom, Write}, + io::{self, BorrowedBuf, Read, Seek, SeekFrom, Write}, path::Path, sync::{atomic::Ordering, Arc}, time::Duration, @@ -31,10 +34,11 @@ use std::{ use collections::HashMap; use crc64fast::Digest; -use engine_traits::{Checkpointer, KvEngine, TabletRegistry}; +use encryption_export::{DataKeyImporter, DataKeyManager}; +use engine_traits::{Checkpointer, EncryptionKeyManager, KvEngine, TabletRegistry}; use file_system::{IoType, OpenOptions, WithIoType}; use futures::{ - future::{Future, FutureExt}, + future::FutureExt, sink::{Sink, SinkExt}, stream::{Stream, StreamExt, TryStreamExt}, }; @@ -60,7 +64,7 @@ use tikv_util::{ config::{ReadableSize, Tracker, VersionTrack}, time::Instant, worker::Runnable, - DeferContext, + DeferContext, Either, }; use tokio::runtime::{Builder as RuntimeBuilder, Runtime}; @@ -80,7 +84,12 @@ fn is_sst(file_name: &str) -> bool { file_name.ends_with(".sst") } -async fn read_to(f: &mut File, to: &mut Vec, size: usize, limiter: &Limiter) -> Result<()> { +async fn read_to( + f: &mut impl Read, + to: &mut Vec, + size: usize, + limiter: &Limiter, +) -> Result<()> { // It's likely in page cache already. 
let cost = size / 2; limiter.consume(cost).await; @@ -97,6 +106,48 @@ async fn read_to(f: &mut File, to: &mut Vec, size: usize, limiter: &Limiter) Ok(()) } +struct EncryptedFile(Either>); + +impl Read for EncryptedFile { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match &mut self.0 { + Either::Left(f) => f.read(buf), + Either::Right(f) => f.read(buf), + } + } +} + +impl EncryptedFile { + fn open(key_manager: &Option>, path: &Path) -> Result { + let f = File::open(path)?; + let inner = if let Some(m) = key_manager { + Either::Right( + m.open_file_with_reader(path, f) + .map_err(|e| Error::Other(e.into()))?, + ) + } else { + Either::Left(f) + }; + Ok(Self(inner)) + } + + fn seek(&mut self, to: SeekFrom) -> Result { + let r = match &mut self.0 { + Either::Left(f) => f.seek(to)?, + Either::Right(f) => f.seek(to)?, + }; + Ok(r) + } + + fn len(&self) -> Result { + let r = match &self.0 { + Either::Left(f) => f.metadata()?.len(), + Either::Right(f) => f.inner().metadata()?.len(), + }; + Ok(r) + } +} + pub trait SnapCacheBuilder: Send + Sync { fn build(&self, region_id: u64, path: &Path) -> Result<()>; } @@ -194,15 +245,15 @@ fn protocol_error(exp: &str, act: impl Debug) -> Error { /// actual data of an SST; /// 3. The last `PREVIEW_CHUNK_LEN` bytes are the same, this contains checksum, /// properties and other medata of an SST. -pub(crate) async fn is_sst_match_preview( +async fn is_sst_match_preview( preview_meta: &TabletSnapshotFileMeta, target: &Path, buffer: &mut Vec, limiter: &Limiter, + key_manager: &Option>, ) -> Result { - let mut f = File::open(target)?; - let exist_len = f.metadata()?.len(); - if exist_len != preview_meta.file_size { + let mut f = EncryptedFile::open(key_manager, target)?; + if f.len()? 
!= preview_meta.file_size { return Ok(false); } @@ -233,11 +284,12 @@ pub(crate) async fn is_sst_match_preview( Ok(*buffer == preview_meta.trailing_chunk) } -pub(crate) async fn cleanup_cache( +async fn cleanup_cache( path: &Path, stream: &mut (impl Stream> + Unpin), sink: &mut (impl Sink<(TabletSnapshotResponse, WriteFlags), Error = grpcio::Error> + Unpin), limiter: &Limiter, + key_manager: &Option>, ) -> Result<(u64, Vec)> { let mut reused = 0; let mut exists = HashMap::default(); @@ -245,7 +297,9 @@ pub(crate) async fn cleanup_cache( let entry = entry?; let ft = entry.file_type()?; if ft.is_dir() { - // TODO(tabokie) + if let Some(m) = key_manager { + m.remove_dir(&entry.path(), None)?; + } fs::remove_dir_all(entry.path())?; continue; } @@ -270,7 +324,7 @@ pub(crate) async fn cleanup_cache( let mut buffer = Vec::with_capacity(PREVIEW_CHUNK_LEN); for meta in preview.take_metas().into_vec() { if is_sst(&meta.file_name) && let Some(p) = exists.remove(&meta.file_name) { - if is_sst_match_preview(&meta, &p, &mut buffer, limiter).await? { + if is_sst_match_preview(&meta, &p, &mut buffer, limiter, key_manager).await? 
{ reused += meta.file_size; continue; } @@ -292,19 +346,27 @@ pub(crate) async fn cleanup_cache( Ok((reused, missing)) } -pub(crate) async fn accept_one_file( +async fn accept_one_file( path: &Path, mut chunk: TabletSnapshotFileChunk, stream: &mut (impl Stream> + Unpin), limiter: &Limiter, + key_importer: &mut Option>, digest: &mut Digest, ) -> Result { + let iv = chunk.take_iv(); + let key = if chunk.has_key() { + Some(chunk.take_key()) + } else { + None + }; let name = chunk.file_name; digest.write(name.as_bytes()); + let path = path.join(&name); let mut f = OpenOptions::new() .write(true) .create_new(true) - .open(path.join(&name))?; + .open(&path)?; let exp_size = chunk.file_size; let mut file_size = 0; loop { @@ -323,6 +385,16 @@ pub(crate) async fn accept_one_file( f.write_all(&chunk.data)?; if exp_size == file_size { f.sync_data()?; + if let Some(key) = key { + if let Some(i) = key_importer { + i.add(path.to_str().unwrap(), iv, key) + .map_err(|e| Error::Other(e.into()))?; + } else { + return Err(Error::Other( + "encryption not enabled on receiving end".to_string().into(), + )); + } + } return Ok(exp_size); } chunk = match stream.next().await { @@ -335,14 +407,16 @@ pub(crate) async fn accept_one_file( } } -pub(crate) async fn accept_missing( +async fn accept_missing( path: &Path, missing_ssts: Vec, stream: &mut (impl Stream> + Unpin), limiter: &Limiter, + key_manager: &Option>, ) -> Result { let mut digest = Digest::default(); let mut received_bytes: u64 = 0; + let mut key_importer = key_manager.as_deref().map(|m| DataKeyImporter::new(m)); for name in missing_ssts { let chunk = match stream.next().await { Some(Ok(mut req)) if req.has_chunk() => req.take_chunk(), @@ -351,7 +425,8 @@ pub(crate) async fn accept_missing( if chunk.file_name != name { return Err(protocol_error(&name, &chunk.file_name)); } - received_bytes += accept_one_file(path, chunk, stream, limiter, &mut digest).await?; + received_bytes += + accept_one_file(path, chunk, stream, limiter, &mut 
key_importer, &mut digest).await?; } // Now receive other files. loop { @@ -370,6 +445,9 @@ pub(crate) async fn accept_missing( File::open(path)?.sync_data()?; let res = stream.next().await; return if res.is_none() { + if let Some(i) = key_importer { + i.commit().map_err(|e| Error::Other(e.into()))?; + } Ok(received_bytes) } else { Err(protocol_error("None", res)) @@ -380,11 +458,12 @@ pub(crate) async fn accept_missing( if chunk.file_name.is_empty() { return Err(protocol_error("file_name", &chunk.file_name)); } - received_bytes += accept_one_file(path, chunk, stream, limiter, &mut digest).await?; + received_bytes += + accept_one_file(path, chunk, stream, limiter, &mut key_importer, &mut digest).await?; } } -pub(crate) async fn recv_snap_files<'a>( +async fn recv_snap_imp<'a>( snap_mgr: &'a TabletSnapManager, cache_builder: impl SnapCacheBuilder, mut stream: impl Stream> + Unpin, @@ -410,7 +489,9 @@ pub(crate) async fn recv_snap_files<'a>( let path = snap_mgr.tmp_recv_path(&context.key); info!("begin to receive tablet snapshot files"; "file" => %path.display(), "region_id" => region_id); if path.exists() { - // TODO(tabokie) + if let Some(m) = snap_mgr.key_manager() { + m.remove_dir(&path, None)?; + } fs::remove_dir_all(&path)?; } let (reused, missing_ssts) = if context.use_cache { @@ -418,24 +499,40 @@ pub(crate) async fn recv_snap_files<'a>( info!("not using cache"; "region_id" => region_id, "err" => ?e); fs::create_dir_all(&path)?; } - cleanup_cache(&path, &mut stream, sink, &limiter).await? + cleanup_cache(&path, &mut stream, sink, &limiter, snap_mgr.key_manager()).await? 
} else { info!("not using cache"; "region_id" => region_id); fs::create_dir_all(&path)?; (0, vec![]) }; - let received = accept_missing(&path, missing_ssts, &mut stream, &limiter).await?; + let received = accept_missing( + &path, + missing_ssts, + &mut stream, + &limiter, + snap_mgr.key_manager(), + ) + .await?; info!("received all tablet snapshot file"; "snap_key" => %context.key, "region_id" => region_id, "received" => received, "reused" => reused); let final_path = snap_mgr.final_recv_path(&context.key); - // TODO(tabokie) - fs::rename(&path, final_path)?; - + if let Some(m) = snap_mgr.key_manager() { + m.link_file(path.to_str().unwrap(), final_path.to_str().unwrap())?; + } + fs::rename(&path, &final_path).map_err(|e| { + if let Some(m) = snap_mgr.key_manager() { + let _ = m.delete_file(final_path.to_str().unwrap()); + } + e + })?; + if let Some(m) = snap_mgr.key_manager() { + m.delete_file(path.to_str().unwrap())?; + } Ok(context) } pub(crate) async fn recv_snap( stream: RequestStream, - sink: DuplexSink, + mut sink: DuplexSink, snap_mgr: TabletSnapManager, raft_router: R, cache_builder: impl SnapCacheBuilder, @@ -443,8 +540,7 @@ pub(crate) async fn recv_snap( snap_mgr_v1: Option, ) -> Result<()> { let stream = stream.map_err(Error::from); - let mut sink = sink; - let res = recv_snap_files(&snap_mgr, cache_builder, stream, &mut sink, limiter) + let res = recv_snap_imp(&snap_mgr, cache_builder, stream, &mut sink, limiter) .await .and_then(|context| { // some means we are in raftstore-v1 config and received a tablet snapshot from @@ -459,18 +555,20 @@ pub(crate) async fn recv_snap( context.finish(raft_router) }); match res { - Ok(()) => sink.close().await.map_err(Error::from), + Ok(()) => sink.close().await?, Err(e) => { let status = RpcStatus::with_message(RpcStatusCode::UNKNOWN, format!("{:?}", e)); - sink.fail(status).await.map_err(Error::from) + sink.fail(status).await?; } } + Ok(()) } async fn build_one_preview( path: &Path, iter: &mut impl Iterator, 
limiter: &Limiter, + key_manager: &Option>, ) -> Result { let mut preview = TabletSnapshotPreview::default(); for _ in 0..PREVIEW_BATCH_SIZE { @@ -481,7 +579,7 @@ async fn build_one_preview( let mut meta = TabletSnapshotFileMeta::default(); meta.file_name = name.clone(); meta.file_size = size; - let mut f = File::open(path.join(name))?; + let mut f = EncryptedFile::open(key_manager, &path.join(name))?; let to_read = cmp::min(size as usize, PREVIEW_CHUNK_LEN); read_to(&mut f, &mut meta.head_chunk, to_read, limiter).await?; if size > PREVIEW_CHUNK_LEN as u64 { @@ -501,6 +599,7 @@ async fn find_missing( sender: &mut (impl Sink<(TabletSnapshotRequest, WriteFlags), Error = Error> + Unpin), receiver: &mut (impl Stream> + Unpin), limiter: &Limiter, + key_manager: &Option>, ) -> Result> { let mut sst_sizes = 0; let mut ssts = HashMap::default(); @@ -533,19 +632,15 @@ async fn find_missing( head.mut_head().set_use_cache(true); // Send immediately to make receiver collect cache earlier. sender.send((head, WriteFlags::default())).await?; - let sst_count = ssts.len(); - // PREVIEW_BATCH_SIZE -> 1, PREVIEW_BATCH_SIZE + 1 = 2. sst_count can't be 0. 
- let batch_count = (sst_count - 1) / PREVIEW_BATCH_SIZE + 1; - let mut ssts_iter = ssts.iter(); - for _ in 0..batch_count { - let req = build_one_preview(path, &mut ssts_iter, limiter).await?; + let mut ssts_iter = ssts.iter().peekable(); + while ssts_iter.peek().is_some() { + let mut req = build_one_preview(path, &mut ssts_iter, limiter, key_manager).await?; + let is_end = ssts_iter.peek().is_none(); + req.mut_preview().end = is_end; sender - .send((req, WriteFlags::default().buffer_hint(true))) + .send((req, WriteFlags::default().buffer_hint(!is_end))) .await?; } - let mut req = build_one_preview(path, &mut ssts_iter, limiter).await?; - req.mut_preview().end = true; - sender.send((req, WriteFlags::default())).await?; let accepted = match receiver.next().await { Some(Ok(mut req)) if req.has_files() => req.take_files().take_file_name(), @@ -568,15 +663,23 @@ async fn send_missing( missing: Vec<(String, u64)>, sender: &mut (impl Sink<(TabletSnapshotRequest, WriteFlags), Error = Error> + Unpin), limiter: &Limiter, + key_manager: &Option>, ) -> Result<(u64, u64)> { let mut total_sent = 0; let mut digest = Digest::default(); for (name, mut file_size) in missing { + let file_path = path.join(&name); let mut chunk = TabletSnapshotFileChunk::default(); chunk.file_name = name; digest.write(chunk.file_name.as_bytes()); chunk.file_size = file_size; total_sent += file_size; + if let Some(m) = key_manager + && let Some((iv, key)) = m.get_file_internal(file_path.to_str().unwrap())? + { + chunk.iv = iv; + chunk.set_key(key); + } if file_size == 0 { let mut req = TabletSnapshotRequest::default(); req.set_chunk(chunk); @@ -586,7 +689,8 @@ async fn send_missing( continue; } - let mut f = File::open(path.join(&chunk.file_name))?; + // Send encrypted content. 
+ let mut f = File::open(&file_path)?; loop { let to_read = cmp::min(FILE_CHUNK_LEN as u64, file_size) as usize; read_to(&mut f, &mut chunk.data, to_read, limiter).await?; @@ -606,49 +710,16 @@ async fn send_missing( Ok((total_sent, digest.sum64())) } -async fn send_snap_files( - mgr: &TabletSnapManager, - mut sender: impl Sink<(TabletSnapshotRequest, WriteFlags), Error = Error> + Unpin, - receiver: &mut (impl Stream> + Unpin), - msg: RaftMessage, - key: TabletSnapKey, - limiter: Limiter, -) -> Result { - let region_id = key.region_id; - let to_peer = key.to_peer; - let path = mgr.tablet_gen_path(&key); - info!("begin to send snapshot file"; "snap_key" => %key, "region_id" => region_id, "to_peer" => to_peer); - let io_type = io_type_from_raft_message(&msg)?; - let _with_io_type = WithIoType::new(io_type); - let mut head = TabletSnapshotRequest::default(); - head.mut_head().set_message(msg); - let missing = find_missing(&path, head, &mut sender, receiver, &limiter).await?; - let (total_sent, checksum) = send_missing(&path, missing, &mut sender, &limiter).await?; - // In gRPC, stream in serverside can finish without error (when the connection - // is closed). So we need to use an explicit `Done` to indicate all messages - // are sent. In V1, we have checksum and meta list, so this is not a - // problem. - let mut req = TabletSnapshotRequest::default(); - req.mut_end().set_checksum(checksum); - sender.send((req, WriteFlags::default())).await?; - info!("sent all snap file finish"; "snap_key" => %key, "region_id" => region_id, "to_peer" => to_peer); - sender.close().await?; - Ok(total_sent) -} - /// Send the snapshot to specified address. /// /// It will first send the normal raft snapshot message and then send the /// snapshot file. 
-pub fn send_snap( - env: Arc, - mgr: TabletSnapManager, - security_mgr: Arc, - cfg: &Config, - addr: &str, +pub async fn send_snap( + client: TikvClient, + snap_mgr: TabletSnapManager, msg: RaftMessage, limiter: Limiter, -) -> Result>> { +) -> Result { assert!(msg.get_message().has_snapshot()); let timer = Instant::now(); let send_timer = SEND_SNAP_HISTOGRAM.start_coarse_timer(); @@ -658,45 +729,55 @@ pub fn send_snap( msg.get_message().get_snapshot(), ); let deregister = { - let (mgr, key) = (mgr.clone(), key.clone()); + let (snap_mgr, key) = (snap_mgr.clone(), key.clone()); DeferContext::new(move || { - mgr.finish_snapshot(key.clone(), timer); - mgr.delete_snapshot(&key); + snap_mgr.finish_snapshot(key.clone(), timer); + snap_mgr.delete_snapshot(&key); }) }; - - let cb = ChannelBuilder::new(env) - .stream_initial_window_size(cfg.grpc_stream_initial_window_size.0 as i32) - .keepalive_time(cfg.grpc_keepalive_time.0) - .keepalive_timeout(cfg.grpc_keepalive_timeout.0) - .default_compression_algorithm(cfg.grpc_compression_algorithm()) - .default_gzip_compression_level(cfg.grpc_gzip_compression_level) - .default_grpc_min_message_size_to_compress(cfg.grpc_min_message_size_to_compress); - - let channel = security_mgr.connect(cb, addr); - let client = TikvClient::new(channel); let (sink, mut receiver) = client.tablet_snapshot()?; - let send_task = async move { - let sink = sink.sink_map_err(Error::from); - let total_size = - send_snap_files(&mgr, sink, &mut receiver, msg, key.clone(), limiter).await?; - let recv_result = receiver.next().await; - send_timer.observe_duration(); - drop(client); - drop(deregister); - match recv_result { - None => Ok(SendStat { - key, - total_size, - elapsed: timer.saturating_elapsed(), - }), - Some(Err(e)) => Err(e.into()), - Some(Ok(resp)) => Err(Error::Other( - format!("receive unexpected response {:?}", resp).into(), - )), - } - }; - Ok(send_task) + let mut sink = sink.sink_map_err(Error::from); + let path = 
snap_mgr.tablet_gen_path(&key); + info!("begin to send snapshot file"; "snap_key" => %key); + let io_type = io_type_from_raft_message(&msg)?; + let _with_io_type = WithIoType::new(io_type); + let mut head = TabletSnapshotRequest::default(); + head.mut_head().set_message(msg); + let missing = find_missing( + &path, + head, + &mut sink, + &mut receiver, + &limiter, + snap_mgr.key_manager(), + ) + .await?; + let (total_size, checksum) = + send_missing(&path, missing, &mut sink, &limiter, snap_mgr.key_manager()).await?; + // In gRPC, stream in serverside can finish without error (when the connection + // is closed). So we need to use an explicit `Done` to indicate all messages + // are sent. In V1, we have checksum and meta list, so this is not a + // problem. + let mut req = TabletSnapshotRequest::default(); + req.mut_end().set_checksum(checksum); + sink.send((req, WriteFlags::default())).await?; + info!("sent all snap file finish"; "snap_key" => %key); + sink.close().await?; + let recv_result = receiver.next().await; + send_timer.observe_duration(); + drop(client); + drop(deregister); + match recv_result { + None => Ok(SendStat { + key, + total_size, + elapsed: timer.saturating_elapsed(), + }), + Some(Err(e)) => Err(e.into()), + Some(Ok(resp)) => Err(Error::Other( + format!("receive unexpected response {:?}", resp).into(), + )), + } } pub struct TabletRunner { @@ -804,12 +885,14 @@ where } SNAP_TASK_COUNTER_STATIC.recv.inc(); + recving_count.fetch_add(1, Ordering::SeqCst); + let snap_mgr = self.snap_mgr.clone(); let raft_router = self.raft_router.clone(); - recving_count.fetch_add(1, Ordering::SeqCst); let limiter = self.limiter.clone(); let cache_builder = self.cache_builder.clone(); - let task = async move { + + self.pool.spawn(async move { let result = recv_snap( stream, sink, @@ -824,8 +907,7 @@ where if let Err(e) = result { error!("failed to recv snapshot"; "err" => %e); } - }; - self.pool.spawn(task); + }); } Task::Send { addr, msg, cb } => { let region_id = 
msg.get_region_id(); @@ -846,25 +928,31 @@ where } SNAP_TASK_COUNTER_STATIC.send.inc(); - let env = Arc::clone(&self.env); - let mgr = self.snap_mgr.clone(); - let security_mgr = Arc::clone(&self.security_mgr); sending_count.fetch_add(1, Ordering::SeqCst); + + let snap_mgr = self.snap_mgr.clone(); + let security_mgr = Arc::clone(&self.security_mgr); let limiter = self.limiter.clone(); - let send_task = send_snap( - env, - mgr, - security_mgr, - &self.cfg.clone(), - &addr, - msg, - limiter, - ); - let task = async move { - let res = match send_task { - Err(e) => Err(e), - Ok(f) => f.await, - }; + + let channel_builder = ChannelBuilder::new(self.env.clone()) + .stream_initial_window_size(self.cfg.grpc_stream_initial_window_size.0 as i32) + .keepalive_time(self.cfg.grpc_keepalive_time.0) + .keepalive_timeout(self.cfg.grpc_keepalive_timeout.0) + .default_compression_algorithm(self.cfg.grpc_compression_algorithm()) + .default_gzip_compression_level(self.cfg.grpc_gzip_compression_level) + .default_grpc_min_message_size_to_compress( + self.cfg.grpc_min_message_size_to_compress, + ); + let channel = security_mgr.connect(channel_builder, &addr); + let client = TikvClient::new(channel); + + self.pool.spawn(async move { + let res = send_snap( + client, + snap_mgr, + msg, + limiter, + ).await; match res { Ok(stat) => { info!( @@ -882,9 +970,7 @@ where } }; sending_count.fetch_sub(1, Ordering::SeqCst); - }; - - self.pool.spawn(task); + }); } Task::RefreshConfigEvent => { self.refresh_cfg(); @@ -917,17 +1003,36 @@ pub fn copy_tablet_snapshot( let recv_path = recver_snap_mgr.tmp_recv_path(&recv_context.key); fs::create_dir_all(&recv_path)?; + let mut key_importer = recver_snap_mgr + .key_manager() + .as_deref() + .map(|m| DataKeyImporter::new(m)); for path in files { - let sender_name = path.file_name().unwrap().to_str().unwrap(); - let mut sender_f = File::open(&path)?; - - let recv_p = recv_path.join(sender_name); - let mut recv_f = File::create(recv_p)?; - - while 
io::copy(&mut sender_f, &mut recv_f)? != 0 {} + let recv = recv_path.join(path.file_name().unwrap()); + std::fs::copy(&path, &recv)?; + if let Some(m) = sender_snap_mgr.key_manager() + && let Some((iv, key)) = m.get_file_internal(path.to_str().unwrap())? + { + key_importer.as_mut().unwrap().add(recv.to_str().unwrap(), iv, key).unwrap(); + } + } + if let Some(i) = key_importer { + i.commit().unwrap(); } let final_path = recver_snap_mgr.final_recv_path(&recv_context.key); - fs::rename(&recv_path, final_path)?; + if let Some(m) = recver_snap_mgr.key_manager() { + m.link_file(recv_path.to_str().unwrap(), final_path.to_str().unwrap())?; + } + fs::rename(&recv_path, &final_path).map_err(|e| { + if let Some(m) = recver_snap_mgr.key_manager() { + let _ = m.delete_file(final_path.to_str().unwrap()); + } + e + })?; + if let Some(m) = recver_snap_mgr.key_manager() { + m.delete_file(recv_path.to_str().unwrap())?; + } + Ok(()) } diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index c790d10be45..d69f4aa70f0 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -15,8 +15,11 @@ use collections::HashMap; use engine_traits::{Checkpointer, KvEngine, RaftEngineReadOnly}; use file_system::{IoOp, IoType}; use futures::executor::block_on; -use grpcio::Environment; -use kvproto::raft_serverpb::*; +use grpcio::{self, ChannelBuilder, Environment}; +use kvproto::{ + raft_serverpb::{RaftMessage, RaftSnapshotData}, + tikvpb::TikvClient, +}; use raft::eraftpb::{Message, MessageType, Snapshot}; use raftstore::{ coprocessor::{ApplySnapshotObserver, BoxApplySnapshotObserver, Coprocessor, CoprocessorHost}, @@ -869,21 +872,14 @@ fn test_v1_apply_snap_from_v2() { let tablet_snap_mgr = cluster_v2.get_snap_mgr(1); let security_mgr = cluster_v2.get_security_mgr(); let (msg, snap_key) = generate_snap(&engine, region_id, &tablet_snap_mgr); - let cfg = tikv::server::Config::default(); let limit = 
Limiter::new(f64::INFINITY); let env = Arc::new(Environment::new(1)); let _ = block_on(async { - send_snap_v2( - env.clone(), - tablet_snap_mgr.clone(), - security_mgr.clone(), - &cfg, - &s1_addr, - msg, - limit.clone(), - ) - .unwrap() - .await + let client = + TikvClient::new(security_mgr.connect(ChannelBuilder::new(env.clone()), &s1_addr)); + send_snap_v2(client, tablet_snap_mgr.clone(), msg, limit.clone()) + .await + .unwrap() }); let snap_mgr = cluster_v1.get_snap_mgr(region_id); @@ -900,17 +896,11 @@ fn test_v1_apply_snap_from_v2() { let region_id = region.get_id(); let (msg, snap_key) = generate_snap(&engine, region_id, &tablet_snap_mgr); let _ = block_on(async { - send_snap_v2( - env, - tablet_snap_mgr, - security_mgr, - &cfg, - &s1_addr, - msg, - limit, - ) - .unwrap() - .await + let client = + TikvClient::new(security_mgr.connect(ChannelBuilder::new(env.clone()), &s1_addr)); + send_snap_v2(client, tablet_snap_mgr, msg, limit) + .await + .unwrap() }); let snap_mgr = cluster_v1.get_snap_mgr(region_id); diff --git a/tests/integrations/raftstore/test_v1_v2_mixed.rs b/tests/integrations/raftstore/test_v1_v2_mixed.rs index 1514529b209..27e2173e04d 100644 --- a/tests/integrations/raftstore/test_v1_v2_mixed.rs +++ b/tests/integrations/raftstore/test_v1_v2_mixed.rs @@ -8,8 +8,11 @@ use std::{ use engine_rocks::{RocksCfOptions, RocksDbOptions}; use engine_traits::{Checkpointer, KvEngine, Peekable, SyncMutable, LARGE_CFS}; use futures::executor::block_on; -use grpcio::Environment; -use kvproto::raft_serverpb::{RaftMessage, *}; +use grpcio::{ChannelBuilder, Environment}; +use kvproto::{ + raft_serverpb::{RaftMessage, *}, + tikvpb::TikvClient, +}; use raft::eraftpb::{MessageType, Snapshot}; use raftstore::{ errors::Result, @@ -132,26 +135,19 @@ fn test_v1_receive_snap_from_v2() { let snap_mgr = cluster_v2.get_snap_mgr(1); let security_mgr = cluster_v2.get_security_mgr(); let (msg, snap_key) = generate_snap(&engine, region_id, &snap_mgr); - let cfg = 
tikv::server::Config::default(); let limit = Limiter::new(f64::INFINITY); let env = Arc::new(Environment::new(1)); let _ = block_on(async { - send_snap_v2( - env.clone(), - snap_mgr.clone(), - security_mgr.clone(), - &cfg, - &s1_addr, - msg.clone(), - limit.clone(), - ) - .unwrap() - .await + let client = + TikvClient::new(security_mgr.connect(ChannelBuilder::new(env.clone()), &s1_addr)); + send_snap_v2(client, snap_mgr.clone(), msg.clone(), limit.clone()) + .await + .unwrap() }); let send_result = block_on(async { - send_snap_v2(env, snap_mgr, security_mgr, &cfg, &s2_addr, msg, limit) - .unwrap() - .await + let client = + TikvClient::new(security_mgr.connect(ChannelBuilder::new(env.clone()), &s2_addr)); + send_snap_v2(client, snap_mgr, msg, limit).await }); // snapshot should be rejected by cluster v1 tikv, and the snapshot should be // deleted. From 523ff95f6a652a5d9d8e199ebd689147f0908e33 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 16 May 2023 19:07:17 +0800 Subject: [PATCH 0689/1149] raftstore-v2: check locks on step read index (#14736) ref tikv/tikv#14701 raftstore-v2: check locks on step read index Signed-off-by: Neil Shen --- components/raftstore-v2/src/batch/store.rs | 10 + .../src/operation/command/admin/split.rs | 5 + .../raftstore-v2/src/operation/command/mod.rs | 7 + .../raftstore-v2/src/operation/query/lease.rs | 54 ++++- .../raftstore-v2/src/operation/query/local.rs | 14 +- .../raftstore-v2/src/operation/query/mod.rs | 7 +- .../src/operation/query/replica.rs | 2 +- .../raftstore-v2/src/operation/ready/mod.rs | 18 +- .../src/operation/ready/snapshot.rs | 1 + components/raftstore-v2/src/raft/apply.rs | 4 + components/raftstore-v2/src/raft/peer.rs | 9 +- components/raftstore/src/store/peer.rs | 12 +- components/raftstore/src/store/worker/read.rs | 16 +- components/test_raftstore-v2/src/cluster.rs | 71 ++++--- components/test_raftstore-v2/src/node.rs | 6 +- components/test_raftstore-v2/src/server.rs | 7 +- components/test_raftstore-v2/src/util.rs 
| 49 ++++- components/test_raftstore/src/cluster.rs | 20 +- components/test_raftstore/src/util.rs | 54 +++-- components/tikv_util/src/mpsc/future.rs | 15 +- .../cases/test_cmd_epoch_checker.rs | 55 ++--- tests/failpoints/cases/test_conf_change.rs | 2 +- tests/failpoints/cases/test_disk_full.rs | 14 +- tests/failpoints/cases/test_merge.rs | 2 +- tests/failpoints/cases/test_replica_read.rs | 194 +++++++++--------- tests/failpoints/cases/test_stale_read.rs | 2 +- .../integrations/raftstore/test_hibernate.rs | 2 +- .../raftstore/test_joint_consensus.rs | 15 +- .../integrations/raftstore/test_lease_read.rs | 10 +- tests/integrations/raftstore/test_merge.rs | 4 +- .../raftstore/test_replica_read.rs | 68 +++--- .../raftstore/test_replication_mode.rs | 34 ++- tests/integrations/storage/test_raftkv.rs | 2 +- 33 files changed, 489 insertions(+), 296 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index d77786c9e6f..a4d42eec167 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -224,6 +224,16 @@ impl PollHandler>) -> HandleResult { + fail::fail_point!( + "pause_on_peer_collect_message", + fsm.deref_mut().peer().peer_id() == 1, + |_| unreachable!() + ); + fail::fail_point!( + "on_peer_collect_message_2", + fsm.deref_mut().peer().peer_id() == 2, + |_| unreachable!() + ); debug_assert!(self.peer_msg_buf.is_empty()); let batch_size = self.messages_per_tick(); let received_cnt = fsm.recv(&mut self.peer_msg_buf, batch_size); diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 0bb3abcec67..9dbd27f336d 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -400,6 +400,11 @@ impl Apply { self.peer().get_store_id() == 3, |_| { unreachable!() } ); + fail_point!( + "apply_before_split_1_3", 
+ self.peer_id() == 3 && self.region_id() == 1, + |_| { unreachable!() } + ); PEER_ADMIN_CMD_COUNTER.batch_split.all.inc(); let region = self.region(); diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 4b0e25caa8b..6643fe1558f 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -574,6 +574,13 @@ impl Apply { entry.get_term(), ) { Ok(decoder) => { + fail::fail_point!( + "on_apply_write_cmd", + cfg!(release) || self.peer_id() == 3, + |_| { + unimplemented!(); + } + ); util::compare_region_epoch( decoder.header().get_region_epoch(), self.region(), diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index d0bd7b9e7ac..f76d724f06c 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -4,6 +4,10 @@ use std::sync::Mutex; use engine_traits::{KvEngine, RaftEngine}; use kvproto::raft_cmdpb::RaftCmdRequest; +use raft::{ + eraftpb::{self, MessageType}, + Storage, +}; use raftstore::{ store::{ can_amend_read, fsm::apply::notify_stale_req, metrics::RAFT_READ_INDEX_PENDING_COUNT, @@ -25,6 +29,43 @@ use crate::{ }; impl Peer { + pub fn on_step_read_index( + &mut self, + ctx: &mut StoreContext, + m: &mut eraftpb::Message, + ) -> bool { + assert_eq!(m.get_msg_type(), MessageType::MsgReadIndex); + + fail::fail_point!("on_step_read_index_msg"); + ctx.coprocessor_host + .on_step_read_index(m, self.state_role()); + // Must use the commit index of `PeerStorage` instead of the commit index + // in raft-rs which may be greater than the former one. + // For more details, see the annotations above `on_leader_commit_idx_changed`. 
+ let index = self.storage().entry_storage().commit_index(); + // Check if the log term of this index is equal to current term, if so, + // this index can be used to reply the read index request if the leader holds + // the lease. Please also take a look at raft-rs. + if self.storage().term(index).unwrap() == self.term() { + let state = self.inspect_lease(); + if let LeaseState::Valid = state { + // If current peer has valid lease, then we could handle the + // request directly, rather than send a heartbeat to check quorum. + let mut resp = eraftpb::Message::default(); + resp.set_msg_type(MessageType::MsgReadIndexResp); + resp.term = self.term(); + resp.to = m.from; + + resp.index = index; + resp.set_entries(m.take_entries()); + + self.raft_group_mut().raft.msgs.push(resp); + return true; + } + } + false + } + pub fn pre_read_index(&self) -> Result<()> { fail::fail_point!("before_propose_readindex", |s| if s .map_or(true, |s| s.parse().unwrap_or(true)) @@ -88,7 +129,7 @@ impl Peer { .get_mut(0) .filter(|req| req.has_read_index()) .map(|req| req.take_read_index()); - let (id, dropped) = propose_read_index(self.raft_group_mut(), request.as_ref(), None); + let (id, dropped) = propose_read_index(self.raft_group_mut(), request.as_ref()); if dropped { // The message gets dropped silently, can't be handled anymore. notify_stale_req(self.term(), ch); @@ -198,7 +239,7 @@ impl Peer { let term = self.term(); self.leader_lease_mut() .maybe_new_remote_lease(term) - .map(ReadProgress::leader_lease) + .map(ReadProgress::set_leader_lease) }; if let Some(progress) = progress { let mut meta = store_meta.lock().unwrap(); @@ -212,6 +253,15 @@ impl Peer { } } + // Expire lease and unset lease in read delegate on role changed to follower. 
+ pub(crate) fn expire_lease_on_became_follower(&mut self, store_meta: &Mutex>) { + self.leader_lease_mut().expire(); + let mut meta = store_meta.lock().unwrap(); + if let Some((reader, _)) = meta.readers.get_mut(&self.region_id()) { + self.maybe_update_read_progress(reader, ReadProgress::unset_leader_lease()); + } + } + pub(crate) fn maybe_update_read_progress( &self, reader: &mut ReadDelegate, diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 32a8960f18e..9101b1850e8 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -340,7 +340,17 @@ where Some(query_res) => { if query_res.read().is_none() { let QueryResult::Response(res) = query_res else { unreachable!() }; - assert!(res.get_header().has_error(), "{:?}", res); + // Get an error explicitly in header, + // or leader reports KeyIsLocked error via read index. + assert!( + res.get_header().has_error() + || res + .get_responses() + .get(0) + .map_or(false, |r| r.get_read_index().has_locked()), + "{:?}", + res + ); return Err(res); } } @@ -906,7 +916,7 @@ mod tests { .get_mut(&1) .unwrap() .0 - .update(ReadProgress::leader_lease(remote)); + .update(ReadProgress::set_leader_lease(remote)); }), rx, ch_tx.clone(), diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 5c42d4dfa8e..6e130a085dd 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -164,7 +164,12 @@ impl Peer { return Err(e); } - // TODO: check applying snapshot + // Check whether the peer is initialized. + if !self.storage().is_initialized() { + raft_metrics.invalid_proposal.region_not_initialized.inc(); + let region_id = msg.get_header().get_region_id(); + return Err(Error::RegionNotInitialized(region_id)); + } // Check whether the term is stale. 
if let Err(e) = util::check_term(msg.get_header(), self.term()) { diff --git a/components/raftstore-v2/src/operation/query/replica.rs b/components/raftstore-v2/src/operation/query/replica.rs index 0565d985925..b4edbd2097a 100644 --- a/components/raftstore-v2/src/operation/query/replica.rs +++ b/components/raftstore-v2/src/operation/query/replica.rs @@ -73,7 +73,7 @@ impl Peer { .filter(|req| req.has_read_index()) .map(|req| req.take_read_index()); // No need to check `dropped` as it only meaningful for leader. - let (id, _dropped) = propose_read_index(self.raft_group_mut(), request.as_ref(), None); + let (id, _dropped) = propose_read_index(self.raft_group_mut(), request.as_ref()); let now = monotonic_raw_now(); let mut read = ReadIndexRequest::with_command(id, req, ch, now); read.addition_request = request.map(Box::new); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index b874fd773b7..c0772eda0b7 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -298,7 +298,7 @@ impl Peer { if self.is_leader() { self.add_peer_heartbeat(from_peer.get_id(), Instant::now()); } - // We only cache peer with an vaild ID. + // We only cache peer with an valid ID. // It prevents cache peer(0,0) which is sent by region split. self.insert_peer_cache(from_peer); } @@ -316,6 +316,14 @@ impl Peer { ctx.raft_metrics.message_dropped.stale_msg.inc(); return; } + if msg.get_message().get_msg_type() == MessageType::MsgReadIndex + && self.is_leader() + && self.on_step_read_index(ctx, msg.mut_message()) + { + // Read index has respond in `on_step_read_index`. + return; + } + // As this peer is already created, the empty split message is meaningless. 
if is_empty_split_message(&msg) { ctx.raft_metrics.message_dropped.stale_msg.inc(); @@ -611,6 +619,12 @@ impl Peer { |entry| entry.index == self.raft_group().raft.raft_log.last_index() )); + fail::fail_point!( + "before_handle_snapshot_ready_3", + self.peer_id() == 3 && self.get_pending_snapshot().is_some(), + |_| () + ); + self.on_role_changed(ctx, &ready); if let Some(hs) = ready.hs() { @@ -903,7 +917,7 @@ impl Peer { self.maybe_schedule_gc_peer_tick(); } StateRole::Follower => { - self.leader_lease_mut().expire(); + self.expire_lease_on_became_follower(&ctx.store_meta); self.storage_mut().cancel_generating_snap(None); self.txn_context() .on_became_follower(self.term(), self.region()); diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index d592ddea423..696bf025984 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -661,6 +661,7 @@ impl Storage { let reg = reg.clone(); let key_manager = snap_mgr.key_manager().clone(); let hook = move || { + fail::fail_point!("region_apply_snap"); if !install_tablet(®, key_manager.as_deref(), &path, region_id, last_index) { slog_panic!( logger, diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index b3d74109c8d..155afbfc1a3 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -211,6 +211,10 @@ impl Apply { self.region().get_id() } + pub fn peer_id(&self) -> u64 { + self.peer.get_id() + } + /// The tablet can't be public yet, otherwise content of latest tablet /// doesn't matches its epoch in both readers and peer fsm. 
#[inline] diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index ae6d6f5bf81..1500737da3b 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -16,7 +16,7 @@ use kvproto::{ pdpb, raft_serverpb::RaftMessage, }; -use raft::{RawNode, StateRole}; +use raft::{eraftpb, RawNode, StateRole}; use raftstore::{ coprocessor::{CoprocessorHost, RegionChangeEvent, RegionChangeReason}, store::{ @@ -285,7 +285,7 @@ impl Peer { if let Some(progress) = self .leader_lease .maybe_new_remote_lease(self.term()) - .map(ReadProgress::leader_lease) + .map(ReadProgress::set_leader_lease) { self.maybe_update_read_progress(reader, progress); } @@ -432,6 +432,11 @@ impl Peer { self.raft_group.raft.raft_log.persisted } + #[inline] + pub fn get_pending_snapshot(&self) -> Option<&eraftpb::Snapshot> { + self.raft_group.snap() + } + #[inline] pub fn self_stat(&self) -> &PeerStat { &self.self_stat diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 8dc69a0def4..f21fccddff5 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -27,7 +27,7 @@ use fail::fail_point; use getset::{Getters, MutGetters}; use kvproto::{ errorpb, - kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp, LockInfo}, + kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp}, metapb::{self, PeerRole}, pdpb::{self, PeerStats}, raft_cmdpb::{ @@ -571,13 +571,12 @@ pub fn start_unsafe_recovery_report( pub fn propose_read_index( raft_group: &mut RawNode, request: Option<&raft_cmdpb::ReadIndexRequest>, - locked: Option<&LockInfo>, ) -> (Uuid, bool) { let last_pending_read_count = raft_group.raft.pending_read_count(); let last_ready_read_count = raft_group.raft.ready_read_count(); let id = Uuid::new_v4(); - raft_group.read_index(ReadIndexContext::fields_to_bytes(id, request, locked)); + raft_group.read_index(ReadIndexContext::fields_to_bytes(id, request, None)); let 
pending_read_count = raft_group.raft.pending_read_count(); let ready_read_count = raft_group.raft.ready_read_count(); @@ -3614,7 +3613,7 @@ where let term = self.term(); self.leader_lease .maybe_new_remote_lease(term) - .map(ReadProgress::leader_lease) + .map(ReadProgress::set_leader_lease) }; if let Some(progress) = progress { let mut meta = ctx.store_meta.lock().unwrap(); @@ -4051,7 +4050,7 @@ where .get_mut(0) .filter(|req| req.has_read_index()) .map(|req| req.take_read_index()); - let (id, dropped) = self.propose_read_index(request.as_ref(), None); + let (id, dropped) = self.propose_read_index(request.as_ref()); if dropped && self.is_leader() { // The message gets dropped silently, can't be handled anymore. apply::notify_stale_req(self.term(), cb); @@ -4098,9 +4097,8 @@ where pub fn propose_read_index( &mut self, request: Option<&raft_cmdpb::ReadIndexRequest>, - locked: Option<&LockInfo>, ) -> (Uuid, bool) { - propose_read_index(&mut self.raft_group, request, locked) + propose_read_index(&mut self.raft_group, request) } /// Returns (minimal matched, minimal committed_index) diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 022bd457cd5..49171123f4a 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -496,7 +496,7 @@ impl ReadDelegate { self.applied_term = applied_term; } Progress::LeaderLease(leader_lease) => { - self.leader_lease = Some(leader_lease); + self.leader_lease = leader_lease; } Progress::RegionBuckets(bucket_meta) => { self.bucket_meta = Some(bucket_meta); @@ -631,7 +631,7 @@ pub enum Progress { Region(metapb::Region), Term(u64), AppliedTerm(u64), - LeaderLease(RemoteLease), + LeaderLease(Option), RegionBuckets(Arc), WaitData(bool), } @@ -649,8 +649,12 @@ impl Progress { Progress::AppliedTerm(applied_term) } - pub fn leader_lease(lease: RemoteLease) -> Progress { - Progress::LeaderLease(lease) + pub fn set_leader_lease(lease: 
RemoteLease) -> Progress { + Progress::LeaderLease(Some(lease)) + } + + pub fn unset_leader_lease() -> Progress { + Progress::LeaderLease(None) } pub fn region_buckets(bucket_meta: Arc) -> Progress { @@ -1536,7 +1540,7 @@ mod tests { cmd.mut_header().set_term(term6 + 3); lease.expire_remote_lease(); let remote_lease = lease.maybe_new_remote_lease(term6 + 3).unwrap(); - let pg = Progress::leader_lease(remote_lease); + let pg = Progress::set_leader_lease(remote_lease); { let mut meta = store_meta.lock().unwrap(); meta.readers.get_mut(&1).unwrap().update(pg); @@ -1668,7 +1672,7 @@ mod tests { { let mut lease = Lease::new(Duration::seconds(1), Duration::milliseconds(250)); // 1s is long enough. let remote = lease.maybe_new_remote_lease(3).unwrap(); - let pg = Progress::leader_lease(remote); + let pg = Progress::set_leader_lease(remote); let mut meta = store_meta.lock().unwrap(); meta.readers.get_mut(&1).unwrap().update(pg); } diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 41d3e563cbf..89971ca1c80 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -17,7 +17,9 @@ use engine_traits::{ TabletRegistry, CF_DEFAULT, }; use file_system::IoRateLimiter; -use futures::{compat::Future01CompatExt, executor::block_on, select, Future, FutureExt}; +use futures::{ + compat::Future01CompatExt, executor::block_on, future::BoxFuture, select, Future, FutureExt, +}; use keys::{data_key, validate_data_key, DATA_PREFIX_KEY}; use kvproto::{ errorpb::Error as PbError, @@ -48,15 +50,17 @@ use resource_control::ResourceGroupManager; use tempfile::TempDir; use test_pd_client::TestPdClient; use test_raftstore::{ - is_error_response, new_admin_request, new_delete_cmd, new_delete_range_cmd, new_get_cf_cmd, - new_peer, new_prepare_merge, new_put_cf_cmd, new_region_detail_cmd, new_region_leader_cmd, - new_request, new_snap_cmd, new_status_request, new_store, 
new_tikv_config_with_api_ver, - new_transfer_leader_cmd, sleep_ms, Config, Filter, FilterFactory, PartitionFilterFactory, - RawEngine, + check_raft_cmd_request, is_error_response, new_admin_request, new_delete_cmd, + new_delete_range_cmd, new_get_cf_cmd, new_peer, new_prepare_merge, new_put_cf_cmd, + new_region_detail_cmd, new_region_leader_cmd, new_request, new_status_request, new_store, + new_tikv_config_with_api_ver, new_transfer_leader_cmd, sleep_ms, Config, Filter, FilterFactory, + PartitionFilterFactory, RawEngine, }; use tikv::{server::Result as ServerResult, storage::config::EngineType}; use tikv_util::{ - box_err, box_try, debug, error, safe_panic, + box_err, box_try, debug, error, + future::block_on_timeout, + safe_panic, thread_group::GroupProperties, time::{Instant, ThreadReadId}, timer::GLOBAL_TIMER_HANDLE, @@ -101,12 +105,13 @@ pub trait Simulator { fn send_raft_msg(&mut self, msg: RaftMessage) -> Result<()>; fn read(&mut self, request: RaftCmdRequest, timeout: Duration) -> Result { + let node_id = request.get_header().get_peer().get_store_id(); let timeout_f = GLOBAL_TIMER_HANDLE .delay(std::time::Instant::now() + timeout) .compat(); futures::executor::block_on(async move { futures::select! { - res = self.async_read(request).fuse() => res, + res = self.async_read(node_id, request).fuse() => res, e = timeout_f.fuse() => { Err(Error::Timeout(format!("request timeout for {:?}: {:?}", timeout,e))) }, @@ -116,12 +121,13 @@ pub trait Simulator { fn async_read( &mut self, + node_id: u64, request: RaftCmdRequest, ) -> impl Future> + Send + 'static { let mut req_clone = request.clone(); - req_clone.clear_requests(); - req_clone.mut_requests().push(new_snap_cmd()); - let snap = self.async_snapshot(req_clone); + // raftstore v2 only supports snap request. 
+ req_clone.mut_requests()[0].set_cmd_type(CmdType::Snap); + let snap = self.async_snapshot(node_id, req_clone); async move { match snap.await { Ok(snap) => { @@ -184,6 +190,7 @@ pub trait Simulator { fn async_snapshot( &mut self, + node_id: u64, request: RaftCmdRequest, ) -> impl Future, RaftCmdResponse>> + Send @@ -203,7 +210,7 @@ pub trait Simulator { timeout: Duration, ) -> Result { let region_id = request.get_header().get_region_id(); - let (msg, sub) = PeerMsg::raft_query(request); + let (msg, sub) = PeerMsg::raft_query(request.clone()); match self.async_peer_msg_on_node(node_id, region_id, msg) { Ok(()) => {} Err(e) => { @@ -213,17 +220,17 @@ pub trait Simulator { } } - let timeout_f = GLOBAL_TIMER_HANDLE.delay(std::time::Instant::now() + timeout); - // todo: unwrap? - match block_on(async move { - select! { - res = sub.result().fuse() => Ok(res.unwrap()), - _ = timeout_f.compat().fuse() => Err(Error::Timeout(format!("request timeout for {:?}", timeout))), - + let mut fut = Box::pin(sub.result()); + match block_on_timeout(fut.as_mut(), timeout) + .map_err(|e| Error::Timeout(format!("request timeout for {:?}: {:?}", timeout, e)))? + { + Some(QueryResult::Read(_)) => unreachable!(), + Some(QueryResult::Response(resp)) => Ok(resp), + None => { + error!("call_query_on_node receives none response"; "request" => ?request); + // Do not unwrap here, sometimes raftstore v2 may return none. 
+ return Err(box_err!("receives none response {:?}", request)); } - }).unwrap() { - QueryResult::Read(_) => unreachable!(), - QueryResult::Response(resp) => Ok(resp), } } @@ -289,10 +296,20 @@ pub trait Simulator { }) } - fn async_command_on_node(&self, node_id: u64, mut request: RaftCmdRequest) { + fn async_command_on_node( + &mut self, + node_id: u64, + mut request: RaftCmdRequest, + ) -> BoxFuture<'static, RaftCmdResponse> { let region_id = request.get_header().get_region_id(); - let (msg, _sub) = if request.has_admin_request() { + let is_read = check_raft_cmd_request(&request); + if is_read { + let fut = self.async_read(node_id, request); + return Box::pin(async move { fut.await.unwrap() }); + } + + let (msg, sub) = if request.has_admin_request() { PeerMsg::admin_command(request) } else { let requests = request.get_requests(); @@ -318,6 +335,7 @@ pub trait Simulator { self.async_peer_msg_on_node(node_id, region_id, msg) .unwrap(); + Box::pin(async move { sub.result().await.unwrap() }) } } @@ -1493,8 +1511,9 @@ impl, EK: KvEngine> Cluster { let mut req = self.new_prepare_merge(source, target); let leader = self.leader_of_region(source).unwrap(); req.mut_header().set_peer(leader.clone()); - self.sim - .rl() + let _ = self + .sim + .wl() .async_command_on_node(leader.get_store_id(), req); } diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 86b48715301..535306c5ae9 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -164,6 +164,10 @@ impl NodeCluster { snap_mgrs: HashMap::default(), } } + + pub fn get_concurrency_manager(&self, node_id: u64) -> ConcurrencyManager { + self.concurrency_managers.get(&node_id).unwrap().clone() + } } impl Simulator for NodeCluster { @@ -355,11 +359,11 @@ impl Simulator for NodeCluster { fn async_snapshot( &mut self, + node_id: u64, request: RaftCmdRequest, ) -> impl Future, RaftCmdResponse>> + Send + 'static { - let node_id = 
request.get_header().get_peer().get_store_id(); if !self .trans .core diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index b95313e8cdd..38bc43af526 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -810,10 +810,11 @@ impl Simulator for ServerCluster { fn async_snapshot( &mut self, + node_id: u64, request: kvproto::raft_cmdpb::RaftCmdRequest, - ) -> impl Future, RaftCmdResponse>> + Send - { - let node_id = request.get_header().get_peer().get_store_id(); + ) -> impl Future, RaftCmdResponse>> + + Send + + 'static { let mut router = match self.metas.get(&node_id) { None => { let mut resp = RaftCmdResponse::default(); diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index 312d9be99c4..77117553eec 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -9,7 +9,10 @@ use engine_traits::{CfName, KvEngine, TabletRegistry, CF_DEFAULT}; use file_system::IoRateLimiter; use futures::future::BoxFuture; use kvproto::{ - encryptionpb::EncryptionMethod, kvrpcpb::Context, metapb, raft_cmdpb::RaftCmdResponse, + encryptionpb::EncryptionMethod, + kvrpcpb::Context, + metapb, + raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, }; use raftstore::Result; use rand::{prelude::SliceRandom, RngCore}; @@ -21,10 +24,11 @@ use tikv::{ storage::{ config::EngineType, kv::{SnapContext, SnapshotExt}, - Engine, Snapshot, + point_key_range, Engine, Snapshot, }, }; use tikv_util::{config::ReadableDuration, worker::LazyWorker, HandyRwLock}; +use txn_types::Key; use crate::{bootstrap_store, cluster::Cluster, ServerCluster, Simulator}; @@ -234,7 +238,7 @@ pub fn async_read_on_peer, EK: KvEngine>( key: &[u8], read_quorum: bool, replica_read: bool, -) -> BoxFuture<'static, Result> { +) -> BoxFuture<'static, RaftCmdResponse> { let mut request = new_request( region.get_id(), region.get_region_epoch().clone(), 
@@ -243,8 +247,43 @@ pub fn async_read_on_peer, EK: KvEngine>( ); request.mut_header().set_peer(peer); request.mut_header().set_replica_read(replica_read); - let f = cluster.sim.wl().async_read(request); - Box::pin(async move { f.await }) + let node_id = request.get_header().get_peer().get_store_id(); + let f = cluster.sim.wl().async_read(node_id, request); + Box::pin(async move { f.await.unwrap() }) +} + +pub fn async_read_index_on_peer, EK: KvEngine>( + cluster: &mut Cluster, + peer: metapb::Peer, + region: metapb::Region, + key: &[u8], + read_quorum: bool, +) -> BoxFuture<'static, RaftCmdResponse> { + let mut cmd = new_get_cmd(key); + cmd.mut_read_index().set_start_ts(u64::MAX); + cmd.mut_read_index() + .mut_key_ranges() + .push(point_key_range(Key::from_raw(key))); + let mut request = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![cmd], + read_quorum, + ); + // Use replica read to issue a read index. + request.mut_header().set_replica_read(true); + request.mut_header().set_peer(peer); + let node_id = request.get_header().get_peer().get_store_id(); + let f = cluster.sim.wl().async_read(node_id, request); + Box::pin(async move { f.await.unwrap() }) +} + +pub fn async_command_on_node, EK: KvEngine>( + cluster: &mut Cluster, + node_id: u64, + request: RaftCmdRequest, +) -> BoxFuture<'static, RaftCmdResponse> { + cluster.sim.wl().async_command_on_node(node_id, request) } pub fn test_delete_range, EK: KvEngine>(cluster: &mut Cluster, cf: CfName) { diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 988625d3750..7ea8ee81f8f 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -51,6 +51,7 @@ use tempfile::TempDir; use test_pd_client::TestPdClient; use tikv::server::Result as ServerResult; use tikv_util::{ + mpsc::future, thread_group::GroupProperties, time::{Instant, ThreadReadId}, worker::LazyWorker, @@ -121,7 +122,7 @@ pub trait Simulator 
{ timeout: Duration, ) -> Result { let node_id = request.get_header().get_peer().get_store_id(); - let (cb, rx) = make_cb(&request); + let (cb, mut rx) = make_cb(&request); self.async_read(node_id, batch_id, request, cb); rx.recv_timeout(timeout) .map_err(|_| Error::Timeout(format!("request timeout for {:?}", timeout))) @@ -141,7 +142,7 @@ pub trait Simulator { request: RaftCmdRequest, timeout: Duration, ) -> Result { - let (cb, rx) = make_cb(&request); + let (cb, mut rx) = make_cb(&request); match self.async_command_on_node(node_id, request, cb) { Ok(()) => {} @@ -968,7 +969,7 @@ impl Cluster { pub fn async_request( &mut self, req: RaftCmdRequest, - ) -> Result> { + ) -> Result> { self.async_request_with_opts(req, Default::default()) } @@ -976,7 +977,7 @@ impl Cluster { &mut self, mut req: RaftCmdRequest, opts: RaftCmdExtraOpts, - ) -> Result> { + ) -> Result> { let region_id = req.get_header().get_region_id(); let leader = self.leader_of_region(region_id).unwrap(); req.mut_header().set_peer(leader.clone()); @@ -987,7 +988,10 @@ impl Cluster { Ok(rx) } - pub fn async_exit_joint(&mut self, region_id: u64) -> Result> { + pub fn async_exit_joint( + &mut self, + region_id: u64, + ) -> Result> { let region = block_on(self.pd_client.get_region_by_id(region_id)) .unwrap() .unwrap(); @@ -1003,7 +1007,7 @@ impl Cluster { &mut self, key: &[u8], value: &[u8], - ) -> Result> { + ) -> Result> { let mut region = self.get_region(key); let reqs = vec![new_put_cmd(key, value)]; let put = new_request(region.get_id(), region.take_region_epoch(), reqs, false); @@ -1014,7 +1018,7 @@ impl Cluster { &mut self, region_id: u64, peer: metapb::Peer, - ) -> Result> { + ) -> Result> { let region = block_on(self.pd_client.get_region_by_id(region_id)) .unwrap() .unwrap(); @@ -1027,7 +1031,7 @@ impl Cluster { &mut self, region_id: u64, peer: metapb::Peer, - ) -> Result> { + ) -> Result> { let region = block_on(self.pd_client.get_region_by_id(region_id)) .unwrap() .unwrap(); diff --git 
a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 7f21338be1a..45b4a98456a 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -3,7 +3,6 @@ use std::{ fmt::Write, path::Path, - result::Result as StdResult, str::FromStr, sync::{mpsc, Arc, Mutex}, thread, @@ -21,7 +20,7 @@ use engine_traits::{ CF_DEFAULT, CF_RAFT, }; use file_system::IoRateLimiter; -use futures::{channel::oneshot, executor::block_on, future::BoxFuture}; +use futures::{executor::block_on, future::BoxFuture, StreamExt}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ encryptionpb::EncryptionMethod, @@ -56,7 +55,9 @@ use tikv::{ }, }; pub use tikv_util::store::{find_peer, new_learner_peer, new_peer}; -use tikv_util::{config::*, escape, time::ThreadReadId, worker::LazyWorker, HandyRwLock}; +use tikv_util::{ + config::*, escape, mpsc::future, time::ThreadReadId, worker::LazyWorker, HandyRwLock, +}; use txn_types::Key; use crate::{Cluster, Config, RawEngine, ServerCluster, Simulator}; @@ -365,7 +366,7 @@ impl Drop for CallbackLeakDetector { } } -pub fn make_cb(cmd: &RaftCmdRequest) -> (Callback, mpsc::Receiver) { +pub fn check_raft_cmd_request(cmd: &RaftCmdRequest) -> bool { let mut is_read = cmd.has_status_request(); let mut is_write = cmd.has_admin_request(); for req in cmd.get_requests() { @@ -378,8 +379,14 @@ pub fn make_cb(cmd: &RaftCmdRequest) -> (Callback, mpsc::Receiver } } assert!(is_read ^ is_write, "Invalid RaftCmdRequest: {:?}", cmd); + is_read +} - let (tx, rx) = mpsc::channel(); +pub fn make_cb( + cmd: &RaftCmdRequest, +) -> (Callback, future::Receiver) { + let is_read = check_raft_cmd_request(cmd); + let (tx, rx) = future::bounded(1, future::WakePolicy::Immediately); let mut detector = CallbackLeakDetector::default(); let cb = if is_read { Callback::read(Box::new(move |resp: ReadResponse| { @@ -401,7 +408,7 @@ pub fn make_cb_ext( cmd: &RaftCmdRequest, proposed: Option, committed: Option, -) -> 
(Callback, mpsc::Receiver) { +) -> (Callback, future::Receiver) { let (cb, receiver) = make_cb(cmd); if let Callback::Write { cb, .. } = cb { (Callback::write_ext(cb, proposed, committed), receiver) @@ -436,7 +443,7 @@ pub fn async_read_on_peer( key: &[u8], read_quorum: bool, replica_read: bool, -) -> BoxFuture<'static, StdResult> { +) -> BoxFuture<'static, RaftCmdResponse> { let node_id = peer.get_store_id(); let mut request = new_request( region.get_id(), @@ -446,10 +453,13 @@ pub fn async_read_on_peer( ); request.mut_header().set_peer(peer); request.mut_header().set_replica_read(replica_read); - let (tx, rx) = oneshot::channel(); + let (tx, mut rx) = future::bounded(1, future::WakePolicy::Immediately); let cb = Callback::read(Box::new(move |resp| drop(tx.send(resp.response)))); cluster.sim.wl().async_read(node_id, None, request, cb); - Box::pin(async move { rx.await }) + Box::pin(async move { + let fut = rx.next(); + fut.await.unwrap() + }) } pub fn batch_read_on_peer( @@ -509,7 +519,7 @@ pub fn async_read_index_on_peer( region: metapb::Region, key: &[u8], read_quorum: bool, -) -> mpsc::Receiver { +) -> BoxFuture<'static, RaftCmdResponse> { let node_id = peer.get_store_id(); let mut cmd = new_read_index_cmd(); cmd.mut_read_index().set_start_ts(u64::MAX); @@ -523,10 +533,30 @@ pub fn async_read_index_on_peer( read_quorum, ); request.mut_header().set_peer(peer); - let (tx, rx) = mpsc::sync_channel(1); + let (tx, mut rx) = future::bounded(1, future::WakePolicy::Immediately); let cb = Callback::read(Box::new(move |resp| drop(tx.send(resp.response)))); cluster.sim.wl().async_read(node_id, None, request, cb); - rx + Box::pin(async move { + let fut = rx.next(); + fut.await.unwrap() + }) +} + +pub fn async_command_on_node( + cluster: &mut Cluster, + node_id: u64, + request: RaftCmdRequest, +) -> BoxFuture<'static, RaftCmdResponse> { + let (cb, mut rx) = make_cb(&request); + cluster + .sim + .rl() + .async_command_on_node(node_id, request, cb) + .unwrap(); + 
Box::pin(async move { + let fut = rx.next(); + fut.await.unwrap() + }) } pub fn must_get_value(resp: &RaftCmdResponse) -> Vec { diff --git a/components/tikv_util/src/mpsc/future.rs b/components/tikv_util/src/mpsc/future.rs index 00598f5295d..4492e33a933 100644 --- a/components/tikv_util/src/mpsc/future.rs +++ b/components/tikv_util/src/mpsc/future.rs @@ -6,14 +6,18 @@ use std::{ pin::Pin, sync::atomic::{self, AtomicUsize, Ordering}, task::{Context, Poll}, + time::Duration, }; +pub use crossbeam::channel::{RecvTimeoutError, TryRecvError}; use crossbeam::{ - channel::{SendError, TryRecvError}, + channel::SendError, queue::{ArrayQueue, SegQueue}, }; use futures::{task::AtomicWaker, Stream, StreamExt}; +use crate::future::block_on_timeout; + enum QueueType { Unbounded(SegQueue), Bounded(ArrayQueue), @@ -176,6 +180,15 @@ impl Receiver { } Err(TryRecvError::Disconnected) } + + pub fn recv_timeout(&mut self, dur: Duration) -> Result { + let fut = self.next(); + match block_on_timeout(fut, dur) { + Ok(Some(v)) => Ok(v), + Ok(None) => Err(RecvTimeoutError::Disconnected), + Err(_) => Err(RecvTimeoutError::Timeout), + } + } } impl Drop for Receiver { diff --git a/tests/failpoints/cases/test_cmd_epoch_checker.rs b/tests/failpoints/cases/test_cmd_epoch_checker.rs index feaa1af76ef..73bc741d9bb 100644 --- a/tests/failpoints/cases/test_cmd_epoch_checker.rs +++ b/tests/failpoints/cases/test_cmd_epoch_checker.rs @@ -10,23 +10,26 @@ use kvproto::raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}; use raft::eraftpb::MessageType; use raftstore::store::msg::*; use test_raftstore::*; -use tikv_util::HandyRwLock; +use tikv_util::{mpsc::future, HandyRwLock}; struct CbReceivers { proposed: mpsc::Receiver<()>, committed: mpsc::Receiver<()>, - applied: mpsc::Receiver, + applied: future::Receiver, } impl CbReceivers { - fn assert_not_ready(&self) { + fn assert_not_ready(&mut self) { sleep_ms(100); assert_eq!(self.proposed.try_recv().unwrap_err(), TryRecvError::Empty); 
assert_eq!(self.committed.try_recv().unwrap_err(), TryRecvError::Empty); - assert_eq!(self.applied.try_recv().unwrap_err(), TryRecvError::Empty); + assert_eq!( + self.applied.try_recv().unwrap_err(), + crossbeam::channel::TryRecvError::Empty + ); } - fn assert_ok(&self) { + fn assert_ok(&mut self) { self.assert_applied_ok(); // proposed and committed should be invoked before applied self.proposed.try_recv().unwrap(); @@ -34,14 +37,14 @@ impl CbReceivers { } // When fails to propose, only applied callback will be invoked. - fn assert_err(&self) { + fn assert_err(&mut self) { let resp = self.applied.recv_timeout(Duration::from_secs(1)).unwrap(); assert!(resp.get_header().has_error(), "{:?}", resp); self.proposed.try_recv().unwrap_err(); self.committed.try_recv().unwrap_err(); } - fn assert_applied_ok(&self) { + fn assert_applied_ok(&mut self) { let resp = self.applied.recv_timeout(Duration::from_secs(1)).unwrap(); assert!( !resp.get_header().has_error(), @@ -119,7 +122,7 @@ fn test_reject_proposal_during_region_split() { fail::cfg(force_delay_propose_batch_raft_command_fp, "2*return").unwrap(); } let write_req = make_write_req(&mut cluster, b"k1"); - let (cb, cb_receivers) = make_cb(&write_req); + let (cb, mut cb_receivers) = make_cb(&write_req); cluster .sim .rl() @@ -141,13 +144,13 @@ fn test_reject_proposal_during_region_split() { ); // The write request fails due to epoch not match. - for r in receivers { + for mut r in receivers { r.assert_err(); } // New write request can succeed. 
let write_req = make_write_req(&mut cluster, b"k1"); - let (cb, cb_receivers) = make_cb(&write_req); + let (cb, mut cb_receivers) = make_cb(&write_req); cluster .sim .rl() @@ -198,7 +201,7 @@ fn test_reject_proposal_during_region_merge() { fail::cfg(force_delay_propose_batch_raft_command_fp, "2*return").unwrap(); } let write_req = make_write_req(&mut cluster, b"a"); - let (cb, cb_receivers) = make_cb(&write_req); + let (cb, mut cb_receivers) = make_cb(&write_req); cluster .sim .rl() @@ -222,7 +225,7 @@ fn test_reject_proposal_during_region_merge() { .has_error() ); // The write request fails due to epoch not match. - for r in receivers { + for mut r in receivers { r.assert_err(); } @@ -234,7 +237,7 @@ fn test_reject_proposal_during_region_merge() { fail::cfg(force_delay_propose_batch_raft_command_fp, "2*return").unwrap(); } let write_req = make_write_req(&mut cluster, b"a"); - let (cb, cb_receivers) = make_cb(&write_req); + let (cb, mut cb_receivers) = make_cb(&write_req); cluster .sim .rl() @@ -251,7 +254,7 @@ fn test_reject_proposal_during_region_merge() { fail::cfg(force_delay_propose_batch_raft_command_fp, "2*return").unwrap(); } let write_req = make_write_req(&mut cluster, b"k"); - let (cb, cb_receivers) = make_cb(&write_req); + let (cb, mut cb_receivers) = make_cb(&write_req); cluster .sim .rl() @@ -266,13 +269,13 @@ fn test_reject_proposal_during_region_merge() { fail::remove(commit_merge_fp); pd_client.check_merged_timeout(source.get_id(), Duration::from_secs(5)); // The write request fails due to epoch not match. - for r in receivers { + for mut r in receivers { r.assert_err(); } // New write request can succeed. 
let write_req = make_write_req(&mut cluster, b"k"); - let (cb, cb_receivers) = make_cb(&write_req); + let (cb, mut cb_receivers) = make_cb(&write_req); cluster .sim .rl() @@ -321,7 +324,7 @@ fn test_reject_proposal_during_rollback_region_merge() { fail::cfg(force_delay_propose_batch_raft_command_fp, "2*return").unwrap(); } let write_req = make_write_req(&mut cluster, b"a"); - let (cb, cb_receivers) = make_cb(&write_req); + let (cb, mut cb_receivers) = make_cb(&write_req); cluster .sim .rl() @@ -336,7 +339,7 @@ fn test_reject_proposal_during_rollback_region_merge() { // New write request can succeed. let write_req = make_write_req(&mut cluster, b"a"); - let (cb, cb_receivers) = make_cb(&write_req); + let (cb, mut cb_receivers) = make_cb(&write_req); cluster .sim .rl() @@ -374,7 +377,7 @@ fn test_reject_proposal_during_leader_transfer() { fail::cfg(force_delay_propose_batch_raft_command_fp, "2*return").unwrap(); } let write_req = make_write_req(&mut cluster, b"k"); - let (cb, cb_receivers) = make_cb(&write_req); + let (cb, mut cb_receivers) = make_cb(&write_req); cluster .sim .rl() @@ -395,14 +398,14 @@ fn test_accept_proposal_during_conf_change() { let conf_change_fp = "apply_on_conf_change_all_1"; fail::cfg(conf_change_fp, "pause").unwrap(); - let add_peer_rx = cluster.async_add_peer(r, new_peer(2, 2)).unwrap(); + let mut add_peer_rx = cluster.async_add_peer(r, new_peer(2, 2)).unwrap(); add_peer_rx .recv_timeout(Duration::from_millis(100)) .unwrap_err(); // Conf change doesn't affect proposals. let write_req = make_write_req(&mut cluster, b"k"); - let (cb, cb_receivers) = make_cb(&write_req); + let (cb, mut cb_receivers) = make_cb(&write_req); cluster .sim .rl() @@ -445,7 +448,7 @@ fn test_not_invoke_committed_cb_when_fail_to_commit() { // proposal. 
cluster.partition(vec![1], vec![2, 3]); let write_req = make_write_req(&mut cluster, b"k1"); - let (cb, cb_receivers) = make_cb(&write_req); + let (cb, mut cb_receivers) = make_cb(&write_req); cluster .sim .rl() @@ -489,7 +492,7 @@ fn test_propose_before_transfer_leader() { fail::cfg(force_delay_propose_batch_raft_command_fp, "return").unwrap(); let write_req = make_write_req(&mut cluster, b"k1"); - let (cb, cb_receivers) = make_cb(&write_req); + let (cb, mut cb_receivers) = make_cb(&write_req); cluster .sim .rl() @@ -518,7 +521,7 @@ fn test_propose_before_split_and_merge() { fail::cfg(force_delay_propose_batch_raft_command_fp, "return").unwrap(); let write_req = make_write_req(&mut cluster, b"k1"); - let (cb, cb_receivers) = make_cb(&write_req); + let (cb, mut cb_receivers) = make_cb(&write_req); cluster .sim .rl() @@ -542,7 +545,7 @@ fn test_propose_before_split_and_merge() { cluster.must_transfer_leader(right.get_id(), right_peer2); let write_req = make_write_req(&mut cluster, b"k0"); - let (cb, cb_receivers) = make_cb(&write_req); + let (cb, mut cb_receivers) = make_cb(&write_req); cluster .sim .rl() @@ -552,7 +555,7 @@ fn test_propose_before_split_and_merge() { cb_receivers.assert_proposed_ok(); let write_req2 = make_write_req(&mut cluster, b"k2"); - let (cb2, cb_receivers2) = make_cb(&write_req2); + let (cb2, mut cb_receivers2) = make_cb(&write_req2); cluster .sim .rl() diff --git a/tests/failpoints/cases/test_conf_change.rs b/tests/failpoints/cases/test_conf_change.rs index 0a1be37cab6..c3612e64127 100644 --- a/tests/failpoints/cases/test_conf_change.rs +++ b/tests/failpoints/cases/test_conf_change.rs @@ -110,7 +110,7 @@ fn test_write_after_destroy() { let mut epoch = cluster.pd_client.get_region_epoch(r1); let mut admin_req = new_admin_request(r1, &epoch, conf_change); admin_req.mut_header().set_peer(new_peer(1, 1)); - let (cb1, rx1) = make_cb(&admin_req); + let (cb1, mut rx1) = make_cb(&admin_req); let engines_3 = cluster.get_all_engines(3); let region = 
block_on(cluster.pd_client.get_region_by_id(r1)) .unwrap() diff --git a/tests/failpoints/cases/test_disk_full.rs b/tests/failpoints/cases/test_disk_full.rs index aba59d8c239..fc28560c7f1 100644 --- a/tests/failpoints/cases/test_disk_full.rs +++ b/tests/failpoints/cases/test_disk_full.rs @@ -67,9 +67,7 @@ fn ensure_disk_usage_is_reported( let peer = new_peer(store_id, peer_id); let key = region.get_start_key(); let ch = async_read_on_peer(cluster, peer, region.clone(), key, true, true); - block_on_timeout(ch, Duration::from_secs(1)) - .unwrap() - .unwrap(); + block_on_timeout(ch, Duration::from_secs(1)).unwrap(); } fn test_disk_full_leader_behaviors(usage: DiskUsage) { @@ -88,7 +86,7 @@ fn test_disk_full_leader_behaviors(usage: DiskUsage) { // Test new normal proposals won't be allowed when disk is full. let old_last_index = cluster.raft_local_state(1, 1).last_index; - let rx = cluster.async_put(b"k2", b"v2").unwrap(); + let mut rx = cluster.async_put(b"k2", b"v2").unwrap(); assert_disk_full(&rx.recv_timeout(Duration::from_secs(2)).unwrap()); let new_last_index = cluster.raft_local_state(1, 1).last_index; assert_eq!(old_last_index, new_last_index); @@ -301,7 +299,7 @@ fn test_majority_disk_full() { } // Normal proposals will be rejected because of majority peers' disk full. 
- let ch = cluster.async_put(b"k2", b"v2").unwrap(); + let mut ch = cluster.async_put(b"k2", b"v2").unwrap(); let resp = ch.recv_timeout(Duration::from_secs(1)).unwrap(); assert_eq!(disk_full_stores(&resp), vec![2, 3]); @@ -312,7 +310,7 @@ fn test_majority_disk_full() { let put = new_request(1, epoch.clone(), reqs, false); let mut opts = RaftCmdExtraOpts::default(); opts.disk_full_opt = DiskFullOpt::AllowedOnAlmostFull; - let ch = cluster.async_request_with_opts(put, opts).unwrap(); + let mut ch = cluster.async_request_with_opts(put, opts).unwrap(); let resp = ch.recv_timeout(Duration::from_secs(1)).unwrap(); assert!(!resp.get_header().has_error()); @@ -337,7 +335,7 @@ fn test_majority_disk_full() { let put = new_request(1, epoch.clone(), reqs, false); let mut opts = RaftCmdExtraOpts::default(); opts.disk_full_opt = DiskFullOpt::AllowedOnAlmostFull; - let ch = cluster.async_request_with_opts(put, opts).unwrap(); + let mut ch = cluster.async_request_with_opts(put, opts).unwrap(); let resp = ch.recv_timeout(Duration::from_secs(10)).unwrap(); assert_eq!(disk_full_stores(&resp), vec![2, 3]); @@ -356,7 +354,7 @@ fn test_majority_disk_full() { let put = new_request(1, epoch, reqs, false); let mut opts = RaftCmdExtraOpts::default(); opts.disk_full_opt = DiskFullOpt::AllowedOnAlmostFull; - let ch = cluster.async_request_with_opts(put, opts).unwrap(); + let mut ch = cluster.async_request_with_opts(put, opts).unwrap(); let resp = ch.recv_timeout(Duration::from_secs(1)).unwrap(); assert_eq!(disk_full_stores(&resp), vec![3]); diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 1a733be5d8c..16796cfa555 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -1530,7 +1530,7 @@ fn test_retry_pending_prepare_merge_fail() { let (propose_tx, propose_rx) = mpsc::sync_channel(10); fail::cfg_callback("after_propose", move || propose_tx.send(()).unwrap()).unwrap(); - let rx = cluster.async_put(b"k1", 
b"v11").unwrap(); + let mut rx = cluster.async_put(b"k1", b"v11").unwrap(); propose_rx.recv_timeout(Duration::from_secs(2)).unwrap(); rx.recv_timeout(Duration::from_millis(200)).unwrap_err(); diff --git a/tests/failpoints/cases/test_replica_read.rs b/tests/failpoints/cases/test_replica_read.rs index 82e1e255d5e..64f363f0ced 100644 --- a/tests/failpoints/cases/test_replica_read.rs +++ b/tests/failpoints/cases/test_replica_read.rs @@ -7,17 +7,21 @@ use std::{ }; use crossbeam::channel; -use engine_traits::{Peekable, RaftEngineReadOnly, CF_RAFT}; +use engine_traits::RaftEngineReadOnly; use futures::executor::block_on; use kvproto::raft_serverpb::{PeerState, RaftMessage, RegionLocalState}; use raft::eraftpb::MessageType; -use test_raftstore::*; +use test_raftstore::{Simulator as S1, *}; +use test_raftstore_macro::test_case; +use test_raftstore_v2::Simulator as S2; +use tikv::storage::config::EngineType; use tikv_util::{config::ReadableDuration, future::block_on_timeout, HandyRwLock}; use txn_types::{Key, Lock, LockType}; -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_wait_for_apply_index() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // Increase the election tick to make this test case running reliably. configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); @@ -52,18 +56,13 @@ fn test_wait_for_apply_index() { ); request.mut_header().set_peer(p3); request.mut_header().set_replica_read(true); - let (cb, rx) = make_cb(&request); - cluster - .sim - .rl() - .async_command_on_node(3, request, cb) - .unwrap(); + let mut rx = async_command_on_node(&mut cluster, 3, request); // Must timeout here - rx.recv_timeout(Duration::from_millis(500)).unwrap_err(); + block_on_timeout(rx.as_mut(), Duration::from_millis(500)).unwrap_err(); fail::remove("on_apply_write_cmd"); // After write cmd applied, the follower read will be executed. 
- match rx.recv_timeout(Duration::from_secs(3)) { + match block_on_timeout(rx.as_mut(), Duration::from_secs(3)) { Ok(resp) => { assert_eq!(resp.get_responses().len(), 1); assert_eq!(resp.get_responses()[0].get_get().get_value(), b"v1"); @@ -72,10 +71,11 @@ fn test_wait_for_apply_index() { } } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_duplicate_read_index_ctx() { // Initialize cluster - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); cluster.cfg.raft_store.raft_heartbeat_ticks = 1; let pd_client = Arc::clone(&cluster.pd_client); @@ -118,43 +118,40 @@ fn test_duplicate_read_index_ctx() { true, ); request.mut_header().set_peer(p2); - let (cb2, rx2) = make_cb(&request); + // In v2, we use replica read to force issue a read index. + if cluster.cfg.storage.engine == EngineType::RaftKv2 { + request.mut_requests()[0] = new_get_cmd(b"k0"); + request.mut_header().set_replica_read(true); + } // send to peer 2 - cluster - .sim - .rl() - .async_command_on_node(2, request.clone(), cb2) - .unwrap(); + let mut rx2 = async_command_on_node(&mut cluster, 2, request.clone()); rx.recv_timeout(Duration::from_secs(5)).unwrap(); must_get_equal(&cluster.get_engine(3), b"k0", b"v0"); request.mut_header().set_peer(p3); - let (cb3, rx3) = make_cb(&request); // send to peer 3 - cluster - .sim - .rl() - .async_command_on_node(3, request, cb3) - .unwrap(); + let mut rx3 = async_command_on_node(&mut cluster, 3, request); rx.recv_timeout(Duration::from_secs(5)).unwrap(); let router = cluster.sim.wl().get_router(1).unwrap(); fail::cfg("pause_on_peer_collect_message", "pause").unwrap(); cluster.sim.wl().clear_recv_filters(1); for raft_msg in std::mem::take(&mut *dropped_msgs.lock().unwrap()) { - router.send_raft_message(raft_msg).unwrap(); + #[allow(clippy::useless_conversion)] + 
router.send_raft_message(raft_msg.into()).unwrap(); } fail::remove("pause_on_peer_collect_message"); // read index response must not be dropped - rx2.recv_timeout(Duration::from_secs(5)).unwrap(); - rx3.recv_timeout(Duration::from_secs(5)).unwrap(); + block_on_timeout(rx2.as_mut(), Duration::from_secs(5)).unwrap(); + block_on_timeout(rx3.as_mut(), Duration::from_secs(5)).unwrap(); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_read_before_init() { // Initialize cluster - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -184,13 +181,8 @@ fn test_read_before_init() { ); request.mut_header().set_peer(p3); request.mut_header().set_replica_read(true); - let (cb, rx) = make_cb(&request); - cluster - .sim - .rl() - .async_command_on_node(3, request, cb) - .unwrap(); - let resp = rx.recv_timeout(Duration::from_secs(5)).unwrap(); + let mut rx = async_command_on_node(&mut cluster, 3, request); + let resp = block_on_timeout(rx.as_mut(), Duration::from_secs(5)).unwrap(); fail::remove("before_handle_snapshot_ready_3"); assert!( resp.get_header() @@ -202,10 +194,11 @@ fn test_read_before_init() { ); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_read_applying_snapshot() { // Initialize cluster - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -224,14 +217,11 @@ fn test_read_applying_snapshot() { cluster.pd_client.must_add_peer(r1, p3.clone()); thread::sleep(Duration::from_millis(500)); - // Check if peer 3 is applying snapshot - let region_key = 
keys::region_state_key(r1); - let region_state: RegionLocalState = cluster - .get_engine(3) - .get_msg_cf(CF_RAFT, ®ion_key) - .unwrap() - .unwrap(); - assert_eq!(region_state.get_state(), PeerState::Applying); + // Check if peer 3 is applying snapshot for raftstore v1. + if cluster.cfg.storage.engine == EngineType::RaftKv { + let region_state: RegionLocalState = cluster.region_local_state(r1, 3); + assert_eq!(region_state.get_state(), PeerState::Applying); + } let region = cluster.get_region(b"k0"); assert_eq!(cluster.leader_of_region(r1).unwrap(), p1); @@ -243,28 +233,26 @@ fn test_read_applying_snapshot() { ); request.mut_header().set_peer(p3); request.mut_header().set_replica_read(true); - let (cb, rx) = make_cb(&request); - cluster - .sim - .rl() - .async_command_on_node(3, request, cb) - .unwrap(); - let resp = match rx.recv_timeout(Duration::from_secs(5)) { - Ok(r) => r, + let mut rx = async_command_on_node(&mut cluster, 3, request); + match block_on_timeout(rx.as_mut(), Duration::from_secs(5)) { + Ok(resp) => { + // In raftstore v1, read fails due to snapshot. + assert!(cluster.cfg.storage.engine == EngineType::RaftKv); + assert!( + resp.get_header() + .get_error() + .get_message() + .contains("applying snapshot"), + "{:?}", + resp.get_header().get_error() + ); + } Err(_) => { - fail::remove("region_apply_snap"); - panic!("cannot receive response"); + // In raftstore v2, snapshot blocks reads. 
+ assert!(cluster.cfg.storage.engine == EngineType::RaftKv2); } }; fail::remove("region_apply_snap"); - assert!( - resp.get_header() - .get_error() - .get_message() - .contains("applying snapshot"), - "{:?}", - resp.get_header().get_error() - ); } #[test] @@ -328,7 +316,7 @@ fn test_read_after_cleanup_range_for_snap() { request.mut_header().set_peer(p3); request.mut_header().set_replica_read(true); // Send follower read request to peer 3 - let (cb1, rx1) = make_cb(&request); + let (cb1, mut rx1) = make_cb(&request); cluster .sim .rl() @@ -362,9 +350,10 @@ fn test_read_after_cleanup_range_for_snap() { /// slowly and drops the no-op entry from the new leader, and it had to wait for /// a heartbeat timeout to know its leader before that it can't handle any read /// request. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] fn test_new_split_learner_can_not_find_leader() { - let mut cluster = new_node_cluster(0, 4); + let mut cluster = new_cluster(0, 4); configure_for_lease_read(&mut cluster.cfg, Some(5000), None); let pd_client = Arc::clone(&cluster.pd_client); @@ -400,18 +389,17 @@ fn test_new_split_learner_can_not_find_leader() { let new_region = cluster.get_region(b"k2"); let learner_peer = find_peer(&new_region, 3).unwrap().clone(); let resp_ch = async_read_on_peer(&mut cluster, learner_peer, new_region, b"k2", true, true); - let resp = block_on_timeout(resp_ch, Duration::from_secs(3)) - .unwrap() - .unwrap(); + let resp = block_on_timeout(resp_ch, Duration::from_secs(3)).unwrap(); let exp_value = resp.get_responses()[0].get_get().get_value(); assert_eq!(exp_value, b"v2"); } /// Test if the read index request can get a correct response when the commit /// index of leader if not up-to-date after transferring leader. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_replica_read_after_transfer_leader() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_lease_read(&mut cluster.cfg, Some(50), Some(100)); @@ -468,7 +456,8 @@ fn test_replica_read_after_transfer_leader() { let router = cluster.sim.wl().get_router(2).unwrap(); for raft_msg in std::mem::take(&mut *dropped_msgs.lock().unwrap()) { - router.send_raft_message(raft_msg).unwrap(); + #[allow(clippy::useless_conversion)] + router.send_raft_message(raft_msg.into()).unwrap(); } let new_region = cluster.get_region(b"k1"); @@ -478,18 +467,17 @@ fn test_replica_read_after_transfer_leader() { fail::remove(on_peer_collect_message_2); - let resp = block_on_timeout(resp_ch, Duration::from_secs(3)) - .unwrap() - .unwrap(); + let resp = block_on_timeout(resp_ch, Duration::from_secs(3)).unwrap(); let exp_value = resp.get_responses()[0].get_get().get_value(); assert_eq!(exp_value, b"v2"); } // This test is for reproducing the bug that some replica reads was sent to a // leader and shared a same read index because of the optimization on leader. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] fn test_read_index_after_transfer_leader() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); configure_for_lease_read(&mut cluster.cfg, Some(50), Some(100)); @@ -548,10 +536,12 @@ fn test_read_index_after_transfer_leader() { // Send heartbeat and append responses to advance read index. 
let router = cluster.sim.wl().get_router(2).unwrap(); for msg in append_msgs { - router.send_raft_message(msg.clone()).unwrap(); + #[allow(clippy::useless_conversion)] + router.send_raft_message(msg.clone().into()).unwrap(); } for msg in heartbeat_msgs { - router.send_raft_message(msg.clone()).unwrap(); + #[allow(clippy::useless_conversion)] + router.send_raft_message(msg.clone().into()).unwrap(); } fail::remove(on_peer_collect_message_2); // Wait for read index has been advanced. @@ -567,11 +557,12 @@ fn test_read_index_after_transfer_leader() { ) }); for msg in vote_msgs { - router.send_raft_message(msg.clone()).unwrap(); + #[allow(clippy::useless_conversion)] + router.send_raft_message(msg.clone().into()).unwrap(); } - for resp in responses { - resp.recv_timeout(Duration::from_millis(200)).unwrap(); + for mut resp in responses { + block_on_timeout(resp.as_mut(), Duration::from_millis(200)).unwrap(); } cluster.sim.wl().clear_recv_filters(2); @@ -580,7 +571,8 @@ fn test_read_index_after_transfer_leader() { /// Test if the read index request can get a correct response when the commit /// index of leader if not up-to-date after transferring leader. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] fn test_batch_read_index_after_transfer_leader() { let mut cluster = new_node_cluster(0, 3); configure_for_lease_read(&mut cluster.cfg, Some(50), Some(100)); @@ -619,7 +611,8 @@ fn test_batch_read_index_after_transfer_leader() { let router = cluster.sim.wl().get_router(2).unwrap(); for raft_msg in std::mem::take(&mut *dropped_msgs.lock().unwrap()) { - router.send_raft_message(raft_msg).unwrap(); + #[allow(clippy::useless_conversion)] + router.send_raft_message(raft_msg.into()).unwrap(); } let mut resps = Vec::with_capacity(2); @@ -637,7 +630,7 @@ fn test_batch_read_index_after_transfer_leader() { let resps = resps .into_iter() - .map(|x| x.recv_timeout(Duration::from_secs(5)).unwrap()) + .map(|mut x| x.recv_timeout(Duration::from_secs(5)).unwrap()) .collect::>(); // `term` in the header is `current_term`, not term of the entry at @@ -657,9 +650,13 @@ fn test_batch_read_index_after_transfer_leader() { } } -#[test] +// Read index on follower must also return KeyIsLocked error. +// +// Note: this test case does not applicable to raftstore v2, because it no +// longer support read index from users. 
+#[test_case(test_raftstore::new_node_cluster)] fn test_read_index_lock_checking_on_follower() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -679,7 +676,7 @@ fn test_read_index_lock_checking_on_follower() { fail::cfg("before_propose_readindex", "1*pause").unwrap(); let mut resp = async_read_index_on_peer(&mut cluster, new_peer(2, 2), r1.clone(), b"k1", true); for i in 0..=20 { - let res = resp.recv_timeout(Duration::from_millis(500)); + let res = block_on_timeout(resp.as_mut(), Duration::from_millis(500)); if res.is_err() { break; } @@ -721,7 +718,7 @@ fn test_read_index_lock_checking_on_follower() { // We must make sure the lock check is done on peer 3. fail::remove("before_propose_readindex"); - let resp = resp.recv_timeout(Duration::from_millis(2000)).unwrap(); + let resp = block_on_timeout(resp.as_mut(), Duration::from_millis(2000)).unwrap(); assert_eq!( &lock.into_lock_info(b"k1".to_vec()), resp.get_responses()[0].get_read_index().get_locked(), @@ -730,9 +727,10 @@ fn test_read_index_lock_checking_on_follower() { ); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_read_index_lock_checking_on_false_leader() { - let mut cluster = new_node_cluster(0, 5); + let mut cluster = new_cluster(0, 5); // Use long election timeout and short lease. configure_for_lease_read(&mut cluster.cfg, Some(50), Some(200)); cluster.cfg.raft_store.raft_store_max_leader_lease = @@ -797,8 +795,8 @@ fn test_read_index_lock_checking_on_false_leader() { // Read index from peer 2, the read index message will be sent to the old leader // peer 1. But the lease of peer 1 has expired and it cannot get majority of // heartbeat. So, we cannot get the result here. 
- let resp = async_read_index_on_peer(&mut cluster, new_peer(2, 2), r1, b"k1", true); - resp.recv_timeout(Duration::from_millis(300)).unwrap_err(); + let mut resp = async_read_index_on_peer(&mut cluster, new_peer(2, 2), r1, b"k1", true); + block_on_timeout(resp.as_mut(), Duration::from_millis(300)).unwrap_err(); // Now, restore the network partition. Peer 1 should now become follower and // drop its pending read index request. Peer 2 cannot get the result now. @@ -809,10 +807,10 @@ fn test_read_index_lock_checking_on_false_leader() { ); cluster.sim.wl().add_recv_filter(2, recv_filter); cluster.clear_send_filters(); - resp.recv_timeout(Duration::from_millis(300)).unwrap_err(); + block_on_timeout(resp.as_mut(), Duration::from_millis(300)).unwrap_err(); // After cleaning all filters, peer 2 will retry and will get error. cluster.sim.wl().clear_recv_filters(2); - let resp = resp.recv_timeout(Duration::from_millis(2000)).unwrap(); + let resp = block_on_timeout(resp.as_mut(), Duration::from_secs(2)).unwrap(); assert!(resp.get_header().has_error()); } diff --git a/tests/failpoints/cases/test_stale_read.rs b/tests/failpoints/cases/test_stale_read.rs index 475ed71a1b0..523bb54f7cb 100644 --- a/tests/failpoints/cases/test_stale_read.rs +++ b/tests/failpoints/cases/test_stale_read.rs @@ -455,7 +455,7 @@ fn test_read_after_peer_destroyed() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, rx) = make_cb(&request); + let (cb, mut rx) = make_cb(&request); cluster .sim .rl() diff --git a/tests/integrations/raftstore/test_hibernate.rs b/tests/integrations/raftstore/test_hibernate.rs index b6408f9ce91..86962330f0f 100644 --- a/tests/integrations/raftstore/test_hibernate.rs +++ b/tests/integrations/raftstore/test_hibernate.rs @@ -62,7 +62,7 @@ fn test_proposal_prevent_sleep() { true, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, rx) = make_cb(&request); + let (cb, mut rx) = make_cb(&request); // send to peer 2 cluster .sim diff --git 
a/tests/integrations/raftstore/test_joint_consensus.rs b/tests/integrations/raftstore/test_joint_consensus.rs index 7845ecec43d..282d0d0525c 100644 --- a/tests/integrations/raftstore/test_joint_consensus.rs +++ b/tests/integrations/raftstore/test_joint_consensus.rs @@ -1,9 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use std::{ - sync::{mpsc, Arc}, - time::*, -}; +use std::{sync::Arc, time::*}; use kvproto::{ metapb::{self, PeerRole, Region}, @@ -13,7 +10,7 @@ use pd_client::PdClient; use raft::eraftpb::ConfChangeType; use raftstore::Result; use test_raftstore::*; -use tikv_util::store::find_peer; +use tikv_util::{mpsc::future, store::find_peer}; /// Tests multiple confchange commands can be done by one request #[test] @@ -167,23 +164,23 @@ fn test_request_in_joint_state() { // Isolated peer 2, so the old configuation can't reach quorum cluster.add_send_filter(IsolationFilterFactory::new(2)); - let rx = cluster + let mut rx = cluster .async_request(put_request(®ion, 1, b"k3", b"v3")) .unwrap(); assert_eq!( rx.recv_timeout(Duration::from_millis(100)), - Err(mpsc::RecvTimeoutError::Timeout) + Err(future::RecvTimeoutError::Timeout) ); cluster.clear_send_filters(); // Isolated peer 3, so the new configuation can't reach quorum cluster.add_send_filter(IsolationFilterFactory::new(3)); - let rx = cluster + let mut rx = cluster .async_request(put_request(®ion, 1, b"k4", b"v4")) .unwrap(); assert_eq!( rx.recv_timeout(Duration::from_millis(100)), - Err(mpsc::RecvTimeoutError::Timeout) + Err(future::RecvTimeoutError::Timeout) ); cluster.clear_send_filters(); diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index 7b07b281236..e1905c99476 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -430,7 +430,7 @@ fn test_node_callback_when_destroyed() { let get = new_get_cmd(b"k1"); let mut req = new_request(1, epoch, vec![get], 
true); req.mut_header().set_peer(leader); - let (cb, rx) = make_cb(&req); + let (cb, mut rx) = make_cb(&req); cluster .sim .rl() @@ -501,9 +501,7 @@ fn test_read_index_stale_in_suspect_lease() { must_get_equal(&cluster.get_engine(3), b"k2", b"v2"); // Ensure peer 3 is ready to become leader. let resp_ch = async_read_on_peer(&mut cluster, new_peer(3, 3), r1.clone(), b"k2", true, true); - let resp = block_on_timeout(resp_ch, Duration::from_secs(3)) - .unwrap() - .unwrap(); + let resp = block_on_timeout(resp_ch, Duration::from_secs(3)).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); assert_eq!( resp.get_responses()[0].get_get().get_value(), @@ -651,7 +649,7 @@ fn test_not_leader_read_lease() { true, ); req.mut_header().set_peer(new_peer(1, 1)); - let (cb, rx) = make_cb(&req); + let (cb, mut rx) = make_cb(&req); cluster.sim.rl().async_command_on_node(1, req, cb).unwrap(); cluster.must_transfer_leader(region_id, new_peer(3, 3)); @@ -718,7 +716,7 @@ fn test_read_index_after_write() { ); req.mut_header() .set_peer(new_peer(1, region_on_store1.get_id())); - let (cb, rx) = make_cb(&req); + let (cb, mut rx) = make_cb(&req); cluster.sim.rl().async_command_on_node(1, req, cb).unwrap(); cluster.sim.wl().clear_recv_filters(2); diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 404cb418d33..dda92230ec8 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -1445,10 +1445,10 @@ fn test_merge_pessimistic_locks_when_gap_is_too_large() { // The gap is too large, so the previous merge should fail. And this new put // request should be allowed. 
- let res = cluster.async_put(b"k1", b"new_val").unwrap(); + let mut res = cluster.async_put(b"k1", b"new_val").unwrap(); cluster.clear_send_filters(); - res.recv().unwrap(); + res.recv_timeout(Duration::from_secs(5)).unwrap(); assert_eq!(cluster.must_get(b"k1").unwrap(), b"new_val"); } diff --git a/tests/integrations/raftstore/test_replica_read.rs b/tests/integrations/raftstore/test_replica_read.rs index 10bc86e0b2b..40189017645 100644 --- a/tests/integrations/raftstore/test_replica_read.rs +++ b/tests/integrations/raftstore/test_replica_read.rs @@ -119,17 +119,13 @@ fn test_replica_read_not_applied() { // internally. cluster.sim.wl().clear_send_filters(1); cluster.sim.wl().clear_recv_filters(2); - let resp1 = block_on_timeout(resp1_ch, Duration::from_secs(6)) - .unwrap() - .unwrap(); + let resp1 = block_on_timeout(resp1_ch, Duration::from_secs(6)).unwrap(); let exp_value = resp1.get_responses()[0].get_get().get_value(); assert_eq!(exp_value, b"v2"); // New read index requests can be resolved quickly. let resp2_ch = async_read_on_peer(&mut cluster, new_peer(3, 3), r1, b"k1", true, true); - let resp2 = block_on_timeout(resp2_ch, Duration::from_secs(3)) - .unwrap() - .unwrap(); + let resp2 = block_on_timeout(resp2_ch, Duration::from_secs(3)).unwrap(); let exp_value = resp2.get_responses()[0].get_get().get_value(); assert_eq!(exp_value, b"v2"); } @@ -229,9 +225,7 @@ fn test_read_hibernated_region() { cluster.pd_client.trigger_leader_info_loss(); // This request will fail because no valid leader. let resp1_ch = async_read_on_peer(&mut cluster, p2.clone(), region.clone(), b"k1", true, true); - let resp1 = block_on_timeout(resp1_ch, Duration::from_secs(5)) - .unwrap() - .unwrap(); + let resp1 = block_on_timeout(resp1_ch, Duration::from_secs(5)).unwrap(); assert!( resp1.get_header().get_error().has_not_leader(), "{:?}", @@ -254,9 +248,7 @@ fn test_read_hibernated_region() { // Wait for the leader is woken up. 
thread::sleep(Duration::from_millis(500)); let resp2_ch = async_read_on_peer(&mut cluster, p2, region, b"k1", true, true); - let resp2 = block_on_timeout(resp2_ch, Duration::from_secs(5)) - .unwrap() - .unwrap(); + let resp2 = block_on_timeout(resp2_ch, Duration::from_secs(5)).unwrap(); assert!(!resp2.get_header().has_error(), "{:?}", resp2); } @@ -332,16 +324,12 @@ fn test_read_index_out_of_order() { // After peer 2 is removed, we can get 2 read responses. let resp2 = async_read_on_peer(&mut cluster, new_peer(1, 1), r1, b"k1", true, true); - block_on_timeout(resp2, Duration::from_secs(1)) - .unwrap() - .unwrap(); - block_on_timeout(resp1, Duration::from_secs(1)) - .unwrap() - .unwrap(); + block_on_timeout(resp2, Duration::from_secs(1)).unwrap(); + block_on_timeout(resp1, Duration::from_secs(1)).unwrap(); } #[test_case(test_raftstore::new_node_cluster)] -// #[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_read_index_retry_lock_checking() { let mut cluster = new_cluster(0, 2); @@ -371,10 +359,10 @@ fn test_read_index_retry_lock_checking() { // Can't get response because read index responses are blocked. 
let r1 = cluster.get_region(b"k1"); - let resp1 = async_read_index_on_peer(&mut cluster, new_peer(2, 2), r1.clone(), b"k1", true); - let resp2 = async_read_index_on_peer(&mut cluster, new_peer(2, 2), r1, b"k2", true); - resp1.recv_timeout(Duration::from_secs(2)).unwrap_err(); - resp2.try_recv().unwrap_err(); + let mut resp1 = async_read_index_on_peer(&mut cluster, new_peer(2, 2), r1.clone(), b"k1", true); + let mut resp2 = async_read_index_on_peer(&mut cluster, new_peer(2, 2), r1, b"k2", true); + block_on_timeout(resp1.as_mut(), Duration::from_secs(2)).unwrap_err(); + block_on_timeout(resp2.as_mut(), Duration::from_millis(1)).unwrap_err(); // k1 has a memory lock let leader_cm = cluster.sim.rl().get_concurrency_manager(1); @@ -396,22 +384,22 @@ fn test_read_index_retry_lock_checking() { cluster.sim.wl().clear_recv_filters(2); // resp1 should contain key is locked error assert!( - resp1 - .recv_timeout(Duration::from_secs(2)) + block_on_timeout(resp1, Duration::from_secs(2)) .unwrap() .responses[0] .get_read_index() .has_locked() ); // resp2 should has a successful read index + let resp = block_on_timeout(resp2, Duration::from_secs(2)).unwrap(); assert!( - resp2 - .recv_timeout(Duration::from_secs(2)) - .unwrap() - .responses[0] - .get_read_index() - .get_read_index() - > 0 + !resp.get_header().has_error() + && resp + .get_responses() + .get(0) + .map_or(true, |r| !r.get_read_index().has_locked()), + "{:?}", + resp, ); } @@ -465,9 +453,7 @@ fn test_split_isolation() { // cannot be created. 
for _ in 0..10 { let resp = async_read_on_peer(&mut cluster, peer.clone(), r2.clone(), b"k1", true, true); - let resp = block_on_timeout(resp, Duration::from_secs(1)) - .unwrap() - .unwrap(); + let resp = block_on_timeout(resp, Duration::from_secs(1)).unwrap(); if !resp.get_header().has_error() { return; } @@ -506,9 +492,7 @@ fn test_read_local_after_snapshot_replace_peer() { // wait applying snapshot finish sleep_ms(100); let resp = async_read_on_peer(&mut cluster, new_peer(3, 3), r, b"k1", true, true); - let resp = block_on_timeout(resp, Duration::from_secs(1)) - .unwrap() - .unwrap(); + let resp = block_on_timeout(resp, Duration::from_secs(1)).unwrap(); assert_eq!(resp.get_responses()[0].get_get().get_value(), b"v1"); // trigger leader send snapshot to peer 3 @@ -537,9 +521,7 @@ fn test_read_local_after_snapshot_replace_peer() { let r = cluster.get_region(b"k1"); let resp = async_read_on_peer(&mut cluster, new_peer(3, 1003), r, b"k3", true, true); - let resp = block_on_timeout(resp, Duration::from_secs(1)) - .unwrap() - .unwrap(); + let resp = block_on_timeout(resp, Duration::from_secs(1)).unwrap(); // should not have `mismatch peer id` error if resp.get_header().has_error() { panic!("unexpected err: {:?}", resp.get_header().get_error()); @@ -607,8 +589,6 @@ fn test_malformed_read_index() { // the read queue, the correct request should be responded. 
let resp = async_read_on_peer(&mut cluster, new_peer(1, 1), region, b"k1", true, false); cluster.clear_send_filters(); - let resp = block_on_timeout(resp, Duration::from_secs(10)) - .unwrap() - .unwrap(); + let resp = block_on_timeout(resp, Duration::from_secs(10)).unwrap(); assert_eq!(resp.get_responses()[0].get_get().get_value(), b"v1"); } diff --git a/tests/integrations/raftstore/test_replication_mode.rs b/tests/integrations/raftstore/test_replication_mode.rs index d20249bc53f..cb7de1fad35 100644 --- a/tests/integrations/raftstore/test_replication_mode.rs +++ b/tests/integrations/raftstore/test_replication_mode.rs @@ -1,16 +1,12 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use std::{ - sync::{mpsc, Arc}, - thread, - time::Duration, -}; +use std::{sync::Arc, thread, time::Duration}; use kvproto::replication_modepb::*; use pd_client::PdClient; use raft::eraftpb::ConfChangeType; use test_raftstore::*; -use tikv_util::{config::*, HandyRwLock}; +use tikv_util::{config::*, mpsc::future, HandyRwLock}; fn prepare_cluster() -> Cluster { let mut cluster = new_server_cluster(0, 3); @@ -53,7 +49,7 @@ fn test_dr_auto_sync() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, rx) = make_cb(&request); + let (cb, mut rx) = make_cb(&request); cluster .sim .rl() @@ -75,7 +71,7 @@ fn test_dr_auto_sync() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, rx) = make_cb(&request); + let (cb, mut rx) = make_cb(&request); cluster .sim .rl() @@ -83,7 +79,7 @@ fn test_dr_auto_sync() { .unwrap(); assert_eq!( rx.recv_timeout(Duration::from_millis(100)), - Err(mpsc::RecvTimeoutError::Timeout) + Err(future::RecvTimeoutError::Timeout) ); must_get_none(&cluster.get_engine(1), b"k2"); let state = cluster.pd_client.region_replication_status(region.get_id()); @@ -105,7 +101,7 @@ fn test_sync_recover_after_apply_snapshot() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, rx) = make_cb(&request); + let (cb, mut rx) 
= make_cb(&request); cluster .sim .rl() @@ -113,7 +109,7 @@ fn test_sync_recover_after_apply_snapshot() { .unwrap(); assert_eq!( rx.recv_timeout(Duration::from_millis(100)), - Err(mpsc::RecvTimeoutError::Timeout) + Err(future::RecvTimeoutError::Timeout) ); must_get_none(&cluster.get_engine(1), b"k2"); let state = cluster.pd_client.region_replication_status(region.get_id()); @@ -252,7 +248,7 @@ fn test_switching_replication_mode() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, rx) = make_cb(&request); + let (cb, mut rx) = make_cb(&request); cluster .sim .rl() @@ -260,7 +256,7 @@ fn test_switching_replication_mode() { .unwrap(); assert_eq!( rx.recv_timeout(Duration::from_millis(100)), - Err(mpsc::RecvTimeoutError::Timeout) + Err(future::RecvTimeoutError::Timeout) ); must_get_none(&cluster.get_engine(1), b"k2"); let state = cluster.pd_client.region_replication_status(region.get_id()); @@ -288,7 +284,7 @@ fn test_switching_replication_mode() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, rx) = make_cb(&request); + let (cb, mut rx) = make_cb(&request); cluster .sim .rl() @@ -296,7 +292,7 @@ fn test_switching_replication_mode() { .unwrap(); assert_eq!( rx.recv_timeout(Duration::from_millis(100)), - Err(mpsc::RecvTimeoutError::Timeout) + Err(future::RecvTimeoutError::Timeout) ); must_get_none(&cluster.get_engine(1), b"k3"); let state = cluster.pd_client.region_replication_status(region.get_id()); @@ -329,7 +325,7 @@ fn test_replication_mode_allowlist() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, rx) = make_cb(&request); + let (cb, mut rx) = make_cb(&request); cluster .sim .rl() @@ -337,7 +333,7 @@ fn test_replication_mode_allowlist() { .unwrap(); assert_eq!( rx.recv_timeout(Duration::from_millis(100)), - Err(mpsc::RecvTimeoutError::Timeout) + Err(future::RecvTimeoutError::Timeout) ); // clear allowlist. 
@@ -417,7 +413,7 @@ fn test_migrate_replication_mode() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, rx) = make_cb(&request); + let (cb, mut rx) = make_cb(&request); cluster .sim .rl() @@ -425,7 +421,7 @@ fn test_migrate_replication_mode() { .unwrap(); assert_eq!( rx.recv_timeout(Duration::from_millis(100)), - Err(mpsc::RecvTimeoutError::Timeout) + Err(future::RecvTimeoutError::Timeout) ); must_get_none(&cluster.get_engine(1), b"k2"); let state = cluster.pd_client.region_replication_status(region.get_id()); diff --git a/tests/integrations/storage/test_raftkv.rs b/tests/integrations/storage/test_raftkv.rs index 5f7594a3672..72ea50fa184 100644 --- a/tests/integrations/storage/test_raftkv.rs +++ b/tests/integrations/storage/test_raftkv.rs @@ -322,7 +322,7 @@ fn test_invalid_read_index_when_no_leader() { true, ); request.mut_header().set_peer(follower.clone()); - let (cb, rx) = make_cb(&request); + let (cb, mut rx) = make_cb(&request); cluster .sim .rl() From 0207316e5968f1902fa5f3b1cfdae61d587c7361 Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Wed, 17 May 2023 05:39:36 -0400 Subject: [PATCH 0690/1149] Migrate to 2021 edition (#14724) close tikv/tikv#14726 Signed-off-by: Yuri Astrakhan --- Cargo.toml | 2 +- cmd/tikv-ctl/Cargo.toml | 2 +- cmd/tikv-server/Cargo.toml | 2 +- components/api_version/Cargo.toml | 2 +- components/backup-stream/Cargo.toml | 2 +- components/backup/Cargo.toml | 2 +- components/backup/src/endpoint.rs | 3 +++ components/batch-system/Cargo.toml | 2 +- components/case_macros/Cargo.toml | 2 +- components/causal_ts/Cargo.toml | 2 +- components/cdc/Cargo.toml | 2 +- components/cdc/src/initializer.rs | 8 ++++++++ components/cloud/Cargo.toml | 2 +- components/cloud/aws/Cargo.toml | 2 +- components/cloud/azure/Cargo.toml | 2 +- components/cloud/gcp/Cargo.toml | 2 +- components/cloud/gcp/src/gcs.rs | 1 - components/codec/Cargo.toml | 2 +- components/collections/Cargo.toml | 2 +- components/concurrency_manager/Cargo.toml | 2 +- 
components/coprocessor_plugin_api/Cargo.toml | 2 +- components/coprocessor_plugin_api/src/util.rs | 2 +- components/encryption/Cargo.toml | 2 +- components/encryption/export/Cargo.toml | 2 +- components/engine_panic/Cargo.toml | 2 +- components/engine_rocks/Cargo.toml | 2 +- components/engine_rocks_helper/Cargo.toml | 2 +- components/engine_test/Cargo.toml | 2 +- components/engine_traits/Cargo.toml | 2 +- components/engine_traits_tests/Cargo.toml | 2 +- components/error_code/Cargo.toml | 2 +- components/external_storage/Cargo.toml | 2 +- components/external_storage/export/Cargo.toml | 2 +- components/file_system/Cargo.toml | 2 +- components/into_other/Cargo.toml | 2 +- components/keys/Cargo.toml | 2 +- components/log_wrappers/Cargo.toml | 2 +- components/memory_trace_macros/Cargo.toml | 2 +- components/online_config/Cargo.toml | 2 +- components/online_config/online_config_derive/Cargo.toml | 2 +- components/online_config/src/lib.rs | 2 -- components/panic_hook/Cargo.toml | 2 +- components/pd_client/Cargo.toml | 2 +- components/pd_client/src/client.rs | 6 ++++++ components/profiler/Cargo.toml | 2 +- components/raft_log_engine/Cargo.toml | 2 +- components/raftstore/Cargo.toml | 2 +- components/raftstore/src/store/fsm/apply.rs | 3 +++ components/raftstore/src/store/peer.rs | 4 ++++ components/raftstore/src/store/replication_mode.rs | 3 +++ components/raftstore/src/store/worker/pd.rs | 3 +++ components/resolved_ts/Cargo.toml | 2 +- components/resolved_ts/tests/mod.rs | 3 +++ components/resource_metering/Cargo.toml | 2 +- components/security/Cargo.toml | 2 +- components/server/Cargo.toml | 2 +- components/sst_importer/Cargo.toml | 2 +- components/test_backup/Cargo.toml | 2 +- components/test_coprocessor/Cargo.toml | 2 +- .../test_coprocessor_plugin/example_plugin/Cargo.toml | 2 +- components/test_pd/Cargo.toml | 2 +- components/test_pd/src/server.rs | 3 +++ components/test_pd_client/Cargo.toml | 2 +- components/test_raftstore-v2/Cargo.toml | 2 +- 
components/test_raftstore_macro/Cargo.toml | 2 +- components/test_sst_importer/Cargo.toml | 2 +- components/test_storage/Cargo.toml | 2 +- components/test_util/Cargo.toml | 2 +- components/tidb_query_aggr/Cargo.toml | 2 +- components/tidb_query_aggr/src/impl_avg.rs | 2 -- components/tidb_query_aggr/src/impl_first.rs | 2 -- components/tidb_query_aggr/src/impl_sum.rs | 2 -- components/tidb_query_aggr/src/impl_variance.rs | 2 -- components/tidb_query_aggr/src/util.rs | 2 -- components/tidb_query_codegen/Cargo.toml | 2 +- components/tidb_query_codegen/src/rpn_function.rs | 2 +- components/tidb_query_common/Cargo.toml | 3 +-- components/tidb_query_datatype/Cargo.toml | 2 +- .../tidb_query_datatype/src/codec/batch/lazy_column.rs | 2 -- components/tidb_query_datatype/src/codec/chunk/column.rs | 2 -- .../src/codec/data_type/chunked_vec_json.rs | 2 -- components/tidb_query_datatype/src/def/eval_type.rs | 2 -- components/tidb_query_executors/Cargo.toml | 2 +- components/tidb_query_executors/src/util/scan_executor.rs | 2 -- components/tidb_query_expr/Cargo.toml | 2 +- components/tikv_alloc/Cargo.toml | 2 +- components/tikv_kv/Cargo.toml | 2 +- components/tikv_util/Cargo.toml | 2 +- components/tipb_helper/Cargo.toml | 2 +- components/tracker/Cargo.toml | 2 +- components/txn_types/Cargo.toml | 2 +- fuzz/Cargo.toml | 2 +- fuzz/targets/Cargo.toml | 2 +- src/import/sst_service.rs | 3 +++ src/storage/txn/scheduler.rs | 3 +++ tests/Cargo.toml | 2 +- tests/integrations/storage/test_storage.rs | 6 ++++++ 97 files changed, 121 insertions(+), 97 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5363de8bd59..f83a086f00a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ keywords = ["KV", "distributed-systems", "raft"] homepage = "https://tikv.org" repository = "https://github.com/tikv/tikv/" readme = "README.md" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index 718d760e3d4..5ea6174d2d3 100644 
--- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -2,7 +2,7 @@ name = "tikv-ctl" version = "0.0.1" license = "Apache-2.0" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index 4bba926a68e..080ed278ba7 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -2,7 +2,7 @@ name = "tikv-server" version = "0.0.1" license = "Apache-2.0" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/api_version/Cargo.toml b/components/api_version/Cargo.toml index c80607145bd..3518e99030f 100644 --- a/components/api_version/Cargo.toml +++ b/components/api_version/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "api_version" version = "0.1.0" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 005849391e9..4869fc818d2 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "backup-stream" version = "0.1.0" -edition = "2018" +edition = "2021" [features] default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] diff --git a/components/backup/Cargo.toml b/components/backup/Cargo.toml index 4f12dd04c36..6cb4edfe7dc 100644 --- a/components/backup/Cargo.toml +++ b/components/backup/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "backup" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 4fb1705ebab..9913c668202 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -899,6 +899,9 @@ impl Endpoint { let limit = self.softlimit.limit(); self.pool.borrow_mut().spawn(async move { + // Migrated to 2021 migration. 
This let statement is probably not needed, see + // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = &request; loop { // when get the guard, release it until we finish scanning a batch, // because if we were suspended during scanning, diff --git a/components/batch-system/Cargo.toml b/components/batch-system/Cargo.toml index af57bbef930..ac69d544a21 100644 --- a/components/batch-system/Cargo.toml +++ b/components/batch-system/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "batch-system" version = "0.1.0" -edition = "2018" +edition = "2021" [features] default = ["test-runner"] diff --git a/components/case_macros/Cargo.toml b/components/case_macros/Cargo.toml index 83e9f215b6c..ff6e837938a 100644 --- a/components/case_macros/Cargo.toml +++ b/components/case_macros/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "case_macros" version = "0.1.0" -edition = "2018" +edition = "2021" [lib] proc-macro = true diff --git a/components/causal_ts/Cargo.toml b/components/causal_ts/Cargo.toml index 71af0419a68..9b7925371c5 100644 --- a/components/causal_ts/Cargo.toml +++ b/components/causal_ts/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "causal_ts" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/cdc/Cargo.toml b/components/cdc/Cargo.toml index 3dfbb402d2e..1c695bb92bb 100644 --- a/components/cdc/Cargo.toml +++ b/components/cdc/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cdc" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index c06b13424ba..bd1cc311f06 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -1030,6 +1030,14 @@ mod tests { let (tx1, rx1) = sync_channel(1); let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); pool.spawn(async move { + // Migrated to 2021 migration. 
This let statement is probably not needed, see + // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = ( + &initializer, + &change_cmd, + &raft_router, + &concurrency_semaphore, + ); let res = initializer .initialize(change_cmd, raft_router, concurrency_semaphore) .await; diff --git a/components/cloud/Cargo.toml b/components/cloud/Cargo.toml index 10f8b113b2b..3931370390e 100644 --- a/components/cloud/Cargo.toml +++ b/components/cloud/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cloud" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/components/cloud/aws/Cargo.toml b/components/cloud/aws/Cargo.toml index 24518515ea0..6c387e99974 100644 --- a/components/cloud/aws/Cargo.toml +++ b/components/cloud/aws/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "aws" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/cloud/azure/Cargo.toml b/components/cloud/azure/Cargo.toml index b9ba7732e9e..b9fe8046def 100644 --- a/components/cloud/azure/Cargo.toml +++ b/components/cloud/azure/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "azure" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/components/cloud/gcp/Cargo.toml b/components/cloud/gcp/Cargo.toml index 4c3b8994ffc..f47506a6222 100644 --- a/components/cloud/gcp/Cargo.toml +++ b/components/cloud/gcp/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "gcp" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/components/cloud/gcp/src/gcs.rs b/components/cloud/gcp/src/gcs.rs index 61e432c9431..c43e4e63969 100644 --- a/components/cloud/gcp/src/gcs.rs +++ b/components/cloud/gcp/src/gcs.rs @@ -465,7 +465,6 @@ impl BlobStorage for GcsStorage { "no content to write", )); } - use std::convert::TryFrom; let key = self.maybe_prefix_key(name); debug!("save file to GCS storage"; "key" => %key); diff --git 
a/components/codec/Cargo.toml b/components/codec/Cargo.toml index 8b00f077863..08cf49aff16 100644 --- a/components/codec/Cargo.toml +++ b/components/codec/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "codec" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/components/collections/Cargo.toml b/components/collections/Cargo.toml index dca0afbc2c8..e92618a884b 100644 --- a/components/collections/Cargo.toml +++ b/components/collections/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "collections" version = "0.1.0" -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/components/concurrency_manager/Cargo.toml b/components/concurrency_manager/Cargo.toml index b391c1d239a..846f140dc46 100644 --- a/components/concurrency_manager/Cargo.toml +++ b/components/concurrency_manager/Cargo.toml @@ -1,5 +1,5 @@ [package] -edition = "2018" +edition = "2021" name = "concurrency_manager" publish = false version = "0.0.1" diff --git a/components/coprocessor_plugin_api/Cargo.toml b/components/coprocessor_plugin_api/Cargo.toml index 84b0b197fd2..0c4753bc2ce 100644 --- a/components/coprocessor_plugin_api/Cargo.toml +++ b/components/coprocessor_plugin_api/Cargo.toml @@ -2,7 +2,7 @@ name = "coprocessor_plugin_api" version = "0.1.0" description = "Types and trait for custom coprocessor plugins for TiKV." -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/components/coprocessor_plugin_api/src/util.rs b/components/coprocessor_plugin_api/src/util.rs index 606082c0c4e..31d75610d75 100644 --- a/components/coprocessor_plugin_api/src/util.rs +++ b/components/coprocessor_plugin_api/src/util.rs @@ -118,7 +118,7 @@ macro_rules! 
declare_plugin { #[no_mangle] pub unsafe extern "C" fn _plugin_create( host_allocator: $crate::allocator::HostAllocatorPtr, - ) -> *mut $crate::CoprocessorPlugin { + ) -> *mut dyn $crate::CoprocessorPlugin { #[cfg(not(test))] HOST_ALLOCATOR.set_allocator(host_allocator); diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index deac60223a7..4a9a8634d49 100644 --- a/components/encryption/Cargo.toml +++ b/components/encryption/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "encryption" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/encryption/export/Cargo.toml b/components/encryption/export/Cargo.toml index 164ea312e5d..90b75852b08 100644 --- a/components/encryption/export/Cargo.toml +++ b/components/encryption/export/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "encryption_export" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/engine_panic/Cargo.toml b/components/engine_panic/Cargo.toml index ec77e2b715f..2fad106519d 100644 --- a/components/engine_panic/Cargo.toml +++ b/components/engine_panic/Cargo.toml @@ -2,7 +2,7 @@ name = "engine_panic" version = "0.0.1" description = "An example TiKV storage engine that does nothing but panic" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/engine_rocks/Cargo.toml b/components/engine_rocks/Cargo.toml index d31ed947520..af8a44db44c 100644 --- a/components/engine_rocks/Cargo.toml +++ b/components/engine_rocks/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "engine_rocks" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/engine_rocks_helper/Cargo.toml b/components/engine_rocks_helper/Cargo.toml index b8847fa6ba8..632e2e1a6f5 100644 --- a/components/engine_rocks_helper/Cargo.toml +++ b/components/engine_rocks_helper/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "engine_rocks_helper" version = "0.1.0" 
-edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/engine_test/Cargo.toml b/components/engine_test/Cargo.toml index 16e538acc51..f834d5556a4 100644 --- a/components/engine_test/Cargo.toml +++ b/components/engine_test/Cargo.toml @@ -2,7 +2,7 @@ name = "engine_test" version = "0.0.1" description = "A single engine that masquerades as all other engines, for testing" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/engine_traits/Cargo.toml b/components/engine_traits/Cargo.toml index 664bc72afc5..53cd960244d 100644 --- a/components/engine_traits/Cargo.toml +++ b/components/engine_traits/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "engine_traits" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/engine_traits_tests/Cargo.toml b/components/engine_traits_tests/Cargo.toml index 516135a86d2..f0b230efccd 100644 --- a/components/engine_traits_tests/Cargo.toml +++ b/components/engine_traits_tests/Cargo.toml @@ -2,7 +2,7 @@ name = "engine_traits_tests" version = "0.0.1" description = "Engine-agnostic tests for the engine_traits interface" -edition = "2018" +edition = "2021" publish = false [lib] diff --git a/components/error_code/Cargo.toml b/components/error_code/Cargo.toml index b98fc8dfcb5..307532c1bb6 100644 --- a/components/error_code/Cargo.toml +++ b/components/error_code/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "error_code" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [lib] diff --git a/components/external_storage/Cargo.toml b/components/external_storage/Cargo.toml index 4ff13e564ff..aed49aad3ab 100644 --- a/components/external_storage/Cargo.toml +++ b/components/external_storage/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "external_storage" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/external_storage/export/Cargo.toml 
b/components/external_storage/export/Cargo.toml index 61e9bfa58df..48e911ad5c2 100644 --- a/components/external_storage/export/Cargo.toml +++ b/components/external_storage/export/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "external_storage_export" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [[bin]] diff --git a/components/file_system/Cargo.toml b/components/file_system/Cargo.toml index 2252ebc3f1b..fbd96c3c348 100644 --- a/components/file_system/Cargo.toml +++ b/components/file_system/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "file_system" version = "0.1.0" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/into_other/Cargo.toml b/components/into_other/Cargo.toml index d31f04f4e12..a7778fded0e 100644 --- a/components/into_other/Cargo.toml +++ b/components/into_other/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "into_other" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/components/keys/Cargo.toml b/components/keys/Cargo.toml index b5a6412d00a..a34ae0df79d 100644 --- a/components/keys/Cargo.toml +++ b/components/keys/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "keys" version = "0.1.0" -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/components/log_wrappers/Cargo.toml b/components/log_wrappers/Cargo.toml index 4c9e62b6876..c472755947f 100644 --- a/components/log_wrappers/Cargo.toml +++ b/components/log_wrappers/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "log_wrappers" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/components/memory_trace_macros/Cargo.toml b/components/memory_trace_macros/Cargo.toml index a5d79834dda..8dc800cf7b1 100644 --- a/components/memory_trace_macros/Cargo.toml +++ b/components/memory_trace_macros/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "memory_trace_macros" version = "0.1.0" -edition = "2018" +edition = "2021" [lib] proc-macro = true diff --git 
a/components/online_config/Cargo.toml b/components/online_config/Cargo.toml index 098da6bb428..9d67f1cf1de 100644 --- a/components/online_config/Cargo.toml +++ b/components/online_config/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "online_config" version = "0.1.0" -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/components/online_config/online_config_derive/Cargo.toml b/components/online_config/online_config_derive/Cargo.toml index 64d055a66d2..bcc206e907c 100644 --- a/components/online_config/online_config_derive/Cargo.toml +++ b/components/online_config/online_config_derive/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "online_config_derive" version = "0.1.0" -edition = "2018" +edition = "2021" publish = false [lib] diff --git a/components/online_config/src/lib.rs b/components/online_config/src/lib.rs index 18d9cc0fd71..8be3c2087b4 100644 --- a/components/online_config/src/lib.rs +++ b/components/online_config/src/lib.rs @@ -133,8 +133,6 @@ pub trait ConfigManager: Send + Sync { #[cfg(test)] mod tests { - use std::convert::TryFrom; - use serde::Serialize; use super::*; diff --git a/components/panic_hook/Cargo.toml b/components/panic_hook/Cargo.toml index 5eebe0a14c7..cfe4030bd08 100644 --- a/components/panic_hook/Cargo.toml +++ b/components/panic_hook/Cargo.toml @@ -1,5 +1,5 @@ [package] name = "panic_hook" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false diff --git a/components/pd_client/Cargo.toml b/components/pd_client/Cargo.toml index 976ad90432a..7be69dd4136 100644 --- a/components/pd_client/Cargo.toml +++ b/components/pd_client/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pd_client" version = "0.1.0" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 36f7aaa983b..35fc00fb631 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -215,6 +215,9 @@ impl RpcClient { }; 
Box::pin(async move { + // Migrated to 2021 migration. This let statement is probably not needed, see + // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = &req; let mut resp = handler.await?; check_resp_header(resp.get_header())?; let region = if resp.has_region() { @@ -1143,6 +1146,9 @@ impl MetaStorageClient for RpcClient { futures::future::ready(r).err_into().try_flatten() }; Box::pin(async move { + // Migrated to 2021 migration. This let statement is probably not needed, see + // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = &req; fail::fail_point!("meta_storage_get", req.key.ends_with(b"rejectme"), |_| { Err(super::Error::Grpc(grpcio::Error::RemoteStopped)) }); diff --git a/components/profiler/Cargo.toml b/components/profiler/Cargo.toml index e5583a631d5..a3382229791 100644 --- a/components/profiler/Cargo.toml +++ b/components/profiler/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "profiler" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/raft_log_engine/Cargo.toml b/components/raft_log_engine/Cargo.toml index cbccea9dbe0..99540c13aa3 100644 --- a/components/raft_log_engine/Cargo.toml +++ b/components/raft_log_engine/Cargo.toml @@ -2,7 +2,7 @@ name = "raft_log_engine" version = "0.0.1" publish = false -edition = "2018" +edition = "2021" [dependencies] encryption = { workspace = true } diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index cbf943800ee..70712e61d7d 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -3,7 +3,7 @@ name = "raftstore" version = "0.0.1" authors = ["The TiKV Authors"] license = "Apache-2.0" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index d1ba6d4e774..dc54bf1a2d3 100644 --- 
a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -5351,6 +5351,9 @@ mod tests { reg.apply_state.set_applied_index(3); router.schedule_task(2, Msg::Registration(reg.dup())); validate(&router, 2, move |delegate| { + // Migrated to 2021 migration. This let statement is probably not needed, see + // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = ® assert_eq!(delegate.id(), 1); assert_eq!(delegate.peer, peer); assert_eq!(delegate.tag, "[region 2] 1"); diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index f21fccddff5..91c9c9cef6a 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -6061,6 +6061,10 @@ mod tests { fn must_call() -> ExtCallback { let mut d = DropPanic(true); Box::new(move || { + // Must move the entire struct to closure, + // or else it will be dropped early in 2021 edition + // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = &d; d.0 = false; }) } diff --git a/components/raftstore/src/store/replication_mode.rs b/components/raftstore/src/store/replication_mode.rs index 5f4602cde05..b83aff3d991 100644 --- a/components/raftstore/src/store/replication_mode.rs +++ b/components/raftstore/src/store/replication_mode.rs @@ -329,6 +329,9 @@ mod tests { ); // But a calculated group id can't be changed. let res = panic_hook::recover_safe(move || { + // Migrated to 2021 migration. 
This let statement is probably not needed, see + // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = &state; state .group .register_store(1, vec![label1.clone(), label3.clone()]) diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 74fa4d046f1..656f42f3f44 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1861,6 +1861,9 @@ where .pd_client .report_region_buckets(&delta, Duration::from_secs(interval_second)); let f = async move { + // Migrated to 2021 migration. This let statement is probably not needed, see + // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = δ if let Err(e) = resp.await { debug!( "failed to send buckets"; diff --git a/components/resolved_ts/Cargo.toml b/components/resolved_ts/Cargo.toml index db3c0643cb7..61a0118be5d 100644 --- a/components/resolved_ts/Cargo.toml +++ b/components/resolved_ts/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "resolved_ts" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/resolved_ts/tests/mod.rs b/components/resolved_ts/tests/mod.rs index 36705f9c015..4e6226f8935 100644 --- a/components/resolved_ts/tests/mod.rs +++ b/components/resolved_ts/tests/mod.rs @@ -62,6 +62,9 @@ impl TestSuite { obs.insert(id, rts_ob.clone()); sim.coprocessor_hooks.entry(id).or_default().push(Box::new( move |host: &mut CoprocessorHost<_>| { + // Migrated to 2021 migration. 
This let statement is probably not needed, see + // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = &rts_ob; rts_ob.register_to(host); }, )); diff --git a/components/resource_metering/Cargo.toml b/components/resource_metering/Cargo.toml index f8e26e01c50..068b26483ff 100644 --- a/components/resource_metering/Cargo.toml +++ b/components/resource_metering/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "resource_metering" version = "0.0.1" -edition = "2018" +edition = "2021" [dependencies] collections = { workspace = true } diff --git a/components/security/Cargo.toml b/components/security/Cargo.toml index fdf7ab8e29e..8a7421be75d 100644 --- a/components/security/Cargo.toml +++ b/components/security/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "security" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index 554dbaa63f9..0286a671fa1 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -2,7 +2,7 @@ name = "server" version = "0.0.1" license = "Apache-2.0" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/sst_importer/Cargo.toml b/components/sst_importer/Cargo.toml index 8e2799b7437..28212b6fe36 100644 --- a/components/sst_importer/Cargo.toml +++ b/components/sst_importer/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sst_importer" version = "0.1.0" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/test_backup/Cargo.toml b/components/test_backup/Cargo.toml index 1798b50c82b..59300f993e3 100644 --- a/components/test_backup/Cargo.toml +++ b/components/test_backup/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "test_backup" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/test_coprocessor/Cargo.toml b/components/test_coprocessor/Cargo.toml index 03047d75e87..a09626eedac 100644 
--- a/components/test_coprocessor/Cargo.toml +++ b/components/test_coprocessor/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "test_coprocessor" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/test_coprocessor_plugin/example_plugin/Cargo.toml b/components/test_coprocessor_plugin/example_plugin/Cargo.toml index 6bbc8b25012..854eacbb2c1 100644 --- a/components/test_coprocessor_plugin/example_plugin/Cargo.toml +++ b/components/test_coprocessor_plugin/example_plugin/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "example_coprocessor_plugin" version = "0.1.0" -edition = "2018" +edition = "2021" publish = false [lib] diff --git a/components/test_pd/Cargo.toml b/components/test_pd/Cargo.toml index 7747ac1bbc6..811b9e7b0ca 100644 --- a/components/test_pd/Cargo.toml +++ b/components/test_pd/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "test_pd" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/components/test_pd/src/server.rs b/components/test_pd/src/server.rs index 1662e27f00f..615da206d2c 100644 --- a/components/test_pd/src/server.rs +++ b/components/test_pd/src/server.rs @@ -258,6 +258,9 @@ impl Pd for PdMock { ) { let cli = self.etcd_client.clone(); let future = async move { + // Migrated to 2021 migration. 
This let statement is probably not needed, see + // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = &req; let mut watcher = match cli .lock() .await diff --git a/components/test_pd_client/Cargo.toml b/components/test_pd_client/Cargo.toml index 3b002970236..975d4baff1b 100644 --- a/components/test_pd_client/Cargo.toml +++ b/components/test_pd_client/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "test_pd_client" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/components/test_raftstore-v2/Cargo.toml b/components/test_raftstore-v2/Cargo.toml index 5c6297c124d..58294e58b34 100644 --- a/components/test_raftstore-v2/Cargo.toml +++ b/components/test_raftstore-v2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "test_raftstore-v2" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/test_raftstore_macro/Cargo.toml b/components/test_raftstore_macro/Cargo.toml index 7a05f56ed3d..327527f3ae5 100644 --- a/components/test_raftstore_macro/Cargo.toml +++ b/components/test_raftstore_macro/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "test_raftstore_macro" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [lib] diff --git a/components/test_sst_importer/Cargo.toml b/components/test_sst_importer/Cargo.toml index f951a6755e6..09bdb722de3 100644 --- a/components/test_sst_importer/Cargo.toml +++ b/components/test_sst_importer/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "test_sst_importer" version = "0.1.0" -edition = "2018" +edition = "2021" publish = false description = "test helpers for sst_importer" diff --git a/components/test_storage/Cargo.toml b/components/test_storage/Cargo.toml index b1172b5d559..17fa91f3005 100644 --- a/components/test_storage/Cargo.toml +++ b/components/test_storage/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "test_storage" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] 
diff --git a/components/test_util/Cargo.toml b/components/test_util/Cargo.toml index 64dbb2456ce..b4a23b5eda1 100644 --- a/components/test_util/Cargo.toml +++ b/components/test_util/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "test_util" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/tidb_query_aggr/Cargo.toml b/components/tidb_query_aggr/Cargo.toml index facc9d32f36..f7b0378a173 100644 --- a/components/tidb_query_aggr/Cargo.toml +++ b/components/tidb_query_aggr/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tidb_query_aggr" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false description = "Vector aggr functions of query engine to run TiDB pushed down executors" diff --git a/components/tidb_query_aggr/src/impl_avg.rs b/components/tidb_query_aggr/src/impl_avg.rs index 6337c8de6c5..9872be3bd22 100644 --- a/components/tidb_query_aggr/src/impl_avg.rs +++ b/components/tidb_query_aggr/src/impl_avg.rs @@ -30,8 +30,6 @@ impl super::AggrDefinitionParser for AggrFnDefinitionParserAvg { out_schema: &mut Vec, out_exp: &mut Vec, ) -> Result> { - use std::convert::TryFrom; - use tidb_query_datatype::FieldTypeAccessor; assert_eq!(root_expr.get_tp(), ExprType::Avg); diff --git a/components/tidb_query_aggr/src/impl_first.rs b/components/tidb_query_aggr/src/impl_first.rs index b7ccd077598..3eb9a8e04aa 100644 --- a/components/tidb_query_aggr/src/impl_first.rs +++ b/components/tidb_query_aggr/src/impl_first.rs @@ -29,8 +29,6 @@ impl super::AggrDefinitionParser for AggrFnDefinitionParserFirst { out_schema: &mut Vec, out_exp: &mut Vec, ) -> Result> { - use std::convert::TryFrom; - use tidb_query_datatype::FieldTypeAccessor; assert_eq!(root_expr.get_tp(), ExprType::First); diff --git a/components/tidb_query_aggr/src/impl_sum.rs b/components/tidb_query_aggr/src/impl_sum.rs index 85f31b8f459..b24657f2475 100644 --- a/components/tidb_query_aggr/src/impl_sum.rs +++ b/components/tidb_query_aggr/src/impl_sum.rs @@ -27,8 
+27,6 @@ impl super::parser::AggrDefinitionParser for AggrFnDefinitionParserSum { out_schema: &mut Vec, out_exp: &mut Vec, ) -> Result> { - use std::convert::TryFrom; - use tidb_query_datatype::FieldTypeAccessor; assert_eq!(root_expr.get_tp(), ExprType::Sum); diff --git a/components/tidb_query_aggr/src/impl_variance.rs b/components/tidb_query_aggr/src/impl_variance.rs index 190446c3809..40b85e07f23 100644 --- a/components/tidb_query_aggr/src/impl_variance.rs +++ b/components/tidb_query_aggr/src/impl_variance.rs @@ -71,8 +71,6 @@ impl super::AggrDefinitionParser for AggrFnDefinitionParserVari out_schema: &mut Vec, out_exp: &mut Vec, ) -> Result> { - use std::convert::TryFrom; - use tidb_query_datatype::FieldTypeAccessor; assert!(V::check_expr_type(root_expr.get_tp())); diff --git a/components/tidb_query_aggr/src/util.rs b/components/tidb_query_aggr/src/util.rs index c4ba7a05766..c4361e685a5 100644 --- a/components/tidb_query_aggr/src/util.rs +++ b/components/tidb_query_aggr/src/util.rs @@ -1,7 +1,5 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::convert::TryFrom; - use tidb_query_common::Result; use tidb_query_datatype::{builder::FieldTypeBuilder, EvalType, FieldTypeAccessor, FieldTypeTp}; use tidb_query_expr::{impl_cast::get_cast_fn_rpn_node, RpnExpression, RpnExpressionBuilder}; diff --git a/components/tidb_query_codegen/Cargo.toml b/components/tidb_query_codegen/Cargo.toml index 5379e6ae66d..c3ae8d8106c 100644 --- a/components/tidb_query_codegen/Cargo.toml +++ b/components/tidb_query_codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tidb_query_codegen" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [lib] diff --git a/components/tidb_query_codegen/src/rpn_function.rs b/components/tidb_query_codegen/src/rpn_function.rs index dfdede3a3b3..33976939c83 100644 --- a/components/tidb_query_codegen/src/rpn_function.rs +++ b/components/tidb_query_codegen/src/rpn_function.rs @@ -794,7 +794,7 @@ fn generate_init_metadata_fn( fn generate_downcast_metadata(has_metadata: bool) -> TokenStream { if has_metadata { quote! { - let metadata = std::any::Any::downcast_ref(metadata).expect("downcast metadata error"); + let metadata = ::downcast_ref(metadata).expect("downcast metadata error"); } } else { quote! 
{} diff --git a/components/tidb_query_common/Cargo.toml b/components/tidb_query_common/Cargo.toml index f192b22a5f6..32dee06f46d 100644 --- a/components/tidb_query_common/Cargo.toml +++ b/components/tidb_query_common/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tidb_query_common" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false description = "Common utility of a query engine to run TiDB pushed down executors" @@ -25,4 +25,3 @@ yatp = { workspace = true } [dev-dependencies] byteorder = "1.2" - diff --git a/components/tidb_query_datatype/Cargo.toml b/components/tidb_query_datatype/Cargo.toml index 97fb2d101b6..db25dacc74a 100644 --- a/components/tidb_query_datatype/Cargo.toml +++ b/components/tidb_query_datatype/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tidb_query_datatype" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false description = "Data type of a query engine to run TiDB pushed down executors" diff --git a/components/tidb_query_datatype/src/codec/batch/lazy_column.rs b/components/tidb_query_datatype/src/codec/batch/lazy_column.rs index 11d290f9c31..b95b892e3f0 100644 --- a/components/tidb_query_datatype/src/codec/batch/lazy_column.rs +++ b/components/tidb_query_datatype/src/codec/batch/lazy_column.rs @@ -1,7 +1,5 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::convert::TryFrom; - use tikv_util::buffer_vec::BufferVec; use tipb::FieldType; diff --git a/components/tidb_query_datatype/src/codec/chunk/column.rs b/components/tidb_query_datatype/src/codec/chunk/column.rs index ef1c2602864..d308248e4eb 100644 --- a/components/tidb_query_datatype/src/codec/chunk/column.rs +++ b/components/tidb_query_datatype/src/codec/chunk/column.rs @@ -1,7 +1,5 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::convert::TryFrom; - use codec::{ buffer::{BufferReader, BufferWriter}, number::{NumberDecoder, NumberEncoder}, diff --git a/components/tidb_query_datatype/src/codec/data_type/chunked_vec_json.rs b/components/tidb_query_datatype/src/codec/data_type/chunked_vec_json.rs index 9ef17dc61eb..4d6ac39c006 100644 --- a/components/tidb_query_datatype/src/codec/data_type/chunked_vec_json.rs +++ b/components/tidb_query_datatype/src/codec/data_type/chunked_vec_json.rs @@ -1,7 +1,5 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use std::convert::TryFrom; - use super::{bit_vec::BitVec, ChunkRef, ChunkedVec, Json, JsonRef, JsonType, UnsafeRefInto}; use crate::impl_chunked_vec_common; diff --git a/components/tidb_query_datatype/src/def/eval_type.rs b/components/tidb_query_datatype/src/def/eval_type.rs index e6cd7da1b6a..0f04334aff2 100644 --- a/components/tidb_query_datatype/src/def/eval_type.rs +++ b/components/tidb_query_datatype/src/def/eval_type.rs @@ -90,8 +90,6 @@ impl std::convert::TryFrom for EvalType { #[cfg(test)] mod tests { - use std::convert::TryFrom; - use super::*; use crate::{FieldTypeAccessor, FieldTypeTp::*}; diff --git a/components/tidb_query_executors/Cargo.toml b/components/tidb_query_executors/Cargo.toml index 3fb3fdca2bb..30fe64252ac 100644 --- a/components/tidb_query_executors/Cargo.toml +++ b/components/tidb_query_executors/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tidb_query_executors" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false description = "A vector query engine to run TiDB pushed down executors" diff --git a/components/tidb_query_executors/src/util/scan_executor.rs b/components/tidb_query_executors/src/util/scan_executor.rs index be134725de6..1b7ca895f21 100644 --- a/components/tidb_query_executors/src/util/scan_executor.rs +++ b/components/tidb_query_executors/src/util/scan_executor.rs @@ -151,8 +151,6 @@ pub fn field_type_from_column_info(ci: &ColumnInfo) -> FieldType { /// Checks whether 
the given columns info are supported. pub fn check_columns_info_supported(columns_info: &[ColumnInfo]) -> Result<()> { - use std::convert::TryFrom; - use tidb_query_datatype::{EvalType, FieldTypeAccessor}; for column in columns_info { diff --git a/components/tidb_query_expr/Cargo.toml b/components/tidb_query_expr/Cargo.toml index 95f37308e59..8a178401905 100644 --- a/components/tidb_query_expr/Cargo.toml +++ b/components/tidb_query_expr/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tidb_query_expr" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false description = "Vector expressions of query engine to run TiDB pushed down executors" diff --git a/components/tikv_alloc/Cargo.toml b/components/tikv_alloc/Cargo.toml index 086744cab8f..968969f3332 100644 --- a/components/tikv_alloc/Cargo.toml +++ b/components/tikv_alloc/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tikv_alloc" version = "0.1.0" -edition = "2018" +edition = "2021" authors = ["Brian Anderson "] publish = false diff --git a/components/tikv_kv/Cargo.toml b/components/tikv_kv/Cargo.toml index 8197637243e..1f6fd641cd3 100644 --- a/components/tikv_kv/Cargo.toml +++ b/components/tikv_kv/Cargo.toml @@ -3,7 +3,7 @@ name = "tikv_kv" version = "0.1.0" authors = ["The TiKV Authors"] description = "The key-value abstraction directly used by TiKV" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index b501322e152..35c4940ae70 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tikv_util" version = "0.1.0" -edition = "2018" +edition = "2021" publish = false [features] diff --git a/components/tipb_helper/Cargo.toml b/components/tipb_helper/Cargo.toml index bfbadabaea3..2954a1f0d4f 100644 --- a/components/tipb_helper/Cargo.toml +++ b/components/tipb_helper/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tipb_helper" version = "0.0.1" -edition = "2018" 
+edition = "2021" publish = false [dependencies] diff --git a/components/tracker/Cargo.toml b/components/tracker/Cargo.toml index 84a3f5da0ab..a43dd0c566d 100644 --- a/components/tracker/Cargo.toml +++ b/components/tracker/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tracker" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/components/txn_types/Cargo.toml b/components/txn_types/Cargo.toml index 987b7216d22..0db4d0a6dff 100644 --- a/components/txn_types/Cargo.toml +++ b/components/txn_types/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "txn_types" version = "0.1.0" -edition = "2018" +edition = "2021" publish = false [dependencies] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index eed204a2992..a617ae8b693 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -2,7 +2,7 @@ name = "fuzz" version = "0.0.1" publish = false -edition = "2018" +edition = "2021" [[bin]] name = "fuzz" diff --git a/fuzz/targets/Cargo.toml b/fuzz/targets/Cargo.toml index 878ce33aea9..92d8eb48b08 100644 --- a/fuzz/targets/Cargo.toml +++ b/fuzz/targets/Cargo.toml @@ -2,7 +2,7 @@ name = "fuzz-targets" version = "0.0.1" publish = false -edition = "2018" +edition = "2021" [lib] path = "mod.rs" diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index c235c60a4e6..f0151ffc8f5 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -628,6 +628,9 @@ macro_rules! impl_write { }; let writer = rx .try_fold(writer, |mut writer, req| async move { + // Migrated to 2021 migration. 
This let statement is probably not + // needed, see https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = &req; let batch = match req.chunk { Some($chunk_ty::Batch(b)) => b, _ => return Err(Error::InvalidChunk), diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index cd3b711baa8..0acf1de49a3 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -1366,6 +1366,9 @@ impl TxnScheduler { // Safety: `self.sched_pool` ensures a TLS engine exists. unsafe { with_tls_engine(|engine: &mut E| { + // Migrated to 2021 migration. This let statement is probably not needed, see + // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = &to_be_write; // We skip writing the raftstore, but to improve CDC old value hit rate, // we should send the old values to the CDC scheduler. engine.schedule_txn_extra(to_be_write.extra); diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 331575339a5..facaa2eeae9 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tests" version = "0.0.1" -edition = "2018" +edition = "2021" publish = false [[test]] diff --git a/tests/integrations/storage/test_storage.rs b/tests/integrations/storage/test_storage.rs index b0c60ae5aab..0221205454d 100644 --- a/tests/integrations/storage/test_storage.rs +++ b/tests/integrations/storage/test_storage.rs @@ -1237,6 +1237,9 @@ fn test_isolation_inc() { let (punch_card, store, oracle) = (Arc::clone(&punch_card), store.clone(), Arc::clone(&oracle)); threads.push(thread::spawn(move || { + // Migrated to 2021 migration. 
This let statement is probably not needed, see + // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = &store; for _ in 0..INC_PER_THREAD { let number = inc(&store.store, &oracle, b"key").unwrap() as usize; let mut punch = punch_card.lock().unwrap(); @@ -1326,6 +1329,9 @@ fn test_isolation_multi_inc() { for _ in 0..THREAD_NUM { let (store, oracle) = (store.clone(), Arc::clone(&oracle)); threads.push(thread::spawn(move || { + // Migrated to 2021 migration. This let statement is probably not needed, see + // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = &store; for _ in 0..INC_PER_THREAD { assert!(inc_multi(&store.store, &oracle, KEY_NUM)); } From 2e3ae25d17793418c9e3407b0be798d3a8fe1813 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 18 May 2023 12:29:35 +0800 Subject: [PATCH 0691/1149] sst_importer: load SST start key from sst while cleaning up (#14746) close tikv/tikv#14745 Signed-off-by: hillium --- Cargo.lock | 2 + components/engine_panic/src/sst.rs | 8 +- components/engine_rocks/src/encryption.rs | 6 + components/engine_rocks/src/sst.rs | 17 ++- components/engine_traits/src/sst.rs | 5 +- .../raftstore/src/store/worker/cleanup_sst.rs | 37 +++++- components/sst_importer/Cargo.toml | 6 + components/sst_importer/src/import_file.rs | 122 ++++++++++++++++-- components/sst_importer/src/sst_importer.rs | 17 ++- 9 files changed, 200 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a570532e20c..f2bcf0652b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5762,6 +5762,7 @@ dependencies = [ "dashmap", "encryption", "engine_rocks", + "engine_test", "engine_traits", "error_code", "external_storage_export", @@ -5776,6 +5777,7 @@ dependencies = [ "online_config", "openssl", "prometheus", + "protobuf", "rand 0.8.5", "serde", "serde_derive", diff --git a/components/engine_panic/src/sst.rs 
b/components/engine_panic/src/sst.rs index a0f1479604c..119cd5884a3 100644 --- a/components/engine_panic/src/sst.rs +++ b/components/engine_panic/src/sst.rs @@ -1,6 +1,6 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::{marker::PhantomData, path::PathBuf}; +use std::{marker::PhantomData, path::PathBuf, sync::Arc}; use engine_traits::{ CfName, ExternalSstFileInfo, IterOptions, Iterable, Iterator, RefIterable, Result, @@ -21,6 +21,12 @@ impl SstReader for PanicSstReader { fn open(path: &str) -> Result { panic!() } + fn open_encrypted( + path: &str, + mgr: Arc, + ) -> Result { + panic!() + } fn verify_checksum(&self) -> Result<()> { panic!() } diff --git a/components/engine_rocks/src/encryption.rs b/components/engine_rocks/src/encryption.rs index 3caf07a0276..99d492c4792 100644 --- a/components/engine_rocks/src/encryption.rs +++ b/components/engine_rocks/src/encryption.rs @@ -31,6 +31,12 @@ pub struct WrappedEncryptionKeyManager { manager: Arc, } +impl WrappedEncryptionKeyManager { + pub fn new(manager: Arc) -> Self { + Self { manager } + } +} + impl DBEncryptionKeyManager for WrappedEncryptionKeyManager { fn get_file(&self, fname: &str) -> Result { self.manager diff --git a/components/engine_rocks/src/sst.rs b/components/engine_rocks/src/sst.rs index 85c30d74a87..145fa9a7bce 100644 --- a/components/engine_rocks/src/sst.rs +++ b/components/engine_rocks/src/sst.rs @@ -3,8 +3,8 @@ use std::{path::PathBuf, sync::Arc}; use engine_traits::{ - Error, ExternalSstFileInfo, IterOptions, Iterator, RefIterable, Result, SstCompressionType, - SstExt, SstMetaInfo, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, + EncryptionKeyManager, Error, ExternalSstFileInfo, IterOptions, Iterator, RefIterable, Result, + SstCompressionType, SstExt, SstMetaInfo, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, }; use fail::fail_point; use kvproto::import_sstpb::SstMeta; @@ -13,8 +13,11 @@ use rocksdb::{ EnvOptions, ExternalSstFileInfo as 
RawExternalSstFileInfo, SequentialFile, SstFileReader, SstFileWriter, DB, }; +use tikv_util::box_err; -use crate::{engine::RocksEngine, options::RocksReadOptions, r2e}; +use crate::{ + encryption::WrappedEncryptionKeyManager, engine::RocksEngine, options::RocksReadOptions, r2e, +}; impl SstExt for RocksEngine { type SstReader = RocksSstReader; @@ -63,6 +66,14 @@ impl SstReader for RocksSstReader { fn open(path: &str) -> Result { Self::open_with_env(path, None) } + fn open_encrypted(path: &str, mgr: Arc) -> Result { + let env = Env::new_key_managed_encrypted_env( + Arc::default(), + WrappedEncryptionKeyManager::new(mgr), + ) + .map_err(|err| Error::Other(box_err!("failed to open encrypted env: {}", err)))?; + Self::open_with_env(path, Some(Arc::new(env))) + } fn verify_checksum(&self) -> Result<()> { self.inner.verify_checksum().map_err(r2e)?; Ok(()) diff --git a/components/engine_traits/src/sst.rs b/components/engine_traits/src/sst.rs index ea08df3bb50..4a728df1e97 100644 --- a/components/engine_traits/src/sst.rs +++ b/components/engine_traits/src/sst.rs @@ -1,10 +1,10 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::path::PathBuf; +use std::{path::PathBuf, sync::Arc}; use kvproto::import_sstpb::SstMeta; -use crate::{errors::Result, RefIterable}; +use crate::{errors::Result, EncryptionKeyManager, RefIterable}; #[derive(Clone, Debug)] pub struct SstMetaInfo { @@ -22,6 +22,7 @@ pub trait SstExt: Sized { /// SstReader is used to read an SST file. 
pub trait SstReader: RefIterable + Sized { fn open(path: &str) -> Result; + fn open_encrypted(path: &str, mgr: Arc) -> Result; fn verify_checksum(&self) -> Result<()>; } diff --git a/components/raftstore/src/store/worker/cleanup_sst.rs b/components/raftstore/src/store/worker/cleanup_sst.rs index 5e58ab77b63..8174b872f4b 100644 --- a/components/raftstore/src/store/worker/cleanup_sst.rs +++ b/components/raftstore/src/store/worker/cleanup_sst.rs @@ -1,15 +1,17 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. -use std::{fmt, marker::PhantomData, sync::Arc}; +use std::{error::Error, fmt, marker::PhantomData, sync::Arc}; use engine_traits::KvEngine; -use kvproto::import_sstpb::SstMeta; +use kvproto::{import_sstpb::SstMeta, metapb::Region}; use pd_client::PdClient; use sst_importer::SstImporter; use tikv_util::{error, worker::Runnable}; use crate::store::{util::is_epoch_stale, StoreMsg, StoreRouter}; +type Result = std::result::Result>; + pub enum Task { DeleteSst { ssts: Vec }, ValidateSst { ssts: Vec }, @@ -64,12 +66,39 @@ where } } + fn get_region_by_meta(&self, sst: &SstMeta) -> Result { + // The SST meta has been delivered with a range, use it directly. + // For now, no case will reach this. But this still could be a guard for + // reducing the superise in the future... + if !sst.get_range().get_start().is_empty() || !sst.get_range().get_end().is_empty() { + return self + .pd_client + .get_region(sst.get_range().get_start()) + .map_err(Into::into); + } + // Once there isn't range provided. + let query_by_start_key_of_full_meta = || { + let start_key = self + .importer + .load_start_key_by_meta::(sst)? 
+ .ok_or_else(|| -> Box { + "failed to load start key from sst, the sst might be empty".into() + })?; + let region = self.pd_client.get_region(&start_key)?; + Result::Ok(region) + }; + query_by_start_key_of_full_meta() + .map_err(|err| + format!("failed to load full sst meta from disk for {:?} and there isn't extra information provided: {err}", sst.get_uuid()).into() + ) + } + /// Validates whether the SST is stale or not. fn handle_validate_sst(&self, ssts: Vec) { let store_id = self.store_id; let mut invalid_ssts = Vec::new(); for sst in ssts { - match self.pd_client.get_region(sst.get_range().get_start()) { + match self.get_region_by_meta(&sst) { Ok(r) => { // The region id may or may not be the same as the // SST file, but it doesn't matter, because the @@ -87,7 +116,7 @@ where invalid_ssts.push(sst); } Err(e) => { - error!(%e; "get region failed"); + error!("get region failed"; "err" => %e); } } } diff --git a/components/sst_importer/Cargo.toml b/components/sst_importer/Cargo.toml index 28212b6fe36..bb4e64657ce 100644 --- a/components/sst_importer/Cargo.toml +++ b/components/sst_importer/Cargo.toml @@ -12,6 +12,10 @@ cloud-azure = ["external_storage_export/cloud-azure"] cloud-storage-grpc = ["external_storage_export/cloud-storage-grpc"] cloud-storage-dylib = ["external_storage_export/cloud-storage-dylib"] +test-engines-rocksdb = [ + "engine_test/test-engines-rocksdb", +] + [dependencies] api_version = { workspace = true } crc32fast = "1.2" @@ -32,6 +36,7 @@ log_wrappers = { workspace = true } online_config = { workspace = true } openssl = "0.10" prometheus = { version = "0.13", default-features = false } +protobuf = { version = "2.8", features = ["bytes"] } rand = "0.8" serde = "1.0" serde_derive = "1.0" @@ -45,6 +50,7 @@ txn_types = { workspace = true } uuid = { version = "0.8.1", features = ["serde", "v4"] } [dev-dependencies] +engine_test = { workspace = true } tempfile = "3.0" test_sst_importer = { workspace = true } test_util = { workspace = true } diff 
--git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index 84d2f67bbab..a99c7c0f7e1 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -12,7 +12,8 @@ use api_version::api_v2::TIDB_RANGES_COMPLEMENT; use encryption::{DataKeyManager, EncrypterWriter}; use engine_rocks::{get_env, RocksSstReader}; use engine_traits::{ - iter_option, EncryptionKeyManager, Iterator, KvEngine, RefIterable, SstMetaInfo, SstReader, + iter_option, EncryptionKeyManager, IterOptions, Iterator, KvEngine, RefIterable, SstExt, + SstMetaInfo, SstReader, }; use file_system::{get_io_rate_limiter, sync_dir, File, OpenOptions}; use keys::data_key; @@ -166,11 +167,12 @@ impl ImportFile { fn cleanup(&mut self) -> Result<()> { self.file.take(); - if self.path.temp.exists() { + let path = &self.path.temp; + if path.exists() { if let Some(ref manager) = self.key_manager { - manager.delete_file(self.path.temp.to_str().unwrap())?; + manager.delete_file(path.to_str().unwrap())?; } - file_system::remove_file(&self.path.temp)?; + file_system::remove_file(path)?; } Ok(()) } @@ -413,6 +415,31 @@ impl ImportDir { Ok(()) } + pub fn load_start_key_by_meta( + &self, + meta: &SstMeta, + km: Option>, + ) -> Result>> { + let path = self.join(meta)?; + let r = match km { + Some(km) => E::SstReader::open_encrypted(&path.save.to_string_lossy(), km)?, + None => E::SstReader::open(&path.save.to_string_lossy())?, + }; + let opts = IterOptions::new(None, None, false); + let mut i = r.iter(opts)?; + if !i.seek_to_first()? || !i.valid()? { + return Ok(None); + } + // Should we warn if the key doesn't start with the prefix key? (Is that + // possible?) + // Also note this brings implicit coupling between this and + // RocksEngine. Perhaps it is better to make the engine to provide + // decode functions. Anyway we have directly used the RocksSstReader + // somewhere... This won't make things worse. 
+ let real_key = i.key().strip_prefix(keys::DATA_PREFIX_KEY); + Ok(real_key.map(ToOwned::to_owned)) + } + pub fn list_ssts(&self) -> Result> { let mut ssts = Vec::new(); for e in file_system::read_dir(&self.root_dir)? { @@ -421,9 +448,9 @@ impl ImportDir { continue; } let path = e.path(); - match path_to_sst_meta(&path) { + match parse_meta_from_path(&path) { Ok(sst) => ssts.push(sst), - Err(e) => error!(%e; "path_to_sst_meta failed"; "path" => %path.to_str().unwrap(),), + Err(e) => error!(%e; "path_to_sst_meta failed"; "path" => %path.display(),), } } Ok(ssts) @@ -444,7 +471,7 @@ pub fn sst_meta_to_path(meta: &SstMeta) -> Result { ))) } -pub fn path_to_sst_meta>(path: P) -> Result { +pub fn parse_meta_from_path>(path: P) -> Result { let path = path.as_ref(); let file_name = match path.file_name().and_then(|n| n.to_str()) { Some(name) => name, @@ -496,7 +523,7 @@ mod test { let expected_path = format!("{}_1_2_3_default.sst", uuid); assert_eq!(path.to_str().unwrap(), &expected_path); - let new_meta = path_to_sst_meta(path).unwrap(); + let new_meta = parse_meta_from_path(path).unwrap(); assert_eq!(meta, new_meta); } @@ -516,7 +543,84 @@ mod test { meta.get_region_epoch().get_version(), SST_SUFFIX, )); - let new_meta = path_to_sst_meta(path).unwrap(); + let new_meta = parse_meta_from_path(path).unwrap(); assert_eq!(meta, new_meta); } + + #[cfg(feature = "test-engines-rocksdb")] + fn test_path_with_range_and_km(km: Option) { + use engine_rocks::{RocksEngine, RocksSstWriterBuilder}; + use engine_test::ctor::{CfOptions, DbOptions}; + use engine_traits::{SstWriter, SstWriterBuilder}; + use tempfile::TempDir; + let arcmgr = km.map(Arc::new); + let tmp = TempDir::new().unwrap(); + let dir = ImportDir::new(tmp.path()).unwrap(); + let mut meta = SstMeta::default(); + let mut rng = Range::new(); + rng.set_start(b"hello".to_vec()); + let uuid = Uuid::new_v4(); + meta.set_uuid(uuid.as_bytes().to_vec()); + meta.set_region_id(1); + meta.set_range(rng); + 
meta.mut_region_epoch().set_conf_ver(222); + meta.mut_region_epoch().set_version(333); + let mut db_opt = DbOptions::default(); + db_opt.set_key_manager(arcmgr.clone()); + let e = engine_test::kv::new_engine_opt( + &tmp.path().join("eng").to_string_lossy(), + db_opt, + vec![(CF_DEFAULT, CfOptions::new())], + ) + .unwrap(); + let f = dir.create(&meta, arcmgr.clone()).unwrap(); + let dp = f.path.clone(); + let mut w = RocksSstWriterBuilder::new() + .set_db(&e) + .set_cf(CF_DEFAULT) + .build(f.path.temp.to_str().unwrap()) + .unwrap(); + w.put(b"zhello", concat!("This is the start key of the SST, ", + "how about some of our users uploads metas with range not aligned with the content of SST?", + "No, at least for now, tidb-lightning won't do so.").as_bytes()).unwrap(); + w.put( + b"zworld", + concat!( + "This is the end key of the SST, ", + "you might notice that all keys have a extra prefix 'z', that was appended by the RocksEngine implementation.", + "It is a little weird that the user key isn't the same in SST. But anyway reasonable. We have bypassed some layers." 
+ ) + .as_bytes(), + ) + .unwrap(); + w.finish().unwrap(); + dp.save(arcmgr.as_deref()).unwrap(); + let mut ssts = dir.list_ssts().unwrap(); + ssts.iter_mut().for_each(|meta| { + let start = dir + .load_start_key_by_meta::(meta, arcmgr.clone()) + .unwrap() + .unwrap(); + meta.mut_range().set_start(start) + }); + assert_eq!(ssts, vec![meta]); + } + + #[test] + #[cfg(feature = "test-engines-rocksdb")] + fn test_path_with_range() { + test_path_with_range_and_km(None) + } + + #[test] + #[cfg(feature = "test-engines-rocksdb")] + fn test_path_with_range_encrypted() { + use tempfile::TempDir; + use test_util::new_test_key_manager; + let dir = TempDir::new().unwrap(); + let enc = new_test_key_manager(&dir, None, None, None) + .unwrap() + .unwrap(); + test_path_with_range_and_km(Some(enc)); + } } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 907874c6928..90226668e5f 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -199,8 +199,10 @@ impl SstImporter { "size" => ?memory_limit, ); + let dir = ImportDir::new(root)?; + Ok(SstImporter { - dir: ImportDir::new(root)?, + dir, key_manager, switcher, api_version, @@ -1275,10 +1277,23 @@ impl SstImporter { } } + /// List the basic information of the current SST files. + /// The information contains UUID, region ID, region Epoch. + /// Other fields may be left blank. pub fn list_ssts(&self) -> Result> { self.dir.list_ssts() } + /// Load the start key by a metadata. + /// This will open the internal SST and try to load the first user key. + /// (For RocksEngine, that is the key without the 'z' prefix.) + /// When the SST is empty or the first key cannot be parsed as user key, + /// return None. 
+ pub fn load_start_key_by_meta(&self, meta: &SstMeta) -> Result>> { + self.dir + .load_start_key_by_meta::(meta, self.key_manager.clone()) + } + pub fn new_txn_writer(&self, db: &E, meta: SstMeta) -> Result> { let mut default_meta = meta.clone(); default_meta.set_cf_name(CF_DEFAULT.to_owned()); From 55aed38793548a172842d02b1787ed3f5e07b17c Mon Sep 17 00:00:00 2001 From: jinshayumi <791682161@QQ.COM> Date: Fri, 19 May 2023 10:09:36 +0800 Subject: [PATCH 0692/1149] tikv-ctl: support get range properties under remote mode (#14681) close tikv/tikv#14620 --- Cargo.lock | 2 +- cmd/tikv-ctl/src/executor.rs | 12 ++++- src/server/debug.rs | 32 +++++++------- src/server/debug2.rs | 4 ++ src/server/service/debug.rs | 29 +++++++++++++ tests/integrations/server/kv_service.rs | 58 +++++++++++++++++++++++++ 6 files changed, 119 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f2bcf0652b3..dfe4894ac51 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2930,7 +2930,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#2475f5bed8c358e9e9942546007b962f4530a1a6" +source = "git+https://github.com/pingcap/kvproto.git#6e0e8a7deaa199418f4e8c7e3b63fcb89e153771" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index a0d4a039d2c..4cb28fbbc87 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -865,8 +865,16 @@ impl DebugExecutor for DebugClient { } } - fn dump_range_properties(&self, _: Vec, _: Vec) { - unimplemented!("only available for local mode"); + fn dump_range_properties(&self, start: Vec, end: Vec) { + let mut req = GetRangePropertiesRequest::default(); + req.set_start_key(start); + req.set_end_key(end); + let resp = self + .get_range_properties(&req) + .unwrap_or_else(|e| perror_and_exit("DebugClient::get_range_properties", e)); + for prop in resp.get_properties() { + println!("{}: {}", 
prop.get_key(), prop.get_value()) + } } fn dump_store_info(&self) { diff --git a/src/server/debug.rs b/src/server/debug.rs index 7ce7c832f48..26d418e6871 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -172,6 +172,8 @@ pub trait Debugger { fn set_kv_statistics(&mut self, s: Option>); fn set_raft_statistics(&mut self, s: Option>); + + fn get_range_properties(&self, start: &[u8], end: &[u8]) -> Result>; } #[derive(Clone)] @@ -744,21 +746,6 @@ impl DebuggerImpl { None => Err(Error::NotFound(format!("region {}", region_id))), } } - - pub fn get_range_properties(&self, start: &[u8], end: &[u8]) -> Result> { - let mut props = dump_write_cf_properties( - &self.engines.kv, - &keys::data_key(start), - &keys::data_end_key(end), - )?; - let mut props1 = dump_default_cf_properties( - &self.engines.kv, - &keys::data_key(start), - &keys::data_end_key(end), - )?; - props.append(&mut props1); - Ok(props) - } } impl Debugger for DebuggerImpl { @@ -943,6 +930,21 @@ impl Debugger for DebuggerImpl { Ok(()) } + fn get_range_properties(&self, start: &[u8], end: &[u8]) -> Result> { + let mut props = dump_write_cf_properties( + &self.engines.kv, + &keys::data_key(start), + &keys::data_end_key(end), + )?; + let mut props1 = dump_default_cf_properties( + &self.engines.kv, + &keys::data_key(start), + &keys::data_end_key(end), + )?; + props.append(&mut props1); + Ok(props) + } + fn get_region_properties(&self, region_id: u64) -> Result> { let region_state = self.get_region_state(region_id)?; let region = region_state.get_region(); diff --git a/src/server/debug2.rs b/src/server/debug2.rs index bea3da7ca4a..dfd5f9bab83 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -431,6 +431,10 @@ impl Debugger for DebuggerImplV2 { unimplemented!() } + fn get_range_properties(&self, _: &[u8], _: &[u8]) -> Result> { + unimplemented!() + } + fn reset_to_version(&self, _version: u64) { unimplemented!() } diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index 
7b2a694c99a..e969dfa897f 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -433,6 +433,35 @@ where self.handle_response(ctx, sink, f, TAG); } + fn get_range_properties( + &mut self, + ctx: RpcContext<'_>, + req: GetRangePropertiesRequest, + sink: UnarySink, + ) { + const TAG: &str = "get_range_properties"; + let debugger = self.debugger.clone(); + + let f = + self.pool + .spawn(async move { + debugger.get_range_properties(req.get_start_key(), req.get_end_key()) + }) + .map(|res| res.unwrap()) + .map_ok(|props| { + let mut resp = GetRangePropertiesResponse::default(); + for (key, value) in props { + let mut prop = GetRangePropertiesResponseRangeProperty::default(); + prop.set_key(key); + prop.set_value(value); + resp.mut_properties().push(prop) + } + resp + }); + + self.handle_response(ctx, sink, f, TAG); + } + fn get_store_info( &mut self, ctx: RpcContext<'_>, diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 0866fbcba75..6971808b25a 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -945,6 +945,64 @@ fn test_split_region_impl(is_raw_kv: bool) { ); } +#[test_case(test_raftstore::must_new_cluster_and_debug_client)] +// #[test_case(test_raftstore_v2::must_new_cluster_and_debug_client)] +fn test_debug_store() { + let (mut cluster, debug_client, store_id) = new_cluster(); + let cluster_id = cluster.id(); + let req = debugpb::GetClusterInfoRequest::default(); + let resp = debug_client.get_cluster_info(&req).unwrap(); + assert_eq!(resp.get_cluster_id(), cluster_id); + + let req = debugpb::GetStoreInfoRequest::default(); + let resp = debug_client.get_store_info(&req).unwrap(); + assert_eq!(store_id, resp.get_store_id()); + + cluster.must_put(b"a", b"val"); + cluster.must_put(b"c", b"val"); + cluster.flush_data(); + thread::sleep(Duration::from_millis(25)); + assert_eq!(b"val".to_vec(), cluster.must_get(b"a").unwrap()); + 
assert_eq!(b"val".to_vec(), cluster.must_get(b"c").unwrap()); + + let mut req = debugpb::GetMetricsRequest::default(); + req.set_all(true); + let resp = debug_client.get_metrics(&req).unwrap(); + assert_eq!(store_id, resp.get_store_id()); + assert!(!resp.get_rocksdb_kv().is_empty()); + assert!(resp.get_rocksdb_raft().is_empty()); + + let mut req = debugpb::GetRegionPropertiesRequest::default(); + req.set_region_id(1); + let resp = debug_client.get_region_properties(&req).unwrap(); + resp.get_props() + .iter() + .find(|p| { + p.get_name() == "defaultcf.num_entries" && p.get_value().parse::().unwrap() >= 2 + }) + .unwrap(); + + let req = debugpb::GetRangePropertiesRequest::default(); + let resp = debug_client.get_range_properties(&req).unwrap(); + resp.get_properties() + .iter() + .find(|p| { + p.get_key() == "defaultcf.num_entries" && p.get_value().parse::().unwrap() >= 2 + }) + .unwrap(); + + let mut req = debugpb::GetRangePropertiesRequest::default(); + req.set_start_key(b"d".to_vec()); + req.set_end_key(b"".to_vec()); + let resp = debug_client.get_range_properties(&req).unwrap(); + resp.get_properties() + .iter() + .find(|p| { + p.get_key() == "defaultcf.num_entries" && p.get_value().parse::().unwrap() < 2 + }) + .unwrap(); +} + #[test_case(test_raftstore::must_new_cluster_and_debug_client)] #[test_case(test_raftstore_v2::must_new_cluster_and_debug_client)] fn test_debug_get() { From f8e6ffe67431588ff7f35310bb4db57fbc585890 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 19 May 2023 13:47:36 +0800 Subject: [PATCH 0693/1149] raftstore-v2: support flashback region (#14685) close tikv/tikv#14684 raftstore-v2: support flashback region Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../src/operation/command/admin/flashback.rs | 108 +++++++++ .../src/operation/command/admin/mod.rs | 8 +- .../raftstore-v2/src/operation/command/mod.rs | 42 +++- .../src/operation/query/capture.rs | 15 +- 
components/raftstore/src/coprocessor/mod.rs | 1 + components/raftstore/src/store/fsm/apply.rs | 43 ++-- components/raftstore/src/store/fsm/peer.rs | 9 +- components/raftstore/src/store/util.rs | 13 +- components/raftstore/src/store/worker/read.rs | 32 ++- components/test_raftstore-v2/src/cluster.rs | 51 ++++- components/test_raftstore-v2/src/server.rs | 14 +- components/test_raftstore/src/cluster.rs | 39 ++-- components/test_raftstore/src/util.rs | 26 +++ src/server/raftkv/mod.rs | 2 +- src/server/raftkv2/mod.rs | 73 +++++- .../integrations/raftstore/test_flashback.rs | 211 ++++++++++++------ tests/integrations/server/kv_service.rs | 40 ++-- 17 files changed, 552 insertions(+), 175 deletions(-) create mode 100644 components/raftstore-v2/src/operation/command/admin/flashback.rs diff --git a/components/raftstore-v2/src/operation/command/admin/flashback.rs b/components/raftstore-v2/src/operation/command/admin/flashback.rs new file mode 100644 index 00000000000..15d9070de45 --- /dev/null +++ b/components/raftstore-v2/src/operation/command/admin/flashback.rs @@ -0,0 +1,108 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; +use fail::fail_point; +use kvproto::{ + raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse, RaftCmdRequest}, + raft_serverpb::RegionLocalState, +}; +use protobuf::Message; +use raftstore::{coprocessor::RegionChangeReason, store::metrics::PEER_ADMIN_CMD_COUNTER, Result}; + +use super::AdminCmdResult; +use crate::{ + batch::StoreContext, + fsm::ApplyResReporter, + raft::{Apply, Peer}, +}; + +#[derive(Debug)] +pub struct FlashbackResult { + index: u64, + region_state: RegionLocalState, +} + +impl Peer { + pub fn propose_flashback( + &mut self, + store_ctx: &mut StoreContext, + req: RaftCmdRequest, + ) -> Result { + let data = req.write_to_bytes().unwrap(); + self.propose(store_ctx, data) + } +} + +impl Apply { + pub fn apply_flashback( + &mut self, + index: u64, + req: &AdminRequest, + ) -> Result<(AdminResponse, AdminCmdResult)> { + // Modify flashback fields in region state. + // + // Note: region state is persisted by `Peer::on_apply_res_flashback`. + let region = self.region_state_mut().mut_region(); + match req.get_cmd_type() { + AdminCmdType::PrepareFlashback => { + PEER_ADMIN_CMD_COUNTER.prepare_flashback.success.inc(); + + region.set_is_in_flashback(true); + region.set_flashback_start_ts(req.get_prepare_flashback().get_start_ts()); + } + AdminCmdType::FinishFlashback => { + PEER_ADMIN_CMD_COUNTER.finish_flashback.success.inc(); + + region.set_is_in_flashback(false); + region.clear_flashback_start_ts(); + } + _ => unreachable!(), + } + Ok(( + AdminResponse::default(), + AdminCmdResult::Flashback(FlashbackResult { + index, + region_state: self.region_state().clone(), + }), + )) + } +} + +impl Peer { + // Match v1 on_set_flashback_state. 
+ pub fn on_apply_res_flashback( + &mut self, + store_ctx: &mut StoreContext, + mut res: FlashbackResult, + ) { + (|| { + fail_point!("keep_peer_fsm_flashback_state_false", |_| { + res.region_state.mut_region().set_is_in_flashback(false); + }) + })(); + slog::debug!(self.logger, + "flashback update region"; + "region" => ?res.region_state.get_region()); + let region_id = self.region_id(); + { + let mut meta = store_ctx.store_meta.lock().unwrap(); + meta.set_region(res.region_state.get_region(), true, &self.logger); + let (reader, _) = meta.readers.get_mut(®ion_id).unwrap(); + self.set_region( + &store_ctx.coprocessor_host, + reader, + res.region_state.get_region().clone(), + RegionChangeReason::Flashback, + res.region_state.get_tablet_index(), + ); + } + + self.state_changes_mut() + .put_region_state(region_id, res.index, &res.region_state) + .unwrap(); + self.set_has_extra_write(); + + // Compares to v1, v2 does not expire remote lease, because only + // local reader can serve read requests. 
+ } +} diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 69c9b39aaa2..9f3475a25d3 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -2,6 +2,7 @@ mod compact_log; mod conf_change; +mod flashback; mod merge; mod split; mod transfer_leader; @@ -39,6 +40,7 @@ pub use split::{ use tikv_util::{box_err, log::SlogFormat}; use txn_types::WriteBatchFlags; +use self::flashback::FlashbackResult; use crate::{ batch::StoreContext, raft::Peer, @@ -56,6 +58,7 @@ pub enum AdminCmdResult { UpdateGcPeers(UpdateGcPeersResult), PrepareMerge(PrepareMergeResult), CommitMerge(CommitMergeResult), + Flashback(FlashbackResult), } impl Peer { @@ -264,7 +267,10 @@ impl Peer { } AdminCmdType::PrepareMerge => self.propose_prepare_merge(ctx, req), AdminCmdType::CommitMerge => self.propose_commit_merge(ctx, req), - _ => unimplemented!(), + AdminCmdType::PrepareFlashback | AdminCmdType::FinishFlashback => { + self.propose_flashback(ctx, req) + } + _ => unimplemented!("{:?}", req), } }; match &res { diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 6643fe1558f..e68449e8026 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -41,7 +41,8 @@ use raftstore::{ APPLY_TASK_WAIT_TIME_HISTOGRAM, APPLY_TIME_HISTOGRAM, STORE_APPLY_LOG_HISTOGRAM, }, msg::ErrorCallback, - util, Config, Transport, WriteCallback, + util::{self, check_flashback_state}, + Config, Transport, WriteCallback, }, Error, Result, }; @@ -193,6 +194,29 @@ impl Peer { } return Err(e); } + // Check whether the region is in the flashback state and the request could be + // proposed. 
Skip the not prepared error because the + // `self.region().is_in_flashback` may not be the latest right after applying + // the `PrepareFlashback` admin command, we will let it pass here and check in + // the apply phase and because a read-only request doesn't need to be applied, + // so it will be allowed during the flashback progress, for example, a snapshot + // request. + if let Err(e) = util::check_flashback_state( + self.region().get_is_in_flashback(), + self.region().get_flashback_start_ts(), + header, + admin_type, + self.region_id(), + true, + ) { + match e { + Error::FlashbackInProgress(..) => { + metrics.invalid_proposal.flashback_in_progress.inc() + } + _ => unreachable!("{:?}", e), + } + return Err(e); + } Ok(()) } @@ -372,6 +396,7 @@ impl Peer { AdminCmdResult::UpdateGcPeers(state) => self.on_apply_res_update_gc_peers(state), AdminCmdResult::PrepareMerge(res) => self.on_apply_res_prepare_merge(ctx, res), AdminCmdResult::CommitMerge(res) => self.on_apply_res_commit_merge(ctx, res), + AdminCmdResult::Flashback(res) => self.on_apply_res_flashback(ctx, res), } } self.region_buckets_info_mut() @@ -639,6 +664,16 @@ impl Apply { }; util::check_req_region_epoch(&req, self.region(), true)?; + let header = req.get_header(); + let admin_type = req.admin_request.as_ref().map(|req| req.get_cmd_type()); + check_flashback_state( + self.region().get_is_in_flashback(), + self.region().get_flashback_start_ts(), + header, + admin_type, + self.region_id(), + false, + )?; if req.has_admin_request() { let admin_req = req.get_admin_request(); let (admin_resp, admin_result) = match req.get_admin_request().get_cmd_type() { @@ -659,8 +694,9 @@ impl Apply { } AdminCmdType::ComputeHash => unimplemented!(), AdminCmdType::VerifyHash => unimplemented!(), - AdminCmdType::PrepareFlashback => unimplemented!(), - AdminCmdType::FinishFlashback => unimplemented!(), + AdminCmdType::PrepareFlashback | AdminCmdType::FinishFlashback => { + self.apply_flashback(log_index, admin_req)? 
+ } AdminCmdType::BatchSwitchWitness => unimplemented!(), AdminCmdType::UpdateGcPeer => self.apply_update_gc_peer(log_index, admin_req), AdminCmdType::InvalidAdmin => { diff --git a/components/raftstore-v2/src/operation/query/capture.rs b/components/raftstore-v2/src/operation/query/capture.rs index 4f3d58424b9..9debb8e0364 100644 --- a/components/raftstore-v2/src/operation/query/capture.rs +++ b/components/raftstore-v2/src/operation/query/capture.rs @@ -19,6 +19,7 @@ use raftstore::{ }, }; use slog::info; +use txn_types::WriteBatchFlags; use crate::{ fsm::{ApplyResReporter, PeerFsmDelegate}, @@ -31,20 +32,26 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: raftstore::store::Transport> { pub fn on_leader_callback(&mut self, ch: QueryResChannel) { let peer = self.fsm.peer(); - let msg = new_read_index_request( + let mut msg = new_read_index_request( peer.region_id(), peer.region().get_region_epoch().clone(), peer.peer().clone(), ); + + // Allow to capture change even is in flashback state. + // TODO: add a test case for this kind of situation. + if self.fsm.peer().region().get_is_in_flashback() { + let mut flags = WriteBatchFlags::from_bits_check(msg.get_header().get_flags()); + flags.insert(WriteBatchFlags::FLASHBACK); + msg.mut_header().set_flags(flags.bits()); + } + self.on_query(msg, ch); } pub fn on_capture_change(&mut self, capture_change: CaptureChange) { fail_point!("raft_on_capture_change"); - // TODO: Allow to capture change even is in flashback state. - // TODO: add a test case for this kind of situation. 
- let apply_router = self.fsm.peer().apply_scheduler().unwrap().clone(); let (ch, _) = QueryResChannel::with_callback(Box::new(move |res| { if let QueryResult::Response(resp) = res && resp.get_header().has_error() { diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index f5bdd8664e6..7c84b09ce7e 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -305,6 +305,7 @@ pub enum RegionChangeReason { CommitMerge, RollbackMerge, SwitchWitness, + Flashback, } #[derive(Clone, Copy, Debug, PartialEq)] diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index dc54bf1a2d3..e61ee006c83 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -306,7 +306,7 @@ pub enum ExecResult { TransferLeader { term: u64, }, - SetFlashbackState { + Flashback { region: Region, }, BatchSwitchWitness(SwitchWitness), @@ -1496,7 +1496,7 @@ where ExecResult::CommitMerge { ref region, .. } => (Some(region.clone()), None), ExecResult::RollbackMerge { ref region, .. 
} => (Some(region.clone()), None), ExecResult::IngestSst { ref ssts } => (None, Some(ssts.clone())), - ExecResult::SetFlashbackState { ref region } => (Some(region.clone()), None), + ExecResult::Flashback { ref region } => (Some(region.clone()), None), _ => (None, None), }, _ => (None, None), @@ -1565,7 +1565,7 @@ where self.region = region.clone(); self.is_merging = false; } - ExecResult::SetFlashbackState { ref region } => { + ExecResult::Flashback { ref region } => { self.region = region.clone(); } ExecResult::BatchSwitchWitness(ref switches) => { @@ -1662,10 +1662,13 @@ where let include_region = req.get_header().get_region_epoch().get_version() >= self.last_merge_version; check_req_region_epoch(req, &self.region, include_region)?; + let header = req.get_header(); + let admin_type = req.admin_request.as_ref().map(|req| req.get_cmd_type()); check_flashback_state( self.region.is_in_flashback, self.region.flashback_start_ts, - req, + header, + admin_type, self.region_id(), false, )?; @@ -2986,31 +2989,37 @@ where ctx: &mut ApplyContext, req: &AdminRequest, ) -> Result<(AdminResponse, ApplyResult)> { - let is_in_flashback = req.get_cmd_type() == AdminCmdType::PrepareFlashback; - // Modify the region meta in memory. + // Modify flashback fields in region state. let mut region = self.region.clone(); - region.set_is_in_flashback(is_in_flashback); - region.set_flashback_start_ts(req.get_prepare_flashback().get_start_ts()); - // Modify the `RegionLocalState` persisted in disk. 
- write_peer_state(ctx.kv_wb_mut(), ®ion, PeerState::Normal, None).unwrap_or_else(|e| { - panic!( - "{} failed to change the flashback state to {} for region {:?}: {:?}", - self.tag, is_in_flashback, region, e - ) - }); - match req.get_cmd_type() { AdminCmdType::PrepareFlashback => { PEER_ADMIN_CMD_COUNTER.prepare_flashback.success.inc(); + + region.set_is_in_flashback(true); + region.set_flashback_start_ts(req.get_prepare_flashback().get_start_ts()); } AdminCmdType::FinishFlashback => { PEER_ADMIN_CMD_COUNTER.finish_flashback.success.inc(); + + region.set_is_in_flashback(false); + region.clear_flashback_start_ts(); } _ => unreachable!(), } + + // Modify the `RegionLocalState` persisted in disk. + write_peer_state(ctx.kv_wb_mut(), ®ion, PeerState::Normal, None).unwrap_or_else(|e| { + panic!( + "{} failed to change the flashback state to {:?} for region {:?}: {:?}", + self.tag, + req.get_cmd_type(), + region, + e + ) + }); Ok(( AdminResponse::default(), - ApplyResult::Res(ExecResult::SetFlashbackState { region }), + ApplyResult::Res(ExecResult::Flashback { region }), )) } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 72eb3c59753..910a08c3a0b 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -5012,7 +5012,7 @@ where } ExecResult::IngestSst { ssts } => self.on_ingest_sst_result(ssts), ExecResult::TransferLeader { term } => self.on_transfer_leader(term), - ExecResult::SetFlashbackState { region } => self.on_set_flashback_state(region), + ExecResult::Flashback { region } => self.on_set_flashback_state(region), ExecResult::BatchSwitchWitness(switches) => { self.on_ready_batch_switch_witness(switches) } @@ -5260,10 +5260,13 @@ where // the apply phase and because a read-only request doesn't need to be applied, // so it will be allowed during the flashback progress, for example, a snapshot // request. 
+ let header = msg.get_header(); + let admin_type = msg.admin_request.as_ref().map(|req| req.get_cmd_type()); if let Err(e) = util::check_flashback_state( self.region().is_in_flashback, self.region().flashback_start_ts, - msg, + header, + admin_type, region_id, true, ) { @@ -5280,7 +5283,7 @@ where .invalid_proposal .flashback_not_prepared .inc(), - _ => unreachable!(), + _ => unreachable!("{:?}", e), } return Err(e); } diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index f5a23538ad5..539dfa22403 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -344,27 +344,28 @@ pub fn compare_region_epoch( pub fn check_flashback_state( is_in_flashback: bool, flashback_start_ts: u64, - req: &RaftCmdRequest, + header: &RaftRequestHeader, + admin_type: Option, region_id: u64, skip_not_prepared: bool, ) -> Result<()> { // The admin flashback cmd could be proposed/applied under any state. - if req.has_admin_request() - && (req.get_admin_request().get_cmd_type() == AdminCmdType::PrepareFlashback - || req.get_admin_request().get_cmd_type() == AdminCmdType::FinishFlashback) + if let Some(ty) = admin_type + && (ty == AdminCmdType::PrepareFlashback + || ty == AdminCmdType::FinishFlashback) { return Ok(()); } // TODO: only use `flashback_start_ts` to check flashback state. let is_in_flashback = is_in_flashback || flashback_start_ts > 0; - let is_flashback_request = WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()) + let is_flashback_request = WriteBatchFlags::from_bits_truncate(header.get_flags()) .contains(WriteBatchFlags::FLASHBACK); // If the region is in the flashback state: // - A request with flashback flag will be allowed. // - A read request whose `read_ts` is smaller than `flashback_start_ts` will // be allowed. 
if is_in_flashback && !is_flashback_request { - if let Ok(read_ts) = decode_u64(&mut req.get_header().get_flag_data()) { + if let Ok(read_ts) = decode_u64(&mut header.get_flag_data()) { if read_ts != 0 && read_ts < flashback_start_ts { return Ok(()); } diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 49171123f4a..488d24ac134 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -832,20 +832,32 @@ where // be performed. let is_in_flashback = delegate.region.is_in_flashback; let flashback_start_ts = delegate.region.flashback_start_ts; - if let Err(e) = - util::check_flashback_state(is_in_flashback, flashback_start_ts, req, region_id, false) - { - TLS_LOCAL_READ_METRICS.with(|m| match e { + let header = req.get_header(); + let admin_type = req.admin_request.as_ref().map(|req| req.get_cmd_type()); + if let Err(e) = util::check_flashback_state( + is_in_flashback, + flashback_start_ts, + header, + admin_type, + region_id, + true, + ) { + debug!("rejected by flashback state"; + "error" => ?e, + "is_in_flashback" => is_in_flashback, + "tag" => &delegate.tag); + match e { Error::FlashbackNotPrepared(_) => { - m.borrow_mut().reject_reason.flashback_not_prepared.inc() + TLS_LOCAL_READ_METRICS + .with(|m| m.borrow_mut().reject_reason.flashback_not_prepared.inc()); } Error::FlashbackInProgress(..) 
=> { - m.borrow_mut().reject_reason.flashback_in_progress.inc() + TLS_LOCAL_READ_METRICS + .with(|m| m.borrow_mut().reject_reason.flashback_in_progress.inc()); } - _ => unreachable!(), - }); - debug!("rejected by flashback state"; "is_in_flashback" => is_in_flashback, "tag" => &delegate.tag); - return Ok(None); + _ => unreachable!("{:?}", e), + }; + return Err(e); } Ok(Some(delegate)) diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 89971ca1c80..25a7e2ab6e2 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -26,8 +26,8 @@ use kvproto::{ kvrpcpb::ApiVersion, metapb::{self, Buckets, PeerRole, RegionEpoch}, raft_cmdpb::{ - AdminCmdType, CmdType, RaftCmdRequest, RaftCmdResponse, RegionDetailResponse, Request, - Response, StatusCmdType, + AdminCmdType, AdminRequest, CmdType, RaftCmdRequest, RaftCmdResponse, RegionDetailResponse, + Request, Response, StatusCmdType, }, raft_serverpb::{ PeerState, RaftApplyState, RaftLocalState, RaftMessage, RaftTruncatedState, @@ -68,6 +68,7 @@ use tikv_util::{ worker::LazyWorker, HandyRwLock, }; +use txn_types::WriteBatchFlags; // We simulate 3 or 5 nodes, each has a store. 
// Sometimes, we use fixed id to test, which means the id @@ -717,7 +718,7 @@ impl, EK: KvEngine> Cluster { // mixed read and write requests are not supportted pub fn call_command( - &mut self, + &self, request: RaftCmdRequest, timeout: Duration, ) -> Result { @@ -870,7 +871,7 @@ impl, EK: KvEngine> Cluster { } pub fn query_leader( - &mut self, + &self, store_id: u64, region_id: u64, timeout: Duration, @@ -1673,6 +1674,48 @@ impl, EK: KvEngine> Cluster { debug!("all nodes are shut down."); } + + pub fn must_send_flashback_msg( + &mut self, + region_id: u64, + cmd_type: AdminCmdType, + ) -> BoxFuture<'static, RaftCmdResponse> { + let leader = self.leader_of_region(region_id).unwrap(); + let store_id = leader.get_store_id(); + let region_epoch = self.get_region_epoch(region_id); + let mut admin = AdminRequest::default(); + admin.set_cmd_type(cmd_type); + let mut req = RaftCmdRequest::default(); + req.mut_header().set_region_id(region_id); + req.mut_header().set_region_epoch(region_epoch); + req.mut_header().set_peer(leader); + req.set_admin_request(admin); + req.mut_header() + .set_flags(WriteBatchFlags::FLASHBACK.bits()); + let (msg, sub) = PeerMsg::admin_command(req); + let router = self.sim.rl().get_router(store_id).unwrap(); + if let Err(e) = router.send(region_id, msg) { + panic!( + "router send flashback msg {:?} failed, error: {}", + cmd_type, e + ); + } + Box::pin(async move { sub.result().await.unwrap() }) + } + + pub fn must_send_wait_flashback_msg(&mut self, region_id: u64, cmd_type: AdminCmdType) { + let resp = self.must_send_flashback_msg(region_id, cmd_type); + block_on(async { + let resp = resp.await; + if resp.get_header().has_error() { + panic!( + "call flashback msg {:?} failed, error: {:?}", + cmd_type, + resp.get_header().get_error() + ); + } + }); + } } pub fn bootstrap_store( diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 38bc43af526..c7a8234c9f1 100644 --- 
a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -15,7 +15,7 @@ use encryption_export::DataKeyManager; use engine_rocks::RocksEngine; use engine_test::raft::RaftTestEngine; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; -use futures::{executor::block_on, Future}; +use futures::{executor::block_on, future::BoxFuture, Future}; use grpcio::{ChannelBuilder, EnvBuilder, Environment, Error as GrpcError, Service}; use grpcio_health::HealthService; use kvproto::{ @@ -165,6 +165,18 @@ impl Engine for TestRaftKv2 { fn schedule_txn_extra(&self, txn_extra: txn_types::TxnExtra) { self.raftkv.schedule_txn_extra(txn_extra) } + + fn start_flashback( + &self, + ctx: &Context, + start_ts: u64, + ) -> BoxFuture<'static, storage::kv::Result<()>> { + self.raftkv.start_flashback(ctx, start_ts) + } + + fn end_flashback(&self, ctx: &Context) -> BoxFuture<'static, storage::kv::Result<()>> { + self.raftkv.end_flashback(ctx) + } } #[derive(Clone)] diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 7ea8ee81f8f..c916ec7448e 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -19,7 +19,7 @@ use engine_traits::{ WriteBatch, WriteBatchExt, CF_DEFAULT, CF_RAFT, }; use file_system::IoRateLimiter; -use futures::{self, channel::oneshot, executor::block_on}; +use futures::{self, channel::oneshot, executor::block_on, future::BoxFuture}; use kvproto::{ errorpb::Error as PbError, kvrpcpb::{ApiVersion, Context, DiskFullOpt}, @@ -1488,8 +1488,7 @@ impl Cluster { &mut self, region_id: u64, cmd_type: AdminCmdType, - cb: Callback, - ) { + ) -> BoxFuture<'static, RaftCmdResponse> { let leader = self.leader_of_region(region_id).unwrap(); let store_id = leader.get_store_id(); let region_epoch = self.get_region_epoch(region_id); @@ -1502,10 +1501,13 @@ impl Cluster { req.set_admin_request(admin); req.mut_header() 
.set_flags(WriteBatchFlags::FLASHBACK.bits()); + let (result_tx, result_rx) = oneshot::channel(); let router = self.sim.rl().get_router(store_id).unwrap(); if let Err(e) = router.send_command( req, - cb, + Callback::write(Box::new(move |resp| { + result_tx.send(resp.response).unwrap(); + })), RaftCmdExtraOpts { deadline: None, disk_full_opt: DiskFullOpt::AllowedOnAlmostFull, @@ -1516,27 +1518,22 @@ impl Cluster { cmd_type, e ); } + Box::pin(async move { result_rx.await.unwrap() }) } pub fn must_send_wait_flashback_msg(&mut self, region_id: u64, cmd_type: AdminCmdType) { self.wait_applied_to_current_term(region_id, Duration::from_secs(3)); - let (result_tx, result_rx) = oneshot::channel(); - self.must_send_flashback_msg( - region_id, - cmd_type, - Callback::write(Box::new(move |resp| { - if resp.response.get_header().has_error() { - result_tx - .send(Some(resp.response.get_header().get_error().clone())) - .unwrap(); - return; - } - result_tx.send(None).unwrap(); - })), - ); - if let Some(e) = block_on(result_rx).unwrap() { - panic!("call flashback msg {:?} failed, error: {:?}", cmd_type, e); - } + let resp = self.must_send_flashback_msg(region_id, cmd_type); + block_on(async { + let resp = resp.await; + if resp.get_header().has_error() { + panic!( + "call flashback msg {:?} failed, error: {:?}", + cmd_type, + resp.get_header().get_error() + ); + } + }); } pub fn wait_applied_to_current_term(&mut self, region_id: u64, timeout: Duration) { diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 45b4a98456a..079e3abf1ef 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -873,6 +873,32 @@ pub fn must_kv_read_equal(client: &TikvClient, ctx: Context, key: Vec, val: assert_eq!(get_resp.take_value(), val); } +pub fn must_kv_read_not_found(client: &TikvClient, ctx: Context, key: Vec, ts: u64) { + let mut get_req = GetRequest::default(); + get_req.set_context(ctx); + get_req.set_key(key); 
+ get_req.set_version(ts); + + for _ in 1..250 { + let get_resp = client.kv_get(&get_req).unwrap(); + if get_resp.has_region_error() || get_resp.has_error() { + thread::sleep(Duration::from_millis(20)); + } else if get_resp.get_not_found() { + return; + } + } + + // Last try + let get_resp = client.kv_get(&get_req).unwrap(); + assert!( + !get_resp.has_region_error(), + "{:?}", + get_resp.get_region_error() + ); + assert!(!get_resp.has_error(), "{:?}", get_resp.get_error()); + assert!(get_resp.get_not_found()); +} + pub fn write_and_read_key( client: &TikvClient, ctx: &Context, diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 8cd9c2eed9e..039f987c398 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -117,7 +117,7 @@ where Snap(RegionSnapshot), } -fn check_raft_cmd_response(resp: &mut RaftCmdResponse) -> Result<()> { +pub fn check_raft_cmd_response(resp: &mut RaftCmdResponse) -> Result<()> { if resp.get_header().has_error() { return Err(Error::RequestFailed(resp.take_header().take_error())); } diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 6e92420e622..d4a158bffda 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -12,8 +12,11 @@ use std::{ use collections::HashSet; use engine_traits::{KvEngine, RaftEngine, CF_LOCK}; -use futures::{Future, Stream, StreamExt}; -use kvproto::raft_cmdpb::{CmdType, RaftCmdRequest, Request}; +use futures::{future::BoxFuture, Future, Stream, StreamExt}; +use kvproto::{ + kvrpcpb::Context, + raft_cmdpb::{AdminCmdType, CmdType, RaftCmdRequest, Request}, +}; pub use node::NodeV2; pub use raft_extension::Extension; use raftstore::store::{util::encode_start_ts_into_flag_data, RegionSnapshot}; @@ -29,7 +32,10 @@ use txn_types::{TxnExtra, TxnExtraScheduler, WriteBatchFlags}; use super::{ metrics::{ASYNC_REQUESTS_COUNTER_VEC, ASYNC_REQUESTS_DURATIONS_VEC}, - raftkv::{get_status_kind_from_engine_error, new_request_header}, + raftkv::{ + 
check_raft_cmd_response, get_status_kind_from_engine_error, new_flashback_req, + new_request_header, + }, }; struct Transform { @@ -169,10 +175,9 @@ impl tikv_kv::Engine for RaftKv2 { if ctx.pb_ctx.get_stale_read() && need_encoded_start_ts { flags |= WriteBatchFlags::STALE_READ.bits(); } - // TODO: flashback is not supported yet. - // if ctx.allowed_in_flashback { - // flags |= WriteBatchFlags::FLASHBACK.bits(); - // } + if ctx.allowed_in_flashback { + flags |= WriteBatchFlags::FLASHBACK.bits(); + } header.set_flags(flags); // Encode `start_ts` in `flag_data` for the check of stale read and flashback. if need_encoded_start_ts { @@ -235,10 +240,9 @@ impl tikv_kv::Engine for RaftKv2 { if batch.extra.one_pc { flags |= WriteBatchFlags::ONE_PC.bits(); } - // TODO: flashback is not supported yet. - // if batch.extra.allowed_in_flashback { - // flags |= WriteBatchFlags::FLASHBACK.bits(); - // } + if batch.extra.allowed_in_flashback { + flags |= WriteBatchFlags::FLASHBACK.bits(); + } header.set_flags(flags); self.schedule_txn_extra(batch.extra); @@ -313,4 +317,51 @@ impl tikv_kv::Engine for RaftKv2 { } } } + + fn start_flashback( + &self, + ctx: &Context, + start_ts: u64, + ) -> BoxFuture<'static, tikv_kv::Result<()>> { + // Send an `AdminCmdType::PrepareFlashback` to prepare the raftstore for the + // later flashback. Once invoked, we will update the persistent region meta and + // the memory state of the flashback in Peer FSM to reject all read, write + // and scheduling operations for this region when propose/apply before we + // start the actual data flashback transaction command in the next phase. 
+ let mut req = new_flashback_req(ctx, AdminCmdType::PrepareFlashback); + req.mut_admin_request() + .mut_prepare_flashback() + .set_start_ts(start_ts); + exec_admin(&self.router, req) + } + + fn end_flashback(&self, ctx: &Context) -> BoxFuture<'static, tikv_kv::Result<()>> { + // Send an `AdminCmdType::FinishFlashback` to unset the persistence state + // in `RegionLocalState` and region's meta, and when that admin cmd is applied, + // will update the memory state of the flashback + let req = new_flashback_req(ctx, AdminCmdType::FinishFlashback); + exec_admin(&self.router, req) + } +} + +fn exec_admin( + router: &RaftRouter, + req: RaftCmdRequest, +) -> BoxFuture<'static, tikv_kv::Result<()>> { + let region_id = req.get_header().get_region_id(); + let admin_type = req.get_admin_request().get_cmd_type(); + let (msg, sub) = PeerMsg::admin_command(req); + let res = router.check_send(region_id, msg); + Box::pin(async move { + res?; + let mut resp = sub.result().await.ok_or_else(|| -> tikv_kv::Error { + box_err!( + "region {} exec_admin {:?} without response", + region_id, + admin_type + ) + })?; + check_raft_cmd_response(&mut resp)?; + Ok(()) + }) } diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index eec5ea9b94c..0b703cf32dd 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -5,40 +5,58 @@ use std::{ time::{Duration, Instant}, }; -use futures::{channel::oneshot, executor::block_on}; +use engine_rocks::RocksEngine; +use futures::executor::block_on; use kvproto::{ errorpb::FlashbackInProgress, metapb, - raft_cmdpb::{AdminCmdType, CmdType, RaftCmdResponse, Request}, + raft_cmdpb::{AdminCmdType, CmdType, RaftCmdRequest, RaftCmdResponse, Request}, + raft_serverpb::RegionLocalState, }; use raftstore::store::Callback; use test_raftstore::*; +use test_raftstore_macro::test_case; use txn_types::WriteBatchFlags; const TEST_KEY: &[u8] = b"k1"; const 
TEST_VALUE: &[u8] = b"v1"; -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_allow_read_only_request() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(30)); cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); + cluster.must_put(TEST_KEY, TEST_VALUE); let mut region = cluster.get_region(TEST_KEY); - let mut snap_req = Request::default(); - snap_req.set_cmd_type(CmdType::Snap); - // Get snapshot normally. - let snap_resp = request(&mut cluster, &mut region.clone(), snap_req.clone(), false); - assert!(!snap_resp.get_header().has_error()); - // Get snapshot with flashback flag without in the flashback state. - let snap_resp = request(&mut cluster, &mut region.clone(), snap_req.clone(), true); - assert!(!snap_resp.get_header().has_error()); - // Get snapshot with flashback flag with in the flashback state. + let mut get_req = Request::default(); + get_req.set_cmd_type(CmdType::Get); + // Get normally. + let snap_resp = request(&mut cluster, &mut region.clone(), get_req.clone(), false); + assert!( + !snap_resp.get_header().has_error(), + "{:?}", + snap_resp.get_header() + ); + // Get with flashback flag without in the flashback state. + let snap_resp = request(&mut cluster, &mut region.clone(), get_req.clone(), true); + assert!( + !snap_resp.get_header().has_error(), + "{:?}", + snap_resp.get_header() + ); + // Get with flashback flag with in the flashback state. cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); - let snap_resp = request(&mut cluster, &mut region.clone(), snap_req.clone(), true); - assert!(!snap_resp.get_header().has_error()); - // Get snapshot without flashback flag with in the flashback state. 
- let snap_resp = request(&mut cluster, &mut region, snap_req, false); + let snap_resp = request(&mut cluster, &mut region.clone(), get_req.clone(), true); + assert!( + !snap_resp.get_header().has_error(), + "{:?}", + snap_resp.get_header() + ); + // Get without flashback flag with in the flashback state. + let snap_resp = request(&mut cluster, &mut region, get_req, false); assert!( snap_resp .get_header() @@ -51,10 +69,11 @@ fn test_allow_read_only_request() { cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); } -#[test] #[cfg(feature = "failpoints")] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_read_after_prepare_flashback() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); @@ -73,7 +92,9 @@ fn test_read_after_prepare_flashback() { cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); } -#[test] +#[cfg(feature = "failpoints")] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_prepare_flashback_after_split() { let mut cluster = new_node_cluster(0, 3); cluster.run(); @@ -98,23 +119,13 @@ fn test_prepare_flashback_after_split() { // Make sure the admin split cmd is ready. sleep(Duration::from_millis(100)); // Send the prepare flashback msg. - let (result_tx, result_rx) = oneshot::channel(); - cluster.must_send_flashback_msg( - old_region.get_id(), - AdminCmdType::PrepareFlashback, - Callback::write(Box::new(move |resp| { - if resp.response.get_header().has_error() { - result_tx - .send(Some(resp.response.get_header().get_error().clone())) - .unwrap(); - return; - } - result_tx.send(None).unwrap(); - })), - ); + let resp = cluster.must_send_flashback_msg(old_region.get_id(), AdminCmdType::PrepareFlashback); // Remove the pause to make these two commands are in the same batch to apply. 
fail::remove(on_handle_apply_fp); - let prepare_flashback_err = block_on(result_rx).unwrap().unwrap(); + let prepare_flashback_err = block_on(async { + let resp = resp.await; + resp.get_header().get_error().clone() + }); assert!( prepare_flashback_err.has_epoch_not_match(), "prepare flashback should fail with epoch not match, but got {:?}", @@ -133,7 +144,9 @@ fn test_prepare_flashback_after_split() { must_check_flashback_state(&mut cluster, right_region.get_id(), 1, false); } -#[test] +// #[cfg(feature = "failpoints")] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_prepare_flashback_after_conf_change() { let mut cluster = new_node_cluster(0, 3); // Disable default max peer count check. @@ -150,23 +163,13 @@ fn test_prepare_flashback_after_conf_change() { // Make sure the conf change cmd is ready. sleep(Duration::from_millis(100)); // Send the prepare flashback msg. - let (result_tx, result_rx) = oneshot::channel(); - cluster.must_send_flashback_msg( - region_id, - AdminCmdType::PrepareFlashback, - Callback::write(Box::new(move |resp| { - if resp.response.get_header().has_error() { - result_tx - .send(Some(resp.response.get_header().get_error().clone())) - .unwrap(); - return; - } - result_tx.send(None).unwrap(); - })), - ); + let resp = cluster.must_send_flashback_msg(region_id, AdminCmdType::PrepareFlashback); // Remove the pause to make these two commands are in the same batch to apply. 
fail::remove(on_handle_apply_fp); - let prepare_flashback_err = block_on(result_rx).unwrap().unwrap(); + let prepare_flashback_err = block_on(async { + let resp = resp.await; + resp.get_header().get_error().clone() + }); assert!( prepare_flashback_err.has_epoch_not_match(), "prepare flashback should fail with epoch not match, but got {:?}", @@ -179,7 +182,8 @@ fn test_prepare_flashback_after_conf_change() { must_check_flashback_state(&mut cluster, region_id, 1, false); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_flashback_unprepared() { let mut cluster = new_node_cluster(0, 3); cluster.run(); @@ -193,9 +197,10 @@ fn test_flashback_unprepared() { ); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_flashback_for_schedule() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.run(); cluster.must_transfer_leader(1, new_peer(2, 2)); cluster.must_transfer_leader(1, new_peer(1, 1)); @@ -224,7 +229,8 @@ fn test_flashback_for_schedule() { cluster.must_transfer_leader(1, new_peer(2, 2)); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_flashback_for_write() { let mut cluster = new_node_cluster(0, 3); cluster.run(); @@ -259,7 +265,8 @@ fn test_flashback_for_write() { ); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_flashback_for_read() { let mut cluster = new_node_cluster(0, 3); cluster.run(); @@ -341,7 +348,8 @@ fn test_flashback_for_local_read() { must_request_with_flashback_flag(&mut cluster, &mut region, new_get_cmd(TEST_KEY)); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_flashback_for_status_cmd_as_region_detail() { let mut cluster = new_node_cluster(0, 3); cluster.run(); @@ -365,7 
+373,8 @@ fn test_flashback_for_status_cmd_as_region_detail() { assert_eq!(region_detail.get_leader(), &leader); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_flashback_for_check_is_in_persist() { let mut cluster = new_node_cluster(0, 3); cluster.run(); @@ -381,7 +390,8 @@ fn test_flashback_for_check_is_in_persist() { must_check_flashback_state(&mut cluster, 1, 2, false); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_flashback_for_apply_snapshot() { let mut cluster = new_node_cluster(0, 3); configure_for_snapshot(&mut cluster.cfg); @@ -450,8 +460,67 @@ fn test_flashback_for_apply_snapshot() { ); } -fn must_check_flashback_state( - cluster: &mut Cluster, +trait ClusterI { + fn region_local_state(&self, region_id: u64, store_id: u64) -> RegionLocalState; + fn query_leader( + &self, + store_id: u64, + region_id: u64, + timeout: Duration, + ) -> Option; + fn call_command( + &self, + request: RaftCmdRequest, + timeout: Duration, + ) -> raftstore::Result; +} + +impl ClusterI for Cluster { + fn region_local_state(&self, region_id: u64, store_id: u64) -> RegionLocalState { + Cluster::::region_local_state(self, region_id, store_id) + } + fn query_leader( + &self, + store_id: u64, + region_id: u64, + timeout: Duration, + ) -> Option { + Cluster::::query_leader(self, store_id, region_id, timeout) + } + fn call_command( + &self, + request: RaftCmdRequest, + timeout: Duration, + ) -> raftstore::Result { + Cluster::::call_command(self, request, timeout) + } +} + +type ClusterV2 = + test_raftstore_v2::Cluster, RocksEngine>; +impl ClusterI for ClusterV2 { + fn region_local_state(&self, region_id: u64, store_id: u64) -> RegionLocalState { + ClusterV2::region_local_state(self, region_id, store_id) + } + fn query_leader( + &self, + store_id: u64, + region_id: u64, + timeout: Duration, + ) -> Option { + ClusterV2::query_leader(self, store_id, 
region_id, timeout) + } + fn call_command( + &self, + request: RaftCmdRequest, + timeout: Duration, + ) -> raftstore::Result { + ClusterV2::call_command(self, request, timeout) + } +} + +fn must_check_flashback_state( + cluster: &mut T, region_id: u64, store_id: u64, is_in_flashback: bool, @@ -473,8 +542,8 @@ fn must_check_flashback_state( ); } -fn request( - cluster: &mut Cluster, +fn request( + cluster: &mut T, region: &mut metapb::Region, req: Request, with_flashback_flag: bool, @@ -497,8 +566,8 @@ fn request( } // Make sure the request could be executed with flashback flag. -fn must_request_with_flashback_flag( - cluster: &mut Cluster, +fn must_request_with_flashback_flag( + cluster: &mut T, region: &mut metapb::Region, req: Request, ) { @@ -506,8 +575,8 @@ fn must_request_with_flashback_flag( assert!(!resp.get_header().has_error(), "{:?}", resp); } -fn must_get_flashback_not_prepared_error( - cluster: &mut Cluster, +fn must_get_flashback_not_prepared_error( + cluster: &mut T, region: &mut metapb::Region, req: Request, ) { @@ -516,8 +585,8 @@ fn must_get_flashback_not_prepared_error( } // Make sure the request could be executed without flashback flag. 
-fn must_request_without_flashback_flag( - cluster: &mut Cluster, +fn must_request_without_flashback_flag( + cluster: &mut T, region: &mut metapb::Region, req: Request, ) { @@ -525,8 +594,8 @@ fn must_request_without_flashback_flag( assert!(!resp.get_header().has_error(), "{:?}", resp); } -fn must_get_flashback_in_progress_error( - cluster: &mut Cluster, +fn must_get_flashback_in_progress_error( + cluster: &mut T, region: &mut metapb::Region, req: Request, ) { diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 6971808b25a..57ed2c258db 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -556,6 +556,7 @@ fn test_mvcc_resolve_lock_gc_and_delete() { } #[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] #[cfg(feature = "failpoints")] fn test_mvcc_flashback_failed_after_first_batch() { let (_cluster, client, ctx) = new_cluster(); @@ -677,6 +678,7 @@ fn test_mvcc_flashback_failed_after_first_batch() { } #[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_mvcc_flashback() { let (_cluster, client, ctx) = new_cluster(); let mut ts = 0; @@ -719,10 +721,12 @@ fn test_mvcc_flashback() { } #[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_mvcc_flashback_block_rw() { let (_cluster, client, ctx) = new_cluster(); // Prepare the flashback. must_prepare_flashback(&client, ctx.clone(), 1, 2); + // Try to read version 3 (after flashback, FORBIDDEN). 
let (k, v) = (b"key".to_vec(), b"value".to_vec()); // Get @@ -731,7 +735,11 @@ fn test_mvcc_flashback_block_rw() { get_req.key = k.clone(); get_req.version = 3; let get_resp = client.kv_get(&get_req).unwrap(); - assert!(get_resp.get_region_error().has_flashback_in_progress()); + assert!( + get_resp.get_region_error().has_flashback_in_progress(), + "{:?}", + get_resp + ); assert!(!get_resp.has_error()); assert!(get_resp.value.is_empty()); // Scan @@ -777,6 +785,7 @@ fn test_mvcc_flashback_block_rw() { } #[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_mvcc_flashback_block_scheduling() { let (mut cluster, client, ctx) = new_cluster(); // Prepare the flashback. @@ -787,13 +796,16 @@ fn test_mvcc_flashback_block_scheduling() { transfer_leader_resp .get_header() .get_error() - .has_flashback_in_progress() + .has_flashback_in_progress(), + "{:?}", + transfer_leader_resp ); // Finish the flashback. must_finish_flashback(&client, ctx, 0, 1, 2); } #[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_mvcc_flashback_unprepared() { let (_cluster, client, ctx) = new_cluster(); let (k, v) = (b"key".to_vec(), b"value".to_vec()); @@ -813,23 +825,14 @@ fn test_mvcc_flashback_unprepared() { must_kv_read_equal(&client, ctx.clone(), k.clone(), v, 6); // Flashback with preparing. must_flashback_to_version(&client, ctx.clone(), 0, 6, 7); - let mut get_req = GetRequest::default(); - get_req.set_context(ctx.clone()); - get_req.key = k; - get_req.version = 7; - let get_resp = client.kv_get(&get_req).unwrap(); - assert!(!get_resp.has_region_error()); - assert!(!get_resp.has_error()); - assert_eq!(get_resp.value, b"".to_vec()); + must_kv_read_not_found(&client, ctx.clone(), k.clone(), 7); // Mock the flashback retry. 
must_finish_flashback(&client, ctx.clone(), 0, 6, 7); - let get_resp = client.kv_get(&get_req).unwrap(); - assert!(!get_resp.has_region_error()); - assert!(!get_resp.has_error()); - assert_eq!(get_resp.value, b"".to_vec()); + must_kv_read_not_found(&client, ctx, k, 7); } #[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_mvcc_flashback_with_unlimited_range() { let (_cluster, client, ctx) = new_cluster(); let (k, v) = (b"key".to_vec(), b"value".to_vec()); @@ -857,14 +860,7 @@ fn test_mvcc_flashback_with_unlimited_range() { assert!(!resp.has_region_error()); assert!(resp.get_error().is_empty()); - let mut get_req = GetRequest::default(); - get_req.set_context(ctx); - get_req.key = k; - get_req.version = 7; - let get_resp = client.kv_get(&get_req).unwrap(); - assert!(!get_resp.has_region_error()); - assert!(!get_resp.has_error()); - assert_eq!(get_resp.value, b"".to_vec()); + must_kv_read_not_found(&client, ctx, k, 7); } // raft related RPC is tested as parts of test_snapshot.rs, so skip here. 
From 526726efc813fe32c1381fce7f4a8500874b258a Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 19 May 2023 16:49:36 +0800 Subject: [PATCH 0694/1149] raftstore-v2: check and compact region (#14536) close tikv/tikv#14757 impl check and compact region for raftstore-v2 Signed-off-by: Spade A Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/engine_panic/src/engine.rs | 2 +- components/engine_panic/src/misc.rs | 9 +- components/engine_rocks/src/engine.rs | 2 +- components/engine_rocks/src/misc.rs | 13 +- components/engine_rocks/src/properties.rs | 20 +- .../engine_rocks/src/range_properties.rs | 7 +- components/engine_traits/src/engine.rs | 2 +- components/engine_traits/src/misc.rs | 17 +- components/raftstore-v2/src/batch/store.rs | 12 +- components/raftstore-v2/src/fsm/store.rs | 12 + components/raftstore-v2/src/lib.rs | 1 + components/raftstore-v2/src/operation/misc.rs | 106 +++++ components/raftstore-v2/src/operation/mod.rs | 1 + components/raftstore-v2/src/router/message.rs | 2 + .../src/worker/cleanup/compact.rs | 365 ++++++++++++++++++ .../raftstore-v2/src/worker/cleanup/mod.rs | 42 ++ components/raftstore-v2/src/worker/mod.rs | 1 + components/raftstore/src/store/config.rs | 75 +++- components/raftstore/src/store/fsm/store.rs | 4 +- .../raftstore/src/store/worker/compact.rs | 33 +- components/server/src/server.rs | 1 + components/server/src/server2.rs | 1 + components/test_raftstore-v2/src/node.rs | 2 + components/test_raftstore-v2/src/server.rs | 1 + components/test_raftstore/src/node.rs | 2 + components/test_raftstore/src/server.rs | 1 + src/config/mod.rs | 3 + tests/integrations/config/mod.rs | 4 +- tests/integrations/config/test-custom.toml | 2 + .../raftstore/test_compact_after_delete.rs | 155 +++++++- 30 files changed, 830 insertions(+), 68 deletions(-) create mode 100644 components/raftstore-v2/src/operation/misc.rs create mode 
100644 components/raftstore-v2/src/worker/cleanup/compact.rs create mode 100644 components/raftstore-v2/src/worker/cleanup/mod.rs diff --git a/components/engine_panic/src/engine.rs b/components/engine_panic/src/engine.rs index d8faf8fee01..b5ce0d1516e 100644 --- a/components/engine_panic/src/engine.rs +++ b/components/engine_panic/src/engine.rs @@ -22,7 +22,7 @@ impl KvEngine for PanicEngine { fn bad_downcast(&self) -> &T { panic!() } - #[cfg(any(test, feature = "testexport"))] + #[cfg(feature = "testexport")] fn inner_refcount(&self) -> usize { panic!() } diff --git a/components/engine_panic/src/misc.rs b/components/engine_panic/src/misc.rs index 5603bf43c77..027612d588e 100644 --- a/components/engine_panic/src/misc.rs +++ b/components/engine_panic/src/misc.rs @@ -1,6 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{DeleteStrategy, MiscExt, Range, Result, StatisticsReporter}; +use engine_traits::{DeleteStrategy, MiscExt, Range, RangeStats, Result, StatisticsReporter}; use crate::engine::PanicEngine; @@ -100,12 +100,7 @@ impl MiscExt for PanicEngine { panic!() } - fn get_range_entries_and_versions( - &self, - cf: &str, - start: &[u8], - end: &[u8], - ) -> Result> { + fn get_range_stats(&self, cf: &str, start: &[u8], end: &[u8]) -> Result> { panic!() } diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 6c6231ca42f..293b74e3bca 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -195,7 +195,7 @@ impl KvEngine for RocksEngine { e.downcast_ref().expect("bad engine downcast") } - #[cfg(any(test, feature = "testexport"))] + #[cfg(feature = "testexport")] fn inner_refcount(&self) -> usize { Arc::strong_count(&self.db) } diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index 8d5bb3d43ef..d4ffa564861 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -2,7 
+2,7 @@ use engine_traits::{ CfNamesExt, DeleteStrategy, ImportExt, IterOptions, Iterable, Iterator, MiscExt, Mutable, - Range, Result, SstWriter, SstWriterBuilder, WriteBatch, WriteBatchExt, + Range, RangeStats, Result, SstWriter, SstWriterBuilder, WriteBatch, WriteBatchExt, }; use rocksdb::Range as RocksRange; use tikv_util::{box_try, keybuilder::KeyBuilder}; @@ -353,15 +353,8 @@ impl MiscExt for RocksEngine { Ok(total) } - fn get_range_entries_and_versions( - &self, - cf: &str, - start: &[u8], - end: &[u8], - ) -> Result> { - Ok(crate::properties::get_range_entries_and_versions( - self, cf, start, end, - )) + fn get_range_stats(&self, cf: &str, start: &[u8], end: &[u8]) -> Result> { + Ok(crate::properties::get_range_stats(self, cf, start, end)) } fn is_stalled_or_stopped(&self) -> bool { diff --git a/components/engine_rocks/src/properties.rs b/components/engine_rocks/src/properties.rs index a95a9aecf7b..d1158ac9c4e 100644 --- a/components/engine_rocks/src/properties.rs +++ b/components/engine_rocks/src/properties.rs @@ -9,7 +9,7 @@ use std::{ }; use api_version::{ApiV2, KeyMode, KvFormat}; -use engine_traits::{raw_ttl::ttl_current_ts, MvccProperties, Range}; +use engine_traits::{raw_ttl::ttl_current_ts, MvccProperties, Range, RangeStats}; use rocksdb::{ DBEntryType, TablePropertiesCollector, TablePropertiesCollectorFactory, TitanBlobIndex, UserCollectedProperties, @@ -530,12 +530,12 @@ impl TablePropertiesCollectorFactory } } -pub fn get_range_entries_and_versions( +pub fn get_range_stats( engine: &crate::RocksEngine, cf: &str, start: &[u8], end: &[u8], -) -> Option<(u64, u64)> { +) -> Option { let range = Range::new(start, end); let collection = match engine.get_properties_of_tables_in_range(cf, &[range]) { Ok(v) => v, @@ -557,8 +557,11 @@ pub fn get_range_entries_and_versions( num_entries += v.num_entries(); props.add(&mvcc); } - - Some((num_entries, props.num_versions)) + Some(RangeStats { + num_entries, + num_versions: props.num_versions, + num_rows: 
props.num_rows, + }) } #[cfg(test)] @@ -773,10 +776,9 @@ mod tests { let start_keys = keys::data_key(&[]); let end_keys = keys::data_end_key(&[]); - let (entries, versions) = - get_range_entries_and_versions(&db, CF_WRITE, &start_keys, &end_keys).unwrap(); - assert_eq!(entries, (cases.len() * 2) as u64); - assert_eq!(versions, cases.len() as u64); + let range_stats = get_range_stats(&db, CF_WRITE, &start_keys, &end_keys).unwrap(); + assert_eq!(range_stats.num_entries, (cases.len() * 2) as u64); + assert_eq!(range_stats.num_versions, cases.len() as u64); } #[test] diff --git a/components/engine_rocks/src/range_properties.rs b/components/engine_rocks/src/range_properties.rs index 101a004982a..dfc41db5f6e 100644 --- a/components/engine_rocks/src/range_properties.rs +++ b/components/engine_rocks/src/range_properties.rs @@ -9,7 +9,7 @@ use tikv_util::{box_err, box_try, debug, info}; use crate::{ engine::RocksEngine, - properties::{get_range_entries_and_versions, RangeProperties}, + properties::{get_range_stats, RangeProperties}, }; impl RangePropertiesExt for RocksEngine { @@ -27,9 +27,8 @@ impl RangePropertiesExt for RocksEngine { let start = &range.start_key; let end = &range.end_key; - let (_, keys) = - get_range_entries_and_versions(self, CF_WRITE, start, end).unwrap_or_default(); - Ok(keys) + let range_stats = get_range_stats(self, CF_WRITE, start, end).unwrap_or_default(); + Ok(range_stats.num_versions) } fn get_range_approximate_keys_cf( diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index aa90c23b429..b3ee1c93b05 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -69,6 +69,6 @@ pub trait KvEngine: /// A method for test to expose inner db refcount in order to make sure a /// full release of engine. 
- #[cfg(any(test, feature = "testexport"))] + #[cfg(feature = "testexport")] fn inner_refcount(&self) -> usize; } diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index c2d317f529f..1a05a5de374 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -54,6 +54,16 @@ pub trait StatisticsReporter { fn flush(&mut self); } +#[derive(Default)] +pub struct RangeStats { + // The number of entries + pub num_entries: u64, + // The number of MVCC versions of all rows (num_entries - tombstones). + pub num_versions: u64, + // The number of rows. + pub num_rows: u64, +} + pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { type StatisticsReporter: StatisticsReporter; @@ -121,12 +131,7 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { fn get_num_keys(&self) -> Result; - fn get_range_entries_and_versions( - &self, - cf: &str, - start: &[u8], - end: &[u8], - ) -> Result>; + fn get_range_stats(&self, cf: &str, start: &[u8], end: &[u8]) -> Result>; fn is_stalled_or_stopped(&self) -> bool; } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index a4d42eec167..25fbde2ed27 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -57,7 +57,7 @@ use crate::{ operation::{SharedReadTablet, MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX}, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, - worker::{checkpoint, pd, tablet}, + worker::{checkpoint, cleanup, pd, tablet}, Error, Result, }; @@ -508,6 +508,7 @@ pub struct Schedulers { pub tablet: Scheduler>, pub checkpoint: Scheduler>, pub write: WriteSenders, + pub cleanup: Scheduler, // Following is not maintained by raftstore itself. pub split_check: Scheduler, @@ -532,6 +533,7 @@ struct Workers { checkpoint: Worker, async_write: StoreWriters, purge: Option, + cleanup_worker: Worker, // Following is not maintained by raftstore itself. 
background: Worker, @@ -547,6 +549,7 @@ impl Workers { checkpoint, async_write: StoreWriters::new(None), purge, + cleanup_worker: Worker::new("cleanup-worker"), background, } } @@ -677,6 +680,12 @@ impl StoreSystem { ), ); + let compact_runner = + cleanup::CompactRunner::new(tablet_registry.clone(), self.logger.clone()); + let cleanup_worker_scheduler = workers + .cleanup_worker + .start("cleanup-worker", cleanup::Runner::new(compact_runner)); + let checkpoint_scheduler = workers.checkpoint.start( "checkpoint-worker", checkpoint::Runner::new(self.logger.clone(), tablet_registry.clone()), @@ -689,6 +698,7 @@ impl StoreSystem { checkpoint: checkpoint_scheduler, write: workers.async_write.senders(), split_check: split_check_scheduler, + cleanup: cleanup_worker_scheduler, }; let builder = StorePollerBuilder::new( diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index e9b224b7375..c7f228f7f9c 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -142,6 +142,7 @@ impl StoreRegionMeta for StoreMeta { pub struct Store { id: u64, + last_compact_checked_key: Vec, // Unix time when it's started. 
start_time: Option, logger: Logger, @@ -151,6 +152,7 @@ impl Store { pub fn new(id: u64, logger: Logger) -> Store { Store { id, + last_compact_checked_key: keys::DATA_MIN_KEY.to_vec(), start_time: None, logger: logger.new(o!("store_id" => id)), } @@ -160,6 +162,14 @@ impl Store { self.id } + pub fn last_compact_checked_key(&self) -> &Vec { + &self.last_compact_checked_key + } + + pub fn set_last_compact_checked_key(&mut self, key: Vec) { + self.last_compact_checked_key = key; + } + pub fn start_time(&self) -> Option { self.start_time } @@ -239,6 +249,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { StoreTick::CleanupImportSst, self.store_ctx.cfg.cleanup_import_sst_interval.0, ); + self.register_compact_check_tick(); } pub fn schedule_tick(&mut self, tick: StoreTick, timeout: Duration) { @@ -263,6 +274,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { match tick { StoreTick::PdStoreHeartbeat => self.on_pd_store_heartbeat(), StoreTick::CleanupImportSst => self.on_cleanup_import_sst(), + StoreTick::CompactCheck => self.on_compact_check_tick(), _ => unimplemented!(), } } diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index bcfaf383024..697d953e5c8 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -43,6 +43,7 @@ pub use fsm::StoreMeta; pub use operation::{write_initial_states, SimpleWriteBinary, SimpleWriteEncoder, StateStorage}; pub use raftstore::{store::Config, Error, Result}; pub use worker::{ + cleanup::CompactTask, pd::{PdReporter, Task as PdTask}, tablet::Task as TabletTask, }; diff --git a/components/raftstore-v2/src/operation/misc.rs b/components/raftstore-v2/src/operation/misc.rs new file mode 100644 index 00000000000..c2c3d643965 --- /dev/null +++ b/components/raftstore-v2/src/operation/misc.rs @@ -0,0 +1,106 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::collections::{ + Bound::{Excluded, Unbounded}, + HashSet, +}; + +use engine_traits::{KvEngine, RaftEngine, CF_DEFAULT, CF_WRITE}; +use slog::{debug, error, info}; + +use crate::{ + fsm::StoreFsmDelegate, + router::StoreTick, + worker::cleanup::{self, CompactThreshold}, + CompactTask::CheckAndCompact, +}; + +impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { + pub fn register_compact_check_tick(&mut self) { + self.schedule_tick( + StoreTick::CompactCheck, + self.store_ctx.cfg.region_compact_check_interval.0, + ) + } + + pub fn on_compact_check_tick(&mut self) { + self.register_compact_check_tick(); + if self.store_ctx.schedulers.cleanup.is_busy() { + info!( + self.store_ctx.logger, + "compact worker is busy, check space redundancy next time"; + ); + return; + } + + // Use HashSet here as the region end_keys in store_meta is not unique. + let mut regions_to_check: HashSet = HashSet::default(); + + let (largest_end_key, last_check_key) = { + // Start from last checked key. + let mut last_check_key = self.fsm.store.last_compact_checked_key(); + + let meta = self.store_ctx.store_meta.lock().unwrap(); + if meta.region_ranges.is_empty() { + debug!( + self.store_ctx.logger, + "there is no range need to check"; + ); + return; + } + // Collect continuous ranges. + let ranges = meta.region_ranges.range(( + Excluded((last_check_key.clone(), u64::MAX)), + Unbounded::<(Vec, u64)>, + )); + + for region_range in ranges { + last_check_key = ®ion_range.0.0; + regions_to_check.insert(*region_range.1); + + if regions_to_check.len() >= self.store_ctx.cfg.region_compact_check_step() as usize + { + break; + } + } + + ( + meta.region_ranges.keys().last().unwrap().0.to_vec(), + last_check_key.clone(), + ) + }; + + if largest_end_key == last_check_key { + // Next task will start from the very beginning. 
+ self.fsm + .store + .set_last_compact_checked_key(keys::DATA_MIN_KEY.to_vec()); + } else { + self.fsm.store.set_last_compact_checked_key(last_check_key); + } + + // Schedule the task. + let cf_names = vec![CF_DEFAULT.to_owned(), CF_WRITE.to_owned()]; + if let Err(e) = self + .store_ctx + .schedulers + .cleanup + .schedule(cleanup::Task::Compact(CheckAndCompact { + cf_names, + region_ids: regions_to_check.into_iter().collect::>(), + compact_threshold: CompactThreshold::new( + self.store_ctx.cfg.region_compact_min_tombstones, + self.store_ctx.cfg.region_compact_tombstones_percent, + self.store_ctx.cfg.region_compact_min_redundant_rows, + self.store_ctx.cfg.region_compact_redundant_rows_percent, + ), + })) + { + error!( + self.store_ctx.logger, + "schedule space check task failed"; + "err" => ?e, + ); + } + } +} diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index f5eb4ebdb6f..e0107122da9 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -3,6 +3,7 @@ mod bucket; mod command; mod life; +mod misc; mod pd; mod query; mod ready; diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 3f761c74f94..9ce4e8a8807 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -89,6 +89,7 @@ pub enum StoreTick { SnapGc, ConsistencyCheck, CleanupImportSst, + CompactCheck, } impl StoreTick { @@ -99,6 +100,7 @@ impl StoreTick { StoreTick::SnapGc => RaftEventDurationType::snap_gc, StoreTick::ConsistencyCheck => RaftEventDurationType::consistency_check, StoreTick::CleanupImportSst => RaftEventDurationType::cleanup_import_sst, + StoreTick::CompactCheck => RaftEventDurationType::compact_check, } } } diff --git a/components/raftstore-v2/src/worker/cleanup/compact.rs b/components/raftstore-v2/src/worker/cleanup/compact.rs new file mode 100644 index 
00000000000..c7d7aef897d --- /dev/null +++ b/components/raftstore-v2/src/worker/cleanup/compact.rs @@ -0,0 +1,365 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + error::Error as StdError, + fmt::{self, Display, Formatter}, +}; + +use engine_traits::{KvEngine, RangeStats, TabletRegistry, CF_WRITE}; +use fail::fail_point; +use keys::{DATA_MAX_KEY, DATA_MIN_KEY}; +use slog::{debug, error, info, warn, Logger}; +use thiserror::Error; +use tikv_util::{box_try, worker::Runnable}; + +pub enum Task { + CheckAndCompact { + // Column families need to compact + cf_names: Vec, + region_ids: Vec, + compact_threshold: CompactThreshold, + }, +} + +pub struct CompactThreshold { + tombstones_num_threshold: u64, + tombstones_percent_threshold: u64, + redundant_rows_threshold: u64, + redundant_rows_percent_threshold: u64, +} + +impl CompactThreshold { + pub fn new( + tombstones_num_threshold: u64, + tombstones_percent_threshold: u64, + redundant_rows_threshold: u64, + redundant_rows_percent_threshold: u64, + ) -> Self { + Self { + tombstones_num_threshold, + tombstones_percent_threshold, + redundant_rows_percent_threshold, + redundant_rows_threshold, + } + } +} + +impl Display for Task { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match *self { + Task::CheckAndCompact { + ref cf_names, + ref region_ids, + ref compact_threshold, + } => f + .debug_struct("CheckAndCompact") + .field("cf_names", cf_names) + .field("regions", region_ids) + .field( + "tombstones_num_threshold", + &compact_threshold.tombstones_num_threshold, + ) + .field( + "tombstones_percent_threshold", + &compact_threshold.tombstones_percent_threshold, + ) + .field( + "redundant_rows_threshold", + &compact_threshold.redundant_rows_threshold, + ) + .field( + "redundant_rows_percent_threshold", + &compact_threshold.redundant_rows_percent_threshold, + ) + .finish(), + } + } +} + +#[derive(Debug, Error)] +pub enum Error { + #[error("compact failed {0:?}")] + Other(#[from] 
Box), +} + +pub struct Runner { + logger: Logger, + tablet_registry: TabletRegistry, +} + +impl Runner +where + E: KvEngine, +{ + pub fn new(tablet_registry: TabletRegistry, logger: Logger) -> Runner { + Runner { + logger, + tablet_registry, + } + } +} + +impl Runnable for Runner +where + E: KvEngine, +{ + type Task = Task; + + fn run(&mut self, task: Self::Task) { + match task { + Task::CheckAndCompact { + cf_names, + region_ids, + compact_threshold, + } => match collect_regions_to_compact( + &self.tablet_registry, + region_ids, + compact_threshold, + &self.logger, + ) { + Ok(mut region_ids) => { + for region_id in region_ids.drain(..) { + let Some(mut tablet_cache) = self.tablet_registry.get(region_id) else {continue}; + let Some(tablet) = tablet_cache.latest() else {continue}; + for cf in &cf_names { + if let Err(e) = + tablet.compact_range_cf(cf, None, None, false, 1 /* threads */) + { + error!( + self.logger, + "compact range failed"; + "region_id" => region_id, + "cf" => cf, + "err" => %e, + ); + } + } + info!( + self.logger, + "compaction range finished"; + "region_id" => region_id, + ); + fail_point!("raftstore-v2::CheckAndCompact::AfterCompact"); + } + } + Err(e) => warn!( + self.logger, + "check ranges need reclaim failed"; "err" => %e + ), + }, + } + } +} + +fn need_compact(range_stats: &RangeStats, compact_threshold: &CompactThreshold) -> bool { + if range_stats.num_entries < range_stats.num_versions { + return false; + } + + // We trigger region compaction when their are to many tombstones as well as + // redundant keys, both of which can severly impact scan operation: + let estimate_num_del = range_stats.num_entries - range_stats.num_versions; + let redundant_keys = range_stats.num_entries - range_stats.num_rows; + (redundant_keys >= compact_threshold.redundant_rows_threshold + && redundant_keys * 100 + >= compact_threshold.redundant_rows_percent_threshold * range_stats.num_entries) + || (estimate_num_del >= compact_threshold.tombstones_num_threshold 
+ && estimate_num_del * 100 + >= compact_threshold.tombstones_percent_threshold * range_stats.num_entries) +} + +fn collect_regions_to_compact( + reg: &TabletRegistry, + region_ids: Vec, + compact_threshold: CompactThreshold, + logger: &Logger, +) -> Result, Error> { + fail_point!("on_collect_regions_to_compact"); + debug!( + logger, + "received compaction check"; + "regions" => ?region_ids + ); + let mut regions_to_compact = vec![]; + for id in region_ids { + let Some(mut tablet_cache) = reg.get(id) else {continue}; + let Some(tablet) = tablet_cache.latest() else {continue}; + if tablet.auto_compactions_is_disabled().expect("cf") { + info!( + logger, + "skip compact check when disabled auto compactions"; + "region_id" => id, + ); + continue; + } + + if let Some(range_stats) = + box_try!(tablet.get_range_stats(CF_WRITE, DATA_MIN_KEY, DATA_MAX_KEY)) + { + info!( + logger, + "get range entries and versions"; + "num_entries" => range_stats.num_entries, + "num_versions" => range_stats.num_versions, + "num_rows" => range_stats.num_rows, + "region_id" => id, + ); + if need_compact(&range_stats, &compact_threshold) { + regions_to_compact.push(id); + } + } + } + Ok(regions_to_compact) +} + +#[cfg(test)] +mod tests { + use engine_test::{ + ctor::{CfOptions, DbOptions}, + kv::{KvTestEngine, TestTabletFactory}, + }; + use engine_traits::{MiscExt, SyncMutable, TabletContext, TabletRegistry, CF_DEFAULT, CF_LOCK}; + use keys::data_key; + use kvproto::metapb::Region; + use tempfile::Builder; + use txn_types::{Key, TimeStamp, Write, WriteType}; + + use super::*; + + fn build_test_factory(name: &'static str) -> (tempfile::TempDir, TabletRegistry) { + let dir = Builder::new().prefix(name).tempdir().unwrap(); + let mut cf_opts = CfOptions::new(); + cf_opts.set_level_zero_file_num_compaction_trigger(8); + let factory = Box::new(TestTabletFactory::new( + DbOptions::default(), + vec![ + (CF_DEFAULT, CfOptions::new()), + (CF_LOCK, CfOptions::new()), + (CF_WRITE, cf_opts), + ], + )); + 
let registry = TabletRegistry::new(factory, dir.path()).unwrap(); + (dir, registry) + } + + fn mvcc_put(db: &KvTestEngine, k: &[u8], v: &[u8], start_ts: TimeStamp, commit_ts: TimeStamp) { + let k = Key::from_encoded(data_key(k)).append_ts(commit_ts); + let w = Write::new(WriteType::Put, start_ts, Some(v.to_vec())); + db.put_cf(CF_WRITE, k.as_encoded(), &w.as_ref().to_bytes()) + .unwrap(); + } + + fn delete(db: &KvTestEngine, k: &[u8], commit_ts: TimeStamp) { + let k = Key::from_encoded(data_key(k)).append_ts(commit_ts); + db.delete_cf(CF_WRITE, k.as_encoded()).unwrap(); + } + + #[test] + fn test_compact_range() { + let (_dir, registry) = build_test_factory("compact-range-test"); + + let mut region = Region::default(); + region.set_id(2); + let ctx = TabletContext::new(®ion, Some(5)); + let mut cache = registry.load(ctx, true).unwrap(); + let tablet = cache.latest().unwrap(); + + // mvcc_put 0..5 + for i in 0..5 { + let (k, v) = (format!("k{}", i), format!("value{}", i)); + mvcc_put(tablet, k.as_bytes(), v.as_bytes(), 1.into(), 2.into()); + mvcc_put(tablet, k.as_bytes(), v.as_bytes(), 3.into(), 4.into()); + } + tablet.flush_cf(CF_WRITE, true).unwrap(); + + // gc 0..5 + for i in 0..5 { + let k = format!("k{}", i); + delete(tablet, k.as_bytes(), 4.into()); + } + tablet.flush_cf(CF_WRITE, true).unwrap(); + + let (start, end) = (data_key(b"k0"), data_key(b"k5")); + let range_stats = tablet + .get_range_stats(CF_WRITE, &start, &end) + .unwrap() + .unwrap(); + assert_eq!(range_stats.num_entries, 15); + assert_eq!(range_stats.num_versions, 10); + assert_eq!(range_stats.num_rows, 5); + + region.set_id(3); + let ctx = TabletContext::new(®ion, Some(5)); + let mut cache = registry.load(ctx, true).unwrap(); + let tablet = cache.latest().unwrap(); + // mvcc_put 5..10 + for i in 5..10 { + let (k, v) = (format!("k{}", i), format!("value{}", i)); + mvcc_put(tablet, k.as_bytes(), v.as_bytes(), 1.into(), 2.into()); + } + for i in 5..8 { + let (k, v) = (format!("k{}", i), 
format!("value{}", i)); + mvcc_put(tablet, k.as_bytes(), v.as_bytes(), 3.into(), 4.into()); + } + tablet.flush_cf(CF_WRITE, true).unwrap(); + + let (s, e) = (data_key(b"k5"), data_key(b"k9")); + let range_stats = tablet.get_range_stats(CF_WRITE, &s, &e).unwrap().unwrap(); + assert_eq!(range_stats.num_entries, 8); + assert_eq!(range_stats.num_versions, 8); + assert_eq!(range_stats.num_rows, 5); + + // gc 5..8 + for i in 5..8 { + let k = format!("k{}", i); + delete(tablet, k.as_bytes(), 4.into()); + } + tablet.flush_cf(CF_WRITE, true).unwrap(); + + let (s, e) = (data_key(b"k5"), data_key(b"k9")); + let range_stats = tablet.get_range_stats(CF_WRITE, &s, &e).unwrap().unwrap(); + assert_eq!(range_stats.num_entries, 11); + assert_eq!(range_stats.num_versions, 8); + assert_eq!(range_stats.num_rows, 5); + + let logger = slog_global::borrow_global().new(slog::o!()); + + // collect regions according to tombstone's parameters + let regions = collect_regions_to_compact( + ®istry, + vec![2, 3, 4], + CompactThreshold::new(4, 30, 100, 100), + &logger, + ) + .unwrap(); + assert!(regions.len() == 1 && regions[0] == 2); + + let regions = collect_regions_to_compact( + ®istry, + vec![2, 3, 4], + CompactThreshold::new(3, 25, 100, 100), + &logger, + ) + .unwrap(); + assert!(regions.len() == 2 && !regions.contains(&4)); + + // collect regions accroding to redundant rows' parameter + let regions = collect_regions_to_compact( + ®istry, + vec![2, 3, 4], + CompactThreshold::new(100, 100, 9, 60), + &logger, + ) + .unwrap(); + assert!(regions.len() == 1 && regions[0] == 2); + + let regions = collect_regions_to_compact( + ®istry, + vec![2, 3, 4], + CompactThreshold::new(100, 100, 5, 50), + &logger, + ) + .unwrap(); + assert!(regions.len() == 2 && !regions.contains(&4)); + } +} diff --git a/components/raftstore-v2/src/worker/cleanup/mod.rs b/components/raftstore-v2/src/worker/cleanup/mod.rs new file mode 100644 index 00000000000..0d04fd1eb70 --- /dev/null +++ 
b/components/raftstore-v2/src/worker/cleanup/mod.rs @@ -0,0 +1,42 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::fmt::{self, Display, Formatter}; + +pub use compact::{CompactThreshold, Runner as CompactRunner, Task as CompactTask}; +use engine_traits::KvEngine; +use tikv_util::worker::Runnable; + +mod compact; + +pub enum Task { + Compact(CompactTask), +} + +impl Display for Task { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Task::Compact(ref t) => t.fmt(f), + } + } +} + +pub struct Runner { + compact: CompactRunner, + // todo: more cleanup related runner may be added later +} + +impl Runner { + pub fn new(compact: CompactRunner) -> Runner { + Runner { compact } + } +} + +impl Runnable for Runner { + type Task = Task; + + fn run(&mut self, task: Task) { + match task { + Task::Compact(t) => self.compact.run(t), + } + } +} diff --git a/components/raftstore-v2/src/worker/mod.rs b/components/raftstore-v2/src/worker/mod.rs index b75525018d6..93ec453c030 100644 --- a/components/raftstore-v2/src/worker/mod.rs +++ b/components/raftstore-v2/src/worker/mod.rs @@ -1,5 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. pub mod checkpoint; +pub mod cleanup; pub mod pd; pub mod tablet; diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index aabf173e674..ecdfbe85d3f 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -103,12 +103,17 @@ pub struct Config { /// Interval (ms) to check whether start compaction for a region. pub region_compact_check_interval: ReadableDuration, /// Number of regions for each time checking. - pub region_compact_check_step: u64, + pub region_compact_check_step: Option, /// Minimum number of tombstones to trigger manual compaction. pub region_compact_min_tombstones: u64, /// Minimum percentage of tombstones to trigger manual compaction. /// Should between 1 and 100. 
pub region_compact_tombstones_percent: u64, + /// Minimum number of redundant rows to trigger manual compaction. + pub region_compact_min_redundant_rows: u64, + /// Minimum percentage of redundant rows to trigger manual compaction. + /// Should between 1 and 100. + pub region_compact_redundant_rows_percent: u64, pub pd_heartbeat_tick_interval: ReadableDuration, pub pd_store_heartbeat_tick_interval: ReadableDuration, pub snap_mgr_gc_tick_interval: ReadableDuration, @@ -371,9 +376,11 @@ impl Default for Config { split_region_check_tick_interval: ReadableDuration::secs(10), region_split_check_diff: None, region_compact_check_interval: ReadableDuration::minutes(5), - region_compact_check_step: 100, + region_compact_check_step: None, region_compact_min_tombstones: 10000, region_compact_tombstones_percent: 30, + region_compact_min_redundant_rows: 50000, + region_compact_redundant_rows_percent: 20, pd_heartbeat_tick_interval: ReadableDuration::minutes(1), pd_store_heartbeat_tick_interval: ReadableDuration::secs(10), notify_capacity: 40960, @@ -513,6 +520,10 @@ impl Config { self.raft_log_gc_size_limit.unwrap() } + pub fn region_compact_check_step(&self) -> u64 { + self.region_compact_check_step.unwrap() + } + #[inline] pub fn warmup_entry_cache_enabled(&self) -> bool { self.max_entry_cache_warmup_duration.0 != Duration::from_secs(0) @@ -532,6 +543,16 @@ impl Config { false } + pub fn optimize_for(&mut self, raft_kv_v2: bool) { + if self.region_compact_check_step.is_none() { + if raft_kv_v2 { + self.region_compact_check_step = Some(5); + } else { + self.region_compact_check_step = Some(100); + } + } + } + pub fn validate( &mut self, region_split_size: ReadableSize, @@ -799,6 +820,7 @@ impl Config { } } } + assert!(self.region_compact_check_step.is_some()); Ok(()) } @@ -875,13 +897,19 @@ impl Config { .set(self.region_compact_check_interval.as_secs_f64()); CONFIG_RAFTSTORE_GAUGE .with_label_values(&["region_compact_check_step"]) - .set(self.region_compact_check_step as 
f64); + .set(self.region_compact_check_step.unwrap_or_default() as f64); CONFIG_RAFTSTORE_GAUGE .with_label_values(&["region_compact_min_tombstones"]) .set(self.region_compact_min_tombstones as f64); CONFIG_RAFTSTORE_GAUGE .with_label_values(&["region_compact_tombstones_percent"]) .set(self.region_compact_tombstones_percent as f64); + CONFIG_RAFTSTORE_GAUGE + .with_label_values(&["region_compact_min_redundant_rows"]) + .set(self.region_compact_min_redundant_rows as f64); + CONFIG_RAFTSTORE_GAUGE + .with_label_values(&["region_compact_tombstones_percent"]) + .set(self.region_compact_tombstones_percent as f64); CONFIG_RAFTSTORE_GAUGE .with_label_values(&["pd_heartbeat_tick_interval"]) .set(self.pd_heartbeat_tick_interval.as_secs_f64()); @@ -1120,6 +1148,7 @@ mod tests { fn test_config_validate() { let split_size = coprocessor::config::SPLIT_SIZE; let mut cfg = Config::new(); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)).unwrap(); assert_eq!( cfg.raft_min_election_timeout_ticks, @@ -1131,41 +1160,50 @@ mod tests { ); cfg.raft_heartbeat_ticks = 0; + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.raft_election_timeout_ticks = 10; cfg.raft_heartbeat_ticks = 10; + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.raft_min_election_timeout_ticks = 5; + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg.raft_min_election_timeout_ticks = 25; + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg.raft_min_election_timeout_ticks = 10; + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)).unwrap(); cfg.raft_heartbeat_ticks = 11; + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.raft_log_gc_threshold = 0; + cfg.optimize_for(false); 
cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.raft_log_gc_size_limit = Some(ReadableSize(0)); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.raft_log_gc_size_limit = None; + cfg.optimize_for(false); cfg.validate(ReadableSize(20), false, ReadableSize(0)) .unwrap(); assert_eq!(cfg.raft_log_gc_size_limit, Some(ReadableSize(15))); @@ -1174,23 +1212,27 @@ mod tests { cfg.raft_base_tick_interval = ReadableDuration::secs(1); cfg.raft_election_timeout_ticks = 10; cfg.raft_store_max_leader_lease = ReadableDuration::secs(20); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.raft_log_gc_count_limit = Some(100); cfg.merge_max_log_gap = 110; + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.raft_log_gc_count_limit = None; + cfg.optimize_for(false); cfg.validate(ReadableSize::mb(1), false, ReadableSize(0)) .unwrap(); assert_eq!(cfg.raft_log_gc_count_limit, Some(768)); cfg = Config::new(); cfg.merge_check_tick_interval = ReadableDuration::secs(0); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); @@ -1198,64 +1240,76 @@ mod tests { cfg.raft_base_tick_interval = ReadableDuration::secs(1); cfg.raft_election_timeout_ticks = 10; cfg.peer_stale_state_check_interval = ReadableDuration::secs(5); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.peer_stale_state_check_interval = ReadableDuration::minutes(2); cfg.abnormal_leader_missing_duration = ReadableDuration::minutes(1); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.abnormal_leader_missing_duration = ReadableDuration::minutes(2); cfg.max_leader_missing_duration = ReadableDuration::minutes(1); + cfg.optimize_for(false); 
cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.local_read_batch_size = 0; + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.apply_batch_system.max_batch_size = Some(0); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.apply_batch_system.pool_size = 0; + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.store_batch_system.max_batch_size = Some(0); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.store_batch_system.pool_size = 0; + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.apply_batch_system.max_batch_size = Some(10241); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.store_batch_system.max_batch_size = Some(10241); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.hibernate_regions = true; + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)).unwrap(); assert_eq!(cfg.store_batch_system.max_batch_size, Some(256)); assert_eq!(cfg.apply_batch_system.max_batch_size, Some(256)); cfg = Config::new(); cfg.hibernate_regions = false; + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)).unwrap(); assert_eq!(cfg.store_batch_system.max_batch_size, Some(1024)); assert_eq!(cfg.apply_batch_system.max_batch_size, Some(256)); @@ -1264,17 +1318,20 @@ mod tests { cfg.hibernate_regions = true; cfg.store_batch_system.max_batch_size = Some(123); cfg.apply_batch_system.max_batch_size = Some(234); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)).unwrap(); assert_eq!(cfg.store_batch_system.max_batch_size, Some(123)); 
assert_eq!(cfg.apply_batch_system.max_batch_size, Some(234)); cfg = Config::new(); cfg.future_poll_size = 0; + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg = Config::new(); cfg.snap_generator_pool_size = 0; + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); @@ -1282,6 +1339,7 @@ mod tests { cfg.raft_base_tick_interval = ReadableDuration::secs(1); cfg.raft_election_timeout_ticks = 11; cfg.raft_store_max_leader_lease = ReadableDuration::secs(11); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); @@ -1289,43 +1347,54 @@ mod tests { cfg.hibernate_regions = true; cfg.max_peer_down_duration = ReadableDuration::minutes(5); cfg.peer_stale_state_check_interval = ReadableDuration::minutes(5); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)).unwrap(); assert_eq!(cfg.max_peer_down_duration, ReadableDuration::minutes(10)); cfg = Config::new(); cfg.raft_max_size_per_msg = ReadableSize(0); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg.raft_max_size_per_msg = ReadableSize::gb(64); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg.raft_max_size_per_msg = ReadableSize::gb(3); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)).unwrap(); cfg = Config::new(); cfg.raft_entry_max_size = ReadableSize(0); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg.raft_entry_max_size = ReadableSize::mb(3073); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)) .unwrap_err(); cfg.raft_entry_max_size = ReadableSize::gb(3); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)).unwrap(); cfg = Config::new(); + cfg.optimize_for(false); cfg.validate(split_size, false, ReadableSize(0)).unwrap(); assert_eq!(cfg.region_split_check_diff(), split_size 
/ 16); cfg = Config::new(); + cfg.optimize_for(false); cfg.validate(split_size, true, split_size / 8).unwrap(); assert_eq!(cfg.region_split_check_diff(), split_size / 16); cfg = Config::new(); + cfg.optimize_for(false); cfg.validate(split_size, true, split_size / 20).unwrap(); assert_eq!(cfg.region_split_check_diff(), split_size / 20); cfg = Config::new(); cfg.region_split_check_diff = Some(ReadableSize(1)); + cfg.optimize_for(false); cfg.validate(split_size, true, split_size / 20).unwrap(); assert_eq!(cfg.region_split_check_diff(), ReadableSize(1)); } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 03c0688e8f2..9a7df9d5473 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -2420,7 +2420,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER // Start from last checked key. let mut ranges_need_check = - Vec::with_capacity(self.ctx.cfg.region_compact_check_step as usize + 1); + Vec::with_capacity(self.ctx.cfg.region_compact_check_step() as usize + 1); ranges_need_check.push(self.fsm.store.last_compact_checked_key.clone()); let largest_key = { @@ -2440,7 +2440,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER )); ranges_need_check.extend( left_ranges - .take(self.ctx.cfg.region_compact_check_step as usize) + .take(self.ctx.cfg.region_compact_check_step() as usize) .map(|(k, _)| k.to_owned()), ); diff --git a/components/raftstore/src/store/worker/compact.rs b/components/raftstore/src/store/worker/compact.rs index 7bc7052b277..4448e26a5b3 100644 --- a/components/raftstore/src/store/worker/compact.rs +++ b/components/raftstore/src/store/worker/compact.rs @@ -206,12 +206,11 @@ fn collect_ranges_need_compact( for range in ranges.windows(2) { // Get total entries and total versions in this range and checks if it needs to // be compacted. 
- if let Some((num_ent, num_ver)) = - box_try!(engine.get_range_entries_and_versions(CF_WRITE, &range[0], &range[1])) + if let Some(range_stats) = box_try!(engine.get_range_stats(CF_WRITE, &range[0], &range[1])) { if need_compact( - num_ent, - num_ver, + range_stats.num_entries, + range_stats.num_versions, tombstones_num_threshold, tombstones_percent_threshold, ) { @@ -357,12 +356,12 @@ mod tests { engine.flush_cf(CF_WRITE, true).unwrap(); let (start, end) = (data_key(b"k0"), data_key(b"k5")); - let (entries, version) = engine - .get_range_entries_and_versions(CF_WRITE, &start, &end) + let range_stats = engine + .get_range_stats(CF_WRITE, &start, &end) .unwrap() .unwrap(); - assert_eq!(entries, 10); - assert_eq!(version, 5); + assert_eq!(range_stats.num_entries, 10); + assert_eq!(range_stats.num_versions, 5); // mvcc_put 5..10 for i in 5..10 { @@ -372,12 +371,9 @@ mod tests { engine.flush_cf(CF_WRITE, true).unwrap(); let (s, e) = (data_key(b"k5"), data_key(b"k9")); - let (entries, version) = engine - .get_range_entries_and_versions(CF_WRITE, &s, &e) - .unwrap() - .unwrap(); - assert_eq!(entries, 5); - assert_eq!(version, 5); + let range_stats = engine.get_range_stats(CF_WRITE, &s, &e).unwrap().unwrap(); + assert_eq!(range_stats.num_entries, 5); + assert_eq!(range_stats.num_versions, 5); let ranges_need_to_compact = collect_ranges_need_compact( &engine, @@ -399,12 +395,9 @@ mod tests { engine.flush_cf(CF_WRITE, true).unwrap(); let (s, e) = (data_key(b"k5"), data_key(b"k9")); - let (entries, version) = engine - .get_range_entries_and_versions(CF_WRITE, &s, &e) - .unwrap() - .unwrap(); - assert_eq!(entries, 10); - assert_eq!(version, 5); + let range_stats = engine.get_range_stats(CF_WRITE, &s, &e).unwrap().unwrap(); + assert_eq!(range_stats.num_entries, 10); + assert_eq!(range_stats.num_versions, 5); let ranges_need_to_compact = collect_ranges_need_compact( &engine, diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 
3f9c27ab645..228358d52a1 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -714,6 +714,7 @@ where let server_config = Arc::new(VersionTrack::new(self.core.config.server.clone())); + self.core.config.raft_store.optimize_for(false); self.core .config .raft_store diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 6207b778691..da970a7e749 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -682,6 +682,7 @@ where let server_config = Arc::new(VersionTrack::new(self.core.config.server.clone())); + self.core.config.raft_store.optimize_for(true); self.core .config .raft_store diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 535306c5ae9..1674d860ccc 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -204,6 +204,7 @@ impl Simulator for NodeCluster { let simulate_trans = SimulateTransport::new(self.trans.clone()); let mut raft_store = cfg.raft_store.clone(); + raft_store.optimize_for(true); raft_store .validate( cfg.coprocessor.region_split_size(), @@ -323,6 +324,7 @@ impl Simulator for NodeCluster { let enable_region_bucket = cfg.coprocessor.enable_region_bucket(); let region_bucket_size = cfg.coprocessor.region_bucket_size; let mut raftstore_cfg = cfg.tikv.raft_store; + raftstore_cfg.optimize_for(true); raftstore_cfg .validate(region_split_size, enable_region_bucket, region_bucket_size) .unwrap(); diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index c7a8234c9f1..30da5a4fc8f 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -375,6 +375,7 @@ impl ServerCluster { // Create node. 
let mut raft_store = cfg.raft_store.clone(); + raft_store.optimize_for(true); raft_store .validate( cfg.coprocessor.region_split_size(), diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 75ab0064a17..3f0168fa361 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -237,6 +237,7 @@ impl Simulator for NodeCluster { let simulate_trans = SimulateTransport::new(self.trans.clone()); let mut raft_store = cfg.raft_store.clone(); + raft_store.optimize_for(false); raft_store .validate( cfg.coprocessor.region_split_size(), @@ -352,6 +353,7 @@ impl Simulator for NodeCluster { let enable_region_bucket = cfg.coprocessor.enable_region_bucket(); let region_bucket_size = cfg.coprocessor.region_bucket_size; let mut raftstore_cfg = cfg.tikv.raft_store; + raftstore_cfg.optimize_for(false); raftstore_cfg .validate(region_split_size, enable_region_bucket, region_bucket_size) .unwrap(); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 8d4d4deea69..1dcf63635a2 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -496,6 +496,7 @@ impl ServerCluster { let apply_router = system.apply_router(); // Create node. 
let mut raft_store = cfg.raft_store.clone(); + raft_store.optimize_for(false); raft_store .validate( cfg.coprocessor.region_split_size(), diff --git a/src/config/mod.rs b/src/config/mod.rs index a29dcf5b9f1..0b3f43a48ee 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3413,6 +3413,8 @@ impl TikvConfig { self.coprocessor.validate()?; self.split .optimize_for(self.coprocessor.region_split_size()); + self.raft_store + .optimize_for(self.storage.engine == EngineType::RaftKv2); self.raft_store.validate( self.coprocessor.region_split_size(), self.coprocessor.enable_region_bucket(), @@ -5754,6 +5756,7 @@ mod tests { default_cfg.rocksdb.defaultcf.target_file_size_base = Some(ReadableSize::mb(8)); default_cfg.rocksdb.lockcf.target_file_size_base = Some(ReadableSize::mb(8)); default_cfg.raftdb.defaultcf.target_file_size_base = Some(ReadableSize::mb(8)); + default_cfg.raft_store.region_compact_check_step = Some(100); // Other special cases. cfg.pd.retry_max_count = default_cfg.pd.retry_max_count; // Both -1 and isize::MAX are the same. 
diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index cd7680e8147..661be858964 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -194,9 +194,11 @@ fn test_serde_custom_tikv_config() { region_split_check_diff: Some(ReadableSize::mb(20)), region_compact_check_interval: ReadableDuration::secs(12), clean_stale_peer_delay: ReadableDuration::secs(0), - region_compact_check_step: 1_234, + region_compact_check_step: Some(1_234), region_compact_min_tombstones: 999, region_compact_tombstones_percent: 33, + region_compact_min_redundant_rows: 999, + region_compact_redundant_rows_percent: 33, pd_heartbeat_tick_interval: ReadableDuration::minutes(12), pd_store_heartbeat_tick_interval: ReadableDuration::secs(12), notify_capacity: 12_345, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index e3940cc7067..6f5d9dc6cdc 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -167,6 +167,8 @@ clean-stale-peer-delay = "0s" region-compact-check-step = 1234 region-compact-min-tombstones = 999 region-compact-tombstones-percent = 33 +region-compact-min-redundant-rows = 999 +region-compact-redundant-rows-percent = 33 pd-heartbeat-tick-interval = "12m" pd-store-heartbeat-tick-interval = "12s" snap-mgr-gc-tick-interval = "12m" diff --git a/tests/integrations/raftstore/test_compact_after_delete.rs b/tests/integrations/raftstore/test_compact_after_delete.rs index 13cfb535e97..6ba405bb918 100644 --- a/tests/integrations/raftstore/test_compact_after_delete.rs +++ b/tests/integrations/raftstore/test_compact_after_delete.rs @@ -5,8 +5,9 @@ use std::{ time::Duration, }; +use collections::HashMap; use engine_rocks::{raw::Range, util::get_cf_handle}; -use engine_traits::{MiscExt, CF_WRITE}; +use engine_traits::{CachedTablet, MiscExt, CF_WRITE}; use keys::{data_key, DATA_MAX_KEY}; use test_raftstore::*; use 
tikv::storage::mvcc::{TimeStamp, Write, WriteType}; @@ -35,7 +36,7 @@ fn test_compact_after_delete(cluster: &mut Cluster) { cluster.cfg.raft_store.region_compact_check_interval = ReadableDuration::millis(100); cluster.cfg.raft_store.region_compact_min_tombstones = 500; cluster.cfg.raft_store.region_compact_tombstones_percent = 50; - cluster.cfg.raft_store.region_compact_check_step = 1; + cluster.cfg.raft_store.region_compact_check_step = Some(1); cluster.cfg.rocksdb.titan.enabled = true; cluster.run(); @@ -85,3 +86,153 @@ fn test_node_compact_after_delete() { let mut cluster = new_node_cluster(0, count); test_compact_after_delete(&mut cluster); } + +#[test] +fn test_node_compact_after_delete_v2() { + let count = 1; + let mut cluster = test_raftstore_v2::new_node_cluster(0, count); + + cluster.cfg.raft_store.region_compact_check_interval = ReadableDuration::millis(100); + cluster.cfg.raft_store.region_compact_min_tombstones = 50; + cluster.cfg.raft_store.region_compact_tombstones_percent = 50; + // disable it + cluster.cfg.raft_store.region_compact_min_redundant_rows = 10000000; + cluster.cfg.raft_store.region_compact_check_step = Some(2); + cluster.cfg.rocksdb.titan.enabled = true; + cluster.run(); + + let region = cluster.get_region(b""); + let (split_key, _) = gen_mvcc_put_kv(b"k100", b"", 1.into(), 2.into()); + cluster.must_split(®ion, &split_key); + + for i in 0..200 { + let (k, v) = (format!("k{:03}", i), format!("value{}", i)); + let (k, v) = gen_mvcc_put_kv(k.as_bytes(), v.as_bytes(), 1.into(), 2.into()); + cluster.must_put_cf(CF_WRITE, &k, &v); + } + for (registry, _) in &cluster.engines { + registry.for_each_opened_tablet(|_, db: &mut CachedTablet<_>| { + if let Some(db) = db.latest() { + db.flush_cf(CF_WRITE, true).unwrap(); + } + true + }) + } + + let (sender, receiver) = mpsc::channel(); + let sync_sender = Mutex::new(sender); + fail::cfg_callback("raftstore-v2::CheckAndCompact::AfterCompact", move || { + let sender = sync_sender.lock().unwrap(); + 
sender.send(true).unwrap(); + }) + .unwrap(); + for i in 0..200 { + let k = format!("k{:03}", i); + let k = gen_delete_k(k.as_bytes(), 2.into()); + cluster.must_delete_cf(CF_WRITE, &k); + } + for (registry, _) in &cluster.engines { + registry.for_each_opened_tablet(|_, db: &mut CachedTablet<_>| { + if let Some(db) = db.latest() { + db.flush_cf(CF_WRITE, true).unwrap(); + } + true + }) + } + + // wait for 2 regions' compaction. + receiver.recv_timeout(Duration::from_millis(5000)).unwrap(); + receiver.recv_timeout(Duration::from_millis(5000)).unwrap(); + + for (registry, _) in &cluster.engines { + registry.for_each_opened_tablet(|_, db: &mut CachedTablet<_>| { + if let Some(db) = db.latest() { + let cf_handle = get_cf_handle(db.as_inner(), CF_WRITE).unwrap(); + let approximate_size = db + .as_inner() + .get_approximate_sizes_cf(cf_handle, &[Range::new(b"", DATA_MAX_KEY)])[0]; + assert_eq!(approximate_size, 0); + } + true + }) + } +} + +#[test] +fn test_node_compact_after_update_v2() { + let count = 1; + let mut cluster = test_raftstore_v2::new_node_cluster(0, count); + + cluster.cfg.raft_store.region_compact_check_interval = ReadableDuration::millis(100); + // disable it + cluster.cfg.raft_store.region_compact_min_tombstones = 1000000; + cluster.cfg.raft_store.region_compact_redundant_rows_percent = 40; + cluster.cfg.raft_store.region_compact_min_redundant_rows = 50; + cluster.cfg.raft_store.region_compact_check_step = Some(2); + cluster.cfg.rocksdb.titan.enabled = true; + cluster.run(); + + let region = cluster.get_region(b""); + let (split_key, _) = gen_mvcc_put_kv(b"k100", b"", 1.into(), 2.into()); + cluster.must_split(®ion, &split_key); + + for i in 0..200 { + let (k, v) = (format!("k{:03}", i), format!("value{}", i)); + let (k, v) = gen_mvcc_put_kv(k.as_bytes(), v.as_bytes(), 1.into(), 2.into()); + cluster.must_put_cf(CF_WRITE, &k, &v); + + let (k, v) = (format!("k{:03}", i), format!("value{}", i)); + let (k, v) = gen_mvcc_put_kv(k.as_bytes(), v.as_bytes(), 
3.into(), 4.into()); + cluster.must_put_cf(CF_WRITE, &k, &v); + } + for (registry, _) in &cluster.engines { + registry.for_each_opened_tablet(|_, db: &mut CachedTablet<_>| { + if let Some(db) = db.latest() { + db.flush_cf(CF_WRITE, true).unwrap(); + } + true + }) + } + + fail::cfg("on_collect_regions_to_compact", "pause").unwrap(); + let mut db_size_before_compact = HashMap::default(); + for (registry, _) in &cluster.engines { + registry.for_each_opened_tablet(|id, db: &mut CachedTablet<_>| { + if let Some(db) = db.latest() { + let cf_handle = get_cf_handle(db.as_inner(), CF_WRITE).unwrap(); + let approximate_size = db + .as_inner() + .get_approximate_sizes_cf(cf_handle, &[Range::new(b"", DATA_MAX_KEY)])[0]; + db_size_before_compact.insert(id, approximate_size); + } + true + }) + } + fail::remove("on_collect_regions_to_compact"); + + let (sender, receiver) = mpsc::channel(); + let sync_sender = Mutex::new(sender); + fail::cfg_callback("raftstore-v2::CheckAndCompact::AfterCompact", move || { + let sender = sync_sender.lock().unwrap(); + sender.send(true).unwrap(); + }) + .unwrap(); + + // wait for 2 regions' compaction. 
+ receiver.recv_timeout(Duration::from_millis(5000)).unwrap(); + receiver.recv_timeout(Duration::from_millis(5000)).unwrap(); + + for (registry, _) in &cluster.engines { + registry.for_each_opened_tablet(|id, db: &mut CachedTablet<_>| { + if let Some(db) = db.latest() { + let cf_handle = get_cf_handle(db.as_inner(), CF_WRITE).unwrap(); + let approximate_size = db + .as_inner() + .get_approximate_sizes_cf(cf_handle, &[Range::new(b"", DATA_MAX_KEY)])[0]; + let size_before = db_size_before_compact.get(&id).unwrap(); + assert!(approximate_size < *size_before); + } + true + }) + } +} From 9913a8e1372dfdae0d957eb5c25b1fd8c2de290a Mon Sep 17 00:00:00 2001 From: Lucas Date: Mon, 22 May 2023 11:55:37 +0800 Subject: [PATCH 0695/1149] encryption: tidy encryption slices and supply basements for KMS on Azure (#14694) ref tikv/tikv#14693 This pr includes: * Tidy the previous implementation on KMS part. * Add credentials to make preparations for supporting encryption at rest on Azure. Signed-off-by: Lucasliang --- Cargo.lock | 2 + components/cloud/aws/src/kms.rs | 4 +- components/cloud/azure/Cargo.toml | 4 + components/cloud/azure/src/lib.rs | 3 + .../certificate_credentials.rs | 266 ++++++++++++++++++ .../cloud/azure/src/token_credentials/mod.rs | 3 + components/cloud/src/kms.rs | 53 +++- components/encryption/Cargo.toml | 1 + components/encryption/export/src/lib.rs | 98 +------ components/encryption/src/crypter.rs | 41 +-- components/encryption/src/errors.rs | 36 +++ components/encryption/src/lib.rs | 8 +- components/encryption/src/master_key/kms.rs | 83 +++--- components/encryption/src/master_key/mem.rs | 6 +- components/encryption/src/master_key/mod.rs | 2 +- 15 files changed, 417 insertions(+), 193 deletions(-) create mode 100644 components/cloud/azure/src/token_credentials/certificate_credentials.rs create mode 100644 components/cloud/azure/src/token_credentials/mod.rs diff --git a/Cargo.lock b/Cargo.lock index dfe4894ac51..ae6fc81d772 100644 --- a/Cargo.lock +++ 
b/Cargo.lock @@ -364,6 +364,7 @@ dependencies = [ "azure_storage_blobs", "base64 0.13.0", "cloud", + "fail", "futures 0.3.15", "futures-util", "kvproto", @@ -1521,6 +1522,7 @@ dependencies = [ "async-trait", "byteorder", "bytes", + "cloud", "crc32fast", "crossbeam", "derive_more", diff --git a/components/cloud/aws/src/kms.rs b/components/cloud/aws/src/kms.rs index 040db46bb53..baa54e39fd5 100644 --- a/components/cloud/aws/src/kms.rs +++ b/components/cloud/aws/src/kms.rs @@ -5,7 +5,7 @@ use std::ops::Deref; use async_trait::async_trait; use cloud::{ error::{Error, KmsError, Result}, - kms::{Config, DataKeyPair, EncryptedKey, KeyId, KmsProvider, PlainKey}, + kms::{Config, CryptographyType, DataKeyPair, EncryptedKey, KeyId, KmsProvider, PlainKey}, }; use rusoto_core::{request::DispatchSignedRequest, RusotoError}; use rusoto_credential::ProvideAwsCredentials; @@ -119,7 +119,7 @@ impl KmsProvider for AwsKms { let plaintext_key = response.plaintext.unwrap().as_ref().to_vec(); Ok(DataKeyPair { encrypted: EncryptedKey::new(ciphertext_key)?, - plaintext: PlainKey::new(plaintext_key)?, + plaintext: PlainKey::new(plaintext_key, CryptographyType::AesGcm256)?, }) }) } diff --git a/components/cloud/azure/Cargo.toml b/components/cloud/azure/Cargo.toml index b9fe8046def..e06d00251b5 100644 --- a/components/cloud/azure/Cargo.toml +++ b/components/cloud/azure/Cargo.toml @@ -4,6 +4,9 @@ version = "0.0.1" edition = "2021" publish = false +[features] +failpoints = ["fail/failpoints"] + [dependencies] async-trait = "0.1" azure_core = { version = "0.11.0", git = "https://github.com/Azure/azure-sdk-for-rust" } @@ -12,6 +15,7 @@ azure_storage = { version = "0.11.0", git = "https://github.com/Azure/azure-sdk- azure_storage_blobs = { version = "0.11.0", git = "https://github.com/Azure/azure-sdk-for-rust" } base64 = "0.13" cloud = { workspace = true } +fail = "0.5" futures = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } kvproto = { workspace = true } 
diff --git a/components/cloud/azure/src/lib.rs b/components/cloud/azure/src/lib.rs index 01f57d7b0cf..100ea9047c4 100644 --- a/components/cloud/azure/src/lib.rs +++ b/components/cloud/azure/src/lib.rs @@ -1,4 +1,7 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. mod azblob; +mod token_credentials; + pub use azblob::{AzureStorage, Config}; +pub use token_credentials::certificate_credentials::ClientCertificateCredentialExt; diff --git a/components/cloud/azure/src/token_credentials/certificate_credentials.rs b/components/cloud/azure/src/token_credentials/certificate_credentials.rs new file mode 100644 index 00000000000..73e31441a0d --- /dev/null +++ b/components/cloud/azure/src/token_credentials/certificate_credentials.rs @@ -0,0 +1,266 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{str, sync::Arc, time::Duration}; + +use azure_core::{ + auth::{AccessToken, TokenCredential, TokenResponse}, + base64, content_type, + error::{Error, ErrorKind}, + headers, new_http_client, HttpClient, Method, Request, +}; +use azure_identity::authority_hosts::AZURE_PUBLIC_CLOUD; +use openssl::{ + error::ErrorStack, + hash::{hash, DigestBytes, MessageDigest}, + pkcs12::Pkcs12, + pkey::{PKey, Private}, + sign::Signer, + x509::X509, +}; +use serde::Deserialize; +use time::OffsetDateTime; +use url::{form_urlencoded, Url}; + +/// Refresh time to use in seconds +const DEFAULT_REFRESH_TIME: i64 = 300; + +/// Provides options to configure how the Identity library makes authentication +/// requests to Azure Active Directory. 
+#[derive(Clone, Debug, PartialEq)] +struct CertificateCredentialOptions { + authority_host: String, + send_certificate_chain: bool, +} + +impl Default for CertificateCredentialOptions { + fn default() -> Self { + Self { + authority_host: AZURE_PUBLIC_CLOUD.to_owned(), + send_certificate_chain: true, + } + } +} + +impl CertificateCredentialOptions { + fn authority_host(&self) -> &str { + &self.authority_host + } +} + +#[derive(Deserialize, Debug, Default)] +#[serde(default)] +struct AadTokenResponse { + token_type: String, + expires_in: u64, + ext_expires_in: u64, + access_token: String, +} + +/// Enables authentication to Azure Active Directory using a client certificate +/// that was generated for an App Registration. It will automatically cache the +/// latest token from Azure Active Directory. +/// +/// In order to use subject name validation send_cert_chain option must be set +/// to true The certificate is expected to be in base64 encoded PKCS12 format. +/// +/// TODO: make `ClientCertificateCredentialExt` directly extended from +/// `ClientCertificateCredential` if `ClientCertificateCredential` is nightly +/// released. +pub struct ClientCertificateCredentialExt { + http_client: Arc, + + tenant_id: String, + client_id: String, + /// Certificate in PKCS12 format, encoded in base64 + certificate: String, + /// Certificate Pass, default with "" + certificate_pass: String, + options: CertificateCredentialOptions, +} + +impl ClientCertificateCredentialExt { + /// Create a new ClientCertificateCredentialExt + pub fn new( + tenant_id: String, + client_id: String, + certificate: String, + certificate_pass: String, + ) -> Self { + Self { + http_client: new_http_client(), + tenant_id, + client_id, + certificate, + certificate_pass, + options: CertificateCredentialOptions::default(), + } + } + + /// Build a new ClientCertificateCredentialExt according to + /// a given certificate. 
+ pub fn build( + tenant_id: String, + client_id: String, + certificate_path: String, + ) -> Result> { + let bytes = std::fs::read(certificate_path)?; + Ok(ClientCertificateCredentialExt::new( + tenant_id, + client_id, + base64::encode(bytes), + "".into(), + )) + } + + fn options(&self) -> &CertificateCredentialOptions { + &self.options + } + + fn sign(jwt: &str, pkey: &PKey) -> Result, ErrorStack> { + let mut signer = Signer::new(MessageDigest::sha256(), pkey)?; + signer.update(jwt.as_bytes())?; + signer.sign_to_vec() + } + + fn get_thumbprint(cert: &X509) -> Result { + let der = cert.to_der()?; + let digest = hash(MessageDigest::sha1(), &der)?; + Ok(digest) + } + + fn as_jwt_part(part: &[u8]) -> String { + base64::encode_url_safe(part) + } +} + +fn get_encoded_cert(cert: &X509) -> azure_core::Result { + Ok(format!( + "\"{}\"", + base64::encode(cert.to_pem().map_err(openssl_error)?) + )) +} + +fn openssl_error(err: ErrorStack) -> azure_core::error::Error { + Error::new(ErrorKind::Credential, err) +} + +// Not care about "wasm32" platform, this is the requirement from +// [`TokenCredential`](https://github.com/Azure/azure-sdk-for-rust/blob/main/sdk/core/src/auth.rs#L39-L42). +#[cfg_attr(target_arch = "wasm32", async_trait::async_trait(?Send))] +#[cfg_attr(not(target_arch = "wasm32"), async_trait::async_trait)] +impl TokenCredential for ClientCertificateCredentialExt { + // As previous [TODO] shows, following operations in `get_token` is just + // extended from `ClientCertificateCredential::get_token()` as a special + // version with caching feature and stable feature. + // Reference of the REST API: https://learn.microsoft.com/en-us/azure/key-vault/general/common-parameters-and-headers. 
+ async fn get_token(&self, resource: &str) -> azure_core::Result { + let options = self.options(); + let url = &format!( + "{}/{}/oauth2/v2.0/token", + options.authority_host(), + self.tenant_id + ); + + let certificate = base64::decode(&self.certificate) + .map_err(|_| Error::message(ErrorKind::Credential, "Base64 decode failed"))?; + let certificate = Pkcs12::from_der(&certificate) + .map_err(openssl_error)? + .parse2(&self.certificate_pass) + .map_err(openssl_error)?; + + if certificate.cert.as_ref().is_none() { + return Err(Error::message( + ErrorKind::Credential, + "Certificate not found", + )); + } + let cert = certificate.cert.as_ref().unwrap(); + + if certificate.pkey.as_ref().is_none() { + return Err(Error::message( + ErrorKind::Credential, + "Private key not found", + )); + } + let pkey = certificate.pkey.as_ref().unwrap(); + + let thumbprint = + ClientCertificateCredentialExt::get_thumbprint(cert).map_err(openssl_error)?; + + let uuid = uuid::Uuid::new_v4(); + let current_time = OffsetDateTime::now_utc().unix_timestamp(); + let expiry_time = current_time + DEFAULT_REFRESH_TIME; + let x5t = base64::encode(thumbprint); + + let header = match options.send_certificate_chain { + true => { + let base_signature = get_encoded_cert(cert)?; + let x5c = match certificate.ca { + Some(chain) => { + let chain = chain + .into_iter() + .map(|x| get_encoded_cert(&x)) + .collect::>>()? + .join(","); + format! 
{"{},{}", base_signature, chain} + } + None => base_signature, + }; + format!( + r#"{{"alg":"RS256","typ":"JWT", "x5t":"{}", "x5c":[{}]}}"#, + x5t, x5c + ) + } + false => format!(r#"{{"alg":"RS256","typ":"JWT", "x5t":"{}"}}"#, x5t), + }; + let header = ClientCertificateCredentialExt::as_jwt_part(header.as_bytes()); + + let payload = format!( + r#"{{"aud":"{}","exp":{},"iss": "{}", "jti": "{}", "nbf": {}, "sub": "{}"}}"#, + url, expiry_time, self.client_id, uuid, current_time, self.client_id + ); + let payload = ClientCertificateCredentialExt::as_jwt_part(payload.as_bytes()); + + let jwt = format!("{}.{}", header, payload); + let signature = ClientCertificateCredentialExt::sign(&jwt, pkey).map_err(openssl_error)?; + let sig = ClientCertificateCredentialExt::as_jwt_part(&signature); + let client_assertion = format!("{}.{}", jwt, sig); + + let encoded = { + let mut encoded = &mut form_urlencoded::Serializer::new(String::new()); + encoded = encoded + .append_pair("client_id", self.client_id.as_str()) + .append_pair("scope", format!("{}/.default", resource).as_str()) + .append_pair( + "client_assertion_type", + "urn:ietf:params:oauth:client-assertion-type:jwt-bearer", + ) + .append_pair("client_assertion", client_assertion.as_str()) + .append_pair("grant_type", "client_credentials"); + encoded.finish() + }; + + let url = Url::parse(url)?; + let mut req = Request::new(url, Method::Post); + req.insert_header( + headers::CONTENT_TYPE, + content_type::APPLICATION_X_WWW_FORM_URLENCODED, + ); + req.set_body(encoded); + + let rsp = self.http_client.execute_request(&req).await?; + let rsp_status = rsp.status(); + let rsp_body = rsp.into_body().collect().await?; + + if !rsp_status.is_success() { + return Err(ErrorKind::http_response_from_body(rsp_status, &rsp_body).into_error()); + } + + let response: AadTokenResponse = serde_json::from_slice(&rsp_body)?; + let token = TokenResponse::new( + AccessToken::new(response.access_token.to_string()), + OffsetDateTime::now_utc() + 
Duration::from_secs(response.expires_in), + ); + Ok(token) + } +} diff --git a/components/cloud/azure/src/token_credentials/mod.rs b/components/cloud/azure/src/token_credentials/mod.rs new file mode 100644 index 00000000000..2035a1f8338 --- /dev/null +++ b/components/cloud/azure/src/token_credentials/mod.rs @@ -0,0 +1,3 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +pub mod certificate_credentials; diff --git a/components/cloud/src/kms.rs b/components/cloud/src/kms.rs index c5cbde412c6..f1dfe783b5b 100644 --- a/components/cloud/src/kms.rs +++ b/components/cloud/src/kms.rs @@ -3,6 +3,7 @@ use async_trait::async_trait; use derive_more::Deref; use kvproto::encryptionpb::MasterKeyKms; +use tikv_util::box_err; use crate::error::{Error, KmsError, Result}; @@ -66,15 +67,55 @@ impl EncryptedKey { } } +#[repr(u8)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum CryptographyType { + Plain = 0, + AesGcm256, + // .. +} + +impl CryptographyType { + #[inline] + pub fn target_key_size(&self) -> usize { + match self { + CryptographyType::Plain => 0, // Plain text has no limitation + CryptographyType::AesGcm256 => 32, + } + } +} + // PlainKey is a newtype used to mark a vector a plaintext key. // It requires the vec to be a valid AesGcmCrypter key. 
-#[derive(Deref)] -pub struct PlainKey(Vec); +pub struct PlainKey { + tag: CryptographyType, + key: Vec, +} impl PlainKey { - pub fn new(key: Vec) -> Result { - // TODO: crypter.rs in encryption performs additional validation - Ok(Self(key)) + pub fn new(key: Vec, t: CryptographyType) -> Result { + let limitation = t.target_key_size(); + if limitation > 0 && key.len() != limitation { + Err(Error::KmsError(KmsError::Other(box_err!( + "encryption method and key length mismatch, expect {} get + {}", + limitation, + key.len() + )))) + } else { + Ok(Self { key, tag: t }) + } + } + + pub fn key_tag(&self) -> CryptographyType { + self.tag + } +} + +impl core::ops::Deref for PlainKey { + type Target = Vec; + fn deref(&self) -> &Self::Target { + &self.key } } @@ -93,6 +134,8 @@ pub struct DataKeyPair { pub plaintext: PlainKey, } +/// `Key Management Service Provider`, serving for managing master key on +/// different cloud. #[async_trait] pub trait KmsProvider: Sync + Send + 'static + std::fmt::Debug { async fn generate_data_key(&self) -> Result; diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index 4a9a8634d49..021c9f23002 100644 --- a/components/encryption/Cargo.toml +++ b/components/encryption/Cargo.toml @@ -11,6 +11,7 @@ failpoints = ["fail/failpoints"] async-trait = "0.1" byteorder = "1.2" bytes = "1.0" +cloud = { workspace = true } crc32fast = "1.2" crossbeam = "0.8" derive_more = "0.99.3" diff --git a/components/encryption/export/src/lib.rs b/components/encryption/export/src/lib.rs index 31730f162c6..022d9b65c4e 100644 --- a/components/encryption/export/src/lib.rs +++ b/components/encryption/export/src/lib.rs @@ -1,16 +1,9 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{fmt::Debug, path::Path}; +use std::path::Path; -use async_trait::async_trait; #[cfg(feature = "cloud-aws")] use aws::{AwsKms, STORAGE_VENDOR_NAME_AWS}; -#[cfg(feature = "cloud-aws")] use cloud::kms::Config as CloudConfig; -use cloud::{ - kms::{EncryptedKey as CloudEncryptedKey, KmsProvider as CloudKmsProvider}, - Error as CloudError, -}; -use derive_more::Deref; #[cfg(feature = "cloud-aws")] pub use encryption::KmsBackend; pub use encryption::{ @@ -18,12 +11,8 @@ pub use encryption::{ DataKeyImporter, DataKeyManager, DataKeyManagerArgs, DecrypterReader, EncryptionConfig, Error, FileConfig, Iv, KmsConfig, MasterKeyConfig, Result, }; -use encryption::{ - DataKeyPair, EncryptedKey, FileBackend, KmsProvider, PlainKey, PlaintextBackend, - RetryCodedError, -}; -use error_code::{self, ErrorCode, ErrorCodeExt}; -use tikv_util::{box_err, error, info, stream::RetryError}; +use encryption::{cloud_convert_error, FileBackend, PlaintextBackend}; +use tikv_util::{box_err, error, info}; pub fn data_key_manager_from_config( config: &EncryptionConfig, @@ -48,10 +37,6 @@ pub fn create_backend(config: &MasterKeyConfig) -> Result> { result } -fn cloud_convert_error(msg: String) -> Box CloudConvertError> { - Box::new(|err: CloudError| CloudConvertError(err, msg)) -} - pub fn create_cloud_backend(config: &KmsConfig) -> Result> { info!("Encryption init cloud backend"; "region" => &config.region, @@ -64,10 +49,9 @@ pub fn create_cloud_backend(config: &KmsConfig) -> Result> { STORAGE_VENDOR_NAME_AWS | "" => { let conf = CloudConfig::from_proto(config.clone().into_proto()) .map_err(cloud_convert_error("aws from proto".to_owned()))?; - let kms_provider = CloudKms(Box::new( - AwsKms::new(conf).map_err(cloud_convert_error("new AWS KMS".to_owned()))?, - )); - Ok(Box::new(KmsBackend::new(Box::new(kms_provider))?) as Box) + let kms_provider = + Box::new(AwsKms::new(conf).map_err(cloud_convert_error("new AWS KMS".to_owned()))?); + Ok(Box::new(KmsBackend::new(kms_provider)?) 
as Box) } provider => Err(Error::Other(box_err!("provider not found {}", provider))), } @@ -82,73 +66,3 @@ fn create_backend_inner(config: &MasterKeyConfig) -> Result> { MasterKeyConfig::Kms { config } => return create_cloud_backend(config), }) } - -// CloudKMS adapts the KmsProvider definition from the cloud crate to that of -// the encryption crate -#[derive(Debug, Deref)] -struct CloudKms(Box); - -#[async_trait] -impl KmsProvider for CloudKms { - async fn generate_data_key(&self) -> Result { - let cdk = (**self) - .generate_data_key() - .await - .map_err(cloud_convert_error(format!( - "{} generate data key API", - self.name() - )))?; - Ok(DataKeyPair { - plaintext: PlainKey::new(cdk.plaintext.to_vec())?, - encrypted: EncryptedKey::new(cdk.encrypted.to_vec())?, - }) - } - - async fn decrypt_data_key(&self, data_key: &EncryptedKey) -> Result> { - let key = CloudEncryptedKey::new((*data_key).to_vec()).map_err(cloud_convert_error( - format!("{} data key init for decrypt", self.name()), - ))?; - Ok((**self) - .decrypt_data_key(&key) - .await - .map_err(cloud_convert_error(format!( - "{} decrypt data key API", - self.name() - )))?) 
- } - - fn name(&self) -> &str { - (**self).name() - } -} - -// CloudConverError adapts cloud errors to encryption errors -// As the abstract RetryCodedError -#[derive(Debug)] -pub struct CloudConvertError(CloudError, String); - -impl RetryCodedError for CloudConvertError {} - -impl std::fmt::Display for CloudConvertError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!("{} {}", &self.0, &self.1)) - } -} - -impl std::convert::From for Error { - fn from(err: CloudConvertError) -> Error { - Error::RetryCodedError(Box::new(err) as Box) - } -} - -impl RetryError for CloudConvertError { - fn is_retryable(&self) -> bool { - self.0.is_retryable() - } -} - -impl ErrorCodeExt for CloudConvertError { - fn error_code(&self) -> ErrorCode { - self.0.error_code() - } -} diff --git a/components/encryption/src/crypter.rs b/components/encryption/src/crypter.rs index 7379b8a32a3..3940d392be6 100644 --- a/components/encryption/src/crypter.rs +++ b/components/encryption/src/crypter.rs @@ -1,12 +1,12 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
use byteorder::{BigEndian, ByteOrder}; -use derive_more::Deref; +use cloud::kms::PlainKey; use engine_traits::EncryptionMethod as EtEncryptionMethod; use kvproto::encryptionpb::EncryptionMethod; use openssl::symm::{self, Cipher as OCipher}; use rand::{rngs::OsRng, RngCore}; -use tikv_util::{box_err, impl_display_as_debug}; +use tikv_util::box_err; use crate::{Error, Result}; @@ -147,7 +147,7 @@ impl<'k> AesGcmCrypter<'k> { let mut tag = AesGcmTag([0u8; GCM_TAG_LEN]); let ciphertext = symm::encrypt_aead( cipher, - &self.key.0, + self.key.as_slice(), Some(self.iv.as_slice()), &[], // AAD pt, @@ -160,7 +160,7 @@ impl<'k> AesGcmCrypter<'k> { let cipher = OCipher::aes_256_gcm(); let plaintext = symm::decrypt_aead( cipher, - &self.key.0, + self.key.as_slice(), Some(self.iv.as_slice()), &[], // AAD ct, @@ -187,38 +187,9 @@ pub fn verify_encryption_config(method: EncryptionMethod, key: &[u8]) -> Result< Ok(()) } -// PlainKey is a newtype used to mark a vector a plaintext key. -// It requires the vec to be a valid AesGcmCrypter key. 
-#[derive(Deref)] -pub struct PlainKey(Vec); - -impl PlainKey { - pub fn new(key: Vec) -> Result { - if key.len() != AesGcmCrypter::KEY_LEN { - return Err(box_err!( - "encryption method and key length mismatch, expect {} get {}", - AesGcmCrypter::KEY_LEN, - key.len() - )); - } - Ok(Self(key)) - } -} - -// Don't expose the key in a debug print -impl std::fmt::Debug for PlainKey { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_tuple("PlainKey") - .field(&"REDACTED".to_string()) - .finish() - } -} - -// Don't expose the key in a display print -impl_display_as_debug!(PlainKey); - #[cfg(test)] mod tests { + use cloud::kms::CryptographyType; use hex::FromHex; use super::*; @@ -268,7 +239,7 @@ mod tests { let pt = Vec::from_hex(pt).unwrap(); let ct = Vec::from_hex(ct).unwrap(); - let key = PlainKey::new(Vec::from_hex(key).unwrap()).unwrap(); + let key = PlainKey::new(Vec::from_hex(key).unwrap(), CryptographyType::AesGcm256).unwrap(); let iv = Iv::from_slice(Vec::from_hex(iv).unwrap().as_slice()).unwrap(); let tag = Vec::from_hex(tag).unwrap(); diff --git a/components/encryption/src/errors.rs b/components/encryption/src/errors.rs index 2ee9aa51424..da23d923be7 100644 --- a/components/encryption/src/errors.rs +++ b/components/encryption/src/errors.rs @@ -7,6 +7,7 @@ use std::{ result, }; +use cloud::error::Error as CloudError; use error_code::{self, ErrorCode, ErrorCodeExt}; use openssl::error::ErrorStack as CrypterError; use protobuf::ProtobufError; @@ -108,3 +109,38 @@ impl RetryError for Error { } } } + +// CloudConverError adapts cloud errors to encryption errors +// As the abstract RetryCodedError +#[derive(Debug)] +pub struct CloudConvertError(CloudError, String); + +impl RetryCodedError for CloudConvertError {} + +impl std::fmt::Display for CloudConvertError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{} {}", &self.0, &self.1)) + } +} + +impl std::convert::From for Error { + fn 
from(err: CloudConvertError) -> Error { + Error::RetryCodedError(Box::new(err) as Box) + } +} + +impl RetryError for CloudConvertError { + fn is_retryable(&self) -> bool { + self.0.is_retryable() + } +} + +impl ErrorCodeExt for CloudConvertError { + fn error_code(&self) -> ErrorCode { + self.0.error_code() + } +} + +pub fn cloud_convert_error(msg: String) -> Box CloudConvertError> { + Box::new(|err: CloudError| CloudConvertError(err, msg)) +} diff --git a/components/encryption/src/lib.rs b/components/encryption/src/lib.rs index b5d5c5571cc..d37d2945273 100644 --- a/components/encryption/src/lib.rs +++ b/components/encryption/src/lib.rs @@ -16,18 +16,16 @@ pub use self::{ config::*, crypter::{ from_engine_encryption_method, to_engine_encryption_method, verify_encryption_config, - AesGcmCrypter, Iv, PlainKey, + AesGcmCrypter, Iv, }, encrypted_file::EncryptedFile, - errors::{Error, Result, RetryCodedError}, + errors::{cloud_convert_error, Error, Result, RetryCodedError}, file_dict_file::FileDictionaryFile, io::{ create_aes_ctr_crypter, DecrypterReader, DecrypterWriter, EncrypterReader, EncrypterWriter, }, manager::{DataKeyImporter, DataKeyManager, DataKeyManagerArgs}, - master_key::{ - Backend, DataKeyPair, EncryptedKey, FileBackend, KmsBackend, KmsProvider, PlaintextBackend, - }, + master_key::{Backend, FileBackend, KmsBackend, PlaintextBackend}, }; const TRASH_PREFIX: &str = "TRASH-"; diff --git a/components/encryption/src/master_key/kms.rs b/components/encryption/src/master_key/kms.rs index 8520e7a0cbe..1af0f10a9be 100644 --- a/components/encryption/src/master_key/kms.rs +++ b/components/encryption/src/master_key/kms.rs @@ -2,48 +2,17 @@ use std::{sync::Mutex, time::Duration}; -use async_trait::async_trait; -use derive_more::Deref; +use cloud::kms::{CryptographyType, DataKeyPair, EncryptedKey, KmsProvider, PlainKey}; use kvproto::encryptionpb::EncryptedContent; use tikv_util::{ - box_err, error, + box_err, stream::{retry, with_timeout}, 
sys::thread::ThreadBuildWrapper, }; use tokio::runtime::{Builder, Runtime}; use super::{metadata::MetadataKey, Backend, MemAesGcmBackend}; -use crate::{ - crypter::{Iv, PlainKey}, - Error, Result, -}; - -#[async_trait] -pub trait KmsProvider: Sync + Send + 'static + std::fmt::Debug { - async fn generate_data_key(&self) -> Result; - async fn decrypt_data_key(&self, data_key: &EncryptedKey) -> Result>; - fn name(&self) -> &str; -} - -// EncryptedKey is a newtype used to mark data as an encrypted key -// It requires the vec to be non-empty -#[derive(PartialEq, Clone, Debug, Deref)] -pub struct EncryptedKey(Vec); - -impl EncryptedKey { - pub fn new(key: Vec) -> Result { - if key.is_empty() { - error!("Encrypted content is empty"); - } - Ok(Self(key)) - } -} - -#[derive(Debug)] -pub struct DataKeyPair { - pub encrypted: EncryptedKey, - pub plaintext: PlainKey, -} +use crate::{crypter::Iv, errors::cloud_convert_error, Error, Result}; #[derive(Debug)] struct State { @@ -99,12 +68,16 @@ impl KmsBackend { let mut opt_state = self.state.lock().unwrap(); if opt_state.is_none() { let runtime = self.runtime.lock().unwrap(); - let data_key = runtime.block_on(retry(|| { - with_timeout(self.timeout_duration, self.kms_provider.generate_data_key()) - }))?; + let data_key = runtime + .block_on(retry(|| { + with_timeout(self.timeout_duration, self.kms_provider.generate_data_key()) + })) + .map_err(cloud_convert_error("get data key failed".into()))?; *opt_state = Some(State::new_from_datakey(DataKeyPair { - plaintext: PlainKey::new(data_key.plaintext.clone())?, - encrypted: EncryptedKey::new((*data_key.encrypted).clone())?, + plaintext: PlainKey::new(data_key.plaintext.clone(), CryptographyType::AesGcm256) + .map_err(cloud_convert_error("invalid plain key".into()))?, + encrypted: EncryptedKey::new((*data_key.encrypted).clone()) + .map_err(cloud_convert_error("invalid encrypted key".into()))?, })?); } let state = opt_state.as_ref().unwrap(); @@ -149,7 +122,8 @@ impl KmsBackend { let 
ciphertext_key = match content.metadata.get(MetadataKey::KmsCiphertextKey.as_str()) { None => return Err(box_err!("KMS ciphertext key not found")), - Some(key) => EncryptedKey::new(key.to_vec())?, + Some(key) => EncryptedKey::new(key.to_vec()) + .map_err(cloud_convert_error("invalid encrypted key".into()))?, }; { @@ -161,15 +135,18 @@ impl KmsBackend { } { let runtime = self.runtime.lock().unwrap(); - let plaintext = runtime.block_on(retry(|| { - with_timeout( - self.timeout_duration, - self.kms_provider.decrypt_data_key(&ciphertext_key), - ) - }))?; + let plaintext = runtime + .block_on(retry(|| { + with_timeout( + self.timeout_duration, + self.kms_provider.decrypt_data_key(&ciphertext_key), + ) + })) + .map_err(cloud_convert_error("decrypt encrypted key failed".into()))?; let data_key = DataKeyPair { encrypted: ciphertext_key, - plaintext: PlainKey::new(plaintext)?, + plaintext: PlainKey::new(plaintext, CryptographyType::AesGcm256) + .map_err(cloud_convert_error("invalid plain key".into()))?, }; let state = State::new_from_datakey(data_key)?; let content = state.encryption_backend.decrypt_content(content)?; @@ -196,6 +173,9 @@ impl Backend for KmsBackend { #[cfg(test)] mod fake { + use async_trait::async_trait; + use cloud::{error::Result, kms::KmsProvider}; + use super::*; const FAKE_VENDOR_NAME: &str = "FAKE"; @@ -209,7 +189,7 @@ mod fake { impl FakeKms { pub fn new(plaintext_key: Vec) -> Self { Self { - plaintext_key: PlainKey::new(plaintext_key).unwrap(), + plaintext_key: PlainKey::new(plaintext_key, CryptographyType::AesGcm256).unwrap(), } } } @@ -219,7 +199,8 @@ mod fake { async fn generate_data_key(&self) -> Result { Ok(DataKeyPair { encrypted: EncryptedKey::new(FAKE_DATA_KEY_ENCRYPTED.to_vec())?, - plaintext: PlainKey::new(self.plaintext_key.clone()).unwrap(), + plaintext: PlainKey::new(self.plaintext_key.clone(), CryptographyType::AesGcm256) + .unwrap(), }) } @@ -242,10 +223,10 @@ mod tests { #[test] fn test_state() { - let plaintext = 
PlainKey::new(vec![1u8; 32]).unwrap(); + let plaintext = PlainKey::new(vec![1u8; 32], CryptographyType::AesGcm256).unwrap(); let encrypted = EncryptedKey::new(vec![2u8; 32]).unwrap(); let data_key = DataKeyPair { - plaintext: PlainKey::new(plaintext.clone()).unwrap(), + plaintext: PlainKey::new(plaintext.clone(), CryptographyType::AesGcm256).unwrap(), encrypted: encrypted.clone(), }; let encrypted2 = EncryptedKey::new(vec![3u8; 32]).unwrap(); diff --git a/components/encryption/src/master_key/mem.rs b/components/encryption/src/master_key/mem.rs index 8e65b85fff6..619acc38ebf 100644 --- a/components/encryption/src/master_key/mem.rs +++ b/components/encryption/src/master_key/mem.rs @@ -1,10 +1,11 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. +use cloud::kms::{CryptographyType, PlainKey}; use kvproto::encryptionpb::EncryptedContent; use tikv_util::box_err; use super::metadata::*; -use crate::{crypter::*, AesGcmCrypter, Error, Iv, Result}; +use crate::{crypter::*, errors::cloud_convert_error, AesGcmCrypter, Error, Iv, Result}; /// An in-memory backend, it saves master key in memory. 
#[derive(Debug)] @@ -15,7 +16,8 @@ pub(crate) struct MemAesGcmBackend { impl MemAesGcmBackend { pub fn new(key: Vec) -> Result { Ok(MemAesGcmBackend { - key: PlainKey::new(key)?, + key: PlainKey::new(key, CryptographyType::AesGcm256) + .map_err(cloud_convert_error("new AWS KMS".to_owned()))?, }) } diff --git a/components/encryption/src/master_key/mod.rs b/components/encryption/src/master_key/mod.rs index 59578a2bcf0..6797565c118 100644 --- a/components/encryption/src/master_key/mod.rs +++ b/components/encryption/src/master_key/mod.rs @@ -28,7 +28,7 @@ mod metadata; use self::metadata::*; mod kms; -pub use self::kms::{DataKeyPair, EncryptedKey, KmsBackend, KmsProvider}; +pub use self::kms::KmsBackend; #[derive(Default, Debug, Clone)] pub struct PlaintextBackend {} From c69c97f716ef29d86203726e636de81e747247c3 Mon Sep 17 00:00:00 2001 From: ekexium Date: Mon, 22 May 2023 18:15:38 +0800 Subject: [PATCH 0696/1149] fix: find the correct last_change_ts after upgrade from versions < 6.5 (#14784) close tikv/tikv#14780 When making a LOCK record, if we do not know `last_change_ts`, iterate to the last actual write to find out. This also fixes the problem that ROLLBACK may prevent LOCKs making using of the optimization as mentioned in https://github.com/tikv/tikv/pull/13834 NOTE a implementation change: If versions_to_last_change > 0 but last_change_ts == 0, the key does not have a PUT/DELETE record before this write record, **OR the previous change is a DELETE**. 
Signed-off-by: ekexium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/txn_types/src/write.rs | 25 +--- src/storage/mvcc/reader/reader.rs | 113 +++++++++++++++++- .../txn/actions/acquire_pessimistic_lock.rs | 10 +- src/storage/txn/actions/common.rs | 54 +++++++++ src/storage/txn/actions/mod.rs | 1 + src/storage/txn/actions/prewrite.rs | 24 ++-- 6 files changed, 190 insertions(+), 37 deletions(-) create mode 100644 src/storage/txn/actions/common.rs diff --git a/components/txn_types/src/write.rs b/components/txn_types/src/write.rs index 1a20518e423..7d5bfb9fe2b 100644 --- a/components/txn_types/src/write.rs +++ b/components/txn_types/src/write.rs @@ -158,6 +158,9 @@ pub struct Write { pub last_change_ts: TimeStamp, /// The number of versions that need skipping from this record /// to find the latest PUT/DELETE record + /// NOTE: `last_change_ts` == 0 && `versions_to_last_change` > 0 means the + /// key does not exist. Either there is no such key **or the last write + /// is a DELETE**. pub versions_to_last_change: u64, /// The source of this txn. pub txn_source: u64, @@ -274,25 +277,6 @@ impl Write { txn_source: self.txn_source, } } - - /// Returns the new `last_change_ts` and `versions_to_last_change` according - /// to this write record. - pub fn next_last_change_info(&self, commit_ts: TimeStamp) -> (TimeStamp, u64) { - match self.write_type { - WriteType::Put | WriteType::Delete => (commit_ts, 1), - WriteType::Lock | WriteType::Rollback => { - // If neither `last_change_ts` nor `versions_to_last_change` exists, do not - // set `last_change_ts` to indicate we don't know where is the last change. - // This should not happen if data is written in new version TiKV. If we hope to - // support data from old TiKV, consider iterating to the last change to find it. 
- if !self.last_change_ts.is_zero() || self.versions_to_last_change != 0 { - (self.last_change_ts, self.versions_to_last_change + 1) - } else { - (TimeStamp::zero(), 0) - } - } - } - } } #[derive(PartialEq, Clone)] @@ -322,7 +306,8 @@ pub struct WriteRef<'a> { /// The number of versions that need skipping from this record /// to find the latest PUT/DELETE record. /// If versions_to_last_change > 0 but last_change_ts == 0, the key does not - /// have a PUT/DELETE record before this write record. + /// have a PUT/DELETE record before this write record, OR the previous + /// change is a DELETE. pub versions_to_last_change: u64, /// The source of this txn. pub txn_source: u64, diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 7c15c6d7735..4aeb424c1ff 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -371,7 +371,8 @@ impl MvccReader { } /// Gets the write record of the specified key's latest version before - /// specified `ts`, and additionally the write record's `commit_ts`, if any. + /// specified `ts` (i.e. a PUT or a DELETE), and additionally the write + /// record's `commit_ts`, if any. /// /// See also [`MvccReader::get_write`]. 
pub fn get_write_with_commit_ts( @@ -401,6 +402,10 @@ impl MvccReader { return Ok(None); } if write.versions_to_last_change < SEEK_BOUND { + if ts.is_zero() { + // this should only happen in tests + return Ok(None); + } ts = commit_ts.prev(); } else { let commit_ts = write.last_change_ts; @@ -759,6 +764,10 @@ impl MvccReader { pub fn snapshot_ext(&self) -> S::Ext<'_> { self.snapshot.ext() } + + pub fn snapshot(&self) -> &S { + &self.snapshot + } } #[cfg(test)] @@ -778,6 +787,7 @@ pub mod tests { kvrpcpb::{AssertionLevel, Context, PrewriteRequestPessimisticAction::*}, metapb::{Peer, Region}, }; + use pd_client::FeatureGate; use raftstore::store::RegionSnapshot; use txn_types::{LockType, Mutation}; @@ -786,8 +796,8 @@ pub mod tests { kv::Modify, mvcc::{tests::write, MvccReader, MvccTxn}, txn::{ - acquire_pessimistic_lock, cleanup, commit, gc, prewrite, CommitKind, TransactionKind, - TransactionProperties, + acquire_pessimistic_lock, cleanup, commit, gc, prewrite, + sched_pool::set_tls_feature_gate, CommitKind, TransactionKind, TransactionProperties, }, Engine, TestEngineBuilder, }; @@ -2442,4 +2452,101 @@ pub mod tests { assert_eq!(reader.statistics.write.next, 0); assert_eq!(reader.statistics.write.get, 0); } + + #[test] + fn test_skip_lock_after_upgrade_6_5() { + let path = tempfile::Builder::new() + .prefix("_test_storage_mvcc_reader_skip_lock_after_upgrade_6_5") + .tempdir() + .unwrap(); + let path = path.path().to_str().unwrap(); + let region = make_region(1, vec![], vec![]); + let db = open_db(path, true); + let mut engine = RegionEngine::new(&db, ®ion); + let k = b"k"; + + // 6.1.0, locks were written + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.1.0").unwrap(); + set_tls_feature_gate(feature_gate); + + engine.put(k, 1, 2); + // 10 locks were put + for start_ts in (6..30).into_iter().step_by(2) { + engine.lock(k, start_ts, start_ts + 1); + } + + // in 6.5 a new lock was put, and it should contain a `last_change_ts`. 
+ let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.5.0").unwrap(); + set_tls_feature_gate(feature_gate); + + engine.lock(k, 30, 31); + let snap = RegionSnapshot::::from_raw(db.clone(), region.clone()); + let mut reader = MvccReader::new(snap, None, false); + let res = reader + .get_write_with_commit_ts(&Key::from_raw(k), 100.into(), None) + .unwrap(); + assert!(res.is_some()); + let res = res.unwrap(); + assert_eq!(res.1, 2.into()); + assert_eq!(res.0.write_type, WriteType::Put); + assert_eq!(reader.statistics.write.seek, 1); + assert_eq!(reader.statistics.write.next, 0); + + // same as above, but for delete + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.1.0").unwrap(); + set_tls_feature_gate(feature_gate); + engine.delete(k, 51, 52); + for start_ts in (56..80).into_iter().step_by(2) { + engine.lock(k, start_ts, start_ts + 1); + } + let feature_gate = FeatureGate::default(); + feature_gate.set_version("6.5.0").unwrap(); + set_tls_feature_gate(feature_gate); + engine.lock(k, 80, 81); + let snap = RegionSnapshot::::from_raw(db, region); + let mut reader = MvccReader::new(snap, None, false); + let res = reader + .get_write_with_commit_ts(&Key::from_raw(k), 100.into(), None) + .unwrap(); + assert!(res.is_none()); + assert_eq!(reader.statistics.write.seek, 1); + assert_eq!(reader.statistics.write.next, 0); + } + + #[test] + fn test_locks_interleaving_rollbacks() { + // a ROLLBACK inside a chain of LOCKs won't prevent LOCKs from tracking the + // correct `last_change_ts` + let path = tempfile::Builder::new() + .prefix("_test_storage_mvcc_reader_locks_interleaving_rollbacks") + .tempdir() + .unwrap(); + let path = path.path().to_str().unwrap(); + let region = make_region(1, vec![], vec![]); + let db = open_db(path, true); + let mut engine = RegionEngine::new(&db, ®ion); + let k = b"k"; + engine.put(k, 1, 2); + + for start_ts in (6..30).into_iter().step_by(2) { + engine.lock(k, start_ts, start_ts + 1); + } + 
engine.rollback(k, 30); + engine.lock(k, 31, 32); + + let snap = RegionSnapshot::::from_raw(db, region); + let mut reader = MvccReader::new(snap, None, false); + let res = reader + .get_write_with_commit_ts(&Key::from_raw(k), 100.into(), None) + .unwrap(); + assert!(res.is_some()); + let res = res.unwrap(); + assert_eq!(res.0.write_type, WriteType::Put); + assert_eq!(res.1, 2.into()); + assert_eq!(reader.statistics.write.seek, 1); + assert_eq!(reader.statistics.write.next, 0); + } } diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index afdbace9e7a..d558741997f 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -10,7 +10,8 @@ use crate::storage::{ Error as MvccError, ErrorInner, MvccTxn, Result as MvccResult, SnapshotReader, }, txn::{ - actions::check_data_constraint::check_data_constraint, sched_pool::tls_can_enable, + actions::{check_data_constraint::check_data_constraint, common::next_last_change_info}, + sched_pool::tls_can_enable, scheduler::LAST_CHANGE_TS, }, types::PessimisticLockKeyResult, @@ -313,7 +314,8 @@ pub fn acquire_pessimistic_lock( Err(e) })?; - (last_change_ts, versions_to_last_change) = write.next_last_change_info(commit_ts); + (last_change_ts, versions_to_last_change) = + next_last_change_info(&key, &write, txn.start_ts, reader, commit_ts)?; // Load value if locked_with_conflict, so that when the client (TiDB) need to // read the value during statement retry, it will be possible to read the value @@ -1775,7 +1777,7 @@ pub mod tests { must_succeed(&mut engine, key, key, 80, 80); let lock = must_pessimistic_locked(&mut engine, key, 80, 80); assert!(lock.last_change_ts.is_zero()); - assert_eq!(lock.versions_to_last_change, 0); + assert_eq!(lock.versions_to_last_change, 1); pessimistic_rollback::tests::must_success(&mut engine, key, 80, 80); // Latest version is a ROLLBACK without last_change_ts @@ -1791,7 
+1793,7 @@ pub mod tests { must_succeed(&mut engine, key, key, 95, 95); let lock = must_pessimistic_locked(&mut engine, key, 95, 95); assert!(lock.last_change_ts.is_zero()); - assert_eq!(lock.versions_to_last_change, 0); + assert_eq!(lock.versions_to_last_change, 1); pessimistic_rollback::tests::must_success(&mut engine, key, 95, 95); // Latest version is a LOCK with last_change_ts diff --git a/src/storage/txn/actions/common.rs b/src/storage/txn/actions/common.rs new file mode 100644 index 00000000000..afe0e200f58 --- /dev/null +++ b/src/storage/txn/actions/common.rs @@ -0,0 +1,54 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use tikv_kv::{Snapshot, SEEK_BOUND}; +use txn_types::{Key, TimeStamp, Write, WriteType}; + +use crate::storage::mvcc::{Result, SnapshotReader}; + +/// Returns the new `last_change_ts` and `versions_to_last_change` according +/// to this write record. If it is unknown from the given write, try iterate to +/// the last change and find the answer. +pub fn next_last_change_info( + key: &Key, + write: &Write, + start_ts: TimeStamp, + original_reader: &mut SnapshotReader, + commit_ts: TimeStamp, +) -> Result<(TimeStamp, u64)> { + match write.write_type { + WriteType::Put | WriteType::Delete => Ok((commit_ts, 1)), + WriteType::Lock | WriteType::Rollback => { + assert!(write.last_change_ts.is_zero() || write.versions_to_last_change > 0); + if !write.last_change_ts.is_zero() || write.versions_to_last_change != 0 { + Ok((write.last_change_ts, write.versions_to_last_change + 1)) + } else { + // If neither `last_change_ts` nor `versions_to_last_change` exists, it means we + // do not know the last change info, probably because it comes from an older + // version TiKV. To support data from old TiKV, we iterate to the last change to + // find it. + + // TODO: can we reuse the reader? 
+ let snapshot = original_reader.reader.snapshot().clone(); + let mut reader = SnapshotReader::new(start_ts, snapshot, true); + + // Note that the scan can also utilize `last_change`. So once it finds a LOCK + // version with useful `last_change` pointer, it just needs one more `seek` or + // several `next`s to get to the final result. + let res = reader.get_write_with_commit_ts(key, commit_ts); + let stat = reader.take_statistics(); + original_reader.reader.statistics.add(&stat); + match res? { + // last_change_ts == 0 && versions_to_last_change > 0 means the key does not + // exist. + None => Ok((TimeStamp::zero(), 1)), + Some((w, last_change_ts)) => { + assert!(matches!(w.write_type, WriteType::Put)); + // We don't know how many versions there are. Make `versions_to_last_change` + // big enough so that later reads won't try to `next` to it. + Ok((last_change_ts, SEEK_BOUND + 1)) + } + } + } + } + } +} diff --git a/src/storage/txn/actions/mod.rs b/src/storage/txn/actions/mod.rs index d3aa3807446..3b861b709ef 100644 --- a/src/storage/txn/actions/mod.rs +++ b/src/storage/txn/actions/mod.rs @@ -12,6 +12,7 @@ pub mod check_data_constraint; pub mod check_txn_status; pub mod cleanup; pub mod commit; +pub mod common; pub mod flashback_to_version; pub mod gc; pub mod prewrite; diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 69cf8b32578..2fe53b32ccd 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -23,8 +23,10 @@ use crate::storage::{ SnapshotReader, }, txn::{ - actions::check_data_constraint::check_data_constraint, sched_pool::tls_can_enable, - scheduler::LAST_CHANGE_TS, LockInfo, + actions::{check_data_constraint::check_data_constraint, common::next_last_change_info}, + sched_pool::tls_can_enable, + scheduler::LAST_CHANGE_TS, + LockInfo, }, Snapshot, }; @@ -433,7 +435,7 @@ impl<'a> PrewriteMutation<'a> { } if seek_ts == TimeStamp::max() { (self.last_change_ts, 
self.versions_to_last_change) = - write.next_last_change_info(commit_ts); + next_last_change_info(&self.key, &write, reader.start_ts, reader, commit_ts)?; } match self.txn_props.kind { TransactionKind::Optimistic(_) => { @@ -824,7 +826,7 @@ fn amend_pessimistic_lock( .into()); } (mutation.last_change_ts, mutation.versions_to_last_change) = - write.next_last_change_info(*commit_ts); + next_last_change_info(&mutation.key, write, reader.start_ts, reader, *commit_ts)?; } else { // last_change_ts == 0 && versions_to_last_change > 0 means the key actually // does not exist. @@ -2426,8 +2428,10 @@ pub mod tests { assert_eq!(lock.versions_to_last_change, 1); must_rollback(&mut engine, key, 55, false); - // Latest version is a LOCK without last_change_ts. Set the last_change_ts of - // the new record to zero. + // Latest version is a LOCK without last_change_ts. It iterates back to find the + // actual last write. In this case it is a DELETE, so it returns + // (last_change_ts == 0 && versions_to_last_change == 1), indicating the key + // does not exist. let write = Write::new(WriteType::Lock, 60.into(), None); engine .put_cf( @@ -2440,11 +2444,11 @@ pub mod tests { prewrite_func(&mut engine, LockType::Lock, 70); let lock = must_locked(&mut engine, key, 70); assert!(lock.last_change_ts.is_zero()); - assert_eq!(lock.versions_to_last_change, 0); + assert_eq!(lock.versions_to_last_change, 1); must_rollback(&mut engine, key, 70, false); - // Latest version is a ROLLBACK without last_change_ts. Set the last_change_ts - // of the new record to zero. + // Latest version is a ROLLBACK without last_change_ts. Iterate back to find the + // DELETE. 
let write = Write::new(WriteType::Rollback, 75.into(), None); engine .put_cf( @@ -2457,7 +2461,7 @@ pub mod tests { prewrite_func(&mut engine, LockType::Lock, 85); let lock = must_locked(&mut engine, key, 85); assert!(lock.last_change_ts.is_zero()); - assert_eq!(lock.versions_to_last_change, 0); + assert_eq!(lock.versions_to_last_change, 1); must_rollback(&mut engine, key, 85, false); // Latest version is a LOCK with last_change_ts From b01a4e6b0b7733167b1548862cfc4134c279471b Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Tue, 23 May 2023 11:13:38 +0800 Subject: [PATCH 0697/1149] sst_import: run size check after ingest sst file (#14771) close tikv/tikv#14752 run size check after ingest sst file Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: tonyxuqqi --- .../src/operation/command/admin/split.rs | 1 + .../src/operation/command/write/ingest.rs | 12 ++++++- tests/failpoints/cases/test_import_service.rs | 36 ++++++++++++++++--- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 9dbd27f336d..d5923227ffa 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -239,6 +239,7 @@ impl Peer { } pub fn on_update_region_keys(&mut self, keys: u64) { + fail_point!("on_update_region_keys"); self.split_flow_control_mut().approximate_keys = Some(keys); self.add_pending_tick(PeerTick::SplitRegionCheck); self.add_pending_tick(PeerTick::PdHeartbeat); diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index 90382de24aa..20c7c92ee71 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -107,6 +107,8 @@ impl Apply { pub fn apply_ingest(&mut self, index: 
u64, ssts: Vec) -> Result<()> { PEER_WRITE_CMD_COUNTER.ingest_sst.inc(); let mut infos = Vec::with_capacity(ssts.len()); + let mut size: i64 = 0; + let mut keys: u64 = 0; for sst in &ssts { // This may not be enough as ingest sst may not trigger flush at all. let off = data_cf_offset(sst.get_cf_name()); @@ -125,7 +127,11 @@ impl Apply { return Err(e); } match self.sst_importer().validate(sst) { - Ok(meta_info) => infos.push(meta_info), + Ok(meta_info) => { + size += meta_info.total_bytes as i64; + keys += meta_info.total_kvs; + infos.push(meta_info) + } Err(e) => { slog_panic!(self.logger, "corrupted sst"; "sst" => ?sst, "error" => ?e); } @@ -143,6 +149,10 @@ impl Apply { .map(|info| info.meta.get_uuid().to_vec()) .collect::>(); self.set_sst_applied_index(uuids, index); + + self.metrics.size_diff_hint += size; + self.metrics.written_bytes += size as u64; + self.metrics.written_keys += keys; Ok(()) } } diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index 475acbe9f3c..8d335666215 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ b/tests/failpoints/cases/test_import_service.rs @@ -12,7 +12,8 @@ use kvproto::import_sstpb::*; use tempfile::Builder; use test_raftstore::Simulator; use test_sst_importer::*; -use tikv_util::HandyRwLock; +use tikv::config::TikvConfig; +use tikv_util::{config::ReadableSize, HandyRwLock}; #[allow(dead_code)] #[path = "../../integrations/import/util.rs"] @@ -253,7 +254,16 @@ fn test_ingest_file_twice_and_conflict() { #[test] fn test_ingest_sst_v2() { let mut cluster = test_raftstore_v2::new_server_cluster(1, 1); - let (ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(None, &mut cluster); + let mut config = TikvConfig::default(); + config.server.addr = "127.0.0.1:0".to_owned(); + let cleanup_interval = Duration::from_millis(10); + config.raft_store.cleanup_import_sst_interval.0 = cleanup_interval; + config.raft_store.split_region_check_tick_interval.0 = 
cleanup_interval; + config.raft_store.pd_heartbeat_tick_interval.0 = cleanup_interval; + config.raft_store.region_split_check_diff = Some(ReadableSize::kb(1)); + config.server.grpc_concurrency = 1; + + let (ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(Some(config), &mut cluster); let temp_dir = Builder::new().prefix("test_ingest_sst").tempdir().unwrap(); let sst_path = temp_dir.path().join("test.sst"); let sst_range = (0, 100); @@ -267,7 +277,8 @@ fn test_ingest_sst_v2() { meta.set_region_id(ctx.get_region_id()); meta.set_region_epoch(ctx.get_region_epoch().clone()); send_upload_sst(&import, &meta, &data).unwrap(); - ingest.set_sst(meta); + ingest.set_sst(meta.clone()); + let resp = import.ingest(&ingest).unwrap(); assert!(!resp.has_error(), "{:?}", resp.get_error()); fail::cfg("on_cleanup_import_sst", "return").unwrap(); @@ -277,7 +288,6 @@ fn test_ingest_sst_v2() { tx.lock().unwrap().send(()).unwrap(); }) .unwrap(); - rx.recv_timeout(std::time::Duration::from_secs(20)).unwrap(); let mut count = 0; for path in &cluster.paths { @@ -289,7 +299,25 @@ fn test_ingest_sst_v2() { } } } + + let (tx, rx) = channel::<()>(); + let tx = Arc::new(Mutex::new(tx)); + fail::cfg_callback("on_update_region_keys", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + rx.recv_timeout(std::time::Duration::from_secs(20)).unwrap(); + + fail::remove("on_update_region_keys"); fail::remove("on_cleanup_import_sst"); fail::remove("on_cleanup_import_sst_schedule"); assert_ne!(0, count); + + std::thread::sleep(std::time::Duration::from_secs(1)); + + let region_keys = cluster + .pd_client + .get_region_approximate_keys(ctx.get_region_id()) + .unwrap(); + assert_eq!(100, region_keys); } From 736a9804c89c808ef7b653cca8253b6c02ff4c33 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 23 May 2023 14:25:38 +0800 Subject: [PATCH 0698/1149] raftstore-v2: implement rollback merge (#14511) ref tikv/tikv#12842 Implement rollback merge for raftstore-v2 Signed-off-by: 
tabokie Co-authored-by: tonyxuqqi --- Cargo.lock | 2 +- components/raftstore-v2/src/batch/store.rs | 1 - components/raftstore-v2/src/fsm/peer.rs | 7 +- .../operation/command/admin/compact_log.rs | 29 +++ .../operation/command/admin/merge/commit.rs | 66 +++---- .../src/operation/command/admin/merge/mod.rs | 11 +- .../operation/command/admin/merge/prepare.rs | 10 +- .../operation/command/admin/merge/rollback.rs | 186 +++++++++++++++++- .../src/operation/command/admin/mod.rs | 7 +- .../raftstore-v2/src/operation/command/mod.rs | 7 +- components/raftstore-v2/src/operation/life.rs | 27 ++- components/raftstore-v2/src/operation/mod.rs | 8 +- .../raftstore-v2/src/operation/ready/mod.rs | 8 +- .../src/operation/ready/snapshot.rs | 2 +- components/raftstore-v2/src/worker/tablet.rs | 48 ++++- .../tests/failpoints/test_merge.rs | 68 ++++++- 16 files changed, 409 insertions(+), 78 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ae6fc81d772..7c670fec2c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2932,7 +2932,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#6e0e8a7deaa199418f4e8c7e3b63fcb89e153771" +source = "git+https://github.com/pingcap/kvproto.git#5d6aacbe966eb499b5463f88bb003cf61bd35f76" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 25fbde2ed27..7398c24ba80 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -425,7 +425,6 @@ impl StorePollerBuilder { continue; } let Some((prefix, region_id, tablet_index)) = self.tablet_registry.parse_tablet_name(&path) else { continue }; - // Keep the checkpoint even if source is destroyed. 
if prefix == MERGE_SOURCE_PREFIX { continue; } diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 3af66c4f81c..7d7a66e7357 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -338,9 +338,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerMsg::AckCommitMerge { index, target_id } => { self.fsm.peer_mut().on_ack_commit_merge(index, target_id) } - PeerMsg::RejectCommitMerge { index } => { - self.fsm.peer_mut().on_reject_commit_merge(index) - } + PeerMsg::RejectCommitMerge { index } => self + .fsm + .peer_mut() + .on_reject_commit_merge(self.store_ctx, index), PeerMsg::RedirectCatchUpLogs(c) => self .fsm .peer_mut() diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 383b54aa3b4..8b9992b2b85 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -338,6 +338,35 @@ impl Peer { )); } + #[inline] + pub fn record_tombstone_tablet_path_callback( + &mut self, + ctx: &StoreContext, + old_tablet: PathBuf, + new_tablet_index: u64, + cb: impl FnOnce() + Send + 'static, + ) { + info!( + self.logger, + "record tombstone tablet"; + "prev_tablet_path" => old_tablet.display(), + "new_tablet_index" => new_tablet_index + ); + let compact_log_context = self.compact_log_context_mut(); + compact_log_context + .tombstone_tablets_wait_index + .push(new_tablet_index); + let _ = ctx + .schedulers + .tablet + .schedule(tablet::Task::prepare_destroy_path_callback( + old_tablet, + self.region_id(), + new_tablet_index, + cb, + )); + } + /// Returns if there's any tombstone being removed. 
#[inline] pub fn remove_tombstone_tablets(&mut self, persisted: u64) -> bool { diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index 2756d0174dd..00d07c19afc 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -153,13 +153,13 @@ fn commit_of_merge(r: &CommitMergeRequest) -> u64 { impl Peer { // Called after applying `PrepareMerge`. pub fn start_commit_merge(&mut self, store_ctx: &mut StoreContext) { + fail::fail_point!("start_commit_merge"); assert!(self.applied_merge_state().is_some()); // Target already committed `CommitMerge`. if let Some(c) = &self.merge_context().unwrap().catch_up_logs { - if self.catch_up_logs_ready(c) { - let c = self.merge_context_mut().catch_up_logs.take().unwrap(); - self.finish_catch_up_logs(store_ctx, c); - } + assert!(self.catch_up_logs_ready(c)); + let c = self.merge_context_mut().catch_up_logs.take().unwrap(); + self.finish_catch_up_logs(store_ctx, c); } else { self.on_check_merge(store_ctx); } @@ -253,44 +253,37 @@ impl Peer { store_ctx: &mut StoreContext, req: RaftCmdRequest, ) { - match self.validate_commit_merge(&req) { - Some(true) if self.is_leader() => { - let (ch, _) = CmdResChannel::pair(); - self.on_admin_command(store_ctx, req, ch); - } - Some(false) => { - let commit_merge = req.get_admin_request().get_commit_merge(); - let source_id = commit_merge.get_source_state().get_region().get_id(); - let _ = store_ctx.router.force_send( - source_id, - PeerMsg::RejectCommitMerge { - index: commit_of_merge(commit_merge), - }, - ); - } - _ => (), - } - } - - fn validate_commit_merge(&self, req: &RaftCmdRequest) -> Option { let expected_epoch = req.get_header().get_region_epoch(); let merge = req.get_admin_request().get_commit_merge(); assert!(merge.has_source_state() && merge.get_source_state().has_merge_state()); let 
source_region = merge.get_source_state().get_region(); let region = self.region(); - if self + if let Some(r) = self .storage() .region_state() .get_merged_records() .iter() - .any(|p| p.get_source_region_id() == source_region.get_id()) + .find(|p| p.get_source_region_id() == source_region.get_id()) { info!( self.logger, - "ignore commit merge because peer is already in merged_records"; + "ack commit merge because peer is already in merged_records"; "source" => ?source_region, + "index" => r.get_index(), + ); + let index = commit_of_merge(req.get_admin_request().get_commit_merge()); + // If target caught up by snapshot, the source checkpoint hasn't been used. + let source_path = + merge_source_path(&store_ctx.tablet_registry, source_region.get_id(), index); + assert!(source_path.exists()); + self.record_tombstone_tablet_path(store_ctx, source_path, r.get_index()); + let _ = store_ctx.router.force_send( + source_region.get_id(), + PeerMsg::AckCommitMerge { + index, + target_id: self.region_id(), + }, ); - None } else if util::is_epoch_stale(expected_epoch, region.get_region_epoch()) { info!( self.logger, @@ -298,7 +291,10 @@ impl Peer { "current_epoch" => ?region.get_region_epoch(), "expected_epoch" => ?expected_epoch, ); - Some(false) + let index = commit_of_merge(req.get_admin_request().get_commit_merge()); + let _ = store_ctx + .router + .force_send(source_region.get_id(), PeerMsg::RejectCommitMerge { index }); } else if expected_epoch == region.get_region_epoch() { assert!( util::is_sibling_regions(source_region, region), @@ -313,12 +309,10 @@ impl Peer { source_region, region ); - // Best effort. Remove when trim check is implemented. 
- if self.storage().has_dirty_data() { - info!(self.logger, "ignore commit merge because of dirty data"); - None - } else { - Some(true) + assert!(!self.storage().has_dirty_data()); + if self.is_leader() { + let (ch, _) = CmdResChannel::pair(); + self.on_admin_command(store_ctx, req, ch); } } else { info!( @@ -326,7 +320,6 @@ impl Peer { "ignore commit merge because self epoch is stale"; "source" => ?source_region, ); - None } } @@ -506,6 +499,7 @@ impl Apply { merged_record.set_target_epoch(region.get_region_epoch().clone()); merged_record.set_target_peers(region.get_peers().into()); merged_record.set_index(index); + merged_record.set_source_index(merge_commit); state.mut_merged_records().push(merged_record); PEER_ADMIN_CMD_COUNTER.commit_merge.success.inc(); diff --git a/components/raftstore-v2/src/operation/command/admin/merge/mod.rs b/components/raftstore-v2/src/operation/command/admin/merge/mod.rs index 0b198eec2a6..253630c2bc1 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/mod.rs @@ -24,7 +24,7 @@ pub const MERGE_SOURCE_PREFIX: &str = "merge-source"; // `index` is the commit index of `PrepareMergeRequest`, `commit` field of // `CommitMergeRequest`. 
-fn merge_source_path( +pub fn merge_source_path( registry: &TabletRegistry, source_region_id: u64, index: u64, @@ -76,6 +76,15 @@ impl MergeContext { None } } + + #[inline] + pub fn prepare_merge_index(&self) -> Option { + if let Some(PrepareStatus::Applied(state)) = self.prepare_status.as_ref() { + Some(state.get_commit()) + } else { + None + } + } } impl Peer { diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index 601b4568866..6004e5d7c7c 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -626,6 +626,14 @@ impl Apply { }); } + // Notes on the lifetime of this checkpoint: + // - Target region is responsible to clean up if it has proposed `CommitMerge`. + // It will destroy the checkpoint if the persisted apply index is advanced. It + // will also destroy the checkpoint before sending `GcPeerResponse` to target + // leader. + // - Otherwise, the `PrepareMerge` is rollback-ed. In this case the source + // region is responsible to clean up (see `rollback_merge`). + Ok(( AdminResponse::default(), AdminCmdResult::PrepareMerge(PrepareMergeResult { @@ -653,7 +661,7 @@ impl Peer { reader, region, RegionChangeReason::PrepareMerge, - res.state.get_commit(), + self.storage().region_state().get_tablet_index(), ); } diff --git a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs index ab571298bb0..ec602c61eb2 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs @@ -1,12 +1,188 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{KvEngine, RaftEngine}; -use slog::warn; +//! The rollback of `PrepareMerge` command. 
-use crate::raft::Peer; +use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; +use kvproto::{ + metapb, + raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse}, + raft_serverpb::PeerState, +}; +use protobuf::Message; +use raftstore::{ + coprocessor::RegionChangeReason, + store::{fsm::new_admin_request, metrics::PEER_ADMIN_CMD_COUNTER, LocksStatus, Transport}, + Result, +}; +use slog::{error, info}; +use tikv_util::slog_panic; + +use super::merge_source_path; +use crate::{ + batch::StoreContext, + fsm::ApplyResReporter, + operation::AdminCmdResult, + raft::{Apply, Peer}, +}; + +#[derive(Debug)] +pub struct RollbackMergeResult { + commit: u64, + region: metapb::Region, +} + +impl Peer { + // Match v1::on_check_merge. + pub fn on_reject_commit_merge( + &mut self, + store_ctx: &mut StoreContext, + index: u64, + ) { + if self + .merge_context() + .map_or(true, |c| c.prepare_merge_index() != Some(index)) + { + return; + } + self.propose_rollback_merge(store_ctx, index); + } + + pub fn propose_rollback_merge( + &mut self, + store_ctx: &mut StoreContext, + index: u64, + ) { + info!( + self.logger, + "rollback prepare merge"; + "index" => index, + ); + let mut request = new_admin_request(self.region_id(), self.peer().clone()); + request + .mut_header() + .set_region_epoch(self.region().get_region_epoch().clone()); + let mut admin = AdminRequest::default(); + admin.set_cmd_type(AdminCmdType::RollbackMerge); + admin.mut_rollback_merge().set_commit(index); + request.set_admin_request(admin); + if let Err(e) = self.propose(store_ctx, request.write_to_bytes().unwrap()) { + error!(self.logger, "failed to propose RollbackMerge"; "err" => ?e); + } + } +} + +impl Apply { + // Match v1::exec_rollback_merge. 
+ pub fn apply_rollback_merge( + &mut self, + req: &AdminRequest, + _index: u64, + ) -> Result<(AdminResponse, AdminCmdResult)> { + PEER_ADMIN_CMD_COUNTER.rollback_merge.all.inc(); + if self.region_state().get_state() != PeerState::Merging { + slog_panic!( + self.logger, + "unexpected state of merging region"; + "state" => ?self.region_state(), + ); + } + let rollback = req.get_rollback_merge(); + let merge_state = self.region_state().get_merge_state(); + if merge_state.get_commit() != rollback.get_commit() { + slog_panic!( + self.logger, + "unexpected merge state of merging region"; + "state" => ?merge_state, + ); + } + let mut region = self.region().clone(); + let version = region.get_region_epoch().get_version(); + // Update version to avoid duplicated rollback requests. + region.mut_region_epoch().set_version(version + 1); + self.region_state_mut().set_region(region.clone()); + self.region_state_mut().set_state(PeerState::Normal); + self.region_state_mut().take_merge_state(); + + PEER_ADMIN_CMD_COUNTER.rollback_merge.success.inc(); + Ok(( + AdminResponse::default(), + AdminCmdResult::RollbackMerge(RollbackMergeResult { + commit: rollback.get_commit(), + region, + }), + )) + } +} impl Peer { - pub fn on_reject_commit_merge(&mut self, index: u64) { - warn!(self.logger, "target peer rejected commit merge"; "index" => index); + // Match v1::on_ready_rollback_merge. 
+ pub fn on_apply_res_rollback_merge( + &mut self, + store_ctx: &mut StoreContext, + res: RollbackMergeResult, + ) { + assert_ne!(res.commit, 0); + let current = self.merge_context().and_then(|c| c.prepare_merge_index()); + if current != Some(res.commit) { + slog_panic!( + self.logger, + "rollbacks a wrong merge"; + "pending_commit" => ?current, + "commit" => res.commit, + ); + } + { + let mut meta = store_ctx.store_meta.lock().unwrap(); + meta.set_region(&res.region, true, &self.logger); + let (reader, _) = meta.readers.get_mut(&res.region.get_id()).unwrap(); + self.set_region( + &store_ctx.coprocessor_host, + reader, + res.region.clone(), + RegionChangeReason::RollbackMerge, + self.storage().region_state().get_tablet_index(), + ); + } + let region_state = self.storage().region_state().clone(); + let region_id = self.region_id(); + self.state_changes_mut() + .put_region_state(region_id, res.commit, ®ion_state) + .unwrap(); + self.set_has_extra_write(); + + self.rollback_merge(store_ctx); + } + + /// This can be called directly without proposal, in which case a snapshot + /// rollbacks the merge. + pub fn rollback_merge(&mut self, store_ctx: &mut StoreContext) { + let index = self + .merge_context() + .and_then(|c| c.prepare_merge_index()) + .unwrap_or_else(|| slog_panic!(self.logger, "no applied prepare merge to rollback")); + // Clear merge releted data + let checkpoint_path = + merge_source_path(&store_ctx.tablet_registry, self.region_id(), index); + if checkpoint_path.exists() { + // Don't remove it immediately so that next restart we don't need to waste time + // making the checkpoint again. We double check in `clean_up_tablets` to ensure + // this checkpoint isn't leaked. 
+ self.record_tombstone_tablet_path(store_ctx, checkpoint_path, index); + } + self.proposal_control_mut().leave_prepare_merge(index); + self.take_merge_context(); + + // Resume updating `safe_ts` + self.read_progress_mut().resume(); + + if self.is_leader() { + { + let mut pessimistic_locks = self.txn_context().ext().pessimistic_locks.write(); + if pessimistic_locks.status == LocksStatus::MergingRegion { + pessimistic_locks.status = LocksStatus::Normal; + } + } + self.region_heartbeat_pd(store_ctx); + } } } diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 9f3475a25d3..40c27e2c8cf 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -16,10 +16,12 @@ use kvproto::{ raft_cmdpb::{AdminCmdType, RaftCmdRequest}, raft_serverpb::{ExtraMessageType, FlushMemtable, RaftMessage}, }; -use merge::{commit::CommitMergeResult, prepare::PrepareMergeResult}; +use merge::{ + commit::CommitMergeResult, prepare::PrepareMergeResult, rollback::RollbackMergeResult, +}; pub use merge::{ commit::{CatchUpLogs, MERGE_IN_PROGRESS_PREFIX}, - MergeContext, MERGE_SOURCE_PREFIX, + merge_source_path, MergeContext, MERGE_SOURCE_PREFIX, }; use protobuf::Message; use raftstore::{ @@ -59,6 +61,7 @@ pub enum AdminCmdResult { PrepareMerge(PrepareMergeResult), CommitMerge(CommitMergeResult), Flashback(FlashbackResult), + RollbackMerge(RollbackMergeResult), } impl Peer { diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index e68449e8026..e10b77642be 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -66,8 +66,8 @@ mod control; mod write; pub use admin::{ - report_split_init_finish, temp_split_path, AdminCmdResult, CatchUpLogs, CompactLogContext, - MergeContext, RequestHalfSplit, 
RequestSplit, SplitFlowControl, SplitInit, + merge_source_path, report_split_init_finish, temp_split_path, AdminCmdResult, CatchUpLogs, + CompactLogContext, MergeContext, RequestHalfSplit, RequestSplit, SplitFlowControl, SplitInit, MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX, }; pub use control::ProposalControl; @@ -397,6 +397,7 @@ impl Peer { AdminCmdResult::PrepareMerge(res) => self.on_apply_res_prepare_merge(ctx, res), AdminCmdResult::CommitMerge(res) => self.on_apply_res_commit_merge(ctx, res), AdminCmdResult::Flashback(res) => self.on_apply_res_flashback(ctx, res), + AdminCmdResult::RollbackMerge(res) => self.on_apply_res_rollback_merge(ctx, res), } } self.region_buckets_info_mut() @@ -682,7 +683,7 @@ impl Apply { AdminCmdType::BatchSplit => self.apply_batch_split(admin_req, log_index).await?, AdminCmdType::PrepareMerge => self.apply_prepare_merge(admin_req, log_index)?, AdminCmdType::CommitMerge => self.apply_commit_merge(admin_req, log_index).await?, - AdminCmdType::RollbackMerge => unimplemented!(), + AdminCmdType::RollbackMerge => self.apply_rollback_merge(admin_req, log_index)?, AdminCmdType::TransferLeader => { self.apply_transfer_leader(admin_req, entry.term)? } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 8b431ad3a98..45c18400627 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -174,6 +174,7 @@ impl AbnormalPeerContext { #[derive(Default)] pub struct GcPeerContext { + // Peers that are confirmed to be deleted. 
confirmed_ids: Vec, } @@ -572,8 +573,30 @@ impl Peer { return; } + let check = extra_msg.get_check_gc_peer(); + let check_peer_id = check.get_check_peer().get_id(); + let records = self.storage().region_state().get_merged_records(); + let Some(record) = records.iter().find(|r| { + r.get_source_peers().iter().any(|p| p.get_id() == check_peer_id) + }) else { return }; + let source_index = record.get_source_index(); forward_destroy_to_source_peer(msg, |m| { - let _ = ctx.router.send_raft_message(m.into()); + let source_checkpoint = super::merge_source_path( + &ctx.tablet_registry, + check.get_check_region_id(), + source_index, + ); + if source_checkpoint.exists() { + let router = ctx.router.clone(); + self.record_tombstone_tablet_path_callback( + ctx, + source_checkpoint, + extra_msg.get_index(), + move || { + let _ = router.send_raft_message(m.into()); + }, + ); + } }); } @@ -600,6 +623,8 @@ impl Peer { ctx.confirmed_ids.push(gc_peer_id); } + // Removes deleted peers from region state by proposing a `UpdateGcPeer` + // command. 
pub fn on_gc_peer_tick(&mut self, ctx: &mut StoreContext) { if !self.is_leader() { return; diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index e0107122da9..f5e2077ad0a 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -10,10 +10,10 @@ mod ready; mod txn_ext; pub use command::{ - AdminCmdResult, ApplyFlowControl, CatchUpLogs, CommittedEntries, CompactLogContext, - MergeContext, ProposalControl, RequestHalfSplit, RequestSplit, SimpleWriteBinary, - SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, SplitFlowControl, - MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX, + merge_source_path, AdminCmdResult, ApplyFlowControl, CatchUpLogs, CommittedEntries, + CompactLogContext, MergeContext, ProposalControl, RequestHalfSplit, RequestSplit, + SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, + SplitFlowControl, MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX, }; pub use life::{AbnormalPeerContext, DestroyProgress, GcPeerContext}; pub use ready::{ diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index c0772eda0b7..2cacfac8c72 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -233,13 +233,7 @@ impl Peer { }); return; } - ExtraMessageType::MsgWantRollbackMerge => { - if self.is_leader() { - // TODO: - // self.merge_context_mut().maybe_add_rollback_peer(); - return; - } - } + ExtraMessageType::MsgWantRollbackMerge => return, ExtraMessageType::MsgAvailabilityRequest => { self.on_availability_request( ctx, diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 696bf025984..e5e0a651816 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ 
b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -267,7 +267,7 @@ impl Peer { self.raft_group_mut().advance_apply_to(snapshot_index); if self.proposal_control().is_merging() { // After applying a snapshot, merge is rollbacked implicitly. - // TODO: self.rollback_merge(ctx); + self.rollback_merge(ctx); } let read_tablet = SharedReadTablet::new(tablet.clone()); { diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index db09c4ba3be..dadc27eb390 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -33,6 +33,7 @@ pub enum Task { tablet: Either, region_id: u64, wait_for_persisted: u64, + cb: Option>, }, Destroy { region_id: u64, @@ -118,6 +119,7 @@ impl Task { tablet: Either::Left(tablet), region_id, wait_for_persisted, + cb: None, } } @@ -127,6 +129,22 @@ impl Task { tablet: Either::Right(path), region_id, wait_for_persisted, + cb: None, + } + } + + #[inline] + pub fn prepare_destroy_path_callback( + path: PathBuf, + region_id: u64, + wait_for_persisted: u64, + cb: impl FnOnce() + Send + 'static, + ) -> Self { + Task::PrepareDestroy { + tablet: Either::Right(path), + region_id, + wait_for_persisted, + cb: Some(Box::new(cb)), } } @@ -158,9 +176,9 @@ pub struct Runner { sst_importer: Arc, logger: Logger, - // region_id -> [(tablet_path, wait_for_persisted)]. - waiting_destroy_tasks: HashMap>, - pending_destroy_tasks: Vec, + // region_id -> [(tablet_path, wait_for_persisted, callback)]. + waiting_destroy_tasks: HashMap>)>>, + pending_destroy_tasks: Vec<(PathBuf, Option>)>, // An independent pool to run tasks that are time-consuming but doesn't take CPU resources, // such as waiting for RocksDB compaction. 
@@ -259,20 +277,24 @@ impl Runner { region_id: u64, tablet: Either, wait_for_persisted: u64, + cb: Option>, ) { let path = self.pause_background_work(tablet); self.waiting_destroy_tasks .entry(region_id) .or_default() - .push((path, wait_for_persisted)); + .push((path, wait_for_persisted, cb)); } fn destroy(&mut self, region_id: u64, persisted: u64) { if let Some(v) = self.waiting_destroy_tasks.get_mut(®ion_id) { - v.retain(|(path, wait)| { + v.retain_mut(|(path, wait, cb)| { if *wait <= persisted { + let cb = cb.take(); if !Self::process_destroy_task(&self.logger, &self.tablet_registry, path) { - self.pending_destroy_tasks.push(path.clone()); + self.pending_destroy_tasks.push((path.clone(), cb)); + } else if let Some(cb) = cb { + cb(); } return false; } @@ -284,7 +306,7 @@ impl Runner { fn direct_destroy(&mut self, tablet: Either) { let path = self.pause_background_work(tablet); if !Self::process_destroy_task(&self.logger, &self.tablet_registry, &path) { - self.pending_destroy_tasks.push(path); + self.pending_destroy_tasks.push((path, None)); } } @@ -391,7 +413,8 @@ where region_id, tablet, wait_for_persisted, - } => self.prepare_destroy(region_id, tablet, wait_for_persisted), + cb, + } => self.prepare_destroy(region_id, tablet, wait_for_persisted, cb), Task::Destroy { region_id, persisted_index, @@ -408,8 +431,13 @@ where EK: KvEngine, { fn on_timeout(&mut self) { - self.pending_destroy_tasks - .retain(|task| !Self::process_destroy_task(&self.logger, &self.tablet_registry, task)); + self.pending_destroy_tasks.retain_mut(|(path, cb)| { + let r = Self::process_destroy_task(&self.logger, &self.tablet_registry, path); + if r && let Some(cb) = cb.take() { + cb(); + } + r + }); } fn get_interval(&self) -> Duration { diff --git a/components/raftstore-v2/tests/failpoints/test_merge.rs b/components/raftstore-v2/tests/failpoints/test_merge.rs index d660221d5ee..9321c06b5f5 100644 --- a/components/raftstore-v2/tests/failpoints/test_merge.rs +++ 
b/components/raftstore-v2/tests/failpoints/test_merge.rs @@ -1,12 +1,14 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use std::time::Duration; +use std::{sync::Mutex, time::Duration}; use engine_traits::Peekable; use tikv_util::store::new_peer; use crate::cluster::{ - life_helper::assert_peer_not_exist, merge_helper::merge_region, split_helper::split_region, + life_helper::assert_peer_not_exist, + merge_helper::merge_region, + split_helper::{put, split_region}, Cluster, }; @@ -107,3 +109,65 @@ fn test_source_destroy_before_target_apply() { } panic!("merge not replayed after 5s"); } + +#[test] +fn test_rollback() { + let mut cluster = Cluster::default(); + let store_id = cluster.node(0).id(); + let router = &mut cluster.routers[0]; + + let region_1 = router.region_detail(2); + let peer_1 = region_1.get_peers()[0].clone(); + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + let peer_2 = new_peer(store_id, peer_1.get_id() + 1); + let region_1_id = region_1.get_id(); + let region_2_id = region_1_id + 1; + let (region_1, region_2) = split_region( + router, + region_1, + peer_1.clone(), + region_2_id, + peer_2.clone(), + Some(format!("k{}k", region_1_id).as_bytes()), + Some(format!("k{}k", region_2_id).as_bytes()), + format!("k{}", region_2_id).as_bytes(), + format!("k{}", region_2_id).as_bytes(), + false, + ); + + let region_3_id = region_2_id + 1; + let peer_3 = new_peer(store_id, peer_2.get_id() + 1); + let router_clone = Mutex::new(cluster.routers[0].clone()); + let region_2_clone = region_2.clone(); + fail::cfg_callback("start_commit_merge", move || { + split_region( + &mut router_clone.lock().unwrap(), + region_2_clone.clone(), + peer_2.clone(), + region_3_id, + peer_3.clone(), + Some(format!("k{}k", region_2_id).as_bytes()), + Some(format!("k{}k", region_3_id).as_bytes()), + format!("k{}", region_3_id).as_bytes(), + format!("k{}", region_3_id).as_bytes(), + false, + ); + fail::remove("start_commit_merge"); + }) + .unwrap(); 
+ merge_region(&cluster, 0, region_1, peer_1, region_2, false); + + let mut resp = Default::default(); + for _ in 0..10 { + resp = put( + &mut cluster.routers[0], + region_1_id, + format!("k{}k2", region_1_id).as_bytes(), + ); + if !resp.get_header().has_error() { + return; + } + std::thread::sleep(Duration::from_millis(100)); + } + assert!(!resp.get_header().has_error(), "{:?}", resp); +} From 54a7e86e172429ccb9a9d9232eeebcccc0c9d1b3 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Tue, 23 May 2023 15:19:38 +0800 Subject: [PATCH 0699/1149] command: distinguish different states in the flashback tag statistics (#14792) ref tikv/tikv#14788 - Distinguish different states in the flashback tag statistics. - Move `incr_cmd_metric` into `schedule_command` to make sure all commands could be counted in. Signed-off-by: JmPotato Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/storage/metrics.rs | 5 ++- src/storage/mod.rs | 2 -- .../txn/commands/flashback_to_version.rs | 24 +++++++++++++- .../flashback_to_version_read_phase.rs | 32 +++++++++++++++++-- src/storage/txn/scheduler.rs | 1 + 5 files changed, 58 insertions(+), 6 deletions(-) diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index 4837567ee43..e9477b56b0f 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -141,7 +141,10 @@ make_auto_flush_static_metric! 
{ pause, key_mvcc, start_ts_mvcc, - flashback_to_version, + flashback_to_version_read_lock, + flashback_to_version_read_write, + flashback_to_version_rollback_lock, + flashback_to_version_write, raw_get, raw_batch_get, raw_scan, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 897968ef671..2605b1ad262 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1502,7 +1502,6 @@ impl Storage { }); fail_point!("storage_drop_message", |_| Ok(())); - cmd.incr_cmd_metric(); self.sched.run_cmd(cmd, T::callback(callback)); Ok(()) @@ -1515,7 +1514,6 @@ impl Storage { callback: Callback, ) { let cmd: Command = cmd.into(); - cmd.incr_cmd_metric(); sched.run_cmd(cmd, T::callback(callback)); } diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index da12bc8906c..7873b736d1e 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -9,6 +9,7 @@ use txn_types::{Key, TimeStamp}; use crate::storage::{ kv::WriteData, lock_manager::LockManager, + metrics::{CommandKind, KV_COMMAND_COUNTER_VEC_STATIC}, mvcc::{MvccReader, MvccTxn}, txn::{ actions::flashback_to_version::{ @@ -40,7 +41,6 @@ command! { impl CommandExt for FlashbackToVersion { ctx!(); - tag!(flashback_to_version); request_type!(KvFlashbackToVersion); fn gen_lock(&self) -> latch::Lock { @@ -67,6 +67,28 @@ impl CommandExt for FlashbackToVersion { FlashbackToVersionState::Commit { key_to_commit } => key_to_commit.as_encoded().len(), } } + + fn tag(&self) -> CommandKind { + match self.state { + FlashbackToVersionState::RollbackLock { .. } => { + CommandKind::flashback_to_version_rollback_lock + } + _ => CommandKind::flashback_to_version_write, + } + } + + fn incr_cmd_metric(&self) { + match self.state { + FlashbackToVersionState::RollbackLock { .. 
} => { + KV_COMMAND_COUNTER_VEC_STATIC + .flashback_to_version_rollback_lock + .inc(); + } + _ => KV_COMMAND_COUNTER_VEC_STATIC + .flashback_to_version_write + .inc(), + } + } } impl WriteCommand for FlashbackToVersion { diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs index 4be0239aad2..1812816966a 100644 --- a/src/storage/txn/commands/flashback_to_version_read_phase.rs +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -1,11 +1,12 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. +// #[PerformanceCriticalPath] use std::ops::Bound; -// #[PerformanceCriticalPath] use txn_types::{Key, Lock, TimeStamp}; use crate::storage::{ + metrics::{CommandKind, KV_COMMAND_COUNTER_VEC_STATIC}, mvcc::MvccReader, txn::{ actions::flashback_to_version::{check_flashback_commit, get_first_user_key}, @@ -96,7 +97,6 @@ command! { impl CommandExt for FlashbackToVersionReadPhase { ctx!(); - tag!(flashback_to_version); request_type!(KvFlashbackToVersion); property!(readonly); gen_lock!(empty); @@ -104,6 +104,34 @@ impl CommandExt for FlashbackToVersionReadPhase { fn write_bytes(&self) -> usize { 0 } + + fn tag(&self) -> CommandKind { + match self.state { + FlashbackToVersionState::RollbackLock { .. } => { + CommandKind::flashback_to_version_read_lock + } + FlashbackToVersionState::FlashbackWrite { .. } => { + CommandKind::flashback_to_version_read_write + } + _ => unreachable!(), + } + } + + fn incr_cmd_metric(&self) { + match self.state { + FlashbackToVersionState::RollbackLock { .. } => { + KV_COMMAND_COUNTER_VEC_STATIC + .flashback_to_version_read_lock + .inc(); + } + FlashbackToVersionState::FlashbackWrite { .. 
} => { + KV_COMMAND_COUNTER_VEC_STATIC + .flashback_to_version_read_write + .inc(); + } + _ => unreachable!(), + } + } } /// The whole flashback progress contains four phases: diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 0acf1de49a3..146217e4b22 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -538,6 +538,7 @@ impl TxnScheduler { let tag = cmd.tag(); let priority_tag = get_priority_tag(cmd.priority()); + cmd.incr_cmd_metric(); SCHED_STAGE_COUNTER_VEC.get(tag).new.inc(); SCHED_COMMANDS_PRI_COUNTER_VEC_STATIC .get(priority_tag) From 21f3ef9c7a3f6615bd2d7cb347dd904f840e510f Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 23 May 2023 15:47:38 +0800 Subject: [PATCH 0700/1149] engine_rocks: flush memtable does not wait for write stall (#14708) close tikv/tikv#14633 We meet region split blocked about 200s. This is caused by rocksdb checkpoint waiting for write stall before flush memtable. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 6 +++--- components/engine_rocks/src/event_listener.rs | 3 +++ components/engine_rocks/src/misc.rs | 6 ++++-- src/server/gc_worker/gc_worker.rs | 10 +++++----- tests/failpoints/cases/test_gc_metrics.rs | 4 ++-- 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7c670fec2c8..e9051b685a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3061,7 +3061,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#062638a741adcd9074659eb28cbe7f6a676938d5" +source = "git+https://github.com/tikv/rust-rocksdb.git#b72e0fa0f2e5a0016bb55c11756ee714445c0365" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -3080,7 +3080,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#062638a741adcd9074659eb28cbe7f6a676938d5" +source = 
"git+https://github.com/tikv/rust-rocksdb.git#b72e0fa0f2e5a0016bb55c11756ee714445c0365" dependencies = [ "bzip2-sys", "cc", @@ -5031,7 +5031,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#062638a741adcd9074659eb28cbe7f6a676938d5" +source = "git+https://github.com/tikv/rust-rocksdb.git#b72e0fa0f2e5a0016bb55c11756ee714445c0365" dependencies = [ "libc 0.2.139", "librocksdb_sys", diff --git a/components/engine_rocks/src/event_listener.rs b/components/engine_rocks/src/event_listener.rs index 1cbef379e3c..9628c61c23f 100644 --- a/components/engine_rocks/src/event_listener.rs +++ b/components/engine_rocks/src/event_listener.rs @@ -120,6 +120,9 @@ impl rocksdb::EventListener for RocksEventListener { DBBackgroundErrorReason::Compaction => "compaction", DBBackgroundErrorReason::WriteCallback => "write_callback", DBBackgroundErrorReason::MemTable => "memtable", + DBBackgroundErrorReason::ManifestWrite => "manifest_write", + DBBackgroundErrorReason::FlushNoWAL => "flush_no_wal", + DBBackgroundErrorReason::ManifestWriteNoWAL => "manifest_write_no_wal", }; if err.starts_with("Corruption") || err.starts_with("IO error") { diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index d4ffa564861..c4b5fa4946e 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -138,12 +138,14 @@ impl MiscExt for RocksEngine { handles.push(util::get_cf_handle(self.as_inner(), cf)?); } } - self.as_inner().flush_cfs(&handles, wait).map_err(r2e) + self.as_inner() + .flush_cfs(&handles, wait, false) + .map_err(r2e) } fn flush_cf(&self, cf: &str, wait: bool) -> Result<()> { let handle = util::get_cf_handle(self.as_inner(), cf)?; - self.as_inner().flush_cf(handle, wait).map_err(r2e) + self.as_inner().flush_cf(handle, wait, false).map_err(r2e) } fn delete_ranges_cf( diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 
87ab5c10575..f402148fa95 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -1855,7 +1855,7 @@ mod tests { must_prewrite_delete(&mut prefixed_engine, &k, &k, 151); must_commit(&mut prefixed_engine, &k, 151, 152); } - db.flush_cf(cf, true).unwrap(); + db.flush_cf(cf, true, false).unwrap(); db.compact_range_cf(cf, None, None); for i in 0..100 { @@ -1930,7 +1930,7 @@ mod tests { must_commit(&mut prefixed_engine, &k, 151, 152); keys.push(Key::from_raw(&k)); } - db.flush_cf(cf, true).unwrap(); + db.flush_cf(cf, true, false).unwrap(); assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek, 0); assert_eq!(runner.mut_stats(GcKeyMode::txn).write.next, 0); @@ -2088,7 +2088,7 @@ mod tests { for i in 10u64..30 { must_rollback(&mut prefixed_engine, b"k2\x00", i, true); } - db.flush_cf(cf, true).unwrap(); + db.flush_cf(cf, true, false).unwrap(); must_gc(&mut prefixed_engine, b"k2\x00", 30); // Test tombstone counter works @@ -2147,7 +2147,7 @@ mod tests { must_prewrite_put(&mut prefixed_engine, b"k2", b"v2", b"k2", start_ts); must_commit(&mut prefixed_engine, b"k2", start_ts, commit_ts); } - db.flush_cf(cf, true).unwrap(); + db.flush_cf(cf, true, false).unwrap(); let safepoint = versions as u64 * 2; runner @@ -2180,7 +2180,7 @@ mod tests { must_commit(&mut engine, b"key", 10, 20); let db = engine.kv_engine().unwrap().as_inner().clone(); let cf = get_cf_handle(&db, CF_WRITE).unwrap(); - db.flush_cf(cf, true).unwrap(); + db.flush_cf(cf, true, false).unwrap(); let gate = FeatureGate::default(); gate.set_version("5.0.0").unwrap(); diff --git a/tests/failpoints/cases/test_gc_metrics.rs b/tests/failpoints/cases/test_gc_metrics.rs index 348b81aaea7..2a25285f54a 100644 --- a/tests/failpoints/cases/test_gc_metrics.rs +++ b/tests/failpoints/cases/test_gc_metrics.rs @@ -176,7 +176,7 @@ fn test_txn_gc_keys_handled() { must_commit(&mut prefixed_engine, &k, 151, 152); } - db.flush_cf(cf, true).unwrap(); + db.flush_cf(cf, true, false).unwrap(); 
db.compact_range_cf(cf, None, None); @@ -344,7 +344,7 @@ fn test_raw_gc_keys_handled() { engine.write(&ctx, batch).unwrap(); let cf = get_cf_handle(&db, CF_DEFAULT).unwrap(); - db.flush_cf(cf, true).unwrap(); + db.flush_cf(cf, true, false).unwrap(); db.compact_range_cf(cf, None, None); From 3482a1cad647e5f90c808958e1217bf423e066a8 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 23 May 2023 16:39:39 +0800 Subject: [PATCH 0701/1149] raftstore-v2: ignore unimplemented extra msg (#14751) close tikv/tikv#14749 ignore unimplemented extra msg Signed-off-by: Spade A Signed-off-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- components/raftstore-v2/src/fsm/store.rs | 6 +++++- .../raftstore-v2/src/operation/command/admin/mod.rs | 8 ++++++-- components/raftstore-v2/src/operation/command/mod.rs | 6 +++++- components/raftstore-v2/src/operation/ready/mod.rs | 7 ++++++- 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index c7f228f7f9c..486a4c4813e 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -275,7 +275,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { StoreTick::PdStoreHeartbeat => self.on_pd_store_heartbeat(), StoreTick::CleanupImportSst => self.on_cleanup_import_sst(), StoreTick::CompactCheck => self.on_compact_check_tick(), - _ => unimplemented!(), + _ => slog_panic!( + self.store_ctx.logger, + "unimplemented"; + "tick" => ?tick, + ), } } diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 40c27e2c8cf..12e1565f283 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -39,7 +39,7 @@ pub use split::{ report_split_init_finish, temp_split_path, 
RequestHalfSplit, RequestSplit, SplitFlowControl, SplitInit, SPLIT_PREFIX, }; -use tikv_util::{box_err, log::SlogFormat}; +use tikv_util::{box_err, log::SlogFormat, slog_panic}; use txn_types::WriteBatchFlags; use self::flashback::FlashbackResult; @@ -273,7 +273,11 @@ impl Peer { AdminCmdType::PrepareFlashback | AdminCmdType::FinishFlashback => { self.propose_flashback(ctx, req) } - _ => unimplemented!("{:?}", req), + _ => slog_panic!( + self.logger, + "unimplemented"; + "admin_type" => ?cmd_type, + ), } }; match &res { diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index e10b77642be..a7a82e3c161 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -736,7 +736,11 @@ impl Apply { self.use_delete_range(), )?; } - _ => unimplemented!(), + _ => slog_panic!( + self.logger, + "unimplemented"; + "request_type" => ?r.get_cmd_type(), + ), } } let resp = new_response(req.get_header()); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 2cacfac8c72..5242f79379b 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -281,7 +281,12 @@ impl Peer { } } if msg.has_extra_msg() { - unimplemented!(); + warn!( + self.logger, + "unimplemented extra msg, ignore it now"; + "extra_msg_type" => ?msg.get_extra_msg().get_type(), + ); + return; } // TODO: drop all msg append when the peer is uninitialized and has conflict From 9aad050bc7377a1745568782ab7dd8278ccf0e3a Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 23 May 2023 16:57:39 +0800 Subject: [PATCH 0702/1149] raftstorev2: fix miss ready after handling read index (#14789) close tikv/tikv#14786 raftstorev2: fix miss ready after handling read index Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] 
<108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../raftstore-v2/src/operation/ready/mod.rs | 21 +++++++++---------- tests/failpoints/cases/test_replica_read.rs | 2 +- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 5242f79379b..6eb1fd10d51 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -305,6 +305,12 @@ impl Peer { if msg.get_message().get_msg_type() == MessageType::MsgTransferLeader { self.on_transfer_leader_msg(ctx, msg.get_message(), msg.disk_usage) } else { + // As this peer is already created, the empty split message is meaningless. + if is_empty_split_message(&msg) { + ctx.raft_metrics.message_dropped.stale_msg.inc(); + return; + } + // This can be a message that sent when it's still a follower. Nevertheleast, // it's meaningless to continue to handle the request as callbacks are cleared. if msg.get_message().get_msg_type() == MessageType::MsgReadIndex @@ -315,21 +321,14 @@ impl Peer { ctx.raft_metrics.message_dropped.stale_msg.inc(); return; } + if msg.get_message().get_msg_type() == MessageType::MsgReadIndex && self.is_leader() && self.on_step_read_index(ctx, msg.mut_message()) { - // Read index has respond in `on_step_read_index`. - return; - } - - // As this peer is already created, the empty split message is meaningless. - if is_empty_split_message(&msg) { - ctx.raft_metrics.message_dropped.stale_msg.inc(); - return; - } - - if let Err(e) = self.raft_group_mut().step(msg.take_message()) { + // Read index has respond in `on_step_read_index`, + // No need to step again. 
+ } else if let Err(e) = self.raft_group_mut().step(msg.take_message()) { error!(self.logger, "raft step error"; "err" => ?e); } else { let committed_index = self.raft_group().raft.raft_log.committed; diff --git a/tests/failpoints/cases/test_replica_read.rs b/tests/failpoints/cases/test_replica_read.rs index 64f363f0ced..e1fed7abcb8 100644 --- a/tests/failpoints/cases/test_replica_read.rs +++ b/tests/failpoints/cases/test_replica_read.rs @@ -351,7 +351,7 @@ fn test_read_after_cleanup_range_for_snap() { /// a heartbeat timeout to know its leader before that it can't handle any read /// request. #[test_case(test_raftstore::new_node_cluster)] -// #[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_new_split_learner_can_not_find_leader() { let mut cluster = new_cluster(0, 4); configure_for_lease_read(&mut cluster.cfg, Some(5000), None); From 46a09a42ccc831f27b18dcc2150d1fbf1304cb01 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 23 May 2023 19:09:39 +0800 Subject: [PATCH 0703/1149] raftstorev2: fix transfer leader rejection due to admin cmd (#14790) close tikv/tikv#14785 raftstorev2: fix transfer leader rejection due to admin cmd Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../src/operation/command/admin/mod.rs | 5 ++++- .../raftstore-v2/src/operation/command/mod.rs | 1 + components/raftstore-v2/src/router/message.rs | 21 +++++++++++++++++++ components/test_raftstore-v2/src/cluster.rs | 18 ++++++---------- tests/failpoints/cases/test_replica_read.rs | 4 ++-- 5 files changed, 34 insertions(+), 15 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 12e1565f283..b3778d359c8 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -99,6 +99,7 @@ impl Peer { 
return; } + let is_transfer_leader = cmd_type == AdminCmdType::TransferLeader; let pre_transfer_leader = cmd_type == AdminCmdType::TransferLeader && !WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()) .contains(WriteBatchFlags::TRANSFER_LEADER_PROPOSAL); @@ -119,7 +120,9 @@ impl Peer { ch.report_error(resp); return; } - if let Some(conflict) = self.proposal_control_mut().check_conflict(Some(cmd_type)) { + // Do not check conflict for transfer leader, otherwise we may not + // transfer leadership out of busy nodes in time. + if !is_transfer_leader && let Some(conflict) = self.proposal_control_mut().check_conflict(Some(cmd_type)) { conflict.delay_channel(ch); return; } diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index a7a82e3c161..0bbf6a05601 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -547,6 +547,7 @@ impl Apply { #[inline] pub async fn apply_committed_entries(&mut self, ce: CommittedEntries) { fail::fail_point!("APPLY_COMMITTED_ENTRIES"); + fail::fail_point!("on_handle_apply_2", self.peer_id() == 2, |_| {}); let now = std::time::Instant::now(); let apply_wait_time = APPLY_TASK_WAIT_TIME_HISTOGRAM.local(); for (e, ch) in ce.entry_and_proposals { diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 9ce4e8a8807..79f9d915c0d 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -296,6 +296,27 @@ impl PeerMsg { sub, ) } + + #[cfg(feature = "testexport")] + pub fn request_split_with_callback( + epoch: metapb::RegionEpoch, + split_keys: Vec>, + source: String, + f: Box, + ) -> (Self, CmdResSubscriber) { + let (ch, sub) = CmdResChannel::with_callback(f); + ( + PeerMsg::RequestSplit { + request: RequestSplit { + epoch, + split_keys, + source: source.into(), + }, + ch, + }, + sub, + ) 
+ } } #[derive(Debug)] diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 25a7e2ab6e2..95b3c89dd97 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -1325,7 +1325,7 @@ impl, EK: KvEngine> Cluster { let transfer_leader = new_admin_request(region_id, &epoch, new_transfer_leader_cmd(leader)); // todo(SpadeA): modify let resp = self - .call_command_on_leader(transfer_leader, Duration::from_secs(500)) + .call_command_on_leader(transfer_leader, Duration::from_secs(5)) .unwrap(); assert_eq!( resp.get_admin_response().get_cmd_type(), @@ -1371,29 +1371,23 @@ impl, EK: KvEngine> Cluster { &mut self, region: &metapb::Region, split_key: &[u8], - mut cb: Callback, + cb: Callback, ) { let leader = self.leader_of_region(region.get_id()).unwrap(); let router = self.sim.rl().get_router(leader.get_store_id()).unwrap(); let split_key = split_key.to_vec(); - let (split_region_req, mut sub) = PeerMsg::request_split( + let (split_region_req, _) = PeerMsg::request_split_with_callback( region.get_region_epoch().clone(), vec![split_key], "test".into(), + Box::new(move |resp| { + cb.invoke_with_response(resp.clone()); + }), ); router .check_send(region.get_id(), split_region_req) .unwrap(); - - block_on(async { - sub.wait_proposed().await; - cb.invoke_proposed(); - sub.wait_committed().await; - cb.invoke_committed(); - let res = sub.result().await.unwrap(); - cb.invoke_with_response(res) - }); } pub fn must_split(&mut self, region: &metapb::Region, split_key: &[u8]) { diff --git a/tests/failpoints/cases/test_replica_read.rs b/tests/failpoints/cases/test_replica_read.rs index e1fed7abcb8..943bdc874cf 100644 --- a/tests/failpoints/cases/test_replica_read.rs +++ b/tests/failpoints/cases/test_replica_read.rs @@ -475,7 +475,7 @@ fn test_replica_read_after_transfer_leader() { // This test is for reproducing the bug that some replica reads was sent to a // leader and shared a same 
read index because of the optimization on leader. #[test_case(test_raftstore::new_node_cluster)] -// #[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_read_index_after_transfer_leader() { let mut cluster = new_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); @@ -572,7 +572,7 @@ fn test_read_index_after_transfer_leader() { /// Test if the read index request can get a correct response when the commit /// index of leader if not up-to-date after transferring leader. #[test_case(test_raftstore::new_node_cluster)] -// #[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_batch_read_index_after_transfer_leader() { let mut cluster = new_node_cluster(0, 3); configure_for_lease_read(&mut cluster.cfg, Some(50), Some(100)); From 3139c789f5647479b32b1aa86498d5a350b45377 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 24 May 2023 10:09:38 +0800 Subject: [PATCH 0704/1149] raftstore-v2: reset has_dirty_data after receiving snapshot from leader (#14804) close tikv/tikv#14798 reset has_dirty_data after receiving snapshot from leader Signed-off-by: Spade A Co-authored-by: tonyxuqqi --- .../src/operation/ready/snapshot.rs | 7 ++ components/raftstore-v2/src/worker/tablet.rs | 2 + src/server/tablet_snap.rs | 1 + tests/failpoints/cases/test_split_region.rs | 85 ++++++++++++++++++- 4 files changed, 94 insertions(+), 1 deletion(-) diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index e5e0a651816..b2a9be988e6 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -34,6 +34,7 @@ use engine_traits::{ EncryptionKeyManager, KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, ALL_CFS, }; +use fail::fail_point; use kvproto::raft_serverpb::{PeerState, 
RaftSnapshotData}; use protobuf::Message; use raft::{eraftpb::Snapshot, StateRole}; @@ -287,11 +288,17 @@ impl Peer { !s.scheduled || snapshot_index != RAFT_INIT_LOG_INDEX }) { info!(self.logger, "apply tablet snapshot completely"); + // Tablet sent from region leader should have already be trimmed. + self.storage_mut().set_has_dirty_data(false); SNAP_COUNTER.apply.success.inc(); + + fail_point!("apply_snapshot_complete"); } if let Some(init) = split { info!(self.logger, "init split with snapshot finished"); self.post_split_init(ctx, init); + + fail_point!("post_split_init_complete"); } self.schedule_apply_fsm(ctx); if self.remove_tombstone_tablets(snapshot_index) { diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index dadc27eb390..629eaf030e6 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -9,6 +9,7 @@ use std::{ use collections::HashMap; use engine_traits::{DeleteStrategy, KvEngine, Range, TabletContext, TabletRegistry, DATA_CFS}; +use fail::fail_point; use kvproto::{import_sstpb::SstMeta, metapb::Region}; use slog::{debug, error, info, warn, Logger}; use sst_importer::SstImporter; @@ -255,6 +256,7 @@ impl Runner { } // drop before callback. 
drop(tablet); + fail_point!("tablet_trimmed_finished"); cb(); }) .unwrap(); diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index 4b5bd81e243..a1d2a12bc91 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -552,6 +552,7 @@ pub(crate) async fn recv_snap( context.io_type == IoType::LoadBalance, )?; } + fail_point!("finish_receiving_snapshot"); context.finish(raft_router) }); match res { diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 096bbc12ed8..ffcaa370936 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -3,7 +3,8 @@ use std::{ sync::{ atomic::{AtomicBool, Ordering}, - mpsc, Arc, Mutex, + mpsc::{self, channel}, + Arc, Mutex, }, thread, time::Duration, @@ -1273,3 +1274,85 @@ impl Filter for TeeFilter { Ok(()) } } + +// Split regions as well as parent will set has_dirty_data be true after +// applying batch split. And after completing tablet trim, has_dirty_data will +// be reset to be false. We encounterred a case where has_dirty_data may be true +// forever which leads to it unable to send snapshot after it becomes leader: +// +// 1. split region +// 2. the splitted region set has_dirty_data be true in `apply_snapshot` +// 3. the splitted region schedule tablet trim task in `on_applied_snapshot` +// with tablet index 5 +// 4. the splitted region received a snapshot sent from its +// leader +// 5. after finishing applying this snapshot, the tablet index in storage +// changed to 6 +// 6. tablet trim complete and callbacked to raftstore +// 7. 
tablet index cannot be matched, so fail to reset has_dirty_data +#[test] +fn test_not_reset_has_dirty_data_due_to_slow_split() { + let mut cluster = test_raftstore_v2::new_server_cluster(0, 3); + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(15); + cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); + cluster.run(); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + // split will be blocked for store 3 + fail::cfg("apply_before_split_1_3", "pause").unwrap(); + fail::cfg("finish_receiving_snapshot", "pause").unwrap(); + + let region = cluster.get_region(b""); + cluster.must_split(®ion, b"k0080"); + + cluster.add_recv_filter_on_node(3, Box::new(DropMessageFilter::new(Arc::new(|_| false)))); + + // prepare some data and split + for i in 0..40 { + let k = format!("k{:04}", i); + cluster.must_put(k.as_bytes(), b"val"); + } + + fail::cfg("tablet_trimmed_finished", "pause").unwrap(); + cluster.clear_recv_filter_on_node(3); + + fail::remove("apply_before_split_1_3"); + let (tx, rx) = channel::<()>(); + let tx = Arc::new(Mutex::new(tx)); + fail::cfg_callback("post_split_init_complete", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + rx.recv_timeout(std::time::Duration::from_secs(50)).unwrap(); + fail::remove("finish_receiving_snapshot"); + + let (tx, rx) = channel::<()>(); + let tx = Arc::new(Mutex::new(tx)); + fail::cfg_callback("apply_snapshot_complete", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + rx.recv_timeout(std::time::Duration::from_secs(50)).unwrap(); + + let split_region = cluster.get_region(b"k0010"); + cluster.must_transfer_leader( + split_region.get_id(), + split_region + .get_peers() + .iter() + .find(|p| p.get_store_id() == 3) + .unwrap() + .clone(), + ); + + // ensure node 3 can send snapshot to node 1 + cluster.stop_node(1); + for i in 40..80 { + let k = format!("k{:04}", i); + cluster.must_put(k.as_bytes(), b"val"); + } + cluster.stop_node(2); + 
cluster.run_node(1).unwrap(); + + cluster.must_put(b"k00001", b"val"); +} From 77bdd414164c8bf33f0926d7fa78b496ea2042cc Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 24 May 2023 11:45:38 +0800 Subject: [PATCH 0705/1149] engine: fix raft-engine encryption key deletion order (#14770) close tikv/tikv#14761 Fix a race between encryption key and raft log file deletion that can cause server unable to boot. Signed-off-by: tabokie Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/encryption/src/file_dict_file.rs | 18 +++++++++++++----- components/raft_log_engine/src/engine.rs | 3 ++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/components/encryption/src/file_dict_file.rs b/components/encryption/src/file_dict_file.rs index cfa945a5cd7..6563de30372 100644 --- a/components/encryption/src/file_dict_file.rs +++ b/components/encryption/src/file_dict_file.rs @@ -10,7 +10,7 @@ use file_system::{rename, File, OpenOptions}; use kvproto::encryptionpb::{EncryptedContent, FileDictionary, FileInfo}; use protobuf::Message; use rand::{thread_rng, RngCore}; -use tikv_util::{box_err, set_panic_mark, warn}; +use tikv_util::{box_err, info, set_panic_mark, warn}; use crate::{ encrypted_file::{EncryptedFile, Header, Version, TMP_FILE_SUFFIX}, @@ -134,11 +134,19 @@ impl FileDictionaryFile { .open(&tmp_path) .unwrap(); - let header = Header::new(&file_dict_bytes, Version::V2); - tmp_file.write_all(&header.to_bytes())?; + let header = Header::new(&file_dict_bytes, Version::V2).to_bytes(); + tmp_file.write_all(&header)?; tmp_file.write_all(&file_dict_bytes)?; tmp_file.sync_all()?; + let new_size = header.len() + file_dict_bytes.len(); + info!( + "installing new dictionary file"; + "name" => tmp_path.display(), + "old_size" => self.file_size, + "new_size" => new_size, + ); + self.file_size = new_size; // Replace old file with the tmp file aomticlly. 
rename(&tmp_path, &origin_path)?; let base_dir = File::open(&self.base)?; @@ -148,9 +156,8 @@ impl FileDictionaryFile { } else { let file = EncryptedFile::new(&self.base, &self.name); file.write(&file_dict_bytes, &PlaintextBackend::default())?; + self.file_size = file_dict_bytes.len(); } - // rough size, excluding EncryptedFile meta. - self.file_size = file_dict_bytes.len(); Ok(()) } @@ -197,6 +204,7 @@ impl FileDictionaryFile { } } Err(e @ Error::TailRecordParseIncomplete) => { + // We will call `rewrite` later to trim the corruption. warn!( "{:?} occurred and the last complete filename is {}", e, last_record_name diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 621d708b057..512da0b79a4 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -190,10 +190,11 @@ impl FileSystem for ManagedFileSystem { } fn delete>(&self, path: P) -> IoResult<()> { + self.base_file_system.delete(path.as_ref())?; if let Some(ref manager) = self.key_manager { manager.delete_file(path.as_ref().to_str().unwrap())?; } - self.base_file_system.delete(path) + Ok(()) } fn rename>(&self, src_path: P, dst_path: P) -> IoResult<()> { From a732337f5713c49bd9eda543fb7735ca47bbbbe4 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 24 May 2023 13:09:38 +0800 Subject: [PATCH 0706/1149] config: clean up v2 config validation (#14777) ref tikv/tikv#12842, ref pingcap/docs#13231 - allow encryption for v2 - rearrange validation logic for better readability - skip some data path checks for v2, and disallow setting kvdb wal for v2 - use the correct path for checking when raft-engine is enabled - default write-buffer-limit less than 15GB Signed-off-by: tabokie Co-authored-by: tonyxuqqi --- components/raftstore-v2/src/raft/apply.rs | 2 + components/security/src/lib.rs | 17 +- components/tikv_util/src/config.rs | 42 ++-- etc/config-template.toml | 13 + src/config/mod.rs | 294 +++++++++++++--------- 5 files 
changed, 226 insertions(+), 142 deletions(-) diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 155afbfc1a3..f2945f7469b 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -211,6 +211,8 @@ impl Apply { self.region().get_id() } + #[allow(unused)] + #[inline] pub fn peer_id(&self) -> u64 { self.peer.get_id() } diff --git a/components/security/src/lib.rs b/components/security/src/lib.rs index bbd296ae1f7..e30003b9832 100644 --- a/components/security/src/lib.rs +++ b/components/security/src/lib.rs @@ -87,7 +87,7 @@ pub struct ClientSuite { impl SecurityConfig { /// Validates ca, cert and private key. - pub fn validate(&self, raftstore_v2: bool) -> Result<(), Box> { + pub fn validate(&self) -> Result<(), Box> { check_key_file("ca key", &self.ca_path)?; check_key_file("cert key", &self.cert_path)?; check_key_file("private key", &self.key_path)?; @@ -97,13 +97,6 @@ impl SecurityConfig { { return Err("ca, cert and private key should be all configured.".into()); } - if raftstore_v2 - && self.encryption.data_encryption_method - != kvproto::encryptionpb::EncryptionMethod::Plaintext - { - return Err("encryption is not supported for partitioned-raft-kv".into()); - } - Ok(()) } @@ -304,7 +297,7 @@ mod tests { fn test_security() { let cfg = SecurityConfig::default(); // default is disable secure connection. - cfg.validate(false).unwrap(); + cfg.validate().unwrap(); let mgr = SecurityManager::new(&cfg).unwrap(); assert!(mgr.cfg.ca_path.is_empty()); assert!(mgr.cfg.cert_path.is_empty()); @@ -313,7 +306,7 @@ mod tests { let assert_cfg = |c: fn(&mut SecurityConfig), valid: bool| { let mut invalid_cfg = cfg.clone(); c(&mut invalid_cfg); - assert_eq!(invalid_cfg.validate(false).is_ok(), valid); + assert_eq!(invalid_cfg.validate().is_ok(), valid); }; // invalid path should be rejected. 
@@ -341,11 +334,11 @@ mod tests { c.cert_path = format!("{}", example_cert.display()); c.key_path = format!("{}", example_key.display()); // incomplete configuration. - c.validate(false).unwrap_err(); + c.validate().unwrap_err(); // data should be loaded from file after validating. c.ca_path = format!("{}", example_ca.display()); - c.validate(false).unwrap(); + c.validate().unwrap(); let (ca, cert, key) = c.load_certs().unwrap_or_default(); assert_eq!(ca, vec![0]); diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index c3ace2a5dfe..07fea59a7da 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -131,28 +131,35 @@ impl Mul for ReadableSize { } } -impl Serialize for ReadableSize { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { +impl fmt::Display for ReadableSize { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let size = self.0; - let mut buffer = String::new(); if size == 0 { - write!(buffer, "{}KiB", size).unwrap(); + write!(f, "{}KiB", size) } else if size % PIB == 0 { - write!(buffer, "{}PiB", size / PIB).unwrap(); + write!(f, "{}PiB", size / PIB) } else if size % TIB == 0 { - write!(buffer, "{}TiB", size / TIB).unwrap(); + write!(f, "{}TiB", size / TIB) } else if size % GIB == 0 { - write!(buffer, "{}GiB", size / GIB).unwrap(); + write!(f, "{}GiB", size / GIB) } else if size % MIB == 0 { - write!(buffer, "{}MiB", size / MIB).unwrap(); + write!(f, "{}MiB", size / MIB) } else if size % KIB == 0 { - write!(buffer, "{}KiB", size / KIB).unwrap(); + write!(f, "{}KiB", size / KIB) } else { - write!(buffer, "{}B", size).unwrap(); + write!(f, "{}B", size) } + } +} + +impl Serialize for ReadableSize { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut buffer = String::new(); + write!(buffer, "{}", self).unwrap(); serializer.serialize_str(&buffer) } } @@ -164,11 +171,11 @@ impl FromStr for ReadableSize { fn 
from_str(s: &str) -> Result { let size_str = s.trim(); if size_str.is_empty() { - return Err(format!("{:?} is not a valid size.", s)); + return Err(format!("{s:?} is not a valid size.")); } if !size_str.is_ascii() { - return Err(format!("ASCII string is expected, but got {:?}", s)); + return Err(format!("ASCII string is expected, but got {s:?}")); } // size: digits and '.' as decimal separator @@ -198,15 +205,14 @@ impl FromStr for ReadableSize { } _ => { return Err(format!( - "only B, KB, KiB, MB, MiB, GB, GiB, TB, TiB, PB, and PiB are supported: {:?}", - s + "only B, KB, KiB, MB, MiB, GB, GiB, TB, TiB, PB, and PiB are supported: {s:?}" )); } }; match size.parse::() { Ok(n) => Ok(ReadableSize((n * unit as f64) as u64)), - Err(_) => Err(format!("invalid size string: {:?}", s)), + Err(_) => Err(format!("invalid size string: {s:?}")), } } } diff --git a/etc/config-template.toml b/etc/config-template.toml index 89f39be79ca..255ec3eea4c 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -230,6 +230,14 @@ ## The path to RocksDB directory. # data-dir = "./" +## Specifies the engine type. This configuration can only be specified when creating a new cluster +## and cannot be modifies once being specified. +## +## Available types are: +## "raft-kv": The default engine type in versions earlier than TiDB v6.6.0. +## "partitioned-raft-kv": The new storage engine type introduced in TiDB v6.6.0. +# engine = "raft-kv" + ## The number of slots in Scheduler latches, which controls write concurrency. ## In most cases you can use the default value. When importing data, you can set it to a larger ## value. @@ -630,7 +638,12 @@ ## Memory usage limit for Raft Engine. Undersized write buffers will be flushed to satisfy the ## requirement. +## ## No limit when not specified. +## +## When storage.engine is "raft-kv", default is no limit. +## When storage.engine is "partitioned-raft-kv", default value is 25% of available system memory or +## 15GiB, whichever is smaller. 
# write-buffer-limit = "1GB" ## Options for `Titan`. diff --git a/src/config/mod.rs b/src/config/mod.rs index 0b3f43a48ee..8ee16eef90d 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -108,6 +108,7 @@ pub const MIN_BLOCK_CACHE_SHARD_SIZE: usize = 128 * MIB as usize; const RAFT_ENGINE_MEMORY_LIMIT_RATE: f64 = 0.15; /// Tentative value. const WRITE_BUFFER_MEMORY_LIMIT_RATE: f64 = 0.25; +const WRITE_BUFFER_MEMORY_LIMIT_MAX: u64 = ReadableSize::gb(15).0; const LOCKCF_MIN_MEM: usize = 256 * MIB as usize; const LOCKCF_MAX_MEM: usize = GIB as usize; @@ -1326,9 +1327,12 @@ impl DbConfig { self.enable_multi_batch_write.get_or_insert(false); self.allow_concurrent_memtable_write.get_or_insert(false); let total_mem = SysQuota::memory_limit_in_bytes() as f64; - self.write_buffer_limit.get_or_insert(ReadableSize( + // purge-threshold is set to twice the limit. Too large limit will cause trouble + // to raft log replay. + self.write_buffer_limit.get_or_insert(ReadableSize(cmp::min( (total_mem * WRITE_BUFFER_MEMORY_LIMIT_RATE) as u64, - )); + WRITE_BUFFER_MEMORY_LIMIT_MAX, + ))); // In RaftKv2, every region uses its own rocksdb instance, it's actually the // even stricter compaction guard, so use the same output file size base. self.writecf @@ -3277,16 +3281,14 @@ impl TikvConfig { } } + // FIXME: consider engine_type. pub fn infer_kv_engine_path(&self, data_dir: Option<&str>) -> Result> { let data_dir = data_dir.unwrap_or(&self.storage.data_dir); config::canonicalize_sub_path(data_dir, DEFAULT_ROCKSDB_SUB_DIR) } pub fn validate(&mut self) -> Result<(), Box> { - self.log.validate()?; - self.readpool.validate()?; - self.storage.validate()?; - + // Setting up data paths. 
if self.cfg_path.is_empty() { self.cfg_path = Path::new(&self.storage.data_dir) .join(LAST_CONFIG_FILE) @@ -3294,16 +3296,105 @@ impl TikvConfig { .unwrap() .to_owned(); } + self.raft_store.raftdb_path = self.infer_raft_db_path(None)?; + self.raft_engine.config.dir = self.infer_raft_engine_path(None)?; + if self.log_backup.temp_path.is_empty() { + self.log_backup.temp_path = + config::canonicalize_sub_path(&self.storage.data_dir, "log-backup-temp")?; + } + + // Validating data paths. + if self.raft_engine.config.dir == self.raft_store.raftdb_path { + return Err("raft_engine.config.dir can't be same as raft_store.raftdb_path".into()); + } + let kv_data_exists = match self.storage.engine { + EngineType::RaftKv => { + let kv_db_path = self.infer_kv_engine_path(None)?; + let kv_db_wal_path = if self.rocksdb.wal_dir.is_empty() { + config::canonicalize_path(&kv_db_path)? + } else { + config::canonicalize_path(&self.rocksdb.wal_dir)? + }; + if self.raft_engine.enable { + if kv_db_path == self.raft_engine.config.dir { + return Err("raft-engine.dir can't be same as storage.data_dir/db".into()); + } + } else { + if kv_db_path == self.raft_store.raftdb_path { + return Err( + "raft_store.raftdb_path can't be same as storage.data_dir/db".into(), + ); + } + let raft_db_wal_path = if self.raftdb.wal_dir.is_empty() { + config::canonicalize_path(&self.raft_store.raftdb_path)? + } else { + config::canonicalize_path(&self.raftdb.wal_dir)? 
+ }; + if kv_db_wal_path == raft_db_wal_path { + return Err("raftdb.wal_dir can't be same as rocksdb.wal_dir".into()); + } + } + // Check blob file dir is empty when titan is disabled + if !self.rocksdb.titan.enabled { + let titandb_path = if self.rocksdb.titan.dirname.is_empty() { + Path::new(&kv_db_path).join("titandb") + } else { + Path::new(&self.rocksdb.titan.dirname).to_path_buf() + }; + if let Err(e) = tikv_util::config::check_data_dir_empty( + titandb_path.to_str().unwrap(), + "blob", + ) { + return Err(format!( + "check: titandb-data-dir-empty; err: \"{}\"; \ + hint: You have disabled titan when its data directory is not empty. \ + To properly shutdown titan, please enter fallback blob-run-mode and \ + wait till titandb files are all safely ingested.", + e + ) + .into()); + } + } + RocksEngine::exists(&kv_db_path) + } + EngineType::RaftKv2 => { + if !self.rocksdb.wal_dir.is_empty() { + return Err( + "partitioned-raft-kv doesn't support configuring rocksdb.wal-dir".into(), + ); + } + Path::new(&self.storage.data_dir) + .join(DEFAULT_TABLET_SUB_DIR) + .exists() + } + }; + RaftDataStateMachine::new( + &self.storage.data_dir, + &self.raft_store.raftdb_path, + &self.raft_engine.config.dir, + ) + .validate(kv_data_exists)?; + // Optimize. + self.rocksdb.optimize_for(self.storage.engine); + self.coprocessor + .optimize_for(self.storage.engine == EngineType::RaftKv2); + self.split + .optimize_for(self.coprocessor.region_split_size()); + self.raft_store + .optimize_for(self.storage.engine == EngineType::RaftKv2); if self.storage.engine == EngineType::RaftKv2 { self.raft_store.store_io_pool_size = cmp::max(self.raft_store.store_io_pool_size, 1); + } + + // Validate for v2. 
+ if self.storage.engine == EngineType::RaftKv2 { if !self.raft_engine.enable { return Err("partitioned-raft-kv only supports raft log engine.".into()); } if self.rocksdb.titan.enabled { return Err("partitioned-raft-kv doesn't support titan.".into()); } - if self.raft_store.enable_v2_compatible_learner { self.raft_store.enable_v2_compatible_learner = false; warn!( @@ -3314,68 +3405,7 @@ impl TikvConfig { } } - self.raft_store.raftdb_path = self.infer_raft_db_path(None)?; - self.raft_engine.config.dir = self.infer_raft_engine_path(None)?; - - if self.raft_engine.config.dir == self.raft_store.raftdb_path { - return Err("raft_engine.config.dir can't be same as raft_store.raftdb_path".into()); - } - - let kv_db_path = self.infer_kv_engine_path(None)?; - if kv_db_path == self.raft_store.raftdb_path { - return Err("raft_store.raftdb_path can't be same as storage.data_dir/db".into()); - } - - let kv_db_wal_path = if self.rocksdb.wal_dir.is_empty() { - config::canonicalize_path(&kv_db_path)? - } else { - config::canonicalize_path(&self.rocksdb.wal_dir)? - }; - let raft_db_wal_path = if self.raftdb.wal_dir.is_empty() { - config::canonicalize_path(&self.raft_store.raftdb_path)? - } else { - config::canonicalize_path(&self.raftdb.wal_dir)? 
- }; - if kv_db_wal_path == raft_db_wal_path { - return Err("raftdb.wal_dir can't be same as rocksdb.wal_dir".into()); - } - - let kv_data_exists = if self.storage.engine == EngineType::RaftKv { - RocksEngine::exists(&kv_db_path) - } else { - Path::new(&self.storage.data_dir) - .join(DEFAULT_TABLET_SUB_DIR) - .exists() - }; - - RaftDataStateMachine::new( - &self.storage.data_dir, - &self.raft_store.raftdb_path, - &self.raft_engine.config.dir, - ) - .validate(kv_data_exists)?; - - // Check blob file dir is empty when titan is disabled - if !self.rocksdb.titan.enabled { - let titandb_path = if self.rocksdb.titan.dirname.is_empty() { - Path::new(&kv_db_path).join("titandb") - } else { - Path::new(&self.rocksdb.titan.dirname).to_path_buf() - }; - if let Err(e) = - tikv_util::config::check_data_dir_empty(titandb_path.to_str().unwrap(), "blob") - { - return Err(format!( - "check: titandb-data-dir-empty; err: \"{}\"; \ - hint: You have disabled titan when its data directory is not empty. \ - To properly shutdown titan, please enter fallback blob-run-mode and \ - wait till titandb files are all safely ingested.", - e - ) - .into()); - } - } - + // Validate raftstore with other components. 
let expect_keepalive = self.raft_store.raft_heartbeat_interval() * 2; if expect_keepalive > self.server.grpc_keepalive_time.0 { return Err(format!( @@ -3385,7 +3415,6 @@ impl TikvConfig { ) .into()); } - if self.raft_store.hibernate_regions && !self.cdc.hibernate_regions_compatible { warn!( "raftstore.hibernate-regions was enabled but cdc.hibernate-regions-compatible \ @@ -3393,47 +3422,7 @@ impl TikvConfig { ); } - if self.log_backup.temp_path.is_empty() { - self.log_backup.temp_path = - config::canonicalize_sub_path(&self.storage.data_dir, "log-backup-temp")?; - } - - self.rocksdb.optimize_for(self.storage.engine); - - self.rocksdb.validate()?; - self.raftdb.validate()?; - self.raft_engine.validate()?; - self.server.validate()?; - self.pd.validate()?; - - // cannot pass EngineType directly as component raftstore cannot have dependency - // on tikv - self.coprocessor - .optimize_for(self.storage.engine == EngineType::RaftKv2); - self.coprocessor.validate()?; - self.split - .optimize_for(self.coprocessor.region_split_size()); - self.raft_store - .optimize_for(self.storage.engine == EngineType::RaftKv2); - self.raft_store.validate( - self.coprocessor.region_split_size(), - self.coprocessor.enable_region_bucket(), - self.coprocessor.region_bucket_size, - )?; - self.security - .validate(self.storage.engine == EngineType::RaftKv2)?; - self.import.validate()?; - self.backup.validate()?; - self.log_backup.validate()?; - self.cdc - .validate(self.storage.engine == EngineType::RaftKv2)?; - self.pessimistic_txn.validate()?; - self.gc.validate()?; - self.resolved_ts.validate()?; - self.resource_metering.validate()?; - self.quota.validate()?; - self.causal_ts.validate()?; - + // Validate flow control and rocksdb write stall. 
if self.storage.flow_control.enable { self.rocksdb.defaultcf.disable_write_stall = true; self.rocksdb.writecf.disable_write_stall = true; @@ -3511,6 +3500,7 @@ impl TikvConfig { fill_cf_opts!(self.rocksdb.lockcf, flow_control_cfg); fill_cf_opts!(self.rocksdb.raftcf, flow_control_cfg); + // Validate memory usage limit. if let Some(memory_usage_limit) = self.memory_usage_limit { let total = SysQuota::memory_limit_in_bytes(); if memory_usage_limit.0 > total { @@ -3530,7 +3520,6 @@ impl TikvConfig { self.memory_usage_limit = Some(Self::suggested_memory_usage_limit()); } } - let mut limit = self.memory_usage_limit.unwrap(); let total = ReadableSize(SysQuota::memory_limit_in_bytes()); if limit.0 > total.0 { @@ -3550,6 +3539,34 @@ impl TikvConfig { ); } + // Validate sub-components. + self.log.validate()?; + self.readpool.validate()?; + self.storage.validate()?; + self.rocksdb.validate()?; + self.raftdb.validate()?; + self.raft_engine.validate()?; + self.server.validate()?; + self.pd.validate()?; + self.coprocessor.validate()?; + self.raft_store.validate( + self.coprocessor.region_split_size(), + self.coprocessor.enable_region_bucket(), + self.coprocessor.region_bucket_size, + )?; + self.security.validate()?; + self.import.validate()?; + self.backup.validate()?; + self.log_backup.validate()?; + self.cdc + .validate(self.storage.engine == EngineType::RaftKv2)?; + self.pessimistic_txn.validate()?; + self.gc.validate()?; + self.resolved_ts.validate()?; + self.resource_metering.validate()?; + self.quota.validate()?; + self.causal_ts.validate()?; + Ok(()) } @@ -5508,40 +5525,93 @@ mod tests { cfg.validate().unwrap(); } + // raft path == kv path { let mut cfg = TikvConfig::default(); + cfg.storage.engine = EngineType::RaftKv; + cfg.raft_engine.enable = false; cfg.storage.data_dir = tmp_path_string_generate!(tmp_path, "data"); cfg.raft_store.raftdb_path = tmp_path_string_generate!(tmp_path, "data", "db"); cfg.validate().unwrap_err(); + + let mut cfg = TikvConfig::default(); + 
cfg.storage.engine = EngineType::RaftKv; + cfg.raft_engine.enable = true; + cfg.storage.data_dir = tmp_path_string_generate!(tmp_path, "data"); + cfg.raft_engine.config.dir = tmp_path_string_generate!(tmp_path, "data", "db"); + cfg.validate().unwrap_err(); + + let mut cfg = TikvConfig::default(); + cfg.storage.engine = EngineType::RaftKv; + cfg.raft_engine.enable = true; + cfg.storage.data_dir = tmp_path_string_generate!(tmp_path, "data"); + cfg.raft_store.raftdb_path = tmp_path_string_generate!(tmp_path, "data", "db"); + cfg.validate().unwrap(); } + // raft path == kv wal path { let mut cfg = TikvConfig::default(); + cfg.storage.engine = EngineType::RaftKv; + cfg.raft_engine.enable = false; cfg.storage.data_dir = tmp_path_string_generate!(tmp_path, "data", "kvdb"); cfg.raft_store.raftdb_path = tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); cfg.rocksdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); cfg.validate().unwrap_err(); + + let mut cfg = TikvConfig::default(); + cfg.storage.engine = EngineType::RaftKv; + cfg.raft_engine.enable = true; + cfg.storage.data_dir = tmp_path_string_generate!(tmp_path, "data", "kvdb"); + cfg.raft_store.raftdb_path = + tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); + cfg.rocksdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); + cfg.validate().unwrap(); } + // raft wal path == kv path { let mut cfg = TikvConfig::default(); + cfg.storage.engine = EngineType::RaftKv; + cfg.raft_engine.enable = false; cfg.storage.data_dir = tmp_path_string_generate!(tmp_path, "data", "kvdb"); cfg.raft_store.raftdb_path = tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); cfg.raftdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "kvdb", "db"); cfg.validate().unwrap_err(); + + let mut cfg = TikvConfig::default(); + cfg.storage.engine = EngineType::RaftKv; + cfg.raft_engine.enable = true; + cfg.storage.data_dir = tmp_path_string_generate!(tmp_path, "data", "kvdb"); + 
cfg.raft_store.raftdb_path = + tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); + cfg.raftdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "kvdb", "db"); + cfg.validate().unwrap(); } + // raft wal path == kv wal path { let mut cfg = TikvConfig::default(); + cfg.storage.engine = EngineType::RaftKv; + cfg.raft_engine.enable = false; cfg.rocksdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "wal"); cfg.raftdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "wal"); cfg.validate().unwrap_err(); + + let mut cfg = TikvConfig::default(); + cfg.storage.engine = EngineType::RaftKv; + cfg.raft_engine.enable = true; + cfg.rocksdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "wal"); + cfg.raftdb.wal_dir = tmp_path_string_generate!(tmp_path, "data", "wal"); + cfg.validate().unwrap(); } { let mut cfg = TikvConfig::default(); + cfg.storage.engine = EngineType::RaftKv; + cfg.raft_engine.enable = false; cfg.storage.data_dir = tmp_path_string_generate!(tmp_path, "data", "kvdb"); cfg.raft_store.raftdb_path = tmp_path_string_generate!(tmp_path, "data", "raftdb", "db"); From 4f8d2592d4028fb0204ec8d37d024b254c31e91a Mon Sep 17 00:00:00 2001 From: you06 Date: Wed, 24 May 2023 15:35:39 +0800 Subject: [PATCH 0707/1149] add gzip compression for check_leader requests (#14721) ref tikv/tikv#14553 Add gzip compression for check_leader requests. 
Signed-off-by: you06 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resolved_ts/src/advance.rs | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index 65d1c1139c6..36c95347111 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -15,7 +15,9 @@ use concurrency_manager::ConcurrencyManager; use engine_traits::KvEngine; use fail::fail_point; use futures::{compat::Future01CompatExt, future::select_all, FutureExt, TryFutureExt}; -use grpcio::{ChannelBuilder, Environment, Error as GrpcError, RpcStatusCode}; +use grpcio::{ + ChannelBuilder, CompressionAlgorithms, Environment, Error as GrpcError, RpcStatusCode, +}; use kvproto::{ kvrpcpb::{CheckLeaderRequest, CheckLeaderResponse}, metapb::{Peer, PeerRole}, @@ -44,6 +46,8 @@ use txn_types::TimeStamp; use crate::{endpoint::Task, metrics::*}; const DEFAULT_CHECK_LEADER_TIMEOUT_DURATION: Duration = Duration::from_secs(5); // 5s +const DEFAULT_GRPC_GZIP_COMPRESSION_LEVEL: usize = 2; +const DEFAULT_GRPC_MIN_MESSAGE_SIZE_TO_COMPRESS: usize = 4096; pub struct AdvanceTsWorker { pd_client: Arc, @@ -520,10 +524,17 @@ async fn get_tikv_client( let mut clients = tikv_clients.lock().await; let start = Instant::now_coarse(); // hack: so it's different args, grpc will always create a new connection. - let cb = ChannelBuilder::new(env.clone()).raw_cfg_int( - CString::new("random id").unwrap(), - CONN_ID.fetch_add(1, Ordering::SeqCst), - ); + // the check leader requests may be large but not frequent, compress it to + // reduce the traffic. 
+ let cb = ChannelBuilder::new(env.clone()) + .raw_cfg_int( + CString::new("random id").unwrap(), + CONN_ID.fetch_add(1, Ordering::SeqCst), + ) + .default_compression_algorithm(CompressionAlgorithms::GRPC_COMPRESS_GZIP) + .default_gzip_compression_level(DEFAULT_GRPC_GZIP_COMPRESSION_LEVEL) + .default_grpc_min_message_size_to_compress(DEFAULT_GRPC_MIN_MESSAGE_SIZE_TO_COMPRESS); + let channel = security_mgr.connect(cb, &store.peer_address); let cli = TikvClient::new(channel); clients.insert(store_id, cli.clone()); From 1d2520589ae56711fb98d372d37e9ad3a500f1de Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Wed, 24 May 2023 17:13:38 -0700 Subject: [PATCH 0708/1149] add more specific error type in ErrorHeaderKind (#14809) close tikv/tikv#14808 Add more specific error type of raftstore error to metrics Signed-off-by: Qi Xu Co-authored-by: Qi Xu --- src/server/metrics.rs | 14 ++++++++++++++ src/storage/errors.rs | 28 ++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 37c3ce1048f..e690eff718e 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -469,6 +469,13 @@ make_auto_flush_static_metric! 
{ err_store_not_match, err_raft_entry_too_large, err_leader_memory_lock_check, + err_read_index_not_ready, + err_proposal_in_merging_mode, + err_data_is_not_ready, + err_region_not_initialized, + err_disk_full, + err_recovery_in_progress, + err_flashback_in_progress, } pub label_enum RequestTypeKind { @@ -497,6 +504,13 @@ impl From for RequestStatusKind { ErrorHeaderKind::StaleCommand => RequestStatusKind::err_stale_command, ErrorHeaderKind::StoreNotMatch => RequestStatusKind::err_store_not_match, ErrorHeaderKind::RaftEntryTooLarge => RequestStatusKind::err_raft_entry_too_large, + ErrorHeaderKind::ReadIndexNotReady => RequestStatusKind::err_read_index_not_ready, + ErrorHeaderKind::ProposalInMergeMode => RequestStatusKind::err_proposal_in_merging_mode, + ErrorHeaderKind::DataNotReady => RequestStatusKind::err_data_is_not_ready, + ErrorHeaderKind::RegionNotInitialized => RequestStatusKind::err_region_not_found, + ErrorHeaderKind::DiskFull => RequestStatusKind::err_disk_full, + ErrorHeaderKind::RecoveryInProgress => RequestStatusKind::err_recovery_in_progress, + ErrorHeaderKind::FlashbackInProgress => RequestStatusKind::err_flashback_in_progress, ErrorHeaderKind::Other => RequestStatusKind::err_other, } } diff --git a/src/storage/errors.rs b/src/storage/errors.rs index 07ea4b5589e..d3c56c48984 100644 --- a/src/storage/errors.rs +++ b/src/storage/errors.rs @@ -176,6 +176,13 @@ pub enum ErrorHeaderKind { StaleCommand, StoreNotMatch, RaftEntryTooLarge, + ReadIndexNotReady, + ProposalInMergeMode, + DataNotReady, + RegionNotInitialized, + DiskFull, + RecoveryInProgress, + FlashbackInProgress, Other, } @@ -193,6 +200,13 @@ impl ErrorHeaderKind { ErrorHeaderKind::StaleCommand => "stale_command", ErrorHeaderKind::StoreNotMatch => "store_not_match", ErrorHeaderKind::RaftEntryTooLarge => "raft_entry_too_large", + ErrorHeaderKind::ReadIndexNotReady => "read_index_not_ready", + ErrorHeaderKind::ProposalInMergeMode => "proposal_in_merge_mode", + ErrorHeaderKind::DataNotReady => 
"data_not_ready", + ErrorHeaderKind::RegionNotInitialized => "region_not_initialized", + ErrorHeaderKind::DiskFull => "disk_full", + ErrorHeaderKind::RecoveryInProgress => "recovery_in_progress", + ErrorHeaderKind::FlashbackInProgress => "flashback_in_progress", ErrorHeaderKind::Other => "other", } } @@ -227,6 +241,20 @@ pub fn get_error_kind_from_header(header: &errorpb::Error) -> ErrorHeaderKind { ErrorHeaderKind::StoreNotMatch } else if header.has_raft_entry_too_large() { ErrorHeaderKind::RaftEntryTooLarge + } else if header.has_read_index_not_ready() { + ErrorHeaderKind::ReadIndexNotReady + } else if header.has_proposal_in_merging_mode() { + ErrorHeaderKind::ProposalInMergeMode + } else if header.has_data_is_not_ready() { + ErrorHeaderKind::DataNotReady + } else if header.has_region_not_initialized() { + ErrorHeaderKind::RegionNotInitialized + } else if header.has_disk_full() { + ErrorHeaderKind::DiskFull + } else if header.has_recovery_in_progress() { + ErrorHeaderKind::RecoveryInProgress + } else if header.has_flashback_in_progress() { + ErrorHeaderKind::FlashbackInProgress } else { ErrorHeaderKind::Other } From 7f9aaf71567b97ae2faf45b8f9d005ec6fef3501 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Wed, 24 May 2023 22:05:38 -0700 Subject: [PATCH 0709/1149] raftstore-v2: add replay watch (#14739) ref tikv/tikv#14173 This is part of the code from PR 14501. This change is to record the actual time of the replay raft log. 
Signed-off-by: Qi Xu Co-authored-by: Qi Xu --- components/raftstore-v2/src/batch/store.rs | 9 +- components/raftstore-v2/src/fsm/peer.rs | 9 +- .../raftstore-v2/src/operation/command/mod.rs | 2 +- components/raftstore-v2/src/operation/life.rs | 2 +- components/raftstore-v2/src/operation/mod.rs | 3 +- .../raftstore-v2/src/operation/ready/mod.rs | 85 +++++++++++++++++-- components/raftstore-v2/src/raft/peer.rs | 20 ++--- components/raftstore-v2/src/router/message.rs | 5 +- 8 files changed, 106 insertions(+), 29 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 7398c24ba80..3fc312cdffc 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -54,7 +54,9 @@ use time::Timespec; use crate::{ fsm::{PeerFsm, PeerFsmDelegate, SenderFsmPair, StoreFsm, StoreFsmDelegate, StoreMeta}, - operation::{SharedReadTablet, MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX}, + operation::{ + ReplayWatch, SharedReadTablet, MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX, + }, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, worker::{checkpoint, cleanup, pd, tablet}, @@ -747,8 +749,11 @@ impl StoreSystem { router.register_all(mailboxes); // Make sure Msg::Start is the first message each FSM received. + let watch = Arc::new(ReplayWatch::new(self.logger.clone())); for addr in address { - router.force_send(addr, PeerMsg::Start).unwrap(); + router + .force_send(addr, PeerMsg::Start(Some(watch.clone()))) + .unwrap(); } router.send_control(StoreMsg::Start).unwrap(); Ok(()) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 7d7a66e7357..ca93dd1813b 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -2,7 +2,7 @@ //! This module contains the peer implementation for batch system. 
-use std::borrow::Cow; +use std::{borrow::Cow, sync::Arc}; use batch_system::{BasicMailbox, Fsm}; use crossbeam::channel::TryRecvError; @@ -20,6 +20,7 @@ use tikv_util::{ use crate::{ batch::StoreContext, + operation::ReplayWatch, raft::{Peer, Storage}, router::{PeerMsg, PeerTick, QueryResult}, Result, @@ -187,8 +188,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, self.store_ctx.tick_batch[idx].ticks.push(cb); } - fn on_start(&mut self) { - if !self.fsm.peer.maybe_pause_for_recovery(self.store_ctx) { + fn on_start(&mut self, watch: Option>) { + if !self.fsm.peer.maybe_pause_for_replay(self.store_ctx, watch) { self.schedule_tick(PeerTick::Raft); } self.schedule_tick(PeerTick::SplitRegionCheck); @@ -269,7 +270,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerMsg::SplitInitFinish(region_id) => { self.fsm.peer.on_split_init_finish(region_id) } - PeerMsg::Start => self.on_start(), + PeerMsg::Start(w) => self.on_start(w), PeerMsg::Noop => unimplemented!(), PeerMsg::Persisted { peer_id, diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 0bbf6a05601..db702aef6b8 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -431,7 +431,7 @@ impl Peer { progress_to_be_updated, ); self.try_compelete_recovery(); - if !self.pause_for_recovery() && self.storage_mut().apply_trace_mut().should_flush() { + if !self.pause_for_replay() && self.storage_mut().apply_trace_mut().should_flush() { if let Some(scheduler) = self.apply_scheduler() { scheduler.send(ApplyTask::ManualFlush); } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 45c18400627..70ebbcea348 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -456,7 +456,7 @@ impl Store { let 
mailbox = BasicMailbox::new(tx, fsm, ctx.router.state_cnt().clone()); if ctx .router - .send_and_register(region_id, mailbox, PeerMsg::Start) + .send_and_register(region_id, mailbox, PeerMsg::Start(None)) .is_err() { panic!( diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index f5e2077ad0a..8d8c24fee19 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -17,7 +17,8 @@ pub use command::{ }; pub use life::{AbnormalPeerContext, DestroyProgress, GcPeerContext}; pub use ready::{ - write_initial_states, ApplyTrace, AsyncWriter, DataTrace, GenSnapTask, SnapState, StateStorage, + write_initial_states, ApplyTrace, AsyncWriter, DataTrace, GenSnapTask, ReplayWatch, SnapState, + StateStorage, }; pub(crate) use self::{ diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 6eb1fd10d51..30f08285926 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -21,7 +21,15 @@ mod apply_trace; mod async_writer; mod snapshot; -use std::{cmp, time::Instant}; +use std::{ + cmp, + fmt::{self, Debug, Formatter}, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + time::Instant, +}; use engine_traits::{KvEngine, RaftEngine}; use error_code::ErrorCodeExt; @@ -41,7 +49,7 @@ use raftstore::{ FetchedLogs, ReadProgress, Transport, WriteCallback, WriteTask, }, }; -use slog::{debug, error, info, trace, warn}; +use slog::{debug, error, info, trace, warn, Logger}; use tikv_util::{ log::SlogFormat, slog_panic, @@ -63,7 +71,56 @@ use crate::{ worker::tablet, }; -const PAUSE_FOR_RECOVERY_GAP: u64 = 128; +const PAUSE_FOR_REPLAY_GAP: u64 = 128; + +pub struct ReplayWatch { + normal_peers: AtomicUsize, + paused_peers: AtomicUsize, + logger: Logger, + timer: Instant, +} + +impl Debug for ReplayWatch { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result 
{ + f.debug_struct("ReplayWatch") + .field("normal_peers", &self.normal_peers) + .field("paused_peers", &self.paused_peers) + .field("logger", &self.logger) + .field("timer", &self.timer) + .finish() + } +} + +impl ReplayWatch { + pub fn new(logger: Logger) -> Self { + Self { + normal_peers: AtomicUsize::new(0), + paused_peers: AtomicUsize::new(0), + logger, + timer: Instant::now(), + } + } + + pub fn inc_normal_peer(&self) { + self.normal_peers.fetch_add(1, Ordering::Relaxed); + } + + pub fn inc_paused_peer(&self) { + self.paused_peers.fetch_add(1, Ordering::Relaxed); + } +} + +impl Drop for ReplayWatch { + fn drop(&mut self) { + info!( + self.logger, + "The raft log replay completed"; + "normal_peers" => self.normal_peers.load(Ordering::Relaxed), + "paused_peers" => self.paused_peers.load(Ordering::Relaxed), + "elapsed" => ?self.timer.elapsed() + ); + } +} impl Store { pub fn on_store_unreachable( @@ -115,7 +172,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, } impl Peer { - pub fn maybe_pause_for_recovery(&mut self, store_ctx: &mut StoreContext) -> bool { + pub fn maybe_pause_for_replay( + &mut self, + store_ctx: &mut StoreContext, + watch: Option>, + ) -> bool { // The task needs to be scheduled even if the tablet may be replaced during // recovery. Otherwise if there are merges during recovery, the FSM may // be paused forever. @@ -139,14 +200,22 @@ impl Peer { // it may block for ever when there is unapplied conf change. self.set_has_ready(); } - if committed_index > applied_index + PAUSE_FOR_RECOVERY_GAP { + if committed_index > applied_index + PAUSE_FOR_REPLAY_GAP { // If there are too many the missing logs, we need to skip ticking otherwise // it may block the raftstore thread for a long time in reading logs for // election timeout. 
- info!(self.logger, "pause for recovery"; "applied" => applied_index, "committed" => committed_index); - self.set_pause_for_recovery(true); + info!(self.logger, "pause for replay"; "applied" => applied_index, "committed" => committed_index); + + // when committed_index > applied_index + PAUSE_FOR_REPLAY_GAP, the peer must be + // created from StoreSystem on TiKV Start + let w = watch.unwrap(); + w.inc_paused_peer(); + self.set_replay_watch(Some(w)); true } else { + if let Some(w) = watch { + w.inc_normal_peer(); + } false } } @@ -189,7 +258,7 @@ impl Peer { "from_peer_id" => msg.get_from_peer().get_id(), "to_peer_id" => msg.get_to_peer().get_id(), ); - if self.pause_for_recovery() && msg.get_message().get_msg_type() == MessageType::MsgAppend { + if self.pause_for_replay() && msg.get_message().get_msg_type() == MessageType::MsgAppend { ctx.raft_metrics.message_dropped.recovery.inc(); return; } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 1500737da3b..eeb47500fbf 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -35,8 +35,8 @@ use crate::{ fsm::ApplyScheduler, operation::{ AbnormalPeerContext, AsyncWriter, BucketStatsInfo, CompactLogContext, DestroyProgress, - GcPeerContext, MergeContext, ProposalControl, SimpleWriteReqEncoder, SplitFlowControl, - TxnContext, + GcPeerContext, MergeContext, ProposalControl, ReplayWatch, SimpleWriteReqEncoder, + SplitFlowControl, TxnContext, }, router::{ApplyTask, CmdResChannel, PeerTick, QueryResChannel}, Result, @@ -76,7 +76,7 @@ pub struct Peer { has_ready: bool, /// Sometimes there is no ready at all, but we need to trigger async write. has_extra_write: bool, - pause_for_recovery: bool, + replay_watch: Option>, /// Writer for persisting side effects asynchronously. 
pub(crate) async_writer: AsyncWriter, @@ -179,7 +179,7 @@ impl Peer { apply_scheduler: None, has_ready: false, has_extra_write: false, - pause_for_recovery: false, + replay_watch: None, destroy_progress: DestroyProgress::None, raft_group, logger, @@ -473,13 +473,13 @@ impl Peer { } #[inline] - pub fn set_pause_for_recovery(&mut self, pause: bool) { - self.pause_for_recovery = pause; + pub fn set_replay_watch(&mut self, watch: Option>) { + self.replay_watch = watch; } #[inline] - pub fn pause_for_recovery(&self) -> bool { - self.pause_for_recovery + pub fn pause_for_replay(&self) -> bool { + self.replay_watch.is_some() } #[inline] @@ -487,7 +487,7 @@ impl Peer { // between commit index and apply index. We should scheduling it when raft log // apply catches up. pub fn try_compelete_recovery(&mut self) { - if self.pause_for_recovery() + if self.pause_for_replay() && self.storage().entry_storage().commit_index() <= self.storage().entry_storage().applied_index() { @@ -496,7 +496,7 @@ impl Peer { "recovery completed"; "apply_index" => self.storage().entry_storage().applied_index() ); - self.set_pause_for_recovery(false); + self.set_replay_watch(None); // Flush to avoid recover again and again. if let Some(scheduler) = self.apply_scheduler() { scheduler.send(ApplyTask::ManualFlush); diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 79f9d915c0d..484d383108d 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -1,6 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
// #[PerformanceCriticalPath] +use std::sync::Arc; use kvproto::{ import_sstpb::SstMeta, @@ -21,7 +22,7 @@ use super::response_channel::{ QueryResSubscriber, }; use crate::{ - operation::{CatchUpLogs, RequestHalfSplit, RequestSplit, SplitInit}, + operation::{CatchUpLogs, ReplayWatch, RequestHalfSplit, RequestSplit, SplitInit}, router::ApplyRes, }; @@ -169,7 +170,7 @@ pub enum PeerMsg { LogsFetched(FetchedLogs), SnapshotGenerated(GenSnapRes), /// Start the FSM. - Start, + Start(Option>), /// Messages from peer to peer in the same store SplitInit(Box), SplitInitFinish(u64), From 3ac9893e67e303df9abe172cff497eade2972334 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Fri, 26 May 2023 12:07:39 +0800 Subject: [PATCH 0710/1149] implement debug interface such as `store_info`, `cluster_info`,`dump_kv` (#14742) ref tikv/tikv#14654 debugger v2 impl `store_info`, `cluster_info`,`dump_kv` Signed-off-by: bufferflies <1045931706@qq.com> --- cmd/tikv-ctl/src/cmd.rs | 4 +- cmd/tikv-ctl/src/executor.rs | 49 +++++-- components/test_raftstore/src/server.rs | 2 +- src/server/debug.rs | 46 +++--- src/server/debug2.rs | 185 +++++++++++++++++------- src/server/service/debug.rs | 1 + tests/integrations/server/kv_service.rs | 3 +- 7 files changed, 198 insertions(+), 92 deletions(-) diff --git a/cmd/tikv-ctl/src/cmd.rs b/cmd/tikv-ctl/src/cmd.rs index 42678386f5a..b4d74ae7db6 100644 --- a/cmd/tikv-ctl/src/cmd.rs +++ b/cmd/tikv-ctl/src/cmd.rs @@ -496,11 +496,11 @@ pub enum Cmd { /// Show range properties RangeProperties { #[structopt(long, default_value = "")] - /// hex start key + /// hex start key (not starts with "z") start: String, #[structopt(long, default_value = "")] - /// hex end key + /// hex end key (not starts with "z") end: String, }, /// Split the region diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 4cb28fbbc87..541863d0227 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -1085,7 +1085,8 @@ impl 
DebugExecutor for DebuggerImpl { } fn dump_metrics(&self, _tags: Vec<&str>) { - unimplemented!("only available for online mode"); + println!("only available for online mode"); + tikv_util::logger::exit_process_gracefully(-1); } fn check_region_consistency(&self, _: u64) { @@ -1236,40 +1237,58 @@ impl DebugExecutor for DebuggerImplV2 { unimplemented!() } - fn drop_unapplied_raftlog(&self, _region_ids: Option>) { - unimplemented!() - } + fn drop_unapplied_raftlog(&self, _region_ids: Option>) {} - fn recreate_region(&self, _sec_mgr: Arc, _pd_cfg: &PdConfig, _region_id: u64) { + fn recreate_region(&self, _mgr: Arc, _pd_cfg: &PdConfig, _region_id: u64) { unimplemented!() } fn dump_metrics(&self, _tags: Vec<&str>) { - unimplemented!() + println!("only available for online mode"); + tikv_util::logger::exit_process_gracefully(-1); } fn check_region_consistency(&self, _: u64) { - unimplemented!() + println!("only support remote mode"); + tikv_util::logger::exit_process_gracefully(-1); } - fn modify_tikv_config(&self, _config_name: &str, _config_value: &str) { - unimplemented!() + fn modify_tikv_config(&self, _: &str, _: &str) { + println!("only support remote mode"); + tikv_util::logger::exit_process_gracefully(-1); } - fn dump_region_properties(&self, _region_id: u64) { - unimplemented!() + fn dump_region_properties(&self, region_id: u64) { + let props = self + .get_region_properties(region_id) + .unwrap_or_else(|e| perror_and_exit("Debugger::get_region_properties", e)); + for (name, value) in props { + println!("{}: {}", name, value); + } } - fn dump_range_properties(&self, _start: Vec, _end: Vec) { - unimplemented!() + fn dump_range_properties(&self, start: Vec, end: Vec) { + let props = self + .get_range_properties(&start, &end) + .unwrap_or_else(|e| perror_and_exit("Debugger::get_range_properties", e)); + for (name, value) in props { + println!("{}: {}", name, value); + } } fn dump_store_info(&self) { - unimplemented!() + let store_ident_info = self.get_store_ident(); 
+ if let Ok(ident) = store_ident_info { + println!("store id: {}", ident.get_store_id()); + println!("api version: {:?}", ident.get_api_version()); + } } fn dump_cluster_info(&self) { - unimplemented!() + let store_ident_info = self.get_store_ident(); + if let Ok(ident) = store_ident_info { + println!("cluster id: {}", ident.get_cluster_id()); + } } fn reset_to_version(&self, _version: u64) { diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 1dcf63635a2..4d2ac77b1bc 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -489,7 +489,7 @@ impl ServerCluster { .unwrap(), ); - let debugger = DebuggerImpl::new(engines.clone(), ConfigController::default()); + let debugger = DebuggerImpl::new(engines.clone(), ConfigController::new(cfg.tikv.clone())); let debug_thread_handle = debug_thread_pool.handle().clone(); let debug_service = DebugService::new(debugger, debug_thread_handle, extension); diff --git a/src/server/debug.rs b/src/server/debug.rs index 26d418e6871..396339dcb7a 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -16,9 +16,9 @@ use engine_rocks::{ RocksEngine, RocksEngineIterator, RocksMvccProperties, RocksStatistics, RocksWriteBatchVec, }; use engine_traits::{ - Engines, IterOptions, Iterable, Iterator as EngineIterator, MiscExt, Mutable, MvccProperties, - Peekable, RaftEngine, RaftLogBatch, Range, RangePropertiesExt, SyncMutable, WriteBatch, - WriteBatchExt, WriteOptions, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, + Engines, Error as EngineTraitError, IterOptions, Iterable, Iterator as EngineIterator, MiscExt, + Mutable, MvccProperties, Peekable, RaftEngine, RaftLogBatch, Range, RangePropertiesExt, + SyncMutable, WriteBatch, WriteBatchExt, WriteOptions, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use kvproto::{ debugpb::{self, Db as DbType}, @@ -58,6 +58,9 @@ pub enum Error { #[error("{0:?}")] Other(#[from] Box), + + #[error("Engine error {0}")] + 
EngineTrait(#[from] EngineTraitError), } /// Describes the meta information of a Region. @@ -930,27 +933,11 @@ impl Debugger for DebuggerImpl { Ok(()) } - fn get_range_properties(&self, start: &[u8], end: &[u8]) -> Result> { - let mut props = dump_write_cf_properties( - &self.engines.kv, - &keys::data_key(start), - &keys::data_end_key(end), - )?; - let mut props1 = dump_default_cf_properties( - &self.engines.kv, - &keys::data_key(start), - &keys::data_end_key(end), - )?; - props.append(&mut props1); - Ok(props) - } - fn get_region_properties(&self, region_id: u64) -> Result> { let region_state = self.get_region_state(region_id)?; let region = region_state.get_region(); let start = keys::enc_start_key(region); let end = keys::enc_end_key(region); - let mut res = dump_write_cf_properties(&self.engines.kv, &start, &end)?; let mut res1 = dump_default_cf_properties(&self.engines.kv, &start, &end)?; res.append(&mut res1); @@ -969,7 +956,6 @@ impl Debugger for DebuggerImpl { "region.middle_key_by_approximate_size".to_owned(), hex::encode(middle_key), )); - Ok(res) } @@ -984,15 +970,29 @@ impl Debugger for DebuggerImpl { fn set_raft_statistics(&mut self, s: Option>) { self.raft_statistics = s; } + + fn get_range_properties(&self, start: &[u8], end: &[u8]) -> Result> { + let mut props = dump_write_cf_properties( + &self.engines.kv, + &keys::data_key(start), + &keys::data_end_key(end), + )?; + let mut props1 = dump_default_cf_properties( + &self.engines.kv, + &keys::data_key(start), + &keys::data_end_key(end), + )?; + props.append(&mut props1); + Ok(props) + } } -fn dump_default_cf_properties( +pub fn dump_default_cf_properties( db: &RocksEngine, start: &[u8], end: &[u8], ) -> Result> { let mut num_entries = 0; // number of Rocksdb K/V entries. 
- let collection = box_try!(db.get_range_properties_cf(CF_DEFAULT, start, end)); let num_files = collection.len(); @@ -1019,7 +1019,7 @@ fn dump_default_cf_properties( Ok(res) } -fn dump_write_cf_properties( +pub fn dump_write_cf_properties( db: &RocksEngine, start: &[u8], end: &[u8], diff --git a/src/server/debug2.rs b/src/server/debug2.rs index dfd5f9bab83..0875ec39868 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -6,8 +6,8 @@ use engine_rocks::{ raw::CompactOptions, util::get_cf_handle, RocksEngine, RocksEngineIterator, RocksStatistics, }; use engine_traits::{ - CachedTablet, Iterable, Peekable, RaftEngine, TabletContext, TabletRegistry, CF_DEFAULT, - CF_LOCK, CF_WRITE, + CachedTablet, Iterable, MiscExt, Peekable, RaftEngine, TabletContext, TabletRegistry, + CF_DEFAULT, CF_LOCK, CF_WRITE, }; use keys::{data_key, DATA_MAX_KEY, DATA_PREFIX_KEY}; use kvproto::{ @@ -18,12 +18,12 @@ use kvproto::{ }; use nom::AsBytes; use raft::prelude::Entry; -use raftstore::store::util::check_key_in_region; +use raftstore::{coprocessor::get_region_approximate_middle, store::util::check_key_in_region}; use super::debug::{BottommostLevelCompaction, Debugger, RegionInfo}; use crate::{ config::ConfigController, - server::debug::{Error, Result}, + server::debug::{dump_default_cf_properties, dump_write_cf_properties, Error, Result}, storage::mvcc::{MvccInfoCollector, MvccInfoScanner}, }; @@ -193,7 +193,7 @@ pub struct DebuggerImplV2 { raft_engine: ER, kv_statistics: Option>, raft_statistics: Option>, - _cfg_controller: ConfigController, + cfg_controller: ConfigController, } impl DebuggerImplV2 { @@ -206,7 +206,7 @@ impl DebuggerImplV2 { DebuggerImplV2 { tablet_reg, raft_engine, - _cfg_controller: cfg_controller, + cfg_controller, kv_statistics: None, raft_statistics: None, } @@ -346,38 +346,7 @@ impl Debugger for DebuggerImplV2 { if db == DbType::Raft { return Err(box_err!("Get raft db is not allowed")); } - let mut compactions = vec![]; - self.raft_engine - 
.for_each_raft_group::(&mut |region_id| { - let region_state = self - .raft_engine - .get_region_state(region_id, u64::MAX) - .unwrap() - .unwrap(); - if region_state.state != PeerState::Normal { - return Ok(()); - } - - if let Some((start_key, end_key)) = - range_in_region((start, end), region_state.get_region()) - { - let start = if start_key.is_empty() { - None - } else { - Some(data_key(start_key)) - }; - let end = if end_key.is_empty() { - None - } else { - Some(data_key(end_key)) - }; - compactions.push((region_id, start, end, region_state)); - }; - - Ok(()) - }) - .unwrap(); - + let compactions = find_region_states_by_key_range(&self.raft_engine, start, end); for (region_id, start_key, end_key, region_state) in compactions { let mut tablet_cache = get_tablet_cache(&self.tablet_reg, region_id, Some(region_state))?; @@ -412,27 +381,82 @@ impl Debugger for DebuggerImplV2 { } fn dump_kv_stats(&self) -> Result { - unimplemented!() + let mut kv_str = String::new(); + self.tablet_reg.for_each_opened_tablet(|_, cached| { + if let Some(tablet) = cached.latest() { + let str = MiscExt::dump_stats(tablet).unwrap(); + kv_str.push_str(&str); + } + true + }); + if let Some(s) = self.kv_statistics.as_ref() && let Some(s) = s.to_string() { + kv_str.push_str(&s); + } + Ok(kv_str) } fn dump_raft_stats(&self) -> Result { - unimplemented!() + let mut raft_str = box_try!(RaftEngine::dump_stats(&self.raft_engine)); + if let Some(s) = self.raft_statistics.as_ref() && let Some(s) = s.to_string() { + raft_str.push_str(&s); + } + Ok(raft_str) } - fn modify_tikv_config(&self, _config_name: &str, _config_value: &str) -> Result<()> { - unimplemented!() + fn modify_tikv_config(&self, config_name: &str, config_value: &str) -> Result<()> { + if let Err(e) = self.cfg_controller.update_config(config_name, config_value) { + return Err(Error::Other( + format!("failed to update config, err: {:?}", e).into(), + )); + } + Ok(()) } fn get_store_ident(&self) -> Result { - unimplemented!() + 
self.raft_engine + .get_store_ident() + .transpose() + .unwrap() + .map_err(|e| Error::EngineTrait(e)) } - fn get_region_properties(&self, _region_id: u64) -> Result> { - unimplemented!() - } + fn get_region_properties(&self, region_id: u64) -> Result> { + let region_state = match self.raft_engine.get_region_state(region_id, u64::MAX) { + Ok(Some(region_state)) => region_state, + Ok(None) => return Err(Error::NotFound(format!("none region {:?}", region_id))), + Err(e) => return Err(Error::EngineTrait(e)), + }; - fn get_range_properties(&self, _: &[u8], _: &[u8]) -> Result> { - unimplemented!() + if region_state.state != PeerState::Normal { + return Err(Error::NotFound(format!("none region {:?}", region_id))); + } + let region = region_state.get_region(); + let start = keys::enc_start_key(region); + let end = keys::enc_end_key(region); + + let mut tablet_cache = + get_tablet_cache(&self.tablet_reg, region.id, Some(region_state.clone())).unwrap(); + let tablet = tablet_cache.latest().unwrap(); + let mut res = dump_write_cf_properties(tablet, &start, &end)?; + let mut res1 = dump_default_cf_properties(tablet, &start, &end)?; + res.append(&mut res1); + + let middle_key = match box_try!(get_region_approximate_middle(tablet, region)) { + Some(data_key) => keys::origin_key(&data_key).to_vec(), + None => Vec::new(), + }; + + res.push(( + "region.start_key".to_owned(), + hex::encode(®ion.start_key), + )); + res.push(("region.end_key".to_owned(), hex::encode(®ion.end_key))); + res.push(( + "region.middle_key_by_approximate_size".to_owned(), + hex::encode(middle_key), + )); + + Ok(res) } fn reset_to_version(&self, _version: u64) { @@ -446,6 +470,32 @@ impl Debugger for DebuggerImplV2 { fn set_raft_statistics(&mut self, s: Option>) { self.raft_statistics = s; } + + fn get_range_properties(&self, start: &[u8], end: &[u8]) -> Result> { + let mut props = vec![]; + let start = &keys::data_key(start); + let end = &keys::data_end_key(end); + let regions = 
find_region_states_by_key_range(&self.raft_engine, start, end); + for (region_id, start_key, end_key, region_state) in regions { + let mut tablet_cache = + get_tablet_cache(&self.tablet_reg, region_id, Some(region_state)).unwrap(); + let talbet = tablet_cache.latest().unwrap(); + let mut prop = dump_write_cf_properties( + talbet, + start_key.as_ref().map(|k| (k.as_bytes())).unwrap_or(start), + end_key.as_ref().map(|k| k.as_bytes()).unwrap_or(end), + ) + .unwrap(); + props.append(&mut prop); + let mut prop = dump_default_cf_properties( + talbet, + start_key.as_ref().map(|k| k.as_bytes()).unwrap_or(start), + end_key.as_ref().map(|k| k.as_bytes()).unwrap_or(end), + )?; + props.append(&mut prop); + } + Ok(props) + } } fn validate_db_and_cf(db: DbType, cf: &str) -> Result<()> { @@ -482,7 +532,6 @@ fn range_in_region<'a>( } else { DATA_PREFIX_KEY }; - if range_start == DATA_PREFIX_KEY && range_end == DATA_PREFIX_KEY { return Some((region.get_start_key(), region.get_end_key())); } else if range_start == DATA_PREFIX_KEY { @@ -533,6 +582,44 @@ fn range_in_region<'a>( } } +fn find_region_states_by_key_range( + raft_engine: &ER, + start: &[u8], + end: &[u8], +) -> Vec<(u64, Option>, Option>, RegionLocalState)> { + let mut regions = vec![]; + raft_engine + .for_each_raft_group::(&mut |region_id| { + let region_state = raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); + if region_state.state != PeerState::Normal { + return Ok(()); + } + + if let Some((start_key, end_key)) = + range_in_region((start, end), region_state.get_region()) + { + let start = if start_key.is_empty() { + None + } else { + Some(data_key(start_key)) + }; + let end = if end_key.is_empty() { + None + } else { + Some(data_key(end_key)) + }; + regions.push((region_id, start, end, region_state)); + }; + + Ok(()) + }) + .unwrap(); + regions +} + fn find_region_state_by_key( raft_engine: &ER, key: &[u8], diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index 
e969dfa897f..83d3932ead6 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -21,6 +21,7 @@ fn error_to_status(e: Error) -> RpcStatus { Error::NotFound(msg) => (RpcStatusCode::NOT_FOUND, msg), Error::InvalidArgument(msg) => (RpcStatusCode::INVALID_ARGUMENT, msg), Error::Other(e) => (RpcStatusCode::UNKNOWN, format!("{:?}", e)), + Error::EngineTrait(e) => (RpcStatusCode::UNKNOWN, format!("{:?}", e)), }; RpcStatus::with_message(code, msg) } diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 57ed2c258db..e1efb366af1 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -942,7 +942,7 @@ fn test_split_region_impl(is_raw_kv: bool) { } #[test_case(test_raftstore::must_new_cluster_and_debug_client)] -// #[test_case(test_raftstore_v2::must_new_cluster_and_debug_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_debug_client)] fn test_debug_store() { let (mut cluster, debug_client, store_id) = new_cluster(); let cluster_id = cluster.id(); @@ -989,7 +989,6 @@ fn test_debug_store() { let mut req = debugpb::GetRangePropertiesRequest::default(); req.set_start_key(b"d".to_vec()); - req.set_end_key(b"".to_vec()); let resp = debug_client.get_range_properties(&req).unwrap(); resp.get_properties() .iter() From a8058d691005db0efec49952f5f86ed1ef81c17b Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 29 May 2023 13:08:41 +0800 Subject: [PATCH 0711/1149] tikv-ctl: implement region tombstone for raftstore-v2 (#14816) ref tikv/tikv#14654 implement region tombstone for raftstore-v2 Signed-off-by: Spade A Co-authored-by: tonyxuqqi --- cmd/tikv-ctl/src/executor.rs | 26 ++- src/server/debug2.rs | 329 ++++++++++++++++++++++++++++++++++- 2 files changed, 345 insertions(+), 10 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 541863d0227..5e12972b58b 100644 --- 
a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -1208,12 +1208,30 @@ impl DebugExecutor for DebuggerImplV2 { .unwrap_or_else(|e| perror_and_exit("Debugger::compact", e)); } - fn set_region_tombstone(&self, _regions: Vec) { - unimplemented!() + fn set_region_tombstone(&self, regions: Vec) { + let ret = self + .set_region_tombstone(regions) + .unwrap_or_else(|e| perror_and_exit("Debugger::set_region_tombstone", e)); + if ret.is_empty() { + println!("success!"); + return; + } + for (region_id, error) in ret { + println!("region: {}, error: {}", region_id, error); + } } - fn set_region_tombstone_by_id(&self, _regions: Vec) { - unimplemented!() + fn set_region_tombstone_by_id(&self, region_ids: Vec) { + let ret = self + .set_region_tombstone_by_id(region_ids) + .unwrap_or_else(|e| perror_and_exit("Debugger::set_region_tombstone_by_id", e)); + if ret.is_empty() { + println!("success!"); + return; + } + for (region_id, error) in ret { + println!("region: {}, error: {}", region_id, error); + } } fn recover_regions(&self, _regions: Vec, _read_only: bool) { diff --git a/src/server/debug2.rs b/src/server/debug2.rs index 0875ec39868..776880f6895 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -6,8 +6,8 @@ use engine_rocks::{ raw::CompactOptions, util::get_cf_handle, RocksEngine, RocksEngineIterator, RocksStatistics, }; use engine_traits::{ - CachedTablet, Iterable, MiscExt, Peekable, RaftEngine, TabletContext, TabletRegistry, - CF_DEFAULT, CF_LOCK, CF_WRITE, + CachedTablet, Iterable, MiscExt, Peekable, RaftEngine, RaftLogBatch, TabletContext, + TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, }; use keys::{data_key, DATA_MAX_KEY, DATA_PREFIX_KEY}; use kvproto::{ @@ -211,6 +211,130 @@ impl DebuggerImplV2 { raft_statistics: None, } } + + /// Set regions to tombstone by manual, and apply other status(such as + /// peers, version, and key range) from `region` which comes from PD + /// normally. 
+ pub fn set_region_tombstone(&self, regions: Vec) -> Result> { + let store_id = self.get_store_ident()?.get_store_id(); + let mut lb = self.raft_engine.log_batch(regions.len()); + + let mut errors = Vec::with_capacity(regions.len()); + for region in regions { + let region_id = region.get_id(); + if let Err(e) = set_region_tombstone(&self.raft_engine, store_id, region, &mut lb) { + errors.push((region_id, e)); + } + } + + if errors.is_empty() { + box_try!(self.raft_engine.consume(&mut lb, true)); + } + + Ok(errors) + } + + pub fn set_region_tombstone_by_id(&self, regions: Vec) -> Result> { + let mut lb = self.raft_engine.log_batch(regions.len()); + let mut errors = Vec::with_capacity(regions.len()); + for region_id in regions { + let mut region_state = match self + .raft_engine + .get_region_state(region_id, u64::MAX) + .map_err(|e| box_err!(e)) + .and_then(|s| s.ok_or_else(|| Error::Other("Can't find RegionLocalState".into()))) + { + Ok(region_state) => region_state, + Err(e) => { + errors.push((region_id, e)); + continue; + } + }; + + let apply_state = match self + .raft_engine + .get_apply_state(region_id, u64::MAX) + .map_err(|e| box_err!(e)) + .and_then(|s| s.ok_or_else(|| Error::Other("Can't find RaftApplyState".into()))) + { + Ok(apply_state) => apply_state, + Err(e) => { + errors.push((region_id, e)); + continue; + } + }; + + if region_state.get_state() == PeerState::Tombstone { + info!("skip {} because it's already tombstone", region_id); + continue; + } + region_state.set_state(PeerState::Tombstone); + box_try!(lb.put_region_state( + region_id, + apply_state.get_applied_index(), + ®ion_state + )); + } + + if errors.is_empty() { + box_try!(self.raft_engine.consume(&mut lb, true)); + } + Ok(errors) + } +} + +fn set_region_tombstone( + raft_engine: &ER, + store_id: u64, + region: metapb::Region, + lb: &mut ::LogBatch, +) -> Result<()> { + let id = region.get_id(); + + let mut region_state = raft_engine + .get_region_state(id, u64::MAX) + .map_err(|e| 
box_err!(e)) + .and_then(|s| s.ok_or_else(|| Error::Other("Can't find RegionLocalState".into())))?; + if region_state.get_state() == PeerState::Tombstone { + return Ok(()); + } + + let peer_id = region_state + .get_region() + .get_peers() + .iter() + .find(|p| p.get_store_id() == store_id) + .map(|p| p.get_id()) + .ok_or_else(|| Error::Other("RegionLocalState doesn't contains the peer itself".into()))?; + + let old_conf_ver = region_state.get_region().get_region_epoch().get_conf_ver(); + let new_conf_ver = region.get_region_epoch().get_conf_ver(); + if new_conf_ver <= old_conf_ver { + return Err(box_err!( + "invalid conf_ver: please make sure you have removed the peer by PD" + )); + } + + // If the store is not in peers, or it's still in but its peer_id + // has changed, we know the peer is marked as tombstone success. + let scheduled = region + .get_peers() + .iter() + .find(|p| p.get_store_id() == store_id) + .map_or(true, |p| p.get_id() != peer_id); + if !scheduled { + return Err(box_err!("The peer is still in target peers")); + } + + let apply_state = raft_engine + .get_apply_state(id, u64::MAX) + .map_err(|e| box_err!(e)) + .and_then(|s| s.ok_or_else(|| Error::Other("Can't find RaftApplyState".into())))?; + region_state.set_state(PeerState::Tombstone); + region_state.set_region(region); + box_try!(lb.put_region_state(id, apply_state.get_applied_index(), ®ion_state)); + + Ok(()) } impl Debugger for DebuggerImplV2 { @@ -415,9 +539,11 @@ impl Debugger for DebuggerImplV2 { fn get_store_ident(&self) -> Result { self.raft_engine .get_store_ident() - .transpose() - .unwrap() .map_err(|e| Error::EngineTrait(e)) + .and_then(|ident| match ident { + Some(ident) => Ok(ident), + None => Err(Error::NotFound("No store ident key".to_owned())), + }) } fn get_region_properties(&self, region_id: u64) -> Result> { @@ -705,8 +831,13 @@ fn larger_key<'a>(key1: &'a [u8], key2: &'a [u8], end_key: bool) -> &'a [u8] { mod tests { use std::path::Path; - use engine_traits::{RaftLogBatch, 
SyncMutable, CF_DEFAULT, CF_LOCK, CF_WRITE}; - use kvproto::{metapb, raft_serverpb::*}; + use engine_traits::{ + RaftEngineReadOnly, RaftLogBatch, SyncMutable, CF_DEFAULT, CF_LOCK, CF_WRITE, + }; + use kvproto::{ + metapb::{self, PeerRole}, + raft_serverpb::*, + }; use raft::prelude::EntryType; use raft_log_engine::RaftLogEngine; @@ -739,6 +870,44 @@ mod tests { DebuggerImplV2::new(reg, raft_engine, ConfigController::default()) } + impl DebuggerImplV2 { + fn set_store_id(&self, store_id: u64) { + let mut ident = self.get_store_ident().unwrap_or_default(); + ident.set_store_id(store_id); + let mut lb = self.raft_engine.log_batch(3); + lb.put_store_ident(&ident).unwrap(); + self.raft_engine.consume(&mut lb, true).unwrap(); + } + } + + fn init_region_state( + raft_engine: &ER, + region_id: u64, + stores: &[u64], + mut learner: usize, + ) -> metapb::Region { + let mut region = metapb::Region::default(); + region.set_id(region_id); + for (i, &store_id) in stores.iter().enumerate() { + let mut peer = metapb::Peer::default(); + peer.set_id(i as u64); + peer.set_store_id(store_id); + if learner > 0 { + peer.set_role(PeerRole::Learner); + learner -= 1; + } + region.mut_peers().push(peer); + } + let mut region_state = RegionLocalState::default(); + region_state.set_state(PeerState::Normal); + region_state.set_region(region.clone()); + let mut lb = raft_engine.log_batch(3); + lb.put_region_state(region_id, INITIAL_APPLY_INDEX, ®ion_state) + .unwrap(); + raft_engine.consume(&mut lb, true).unwrap(); + region + } + #[test] fn test_get() { let dir = test_util::temp_dir("test-debugger", false); @@ -1122,4 +1291,152 @@ mod tests { ); } } + + #[test] + fn test_tombstone_regions() { + let dir = test_util::temp_dir("test-debugger", false); + let debugger = new_debugger(dir.path()); + debugger.set_store_id(11); + let mut apply_state = RaftApplyState::default(); + apply_state.set_applied_index(5); + let mut lb = debugger.raft_engine.log_batch(10); + + // region 1 with peers at stores 
11, 12, 13. + let region_1 = init_region_state(&debugger.raft_engine, 1, &[11, 12, 13], 0); + lb.put_apply_state(1, 5, &apply_state).unwrap(); + // Got the target region from pd, which doesn't contains the store. + let mut target_region_1 = region_1.clone(); + target_region_1.mut_peers().remove(0); + target_region_1.mut_region_epoch().set_conf_ver(100); + + // region 2 with peers at stores 11, 12, 13. + let region_2 = init_region_state(&debugger.raft_engine, 2, &[11, 12, 13], 0); + lb.put_apply_state(2, 5, &apply_state).unwrap(); + // Got the target region from pd, which has different peer_id. + let mut target_region_2 = region_2.clone(); + target_region_2.mut_peers()[0].set_id(100); + target_region_2.mut_region_epoch().set_conf_ver(100); + + // region 3 with peers at stores 21, 22, 23. + let region_3 = init_region_state(&debugger.raft_engine, 3, &[21, 22, 23], 0); + lb.put_apply_state(3, 5, &apply_state).unwrap(); + // Got the target region from pd but the peers are not changed. + let mut target_region_3 = region_3; + target_region_3.mut_region_epoch().set_conf_ver(100); + + // region 4 with peers at stores 11, 12, 13. + let region_4 = init_region_state(&debugger.raft_engine, 4, &[11, 12, 13], 0); + lb.put_apply_state(4, 5, &apply_state).unwrap(); + // Got the target region from pd but region epoch are not changed. + let mut target_region_4 = region_4; + target_region_4.mut_peers()[0].set_id(100); + + // region 5 with peers at stores 11, 12, 13. + let region_5 = init_region_state(&debugger.raft_engine, 5, &[11, 12, 13], 0); + lb.put_apply_state(5, 5, &apply_state).unwrap(); + // Got the target region from pd but peer is not scheduled. 
+ let mut target_region_5 = region_5; + target_region_5.mut_region_epoch().set_conf_ver(100); + + debugger.raft_engine.consume(&mut lb, true).unwrap(); + + let must_meet_error = |region_with_error: metapb::Region| { + let error_region_id = region_with_error.get_id(); + let regions = vec![ + target_region_1.clone(), + target_region_2.clone(), + region_with_error, + ]; + let errors = debugger.set_region_tombstone(regions).unwrap(); + assert_eq!(errors.len(), 1); + assert_eq!(errors[0].0, error_region_id); + assert_eq!( + debugger + .raft_engine + .get_region_state(1, u64::MAX) + .unwrap() + .unwrap() + .take_region(), + region_1 + ); + + assert_eq!( + debugger + .raft_engine + .get_region_state(2, u64::MAX) + .unwrap() + .unwrap() + .take_region(), + region_2 + ); + }; + + // Test with bad target region. No region state in rocksdb should be changed. + must_meet_error(target_region_3); + must_meet_error(target_region_4); + must_meet_error(target_region_5); + + // After set_region_tombstone success, all region should be adjusted. + let target_regions = vec![target_region_1, target_region_2]; + let errors = debugger.set_region_tombstone(target_regions).unwrap(); + assert!(errors.is_empty()); + for ®ion_id in &[1, 2] { + let state = debugger + .raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap() + .get_state(); + assert_eq!(state, PeerState::Tombstone); + } + } + + #[test] + fn test_tombstone_regions_by_id() { + let dir = test_util::temp_dir("test-debugger", false); + let debugger = new_debugger(dir.path()); + debugger.set_store_id(11); + let mut apply_state = RaftApplyState::default(); + apply_state.set_applied_index(5); + let mut lb = debugger.raft_engine.log_batch(10); + + // tombstone region 1 which currently not exists. + let errors = debugger.set_region_tombstone_by_id(vec![1]).unwrap(); + assert!(!errors.is_empty()); + + // region 1 with peers at stores 11, 12, 13. 
+ init_region_state(&debugger.raft_engine, 1, &[11, 12, 13], 0); + lb.put_apply_state(1, 5, &apply_state).unwrap(); + debugger.raft_engine.consume(&mut lb, true).unwrap(); + let mut expected_state = debugger + .raft_engine + .get_region_state(1, u64::MAX) + .unwrap() + .unwrap(); + expected_state.set_state(PeerState::Tombstone); + + // tombstone region 1. + let errors = debugger.set_region_tombstone_by_id(vec![1]).unwrap(); + assert!(errors.is_empty()); + assert_eq!( + debugger + .raft_engine + .get_region_state(1, u64::MAX) + .unwrap() + .unwrap(), + expected_state + ); + + // tombstone region 1 again. + let errors = debugger.set_region_tombstone_by_id(vec![1]).unwrap(); + assert!(errors.is_empty()); + assert_eq!( + debugger + .raft_engine + .get_region_state(1, u64::MAX) + .unwrap() + .unwrap(), + expected_state + ); + } } From a24d9d6838603b2b274376e52f92f67a10fecd84 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Mon, 29 May 2023 14:32:41 +0800 Subject: [PATCH 0712/1149] txn: persist fair lock type in lock information and handle stale fair lock resolve (#14692) ref tikv/tikv#13298, ref pingcap/tidb#43540 Signed-off-by: cfzjywxk --- components/cdc/src/delegate.rs | 2 + components/cdc/tests/integrations/test_cdc.rs | 1 + .../concurrency_manager/benches/lock_table.rs | 1 + components/concurrency_manager/src/lib.rs | 12 +- .../concurrency_manager/src/lock_table.rs | 5 + .../concurrency_manager/tests/memory_usage.rs | 1 + components/raftstore/src/store/txn_ext.rs | 2 + components/snap_recovery/src/data_resolver.rs | 1 + components/tikv_kv/src/lib.rs | 2 + components/txn_types/src/lock.rs | 71 +++++-- src/coprocessor/endpoint.rs | 1 + src/server/debug.rs | 1 + src/server/reset_to_version.rs | 1 + src/storage/mod.rs | 14 +- src/storage/mvcc/consistency_check.rs | 1 + src/storage/mvcc/mod.rs | 8 +- src/storage/mvcc/reader/reader.rs | 1 + src/storage/mvcc/reader/scanner/forward.rs | 1 + .../txn/actions/acquire_pessimistic_lock.rs | 69 +++++-- 
src/storage/txn/actions/check_txn_status.rs | 180 +++++++++++++++-- src/storage/txn/actions/commit.rs | 6 +- .../txn/actions/flashback_to_version.rs | 1 + src/storage/txn/actions/prewrite.rs | 4 +- .../txn/commands/check_secondary_locks.rs | 168 +++++++++++++--- src/storage/txn/commands/check_txn_status.rs | 184 ++++++++++++++++++ .../txn/commands/pessimistic_rollback.rs | 4 +- src/storage/txn/scheduler.rs | 1 + src/storage/types.rs | 8 + tests/failpoints/cases/test_coprocessor.rs | 1 + tests/failpoints/cases/test_merge.rs | 4 + tests/failpoints/cases/test_replica_read.rs | 2 + tests/failpoints/cases/test_split_region.rs | 2 + tests/failpoints/cases/test_transaction.rs | 1 + .../failpoints/cases/test_transfer_leader.rs | 3 + tests/integrations/coprocessor/test_select.rs | 1 + tests/integrations/raftstore/test_merge.rs | 3 + tests/integrations/raftstore/test_multi.rs | 1 + .../raftstore/test_replica_read.rs | 1 + .../raftstore/test_split_region.rs | 2 + .../raftstore/test_transfer_leader.rs | 2 + tests/integrations/server/kv_service.rs | 2 + tests/integrations/storage/test_raftkv.rs | 1 + 42 files changed, 685 insertions(+), 92 deletions(-) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index adca54dace0..a03eaae7ef7 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -1438,6 +1438,7 @@ mod tests { TimeStamp::zero(), 0, TimeStamp::zero(), + false, ) .to_bytes(); delegate @@ -1506,6 +1507,7 @@ mod tests { TimeStamp::zero(), 0, TimeStamp::zero(), + false, ); // Only the key `a` is a normal write. 
if k != b'a' { diff --git a/components/cdc/tests/integrations/test_cdc.rs b/components/cdc/tests/integrations/test_cdc.rs index 3e5345e51f8..51d60a06f5b 100644 --- a/components/cdc/tests/integrations/test_cdc.rs +++ b/components/cdc/tests/integrations/test_cdc.rs @@ -1228,6 +1228,7 @@ fn test_cdc_resolve_ts_checking_concurrency_manager_impl() { 0.into(), 1, ts.into(), + false, )) }); guard diff --git a/components/concurrency_manager/benches/lock_table.rs b/components/concurrency_manager/benches/lock_table.rs index 52c9bea960a..33ae220ee51 100644 --- a/components/concurrency_manager/benches/lock_table.rs +++ b/components/concurrency_manager/benches/lock_table.rs @@ -32,6 +32,7 @@ fn prepare_cm() -> ConcurrencyManager { 10.into(), 1, 20.into(), + false, )); }); // Leak the guard so the lock won't be removed. diff --git a/components/concurrency_manager/src/lib.rs b/components/concurrency_manager/src/lib.rs index 342f2139e08..ce77cb87a42 100644 --- a/components/concurrency_manager/src/lib.rs +++ b/components/concurrency_manager/src/lib.rs @@ -161,7 +161,17 @@ mod tests { fn new_lock(ts: impl Into, primary: &[u8], lock_type: LockType) -> Lock { let ts = ts.into(); - Lock::new(lock_type, primary.to_vec(), ts, 0, None, 0.into(), 1, ts) + Lock::new( + lock_type, + primary.to_vec(), + ts, + 0, + None, + 0.into(), + 1, + ts, + false, + ) } #[tokio::test] diff --git a/components/concurrency_manager/src/lock_table.rs b/components/concurrency_manager/src/lock_table.rs index ad013a863a1..db6995fa1d0 100644 --- a/components/concurrency_manager/src/lock_table.rs +++ b/components/concurrency_manager/src/lock_table.rs @@ -183,6 +183,7 @@ mod test { 10.into(), 1, 10.into(), + false, ); let guard = lock_table.lock_key(&key_k).await; guard.with_lock(|l| { @@ -212,6 +213,7 @@ mod test { 20.into(), 1, 20.into(), + false, ); let guard = lock_table.lock_key(&Key::from_raw(b"k")).await; guard.with_lock(|l| { @@ -227,6 +229,7 @@ mod test { 10.into(), 1, 10.into(), + false, ); let guard = 
lock_table.lock_key(&Key::from_raw(b"l")).await; guard.with_lock(|l| { @@ -284,6 +287,7 @@ mod test { 20.into(), 1, 20.into(), + false, ); let guard_a = lock_table.lock_key(&Key::from_raw(b"a")).await; guard_a.with_lock(|l| { @@ -304,6 +308,7 @@ mod test { 30.into(), 2, 30.into(), + false, ) .use_async_commit(vec![b"c".to_vec()]); let guard_b = lock_table.lock_key(&Key::from_raw(b"b")).await; diff --git a/components/concurrency_manager/tests/memory_usage.rs b/components/concurrency_manager/tests/memory_usage.rs index 34ce9986a61..76fab7e185c 100644 --- a/components/concurrency_manager/tests/memory_usage.rs +++ b/components/concurrency_manager/tests/memory_usage.rs @@ -48,6 +48,7 @@ fn test_memory_usage() { 10.into(), 1, 20.into(), + false, ); // Key already exists diff --git a/components/raftstore/src/store/txn_ext.rs b/components/raftstore/src/store/txn_ext.rs index ccc4027e9d1..20963fc186f 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -326,6 +326,7 @@ mod tests { min_commit_ts: 110.into(), last_change_ts: 105.into(), versions_to_last_change: 2, + is_locked_with_conflict: false, } } @@ -428,6 +429,7 @@ mod tests { min_commit_ts: 20.into(), last_change_ts: 5.into(), versions_to_last_change: 2, + is_locked_with_conflict: false, }, deleted, ), diff --git a/components/snap_recovery/src/data_resolver.rs b/components/snap_recovery/src/data_resolver.rs index 4ef8e7a6410..68b9a7d2bc6 100644 --- a/components/snap_recovery/src/data_resolver.rs +++ b/components/snap_recovery/src/data_resolver.rs @@ -382,6 +382,7 @@ mod tests { for_update_ts.into(), 0, TimeStamp::zero(), + false, ); kv.push((CF_LOCK, Key::from_raw(key), lock.to_bytes())); } diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 9e6c1b9ca3a..48b667adf7a 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -1327,6 +1327,7 @@ mod unit_tests { min_commit_ts: 102.into(), last_change_ts: 80.into(), 
versions_to_last_change: 2, + is_locked_with_conflict: false, }, ), Modify::DeleteRange( @@ -1371,6 +1372,7 @@ mod unit_tests { min_commit_ts: 102.into(), last_change_ts: 80.into(), versions_to_last_change: 2, + is_locked_with_conflict: false, } .into_lock() .to_bytes(), diff --git a/components/txn_types/src/lock.rs b/components/txn_types/src/lock.rs index 103353318e0..3018b030490 100644 --- a/components/txn_types/src/lock.rs +++ b/components/txn_types/src/lock.rs @@ -35,6 +35,7 @@ const ASYNC_COMMIT_PREFIX: u8 = b'a'; const ROLLBACK_TS_PREFIX: u8 = b'r'; const LAST_CHANGE_PREFIX: u8 = b'l'; const TXN_SOURCE_PREFIX: u8 = b's'; +const PESSIMISTIC_LOCK_WITH_CONFLICT_PREFIX: u8 = b'F'; impl LockType { pub fn from_mutation(mutation: &Mutation) -> Option { @@ -103,6 +104,8 @@ pub struct Lock { /// application is limited to setting this value under `0x80`, /// so there will no more cost to change it to `u64`. pub txn_source: u64, + /// The lock is locked with conflict using fair lock mode. + pub is_locked_with_conflict: bool, } impl std::fmt::Debug for Lock { @@ -129,6 +132,7 @@ impl std::fmt::Debug for Lock { .field("last_change_ts", &self.last_change_ts) .field("versions_to_last_change", &self.versions_to_last_change) .field("txn_source", &self.txn_source) + .field("is_locked_with_conflict", &self.is_locked_with_conflict) .finish() } } @@ -143,6 +147,7 @@ impl Lock { for_update_ts: TimeStamp, txn_size: u64, min_commit_ts: TimeStamp, + is_locked_with_conflict: bool, ) -> Self { Self { lock_type, @@ -159,6 +164,7 @@ impl Lock { last_change_ts: TimeStamp::zero(), versions_to_last_change: 0, txn_source: 0, + is_locked_with_conflict, } } @@ -239,6 +245,9 @@ impl Lock { b.push(TXN_SOURCE_PREFIX); b.encode_var_u64(self.txn_source).unwrap(); } + if self.is_locked_with_conflict { + b.push(PESSIMISTIC_LOCK_WITH_CONFLICT_PREFIX); + } b } @@ -274,6 +283,9 @@ impl Lock { if self.txn_source != 0 { size += 1 + MAX_VAR_U64_LEN; } + if self.is_locked_with_conflict { + size += 1; + } 
size } @@ -300,6 +312,7 @@ impl Lock { TimeStamp::zero(), 0, TimeStamp::zero(), + false, )); } @@ -313,6 +326,7 @@ impl Lock { let mut last_change_ts = TimeStamp::zero(); let mut versions_to_last_change = 0; let mut txn_source = 0; + let mut is_locked_with_conflict = false; while !b.is_empty() { match b.read_u8()? { SHORT_VALUE_PREFIX => { @@ -353,6 +367,9 @@ impl Lock { TXN_SOURCE_PREFIX => { txn_source = number::decode_var_u64(&mut b)?; } + PESSIMISTIC_LOCK_WITH_CONFLICT_PREFIX => { + is_locked_with_conflict = true; + } _ => { // To support forward compatibility, all fields should be serialized in order // and stop parsing if meets an unknown byte. @@ -369,6 +386,7 @@ impl Lock { for_update_ts, txn_size, min_commit_ts, + is_locked_with_conflict, ) .set_last_change(last_change_ts, versions_to_last_change) .set_txn_source(txn_source); @@ -411,10 +429,7 @@ impl Lock { bypass_locks: &TsSet, is_replica_read: bool, ) -> Result<()> { - if lock.ts > ts - || lock.lock_type == LockType::Lock - || lock.lock_type == LockType::Pessimistic - { + if lock.ts > ts || lock.lock_type == LockType::Lock || lock.is_pessimistic_lock() { // Ignore lock when lock.ts > ts or lock's type is Lock or Pessimistic return Ok(()); } @@ -458,7 +473,7 @@ impl Lock { ts: TimeStamp, bypass_locks: &TsSet, ) -> Result<()> { - if lock.lock_type == LockType::Lock || lock.lock_type == LockType::Pessimistic { + if lock.lock_type == LockType::Lock || lock.is_pessimistic_lock() { // Ignore lock when the lock's type is Lock or Pessimistic. return Ok(()); } @@ -518,6 +533,10 @@ impl Lock { pub fn is_pessimistic_lock(&self) -> bool { self.lock_type == LockType::Pessimistic } + + pub fn is_pessimistic_lock_with_conflict(&self) -> bool { + self.is_pessimistic_lock() && self.is_locked_with_conflict + } } /// A specialized lock only for pessimistic lock. 
This saves memory for cases @@ -533,6 +552,7 @@ pub struct PessimisticLock { pub last_change_ts: TimeStamp, pub versions_to_last_change: u64, + pub is_locked_with_conflict: bool, } impl PessimisticLock { @@ -546,6 +566,7 @@ impl PessimisticLock { self.for_update_ts, 0, self.min_commit_ts, + self.is_locked_with_conflict, ) .set_last_change(self.last_change_ts, self.versions_to_last_change) } @@ -561,6 +582,7 @@ impl PessimisticLock { self.for_update_ts, 0, self.min_commit_ts, + self.is_locked_with_conflict, ) .set_last_change(self.last_change_ts, self.versions_to_last_change) } @@ -580,6 +602,7 @@ impl std::fmt::Debug for PessimisticLock { .field("min_commit_ts", &self.min_commit_ts) .field("last_change_ts", &self.last_change_ts) .field("versions_to_last_change", &self.versions_to_last_change) + .field("is_locked_with_conflict", &self.is_locked_with_conflict) .finish() } } @@ -643,6 +666,7 @@ mod tests { TimeStamp::zero(), 0, TimeStamp::zero(), + false, ), Lock::new( LockType::Delete, @@ -653,6 +677,7 @@ mod tests { TimeStamp::zero(), 0, TimeStamp::zero(), + false, ), Lock::new( LockType::Put, @@ -663,6 +688,7 @@ mod tests { 10.into(), 0, TimeStamp::zero(), + false, ), Lock::new( LockType::Delete, @@ -673,6 +699,7 @@ mod tests { 10.into(), 0, TimeStamp::zero(), + false, ), Lock::new( LockType::Put, @@ -683,6 +710,7 @@ mod tests { TimeStamp::zero(), 16, TimeStamp::zero(), + false, ), Lock::new( LockType::Delete, @@ -693,6 +721,7 @@ mod tests { TimeStamp::zero(), 16, TimeStamp::zero(), + false, ), Lock::new( LockType::Put, @@ -703,6 +732,7 @@ mod tests { 10.into(), 16, TimeStamp::zero(), + false, ), Lock::new( LockType::Delete, @@ -713,6 +743,7 @@ mod tests { 10.into(), 0, TimeStamp::zero(), + false, ), Lock::new( LockType::Put, @@ -723,6 +754,7 @@ mod tests { 333.into(), 444, 555.into(), + false, ), Lock::new( LockType::Put, @@ -733,6 +765,7 @@ mod tests { 333.into(), 444, 555.into(), + false, ) .use_async_commit(vec![]), Lock::new( @@ -744,6 +777,7 @@ mod tests { 
333.into(), 444, 555.into(), + false, ) .use_async_commit(vec![b"k".to_vec()]), Lock::new( @@ -755,6 +789,7 @@ mod tests { 333.into(), 444, 555.into(), + false, ) .use_async_commit(vec![ b"k1".to_vec(), @@ -771,6 +806,7 @@ mod tests { 333.into(), 444, 555.into(), + false, ) .use_async_commit(vec![ b"k1".to_vec(), @@ -788,6 +824,7 @@ mod tests { 333.into(), 444, 555.into(), + false, ) .with_rollback_ts(vec![12.into(), 24.into(), 13.into()]), Lock::new( @@ -799,6 +836,7 @@ mod tests { 6.into(), 16, 8.into(), + false, ) .set_last_change(0.into(), 2), Lock::new( @@ -810,6 +848,7 @@ mod tests { 6.into(), 16, 8.into(), + false, ) .set_last_change(4.into(), 2) .set_txn_source(1), @@ -833,6 +872,7 @@ mod tests { TimeStamp::zero(), 0, TimeStamp::zero(), + false, ); let mut v = lock.to_bytes(); Lock::parse(&v[..4]).unwrap_err(); @@ -854,6 +894,7 @@ mod tests { TimeStamp::zero(), 1, TimeStamp::zero(), + false, ); let empty = Default::default(); @@ -1008,6 +1049,7 @@ mod tests { 100.into(), 1, TimeStamp::zero(), + false, ); let empty = Default::default(); @@ -1051,6 +1093,7 @@ mod tests { 101.into(), 10, 127.into(), + false, ) .use_async_commit(vec![ b"secondary_k1".to_vec(), @@ -1067,7 +1110,7 @@ mod tests { min_commit_ts: TimeStamp(127), use_async_commit: true, \ secondaries: [7365636F6E646172795F6B31, 7365636F6E646172795F6B6B6B6B6B32, \ 7365636F6E646172795F6B336B336B336B336B336B33, 7365636F6E646172795F6B34], rollback_ts: [], \ - last_change_ts: TimeStamp(80), versions_to_last_change: 4, txn_source: 0 }" + last_change_ts: TimeStamp(80), versions_to_last_change: 4, txn_source: 0, is_locked_with_conflict: false }" ); log_wrappers::set_redact_info_log(true); let redact_result = format!("{:?}", lock); @@ -1077,7 +1120,7 @@ mod tests { "Lock { lock_type: Put, primary_key: ?, start_ts: TimeStamp(100), ttl: 3, \ short_value: ?, for_update_ts: TimeStamp(101), txn_size: 10, min_commit_ts: TimeStamp(127), \ use_async_commit: true, secondaries: [?, ?, ?, ?], rollback_ts: [], \ - 
last_change_ts: TimeStamp(80), versions_to_last_change: 4, txn_source: 0 }" + last_change_ts: TimeStamp(80), versions_to_last_change: 4, txn_source: 0, is_locked_with_conflict: false }" ); lock.short_value = None; @@ -1087,7 +1130,7 @@ mod tests { "Lock { lock_type: Put, primary_key: 706B, start_ts: TimeStamp(100), ttl: 3, short_value: , \ for_update_ts: TimeStamp(101), txn_size: 10, min_commit_ts: TimeStamp(127), \ use_async_commit: true, secondaries: [], rollback_ts: [], last_change_ts: TimeStamp(80), \ - versions_to_last_change: 4, txn_source: 0 }" + versions_to_last_change: 4, txn_source: 0, is_locked_with_conflict: false }" ); log_wrappers::set_redact_info_log(true); let redact_result = format!("{:?}", lock); @@ -1097,7 +1140,7 @@ mod tests { "Lock { lock_type: Put, primary_key: ?, start_ts: TimeStamp(100), ttl: 3, short_value: ?, \ for_update_ts: TimeStamp(101), txn_size: 10, min_commit_ts: TimeStamp(127), \ use_async_commit: true, secondaries: [], rollback_ts: [], last_change_ts: TimeStamp(80), \ - versions_to_last_change: 4, txn_source: 0 }" + versions_to_last_change: 4, txn_source: 0, is_locked_with_conflict: false }" ); } @@ -1111,6 +1154,7 @@ mod tests { min_commit_ts: 20.into(), last_change_ts: 8.into(), versions_to_last_change: 2, + is_locked_with_conflict: false, }; let expected_lock = Lock { lock_type: LockType::Pessimistic, @@ -1127,6 +1171,7 @@ mod tests { last_change_ts: 8.into(), versions_to_last_change: 2, txn_source: 0, + is_locked_with_conflict: false, }; assert_eq!(pessimistic_lock.to_lock(), expected_lock); assert_eq!(pessimistic_lock.into_lock(), expected_lock); @@ -1142,12 +1187,13 @@ mod tests { min_commit_ts: 20.into(), last_change_ts: 8.into(), versions_to_last_change: 2, + is_locked_with_conflict: false, }; assert_eq!( format!("{:?}", pessimistic_lock), "PessimisticLock { primary_key: 7072696D617279, start_ts: TimeStamp(5), ttl: 1000, \ for_update_ts: TimeStamp(10), min_commit_ts: TimeStamp(20), last_change_ts: TimeStamp(8), \ - 
versions_to_last_change: 2 }" + versions_to_last_change: 2, is_locked_with_conflict: false }" ); log_wrappers::set_redact_info_log(true); let redact_result = format!("{:?}", pessimistic_lock); @@ -1156,7 +1202,7 @@ mod tests { redact_result, "PessimisticLock { primary_key: ?, start_ts: TimeStamp(5), ttl: 1000, \ for_update_ts: TimeStamp(10), min_commit_ts: TimeStamp(20), last_change_ts: TimeStamp(8), \ - versions_to_last_change: 2 }" + versions_to_last_change: 2, is_locked_with_conflict: false }" ); } @@ -1170,8 +1216,9 @@ mod tests { min_commit_ts: 20.into(), last_change_ts: 8.into(), versions_to_last_change: 2, + is_locked_with_conflict: false, }; // 7 bytes for primary key, 16 bytes for Box<[u8]>, and 6 8-byte integers. - assert_eq!(lock.memory_size(), 7 + 16 + 6 * 8); + assert_eq!(lock.memory_size(), 7 + 16 + 7 * 8); } } diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 6b360fa4538..01f09941a59 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -1947,6 +1947,7 @@ mod tests { 0.into(), 1, 20.into(), + false, )); }); diff --git a/src/server/debug.rs b/src/server/debug.rs index 396339dcb7a..6f1d99e612d 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -2213,6 +2213,7 @@ mod tests { for_update_ts.into(), 0, TimeStamp::zero(), + false, ); kv.push((CF_LOCK, Key::from_raw(key), lock.to_bytes(), expect)); } diff --git a/src/server/reset_to_version.rs b/src/server/reset_to_version.rs index 1ea98acc1c8..0c1004bb305 100644 --- a/src/server/reset_to_version.rs +++ b/src/server/reset_to_version.rs @@ -333,6 +333,7 @@ mod tests { for_update_ts.into(), 0, TimeStamp::zero(), + false, ); kv.push((CF_LOCK, Key::from_raw(key), lock.to_bytes())); } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 2605b1ad262..1e98a1b8257 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -2941,7 +2941,7 @@ pub async fn get_raw_key_guard( // get maximum resolved-ts from concurrency_manager.global_min_lock_ts let 
encode_key = ApiV2::encode_raw_key(&raw_key, Some(ts)); let key_guard = concurrency_manager.lock_key(&encode_key).await; - let lock = Lock::new(LockType::Put, raw_key, ts, 0, None, 0.into(), 1, ts); + let lock = Lock::new(LockType::Put, raw_key, ts, 0, None, 0.into(), 1, ts, false); key_guard.with_lock(|l| *l = Some(lock)); Ok(Some(key_guard)) } else { @@ -7514,6 +7514,7 @@ mod tests { 0.into(), 1, 20.into(), + false, )); }); guard @@ -7843,6 +7844,7 @@ mod tests { 0.into(), 0, 0.into(), + false, ) }; @@ -8009,6 +8011,7 @@ mod tests { 0.into(), 3, ts(10, 1), + false, ) .use_async_commit(vec![b"k1".to_vec(), b"k2".to_vec()]), false, @@ -9430,6 +9433,7 @@ mod tests { 0.into(), 0, 0.into(), + false, ), false, ), @@ -9479,6 +9483,7 @@ mod tests { 0.into(), 1, 20.into(), + false, )); }); @@ -10172,6 +10177,7 @@ mod tests { 10.into(), 0, 11.into(), + false, ), ), ( @@ -10185,6 +10191,7 @@ mod tests { 10.into(), 0, 11.into(), + false, ), ), ], @@ -10213,6 +10220,7 @@ mod tests { 10.into(), 0, 11.into(), + false, ), ), ( @@ -10226,6 +10234,7 @@ mod tests { 10.into(), 0, 11.into(), + false, ), ), ], @@ -10256,6 +10265,7 @@ mod tests { 10.into(), 0, 11.into(), + false, ), ), ( @@ -10269,6 +10279,7 @@ mod tests { 10.into(), 0, 11.into(), + false, ), ), ], @@ -10667,6 +10678,7 @@ mod tests { min_commit_ts: 11.into(), last_change_ts: TimeStamp::zero(), versions_to_last_change: 1, + is_locked_with_conflict: false, }, false ) diff --git a/src/storage/mvcc/consistency_check.rs b/src/storage/mvcc/consistency_check.rs index 311447601f8..5233526ef9e 100644 --- a/src/storage/mvcc/consistency_check.rs +++ b/src/storage/mvcc/consistency_check.rs @@ -532,6 +532,7 @@ mod tests { TimeStamp::zero(), 0, TimeStamp::zero(), + false, ); let value = lock.to_bytes(); engine diff --git a/src/storage/mvcc/mod.rs b/src/storage/mvcc/mod.rs index 1779c116ccd..6e4848c8579 100644 --- a/src/storage/mvcc/mod.rs +++ b/src/storage/mvcc/mod.rs @@ -598,7 +598,7 @@ pub mod tests { let mut reader = 
MvccReader::new(snapshot, None, true); let lock = reader.load_lock(&Key::from_raw(key)).unwrap().unwrap(); assert_eq!(lock.ts, start_ts.into()); - assert_ne!(lock.lock_type, LockType::Pessimistic); + assert!(!lock.is_pessimistic_lock()); lock } @@ -612,7 +612,7 @@ pub mod tests { let mut reader = MvccReader::new(snapshot, None, true); let lock = reader.load_lock(&Key::from_raw(key)).unwrap().unwrap(); assert_eq!(lock.ts, start_ts.into()); - assert_ne!(lock.lock_type, LockType::Pessimistic); + assert!(!lock.is_pessimistic_lock()); assert_eq!(lock.ttl, ttl); } @@ -631,9 +631,9 @@ pub mod tests { assert_eq!(lock.ttl, ttl); assert_eq!(lock.min_commit_ts, min_commit_ts.into()); if is_pessimistic { - assert_eq!(lock.lock_type, LockType::Pessimistic); + assert!(lock.is_pessimistic_lock()) } else { - assert_ne!(lock.lock_type, LockType::Pessimistic); + assert!(!lock.is_pessimistic_lock()); } } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 4aeb424c1ff..41da39bb28b 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -1674,6 +1674,7 @@ pub mod tests { for_update_ts, 0, TimeStamp::zero(), + false, ) .set_last_change( TimeStamp::zero(), diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 8828033c8a1..9da1b48d3ff 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -1007,6 +1007,7 @@ pub mod test_util { self.for_update_ts, 0, 0.into(), + false, ) .set_last_change(self.last_change_ts, self.versions_to_last_change); TxnEntry::Prewrite { diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index d558741997f..60450be1b40 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -2,7 +2,7 @@ use kvproto::kvrpcpb::WriteConflictReason; // #[PerformanceCriticalPath] 
-use txn_types::{Key, LockType, OldValue, PessimisticLock, TimeStamp, Value, Write, WriteType}; +use txn_types::{Key, OldValue, PessimisticLock, TimeStamp, Value, Write, WriteType}; use crate::storage::{ mvcc::{ @@ -109,7 +109,7 @@ pub fn acquire_pessimistic_lock( if lock.ts != reader.start_ts { return Err(ErrorInner::KeyIsLocked(lock.into_lock_info(key.into_raw()?)).into()); } - if lock.lock_type != LockType::Pessimistic { + if !lock.is_pessimistic_lock() { return Err(ErrorInner::LockTypeNotMatch { start_ts: reader.start_ts, key: key.into_raw()?, @@ -206,6 +206,7 @@ pub fn acquire_pessimistic_lock( min_commit_ts, last_change_ts: lock.last_change_ts, versions_to_last_change: lock.versions_to_last_change, + is_locked_with_conflict: lock.is_pessimistic_lock_with_conflict(), }; txn.put_pessimistic_lock(key, lock, false); } else { @@ -371,6 +372,7 @@ pub fn acquire_pessimistic_lock( min_commit_ts, last_change_ts, versions_to_last_change, + is_locked_with_conflict: conflict_info.is_some(), }; // When lock_only_if_exists is false, always acquire pessimistic lock, otherwise @@ -464,6 +466,7 @@ pub mod tests { need_check_existence: bool, should_not_exist: bool, lock_only_if_exists: bool, + ttl: u64, ) -> MvccResult { let ctx = Context::default(); let snapshot = engine.snapshot(Default::default()).unwrap(); @@ -477,7 +480,7 @@ pub mod tests { Key::from_raw(key), pk, should_not_exist, - 1, + ttl, for_update_ts.into(), need_value, need_check_existence, @@ -505,6 +508,7 @@ pub mod tests { for_update_ts: impl Into, need_value: bool, need_check_existence: bool, + ttl: u64, ) -> PessimisticLockKeyResult { acquire_pessimistic_lock_allow_lock_with_conflict( engine, @@ -516,6 +520,7 @@ pub mod tests { need_check_existence, false, false, + ttl, ) .unwrap() } @@ -751,7 +756,7 @@ pub mod tests { let lock = reader.load_lock(&Key::from_raw(key)).unwrap().unwrap(); assert_eq!(lock.ts, start_ts.into()); assert_eq!(lock.for_update_ts, for_update_ts.into()); - assert_eq!(lock.lock_type, 
LockType::Pessimistic); + assert!(lock.is_pessimistic_lock()); lock } @@ -1845,22 +1850,22 @@ pub mod tests { must_commit(&mut engine, b"k1", 10, 20); // Normal cases. - must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 30, false, false) + must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 30, false, false, 1) .assert_empty(); must_pessimistic_rollback(&mut engine, b"k1", 10, 30); must_unlocked(&mut engine, b"k1"); - must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 30, false, true) + must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 30, false, true, 1) .assert_existence(true); must_pessimistic_rollback(&mut engine, b"k1", 10, 30); must_unlocked(&mut engine, b"k1"); - must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 30, true, false) + must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 30, true, false, 1) .assert_value(Some(b"v1")); must_pessimistic_rollback(&mut engine, b"k1", 10, 30); must_unlocked(&mut engine, b"k1"); - must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 30, true, true) + must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 30, true, true, 1) .assert_value(Some(b"v1")); must_pessimistic_rollback(&mut engine, b"k1", 10, 30); must_unlocked(&mut engine, b"k1"); @@ -1877,26 +1882,29 @@ pub mod tests { 15, need_value, need_check_existence, + 1, ) .assert_locked_with_conflict(Some(b"v1"), 20); - must_pessimistic_locked(&mut engine, b"k1", 10, 20); + let lock = must_pessimistic_locked(&mut engine, b"k1", 10, 20); + assert!(lock.is_pessimistic_lock_with_conflict()); must_pessimistic_rollback(&mut engine, b"k1", 10, 20); must_unlocked(&mut engine, b"k1"); } // Idempotency - must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 50, false, false) + must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 50, false, false, 1) .assert_empty(); - must_succeed_allow_lock_with_conflict(&mut 
engine, b"k1", b"k1", 10, 40, false, false) + must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 40, false, false, 1) .assert_locked_with_conflict(Some(b"v1"), 50); - must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 15, false, false) + must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 15, false, false, 1) .assert_locked_with_conflict(Some(b"v1"), 50); - must_pessimistic_locked(&mut engine, b"k1", 10, 50); + let lock = must_pessimistic_locked(&mut engine, b"k1", 10, 50); + assert!(!lock.is_pessimistic_lock_with_conflict()); must_pessimistic_rollback(&mut engine, b"k1", 10, 50); must_unlocked(&mut engine, b"k1"); // Lock waiting. - must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 50, false, false) + must_succeed_allow_lock_with_conflict(&mut engine, b"k1", b"k1", 10, 50, false, false, 1) .assert_empty(); let err = acquire_pessimistic_lock_allow_lock_with_conflict( &mut engine, @@ -1908,6 +1916,7 @@ pub mod tests { false, false, false, + 1, ) .unwrap_err(); assert!(matches!(err, MvccError(box ErrorInner::KeyIsLocked(_)))); @@ -1921,6 +1930,7 @@ pub mod tests { false, false, false, + 1, ) .unwrap_err(); assert!(matches!(err, MvccError(box ErrorInner::KeyIsLocked(_)))); @@ -2077,6 +2087,7 @@ pub mod tests { false, true, false, + 1, ) .unwrap_err(); match e { @@ -2098,6 +2109,7 @@ pub mod tests { false, true, false, + 1, ) .unwrap_err(); match e { @@ -2120,6 +2132,7 @@ pub mod tests { false, true, false, + 1, ) .unwrap_err(); match e { @@ -2143,6 +2156,7 @@ pub mod tests { false, true, false, + 1, ) .unwrap() .assert_locked_with_conflict(None, 60); @@ -2158,6 +2172,7 @@ pub mod tests { false, true, false, + 1, ) .unwrap() .assert_locked_with_conflict(None, 60); @@ -2175,6 +2190,7 @@ pub mod tests { false, true, false, + 1, ) .unwrap() .assert_locked_with_conflict(None, 70); @@ -2194,6 +2210,7 @@ pub mod tests { false, true, false, + 1, ) .unwrap() .assert_empty(); @@ -2210,6 +2227,7 @@ pub mod 
tests { false, true, false, + 1, ) .unwrap() .assert_empty(); @@ -2232,6 +2250,7 @@ pub mod tests { false, true, false, + 1, ) .unwrap() .assert_locked_with_conflict(None, 20); @@ -2248,6 +2267,7 @@ pub mod tests { false, true, false, + 1, ) .unwrap() .assert_empty(); @@ -2268,6 +2288,7 @@ pub mod tests { false, true, false, + 1, ) .unwrap_err(); match e { @@ -2289,6 +2310,7 @@ pub mod tests { false, true, false, + 1, ) .unwrap_err(); match e { @@ -2315,6 +2337,7 @@ pub mod tests { false, true, false, + 1, ) .unwrap_err(); match e { @@ -2334,6 +2357,7 @@ pub mod tests { false, true, false, + 1, ) .unwrap_err(); match e { @@ -2361,10 +2385,12 @@ pub mod tests { false, false, true, + 1, ) .unwrap() .assert_locked_with_conflict(Some(b"v1"), 30); - must_pessimistic_locked(&mut engine, b"k1", 10, 30); + let lock = must_pessimistic_locked(&mut engine, b"k1", 10, 30); + assert!(lock.is_pessimistic_lock_with_conflict()); // Key exists and already locked (idempotency). acquire_pessimistic_lock_allow_lock_with_conflict( @@ -2377,10 +2403,12 @@ pub mod tests { false, false, true, + 1, ) .unwrap() .assert_locked_with_conflict(Some(b"v1"), 30); - must_pessimistic_locked(&mut engine, b"k1", 10, 30); + let lock = must_pessimistic_locked(&mut engine, b"k1", 10, 30); + assert!(lock.is_pessimistic_lock_with_conflict()); // Key exists and is locked with a larger for_update_ts (stale request) must_succeed(&mut engine, b"k1", b"k1", 10, 40); @@ -2394,10 +2422,12 @@ pub mod tests { false, false, true, + 1, ) .unwrap() .assert_locked_with_conflict(Some(b"v1"), 40); - must_pessimistic_locked(&mut engine, b"k1", 10, 40); + let lock = must_pessimistic_locked(&mut engine, b"k1", 10, 40); + assert!(lock.is_pessimistic_lock_with_conflict()); // Key not exist. 
must_pessimistic_prewrite_delete(&mut engine, b"k1", b"k1", 10, 40, DoPessimisticCheck); @@ -2414,6 +2444,7 @@ pub mod tests { false, false, true, + 1, ) .unwrap_err(); match e { @@ -2439,6 +2470,7 @@ pub mod tests { false, false, true, + 1, ) .unwrap() .assert_value(None); @@ -2458,6 +2490,7 @@ pub mod tests { false, false, true, + 1, ) .unwrap() .assert_value(Some(b"v2")); @@ -2474,6 +2507,7 @@ pub mod tests { false, false, true, + 1, ) .unwrap() .assert_value(Some(b"v2")); @@ -2497,6 +2531,7 @@ pub mod tests { false, false, true, + 1, ) .unwrap() .assert_locked_with_conflict(Some(b"v2"), 50); diff --git a/src/storage/txn/actions/check_txn_status.rs b/src/storage/txn/actions/check_txn_status.rs index b0e1ff66232..6e786aec5fa 100644 --- a/src/storage/txn/actions/check_txn_status.rs +++ b/src/storage/txn/actions/check_txn_status.rs @@ -12,9 +12,85 @@ use crate::storage::{ Snapshot, TxnStatus, }; -// Check whether there's an overlapped write record, and then perform rollback. -// The actual behavior to do the rollback differs according to whether there's -// an overlapped write record. +// The returned `TxnStatus` is Some(..) if the transaction status is already +// determined. +fn check_txn_status_from_pessimistic_primary_lock( + txn: &mut MvccTxn, + reader: &mut SnapshotReader, + primary_key: Key, + lock: &Lock, + current_ts: TimeStamp, + resolving_pessimistic_lock: bool, +) -> Result<(Option, Option)> { + assert!(lock.is_pessimistic_lock()); + // Check the storage information first in case the force lock could be stale. + // See https://github.com/pingcap/tidb/issues/43540 for more details. + if lock.is_pessimistic_lock_with_conflict() { + // Use `check_txn_status_missing_lock` to check if there exists a commit or + // rollback record in the write CF, if so the current primary + // pessimistic lock is stale. Otherwise the primary pessimistic lock is + // regarded as valid, and the transaction status is determined by it. 
+ if let Some(txn_status) = check_determined_txn_status(reader, &primary_key)? { + info!("unlock stale pessimistic primary lock"; + "primary_key" => ?&primary_key, + "lock" => ?&lock, + "current_ts" => current_ts, + "resolving_pessimistic_lock" => ?resolving_pessimistic_lock, + ); + let released = txn.unlock_key(primary_key, true, TimeStamp::zero()); + MVCC_CHECK_TXN_STATUS_COUNTER_VEC.pessimistic_rollback.inc(); + return Ok((Some(txn_status), released)); + } + } + + // The primary pessimistic lock has expired, and this lock is regarded as valid + // primary lock. If `resolving_pessimistic_lock` is false, it means the + // secondary lock is a prewrite lock and the transaction must already be in + // commit phase, thus the primary key must NOT change any more. In this case + // if primary lock expires, unlock it and put a rollback record. + // If `resolving_pessimistic_lock` is true. The transaction may still be ongoing + // and it's not in commit phase, the primary key could still change. If the + // primary lock expires, just pessimistically rollback it but do NOT put an + // rollback record. + if lock.ts.physical() + lock.ttl < current_ts.physical() { + return if resolving_pessimistic_lock { + let released = txn.unlock_key(primary_key, true, TimeStamp::zero()); + MVCC_CHECK_TXN_STATUS_COUNTER_VEC.pessimistic_rollback.inc(); + Ok((Some(TxnStatus::PessimisticRollBack), released)) + } else { + let released = rollback_lock(txn, reader, primary_key, lock, true, true)?; + MVCC_CHECK_TXN_STATUS_COUNTER_VEC.rollback.inc(); + Ok((Some(TxnStatus::TtlExpire), released)) + }; + } + + Ok((None, None)) +} + +/// Evaluate transaction status if a lock exists with the anticipated +/// 'start_ts'. +/// +/// 1. Validate whether the existing lock indeed corresponds to the +/// primary lock. The primary key may switch under certain circumstances. If +/// it's a stale lock, the transaction status should not be determined by it. 
+/// Refer to https://github.com/pingcap/tidb/issues/42937 for additional information. +/// Note that the primary key should remain unaltered if the transaction is +/// already in the commit or 2PC phase. +/// +/// 2. Manage the check in accordance with the primary lock type: +/// 2.1 For the pessimistic type: +/// 2.1.1 If it's a forced lock, validate the storage data initially to ensure +/// the forced lock isn't stale. +/// 2.1.2 If it's a regular lock, verify the lock's TTL and the current +/// timestamp to determine the status. If the `resolving_pessimistic` parameter +/// is true, perform a pessimistic rollback, else carry out a real rollback. +/// 2.2 For the prewrite type, verify the lock's TTL and the current timestamp +/// to decide the status. +/// +/// 3. Perform required operations on the valid primary lock, such as +/// incrementing `min_commit_ts`. The actual procedure for executing the +/// rollback differs based on the presence or absence of an overlapping write +/// record. pub fn check_txn_status_lock_exists( txn: &mut MvccTxn, reader: &mut SnapshotReader, @@ -25,12 +101,48 @@ pub fn check_txn_status_lock_exists( force_sync_commit: bool, resolving_pessimistic_lock: bool, verify_is_primary: bool, + rollback_if_not_exist: bool, ) -> Result<(TxnStatus, Option)> { if verify_is_primary && !primary_key.is_encoded_from(&lock.primary) { - // Return the current lock info to tell the client what the actual primary is. - return Err( - ErrorInner::PrimaryMismatch(lock.into_lock_info(primary_key.into_raw()?)).into(), - ); + // If the resolving lock is a prewrite lock and the current lock is a + // pessimistic lock, the primary key in the prewrite lock must be valid. + // So if the current lock dose not match it must be invalid, unlock the + // invalid lock and check the transaction status with the lock missing path. 
+ return match (resolving_pessimistic_lock, lock.is_pessimistic_lock()) { + (false, true) => { + info!("unlock invalid pessimistic primary lock"; + "primary_key" => ?&primary_key, + "lock" => ?&lock, + "current_ts" => current_ts, + "resolving_pessimistic_lock" => ?resolving_pessimistic_lock, + ); + let txn_status = check_txn_status_missing_lock( + txn, + reader, + primary_key.clone(), + None, + MissingLockAction::rollback(rollback_if_not_exist), + resolving_pessimistic_lock, + )?; + let released = txn.unlock_key(primary_key, true, TimeStamp::zero()); + MVCC_CHECK_TXN_STATUS_COUNTER_VEC.pessimistic_rollback.inc(); + Ok((txn_status, released)) + } + _ => { + warn!("mismatch primary key and lock"; + "primary_key" => ?&primary_key, + "lock" => ?&lock, + "current_ts" => current_ts, + "resolving_pessimistic_lock" => ?resolving_pessimistic_lock, + "rollback_if_not_exist" => rollback_if_not_exist, + ); + // Return the current lock info to tell the client what the actual primary is. + Err( + ErrorInner::PrimaryMismatch(lock.into_lock_info(primary_key.into_raw()?)) + .into(), + ) + } + }; } // Never rollback or push forward min_commit_ts in check_txn_status if it's @@ -49,20 +161,24 @@ pub fn check_txn_status_lock_exists( } let is_pessimistic_txn = !lock.for_update_ts.is_zero(); - if lock.ts.physical() + lock.ttl < current_ts.physical() { - // If the lock is expired, clean it up. - // If the resolving and primary key lock are both pessimistic locks, just unlock - // the primary pessimistic lock and do not write rollback records. 
- return if resolving_pessimistic_lock && lock.lock_type == LockType::Pessimistic { - let released = txn.unlock_key(primary_key, is_pessimistic_txn, TimeStamp::zero()); - MVCC_CHECK_TXN_STATUS_COUNTER_VEC.pessimistic_rollback.inc(); - Ok((TxnStatus::PessimisticRollBack, released)) - } else { - let released = - rollback_lock(txn, reader, primary_key, &lock, is_pessimistic_txn, true)?; - MVCC_CHECK_TXN_STATUS_COUNTER_VEC.rollback.inc(); - Ok((TxnStatus::TtlExpire, released)) - }; + if lock.is_pessimistic_lock() { + let check_result = check_txn_status_from_pessimistic_primary_lock( + txn, + reader, + primary_key.clone(), + &lock, + current_ts, + resolving_pessimistic_lock, + )?; + // Return if the primary lock is stale or the transaction status is decided. + if let (Some(txn_status), Some(released_lock)) = check_result { + return Ok((txn_status, Some(released_lock))); + } + assert!(check_result.0.is_none() && check_result.1.is_none()); + } else if lock.ts.physical() + lock.ttl < current_ts.physical() { + let released = rollback_lock(txn, reader, primary_key, &lock, is_pessimistic_txn, true)?; + MVCC_CHECK_TXN_STATUS_COUNTER_VEC.rollback.inc(); + return Ok((TxnStatus::TtlExpire, released)); } // If lock.min_commit_ts is 0, it's not a large transaction and we can't push @@ -95,6 +211,28 @@ pub fn check_txn_status_lock_exists( Ok((TxnStatus::uncommitted(lock, min_commit_ts_pushed), None)) } +// Check transaction status from storage for the primary key, this function +// would have no impact on the transaction status, it is read only and would not +// write anything. The returned `TxnStatus` is Some(..) if it's already +// determined. +pub fn check_determined_txn_status( + reader: &mut SnapshotReader, + primary_key: &Key, +) -> Result> { + MVCC_CHECK_TXN_STATUS_COUNTER_VEC.get_commit_info.inc(); + match reader.get_txn_commit_record(primary_key)? 
{ + TxnCommitRecord::SingleRecord { commit_ts, write } => { + if write.write_type == WriteType::Rollback { + Ok(Some(TxnStatus::RolledBack)) + } else { + Ok(Some(TxnStatus::committed(commit_ts))) + } + } + TxnCommitRecord::OverlappedRollback { .. } => Ok(Some(TxnStatus::RolledBack)), + TxnCommitRecord::None { .. } => Ok(None), + } +} + pub fn check_txn_status_missing_lock( txn: &mut MvccTxn, reader: &mut SnapshotReader, diff --git a/src/storage/txn/actions/commit.rs b/src/storage/txn/actions/commit.rs index 8259991dde6..2f9e45bfe7f 100644 --- a/src/storage/txn/actions/commit.rs +++ b/src/storage/txn/actions/commit.rs @@ -6,7 +6,7 @@ use txn_types::{Key, TimeStamp, Write, WriteType}; use crate::storage::{ mvcc::{ metrics::{MVCC_CONFLICT_COUNTER, MVCC_DUPLICATE_CMD_COUNTER_VEC}, - ErrorInner, LockType, MvccTxn, ReleasedLock, Result as MvccResult, SnapshotReader, + ErrorInner, MvccTxn, ReleasedLock, Result as MvccResult, SnapshotReader, }, Snapshot, }; @@ -47,7 +47,7 @@ pub fn commit( // lock request, and the transaction need not to acquire this lock again(due to // WriteConflict). If the transaction is committed, we should remove the // pessimistic lock (like pessimistic_rollback) instead of committing. - if lock.lock_type == LockType::Pessimistic { + if lock.is_pessimistic_lock() { warn!( "rollback a pessimistic lock when trying to commit"; "key" => %key, @@ -92,7 +92,7 @@ pub fn commit( if !commit { // Rollback a stale pessimistic lock. This function must be called by // resolve-lock in this case. 
- assert_eq!(lock.lock_type, LockType::Pessimistic); + assert!(lock.is_pessimistic_lock()); return Ok(txn.unlock_key(key, lock.is_pessimistic_txn(), TimeStamp::zero())); } diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index bb0c95eb935..fbaaec8ab43 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -185,6 +185,7 @@ pub fn prewrite_flashback_key( TimeStamp::zero(), 1, TimeStamp::zero(), + false, ), false, // Assuming flashback transactions won't participate any lock conflicts. ); diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 2fe53b32ccd..8821f2ec73f 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -341,7 +341,7 @@ impl<'a> PrewriteMutation<'a> { self.last_change_ts = lock.last_change_ts; self.versions_to_last_change = lock.versions_to_last_change; - if lock.lock_type == LockType::Pessimistic { + if lock.is_pessimistic_lock() { // TODO: remove it in future if !self.txn_props.is_pessimistic() { return Err(ErrorInner::LockTypeNotMatch { @@ -530,6 +530,7 @@ impl<'a> PrewriteMutation<'a> { for_update_ts_to_write, self.txn_props.txn_size, self.min_commit_ts, + false, ) .set_txn_source(self.txn_props.txn_source); // Only Lock needs to record `last_change_ts` in its write record, Put or Delete @@ -2556,6 +2557,7 @@ pub mod tests { ts.into(), 5, ts.into(), + false, ) .set_last_change(last_change_ts.into(), versions_to_last_change); engine diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index d21d47871d4..92985c4d90d 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -6,7 +6,7 @@ use txn_types::{Key, Lock, WriteType}; use crate::storage::{ kv::WriteData, lock_manager::LockManager, - mvcc::{LockType, MvccTxn, 
SnapshotReader, TimeStamp, TxnCommitRecord}, + mvcc::{MvccTxn, OverlappedWrite, ReleasedLock, SnapshotReader, TimeStamp, TxnCommitRecord}, txn::{ actions::check_txn_status::{collapse_prev_rollback, make_rollback}, commands::{ @@ -54,6 +54,89 @@ enum SecondaryLockStatus { RolledBack, } +// The returned `bool` indicates whether the rollback record should be written, +// it should be true if and only if the txn commit record is not found, thus +// a rollback record would be written later. +fn check_determined_txn_status( + reader: &mut ReaderWithStats<'_, S>, + key: &Key, +) -> Result<(SecondaryLockStatus, bool, Option)> { + match reader.get_txn_commit_record(key)? { + TxnCommitRecord::SingleRecord { commit_ts, write } => { + let status = if write.write_type != WriteType::Rollback { + SecondaryLockStatus::Committed(commit_ts) + } else { + SecondaryLockStatus::RolledBack + }; + // We needn't write a rollback once there is a write record for it: + // If it's a committed record, it cannot be changed. + // If it's a rollback record, it either comes from another + // check_secondary_lock (thus protected) or the client stops commit + // actively. So we don't need to make it protected again. + Ok((status, false, None)) + } + TxnCommitRecord::OverlappedRollback { .. 
} => { + Ok((SecondaryLockStatus::RolledBack, false, None)) + } + TxnCommitRecord::None { overlapped_write } => { + Ok((SecondaryLockStatus::RolledBack, true, overlapped_write)) + } + } +} + +fn check_status_from_lock( + txn: &mut MvccTxn, + reader: &mut ReaderWithStats<'_, S>, + lock: Lock, + key: &Key, + region_id: u64, +) -> Result<( + SecondaryLockStatus, + bool, + Option, + Option, +)> { + let mut overlapped_write = None; + if lock.is_pessimistic_lock_with_conflict() { + assert!(lock.is_pessimistic_lock()); + let (status, need_rollback, rollback_overlapped_write) = + check_determined_txn_status(reader, key)?; + // If there exists commit or rollback record, the pessimistic lock is stale, in + // this case the returned need_rollback is false. + if !need_rollback { + let released_lock = txn.unlock_key(key.clone(), true, TimeStamp::zero()); + return Ok(( + status, + need_rollback, + rollback_overlapped_write, + released_lock, + )); + } + overlapped_write = rollback_overlapped_write; + } + + if lock.is_pessimistic_lock() { + let released_lock = txn.unlock_key(key.clone(), true, TimeStamp::zero()); + // If the `is_pessimistic_lock_with_conflict` is true, the `overlapped_write` is + // already fetched in the above `check_determined_txn_status` call. So + // we don't need to fetch it again and the `overlapped_write` could be + // reused here. + let overlapped_write_res = if lock.is_pessimistic_lock_with_conflict() { + overlapped_write + } else { + reader.get_txn_commit_record(key)?.unwrap_none(region_id) + }; + Ok(( + SecondaryLockStatus::RolledBack, + true, + overlapped_write_res, + released_lock, + )) + } else { + Ok((SecondaryLockStatus::Locked(lock), false, None, None)) + } +} + impl WriteCommand for CheckSecondaryLocks { fn process_write(self, snapshot: S, context: WriteContext<'_, L>) -> Result { // It is not allowed for commit to overwrite a protected rollback. 
So we update @@ -76,40 +159,16 @@ impl WriteCommand for CheckSecondaryLocks { let (status, need_rollback, rollback_overlapped_write) = match reader.load_lock(&key)? { // The lock exists, the lock information is returned. Some(lock) if lock.ts == self.start_ts => { - if lock.lock_type == LockType::Pessimistic { - released_lock = txn.unlock_key(key.clone(), true, TimeStamp::zero()); - let overlapped_write = - reader.get_txn_commit_record(&key)?.unwrap_none(region_id); - (SecondaryLockStatus::RolledBack, true, overlapped_write) - } else { - (SecondaryLockStatus::Locked(lock), false, None) - } + let (status, need_rollback, rollback_overlapped_write, lock_released) = + check_status_from_lock(&mut txn, &mut reader, lock, &key, region_id)?; + released_lock = lock_released; + (status, need_rollback, rollback_overlapped_write) } // Searches the write CF for the commit record of the lock and returns the commit // timestamp (0 if the lock is not committed). l => { mismatch_lock = l; - match reader.get_txn_commit_record(&key)? { - TxnCommitRecord::SingleRecord { commit_ts, write } => { - let status = if write.write_type != WriteType::Rollback { - SecondaryLockStatus::Committed(commit_ts) - } else { - SecondaryLockStatus::RolledBack - }; - // We needn't write a rollback once there is a write record for it: - // If it's a committed record, it cannot be changed. - // If it's a rollback record, it either comes from another - // check_secondary_lock (thus protected) or the client stops commit - // actively. So we don't need to make it protected again. - (status, false, None) - } - TxnCommitRecord::OverlappedRollback { .. } => { - (SecondaryLockStatus::RolledBack, false, None) - } - TxnCommitRecord::None { overlapped_write } => { - (SecondaryLockStatus::RolledBack, true, overlapped_write) - } - } + check_determined_txn_status(&mut reader, &key)? 
} }; // If the lock does not exist or is a pessimistic lock, to prevent the @@ -346,5 +405,54 @@ pub mod tests { res => panic!("unexpected lock status: {:?}", res), } must_get_overlapped_rollback(&mut engine, b"k1", 15, 13, WriteType::Lock, Some(0)); + + // Lock CF has an stale pessimistic lock, the transaction is already committed + // or rolled back. + // + // LOCK CF | WRITE CF + // ------------------------------------ + // | 15: start_ts = 13 with overlapped rollback + // | 14: rollback + // | 11: rollback + // | 9: start_ts = 7 + // | 5: rollback + // | 3: start_ts = 1 + must_acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k1", + b"key", + 7, + 7, + true, + false, + 10, + ) + .assert_locked_with_conflict(None, 15); + match check_secondary(b"k1", 7) { + SecondaryLocksStatus::Committed(ts) => { + assert!(ts.eq(&9.into())); + } + res => panic!("unexpected lock status: {:?}", res), + } + must_unlocked(&mut engine, b"k1"); + + // Lock CF has an pessimistic lock, the transaction status is not found + // in storage. 
+ must_acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + b"k1", + b"key", + 8, + 8, + true, + false, + 10, + ) + .assert_locked_with_conflict(None, 15); + match check_secondary(b"k1", 8) { + SecondaryLocksStatus::RolledBack => {} + res => panic!("unexpected lock status: {:?}", res), + } + must_unlocked(&mut engine, b"k1"); } } diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index e915c0357d4..d8172a60091 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -113,6 +113,7 @@ impl WriteCommand for CheckTxnStatus { self.force_sync_commit, self.resolving_pessimistic_lock, self.verify_is_primary, + self.rollback_if_not_exist, )?, l => ( check_txn_status_missing_lock( @@ -163,6 +164,7 @@ pub mod tests { mvcc::tests::*, txn::{ self, + actions::acquire_pessimistic_lock::tests::acquire_pessimistic_lock_allow_lock_with_conflict, commands::{pessimistic_rollback, WriteCommand, WriteContext}, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*, @@ -296,6 +298,18 @@ pub mod tests { } } + fn pessimistic_rollback() -> impl FnOnce(TxnStatus) -> bool { + move |s| s == PessimisticRollBack + } + + fn ttl_expire() -> impl FnOnce(TxnStatus) -> bool { + move |s| s == TtlExpire + } + + fn lock_not_exist() -> impl FnOnce(TxnStatus) -> bool { + move |s| s == LockNotExist + } + #[test] fn test_check_async_commit_txn_status() { let do_test = |rollback_if_not_exist: bool| { @@ -1244,4 +1258,174 @@ pub mod tests { kvrpcpb::Op::Put, ); } + + #[test] + fn test_check_txn_status_resolving_primary_pessimistic_lock() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + let k1 = b"k1"; + let v1 = b"v1"; + let k2 = b"k2"; + let v2 = b"v2"; + let ts = TimeStamp::compose; + + must_acquire_pessimistic_lock_with_ttl(&mut engine, k1, k1, ts(1, 0), ts(1, 0), 10); + must_acquire_pessimistic_lock_with_ttl(&mut engine, k2, k1, ts(1, 0), ts(1, 0), 10); + 
must_pessimistic_prewrite_put( + &mut engine, + k1, + v1, + k1, + ts(1, 0), + ts(1, 0), + DoPessimisticCheck, + ); + must_pessimistic_prewrite_put( + &mut engine, + k2, + v2, + k1, + ts(1, 0), + ts(1, 0), + DoPessimisticCheck, + ); + must_commit(&mut engine, k1, ts(1, 0), ts(2, 0)); + + // 1. Test resolve the stale pessimistic primary lock. Note the force lock + // could succeed only if there's no corresponding rollback record. + must_acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + k1, + k1, + ts(1, 0), + ts(1, 0), + false, + false, + 10, + ) + .assert_locked_with_conflict(Some(v1), ts(2, 0)); + + // Try to resolve k2, the stale pessimistic lock is on k1, the check txn status + // result should be "committed". + must_success( + &mut engine, + k1, + ts(1, 0), + ts(5, 0), + ts(5, 0), + false, + false, + false, + committed(ts(2, 0)), + ); + must_commit(&mut engine, k2, ts(1, 0), ts(2, 0)); + + // 2. Test resolve the normal pessimistic primary lock. + must_acquire_pessimistic_lock_with_ttl(&mut engine, k1, k1, ts(11, 0), ts(11, 0), 10); + must_acquire_pessimistic_lock_with_ttl(&mut engine, k2, k1, ts(11, 0), ts(11, 0), 10); + + // 2.1 The secondary is pessimistic which means `resolving_pessimistic` is true, + // and the primary does not expire. + must_success( + &mut engine, + k1, + ts(11, 0), + ts(15, 0), + ts(15, 0), + false, + false, + false, + uncommitted(10, 0, false), + ); + + // 2.2 The secondary is pessimistic, the primary has expired. The primary + // pessimistic lock should be rolled back pessimsitically. + must_success( + &mut engine, + k1, + ts(11, 0), + ts(25, 0), + ts(25, 0), + false, + false, + true, + pessimistic_rollback(), + ); + + // 2.3 The secondary is prewrite lock, the primary has expired. The + // transaction would be rolled back with persist rollback record on primary key. 
+ must_acquire_pessimistic_lock_with_ttl(&mut engine, k1, k1, ts(11, 0), ts(11, 0), 10); + must_success( + &mut engine, + k1, + ts(11, 0), + ts(25, 0), + ts(25, 0), + false, + false, + false, + ttl_expire(), + ); + must_get_rollback_protected(&mut engine, k1, ts(11, 0), true); + must_rollback(&mut engine, k2, ts(11, 0), false); + + // 3. The stale pessimistic lock is invalid whose primary key is not equal to + // the primary key of the resolving key. + must_acquire_pessimistic_lock_with_ttl(&mut engine, k2, k1, ts(12, 0), ts(12, 0), 10); + // 3.1 The primary key does match error is returned. + must_err( + &mut engine, + k2, + ts(12, 0), + ts(25, 0), + ts(25, 0), + false, + false, + true, + ); + // 3.2 The txn not found error is returned because rollback_if_not_exist is + // false. + must_err( + &mut engine, + k2, + ts(12, 0), + ts(25, 0), + ts(25, 0), + false, + false, + false, + ); + // 3.3 The invalid lock is pessimistically rolled back and the protected + // rollback is written. + must_success( + &mut engine, + k2, + ts(12, 0), + ts(25, 0), + ts(25, 0), + true, + false, + false, + lock_not_exist(), + ); + must_unlocked(&mut engine, k2); + must_get_rollback_protected(&mut engine, k2, ts(12, 0), true); + + // 4. The stale pessimistic lock request would succeed if there's no lock and + // rollback record. 
+ must_prewrite_put(&mut engine, k1, v2, k1, ts(31, 0)); + must_commit(&mut engine, k1, ts(31, 0), ts(32, 0)); + acquire_pessimistic_lock_allow_lock_with_conflict( + &mut engine, + k1, + k1, + ts(11, 0), + ts(11, 0), + false, + false, + false, + false, + 10, + ) + .unwrap_err(); + } } diff --git a/src/storage/txn/commands/pessimistic_rollback.rs b/src/storage/txn/commands/pessimistic_rollback.rs index b34399cb64a..4e0bf8c8c56 100644 --- a/src/storage/txn/commands/pessimistic_rollback.rs +++ b/src/storage/txn/commands/pessimistic_rollback.rs @@ -3,7 +3,7 @@ // #[PerformanceCriticalPath] use std::mem; -use txn_types::{Key, LockType, TimeStamp}; +use txn_types::{Key, TimeStamp}; use crate::storage::{ kv::WriteData, @@ -69,7 +69,7 @@ impl WriteCommand for PessimisticRollback { .into() )); let released_lock: MvccResult<_> = if let Some(lock) = reader.load_lock(&key)? { - if lock.lock_type == LockType::Pessimistic + if lock.is_pessimistic_lock() && lock.ts == self.start_ts && lock.for_update_ts <= self.for_update_ts { diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 146217e4b22..665e99fd6d4 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -2012,6 +2012,7 @@ mod tests { TimeStamp::zero(), 0, TimeStamp::zero(), + false, ), )], Context::default(), diff --git a/src/storage/types.rs b/src/storage/types.rs index 7774dcda9ec..62e614587c8 100644 --- a/src/storage/types.rs +++ b/src/storage/types.rs @@ -122,6 +122,14 @@ impl TxnStatus { pub fn committed(commit_ts: TimeStamp) -> Self { Self::Committed { commit_ts } } + + // Returns if the transaction is already committed or rolled back. + pub fn is_decided(&self) -> bool { + matches!( + self, + TxnStatus::RolledBack | TxnStatus::TtlExpire | TxnStatus::Committed { .. 
} + ) + } } #[derive(Debug)] diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index d397d602d84..366684269f1 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -403,6 +403,7 @@ fn test_read_index_lock_checking_on_follower() { 10.into(), 1, 20.into(), + false, ) .use_async_commit(vec![]); // Set a memory lock which is in the coprocessor query range on the leader diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 16796cfa555..6e588099bd9 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -1348,6 +1348,7 @@ fn test_merge_with_concurrent_pessimistic_locking() { min_commit_ts: 30.into(), last_change_ts: 15.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }, )]) .unwrap(); @@ -1437,6 +1438,7 @@ fn test_merge_pessimistic_locks_with_concurrent_prewrite() { min_commit_ts: 30.into(), last_change_ts: 15.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }; txn_ext .pessimistic_locks @@ -1518,6 +1520,7 @@ fn test_retry_pending_prepare_merge_fail() { min_commit_ts: 30.into(), last_change_ts: 15.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }; txn_ext .pessimistic_locks @@ -1594,6 +1597,7 @@ fn test_merge_pessimistic_locks_propose_fail() { min_commit_ts: 30.into(), last_change_ts: 15.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }; txn_ext .pessimistic_locks diff --git a/tests/failpoints/cases/test_replica_read.rs b/tests/failpoints/cases/test_replica_read.rs index 943bdc874cf..cf38dcc92ce 100644 --- a/tests/failpoints/cases/test_replica_read.rs +++ b/tests/failpoints/cases/test_replica_read.rs @@ -708,6 +708,7 @@ fn test_read_index_lock_checking_on_follower() { 10.into(), 1, 20.into(), + false, ) .use_async_commit(vec![]); let guard = block_on(leader_cm.lock_key(&Key::from_raw(b"k1"))); @@ -787,6 
+788,7 @@ fn test_read_index_lock_checking_on_false_leader() { 10.into(), 1, 20.into(), + false, ) .use_async_commit(vec![]); let guard = block_on(leader_cm.lock_key(&Key::from_raw(b"k1"))); diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index ffcaa370936..12da88a89a4 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -952,6 +952,7 @@ fn test_split_pessimistic_locks_with_concurrent_prewrite() { min_commit_ts: (commit_ts + 10).into(), last_change_ts: 5.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }; let lock_c = PessimisticLock { primary: b"c".to_vec().into_boxed_slice(), @@ -961,6 +962,7 @@ fn test_split_pessimistic_locks_with_concurrent_prewrite() { min_commit_ts: (commit_ts + 10).into(), last_change_ts: 5.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }; { let mut locks = txn_ext.pessimistic_locks.write(); diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index a0e69108125..0e40d412eaf 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -568,6 +568,7 @@ fn test_concurrent_write_after_transfer_leader_invalidates_locks() { min_commit_ts: 30.into(), last_change_ts: 5.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }; txn_ext .pessimistic_locks diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index ed4a8501188..e6f5b56aa92 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -136,6 +136,7 @@ fn test_delete_lock_proposed_after_proposing_locks_impl(transfer_msg_count: usiz min_commit_ts: 20.into(), last_change_ts: 5.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }, )]) .unwrap(); @@ -215,6 +216,7 @@ fn 
test_delete_lock_proposed_before_proposing_locks() { min_commit_ts: 20.into(), last_change_ts: 5.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }, )]) .unwrap(); @@ -299,6 +301,7 @@ fn test_read_lock_after_become_follower() { min_commit_ts: for_update_ts, last_change_ts: start_ts.prev(), versions_to_last_change: 1, + is_locked_with_conflict: false, }, )]) .unwrap(); diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index 8c29ea8490d..02edf45cd31 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -2324,6 +2324,7 @@ fn test_batch_request() { TimeStamp::zero(), 1, TimeStamp::zero(), + false, ); cluster.must_put_cf(CF_LOCK, lock_key.as_encoded(), lock.to_bytes().as_slice()); } diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index dda92230ec8..3b2c0f04c40 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -1356,6 +1356,7 @@ fn test_propose_in_memory_pessimistic_locks() { min_commit_ts: 30.into(), last_change_ts: 5.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }; txn_ext .pessimistic_locks @@ -1374,6 +1375,7 @@ fn test_propose_in_memory_pessimistic_locks() { min_commit_ts: 30.into(), last_change_ts: 5.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }; txn_ext .pessimistic_locks @@ -1485,6 +1487,7 @@ fn test_merge_pessimistic_locks_repeated_merge() { min_commit_ts: 30.into(), last_change_ts: 5.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }; txn_ext .pessimistic_locks diff --git a/tests/integrations/raftstore/test_multi.rs b/tests/integrations/raftstore/test_multi.rs index ef368bbe0cb..1c3a016e0e0 100644 --- a/tests/integrations/raftstore/test_multi.rs +++ b/tests/integrations/raftstore/test_multi.rs @@ -835,6 +835,7 @@ fn 
test_leader_drop_with_pessimistic_lock() { min_commit_ts: 10.into(), last_change_ts: 5.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }, )]) .unwrap(); diff --git a/tests/integrations/raftstore/test_replica_read.rs b/tests/integrations/raftstore/test_replica_read.rs index 40189017645..66a77db318d 100644 --- a/tests/integrations/raftstore/test_replica_read.rs +++ b/tests/integrations/raftstore/test_replica_read.rs @@ -375,6 +375,7 @@ fn test_read_index_retry_lock_checking() { 10.into(), 1, 20.into(), + false, ) .use_async_commit(vec![]); let guard = block_on(leader_cm.lock_key(&Key::from_raw(b"k1"))); diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 071856cbd29..45420b63c29 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -916,6 +916,7 @@ fn test_split_with_in_memory_pessimistic_locks() { min_commit_ts: 30.into(), last_change_ts: 5.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }; let lock_c = PessimisticLock { primary: b"c".to_vec().into_boxed_slice(), @@ -925,6 +926,7 @@ fn test_split_with_in_memory_pessimistic_locks() { min_commit_ts: 30.into(), last_change_ts: 5.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }; { let mut locks = txn_ext.pessimistic_locks.write(); diff --git a/tests/integrations/raftstore/test_transfer_leader.rs b/tests/integrations/raftstore/test_transfer_leader.rs index 6ed9b3c487b..1888e548ff8 100644 --- a/tests/integrations/raftstore/test_transfer_leader.rs +++ b/tests/integrations/raftstore/test_transfer_leader.rs @@ -273,6 +273,7 @@ fn test_propose_in_memory_pessimistic_locks() { min_commit_ts: 30.into(), last_change_ts: 5.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }; // Write a pessimistic lock to the in-memory pessimistic lock table. 
{ @@ -316,6 +317,7 @@ fn test_memory_pessimistic_locks_status_after_transfer_leader_failure() { min_commit_ts: 30.into(), last_change_ts: 5.into(), versions_to_last_change: 3, + is_locked_with_conflict: false, }; // Write a pessimistic lock to the in-memory pessimistic lock table. txn_ext diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index e1efb366af1..30c28654fb8 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -1348,6 +1348,7 @@ fn test_debug_scan_mvcc() { TimeStamp::zero(), 0, TimeStamp::zero(), + false, ) .to_bytes(); engine.put_cf(CF_LOCK, k.as_slice(), &v).unwrap(); @@ -1924,6 +1925,7 @@ fn test_with_memory_lock_cluster( 10.into(), 1, 20.into(), + false, ) .use_async_commit(vec![]); guard.with_lock(|l| { diff --git a/tests/integrations/storage/test_raftkv.rs b/tests/integrations/storage/test_raftkv.rs index 72ea50fa184..1fb8075e10f 100644 --- a/tests/integrations/storage/test_raftkv.rs +++ b/tests/integrations/storage/test_raftkv.rs @@ -233,6 +233,7 @@ fn test_read_on_replica_check_memory_locks() { 10.into(), 1, 20.into(), + false, ); let guard = block_on(leader_cm.lock_key(&encoded_key)); guard.with_lock(|l| *l = Some(lock.clone())); From b1954b0d226bd050537cbd45d57b36a03c6fee55 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Mon, 29 May 2023 19:40:41 +0800 Subject: [PATCH 0713/1149] raftstore, metrics: add metrics for the number of peers in flashback state (#14774) ref tikv/tikv#14748 Add metrics for the number of peers in the flashback state. 
Signed-off-by: JmPotato Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../src/operation/command/admin/flashback.rs | 14 ++- components/raftstore/src/store/fsm/apply.rs | 8 ++ components/raftstore/src/store/metrics.rs | 5 + metrics/grafana/tikv_details.json | 112 +++++++++++++++++- 4 files changed, 136 insertions(+), 3 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/flashback.rs b/components/raftstore-v2/src/operation/command/admin/flashback.rs index 15d9070de45..f45691a009a 100644 --- a/components/raftstore-v2/src/operation/command/admin/flashback.rs +++ b/components/raftstore-v2/src/operation/command/admin/flashback.rs @@ -7,7 +7,11 @@ use kvproto::{ raft_serverpb::RegionLocalState, }; use protobuf::Message; -use raftstore::{coprocessor::RegionChangeReason, store::metrics::PEER_ADMIN_CMD_COUNTER, Result}; +use raftstore::{ + coprocessor::RegionChangeReason, + store::metrics::{PEER_ADMIN_CMD_COUNTER, PEER_IN_FLASHBACK_STATE}, + Result, +}; use super::AdminCmdResult; use crate::{ @@ -46,12 +50,20 @@ impl Apply { match req.get_cmd_type() { AdminCmdType::PrepareFlashback => { PEER_ADMIN_CMD_COUNTER.prepare_flashback.success.inc(); + // First time enter into the flashback state, inc the counter. + if !region.is_in_flashback { + PEER_IN_FLASHBACK_STATE.inc() + } region.set_is_in_flashback(true); region.set_flashback_start_ts(req.get_prepare_flashback().get_start_ts()); } AdminCmdType::FinishFlashback => { PEER_ADMIN_CMD_COUNTER.finish_flashback.success.inc(); + // Leave the flashback state, dec the counter. 
+ if region.is_in_flashback { + PEER_IN_FLASHBACK_STATE.dec() + } region.set_is_in_flashback(false); region.clear_flashback_start_ts(); diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index e61ee006c83..0ade422573e 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -2994,12 +2994,20 @@ where match req.get_cmd_type() { AdminCmdType::PrepareFlashback => { PEER_ADMIN_CMD_COUNTER.prepare_flashback.success.inc(); + // First time enter into the flashback state, inc the counter. + if !region.is_in_flashback { + PEER_IN_FLASHBACK_STATE.inc() + } region.set_is_in_flashback(true); region.set_flashback_start_ts(req.get_prepare_flashback().get_start_ts()); } AdminCmdType::FinishFlashback => { PEER_ADMIN_CMD_COUNTER.finish_flashback.success.inc(); + // Leave the flashback state, dec the counter. + if region.is_in_flashback { + PEER_IN_FLASHBACK_STATE.dec() + } region.set_is_in_flashback(false); region.clear_flashback_start_ts(); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index c69875ae998..eb6002c9a6f 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -875,4 +875,9 @@ lazy_static! 
{ &["store"] ) .unwrap(); + + pub static ref PEER_IN_FLASHBACK_STATE: IntGauge = register_int_gauge!( + "tikv_raftstore_peer_in_flashback_state", + "Total number of peers in the flashback state" + ).unwrap(); } diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index adb4aa34dcd..0ad9bbc4def 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -15982,8 +15982,8 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 118 + "x": 0, + "y": 36 }, "hiddenSeries": false, "id": 23763572060, @@ -16090,6 +16090,114 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 36 + }, + "hiddenSeries": false, + "id": 23763573619, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.10", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(tikv_raftstore_peer_in_flashback_state{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "tikv_raftstore_peer_in_flashback_state", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Peer in Flashback 
State", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:70", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:71", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, From 0ea7fedc3ddf98770905a110d23b5612c2dda484 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 29 May 2023 20:14:41 +0800 Subject: [PATCH 0714/1149] raftstorev2: avoid unnecessary snapshot after split (#14830) close tikv/tikv#14787 raftstorev2: avoid unnecessary snapshot after split Signed-off-by: Neil Shen --- .../src/operation/command/admin/mod.rs | 2 +- .../src/operation/command/admin/split.rs | 66 ++++++++- .../raftstore-v2/src/operation/command/mod.rs | 2 +- components/raftstore-v2/src/operation/mod.rs | 3 +- .../raftstore-v2/src/operation/ready/mod.rs | 26 +++- .../src/operation/ready/snapshot.rs | 3 + components/raftstore-v2/src/raft/peer.rs | 12 +- components/raftstore/src/store/config.rs | 16 ++- components/raftstore/src/store/util.rs | 22 +-- src/server/config.rs | 3 +- .../integrations/config/dynamic/raftstore.rs | 4 +- tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + .../raftstore/test_replica_read.rs | 7 +- tests/integrations/raftstore/test_snap.rs | 6 + .../raftstore/test_split_region.rs | 136 +++++++++++++++++- 16 files changed, 281 insertions(+), 29 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index b3778d359c8..0491eee5470 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ 
b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -37,7 +37,7 @@ use slog::{error, info}; use split::SplitResult; pub use split::{ report_split_init_finish, temp_split_path, RequestHalfSplit, RequestSplit, SplitFlowControl, - SplitInit, SPLIT_PREFIX, + SplitInit, SplitPendingAppend, SPLIT_PREFIX, }; use tikv_util::{box_err, log::SlogFormat, slog_panic}; use txn_types::WriteBatchFlags; diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index d5923227ffa..395a2cfd2be 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -20,7 +20,7 @@ //! parent peer, then send to the store the relevant info needed to create and //! initialize the split regions. //! -//! Split peer creation and initlization: +//! Split peer creation and initialization: //! - on_split_init: In normal cases, the uninitialized split region will be //! created by the store, and here init it using the data sent from the parent //! peer. 
@@ -36,7 +36,7 @@ use kvproto::{ metapb::{self, Region, RegionEpoch}, pdpb::CheckPolicy, raft_cmdpb::{AdminRequest, AdminResponse, RaftCmdRequest, SplitRequest}, - raft_serverpb::RaftSnapshotData, + raft_serverpb::{RaftMessage, RaftSnapshotData}, }; use protobuf::Message; use raft::{prelude::Snapshot, INVALID_ID}; @@ -178,6 +178,32 @@ impl SplitFlowControl { } } +pub struct SplitPendingAppend { + append_msg: Option<(Box, Instant)>, + range_overlapped: bool, +} + +impl SplitPendingAppend { + pub fn set_range_overlapped(&mut self, range_overlapped: bool) { + if self.range_overlapped { + self.range_overlapped = range_overlapped; + } + } + + pub fn take_append_message(&mut self) -> Option> { + self.append_msg.take().map(|(msg, _)| msg) + } +} + +impl Default for SplitPendingAppend { + fn default() -> SplitPendingAppend { + SplitPendingAppend { + append_msg: None, + range_overlapped: true, + } + } +} + pub fn temp_split_path(registry: &TabletRegistry, region_id: u64) -> PathBuf { let tablet_name = registry.tablet_name(SPLIT_PREFIX, region_id, RAFT_INIT_LOG_INDEX); registry.tablet_root().join(tablet_name) @@ -549,6 +575,42 @@ async fn async_checkpoint( } impl Peer { + pub fn ready_to_handle_first_append_message( + &mut self, + store_ctx: &mut StoreContext, + msg: &RaftMessage, + ) -> bool { + // The peer does not overlap with other regions. It means the parent + // region in this node might be stale and has been removed, so there is + // no split init and messages need to be handled immediately. 
+ if !self.split_pending_append_mut().range_overlapped { + return true; + } + + if self.split_pending_append_mut().append_msg.is_none() { + self.split_pending_append_mut() + .append_msg + .replace((msg.clone().into(), Instant::now_coarse())); + return false; + } + let logger = self.logger.clone(); + let append_msg = &mut self.split_pending_append_mut().append_msg; + let dur = append_msg.as_ref().unwrap().1.saturating_elapsed(); + if dur < store_ctx.cfg.snap_wait_split_duration.0 { + append_msg.as_mut().unwrap().0 = msg.clone().into(); + // We consider a message is too early if it is replaced. + store_ctx + .raft_metrics + .message_dropped + .region_nonexistent + .inc(); + return false; + } + append_msg.take(); + warn!(logger, "handle first message now, split may be slow"; "duration" => ?dur); + true + } + pub fn on_apply_res_split( &mut self, store_ctx: &mut StoreContext, diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index db702aef6b8..1099aeb11f0 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -68,7 +68,7 @@ mod write; pub use admin::{ merge_source_path, report_split_init_finish, temp_split_path, AdminCmdResult, CatchUpLogs, CompactLogContext, MergeContext, RequestHalfSplit, RequestSplit, SplitFlowControl, SplitInit, - MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX, + SplitPendingAppend, MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX, }; pub use control::ProposalControl; use pd_client::{BucketMeta, BucketStat}; diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 8d8c24fee19..5da7ec3c242 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -13,7 +13,8 @@ pub use command::{ merge_source_path, AdminCmdResult, ApplyFlowControl, CatchUpLogs, CommittedEntries, 
CompactLogContext, MergeContext, ProposalControl, RequestHalfSplit, RequestSplit, SimpleWriteBinary, SimpleWriteEncoder, SimpleWriteReqDecoder, SimpleWriteReqEncoder, - SplitFlowControl, MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX, + SplitFlowControl, SplitPendingAppend, MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, + SPLIT_PREFIX, }; pub use life::{AbnormalPeerContext, DestroyProgress, GcPeerContext}; pub use ready::{ diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 30f08285926..6a373ac3921 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -34,7 +34,6 @@ use std::{ use engine_traits::{KvEngine, RaftEngine}; use error_code::ErrorCodeExt; use kvproto::{ - metapb, raft_cmdpb::AdminCmdType, raft_serverpb::{ExtraMessageType, RaftMessage}, }; @@ -43,8 +42,9 @@ use raft::{eraftpb, prelude::MessageType, Ready, SnapshotStatus, StateRole, INVA use raftstore::{ coprocessor::{RegionChangeEvent, RoleChange}, store::{ + fsm::store::StoreRegionMeta, needs_evict_entry_cache, - util::{self, is_initial_msg}, + util::{self, is_first_append_entry, is_initial_msg}, worker_metrics::SNAP_COUNTER, FetchedLogs, ReadProgress, Transport, WriteCallback, WriteTask, }, @@ -370,6 +370,25 @@ impl Peer { // It prevents cache peer(0,0) which is sent by region split. self.insert_peer_cache(from_peer); } + + // Delay first append message and wait for split snapshot, + // so that slow split does not trigger leader to send a snapshot. 
+ if !self.storage().is_initialized() { + if is_initial_msg(msg.get_message()) { + let mut is_overlapped = false; + let meta = ctx.store_meta.lock().unwrap(); + meta.search_region(msg.get_start_key(), msg.get_end_key(), |_| { + is_overlapped = true; + }); + self.split_pending_append_mut() + .set_range_overlapped(is_overlapped); + } else if is_first_append_entry(msg.get_message()) + && !self.ready_to_handle_first_append_message(ctx, &msg) + { + return; + } + } + let pre_committed_index = self.raft_group().raft.raft_log.committed; if msg.get_message().get_msg_type() == MessageType::MsgTransferLeader { self.on_transfer_leader_msg(ctx, msg.get_message(), msg.disk_usage) @@ -462,7 +481,6 @@ impl Peer { return None; } }; - let to_peer_is_learner = to_peer.get_role() == metapb::PeerRole::Learner; let mut raft_msg = self.prepare_raft_message(); @@ -489,7 +507,7 @@ impl Peer { // and Heartbeat message for the store of that peer to check whether to create a // new peer when receiving these messages, or just to wait for a pending region // split to perform later. 
- if self.storage().is_initialized() && is_initial_msg(&msg) && to_peer_is_learner { + if self.storage().is_initialized() && is_initial_msg(&msg) { let region = self.region(); raft_msg.set_start_key(region.get_start_key().to_vec()); raft_msg.set_end_key(region.get_end_key().to_vec()); diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index b2a9be988e6..6598fa883e6 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -307,6 +307,9 @@ impl Peer { .tablet .schedule(tablet::Task::destroy(region_id, snapshot_index)); } + if let Some(msg) = self.split_pending_append_mut().take_append_message() { + let _ = ctx.router.send_raft_message(msg); + } } } } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index eeb47500fbf..22565fb3b41 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -36,7 +36,7 @@ use crate::{ operation::{ AbnormalPeerContext, AsyncWriter, BucketStatsInfo, CompactLogContext, DestroyProgress, GcPeerContext, MergeContext, ProposalControl, ReplayWatch, SimpleWriteReqEncoder, - SplitFlowControl, TxnContext, + SplitFlowControl, SplitPendingAppend, TxnContext, }, router::{ApplyTask, CmdResChannel, PeerTick, QueryResChannel}, Result, @@ -100,6 +100,11 @@ pub struct Peer { // Trace which peers have not finished split. split_trace: Vec<(u64, HashSet)>, split_flow_control: SplitFlowControl, + /// `MsgAppend` messages from newly split leader should be step after peer + /// steps snapshot from split, otherwise leader may send an unnecessary + /// snapshot. So the messages are recorded temporarily and will be handled + /// later. + split_pending_append: SplitPendingAppend, /// Apply related State changes that needs to be persisted to raft engine. 
/// @@ -199,6 +204,7 @@ impl Peer { proposal_control: ProposalControl::new(0), pending_ticks: Vec::new(), split_trace: vec![], + split_pending_append: SplitPendingAppend::default(), state_changes: None, flush_state, sst_apply_state, @@ -796,6 +802,10 @@ impl Peer { &mut self.split_trace } + pub fn split_pending_append_mut(&mut self) -> &mut SplitPendingAppend { + &mut self.split_pending_append + } + #[inline] pub fn flush_state(&self) -> &Arc { &self.flush_state diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index ecdfbe85d3f..6b13fcdd064 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -20,7 +20,7 @@ use tikv_util::{ use time::Duration as TimeDuration; use super::worker::{RaftStoreBatchComponent, RefreshConfigTask}; -use crate::Result; +use crate::{coprocessor::config::RAFTSTORE_V2_SPLIT_SIZE, Result}; lazy_static! { pub static ref CONFIG_RAFTSTORE_GAUGE: prometheus::GaugeVec = register_gauge_vec!( @@ -31,6 +31,15 @@ lazy_static! { .unwrap(); } +#[doc(hidden)] +pub const DEFAULT_SNAP_MAX_BYTES_PER_SEC: u64 = 100 * 1024 * 1024; + +// The default duration of waiting split. If a split does not finish in +// one-third of receiving snapshot time, split is likely very slow, so it is +// better to prioritize accepting a snapshot +const DEFAULT_SNAP_WAIT_SPLIT_DURATION: ReadableDuration = + ReadableDuration::secs(RAFTSTORE_V2_SPLIT_SIZE.0 / DEFAULT_SNAP_MAX_BYTES_PER_SEC / 3); + with_prefix!(prefix_apply "apply-"); with_prefix!(prefix_store "store-"); #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, OnlineConfig)] @@ -118,6 +127,10 @@ pub struct Config { pub pd_store_heartbeat_tick_interval: ReadableDuration, pub snap_mgr_gc_tick_interval: ReadableDuration, pub snap_gc_timeout: ReadableDuration, + /// The duration of snapshot waits for region split. It prevents leader from + /// sending unnecessary snapshots when split is slow. 
+ /// It is only effective in raftstore v2. + pub snap_wait_split_duration: ReadableDuration, pub lock_cf_compact_interval: ReadableDuration, pub lock_cf_compact_bytes_threshold: ReadableSize, @@ -386,6 +399,7 @@ impl Default for Config { notify_capacity: 40960, snap_mgr_gc_tick_interval: ReadableDuration::minutes(1), snap_gc_timeout: ReadableDuration::hours(4), + snap_wait_split_duration: DEFAULT_SNAP_WAIT_SPLIT_DURATION, messages_per_tick: 4096, max_peer_down_duration: ReadableDuration::minutes(10), max_leader_missing_duration: ReadableDuration::hours(2), diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 539dfa22403..3a8ad8278fb 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -98,13 +98,13 @@ fn is_first_vote_msg(msg: &eraftpb::Message) -> bool { /// received but there is no such region in `Store::region_peers`. In this case /// we should put `msg` into `pending_msg` instead of create the peer. 
#[inline] -fn is_first_append_entry(msg: &eraftpb::Message) -> bool { +pub fn is_first_append_entry(msg: &eraftpb::Message) -> bool { match msg.get_msg_type() { MessageType::MsgAppend => { - let ent = msg.get_entries(); - ent.len() == 1 - && ent[0].data.is_empty() - && ent[0].index == peer_storage::RAFT_INIT_LOG_INDEX + 1 + let entries = msg.get_entries(); + !entries.is_empty() + && entries[0].data.is_empty() + && entries[0].index == peer_storage::RAFT_INIT_LOG_INDEX + 1 } _ => false, } @@ -2075,12 +2075,12 @@ mod tests { for (msg_type, index, is_append) in tbl { let mut msg = Message::default(); msg.set_msg_type(msg_type); - let ent = { - let mut e = Entry::default(); - e.set_index(index); - e - }; - msg.set_entries(vec![ent].into()); + let mut ent = Entry::default(); + ent.set_index(index); + msg.mut_entries().push(ent.clone()); + assert_eq!(is_first_append_entry(&msg), is_append); + ent.set_index(index + 1); + msg.mut_entries().push(ent); assert_eq!(is_first_append_entry(&msg), is_append); } } diff --git a/src/server/config.rs b/src/server/config.rs index d954ebac36f..f84d93ef4a5 100644 --- a/src/server/config.rs +++ b/src/server/config.rs @@ -6,6 +6,7 @@ use collections::HashMap; use engine_traits::{perf_level_serde, PerfLevel}; use grpcio::{CompressionAlgorithms, ResourceQuota}; use online_config::{ConfigChange, ConfigManager, OnlineConfig}; +use raftstore::store::config::DEFAULT_SNAP_MAX_BYTES_PER_SEC; pub use raftstore::store::Config as RaftStoreConfig; use regex::Regex; use tikv_util::{ @@ -42,8 +43,6 @@ const DEFAULT_ENDPOINT_STREAM_BATCH_ROW_LIMIT: usize = 128; // At least 4 long coprocessor requests are allowed to run concurrently. const MIN_ENDPOINT_MAX_CONCURRENCY: usize = 4; -const DEFAULT_SNAP_MAX_BYTES_PER_SEC: u64 = 100 * 1024 * 1024; - const DEFAULT_MAX_GRPC_SEND_MSG_LEN: i32 = 10 * 1024 * 1024; /// A clone of `grpc::CompressionAlgorithms` with serde supports. 
diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index 003d63d9a47..1748ad4c291 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -27,7 +27,7 @@ use tikv::{ import::SstImporter, }; use tikv_util::{ - config::{ReadableSize, VersionTrack}, + config::{ReadableDuration, ReadableSize, VersionTrack}, worker::{dummy_scheduler, LazyWorker, Worker}, }; @@ -163,6 +163,7 @@ fn test_update_raftstore_config() { ("raftstore.store-max-batch-size", "4321"), ("raftstore.raft-entry-max-size", "32MiB"), ("raftstore.apply-yield-write-size", "10KiB"), + ("raftstore.snap-wait-split-duration", "10s"), ]); cfg_controller.update(change).unwrap(); @@ -176,6 +177,7 @@ fn test_update_raftstore_config() { raft_store.store_batch_system.max_batch_size = Some(4321); raft_store.raft_max_size_per_msg = ReadableSize::mb(128); raft_store.raft_entry_max_size = ReadableSize::mb(32); + raft_store.snap_wait_split_duration = ReadableDuration::secs(10); let validate_store_cfg = |raft_cfg: &Config| { let raftstore_cfg = raft_cfg.clone(); validate_store(&router, move |cfg: &Config| { diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 661be858964..890fc76e206 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -204,6 +204,7 @@ fn test_serde_custom_tikv_config() { notify_capacity: 12_345, snap_mgr_gc_tick_interval: ReadableDuration::minutes(12), snap_gc_timeout: ReadableDuration::hours(12), + snap_wait_split_duration: ReadableDuration::hours(12), messages_per_tick: 12_345, max_peer_down_duration: ReadableDuration::minutes(12), max_leader_missing_duration: ReadableDuration::hours(12), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 6f5d9dc6cdc..a1fa7169a57 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml 
@@ -173,6 +173,7 @@ pd-heartbeat-tick-interval = "12m" pd-store-heartbeat-tick-interval = "12s" snap-mgr-gc-tick-interval = "12m" snap-gc-timeout = "12h" +snap-wait-split-duration = "12h" lock-cf-compact-interval = "12m" lock-cf-compact-bytes-threshold = "123MB" notify-capacity = 12345 diff --git a/tests/integrations/raftstore/test_replica_read.rs b/tests/integrations/raftstore/test_replica_read.rs index 66a77db318d..441e14cf3d8 100644 --- a/tests/integrations/raftstore/test_replica_read.rs +++ b/tests/integrations/raftstore/test_replica_read.rs @@ -452,15 +452,18 @@ fn test_split_isolation() { cluster.run_node(2).unwrap(); // Originally leader of region ['', 'k2'] will go to sleep, so the learner peer // cannot be created. - for _ in 0..10 { + let start = Instant::now(); + loop { let resp = async_read_on_peer(&mut cluster, peer.clone(), r2.clone(), b"k1", true, true); let resp = block_on_timeout(resp, Duration::from_secs(1)).unwrap(); if !resp.get_header().has_error() { return; } + if start.saturating_elapsed() > Duration::from_secs(5) { + panic!("test failed: {:?}", resp); + } thread::sleep(Duration::from_millis(200)); } - panic!("test failed"); } /// Testing after applying snapshot, the `ReadDelegate` stored at `StoreMeta` diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index d69f4aa70f0..465c54d9a77 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -236,6 +236,12 @@ fn test_concurrent_snap() { cluster.cfg.rocksdb.titan.enabled = true; // Disable raft log gc in this test case. cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(60); + // For raftstore v2, after split, follower delays first messages (see + // is_first_message() for details), so leader does not send snapshot to + // follower and CollectSnapshotFilter holds parent region snapshot forever. 
+ // We need to set a short wait duration so that leader can send snapshot + // in time and thus CollectSnapshotFilter can send parent region snapshot. + cluster.cfg.raft_store.snap_wait_split_duration = ReadableDuration::millis(100); let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer count check. diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 45420b63c29..89558f16319 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -1266,9 +1266,9 @@ fn test_catch_up_peers_after_split() { } } -#[test] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_split_region_keep_records() { - let mut cluster = test_raftstore_v2::new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); let r1 = cluster.run_conf_change(); @@ -1301,3 +1301,135 @@ fn test_split_region_keep_records() { region_state ); } + +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_slow_split_does_not_cause_snapshot() { + // We use three nodes([1, 2, 3]) for this test. + let mut cluster = new_cluster(0, 3); + configure_for_lease_read(&mut cluster.cfg, None, Some(5000)); + cluster.cfg.raft_store.snap_wait_split_duration = ReadableDuration::hours(1); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + let region_id = cluster.run_conf_change(); + + pd_client.must_add_peer(region_id, new_peer(2, 2)); + pd_client.must_add_peer(region_id, new_peer(3, 3)); + cluster.must_transfer_leader(region_id, new_peer(3, 3)); + cluster.must_put(b"k2", b"v2"); + cluster.must_transfer_leader(region_id, new_peer(1, 1)); + + // isolate node 3 for region 1. 
+ cluster.add_recv_filter_on_node(3, Box::new(RegionPacketFilter::new(1, 3))); + + let (notify_tx, notify_rx) = std::sync::mpsc::channel(); + cluster.add_send_filter_on_node( + 1, + Box::new(MessageTypeNotifier::new( + MessageType::MsgSnapshot, + notify_tx, + Arc::new(std::sync::atomic::AtomicBool::new(true)), + )), + ); + + // split (-inf, +inf) -> (-inf, k1), [k1, +inf] + let region = pd_client.get_region(b"").unwrap(); + cluster.must_split(®ion, b"k1"); + + // Leader must not send snapshot to new peer on node 3. + notify_rx.recv_timeout(Duration::from_secs(3)).unwrap_err(); + cluster.must_put(b"k0", b"v0"); + // ... even after node 3 applied split. + cluster.clear_recv_filter_on_node(3); + + let new_region = pd_client.get_region(b"").unwrap(); + let new_peer3 = find_peer(&new_region, 3).unwrap(); + cluster.must_transfer_leader(new_region.get_id(), new_peer3.clone()); + + notify_rx.try_recv().unwrap_err(); +} + +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_slow_split_does_not_prevent_snapshot() { + // We use three nodes([1, 2, 3]) for this test. + let mut cluster = new_cluster(0, 3); + configure_for_lease_read(&mut cluster.cfg, None, Some(5000)); + cluster.cfg.raft_store.snap_wait_split_duration = ReadableDuration::secs(2); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + let region_id = cluster.run_conf_change(); + + pd_client.must_add_peer(region_id, new_peer(2, 2)); + pd_client.must_add_peer(region_id, new_peer(3, 3)); + cluster.must_transfer_leader(region_id, new_peer(3, 3)); + cluster.must_put(b"k2", b"v2"); + cluster.must_transfer_leader(region_id, new_peer(1, 1)); + + // isolate node 3 for region 1. 
+ cluster.add_recv_filter_on_node(3, Box::new(RegionPacketFilter::new(1, 3))); + + let (notify_tx, notify_rx) = std::sync::mpsc::channel(); + cluster.add_send_filter_on_node( + 1, + Box::new(MessageTypeNotifier::new( + MessageType::MsgSnapshot, + notify_tx, + Arc::new(std::sync::atomic::AtomicBool::new(true)), + )), + ); + + // split (-inf, +inf) -> (-inf, k1), [k1, +inf] + let region = pd_client.get_region(b"").unwrap(); + cluster.must_split(®ion, b"k1"); + + // Leader must not send snapshot to new peer on node 3. + notify_rx + .recv_timeout(cluster.cfg.raft_store.snap_wait_split_duration.0 / 2) + .unwrap_err(); + + // A follower can receive a snapshot from leader if split is really slow. + thread::sleep(2 * cluster.cfg.raft_store.snap_wait_split_duration.0); + let new_region = pd_client.get_region(b"").unwrap(); + let new_peer3 = find_peer(&new_region, 3).unwrap(); + cluster.must_transfer_leader(new_region.get_id(), new_peer3.clone()); + + notify_rx.try_recv().unwrap(); +} + +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_slow_split_does_not_prevent_leader_election() { + // We use three nodes([1, 2, 3]) for this test. + let mut cluster = new_cluster(0, 3); + configure_for_lease_read(&mut cluster.cfg, None, Some(5000)); + cluster.cfg.raft_store.snap_wait_split_duration = ReadableDuration::hours(1); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + let region_id = cluster.run_conf_change(); + + pd_client.must_add_peer(region_id, new_peer(2, 2)); + pd_client.must_add_peer(region_id, new_peer(3, 3)); + + // Do not let node 2 and 3 split. 
+ cluster.add_recv_filter_on_node(2, Box::new(EraseHeartbeatCommit)); + cluster.add_recv_filter_on_node(3, Box::new(EraseHeartbeatCommit)); + + let (notify_tx, notify_rx) = std::sync::mpsc::channel(); + cluster.add_recv_filter_on_node( + 1, + Box::new(MessageTypeNotifier::new( + MessageType::MsgRequestVoteResponse, + notify_tx, + Arc::new(std::sync::atomic::AtomicBool::new(true)), + )), + ); + + // split (-inf, +inf) -> (-inf, k1), [k1, +inf] + let region = pd_client.get_region(b"").unwrap(); + cluster.must_split(®ion, b"k1"); + + // Node 1 must receive request vote response twice. + notify_rx.recv_timeout(Duration::from_secs(1)).unwrap(); + notify_rx.recv_timeout(Duration::from_secs(1)).unwrap(); + + cluster.must_put(b"k0", b"v0"); +} From 41da1b2161b736da12a532f67e6f84ca079ef634 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 30 May 2023 16:40:41 +0800 Subject: [PATCH 0715/1149] util: evict until fit when inserting (#14847) close tikv/tikv#14815 LRU cache now will try to make its size strictly less than the capacity. 
Signed-off-by: hillium Co-authored-by: qupeng --- components/cdc/src/old_value.rs | 32 ++++++++++++++++++++++ components/tikv_util/src/lru.rs | 47 +++++++++++++++++++++++++++++++-- 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/components/cdc/src/old_value.rs b/components/cdc/src/old_value.rs index d91266c92c2..e343ccc226f 100644 --- a/components/cdc/src/old_value.rs +++ b/components/cdc/src/old_value.rs @@ -615,4 +615,36 @@ mod tests { let perf_delta = perf_instant.delta(); assert_eq!(perf_delta.block_read_count, 1); } + + #[test] + fn test_old_value_capacity_not_exceed_quota() { + let mut cache = OldValueCache::new(ReadableSize(1000)); + fn short_val() -> OldValue { + OldValue::Value { + value: b"s".to_vec(), + } + } + fn long_val() -> OldValue { + OldValue::Value { + value: vec![b'l'; 1024], + } + } + fn enc(i: i32) -> Key { + Key::from_encoded(i32::to_ne_bytes(i).to_vec()) + } + + for i in 0..100 { + cache.insert(enc(i), (short_val(), None)); + } + for i in 100..200 { + // access the previous key for making it not be evicted + cache.cache.get(&enc(i - 1)); + cache.insert(enc(i), (long_val(), None)); + } + assert!( + cache.cache.size() <= 1000, + "but it is {}", + cache.cache.size() + ); + } } diff --git a/components/tikv_util/src/lru.rs b/components/tikv_util/src/lru.rs index 2488fe7ef36..76fad6e8a34 100644 --- a/components/tikv_util/src/lru.rs +++ b/components/tikv_util/src/lru.rs @@ -245,7 +245,6 @@ where let current_size = SizePolicy::::current(&self.size_policy); match self.map.entry(key) { HashMapEntry::Occupied(mut e) => { - // TODO: evict entries if size exceeds capacity. self.size_policy.on_remove(e.key(), &e.get().value); self.size_policy.on_insert(e.key(), &value); let mut entry = e.get_mut(); @@ -254,7 +253,6 @@ where } HashMapEntry::Vacant(v) => { let record = if self.capacity <= current_size { - // TODO: evict not only one entry to fit capacity. 
let res = self.trace.reuse_tail(v.key().clone()); old_key = Some(res.0); res.1 @@ -270,6 +268,28 @@ where let entry = self.map.remove(&o).unwrap(); self.size_policy.on_remove(&o, &entry.value); } + + // NOTE: now when inserting a value larger than the capacity, actually this + // implementation will clean the whole cache. + // Perhaps we can reject entries larger than capacity goes in the LRU cache, but + // that is impossible for now: the `SizePolicy` trait doesn't provide the + // interface of querying the actual size of an item. + self.evict_until_fit() + } + + fn evict_until_fit(&mut self) { + let cap = self.capacity; + loop { + let current_size = self.size_policy.current(); + // Should we keep at least one entry? So our users won't lose their fresh record + // once it exceeds the capacity. + if current_size <= cap || self.map.is_empty() { + break; + } + let key = self.trace.remove_tail(); + let val = self.map.remove(&key).unwrap(); + self.size_policy.on_remove(&key, &val.value); + } } #[inline] @@ -583,4 +603,27 @@ mod tests { assert_eq!(map.get(&i), Some(&vec![b' '])); } } + + #[test] + fn test_oversized() { + let mut cache = LruCache::with_capacity_sample_and_trace(42, 0, TestTracker(0)); + cache.insert( + 42, + b"this is the answer... 
but will it being inserted?".to_vec(), + ); + assert!(cache.size() <= 42); + cache.insert(42, b"Aha, perhaps an shorter answer.".to_vec()); + assert!(cache.size() <= 42); + cache.insert(43, b"Yet a new challenger.".to_vec()); + assert!(cache.size() <= 42); + + for i in 0..100 { + cache.insert(i, vec![i as _]); + assert!(cache.size() <= 42); + } + for i in 90..200 { + cache.insert(i, vec![i as _; 8]); + assert!(cache.size() <= 42); + } + } } From e159200cb2458c3744ffbab9bbf284e71f03b866 Mon Sep 17 00:00:00 2001 From: qupeng Date: Wed, 31 May 2023 12:24:41 +0800 Subject: [PATCH 0716/1149] tikv-ctl: add tikv-ctl fork-readonly-tikv (#14841) close tikv/tikv#14357 tikv-ctl: add tikv-ctl reuse-readonly-remains Signed-off-by: qupeng --- Cargo.lock | 8 + cmd/tikv-ctl/Cargo.toml | 1 + cmd/tikv-ctl/src/cmd.rs | 35 ++ cmd/tikv-ctl/src/fork_readonly_tikv.rs | 318 ++++++++++++++++++ cmd/tikv-ctl/src/main.rs | 138 +++++++- cmd/tikv-server/Cargo.toml | 7 + components/raftstore/src/store/config.rs | 9 + components/raftstore/src/store/snap.rs | 53 ++- .../raftstore/src/store/worker/region.rs | 3 + components/sst_importer/src/lib.rs | 2 +- components/sst_importer/src/util.rs | 78 ++++- tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 13 files changed, 633 insertions(+), 21 deletions(-) create mode 100644 cmd/tikv-ctl/src/fork_readonly_tikv.rs diff --git a/Cargo.lock b/Cargo.lock index e9051b685a3..ccb01f6cf57 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6746,6 +6746,7 @@ dependencies = [ "prometheus", "protobuf", "raft", + "raft-engine", "raft-engine-ctl", "raft_log_engine", "raftstore", @@ -6806,9 +6807,16 @@ version = "0.0.1" dependencies = [ "cc", "clap 2.33.0", + "encryption_export", + "engine_traits", + "keys", + "kvproto", + "raft-engine", + "regex", "serde_json", "server", "tikv", + "tikv_util", "time 0.1.42", "toml", ] diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index 5ea6174d2d3..3af251fa86b 100644 --- 
a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -70,6 +70,7 @@ pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft-engine = { git = "https://github.com/tikv/raft-engine.git" } raft-engine-ctl = { git = "https://github.com/tikv/raft-engine.git" } raft_log_engine = { workspace = true } raftstore = { workspace = true } diff --git a/cmd/tikv-ctl/src/cmd.rs b/cmd/tikv-ctl/src/cmd.rs index b4d74ae7db6..d18367597fe 100644 --- a/cmd/tikv-ctl/src/cmd.rs +++ b/cmd/tikv-ctl/src/cmd.rs @@ -565,6 +565,41 @@ pub enum Cmd { }, #[structopt(external_subcommand)] External(Vec), + /// Usage: tikv-ctl show-cluster-id --config + ShowClusterId { + /// Data directory path of the given TiKV instance. + #[structopt(long)] + data_dir: String, + }, + /// Usage: tikv-ctl fork-readonly-tikv + /// + /// fork-readonly-tikv is for creating a tikv-server agent based on a + /// read-only TiKV remains. The agent can be used for recovery because + /// all committed transactions can be accessed correctly, without any + /// modifications on the remained TiKV. + /// + /// NOTE: The remained TiKV can't run concurrently with the agent. + ReuseReadonlyRemains { + /// Data directory path of the remained TiKV. + #[structopt(long)] + data_dir: String, + + /// Data directory to create the agent. + #[structopt(long)] + agent_dir: String, + + /// Reuse snapshot files of the remained TiKV: symlink or copy. + #[structopt(long, default_value = "symlink")] + snaps: String, + + /// Reuse rocksdb files of the remained TiKV: symlink or copy. + /// + /// NOTE: the last one WAL file will still be copied even if `symlink` + /// is specified, because the last one WAL file isn't read-only when + /// opening a RocksDB instance. 
+ #[structopt(long, default_value = "symlink")] + rocksdb_files: String, + }, } #[derive(StructOpt)] diff --git a/cmd/tikv-ctl/src/fork_readonly_tikv.rs b/cmd/tikv-ctl/src/fork_readonly_tikv.rs new file mode 100644 index 00000000000..ef3ae7f8023 --- /dev/null +++ b/cmd/tikv-ctl/src/fork_readonly_tikv.rs @@ -0,0 +1,318 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + fs::ReadDir, + path::{Path, PathBuf}, + process, + sync::Arc, +}; + +use encryption_export::data_key_manager_from_config; +use raft_engine::{env::DefaultFileSystem, Engine as RaftEngine}; +use regex::Regex; +use tikv::config::TikvConfig; + +pub const SYMLINK: &str = "symlink"; +pub const COPY: &str = "copy"; + +pub fn run(config: &TikvConfig, agent_dir: &str, reuse_snaps: &str, reuse_rocksdb_files: &str) { + if data_key_manager_from_config(&config.security.encryption, &config.storage.data_dir) + .unwrap() + .is_some() + { + eprintln!("reuse_redonly_remains with encryption enabled isn't expected"); + process::exit(-1); + } + + if let Err(e) = create_dir(agent_dir) { + eprintln!("create agent directory fail: {}", e); + process::exit(-1); + } + println!("create agent directory success"); + + if let Err(e) = dup_snaps(config, agent_dir, reuse_snaps == "symlink") { + eprintln!("dup snapshot files fail: {}", e); + process::exit(-1); + } + println!("dup snapshot files success"); + + if let Err(e) = dup_kv_engine_files(config, agent_dir, reuse_rocksdb_files == "symlink") { + eprintln!("dup kv engine files fail: {}", e); + process::exit(-1); + } + println!("dup kv engine files success"); + + if let Err(e) = dup_raft_engine_files(config, agent_dir, reuse_rocksdb_files == "symlink") { + eprintln!("dup raft engine fail: {}", e); + process::exit(-1); + } + println!("dup raft engine success"); +} + +const SNAP_NAMES: &str = "^.+\\.(meta|sst)$"; +const ROCKSDB_WALS: &str = "^([0-9]+).log$"; + +/// Create a directory at `path`, return an error if exists. 
+fn create_dir>(path: P) -> Result<(), String> { + let path = path.as_ref(); + std::fs::create_dir(path).map_err(|e| format!("create_dir({}): {}", path.display(), e)) +} + +/// Symlink or copy snapshot files from the original TiKV instance (specified by +/// `config`) to `agent_dir`. TiKV may try to change snapshot files, which can +/// be avoid by setting `raftstore::store::config::snap_apply_copy_symlink` to +/// `true`. +fn dup_snaps(config: &TikvConfig, agent_dir: &str, use_symlink: bool) -> Result<(), String> { + let mut src = PathBuf::from(&config.storage.data_dir); + src.push("snap"); + let mut dst = PathBuf::from(agent_dir); + dst.push("snap"); + create_dir(&dst)?; + + let ptn = Regex::new(SNAP_NAMES).unwrap(); + reuse_stuffs( + &src, + &dst, + |name| -> bool { ptn.is_match(name) }, + use_symlink, + ) +} + +/// Symlink or copy KV engine files from the original TiKV instance (specified +/// by `config`) to `agent_dir`. Then `agent_dir` can be used to run a new TiKV +/// instance, without any modifications on the original TiKV data. +// There are 3 types of files in RocksDB: +// * SST files, which won't be changed in any cases; +// * WAL files, which is named with a sequence ID; all won't be changed except +// the last one. +// * Manifest files, which won't be changed in any cases. +fn dup_kv_engine_files( + config: &TikvConfig, + agent_dir: &str, + use_symlink: bool, +) -> Result<(), String> { + let mut dst_config = TikvConfig::default(); + dst_config.storage.data_dir = agent_dir.to_owned(); + let dst = dst_config.infer_kv_engine_path(None).unwrap(); + create_dir(&dst)?; + + // Firstly, dup all files except LOCK. 
+ let src = config.infer_kv_engine_path(None).unwrap(); + reuse_stuffs(&src, &dst, |name| -> bool { name != "LOCK" }, use_symlink)?; + + if !config.rocksdb.wal_dir.is_empty() { + reuse_stuffs( + &config.rocksdb.wal_dir, + &dst, + |_| -> bool { true }, + use_symlink, + )?; + if use_symlink { + replace_symlink_with_copy(&config.rocksdb.wal_dir, &dst, rocksdb_files_should_copy)?; + } + } else if use_symlink { + replace_symlink_with_copy(&src, &dst, rocksdb_files_should_copy)?; + } + + Ok(()) +} + +/// Symlink or copy Raft engine files from the original TiKV instance (specified +/// by `config`) to `agent_dir`. Then `agent_dir` can be used to run a new TiKV +/// instance, without any modifications on the original TiKV data. +fn dup_raft_engine_files( + config: &TikvConfig, + agent_dir: &str, + use_symlink: bool, +) -> Result<(), String> { + let mut dst_config = TikvConfig::default(); + dst_config.storage.data_dir = agent_dir.to_owned(); + + if config.raft_engine.enable { + let dst = dst_config.infer_raft_engine_path(None).unwrap(); + let mut raft_engine_cfg = config.raft_engine.config(); + raft_engine_cfg.dir = config.infer_raft_engine_path(None).unwrap(); + // NOTE: it's ok to used `DefaultFileSystem` whatever the original instance is + // encrypted or not because only `open` is used in `RaftEngine::fork`. Seems + // this behavior will never be changed, however we can custom a file system + // which panics in all other calls later. 
+ let details = RaftEngine::fork(&raft_engine_cfg, Arc::new(DefaultFileSystem), dst)?; + for copied in &details.copied { + add_write_permission(copied)?; + } + } else { + let dst = dst_config.infer_raft_db_path(None).unwrap(); + create_dir(&dst)?; + let src = config.infer_raft_db_path(None).unwrap(); + reuse_stuffs(&src, &dst, |name| -> bool { name != "LOCK" }, use_symlink)?; + + if !config.raftdb.wal_dir.is_empty() { + reuse_stuffs( + &config.raftdb.wal_dir, + &dst, + |_| -> bool { true }, + use_symlink, + )?; + if use_symlink { + replace_symlink_with_copy(&config.raftdb.wal_dir, &dst, rocksdb_files_should_copy)?; + } + } else if use_symlink { + replace_symlink_with_copy(&src, &dst, rocksdb_files_should_copy)?; + } + } + + Ok(()) +} + +fn reuse_stuffs(src: P, dst: Q, selector: F, use_symlink: bool) -> Result<(), String> +where + P: AsRef, + Q: AsRef, + F: Fn(&str) -> bool, +{ + let src = src.as_ref(); + let dst = dst.as_ref(); + for entry in read_dir(src)? { + let entry = entry.map_err(|e| format!("dir_entry: {}", e))?; + let fname = entry.file_name().to_str().unwrap().to_owned(); + if selector(&fname) { + let src = entry.path().canonicalize().unwrap(); + let dst = PathBuf::from(dst).join(fname); + if use_symlink { + symlink(src, dst)?; + } else { + copy(src, dst)?; + } + } + } + Ok(()) +} + +fn replace_symlink_with_copy(src: &str, dst: &str, selector: F) -> Result<(), String> +where + F: Fn(&mut dyn Iterator) -> Vec, +{ + let mut names = Vec::new(); + for entry in read_dir(dst)? { + let entry = entry.map_err(|e| format!("dir_entry: {}", e))?; + let fname = entry.file_name().to_str().unwrap().to_owned(); + names.push(fname); + } + + let src = PathBuf::from(src); + let dst = PathBuf::from(dst); + for name in (selector)(&mut names.into_iter()) { + let src = src.join(&name); + let dst = dst.join(&name); + replace_file(src, &dst)?; + add_write_permission(dst)?; + } + + Ok(()) +} + +// `iter` emits all file names in a RocksDB instance. 
This function gets a list +// of files which should be copied instead of symlinked when building an agent +// directory. +// +// Q: so which files should be copied? +// A: the last one WAL file. +fn rocksdb_files_should_copy(iter: &mut dyn Iterator) -> Vec { + let ptn = Regex::new(ROCKSDB_WALS).unwrap(); + let mut names = Vec::new(); + for name in iter { + if let Some(caps) = ptn.captures(&name) { + let number = caps.get(1).unwrap().as_str(); + let number = number.parse::().unwrap(); + let name = caps.get(0).unwrap().as_str().to_owned(); + names.push((number, name)); + } + } + names.sort_by_key(|a| a.0); + names.pop().map_or_else(|| vec![], |a| vec![a.1]) +} + +fn read_dir>(path: P) -> Result { + let path = path.as_ref(); + std::fs::read_dir(path).map_err(|e| format!("read_dir({}): {}", path.display(), e)) +} + +fn symlink, Q: AsRef>(src: P, dst: Q) -> Result<(), String> { + let src = src.as_ref(); + let dst = dst.as_ref(); + std::os::unix::fs::symlink(src, dst) + .map_err(|e| format!("symlink({}, {}): {}", src.display(), dst.display(), e)) +} + +fn copy, Q: AsRef>(src: P, dst: Q) -> Result<(), String> { + let src = src.as_ref(); + let dst = dst.as_ref(); + std::fs::copy(src, dst) + .map(|_| ()) + .map_err(|e| format!("copy({}, {}): {}", src.display(), dst.display(), e)) +} + +fn replace_file(src: P, dst: Q) -> Result<(), String> +where + P: AsRef, + Q: AsRef, +{ + let src = src.as_ref(); + let dst = dst.as_ref(); + std::fs::remove_file(dst).map_err(|e| format!("remove_file({}): {}", dst.display(), e))?; + std::fs::copy(src, dst) + .map(|_| ()) + .map_err(|e| format!("copy({}, {}): {}", src.display(), dst.display(), e)) +} + +fn add_write_permission>(path: P) -> Result<(), String> { + let path = path.as_ref(); + let mut pmt = std::fs::metadata(path) + .map_err(|e| format!("metadata({}): {}", path.display(), e))? 
+ .permissions(); + pmt.set_readonly(false); + std::fs::set_permissions(path, pmt) + .map_err(|e| format!("set_permissions({}): {}", path.display(), e)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_snap_exts() { + let re = Regex::new(SNAP_NAMES).unwrap(); + for (s, matched) in [ + ("123.meta", true), + ("123.sst", true), + ("123.sst.tmp", false), + ("123.sst.clone", false), + ] { + assert_eq!(re.is_match(s), matched); + } + } + + #[test] + fn test_rocksdb_files_should_copy() { + let mut names = [ + "00123.log", + "00123.log.backup", + "old.00123.log", + "00123.sst", + "001abc23.log", + "LOCK", + ] + .iter() + .map(|x| String::from(*x)); + let x = rocksdb_files_should_copy(&mut names); + assert_eq!(x.len(), 1); + assert_eq!(x[0], "00123.log"); + + let mut names = ["87654321.log", "00123.log"] + .iter() + .map(|x| String::from(*x)); + let x = rocksdb_files_should_copy(&mut names); + assert_eq!(x.len(), 1); + assert_eq!(x[0], "87654321.log"); + } +} diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index f547a2cee3a..5376049cf2b 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -7,6 +7,7 @@ extern crate log; mod cmd; mod executor; +mod fork_readonly_tikv; mod util; use std::{ @@ -14,7 +15,7 @@ use std::{ fs::{self, File, OpenOptions}, io::{self, BufRead, BufReader, Read}, path::Path, - str, + process, str, string::ToString, sync::Arc, thread, @@ -27,7 +28,7 @@ use encryption_export::{ DecrypterReader, Iv, }; use engine_rocks::get_env; -use engine_traits::EncryptionKeyManager; +use engine_traits::{EncryptionKeyManager, Peekable}; use file_system::calc_crc32; use futures::executor::block_on; use gag::BufferRedirect; @@ -36,16 +37,21 @@ use kvproto::{ debugpb::{Db as DbType, *}, encryptionpb::EncryptionMethod, kvrpcpb::SplitRegionRequest, - raft_serverpb::SnapshotMeta, + raft_serverpb::{SnapshotMeta, StoreIdent}, tikvpb::TikvClient, }; use pd_client::{Config as PdConfig, PdClient, RpcClient}; use 
protobuf::Message; +use raft_engine::RecoveryMode; use raft_log_engine::ManagedFileSystem; use regex::Regex; use security::{SecurityConfig, SecurityManager}; use structopt::{clap::ErrorKind, StructOpt}; -use tikv::{config::TikvConfig, server::debug::BottommostLevelCompaction}; +use tikv::{ + config::TikvConfig, + server::{debug::BottommostLevelCompaction, KvEngineFactoryBuilder}, + storage::config::EngineType, +}; use tikv_util::{escape, run_and_wait_child_process, sys::thread::StdThreadBuildWrapper, unescape}; use txn_types::Key; @@ -235,6 +241,111 @@ fn main() { let key = unescape(&key); split_region(&pd_client, mgr, region_id, key); } + Cmd::ShowClusterId { data_dir } => { + if opt.config.is_none() { + clap::Error { + message: String::from("(--config) must be specified"), + kind: ErrorKind::MissingRequiredArgument, + info: None, + } + .exit(); + } + if data_dir.is_empty() { + clap::Error { + message: String::from("(--data-dir) must be specified"), + kind: ErrorKind::MissingRequiredArgument, + info: None, + } + .exit(); + } + cfg.storage.data_dir = data_dir; + // Disable auto compactions and GCs to avoid modifications. 
+ cfg.rocksdb.defaultcf.disable_auto_compactions = true; + cfg.rocksdb.writecf.disable_auto_compactions = true; + cfg.rocksdb.lockcf.disable_auto_compactions = true; + cfg.rocksdb.raftcf.disable_auto_compactions = true; + cfg.raftdb.defaultcf.disable_auto_compactions = true; + cfg.rocksdb.titan.disable_gc = true; + match read_cluster_id(&cfg) { + Ok(id) => { + println!("cluster-id: {}", id); + process::exit(0); + } + Err(e) => { + eprintln!("read cluster ID fail: {}", e); + process::exit(-1); + } + } + } + Cmd::ReuseReadonlyRemains { + data_dir, + agent_dir, + snaps, + rocksdb_files, + } => { + if opt.config.is_none() { + clap::Error { + message: String::from("(--config) must be specified"), + kind: ErrorKind::MissingRequiredArgument, + info: None, + } + .exit(); + } + if data_dir.is_empty() { + clap::Error { + message: String::from("(--data-dir) must be specified"), + kind: ErrorKind::MissingRequiredArgument, + info: None, + } + .exit(); + } + cfg.storage.data_dir = data_dir; + if cfg.storage.engine == EngineType::RaftKv2 { + clap::Error { + message: String::from("storage.engine can only be raftkv"), + kind: ErrorKind::InvalidValue, + info: None, + } + .exit(); + } + if cfg.raft_engine.config().enable_log_recycle { + clap::Error { + message: String::from("raft-engine.enable-log-recycle can only be false"), + kind: ErrorKind::InvalidValue, + info: None, + } + .exit(); + } + if cfg.raft_engine.config().recovery_mode != RecoveryMode::TolerateTailCorruption { + clap::Error { + message: String::from( + "raft-engine.recovery-mode can only be tolerate-tail-corruption", + ), + kind: ErrorKind::InvalidValue, + info: None, + } + .exit(); + } + if snaps != fork_readonly_tikv::SYMLINK && snaps != fork_readonly_tikv::COPY { + clap::Error { + message: String::from("(--snaps) can only be symlink or copy"), + kind: ErrorKind::InvalidValue, + info: None, + } + .exit(); + } + if rocksdb_files != fork_readonly_tikv::SYMLINK + && rocksdb_files != fork_readonly_tikv::COPY + { + 
clap::Error { + message: String::from("(--rocksdb_files) can only be symlink or copy"), + kind: ErrorKind::InvalidValue, + info: None, + } + .exit(); + } + fork_readonly_tikv::run(&cfg, &agent_dir, &snaps, &rocksdb_files) + } // Commands below requires either the data dir or the host. cmd => { let data_dir = opt.data_dir.as_deref(); @@ -935,3 +1046,22 @@ fn flush_std_buffer_to_log( out_buffer.read_to_string(&mut out).unwrap(); println!("{}, err redirect:{}, out redirect:{}", msg, err, out); } + +fn read_cluster_id(config: &TikvConfig) -> Result { + let env = config + .build_shared_rocks_env(None, None) + .map_err(|e| format!("build_shared_rocks_env fail: {}", e))?; + let cache = config + .storage + .block_cache + .build_shared_cache(config.storage.engine); + let kv_engine = KvEngineFactoryBuilder::new(env, config, cache) + .build() + .create_shared_db(&config.storage.data_dir) + .map_err(|e| format!("create_shared_db fail: {}", e))?; + let ident = kv_engine + .get_msg::(keys::STORE_IDENT_KEY) + .unwrap() + .unwrap(); + Ok(ident.cluster_id) +} diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index 080ed278ba7..409dc84a62d 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -33,9 +33,16 @@ pprof-fp = ["tikv/pprof-fp"] [dependencies] clap = "2.32" +encryption_export = { workspace = true } +engine_traits = { workspace = true } +keys = { workspace = true } +kvproto = { workspace = true } +raft-engine = { git = "https://github.com/tikv/raft-engine.git" } +regex = "1" serde_json = { version = "1.0", features = ["preserve_order"] } server = { workspace = true } tikv = { workspace = true } +tikv_util = { workspace = true } toml = "0.5" [build-dependencies] diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 6b13fcdd064..087119c87f1 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -158,6 +158,14 @@ pub struct Config { 
#[online_config(skip)] pub snap_apply_batch_size: ReadableSize, + /// When applying a Region snapshot, its SST files can be modified by TiKV + /// itself. However those files could be read-only, for example, a TiKV + /// [agent](cmd/tikv-agent) is started based on a read-only remains. So + /// we can set `snap_apply_copy_symlink` to `true` to make a copy on + /// those SST files. + #[online_config(skip)] + pub snap_apply_copy_symlink: bool, + // used to periodically check whether schedule pending applies in region runner #[doc(hidden)] #[online_config(skip)] @@ -407,6 +415,7 @@ impl Default for Config { peer_stale_state_check_interval: ReadableDuration::minutes(5), leader_transfer_max_log_lag: 128, snap_apply_batch_size: ReadableSize::mb(10), + snap_apply_copy_symlink: false, region_worker_tick_interval: if cfg!(feature = "test") { ReadableDuration::millis(200) } else { diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 62744501195..f35041da856 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -182,6 +182,7 @@ where pub abort: Arc, pub write_batch_size: usize, pub coprocessor_host: CoprocessorHost, + pub ingest_copy_symlink: bool, } // A helper function to copy snapshot. @@ -770,32 +771,26 @@ impl Snapshot { ) } - fn validate(&self, for_send: bool) -> RaftStoreResult<()> { + fn validate(&self, post_check: F) -> RaftStoreResult<()> + where + F: Fn(&CfFile, usize) -> RaftStoreResult<()>, + { for cf_file in &self.cf_files { let file_paths = cf_file.file_paths(); - let clone_file_paths = cf_file.clone_file_paths(); - for (i, file_path) in file_paths.iter().enumerate() { + for i in 0..file_paths.len() { if cf_file.size[i] == 0 { // Skip empty file. The checksum of this cf file should be 0 and // this is checked when loading the snapshot meta. 
continue; } - let file_path = Path::new(file_path); check_file_size_and_checksum( - file_path, + Path::new(&file_paths[i]), cf_file.size[i], cf_file.checksum[i], self.mgr.encryption_key_manager.as_ref(), )?; - - if !for_send && !plain_file_used(cf_file.cf) { - sst_importer::prepare_sst_for_ingestion( - file_path, - Path::new(&clone_file_paths[i]), - self.mgr.encryption_key_manager.as_deref(), - )?; - } + post_check(cf_file, i)?; } } Ok(()) @@ -851,7 +846,7 @@ impl Snapshot { { fail_point!("snapshot_enter_do_build"); if self.exists() { - match self.validate(true) { + match self.validate(|_, _| -> RaftStoreResult<()> { Ok(()) }) { Ok(()) => return Ok(()), Err(e) => { error!(?e; @@ -1103,7 +1098,28 @@ impl Snapshot { } pub fn apply(&mut self, options: ApplyOptions) -> Result<()> { - box_try!(self.validate(false)); + let post_check = |cf_file: &CfFile, offset: usize| { + if !plain_file_used(cf_file.cf) { + let file_paths = cf_file.file_paths(); + let clone_file_paths = cf_file.clone_file_paths(); + if options.ingest_copy_symlink && is_symlink(&file_paths[offset])? 
{ + sst_importer::copy_sst_for_ingestion( + &file_paths[offset], + &clone_file_paths[offset], + self.mgr.encryption_key_manager.as_deref(), + )?; + } else { + sst_importer::prepare_sst_for_ingestion( + &file_paths[offset], + &clone_file_paths[offset], + self.mgr.encryption_key_manager.as_deref(), + )?; + } + } + Ok(()) + }; + + box_try!(self.validate(post_check)); let abort_checker = ApplyAbortChecker(options.abort); let coprocessor_host = options.coprocessor_host; @@ -2236,6 +2252,11 @@ impl TabletSnapManager { } } +fn is_symlink>(path: P) -> Result { + let metadata = box_try!(std::fs::symlink_metadata(path)); + Ok(metadata.is_symlink()) +} + #[cfg(test)] pub mod tests { use std::{ @@ -2642,6 +2663,7 @@ pub mod tests { abort: Arc::new(AtomicUsize::new(JOB_STATUS_RUNNING)), write_batch_size: TEST_WRITE_BATCH_SIZE, coprocessor_host: CoprocessorHost::::default(), + ingest_copy_symlink: false, }; // Verify the snapshot applying is ok. s4.apply(options).unwrap(); @@ -2886,6 +2908,7 @@ pub mod tests { abort: Arc::new(AtomicUsize::new(JOB_STATUS_RUNNING)), write_batch_size: TEST_WRITE_BATCH_SIZE, coprocessor_host: CoprocessorHost::::default(), + ingest_copy_symlink: false, }; s5.apply(options).unwrap_err(); diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 0696e70b766..d4e6039b7ea 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -347,6 +347,7 @@ where { batch_size: usize, use_delete_range: bool, + ingest_copy_symlink: bool, clean_stale_tick: usize, clean_stale_check_interval: Duration, clean_stale_ranges_tick: usize, @@ -390,6 +391,7 @@ where Runner { batch_size: cfg.value().snap_apply_batch_size.0 as usize, use_delete_range: cfg.value().use_delete_range, + ingest_copy_symlink: cfg.value().snap_apply_copy_symlink, clean_stale_tick: 0, clean_stale_check_interval: Duration::from_millis( cfg.value().region_worker_tick_interval.as_millis(), 
@@ -475,6 +477,7 @@ where abort: Arc::clone(&abort), write_batch_size: self.batch_size, coprocessor_host: self.coprocessor_host.clone(), + ingest_copy_symlink: self.ingest_copy_symlink, }; s.apply(options)?; self.coprocessor_host diff --git a/components/sst_importer/src/lib.rs b/components/sst_importer/src/lib.rs index e073ff941ae..07d9de4cff2 100644 --- a/components/sst_importer/src/lib.rs +++ b/components/sst_importer/src/lib.rs @@ -29,5 +29,5 @@ pub use self::{ import_file::sst_meta_to_path, sst_importer::SstImporter, sst_writer::{RawSstWriter, TxnSstWriter}, - util::prepare_sst_for_ingestion, + util::{copy_sst_for_ingestion, prepare_sst_for_ingestion}, }; diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index 501061e92c0..deb72675cf4 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs @@ -65,6 +65,45 @@ pub fn prepare_sst_for_ingestion, Q: AsRef>( Ok(()) } +/// Just like prepare_sst_for_ingestion, but +/// * always use copy instead of hard link; +/// * add write permission on the copied file if necessary. +pub fn copy_sst_for_ingestion, Q: AsRef>( + path: P, + clone: Q, + encryption_key_manager: Option<&DataKeyManager>, +) -> Result<()> { + let path = path.as_ref().to_str().unwrap(); + let clone = clone.as_ref().to_str().unwrap(); + + if Path::new(clone).exists() { + file_system::remove_file(clone).map_err(|e| format!("remove {}: {:?}", clone, e))?; + } + // always try to remove the file from key manager because the clean up in + // rocksdb is not atomic, thus the file may be deleted but key in key + // manager is not. 
+ if let Some(key_manager) = encryption_key_manager { + key_manager.delete_file(clone)?; + } + + file_system::copy_and_sync(path, clone) + .map_err(|e| format!("copy from {} to {}: {:?}", path, clone, e))?; + + let mut pmts = file_system::metadata(clone)?.permissions(); + if pmts.readonly() { + pmts.set_readonly(false); + file_system::set_permissions(clone, pmts)?; + } + + // sync clone dir + File::open(Path::new(clone).parent().unwrap())?.sync_all()?; + if let Some(key_manager) = encryption_key_manager { + key_manager.link_file(path, clone)?; + } + + Ok(()) +} + pub fn url_for(storage: &E) -> String { storage .url() @@ -88,7 +127,7 @@ mod tests { use tempfile::Builder; use test_util::encryption::new_test_key_manager; - use super::prepare_sst_for_ingestion; + use super::{copy_sst_for_ingestion, prepare_sst_for_ingestion}; #[cfg(unix)] fn check_hard_link>(path: P, nlink: u64) { @@ -226,4 +265,41 @@ mod tests { let manager = Arc::new(key_manager.unwrap().unwrap()); check_prepare_sst_for_ingestion(None, None, Some(&manager), true /* was_encrypted */); } + + #[test] + fn test_copy_sst_for_ingestion() { + let path = Builder::new() + .prefix("_util_rocksdb_test_copy_sst_for_ingestion") + .tempdir() + .unwrap(); + let path_str = path.path().to_str().unwrap(); + + let sst_dir = Builder::new() + .prefix("_util_rocksdb_test_copy_sst_for_ingestion_sst") + .tempdir() + .unwrap(); + let sst_path = sst_dir.path().join("abc.sst"); + let sst_clone = sst_dir.path().join("abc.sst.clone"); + + let kvs = [("k1", "v1"), ("k2", "v2"), ("k3", "v3")]; + + let db_opts = RocksDbOptions::default(); + let cf_opts = vec![(CF_DEFAULT, RocksCfOptions::default())]; + let db = new_engine_opt(path_str, db_opts, cf_opts).unwrap(); + + gen_sst_with_kvs(&db, CF_DEFAULT, sst_path.to_str().unwrap(), &kvs); + + copy_sst_for_ingestion(&sst_path, &sst_clone, None).unwrap(); + check_hard_link(&sst_path, 1); + check_hard_link(&sst_clone, 1); + + copy_sst_for_ingestion(&sst_path, &sst_clone, None).unwrap(); + 
check_hard_link(&sst_path, 1); + check_hard_link(&sst_clone, 1); + + db.ingest_external_file_cf(CF_DEFAULT, &[sst_clone.to_str().unwrap()]) + .unwrap(); + check_db_with_kvs(&db, CF_DEFAULT, &kvs); + assert!(!sst_clone.exists()); + } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 890fc76e206..669657e03e3 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -212,6 +212,7 @@ fn test_serde_custom_tikv_config() { peer_stale_state_check_interval: ReadableDuration::hours(2), leader_transfer_max_log_lag: 123, snap_apply_batch_size: ReadableSize::mb(12), + snap_apply_copy_symlink: true, region_worker_tick_interval: ReadableDuration::millis(1000), clean_stale_ranges_tick: 10, lock_cf_compact_interval: ReadableDuration::minutes(12), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index a1fa7169a57..c08d91ad888 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -184,6 +184,7 @@ abnormal-leader-missing-duration = "6h" peer-stale-state-check-interval = "2h" leader-transfer-max-log-lag = 123 snap-apply-batch-size = "12MB" +snap-apply-copy-symlink = true consistency-check-interval = "12s" report-region-flow-interval = "12m" raft-store-max-leader-lease = "12s" From 9aa1d7350db02900998a8dcb0f77dcc01341c169 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Thu, 1 Jun 2023 11:12:42 +0800 Subject: [PATCH 0717/1149] raftstore: block in-memory pessimistic locks during the flashback (#14859) ref tikv/tikv#13303, close pingcap/tidb#44292 During the Flashback process, we should prevent any read or write operations on the in-memory pessimistic lock table and clear it like rolling back other locks to ensure that Flashback can proceed smoothly. 
Signed-off-by: JmPotato --- .../src/operation/command/admin/flashback.rs | 23 ++++++- components/raftstore/src/store/fsm/peer.rs | 15 +++++ components/raftstore/src/store/txn_ext.rs | 1 + components/test_raftstore-v2/src/server.rs | 10 ++- components/test_raftstore/src/server.rs | 10 ++- src/storage/mvcc/reader/reader.rs | 18 ++++- .../txn/commands/flashback_to_version.rs | 1 + .../flashback_to_version_read_phase.rs | 1 + .../integrations/raftstore/test_flashback.rs | 66 ++++++++++++++++++- 9 files changed, 138 insertions(+), 7 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/flashback.rs b/components/raftstore-v2/src/operation/command/admin/flashback.rs index f45691a009a..b7acdcc5f0a 100644 --- a/components/raftstore-v2/src/operation/command/admin/flashback.rs +++ b/components/raftstore-v2/src/operation/command/admin/flashback.rs @@ -9,7 +9,10 @@ use kvproto::{ use protobuf::Message; use raftstore::{ coprocessor::RegionChangeReason, - store::metrics::{PEER_ADMIN_CMD_COUNTER, PEER_IN_FLASHBACK_STATE}, + store::{ + metrics::{PEER_ADMIN_CMD_COUNTER, PEER_IN_FLASHBACK_STATE}, + LocksStatus, + }, Result, }; @@ -85,7 +88,7 @@ impl Peer { pub fn on_apply_res_flashback( &mut self, store_ctx: &mut StoreContext, - mut res: FlashbackResult, + #[allow(unused_mut)] mut res: FlashbackResult, ) { (|| { fail_point!("keep_peer_fsm_flashback_state_false", |_| { @@ -114,6 +117,22 @@ impl Peer { .unwrap(); self.set_has_extra_write(); + let mut pessimistic_locks = self.txn_context().ext().pessimistic_locks.write(); + pessimistic_locks.status = if res.region_state.get_region().is_in_flashback { + // To prevent the insertion of any new pessimistic locks, set the lock status + // to `LocksStatus::IsInFlashback` and clear all the existing locks. + pessimistic_locks.clear(); + LocksStatus::IsInFlashback + } else if self.is_leader() { + // If the region is not in flashback, the leader can continue to insert + // pessimistic locks. 
+ LocksStatus::Normal + } else { + // If the region is not in flashback and the peer is not the leader, it + // cannot insert pessimistic locks. + LocksStatus::NotLeader + } + // Compares to v1, v2 does not expire remote lease, because only // local reader can serve read requests. } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 910a08c3a0b..8231e5b4f3e 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -6489,6 +6489,21 @@ where })()); // Let the leader lease to None to ensure that local reads are not executed. self.fsm.peer.leader_lease_mut().expire_remote_lease(); + let mut pessimistic_locks = self.fsm.peer.txn_ext.pessimistic_locks.write(); + pessimistic_locks.status = if self.region().is_in_flashback { + // To prevent the insertion of any new pessimistic locks, set the lock status + // to `LocksStatus::IsInFlashback` and clear all the existing locks. + pessimistic_locks.clear(); + LocksStatus::IsInFlashback + } else if self.fsm.peer.is_leader() { + // If the region is not in flashback, the leader can continue to insert + // pessimistic locks. + LocksStatus::Normal + } else { + // If the region is not in flashback and the peer is not the leader, it + // cannot insert pessimistic locks. 
+ LocksStatus::NotLeader + } } fn on_ready_batch_switch_witness(&mut self, sw: SwitchWitness) { diff --git a/components/raftstore/src/store/txn_ext.rs b/components/raftstore/src/store/txn_ext.rs index 20963fc186f..4ace4ba8026 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -125,6 +125,7 @@ pub enum LocksStatus { TransferringLeader, MergingRegion, NotLeader, + IsInFlashback, } impl fmt::Debug for PeerPessimisticLocks { diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 30da5a4fc8f..a439056617f 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -885,6 +885,14 @@ impl Simulator for ServerCluster { impl Cluster, EK> { pub fn must_get_snapshot_of_region(&mut self, region_id: u64) -> RegionSnapshot { + self.must_get_snapshot_of_region_with_ctx(region_id, SnapContext::default()) + } + + pub fn must_get_snapshot_of_region_with_ctx( + &mut self, + region_id: u64, + snap_ctx: SnapContext<'_>, + ) -> RegionSnapshot { let mut try_snapshot = || -> Option> { let leader = self.leader_of_region(region_id)?; let store_id = leader.store_id; @@ -897,7 +905,7 @@ impl Cluster, EK> { let mut storage = self.sim.rl().storages.get(&store_id).unwrap().clone(); let snap_ctx = SnapContext { pb_ctx: &ctx, - ..Default::default() + ..snap_ctx.clone() }; storage.snapshot(snap_ctx).ok() }; diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 4d2ac77b1bc..3244a0e15f7 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -788,6 +788,14 @@ impl Simulator for ServerCluster { impl Cluster { pub fn must_get_snapshot_of_region(&mut self, region_id: u64) -> RegionSnapshot { + self.must_get_snapshot_of_region_with_ctx(region_id, Default::default()) + } + + pub fn must_get_snapshot_of_region_with_ctx( + &mut self, + region_id: u64, + snap_ctx: 
SnapContext<'_>, + ) -> RegionSnapshot { let mut try_snapshot = || -> Option> { let leader = self.leader_of_region(region_id)?; let store_id = leader.store_id; @@ -800,7 +808,7 @@ impl Cluster { let mut storage = self.sim.rl().storages.get(&store_id).unwrap().clone(); let snap_ctx = SnapContext { pb_ctx: &ctx, - ..Default::default() + ..snap_ctx.clone() }; storage.snapshot(snap_ctx).ok() }; diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 41da39bb28b..68d0d50f0b8 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -5,9 +5,10 @@ use std::ops::Bound; use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; use kvproto::{ - errorpb::{self, EpochNotMatch, StaleCommand}, + errorpb::{self, EpochNotMatch, FlashbackInProgress, StaleCommand}, kvrpcpb::Context, }; +use raftstore::store::LocksStatus; use tikv_kv::{SnapshotExt, SEEK_BOUND}; use txn_types::{Key, Lock, OldValue, TimeStamp, Value, Write, WriteRef, WriteType}; @@ -146,6 +147,8 @@ pub struct MvccReader { term: u64, #[allow(dead_code)] version: u64, + + allow_in_flashback: bool, } impl MvccReader { @@ -164,6 +167,7 @@ impl MvccReader { fill_cache, term: 0, version: 0, + allow_in_flashback: false, } } @@ -182,6 +186,7 @@ impl MvccReader { fill_cache: !ctx.get_not_fill_cache(), term: ctx.get_term(), version: ctx.get_region_epoch().get_version(), + allow_in_flashback: false, } } @@ -267,6 +272,13 @@ impl MvccReader { err.set_epoch_not_match(EpochNotMatch::default()); return Some(Err(KvError::from(err).into())); } + // If the region is in the flashback state, it should not be allowed to read the + // locks. 
+ if locks.status == LocksStatus::IsInFlashback && !self.allow_in_flashback { + let mut err = errorpb::Error::default(); + err.set_flashback_in_progress(FlashbackInProgress::default()); + return Some(Err(KvError::from(err).into())); + } locks.get(key).map(|(lock, _)| { // For write commands that are executed in serial, it should be impossible @@ -768,6 +780,10 @@ impl MvccReader { pub fn snapshot(&self) -> &S { &self.snapshot } + + pub fn set_allow_in_flashback(&mut self, set_allow_in_flashback: bool) { + self.allow_in_flashback = set_allow_in_flashback; + } } #[cfg(test)] diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index 7873b736d1e..37d288fa266 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -95,6 +95,7 @@ impl WriteCommand for FlashbackToVersion { fn process_write(mut self, snapshot: S, context: WriteContext<'_, L>) -> Result { let mut reader = MvccReader::new_with_ctx(snapshot.clone(), Some(ScanMode::Forward), &self.ctx); + reader.set_allow_in_flashback(true); let mut txn = MvccTxn::new(TimeStamp::zero(), context.concurrency_manager); match self.state { FlashbackToVersionState::RollbackLock { diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs index 1812816966a..8af482069d9 100644 --- a/src/storage/txn/commands/flashback_to_version_read_phase.rs +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -153,6 +153,7 @@ impl ReadCommand for FlashbackToVersionReadPhase { fn process_read(self, snapshot: S, statistics: &mut Statistics) -> Result { let tag = self.tag().get_str(); let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &self.ctx); + reader.set_allow_in_flashback(true); // Filter out the SST that does not have a newer version than `self.version` in // `CF_WRITE`, i.e, whose latest `commit_ts` <= 
`self.version` in the later // scan. By doing this, we can only flashback those keys that have version diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index 0b703cf32dd..0e996d1726e 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -13,14 +13,76 @@ use kvproto::{ raft_cmdpb::{AdminCmdType, CmdType, RaftCmdRequest, RaftCmdResponse, Request}, raft_serverpb::RegionLocalState, }; -use raftstore::store::Callback; +use raftstore::store::{Callback, LocksStatus}; use test_raftstore::*; use test_raftstore_macro::test_case; -use txn_types::WriteBatchFlags; +use tikv::storage::kv::SnapContext; +use txn_types::{Key, PessimisticLock, WriteBatchFlags}; const TEST_KEY: &[u8] = b"k1"; const TEST_VALUE: &[u8] = b"v1"; +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_flashback_with_in_memory_pessimistic_locks() { + let mut cluster = new_cluster(0, 3); + cluster.cfg.raft_store.raft_heartbeat_ticks = 20; + cluster.run(); + cluster.must_transfer_leader(1, new_peer(1, 1)); + + let region = cluster.get_region(TEST_KEY); + // Write a pessimistic lock to the in-memory pessimistic lock table. + { + let snapshot = cluster.must_get_snapshot_of_region(region.get_id()); + let txn_ext = snapshot.txn_ext.unwrap(); + let mut pessimistic_locks = txn_ext.pessimistic_locks.write(); + assert!(pessimistic_locks.is_writable()); + pessimistic_locks + .insert(vec![( + Key::from_raw(TEST_KEY), + PessimisticLock { + primary: TEST_KEY.to_vec().into_boxed_slice(), + start_ts: 10.into(), + ttl: 3000, + for_update_ts: 20.into(), + min_commit_ts: 30.into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, + is_locked_with_conflict: false, + }, + )]) + .unwrap(); + assert_eq!(pessimistic_locks.len(), 1); + } + // Prepare flashback. 
+ cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::PrepareFlashback); + // Check the in-memory pessimistic lock table. + { + let snapshot = cluster.must_get_snapshot_of_region_with_ctx( + region.get_id(), + SnapContext { + allowed_in_flashback: true, + ..Default::default() + }, + ); + let txn_ext = snapshot.txn_ext.unwrap(); + let pessimistic_locks = txn_ext.pessimistic_locks.read(); + assert!(!pessimistic_locks.is_writable()); + assert_eq!(pessimistic_locks.status, LocksStatus::IsInFlashback); + assert_eq!(pessimistic_locks.len(), 0); + } + // Finish flashback. + cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); + // Check the in-memory pessimistic lock table. + { + let snapshot = cluster.must_get_snapshot_of_region(region.get_id()); + let txn_ext = snapshot.txn_ext.unwrap(); + let pessimistic_locks = txn_ext.pessimistic_locks.read(); + assert!(pessimistic_locks.is_writable()); + assert_eq!(pessimistic_locks.len(), 0); + } +} + #[test_case(test_raftstore::new_node_cluster)] #[test_case(test_raftstore_v2::new_node_cluster)] fn test_allow_read_only_request() { From 5ac85a0d327ea7caf86f1cbd6352c2fe56c56e27 Mon Sep 17 00:00:00 2001 From: Hu# Date: Mon, 5 Jun 2023 12:25:40 +0800 Subject: [PATCH 0718/1149] cmd: extend tikv-ctl to support the Flashback command trigger (#14768) close tikv/tikv#14748 - Support flashback the whole cluster. `tikv-ctl --pd 127.0.0.1:2379 flashback --version 430315739761082369` - Support flashback cluster within the given region ids or key ranges. 
`tikv-ctl --pd 127.0.0.1:2379 flashback region 2,4,6 --version 430315739761082369` `tikv-ctl --pd 127.0.0.1:2379 flashback --start 7480000000000000FF4E5F728000000000FF1443770000000000FA --end 7480000000000000FF4E5F728000000000FF21C4420000000000FA --version 430315739761082369` Signed-off-by: husharp Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 3 +- cmd/tikv-ctl/Cargo.toml | 1 + cmd/tikv-ctl/src/cmd.rs | 27 +++ cmd/tikv-ctl/src/executor.rs | 95 +++++++++-- cmd/tikv-ctl/src/main.rs | 213 +++++++++++++++++++++++- cmd/tikv-ctl/src/util.rs | 1 + components/pd_client/src/client.rs | 48 ++++++ components/pd_client/src/lib.rs | 17 ++ components/pd_client/src/metrics.rs | 1 + components/server/src/server.rs | 43 +++-- components/test_raftstore/src/server.rs | 11 +- components/tikv_kv/src/lib.rs | 2 +- src/server/debug.rs | 186 +++++++++++++++++++-- src/server/debug2.rs | 13 ++ src/server/service/debug.rs | 33 +++- src/server/service/kv.rs | 98 +++++------ src/server/service/mod.rs | 5 +- 17 files changed, 691 insertions(+), 106 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ccb01f6cf57..9bfb57c5da4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2932,7 +2932,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#5d6aacbe966eb499b5463f88bb003cf61bd35f76" +source = "git+https://github.com/pingcap/kvproto.git#b8e6dcdd1030c1705883c1e3d41970fce62f5e46" dependencies = [ "futures 0.3.15", "grpcio", @@ -6720,6 +6720,7 @@ dependencies = [ name = "tikv-ctl" version = "0.0.1" dependencies = [ + "api_version", "backup", "cc", "cdc", diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index 3af251fa86b..93a8c3c04d3 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -45,6 +45,7 @@ test-engines-panic = [ nortcheck = ["engine_rocks/nortcheck"] [dependencies] +api_version = { workspace = true } backup = { workspace = true } cdc = { 
workspace = true } chrono = "0.4" diff --git a/cmd/tikv-ctl/src/cmd.rs b/cmd/tikv-ctl/src/cmd.rs index d18367597fe..17c1a6dd8eb 100644 --- a/cmd/tikv-ctl/src/cmd.rs +++ b/cmd/tikv-ctl/src/cmd.rs @@ -600,6 +600,33 @@ pub enum Cmd { #[structopt(long, default_value = "symlink")] rocksdb_files: String, }, + /// flashback data in cluster to a certain version + /// + /// NOTE: Should use `./pd-ctl config set halt-scheduling true` to halt PD + /// scheduling before flashback. + Flashback { + #[structopt(short = "v")] + /// the version to flashback + version: u64, + + #[structopt( + short = "r", + aliases = &["region"], + use_delimiter = true, + require_delimiter = true, + value_delimiter = "," + )] + /// specific regions to flashback + regions: Option>, + + #[structopt(long, default_value = "")] + /// hex start key + start: String, + + #[structopt(long, default_value = "")] + /// hex end key + end: String, + }, } #[derive(StructOpt)] diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 5e12972b58b..becb65069a7 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -1,10 +1,11 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - borrow::ToOwned, cmp::Ordering, path::Path, pin::Pin, str, string::ToString, sync::Arc, + borrow::ToOwned, cmp::Ordering, path::Path, pin::Pin, result, str, string::ToString, sync::Arc, time::Duration, u64, }; +use api_version::{ApiV1, KvFormat}; use encryption_export::data_key_manager_from_config; use engine_rocks::util::{db_exist, new_engine_opt}; use engine_traits::{ @@ -15,7 +16,7 @@ use futures::{executor::block_on, future, stream, Stream, StreamExt, TryStreamEx use grpcio::{ChannelBuilder, Environment}; use kvproto::{ debugpb::{Db as DbType, *}, - kvrpcpb::MvccInfo, + kvrpcpb::{KeyRange, MvccInfo}, metapb::{Peer, Region}, raft_cmdpb::RaftCmdRequest, raft_serverpb::PeerState, @@ -36,7 +37,12 @@ use tikv::{ debug2::DebuggerImplV2, KvEngineFactoryBuilder, }, - storage::config::EngineType, + storage::{ + config::EngineType, + kv::MockEngine, + lock_manager::{LockManager, MockLockManager}, + Engine, + }, }; use tikv_util::escape; @@ -48,7 +54,7 @@ pub const METRICS_ROCKSDB_RAFT: &str = "rocksdb_raft"; pub const METRICS_JEMALLOC: &str = "jemalloc"; pub const LOCK_FILE_ERROR: &str = "IO error: While lock file"; -type MvccInfoStream = Pin, MvccInfo), String>>>>; +type MvccInfoStream = Pin, MvccInfo), String>>>>; pub fn new_debug_executor( cfg: &TikvConfig, @@ -99,7 +105,9 @@ pub fn new_debug_executor( Err(e) => handle_engine_error(e), }; - let debugger = DebuggerImpl::new(Engines::new(kv_db, raft_db), cfg_controller); + let debugger: DebuggerImpl<_, MockEngine, MockLockManager, ApiV1> = + DebuggerImpl::new(Engines::new(kv_db, raft_db), cfg_controller, None); + Box::new(debugger) as Box } else { let mut config = cfg.raft_engine.config(); @@ -116,7 +124,8 @@ pub fn new_debug_executor( Err(e) => handle_engine_error(e), }; - let debugger = DebuggerImpl::new(Engines::new(kv_db, raft_db), cfg_controller); + let debugger: DebuggerImpl<_, MockEngine, MockLockManager, ApiV1> = + DebuggerImpl::new(Engines::new(kv_db, raft_db), cfg_controller, None); 
Box::new(debugger) as Box } EngineType::RaftKv2 => { @@ -678,6 +687,15 @@ pub trait DebugExecutor { fn dump_cluster_info(&self); fn reset_to_version(&self, version: u64); + + fn flashback_to_version( + &self, + _version: u64, + _region_id: u64, + _key_range: KeyRange, + _start_ts: u64, + _commit_ts: u64, + ) -> Result<(), KeyRange>; } impl DebugExecutor for DebugClient { @@ -687,7 +705,7 @@ impl DebugExecutor for DebugClient { } fn get_all_regions_in_store(&self) -> Vec { - DebugClient::get_all_regions_in_store(self, &GetAllRegionsInStoreRequest::default()) + self.get_all_regions_in_store(&GetAllRegionsInStoreRequest::default()) .unwrap_or_else(|e| perror_and_exit("DebugClient::get_all_regions_in_store", e)) .take_regions() } @@ -897,12 +915,45 @@ impl DebugExecutor for DebugClient { fn reset_to_version(&self, version: u64) { let mut req = ResetToVersionRequest::default(); req.set_ts(version); - DebugClient::reset_to_version(self, &req) + self.reset_to_version(&req) .unwrap_or_else(|e| perror_and_exit("DebugClient::get_cluster_info", e)); } + + fn flashback_to_version( + &self, + version: u64, + region_id: u64, + key_range: KeyRange, + start_ts: u64, + commit_ts: u64, + ) -> Result<(), KeyRange> { + let mut req = FlashbackToVersionRequest::default(); + req.set_version(version); + req.set_region_id(region_id); + req.set_start_key(key_range.get_start_key().to_owned()); + req.set_end_key(key_range.get_end_key().to_owned()); + req.set_start_ts(start_ts); + req.set_commit_ts(commit_ts); + match self.flashback_to_version(&req) { + Ok(_) => Ok(()), + Err(err) => { + println!( + "flashback key_range {:?} with start_ts {:?}, commit_ts {:?} need to retry, err is {:?}", + key_range, start_ts, commit_ts, err + ); + Err(key_range) + } + } + } } -impl DebugExecutor for DebuggerImpl { +impl DebugExecutor for DebuggerImpl +where + ER: RaftEngine, + E: Engine, + L: LockManager, + K: KvFormat, +{ fn check_local_mode(&self) {} fn get_all_regions_in_store(&self) -> Vec { @@ -1085,7 
+1136,7 @@ impl DebugExecutor for DebuggerImpl { } fn dump_metrics(&self, _tags: Vec<&str>) { - println!("only available for online mode"); + println!("only available for remote mode"); tikv_util::logger::exit_process_gracefully(-1); } @@ -1135,6 +1186,17 @@ impl DebugExecutor for DebuggerImpl { fn reset_to_version(&self, version: u64) { Debugger::reset_to_version(self, version); } + + fn flashback_to_version( + &self, + _version: u64, + _region_id: u64, + _key_range: KeyRange, + _start_ts: u64, + _commit_ts: u64, + ) -> Result<(), KeyRange> { + unimplemented!("only available for remote mode"); + } } fn handle_engine_error(err: EngineError) -> ! { @@ -1262,7 +1324,7 @@ impl DebugExecutor for DebuggerImplV2 { } fn dump_metrics(&self, _tags: Vec<&str>) { - println!("only available for online mode"); + println!("only available for remote mode"); tikv_util::logger::exit_process_gracefully(-1); } @@ -1312,4 +1374,15 @@ impl DebugExecutor for DebuggerImplV2 { fn reset_to_version(&self, _version: u64) { unimplemented!() } + + fn flashback_to_version( + &self, + _region_id: u64, + _version: u64, + _key_range: KeyRange, + _start_ts: u64, + _commit_ts: u64, + ) -> Result<(), KeyRange> { + unimplemented!("only available for remote mode"); + } } diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 5376049cf2b..613d6168f7f 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -23,6 +23,7 @@ use std::{ u64, }; +use collections::HashMap; use encryption_export::{ create_backend, data_key_manager_from_config, from_engine_encryption_method, DataKeyManager, DecrypterReader, Iv, @@ -30,7 +31,7 @@ use encryption_export::{ use engine_rocks::get_env; use engine_traits::{EncryptionKeyManager, Peekable}; use file_system::calc_crc32; -use futures::executor::block_on; +use futures::{executor::block_on, future::try_join_all}; use gag::BufferRedirect; use grpcio::{CallOption, ChannelBuilder, Environment}; use kvproto::{ @@ -44,6 +45,7 @@ use 
pd_client::{Config as PdConfig, PdClient, RpcClient}; use protobuf::Message; use raft_engine::RecoveryMode; use raft_log_engine::ManagedFileSystem; +use raftstore::store::util::build_key_range; use regex::Regex; use security::{SecurityConfig, SecurityManager}; use structopt::{clap::ErrorKind, StructOpt}; @@ -346,6 +348,25 @@ fn main() { } fork_readonly_tikv::run(&cfg, &agent_dir, &snaps, &rocksdb_files) } + Cmd::Flashback { + version, + regions, + start, + end, + } => { + let start_key = from_hex(&start).unwrap(); + let end_key = from_hex(&end).unwrap(); + let pd_client = get_pd_rpc_client(opt.pd, Arc::clone(&mgr)); + flashback_whole_cluster( + &pd_client, + &cfg, + Arc::clone(&mgr), + regions.unwrap_or_default(), + version, + start_key, + end_key, + ); + } // Commands below requires either the data dir or the host. cmd => { let data_dir = opt.data_dir.as_deref(); @@ -776,9 +797,193 @@ fn compact_whole_cluster( handles.push(h); } - for h in handles { - h.join().unwrap(); - } + handles.into_iter().for_each(|h| h.join().unwrap()); +} + +const FLASHBACK_TIMEOUT: u64 = 1800; // 1800s +const WAIT_APPLY_FLASHBACK_STATE: u64 = 100; // 100ms + +fn flashback_whole_cluster( + pd_client: &RpcClient, + cfg: &TikvConfig, + mgr: Arc, + region_ids: Vec, + version: u64, + start_key: Vec, + end_key: Vec, +) { + let pd_client = pd_client.clone(); + let cfg = cfg.clone(); + let runtime = tokio::runtime::Builder::new_multi_thread() + .thread_name("flashback") + .enable_time() + .build() + .unwrap(); + + block_on(runtime.spawn(async move { + // Prepare all regions for flashback. + let start_ts = pd_client.get_tso().await.unwrap(); + let mut stores_leader = load_leaders_to_each_store(&pd_client, start_key, end_key); + // Need to retry if all regions are not finish prepare. 
+ loop { + let mut futures = Vec::default(); + stores_leader + .clone() + .into_iter() + .for_each(|(store_id, leaders)| { + let addr = pd_client.get_store(store_id).unwrap().address; + leaders + .into_iter() + .filter(|(_, region_id)| { + region_ids.is_empty() || region_ids.contains(region_id) + }) + .for_each(|(key_range, region_id)| { + // Prepare flashback region version by key range. + let key_range = build_key_range(&key_range.0, &key_range.1, false); + + let addr = addr.clone(); + let cfg_inner = cfg.clone(); + let mgr = Arc::clone(&mgr); + let f = async move { + let debug_executor = + new_debug_executor(&cfg_inner, None, Some(&addr), mgr); + debug_executor.flashback_to_version( + version, + region_id, + key_range, + start_ts.into_inner(), + 0, + ) + }; + futures.push(f); + }) + }); + + // Wait for finishing prepare flashback. + match tokio::time::timeout( + Duration::from_secs(FLASHBACK_TIMEOUT), + try_join_all(futures), + ) + .await + { + Ok(res) => { + if let Err(key_range) = res { + // Retry specific key range to prepare flashback. + let retry_stores_leader = load_leaders_to_each_store( + &pd_client, + key_range.get_start_key().to_vec(), + key_range.get_end_key().to_vec(), + ); + // Need to update `stores_leader` to replace stale key range. + for (store_id, leaders) in retry_stores_leader { + let regions = stores_leader + .entry(store_id) + .or_insert_with(HashMap::default); + regions.extend(leaders); + } + thread::sleep(Duration::from_micros(WAIT_APPLY_FLASHBACK_STATE)); + continue; + } + break; + } + Err(e) => { + println!( + "prepare flashback with start_ts {:?} timeout. err: {:?}", + start_ts, e + ); + return; + } + } + } + + // Start flashback for all regions. 
+ let commit_ts = pd_client.get_tso().await.unwrap(); + loop { + let mut futures = Vec::default(); + stores_leader + .clone() + .into_iter() + .for_each(|(store_id, leaders)| { + let addr = pd_client.get_store(store_id).unwrap().address; + leaders + .into_iter() + .filter(|(_, region_id)| { + region_ids.is_empty() || region_ids.contains(region_id) + }) + .for_each(|(key_range, region_id)| { + // Flashback region version by key range. + let key_range = build_key_range(&key_range.0, &key_range.1, false); + + let addr = addr.clone(); + let cfg_inner = cfg.clone(); + let mgr = Arc::clone(&mgr); + let f = async move { + let debug_executor = + new_debug_executor(&cfg_inner, None, Some(&addr), mgr); + debug_executor.flashback_to_version( + version, + region_id, + key_range, + start_ts.into_inner(), + commit_ts.into_inner(), + ) + }; + futures.push(f); + }) + }); + + // Wait for finishing flashback to version. + match tokio::time::timeout( + Duration::from_secs(FLASHBACK_TIMEOUT), + try_join_all(futures), + ) + .await + { + Ok(res) => match res { + Ok(_) => break, + Err(_) => { + thread::sleep(Duration::from_micros(WAIT_APPLY_FLASHBACK_STATE)); + continue; + } + }, + Err(e) => { + println!( + "finish flashback with start_ts {:?}, commit_ts: {:?} timeout. err: {:?}", + e, start_ts, commit_ts + ); + return; + } + } + } + })) + .unwrap(); + + println!("flashback all stores success!"); +} + +fn load_leaders_to_each_store( + pd_client: &RpcClient, + start_key: Vec, + end_key: Vec, +) -> HashMap, Vec), u64>> { + // Get all regions in the cluster. + let res = pd_client.batch_load_regions(start_key, end_key); + // Put all regions in right stores. 
+ let mut store_regions = HashMap::default(); + res.into_iter().for_each(|batch| { + batch.into_iter().for_each(|r| { + let store_id = r.get_leader().get_store_id(); + let regions = store_regions + .entry(store_id) + .or_insert_with(HashMap::default); + let mut cur_region = r.get_region().clone(); + regions.insert( + (cur_region.take_start_key(), cur_region.take_end_key()), + cur_region.get_id(), + ); + }); + }); + store_regions } fn read_fail_file(path: &str) -> Vec<(String, String)> { diff --git a/cmd/tikv-ctl/src/util.rs b/cmd/tikv-ctl/src/util.rs index 0e67c905e8d..6d17ba67652 100644 --- a/cmd/tikv-ctl/src/util.rs +++ b/cmd/tikv-ctl/src/util.rs @@ -82,6 +82,7 @@ pub fn check_intersect_of_range(key_range: &KeyRange, key_range_limit: &KeyRange #[cfg(test)] mod tests { + use kvproto::kvrpcpb::KeyRange; use raftstore::store::util::build_key_range; use super::*; diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 35fc00fb631..ed42547b998 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -46,6 +46,7 @@ use super::{ pub const CQ_COUNT: usize = 1; pub const CLIENT_PREFIX: &str = "pd"; +const DEFAULT_REGION_PER_BATCH: i32 = 128; #[derive(Clone)] pub struct RpcClient { @@ -413,6 +414,53 @@ impl PdClient for RpcClient { }) } + fn scan_regions( + &self, + start_key: &[u8], + end_key: &[u8], + limit: i32, + ) -> Result> { + let _timer = PD_REQUEST_HISTOGRAM_VEC.scan_regions.start_coarse_timer(); + + let mut req = pdpb::ScanRegionsRequest::default(); + req.set_header(self.header()); + req.set_start_key(start_key.to_vec()); + req.set_end_key(end_key.to_vec()); + req.set_limit(limit); + + let mut resp = sync_request(&self.pd_client, LEADER_CHANGE_RETRY, |client, option| { + client.scan_regions_opt(&req, option) + })?; + check_resp_header(resp.get_header())?; + Ok(resp.take_regions().into()) + } + + fn batch_load_regions( + &self, + mut start_key: Vec, + end_key: Vec, + ) -> Vec> { + let mut res = 
Vec::new(); + + loop { + let regions = self + .scan_regions(&start_key, &end_key, DEFAULT_REGION_PER_BATCH) + .unwrap(); + if regions.is_empty() { + break; + } + res.push(regions.clone()); + + let end_region = regions.last().unwrap().get_region(); + if end_region.get_end_key().is_empty() { + break; + } + start_key = end_region.get_end_key().to_vec(); + } + + res + } + fn get_cluster_id(&self) -> Result { Ok(self.cluster_id) } diff --git a/components/pd_client/src/lib.rs b/components/pd_client/src/lib.rs index ba287621272..e2d2ef750b8 100644 --- a/components/pd_client/src/lib.rs +++ b/components/pd_client/src/lib.rs @@ -457,6 +457,23 @@ pub trait PdClient: Send + Sync { unimplemented!(); } + fn scan_regions( + &self, + _start_key: &[u8], + _end_key: &[u8], + _limit: i32, + ) -> Result> { + unimplemented!(); + } + + fn batch_load_regions( + &self, + mut _start_key: Vec, + mut _end_key: Vec, + ) -> Vec> { + unimplemented!(); + } + /// Gets store state if it is not a tombstone store asynchronously. fn get_store_stats_async(&self, _store_id: u64) -> BoxFuture<'_, Result> { unimplemented!(); diff --git a/components/pd_client/src/metrics.rs b/components/pd_client/src/metrics.rs index e1f1100444a..77f2e990e0f 100644 --- a/components/pd_client/src/metrics.rs +++ b/components/pd_client/src/metrics.rs @@ -32,6 +32,7 @@ make_static_metric! { is_recovering_marked, store_heartbeat, tso, + scan_regions, meta_storage_put, meta_storage_get, diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 228358d52a1..db46b45b1ce 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -129,7 +129,7 @@ use crate::{ #[inline] fn run_impl(config: TikvConfig) { - let mut tikv = TikvServer::::init::(config); + let mut tikv = TikvServer::::init(config); // Must be called after `TikvServer::init`. 
let memory_limit = tikv.core.config.memory_usage_limit.unwrap().0; @@ -144,7 +144,7 @@ fn run_impl(config: TikvConfig) { let listener = tikv.core.init_flow_receiver(); let (engines, engines_info) = tikv.init_raw_engines(listener); tikv.init_engines(engines.clone()); - let server_config = tikv.init_servers::(); + let server_config = tikv.init_servers(); tikv.register_services(); tikv.init_metrics_flusher(fetcher, engines_info); tikv.init_storage_stats_task(engines); @@ -195,7 +195,7 @@ const DEFAULT_MEMTRACE_FLUSH_INTERVAL: Duration = Duration::from_millis(1_000); const DEFAULT_STORAGE_STATS_INTERVAL: Duration = Duration::from_secs(1); /// A complete TiKV server. -struct TikvServer { +struct TikvServer { core: TikvServerCore, cfg_controller: Option, security_mgr: Arc, @@ -207,7 +207,7 @@ struct TikvServer { engines: Option>, kv_statistics: Option>, raft_statistics: Option>, - servers: Option>, + servers: Option>, region_info_accessor: RegionInfoAccessor, coprocessor_host: Option>, concurrency_manager: ConcurrencyManager, @@ -227,7 +227,7 @@ struct TikvEngines { engine: RaftKv>, } -struct Servers { +struct Servers { lock_mgr: LockManager, server: LocalServer, node: Node, @@ -236,16 +236,18 @@ struct Servers { cdc_memory_quota: MemoryQuota, rsmeter_pubsub_service: resource_metering::PubSubService, backup_stream_scheduler: Option>, + debugger: DebuggerImpl>, LockManager, F>, } type LocalServer = Server>; type LocalRaftKv = RaftKv>; -impl TikvServer +impl TikvServer where ER: RaftEngine, + F: KvFormat, { - fn init(mut config: TikvConfig) -> TikvServer { + fn init(mut config: TikvConfig) -> TikvServer { tikv_util::thread_group::set_properties(Some(GroupProperties::default())); // It is okay use pd config and security config before `init_config`, // because these configs must be provided by command line, and only @@ -444,7 +446,7 @@ where gc_worker } - fn init_servers(&mut self) -> Arc> { + fn init_servers(&mut self) -> Arc> { let flow_controller = 
Arc::new(FlowController::Singleton(EngineFlowController::new( &self.core.config.storage.flow_control, self.engines.as_ref().unwrap().engine.kv_engine().unwrap(), @@ -532,6 +534,7 @@ where let debug_thread_pool = Arc::new( Builder::new_multi_thread() .thread_name(thd_name!("debugger")) + .enable_time() .worker_threads(1) .after_start_wrapper(move || { tikv_alloc::add_thread_memory_accessor(); @@ -746,7 +749,7 @@ where node.id(), &server_config, &self.security_mgr, - storage, + storage.clone(), coprocessor::Endpoint::new( &server_config.value(), cop_read_pool_handle, @@ -991,6 +994,15 @@ where )), ); + // Create Debugger. + let mut debugger = DebuggerImpl::new( + engines.engines.clone(), + self.cfg_controller.as_ref().unwrap().clone(), + Some(storage), + ); + debugger.set_kv_statistics(self.kv_statistics.clone()); + debugger.set_raft_statistics(self.raft_statistics.clone()); + self.servers = Some(Servers { lock_mgr, server, @@ -1000,6 +1012,7 @@ where cdc_memory_quota, rsmeter_pubsub_service, backup_stream_scheduler, + debugger, }); server_config @@ -1032,19 +1045,13 @@ where .unwrap() .register(tikv::config::Module::Import, Box::new(import_cfg_mgr)); - let mut debugger = DebuggerImpl::new( - engines.engines.clone(), - self.cfg_controller.as_ref().unwrap().clone(), - ); - debugger.set_kv_statistics(self.kv_statistics.clone()); - debugger.set_raft_statistics(self.raft_statistics.clone()); - // Debug service. 
let debug_service = DebugService::new( - debugger, + servers.debugger.clone(), servers.server.get_debug_thread_pool().clone(), engines.engine.raft_extension(), ); + info!("start register debug service"); if servers .server .register_service(create_debug(debug_service)) @@ -1416,7 +1423,7 @@ where } } -impl TikvServer { +impl TikvServer { fn init_raw_engines( &mut self, flow_listener: engine_rocks::FlowListener, diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 3244a0e15f7..261f3862e31 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -8,7 +8,7 @@ use std::{ usize, }; -use api_version::{dispatch_api_version, KvFormat}; +use api_version::{dispatch_api_version, ApiV1, KvFormat}; use causal_ts::CausalTsProviderImpl; use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; @@ -66,7 +66,8 @@ use tikv::{ }, storage::{ self, - kv::{FakeExtension, LocalTablets, SnapContext}, + kv::{FakeExtension, LocalTablets, MockEngine, SnapContext}, + lock_manager::MockLockManager, txn::flow_controller::{EngineFlowController, FlowController}, Engine, Storage, }, @@ -489,7 +490,11 @@ impl ServerCluster { .unwrap(), ); - let debugger = DebuggerImpl::new(engines.clone(), ConfigController::new(cfg.tikv.clone())); + let debugger: DebuggerImpl<_, MockEngine, MockLockManager, ApiV1> = DebuggerImpl::new( + engines.clone(), + ConfigController::new(cfg.tikv.clone()), + None, + ); let debug_thread_handle = debug_thread_pool.handle().clone(); let debug_service = DebugService::new(debugger, debug_thread_handle, extension); diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 48b667adf7a..d7954268aa7 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -58,7 +58,7 @@ use txn_types::{Key, PessimisticLock, TimeStamp, TxnExtra, Value}; pub use self::{ btree_engine::{BTreeEngine, BTreeEngineIterator, BTreeEngineSnapshot}, 
cursor::{Cursor, CursorBuilder}, - mock_engine::{ExpectedWrite, MockEngineBuilder}, + mock_engine::{ExpectedWrite, MockEngine, MockEngineBuilder}, raft_extension::{FakeExtension, RaftExtension}, rocksdb_engine::{RocksEngine, RocksSnapshot}, stats::{ diff --git a/src/server/debug.rs b/src/server/debug.rs index 6f1d99e612d..a61f6d7b56a 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -9,6 +9,7 @@ use std::{ thread::{Builder as ThreadBuilder, JoinHandle}, }; +use api_version::KvFormat; use collections::HashSet; use engine_rocks::{ raw::{CompactOptions, DBBottommostLevelCompaction}, @@ -20,9 +21,10 @@ use engine_traits::{ Mutable, MvccProperties, Peekable, RaftEngine, RaftLogBatch, Range, RangePropertiesExt, SyncMutable, WriteBatch, WriteBatchExt, WriteOptions, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; +use futures::future::Future; use kvproto::{ debugpb::{self, Db as DbType}, - kvrpcpb::MvccInfo, + kvrpcpb::{self, Context, MvccInfo}, metapb::{PeerRole, Region}, raft_serverpb::*, }; @@ -33,17 +35,23 @@ use raftstore::{ store::{write_initial_apply_state, write_initial_raft_state, write_peer_state, PeerStorage}, }; use thiserror::Error; +use tikv_kv::Engine; use tikv_util::{ config::ReadableSize, keybuilder::KeyBuilder, store::find_peer, sys::thread::StdThreadBuildWrapper, worker::Worker, }; use txn_types::Key; +use super::service::{future_flashback_to_version, future_prepare_flashback_to_version}; pub use crate::storage::mvcc::MvccInfoIterator; use crate::{ config::ConfigController, server::reset_to_version::ResetToVersionManager, - storage::mvcc::{Lock, LockType, TimeStamp, Write, WriteRef, WriteType}, + storage::{ + lock_manager::LockManager, + mvcc::{Lock, LockType, TimeStamp, Write, WriteRef, WriteType}, + Storage, + }, }; pub type Result = result::Result; @@ -61,6 +69,9 @@ pub enum Error { #[error("Engine error {0}")] EngineTrait(#[from] EngineTraitError), + + #[error("Flashback Failed {0:?}")] + FlashbackFailed(String), } /// Describes the meta 
information of a Region. @@ -172,6 +183,16 @@ pub trait Debugger { fn reset_to_version(&self, version: u64); + fn key_range_flashback_to_version( + &self, + version: u64, + region_id: u64, + start_key: &[u8], + end_key: &[u8], + start_ts: u64, + commit_ts: u64, + ) -> impl Future> + Send; + fn set_kv_statistics(&mut self, s: Option>); fn set_raft_statistics(&mut self, s: Option>); @@ -180,15 +201,28 @@ pub trait Debugger { } #[derive(Clone)] -pub struct DebuggerImpl { +pub struct DebuggerImpl +where + ER: RaftEngine, + E: Engine, + L: LockManager, + K: KvFormat, +{ engines: Engines, kv_statistics: Option>, raft_statistics: Option>, reset_to_version_manager: ResetToVersionManager, cfg_controller: ConfigController, + storage: Option>, } -impl InnerRocksEngineExtractor for DebuggerImpl { +impl InnerRocksEngineExtractor for DebuggerImpl +where + ER: RaftEngine, + E: Engine, + L: LockManager, + K: KvFormat, +{ default fn get_db_from_type(&self, db: DbType) -> Result<&RocksEngine> { match db { DbType::Kv => Ok(&self.engines.kv), @@ -198,7 +232,12 @@ impl InnerRocksEngineExtractor for DebuggerImpl { } } -impl InnerRocksEngineExtractor for DebuggerImpl { +impl InnerRocksEngineExtractor for DebuggerImpl +where + E: Engine, + L: LockManager, + K: KvFormat, +{ fn get_db_from_type(&self, db: DbType) -> Result<&RocksEngine> { match db { DbType::Kv => Ok(&self.engines.kv), @@ -208,11 +247,18 @@ impl InnerRocksEngineExtractor for DebuggerImpl { } } -impl DebuggerImpl { +impl DebuggerImpl +where + ER: RaftEngine, + E: Engine, + L: LockManager, + K: KvFormat, +{ pub fn new( engines: Engines, cfg_controller: ConfigController, - ) -> DebuggerImpl { + storage: Option>, + ) -> DebuggerImpl { let reset_to_version_manager = ResetToVersionManager::new(engines.kv.clone()); DebuggerImpl { engines, @@ -220,6 +266,7 @@ impl DebuggerImpl { raft_statistics: None, reset_to_version_manager, cfg_controller, + storage, } } @@ -751,7 +798,13 @@ impl DebuggerImpl { } } -impl Debugger for DebuggerImpl 
{ +impl Debugger for DebuggerImpl +where + ER: RaftEngine, + E: Engine, + L: LockManager, + K: KvFormat, +{ fn get(&self, db: DbType, cf: &str, key: &[u8]) -> Result> { validate_db_and_cf(db, cf)?; let db = self.get_db_from_type(db)?; @@ -963,6 +1016,35 @@ impl Debugger for DebuggerImpl { self.reset_to_version_manager.start(version.into()); } + fn key_range_flashback_to_version( + &self, + version: u64, + region_id: u64, + start_key: &[u8], + end_key: &[u8], + start_ts: u64, + commit_ts: u64, + ) -> impl Future> + Send { + let store_id = self.get_store_ident().unwrap().get_store_id(); + let r = self.region_info(region_id).unwrap(); + let region = r + .region_local_state + .as_ref() + .map(|s| s.get_region().clone()) + .unwrap(); + + async_key_range_flashback_to_version( + self.storage.as_ref().unwrap().clone(), + region, + version, + store_id, + start_key.to_vec(), + end_key.to_vec(), + start_ts, + commit_ts, + ) + } + fn set_kv_statistics(&mut self, s: Option>) { self.kv_statistics = s; } @@ -987,6 +1069,76 @@ impl Debugger for DebuggerImpl { } } +async fn async_key_range_flashback_to_version( + storage: Storage, + region: Region, + version: u64, + store_id: u64, + start_key: Vec, + end_key: Vec, + start_ts: u64, + commit_ts: u64, +) -> Result<()> { + let is_in_flashback = region.get_is_in_flashback(); + let in_prepare_state = TimeStamp::from(commit_ts).is_zero(); + if in_prepare_state && is_in_flashback { + return Ok(()); + } else if !in_prepare_state && !is_in_flashback { + return Err(Error::FlashbackFailed("not in flashback state".into())); + } + + let mut ctx = Context::default(); + ctx.set_region_id(region.get_id()); + ctx.set_region_epoch(region.get_region_epoch().to_owned()); + let peer = find_peer(®ion, store_id).unwrap(); + ctx.set_peer(peer.clone()); + + // Flashback will encode the key, so we need to use raw key. 
+ let start_key = Key::from_encoded_slice(&start_key) + .to_raw() + .unwrap_or_default(); + let end_key = Key::from_encoded_slice(&end_key) + .to_raw() + .unwrap_or_default(); + + // Means now is prepare flashback. + if in_prepare_state { + let mut req = kvrpcpb::PrepareFlashbackToVersionRequest::new(); + req.set_version(version); + req.set_start_key(start_key.clone()); + req.set_end_key(end_key.clone()); + req.set_context(ctx.clone()); + req.set_start_ts(start_ts); + + let resp = future_prepare_flashback_to_version(storage, req) + .await + .unwrap(); + if !resp.get_error().is_empty() || resp.has_region_error() { + error!("exec prepare flashback failed"; "err" => ?resp.get_error(), "region_err" => ?resp.get_region_error()); + return Err(Error::FlashbackFailed( + "exec prepare flashback failed.".into(), + )); + } + } else { + let mut req = kvrpcpb::FlashbackToVersionRequest::new(); + req.set_version(version); + req.set_start_key(start_key.clone()); + req.set_end_key(end_key.clone()); + req.set_context(ctx.clone()); + req.set_start_ts(start_ts); + req.set_commit_ts(commit_ts); + + let resp = future_flashback_to_version(storage, req).await.unwrap(); + if !resp.get_error().is_empty() || resp.has_region_error() { + error!("exec finish flashback failed"; "err" => ?resp.get_error(), "region_err" => ?resp.get_region_error()); + return Err(Error::FlashbackFailed( + "exec finish flashback failed.".into(), + )); + } + } + Ok(()) +} + pub fn dump_default_cf_properties( db: &RocksEngine, start: &[u8], @@ -1472,6 +1624,7 @@ fn divide_db(db: &RocksEngine, parts: usize) -> raftstore::Result>> #[cfg(test)] mod tests { + use api_version::ApiV1; use engine_rocks::{util::new_engine_opt, RocksCfOptions, RocksDbOptions, RocksEngine}; use engine_traits::{Mutable, SyncMutable, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE}; use kvproto::{ @@ -1480,9 +1633,13 @@ mod tests { }; use raft::eraftpb::EntryType; use tempfile::Builder; + use tikv_kv::MockEngine; use super::*; - use 
crate::storage::mvcc::{Lock, LockType}; + use crate::storage::{ + lock_manager::MockLockManager, + mvcc::{Lock, LockType}, + }; fn init_region_state( engine: &RocksEngine, @@ -1607,16 +1764,21 @@ mod tests { } } - fn new_debugger() -> DebuggerImpl { + fn new_debugger() -> DebuggerImpl { let tmp = Builder::new().prefix("test_debug").tempdir().unwrap(); let path = tmp.path().to_str().unwrap(); let engine = engine_rocks::util::new_engine(path, ALL_CFS).unwrap(); let engines = Engines::new(engine.clone(), engine); - DebuggerImpl::new(engines, ConfigController::default()) + DebuggerImpl::new(engines, ConfigController::default(), None) } - impl DebuggerImpl { + impl DebuggerImpl + where + E: Engine, + L: LockManager, + K: KvFormat, + { fn set_store_id(&self, store_id: u64) { let mut ident = self.get_store_ident().unwrap_or_default(); ident.set_store_id(store_id); diff --git a/src/server/debug2.rs b/src/server/debug2.rs index 776880f6895..368f4eb1696 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -9,6 +9,7 @@ use engine_traits::{ CachedTablet, Iterable, MiscExt, Peekable, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, }; +use futures::future::Future; use keys::{data_key, DATA_MAX_KEY, DATA_PREFIX_KEY}; use kvproto::{ debugpb::Db as DbType, @@ -597,6 +598,18 @@ impl Debugger for DebuggerImplV2 { self.raft_statistics = s; } + fn key_range_flashback_to_version( + &self, + _version: u64, + _region_id: u64, + _start_key: &[u8], + _end_key: &[u8], + _start_ts: u64, + _commit_ts: u64, + ) -> impl Future> + Send { + async move { unimplemented!() } + } + fn get_range_properties(&self, start: &[u8], end: &[u8]) -> Result> { let mut props = vec![]; let start = &keys::data_key(start); diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index 83d3932ead6..20543cf6736 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -22,6 +22,7 @@ fn error_to_status(e: Error) -> RpcStatus { 
Error::InvalidArgument(msg) => (RpcStatusCode::INVALID_ARGUMENT, msg), Error::Other(e) => (RpcStatusCode::UNKNOWN, format!("{:?}", e)), Error::EngineTrait(e) => (RpcStatusCode::UNKNOWN, format!("{:?}", e)), + Error::FlashbackFailed(msg) => (RpcStatusCode::UNKNOWN, msg), }; RpcStatus::with_message(code, msg) } @@ -54,8 +55,8 @@ where T: RaftExtension, D: Debugger, { - /// Constructs a new `Service` with `Engines`, a `RaftExtension` and a - /// `GcWorker`. + /// Constructs a new `Service` with `Engines`, a `RaftExtension`, a + /// `GcWorker` and a `RegionInfoAccessor`. pub fn new(debugger: D, pool: Handle, raft_router: T) -> Self { Service { pool, @@ -547,6 +548,34 @@ where self.debugger.reset_to_version(req.get_ts()); sink.success(ResetToVersionResponse::default()); } + + fn flashback_to_version( + &mut self, + ctx: RpcContext<'_>, + req: FlashbackToVersionRequest, + sink: UnarySink, + ) { + let debugger = self.debugger.clone(); + let f = self + .pool + .spawn(async move { + let check = debugger.key_range_flashback_to_version( + req.get_version(), + req.get_region_id(), + req.get_start_key(), + req.get_end_key(), + req.get_start_ts(), + req.get_commit_ts(), + ); + match check.await { + Ok(_) => Ok(FlashbackToVersionResponse::default()), + Err(err) => Err(err), + } + }) + .map(|res| res.unwrap()); + + self.handle_response(ctx, sink, f, "debug_flashback_to_version"); + } } mod region_size_response { diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 67d367dc351..0c319a73baa 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -423,7 +423,7 @@ impl Tikv for Service { let begin_instant = Instant::now(); let source = req.mut_context().take_request_source(); - let resp = future_prepare_flashback_to_version(&self.storage, req); + let resp = future_prepare_flashback_to_version(self.storage.clone(), req); let task = async move { let resp = resp.await?; let elapsed = begin_instant.saturating_elapsed(); @@ -454,7 +454,7 @@ impl Tikv for 
Service { let begin_instant = Instant::now(); let source = req.mut_context().take_request_source(); - let resp = future_flashback_to_version(&self.storage, req); + let resp = future_flashback_to_version(self.storage.clone(), req); let task = async move { let resp = resp.await?; let elapsed = begin_instant.saturating_elapsed(); @@ -1259,8 +1259,8 @@ fn handle_batch_commands_request( ResolveLock, future_resolve_lock(storage), kv_resolve_lock; Gc, future_gc(), kv_gc; DeleteRange, future_delete_range(storage), kv_delete_range; - PrepareFlashbackToVersion, future_prepare_flashback_to_version(storage), kv_prepare_flashback_to_version; - FlashbackToVersion, future_flashback_to_version(storage), kv_flashback_to_version; + PrepareFlashbackToVersion, future_prepare_flashback_to_version(storage.clone()), kv_prepare_flashback_to_version; + FlashbackToVersion, future_flashback_to_version(storage.clone()), kv_flashback_to_version; RawBatchGet, future_raw_batch_get(storage), raw_batch_get; RawPut, future_raw_put(storage), raw_put; RawBatchPut, future_raw_batch_put(storage), raw_batch_put; @@ -1578,65 +1578,59 @@ fn future_delete_range( // the actual flashback operation. // NOTICE: the caller needs to make sure the version we want to flashback won't // be between any transactions that have not been fully committed. -fn future_prepare_flashback_to_version( +pub async fn future_prepare_flashback_to_version( // Keep this param to hint the type of E for the compiler. 
- storage: &Storage, + storage: Storage, req: PrepareFlashbackToVersionRequest, -) -> impl Future> { - let storage = storage.clone(); - async move { - let f = storage - .get_engine() - .start_flashback(req.get_context(), req.get_start_ts()); - let mut res = f.await.map_err(storage::Error::from); +) -> ServerResult { + let f = storage + .get_engine() + .start_flashback(req.get_context(), req.get_start_ts()); + let mut res = f.await.map_err(storage::Error::from); + if matches!(res, Ok(())) { + // After the region is put into the flashback state, we need to do a special + // prewrite to prevent `resolved_ts` from advancing. + let (cb, f) = paired_future_callback(); + res = storage.sched_txn_command(req.clone().into(), cb); if matches!(res, Ok(())) { - // After the region is put into the flashback state, we need to do a special - // prewrite to prevent `resolved_ts` from advancing. - let (cb, f) = paired_future_callback(); - res = storage.sched_txn_command(req.clone().into(), cb); - if matches!(res, Ok(())) { - res = f.await.unwrap_or_else(|e| Err(box_err!(e))); - } - } - let mut resp = PrepareFlashbackToVersionResponse::default(); - if let Some(e) = extract_region_error(&res) { - resp.set_region_error(e); - } else if let Err(e) = res { - resp.set_error(format!("{}", e)); + res = f.await.unwrap_or_else(|e| Err(box_err!(e))); } - Ok(resp) } + let mut resp = PrepareFlashbackToVersionResponse::default(); + if let Some(e) = extract_region_error(&res) { + resp.set_region_error(e); + } else if let Err(e) = res { + resp.set_error(format!("{}", e)); + } + Ok(resp) } // Flashback the region to a specific point with the given `version`, please // make sure the region is "locked" by `PrepareFlashbackToVersion` first, // otherwise this request will fail. 
-fn future_flashback_to_version( - storage: &Storage, +pub async fn future_flashback_to_version( + storage: Storage, req: FlashbackToVersionRequest, -) -> impl Future> { - let storage = storage.clone(); - async move { - // Perform the data flashback transaction command. We will check if the region - // is in the flashback state when proposing the flashback modification. - let (cb, f) = paired_future_callback(); - let mut res = storage.sched_txn_command(req.clone().into(), cb); - if matches!(res, Ok(())) { - res = f.await.unwrap_or_else(|e| Err(box_err!(e))); - } - if matches!(res, Ok(())) { - // Only finish when flashback executed successfully. - let f = storage.get_engine().end_flashback(req.get_context()); - res = f.await.map_err(storage::Error::from); - } - let mut resp = FlashbackToVersionResponse::default(); - if let Some(err) = extract_region_error(&res) { - resp.set_region_error(err); - } else if let Err(e) = res { - resp.set_error(format!("{}", e)); - } - Ok(resp) - } +) -> ServerResult { + // Perform the data flashback transaction command. We will check if the region + // is in the flashback state when proposing the flashback modification. + let (cb, f) = paired_future_callback(); + let mut res = storage.sched_txn_command(req.clone().into(), cb); + if matches!(res, Ok(())) { + res = f.await.unwrap_or_else(|e| Err(box_err!(e))); + } + if matches!(res, Ok(())) { + // Only finish when flashback executed successfully. 
+ let f = storage.get_engine().end_flashback(req.get_context()); + res = f.await.map_err(storage::Error::from); + } + let mut resp = FlashbackToVersionResponse::default(); + if let Some(err) = extract_region_error(&res) { + resp.set_region_error(err); + } else if let Err(e) = res { + resp.set_error(format!("{}", e)); + } + Ok(resp) } fn future_raw_get( diff --git a/src/server/service/mod.rs b/src/server/service/mod.rs index 1576e7db41c..793f4fd7906 100644 --- a/src/server/service/mod.rs +++ b/src/server/service/mod.rs @@ -9,8 +9,9 @@ pub use self::{ debug::Service as DebugService, diagnostics::Service as DiagnosticsService, kv::{ - batch_commands_request, batch_commands_response, GrpcRequestDuration, - MeasuredBatchResponse, MeasuredSingleResponse, Service as KvService, + batch_commands_request, batch_commands_response, future_flashback_to_version, + future_prepare_flashback_to_version, GrpcRequestDuration, MeasuredBatchResponse, + MeasuredSingleResponse, Service as KvService, }, }; From 73f9ef5758042f79f08768ea1f704c1cada18f7a Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 5 Jun 2023 13:51:39 +0800 Subject: [PATCH 0719/1149] raftstore-v2: actively handle ResCmdChannel after peer is destroyed (#14863) close tikv/tikv#14675, ref tikv/tikv#14675 actively handle ResCmdChannel after peer is destroyed. 
Signed-off-by: Spade A Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../src/operation/command/write/mod.rs | 3 + components/raftstore-v2/src/operation/life.rs | 10 +- components/tikv_util/src/store/peer.rs | 8 ++ src/storage/txn/scheduler.rs | 7 ++ tests/failpoints/cases/test_transaction.rs | 101 +++++++++++++++++- 5 files changed, 125 insertions(+), 4 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index a12d3e68f45..1ff1809be9d 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -4,6 +4,7 @@ use engine_traits::{ data_cf_offset, DeleteStrategy, KvEngine, Mutable, RaftEngine, Range as EngineRange, ALL_CFS, CF_DEFAULT, }; +use fail::fail_point; use kvproto::raft_cmdpb::RaftRequestHeader; use raftstore::{ store::{ @@ -132,6 +133,8 @@ impl Peer { }; let (data, chs) = encoder.encode(); let res = self.propose(ctx, data); + fail_point!("after_propose_pending_writes"); + self.post_propose_command(ctx, res, chs, call_proposed_on_success); } } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 70ebbcea348..20756df8bac 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -37,7 +37,11 @@ use kvproto::{ raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, RaftMessage}, }; use raftstore::store::{ - fsm::life::{build_peer_destroyed_report, forward_destroy_to_source_peer}, + fsm::{ + apply, + life::{build_peer_destroyed_report, forward_destroy_to_source_peer}, + Proposal, + }, metrics::RAFT_PEER_PENDING_DURATION, util, Transport, WriteTask, }; @@ -777,6 +781,10 @@ impl Peer { let _ = ctx.router.send_raft_message(msg); } self.pending_reads_mut().clear_all(Some(region_id)); + for Proposal { cb, .. 
} in self.proposals_mut().queue_mut().drain(..) { + apply::notify_req_region_removed(region_id, cb); + } + self.clear_apply_scheduler(); } } diff --git a/components/tikv_util/src/store/peer.rs b/components/tikv_util/src/store/peer.rs index bbc96bb786f..4af3cbdc254 100644 --- a/components/tikv_util/src/store/peer.rs +++ b/components/tikv_util/src/store/peer.rs @@ -37,6 +37,14 @@ pub fn new_peer(store_id: u64, peer_id: u64) -> Peer { peer } +pub fn new_incoming_voter(store_id: u64, peer_id: u64) -> Peer { + let mut peer = Peer::default(); + peer.set_store_id(store_id); + peer.set_id(peer_id); + peer.set_role(PeerRole::IncomingVoter); + peer +} + pub fn new_learner_peer(store_id: u64, peer_id: u64) -> Peer { let mut peer = Peer::default(); peer.set_store_id(store_id); diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 665e99fd6d4..e7173bc9fef 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -1597,10 +1597,17 @@ impl TxnScheduler { } } } + // If it's not finished while the channel is closed, it means the write // is undeterministic. in this case, we don't know whether the // request is finished or not, so we should not release latch as // it may break correctness. + // However, not release latch will cause deadlock which may ultimately block all + // following txns, so we panic here. 
+ panic!( + "response channel is unexpectedly dropped, tag {:?}, cid {}", + tag, cid + ); } /// Returns whether it succeeds to write pessimistic locks to the in-memory diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 0e40d412eaf..5ad15fa3202 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -3,7 +3,7 @@ use std::{ sync::{ mpsc::{channel, sync_channel}, - Arc, + Arc, Mutex, }, thread, time::Duration, @@ -18,6 +18,7 @@ use kvproto::{ }, tikvpb::TikvClient, }; +use raft::prelude::{ConfChangeType, MessageType}; use raftstore::store::LocksStatus; use storage::{ mvcc::{ @@ -26,7 +27,10 @@ use storage::{ }, txn::{self, commands}, }; -use test_raftstore::{configure_for_lease_read, new_server_cluster}; +use test_raftstore::{ + configure_for_lease_read, new_learner_peer, new_server_cluster, try_kv_prewrite, + DropMessageFilter, +}; use tikv::storage::{ self, kv::SnapshotExt, @@ -37,7 +41,10 @@ use tikv::storage::{ }, Snapshot, TestEngineBuilder, TestStorageBuilderApiV1, }; -use tikv_util::{store::new_peer, HandyRwLock}; +use tikv_util::{ + store::{new_peer, peer::new_incoming_voter}, + HandyRwLock, +}; use txn_types::{Key, Mutation, PessimisticLock, TimeStamp}; #[test] @@ -679,3 +686,91 @@ fn test_read_index_with_max_ts() { assert_eq!(resp.error.unwrap().locked.unwrap().lock_version, start_ts); fail::remove("on_apply_write_cmd"); } + +// This test mocks the situation described in the PR#14863 +#[test] +fn test_proposal_concurrent_with_conf_change_and_transfer_leader() { + let (mut cluster, _, mut ctx) = test_raftstore_v2::must_new_cluster_mul(4); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.must_transfer_leader(1, new_peer(1, 1)); + pd_client.add_peer(1, new_learner_peer(4, 4)); + + std::thread::sleep(Duration::from_millis(500)); + + pd_client.joint_confchange( + 1, + vec![ + (ConfChangeType::AddNode, 
new_peer(4, 4)), + (ConfChangeType::AddLearnerNode, new_learner_peer(1, 1)), + ], + ); + + std::thread::sleep(Duration::from_millis(500)); + + let leader = cluster.leader_of_region(1).unwrap(); + let epoch = cluster.get_region_epoch(1); + ctx.set_region_id(1); + ctx.set_peer(leader.clone()); + ctx.set_region_epoch(epoch); + + let env = Arc::new(Environment::new(1)); + let ch = ChannelBuilder::new(env) + .connect(&cluster.sim.read().unwrap().get_addr(leader.get_store_id())); + let client = TikvClient::new(ch); + + cluster.add_send_filter_on_node( + 1, + Box::new(DropMessageFilter::new(Arc::new(move |m| { + let msg_type = m.get_message().get_msg_type(); + let to_store = m.get_to_peer().get_store_id(); + !(msg_type == MessageType::MsgAppend && (to_store == 2 || to_store == 3)) + }))), + ); + + cluster.add_send_filter_on_node( + 4, + Box::new(DropMessageFilter::new(Arc::new(move |m| { + let msg_type = m.get_message().get_msg_type(); + let to_store = m.get_to_peer().get_store_id(); + !(msg_type == MessageType::MsgAppend && to_store == 1) + }))), + ); + + let (tx, rx) = channel::<()>(); + let tx = Arc::new(Mutex::new(tx)); + // ensure the cmd is proposed before transfer leader + fail::cfg_callback("after_propose_pending_writes", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + + let handle = std::thread::spawn(move || { + let mut mutations = vec![]; + for key in vec![b"key3".to_vec(), b"key4".to_vec()] { + let mut mutation = kvproto::kvrpcpb::Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(key); + mutations.push(mutation); + } + let _ = try_kv_prewrite(&client, ctx, mutations, b"key3".to_vec(), 10); + }); + + rx.recv_timeout(std::time::Duration::from_secs(50)).unwrap(); + pd_client.transfer_leader(1, new_peer(4, 4), vec![]); + + pd_client.region_leader_must_be(1, new_incoming_voter(4, 4)); + pd_client.must_leave_joint(1); + + pd_client.must_joint_confchange( + 1, + vec![(ConfChangeType::RemoveNode, new_learner_peer(1, 1))], + ); + 
pd_client.must_leave_joint(1); + + cluster.clear_send_filter_on_node(1); + cluster.clear_send_filter_on_node(4); + + handle.join().unwrap(); +} From a3ed31519b42e43c4392d2b349a1b24815dea93b Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 6 Jun 2023 11:59:41 +0800 Subject: [PATCH 0720/1149] tikv-ctl: implement bad regions for raftstore-v2 (#14836) ref tikv/tikv#14654 implement bad regions for raftstore-v2 Signed-off-by: Spade A --- cmd/tikv-ctl/src/executor.rs | 11 ++- components/raftstore-v2/src/lib.rs | 2 + src/server/debug.rs | 1 + src/server/debug2.rs | 154 ++++++++++++++++++++++++++++- 4 files changed, 164 insertions(+), 4 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index becb65069a7..79f7a6f2842 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -1305,7 +1305,16 @@ impl DebugExecutor for DebuggerImplV2 { } fn print_bad_regions(&self) { - unimplemented!() + let bad_regions = self + .bad_regions() + .unwrap_or_else(|e| perror_and_exit("Debugger::bad_regions", e)); + if !bad_regions.is_empty() { + for (region_id, error) in bad_regions { + println!("{}: {}", region_id, error); + } + return; + } + println!("all regions are healthy") } fn remove_fail_stores( diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 697d953e5c8..5b5e132b9ce 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -47,3 +47,5 @@ pub use worker::{ pd::{PdReporter, Task as PdTask}, tablet::Task as TabletTask, }; + +pub use crate::raft::Storage; diff --git a/src/server/debug.rs b/src/server/debug.rs index a61f6d7b56a..54b1eccae30 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -2132,6 +2132,7 @@ mod tests { let kv_engine = &debugger.engines.kv; let raft_engine = &debugger.engines.raft; let store_id = 1; // It's a fake id. 
+ debugger.set_store_id(store_id); let mut wb1 = raft_engine.write_batch(); let cf1 = CF_DEFAULT; diff --git a/src/server/debug2.rs b/src/server/debug2.rs index 368f4eb1696..ac9ccdedf7c 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -18,8 +18,11 @@ use kvproto::{ raft_serverpb::{PeerState, RegionLocalState, StoreIdent}, }; use nom::AsBytes; -use raft::prelude::Entry; +use raft::{prelude::Entry, RawNode}; use raftstore::{coprocessor::get_region_approximate_middle, store::util::check_key_in_region}; +use raftstore_v2::Storage; +use slog::o; +use tikv_util::{config::ReadableSize, store::find_peer, worker::Worker}; use super::debug::{BottommostLevelCompaction, Debugger, RegionInfo}; use crate::{ @@ -213,6 +216,72 @@ impl DebuggerImplV2 { } } + pub fn bad_regions(&self) -> Result> { + let store_id = self.get_store_ident()?.get_store_id(); + let mut res = Vec::new(); + let fake_read_worker = Worker::new("fake-read-worker").lazy_build("fake-read-worker"); + + let logger = slog_global::borrow_global().new(o!()); + let check_region_state = |region_id: u64| -> Result<()> { + let region_state = + box_try!(self.raft_engine.get_region_state(region_id, u64::MAX)).unwrap(); + match region_state.get_state() { + PeerState::Tombstone | PeerState::Applying => return Ok(()), + _ => {} + } + + let region = region_state.get_region(); + let peer_id = find_peer(region, store_id) + .map(|peer| peer.get_id()) + .ok_or_else(|| { + Error::Other( + format!( + "RegionLocalState doesn't contains peer itself, {:?}", + region_state + ) + .into(), + ) + })?; + + let logger = logger.new(o!("region_id" => region_id, "peer_id" => peer_id)); + let storage = box_try!(Storage::::new( + region_id, + store_id, + self.raft_engine.clone(), + fake_read_worker.scheduler(), + &logger + )) + .unwrap(); + + let raft_cfg = raft::Config { + id: peer_id, + election_tick: 10, + heartbeat_tick: 2, + max_size_per_msg: ReadableSize::mb(1).0, + max_inflight_msgs: 256, + check_quorum: true, + 
skip_bcast_commit: true, + ..Default::default() + }; + + box_try!(RawNode::new(&raft_cfg, storage, &logger)); + Ok(()) + }; + + box_try!( + self.raft_engine + .for_each_raft_group::(&mut |region_id| { + if let Err(e) = check_region_state(region_id) { + res.push((region_id, e)); + } + + Ok(()) + }) + ); + + Ok(res) + } + /// Set regions to tombstone by manual, and apply other status(such as /// peers, version, and key range) from `region` which comes from PD /// normally. @@ -845,14 +914,15 @@ mod tests { use std::path::Path; use engine_traits::{ - RaftEngineReadOnly, RaftLogBatch, SyncMutable, CF_DEFAULT, CF_LOCK, CF_WRITE, + RaftEngineReadOnly, RaftLogBatch, SyncMutable, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_WRITE, }; use kvproto::{ - metapb::{self, PeerRole}, + metapb::{self, Peer, PeerRole}, raft_serverpb::*, }; use raft::prelude::EntryType; use raft_log_engine::RaftLogEngine; + use raftstore::store::RAFT_INIT_LOG_INDEX; use super::*; use crate::{ @@ -1305,6 +1375,84 @@ mod tests { } } + #[test] + fn test_bad_regions() { + let dir = test_util::temp_dir("test-debugger", false); + let debugger = new_debugger(dir.path()); + let store_id = 1; + debugger.set_store_id(store_id); + + let mut lb = debugger.raft_engine.log_batch(30); + + let put_region_state = + |lb: &mut raft_log_engine::RaftLogBatch, region_id: u64, peers: &[u64]| { + let mut region_state = RegionLocalState::default(); + region_state.set_state(PeerState::Normal); + let region = region_state.mut_region(); + region.set_id(region_id); + let peers = peers + .iter() + .enumerate() + .map(|(_, &sid)| Peer { + id: region_id, + store_id: sid, + ..Default::default() + }) + .collect::>(); + region.set_peers(peers.into()); + lb.put_region_state(region_id, 5, ®ion_state).unwrap(); + }; + + let put_apply_state = + |lb: &mut raft_log_engine::RaftLogBatch, region_id: u64, apply_index: u64| { + let mut apply_state = RaftApplyState::default(); + apply_state.set_applied_index(apply_index); + lb.put_apply_state(region_id, 
apply_index, &apply_state) + .unwrap(); + + for cf in ALL_CFS { + lb.put_flushed_index(region_id, cf, 5, apply_index).unwrap(); + } + }; + + let put_raft_state = |lb: &mut raft_log_engine::RaftLogBatch, + region_id: u64, + last_index: u64, + commit_index: u64| { + let mut raft_state = RaftLocalState::default(); + raft_state.set_last_index(last_index); + raft_state.mut_hard_state().set_commit(commit_index); + lb.put_raft_state(region_id, &raft_state).unwrap(); + }; + + for ®ion_id in &[10, 11, 12] { + put_region_state(&mut lb, region_id, &[store_id]); + } + + // last index < commit index + put_raft_state(&mut lb, 10, 100, 110); + put_apply_state(&mut lb, 10, RAFT_INIT_LOG_INDEX); + + // commit index < last index < apply index, or commit index < apply index < last + // index. + put_raft_state(&mut lb, 11, 100, 90); + put_apply_state(&mut lb, 11, 110); + put_raft_state(&mut lb, 12, 100, 90); + put_apply_state(&mut lb, 12, 95); + + // region state doesn't contains the peer itself. + put_region_state(&mut lb, 13, &[]); + + debugger.raft_engine.consume(&mut lb, true).unwrap(); + + let mut bad_regions = debugger.bad_regions().unwrap(); + bad_regions.sort_by(|a, b| a.0.cmp(&b.0)); + assert_eq!(bad_regions.len(), 4); + for (i, (region_id, _)) in bad_regions.into_iter().enumerate() { + assert_eq!(region_id, (10 + i) as u64); + } + } + #[test] fn test_tombstone_regions() { let dir = test_util::temp_dir("test-debugger", false); From c486f7ce6364bd27c93d2236aeef56bc698e2f9e Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 6 Jun 2023 21:39:40 +0800 Subject: [PATCH 0721/1149] raftstore-v2: reduce raft log size (#14700) ref tikv/tikv#12842 Reduce raft log size of raftstore-v2 Signed-off-by: tabokie Co-authored-by: tonyxuqqi --- Cargo.lock | 21 ++-- components/engine_panic/src/misc.rs | 15 +++ components/engine_panic/src/raft_engine.rs | 6 +- components/engine_rocks/src/misc.rs | 96 ++++++++++++++++++- components/engine_rocks/src/raft_engine.rs | 22 +---- 
components/engine_rocks/src/raw.rs | 8 +- components/engine_traits/src/misc.rs | 12 +++ components/engine_traits/src/raft_engine.rs | 15 ++- components/raft_log_engine/src/engine.rs | 13 +-- components/raftstore-v2/src/batch/store.rs | 35 ++++++- .../tests/integrations/cluster.rs | 8 +- components/raftstore/src/store/config.rs | 8 ++ src/config/mod.rs | 19 +++- src/server/gc_worker/gc_worker.rs | 22 +++-- tests/failpoints/cases/test_gc_metrics.rs | 10 +- tests/failpoints/cases/test_snap.rs | 2 +- tests/failpoints/cases/test_stale_peer.rs | 2 +- tests/integrations/config/mod.rs | 3 +- tests/integrations/config/test-custom.toml | 1 + tests/integrations/raftstore/test_snap.rs | 2 +- .../integrations/raftstore/test_tombstone.rs | 2 +- 21 files changed, 247 insertions(+), 75 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9bfb57c5da4..4d74871168e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3061,7 +3061,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#b72e0fa0f2e5a0016bb55c11756ee714445c0365" +source = "git+https://github.com/tikv/rust-rocksdb.git#9db52f6188f7052e87dae21f1f41772263aef3c6" dependencies = [ "bindgen 0.57.0", "bzip2-sys", @@ -3080,7 +3080,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#b72e0fa0f2e5a0016bb55c11756ee714445c0365" +source = "git+https://github.com/tikv/rust-rocksdb.git#9db52f6188f7052e87dae21f1f41772263aef3c6" dependencies = [ "bzip2-sys", "cc", @@ -3238,6 +3238,15 @@ dependencies = [ "libc 0.2.139", ] +[[package]] +name = "memmap2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d28bba84adfe6646737845bc5ebbfa2c08424eb1c37e94a1fd2a82adb56a872" +dependencies = [ + "libc 0.2.139", +] + [[package]] name = "memoffset" version = "0.6.4" @@ -4432,7 +4441,7 @@ dependencies = [ [[package]] name = "raft-engine" version = "0.3.0" 
-source = "git+https://github.com/tikv/raft-engine.git#39f4db451295dbd8b30db4f94f220182c2c65be9" +source = "git+https://github.com/tikv/raft-engine.git#773b89fb24276995e5fd26a7e28550796966d9cd" dependencies = [ "byteorder", "crc32fast", @@ -4446,7 +4455,7 @@ dependencies = [ "libc 0.2.139", "log", "lz4-sys", - "memmap2", + "memmap2 0.6.2", "nix 0.26.2", "num-derive", "num-traits", @@ -5031,7 +5040,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#b72e0fa0f2e5a0016bb55c11756ee714445c0365" +source = "git+https://github.com/tikv/rust-rocksdb.git#9db52f6188f7052e87dae21f1f41772263aef3c6" dependencies = [ "libc 0.2.139", "librocksdb_sys", @@ -5912,7 +5921,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac457d054f793cedfde6f32d21d692b8351cfec9084fefd0470c0373f6d799bc" dependencies = [ "debugid", - "memmap2", + "memmap2 0.5.3", "stable_deref_trait", "uuid 1.2.1", ] diff --git a/components/engine_panic/src/misc.rs b/components/engine_panic/src/misc.rs index 027612d588e..531af76b3de 100644 --- a/components/engine_panic/src/misc.rs +++ b/components/engine_panic/src/misc.rs @@ -31,6 +31,14 @@ impl MiscExt for PanicEngine { panic!() } + fn flush_oldest_cf( + &self, + wait: bool, + age_threshold: Option, + ) -> Result<()> { + panic!() + } + fn delete_ranges_cf( &self, cf: &str, @@ -107,4 +115,11 @@ impl MiscExt for PanicEngine { fn is_stalled_or_stopped(&self) -> bool { panic!() } + + fn get_active_memtable_stats_cf( + &self, + cf: &str, + ) -> Result> { + panic!() + } } diff --git a/components/engine_panic/src/raft_engine.rs b/components/engine_panic/src/raft_engine.rs index c0539c1edd5..39d0e2a1d62 100644 --- a/components/engine_panic/src/raft_engine.rs +++ b/components/engine_panic/src/raft_engine.rs @@ -31,10 +31,6 @@ impl RaftEngineReadOnly for PanicEngine { panic!() } - fn get_all_entries_to(&self, region_id: u64, buf: &mut Vec) -> Result<()> { - panic!() - } - fn 
is_empty(&self) -> Result { panic!() } @@ -79,7 +75,7 @@ impl RaftEngineReadOnly for PanicEngine { impl RaftEngineDebug for PanicEngine { fn scan_entries(&self, _: u64, _: F) -> Result<()> where - F: FnMut(&Entry) -> Result, + F: FnMut(Entry) -> Result, { panic!() } diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index c4b5fa4946e..5fae5a68e96 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -4,7 +4,7 @@ use engine_traits::{ CfNamesExt, DeleteStrategy, ImportExt, IterOptions, Iterable, Iterator, MiscExt, Mutable, Range, RangeStats, Result, SstWriter, SstWriterBuilder, WriteBatch, WriteBatchExt, }; -use rocksdb::Range as RocksRange; +use rocksdb::{FlushOptions, Range as RocksRange}; use tikv_util::{box_try, keybuilder::KeyBuilder}; use crate::{ @@ -138,14 +138,52 @@ impl MiscExt for RocksEngine { handles.push(util::get_cf_handle(self.as_inner(), cf)?); } } - self.as_inner() - .flush_cfs(&handles, wait, false) - .map_err(r2e) + let mut fopts = FlushOptions::default(); + fopts.set_wait(wait); + fopts.set_allow_write_stall(true); + self.as_inner().flush_cfs(&handles, &fopts).map_err(r2e) } fn flush_cf(&self, cf: &str, wait: bool) -> Result<()> { let handle = util::get_cf_handle(self.as_inner(), cf)?; - self.as_inner().flush_cf(handle, wait, false).map_err(r2e) + let mut fopts = FlushOptions::default(); + fopts.set_wait(wait); + fopts.set_allow_write_stall(true); + self.as_inner().flush_cf(handle, &fopts).map_err(r2e) + } + + // Don't flush if a memtable is just flushed within the threshold. 
+ fn flush_oldest_cf( + &self, + wait: bool, + age_threshold: Option, + ) -> Result<()> { + let cfs = self.cf_names(); + let mut handles = Vec::with_capacity(cfs.len()); + for cf in cfs { + handles.push(util::get_cf_handle(self.as_inner(), cf)?); + } + if let Some((handle, time)) = handles + .into_iter() + .filter_map(|handle| { + self.as_inner() + .get_approximate_active_memtable_stats_cf(handle) + .map(|(_, time)| (handle, time)) + }) + .min_by(|(_, a), (_, b)| a.cmp(b)) + && age_threshold.map_or(true, |threshold| time <= threshold) + { + let mut fopts = FlushOptions::default(); + fopts.set_wait(wait); + fopts.set_allow_write_stall(true); + fopts.set_check_if_compaction_disabled(true); + fopts.set_expected_oldest_key_time(time); + return self + .as_inner() + .flush_cf(handle, &fopts) + .map_err(r2e); + } + Ok(()) } fn delete_ranges_cf( @@ -372,6 +410,16 @@ impl MiscExt for RocksEngine { .unwrap_or_default() != 0 } + + fn get_active_memtable_stats_cf( + &self, + cf: &str, + ) -> Result> { + let handle = util::get_cf_handle(self.as_inner(), cf)?; + Ok(self + .as_inner() + .get_approximate_active_memtable_stats_cf(handle)) + } } #[cfg(test)] @@ -692,4 +740,42 @@ mod tests { let expected = vec![(b"k1".to_vec(), b"k8".to_vec())]; assert_eq!(sst_range, expected); } + + #[test] + fn test_flush_oldest() { + let path = Builder::new() + .prefix("test_flush_oldest") + .tempdir() + .unwrap(); + let path_str = path.path().to_str().unwrap(); + + let mut opts = RocksDbOptions::default(); + opts.create_if_missing(true); + + let db = new_engine(path_str, ALL_CFS).unwrap(); + db.put_cf("default", b"k", b"v").unwrap(); + std::thread::sleep(std::time::Duration::from_secs(1)); + db.put_cf("write", b"k", b"v").unwrap(); + db.put_cf("lock", b"k", b"v").unwrap(); + assert_eq!( + db.get_total_sst_files_size_cf("default").unwrap().unwrap(), + 0 + ); + assert_eq!(db.get_total_sst_files_size_cf("write").unwrap().unwrap(), 0); + 
assert_eq!(db.get_total_sst_files_size_cf("lock").unwrap().unwrap(), 0); + let now = std::time::SystemTime::now(); + db.flush_oldest_cf(true, Some(now - std::time::Duration::from_secs(5))) + .unwrap(); + assert_eq!( + db.get_total_sst_files_size_cf("default").unwrap().unwrap(), + 0 + ); + assert_eq!(db.get_total_sst_files_size_cf("write").unwrap().unwrap(), 0); + assert_eq!(db.get_total_sst_files_size_cf("lock").unwrap().unwrap(), 0); + db.flush_oldest_cf(true, Some(now - std::time::Duration::from_secs(1))) + .unwrap(); + assert_eq!(db.get_total_sst_files_size_cf("write").unwrap().unwrap(), 0); + assert_eq!(db.get_total_sst_files_size_cf("lock").unwrap().unwrap(), 0); + assert!(db.get_total_sst_files_size_cf("default").unwrap().unwrap() > 0); + } } diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index a0a5acd5dd8..c11186ef443 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -108,24 +108,6 @@ impl RaftEngineReadOnly for RocksEngine { Err(Error::EntriesUnavailable) } - fn get_all_entries_to(&self, region_id: u64, buf: &mut Vec) -> Result<()> { - let start_key = keys::raft_log_key(region_id, 0); - let end_key = keys::raft_log_key(region_id, u64::MAX); - self.scan( - CF_DEFAULT, - &start_key, - &end_key, - false, // fill_cache - |_, value| { - let mut entry = Entry::default(); - entry.merge_from_bytes(value)?; - buf.push(entry); - Ok(true) - }, - )?; - Ok(()) - } - fn is_empty(&self) -> Result { let mut is_empty = true; self.scan(CF_DEFAULT, b"", b"", false, |_, _| { @@ -178,7 +160,7 @@ impl RaftEngineReadOnly for RocksEngine { impl RaftEngineDebug for RocksEngine { fn scan_entries(&self, raft_group_id: u64, mut f: F) -> Result<()> where - F: FnMut(&Entry) -> Result, + F: FnMut(Entry) -> Result, { let start_key = keys::raft_log_key(raft_group_id, 0); let end_key = keys::raft_log_key(raft_group_id, u64::MAX); @@ -190,7 +172,7 @@ impl RaftEngineDebug for 
RocksEngine { |_, value| { let mut entry = Entry::default(); entry.merge_from_bytes(value)?; - f(&entry) + f(entry) }, ) } diff --git a/components/engine_rocks/src/raw.rs b/components/engine_rocks/src/raw.rs index 474137534f8..f2c6d862280 100644 --- a/components/engine_rocks/src/raw.rs +++ b/components/engine_rocks/src/raw.rs @@ -12,8 +12,8 @@ pub use rocksdb::{ CompactionFilterValueType, CompactionJobInfo, CompactionOptions, CompactionPriority, ConcurrentTaskLimiter, DBBottommostLevelCompaction, DBCompactionFilter, DBCompactionStyle, DBCompressionType, DBEntryType, DBRateLimiterMode, DBRecoveryMode, DBStatisticsTickerType, - DBTableFileCreationReason, DBTitanDBBlobRunMode, Env, EventListener, IngestExternalFileOptions, - LRUCacheOptions, MemoryAllocator, PerfContext, PrepopulateBlockCache, Range, RateLimiter, - SliceTransform, Statistics, TablePropertiesCollector, TablePropertiesCollectorFactory, - WriteBufferManager, + DBTableFileCreationReason, DBTitanDBBlobRunMode, Env, EventListener, FlushOptions, + IngestExternalFileOptions, LRUCacheOptions, MemoryAllocator, PerfContext, + PrepopulateBlockCache, Range, RateLimiter, SliceTransform, Statistics, + TablePropertiesCollector, TablePropertiesCollectorFactory, WriteBufferManager, }; diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index 1a05a5de374..a537d1c5d2a 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -74,6 +74,12 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { fn flush_cf(&self, cf: &str, wait: bool) -> Result<()>; + fn flush_oldest_cf( + &self, + wait: bool, + age_threshold: Option, + ) -> Result<()>; + fn delete_ranges_cfs(&self, strategy: DeleteStrategy, ranges: &[Range<'_>]) -> Result<()> { for cf in self.cf_names() { self.delete_ranges_cf(cf, strategy.clone(), ranges)?; @@ -134,4 +140,10 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { fn get_range_stats(&self, cf: &str, start: &[u8], end: &[u8]) 
-> Result>; fn is_stalled_or_stopped(&self) -> bool; + + /// Returns size and age of active memtable if there's one. + fn get_active_memtable_stats_cf( + &self, + cf: &str, + ) -> Result>; } diff --git a/components/engine_traits/src/raft_engine.rs b/components/engine_traits/src/raft_engine.rs index 671fed8b3cf..01b8fca875b 100644 --- a/components/engine_traits/src/raft_engine.rs +++ b/components/engine_traits/src/raft_engine.rs @@ -47,23 +47,28 @@ pub trait RaftEngineReadOnly: Sync + Send + 'static { max_size: Option, to: &mut Vec, ) -> Result; - - /// Get all available entries in the region. - fn get_all_entries_to(&self, region_id: u64, buf: &mut Vec) -> Result<()>; } pub trait RaftEngineDebug: RaftEngine + Sync + Send + 'static { /// Scan all log entries of given raft group in order. fn scan_entries(&self, raft_group_id: u64, f: F) -> Result<()> where - F: FnMut(&Entry) -> Result; + F: FnMut(Entry) -> Result; + + /// Get all available entries in the region. + fn get_all_entries_to(&self, raft_group_id: u64, buf: &mut Vec) -> Result<()> { + self.scan_entries(raft_group_id, |e| { + buf.push(e); + Ok(true) + }) + } /// Put all data of given raft group into a log batch. 
fn dump_all_data(&self, region_id: u64) -> ::LogBatch { let mut batch = self.log_batch(0); let mut entries = Vec::new(); self.scan_entries(region_id, |e| { - entries.push(e.clone()); + entries.push(e); Ok(true) }) .unwrap(); diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 512da0b79a4..418684715f9 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -521,15 +521,6 @@ impl RaftEngineReadOnly for RaftLogEngine { .map_err(transfer_error) } - fn get_all_entries_to(&self, raft_group_id: u64, buf: &mut Vec) -> Result<()> { - if let Some(first) = self.0.first_index(raft_group_id) { - let last = self.0.last_index(raft_group_id).unwrap(); - buf.reserve((last - first + 1) as usize); - self.fetch_entries_to(raft_group_id, first, last + 1, None, buf)?; - } - Ok(()) - } - fn is_empty(&self) -> Result { self.get_store_ident().map(|i| i.is_none()) } @@ -629,12 +620,12 @@ impl RaftEngineReadOnly for RaftLogEngine { impl RaftEngineDebug for RaftLogEngine { fn scan_entries(&self, raft_group_id: u64, mut f: F) -> Result<()> where - F: FnMut(&Entry) -> Result, + F: FnMut(Entry) -> Result, { if let Some(first_index) = self.first_index(raft_group_id) { for idx in first_index..=self.last_index(raft_group_id).unwrap() { if let Some(entry) = self.get_entry(raft_group_id, idx)? { - if !f(&entry)? { + if !f(entry)? 
{ break; } } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 3fc312cdffc..38342e8eea7 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -44,7 +44,7 @@ use tikv_util::{ config::{Tracker, VersionTrack}, log::SlogFormat, sys::SysQuota, - time::{duration_to_sec, Instant as TiInstant}, + time::{duration_to_sec, Instant as TiInstant, Limiter}, timer::SteadyTimer, worker::{Builder, LazyWorker, Scheduler, Worker}, yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, @@ -63,6 +63,9 @@ use crate::{ Error, Result, }; +const MIN_MANUAL_FLUSH_RATE: f64 = 0.3; +const MAX_MANUAL_FLUSH_PERIOD: Duration = Duration::from_secs(60); + /// A per-thread context shared by the [`StoreFsm`] and multiple [`PeerFsm`]s. pub struct StoreContext { /// A logger without any KV. It's clean for creating new PeerFSM. @@ -617,12 +620,40 @@ impl StoreSystem { let raft_clone = raft_engine.clone(); let logger = self.logger.clone(); let router = router.clone(); + let registry = tablet_registry.clone(); + let limiter = Limiter::new(MIN_MANUAL_FLUSH_RATE); + let mut max_rate = cfg.value().max_manual_flush_rate; + if max_rate < MIN_MANUAL_FLUSH_RATE { + max_rate = MIN_MANUAL_FLUSH_RATE; + } worker.spawn_interval_task(cfg.value().raft_engine_purge_interval.0, move || { let _guard = WithIoType::new(IoType::RewriteLog); match raft_clone.manual_purge() { - Ok(regions) => { + Ok(mut regions) => { + if regions.is_empty() { + return; + } + warn!(logger, "flushing oldest cf of regions {regions:?}"); + // Try to finish flush in 1m. + let rate = regions.len() as f64 / MAX_MANUAL_FLUSH_PERIOD.as_secs_f64(); + let rate = rate.clamp(MIN_MANUAL_FLUSH_RATE, max_rate); + limiter.set_speed_limit(rate); + // Return early if there're too many regions. + regions.truncate((rate * MAX_MANUAL_FLUSH_PERIOD.as_secs_f64()) as usize); + // Skip tablets that are flushed elsewhere. 
+ let threshold = std::time::SystemTime::now() - Duration::from_secs(60 * 2); for r in regions { let _ = router.send(r, PeerMsg::ForceCompactLog); + if let Some(mut t) = registry.get(r) + && let Some(t) = t.latest() + { + if let Err(e) = t.flush_oldest_cf(true, Some(threshold)) { + warn!(logger, "failed to flush oldest cf"; "err" => %e); + } + } else { + continue; + } + std::thread::sleep(limiter.consume_duration(1)); } } Err(e) => { diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 5a52c0809db..0849a4cf96f 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -920,7 +920,7 @@ pub mod merge_helper { pub mod life_helper { use std::assert_matches::assert_matches; - use engine_traits::RaftEngine; + use engine_traits::RaftEngineDebug; use kvproto::raft_serverpb::{ExtraMessageType, PeerState}; use super::*; @@ -951,7 +951,11 @@ pub mod life_helper { // TODO: make raft engine support more suitable way to verify range is empty. /// Verify all states in raft engine are cleared. - pub fn assert_tombstone(raft_engine: &impl RaftEngine, region_id: u64, peer: &metapb::Peer) { + pub fn assert_tombstone( + raft_engine: &impl RaftEngineDebug, + region_id: u64, + peer: &metapb::Peer, + ) { let mut buf = vec![]; raft_engine.get_all_entries_to(region_id, &mut buf).unwrap(); assert!(buf.is_empty(), "{:?}", buf); diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 087119c87f1..b362867851a 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -94,6 +94,10 @@ pub struct Config { pub raft_log_reserve_max_ticks: usize, // Old logs in Raft engine needs to be purged peridically. pub raft_engine_purge_interval: ReadableDuration, + // TODO: make it auto adjusted based on background flush rate. 
+ #[doc(hidden)] + #[online_config(hidden)] + pub max_manual_flush_rate: f64, // When a peer is not responding for this time, leader will not keep entry cache for it. pub raft_entry_cache_life_time: ReadableDuration, // Deprecated! The configuration has no effect. @@ -392,6 +396,7 @@ impl Default for Config { raft_log_gc_size_limit: None, raft_log_reserve_max_ticks: 6, raft_engine_purge_interval: ReadableDuration::secs(10), + max_manual_flush_rate: 1.0, raft_entry_cache_life_time: ReadableDuration::secs(30), raft_reject_transfer_leader_duration: ReadableDuration::secs(3), split_region_check_tick_interval: ReadableDuration::secs(10), @@ -905,6 +910,9 @@ impl Config { CONFIG_RAFTSTORE_GAUGE .with_label_values(&["raft_engine_purge_interval"]) .set(self.raft_engine_purge_interval.as_secs_f64()); + CONFIG_RAFTSTORE_GAUGE + .with_label_values(&["max_manual_flush_rate"]) + .set(self.max_manual_flush_rate); CONFIG_RAFTSTORE_GAUGE .with_label_values(&["raft_entry_cache_life_time"]) .set(self.raft_entry_cache_life_time.as_secs_f64()); diff --git a/src/config/mod.rs b/src/config/mod.rs index 8ee16eef90d..61a4a7f05e4 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1304,7 +1304,7 @@ impl Default for DbConfig { allow_concurrent_memtable_write: None, write_buffer_limit: None, write_buffer_stall_ratio: 0.0, - write_buffer_flush_oldest_first: false, + write_buffer_flush_oldest_first: true, paranoid_checks: None, defaultcf: DefaultCfConfig::default(), writecf: WriteCfConfig::default(), @@ -3392,6 +3392,23 @@ impl TikvConfig { if !self.raft_engine.enable { return Err("partitioned-raft-kv only supports raft log engine.".into()); } + let recovery_threads = cmp::min((SysQuota::cpu_cores_quota() * 1.5) as usize, 16); + if self.raft_engine.config.recovery_threads < recovery_threads { + info!( + "raft-engine.recovery-threads is too small. 
Set it to {} instead.", + recovery_threads, + ); + self.raft_engine.config.recovery_threads = recovery_threads; + } + // Filled in DbOptions::optimize_for. + let write_buffer_limit = self.rocksdb.write_buffer_limit.unwrap(); + if self.raft_engine.config.purge_threshold.0 < write_buffer_limit.0 * 2 { + self.raft_engine.config.purge_threshold.0 = write_buffer_limit.0 * 2; + info!( + "raft-engine.purge-threshold is too small. Set it to {} instead.", + self.raft_engine.config.purge_threshold, + ); + } if self.rocksdb.titan.enabled { return Err("partitioned-raft-kv doesn't support titan.".into()); } diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index f402148fa95..08232189552 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -1479,7 +1479,7 @@ mod tests { }; use api_version::{ApiV2, KvFormat, RawValue}; - use engine_rocks::{util::get_cf_handle, RocksEngine}; + use engine_rocks::{raw::FlushOptions, util::get_cf_handle, RocksEngine}; use engine_traits::Peekable as _; use futures::executor::block_on; use kvproto::{kvrpcpb::ApiVersion, metapb::Peer}; @@ -1855,7 +1855,9 @@ mod tests { must_prewrite_delete(&mut prefixed_engine, &k, &k, 151); must_commit(&mut prefixed_engine, &k, 151, 152); } - db.flush_cf(cf, true, false).unwrap(); + let mut fopts = FlushOptions::default(); + fopts.set_wait(true); + db.flush_cf(cf, &fopts).unwrap(); db.compact_range_cf(cf, None, None); for i in 0..100 { @@ -1930,7 +1932,9 @@ mod tests { must_commit(&mut prefixed_engine, &k, 151, 152); keys.push(Key::from_raw(&k)); } - db.flush_cf(cf, true, false).unwrap(); + let mut fopts = FlushOptions::default(); + fopts.set_wait(true); + db.flush_cf(cf, &fopts).unwrap(); assert_eq!(runner.mut_stats(GcKeyMode::txn).write.seek, 0); assert_eq!(runner.mut_stats(GcKeyMode::txn).write.next, 0); @@ -2088,7 +2092,9 @@ mod tests { for i in 10u64..30 { must_rollback(&mut prefixed_engine, b"k2\x00", i, true); } - db.flush_cf(cf, true, 
false).unwrap(); + let mut fopts = FlushOptions::default(); + fopts.set_wait(true); + db.flush_cf(cf, &fopts).unwrap(); must_gc(&mut prefixed_engine, b"k2\x00", 30); // Test tombstone counter works @@ -2147,7 +2153,9 @@ mod tests { must_prewrite_put(&mut prefixed_engine, b"k2", b"v2", b"k2", start_ts); must_commit(&mut prefixed_engine, b"k2", start_ts, commit_ts); } - db.flush_cf(cf, true, false).unwrap(); + let mut fopts = FlushOptions::default(); + fopts.set_wait(true); + db.flush_cf(cf, &fopts).unwrap(); let safepoint = versions as u64 * 2; runner @@ -2180,7 +2188,9 @@ mod tests { must_commit(&mut engine, b"key", 10, 20); let db = engine.kv_engine().unwrap().as_inner().clone(); let cf = get_cf_handle(&db, CF_WRITE).unwrap(); - db.flush_cf(cf, true, false).unwrap(); + let mut fopts = FlushOptions::default(); + fopts.set_wait(true); + db.flush_cf(cf, &fopts).unwrap(); let gate = FeatureGate::default(); gate.set_version("5.0.0").unwrap(); diff --git a/tests/failpoints/cases/test_gc_metrics.rs b/tests/failpoints/cases/test_gc_metrics.rs index 2a25285f54a..486cedcbd95 100644 --- a/tests/failpoints/cases/test_gc_metrics.rs +++ b/tests/failpoints/cases/test_gc_metrics.rs @@ -7,7 +7,7 @@ use std::{ }; use api_version::{ApiV2, KvFormat, RawValue}; -use engine_rocks::{util::get_cf_handle, RocksEngine}; +use engine_rocks::{raw::FlushOptions, util::get_cf_handle, RocksEngine}; use engine_traits::{CF_DEFAULT, CF_WRITE}; use kvproto::{ kvrpcpb::*, @@ -176,7 +176,9 @@ fn test_txn_gc_keys_handled() { must_commit(&mut prefixed_engine, &k, 151, 152); } - db.flush_cf(cf, true, false).unwrap(); + let mut fopts = FlushOptions::default(); + fopts.set_wait(true); + db.flush_cf(cf, &fopts).unwrap(); db.compact_range_cf(cf, None, None); @@ -344,7 +346,9 @@ fn test_raw_gc_keys_handled() { engine.write(&ctx, batch).unwrap(); let cf = get_cf_handle(&db, CF_DEFAULT).unwrap(); - db.flush_cf(cf, true, false).unwrap(); + let mut fopts = FlushOptions::default(); + fopts.set_wait(true); + 
db.flush_cf(cf, &fopts).unwrap(); db.compact_range_cf(cf, None, None); diff --git a/tests/failpoints/cases/test_snap.rs b/tests/failpoints/cases/test_snap.rs index a090ba8530c..faf0e6b2476 100644 --- a/tests/failpoints/cases/test_snap.rs +++ b/tests/failpoints/cases/test_snap.rs @@ -11,7 +11,7 @@ use std::{ time::Duration, }; -use engine_traits::RaftEngineReadOnly; +use engine_traits::{RaftEngineDebug, RaftEngineReadOnly}; use kvproto::raft_serverpb::RaftMessage; use raft::eraftpb::MessageType; use test_raftstore::*; diff --git a/tests/failpoints/cases/test_stale_peer.rs b/tests/failpoints/cases/test_stale_peer.rs index b171cebd173..39fa09ef014 100644 --- a/tests/failpoints/cases/test_stale_peer.rs +++ b/tests/failpoints/cases/test_stale_peer.rs @@ -6,7 +6,7 @@ use std::{ time::Duration, }; -use engine_traits::RaftEngineReadOnly; +use engine_traits::{RaftEngineDebug, RaftEngineReadOnly}; use futures::executor::block_on; use kvproto::raft_serverpb::{PeerState, RaftLocalState, RaftMessage}; use pd_client::PdClient; diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 669657e03e3..a05d6f2456c 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -188,6 +188,7 @@ fn test_serde_custom_tikv_config() { raft_log_gc_size_limit: Some(ReadableSize::kb(1)), raft_log_reserve_max_ticks: 100, raft_engine_purge_interval: ReadableDuration::minutes(20), + max_manual_flush_rate: 5.0, raft_entry_cache_life_time: ReadableDuration::secs(12), raft_reject_transfer_leader_duration: ReadableDuration::secs(3), split_region_check_tick_interval: ReadableDuration::secs(12), @@ -325,7 +326,7 @@ fn test_serde_custom_tikv_config() { enable_unordered_write: true, write_buffer_limit: Some(ReadableSize::gb(1)), write_buffer_stall_ratio: 0.0, - write_buffer_flush_oldest_first: false, + write_buffer_flush_oldest_first: true, defaultcf: DefaultCfConfig { block_size: ReadableSize::kb(12), block_cache_size: ReadableSize::gb(12), diff 
--git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index c08d91ad888..3f6c920071f 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -159,6 +159,7 @@ raft-log-gc-count-limit = 12 raft-log-gc-size-limit = "1KB" raft-log-reserve-max-ticks = 100 raft-engine-purge-interval = "20m" +max-manual-flush-rate = 5.0 raft-entry-cache-life-time = "12s" split-region-check-tick-interval = "12s" region-split-check-diff = "20MB" diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index 465c54d9a77..fbe2a1d9cb2 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -12,7 +12,7 @@ use std::{ }; use collections::HashMap; -use engine_traits::{Checkpointer, KvEngine, RaftEngineReadOnly}; +use engine_traits::{Checkpointer, KvEngine, RaftEngineDebug}; use file_system::{IoOp, IoType}; use futures::executor::block_on; use grpcio::{self, ChannelBuilder, Environment}; diff --git a/tests/integrations/raftstore/test_tombstone.rs b/tests/integrations/raftstore/test_tombstone.rs index 972a75212b4..c1cd0befcf1 100644 --- a/tests/integrations/raftstore/test_tombstone.rs +++ b/tests/integrations/raftstore/test_tombstone.rs @@ -3,7 +3,7 @@ use std::{sync::Arc, thread, time::Duration}; use crossbeam::channel; -use engine_traits::{CfNamesExt, Iterable, Peekable, RaftEngineReadOnly, SyncMutable, CF_RAFT}; +use engine_traits::{CfNamesExt, Iterable, Peekable, RaftEngineDebug, SyncMutable, CF_RAFT}; use kvproto::raft_serverpb::{PeerState, RaftMessage, RegionLocalState, StoreIdent}; use protobuf::Message; use raft::eraftpb::MessageType; From ae9094e4364af60bb6c231a16341c654d13b8f08 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 6 Jun 2023 21:53:41 +0800 Subject: [PATCH 0722/1149] tikv-ctl: recognize engine type by data dir (#14881) ref tikv/tikv#14654 recognize 
engine type by data dir Signed-off-by: Spade A Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- cmd/tikv-ctl/src/executor.rs | 32 ++++++++++++++++++++++++++------ cmd/tikv-ctl/src/main.rs | 1 + 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 79f7a6f2842..c40c43bd397 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -12,6 +12,7 @@ use engine_traits::{ Engines, Error as EngineError, RaftEngine, TabletRegistry, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, }; +use file_system::read_dir; use futures::{executor::block_on, future, stream, Stream, StreamExt, TryStreamExt}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ @@ -56,6 +57,27 @@ pub const LOCK_FILE_ERROR: &str = "IO error: While lock file"; type MvccInfoStream = Pin, MvccInfo), String>>>>; +fn get_engine_type(dir: &str) -> EngineType { + let mut entries = read_dir(dir).unwrap(); + let mut engine1 = false; + let mut engine2 = false; + while let Some(Ok(e)) = entries.next() { + if let Ok(ty) = e.file_type() && ty.is_dir() { + if e.file_name() == "tablets" { + engine2 = true; + } else if e.file_name() == "db" { + engine1 = true; + } + } + } + assert_ne!(engine1, engine2); + if engine1 { + EngineType::RaftKv + } else { + EngineType::RaftKv2 + } +} + pub fn new_debug_executor( cfg: &TikvConfig, data_dir: Option<&str>, @@ -68,15 +90,13 @@ pub fn new_debug_executor( // TODO: perhaps we should allow user skip specifying data path. 
let data_dir = data_dir.unwrap(); + let engine_type = get_engine_type(data_dir); let key_manager = data_key_manager_from_config(&cfg.security.encryption, &cfg.storage.data_dir) .unwrap() .map(Arc::new); - let cache = cfg - .storage - .block_cache - .build_shared_cache(cfg.storage.engine); + let cache = cfg.storage.block_cache.build_shared_cache(engine_type); let env = cfg .build_shared_rocks_env(key_manager.clone(), None /* io_rate_limiter */) .unwrap(); @@ -87,7 +107,7 @@ pub fn new_debug_executor( let cfg_controller = ConfigController::default(); if !cfg.raft_engine.enable { - assert_eq!(EngineType::RaftKv, cfg.storage.engine); + assert_eq!(EngineType::RaftKv, engine_type); let raft_db_opts = cfg.raftdb.build_opt(env, None); let raft_db_cf_opts = cfg.raftdb.build_cf_opts(factory.block_cache()); let raft_path = cfg.infer_raft_db_path(Some(data_dir)).unwrap(); @@ -117,7 +137,7 @@ pub fn new_debug_executor( tikv_util::logger::exit_process_gracefully(-1); } let raft_db = RaftLogEngine::new(config, key_manager, None /* io_rate_limiter */).unwrap(); - match cfg.storage.engine { + match engine_type { EngineType::RaftKv => { let kv_db = match factory.create_shared_db(data_dir) { Ok(db) => db, diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 613d6168f7f..ac31f0700f9 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -1,6 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. #![feature(once_cell)] +#![feature(let_chains)] #[macro_use] extern crate log; From a7e41e05e7ff339003c1974c269aee75d16d6f90 Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 7 Jun 2023 17:49:42 +0800 Subject: [PATCH 0723/1149] rocksdb: support setting `ttl` and `periodic_compaction_seconds`. (#14880) close tikv/tikv#14873 Expose the configuration settings on CF.ttl and CF.periodic_compaction_seconds. 
Signed-off-by: lucasliang --- etc/config-template.toml | 13 ++++++++++++ src/config/mod.rs | 36 ++++++++++++++++++++++++++++++++ tests/integrations/config/mod.rs | 10 +++++++++ 3 files changed, 59 insertions(+) diff --git a/etc/config-template.toml b/etc/config-template.toml index 255ec3eea4c..c91fd0646d2 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -881,6 +881,19 @@ ## The maximum number of concurrent compaction tasks. 0 stands for no limit. # max-compactions = 0 +## SST files containing updates older than TTL will go through the compaction +## process. This usually happens in a cascading way so that those entries +## will be compacted to bottommost level/file. +## +## Default: 30 days. +# ttl = "30d" + +## SST files older than this value will be picked up for compaction, and +## re-written to the same level as they were before. +## +## Default: 30 days. +# periodic-compaction-seconds = "30d" + ## Options for "Default" Column Family for `Titan`. [rocksdb.defaultcf.titan] ## The smallest value to store in blob files. Value smaller than diff --git a/src/config/mod.rs b/src/config/mod.rs index 61a4a7f05e4..7ac76bdd6d2 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -380,6 +380,14 @@ macro_rules! cf_config { pub checksum: ChecksumType, #[online_config(skip)] pub max_compactions: u32, + // `ttl == None` means using default setting in Rocksdb. + // `ttl` in Rocksdb is 30 days as default. + #[online_config(skip)] + pub ttl: Option, + // `periodic_compaction_seconds == None` means using default setting in Rocksdb. + // `periodic_compaction_seconds` in Rocksdb is 30 days as default. + #[online_config(skip)] + pub periodic_compaction_seconds: Option, #[online_config(submodule)] pub titan: TitanCfConfig, } @@ -648,6 +656,12 @@ macro_rules! 
build_cf_opt { if let Some(r) = $compaction_limiter { cf_opts.set_compaction_thread_limiter(r); } + if let Some(ttl) = $opt.ttl { + cf_opts.set_ttl(ttl.0.as_secs()); + } + if let Some(secs) = $opt.periodic_compaction_seconds { + cf_opts.set_periodic_compaction_seconds(secs.0.as_secs()); + } cf_opts }}; } @@ -718,6 +732,8 @@ impl Default for DefaultCfConfig { format_version: 2, checksum: ChecksumType::CRC32c, max_compactions: 0, + ttl: None, + periodic_compaction_seconds: None, titan: TitanCfConfig::default(), } } @@ -884,6 +900,8 @@ impl Default for WriteCfConfig { format_version: 2, checksum: ChecksumType::CRC32c, max_compactions: 0, + ttl: None, + periodic_compaction_seconds: None, titan, } } @@ -1004,6 +1022,8 @@ impl Default for LockCfConfig { format_version: 2, checksum: ChecksumType::CRC32c, max_compactions: 0, + ttl: None, + periodic_compaction_seconds: None, titan, } } @@ -1099,6 +1119,8 @@ impl Default for RaftCfConfig { format_version: 2, checksum: ChecksumType::CRC32c, max_compactions: 0, + ttl: None, + periodic_compaction_seconds: None, titan, } } @@ -1621,6 +1643,8 @@ impl Default for RaftDefaultCfConfig { format_version: 2, checksum: ChecksumType::CRC32c, max_compactions: 0, + ttl: None, + periodic_compaction_seconds: None, titan: TitanCfConfig::default(), } } @@ -5888,6 +5912,18 @@ mod tests { cfg.rocksdb.lockcf.ribbon_filter_above_level = None; cfg.rocksdb.raftcf.ribbon_filter_above_level = None; cfg.raftdb.defaultcf.ribbon_filter_above_level = None; + // ColumnFamily::ttl + cfg.rocksdb.defaultcf.ttl = None; + cfg.rocksdb.writecf.ttl = None; + cfg.rocksdb.lockcf.ttl = None; + cfg.rocksdb.raftcf.ttl = None; + cfg.raftdb.defaultcf.ttl = None; + // ColumnFamily::periodic_compaction_seconds + cfg.rocksdb.defaultcf.periodic_compaction_seconds = None; + cfg.rocksdb.writecf.periodic_compaction_seconds = None; + cfg.rocksdb.lockcf.periodic_compaction_seconds = None; + cfg.rocksdb.raftcf.periodic_compaction_seconds = None; + 
cfg.raftdb.defaultcf.periodic_compaction_seconds = None; cfg.coprocessor .optimize_for(default_cfg.storage.engine == EngineType::RaftKv2); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index a05d6f2456c..46e179a219c 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -383,6 +383,8 @@ fn test_serde_custom_tikv_config() { format_version: 5, checksum: ChecksumType::XXH3, max_compactions: 3, + ttl: None, + periodic_compaction_seconds: None, }, writecf: WriteCfConfig { block_size: ReadableSize::kb(12), @@ -454,6 +456,8 @@ fn test_serde_custom_tikv_config() { format_version: 5, checksum: ChecksumType::XXH3, max_compactions: 3, + ttl: None, + periodic_compaction_seconds: None, }, lockcf: LockCfConfig { block_size: ReadableSize::kb(12), @@ -525,6 +529,8 @@ fn test_serde_custom_tikv_config() { format_version: 5, checksum: ChecksumType::XXH3, max_compactions: 3, + ttl: None, + periodic_compaction_seconds: None, }, raftcf: RaftCfConfig { block_size: ReadableSize::kb(12), @@ -596,6 +602,8 @@ fn test_serde_custom_tikv_config() { format_version: 5, checksum: ChecksumType::XXH3, max_compactions: 3, + ttl: None, + periodic_compaction_seconds: None, }, titan: titan_db_config.clone(), }; @@ -682,6 +690,8 @@ fn test_serde_custom_tikv_config() { format_version: 5, checksum: ChecksumType::XXH3, max_compactions: 3, + ttl: None, + periodic_compaction_seconds: None, }, titan: titan_db_config, }; From 05bf36b3cf24b04cbf0bec44542ecaa94448f33d Mon Sep 17 00:00:00 2001 From: JmPotato Date: Wed, 7 Jun 2023 18:01:41 +0800 Subject: [PATCH 0724/1149] *: fix some typos (#14865) ref tikv/tikv#5456 Fix some typos. 
Signed-off-by: JmPotato Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- CHANGELOG.md | 2 +- components/raftstore/src/store/metrics.rs | 2 +- components/raftstore/src/store/worker/pd.rs | 6 +++--- src/storage/mvcc/txn.rs | 2 +- tests/failpoints/cases/test_disk_full.rs | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26fd52f2bd5..3092de110a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -696,7 +696,7 @@ See also [TiDB Release Notes](https://github.com/pingcap/docs/blob/master/releas + Support batch-split command and empty batch command [#5470](https://github.com/tikv/tikv/pull/5470) + Fix `PointGetter` performance issue when there are concurrent write [#5495](https://github.com/tikv/tikv/pull/5495) + Fix the output on short version flag [#5501](https://github.com/tikv/tikv/pull/5501) -+ Support the pessmistic transaction API: txn-heart-beat [#5507](https://github.com/tikv/tikv/pull/5507) ++ Support the pessimistic transaction API: txn-heart-beat [#5507](https://github.com/tikv/tikv/pull/5507) + `titan` GC and monitoring improvement [#5517](https://github.com/tikv/tikv/pull/5517) + Update `grpcio` to v0.4.5 [#5523](https://github.com/tikv/tikv/pull/5523) + Support GRPC memory quota [#5524](https://github.com/tikv/tikv/pull/5524) diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index eb6002c9a6f..796b49f5b49 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -769,7 +769,7 @@ lazy_static! 
{ "Total number of pending write tasks from io rescheduling peers" ).unwrap(); - pub static ref STORE_INSPECT_DURTION_HISTOGRAM: HistogramVec = + pub static ref STORE_INSPECT_DURATION_HISTOGRAM: HistogramVec = register_histogram_vec!( "tikv_raftstore_inspect_duration_seconds", "Bucketed histogram of inspect duration.", diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 656f42f3f44..413389d8cf6 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -2244,17 +2244,17 @@ where Box::new(move |id, duration| { let dur = duration.sum(); - STORE_INSPECT_DURTION_HISTOGRAM + STORE_INSPECT_DURATION_HISTOGRAM .with_label_values(&["store_process"]) .observe(tikv_util::time::duration_to_sec( duration.store_process_duration.unwrap(), )); - STORE_INSPECT_DURTION_HISTOGRAM + STORE_INSPECT_DURATION_HISTOGRAM .with_label_values(&["store_wait"]) .observe(tikv_util::time::duration_to_sec( duration.store_wait_duration.unwrap(), )); - STORE_INSPECT_DURTION_HISTOGRAM + STORE_INSPECT_DURATION_HISTOGRAM .with_label_values(&["all"]) .observe(tikv_util::time::duration_to_sec(dur)); if let Err(e) = scheduler.schedule(Task::UpdateSlowScore { id, duration }) { diff --git a/src/storage/mvcc/txn.rs b/src/storage/mvcc/txn.rs index 0eaca54f226..a446ef64d22 100644 --- a/src/storage/mvcc/txn.rs +++ b/src/storage/mvcc/txn.rs @@ -523,7 +523,7 @@ pub(crate) mod tests { } #[test] - fn test_mvcc_txn_pessmistic_prewrite_check_not_exist() { + fn test_mvcc_txn_pessimistic_prewrite_check_not_exist() { let mut engine = TestEngineBuilder::new().build().unwrap(); let k = b"k1"; try_pessimistic_prewrite_check_not_exists(&mut engine, k, k, 3).unwrap_err(); diff --git a/tests/failpoints/cases/test_disk_full.rs b/tests/failpoints/cases/test_disk_full.rs index fc28560c7f1..bd4271be12d 100644 --- a/tests/failpoints/cases/test_disk_full.rs +++ b/tests/failpoints/cases/test_disk_full.rs @@ -262,7 +262,7 @@ fn 
test_disk_full_txn_behaviors(usage: DiskUsage) { let lock_ts = get_tso(&pd_client); lead_client.must_kv_pessimistic_lock(b"k8".to_vec(), lock_ts); - // Test pessmistic rollback is allowed. + // Test pessimistic rollback is allowed. fail::cfg(get_fp(usage, 1), "return").unwrap(); lead_client.must_kv_pessimistic_rollback(b"k8".to_vec(), lock_ts); From c0eb6b65fabcff7bd839896cfeae6e65627761ed Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Wed, 7 Jun 2023 21:27:41 +0800 Subject: [PATCH 0725/1149] scheduler: use must_call for async write callback, add assertion for expected path (#14872) ref tikv/tikv#14838 Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/error_code/src/storage.rs | 1 + components/test_raftstore/src/util.rs | 25 +++++++- components/tikv_kv/src/lib.rs | 4 ++ src/server/metrics.rs | 1 + src/server/raftkv/mod.rs | 69 ++++++++++++++++++----- src/storage/txn/scheduler.rs | 22 ++++++-- tests/failpoints/cases/test_kv_service.rs | 35 +++++++++++- tests/failpoints/cases/test_storage.rs | 40 ++++++------- 8 files changed, 155 insertions(+), 42 deletions(-) diff --git a/components/error_code/src/storage.rs b/components/error_code/src/storage.rs index 8b41e7a797e..b8eb3072391 100644 --- a/components/error_code/src/storage.rs +++ b/components/error_code/src/storage.rs @@ -44,6 +44,7 @@ define_error_codes!( LOCK_IF_EXISTS_FAILED => ("LockIfExistsFailed", "", ""), PRIMARY_MISMATCH => ("PrimaryMismatch", "", ""), + UNDETERMINED => ("Undetermined", "", ""), UNKNOWN => ("Unknown", "", "") ); diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 079e3abf1ef..cd2fb8a2792 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -1001,6 +1001,29 @@ pub fn try_kv_prewrite_with( use_async_commit: bool, try_one_pc: bool, ) -> PrewriteResponse { + try_kv_prewrite_with_impl( + client, + ctx, + muts, + pk, + ts, + 
for_update_ts, + use_async_commit, + try_one_pc, + ) + .unwrap() +} + +pub fn try_kv_prewrite_with_impl( + client: &TikvClient, + ctx: Context, + muts: Vec, + pk: Vec, + ts: u64, + for_update_ts: u64, + use_async_commit: bool, + try_one_pc: bool, +) -> grpcio::Result { let mut prewrite_req = PrewriteRequest::default(); prewrite_req.set_context(ctx); if for_update_ts != 0 { @@ -1014,7 +1037,7 @@ pub fn try_kv_prewrite_with( prewrite_req.min_commit_ts = prewrite_req.start_version + 1; prewrite_req.use_async_commit = use_async_commit; prewrite_req.try_one_pc = try_one_pc; - client.kv_prewrite(&prewrite_req).unwrap() + client.kv_prewrite(&prewrite_req) } pub fn try_kv_prewrite( diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index d7954268aa7..293ae7fccc1 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -572,6 +572,8 @@ pub enum ErrorInner { EmptyRequest, #[error("key is locked (backoff or cleanup) {0:?}")] KeyIsLocked(kvproto::kvrpcpb::LockInfo), + #[error("undetermined write result {0:?}")] + Undetermined(String), #[error("unknown error {0:?}")] Other(#[from] Box), } @@ -595,6 +597,7 @@ impl ErrorInner { ErrorInner::Timeout(d) => Some(ErrorInner::Timeout(d)), ErrorInner::EmptyRequest => Some(ErrorInner::EmptyRequest), ErrorInner::KeyIsLocked(ref info) => Some(ErrorInner::KeyIsLocked(info.clone())), + ErrorInner::Undetermined(ref msg) => Some(ErrorInner::Undetermined(msg.clone())), ErrorInner::Other(_) => None, } } @@ -632,6 +635,7 @@ impl ErrorCodeExt for Error { ErrorInner::KeyIsLocked(_) => error_code::storage::KEY_IS_LOCKED, ErrorInner::Timeout(_) => error_code::storage::TIMEOUT, ErrorInner::EmptyRequest => error_code::storage::EMPTY_REQUEST, + ErrorInner::Undetermined(_) => error_code::storage::UNDETERMINED, ErrorInner::Other(_) => error_code::storage::UNKNOWN, } } diff --git a/src/server/metrics.rs b/src/server/metrics.rs index e690eff718e..c287d18680d 100644 --- a/src/server/metrics.rs +++ 
b/src/server/metrics.rs @@ -476,6 +476,7 @@ make_auto_flush_static_metric! { err_disk_full, err_recovery_in_progress, err_flashback_in_progress, + err_undetermind, } pub label_enum RequestTypeKind { diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 039f987c398..bbae97ea293 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -51,6 +51,7 @@ use raftstore::{ use thiserror::Error; use tikv_kv::{write_modifies, OnAppliedCb, WriteEvent}; use tikv_util::{ + callback::must_call, future::{paired_future_callback, paired_must_called_future_callback}, time::Instant, }; @@ -62,6 +63,8 @@ use crate::storage::{ kv::{Engine, Error as KvError, ErrorInner as KvErrorInner, Modify, SnapContext, WriteData}, }; +pub const ASYNC_WRITE_CALLBACK_DROPPED_ERR_MSG: &str = "async write on_applied callback is dropped"; + #[derive(Debug, Error)] pub enum Error { #[error("{}", .0.get_message())] @@ -79,6 +82,9 @@ pub enum Error { #[error("{0}")] InvalidRequest(String), + #[error("{0}")] + Undetermined(String), + #[error("timeout after {0:?}")] Timeout(Duration), } @@ -93,6 +99,7 @@ pub fn get_status_kind_from_engine_error(e: &kv::Error) -> RequestStatusKind { } KvError(box KvErrorInner::Timeout(_)) => RequestStatusKind::err_timeout, KvError(box KvErrorInner::EmptyRequest) => RequestStatusKind::err_empty_request, + KvError(box KvErrorInner::Undetermined(_)) => RequestStatusKind::err_undetermind, KvError(box KvErrorInner::Other(_)) => RequestStatusKind::err_other, } } @@ -103,6 +110,7 @@ impl From for kv::Error { fn from(e: Error) -> kv::Error { match e { Error::RequestFailed(e) => KvError::from(KvErrorInner::Request(e)), + Error::Undetermined(e) => KvError::from(KvErrorInner::Undetermined(e)), Error::Server(e) => e.into(), e => box_err!(e), } @@ -119,7 +127,11 @@ where pub fn check_raft_cmd_response(resp: &mut RaftCmdResponse) -> Result<()> { if resp.get_header().has_error() { - return Err(Error::RequestFailed(resp.take_header().take_error())); + let mut 
err = resp.take_header().take_error(); + if err.get_message() == ASYNC_WRITE_CALLBACK_DROPPED_ERR_MSG { + return Err(Error::Undetermined(err.take_message())); + } + return Err(Error::RequestFailed(err)); } Ok(()) @@ -205,6 +217,25 @@ pub fn drop_snapshot_callback() -> kv::Result { Err(kv::Error::from(kv::ErrorInner::Request(err))) } +pub fn async_write_callback_dropped_err() -> errorpb::Error { + let mut err = errorpb::Error::default(); + err.set_message(ASYNC_WRITE_CALLBACK_DROPPED_ERR_MSG.to_string()); + err +} + +pub fn drop_on_applied_callback() -> WriteResponse { + let bt = backtrace::Backtrace::new(); + error!("async write on_applied callback is dropped"; "backtrace" => ?bt); + let mut write_resp = WriteResponse { + response: Default::default(), + }; + write_resp + .response + .mut_header() + .set_error(async_write_callback_dropped_err()); + write_resp +} + struct WriteResCore { ev: AtomicU8, result: UnsafeCell>>, @@ -488,20 +519,30 @@ where Some(Box::new(move || tx.notify_committed()) as store::ExtCallback) }; let applied_tx = tx.clone(); - let applied_cb = Box::new(move |resp: WriteResponse| { - let mut res = match on_write_result::(resp) { - Ok(CmdRes::Resp(_)) => { - fail_point!("raftkv_async_write_finish"); - Ok(()) + let applied_cb = must_call( + Box::new(move |resp: WriteResponse| { + fail_point!("applied_cb_return_undetermined_err", |_| { + applied_tx.notify(Err(kv::Error::from(Error::Undetermined( + ASYNC_WRITE_CALLBACK_DROPPED_ERR_MSG.to_string(), + )))); + }); + let mut res = match on_write_result::(resp) { + Ok(CmdRes::Resp(_)) => { + fail_point!("raftkv_async_write_finish"); + Ok(()) + } + Ok(CmdRes::Snap(_)) => { + Err(box_err!("unexpect snapshot, should mutate instead.")) + } + Err(e) => Err(kv::Error::from(e)), + }; + if let Some(cb) = on_applied { + cb(&mut res); } - Ok(CmdRes::Snap(_)) => Err(box_err!("unexpect snapshot, should mutate instead.")), - Err(e) => Err(kv::Error::from(e)), - }; - if let Some(cb) = on_applied { - cb(&mut res); - } 
- applied_tx.notify(res); - }); + applied_tx.notify(res); + }), + drop_on_applied_callback, + ); let cb = StoreCallback::write_ext(applied_cb, proposed_cb, committed_cb); let extra_opts = RaftCmdExtraOpts { diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index e7173bc9fef..127d2a39c36 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -832,6 +832,12 @@ impl TxnScheduler { Err(e) => { if !Self::is_undetermined_error(&e) { do_wake_up = false; + } else { + panic!( + "undetermined error: {:?} cid={}, tag={}, process + result={:?}", + e, cid, tag, &pr + ); } ProcessResult::Failed { err: StorageError::from(e), @@ -1069,10 +1075,18 @@ impl TxnScheduler { .unwrap(); } - fn is_undetermined_error(_e: &tikv_kv::Error) -> bool { - // TODO: If there's some cases that `engine.async_write` returns error but it's - // still possible that the data is successfully written, return true. - false + // Return true if raftstore returns error and the underlying write status could + // not be decided. 
+ fn is_undetermined_error(e: &tikv_kv::Error) -> bool { + if let tikv_kv::ErrorInner::Undetermined(err_msg) = &*(e.0) { + error!( + "undetermined error is encountered, exit the tikv-server msg={:?}", + err_msg + ); + true + } else { + false + } } fn early_response( diff --git a/tests/failpoints/cases/test_kv_service.rs b/tests/failpoints/cases/test_kv_service.rs index b81673af0e2..a2615c1c7b2 100644 --- a/tests/failpoints/cases/test_kv_service.rs +++ b/tests/failpoints/cases/test_kv_service.rs @@ -4,7 +4,10 @@ use std::{sync::Arc, time::Duration}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{kvrpcpb::*, tikvpb::TikvClient}; -use test_raftstore::{must_kv_prewrite, must_new_cluster_and_kv_client, must_new_cluster_mul}; +use test_raftstore::{ + must_kv_prewrite, must_new_cluster_and_kv_client, must_new_cluster_mul, + try_kv_prewrite_with_impl, +}; #[test] fn test_batch_get_memory_lock() { @@ -68,3 +71,33 @@ fn test_snapshot_not_block_grpc() { must_kv_prewrite(&client, ctx, vec![mutation], b"k".to_vec(), 10); fail::remove("after-snapshot"); } + +#[test] +fn test_undetermined_write_err() { + let (cluster, leader, ctx) = must_new_cluster_mul(1); + let env = Arc::new(Environment::new(1)); + let channel = ChannelBuilder::new(env) + .keepalive_time(Duration::from_millis(500)) + .keepalive_timeout(Duration::from_millis(500)) + .connect(&cluster.sim.read().unwrap().get_addr(leader.get_store_id())); + let client = TikvClient::new(channel); + + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(b"k".to_vec()); + mutation.set_value(b"v".to_vec()); + fail::cfg("applied_cb_return_undetermined_err", "return()").unwrap(); + let err = try_kv_prewrite_with_impl( + &client, + ctx, + vec![mutation], + b"k".to_vec(), + 10, + 0, + false, + false, + ) + .unwrap_err(); + assert_eq!(err.to_string(), "RpcFailure: 1-CANCELLED CANCELLED",); + fail::remove("applied_cb_return_undetermined_err"); +} diff --git 
a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index dd57f28ab94..3a20e080736 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -26,6 +26,7 @@ use resource_control::ResourceGroupManager; use test_raftstore::*; use tikv::{ config::{ConfigController, Module}, + server::raftkv::ASYNC_WRITE_CALLBACK_DROPPED_ERR_MSG, storage::{ self, config_manager::StorageConfigManger, @@ -41,6 +42,7 @@ use tikv::{ Error as StorageError, ErrorInner as StorageErrorInner, *, }, }; +use tikv_kv::ErrorInner::Undetermined; use tikv_util::{future::paired_future_callback, worker::dummy_scheduler, HandyRwLock}; use txn_types::{Key, Mutation, TimeStamp}; @@ -112,6 +114,9 @@ fn test_scheduler_leader_change_twice() { fn test_server_catching_api_error() { let raftkv_fp = "raftkv_early_error_report"; let mut cluster = new_server_cluster(0, 1); + // One scheduler worker thread would panic after processing the prewrite + // request because of undetermined error. 
+ cluster.cfg.storage.scheduler_worker_pool_size = 2; cluster.run(); let region = cluster.get_region(b""); let leader = region.get_peers()[0].clone(); @@ -140,12 +145,10 @@ fn test_server_catching_api_error() { prewrite_req.primary_lock = b"k3".to_vec(); prewrite_req.start_version = 1; prewrite_req.lock_ttl = prewrite_req.start_version + 1; - let prewrite_resp = client.kv_prewrite(&prewrite_req).unwrap(); - assert!(prewrite_resp.has_region_error(), "{:?}", prewrite_resp); - assert!( - prewrite_resp.get_region_error().has_region_not_found(), - "{:?}", - prewrite_resp + let prewrite_err = client.kv_prewrite(&prewrite_req).unwrap_err(); + assert_eq!( + prewrite_err.to_string(), + "RpcFailure: 1-CANCELLED CANCELLED" ); must_get_none(&cluster.get_engine(1), b"k3"); @@ -154,11 +157,9 @@ fn test_server_catching_api_error() { put_req.key = b"k3".to_vec(); put_req.value = b"v3".to_vec(); let put_resp = client.raw_put(&put_req).unwrap(); - assert!(put_resp.has_region_error(), "{:?}", put_resp); - assert!( - put_resp.get_region_error().has_region_not_found(), - "{:?}", - put_resp + assert_eq!( + put_resp.get_error(), + Undetermined(ASYNC_WRITE_CALLBACK_DROPPED_ERR_MSG.to_string()).to_string() ); must_get_none(&cluster.get_engine(1), b"k3"); @@ -198,11 +199,9 @@ fn test_raftkv_early_error_report() { put_req.key = k.to_vec(); put_req.value = b"v".to_vec(); let put_resp = client.raw_put(&put_req).unwrap(); - assert!(put_resp.has_region_error(), "{:?}", put_resp); - assert!( - put_resp.get_region_error().has_region_not_found(), - "{:?}", - put_resp + assert_eq!( + put_resp.get_error(), + Undetermined(ASYNC_WRITE_CALLBACK_DROPPED_ERR_MSG.to_string()).to_string() ); must_get_none(&cluster.get_engine(1), k); } @@ -218,15 +217,12 @@ fn test_raftkv_early_error_report() { put_req.value = b"v".to_vec(); let put_resp = client.raw_put(&put_req).unwrap(); if ctx.get_region_id() == injected_region_id { - assert!(put_resp.has_region_error(), "{:?}", put_resp); - assert!( - 
put_resp.get_region_error().has_region_not_found(), - "{:?}", - put_resp + assert_eq!( + put_resp.get_error(), + Undetermined(ASYNC_WRITE_CALLBACK_DROPPED_ERR_MSG.to_string()).to_string() ); must_get_none(&cluster.get_engine(1), k); } else { - assert!(!put_resp.has_region_error(), "{:?}", put_resp); must_get_equal(&cluster.get_engine(1), k, b"v"); } } From f91dd4e637cdb68ae51b642e715a735f02bc9800 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Thu, 8 Jun 2023 12:03:42 +0800 Subject: [PATCH 0726/1149] scheduler: record process and write durtaion details (#14895) ref tikv/tikv#12362 Signed-off-by: cfzjywxk --- src/storage/txn/scheduler.rs | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 127d2a39c36..a5255969af5 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -1237,13 +1237,14 @@ impl TxnScheduler { .map_err(StorageError::from) }) }; + let cmd_process_duration = begin_instant.saturating_elapsed(); + sched_details.cmd_process_nanos = cmd_process_duration.as_nanos() as u64; SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(tag) - .observe(begin_instant.saturating_elapsed_secs()); + .observe(cmd_process_duration.as_secs_f64()); res }; - let process_end = Instant::now(); if write_result.is_ok() { // TODO: write bytes can be a bit inaccurate due to error requests or in-memory // pessimistic locks. 
@@ -1267,8 +1268,7 @@ impl TxnScheduler { sample.add_read_bytes(read_bytes); let quota_delay = quota_limiter.consume_sample(sample, true).await; if !quota_delay.is_zero() { - let actual_quota_delay = process_end.saturating_elapsed(); - sched_details.quota_limit_delay_nanos = actual_quota_delay.as_nanos() as u64; + sched_details.quota_limit_delay_nanos = quota_delay.as_nanos() as u64; TXN_COMMAND_THROTTLE_TIME_COUNTER_VEC_STATIC .get(tag) .inc_by(quota_delay.as_micros() as u64); @@ -1526,6 +1526,7 @@ impl TxnScheduler { } }); + let async_write_start = Instant::now_coarse(); let mut res = unsafe { with_tls_engine(|e: &mut E| { e.async_write(&ctx, to_be_write, subscribed, Some(on_applied)) @@ -1607,6 +1608,8 @@ impl TxnScheduler { sched.inner.flow_controller.unconsume(region_id, write_size); } } + sched_details.async_write_nanos = + async_write_start.saturating_elapsed().as_nanos() as u64; return; } } @@ -1882,8 +1885,15 @@ struct SchedulerDetails { tracker: TrackerToken, stat: Statistics, start_process_instant: Instant, + // A write command processing can be divided into four stages: + // 1. The command is processed using a snapshot to generate the write content. + // 2. If the quota is exceeded, there will be a delay. + // 3. If the write flow exceeds the limit, it will be throttled. + // 4. Finally, the write request is sent to raftkv and responses are awaited. 
+ cmd_process_nanos: u64, quota_limit_delay_nanos: u64, flow_control_nanos: u64, + async_write_nanos: u64, } impl SchedulerDetails { @@ -1892,8 +1902,10 @@ impl SchedulerDetails { tracker, stat: Default::default(), start_process_instant, + cmd_process_nanos: 0, quota_limit_delay_nanos: 0, flow_control_nanos: 0, + async_write_nanos: 0, } } } From 3e787242ccc94d57265c3e5c28b48050009f9105 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 8 Jun 2023 14:25:41 +0800 Subject: [PATCH 0727/1149] raftstore-v2: support warm up log cache and enable tests (#14846) close tikv/tikv#14845 raftstorev2: support warm up log cache and enable tests Signed-off-by: Neil Shen Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/fsm/peer.rs | 2 +- .../operation/command/admin/compact_log.rs | 39 +- .../command/admin/transfer_leader.rs | 180 +++++--- .../raftstore-v2/src/operation/command/mod.rs | 14 +- .../src/operation/ready/async_writer.rs | 2 +- .../raftstore-v2/src/operation/ready/mod.rs | 23 +- .../src/operation/ready/snapshot.rs | 2 + .../integrations/test_transfer_leader.rs | 23 +- components/test_raftstore-v2/src/cluster.rs | 88 +++- src/server/raftkv2/mod.rs | 2 + .../failpoints/cases/test_transfer_leader.rs | 392 ++++++++++-------- 11 files changed, 503 insertions(+), 264 deletions(-) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index ca93dd1813b..ba749b71b7d 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -280,7 +280,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, .peer_mut() .on_persisted(self.store_ctx, peer_id, ready_number), PeerMsg::LogsFetched(fetched_logs) => { - self.fsm.peer_mut().on_logs_fetched(fetched_logs) + self.fsm.peer_mut().on_raft_log_fetched(fetched_logs) } PeerMsg::SnapshotGenerated(snap_res) => { 
self.fsm.peer_mut().on_snapshot_generated(snap_res) diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 8b9992b2b85..4a79ee18b1f 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -20,8 +20,9 @@ use kvproto::raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse, RaftCmdRequ use protobuf::Message; use raftstore::{ store::{ - fsm::new_admin_request, metrics::REGION_MAX_LOG_LAG, needs_evict_entry_cache, Transport, - WriteTask, RAFT_INIT_LOG_INDEX, + entry_storage::MAX_WARMED_UP_CACHE_KEEP_TIME, fsm::new_admin_request, + metrics::REGION_MAX_LOG_LAG, needs_evict_entry_cache, Transport, WriteTask, + RAFT_INIT_LOG_INDEX, }, Result, }; @@ -42,6 +43,8 @@ pub struct CompactLogContext { skipped_ticks: usize, approximate_log_size: u64, last_applying_index: u64, + /// The index of last compacted raft log. + last_compacted_idx: u64, /// Tombstone tablets can only be destroyed when the tablet that replaces it /// is persisted. This is a list of tablet index that awaits to be /// persisted. 
When persisted_apply is advanced, we need to notify tablet @@ -55,6 +58,7 @@ impl CompactLogContext { skipped_ticks: 0, approximate_log_size: 0, last_applying_index, + last_compacted_idx: 0, tombstone_tablets_wait_index: vec![], } } @@ -81,6 +85,14 @@ impl CompactLogContext { pub fn last_applying_index(&self) -> u64 { self.last_applying_index } + + pub fn set_last_compacted_idx(&mut self, index: u64) { + self.last_compacted_idx = index; + } + + pub fn last_compacted_idx(&self) -> u64 { + self.last_compacted_idx + } } impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { @@ -454,7 +466,15 @@ impl Peer { res.compact_index, self.compact_log_context().last_applying_index ); - // TODO: check entry_cache_warmup_state + + // Since this peer may be warming up the entry cache, log compaction should be + // temporarily skipped. Otherwise, the warmup task may fail. + if let Some(state) = self.entry_storage_mut().entry_cache_warmup_state_mut() { + if !state.check_stale(MAX_WARMED_UP_CACHE_KEEP_TIME) { + return; + } + } + self.entry_storage_mut() .compact_entry_cache(res.compact_index); self.storage_mut() @@ -464,7 +484,6 @@ impl Peer { .entry_storage_mut() .apply_state_mut() .mut_truncated_state(); - let old_truncated = truncated_state.get_index(); truncated_state.set_index(res.compact_index); truncated_state.set_term(res.compact_term); @@ -476,8 +495,9 @@ impl Peer { .unwrap(); self.set_has_extra_write(); - // All logs < perssited_apply will be deleted, so should check with +1. - if old_truncated + 1 < self.storage().apply_trace().persisted_apply_index() + // All logs < persisted_apply will be deleted. + let prev_first_index = first_index; + if prev_first_index < self.storage().apply_trace().persisted_apply_index() && let Some(index) = self.compact_log_index() { // Raft Engine doesn't care about first index. 
@@ -487,12 +507,13 @@ impl Peer { { error!(self.logger, "failed to compact raft logs"; "err" => ?e); } + self.compact_log_context_mut().set_last_compacted_idx(index); // Extra write set right above. } let context = self.compact_log_context_mut(); let applied = context.last_applying_index; - let total_cnt = applied - old_truncated; + let total_cnt = applied - prev_first_index; let remain_cnt = applied - res.compact_index; context.approximate_log_size = (context.approximate_log_size as f64 * (remain_cnt as f64 / total_cnt as f64)) as u64; @@ -549,9 +570,9 @@ impl Peer { } fn compact_log_index(&mut self) -> Option { - let truncated = self.entry_storage().truncated_index() + 1; + let first_index = self.entry_storage().first_index(); let persisted_applied = self.storage().apply_trace().persisted_apply_index(); - let compact_index = std::cmp::min(truncated, persisted_applied); + let compact_index = std::cmp::min(first_index, persisted_applied); if compact_index == RAFT_INIT_LOG_INDEX + 1 { // There is no logs at RAFT_INIT_LOG_INDEX, nothing to delete. return None; diff --git a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs index e7bd84c973c..4cdeba3bc41 100644 --- a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs +++ b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs @@ -132,12 +132,11 @@ impl Peer { // It's only necessary to ping the target peer, but ping all for simplicity. self.raft_group_mut().ping(); - // todo: entry cache warmup - let mut msg = eraftpb::Message::new(); msg.set_to(peer.get_id()); msg.set_msg_type(eraftpb::MessageType::MsgTransferLeader); msg.set_from(self.peer_id()); + msg.set_index(self.entry_storage().entry_cache_first_index().unwrap_or(0)); // log term here represents the term of last log. For leader, the term of last // log is always its current term. 
Not just set term because raft library // forbids setting it for MsgTransferLeader messages. @@ -159,69 +158,74 @@ impl Peer { } if !self.is_leader() { - self.execute_transfer_leader(ctx, msg.get_from(), peer_disk_usage, false); - } else { - let from = match self.peer_from_cache(msg.get_from()) { - Some(p) => p, - None => return, - }; - match self.ready_to_transfer_leader(ctx, msg.get_index(), &from) { - Some(reason) => { + if self.maybe_reject_transfer_leader_msg(ctx, msg.get_from(), peer_disk_usage) + || !self.pre_ack_transfer_leader_msg(ctx, msg) + { + return; + } + + self.ack_transfer_leader_msg(false); + return; + } + + let from = match self.peer_from_cache(msg.get_from()) { + Some(p) => p, + None => return, + }; + match self.ready_to_transfer_leader(ctx, msg.get_index(), &from) { + Some(reason) => { + info!( + self.logger, + "reject to transfer leader"; + "to" => ?from, + "reason" => reason, + "index" => msg.get_index(), + "last_index" => self.storage().last_index().unwrap_or_default(), + ); + } + None => { + self.propose_pending_writes(ctx); + if self.propose_locks_before_transfer_leader(ctx, msg) { + // If some pessimistic locks are just proposed, we propose another + // TransferLeader command instead of transferring leader immediately. info!( self.logger, - "reject to transfer leader"; + "propose transfer leader command"; "to" => ?from, - "reason" => reason, - "index" => msg.get_index(), - "last_index" => self.storage().last_index().unwrap_or_default(), ); - } - None => { - self.propose_pending_writes(ctx); - if self.propose_locks_before_transfer_leader(ctx, msg) { - // If some pessimistic locks are just proposed, we propose another - // TransferLeader command instead of transferring leader immediately. 
- info!( - self.logger, - "propose transfer leader command"; - "to" => ?from, - ); - let mut cmd = - new_admin_request(self.region().get_id(), self.peer().clone()); - cmd.mut_header() - .set_region_epoch(self.region().get_region_epoch().clone()); - // Set this flag to propose this command like a normal proposal. - cmd.mut_header() - .set_flags(WriteBatchFlags::TRANSFER_LEADER_PROPOSAL.bits()); - cmd.mut_admin_request() - .set_cmd_type(AdminCmdType::TransferLeader); - cmd.mut_admin_request().mut_transfer_leader().set_peer(from); - if let PeerMsg::AdminCommand(req) = PeerMsg::admin_command(cmd).0 { - self.on_admin_command(ctx, req.request, req.ch); - } else { - unreachable!(); - } + let mut cmd = new_admin_request(self.region().get_id(), self.peer().clone()); + cmd.mut_header() + .set_region_epoch(self.region().get_region_epoch().clone()); + // Set this flag to propose this command like a normal proposal. + cmd.mut_header() + .set_flags(WriteBatchFlags::TRANSFER_LEADER_PROPOSAL.bits()); + cmd.mut_admin_request() + .set_cmd_type(AdminCmdType::TransferLeader); + cmd.mut_admin_request().mut_transfer_leader().set_peer(from); + if let PeerMsg::AdminCommand(req) = PeerMsg::admin_command(cmd).0 { + self.on_admin_command(ctx, req.request, req.ch); } else { - info!( - self.logger, - "transfer leader"; - "peer" => ?from, - ); - self.raft_group_mut().transfer_leader(from.get_id()); - self.refresh_leader_transferee(); + unreachable!(); } + } else { + info!( + self.logger, + "transfer leader"; + "peer" => ?from, + ); + self.raft_group_mut().transfer_leader(from.get_id()); + self.refresh_leader_transferee(); } } } } - pub fn execute_transfer_leader( + fn maybe_reject_transfer_leader_msg( &mut self, ctx: &mut StoreContext, from: u64, peer_disk_usage: DiskUsage, - reply_cmd: bool, // whether it is a reply to a TransferLeader command - ) { + ) -> bool { let pending_snapshot = self.is_handling_snapshot() || self.has_pending_snapshot(); if pending_snapshot || from != self.leader_id() 
@@ -238,9 +242,79 @@ impl Peer { "pending_snapshot" => pending_snapshot, "disk_usage" => ?ctx.self_disk_usage, ); - return; + return true; } + false + } + /// Before ack the transfer leader message sent by the leader. + /// Currently, it only warms up the entry cache in this stage. + /// + /// This return whether the msg should be acked. When cache is warmed up + /// or the warmup operation is timeout, it is true. + fn pre_ack_transfer_leader_msg( + &mut self, + ctx: &mut StoreContext, + msg: &eraftpb::Message, + ) -> bool { + if !ctx.cfg.warmup_entry_cache_enabled() { + return true; + } + + // The start index of warmup range. It is leader's entry_cache_first_index, + // which in general is equal to the lowest matched index. + let mut low = msg.get_index(); + let last_index = self.entry_storage().last_index(); + let mut should_ack_now = false; + + // Need not to warm up when the index is 0. + // There are two cases where index can be 0: + // 1. During rolling upgrade, old instances may not support warmup. + // 2. The leader's entry cache is empty. + if low == 0 || low > last_index { + // There is little possibility that the warmup_range_start + // is larger than the last index. Check the test case + // `test_when_warmup_range_start_is_larger_than_last_index` + // for details. + should_ack_now = true; + } else { + if low < self.compact_log_context().last_compacted_idx() { + low = self.compact_log_context().last_compacted_idx() + }; + // Check if the entry cache is already warmed up. + if let Some(first_index) = self.entry_storage().entry_cache_first_index() { + if low >= first_index { + fail::fail_point!("entry_cache_already_warmed_up"); + should_ack_now = true; + } + } + } + + if should_ack_now { + return true; + } + + // Check if the warmup operation is timeout if warmup is already started. 
+ if let Some(state) = self + .storage_mut() + .entry_storage_mut() + .entry_cache_warmup_state_mut() + { + // If it is timeout, this peer should ack the message so that + // the leadership transfer process can continue. + state.check_task_timeout(ctx.cfg.max_entry_cache_warmup_duration.0) + } else { + self.storage_mut() + .entry_storage_mut() + .async_warm_up_entry_cache(low) + .is_none() + } + } + + pub fn ack_transfer_leader_msg( + &mut self, + reply_cmd: bool, // whether it is a reply to a TransferLeader command + ) { let mut msg = eraftpb::Message::new(); msg.set_from(self.peer_id()); msg.set_to(self.leader_id()); @@ -314,7 +388,7 @@ impl Apply { } impl Peer { - pub fn on_transfer_leader(&mut self, ctx: &mut StoreContext, term: u64) { + pub fn on_transfer_leader(&mut self, term: u64) { // If the term has changed between proposing and executing the TransferLeader // request, ignore it because this request may be stale. if term != self.term() { @@ -322,7 +396,7 @@ impl Peer { } // Reply to leader that it is ready to transfer leader now. - self.execute_transfer_leader(ctx, self.leader_id(), DiskUsage::Normal, true); + self.ack_transfer_leader_msg(true); self.set_has_ready(); } diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 1099aeb11f0..1cae8075863 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -368,10 +368,13 @@ impl Peer { ctx: &mut StoreContext, apply_res: ApplyRes, ) { - if !self.serving() || !apply_res.admin_result.is_empty() { - // TODO: remove following log once stable. - debug!(self.logger, "on_apply_res"; "apply_res" => ?apply_res, "apply_trace" => ?self.storage().apply_trace()); - } + debug!( + self.logger, + "async apply finish"; + "res" => ?apply_res, + "serving" => self.serving(), + "apply_trace" => ?self.storage().apply_trace(), + ); // It must just applied a snapshot. 
if apply_res.applied_index < self.entry_storage().first_index() { // Ignore admin command side effects, otherwise it may split incomplete @@ -391,7 +394,7 @@ impl Peer { .on_admin_modify(res.tablet_index); self.on_apply_res_split(ctx, res) } - AdminCmdResult::TransferLeader(term) => self.on_transfer_leader(ctx, term), + AdminCmdResult::TransferLeader(term) => self.on_transfer_leader(term), AdminCmdResult::CompactLog(res) => self.on_apply_res_compact_log(ctx, res), AdminCmdResult::UpdateGcPeers(state) => self.on_apply_res_update_gc_peers(state), AdminCmdResult::PrepareMerge(res) => self.on_apply_res_prepare_merge(ctx, res), @@ -547,6 +550,7 @@ impl Apply { #[inline] pub async fn apply_committed_entries(&mut self, ce: CommittedEntries) { fail::fail_point!("APPLY_COMMITTED_ENTRIES"); + fail::fail_point!("on_handle_apply_1003", self.peer_id() == 1003, |_| {}); fail::fail_point!("on_handle_apply_2", self.peer_id() == 2, |_| {}); let now = std::time::Instant::now(); let apply_wait_time = APPLY_TASK_WAIT_TIME_HISTOGRAM.local(); diff --git a/components/raftstore-v2/src/operation/ready/async_writer.rs b/components/raftstore-v2/src/operation/ready/async_writer.rs index 96f1611d9f1..733031b0ff5 100644 --- a/components/raftstore-v2/src/operation/ready/async_writer.rs +++ b/components/raftstore-v2/src/operation/ready/async_writer.rs @@ -170,7 +170,7 @@ impl AsyncWriter { #[cfg(feature = "testexport")] impl AsyncWriter { - pub fn subscirbe_flush(&mut self, ch: crate::router::FlushChannel) { + pub fn subscribe_flush(&mut self, ch: crate::router::FlushChannel) { self.flush_subscribers .push_back((self.known_largest_number(), ch)); } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 6a373ac3921..c74dfd3e0d1 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -49,7 +49,7 @@ use raftstore::{ FetchedLogs, ReadProgress, Transport, 
WriteCallback, WriteTask, }, }; -use slog::{debug, error, info, trace, warn, Logger}; +use slog::{debug, error, info, warn, Logger}; use tikv_util::{ log::SlogFormat, slog_panic, @@ -192,7 +192,7 @@ impl Peer { }, )); } - let entry_storage = self.storage().entry_storage(); + let entry_storage = self.entry_storage(); let committed_index = entry_storage.commit_index(); let applied_index = entry_storage.applied_index(); if committed_index > applied_index { @@ -436,15 +436,26 @@ impl Peer { } /// Callback for fetching logs asynchronously. - pub fn on_logs_fetched(&mut self, fetched_logs: FetchedLogs) { + pub fn on_raft_log_fetched(&mut self, fetched_logs: FetchedLogs) { let FetchedLogs { context, logs } = fetched_logs; let low = logs.low; - if !self.is_leader() { + // If the peer is not the leader anymore and it's not in entry cache warmup + // state, or it is being destroyed, ignore the result. + if !self.is_leader() && self.entry_storage().entry_cache_warmup_state().is_none() + || !self.serving() + { self.entry_storage_mut().clean_async_fetch_res(low); return; } if self.term() != logs.term { self.entry_storage_mut().clean_async_fetch_res(low); + } else if self.entry_storage().entry_cache_warmup_state().is_some() { + if self.entry_storage_mut().maybe_warm_up_entry_cache(*logs) { + self.ack_transfer_leader_msg(false); + self.set_has_ready(); + } + self.entry_storage_mut().clean_async_fetch_res(low); + return; } else { self.entry_storage_mut() .update_async_fetch_res(low, Some(logs)); @@ -534,7 +545,7 @@ impl Peer { self.update_last_sent_snapshot_index(index); } - trace!( + debug!( self.logger, "send raft msg"; "msg_type" => ?msg_type, @@ -947,7 +958,7 @@ impl Peer { #[cfg(feature = "testexport")] pub fn on_wait_flush(&mut self, ch: crate::router::FlushChannel) { - self.async_writer.subscirbe_flush(ch); + self.async_writer.subscribe_flush(ch); } pub fn on_role_changed(&mut self, ctx: &mut StoreContext, ready: &Ready) { diff --git 
a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 6598fa883e6..36774d993fc 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -233,6 +233,8 @@ impl Peer { .set_last_applying_index(persisted_index); let snapshot_index = self.entry_storage().truncated_index(); assert!(snapshot_index >= RAFT_INIT_LOG_INDEX, "{:?}", self.logger); + self.compact_log_context_mut() + .set_last_compacted_idx(snapshot_index + 1 /* first index */); // If leader sends a message append to the follower while it's applying // snapshot (via split init for example), the persisted_index may be larger // than the first index. But as long as first index is not larger, the diff --git a/components/raftstore-v2/tests/integrations/test_transfer_leader.rs b/components/raftstore-v2/tests/integrations/test_transfer_leader.rs index 18d81ef16aa..3b0feefc406 100644 --- a/components/raftstore-v2/tests/integrations/test_transfer_leader.rs +++ b/components/raftstore-v2/tests/integrations/test_transfer_leader.rs @@ -13,7 +13,7 @@ use raftstore_v2::{ router::{PeerMsg, PeerTick}, SimpleWriteEncoder, }; -use tikv_util::store::new_peer; +use tikv_util::{store::new_peer, time::Instant}; use crate::cluster::Cluster; @@ -92,8 +92,27 @@ pub fn must_transfer_leader( admin_req.set_transfer_leader(transfer_req); let resp = router.admin_command(region_id, req).unwrap(); assert!(!resp.get_header().has_error(), "{:?}", resp); - cluster.dispatch(region_id, vec![]); + let start = Instant::now(); + loop { + if start.saturating_elapsed() > Duration::from_secs(5) { + break; + } + cluster.dispatch(region_id, vec![]); + let meta1 = router + .must_query_debug_info(region_id, Duration::from_secs(3)) + .unwrap(); + let meta2 = router2 + .must_query_debug_info(region_id, Duration::from_secs(3)) + .unwrap(); + if meta1.raft_status.soft_state.leader_id == to_peer.id + && 
meta2.raft_status.soft_state.leader_id == to_peer.id + { + return; + } + std::thread::sleep(std::time::Duration::from_millis(100)); + } + // Last try. let meta = router .must_query_debug_info(region_id, Duration::from_secs(3)) .unwrap(); diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 95b3c89dd97..91bd24136d5 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -37,13 +37,14 @@ use kvproto::{ use pd_client::PdClient; use raftstore::{ store::{ - cmd_resp, initial_region, util::check_key_in_region, Bucket, BucketRange, Callback, - RegionSnapshot, TabletSnapManager, WriteResponse, INIT_EPOCH_CONF_VER, INIT_EPOCH_VER, + cmd_resp, initial_region, region_meta::RegionMeta, util::check_key_in_region, Bucket, + BucketRange, Callback, RegionSnapshot, TabletSnapManager, WriteResponse, + INIT_EPOCH_CONF_VER, INIT_EPOCH_VER, }, Error, Result, }; use raftstore_v2::{ - router::{PeerMsg, QueryResult}, + router::{DebugInfoChannel, PeerMsg, QueryResult}, write_initial_states, SimpleWriteEncoder, StoreMeta, StoreRouter, }; use resource_control::ResourceGroupManager; @@ -700,6 +701,23 @@ impl, EK: KvEngine> Cluster { ) } + pub fn add_new_engine(&mut self) -> u64 { + self.count += 1; + let node_id = self.count as u64; + self.create_engine(Some((self.id(), node_id))); + + let key_mgr = self.key_managers.last().unwrap().clone(); + self.key_managers_map.insert(node_id, key_mgr); + let (tablet_registry, raft_engine) = self.engines.last().unwrap().clone(); + self.raft_engines.insert(node_id, raft_engine); + self.tablet_registries.insert(node_id, tablet_registry); + self.sst_workers_map + .insert(node_id, self.sst_workers.len() - 1); + + self.run_node(node_id).unwrap(); + node_id + } + pub fn read( &self, // v2 does not need this @@ -1149,6 +1167,28 @@ impl, EK: KvEngine> Cluster { } } + pub fn wait_applied_index(&mut self, region_id: u64, store_id: u64, index: u64) { + let 
timer = Instant::now(); + loop { + let applied_index = self.apply_state(region_id, store_id).applied_index; + if applied_index >= index { + return; + } + if timer.saturating_elapsed() >= Duration::from_secs(5) { + panic!( + "[region {}] log is still not applied to {}: {} on store {}", + region_id, index, applied_index, store_id, + ); + } + let _ = self + .get_engine(store_id) + .get_tablet_by_id(region_id) + .unwrap() + .flush_cfs(&[], true); + thread::sleep(Duration::from_millis(200)); + } + } + pub fn get(&mut self, key: &[u8]) -> Option> { self.get_impl(CF_DEFAULT, key, false) } @@ -1276,11 +1316,47 @@ impl, EK: KvEngine> Cluster { } } + pub fn must_query_debug_info( + &self, + store_id: u64, + region_id: u64, + timeout: Duration, + ) -> Option { + let timer = Instant::now(); + while timer.saturating_elapsed() < timeout { + let (ch, sub) = DebugInfoChannel::pair(); + let msg = PeerMsg::QueryDebugInfo(ch); + let res = self.get_router(store_id).unwrap().send(region_id, msg); + if res.is_err() { + thread::sleep(Duration::from_millis(10)); + continue; + } + let res = block_on(sub.result()); + if res.is_some() { + return res; + } + } + None + } + pub fn apply_state(&self, region_id: u64, store_id: u64) -> RaftApplyState { - self.get_engine(store_id) - .raft_apply_state(region_id) - .unwrap() + // In raftstore v2, RaftApplyState is persisted infrequently, + // just return in memory RaftApplySate to mimic this API in test_raftstore. 
+ let apply_state = self + .must_query_debug_info(store_id, region_id, Duration::from_secs(5)) .unwrap() + .raft_apply; + let mut state = RaftApplyState::default(); + state.set_applied_index(apply_state.applied_index); + state.set_commit_index(apply_state.commit_index); + state.set_commit_term(apply_state.commit_term); + state + .mut_truncated_state() + .set_index(apply_state.truncated_state.index); + state + .mut_truncated_state() + .set_term(apply_state.truncated_state.term); + state } pub fn add_send_filter_on_node(&mut self, node_id: u64, filter: Box) { diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index d4a158bffda..5935d542a37 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -232,6 +232,8 @@ impl tikv_kv::Engine for RaftKv2 { subscribed: u8, on_applied: Option, ) -> Self::WriteRes { + fail_point!("raftkv_async_write"); + let region_id = ctx.region_id; ASYNC_REQUESTS_COUNTER_VEC.write.all.inc(); let begin_instant = Instant::now_coarse(); diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index e6f5b56aa92..dc66fe18df8 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -13,7 +13,9 @@ use grpcio::{ChannelBuilder, Environment}; use kvproto::{kvrpcpb::*, tikvpb::TikvClient}; use pd_client::PdClient; use raft::eraftpb::MessageType; -use test_raftstore::*; +use test_raftstore::{Simulator as S1, *}; +use test_raftstore_macro::test_case; +use test_raftstore_v2::Simulator as S2; use tikv::storage::Snapshot; use tikv_util::{ config::{ReadableDuration, ReadableSize}, @@ -24,10 +26,11 @@ use txn_types::{Key, PessimisticLock}; /// When a follower applies log slowly, leader should not transfer leader /// to it. Otherwise, new leader may wait a long time to serve read/write /// requests. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_transfer_leader_slow_apply() { // 3 nodes cluster. - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); let pd_client = cluster.pd_client.clone(); pd_client.disable_default_operator(); @@ -56,9 +59,10 @@ fn test_transfer_leader_slow_apply() { must_get_equal(&cluster.get_engine(3), b"k3", b"v3"); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_prewrite_before_max_ts_is_synced() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.raft_heartbeat_ticks = 20; cluster.run(); @@ -67,10 +71,7 @@ fn test_prewrite_before_max_ts_is_synced() { let channel = ChannelBuilder::new(env).connect(&addr); let client = TikvClient::new(channel); - let do_prewrite = |cluster: &mut Cluster| { - let region_id = 1; - let leader = cluster.leader_of_region(region_id).unwrap(); - let epoch = cluster.get_region_epoch(region_id); + let do_prewrite = |region_id, leader, epoch| { let mut ctx = Context::default(); ctx.set_region_id(region_id); ctx.set_peer(leader); @@ -93,107 +94,114 @@ fn test_prewrite_before_max_ts_is_synced() { cluster.must_transfer_leader(1, new_peer(2, 2)); fail::cfg("test_raftstore_get_tso", "return(50)").unwrap(); cluster.must_transfer_leader(1, new_peer(1, 1)); - let resp = do_prewrite(&mut cluster); + let epoch = cluster.get_region_epoch(1); + let resp = do_prewrite(1, new_peer(1, 1), epoch.clone()); assert!(resp.get_region_error().has_max_timestamp_not_synced()); fail::remove("test_raftstore_get_tso"); thread::sleep(Duration::from_millis(200)); - let resp = do_prewrite(&mut cluster); + let resp = do_prewrite(1, new_peer(1, 1), epoch); assert!(!resp.get_region_error().has_max_timestamp_not_synced()); } -#[test] -fn test_delete_lock_proposed_after_proposing_locks_1() { - 
test_delete_lock_proposed_after_proposing_locks_impl(1); -} - -#[test] -fn test_delete_lock_proposed_after_proposing_locks_2() { - // Repeated transfer leader command before proposing the write command - test_delete_lock_proposed_after_proposing_locks_impl(2); -} - -fn test_delete_lock_proposed_after_proposing_locks_impl(transfer_msg_count: usize) { - let mut cluster = new_server_cluster(0, 3); - cluster.cfg.raft_store.raft_heartbeat_ticks = 20; - cluster.run(); - - let region_id = 1; - cluster.must_transfer_leader(1, new_peer(1, 1)); - let leader = cluster.leader_of_region(region_id).unwrap(); +macro_rules! test_delete_lock_proposed_after_proposing_locks_impl { + ($cluster:expr, $transfer_msg_count:expr) => { + $cluster.cfg.raft_store.raft_heartbeat_ticks = 20; + $cluster.run(); - let snapshot = cluster.must_get_snapshot_of_region(region_id); - let txn_ext = snapshot.txn_ext.unwrap(); - txn_ext - .pessimistic_locks - .write() - .insert(vec![( - Key::from_raw(b"key"), - PessimisticLock { - primary: b"key".to_vec().into_boxed_slice(), - start_ts: 10.into(), - ttl: 1000, - for_update_ts: 10.into(), - min_commit_ts: 20.into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, - is_locked_with_conflict: false, - }, - )]) - .unwrap(); - - let addr = cluster.sim.rl().get_addr(1); - let env = Arc::new(Environment::new(1)); - let channel = ChannelBuilder::new(env).connect(&addr); - let client = TikvClient::new(channel); - - let mut req = CleanupRequest::default(); - let mut ctx = Context::default(); - ctx.set_region_id(region_id); - ctx.set_region_epoch(cluster.get_region_epoch(region_id)); - ctx.set_peer(leader); - req.set_context(ctx); - req.set_key(b"key".to_vec()); - req.set_start_version(10); - req.set_current_ts(u64::MAX); + let region_id = 1; + $cluster.must_transfer_leader(1, new_peer(1, 1)); + let leader = $cluster.leader_of_region(region_id).unwrap(); + + let snapshot = $cluster.must_get_snapshot_of_region(region_id); + let txn_ext = 
snapshot.txn_ext.unwrap(); + txn_ext + .pessimistic_locks + .write() + .insert(vec![( + Key::from_raw(b"key"), + PessimisticLock { + primary: b"key".to_vec().into_boxed_slice(), + start_ts: 10.into(), + ttl: 1000, + for_update_ts: 10.into(), + min_commit_ts: 20.into(), + last_change_ts: 5.into(), + versions_to_last_change: 3, + is_locked_with_conflict: false, + }, + )]) + .unwrap(); + + let addr = $cluster.sim.rl().get_addr(1); + let env = Arc::new(Environment::new(1)); + let channel = ChannelBuilder::new(env).connect(&addr); + let client = TikvClient::new(channel); + + let mut req = CleanupRequest::default(); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_region_epoch($cluster.get_region_epoch(region_id)); + ctx.set_peer(leader); + req.set_context(ctx); + req.set_key(b"key".to_vec()); + req.set_start_version(10); + req.set_current_ts(u64::MAX); - // Pause the command after it mark the lock as deleted. - fail::cfg("raftkv_async_write", "pause").unwrap(); - let (tx, resp_rx) = mpsc::channel(); - thread::spawn(move || tx.send(client.kv_cleanup(&req).unwrap()).unwrap()); + // Pause the command after it mark the lock as deleted. + fail::cfg("raftkv_async_write", "pause").unwrap(); + let (tx, resp_rx) = mpsc::channel(); + thread::spawn(move || tx.send(client.kv_cleanup(&req).unwrap()).unwrap()); - thread::sleep(Duration::from_millis(200)); - resp_rx.try_recv().unwrap_err(); + thread::sleep(Duration::from_millis(200)); + resp_rx.try_recv().unwrap_err(); - for _ in 0..transfer_msg_count { - cluster.transfer_leader(1, new_peer(2, 2)); - } - thread::sleep(Duration::from_millis(200)); + for _ in 0..$transfer_msg_count { + $cluster.transfer_leader(1, new_peer(2, 2)); + } + thread::sleep(Duration::from_millis(200)); + + // Transfer leader will not make the command fail. 
+ fail::remove("raftkv_async_write"); + let resp = resp_rx.recv().unwrap(); + assert!(!resp.has_region_error()); + + for _ in 0..10 { + thread::sleep(Duration::from_millis(100)); + $cluster.reset_leader_of_region(region_id); + if $cluster.leader_of_region(region_id).unwrap().id == 2 { + let snapshot = $cluster.must_get_snapshot_of_region(1); + assert!( + snapshot + .get_cf(CF_LOCK, &Key::from_raw(b"key")) + .unwrap() + .is_none() + ); + return; + } + } + panic!("region should succeed to transfer leader to peer 2"); + }; +} - // Transfer leader will not make the command fail. - fail::remove("raftkv_async_write"); - let resp = resp_rx.recv().unwrap(); - assert!(!resp.has_region_error()); +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_delete_lock_proposed_after_proposing_locks_1() { + let mut cluster = new_cluster(0, 3); + test_delete_lock_proposed_after_proposing_locks_impl!(cluster, 1); +} - for _ in 0..10 { - thread::sleep(Duration::from_millis(100)); - cluster.reset_leader_of_region(region_id); - if cluster.leader_of_region(region_id).unwrap().id == 2 { - let snapshot = cluster.must_get_snapshot_of_region(1); - assert!( - snapshot - .get_cf(CF_LOCK, &Key::from_raw(b"key")) - .unwrap() - .is_none() - ); - return; - } - } - panic!("region should succeed to transfer leader to peer 2"); +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_delete_lock_proposed_after_proposing_locks_2() { + // Repeated transfer leader command before proposing the write command + let mut cluster = new_cluster(0, 3); + test_delete_lock_proposed_after_proposing_locks_impl!(cluster, 2); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_delete_lock_proposed_before_proposing_locks() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); 
cluster.cfg.raft_store.raft_heartbeat_ticks = 20; cluster.run(); @@ -269,9 +277,10 @@ fn test_delete_lock_proposed_before_proposing_locks() { panic!("region should succeed to transfer leader to peer 2"); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_read_lock_after_become_follower() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.raft_heartbeat_ticks = 20; cluster.run(); @@ -355,80 +364,90 @@ fn test_read_lock_after_become_follower() { /// 3. Insert another 20 entries. /// 4. Wait for some time so that part of the entry cache are compacted /// on the leader(store 1). -fn run_cluster_for_test_warmup_entry_cache(cluster: &mut Cluster) { - // Let the leader compact the entry cache. - cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); - cluster.run(); - - cluster.must_transfer_leader(1, new_peer(1, 1)); - - for i in 1..5u32 { - let k = i.to_string().into_bytes(); - let v = k.clone(); - cluster.must_put(&k, &v); - must_get_equal(&cluster.get_engine(3), &k, &v); - } +macro_rules! run_cluster_for_test_warmup_entry_cache { + ($cluster:expr) => { + // Let the leader compact the entry cache. + $cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); + $cluster.run(); + + $cluster.must_transfer_leader(1, new_peer(1, 1)); + + for i in 1..5u32 { + let k = i.to_string().into_bytes(); + let v = k.clone(); + $cluster.must_put(&k, &v); + must_get_equal(&$cluster.get_engine(3), &k, &v); + } - // Let store 3 fall behind. - cluster.add_send_filter(CloneFilterFactory( - RegionPacketFilter::new(1, 3).direction(Direction::Recv), - )); + // Let store 3 fall behind. 
+ $cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(1, 3).direction(Direction::Recv), + )); - for i in 1..20u32 { - let k = i.to_string().into_bytes(); - let v = k.clone(); - cluster.must_put(&k, &v); - must_get_equal(&cluster.get_engine(2), &k, &v); - } + for i in 1..20u32 { + let k = i.to_string().into_bytes(); + let v = k.clone(); + $cluster.must_put(&k, &v); + must_get_equal(&$cluster.get_engine(2), &k, &v); + } - // Wait until part of the leader's entry cache is compacted. - sleep_ms(cluster.cfg.raft_store.raft_log_gc_tick_interval.as_millis() * 2); + // Wait until part of the leader's entry cache is compacted. + sleep_ms( + $cluster + .cfg + .raft_store + .raft_log_gc_tick_interval + .as_millis() + * 2, + ); + }; } -fn prevent_from_gc_raft_log(cluster: &mut Cluster) { - cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100000); - cluster.cfg.raft_store.raft_log_gc_threshold = 1000; - cluster.cfg.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(20)); - cluster.cfg.raft_store.raft_log_reserve_max_ticks = 20; +fn prevent_from_gc_raft_log(cfg: &mut Config) { + cfg.raft_store.raft_log_gc_count_limit = Some(100000); + cfg.raft_store.raft_log_gc_threshold = 1000; + cfg.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(20)); + cfg.raft_store.raft_log_reserve_max_ticks = 20; } -fn run_cluster_and_warm_up_cache_for_store2() -> Cluster { - let mut cluster = new_node_cluster(0, 3); - cluster.cfg.raft_store.max_entry_cache_warmup_duration = ReadableDuration::secs(1000); - prevent_from_gc_raft_log(&mut cluster); - run_cluster_for_test_warmup_entry_cache(&mut cluster); - - let (sx, rx) = channel::unbounded(); - let recv_filter = Box::new( - RegionPacketFilter::new(1, 1) - .direction(Direction::Recv) - .msg_type(MessageType::MsgTransferLeader) - .set_msg_callback(Arc::new(move |m| { - sx.send(m.get_message().get_from()).unwrap(); - })), - ); - cluster.sim.wl().add_recv_filter(1, recv_filter); - - let (sx2, rx2) = 
channel::unbounded(); - fail::cfg_callback("on_entry_cache_warmed_up", move || sx2.send(true).unwrap()).unwrap(); - cluster.transfer_leader(1, new_peer(2, 2)); - - // Cache should be warmed up. - assert!(rx2.recv_timeout(Duration::from_millis(500)).unwrap()); - // It should ack the message just after cache is warmed up. - assert_eq!(rx.recv_timeout(Duration::from_millis(500)).unwrap(), 2); - cluster.sim.wl().clear_recv_filters(1); - cluster +macro_rules! run_cluster_and_warm_up_cache_for_store2 { + ($cluster:expr) => { + $cluster.cfg.raft_store.max_entry_cache_warmup_duration = ReadableDuration::secs(1000); + prevent_from_gc_raft_log(&mut $cluster.cfg); + run_cluster_for_test_warmup_entry_cache!($cluster); + + let (sx, rx) = channel::unbounded(); + let recv_filter = Box::new( + RegionPacketFilter::new(1, 1) + .direction(Direction::Recv) + .msg_type(MessageType::MsgTransferLeader) + .set_msg_callback(Arc::new(move |m| { + sx.send(m.get_message().get_from()).unwrap(); + })), + ); + $cluster.sim.wl().add_recv_filter(1, recv_filter); + + let (sx2, rx2) = channel::unbounded(); + fail::cfg_callback("on_entry_cache_warmed_up", move || sx2.send(true).unwrap()).unwrap(); + $cluster.transfer_leader(1, new_peer(2, 2)); + + // Cache should be warmed up. + assert!(rx2.recv_timeout(Duration::from_millis(500)).unwrap()); + // It should ack the message just after cache is warmed up. + assert_eq!(rx.recv_timeout(Duration::from_millis(500)).unwrap(), 2); + $cluster.sim.wl().clear_recv_filters(1); + }; } /// Leader should carry a correct index in TransferLeaderMsg so that /// the follower can warm up the entry cache with this index. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_transfer_leader_msg_index() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.raft_entry_cache_life_time = ReadableDuration::secs(1000); - prevent_from_gc_raft_log(&mut cluster); - run_cluster_for_test_warmup_entry_cache(&mut cluster); + prevent_from_gc_raft_log(&mut cluster.cfg); + run_cluster_for_test_warmup_entry_cache!(cluster); let (sx, rx) = channel::unbounded(); let recv_filter = Box::new( @@ -452,12 +471,13 @@ fn test_transfer_leader_msg_index() { /// The store should ack the transfer leader msg immediately /// when the warmup range start is larger than it's last index. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_when_warmup_range_start_is_larger_than_last_index() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.raft_entry_cache_life_time = ReadableDuration::secs(1000); - prevent_from_gc_raft_log(&mut cluster); - run_cluster_for_test_warmup_entry_cache(&mut cluster); + prevent_from_gc_raft_log(&mut cluster.cfg); + run_cluster_for_test_warmup_entry_cache!(cluster); cluster.pd_client.disable_default_operator(); let s4 = cluster.add_new_engine(); @@ -490,14 +510,15 @@ fn test_when_warmup_range_start_is_larger_than_last_index() { /// When the start index of warmup range is compacted, the follower should /// still warm up and use the compacted_idx as the start index. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_when_warmup_range_start_is_compacted() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // GC raft log aggressively. 
cluster.cfg.raft_store.merge_max_log_gap = 1; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(5); cluster.cfg.raft_store.max_entry_cache_warmup_duration = ReadableDuration::secs(1000); - run_cluster_for_test_warmup_entry_cache(&mut cluster); + run_cluster_for_test_warmup_entry_cache!(cluster); cluster.pd_client.disable_default_operator(); // Case `test_transfer_leader_msg_index` already proves that @@ -517,11 +538,12 @@ fn test_when_warmup_range_start_is_compacted() { } /// Transfer leader should work as normal when disable warming up entry cache. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_turnoff_warmup_entry_cache() { - let mut cluster = new_node_cluster(0, 3); - prevent_from_gc_raft_log(&mut cluster); - run_cluster_for_test_warmup_entry_cache(&mut cluster); + let mut cluster = new_cluster(0, 3); + prevent_from_gc_raft_log(&mut cluster.cfg); + run_cluster_for_test_warmup_entry_cache!(cluster); cluster.cfg.raft_store.max_entry_cache_warmup_duration = ReadableDuration::secs(0); fail::cfg("worker_async_fetch_raft_log", "pause").unwrap(); cluster.must_transfer_leader(1, new_peer(2, 2)); @@ -529,12 +551,13 @@ fn test_turnoff_warmup_entry_cache() { /// When the follower has not warmed up the entry cache and the timeout of /// warmup is very long, then the leadership transfer can never succeed. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_when_warmup_fail_and_its_timeout_is_too_long() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.max_entry_cache_warmup_duration = ReadableDuration::secs(1000); - prevent_from_gc_raft_log(&mut cluster); - run_cluster_for_test_warmup_entry_cache(&mut cluster); + prevent_from_gc_raft_log(&mut cluster.cfg); + run_cluster_for_test_warmup_entry_cache!(cluster); fail::cfg("worker_async_fetch_raft_log", "pause").unwrap(); cluster.transfer_leader(1, new_peer(2, 2)); @@ -547,12 +570,13 @@ fn test_when_warmup_fail_and_its_timeout_is_too_long() { /// When the follower has not warmed up the entry cache and the timeout of /// warmup is pretty short, then the leadership transfer should succeed quickly. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_when_warmup_fail_and_its_timeout_is_short() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.max_entry_cache_warmup_duration = ReadableDuration::millis(10); - prevent_from_gc_raft_log(&mut cluster); - run_cluster_for_test_warmup_entry_cache(&mut cluster); + prevent_from_gc_raft_log(&mut cluster.cfg); + run_cluster_for_test_warmup_entry_cache!(cluster); fail::cfg("worker_async_fetch_raft_log", "pause").unwrap(); cluster.must_transfer_leader(1, new_peer(2, 2)); @@ -560,13 +584,16 @@ fn test_when_warmup_fail_and_its_timeout_is_short() { /// The follower should ack the msg when the cache is warmed up. /// Besides, the cache should be kept for a period of time. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_when_warmup_succeed_and_become_leader() { - let mut cluster = run_cluster_and_warm_up_cache_for_store2(); + let mut cluster = new_cluster(0, 3); + run_cluster_and_warm_up_cache_for_store2!(cluster); // Generally, the cache will be compacted during post_apply. // However, if the cache is warmed up recently, the cache should be kept. let applied_index = cluster.apply_state(1, 2).applied_index; + debug!("applied_index: {}", applied_index); cluster.must_put(b"kk1", b"vv1"); cluster.wait_applied_index(1, 2, applied_index + 1); @@ -579,9 +606,11 @@ fn test_when_warmup_succeed_and_become_leader() { /// The follower should exit warmup state if it does not become leader /// in a period of time. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_when_warmup_succeed_and_not_become_leader() { - let mut cluster = run_cluster_and_warm_up_cache_for_store2(); + let mut cluster = new_cluster(0, 3); + run_cluster_and_warm_up_cache_for_store2!(cluster); let (sx, rx) = channel::unbounded(); fail::cfg_callback("worker_async_fetch_raft_log", move || { @@ -593,6 +622,7 @@ fn test_when_warmup_succeed_and_not_become_leader() { // Since the warmup state is stale, the peer should exit warmup state, // and the entry cache should be compacted during post_apply. let applied_index = cluster.apply_state(1, 2).applied_index; + debug!("applied_index: {}", applied_index); cluster.must_put(b"kk1", b"vv1"); cluster.wait_applied_index(1, 2, applied_index + 1); // The peer should warm up cache again when it receives a new TransferLeaderMsg. 
From 79caea614c03837022b80782ba9b7dfb14ae0c3e Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 8 Jun 2023 17:10:59 +0800 Subject: [PATCH 0728/1149] raftstore: fix missing CheckLongUncommitted tick (#14894) close tikv/tikv#14893 raftstore: fix missing CheckLongUncommitted tick Signed-off-by: Neil Shen --- .../raftstore-v2/src/operation/ready/mod.rs | 5 ++ components/raftstore/src/store/fsm/peer.rs | 7 +++ components/raftstore/src/store/peer.rs | 5 ++ tests/failpoints/cases/test_hibernate.rs | 48 ++++++++++++++++++- .../failpoints/cases/test_transfer_leader.rs | 41 +++++++++++++++- 5 files changed, 104 insertions(+), 2 deletions(-) diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index c74dfd3e0d1..fe519466463 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -1112,6 +1112,11 @@ impl Peer { } fn check_long_uncommitted_proposals(&mut self, ctx: &mut StoreContext) { + fail::fail_point!( + "on_check_long_uncommitted_proposals_1", + self.peer_id() == 1, + |_| {} + ); if self.has_long_uncommitted_proposals(ctx) { let status = self.raft_group().status(); let mut buffer: Vec<(u64, u64, u64)> = Vec::new(); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 8231e5b4f3e..24ac4681d63 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -1963,6 +1963,7 @@ where self.register_check_leader_lease_tick(); self.register_report_region_buckets_tick(); self.register_check_peers_availability_tick(); + self.register_check_long_uncommitted_tick(); } if let Some(ForceLeaderState::ForceLeader { .. 
}) = self.fsm.peer.force_leader { @@ -2835,6 +2836,7 @@ where if self.fsm.peer.is_leader() { self.register_check_leader_lease_tick(); self.register_report_region_buckets_tick(); + self.register_check_long_uncommitted_tick(); } } @@ -5601,6 +5603,11 @@ where { return; } + fail_point!( + "on_check_long_uncommitted_tick_1", + self.fsm.peer.peer_id() == 1, + |_| {} + ); self.fsm.peer.check_long_uncommitted_proposals(self.ctx); self.register_check_long_uncommitted_tick(); } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 91c9c9cef6a..d47c9784867 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -3099,6 +3099,11 @@ where /// Check long uncommitted proposals and log some info to help find why. pub fn check_long_uncommitted_proposals(&mut self, ctx: &mut PollContext) { + fail_point!( + "on_check_long_uncommitted_proposals_1", + self.peer_id() == 1, + |_| {} + ); if self.has_long_uncommitted_proposals(ctx) { let status = self.raft_group.status(); let mut buffer: Vec<(u64, u64, u64)> = Vec::new(); diff --git a/tests/failpoints/cases/test_hibernate.rs b/tests/failpoints/cases/test_hibernate.rs index d8670d9a21f..616a4e5e196 100644 --- a/tests/failpoints/cases/test_hibernate.rs +++ b/tests/failpoints/cases/test_hibernate.rs @@ -1,7 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - sync::{atomic::*, *}, + sync::{atomic::*, mpsc::channel, *}, thread, time::Duration, }; @@ -132,3 +132,49 @@ fn test_store_disconnect_with_hibernate() { must_get_equal(&cluster.get_engine(2), b"k2", b"v2"); must_get_equal(&cluster.get_engine(3), b"k2", b"v2"); } + +#[test] +fn test_check_long_uncommitted_proposals_while_hibernate() { + let mut cluster = new_node_cluster(0, 3); + let base_tick_ms = 50; + cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(base_tick_ms); + cluster.cfg.raft_store.raft_heartbeat_ticks = 2; + cluster.cfg.raft_store.raft_election_timeout_ticks = 10; + // So the random election timeout will always be 10, which makes the case more + // stable. + cluster.cfg.raft_store.raft_min_election_timeout_ticks = 10; + cluster.cfg.raft_store.raft_max_election_timeout_ticks = 11; + configure_for_hibernate(&mut cluster.cfg); + cluster.cfg.raft_store.check_long_uncommitted_interval = ReadableDuration::millis(200); + cluster.cfg.raft_store.long_uncommitted_base_threshold = ReadableDuration::millis(500); + cluster.cfg.raft_store.check_leader_lease_interval = ReadableDuration::hours(1); + + cluster.pd_client.disable_default_operator(); + let r = cluster.run_conf_change(); + cluster.pd_client.must_add_peer(r, new_peer(2, 2)); + cluster.pd_client.must_add_peer(r, new_peer(3, 3)); + + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + + // Wait until all peers of region 1 hibernate. + fail::cfg("on_check_long_uncommitted_tick_1", "return").unwrap(); + thread::sleep(Duration::from_millis(base_tick_ms * 30)); + + // Must not tick CheckLongUncommitted after hibernate. 
+ let (tx, rx) = channel(); + let tx = Mutex::new(tx); + fail::cfg_callback("on_check_long_uncommitted_proposals_1", move || { + let _ = tx.lock().unwrap().send(()); + }) + .unwrap(); + rx.recv_timeout(2 * cluster.cfg.raft_store.long_uncommitted_base_threshold.0) + .unwrap_err(); + + // Must keep ticking CheckLongUncommitted if leader weak up. + fail::remove("on_check_long_uncommitted_tick_1"); + cluster.must_put(b"k1", b"v1"); + rx.recv_timeout(2 * cluster.cfg.raft_store.long_uncommitted_base_threshold.0) + .unwrap(); +} diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index dc66fe18df8..a1bf2f8a096 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -1,7 +1,10 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. use std::{ - sync::{mpsc, Arc}, + sync::{ + mpsc::{self, channel}, + Arc, Mutex, + }, thread, time::Duration, }; @@ -629,3 +632,39 @@ fn test_when_warmup_succeed_and_not_become_leader() { cluster.transfer_leader(1, new_peer(2, 2)); assert!(rx.recv_timeout(Duration::from_millis(500)).unwrap()); } + +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_check_long_uncommitted_proposals_after_became_leader() { + let mut cluster = new_cluster(0, 3); + let base_tick_ms = 50; + configure_for_lease_read(&mut cluster.cfg, Some(base_tick_ms), Some(1000)); + cluster.cfg.raft_store.check_long_uncommitted_interval = ReadableDuration::millis(200); + cluster.cfg.raft_store.long_uncommitted_base_threshold = ReadableDuration::millis(500); + + cluster.pd_client.disable_default_operator(); + let r = cluster.run_conf_change(); + cluster.pd_client.must_add_peer(r, new_peer(2, 2)); + cluster.pd_client.must_add_peer(r, new_peer(3, 3)); + + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + 
cluster.transfer_leader(1, new_peer(2, 2)); + + // Must not tick CheckLongUncommitted after became follower. + thread::sleep(2 * cluster.cfg.raft_store.long_uncommitted_base_threshold.0); + let (tx, rx) = channel(); + let tx = Mutex::new(tx); + fail::cfg_callback("on_check_long_uncommitted_proposals_1", move || { + let _ = tx.lock().unwrap().send(()); + }) + .unwrap(); + rx.recv_timeout(2 * cluster.cfg.raft_store.long_uncommitted_base_threshold.0) + .unwrap_err(); + + // Must keep ticking CheckLongUncommitted after became leader. + cluster.transfer_leader(1, new_peer(1, 1)); + rx.recv_timeout(2 * cluster.cfg.raft_store.long_uncommitted_base_threshold.0) + .unwrap(); +} From 81aeb584e45e2c9f3934358745f938305fc077b9 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Thu, 8 Jun 2023 14:58:46 -0700 Subject: [PATCH 0729/1149] RocksDB: change compaction-guard-min-output-file-size to 1MB (#14889) close tikv/tikv#14888 Change compaction-guard-min-output-file-size to 1MB --- etc/config-template.toml | 4 ++-- src/config/mod.rs | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/etc/config-template.toml b/etc/config-template.toml index c91fd0646d2..236dd9991c1 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -821,7 +821,7 @@ ## The lower bound of SST file size when compaction guard is enabled. The config prevent SST files ## being too small when compaction guard is enabled. -# compaction-guard-min-output-file-size = "8M" +# compaction-guard-min-output-file-size = "1M" ## The upper bound of SST file size when compaction guard is enabled. The config prevent SST files ## being too large when compaction guard is enabled. 
This config overrides target-file-size-base @@ -976,7 +976,7 @@ # dynamic-level-bytes = true # optimize-filters-for-hits = false # enable-compaction-guard = true -# compaction-guard-min-output-file-size = "8M" +# compaction-guard-min-output-file-size = "1M" # compaction-guard-max-output-file-size = "128M" # format-version = 2 # prepopulate-block-cache = "disabled" diff --git a/src/config/mod.rs b/src/config/mod.rs index 7ac76bdd6d2..caf194460c9 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -723,7 +723,7 @@ impl Default for DefaultCfConfig { prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, enable_doubly_skiplist: true, enable_compaction_guard: None, - compaction_guard_min_output_file_size: ReadableSize::mb(8), + compaction_guard_min_output_file_size: ReadableSize::mb(1), compaction_guard_max_output_file_size: ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Zstd, bottommost_zstd_compression_dict_size: 0, @@ -891,7 +891,7 @@ impl Default for WriteCfConfig { prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, enable_doubly_skiplist: true, enable_compaction_guard: None, - compaction_guard_min_output_file_size: ReadableSize::mb(8), + compaction_guard_min_output_file_size: ReadableSize::mb(1), compaction_guard_max_output_file_size: ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Zstd, bottommost_zstd_compression_dict_size: 0, @@ -1013,7 +1013,7 @@ impl Default for LockCfConfig { prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, enable_doubly_skiplist: true, enable_compaction_guard: None, - compaction_guard_min_output_file_size: ReadableSize::mb(8), + compaction_guard_min_output_file_size: ReadableSize::mb(1), compaction_guard_max_output_file_size: ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Disable, bottommost_zstd_compression_dict_size: 0, @@ -1110,7 +1110,7 @@ impl Default for RaftCfConfig { prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, 
enable_doubly_skiplist: true, enable_compaction_guard: None, - compaction_guard_min_output_file_size: ReadableSize::mb(8), + compaction_guard_min_output_file_size: ReadableSize::mb(1), compaction_guard_max_output_file_size: ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Disable, bottommost_zstd_compression_dict_size: 0, @@ -1634,7 +1634,7 @@ impl Default for RaftDefaultCfConfig { prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, enable_doubly_skiplist: true, enable_compaction_guard: None, - compaction_guard_min_output_file_size: ReadableSize::mb(8), + compaction_guard_min_output_file_size: ReadableSize::mb(1), compaction_guard_max_output_file_size: ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Disable, bottommost_zstd_compression_dict_size: 0, From 8de74cc6a217c18968d5eddbd3c28682fcbdfe01 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 9 Jun 2023 17:20:46 +0800 Subject: [PATCH 0730/1149] raftstore-v2: propose no-op cmd for read index during transfer leader (#14878) ref tikv/tikv#14876 raftstore-v2: propose no-op cmd for read index during transfer leader Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../raftstore-v2/src/operation/query/lease.rs | 43 ++-- .../raftstore-v2/src/operation/query/local.rs | 15 +- components/raftstore/src/store/peer.rs | 1 - components/test_raftstore-v2/src/cluster.rs | 24 ++ components/test_raftstore-v2/src/util.rs | 96 +++++++- .../integrations/raftstore/test_lease_read.rs | 223 +++++++++--------- 6 files changed, 259 insertions(+), 143 deletions(-) diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index f76d724f06c..2c8b49ae172 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -3,7 +3,7 @@ use std::sync::Mutex; use engine_traits::{KvEngine, RaftEngine}; -use 
kvproto::raft_cmdpb::RaftCmdRequest; +use kvproto::raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}; use raft::{ eraftpb::{self, MessageType}, Storage, @@ -11,8 +11,9 @@ use raft::{ use raftstore::{ store::{ can_amend_read, fsm::apply::notify_stale_req, metrics::RAFT_READ_INDEX_PENDING_COUNT, - msg::ReadCallback, propose_read_index, should_renew_lease, util::LeaseState, ReadDelegate, - ReadIndexRequest, ReadProgress, Transport, + msg::ReadCallback, propose_read_index, should_renew_lease, + simple_write::SimpleWriteEncoder, util::LeaseState, ReadDelegate, ReadIndexRequest, + ReadProgress, Transport, }, Error, Result, }; @@ -25,7 +26,7 @@ use crate::{ batch::StoreContext, fsm::StoreMeta, raft::Peer, - router::{QueryResChannel, QueryResult, ReadResponse}, + router::{CmdResChannel, QueryResChannel, QueryResult, ReadResponse}, }; impl Peer { @@ -146,26 +147,24 @@ impl Peer { "request_id" => ?id, ); - self.set_has_ready(); // TimeoutNow has been sent out, so we need to propose explicitly to // update leader lease. 
- // TODO:add following when propose is done - // if self.leader_lease.is_suspect() { - // let req = RaftCmdRequest::default(); - // if let Ok(Either::Left(index)) = self.propose_normal(ctx, req) { - // let (callback, _) = CmdResChannel::pair(); - // let p = Proposal { - // is_conf_change: false, - // index, - // term: self.term(), - // cb: callback, - // propose_time: Some(now), - // must_pass_epoch_check: false, - // }; - // - // self.post_propose(ctx, p); - // } - // } + if self.leader_lease().is_suspect() { + self.propose_no_op(ctx); + } + + self.set_has_ready(); + } + + fn propose_no_op(&mut self, ctx: &mut StoreContext) { + let mut header = Box::::default(); + header.set_region_id(self.region_id()); + header.set_peer(self.peer().clone()); + header.set_region_epoch(self.region().get_region_epoch().clone()); + header.set_term(self.term()); + let empty_data = SimpleWriteEncoder::with_capacity(0).encode(); + let (ch, _) = CmdResChannel::pair(); + self.on_simple_write(ctx, header, empty_data, ch); } /// response the read index request diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 9101b1850e8..d9af1c6594f 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -200,7 +200,7 @@ where fn try_get_snapshot( &mut self, req: &RaftCmdRequest, - after_read_index: bool, + has_read_index_success: bool, ) -> ReadResult, RaftCmdResponse> { match self.pre_propose_raft_command(req) { ReadResult::Ok((mut delegate, policy)) => { @@ -215,15 +215,19 @@ where atomic::fence(atomic::Ordering::Release); let snapshot_ts = monotonic_raw_now(); - if !delegate.is_in_leader_lease(snapshot_ts) { + if !delegate.is_in_leader_lease(snapshot_ts) && !has_read_index_success { + // Redirect if it's not in lease and it has not finish read index. 
return ReadResult::Redirect; } TLS_LOCAL_READ_METRICS .with(|m| m.borrow_mut().local_executed_requests.inc()); - // Try renew lease in advance - self.maybe_renew_lease_in_advance(&delegate, req, snapshot_ts); + if !has_read_index_success { + // Try renew lease in advance only if it has not read index before. + // Because a successful read index has already renewed lease. + self.maybe_renew_lease_in_advance(&delegate, req, snapshot_ts); + } snap } ReadRequestPolicy::StaleRead => { @@ -251,7 +255,7 @@ where } ReadRequestPolicy::ReadIndex => { // ReadIndex is returned only for replica read. - if !after_read_index { + if !has_read_index_success { // It needs to read index before getting snapshot. return ReadResult::Redirect; } @@ -590,7 +594,6 @@ impl<'r> SnapRequestInspector<'r> { } // Local read should be performed, if and only if leader is in lease. - // None for now. match self.inspect_lease() { LeaseState::Valid => Ok(ReadRequestPolicy::ReadLocal), LeaseState::Expired | LeaseState::Suspect => { diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index d47c9784867..8e438ef808d 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -5638,7 +5638,6 @@ pub trait RequestInspector { } // Local read should be performed, if and only if leader is in lease. - // None for now. 
match self.inspect_lease() { LeaseState::Valid => Ok(RequestPolicy::ReadLocal), LeaseState::Expired | LeaseState::Suspect => { diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 91bd24136d5..00ca2239c34 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -1167,6 +1167,30 @@ impl, EK: KvEngine> Cluster { } } + pub fn wait_last_index( + &mut self, + region_id: u64, + store_id: u64, + expected: u64, + timeout: Duration, + ) { + let timer = Instant::now(); + loop { + let raft_state = self.raft_local_state(region_id, store_id); + let cur_index = raft_state.get_last_index(); + if cur_index >= expected { + return; + } + if timer.saturating_elapsed() >= timeout { + panic!( + "[region {}] last index still not reach {}: {:?}", + region_id, expected, raft_state + ); + } + thread::sleep(Duration::from_millis(10)); + } + } + pub fn wait_applied_index(&mut self, region_id: u64, store_id: u64, index: u64) { let timer = Instant::now(); loop { diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index 77117553eec..9a851e59e03 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -12,13 +12,13 @@ use kvproto::{ encryptionpb::EncryptionMethod, kvrpcpb::Context, metapb, - raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, + raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse}, }; -use raftstore::Result; +use raftstore::{store::ReadResponse, Result}; use rand::{prelude::SliceRandom, RngCore}; use server::common::ConfiguredRaftEngine; use tempfile::TempDir; -use test_raftstore::{new_get_cmd, new_put_cf_cmd, new_request, Config}; +use test_raftstore::{new_get_cmd, new_put_cf_cmd, new_request, new_snap_cmd, Config}; use tikv::{ server::KvEngineFactoryBuilder, storage::{ @@ -27,7 +27,9 @@ use tikv::{ point_key_range, Engine, Snapshot, }, }; -use tikv_util::{config::ReadableDuration, 
worker::LazyWorker, HandyRwLock}; +use tikv_util::{ + config::ReadableDuration, escape, future::block_on_timeout, worker::LazyWorker, HandyRwLock, +}; use txn_types::Key; use crate::{bootstrap_store, cluster::Cluster, ServerCluster, Simulator}; @@ -252,6 +254,44 @@ pub fn async_read_on_peer, EK: KvEngine>( Box::pin(async move { f.await.unwrap() }) } +pub fn batch_read_on_peer, EK: KvEngine>( + cluster: &mut Cluster, + requests: &[(metapb::Peer, metapb::Region)], +) -> Vec::Snapshot>> { + let mut results = vec![]; + for (peer, region) in requests { + let node_id = peer.get_store_id(); + let mut request = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![new_snap_cmd()], + false, + ); + request.mut_header().set_peer(peer.clone()); + let snap = cluster.sim.wl().async_snapshot(node_id, request); + let resp = block_on_timeout( + Box::pin(async move { + match snap.await { + Ok(snap) => ReadResponse { + response: Default::default(), + snapshot: Some(snap), + txn_extra_op: Default::default(), + }, + Err(resp) => ReadResponse { + response: resp, + snapshot: None, + txn_extra_op: Default::default(), + }, + } + }), + Duration::from_secs(1), + ) + .unwrap(); + results.push(resp); + } + results +} + pub fn async_read_index_on_peer, EK: KvEngine>( cluster: &mut Cluster, peer: metapb::Peer, @@ -318,3 +358,51 @@ pub fn test_delete_range, EK: KvEngine>(cluster: &mut Cluster Vec { + if resp.get_header().has_error() { + panic!("failed to read {:?}", resp); + } + assert_eq!(resp.get_responses().len(), 1); + assert_eq!(resp.get_responses()[0].get_cmd_type(), CmdType::Get); + assert!(resp.get_responses()[0].has_get()); + resp.get_responses()[0].get_get().get_value().to_vec() +} + +pub fn must_read_on_peer, EK: KvEngine>( + cluster: &mut Cluster, + peer: metapb::Peer, + region: metapb::Region, + key: &[u8], + value: &[u8], +) { + let timeout = Duration::from_secs(5); + match read_on_peer(cluster, peer, region, key, false, timeout) { + Ok(ref resp) if value == 
must_get_value(resp).as_slice() => (), + other => panic!( + "read key {}, expect value {:?}, got {:?}", + log_wrappers::hex_encode_upper(key), + value, + other + ), + } +} + +pub fn must_error_read_on_peer, EK: KvEngine>( + cluster: &mut Cluster, + peer: metapb::Peer, + region: metapb::Region, + key: &[u8], + timeout: Duration, +) { + if let Ok(mut resp) = read_on_peer(cluster, peer, region, key, false, timeout) { + if !resp.get_header().has_error() { + let value = resp.mut_responses()[0].mut_get().take_value(); + panic!( + "key {}, expect error but got {}", + log_wrappers::hex_encode_upper(key), + escape(&value) + ); + } + } +} diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index e1905c99476..77794a415b6 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -16,6 +16,7 @@ use pd_client::PdClient; use raft::eraftpb::{ConfChangeType, MessageType}; use raftstore::store::{Callback, RegionSnapshot}; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::{config::*, future::block_on_timeout, time::Instant, HandyRwLock}; // A helper function for testing the lease reads and lease renewing. @@ -32,99 +33,109 @@ use tikv_util::{config::*, future::block_on_timeout, time::Instant, HandyRwLock} // by writing a Raft log to the Raft quorum. It is called "consistent write". // All writes are consistent writes. Every time the leader performs a consistent // read/write, it will try to renew its lease. -fn test_renew_lease(cluster: &mut Cluster) { - // Avoid triggering the log compaction in this test case. - cluster.cfg.raft_store.raft_log_gc_threshold = 100; - // Increase the Raft tick interval to make this test case running reliably. - // Use large election timeout to make leadership stable. - configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); - // Override max leader lease to 2 seconds. 
- let max_lease = Duration::from_secs(2); - cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(max_lease); - cluster.cfg.raft_store.check_leader_lease_interval = ReadableDuration::hours(10); - cluster.cfg.raft_store.renew_leader_lease_advance_duration = ReadableDuration::secs(0); +macro_rules! test_renew_lease { + ($cluster:expr) => { + // Avoid triggering the log compaction in this test case. + $cluster.cfg.raft_store.raft_log_gc_threshold = 100; + // Increase the Raft tick interval to make this test case running reliably. + // Use large election timeout to make leadership stable. + configure_for_lease_read(&mut $cluster.cfg, Some(50), Some(10_000)); + // Override max leader lease to 2 seconds. + let max_lease = Duration::from_secs(2); + $cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(max_lease); + $cluster.cfg.raft_store.check_leader_lease_interval = ReadableDuration::hours(10); + $cluster.cfg.raft_store.renew_leader_lease_advance_duration = ReadableDuration::secs(0); + + let node_id = 1u64; + let store_id = 1u64; + let peer = new_peer(store_id, node_id); + $cluster.pd_client.disable_default_operator(); + let region_id = $cluster.run_conf_change(); + + let key = b"k"; + $cluster.must_put(key, b"v0"); + for id in 2..=$cluster.engines.len() as u64 { + $cluster + .pd_client + .must_add_peer(region_id, new_peer(id, id)); + must_get_equal(&$cluster.get_engine(id), key, b"v0"); + } - let node_id = 1u64; - let store_id = 1u64; - let peer = new_peer(store_id, node_id); - cluster.pd_client.disable_default_operator(); - let region_id = cluster.run_conf_change(); + // Write the initial value for a key. + let key = b"k"; + $cluster.must_put(key, b"v1"); + // Force `peer` to become leader. 
+ let region = $cluster.get_region(key); + let region_id = region.get_id(); + $cluster.must_transfer_leader(region_id, peer.clone()); + let state = $cluster.raft_local_state(region_id, store_id); + let last_index = state.get_last_index(); - let key = b"k"; - cluster.must_put(key, b"v0"); - for id in 2..=cluster.engines.len() as u64 { - cluster.pd_client.must_add_peer(region_id, new_peer(id, id)); - must_get_equal(&cluster.get_engine(id), key, b"v0"); - } + let detector = LeaseReadFilter::default(); + $cluster.add_send_filter(CloneFilterFactory(detector.clone())); - // Write the initial value for a key. - let key = b"k"; - cluster.must_put(key, b"v1"); - // Force `peer` to become leader. - let region = cluster.get_region(key); - let region_id = region.get_id(); - cluster.must_transfer_leader(region_id, peer.clone()); - let state = cluster.raft_local_state(region_id, store_id); - let last_index = state.get_last_index(); + // Issue a read request and check the value on response. + must_read_on_peer(&mut $cluster, peer.clone(), region.clone(), key, b"v1"); + assert_eq!(detector.ctx.rl().len(), 0); - let detector = LeaseReadFilter::default(); - cluster.add_send_filter(CloneFilterFactory(detector.clone())); + let mut expect_lease_read = 0; - // Issue a read request and check the value on response. - must_read_on_peer(cluster, peer.clone(), region.clone(), key, b"v1"); - assert_eq!(detector.ctx.rl().len(), 0); + if $cluster.engines.len() > 1 { + // Wait for the leader lease to expire. + thread::sleep(max_lease); - let mut expect_lease_read = 0; + // Issue a read request and check the value on response. + must_read_on_peer(&mut $cluster, peer.clone(), region.clone(), key, b"v1"); + + // Check if the leader does a index read and renewed its lease. 
+ assert_eq!($cluster.leader_of_region(region_id), Some(peer.clone())); + expect_lease_read += 1; + assert_eq!(detector.ctx.rl().len(), expect_lease_read); + } - if cluster.engines.len() > 1 { // Wait for the leader lease to expire. thread::sleep(max_lease); - // Issue a read request and check the value on response. - must_read_on_peer(cluster, peer.clone(), region.clone(), key, b"v1"); + // Issue a write request. + $cluster.must_put(key, b"v2"); - // Check if the leader does a index read and renewed its lease. - assert_eq!(cluster.leader_of_region(region_id), Some(peer.clone())); - expect_lease_read += 1; - assert_eq!(detector.ctx.rl().len(), expect_lease_read); - } + // Check if the leader has renewed its lease so that it can do lease read. + assert_eq!($cluster.leader_of_region(region_id), Some(peer.clone())); + let state = $cluster.raft_local_state(region_id, store_id); + assert_eq!(state.get_last_index(), last_index + 1); - // Wait for the leader lease to expire. - thread::sleep(max_lease); - - // Issue a write request. - cluster.must_put(key, b"v2"); - - // Check if the leader has renewed its lease so that it can do lease read. - assert_eq!(cluster.leader_of_region(region_id), Some(peer.clone())); - let state = cluster.raft_local_state(region_id, store_id); - assert_eq!(state.get_last_index(), last_index + 1); - - // Issue a read request and check the value on response. - must_read_on_peer(cluster, peer, region, key, b"v2"); + // Issue a read request and check the value on response. + must_read_on_peer(&mut $cluster, peer, region, key, b"v2"); - // Check if the leader does a local read. - assert_eq!(detector.ctx.rl().len(), expect_lease_read); + // Check if the leader does a local read. 
+ assert_eq!(detector.ctx.rl().len(), expect_lease_read); + }; } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_one_node_renew_lease() { let count = 1; - let mut cluster = new_node_cluster(0, count); - test_renew_lease(&mut cluster); + let mut cluster = new_cluster(0, count); + test_renew_lease!(cluster); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_renew_lease() { let count = 3; - let mut cluster = new_node_cluster(0, count); - test_renew_lease(&mut cluster); + let mut cluster = new_cluster(0, count); + test_renew_lease!(cluster); } -// A helper function for testing the lease reads when the lease has expired. +// Test lease reads when the lease has expired. // If the leader lease has expired, there may be new leader elected and // the old leader will fail to renew its lease. -fn test_lease_expired(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_lease_expired() { + let count = 3; + let mut cluster = new_cluster(0, count); let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer number check. pd_client.disable_default_operator(); @@ -154,23 +165,17 @@ fn test_lease_expired(cluster: &mut Cluster) { thread::sleep(election_timeout * 2); // Issue a read request and check the value on response. - must_error_read_on_peer(cluster, peer, region, key, Duration::from_secs(1)); + must_error_read_on_peer(&mut cluster, peer, region, key, Duration::from_secs(1)); } -#[test] -fn test_node_lease_expired() { - let count = 3; - let mut cluster = new_node_cluster(0, count); - test_lease_expired(&mut cluster); -} - -// A helper function for testing the leader holds unsafe lease during the leader -// transfer procedure, so it will not do lease read. 
-// Since raft will not propose any request during leader transfer procedure, -// consistent read/write could not be performed neither. +// Test leader holds unsafe lease during the leader transfer procedure. // When leader transfer procedure aborts later, the leader would use and update // the lease as usual. -fn test_lease_unsafe_during_leader_transfers(cluster: &mut Cluster) { +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_lease_unsafe_during_leader_transfers() { + let count = 3; + let mut cluster = new_cluster(0, count); // Avoid triggering the log compaction in this test case. cluster.cfg.raft_store.raft_log_gc_threshold = 100; // Increase the Raft tick interval to make this test case running reliably. @@ -203,13 +208,13 @@ fn test_lease_unsafe_during_leader_transfers(cluster: &mut Cluster cluster.must_transfer_leader(region_id, peer.clone()); // Issue a read request and check the value on response. - must_read_on_peer(cluster, peer.clone(), region.clone(), key, b"v1"); + must_read_on_peer(&mut cluster, peer.clone(), region.clone(), key, b"v1"); let state = cluster.raft_local_state(region_id, store_id); let last_index = state.get_last_index(); // Check if the leader does a local read. - must_read_on_peer(cluster, peer.clone(), region.clone(), key, b"v1"); + must_read_on_peer(&mut cluster, peer.clone(), region.clone(), key, b"v1"); let state = cluster.raft_local_state(region_id, store_id); assert_eq!(state.get_last_index(), last_index); assert_eq!(detector.ctx.rl().len(), 0); @@ -233,11 +238,11 @@ fn test_lease_unsafe_during_leader_transfers(cluster: &mut Cluster thread::sleep(election_timeout / 2); // Issue a read request and it will fall back to read index. - must_read_on_peer(cluster, peer.clone(), region.clone(), key, b"v1"); + must_read_on_peer(&mut cluster, peer.clone(), region.clone(), key, b"v1"); assert_eq!(detector.ctx.rl().len(), 1); // And read index should not update lease. 
- must_read_on_peer(cluster, peer.clone(), region.clone(), key, b"v1"); + must_read_on_peer(&mut cluster, peer.clone(), region.clone(), key, b"v1"); assert_eq!(detector.ctx.rl().len(), 2); // Make sure the leader transfer procedure timeouts. @@ -247,7 +252,7 @@ fn test_lease_unsafe_during_leader_transfers(cluster: &mut Cluster // or consistent read/write and renew/reuse the lease as usual. // Issue a read request and check the value on response. - must_read_on_peer(cluster, peer.clone(), region.clone(), key, b"v1"); + must_read_on_peer(&mut cluster, peer.clone(), region.clone(), key, b"v1"); assert_eq!(detector.ctx.rl().len(), 3); // Check if the leader also propose an entry to renew its lease. @@ -267,27 +272,19 @@ fn test_lease_unsafe_during_leader_transfers(cluster: &mut Cluster } // Check if the leader does a local read. - must_read_on_peer(cluster, peer, region, key, b"v1"); + must_read_on_peer(&mut cluster, peer, region, key, b"v1"); let state = cluster.raft_local_state(region_id, store_id); assert_eq!(state.get_last_index(), last_index + 1); assert_eq!(detector.ctx.rl().len(), 3); } -#[test] -fn test_node_lease_unsafe_during_leader_transfers() { - let count = 3; - let mut cluster = new_node_cluster(0, count); - test_lease_unsafe_during_leader_transfers(&mut cluster); -} - -#[test] +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] +// TODO: batch get snapshot is not supported in raftstore v2 currently. +// https://github.com/tikv/tikv/issues/14876 fn test_node_batch_id_in_lease() { let count = 3; - let mut cluster = new_node_cluster(0, count); - test_batch_id_in_lease(&mut cluster); -} - -fn test_batch_id_in_lease(cluster: &mut Cluster) { + let mut cluster = new_cluster(0, count); let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer number check. 
pd_client.disable_default_operator(); @@ -349,7 +346,7 @@ fn test_batch_id_in_lease(cluster: &mut Cluster) { .zip(regions) .map(|(p, r)| (p.clone(), r)) .collect(); - let responses = batch_read_on_peer(cluster, &requests); + let responses = batch_read_on_peer(&mut cluster, &requests); let snaps: Vec> = responses .into_iter() .map(|response| { @@ -371,7 +368,7 @@ fn test_batch_id_in_lease(cluster: &mut Cluster) { // make sure that region 2 could renew lease. cluster.must_put(b"k55", b"v2"); - let responses = batch_read_on_peer(cluster, &requests); + let responses = batch_read_on_peer(&mut cluster, &requests); let snaps2: Vec> = responses .into_iter() .map(|response| { @@ -452,10 +449,11 @@ fn test_node_callback_when_destroyed() { } /// Test if the callback proposed by read index is cleared correctly. -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_lease_read_callback_destroy() { // Only server cluster can fake sending message successfully in raftstore layer. - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // Increase the Raft tick interval to make this test case running reliably. let election_timeout = configure_for_lease_read(&mut cluster.cfg, Some(50), None); cluster.run(); @@ -578,9 +576,10 @@ fn test_read_index_stale_in_suspect_lease() { drop(cluster); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_local_read_cache() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_lease_read(&mut cluster.cfg, Some(50), None); cluster.pd_client.disable_default_operator(); cluster.run(); @@ -730,9 +729,12 @@ fn test_read_index_after_write() { ); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] +// TODO: Renew leader lease periodically is not supported in raftstore v2 +// currently. 
https://github.com/tikv/tikv/issues/14876 fn test_infinite_lease() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // Avoid triggering the log compaction in this test case. cluster.cfg.raft_store.raft_log_gc_threshold = 100; // Increase the Raft tick interval to make this test case running reliably. @@ -787,9 +789,10 @@ fn test_infinite_lease() { // LocalReader will try to renew lease in advance, so the region that has // continuous reads should not go to hibernate. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_local_read_renew_lease() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(500); let (base_tick_ms, election_ticks) = (50, 10); configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10)); From 1f8872113d9709ce319d1ae6a5c9446ebc5d2a8f Mon Sep 17 00:00:00 2001 From: Lucas Date: Mon, 12 Jun 2023 11:07:05 +0800 Subject: [PATCH 0731/1149] encryption: support KMS on Azure (#14819) close tikv/tikv#14693 This pull request introduces the integration of the Azure Key Management Service (KMS) into the project. The KMS functionality allows for secure key management and encryption operations in the Azure environment. And given the ongoing discussions regarding the final representation of encryption support on multi-backend clouds, this implementation aims to be as compatible as possible with previous works. 
--- Cargo.lock | 59 ++- components/cloud/aws/src/kms.rs | 2 + components/cloud/azure/Cargo.toml | 9 +- components/cloud/azure/src/kms.rs | 343 ++++++++++++++++++ components/cloud/azure/src/lib.rs | 4 + .../certificate_credentials.rs | 3 +- components/cloud/src/kms.rs | 38 ++ components/cloud/src/lib.rs | 2 +- components/encryption/export/Cargo.toml | 3 +- components/encryption/export/examples/ecli.rs | 64 +++- components/encryption/export/src/lib.rs | 57 ++- components/encryption/src/config.rs | 151 +++++++- components/encryption/src/master_key/mod.rs | 2 +- 13 files changed, 689 insertions(+), 48 deletions(-) create mode 100644 components/cloud/azure/src/kms.rs diff --git a/Cargo.lock b/Cargo.lock index 4d74871168e..5c609d6c2fb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -360,6 +360,7 @@ dependencies = [ "async-trait", "azure_core", "azure_identity", + "azure_security_keyvault", "azure_storage", "azure_storage_blobs", "base64 0.13.0", @@ -385,8 +386,8 @@ dependencies = [ [[package]] name = "azure_core" -version = "0.11.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#e21e2ec6bae784a717ac7b3cf1123d3a9596f074" +version = "0.12.0" +source = "git+https://github.com/Azure/azure-sdk-for-rust#1fac7b8760f2610afd74269e3cc27e76eb33e6a9" dependencies = [ "async-trait", "base64 0.21.0", @@ -411,8 +412,8 @@ dependencies = [ [[package]] name = "azure_identity" -version = "0.11.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#e21e2ec6bae784a717ac7b3cf1123d3a9596f074" +version = "0.12.0" +source = "git+https://github.com/Azure/azure-sdk-for-rust#1fac7b8760f2610afd74269e3cc27e76eb33e6a9" dependencies = [ "async-lock", "async-trait", @@ -429,10 +430,25 @@ dependencies = [ "uuid 1.2.1", ] +[[package]] +name = "azure_security_keyvault" +version = "0.12.0" +source = "git+https://github.com/Azure/azure-sdk-for-rust#1fac7b8760f2610afd74269e3cc27e76eb33e6a9" +dependencies = [ + "async-trait", + "azure_core", + "const_format", + "futures 0.3.15", + "serde", + 
"serde_json", + "time 0.3.20", + "url", +] + [[package]] name = "azure_storage" -version = "0.11.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#e21e2ec6bae784a717ac7b3cf1123d3a9596f074" +version = "0.12.0" +source = "git+https://github.com/Azure/azure-sdk-for-rust#1fac7b8760f2610afd74269e3cc27e76eb33e6a9" dependencies = [ "RustyXML", "async-trait", @@ -453,8 +469,8 @@ dependencies = [ [[package]] name = "azure_storage_blobs" -version = "0.11.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#e21e2ec6bae784a717ac7b3cf1123d3a9596f074" +version = "0.12.0" +source = "git+https://github.com/Azure/azure-sdk-for-rust#1fac7b8760f2610afd74269e3cc27e76eb33e6a9" dependencies = [ "RustyXML", "azure_core", @@ -1113,6 +1129,26 @@ dependencies = [ "cache-padded", ] +[[package]] +name = "const_format" +version = "0.2.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7309d9b4d3d2c0641e018d449232f2e28f1b22933c137f157d3dbc14228b8c0e" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f47bf7270cf70d370f8f98c1abb6d2d4cf60a6845d30e05bfb90c6568650" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "coprocessor_plugin_api" version = "0.1.0" @@ -1561,6 +1597,7 @@ version = "0.0.1" dependencies = [ "async-trait", "aws", + "azure", "cloud", "derive_more", "encryption", @@ -7387,6 +7424,12 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7007dbd421b92cc6e28410fe7362e2e0a2503394908f417b68ec8d1c364c4e20" +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + [[package]] name = "untrusted" version = "0.7.1" diff --git a/components/cloud/aws/src/kms.rs 
b/components/cloud/aws/src/kms.rs index baa54e39fd5..f158dfb709e 100644 --- a/components/cloud/aws/src/kms.rs +++ b/components/cloud/aws/src/kms.rs @@ -218,6 +218,7 @@ mod tests { region: "ap-southeast-2".to_string(), endpoint: String::new(), }, + azure: None, }; let dispatcher = @@ -261,6 +262,7 @@ mod tests { region: "ap-southeast-2".to_string(), endpoint: String::new(), }, + azure: None, }; // IncorrectKeyException diff --git a/components/cloud/azure/Cargo.toml b/components/cloud/azure/Cargo.toml index e06d00251b5..b74a44446d2 100644 --- a/components/cloud/azure/Cargo.toml +++ b/components/cloud/azure/Cargo.toml @@ -9,10 +9,11 @@ failpoints = ["fail/failpoints"] [dependencies] async-trait = "0.1" -azure_core = { version = "0.11.0", git = "https://github.com/Azure/azure-sdk-for-rust" } -azure_identity = { version = "0.11.0", git = "https://github.com/Azure/azure-sdk-for-rust" } -azure_storage = { version = "0.11.0", git = "https://github.com/Azure/azure-sdk-for-rust", default-features = false } -azure_storage_blobs = { version = "0.11.0", git = "https://github.com/Azure/azure-sdk-for-rust" } +azure_core = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust" } +azure_identity = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust" } +azure_security_keyvault = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust", default-features = false } +azure_storage = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust", default-features = false } +azure_storage_blobs = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust" } base64 = "0.13" cloud = { workspace = true } fail = "0.5" diff --git a/components/cloud/azure/src/kms.rs b/components/cloud/azure/src/kms.rs new file mode 100644 index 00000000000..c743ae415f5 --- /dev/null +++ b/components/cloud/azure/src/kms.rs @@ -0,0 +1,343 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ops::Deref, sync::Arc}; + +use async_trait::async_trait; +use azure_core::{auth::TokenCredential, new_http_client, Error as AzureError}; +use azure_identity::{ + AutoRefreshingTokenCredential, ClientSecretCredential, TokenCredentialOptions, +}; +use azure_security_keyvault::{prelude::*, KeyClient}; +use cloud::{ + error::{Error as CloudError, KmsError, Result}, + kms::{Config, CryptographyType, DataKeyPair, EncryptedKey, KeyId, KmsProvider, PlainKey}, +}; +use tikv_util::box_err; + +use crate::{ClientCertificateCredentialExt, STORAGE_VENDOR_NAME_AZURE}; + +/// Use 256 bits for data key as default. +const DEFAULT_DATAKEY_SIZE: u8 = 32; +const ENCRYPTION_VENDOR_NAME_AZURE_KMS: &str = STORAGE_VENDOR_NAME_AZURE; + +pub struct AzureKms { + tenant_id: String, + client_id: String, + /// Keyvault client to encrypt/decrypt data key. + client: KeyClient, + current_key_id: KeyId, + keyvault_url: String, + /// Hsm client to get random bytes for generating data key. + hsm_client: KeyClient, + hsm_name: String, + hsm_url: String, +} + +impl std::fmt::Debug for AzureKms { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let keyvault_client = AzureKeyVaultClientDebug { + tenant_id: self.tenant_id.clone(), + client_id: self.client_id.clone(), + keyvault_url: self.keyvault_url.clone(), + }; + let hsm_client = AzureHsmClientDebug { + hsm_name: self.hsm_name.clone(), + hsm_url: self.hsm_url.clone(), + }; + f.debug_struct("AzureKms") + .field("keyvault_client", &keyvault_client) + .field("current_key_id", &self.current_key_id) + .field("hsm_client", &hsm_client) + .finish() + } +} + +impl AzureKms { + #[inline] + fn new_with_credentials( + config: Config, + keyvault_credentials: Creds, + hsm_credentials: Creds, + ) -> Result + where + Creds: TokenCredential + Send + Sync + 'static, + { + assert!(config.azure.is_some()); + let azure_cfg = config.azure.unwrap(); + let keyvault_client = new_key_client(&azure_cfg.keyvault_url, keyvault_credentials)?; + 
let hsm_client = new_key_client(&azure_cfg.hsm_url, hsm_credentials)?; + Ok(Self { + client: keyvault_client, + current_key_id: config.key_id, + tenant_id: azure_cfg.tenant_id, + client_id: azure_cfg.client_id, + keyvault_url: azure_cfg.keyvault_url, + hsm_client, + hsm_name: azure_cfg.hsm_name, + hsm_url: azure_cfg.hsm_url, + }) + } + + pub fn new(config: Config) -> Result { + assert!(config.azure.is_some()); + let azure_cfg = config.azure.clone().unwrap(); + // Priority: explicit certificate > path of local certificate > client secret. + // And credentials for accessing KeyVault and Hsm should be different. + if let Some(certificate) = azure_cfg.client_certificate.clone() { + // Certificate to accessing KeyVault. + let (keyvault_credential, hsm_credential) = ( + ClientCertificateCredentialExt::new( + azure_cfg.tenant_id.clone(), + azure_cfg.client_id.clone(), + certificate.clone(), + azure_cfg.client_certificate_password.clone(), + ), + ClientCertificateCredentialExt::new( + azure_cfg.tenant_id.clone(), + azure_cfg.client_id, + certificate, + azure_cfg.client_certificate_password, + ), + ); + Self::new_with_credentials(config, keyvault_credential, hsm_credential) + } else if let Some(certificate_path) = azure_cfg.client_certificate_path.clone() { + // Certificate recorded in a file to accessing KeyVault. + let (keyvault_credential, hsm_credential) = ( + ClientCertificateCredentialExt::build( + azure_cfg.tenant_id.clone(), + azure_cfg.client_id.clone(), + certificate_path.clone(), + azure_cfg.client_certificate_password.clone(), + ) + .map_err(|e| CloudError::Other(e))?, + ClientCertificateCredentialExt::build( + azure_cfg.tenant_id.clone(), + azure_cfg.client_id, + certificate_path, + azure_cfg.client_certificate_password, + ) + .map_err(|e| CloudError::Other(e))?, + ); + Self::new_with_credentials(config, keyvault_credential, hsm_credential) + } else if let Some(client_secret) = azure_cfg.client_secret.clone() { + // Client secret to access KeyVault. 
+ let (keyvault_credential, hsm_credential) = ( + ClientSecretCredential::new( + new_http_client(), + azure_cfg.tenant_id.clone(), + azure_cfg.client_id.clone(), + client_secret.clone(), + TokenCredentialOptions::default(), + ), + ClientSecretCredential::new( + new_http_client(), + azure_cfg.tenant_id.clone(), + azure_cfg.client_id, + client_secret, + TokenCredentialOptions::default(), + ), + ); + Self::new_with_credentials(config, keyvault_credential, hsm_credential) + } else { + Err(CloudError::KmsError(KmsError::Other(box_err!( + "invalid configurations for Azure KMS" + )))) + } + } +} + +#[async_trait] +impl KmsProvider for AzureKms { + fn name(&self) -> &str { + ENCRYPTION_VENDOR_NAME_AZURE_KMS + } + + // On decrypt failure, the rule is to return WrongMasterKey error in case it is + // possible that a wrong master key has been used, or other error + // otherwise. + async fn decrypt_data_key(&self, data_key: &EncryptedKey) -> Result> { + let decrypt_params = DecryptParameters { + ciphertext: data_key.clone().into_inner(), + // TODO: the final choice of encryption algorithm for Azure waited + // to be discussed. And as the AesGcm only valid for HSM, + // encrypt/decrypt just uses the Rsa256 as default currently. + decrypt_parameters_encryption: CryptographParamtersEncryption::Rsa( + RsaEncryptionParameters::new(EncryptionAlgorithm::RsaOaep256).unwrap(), + ), + }; + self.client + .decrypt(self.current_key_id.deref().clone(), decrypt_params) + .await + .map_err(convert_azure_error) + .map(|response| response.result.to_vec()) + } + + async fn generate_data_key(&self) -> Result { + // Firstly, it should use `GetRandomBytes` API to get random bytes, + // generated by remote Azure, as the plaintext of a new data key. 
+ let random_bytes = { + let resp = self + .hsm_client + .get_random_bytes(&self.hsm_name, DEFAULT_DATAKEY_SIZE) + .await + .map_err(convert_azure_error)?; + resp.result + }; + let encrypt_params = EncryptParameters { + plaintext: random_bytes.clone(), + encrypt_parameters_encryption: CryptographParamtersEncryption::Rsa( + RsaEncryptionParameters::new(EncryptionAlgorithm::RsaOaep256).unwrap(), + ), + }; + self.client + .encrypt(&self.current_key_id.clone().into_inner(), encrypt_params) + .await + .map_err(convert_azure_error) + .and_then(|response| { + let ciphertext = response.result; + Ok(DataKeyPair { + encrypted: EncryptedKey::new(ciphertext)?, + plaintext: PlainKey::new(random_bytes, CryptographyType::AesGcm256)?, + }) + }) + } +} + +struct AzureKeyVaultClientDebug { + tenant_id: String, + client_id: String, + keyvault_url: String, +} + +impl std::fmt::Debug for AzureKeyVaultClientDebug { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("AzureKeyVaultClientDebug") + .field("tenant_id", &self.tenant_id) + .field("client_id", &self.client_id) + .field("keyvault_url", &self.keyvault_url) + .finish() + } +} + +struct AzureHsmClientDebug { + hsm_name: String, + hsm_url: String, +} + +impl std::fmt::Debug for AzureHsmClientDebug { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("AzureHsmClientDebug") + .field("hsm_name", &self.hsm_name) + .field("hsm_url", &self.hsm_url) + .finish() + } +} + +fn convert_azure_error(err: AzureError) -> CloudError { + let err_msg = if let Ok(e) = err.into_inner() { + e + } else { + Box::new(std::io::Error::new( + std::io::ErrorKind::Other, + "unknown error", + )) + }; + CloudError::KmsError(KmsError::Other(err_msg)) +} + +#[inline] +fn new_key_client(url: &str, credentials: Creds) -> Result +where + Creds: TokenCredential + Send + Sync + 'static, +{ + KeyClient::new( + url, + Arc::new(AutoRefreshingTokenCredential::new(Arc::new(credentials))), + ) + 
.map_err(|e| CloudError::Other(Box::new(e))) +} + +#[cfg(test)] +mod tests { + use cloud::kms::{Location, SubConfigAzure}; + + use super::*; + + #[test] + fn test_init_azure_kms() { + let err_azure_cfg = SubConfigAzure { + tenant_id: "tenant_id".to_owned(), + client_id: "client_id".to_owned(), + keyvault_url: "https://keyvault_url.vault.azure.net".to_owned(), + hsm_name: "hsm_name".to_owned(), + hsm_url: "https://hsm_url.managedhsm.azure.net/".to_owned(), + ..SubConfigAzure::default() + }; + let err_config = Config { + key_id: KeyId::new("test_key_id".to_string()).unwrap(), + vendor: STORAGE_VENDOR_NAME_AZURE.to_owned(), + location: Location { + region: "southeast".to_string(), + endpoint: String::new(), + }, + azure: Some(err_azure_cfg.clone()), + }; + AzureKms::new(err_config.clone()).unwrap_err(); + let azure_cfg = SubConfigAzure { + client_secret: Some("client_secret".to_owned()), + ..err_azure_cfg + }; + let config = Config { + azure: Some(azure_cfg), + ..err_config + }; + let azure_kms = AzureKms::new(config).unwrap(); + assert_eq!( + azure_kms.name(), + STORAGE_VENDOR_NAME_AZURE, + "{:?}", + azure_kms + ); + } + + #[tokio::test] + async fn test_azure_kms() { + // TODO: this is End2End test for testing the API connectivity + // and validity of AzureKms. And if you wanna to use + // this case, you should set a valid configuration for it. 
+ let azure_cfg = SubConfigAzure { + tenant_id: "tenant_id".to_owned(), + client_id: "client_id".to_owned(), + keyvault_url: "https://keyvault_url.vault.azure.net".to_owned(), + hsm_name: "hsm_name".to_owned(), + hsm_url: "https://hsm_url.managedhsm.azure.net/".to_owned(), + client_certificate: Some("client_certificate".to_owned()), + client_certificate_path: Some("client_certificate_path".to_owned()), + client_secret: Some("client_secret".to_owned()), + ..SubConfigAzure::default() + }; + let config = Config { + key_id: KeyId::new("ExampleKey".to_string()).unwrap(), + vendor: "STORAGE_VENDOR_NAME_AZURE".to_owned(), + location: Location { + region: "us-west".to_string(), + endpoint: String::new(), + }, + azure: Some(azure_cfg), + }; + if config.vendor != STORAGE_VENDOR_NAME_AZURE { + AzureKms::new(config).unwrap(); + } else { + // Unless the configurations of Azure KMS is valid, following + // codes could be executed. + let azure_kms = AzureKms::new(config).unwrap(); + let data_key = azure_kms.generate_data_key().await.unwrap(); + let encrypted_data_key = EncryptedKey::new(data_key.encrypted.to_vec()).unwrap(); + let decrypt_data_key = azure_kms + .decrypt_data_key(&encrypted_data_key) + .await + .unwrap(); + assert_eq!(*data_key.plaintext, decrypt_data_key); + } + } +} diff --git a/components/cloud/azure/src/lib.rs b/components/cloud/azure/src/lib.rs index 100ea9047c4..b909c5bb92d 100644 --- a/components/cloud/azure/src/lib.rs +++ b/components/cloud/azure/src/lib.rs @@ -1,7 +1,11 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
mod azblob; +mod kms; mod token_credentials; pub use azblob::{AzureStorage, Config}; +pub use kms::AzureKms; pub use token_credentials::certificate_credentials::ClientCertificateCredentialExt; + +pub const STORAGE_VENDOR_NAME_AZURE: &str = "azure"; diff --git a/components/cloud/azure/src/token_credentials/certificate_credentials.rs b/components/cloud/azure/src/token_credentials/certificate_credentials.rs index 73e31441a0d..af39e7be5fa 100644 --- a/components/cloud/azure/src/token_credentials/certificate_credentials.rs +++ b/components/cloud/azure/src/token_credentials/certificate_credentials.rs @@ -102,13 +102,14 @@ impl ClientCertificateCredentialExt { tenant_id: String, client_id: String, certificate_path: String, + certificate_pass: String, ) -> Result> { let bytes = std::fs::read(certificate_path)?; Ok(ClientCertificateCredentialExt::new( tenant_id, client_id, base64::encode(bytes), - "".into(), + certificate_pass, )) } diff --git a/components/cloud/src/kms.rs b/components/cloud/src/kms.rs index f1dfe783b5b..31a09b7cad7 100644 --- a/components/cloud/src/kms.rs +++ b/components/cloud/src/kms.rs @@ -13,10 +13,33 @@ pub struct Location { pub endpoint: String, } +/// Configurations for Azure KMS. +#[derive(Debug, Default, Clone)] +pub struct SubConfigAzure { + pub tenant_id: String, + pub client_id: String, + + /// Url to access KeyVault + pub keyvault_url: String, + /// Key name in the HSM + pub hsm_name: String, + /// Url to access HSM + pub hsm_url: String, + /// Authorized certificate + pub client_certificate: Option, + /// Path of local authorized certificate + pub client_certificate_path: Option, + /// Password for the certificate + pub client_certificate_password: String, + /// Secret of the client. 
+ pub client_secret: Option, +} + #[derive(Debug, Clone)] pub struct Config { pub key_id: KeyId, pub location: Location, + pub azure: Option, pub vendor: String, } @@ -28,9 +51,16 @@ impl Config { region: mk.region, endpoint: mk.endpoint, }, + azure: None, vendor: mk.vendor, }) } + + pub fn from_azure_kms_config(mk: MasterKeyKms, azure_kms_cfg: SubConfigAzure) -> Result { + let mut cfg = Config::from_proto(mk)?; + cfg.azure = Some(azure_kms_cfg); + Ok(cfg) + } } #[derive(PartialEq, Debug, Clone, Deref)] @@ -48,6 +78,10 @@ impl KeyId { Ok(KeyId(id)) } } + + pub fn into_inner(self) -> String { + self.0 + } } // EncryptedKey is a newtype used to mark data as an encrypted key @@ -65,6 +99,10 @@ impl EncryptedKey { Ok(Self(key)) } } + + pub fn into_inner(self) -> Vec { + self.0 + } } #[repr(u8)] diff --git a/components/cloud/src/lib.rs b/components/cloud/src/lib.rs index 4481680de0f..a41cfdbcb09 100644 --- a/components/cloud/src/lib.rs +++ b/components/cloud/src/lib.rs @@ -9,7 +9,7 @@ pub mod error; pub use error::{Error, ErrorTrait, Result}; pub mod kms; -pub use kms::{Config, DataKeyPair, EncryptedKey, KeyId, KmsProvider, PlainKey}; +pub use kms::{Config, DataKeyPair, EncryptedKey, KeyId, KmsProvider, PlainKey, SubConfigAzure}; pub mod blob; pub use blob::{none_to_empty, BucketConf, StringNonEmpty}; diff --git a/components/encryption/export/Cargo.toml b/components/encryption/export/Cargo.toml index 90b75852b08..c1891a93480 100644 --- a/components/encryption/export/Cargo.toml +++ b/components/encryption/export/Cargo.toml @@ -8,11 +8,12 @@ publish = false default = ["cloud-aws", "cloud-gcp", "cloud-azure"] cloud-aws = ["aws"] cloud-gcp = [] -cloud-azure = [] +cloud-azure = ["azure"] [dependencies] async-trait = "0.1" aws = { workspace = true, optional = true } +azure = { workspace = true, optional = true } cloud = { workspace = true } derive_more = "0.99.3" encryption = { workspace = true } diff --git a/components/encryption/export/examples/ecli.rs 
b/components/encryption/export/examples/ecli.rs index ed2247cc77c..9f53a92df51 100644 --- a/components/encryption/export/examples/ecli.rs +++ b/components/encryption/export/examples/ecli.rs @@ -2,10 +2,11 @@ use std::io::{Read, Write}; +use azure::STORAGE_VENDOR_NAME_AZURE; pub use cloud::kms::Config as CloudConfig; #[cfg(feature = "cloud-aws")] use encryption_export::{create_cloud_backend, KmsConfig}; -use encryption_export::{Backend, Error, Result}; +use encryption_export::{AzureConfig, Backend, Error, Result}; use file_system::{File, OpenOptions}; use ini::ini::Ini; use kvproto::encryptionpb::EncryptedContent; @@ -45,13 +46,14 @@ pub struct Opt { #[derive(StructOpt)] #[structopt(rename_all = "kebab-case")] enum Command { - Kms(KmsCommand), + Aws(SubCommandAws), + Azure(SubCommandAzure), } #[derive(StructOpt)] #[structopt(rename_all = "kebab-case")] /// KMS backend. -struct KmsCommand { +struct SubCommandAws { /// KMS key id of backend. #[structopt(long)] key_id: String, @@ -63,8 +65,29 @@ struct KmsCommand { region: Option, } -fn create_kms_backend( - cmd: &KmsCommand, +#[derive(StructOpt)] +#[structopt(rename_all = "kebab-case")] +/// Command for KeyVault backend. +struct SubCommandAzure { + /// Tenant id. + #[structopt(long)] + tenant_id: String, + /// Client id. + #[structopt(long)] + client_id: String, + /// KMS key id of Azure backend. + #[structopt(long)] + key_id: String, + /// Remote endpoint of KeyVault + #[structopt(long)] + url: String, + /// Secret to access key. 
+ #[structopt(short, long)] + secret: Option, +} + +fn create_aws_backend( + cmd: &SubCommandAws, credential_file: Option<&String>, ) -> Result> { let mut config = KmsConfig::default(); @@ -86,6 +109,30 @@ fn create_kms_backend( create_cloud_backend(&config) } +fn create_azure_backend( + cmd: &SubCommandAzure, + credential_file: Option<&String>, +) -> Result> { + let mut config = KmsConfig::default(); + + config.vendor = STORAGE_VENDOR_NAME_AZURE.to_owned(); + let mut azure_cfg = AzureConfig::default(); + azure_cfg.tenant_id = cmd.tenant_id.to_owned(); + azure_cfg.client_id = cmd.client_id.to_owned(); + config.key_id = cmd.key_id.to_owned(); + azure_cfg.keyvault_url = cmd.url.to_owned(); + azure_cfg.client_secret = cmd.secret.to_owned(); + azure_cfg.client_certificate_path = credential_file.cloned(); + if let Some(credential_file) = credential_file { + let ini = Ini::load_from_file(credential_file) + .map_err(|e| Error::Other(box_err!("Failed to parse credential file as ini: {}", e)))?; + let _props = ini + .section(Some("default")) + .ok_or_else(|| Error::Other(box_err!("fail to parse section")))?; + } + create_cloud_backend(&config) +} + #[allow(irrefutable_let_patterns)] fn process() -> Result<()> { let opt: Opt = Opt::from_args(); @@ -95,10 +142,9 @@ fn process() -> Result<()> { file.read_to_end(&mut content)?; let credential_file = opt.credential_file.as_ref(); - let backend = if let Command::Kms(ref cmd) = opt.command { - create_kms_backend(cmd, credential_file)? 
- } else { - unreachable!() + let backend = match opt.command { + Command::Aws(ref cmd) => create_aws_backend(cmd, credential_file)?, + Command::Azure(ref cmd) => create_azure_backend(cmd, credential_file)?, }; let output = match opt.operation { diff --git a/components/encryption/export/src/lib.rs b/components/encryption/export/src/lib.rs index 022d9b65c4e..8820402be6b 100644 --- a/components/encryption/export/src/lib.rs +++ b/components/encryption/export/src/lib.rs @@ -3,13 +3,15 @@ use std::path::Path; #[cfg(feature = "cloud-aws")] use aws::{AwsKms, STORAGE_VENDOR_NAME_AWS}; +#[cfg(feature = "cloud-azure")] +use azure::{AzureKms, STORAGE_VENDOR_NAME_AZURE}; use cloud::kms::Config as CloudConfig; #[cfg(feature = "cloud-aws")] pub use encryption::KmsBackend; pub use encryption::{ - clean_up_dir, clean_up_trash, from_engine_encryption_method, trash_dir_all, Backend, - DataKeyImporter, DataKeyManager, DataKeyManagerArgs, DecrypterReader, EncryptionConfig, Error, - FileConfig, Iv, KmsConfig, MasterKeyConfig, Result, + clean_up_dir, clean_up_trash, from_engine_encryption_method, trash_dir_all, AzureConfig, + Backend, DataKeyImporter, DataKeyManager, DataKeyManagerArgs, DecrypterReader, + EncryptionConfig, Error, FileConfig, Iv, KmsConfig, MasterKeyConfig, Result, }; use encryption::{cloud_convert_error, FileBackend, PlaintextBackend}; use tikv_util::{box_err, error, info}; @@ -38,7 +40,7 @@ pub fn create_backend(config: &MasterKeyConfig) -> Result> { } pub fn create_cloud_backend(config: &KmsConfig) -> Result> { - info!("Encryption init cloud backend"; + info!("Encryption init aws backend"; "region" => &config.region, "endpoint" => &config.endpoint, "key_id" => &config.key_id, @@ -53,6 +55,21 @@ pub fn create_cloud_backend(config: &KmsConfig) -> Result> { Box::new(AwsKms::new(conf).map_err(cloud_convert_error("new AWS KMS".to_owned()))?); Ok(Box::new(KmsBackend::new(kms_provider)?) 
as Box) } + #[cfg(feature = "cloud-azure")] + STORAGE_VENDOR_NAME_AZURE => { + if config.azure.is_none() { + return Err(Error::Other(box_err!( + "invalid configurations for Azure KMS" + ))); + } + let (mk, azure_kms_cfg) = config.clone().convert_to_azure_kms_config(); + let conf = CloudConfig::from_azure_kms_config(mk, azure_kms_cfg) + .map_err(cloud_convert_error("azure from proto".to_owned()))?; + let keyvault_provider = Box::new( + AzureKms::new(conf).map_err(cloud_convert_error("new Azure KMS".to_owned()))?, + ); + Ok(Box::new(KmsBackend::new(keyvault_provider)?) as Box) + } provider => Err(Error::Other(box_err!("provider not found {}", provider))), } } @@ -66,3 +83,35 @@ fn create_backend_inner(config: &MasterKeyConfig) -> Result> { MasterKeyConfig::Kms { config } => return create_cloud_backend(config), }) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg(feature = "cloud-azure")] + fn test_kms_cloud_backend_azure() { + let config = KmsConfig { + key_id: "key_id".to_owned(), + region: "region".to_owned(), + endpoint: "endpoint".to_owned(), + vendor: STORAGE_VENDOR_NAME_AZURE.to_owned(), + azure: Some(AzureConfig { + tenant_id: "tenant_id".to_owned(), + client_id: "client_id".to_owned(), + keyvault_url: "https://keyvault_url.vault.azure.net".to_owned(), + hsm_name: "hsm_name".to_owned(), + hsm_url: "https://hsm_url.managedhsm.azure.net/".to_owned(), + client_secret: Some("client_secret".to_owned()), + ..AzureConfig::default() + }), + }; + let invalid_config = KmsConfig { + azure: None, + ..config.clone() + }; + create_cloud_backend(&invalid_config).unwrap_err(); + let backend = create_cloud_backend(&config).unwrap(); + assert!(backend.is_secure()); + } +} diff --git a/components/encryption/src/config.rs b/components/encryption/src/config.rs index 3fff9064f58..23e049e0df4 100644 --- a/components/encryption/src/config.rs +++ b/components/encryption/src/config.rs @@ -1,5 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
+use cloud::kms::SubConfigAzure; use kvproto::encryptionpb::{EncryptionMethod, MasterKeyKms}; use online_config::OnlineConfig; use serde_derive::{Deserialize, Serialize}; @@ -46,6 +47,43 @@ pub struct FileConfig { pub path: String, } +// TODO: the representation of Azure KMS to users needs to be discussed. +#[derive(Clone, Default, Serialize, Deserialize, PartialEq)] +#[serde(default)] +#[serde(rename_all = "kebab-case")] +pub struct AzureConfig { + pub tenant_id: String, + pub client_id: String, + + /// Url to access KeyVault + pub keyvault_url: String, + /// Key name in the HSM + pub hsm_name: String, + /// Url to access HSM + pub hsm_url: String, + /// Authorized certificate, the certificate is expected to be in base64 + /// encoded PKCS12 format + pub client_certificate: Option, + /// Path of local authorized certificate + pub client_certificate_path: Option, + /// Password for the certificate + pub client_certificate_password: String, + /// Secret of the client. + pub client_secret: Option, +} + +impl std::fmt::Debug for AzureConfig { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("AzureConfig") + .field("tenant_id", &self.tenant_id) + .field("client_id", &self.client_id) + .field("keyvault_url", &self.keyvault_url) + .field("hsm_name", &self.hsm_name) + .field("hsm_url", &self.hsm_url) + .finish() + } +} + #[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] @@ -54,6 +92,9 @@ pub struct KmsConfig { pub region: String, pub endpoint: String, pub vendor: String, + // followings are used for Azure Kms + #[online_config(skip)] + pub azure: Option, } impl KmsConfig { @@ -66,6 +107,31 @@ impl KmsConfig { ..MasterKeyKms::default() } } + + pub fn convert_to_azure_kms_config(self) -> (MasterKeyKms, SubConfigAzure) { + let azure_kms_cfg = { + let cfg = self.azure.unwrap(); + SubConfigAzure { + tenant_id: cfg.tenant_id, + client_id: cfg.client_id, + 
keyvault_url: cfg.keyvault_url, + hsm_name: cfg.hsm_name, + hsm_url: cfg.hsm_url, + client_certificate: cfg.client_certificate, + client_certificate_path: cfg.client_certificate_path, + client_certificate_password: cfg.client_certificate_password, + client_secret: cfg.client_secret, + } + }; + let mk = MasterKeyKms { + key_id: self.key_id, + region: self.region, + endpoint: self.endpoint, + vendor: self.vendor, + ..MasterKeyKms::default() + }; + (mk, azure_kms_cfg) + } } #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] @@ -167,7 +233,7 @@ mod tests { #[test] fn test_kms_config() { - let kms_cfg = EncryptionConfig { + let kms_config = EncryptionConfig { data_encryption_method: EncryptionMethod::Aes128Ctr, data_key_rotation_period: ReadableDuration::days(14), master_key: MasterKeyConfig::Kms { @@ -176,31 +242,78 @@ mod tests { region: "region".to_owned(), endpoint: "endpoint".to_owned(), vendor: "".to_owned(), + azure: None, }, }, previous_master_key: MasterKeyConfig::Plaintext, enable_file_dictionary_log: true, file_dictionary_rewrite_threshold: 1000000, }; + let kms_config_azure = EncryptionConfig { + master_key: MasterKeyConfig::Kms { + config: KmsConfig { + key_id: "key_id".to_owned(), + region: "region".to_owned(), + endpoint: "endpoint".to_owned(), + vendor: "azure".to_owned(), + azure: Some(AzureConfig { + tenant_id: "tenant_id".to_owned(), + client_id: "client_id".to_owned(), + keyvault_url: "keyvault_url".to_owned(), + hsm_name: "hsm_name".to_owned(), + hsm_url: "hsm_url".to_owned(), + ..AzureConfig::default() + }), + }, + }, + ..kms_config.clone() + }; + // KMS with default(aws). 
let kms_str = r#" - data-encryption-method = "aes128-ctr" - data-key-rotation-period = "14d" - enable-file-dictionary-log = true - file-dictionary-rewrite-threshold = 1000000 - [previous-master-key] - type = "plaintext" - [master-key] - type = "kms" - key-id = "key_id" - region = "region" - endpoint = "endpoint" + data-encryption-method = "aes128-ctr" + data-key-rotation-period = "14d" + enable-file-dictionary-log = true + file-dictionary-rewrite-threshold = 1000000 + [previous-master-key] + type = "plaintext" + [master-key] + type = "kms" + key-id = "key_id" + region = "region" + endpoint = "endpoint" + "#; + // KMS with azure + let kms_str_azure = r#" + data-encryption-method = 'aes128-ctr' + data-key-rotation-period = '14d' + enable-file-dictionary-log = true + file-dictionary-rewrite-threshold = 1000000 + + [master-key] + type = 'kms' + key-id = 'key_id' + region = 'region' + endpoint = 'endpoint' + vendor = 'azure' + + [master-key.azure] + tenant-id = 'tenant_id' + client-id = 'client_id' + keyvault-url = 'keyvault_url' + hsm-name = 'hsm_name' + hsm-url = 'hsm_url' + + [previous-master-key] + type = 'plaintext' "#; - let cfg: EncryptionConfig = toml::from_str(kms_str).unwrap(); - assert_eq!( - cfg, - kms_cfg, - "\n{}\n", - toml::to_string_pretty(&kms_cfg).unwrap() - ); + for (kms_cfg, kms_str) in [(kms_config, kms_str), (kms_config_azure, kms_str_azure)] { + let cfg: EncryptionConfig = toml::from_str(kms_str).unwrap(); + assert_eq!( + cfg, + kms_cfg.clone(), + "\n{}\n", + toml::to_string_pretty(&kms_cfg).unwrap() + ); + } } } diff --git a/components/encryption/src/master_key/mod.rs b/components/encryption/src/master_key/mod.rs index 6797565c118..a674cd3a685 100644 --- a/components/encryption/src/master_key/mod.rs +++ b/components/encryption/src/master_key/mod.rs @@ -74,7 +74,7 @@ pub mod tests { use lazy_static::lazy_static; - use super::*; + use super::{Backend, *}; use crate::*; #[derive(Debug)] From 139193447406f114282a6168d8f4ddf6c90713f4 Mon Sep 17 
00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 12 Jun 2023 11:21:05 +0800 Subject: [PATCH 0732/1149] tikv-ctl: implement region recover for raftstore-v2 (#14869) ref tikv/tikv#14654 implement region recover for raftstore-v2 Signed-off-by: Spade A Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- cmd/tikv-ctl/src/executor.rs | 18 +- src/server/debug.rs | 4 +- src/server/debug2.rs | 334 +++++++++++++++++++++++++++++++++-- 3 files changed, 332 insertions(+), 24 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index c40c43bd397..8583b2f8edb 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -1316,12 +1316,22 @@ impl DebugExecutor for DebuggerImplV2 { } } - fn recover_regions(&self, _regions: Vec, _read_only: bool) { - unimplemented!() + fn recover_regions(&self, regions: Vec, read_only: bool) { + let ret = self + .recover_regions(regions, read_only) + .unwrap_or_else(|e| perror_and_exit("Debugger::recover regions", e)); + if ret.is_empty() { + println!("success!"); + return; + } + for (region_id, error) in ret { + println!("region: {}, error: {}", region_id, error); + } } - fn recover_all(&self, _threads: usize, _read_only: bool) { - unimplemented!() + fn recover_all(&self, threads: usize, read_only: bool) { + DebuggerImplV2::recover_all(self, threads, read_only) + .unwrap_or_else(|e| perror_and_exit("Debugger::recover all", e)); } fn print_bad_regions(&self) { diff --git a/src/server/debug.rs b/src/server/debug.rs index 54b1eccae30..b0705c12c33 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -1232,7 +1232,7 @@ pub fn dump_write_cf_properties( Ok(res) } -fn recover_mvcc_for_range( +pub fn recover_mvcc_for_range( db: &RocksEngine, start_key: &[u8], end_key: &[u8], @@ -1285,7 +1285,7 @@ pub struct MvccChecker { } impl MvccChecker { - fn new(db: RocksEngine, start_key: &[u8], end_key: &[u8]) -> Result { + pub fn 
new(db: RocksEngine, start_key: &[u8], end_key: &[u8]) -> Result { let start_key = keys::data_key(start_key); let end_key = keys::data_end_key(end_key); let gen_iter = |cf: &str| -> Result<_> { diff --git a/src/server/debug2.rs b/src/server/debug2.rs index ac9ccdedf7c..8b31a857916 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -1,6 +1,6 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use std::sync::Arc; +use std::{sync::Arc, thread::JoinHandle}; use engine_rocks::{ raw::CompactOptions, util::get_cf_handle, RocksEngine, RocksEngineIterator, RocksStatistics, @@ -19,12 +19,17 @@ use kvproto::{ }; use nom::AsBytes; use raft::{prelude::Entry, RawNode}; -use raftstore::{coprocessor::get_region_approximate_middle, store::util::check_key_in_region}; +use raftstore::{ + coprocessor::{get_region_approximate_middle, get_region_approximate_size}, + store::util::check_key_in_region, +}; use raftstore_v2::Storage; use slog::o; -use tikv_util::{config::ReadableSize, store::find_peer, worker::Worker}; +use tikv_util::{ + config::ReadableSize, store::find_peer, sys::thread::StdThreadBuildWrapper, worker::Worker, +}; -use super::debug::{BottommostLevelCompaction, Debugger, RegionInfo}; +use super::debug::{recover_mvcc_for_range, BottommostLevelCompaction, Debugger, RegionInfo}; use crate::{ config::ConfigController, server::debug::{dump_default_cf_properties, dump_write_cf_properties, Error, Result}, @@ -216,6 +221,122 @@ impl DebuggerImplV2 { } } + pub fn recover_regions( + &self, + regions: Vec, + read_only: bool, + ) -> Result> { + let mut errors = Vec::with_capacity(regions.len()); + for region in regions { + let region_id = region.get_id(); + let region_state = box_try!(self.raft_engine.get_region_state(region_id, u64::MAX)) + .ok_or_else(|| Error::NotFound(format!("Not found region {:?}", region_id)))?; + if region_state.get_state() != PeerState::Normal { + info!( + "skip region"; + "region_id" => region_id, + "peer_state" => 
?region_state.get_state(), + ); + continue; + } + + let mut tablet_cache = + get_tablet_cache(&self.tablet_reg, region_id, Some(region_state))?; + let tablet = tablet_cache.latest().unwrap(); + + if let Err(e) = recover_mvcc_for_range( + tablet, + region.get_start_key(), + region.get_end_key(), + read_only, + 0, + ) { + errors.push((region_id, e)); + } + } + + Ok(errors) + } + + pub fn recover_all(&self, threads: usize, read_only: bool) -> Result<()> { + info!("Calculating split keys..."); + + let region_groups = + deivde_regions_for_concurrency(&self.raft_engine, &self.tablet_reg, threads as u64)?; + + let mut handles = Vec::new(); + for (thread_index, region_group) in region_groups.into_iter().enumerate() { + let props = tikv_util::thread_group::current_properties(); + + let mut tablets = vec![]; + for r in ®ion_group { + let mut cache = get_tablet_cache(&self.tablet_reg, r.get_id(), None).unwrap(); + tablets.push(cache.latest().unwrap().clone()); + } + let thread = std::thread::Builder::new() + .name(format!("mvcc-recover-thread-{}", thread_index)) + .spawn_wrapper(move || { + tikv_util::thread_group::set_properties(props); + tikv_alloc::add_thread_memory_accessor(); + + let mut results = vec![]; + for (region, tablet) in region_group.into_iter().zip(tablets) { + info!( + "mvcc recover"; + "thread_index" => thread_index, + "region" => ?region, + ); + results.push(recover_mvcc_for_range( + &tablet, + region.get_start_key(), + region.get_end_key(), + read_only, + thread_index, + )); + } + + tikv_alloc::remove_thread_memory_accessor(); + results + }) + .unwrap(); + + handles.push(thread); + } + + let res = handles + .into_iter() + .map(|h: JoinHandle>>| h.join()) + .map(|results| { + if let Err(e) = &results { + error!("{:?}", e); + } else { + for r in results.as_ref().unwrap() { + if let Err(e) = r { + error!("{:?}", e); + } + } + } + results + }) + .all(|results| { + if results.is_err() { + return false; + } + for r in &results.unwrap() { + if !r.is_ok() { + 
return false; + } + } + true + }); + + if res { + Ok(()) + } else { + Err(box_err!("Not all threads finished successfully.")) + } + } + pub fn bad_regions(&self) -> Result> { let store_id = self.get_store_ident()?.get_store_id(); let mut res = Vec::new(); @@ -497,20 +618,7 @@ impl Debugger for DebuggerImplV2 { )); } - let mut region_states = vec![]; - self.raft_engine - .for_each_raft_group::(&mut |region_id| { - let region_state = self - .raft_engine - .get_region_state(region_id, u64::MAX) - .unwrap() - .unwrap(); - if region_state.state == PeerState::Normal { - region_states.push(region_state); - } - Ok(()) - }) - .unwrap(); + let mut region_states = get_all_region_states_with_normal_state(&self.raft_engine); region_states.sort_by(|r1, r2| { r1.get_region() @@ -881,6 +989,88 @@ fn get_tablet_cache( } } +fn get_all_region_states_with_normal_state( + raft_engine: &ER, +) -> Vec { + let mut region_states = vec![]; + raft_engine + .for_each_raft_group::(&mut |region_id| { + let region_state = raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); + if region_state.state == PeerState::Normal { + region_states.push(region_state); + } + Ok(()) + }) + .unwrap(); + + region_states +} + +// This method devide all regions into `threads` of groups where each group has +// similar data volume (estimated by region size) so that we use `threads` of +// threads to execute them concurrently. +// Note: we cannot guarantee that we can divde them into exactly `threads` of +// groups for some cases, ex: [0, 0, 0, 0, 0, 100], we can at most return two +// groups for this. 
+fn deivde_regions_for_concurrency( + raft_engine: &ER, + registry: &TabletRegistry, + threads: u64, +) -> Result>> { + let region_states = get_all_region_states_with_normal_state(raft_engine); + + if threads == 1 { + return Ok(vec![ + region_states + .into_iter() + .map(|mut r| r.take_region()) + .collect(), + ]); + } + + let mut regions_groups = vec![]; + let mut region_sizes = vec![]; + let mut total_size = 0; + for region_state in region_states { + let mut tablet_cache = get_tablet_cache( + registry, + region_state.get_region().get_id(), + Some(region_state.clone()), + )?; + let tablet = tablet_cache.latest().unwrap(); + let region_size = box_try!(get_region_approximate_size( + tablet, + region_state.get_region(), + 0 + )); + region_sizes.push((region_size, region_state)); + total_size += region_size; + } + region_sizes.sort_by(|a, b| a.0.cmp(&b.0)); + + let group_size = (total_size + threads - 1) / threads; + let mut cur_group = vec![]; + let mut cur_size = 0; + for (region_size, mut region_state) in region_sizes.into_iter() { + cur_group.push(region_state.take_region()); + cur_size += region_size; + if cur_size >= group_size { + cur_size = 0; + regions_groups.push(cur_group); + cur_group = vec![]; + } + } + if !cur_group.is_empty() { + regions_groups.push(cur_group); + } + + assert!(regions_groups.len() <= threads as usize); + Ok(regions_groups) +} + // `key1` and `key2` should both be start_key or end_key. 
fn smaller_key<'a>(key1: &'a [u8], key2: &'a [u8], end_key: bool) -> &'a [u8] { if end_key && key1.is_empty() { @@ -913,8 +1103,10 @@ fn larger_key<'a>(key1: &'a [u8], key2: &'a [u8], end_key: bool) -> &'a [u8] { mod tests { use std::path::Path; + use collections::HashMap; use engine_traits::{ RaftEngineReadOnly, RaftLogBatch, SyncMutable, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_WRITE, + DATA_CFS, }; use kvproto::{ metapb::{self, Peer, PeerRole}, @@ -923,6 +1115,7 @@ mod tests { use raft::prelude::EntryType; use raft_log_engine::RaftLogEngine; use raftstore::store::RAFT_INIT_LOG_INDEX; + use tikv_util::store::new_peer; use super::*; use crate::{ @@ -1375,6 +1568,111 @@ mod tests { } } + #[test] + fn test_divide_regions_even() { + let dir = test_util::temp_dir("test-debugger", false); + let debugger = new_debugger(dir.path()); + + let mut lb = debugger.raft_engine.log_batch(30); + for i in 0..20 { + let mut region = metapb::Region::default(); + region.set_peers(vec![new_peer(1, i + 1)].into()); + region.set_id(i + 1); + let ctx = TabletContext::new(®ion, Some(5)); + let mut cache = debugger.tablet_reg.load(ctx, true).unwrap(); + let tablet = cache.latest().unwrap(); + for j in 0..10 { + // (6 + 3) * 10 + let k = format!("zk{:04}", i * 100 + j); + tablet.put(k.as_bytes(), b"val").unwrap(); + } + tablet.flush_cfs(DATA_CFS, true).unwrap(); + + let mut region_state = RegionLocalState::default(); + region_state.set_region(region); + region_state.set_tablet_index(5); + lb.put_region_state(i + 1, 5, ®ion_state).unwrap(); + } + debugger.raft_engine.consume(&mut lb, true).unwrap(); + + let groups = + deivde_regions_for_concurrency(&debugger.raft_engine, &debugger.tablet_reg, 4).unwrap(); + assert_eq!(groups.len(), 4); + for g in groups { + assert_eq!(g.len(), 5); + } + + let groups = + deivde_regions_for_concurrency(&debugger.raft_engine, &debugger.tablet_reg, 3).unwrap(); + assert_eq!(groups[0].len(), 7); + assert_eq!(groups[1].len(), 7); + assert_eq!(groups[2].len(), 6); + } + + 
#[test] + fn test_divide_regions_uneven() { + let dir = test_util::temp_dir("test-debugger", false); + let debugger = new_debugger(dir.path()); + + let mut lb = debugger.raft_engine.log_batch(30); + let mut region_sizes = HashMap::default(); + let mut total_size = 0; + let mut max_region_size = 0; + for i in 0..20 { + let mut region = metapb::Region::default(); + region.set_peers(vec![new_peer(1, i + 1)].into()); + region.set_id(i + 1); + let ctx = TabletContext::new(®ion, Some(5)); + let mut cache = debugger.tablet_reg.load(ctx, true).unwrap(); + let tablet = cache.latest().unwrap(); + for j in 0..=i { + let k = format!("zk{:04}", i * 100 + j); + tablet.put(k.as_bytes(), b"val").unwrap(); + } + + let group_size = (6 + 3) * (i + 1); + max_region_size = group_size; + total_size += group_size; + region_sizes.insert(i + 1, group_size); + tablet.flush_cfs(DATA_CFS, true).unwrap(); + + let mut region_state = RegionLocalState::default(); + region_state.set_region(region); + region_state.set_tablet_index(5); + lb.put_region_state(i + 1, 5, ®ion_state).unwrap(); + } + debugger.raft_engine.consume(&mut lb, true).unwrap(); + + let check_group = |groups: Vec>, group_size_threshold| { + let count = groups.iter().fold(0, |count, group| count + group.len()); + assert_eq!(count, 20); + for (i, group) in groups.iter().enumerate() { + let mut current_group_size = 0; + for region in group { + current_group_size += *region_sizes.get(®ion.get_id()).unwrap(); + } + // All groups should have total size > `group_size_threshold` except for the + // last region. 
+ if i != groups.len() - 1 { + assert!( + current_group_size >= group_size_threshold + && current_group_size < group_size_threshold + max_region_size + ); + } + } + }; + + let groups = + deivde_regions_for_concurrency(&debugger.raft_engine, &debugger.tablet_reg, 4).unwrap(); + let group_size_threshold = total_size / 4; + check_group(groups, group_size_threshold); + + let groups = + deivde_regions_for_concurrency(&debugger.raft_engine, &debugger.tablet_reg, 7).unwrap(); + let group_size_threshold = total_size / 7; + check_group(groups, group_size_threshold); + } + #[test] fn test_bad_regions() { let dir = test_util::temp_dir("test-debugger", false); From 738affae7e1e925eb4bca874c642943c94cd76a9 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 13 Jun 2023 16:33:07 +0800 Subject: [PATCH 0733/1149] raftstore-v2: not update modification index for delete range (#14905) close tikv/tikv#14904 not update modification index for delete range Signed-off-by: Spade A --- .../src/operation/command/write/mod.rs | 6 +-- tests/failpoints/cases/mod.rs | 1 + tests/failpoints/cases/test_apply_trace.rs | 50 +++++++++++++++++++ 3 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 tests/failpoints/cases/test_apply_trace.rs diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 1ff1809be9d..56b5fc4b0d6 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -313,9 +313,9 @@ impl Apply { // .unwrap_or_else(move |e| fail_f(e, // DeleteStrategy::DeleteBlobs)); } - if index != u64::MAX { - self.modifications_mut()[off] = index; - } + + // delete range is an unsafe operation and it cannot be rollbacked to replay, so + // we don't update modification index for this operation. 
Ok(()) } diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index 9c90211c073..5f6d7191239 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -1,5 +1,6 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. +mod test_apply_trace; mod test_async_fetch; mod test_async_io; mod test_backup; diff --git a/tests/failpoints/cases/test_apply_trace.rs b/tests/failpoints/cases/test_apply_trace.rs new file mode 100644 index 00000000000..4bd6b9dcbf7 --- /dev/null +++ b/tests/failpoints/cases/test_apply_trace.rs @@ -0,0 +1,50 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + thread::sleep, + time::{Duration, Instant}, +}; + +use engine_traits::{ + MiscExt, RaftEngineReadOnly, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, DATA_CFS, +}; + +// It tests that delete range for an empty cf does not block the progress of +// persisted_applied. See the description of the PR #14905. +#[test] +fn test_delete_range_does_not_block_flushed_index() { + use test_raftstore_v2::*; + + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + + for i in 0..100 { + let key = format!("k{:03}", i); + cluster.must_put_cf(CF_WRITE, key.as_bytes(), b"val"); + cluster.must_put_cf(CF_LOCK, key.as_bytes(), b"val"); + } + + cluster.must_delete_range_cf(CF_DEFAULT, b"k000", b"k020"); + cluster.must_delete_range_cf(CF_DEFAULT, b"k020", b"k040"); + + let raft_engine = cluster.get_raft_engine(1); + let mut cache = cluster.engines[0].0.get(1).unwrap(); + let tablet = cache.latest().unwrap(); + tablet.flush_cfs(DATA_CFS, true).unwrap(); + + let start = Instant::now(); + loop { + let admin_flush = raft_engine.get_flushed_index(1, CF_RAFT).unwrap().unwrap(); + if admin_flush > 200 { + return; + } + if start.elapsed() > Duration::from_secs(5) { + panic!( + "persisted_apply is not progressed, current persisted_apply {}", + admin_flush + ); + } + // wait for persist admin flush index + 
sleep(Duration::from_millis(200)); + } +} From aa4d02a5ebdfb05f85a6b0bc881a61f57817fae4 Mon Sep 17 00:00:00 2001 From: ekexium Date: Tue, 13 Jun 2023 16:47:07 +0800 Subject: [PATCH 0734/1149] refactor: encapsulation of ChangeLock (#14803) ref tikv/tikv#13694 Encapsulate last_change_ts and versions_to_last_change into one enum. Signed-off-by: ekexium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/txn_ext.rs | 7 +- components/tikv_kv/src/lib.rs | 7 +- components/txn_types/src/lib.rs | 14 +- components/txn_types/src/lock.rs | 110 ++++++++-------- components/txn_types/src/types.rs | 79 +++++++++++ components/txn_types/src/write.rs | 73 ++++------- src/storage/mod.rs | 5 +- src/storage/mvcc/reader/point_getter.rs | 51 +++---- src/storage/mvcc/reader/reader.rs | 52 ++++---- src/storage/mvcc/reader/scanner/backward.rs | 35 +++-- src/storage/mvcc/reader/scanner/forward.rs | 50 ++++--- src/storage/mvcc/reader/scanner/mod.rs | 2 +- .../txn/actions/acquire_pessimistic_lock.rs | 49 +++---- src/storage/txn/actions/commit.rs | 17 +-- src/storage/txn/actions/common.rs | 74 ++++++----- .../txn/actions/flashback_to_version.rs | 2 +- src/storage/txn/actions/prewrite.rs | 124 ++++++++---------- src/storage/txn/commands/check_txn_status.rs | 5 +- src/storage/txn/commands/prewrite.rs | 28 ++-- src/storage/txn/store.rs | 7 +- src/storage/types.rs | 22 +++- tests/failpoints/cases/test_merge.rs | 14 +- tests/failpoints/cases/test_split_region.rs | 8 +- tests/failpoints/cases/test_transaction.rs | 5 +- .../failpoints/cases/test_transfer_leader.rs | 11 +- .../integrations/raftstore/test_flashback.rs | 5 +- tests/integrations/raftstore/test_merge.rs | 11 +- tests/integrations/raftstore/test_multi.rs | 5 +- .../raftstore/test_split_region.rs | 8 +- .../raftstore/test_transfer_leader.rs | 8 +- 30 files changed, 464 insertions(+), 424 deletions(-) diff --git a/components/raftstore/src/store/txn_ext.rs 
b/components/raftstore/src/store/txn_ext.rs index 4ace4ba8026..0091fd4e7bb 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -311,6 +311,7 @@ mod tests { use std::sync::Mutex; use tikv_util::defer; + use txn_types::LastChange; use super::*; @@ -325,8 +326,7 @@ mod tests { ttl: 3000, for_update_ts: 110.into(), min_commit_ts: 110.into(), - last_change_ts: 105.into(), - versions_to_last_change: 2, + last_change: LastChange::make_exist(105.into(), 2), is_locked_with_conflict: false, } } @@ -428,8 +428,7 @@ mod tests { ttl: 1000, for_update_ts: 10.into(), min_commit_ts: 20.into(), - last_change_ts: 5.into(), - versions_to_last_change: 2, + last_change: LastChange::make_exist(5.into(), 2), is_locked_with_conflict: false, }, deleted, diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 293ae7fccc1..55c9f66d9fd 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -1308,6 +1308,7 @@ pub mod tests { #[cfg(test)] mod unit_tests { use engine_traits::CF_WRITE; + use txn_types::LastChange; use super::*; use crate::raft_cmdpb; @@ -1329,8 +1330,7 @@ mod unit_tests { ttl: 200, for_update_ts: 101.into(), min_commit_ts: 102.into(), - last_change_ts: 80.into(), - versions_to_last_change: 2, + last_change: LastChange::make_exist(80.into(), 2), is_locked_with_conflict: false, }, ), @@ -1374,8 +1374,7 @@ mod unit_tests { ttl: 200, for_update_ts: 101.into(), min_commit_ts: 102.into(), - last_change_ts: 80.into(), - versions_to_last_change: 2, + last_change: LastChange::make_exist(80.into(), 2), is_locked_with_conflict: false, } .into_lock() diff --git a/components/txn_types/src/lib.rs b/components/txn_types/src/lib.rs index a1a759b21b9..61d2093366a 100644 --- a/components/txn_types/src/lib.rs +++ b/components/txn_types/src/lib.rs @@ -6,11 +6,6 @@ #[allow(unused_extern_crates)] extern crate tikv_alloc; -mod lock; -mod timestamp; -mod types; -mod write; - use std::io; use 
error_code::{self, ErrorCode, ErrorCodeExt}; @@ -19,11 +14,16 @@ pub use lock::{Lock, LockType, PessimisticLock}; use thiserror::Error; pub use timestamp::{TimeStamp, TsSet, TSO_PHYSICAL_SHIFT_BITS}; pub use types::{ - insert_old_value_if_resolved, is_short_value, Key, KvPair, Mutation, MutationType, OldValue, - OldValues, TxnExtra, TxnExtraScheduler, Value, WriteBatchFlags, SHORT_VALUE_MAX_LEN, + insert_old_value_if_resolved, is_short_value, Key, KvPair, LastChange, Mutation, MutationType, + OldValue, OldValues, TxnExtra, TxnExtraScheduler, Value, WriteBatchFlags, SHORT_VALUE_MAX_LEN, }; pub use write::{Write, WriteRef, WriteType}; +mod lock; +mod timestamp; +mod types; +mod write; + #[derive(Debug, Error)] pub enum ErrorInner { #[error("{0}")] diff --git a/components/txn_types/src/lock.rs b/components/txn_types/src/lock.rs index 3018b030490..17d9dbe37e2 100644 --- a/components/txn_types/src/lock.rs +++ b/components/txn_types/src/lock.rs @@ -12,7 +12,7 @@ use tikv_util::codec::{ use crate::{ timestamp::{TimeStamp, TsSet}, types::{Key, Mutation, Value, SHORT_VALUE_PREFIX}, - Error, ErrorInner, Result, + Error, ErrorInner, LastChange, Result, }; #[derive(Debug, Clone, Copy, PartialEq)] @@ -89,13 +89,9 @@ pub struct Lock { // to the lock. pub rollback_ts: Vec, - /// The commit TS of the latest PUT/DELETE record - pub last_change_ts: TimeStamp, - /// The number of versions that need skipping from the latest version to - /// find the latest PUT/DELETE record. - /// If versions_to_last_change > 0 but last_change_ts == 0, the key does not - /// have a PUT/DELETE record. - pub versions_to_last_change: u64, + /// The position of the last actual write (PUT or DELETE), used to skip + /// consecutive LOCK records when reading. + pub last_change: LastChange, /// The source of this txn. It is used by ticdc, if the value is 0 ticdc /// will sync the kv change event to downstream, if it is not 0, ticdc /// may ignore this change event. 
@@ -129,8 +125,7 @@ impl std::fmt::Debug for Lock { .field("use_async_commit", &self.use_async_commit) .field("secondaries", &secondary_keys) .field("rollback_ts", &self.rollback_ts) - .field("last_change_ts", &self.last_change_ts) - .field("versions_to_last_change", &self.versions_to_last_change) + .field("last_change", &self.last_change) .field("txn_source", &self.txn_source) .field("is_locked_with_conflict", &self.is_locked_with_conflict) .finish() @@ -161,8 +156,7 @@ impl Lock { use_async_commit: false, secondaries: Vec::default(), rollback_ts: Vec::default(), - last_change_ts: TimeStamp::zero(), - versions_to_last_change: 0, + last_change: LastChange::default(), txn_source: 0, is_locked_with_conflict, } @@ -182,13 +176,8 @@ impl Lock { } #[must_use] - pub fn set_last_change( - mut self, - last_change_ts: TimeStamp, - versions_to_last_change: u64, - ) -> Self { - self.last_change_ts = last_change_ts; - self.versions_to_last_change = versions_to_last_change; + pub fn set_last_change(mut self, last_change: LastChange) -> Self { + self.last_change = last_change; self } @@ -236,10 +225,14 @@ impl Lock { b.encode_u64(ts.into_inner()).unwrap(); } } - if !self.last_change_ts.is_zero() || self.versions_to_last_change != 0 { + if matches!( + self.last_change, + LastChange::NotExist | LastChange::Exist { .. } + ) { + let (last_change_ts, versions) = self.last_change.to_parts(); b.push(LAST_CHANGE_PREFIX); - b.encode_u64(self.last_change_ts.into_inner()).unwrap(); - b.encode_var_u64(self.versions_to_last_change).unwrap(); + b.encode_u64(last_change_ts.into_inner()).unwrap(); + b.encode_var_u64(versions).unwrap(); } if self.txn_source != 0 { b.push(TXN_SOURCE_PREFIX); @@ -277,7 +270,10 @@ impl Lock { if !self.rollback_ts.is_empty() { size += 1 + MAX_VAR_U64_LEN + size_of::() * self.rollback_ts.len(); } - if !self.last_change_ts.is_zero() || self.versions_to_last_change != 0 { + if matches!( + self.last_change, + LastChange::NotExist | LastChange::Exist { .. 
} + ) { size += 1 + size_of::() + MAX_VAR_U64_LEN; } if self.txn_source != 0 { @@ -324,7 +320,7 @@ impl Lock { let mut secondaries = Vec::new(); let mut rollback_ts = Vec::new(); let mut last_change_ts = TimeStamp::zero(); - let mut versions_to_last_change = 0; + let mut estimated_versions_to_last_change = 0; let mut txn_source = 0; let mut is_locked_with_conflict = false; while !b.is_empty() { @@ -362,7 +358,7 @@ impl Lock { } LAST_CHANGE_PREFIX => { last_change_ts = number::decode_u64(&mut b)?.into(); - versions_to_last_change = number::decode_var_u64(&mut b)?; + estimated_versions_to_last_change = number::decode_var_u64(&mut b)?; } TXN_SOURCE_PREFIX => { txn_source = number::decode_var_u64(&mut b)?; @@ -388,7 +384,10 @@ impl Lock { min_commit_ts, is_locked_with_conflict, ) - .set_last_change(last_change_ts, versions_to_last_change) + .set_last_change(LastChange::from_parts( + last_change_ts, + estimated_versions_to_last_change, + )) .set_txn_source(txn_source); if use_async_commit { lock = lock.use_async_commit(secondaries); @@ -550,8 +549,7 @@ pub struct PessimisticLock { pub for_update_ts: TimeStamp, pub min_commit_ts: TimeStamp, - pub last_change_ts: TimeStamp, - pub versions_to_last_change: u64, + pub last_change: LastChange, pub is_locked_with_conflict: bool, } @@ -568,7 +566,7 @@ impl PessimisticLock { self.min_commit_ts, self.is_locked_with_conflict, ) - .set_last_change(self.last_change_ts, self.versions_to_last_change) + .set_last_change(self.last_change.clone()) } // Same with `to_lock` but does not copy the primary key. 
@@ -584,7 +582,7 @@ impl PessimisticLock { self.min_commit_ts, self.is_locked_with_conflict, ) - .set_last_change(self.last_change_ts, self.versions_to_last_change) + .set_last_change(self.last_change) } pub fn memory_size(&self) -> usize { @@ -600,8 +598,7 @@ impl std::fmt::Debug for PessimisticLock { .field("ttl", &self.ttl) .field("for_update_ts", &self.for_update_ts) .field("min_commit_ts", &self.min_commit_ts) - .field("last_change_ts", &self.last_change_ts) - .field("versions_to_last_change", &self.versions_to_last_change) + .field("last_change", &self.last_change) .field("is_locked_with_conflict", &self.is_locked_with_conflict) .finish() } @@ -838,7 +835,7 @@ mod tests { 8.into(), false, ) - .set_last_change(0.into(), 2), + .set_last_change(LastChange::NotExist), Lock::new( LockType::Lock, b"pk".to_vec(), @@ -850,7 +847,7 @@ mod tests { 8.into(), false, ) - .set_last_change(4.into(), 2) + .set_last_change(LastChange::make_exist(4.into(), 2)) .set_txn_source(1), ]; for (i, lock) in locks.drain(..).enumerate() { @@ -1101,7 +1098,7 @@ mod tests { b"secondary_k3k3k3k3k3k3".to_vec(), b"secondary_k4".to_vec(), ]) - .set_last_change(80.into(), 4); + .set_last_change(LastChange::make_exist(80.into(), 4)); assert_eq!( format!("{:?}", lock), @@ -1110,7 +1107,8 @@ mod tests { min_commit_ts: TimeStamp(127), use_async_commit: true, \ secondaries: [7365636F6E646172795F6B31, 7365636F6E646172795F6B6B6B6B6B32, \ 7365636F6E646172795F6B336B336B336B336B336B33, 7365636F6E646172795F6B34], rollback_ts: [], \ - last_change_ts: TimeStamp(80), versions_to_last_change: 4, txn_source: 0, is_locked_with_conflict: false }" + last_change: Exist { last_change_ts: TimeStamp(80), estimated_versions_to_last_change: 4 }, txn_source: 0\ + , is_locked_with_conflict: false }" ); log_wrappers::set_redact_info_log(true); let redact_result = format!("{:?}", lock); @@ -1120,7 +1118,8 @@ mod tests { "Lock { lock_type: Put, primary_key: ?, start_ts: TimeStamp(100), ttl: 3, \ short_value: ?, 
for_update_ts: TimeStamp(101), txn_size: 10, min_commit_ts: TimeStamp(127), \ use_async_commit: true, secondaries: [?, ?, ?, ?], rollback_ts: [], \ - last_change_ts: TimeStamp(80), versions_to_last_change: 4, txn_source: 0, is_locked_with_conflict: false }" + last_change: Exist { last_change_ts: TimeStamp(80), estimated_versions_to_last_change: 4 }, txn_source: 0\ + , is_locked_with_conflict: false }" ); lock.short_value = None; @@ -1129,8 +1128,9 @@ mod tests { format!("{:?}", lock), "Lock { lock_type: Put, primary_key: 706B, start_ts: TimeStamp(100), ttl: 3, short_value: , \ for_update_ts: TimeStamp(101), txn_size: 10, min_commit_ts: TimeStamp(127), \ - use_async_commit: true, secondaries: [], rollback_ts: [], last_change_ts: TimeStamp(80), \ - versions_to_last_change: 4, txn_source: 0, is_locked_with_conflict: false }" + use_async_commit: true, secondaries: [], rollback_ts: [], \ + last_change: Exist { last_change_ts: TimeStamp(80), estimated_versions_to_last_change: 4 }, txn_source: 0\ + , is_locked_with_conflict: false }" ); log_wrappers::set_redact_info_log(true); let redact_result = format!("{:?}", lock); @@ -1139,8 +1139,9 @@ mod tests { redact_result, "Lock { lock_type: Put, primary_key: ?, start_ts: TimeStamp(100), ttl: 3, short_value: ?, \ for_update_ts: TimeStamp(101), txn_size: 10, min_commit_ts: TimeStamp(127), \ - use_async_commit: true, secondaries: [], rollback_ts: [], last_change_ts: TimeStamp(80), \ - versions_to_last_change: 4, txn_source: 0, is_locked_with_conflict: false }" + use_async_commit: true, secondaries: [], rollback_ts: [], \ + last_change: Exist { last_change_ts: TimeStamp(80), estimated_versions_to_last_change: 4 }, txn_source: 0\ + , is_locked_with_conflict: false }" ); } @@ -1152,8 +1153,7 @@ mod tests { ttl: 1000, for_update_ts: 10.into(), min_commit_ts: 20.into(), - last_change_ts: 8.into(), - versions_to_last_change: 2, + last_change: LastChange::make_exist(8.into(), 2), is_locked_with_conflict: false, }; let expected_lock = 
Lock { @@ -1168,8 +1168,7 @@ mod tests { use_async_commit: false, secondaries: vec![], rollback_ts: vec![], - last_change_ts: 8.into(), - versions_to_last_change: 2, + last_change: LastChange::make_exist(8.into(), 2), txn_source: 0, is_locked_with_conflict: false, }; @@ -1185,15 +1184,15 @@ mod tests { ttl: 1000, for_update_ts: 10.into(), min_commit_ts: 20.into(), - last_change_ts: 8.into(), - versions_to_last_change: 2, + last_change: LastChange::make_exist(8.into(), 2), is_locked_with_conflict: false, }; assert_eq!( format!("{:?}", pessimistic_lock), "PessimisticLock { primary_key: 7072696D617279, start_ts: TimeStamp(5), ttl: 1000, \ - for_update_ts: TimeStamp(10), min_commit_ts: TimeStamp(20), last_change_ts: TimeStamp(8), \ - versions_to_last_change: 2, is_locked_with_conflict: false }" + for_update_ts: TimeStamp(10), min_commit_ts: TimeStamp(20), \ + last_change: Exist { last_change_ts: TimeStamp(8), estimated_versions_to_last_change: 2 }\ + , is_locked_with_conflict: false }" ); log_wrappers::set_redact_info_log(true); let redact_result = format!("{:?}", pessimistic_lock); @@ -1201,8 +1200,9 @@ mod tests { assert_eq!( redact_result, "PessimisticLock { primary_key: ?, start_ts: TimeStamp(5), ttl: 1000, \ - for_update_ts: TimeStamp(10), min_commit_ts: TimeStamp(20), last_change_ts: TimeStamp(8), \ - versions_to_last_change: 2, is_locked_with_conflict: false }" + for_update_ts: TimeStamp(10), min_commit_ts: TimeStamp(20), \ + last_change: Exist { last_change_ts: TimeStamp(8), estimated_versions_to_last_change: 2 }\ + , is_locked_with_conflict: false }" ); } @@ -1214,11 +1214,11 @@ mod tests { ttl: 1000, for_update_ts: 10.into(), min_commit_ts: 20.into(), - last_change_ts: 8.into(), - versions_to_last_change: 2, + last_change: LastChange::make_exist(8.into(), 2), is_locked_with_conflict: false, }; - // 7 bytes for primary key, 16 bytes for Box<[u8]>, and 6 8-byte integers. 
- assert_eq!(lock.memory_size(), 7 + 16 + 7 * 8); + // 7 bytes for primary key, 16 bytes for Box<[u8]>, 4 x 8-byte integers, 1 + // enum (8 + 2 * 8) and a bool. + assert_eq!(lock.memory_size(), 7 + 16 + 5 * 8 + 24); } } diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index a83a68c7ba6..624ac81212d 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -587,6 +587,72 @@ impl WriteBatchFlags { } } +/// The position info of the last actual write (PUT or DELETE) of a LOCK record. +/// Note that if the last change is a DELETE, its LastChange can be either +/// Exist(which points to it) or NotExist. +#[derive(Clone, Eq, PartialEq, Debug)] +pub enum LastChange { + Unknown, + /// The pointer may point to a PUT or a DELETE record. + Exist { + /// The commit TS of the latest PUT/DELETE record + last_change_ts: TimeStamp, + /// The estimated number of versions that need skipping from this record + /// to find the latest PUT/DELETE record. Note this could be inaccurate. + estimated_versions_to_last_change: u64, + }, + /// Either there is no previous write of the key or the last write is a + /// DELETE. + NotExist, +} + +impl LastChange { + pub fn make_exist(last_change_ts: TimeStamp, estimated_versions_to_last_change: u64) -> Self { + assert!(!last_change_ts.is_zero()); + assert!(estimated_versions_to_last_change > 0); + LastChange::Exist { + last_change_ts, + estimated_versions_to_last_change, + } + } + + // How `LastChange` is stored. + // (1) ts == 0 && version == 0 means Unknown. + // (2) ts == 0 && version > 0 means NotExist. In current implementation version + // is set to 1. In older implementations it can be any positive integer. So + // we accept any positive when deserializing. + // (3) ts > 0 && version > 0 means Exist. 
+ + pub fn to_parts(&self) -> (TimeStamp, u64) { + match self { + LastChange::Unknown => (TimeStamp::zero(), 0), + LastChange::Exist { + last_change_ts, + estimated_versions_to_last_change, + } => (*last_change_ts, *estimated_versions_to_last_change), + LastChange::NotExist => (TimeStamp::zero(), 1), + } + } + + pub fn from_parts(last_change_ts: TimeStamp, estimated_versions_to_last_change: u64) -> Self { + if last_change_ts.is_zero() { + if estimated_versions_to_last_change > 0 { + LastChange::NotExist + } else { + LastChange::Unknown + } + } else { + Self::make_exist(last_change_ts, estimated_versions_to_last_change) + } + } +} + +impl Default for LastChange { + fn default() -> Self { + LastChange::Unknown + } +} + #[cfg(test)] mod tests { use super::*; @@ -759,4 +825,17 @@ mod tests { assert_eq!(another_key, key_with_ts); } } + + #[test] + fn test_serialize_last_change() { + let objs = vec![ + LastChange::Unknown, + LastChange::NotExist, + LastChange::make_exist(100.into(), 3), + ]; + for obj in objs { + let (ts, versions) = obj.to_parts(); + assert_eq!(obj, LastChange::from_parts(ts, versions)); + } + } } diff --git a/components/txn_types/src/write.rs b/components/txn_types/src/write.rs index 7d5bfb9fe2b..9aea94d0ec3 100644 --- a/components/txn_types/src/write.rs +++ b/components/txn_types/src/write.rs @@ -9,7 +9,7 @@ use crate::{ lock::LockType, timestamp::TimeStamp, types::{Value, SHORT_VALUE_PREFIX}, - Error, ErrorInner, Result, + Error, ErrorInner, LastChange, Result, }; #[derive(Debug, Clone, Copy, PartialEq)] @@ -154,14 +154,9 @@ pub struct Write { /// rollback, and it's next version's `commit_ts` is `ts` pub gc_fence: Option, - /// The commit TS of the latest PUT/DELETE record - pub last_change_ts: TimeStamp, - /// The number of versions that need skipping from this record - /// to find the latest PUT/DELETE record - /// NOTE: `last_change_ts` == 0 && `versions_to_last_change` > 0 means the - /// key does not exist. 
Either there is no such key **or the last write - /// is a DELETE**. - pub versions_to_last_change: u64, + /// The position of the last actual write (PUT or DELETE), used to skip + /// consecutive LOCK records when reading. + pub last_change: LastChange, /// The source of this txn. pub txn_source: u64, } @@ -183,8 +178,7 @@ impl std::fmt::Debug for Write { ) .field("has_overlapped_rollback", &self.has_overlapped_rollback) .field("gc_fence", &self.gc_fence) - .field("last_change_ts", &self.last_change_ts) - .field("versions_to_last_change", &self.versions_to_last_change) + .field("last_change", &self.last_change) .field("txn_source", &self.txn_source) .finish() } @@ -200,8 +194,7 @@ impl Write { short_value, has_overlapped_rollback: false, gc_fence: None, - last_change_ts: TimeStamp::zero(), - versions_to_last_change: 0, + last_change: LastChange::default(), txn_source: 0, } } @@ -220,8 +213,7 @@ impl Write { short_value, has_overlapped_rollback: false, gc_fence: None, - last_change_ts: TimeStamp::zero(), - versions_to_last_change: 0, + last_change: LastChange::default(), txn_source: 0, } } @@ -239,13 +231,8 @@ impl Write { } #[must_use] - pub fn set_last_change( - mut self, - last_change_ts: TimeStamp, - versions_to_last_change: u64, - ) -> Self { - self.last_change_ts = last_change_ts; - self.versions_to_last_change = versions_to_last_change; + pub fn set_last_change(mut self, last_change: LastChange) -> Self { + self.last_change = last_change; self } @@ -272,8 +259,7 @@ impl Write { short_value: self.short_value.as_deref(), has_overlapped_rollback: self.has_overlapped_rollback, gc_fence: self.gc_fence, - last_change_ts: self.last_change_ts, - versions_to_last_change: self.versions_to_last_change, + last_change: self.last_change.clone(), txn_source: self.txn_source, } } @@ -300,15 +286,7 @@ pub struct WriteRef<'a> { /// See [`Write::gc_fence`] for more detail. pub gc_fence: Option, - /// The commit TS of the last PUT/DELETE record before this write record. 
- /// It only exists if this is a LOCK/ROLLBACK record. - pub last_change_ts: TimeStamp, - /// The number of versions that need skipping from this record - /// to find the latest PUT/DELETE record. - /// If versions_to_last_change > 0 but last_change_ts == 0, the key does not - /// have a PUT/DELETE record before this write record, OR the previous - /// change is a DELETE. - pub versions_to_last_change: u64, + pub last_change: LastChange, /// The source of this txn. pub txn_source: u64, } @@ -329,7 +307,7 @@ impl WriteRef<'_> { let mut has_overlapped_rollback = false; let mut gc_fence = None; let mut last_change_ts = TimeStamp::zero(); - let mut versions_to_last_change = 0; + let mut estimated_versions_to_last_change = 0; let mut txn_source = 0; while !b.is_empty() { @@ -357,7 +335,7 @@ impl WriteRef<'_> { GC_FENCE_PREFIX => gc_fence = Some(number::decode_u64(&mut b)?.into()), LAST_CHANGE_PREFIX => { last_change_ts = number::decode_u64(&mut b)?.into(); - versions_to_last_change = number::decode_var_u64(&mut b)?; + estimated_versions_to_last_change = number::decode_var_u64(&mut b)?; } TXN_SOURCE_PREFIX => { txn_source = number::decode_var_u64(&mut b)?; @@ -376,8 +354,7 @@ impl WriteRef<'_> { short_value, has_overlapped_rollback, gc_fence, - last_change_ts, - versions_to_last_change, + last_change: LastChange::from_parts(last_change_ts, estimated_versions_to_last_change), txn_source, }) } @@ -398,10 +375,14 @@ impl WriteRef<'_> { b.push(GC_FENCE_PREFIX); b.encode_u64(ts.into_inner()).unwrap(); } - if !self.last_change_ts.is_zero() || self.versions_to_last_change != 0 { + if matches!( + self.last_change, + LastChange::NotExist | LastChange::Exist { .. 
} + ) { + let (last_change_ts, versions) = self.last_change.to_parts(); b.push(LAST_CHANGE_PREFIX); - b.encode_u64(self.last_change_ts.into_inner()).unwrap(); - b.encode_var_u64(self.versions_to_last_change).unwrap(); + b.encode_u64(last_change_ts.into_inner()).unwrap(); + b.encode_var_u64(versions).unwrap(); } if self.txn_source != 0 { b.push(TXN_SOURCE_PREFIX); @@ -419,7 +400,10 @@ impl WriteRef<'_> { if self.gc_fence.is_some() { size += 1 + size_of::(); } - if !self.last_change_ts.is_zero() || self.versions_to_last_change != 0 { + if matches!( + self.last_change, + LastChange::NotExist | LastChange::Exist { .. } + ) { size += 1 + size_of::() + MAX_VAR_U64_LEN; } if self.txn_source != 0 { @@ -473,7 +457,7 @@ impl WriteRef<'_> { self.short_value.map(|v| v.to_owned()), ) .set_overlapped_rollback(self.has_overlapped_rollback, self.gc_fence) - .set_last_change(self.last_change_ts, self.versions_to_last_change) + .set_last_change(self.last_change.clone()) .set_txn_source(self.txn_source) } } @@ -533,8 +517,9 @@ mod tests { .set_overlapped_rollback(true, Some(2345678.into())), Write::new(WriteType::Put, 456.into(), Some(b"short_value".to_vec())) .set_overlapped_rollback(true, Some(421397468076048385.into())), - Write::new(WriteType::Lock, 456.into(), None).set_last_change(345.into(), 11), - Write::new(WriteType::Lock, 456.into(), None).set_last_change(0.into(), 11), + Write::new(WriteType::Lock, 456.into(), None) + .set_last_change(LastChange::make_exist(345.into(), 11)), + Write::new(WriteType::Lock, 456.into(), None).set_last_change(LastChange::NotExist), Write::new(WriteType::Lock, 456.into(), None).set_txn_source(1), ]; for (i, write) in writes.drain(..).enumerate() { diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 1e98a1b8257..7dde593350f 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3749,7 +3749,7 @@ mod tests { use parking_lot::Mutex; use tikv_util::config::ReadableSize; use tracker::INVALID_TRACKER_TOKEN; - use txn_types::{Mutation, 
PessimisticLock, WriteType, SHORT_VALUE_MAX_LEN}; + use txn_types::{LastChange, Mutation, PessimisticLock, WriteType, SHORT_VALUE_MAX_LEN}; use super::{ config::EngineType, @@ -10676,8 +10676,7 @@ mod tests { ttl: 3000, for_update_ts: 10.into(), min_commit_ts: 11.into(), - last_change_ts: TimeStamp::zero(), - versions_to_last_change: 1, + last_change: LastChange::NotExist, is_locked_with_conflict: false, }, false diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index 8b9399b7d05..cc4403229c1 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -6,7 +6,7 @@ use std::borrow::Cow; use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; use kvproto::kvrpcpb::{IsolationLevel, WriteConflictReason}; use tikv_kv::SEEK_BOUND; -use txn_types::{Key, Lock, LockType, TimeStamp, TsSet, Value, WriteRef, WriteType}; +use txn_types::{Key, LastChange, Lock, LockType, TimeStamp, TsSet, Value, WriteRef, WriteType}; use crate::storage::{ kv::{Cursor, CursorBuilder, ScanMode, Snapshot, Statistics}, @@ -315,28 +315,33 @@ impl PointGetter { return Ok(None); } WriteType::Lock | WriteType::Rollback => { - if write.versions_to_last_change > 0 && write.last_change_ts.is_zero() { - return Ok(None); - } - if write.versions_to_last_change < SEEK_BOUND { - // Continue iterate next `write`. - } else { - let commit_ts = write.last_change_ts; - let key_with_ts = user_key.clone().append_ts(commit_ts); - match self.snapshot.get_cf(CF_WRITE, &key_with_ts)? { - Some(v) => owned_value = v, - None => return Ok(None), + match write.last_change { + LastChange::NotExist => { + return Ok(None); + } + LastChange::Exist { + last_change_ts: commit_ts, + estimated_versions_to_last_change, + } if estimated_versions_to_last_change >= SEEK_BOUND => { + let key_with_ts = user_key.clone().append_ts(commit_ts); + match self.snapshot.get_cf(CF_WRITE, &key_with_ts)? 
{ + Some(v) => owned_value = v, + None => return Ok(None), + } + self.statistics.write.get += 1; + write = WriteRef::parse(&owned_value)?; + assert!( + write.write_type == WriteType::Put + || write.write_type == WriteType::Delete, + "Write record pointed by last_change_ts {} should be Put or Delete, but got {:?}", + commit_ts, + write.write_type, + ); + continue; + } + _ => { + // Continue iterate next `write`. } - self.statistics.write.get += 1; - write = WriteRef::parse(&owned_value)?; - assert!( - write.write_type == WriteType::Put - || write.write_type == WriteType::Delete, - "Write record pointed by last_change_ts {} should be Put or Delete, but got {:?}", - commit_ts, - write.write_type, - ); - continue; } } } @@ -1291,7 +1296,7 @@ mod tests { must_get_none(&mut getter, k); let s = getter.take_statistics(); // We can know the key doesn't exist without skipping all these locks according - // to last_change_ts and versions_to_last_change. + // to last_change_ts and estimated_versions_to_last_change. 
assert_eq!(s.write.seek, 1); assert_eq!(s.write.next, 0); assert_eq!(s.write.get, 0); diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 68d0d50f0b8..3096a7376a5 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -10,7 +10,7 @@ use kvproto::{ }; use raftstore::store::LocksStatus; use tikv_kv::{SnapshotExt, SEEK_BOUND}; -use txn_types::{Key, Lock, OldValue, TimeStamp, Value, Write, WriteRef, WriteType}; +use txn_types::{Key, LastChange, Lock, OldValue, TimeStamp, Value, Write, WriteRef, WriteType}; use crate::storage::{ kv::{ @@ -409,22 +409,18 @@ impl MvccReader { WriteType::Delete => { return Ok(None); } - WriteType::Lock | WriteType::Rollback => { - if write.versions_to_last_change > 0 && write.last_change_ts.is_zero() { + WriteType::Lock | WriteType::Rollback => match write.last_change { + LastChange::NotExist => { return Ok(None); } - if write.versions_to_last_change < SEEK_BOUND { - if ts.is_zero() { - // this should only happen in tests - return Ok(None); - } - ts = commit_ts.prev(); - } else { - let commit_ts = write.last_change_ts; + LastChange::Exist { + last_change_ts: commit_ts, + estimated_versions_to_last_change, + } if estimated_versions_to_last_change >= SEEK_BOUND => { let key_with_ts = key.clone().append_ts(commit_ts); let Some(value) = self - .snapshot - .get_cf(CF_WRITE, &key_with_ts)? else { + .snapshot + .get_cf(CF_WRITE, &key_with_ts)? 
else { return Ok(None); }; self.statistics.write.get += 1; @@ -439,7 +435,14 @@ impl MvccReader { seek_res = Some((commit_ts, write)); continue; } - } + _ => { + if ts.is_zero() { + // this should only happen in tests + return Ok(None); + } + ts = commit_ts.prev(); + } + }, } } None => return Ok(None), @@ -805,7 +808,7 @@ pub mod tests { }; use pd_client::FeatureGate; use raftstore::store::RegionSnapshot; - use txn_types::{LockType, Mutation}; + use txn_types::{LastChange, LockType, Mutation}; use super::*; use crate::storage::{ @@ -1692,10 +1695,10 @@ pub mod tests { TimeStamp::zero(), false, ) - .set_last_change( + .set_last_change(LastChange::from_parts( TimeStamp::zero(), (lock_type == LockType::Lock || lock_type == LockType::Pessimistic) as u64, - ), + )), ) }) .collect(); @@ -2421,8 +2424,9 @@ pub mod tests { .unwrap(); assert_eq!(commit_ts, 2.into()); assert_eq!(write, w2); - // versions_to_last_change should be large enough to trigger a second get - // instead of calling a series of next, so the count of next should be 0 instead + // estimated_versions_to_last_change should be large enough to trigger a second + // get instead of calling a series of next, so the count of next should + // be 0 instead assert_eq!(reader.statistics.write.next, 0); assert_eq!(reader.statistics.write.get, 1); @@ -2433,8 +2437,9 @@ pub mod tests { .unwrap(); // If the type is Delete, get_write_with_commit_ts should return None. 
assert!(res.is_none()); - // versions_to_last_change should be large enough to trigger a second get - // instead of calling a series of next, so the count of next should be 0 instead + // estimated_versions_to_last_change should be large enough to trigger a second + // get instead of calling a series of next, so the count of next should + // be 0 instead assert_eq!(reader.statistics.write.next, 0); assert_eq!(reader.statistics.write.get, 1); } @@ -2463,7 +2468,7 @@ pub mod tests { .get_write_with_commit_ts(&Key::from_raw(k), 40.into(), None) .unwrap(); // We can know the key doesn't exist without skipping all these locks according - // to last_change_ts and versions_to_last_change. + // to last_change_ts and estimated_versions_to_last_change. assert!(res.is_none()); assert_eq!(reader.statistics.write.seek, 1); assert_eq!(reader.statistics.write.next, 0); @@ -2564,6 +2569,7 @@ pub mod tests { assert_eq!(res.0.write_type, WriteType::Put); assert_eq!(res.1, 2.into()); assert_eq!(reader.statistics.write.seek, 1); - assert_eq!(reader.statistics.write.next, 0); + assert_eq!(reader.statistics.write.next, 2); + assert_eq!(reader.statistics.write.get, 1); } } diff --git a/src/storage/mvcc/reader/scanner/backward.rs b/src/storage/mvcc/reader/scanner/backward.rs index ee1780b76b4..b786807b3f3 100644 --- a/src/storage/mvcc/reader/scanner/backward.rs +++ b/src/storage/mvcc/reader/scanner/backward.rs @@ -511,14 +511,14 @@ mod tests { let ctx = Context::default(); // Generate REVERSE_SEEK_BOUND / 2 Put for key [10]. let k = &[10_u8]; - for ts in 0..REVERSE_SEEK_BOUND / 2 { + for ts in 1..=REVERSE_SEEK_BOUND / 2 { must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); must_commit(&mut engine, k, ts, ts); } // Generate REVERSE_SEEK_BOUND + 1 Put for key [9]. 
let k = &[9_u8]; - for ts in 0..=REVERSE_SEEK_BOUND { + for ts in 1..=REVERSE_SEEK_BOUND + 1 { must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); must_commit(&mut engine, k, ts, ts); } @@ -526,9 +526,9 @@ mod tests { // Generate REVERSE_SEEK_BOUND / 2 Put and REVERSE_SEEK_BOUND / 2 + 1 Rollback // for key [8]. let k = &[8_u8]; - for ts in 0..=REVERSE_SEEK_BOUND { + for ts in 1..=REVERSE_SEEK_BOUND + 1 { must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); - if ts < REVERSE_SEEK_BOUND / 2 { + if ts < REVERSE_SEEK_BOUND / 2 + 1 { must_commit(&mut engine, k, ts, ts); } else { let modifies = vec![ @@ -547,16 +547,16 @@ mod tests { // Generate REVERSE_SEEK_BOUND / 2 Put, 1 Delete and REVERSE_SEEK_BOUND / 2 // Rollback for key [7]. let k = &[7_u8]; - for ts in 0..REVERSE_SEEK_BOUND / 2 { + for ts in 1..=REVERSE_SEEK_BOUND / 2 { must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); must_commit(&mut engine, k, ts, ts); } { - let ts = REVERSE_SEEK_BOUND / 2; + let ts = REVERSE_SEEK_BOUND / 2 + 1; must_prewrite_delete(&mut engine, k, k, ts); must_commit(&mut engine, k, ts, ts); } - for ts in REVERSE_SEEK_BOUND / 2 + 1..=REVERSE_SEEK_BOUND { + for ts in REVERSE_SEEK_BOUND / 2 + 2..=REVERSE_SEEK_BOUND + 1 { must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); let modifies = vec![ // ts is rather small, so it is ok to `as u8` @@ -572,14 +572,14 @@ mod tests { // Generate 1 PUT for key [6]. let k = &[6_u8]; - for ts in 0..1 { + for ts in 1..2 { must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); must_commit(&mut engine, k, ts, ts); } // Generate REVERSE_SEEK_BOUND + 1 Rollback for key [5]. let k = &[5_u8]; - for ts in 0..=REVERSE_SEEK_BOUND { + for ts in 1..=REVERSE_SEEK_BOUND + 1 { must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); let modifies = vec![ // ts is rather small, so it is ok to `as u8` @@ -596,7 +596,7 @@ mod tests { // Generate 1 PUT with ts = REVERSE_SEEK_BOUND and 1 PUT // with ts = REVERSE_SEEK_BOUND + 1 for key [4]. 
let k = &[4_u8]; - for ts in REVERSE_SEEK_BOUND..REVERSE_SEEK_BOUND + 2 { + for ts in REVERSE_SEEK_BOUND + 1..REVERSE_SEEK_BOUND + 3 { must_prewrite_put(&mut engine, k, &[ts as u8], k, ts); must_commit(&mut engine, k, ts, ts); } @@ -605,7 +605,7 @@ mod tests { // 4 4 5 5 5 5 5 6 7 7 7 7 7 8 8 8 8 8 9 9 9 9 9 10 10 let snapshot = engine.snapshot(Default::default()).unwrap(); - let mut scanner = ScannerBuilder::new(snapshot, REVERSE_SEEK_BOUND.into()) + let mut scanner = ScannerBuilder::new(snapshot, (REVERSE_SEEK_BOUND + 1).into()) .desc(true) .range(None, Some(Key::from_raw(&[11_u8]))) .build() @@ -625,7 +625,7 @@ mod tests { scanner.next().unwrap(), Some(( Key::from_raw(&[10_u8]), - vec![(REVERSE_SEEK_BOUND / 2 - 1) as u8] + vec![(REVERSE_SEEK_BOUND / 2) as u8] )) ); let statistics = scanner.take_statistics(); @@ -658,7 +658,7 @@ mod tests { // ^cursor assert_eq!( scanner.next().unwrap(), - Some((Key::from_raw(&[9_u8]), vec![REVERSE_SEEK_BOUND as u8])) + Some((Key::from_raw(&[9_u8]), vec![REVERSE_SEEK_BOUND as u8 + 1])) ); let statistics = scanner.take_statistics(); assert_eq!(statistics.write.prev, REVERSE_SEEK_BOUND as usize); @@ -696,10 +696,7 @@ mod tests { // ^cursor assert_eq!( scanner.next().unwrap(), - Some(( - Key::from_raw(&[8_u8]), - vec![(REVERSE_SEEK_BOUND / 2 - 1) as u8] - )) + Some((Key::from_raw(&[8_u8]), vec![(REVERSE_SEEK_BOUND / 2) as u8])) ); let statistics = scanner.take_statistics(); assert_eq!(statistics.write.prev, REVERSE_SEEK_BOUND as usize + 1); @@ -738,7 +735,7 @@ mod tests { // ^cursor assert_eq!( scanner.next().unwrap(), - Some((Key::from_raw(&[6_u8]), vec![0_u8])) + Some((Key::from_raw(&[6_u8]), vec![1_u8])) ); let statistics = scanner.take_statistics(); assert_eq!(statistics.write.prev, REVERSE_SEEK_BOUND as usize + 2); @@ -778,7 +775,7 @@ mod tests { // ^cursor assert_eq!( scanner.next().unwrap(), - Some((Key::from_raw(&[4_u8]), vec![REVERSE_SEEK_BOUND as u8])) + Some((Key::from_raw(&[4_u8]), vec![REVERSE_SEEK_BOUND as u8 + 1])) ); 
let statistics = scanner.take_statistics(); assert_eq!(statistics.write.prev, REVERSE_SEEK_BOUND as usize + 3); diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 9da1b48d3ff..3437a1e5432 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -5,7 +5,7 @@ use std::{borrow::Cow, cmp::Ordering}; use engine_traits::CF_DEFAULT; use kvproto::kvrpcpb::{ExtraOp, IsolationLevel, WriteConflictReason}; -use txn_types::{Key, Lock, LockType, OldValue, TimeStamp, Value, WriteRef, WriteType}; +use txn_types::{Key, LastChange, Lock, LockType, OldValue, TimeStamp, Value, WriteRef, WriteType}; use super::ScannerConfig; use crate::storage::{ @@ -472,17 +472,22 @@ impl ScanPolicy for LatestKvPolicy { } WriteType::Delete => break None, WriteType::Lock | WriteType::Rollback => { - if write.versions_to_last_change > 0 && write.last_change_ts.is_zero() { - break None; - } - if write.versions_to_last_change < SEEK_BOUND { - // Continue iterate next `write`. - cursors.write.next(&mut statistics.write); - } else { - // Seek to the expected version directly. - let commit_ts = write.last_change_ts; - let key_with_ts = current_user_key.clone().append_ts(commit_ts); - cursors.write.seek(&key_with_ts, &mut statistics.write)?; + match write.last_change { + LastChange::NotExist => { + break None; + } + LastChange::Exist { + last_change_ts, + estimated_versions_to_last_change, + } if estimated_versions_to_last_change >= SEEK_BOUND => { + // Seek to the expected version directly. + let key_with_ts = current_user_key.clone().append_ts(last_change_ts); + cursors.write.seek(&key_with_ts, &mut statistics.write)?; + } + _ => { + // Continue iterate next `write`. 
+ cursors.write.next(&mut statistics.write); + } } } } @@ -877,6 +882,8 @@ where } pub mod test_util { + use txn_types::LastChange; + use super::*; use crate::storage::{ mvcc::Write, @@ -896,7 +903,7 @@ pub mod test_util { pub for_update_ts: TimeStamp, pub old_value: OldValue, pub last_change_ts: TimeStamp, - pub versions_to_last_change: u64, + pub estimated_versions_to_last_change: u64, } impl Default for EntryBuilder { @@ -910,7 +917,7 @@ pub mod test_util { for_update_ts: 0.into(), old_value: OldValue::None, last_change_ts: TimeStamp::zero(), - versions_to_last_change: 0, + estimated_versions_to_last_change: 0, } } } @@ -947,10 +954,10 @@ pub mod test_util { pub fn last_change( &mut self, last_change_ts: TimeStamp, - versions_to_last_change: u64, + estimated_versions_to_last_change: u64, ) -> &mut Self { self.last_change_ts = last_change_ts; - self.versions_to_last_change = versions_to_last_change; + self.estimated_versions_to_last_change = estimated_versions_to_last_change; self } pub fn build_commit(&self, wt: WriteType, is_short_value: bool) -> TxnEntry { @@ -971,8 +978,9 @@ pub mod test_util { None, ) }; - let write_value = Write::new(wt, self.start_ts, short) - .set_last_change(self.last_change_ts, self.versions_to_last_change); + let write_value = Write::new(wt, self.start_ts, short).set_last_change( + LastChange::from_parts(self.last_change_ts, self.estimated_versions_to_last_change), + ); TxnEntry::Commit { default: (key, value), write: (write_key.into_encoded(), write_value.as_ref().to_bytes()), @@ -1009,7 +1017,10 @@ pub mod test_util { 0.into(), false, ) - .set_last_change(self.last_change_ts, self.versions_to_last_change); + .set_last_change(LastChange::from_parts( + self.last_change_ts, + self.estimated_versions_to_last_change, + )); TxnEntry::Prewrite { default: (key, value), lock: (lock_key.into_encoded(), lock_value.to_bytes()), @@ -2140,6 +2151,7 @@ mod delta_entry_tests { use super::{super::ScannerBuilder, test_util::*, *}; use 
crate::storage::{mvcc::tests::write, txn::tests::*, Engine, Modify, TestEngineBuilder}; + /// Check whether everything works as usual when `Delta::get()` goes out of /// bound. #[test] diff --git a/src/storage/mvcc/reader/scanner/mod.rs b/src/storage/mvcc/reader/scanner/mod.rs index 5b87cca7f7a..d0cfde82704 100644 --- a/src/storage/mvcc/reader/scanner/mod.rs +++ b/src/storage/mvcc/reader/scanner/mod.rs @@ -928,7 +928,7 @@ mod tests { let (key, val1) = (b"foo", b"bar1"); if deep_write_seek { - for i in 0..SEEK_BOUND { + for i in 1..SEEK_BOUND { must_prewrite_put(&mut engine, key, val1, key, i); must_commit(&mut engine, key, i, i); } diff --git a/src/storage/txn/actions/acquire_pessimistic_lock.rs b/src/storage/txn/actions/acquire_pessimistic_lock.rs index 60450be1b40..2a9e49b45ff 100644 --- a/src/storage/txn/actions/acquire_pessimistic_lock.rs +++ b/src/storage/txn/actions/acquire_pessimistic_lock.rs @@ -2,7 +2,7 @@ use kvproto::kvrpcpb::WriteConflictReason; // #[PerformanceCriticalPath] -use txn_types::{Key, OldValue, PessimisticLock, TimeStamp, Value, Write, WriteType}; +use txn_types::{Key, LastChange, OldValue, PessimisticLock, TimeStamp, Value, Write, WriteType}; use crate::storage::{ mvcc::{ @@ -204,8 +204,7 @@ pub fn acquire_pessimistic_lock( ttl: lock_ttl, for_update_ts, min_commit_ts, - last_change_ts: lock.last_change_ts, - versions_to_last_change: lock.versions_to_last_change, + last_change: lock.last_change.clone(), is_locked_with_conflict: lock.is_pessimistic_lock_with_conflict(), }; txn.put_pessimistic_lock(key, lock, false); @@ -229,7 +228,7 @@ pub fn acquire_pessimistic_lock( // Following seek_write read the previous write. let (prev_write_loaded, mut prev_write) = (true, None); - let (mut last_change_ts, mut versions_to_last_change); + let mut last_change; if let Some((commit_ts, write)) = reader.seek_write(&key, TimeStamp::max())? { // Find a previous write. 
if need_old_value { @@ -315,8 +314,7 @@ pub fn acquire_pessimistic_lock( Err(e) })?; - (last_change_ts, versions_to_last_change) = - next_last_change_info(&key, &write, txn.start_ts, reader, commit_ts)?; + last_change = next_last_change_info(&key, &write, txn.start_ts, reader, commit_ts)?; // Load value if locked_with_conflict, so that when the client (TiDB) need to // read the value during statement retry, it will be possible to read the value @@ -346,12 +344,10 @@ pub fn acquire_pessimistic_lock( }; } } else { - // last_change_ts == 0 && versions_to_last_change > 0 means the key actually - // does not exist. - (last_change_ts, versions_to_last_change) = (TimeStamp::zero(), 1); + last_change = LastChange::NotExist; } if !tls_can_enable(LAST_CHANGE_TS) { - (last_change_ts, versions_to_last_change) = (TimeStamp::zero(), 0); + last_change = LastChange::Unknown; } let old_value = load_old_value( @@ -370,8 +366,7 @@ pub fn acquire_pessimistic_lock( ttl: lock_ttl, for_update_ts, min_commit_ts, - last_change_ts, - versions_to_last_change, + last_change, is_locked_with_conflict: conflict_info.is_some(), }; @@ -1742,15 +1737,13 @@ pub mod tests { .unwrap(); must_succeed(&mut engine, key, key, 10, 30); let lock = must_pessimistic_locked(&mut engine, key, 10, 30); - assert_eq!(lock.last_change_ts, TimeStamp::zero()); - assert_eq!(lock.versions_to_last_change, 0); + assert_eq!(lock.last_change, LastChange::Unknown); pessimistic_rollback::tests::must_success(&mut engine, key, 10, 30); // Set cluster version to 6.5.0, last_change_ts should work now. 
feature_gate.set_version("6.5.0").unwrap(); must_succeed(&mut engine, key, key, 10, 30); let lock = must_pessimistic_locked(&mut engine, key, 10, 30); - assert_eq!(lock.last_change_ts, 20.into()); - assert_eq!(lock.versions_to_last_change, 1); + assert_eq!(lock.last_change, LastChange::make_exist(20.into(), 1)); pessimistic_rollback::tests::must_success(&mut engine, key, 10, 30); // Latest version is a DELETE @@ -1765,8 +1758,7 @@ pub mod tests { .unwrap(); must_succeed(&mut engine, key, key, 60, 70); let lock = must_pessimistic_locked(&mut engine, key, 60, 70); - assert_eq!(lock.last_change_ts, 50.into()); - assert_eq!(lock.versions_to_last_change, 1); + assert_eq!(lock.last_change, LastChange::make_exist(50.into(), 1)); pessimistic_rollback::tests::must_success(&mut engine, key, 60, 70); // Latest version is a LOCK without last_change_ts @@ -1781,8 +1773,7 @@ pub mod tests { .unwrap(); must_succeed(&mut engine, key, key, 80, 80); let lock = must_pessimistic_locked(&mut engine, key, 80, 80); - assert!(lock.last_change_ts.is_zero()); - assert_eq!(lock.versions_to_last_change, 1); + assert_eq!(lock.last_change, LastChange::NotExist); pessimistic_rollback::tests::must_success(&mut engine, key, 80, 80); // Latest version is a ROLLBACK without last_change_ts @@ -1797,12 +1788,12 @@ pub mod tests { .unwrap(); must_succeed(&mut engine, key, key, 95, 95); let lock = must_pessimistic_locked(&mut engine, key, 95, 95); - assert!(lock.last_change_ts.is_zero()); - assert_eq!(lock.versions_to_last_change, 1); + assert_eq!(lock.last_change, LastChange::NotExist); pessimistic_rollback::tests::must_success(&mut engine, key, 95, 95); // Latest version is a LOCK with last_change_ts - let write = Write::new(WriteType::Lock, 100.into(), None).set_last_change(40.into(), 4); + let write = Write::new(WriteType::Lock, 100.into(), None) + .set_last_change(LastChange::make_exist(40.into(), 4)); engine .put_cf( Default::default(), @@ -1813,12 +1804,12 @@ pub mod tests { .unwrap(); 
must_succeed(&mut engine, key, key, 120, 130); let lock = must_pessimistic_locked(&mut engine, key, 120, 130); - assert_eq!(lock.last_change_ts, 40.into()); - assert_eq!(lock.versions_to_last_change, 5); + assert_eq!(lock.last_change, LastChange::make_exist(40.into(), 5)); pessimistic_rollback::tests::must_success(&mut engine, key, 120, 130); // Latest version is a ROLLBACK with last_change_ts - let write = Write::new(WriteType::Rollback, 120.into(), None).set_last_change(40.into(), 5); + let write = Write::new(WriteType::Rollback, 120.into(), None) + .set_last_change(LastChange::make_exist(40.into(), 5)); engine .put_cf( Default::default(), @@ -1829,15 +1820,13 @@ pub mod tests { .unwrap(); must_succeed(&mut engine, key, key, 140, 140); let lock = must_pessimistic_locked(&mut engine, key, 140, 140); - assert_eq!(lock.last_change_ts, 40.into()); - assert_eq!(lock.versions_to_last_change, 6); + assert_eq!(lock.last_change, LastChange::make_exist(40.into(), 6)); pessimistic_rollback::tests::must_success(&mut engine, key, 140, 140); // Lock on a key with no write record must_succeed(&mut engine, b"k2", b"k2", 150, 150); let lock = must_pessimistic_locked(&mut engine, b"k2", 150, 150); - assert!(lock.last_change_ts.is_zero()); - assert_eq!(lock.versions_to_last_change, 1); + assert_eq!(lock.last_change, LastChange::NotExist); } #[test] diff --git a/src/storage/txn/actions/commit.rs b/src/storage/txn/actions/commit.rs index 2f9e45bfe7f..1685dde1c88 100644 --- a/src/storage/txn/actions/commit.rs +++ b/src/storage/txn/actions/commit.rs @@ -101,7 +101,7 @@ pub fn commit( reader.start_ts, lock.short_value.take(), ) - .set_last_change(lock.last_change_ts, lock.versions_to_last_change) + .set_last_change(lock.last_change.clone()) .set_txn_source(lock.txn_source); for ts in &lock.rollback_ts { @@ -121,7 +121,8 @@ pub mod tests { #[cfg(test)] use kvproto::kvrpcpb::PrewriteRequestPessimisticAction::*; use tikv_kv::SnapContext; - use txn_types::TimeStamp; + #[cfg(test)] + use 
txn_types::{LastChange, TimeStamp}; use super::*; #[cfg(test)] @@ -348,22 +349,18 @@ pub mod tests { // WriteType is Lock must_prewrite_lock(&mut engine, k, k, 15); let lock = must_locked(&mut engine, k, 15); - assert_eq!(lock.last_change_ts, 10.into()); - assert_eq!(lock.versions_to_last_change, 1); + assert_eq!(lock.last_change, LastChange::make_exist(10.into(), 1)); must_succeed(&mut engine, k, 15, 20); let write = must_written(&mut engine, k, 15, 20, WriteType::Lock); - assert_eq!(write.last_change_ts, 10.into()); - assert_eq!(write.versions_to_last_change, 1); + assert_eq!(write.last_change, LastChange::make_exist(10.into(), 1)); // WriteType is Put must_prewrite_put(&mut engine, k, b"v2", k, 25); let lock = must_locked(&mut engine, k, 25); - assert!(lock.last_change_ts.is_zero()); - assert_eq!(lock.versions_to_last_change, 0); + assert_eq!(lock.last_change, LastChange::Unknown); must_succeed(&mut engine, k, 25, 30); let write = must_written(&mut engine, k, 25, 30, WriteType::Put); - assert!(write.last_change_ts.is_zero()); - assert_eq!(write.versions_to_last_change, 0); + assert_eq!(write.last_change, LastChange::Unknown); } #[test] diff --git a/src/storage/txn/actions/common.rs b/src/storage/txn/actions/common.rs index afe0e200f58..336302c7130 100644 --- a/src/storage/txn/actions/common.rs +++ b/src/storage/txn/actions/common.rs @@ -1,51 +1,57 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use tikv_kv::{Snapshot, SEEK_BOUND}; -use txn_types::{Key, TimeStamp, Write, WriteType}; +use tikv_kv::Snapshot; +use txn_types::{Key, LastChange, TimeStamp, Write, WriteType}; use crate::storage::mvcc::{Result, SnapshotReader}; -/// Returns the new `last_change_ts` and `versions_to_last_change` according -/// to this write record. If it is unknown from the given write, try iterate to -/// the last change and find the answer. +/// Returns the new `LastChange` according to this write record. 
If it is +/// unknown from the given write, try iterate to the last change and find the +/// answer. pub fn next_last_change_info( key: &Key, write: &Write, start_ts: TimeStamp, original_reader: &mut SnapshotReader, commit_ts: TimeStamp, -) -> Result<(TimeStamp, u64)> { +) -> Result { match write.write_type { - WriteType::Put | WriteType::Delete => Ok((commit_ts, 1)), + WriteType::Put | WriteType::Delete => Ok(LastChange::make_exist(commit_ts, 1)), WriteType::Lock | WriteType::Rollback => { - assert!(write.last_change_ts.is_zero() || write.versions_to_last_change > 0); - if !write.last_change_ts.is_zero() || write.versions_to_last_change != 0 { - Ok((write.last_change_ts, write.versions_to_last_change + 1)) - } else { - // If neither `last_change_ts` nor `versions_to_last_change` exists, it means we - // do not know the last change info, probably because it comes from an older - // version TiKV. To support data from old TiKV, we iterate to the last change to - // find it. + match &write.last_change { + LastChange::Exist { + last_change_ts, + estimated_versions_to_last_change, + } => Ok(LastChange::make_exist( + *last_change_ts, + estimated_versions_to_last_change + 1, + )), + LastChange::NotExist => Ok(LastChange::NotExist), + LastChange::Unknown => { + // We do not know the last change info, probably + // because it comes from an older version TiKV. To support data + // from old TiKV, we iterate to the last change to find it. - // TODO: can we reuse the reader? - let snapshot = original_reader.reader.snapshot().clone(); - let mut reader = SnapshotReader::new(start_ts, snapshot, true); - - // Note that the scan can also utilize `last_change`. So once it finds a LOCK - // version with useful `last_change` pointer, it just needs one more `seek` or - // several `next`s to get to the final result. - let res = reader.get_write_with_commit_ts(key, commit_ts); - let stat = reader.take_statistics(); - original_reader.reader.statistics.add(&stat); - match res? 
{ - // last_change_ts == 0 && versions_to_last_change > 0 means the key does not - // exist. - None => Ok((TimeStamp::zero(), 1)), - Some((w, last_change_ts)) => { - assert!(matches!(w.write_type, WriteType::Put)); - // We don't know how many versions there are. Make `versions_to_last_change` - // big enough so that later reads won't try to `next` to it. - Ok((last_change_ts, SEEK_BOUND + 1)) + // TODO: can we reuse the reader? + let snapshot = original_reader.reader.snapshot().clone(); + let mut reader = SnapshotReader::new(start_ts, snapshot, true); + // Note that the scan can also utilize `last_change`. So once it finds a LOCK + // version with useful `last_change` pointer, it just needs one more `seek` or + // several `next`s to get to the final result. + let res = reader.get_write_with_commit_ts(key, commit_ts); + let stat = reader.take_statistics(); + original_reader.reader.statistics.add(&stat); + match res? { + // last_change_ts == 0 && estimated_versions_to_last_change > 0 means the + // key does not exist. 
+ None => Ok(LastChange::NotExist), + Some((w, last_change_ts)) => { + assert!(matches!(w.write_type, WriteType::Put)); + Ok(LastChange::make_exist( + last_change_ts, + stat.write.next as u64, + )) + } } } } diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index fbaaec8ab43..47edac8b513 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -208,7 +208,7 @@ pub fn commit_flashback_key( flashback_start_ts, lock.short_value.take(), ) - .set_last_change(lock.last_change_ts, lock.versions_to_last_change) + .set_last_change(lock.last_change.clone()) .set_txn_source(lock.txn_source) .as_ref() .to_bytes(), diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 8821f2ec73f..cdae37dcf94 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -10,7 +10,8 @@ use kvproto::kvrpcpb::{ WriteConflictReason, }; use txn_types::{ - is_short_value, Key, Mutation, MutationType, OldValue, TimeStamp, Value, Write, WriteType, + is_short_value, Key, LastChange, Mutation, MutationType, OldValue, TimeStamp, Value, Write, + WriteType, }; use crate::storage::{ @@ -248,8 +249,7 @@ struct PrewriteMutation<'a> { lock_type: Option, lock_ttl: u64, - last_change_ts: TimeStamp, - versions_to_last_change: u64, + last_change: LastChange, should_not_exist: bool, should_not_write: bool, @@ -287,8 +287,7 @@ impl<'a> PrewriteMutation<'a> { lock_type, lock_ttl: txn_props.lock_ttl, - last_change_ts: TimeStamp::zero(), - versions_to_last_change: 0, + last_change: LastChange::default(), should_not_exist, should_not_write, @@ -338,8 +337,7 @@ impl<'a> PrewriteMutation<'a> { return Err(ErrorInner::KeyIsLocked(self.lock_info(lock)?).into()); } - self.last_change_ts = lock.last_change_ts; - self.versions_to_last_change = lock.versions_to_last_change; + self.last_change = lock.last_change.clone(); if 
lock.is_pessimistic_lock() { // TODO: remove it in future @@ -434,7 +432,7 @@ impl<'a> PrewriteMutation<'a> { self.write_conflict_error(&write, commit_ts, WriteConflictReason::SelfRolledBack)?; } if seek_ts == TimeStamp::max() { - (self.last_change_ts, self.versions_to_last_change) = + self.last_change = next_last_change_info(&self.key, &write, reader.start_ts, reader, commit_ts)?; } match self.txn_props.kind { @@ -499,9 +497,7 @@ impl<'a> PrewriteMutation<'a> { } // If seek_ts is max and it goes here, there is no write record for this key. if seek_ts == TimeStamp::max() { - // last_change_ts == 0 && versions_to_last_change > 0 means the key actually - // does not exist. - (self.last_change_ts, self.versions_to_last_change) = (TimeStamp::zero(), 1); + self.last_change = LastChange::NotExist; } Ok(None) } @@ -536,7 +532,7 @@ impl<'a> PrewriteMutation<'a> { // Only Lock needs to record `last_change_ts` in its write record, Put or Delete // records themselves are effective changes. if tls_can_enable(LAST_CHANGE_TS) && self.lock_type == Some(LockType::Lock) { - lock = lock.set_last_change(self.last_change_ts, self.versions_to_last_change); + lock = lock.set_last_change(self.last_change); } if let Some(value) = self.value { @@ -826,12 +822,10 @@ fn amend_pessimistic_lock( } .into()); } - (mutation.last_change_ts, mutation.versions_to_last_change) = + mutation.last_change = next_last_change_info(&mutation.key, write, reader.start_ts, reader, *commit_ts)?; } else { - // last_change_ts == 0 && versions_to_last_change > 0 means the key actually - // does not exist. 
- (mutation.last_change_ts, mutation.versions_to_last_change) = (TimeStamp::zero(), 1); + mutation.last_change = LastChange::NotExist; } // Used pipelined pessimistic lock acquiring in this txn but failed // Luckily no other txn modified this lock, amend it by treat it as optimistic @@ -2353,8 +2347,7 @@ pub mod tests { // Latest version does not exist prewrite_func(&mut engine, LockType::Lock, 2); let lock = must_locked(&mut engine, key, 2); - assert!(lock.last_change_ts.is_zero()); - assert_eq!(lock.versions_to_last_change, 1); + assert_eq!(lock.last_change, LastChange::NotExist); must_rollback(&mut engine, key, 2, false); // Latest change ts should not be enabled on TiKV 6.4 @@ -2372,8 +2365,7 @@ pub mod tests { .unwrap(); prewrite_func(&mut engine, LockType::Lock, 10); let lock = must_locked(&mut engine, key, 10); - assert_eq!(lock.last_change_ts, TimeStamp::zero()); - assert_eq!(lock.versions_to_last_change, 0); + assert_eq!(lock.last_change, LastChange::Unknown); must_rollback(&mut engine, key, 10, false); let feature_gate = FeatureGate::default(); @@ -2393,8 +2385,7 @@ pub mod tests { .unwrap(); prewrite_func(&mut engine, LockType::Put, 25); let lock = must_locked(&mut engine, key, 25); - assert_eq!(lock.last_change_ts, TimeStamp::zero()); - assert_eq!(lock.versions_to_last_change, 0); + assert_eq!(lock.last_change, LastChange::Unknown); must_rollback(&mut engine, key, 25, false); // Latest version is a PUT @@ -2409,8 +2400,7 @@ pub mod tests { .unwrap(); prewrite_func(&mut engine, LockType::Lock, 40); let lock = must_locked(&mut engine, key, 40); - assert_eq!(lock.last_change_ts, 35.into()); - assert_eq!(lock.versions_to_last_change, 1); + assert_eq!(lock.last_change, LastChange::make_exist(35.into(), 1)); must_rollback(&mut engine, key, 40, false); // Latest version is a DELETE @@ -2425,14 +2415,13 @@ pub mod tests { .unwrap(); prewrite_func(&mut engine, LockType::Lock, 55); let lock = must_locked(&mut engine, key, 55); - assert_eq!(lock.last_change_ts, 
50.into()); - assert_eq!(lock.versions_to_last_change, 1); + assert_eq!(lock.last_change, LastChange::make_exist(50.into(), 1)); must_rollback(&mut engine, key, 55, false); // Latest version is a LOCK without last_change_ts. It iterates back to find the // actual last write. In this case it is a DELETE, so it returns - // (last_change_ts == 0 && versions_to_last_change == 1), indicating the key - // does not exist. + // (last_change_ts == 0 && estimated_versions_to_last_change == 1), indicating + // the key does not exist. let write = Write::new(WriteType::Lock, 60.into(), None); engine .put_cf( @@ -2444,8 +2433,7 @@ pub mod tests { .unwrap(); prewrite_func(&mut engine, LockType::Lock, 70); let lock = must_locked(&mut engine, key, 70); - assert!(lock.last_change_ts.is_zero()); - assert_eq!(lock.versions_to_last_change, 1); + assert_eq!(lock.last_change, LastChange::NotExist); must_rollback(&mut engine, key, 70, false); // Latest version is a ROLLBACK without last_change_ts. Iterate back to find the @@ -2461,12 +2449,12 @@ pub mod tests { .unwrap(); prewrite_func(&mut engine, LockType::Lock, 85); let lock = must_locked(&mut engine, key, 85); - assert!(lock.last_change_ts.is_zero()); - assert_eq!(lock.versions_to_last_change, 1); + assert_eq!(lock.last_change, LastChange::NotExist); must_rollback(&mut engine, key, 85, false); // Latest version is a LOCK with last_change_ts - let write = Write::new(WriteType::Lock, 90.into(), None).set_last_change(20.into(), 6); + let write = Write::new(WriteType::Lock, 90.into(), None) + .set_last_change(LastChange::make_exist(20.into(), 6)); engine .put_cf( Default::default(), @@ -2477,12 +2465,12 @@ pub mod tests { .unwrap(); prewrite_func(&mut engine, LockType::Lock, 100); let lock = must_locked(&mut engine, key, 100); - assert_eq!(lock.last_change_ts, 20.into()); - assert_eq!(lock.versions_to_last_change, 7); + assert_eq!(lock.last_change, LastChange::make_exist(20.into(), 7)); must_rollback(&mut engine, key, 100, false); // 
Latest version is a LOCK with last_change_ts - let write = Write::new(WriteType::Lock, 105.into(), None).set_last_change(20.into(), 8); + let write = Write::new(WriteType::Lock, 105.into(), None) + .set_last_change(LastChange::make_exist(20.into(), 8)); engine .put_cf( Default::default(), @@ -2493,8 +2481,7 @@ pub mod tests { .unwrap(); prewrite_func(&mut engine, LockType::Lock, 120); let lock = must_locked(&mut engine, key, 120); - assert_eq!(lock.last_change_ts, 20.into()); - assert_eq!(lock.versions_to_last_change, 9); + assert_eq!(lock.last_change, LastChange::make_exist(20.into(), 9)); must_rollback(&mut engine, key, 120, false); } @@ -2546,60 +2533,61 @@ pub mod tests { let mut engine = crate::storage::TestEngineBuilder::new().build().unwrap(); let key = b"k"; - let put_lock = - |engine: &mut RocksEngine, ts: u64, last_change_ts: u64, versions_to_last_change| { - let lock = Lock::new( - LockType::Pessimistic, - key.to_vec(), - ts.into(), - 100, - None, - ts.into(), - 5, - ts.into(), - false, + let put_lock = |engine: &mut RocksEngine, + ts: u64, + last_change_ts: u64, + estimated_versions_to_last_change| { + let lock = Lock::new( + LockType::Pessimistic, + key.to_vec(), + ts.into(), + 100, + None, + ts.into(), + 5, + ts.into(), + false, + ) + .set_last_change(LastChange::from_parts( + last_change_ts.into(), + estimated_versions_to_last_change, + )); + engine + .put_cf( + Default::default(), + CF_LOCK, + Key::from_raw(key), + lock.to_bytes(), ) - .set_last_change(last_change_ts.into(), versions_to_last_change); - engine - .put_cf( - Default::default(), - CF_LOCK, - Key::from_raw(key), - lock.to_bytes(), - ) - .unwrap(); - }; + .unwrap(); + }; // Prewrite LOCK from pessimistic lock without `last_change_ts` put_lock(&mut engine, 10, 0, 0); must_pessimistic_prewrite_lock(&mut engine, key, key, 10, 10, DoPessimisticCheck); let lock = must_locked(&mut engine, key, 10); - assert_eq!(lock.last_change_ts, TimeStamp::zero()); - assert_eq!(lock.versions_to_last_change, 
0); + assert_eq!(lock.last_change, LastChange::Unknown); must_rollback(&mut engine, key, 10, false); // Prewrite LOCK from pessimistic lock with `last_change_ts` put_lock(&mut engine, 20, 15, 3); must_pessimistic_prewrite_lock(&mut engine, key, key, 20, 20, DoPessimisticCheck); let lock = must_locked(&mut engine, key, 20); - assert_eq!(lock.last_change_ts, 15.into()); - assert_eq!(lock.versions_to_last_change, 3); + assert_eq!(lock.last_change, LastChange::make_exist(15.into(), 3)); must_rollback(&mut engine, key, 20, false); // Prewrite PUT from pessimistic lock with `last_change_ts` put_lock(&mut engine, 30, 15, 5); must_pessimistic_prewrite_put(&mut engine, key, b"value", key, 30, 30, DoPessimisticCheck); let lock = must_locked(&mut engine, key, 30); - assert_eq!(lock.last_change_ts, TimeStamp::zero()); - assert_eq!(lock.versions_to_last_change, 0); + assert_eq!(lock.last_change, LastChange::Unknown); must_rollback(&mut engine, key, 30, false); // Prewrite DELETE from pessimistic lock with `last_change_ts` put_lock(&mut engine, 40, 15, 5); must_pessimistic_prewrite_delete(&mut engine, key, key, 40, 30, DoPessimisticCheck); let lock = must_locked(&mut engine, key, 40); - assert_eq!(lock.last_change_ts, TimeStamp::zero()); - assert_eq!(lock.versions_to_last_change, 0); + assert_eq!(lock.last_change, LastChange::Unknown); must_rollback(&mut engine, key, 40, false); } diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index d8172a60091..dc99ebf3b01 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -154,7 +154,7 @@ pub mod tests { use concurrency_manager::ConcurrencyManager; use kvproto::kvrpcpb::{self, Context, LockInfo, PrewriteRequestPessimisticAction::*}; use tikv_util::deadline::Deadline; - use txn_types::{Key, WriteType}; + use txn_types::{Key, LastChange, WriteType}; use super::{TxnStatus::*, *}; use crate::storage::{ @@ -1213,8 +1213,7 @@ pub 
mod tests { must_commit(&mut engine, k, 7, 8); let rollback = must_written(&mut engine, k, 10, 10, WriteType::Rollback); - assert!(rollback.last_change_ts.is_zero()); - assert_eq!(rollback.versions_to_last_change, 0); + assert_eq!(rollback.last_change, LastChange::Unknown); } #[test] diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index feaa641300f..10446db6292 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -951,7 +951,7 @@ fn handle_1pc_locks(txn: &mut MvccTxn, commit_ts: TimeStamp) -> ReleasedLocks { txn.start_ts, lock.short_value, ) - .set_last_change(lock.last_change_ts, lock.versions_to_last_change) + .set_last_change(lock.last_change) .set_txn_source(lock.txn_source); // Transactions committed with 1PC should be impossible to overwrite rollback // records. @@ -978,7 +978,7 @@ mod tests { use engine_rocks::ReadPerfInstant; use engine_traits::CF_WRITE; use kvproto::kvrpcpb::{Assertion, Context, ExtraOp}; - use txn_types::{Key, Mutation, TimeStamp}; + use txn_types::{Key, LastChange, Mutation, TimeStamp}; use super::*; use crate::storage::{ @@ -2682,8 +2682,7 @@ mod tests { .unwrap(); must_unlocked(&mut engine, key); let write = must_written(&mut engine, key, 30, res.one_pc_commit_ts, WriteType::Lock); - assert_eq!(write.last_change_ts, 20.into()); - assert_eq!(write.versions_to_last_change, 1); + assert_eq!(write.last_change, LastChange::make_exist(20.into(), 1)); // 1PC write another LOCK let res = prewrite_with_cm( @@ -2698,8 +2697,7 @@ mod tests { .unwrap(); must_unlocked(&mut engine, key); let write = must_written(&mut engine, key, 50, res.one_pc_commit_ts, WriteType::Lock); - assert_eq!(write.last_change_ts, 20.into()); - assert_eq!(write.versions_to_last_change, 2); + assert_eq!(write.last_change, LastChange::make_exist(20.into(), 2)); // 1PC write a PUT let mutations = vec![Mutation::make_put(Key::from_raw(key), b"v2".to_vec())]; @@ -2715,8 +2713,7 @@ mod tests { 
.unwrap(); must_unlocked(&mut engine, key); let write = must_written(&mut engine, key, 70, res.one_pc_commit_ts, WriteType::Put); - assert_eq!(write.last_change_ts, TimeStamp::zero()); - assert_eq!(write.versions_to_last_change, 0); + assert_eq!(write.last_change, LastChange::Unknown); // TiKV 6.4 should not have last_change_ts. let feature_gate = FeatureGate::default(); @@ -2735,8 +2732,7 @@ mod tests { .unwrap(); must_unlocked(&mut engine, key); let write = must_written(&mut engine, key, 80, res.one_pc_commit_ts, WriteType::Lock); - assert_eq!(write.last_change_ts, TimeStamp::zero()); - assert_eq!(write.versions_to_last_change, 0); + assert_eq!(write.last_change, LastChange::Unknown); } #[test] @@ -2770,8 +2766,7 @@ mod tests { .unwrap(); must_unlocked(&mut engine, key); let write = must_written(&mut engine, key, 30, res.one_pc_commit_ts, WriteType::Lock); - assert_eq!(write.last_change_ts, 20.into()); - assert_eq!(write.versions_to_last_change, 1); + assert_eq!(write.last_change, LastChange::make_exist(20.into(), 1)); // Pessimistic 1PC write another LOCK must_acquire_pessimistic_lock(&mut engine, key, key, 50, 50); @@ -2788,8 +2783,7 @@ mod tests { .unwrap(); must_unlocked(&mut engine, key); let write = must_written(&mut engine, key, 50, res.one_pc_commit_ts, WriteType::Lock); - assert_eq!(write.last_change_ts, 20.into()); - assert_eq!(write.versions_to_last_change, 2); + assert_eq!(write.last_change, LastChange::make_exist(20.into(), 2)); // Pessimistic 1PC write a PUT must_acquire_pessimistic_lock(&mut engine, key, key, 70, 70); @@ -2810,8 +2804,7 @@ mod tests { .unwrap(); must_unlocked(&mut engine, key); let write = must_written(&mut engine, key, 70, res.one_pc_commit_ts, WriteType::Put); - assert_eq!(write.last_change_ts, TimeStamp::zero()); - assert_eq!(write.versions_to_last_change, 0); + assert_eq!(write.last_change, LastChange::Unknown); // TiKV 6.4 should not have last_change_ts. 
let feature_gate = FeatureGate::default(); @@ -2832,8 +2825,7 @@ mod tests { .unwrap(); must_unlocked(&mut engine, key); let write = must_written(&mut engine, key, 80, res.one_pc_commit_ts, WriteType::Lock); - assert_eq!(write.last_change_ts, TimeStamp::zero()); - assert_eq!(write.versions_to_last_change, 0); + assert_eq!(write.last_change, LastChange::Unknown); } #[test] diff --git a/src/storage/txn/store.rs b/src/storage/txn/store.rs index 1b4a7d5624c..800571a7e59 100644 --- a/src/storage/txn/store.rs +++ b/src/storage/txn/store.rs @@ -1,7 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. use kvproto::kvrpcpb::IsolationLevel; -use txn_types::{Key, KvPair, Lock, OldValue, TimeStamp, TsSet, Value, WriteRef}; +use txn_types::{Key, KvPair, LastChange, Lock, OldValue, TimeStamp, TsSet, Value, WriteRef}; use super::{Error, ErrorInner, Result}; use crate::storage::{ @@ -167,14 +167,13 @@ impl TxnEntry { lock: (_, value), .. } => { let l = Lock::parse(value).unwrap(); - *value = l.set_last_change(TimeStamp::zero(), 0).to_bytes(); + *value = l.set_last_change(LastChange::Unknown).to_bytes(); } TxnEntry::Commit { write: (_, value), .. 
} => { let mut w = WriteRef::parse(value).unwrap(); - w.last_change_ts = TimeStamp::zero(); - w.versions_to_last_change = 0; + w.last_change = LastChange::Unknown; *value = w.to_bytes(); } } diff --git a/src/storage/types.rs b/src/storage/types.rs index 62e614587c8..065ccfc9dfa 100644 --- a/src/storage/types.rs +++ b/src/storage/types.rs @@ -5,7 +5,7 @@ use std::fmt::Debug; use kvproto::kvrpcpb; -use txn_types::{Key, Value}; +use txn_types::{Key, LastChange, Value}; use crate::storage::{ errors::SharedError, @@ -53,9 +53,13 @@ impl MvccInfo { write_info.set_start_ts(write.start_ts.into_inner()); write_info.set_commit_ts(commit_ts.into_inner()); write_info.set_short_value(write.short_value.unwrap_or_default()); - if !write.last_change_ts.is_zero() { - write_info.set_last_change_ts(write.last_change_ts.into_inner()); - write_info.set_versions_to_last_change(write.versions_to_last_change); + if !matches!( + write.last_change, + LastChange::NotExist | LastChange::Exist { .. } + ) { + let (last_change_ts, versions) = write.last_change.to_parts(); + write_info.set_last_change_ts(last_change_ts.into_inner()); + write_info.set_versions_to_last_change(versions); } write_info }) @@ -75,9 +79,13 @@ impl MvccInfo { lock_info.set_start_ts(lock.ts.into_inner()); lock_info.set_primary(lock.primary); lock_info.set_short_value(lock.short_value.unwrap_or_default()); - if !lock.last_change_ts.is_zero() { - lock_info.set_last_change_ts(lock.last_change_ts.into_inner()); - lock_info.set_versions_to_last_change(lock.versions_to_last_change); + if matches!( + lock.last_change, + LastChange::NotExist | LastChange::Exist { .. 
} + ) { + let (last_change_ts, versions) = lock.last_change.to_parts(); + lock_info.set_last_change_ts(last_change_ts.into_inner()); + lock_info.set_versions_to_last_change(versions); } mvcc_info.set_lock(lock_info); } diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 6e588099bd9..3cc72d44da1 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -22,7 +22,7 @@ use raftstore::store::*; use test_raftstore::*; use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{config::*, time::Instant, HandyRwLock}; -use txn_types::{Key, PessimisticLock}; +use txn_types::{Key, LastChange, PessimisticLock}; /// Test if merge is rollback as expected. #[test] @@ -1346,8 +1346,7 @@ fn test_merge_with_concurrent_pessimistic_locking() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), - last_change_ts: 15.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(15.into(), 3), is_locked_with_conflict: false, }, )]) @@ -1436,8 +1435,7 @@ fn test_merge_pessimistic_locks_with_concurrent_prewrite() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), - last_change_ts: 15.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(15.into(), 3), is_locked_with_conflict: false, }; txn_ext @@ -1518,8 +1516,7 @@ fn test_retry_pending_prepare_merge_fail() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), - last_change_ts: 15.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(15.into(), 3), is_locked_with_conflict: false, }; txn_ext @@ -1595,8 +1592,7 @@ fn test_merge_pessimistic_locks_propose_fail() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), - last_change_ts: 15.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(15.into(), 3), is_locked_with_conflict: false, }; txn_ext diff --git a/tests/failpoints/cases/test_split_region.rs 
b/tests/failpoints/cases/test_split_region.rs index 12da88a89a4..e5c1828a2dd 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -39,7 +39,7 @@ use tikv_util::{ time::Instant, HandyRwLock, }; -use txn_types::{Key, PessimisticLock}; +use txn_types::{Key, LastChange, PessimisticLock}; #[test] fn test_follower_slow_split() { @@ -950,8 +950,7 @@ fn test_split_pessimistic_locks_with_concurrent_prewrite() { ttl: 3000, for_update_ts: (commit_ts + 10).into(), min_commit_ts: (commit_ts + 10).into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(5.into(), 3), is_locked_with_conflict: false, }; let lock_c = PessimisticLock { @@ -960,8 +959,7 @@ fn test_split_pessimistic_locks_with_concurrent_prewrite() { ttl: 3000, for_update_ts: (commit_ts + 10).into(), min_commit_ts: (commit_ts + 10).into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(5.into(), 3), is_locked_with_conflict: false, }; { diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 5ad15fa3202..4e3ee2f298e 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -45,7 +45,7 @@ use tikv_util::{ store::{new_peer, peer::new_incoming_voter}, HandyRwLock, }; -use txn_types::{Key, Mutation, PessimisticLock, TimeStamp}; +use txn_types::{Key, LastChange, Mutation, PessimisticLock, TimeStamp}; #[test] fn test_txn_failpoints() { @@ -573,8 +573,7 @@ fn test_concurrent_write_after_transfer_leader_invalidates_locks() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(5.into(), 3), is_locked_with_conflict: false, }; txn_ext diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index a1bf2f8a096..b6d46ce5dd4 
100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -24,7 +24,7 @@ use tikv_util::{ config::{ReadableDuration, ReadableSize}, HandyRwLock, }; -use txn_types::{Key, PessimisticLock}; +use txn_types::{Key, LastChange, PessimisticLock}; /// When a follower applies log slowly, leader should not transfer leader /// to it. Otherwise, new leader may wait a long time to serve read/write @@ -128,8 +128,7 @@ macro_rules! test_delete_lock_proposed_after_proposing_locks_impl { ttl: 1000, for_update_ts: 10.into(), min_commit_ts: 20.into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(5.into(), 3), is_locked_with_conflict: false, }, )]) @@ -225,8 +224,7 @@ fn test_delete_lock_proposed_before_proposing_locks() { ttl: 1000, for_update_ts: 10.into(), min_commit_ts: 20.into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(5.into(), 3), is_locked_with_conflict: false, }, )]) @@ -311,8 +309,7 @@ fn test_read_lock_after_become_follower() { ttl: 1000, for_update_ts, min_commit_ts: for_update_ts, - last_change_ts: start_ts.prev(), - versions_to_last_change: 1, + last_change: LastChange::make_exist(start_ts.prev(), 1), is_locked_with_conflict: false, }, )]) diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index 0e996d1726e..515691000d2 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -17,7 +17,7 @@ use raftstore::store::{Callback, LocksStatus}; use test_raftstore::*; use test_raftstore_macro::test_case; use tikv::storage::kv::SnapContext; -use txn_types::{Key, PessimisticLock, WriteBatchFlags}; +use txn_types::{Key, LastChange, PessimisticLock, WriteBatchFlags}; const TEST_KEY: &[u8] = b"k1"; const TEST_VALUE: &[u8] = b"v1"; @@ -46,8 +46,7 @@ fn test_flashback_with_in_memory_pessimistic_locks() { ttl: 
3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(5.into(), 3), is_locked_with_conflict: false, }, )]) diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 3b2c0f04c40..16fb78ae5bc 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -16,7 +16,7 @@ use test_raftstore_macro::test_case; use test_raftstore_v2::Simulator as _; use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{config::*, HandyRwLock}; -use txn_types::{Key, PessimisticLock}; +use txn_types::{Key, LastChange, PessimisticLock}; /// Test if merge is working as expected in a general condition. #[test_case(test_raftstore::new_node_cluster)] @@ -1354,8 +1354,7 @@ fn test_propose_in_memory_pessimistic_locks() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(5.into(), 3), is_locked_with_conflict: false, }; txn_ext @@ -1373,8 +1372,7 @@ fn test_propose_in_memory_pessimistic_locks() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(5.into(), 3), is_locked_with_conflict: false, }; txn_ext @@ -1485,8 +1483,7 @@ fn test_merge_pessimistic_locks_repeated_merge() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(5.into(), 3), is_locked_with_conflict: false, }; txn_ext diff --git a/tests/integrations/raftstore/test_multi.rs b/tests/integrations/raftstore/test_multi.rs index 1c3a016e0e0..8093a30872d 100644 --- a/tests/integrations/raftstore/test_multi.rs +++ b/tests/integrations/raftstore/test_multi.rs @@ -14,7 +14,7 @@ use rand::{Rng, RngCore}; use test_raftstore::*; use 
tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{config::*, HandyRwLock}; -use txn_types::{Key, PessimisticLock}; +use txn_types::{Key, LastChange, PessimisticLock}; fn test_multi_base(cluster: &mut Cluster) { cluster.run(); @@ -833,8 +833,7 @@ fn test_leader_drop_with_pessimistic_lock() { ttl: 1000, for_update_ts: 10.into(), min_commit_ts: 10.into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(5.into(), 3), is_locked_with_conflict: false, }, )]) diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 89558f16319..1310ca04a96 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -24,7 +24,7 @@ use test_raftstore::*; use test_raftstore_macro::test_case; use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::config::*; -use txn_types::{Key, PessimisticLock}; +use txn_types::{Key, LastChange, PessimisticLock}; pub const REGION_MAX_SIZE: u64 = 50000; pub const REGION_SPLIT_SIZE: u64 = 30000; @@ -914,8 +914,7 @@ fn test_split_with_in_memory_pessimistic_locks() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(5.into(), 3), is_locked_with_conflict: false, }; let lock_c = PessimisticLock { @@ -924,8 +923,7 @@ fn test_split_with_in_memory_pessimistic_locks() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(5.into(), 3), is_locked_with_conflict: false, }; { diff --git a/tests/integrations/raftstore/test_transfer_leader.rs b/tests/integrations/raftstore/test_transfer_leader.rs index 1888e548ff8..6f251d1cf8b 100644 --- a/tests/integrations/raftstore/test_transfer_leader.rs +++ b/tests/integrations/raftstore/test_transfer_leader.rs @@ -10,7 +10,7 @@ use 
test_raftstore::*; use test_raftstore_macro::test_case; use tikv::storage::Snapshot; use tikv_util::config::*; -use txn_types::{Key, PessimisticLock}; +use txn_types::{Key, LastChange, PessimisticLock}; #[test_case(test_raftstore::new_node_cluster)] #[test_case(test_raftstore_v2::new_node_cluster)] @@ -271,8 +271,7 @@ fn test_propose_in_memory_pessimistic_locks() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(5.into(), 3), is_locked_with_conflict: false, }; // Write a pessimistic lock to the in-memory pessimistic lock table. @@ -315,8 +314,7 @@ fn test_memory_pessimistic_locks_status_after_transfer_leader_failure() { ttl: 3000, for_update_ts: 20.into(), min_commit_ts: 30.into(), - last_change_ts: 5.into(), - versions_to_last_change: 3, + last_change: LastChange::make_exist(5.into(), 3), is_locked_with_conflict: false, }; // Write a pessimistic lock to the in-memory pessimistic lock table. 
From 580303bce947c172782c5dda78aa3afa32034c1a Mon Sep 17 00:00:00 2001 From: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Date: Tue, 13 Jun 2023 17:37:07 +0800 Subject: [PATCH 0735/1149] cloud: update azure sdk for br (#14852) close tikv/tikv#14851 Signed-off-by: Leavrth Signed-off-by: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 2 - components/cloud/azure/Cargo.toml | 2 - components/cloud/azure/src/azblob.rs | 121 +++++++++++++-------------- 3 files changed, 58 insertions(+), 67 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5c609d6c2fb..bfef82e98d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -369,10 +369,8 @@ dependencies = [ "futures 0.3.15", "futures-util", "kvproto", - "lazy_static", "oauth2", "openssl", - "regex", "serde", "serde_json", "slog", diff --git a/components/cloud/azure/Cargo.toml b/components/cloud/azure/Cargo.toml index b74a44446d2..04f00c4bb60 100644 --- a/components/cloud/azure/Cargo.toml +++ b/components/cloud/azure/Cargo.toml @@ -20,10 +20,8 @@ fail = "0.5" futures = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } kvproto = { workspace = true } -lazy_static = "1.4.0" oauth2 = { version = "4.0.0", default-features = false } openssl = { version = "0.10.50" } -regex = "1" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" slog = { workspace = true } diff --git a/components/cloud/azure/src/azblob.rs b/components/cloud/azure/src/azblob.rs index 7f7483a3e8a..dbc52a898b0 100644 --- a/components/cloud/azure/src/azblob.rs +++ b/components/cloud/azure/src/azblob.rs @@ -1,6 +1,7 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ env, io, + ops::Deref, str::FromStr, sync::{Arc, RwLock}, }; @@ -24,9 +25,7 @@ use futures_util::{ TryStreamExt, }; pub use kvproto::brpb::{AzureBlobStorage as InputConfig, Bucket as InputBucket, CloudDynamic}; -use lazy_static::lazy_static; use oauth2::{ClientId, ClientSecret}; -use regex::Regex; use tikv_util::{ debug, stream::{retry, RetryError}, @@ -60,9 +59,11 @@ pub struct Config { account_name: Option, shared_key: Option, + sas_token: Option, credential_info: Option, env_account_name: Option, env_shared_key: Option, + encryption_scope: Option, } impl std::fmt::Debug for Config { @@ -71,9 +72,11 @@ impl std::fmt::Debug for Config { .field("bucket", &self.bucket) .field("account_name", &self.account_name) .field("shared_key", &"?") + .field("sas_token", &"?") .field("credential_info", &self.credential_info) .field("env_account_name", &self.env_account_name) .field("env_shared_key", &"?") + .field("encryption_scope", &self.encryption_scope) .finish() } } @@ -85,9 +88,11 @@ impl Config { bucket, account_name: None, shared_key: None, + sas_token: None, credential_info: Self::load_credential_info(), env_account_name: Self::load_env_account_name(), env_shared_key: Self::load_env_shared_key(), + encryption_scope: None, } } @@ -129,9 +134,13 @@ impl Config { bucket, account_name: StringNonEmpty::opt(attrs.get("account_name").unwrap_or(def).clone()), shared_key: StringNonEmpty::opt(attrs.get("shared_key").unwrap_or(def).clone()), + sas_token: StringNonEmpty::opt(attrs.get("sas_token").unwrap_or(def).clone()), credential_info: Self::load_credential_info(), env_account_name: Self::load_env_account_name(), env_shared_key: Self::load_env_shared_key(), + encryption_scope: StringNonEmpty::opt( + attrs.get("encryption_scope").unwrap_or(def).clone(), + ), }) } @@ -148,9 +157,11 @@ impl Config { bucket, account_name: StringNonEmpty::opt(input.account_name), shared_key: StringNonEmpty::opt(input.shared_key), + sas_token: StringNonEmpty::opt(input.access_sig), 
credential_info: Self::load_credential_info(), env_account_name: Self::load_env_account_name(), env_shared_key: Self::load_env_shared_key(), + encryption_scope: StringNonEmpty::opt(input.encryption_scope), }) } @@ -225,7 +236,6 @@ impl BlobConfig for Config { enum RequestError { InvalidInput(Box, String), - InternalError(String), TimeOut(String), } @@ -233,9 +243,8 @@ impl From for io::Error { fn from(err: RequestError) -> Self { match err { RequestError::InvalidInput(e, tag) => { - Self::new(io::ErrorKind::InvalidInput, format!("{}: {}", tag, &e)) + Self::new(io::ErrorKind::InvalidInput, format!("{}: {:?}", tag, &e)) } - RequestError::InternalError(msg) => Self::new(io::ErrorKind::Other, msg), RequestError::TimeOut(msg) => Self::new(io::ErrorKind::TimedOut, msg), } } @@ -243,21 +252,10 @@ impl From for io::Error { impl RetryError for RequestError { fn is_retryable(&self) -> bool { - matches!(self, Self::TimeOut(_) | Self::InternalError(_)) + matches!(self, Self::TimeOut(_)) } } -fn err_is_retryable(err_info: &str) -> bool { - // HTTP Code 503: The server is busy - // HTTP Code 500: Operation could not be completed within the specified time. - // More details seen in https://learn.microsoft.com/en-us/rest/api/storageservices/blob-service-error-codes - lazy_static! { - static ref RE: Regex = Regex::new(r"status: 5[0-9][0-9],").unwrap(); - } - - RE.is_match(err_info) -} - const CONNECTION_TIMEOUT: Duration = Duration::from_secs(900); /// A helper for uploading a large file to Azure storage. 
@@ -265,7 +263,8 @@ struct AzureUploader { client_builder: Arc, name: String, - storage_class: AccessTier, + storage_class: Option, + encryption_scope: Option, } impl AzureUploader { @@ -279,11 +278,12 @@ impl AzureUploader { storage_class: Self::parse_storage_class(none_to_empty( config.bucket.storage_class.clone(), )), + encryption_scope: config.encryption_scope.clone(), } } - fn parse_storage_class(storage_class: String) -> AccessTier { - AccessTier::from_str(storage_class.as_str()).unwrap_or(AccessTier::Hot) + fn parse_storage_class(storage_class: String) -> Option { + AccessTier::from_str(storage_class.as_str()).ok() } /// Executes the upload process. @@ -305,34 +305,33 @@ impl AzureUploader { /// relatively cheap to retry the entire upload. async fn upload(&self, data: &[u8]) -> Result<(), RequestError> { match timeout(Self::get_timeout(), async { - self.client_builder + let builder = self + .client_builder .get_client() .await .map_err(|e| e.to_string())? .blob_client(&self.name) - .put_block_blob(data.to_vec()) - .access_tier(self.storage_class) - .await?; + .put_block_blob(data.to_vec()); + + // the encryption scope and the access tier can not be both in the HTTP headers + let builder = if let Some(scope) = &self.encryption_scope { + builder.encryption_scope(scope.deref().clone()) + } else if let Some(tier) = self.storage_class { + builder.access_tier(tier) + } else { + builder + }; + builder.await?; Ok(()) }) .await { Ok(res) => match res { Ok(_) => Ok(()), - Err(err) => { - let err_info = ToString::to_string(&err); - if err_is_retryable(&err_info) { - Err(RequestError::InternalError(format!( - "internal error: {}, retry later", - err_info - ))) - } else { - Err(RequestError::InvalidInput( - err, - "upload block failed".to_owned(), - )) - } - } + Err(err) => Err(RequestError::InvalidInput( + err, + "upload block failed".to_owned(), + )), }, Err(_) => Err(RequestError::TimeOut( "timeout after 15mins for complete in azure storage".to_owned(), @@ -516,10 
+515,28 @@ impl AzureStorage { } pub fn new(config: Config) -> io::Result { + let account_name = config.get_account_name()?; let bucket = (*config.bucket.bucket).to_owned(); - // priority: explicit shared key > env Azure AD > env shared key - if let Some(connection_string) = config.parse_plaintext_account_url() { - let account_name = config.get_account_name()?; + // priority: + // explicit sas token > explicit shared key > env Azure AD > env shared key + if let Some(sas_token) = config.sas_token.as_ref() { + let token = sas_token.deref(); + let storage_credentials = StorageCredentials::sas_token(token).map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidInput, + format!("invalid configurations for SAS token, err: {}", e), + ) + })?; + let container_client = Arc::new( + BlobServiceClient::new(account_name, storage_credentials).container_client(bucket), + ); + + let client_builder = Arc::new(SharedKeyContainerBuilder { container_client }); + Ok(AzureStorage { + config, + client_builder, + }) + } else if let Some(connection_string) = config.parse_plaintext_account_url() { let storage_credentials = ConnectionString::new(&connection_string) .map_err(|e| { io::Error::new( @@ -544,7 +561,6 @@ impl AzureStorage { client_builder, }) } else if let Some(credential_info) = config.credential_info.as_ref() { - let account_name = config.get_account_name()?; let token_resource = format!("https://{}.blob.core.windows.net", &account_name); let cred = ClientSecretCredential::new( new_http_client(), @@ -566,7 +582,6 @@ impl AzureStorage { client_builder, }) } else if let Some(connection_string) = config.parse_env_plaintext_account_url() { - let account_name = config.get_account_name()?; let storage_credentials = ConnectionString::new(&connection_string) .map_err(|e| { io::Error::new( @@ -821,24 +836,4 @@ mod tests { cd.set_bucket(bucket); cd } - - #[tokio::test] - async fn test_error_retryable() { - let err_info = "HTTP error status (status: 503,... 
The server is busy."; - assert!(err_is_retryable(err_info)); - let err_info = "HTTP error status (status: 500,... Operation could not be completed within the specified time."; - assert!(err_is_retryable(err_info)); - let err_info = - "HTTP error status (status: 409,... The blob type is invalid for this operation."; - assert!(!err_is_retryable(err_info)); - let err_info = "HTTP error status (status: 50,... "; - assert!(!err_is_retryable(err_info)); - let err = "NaN".parse::().unwrap_err(); - let err1 = RequestError::InvalidInput(Box::new(err), "invalid-input".to_owned()); - let err2 = RequestError::InternalError("internal-error".to_owned()); - let err3 = RequestError::TimeOut("time-out".to_owned()); - assert!(!err1.is_retryable()); - assert!(err2.is_retryable()); - assert!(err3.is_retryable()); - } } From 8ccb0042a9b67b5353e522b7141ce5f907250aaa Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 14 Jun 2023 13:19:07 +0800 Subject: [PATCH 0736/1149] raftstore-v2: support check leader lease tick (#14936) ref tikv/tikv#14876 raftstore-v2: support check leader lease tick Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/fsm/peer.rs | 2 +- .../raftstore-v2/src/operation/query/lease.rs | 78 +++++++++++++++++-- .../raftstore-v2/src/operation/query/mod.rs | 4 +- .../src/operation/query/replica.rs | 4 +- .../raftstore-v2/src/operation/ready/mod.rs | 1 + components/raftstore/src/store/peer.rs | 2 +- .../integrations/raftstore/test_lease_read.rs | 4 +- 7 files changed, 79 insertions(+), 16 deletions(-) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index ba749b71b7d..bf930f6d80a 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -223,7 +223,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerTick::CheckMerge => 
self.fsm.peer_mut().on_check_merge(self.store_ctx), PeerTick::CheckPeerStaleState => unimplemented!(), PeerTick::EntryCacheEvict => self.on_entry_cache_evict(), - PeerTick::CheckLeaderLease => unimplemented!(), + PeerTick::CheckLeaderLease => self.on_check_leader_lease_tick(), PeerTick::ReactivateMemoryLock => { self.fsm.peer.on_reactivate_memory_lock_tick(self.store_ctx) } diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index 2c8b49ae172..1f470eb9c4e 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -10,10 +10,14 @@ use raft::{ }; use raftstore::{ store::{ - can_amend_read, fsm::apply::notify_stale_req, metrics::RAFT_READ_INDEX_PENDING_COUNT, - msg::ReadCallback, propose_read_index, should_renew_lease, - simple_write::SimpleWriteEncoder, util::LeaseState, ReadDelegate, ReadIndexRequest, - ReadProgress, Transport, + can_amend_read, + fsm::{apply::notify_stale_req, new_read_index_request}, + metrics::RAFT_READ_INDEX_PENDING_COUNT, + msg::ReadCallback, + propose_read_index, should_renew_lease, + simple_write::SimpleWriteEncoder, + util::LeaseState, + ReadDelegate, ReadIndexRequest, ReadProgress, Transport, }, Error, Result, }; @@ -24,9 +28,9 @@ use tracker::GLOBAL_TRACKERS; use crate::{ batch::StoreContext, - fsm::StoreMeta, + fsm::{PeerFsmDelegate, StoreMeta}, raft::Peer, - router::{CmdResChannel, QueryResChannel, QueryResult, ReadResponse}, + router::{CmdResChannel, PeerTick, QueryResChannel, QueryResult, ReadResponse}, }; impl Peer { @@ -96,7 +100,7 @@ impl Peer { Ok(()) } - pub(crate) fn read_index_leader( + pub(crate) fn read_index_leader( &mut self, ctx: &mut StoreContext, mut req: RaftCmdRequest, @@ -291,4 +295,64 @@ impl Peer { } state } + + // If lease expired, we will send a noop read index to renew lease. 
+ fn try_renew_leader_lease(&mut self, ctx: &mut StoreContext) { + debug!(self.logger, + "renew lease"; + "region_id" => self.region_id(), + "peer_id" => self.peer_id(), + ); + + let current_time = *ctx.current_time.get_or_insert_with(monotonic_raw_now); + if self.need_renew_lease_at(ctx, current_time) { + let mut cmd = new_read_index_request( + self.region_id(), + self.region().get_region_epoch().clone(), + self.peer().clone(), + ); + cmd.mut_header().set_read_quorum(true); + let (ch, _) = QueryResChannel::pair(); + self.read_index(ctx, cmd, ch); + } + } + + fn need_renew_lease_at( + &self, + ctx: &mut StoreContext, + current_time: Timespec, + ) -> bool { + let renew_bound = match self.leader_lease().need_renew(current_time) { + Some(ts) => ts, + None => return false, + }; + let max_lease = ctx.cfg.raft_store_max_leader_lease(); + let has_overlapped_reads = self.pending_reads().back().map_or(false, |read| { + // If there is any read index whose lease can cover till next heartbeat + // then we don't need to propose a new one + read.propose_time + max_lease > renew_bound + }); + let has_overlapped_writes = self.proposals().back().map_or(false, |proposal| { + // If there is any write whose lease can cover till next heartbeat + // then we don't need to propose a new one + proposal + .propose_time + .map_or(false, |propose_time| propose_time + max_lease > renew_bound) + }); + !has_overlapped_reads && !has_overlapped_writes + } +} + +impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { + fn register_check_leader_lease_tick(&mut self) { + self.schedule_tick(PeerTick::CheckLeaderLease) + } + + pub fn on_check_leader_lease_tick(&mut self) { + if !self.fsm.peer_mut().is_leader() { + return; + } + self.fsm.peer_mut().try_renew_leader_lease(self.store_ctx); + self.register_check_leader_lease_tick(); + } } diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 
6e130a085dd..e7f795dc230 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -25,7 +25,7 @@ use raftstore::{ store::{ cmd_resp, local_metrics::RaftMetrics, metrics::RAFT_READ_INDEX_PENDING_COUNT, msg::ErrorCallback, region_meta::RegionMeta, util, util::LeaseState, GroupState, - ReadIndexContext, ReadProgress, RequestPolicy, Transport, + ReadIndexContext, ReadProgress, RequestPolicy, }, Error, Result, }; @@ -185,7 +185,7 @@ impl Peer { // 1. The region is in merging or splitting; // 2. The message is stale and dropped by the Raft group internally; // 3. There is already a read request proposed in the current lease; - fn read_index( + fn read_index( &mut self, ctx: &mut StoreContext, req: RaftCmdRequest, diff --git a/components/raftstore-v2/src/operation/query/replica.rs b/components/raftstore-v2/src/operation/query/replica.rs index b4edbd2097a..1d64250c34b 100644 --- a/components/raftstore-v2/src/operation/query/replica.rs +++ b/components/raftstore-v2/src/operation/query/replica.rs @@ -9,7 +9,7 @@ use raftstore::{ fsm::apply::notify_stale_req, metrics::RAFT_READ_INDEX_PENDING_COUNT, msg::{ErrorCallback, ReadCallback}, - propose_read_index, Config, ReadIndexContext, ReadIndexRequest, Transport, + propose_read_index, Config, ReadIndexContext, ReadIndexRequest, }, Error, }; @@ -49,7 +49,7 @@ impl Peer { /// read index on follower /// /// call set_has_ready if it's proposed. 
- pub(crate) fn read_index_follower( + pub(crate) fn read_index_follower( &mut self, ctx: &mut StoreContext, mut req: RaftCmdRequest, diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index fe519466463..7953d5ea148 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -1010,6 +1010,7 @@ impl Peer { self.add_pending_tick(PeerTick::SplitRegionCheck); self.add_pending_tick(PeerTick::CheckLongUncommitted); self.add_pending_tick(PeerTick::ReportBuckets); + self.add_pending_tick(PeerTick::CheckLeaderLease); self.maybe_schedule_gc_peer_tick(); } StateRole::Follower => { diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 8e438ef808d..65f8aa0a8a6 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -224,7 +224,7 @@ impl ProposalQueue { } } - fn back(&self) -> Option<&Proposal> { + pub fn back(&self) -> Option<&Proposal> { self.queue.back() } } diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index 77794a415b6..46af67a8cad 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -730,9 +730,7 @@ fn test_read_index_after_write() { } #[test_case(test_raftstore::new_node_cluster)] -// #[test_case(test_raftstore_v2::new_node_cluster)] -// TODO: Renew leader lease periodically is not supported in raftstore v2 -// currently. https://github.com/tikv/tikv/issues/14876 +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_infinite_lease() { let mut cluster = new_cluster(0, 3); // Avoid triggering the log compaction in this test case. 
From 160d3d25340ba6d5eb66d846d82cfe0a4570d0ad Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 15 Jun 2023 00:59:08 +0800 Subject: [PATCH 0737/1149] resource_control: support manage resource group background metadata (#14921) ref tikv/tikv#14900 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resource_control/src/channel.rs | 2 +- components/resource_control/src/lib.rs | 2 + .../resource_control/src/resource_group.rs | 272 ++++++++++-------- .../resource_control/src/resource_limiter.rs | 89 ++++++ components/resource_control/src/service.rs | 25 +- src/storage/mod.rs | 5 +- src/storage/txn/scheduler.rs | 10 +- 7 files changed, 268 insertions(+), 137 deletions(-) create mode 100644 components/resource_control/src/resource_limiter.rs diff --git a/components/resource_control/src/channel.rs b/components/resource_control/src/channel.rs index ccad4aba4bb..eec0accc259 100644 --- a/components/resource_control/src/channel.rs +++ b/components/resource_control/src/channel.rs @@ -193,7 +193,7 @@ mod tests { #[bench] fn bench_channel(b: &mut Bencher) { - let (tx, rx) = unbounded(Some(Arc::new(ResourceController::new( + let (tx, rx) = unbounded(Some(Arc::new(ResourceController::new_for_test( "test".to_owned(), false, )))); diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index b186cb8a0c7..113555f795f 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -21,6 +21,8 @@ pub use service::ResourceManagerService; pub mod channel; pub use channel::ResourceMetered; +mod resource_limiter; + #[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 0a808811217..ad0627ce0e6 100644 --- a/components/resource_control/src/resource_group.rs +++ 
b/components/resource_control/src/resource_group.rs @@ -11,16 +11,20 @@ use std::{ }; use collections::HashMap; -use dashmap::{mapref::one::Ref, DashMap}; +#[cfg(test)] +use dashmap::mapref::one::Ref; +use dashmap::DashMap; use fail::fail_point; use kvproto::{ kvrpcpb::{CommandPri, ResourceControlContext}, - resource_manager::{GroupMode, ResourceGroup}, + resource_manager::{GroupMode, ResourceGroup as PbResourceGroup}, }; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard}; use tikv_util::{info, time::Instant}; use yatp::queue::priority::TaskPriorityProvider; +use crate::resource_limiter::ResourceLimiter; + // a read task cost at least 50us. const DEFAULT_PRIORITY_PER_READ_TASK: u64 = 50; // extra task schedule factor @@ -51,14 +55,36 @@ pub enum ResourceConsumeType { } /// ResourceGroupManager manages the metadata of each resource group. -#[derive(Default)] pub struct ResourceGroupManager { resource_groups: DashMap, registry: RwLock>>, } +impl Default for ResourceGroupManager { + fn default() -> Self { + let manager = Self { + resource_groups: Default::default(), + registry: Default::default(), + }; + + // init the default resource group by default. + let mut default_group = PbResourceGroup::new(); + default_group.name = DEFAULT_RESOURCE_GROUP_NAME.into(); + default_group.priority = MEDIUM_PRIORITY; + default_group.mode = GroupMode::RuMode; + default_group + .mut_r_u_settings() + .mut_r_u() + .mut_settings() + .fill_rate = MAX_RU_QUOTA; + manager.add_resource_group(default_group); + + manager + } +} + impl ResourceGroupManager { - fn get_ru_setting(rg: &ResourceGroup, is_read: bool) -> u64 { + fn get_ru_setting(rg: &PbResourceGroup, is_read: bool) -> u64 { match (rg.get_mode(), is_read) { // RU mode, read and write use the same setting. 
(GroupMode::RuMode, _) => rg @@ -83,14 +109,28 @@ impl ResourceGroupManager { } } - pub fn add_resource_group(&self, rg: ResourceGroup) { + pub fn add_resource_group(&self, rg: PbResourceGroup) { let group_name = rg.get_name().to_ascii_lowercase(); self.registry.read().iter().for_each(|controller| { let ru_quota = Self::get_ru_setting(&rg, controller.is_read); controller.add_resource_group(group_name.clone().into_bytes(), ru_quota, rg.priority); }); info!("add resource group"; "name"=> &rg.name, "ru" => rg.get_r_u_settings().get_r_u().get_settings().get_fill_rate()); - self.resource_groups.insert(group_name, rg); + let limiter = match self.resource_groups.get(&rg.name) { + Some(g) => g.limiter.clone(), + None => Self::build_resource_limiter(&rg), + }; + + self.resource_groups + .insert(group_name, ResourceGroup::new(rg, limiter)); + } + + fn build_resource_limiter(rg: &PbResourceGroup) -> Option> { + // TODO: only the "default" resource group support background tasks currently. + if rg.name == DEFAULT_RESOURCE_GROUP_NAME { + return Some(Arc::new(ResourceLimiter::new(f64::INFINITY, f64::INFINITY))); + } + None } pub fn remove_resource_group(&self, name: &str) { @@ -102,10 +142,14 @@ impl ResourceGroupManager { self.resource_groups.remove(&group_name); } - pub fn retain(&self, mut f: impl FnMut(&String, &ResourceGroup) -> bool) { + pub fn retain(&self, mut f: impl FnMut(&String, &PbResourceGroup) -> bool) { let mut removed_names = vec![]; self.resource_groups.retain(|k, v| { - let ret = f(k, v); + // avoid remove default group. 
+ if k == DEFAULT_RESOURCE_GROUP_NAME { + return true; + } + let ret = f(k, &v.group); if !ret { removed_names.push(k.clone()); } @@ -120,20 +164,24 @@ impl ResourceGroupManager { } } - pub fn get_resource_group(&self, name: &str) -> Option> { + #[cfg(test)] + pub(crate) fn get_resource_group(&self, name: &str) -> Option> { self.resource_groups.get(&name.to_ascii_lowercase()) } - pub fn get_all_resource_groups(&self) -> Vec { - self.resource_groups.iter().map(|g| g.clone()).collect() + pub fn get_all_resource_groups(&self) -> Vec { + self.resource_groups + .iter() + .map(|g| g.group.clone()) + .collect() } pub fn derive_controller(&self, name: String, is_read: bool) -> Arc { let controller = Arc::new(ResourceController::new(name, is_read)); self.registry.write().push(controller.clone()); for g in &self.resource_groups { - let ru_quota = Self::get_ru_setting(g.value(), controller.is_read); - controller.add_resource_group(g.key().clone().into_bytes(), ru_quota, g.priority); + let ru_quota = Self::get_ru_setting(&g.value().group, controller.is_read); + controller.add_resource_group(g.key().clone().into_bytes(), ru_quota, g.group.priority); } controller } @@ -164,6 +212,27 @@ impl ResourceGroupManager { } } +pub(crate) struct ResourceGroup { + group: PbResourceGroup, + limiter: Option>, +} + +impl ResourceGroup { + fn new(group: PbResourceGroup, limiter: Option>) -> Self { + Self { group, limiter } + } + + #[cfg(test)] + pub(crate) fn get_ru_quota(&self) -> u64 { + assert!(self.group.has_r_u_settings()); + self.group + .get_r_u_settings() + .get_r_u() + .get_settings() + .get_fill_rate() + } +} + pub struct ResourceController { // resource controller name is not used currently. 
#[allow(dead_code)] @@ -198,8 +267,8 @@ unsafe impl Send for ResourceController {} unsafe impl Sync for ResourceController {} impl ResourceController { - pub fn new(name: String, is_read: bool) -> Self { - let controller = Self { + fn new(name: String, is_read: bool) -> Self { + Self { name, is_read, resource_consumptions: RwLock::new(HashMap::default()), @@ -207,8 +276,12 @@ impl ResourceController { max_ru_quota: Mutex::new(DEFAULT_MAX_RU_QUOTA), last_rest_vt_time: Cell::new(Instant::now_coarse()), customized: AtomicBool::new(false), - }; - // add the "default" resource group + } + } + + pub fn new_for_test(name: String, is_read: bool) -> Self { + let controller = Self::new(name, is_read); + // add the "default" resource group. controller.add_resource_group( DEFAULT_RESOURCE_GROUP_NAME.as_bytes().to_owned(), 0, @@ -244,7 +317,7 @@ impl ResourceController { let mut max_ru_quota = self.max_ru_quota.lock().unwrap(); // skip to adjust max ru if it is the "default" group and the ru config eq // MAX_RU_QUOTA - if ru_quota > *max_ru_quota && (name != "default".as_bytes() || ru_quota < MAX_RU_QUOTA) { + if ru_quota > *max_ru_quota && (name != b"default" || ru_quota < MAX_RU_QUOTA) { *max_ru_quota = ru_quota; // adjust all group weight because the current value is too small. 
self.adjust_all_resource_group_factors(ru_quota); @@ -459,7 +532,7 @@ pub(crate) mod tests { use super::*; - pub fn new_resource_group_ru(name: String, ru: u64, group_priority: u32) -> ResourceGroup { + pub fn new_resource_group_ru(name: String, ru: u64, group_priority: u32) -> PbResourceGroup { new_resource_group(name, true, ru, ru, group_priority) } @@ -469,10 +542,10 @@ pub(crate) mod tests { read_tokens: u64, write_tokens: u64, group_priority: u32, - ) -> ResourceGroup { + ) -> PbResourceGroup { use kvproto::resource_manager::{GroupRawResourceSettings, GroupRequestUnitSettings}; - let mut group = ResourceGroup::new(); + let mut group = PbResourceGroup::new(); group.set_name(name); let mode = if is_ru_mode { GroupMode::RuMode @@ -507,54 +580,39 @@ pub(crate) mod tests { #[test] fn test_resource_group() { let resource_manager = ResourceGroupManager::default(); + assert_eq!(resource_manager.resource_groups.len(), 1); let group1 = new_resource_group_ru("TEST".into(), 100, 0); resource_manager.add_resource_group(group1); assert!(resource_manager.get_resource_group("test1").is_none()); let group = resource_manager.get_resource_group("test").unwrap(); - assert_eq!( - group - .value() - .get_r_u_settings() - .get_r_u() - .get_settings() - .get_fill_rate(), - 100 - ); + assert_eq!(group.get_ru_quota(), 100); drop(group); - assert_eq!(resource_manager.resource_groups.len(), 1); + assert_eq!(resource_manager.resource_groups.len(), 2); let group1 = new_resource_group_ru("Test".into(), 200, LOW_PRIORITY); resource_manager.add_resource_group(group1); let group = resource_manager.get_resource_group("test").unwrap(); - assert_eq!( - group - .value() - .get_r_u_settings() - .get_r_u() - .get_settings() - .get_fill_rate(), - 200 - ); - assert_eq!(group.value().get_priority(), 1); + assert_eq!(group.get_ru_quota(), 200); + assert_eq!(group.value().group.get_priority(), 1); drop(group); - assert_eq!(resource_manager.resource_groups.len(), 1); + 
assert_eq!(resource_manager.resource_groups.len(), 2); let group2 = new_resource_group_ru("test2".into(), 400, 0); resource_manager.add_resource_group(group2); - assert_eq!(resource_manager.resource_groups.len(), 2); + assert_eq!(resource_manager.resource_groups.len(), 3); let resource_ctl = resource_manager.derive_controller("test_read".into(), true); assert_eq!(resource_ctl.resource_consumptions.read().len(), 3); - let group1 = resource_ctl.resource_group("test".as_bytes()); - let group2 = resource_ctl.resource_group("test2".as_bytes()); + let group1 = resource_ctl.resource_group(b"test"); + let group2 = resource_ctl.resource_group(b"test2"); assert_eq!(group1.weight, group2.weight * 2); assert_eq!(group1.current_vt(), 0); let mut extras1 = Extras::single_level(); - extras1.set_metadata("test".as_bytes().to_owned()); + extras1.set_metadata(b"test".to_vec()); assert_eq!( resource_ctl.priority_of(&extras1), concat_priority_vt(LOW_PRIORITY, group1.weight * 50) @@ -562,7 +620,7 @@ pub(crate) mod tests { assert_eq!(group1.current_vt(), group1.weight * 50); let mut extras2 = Extras::single_level(); - extras2.set_metadata("test2".as_bytes().to_owned()); + extras2.set_metadata(b"test2".to_vec()); assert_eq!( resource_ctl.priority_of(&extras2), concat_priority_vt(MEDIUM_PRIORITY, group2.weight * 50) @@ -570,24 +628,19 @@ pub(crate) mod tests { assert_eq!(group2.current_vt(), group2.weight * 50); let mut extras3 = Extras::single_level(); - extras3.set_metadata("unknown_group".as_bytes().to_owned()); + extras3.set_metadata(b"unknown_group".to_vec()); assert_eq!( resource_ctl.priority_of(&extras3), concat_priority_vt(MEDIUM_PRIORITY, 50) ); - assert_eq!( - resource_ctl - .resource_group("default".as_bytes()) - .current_vt(), - 50 - ); + assert_eq!(resource_ctl.resource_group(b"default").current_vt(), 50); resource_ctl.consume( - "test".as_bytes(), + b"test", ResourceConsumeType::CpuTime(Duration::from_micros(10000)), ); resource_ctl.consume( - "test2".as_bytes(), + b"test2", 
ResourceConsumeType::CpuTime(Duration::from_micros(10000)), ); @@ -600,12 +653,7 @@ pub(crate) mod tests { let group1_weight = group1.weight; assert_eq!(group1_vt, group1.weight * 10050); assert!(group2.current_vt() >= group1.current_vt() * 3 / 4); - assert!( - resource_ctl - .resource_group("default".as_bytes()) - .current_vt() - >= group1.current_vt() / 2 - ); + assert!(resource_ctl.resource_group(b"default").current_vt() >= group1.current_vt() / 2); drop(group1); drop(group2); @@ -618,6 +666,30 @@ pub(crate) mod tests { let group3 = resource_ctl.resource_group("new_group".as_bytes()); assert!(group1_weight - 10 <= group3.weight * 3 && group3.weight * 3 <= group1_weight + 10); assert!(group3.current_vt() >= group1_vt / 2); + drop(group3); + + // test resource gorup resource limiter. + let group1 = resource_manager.get_resource_group("test").unwrap(); + assert!(group1.limiter.is_none()); + let default_group = resource_manager.get_resource_group("default").unwrap(); + let limiter = default_group.limiter.as_ref().unwrap().clone(); + assert!(limiter.cpu_limiter.get_rate_limit().is_infinite()); + assert!(limiter.io_limiter.get_rate_limit().is_infinite()); + limiter.cpu_limiter.set_rate_limit(100.0); + limiter.io_limiter.set_rate_limit(200.0); + drop(group1); + drop(default_group); + + let new_default = new_resource_group_ru("default".into(), 100, LOW_PRIORITY); + resource_manager.add_resource_group(new_default); + + let default_group = resource_manager.get_resource_group("default").unwrap(); + assert_eq!(default_group.get_ru_quota(), 100); + let new_limiter = default_group.limiter.as_ref().unwrap().clone(); + // check rate_limiter is not changed. 
+ assert_eq!(new_limiter.cpu_limiter.get_rate_limit(), 100.0); + assert_eq!(new_limiter.io_limiter.get_rate_limit(), 200.0); + assert_eq!(&*new_limiter as *const _, &*limiter as *const _); } #[test] @@ -630,12 +702,12 @@ pub(crate) mod tests { let group2 = new_resource_group_ru("g2".into(), 1, 16); resource_manager.add_resource_group(group2); - let g1 = resource_ctl.resource_group("g1".as_bytes()); - let g2 = resource_ctl.resource_group("g2".as_bytes()); + let g1 = resource_ctl.resource_group(b"g1"); + let g2 = resource_ctl.resource_group(b"g2"); let threshold = 1 << 59; let mut last_g2_vt = 0; for i in 0..8 { - resource_ctl.consume("g2".as_bytes(), ResourceConsumeType::IoBytes(1 << 25)); + resource_ctl.consume(b"g2", ResourceConsumeType::IoBytes(1 << 25)); resource_manager.advance_min_virtual_time(); if i < 7 { assert!(g2.current_vt() < threshold); @@ -645,7 +717,7 @@ pub(crate) mod tests { last_g2_vt = g2.current_vt(); } - resource_ctl.consume("g2".as_bytes(), ResourceConsumeType::IoBytes(1 << 25)); + resource_ctl.consume(b"g2", ResourceConsumeType::IoBytes(1 << 25)); resource_manager.advance_min_virtual_time(); assert!(g1.current_vt() > threshold); @@ -667,11 +739,8 @@ pub(crate) mod tests { assert_eq!(resource_ctl_write.is_customized(), false); let group1 = new_resource_group_ru("test1".into(), 5000, 0); resource_manager.add_resource_group(group1); - assert_eq!(resource_ctl.resource_group("test1".as_bytes()).weight, 20); - assert_eq!( - resource_ctl_write.resource_group("test1".as_bytes()).weight, - 20 - ); + assert_eq!(resource_ctl.resource_group(b"test1").weight, 20); + assert_eq!(resource_ctl_write.resource_group(b"test1").weight, 20); assert_eq!(resource_ctl.is_customized(), true); assert_eq!(resource_ctl_write.is_customized(), true); @@ -679,47 +748,25 @@ pub(crate) mod tests { let group1 = new_resource_group_ru("test2".into(), 50000, 0); resource_manager.add_resource_group(group1); assert_eq!(*resource_ctl.max_ru_quota.lock().unwrap(), 50000); - 
assert_eq!(resource_ctl.resource_group("test1".as_bytes()).weight, 100); - assert_eq!(resource_ctl.resource_group("test2".as_bytes()).weight, 10); + assert_eq!(resource_ctl.resource_group(b"test1").weight, 100); + assert_eq!(resource_ctl.resource_group(b"test2").weight, 10); // resource_ctl_write should be unchanged. assert_eq!(*resource_ctl_write.max_ru_quota.lock().unwrap(), 50000); - assert_eq!( - resource_ctl_write.resource_group("test1".as_bytes()).weight, - 100 - ); - assert_eq!( - resource_ctl_write.resource_group("test2".as_bytes()).weight, - 10 - ); + assert_eq!(resource_ctl_write.resource_group(b"test1").weight, 100); + assert_eq!(resource_ctl_write.resource_group(b"test2").weight, 10); // add the default "default" group, the ru weight should not change. // add a resource group with big ru let group = new_resource_group_ru("default".into(), u32::MAX as u64, 0); resource_manager.add_resource_group(group); - assert_eq!( - resource_ctl_write.resource_group("test1".as_bytes()).weight, - 100 - ); - assert_eq!( - resource_ctl_write - .resource_group("default".as_bytes()) - .weight, - 1 - ); + assert_eq!(resource_ctl_write.resource_group(b"test1").weight, 100); + assert_eq!(resource_ctl_write.resource_group(b"default").weight, 1); // change the default group to another value, it can impact the ru then. 
let group = new_resource_group_ru("default".into(), 100000, 0); resource_manager.add_resource_group(group); - assert_eq!( - resource_ctl_write.resource_group("test1".as_bytes()).weight, - 200 - ); - assert_eq!( - resource_ctl_write - .resource_group("default".as_bytes()) - .weight, - 10 - ); + assert_eq!(resource_ctl_write.resource_group(b"test1").weight, 200); + assert_eq!(resource_ctl_write.resource_group(b"default").weight, 10); } #[test] @@ -742,7 +789,7 @@ pub(crate) mod tests { .increase_vt(RESET_VT_THRESHOLD + delta); } resource_ctl - .resource_group("default".as_bytes()) + .resource_group(b"default") .increase_vt(RESET_VT_THRESHOLD + 1); let old_max_vt = resource_ctl @@ -798,28 +845,19 @@ pub(crate) mod tests { ); resource_ctl_write.consume(b"default", ResourceConsumeType::IoBytes(10000)); - assert_eq!(resource_manager.get_all_resource_groups().len(), 10); - assert_eq!(resource_ctl.resource_consumptions.read().len(), 11); // 10 + 1(default) + // 10 + 1(default) + assert_eq!(resource_manager.get_all_resource_groups().len(), 11); + assert_eq!(resource_ctl.resource_consumptions.read().len(), 11); assert_eq!(resource_ctl_write.resource_consumptions.read().len(), 11); resource_manager.retain(|k, _v| k.starts_with("test")); - assert_eq!(resource_manager.get_all_resource_groups().len(), 5); + assert_eq!(resource_manager.get_all_resource_groups().len(), 6); assert_eq!(resource_ctl.resource_consumptions.read().len(), 6); assert_eq!(resource_ctl_write.resource_consumptions.read().len(), 6); assert!(resource_manager.get_resource_group("group1").is_none()); // should use the virtual time of default group for non-exist group - assert_ne!( - resource_ctl - .resource_group("group2".as_bytes()) - .current_vt(), - 0 - ); - assert_ne!( - resource_ctl_write - .resource_group("group2".as_bytes()) - .current_vt(), - 0 - ); + assert_ne!(resource_ctl.resource_group(b"group2").current_vt(), 0); + assert_ne!(resource_ctl_write.resource_group(b"group2").current_vt(), 0); } #[test] 
diff --git a/components/resource_control/src/resource_limiter.rs b/components/resource_control/src/resource_limiter.rs new file mode 100644 index 00000000000..696dc8ded6d --- /dev/null +++ b/components/resource_control/src/resource_limiter.rs @@ -0,0 +1,89 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + sync::atomic::{AtomicU64, Ordering}, + time::Duration, +}; + +use tikv_util::time::Limiter; + +pub struct ResourceLimiter { + #[allow(dead_code)] + pub(crate) cpu_limiter: QuotaLimiter, + #[allow(dead_code)] + pub(crate) io_limiter: QuotaLimiter, +} + +impl ResourceLimiter { + pub fn new(cpu_limit: f64, io_limit: f64) -> Self { + Self { + cpu_limiter: QuotaLimiter::new(cpu_limit), + io_limiter: QuotaLimiter::new(io_limit), + } + } + + #[allow(dead_code)] + pub fn consume(&self, cpu_time: Duration, io_bytes: u64) -> Duration { + let cpu_dur = self.cpu_limiter.consume(cpu_time.as_micros() as u64); + let io_dur = self.io_limiter.consume(io_bytes); + cpu_dur.max(io_dur) + } +} + +pub(crate) struct QuotaLimiter { + #[allow(dead_code)] + limiter: Limiter, + // total waiting duration in us + #[allow(dead_code)] + total_wait_dur_us: AtomicU64, +} + +impl QuotaLimiter { + fn new(limit: f64) -> Self { + Self { + limiter: Limiter::new(limit), + total_wait_dur_us: AtomicU64::new(0), + } + } + + #[cfg(test)] + pub(crate) fn get_rate_limit(&self) -> f64 { + self.limiter.speed_limit() + } + + #[cfg(test)] + pub(crate) fn set_rate_limit(&self, mut limit: f64) { + // treat 0 as infinity. 
+ if limit <= f64::EPSILON { + limit = f64::INFINITY; + } + self.limiter.set_speed_limit(limit); + } + + #[allow(dead_code)] + fn get_statistics(&self) -> GroupStatistics { + GroupStatistics { + total_consumed: self.limiter.total_bytes_consumed() as u64, + total_wait_dur_us: self.total_wait_dur_us.load(Ordering::Relaxed), + } + } + + #[allow(dead_code)] + fn consume(&self, value: u64) -> Duration { + if value == 0 { + return Duration::ZERO; + } + let dur = self.limiter.consume_duration(value as usize); + if dur != Duration::ZERO { + self.total_wait_dur_us + .fetch_add(dur.as_micros() as u64, Ordering::Relaxed); + } + dur + } +} + +#[derive(Default, Clone, Copy, Debug)] +pub struct GroupStatistics { + pub total_consumed: u64, + pub total_wait_dur_us: u64, +} diff --git a/components/resource_control/src/service.rs b/components/resource_control/src/service.rs index 82c01eae398..929bb48525b 100644 --- a/components/resource_control/src/service.rs +++ b/components/resource_control/src/service.rs @@ -199,15 +199,16 @@ pub mod tests { let resource_manager = ResourceGroupManager::default(); let mut s = ResourceManagerService::new(Arc::new(resource_manager), Arc::new(client)); + assert_eq!(s.manager.get_all_resource_groups().len(), 1); let group = new_resource_group("TEST".into(), true, 100, 100, 0); add_resource_group(s.pd_client.clone(), group); block_on(s.reload_all_resource_groups()); - assert_eq!(s.manager.get_all_resource_groups().len(), 1); + assert_eq!(s.manager.get_all_resource_groups().len(), 2); assert_eq!(s.revision, 1); delete_resource_group(s.pd_client.clone(), "TEST"); block_on(s.reload_all_resource_groups()); - assert_eq!(s.manager.get_all_resource_groups().len(), 0); + assert_eq!(s.manager.get_all_resource_groups().len(), 1); assert_eq!(s.revision, 2); server.stop(); @@ -220,7 +221,7 @@ pub mod tests { let mut s = ResourceManagerService::new(Arc::new(resource_manager), Arc::new(client)); block_on(s.reload_all_resource_groups()); - 
assert_eq!(s.manager.get_all_resource_groups().len(), 0); + assert_eq!(s.manager.get_all_resource_groups().len(), 1); assert_eq!(s.revision, 0); // TODO: find a better way to observe the watch is ready. @@ -251,26 +252,18 @@ pub mod tests { // Mock modify let group2 = new_resource_group_ru("TEST2".into(), 50, 0); add_resource_group(s.pd_client.clone(), group2); - wait_watch_ready(&s, 2); + wait_watch_ready(&s, 3); // Mock delete delete_resource_group(s.pd_client.clone(), "TEST1"); // Wait for watcher - wait_watch_ready(&s, 1); + wait_watch_ready(&s, 2); let groups = s.manager.get_all_resource_groups(); - assert_eq!(groups.len(), 1); + assert_eq!(groups.len(), 2); assert!(s.manager.get_resource_group("TEST1").is_none()); let group = s.manager.get_resource_group("TEST2").unwrap(); - assert_eq!( - group - .value() - .get_r_u_settings() - .get_r_u() - .get_settings() - .get_fill_rate(), - 50 - ); + assert_eq!(group.get_ru_quota(), 50); server.stop(); } @@ -299,7 +292,7 @@ pub mod tests { // Wait watcher update std::thread::sleep(Duration::from_secs(1)); let groups = s.manager.get_all_resource_groups(); - assert_eq!(groups.len(), 2); + assert_eq!(groups.len(), 3); server.stop(); } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 7dde593350f..a9f69156a2e 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3303,7 +3303,10 @@ impl TestStorageBuilder { Arc::new(QuotaLimiter::default()), latest_feature_gate(), None, - Some(Arc::new(ResourceController::new("test".to_owned(), false))), + Some(Arc::new(ResourceController::new_for_test( + "test".to_owned(), + false, + ))), ) } diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index a5255969af5..06562130dc7 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -1970,7 +1970,10 @@ mod tests { ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), latest_feature_gate(), - Some(Arc::new(ResourceController::new("test".to_owned(), true))), + 
Some(Arc::new(ResourceController::new_for_test( + "test".to_owned(), + true, + ))), ), engine, ) @@ -2322,7 +2325,10 @@ mod tests { ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), feature_gate.clone(), - Some(Arc::new(ResourceController::new("test".to_owned(), true))), + Some(Arc::new(ResourceController::new_for_test( + "test".to_owned(), + true, + ))), ); // Use sync mode if pipelined_pessimistic_lock is false. assert_eq!(scheduler.pessimistic_lock_mode(), PessimisticLockMode::Sync); From 5970a80d6b67746429dc1989d844db75f02e57b6 Mon Sep 17 00:00:00 2001 From: Liqi Geng Date: Thu, 15 Jun 2023 05:01:08 +0800 Subject: [PATCH 0738/1149] *: Update rust-rocksdb to fix build with clang 16 (#14951) close tikv/tikv#14950 Signed-off-by: gengliqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 156 +++++++++++++++++++++++++++++------------------------ 1 file changed, 85 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bfef82e98d7..993ebfe6816 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -224,7 +224,7 @@ checksum = "393356ed99aa7bff0ac486dde592633b83ab02bd254d8c209d5b9f1d0f533480" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -235,7 +235,7 @@ checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -246,7 +246,7 @@ checksum = "1e805d94e6b5001b651426cf4cd446b1ab5f319d27bab5c644f61de0a804360c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -673,44 +673,46 @@ checksum = "42d3c07869b846ba3306739375e9ed2f8055a8759fcf7f72ab7bf3bc4df38b9b" [[package]] name = "bindgen" -version = "0.57.0" +version = "0.59.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd4865004a46a0aafb2a0a5eb19d3c9fc46ee5f063a6cfc605c69ac9ecf5263d" +checksum = 
"2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8" dependencies = [ "bitflags", - "cexpr 0.4.0", + "cexpr", "clang-sys", + "clap 2.33.0", + "env_logger 0.9.0", "lazy_static", "lazycell", + "log", "peeking_take_while", "proc-macro2", "quote", "regex", "rustc-hash", - "shlex 0.1.1", + "shlex 1.1.0", + "which", ] [[package]] name = "bindgen" -version = "0.59.2" +version = "0.65.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" dependencies = [ "bitflags", - "cexpr 0.6.0", + "cexpr", "clang-sys", - "clap 2.33.0", - "env_logger 0.9.0", "lazy_static", "lazycell", - "log", "peeking_take_while", + "prettyplease 0.2.6", "proc-macro2", "quote", "regex", "rustc-hash", "shlex 1.1.0", - "which", + "syn 2.0.18", ] [[package]] @@ -941,15 +943,6 @@ dependencies = [ "txn_types", ] -[[package]] -name = "cexpr" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27" -dependencies = [ - "nom 5.1.0", -] - [[package]] name = "cexpr" version = "0.6.0" @@ -1046,7 +1039,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -1423,7 +1416,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.9.2", - "syn", + "syn 1.0.103", ] [[package]] @@ -1434,7 +1427,7 @@ checksum = "0cd3e432e52c0810b72898296a69d66b1d78d1517dff6cde7a130557a55a62c1" dependencies = [ "darling_core", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -1465,7 +1458,7 @@ checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -1476,7 +1469,7 @@ checksum = "a806e96c59a76a5ba6e18735b6cf833344671e61e7863f2edb5c518ea2cac95c" dependencies = [ "proc-macro2", "quote", - "syn", + 
"syn 1.0.103", ] [[package]] @@ -1752,7 +1745,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -2052,7 +2045,7 @@ checksum = "e4c81935e123ab0741c4c4f0d9b8377e5fb21d3de7e062fa4b1263b1fbcba1ea" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -2251,7 +2244,7 @@ dependencies = [ "proc-macro-hack", "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -2423,7 +2416,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -3096,9 +3089,9 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#9db52f6188f7052e87dae21f1f41772263aef3c6" +source = "git+https://github.com/tikv/rust-rocksdb.git#2096b9a161f93e437f7adee49e68cd1570aea42f" dependencies = [ - "bindgen 0.57.0", + "bindgen 0.65.1", "bzip2-sys", "cc", "cmake", @@ -3115,7 +3108,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#9db52f6188f7052e87dae21f1f41772263aef3c6" +source = "git+https://github.com/tikv/rust-rocksdb.git#2096b9a161f93e437f7adee49e68cd1570aea42f" dependencies = [ "bzip2-sys", "cc", @@ -3213,7 +3206,7 @@ checksum = "c334ac67725febd94c067736ac46ef1c7cacf1c743ca14b9f917c2df2c20acd8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -3305,7 +3298,7 @@ name = "memory_trace_macros" version = "0.1.0" dependencies = [ "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -3454,7 +3447,7 @@ checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -3652,7 +3645,7 @@ checksum = "0c8b15b261814f992e33760b1fca9fe8b693d8a65299f20c9901688636cfb746" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -3787,7 +3780,7 @@ version = "0.1.0" dependencies 
= [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -3825,7 +3818,7 @@ checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -4094,7 +4087,7 @@ checksum = "710faf75e1b33345361201d36d04e98ac1ed8909151a017ed384700836104c74" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -4209,7 +4202,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c142c0e46b57171fe0c528bee8c5b7569e80f0c17e377cd0e30ea57dbc11bb51" dependencies = [ "proc-macro2", - "syn", + "syn 1.0.103", +] + +[[package]] +name = "prettyplease" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b69d39aab54d069e7f2fe8cb970493e7834601ca2d8c65fd7bbd183578080d1" +dependencies = [ + "proc-macro2", + "syn 2.0.18", ] [[package]] @@ -4221,7 +4224,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn", + "syn 1.0.103", "version_check 0.9.4", ] @@ -4250,9 +4253,9 @@ checksum = "369a6ed065f249a159e06c45752c780bda2fb53c995718f9e484d08daa9eb42e" [[package]] name = "proc-macro2" -version = "1.0.47" +version = "1.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" +checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406" dependencies = [ "unicode-ident", ] @@ -4318,7 +4321,7 @@ dependencies = [ "lazy_static", "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -4344,11 +4347,11 @@ dependencies = [ "log", "multimap", "petgraph", - "prettyplease", + "prettyplease 0.1.21", "prost", "prost-types", "regex", - "syn", + "syn 1.0.103", "tempfile", "which", ] @@ -4363,7 +4366,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -4451,9 +4454,9 @@ dependencies = [ [[package]] name = "quote" 
-version = "1.0.18" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" +checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" dependencies = [ "proc-macro2", ] @@ -5054,7 +5057,7 @@ checksum = "75a39bc2aa9258b282ee5518dac493491a9c4c11a6d7361b9d2644c922fc6488" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -5075,7 +5078,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#9db52f6188f7052e87dae21f1f41772263aef3c6" +source = "git+https://github.com/tikv/rust-rocksdb.git#2096b9a161f93e437f7adee49e68cd1570aea42f" dependencies = [ "libc 0.2.139", "librocksdb_sys", @@ -5430,7 +5433,7 @@ checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -5482,7 +5485,7 @@ checksum = "1fe39d9fbb0ebf5eb2c7cb7e2a47e4f462fad1379f1166b8ae49ad9eae89a7ca" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -5515,7 +5518,7 @@ checksum = "4070d2c9b9d258465ad1d82aabb985b84cd9a3afa94da25ece5a9938ba5f1606" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -5705,7 +5708,7 @@ checksum = "a945ec7f7ce853e89ffa36be1e27dce9a43e82ff9093bf3461c30d5da74ed11b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -5897,7 +5900,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -5927,7 +5930,7 @@ dependencies = [ "heck 0.3.1", "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -5940,7 +5943,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 1.0.103", ] [[package]] @@ -5982,6 +5985,17 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn" +version = "2.0.18" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "sync_wrapper" version = "0.1.1" @@ -6294,7 +6308,7 @@ version = "0.0.1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -6462,7 +6476,7 @@ checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -6497,7 +6511,7 @@ dependencies = [ "heck 0.3.1", "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -7102,7 +7116,7 @@ checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -7211,11 +7225,11 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c6fd7c2581e36d63388a9e04c350c21beb7a8b059580b2e93993c526899ddc" dependencies = [ - "prettyplease", + "prettyplease 0.1.21", "proc-macro2", "prost-build", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -7290,7 +7304,7 @@ checksum = "cc6b8ad3567499f98a1db7a752b07a7c8c7c7c34c332ec00effb2b0027974b7c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -7514,7 +7528,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a044005fd5c0fc1ebd79c622e5606431c6b879a6a19acafb754be9926a2de73e" dependencies = [ "quote", - "syn", + "syn 1.0.103", ] [[package]] @@ -7585,7 +7599,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn", + "syn 1.0.103", "wasm-bindgen-shared", ] @@ -7619,7 +7633,7 @@ checksum = "bfa8a30d46208db204854cadbb5d4baf5fcf8071ba5bf48190c3e59937962ebc" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.103", "wasm-bindgen-backend", "wasm-bindgen-shared", ] From aa85fa37c671814d43578637abe225a5e80256f1 Mon Sep 17 
00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 15 Jun 2023 14:47:09 +0800 Subject: [PATCH 0739/1149] raftstore-v2: dynamic change store pool size (#14945) ref tikv/tikv#14485 implement dynamic change store pool size Signed-off-by: Spade A --- components/raftstore-v2/src/batch/store.rs | 36 ++++- components/raftstore-v2/src/worker/mod.rs | 1 + .../raftstore-v2/src/worker/refresh_config.rs | 150 ++++++++++++++++++ components/raftstore/src/store/mod.rs | 12 +- components/raftstore/src/store/worker/mod.rs | 2 +- components/server/src/server2.rs | 14 +- components/test_raftstore-v2/src/node.rs | 27 ++-- components/test_raftstore-v2/src/util.rs | 18 +++ components/test_raftstore/src/util.rs | 18 +++ src/server/raftkv2/node.rs | 12 +- .../integrations/raftstore/test_scale_pool.rs | 135 +++++++++++++--- 11 files changed, 373 insertions(+), 52 deletions(-) create mode 100644 components/raftstore-v2/src/worker/refresh_config.rs diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 38342e8eea7..8e3bee7efda 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -32,8 +32,8 @@ use raftstore::{ GlobalStoreStat, LocalStoreStat, }, local_metrics::RaftMetrics, - AutoSplitController, Config, ReadRunner, ReadTask, SplitCheckRunner, SplitCheckTask, - StoreWriters, TabletSnapManager, Transport, WriteSenders, + AutoSplitController, Config, ReadRunner, ReadTask, RefreshConfigTask, SplitCheckRunner, + SplitCheckTask, StoreWriters, TabletSnapManager, Transport, WriteSenders, }, }; use resource_metering::CollectorRegHandle; @@ -59,7 +59,7 @@ use crate::{ }, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, - worker::{checkpoint, cleanup, pd, tablet}, + worker::{checkpoint, cleanup, pd, refresh_config, tablet}, Error, Result, }; @@ -292,6 +292,7 @@ impl PollHandler { cfg: Arc>, coprocessor_host: CoprocessorHost, @@ -539,6 +540,8 @@ struct 
Workers { purge: Option, cleanup_worker: Worker, + refresh_config_worker: LazyWorker, + // Following is not maintained by raftstore itself. background: Worker, } @@ -555,6 +558,7 @@ impl Workers { purge, cleanup_worker: Worker::new("cleanup-worker"), background, + refresh_config_worker: LazyWorker::new("refreash-config-worker"), } } @@ -564,6 +568,7 @@ impl Workers { self.pd.stop(); self.tablet.stop(); self.checkpoint.stop(); + self.refresh_config_worker.stop(); if let Some(w) = self.purge { w.stop(); } @@ -749,13 +754,21 @@ impl StoreSystem { sst_importer, key_manager, ); - self.workers = Some(workers); + self.schedulers = Some(schedulers); let peers = builder.init()?; // Choose a different name so we know what version is actually used. rs stands // for raft store. let tag = format!("rs-{}", store_id); - self.system.spawn(tag, builder); + self.system.spawn(tag, builder.clone()); + + let refresh_config_runner = refresh_config::Runner::new( + self.logger.clone(), + router.router().clone(), + self.system.build_pool_state(builder), + ); + assert!(workers.refresh_config_worker.start(refresh_config_runner)); + self.workers = Some(workers); let mut mailboxes = Vec::with_capacity(peers.len()); let mut address = Vec::with_capacity(peers.len()); @@ -790,6 +803,15 @@ impl StoreSystem { Ok(()) } + pub fn refresh_config_scheduler(&mut self) -> Scheduler { + assert!(self.workers.is_some()); + self.workers + .as_ref() + .unwrap() + .refresh_config_worker + .scheduler() + } + pub fn shutdown(&mut self) { self.shutdown.store(true, Ordering::Relaxed); @@ -855,6 +877,10 @@ impl StoreRouter { _ => unreachable!(), } } + + pub fn router(&self) -> &BatchRouter, StoreFsm> { + &self.router + } } impl Deref for StoreRouter { diff --git a/components/raftstore-v2/src/worker/mod.rs b/components/raftstore-v2/src/worker/mod.rs index 93ec453c030..ead4ca7043c 100644 --- a/components/raftstore-v2/src/worker/mod.rs +++ b/components/raftstore-v2/src/worker/mod.rs @@ -3,4 +3,5 @@ pub mod checkpoint; 
pub mod cleanup; pub mod pd; +pub mod refresh_config; pub mod tablet; diff --git a/components/raftstore-v2/src/worker/refresh_config.rs b/components/raftstore-v2/src/worker/refresh_config.rs new file mode 100644 index 00000000000..ea7d8724756 --- /dev/null +++ b/components/raftstore-v2/src/worker/refresh_config.rs @@ -0,0 +1,150 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{sync::Arc, thread}; + +use batch_system::{BatchRouter, Fsm, FsmTypes, HandlerBuilder, Poller, PoolState, Priority}; +use file_system::{set_io_type, IoType}; +use raftstore::store::{BatchComponent, RefreshConfigTask}; +use slog::{error, info, warn, Logger}; +use tikv_util::{sys::thread::StdThreadBuildWrapper, thd_name, worker::Runnable}; + +use crate::fsm::{PeerFsm, StoreFsm}; + +pub struct PoolController> { + pub logger: Logger, + pub router: BatchRouter, + pub state: PoolState, +} + +impl PoolController +where + N: Fsm, + C: Fsm, + H: HandlerBuilder, +{ + pub fn new(logger: Logger, router: BatchRouter, state: PoolState) -> Self { + PoolController { + logger, + router, + state, + } + } + + pub fn decrease_by(&mut self, size: usize) { + for _ in 0..size { + if let Err(e) = self.state.fsm_sender.send(FsmTypes::Empty, None) { + error!( + self.logger, + "failed to decrease thread pool"; + "decrease to" => size, + "err" => %e, + ); + return; + } + } + } + + pub fn increase_by(&mut self, size: usize) { + let name_prefix = self.state.name_prefix.clone(); + let mut workers = self.state.workers.lock().unwrap(); + for i in 0..size { + let handler = self.state.handler_builder.build(Priority::Normal); + let mut poller = Poller { + router: self.router.clone(), + fsm_receiver: self.state.fsm_receiver.clone(), + handler, + max_batch_size: self.state.max_batch_size, + reschedule_duration: self.state.reschedule_duration, + joinable_workers: Some(Arc::clone(&self.state.joinable_workers)), + }; + let props = tikv_util::thread_group::current_properties(); + let t = 
thread::Builder::new() + .name(thd_name!(format!( + "{}-{}", + name_prefix, + i + self.state.id_base, + ))) + .spawn_wrapper(move || { + tikv_util::thread_group::set_properties(props); + set_io_type(IoType::ForegroundWrite); + poller.poll(); + }) + .unwrap(); + workers.push(t); + } + self.state.id_base += size; + } +} + +pub struct Runner +where + EK: engine_traits::KvEngine, + ER: engine_traits::RaftEngine, + H: HandlerBuilder, StoreFsm>, +{ + logger: Logger, + raft_pool: PoolController, StoreFsm, H>, +} + +impl Runner +where + EK: engine_traits::KvEngine, + ER: engine_traits::RaftEngine, + H: HandlerBuilder, StoreFsm>, +{ + pub fn new( + logger: Logger, + router: BatchRouter, StoreFsm>, + raft_pool_state: PoolState, StoreFsm, H>, + ) -> Self { + let raft_pool = PoolController::new(logger.clone(), router, raft_pool_state); + Runner { logger, raft_pool } + } + + fn resize_raft_pool(&mut self, size: usize) { + let current_pool_size = self.raft_pool.state.expected_pool_size; + self.raft_pool.state.expected_pool_size = size; + match current_pool_size.cmp(&size) { + std::cmp::Ordering::Greater => self.raft_pool.decrease_by(current_pool_size - size), + std::cmp::Ordering::Less => self.raft_pool.increase_by(size - current_pool_size), + std::cmp::Ordering::Equal => return, + } + + info!( + self.logger, + "resize raft pool"; + "from" => current_pool_size, + "to" => self.raft_pool.state.expected_pool_size + ); + } +} + +impl Runnable for Runner +where + EK: engine_traits::KvEngine, + ER: engine_traits::RaftEngine, + H: HandlerBuilder, StoreFsm> + std::marker::Send, +{ + type Task = RefreshConfigTask; + + fn run(&mut self, task: Self::Task) { + match task { + RefreshConfigTask::ScalePool(component, size) => { + match component { + BatchComponent::Store => {} + BatchComponent::Apply => { + unreachable!("v2 does not have apply batch system") + } + }; + self.resize_raft_pool(size); + } + _ => { + warn!( + self.logger, + "not supported now"; + "config_change" => ?task, + ); + } 
+ } + } +} diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 7a2c04e2450..c91cee3071a 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -78,12 +78,12 @@ pub use self::{ txn_ext::{LocksStatus, PeerPessimisticLocks, PessimisticLockPair, TxnExt}, util::{RegionReadProgress, RegionReadProgressRegistry}, worker::{ - metrics as worker_metrics, AutoSplitController, Bucket, BucketRange, CachedReadDelegate, - CheckLeaderRunner, CheckLeaderTask, FlowStatistics, FlowStatsReporter, KeyEntry, - LocalReadContext, LocalReader, LocalReaderCore, PdStatsMonitor, PdTask, ReadDelegate, - ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, - SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, SplitInfo, - StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, + metrics as worker_metrics, AutoSplitController, BatchComponent, Bucket, BucketRange, + CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, FlowStatistics, FlowStatsReporter, + KeyEntry, LocalReadContext, LocalReader, LocalReaderCore, PdStatsMonitor, PdTask, + ReadDelegate, ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, + RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, + SplitConfigManager, SplitInfo, StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, DEFAULT_BIG_REGION_BYTE_THRESHOLD, DEFAULT_BIG_REGION_QPS_THRESHOLD, DEFAULT_BYTE_THRESHOLD, DEFAULT_QPS_THRESHOLD, NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, REGION_CPU_OVERLOAD_THRESHOLD_RATIO, diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index 62d27b2e88b..0ace0240091 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -37,7 +37,7 @@ pub use self::{ StoreMetaDelegate, TrackVer, }, refresh_config::{ - BatchComponent 
as RaftStoreBatchComponent, Runner as RefreshConfigRunner, + BatchComponent as RaftStoreBatchComponent, BatchComponent, Runner as RefreshConfigRunner, Task as RefreshConfigTask, }, region::{Runner as RegionRunner, Task as RegionTask}, diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index da970a7e749..85a7bf235b6 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -53,8 +53,8 @@ use raftstore::{ RawConsistencyCheckObserver, }, store::{ - memory::MEMTRACE_ROOT as MEMTRACE_RAFTSTORE, AutoSplitController, CheckLeaderRunner, - SplitConfigManager, TabletSnapManager, + config::RaftstoreConfigManager, memory::MEMTRACE_ROOT as MEMTRACE_RAFTSTORE, + AutoSplitController, CheckLeaderRunner, SplitConfigManager, TabletSnapManager, }, RegionInfoAccessor, }; @@ -810,13 +810,21 @@ where collector_reg_handle, self.core.background_worker.clone(), pd_worker, - raft_store, + raft_store.clone(), &state, importer.clone(), self.core.encryption_key_manager.clone(), ) .unwrap_or_else(|e| fatal!("failed to start node: {}", e)); + cfg_controller.register( + tikv::config::Module::Raftstore, + Box::new(RaftstoreConfigManager::new( + self.node.as_mut().unwrap().refresh_config_scheduler(), + raft_store, + )), + ); + // Start auto gc. Must after `Node::start` because `node_id` is initialized // there. 
let store_id = self.node.as_ref().unwrap().id(); diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 1674d860ccc..0410d514ae4 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -22,8 +22,8 @@ use raftstore::{ coprocessor::CoprocessorHost, errors::Error as RaftError, store::{ - AutoSplitController, GlobalReplicationState, RegionSnapshot, SplitConfigManager, - TabletSnapKey, TabletSnapManager, Transport, + config::RaftstoreConfigManager, AutoSplitController, GlobalReplicationState, + RegionSnapshot, SplitConfigManager, TabletSnapKey, TabletSnapManager, Transport, }, Result, }; @@ -151,6 +151,7 @@ pub struct NodeCluster { simulate_trans: HashMap>, concurrency_managers: HashMap, snap_mgrs: HashMap, + cfg_controller: Option, } impl NodeCluster { @@ -162,12 +163,17 @@ impl NodeCluster { simulate_trans: HashMap::default(), concurrency_managers: HashMap::default(), snap_mgrs: HashMap::default(), + cfg_controller: None, } } pub fn get_concurrency_manager(&self, node_id: u64) -> ConcurrencyManager { self.concurrency_managers.get(&node_id).unwrap().clone() } + + pub fn get_cfg_controller(&self) -> Option<&ConfigController> { + self.cfg_controller.as_ref() + } } impl Simulator for NodeCluster { @@ -329,14 +335,14 @@ impl Simulator for NodeCluster { .validate(region_split_size, enable_region_bucket, region_bucket_size) .unwrap(); - // let raft_store = Arc::new(VersionTrack::new(raftstore_cfg)); - // cfg_controller.register( - // Module::Raftstore, - // Box::new(RaftstoreConfigManager::new( - // node.refresh_config_scheduler(), - // raft_store, - // )), - // ); + let raft_store = Arc::new(VersionTrack::new(raftstore_cfg)); + cfg_controller.register( + Module::Raftstore, + Box::new(RaftstoreConfigManager::new( + node.refresh_config_scheduler(), + raft_store, + )), + ); if let Some(tmp) = snap_mgs_path { self.trans @@ -356,6 +362,7 @@ impl Simulator for NodeCluster { 
self.nodes.insert(node_id, node); self.simulate_trans.insert(node_id, simulate_trans); + self.cfg_controller = Some(cfg_controller); Ok(node_id) } diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index 9a851e59e03..f352a30504a 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -406,3 +406,21 @@ pub fn must_error_read_on_peer, EK: KvEngine>( } } } + +pub fn put_with_timeout, EK: KvEngine>( + cluster: &mut Cluster, + node_id: u64, + key: &[u8], + value: &[u8], + timeout: Duration, +) -> Result { + let mut region = cluster.get_region(key); + let region_id = region.get_id(); + let req = new_request( + region_id, + region.take_region_epoch(), + vec![new_put_cf_cmd(CF_DEFAULT, key, value)], + false, + ); + cluster.call_command_on_node(node_id, req, timeout) +} diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index cd2fb8a2792..0c83ca6fec5 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -1543,3 +1543,21 @@ pub fn test_delete_range(cluster: &mut Cluster, cf: CfName) { assert!(cluster.get_cf(cf, k).is_none()); } } + +pub fn put_with_timeout( + cluster: &mut Cluster, + node_id: u64, + key: &[u8], + value: &[u8], + timeout: Duration, +) -> Result { + let mut region = cluster.get_region(key); + let region_id = region.get_id(); + let req = new_request( + region_id, + region.take_region_epoch(), + vec![new_put_cf_cmd(CF_DEFAULT, key, value)], + false, + ); + cluster.call_command_on_node(node_id, req, timeout) +} diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index f95e4a89848..e32fa28fd2b 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -11,8 +11,8 @@ use pd_client::PdClient; use raftstore::{ coprocessor::CoprocessorHost, store::{ - AutoSplitController, GlobalReplicationState, TabletSnapManager, Transport, - RAFT_INIT_LOG_INDEX, + 
AutoSplitController, GlobalReplicationState, RefreshConfigTask, TabletSnapManager, + Transport, RAFT_INIT_LOG_INDEX, }, }; use raftstore_v2::{router::RaftRouter, Bootstrap, PdTask, StoreRouter, StoreSystem}; @@ -21,7 +21,7 @@ use slog::{info, o, Logger}; use sst_importer::SstImporter; use tikv_util::{ config::VersionTrack, - worker::{LazyWorker, Worker}, + worker::{LazyWorker, Scheduler, Worker}, }; use crate::server::{node::init_store, Result}; @@ -246,6 +246,12 @@ where Ok(()) } + /// Gets the Scheduler of RaftstoreConfigTask, it must be called after + /// start. + pub fn refresh_config_scheduler(&mut self) -> Scheduler { + self.system.as_mut().unwrap().1.refresh_config_scheduler() + } + /// Stops the Node. pub fn stop(&mut self) { let store_id = self.store.get_id(); diff --git a/tests/integrations/raftstore/test_scale_pool.rs b/tests/integrations/raftstore/test_scale_pool.rs index 794cf90f4cb..c3c321a6f66 100644 --- a/tests/integrations/raftstore/test_scale_pool.rs +++ b/tests/integrations/raftstore/test_scale_pool.rs @@ -2,32 +2,12 @@ use std::{collections::HashMap, time::Duration}; -use engine_traits::CF_DEFAULT; -use kvproto::raft_cmdpb::RaftCmdResponse; -use raftstore::Result; use test_raftstore::*; use tikv_util::{ sys::thread::{self, Pid}, HandyRwLock, }; -fn put_with_timeout( - cluster: &mut Cluster, - key: &[u8], - value: &[u8], - timeout: Duration, -) -> Result { - let mut region = cluster.get_region(key); - let region_id = region.get_id(); - let req = new_request( - region_id, - region.take_region_epoch(), - vec![new_put_cf_cmd(CF_DEFAULT, key, value)], - false, - ); - cluster.call_command_on_node(0, req, timeout) -} - #[test] fn test_increase_pool() { let mut cluster = new_node_cluster(0, 1); @@ -41,7 +21,7 @@ fn test_increase_pool() { let _ = cluster.run_conf_change(); // Request cann't be handled as all pollers have been paused - put_with_timeout(&mut cluster, b"k1", b"k1", Duration::from_secs(1)).unwrap(); + put_with_timeout(&mut cluster, 1, b"k1", 
b"k1", Duration::from_secs(1)).unwrap_err(); must_get_none(&cluster.get_engine(1), b"k1"); { @@ -82,15 +62,70 @@ fn test_increase_pool() { fail::remove(fp1); } +#[test] +fn test_increase_pool_v2() { + use test_raftstore_v2::*; + + let mut cluster = new_node_cluster(0, 1); + cluster.cfg.raft_store.store_batch_system.pool_size = 1; + cluster.pd_client.disable_default_operator(); + let fp1 = "poll"; + + // Pause at the entrance of the rafstore-1-0 thread + fail::cfg(fp1, "1*pause").unwrap(); + let _ = cluster.run_conf_change(); + + // Request cann't be handled as all pollers have been paused + put_with_timeout(&mut cluster, 1, b"k1", b"k1", Duration::from_secs(1)).unwrap_err(); + must_get_none(&cluster.get_engine(1), b"k1"); + + { + let sim = cluster.sim.rl(); + let cfg_controller = sim.get_cfg_controller().unwrap(); + + let change = { + let mut change = HashMap::new(); + change.insert("raftstore.store-pool-size".to_owned(), "2".to_owned()); + change + }; + // Update config, expand from 1 to 2 + cfg_controller.update(change).unwrap(); + assert_eq!( + cfg_controller + .get_current() + .raft_store + .store_batch_system + .pool_size, + 2 + ); + } + + // Request can be handled as usual + cluster.must_put(b"k2", b"v2"); + must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); + + fail::remove(fp1); +} + fn get_poller_thread_ids() -> Vec { - let prefixs = ("raftstore", "apply-"); + get_poller_thread_ids_by_prefix(vec!["raftstore", "apply-"]) +} + +fn get_raft_poller_thread_ids() -> Vec { + get_poller_thread_ids_by_prefix(vec!["rs-"]) +} + +fn get_poller_thread_ids_by_prefix(prefixs: Vec<&str>) -> Vec { let mut poller_tids = vec![]; let pid = thread::process_id(); let all_tids: Vec<_> = thread::thread_ids(pid).unwrap(); for tid in all_tids { if let Ok(stat) = thread::full_thread_stat(pid, tid) { - if stat.command.starts_with(prefixs.0) || stat.command.starts_with(prefixs.1) { - poller_tids.push(tid); + for &prefix in &prefixs { + println!("command {:?}", stat.command); + if 
stat.command.starts_with(prefix) { + poller_tids.push(tid); + } } } } @@ -158,6 +193,58 @@ fn test_decrease_pool() { must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); } +#[test] +fn test_decrease_pool_v2() { + use test_raftstore_v2::*; + let mut cluster = new_node_cluster(0, 1); + cluster.pd_client.disable_default_operator(); + cluster.cfg.raft_store.store_batch_system.pool_size = 2; + let _ = cluster.run_conf_change(); + + // Save current poller tids before shrinking + let original_poller_tids = get_raft_poller_thread_ids(); + + // Request can be handled as usual + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); + + { + let sim = cluster.sim.rl(); + let cfg_controller = sim.get_cfg_controller().unwrap(); + let change = { + let mut change = HashMap::new(); + change.insert("raftstore.store_pool_size".to_owned(), "1".to_owned()); + change + }; + + // Update config, shrink from 2 to 1 + cfg_controller.update(change).unwrap(); + std::thread::sleep(std::time::Duration::from_secs(1)); + + assert_eq!( + cfg_controller + .get_current() + .raft_store + .store_batch_system + .pool_size, + 1 + ); + } + + // Save current poller tids after scaling down + let current_poller_tids = get_raft_poller_thread_ids(); + // Compared with before shrinking, the thread num should be reduced by one + assert_eq!(current_poller_tids.len(), original_poller_tids.len() - 1); + // After shrinking, all the left tids must be there before + for tid in current_poller_tids { + assert!(original_poller_tids.contains(&tid)); + } + + // Request can be handled as usual + cluster.must_put(b"k2", b"v2"); + must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); +} + fn get_async_writers_tids() -> Vec { let prefix = "store-writer-"; let mut writers_tids = vec![]; From 0d9ab27b4c1f458186884ff06b57b61e4a50da90 Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 15 Jun 2023 00:05:09 -0700 Subject: [PATCH 0740/1149] resource_control: use override priority if specified 
(#14926) close tikv/tikv#14925 use override priority to replace resource group priority for queries if specified Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.toml | 38 +-- components/resource_control/src/lib.rs | 3 +- .../resource_control/src/resource_group.rs | 239 +++++++++++++++--- src/coprocessor/endpoint.rs | 19 +- src/read_pool.rs | 67 +++-- src/storage/mod.rs | 165 ++++-------- src/storage/txn/commands/mod.rs | 8 +- src/storage/txn/sched_pool.rs | 13 +- src/storage/txn/scheduler.rs | 130 +++++----- 9 files changed, 396 insertions(+), 286 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f83a086f00a..34bf6667b32 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,37 +21,15 @@ snmalloc = ["tikv_alloc/snmalloc"] portable = ["engine_rocks/portable"] sse = ["engine_rocks/sse"] mem-profiling = ["tikv_alloc/mem-profiling"] -failpoints = [ - "fail/failpoints", - "raftstore/failpoints", - "tikv_util/failpoints", - "engine_rocks/failpoints", -] -cloud-aws = [ - "encryption_export/cloud-aws", - "sst_importer/cloud-aws", -] -cloud-gcp = [ - "encryption_export/cloud-gcp", - "sst_importer/cloud-gcp", -] -cloud-azure = [ - "encryption_export/cloud-azure", - "sst_importer/cloud-azure", -] +failpoints = ["fail/failpoints", "raftstore/failpoints", "tikv_util/failpoints", "engine_rocks/failpoints"] +cloud-aws = ["encryption_export/cloud-aws", "sst_importer/cloud-aws"] +cloud-gcp = ["encryption_export/cloud-gcp", "sst_importer/cloud-gcp"] +cloud-azure = ["encryption_export/cloud-azure", "sst_importer/cloud-azure"] testexport = ["raftstore/testexport", "api_version/testexport", "causal_ts/testexport", "engine_traits/testexport", "engine_rocks/testexport", "engine_panic/testexport"] -test-engine-kv-rocksdb = [ - "engine_test/test-engine-kv-rocksdb" -] -test-engine-raft-raft-engine = [ - "engine_test/test-engine-raft-raft-engine" -] -test-engines-rocksdb = [ - "engine_test/test-engines-rocksdb", -] 
-test-engines-panic = [ - "engine_test/test-engines-panic", -] +test-engine-kv-rocksdb = ["engine_test/test-engine-kv-rocksdb"] +test-engine-raft-raft-engine = ["engine_test/test-engine-raft-raft-engine"] +test-engines-rocksdb = ["engine_test/test-engines-rocksdb"] +test-engines-panic = ["engine_test/test-engines-panic"] cloud-storage-grpc = ["sst_importer/cloud-storage-grpc"] cloud-storage-dylib = ["sst_importer/cloud-storage-dylib"] pprof-fp = ["pprof/frame-pointer"] diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index 113555f795f..fdb331a7fa4 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -6,7 +6,8 @@ use serde::{Deserialize, Serialize}; mod resource_group; pub use resource_group::{ - ResourceConsumeType, ResourceController, ResourceGroupManager, MIN_PRIORITY_UPDATE_INTERVAL, + ResourceConsumeType, ResourceController, ResourceGroupManager, TaskMetadata, + MIN_PRIORITY_UPDATE_INTERVAL, }; mod future; diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index ad0627ce0e6..ec78635d56c 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -1,6 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ + borrow::Cow, cell::Cell, cmp::{max, min}, sync::{ @@ -456,14 +457,104 @@ impl ResourceController { CommandPri::Normal => 1, CommandPri::High => 0, }; - self.resource_group(name).get_priority(level) + self.resource_group(name).get_priority(level, None) + } +} + +const OVERRIDE_PRIORITY_MASK: u8 = 0b1000_0000; +const RESOURCE_GROUP_NAME_MASK: u8 = 0b0100_0000; + +#[derive(Clone, Default)] +pub struct TaskMetadata<'a> { + // The first byte is a bit map to indicate which field exists, + // then append override priority if nonzero, + // then append resource group name if not default + metadata: Cow<'a, [u8]>, +} + +impl<'a> TaskMetadata<'a> { + pub fn deep_clone(&self) -> TaskMetadata<'static> { + TaskMetadata { + metadata: Cow::Owned(self.metadata.to_vec()), + } + } + + pub fn from_ctx(ctx: &ResourceControlContext) -> Self { + let mut mask = 0; + let mut buf = vec![]; + if ctx.override_priority != 0 { + mask |= OVERRIDE_PRIORITY_MASK; + } + if !ctx.resource_group_name.is_empty() + && ctx.resource_group_name != DEFAULT_RESOURCE_GROUP_NAME + { + mask |= RESOURCE_GROUP_NAME_MASK; + } + if mask == 0 { + // if all are default value, no need to write anything to save copy cost + return Self { + metadata: Cow::Owned(buf), + }; + } + buf.push(mask); + if mask & OVERRIDE_PRIORITY_MASK != 0 { + buf.extend_from_slice(&(ctx.override_priority as u32).to_ne_bytes()); + } + if mask & RESOURCE_GROUP_NAME_MASK != 0 { + buf.extend_from_slice(ctx.resource_group_name.as_bytes()); + } + Self { + metadata: Cow::Owned(buf), + } + } + + fn from_bytes(bytes: &'a [u8]) -> Self { + Self { + metadata: Cow::Borrowed(bytes), + } + } + + pub fn to_vec(self) -> Vec { + self.metadata.into_owned() + } + + fn override_priority(&self) -> u32 { + if self.metadata.is_empty() { + return 0; + } + if self.metadata[0] & OVERRIDE_PRIORITY_MASK == 0 { + return 0; + } + u32::from_ne_bytes(self.metadata[1..5].try_into().unwrap()) + } + + pub fn group_name(&self) -> &[u8] { + if 
self.metadata.is_empty() { + return DEFAULT_RESOURCE_GROUP_NAME.as_bytes(); + } + if self.metadata[0] & RESOURCE_GROUP_NAME_MASK == 0 { + return DEFAULT_RESOURCE_GROUP_NAME.as_bytes(); + } + let start = if self.metadata[0] & OVERRIDE_PRIORITY_MASK != 0 { + 5 + } else { + 1 + }; + &self.metadata[start..] } } impl TaskPriorityProvider for ResourceController { fn priority_of(&self, extras: &yatp::queue::Extras) -> u64 { - self.resource_group(extras.metadata()) - .get_priority(extras.current_level() as usize) + let metadata = TaskMetadata::from_bytes(extras.metadata()); + self.resource_group(metadata.group_name()).get_priority( + extras.current_level() as usize, + if metadata.override_priority() == 0 { + None + } else { + Some(metadata.override_priority()) + }, + ) } } @@ -487,7 +578,7 @@ struct GroupPriorityTracker { } impl GroupPriorityTracker { - fn get_priority(&self, level: usize) -> u64 { + fn get_priority(&self, level: usize, override_priority: Option) -> u64 { let task_extra_priority = TASK_EXTRA_FACTOR_BY_LEVEL[level] * 1000 * self.weight; let vt = (if self.vt_delta_for_get > 0 { self.virtual_time @@ -496,7 +587,8 @@ impl GroupPriorityTracker { } else { self.virtual_time.load(Ordering::Relaxed) }) + task_extra_priority; - concat_priority_vt(self.group_priority, vt) + let priority = override_priority.unwrap_or(self.group_priority); + concat_priority_vt(priority, vt) } #[inline] @@ -611,30 +703,6 @@ pub(crate) mod tests { assert_eq!(group1.weight, group2.weight * 2); assert_eq!(group1.current_vt(), 0); - let mut extras1 = Extras::single_level(); - extras1.set_metadata(b"test".to_vec()); - assert_eq!( - resource_ctl.priority_of(&extras1), - concat_priority_vt(LOW_PRIORITY, group1.weight * 50) - ); - assert_eq!(group1.current_vt(), group1.weight * 50); - - let mut extras2 = Extras::single_level(); - extras2.set_metadata(b"test2".to_vec()); - assert_eq!( - resource_ctl.priority_of(&extras2), - concat_priority_vt(MEDIUM_PRIORITY, group2.weight * 50) - ); - 
assert_eq!(group2.current_vt(), group2.weight * 50); - - let mut extras3 = Extras::single_level(); - extras3.set_metadata(b"unknown_group".to_vec()); - assert_eq!( - resource_ctl.priority_of(&extras3), - concat_priority_vt(MEDIUM_PRIORITY, 50) - ); - assert_eq!(resource_ctl.resource_group(b"default").current_vt(), 50); - resource_ctl.consume( b"test", ResourceConsumeType::CpuTime(Duration::from_micros(10000)), @@ -644,14 +712,14 @@ pub(crate) mod tests { ResourceConsumeType::CpuTime(Duration::from_micros(10000)), ); - assert_eq!(group1.current_vt(), group1.weight * 10050); + assert_eq!(group1.current_vt(), group1.weight * 10000); assert_eq!(group1.current_vt(), group2.current_vt() * 2); // test update all group vts resource_manager.advance_min_virtual_time(); let group1_vt = group1.current_vt(); let group1_weight = group1.weight; - assert_eq!(group1_vt, group1.weight * 10050); + assert_eq!(group1_vt, group1.weight * 10000); assert!(group2.current_vt() >= group1.current_vt() * 3 / 4); assert!(resource_ctl.resource_group(b"default").current_vt() >= group1.current_vt() / 2); @@ -692,6 +760,89 @@ pub(crate) mod tests { assert_eq!(&*new_limiter as *const _, &*limiter as *const _); } + #[test] + fn test_resource_group_priority() { + let resource_manager = ResourceGroupManager::default(); + let group1 = new_resource_group_ru("test1".into(), 200, LOW_PRIORITY); + resource_manager.add_resource_group(group1); + let group2 = new_resource_group_ru("test2".into(), 400, 0); + resource_manager.add_resource_group(group2); + assert_eq!(resource_manager.resource_groups.len(), 3); + + let resource_ctl = resource_manager.derive_controller("test".into(), true); + + let group1 = resource_ctl.resource_group("test1".as_bytes()); + let group2 = resource_ctl.resource_group("test2".as_bytes()); + assert_eq!(group1.weight, group2.weight * 2); + assert_eq!(group1.current_vt(), 0); + + let mut extras1 = Extras::single_level(); + extras1.set_metadata( + 
TaskMetadata::from_ctx(&ResourceControlContext { + resource_group_name: "test1".to_string(), + override_priority: 0, + ..Default::default() + }) + .to_vec(), + ); + assert_eq!( + resource_ctl.priority_of(&extras1), + concat_priority_vt(LOW_PRIORITY, group1.weight * 50) + ); + assert_eq!(group1.current_vt(), group1.weight * 50); + + let mut extras2 = Extras::single_level(); + extras2.set_metadata( + TaskMetadata::from_ctx(&ResourceControlContext { + resource_group_name: "test2".to_string(), + override_priority: 0, + ..Default::default() + }) + .to_vec(), + ); + assert_eq!( + resource_ctl.priority_of(&extras2), + concat_priority_vt(MEDIUM_PRIORITY, group2.weight * 50) + ); + assert_eq!(group2.current_vt(), group2.weight * 50); + + // test override priority + let mut extras2_override = Extras::single_level(); + extras2_override.set_metadata( + TaskMetadata::from_ctx(&ResourceControlContext { + resource_group_name: "test2".to_string(), + override_priority: LOW_PRIORITY as u64, + ..Default::default() + }) + .to_vec(), + ); + assert_eq!( + resource_ctl.priority_of(&extras2_override), + concat_priority_vt(LOW_PRIORITY, group2.weight * 100) + ); + assert_eq!(group2.current_vt(), group2.weight * 100); + + let mut extras3 = Extras::single_level(); + extras3.set_metadata( + TaskMetadata::from_ctx(&ResourceControlContext { + resource_group_name: "unknown_group".to_string(), + override_priority: 0, + ..Default::default() + }) + .to_vec(), + ); + assert_eq!( + resource_ctl.priority_of(&extras3), + concat_priority_vt(MEDIUM_PRIORITY, 50) + ); + assert_eq!( + resource_ctl + .resource_group("default".as_bytes()) + .current_vt(), + 50 + ); + } + #[test] fn test_reset_resource_group_vt() { let resource_manager = ResourceGroupManager::default(); @@ -875,4 +1026,30 @@ pub(crate) mod tests { let v5 = concat_priority_vt(HIGH_PRIORITY, 10); assert!(v5 < v1); } + + #[test] + fn test_task_metadata() { + let cases = [ + ("default", 0u32), + ("default", 6u32), + ("test", 0u32), + ("test", 
15u32), + ]; + + let metadata = TaskMetadata::from_ctx(&ResourceControlContext::default()); + assert_eq!(metadata.group_name(), b"default"); + for (group_name, priority) in cases { + let metadata = TaskMetadata::from_ctx(&ResourceControlContext { + resource_group_name: group_name.to_string(), + override_priority: priority as u64, + ..Default::default() + }); + assert_eq!(metadata.override_priority(), priority); + assert_eq!(metadata.group_name(), group_name.as_bytes()); + let vec = metadata.to_vec(); + let metadata1 = TaskMetadata::from_bytes(&vec); + assert_eq!(metadata1.override_priority(), priority); + assert_eq!(metadata1.group_name(), group_name.as_bytes()); + } + } } diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 01f09941a59..9339ae0bcfc 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -14,6 +14,7 @@ use engine_traits::PerfLevel; use futures::{channel::mpsc, future::Either, prelude::*}; use kvproto::{coprocessor as coppb, errorpb, kvrpcpb}; use protobuf::{CodedInputStream, Message}; +use resource_control::TaskMetadata; use resource_metering::{FutureExt, ResourceTagFactory, StreamExt}; use tidb_query_common::execute_stats::ExecSummary; use tikv_alloc::trace::MemoryTraceGuard; @@ -486,12 +487,7 @@ impl Endpoint { let resource_tag = self .resource_tag_factory .new_tag_with_key_ranges(&req_ctx.context, key_ranges); - let group_name = req_ctx - .context - .get_resource_control_context() - .get_resource_group_name() - .as_bytes() - .to_owned(); + let metadata = TaskMetadata::from_ctx(req_ctx.context.get_resource_control_context()); // box the tracker so that moving it is cheap. let tracker = Box::new(Tracker::new(req_ctx, self.slow_log_threshold)); @@ -502,7 +498,7 @@ impl Endpoint { .in_resource_metering_tag(resource_tag), priority, task_id, - group_name, + metadata, ) .map_err(|_| Error::MaxPendingTasksExceeded); async move { res.await? 
} @@ -726,12 +722,7 @@ impl Endpoint { ) -> Result>> { let (tx, rx) = mpsc::channel::>(self.stream_channel_size); let priority = req_ctx.context.get_priority(); - let group_name = req_ctx - .context - .get_resource_control_context() - .get_resource_group_name() - .as_bytes() - .to_owned(); + let metadata = TaskMetadata::from_ctx(req_ctx.context.get_resource_control_context()); let key_ranges = req_ctx .ranges .iter() @@ -754,7 +745,7 @@ impl Endpoint { }), priority, task_id, - group_name, + metadata, ) .map_err(|_| Error::MaxPendingTasksExceeded)?; Ok(rx) diff --git a/src/read_pool.rs b/src/read_pool.rs index 16d1a7091b7..3f61374b07f 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -16,7 +16,7 @@ use futures::{channel::oneshot, future::TryFutureExt}; use kvproto::{errorpb, kvrpcpb::CommandPri}; use online_config::{ConfigChange, ConfigManager, ConfigValue, Result as CfgResult}; use prometheus::{core::Metric, Histogram, IntCounter, IntGauge}; -use resource_control::{ControlledFuture, ResourceController}; +use resource_control::{ControlledFuture, ResourceController, TaskMetadata}; use thiserror::Error; use tikv_util::{ sys::{cpu_time::ProcessStat, SysQuota}, @@ -120,7 +120,7 @@ impl ReadPoolHandle { f: F, priority: CommandPri, task_id: u64, - group_meta: Vec, + metadata: TaskMetadata<'_>, ) -> Result<(), ReadPoolError> where F: Future + Send + 'static, @@ -161,8 +161,9 @@ impl ReadPoolHandle { CommandPri::Normal => None, CommandPri::Low => Some(2), }; + let group_name = metadata.group_name().to_owned(); let mut extras = Extras::new_multilevel(task_id, fixed_level); - extras.set_metadata(group_meta.clone()); + extras.set_metadata(metadata.to_vec()); let task_cell = if let Some(resource_ctl) = resource_ctl { TaskCell::new( TrackedFuture::new(ControlledFuture::new( @@ -171,7 +172,7 @@ impl ReadPoolHandle { running_tasks.dec(); }, resource_ctl.clone(), - group_meta, + group_name, )), extras, ) @@ -195,7 +196,7 @@ impl ReadPoolHandle { f: F, priority: CommandPri, 
task_id: u64, - group_meta: Vec, + metadata: TaskMetadata<'_>, ) -> impl Future> where F: Future + Send + 'static, @@ -209,7 +210,7 @@ impl ReadPoolHandle { }, priority, task_id, - group_meta, + metadata, ); async move { res?; @@ -804,18 +805,24 @@ mod tests { let (task3, _tx3) = gen_task(); let (task4, _tx4) = gen_task(); - handle.spawn(task1, CommandPri::Normal, 1, vec![]).unwrap(); - handle.spawn(task2, CommandPri::Normal, 2, vec![]).unwrap(); + handle + .spawn(task1, CommandPri::Normal, 1, TaskMetadata::default()) + .unwrap(); + handle + .spawn(task2, CommandPri::Normal, 2, TaskMetadata::default()) + .unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task3, CommandPri::Normal, 3, vec![]) { + match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default()) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } tx1.send(()).unwrap(); thread::sleep(Duration::from_millis(300)); - handle.spawn(task4, CommandPri::Normal, 4, vec![]).unwrap(); + handle + .spawn(task4, CommandPri::Normal, 4, TaskMetadata::default()) + .unwrap(); } #[test] @@ -847,11 +854,15 @@ mod tests { let (task4, _tx4) = gen_task(); let (task5, _tx5) = gen_task(); - handle.spawn(task1, CommandPri::Normal, 1, vec![]).unwrap(); - handle.spawn(task2, CommandPri::Normal, 2, vec![]).unwrap(); + handle + .spawn(task1, CommandPri::Normal, 1, TaskMetadata::default()) + .unwrap(); + handle + .spawn(task2, CommandPri::Normal, 2, TaskMetadata::default()) + .unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task3, CommandPri::Normal, 3, vec![]) { + match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default()) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } @@ -859,10 +870,12 @@ mod tests { handle.scale_pool_size(3); assert_eq!(handle.get_normal_pool_size(), 3); - handle.spawn(task4, CommandPri::Normal, 4, vec![]).unwrap(); + handle + .spawn(task4, 
CommandPri::Normal, 4, TaskMetadata::default()) + .unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task5, CommandPri::Normal, 5, vec![]) { + match handle.spawn(task5, CommandPri::Normal, 5, TaskMetadata::default()) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } @@ -897,11 +910,15 @@ mod tests { let (task4, _tx4) = gen_task(); let (task5, _tx5) = gen_task(); - handle.spawn(task1, CommandPri::Normal, 1, vec![]).unwrap(); - handle.spawn(task2, CommandPri::Normal, 2, vec![]).unwrap(); + handle + .spawn(task1, CommandPri::Normal, 1, TaskMetadata::default()) + .unwrap(); + handle + .spawn(task2, CommandPri::Normal, 2, TaskMetadata::default()) + .unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task3, CommandPri::Normal, 3, vec![]) { + match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default()) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } @@ -913,10 +930,12 @@ mod tests { handle.scale_pool_size(1); assert_eq!(handle.get_normal_pool_size(), 1); - handle.spawn(task4, CommandPri::Normal, 4, vec![]).unwrap(); + handle + .spawn(task4, CommandPri::Normal, 4, TaskMetadata::default()) + .unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task5, CommandPri::Normal, 5, vec![]) { + match handle.spawn(task5, CommandPri::Normal, 5, TaskMetadata::default()) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } @@ -1016,8 +1035,12 @@ mod tests { let (task1, tx1) = gen_task(); let (task2, tx2) = gen_task(); - handle.spawn(task1, CommandPri::Normal, 1, vec![]).unwrap(); - handle.spawn(task2, CommandPri::Normal, 2, vec![]).unwrap(); + handle + .spawn(task1, CommandPri::Normal, 1, TaskMetadata::default()) + .unwrap(); + handle + .spawn(task2, CommandPri::Normal, 2, TaskMetadata::default()) + .unwrap(); tx1.send(()).unwrap(); tx2.send(()).unwrap(); diff --git a/src/storage/mod.rs 
b/src/storage/mod.rs index a9f69156a2e..5aea4702cb5 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -90,7 +90,7 @@ use kvproto::{ use pd_client::FeatureGate; use raftstore::store::{util::build_key_range, ReadStats, TxnExt, WriteStats}; use rand::prelude::*; -use resource_control::ResourceController; +use resource_control::{ResourceController, TaskMetadata}; use resource_metering::{FutureExt, ResourceTagFactory}; use tikv_kv::{OnAppliedCb, SnapshotExt}; use tikv_util::{ @@ -598,11 +598,7 @@ impl Storage { let stage_begin_ts = Instant::now(); const CMD: CommandKind = CommandKind::get; let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .as_bytes() - .to_owned(); + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag_with_key_ranges( &ctx, @@ -738,7 +734,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), - group_name, + metadata, ) } @@ -758,12 +754,8 @@ impl Storage { const CMD: CommandKind = CommandKind::batch_get_command; // all requests in a batch have the same region, epoch, term, replica_read let priority = requests[0].get_context().get_priority(); - let group_name = requests[0] - .get_context() - .get_resource_control_context() - .get_resource_group_name() - .as_bytes() - .to_owned(); + let metadata = + TaskMetadata::from_ctx(requests[0].get_context().get_resource_control_context()); let concurrency_manager = self.concurrency_manager.clone(); let api_version = self.api_version; let busy_threshold = @@ -927,7 +919,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), - group_name, + metadata, ) } @@ -943,11 +935,7 @@ impl Storage { let stage_begin_ts = Instant::now(); const CMD: CommandKind = CommandKind::batch_get; let priority = ctx.get_priority(); - let group_name = ctx - 
.get_resource_control_context() - .get_resource_group_name() - .as_bytes() - .to_owned(); + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); let priority_tag = get_priority_tag(priority); let key_ranges = keys .iter() @@ -1103,7 +1091,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), - group_name, + metadata, ) } @@ -1126,11 +1114,7 @@ impl Storage { ) -> impl Future>>> { const CMD: CommandKind = CommandKind::scan; let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .as_bytes() - .to_owned(); + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag_with_key_ranges( &ctx, @@ -1282,7 +1266,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), - group_name, + metadata, ) } @@ -1296,11 +1280,7 @@ impl Storage { ) -> impl Future>> { const CMD: CommandKind = CommandKind::scan_lock; let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .as_bytes() - .to_owned(); + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag_with_key_ranges( &ctx, @@ -1430,7 +1410,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), - group_name, + metadata, ); async move { res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) @@ -1521,7 +1501,7 @@ impl Storage { // TODO: separate the txn and raw commands if needed in the future. 
fn sched_raw_command( &self, - group_name: &str, + metadata: TaskMetadata<'_>, pri: CommandPri, tag: CommandKind, future: T, @@ -1532,7 +1512,7 @@ impl Storage { SCHED_STAGE_COUNTER_VEC.get(tag).new.inc(); self.sched .get_sched_pool() - .spawn(group_name, pri, future) + .spawn(metadata, pri, future) .map_err(|_| Error::from(ErrorInner::SchedTooBusy)) } @@ -1606,11 +1586,7 @@ impl Storage { ) -> impl Future>>> { const CMD: CommandKind = CommandKind::raw_get; let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .as_bytes() - .to_owned(); + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); let priority_tag = get_priority_tag(priority); let resource_tag = self .resource_tag_factory @@ -1675,7 +1651,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), - group_name, + metadata, ) } @@ -1689,12 +1665,7 @@ impl Storage { const CMD: CommandKind = CommandKind::raw_batch_get_command; // all requests in a batch have the same region, epoch, term, replica_read let priority = gets[0].get_context().get_priority(); - let group_name = gets[0] - .get_context() - .get_resource_control_context() - .get_resource_group_name() - .as_bytes() - .to_owned(); + let metadata = TaskMetadata::from_ctx(gets[0].get_context().get_resource_control_context()); let priority_tag = get_priority_tag(priority); let api_version = self.api_version; let busy_threshold = Duration::from_millis(gets[0].get_context().busy_threshold_ms as u64); @@ -1810,7 +1781,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), - group_name, + metadata, ) } @@ -1823,11 +1794,7 @@ impl Storage { ) -> impl Future>>> { const CMD: CommandKind = CommandKind::raw_batch_get; let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .as_bytes() - .to_owned(); + let metadata = 
TaskMetadata::from_ctx(ctx.get_resource_control_context()); let priority_tag = get_priority_tag(priority); let key_ranges = keys.iter().map(|k| (k.clone(), k.clone())).collect(); let resource_tag = self @@ -1910,7 +1877,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), - group_name, + metadata, ) } @@ -1973,11 +1940,8 @@ impl Storage { let concurrency_manager = self.concurrency_manager.clone(); let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .to_owned(); - self.sched_raw_command(&group_name, priority, CMD, async move { + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + self.sched_raw_command(metadata, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } @@ -2088,11 +2052,8 @@ impl Storage { let concurrency_manager = self.concurrency_manager.clone(); let deadline = Self::get_deadline(&ctx); let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .to_owned(); - self.sched_raw_command(&group_name, priority, CMD, async move { + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + self.sched_raw_command(metadata, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } @@ -2156,11 +2117,8 @@ impl Storage { let concurrency_manager = self.concurrency_manager.clone(); let deadline = Self::get_deadline(&ctx); let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .to_owned(); - self.sched_raw_command(&group_name, priority, CMD, async move { + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + self.sched_raw_command(metadata, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } @@ -2220,11 +2178,8 @@ 
impl Storage { let engine = self.engine.clone(); let deadline = Self::get_deadline(&ctx); let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .to_owned(); - self.sched_raw_command(&group_name, priority, CMD, async move { + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + self.sched_raw_command(metadata, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } @@ -2271,11 +2226,8 @@ impl Storage { let concurrency_manager = self.concurrency_manager.clone(); let deadline = Self::get_deadline(&ctx); let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .to_owned(); - self.sched_raw_command(&group_name, priority, CMD, async move { + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + self.sched_raw_command(metadata, priority, CMD, async move { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } @@ -2338,11 +2290,7 @@ impl Storage { ) -> impl Future>>> { const CMD: CommandKind = CommandKind::raw_scan; let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .as_bytes() - .to_owned(); + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag(&ctx); let api_version = self.api_version; @@ -2453,7 +2401,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), - group_name, + metadata, ) } @@ -2469,11 +2417,7 @@ impl Storage { ) -> impl Future>>> { const CMD: CommandKind = CommandKind::raw_batch_scan; let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .as_bytes() - .to_owned(); + let metadata = 
TaskMetadata::from_ctx(ctx.get_resource_control_context()); let priority_tag = get_priority_tag(priority); let key_ranges = ranges .iter() @@ -2612,7 +2556,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), - group_name, + metadata, ) } @@ -2625,11 +2569,7 @@ impl Storage { ) -> impl Future>> { const CMD: CommandKind = CommandKind::raw_get_key_ttl; let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .as_bytes() - .to_owned(); + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); let priority_tag = get_priority_tag(priority); let resource_tag = self .resource_tag_factory @@ -2694,7 +2634,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), - group_name, + metadata, ) } @@ -2718,11 +2658,8 @@ impl Storage { } let sched = self.get_scheduler(); let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .to_owned(); - self.sched_raw_command(&group_name, priority, CMD, async move { + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + self.sched_raw_command(metadata, priority, CMD, async move { let key = F::encode_raw_key_owned(key, None); let cmd = RawCompareAndSwap::new(cf, key, previous_value, value, ttl, api_version, ctx); Self::sched_raw_atomic_command( @@ -2754,11 +2691,8 @@ impl Storage { let sched = self.get_scheduler(); let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .to_owned(); - self.sched_raw_command(&group_name, priority, CMD, async move { + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + self.sched_raw_command(metadata, priority, CMD, async move { let modifies = Self::raw_batch_put_requests_to_modifies(cf, pairs, ttls, None); let cmd = RawAtomicStore::new(cf, modifies, ctx); 
Self::sched_raw_atomic_command( @@ -2782,11 +2716,8 @@ impl Storage { let cf = Self::rawkv_cf(&cf, self.api_version)?; let sched = self.get_scheduler(); let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .to_owned(); - self.sched_raw_command(&group_name, priority, CMD, async move { + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + self.sched_raw_command(metadata, priority, CMD, async move { // Do NOT encode ts here as RawAtomicStore use key to gen lock let modifies = keys .into_iter() @@ -2809,11 +2740,7 @@ impl Storage { ) -> impl Future> { const CMD: CommandKind = CommandKind::raw_checksum; let priority = ctx.get_priority(); - let group_name = ctx - .get_resource_control_context() - .get_resource_group_name() - .as_bytes() - .to_owned(); + let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); let priority_tag = get_priority_tag(priority); let key_ranges = ranges .iter() @@ -2888,7 +2815,7 @@ impl Storage { .in_resource_metering_tag(resource_tag), priority, thread_rng().next_u64(), - group_name, + metadata, ); async move { @@ -2903,7 +2830,7 @@ impl Storage { future: Fut, priority: CommandPri, task_id: u64, - group_meta: Vec, + metadata: TaskMetadata<'_>, ) -> impl Future> where Fut: Future> + Send + 'static, @@ -2916,7 +2843,7 @@ impl Storage { } Either::Right( self.read_pool - .spawn_handle(future, priority, task_id, group_meta) + .spawn_handle(future, priority, task_id, metadata) .map_err(|_| Error::from(ErrorInner::SchedTooBusy)) .and_then(|res| future::ready(res)), ) diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 5e484d385f2..5896d6562f1 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -717,10 +717,12 @@ impl Command { self.command_ext().get_ctx().get_priority() } + pub fn resource_control_ctx(&self) -> &ResourceControlContext { + 
self.command_ext().get_ctx().get_resource_control_context() + } + pub fn group_name(&self) -> String { - self.command_ext() - .get_ctx() - .get_resource_control_context() + self.resource_control_ctx() .get_resource_group_name() .to_owned() } diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index 4036de7a8b2..19736304373 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -12,7 +12,7 @@ use kvproto::{kvrpcpb::CommandPri, pdpb::QueryKind}; use pd_client::{Feature, FeatureGate}; use prometheus::local::*; use raftstore::store::WriteStats; -use resource_control::{ControlledFuture, ResourceController}; +use resource_control::{ControlledFuture, ResourceController, TaskMetadata}; use tikv_util::{ sys::SysQuota, yatp_pool::{Full, FuturePool, PoolTicker, YatpPoolBuilder}, @@ -106,7 +106,7 @@ struct PriorityQueue { impl PriorityQueue { fn spawn( &self, - group_name: &str, + metadata: TaskMetadata<'_>, priority_level: CommandPri, f: impl futures::Future + Send + 'static, ) -> Result<(), Full> { @@ -117,15 +117,16 @@ impl PriorityQueue { }; // TODO: maybe use a better way to generate task_id let task_id = rand::random::(); + let group_name = metadata.group_name().to_owned(); let mut extras = Extras::new_multilevel(task_id, fixed_level); - extras.set_metadata(group_name.as_bytes().to_owned()); + extras.set_metadata(metadata.to_vec()); self.worker_pool.spawn_with_extras( ControlledFuture::new( async move { f.await; }, self.resource_ctl.clone(), - group_name.as_bytes().to_owned(), + group_name, ), extras, ) @@ -206,7 +207,7 @@ impl SchedPool { pub fn spawn( &self, - group_name: &str, + metadata: TaskMetadata<'_>, priority_level: CommandPri, f: impl futures::Future + Send + 'static, ) -> Result<(), Full> { @@ -218,7 +219,7 @@ impl SchedPool { self.priority .as_ref() .unwrap() - .spawn(group_name, priority_level, f) + .spawn(metadata, priority_level, f) } else { fail_point!("single_queue_pool_task"); 
self.vanilla.spawn(priority_level, f) diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 06562130dc7..ad88d53532e 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -47,7 +47,7 @@ use kvproto::{ use parking_lot::{Mutex, MutexGuard, RwLockWriteGuard}; use pd_client::{Feature, FeatureGate}; use raftstore::store::TxnExt; -use resource_control::ResourceController; +use resource_control::{ResourceController, TaskMetadata}; use resource_metering::{FutureExt, ResourceTagFactory}; use smallvec::{smallvec, SmallVec}; use tikv_kv::{Modify, Snapshot, SnapshotExt, WriteData, WriteEvent}; @@ -380,7 +380,7 @@ impl TxnSchedulerInner { fn acquire_lock_on_wakeup( &self, cid: u64, - ) -> Result, (String, CommandPri, StorageError)> { + ) -> Result, (TaskMetadata<'_>, CommandPri, StorageError)> { let mut task_slot = self.get_task_slot(cid); let tctx = task_slot.get_mut(&cid).unwrap(); // Check deadline early during acquiring latches to avoid expired requests @@ -393,7 +393,11 @@ impl TxnSchedulerInner { // acquired count is one more than the `owned_count` recorded in the // lock, so we increase one to make `release` work. tctx.lock.owned_count += 1; - return Err((cmd.group_name(), cmd.priority(), e.into())); + return Err(( + TaskMetadata::from_ctx(cmd.resource_control_ctx()), + cmd.priority(), + e.into(), + )); } if self.latches.acquire(&mut tctx.lock, cid) { tctx.on_schedule(); @@ -569,47 +573,51 @@ impl TxnScheduler { let deadline = cmd.deadline(); let sched = self.clone(); self.get_sched_pool() - .spawn(&cmd.group_name(), cmd.priority(), async move { - match unsafe { - with_tls_engine(|engine: &mut E| engine.precheck_write_with_ctx(&ctx)) - } { - // Precheck failed, try to return err early. - Err(e) => { - let cb = sched.inner.try_own_and_take_cb(cid); - // The task is not processing or finished currently. It's safe - // to response early here. 
In the future, the task will be waked up - // and it will finished with DeadlineExceeded error. - // As the cb is taken here, it will not be executed anymore. - if let Some(cb) = cb { - let pr = ProcessResult::Failed { - err: StorageError::from(e), - }; - Self::early_response( - cid, - cb, - pr, - tag, - CommandStageKind::precheck_write_err, - ); + .spawn( + TaskMetadata::from_ctx(cmd.resource_control_ctx()), + cmd.priority(), + async move { + match unsafe { + with_tls_engine(|engine: &mut E| engine.precheck_write_with_ctx(&ctx)) + } { + // Precheck failed, try to return err early. + Err(e) => { + let cb = sched.inner.try_own_and_take_cb(cid); + // The task is not processing or finished currently. It's safe + // to response early here. In the future, the task will be waked up + // and it will finished with DeadlineExceeded error. + // As the cb is taken here, it will not be executed anymore. + if let Some(cb) = cb { + let pr = ProcessResult::Failed { + err: StorageError::from(e), + }; + Self::early_response( + cid, + cb, + pr, + tag, + CommandStageKind::precheck_write_err, + ); + } } - } - Ok(()) => { - SCHED_STAGE_COUNTER_VEC.get(tag).precheck_write_ok.inc(); - // Check deadline in background. - GLOBAL_TIMER_HANDLE - .delay(deadline.to_std_instant()) - .compat() - .await - .unwrap(); - let cb = sched.inner.try_own_and_take_cb(cid); - if let Some(cb) = cb { - cb.execute(ProcessResult::Failed { - err: StorageErrorInner::DeadlineExceeded.into(), - }) + Ok(()) => { + SCHED_STAGE_COUNTER_VEC.get(tag).precheck_write_ok.inc(); + // Check deadline in background. 
+ GLOBAL_TIMER_HANDLE + .delay(deadline.to_std_instant()) + .compat() + .await + .unwrap(); + let cb = sched.inner.try_own_and_take_cb(cid); + if let Some(cb) = cb { + cb.execute(ProcessResult::Failed { + err: StorageErrorInner::DeadlineExceeded.into(), + }) + } } } - } - }) + }, + ) .unwrap(); } @@ -623,12 +631,12 @@ impl TxnScheduler { self.execute(task); } Ok(None) => {} - Err((group_name, pri, err)) => { + Err((metadata, pri, err)) => { // Spawn the finish task to the pool to avoid stack overflow // when many queuing tasks fail successively. let this = self.clone(); self.get_sched_pool() - .spawn(&group_name, pri, async move { + .spawn(metadata, pri, async move { this.finish_with_err(cid, err, None); }) .unwrap(); @@ -667,9 +675,10 @@ impl TxnScheduler { fn execute(&self, mut task: Task) { set_tls_tracker_token(task.tracker); let sched = self.clone(); + let metadata = TaskMetadata::from_ctx(task.cmd.resource_control_ctx()); self.get_sched_pool() - .spawn(&task.cmd.group_name(), task.cmd.priority(), async move { + .spawn(metadata, task.cmd.priority(), async move { fail_point!("scheduler_start_execute"); if sched.check_task_deadline_exceeded(&task, None) { return; @@ -799,7 +808,7 @@ impl TxnScheduler { async_apply_prewrite: bool, new_acquired_locks: Vec, tag: CommandKind, - group_name: &str, + metadata: TaskMetadata<'_>, sched_details: &SchedulerDetails, ) { // TODO: Does async apply prewrite worth a special metric here? 
@@ -863,7 +872,7 @@ impl TxnScheduler { assert!(pipelined || async_apply_prewrite); } - self.on_acquired_locks_finished(group_name, new_acquired_locks); + self.on_acquired_locks_finished(metadata, new_acquired_locks); if do_wake_up { let woken_up_resumable_lock_requests = tctx.woken_up_resumable_lock_requests; @@ -950,7 +959,7 @@ impl TxnScheduler { fn on_release_locks( &self, - group_name: &str, + metadata: &TaskMetadata<'_>, released_locks: ReleasedLocks, ) -> SVec> { // This function is always called when holding the latch of the involved keys. @@ -994,7 +1003,7 @@ impl TxnScheduler { if !legacy_wake_up_list.is_empty() || !delay_wake_up_futures.is_empty() { self.wake_up_legacy_pessimistic_locks( - group_name, + metadata.clone(), legacy_wake_up_list, delay_wake_up_futures, ); @@ -1005,7 +1014,7 @@ impl TxnScheduler { fn on_acquired_locks_finished( &self, - group_name: &str, + metadata: TaskMetadata<'_>, new_acquired_locks: Vec, ) { if new_acquired_locks.is_empty() || self.inner.lock_wait_queues.is_empty() { @@ -1021,7 +1030,7 @@ impl TxnScheduler { } else { let lock_wait_queues = self.inner.lock_wait_queues.clone(); self.get_sched_pool() - .spawn(group_name, CommandPri::High, async move { + .spawn(metadata, CommandPri::High, async move { lock_wait_queues.update_lock_wait(new_acquired_locks); }) .unwrap(); @@ -1030,16 +1039,16 @@ impl TxnScheduler { fn wake_up_legacy_pessimistic_locks( &self, - group_name: &str, + metadata: TaskMetadata<'_>, legacy_wake_up_list: impl IntoIterator, ReleasedLock)> + Send + 'static, delayed_wake_up_futures: impl IntoIterator + Send + 'static, ) { let self1 = self.clone(); - let group_name1 = group_name.to_owned(); + let metadata1 = metadata.deep_clone(); self.get_sched_pool() - .spawn(group_name, CommandPri::High, async move { + .spawn(metadata, CommandPri::High, async move { for (lock_info, released_lock) in legacy_wake_up_list { let cb = lock_info.key_cb.unwrap().into_inner(); let e = StorageError::from(Error::from(MvccError::from( 
@@ -1057,9 +1066,10 @@ impl TxnScheduler { for f in delayed_wake_up_futures { let self2 = self1.clone(); + let metadata2 = metadata1.clone(); self1 .get_sched_pool() - .spawn(&group_name1, CommandPri::High, async move { + .spawn(metadata2, CommandPri::High, async move { let res = f.await; if let Some(resumable_lock_wait_entry) = res { self2.schedule_awakened_pessimistic_locks( @@ -1191,7 +1201,7 @@ impl TxnScheduler { let write_bytes = task.cmd.write_bytes(); let tag = task.cmd.tag(); let cid = task.cid; - let group_name = task.cmd.group_name(); + let metadata = TaskMetadata::from_ctx(task.cmd.resource_control_ctx()); let tracker = task.tracker; let scheduler = self.clone(); let quota_limiter = self.inner.quota_limiter.clone(); @@ -1342,7 +1352,7 @@ impl TxnScheduler { } let woken_up_resumable_entries = if !released_locks.is_empty() { - scheduler.on_release_locks(&group_name, released_locks) + scheduler.on_release_locks(&metadata, released_locks) } else { smallvec![] }; @@ -1363,7 +1373,7 @@ impl TxnScheduler { false, new_acquired_locks, tag, - &group_name, + metadata, sched_details, ); return; @@ -1398,7 +1408,7 @@ impl TxnScheduler { false, new_acquired_locks, tag, - &group_name, + metadata, sched_details, ); return; @@ -1593,7 +1603,7 @@ impl TxnScheduler { is_async_apply_prewrite, new_acquired_locks, tag, - &group_name, + metadata, sched_details, ); KV_COMMAND_KEYWRITE_HISTOGRAM_VEC @@ -2187,7 +2197,7 @@ mod tests { // cannot run within 500ms. 
scheduler .get_sched_pool() - .spawn("", CommandPri::Normal, async { + .spawn(TaskMetadata::default(), CommandPri::Normal, async { thread::sleep(Duration::from_millis(500)) }) .unwrap(); From 82d1c20ae1ea42970b88850f0e87af17dd5d5f60 Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 15 Jun 2023 00:23:10 -0700 Subject: [PATCH 0741/1149] resource_control: condition compile failpoint test (#14944) close tikv/tikv#14943 condition compile failpoint test Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resource_control/src/resource_group.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index ec78635d56c..9c7d0e682f1 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -920,6 +920,7 @@ pub(crate) mod tests { assert_eq!(resource_ctl_write.resource_group(b"default").weight, 10); } + #[cfg(feature = "failpoints")] #[test] fn test_reset_resource_group_vt_overflow() { let resource_manager = ResourceGroupManager::default(); From 4510531b275886bdc41fe3ad1ba462e3126a3712 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Thu, 15 Jun 2023 16:53:09 +0800 Subject: [PATCH 0742/1149] server: increase the async duration max bucket (#14924) ref tikv/tikv#14860 Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/metrics.rs | 6 +++--- src/server/metrics.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 796b49f5b49..699d861ff9a 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -419,13 +419,13 @@ lazy_static! 
{ register_histogram!( "tikv_raftstore_store_wf_commit_log_duration_seconds", "Bucketed histogram of proposals' commit and persist duration.", - exponential_buckets(0.00001, 2.0, 26).unwrap() + exponential_buckets(0.00001, 2.0, 32).unwrap() // 10us ~ 42949s. ).unwrap(); pub static ref STORE_WF_COMMIT_NOT_PERSIST_LOG_DURATION_HISTOGRAM: Histogram = register_histogram!( "tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds", "Bucketed histogram of proposals' commit but not persist duration", - exponential_buckets(0.00001, 2.0, 26).unwrap() + exponential_buckets(0.00001, 2.0, 32).unwrap() // 10us ~ 42949s. ).unwrap(); pub static ref PEER_PROPOSAL_COUNTER_VEC: IntCounterVec = @@ -457,7 +457,7 @@ lazy_static! { register_histogram!( "tikv_raftstore_commit_log_duration_seconds", "Bucketed histogram of peer commits logs duration.", - exponential_buckets(0.00001, 2.0, 26).unwrap() + exponential_buckets(0.00001, 2.0, 32).unwrap() // 10us ~ 42949s. ).unwrap(); pub static ref STORE_APPLY_LOG_HISTOGRAM: Histogram = diff --git a/src/server/metrics.rs b/src/server/metrics.rs index c287d18680d..25ff3237c6f 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -528,7 +528,7 @@ lazy_static! { "tikv_storage_engine_async_request_duration_seconds", "Bucketed histogram of processing successful asynchronous requests.", &["type"], - exponential_buckets(0.00001, 2.0, 26).unwrap() + exponential_buckets(0.00001, 2.0, 32).unwrap() // 10us ~ 42949s. 
) .unwrap(); } From e2afd2814e1a1a318776fcaee1de6ba14a46622d Mon Sep 17 00:00:00 2001 From: Liqi Geng Date: Fri, 16 Jun 2023 11:59:09 +0800 Subject: [PATCH 0743/1149] Support capture group in regexp_replace (#14938) close tikv/tikv#14937 Signed-off-by: gengliqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tidb_query_expr/src/impl_regexp.rs | 437 +++++++++++++++--- 1 file changed, 367 insertions(+), 70 deletions(-) diff --git a/components/tidb_query_expr/src/impl_regexp.rs b/components/tidb_query_expr/src/impl_regexp.rs index 2e5830740ee..a8112d41945 100644 --- a/components/tidb_query_expr/src/impl_regexp.rs +++ b/components/tidb_query_expr/src/impl_regexp.rs @@ -272,28 +272,96 @@ pub fn regexp_instr( Ok(Some(0)) } +#[derive(Clone)] +enum ReplaceInstruction { + SubstitutionNum(usize), + Literal(Vec), +} + +pub struct ReplaceMetaData { + regex: Option, + instructions: Option>, +} + +fn init_regexp_replace_data(expr: &mut Expr) -> Result { + let mut meta = ReplaceMetaData { + regex: init_regexp_data::(expr)?, + instructions: None, + }; + + let children = expr.mut_children(); + if children.len() >= 3 { + match children[2].get_tp() { + ExprType::Bytes | ExprType::String => { + meta.instructions = Some(init_replace_instructions(children[2].get_val())); + } + _ => {} + }; + } + + Ok(meta) +} + +fn init_replace_instructions(replace_expr: &[u8]) -> Vec { + let mut instructions = Vec::new(); + let len = replace_expr.len(); + let mut literal = Vec::new(); + let mut i = 0; + while i < len { + if replace_expr[i] == b'\\' { + if i + 1 >= len { + // This slash is in the end. Ignore it and break the loop. 
+ break; + } + if replace_expr[i + 1].is_ascii_digit() { + if !literal.is_empty() { + instructions.push(ReplaceInstruction::Literal(literal)); + literal = Vec::new(); + } + instructions.push(ReplaceInstruction::SubstitutionNum( + (replace_expr[i + 1] - b'0').into(), + )); + } else { + literal.push(replace_expr[i + 1]); + } + i += 2; + } else { + literal.push(replace_expr[i]); + i += 1; + } + } + if !literal.is_empty() { + instructions.push(ReplaceInstruction::Literal(literal)); + } + + instructions +} + /// Currently, TiDB only supports regular expressions for utf-8 strings. /// See https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-replace. -#[rpn_fn(nullable, raw_varg, min_args = 3, max_args = 6, capture = [metadata], metadata_mapper = init_regexp_data::)] +#[rpn_fn(nullable, raw_varg, min_args = 3, max_args = 6, capture = [metadata], metadata_mapper = init_regexp_replace_data::)] #[inline] pub fn regexp_replace( - metadata: &Option, + metadata: &ReplaceMetaData, args: &[ScalarValueRef<'_>], ) -> Result> { let expr = match args[0].as_bytes() { Some(e) => std::str::from_utf8(e)?, None => return Ok(None), }; - let regex = match metadata { + let regex = match metadata.regex.as_ref() { Some(r) => Cow::Borrowed(r), None => match build_regexp_from_args::(args, REPLACE_MATCH_IDX)? 
{ Some(r) => Cow::Owned(r), None => return Ok(None), }, }; - let replace_expr = match args[2].as_bytes() { - Some(e) => std::str::from_utf8(e)?, - None => return Ok(None), + let replace_inst = match metadata.instructions.as_ref() { + Some(i) => Cow::Borrowed(i), + None => match args[2].as_bytes() { + Some(e) => Cow::Owned(init_replace_instructions(e)), + None => return Ok(None), + }, }; let (mut before_trimmed, mut trimmed) = ("", expr); @@ -328,36 +396,56 @@ pub fn regexp_replace( } }; - if occurrence == 0 { - let rep = regex.replace_all(trimmed, replace_expr); - let mut result = String::with_capacity(before_trimmed.len() + rep.len()); - result.push_str(before_trimmed); - result.push_str(&rep); - - Ok(Some(result.into_bytes())) - } else { - if let Some(m) = regex.find_iter(trimmed).nth((occurrence - 1) as usize) { - let mut result = String::with_capacity( - before_trimmed.len() - + trimmed[..m.start()].len() - + replace_expr.len() - + trimmed[m.end()..].len(), - ); - result.push_str(before_trimmed); - result.push_str(&trimmed[..m.start()]); - result.push_str(replace_expr); - result.push_str(&trimmed[m.end()..]); - - return Ok(Some(result.into_bytes())); + let replace_work = |capture: ®ex::Captures, res: &mut Vec| -> Result<()> { + for inst in replace_inst.as_ref() { + match inst { + ReplaceInstruction::SubstitutionNum(num) => { + let m = capture.get(*num).ok_or_else(|| { + Error::regexp_error(format!( + "Substitution number is out of range: {}", + *num + )) + })?; + res.extend(m.as_str().as_bytes()); + } + ReplaceInstruction::Literal(lit) => { + res.extend(lit); + } + } } + Ok(()) + }; - Ok(Some(expr.as_bytes().to_vec())) + let mut result = Vec::new(); + result.extend(before_trimmed.as_bytes()); + let mut last_match = 0; + if occurrence == 0 { + for capture in regex.captures_iter(trimmed) { + // unwrap on 0 is OK because captures only reports matches. 
+ let m = capture.get(0).unwrap(); + result.extend(trimmed[last_match..m.start()].as_bytes()); + last_match = m.end(); + replace_work(&capture, &mut result)?; + } + } else if let Some(capture) = regex.captures_iter(trimmed).nth((occurrence - 1) as usize) { + // unwrap on 0 is OK because captures only reports matches. + let m = capture.get(0).unwrap(); + result.extend(trimmed[0..m.start()].as_bytes()); + last_match = m.end(); + replace_work(&capture, &mut result)?; } + result.extend(trimmed[last_match..].as_bytes()); + + Ok(Some(result)) } #[cfg(test)] mod tests { - use tidb_query_datatype::{codec::batch::LazyBatchColumnVec, expr::EvalContext, FieldTypeTp}; + use tidb_query_datatype::{ + codec::batch::{LazyBatchColumn, LazyBatchColumnVec}, + expr::EvalContext, + EvalType, FieldTypeTp, + }; use tipb::ScalarFuncSig; use tipb_helper::ExprDefBuilder; @@ -1232,51 +1320,260 @@ mod tests { Some("的的的的的的的的"), false, ), + ( + r"abc", + r"\d*", + r"d", + None, + None, + None, + Some(r"dadbdcd"), + false, + ), + // Test capture group + ( + r"seafood fool", + r"foo(.?)", + r"123", + Some(3), + None, + None, + Some(r"sea123 123"), + false, + ), + ( + r"seafood fool", + r"foo(.?)", + r"123", + Some(5), + None, + None, + Some(r"seafood 123"), + false, + ), + ( + r"seafood fool", + r"foo(.?)", + r"z\12", + Some(3), + None, + None, + Some(r"seazd2 zl2"), + false, + ), + ( + r"seafood fool", + r"foo(.?)", + r"z\12", + Some(5), + None, + None, + Some(r"seafood zl2"), + false, + ), + ( + r"seafood fool", + r"foo(.?)", + r"z\12", + Some(3), + Some(0), + None, + Some(r"seazd2 zl2"), + false, + ), + ( + r"seafood foo", + r"foo(.?)", + r"z\12", + Some(3), + Some(2), + None, + Some(r"seafood z2"), + false, + ), + ( + r"stackoverflow", + r"(.{5})(.*)", + r"\\+\2+\1+\2+\1\", + None, + None, + None, + Some(r"\+overflow+stack+overflow+stack"), + false, + ), + ( + r"fooabcdefghij fooABCDEFGHIJ", + r"foo(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)", + r"\\\9\\\8-\7\\\6-\5\\\4-\3\\\2-\1\\", + Some(1), + Some(0), 
+ None, + Some(r"\i\h-g\f-e\d-c\b-a\ \I\H-G\F-E\D-C\B-A\"), + false, + ), + ( + r"fool food foo", + r"foo(.?)", + r"\0+\1", + None, + None, + None, + Some(r"fool+l food+d foo+"), + false, + ), + // \2 is out of capture group's range. + ( + r"fool food foo", + r"foo(.?)", + r"\0+\2", + None, + None, + None, + None, + true, + ), + ( + r"https://go.mail/folder-1/online/ru-en/#lingvo/#1О 50000&price_ashka/rav4/page=/check.xml", + r"^https?://(?:www\\.)?([^/]+)/.*$", + r"a\12\13", + None, + None, + None, + Some(r"ago.mail2go.mail3"), + false, + ), + ( + r"http://saint-peters-total=меньше 1000-rublyayusche/catalogue/kolasuryat-v-2-kadyirovka-personal/serial_id=0&input_state/apartments/mokrotochki.net/upravda.ru/yandex.ru/GameMain.aspx?mult]/on/orders/50195&text=мыс и орелка в Балаш смотреть онлайн бесплатно в хорошем камбалакс&lr=20030393833539353862643188&op_promo=C-Teaser_id=06d162.html", + r"^https?://(?:www\\.)?([^/]+)/.*$", + r"aaa\1233", + None, + None, + None, + Some(r"aaasaint-peters-total=меньше 1000-rublyayusche233"), + false, + ), ]; for (expr, pattern, replace, pos, occur, match_type, expected, error) in cases { - let mut ctx = EvalContext::default(); - - let mut builder = - ExprDefBuilder::scalar_func(ScalarFuncSig::RegexpReplaceSig, FieldTypeTp::String); - builder = builder - .push_child(ExprDefBuilder::constant_bytes(expr.as_bytes().to_vec())) - .push_child(ExprDefBuilder::constant_bytes(pattern.as_bytes().to_vec())) - .push_child(ExprDefBuilder::constant_bytes(replace.as_bytes().to_vec())); - if let Some(p) = pos { - builder = builder.push_child(ExprDefBuilder::constant_int(p)); - } - if let Some(o) = occur { - builder = builder.push_child(ExprDefBuilder::constant_int(o)); - } - if let Some(m) = match_type { - builder = builder.push_child(ExprDefBuilder::constant_bytes(m.as_bytes().to_vec())); - } - - let node = builder.build(); - let exp = RpnExpressionBuilder::build_from_expr_tree(node, &mut ctx, 1).unwrap(); - - let schema = &[]; - let mut columns = 
LazyBatchColumnVec::empty(); - - let val = exp.eval(&mut ctx, schema, &mut columns, &[], 1); - - match val { - Ok(val) => { - assert!(val.is_vector()); - let v = val.vector_value().unwrap().as_ref().to_bytes_vec(); - assert_eq!(v.len(), 1); - assert_eq!( - v[0], - expected.map(|e| e.as_bytes().to_vec()), - "{:?} {:?} {:?}", - expr, - pattern, - replace, - ); + for i in 0..2 { + let use_column_ref = i == 1; + + let mut ctx = EvalContext::default(); + + let mut builder = ExprDefBuilder::scalar_func( + ScalarFuncSig::RegexpReplaceSig, + FieldTypeTp::String, + ); + + let mut schema = Vec::new(); + let mut columns = LazyBatchColumnVec::empty(); + if use_column_ref { + schema.extend_from_slice(&[ + FieldTypeTp::String.into(), + FieldTypeTp::String.into(), + FieldTypeTp::String.into(), + ]); + builder = builder + .push_child(ExprDefBuilder::column_ref(0, FieldTypeTp::String)) + .push_child(ExprDefBuilder::column_ref(1, FieldTypeTp::String)) + .push_child(ExprDefBuilder::column_ref(2, FieldTypeTp::String)); + let mut col_vec = Vec::new(); + let mut col = LazyBatchColumn::decoded_with_capacity_and_tp(1, EvalType::Bytes); + col.mut_decoded().push_bytes(Some(expr.as_bytes().to_vec())); + col_vec.push(col); + + let mut col = LazyBatchColumn::decoded_with_capacity_and_tp(1, EvalType::Bytes); + col.mut_decoded() + .push_bytes(Some(pattern.as_bytes().to_vec())); + col_vec.push(col); + + let mut col = LazyBatchColumn::decoded_with_capacity_and_tp(1, EvalType::Bytes); + col.mut_decoded() + .push_bytes(Some(replace.as_bytes().to_vec())); + col_vec.push(col); + + let mut count = 0; + if let Some(p) = pos { + count += 1; + schema.push(FieldTypeTp::Long.into()); + let mut col = + LazyBatchColumn::decoded_with_capacity_and_tp(1, EvalType::Int); + col.mut_decoded().push_int(Some(p)); + col_vec.push(col); + + builder = + builder.push_child(ExprDefBuilder::column_ref(3, FieldTypeTp::Long)); + } + if let Some(o) = occur { + assert_eq!(count, 1); + count += 1; + 
schema.push(FieldTypeTp::Long.into()); + let mut col = + LazyBatchColumn::decoded_with_capacity_and_tp(1, EvalType::Int); + col.mut_decoded().push_int(Some(o)); + col_vec.push(col); + + builder = + builder.push_child(ExprDefBuilder::column_ref(4, FieldTypeTp::Long)); + } + if let Some(m) = match_type { + assert_eq!(count, 2); + schema.push(FieldTypeTp::String.into()); + let mut col = + LazyBatchColumn::decoded_with_capacity_and_tp(1, EvalType::Bytes); + col.mut_decoded().push_bytes(Some(m.as_bytes().to_vec())); + col_vec.push(col); + + builder = + builder.push_child(ExprDefBuilder::column_ref(5, FieldTypeTp::String)); + } + + columns = LazyBatchColumnVec::from(col_vec); + } else { + builder = builder + .push_child(ExprDefBuilder::constant_bytes(expr.as_bytes().to_vec())) + .push_child(ExprDefBuilder::constant_bytes(pattern.as_bytes().to_vec())) + .push_child(ExprDefBuilder::constant_bytes(replace.as_bytes().to_vec())); + let mut count = 0; + if let Some(p) = pos { + count += 1; + builder = builder.push_child(ExprDefBuilder::constant_int(p)); + } + if let Some(o) = occur { + assert_eq!(count, 1); + count += 1; + builder = builder.push_child(ExprDefBuilder::constant_int(o)); + } + if let Some(m) = match_type { + assert_eq!(count, 2); + builder = builder + .push_child(ExprDefBuilder::constant_bytes(m.as_bytes().to_vec())); + } } - Err(e) => { - assert!(error, "val has error {:?}", e); + + let node = builder.build(); + let exp = RpnExpressionBuilder::build_from_expr_tree(node, &mut ctx, schema.len()) + .unwrap(); + + let val = exp.eval(&mut ctx, &schema, &mut columns, &[0], 1); + + match val { + Ok(val) => { + assert!(val.is_vector()); + let v = val.vector_value().unwrap().as_ref().to_bytes_vec(); + assert_eq!(v.len(), 1); + assert_eq!( + v[0], + expected.map(|e| e.as_bytes().to_vec()), + "{:?} {:?} {:?}", + expr, + pattern, + replace, + ); + } + Err(e) => { + assert!(error, "val has error {:?}", e); + } } } } From 36e6dcc31bc5c84ce1893a84683ede59439f043f Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Fri, 16 Jun 2023 14:39:10 +0800 Subject: [PATCH 0744/1149] log-backup: split failpoints and integration tests (#14959) close tikv/tikv#14958 Now, the `backup-stream` test case should be run by: FAIL_POINT=1 ./scripts/test --package backup-stream --test failpoints -- all ./scripts/test --package backup-stream --test integration -- all Signed-off-by: hillium Co-authored-by: Xinye Tao --- components/backup-stream/Cargo.toml | 8 +- .../backup-stream/tests/failpoints/mod.rs | 251 ++++++ .../backup-stream/tests/integration/mod.rs | 435 +++++++++++ .../backup-stream/tests/{mod.rs => suite.rs} | 734 +----------------- 4 files changed, 728 insertions(+), 700 deletions(-) create mode 100644 components/backup-stream/tests/failpoints/mod.rs create mode 100644 components/backup-stream/tests/integration/mod.rs rename components/backup-stream/tests/{mod.rs => suite.rs} (50%) diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 4869fc818d2..0cb3814602c 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -15,7 +15,13 @@ metastore-etcd = ["tonic", "etcd-client"] [[test]] name = "integration" -path = "tests/mod.rs" +path = "tests/integration/mod.rs" +test = true +harness = true + +[[test]] +name = "failpoints" +path = "tests/failpoints/mod.rs" required-features = ["failpoints"] test = true harness = true diff --git a/components/backup-stream/tests/failpoints/mod.rs b/components/backup-stream/tests/failpoints/mod.rs new file mode 100644 index 00000000000..c4b28f4686f --- /dev/null +++ b/components/backup-stream/tests/failpoints/mod.rs @@ -0,0 +1,251 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +#![feature(custom_test_frameworks)] +#![test_runner(test_util::run_failpoint_tests)] + +#[path = "../suite.rs"] +mod suite; +pub use suite::*; + +mod all { + + use std::time::Duration; + + use backup_stream::{ + metadata::{ + keys::MetaKey, + store::{Keys, MetaStore}, + }, + GetCheckpointResult, RegionCheckpointOperation, RegionSet, Task, + }; + use futures::executor::block_on; + use tikv_util::defer; + + use super::{ + make_record_key, make_split_key_at_record, mutation, run_async_test, SuiteBuilder, + }; + + #[test] + fn basic() { + let mut suite = SuiteBuilder::new_named("basic").build(); + fail::cfg("try_start_observe", "1*return").unwrap(); + + run_async_test(async { + // write data before the task starting, for testing incremental scanning. + let round1 = suite.write_records(0, 128, 1).await; + suite.must_register_task(1, "test_basic"); + suite.sync(); + let round2 = suite.write_records(256, 128, 1).await; + suite.force_flush_files("test_basic"); + suite.wait_for_flush(); + suite + .check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(Vec::as_slice), + ) + .await; + }); + suite.cluster.shutdown(); + } + #[test] + fn frequent_initial_scan() { + let mut suite = SuiteBuilder::new_named("frequent_initial_scan") + .cfg(|c| c.num_threads = 1) + .build(); + let keys = (1..1024).map(|i| make_record_key(1, i)).collect::>(); + let start_ts = suite.tso(); + suite.must_kv_prewrite( + 1, + keys.clone() + .into_iter() + .map(|k| mutation(k, b"hello, world".to_vec())) + .collect(), + make_record_key(1, 886), + start_ts, + ); + fail::cfg("scan_after_get_snapshot", "pause").unwrap(); + suite.must_register_task(1, "frequent_initial_scan"); + let commit_ts = suite.tso(); + suite.commit_keys(keys, start_ts, commit_ts); + suite.run(|| { + Task::ModifyObserve(backup_stream::ObserveOp::Stop { + region: suite.cluster.get_region(&make_record_key(1, 886)), + }) + }); + suite.run(|| { + Task::ModifyObserve(backup_stream::ObserveOp::Start { + region: 
suite.cluster.get_region(&make_record_key(1, 886)), + }) + }); + fail::cfg("scan_after_get_snapshot", "off").unwrap(); + suite.force_flush_files("frequent_initial_scan"); + suite.wait_for_flush(); + std::thread::sleep(Duration::from_secs(1)); + let c = suite.global_checkpoint(); + assert!(c > commit_ts.into_inner(), "{} vs {}", c, commit_ts); + } + #[test] + fn region_failure() { + defer! {{ + fail::remove("try_start_observe"); + }} + let mut suite = SuiteBuilder::new_named("region_failure").build(); + let keys = run_async_test(suite.write_records(0, 128, 1)); + fail::cfg("try_start_observe", "1*return").unwrap(); + suite.must_register_task(1, "region_failure"); + suite.must_shuffle_leader(1); + let keys2 = run_async_test(suite.write_records(256, 128, 1)); + suite.force_flush_files("region_failure"); + suite.wait_for_flush(); + run_async_test(suite.check_for_write_records( + suite.flushed_files.path(), + keys.union(&keys2).map(|s| s.as_slice()), + )); + } + #[test] + fn initial_scan_failure() { + defer! {{ + fail::remove("scan_and_async_send"); + }} + + let mut suite = SuiteBuilder::new_named("initial_scan_failure") + .nodes(1) + .build(); + let keys = run_async_test(suite.write_records(0, 128, 1)); + fail::cfg( + "scan_and_async_send", + "1*return(dive into the temporary dream, where the SLA never bothers)", + ) + .unwrap(); + suite.must_register_task(1, "initial_scan_failure"); + let keys2 = run_async_test(suite.write_records(256, 128, 1)); + suite.force_flush_files("initial_scan_failure"); + suite.wait_for_flush(); + run_async_test(suite.check_for_write_records( + suite.flushed_files.path(), + keys.union(&keys2).map(|s| s.as_slice()), + )); + } + #[test] + fn failed_during_refresh_region() { + defer! 
{ + fail::remove("get_last_checkpoint_of") + } + + let mut suite = SuiteBuilder::new_named("fail_to_refresh_region") + .nodes(1) + .build(); + + suite.must_register_task(1, "fail_to_refresh_region"); + let keys = run_async_test(suite.write_records(0, 128, 1)); + fail::cfg( + "get_last_checkpoint_of", + "1*return(the stream handler wants to become a batch processor, and the batch processor wants to be a stream handler.)", + ).unwrap(); + + suite.must_split(b"SOLE"); + let keys2 = run_async_test(suite.write_records(256, 128, 1)); + suite.force_flush_files("fail_to_refresh_region"); + suite.wait_for_flush(); + run_async_test(suite.check_for_write_records( + suite.flushed_files.path(), + keys.union(&keys2).map(|s| s.as_slice()), + )); + let leader = suite.cluster.leader_of_region(1).unwrap().store_id; + let (tx, rx) = std::sync::mpsc::channel(); + suite.endpoints[&leader] + .scheduler() + .schedule(Task::RegionCheckpointsOp(RegionCheckpointOperation::Get( + RegionSet::Universal, + Box::new(move |rs| { + let _ = tx.send(rs); + }), + ))) + .unwrap(); + + let regions = rx.recv_timeout(Duration::from_secs(10)).unwrap(); + assert!( + regions.iter().all(|item| { + matches!(item, GetCheckpointResult::Ok { checkpoint, .. } if checkpoint.into_inner() > 500) + }), + "{:?}", + regions + ); + } + #[test] + fn test_retry_abort() { + let mut suite = SuiteBuilder::new_named("retry_abort").nodes(1).build(); + defer! 
{ + fail::list().into_iter().for_each(|(name, _)| fail::remove(name)) + }; + + suite.must_register_task(1, "retry_abort"); + fail::cfg("subscribe_mgr_retry_start_observe_delay", "return(10)").unwrap(); + fail::cfg("try_start_observe", "return()").unwrap(); + + suite.must_split(&make_split_key_at_record(1, 42)); + std::thread::sleep(Duration::from_secs(2)); + + let error = run_async_test(suite.get_meta_cli().get_last_error("retry_abort", 1)).unwrap(); + let error = error.expect("no error uploaded"); + error + .get_error_message() + .find("retry") + .expect("error doesn't contain retry"); + fail::cfg("try_start_observe", "10*return()").unwrap(); + // Resume the task manually... + run_async_test(async { + suite + .meta_store + .delete(Keys::Key(MetaKey::pause_of("retry_abort"))) + .await?; + suite + .meta_store + .delete(Keys::Prefix(MetaKey::last_errors_of("retry_abort"))) + .await?; + backup_stream::errors::Result::Ok(()) + }) + .unwrap(); + + suite.sync(); + suite.wait_with(move |r| block_on(r.get_task_info("retry_abort")).is_ok()); + let items = run_async_test(suite.write_records(0, 128, 1)); + suite.force_flush_files("retry_abort"); + suite.wait_for_flush(); + run_async_test( + suite.check_for_write_records( + suite.flushed_files.path(), + items.iter().map(Vec::as_slice), + ), + ); + } + #[test] + fn failure_and_split() { + let mut suite = SuiteBuilder::new_named("failure_and_split") + .nodes(1) + .build(); + fail::cfg("try_start_observe0", "pause").unwrap(); + + // write data before the task starting, for testing incremental scanning. 
+ let round1 = run_async_test(suite.write_records(0, 128, 1)); + suite.must_register_task(1, "failure_and_split"); + suite.sync(); + + suite.must_split(&make_split_key_at_record(1, 42)); + suite.sync(); + std::thread::sleep(Duration::from_millis(200)); + fail::cfg("try_start_observe", "2*return").unwrap(); + fail::cfg("try_start_observe0", "off").unwrap(); + + let round2 = run_async_test(suite.write_records(256, 128, 1)); + suite.force_flush_files("failure_and_split"); + suite.wait_for_flush(); + run_async_test(suite.check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(Vec::as_slice), + )); + let cp = suite.global_checkpoint(); + assert!(cp > 512, "it is {}", cp); + suite.cluster.shutdown(); + } +} diff --git a/components/backup-stream/tests/integration/mod.rs b/components/backup-stream/tests/integration/mod.rs new file mode 100644 index 00000000000..fc92a751825 --- /dev/null +++ b/components/backup-stream/tests/integration/mod.rs @@ -0,0 +1,435 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +#![feature(custom_test_frameworks)] +#![test_runner(test_util::run_tests)] + +#[path = "../suite.rs"] +mod suite; + +mod all { + use std::time::{Duration, Instant}; + + use backup_stream::{ + errors::Error, router::TaskSelector, GetCheckpointResult, RegionCheckpointOperation, + RegionSet, Task, + }; + use futures::{Stream, StreamExt}; + use pd_client::PdClient; + use test_raftstore::IsolationFilterFactory; + use tikv_util::{box_err, defer, info, HandyRwLock}; + use tokio::time::timeout; + use txn_types::{Key, TimeStamp}; + + use super::suite::{ + make_record_key, make_split_key_at_record, mutation, run_async_test, SuiteBuilder, + }; + + #[test] + fn with_split() { + let mut suite = SuiteBuilder::new_named("with_split").build(); + run_async_test(async { + let round1 = suite.write_records(0, 128, 1).await; + suite.must_split(&make_split_key_at_record(1, 42)); + suite.must_register_task(1, "test_with_split"); + let round2 = suite.write_records(256, 128, 1).await; + suite.force_flush_files("test_with_split"); + suite.wait_for_flush(); + suite + .check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(Vec::as_slice), + ) + .await; + }); + suite.cluster.shutdown(); + } + + /// This test tests whether we can handle some weird transactions and their + /// race with initial scanning. + /// Generally, those transactions: + /// - Has N mutations, which's values are all short enough to be inlined in + /// the `Write` CF. (N > 1024) + /// - Commit the mutation set M first. (for all m in M: Nth-Of-Key(m) > + /// 1024) + /// ```text + /// |--...-----^------*---*-*--*-*-*-> (The line is the Key Space - from "" to inf) + /// +The 1024th key (* = committed mutation) + /// ``` + /// - Before committing remaining mutations, PiTR triggered initial + /// scanning. + /// - The remaining mutations are committed before the instant when initial + /// scanning get the snapshot. 
+ #[test] + fn with_split_txn() { + let mut suite = SuiteBuilder::new_named("split_txn").build(); + run_async_test(async { + let start_ts = suite.cluster.pd_client.get_tso().await.unwrap(); + let keys = (1..1960).map(|i| make_record_key(1, i)).collect::>(); + suite.must_kv_prewrite( + 1, + keys.clone() + .into_iter() + .map(|k| mutation(k, b"hello, world".to_vec())) + .collect(), + make_record_key(1, 1913), + start_ts, + ); + let commit_ts = suite.cluster.pd_client.get_tso().await.unwrap(); + suite.commit_keys(keys[1913..].to_vec(), start_ts, commit_ts); + suite.must_register_task(1, "test_split_txn"); + suite.commit_keys(keys[..1913].to_vec(), start_ts, commit_ts); + suite.force_flush_files("test_split_txn"); + suite.wait_for_flush(); + let keys_encoded = keys + .iter() + .map(|v| { + Key::from_raw(v.as_slice()) + .append_ts(commit_ts) + .into_encoded() + }) + .collect::>(); + suite + .check_for_write_records( + suite.flushed_files.path(), + keys_encoded.iter().map(Vec::as_slice), + ) + .await; + }); + suite.cluster.shutdown(); + } + + #[test] + /// This case tests whether the backup can continue when the leader failes. + fn leader_down() { + let mut suite = SuiteBuilder::new_named("leader_down").build(); + suite.must_register_task(1, "test_leader_down"); + suite.sync(); + let round1 = run_async_test(suite.write_records(0, 128, 1)); + let leader = suite.cluster.leader_of_region(1).unwrap().get_store_id(); + suite.cluster.stop_node(leader); + let round2 = run_async_test(suite.write_records(256, 128, 1)); + suite.force_flush_files("test_leader_down"); + suite.wait_for_flush(); + run_async_test(suite.check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(Vec::as_slice), + )); + suite.cluster.shutdown(); + } + + #[test] + /// This case tests whether the checkpoint ts (next backup ts) can be + /// advanced correctly when async commit is enabled. 
+ fn async_commit() { + let mut suite = SuiteBuilder::new_named("async_commit").nodes(3).build(); + run_async_test(async { + suite.must_register_task(1, "test_async_commit"); + suite.sync(); + suite.write_records(0, 128, 1).await; + let ts = suite.just_async_commit_prewrite(256, 1); + suite.write_records(258, 128, 1).await; + suite.force_flush_files("test_async_commit"); + std::thread::sleep(Duration::from_secs(4)); + assert_eq!(suite.global_checkpoint(), 256); + suite.just_commit_a_key(make_record_key(1, 256), TimeStamp::new(256), ts); + suite.force_flush_files("test_async_commit"); + suite.wait_for_flush(); + let cp = suite.global_checkpoint(); + assert!(cp > 256, "it is {:?}", cp); + }); + suite.cluster.shutdown(); + } + + #[test] + fn fatal_error() { + let mut suite = SuiteBuilder::new_named("fatal_error").nodes(3).build(); + suite.must_register_task(1, "test_fatal_error"); + suite.sync(); + run_async_test(suite.write_records(0, 1, 1)); + suite.force_flush_files("test_fatal_error"); + suite.wait_for_flush(); + run_async_test(suite.advance_global_checkpoint("test_fatal_error")).unwrap(); + let (victim, endpoint) = suite.endpoints.iter().next().unwrap(); + endpoint + .scheduler() + .schedule(Task::FatalError( + TaskSelector::ByName("test_fatal_error".to_owned()), + Box::new(Error::Other(box_err!("everything is alright"))), + )) + .unwrap(); + suite.sync(); + let err = run_async_test( + suite + .get_meta_cli() + .get_last_error("test_fatal_error", *victim), + ) + .unwrap() + .unwrap(); + info!("err"; "err" => ?err); + assert_eq!(err.error_code, error_code::backup_stream::OTHER.code); + assert!(err.error_message.contains("everything is alright")); + assert_eq!(err.store_id, *victim); + let paused = + run_async_test(suite.get_meta_cli().check_task_paused("test_fatal_error")).unwrap(); + assert!(paused); + let safepoints = suite.cluster.pd_client.gc_safepoints.rl(); + let checkpoint = suite.global_checkpoint(); + + assert!( + safepoints.iter().any(|sp| { + 
sp.serivce.contains(&format!("{}", victim)) + && sp.ttl >= Duration::from_secs(60 * 60 * 24) + && sp.safepoint.into_inner() == checkpoint - 1 + }), + "{:?}", + safepoints + ); + } + + #[test] + fn region_checkpoint_info() { + let mut suite = SuiteBuilder::new_named("checkpoint_info").nodes(1).build(); + suite.must_register_task(1, "checkpoint_info"); + suite.must_split(&make_split_key_at_record(1, 42)); + run_async_test(suite.write_records(0, 128, 1)); + suite.force_flush_files("checkpoint_info"); + suite.wait_for_flush(); + std::thread::sleep(Duration::from_secs(1)); + let (tx, rx) = std::sync::mpsc::channel(); + suite.run(|| { + let tx = tx.clone(); + Task::RegionCheckpointsOp(RegionCheckpointOperation::Get( + RegionSet::Universal, + Box::new(move |rs| { + tx.send(rs).unwrap(); + }), + )) + }); + let checkpoints = rx.recv().unwrap(); + assert!(!checkpoints.is_empty(), "{:?}", checkpoints); + assert!( + checkpoints + .iter() + .all(|cp| matches!(cp, GetCheckpointResult::Ok { checkpoint, .. } if checkpoint.into_inner() > 256)), + "{:?}", + checkpoints + ); + } + + #[test] + fn upload_checkpoint_exits_in_time() { + defer! {{ + std::env::remove_var("LOG_BACKUP_UGC_SLEEP_AND_RETURN"); + }} + let suite = SuiteBuilder::new_named("upload_checkpoint_exits_in_time") + .nodes(1) + .build(); + std::env::set_var("LOG_BACKUP_UGC_SLEEP_AND_RETURN", "meow"); + let (_, victim) = suite.endpoints.iter().next().unwrap(); + let sched = victim.scheduler(); + sched + .schedule(Task::UpdateGlobalCheckpoint("greenwoods".to_owned())) + .unwrap(); + let start = Instant::now(); + let (tx, rx) = tokio::sync::oneshot::channel(); + sched + .schedule(Task::Sync( + Box::new(move || { + tx.send(Instant::now()).unwrap(); + }), + Box::new(|_| true), + )) + .unwrap(); + let end = run_async_test(rx).unwrap(); + assert!( + end - start < Duration::from_secs(10), + "take = {:?}", + end - start + ); + } + + /// This test case tests whether we correctly handle the pessimistic locks. 
+ #[test] + fn pessimistic_lock() { + let mut suite = SuiteBuilder::new_named("pessimistic_lock").nodes(3).build(); + suite.must_kv_pessimistic_lock( + 1, + vec![make_record_key(1, 42)], + suite.tso(), + make_record_key(1, 42), + ); + suite.must_register_task(1, "pessimistic_lock"); + suite.must_kv_pessimistic_lock( + 1, + vec![make_record_key(1, 43)], + suite.tso(), + make_record_key(1, 43), + ); + let expected_tso = suite.tso().into_inner(); + suite.force_flush_files("pessimistic_lock"); + suite.wait_for_flush(); + std::thread::sleep(Duration::from_secs(1)); + run_async_test(suite.advance_global_checkpoint("pessimistic_lock")).unwrap(); + let checkpoint = run_async_test( + suite + .get_meta_cli() + .global_progress_of_task("pessimistic_lock"), + ) + .unwrap(); + // The checkpoint should be advanced: because PiTR is "Read" operation, + // which shouldn't be blocked by pessimistic locks. + assert!( + checkpoint > expected_tso, + "expected = {}; checkpoint = {}", + expected_tso, + checkpoint + ); + } + + async fn collect_all_current( + mut s: impl Stream + Unpin, + max_gap: Duration, + ) -> Vec { + let mut r = vec![]; + while let Ok(Some(x)) = timeout(max_gap, s.next()).await { + r.push(x); + } + r + } + + async fn collect_current(mut s: impl Stream + Unpin, goal: usize) -> Vec { + let mut r = vec![]; + while let Ok(Some(x)) = timeout(Duration::from_secs(10), s.next()).await { + r.push(x); + if r.len() >= goal { + return r; + } + } + r + } + + #[test] + fn subscribe_flushing() { + let mut suite = SuiteBuilder::new_named("sub_flush").build(); + let stream = suite.flush_stream(true); + for i in 1..10 { + let split_key = make_split_key_at_record(1, i * 20); + suite.must_split(&split_key); + suite.must_shuffle_leader(suite.cluster.get_region_id(&split_key)); + } + + let round1 = run_async_test(suite.write_records(0, 128, 1)); + suite.must_register_task(1, "sub_flush"); + let round2 = run_async_test(suite.write_records(256, 128, 1)); + suite.sync(); + 
suite.force_flush_files("sub_flush"); + + let mut items = run_async_test(async { + collect_current( + stream.flat_map(|(_, r)| futures::stream::iter(r.events.into_iter())), + 10, + ) + .await + }); + + items.sort_by(|x, y| x.start_key.cmp(&y.start_key)); + + println!("{:?}", items); + assert_eq!(items.len(), 10); + + assert_eq!(items.first().unwrap().start_key, Vec::::default()); + for w in items.windows(2) { + let a = &w[0]; + let b = &w[1]; + assert!(a.checkpoint > 512); + assert!(b.checkpoint > 512); + assert_eq!(a.end_key, b.start_key); + } + assert_eq!(items.last().unwrap().end_key, Vec::::default()); + + run_async_test(suite.check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(|x| x.as_slice()), + )); + } + + #[test] + fn resolved_follower() { + let mut suite = SuiteBuilder::new_named("r").build(); + let round1 = run_async_test(suite.write_records(0, 128, 1)); + suite.must_register_task(1, "r"); + suite.run(|| Task::RegionCheckpointsOp(RegionCheckpointOperation::PrepareMinTsForResolve)); + suite.sync(); + std::thread::sleep(Duration::from_secs(1)); + + let leader = suite.cluster.leader_of_region(1).unwrap(); + suite.must_shuffle_leader(1); + let round2 = run_async_test(suite.write_records(256, 128, 1)); + suite + .endpoints + .get(&leader.store_id) + .unwrap() + .scheduler() + .schedule(Task::ForceFlush("r".to_owned())) + .unwrap(); + suite.sync(); + std::thread::sleep(Duration::from_secs(2)); + run_async_test(suite.check_for_write_records( + suite.flushed_files.path(), + round1.iter().map(|x| x.as_slice()), + )); + assert!(suite.global_checkpoint() > 256); + suite.force_flush_files("r"); + suite.wait_for_flush(); + assert!(suite.global_checkpoint() > 512); + run_async_test(suite.check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(|x| x.as_slice()), + )); + } + + #[test] + fn network_partition() { + let mut suite = SuiteBuilder::new_named("network_partition") + .nodes(3) + .build(); + let stream = 
suite.flush_stream(true); + suite.must_register_task(1, "network_partition"); + let leader = suite.cluster.leader_of_region(1).unwrap(); + let round1 = run_async_test(suite.write_records(0, 64, 1)); + + suite + .cluster + .add_send_filter(IsolationFilterFactory::new(leader.store_id)); + suite.cluster.reset_leader_of_region(1); + suite + .cluster + .must_wait_for_leader_expire(leader.store_id, 1); + let leader2 = suite.cluster.leader_of_region(1).unwrap(); + assert_ne!(leader.store_id, leader2.store_id, "leader not switched."); + let ts = suite.tso(); + suite.must_kv_prewrite( + 1, + vec![mutation(make_record_key(1, 778), b"generator".to_vec())], + make_record_key(1, 778), + ts, + ); + suite.sync(); + suite.force_flush_files("network_partition"); + suite.wait_for_flush(); + + let cps = run_async_test(collect_all_current(stream, Duration::from_secs(2))); + assert!( + cps.iter() + .flat_map(|(_s, cp)| cp.events.iter().map(|resp| resp.checkpoint)) + .all(|cp| cp <= ts.into_inner()), + "ts={} cps={:?}", + ts, + cps + ); + run_async_test(suite.check_for_write_records( + suite.flushed_files.path(), + round1.iter().map(|k| k.as_slice()), + )) + } +} diff --git a/components/backup-stream/tests/mod.rs b/components/backup-stream/tests/suite.rs similarity index 50% rename from components/backup-stream/tests/mod.rs rename to components/backup-stream/tests/suite.rs index 9dc38e36320..87ea608e178 100644 --- a/components/backup-stream/tests/mod.rs +++ b/components/backup-stream/tests/suite.rs @@ -1,7 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-#![cfg(test)] - use std::{ collections::{HashMap, HashSet}, path::Path, @@ -51,11 +49,11 @@ use tikv_util::{ use txn_types::{Key, TimeStamp, WriteRef}; use walkdir::WalkDir; -fn mutation(k: Vec, v: Vec) -> Mutation { +pub fn mutation(k: Vec, v: Vec) -> Mutation { mutation_op(k, v, Op::Put) } -fn mutation_op(k: Vec, v: Vec, op: Op) -> Mutation { +pub fn mutation_op(k: Vec, v: Vec, op: Op) -> Mutation { let mut mutation = Mutation::default(); mutation.set_op(op); mutation.key = k; @@ -63,7 +61,7 @@ fn mutation_op(k: Vec, v: Vec, op: Op) -> Mutation { mutation } -fn make_table_key(table_id: i64, key: &[u8]) -> Vec { +pub fn make_table_key(table_id: i64, key: &[u8]) -> Vec { use std::io::Write; let mut table_key = b"t".to_vec(); // make it comparable to uint. @@ -74,13 +72,13 @@ fn make_table_key(table_id: i64, key: &[u8]) -> Vec { table_key } -fn make_record_key(table_id: i64, handle: u64) -> Vec { +pub fn make_record_key(table_id: i64, handle: u64) -> Vec { let mut record = make_table_key(table_id, b"_r"); record.encode_u64(handle ^ 0x8000_0000_0000_0000).unwrap(); record } -fn make_split_key_at_record(table_id: i64, handle: u64) -> Vec { +pub fn make_split_key_at_record(table_id: i64, handle: u64) -> Vec { let mut record = make_record_key(table_id, handle); // push an extra byte for don't put the key in the boundary of the region. 
// (Or the mock cluster may find wrong region for putting) @@ -95,7 +93,7 @@ fn make_encoded_record_key(table_id: i64, handle: u64, ts: u64) -> Vec { } #[derive(Clone)] -struct ErrorStore { +pub struct ErrorStore { inner: S, error_provider: Arc Result<()> + Send + Sync>, @@ -125,6 +123,7 @@ impl SuiteBuilder { self } + #[allow(dead_code)] pub fn inject_meta_store_error(mut self, f: F) -> Self where F: Fn(&str) -> Result<()> + Send + Sync + 'static, @@ -133,6 +132,7 @@ impl SuiteBuilder { self } + #[allow(dead_code)] pub fn cfg(mut self, f: impl FnOnce(&mut BackupStreamConfig) + 'static) -> Self { let old_f = self.cfg; self.cfg = Box::new(move |cfg| { @@ -227,9 +227,9 @@ impl MetaStore for ErrorStore { } pub struct Suite { - endpoints: HashMap>, - meta_store: ErrorStore, - cluster: Cluster, + pub endpoints: HashMap>, + pub meta_store: ErrorStore, + pub cluster: Cluster, tikv_cli: HashMap, log_backup_cli: HashMap, obs: HashMap, @@ -238,7 +238,7 @@ pub struct Suite { servers: Vec, temp_files: TempDir, - flushed_files: TempDir, + pub flushed_files: TempDir, case_name: String, } @@ -278,7 +278,7 @@ impl Suite { /// create a subscription stream. this has simply asserted no error, because /// in theory observing flushing should not emit error. change that if /// needed. 
- fn flush_stream( + pub fn flush_stream( &self, panic_while_fail: bool, ) -> impl Stream { @@ -371,16 +371,16 @@ impl Suite { worker.start(endpoint); } - fn get_meta_cli(&self) -> MetadataClient> { + pub fn get_meta_cli(&self) -> MetadataClient { MetadataClient::new(self.meta_store.clone(), 0) } - fn must_split(&mut self, key: &[u8]) { + pub fn must_split(&mut self, key: &[u8]) { let region = self.cluster.get_region(key); self.cluster.must_split(®ion, key); } - fn must_register_task(&self, for_table: i64, name: &str) { + pub fn must_register_task(&self, for_table: i64, name: &str) { let cli = self.get_meta_cli(); block_on(cli.insert_task_with_range( &self.simple_task(name), @@ -399,7 +399,7 @@ impl Suite { /// /// NOTE: this won't check the region consistency for now, the checkpoint /// may be weaker than expected. - fn global_checkpoint(&self) -> u64 { + pub fn global_checkpoint(&self) -> u64 { let (tx, rx) = std::sync::mpsc::channel(); self.run(|| { let tx = tx.clone(); @@ -425,7 +425,7 @@ impl Suite { .unwrap_or(0) } - async fn advance_global_checkpoint(&self, task: &str) -> Result<()> { + pub async fn advance_global_checkpoint(&self, task: &str) -> Result<()> { let cp = self.global_checkpoint(); self.meta_store .set(KeyValue( @@ -435,7 +435,12 @@ impl Suite { .await } - async fn write_records(&mut self, from: usize, n: usize, for_table: i64) -> HashSet> { + pub async fn write_records( + &mut self, + from: usize, + n: usize, + for_table: i64, + ) -> HashSet> { let mut inserted = HashSet::default(); for ts in (from..(from + n)).map(|x| x * 2) { let ts = ts as u64; @@ -456,7 +461,7 @@ impl Suite { inserted } - fn commit_keys(&mut self, keys: Vec>, start_ts: TimeStamp, commit_ts: TimeStamp) { + pub fn commit_keys(&mut self, keys: Vec>, start_ts: TimeStamp, commit_ts: TimeStamp) { let mut region_keys = HashMap::>>::new(); for k in keys { let enc_key = Key::from_raw(&k).into_encoded(); @@ -469,13 +474,13 @@ impl Suite { } } - fn just_commit_a_key(&mut self, key: 
Vec, start_ts: TimeStamp, commit_ts: TimeStamp) { + pub fn just_commit_a_key(&mut self, key: Vec, start_ts: TimeStamp, commit_ts: TimeStamp) { let enc_key = Key::from_raw(&key).into_encoded(); let region = self.cluster.get_region_id(&enc_key); self.must_kv_commit(region, vec![key], start_ts, commit_ts) } - fn just_async_commit_prewrite(&mut self, ts: u64, for_table: i64) -> TimeStamp { + pub fn just_async_commit_prewrite(&mut self, ts: u64, for_table: i64) -> TimeStamp { let key = make_record_key(for_table, ts); let muts = vec![mutation(key.clone(), b"hello, world".to_vec())]; let enc_key = Key::from_raw(&key).into_encoded(); @@ -485,19 +490,22 @@ impl Suite { ts } - fn force_flush_files(&self, task: &str) { + pub fn force_flush_files(&self, task: &str) { // TODO: use the callback to make the test more stable. self.run(|| Task::ForceFlush(task.to_owned())); self.sync(); } - fn run(&self, mut t: impl FnMut() -> Task) { + pub fn run(&self, mut t: impl FnMut() -> Task) { for worker in self.endpoints.values() { worker.scheduler().schedule(t()).unwrap(); } } - fn load_metadata_for_write_records(&self, path: &Path) -> HashMap> { + pub fn load_metadata_for_write_records( + &self, + path: &Path, + ) -> HashMap> { let mut meta_map: HashMap> = HashMap::new(); for entry in WalkDir::new(path) { let entry = entry.unwrap(); @@ -532,7 +540,7 @@ impl Suite { meta_map } - async fn check_for_write_records<'a>( + pub async fn check_for_write_records<'a>( &self, path: &Path, key_set: impl std::iter::Iterator, @@ -816,682 +824,10 @@ impl Suite { } } -fn run_async_test(test: impl Future) -> T { +pub fn run_async_test(test: impl Future) -> T { tokio::runtime::Builder::new_current_thread() .enable_all() .build() .unwrap() .block_on(test) } - -#[cfg(test)] -mod test { - use std::time::{Duration, Instant}; - - use backup_stream::{ - errors::Error, - metadata::{ - keys::MetaKey, - store::{Keys, MetaStore}, - }, - router::TaskSelector, - GetCheckpointResult, RegionCheckpointOperation, 
RegionSet, Task, - }; - use futures::{executor::block_on, Stream, StreamExt}; - use pd_client::PdClient; - use test_raftstore::IsolationFilterFactory; - use tikv_util::{box_err, defer, info, HandyRwLock}; - use tokio::time::timeout; - use txn_types::{Key, TimeStamp}; - - use crate::{ - make_record_key, make_split_key_at_record, mutation, run_async_test, SuiteBuilder, - }; - - #[test] - fn basic() { - let mut suite = super::SuiteBuilder::new_named("basic").build(); - fail::cfg("try_start_observe", "1*return").unwrap(); - - run_async_test(async { - // write data before the task starting, for testing incremental scanning. - let round1 = suite.write_records(0, 128, 1).await; - suite.must_register_task(1, "test_basic"); - suite.sync(); - let round2 = suite.write_records(256, 128, 1).await; - suite.force_flush_files("test_basic"); - suite.wait_for_flush(); - suite - .check_for_write_records( - suite.flushed_files.path(), - round1.union(&round2).map(Vec::as_slice), - ) - .await; - }); - suite.cluster.shutdown(); - } - - #[test] - fn with_split() { - let mut suite = super::SuiteBuilder::new_named("with_split").build(); - run_async_test(async { - let round1 = suite.write_records(0, 128, 1).await; - suite.must_split(&make_split_key_at_record(1, 42)); - suite.must_register_task(1, "test_with_split"); - let round2 = suite.write_records(256, 128, 1).await; - suite.force_flush_files("test_with_split"); - suite.wait_for_flush(); - suite - .check_for_write_records( - suite.flushed_files.path(), - round1.union(&round2).map(Vec::as_slice), - ) - .await; - }); - suite.cluster.shutdown(); - } - - /// This test tests whether we can handle some weird transactions and their - /// race with initial scanning. - /// Generally, those transactions: - /// - Has N mutations, which's values are all short enough to be inlined in - /// the `Write` CF. (N > 1024) - /// - Commit the mutation set M first. 
(for all m in M: Nth-Of-Key(m) > - /// 1024) - /// ```text - /// |--...-----^------*---*-*--*-*-*-> (The line is the Key Space - from "" to inf) - /// +The 1024th key (* = committed mutation) - /// ``` - /// - Before committing remaining mutations, PiTR triggered initial - /// scanning. - /// - The remaining mutations are committed before the instant when initial - /// scanning get the snapshot. - #[test] - fn with_split_txn() { - let mut suite = super::SuiteBuilder::new_named("split_txn").build(); - run_async_test(async { - let start_ts = suite.cluster.pd_client.get_tso().await.unwrap(); - let keys = (1..1960).map(|i| make_record_key(1, i)).collect::>(); - suite.must_kv_prewrite( - 1, - keys.clone() - .into_iter() - .map(|k| mutation(k, b"hello, world".to_vec())) - .collect(), - make_record_key(1, 1913), - start_ts, - ); - let commit_ts = suite.cluster.pd_client.get_tso().await.unwrap(); - suite.commit_keys(keys[1913..].to_vec(), start_ts, commit_ts); - suite.must_register_task(1, "test_split_txn"); - suite.commit_keys(keys[..1913].to_vec(), start_ts, commit_ts); - suite.force_flush_files("test_split_txn"); - suite.wait_for_flush(); - let keys_encoded = keys - .iter() - .map(|v| { - Key::from_raw(v.as_slice()) - .append_ts(commit_ts) - .into_encoded() - }) - .collect::>(); - suite - .check_for_write_records( - suite.flushed_files.path(), - keys_encoded.iter().map(Vec::as_slice), - ) - .await; - }); - suite.cluster.shutdown(); - } - - #[test] - fn frequent_initial_scan() { - let mut suite = super::SuiteBuilder::new_named("frequent_initial_scan") - .cfg(|c| c.num_threads = 1) - .build(); - let keys = (1..1024).map(|i| make_record_key(1, i)).collect::>(); - let start_ts = suite.tso(); - suite.must_kv_prewrite( - 1, - keys.clone() - .into_iter() - .map(|k| mutation(k, b"hello, world".to_vec())) - .collect(), - make_record_key(1, 886), - start_ts, - ); - fail::cfg("scan_after_get_snapshot", "pause").unwrap(); - suite.must_register_task(1, "frequent_initial_scan"); - 
let commit_ts = suite.tso(); - suite.commit_keys(keys, start_ts, commit_ts); - suite.run(|| { - Task::ModifyObserve(backup_stream::ObserveOp::Stop { - region: suite.cluster.get_region(&make_record_key(1, 886)), - }) - }); - suite.run(|| { - Task::ModifyObserve(backup_stream::ObserveOp::Start { - region: suite.cluster.get_region(&make_record_key(1, 886)), - }) - }); - fail::cfg("scan_after_get_snapshot", "off").unwrap(); - suite.force_flush_files("frequent_initial_scan"); - suite.wait_for_flush(); - std::thread::sleep(Duration::from_secs(1)); - let c = suite.global_checkpoint(); - assert!(c > commit_ts.into_inner(), "{} vs {}", c, commit_ts); - } - - #[test] - /// This case tests whether the backup can continue when the leader failes. - fn leader_down() { - let mut suite = super::SuiteBuilder::new_named("leader_down").build(); - suite.must_register_task(1, "test_leader_down"); - suite.sync(); - let round1 = run_async_test(suite.write_records(0, 128, 1)); - let leader = suite.cluster.leader_of_region(1).unwrap().get_store_id(); - suite.cluster.stop_node(leader); - let round2 = run_async_test(suite.write_records(256, 128, 1)); - suite.force_flush_files("test_leader_down"); - suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( - suite.flushed_files.path(), - round1.union(&round2).map(Vec::as_slice), - )); - suite.cluster.shutdown(); - } - - #[test] - /// This case tests whether the checkpoint ts (next backup ts) can be - /// advanced correctly when async commit is enabled. 
- fn async_commit() { - let mut suite = super::SuiteBuilder::new_named("async_commit") - .nodes(3) - .build(); - run_async_test(async { - suite.must_register_task(1, "test_async_commit"); - suite.sync(); - suite.write_records(0, 128, 1).await; - let ts = suite.just_async_commit_prewrite(256, 1); - suite.write_records(258, 128, 1).await; - suite.force_flush_files("test_async_commit"); - std::thread::sleep(Duration::from_secs(4)); - assert_eq!(suite.global_checkpoint(), 256); - suite.just_commit_a_key(make_record_key(1, 256), TimeStamp::new(256), ts); - suite.force_flush_files("test_async_commit"); - suite.wait_for_flush(); - let cp = suite.global_checkpoint(); - assert!(cp > 256, "it is {:?}", cp); - }); - suite.cluster.shutdown(); - } - - #[test] - fn fatal_error() { - let mut suite = super::SuiteBuilder::new_named("fatal_error") - .nodes(3) - .build(); - suite.must_register_task(1, "test_fatal_error"); - suite.sync(); - run_async_test(suite.write_records(0, 1, 1)); - suite.force_flush_files("test_fatal_error"); - suite.wait_for_flush(); - run_async_test(suite.advance_global_checkpoint("test_fatal_error")).unwrap(); - let (victim, endpoint) = suite.endpoints.iter().next().unwrap(); - endpoint - .scheduler() - .schedule(Task::FatalError( - TaskSelector::ByName("test_fatal_error".to_owned()), - Box::new(Error::Other(box_err!("everything is alright"))), - )) - .unwrap(); - suite.sync(); - let err = run_async_test( - suite - .get_meta_cli() - .get_last_error("test_fatal_error", *victim), - ) - .unwrap() - .unwrap(); - info!("err"; "err" => ?err); - assert_eq!(err.error_code, error_code::backup_stream::OTHER.code); - assert!(err.error_message.contains("everything is alright")); - assert_eq!(err.store_id, *victim); - let paused = - run_async_test(suite.get_meta_cli().check_task_paused("test_fatal_error")).unwrap(); - assert!(paused); - let safepoints = suite.cluster.pd_client.gc_safepoints.rl(); - let checkpoint = suite.global_checkpoint(); - - assert!( - 
safepoints.iter().any(|sp| { - sp.serivce.contains(&format!("{}", victim)) - && sp.ttl >= Duration::from_secs(60 * 60 * 24) - && sp.safepoint.into_inner() == checkpoint - 1 - }), - "{:?}", - safepoints - ); - } - - #[test] - fn region_checkpoint_info() { - let mut suite = super::SuiteBuilder::new_named("checkpoint_info") - .nodes(1) - .build(); - suite.must_register_task(1, "checkpoint_info"); - suite.must_split(&make_split_key_at_record(1, 42)); - run_async_test(suite.write_records(0, 128, 1)); - suite.force_flush_files("checkpoint_info"); - suite.wait_for_flush(); - std::thread::sleep(Duration::from_secs(1)); - let (tx, rx) = std::sync::mpsc::channel(); - suite.run(|| { - let tx = tx.clone(); - Task::RegionCheckpointsOp(RegionCheckpointOperation::Get( - RegionSet::Universal, - Box::new(move |rs| { - tx.send(rs).unwrap(); - }), - )) - }); - let checkpoints = rx.recv().unwrap(); - assert!(!checkpoints.is_empty(), "{:?}", checkpoints); - assert!( - checkpoints - .iter() - .all(|cp| matches!(cp, GetCheckpointResult::Ok { checkpoint, .. } if checkpoint.into_inner() > 256)), - "{:?}", - checkpoints - ); - } - - #[test] - fn region_failure() { - defer! {{ - fail::remove("try_start_observe"); - }} - let mut suite = SuiteBuilder::new_named("region_failure").build(); - let keys = run_async_test(suite.write_records(0, 128, 1)); - fail::cfg("try_start_observe", "1*return").unwrap(); - suite.must_register_task(1, "region_failure"); - suite.must_shuffle_leader(1); - let keys2 = run_async_test(suite.write_records(256, 128, 1)); - suite.force_flush_files("region_failure"); - suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( - suite.flushed_files.path(), - keys.union(&keys2).map(|s| s.as_slice()), - )); - } - - #[test] - fn initial_scan_failure() { - defer! 
{{ - fail::remove("scan_and_async_send"); - }} - - let mut suite = SuiteBuilder::new_named("initial_scan_failure") - .nodes(1) - .build(); - let keys = run_async_test(suite.write_records(0, 128, 1)); - fail::cfg( - "scan_and_async_send", - "1*return(dive into the temporary dream, where the SLA never bothers)", - ) - .unwrap(); - suite.must_register_task(1, "initial_scan_failure"); - let keys2 = run_async_test(suite.write_records(256, 128, 1)); - suite.force_flush_files("initial_scan_failure"); - suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( - suite.flushed_files.path(), - keys.union(&keys2).map(|s| s.as_slice()), - )); - } - - #[test] - fn upload_checkpoint_exits_in_time() { - defer! {{ - std::env::remove_var("LOG_BACKUP_UGC_SLEEP_AND_RETURN"); - }} - let suite = SuiteBuilder::new_named("upload_checkpoint_exits_in_time") - .nodes(1) - .build(); - std::env::set_var("LOG_BACKUP_UGC_SLEEP_AND_RETURN", "meow"); - let (_, victim) = suite.endpoints.iter().next().unwrap(); - let sched = victim.scheduler(); - sched - .schedule(Task::UpdateGlobalCheckpoint("greenwoods".to_owned())) - .unwrap(); - let start = Instant::now(); - let (tx, rx) = tokio::sync::oneshot::channel(); - sched - .schedule(Task::Sync( - Box::new(move || { - tx.send(Instant::now()).unwrap(); - }), - Box::new(|_| true), - )) - .unwrap(); - let end = run_async_test(rx).unwrap(); - assert!( - end - start < Duration::from_secs(10), - "take = {:?}", - end - start - ); - } - - #[test] - fn failed_during_refresh_region() { - defer! 
{ - fail::remove("get_last_checkpoint_of") - } - - let mut suite = SuiteBuilder::new_named("fail_to_refresh_region") - .nodes(1) - .build(); - - suite.must_register_task(1, "fail_to_refresh_region"); - let keys = run_async_test(suite.write_records(0, 128, 1)); - fail::cfg( - "get_last_checkpoint_of", - "1*return(the stream handler wants to become a batch processor, and the batch processor wants to be a stream handler.)", - ).unwrap(); - - suite.must_split(b"SOLE"); - let keys2 = run_async_test(suite.write_records(256, 128, 1)); - suite.force_flush_files("fail_to_refresh_region"); - suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( - suite.flushed_files.path(), - keys.union(&keys2).map(|s| s.as_slice()), - )); - let leader = suite.cluster.leader_of_region(1).unwrap().store_id; - let (tx, rx) = std::sync::mpsc::channel(); - suite.endpoints[&leader] - .scheduler() - .schedule(Task::RegionCheckpointsOp(RegionCheckpointOperation::Get( - RegionSet::Universal, - Box::new(move |rs| { - let _ = tx.send(rs); - }), - ))) - .unwrap(); - - let regions = rx.recv_timeout(Duration::from_secs(10)).unwrap(); - assert!( - regions.iter().all(|item| { - matches!(item, GetCheckpointResult::Ok { checkpoint, .. } if checkpoint.into_inner() > 500) - }), - "{:?}", - regions - ); - } - - /// This test case tests whether we correctly handle the pessimistic locks. 
- #[test] - fn pessimistic_lock() { - let mut suite = SuiteBuilder::new_named("pessimistic_lock").nodes(3).build(); - suite.must_kv_pessimistic_lock( - 1, - vec![make_record_key(1, 42)], - suite.tso(), - make_record_key(1, 42), - ); - suite.must_register_task(1, "pessimistic_lock"); - suite.must_kv_pessimistic_lock( - 1, - vec![make_record_key(1, 43)], - suite.tso(), - make_record_key(1, 43), - ); - let expected_tso = suite.tso().into_inner(); - suite.force_flush_files("pessimistic_lock"); - suite.wait_for_flush(); - std::thread::sleep(Duration::from_secs(1)); - run_async_test(suite.advance_global_checkpoint("pessimistic_lock")).unwrap(); - let checkpoint = run_async_test( - suite - .get_meta_cli() - .global_progress_of_task("pessimistic_lock"), - ) - .unwrap(); - // The checkpoint should be advanced: because PiTR is "Read" operation, - // which shouldn't be blocked by pessimistic locks. - assert!( - checkpoint > expected_tso, - "expected = {}; checkpoint = {}", - expected_tso, - checkpoint - ); - } - - async fn collect_all_current( - mut s: impl Stream + Unpin, - max_gap: Duration, - ) -> Vec { - let mut r = vec![]; - while let Ok(Some(x)) = timeout(max_gap, s.next()).await { - r.push(x); - } - r - } - - async fn collect_current(mut s: impl Stream + Unpin, goal: usize) -> Vec { - let mut r = vec![]; - while let Ok(Some(x)) = timeout(Duration::from_secs(10), s.next()).await { - r.push(x); - if r.len() >= goal { - return r; - } - } - r - } - - #[test] - fn subscribe_flushing() { - let mut suite = super::SuiteBuilder::new_named("sub_flush").build(); - let stream = suite.flush_stream(true); - for i in 1..10 { - let split_key = make_split_key_at_record(1, i * 20); - suite.must_split(&split_key); - suite.must_shuffle_leader(suite.cluster.get_region_id(&split_key)); - } - - let round1 = run_async_test(suite.write_records(0, 128, 1)); - suite.must_register_task(1, "sub_flush"); - let round2 = run_async_test(suite.write_records(256, 128, 1)); - suite.sync(); - 
suite.force_flush_files("sub_flush"); - - let mut items = run_async_test(async { - collect_current( - stream.flat_map(|(_, r)| futures::stream::iter(r.events.into_iter())), - 10, - ) - .await - }); - - items.sort_by(|x, y| x.start_key.cmp(&y.start_key)); - - println!("{:?}", items); - assert_eq!(items.len(), 10); - - assert_eq!(items.first().unwrap().start_key, Vec::::default()); - for w in items.windows(2) { - let a = &w[0]; - let b = &w[1]; - assert!(a.checkpoint > 512); - assert!(b.checkpoint > 512); - assert_eq!(a.end_key, b.start_key); - } - assert_eq!(items.last().unwrap().end_key, Vec::::default()); - - run_async_test(suite.check_for_write_records( - suite.flushed_files.path(), - round1.union(&round2).map(|x| x.as_slice()), - )); - } - - #[test] - fn failure_and_split() { - let mut suite = super::SuiteBuilder::new_named("failure_and_split") - .nodes(1) - .build(); - fail::cfg("try_start_observe0", "pause").unwrap(); - - // write data before the task starting, for testing incremental scanning. 
- let round1 = run_async_test(suite.write_records(0, 128, 1)); - suite.must_register_task(1, "failure_and_split"); - suite.sync(); - - suite.must_split(&make_split_key_at_record(1, 42)); - suite.sync(); - std::thread::sleep(Duration::from_millis(200)); - fail::cfg("try_start_observe", "2*return").unwrap(); - fail::cfg("try_start_observe0", "off").unwrap(); - - let round2 = run_async_test(suite.write_records(256, 128, 1)); - suite.force_flush_files("failure_and_split"); - suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( - suite.flushed_files.path(), - round1.union(&round2).map(Vec::as_slice), - )); - let cp = suite.global_checkpoint(); - assert!(cp > 512, "it is {}", cp); - suite.cluster.shutdown(); - } - - #[test] - fn resolved_follower() { - let mut suite = super::SuiteBuilder::new_named("r").build(); - let round1 = run_async_test(suite.write_records(0, 128, 1)); - suite.must_register_task(1, "r"); - suite.run(|| Task::RegionCheckpointsOp(RegionCheckpointOperation::PrepareMinTsForResolve)); - suite.sync(); - std::thread::sleep(Duration::from_secs(1)); - - let leader = suite.cluster.leader_of_region(1).unwrap(); - suite.must_shuffle_leader(1); - let round2 = run_async_test(suite.write_records(256, 128, 1)); - suite - .endpoints - .get(&leader.store_id) - .unwrap() - .scheduler() - .schedule(Task::ForceFlush("r".to_owned())) - .unwrap(); - suite.sync(); - std::thread::sleep(Duration::from_secs(2)); - run_async_test(suite.check_for_write_records( - suite.flushed_files.path(), - round1.iter().map(|x| x.as_slice()), - )); - assert!(suite.global_checkpoint() > 256); - suite.force_flush_files("r"); - suite.wait_for_flush(); - assert!(suite.global_checkpoint() > 512); - run_async_test(suite.check_for_write_records( - suite.flushed_files.path(), - round1.union(&round2).map(|x| x.as_slice()), - )); - } - - #[test] - fn network_partition() { - let mut suite = super::SuiteBuilder::new_named("network_partition") - .nodes(3) - .build(); - let stream = 
suite.flush_stream(true); - suite.must_register_task(1, "network_partition"); - let leader = suite.cluster.leader_of_region(1).unwrap(); - let round1 = run_async_test(suite.write_records(0, 64, 1)); - - suite - .cluster - .add_send_filter(IsolationFilterFactory::new(leader.store_id)); - suite.cluster.reset_leader_of_region(1); - suite - .cluster - .must_wait_for_leader_expire(leader.store_id, 1); - let leader2 = suite.cluster.leader_of_region(1).unwrap(); - assert_ne!(leader.store_id, leader2.store_id, "leader not switched."); - let ts = suite.tso(); - suite.must_kv_prewrite( - 1, - vec![mutation(make_record_key(1, 778), b"generator".to_vec())], - make_record_key(1, 778), - ts, - ); - suite.sync(); - suite.force_flush_files("network_partition"); - suite.wait_for_flush(); - - let cps = run_async_test(collect_all_current(stream, Duration::from_secs(2))); - assert!( - cps.iter() - .flat_map(|(_s, cp)| cp.events.iter().map(|resp| resp.checkpoint)) - .all(|cp| cp <= ts.into_inner()), - "ts={} cps={:?}", - ts, - cps - ); - run_async_test(suite.check_for_write_records( - suite.flushed_files.path(), - round1.iter().map(|k| k.as_slice()), - )) - } - - #[test] - fn test_retry_abort() { - let mut suite = super::SuiteBuilder::new_named("retry_abort") - .nodes(1) - .build(); - defer! { - fail::list().into_iter().for_each(|(name, _)| fail::remove(name)) - }; - - suite.must_register_task(1, "retry_abort"); - fail::cfg("subscribe_mgr_retry_start_observe_delay", "return(10)").unwrap(); - fail::cfg("try_start_observe", "return()").unwrap(); - - suite.must_split(&make_split_key_at_record(1, 42)); - std::thread::sleep(Duration::from_secs(2)); - - let error = run_async_test(suite.get_meta_cli().get_last_error("retry_abort", 1)).unwrap(); - let error = error.expect("no error uploaded"); - error - .get_error_message() - .find("retry") - .expect("error doesn't contain retry"); - fail::cfg("try_start_observe", "10*return()").unwrap(); - // Resume the task manually... 
- run_async_test(async { - suite - .meta_store - .delete(Keys::Key(MetaKey::pause_of("retry_abort"))) - .await?; - suite - .meta_store - .delete(Keys::Prefix(MetaKey::last_errors_of("retry_abort"))) - .await?; - backup_stream::errors::Result::Ok(()) - }) - .unwrap(); - - suite.sync(); - suite.wait_with(move |r| block_on(r.get_task_info("retry_abort")).is_ok()); - let items = run_async_test(suite.write_records(0, 128, 1)); - suite.force_flush_files("retry_abort"); - suite.wait_for_flush(); - run_async_test( - suite.check_for_write_records( - suite.flushed_files.path(), - items.iter().map(Vec::as_slice), - ), - ); - } -} From 0daa38c454c8d518a62f372f5fe679c1e858871e Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Sat, 17 Jun 2023 05:39:08 +0800 Subject: [PATCH 0745/1149] pd_client: reduce PD reconnection (#14954) ref tikv/pd#6556, close tikv/tikv#14964 Signed-off-by: Ryan Leung Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/pd_client/src/client.rs | 1 + components/pd_client/src/client_v2.rs | 2 +- components/pd_client/src/config.rs | 2 +- components/pd_client/src/util.rs | 10 +++++----- etc/config-template.toml | 2 +- tests/failpoints/cases/test_pd_client.rs | 4 ++-- tests/failpoints/cases/test_pd_client_legacy.rs | 8 ++++---- tests/integrations/pd/test_rpc_client.rs | 10 +++++----- tests/integrations/pd/test_rpc_client_legacy.rs | 12 ++++++------ 9 files changed, 26 insertions(+), 25 deletions(-) diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index ed42547b998..6aeecc3bf65 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -103,6 +103,7 @@ impl RpcClient { target, tso.unwrap(), cfg.enable_forwarding, + cfg.retry_interval.0, )), monitor: monitor.clone(), }; diff --git a/components/pd_client/src/client_v2.rs b/components/pd_client/src/client_v2.rs index 11224ad894e..5b0d563f2b8 100644 --- a/components/pd_client/src/client_v2.rs +++ 
b/components/pd_client/src/client_v2.rs @@ -413,7 +413,7 @@ async fn reconnect_loop( use tikv_util::config::ReadableDuration; ReadableDuration::from_str(&s.unwrap()).unwrap().0 }); - request_timeout() + cfg.retry_interval.0 })(); let mut last_connect = StdInstant::now(); loop { diff --git a/components/pd_client/src/config.rs b/components/pd_client/src/config.rs index a02c2272490..f42cc3528d7 100644 --- a/components/pd_client/src/config.rs +++ b/components/pd_client/src/config.rs @@ -16,7 +16,7 @@ pub struct Config { /// /// Default is `"127.0.0.1:2379"`. pub endpoints: Vec, - /// The interval at which to retry a PD connection initialization. + /// The interval at which to retry a PD connection. /// /// Default is 300ms. pub retry_interval: ReadableDuration, diff --git a/components/pd_client/src/util.rs b/components/pd_client/src/util.rs index f3a8451f321..6fd8aac679f 100644 --- a/components/pd_client/src/util.rs +++ b/components/pd_client/src/util.rs @@ -49,7 +49,6 @@ const MAX_RETRY_TIMES: u64 = 5; const MAX_RETRY_DURATION: Duration = Duration::from_secs(10); // FIXME: Use a request-independent way to handle reconnection. 
-const GLOBAL_RECONNECT_INTERVAL: Duration = Duration::from_millis(100); // 0.1s pub const REQUEST_RECONNECT_INTERVAL: Duration = Duration::from_secs(1); // 1s #[derive(Clone)] @@ -160,6 +159,7 @@ pub struct Client { pub(crate) inner: RwLock, pub feature_gate: FeatureGate, enable_forwarding: bool, + retry_interval: Duration, } impl Client { @@ -171,6 +171,7 @@ impl Client { target: TargetInfo, tso: TimestampOracle, enable_forwarding: bool, + retry_interval: Duration, ) -> Client { if !target.direct_connected() { REQUEST_FORWARDED_GAUGE_VEC @@ -206,6 +207,7 @@ impl Client { }), feature_gate: FeatureGate::default(), enable_forwarding, + retry_interval, } } @@ -333,8 +335,7 @@ impl Client { let future = { let inner = self.inner.rl(); - if start.saturating_duration_since(inner.last_try_reconnect) < GLOBAL_RECONNECT_INTERVAL - { + if start.saturating_duration_since(inner.last_try_reconnect) < self.retry_interval { // Avoid unnecessary updating. // Prevent a large number of reconnections in a short time. PD_RECONNECT_COUNTER_VEC @@ -360,8 +361,7 @@ impl Client { { let mut inner = self.inner.wl(); - if start.saturating_duration_since(inner.last_try_reconnect) < GLOBAL_RECONNECT_INTERVAL - { + if start.saturating_duration_since(inner.last_try_reconnect) < self.retry_interval { // There may be multiple reconnections that pass the read lock at the same time. // Check again in the write lock to avoid unnecessary updating. PD_RECONNECT_COUNTER_VEC diff --git a/etc/config-template.toml b/etc/config-template.toml index 236dd9991c1..3e31375e6a9 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -344,7 +344,7 @@ ## PD endpoints. # endpoints = ["127.0.0.1:2379"] -## The interval at which to retry a PD connection initialization. +## The interval at which to retry a PD connection. ## Default is 300ms. 
# retry-interval = "300ms" diff --git a/tests/failpoints/cases/test_pd_client.rs b/tests/failpoints/cases/test_pd_client.rs index 92942fa90f9..0115d6d7ba5 100644 --- a/tests/failpoints/cases/test_pd_client.rs +++ b/tests/failpoints/cases/test_pd_client.rs @@ -119,8 +119,8 @@ fn test_slow_periodical_update() { fail::cfg(pd_client_reconnect_fp, "pause").unwrap(); // Wait for the PD client thread blocking on the fail point. - // The GLOBAL_RECONNECT_INTERVAL is 0.1s so sleeps 0.2s here. - thread::sleep(Duration::from_millis(200)); + // The retry interval is 300ms so sleeps 400ms here. + thread::sleep(Duration::from_millis(400)); let (tx, rx) = mpsc::channel(); let handle = thread::spawn(move || { diff --git a/tests/failpoints/cases/test_pd_client_legacy.rs b/tests/failpoints/cases/test_pd_client_legacy.rs index d6cf7f1817d..ac427c29e69 100644 --- a/tests/failpoints/cases/test_pd_client_legacy.rs +++ b/tests/failpoints/cases/test_pd_client_legacy.rs @@ -220,8 +220,8 @@ fn test_slow_periodical_update() { fail::cfg(pd_client_reconnect_fp, "pause").unwrap(); // Wait for the PD client thread blocking on the fail point. - // The GLOBAL_RECONNECT_INTERVAL is 0.1s so sleeps 0.2s here. - thread::sleep(Duration::from_millis(200)); + // The retry interval is 300ms so sleeps 400ms here. + thread::sleep(Duration::from_millis(400)); let (tx, rx) = mpsc::channel(); let handle = thread::spawn(move || { @@ -245,8 +245,8 @@ fn test_reconnect_limit() { let pd_client_reconnect_fp = "pd_client_reconnect"; let (_server, client) = new_test_server_and_client(ReadableDuration::secs(100)); - // The GLOBAL_RECONNECT_INTERVAL is 0.1s so sleeps 0.2s here. - thread::sleep(Duration::from_millis(200)); + // The default retry interval is 300ms so sleeps 400ms here. + thread::sleep(Duration::from_millis(400)); // The first reconnection will succeed, and the last_update will not be updated. 
fail::cfg(pd_client_reconnect_fp, "return").unwrap(); diff --git a/tests/integrations/pd/test_rpc_client.rs b/tests/integrations/pd/test_rpc_client.rs index ca37318aa8b..f0142f72176 100644 --- a/tests/integrations/pd/test_rpc_client.rs +++ b/tests/integrations/pd/test_rpc_client.rs @@ -366,8 +366,8 @@ fn restart_leader(mgr: SecurityManager) { server.stop(); server.start(&mgr, eps); - // The GLOBAL_RECONNECT_INTERVAL is 0.1s so sleeps 0.2s here. - thread::sleep(Duration::from_millis(200)); + // The default retry interval is 300ms so sleeps 400ms here. + thread::sleep(Duration::from_millis(400)); let region = block_on(client.get_region_by_id(region.get_id())).unwrap(); assert_eq!(region.unwrap().get_id(), region_id); @@ -604,9 +604,9 @@ fn test_cluster_version() { assert!(feature_gate.can_enable(feature_b)); assert!(!feature_gate.can_enable(feature_c)); - // After reconnect the version should be still accessable. - // The GLOBAL_RECONNECT_INTERVAL is 0.1s so sleeps 0.2s here. - thread::sleep(Duration::from_millis(200)); + // After reconnect the version should be still accessible. + // The default retry interval is 300ms so sleeps 400ms here. + thread::sleep(Duration::from_millis(400)); client.reconnect().unwrap(); assert!(feature_gate.can_enable(feature_b)); assert!(!feature_gate.can_enable(feature_c)); diff --git a/tests/integrations/pd/test_rpc_client_legacy.rs b/tests/integrations/pd/test_rpc_client_legacy.rs index d2ff6d6ac11..f0226336dbd 100644 --- a/tests/integrations/pd/test_rpc_client_legacy.rs +++ b/tests/integrations/pd/test_rpc_client_legacy.rs @@ -427,8 +427,8 @@ fn restart_leader(mgr: SecurityManager) { server.stop(); server.start(&mgr, eps); - // The GLOBAL_RECONNECT_INTERVAL is 0.1s so sleeps 0.2s here. - thread::sleep(Duration::from_millis(200)); + // The default retry interval is 300ms so sleeps 400ms here. 
+ thread::sleep(Duration::from_millis(400)); let region = block_on(client.get_region_by_id(region.get_id())).unwrap(); assert_eq!(region.unwrap().get_id(), region_id); @@ -518,7 +518,7 @@ fn test_pd_client_heartbeat_send_failed() { RegionStat::default(), None, )); - let rsp = rx.recv_timeout(Duration::from_millis(100)); + let rsp = rx.recv_timeout(Duration::from_millis(300)); if ok { assert!(rsp.is_ok()); assert_eq!(rsp.unwrap().get_region_id(), 1); @@ -677,9 +677,9 @@ fn test_cluster_version() { assert!(feature_gate.can_enable(feature_b)); assert!(!feature_gate.can_enable(feature_c)); - // After reconnect the version should be still accessable. - // The GLOBAL_RECONNECT_INTERVAL is 0.1s so sleeps 0.2s here. - thread::sleep(Duration::from_millis(200)); + // After reconnect the version should be still accessible. + // The default retry interval is 300ms so sleeps 400ms here. + thread::sleep(Duration::from_millis(400)); client.reconnect().unwrap(); assert!(feature_gate.can_enable(feature_b)); assert!(!feature_gate.can_enable(feature_c)); From abee321fa940327f9bed25b605a8003741225cab Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 19 Jun 2023 14:15:10 +0800 Subject: [PATCH 0746/1149] raftstore: implement get/set high priority background threads (#14955) ref tikv/tikv#12842 implement get/set high priority background threads Signed-off-by: SpadeA-Tang --- Cargo.lock | 6 +- components/engine_traits/src/flush.rs | 1 + src/config/configurable.rs | 28 ++++++++++ .../integrations/raftstore/test_scale_pool.rs | 55 ++++++++++++++++++- 4 files changed, 86 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 993ebfe6816..b2a2e01b32a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3089,7 +3089,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#2096b9a161f93e437f7adee49e68cd1570aea42f" +source = 
"git+https://github.com/tikv/rust-rocksdb.git#489b049dc974f9b164b7e6150ee5d9466847f74f" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3108,7 +3108,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#2096b9a161f93e437f7adee49e68cd1570aea42f" +source = "git+https://github.com/tikv/rust-rocksdb.git#489b049dc974f9b164b7e6150ee5d9466847f74f" dependencies = [ "bzip2-sys", "cc", @@ -5078,7 +5078,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#2096b9a161f93e437f7adee49e68cd1570aea42f" +source = "git+https://github.com/tikv/rust-rocksdb.git#489b049dc974f9b164b7e6150ee5d9466847f74f" dependencies = [ "libc 0.2.139", "librocksdb_sys", diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index d79ee9631ca..68a07478bc6 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -191,6 +191,7 @@ impl PersistenceListener { /// /// `largest_seqno` should be the largest seqno of the generated file. pub fn on_flush_completed(&self, cf: &str, largest_seqno: u64, file_no: u64) { + fail_point!("on_flush_completed"); // Maybe we should hook the compaction to avoid the file is compacted before // being recorded. 
let offset = data_cf_offset(cf); diff --git a/src/config/configurable.rs b/src/config/configurable.rs index 142d14a0304..2f2dd66381d 100644 --- a/src/config/configurable.rs +++ b/src/config/configurable.rs @@ -17,6 +17,7 @@ pub trait ConfigurableDb { fn set_flush_size(&self, f: usize) -> ConfigRes; fn set_flush_oldest_first(&self, f: bool) -> ConfigRes; fn set_shared_block_cache_capacity(&self, capacity: usize) -> ConfigRes; + fn set_high_priority_background_threads(&self, n: i32, allow_reduce: bool) -> ConfigRes; } impl ConfigurableDb for RocksEngine { @@ -66,6 +67,21 @@ impl ConfigurableDb for RocksEngine { opt.set_block_cache_capacity(capacity as u64) .map_err(Box::from) } + + fn set_high_priority_background_threads(&self, n: i32, allow_reduce: bool) -> ConfigRes { + assert!(n > 0); + if let Some(env) = self.as_inner().as_ref().env() { + let origin_threads = env.get_high_priority_background_threads(); + if n > origin_threads || allow_reduce { + env.set_high_priority_background_threads(n); + } + Ok(()) + } else { + Err(Box::from( + "set high priority background threads failed as env is not set".to_string(), + )) + } + } } fn loop_registry( @@ -176,4 +192,16 @@ impl ConfigurableDb for TabletRegistry { } }) } + + fn set_high_priority_background_threads(&self, n: i32, allow_reduce: bool) -> ConfigRes { + assert!(n > 0); + loop_registry(self, |cache| { + if let Some(latest) = cache.latest() { + latest.set_high_priority_background_threads(n, allow_reduce)?; + Ok(false) + } else { + Ok(true) + } + }) + } } diff --git a/tests/integrations/raftstore/test_scale_pool.rs b/tests/integrations/raftstore/test_scale_pool.rs index c3c321a6f66..393c47a7f5e 100644 --- a/tests/integrations/raftstore/test_scale_pool.rs +++ b/tests/integrations/raftstore/test_scale_pool.rs @@ -1,8 +1,14 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{collections::HashMap, time::Duration}; +use std::{ + collections::HashMap, + sync::{mpsc::sync_channel, Mutex}, + time::Duration, +}; +use engine_traits::{MiscExt, Peekable}; use test_raftstore::*; +use tikv::config::ConfigurableDb; use tikv_util::{ sys::thread::{self, Pid}, HandyRwLock, @@ -433,3 +439,50 @@ fn test_resize_async_ios_failed_2() { cluster.must_put(b"k2", b"v2"); must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); } + +#[test] +fn test_adjust_hight_priority_background_threads() { + use test_raftstore_v2::*; + let mut cluster = new_node_cluster(0, 1); + cluster.cfg.rocksdb.max_background_flushes = 2; + // pause one flush thread + fail::cfg("on_flush_completed", "1*pause").unwrap(); + cluster.run(); + + cluster.must_put(b"k1", b"val"); + let registry = &cluster.engines[0].0; + // set high priority background thread (flush thread) to 1 so that puase one + // thread will make flush unable to proceed + registry + .set_high_priority_background_threads(1, true) + .unwrap(); + + let mut cache = registry.get(1).unwrap(); + let tablet = cache.latest().unwrap().clone(); + assert_eq!(tablet.get_value(b"zk1").unwrap().unwrap(), b"val"); + + let tablet2 = tablet.clone(); + let h = std::thread::spawn(move || { + // it will block at on_memtable_flush + tablet2.flush_cf("default", true).unwrap(); + }); + + cluster.must_put(b"k2", b"val"); + let (tx, rx) = sync_channel(1); + let tx = Mutex::new(tx); + let h2 = std::thread::spawn(move || { + tablet.flush_cf("default", true).unwrap(); + tx.lock().unwrap().send(()).unwrap(); + }); + + rx.recv_timeout(Duration::from_secs(2)).unwrap_err(); + + let registry = &cluster.engines[0].0; + registry + .set_high_priority_background_threads(2, false) + .unwrap(); + + fail::remove("on_flush_completed"); + h.join().unwrap(); + h2.join().unwrap(); +} From d7b847dbff41a9525119c851a64c9ab895a1e539 Mon Sep 17 00:00:00 2001 From: Lucas Date: Mon, 19 Jun 2023 14:33:10 +0800 Subject: [PATCH 0747/1149] libs: update azure and 
raft-engine libs. (#14928) close tikv/tikv#14927 Update related libs on azure and raft-engine. Signed-off-by: lucasliang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 228 ++++++++++---------- src/config/mod.rs | 8 +- tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 2 +- tests/integrations/config/test-default.toml | 1 - 5 files changed, 120 insertions(+), 120 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b2a2e01b32a..df29801b930 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -72,6 +72,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4f263788a35611fba42eb41ff811c5d0360c58b97402570312a350736e2542e" + [[package]] name = "ansi_term" version = "0.11.0" @@ -264,7 +270,7 @@ version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "winapi 0.3.9", ] @@ -385,7 +391,7 @@ dependencies = [ [[package]] name = "azure_core" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#1fac7b8760f2610afd74269e3cc27e76eb33e6a9" +source = "git+https://github.com/Azure/azure-sdk-for-rust#5014404dff3cd77faf37a7077ec67bb6a2cf1f4c" dependencies = [ "async-trait", "base64 0.21.0", @@ -411,7 +417,7 @@ dependencies = [ [[package]] name = "azure_identity" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#1fac7b8760f2610afd74269e3cc27e76eb33e6a9" +source = "git+https://github.com/Azure/azure-sdk-for-rust#5014404dff3cd77faf37a7077ec67bb6a2cf1f4c" dependencies = [ "async-lock", "async-trait", @@ -431,7 +437,7 @@ dependencies = [ [[package]] name = "azure_security_keyvault" version = "0.12.0" -source = 
"git+https://github.com/Azure/azure-sdk-for-rust#1fac7b8760f2610afd74269e3cc27e76eb33e6a9" +source = "git+https://github.com/Azure/azure-sdk-for-rust#5014404dff3cd77faf37a7077ec67bb6a2cf1f4c" dependencies = [ "async-trait", "azure_core", @@ -446,7 +452,7 @@ dependencies = [ [[package]] name = "azure_storage" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#1fac7b8760f2610afd74269e3cc27e76eb33e6a9" +source = "git+https://github.com/Azure/azure-sdk-for-rust#5014404dff3cd77faf37a7077ec67bb6a2cf1f4c" dependencies = [ "RustyXML", "async-trait", @@ -468,7 +474,7 @@ dependencies = [ [[package]] name = "azure_storage_blobs" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#1fac7b8760f2610afd74269e3cc27e76eb33e6a9" +source = "git+https://github.com/Azure/azure-sdk-for-rust#5014404dff3cd77faf37a7077ec67bb6a2cf1f4c" dependencies = [ "RustyXML", "azure_core", @@ -476,7 +482,6 @@ dependencies = [ "bytes", "futures 0.3.15", "log", - "md5", "serde", "serde_derive", "serde_json", @@ -494,7 +499,7 @@ dependencies = [ "addr2line", "cc", "cfg-if 1.0.0", - "libc 0.2.139", + "libc 0.2.146", "miniz_oxide 0.4.4", "object", "rustc-demangle", @@ -660,7 +665,7 @@ dependencies = [ "bcc-sys", "bitflags", "byteorder", - "libc 0.2.139", + "libc 0.2.146", "regex", "thiserror", ] @@ -803,7 +808,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" dependencies = [ "cc", - "libc 0.2.139", + "libc 0.2.146", "pkg-config", ] @@ -829,7 +834,7 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7f788eaf239475a3c1e1acf89951255a46c4b9b46cf3e866fc4d0707b4b9e36" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "valgrind_request", ] @@ -993,7 +998,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f54d78e30b388d4815220c8dd03fea5656b6c6d32adb59e89061552a102f8da1" dependencies = [ 
"glob", - "libc 0.2.139", + "libc 0.2.146", "libloading", ] @@ -1077,7 +1082,7 @@ dependencies = [ "byteorder", "bytes", "error_code", - "libc 0.2.139", + "libc 0.2.146", "panic_hook", "protobuf", "rand 0.8.5", @@ -1156,7 +1161,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a89e2ae426ea83155dccf10c0fa6b1463ef6d5fcb44cee0b224a408fa640a62" dependencies = [ "core-foundation-sys", - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -1171,7 +1176,7 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9e393a7668fe1fad3075085b86c781883000b4ede868f43627b34a87c8b7ded" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "winapi 0.3.9", ] @@ -1181,7 +1186,7 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -1238,7 +1243,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63aaaf47e457badbcb376c65a49d0f182c317ebd97dc6d1ced94c8e1d09c0f3a" dependencies = [ "criterion", - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -1508,7 +1513,7 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "redox_users", "winapi 0.3.9", ] @@ -1781,7 +1786,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" dependencies = [ "errno-dragonfly", - "libc 0.2.139", + "libc 0.2.146", "winapi 0.3.9", ] @@ -1792,7 +1797,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" dependencies = [ "cc", - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -1909,7 +1914,7 @@ 
dependencies = [ "grpcio", "kvproto", "lazy_static", - "libc 0.2.139", + "libc 0.2.146", "libloading", "matches", "nix 0.24.1", @@ -1974,7 +1979,7 @@ dependencies = [ "crossbeam-utils 0.8.8", "fs2", "lazy_static", - "libc 0.2.139", + "libc 0.2.146", "maligned", "online_config", "openssl", @@ -1999,7 +2004,7 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed3d8a5e20435ff00469e51a0d82049bae66504b5c429920dadf9bb54d47b3f" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "thiserror", "winapi 0.3.9", ] @@ -2011,7 +2016,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d34cfa13a63ae058bfa601fe9e313bbdb3746427c1459185464ce0fcf62e1e8" dependencies = [ "cfg-if 1.0.0", - "libc 0.2.139", + "libc 0.2.146", "redox_syscall 0.2.11", "winapi 0.3.9", ] @@ -2024,7 +2029,7 @@ checksum = "d691fdb3f817632d259d09220d4cf0991dbb2c9e59e044a02a59194bf6e14484" dependencies = [ "cc", "lazy_static", - "libc 0.2.139", + "libc 0.2.146", "winapi 0.3.9", ] @@ -2072,7 +2077,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2adaffba6388640136149e18ed080b77a78611c1e1d6de75aedcdf78df5d4682" dependencies = [ "crc32fast", - "libc 0.2.139", + "libc 0.2.146", "libz-sys", "miniz_oxide 0.3.7", ] @@ -2113,7 +2118,7 @@ name = "fs2" version = "0.4.3" source = "git+https://github.com/tabokie/fs2-rs?branch=tikv#cd503764a19a99d74c1ab424dd13d6bcd093fcae" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "winapi 0.3.9", ] @@ -2139,7 +2144,7 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f41b048a94555da0f42f1d632e2e19510084fb8e303b0daa2816e733fb3644a0" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -2390,7 +2395,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "473a1265acc8ff1e808cd0a1af8cee3c2ee5200916058a2ca113c29f2d903571" dependencies = [ "cfg-if 0.1.10", - "libc 0.2.139", + "libc 
0.2.146", "wasi 0.7.0", ] @@ -2402,7 +2407,7 @@ checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ "cfg-if 1.0.0", "js-sys", - "libc 0.2.139", + "libc 0.2.146", "wasi 0.10.2+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -2451,7 +2456,7 @@ dependencies = [ "futures-executor", "futures-util", "grpcio-sys", - "libc 0.2.139", + "libc 0.2.146", "log", "parking_lot 0.11.1", "protobuf", @@ -2488,7 +2493,7 @@ dependencies = [ "bindgen 0.59.2", "cc", "cmake", - "libc 0.2.139", + "libc 0.2.146", "libz-sys", "openssl-sys", "pkg-config", @@ -2528,11 +2533,12 @@ checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" [[package]] name = "hashbrown" -version = "0.13.2" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" dependencies = [ "ahash 0.8.3", + "allocator-api2", ] [[package]] @@ -2556,7 +2562,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "307c3c9f937f38e3534b1d6447ecf090cafcc9744e4a6360e8b037b2cf5af120" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -2565,7 +2571,7 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -2808,7 +2814,7 @@ checksum = "4816c66d2c8ae673df83366c18341538f234a26d65a9ecea5c348b453ac1d02f" dependencies = [ "bitflags", "inotify-sys", - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -2817,7 +2823,7 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -2844,7 
+2850,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "windows-sys 0.42.0", ] @@ -2854,7 +2860,7 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -2912,7 +2918,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2b1d42ef453b30b7387e113da1c83ab1605d90c5b4e0eb8e96d016ed3b8c160" dependencies = [ "getrandom 0.1.12", - "libc 0.2.139", + "libc 0.2.146", "log", ] @@ -3053,9 +3059,9 @@ checksum = "e32a70cf75e5846d53a673923498228bbec6a8624708a9ea5645f075d6276122" [[package]] name = "libc" -version = "0.2.139" +version = "0.2.146" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" +checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b" [[package]] name = "libfuzzer-sys" @@ -3095,7 +3101,7 @@ dependencies = [ "bzip2-sys", "cc", "cmake", - "libc 0.2.139", + "libc 0.2.146", "libtitan_sys", "libz-sys", "lz4-sys", @@ -3113,7 +3119,7 @@ dependencies = [ "bzip2-sys", "cc", "cmake", - "libc 0.2.139", + "libc 0.2.146", "libz-sys", "lz4-sys", "snappy-sys", @@ -3127,7 +3133,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de5435b8549c16d423ed0c03dbaafe57cf6c3344744f1242520d59c9d8ecec66" dependencies = [ "cc", - "libc 0.2.139", + "libc 0.2.146", "pkg-config", "vcpkg", ] @@ -3189,7 +3195,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dca79aa95d8b3226213ad454d328369853be3a1382d89532a854f4d69640acae" dependencies = [ "cc", - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -3244,7 +3250,7 @@ version = "2.4.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -3253,7 +3259,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "winapi 0.3.9", ] @@ -3263,16 +3269,16 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", ] [[package]] name = "memmap2" -version = "0.6.2" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d28bba84adfe6646737845bc5ebbfa2c08424eb1c37e94a1fd2a82adb56a872" +checksum = "180d4b35be83d33392d1d1bfbd2ae1eca7ff5de1a94d3fc87faaa99a069e7cbd" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -3352,7 +3358,7 @@ dependencies = [ "fuchsia-zircon-sys", "iovec", "kernel32-sys", - "libc 0.2.139", + "libc 0.2.146", "log", "miow", "net2", @@ -3366,7 +3372,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "log", "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.42.0", @@ -3412,7 +3418,7 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1587ebb20a5b04738f16cffa7e2526f1b8496b84f92920facd518362ff1559eb" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -3463,7 +3469,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8d96b2e1c8da3957d58100b09f102c6d9cfdfced01b7ec5a8974044bb09dbd4" dependencies = [ "lazy_static", - "libc 0.2.139", + "libc 0.2.146", "log", 
"openssl", "openssl-probe", @@ -3481,7 +3487,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "391630d12b68002ae1e25e8f974306474966550ad82dac6886fb8910c19568ae" dependencies = [ "cfg-if 0.1.10", - "libc 0.2.139", + "libc 0.2.146", "winapi 0.3.9", ] @@ -3493,7 +3499,7 @@ checksum = "8f17df307904acd05aa8e32e97bb20f2a0df1728bbc2d771ae8f9a90463441e9" dependencies = [ "bitflags", "cfg-if 1.0.0", - "libc 0.2.139", + "libc 0.2.146", "memoffset 0.6.4", ] @@ -3505,7 +3511,7 @@ checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" dependencies = [ "bitflags", "cfg-if 1.0.0", - "libc 0.2.139", + "libc 0.2.146", "memoffset 0.7.1", "pin-utils", "static_assertions", @@ -3565,7 +3571,7 @@ dependencies = [ "fsevent", "fsevent-sys", "inotify", - "libc 0.2.139", + "libc 0.2.146", "mio 0.6.23", "mio-extras", "walkdir", @@ -3718,7 +3724,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" dependencies = [ "hermit-abi 0.1.3", - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -3727,7 +3733,7 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -3804,7 +3810,7 @@ dependencies = [ "bitflags", "cfg-if 1.0.0", "foreign-types", - "libc 0.2.139", + "libc 0.2.146", "once_cell", "openssl-macros", "openssl-sys", @@ -3843,7 +3849,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d3d193fb1488ad46ffe3aaabc912cc931d02ee8518fe2959aea8ef52718b0c0" dependencies = [ "cc", - "libc 0.2.139", + "libc 0.2.146", "openssl-src", "pkg-config", "vcpkg", @@ -3873,7 +3879,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd" dependencies = [ - 
"libc 0.2.139", + "libc 0.2.146", "winapi 0.3.9", ] @@ -3916,7 +3922,7 @@ checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" dependencies = [ "cfg-if 1.0.0", "instant", - "libc 0.2.139", + "libc 0.2.146", "redox_syscall 0.2.11", "smallvec", "winapi 0.3.9", @@ -3929,7 +3935,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28141e0cc4143da2443301914478dc976a61ffdb3f043058310c70df2fed8954" dependencies = [ "cfg-if 1.0.0", - "libc 0.2.139", + "libc 0.2.146", "redox_syscall 0.2.11", "smallvec", "windows-sys 0.32.0", @@ -4006,7 +4012,7 @@ checksum = "b8f94885300e262ef461aa9fd1afbf7df3caf9e84e271a74925d1c6c8b24830f" dependencies = [ "bitflags", "byteorder", - "libc 0.2.139", + "libc 0.2.146", "mmap", "nom 4.2.3", "phf", @@ -4149,7 +4155,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d27361d7578b410d0eb5fe815c2b2105b01ab770a7c738cb9a231457a809fcc7" dependencies = [ "ipnetwork", - "libc 0.2.139", + "libc 0.2.146", "pnet_base", "pnet_sys", "winapi 0.2.8", @@ -4161,7 +4167,7 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82f881a6d75ac98c5541db6144682d1773bb14c6fc50c6ebac7086c8f7f23c29" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "winapi 0.2.8", "ws2_32-sys", ] @@ -4176,7 +4182,7 @@ dependencies = [ "cfg-if 1.0.0", "findshlibs", "inferno", - "libc 0.2.139", + "libc 0.2.146", "log", "nix 0.26.2", "once_cell", @@ -4270,7 +4276,7 @@ dependencies = [ "byteorder", "hex 0.4.2", "lazy_static", - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -4279,7 +4285,7 @@ version = "0.4.2" source = "git+https://github.com/tikv/procinfo-rs?rev=6599eb9dca74229b2c1fcc44118bef7eff127128#6599eb9dca74229b2c1fcc44118bef7eff127128" dependencies = [ "byteorder", - "libc 0.2.139", + "libc 0.2.146", "nom 2.2.1", "rustc_version 0.2.3", ] @@ -4304,7 +4310,7 @@ dependencies = [ "cfg-if 1.0.0", "fnv", "lazy_static", - "libc 0.2.139", + "libc 0.2.146", 
"memchr", "parking_lot 0.11.1", "protobuf", @@ -4479,21 +4485,21 @@ dependencies = [ [[package]] name = "raft-engine" version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#773b89fb24276995e5fd26a7e28550796966d9cd" +source = "git+https://github.com/tikv/raft-engine.git#c9a95c82443f2f5e13ed5eac705f8c4ee5bba56c" dependencies = [ "byteorder", "crc32fast", "crossbeam", "fail", "fs2", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "hex 0.4.2", "if_chain", "lazy_static", - "libc 0.2.139", + "libc 0.2.146", "log", "lz4-sys", - "memmap2 0.6.2", + "memmap2 0.7.0", "nix 0.26.2", "num-derive", "num-traits", @@ -4513,7 +4519,7 @@ dependencies = [ [[package]] name = "raft-engine-ctl" version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#39f4db451295dbd8b30db4f94f220182c2c65be9" +source = "git+https://github.com/tikv/raft-engine.git#c9a95c82443f2f5e13ed5eac705f8c4ee5bba56c" dependencies = [ "clap 3.1.6", "env_logger 0.10.0", @@ -4681,7 +4687,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" dependencies = [ "fuchsia-cprng", - "libc 0.2.139", + "libc 0.2.146", "rand_core 0.3.1", "rdrand", "winapi 0.3.9", @@ -4694,7 +4700,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" dependencies = [ "getrandom 0.1.12", - "libc 0.2.139", + "libc 0.2.146", "rand_chacha 0.2.1", "rand_core 0.5.1", "rand_hc", @@ -4706,7 +4712,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "rand_chacha 0.3.0", "rand_core 0.6.2", ] @@ -5004,7 +5010,7 @@ dependencies = [ "grpcio", "kvproto", "lazy_static", - "libc 0.2.139", + "libc 0.2.146", "log", "online_config", "pdqselect", @@ -5067,7 +5073,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "b72b84d47e8ec5a4f2872e8262b8f8256c5be1c938a7d6d3a867a3ba8f722f74" dependencies = [ "cc", - "libc 0.2.139", + "libc 0.2.146", "once_cell", "spin", "untrusted", @@ -5080,7 +5086,7 @@ name = "rocksdb" version = "0.3.0" source = "git+https://github.com/tikv/rust-rocksdb.git#489b049dc974f9b164b7e6150ee5d9466847f74f" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "librocksdb_sys", ] @@ -5265,7 +5271,7 @@ dependencies = [ "bitflags", "errno", "io-lifetimes", - "libc 0.2.139", + "libc 0.2.146", "linux-raw-sys", "windows-sys 0.42.0", ] @@ -5343,7 +5349,7 @@ dependencies = [ "bitflags", "core-foundation", "core-foundation-sys", - "libc 0.2.139", + "libc 0.2.146", "security-framework-sys", ] @@ -5354,7 +5360,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3676258fd3cfe2c9a0ec99ce3038798d847ce3e4bb17746373eb9f0f1ac16339" dependencies = [ "core-foundation-sys", - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -5549,7 +5555,7 @@ dependencies = [ "hex 0.4.2", "keys", "kvproto", - "libc 0.2.139", + "libc 0.2.146", "log", "log_wrappers", "pd_client", @@ -5621,7 +5627,7 @@ version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "signal-hook-registry", ] @@ -5631,7 +5637,7 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -5764,7 +5770,7 @@ version = "0.1.0" source = "git+https://github.com/busyjay/rust-snappy.git?branch=static-link#8c12738bad811397600455d6982aff754ea2ac44" dependencies = [ "cmake", - "libc 0.2.139", + "libc 0.2.146", "pkg-config", ] @@ -5792,7 +5798,7 @@ version = "0.4.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "winapi 0.3.9", ] @@ -6010,7 +6016,7 @@ checksum = "ade661fa5e048ada64ad7901713301c21d2dbc5b65ee7967de8826c111452960" dependencies = [ "cfg-if 1.0.0", "core-foundation-sys", - "libc 0.2.139", + "libc 0.2.146", "ntapi", "once_cell", "rayon", @@ -6093,7 +6099,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" dependencies = [ "cfg-if 1.0.0", - "libc 0.2.139", + "libc 0.2.146", "rand 0.8.5", "redox_syscall 0.2.11", "remove_dir_all", @@ -6393,7 +6399,7 @@ dependencies = [ "hyper", "keys", "kvproto", - "libc 0.2.139", + "libc 0.2.146", "log_wrappers", "more-asserts", "online_config", @@ -6697,7 +6703,7 @@ dependencies = [ "keys", "kvproto", "lazy_static", - "libc 0.2.139", + "libc 0.2.146", "libloading", "log", "log_wrappers", @@ -6798,7 +6804,7 @@ dependencies = [ "hex 0.4.2", "keys", "kvproto", - "libc 0.2.139", + "libc 0.2.146", "log", "log_wrappers", "pd_client", @@ -6834,7 +6840,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e37706572f4b151dff7a0146e040804e9c26fe3a3118591112f05cf12a4216c1" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "paste", "tikv-jemalloc-sys", ] @@ -6847,7 +6853,7 @@ checksum = "aeab4310214fe0226df8bfeb893a291a58b19682e8a07e1e1d4483ad4200d315" dependencies = [ "cc", "fs_extra", - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -6856,7 +6862,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20612db8a13a6c06d57ec83953694185a367e16945f66565e8028d2c0bd76979" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "tikv-jemalloc-sys", ] @@ -6886,7 +6892,7 @@ version = "0.1.0" dependencies = [ "fxhash", "lazy_static", - "libc 0.2.139", + "libc 0.2.146", 
"mimalloc", "snmalloc-rs", "tcmalloc", @@ -6955,7 +6961,7 @@ dependencies = [ "http", "kvproto", "lazy_static", - "libc 0.2.139", + "libc 0.2.146", "log", "log_wrappers", "mnt", @@ -7004,7 +7010,7 @@ version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "redox_syscall 0.1.56", "winapi 0.3.9", ] @@ -7016,7 +7022,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" dependencies = [ "itoa 1.0.1", - "libc 0.2.139", + "libc 0.2.146", "num_threads", "serde", "time-core", @@ -7076,7 +7082,7 @@ checksum = "a9e03c497dc955702ba729190dc4aac6f2a0ce97f913e5b1b5912fc5039d9099" dependencies = [ "autocfg", "bytes", - "libc 0.2.139", + "libc 0.2.146", "memchr", "mio 0.8.5", "num_cpus", @@ -7468,7 +7474,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "055058552ca15c566082fc61da433ae678f78986a6f16957e33162d1b218792a" dependencies = [ "kernel32-sys", - "libc 0.2.139", + "libc 0.2.146", "winapi 0.2.8", ] @@ -7662,7 +7668,7 @@ checksum = "2a5a7e487e921cf220206864a94a89b6c6905bfc19f1057fa26a4cb360e5c1d2" dependencies = [ "either", "lazy_static", - "libc 0.2.139", + "libc 0.2.146", ] [[package]] @@ -7901,7 +7907,7 @@ version = "5.0.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" dependencies = [ - "libc 0.2.139", + "libc 0.2.146", "zstd-sys", ] @@ -7912,5 +7918,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" dependencies = [ "cc", - "libc 0.2.139", + "libc 0.2.146", ] diff --git a/src/config/mod.rs b/src/config/mod.rs index caf194460c9..f1441e1c7ad 100644 --- a/src/config/mod.rs +++ 
b/src/config/mod.rs @@ -1857,13 +1857,7 @@ impl Default for RaftEngineConfig { fn default() -> Self { Self { enable: true, - config: RawRaftEngineConfig { - // TODO: after update the dependency to `raft-engine` lib, revokes the - // following unelegant settings. - // Enable log recycling by default. - enable_log_recycle: true, - ..RawRaftEngineConfig::default() - }, + config: RawRaftEngineConfig::default(), } } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 46e179a219c..e7208f98f4e 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -705,6 +705,7 @@ fn test_serde_custom_tikv_config() { raft_engine_config.recovery_read_block_size.0 = ReadableSize::kb(1).0; raft_engine_config.recovery_threads = 2; raft_engine_config.memory_limit = Some(RaftEngineReadableSize::gb(1)); + raft_engine_config.enable_log_recycle = false; value.storage = StorageConfig { data_dir: "/var".to_owned(), engine: EngineType::RaftKv2, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 3f6c920071f..a87d55c0ac7 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -618,7 +618,7 @@ recovery-mode = "tolerate-tail-corruption" recovery-read-block-size = "1KB" recovery-threads = 2 memory-limit = "1GB" -enable-log-recycle = true # enable by default +enable-log-recycle = false [security] ca-path = "invalid path" diff --git a/tests/integrations/config/test-default.toml b/tests/integrations/config/test-default.toml index ef3c83c00df..23e53b9daf3 100644 --- a/tests/integrations/config/test-default.toml +++ b/tests/integrations/config/test-default.toml @@ -39,7 +39,6 @@ [raftdb.defaultcf] [raft-engine] -enable-log-recycle = true # enable by default [security] From 2c5ff04e22bd8e21adf8b5ab9e86735ef6bf8845 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Mon, 19 Jun 2023 17:41:10 +0800 Subject: [PATCH 0748/1149] sst_importer: 
duplicate detection skip rest KV when meet delete (#14960) close tikv/tikv#14961 Signed-off-by: lance6716 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/import/duplicate_detect.rs | 53 +++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/src/import/duplicate_detect.rs b/src/import/duplicate_detect.rs index b1eaecab881..7adb3f51f90 100644 --- a/src/import/duplicate_detect.rs +++ b/src/import/duplicate_detect.rs @@ -103,29 +103,37 @@ impl DuplicateDetector { while let Some(current_write) = self.skip_lock_and_rollback(&start_key)? { let (current_key, commit_ts) = Key::split_on_ts_for(self.iter.key())?; - if current_write.write_type == WriteType::Put { - if commit_ts <= self.min_commit_ts - && !current_write - .as_ref() - .check_gc_fence_as_latest_version(self.min_commit_ts) - { + match current_write.write_type { + WriteType::Put => { + if commit_ts <= self.min_commit_ts + && !current_write + .as_ref() + .check_gc_fence_as_latest_version(self.min_commit_ts) + { + self.skip_all_version(&start_key)?; + return Ok(()); + } + + let write_value = if self.key_only { + None + } else { + Some(current_write) + }; + if write_info.is_some() { + duplicate_pairs.push(self.make_kv_pair( + &start_key, + write_info.take(), + end_commit_ts, + )?); + } + duplicate_pairs.push(self.make_kv_pair(current_key, write_value, commit_ts)?); + } + // ignore the KV that is deleted. 
+ WriteType::Delete => { self.skip_all_version(&start_key)?; return Ok(()); } - - let write_value = if self.key_only { - None - } else { - Some(current_write) - }; - if write_info.is_some() { - duplicate_pairs.push(self.make_kv_pair( - &start_key, - write_info.take(), - end_commit_ts, - )?); - } - duplicate_pairs.push(self.make_kv_pair(current_key, write_value, commit_ts)?); + _ => {} } if commit_ts <= self.min_commit_ts { self.skip_all_version(&start_key)?; @@ -490,6 +498,11 @@ mod tests { ]; let snapshot = storage.get_snapshot(); let detector = DuplicateDetector::new(snapshot, b"0".to_vec(), None, 13, false).unwrap(); + check_duplicate_data(detector, expected_kvs.clone()); + + // in fact lightning will not set min_commit_ts + let snapshot = storage.get_snapshot(); + let detector = DuplicateDetector::new(snapshot, b"0".to_vec(), None, 0, false).unwrap(); check_duplicate_data(detector, expected_kvs); } } From 1ee541b7c8d33d8e7bf3a4b6f50bae2bdb7524bf Mon Sep 17 00:00:00 2001 From: lijie Date: Mon, 19 Jun 2023 18:02:35 +0800 Subject: [PATCH 0749/1149] chore: bump version to 7.3.0 (#14971) Signed-off-by: lijie --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index df29801b930..af215639f3a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6653,7 +6653,7 @@ dependencies = [ [[package]] name = "tikv" -version = "7.2.0-alpha" +version = "7.3.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index 34bf6667b32..7dfc05261e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tikv" -version = "7.2.0-alpha" +version = "7.3.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From 9e969a58ec4898559181e84dcb58d049de2a2edd Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Wed, 21 Jun 2023 10:34:11 +0800 Subject: [PATCH 0750/1149] 
raftstore-v2: update some configuration for log gc (#14930) close tikv/tikv#14411 raft log gc size adjust in raftstore-v2 1. raft_log_gc_size_limit to 200MB 2. raft_log_gc_count_limit to 10000 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/config.rs | 27 ++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index b362867851a..5928f6ac438 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -579,6 +579,18 @@ impl Config { self.region_compact_check_step = Some(100); } } + + // When use raft kv v2, we can set raft log gc size limit to a smaller value to + // avoid too many entry logs in cache. + // The snapshot support to increment snapshot sst, so the old snapshot files + // still be useful even if needs to sent snapshot again. 
+ if self.raft_log_gc_size_limit.is_none() && raft_kv_v2 { + self.raft_log_gc_size_limit = Some(ReadableSize::mb(200)); + } + + if self.raft_log_gc_count_limit.is_none() && raft_kv_v2 { + self.raft_log_gc_count_limit = Some(10000); + } } pub fn validate( @@ -1428,5 +1440,20 @@ mod tests { cfg.optimize_for(false); cfg.validate(split_size, true, split_size / 20).unwrap(); assert_eq!(cfg.region_split_check_diff(), ReadableSize(1)); + + cfg = Config::new(); + cfg.optimize_for(true); + cfg.validate(split_size, true, split_size / 20).unwrap(); + assert_eq!(cfg.raft_log_gc_size_limit(), ReadableSize::mb(200)); + assert_eq!(cfg.raft_log_gc_count_limit(), 10000); + + cfg = Config::new(); + cfg.optimize_for(false); + cfg.validate(split_size, true, split_size / 20).unwrap(); + assert_eq!(cfg.raft_log_gc_size_limit(), split_size * 3 / 4); + assert_eq!( + cfg.raft_log_gc_count_limit(), + split_size * 3 / 4 / ReadableSize::kb(1) + ); } } From 00121f1bf78004a0fa74a9cbc26389ac016bb7ed Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Wed, 21 Jun 2023 15:37:12 +0800 Subject: [PATCH 0751/1149] raftstore-v2: reuse sending failed snapshot (#14957) close tikv/tikv#14740 reuse sending failed snapshot: 1. snapshot don't drop if sending failed, only drop after sending success or snapshot is stale 2. 
new timing task will response for clean up snapshot Signed-off-by: bufferflies <1045931706@qq.com> --- components/raftstore-v2/src/batch/store.rs | 1 + components/raftstore-v2/src/fsm/peer.rs | 1 + components/raftstore-v2/src/fsm/store.rs | 6 ++ components/raftstore-v2/src/operation/misc.rs | 47 +++++++++++++- .../src/operation/ready/snapshot.rs | 54 +++++++++++++-- components/raftstore-v2/src/raft/peer.rs | 8 +++ components/raftstore-v2/src/raft/storage.rs | 2 - components/raftstore-v2/src/router/message.rs | 4 +- components/raftstore-v2/src/worker/tablet.rs | 27 +++++++- components/raftstore/src/store/snap.rs | 48 ++++++++++++++ src/server/tablet_snap.rs | 12 +--- tests/failpoints/cases/test_snap.rs | 65 ++++++++++++++++++- .../raftstore/test_v1_v2_mixed.rs | 14 ---- 13 files changed, 252 insertions(+), 37 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 8e3bee7efda..137862d742a 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -713,6 +713,7 @@ impl StoreSystem { tablet::Runner::new( tablet_registry.clone(), sst_importer.clone(), + snap_mgr.clone(), self.logger.clone(), ), ); diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index bf930f6d80a..cae442514ab 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -333,6 +333,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, .fsm .peer_mut() .on_cleanup_import_sst(self.store_ctx, ssts), + PeerMsg::SnapGc(keys) => self.fsm.peer_mut().on_snap_gc(self.store_ctx, keys), PeerMsg::AskCommitMerge(req) => { self.fsm.peer_mut().on_ask_commit_merge(self.store_ctx, req) } diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 486a4c4813e..6498d74b061 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ 
b/components/raftstore-v2/src/fsm/store.rs @@ -250,6 +250,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { self.store_ctx.cfg.cleanup_import_sst_interval.0, ); self.register_compact_check_tick(); + + self.schedule_tick( + StoreTick::SnapGc, + self.store_ctx.cfg.snap_mgr_gc_tick_interval.0, + ); } pub fn schedule_tick(&mut self, tick: StoreTick, timeout: Duration) { @@ -275,6 +280,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { StoreTick::PdStoreHeartbeat => self.on_pd_store_heartbeat(), StoreTick::CleanupImportSst => self.on_cleanup_import_sst(), StoreTick::CompactCheck => self.on_compact_check_tick(), + StoreTick::SnapGc => self.on_snapshot_gc(), _ => slog_panic!( self.store_ctx.logger, "unimplemented"; diff --git a/components/raftstore-v2/src/operation/misc.rs b/components/raftstore-v2/src/operation/misc.rs index c2c3d643965..b3c5d9eb89e 100644 --- a/components/raftstore-v2/src/operation/misc.rs +++ b/components/raftstore-v2/src/operation/misc.rs @@ -5,13 +5,20 @@ use std::collections::{ HashSet, }; +use collections::HashMap; +use crossbeam::channel::TrySendError; use engine_traits::{KvEngine, RaftEngine, CF_DEFAULT, CF_WRITE}; +use raftstore::{store::TabletSnapKey, Result}; use slog::{debug, error, info}; use crate::{ - fsm::StoreFsmDelegate, - router::StoreTick, - worker::cleanup::{self, CompactThreshold}, + batch::StoreContext, + fsm::{Store, StoreFsmDelegate}, + router::{PeerMsg, StoreTick}, + worker::{ + cleanup::{self, CompactThreshold}, + tablet, + }, CompactTask::CheckAndCompact, }; @@ -103,4 +110,38 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { ); } } + + #[inline] + pub fn on_snapshot_gc(&mut self) { + if let Err(e) = self.fsm.store.on_snapshot_gc(self.store_ctx) { + error!(self.fsm.store.logger(), "cleanup import sst failed"; "error" => ?e); + } + self.schedule_tick( + StoreTick::SnapGc, + self.store_ctx.cfg.snap_mgr_gc_tick_interval.0, + ); + } +} + 
+impl Store { + #[inline] + fn on_snapshot_gc( + &mut self, + ctx: &mut StoreContext, + ) -> Result<()> { + let paths = ctx.snap_mgr.list_snapshot()?; + let mut region_keys: HashMap> = HashMap::default(); + for path in paths { + let key = TabletSnapKey::from_path(path)?; + region_keys.entry(key.region_id).or_default().push(key); + } + for (region_id, keys) in region_keys { + if let Err(TrySendError::Disconnected(msg)) = ctx.router.send(region_id, PeerMsg::SnapGc(keys.into())) + && !ctx.router.is_shutdown() { + let PeerMsg::SnapGc(keys) = msg else { unreachable!() }; + let _ = ctx.schedulers.tablet.schedule(tablet::Task::SnapGc(keys)); + } + } + Ok(()) + } } diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 36774d993fc..e0625deb306 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -58,6 +58,10 @@ use crate::{ Result, StoreContext, }; +/// Snapshot generating task state. +/// snaposhot send success: Relax --> Generating --> Generated --> Sending --> +/// Relax snapshot send failed: Relax --> Generating --> Generated --> Sending +/// snapshot send again: Sending --> Relax #[derive(Debug)] pub enum SnapState { Relax, @@ -66,14 +70,16 @@ pub enum SnapState { index: Arc, }, Generated(Box), + Sending(Box), } impl PartialEq for SnapState { fn eq(&self, other: &SnapState) -> bool { match (self, other) { - (SnapState::Relax, SnapState::Relax) - | (SnapState::Generating { .. }, SnapState::Generating { .. }) => true, + (SnapState::Relax, SnapState::Relax) => true, + (SnapState::Generating { .. }, SnapState::Generating { .. 
}) => true, (SnapState::Generated(snap1), SnapState::Generated(snap2)) => *snap1 == *snap2, + (SnapState::Sending(snap1), SnapState::Sending(snap2)) => *snap1 == *snap2, _ => false, } } @@ -192,6 +198,25 @@ impl Peer { } } + pub fn on_snap_gc(&self, ctx: &mut StoreContext, keys: Box<[TabletSnapKey]>) { + let mut stale_keys = Vec::from(keys); + if self.is_leader() { + stale_keys.retain( + |key| match self.storage().snap_states.borrow().get(&key.to_peer) { + Some(SnapState::Relax) => true, + _ => !self.has_peer(key.to_peer), + }, + ) + } + if stale_keys.is_empty() { + return; + } + let _ = ctx + .schedulers + .tablet + .schedule(tablet::Task::SnapGc(stale_keys.into())); + } + pub fn on_snapshot_generated(&mut self, snapshot: GenSnapRes) { if self.storage_mut().on_snapshot_generated(snapshot) { self.raft_group_mut().ping(); @@ -219,6 +244,7 @@ impl Peer { "to" => ?to_peer, "status" => ?status, ); + self.storage().report_snapshot(to_peer_id, status); self.raft_group_mut().report_snapshot(to_peer_id, status); } @@ -373,10 +399,23 @@ impl Storage { false } + fn report_snapshot(&self, peer_id: u64, status: raft::SnapshotStatus) { + if status == raft::SnapshotStatus::Finish { + self.snap_states.borrow_mut().remove(&peer_id); + } + } + /// Gets a snapshot. Returns `SnapshotTemporarilyUnavailable` if there is no /// unavailable snapshot. pub fn snapshot(&self, request_index: u64, to: u64) -> raft::Result { if let Some(state) = self.snap_states.borrow_mut().get_mut(&to) { + info!( + self.logger(), + "requesting snapshot"; + "request_index" => request_index, + "request_peer" => to, + "state" => ?state, + ); match state { SnapState::Generating { ref canceled, .. 
} => { if canceled.load(Ordering::SeqCst) { @@ -389,10 +428,17 @@ impl Storage { } SnapState::Generated(ref s) => { let snap = *s.clone(); - *state = SnapState::Relax; if self.validate_snap(&snap, request_index) { + *state = SnapState::Sending(s.clone()); return Ok(snap); } + *state = SnapState::Relax; + } + SnapState::Sending(ref s) => { + if self.validate_snap(s, request_index) { + return Ok(*s.clone()); + } + *state = SnapState::Relax; } _ => {} }; @@ -410,7 +456,7 @@ impl Storage { } else { info!( self.logger(), - "requesting snapshot"; + "requesting new snapshot"; "request_index" => request_index, "request_peer" => to, ); diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 22565fb3b41..cb5ea9a8580 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -575,6 +575,14 @@ impl Peer { self.peer_heartbeats.remove(&peer_id); } + #[inline] + pub fn has_peer(&self, peer_id: u64) -> bool { + self.region() + .get_peers() + .iter() + .any(|p| p.get_id() == peer_id) + } + /// Returns whether or not the peer sent heartbeat after the provided /// deadline time. 
#[inline] diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 747fd035fd8..33272ae28bd 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -551,8 +551,6 @@ mod tests { s.snapshot(0, to_peer_id).unwrap(); // Test cancel snapshot - let snap = s.snapshot(0, 7); - assert_eq!(snap.unwrap_err(), unavailable); let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); apply.schedule_gen_snapshot(gen_task); let _res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 484d383108d..65026af9a1d 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -12,7 +12,7 @@ use kvproto::{ }; use raftstore::store::{ fsm::ChangeObserver, metrics::RaftEventDurationType, simple_write::SimpleWriteBinary, - FetchedLogs, GenSnapRes, + FetchedLogs, GenSnapRes, TabletSnapKey, }; use resource_control::ResourceMetered; use tikv_util::time::Instant; @@ -241,6 +241,8 @@ pub enum PeerMsg { /// A message that used to check if a flush is happened. #[cfg(feature = "testexport")] WaitFlush(super::FlushChannel), + /// A message that used to check if a snapshot gc is happened. 
+ SnapGc(Box<[TabletSnapKey]>), } impl ResourceMetered for PeerMsg {} diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index 629eaf030e6..f786315a008 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -11,6 +11,7 @@ use collections::HashMap; use engine_traits::{DeleteStrategy, KvEngine, Range, TabletContext, TabletRegistry, DATA_CFS}; use fail::fail_point; use kvproto::{import_sstpb::SstMeta, metapb::Region}; +use raftstore::store::{TabletSnapKey, TabletSnapManager}; use slog::{debug, error, info, warn, Logger}; use sst_importer::SstImporter; use tikv_util::{ @@ -41,7 +42,9 @@ pub enum Task { persisted_index: u64, }, /// Sometimes we know for sure a tablet can be destroyed directly. - DirectDestroy { tablet: Either }, + DirectDestroy { + tablet: Either, + }, /// Cleanup ssts. CleanupImportSst(Box<[SstMeta]>), /// Flush memtable before split @@ -52,6 +55,9 @@ pub enum Task { region_id: u64, cb: Option>, }, + + // Gc snapshot + SnapGc(Box<[TabletSnapKey]>), } impl Display for Task { @@ -99,6 +105,9 @@ impl Display for Task { on_flush_finish.is_some() ) } + Task::SnapGc(snap_keys) => { + write!(f, "gc snapshot {:?}", snap_keys) + } } } } @@ -175,6 +184,7 @@ impl Task { pub struct Runner { tablet_registry: TabletRegistry, sst_importer: Arc, + snap_mgr: TabletSnapManager, logger: Logger, // region_id -> [(tablet_path, wait_for_persisted, callback)]. 
@@ -190,11 +200,13 @@ impl Runner { pub fn new( tablet_registry: TabletRegistry, sst_importer: Arc, + snap_mgr: TabletSnapManager, logger: Logger, ) -> Self { Self { tablet_registry, sst_importer, + snap_mgr, logger, waiting_destroy_tasks: HashMap::default(), pending_destroy_tasks: Vec::new(), @@ -356,6 +368,14 @@ impl Runner { } } + fn snap_gc(&self, keys: Box<[TabletSnapKey]>) { + for key in Vec::from(keys) { + if !self.snap_mgr.delete_snapshot(&key) { + warn!(self.logger, "failed to gc snap"; "key" => ?key); + } + } + } + fn flush_tablet(&self, region_id: u64, cb: Option>) { let Some(Some(tablet)) = self .tablet_registry @@ -424,6 +444,7 @@ where Task::DirectDestroy { tablet, .. } => self.direct_destroy(tablet), Task::CleanupImportSst(ssts) => self.cleanup_ssts(ssts), Task::Flush { region_id, cb } => self.flush_tablet(region_id, cb), + Task::SnapGc(keys) => self.snap_gc(keys), } } } @@ -472,7 +493,9 @@ mod tests { let registry = TabletRegistry::new(factory, dir.path()).unwrap(); let logger = slog_global::borrow_global().new(slog::o!()); let (_dir, importer) = create_tmp_importer(); - let mut runner = Runner::new(registry.clone(), importer, logger); + let snap_dir = dir.path().join("snap"); + let snap_mgr = TabletSnapManager::new(snap_dir, None).unwrap(); + let mut runner = Runner::new(registry.clone(), importer, snap_mgr, logger); let mut region = Region::default(); let rid = 1; diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index f35041da856..e3b1c594ea5 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -2054,6 +2054,22 @@ impl TabletSnapKey { let term = snap.get_metadata().get_term(); TabletSnapKey::new(region_id, to_peer, term, index) } + + pub fn from_path>(path: T) -> Result { + let path = path.into(); + let name = path.file_name().unwrap().to_str().unwrap(); + let numbers: Vec = name + .split('_') + .skip(1) + .filter_map(|s| s.parse().ok()) + .collect(); + if 
numbers.len() < 4 { + return Err(box_err!("invalid tablet snapshot file name:{}", name)); + } + Ok(TabletSnapKey::new( + numbers[0], numbers[1], numbers[2], numbers[3], + )) + } } impl Display for TabletSnapKey { @@ -2174,6 +2190,7 @@ impl TabletSnapManager { pub fn delete_snapshot(&self, key: &TabletSnapKey) -> bool { let path = self.tablet_gen_path(key); + debug!("delete tablet snapshot file";"path" => %path.display()); if path.exists() { if let Err(e) = encryption::trash_dir_all(&path, self.key_manager.as_deref()) { error!( @@ -2187,6 +2204,26 @@ impl TabletSnapManager { true } + pub fn list_snapshot(&self) -> Result> { + let mut paths = Vec::new(); + for entry in file_system::read_dir(&self.base)? { + let entry = match entry { + Ok(e) => e, + Err(e) if e.kind() == ErrorKind::NotFound => continue, + Err(e) => return Err(Error::from(e)), + }; + + let path = entry.path(); + if path.file_name().and_then(|n| n.to_str()).map_or(true, |n| { + !n.starts_with(SNAP_GEN_PREFIX) || n.ends_with(TMP_FILE_SUFFIX) + }) { + continue; + } + paths.push(path); + } + Ok(paths) + } + pub fn total_snap_size(&self) -> Result { let mut total_size = 0; for entry in file_system::read_dir(&self.base)? 
{ @@ -3363,4 +3400,15 @@ pub mod tests { snap_mgr.init().unwrap(); assert!(path.exists()); } + + #[test] + fn test_from_path() { + let snap_dir = Builder::new().prefix("test_from_path").tempdir().unwrap(); + let path = snap_dir.path().join("gen_1_2_3_4"); + let key = TabletSnapKey::from_path(path).unwrap(); + let expect_key = TabletSnapKey::new(1, 2, 3, 4); + assert_eq!(expect_key, key); + let path = snap_dir.path().join("gen_1_2_3_4.tmp"); + TabletSnapKey::from_path(path).unwrap_err(); + } } diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index a1d2a12bc91..e9fa3c42af8 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -733,7 +733,6 @@ pub async fn send_snap( let (snap_mgr, key) = (snap_mgr.clone(), key.clone()); DeferContext::new(move || { snap_mgr.finish_snapshot(key.clone(), timer); - snap_mgr.delete_snapshot(&key); }) }; let (sink, mut receiver) = client.tablet_snapshot()?; @@ -914,14 +913,8 @@ where let region_id = msg.get_region_id(); let sending_count = self.snap_mgr.sending_count().clone(); if sending_count.load(Ordering::SeqCst) >= self.cfg.concurrent_send_snap_limit { - let key = TabletSnapKey::from_region_snap( - msg.get_region_id(), - msg.get_to_peer().get_id(), - msg.get_message().get_snapshot(), - ); - self.snap_mgr.delete_snapshot(&key); warn!( - "too many sending snapshot tasks, drop Send Snap[to: {}, snap: {:?}]", + "Too many sending snapshot tasks, drop Send Snap[to: {}, snap: {:?}]", addr, msg ); cb(Err(Error::Other("Too many sending snapshot tasks".into()))); @@ -950,12 +943,13 @@ where self.pool.spawn(async move { let res = send_snap( client, - snap_mgr, + snap_mgr.clone(), msg, limiter, ).await; match res { Ok(stat) => { + snap_mgr.delete_snapshot(&stat.key); info!( "sent snapshot"; "region_id" => region_id, diff --git a/tests/failpoints/cases/test_snap.rs b/tests/failpoints/cases/test_snap.rs index faf0e6b2476..7748b1d2985 100644 --- a/tests/failpoints/cases/test_snap.rs +++ 
b/tests/failpoints/cases/test_snap.rs @@ -385,8 +385,8 @@ fn test_shutdown_when_snap_gc() { } // Test if a peer handle the old snapshot properly. -#[test_case(test_raftstore::new_node_cluster)] -#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_receive_old_snapshot() { let mut cluster = new_cluster(0, 3); configure_for_snapshot(&mut cluster.cfg); @@ -931,3 +931,64 @@ fn test_snapshot_complete_recover_raft_tick() { cluster.must_put(b"k0500", b"val"); assert_eq!(cluster.must_get(b"k0500").unwrap(), b"val".to_vec()); } + +#[test] +fn test_snapshot_send_failed() { + let mut cluster = test_raftstore_v2::new_server_cluster(1, 2); + configure_for_snapshot(&mut cluster.cfg); + cluster.cfg.raft_store.snap_gc_timeout = ReadableDuration::millis(300); + cluster.cfg.raft_store.snap_mgr_gc_tick_interval = ReadableDuration::millis(100); + cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(100); + let pd_client = cluster.pd_client.clone(); + // Disable default max peer number check. + pd_client.disable_default_operator(); + let r1 = cluster.run_conf_change(); + cluster.must_put(b"zk1", b"v1"); + let (send_tx, send_rx) = mpsc::sync_channel(1); + // only send one MessageType::MsgSnapshot message + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(r1, 1) + .allow(1) + .direction(Direction::Send) + .msg_type(MessageType::MsgSnapshot) + .set_msg_callback(Arc::new(move |m: &RaftMessage| { + if m.get_message().get_msg_type() == MessageType::MsgSnapshot { + let _ = send_tx.try_send(()); + } + })), + )); + // peer2 will interrupt in receiving snapshot + fail::cfg("receiving_snapshot_net_error", "return()").unwrap(); + pd_client.must_add_peer(r1, new_learner_peer(2, 2)); + + // ready to send notify. 
+ send_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + // need to wait receiver handle the snapshot request + sleep_ms(100); + + // peer2 can't receive any snapshot, so it doesn't have any key valuse. + // but the receiving_count should be zero if receiving snapshot is failed. + let engine2 = cluster.get_engine(2); + must_get_none(&engine2, b"zk1"); + assert_eq!(cluster.get_snap_mgr(2).stats().receiving_count, 0); + let mgr = cluster.get_snap_mgr(1); + assert!(!mgr.list_snapshot().unwrap().is_empty()); + + // clear fail point and wait snapshot finish. + fail::remove("receiving_snapshot_net_error"); + cluster.clear_send_filters(); + let (sender, receiver) = mpsc::channel(); + let sync_sender = Mutex::new(sender); + fail::cfg_callback("receiving_snapshot_net_error", move || { + let sender = sync_sender.lock().unwrap(); + sender.send(true).unwrap(); + }) + .unwrap(); + receiver.recv_timeout(Duration::from_secs(3)).unwrap(); + must_get_equal(&engine2, b"zk1", b"v1"); + + // remove peer and check snapshot should be deleted. 
+ pd_client.must_remove_peer(r1, new_peer(2, 2)); + sleep_ms(100); + assert!(mgr.list_snapshot().unwrap().is_empty()); +} diff --git a/tests/integrations/raftstore/test_v1_v2_mixed.rs b/tests/integrations/raftstore/test_v1_v2_mixed.rs index 27e2173e04d..9ceb7938aaf 100644 --- a/tests/integrations/raftstore/test_v1_v2_mixed.rs +++ b/tests/integrations/raftstore/test_v1_v2_mixed.rs @@ -112,16 +112,13 @@ fn test_v1_receive_snap_from_v2() { let test_receive_snap = |key_num| { let mut cluster_v1 = test_raftstore::new_server_cluster(1, 1); let mut cluster_v2 = test_raftstore_v2::new_server_cluster(1, 1); - let mut cluster_v1_tikv = test_raftstore::new_server_cluster(1, 1); cluster_v1.cfg.raft_store.enable_v2_compatible_learner = true; cluster_v1.run(); cluster_v2.run(); - cluster_v1_tikv.run(); let s1_addr = cluster_v1.get_addr(1); - let s2_addr = cluster_v1_tikv.get_addr(1); let region = cluster_v2.get_region(b""); let region_id = region.get_id(); let engine = cluster_v2.get_engine(1); @@ -144,17 +141,6 @@ fn test_v1_receive_snap_from_v2() { .await .unwrap() }); - let send_result = block_on(async { - let client = - TikvClient::new(security_mgr.connect(ChannelBuilder::new(env.clone()), &s2_addr)); - send_snap_v2(client, snap_mgr, msg, limit).await - }); - // snapshot should be rejected by cluster v1 tikv, and the snapshot should be - // deleted. 
- assert!(send_result.is_err()); - let dir = cluster_v2.get_snap_dir(1); - let read_dir = std::fs::read_dir(dir).unwrap(); - assert_eq!(0, read_dir.count()); // The snapshot has been received by cluster v1, so check it's completeness let snap_mgr = cluster_v1.get_snap_mgr(1); From 5676de6f059d59e8ca2810ecfdce925a57588989 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 21 Jun 2023 18:20:42 +0800 Subject: [PATCH 0752/1149] tikv-ctl: implement drop unapplied log for v2 (#14920) ref tikv/tikv#14654 implement drop unapplied log for v2 Signed-off-by: Spade A --- cmd/tikv-ctl/src/executor.rs | 9 +- src/server/debug2.rs | 205 +++++++++++++++++++++++- tests/integrations/server/kv_service.rs | 1 + 3 files changed, 209 insertions(+), 6 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 8583b2f8edb..c26347f52e4 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -1347,6 +1347,13 @@ impl DebugExecutor for DebuggerImplV2 { println!("all regions are healthy") } + fn drop_unapplied_raftlog(&self, region_ids: Option>) { + println!("removing unapplied raftlog on region {:?} ...", region_ids); + self.drop_unapplied_raftlog(region_ids) + .unwrap_or_else(|e| perror_and_exit("Debugger::remove_fail_stores", e)); + println!("success"); + } + fn remove_fail_stores( &self, _store_ids: Vec, @@ -1356,8 +1363,6 @@ impl DebugExecutor for DebuggerImplV2 { unimplemented!() } - fn drop_unapplied_raftlog(&self, _region_ids: Option>) {} - fn recreate_region(&self, _mgr: Arc, _pd_cfg: &PdConfig, _region_id: u64) { unimplemented!() } diff --git a/src/server/debug2.rs b/src/server/debug2.rs index 8b31a857916..66fc39596c5 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -7,7 +7,7 @@ use engine_rocks::{ }; use engine_traits::{ CachedTablet, Iterable, MiscExt, Peekable, RaftEngine, RaftLogBatch, TabletContext, - TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, + 
TabletRegistry, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use futures::future::Future; use keys::{data_key, DATA_MAX_KEY, DATA_PREFIX_KEY}; @@ -15,7 +15,7 @@ use kvproto::{ debugpb::Db as DbType, kvrpcpb::MvccInfo, metapb, - raft_serverpb::{PeerState, RegionLocalState, StoreIdent}, + raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent}, }; use nom::AsBytes; use raft::{prelude::Entry, RawNode}; @@ -472,6 +472,71 @@ impl DebuggerImplV2 { } Ok(errors) } + + pub fn drop_unapplied_raftlog(&self, region_ids: Option>) -> Result<()> { + let raft_engine = &self.raft_engine; + let region_ids = region_ids.unwrap_or(self.get_all_regions_in_store()?); + for region_id in region_ids { + let region_state = self.region_info(region_id)?; + // It's safe to unwrap region_local_state here, because + // get_all_regions_in_store() guarantees that the region state + // exists in kvdb. + if region_state.region_local_state.unwrap().state == PeerState::Tombstone { + continue; + } + + let old_raft_local_state = region_state.raft_local_state.ok_or_else(|| { + Error::Other(format!("No RaftLocalState found for region {}", region_id).into()) + })?; + let old_raft_apply_state = region_state.raft_apply_state.ok_or_else(|| { + Error::Other(format!("No RaftApplyState found for region {}", region_id).into()) + })?; + + let applied_index = old_raft_apply_state.applied_index; + let commit_index = old_raft_apply_state.commit_index; + let last_index = old_raft_local_state.last_index; + + if last_index == applied_index && commit_index == applied_index { + continue; + } + + let new_raft_local_state = RaftLocalState { + last_index: applied_index, + ..old_raft_local_state.clone() + }; + let new_raft_apply_state = RaftApplyState { + commit_index: applied_index, + ..old_raft_apply_state.clone() + }; + + info!( + "dropping unapplied raft log"; + "region_id" => region_id, + "old_raft_local_state" => ?old_raft_local_state, + "new_raft_local_state" => ?new_raft_local_state, + 
"old_raft_apply_state" => ?old_raft_apply_state, + "new_raft_apply_state" => ?new_raft_apply_state, + ); + + // flush the changes + let mut lb = raft_engine.log_batch(10); + box_try!(lb.put_apply_state(region_id, applied_index, &new_raft_apply_state)); + box_try!(lb.put_raft_state(region_id, &new_raft_local_state)); + box_try!(raft_engine.gc(region_id, applied_index + 1, last_index + 1, &mut lb)); + box_try!(raft_engine.consume(&mut lb, true)); + + info!( + "dropped unapplied raft log"; + "region_id" => region_id, + "old_raft_local_state" => ?old_raft_local_state, + "new_raft_local_state" => ?new_raft_local_state, + "old_raft_apply_state" => ?old_raft_apply_state, + "new_raft_apply_state" => ?new_raft_apply_state, + ); + } + + Ok(()) + } } fn set_region_tombstone( @@ -560,9 +625,28 @@ impl Debugger for DebuggerImplV2 { } fn region_info(&self, region_id: u64) -> Result { + let persisted_applied = box_try!(self.raft_engine.get_flushed_index(region_id, CF_RAFT)) + .ok_or_else(|| Error::NotFound(format!("info for region {}", region_id)))?; let raft_state = box_try!(self.raft_engine.get_raft_state(region_id)); - let apply_state = box_try!(self.raft_engine.get_apply_state(region_id, u64::MAX)); - let region_state = box_try!(self.raft_engine.get_region_state(region_id, u64::MAX)); + + // We used persisted_applied to acquire the apply state. It may not be the + // lastest apply state but it's the real persisted one which means the tikv will + // acquire this one during start. 
+ let apply_state = box_try!( + self.raft_engine + .get_apply_state(region_id, persisted_applied) + ) + .map(|mut apply_state| { + // the persisted_applied is the raft log replay start point, so it's the real + // persisted applied_index + apply_state.applied_index = persisted_applied; + apply_state + }); + + let region_state = box_try!( + self.raft_engine + .get_region_state(region_id, persisted_applied) + ); match (raft_state, apply_state, region_state) { (None, None, None) => Err(Error::NotFound(format!("info for region {}", region_id))), @@ -1184,6 +1268,32 @@ mod tests { region } + fn init_raft_state( + raft_engine: &ER, + region_id: u64, + last_index: u64, + commit_index: u64, + applied_index: u64, + admin_flush: Option, + ) { + let mut lb = raft_engine.log_batch(10); + let mut apply_state = RaftApplyState::default(); + apply_state.set_applied_index(applied_index); + apply_state.set_commit_index(commit_index); + lb.put_apply_state(region_id, applied_index, &apply_state) + .unwrap(); + + let mut raft_state = RaftLocalState::default(); + raft_state.set_last_index(last_index); + lb.put_raft_state(region_id, &raft_state).unwrap(); + + if let Some(admin_flush) = admin_flush { + lb.put_flushed_index(region_id, CF_RAFT, 5, admin_flush) + .unwrap(); + } + raft_engine.consume(&mut lb, true).unwrap(); + } + #[test] fn test_get() { let dir = test_util::temp_dir("test-debugger", false); @@ -1275,6 +1385,13 @@ mod tests { region_state.set_state(PeerState::Tombstone); RaftLogBatch::put_region_state(&mut wb, region_id, 42, ®ion_state).unwrap(); + wb.put_flushed_index(region_id, CF_RAFT, 5, 42).unwrap(); + + // This will not be read + let mut apply_state2 = RaftApplyState::default(); + apply_state2.set_applied_index(100); + RaftLogBatch::put_apply_state(&mut wb, region_id, 100, &apply_state2).unwrap(); + raft_engine.consume(&mut wb, true).unwrap(); assert_eq!( @@ -1898,4 +2015,84 @@ mod tests { expected_state ); } + + #[test] + fn test_drop_unapplied_raftlog() { + let dir = 
test_util::temp_dir("test-debugger", false); + let debugger = new_debugger(dir.path()); + let raft_engine = &debugger.raft_engine; + + init_region_state(raft_engine, 1, &[100, 101], 1); + init_region_state(raft_engine, 2, &[100, 103], 1); + init_raft_state(raft_engine, 1, 100, 90, 80, Some(80)); + init_raft_state(raft_engine, 2, 80, 80, 80, Some(80)); + + let region_info_2_before = debugger.region_info(2).unwrap(); + + // Drop raftlog on all regions + debugger.drop_unapplied_raftlog(None).unwrap(); + let region_info_1 = debugger.region_info(1).unwrap(); + let region_info_2 = debugger.region_info(2).unwrap(); + + assert_eq!( + region_info_1.raft_local_state.as_ref().unwrap().last_index, + 80 + ); + assert_eq!( + region_info_1 + .raft_apply_state + .as_ref() + .unwrap() + .applied_index, + 80 + ); + assert_eq!( + region_info_1 + .raft_apply_state + .as_ref() + .unwrap() + .commit_index, + 80 + ); + assert_eq!(region_info_2, region_info_2_before); + } + + #[test] + // It tests that the latest apply state cannot be read as it is invisible + // on persisted_applied + fn test_drop_unapplied_raftlog_2() { + let dir = test_util::temp_dir("test-debugger", false); + let debugger = new_debugger(dir.path()); + let raft_engine = &debugger.raft_engine; + + init_region_state(raft_engine, 1, &[100, 101], 1); + init_raft_state(raft_engine, 1, 100, 90, 80, Some(80)); + // It will not be read due to less persisted_applied + init_raft_state(raft_engine, 1, 200, 190, 180, None); + + // Drop raftlog on all regions + debugger.drop_unapplied_raftlog(None).unwrap(); + let region_info_1 = debugger.region_info(1).unwrap(); + + assert_eq!( + region_info_1.raft_local_state.as_ref().unwrap().last_index, + 80 + ); + assert_eq!( + region_info_1 + .raft_apply_state + .as_ref() + .unwrap() + .applied_index, + 80 + ); + assert_eq!( + region_info_1 + .raft_apply_state + .as_ref() + .unwrap() + .commit_index, + 80 + ); + } } diff --git a/tests/integrations/server/kv_service.rs 
b/tests/integrations/server/kv_service.rs index 30c28654fb8..f837500f981 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -1151,6 +1151,7 @@ fn test_debug_region_info_v2() { region_state.set_state(raft_serverpb::PeerState::Tombstone); lb.put_region_state(region_id, 42, ®ion_state).unwrap(); + lb.put_flushed_index(region_id, CF_RAFT, 5, 42).unwrap(); raft_engine.consume(&mut lb, false).unwrap(); assert_eq!( raft_engine.get_raft_state(region_id).unwrap().unwrap(), From db306df2432056a7fe4e9b3fa0325ed2429c7705 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 21 Jun 2023 19:31:11 +0800 Subject: [PATCH 0753/1149] raftstore-v2: async delete range (#14952) ref tikv/tikv#14898 raftstore-v2: async delete range After drop some tables, delete range may take minutes to clean up data, This PR moves delete range execution from apply threads to tablet thread, so it does not blocks other regions and downgrades performance. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/fsm/apply.rs | 7 +- .../src/operation/command/admin/split.rs | 30 +-- .../raftstore-v2/src/operation/command/mod.rs | 34 ++- .../src/operation/command/write/mod.rs | 224 ++++++++++++++---- components/raftstore-v2/src/operation/mod.rs | 80 ++++++- .../src/operation/query/capture.rs | 63 +---- components/raftstore-v2/src/raft/apply.rs | 9 + components/raftstore-v2/src/raft/storage.rs | 6 +- components/raftstore-v2/src/worker/tablet.rs | 93 +++++++- 9 files changed, 405 insertions(+), 141 deletions(-) diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index ff1c5414de3..2c3df759680 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -28,6 +28,7 @@ use crate::{ raft::Apply, router::{ApplyRes, ApplyTask, PeerMsg}, worker::checkpoint, + TabletTask, }; /// A trait for reporting 
apply result. @@ -79,6 +80,7 @@ impl ApplyFsm { tablet_registry: TabletRegistry, read_scheduler: Scheduler>, checkpoint_scheduler: Scheduler>, + tablet_scheduler: Scheduler>, flush_state: Arc, sst_apply_state: SstApplyState, log_recovery: Option>, @@ -104,6 +106,7 @@ impl ApplyFsm { sst_importer, coprocessor_host, checkpoint_scheduler, + tablet_scheduler, logger, ); ( @@ -142,7 +145,9 @@ impl ApplyFsm { // TODO: flush by buffer size. ApplyTask::CommittedEntries(ce) => self.apply.apply_committed_entries(ce).await, ApplyTask::Snapshot(snap_task) => self.apply.schedule_gen_snapshot(snap_task), - ApplyTask::UnsafeWrite(raw_write) => self.apply.apply_unsafe_write(raw_write), + ApplyTask::UnsafeWrite(raw_write) => { + self.apply.apply_unsafe_write(raw_write).await + } ApplyTask::ManualFlush => self.apply.on_manual_flush().await, ApplyTask::RefreshBucketStat(bucket_meta) => { self.apply.on_refresh_buckets(bucket_meta) diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 395a2cfd2be..180bb628dbb 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -905,10 +905,7 @@ impl Peer { #[cfg(test)] mod test { - use std::sync::{ - mpsc::{channel, Receiver, Sender}, - Arc, - }; + use std::sync::Arc; use engine_test::{ ctor::{CfOptions, DbOptions}, @@ -938,31 +935,10 @@ mod test { use super::*; use crate::{ - fsm::ApplyResReporter, - operation::{test_util::create_tmp_importer, CatchUpLogs}, + operation::test_util::{create_tmp_importer, MockReporter}, raft::Apply, - router::ApplyRes, }; - struct MockReporter { - sender: Sender, - } - - impl MockReporter { - fn new() -> (Self, Receiver) { - let (tx, rx) = channel(); - (MockReporter { sender: tx }, rx) - } - } - - impl ApplyResReporter for MockReporter { - fn report(&self, apply_res: ApplyRes) { - let _ = self.sender.send(apply_res); - } - - fn 
redirect_catch_up_logs(&self, _c: CatchUpLogs) {} - } - fn new_split_req(key: &[u8], id: u64, children: Vec) -> SplitRequest { let mut req = SplitRequest::default(); req.set_split_key(key.to_vec()); @@ -1086,6 +1062,7 @@ mod test { ); defer!(checkpoint_worker.stop()); + let (tablet_scheduler, _) = dummy_scheduler(); let (read_scheduler, _rx) = dummy_scheduler(); let (reporter, _) = MockReporter::new(); let (_tmp_dir, importer) = create_tmp_importer(); @@ -1110,6 +1087,7 @@ mod test { importer, host, checkpoint_scheduler, + tablet_scheduler, logger.clone(), ); diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 1cae8075863..f50efa94745 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -146,6 +146,7 @@ impl Peer { store_ctx.tablet_registry.clone(), read_scheduler, store_ctx.schedulers.checkpoint.clone(), + store_ctx.schedulers.tablet.clone(), self.flush_state().clone(), sst_apply_state, self.storage().apply_trace().log_recovery(), @@ -491,7 +492,7 @@ impl Apply { } impl Apply { - pub fn apply_unsafe_write(&mut self, data: Box<[u8]>) { + pub async fn apply_unsafe_write(&mut self, data: Box<[u8]>) { let decoder = match SimpleWriteReqDecoder::new( |buf, index, term| parse_at(&self.logger, buf, index, term), &self.logger, @@ -511,14 +512,17 @@ impl Apply { let _ = self.apply_delete(delete.cf, u64::MAX, delete.key); } SimpleWrite::DeleteRange(dr) => { - let _ = self.apply_delete_range( - dr.cf, - u64::MAX, - dr.start_key, - dr.end_key, - dr.notify_only, - self.use_delete_range(), - ); + let use_delete_range = self.use_delete_range(); + let _ = self + .apply_delete_range( + dr.cf, + u64::MAX, + dr.start_key, + dr.end_key, + dr.notify_only, + use_delete_range, + ) + .await; } SimpleWrite::Ingest(_) => { error!( @@ -633,14 +637,16 @@ impl Apply { self.apply_delete(delete.cf, log_index, delete.key)?; } 
SimpleWrite::DeleteRange(dr) => { + let use_delete_range = self.use_delete_range(); self.apply_delete_range( dr.cf, log_index, dr.start_key, dr.end_key, dr.notify_only, - self.use_delete_range(), - )?; + use_delete_range, + ) + .await?; } SimpleWrite::Ingest(ssts) => { self.apply_ingest(log_index, ssts)?; @@ -731,6 +737,7 @@ impl Apply { self.apply_delete(delete.get_cf(), log_index, delete.get_key())?; } CmdType::DeleteRange => { + let use_delete_range = self.use_delete_range(); let dr = r.get_delete_range(); self.apply_delete_range( dr.get_cf(), @@ -738,8 +745,9 @@ impl Apply { dr.get_start_key(), dr.get_end_key(), dr.get_notify_only(), - self.use_delete_range(), - )?; + use_delete_range, + ) + .await?; } _ => slog_panic!( self.logger, diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 56b5fc4b0d6..c79b7880e43 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -1,10 +1,10 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use engine_traits::{ - data_cf_offset, DeleteStrategy, KvEngine, Mutable, RaftEngine, Range as EngineRange, ALL_CFS, - CF_DEFAULT, + data_cf_offset, name_to_cf, KvEngine, Mutable, RaftEngine, ALL_CFS, CF_DEFAULT, }; use fail::fail_point; +use futures::channel::oneshot; use kvproto::raft_cmdpb::RaftRequestHeader; use raftstore::{ store::{ @@ -16,8 +16,8 @@ use raftstore::{ }, Error, Result, }; -use slog::info; -use tikv_util::{box_err, slog_panic}; +use slog::{error, info}; +use tikv_util::{box_err, slog_panic, time::Instant}; use crate::{ batch::StoreContext, @@ -25,6 +25,7 @@ use crate::{ operation::SimpleWriteReqEncoder, raft::{Apply, Peer}, router::{ApplyTask, CmdResChannel}, + TabletTask, }; mod ingest; @@ -227,7 +228,7 @@ impl Apply { } #[inline] - pub fn apply_delete_range( + pub async fn apply_delete_range( &mut self, mut cf: &str, index: u64, @@ -269,54 +270,195 @@ impl Apply { let start_key = keys::data_key(start_key); - info!( - self.logger, - "execute delete range"; - "range_start" => log_wrappers::Value::key(&start_key), - "range_end" => log_wrappers::Value::key(&end_key), - "notify_only" => notify_only, - "use_delete_range" => use_delete_range, - ); - + let start = Instant::now_coarse(); // Use delete_files_in_range to drop as many sst files as possible, this // is a way to reclaim disk space quickly after drop a table/index. 
if !notify_only { - let range = vec![EngineRange::new(&start_key, &end_key)]; - let fail_f = |e: engine_traits::Error, strategy: DeleteStrategy| { - slog_panic!( - self.logger, - "failed to delete"; - "strategy" => ?strategy, + let (notify, wait) = oneshot::channel(); + let delete_range = TabletTask::delete_range( + self.region_id(), + self.tablet().clone(), + name_to_cf(cf).unwrap(), + start_key.clone().into(), + end_key.clone().into(), + use_delete_range, + Box::new(move || { + notify.send(()).unwrap(); + }), + ); + if let Err(e) = self.tablet_scheduler().schedule_force(delete_range) { + error!(self.logger, "fail to delete range"; "range_start" => log_wrappers::Value::key(&start_key), "range_end" => log_wrappers::Value::key(&end_key), + "notify_only" => notify_only, "error" => ?e, - ) - }; - let tablet = self.tablet(); - tablet - .delete_ranges_cf(cf, DeleteStrategy::DeleteFiles, &range) - .unwrap_or_else(|e| fail_f(e, DeleteStrategy::DeleteFiles)); + ); + } - let strategy = if use_delete_range { - DeleteStrategy::DeleteByRange - } else { - DeleteStrategy::DeleteByKey - }; - // Delete all remaining keys. - tablet - .delete_ranges_cf(cf, strategy.clone(), &range) - .unwrap_or_else(move |e| fail_f(e, strategy)); - - // to do: support titan? - // tablet - // .delete_ranges_cf(cf, DeleteStrategy::DeleteBlobs, &range) - // .unwrap_or_else(move |e| fail_f(e, - // DeleteStrategy::DeleteBlobs)); + let _ = wait.await; } + info!( + self.logger, + "execute delete range"; + "range_start" => log_wrappers::Value::key(&start_key), + "range_end" => log_wrappers::Value::key(&end_key), + "notify_only" => notify_only, + "use_delete_range" => use_delete_range, + "duration" => ?start.saturating_elapsed(), + ); + // delete range is an unsafe operation and it cannot be rollbacked to replay, so // we don't update modification index for this operation. 
Ok(()) } } + +#[cfg(test)] +mod test { + use std::sync::Arc; + + use engine_test::{ + ctor::{CfOptions, DbOptions}, + kv::{KvTestEngine, TestTabletFactory}, + }; + use engine_traits::{ + FlushState, Peekable, SstApplyState, TabletContext, TabletRegistry, CF_DEFAULT, DATA_CFS, + }; + use futures::executor::block_on; + use kvproto::{ + metapb::Region, + raft_serverpb::{PeerState, RegionLocalState}, + }; + use raftstore::{ + coprocessor::CoprocessorHost, + store::{Config, TabletSnapManager}, + }; + use slog::o; + use tempfile::TempDir; + use tikv_util::{ + store::new_peer, + worker::{dummy_scheduler, Worker}, + }; + + use crate::{ + operation::{ + test_util::{create_tmp_importer, new_delete_range_entry, new_put_entry, MockReporter}, + CommittedEntries, + }, + raft::Apply, + worker::tablet, + }; + + #[test] + fn test_delete_range() { + let store_id = 2; + + let mut region = Region::default(); + region.set_id(1); + region.set_end_key(b"k20".to_vec()); + region.mut_region_epoch().set_version(3); + let peers = vec![new_peer(2, 3)]; + region.set_peers(peers.into()); + + let logger = slog_global::borrow_global().new(o!()); + let path = TempDir::new().unwrap(); + let cf_opts = DATA_CFS + .iter() + .copied() + .map(|cf| (cf, CfOptions::default())) + .collect(); + let factory = Box::new(TestTabletFactory::new(DbOptions::default(), cf_opts)); + let reg = TabletRegistry::new(factory, path.path()).unwrap(); + let ctx = TabletContext::new(®ion, Some(5)); + reg.load(ctx, true).unwrap(); + let tablet = reg.get(region.get_id()).unwrap().latest().unwrap().clone(); + + let mut region_state = RegionLocalState::default(); + region_state.set_state(PeerState::Normal); + region_state.set_region(region.clone()); + region_state.set_tablet_index(5); + + let (read_scheduler, _rx) = dummy_scheduler(); + let (reporter, _) = MockReporter::new(); + let (tmp_dir, importer) = create_tmp_importer(); + let host = CoprocessorHost::::default(); + + let snap_mgr = TabletSnapManager::new(tmp_dir.path(), 
None).unwrap(); + let tablet_worker = Worker::new("tablet-worker"); + let checkpoint_scheduler = tablet_worker.start( + "tablet-worker", + tablet::Runner::new(reg.clone(), importer.clone(), snap_mgr, logger.clone()), + ); + tikv_util::defer!(tablet_worker.stop()); + + let (dummy_scheduler, _) = dummy_scheduler(); + let mut apply = Apply::new( + &Config::default(), + region + .get_peers() + .iter() + .find(|p| p.store_id == store_id) + .unwrap() + .clone(), + region_state, + reporter, + reg, + read_scheduler, + Arc::new(FlushState::new(5)), + SstApplyState::default(), + None, + 5, + None, + importer, + host, + dummy_scheduler, + checkpoint_scheduler, + logger.clone(), + ); + + // put (k1, v1); + let ce = CommittedEntries { + entry_and_proposals: vec![( + new_put_entry( + region.id, + region.get_region_epoch().clone(), + b"k1", + b"v1", + 5, + 6, + ), + vec![], + )], + }; + block_on(async { apply.apply_committed_entries(ce).await }); + apply.flush(); + + // must read (k1, v1) from tablet. + let v1 = tablet.get_value_cf(CF_DEFAULT, b"zk1").unwrap().unwrap(); + assert_eq!(v1, b"v1"); + + // delete range + let ce = CommittedEntries { + entry_and_proposals: vec![( + new_delete_range_entry( + region.id, + region.get_region_epoch().clone(), + 5, + 7, + CF_DEFAULT, + region.get_start_key(), + region.get_end_key(), + false, // notify_only + ), + vec![], + )], + }; + block_on(async { apply.apply_committed_entries(ce).await }); + + // must get none for k1. 
+ let res = tablet.get_value_cf(CF_DEFAULT, b"zk1").unwrap(); + assert!(res.is_none(), "{:?}", res); + } +} diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 5da7ec3c242..4e6eacb8f28 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -31,12 +31,21 @@ pub(crate) use self::{ #[cfg(test)] pub mod test_util { - use std::sync::Arc; + use std::sync::{ + mpsc::{channel, Receiver, Sender}, + Arc, + }; - use kvproto::kvrpcpb::ApiVersion; + use engine_traits::{CfName, CF_DEFAULT}; + use kvproto::{kvrpcpb::ApiVersion, metapb::RegionEpoch, raft_cmdpb::RaftRequestHeader}; + use raft::prelude::{Entry, EntryType}; + use raftstore::store::simple_write::SimpleWriteEncoder; use sst_importer::SstImporter; use tempfile::TempDir; + use super::{CatchUpLogs, SimpleWriteReqEncoder}; + use crate::{fsm::ApplyResReporter, router::ApplyRes}; + pub fn create_tmp_importer() -> (TempDir, Arc) { let dir = TempDir::new().unwrap(); let importer = Arc::new( @@ -44,4 +53,71 @@ pub mod test_util { ); (dir, importer) } + + pub struct MockReporter { + sender: Sender, + } + + impl MockReporter { + pub fn new() -> (Self, Receiver) { + let (tx, rx) = channel(); + (MockReporter { sender: tx }, rx) + } + } + + impl ApplyResReporter for MockReporter { + fn report(&self, apply_res: ApplyRes) { + let _ = self.sender.send(apply_res); + } + + fn redirect_catch_up_logs(&self, _c: CatchUpLogs) {} + } + + pub fn new_put_entry( + region_id: u64, + region_epoch: RegionEpoch, + k: &[u8], + v: &[u8], + term: u64, + index: u64, + ) -> Entry { + let mut encoder = SimpleWriteEncoder::with_capacity(512); + encoder.put(CF_DEFAULT, k, v); + let mut header = Box::::default(); + header.set_region_id(region_id); + header.set_region_epoch(region_epoch); + let req_encoder = SimpleWriteReqEncoder::new(header, encoder.encode(), 512, false); + let (bin, _) = req_encoder.encode(); + let mut e = Entry::default(); + 
e.set_entry_type(EntryType::EntryNormal); + e.set_term(term); + e.set_index(index); + e.set_data(bin.into()); + e + } + + pub fn new_delete_range_entry( + region_id: u64, + region_epoch: RegionEpoch, + term: u64, + index: u64, + cf: CfName, + start_key: &[u8], + end_key: &[u8], + notify_only: bool, + ) -> Entry { + let mut encoder = SimpleWriteEncoder::with_capacity(512); + encoder.delete_range(cf, start_key, end_key, notify_only); + let mut header = Box::::default(); + header.set_region_id(region_id); + header.set_region_epoch(region_epoch); + let req_encoder = SimpleWriteReqEncoder::new(header, encoder.encode(), 512, false); + let (bin, _) = req_encoder.encode(); + let mut e = Entry::default(); + e.set_entry_type(EntryType::EntryNormal); + e.set_term(term); + e.set_index(index); + e.set_data(bin.into()); + e + } } diff --git a/components/raftstore-v2/src/operation/query/capture.rs b/components/raftstore-v2/src/operation/query/capture.rs index 9debb8e0364..8378b320665 100644 --- a/components/raftstore-v2/src/operation/query/capture.rs +++ b/components/raftstore-v2/src/operation/query/capture.rs @@ -186,14 +186,10 @@ mod test { }; use futures::executor::block_on; use kvproto::{ - metapb::{Region, RegionEpoch}, - raft_cmdpb::RaftRequestHeader, + metapb::Region, raft_serverpb::{PeerState, RegionLocalState}, }; - use raft::{ - prelude::{Entry, EntryType}, - StateRole, - }; + use raft::StateRole; use raftstore::{ coprocessor::{BoxCmdObserver, CmdObserver, CoprocessorHost}, store::Config, @@ -204,34 +200,14 @@ mod test { use super::*; use crate::{ - fsm::ApplyResReporter, operation::{ - test_util::create_tmp_importer, CatchUpLogs, CommittedEntries, SimpleWriteReqEncoder, + test_util::{create_tmp_importer, new_put_entry, MockReporter}, + CommittedEntries, }, raft::Apply, - router::{build_any_channel, ApplyRes}, - SimpleWriteEncoder, + router::build_any_channel, }; - struct MockReporter { - sender: Sender, - } - - impl MockReporter { - fn new() -> (Self, Receiver) { - let 
(tx, rx) = channel(); - (MockReporter { sender: tx }, rx) - } - } - - impl ApplyResReporter for MockReporter { - fn report(&self, apply_res: ApplyRes) { - let _ = self.sender.send(apply_res); - } - - fn redirect_catch_up_logs(&self, _c: CatchUpLogs) {} - } - #[derive(Clone)] struct TestObserver { sender: Sender>, @@ -258,29 +234,6 @@ mod test { fn on_applied_current_term(&self, _: StateRole, _: &Region) {} } - fn new_put_entry( - region_id: u64, - region_epoch: RegionEpoch, - k: &[u8], - v: &[u8], - term: u64, - index: u64, - ) -> Entry { - let mut encoder = SimpleWriteEncoder::with_capacity(512); - encoder.put(CF_DEFAULT, k, v); - let mut header = Box::::default(); - header.set_region_id(region_id); - header.set_region_epoch(region_epoch); - let req_encoder = SimpleWriteReqEncoder::new(header, encoder.encode(), 512, false); - let (bin, _) = req_encoder.encode(); - let mut e = Entry::default(); - e.set_entry_type(EntryType::EntryNormal); - e.set_term(term); - e.set_index(index); - e.set_data(bin.into()); - e - } - #[test] fn test_capture_apply() { let store_id = 2; @@ -317,7 +270,8 @@ mod test { host.registry .register_cmd_observer(0, BoxCmdObserver::new(ob)); - let (dummy_scheduler, _) = dummy_scheduler(); + let (dummy_scheduler1, _) = dummy_scheduler(); + let (dummy_scheduler2, _) = dummy_scheduler(); let mut apply = Apply::new( &Config::default(), region @@ -337,7 +291,8 @@ mod test { None, importer, host, - dummy_scheduler, + dummy_scheduler1, + dummy_scheduler2, logger.clone(), ); diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index f2945f7469b..b0d84137cfd 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -22,6 +22,7 @@ use crate::{ operation::{AdminCmdResult, ApplyFlowControl, DataTrace}, router::CmdResChannel, worker::checkpoint, + TabletTask, }; pub(crate) struct Observe { @@ -74,6 +75,7 @@ pub struct Apply { coprocessor_host: CoprocessorHost, 
checkpoint_scheduler: Scheduler>, + tablet_scheduler: Scheduler>, // Whether to use the delete range API instead of deleting one by one. use_delete_range: bool, @@ -100,6 +102,7 @@ impl Apply { sst_importer: Arc, coprocessor_host: CoprocessorHost, checkpoint_scheduler: Scheduler>, + tablet_scheduler: Scheduler>, logger: Logger, ) -> Self { let mut remote_tablet = tablet_registry @@ -134,6 +137,7 @@ impl Apply { buckets, sst_importer, checkpoint_scheduler, + tablet_scheduler, use_delete_range: cfg.use_delete_range, observe: Observe { info: CmdObserveInfo::default(), @@ -337,6 +341,11 @@ impl Apply { &self.checkpoint_scheduler } + #[inline] + pub fn tablet_scheduler(&self) -> &Scheduler> { + &self.tablet_scheduler + } + #[inline] pub fn use_delete_range(&self) -> bool { self.use_delete_range diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 33272ae28bd..6434331019d 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -507,7 +507,8 @@ mod tests { let (_tmp_dir, importer) = create_tmp_importer(); let host = CoprocessorHost::::default(); - let (dummy_scheduler, _) = dummy_scheduler(); + let (dummy_scheduler1, _) = dummy_scheduler(); + let (dummy_scheduler2, _) = dummy_scheduler(); // setup peer applyer let mut apply = Apply::new( &Config::default(), @@ -523,7 +524,8 @@ mod tests { None, importer, host, - dummy_scheduler, + dummy_scheduler1, + dummy_scheduler2, logger, ); diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index f786315a008..8bf130397f7 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -8,13 +8,16 @@ use std::{ }; use collections::HashMap; -use engine_traits::{DeleteStrategy, KvEngine, Range, TabletContext, TabletRegistry, DATA_CFS}; +use engine_traits::{ + CfName, DeleteStrategy, KvEngine, Range, TabletContext, TabletRegistry, 
DATA_CFS, +}; use fail::fail_point; use kvproto::{import_sstpb::SstMeta, metapb::Region}; use raftstore::store::{TabletSnapKey, TabletSnapManager}; use slog::{debug, error, info, warn, Logger}; use sst_importer::SstImporter; use tikv_util::{ + slog_panic, time::Instant, worker::{Runnable, RunnableWithTimer}, yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, @@ -55,7 +58,15 @@ pub enum Task { region_id: u64, cb: Option>, }, - + DeleteRange { + region_id: u64, + tablet: EK, + cf: CfName, + start_key: Box<[u8]>, + end_key: Box<[u8]>, + use_delete_range: bool, + cb: Box, + }, // Gc snapshot SnapGc(Box<[TabletSnapKey]>), } @@ -105,6 +116,23 @@ impl Display for Task { on_flush_finish.is_some() ) } + Task::DeleteRange { + region_id, + cf, + start_key, + end_key, + .. + } => { + write!( + f, + "delete range cf {} [{}, {}) for region_id {}", + cf, + log_wrappers::Value::key(start_key), + log_wrappers::Value::key(end_key), + region_id, + ) + } + Task::SnapGc(snap_keys) => { write!(f, "gc snapshot {:?}", snap_keys) } @@ -179,6 +207,26 @@ impl Task { tablet: Either::Right(path), } } + + pub fn delete_range( + region_id: u64, + tablet: EK, + cf: CfName, + start_key: Box<[u8]>, + end_key: Box<[u8]>, + use_delete_range: bool, + cb: Box, + ) -> Self { + Task::DeleteRange { + region_id, + tablet, + cf, + start_key, + end_key, + use_delete_range, + cb, + } + } } pub struct Runner { @@ -415,6 +463,46 @@ impl Runner { tablet.flush_cfs(DATA_CFS, false).unwrap(); } } + + fn delete_range(&self, delete_range: Task) { + let Task::DeleteRange { region_id, tablet, cf, start_key, end_key, use_delete_range, cb } = delete_range else { + slog_panic!(self.logger, "unexpected task"; "task" => format!("{}", delete_range)) + }; + + let range = vec![Range::new(&start_key, &end_key)]; + let fail_f = |e: engine_traits::Error, strategy: DeleteStrategy| { + slog_panic!( + self.logger, + "failed to delete"; + "region_id" => region_id, + "strategy" => ?strategy, + "range_start" => 
log_wrappers::Value::key(&start_key), + "range_end" => log_wrappers::Value::key(&end_key), + "error" => ?e, + ) + }; + tablet + .delete_ranges_cf(cf, DeleteStrategy::DeleteFiles, &range) + .unwrap_or_else(|e| fail_f(e, DeleteStrategy::DeleteFiles)); + + let strategy = if use_delete_range { + DeleteStrategy::DeleteByRange + } else { + DeleteStrategy::DeleteByKey + }; + // Delete all remaining keys. + tablet + .delete_ranges_cf(cf, strategy.clone(), &range) + .unwrap_or_else(move |e| fail_f(e, strategy)); + + // TODO: support titan? + // tablet + // .delete_ranges_cf(cf, DeleteStrategy::DeleteBlobs, &range) + // .unwrap_or_else(move |e| fail_f(e, + // DeleteStrategy::DeleteBlobs)); + + cb(); + } } impl Runnable for Runner @@ -444,6 +532,7 @@ where Task::DirectDestroy { tablet, .. } => self.direct_destroy(tablet), Task::CleanupImportSst(ssts) => self.cleanup_ssts(ssts), Task::Flush { region_id, cb } => self.flush_tablet(region_id, cb), + delete_range @ Task::DeleteRange { .. } => self.delete_range(delete_range), Task::SnapGc(keys) => self.snap_gc(keys), } } From 92a77ac3547f4ab9ffd8d51f150797704e27a320 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 22 Jun 2023 01:26:41 +0800 Subject: [PATCH 0754/1149] *: update raft-engine (#14987) ref tikv/raft-engine#315, ref tikv/tikv#14743, close tikv/tikv#14743 Update raft-engine to fix data loss issue #14743 Signed-off-by: Neil Shen Co-authored-by: tonyxuqqi --- Cargo.lock | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index af215639f3a..6aa9974a757 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1040,7 +1040,7 @@ version = "3.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da95d038ede1a964ce99f49cbe27a7fb538d1da595e4b4f70b8c8f338d17bf16" dependencies = [ - "heck 0.4.0", + "heck 0.4.1", "proc-macro-error", "proc-macro2", "quote", @@ -2552,9 +2552,9 @@ dependencies = [ [[package]] name = "heck" -version = "0.4.0" +version 
= "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" @@ -4347,7 +4347,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d8b442418ea0822409d9e7d047cbf1e7e9e1760b172bf9982cf29d517c93511" dependencies = [ "bytes", - "heck 0.4.0", + "heck 0.4.1", "itertools", "lazy_static", "log", @@ -4485,7 +4485,7 @@ dependencies = [ [[package]] name = "raft-engine" version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#c9a95c82443f2f5e13ed5eac705f8c4ee5bba56c" +source = "git+https://github.com/tikv/raft-engine.git#de3ad04a2db9cdf795b1c82d7413b9b53bac92a8" dependencies = [ "byteorder", "crc32fast", @@ -4512,14 +4512,14 @@ dependencies = [ "scopeguard", "serde", "serde_repr", - "strum 0.24.1", + "strum 0.25.0", "thiserror", ] [[package]] name = "raft-engine-ctl" version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#c9a95c82443f2f5e13ed5eac705f8c4ee5bba56c" +source = "git+https://github.com/tikv/raft-engine.git#de3ad04a2db9cdf795b1c82d7413b9b53bac92a8" dependencies = [ "clap 3.1.6", "env_logger 0.10.0", @@ -5920,11 +5920,11 @@ dependencies = [ [[package]] name = "strum" -version = "0.24.1" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" dependencies = [ - "strum_macros 0.24.2", + "strum_macros 0.25.0", ] [[package]] @@ -5941,15 +5941,15 @@ dependencies = [ [[package]] name = "strum_macros" -version = "0.24.2" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4faebde00e8ff94316c01800f9054fd2ba77d30d9e922541913051d1d978918b" +checksum = 
"fe9f3bd7d2e45dcc5e265fbb88d6513e4747d8ef9444cf01a533119bce28a157" dependencies = [ - "heck 0.4.0", + "heck 0.4.1", "proc-macro2", "quote", "rustversion", - "syn 1.0.103", + "syn 2.0.18", ] [[package]] From 60c9d5ffa791699e4d1762905c52ae3e06b8b68f Mon Sep 17 00:00:00 2001 From: Lucas Date: Mon, 26 Jun 2023 11:11:35 +0800 Subject: [PATCH 0755/1149] raftstore-v2: supply SlowTrend for v2 to detect network or disk ios' jitters (#14682) close tikv/tikv#14750 Supply `SlowTrend` in raftstore-v2 for detecting network or disk ios' jitters. Signed-off-by: tonyxuqqi Signed-off-by: lucasliang Co-authored-by: tonyxuqqi --- Cargo.lock | 1 + components/raftstore-v2/src/batch/store.rs | 29 +++- components/raftstore-v2/src/fsm/store.rs | 8 + components/raftstore-v2/src/operation/life.rs | 18 ++ .../raftstore-v2/src/operation/ready/mod.rs | 17 +- components/raftstore-v2/src/router/message.rs | 7 +- components/raftstore-v2/src/worker/pd/mod.rs | 119 +++++++++++++- .../raftstore-v2/src/worker/pd/slowness.rs | 154 ++++++++++++++++++ .../raftstore-v2/src/worker/pd/store.rs | 96 ++++++++++- .../raftstore/src/store/local_metrics.rs | 36 ++++ components/raftstore/src/store/metrics.rs | 4 +- components/raftstore/src/store/util.rs | 18 +- components/raftstore/src/store/worker/pd.rs | 37 ++++- tests/Cargo.toml | 1 + .../raftstore/test_status_command.rs | 71 +++++--- 15 files changed, 569 insertions(+), 47 deletions(-) create mode 100644 components/raftstore-v2/src/worker/pd/slowness.rs diff --git a/Cargo.lock b/Cargo.lock index 6aa9974a757..898ad6a60d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6413,6 +6413,7 @@ dependencies = [ "raft", "raft_log_engine", "raftstore", + "raftstore-v2", "rand 0.8.5", "rand_xorshift", "resource_control", diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 137862d742a..2c5a2de41b2 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -32,8 +32,10 @@ use 
raftstore::{ GlobalStoreStat, LocalStoreStat, }, local_metrics::RaftMetrics, + util::LatencyInspector, AutoSplitController, Config, ReadRunner, ReadTask, RefreshConfigTask, SplitCheckRunner, - SplitCheckTask, StoreWriters, TabletSnapManager, Transport, WriteSenders, + SplitCheckTask, StoreWriters, TabletSnapManager, Transport, WriteRouterContext, + WriteSenders, }, }; use resource_metering::CollectorRegHandle; @@ -101,6 +103,9 @@ pub struct StoreContext { pub store_stat: LocalStoreStat, pub sst_importer: Arc, pub key_manager: Option>, + + /// Inspector for latency inspecting + pub pending_latency_inspect: Vec, } impl StoreContext { @@ -273,6 +278,27 @@ impl PollHandler>>]) { let dur = self.timer.saturating_elapsed(); + + let mut latency_inspect = std::mem::take(&mut self.poll_ctx.pending_latency_inspect); + for inspector in &mut latency_inspect { + inspector.record_store_process(dur); + } + // Use the valid size of async-ios for generating `writer_id` when the local + // senders haven't been updated by `poller.begin(). 
+ let writer_id = rand::random::() + % std::cmp::min( + self.poll_ctx.cfg.store_io_pool_size, + self.poll_ctx.write_senders().size(), + ); + if let Err(err) = self.poll_ctx.write_senders()[writer_id].try_send( + raftstore::store::WriteMsg::LatencyInspect { + send_time: TiInstant::now(), + inspector: latency_inspect, + }, + None, + ) { + warn!(self.poll_ctx.logger, "send latency inspecting to write workers failed"; "err" => ?err); + } self.poll_ctx .raft_metrics .process_ready @@ -499,6 +525,7 @@ where store_stat: self.global_stat.local(), sst_importer: self.sst_importer.clone(), key_manager: self.key_manager.clone(), + pending_latency_inspect: vec![], }; poll_ctx.update_ticks_timeout(); let cfg_tracker = self.cfg.clone().tracker("raftstore".to_string()); diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 6498d74b061..0ee3c59812e 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -310,6 +310,14 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { StoreMsg::WaitFlush { region_id, ch } => { self.fsm.store.on_wait_flush(self.store_ctx, region_id, ch) } + StoreMsg::LatencyInspect { + send_time, + inspector, + } => self.fsm.store.on_update_latency_inspectors( + self.store_ctx, + send_time, + inspector, + ), } } } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 20756df8bac..e2ad63ed62c 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -476,6 +476,24 @@ impl Store { let _ = ctx.router.send(region_id, PeerMsg::RaftMessage(msg)); } } + + pub fn on_update_latency_inspectors( + &self, + ctx: &mut StoreContext, + start_ts: Instant, + mut inspector: util::LatencyInspector, + ) where + EK: KvEngine, + ER: RaftEngine, + T: Transport, + { + // Record the last statistics of commit-log-duration and store-write-duration. 
+ inspector.record_store_wait(start_ts.saturating_elapsed()); + inspector.record_store_commit(ctx.raft_metrics.stat_commit_log.avg()); + // Reset the stat_commit_log and wait it to be refreshed in the next tick. + ctx.raft_metrics.stat_commit_log.reset(); + ctx.pending_latency_inspect.push(inspector); + } } impl Peer { diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 7953d5ea148..1ceb4d9d154 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -54,7 +54,7 @@ use tikv_util::{ log::SlogFormat, slog_panic, store::find_peer, - time::{duration_to_sec, monotonic_raw_now}, + time::{duration_to_sec, monotonic_raw_now, Duration}, }; pub use self::{ @@ -911,6 +911,7 @@ impl Peer { return; } let now = Instant::now(); + let stat_raft_commit_log = &mut ctx.raft_metrics.stat_commit_log; for i in old_index + 1..=new_index { if let Some((term, trackers)) = self.proposals().find_trackers(i) { if self.entry_storage().term(i).map_or(false, |t| t == term) { @@ -921,10 +922,16 @@ impl Peer { &ctx.raft_metrics.wf_commit_not_persist_log }; for tracker in trackers { - tracker.observe(now, hist, |t| { - t.metrics.commit_not_persisted = !commit_persisted; - &mut t.metrics.wf_commit_log_nanos - }); + // Collect the metrics related to commit_log + // durations. 
+ stat_raft_commit_log.record(Duration::from_nanos(tracker.observe( + now, + hist, + |t| { + t.metrics.commit_not_persisted = !commit_persisted; + &mut t.metrics.wf_commit_log_nanos + }, + ))); } } } diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 65026af9a1d..844246edbdd 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -12,7 +12,7 @@ use kvproto::{ }; use raftstore::store::{ fsm::ChangeObserver, metrics::RaftEventDurationType, simple_write::SimpleWriteBinary, - FetchedLogs, GenSnapRes, TabletSnapKey, + util::LatencyInspector, FetchedLogs, GenSnapRes, TabletSnapKey, }; use resource_control::ResourceMetered; use tikv_util::time::Instant; @@ -338,6 +338,11 @@ pub enum StoreMsg { region_id: u64, ch: super::FlushChannel, }, + /// Inspect the latency of raftstore. + LatencyInspect { + send_time: Instant, + inspector: LatencyInspector, + }, } impl ResourceMetered for StoreMsg {} diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index e06d161fe08..94222d67d2e 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -12,15 +12,17 @@ use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use kvproto::{metapb, pdpb}; use pd_client::{BucketStat, PdClient}; use raftstore::store::{ - util::KeysInfoFormatter, AutoSplitController, Config, FlowStatsReporter, PdStatsMonitor, - ReadStats, RegionReadProgressRegistry, SplitInfo, StoreStatsReporter, TabletSnapManager, - TxnExt, WriteStats, NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, + metrics::STORE_INSPECT_DURATION_HISTOGRAM, + util::{KeysInfoFormatter, LatencyInspector, RaftstoreDuration}, + AutoSplitController, Config, FlowStatsReporter, PdStatsMonitor, ReadStats, + RegionReadProgressRegistry, SplitInfo, StoreStatsReporter, TabletSnapManager, TxnExt, + WriteStats, 
NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, }; use resource_metering::{Collector, CollectorRegHandle, RawRecords}; -use slog::{error, Logger}; +use slog::{error, warn, Logger}; use tikv_util::{ config::VersionTrack, - time::UnixSecs, + time::{Instant as TiInstant, UnixSecs}, worker::{Runnable, Scheduler}, }; use yatp::{task::future::TaskCell, Remote}; @@ -32,6 +34,7 @@ use crate::{ mod misc; mod region; +mod slowness; mod split; mod store; @@ -84,6 +87,16 @@ pub enum Task { store_id: u64, min_resolved_ts: u64, }, + // In slowness.rs + InspectLatency { + send_time: TiInstant, + inspector: LatencyInspector, + }, + TickSlownessStats, + UpdateSlownessStats { + tick_id: u64, + duration: RaftstoreDuration, + }, } impl Display for Task { @@ -148,6 +161,23 @@ impl Display for Task { "report min resolved ts: store {}, resolved ts {}", store_id, min_resolved_ts, ), + Task::InspectLatency { + send_time, + ref inspector, + } => write!( + f, + "inspect latency: send_time {:?}, inspector {:?}", + send_time, inspector + ), + Task::TickSlownessStats => write!(f, "tick slowness statistics"), + Task::UpdateSlownessStats { + tick_id, + ref duration, + } => write!( + f, + "update slowness statistics: tick_id {}, duration {:?}", + tick_id, duration + ), } } } @@ -171,6 +201,7 @@ where // For store. start_ts: UnixSecs, store_stat: store::StoreStat, + store_heartbeat_interval: std::time::Duration, // For region. 
region_peers: HashMap, @@ -183,6 +214,9 @@ where concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, + // For slowness detection + slowness_stats: slowness::SlownessStatistics, + logger: Logger, shutdown: Arc, cfg: Arc>, @@ -212,9 +246,11 @@ where shutdown: Arc, cfg: Arc>, ) -> Result { + let store_heartbeat_interval = cfg.value().pd_store_heartbeat_tick_interval.0; let mut stats_monitor = PdStatsMonitor::new( - cfg.value().pd_store_heartbeat_tick_interval.0 / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, + store_heartbeat_interval / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, cfg.value().report_min_resolved_ts_interval.0, + cfg.value().inspect_interval.0, PdReporter::new(pd_scheduler, logger.clone()), ); stats_monitor.start( @@ -223,6 +259,7 @@ where collector_reg_handle, store_id, )?; + let slowness_stats = slowness::SlownessStatistics::new(&cfg.value()); Ok(Self { store_id, pd_client, @@ -231,6 +268,7 @@ where snap_mgr, router, stats_monitor, + store_heartbeat_interval, remote, start_ts: UnixSecs::zero(), store_stat: store::StoreStat::default(), @@ -240,6 +278,7 @@ where is_hb_receiver_scheduled: false, concurrency_manager, causal_ts_provider, + slowness_stats, logger, shutdown, cfg, @@ -289,6 +328,14 @@ where store_id, min_resolved_ts, } => self.handle_report_min_resolved_ts(store_id, min_resolved_ts), + Task::InspectLatency { + send_time, + inspector, + } => self.handle_inspect_latency(send_time, inspector), + Task::TickSlownessStats => self.handle_slowness_stats_tick(), + Task::UpdateSlownessStats { tick_id, duration } => { + self.handle_update_slowness_stats(tick_id, duration) + } } } } @@ -372,6 +419,66 @@ impl StoreStatsReporter for PdReporter { ); } } + + fn update_latency_stats(&self, timer_tick: u64) { + // Tick slowness statistics. + { + if let Err(e) = self.scheduler.schedule(Task::TickSlownessStats) { + error!( + self.logger, + "failed to send tick slowness statistics to pd worker"; + "err" => ?e, + ); + } + } + // Tick a new latency inspector. 
+ { + let scheduler = self.scheduler.clone(); + let logger = self.logger.clone(); + let tick_id = timer_tick; + + let inspector = LatencyInspector::new( + tick_id, + Box::new(move |tick_id, duration| { + let dur = duration.sum(); + + STORE_INSPECT_DURATION_HISTOGRAM + .with_label_values(&["store_process"]) + .observe(tikv_util::time::duration_to_sec( + duration.store_process_duration.unwrap(), + )); + STORE_INSPECT_DURATION_HISTOGRAM + .with_label_values(&["store_wait"]) + .observe(tikv_util::time::duration_to_sec( + duration.store_wait_duration.unwrap(), + )); + STORE_INSPECT_DURATION_HISTOGRAM + .with_label_values(&["store_commit"]) + .observe(tikv_util::time::duration_to_sec( + duration.store_commit_duration.unwrap(), + )); + STORE_INSPECT_DURATION_HISTOGRAM + .with_label_values(&["all"]) + .observe(tikv_util::time::duration_to_sec(dur)); + if let Err(e) = + scheduler.schedule(Task::UpdateSlownessStats { tick_id, duration }) + { + warn!(logger, "schedule pd UpdateSlownessStats task failed"; "err" => ?e); + } + }), + ); + if let Err(e) = self.scheduler.schedule(Task::InspectLatency { + send_time: TiInstant::now(), + inspector, + }) { + error!( + self.logger, + "failed to send inspect latency to pd worker"; + "err" => ?e, + ); + } + } + } } mod requests { diff --git a/components/raftstore-v2/src/worker/pd/slowness.rs b/components/raftstore-v2/src/worker/pd/slowness.rs new file mode 100644 index 00000000000..dd345cbfed3 --- /dev/null +++ b/components/raftstore-v2/src/worker/pd/slowness.rs @@ -0,0 +1,154 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::time::{Duration, Instant}; + +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::pdpb; +use pd_client::PdClient; +use raftstore::store::{metrics::*, util::RaftstoreDuration, Config}; +use tikv_util::trend::{RequestPerSecRecorder, Trend}; + +use super::Runner; +pub struct SlownessStatistics { + /// Detector to detect NetIo&DiskIo jitters. 
+ slow_cause: Trend, + /// Reactor as an assistant detector to detect the QPS jitters. + slow_result: Trend, + slow_result_recorder: RequestPerSecRecorder, + last_tick_finished: bool, +} + +impl SlownessStatistics { + #[inline] + pub fn new(cfg: &Config) -> Self { + Self { + slow_cause: Trend::new( + // Disable SpikeFilter for now + Duration::from_secs(0), + STORE_SLOW_TREND_MISC_GAUGE_VEC.with_label_values(&["spike_filter_value"]), + STORE_SLOW_TREND_MISC_GAUGE_VEC.with_label_values(&["spike_filter_count"]), + Duration::from_secs(180), + Duration::from_secs(30), + Duration::from_secs(120), + Duration::from_secs(600), + 1, + tikv_util::time::duration_to_us(Duration::from_micros(500)), + STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC.with_label_values(&["L1"]), + STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC.with_label_values(&["L2"]), + cfg.slow_trend_unsensitive_cause, + ), + slow_result: Trend::new( + // Disable SpikeFilter for now + Duration::from_secs(0), + STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC.with_label_values(&["spike_filter_value"]), + STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC.with_label_values(&["spike_filter_count"]), + Duration::from_secs(120), + Duration::from_secs(15), + Duration::from_secs(60), + Duration::from_secs(300), + 1, + 2000, + STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC + .with_label_values(&["L1"]), + STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC + .with_label_values(&["L2"]), + cfg.slow_trend_unsensitive_result, + ), + slow_result_recorder: RequestPerSecRecorder::new(), + last_tick_finished: true, + } + } +} + +impl Runner +where + EK: KvEngine, + ER: RaftEngine, + T: PdClient + 'static, +{ + /// Record slowness periodically. + pub fn handle_update_slowness_stats(&mut self, _tick: u64, duration: RaftstoreDuration) { + self.slowness_stats.last_tick_finished = true; + // TODO: It's more appropriate to divide the factor into `Disk IO factor` and + // `Net IO factor`. 
+ // Currently, to make the detection and judgement of Slowness of V2 compactible + // to V1, it summarizes all factors by `sum` simplily, approved valid to common + // cases when there exists IO jitters on Network or Disk. + self.slowness_stats.slow_cause.record( + tikv_util::time::duration_to_us(duration.sum()), + Instant::now(), + ); + } + + pub fn handle_slowness_stats_tick(&mut self) { + // The following code records a periodic "white noise", which helps + // mitigate any minor fluctuations in disk I/O or network I/O latency. + // After conducting extensive e2e testing, "100ms" has been determined + // to be the most suitable choice for it. + self.slowness_stats + .slow_cause + .record(100_000, Instant::now()); // 100ms + // Handle timeout if last tick is not finished as expected. + if !self.slowness_stats.last_tick_finished && self.is_store_heartbeat_delayed() { + // If the last slowness tick already reached abnormal state and was delayed for + // reporting by `store-heartbeat` to PD, we should report it here manually as a + // FAKE `store-heartbeat`. It's an assurance that the heartbeat to + // PD is not lost. Normally, this case rarely happens in + // raftstore-v2. + self.handle_fake_store_heartbeat(); + } + // Move to next tick. + self.slowness_stats.last_tick_finished = false; + } + + pub fn update_slowness_in_store_stats(&mut self, stats: &mut pdpb::StoreStats, query_num: u64) { + let mut slow_trend = pdpb::SlowTrend::default(); + // TODO: update the parameters of SlowTrend to make it can detect slowness + // in corner cases. 
+ slow_trend.set_cause_rate(self.slowness_stats.slow_cause.increasing_rate()); + slow_trend.set_cause_value(self.slowness_stats.slow_cause.l0_avg()); + let total_query_num = self + .slowness_stats + .slow_result_recorder + .record_and_get_current_rps(query_num, Instant::now()); + if let Some(total_query_num) = total_query_num { + self.slowness_stats + .slow_result + .record(total_query_num as u64, Instant::now()); + slow_trend.set_result_value(self.slowness_stats.slow_result.l0_avg()); + let slow_trend_result_rate = self.slowness_stats.slow_result.increasing_rate(); + slow_trend.set_result_rate(slow_trend_result_rate); + STORE_SLOW_TREND_RESULT_GAUGE.set(slow_trend_result_rate); + STORE_SLOW_TREND_RESULT_VALUE_GAUGE.set(total_query_num); + } else { + // Just to mark the invalid range on the graphic + STORE_SLOW_TREND_RESULT_VALUE_GAUGE.set(-100.0); + } + stats.set_slow_trend(slow_trend); + self.flush_slowness_metrics(); + } + + fn flush_slowness_metrics(&mut self) { + // Report slowness of Trend. + STORE_SLOW_TREND_GAUGE.set(self.slowness_stats.slow_cause.increasing_rate()); + STORE_SLOW_TREND_L0_GAUGE.set(self.slowness_stats.slow_cause.l0_avg()); + STORE_SLOW_TREND_L1_GAUGE.set(self.slowness_stats.slow_cause.l1_avg()); + STORE_SLOW_TREND_L2_GAUGE.set(self.slowness_stats.slow_cause.l2_avg()); + STORE_SLOW_TREND_L0_L1_GAUGE.set(self.slowness_stats.slow_cause.l0_l1_rate()); + STORE_SLOW_TREND_L1_L2_GAUGE.set(self.slowness_stats.slow_cause.l1_l2_rate()); + STORE_SLOW_TREND_L1_MARGIN_ERROR_GAUGE + .set(self.slowness_stats.slow_cause.l1_margin_error_base()); + STORE_SLOW_TREND_L2_MARGIN_ERROR_GAUGE + .set(self.slowness_stats.slow_cause.l2_margin_error_base()); + // Report result of Trend. 
+ STORE_SLOW_TREND_RESULT_L0_GAUGE.set(self.slowness_stats.slow_result.l0_avg()); + STORE_SLOW_TREND_RESULT_L1_GAUGE.set(self.slowness_stats.slow_result.l1_avg()); + STORE_SLOW_TREND_RESULT_L2_GAUGE.set(self.slowness_stats.slow_result.l2_avg()); + STORE_SLOW_TREND_RESULT_L0_L1_GAUGE.set(self.slowness_stats.slow_result.l0_l1_rate()); + STORE_SLOW_TREND_RESULT_L1_L2_GAUGE.set(self.slowness_stats.slow_result.l1_l2_rate()); + STORE_SLOW_TREND_RESULT_L1_MARGIN_ERROR_GAUGE + .set(self.slowness_stats.slow_result.l1_margin_error_base()); + STORE_SLOW_TREND_RESULT_L2_MARGIN_ERROR_GAUGE + .set(self.slowness_stats.slow_result.l2_margin_error_base()); + } +} diff --git a/components/raftstore-v2/src/worker/pd/store.rs b/components/raftstore-v2/src/worker/pd/store.rs index 8f30b85d6f3..64ac9ac9e8d 100644 --- a/components/raftstore-v2/src/worker/pd/store.rs +++ b/components/raftstore-v2/src/worker/pd/store.rs @@ -14,13 +14,23 @@ use pd_client::{ PdClient, }; use prometheus::local::LocalHistogram; -use slog::{error, warn}; -use tikv_util::{metrics::RecordPairVec, store::QueryStats, time::UnixSecs, topn::TopN}; +use raftstore::store::{metrics::STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC, util::LatencyInspector}; +use slog::{error, info, warn}; +use tikv_util::{ + metrics::RecordPairVec, + store::QueryStats, + time::{Duration, Instant as TiInstant, UnixSecs}, + topn::TopN, +}; use super::Runner; +use crate::router::StoreMsg; const HOTSPOT_REPORT_CAPACITY: usize = 1000; +/// Max limitation of delayed store_heartbeat. 
+const STORE_HEARTBEAT_DELAY_LIMIT: u64 = Duration::from_secs(5 * 60).as_secs(); + fn hotspot_key_report_threshold() -> u64 { const HOTSPOT_KEY_RATE_THRESHOLD: u64 = 128; fail_point!("mock_hotspot_threshold", |_| { 0 }); @@ -215,6 +225,7 @@ where .store_stat .engine_total_query_num .sub_query_stats(&self.store_stat.engine_last_query_num); + let last_query_sum = res.get_all_query_num(); stats.set_query_stats(res.0); stats.set_cpu_usages(self.store_stat.store_cpu_usages.clone().into()); @@ -229,7 +240,15 @@ where self.store_stat .engine_last_query_num .fill_query_stats(&self.store_stat.engine_total_query_num); - self.store_stat.last_report_ts = UnixSecs::now(); + self.store_stat.last_report_ts = + if self.store_stat.last_report_ts.into_inner() as u32 == stats.get_start_time() { + // The given Task::StoreHeartbeat should be a fake heartbeat to PD, we won't + // update the last_report_ts to avoid incorrectly marking current TiKV node in + // normal state. + self.store_stat.last_report_ts + } else { + UnixSecs::now() + }; self.store_stat.region_bytes_written.flush(); self.store_stat.region_keys_written.flush(); self.store_stat.region_bytes_read.flush(); @@ -245,18 +264,83 @@ where .with_label_values(&["used"]) .set(used_size as i64); - // TODO: slow score + // Update slowness statistics + self.update_slowness_in_store_stats(&mut stats, last_query_sum); let resp = self.pd_client.store_heartbeat(stats, None, None); let logger = self.logger.clone(); let f = async move { - if let Err(e) = resp.await { - error!(logger, "store heartbeat failed"; "err" => ?e); + match resp.await { + Ok(mut resp) => { + // TODO: unsafe recovery + + // Attention, as Hibernate Region is eliminated in + // raftstore-v2, followings just mock the awaken + // operation. 
+ if resp.awaken_regions.take().is_some() { + info!( + logger, + "Ignored AwakenRegions in raftstore-v2 as no hibernated regions in raftstore-v2" + ); + } + } + Err(e) => { + error!(logger, "store heartbeat failed"; "err" => ?e); + } } }; self.remote.spawn(f); } + /// Force to send a special heartbeat to pd when current store is hung on + /// some special circumstances, i.e. disk busy, handler busy and others. + pub fn handle_fake_store_heartbeat(&mut self) { + let mut stats = pdpb::StoreStats::default(); + stats.set_store_id(self.store_id); + stats.set_region_count(self.region_peers.len() as u32); + + let snap_stats = self.snap_mgr.stats(); + stats.set_sending_snap_count(snap_stats.sending_count as u32); + stats.set_receiving_snap_count(snap_stats.receiving_count as u32); + STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC + .with_label_values(&["sending"]) + .set(snap_stats.sending_count as i64); + STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC + .with_label_values(&["receiving"]) + .set(snap_stats.receiving_count as i64); + + // This calling means that the current node cannot report heartbeat in normaly + // scheduler. That is, the current node must in `busy` state. Meanwhile, mark + // this fake `StoreStats.start_time` == `store_stat.last_report_ts` to reveal + // that current heartbeat is fake and used for reporting slowness forcely. + stats.set_start_time(self.store_stat.last_report_ts.into_inner() as u32); + stats.set_is_busy(true); + + // We do not need to report store_info, so we just set `None` here. 
+ self.handle_store_heartbeat(stats); + warn!(self.logger, "scheduling store_heartbeat timeout, force report store slow score to pd."; + "store_id" => self.store_id, + ); + } + + pub fn is_store_heartbeat_delayed(&self) -> bool { + let now = UnixSecs::now(); + let interval_second = now.into_inner() - self.store_stat.last_report_ts.into_inner(); + (interval_second >= self.store_heartbeat_interval.as_secs()) + && (interval_second <= STORE_HEARTBEAT_DELAY_LIMIT) + } + + pub fn handle_inspect_latency(&self, send_time: TiInstant, inspector: LatencyInspector) { + let msg = StoreMsg::LatencyInspect { + send_time, + inspector, + }; + if let Err(e) = self.router.send_control(msg) { + warn!(self.logger, "pd worker send latency inspecter failed"; + "err" => ?e); + } + } + pub fn handle_update_store_infos( &mut self, cpu_usages: RecordPairVec, diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index baf63814416..5460a57ae0f 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -68,6 +68,38 @@ impl RaftSendMessageMetrics { } } +#[derive(Default)] +pub struct RaftCommitLogStatistics { + pub last_commit_log_duration_sum: Duration, + pub last_commit_log_count_sum: u64, +} + +impl RaftCommitLogStatistics { + #[inline] + pub fn record(&mut self, dur: Duration) { + self.last_commit_log_count_sum += 1; + self.last_commit_log_duration_sum += dur; + } + + #[inline] + pub fn avg(&self) -> Duration { + if self.last_commit_log_count_sum > 0 { + Duration::from_micros( + self.last_commit_log_duration_sum.as_micros() as u64 + / self.last_commit_log_count_sum, + ) + } else { + Duration::default() + } + } + + #[inline] + pub fn reset(&mut self) { + self.last_commit_log_count_sum = 0; + self.last_commit_log_duration_sum = Duration::default(); + } +} + /// The buffered metrics counters for raft. 
pub struct RaftMetrics { // local counter @@ -97,6 +129,9 @@ pub struct RaftMetrics { pub wf_commit_log: LocalHistogram, pub wf_commit_not_persist_log: LocalHistogram, + // local statistics for slowness + pub stat_commit_log: RaftCommitLogStatistics, + pub leader_missing: Arc>>, last_flush_time: Instant, @@ -132,6 +167,7 @@ impl RaftMetrics { wf_persist_log: STORE_WF_PERSIST_LOG_DURATION_HISTOGRAM.local(), wf_commit_log: STORE_WF_COMMIT_LOG_DURATION_HISTOGRAM.local(), wf_commit_not_persist_log: STORE_WF_COMMIT_NOT_PERSIST_LOG_DURATION_HISTOGRAM.local(), + stat_commit_log: RaftCommitLogStatistics::default(), leader_missing: Arc::default(), last_flush_time: Instant::now_coarse(), } diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 699d861ff9a..870ce74b0f9 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -781,7 +781,7 @@ lazy_static! { register_gauge!("tikv_raftstore_slow_score", "Slow score of the store.").unwrap(); pub static ref STORE_SLOW_TREND_GAUGE: Gauge = - register_gauge!("tikv_raftstore_slow_trend", "Slow trend changing rate").unwrap(); + register_gauge!("tikv_raftstore_slow_trend", "Slow trend changing rate.").unwrap(); pub static ref STORE_SLOW_TREND_L0_GAUGE: Gauge = register_gauge!("tikv_raftstore_slow_trend_l0", "Slow trend L0 window avg value.").unwrap(); @@ -811,7 +811,7 @@ lazy_static! 
{ register_int_gauge_vec!( "tikv_raftstore_slow_trend_misc", "Slow trend uncatelogued gauge(s)", - &["type"] + &["window"] ).unwrap(); pub static ref STORE_SLOW_TREND_RESULT_VALUE_GAUGE: Gauge = diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 3a8ad8278fb..b2fa44b6d4c 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -5,7 +5,7 @@ use std::{ cmp, collections::{HashMap, VecDeque}, fmt, - fmt::Display, + fmt::{Debug, Display}, option::Option, sync::{ atomic::{AtomicBool, AtomicU64, Ordering as AtomicOrdering}, @@ -1662,6 +1662,7 @@ pub struct RaftstoreDuration { pub store_wait_duration: Option, pub store_process_duration: Option, pub store_write_duration: Option, + pub store_commit_duration: Option, pub apply_wait_duration: Option, pub apply_process_duration: Option, } @@ -1671,6 +1672,7 @@ impl RaftstoreDuration { self.store_wait_duration.unwrap_or_default() + self.store_process_duration.unwrap_or_default() + self.store_write_duration.unwrap_or_default() + + self.store_commit_duration.unwrap_or_default() + self.apply_wait_duration.unwrap_or_default() + self.apply_process_duration.unwrap_or_default() } @@ -1683,6 +1685,16 @@ pub struct LatencyInspector { cb: Box, } +impl Debug for LatencyInspector { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + fmt, + "LatencyInspector: id {} duration: {:?}", + self.id, self.duration + ) + } +} + impl LatencyInspector { pub fn new(id: u64, cb: Box) -> Self { Self { @@ -1704,6 +1716,10 @@ impl LatencyInspector { self.duration.store_write_duration = Some(duration); } + pub fn record_store_commit(&mut self, duration: std::time::Duration) { + self.duration.store_commit_duration = Some(duration); + } + pub fn record_apply_wait(&mut self, duration: std::time::Duration) { self.duration.apply_wait_duration = Some(duration); } diff --git a/components/raftstore/src/store/worker/pd.rs 
b/components/raftstore/src/store/worker/pd.rs index 413389d8cf6..13cbdf2cfa5 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -486,6 +486,7 @@ pub trait StoreStatsReporter: Send + Clone + Sync + 'static + Collector { ); fn report_min_resolved_ts(&self, store_id: u64, min_resolved_ts: u64); fn auto_split(&self, split_infos: Vec); + fn update_latency_stats(&self, timer_tick: u64); } impl StoreStatsReporter for WrappedScheduler @@ -534,6 +535,11 @@ where ); } } + + fn update_latency_stats(&self, timer_tick: u64) { + debug!("update latency statistics not implemented for raftstore-v1"; + "tick" => timer_tick); + } } pub struct StatsMonitor @@ -549,13 +555,19 @@ where load_base_split_check_interval: Duration, collect_tick_interval: Duration, report_min_resolved_ts_interval: Duration, + inspect_latency_interval: Duration, } impl StatsMonitor where T: StoreStatsReporter, { - pub fn new(interval: Duration, report_min_resolved_ts_interval: Duration, reporter: T) -> Self { + pub fn new( + interval: Duration, + report_min_resolved_ts_interval: Duration, + inspect_latency_interval: Duration, + reporter: T, + ) -> Self { StatsMonitor { reporter, handle: None, @@ -568,7 +580,12 @@ where interval, ), report_min_resolved_ts_interval: config(report_min_resolved_ts_interval), - collect_tick_interval: cmp::min(default_collect_tick_interval(), interval), + // Use `inspect_latency_interval` as the minimal limitation for collecting tick. 
+ collect_tick_interval: cmp::min( + inspect_latency_interval, + cmp::min(default_collect_tick_interval(), interval), + ), + inspect_latency_interval, } } @@ -581,7 +598,12 @@ where collector_reg_handle: CollectorRegHandle, store_id: u64, ) -> Result<(), io::Error> { - if self.collect_tick_interval < default_collect_tick_interval() { + if self.collect_tick_interval + < cmp::min( + self.inspect_latency_interval, + default_collect_tick_interval(), + ) + { info!( "interval is too small, skip stats monitoring. If we are running tests, it is normal, otherwise a check is needed." ); @@ -598,6 +620,9 @@ where let report_min_resolved_ts_interval = self .report_min_resolved_ts_interval .div_duration_f64(tick_interval) as u64; + let update_latency_stats_interval = self + .inspect_latency_interval + .div_duration_f64(tick_interval) as u64; let (timer_tx, timer_rx) = mpsc::channel(); self.timer = Some(timer_tx); @@ -659,6 +684,9 @@ where region_read_progress.get_min_resolved_ts(), ); } + if is_enable_tick(timer_cnt, update_latency_stats_interval) { + reporter.update_latency_stats(timer_cnt); + } timer_cnt += 1; } tikv_alloc::remove_thread_memory_accessor(); @@ -952,6 +980,7 @@ where let mut stats_monitor = StatsMonitor::new( interval, cfg.report_min_resolved_ts_interval.0, + cfg.inspect_interval.0, WrappedScheduler(scheduler.clone()), ); if let Err(e) = stats_monitor.start( @@ -1446,6 +1475,7 @@ where STORE_SLOW_TREND_L1_L2_GAUGE.set(self.slow_trend_cause.l1_l2_rate()); STORE_SLOW_TREND_L1_MARGIN_ERROR_GAUGE.set(self.slow_trend_cause.l1_margin_error_base()); STORE_SLOW_TREND_L2_MARGIN_ERROR_GAUGE.set(self.slow_trend_cause.l2_margin_error_base()); + // Report results of all slow Trends. 
STORE_SLOW_TREND_RESULT_L0_GAUGE.set(self.slow_trend_result.l0_avg()); STORE_SLOW_TREND_RESULT_L1_GAUGE.set(self.slow_trend_result.l1_avg()); STORE_SLOW_TREND_RESULT_L2_GAUGE.set(self.slow_trend_result.l2_avg()); @@ -2554,6 +2584,7 @@ mod tests { let mut stats_monitor = StatsMonitor::new( Duration::from_secs(interval), Duration::from_secs(0), + Duration::from_secs(interval), WrappedScheduler(scheduler), ); let store_meta = Arc::new(Mutex::new(StoreMeta::new(0))); diff --git a/tests/Cargo.toml b/tests/Cargo.toml index facaa2eeae9..2f74b4e2fe5 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -94,6 +94,7 @@ protobuf = { version = "2.8", features = ["bytes"] } raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } raft_log_engine = { workspace = true } raftstore = { workspace = true } +raftstore-v2 = { workspace = true } rand = "0.8.3" resource_control = { workspace = true } slog = { workspace = true } diff --git a/tests/integrations/raftstore/test_status_command.rs b/tests/integrations/raftstore/test_status_command.rs index 9b88fbefc8c..8565d936d9f 100644 --- a/tests/integrations/raftstore/test_status_command.rs +++ b/tests/integrations/raftstore/test_status_command.rs @@ -1,13 +1,15 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. 
-use raftstore::store::{msg::StoreMsg, util::LatencyInspector}; -use test_raftstore::*; +use raftstore::store::{msg::StoreMsg as StoreMsgV1, util::LatencyInspector}; +use raftstore_v2::router::StoreMsg as StoreMsgV2; +use test_raftstore::Simulator as S1; +use test_raftstore_v2::Simulator as S2; use tikv_util::{time::Instant, HandyRwLock}; #[test] fn test_region_detail() { let count = 5; - let mut cluster = new_server_cluster(0, count); + let mut cluster = test_raftstore::new_server_cluster(0, count); cluster.run(); let leader = cluster.leader_of_region(1).unwrap(); @@ -28,29 +30,54 @@ fn test_region_detail() { #[test] fn test_latency_inspect() { - let mut cluster = new_node_cluster(0, 1); - cluster.cfg.raft_store.store_io_pool_size = 2; - cluster.run(); - let router = cluster.sim.wl().get_router(1).unwrap(); - let (tx, rx) = std::sync::mpsc::sync_channel(10); - let inspector = LatencyInspector::new( - 1, - Box::new(move |_, duration| { - let dur = duration.sum(); - tx.send(dur).unwrap(); - }), + let mut cluster_v1 = test_raftstore::new_node_cluster(0, 1); + cluster_v1.cfg.raft_store.store_io_pool_size = 2; + cluster_v1.run(); + let mut cluster_v2 = test_raftstore_v2::new_node_cluster(0, 1); + cluster_v2.run(); + let (router_v1, router_v2) = ( + cluster_v1.sim.wl().get_router(1).unwrap(), + cluster_v2.sim.wl().get_router(1).unwrap(), ); - let msg = StoreMsg::LatencyInspect { - send_time: Instant::now(), - inspector, - }; - router.send_control(msg).unwrap(); - rx.recv_timeout(std::time::Duration::from_secs(2)).unwrap(); + { + // Test send LatencyInspect to V1. 
+ let (tx, rx) = std::sync::mpsc::sync_channel(10); + let inspector = LatencyInspector::new( + 1, + Box::new(move |_, duration| { + let dur = duration.sum(); + tx.send(dur).unwrap(); + }), + ); + let msg = StoreMsgV1::LatencyInspect { + send_time: Instant::now(), + inspector, + }; + router_v1.send_control(msg).unwrap(); + rx.recv_timeout(std::time::Duration::from_secs(2)).unwrap(); + } + { + // Test send LatencyInspect to V2. + let (tx, rx) = std::sync::mpsc::sync_channel(10); + let inspector = LatencyInspector::new( + 1, + Box::new(move |_, duration| { + let dur = duration.sum(); + tx.send(dur).unwrap(); + }), + ); + let msg = StoreMsgV2::LatencyInspect { + send_time: Instant::now(), + inspector, + }; + router_v2.send_control(msg).unwrap(); + rx.recv_timeout(std::time::Duration::from_secs(2)).unwrap(); + } } #[test] fn test_sync_latency_inspect() { - let mut cluster = new_node_cluster(0, 1); + let mut cluster = test_raftstore::new_node_cluster(0, 1); cluster.cfg.raft_store.store_io_pool_size = 0; cluster.run(); let router = cluster.sim.wl().get_router(1).unwrap(); @@ -62,7 +89,7 @@ fn test_sync_latency_inspect() { tx.send(dur).unwrap(); }), ); - let msg = StoreMsg::LatencyInspect { + let msg = StoreMsgV1::LatencyInspect { send_time: Instant::now(), inspector, }; From de3d6655c3bb0eceac068207fe7ec8be30bdbcc7 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 26 Jun 2023 15:20:35 +0800 Subject: [PATCH 0756/1149] raftstore-v2: active flush before close (#14917) close tikv/tikv#14981 active flush before server stop Signed-off-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Signed-off-by: Spade A Co-authored-by: tonyxuqqi --- components/engine_traits/src/cf_defs.rs | 4 + components/engine_traits/src/flush.rs | 13 +++ components/raftstore-v2/src/batch/store.rs | 3 + components/raftstore-v2/src/fsm/peer.rs | 3 + .../src/operation/ready/apply_trace.rs | 86 +++++++++++++++++- 
components/raftstore-v2/src/raft/storage.rs | 8 ++ components/raftstore-v2/src/router/message.rs | 5 +- .../raftstore-v2/src/worker/refresh_config.rs | 3 +- components/server/src/server2.rs | 90 +++++++++++++++++-- src/config/configurable.rs | 2 +- src/config/mod.rs | 17 +++- .../integrations/raftstore/test_bootstrap.rs | 60 ++++++++++++- .../integrations/raftstore/test_scale_pool.rs | 1 - 13 files changed, 277 insertions(+), 18 deletions(-) diff --git a/components/engine_traits/src/cf_defs.rs b/components/engine_traits/src/cf_defs.rs index 27546dfc1c1..8e2f77daca8 100644 --- a/components/engine_traits/src/cf_defs.rs +++ b/components/engine_traits/src/cf_defs.rs @@ -16,6 +16,10 @@ pub fn data_cf_offset(cf: &str) -> usize { DATA_CFS.iter().position(|c| *c == cf).expect(cf) } +pub fn offset_to_cf(off: usize) -> &'static str { + DATA_CFS[off] +} + pub fn name_to_cf(name: &str) -> Option { if name.is_empty() { return Some(CF_DEFAULT); diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index 68a07478bc6..4b7e0e6687d 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -103,12 +103,18 @@ impl SstApplyState { #[derive(Debug)] pub struct FlushState { applied_index: AtomicU64, + + // This is only used for flush before server stop. + // It provides a direct path for flush progress by letting raftstore directly know the current + // flush progress. + flushed_index: [AtomicU64; DATA_CFS_LEN], } impl FlushState { pub fn new(applied_index: u64) -> Self { Self { applied_index: AtomicU64::new(applied_index), + flushed_index: Default::default(), } } @@ -123,6 +129,11 @@ impl FlushState { pub fn applied_index(&self) -> u64 { self.applied_index.load(Ordering::Acquire) } + + #[inline] + pub fn flushed_index(&self) -> &[AtomicU64; DATA_CFS_LEN] { + &self.flushed_index + } } /// A helper trait to avoid exposing `RaftEngine` to `TabletFactory`. 
@@ -234,8 +245,10 @@ impl PersistenceListener { } } }; + let apply_index = pr.apply_index; self.storage .persist_progress(self.region_id, self.tablet_index, pr); + self.state.flushed_index[offset].store(apply_index, Ordering::SeqCst); } } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 2c5a2de41b2..20d6fde85b8 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -541,6 +541,7 @@ pub struct Schedulers { pub checkpoint: Scheduler>, pub write: WriteSenders, pub cleanup: Scheduler, + pub refresh_config: Scheduler, // Following is not maintained by raftstore itself. pub split_check: Scheduler, @@ -756,6 +757,7 @@ impl StoreSystem { checkpoint::Runner::new(self.logger.clone(), tablet_registry.clone()), ); + let refresh_config_scheduler = workers.refresh_config_worker.scheduler(); let schedulers = Schedulers { read: read_scheduler, pd: workers.pd.scheduler(), @@ -764,6 +766,7 @@ impl StoreSystem { write: workers.async_write.senders(), split_check: split_check_scheduler, cleanup: cleanup_worker_scheduler, + refresh_config: refresh_config_scheduler, }; let builder = StorePollerBuilder::new( diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index cae442514ab..ff1fccbe0cd 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -353,6 +353,9 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerMsg::LeaderCallback(ch) => self.on_leader_callback(ch), #[cfg(feature = "testexport")] PeerMsg::WaitFlush(ch) => self.fsm.peer_mut().on_wait_flush(ch), + PeerMsg::FlushBeforeClose { tx } => { + self.fsm.peer_mut().flush_before_close(self.store_ctx, tx) + } } } // TODO: instead of propose pending commands immediately, we should use timeout. 
diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index 9d7cae00e9d..d280dc4913e 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -27,12 +27,16 @@ //! All apply related states are associated with an apply index. During //! recovery states corresponding to the start index should be used. -use std::{cmp, sync::Mutex}; +use std::{ + cmp, + path::Path, + sync::{atomic::Ordering, mpsc::SyncSender, Mutex}, +}; use encryption_export::DataKeyManager; use engine_traits::{ - data_cf_offset, ApplyProgress, KvEngine, RaftEngine, RaftLogBatch, TabletRegistry, ALL_CFS, - CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, DATA_CFS, DATA_CFS_LEN, + data_cf_offset, offset_to_cf, ApplyProgress, KvEngine, RaftEngine, RaftLogBatch, + TabletRegistry, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, DATA_CFS, DATA_CFS_LEN, }; use fail::fail_point; use kvproto::{ @@ -46,6 +50,7 @@ use slog::{info, trace, Logger}; use tikv_util::{box_err, slog_panic, worker::Scheduler}; use crate::{ + batch::StoreContext, operation::{ command::temp_split_path, ready::snapshot::{install_tablet, recv_snap_path}, @@ -397,6 +402,12 @@ impl Storage { } let (trace, region_state) = ApplyTrace::recover(region_id, &engine)?; + info!( + logger, + "initial apply trace"; + "apply_trace" => ?trace, + "region_id" => region_id, + ); let raft_state = match engine.get_raft_state(region_id) { Ok(Some(s)) => s, @@ -544,6 +555,75 @@ impl Peer { } apply_trace.maybe_advance_admin_flushed(apply_index); } + + pub fn flush_before_close(&mut self, ctx: &StoreContext, tx: SyncSender<()>) { + info!( + self.logger, + "region flush before close begin"; + ); + let region_id = self.region_id(); + let flush_threshold: u64 = (|| { + fail_point!("flush_before_cluse_threshold", |t| { + t.unwrap().parse::().unwrap() + }); + 50 + })(); + + if let Some(tablet) = 
self.tablet().cloned() { + // flush the oldest cf one by one until we are under the replay count threshold + loop { + let replay_count = self.storage().estimate_replay_count(); + if replay_count < flush_threshold { + break; + } + info!( + self.logger, + "flush-before-close: replay count exceeds threshold, pick the oldest cf to flush"; + "count" => replay_count, + ); + tablet.flush_oldest_cf(true, None).unwrap(); + + let flush_state = self.flush_state().clone(); + let mut apply_trace = self.storage_mut().apply_trace_mut(); + let mut max_flush_index = 0; + + let flushed_indexes = flush_state.as_ref().flushed_index(); + for i in 0..flushed_indexes.len() { + let flush_index = flushed_indexes[i].load(Ordering::SeqCst); + let cf = offset_to_cf(i); + apply_trace.on_flush(cf, flush_index); + max_flush_index = u64::max(max_flush_index, flush_index); + } + + apply_trace.maybe_advance_admin_flushed(max_flush_index); + let admin_flush = apply_trace.admin.flushed; + apply_trace.persisted_applied = admin_flush; + + if self.storage().estimate_replay_count() < flush_threshold { + let (_, _, tablet_index) = ctx + .tablet_registry + .parse_tablet_name(Path::new(tablet.path())) + .unwrap(); + let mut lb = ctx.engine.log_batch(1); + lb.put_flushed_index(region_id, CF_RAFT, tablet_index, admin_flush) + .unwrap(); + ctx.engine.consume(&mut lb, true).unwrap(); + info!( + self.logger, + "flush before close flush admin for region"; + "admin_flush" => admin_flush, + ); + break; + } + } + } + + info!( + self.logger, + "region flush before close done"; + ); + let _ = tx.send(()); + } } #[cfg(test)] diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 6434331019d..5e6aa5c97ea 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -247,6 +247,14 @@ impl Storage { } } } + + // call `estimate` as persisted_applied is not guaranteed to be persisted + #[inline] + pub fn 
estimate_replay_count(&self) -> u64 { + let apply_index = self.apply_state().get_applied_index(); + let persisted_apply = self.apply_trace.persisted_apply_index(); + apply_index.saturating_sub(persisted_apply) + } } impl raft::Storage for Storage { diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 844246edbdd..52b9099cdf0 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -1,7 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. // #[PerformanceCriticalPath] -use std::sync::Arc; +use std::sync::{mpsc::SyncSender, Arc}; use kvproto::{ import_sstpb::SstMeta, @@ -241,6 +241,9 @@ pub enum PeerMsg { /// A message that used to check if a flush is happened. #[cfg(feature = "testexport")] WaitFlush(super::FlushChannel), + FlushBeforeClose { + tx: SyncSender<()>, + }, /// A message that used to check if a snapshot gc is happened. SnapGc(Box<[TabletSnapKey]>), } diff --git a/components/raftstore-v2/src/worker/refresh_config.rs b/components/raftstore-v2/src/worker/refresh_config.rs index ea7d8724756..804cfcce60e 100644 --- a/components/raftstore-v2/src/worker/refresh_config.rs +++ b/components/raftstore-v2/src/worker/refresh_config.rs @@ -131,12 +131,11 @@ where match task { RefreshConfigTask::ScalePool(component, size) => { match component { - BatchComponent::Store => {} + BatchComponent::Store => self.resize_raft_pool(size), BatchComponent::Apply => { unreachable!("v2 does not have apply batch system") } }; - self.resize_raft_pool(size); } _ => { warn!( diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 85a7bf235b6..3ca2004930e 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -17,7 +17,11 @@ use std::{ marker::PhantomData, path::{Path, PathBuf}, str::FromStr, - sync::{atomic::AtomicU64, mpsc, Arc}, + sync::{ + atomic::AtomicU64, + mpsc::{self, 
sync_channel}, + Arc, + }, time::Duration, u64, }; @@ -58,13 +62,19 @@ use raftstore::{ }, RegionInfoAccessor, }; -use raftstore_v2::{router::RaftRouter, StateStorage}; +use raftstore_v2::{ + router::{PeerMsg, RaftRouter}, + StateStorage, +}; use resource_control::{ ResourceGroupManager, ResourceManagerService, MIN_PRIORITY_UPDATE_INTERVAL, }; use security::SecurityManager; use tikv::{ - config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, + config::{ + loop_registry, ConfigController, ConfigurableDb, DbConfigManger, DbType, LogConfigManager, + TikvConfig, + }, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, import::{ImportSstService, SstImporter}, @@ -1216,7 +1226,7 @@ where if status_enabled { let mut status_server = match StatusServer::new( self.core.config.server.status_thread_pool_size, - self.cfg_controller.take().unwrap(), + self.cfg_controller.clone().unwrap(), Arc::new(self.core.config.security.clone()), self.engines.as_ref().unwrap().engine.raft_extension(), self.core.store_path.clone(), @@ -1237,14 +1247,84 @@ where } } + fn flush_before_stop(&mut self) { + let change = { + let mut change = HashMap::new(); + change.insert("raftstore.store_pool_size".to_owned(), "10".to_owned()); + change + }; + if let Err(e) = self + .cfg_controller + .as_mut() + .unwrap() + .update_without_persist(change) + { + warn!( + "config change failed"; + "error" => ?e, + ); + } + let tablet_registry = self.tablet_registry.as_ref().unwrap(); + // It should not return error. 
+ if let Err(e) = loop_registry(tablet_registry, |cache| { + if let Some(latest) = cache.latest() { + latest.set_high_priority_background_threads(10, false)?; + Ok(false) + } else { + Ok(true) + } + }) { + warn!( + "increase high priority background threads failed during server stop (it will impact close speed)"; + "error" => ?e, + ); + } + + info!("server stop: flush begin"); + let engines = self.engines.as_mut().unwrap(); + let router = self.router.as_ref().unwrap(); + let mut rxs = vec![]; + engines + .raft_engine + .for_each_raft_group::(&mut |region_id| { + let (tx, rx) = sync_channel(1); + let flush_msg = PeerMsg::FlushBeforeClose { tx }; + if let Err(e) = router.store_router().force_send(region_id, flush_msg) { + warn!( + "flush-before-close: force send error"; + "error" => ?e, + "region_id" => region_id, + ); + } else { + rxs.push(rx); + } + + Ok(()) + }) + .unwrap(); + + for rx in rxs { + if let Err(e) = rx.recv() { + warn!( + "flush-before-close: receive error"; + "error" => ?e, + ); + } + } + + info!( + "server stop: flush done"; + ); + } + fn stop(mut self) { + self.flush_before_stop(); tikv_util::thread_group::mark_shutdown(); let mut servers = self.servers.unwrap(); servers .server .stop() .unwrap_or_else(|e| fatal!("failed to stop server: {}", e)); - self.node.as_mut().unwrap().stop(); self.region_info_accessor.as_mut().unwrap().stop(); diff --git a/src/config/configurable.rs b/src/config/configurable.rs index 2f2dd66381d..f006da501d2 100644 --- a/src/config/configurable.rs +++ b/src/config/configurable.rs @@ -84,7 +84,7 @@ impl ConfigurableDb for RocksEngine { } } -fn loop_registry( +pub fn loop_registry( registry: &TabletRegistry, mut f: impl FnMut(&mut CachedTablet) -> std::result::Result>, ) -> ConfigRes { diff --git a/src/config/mod.rs b/src/config/mod.rs index f1441e1c7ad..3124c18fa9b 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -22,7 +22,7 @@ use std::{ use api_version::ApiV1Ttl; use causal_ts::Config as CausalTsConfig; -pub use 
configurable::{ConfigRes, ConfigurableDb}; +pub use configurable::{loop_registry, ConfigRes, ConfigurableDb}; use encryption_export::DataKeyManager; use engine_rocks::{ config::{self as rocks_config, BlobRunMode, CompressionType, LogLevel as RocksLogLevel}, @@ -4305,7 +4305,12 @@ impl ConfigController { pub fn update(&self, change: HashMap) -> CfgResult<()> { let diff = to_config_change(change.clone())?; - self.update_impl(diff, Some(change)) + self.update_impl(diff, Some(change), true) + } + + pub fn update_without_persist(&self, change: HashMap) -> CfgResult<()> { + let diff = to_config_change(change.clone())?; + self.update_impl(diff, Some(change), false) } pub fn update_from_toml_file(&self) -> CfgResult<()> { @@ -4313,7 +4318,7 @@ impl ConfigController { match TikvConfig::from_file(Path::new(¤t.cfg_path), None) { Ok(incoming) => { let diff = current.diff(&incoming); - self.update_impl(diff, None) + self.update_impl(diff, None, true) } Err(e) => Err(e), } @@ -4323,6 +4328,7 @@ impl ConfigController { &self, mut diff: HashMap, change: Option>, + persist: bool, ) -> CfgResult<()> { diff = { let incoming = self.get_current(); @@ -4357,6 +4363,11 @@ impl ConfigController { debug!("all config change had been dispatched"; "change" => ?to_update); // we already verified the correctness at the beginning of this function. inner.current.update(to_update).unwrap(); + + if !persist { + return Ok(()); + } + // Write change to the config file if let Some(change) = change { let content = { diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index ee063e0f1e7..1564aff0b3e 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -1,16 +1,20 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ path::Path, - sync::{Arc, Mutex}, + sync::{mpsc::sync_channel, Arc, Mutex}, }; use concurrency_manager::ConcurrencyManager; -use engine_traits::{Engines, Peekable, ALL_CFS, CF_DEFAULT, CF_RAFT}; +use engine_traits::{ + Engines, Peekable, RaftEngine, RaftEngineReadOnly, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, + CF_WRITE, +}; use kvproto::{kvrpcpb::ApiVersion, metapb, raft_serverpb::RegionLocalState}; use raftstore::{ coprocessor::CoprocessorHost, store::{bootstrap_store, fsm, fsm::store::StoreMeta, AutoSplitController, SnapManager}, }; +use raftstore_v2::router::PeerMsg; use resource_metering::CollectorRegHandle; use tempfile::Builder; use test_pd_client::{bootstrap_with_first_region, TestPdClient}; @@ -189,3 +193,55 @@ fn test_node_switch_api_version() { } } } + +#[test] +fn test_flush_before_stop() { + use test_raftstore_v2::*; + + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + + let region = cluster.get_region(b""); + cluster.must_split(®ion, b"k020"); + + let region = cluster.get_region(b"k40"); + cluster.must_split(®ion, b"k040"); + + let region = cluster.get_region(b"k60"); + cluster.must_split(®ion, b"k070"); + + fail::cfg("flush_before_cluse_threshold", "return(10)").unwrap(); + + for i in 0..100 { + let key = format!("k{:03}", i); + cluster.must_put_cf(CF_WRITE, key.as_bytes(), b"val"); + cluster.must_put_cf(CF_LOCK, key.as_bytes(), b"val"); + } + + let router = cluster.get_router(1).unwrap(); + let raft_engine = cluster.get_raft_engine(1); + + let mut rxs = vec![]; + raft_engine + .for_each_raft_group::(&mut |id| { + let (tx, rx) = sync_channel(1); + rxs.push(rx); + let msg = PeerMsg::FlushBeforeClose { tx }; + router.force_send(id, msg).unwrap(); + + Ok(()) + }) + .unwrap(); + + for rx in rxs { + rx.recv().unwrap(); + } + + raft_engine + .for_each_raft_group::(&mut |id| { + let admin_flush = raft_engine.get_flushed_index(id, CF_RAFT).unwrap().unwrap(); + assert!(admin_flush >= 40); + Ok(()) + }) + .unwrap(); +} diff --git 
a/tests/integrations/raftstore/test_scale_pool.rs b/tests/integrations/raftstore/test_scale_pool.rs index 393c47a7f5e..2187b4f4bee 100644 --- a/tests/integrations/raftstore/test_scale_pool.rs +++ b/tests/integrations/raftstore/test_scale_pool.rs @@ -128,7 +128,6 @@ fn get_poller_thread_ids_by_prefix(prefixs: Vec<&str>) -> Vec { for tid in all_tids { if let Ok(stat) = thread::full_thread_stat(pid, tid) { for &prefix in &prefixs { - println!("command {:?}", stat.command); if stat.command.starts_with(prefix) { poller_tids.push(tid); } From 66aa8257fa1e840d57265fa539b37ccf9961ff89 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Mon, 26 Jun 2023 19:10:04 +0800 Subject: [PATCH 0757/1149] raftstore-v2: optimize prepare and commit merge (#14892) ref tikv/tikv#12842 - pre-flush memtable before prepare and commit merge - make checkpoint and merge async - use FuturePool instead of Worker for spawning unstructured tasks in raftstore Signed-off-by: tabokie --- components/engine_traits/src/misc.rs | 15 +- components/raftstore-v2/src/batch/store.rs | 21 +- components/raftstore-v2/src/fsm/apply.rs | 6 +- .../operation/command/admin/merge/commit.rs | 184 ++++++++++-------- .../operation/command/admin/merge/prepare.rs | 95 ++++++--- .../src/operation/command/admin/mod.rs | 114 +++++------ .../src/operation/command/admin/split.rs | 101 ++++++---- .../raftstore-v2/src/operation/command/mod.rs | 6 +- .../src/operation/command/write/mod.rs | 9 +- .../src/operation/query/capture.rs | 10 +- .../raftstore-v2/src/operation/ready/mod.rs | 2 + components/raftstore-v2/src/raft/apply.rs | 13 +- components/raftstore-v2/src/raft/peer.rs | 12 -- components/raftstore-v2/src/raft/storage.rs | 9 +- .../raftstore-v2/src/worker/checkpoint.rs | 132 ------------- components/raftstore-v2/src/worker/mod.rs | 1 - components/raftstore-v2/src/worker/tablet.rs | 115 ++++++++--- components/test_pd_client/src/pd.rs | 14 +- tests/integrations/raftstore/test_merge.rs | 4 +- 19 files changed, 457 insertions(+), 406 
deletions(-) delete mode 100644 components/raftstore-v2/src/worker/checkpoint.rs diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index a537d1c5d2a..b5189bcc1a1 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -141,9 +141,22 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { fn is_stalled_or_stopped(&self) -> bool; - /// Returns size and age of active memtable if there's one. + /// Returns size and creation time of active memtable if there's one. fn get_active_memtable_stats_cf( &self, cf: &str, ) -> Result>; + + /// Whether there's active memtable with creation time older than + /// `threshold`. + fn has_old_active_memtable(&self, threshold: std::time::SystemTime) -> bool { + for cf in self.cf_names() { + if let Ok(Some((_, age))) = self.get_active_memtable_stats_cf(cf) { + if age < threshold { + return true; + } + } + } + false + } } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 20d6fde85b8..828525f688c 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -61,7 +61,7 @@ use crate::{ }, raft::Storage, router::{PeerMsg, PeerTick, StoreMsg}, - worker::{checkpoint, cleanup, pd, refresh_config, tablet}, + worker::{cleanup, pd, refresh_config, tablet}, Error, Result, }; @@ -88,12 +88,13 @@ pub struct StoreContext { /// The precise timer for scheduling tick. pub timer: SteadyTimer, pub schedulers: Schedulers, - /// store meta pub store_meta: Arc>>, pub shutdown: Arc, pub engine: ER, pub tablet_registry: TabletRegistry, pub apply_pool: FuturePool, + /// A background pool used for high-priority works. + pub high_priority_pool: FuturePool, /// Disk usage for the store itself. 
pub self_disk_usage: DiskUsage, @@ -329,6 +330,7 @@ struct StorePollerBuilder { router: StoreRouter, schedulers: Schedulers, apply_pool: FuturePool, + high_priority_pool: FuturePool, logger: Logger, store_meta: Arc>>, shutdown: Arc, @@ -365,6 +367,11 @@ impl StorePollerBuilder { .after_start(move || set_io_type(IoType::ForegroundWrite)) .name_prefix("apply") .build_future_pool(); + let high_priority_pool = YatpPoolBuilder::new(DefaultTicker::default()) + .thread_count(1, 1, 1) + .after_start(move || set_io_type(IoType::ForegroundWrite)) + .name_prefix("store-bg") + .build_future_pool(); let global_stat = GlobalStoreStat::default(); StorePollerBuilder { cfg, @@ -374,6 +381,7 @@ impl StorePollerBuilder { trans, router, apply_pool, + high_priority_pool, logger, schedulers, store_meta, @@ -518,6 +526,7 @@ where engine: self.engine.clone(), tablet_registry: self.tablet_registry.clone(), apply_pool: self.apply_pool.clone(), + high_priority_pool: self.high_priority_pool.clone(), self_disk_usage: DiskUsage::Normal, snap_mgr: self.snap_mgr.clone(), coprocessor_host: self.coprocessor_host.clone(), @@ -538,7 +547,6 @@ pub struct Schedulers { pub read: Scheduler>, pub pd: Scheduler, pub tablet: Scheduler>, - pub checkpoint: Scheduler>, pub write: WriteSenders, pub cleanup: Scheduler, pub refresh_config: Scheduler, @@ -752,17 +760,12 @@ impl StoreSystem { .cleanup_worker .start("cleanup-worker", cleanup::Runner::new(compact_runner)); - let checkpoint_scheduler = workers.checkpoint.start( - "checkpoint-worker", - checkpoint::Runner::new(self.logger.clone(), tablet_registry.clone()), - ); - let refresh_config_scheduler = workers.refresh_config_worker.scheduler(); + let schedulers = Schedulers { read: read_scheduler, pd: workers.pd.scheduler(), tablet: tablet_scheduler, - checkpoint: checkpoint_scheduler, write: workers.async_write.senders(), split_check: split_check_scheduler, cleanup: cleanup_worker_scheduler, diff --git a/components/raftstore-v2/src/fsm/apply.rs 
b/components/raftstore-v2/src/fsm/apply.rs index 2c3df759680..f966b67634a 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -21,13 +21,13 @@ use tikv_util::{ mpsc::future::{self, Receiver, Sender, WakePolicy}, timer::GLOBAL_TIMER_HANDLE, worker::Scheduler, + yatp_pool::FuturePool, }; use crate::{ operation::{CatchUpLogs, DataTrace}, raft::Apply, router::{ApplyRes, ApplyTask, PeerMsg}, - worker::checkpoint, TabletTask, }; @@ -79,8 +79,8 @@ impl ApplyFsm { res_reporter: R, tablet_registry: TabletRegistry, read_scheduler: Scheduler>, - checkpoint_scheduler: Scheduler>, tablet_scheduler: Scheduler>, + high_priority_pool: FuturePool, flush_state: Arc, sst_apply_state: SstApplyState, log_recovery: Option>, @@ -105,8 +105,8 @@ impl ApplyFsm { buckets, sst_importer, coprocessor_host, - checkpoint_scheduler, tablet_scheduler, + high_priority_pool, logger, ); ( diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index 00d07c19afc..84ef4745288 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -175,7 +175,10 @@ impl Peer { } // Match v1::schedule_merge. - fn ask_target_peer_to_commit_merge(&mut self, store_ctx: &mut StoreContext) { + fn ask_target_peer_to_commit_merge( + &mut self, + store_ctx: &mut StoreContext, + ) { let state = self.applied_merge_state().unwrap(); let target = state.get_target(); let target_id = target.get_id(); @@ -222,27 +225,33 @@ impl Peer { // quorum stores of target region. Otherwise we need to enable proposal // forwarding. let msg = PeerMsg::AskCommitMerge(request); - // If target peer is destroyed, life.rs is responsible for telling us to - // rollback. 
- match store_ctx.router.force_send(target_id, msg) { - Ok(_) => (), - Err(SendError(PeerMsg::AskCommitMerge(msg))) => { - if let Err(e) = store_ctx - .router - .force_send_control(StoreMsg::AskCommitMerge(msg)) - { - if store_ctx.router.is_shutdown() { - return; + let router = store_ctx.router.clone(); + let logger = self.logger.clone(); + self.start_pre_flush( + store_ctx, + "commit_merge", + &target.clone(), + Box::new(move || { + // If target peer is destroyed, life.rs is responsible for telling us to + // rollback. + match router.force_send(target_id, msg) { + Ok(_) => (), + Err(SendError(PeerMsg::AskCommitMerge(msg))) => { + if let Err(e) = router.force_send_control(StoreMsg::AskCommitMerge(msg)) { + if router.is_shutdown() { + return; + } + slog_panic!( + logger, + "fails to send `AskCommitMerge` msg to store"; + "error" => ?e, + ); + } } - slog_panic!( - self.logger, - "fails to send `AskCommitMerge` msg to store"; - "error" => ?e, - ); + _ => unreachable!(), } - } - _ => unreachable!(), - } + }), + ); } } @@ -275,8 +284,9 @@ impl Peer { // If target caught up by snapshot, the source checkpoint hasn't been used. 
let source_path = merge_source_path(&store_ctx.tablet_registry, source_region.get_id(), index); - assert!(source_path.exists()); - self.record_tombstone_tablet_path(store_ctx, source_path, r.get_index()); + if source_path.exists() { + self.record_tombstone_tablet_path(store_ctx, source_path, r.get_index()); + } let _ = store_ctx.router.force_send( source_region.get_id(), PeerMsg::AckCommitMerge { @@ -400,15 +410,6 @@ impl Apply { "source_region" => ?source_region, ); - let ctx = TabletContext::new(source_region, None); - let source_tablet = reg - .tablet_factory() - .open_tablet(ctx, &source_path) - .unwrap_or_else(|e| { - slog_panic!(self.logger, "failed to open source checkpoint"; "err" => ?e); - }); - let open_time = Instant::now_coarse(); - let mut region = self.region().clone(); // Use a max value so that pd can ensure overlapped region has a priority. let version = cmp::max( @@ -422,62 +423,83 @@ impl Apply { region.set_start_key(source_region.get_start_key().to_vec()); } - let path = reg.tablet_path(self.region_id(), index); - - // Avoid seqno jump back between self.tablet and the newly created tablet. - // If we are recovering, this flush would just be a noop. 
- self.tablet().flush_cfs(&[], true).unwrap(); - let flush_time = Instant::now_coarse(); - + let logger = self.logger.clone(); + let region_id = self.region_id(); + let target_tablet = self.tablet().clone(); let mut ctx = TabletContext::new(®ion, Some(index)); ctx.flush_state = Some(self.flush_state().clone()); - let guard = MergeInProgressGuard::new(&self.logger, reg, self.region_id(), index, &path) - .unwrap_or_else(|e| { - slog_panic!( - self.logger, - "fails to create MergeInProgressGuard"; - "path" => %path.display(), - "error" => ?e - ) - }); - let tablet = reg.tablet_factory().open_tablet(ctx, &path).unwrap(); - if let Some(guard) = guard { - tablet - .merge(&[&source_tablet, self.tablet()]) - .unwrap_or_else(|e| { - slog_panic!( - self.logger, - "fails to merge tablet"; - "path" => %path.display(), - "error" => ?e - ) - }); - guard.defuse().unwrap_or_else(|e| { - slog_panic!( - self.logger, - "fails to defuse MergeInProgressGuard"; - "path" => %path.display(), - "error" => ?e - ) - }); - } else { - info!(self.logger, "reuse merged tablet"); - } - let merge_time = Instant::now_coarse(); + let reg_clone = reg.clone(); + let source_path_clone = source_path.clone(); + let source_region_clone = source_region.clone(); + let (tx, rx) = oneshot::channel(); + self.high_priority_pool() + .spawn(async move { + let source_ctx = TabletContext::new(&source_region_clone, None); + let source_tablet = reg_clone + .tablet_factory() + .open_tablet(source_ctx, &source_path_clone) + .unwrap_or_else(|e| { + slog_panic!(logger, "failed to open source checkpoint"; "err" => ?e); + }); + let open_time = Instant::now_coarse(); + + let path = reg_clone.tablet_path(region_id, index); + // Avoid seqno jump back between self.tablet and the newly created tablet. + // If we are recovering, this flush would just be a noop. 
+ target_tablet.flush_cfs(&[], true).unwrap(); + let flush_time = Instant::now_coarse(); + + let guard = MergeInProgressGuard::new(&logger, ®_clone, region_id, index, &path) + .unwrap_or_else(|e| { + slog_panic!( + logger, + "fails to create MergeInProgressGuard"; + "path" => %path.display(), + "error" => ?e + ) + }); + let tablet = reg_clone.tablet_factory().open_tablet(ctx, &path).unwrap(); + if let Some(guard) = guard { + tablet + .merge(&[&source_tablet, &target_tablet]) + .unwrap_or_else(|e| { + slog_panic!( + logger, + "fails to merge tablet"; + "path" => %path.display(), + "error" => ?e + ) + }); + guard.defuse().unwrap_or_else(|e| { + slog_panic!( + logger, + "fails to defuse MergeInProgressGuard"; + "path" => %path.display(), + "error" => ?e + ) + }); + } else { + info!(logger, "reuse merged tablet"); + } + let merge_time = Instant::now_coarse(); + info!( + logger, + "applied CommitMerge"; + "source_region" => ?source_region_clone, + "wait" => ?wait_duration.map(|d| format!("{}", ReadableDuration(d))), + "open" => %ReadableDuration(open_time.saturating_duration_since(start_time)), + "merge" => %ReadableDuration(flush_time.saturating_duration_since(open_time)), + "flush" => %ReadableDuration(merge_time.saturating_duration_since(flush_time)), + ); + tx.send(tablet).unwrap(); + }) + .unwrap(); + let tablet = rx.await.unwrap(); + fail::fail_point!("after_merge_source_checkpoint", |_| Err( tikv_util::box_err!("fp") )); - info!( - self.logger, - "applied CommitMerge"; - "source_region" => ?source_region, - "wait" => ?wait_duration.map(|d| format!("{}", ReadableDuration(d))), - "open" => %ReadableDuration(open_time.saturating_duration_since(start_time)), - "merge" => %ReadableDuration(flush_time.saturating_duration_since(open_time)), - "flush" => %ReadableDuration(merge_time.saturating_duration_since(flush_time)), - ); - self.set_tablet(tablet.clone()); let state = self.region_state_mut(); diff --git 
a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index 6004e5d7c7c..f71670c2931 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -31,6 +31,7 @@ use std::{mem, time::Duration}; use collections::HashMap; use engine_traits::{Checkpointer, KvEngine, RaftEngine, RaftLogBatch, CF_LOCK}; +use futures::channel::oneshot; use kvproto::{ metapb::RegionEpoch, raft_cmdpb::{ @@ -49,10 +50,11 @@ use raftstore::{ store::{metrics::PEER_ADMIN_CMD_COUNTER, util, LocksStatus, ProposalContext, Transport}, Error, Result, }; -use slog::{debug, info}; +use slog::{debug, error, info, warn}; use tikv_util::{ box_err, log::SlogFormat, slog_panic, store::region_on_same_stores, time::Instant, }; +use txn_types::WriteBatchFlags; use super::merge_source_path; use crate::{ @@ -60,7 +62,7 @@ use crate::{ fsm::ApplyResReporter, operation::{command::parse_at, AdminCmdResult, SimpleWriteReqDecoder}, raft::{Apply, Peer}, - router::CmdResChannel, + router::{CmdResChannel, PeerMsg, RaftRequest}, }; const TRIM_CHECK_TIMEOUT: Duration = Duration::from_secs(10); @@ -76,6 +78,8 @@ pub enum PrepareStatus { start_time: Instant, // Peers that we are not sure if trimmed. pending_peers: HashMap, + // Only when all peers are trimmed, this proposal will be taken into the tablet flush + // callback. req: Option, }, /// When a fence is present, we (1) delay the PrepareMerge @@ -125,6 +129,17 @@ impl Peer { let pre_propose = if let Some(r) = self.already_checked_pessimistic_locks()? { r } else if self.already_checked_trim_status()? { + if !WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()) + .contains(WriteBatchFlags::PRE_FLUSH_FINISHED) + { + // We will always schedule flush (`merge_on_availability_response`) when trim + // status passed. 
+ warn!( + self.logger, + "flush should already be scheduled for prepare merge" + ); + return Err(Error::PendingPrepareMerge); + } let r = self.check_logs_before_prepare_merge(store_ctx)?; self.check_pessimistic_locks(r, &mut req)? } else { @@ -345,6 +360,7 @@ impl Peer { from_peer: u64, resp: &ExtraMessage, ) { + let region_id = self.region_id(); if self.merge_context().is_some() && let Some(PrepareStatus::WaitForTrimStatus { pending_peers, req, .. }) = self .merge_context_mut() @@ -373,9 +389,36 @@ impl Peer { } } if pending_peers.is_empty() { - let (ch, _) = CmdResChannel::pair(); - let req = req.take().unwrap(); - self.on_admin_command(store_ctx, req, ch); + let mailbox = match store_ctx.router.mailbox(region_id) { + Some(mailbox) => mailbox, + None => { + assert!( + store_ctx.router.is_shutdown(), + "{} router should have been closed", + SlogFormat(&self.logger) + ); + return; + } + }; + let mut req = req.take().unwrap(); + req.mut_header().set_flags(WriteBatchFlags::PRE_FLUSH_FINISHED.bits()); + let logger = self.logger.clone(); + let on_flush_finish = move || { + let (ch, _) = CmdResChannel::pair(); + if let Err(e) = mailbox.try_send(PeerMsg::AdminCommand(RaftRequest::new(req, ch))) { + error!( + logger, + "send PrepareMerge request failed after pre-flush finished"; + "err" => ?e, + ); + } + }; + self.start_pre_flush( + store_ctx, + "prepare_merge", + &self.region().clone(), + Box::new(on_flush_finish), + ); } } } @@ -565,7 +608,7 @@ impl Peer { impl Apply { // Match v1::exec_prepare_merge. 
- pub fn apply_prepare_merge( + pub async fn apply_prepare_merge( &mut self, req: &AdminRequest, log_index: u64, @@ -604,26 +647,34 @@ impl Apply { PEER_ADMIN_CMD_COUNTER.prepare_merge.success.inc(); let _ = self.flush(); - let tablet = self.tablet().clone(); - let mut checkpointer = tablet.new_checkpointer().unwrap_or_else(|e| { - slog_panic!( - self.logger, - "fails to create checkpoint object"; - "error" => ?e - ) - }); let reg = self.tablet_registry(); let path = merge_source_path(reg, self.region_id(), log_index); // We might be replaying this command. if !path.exists() { - checkpointer.create_at(&path, None, 0).unwrap_or_else(|e| { - slog_panic!( - self.logger, - "fails to create checkpoint"; - "path" => %path.display(), - "error" => ?e - ) - }); + let tablet = self.tablet().clone(); + let logger = self.logger.clone(); + let (tx, rx) = oneshot::channel(); + self.high_priority_pool() + .spawn(async move { + let mut checkpointer = tablet.new_checkpointer().unwrap_or_else(|e| { + slog_panic!( + logger, + "fails to create checkpoint object"; + "error" => ?e + ) + }); + checkpointer.create_at(&path, None, 0).unwrap_or_else(|e| { + slog_panic!( + logger, + "fails to create checkpoint"; + "path" => %path.display(), + "error" => ?e + ) + }); + tx.send(()).unwrap(); + }) + .unwrap(); + rx.await.unwrap(); } // Notes on the lifetime of this checkpoint: diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 0491eee5470..9dbc3a06a87 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -12,7 +12,7 @@ use compact_log::CompactLogResult; use conf_change::{ConfChangeResult, UpdateGcPeersResult}; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ - metapb::PeerRole, + metapb::{PeerRole, Region}, raft_cmdpb::{AdminCmdType, RaftCmdRequest}, raft_serverpb::{ExtraMessageType, FlushMemtable, RaftMessage}, }; 
@@ -164,22 +164,9 @@ impl Peer { if !WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()) .contains(WriteBatchFlags::PRE_FLUSH_FINISHED) { - if self.tablet_being_flushed() { - return; - } - - let region_id = self.region().get_id(); - self.set_tablet_being_flushed(true); - info!( - self.logger, - "Schedule flush tablet"; - ); - - let mailbox = match ctx.router.mailbox(region_id) { + let mailbox = match ctx.router.mailbox(self.region_id()) { Some(mailbox) => mailbox, None => { - // None means the node is shutdown concurrently and thus the - // mailboxes in router have been cleared assert!( ctx.router.is_shutdown(), "{} router should have been closed", @@ -188,58 +175,26 @@ impl Peer { return; } }; - + req.mut_header() + .set_flags(WriteBatchFlags::PRE_FLUSH_FINISHED.bits()); let logger = self.logger.clone(); let on_flush_finish = move || { - req.mut_header() - .set_flags(WriteBatchFlags::PRE_FLUSH_FINISHED.bits()); if let Err(e) = mailbox .try_send(PeerMsg::AdminCommand(RaftRequest::new(req, ch))) { error!( logger, - "send split request fail after pre-flush finished"; + "send BatchSplit request failed after pre-flush finished"; "err" => ?e, ); } }; - - if let Err(e) = - ctx.schedulers.tablet.schedule(crate::TabletTask::Flush { - region_id, - cb: Some(Box::new(on_flush_finish)), - }) - { - error!( - self.logger, - "Fail to schedule flush task"; - "err" => ?e, - ) - } - - // Notify followers to flush their relevant memtables - let peers = self.region().get_peers().to_vec(); - for p in peers { - if p == *self.peer() - || p.get_role() != PeerRole::Voter - || p.is_witness - { - continue; - } - let mut msg = RaftMessage::default(); - msg.set_region_id(region_id); - msg.set_from_peer(self.peer().clone()); - msg.set_to_peer(p.clone()); - msg.set_region_epoch(self.region().get_region_epoch().clone()); - let extra_msg = msg.mut_extra_msg(); - extra_msg.set_type(ExtraMessageType::MsgFlushMemtable); - let mut flush_memtable = FlushMemtable::new(); - 
flush_memtable.set_region_id(region_id); - extra_msg.set_flush_memtable(flush_memtable); - - self.send_raft_message(ctx, msg); - } - + self.start_pre_flush( + ctx, + "split", + &self.region().clone(), + Box::new(on_flush_finish), + ); return; } @@ -247,7 +202,6 @@ impl Peer { self.logger, "Propose split"; ); - self.set_tablet_being_flushed(false); self.propose_split(ctx, req) } } @@ -302,4 +256,50 @@ impl Peer { } self.post_propose_command(ctx, res, vec![ch], true); } + + fn start_pre_flush( + &mut self, + ctx: &mut StoreContext, + reason: &'static str, + target: &Region, + on_local_flushed: Box, + ) { + let target_id = target.get_id(); + info!( + self.logger, + "Start pre flush tablet"; + "target" => target_id, + "reason" => reason, + ); + if let Err(e) = ctx.schedulers.tablet.schedule(crate::TabletTask::Flush { + region_id: target_id, + reason, + threshold: Some(std::time::Duration::from_secs(10)), + cb: Some(on_local_flushed), + }) { + error!( + self.logger, + "Fail to schedule flush task"; + "err" => ?e, + ) + } + // Notify followers to flush their relevant memtables + for p in target.get_peers() { + if p == self.peer() || p.get_role() != PeerRole::Voter || p.is_witness { + continue; + } + let mut msg = RaftMessage::default(); + msg.set_region_id(target_id); + msg.set_from_peer(self.peer().clone()); + msg.set_to_peer(p.clone()); + msg.set_region_epoch(target.get_region_epoch().clone()); + let extra_msg = msg.mut_extra_msg(); + extra_msg.set_type(ExtraMessageType::MsgFlushMemtable); + let mut flush_memtable = FlushMemtable::new(); + flush_memtable.set_region_id(target_id); + extra_msg.set_flush_memtable(flush_memtable); + + self.send_raft_message(ctx, msg); + } + } } diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 180bb628dbb..96e03f940a4 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ 
b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -25,11 +25,13 @@ //! created by the store, and here init it using the data sent from the parent //! peer. -use std::{any::Any, borrow::Cow, cmp, path::PathBuf, time::Duration}; +use std::{any::Any, borrow::Cow, cmp, path::PathBuf}; use collections::HashSet; use crossbeam::channel::SendError; -use engine_traits::{KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry}; +use engine_traits::{ + Checkpointer, KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, +}; use fail::fail_point; use futures::channel::oneshot; use kvproto::{ @@ -53,7 +55,7 @@ use raftstore::{ Result, }; use slog::{error, info, warn}; -use tikv_util::{log::SlogFormat, slog_panic, time::Instant, worker::Scheduler}; +use tikv_util::{log::SlogFormat, slog_panic, time::Instant}; use crate::{ batch::StoreContext, @@ -61,7 +63,7 @@ use crate::{ operation::{AdminCmdResult, SharedReadTablet}, raft::{Apply, Peer}, router::{CmdResChannel, PeerMsg, PeerTick, StoreMsg}, - worker::{checkpoint, tablet}, + worker::tablet, Error, }; @@ -506,11 +508,61 @@ impl Apply { .map(|r| r.get_id()) .filter(|id| id != ®ion_id) .collect::>(); - let scheduler: _ = self.checkpoint_scheduler().clone(); + let (tx, rx) = oneshot::channel(); let tablet = self.tablet().clone(); - let checkpoint_duration = - async_checkpoint(tablet, &scheduler, region_id, split_region_ids, log_index).await; + let logger = self.logger.clone(); + let tablet_registry = self.tablet_registry().clone(); + self.high_priority_pool() + .spawn(async move { + let checkpoint_start = Instant::now(); + let mut checkpointer = tablet.new_checkpointer().unwrap_or_else(|e| { + slog_panic!( + logger, + "fails to create checkpoint object"; + "region_id" => region_id, + "error" => ?e + ) + }); + + for id in split_region_ids { + let split_temp_path = temp_split_path(&tablet_registry, id); + checkpointer + .create_at(&split_temp_path, None, 0) + .unwrap_or_else(|e| { + slog_panic!( 
+ logger, + "fails to create checkpoint"; + "region_id" => region_id, + "path" => %split_temp_path.display(), + "error" => ?e + ) + }); + } + let derived_path = tablet_registry.tablet_path(region_id, log_index); + + // If it's recovered from restart, it's possible the target path exists already. + // And because checkpoint is atomic, so we don't need to worry about corruption. + // And it's also wrong to delete it and remake as it may has applied and flushed + // some data to the new checkpoint before being restarted. + if !derived_path.exists() { + checkpointer + .create_at(&derived_path, None, 0) + .unwrap_or_else(|e| { + slog_panic!( + logger, + "fails to create checkpoint"; + "region_id" => region_id, + "path" => %derived_path.display(), + "error" => ?e + ) + }); + } + + tx.send(checkpoint_start.saturating_elapsed()).unwrap(); + }) + .unwrap(); + let checkpoint_duration = rx.await.unwrap(); // It should equal to checkpoint_duration + the duration of rescheduling current // apply peer let elapsed = now.saturating_elapsed(); @@ -553,27 +605,6 @@ impl Apply { } } -// asynchronously execute the checkpoint creation and return the duration spent -// by it -async fn async_checkpoint( - tablet: EK, - scheduler: &Scheduler>, - parent_region: u64, - split_regions: Vec, - log_index: u64, -) -> Duration { - let (tx, rx) = oneshot::channel(); - let task = checkpoint::Task::Checkpoint { - tablet, - log_index, - parent_region, - split_regions, - sender: tx, - }; - scheduler.schedule_force(task).unwrap(); - rx.await.unwrap() -} - impl Peer { pub fn ready_to_handle_first_append_message( &mut self, @@ -928,9 +959,9 @@ mod test { use slog::o; use tempfile::TempDir; use tikv_util::{ - defer, store::{new_learner_peer, new_peer}, - worker::{dummy_scheduler, Worker}, + worker::dummy_scheduler, + yatp_pool::{DefaultTicker, YatpPoolBuilder}, }; use super::*; @@ -1055,13 +1086,7 @@ mod test { region_state.set_region(region.clone()); region_state.set_tablet_index(5); - let 
checkpoint_worker = Worker::new("checkpoint-worker"); - let checkpoint_scheduler = checkpoint_worker.start( - "checkpoint-worker", - checkpoint::Runner::new(logger.clone(), reg.clone()), - ); - defer!(checkpoint_worker.stop()); - + let high_priority_pool = YatpPoolBuilder::new(DefaultTicker::default()).build_future_pool(); let (tablet_scheduler, _) = dummy_scheduler(); let (read_scheduler, _rx) = dummy_scheduler(); let (reporter, _) = MockReporter::new(); @@ -1086,8 +1111,8 @@ mod test { None, importer, host, - checkpoint_scheduler, tablet_scheduler, + high_priority_pool, logger.clone(), ); diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index f50efa94745..3aed3547ca5 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -145,8 +145,8 @@ impl Peer { mailbox, store_ctx.tablet_registry.clone(), read_scheduler, - store_ctx.schedulers.checkpoint.clone(), store_ctx.schedulers.tablet.clone(), + store_ctx.high_priority_pool.clone(), self.flush_state().clone(), sst_apply_state, self.storage().apply_trace().log_recovery(), @@ -692,7 +692,9 @@ impl Apply { AdminCmdType::CompactLog => self.apply_compact_log(admin_req, log_index)?, AdminCmdType::Split => self.apply_split(admin_req, log_index).await?, AdminCmdType::BatchSplit => self.apply_batch_split(admin_req, log_index).await?, - AdminCmdType::PrepareMerge => self.apply_prepare_merge(admin_req, log_index)?, + AdminCmdType::PrepareMerge => { + self.apply_prepare_merge(admin_req, log_index).await? 
+ } AdminCmdType::CommitMerge => self.apply_commit_merge(admin_req, log_index).await?, AdminCmdType::RollbackMerge => self.apply_rollback_merge(admin_req, log_index)?, AdminCmdType::TransferLeader => { diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index c79b7880e43..f4481150f46 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -340,6 +340,7 @@ mod test { use tikv_util::{ store::new_peer, worker::{dummy_scheduler, Worker}, + yatp_pool::{DefaultTicker, YatpPoolBuilder}, }; use crate::{ @@ -387,13 +388,13 @@ mod test { let snap_mgr = TabletSnapManager::new(tmp_dir.path(), None).unwrap(); let tablet_worker = Worker::new("tablet-worker"); - let checkpoint_scheduler = tablet_worker.start( + let tablet_scheduler = tablet_worker.start( "tablet-worker", tablet::Runner::new(reg.clone(), importer.clone(), snap_mgr, logger.clone()), ); tikv_util::defer!(tablet_worker.stop()); + let high_priority_pool = YatpPoolBuilder::new(DefaultTicker::default()).build_future_pool(); - let (dummy_scheduler, _) = dummy_scheduler(); let mut apply = Apply::new( &Config::default(), region @@ -413,8 +414,8 @@ mod test { None, importer, host, - dummy_scheduler, - checkpoint_scheduler, + tablet_scheduler, + high_priority_pool, logger.clone(), ); diff --git a/components/raftstore-v2/src/operation/query/capture.rs b/components/raftstore-v2/src/operation/query/capture.rs index 8378b320665..f83b7982cca 100644 --- a/components/raftstore-v2/src/operation/query/capture.rs +++ b/components/raftstore-v2/src/operation/query/capture.rs @@ -196,7 +196,11 @@ mod test { }; use slog::o; use tempfile::TempDir; - use tikv_util::{store::new_peer, worker::dummy_scheduler}; + use tikv_util::{ + store::new_peer, + worker::dummy_scheduler, + yatp_pool::{DefaultTicker, YatpPoolBuilder}, + }; use super::*; use crate::{ @@ -271,7 +275,7 @@ mod test 
{ .register_cmd_observer(0, BoxCmdObserver::new(ob)); let (dummy_scheduler1, _) = dummy_scheduler(); - let (dummy_scheduler2, _) = dummy_scheduler(); + let high_priority_pool = YatpPoolBuilder::new(DefaultTicker::default()).build_future_pool(); let mut apply = Apply::new( &Config::default(), region @@ -292,7 +296,7 @@ mod test { importer, host, dummy_scheduler1, - dummy_scheduler2, + high_priority_pool, logger.clone(), ); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 1ceb4d9d154..d505442f55a 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -298,6 +298,8 @@ impl Peer { .tablet .schedule(crate::worker::tablet::Task::Flush { region_id: self.region().get_id(), + reason: "unknown", + threshold: Some(std::time::Duration::from_secs(10)), cb: None, }); return; diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index b0d84137cfd..96e48e52417 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -16,12 +16,11 @@ use raftstore::{ }; use slog::Logger; use sst_importer::SstImporter; -use tikv_util::{log::SlogFormat, worker::Scheduler}; +use tikv_util::{log::SlogFormat, worker::Scheduler, yatp_pool::FuturePool}; use crate::{ operation::{AdminCmdResult, ApplyFlowControl, DataTrace}, router::CmdResChannel, - worker::checkpoint, TabletTask, }; @@ -74,8 +73,8 @@ pub struct Apply { observe: Observe, coprocessor_host: CoprocessorHost, - checkpoint_scheduler: Scheduler>, tablet_scheduler: Scheduler>, + high_priority_pool: FuturePool, // Whether to use the delete range API instead of deleting one by one. 
use_delete_range: bool, @@ -101,8 +100,8 @@ impl Apply { buckets: Option, sst_importer: Arc, coprocessor_host: CoprocessorHost, - checkpoint_scheduler: Scheduler>, tablet_scheduler: Scheduler>, + high_priority_pool: FuturePool, logger: Logger, ) -> Self { let mut remote_tablet = tablet_registry @@ -136,8 +135,8 @@ impl Apply { metrics: ApplyMetrics::default(), buckets, sst_importer, - checkpoint_scheduler, tablet_scheduler, + high_priority_pool, use_delete_range: cfg.use_delete_range, observe: Observe { info: CmdObserveInfo::default(), @@ -337,8 +336,8 @@ impl Apply { } #[inline] - pub fn checkpoint_scheduler(&self) -> &Scheduler> { - &self.checkpoint_scheduler + pub fn high_priority_pool(&self) -> &FuturePool { + &self.high_priority_pool } #[inline] diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index cb5ea9a8580..ef7363ee0c4 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -48,7 +48,6 @@ const REGION_READ_PROGRESS_CAP: usize = 128; pub struct Peer { raft_group: RawNode>, tablet: CachedTablet, - tablet_being_flushed: bool, /// Statistics for self. 
self_stat: PeerStat, @@ -171,7 +170,6 @@ impl Peer { let tag = format!("[region {}] {}", region.get_id(), peer_id); let mut peer = Peer { tablet: cached_tablet, - tablet_being_flushed: false, self_stat: PeerStat::default(), peer_cache: vec![], peer_heartbeats: HashMap::default(), @@ -323,16 +321,6 @@ impl Peer { self.peer().get_id() } - #[inline] - pub fn tablet_being_flushed(&self) -> bool { - self.tablet_being_flushed - } - - #[inline] - pub fn set_tablet_being_flushed(&mut self, v: bool) { - self.tablet_being_flushed = v; - } - #[inline] pub fn storage(&self) -> &Storage { self.raft_group.store() diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 5e6aa5c97ea..8bc4cc2d9c5 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -347,7 +347,10 @@ mod tests { }; use slog::o; use tempfile::TempDir; - use tikv_util::worker::{dummy_scheduler, Worker}; + use tikv_util::{ + worker::{dummy_scheduler, Worker}, + yatp_pool::{DefaultTicker, YatpPoolBuilder}, + }; use super::*; use crate::{ @@ -516,7 +519,7 @@ mod tests { let host = CoprocessorHost::::default(); let (dummy_scheduler1, _) = dummy_scheduler(); - let (dummy_scheduler2, _) = dummy_scheduler(); + let high_priority_pool = YatpPoolBuilder::new(DefaultTicker::default()).build_future_pool(); // setup peer applyer let mut apply = Apply::new( &Config::default(), @@ -533,7 +536,7 @@ mod tests { importer, host, dummy_scheduler1, - dummy_scheduler2, + high_priority_pool, logger, ); diff --git a/components/raftstore-v2/src/worker/checkpoint.rs b/components/raftstore-v2/src/worker/checkpoint.rs deleted file mode 100644 index e10f62584d5..00000000000 --- a/components/raftstore-v2/src/worker/checkpoint.rs +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
- -use std::{fmt::Display, path::PathBuf, time::Duration}; - -use engine_traits::{Checkpointer, KvEngine, TabletRegistry}; -use futures::channel::oneshot::Sender; -use raftstore::store::RAFT_INIT_LOG_INDEX; -use slog::Logger; -use tikv_util::{slog_panic, time::Instant, worker::Runnable}; - -use crate::operation::SPLIT_PREFIX; - -pub enum Task { - Checkpoint { - // it is only used to assert - log_index: u64, - parent_region: u64, - split_regions: Vec, - tablet: EK, - sender: Sender, - }, -} - -impl Display for Task { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Task::Checkpoint { - log_index, - parent_region, - split_regions, - .. - } => write!( - f, - "create checkpoint for batch split, parent region_id {}, source region_ids {:?}, log_index {}", - parent_region, split_regions, log_index, - ), - } - } -} - -pub struct Runner { - logger: Logger, - tablet_registry: TabletRegistry, -} - -pub fn temp_split_path(registry: &TabletRegistry, region_id: u64) -> PathBuf { - let tablet_name = registry.tablet_name(SPLIT_PREFIX, region_id, RAFT_INIT_LOG_INDEX); - registry.tablet_root().join(tablet_name) -} - -impl Runner { - pub fn new(logger: Logger, tablet_registry: TabletRegistry) -> Self { - Self { - logger, - tablet_registry, - } - } - - fn checkpoint( - &self, - parent_region: u64, - split_regions: Vec, - log_index: u64, - tablet: EK, - sender: Sender, - ) { - let now = Instant::now(); - let mut checkpointer = tablet.new_checkpointer().unwrap_or_else(|e| { - slog_panic!( - self.logger, - "fails to create checkpoint object"; - "region_id" => parent_region, - "error" => ?e - ) - }); - - for id in split_regions { - let split_temp_path = temp_split_path(&self.tablet_registry, id); - checkpointer - .create_at(&split_temp_path, None, 0) - .unwrap_or_else(|e| { - slog_panic!( - self.logger, - "fails to create checkpoint"; - "region_id" => parent_region, - "path" => %split_temp_path.display(), - "error" => ?e - ) - }); - } - - let 
derived_path = self.tablet_registry.tablet_path(parent_region, log_index); - - // If it's recovered from restart, it's possible the target path exists already. - // And because checkpoint is atomic, so we don't need to worry about corruption. - // And it's also wrong to delete it and remake as it may has applied and flushed - // some data to the new checkpoint before being restarted. - if !derived_path.exists() { - checkpointer - .create_at(&derived_path, None, 0) - .unwrap_or_else(|e| { - slog_panic!( - self.logger, - "fails to create checkpoint"; - "region_id" => parent_region, - "path" => %derived_path.display(), - "error" => ?e - ) - }); - } - - sender.send(now.saturating_elapsed()).unwrap(); - } -} - -impl Runnable for Runner { - type Task = Task; - - fn run(&mut self, task: Self::Task) { - match task { - Task::Checkpoint { - log_index, - parent_region, - split_regions, - tablet, - sender, - } => { - self.checkpoint(parent_region, split_regions, log_index, tablet, sender); - } - } - } -} diff --git a/components/raftstore-v2/src/worker/mod.rs b/components/raftstore-v2/src/worker/mod.rs index ead4ca7043c..4eee822b8c7 100644 --- a/components/raftstore-v2/src/worker/mod.rs +++ b/components/raftstore-v2/src/worker/mod.rs @@ -1,6 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -pub mod checkpoint; pub mod cleanup; pub mod pd; pub mod refresh_config; diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index 8bf130397f7..6e01ee327d0 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -17,6 +17,7 @@ use raftstore::store::{TabletSnapKey, TabletSnapManager}; use slog::{debug, error, info, warn, Logger}; use sst_importer::SstImporter; use tikv_util::{ + config::ReadableDuration, slog_panic, time::Instant, worker::{Runnable, RunnableWithTimer}, @@ -50,12 +51,14 @@ pub enum Task { }, /// Cleanup ssts. 
CleanupImportSst(Box<[SstMeta]>), - /// Flush memtable before split - /// - /// cb is some iff the task is sent from leader, it is used to real propose - /// split when flush finishes + /// Flush memtable. Flush { region_id: u64, + reason: &'static str, + /// Do not flush if the active memtable is just flushed within this + /// threshold. + threshold: Option, + /// Callback will be called if memtable is fresh. cb: Option>, }, DeleteRange { @@ -88,32 +91,32 @@ impl Display for Task { .. } => write!( f, - "prepare destroy tablet for region_id {}, wait_for_persisted {}", - region_id, wait_for_persisted, + "prepare destroy tablet for region_id {region_id}, wait_for_persisted {wait_for_persisted}", ), Task::Destroy { region_id, persisted_index, } => write!( f, - "destroy tablet for region_id {} persisted_index {}", - region_id, persisted_index, + "destroy tablet for region_id {region_id}, persisted_index {persisted_index}", ), Task::DirectDestroy { .. } => { write!(f, "direct destroy tablet") } Task::CleanupImportSst(ssts) => { - write!(f, "cleanup import ssts {:?}", ssts) + write!(f, "cleanup import ssts {ssts:?}") } Task::Flush { region_id, + reason, + threshold, cb: on_flush_finish, } => { write!( f, - "flush tablet for region_id {}, is leader {}", - region_id, - on_flush_finish.is_some() + "flush tablet for region_id {region_id}, reason {reason}, threshold {:?}, has_cb {}", + threshold, + on_flush_finish.is_some(), ) } Task::DeleteRange { @@ -424,12 +427,26 @@ impl Runner { } } - fn flush_tablet(&self, region_id: u64, cb: Option>) { + fn flush_tablet( + &self, + region_id: u64, + reason: &'static str, + threshold: Option, + cb: Option>, + ) { let Some(Some(tablet)) = self .tablet_registry .get(region_id) - .map(|mut cache| cache.latest().cloned()) else {return}; - + .map(|mut cache| cache.latest().cloned()) else { + warn!( + self.logger, + "flush memtable failed to acquire tablet"; + "region_id" => region_id, + "reason" => reason, + ); + return + }; + let threshold 
= threshold.map(|t| std::time::SystemTime::now() - t); // The callback `cb` being some means it's the task sent from // leader, we should sync flush memtables and call it after the flush complete // where the split will be proposed again with extra flag. @@ -439,28 +456,63 @@ impl Runner { self.background_pool .spawn(async move { // sync flush for leader to let the flush happend before later checkpoint. - tablet.flush_cfs(DATA_CFS, true).unwrap(); - let elapsed = now.saturating_elapsed(); - // to be removed after when it's stable - info!( - logger, - "flush memtable for leader"; - "region_id" => region_id, - "duration" => ?elapsed, - ); - + if threshold.is_none() || tablet.has_old_active_memtable(threshold.unwrap()) { + let r = tablet.flush_cfs(DATA_CFS, true); + let elapsed = now.saturating_elapsed(); + if let Err(e) = r { + warn!( + logger, + "flush memtable for leader failed"; + "region_id" => region_id, + "reason" => reason, + "err" => ?e, + ); + return; + } else { + info!( + logger, + "flush memtable for leader"; + "region_id" => region_id, + "reason" => reason, + "duration" => %ReadableDuration(elapsed), + ); + } + } else { + info!( + logger, + "skipped flush memtable for leader"; + "region_id" => region_id, + "reason" => reason, + ); + } drop(tablet); cb(); }) .unwrap(); + } else if threshold.is_none() || tablet.has_old_active_memtable(threshold.unwrap()) { + if let Err(e) = tablet.flush_cfs(DATA_CFS, false) { + warn!( + self.logger, + "flush memtable for follower failed"; + "region_id" => region_id, + "reason" => reason, + "err" => ?e, + ); + } else { + info!( + self.logger, + "flush memtable for follower"; + "region_id" => region_id, + "reason" => reason, + ); + } } else { info!( self.logger, - "flush memtable for follower"; + "skipped flush memtable for follower"; "region_id" => region_id, + "reason" => reason, ); - - tablet.flush_cfs(DATA_CFS, false).unwrap(); } } @@ -531,7 +583,12 @@ where } => self.destroy(region_id, persisted_index), 
Task::DirectDestroy { tablet, .. } => self.direct_destroy(tablet), Task::CleanupImportSst(ssts) => self.cleanup_ssts(ssts), - Task::Flush { region_id, cb } => self.flush_tablet(region_id, cb), + Task::Flush { + region_id, + reason, + threshold, + cb, + } => self.flush_tablet(region_id, reason, threshold, cb), delete_range @ Task::DeleteRange { .. } => self.delete_range(delete_range), Task::SnapGc(keys) => self.snap_gc(keys), } diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index 1c2cc573eb9..c81230f6a16 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -1328,9 +1328,21 @@ impl TestPdClient { } pub fn must_merge(&self, from: u64, target: u64) { + let epoch = self.get_region_epoch(target); self.merge_region(from, target); - self.check_merged_timeout(from, Duration::from_secs(5)); + self.check_merged_timeout(from, Duration::from_secs(10)); + let timer = Instant::now(); + loop { + if epoch.get_version() == self.get_region_epoch(target).get_version() { + if timer.saturating_elapsed() > Duration::from_secs(1) { + panic!("region {:?} is still not merged.", target); + } + } else { + return; + } + sleep_ms(10); + } } pub fn check_merged(&self, from: u64) -> bool { diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 16fb78ae5bc..fb9772c7189 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -1634,7 +1634,9 @@ fn test_stale_message_after_merge() { /// Check whether merge should be prevented if follower may not have enough /// logs. #[test_case(test_raftstore::new_server_cluster)] -#[test_case(test_raftstore_v2::new_server_cluster)] +// FIXME: #[test_case(test_raftstore_v2::new_server_cluster)] +// In v2 `try_merge` always return error. Also the last `must_merge` sometimes +// cannot get an updated min_matched. 
fn test_prepare_merge_with_reset_matched() { let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); From bd11fb033e85512f0dd20cffaf1bcefc5e8c0bd6 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Mon, 26 Jun 2023 10:17:35 -0700 Subject: [PATCH 0758/1149] metrics: Add snapshot generation wait metric (#14941) close tikv/tikv#14940 Add snapshot generation wait duration metric Signed-off-by: Yang Zhang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../raftstore/src/store/worker/metrics.rs | 6 + .../raftstore/src/store/worker/region.rs | 3 + metrics/grafana/tikv_details.json | 9854 +++++++++-------- 3 files changed, 4975 insertions(+), 4888 deletions(-) diff --git a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index 2ad06d9c69d..fd3f54d239d 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -134,6 +134,12 @@ lazy_static! 
{ .unwrap(); pub static ref SNAP_HISTOGRAM: SnapHistogram = auto_flush_from!(SNAP_HISTOGRAM_VEC, SnapHistogram); + pub static ref SNAP_GEN_WAIT_DURATION_HISTOGRAM: Histogram = register_histogram!( + "tikv_raftstore_snapshot_generation_wait_duration_seconds", + "Bucketed histogram of raftstore snapshot generation wait duration", + exponential_buckets(0.00001, 2.0, 26).unwrap() + ) + .unwrap(); pub static ref CHECK_SPILT_HISTOGRAM: Histogram = register_histogram!( "tikv_raftstore_check_split_duration_seconds", "Bucketed histogram of raftstore split check duration", diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index d4e6039b7ea..2ea5eb947fd 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -825,7 +825,10 @@ where router: self.router.clone(), start: UnixSecs::now(), }; + let scheduled_time = Instant::now_coarse(); self.pool.spawn(async move { + SNAP_GEN_WAIT_DURATION_HISTOGRAM + .observe(scheduled_time.saturating_elapsed_secs()); tikv_alloc::add_thread_memory_accessor(); ctx.handle_gen( region_id, diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 0ad9bbc4def..3a679cdf138 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -14,7 +14,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "7.5.11" + "version": "7.5.10" }, { "type": "panel", @@ -70,7 +70,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1675760728538, + "iteration": 1686724160525, "links": [], "panels": [ { @@ -10526,7 +10526,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:113", "alias": "count", "dashLength": 1, "dashes": true, @@ -10537,7 +10536,6 @@ "zindex": -3 }, { - "$$hashKey": "object:114", "alias": "avg", "fill": 7 } @@ -10600,7 +10598,6 @@ }, "yaxes": [ { - "$$hashKey": "object:139", "format": "s", "label": null, "logBase": 1, @@ 
-10609,7 +10606,6 @@ "show": true }, { - "$$hashKey": "object:140", "format": "short", "label": null, "logBase": 1, @@ -10670,7 +10666,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:217", "alias": "count", "dashLength": 1, "dashes": true, @@ -10681,7 +10676,6 @@ "zindex": -3 }, { - "$$hashKey": "object:218", "alias": "avg", "fill": 7 } @@ -10744,7 +10738,6 @@ }, "yaxes": [ { - "$$hashKey": "object:243", "format": "s", "label": null, "logBase": 1, @@ -10753,7 +10746,6 @@ "show": true }, { - "$$hashKey": "object:244", "format": "short", "label": null, "logBase": 1, @@ -10814,7 +10806,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:529", "alias": "count", "dashLength": 1, "dashes": true, @@ -10825,7 +10816,6 @@ "zindex": -3 }, { - "$$hashKey": "object:530", "alias": "avg", "fill": 7 } @@ -10888,7 +10878,6 @@ }, "yaxes": [ { - "$$hashKey": "object:555", "format": "s", "label": null, "logBase": 1, @@ -10897,7 +10886,6 @@ "show": true }, { - "$$hashKey": "object:556", "format": "short", "label": null, "logBase": 1, @@ -10958,7 +10946,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:529", "alias": "count", "dashLength": 1, "dashes": true, @@ -10969,7 +10956,6 @@ "zindex": -3 }, { - "$$hashKey": "object:530", "alias": "avg", "fill": 7 } @@ -11032,7 +11018,6 @@ }, "yaxes": [ { - "$$hashKey": "object:555", "format": "s", "label": null, "logBase": 1, @@ -11041,7 +11026,6 @@ "show": true }, { - "$$hashKey": "object:556", "format": "short", "label": null, "logBase": 1, @@ -11242,7 +11226,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:733", "alias": "count", "dashLength": 1, "dashes": true, @@ -11253,7 +11236,6 @@ "zindex": -3 }, { - "$$hashKey": "object:734", "alias": "avg", "fill": 7 } @@ -11316,7 +11298,6 @@ }, "yaxes": [ { - "$$hashKey": "object:759", "format": "s", "label": null, "logBase": 1, @@ -11325,7 +11306,6 @@ "show": true }, { - "$$hashKey": "object:760", "format": "short", 
"label": null, "logBase": 1, @@ -11526,7 +11506,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:841", "alias": "count", "dashLength": 1, "dashes": true, @@ -11537,7 +11516,6 @@ "zindex": -3 }, { - "$$hashKey": "object:842", "alias": "avg", "fill": 7 } @@ -11600,7 +11578,6 @@ }, "yaxes": [ { - "$$hashKey": "object:867", "format": "s", "label": null, "logBase": 1, @@ -11609,7 +11586,6 @@ "show": true }, { - "$$hashKey": "object:868", "format": "short", "label": null, "logBase": 1, @@ -16067,7 +16043,6 @@ }, "yaxes": [ { - "$$hashKey": "object:270", "format": "s", "label": null, "logBase": 1, @@ -16076,7 +16051,6 @@ "show": true }, { - "$$hashKey": "object:271", "format": "short", "label": null, "logBase": 1, @@ -16175,7 +16149,6 @@ }, "yaxes": [ { - "$$hashKey": "object:70", "format": "short", "label": null, "logBase": 1, @@ -16184,7 +16157,6 @@ "show": true }, { - "$$hashKey": "object:71", "format": "short", "label": null, "logBase": 1, @@ -17291,7 +17263,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:521", "alias": "/pending-task/", "transform": "negative-Y", "yaxis": 2 @@ -17362,7 +17333,6 @@ }, "yaxes": [ { - "$$hashKey": "object:86", "decimals": null, "format": "s", "label": null, @@ -17372,7 +17342,6 @@ "show": true }, { - "$$hashKey": "object:87", "format": "short", "label": null, "logBase": 1, @@ -18569,237 +18538,233 @@ "yBucketSize": null }, { - "type": "graph", - "title": "Storage async snapshot duration", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The storage async snapshot duration", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, "gridPos": { - "x": 0, - "y": 35, + "h": 8, "w": 12, - "h": 8 + "x": 0, + "y": 35 }, + "hiddenSeries": false, "id": 20000, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + 
"values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.10", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { + "exemplar": true, "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le))", - "legendFormat": "99%", "interval": "", - "exemplar": true, - "refId": "A", + "intervalFactor": 2, + "legendFormat": "99%", "queryType": "randomWalk", - "intervalFactor": 2 + "refId": "A" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le))", - "legendFormat": "95%", - "interval": "", "exemplar": true, - "refId": "B", + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le))", "hide": false, - "intervalFactor": 2 + "interval": "", + "intervalFactor": 2, + "legendFormat": "95%", + "refId": "B" }, { - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m]))", - "legendFormat": "avg", - "interval": "", "exemplar": true, - "refId": "C", + "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", type=\"snapshot\"}[1m]))", "hide": false, - "intervalFactor": 2 + "interval": "", + "intervalFactor": 2, + "legendFormat": "avg", + "refId": "C" } ], - "options": { - "alertThreshold": true + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Storage async snapshot duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" }, - "fieldConfig": { - "defaults": {}, - "overrides": [] + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "pluginVersion": "7.5.10", - "renderer": "flot", "yaxes": [ { + "format": "s", "label": null, - "show": true, "logBase": 1, - "min": null, "max": null, - "format": "s", - "$$hashKey": "object:295" + "min": null, + "show": true }, { + "format": "short", "label": null, - "show": true, "logBase": 1, - "min": null, "max": null, - "format": "short", - "$$hashKey": "object:296" + "min": null, + "show": true } ], - "xaxis": { - "show": true, - "mode": "time", - "name": null, - "values": [], - "buckets": null - }, "yaxis": { "align": false, "alignLevel": null - }, - "lines": true, - "fill": 1, - "linewidth": 1, + } + }, + { + "aliasColors": {}, + "bars": false, "dashLength": 10, - "spaceLength": 10, - "pointradius": 2, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The storage async write duration", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 35 + }, + "hiddenSeries": false, + "id": 20001, "legend": { - "show": true, - "values": false, - "min": false, - "max": false, + "avg": false, "current": false, + "max": false, + "min": false, + "show": true, "total": false, - "avg": false + "values": false }, + "lines": true, + "linewidth": 1, "nullPointMode": "null", - "tooltip": { - "value_type": "individual", - "shared": true, - "sort": 0 + "options": { + 
"alertThreshold": true }, - "aliasColors": {}, - "seriesOverrides": [], - "thresholds": [], - "timeRegions": [], - "description": "The storage async snapshot duration", - "datasource": "${DS_TEST-CLUSTER}", - "fillGradient": 0, - "dashes": false, - "hiddenSeries": false, + "percentage": false, + "pluginVersion": "7.5.10", + "pointradius": 2, "points": false, - "bars": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "stack": false, - "percentage": false, "steppedLine": false, - "timeFrom": null, - "timeShift": null - }, - { - "type": "graph", - "title": "Storage async write duration", - "gridPos": { - "x": 12, - "y": 35, - "w": 12, - "h": 8 - }, - "id": 20001, "targets": [ { + "exemplar": true, "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le))", - "legendFormat": "99%", "interval": "", - "exemplar": true, - "refId": "A", - "intervalFactor": 1 + "intervalFactor": 1, + "legendFormat": "99%", + "refId": "A" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le))", - "legendFormat": "95%", - "interval": "", "exemplar": true, - "refId": "B", + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le))", "hide": false, - "intervalFactor": 1 + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "refId": "B" }, { - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m]))", - "legendFormat": 
"avg", - "interval": "", "exemplar": true, - "refId": "C", + "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m]))", "hide": false, - "intervalFactor": 1 + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "refId": "C" } ], - "options": { - "alertThreshold": true + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Storage async write duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" }, - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "pluginVersion": "7.5.10", - "renderer": "flot", "yaxes": [ { + "format": "s", "label": null, - "show": true, "logBase": 1, - "min": null, "max": null, - "format": "s", - "$$hashKey": "object:494" + "min": null, + "show": true }, { + "format": "short", "label": null, - "show": true, "logBase": 1, - "min": null, "max": null, - "format": "short", - "$$hashKey": "object:495" + "min": null, + "show": true } ], - "xaxis": { - "show": true, - "mode": "time", - "name": null, - "values": [], - "buckets": null - }, "yaxis": { "align": false, "alignLevel": null - }, - "lines": true, - "fill": 2, - "linewidth": 1, - "dashLength": 10, - "spaceLength": 10, - "pointradius": 2, - "legend": { - "show": true, - "values": false, - "min": false, - "max": false, - "current": false, - "total": false, - "avg": false - }, - "nullPointMode": "null", - "tooltip": { - "value_type": "individual", - "shared": true, - "sort": 0 - }, - "aliasColors": {}, - "seriesOverrides": [], - "thresholds": [], - "timeRegions": [], - "description": "The storage async write duration", 
- "fillGradient": 0, - "dashes": false, - "hiddenSeries": false, - "points": false, - "bars": false, - "stack": false, - "percentage": false, - "steppedLine": false, - "timeFrom": null, - "timeShift": null + } } ], "repeat": null, @@ -21100,6 +21065,13 @@ "pointradius": 5, "points": false, "renderer": "flot", + "scopedVars": { + "command": { + "selected": false, + "text": "acquire_pessimistic_lock", + "value": "acquire_pessimistic_lock" + } + }, "seriesOverrides": [], "spaceLength": 10, "stack": false, @@ -21163,7 +21135,6 @@ }, "yaxes": [ { - "$$hashKey": "object:95", "format": "s", "label": null, "logBase": 1, @@ -21172,7 +21143,6 @@ "show": true }, { - "$$hashKey": "object:96", "format": "short", "label": null, "logBase": 1, @@ -21279,7 +21249,7 @@ "h": 1, "w": 24, "x": 0, - "y": 34 + "y": 28 }, "id": 2755, "panels": [ @@ -21805,7 +21775,7 @@ "h": 1, "w": 24, "x": 0, - "y": 35 + "y": 29 }, "id": 2758, "panels": [ @@ -23145,7 +23115,7 @@ "h": 1, "w": 24, "x": 0, - "y": 36 + "y": 30 }, "id": 2759, "panels": [ @@ -23158,14 +23128,20 @@ "description": "The rate of Raft snapshot messages sent", "editable": true, "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, - "w": 8, + "w": 12, "x": 0, - "y": 20 + "y": 31 }, + "hiddenSeries": false, "id": 35, "legend": { "alignAsTable": true, @@ -23185,7 +23161,11 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", @@ -23251,23 +23231,29 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when handling snapshots", + "description": "The number of snapshots in different states", "editable": true, "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "grid": {}, "gridPos": { "h": 
7, - "w": 8, - "x": 8, - "y": 20 + "w": 12, + "x": 12, + "y": 31 }, - "id": 37, + "hiddenSeries": false, + "id": 38, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": true, + "hideEmpty": false, "max": true, "min": false, "rightSide": true, @@ -23280,38 +23266,135 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, - "steppedLine": false, + "steppedLine": true, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_send_snapshot_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "expr": "sum(tikv_raftstore_snapshot_traffic_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "send", + "legendFormat": "{{type}}", + "metric": "", "refId": "A", "step": 60 - }, + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Snapshot state count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_snapshot_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"apply\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "apply", - "refId": "B", - "step": 60 + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "expr": "histogram_quantile(0.99, 
sum(rate(tikv_raftstore_snapshot_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"generate\"}[1m])) by (le))", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time snapshot generation tasks waited to be scheduled. ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 38 + }, + "hiddenSeries": false, + "id": 37, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.10", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_snapshot_generation_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", "format": "time_series", + "interval": "", "intervalFactor": 2, - "legendFormat": "generate", - "refId": "C", + "legendFormat": "", + "refId": "A", "step": 60 } ], @@ -23319,7 +23402,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "99% Handle snapshot duration", + "title": "99% Snapshot generation wait duration", "tooltip": { "msResolution": false, 
"shared": true, @@ -23363,23 +23446,29 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of snapshots in different states", + "description": "The time consumed when handling snapshots", "editable": true, "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, - "w": 8, - "x": 16, - "y": 20 + "w": 12, + "x": 12, + "y": 38 }, - "id": 38, + "hiddenSeries": false, + "id": 23763573704, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "max": true, "min": false, "rightSide": true, @@ -23392,31 +23481,50 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, - "steppedLine": true, + "steppedLine": false, "targets": [ { - "expr": "sum(tikv_raftstore_snapshot_traffic_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "expr": "histogram_quantile(0.99, sum(rate(tikv_server_send_snapshot_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{type}}", - "metric": "", + "legendFormat": "send", "refId": "A", "step": 60 + }, + { + "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_snapshot_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"apply\"}[1m])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "apply", + "refId": "B", + "step": 60 + }, + { + "expr": "histogram_quantile(0.99, 
sum(rate(tikv_raftstore_snapshot_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"generate\"}[1m])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "generate", + "refId": "C", + "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Snapshot state count", + "title": "99% Handle snapshot duration", "tooltip": { "msResolution": false, "shared": true, @@ -23433,7 +23541,7 @@ }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -23463,14 +23571,20 @@ "description": "The snapshot size (P99.99).9999", "editable": true, "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 27 + "y": 45 }, + "hiddenSeries": false, "id": 44, "legend": { "alignAsTable": true, @@ -23490,7 +23604,11 @@ "linewidth": 1, "links": [], "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", @@ -23560,14 +23678,20 @@ "description": "The number of KV within a snapshot in .9999", "editable": true, "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 27 + "y": 45 }, + "hiddenSeries": false, "id": 43, "legend": { "alignAsTable": true, @@ -23587,7 +23711,11 @@ "linewidth": 1, "links": [], "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", @@ -23657,14 +23785,20 @@ "description": "Action stats for snapshot generating and applying", "editable": true, "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "fill": 1, + 
"fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 66 + "y": 52 }, + "hiddenSeries": false, "id": 36, "legend": { "alignAsTable": true, @@ -23684,7 +23818,11 @@ "linewidth": 1, "links": [], "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", @@ -23759,14 +23897,20 @@ "description": "The speed of sending or receiving snapshot", "editable": true, "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 66 + "y": 52 }, + "hiddenSeries": false, "id": 4201, "legend": { "alignAsTable": true, @@ -23786,7 +23930,11 @@ "linewidth": 1, "links": [], "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", @@ -23867,7 +24015,7 @@ "h": 1, "w": 24, "x": 0, - "y": 37 + "y": 31 }, "id": 2760, "panels": [ @@ -24279,7 +24427,7 @@ "h": 1, "w": 24, "x": 0, - "y": 38 + "y": 32 }, "id": 2757, "panels": [ @@ -24299,6 +24447,10 @@ "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed to handle coprocessor read requests", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "gridPos": { "h": 8, "w": 12, @@ -24323,6 +24475,7 @@ "values": true }, "links": [], + "reverseYBuckets": false, "targets": [ { "expr": "sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", @@ -24343,6 +24496,8 @@ "xAxis": { "show": true }, + "xBucketNumber": null, + "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", @@ -24353,13 +24508,6 @@ "splitFactor": null }, "yBucketBound": "upper", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "reverseYBuckets": 
false, - "xBucketNumber": null, - "xBucketSize": null, "yBucketNumber": null, "yBucketSize": null }, @@ -25083,7 +25231,7 @@ "h": 1, "w": 24, "x": 0, - "y": 39 + "y": 33 }, "id": 3197, "panels": [ @@ -26177,7 +26325,7 @@ "h": 1, "w": 24, "x": 0, - "y": 40 + "y": 34 }, "id": 2761, "panels": [ @@ -26574,7 +26722,7 @@ "h": 1, "w": 24, "x": 0, - "y": 41 + "y": 35 }, "id": 2762, "panels": [ @@ -31314,7 +31462,7 @@ "h": 1, "w": 24, "x": 0, - "y": 42 + "y": 36 }, "id": 12802, "panels": [ @@ -32363,7 +32511,7 @@ "h": 1, "w": 24, "x": 0, - "y": 43 + "y": 37 }, "id": 3301, "panels": [ @@ -35372,7 +35520,7 @@ "h": 1, "w": 24, "x": 0, - "y": 44 + "y": 38 }, "id": 2820, "panels": [ @@ -36311,15 +36459,20 @@ }, { "aliasColors": {}, + "bars": false, "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, + "description": "The number of active keys and waiters.", "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, "fill": 1, + "fillGradient": 0, "grid": {}, "gridPos": { "h": 8, @@ -36327,6 +36480,7 @@ "x": 12, "y": 75 }, + "hiddenSeries": false, "id": 23763573091, "legend": { "alignAsTable": true, @@ -36351,25 +36505,31 @@ "options": { "alertThreshold": true }, + "percentage": false, "pluginVersion": "7.5.11", "pointradius": 5, + "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(tikv_lock_wait_queue_entries_gauge_vec{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", - "legendFormat": "{{type}}", - "interval": "", "exemplar": true, + "expr": "sum(tikv_lock_wait_queue_entries_gauge_vec{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", "format": "time_series", + "interval": "", "intervalFactor": 2, + "legendFormat": "{{type}}", "refId": "A", "step": 4 } ], "thresholds": [], + "timeFrom": null, "timeRegions": [], + "timeShift": null, "title": "Pessimistic lock activities", 
"tooltip": { "msResolution": false, @@ -36406,22 +36566,23 @@ "yaxis": { "align": false, "alignLevel": null - }, - "bars": false, - "dashes": false, - "error": false, - "fillGradient": 0, - "hiddenSeries": false, - "percentage": false, - "points": false, - "stack": false, - "steppedLine": false, - "timeFrom": null, - "timeShift": null, - "description": "The number of active keys and waiters." + } }, { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": "The length includes the entering transaction itself", "fieldConfig": { "defaults": {}, "overrides": [] @@ -36432,66 +36593,53 @@ "x": 0, "y": 83 }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, "id": 23763573092, "legend": { "show": false }, "links": [], "pluginVersion": "7.5.11", + "reverseYBuckets": false, "targets": [ { - "expr": "sum(increase(tikv_lock_wait_queue_length_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", - "legendFormat": "{{le}}", - "interval": "", "exemplar": true, + "expr": "sum(increase(tikv_lock_wait_queue_length_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", "format": "heatmap", + "interval": "", "intervalFactor": 2, + "legendFormat": "{{le}}", "refId": "A", "step": 4 } ], + "timeFrom": null, + "timeShift": null, "title": "Lengths of lock wait queues when transaction enqueues", "tooltip": { "show": true, "showHistogram": false }, "type": "heatmap", - "timeFrom": null, - "timeShift": null, - "description": "The length includes the entering transaction itself", - "heatmap": {}, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "mode": "spectrum", - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "exponent": 0.5, - "colorScheme": 
"interpolateOranges" - }, - "dataFormat": "tsbuckets", - "yBucketBound": "auto", - "reverseYBuckets": false, "xAxis": { "show": true }, + "xBucketNumber": null, + "xBucketSize": null, "yAxis": { - "show": true, - "format": "short", "decimals": null, + "format": "short", "logBase": 1, - "splitFactor": null, + "max": null, "min": null, - "max": null + "show": true, + "splitFactor": null }, - "xBucketSize": null, - "xBucketNumber": null, - "yBucketSize": null, + "yBucketBound": "auto", "yBucketNumber": null, - "highlightCards": true, - "hideZeroBuckets": true + "yBucketSize": null } ], "title": "Pessimistic Locking", @@ -36504,9 +36652,9 @@ "h": 1, "w": 24, "x": 0, - "y": 45 + "y": 39 }, - "id": 8389, + "id": 23763573235, "panels": [ { "aliasColors": {}, @@ -36514,49 +36662,40 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of resolved ts worker", - "editable": true, - "error": false, "fieldConfig": { - "defaults": {}, + "defaults": { + "unit": "percentunit" + }, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, - "w": 8, + "h": 7, + "w": 6, "x": 0, - "y": 38 + "y": 45 }, "hiddenSeries": false, - "id": 8385, + "id": 23763573350, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "max": true, + "current": false, + "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, + "pluginVersion": "7.5.11", + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -36565,22 +36704,24 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"resolved_ts.*\"}[1m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, + "exemplar": true, + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "interval": "", "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "queryType": "randomWalk", + "refId": "A" + }, + { + "hide": false, + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Resolved TS Worker CPU", + "title": "CPU Usage", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -36599,7 +36740,7 @@ "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -36617,30 +36758,35 @@ } }, { - "aliasColors": {}, - "bars": false, + "aliasColors": { + "(AP)apply-99": "#88509f", + "(AP)get_permit-99": "#922870", + "(AP)queuing-99": "#9d0041", + "(DL)exec_download-99": "#73a0fe", + "(DL)queue-99": "#7d78ce", + "exec_download-99": "light-orange", + "get_permit-99": "red", + "queuing-99": "blue", + "total-99": "rgb(252, 252, 252)" + }, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of advance ts worker", - "editable": true, - "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, "fill": 0, "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 38 + "h": 7, + "w": 9, + "x": 6, + "y": 45 }, "hiddenSeries": false, - "id": 9162, + "id": 23763573351, "legend": { "alignAsTable": true, "avg": false, @@ -36649,13 +36795,10 @@ "min": false, "rightSide": true, "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, - "lines": true, + "lines": false, 
"linewidth": 1, "links": [], "nullPointMode": "null", @@ -36663,32 +36806,58 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, + "pluginVersion": "7.5.11", + "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "total-99", + "bars": false, + "fill": 2, + "lines": true, + "linewidth": 0, + "stack": false, + "yaxis": 2 + } + ], "spaceLength": 10, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"advance_ts.*\"}[1m])) by (instance)", + "exemplar": true, + "expr": "histogram_quantile(0.99, avg(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[1m])) by (le, request))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-tso", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "total-99", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, avg(rate(tikv_import_apply_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"queue|exec_download\"}[1m])) by (le, type))", + "hide": false, + "interval": "", + "legendFormat": "(DL){{type}}-99", + "refId": "C" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, avg(rate(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "hide": false, + "interval": "", + "legendFormat": "(AP){{type}}-99", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Advance ts Worker CPU", + "title": "P99 RPC Duration", 
"tooltip": { - "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -36703,15 +36872,15 @@ }, "yaxes": [ { - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -36731,24 +36900,21 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": " \tThe CPU utilization of scan lock worker", - "editable": true, - "error": false, + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 38 + "h": 7, + "w": 9, + "x": 15, + "y": 45 }, "hiddenSeries": false, - "id": 9164, + "id": 23763573352, "legend": { "alignAsTable": true, "avg": false, @@ -36771,7 +36937,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -36781,22 +36947,34 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"inc_scan.*\"}[1m])) by (instance)", + "exemplar": true, + "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[$__rate_interval])) by (instance, request)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-scan", - "metric": "tikv_thread_cpu_seconds_total", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}} :: {{request}}", + "metric": "tikv_grpc_msg_duration_seconds_bucket", "refId": "A", - "step": 4 + "step": 10 + }, + { + "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", request!=\"switch_mode\"}[30s])) by (request)", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "total - {{request}}", + "metric": "tikv_grpc_msg_duration_seconds_bucket", + "refId": "B", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Scan lock Worker CPU", + "title": "Import RPC Ops", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -36811,11 +36989,11 @@ }, "yaxes": [ { - "format": "percentunit", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -36838,48 +37016,40 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between resolved tso and current time", - "editable": true, - "error": false, "fieldConfig": { - "defaults": {}, + "defaults": { + "unit": "cps" + }, "overrides": [] }, "fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": { "h": 8, - "w": 12, + "w": 6, "x": 0, - "y": 46 + "y": 52 }, "hiddenSeries": false, - "id": 8387, + "id": 23763573032, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "hideEmpty": true, - "max": true, + "current": false, + "max": false, "min": false, - "rightSide": true, "show": true, - "sort": "current", - "sortDesc": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, + "pluginVersion": "7.5.11", + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -36888,24 +37058,23 @@ "steppedLine": false, "targets": [ { - "expr": "sum(tikv_resolved_ts_min_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": 
"time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 60 + "exemplar": true, + "expr": "sum(rate(tikv_import_apply_cache_event{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (instance, type)", + "interval": "", + "legendFormat": "{{instance}} :: {{type}}", + "queryType": "randomWalk", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Max Resolved TS gap", + "title": "Cache Events", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -36917,7 +37086,7 @@ }, "yaxes": [ { - "format": "ms", + "format": "cps", "label": null, "logBase": 1, "max": null, @@ -36939,227 +37108,211 @@ } }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, + "cards": { + "cardPadding": null, + "cardRound": 2 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between resolved tso of leaders and current time", - "editable": true, - "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 8, - "w": 12, - "x": 12, - "y": 46 + "w": 6, + "x": 6, + "y": 52 }, - "hiddenSeries": false, - "id": 23763572077, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763573348, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": { - 
"alertThreshold": true + "show": false }, - "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "7.5.11", + "reverseYBuckets": false, "targets": [ { "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", + "expr": "sum(increase(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[$__rate_interval])) by (le)", + "format": "heatmap", "interval": "", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 60 + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Max Leader Resolved TS gap", + "title": "Overall RPC Duration", "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" + "show": true, + "showHistogram": true }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "s", + "logBase": 1, + "max": null, + "min": null, "show": true, - "values": [] + "splitFactor": null }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, + 
"cards": { + "cardPadding": null, + "cardRound": 2 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The region that has minimal resolved ts", - "editable": true, - "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 8, - "w": 12, - "x": 0, - "y": 54 + "w": 6, + "x": 12, + "y": 52 }, - "hiddenSeries": false, - "id": 23763572078, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763573558, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true + "show": false }, - "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "7.5.11", + "reverseYBuckets": false, "targets": [ { "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "hide": false, + "expr": "sum(increase(tikv_import_apply_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"exec_download\"}[$__rate_interval])) by (le)", + "format": "heatmap", "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "{{le}}", + "queryType": "randomWalk", + 
"refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Min Resolved TS Region", + "title": "Read File into Memory Duration", "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" + "show": true, + "showHistogram": true }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "s", + "logBase": 1, + "max": null, + "min": null, "show": true, - "values": [] + "splitFactor": null }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": 2 + }, + "color": { + "cardColor": "#37872D", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 52 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763573229, + "legend": { + "show": false + }, + "pluginVersion": "7.5.11", + "reverseYBuckets": false, + "targets": [ { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "exemplar": true, + "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"queuing\"}[$__rate_interval])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Queuing Time", + 
"tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null }, { "aliasColors": {}, @@ -37167,83 +37320,65 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The region that its leader has minimal resolved ts.", - "editable": true, - "error": false, "fieldConfig": { - "defaults": {}, + "defaults": { + "unit": "bytes" + }, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 54 + "h": 6, + "w": 6, + "x": 0, + "y": 60 }, "hiddenSeries": false, - "id": 23763572079, + "id": 23763573349, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "max": true, + "current": false, + "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, + "pluginVersion": "7.5.11", + "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "hide": false, + "expr": "rate(tikv_import_apply_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\"}[$__rate_interval])", "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "legendFormat": "{{instance}}", + "queryType": "randomWalk", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Min Leader Resolved TS Region", + "title": "Apply Request Throughput", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -37255,15 +37390,15 @@ }, "yaxes": [ { - "format": "none", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -37279,54 +37414,52 @@ { "cards": { "cardPadding": null, - "cardRound": null + "cardRound": 2 }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", + "colorScheme": "interpolateBlues", "exponent": 0.5, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when handle a check leader request", "fieldConfig": { "defaults": {}, "overrides": [] }, "gridPos": { "h": 8, - "w": 12, - "x": 0, - "y": 62 + "w": 6, + "x": 6, + "y": 60 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 9168, + "id": 23763573344, "legend": { "show": false }, - "links": [], + "pluginVersion": "7.5.11", "reverseYBuckets": false, "targets": [ { - "expr": "sum(delta(tikv_resolved_ts_check_leader_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "exemplar": true, + "expr": "sum(increase(tikv_import_download_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (le)", "format": "heatmap", + "interval": "", "intervalFactor": 2, 
"legendFormat": "{{le}}", - "metric": "", - "refId": "A", - "step": 4 + "queryType": "randomWalk", + "refId": "A" } ], - "timeFrom": null, - "timeShift": null, - "title": "Check leader duration", + "title": "Downloaded File Size", "tooltip": { "show": true, - "showHistogram": false + "showHistogram": true }, "type": "heatmap", "xAxis": { @@ -37335,7 +37468,146 @@ "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": null, + "format": "decbytes", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": 2 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolatePurples", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 60 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763573233, + "legend": { + "show": false + }, + "pluginVersion": "7.5.11", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_import_apply_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Apply Batch Size", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": null, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "decbytes", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": 
null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": 2 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 60 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763573230, + "legend": { + "show": false + }, + "pluginVersion": "7.5.11", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"get_permit\"}[$__rate_interval])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Blocked by Concurrency Time", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, "format": "s", "logBase": 1, "max": null, @@ -37343,7 +37615,7 @@ "show": true, "splitFactor": null }, - "yBucketBound": "upper", + "yBucketBound": "auto", "yBucketNumber": null, "yBucketSize": null }, @@ -37353,81 +37625,66 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The status of resolved-ts observe regions", - "editable": true, - "error": false, "fieldConfig": { - "defaults": {}, + "defaults": { + "unit": "ops" + }, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 62 + "h": 5, + "w": 6, + "x": 0, + "y": 66 }, "hiddenSeries": false, - "id": 8377, + "id": 23763573118, "legend": { - "alignAsTable": true, "avg": 
false, - "current": true, - "max": true, + "current": false, + "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, + "pluginVersion": "7.5.11", + "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_resolved_ts_region_resolve_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", - "format": "time_series", + "exemplar": true, + "expr": "rate(tikv_import_applier_event{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"begin_req\"}[$__rate_interval])", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "interval": "", + "legendFormat": "{{instance}} :: {{type}}", + "queryType": "randomWalk", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Observe region status", + "title": "Apply Request Speed", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -37439,11 +37696,11 @@ }, "yaxes": [ { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -37466,48 +37723,40 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Bucketed histogram of region count in a check leader request", - "editable": true, - "error": false, "fieldConfig": { - "defaults": {}, + "defaults": { + 
"unit": "decbytes" + }, "overrides": [] }, "fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": { "h": 8, - "w": 12, - "x": 0, - "y": 70 + "w": 6, + "x": 6, + "y": 68 }, "hiddenSeries": false, - "id": 12308, + "id": 23763573346, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "hideEmpty": true, - "max": true, + "current": false, + "max": false, "min": false, - "rightSide": true, "show": true, - "sort": "current", - "sortDesc": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, + "pluginVersion": "7.5.11", + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -37516,23 +37765,20 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "hide": false, - "intervalFactor": 2, + "exemplar": true, + "expr": "tikv_import_apply_cached_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "interval": "", "legendFormat": "{{instance}}", - "metric": "tikv_snapshot_size_bucket", - "refId": "A", - "step": 40 + "queryType": "randomWalk", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "99% CheckLeader request region count", + "title": "Cached File in Memory", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -37547,7 +37793,7 @@ }, "yaxes": [ { - "format": "short", + "format": "decbytes", "label": null, "logBase": 1, "max": null, @@ -37570,86 +37816,67 @@ }, { "aliasColors": {}, - "bars": false, + "bars": true, "dashLength": 10, "dashes": false, 
"datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The count of fail to advance resolved-ts", - "editable": true, - "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": { "h": 8, - "w": 12, + "w": 6, "x": 12, - "y": 70 + "y": 68 }, "hiddenSeries": false, - "id": 9166, + "id": 23763573119, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "hideEmpty": false, - "max": true, + "current": false, + "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, - "values": true + "values": false }, - "lines": true, + "lines": false, "linewidth": 1, - "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, + "pluginVersion": "7.5.11", + "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], + "seriesOverrides": [], "spaceLength": 10, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_resolved_ts_fail_advance_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}}-{{reason}}", - "refId": "A", - "step": 10 + "exemplar": true, + "expr": "increase(tikv_import_applier_event{instance=~\"$instance\", type!=\"begin_req\"}[$__rate_interval])", + "interval": "", + "intervalFactor": 3, + "legendFormat": "{{instance}} :: {{type}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Fail advance ts count", + "title": "Engine Requests Unfinished", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": 
"individual" }, "type": "graph", "xaxis": { @@ -37665,7 +37892,7 @@ "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -37682,55 +37909,116 @@ "alignLevel": null } }, + { + "cards": { + "cardPadding": null, + "cardRound": 2 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 68 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763573231, + "legend": { + "show": false + }, + "pluginVersion": "7.5.11", + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"apply\"}[$__rate_interval])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Apply Time", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "Total bytes in memory of resolved-ts observe regions's lock heap", - "editable": true, - "error": false, + "description": "", "fieldConfig": { - "defaults": {}, + "defaults": { + "unit": "bytes" + }, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": 
{ - "h": 8, - "w": 12, + "h": 5, + "w": 6, "x": 0, - "y": 78 + "y": 71 }, "hiddenSeries": false, - "id": 8379, + "id": 23763573449, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "max": true, + "current": false, + "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, + "pluginVersion": "7.5.11", + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -37739,21 +38027,21 @@ "steppedLine": false, "targets": [ { - "expr": "avg(tikv_resolved_ts_lock_heap_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, + "exemplar": true, + "expr": "sum(tikv_server_mem_trace_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"raftstore-.*\"}) by (instance)", + "hide": false, + "interval": "", "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "queryType": "randomWalk", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Lock heap size", + "title": "Raft Store Memory Usage", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -37772,7 +38060,7 @@ "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -37788,40 +38076,56 @@ "align": false, "alignLevel": null } - }, + } + ], + "title": "Point In Time Restore", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 8389, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, 
"datasource": "${DS_TEST-CLUSTER}", - "description": "Bucketed histogram of the check leader request size", + "decimals": 1, + "description": " \tThe CPU utilization of resolved ts worker", "editable": true, "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "grid": {}, "gridPos": { "h": 8, - "w": 12, - "x": 12, - "y": 78 + "w": 8, + "x": 0, + "y": 38 }, "hiddenSeries": false, - "id": 8383, + "id": 8385, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -37830,7 +38134,7 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "options": { "alertThreshold": true }, @@ -37845,29 +38149,20 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"resolved_ts.*\"}[1m])) by (instance)", "format": "time_series", - "hide": false, "intervalFactor": 2, "legendFormat": "{{instance}}", - "metric": "tikv_snapshot_size_bucket", + "metric": "tikv_thread_cpu_seconds_total", "refId": "A", - "step": 40 - }, - { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "{{instance}}-check-num", - "refId": "B" + "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "99% CheckLeader request size", + "title": 
"Resolved TS Worker CPU", "tooltip": { "msResolution": false, "shared": true, @@ -37884,11 +38179,11 @@ }, "yaxes": [ { - "format": "bytes", + "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -37912,7 +38207,7 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "Total bytes of pending commands in the channel", + "description": " \tThe CPU utilization of advance ts worker", "editable": true, "error": false, "fieldConfig": { @@ -37924,12 +38219,12 @@ "grid": {}, "gridPos": { "h": 8, - "w": 12, - "x": 12, - "y": 86 + "w": 8, + "x": 8, + "y": 38 }, "hiddenSeries": false, - "id": 8381, + "id": 9162, "legend": { "alignAsTable": true, "avg": false, @@ -37962,19 +38257,20 @@ "steppedLine": false, "targets": [ { - "expr": "avg(tikv_resolved_ts_channel_penging_cmd_bytes_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"advance_ts.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}}-tso", + "metric": "tikv_thread_cpu_seconds_total", "refId": "A", - "step": 10 + "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Pending command size", + "title": "Advance ts Worker CPU", "tooltip": { "msResolution": false, "shared": true, @@ -37991,7 +38287,7 @@ }, "yaxes": [ { - "format": "bytes", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -38011,44 +38307,41 @@ "align": false, "alignLevel": null } - } - ], - "title": "Resolved-TS", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 46 - }, - "id": 2763, - "panels": [ + }, { "aliasColors": {}, "bars": false, 
"dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "fill": 1, + "decimals": 1, + "description": " \tThe CPU utilization of scan lock worker", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 505 + "h": 8, + "w": 8, + "x": 16, + "y": 38 }, - "id": 2696, + "hiddenSeries": false, + "id": 9164, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -38058,7 +38351,11 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", @@ -38068,19 +38365,22 @@ "steppedLine": false, "targets": [ { - "expr": "tikv_allocator_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"inc_scan.*\"}[1m])) by (instance)", "format": "time_series", - "hide": false, "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A" + "legendFormat": "{{instance}}-scan", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "A", + "step": 4 } ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Allocator Stats", + "title": "Scan lock Worker CPU", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -38095,11 +38395,11 @@ }, "yaxes": [ { - "format": "decbytes", + "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -38115,47 +38415,31 @@ "align": false, "alignLevel": null } - } - ], - "repeat": 
null, - "title": "Memory", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 47 - }, - "id": 3922, - "panels": [ + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "The gap between resolved tso and current time", "editable": true, "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, "grid": {}, "gridPos": { - "h": 7, + "h": 8, "w": 12, "x": 0, - "y": 65 + "y": 46 }, "hiddenSeries": false, - "id": 3924, + "id": 8387, "legend": { "alignAsTable": true, "avg": false, @@ -38165,7 +38449,6 @@ "min": false, "rightSide": true, "show": true, - "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -38174,7 +38457,7 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, @@ -38183,58 +38466,30 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/backup-auto-throttle/", - "fill": 5, - "fillGradient": 2, - "linewidth": 0 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"b.*k.*w.*k.*\"}[1m])) by (instance)", + "expr": "sum(tikv_resolved_ts_min_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "backup-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", + "legendFormat": "{{instance}}", "refId": "A", - "step": 4 - }, - { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", name=~\"backup_io\"}[1m])) by (instance)", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "backup-io-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "B", - "step": 4 - }, - { - "exemplar": true, - "expr": "tikv_backup_softlimit{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", - "hide": false, - "interval": "", - "legendFormat": "backup-auto-throttle-{{instance}}", - "refId": "C" + "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Backup CPU Utilization", + "title": "Max Resolved TS gap", "tooltip": { "msResolution": false, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -38246,7 +38501,7 @@ }, "yaxes": [ { - "format": "percentunit", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -38267,113 +38522,54 @@ "alignLevel": null } }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 4, - "x": 12, - "y": 65 - }, - "id": 3926, - "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true - }, - "styles": [ - { - "alias": "Time", - "align": "auto", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "date" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sum(tikv_backup_thread_pool_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by(instance)", - "format": 
"time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Backup Thread Count", - "transform": "timeseries_aggregations", - "type": "table" - }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The gap between resolved tso of leaders and current time", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, + "grid": {}, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 32 + "x": 12, + "y": 46 }, "hiddenSeries": false, - "id": 23763571993, + "id": 23763572077, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -38383,21 +38579,25 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_cloud_request_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (cloud, req)", + "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", "interval": "", - "legendFormat": "{{cloud}}-{{req}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "cloud 
request", + "title": "Max Leader Resolved TS gap", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -38409,7 +38609,7 @@ }, "yaxes": [ { - "format": "short", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -38436,22 +38636,25 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, - "description": "", + "decimals": 1, + "description": "The region that has minimal resolved ts", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, + "grid": {}, "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 65 + "h": 8, + "w": 12, + "x": 0, + "y": 54 }, "hiddenSeries": false, - "id": 5264, + "id": 23763572078, "legend": { "alignAsTable": true, "avg": false, @@ -38460,44 +38663,56 @@ "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "delta(tikv_backup_error_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "exemplar": true, + "expr": "sum(tikv_resolved_ts_min_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "hide": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{error}}", - "refId": "D" + "legendFormat": "{{type}}", + "refId": "A", + "step": 10 } ], 
"thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Backup Errors", + "title": "Min Resolved TS Region", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -38521,7 +38736,7 @@ "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], @@ -38531,36 +38746,30 @@ } }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "decimals": 1, + "description": "The region that its leader has minimal resolved ts.", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, + "fill": 0, + "fillGradient": 0, + "grid": {}, "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 72 + "h": 8, + "w": 12, + "x": 12, + "y": 54 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 3927, + "hiddenSeries": false, + "id": 23763572079, "legend": { "alignAsTable": true, "avg": false, @@ -38569,110 +38778,140 @@ "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "reverseYBuckets": false, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": 
"max(rate(tikv_backup_range_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", cf=\"write\"}[1m])) by (le)", - "format": "heatmap", - "instant": false, - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A" + "exemplar": true, + "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "refId": "A", + "step": 10 } ], - "title": "Backup Write CF SST Size", + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Min Leader Resolved TS Region", "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "bytes", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, - "max": null, - "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - 
"description": "", + "description": "The time consumed when handle a check leader request", "fieldConfig": { "defaults": {}, "overrides": [] }, "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 72 + "h": 8, + "w": 12, + "x": 0, + "y": 62 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 5266, + "id": 9168, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], "reverseYBuckets": false, "targets": [ { - "expr": "max(rate(tikv_backup_range_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", cf=\"default\"}[1m])) by (le)", + "expr": "sum(delta(tikv_resolved_ts_check_leader_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", "format": "heatmap", - "instant": false, "intervalFactor": 2, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "refId": "A", + "step": 4 } ], - "title": "Backup Default CF SST Size", + "timeFrom": null, + "timeShift": null, + "title": "Check leader duration", "tooltip": { "show": true, - "showHistogram": true + "showHistogram": false }, - "tooltipDecimals": 1, "type": "heatmap", "xAxis": { "show": true @@ -38680,8 +38919,8 @@ "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 1, - "format": "bytes", + "decimals": 0, + "format": "s", "logBase": 1, "max": null, "min": null, @@ -38698,23 +38937,25 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The status of resolved-ts observe regions", "editable": true, "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "grid": {}, "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 72 + "h": 8, + "w": 12, + "x": 12, + "y": 62 }, 
"hiddenSeries": false, - "id": 3929, + "id": 8377, "legend": { "alignAsTable": true, "avg": false, @@ -38723,7 +38964,8 @@ "min": false, "rightSide": true, "show": true, - "sort": "max", + "sideWidth": null, + "sort": "current", "sortDesc": true, "total": false, "values": true @@ -38743,7 +38985,7 @@ "seriesOverrides": [ { "alias": "total", - "yaxis": 2 + "lines": false } ], "spaceLength": 10, @@ -38751,31 +38993,20 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_backup_range_size_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "expr": "sum(tikv_resolved_ts_region_resolve_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", "format": "time_series", "hide": false, "intervalFactor": 2, - "legendFormat": "total", - "metric": "", + "legendFormat": "{{type}}", "refId": "A", - "step": 4 - }, - { - "expr": "rate(tikv_backup_range_size_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}} {{cf}}", - "metric": "", - "refId": "B", - "step": 4 + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Backup SST Generation Throughput", + "title": "Observe region status", "tooltip": { "msResolution": false, "shared": true, @@ -38792,15 +39023,15 @@ }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -38813,255 +39044,13 @@ "alignLevel": null } }, - { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - 
"datasource": "${DS_TEST-CLUSTER}", - "description": "", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 4, - "x": 0, - "y": 79 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 5597, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "links": [], - "reverseYBuckets": false, - "targets": [ - { - "expr": "max(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le)", - "format": "heatmap", - "instant": false, - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A" - } - ], - "title": "Backup Scan SST Duration", - "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 4, - "y": 79 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 3931, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - 
"total": false, - "values": true - }, - "links": [], - "reverseYBuckets": false, - "targets": [ - { - "expr": "max(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"scan\"}[1m])) by (le)", - "format": "heatmap", - "instant": false, - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A" - } - ], - "title": "Backup Scan SST Duration", - "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 10, - "y": 79 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 6905, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "links": [], - "reverseYBuckets": false, - "targets": [ - { - "expr": "max(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"save.*\"}[1m])) by (le)", - "format": "heatmap", - "instant": false, - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A" - } - ], - "title": "Backup Save SST Duration", - 
"tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "description": "Bucketed histogram of region count in a check leader request", "editable": true, "error": false, "fieldConfig": { @@ -39072,22 +39061,23 @@ "fillGradient": 0, "grid": {}, "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 79 + "h": 8, + "w": 12, + "x": 0, + "y": 70 }, "hiddenSeries": false, - "id": 3928, + "id": 12308, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "max", + "sort": "current", "sortDesc": true, "total": false, "values": true @@ -39095,7 +39085,7 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, @@ -39110,41 +39100,26 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", "format": "time_series", + "hide": false, "intervalFactor": 2, - "legendFormat": "{{type}} - 99%", - "metric": "", + "legendFormat": "{{instance}}", + "metric": "tikv_snapshot_size_bucket", "refId": "A", - "step": 4 - }, - { - "expr": "histogram_quantile(0.95, 
sum(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}} - 95%", - "refId": "B", - "step": 4 - }, - { - "expr": "sum(rate(tikv_backup_range_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type) / sum(rate(tikv_backup_range_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}} - avg", - "refId": "C", - "step": 4 + "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Backup SST Duration", + "title": "99% CheckLeader request region count", "tooltip": { "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -39156,7 +39131,7 @@ }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -39177,87 +39152,6 @@ "alignLevel": null } }, - { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 86 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 3930, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "links": [], - 
"reverseYBuckets": false, - "targets": [ - { - "expr": "max(rate(tikv_external_storage_create_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", - "format": "heatmap", - "instant": false, - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A" - } - ], - "title": "External Storage Create Duration", - "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, { "aliasColors": {}, "bars": false, @@ -39265,22 +39159,24 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "", + "description": "The count of fail to advance resolved-ts", "editable": true, "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, + "fillGradient": 0, "grid": {}, "gridPos": { - "h": 7, + "h": 8, "w": 12, "x": 12, - "y": 86 + "y": 70 }, - "id": 4936, + "hiddenSeries": false, + "id": 9166, "legend": { "alignAsTable": true, "avg": false, @@ -39299,42 +39195,44 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_external_storage_create_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", - "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "{{type}}-100%", - "refId": "E" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(tikv_external_storage_create_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", + "expr": "sum(delta(tikv_resolved_ts_fail_advance_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", "format": "time_series", + "hide": false, "intervalFactor": 2, - "legendFormat": "{{type}}-99%", + "legendFormat": "{{instance}}-{{reason}}", "refId": "A", - "step": 4 + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "External Storage Create Duration", + "title": "Fail advance ts count", "tooltip": { "msResolution": false, "shared": true, - "sort": 1, + "sort": 0, "value_type": "cumulative" }, "type": "graph", @@ -39347,12 +39245,11 @@ }, "yaxes": [ { - "decimals": null, - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -39361,7 +39258,7 @@ "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { @@ -39376,27 +39273,28 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "", + "description": "Total bytes in memory of resolved-ts observe regions's lock heap", "editable": true, "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, + "fillGradient": 0, "grid": {}, "gridPos": { - "h": 7, + "h": 8, "w": 12, "x": 0, - "y": 93 + "y": 78 }, - "id": 5267, + "hiddenSeries": false, + "id": 8379, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, "max": true, "min": false, "rightSide": true, @@ -39410,7 +39308,10 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, 
"percentage": false, "pluginVersion": "7.5.7", "pointradius": 5, @@ -39422,31 +39323,24 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"analyze.*|checksum.*\"}[1m])) by (le,req))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-100%", - "refId": "E" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"analyze.*|checksum.*\"}[1m])) by (le,req))", + "expr": "avg(tikv_resolved_ts_lock_heap_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{req}}-99%", + "legendFormat": "{{instance}}", "refId": "A", - "step": 4 + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Checksum Request Duration", + "title": "Lock heap size", "tooltip": { "msResolution": false, "shared": true, - "sort": 1, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -39458,12 +39352,11 @@ }, "yaxes": [ { - "decimals": null, - "format": "s", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -39472,7 +39365,7 @@ "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { @@ -39486,22 +39379,24 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "Bucketed histogram of the check leader request size", "editable": true, "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, + "fillGradient": 0, "grid": {}, "gridPos": { - "h": 7, + "h": 8, "w": 12, "x": 12, 
- "y": 93 + "y": 78 }, - "id": 5269, + "hiddenSeries": false, + "id": 8383, "legend": { "alignAsTable": true, "avg": false, @@ -39511,20 +39406,22 @@ "min": false, "rightSide": true, "show": true, - "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, "percentage": false, "pluginVersion": "7.5.7", - "pointradius": 2, - "points": true, + "pointradius": 5, + "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, @@ -39532,20 +39429,29 @@ "steppedLine": false, "targets": [ { - "expr": "rate(node_disk_io_time_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])", + "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", "format": "time_series", + "hide": false, "intervalFactor": 2, - "legendFormat": "{{instance}} - {{device}}", - "metric": "tikv_thread_cpu_seconds_total", + "legendFormat": "{{instance}}", + "metric": "tikv_snapshot_size_bucket", "refId": "A", - "step": 4 + "step": 40 + }, + { + "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "{{instance}}-check-num", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "IO Utilization", + "title": "99% CheckLeader request size", "tooltip": { "msResolution": false, "shared": true, @@ -39562,7 +39468,7 @@ }, "yaxes": [ { - "format": "percentunit", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -39590,6 +39496,7 @@ 
"dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, + "description": "Total bytes of pending commands in the channel", "editable": true, "error": false, "fieldConfig": { @@ -39597,19 +39504,20 @@ "overrides": [] }, "fill": 0, + "fillGradient": 0, "grid": {}, "gridPos": { - "h": 7, + "h": 8, "w": 12, - "x": 0, - "y": 100 + "x": 12, + "y": 86 }, - "id": 5925, + "hiddenSeries": false, + "id": 8381, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": true, "max": true, "min": false, "rightSide": true, @@ -39624,57 +39532,33 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/import-count.*/", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "expr": "avg(tikv_resolved_ts_channel_penging_cmd_bytes_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", - "hide": false, "intervalFactor": 2, - "legendFormat": "import-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", + "legendFormat": "{{instance}}", "refId": "A", - "step": 4 - }, - { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance, tid) > 0", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "backup-{{instance}}-{{tid}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "C", - "step": 4 - }, - { - "expr": "count(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "import-count-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "D", - "step": 4 + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Import CPU Utilization", + "title": "Pending command size", "tooltip": { "msResolution": false, "shared": true, @@ -39691,11 +39575,11 @@ }, "yaxes": [ { - "format": "percentunit", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -39711,136 +39595,227 @@ "align": false, "alignLevel": null } - }, + } + ], + "title": "Resolved-TS", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 41 + }, + "id": 2763, + "panels": [ { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fontSize": "100%", + "fill": 1, "gridPos": { "h": 7, - "w": 4, - "x": 12, - "y": 100 + "w": 12, + "x": 0, + "y": 505 }, - "id": 5926, - "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true + "id": 2696, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true }, - "styles": [ - { - "alias": "Time", - "align": "auto", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "date" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - 
"pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "count(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "expr": "tikv_allocator_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{type}}", "refId": "A" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Import Thread Count", - "transform": "timeseries_aggregations", - "type": "table" - }, + "title": "Allocator Stats", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Memory", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 42 + }, + "id": 3922, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, - "description": "", + "decimals": 1, + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, 
+ "fillGradient": 0, + "grid": {}, "gridPos": { "h": 7, - "w": 8, - "x": 16, - "y": 100 + "w": 12, + "x": 0, + "y": 65 }, - "id": 5932, + "hiddenSeries": false, + "id": 3924, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, + "sort": "current", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/backup-auto-throttle/", + "fill": 5, + "fillGradient": 2, + "linewidth": 0 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "delta(tikv_import_error_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"b.*k.*w.*k.*\"}[1m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "backup-{{instance}}", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "A", + "step": 4 + }, + { + "exemplar": true, + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_io\"}[1m])) by (instance)", "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "backup-io-{{instance}}", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "B", + "step": 4 + }, + { + "exemplar": true, + "expr": "tikv_backup_softlimit{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", "hide": false, - "intervalFactor": 1, - 
"legendFormat": "{{type}} {{error}} {{instance}}", - "refId": "D" + "interval": "", + "legendFormat": "backup-auto-throttle-{{instance}}", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Import Errors", + "title": "Backup CPU Utilization", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -39855,19 +39830,19 @@ }, "yaxes": [ { - "format": "none", + "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], @@ -39876,39 +39851,110 @@ "alignLevel": null } }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 4, + "x": 12, + "y": 65 + }, + "id": 3926, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "align": "auto", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(tikv_backup_thread_pool_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by(instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Backup Thread Count", + "transform": "timeseries_aggregations", + "type": "table" + }, { "aliasColors": {}, 
"bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 7, + "h": 8, "w": 12, "x": 0, - "y": 107 + "y": 32 }, - "id": 5931, + "hiddenSeries": false, + "id": 23763571993, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "max": true, + "current": false, + "max": false, "min": false, - "rightSide": true, "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, "pluginVersion": "7.5.7", "pointradius": 2, @@ -39920,27 +39966,18 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, avg(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, request))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{request}}-99%", + "exemplar": true, + "expr": "sum(rate(tikv_cloud_request_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (cloud, req)", + "interval": "", + "legendFormat": "{{cloud}}-{{req}}", "refId": "A" - }, - { - "expr": "histogram_quantile(0.5, sum(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, request, instance))", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{request}}-50%", - "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Import RPC Duration", + "title": "cloud request", "tooltip": { "shared": true, "sort": 0, @@ -39956,7 +39993,7 @@ }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ 
-39983,20 +40020,22 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "decimals": 2, "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 107 + "w": 8, + "x": 16, + "y": 65 }, - "id": 6267, + "hiddenSeries": false, + "id": 5264, "legend": { "alignAsTable": true, "avg": false, @@ -40005,19 +40044,19 @@ "min": false, "rightSide": true, "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, "percentage": false, "pluginVersion": "7.5.7", - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -40026,31 +40065,19 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request!=\"switch_mode\"}[30s])) by (instance, request)", + "expr": "delta(tikv_backup_error_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "{{instance}} - {{request}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request!=\"switch_mode\"}[30s])) by (request)", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "total - {{request}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "B", - "step": 10 + "legendFormat": "{{instance}}-{{error}}", + "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - 
"title": "Import RPC Ops", + "title": "Backup Errors", "tooltip": { "shared": true, "sort": 0, @@ -40066,19 +40093,19 @@ }, "yaxes": [ { - "format": "ops", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true } ], @@ -40110,14 +40137,14 @@ }, "gridPos": { "h": 7, - "w": 6, + "w": 8, "x": 0, - "y": 114 + "y": 72 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 5930, + "id": 3927, "legend": { "alignAsTable": true, "avg": false, @@ -40135,7 +40162,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "max(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=~\"download|write\"}[1m])) by (le)", + "expr": "max(rate(tikv_backup_range_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", cf=\"write\"}[1m])) by (le)", "format": "heatmap", "instant": false, "intervalFactor": 2, @@ -40143,7 +40170,7 @@ "refId": "A" } ], - "title": "Import Write/Download RPC Duration", + "title": "Backup Write CF SST Size", "tooltip": { "show": true, "showHistogram": true @@ -40157,7 +40184,7 @@ "xBucketSize": null, "yAxis": { "decimals": 1, - "format": "s", + "format": "bytes", "logBase": 1, "max": null, "min": null, @@ -40191,14 +40218,14 @@ }, "gridPos": { "h": 7, - "w": 6, - "x": 6, - "y": 114 + "w": 8, + "x": 8, + "y": 72 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 5929, + "id": 5266, "legend": { "alignAsTable": true, "avg": false, @@ -40216,7 +40243,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "max(rate(tikv_import_download_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"queue\"}[1m])) by (le)", + "expr": 
"max(rate(tikv_backup_range_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", cf=\"default\"}[1m])) by (le)", "format": "heatmap", "instant": false, "intervalFactor": 2, @@ -40224,7 +40251,7 @@ "refId": "A" } ], - "title": "Import Wait Duration", + "title": "Backup Default CF SST Size", "tooltip": { "show": true, "showHistogram": true @@ -40238,7 +40265,7 @@ "xBucketSize": null, "yAxis": { "decimals": 1, - "format": "s", + "format": "bytes", "logBase": 1, "max": null, "min": null, @@ -40250,9 +40277,130 @@ "yBucketSize": null }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 72 + }, + "hiddenSeries": false, + "id": 3929, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tikv_backup_range_size_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "total", + "metric": "", + "refId": "A", + "step": 4 + }, + { + "expr": "rate(tikv_backup_range_size_bytes_sum{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\"}[1m])", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}} {{cf}}", + "metric": "", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Backup SST Generation Throughput", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": 0, + "cardRound": 0 }, "color": { "cardColor": "#FF9830", @@ -40272,14 +40420,14 @@ }, "gridPos": { "h": 7, - "w": 6, - "x": 12, - "y": 114 + "w": 4, + "x": 0, + "y": 79 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 6906, + "id": 5597, "legend": { "alignAsTable": true, "avg": false, @@ -40297,7 +40445,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "max(rate(tikv_import_download_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"read\"}[1m])) by (le)", + "expr": "max(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le)", "format": "heatmap", "instant": false, "intervalFactor": 2, @@ -40305,7 +40453,7 @@ "refId": "A" } ], - "title": "Import Read SST Duration", + "title": "Backup Scan SST Duration", "tooltip": { "show": true, "showHistogram": true @@ -40354,13 +40502,13 @@ "gridPos": { "h": 7, "w": 6, - "x": 18, - "y": 114 + "x": 4, + "y": 79 }, "heatmap": {}, "hideZeroBuckets": true, 
"highlightCards": true, - "id": 5928, + "id": 3931, "legend": { "alignAsTable": true, "avg": false, @@ -40378,7 +40526,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "max(rate(tikv_import_download_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"rewrite\"}[1m])) by (le)", + "expr": "max(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"scan\"}[1m])) by (le)", "format": "heatmap", "instant": false, "intervalFactor": 2, @@ -40386,7 +40534,7 @@ "refId": "A" } ], - "title": "Import Rewrite SST Duration", + "title": "Backup Scan SST Duration", "tooltip": { "show": true, "showHistogram": true @@ -40435,13 +40583,13 @@ "gridPos": { "h": 7, "w": 6, - "x": 0, - "y": 121 + "x": 10, + "y": 79 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 5939, + "id": 6905, "legend": { "alignAsTable": true, "avg": false, @@ -40459,7 +40607,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "max(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=~\"ingest\"}[1m])) by (le)", + "expr": "max(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"save.*\"}[1m])) by (le)", "format": "heatmap", "instant": false, "intervalFactor": 2, @@ -40467,7 +40615,7 @@ "refId": "A" } ], - "title": "Import Ingest RPC Duration", + "title": "Backup Save SST Duration", "tooltip": { "show": true, "showHistogram": true @@ -40493,36 +40641,28 @@ "yBucketSize": null }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, 
+ "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, + "fill": 1, + "fillGradient": 0, + "grid": {}, "gridPos": { "h": 7, - "w": 6, - "x": 6, - "y": 121 + "w": 8, + "x": 16, + "y": 79 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 5938, + "hiddenSeries": false, + "id": 3928, "legend": { "alignAsTable": true, "avg": false, @@ -40531,47 +40671,95 @@ "min": false, "rightSide": true, "show": true, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "reverseYBuckets": false, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "max(rate(tikv_import_ingest_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"ingest\"}[1m])) by (le)", - "format": "heatmap", - "instant": false, + "expr": "histogram_quantile(0.99, sum(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A" + "legendFormat": "{{type}} - 99%", + "metric": "", + "refId": "A", + "step": 4 + }, + { + "expr": "histogram_quantile(0.95, sum(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{type}} - 95%", + "refId": "B", + "step": 4 + }, + { + "expr": 
"sum(rate(tikv_backup_range_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type) / sum(rate(tikv_backup_range_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{type}} - avg", + "refId": "C", + "step": 4 } ], - "title": "Import Ingest SST Duration", + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Backup SST Duration", "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "cards": { @@ -40596,14 +40784,14 @@ }, "gridPos": { "h": 7, - "w": 6, - "x": 12, - "y": 121 + "w": 12, + "x": 0, + "y": 86 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 5937, + "id": 3930, "legend": { "alignAsTable": true, "avg": false, @@ -40621,7 +40809,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "max(rate(tikv_import_ingest_byte{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "expr": 
"max(rate(tikv_external_storage_create_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", "format": "heatmap", "instant": false, "intervalFactor": 2, @@ -40629,7 +40817,7 @@ "refId": "A" } ], - "title": "Import Ingest SST Bytes", + "title": "External Storage Create Duration", "tooltip": { "show": true, "showHistogram": true @@ -40660,72 +40848,78 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, "fill": 1, + "grid": {}, "gridPos": { "h": 7, - "w": 6, - "x": 18, - "y": 121 + "w": 12, + "x": 12, + "y": 86 }, - "id": 5927, + "id": 4936, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": false, "max": true, "min": false, - "rightSide": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_import_download_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "expr": "histogram_quantile(1, sum(rate(tikv_external_storage_create_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{type}}-100%", + "refId": "E" }, { - "expr": 
"sum(rate(tikv_import_download_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_external_storage_create_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "total", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{type}}-99%", + "refId": "A", + "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Import Download SST Throughput", + "title": "External Storage Create Duration", "tooltip": { + "msResolution": false, "shared": true, - "sort": 0, - "value_type": "individual" + "sort": 1, + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -40737,7 +40931,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "s", "label": null, "logBase": 1, "max": null, @@ -40745,12 +40940,12 @@ "show": true }, { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ], "yaxis": { @@ -40764,29 +40959,35 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, + "decimals": 1, "description": "", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, "fill": 1, + "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 128 + "y": 93 }, - "id": 12309, + "id": 5267, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": false, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, + "sort": "current", + "sortDesc": true, "total": false, "values": true }, @@ -40796,7 +40997,7 @@ "nullPointMode": "null as zero", "percentage": false, "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -40805,23 +41006,31 @@ 
"steppedLine": false, "targets": [ { - "expr": "delta(tikv_import_local_write_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"analyze.*|checksum.*\"}[1m])) by (le,req))", "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{type}} {{instance}}", - "refId": "D" + "intervalFactor": 2, + "legendFormat": "{{req}}-100%", + "refId": "E" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"analyze.*|checksum.*\"}[1m])) by (le,req))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{req}}-99%", + "refId": "A", + "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Import Local Write keys", + "title": "Checksum Request Duration", "tooltip": { + "msResolution": false, "shared": true, - "sort": 0, - "value_type": "individual" + "sort": 1, + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -40833,20 +41042,21 @@ }, "yaxes": [ { - "format": "none", + "decimals": null, + "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", - "show": true + "min": null, + "show": false } ], "yaxis": { @@ -40860,40 +41070,45 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, - "description": "", + "decimals": 1, + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, + "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 128 + "y": 93 }, - "id": 12310, + "id": 
5269, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, + "sort": "current", + "sortDesc": true, "total": false, "values": true }, - "lines": true, + "lines": false, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pluginVersion": "7.5.7", "pointradius": 2, - "points": false, + "points": true, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, @@ -40901,20 +41116,22 @@ "steppedLine": false, "targets": [ { - "expr": "delta(tikv_import_local_write_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "expr": "rate(node_disk_io_time_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])", "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{type}} {{instance}}", - "refId": "D" + "intervalFactor": 2, + "legendFormat": "{{instance}} - {{device}}", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "A", + "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Import Local Write bytes", + "title": "IO Utilization", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -40929,19 +41146,19 @@ }, "yaxes": [ { - "format": "decbytes", + "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], @@ -40956,70 +41173,94 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The accumulated TTL expired KV count during backup", + "decimals": 1, + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, - "fillGradient": 0, + "fill": 0, + 
"grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 135 + "y": 100 }, - "hiddenSeries": false, - "id": 23763572861, + "id": 5925, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, + "links": [], "nullPointMode": "null", - "options": { - "alertThreshold": true - }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pluginVersion": "7.5.7", + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/import-count.*/", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_backup_raw_expired_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "interval": "", - "legendFormat": "{{instance}}", - "queryType": "randomWalk", + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "import-{{instance}}", + "metric": "tikv_thread_cpu_seconds_total", "refId": "A", - "hide": true + "step": 4 }, { - "exemplar": true, - "expr": "sum(tikv_backup_raw_expired_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"})", - "hide": false, - "interval": "", - "legendFormat": "sum", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance, tid) > 
0", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "backup-{{instance}}-{{tid}}", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "C", + "step": 4 + }, + { + "expr": "count(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "import-count-{{instance}}", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "D", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "TTL Expired", + "title": "Import CPU Utilization", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -41034,7 +41275,7 @@ }, "yaxes": [ { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -41054,61 +41295,113 @@ "align": false, "alignLevel": null } - } - ], - "title": "Backup & Import", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 44 - }, - "id": 23763573235, - "panels": [ + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 4, + "x": 12, + "y": 100 + }, + "id": 5926, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "align": "auto", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" 
+ } + ], + "targets": [ + { + "expr": "count(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Import Thread Count", + "transform": "timeseries_aggregations", + "type": "table" + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "decimals": 2, + "description": "", "fieldConfig": { - "defaults": { - "unit": "percentunit" - }, + "defaults": {}, "overrides": [] }, "fill": 1, - "fillGradient": 0, "gridPos": { "h": 7, - "w": 6, - "x": 0, - "y": 45 + "w": 8, + "x": 16, + "y": 100 }, - "hiddenSeries": false, - "id": 23763573350, + "id": 5932, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, + "links": [], + "nullPointMode": "null as zero", "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "7.5.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -41118,23 +41411,19 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", - "interval": "", - "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" - }, - { + "expr": "delta(tikv_import_error_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "format": "time_series", "hide": false, - "refId": "B" + 
"intervalFactor": 1, + "legendFormat": "{{type}} {{error}} {{instance}}", + "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "CPU Usage", + "title": "Import Errors", "tooltip": { "shared": true, "sort": 0, @@ -41150,19 +41439,19 @@ }, "yaxes": [ { - "format": "percentunit", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true } ], @@ -41172,18 +41461,8 @@ } }, { - "aliasColors": { - "(AP)apply-99": "#88509f", - "(AP)get_permit-99": "#922870", - "(AP)queuing-99": "#9d0041", - "(DL)exec_download-99": "#73a0fe", - "(DL)queue-99": "#7d78ce", - "exec_download-99": "light-orange", - "get_permit-99": "red", - "queuing-99": "blue", - "total-99": "rgb(252, 252, 252)" - }, - "bars": true, + "aliasColors": {}, + "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", @@ -41191,16 +41470,14 @@ "defaults": {}, "overrides": [] }, - "fill": 0, - "fillGradient": 0, + "fill": 1, "gridPos": { "h": 7, - "w": 9, - "x": 6, - "y": 45 + "w": 12, + "x": 0, + "y": 107 }, - "hiddenSeries": false, - "id": 23763573351, + "id": 5931, "legend": { "alignAsTable": true, "avg": false, @@ -41212,58 +41489,34 @@ "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", - "options": { - "alertThreshold": true - }, "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "7.5.7", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:2473", - "alias": "total-99", - "bars": false, - "fill": 2, - "lines": true, - "linewidth": 0, - "stack": false, - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": 
"histogram_quantile(0.99, avg(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[1m])) by (le, request))", + "expr": "histogram_quantile(0.99, avg(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, request))", "format": "time_series", "hide": false, - "interval": "", "intervalFactor": 1, - "legendFormat": "total-99", + "legendFormat": "{{request}}-99%", "refId": "A" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, avg(rate(tikv_import_apply_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"queue|exec_download\"}[1m])) by (le, type))", - "hide": false, - "interval": "", - "legendFormat": "(DL){{type}}-99", - "refId": "C" - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.99, avg(rate(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", - "hide": false, - "interval": "", - "legendFormat": "(AP){{type}}-99", + "expr": "histogram_quantile(0.5, sum(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, request, instance))", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{request}}-50%", "refId": "B" } ], @@ -41271,7 +41524,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "P99 RPC Duration", + "title": "Import RPC Duration", "tooltip": { "shared": true, "sort": 0, @@ -41287,7 +41540,6 @@ }, "yaxes": [ { - "$$hashKey": "object:2453", "format": "s", "label": null, "logBase": 1, @@ -41296,8 +41548,7 @@ "show": true }, { - "$$hashKey": "object:2454", - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -41323,15 +41574,13 @@ "overrides": [] }, 
"fill": 1, - "fillGradient": 0, "gridPos": { "h": 7, - "w": 9, - "x": 15, - "y": 45 + "w": 12, + "x": 12, + "y": 107 }, - "hiddenSeries": false, - "id": 23763573352, + "id": 6267, "legend": { "alignAsTable": true, "avg": false, @@ -41350,11 +41599,8 @@ "linewidth": 1, "links": [], "nullPointMode": "null", - "options": { - "alertThreshold": true - }, "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", @@ -41364,13 +41610,11 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[$__rate_interval])) by (instance, request)", + "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request!=\"switch_mode\"}[30s])) by (instance, request)", "format": "time_series", "hide": false, - "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}} :: {{request}}", + "legendFormat": "{{instance}} - {{request}}", "metric": "tikv_grpc_msg_duration_seconds_bucket", "refId": "A", "step": 10 @@ -41427,153 +41671,68 @@ "alignLevel": null } }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": { - "unit": "cps" - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 6, - "x": 0, - "y": 52 - }, - "hiddenSeries": false, - "id": 23763573032, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - 
"spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(rate(tikv_import_apply_cache_event{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (instance, type)", - "interval": "", - "legendFormat": "{{instance}} :: {{type}}", - "queryType": "randomWalk", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Cache Events", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "cps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, { "cards": { - "cardPadding": null, - "cardRound": 2 + "cardPadding": 0, + "cardRound": 0 }, "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", + "cardColor": "#FF9830", + "colorScale": "linear", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, "gridPos": { - "h": 8, + "h": 7, "w": 6, - "x": 6, - "y": 52 + "x": 0, + "y": 114 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573348, + "id": 5930, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true }, - "pluginVersion": "7.5.11", + "links": [], "reverseYBuckets": false, "targets": [ { - "exemplar": true, - "expr": 
"sum(increase(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[$__rate_interval])) by (le)", + "expr": "max(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=~\"download|write\"}[1m])) by (le)", "format": "heatmap", - "interval": "", + "instant": false, "intervalFactor": 2, "legendFormat": "{{le}}", - "queryType": "randomWalk", "refId": "A" } ], - "title": "Overall RPC Duration", + "title": "Import Write/Download RPC Duration", "tooltip": { "show": true, "showHistogram": true }, + "tooltipDecimals": 1, "type": "heatmap", "xAxis": { "show": true @@ -41581,7 +41740,7 @@ "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + "decimals": 1, "format": "s", "logBase": 1, "max": null, @@ -41589,60 +41748,72 @@ "show": true, "splitFactor": null }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "cards": { - "cardPadding": null, - "cardRound": 2 + "cardPadding": 0, + "cardRound": 0 }, "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", + "cardColor": "#FF9830", + "colorScale": "linear", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, "gridPos": { - "h": 8, + "h": 7, "w": 6, - "x": 12, - "y": 52 + "x": 6, + "y": 114 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573558, + "id": 5929, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true }, - "pluginVersion": "7.5.11", + "links": [], "reverseYBuckets": false, "targets": [ { - 
"exemplar": true, - "expr": "sum(increase(tikv_import_apply_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"exec_download\"}[$__rate_interval])) by (le)", + "expr": "max(rate(tikv_import_download_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"queue\"}[1m])) by (le)", "format": "heatmap", - "interval": "", + "instant": false, "intervalFactor": 2, "legendFormat": "{{le}}", - "queryType": "randomWalk", "refId": "A" } ], - "title": "Read File into Memory Duration", + "title": "Import Wait Duration", "tooltip": { "show": true, "showHistogram": true }, + "tooltipDecimals": 1, "type": "heatmap", "xAxis": { "show": true @@ -41650,7 +41821,7 @@ "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + "decimals": 1, "format": "s", "logBase": 1, "max": null, @@ -41658,60 +41829,72 @@ "show": true, "splitFactor": null }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "cards": { - "cardPadding": null, - "cardRound": 2 + "cardPadding": 0, + "cardRound": 0 }, "color": { - "cardColor": "#37872D", - "colorScale": "sqrt", + "cardColor": "#FF9830", + "colorScale": "linear", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, "gridPos": { - "h": 8, + "h": 7, "w": 6, - "x": 18, - "y": 52 + "x": 12, + "y": 114 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573229, + "id": 6906, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true }, - "pluginVersion": "7.5.11", + "links": [], "reverseYBuckets": 
false, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"queuing\"}[$__rate_interval])) by (le)", + "expr": "max(rate(tikv_import_download_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"read\"}[1m])) by (le)", "format": "heatmap", - "interval": "", + "instant": false, "intervalFactor": 2, "legendFormat": "{{le}}", - "queryType": "randomWalk", "refId": "A" } ], - "title": "Queuing Time", + "title": "Import Read SST Duration", "tooltip": { "show": true, "showHistogram": true }, + "tooltipDecimals": 1, "type": "heatmap", "xAxis": { "show": true @@ -41719,7 +41902,7 @@ "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + "decimals": 1, "format": "s", "logBase": 1, "max": null, @@ -41727,159 +41910,153 @@ "show": true, "splitFactor": null }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": { - "unit": "bytes" - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "gridPos": { - "h": 6, + "h": 7, "w": 6, - "x": 0, - "y": 60 + "x": 18, + "y": 114 }, - "hiddenSeries": false, - "id": 23763573349, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 5928, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": 
true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true + "values": true }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "links": [], + "reverseYBuckets": false, "targets": [ { - "exemplar": true, - "expr": "rate(tikv_import_apply_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{instance}}", - "queryType": "randomWalk", + "expr": "max(rate(tikv_import_download_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"rewrite\"}[1m])) by (le)", + "format": "heatmap", + "instant": false, + "intervalFactor": 2, + "legendFormat": "{{le}}", "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Apply Request Throughput", + "title": "Import Rewrite SST Duration", "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" + "show": true, + "showHistogram": true }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, "show": true, - "values": [] + "splitFactor": null }, - "yaxes": [ - { - "$$hashKey": "object:1486", - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:1487", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } 
- ], - "yaxis": { - "align": false, - "alignLevel": null - } + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "cards": { - "cardPadding": null, - "cardRound": 2 + "cardPadding": 0, + "cardRound": 0 }, "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateBlues", + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, "gridPos": { - "h": 8, + "h": 7, "w": 6, - "x": 6, - "y": 60 + "x": 0, + "y": 121 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573344, + "id": 5939, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true }, - "pluginVersion": "7.5.11", + "links": [], "reverseYBuckets": false, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_download_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (le)", + "expr": "max(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=~\"ingest\"}[1m])) by (le)", "format": "heatmap", - "interval": "", + "instant": false, "intervalFactor": 2, "legendFormat": "{{le}}", - "queryType": "randomWalk", "refId": "A" } ], - "title": "Downloaded File Size", + "title": "Import Ingest RPC Duration", "tooltip": { "show": true, "showHistogram": true }, + "tooltipDecimals": 1, "type": "heatmap", "xAxis": { "show": true @@ -41887,69 +42064,80 @@ "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, - "format": "decbytes", + "decimals": 1, + "format": 
"s", "logBase": 1, "max": null, "min": null, "show": true, "splitFactor": null }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "cards": { - "cardPadding": null, - "cardRound": 2 + "cardPadding": 0, + "cardRound": 0 }, "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolatePurples", + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, "gridPos": { - "h": 8, + "h": 7, "w": 6, - "x": 12, - "y": 60 + "x": 6, + "y": 121 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573233, + "id": 5938, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true }, - "pluginVersion": "7.5.11", + "links": [], "reverseYBuckets": false, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_apply_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (le)", + "expr": "max(rate(tikv_import_ingest_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"ingest\"}[1m])) by (le)", "format": "heatmap", - "interval": "", + "instant": false, "intervalFactor": 2, "legendFormat": "{{le}}", - "queryType": "randomWalk", "refId": "A" } ], - "title": "Apply Batch Size", + "title": "Import Ingest SST Duration", "tooltip": { "show": true, "showHistogram": true }, - "tooltipDecimals": null, + "tooltipDecimals": 1, "type": "heatmap", "xAxis": { "show": true @@ -41957,68 +42145,80 @@ "xBucketNumber": null, "xBucketSize": null, "yAxis": { - 
"decimals": null, - "format": "decbytes", + "decimals": 1, + "format": "s", "logBase": 1, "max": null, "min": null, "show": true, "splitFactor": null }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "cards": { - "cardPadding": null, - "cardRound": 2 + "cardPadding": 0, + "cardRound": 0 }, "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", + "cardColor": "#FF9830", + "colorScale": "linear", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, "gridPos": { - "h": 8, + "h": 7, "w": 6, - "x": 18, - "y": 60 + "x": 12, + "y": 121 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573230, + "id": 5937, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true }, - "pluginVersion": "7.5.11", + "links": [], "reverseYBuckets": false, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"get_permit\"}[$__rate_interval])) by (le)", + "expr": "max(rate(tikv_import_ingest_byte{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", "format": "heatmap", - "interval": "", + "instant": false, "intervalFactor": 2, "legendFormat": "{{le}}", - "queryType": "randomWalk", "refId": "A" } ], - "title": "Blocked by Concurrency Time", + "title": "Import Ingest SST Bytes", "tooltip": { "show": true, "showHistogram": true }, + "tooltipDecimals": 1, "type": "heatmap", "xAxis": { "show": true @@ -42026,7 +42226,7 @@ "xBucketNumber": null, "xBucketSize": null, "yAxis": { - 
"decimals": null, + "decimals": 1, "format": "s", "logBase": 1, "max": null, @@ -42034,7 +42234,7 @@ "show": true, "splitFactor": null }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, @@ -42045,61 +42245,67 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "fieldConfig": { - "defaults": { - "unit": "ops" - }, + "defaults": {}, "overrides": [] }, "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 5, + "h": 7, "w": 6, - "x": 0, - "y": 66 + "x": 18, + "y": 121 }, - "hiddenSeries": false, - "id": 23763573118, + "id": 5927, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, + "rightSide": false, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, + "links": [], "nullPointMode": "null", - "options": { - "alertThreshold": true - }, "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "7.5.7", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "total", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tikv_import_applier_event{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"begin_req\"}[$__rate_interval])", - "hide": false, - "interval": "", - "legendFormat": "{{instance}} :: {{type}}", - "queryType": "randomWalk", + "expr": "sum(rate(tikv_import_download_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", "refId": "A" + }, + { + "expr": "sum(rate(tikv_import_download_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + 
"legendFormat": "total", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Apply Request Speed", + "title": "Import Download SST Throughput", "tooltip": { "shared": true, "sort": 0, @@ -42115,8 +42321,7 @@ }, "yaxes": [ { - "$$hashKey": "object:2886", - "format": "ops", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -42124,8 +42329,7 @@ "show": true }, { - "$$hashKey": "object:2887", - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -42144,39 +42348,38 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "decimals": 2, + "description": "", "fieldConfig": { - "defaults": { - "unit": "decbytes" - }, + "defaults": {}, "overrides": [] }, "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 8, - "w": 6, - "x": 6, - "y": 68 + "h": 7, + "w": 12, + "x": 0, + "y": 128 }, - "hiddenSeries": false, - "id": 23763573346, + "id": 12309, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, + "links": [], + "nullPointMode": "null as zero", "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "7.5.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -42186,19 +42389,19 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_import_apply_cached_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", - "interval": "", - "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" + "expr": "delta(tikv_import_local_write_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "format": "time_series", + "hide": false, + 
"intervalFactor": 1, + "legendFormat": "{{type}} {{instance}}", + "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Cached File in Memory", + "title": "Import Local Write keys", "tooltip": { "shared": true, "sort": 0, @@ -42214,19 +42417,19 @@ }, "yaxes": [ { - "format": "decbytes", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true } ], @@ -42237,63 +42440,64 @@ }, { "aliasColors": {}, - "bars": true, + "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "decimals": 2, + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 8, - "w": 6, + "h": 7, + "w": 12, "x": 12, - "y": 68 + "y": 128 }, - "hiddenSeries": false, - "id": 23763573119, + "id": 12310, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, "total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, + "links": [], + "nullPointMode": "null as zero", "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "7.5.7", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "increase(tikv_import_applier_event{instance=~\"$instance\", type!=\"begin_req\"}[$__rate_interval])", - "interval": "", - "intervalFactor": 3, - "legendFormat": "{{instance}} :: {{type}}", - "refId": "A" + "expr": "delta(tikv_import_local_write_bytes{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{type}} {{instance}}", + "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Engine Requests Unfinished", + "title": "Import Local Write bytes", "tooltip": { "shared": true, "sort": 0, @@ -42309,21 +42513,19 @@ }, "yaxes": [ { - "$$hashKey": "object:304", - "format": "short", + "format": "decbytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "$$hashKey": "object:305", - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true } ], @@ -42333,114 +42535,156 @@ } }, { - "cards": { - "cardPadding": null, - "cardRound": 2 - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "description": "The accumulated TTL expired KV count during backup", "fieldConfig": { "defaults": {}, "overrides": [] }, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 8, - "w": 6, - "x": 18, - "y": 68 + "h": 7, + "w": 12, + "x": 0, + "y": 135 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 23763573231, + "hiddenSeries": false, + "id": 23763572861, "legend": { - "show": false + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true }, + "percentage": false, "pluginVersion": "7.5.11", - "reverseYBuckets": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + 
"steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"apply\"}[$__rate_interval])) by (le)", - "format": "heatmap", + "expr": "sum(tikv_backup_raw_expired_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": true, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", "queryType": "randomWalk", "refId": "A" - } - ], - "title": "Apply Time", + }, + { + "exemplar": true, + "expr": "sum(tikv_backup_raw_expired_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"})", + "hide": false, + "interval": "", + "legendFormat": "sum", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TTL Expired", "tooltip": { - "show": true, - "showHistogram": true - }, - "type": "heatmap", - "xAxis": { - "show": true + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": null, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "auto", - "yBucketNumber": null, - "yBucketSize": null - }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Backup & Import", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 43 + }, + "id": 4466, + "panels": [ { "aliasColors": 
{}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "", - "fieldConfig": { - "defaults": { - "unit": "bytes" - }, - "overrides": [] - }, + "description": "Total number of encryption data keys in use", "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 5, - "w": 6, + "h": 8, + "w": 12, "x": 0, - "y": 71 + "y": 58 }, - "hiddenSeries": false, - "id": 23763573449, + "id": 4464, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, + "rightSide": true, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, + "links": [], "nullPointMode": "null", - "options": { - "alertThreshold": true - }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -42450,12 +42694,10 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_server_mem_trace_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"raftstore-.*\"}) by (instance)", - "hide": false, - "interval": "", + "expr": "tikv_encryption_data_key_storage_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "format": "time_series", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "queryType": "randomWalk", "refId": "A" } ], @@ -42463,7 +42705,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Raft Store Memory Usage", + "title": "Encryption data keys", "tooltip": { "shared": true, "sort": 0, @@ -42479,8 +42721,8 @@ }, "yaxes": [ { - "$$hashKey": "object:2886", - "format": "bytes", + "decimals": 0, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -42488,7 +42730,7 @@ "show": true }, { - "$$hashKey": "object:2887", + "decimals": 0, "format": "short", "label": null, "logBase": 1, @@ -42501,570 +42743,184 @@ "align": false, "alignLevel": null } - } - ], - "title": "Point 
In Time Restore", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 54 - }, - "id": 13016, - "panels": [ + }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "from": "", - "id": 1, - "text": "Disabled", - "to": "", - "type": 1, - "value": "0" - }, - { - "from": "", - "id": 2, - "text": "Enabled", - "to": "", - "type": 1, - "value": "1" - } - ], - "noValue": "Disabled", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(0, 0, 0, 0.2)", - "value": null - }, - { - "color": "dark-red", - "value": 0 - }, - { - "color": "dark-green", - "value": 1 - } - ] - } - }, - "overrides": [] - }, + "description": "Number of files being encrypted", + "fill": 1, "gridPos": { - "h": 4, - "w": 5, - "x": 0, - "y": 55 + "h": 8, + "w": 12, + "x": 12, + "y": 58 }, - "id": 14361, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" + "id": 4554, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true }, - "pluginVersion": "7.5.11", + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_log_backup_enabled{instance=~\"$instance\"}", - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "queryType": "randomWalk", + "expr": "tikv_encryption_file_num{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", "refId": "A" } ], + "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Endpoint Status", - "transformations": [], - "type": "stat" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "description": "The average flush size of last 30mins.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "bytes" - }, - "overrides": [] + "title": "Encrypted files", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" }, - "gridPos": { - "h": 8, - "w": 8, - "x": 5, - "y": 55 + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "id": 14507, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "7.5.11", - "targets": [ { - "exemplar": true, - "expr": "increase(tikv_log_backup_flush_file_size_sum{instance=~\"$instance\"}[30m]) / on(instance) increase(tikv_log_backup_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "refId": "B" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } ], - "timeFrom": null, - "timeShift": null, - "title": "Average Flush Size ", - "type": "stat" + "yaxis": { + "align": false, + "alignLevel": null + } }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": 
false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The current total flushed file number of this run.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, + "description": "Flag to indicate if encryption is initialized", + "fill": 1, "gridPos": { "h": 8, - "w": 8, - "x": 13, - "y": 55 + "w": 12, + "x": 0, + "y": 66 }, - "id": 14363, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" + "id": 4555, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true }, - "pluginVersion": "7.5.11", + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "round(increase(tikv_log_backup_flush_file_size_count{instance=~\"$instance\"}[30m]))", - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "queryType": "randomWalk", + "expr": "tikv_encryption_is_initialized{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", "refId": "A" } ], + "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Flushed Files (Last 30m) Per Host", - "type": "stat" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has 
flushed since last boot. \n**NOTE: The size may get reduced if some of TiKVs reboot.**", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] + "title": "Encryption initialized", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" }, - "gridPos": { - "h": 2, - "w": 3, - "x": 21, - "y": 55 + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "id": 14508, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "7.5.11", - "targets": [ { - "exemplar": true, - "expr": "round(sum(increase(tikv_log_backup_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])))", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "refId": "B" + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } ], - "timeFrom": null, - "timeShift": null, - "title": "Flush Times (Last 30m)", - "type": "stat" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "description": "This is the summary of the size has been flushed, summered by the data each TiKV has flushed since last boot. 
\n**NOTE: The size may get reduced if some of TiKVs reboot.**", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 3, - "x": 21, - "y": 57 - }, - "id": 14362, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "7.5.11", - "targets": [ - { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_flush_file_size_sum{instance=~\"$instance\"}[30m]))", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Total Flushed Size (Last 30m)", - "type": "stat" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "from": "", - "id": 1, - "text": "Running", - "to": "", - "type": 1, - "value": "0" - }, - { - "from": "", - "id": 2, - "text": "Paused", - "to": "", - "type": 1, - "value": "1" - }, - { - "from": "", - "id": 3, - "text": "Error", - "to": "", - "type": 1, - "value": "2" - } - ], - "noValue": "Disabled", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(0, 0, 0, 0.2)", - "value": null - }, - { - "color": "dark-green", - "value": 0 - }, - { - "color": "#EAB839", - "value": 1 - }, - { - "color": "dark-red", - "value": 2 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 2, - "x": 0, - "y": 59 - }, - "id": 14907, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - 
"values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "7.5.11", - "targets": [ - { - "exemplar": true, - "expr": "min(tikv_log_backup_task_status{instance=~\"$instance\"})", - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "queryType": "randomWalk", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Task Status", - "transformations": [], - "type": "stat" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "min": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "dark-blue", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 2, - "y": 59 - }, - "id": 15361, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "center", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "name" - }, - "pluginVersion": "7.5.11", - "targets": [ - { - "exemplar": true, - "expr": "tidb_log_backup_advancer_owner > 0", - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Advancer Owner", - "type": "stat" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot. 
\n**NOTE: The size may get reduced if some of TiKVs reboot.**", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 3, - "x": 21, - "y": 60 - }, - "id": 14911, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "7.5.11", - "targets": [ - { - "exemplar": true, - "expr": "round(sum(increase(tikv_log_backup_flush_file_size_count{instance=~\"$instance\"}[30m])))", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{ instance }}", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Flush Files (Last 30m)", - "type": "stat" + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -43072,37 +42928,23 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The CPU utilization of log backup threads. 
\n**(Note this is the average usage for a period of time, some peak of CPU usage may be lost.)**", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, + "description": "Total size of encryption meta files", + "fill": 1, "gridPos": { - "h": 10, - "w": 6, - "x": 0, - "y": 63 + "h": 8, + "w": 12, + "x": 12, + "y": 66 }, - "hiddenSeries": false, - "id": 13262, + "id": 4556, "legend": { "alignAsTable": true, - "avg": true, + "avg": false, "current": true, - "hideEmpty": true, "max": true, - "min": false, - "rightSide": false, + "min": true, + "rightSide": true, "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, @@ -43110,12 +42952,8 @@ "linewidth": 1, "links": [], "nullPointMode": "null", - "options": { - "alertThreshold": true - }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -43124,25 +42962,19 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}[2m])) by (instance)", + "expr": "tikv_encryption_meta_file_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{name}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "intervalFactor": 1, + "legendFormat": "{{name}}-{{instance}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "CPU Usage", + "title": "Encryption meta files size", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -43157,16 +42989,14 @@ }, "yaxes": [ { - "$$hashKey": "object:646", - 
"format": "percentunit", + "format": "decbytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:647", "format": "short", "label": null, "logBase": 1, @@ -43187,26 +43017,21 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "description": "", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 63 + "h": 8, + "w": 12, + "x": 0, + "y": 74 }, - "hiddenSeries": false, - "id": 12843, + "id": 4557, "legend": { "alignAsTable": true, - "avg": true, + "avg": false, "current": true, - "max": false, + "max": true, "min": false, + "rightSide": true, "show": true, "total": false, "values": true @@ -43215,11 +43040,7 @@ "linewidth": 1, "links": [], "nullPointMode": "null", - "options": { - "alertThreshold": true - }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -43229,21 +43050,25 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tikv_log_backup_handle_kv_batch_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\" ,metric=\"encrypt_data_nanos\"}[1m])) by (req)", "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", + "intervalFactor": 1, + "legendFormat": "encrypt-{{req}}", "refId": "A" + }, + { + "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\" ,metric=\"decrypt_data_nanos\"}[1m])) by (req)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "decrypt-{{req}}", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Handle Event Rate", + "title": "Encrypt/decrypt data 
nanos", "tooltip": { "shared": true, "sort": 0, @@ -43259,16 +43084,14 @@ }, "yaxes": [ { - "$$hashKey": "object:563", - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:564", "format": "short", "label": null, "logBase": 1, @@ -43288,39 +43111,31 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The data rate of initial scanning emitting events.", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, + "description": "Writing or reading file duration (second)", "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 10, - "w": 6, + "h": 8, + "w": 12, "x": 12, - "y": 63 + "y": 74 }, - "hiddenSeries": false, - "id": 14135, + "id": 4559, "legend": { "alignAsTable": true, - "avg": true, - "current": false, + "avg": false, + "current": true, "max": true, "min": false, + "rightSide": true, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, + "links": [], + "nullPointMode": "null as zero", "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -43330,20 +43145,36 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tikv_log_backup_incremental_scan_bytes_sum{instance=~\"$instance\"}[$__rate_interval])", + "expr": "histogram_quantile(1, sum(rate(tikv_encryption_write_read_file_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation))", + "format": "time_series", + "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "queryType": "randomWalk", + "legendFormat": "max-{{type}}-{{operation}}", "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, 
sum(rate(tikv_encryption_write_read_file_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "95%-{{type}}-{{operation}}", + "refId": "B" + }, + { + "expr": "sum(rate(tikv_encryption_write_read_file_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation) / sum(rate(tikv_encryption_write_read_file_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "avg-{{type}}-{{operation}}", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Initial Scan Generate Event Throughput", + "title": "Read/write encryption meta duration", "tooltip": { "shared": true, "sort": 0, @@ -43359,8 +43190,8 @@ }, "yaxes": [ { - "$$hashKey": "object:136", - "format": "binBps", + "decimals": null, + "format": "s", "label": null, "logBase": 1, "max": null, @@ -43368,12 +43199,11 @@ "show": true }, { - "$$hashKey": "object:137", "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], @@ -43381,1381 +43211,1129 @@ "align": false, "alignLevel": null } - }, + } + ], + "title": "Encryption", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 13016, + "panels": [ { - "alert": { - "alertRuleTags": {}, - "conditions": [ - { - "evaluator": { - "params": [ - 600000 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": 
"alerting", - "for": "5m", - "frequency": "1m", - "handler": 1, - "name": "Checkpoint Lag Too Huge", - "noDataState": "no_data", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "fieldConfig": { "defaults": { - "unit": "ms" - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 63 - }, - "hiddenSeries": false, - "id": 14774, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "time() * 1000 - max(tidb_log_backup_last_checkpoint / 262144 > 0) by (task)", - "instant": false, - "interval": "", - "legendFormat": "{{ task }}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "time() * 1000", - "hide": true, - "interval": "", - "legendFormat": "Current Time", - "refId": "B" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 600000, - "visible": true - } - ], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Abnormal Checkpoint TS Lag", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:228", - "format": "ms", - "label": null, - "logBase": 1, - "max": "3000000", - "min": "0", - "show": true + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "from": "", + "id": 1, + "text": "Disabled", + "to": "", + 
"type": 1, + "value": "0" + }, + { + "from": "", + "id": 2, + "text": "Enabled", + "to": "", + "type": 1, + "value": "1" + } + ], + "noValue": "Disabled", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(0, 0, 0, 0.2)", + "value": null + }, + { + "color": "dark-red", + "value": 0 + }, + { + "color": "dark-green", + "value": 1 + } + ] + } }, - { - "$$hashKey": "object:229", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The estimated memory usage by the streaming backup module.", - "fieldConfig": { - "defaults": {}, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 10, - "w": 6, + "h": 4, + "w": 5, "x": 0, - "y": 73 - }, - "hiddenSeries": false, - "id": 13100, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "show": true, - "total": false, - "values": true + "y": 55 }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", + "id": 14361, "options": { - "alertThreshold": true + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "tikv_log_backup_heap_memory{instance=~\"$instance\"}", - "format": "time_series", - "instant": false, + "expr": "tikv_log_backup_enabled{instance=~\"$instance\"}", + "instant": true, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", 
+ "legendFormat": "{{ instance }}", + "queryType": "randomWalk", "refId": "A" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Memory Of Events", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:563", - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:564", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Endpoint Status", + "transformations": [], + "type": "stat" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The average flush size of last 30mins.", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 73 - }, - "hiddenSeries": false, - "id": 14630, - "legend": { - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true + "h": 8, + "w": 8, + "x": 5, + "y": 55 }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", + "id": 14507, "options": { - "alertThreshold": true + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, - 
"points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "tikv_log_backup_observed_region{instance=~\"$instance\"}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" - }, - { - "exemplar": true, - "expr": "sum(tikv_log_backup_observed_region{instance=~\"$instance\"})", + "expr": "increase(tikv_log_backup_flush_file_size_sum{instance=~\"$instance\"}[30m]) / on(instance) increase(tikv_log_backup_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])", "hide": false, + "instant": true, "interval": "", - "legendFormat": "total", + "legendFormat": "{{ instance }}", "refId": "B" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Observed Region Count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:136", - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:137", - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Average Flush Size ", + "type": "stat" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The errors met when backing up.\n**They are retryable, don't worry.**", + "description": "The current total flushed file number of this run.", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green", + "value": null + } + ] + }, + "unit": "short" + }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 73 - }, - "hiddenSeries": false, - "id": 13101, - "legend": { - "avg": false, - "current": false, - "hideZero": true, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false + "h": 8, + "w": 8, + "x": 13, + "y": 55 }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null", + "id": 14363, "options": { - "alertThreshold": true + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": true, "targets": [ { "exemplar": true, - "expr": "increase(tikv_log_backup_errors{instance=~\"$instance\"}[$__interval])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{type}}@{{instance}}", + "expr": "round(increase(tikv_log_backup_flush_file_size_count{instance=~\"$instance\"}[30m]))", + "instant": true, + "interval": "", + "legendFormat": "{{ instance }}", + "queryType": "randomWalk", "refId": "A" - }, - { - "exemplar": true, - "expr": "tikv_log_backup_errors{instance=~\"$instance\"}", - "hide": true, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "", - "refId": "B" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Errors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:563", - 
"format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:564", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Flushed Files (Last 30m) Per Host", + "type": "stat" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot. \n**NOTE: The size may get reduced if some of TiKVs reboot.**", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 73 - }, - "hiddenSeries": false, - "id": 14910, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "h": 2, + "w": 3, + "x": 21, + "y": 55 }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", + "id": 14508, "options": { - "alertThreshold": true + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:240", - "alias": "Current Time", - "dashes": true, - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "max(tidb_log_backup_last_checkpoint{instance=~\"$instance\"} / 
262144 > 0) by (task)", - "instant": false, - "interval": "", - "legendFormat": "{{ task }}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "time() * 1000", + "expr": "round(sum(increase(tikv_log_backup_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])))", "hide": false, + "instant": true, "interval": "", - "legendFormat": "Current Time", + "legendFormat": "{{ instance }}", "refId": "B" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Checkpoint TS of Tasks", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:169", - "format": "dateTimeAsIsoNoDateIfToday", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:170", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Flush Times (Last 30m)", + "type": "stat" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The errors met when backing up.", + "description": "This is the summary of the size has been flushed, summered by the data each TiKV has flushed since last boot. 
\n**NOTE: The size may get reduced if some of TiKVs reboot.**", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 78 - }, - "hiddenSeries": false, - "id": 14908, - "legend": { - "avg": false, - "current": false, - "hideZero": true, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false + "h": 3, + "w": 3, + "x": 21, + "y": 57 }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null", + "id": 14362, "options": { - "alertThreshold": true + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "percentage": false, "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": true, "targets": [ { "exemplar": true, - "expr": "increase(tikv_log_backup_fatal_errors{instance=~\"$instance\"}[$__interval])", - "format": "time_series", + "expr": "sum(increase(tikv_log_backup_flush_file_size_sum{instance=~\"$instance\"}[30m]))", "hide": false, - "instant": false, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{type}}@{{instance}}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "", - "hide": true, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "", + "instant": true, + "interval": "", + "legendFormat": "{{ instance }}", "refId": "B" } ], - "thresholds": [ - { - "$$hashKey": "object:3232", - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0, - "yaxis": "left" - } - ], "timeFrom": null, - 
"timeRegions": [], "timeShift": null, - "title": "Fatal Errors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:563", - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:564", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Total Flushed Size (Last 30m)", + "type": "stat" }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateBlues", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of flushing a batch of file.", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "from": "", + "id": 1, + "text": "Running", + "to": "", + "type": 1, + "value": "0" + }, + { + "from": "", + "id": 2, + "text": "Paused", + "to": "", + "type": 1, + "value": "1" + }, + { + "from": "", + "id": 3, + "text": "Error", + "to": "", + "type": 1, + "value": "2" + } + ], + "noValue": "Disabled", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(0, 0, 0, 0.2)", + "value": null + }, + { + "color": "dark-green", + "value": 0 + }, + { + "color": "#EAB839", + "value": 1 + }, + { + "color": "dark-red", + "value": 2 + } + ] + } + }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 6, + "h": 4, + "w": 2, "x": 0, - "y": 83 + "y": 59 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 14078, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": 
false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "id": 14907, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "links": [], - "reverseYBuckets": false, + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_flush_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_files\"}[$__interval])) by (le)", - "format": "heatmap", - "instant": false, + "expr": "min(tikv_log_backup_task_status{instance=~\"$instance\"})", + "instant": true, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{ instance }}", + "queryType": "randomWalk", "refId": "A" } ], - "title": "Flush Duration", - "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "timeFrom": null, + "timeShift": null, + "title": "Task Status", + "transformations": [], + "type": "stat" }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateReds", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of scanning the initial data from local DB and transform them into apply events. 
\n", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "min": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-blue", + "value": null + } + ] + }, + "unit": "none" + }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 83 + "h": 4, + "w": 3, + "x": 2, + "y": 59 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 14136, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "id": 15361, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "name" }, - "links": [], - "reverseYBuckets": false, + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_initial_scan_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", - "format": "heatmap", - "instant": false, + "expr": "tidb_log_backup_advancer_owner > 0", + "instant": true, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{ instance }}", "refId": "A" } ], - "title": "Initial scanning duration", - "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "timeFrom": null, + "timeShift": null, + "title": "Advancer Owner", + "type": "stat" }, { - "cards": { - "cardPadding": 0, - 
"cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateGreens", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of converting a raft request into a apply event. \n*This duration is for consuming a batch of events.*", + "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot. \n**NOTE: The size may get reduced if some of TiKVs reboot.**", "fieldConfig": { - "defaults": {}, + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 83 + "h": 3, + "w": 3, + "x": 21, + "y": 60 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 13934, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "id": 14911, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "links": [], - "reverseYBuckets": false, + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"to_stream_event\"}[$__interval])) by (le)", - "format": "heatmap", - "instant": false, + "expr": "round(sum(increase(tikv_log_backup_flush_file_size_count{instance=~\"$instance\"}[30m])))", + "hide": false, + "instant": true, "interval": "", - 
"intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A" + "legendFormat": "{{ instance }}", + "refId": "B" } ], - "title": "Convert Raft Event duration", - "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "timeFrom": null, + "timeShift": null, + "title": "Flush Files (Last 30m)", + "type": "stat" }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateGreens", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of waiting the mutex of the controller. \n*This duration is for consuming a batch of events.*", + "decimals": 1, + "description": "The CPU utilization of log backup threads. 
\n**(Note this is the average usage for a period of time, some peak of CPU usage may be lost.)**", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, + "fill": 0, + "fillGradient": 0, + "grid": {}, "gridPos": { - "h": 7, + "h": 10, "w": 6, - "x": 18, - "y": 83 + "x": 0, + "y": 63 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 12840, + "hiddenSeries": false, + "id": 13262, "legend": { "alignAsTable": true, - "avg": false, + "avg": true, "current": true, + "hideEmpty": true, "max": true, "min": false, - "rightSide": true, + "rightSide": false, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "reverseYBuckets": false, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"get_router_lock\"}[$__interval])) by (le)", - "format": "heatmap", - "instant": false, + "expr": "sum(rate(tikv_thread_cpu_seconds_total{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}[2m])) by (instance)", + "format": "time_series", + "hide": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A" + "legendFormat": "{{name}}", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "A", + "step": 4 } ], - "title": "Wait for Lock Duration", + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - 
"xAxis": { - "show": true + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateCividis", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of KV-modify of each raft command observed.", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 7, + "h": 10, "w": 6, - "x": 0, - "y": 90 + "x": 6, + "y": 63 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 15059, + "hiddenSeries": false, + "id": 12843, "legend": { "alignAsTable": true, - "avg": false, + "avg": true, "current": true, - "max": true, + "max": false, "min": false, - "rightSide": true, "show": true, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "reverseYBuckets": false, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + 
"points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_handle_kv_batch_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", - "format": "heatmap", + "expr": "rate(tikv_log_backup_handle_kv_batch_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "format": "time_series", "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", "refId": "A" } ], - "title": "Command Batch Size", + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Handle Event Rate", "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "short", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The total 
cost of saving an event into temporary file. \n*This duration is for consuming a batch of events.*", + "description": "The data rate of initial scanning emitting events.", "fieldConfig": { "defaults": {}, "overrides": [] }, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 7, + "h": 10, "w": 6, - "x": 6, - "y": 90 + "x": 12, + "y": 63 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 12841, + "hiddenSeries": false, + "id": 14135, "legend": { "alignAsTable": true, - "avg": false, - "current": true, + "avg": true, + "current": false, "max": true, "min": false, - "rightSide": true, "show": true, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, - "links": [], - "reverseYBuckets": false, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_to_temp_file\"}[$__interval])) by (le)", - "format": "heatmap", - "instant": false, + "expr": "rate(tikv_log_backup_incremental_scan_bytes_sum{instance=~\"$instance\"}[$__rate_interval])", "interval": "", "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", + "queryType": "randomWalk", "refId": "A" } ], - "title": "Save to Temp File Duration", + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Initial Scan Generate Event Throughput", "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - 
"yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 600000 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Checkpoint Lag Too Huge", + "noDataState": "no_data", + "notifications": [] }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The total cost of writing a event into temporary file.\nComparing to the ***Save*** duration, it doesn't contain the time cost of routing the task by range / task. 
\n*This duration is for consuming a batch of events, for one region or one table.*", "fieldConfig": { - "defaults": {}, + "defaults": { + "unit": "ms" + }, "overrides": [] }, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 7, + "h": 10, "w": 6, - "x": 12, - "y": 90 + "x": 18, + "y": 63 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 13552, + "hiddenSeries": false, + "id": 14774, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "max": true, + "current": false, + "max": false, "min": false, - "rightSide": true, "show": true, - "sort": "current", - "sortDesc": true, "total": false, - "values": true + "values": false }, - "links": [], - "reverseYBuckets": false, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"write_to_tempfile\"}[$__interval])) by (le)", - "format": "heatmap", + "expr": "time() * 1000 - max(tidb_log_backup_last_checkpoint / 262144 > 0) by (task)", "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{ task }}", "refId": "A" + }, + { + "exemplar": true, + "expr": "time() * 1000", + "hide": true, + "interval": "", + "legendFormat": "Current Time", + "refId": "B" } ], - "title": "Write to Temp File Duration", + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 600000, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Abnormal Checkpoint TS Lag", "tooltip": { - "show": true, - "showHistogram": true - }, - 
"tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": "3000000", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "cards": { - "cardPadding": 0, - "cardRound": 0 - }, - "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": 0, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of collecting metadata and call the UNIX system call *write* for each event. 
\n*This duration is for consuming a batch of events, for one region or one table.*", + "description": "The estimated memory usage by the streaming backup module.", "fieldConfig": { "defaults": {}, "overrides": [] }, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 7, + "h": 10, "w": 6, - "x": 18, - "y": 90 + "x": 0, + "y": 73 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 13551, + "hiddenSeries": false, + "id": 13100, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, - "rightSide": true, "show": true, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "reverseYBuckets": false, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"syscall_write\"}[$__interval])) by (le)", - "format": "heatmap", + "expr": "tikv_log_backup_heap_memory{instance=~\"$instance\"}", + "format": "time_series", "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", "refId": "A" } ], - "title": "System Write Call Duration", + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Of Events", "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + 
"buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -44763,7 +44341,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal message type count.", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] @@ -44771,21 +44349,21 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 97 + "h": 10, + "w": 6, + "x": 6, + "y": 73 }, "hiddenSeries": false, - "id": 14914, + "id": 14630, "legend": { "avg": false, - "current": false, + "current": true, "max": false, "min": false, - "show": false, + "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -44798,25 +44376,39 @@ "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "total", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_log_backup_interal_actor_acting_duration_sec_count{instance=~\"$instance\"}[$__rate_interval])) by (message)", + "expr": "tikv_log_backup_observed_region{instance=~\"$instance\"}", "interval": "", - "legendFormat": "{{ message }}", + "intervalFactor": 2, + "legendFormat": "{{instance}}", "queryType": "randomWalk", "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(tikv_log_backup_observed_region{instance=~\"$instance\"})", + "hide": false, + "interval": "", + "legendFormat": "total", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": 
null, - "title": "Internal Message Type", + "title": "Observed Region Count", "tooltip": { "shared": true, "sort": 0, @@ -44832,17 +44424,15 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "ops", + "format": "none", "label": null, - "logBase": 2, + "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:104", - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -44856,43 +44446,39 @@ } }, { - "aliasColors": { - "watch_task": "orange" - }, - "bars": false, + "aliasColors": {}, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal handling message duration.", + "description": "The errors met when backing up.\n**They are retryable, don't worry.**", "fieldConfig": { - "defaults": { - "unit": "s" - }, + "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 6, + "h": 5, "w": 6, "x": 12, - "y": 97 + "y": 73 }, "hiddenSeries": false, - "id": 14912, + "id": 13101, "legend": { - "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": false, + "hideZero": true, "max": false, "min": false, - "show": true, + "show": false, "total": false, "values": false }, - "lines": true, + "lines": false, "linewidth": 1, + "links": [], "nullPointMode": "null", "options": { "alertThreshold": true @@ -44904,23 +44490,35 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": false, - "steppedLine": false, + "stack": true, + "steppedLine": true, "targets": [ { "exemplar": true, - "expr": "sum(histogram_quantile(0.99, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket{instance=~\"$instance\"}[10m]))) by (message)", - "interval": "", - "legendFormat": "{{ message }}", - "queryType": "randomWalk", + "expr": "increase(tikv_log_backup_errors{instance=~\"$instance\"}[$__interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "1m", + 
"intervalFactor": 2, + "legendFormat": "{{type}}@{{instance}}", "refId": "A" + }, + { + "exemplar": true, + "expr": "tikv_log_backup_errors{instance=~\"$instance\"}", + "hide": true, + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Internal Message Handling Duration (P99)", + "title": "Errors", "tooltip": { "shared": true, "sort": 0, @@ -44936,8 +44534,7 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "s", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -44945,13 +44542,12 @@ "show": true }, { - "$$hashKey": "object:104", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { @@ -44965,7 +44561,6 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal handling message duration.", "fieldConfig": { "defaults": {}, "overrides": [] @@ -44973,13 +44568,13 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 6, + "h": 10, "w": 6, "x": 18, - "y": 97 + "y": 73 }, "hiddenSeries": false, - "id": 14913, + "id": 14910, "legend": { "avg": false, "current": false, @@ -45000,25 +44595,39 @@ "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "Current Time", + "dashes": true, + "fill": 0 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(histogram_quantile(0.9, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket{instance=~\"$instance\"}[10m]))) by (message)", + "expr": "max(tidb_log_backup_last_checkpoint{instance=~\"$instance\"} / 262144 > 0) by (task)", + "instant": false, "interval": "", - "legendFormat": "{{ message }}", - "queryType": "randomWalk", + "legendFormat": "{{ task }}", "refId": "A" + }, + { + "exemplar": true, + "expr": "time() * 1000", + "hide": false, + "interval": 
"", + "legendFormat": "Current Time", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Internal Message Handling Duration (P90)", + "title": "Checkpoint TS of Tasks", "tooltip": { "shared": true, "sort": 0, @@ -45034,16 +44643,14 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "s", + "format": "dateTimeAsIsoNoDateIfToday", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:104", "format": "short", "label": null, "logBase": 1, @@ -45059,11 +44666,11 @@ }, { "aliasColors": {}, - "bars": false, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal read throughput of RocksDB during initial scanning. This panel can roughly present the read through to the hard disk of initial scanning.", + "description": "The errors met when backing up.", "fieldConfig": { "defaults": {}, "overrides": [] @@ -45071,25 +44678,26 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 6, + "h": 5, "w": 6, - "x": 0, - "y": 103 + "x": 12, + "y": 78 }, "hiddenSeries": false, - "id": 14271, + "id": 14908, "legend": { "avg": false, "current": false, + "hideZero": true, "max": false, "min": false, - "rightSide": true, - "show": true, + "show": false, "total": false, "values": false }, - "lines": true, + "lines": false, "linewidth": 1, + "links": [], "nullPointMode": "null", "options": { "alertThreshold": true @@ -45101,23 +44709,44 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": false, - "steppedLine": false, + "stack": true, + "steppedLine": true, "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_log_backup_initial_scan_operations{instance=~\"$instance\", op=~\"read_bytes\"}[$__rate_interval])) BY (op, cf)", - "interval": "", - "legendFormat": "{{ cf }}", - "queryType": "randomWalk", + "expr": "increase(tikv_log_backup_fatal_errors{instance=~\"$instance\"}[$__interval])", + 
"format": "time_series", + "hide": false, + "instant": false, + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{type}}@{{instance}}", "refId": "A" + }, + { + "exemplar": true, + "expr": "", + "hide": true, + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "yaxis": "left" } ], - "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Initial Scan RocksDB Throughput ", + "title": "Fatal Errors", "tooltip": { "shared": true, "sort": 0, @@ -45133,329 +44762,524 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "binBps", + "format": "none", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": "0", "show": true }, { - "$$hashKey": "object:104", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateBlues", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The duration of flushing a batch of file.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 83 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 14078, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": 
true, + "expr": "sum(increase(tikv_log_backup_flush_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_files\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Flush Duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateReds", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The duration of scanning the initial data from local DB and transform them into apply events. 
\n", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 83 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 14136, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_log_backup_initial_scan_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Initial scanning duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateGreens", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The duration of converting a raft request into a apply event. 
\n*This duration is for consuming a batch of events.*", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 83 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 13934, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"to_stream_event\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Convert Raft Event duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateGreens", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "Misc statistics of RocksDB during initial scanning.", + "description": "The duration of waiting the mutex of the controller. 
\n*This duration is for consuming a batch of events.*", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 6, + "h": 7, "w": 6, - "x": 6, - "y": 103 + "x": 18, + "y": 83 }, - "hiddenSeries": false, - "id": 14270, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 12840, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true + "values": true }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "links": [], + "reverseYBuckets": false, "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_log_backup_initial_scan_operations{instance=~\"$instance\", op!~\"read_bytes\"}[$__rate_interval])) BY (op, cf) > 0", + "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"get_router_lock\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, "interval": "", - "legendFormat": "{{ cf }}/{{ op }}", - "queryType": "randomWalk", + "intervalFactor": 2, + "legendFormat": "{{le}}", "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Initial Scan RocksDB Operation ", + "title": "Wait for Lock Duration", "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" + "show": true, + "showHistogram": true }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + 
"xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, "show": true, - "values": [] + "splitFactor": null }, - "yaxes": [ - { - "$$hashKey": "object:103", - "format": "ops", - "label": null, - "logBase": 2, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:104", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { - "aliasColors": { - "leader-changed": "blue", - "region-changed": "purple" + "cards": { + "cardPadding": 0, + "cardRound": 0 }, - "bars": true, - "dashLength": 10, - "dashes": false, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateCividis", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The reason of triggering initial scanning.", + "description": "The number of KV-modify of each raft command observed.", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 6, + "h": 7, "w": 6, - "x": 12, - "y": 103 + "x": 0, + "y": 90 }, - "hiddenSeries": false, - "id": 14915, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 15059, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": false + "values": true }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, 
- "steppedLine": false, + "links": [], + "reverseYBuckets": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tikv_log_backup_initial_scan_reason{instance=~\"$instance\"}[$__rate_interval])) by (reason)", + "expr": "sum(increase(tikv_log_backup_handle_kv_batch_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, "interval": "", - "legendFormat": "{{ message }}", - "queryType": "randomWalk", + "intervalFactor": 2, + "legendFormat": "{{le}}", "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Initial Scanning Trigger Reason", + "title": "Command Batch Size", "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" + "show": true, + "showHistogram": true }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "short", + "logBase": 1, + "max": null, + "min": null, "show": true, - "values": [] + "splitFactor": null }, - "yaxes": [ - { - "$$hashKey": "object:2608", - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:2609", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { - "aliasColors": { - "del": "dark-red", - "put": "green" + "cards": { + "cardPadding": 0, + "cardRound": 0 }, - "bars": false, - "dashLength": 10, - "dashes": false, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", 
"datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The total cost of saving an event into temporary file. \n*This duration is for consuming a batch of events.*", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 6, + "h": 7, "w": 6, - "x": 18, - "y": 103 + "x": 6, + "y": 90 }, - "hiddenSeries": false, - "id": 15176, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 12841, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sort": "current", + "sortDesc": true, "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true + "values": true }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "links": [], + "reverseYBuckets": false, "targets": [ { "exemplar": true, - "expr": "sum(rate(tikv_log_backup_metadata_key_operation{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_to_temp_file\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, "interval": "", - "legendFormat": "{{ type }}", - "queryType": "randomWalk", + "intervalFactor": 2, + "legendFormat": "{{le}}", "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Region Checkpoint Key Putting", + "title": "Save to Temp File Duration", "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" + "show": true, + "showHistogram": true }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, + 
"tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, "show": true, - "values": [] + "splitFactor": null }, - "yaxes": [ - { - "$$hashKey": "object:2608", - "format": "cps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:2609", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "cards": { @@ -45473,7 +45297,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The total cost of writing a event into temporary file.\nComparing to the ***Save*** duration, it doesn't contain the time cost of routing the task by range / task. \n*This duration is for consuming a batch of events, for one region or one table.*", "fieldConfig": { "defaults": {}, "overrides": [] @@ -45481,13 +45305,13 @@ "gridPos": { "h": 7, "w": 6, - "x": 0, - "y": 109 + "x": 12, + "y": 90 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 15544, + "id": 13552, "legend": { "alignAsTable": true, "avg": false, @@ -45506,7 +45330,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(increase(tidb_log_backup_advancer_batch_size_bucket{type=\"checkpoint\"}[$__interval])) by (le)", + "expr": "sum(increase(tikv_log_backup_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"write_to_tempfile\"}[$__interval])) by (le)", "format": "heatmap", "instant": false, "interval": "", @@ -45515,7 +45339,7 @@ "refId": "A" } ], - "title": "Request Checkpoint Batch Size", + "title": "Write to Temp File Duration", "tooltip": { "show": true, "showHistogram": true @@ -45529,7 +45353,7 @@ 
"xBucketSize": null, "yAxis": { "decimals": 1, - "format": "none", + "format": "s", "logBase": 1, "max": null, "min": null, @@ -45556,7 +45380,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The duration of collecting metadata and call the UNIX system call *write* for each event. \n*This duration is for consuming a batch of events, for one region or one table.*", "fieldConfig": { "defaults": {}, "overrides": [] @@ -45564,13 +45388,13 @@ "gridPos": { "h": 7, "w": 6, - "x": 6, - "y": 109 + "x": 18, + "y": 90 }, "heatmap": {}, "hideZeroBuckets": true, "highlightCards": true, - "id": 15716, + "id": 13551, "legend": { "alignAsTable": true, "avg": false, @@ -45582,74 +45406,168 @@ "sort": "current", "sortDesc": true, "total": false, - "values": true + "values": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tikv_log_backup_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"syscall_write\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "System Write Call Duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The internal message type count.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 97 
+ }, + "hiddenSeries": false, + "id": 14914, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false }, - "links": [], - "reverseYBuckets": false, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tidb_log_backup_advancer_tick_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", step=~\"tick\"}[$__interval])) by (le)", - "format": "heatmap", - "instant": false, + "expr": "sum(rate(tikv_log_backup_interal_actor_acting_duration_sec_count{instance=~\"$instance\"}[$__rate_interval])) by (message)", "interval": "", - "intervalFactor": 2, - "legendFormat": "{{le}}", + "legendFormat": "{{ message }}", + "queryType": "randomWalk", "refId": "A" } ], - "title": "Tick Duration", + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Internal Message Type", "tooltip": { - "show": true, - "showHistogram": true - }, - "tooltipDecimals": 1, - "type": "heatmap", - "xAxis": { - "show": true + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 2, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + 
"alignLevel": null + } }, { "aliasColors": { - "epoch-not-match": "purple", - "not-leader": "blue", "watch_task": "orange" }, - "bars": true, + "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The reason of advancer failed to be advanced.", + "description": "The internal handling message duration.", "fieldConfig": { "defaults": { - "unit": "none" + "unit": "s" }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, + "h": 6, "w": 6, "x": 12, - "y": 109 + "y": 97 }, "hiddenSeries": false, - "id": 23763572666, + "id": 14912, "legend": { "alignAsTable": false, "avg": false, @@ -45661,7 +45579,7 @@ "total": false, "values": false }, - "lines": false, + "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { @@ -45674,33 +45592,23 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tidb_log_backup_region_request_failure{reason!=\"retryable-scan-region\"}[$__interval])) by (reason)", - "hide": false, + "expr": "sum(histogram_quantile(0.99, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket{instance=~\"$instance\"}[10m]))) by (message)", "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ reason }}", + "legendFormat": "{{ message }}", "queryType": "randomWalk", "refId": "A" - }, - { - "exemplar": true, - "expr": "", - "hide": false, - "interval": "", - "legendFormat": "", - "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Region Checkpoint Failure Reason", + "title": "Internal Message Handling Duration (P99)", "tooltip": { "shared": true, "sort": 0, @@ -45716,8 +45624,7 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "none", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -45725,13 +45632,12 @@ "show": true }, { - "$$hashKey": "object:104", - "format": "none", + 
"format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ], "yaxis": { @@ -45740,44 +45646,36 @@ } }, { - "aliasColors": { - "fail": "red", - "success": "green", - "watch_task": "orange" - }, - "bars": true, + "aliasColors": {}, + "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The result of getting region checkpoints.", + "description": "The internal handling message duration.", "fieldConfig": { - "defaults": { - "unit": "none" - }, + "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, + "h": 6, "w": 6, "x": 18, - "y": 109 + "y": 97 }, "hiddenSeries": false, - "id": 23763572665, + "id": 14913, "legend": { - "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": false, "max": false, "min": false, "show": true, "total": false, "values": false }, - "lines": false, + "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { @@ -45788,42 +45686,25 @@ "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:834", - "alias": "fail", - "transform": "negative-Y", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(increase(tidb_log_backup_region_request[$__interval])) by (result)", - "hide": false, + "expr": "sum(histogram_quantile(0.9, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket{instance=~\"$instance\"}[10m]))) by (message)", "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ result }}", + "legendFormat": "{{ message }}", "queryType": "randomWalk", "refId": "A" - }, - { - "exemplar": true, - "expr": "", - "hide": false, - "interval": "", - "legendFormat": "", - "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Request Result", + "title": "Internal Message Handling Duration (P90)", 
"tooltip": { "shared": true, "sort": 0, @@ -45839,8 +45720,7 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "none", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -45848,8 +45728,7 @@ "show": true }, { - "$$hashKey": "object:104", - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -45863,37 +45742,32 @@ } }, { - "aliasColors": { - "watch_task": "orange" - }, + "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal handling message duration.", + "description": "The internal read throughput of RocksDB during initial scanning. This panel can roughly present the read through to the hard disk of initial scanning.", "fieldConfig": { - "defaults": { - "unit": "s" - }, + "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, + "h": 6, "w": 6, "x": 0, - "y": 116 + "y": 103 }, "hiddenSeries": false, - "id": 15359, + "id": 14271, "legend": { - "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -45909,32 +45783,16 @@ "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:1017", - "alias": "consistency-check", - "yaxis": 1 - }, - { - "$$hashKey": "object:1018", - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "$$hashKey": "object:1019", - "alias": "get-checkpoints-in-range", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(histogram_quantile(0.99, rate(tidb_log_backup_advancer_tick_duration_sec_bucket[10m]))) by (step)", + "expr": "sum(rate(tikv_log_backup_initial_scan_operations{instance=~\"$instance\", op=~\"read_bytes\"}[$__rate_interval])) BY (op, cf)", "interval": "", - "legendFormat": "{{ step }}", + 
"legendFormat": "{{ cf }}", "queryType": "randomWalk", "refId": "A" } @@ -45943,7 +45801,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Tick Duration (P99)", + "title": "Initial Scan RocksDB Throughput ", "tooltip": { "shared": true, "sort": 0, @@ -45959,17 +45817,15 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "s", + "format": "binBps", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": "0", "show": true }, { - "$$hashKey": "object:104", - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -45983,37 +45839,32 @@ } }, { - "aliasColors": { - "watch_task": "orange" - }, + "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal handling message duration.", + "description": "Misc statistics of RocksDB during initial scanning.", "fieldConfig": { - "defaults": { - "unit": "s" - }, + "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, + "h": 6, "w": 6, "x": 6, - "y": 116 + "y": 103 }, "hiddenSeries": false, - "id": 15360, + "id": 14270, "legend": { - "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -46029,32 +45880,16 @@ "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:1091", - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "$$hashKey": "object:1092", - "alias": "get-checkpoints-in-range", - "yaxis": 2 - }, - { - "$$hashKey": "object:1093", - "alias": "consistency-check", - "yaxis": 1 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(histogram_quantile(0.9, rate(tidb_log_backup_advancer_tick_duration_sec_bucket[10m]))) by (step)", + "expr": 
"sum(rate(tikv_log_backup_initial_scan_operations{instance=~\"$instance\", op!~\"read_bytes\"}[$__rate_interval])) BY (op, cf) > 0", "interval": "", - "legendFormat": "{{ step }}", + "legendFormat": "{{ cf }}/{{ op }}", "queryType": "randomWalk", "refId": "A" } @@ -46063,7 +45898,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Tick Duration (P90)", + "title": "Initial Scan RocksDB Operation ", "tooltip": { "shared": true, "sort": 0, @@ -46079,17 +45914,15 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "s", + "format": "ops", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": "0", "show": true }, { - "$$hashKey": "object:104", - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -46104,77 +45937,58 @@ }, { "aliasColors": { - "watch_task": "orange" + "leader-changed": "blue", + "region-changed": "purple" }, - "bars": false, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The frequent of getting region level checkpoint.", + "description": "The reason of triggering initial scanning.", "fieldConfig": { - "defaults": { - "unit": "none" - }, + "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, + "h": 6, "w": 6, "x": 12, - "y": 116 + "y": 103 }, "hiddenSeries": false, - "id": 23763572733, + "id": 14915, "legend": { - "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": false, "max": false, "min": false, "show": true, "total": false, "values": false }, - "lines": true, + "lines": false, "linewidth": 1, "nullPointMode": "null", "options": { - "alertThreshold": true + "alertThreshold": false }, "percentage": false, "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:1091", - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "$$hashKey": "object:1092", - "alias": "get-checkpoints-in-range", - 
"yaxis": 2 - }, - { - "$$hashKey": "object:1093", - "alias": "consistency-check", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "rate(tidb_log_backup_advancer_tick_duration_sec_count{step=\"get-regions-in-range\"}[$__rate_interval])", + "expr": "sum(increase(tikv_log_backup_initial_scan_reason{instance=~\"$instance\"}[$__rate_interval])) by (reason)", "interval": "", - "legendFormat": "{{ step }} {{ instance }}", + "legendFormat": "{{ message }}", "queryType": "randomWalk", "refId": "A" } @@ -46183,7 +45997,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Get Region Operation Count", + "title": "Initial Scanning Trigger Reason", "tooltip": { "shared": true, "sort": 0, @@ -46199,17 +46013,15 @@ }, "yaxes": [ { - "$$hashKey": "object:103", "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:104", - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -46224,41 +46036,38 @@ }, { "aliasColors": { - "watch_task": "orange" + "del": "dark-red", + "put": "green" }, - "bars": true, + "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The variant of checkpoint group.", + "description": "", "fieldConfig": { - "defaults": { - "unit": "none" - }, + "defaults": {}, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, + "h": 6, "w": 6, "x": 18, - "y": 116 + "y": 103 }, "hiddenSeries": false, - "id": 23763572734, + "id": 15176, "legend": { - "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": false, "max": false, "min": false, "show": true, "total": false, "values": false }, - "lines": false, + "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { @@ -46269,33 +46078,16 @@ "pointradius": 2, "points": false, "renderer": "flot", - 
"seriesOverrides": [ - { - "$$hashKey": "object:1091", - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "$$hashKey": "object:1092", - "alias": "get-checkpoints-in-range", - "yaxis": 2 - }, - { - "$$hashKey": "object:1093", - "alias": "consistency-check", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "increase(tidb_log_backup_advancer_tick_duration_sec_count{step=\"try-advance\"}[$__interval])", + "expr": "sum(rate(tikv_log_backup_metadata_key_operation{instance=~\"$instance\"}[$__rate_interval])) by (type)", "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ step }} {{ instance }}", + "legendFormat": "{{ type }}", "queryType": "randomWalk", "refId": "A" } @@ -46304,7 +46096,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Try Advance Trigger Time", + "title": "Region Checkpoint Key Putting", "tooltip": { "shared": true, "sort": 0, @@ -46320,17 +46112,15 @@ }, "yaxes": [ { - "$$hashKey": "object:103", - "format": "none", + "format": "cps", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:104", - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -46342,74 +46132,251 @@ "align": false, "alignLevel": null } - } - ], - "title": "Backup Log", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 49 - }, - "id": 4466, - "panels": [ + }, + { + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 109 
+ }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 15544, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tidb_log_backup_advancer_batch_size_bucket{type=\"checkpoint\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Request Checkpoint Batch Size", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "none", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": 0, + "cardRound": 0 + }, + "color": { + "cardColor": "#FF9830", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 109 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 15716, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": 
"sum(increase(tidb_log_backup_advancer_tick_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", step=~\"tick\"}[$__interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Tick Duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 1, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { - "aliasColors": {}, - "bars": false, + "aliasColors": { + "epoch-not-match": "purple", + "not-leader": "blue", + "watch_task": "orange" + }, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Total number of encryption data keys in use", + "description": "The reason of advancer failed to be advanced.", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 58 + "h": 7, + "w": 6, + "x": 12, + "y": 109 }, - "id": 4464, + "hiddenSeries": false, + "id": 23763572666, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, - "max": true, + "current": false, + "hideEmpty": false, + "max": false, "min": false, - "rightSide": true, "show": true, "total": false, - "values": true + "values": false }, - "lines": true, + "lines": false, "linewidth": 1, - "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": 
"tikv_encryption_data_key_storage_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}", + "exemplar": true, + "expr": "sum(increase(tidb_log_backup_region_request_failure{reason!=\"retryable-scan-region\"}[$__interval])) by (reason)", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ reason }}", + "queryType": "randomWalk", "refId": "A" + }, + { + "exemplar": true, + "expr": "", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Encryption data keys", + "title": "Region Checkpoint Failure Reason", "tooltip": { "shared": true, "sort": 0, @@ -46425,17 +46392,15 @@ }, "yaxes": [ { - "decimals": 0, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "decimals": 0, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -46449,57 +46414,89 @@ } }, { - "aliasColors": {}, - "bars": false, + "aliasColors": { + "fail": "red", + "success": "green", + "watch_task": "orange" + }, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Number of files being encrypted", + "description": "The result of getting region checkpoints.", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 58 + "h": 7, + "w": 6, + "x": 18, + "y": 109 }, - "id": 4554, + "hiddenSeries": false, + "id": 23763572665, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": true, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, "show": true, "total": false, - "values": true + "values": false }, - 
"lines": true, + "lines": false, "linewidth": 1, - "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "fail", + "transform": "negative-Y", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_file_num{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}", + "exemplar": true, + "expr": "sum(increase(tidb_log_backup_region_request[$__interval])) by (result)", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ result }}", + "queryType": "randomWalk", "refId": "A" + }, + { + "exemplar": true, + "expr": "", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Encrypted files", + "title": "Request Result", "tooltip": { "shared": true, "sort": 0, @@ -46515,15 +46512,15 @@ }, "yaxes": [ { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -46537,49 +46534,76 @@ } }, { - "aliasColors": {}, + "aliasColors": { + "watch_task": "orange" + }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Flag to indicate if encryption is initialized", + "description": "The internal handling message duration.", + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 6, "x": 0, - "y": 66 + "y": 116 }, - "id": 4555, + "hiddenSeries": false, + "id": 15359, "legend": { - "alignAsTable": 
true, + "alignAsTable": false, "avg": false, - "current": true, + "current": false, + "hideEmpty": false, "max": false, "min": false, - "rightSide": true, "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "consistency-check", + "yaxis": 1 + }, + { + "alias": "get-checkpoints-of-store", + "yaxis": 2 + }, + { + "alias": "get-checkpoints-in-range", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_is_initialized{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}", + "exemplar": true, + "expr": "sum(histogram_quantile(0.99, rate(tidb_log_backup_advancer_tick_duration_sec_bucket[10m]))) by (step)", + "interval": "", + "legendFormat": "{{ step }}", + "queryType": "randomWalk", "refId": "A" } ], @@ -46587,7 +46611,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Encryption initialized", + "title": "Tick Duration (P99)", "tooltip": { "shared": true, "sort": 0, @@ -46603,17 +46627,15 @@ }, "yaxes": [ { - "decimals": 0, - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "decimals": 0, - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -46627,49 +46649,76 @@ } }, { - "aliasColors": {}, + "aliasColors": { + "watch_task": "orange" + }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Total size of encryption meta files", + "description": "The internal handling message duration.", + "fieldConfig": { + "defaults": { + "unit": "s" + }, 
+ "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 66 + "h": 7, + "w": 6, + "x": 6, + "y": 116 }, - "id": 4556, + "hiddenSeries": false, + "id": 15360, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": true, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "get-checkpoints-of-store", + "yaxis": 2 + }, + { + "alias": "get-checkpoints-in-range", + "yaxis": 2 + }, + { + "alias": "consistency-check", + "yaxis": 1 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_meta_file_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{name}}-{{instance}}", + "exemplar": true, + "expr": "sum(histogram_quantile(0.9, rate(tidb_log_backup_advancer_tick_duration_sec_bucket[10m]))) by (step)", + "interval": "", + "legendFormat": "{{ step }}", + "queryType": "randomWalk", "refId": "A" } ], @@ -46677,7 +46726,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Encryption meta files size", + "title": "Tick Duration (P90)", "tooltip": { "shared": true, "sort": 0, @@ -46693,15 +46742,15 @@ }, "yaxes": [ { - "format": "decbytes", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -46715,64 +46764,84 @@ } }, { - "aliasColors": {}, + "aliasColors": { + "watch_task": "orange" + 
}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The frequent of getting region level checkpoint.", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 74 + "h": 7, + "w": 6, + "x": 12, + "y": 116 }, - "id": 4557, + "hiddenSeries": false, + "id": 23763572733, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, - "max": true, + "current": false, + "hideEmpty": false, + "max": false, "min": false, - "rightSide": true, "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "get-checkpoints-of-store", + "yaxis": 2 + }, + { + "alias": "get-checkpoints-in-range", + "yaxis": 2 + }, + { + "alias": "consistency-check", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\" ,metric=\"encrypt_data_nanos\"}[1m])) by (req)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "encrypt-{{req}}", + "exemplar": true, + "expr": "rate(tidb_log_backup_advancer_tick_duration_sec_count{step=\"get-regions-in-range\"}[$__rate_interval])", + "interval": "", + "legendFormat": "{{ step }} {{ instance }}", + "queryType": "randomWalk", "refId": "A" - }, - { - "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\" ,metric=\"decrypt_data_nanos\"}[1m])) by (req)", - "format": "time_series", - "intervalFactor": 
1, - "legendFormat": "decrypt-{{req}}", - "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Encrypt/decrypt data nanos", + "title": "Get Region Operation Count", "tooltip": { "shared": true, "sort": 0, @@ -46788,15 +46857,15 @@ }, "yaxes": [ { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -46810,75 +46879,85 @@ } }, { - "aliasColors": {}, - "bars": false, + "aliasColors": { + "watch_task": "orange" + }, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Writing or reading file duration (second)", + "description": "The variant of checkpoint group.", + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 74 + "h": 7, + "w": 6, + "x": 18, + "y": 116 }, - "id": 4559, + "hiddenSeries": false, + "id": 23763572734, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, - "max": true, + "current": false, + "hideEmpty": false, + "max": false, "min": false, - "rightSide": true, "show": true, "total": false, - "values": true + "values": false }, - "lines": true, + "lines": false, "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "get-checkpoints-of-store", + "yaxis": 2 + }, + { + "alias": "get-checkpoints-in-range", + "yaxis": 2 + }, + { + "alias": "consistency-check", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, 
sum(rate(tikv_encryption_write_read_file_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation))", - "format": "time_series", - "instant": false, + "exemplar": true, + "expr": "increase(tidb_log_backup_advancer_tick_duration_sec_count{step=\"try-advance\"}[$__interval])", "interval": "", "intervalFactor": 2, - "legendFormat": "max-{{type}}-{{operation}}", + "legendFormat": "{{ step }} {{ instance }}", + "queryType": "randomWalk", "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(tikv_encryption_write_read_file_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "95%-{{type}}-{{operation}}", - "refId": "B" - }, - { - "expr": "sum(rate(tikv_encryption_write_read_file_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation) / sum(rate(tikv_encryption_write_read_file_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "avg-{{type}}-{{operation}}", - "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Read/write encryption meta duration", + "title": "Try Advance Trigger Time", "tooltip": { "shared": true, "sort": 0, @@ -46894,16 +46973,15 @@ }, "yaxes": [ { - "decimals": null, - "format": "s", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -46917,7 +46995,7 @@ } } ], - "title": "Encryption", + "title": "Backup Log", "type": "row" } ], @@ 
-47124,4 +47202,4 @@ "title": "Test-Cluster-TiKV-Details", "uid": "RDVQiEzZz", "version": 1 -} +} \ No newline at end of file From a4a3664c7796ec43186048811d091e16e0893f11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Wed, 28 Jun 2023 12:10:46 +0200 Subject: [PATCH 0759/1149] tikv_util: Allow building on FreeBSD (#15016) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#15015 Building tikv_util is now possible on FreeBSD Signed-off-by: Daniël van Eeden --- components/tikv_util/src/config.rs | 3 +++ components/tikv_util/src/sys/cpu_time.rs | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index 07fea59a7da..c3d240d3c4f 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -647,6 +647,9 @@ pub fn ensure_dir_exist(path: &str) -> Result<(), Box> { #[cfg(unix)] pub fn check_max_open_fds(expect: u64) -> Result<(), ConfigError> { + #[cfg(target_os = "freebsd")] + let expect = expect as i64; + use std::mem; unsafe { diff --git a/components/tikv_util/src/sys/cpu_time.rs b/components/tikv_util/src/sys/cpu_time.rs index 69fbb2fb251..6ec1621c629 100644 --- a/components/tikv_util/src/sys/cpu_time.rs +++ b/components/tikv_util/src/sys/cpu_time.rs @@ -83,7 +83,7 @@ impl ProcessStat { } } -#[cfg(target_os = "linux")] +#[cfg(any(target_os = "linux", target_os = "freebsd"))] mod imp { use std::{fs::File, io, io::Read, time::Duration}; @@ -191,7 +191,7 @@ mod imp { } } -#[cfg(not(any(target_os = "linux", target_os = "macos")))] +#[cfg(target_os = "windows")] mod imp { use std::io; From 1ce8ee7df84503e889ff8bc6834a57128a858a16 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 28 Jun 2023 20:03:15 +0800 Subject: [PATCH 0760/1149] raftstore-v2: fix delete range (#15019) ref tikv/tikv#12842 - Do not use WAL for delete range writes - Keep modified index consistent with 
memtable state, add tests for two possible anomalies. Signed-off-by: tabokie --- components/engine_panic/src/misc.rs | 7 +- components/engine_rocks/src/misc.rs | 76 ++++++--- components/engine_traits/src/misc.rs | 22 ++- components/engine_traits/src/tablet.rs | 2 +- .../raftstore-v2/src/operation/command/mod.rs | 6 - .../src/operation/command/write/mod.rs | 20 +-- .../src/operation/ready/apply_trace.rs | 1 + components/raftstore-v2/src/raft/apply.rs | 19 ++- components/raftstore-v2/src/worker/tablet.rs | 37 ++--- .../tests/failpoints/test_basic_write.rs | 156 +++++++++++++++++- components/raftstore/src/store/config.rs | 106 +++++++----- components/raftstore/src/store/fsm/apply.rs | 9 +- components/raftstore/src/store/fsm/store.rs | 9 +- .../raftstore/src/store/worker/region.rs | 40 +++-- components/server/src/server.rs | 1 + components/server/src/server2.rs | 1 + components/test_raftstore-v2/src/node.rs | 8 +- components/test_raftstore-v2/src/server.rs | 1 + components/test_raftstore/src/node.rs | 8 +- components/test_raftstore/src/server.rs | 1 + src/config/mod.rs | 1 + src/server/gc_worker/compaction_filter.rs | 1 + src/server/gc_worker/gc_worker.rs | 3 + .../gc_worker/rawkv_compaction_filter.rs | 3 +- src/server/reset_to_version.rs | 2 + tests/failpoints/cases/mod.rs | 1 - tests/failpoints/cases/test_apply_trace.rs | 50 ------ tests/integrations/raftstore/test_single.rs | 2 +- tests/integrations/storage/test_titan.rs | 5 +- 29 files changed, 402 insertions(+), 196 deletions(-) delete mode 100644 tests/failpoints/cases/test_apply_trace.rs diff --git a/components/engine_panic/src/misc.rs b/components/engine_panic/src/misc.rs index 531af76b3de..114dc8a4853 100644 --- a/components/engine_panic/src/misc.rs +++ b/components/engine_panic/src/misc.rs @@ -1,6 +1,8 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{DeleteStrategy, MiscExt, Range, RangeStats, Result, StatisticsReporter}; +use engine_traits::{ + DeleteStrategy, MiscExt, Range, RangeStats, Result, StatisticsReporter, WriteOptions, +}; use crate::engine::PanicEngine; @@ -41,10 +43,11 @@ impl MiscExt for PanicEngine { fn delete_ranges_cf( &self, + wopts: &WriteOptions, cf: &str, strategy: DeleteStrategy, ranges: &[Range<'_>], - ) -> Result<()> { + ) -> Result { panic!() } diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index 5fae5a68e96..417431d0ffc 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -3,6 +3,7 @@ use engine_traits::{ CfNamesExt, DeleteStrategy, ImportExt, IterOptions, Iterable, Iterator, MiscExt, Mutable, Range, RangeStats, Result, SstWriter, SstWriterBuilder, WriteBatch, WriteBatchExt, + WriteOptions, }; use rocksdb::{FlushOptions, Range as RocksRange}; use tikv_util::{box_try, keybuilder::KeyBuilder}; @@ -23,10 +24,12 @@ impl RocksEngine { // of region will never be larger than max-region-size. 
fn delete_all_in_range_cf_by_ingest( &self, + wopts: &WriteOptions, cf: &str, sst_path: String, ranges: &[Range<'_>], - ) -> Result<()> { + ) -> Result { + let mut written = false; let mut ranges = ranges.to_owned(); ranges.sort_by(|a, b| a.start_key.cmp(b.start_key)); @@ -39,7 +42,7 @@ impl RocksEngine { .as_ref() .map_or(false, |key| key.as_slice() > r.start_key) { - self.delete_all_in_range_cf_by_key(cf, &r)?; + written |= self.delete_all_in_range_cf_by_key(wopts, cf, &r)?; continue; } last_end_key = Some(r.end_key.to_owned()); @@ -84,20 +87,26 @@ impl RocksEngine { } else { let mut wb = self.write_batch(); for key in data.iter() { - wb.delete_cf(cf, key)?; if wb.count() >= Self::WRITE_BATCH_MAX_KEYS { - wb.write()?; + wb.write_opt(wopts)?; wb.clear(); } + wb.delete_cf(cf, key)?; } if wb.count() > 0 { - wb.write()?; + wb.write_opt(wopts)?; + written = true; } } - Ok(()) + Ok(written) } - fn delete_all_in_range_cf_by_key(&self, cf: &str, range: &Range<'_>) -> Result<()> { + fn delete_all_in_range_cf_by_key( + &self, + wopts: &WriteOptions, + cf: &str, + range: &Range<'_>, + ) -> Result { let start = KeyBuilder::from_slice(range.start_key, 0, 0); let end = KeyBuilder::from_slice(range.end_key, 0, 0); let mut opts = IterOptions::new(Some(start), Some(end), false); @@ -110,18 +119,22 @@ impl RocksEngine { let mut it_valid = it.seek(range.start_key)?; let mut wb = self.write_batch(); while it_valid { - wb.delete_cf(cf, it.key())?; if wb.count() >= Self::WRITE_BATCH_MAX_KEYS { - wb.write()?; + wb.write_opt(wopts)?; wb.clear(); } + wb.delete_cf(cf, it.key())?; it_valid = it.next()?; } if wb.count() > 0 { - wb.write()?; + wb.write_opt(wopts)?; + if !wopts.disable_wal() { + self.sync_wal()?; + } + Ok(true) + } else { + Ok(false) } - self.sync_wal()?; - Ok(()) } } @@ -188,12 +201,14 @@ impl MiscExt for RocksEngine { fn delete_ranges_cf( &self, + wopts: &WriteOptions, cf: &str, strategy: DeleteStrategy, ranges: &[Range<'_>], - ) -> Result<()> { + ) -> Result { + let mut 
written = false; if ranges.is_empty() { - return Ok(()); + return Ok(written); } match strategy { DeleteStrategy::DeleteFiles => { @@ -209,7 +224,7 @@ impl MiscExt for RocksEngine { }) .collect(); if rocks_ranges.is_empty() { - return Ok(()); + return Ok(written); } self.as_inner() .delete_files_in_ranges_cf(handle, &rocks_ranges, false) @@ -229,7 +244,7 @@ impl MiscExt for RocksEngine { }) .collect(); if rocks_ranges.is_empty() { - return Ok(()); + return Ok(written); } self.as_inner() .delete_blob_files_in_ranges_cf(handle, &rocks_ranges, false) @@ -241,18 +256,19 @@ impl MiscExt for RocksEngine { for r in ranges.iter() { wb.delete_range_cf(cf, r.start_key, r.end_key)?; } - wb.write()?; + wb.write_opt(wopts)?; + written = true; } DeleteStrategy::DeleteByKey => { for r in ranges { - self.delete_all_in_range_cf_by_key(cf, r)?; + written |= self.delete_all_in_range_cf_by_key(wopts, cf, r)?; } } DeleteStrategy::DeleteByWriter { sst_path } => { - self.delete_all_in_range_cf_by_ingest(cf, sst_path, ranges)?; + written |= self.delete_all_in_range_cf_by_ingest(wopts, cf, sst_path, ranges)?; } } - Ok(()) + Ok(written) } fn get_approximate_memtable_stats_cf(&self, cf: &str, range: &Range<'_>) -> Result<(u64, u64)> { @@ -482,7 +498,8 @@ mod tests { wb.write().unwrap(); check_data(&db, ALL_CFS, kvs.as_slice()); - db.delete_ranges_cfs(strategy, ranges).unwrap(); + db.delete_ranges_cfs(&WriteOptions::default(), strategy, ranges) + .unwrap(); let mut kvs_left: Vec<_> = kvs; for r in ranges { @@ -620,10 +637,18 @@ mod tests { } check_data(&db, ALL_CFS, kvs.as_slice()); - db.delete_ranges_cfs(DeleteStrategy::DeleteFiles, &[Range::new(b"k2", b"k4")]) - .unwrap(); - db.delete_ranges_cfs(DeleteStrategy::DeleteBlobs, &[Range::new(b"k2", b"k4")]) - .unwrap(); + db.delete_ranges_cfs( + &WriteOptions::default(), + DeleteStrategy::DeleteFiles, + &[Range::new(b"k2", b"k4")], + ) + .unwrap(); + db.delete_ranges_cfs( + &WriteOptions::default(), + DeleteStrategy::DeleteBlobs, + 
&[Range::new(b"k2", b"k4")], + ) + .unwrap(); check_data(&db, ALL_CFS, kvs_left.as_slice()); } @@ -668,6 +693,7 @@ mod tests { // Delete all in ["k2", "k4"). db.delete_ranges_cfs( + &WriteOptions::default(), DeleteStrategy::DeleteByRange, &[Range::new(b"kabcdefg2", b"kabcdefg4")], ) diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index b5189bcc1a1..4494c32a356 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -6,7 +6,8 @@ //! FIXME: Things here need to be moved elsewhere. use crate::{ - cf_names::CfNamesExt, errors::Result, flow_control_factors::FlowControlFactorsExt, range::Range, + cf_names::CfNamesExt, errors::Result, flow_control_factors::FlowControlFactorsExt, + range::Range, WriteBatchExt, WriteOptions, }; #[derive(Clone, Debug)] @@ -64,7 +65,7 @@ pub struct RangeStats { pub num_rows: u64, } -pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { +pub trait MiscExt: CfNamesExt + FlowControlFactorsExt + WriteBatchExt { type StatisticsReporter: StatisticsReporter; /// Flush all specified column families at once. @@ -80,19 +81,28 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt { age_threshold: Option, ) -> Result<()>; - fn delete_ranges_cfs(&self, strategy: DeleteStrategy, ranges: &[Range<'_>]) -> Result<()> { + /// Returns whether there's data written through kv interface. + fn delete_ranges_cfs( + &self, + wopts: &WriteOptions, + strategy: DeleteStrategy, + ranges: &[Range<'_>], + ) -> Result { + let mut written = false; for cf in self.cf_names() { - self.delete_ranges_cf(cf, strategy.clone(), ranges)?; + written |= self.delete_ranges_cf(wopts, cf, strategy.clone(), ranges)?; } - Ok(()) + Ok(written) } + /// Returns whether there's data written through kv interface. 
fn delete_ranges_cf( &self, + wopts: &WriteOptions, cf: &str, strategy: DeleteStrategy, ranges: &[Range<'_>], - ) -> Result<()>; + ) -> Result; /// Return the approximate number of records and size in the range of /// memtables of the cf. diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index 14f7d186f76..c88f1548513 100644 --- a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -34,7 +34,7 @@ pub struct CachedTablet { } impl CachedTablet { - fn release(&mut self) { + pub fn release(&mut self) { self.cache = None; self.version = 0; } diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 3aed3547ca5..8c63ab1dc41 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -512,7 +512,6 @@ impl Apply { let _ = self.apply_delete(delete.cf, u64::MAX, delete.key); } SimpleWrite::DeleteRange(dr) => { - let use_delete_range = self.use_delete_range(); let _ = self .apply_delete_range( dr.cf, @@ -520,7 +519,6 @@ impl Apply { dr.start_key, dr.end_key, dr.notify_only, - use_delete_range, ) .await; } @@ -637,14 +635,12 @@ impl Apply { self.apply_delete(delete.cf, log_index, delete.key)?; } SimpleWrite::DeleteRange(dr) => { - let use_delete_range = self.use_delete_range(); self.apply_delete_range( dr.cf, log_index, dr.start_key, dr.end_key, dr.notify_only, - use_delete_range, ) .await?; } @@ -739,7 +735,6 @@ impl Apply { self.apply_delete(delete.get_cf(), log_index, delete.get_key())?; } CmdType::DeleteRange => { - let use_delete_range = self.use_delete_range(); let dr = r.get_delete_range(); self.apply_delete_range( dr.get_cf(), @@ -747,7 +742,6 @@ impl Apply { dr.get_start_key(), dr.get_end_key(), dr.get_notify_only(), - use_delete_range, ) .await?; } diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs 
b/components/raftstore-v2/src/operation/command/write/mod.rs index f4481150f46..17ab24836cd 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -235,7 +235,6 @@ impl Apply { start_key: &[u8], end_key: &[u8], notify_only: bool, - use_delete_range: bool, ) -> Result<()> { PEER_WRITE_CMD_COUNTER.delete_range.inc(); let off = data_cf_offset(cf); @@ -273,7 +272,7 @@ impl Apply { let start = Instant::now_coarse(); // Use delete_files_in_range to drop as many sst files as possible, this // is a way to reclaim disk space quickly after drop a table/index. - if !notify_only { + let written = if !notify_only { let (notify, wait) = oneshot::channel(); let delete_range = TabletTask::delete_range( self.region_id(), @@ -281,9 +280,8 @@ impl Apply { name_to_cf(cf).unwrap(), start_key.clone().into(), end_key.clone().into(), - use_delete_range, - Box::new(move || { - notify.send(()).unwrap(); + Box::new(move |written| { + notify.send(written).unwrap(); }), ); if let Err(e) = self.tablet_scheduler().schedule_force(delete_range) { @@ -295,8 +293,10 @@ impl Apply { ); } - let _ = wait.await; - } + wait.await.unwrap() + } else { + false + }; info!( self.logger, @@ -304,12 +304,12 @@ impl Apply { "range_start" => log_wrappers::Value::key(&start_key), "range_end" => log_wrappers::Value::key(&end_key), "notify_only" => notify_only, - "use_delete_range" => use_delete_range, "duration" => ?start.saturating_elapsed(), ); - // delete range is an unsafe operation and it cannot be rollbacked to replay, so - // we don't update modification index for this operation. 
+ if index != u64::MAX && written { + self.modifications_mut()[off] = index; + } Ok(()) } diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index d280dc4913e..9cf241d9ee6 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -344,6 +344,7 @@ impl ApplyTrace { #[inline] pub fn should_persist(&self) -> bool { + fail_point!("should_persist_apply_trace", |_| true); self.try_persist } } diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 96e48e52417..c72e8be1969 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -54,6 +54,12 @@ pub struct Apply { // can fetch the wrong apply index from flush_state. applied_index: u64, /// The largest index that have modified each column family. + /// + /// Caveats: This field must be consistent with the state of memtable. If + /// modified is advanced when memtable is empty, the admin flushed can never + /// be advanced. If modified is not advanced when memtable is written, the + /// corresponding Raft entry may be deleted before the change is fully + /// persisted (flushed). modifications: DataTrace, admin_cmd_result: Vec, flush_state: Arc, @@ -76,9 +82,6 @@ pub struct Apply { tablet_scheduler: Scheduler>, high_priority_pool: FuturePool, - // Whether to use the delete range API instead of deleting one by one. 
- use_delete_range: bool, - pub(crate) metrics: ApplyMetrics, pub(crate) logger: Logger, pub(crate) buckets: Option, @@ -112,6 +115,10 @@ impl Apply { assert_ne!(applied_index, 0, "{}", SlogFormat(&logger)); let tablet = remote_tablet.latest().unwrap().clone(); let perf_context = EK::get_perf_context(cfg.perf_level, PerfContextKind::RaftstoreApply); + assert!( + !cfg.use_delete_range, + "v2 doesn't support RocksDB delete range" + ); Apply { peer, tablet, @@ -137,7 +144,6 @@ impl Apply { sst_importer, tablet_scheduler, high_priority_pool, - use_delete_range: cfg.use_delete_range, observe: Observe { info: CmdObserveInfo::default(), level: ObserveLevel::None, @@ -344,9 +350,4 @@ impl Apply { pub fn tablet_scheduler(&self) -> &Scheduler> { &self.tablet_scheduler } - - #[inline] - pub fn use_delete_range(&self) -> bool { - self.use_delete_range - } } diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index 6e01ee327d0..fe9b1f64fff 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -9,7 +9,7 @@ use std::{ use collections::HashMap; use engine_traits::{ - CfName, DeleteStrategy, KvEngine, Range, TabletContext, TabletRegistry, DATA_CFS, + CfName, DeleteStrategy, KvEngine, Range, TabletContext, TabletRegistry, WriteOptions, DATA_CFS, }; use fail::fail_point; use kvproto::{import_sstpb::SstMeta, metapb::Region}; @@ -67,8 +67,7 @@ pub enum Task { cf: CfName, start_key: Box<[u8]>, end_key: Box<[u8]>, - use_delete_range: bool, - cb: Box, + cb: Box, }, // Gc snapshot SnapGc(Box<[TabletSnapKey]>), @@ -217,8 +216,7 @@ impl Task { cf: CfName, start_key: Box<[u8]>, end_key: Box<[u8]>, - use_delete_range: bool, - cb: Box, + cb: Box, ) -> Self { Task::DeleteRange { region_id, @@ -226,7 +224,6 @@ impl Task { cf, start_key, end_key, - use_delete_range, cb, } } @@ -277,7 +274,11 @@ impl Runner { let end_key = keys::data_end_key(&end); let range1 = Range::new(&[], &start_key); 
let range2 = Range::new(&end_key, keys::DATA_MAX_KEY); - if let Err(e) = tablet.delete_ranges_cfs(DeleteStrategy::DeleteFiles, &[range1, range2]) { + let mut wopts = WriteOptions::default(); + wopts.set_disable_wal(true); + if let Err(e) = + tablet.delete_ranges_cfs(&wopts, DeleteStrategy::DeleteFiles, &[range1, range2]) + { error!( self.logger, "failed to trim tablet"; @@ -517,7 +518,7 @@ impl Runner { } fn delete_range(&self, delete_range: Task) { - let Task::DeleteRange { region_id, tablet, cf, start_key, end_key, use_delete_range, cb } = delete_range else { + let Task::DeleteRange { region_id, tablet, cf, start_key, end_key, cb } = delete_range else { slog_panic!(self.logger, "unexpected task"; "task" => format!("{}", delete_range)) }; @@ -533,27 +534,25 @@ impl Runner { "error" => ?e, ) }; - tablet - .delete_ranges_cf(cf, DeleteStrategy::DeleteFiles, &range) + let mut wopts = WriteOptions::default(); + wopts.set_disable_wal(true); + let mut written = tablet + .delete_ranges_cf(&wopts, cf, DeleteStrategy::DeleteFiles, &range) .unwrap_or_else(|e| fail_f(e, DeleteStrategy::DeleteFiles)); - let strategy = if use_delete_range { - DeleteStrategy::DeleteByRange - } else { - DeleteStrategy::DeleteByKey - }; + let strategy = DeleteStrategy::DeleteByKey; // Delete all remaining keys. - tablet - .delete_ranges_cf(cf, strategy.clone(), &range) + written |= tablet + .delete_ranges_cf(&wopts, cf, strategy.clone(), &range) .unwrap_or_else(move |e| fail_f(e, strategy)); // TODO: support titan? 
// tablet - // .delete_ranges_cf(cf, DeleteStrategy::DeleteBlobs, &range) + // .delete_ranges_cf(&wopts, cf, DeleteStrategy::DeleteBlobs, &range) // .unwrap_or_else(move |e| fail_f(e, // DeleteStrategy::DeleteBlobs)); - cb(); + cb(written); } } diff --git a/components/raftstore-v2/tests/failpoints/test_basic_write.rs b/components/raftstore-v2/tests/failpoints/test_basic_write.rs index 55d85b90fa4..5947827c250 100644 --- a/components/raftstore-v2/tests/failpoints/test_basic_write.rs +++ b/components/raftstore-v2/tests/failpoints/test_basic_write.rs @@ -2,7 +2,9 @@ use std::{assert_matches::assert_matches, time::Duration}; -use engine_traits::{Peekable, CF_DEFAULT}; +use engine_traits::{ + CompactExt, DbOptionsExt, MiscExt, Peekable, RaftEngineReadOnly, CF_DEFAULT, CF_RAFT, CF_WRITE, +}; use futures::executor::block_on; use raftstore_v2::{router::PeerMsg, SimpleWriteEncoder}; @@ -93,3 +95,155 @@ fn test_write_batch_rollback() { assert_matches!(snap.get_value(b"key2"), Ok(None)); assert_eq!(snap.get_value(b"key3").unwrap().unwrap(), b"value"); } + +#[test] +fn test_delete_range() { + let mut cluster = Cluster::default(); + let mut cached = cluster.node(0).tablet_registry().get(2).unwrap(); + let router = &mut cluster.routers[0]; + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + + { + let snap = router.stale_snapshot(2); + assert!(snap.get_value(b"key1").unwrap().is_none()); + } + // write to default and write cf. 
+ for i in 0..10 { + let header = Box::new(router.new_request_for(2).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, format!("k{i}").as_bytes(), b"value"); + put.put(CF_WRITE, format!("k{i}").as_bytes(), b"value"); + let (msg, mut sub) = PeerMsg::simple_write(header.clone(), put.encode()); + router.send(2, msg).unwrap(); + assert!(block_on(sub.wait_proposed())); + assert!(block_on(sub.wait_committed())); + let resp = block_on(sub.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + let snap = router.stale_snapshot(2); + assert_eq!( + snap.get_value(format!("k{i}").as_bytes()).unwrap().unwrap(), + b"value" + ); + assert_eq!( + snap.get_value_cf(CF_WRITE, format!("k{i}").as_bytes()) + .unwrap() + .unwrap(), + b"value" + ); + } + // flush all data. + cached.latest().unwrap().flush_cfs(&[], true).unwrap(); + // delete some in default cf. + let fp = fail::FailGuard::new("should_persist_apply_trace", "return"); + let header = Box::new(router.new_request_for(2).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + // it will write some tombstones. + put.delete_range(CF_DEFAULT, b"k3", b"k6", false); + let (msg, mut sub) = PeerMsg::simple_write(header, put.encode()); + router.send(2, msg).unwrap(); + assert!(block_on(sub.wait_proposed())); + assert!(block_on(sub.wait_committed())); + let resp = block_on(sub.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + { + let snap = router.stale_snapshot(2); + assert!(snap.get_value(b"k4").unwrap().is_none()); + } + drop(fp); + cached + .latest() + .unwrap() + .set_db_options(&[("avoid_flush_during_shutdown", "true")]) + .unwrap(); + cached.release(); + drop(cached); + cluster.node(0).tablet_registry().remove(2); + // restart and check delete is re-applied. 
+ cluster.restart(0); + cluster.routers[0].wait_applied_to_current_term(2, Duration::from_secs(3)); + let snap = cluster.routers[0].stale_snapshot(2); + assert_eq!(snap.get_value(b"k2").unwrap().unwrap(), b"value"); + assert!(snap.get_value(b"k3").unwrap().is_none()); + assert!(snap.get_value(b"k4").unwrap().is_none()); + assert!(snap.get_value(b"k4").unwrap().is_none()); + assert_eq!(snap.get_value(b"k6").unwrap().unwrap(), b"value"); +} + +// It tests that delete range for an empty cf does not block the progress of +// persisted_applied. See the description of the PR #14905. +#[test] +fn test_delete_range_does_not_block_flushed_index() { + let mut cluster = Cluster::default(); + let mut cached = cluster.node(0).tablet_registry().get(2).unwrap(); + let raft_engine = cluster.node(0).running_state().unwrap().raft_engine.clone(); + let router = &mut cluster.routers[0]; + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + + let _fp = fail::FailGuard::new("should_persist_apply_trace", "return"); + { + let snap = router.stale_snapshot(2); + assert!(snap.get_value(b"key").unwrap().is_none()); + } + // write to default cf and flush. + let header = Box::new(router.new_request_for(2).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_DEFAULT, b"key", b"value"); + let (msg, mut sub) = PeerMsg::simple_write(header, put.encode()); + router.send(2, msg).unwrap(); + assert!(block_on(sub.wait_proposed())); + assert!(block_on(sub.wait_committed())); + let resp = block_on(sub.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + let snap = router.stale_snapshot(2); + assert_eq!(snap.get_value(b"key").unwrap().unwrap(), b"value"); + // Must compact to non-L0 level. + cached + .latest() + .unwrap() + .compact_range_cf(CF_DEFAULT, Some(b"A"), Some(b"{"), false, 1) + .unwrap(); + // delete range by files. 
+ let header = Box::new(router.new_request_for(2).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.delete_range(CF_DEFAULT, b"k", b"z", false); + let (msg, mut sub) = PeerMsg::simple_write(header, put.encode()); + router.send(2, msg).unwrap(); + assert!(block_on(sub.wait_proposed())); + assert!(block_on(sub.wait_committed())); + let resp = block_on(sub.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + { + let snap = router.stale_snapshot(2); + assert!(snap.get_value(b"key").unwrap().is_none()); + // Make sure memtable is empty. + assert!( + cached + .latest() + .unwrap() + .get_active_memtable_stats_cf(CF_DEFAULT) + .unwrap() + .is_none() + ); + } + // record current admin flushed. + let admin_flushed = raft_engine.get_flushed_index(2, CF_RAFT).unwrap().unwrap(); + // write to write cf and flush. + let header = Box::new(router.new_request_for(2).take_header()); + let mut put = SimpleWriteEncoder::with_capacity(64); + put.put(CF_WRITE, b"key", b"value"); + let (msg, mut sub) = PeerMsg::simple_write(header, put.encode()); + router.send(2, msg).unwrap(); + assert!(block_on(sub.wait_proposed())); + assert!(block_on(sub.wait_committed())); + let resp = block_on(sub.result()).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + let snap = router.stale_snapshot(2); + assert_eq!( + snap.get_value_cf(CF_WRITE, b"key").unwrap().unwrap(), + b"value" + ); + cached.latest().unwrap().flush_cf(CF_WRITE, true).unwrap(); + + let current_admin_flushed = raft_engine.get_flushed_index(2, CF_RAFT).unwrap().unwrap(); + assert!(current_admin_flushed > admin_flushed); +} diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 5928f6ac438..22c389099ab 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -598,6 +598,7 @@ impl Config { region_split_size: ReadableSize, enable_region_bucket: bool, region_bucket_size: 
ReadableSize, + raft_kv_v2: bool, ) -> Result<()> { if self.raft_heartbeat_ticks == 0 { return Err(box_err!("heartbeat tick must greater than 0")); @@ -861,6 +862,11 @@ impl Config { } } assert!(self.region_compact_check_step.is_some()); + if raft_kv_v2 && self.use_delete_range { + return Err(box_err!( + "partitioned-raft-kv doesn't support RocksDB delete range." + )); + } Ok(()) } @@ -1192,7 +1198,8 @@ mod tests { let split_size = coprocessor::config::SPLIT_SIZE; let mut cfg = Config::new(); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)).unwrap(); + cfg.validate(split_size, false, ReadableSize(0), false) + .unwrap(); assert_eq!( cfg.raft_min_election_timeout_ticks, cfg.raft_election_timeout_ticks @@ -1204,50 +1211,51 @@ mod tests { cfg.raft_heartbeat_ticks = 0; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.raft_election_timeout_ticks = 10; cfg.raft_heartbeat_ticks = 10; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.raft_min_election_timeout_ticks = 5; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg.raft_min_election_timeout_ticks = 25; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg.raft_min_election_timeout_ticks = 10; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)).unwrap(); + cfg.validate(split_size, false, ReadableSize(0), false) + .unwrap(); cfg.raft_heartbeat_ticks = 11; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); 
cfg.raft_log_gc_threshold = 0; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.raft_log_gc_size_limit = Some(ReadableSize(0)); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.raft_log_gc_size_limit = None; cfg.optimize_for(false); - cfg.validate(ReadableSize(20), false, ReadableSize(0)) + cfg.validate(ReadableSize(20), false, ReadableSize(0), false) .unwrap(); assert_eq!(cfg.raft_log_gc_size_limit, Some(ReadableSize(15))); @@ -1256,27 +1264,27 @@ mod tests { cfg.raft_election_timeout_ticks = 10; cfg.raft_store_max_leader_lease = ReadableDuration::secs(20); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.raft_log_gc_count_limit = Some(100); cfg.merge_max_log_gap = 110; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.raft_log_gc_count_limit = None; cfg.optimize_for(false); - cfg.validate(ReadableSize::mb(1), false, ReadableSize(0)) + cfg.validate(ReadableSize::mb(1), false, ReadableSize(0), false) .unwrap(); assert_eq!(cfg.raft_log_gc_count_limit, Some(768)); cfg = Config::new(); cfg.merge_check_tick_interval = ReadableDuration::secs(0); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); @@ -1284,76 +1292,78 @@ mod tests { cfg.raft_election_timeout_ticks = 10; cfg.peer_stale_state_check_interval = ReadableDuration::secs(5); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) 
.unwrap_err(); cfg = Config::new(); cfg.peer_stale_state_check_interval = ReadableDuration::minutes(2); cfg.abnormal_leader_missing_duration = ReadableDuration::minutes(1); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.abnormal_leader_missing_duration = ReadableDuration::minutes(2); cfg.max_leader_missing_duration = ReadableDuration::minutes(1); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.local_read_batch_size = 0; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.apply_batch_system.max_batch_size = Some(0); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.apply_batch_system.pool_size = 0; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.store_batch_system.max_batch_size = Some(0); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.store_batch_system.pool_size = 0; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.apply_batch_system.max_batch_size = Some(10241); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.store_batch_system.max_batch_size = Some(10241); 
cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.hibernate_regions = true; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)).unwrap(); + cfg.validate(split_size, false, ReadableSize(0), false) + .unwrap(); assert_eq!(cfg.store_batch_system.max_batch_size, Some(256)); assert_eq!(cfg.apply_batch_system.max_batch_size, Some(256)); cfg = Config::new(); cfg.hibernate_regions = false; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)).unwrap(); + cfg.validate(split_size, false, ReadableSize(0), false) + .unwrap(); assert_eq!(cfg.store_batch_system.max_batch_size, Some(1024)); assert_eq!(cfg.apply_batch_system.max_batch_size, Some(256)); @@ -1362,20 +1372,21 @@ mod tests { cfg.store_batch_system.max_batch_size = Some(123); cfg.apply_batch_system.max_batch_size = Some(234); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)).unwrap(); + cfg.validate(split_size, false, ReadableSize(0), false) + .unwrap(); assert_eq!(cfg.store_batch_system.max_batch_size, Some(123)); assert_eq!(cfg.apply_batch_system.max_batch_size, Some(234)); cfg = Config::new(); cfg.future_poll_size = 0; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); cfg.snap_generator_pool_size = 0; cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); @@ -1383,7 +1394,7 @@ mod tests { cfg.raft_election_timeout_ticks = 11; cfg.raft_store_max_leader_lease = ReadableDuration::secs(11); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg = Config::new(); @@ -1391,65 +1402,74 @@ mod tests { 
cfg.max_peer_down_duration = ReadableDuration::minutes(5); cfg.peer_stale_state_check_interval = ReadableDuration::minutes(5); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)).unwrap(); + cfg.validate(split_size, false, ReadableSize(0), false) + .unwrap(); assert_eq!(cfg.max_peer_down_duration, ReadableDuration::minutes(10)); cfg = Config::new(); cfg.raft_max_size_per_msg = ReadableSize(0); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg.raft_max_size_per_msg = ReadableSize::gb(64); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg.raft_max_size_per_msg = ReadableSize::gb(3); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)).unwrap(); + cfg.validate(split_size, false, ReadableSize(0), false) + .unwrap(); cfg = Config::new(); cfg.raft_entry_max_size = ReadableSize(0); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg.raft_entry_max_size = ReadableSize::mb(3073); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)) + cfg.validate(split_size, false, ReadableSize(0), false) .unwrap_err(); cfg.raft_entry_max_size = ReadableSize::gb(3); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)).unwrap(); + cfg.validate(split_size, false, ReadableSize(0), false) + .unwrap(); cfg = Config::new(); cfg.optimize_for(false); - cfg.validate(split_size, false, ReadableSize(0)).unwrap(); + cfg.validate(split_size, false, ReadableSize(0), false) + .unwrap(); assert_eq!(cfg.region_split_check_diff(), split_size / 16); cfg = Config::new(); cfg.optimize_for(false); - cfg.validate(split_size, true, split_size / 8).unwrap(); + cfg.validate(split_size, true, split_size / 8, false) + 
.unwrap(); assert_eq!(cfg.region_split_check_diff(), split_size / 16); cfg = Config::new(); cfg.optimize_for(false); - cfg.validate(split_size, true, split_size / 20).unwrap(); + cfg.validate(split_size, true, split_size / 20, false) + .unwrap(); assert_eq!(cfg.region_split_check_diff(), split_size / 20); cfg = Config::new(); cfg.region_split_check_diff = Some(ReadableSize(1)); cfg.optimize_for(false); - cfg.validate(split_size, true, split_size / 20).unwrap(); + cfg.validate(split_size, true, split_size / 20, false) + .unwrap(); assert_eq!(cfg.region_split_check_diff(), ReadableSize(1)); cfg = Config::new(); cfg.optimize_for(true); - cfg.validate(split_size, true, split_size / 20).unwrap(); + cfg.validate(split_size, true, split_size / 20, false) + .unwrap(); assert_eq!(cfg.raft_log_gc_size_limit(), ReadableSize::mb(200)); assert_eq!(cfg.raft_log_gc_count_limit(), 10000); cfg = Config::new(); cfg.optimize_for(false); - cfg.validate(split_size, true, split_size / 20).unwrap(); + cfg.validate(split_size, true, split_size / 20, false) + .unwrap(); assert_eq!(cfg.raft_log_gc_size_limit(), split_size * 3 / 4); assert_eq!( cfg.raft_log_gc_count_limit(), diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 0ade422573e..48580a864bb 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -31,7 +31,7 @@ use crossbeam::channel::{TryRecvError, TrySendError}; use engine_traits::{ util::SequenceNumber, DeleteStrategy, KvEngine, Mutable, PerfContext, PerfContextKind, RaftEngine, RaftEngineReadOnly, Range as EngineRange, Snapshot, SstMetaInfo, WriteBatch, - ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, + WriteOptions, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use fail::fail_point; use kvproto::{ @@ -1948,8 +1948,9 @@ where e ) }; + let wopts = WriteOptions::default(); engine - .delete_ranges_cf(cf, DeleteStrategy::DeleteFiles, &range) + 
.delete_ranges_cf(&wopts, cf, DeleteStrategy::DeleteFiles, &range) .unwrap_or_else(|e| fail_f(e, DeleteStrategy::DeleteFiles)); let strategy = if use_delete_range { @@ -1959,10 +1960,10 @@ where }; // Delete all remaining keys. engine - .delete_ranges_cf(cf, strategy.clone(), &range) + .delete_ranges_cf(&wopts, cf, strategy.clone(), &range) .unwrap_or_else(move |e| fail_f(e, strategy)); engine - .delete_ranges_cf(cf, DeleteStrategy::DeleteBlobs, &range) + .delete_ranges_cf(&wopts, cf, DeleteStrategy::DeleteBlobs, &range) .unwrap_or_else(move |e| fail_f(e, DeleteStrategy::DeleteBlobs)); } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 9a7df9d5473..0af232db86f 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -1337,9 +1337,11 @@ impl RaftPollerBuilder { .map(|(start, end)| Range::new(start, end)) .collect(); - self.engines - .kv - .delete_ranges_cfs(DeleteStrategy::DeleteFiles, &ranges)?; + self.engines.kv.delete_ranges_cfs( + &WriteOptions::default(), + DeleteStrategy::DeleteFiles, + &ranges, + )?; info!( "cleans up garbage data"; @@ -3041,6 +3043,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER drop(meta); if let Err(e) = self.ctx.engines.kv.delete_ranges_cfs( + &WriteOptions::default(), DeleteStrategy::DeleteByKey, &[Range::new(&start_key, &end_key)], ) { diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 2ea5eb947fd..63ca2f47ac0 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -17,7 +17,9 @@ use std::{ }; use collections::HashMap; -use engine_traits::{DeleteStrategy, KvEngine, Mutable, Range, WriteBatch, CF_LOCK, CF_RAFT}; +use engine_traits::{ + DeleteStrategy, KvEngine, Mutable, Range, WriteBatch, WriteOptions, CF_LOCK, CF_RAFT, +}; use fail::fail_point; use 
file_system::{IoType, WithIoType}; use kvproto::raft_serverpb::{PeerState, RaftApplyState, RegionLocalState}; @@ -583,10 +585,15 @@ where }) .collect(); self.engine - .delete_ranges_cfs(DeleteStrategy::DeleteFiles, &df_ranges) - .unwrap_or_else(|e| { + .delete_ranges_cfs( + &WriteOptions::default(), + DeleteStrategy::DeleteFiles, + &df_ranges, + ) + .map_err(|e| { error!("failed to delete files in range"; "err" => %e); - }); + }) + .unwrap(); (start_key, end_key) } @@ -647,19 +654,29 @@ where .collect(); self.engine - .delete_ranges_cfs(DeleteStrategy::DeleteFiles, &ranges) - .unwrap_or_else(|e| { + .delete_ranges_cfs( + &WriteOptions::default(), + DeleteStrategy::DeleteFiles, + &ranges, + ) + .map_err(|e| { error!("failed to delete files in range"; "err" => %e); - }); + }) + .unwrap(); if let Err(e) = self.delete_all_in_range(&ranges) { error!("failed to cleanup stale range"; "err" => %e); return; } self.engine - .delete_ranges_cfs(DeleteStrategy::DeleteBlobs, &ranges) - .unwrap_or_else(|e| { + .delete_ranges_cfs( + &WriteOptions::default(), + DeleteStrategy::DeleteBlobs, + &ranges, + ) + .map_err(|e| { error!("failed to delete blobs in range"; "err" => %e); - }); + }) + .unwrap(); for (_, key, _) in region_ranges { assert!( @@ -686,6 +703,7 @@ where } fn delete_all_in_range(&self, ranges: &[Range<'_>]) -> Result<()> { + let wopts = WriteOptions::default(); for cf in self.engine.cf_names() { // CF_LOCK usually contains fewer keys than other CFs, so we delete them by key. 
let strategy = if cf == CF_LOCK { @@ -697,7 +715,7 @@ where sst_path: self.mgr.get_temp_path_for_ingest(), } }; - box_try!(self.engine.delete_ranges_cf(cf, strategy, ranges)); + box_try!(self.engine.delete_ranges_cf(&wopts, cf, strategy, ranges)); } Ok(()) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index db46b45b1ce..0855e6ae3c9 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -725,6 +725,7 @@ where self.core.config.coprocessor.region_split_size(), self.core.config.coprocessor.enable_region_bucket(), self.core.config.coprocessor.region_bucket_size, + false, ) .unwrap_or_else(|e| fatal!("failed to validate raftstore config {}", e)); let raft_store = Arc::new(VersionTrack::new(self.core.config.raft_store.clone())); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 3ca2004930e..de0b2382690 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -700,6 +700,7 @@ where self.core.config.coprocessor.region_split_size(), self.core.config.coprocessor.enable_region_bucket(), self.core.config.coprocessor.region_bucket_size, + true, ) .unwrap_or_else(|e| fatal!("failed to validate raftstore config {}", e)); let raft_store = Arc::new(VersionTrack::new(self.core.config.raft_store.clone())); diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 0410d514ae4..d6d8838e1b7 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -216,6 +216,7 @@ impl Simulator for NodeCluster { cfg.coprocessor.region_split_size(), cfg.coprocessor.enable_region_bucket(), cfg.coprocessor.region_bucket_size, + true, ) .unwrap(); @@ -332,7 +333,12 @@ impl Simulator for NodeCluster { let mut raftstore_cfg = cfg.tikv.raft_store; raftstore_cfg.optimize_for(true); raftstore_cfg - .validate(region_split_size, enable_region_bucket, region_bucket_size) + .validate( + 
region_split_size, + enable_region_bucket, + region_bucket_size, + true, + ) .unwrap(); let raft_store = Arc::new(VersionTrack::new(raftstore_cfg)); diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index a439056617f..354c4b7fbb5 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -381,6 +381,7 @@ impl ServerCluster { cfg.coprocessor.region_split_size(), cfg.coprocessor.enable_region_bucket(), cfg.coprocessor.region_bucket_size, + true, ) .unwrap(); diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 3f0168fa361..5221613f2b6 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -243,6 +243,7 @@ impl Simulator for NodeCluster { cfg.coprocessor.region_split_size(), cfg.coprocessor.enable_region_bucket(), cfg.coprocessor.region_bucket_size, + false, ) .unwrap(); let bg_worker = WorkerBuilder::new("background").thread_count(2).create(); @@ -355,7 +356,12 @@ impl Simulator for NodeCluster { let mut raftstore_cfg = cfg.tikv.raft_store; raftstore_cfg.optimize_for(false); raftstore_cfg - .validate(region_split_size, enable_region_bucket, region_bucket_size) + .validate( + region_split_size, + enable_region_bucket, + region_bucket_size, + false, + ) .unwrap(); let raft_store = Arc::new(VersionTrack::new(raftstore_cfg)); cfg_controller.register( diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 261f3862e31..7a4a69eb64f 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -507,6 +507,7 @@ impl ServerCluster { cfg.coprocessor.region_split_size(), cfg.coprocessor.enable_region_bucket(), cfg.coprocessor.region_bucket_size, + false, ) .unwrap(); let health_service = HealthService::default(); diff --git a/src/config/mod.rs b/src/config/mod.rs index 3124c18fa9b..f06f2936374 100644 --- 
a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3588,6 +3588,7 @@ impl TikvConfig { self.coprocessor.region_split_size(), self.coprocessor.enable_region_bucket(), self.coprocessor.region_bucket_size, + self.storage.engine == EngineType::RaftKv2, )?; self.security.validate()?; self.import.validate()?; diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index e6a5b923628..e7b17ca409e 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -1098,6 +1098,7 @@ pub mod tests { // Clean the engine, prepare for later tests. raw_engine .delete_ranges_cf( + &WriteOptions::default(), CF_WRITE, DeleteStrategy::DeleteFiles, &[Range::new(b"z", b"zz")], diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index 08232189552..c608470ba87 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -736,6 +736,7 @@ impl GcRunner { for cf in cfs { local_storage .delete_ranges_cf( + &WriteOptions::default(), cf, DeleteStrategy::DeleteFiles, &[Range::new(&start_data_key, &end_data_key)], @@ -759,6 +760,7 @@ impl GcRunner { // TODO: set use_delete_range with config here. 
local_storage .delete_ranges_cf( + &WriteOptions::default(), cf, DeleteStrategy::DeleteByKey, &[Range::new(&start_data_key, &end_data_key)], @@ -770,6 +772,7 @@ impl GcRunner { })?; local_storage .delete_ranges_cf( + &WriteOptions::default(), cf, DeleteStrategy::DeleteBlobs, &[Range::new(&start_data_key, &end_data_key)], diff --git a/src/server/gc_worker/rawkv_compaction_filter.rs b/src/server/gc_worker/rawkv_compaction_filter.rs index b2af5b73118..0a3bf9c6eb3 100644 --- a/src/server/gc_worker/rawkv_compaction_filter.rs +++ b/src/server/gc_worker/rawkv_compaction_filter.rs @@ -359,7 +359,7 @@ pub mod tests { use std::time::Duration; use api_version::RawValue; - use engine_traits::{DeleteStrategy, Peekable, Range, CF_DEFAULT}; + use engine_traits::{DeleteStrategy, Peekable, Range, WriteOptions, CF_DEFAULT}; use kvproto::kvrpcpb::{ApiVersion, Context}; use tikv_kv::{Engine, Modify, WriteData}; use txn_types::TimeStamp; @@ -516,6 +516,7 @@ pub mod tests { ); raw_engine .delete_ranges_cf( + &WriteOptions::default(), CF_DEFAULT, DeleteStrategy::DeleteByKey, &[Range::new( diff --git a/src/server/reset_to_version.rs b/src/server/reset_to_version.rs index 0c1004bb305..a9002260ae9 100644 --- a/src/server/reset_to_version.rs +++ b/src/server/reset_to_version.rs @@ -132,6 +132,7 @@ impl ResetToVersionWorker { box_try!(wb.delete_cf(CF_DEFAULT, default_key.as_encoded())); } if !wb.is_empty() { + // TODO: v2 needs disable_wal=true. wb.write().unwrap(); wb.clear(); } @@ -163,6 +164,7 @@ impl ResetToVersionWorker { } } if !wb.is_empty() { + // TODO: v2 needs disable_wal=true. wb.write().unwrap(); wb.clear(); } diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index 5f6d7191239..9c90211c073 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -1,6 +1,5 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
-mod test_apply_trace; mod test_async_fetch; mod test_async_io; mod test_backup; diff --git a/tests/failpoints/cases/test_apply_trace.rs b/tests/failpoints/cases/test_apply_trace.rs deleted file mode 100644 index 4bd6b9dcbf7..00000000000 --- a/tests/failpoints/cases/test_apply_trace.rs +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. - -use std::{ - thread::sleep, - time::{Duration, Instant}, -}; - -use engine_traits::{ - MiscExt, RaftEngineReadOnly, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, DATA_CFS, -}; - -// It tests that delete range for an empty cf does not block the progress of -// persisted_applied. See the description of the PR #14905. -#[test] -fn test_delete_range_does_not_block_flushed_index() { - use test_raftstore_v2::*; - - let mut cluster = new_server_cluster(0, 3); - cluster.run(); - - for i in 0..100 { - let key = format!("k{:03}", i); - cluster.must_put_cf(CF_WRITE, key.as_bytes(), b"val"); - cluster.must_put_cf(CF_LOCK, key.as_bytes(), b"val"); - } - - cluster.must_delete_range_cf(CF_DEFAULT, b"k000", b"k020"); - cluster.must_delete_range_cf(CF_DEFAULT, b"k020", b"k040"); - - let raft_engine = cluster.get_raft_engine(1); - let mut cache = cluster.engines[0].0.get(1).unwrap(); - let tablet = cache.latest().unwrap(); - tablet.flush_cfs(DATA_CFS, true).unwrap(); - - let start = Instant::now(); - loop { - let admin_flush = raft_engine.get_flushed_index(1, CF_RAFT).unwrap().unwrap(); - if admin_flush > 200 { - return; - } - if start.elapsed() > Duration::from_secs(5) { - panic!( - "persisted_apply is not progressed, current persisted_apply {}", - admin_flush - ); - } - // wait for persist admin flush index - sleep(Duration::from_millis(200)); - } -} diff --git a/tests/integrations/raftstore/test_single.rs b/tests/integrations/raftstore/test_single.rs index d21a134a0c3..d6fef53f2cc 100644 --- a/tests/integrations/raftstore/test_single.rs +++ b/tests/integrations/raftstore/test_single.rs @@ -92,7 +92,7 @@ 
fn test_delete() { } #[test_case(test_raftstore::new_node_cluster)] -#[test_case(test_raftstore_v2::new_node_cluster)] +// v2 doesn't support RocksDB delete range. fn test_node_use_delete_range() { let mut cluster = new_cluster(0, 1); cluster.cfg.raft_store.use_delete_range = true; diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 76eee9b1322..bea48ed8d59 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -12,7 +12,7 @@ use engine_rocks::{ use engine_test::new_temp_engine; use engine_traits::{ CfOptionsExt, CompactExt, DeleteStrategy, Engines, KvEngine, MiscExt, Range, SstWriter, - SstWriterBuilder, SyncMutable, CF_DEFAULT, CF_WRITE, + SstWriterBuilder, SyncMutable, WriteOptions, CF_DEFAULT, CF_WRITE, }; use keys::data_key; use kvproto::metapb::{Peer, Region}; @@ -311,6 +311,7 @@ fn test_delete_files_in_range_for_titan() { engines .kv .delete_ranges_cfs( + &WriteOptions::default(), DeleteStrategy::DeleteFiles, &[Range::new( &data_key(Key::from_raw(b"a").as_encoded()), @@ -321,6 +322,7 @@ fn test_delete_files_in_range_for_titan() { engines .kv .delete_ranges_cfs( + &WriteOptions::default(), DeleteStrategy::DeleteByKey, &[Range::new( &data_key(Key::from_raw(b"a").as_encoded()), @@ -331,6 +333,7 @@ fn test_delete_files_in_range_for_titan() { engines .kv .delete_ranges_cfs( + &WriteOptions::default(), DeleteStrategy::DeleteBlobs, &[Range::new( &data_key(Key::from_raw(b"a").as_encoded()), From fa7baa9a5ab38a2c0872ec772252bf16158f6fb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 29 Jun 2023 13:25:43 +0800 Subject: [PATCH 0761/1149] tikv_utils, *: merge `after_start_wrapper` and `before_stop_wrapper`. (#15017) close tikv/tikv#15014 Replace `after_start_wrapper` and `before_stop_wrapper` with `with_sys_and_custom_hooks`. Replace no-op(how strange!) 
`after_start_wrapper` and `before_stop_wrapper` with `with_sys_hooks`. Signed-off-by: hillium Co-authored-by: Xinye Tao --- clippy.toml | 8 ++-- components/backup-stream/src/endpoint.rs | 5 +- components/backup/src/utils.rs | 16 ++++--- components/cdc/src/endpoint.rs | 6 +-- components/cdc/src/initializer.rs | 3 +- components/encryption/src/master_key/kms.rs | 3 +- components/resolved_ts/src/advance.rs | 3 +- components/resolved_ts/src/scanner.rs | 3 +- components/server/src/server.rs | 12 +++-- components/server/src/server2.rs | 12 +++-- components/snap_recovery/src/services.rs | 12 +++-- components/sst_importer/src/sst_importer.rs | 16 ++++--- components/test_raftstore-v2/src/server.rs | 3 +- components/test_raftstore/src/server.rs | 3 +- components/tikv_util/src/sys/thread.rs | 53 +++++++++------------ src/import/raft_writer.rs | 8 ++-- src/import/sst_service.rs | 24 +++++----- src/server/server.rs | 6 +-- src/server/snap.rs | 6 ++- src/server/status_server/mod.rs | 6 ++- src/server/tablet_snap.rs | 6 ++- 21 files changed, 107 insertions(+), 107 deletions(-) diff --git a/clippy.toml b/clippy.toml index 1530b3cb60b..08a5b6beb4c 100644 --- a/clippy.toml +++ b/clippy.toml @@ -1,11 +1,11 @@ disallowed-methods = [ { path = "std::thread::Builder::spawn", reason = "Wrapper function `::spawn_wrapper` should be used instead, refer to https://github.com/tikv/tikv/pull/12442 for more details." }, - { path = "tokio::runtime::builder::Builder::on_thread_start", reason = "Wrapper function `::after_start_wrapper` should be used instead, refer to https://github.com/tikv/tikv/pull/12442 for more details." }, - { path = "tokio::runtime::builder::Builder::on_thread_stop", reason = "Wrapper function `::before_stop_wrapper` should be used instead, refer to https://github.com/tikv/tikv/pull/12442 for more details." 
}, + { path = "tokio::runtime::builder::Builder::on_thread_start", reason = "Adding hooks directly will omit system hooks, please use ::with_sys_and_custom_hooks refer to https://github.com/tikv/tikv/pull/12442 and https://github.com/tikv/tikv/pull/15017 for more details." }, + { path = "tokio::runtime::builder::Builder::on_thread_stop", reason = "Adding hooks directly will omit system hooks, please use ::with_sys_and_custom_hooks refer to https://github.com/tikv/tikv/pull/12442 and https://github.com/tikv/tikv/pull/15017 for more details." }, - { path = "futures_executor::thread_pool::ThreadPoolBuilder::after_start", reason = "Wrapper function `::after_start_wrapper` should be used instead, refer to https://github.com/tikv/tikv/pull/12442 for more details." }, - { path = "futures_executor::thread_pool::ThreadPoolBuilder::before_stop", reason = "Wrapper function `::before_stop_wrapper` should be used instead, refer to https://github.com/tikv/tikv/pull/12442 for more details." }, + { path = "futures_executor::thread_pool::ThreadPoolBuilder::after_start", reason = "Adding hooks directly will omit system hooks, please use ::with_sys_and_custom_hooks refer to https://github.com/tikv/tikv/pull/12442 and https://github.com/tikv/tikv/pull/15017 for more details." }, + { path = "futures_executor::thread_pool::ThreadPoolBuilder::before_stop", reason = "Adding hooks directly will omit system hooks, please use ::with_sys_and_custom_hooks refer to https://github.com/tikv/tikv/pull/12442 and https://github.com/tikv/tikv/pull/15017 for more details." 
}, ] avoid-breaking-exported-api = false upper-case-acronyms-aggressive = true diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index c88b36da8db..90de326e93a 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -1027,10 +1027,9 @@ fn create_tokio_runtime(thread_count: usize, thread_name: &str) -> TokioResult TokioResu .thread_name(thread_name) .enable_io() .enable_time() - .after_start_wrapper(|| { - tikv_alloc::add_thread_memory_accessor(); - file_system::set_io_type(IoType::Export); - }) - .before_stop_wrapper(|| { - tikv_alloc::remove_thread_memory_accessor(); - }) + .with_sys_and_custom_hooks( + || { + tikv_alloc::add_thread_memory_accessor(); + file_system::set_io_type(IoType::Export); + }, + || { + tikv_alloc::remove_thread_memory_accessor(); + }, + ) .worker_threads(thread_count) .build() } diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index fd4580d4aea..eb6f488cdb9 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -380,16 +380,14 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, E: KvEngine> ScannerPool { Builder::new_multi_thread() .thread_name("inc-scan") .worker_threads(count) - .after_start_wrapper(|| {}) - .before_stop_wrapper(|| {}) + .with_sys_hooks() .build() .unwrap(), ); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 0855e6ae3c9..750633fa0d8 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -536,11 +536,13 @@ where .thread_name(thd_name!("debugger")) .enable_time() .worker_threads(1) - .after_start_wrapper(move || { - tikv_alloc::add_thread_memory_accessor(); - tikv_util::thread_group::set_properties(props.clone()); - }) - .before_stop_wrapper(tikv_alloc::remove_thread_memory_accessor) + .with_sys_and_custom_hooks( + move || { + tikv_alloc::add_thread_memory_accessor(); + 
tikv_util::thread_group::set_properties(props.clone()); + }, + tikv_alloc::remove_thread_memory_accessor, + ) .build() .unwrap(), ); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index de0b2382690..c67e1eab2e0 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -445,11 +445,13 @@ where Builder::new_multi_thread() .thread_name(thd_name!("debugger")) .worker_threads(1) - .after_start_wrapper(move || { - tikv_alloc::add_thread_memory_accessor(); - tikv_util::thread_group::set_properties(props.clone()); - }) - .before_stop_wrapper(tikv_alloc::remove_thread_memory_accessor) + .with_sys_and_custom_hooks( + move || { + tikv_alloc::add_thread_memory_accessor(); + tikv_util::thread_group::set_properties(props.clone()); + }, + tikv_alloc::remove_thread_memory_accessor, + ) .build() .unwrap(), ); diff --git a/components/snap_recovery/src/services.rs b/components/snap_recovery/src/services.rs index 98d1942c7ff..a788ce4053e 100644 --- a/components/snap_recovery/src/services.rs +++ b/components/snap_recovery/src/services.rs @@ -73,11 +73,13 @@ impl RecoveryService { let threads = ThreadPoolBuilder::new() .pool_size(4) .name_prefix("recovery-service") - .after_start_wrapper(move || { - tikv_util::thread_group::set_properties(props.clone()); - tikv_alloc::add_thread_memory_accessor(); - }) - .before_stop_wrapper(|| tikv_alloc::remove_thread_memory_accessor()) + .with_sys_and_custom_hooks( + move || { + tikv_util::thread_group::set_properties(props.clone()); + tikv_alloc::add_thread_memory_accessor(); + }, + || tikv_alloc::remove_thread_memory_accessor(), + ) .create() .unwrap(); diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 90226668e5f..bf05600bbfb 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -181,13 +181,15 @@ impl SstImporter { let download_rt = 
tokio::runtime::Builder::new_multi_thread() .worker_threads(1) .thread_name("sst_import_misc") - .after_start_wrapper(|| { - tikv_alloc::add_thread_memory_accessor(); - file_system::set_io_type(IoType::Import); - }) - .before_stop_wrapper(|| { - tikv_alloc::remove_thread_memory_accessor(); - }) + .with_sys_and_custom_hooks( + || { + tikv_alloc::add_thread_memory_accessor(); + file_system::set_io_type(IoType::Import); + }, + || { + tikv_alloc::remove_thread_memory_accessor(); + }, + ) .enable_all() .build()?; download_rt.spawn(cached_storage.gc_loop()); diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 354c4b7fbb5..b398d51ad4c 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -574,8 +574,7 @@ impl ServerCluster { TokioBuilder::new_multi_thread() .thread_name(thd_name!("debugger")) .worker_threads(1) - .after_start_wrapper(|| {}) - .before_stop_wrapper(|| {}) + .with_sys_hooks() .build() .unwrap(), ); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 7a4a69eb64f..30ad3175b2f 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -484,8 +484,7 @@ impl ServerCluster { TokioBuilder::new_multi_thread() .thread_name(thd_name!("debugger")) .worker_threads(1) - .after_start_wrapper(|| {}) - .before_stop_wrapper(|| {}) + .with_sys_hooks() .build() .unwrap(), ); diff --git a/components/tikv_util/src/sys/thread.rs b/components/tikv_util/src/sys/thread.rs index 1f138669b96..46fddf1f513 100644 --- a/components/tikv_util/src/sys/thread.rs +++ b/components/tikv_util/src/sys/thread.rs @@ -373,13 +373,17 @@ pub trait StdThreadBuildWrapper { } pub trait ThreadBuildWrapper { - fn after_start_wrapper(&mut self, f: F) -> &mut Self + /// Register all system hooks along with a custom hook pair. 
+ fn with_sys_and_custom_hooks(&mut self, after_start: F1, before_end: F2) -> &mut Self where - F: Fn() + Send + Sync + 'static; + F1: Fn() + Send + Sync + 'static, + F2: Fn() + Send + Sync + 'static; - fn before_stop_wrapper(&mut self, f: F) -> &mut Self - where - F: Fn() + Send + Sync + 'static; + /// Register some generic hooks like memory tracing or thread lifetime + /// tracing. + fn with_sys_hooks(&mut self) -> &mut Self { + self.with_sys_and_custom_hooks(|| {}, || {}) + } } lazy_static::lazy_static! { @@ -432,50 +436,38 @@ impl StdThreadBuildWrapper for std::thread::Builder { } impl ThreadBuildWrapper for tokio::runtime::Builder { - fn after_start_wrapper(&mut self, f: F) -> &mut Self + fn with_sys_and_custom_hooks(&mut self, start: F1, end: F2) -> &mut Self where - F: Fn() + Send + Sync + 'static, + F1: Fn() + Send + Sync + 'static, + F2: Fn() + Send + Sync + 'static, { #[allow(clippy::disallowed_methods)] self.on_thread_start(move || { call_thread_start_hooks(); add_thread_name_to_map(); - f(); + start(); }) - } - - fn before_stop_wrapper(&mut self, f: F) -> &mut Self - where - F: Fn() + Send + Sync + 'static, - { - #[allow(clippy::disallowed_methods)] - self.on_thread_stop(move || { - f(); + .on_thread_stop(move || { + end(); remove_thread_name_from_map(); }) } } impl ThreadBuildWrapper for futures::executor::ThreadPoolBuilder { - fn after_start_wrapper(&mut self, f: F) -> &mut Self + fn with_sys_and_custom_hooks(&mut self, start: F1, end: F2) -> &mut Self where - F: Fn() + Send + Sync + 'static, + F1: Fn() + Send + Sync + 'static, + F2: Fn() + Send + Sync + 'static, { #[allow(clippy::disallowed_methods)] self.after_start(move |_| { call_thread_start_hooks(); add_thread_name_to_map(); - f(); + start(); }) - } - - fn before_stop_wrapper(&mut self, f: F) -> &mut Self - where - F: Fn() + Send + Sync + 'static, - { - #[allow(clippy::disallowed_methods)] - self.before_stop(move |_| { - f(); + .before_stop(move |_| { + end(); remove_thread_name_from_map(); }) } 
@@ -599,8 +591,7 @@ mod tests { block_on( tokio::runtime::Builder::new_multi_thread() .thread_name(thread_name) - .after_start_wrapper(|| {}) - .before_stop_wrapper(|| {}) + .with_sys_hooks() .build() .unwrap() .spawn(async move { get_name_fn() }), diff --git a/src/import/raft_writer.rs b/src/import/raft_writer.rs index a40297b932e..e66bcededc2 100644 --- a/src/import/raft_writer.rs +++ b/src/import/raft_writer.rs @@ -259,9 +259,11 @@ mod test { let rt = Builder::new_multi_thread() .enable_all() .worker_threads(1) - .after_start_wrapper(move || tikv_kv::set_tls_engine(engine.lock().unwrap().clone())) - // SAFETY: see the line above. - .before_stop_wrapper(|| unsafe { tikv_kv::destroy_tls_engine::() }) + .with_sys_and_custom_hooks( + move || tikv_kv::set_tls_engine(engine.lock().unwrap().clone()), + // SAFETY: see the line above. + || unsafe { tikv_kv::destroy_tls_engine::() }, + ) .build() .unwrap(); let handle = diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index f0151ffc8f5..aeee3bbcb73 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -303,17 +303,19 @@ impl ImportSstService { .worker_threads(cfg.num_threads) .enable_all() .thread_name("sst-importer") - .after_start_wrapper(move || { - tikv_util::thread_group::set_properties(props.clone()); - tikv_alloc::add_thread_memory_accessor(); - set_io_type(IoType::Import); - tikv_kv::set_tls_engine(eng.lock().unwrap().clone()); - }) - .before_stop_wrapper(move || { - tikv_alloc::remove_thread_memory_accessor(); - // SAFETY: we have set the engine at some lines above with type `E`. 
- unsafe { tikv_kv::destroy_tls_engine::() }; - }) + .with_sys_and_custom_hooks( + move || { + tikv_util::thread_group::set_properties(props.clone()); + tikv_alloc::add_thread_memory_accessor(); + set_io_type(IoType::Import); + tikv_kv::set_tls_engine(eng.lock().unwrap().clone()); + }, + move || { + tikv_alloc::remove_thread_memory_accessor(); + // SAFETY: we have set the engine at some lines above with type `E`. + unsafe { tikv_kv::destroy_tls_engine::() }; + }, + ) .build() .unwrap(); if let LocalTablets::Singleton(tablet) = &tablets { diff --git a/src/server/server.rs b/src/server/server.rs index 8a50f44f363..26ce0b29bfc 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -111,8 +111,7 @@ where RuntimeBuilder::new_multi_thread() .thread_name(STATS_THREAD_PREFIX) .worker_threads(cfg.value().stats_concurrency) - .after_start_wrapper(|| {}) - .before_stop_wrapper(|| {}) + .with_sys_hooks() .build() .unwrap(), ) @@ -566,8 +565,7 @@ mod tests { TokioBuilder::new_multi_thread() .thread_name(thd_name!("debugger")) .worker_threads(1) - .after_start_wrapper(|| {}) - .before_stop_wrapper(|| {}) + .with_sys_hooks() .build() .unwrap(), ); diff --git a/src/server/snap.rs b/src/server/snap.rs index 4324f17459e..7c6c5abaa20 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -385,8 +385,10 @@ impl Runner { pool: RuntimeBuilder::new_multi_thread() .thread_name(thd_name!("snap-sender")) .worker_threads(DEFAULT_POOL_SIZE) - .after_start_wrapper(tikv_alloc::add_thread_memory_accessor) - .before_stop_wrapper(tikv_alloc::remove_thread_memory_accessor) + .with_sys_and_custom_hooks( + tikv_alloc::add_thread_memory_accessor, + tikv_alloc::remove_thread_memory_accessor, + ) .build() .unwrap(), raft_router: r, diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 1b689138f11..2de6fee4a91 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -111,8 +111,10 @@ where .enable_all() 
.worker_threads(status_thread_pool_size) .thread_name("status-server") - .after_start_wrapper(|| debug!("Status server started")) - .before_stop_wrapper(|| debug!("stopping status server")) + .with_sys_and_custom_hooks( + || debug!("Status server started"), + || debug!("stopping status server"), + ) .build()?; let (tx, rx) = oneshot::channel::<()>(); diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index e9fa3c42af8..ce53a61125f 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -817,8 +817,10 @@ impl TabletRunner { pool: RuntimeBuilder::new_multi_thread() .thread_name(thd_name!("tablet-snap-sender")) .worker_threads(DEFAULT_POOL_SIZE) - .after_start_wrapper(tikv_alloc::add_thread_memory_accessor) - .before_stop_wrapper(tikv_alloc::remove_thread_memory_accessor) + .with_sys_and_custom_hooks( + tikv_alloc::add_thread_memory_accessor, + tikv_alloc::remove_thread_memory_accessor, + ) .build() .unwrap(), raft_router: r, From 56130211bf21cb3aea9c5ecec4952f6fedb33093 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 29 Jun 2023 17:51:48 +0800 Subject: [PATCH 0762/1149] tikv_alloc: added a simple tracing to allocation (#14866) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ref tikv/tikv#8235 Signed-off-by: 山岚 <36239017+YuJuncen@users.noreply.github.com> Signed-off-by: hillium --- cmd/tikv-ctl/src/main.rs | 2 - components/backup-stream/src/endpoint.rs | 6 +- .../backup-stream/src/subscription_manager.rs | 2 - components/backup/src/utils.rs | 5 +- components/raftstore/src/store/worker/pd.rs | 3 +- .../raftstore/src/store/worker/region.rs | 3 +- components/server/src/server.rs | 3 +- components/server/src/server2.rs | 3 +- components/snap_recovery/src/data_resolver.rs | 6 - .../src/region_meta_collector.rs | 3 - components/snap_recovery/src/services.rs | 6 +- components/sst_importer/src/sst_importer.rs | 5 +- 
components/tikv_alloc/src/default.rs | 8 +- components/tikv_alloc/src/jemalloc.rs | 211 +++++++++++- components/tikv_alloc/src/lib.rs | 1 + .../src/metrics/allocator_metrics.rs | 38 ++- components/tikv_util/src/sys/thread.rs | 25 +- components/tikv_util/src/time.rs | 3 +- components/tikv_util/src/timer.rs | 2 +- components/tikv_util/src/worker/future.rs | 2 - components/tikv_util/src/yatp_pool/mod.rs | 5 +- metrics/grafana/tikv_details.json | 323 +++++++++++++++++- src/import/sst_service.rs | 3 +- src/server/debug.rs | 8 +- src/server/debug2.rs | 1 - src/server/gc_worker/gc_manager.rs | 3 +- src/server/reset_to_version.rs | 3 - src/server/snap.rs | 5 +- src/server/tablet_snap.rs | 5 +- .../singleton_flow_controller.rs | 2 - .../flow_controller/tablet_flow_controller.rs | 2 - 31 files changed, 603 insertions(+), 94 deletions(-) diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index ac31f0700f9..dda59ac14d3 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -779,7 +779,6 @@ fn compact_whole_cluster( let h = thread::Builder::new() .name(format!("compact-{}", addr)) .spawn_wrapper(move || { - tikv_alloc::add_thread_memory_accessor(); let debug_executor = new_debug_executor(&cfg, None, Some(&addr), mgr); for cf in cfs { debug_executor.compact( @@ -792,7 +791,6 @@ fn compact_whole_cluster( bottommost, ); } - tikv_alloc::remove_thread_memory_accessor(); }) .unwrap(); handles.push(h); diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 90de326e93a..6f22cc67997 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -1025,13 +1025,9 @@ fn create_tokio_runtime(thread_count: usize, thread_name: &str) -> TokioResult Sc let rx = rx.clone(); let stopped = stopped.clone(); pool.spawn(move |_: &mut YatpHandle<'_>| { - tikv_alloc::add_thread_memory_accessor(); let _io_guard = file_system::WithIoType::new(file_system::IoType::Replication); 
scan_executor_loop(init, rx, stopped); - tikv_alloc::remove_thread_memory_accessor(); }) } ScanPoolHandle { diff --git a/components/backup/src/utils.rs b/components/backup/src/utils.rs index cf314810a9d..9d85eb664eb 100644 --- a/components/backup/src/utils.rs +++ b/components/backup/src/utils.rs @@ -94,12 +94,9 @@ pub fn create_tokio_runtime(thread_count: usize, thread_name: &str) -> TokioResu .enable_time() .with_sys_and_custom_hooks( || { - tikv_alloc::add_thread_memory_accessor(); file_system::set_io_type(IoType::Export); }, - || { - tikv_alloc::remove_thread_memory_accessor(); - }, + || {}, ) .worker_threads(thread_count) .build() diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 13cbdf2cfa5..3ee47b2e8a5 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -643,7 +643,7 @@ where .name(thd_name!("stats-monitor")) .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); - tikv_alloc::add_thread_memory_accessor(); + // Create different `ThreadInfoStatistics` for different purposes to // make sure the record won't be disturbed. 
let mut collect_store_infos_thread_stats = ThreadInfoStatistics::new(); @@ -689,7 +689,6 @@ where } timer_cnt += 1; } - tikv_alloc::remove_thread_memory_accessor(); })?; self.handle = Some(h); diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 63ca2f47ac0..525b8ce15fc 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -847,7 +847,7 @@ where self.pool.spawn(async move { SNAP_GEN_WAIT_DURATION_HISTOGRAM .observe(scheduled_time.saturating_elapsed_secs()); - tikv_alloc::add_thread_memory_accessor(); + ctx.handle_gen( region_id, last_applied_term, @@ -858,7 +858,6 @@ where for_balance, allow_multi_files_snapshot, ); - tikv_alloc::remove_thread_memory_accessor(); }); } task @ Task::Apply { .. } => { diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 750633fa0d8..16d12e48aa9 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -538,10 +538,9 @@ where .worker_threads(1) .with_sys_and_custom_hooks( move || { - tikv_alloc::add_thread_memory_accessor(); tikv_util::thread_group::set_properties(props.clone()); }, - tikv_alloc::remove_thread_memory_accessor, + || {}, ) .build() .unwrap(), diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index c67e1eab2e0..4587b6700fe 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -447,10 +447,9 @@ where .worker_threads(1) .with_sys_and_custom_hooks( move || { - tikv_alloc::add_thread_memory_accessor(); tikv_util::thread_group::set_properties(props.clone()); }, - tikv_alloc::remove_thread_memory_accessor, + || {}, ) .build() .unwrap(), diff --git a/components/snap_recovery/src/data_resolver.rs b/components/snap_recovery/src/data_resolver.rs index 68b9a7d2bc6..90edb8d6348 100644 --- a/components/snap_recovery/src/data_resolver.rs +++ 
b/components/snap_recovery/src/data_resolver.rs @@ -96,13 +96,10 @@ impl DataResolverManager { .name("cleanup_lock".to_string()) .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); - tikv_alloc::add_thread_memory_accessor(); worker .cleanup_lock(&mut wb) .expect("cleanup lock failed when delete data from invalid cf"); - - tikv_alloc::remove_thread_memory_accessor(); }) .expect("failed to spawn resolve_kv_data thread"); self.workers.lock().unwrap().push(handle); @@ -123,14 +120,11 @@ impl DataResolverManager { .name("resolve_write".to_string()) .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); - tikv_alloc::add_thread_memory_accessor(); if let Err(e) = worker.resolve_write(&mut wb) { error!("failed to resolve write cf"; "error" => ?e); } - - tikv_alloc::remove_thread_memory_accessor(); }) .expect("failed to spawn resolve_kv_data thread"); diff --git a/components/snap_recovery/src/region_meta_collector.rs b/components/snap_recovery/src/region_meta_collector.rs index 16e53b3b88b..bb8f68dbc68 100644 --- a/components/snap_recovery/src/region_meta_collector.rs +++ b/components/snap_recovery/src/region_meta_collector.rs @@ -55,13 +55,10 @@ impl RegionMetaCollector { .name("collector_region_meta".to_string()) .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); - tikv_alloc::add_thread_memory_accessor(); worker .collect_report() .expect("collect region meta and report to br failure."); - - tikv_alloc::remove_thread_memory_accessor(); }) .expect("failed to spawn collector_region_meta thread"), ); diff --git a/components/snap_recovery/src/services.rs b/components/snap_recovery/src/services.rs index a788ce4053e..08201aebf3f 100644 --- a/components/snap_recovery/src/services.rs +++ b/components/snap_recovery/src/services.rs @@ -76,9 +76,8 @@ impl RecoveryService { .with_sys_and_custom_hooks( move || { tikv_util::thread_group::set_properties(props.clone()); - tikv_alloc::add_thread_memory_accessor(); }, - || 
tikv_alloc::remove_thread_memory_accessor(), + || {}, ) .create() .unwrap(); @@ -161,7 +160,7 @@ fn compact(engine: RocksEngine) -> Result<()> { .name(format!("compact-{}", cf)) .spawn_wrapper(move || { info!("recovery starts manual compact"; "cf" => cf.clone()); - tikv_alloc::add_thread_memory_accessor(); + let db = kv_db.as_inner(); let handle = get_cf_handle(db, cf.as_str()).unwrap(); let mut compact_opts = CompactOptions::new(); @@ -169,7 +168,6 @@ fn compact(engine: RocksEngine) -> Result<()> { compact_opts.set_exclusive_manual_compaction(false); compact_opts.set_bottommost_level_compaction(DBBottommostLevelCompaction::Skip); db.compact_range_cf_opt(handle, &compact_opts, None, None); - tikv_alloc::remove_thread_memory_accessor(); info!("recovery finishes manual compact"; "cf" => cf); }) diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index bf05600bbfb..d6d3dc3e46e 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -183,12 +183,9 @@ impl SstImporter { .thread_name("sst_import_misc") .with_sys_and_custom_hooks( || { - tikv_alloc::add_thread_memory_accessor(); file_system::set_io_type(IoType::Import); }, - || { - tikv_alloc::remove_thread_memory_accessor(); - }, + || {}, ) .enable_all() .build()?; diff --git a/components/tikv_alloc/src/default.rs b/components/tikv_alloc/src/default.rs index 6685baa8271..2674331c3cd 100644 --- a/components/tikv_alloc/src/default.rs +++ b/components/tikv_alloc/src/default.rs @@ -24,6 +24,12 @@ pub fn deactivate_prof() -> ProfResult<()> { Err(ProfError::MemProfilingNotEnabled) } -pub fn add_thread_memory_accessor() {} +/// # Safety +/// +/// It is safe. The unsafe marker is just for matching the function signature. +/// (So clippy will get happy even jemalloc isn't enabled.) 
+pub unsafe fn add_thread_memory_accessor() {} pub fn remove_thread_memory_accessor() {} + +pub fn iterate_thread_allocation_stats(_f: impl FnMut(&str, u64, u64)) {} diff --git a/components/tikv_alloc/src/jemalloc.rs b/components/tikv_alloc/src/jemalloc.rs index e8a21115142..876afa9fcd5 100644 --- a/components/tikv_alloc/src/jemalloc.rs +++ b/components/tikv_alloc/src/jemalloc.rs @@ -2,7 +2,13 @@ // The implementation of this crate when jemalloc is turned on -use std::{collections::HashMap, ptr, slice, sync::Mutex, thread}; +use std::{ + collections::HashMap, + ptr::{self, NonNull}, + slice, + sync::Mutex, + thread, +}; use libc::{self, c_char, c_void}; use tikv_jemalloc_ctl::{epoch, stats, Error}; @@ -21,20 +27,103 @@ lazy_static! { Mutex::new(HashMap::new()); } +/// The struct for tracing the statistic of another thread. +/// The target pointer should be bound to some TLS of another thread, this +/// structure is just "peeking" it -- with out modifying. +// It should be covariant so we wrap it with `NonNull`. +#[repr(transparent)] +struct PeekableRemoteStat(Option>); + +// SAFETY: all constructors of `PeekableRemoteStat` returns pointer points to a +// thread local variable. Once this be sent, a reasonable life time of this +// variable should be as long as the thread holding the underlying thread local +// variable. But it is impossible to express such lifetime in current Rust. +// Then it is the user's responsibility to trace that lifetime. +unsafe impl Send for PeekableRemoteStat {} + +impl PeekableRemoteStat { + /// Try access the underlying data. When the pointer is `nullptr`, returns + /// `None`. + /// + /// # Safety + /// + /// The pointer should not be dangling. (i.e. the thread to be traced should + /// be accessible.) 
+ unsafe fn peek(&self) -> Option { + self.0 + .map(|nlp| unsafe { core::intrinsics::atomic_load_seqcst(nlp.as_ptr()) }) + } + + fn from_raw(ptr: *mut T) -> Self { + Self(NonNull::new(ptr)) + } +} + +impl PeekableRemoteStat { + fn allocated() -> Self { + // SAFETY: it is transparent. + // NOTE: perhaps we'd better add something like `as_raw()` for `ThreadLocal`... + Self::from_raw( + tikv_jemalloc_ctl::thread::allocatedp::read() + .map(|x| unsafe { std::mem::transmute(x) }) + .unwrap_or(std::ptr::null_mut()), + ) + } + + fn deallocated() -> Self { + // SAFETY: it is transparent. + Self::from_raw( + tikv_jemalloc_ctl::thread::deallocatedp::read() + .map(|x| unsafe { std::mem::transmute(x) }) + .unwrap_or(std::ptr::null_mut()), + ) + } +} + struct MemoryStatsAccessor { - // TODO: trace arena, allocated, deallocated. Original implement doesn't - // work actually. + allocated: PeekableRemoteStat, + deallocated: PeekableRemoteStat, thread_name: String, } -pub fn add_thread_memory_accessor() { +impl MemoryStatsAccessor { + fn get_allocated(&self) -> u64 { + // SAFETY: `add_thread_memory_accessor` is unsafe, and that is the only way for + // outer crates to create this. + unsafe { self.allocated.peek().unwrap_or_default() } + } + + fn get_deallocated(&self) -> u64 { + // SAFETY: `add_thread_memory_accessor` is unsafe, and that is the only way for + // outer crates to create this. + unsafe { self.deallocated.peek().unwrap_or_default() } + } +} + +/// Register the current thread to the collector that collects the jemalloc +/// allocation / deallocation info. +/// +/// Generally you should call this via `spawn_wrapper`s instead of invoke this +/// directly. The former is a safe function. +/// +/// # Safety +/// +/// Make sure the `remove_thread_memory_accessor` is called before the thread +/// exits. 
+pub unsafe fn add_thread_memory_accessor() { let mut thread_memory_map = THREAD_MEMORY_MAP.lock().unwrap(); - thread_memory_map.insert( - thread::current().id(), - MemoryStatsAccessor { - thread_name: thread::current().name().unwrap().to_string(), - }, - ); + thread_memory_map + .entry(thread::current().id()) + .or_insert_with(|| { + let allocated = PeekableRemoteStat::allocated(); + let deallocated = PeekableRemoteStat::deallocated(); + + MemoryStatsAccessor { + thread_name: thread::current().name().unwrap_or("").to_string(), + allocated, + deallocated, + } + }); } pub fn remove_thread_memory_accessor() { @@ -64,7 +153,15 @@ pub fn dump_stats() -> String { let thread_memory_map = THREAD_MEMORY_MAP.lock().unwrap(); for (_, accessor) in thread_memory_map.iter() { - memory_stats.push_str(format!("Thread [{}]: \n", accessor.thread_name).as_str()); + let alloc = accessor.get_allocated(); + let dealloc = accessor.get_deallocated(); + memory_stats.push_str( + format!( + "Thread [{}]: alloc_bytes={alloc},dealloc_bytes={dealloc}\n", + accessor.thread_name + ) + .as_str(), + ); } memory_stats } @@ -91,6 +188,35 @@ pub fn fetch_stats() -> Result, Error> { ])) } +/// remove the postfix of threads generated by the YATP (-*). +/// YATP will append the id of the threads in a thread pool, which will bring +/// too many labels to the metric (and usually the memory usage should be evenly +/// distributed among these threads). +/// Fine-grained memory statistic is still available in the interface provided +/// for `tikv-ctl`. +fn trim_yatp_suffix(s: &str) -> &str { + s.trim_end_matches(|c: char| c.is_ascii_digit() || c == '-') +} + +/// Iterate over the allocation stat. +/// Format of the callback: `(name, allocated, deallocated)`. +pub fn iterate_thread_allocation_stats(mut f: impl FnMut(&str, u64, u64)) { + // Given we have called `epoch::advance()` in `fetch_stats`, we (magically!) + // skip advancing the epoch here. 
+ let thread_memory_map = THREAD_MEMORY_MAP.lock().unwrap(); + let mut collected = HashMap::<&str, (u64, u64)>::with_capacity(thread_memory_map.len()); + for (_, accessor) in thread_memory_map.iter() { + let ent = collected + .entry(trim_yatp_suffix(&accessor.thread_name)) + .or_default(); + ent.0 += accessor.get_allocated(); + ent.1 += accessor.get_deallocated(); + } + for (name, val) in collected { + f(name, val.0, val.1) + } +} + #[allow(clippy::cast_ptr_alignment)] extern "C" fn write_cb(printer: *mut c_void, msg: *const c_char) { unsafe { @@ -106,11 +232,72 @@ extern "C" fn write_cb(printer: *mut c_void, msg: *const c_char) { #[cfg(test)] mod tests { - + use crate::{ + add_thread_memory_accessor, imp::THREAD_MEMORY_MAP, remove_thread_memory_accessor, + }; + + fn assert_delta(name: impl std::fmt::Display, delta: f64, a: u64, b: u64) { + let (base, diff) = if a > b { (a, a - b) } else { (b, b - a) }; + let error = diff as f64 / base as f64; + assert!( + error < delta, + "{name}: the error is too huge: a={a}, b={b}, base={base}, diff={diff}, error={error}" + ); + } #[test] fn dump_stats() { assert_ne!(super::dump_stats().len(), 0); } + + #[test] + fn test_allocation_stat() { + let (tx, rx) = std::sync::mpsc::channel(); + let mut threads = vec![]; + for i in 1..6 { + let tx = tx.clone(); + // It is in test... let skip calling hooks. + #[allow(clippy::disallowed_methods)] + let hnd = std::thread::Builder::new() + .name(format!("test_allocation_stat_{i}")) + .spawn(move || { + if i == 5 { + return; + } + // SAFETY: we call `remove_thread_memory_accessor` below. 
+ unsafe { + add_thread_memory_accessor(); + } + let (tx2, rx2) = std::sync::mpsc::channel::<()>(); + let v = vec![42u8; 1024 * 1024 * i]; + drop(v); + let _v2 = vec![42u8; 512 * 1024 * i]; + tx.send((i, std::thread::current().id(), tx2)).unwrap(); + drop(tx); + rx2.recv().unwrap(); + remove_thread_memory_accessor(); + }) + .unwrap(); + threads.push(hnd); + } + drop(tx); + + let chs = rx.into_iter().collect::>(); + let l = THREAD_MEMORY_MAP.lock().unwrap(); + for (i, tid, tx) in chs { + let a = l.get(&tid).unwrap(); + unsafe { + let alloc = a.allocated.peek().unwrap(); + let dealloc = a.deallocated.peek().unwrap(); + assert_delta(i, 0.05, alloc, (1024 + 512) * 1024 * i as u64); + assert_delta(i, 0.05, dealloc, (1024) * 1024 * i as u64); + } + tx.send(()).unwrap(); + } + drop(l); + for th in threads.into_iter() { + th.join().unwrap(); + } + } } #[cfg(feature = "mem-profiling")] diff --git a/components/tikv_alloc/src/lib.rs b/components/tikv_alloc/src/lib.rs index 507a1195a38..cbe1d8590bf 100644 --- a/components/tikv_alloc/src/lib.rs +++ b/components/tikv_alloc/src/lib.rs @@ -84,6 +84,7 @@ #![cfg_attr(test, feature(test))] #![cfg_attr(test, feature(custom_test_frameworks))] #![cfg_attr(test, test_runner(runner::run_env_conditional_tests))] +#![feature(core_intrinsics)] #[cfg(feature = "jemalloc")] #[macro_use] diff --git a/components/tikv_util/src/metrics/allocator_metrics.rs b/components/tikv_util/src/metrics/allocator_metrics.rs index 256a83710f6..260aa88ac8e 100644 --- a/components/tikv_util/src/metrics/allocator_metrics.rs +++ b/components/tikv_util/src/metrics/allocator_metrics.rs @@ -12,18 +12,32 @@ pub fn monitor_allocator_stats>(namespace: S) -> Result<()> { struct AllocStatsCollector { descs: Vec, - metrics: IntGaugeVec, + memory_stats: IntGaugeVec, + allocation: IntGaugeVec, } impl AllocStatsCollector { fn new>(namespace: S) -> Result { + let ns = namespace.into(); let stats = IntGaugeVec::new( - Opts::new("allocator_stats", "Allocator 
stats").namespace(namespace.into()), + Opts::new("allocator_stats", "Allocator stats").namespace(ns.clone()), &["type"], )?; + let allocation = IntGaugeVec::new( + Opts::new( + "allocator_thread_allocation", + "The allocation statistic for threads.", + ) + .namespace(ns), + &["type", "thread_name"], + )?; Ok(AllocStatsCollector { - descs: stats.desc().into_iter().cloned().collect(), - metrics: stats, + descs: [&stats, &allocation] + .iter() + .flat_map(|m| m.desc().into_iter().cloned()) + .collect(), + allocation, + memory_stats: stats, }) } } @@ -36,9 +50,21 @@ impl Collector for AllocStatsCollector { fn collect(&self) -> Vec { if let Ok(Some(stats)) = tikv_alloc::fetch_stats() { for stat in stats { - self.metrics.with_label_values(&[stat.0]).set(stat.1 as i64); + self.memory_stats + .with_label_values(&[stat.0]) + .set(stat.1 as i64); } } - self.metrics.collect() + tikv_alloc::iterate_thread_allocation_stats(|name, alloc, dealloc| { + self.allocation + .with_label_values(&["alloc", name]) + .set(alloc as _); + self.allocation + .with_label_values(&["dealloc", name]) + .set(dealloc as _); + }); + let mut g = self.memory_stats.collect(); + g.extend(self.allocation.collect().into_iter()); + g } } diff --git a/components/tikv_util/src/sys/thread.rs b/components/tikv_util/src/sys/thread.rs index 46fddf1f513..818d8795b31 100644 --- a/components/tikv_util/src/sys/thread.rs +++ b/components/tikv_util/src/sys/thread.rs @@ -7,6 +7,7 @@ use std::{io, io::Result, sync::Mutex, thread}; use collections::HashMap; +use tikv_alloc::{add_thread_memory_accessor, remove_thread_memory_accessor}; /// A cross-platform CPU statistics data structure. #[derive(Debug, Copy, Clone, Default, PartialEq)] @@ -427,10 +428,14 @@ impl StdThreadBuildWrapper for std::thread::Builder { #[allow(clippy::disallowed_methods)] self.spawn(|| { call_thread_start_hooks(); + // SAFETY: we will call `remove_thread_memory_accessor` at defer. 
+ unsafe { add_thread_memory_accessor() }; add_thread_name_to_map(); - let res = f(); - remove_thread_name_from_map(); - res + defer! {{ + remove_thread_name_from_map(); + remove_thread_memory_accessor(); + }}; + f() }) } } @@ -444,12 +449,19 @@ impl ThreadBuildWrapper for tokio::runtime::Builder { #[allow(clippy::disallowed_methods)] self.on_thread_start(move || { call_thread_start_hooks(); + // SAFETY: we will call `remove_thread_memory_accessor` at + // `before-stop_wrapper`. + // FIXME: What if the user only calls `after_start_wrapper`? + unsafe { + add_thread_memory_accessor(); + } add_thread_name_to_map(); start(); }) .on_thread_stop(move || { end(); remove_thread_name_from_map(); + remove_thread_memory_accessor(); }) } } @@ -463,12 +475,19 @@ impl ThreadBuildWrapper for futures::executor::ThreadPoolBuilder { #[allow(clippy::disallowed_methods)] self.after_start(move |_| { call_thread_start_hooks(); + // SAFETY: we will call `remove_thread_memory_accessor` at + // `before-stop_wrapper`. + // FIXME: What if the user only calls `after_start_wrapper`? 
+ unsafe { + add_thread_memory_accessor(); + } add_thread_name_to_map(); start(); }) .before_stop(move |_| { end(); remove_thread_name_from_map(); + remove_thread_memory_accessor(); }) } } diff --git a/components/tikv_util/src/time.rs b/components/tikv_util/src/time.rs index 0df4ed4adac..7a3bb7cdb55 100644 --- a/components/tikv_util/src/time.rs +++ b/components/tikv_util/src/time.rs @@ -150,7 +150,7 @@ impl Monitor { .name(thd_name!("time-monitor")) .spawn_wrapper(move || { crate::thread_group::set_properties(props); - tikv_alloc::add_thread_memory_accessor(); + while rx.try_recv().is_err() { let before = now(); thread::sleep(Duration::from_millis(DEFAULT_WAIT_MS)); @@ -166,7 +166,6 @@ impl Monitor { on_jumped() } } - tikv_alloc::remove_thread_memory_accessor(); }) .unwrap(); diff --git a/components/tikv_util/src/timer.rs b/components/tikv_util/src/timer.rs index 30445780ac8..bb555e11794 100644 --- a/components/tikv_util/src/timer.rs +++ b/components/tikv_util/src/timer.rs @@ -104,7 +104,7 @@ pub fn start_global_timer(name: &str) -> Handle { .name(thd_name!(name)) .spawn_wrapper(move || { crate::thread_group::set_properties(props); - tikv_alloc::add_thread_memory_accessor(); + let mut timer = tokio_timer::Timer::default(); tx.send(timer.handle()).unwrap(); loop { diff --git a/components/tikv_util/src/worker/future.rs b/components/tikv_util/src/worker/future.rs index be7c05589cb..53addd7e3ba 100644 --- a/components/tikv_util/src/worker/future.rs +++ b/components/tikv_util/src/worker/future.rs @@ -101,7 +101,6 @@ where R: Runnable + Send + 'static, T: Display + Send + 'static, { - tikv_alloc::add_thread_memory_accessor(); let current_thread = thread::current(); let name = current_thread.name().unwrap(); let metrics_pending_task_count = WORKER_PENDING_TASK_VEC.with_label_values(&[name]); @@ -127,7 +126,6 @@ where .block_on(handle.run_until(task)); } runner.shutdown(); - tikv_alloc::remove_thread_memory_accessor(); } impl Worker { diff --git 
a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index 930185a1440..fc80e69cd84 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -181,7 +181,10 @@ impl Runner for YatpPoolRunner { if let Some(f) = self.after_start.take() { f(); } - tikv_alloc::add_thread_memory_accessor() + // SAFETY: we will call `remove_thread_memory_accessor` at `end`. + unsafe { + tikv_alloc::add_thread_memory_accessor(); + } } fn handle(&mut self, local: &mut Local, mut task_cell: Self::TaskCell) -> bool { diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 3a679cdf138..706d00176e1 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -39617,14 +39617,20 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 505 + "y": 44 }, - "id": 2696, + "hiddenSeries": false, + "id": 23763573729, "legend": { "alignAsTable": true, "avg": false, @@ -39642,7 +39648,11 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -39662,6 +39672,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Allocator Stats", "tooltip": { @@ -39699,6 +39710,314 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "binBps" + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 44 + }, + "hiddenSeries": false, + "id": 23763573730, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + 
"max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_allocator_thread_allocation{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"alloc\"}[$__rate_interval])) by (thread_name) - sum(rate(tikv_allocator_thread_allocation{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"dealloc\"}[$__rate_interval])) by (thread_name) != 0", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Send Allocated(+) / Release Received(-) Bytes Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 51 + }, + "hiddenSeries": false, + "id": 2696, + 
"legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": true, + "min": false, + "rightSide": true, + "show": false, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(delta(tikv_allocator_thread_allocation{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"alloc\"}[$__rate_interval])) by (thread_name)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Newly Allocated Bytes by Thread", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 51 + }, + "hiddenSeries": false, + "id": 23763573731, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": true, + "min": false, + "rightSide": true, + 
"show": false, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(delta(tikv_allocator_thread_allocation{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"alloc\"}[$__rate_interval])) by (thread_name)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Recently Released Bytes by Thread", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "repeat": null, diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index aeee3bbcb73..60f45933556 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -306,12 +306,11 @@ impl ImportSstService { .with_sys_and_custom_hooks( move || { tikv_util::thread_group::set_properties(props.clone()); - tikv_alloc::add_thread_memory_accessor(); + set_io_type(IoType::Import); tikv_kv::set_tls_engine(eng.lock().unwrap().clone()); }, move || { - tikv_alloc::remove_thread_memory_accessor(); // SAFETY: we have set the engine at some lines above 
with type `E`. unsafe { tikv_kv::destroy_tls_engine::() }; }, diff --git a/src/server/debug.rs b/src/server/debug.rs index b0705c12c33..9e01852455c 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -411,18 +411,14 @@ where .name(format!("mvcc-recover-thread-{}", thread_index)) .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); - tikv_alloc::add_thread_memory_accessor(); + info!( "thread {}: started on range [{}, {})", thread_index, log_wrappers::Value::key(&start_key), log_wrappers::Value::key(&end_key) ); - - let result = - recover_mvcc_for_range(&db, &start_key, &end_key, read_only, thread_index); - tikv_alloc::remove_thread_memory_accessor(); - result + recover_mvcc_for_range(&db, &start_key, &end_key, read_only, thread_index) }) .unwrap(); diff --git a/src/server/debug2.rs b/src/server/debug2.rs index 66fc39596c5..4c21731c41d 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -277,7 +277,6 @@ impl DebuggerImplV2 { .name(format!("mvcc-recover-thread-{}", thread_index)) .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); - tikv_alloc::add_thread_memory_accessor(); let mut results = vec![]; for (region, tablet) in region_group.into_iter().zip(tablets) { diff --git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index 4f528d8c356..be18f8216d5 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -289,9 +289,8 @@ impl GcMan .name(thd_name!("gc-manager")) .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); - tikv_alloc::add_thread_memory_accessor(); + self.run(); - tikv_alloc::remove_thread_memory_accessor(); }) .map_err(|e| box_err!("failed to start gc manager: {:?}", e)); res.map(|join_handle| GcManagerHandle { diff --git a/src/server/reset_to_version.rs b/src/server/reset_to_version.rs index a9002260ae9..59b509407bf 100644 --- a/src/server/reset_to_version.rs +++ b/src/server/reset_to_version.rs @@ -231,7 +231,6 
@@ impl ResetToVersionManager { .name("reset_to_version".to_string()) .spawn_wrapper(move || { tikv_util::thread_group::set_properties(props); - tikv_alloc::add_thread_memory_accessor(); while worker .process_next_batch(BATCH_SIZE, &mut wb) @@ -245,8 +244,6 @@ impl ResetToVersionManager { {} *worker.state.lock().unwrap() = ResetToVersionState::Done; info!("Reset to version done!"); - - tikv_alloc::remove_thread_memory_accessor(); }) .expect("failed to spawn reset_to_version thread"), ); diff --git a/src/server/snap.rs b/src/server/snap.rs index 7c6c5abaa20..34b32848ad3 100644 --- a/src/server/snap.rs +++ b/src/server/snap.rs @@ -384,11 +384,8 @@ impl Runner { snap_mgr, pool: RuntimeBuilder::new_multi_thread() .thread_name(thd_name!("snap-sender")) + .with_sys_hooks() .worker_threads(DEFAULT_POOL_SIZE) - .with_sys_and_custom_hooks( - tikv_alloc::add_thread_memory_accessor, - tikv_alloc::remove_thread_memory_accessor, - ) .build() .unwrap(), raft_router: r, diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index ce53a61125f..e83339bfad5 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -816,11 +816,8 @@ impl TabletRunner { snap_mgr, pool: RuntimeBuilder::new_multi_thread() .thread_name(thd_name!("tablet-snap-sender")) + .with_sys_hooks() .worker_threads(DEFAULT_POOL_SIZE) - .with_sys_and_custom_hooks( - tikv_alloc::add_thread_memory_accessor, - tikv_alloc::remove_thread_memory_accessor, - ) .build() .unwrap(), raft_router: r, diff --git a/src/storage/txn/flow_controller/singleton_flow_controller.rs b/src/storage/txn/flow_controller/singleton_flow_controller.rs index abf0689f1fc..f15806e7d94 100644 --- a/src/storage/txn/flow_controller/singleton_flow_controller.rs +++ b/src/storage/txn/flow_controller/singleton_flow_controller.rs @@ -632,7 +632,6 @@ impl FlowChecker { Builder::new() .name(thd_name!("flow-checker")) .spawn_wrapper(move || { - tikv_alloc::add_thread_memory_accessor(); let mut checker = self; let mut deadline = 
std::time::Instant::now(); let mut enabled = true; @@ -661,7 +660,6 @@ impl FlowChecker { checker.on_flow_info_msg(enabled, msg); } } - tikv_alloc::remove_thread_memory_accessor(); }) .unwrap() } diff --git a/src/storage/txn/flow_controller/tablet_flow_controller.rs b/src/storage/txn/flow_controller/tablet_flow_controller.rs index d4590b90acc..f53512b749c 100644 --- a/src/storage/txn/flow_controller/tablet_flow_controller.rs +++ b/src/storage/txn/flow_controller/tablet_flow_controller.rs @@ -135,7 +135,6 @@ impl FlowInfoDispatcher { Builder::new() .name(thd_name!("flow-checker")) .spawn_wrapper(move || { - tikv_alloc::add_thread_memory_accessor(); let mut deadline = std::time::Instant::now(); let mut enabled = true; loop { @@ -245,7 +244,6 @@ impl FlowInfoDispatcher { } } } - tikv_alloc::remove_thread_memory_accessor(); }) .unwrap() } From ba7164d5fc009ee6a06e866f12c86bbbfcbe2385 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Thu, 29 Jun 2023 20:06:48 +0800 Subject: [PATCH 0763/1149] coprocessor: request key needs to encode for `tls_collect_read_flow` (#14999) close tikv/tikv#14998 the request key needs to encode. If not , the bucket's key range has no intersection for the request's key range Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/coprocessor/metrics.rs | 7 +++ src/coprocessor/tracker.rs | 91 +++++++++++++++++++++++++++++++++++--- 2 files changed, 92 insertions(+), 6 deletions(-) diff --git a/src/coprocessor/metrics.rs b/src/coprocessor/metrics.rs index d757ec49d62..64905b3dfba 100644 --- a/src/coprocessor/metrics.rs +++ b/src/coprocessor/metrics.rs @@ -203,6 +203,13 @@ pub struct CopLocalMetrics { local_read_stats: ReadStats, } +impl CopLocalMetrics { + #[cfg(test)] + pub fn local_read_stats(&self) -> &ReadStats { + &self.local_read_stats + } +} + thread_local! 
{ pub static TLS_COP_METRICS: RefCell = RefCell::new( CopLocalMetrics { diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index ca726be9a43..005d97dd4b5 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -342,8 +342,8 @@ impl Tracker { let peer = self.req_ctx.context.get_peer(); let region_id = self.req_ctx.context.get_region_id(); - let start_key = &self.req_ctx.lower_bound; - let end_key = &self.req_ctx.upper_bound; + let start_key = Key::from_raw(&self.req_ctx.lower_bound); + let end_key = Key::from_raw(&self.req_ctx.upper_bound); let reverse_scan = if let Some(reverse_scan) = self.req_ctx.is_desc_scan { reverse_scan } else { @@ -353,14 +353,14 @@ impl Tracker { tls_collect_query( region_id, peer, - Key::from_raw(start_key).as_encoded(), - Key::from_raw(end_key).as_encoded(), + start_key.as_encoded(), + end_key.as_encoded(), reverse_scan, ); tls_collect_read_flow( self.req_ctx.context.get_region_id(), - Some(start_key), - Some(end_key), + Some(start_key.as_encoded()), + Some(end_key.as_encoded()), &total_storage_stats, self.buckets.as_ref(), ); @@ -429,3 +429,82 @@ impl Drop for Tracker { } } } + +#[cfg(test)] +mod tests { + use std::{sync::Arc, time::Duration, vec}; + + use kvproto::kvrpcpb; + use pd_client::BucketMeta; + use tikv_kv::RocksEngine; + + use super::{PerfLevel, ReqContext, ReqTag, TimeStamp, Tracker, TLS_COP_METRICS}; + use crate::storage::Statistics; + + #[test] + fn test_track() { + let mut context = kvrpcpb::Context::default(); + context.set_region_id(1); + + let mut req_ctx = ReqContext::new( + ReqTag::test, + context, + vec![], + Duration::from_secs(0), + None, + None, + TimeStamp::max(), + None, + PerfLevel::EnableCount, + ); + req_ctx.lower_bound = vec![ + 116, 128, 0, 0, 0, 0, 0, 0, 184, 95, 114, 128, 0, 0, 0, 0, 0, 70, 67, + ]; + req_ctx.upper_bound = vec![ + 116, 128, 0, 0, 0, 0, 0, 0, 184, 95, 114, 128, 0, 0, 0, 0, 0, 70, 167, + ]; + let mut track: Tracker = Tracker::new(req_ctx, 
Duration::default()); + let mut bucket = BucketMeta::default(); + bucket.region_id = 1; + bucket.version = 1; + bucket.keys = vec![ + vec![ + 116, 128, 0, 0, 0, 0, 0, 0, 255, 179, 95, 114, 128, 0, 0, 0, 0, 255, 0, 175, 155, + 0, 0, 0, 0, 0, 250, + ], + vec![ + 116, 128, 0, 255, 255, 255, 255, 255, 255, 254, 0, 0, 0, 0, 0, 0, 0, 248, + ], + ]; + track.buckets = Some(Arc::new(bucket)); + + let mut stat = Statistics::default(); + stat.write.flow_stats.read_keys = 10; + track.total_storage_stats = stat; + + track.track(); + drop(track); + TLS_COP_METRICS.with(|m| { + assert_eq!( + 10, + m.borrow() + .local_read_stats() + .region_infos + .get(&1) + .unwrap() + .flow + .read_keys + ); + assert_eq!( + vec![10], + m.borrow() + .local_read_stats() + .region_buckets + .get(&1) + .unwrap() + .stats + .read_keys + ); + }); + } +} From c70e518b1fbc3e25eae148e1a922dc1e73740d21 Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 30 Jun 2023 16:17:12 +0800 Subject: [PATCH 0764/1149] raftstore-v2: bugfix for terminated `last_report_ts`. (#15033) ref tikv/tikv#15011 This pr fixes an unexpected error which will terminate the updating of `last_report_ts` in raftstore-v2. 
Signed-off-by: lucasliang --- components/raftstore-v2/src/worker/pd/mod.rs | 4 +- .../raftstore-v2/src/worker/pd/slowness.rs | 10 +++- .../raftstore-v2/src/worker/pd/store.rs | 31 ++++++------ .../raftstore-v2/tests/failpoints/mod.rs | 1 + .../tests/failpoints/test_pd_heartbeat.rs | 48 +++++++++++++++++++ .../tests/integrations/cluster.rs | 19 ++++++-- 6 files changed, 91 insertions(+), 22 deletions(-) create mode 100644 components/raftstore-v2/tests/failpoints/test_pd_heartbeat.rs diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 94222d67d2e..1f4e09a1a71 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -297,7 +297,9 @@ where fn run(&mut self, task: Task) { self.maybe_schedule_heartbeat_receiver(); match task { - Task::StoreHeartbeat { stats } => self.handle_store_heartbeat(stats), + Task::StoreHeartbeat { stats } => { + self.handle_store_heartbeat(stats, false /* is_fake_hb */) + } Task::UpdateStoreInfos { cpu_usages, read_io_rates, diff --git a/components/raftstore-v2/src/worker/pd/slowness.rs b/components/raftstore-v2/src/worker/pd/slowness.rs index dd345cbfed3..c68c12b2b03 100644 --- a/components/raftstore-v2/src/worker/pd/slowness.rs +++ b/components/raftstore-v2/src/worker/pd/slowness.rs @@ -3,6 +3,7 @@ use std::time::{Duration, Instant}; use engine_traits::{KvEngine, RaftEngine}; +use fail::fail_point; use kvproto::pdpb; use pd_client::PdClient; use raftstore::store::{metrics::*, util::RaftstoreDuration, Config}; @@ -88,8 +89,15 @@ where self.slowness_stats .slow_cause .record(100_000, Instant::now()); // 100ms + // Handle timeout if last tick is not finished as expected. 
- if !self.slowness_stats.last_tick_finished && self.is_store_heartbeat_delayed() { + let mock_slowness_last_tick_unfinished = || { + fail_point!("mock_slowness_last_tick_unfinished", |_| { true }); + false + }; + if mock_slowness_last_tick_unfinished() + || (!self.slowness_stats.last_tick_finished && self.is_store_heartbeat_delayed()) + { // If the last slowness tick already reached abnormal state and was delayed for // reporting by `store-heartbeat` to PD, we should report it here manually as a // FAKE `store-heartbeat`. It's an assurance that the heartbeat to diff --git a/components/raftstore-v2/src/worker/pd/store.rs b/components/raftstore-v2/src/worker/pd/store.rs index 64ac9ac9e8d..a17957b3347 100644 --- a/components/raftstore-v2/src/worker/pd/store.rs +++ b/components/raftstore-v2/src/worker/pd/store.rs @@ -28,7 +28,7 @@ use crate::router::StoreMsg; const HOTSPOT_REPORT_CAPACITY: usize = 1000; -/// Max limitation of delayed store_heartbeat. +/// Max limitation of delayed store heartbeat. const STORE_HEARTBEAT_DELAY_LIMIT: u64 = Duration::from_secs(5 * 60).as_secs(); fn hotspot_key_report_threshold() -> u64 { @@ -175,7 +175,7 @@ where ER: RaftEngine, T: PdClient + 'static, { - pub fn handle_store_heartbeat(&mut self, mut stats: pdpb::StoreStats) { + pub fn handle_store_heartbeat(&mut self, mut stats: pdpb::StoreStats, is_fake_hb: bool) { let mut report_peers = HashMap::default(); for (region_id, region_peer) in &mut self.region_peers { let read_bytes = region_peer.read_bytes - region_peer.last_store_report_read_bytes; @@ -240,15 +240,14 @@ where self.store_stat .engine_last_query_num .fill_query_stats(&self.store_stat.engine_total_query_num); - self.store_stat.last_report_ts = - if self.store_stat.last_report_ts.into_inner() as u32 == stats.get_start_time() { - // The given Task::StoreHeartbeat should be a fake heartbeat to PD, we won't - // update the last_report_ts to avoid incorrectly marking current TiKV node in - // normal state. 
- self.store_stat.last_report_ts - } else { - UnixSecs::now() - }; + self.store_stat.last_report_ts = if is_fake_hb { + // The given Task::StoreHeartbeat should be a fake heartbeat to PD, we won't + // update the last_report_ts to avoid incorrectly marking current TiKV node in + // normal state. + self.store_stat.last_report_ts + } else { + UnixSecs::now() + }; self.store_stat.region_bytes_written.flush(); self.store_stat.region_keys_written.flush(); self.store_stat.region_bytes_read.flush(); @@ -310,14 +309,12 @@ where .set(snap_stats.receiving_count as i64); // This calling means that the current node cannot report heartbeat in normaly - // scheduler. That is, the current node must in `busy` state. Meanwhile, mark - // this fake `StoreStats.start_time` == `store_stat.last_report_ts` to reveal - // that current heartbeat is fake and used for reporting slowness forcely. - stats.set_start_time(self.store_stat.last_report_ts.into_inner() as u32); + // scheduler. That is, the current node must in `busy` state. stats.set_is_busy(true); - // We do not need to report store_info, so we just set `None` here. - self.handle_store_heartbeat(stats); + // And here, the `is_fake_hb` should be marked with `True` to represent that + // this heartbeat message is a fake one. 
+ self.handle_store_heartbeat(stats, true); warn!(self.logger, "scheduling store_heartbeat timeout, force report store slow score to pd."; "store_id" => self.store_id, ); diff --git a/components/raftstore-v2/tests/failpoints/mod.rs b/components/raftstore-v2/tests/failpoints/mod.rs index 6148cb4eae1..43f203ca810 100644 --- a/components/raftstore-v2/tests/failpoints/mod.rs +++ b/components/raftstore-v2/tests/failpoints/mod.rs @@ -13,5 +13,6 @@ mod test_bootstrap; mod test_bucket; mod test_life; mod test_merge; +mod test_pd_heartbeat; mod test_split; mod test_trace_apply; diff --git a/components/raftstore-v2/tests/failpoints/test_pd_heartbeat.rs b/components/raftstore-v2/tests/failpoints/test_pd_heartbeat.rs new file mode 100644 index 00000000000..b4faa3a8f13 --- /dev/null +++ b/components/raftstore-v2/tests/failpoints/test_pd_heartbeat.rs @@ -0,0 +1,48 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use futures::executor::block_on; +use pd_client::PdClient; +use raftstore_v2::router::{StoreMsg, StoreTick}; +use tikv_util::config::ReadableDuration; + +use crate::cluster::{v2_default_config, Cluster}; + +#[test] +fn test_fake_store_heartbeat() { + fail::cfg("mock_collect_tick_interval", "return(0)").unwrap(); + + let cluster = Cluster::with_config_and_extra_setting(v2_default_config(), |config| { + config.pd_store_heartbeat_tick_interval = ReadableDuration::millis(10); + config.inspect_interval = ReadableDuration::millis(10); + }); + let store_id = cluster.node(0).id(); + let router = &cluster.routers[0]; + // Report store heartbeat to pd. 
+ router + .store_router() + .send_control(StoreMsg::Tick(StoreTick::PdStoreHeartbeat)) + .unwrap(); + std::thread::sleep(std::time::Duration::from_millis(50)); + let prev_stats = block_on(cluster.node(0).pd_client().get_store_stats_async(store_id)).unwrap(); + if prev_stats.get_start_time() > 0 { + assert_ne!(prev_stats.get_capacity(), 0); + assert_ne!(prev_stats.get_used_size(), 0); + assert_eq!(prev_stats.get_keys_written(), 0); + } + // Inject failpoints to trigger reporting fake store heartbeat to pd. + fail::cfg("mock_slowness_last_tick_unfinished", "return(0)").unwrap(); + std::thread::sleep(std::time::Duration::from_millis(50)); + let after_stats = + block_on(cluster.node(0).pd_client().get_store_stats_async(store_id)).unwrap(); + assert_ne!(after_stats.get_capacity(), 0); + assert_ne!(after_stats.get_used_size(), 0); + assert_eq!(after_stats.get_keys_written(), 0); + if after_stats.get_start_time() == 0 { + assert!(after_stats.get_is_busy()); + } else { + assert!(!after_stats.get_is_busy()); + } + + fail::remove("mock_slowness_last_tick_unfinished"); + fail::remove("mock_collect_tick_interval"); +} diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 0849a4cf96f..f0669b03f9a 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -536,15 +536,27 @@ impl Cluster { Cluster::with_node_count(1, Some(config)) } + pub fn with_config_and_extra_setting( + config: Config, + extra_setting: impl FnMut(&mut Config), + ) -> Cluster { + Cluster::with_configs(1, Some(config), None, extra_setting) + } + pub fn with_node_count(count: usize, config: Option) -> Self { - Cluster::with_configs(count, config, None) + Cluster::with_configs(count, config, None, |_| {}) } pub fn with_cop_cfg(config: Option, coprocessor_cfg: CopConfig) -> Cluster { - Cluster::with_configs(1, config, Some(coprocessor_cfg)) + Cluster::with_configs(1, 
config, Some(coprocessor_cfg), |_| {}) } - pub fn with_configs(count: usize, config: Option, cop_cfg: Option) -> Self { + pub fn with_configs( + count: usize, + config: Option, + cop_cfg: Option, + mut extra_setting: impl FnMut(&mut Config), + ) -> Self { let pd_server = test_pd::Server::new(1); let logger = slog_global::borrow_global().new(o!()); let mut cluster = Cluster { @@ -560,6 +572,7 @@ impl Cluster { v2_default_config() }; disable_all_auto_ticks(&mut cfg); + extra_setting(&mut cfg); let cop_cfg = cop_cfg.unwrap_or_default(); for _ in 1..=count { let mut node = TestNode::with_pd(&cluster.pd_server, cluster.logger.clone()); From 425f6f262f577425612cf1663ea6cceada72c724 Mon Sep 17 00:00:00 2001 From: Yexiang Zhang Date: Fri, 30 Jun 2023 18:03:42 +0800 Subject: [PATCH 0765/1149] diagnostic: fix index out of bounds error in sysinfo (#15007) close tikv/tikv#15006 fix index out of bounds error in sysinfo. * update sysinfo version to the personal branch Signed-off-by: mornyx Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 5 ++--- Cargo.toml | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 898ad6a60d9..bdefdd742ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6010,9 +6010,8 @@ checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8" [[package]] name = "sysinfo" -version = "0.26.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade661fa5e048ada64ad7901713301c21d2dbc5b65ee7967de8826c111452960" +version = "0.26.9" +source = "git+https://github.com/tikv/sysinfo?branch=0.26-fix-cpu#5a1bcf08816979624ef2ad79cfb896de432a9501" dependencies = [ "cfg-if 1.0.0", "core-foundation-sys", diff --git a/Cargo.toml b/Cargo.toml index 7dfc05261e7..0a33a3f0351 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -193,6 +193,8 @@ fs2 = { git = "https://github.com/tabokie/fs2-rs", branch = "tikv" } # Remove this when a new version is release. 
We need to solve rust-lang/cmake-rs#143. cmake = { git = "https://github.com/rust-lang/cmake-rs" } +sysinfo ={ git = "https://github.com/tikv/sysinfo", branch = "0.26-fix-cpu" } + [target.'cfg(target_os = "linux")'.dependencies] procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229b2c1fcc44118bef7eff127128" } # When you modify TiKV cooperatively with kvproto, this will be useful to submit the PR to TiKV and the PR to From f7a69f548f49750721bc05792ac5198e7ba8c91e Mon Sep 17 00:00:00 2001 From: wuhuizuo Date: Mon, 3 Jul 2023 13:54:43 +0800 Subject: [PATCH 0766/1149] ci: add codecov.yaml file (#14994) ref pingcap-qe/ci#2171, close tikv/tikv#14995 ci: add codecov.yaml file Signed-off-by: wuhuizuo --- codecov.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 codecov.yml diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 00000000000..1685e981dd6 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,32 @@ +coverage: + precision: 4 + round: down + range: "65...90" + + status: + project: + default: + target: auto + threshold: 3% # Allow the coverage to drop by threshold %, and posting a success status. + patch: + default: + target: auto + threshold: 3% + +comment: + layout: "header, diff, flags" + behavior: default + require_changes: false + +flag_management: + default_rules: # the rules that will be followed for any flag added, generally + carryforward: true + statuses: + - type: project + target: 85% + - type: patch + target: 85% + +ignore: + - tests/** # integration test cases or tools. + - fuzz/** # fuzz test cases or tools. 
From 13659574cb56010c0398bd518ee3d66acdea33a8 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Tue, 4 Jul 2023 00:50:12 +0800 Subject: [PATCH 0767/1149] pd: reduce frequent redundant log output (#15042) close tikv/tikv#15041 Signed-off-by: lhy1024 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/worker/pd/misc.rs | 15 +++++++++++---- components/raftstore/src/store/worker/pd.rs | 14 ++++++++++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/components/raftstore-v2/src/worker/pd/misc.rs b/components/raftstore-v2/src/worker/pd/misc.rs index 68c624b089a..6ade8d87de5 100644 --- a/components/raftstore-v2/src/worker/pd/misc.rs +++ b/components/raftstore-v2/src/worker/pd/misc.rs @@ -33,6 +33,8 @@ where let causal_ts_provider = self.causal_ts_provider.clone(); let logger = self.logger.clone(); let shutdown = self.shutdown.clone(); + let log_interval = Duration::from_secs(5); + let mut last_log_ts = Instant::now().checked_sub(log_interval).unwrap(); let f = async move { let mut success = false; @@ -73,10 +75,15 @@ where break; } Err(e) => { - warn!( - logger, - "failed to update max timestamp for region {}: {:?}", region_id, e - ); + if last_log_ts.elapsed() > log_interval { + warn!( + logger, + "failed to update max timestamp for region"; + "region_id" => region_id, + "error" => ?e + ); + last_log_ts = Instant::now(); + } } } } diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 3ee47b2e8a5..a16003c9bcf 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1754,6 +1754,8 @@ where let pd_client = self.pd_client.clone(); let concurrency_manager = self.concurrency_manager.clone(); let causal_ts_provider = self.causal_ts_provider.clone(); + let log_interval = Duration::from_secs(5); + let mut last_log_ts = Instant::now().checked_sub(log_interval).unwrap(); let f = async move { let mut 
success = false; @@ -1792,10 +1794,14 @@ where break; } Err(e) => { - warn!( - "failed to update max timestamp for region {}: {:?}", - region_id, e - ); + if last_log_ts.elapsed() > log_interval { + warn!( + "failed to update max timestamp for region"; + "region_id" => region_id, + "error" => ?e + ); + last_log_ts = Instant::now(); + } } } } From b679077cc1b53c5873246b3276e49530ff2bf4f2 Mon Sep 17 00:00:00 2001 From: glorv Date: Tue, 4 Jul 2023 01:03:42 +0800 Subject: [PATCH 0768/1149] resource_control: wrap resource limiter in future (#14931) ref tikv/tikv#14900 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 2 + components/file_system/src/io_stats/mod.rs | 4 + components/file_system/src/io_stats/proc.rs | 7 + components/file_system/src/lib.rs | 17 +- components/resource_control/Cargo.toml | 4 +- components/resource_control/src/future.rs | 261 +++++++++++++++++- components/resource_control/src/lib.rs | 1 + .../resource_control/src/resource_limiter.rs | 14 +- 8 files changed, 301 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bdefdd742ae..13d4f1726e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4981,6 +4981,7 @@ dependencies = [ "crossbeam-skiplist", "dashmap", "fail", + "file_system", "futures 0.3.15", "kvproto", "lazy_static", @@ -4997,6 +4998,7 @@ dependencies = [ "test_pd", "test_pd_client", "tikv_util", + "tokio-timer", "yatp", ] diff --git a/components/file_system/src/io_stats/mod.rs b/components/file_system/src/io_stats/mod.rs index e4c0017451f..216c6ad7897 100644 --- a/components/file_system/src/io_stats/mod.rs +++ b/components/file_system/src/io_stats/mod.rs @@ -29,6 +29,10 @@ mod stub { pub fn fetch_io_bytes() -> [IoBytes; IoType::COUNT] { Default::default() } + + pub fn get_thread_io_bytes_total() -> Result { + Err("unimplemented".into()) + } } #[cfg(not(any(target_os = "linux", feature = "bcc-iosnoop")))] pub use stub::*; diff --git 
a/components/file_system/src/io_stats/proc.rs b/components/file_system/src/io_stats/proc.rs index fca0f6a64b1..d66a04aa4e8 100644 --- a/components/file_system/src/io_stats/proc.rs +++ b/components/file_system/src/io_stats/proc.rs @@ -180,6 +180,13 @@ pub fn fetch_io_bytes() -> [IoBytes; IoType::COUNT] { bytes } +pub fn get_thread_io_bytes_total() -> Result { + match LOCAL_IO_STATS.get() { + Some(s) => s.lock().id.fetch_io_bytes(), + None => Err("thread local io stats is None".into()), + } +} + #[cfg(test)] mod tests { use std::{ diff --git a/components/file_system/src/lib.rs b/components/file_system/src/lib.rs index 91e0a35da80..7bf1a45dcb1 100644 --- a/components/file_system/src/lib.rs +++ b/components/file_system/src/lib.rs @@ -34,7 +34,9 @@ use std::{ }; pub use file::{File, OpenOptions}; -pub use io_stats::{get_io_type, init as init_io_stats_collector, set_io_type}; +pub use io_stats::{ + get_io_type, get_thread_io_bytes_total, init as init_io_stats_collector, set_io_type, +}; pub use metrics_manager::{BytesFetcher, MetricsManager}; use online_config::ConfigValue; use openssl::{ @@ -113,10 +115,10 @@ impl Drop for WithIoType { } #[repr(C)] -#[derive(Debug, Copy, Clone, Default)] +#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] pub struct IoBytes { - read: u64, - write: u64, + pub read: u64, + pub write: u64, } impl std::ops::Sub for IoBytes { @@ -130,6 +132,13 @@ impl std::ops::Sub for IoBytes { } } +impl std::ops::AddAssign for IoBytes { + fn add_assign(&mut self, rhs: Self) { + self.read += rhs.read; + self.write += rhs.write; + } +} + #[repr(u32)] #[derive(Debug, Clone, PartialEq, Copy, EnumCount)] pub enum IoPriority { diff --git a/components/resource_control/Cargo.toml b/components/resource_control/Cargo.toml index ec13d9cdbdb..1ddb633ab15 100644 --- a/components/resource_control/Cargo.toml +++ b/components/resource_control/Cargo.toml @@ -14,7 +14,8 @@ crossbeam = "0.8" crossbeam-skiplist = "0.1" dashmap = "5.1" fail = "0.5" -futures = { version = 
"0.3" } +file_system = { workspace = true } +futures = { version = "0.3", features = ["compat"] } kvproto = { workspace = true } lazy_static = "1.0" online_config = { workspace = true } @@ -29,6 +30,7 @@ slog-global = { workspace = true } test_pd = { workspace = true } test_pd_client = { workspace = true } tikv_util = { workspace = true } +tokio-timer = { workspace = true } yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } [dev-dependencies] diff --git a/components/resource_control/src/future.rs b/components/resource_control/src/future.rs index 8027a27b394..79e976c2699 100644 --- a/components/resource_control/src/future.rs +++ b/components/resource_control/src/future.rs @@ -5,12 +5,21 @@ use std::{ pin::Pin, sync::Arc, task::{Context, Poll}, + time::Duration, }; +use file_system::IoBytes; +use futures::compat::{Compat01As03, Future01CompatExt}; use pin_project::pin_project; -use tikv_util::time::Instant; +use tikv_util::{time::Instant, timer::GLOBAL_TIMER_HANDLE, warn}; +use tokio_timer::Delay; -use crate::resource_group::{ResourceConsumeType, ResourceController}; +use crate::{ + resource_group::{ResourceConsumeType, ResourceController}, + resource_limiter::ResourceLimiter, +}; + +const MAX_WAIT_DURATION: Duration = Duration::from_secs(10); #[pin_project] pub struct ControlledFuture { @@ -44,3 +53,251 @@ impl Future for ControlledFuture { res } } + +#[cfg(not(test))] +fn get_thread_io_bytes_stats() -> Result { + file_system::get_thread_io_bytes_total() +} + +#[cfg(test)] +fn get_thread_io_bytes_stats() -> Result { + use std::cell::Cell; + + fail::fail_point!("failed_to_get_thread_io_bytes_stats", |_| { + Err("get_thread_io_bytes_total failed".into()) + }); + thread_local! 
{ + static TOTAL_BYTES: Cell = Cell::new(IoBytes::default()); + } + + let mut new_bytes = TOTAL_BYTES.get(); + new_bytes.read += 100; + new_bytes.write += 50; + TOTAL_BYTES.set(new_bytes); + Ok(new_bytes) +} + +// `LimitedFuture` wraps a Future with ResourceLimiter, it will automically +// statistics the cpu time and io bytes consumed by the future, and do async +// waiting according the configuration of the ResourceLimiter. +#[pin_project] +pub struct LimitedFuture { + #[pin] + f: F, + // `pre_delay` and `post_delay` is used to delay this task, at any time, at most one of the two + // is valid. A future can only be polled once in one round, so we uses two field here to + // workaround this restriction of the rust compiler. + #[pin] + pre_delay: OptionalFuture>, + #[pin] + post_delay: OptionalFuture>, + resource_limiter: Arc, + res: Poll, +} + +impl LimitedFuture { + #[allow(dead_code)] + pub fn new(f: F, resource_limiter: Arc) -> Self { + Self { + f, + pre_delay: None.into(), + post_delay: None.into(), + resource_limiter, + res: Poll::Pending, + } + } +} + +impl Future for LimitedFuture { + type Output = F::Output; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let mut this = self.project(); + if !this.post_delay.is_done() { + assert!(this.pre_delay.is_done()); + std::mem::swap(&mut *this.pre_delay, &mut *this.post_delay); + } + if !this.pre_delay.is_done() { + let res = this.pre_delay.poll(cx); + if res.is_pending() { + return Poll::Pending; + } + } + if this.res.is_ready() { + return std::mem::replace(this.res, Poll::Pending); + } + let last_io_bytes = match get_thread_io_bytes_stats() { + Ok(b) => Some(b), + Err(e) => { + warn!("load thread io bytes failed"; "err" => e); + None + } + }; + let start = Instant::now(); + let res = this.f.poll(cx); + let dur = start.saturating_elapsed(); + let io_bytes = if let Some(last_io_bytes) = last_io_bytes { + match get_thread_io_bytes_stats() { + Ok(io_bytes) => { + let delta = io_bytes - last_io_bytes; 
+ delta.read + delta.write + } + Err(e) => { + warn!("load thread io bytes failed"; "err" => e); + 0 + } + } + } else { + 0 + }; + let mut wait_dur = this.resource_limiter.consume(dur, io_bytes); + if wait_dur == Duration::ZERO { + return res; + } + if wait_dur > MAX_WAIT_DURATION { + warn!("limiter future wait too long"; "wait" => ?wait_dur, "io" => io_bytes, "cpu" => ?dur); + wait_dur = MAX_WAIT_DURATION; + } + *this.post_delay = Some( + GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + wait_dur) + .compat(), + ) + .into(); + if this.post_delay.poll(cx).is_ready() { + return res; + } + *this.res = res; + Poll::Pending + } +} + +/// `OptionalFuture` is similar to futures::OptionFuture, but provide an extra +/// `is_done` method. +#[pin_project] +struct OptionalFuture { + #[pin] + f: Option, + done: bool, +} + +impl OptionalFuture { + fn new(f: Option) -> Self { + let done = f.is_none(); + Self { f, done } + } + + fn is_done(&self) -> bool { + self.done + } +} + +impl From> for OptionalFuture { + fn from(f: Option) -> Self { + Self::new(f) + } +} + +impl Future for OptionalFuture { + type Output = Option; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let this = self.project(); + match this.f.as_pin_mut() { + Some(x) => x.poll(cx).map(|r| { + *this.done = true; + Some(r) + }), + None => Poll::Ready(None), + } + } +} + +#[cfg(test)] +mod tests { + use std::sync::mpsc::{channel, Sender}; + + use tikv_util::yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}; + + use super::*; + use crate::resource_limiter::GroupStatistics; + + #[pin_project] + struct NotifyFuture { + #[pin] + f: F, + sender: Sender<()>, + } + + impl NotifyFuture { + fn new(f: F, sender: Sender<()>) -> Self { + Self { f, sender } + } + } + + impl Future for NotifyFuture { + type Output = F::Output; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let this = self.project(); + this.f.poll(cx).map(|r| { + this.sender.send(()).unwrap(); + r + }) + } + } + 
+ async fn empty() {} + + #[test] + fn test_limited_future() { + let pool = YatpPoolBuilder::new(DefaultTicker::default()) + .thread_count(1, 1, 1) + .name_prefix("test") + .build_future_pool(); + + let resource_limiter = Arc::new(ResourceLimiter::new(f64::INFINITY, 1000.0)); + + fn spawn_and_wait(pool: &FuturePool, f: F, limiter: Arc) + where + F: Future + Send + 'static, + ::Output: Send, + { + let (sender, receiver) = channel::<()>(); + let fut = NotifyFuture::new(LimitedFuture::new(f, limiter), sender); + pool.spawn(fut).unwrap(); + receiver.recv().unwrap(); + } + + let mut i = 0; + let mut stats: GroupStatistics; + // consume the remain free limit quota. + loop { + i += 1; + spawn_and_wait(&pool, empty(), resource_limiter.clone()); + stats = resource_limiter.io_limiter.get_statistics(); + assert_eq!(stats.total_consumed, i * 150); + if stats.total_wait_dur_us > 0 { + break; + } + } + + let start = Instant::now(); + spawn_and_wait(&pool, empty(), resource_limiter.clone()); + let new_stats = resource_limiter.io_limiter.get_statistics(); + let delta = new_stats - stats; + let dur = start.saturating_elapsed(); + assert_eq!(delta.total_consumed, 150); + assert_eq!(delta.total_wait_dur_us, 150_000); + assert!(dur >= Duration::from_millis(150) && dur <= Duration::from_millis(160)); + + // fetch io bytes failed, consumed value is 0. + #[cfg(feature = "failpoints")] + { + fail::cfg("failed_to_get_thread_io_bytes_stats", "1*return").unwrap(); + spawn_and_wait(&pool, empty(), resource_limiter.clone()); + assert_eq!(resource_limiter.io_limiter.get_statistics(), new_stats); + fail::remove("failed_to_get_thread_io_bytes_stats"); + } + } +} diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index fdb331a7fa4..2fbb25c3394 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -1,5 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
#![feature(test)] +#![feature(local_key_cell_methods)] use online_config::OnlineConfig; use serde::{Deserialize, Serialize}; diff --git a/components/resource_control/src/resource_limiter.rs b/components/resource_control/src/resource_limiter.rs index 696dc8ded6d..1c41ea79893 100644 --- a/components/resource_control/src/resource_limiter.rs +++ b/components/resource_control/src/resource_limiter.rs @@ -61,7 +61,7 @@ impl QuotaLimiter { } #[allow(dead_code)] - fn get_statistics(&self) -> GroupStatistics { + pub fn get_statistics(&self) -> GroupStatistics { GroupStatistics { total_consumed: self.limiter.total_bytes_consumed() as u64, total_wait_dur_us: self.total_wait_dur_us.load(Ordering::Relaxed), @@ -82,8 +82,18 @@ impl QuotaLimiter { } } -#[derive(Default, Clone, Copy, Debug)] +#[derive(Default, Clone, PartialEq, Eq, Copy, Debug)] pub struct GroupStatistics { pub total_consumed: u64, pub total_wait_dur_us: u64, } + +impl std::ops::Sub for GroupStatistics { + type Output = Self; + fn sub(self, rhs: Self) -> Self::Output { + Self { + total_consumed: self.total_consumed.saturating_sub(rhs.total_consumed), + total_wait_dur_us: self.total_wait_dur_us.saturating_sub(rhs.total_wait_dur_us), + } + } +} From 807297a8805d1cccef5bcb8adb3750016e1abf13 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Tue, 4 Jul 2023 13:41:14 +0800 Subject: [PATCH 0769/1149] tests: make test_flashback_with_in_memory_pessimistic_locks stable (#14986) ref tikv/tikv#13303, ref pingcap/tidb#44292 Make `test_flashback_with_in_memory_pessimistic_locks` stable. 
Signed-off-by: JmPotato --- .../integrations/raftstore/test_flashback.rs | 33 +++++++++++++++---- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index 515691000d2..5a28646db65 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -65,10 +65,15 @@ fn test_flashback_with_in_memory_pessimistic_locks() { }, ); let txn_ext = snapshot.txn_ext.unwrap(); - let pessimistic_locks = txn_ext.pessimistic_locks.read(); - assert!(!pessimistic_locks.is_writable()); - assert_eq!(pessimistic_locks.status, LocksStatus::IsInFlashback); - assert_eq!(pessimistic_locks.len(), 0); + eventually_meet( + Box::new(move || { + let pessimistic_locks = txn_ext.pessimistic_locks.read(); + !pessimistic_locks.is_writable() + && pessimistic_locks.status == LocksStatus::IsInFlashback + && pessimistic_locks.is_empty() + }), + "pessimistic locks status should be LocksStatus::IsInFlashback", + ); } // Finish flashback. 
cluster.must_send_wait_flashback_msg(region.get_id(), AdminCmdType::FinishFlashback); @@ -76,10 +81,24 @@ fn test_flashback_with_in_memory_pessimistic_locks() { { let snapshot = cluster.must_get_snapshot_of_region(region.get_id()); let txn_ext = snapshot.txn_ext.unwrap(); - let pessimistic_locks = txn_ext.pessimistic_locks.read(); - assert!(pessimistic_locks.is_writable()); - assert_eq!(pessimistic_locks.len(), 0); + eventually_meet( + Box::new(move || { + let pessimistic_locks = txn_ext.pessimistic_locks.read(); + pessimistic_locks.is_writable() && pessimistic_locks.is_empty() + }), + "pessimistic locks should be writable again", + ); + } +} + +fn eventually_meet(condition: Box bool>, purpose: &str) { + for _ in 0..30 { + if condition() { + return; + } + sleep(Duration::from_millis(100)); } + panic!("condition never meet: {}", purpose); } #[test_case(test_raftstore::new_node_cluster)] From b384b851b249d818aa388ae11998afda9036ac39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 4 Jul 2023 14:45:43 +0800 Subject: [PATCH 0770/1149] log-backup: cache files in memory instead of in local disk (#14672) close tikv/tikv#14628 Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/backup-stream/src/endpoint.rs | 13 +- components/backup-stream/src/lib.rs | 1 + components/backup-stream/src/metrics.rs | 22 + components/backup-stream/src/router.rs | 321 ++++--- components/backup-stream/src/tempfiles.rs | 1006 +++++++++++++++++++++ components/backup-stream/src/utils.rs | 47 +- components/backup-stream/tests/suite.rs | 43 +- src/config/mod.rs | 17 +- tests/integrations/config/mod.rs | 1 + 9 files changed, 1283 insertions(+), 188 deletions(-) create mode 100644 components/backup-stream/src/tempfiles.rs diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 6f22cc67997..ed8905423bc 100644 --- 
a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -1,8 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{ - collections::HashSet, fmt, marker::PhantomData, path::PathBuf, sync::Arc, time::Duration, -}; +use std::{collections::HashSet, fmt, marker::PhantomData, sync::Arc, time::Duration}; use concurrency_manager::ConcurrencyManager; use engine_traits::KvEngine; @@ -50,7 +48,7 @@ use crate::{ metadata::{store::MetaStore, MetadataClient, MetadataEvent, StreamTask}, metrics::{self, TaskStatus}, observer::BackupStreamObserver, - router::{ApplyEvents, Router, TaskSelector}, + router::{self, ApplyEvents, Router, TaskSelector}, subscription_manager::{RegionSubscriptionManager, ResolvedRegions}, subscription_track::{Ref, RefMut, ResolveResult, SubscriptionTracer}, try_send, @@ -118,12 +116,7 @@ where .expect("failed to create tokio runtime for backup stream worker."); let meta_client = MetadataClient::new(store, store_id); - let range_router = Router::new( - PathBuf::from(config.temp_path.clone()), - scheduler.clone(), - config.file_size_limit.0, - config.max_flush_interval.0, - ); + let range_router = Router::new(scheduler.clone(), router::Config::from(config.clone())); // spawn a worker to watch task changes from etcd periodically. let meta_client_clone = meta_client.clone(); diff --git a/components/backup-stream/src/lib.rs b/components/backup-stream/src/lib.rs index ac7ab1f718f..3d4690d7f48 100644 --- a/components/backup-stream/src/lib.rs +++ b/components/backup-stream/src/lib.rs @@ -16,6 +16,7 @@ pub mod router; mod service; mod subscription_manager; mod subscription_track; +mod tempfiles; // Publish it for integration test. // Perhaps we'd better move some of then into `tikv_util`. 
pub mod utils; diff --git a/components/backup-stream/src/metrics.rs b/components/backup-stream/src/metrics.rs index 225d583ca5c..3a2fc1d119d 100644 --- a/components/backup-stream/src/metrics.rs +++ b/components/backup-stream/src/metrics.rs @@ -169,6 +169,28 @@ lazy_static! { exponential_buckets(0.001, 2.0, 16).unwrap() ) .unwrap(); + pub static ref TEMP_FILE_MEMORY_USAGE: IntGauge = register_int_gauge!( + "tikv_log_backup_temp_file_memory_usage", + "The total memory usage of temporary files.", + ) + .unwrap(); + pub static ref TEMP_FILE_COUNT: IntGauge = register_int_gauge!( + "tikv_log_backup_temp_file_count", + "The number of temporary files." + ) + .unwrap(); + pub static ref TEMP_FILE_SWAP_OUT_BYTES: IntCounter = register_int_counter!( + "tikv_log_backup_temp_file_swap_out_bytes", + "The number of total bytes being swapped out to disk." + ) + .unwrap(); + pub static ref IN_DISK_TEMP_FILE_SIZE: Histogram = register_histogram!( + "tikv_log_backup_in_disk_temp_file_size", + "The histogram of the size of the temp files get swapped out in bytes.", + // The default minimal size of a file being able to be swapped out is 1M. + exponential_buckets((1024 * 1024) as f64, 2.0, 8).unwrap() + ).unwrap(); + } make_static_metric! { diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 4b1022e7b39..ff3254fa091 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -1,11 +1,9 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use core::pin::Pin; use std::{ borrow::Borrow, collections::HashMap, fmt::Display, - io, path::{Path, PathBuf}, result, sync::{ @@ -35,6 +33,7 @@ use tikv::config::BackupStreamConfig; use tikv_util::{ box_err, codec::stream_event::EventEncoder, + config::ReadableSize, error, info, time::{Instant, Limiter}, warn, @@ -42,7 +41,6 @@ use tikv_util::{ Either, HandyRwLock, }; use tokio::{ - fs::{remove_file, File}, io::AsyncWriteExt, sync::{Mutex, RwLock}, }; @@ -57,6 +55,7 @@ use crate::{ metadata::StreamTask, metrics::{HANDLE_KV_HISTOGRAM, SKIP_KV_COUNTER}, subscription_track::TwoPhaseResolver, + tempfiles::{self, TempFilePool}, try_send, utils::{self, CompressionWriter, FilesReader, SegmentMap, SlotMap, StopWatch}, }; @@ -294,20 +293,32 @@ impl ApplyEvent { #[derive(Debug, Clone)] pub struct Router(Arc); -impl Router { - /// Create a new router with the temporary folder. - pub fn new( - prefix: PathBuf, - scheduler: Scheduler, - temp_file_size_limit: u64, - max_flush_interval: Duration, - ) -> Self { - Self(Arc::new(RouterInner::new( +pub struct Config { + pub prefix: PathBuf, + pub temp_file_size_limit: u64, + pub temp_file_memory_quota: u64, + pub max_flush_interval: Duration, +} + +impl From for Config { + fn from(value: tikv::config::BackupStreamConfig) -> Self { + let prefix = PathBuf::from(value.temp_path); + let temp_file_size_limit = value.file_size_limit.0; + let temp_file_memory_quota = value.temp_file_memory_quota.0; + let max_flush_interval = value.max_flush_interval.0; + Self { prefix, - scheduler, temp_file_size_limit, + temp_file_memory_quota, max_flush_interval, - ))) + } + } +} + +impl Router { + /// Create a new router with the temporary folder. + pub fn new(scheduler: Scheduler, config: Config) -> Self { + Self(Arc::new(RouterInner::new(scheduler, config))) } } @@ -343,6 +354,7 @@ pub struct RouterInner { scheduler: Scheduler, /// The size limit of temporary file per task. 
temp_file_size_limit: AtomicU64, + temp_file_memory_quota: AtomicU64, /// The max duration the local data can be pending. max_flush_interval: SyncRwLock, } @@ -358,19 +370,15 @@ impl std::fmt::Debug for RouterInner { } impl RouterInner { - pub fn new( - prefix: PathBuf, - scheduler: Scheduler, - temp_file_size_limit: u64, - max_flush_interval: Duration, - ) -> Self { + pub fn new(scheduler: Scheduler, config: Config) -> Self { RouterInner { ranges: SyncRwLock::new(SegmentMap::default()), tasks: Mutex::new(HashMap::default()), - prefix, + prefix: config.prefix, scheduler, - temp_file_size_limit: AtomicU64::new(temp_file_size_limit), - max_flush_interval: SyncRwLock::new(max_flush_interval), + temp_file_size_limit: AtomicU64::new(config.temp_file_size_limit), + temp_file_memory_quota: AtomicU64::new(config.temp_file_memory_quota), + max_flush_interval: SyncRwLock::new(config.max_flush_interval), } } @@ -378,6 +386,15 @@ impl RouterInner { *self.max_flush_interval.write().unwrap() = config.max_flush_interval.0; self.temp_file_size_limit .store(config.file_size_limit.0, Ordering::SeqCst); + self.temp_file_memory_quota + .store(config.temp_file_memory_quota.0, Ordering::SeqCst); + let tasks = self.tasks.blocking_lock(); + for task in tasks.values() { + task.temp_file_pool + .config() + .cache_size + .store(config.temp_file_memory_quota.0 as usize, Ordering::SeqCst); + } } /// Find the task for a region. 
If `end_key` is empty, search from start_key @@ -425,23 +442,16 @@ impl RouterInner { // register task info ans range info to router pub async fn register_task( &self, - mut task: StreamTask, + task: StreamTask, ranges: Vec<(Vec, Vec)>, merged_file_size_limit: u64, ) -> Result<()> { - let compression_type = task.info.get_compression_type(); - let task_name = task.info.take_name(); + let task_name = task.info.get_name().to_owned(); // register task info - let prefix_path = self.prefix.join(&task_name); - let stream_task = StreamTaskInfo::new( - prefix_path, - task, - ranges.clone(), - merged_file_size_limit, - compression_type, - ) - .await?; + let cfg = self.tempfile_config_for_task(&task); + let stream_task = + StreamTaskInfo::new(task, ranges.clone(), merged_file_size_limit, cfg).await?; self.tasks .lock() .await @@ -453,6 +463,22 @@ impl RouterInner { Ok(()) } + fn tempfile_config_for_task(&self, task: &StreamTask) -> tempfiles::Config { + // Note: the scope of this config is per-task. That means, when there are + // multi tasks, we may need to share the pool over tasks, or at least share the + // quota between tasks -- but not for now. We don't support that. + tempfiles::Config { + // Note: will it be more effective to directly sharing the same atomic value? + cache_size: AtomicUsize::new( + self.temp_file_memory_quota.load(Ordering::SeqCst) as usize + ), + swap_files: self.prefix.join(task.info.get_name()), + content_compression: task.info.get_compression_type(), + minimal_swap_out_file_size: ReadableSize::mb(1).0 as _, + write_buffer_size: ReadableSize::kb(4).0 as _, + } + } + pub async fn unregister_task(&self, task_name: &str) -> Option { self.tasks.lock().await.remove(task_name).map(|t| { info!( @@ -763,8 +789,6 @@ pub struct StreamTaskInfo { pub(crate) storage: Arc, /// The listening range of the task. ranges: Vec<(Vec, Vec)>, - /// The parent directory of temporary files. - temp_dir: PathBuf, /// The temporary file index. 
Both meta (m prefixed keys) and data (t /// prefixed keys). files: SlotMap, @@ -791,37 +815,14 @@ pub struct StreamTaskInfo { global_checkpoint_ts: AtomicU64, /// The size limit of the merged file for this task. merged_file_size_limit: u64, - /// The compression type for this task. - compression_type: CompressionType, -} - -impl Drop for StreamTaskInfo { - fn drop(&mut self) { - let (success, failed): (Vec<_>, Vec<_>) = self - .flushing_files - .get_mut() - .drain(..) - .chain(self.flushing_meta_files.get_mut().drain(..)) - .map(|(_, f, _)| f.local_path) - .map(std::fs::remove_file) - .partition(|r| r.is_ok()); - info!("stream task info dropped[1/2], removing flushing_temp files"; "success" => %success.len(), "failure" => %failed.len()); - let (success, failed): (Vec<_>, Vec<_>) = self - .files - .get_mut() - .drain() - .map(|(_, f)| f.into_inner().local_path) - .map(std::fs::remove_file) - .partition(|r| r.is_ok()); - info!("stream task info dropped[2/2], removing temp files"; "success" => %success.len(), "failure" => %failed.len()); - } + /// The pool for holding the temporary files. + temp_file_pool: Arc, } impl std::fmt::Debug for StreamTaskInfo { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("StreamTaskInfo") .field("task", &self.task.info.name) - .field("temp_dir", &self.temp_dir) .field("min_resolved_ts", &self.min_resolved_ts) .field("total_size", &self.total_size) .field("flushing", &self.flushing) @@ -832,13 +833,13 @@ impl std::fmt::Debug for StreamTaskInfo { impl StreamTaskInfo { /// Create a new temporary file set at the `temp_dir`. 
pub async fn new( - temp_dir: PathBuf, task: StreamTask, ranges: Vec<(Vec, Vec)>, merged_file_size_limit: u64, - compression_type: CompressionType, + temp_pool_cfg: tempfiles::Config, ) -> Result { - tokio::fs::create_dir_all(&temp_dir).await?; + let temp_dir = &temp_pool_cfg.swap_files; + tokio::fs::create_dir_all(temp_dir).await?; let storage = Arc::from(create_storage( task.info.get_storage(), BackendConfig::default(), @@ -847,7 +848,6 @@ impl StreamTaskInfo { Ok(Self { task, storage, - temp_dir, ranges, min_resolved_ts: TimeStamp::max(), files: SlotMap::default(), @@ -859,7 +859,7 @@ impl StreamTaskInfo { flush_fail_count: AtomicUsize::new(0), global_checkpoint_ts: AtomicU64::new(start_ts), merged_file_size_limit, - compression_type, + temp_file_pool: Arc::new(TempFilePool::new(temp_pool_cfg)?), }) } @@ -878,8 +878,8 @@ impl StreamTaskInfo { // copying. #[allow(clippy::map_entry)] if !w.contains_key(&key) { - let path = self.temp_dir.join(key.temp_file_name()); - let val = Mutex::new(DataFile::new(path, self.compression_type).await?); + let path = key.temp_file_name(); + let val = Mutex::new(DataFile::new(path, &self.temp_file_pool).await?); w.insert(key, val); } @@ -923,7 +923,7 @@ impl StreamTaskInfo { futures::future::join_all( w.iter_mut() .chain(wm.iter_mut()) - .map(|(_, f, _)| async move { f.inner.as_mut().done().await }), + .map(|(_, f, _)| f.inner.done()), ) .await .into_iter() @@ -993,30 +993,29 @@ impl StreamTaskInfo { pub async fn clear_flushing_files(&self) { for (_, data_file, _) in self.flushing_files.write().await.drain(..) { - debug!("removing data file"; "size" => %data_file.file_size, "name" => %data_file.local_path.display()); + debug!("removing data file"; "size" => %data_file.file_size, "name" => %data_file.inner.path().display()); self.total_size .fetch_sub(data_file.file_size, Ordering::SeqCst); - if let Err(e) = data_file.remove_temp_file().await { - // if remove template failed, just skip it. 
- info!("remove template file"; "err" => ?e); + if !self.temp_file_pool.remove(data_file.inner.path()) { + warn!("Trying to remove file not exists."; "file" => %data_file.inner.path().display()); } } for (_, data_file, _) in self.flushing_meta_files.write().await.drain(..) { - debug!("removing meta data file"; "size" => %data_file.file_size, "name" => %data_file.local_path.display()); + debug!("removing meta data file"; "size" => %data_file.file_size, "name" => %data_file.inner.path().display()); self.total_size .fetch_sub(data_file.file_size, Ordering::SeqCst); - if let Err(e) = data_file.remove_temp_file().await { - // if remove template failed, just skip it. - info!("remove template file"; "err" => ?e); + if !self.temp_file_pool.remove(data_file.inner.path()) { + warn!("Trying to remove file not exists."; "file" => %data_file.inner.path().display()); } } } async fn merge_and_flush_log_files_to( storage: Arc, - files: &[(TempFileKey, DataFile, DataFileInfo)], + files: &mut [(TempFileKey, DataFile, DataFileInfo)], metadata: &mut MetadataInfo, is_meta: bool, + shared_pool: Arc, ) -> Result<()> { let mut data_files_open = Vec::new(); let mut data_file_infos = Vec::new(); @@ -1031,8 +1030,8 @@ impl StreamTaskInfo { // and push it into merged_file_info(DataFileGroup). 
file_info_clone.set_range_offset(stat_length); data_files_open.push({ - let file = File::open(data_file.local_path.clone()).await?; - let compress_length = file.metadata().await?.len(); + let file = shared_pool.open_raw_for_read(data_file.inner.path())?; + let compress_length = file.len().await?; stat_length += compress_length; file_info_clone.set_range_length(compress_length); file @@ -1107,19 +1106,20 @@ impl StreamTaskInfo { files_lock: &RwLock>, is_meta: bool, ) -> Result<()> { - let files = files_lock.write().await; + let mut files = files_lock.write().await; let mut batch_size = 0; // file[batch_begin_index, i) is a batch let mut batch_begin_index = 0; // TODO: upload the merged file concurrently, // then collect merged_file_infos and push them into `metadata`. - for (i, (_, _, info)) in files.iter().enumerate() { + for i in 0..files.len() { if batch_size >= self.merged_file_size_limit { Self::merge_and_flush_log_files_to( storage.clone(), - &files[batch_begin_index..i], + &mut files[batch_begin_index..i], metadata, is_meta, + self.temp_file_pool.clone(), ) .await?; @@ -1127,14 +1127,15 @@ impl StreamTaskInfo { batch_size = 0; } - batch_size += info.length; + batch_size += files[i].2.length; } if batch_begin_index < files.len() { Self::merge_and_flush_log_files_to( storage.clone(), - &files[batch_begin_index..], + &mut files[batch_begin_index..], metadata, is_meta, + self.temp_file_pool.clone(), ) .await?; } @@ -1287,13 +1288,12 @@ struct DataFile { min_begin_ts: Option, sha256: Hasher, // TODO: use lz4 with async feature - inner: Pin>, + inner: tempfiles::ForWrite, compression_type: CompressionType, start_key: Vec, end_key: Vec, number_of_entries: usize, file_size: usize, - local_path: PathBuf, } #[derive(Debug)] @@ -1360,31 +1360,25 @@ impl MetadataInfo { impl DataFile { /// create and open a logfile at the path. /// Note: if a file with same name exists, would truncate it. 
- async fn new(local_path: impl AsRef, compression_type: CompressionType) -> Result { + async fn new(local_path: impl AsRef, files: &Arc) -> Result { let sha256 = Hasher::new(MessageDigest::sha256()) .map_err(|err| Error::Other(box_err!("openssl hasher failed to init: {}", err)))?; - let inner = - utils::compression_writer_dispatcher(local_path.as_ref(), compression_type).await?; + let inner = files.open_for_write(local_path.as_ref())?; Ok(Self { min_ts: TimeStamp::max(), max_ts: TimeStamp::zero(), resolved_ts: TimeStamp::zero(), min_begin_ts: None, inner, - compression_type, + compression_type: files.config().content_compression, sha256, number_of_entries: 0, file_size: 0, start_key: vec![], end_key: vec![], - local_path: local_path.as_ref().to_owned(), }) } - async fn remove_temp_file(&self) -> io::Result<()> { - remove_file(&self.local_path).await - } - fn decode_begin_ts(value: Vec) -> Result { WriteRef::parse(&value).map_or_else( |e| { @@ -1495,7 +1489,6 @@ impl std::fmt::Debug for DataFile { .field("min_ts", &self.min_ts) .field("max_ts", &self.max_ts) .field("resolved_ts", &self.resolved_ts) - .field("local_path", &self.local_path.display()) .finish() } } @@ -1512,18 +1505,18 @@ struct TaskRange { #[cfg(test)] mod tests { - use std::{ffi::OsStr, time::Duration}; + use std::{ffi::OsStr, io, time::Duration}; use external_storage::{ExternalData, NoopStorage}; use futures::AsyncReadExt; use kvproto::brpb::{Local, Noop, StorageBackend, StreamBackupTaskInfo}; use online_config::{ConfigManager, OnlineConfig}; + use tempdir::TempDir; use tikv_util::{ codec::number::NumberEncoder, config::ReadableDuration, worker::{dummy_scheduler, ReceiverWrapper}, }; - use tokio::fs::File; use txn_types::{Write, WriteType}; use super::*; @@ -1534,6 +1527,16 @@ mod tests { events: ApplyEvents, } + fn make_tempfiles_cfg(p: &Path) -> tempfiles::Config { + tempfiles::Config { + cache_size: AtomicUsize::new(ReadableSize::mb(512).0 as _), + swap_files: p.to_owned(), + 
content_compression: CompressionType::Zstd, + minimal_swap_out_file_size: 0, + write_buffer_size: 0, + } + } + fn make_table_key(table_id: i64, key: &[u8]) -> Vec { use std::io::Write; let mut table_key = b"t".to_vec(); @@ -1621,7 +1624,15 @@ mod tests { #[test] fn test_register() { let (tx, _) = dummy_scheduler(); - let router = RouterInner::new(PathBuf::new(), tx, 1024, Duration::from_secs(300)); + let router = RouterInner::new( + tx, + Config { + prefix: PathBuf::new(), + temp_file_size_limit: 1024, + temp_file_memory_quota: 1024 * 2, + max_flush_interval: Duration::from_secs(300), + }, + ); // -----t1.start-----t1.end-----t2.start-----t2.end------ // --|------------|----------|------------|-----------|-- // case1 case2 case3 case4 case5 @@ -1721,10 +1732,18 @@ mod tests { #[tokio::test] async fn test_basic_file() -> Result<()> { let tmp = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); - tokio::fs::create_dir_all(&tmp).await?; + tokio::fs::create_dir_all(&tmp).await.unwrap(); let (tx, rx) = dummy_scheduler(); - let router = RouterInner::new(tmp.clone(), tx, 32, Duration::from_secs(300)); - let (stream_task, storage_path) = task("dummy".to_owned()).await?; + let router = RouterInner::new( + tx, + Config { + prefix: tmp.clone(), + temp_file_size_limit: 32, + temp_file_memory_quota: 32 * 2, + max_flush_interval: Duration::from_secs(300), + }, + ); + let (stream_task, storage_path) = task("dummy".to_owned()).await.unwrap(); must_register_table(&router, stream_task, 1).await; let start_ts = write_simple_data(&router).await; @@ -1734,9 +1753,11 @@ mod tests { let files = router.tasks.lock().await.get("dummy").unwrap().clone(); let mut meta = files .move_to_flushing_files() - .await? + .await + .unwrap() .generate_metadata(1) - .await?; + .await + .unwrap(); assert!( meta.file_groups @@ -1756,12 +1777,14 @@ mod tests { // we may run `generate_metadata` again with same files. let mut another_meta = files .move_to_flushing_files() - .await? 
+ .await + .unwrap() .generate_metadata(1) - .await?; + .await + .unwrap(); - files.flush_log(&mut meta).await?; - files.flush_log(&mut another_meta).await?; + files.flush_log(&mut meta).await.unwrap(); + files.flush_log(&mut another_meta).await.unwrap(); // meta updated let files_num = meta .file_groups @@ -1782,7 +1805,7 @@ mod tests { } } - files.flush_meta(meta).await?; + files.flush_meta(meta).await.unwrap(); files.clear_flushing_files().await; drop(router); @@ -1841,11 +1864,10 @@ mod tests { }; let merged_file_size_limit = 0x10000; let task = StreamTaskInfo::new( - tmp_dir.path().to_path_buf(), stream_task, vec![(vec![], vec![])], merged_file_size_limit, - CompressionType::Zstd, + make_tempfiles_cfg(tmp_dir.path()), ) .await .unwrap(); @@ -1962,10 +1984,13 @@ mod tests { let (tx, _rx) = dummy_scheduler(); let tmp = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); let router = Arc::new(RouterInner::new( - tmp.clone(), tx, - 1, - Duration::from_secs(300), + Config { + prefix: tmp.clone(), + temp_file_size_limit: 1, + temp_file_memory_quota: 2, + max_flush_interval: Duration::from_secs(300), + }, )); let (task, _path) = task("error_prone".to_owned()).await?; must_register_table(router.as_ref(), task, 1).await; @@ -1996,7 +2021,15 @@ mod tests { async fn test_empty_resolved_ts() { let (tx, _rx) = dummy_scheduler(); let tmp = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); - let router = RouterInner::new(tmp.clone(), tx, 32, Duration::from_secs(300)); + let router = RouterInner::new( + tx, + Config { + prefix: tmp.clone(), + temp_file_size_limit: 32, + temp_file_memory_quota: 32 * 2, + max_flush_interval: Duration::from_secs(300), + }, + ); let mut stream_task = StreamBackupTaskInfo::default(); stream_task.set_name("nothing".to_string()); stream_task.set_storage(create_noop_storage_backend()); @@ -2024,10 +2057,13 @@ mod tests { let (tx, _rx) = dummy_scheduler(); let tmp = std::env::temp_dir().join(format!("{}", 
uuid::Uuid::new_v4())); let router = Arc::new(RouterInner::new( - tmp.clone(), tx, - 1, - Duration::from_secs(300), + Config { + prefix: tmp.clone(), + temp_file_size_limit: 1, + temp_file_memory_quota: 2, + max_flush_interval: Duration::from_secs(300), + }, )); let (task, _path) = task("cleanup_test".to_owned()).await?; must_register_table(&router, task, 1).await; @@ -2065,10 +2101,13 @@ mod tests { let (tx, rx) = dummy_scheduler(); let tmp = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); let router = Arc::new(RouterInner::new( - tmp.clone(), tx, - 1, - Duration::from_secs(300), + Config { + prefix: tmp.clone(), + temp_file_size_limit: 1, + temp_file_memory_quota: 2, + max_flush_interval: Duration::from_secs(300), + }, )); let (task, _path) = task("flush_failure".to_owned()).await?; must_register_table(router.as_ref(), task, 1).await; @@ -2199,11 +2238,10 @@ mod tests { is_paused: false, }; let task = StreamTaskInfo::new( - tmp_dir.path().to_path_buf(), stream_task, vec![(vec![], vec![])], 0x100000, - CompressionType::Zstd, + make_tempfiles_cfg(tmp_dir.path()), ) .await .unwrap(); @@ -2288,30 +2326,34 @@ mod tests { #[tokio::test] async fn test_est_len_in_flush() -> Result<()> { - use tokio::io::AsyncWriteExt; let noop_s = NoopStorage::default(); let ms = MockCheckContentStorage { s: noop_s }; - let file_path = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); - let mut f = File::create(file_path.clone()).await?; - f.write_all("test-data".as_bytes()).await?; - let data_file = DataFile::new(file_path, CompressionType::Zstd) - .await - .unwrap(); + let file_name = format!("{}", uuid::Uuid::new_v4()); + let file_path = Path::new(&file_name); + let tempfile = TempDir::new("test_est_len_in_flush").unwrap(); + let cfg = make_tempfiles_cfg(tempfile.path()); + let pool = Arc::new(TempFilePool::new(cfg).unwrap()); + let mut f = pool.open_for_write(file_path).unwrap(); + f.write_all(b"test-data").await?; + f.done().await?; + let mut data_file 
= DataFile::new(&file_path, &pool).await.unwrap(); let info = DataFileInfo::new(); let mut meta = MetadataInfo::with_capacity(1); let kv_event = build_kv_event(1, 1); let tmp_key = TempFileKey::of(&kv_event.events[0], 1); - let files = vec![(tmp_key, data_file, info)]; + data_file.inner.done().await?; + let mut files = vec![(tmp_key, data_file, info)]; let result = StreamTaskInfo::merge_and_flush_log_files_to( Arc::new(ms), - &files[0..], + &mut files[0..], &mut meta, false, + pool.clone(), ) .await; - assert_eq!(result.is_ok(), true); + result.unwrap(); Ok(()) } @@ -2320,10 +2362,13 @@ mod tests { let (sched, rx) = dummy_scheduler(); let cfg = BackupStreamConfig::default(); let router = Arc::new(RouterInner::new( - PathBuf::new(), sched.clone(), - 1, - cfg.max_flush_interval.0, + Config { + prefix: PathBuf::new(), + temp_file_size_limit: 1, + temp_file_memory_quota: 2, + max_flush_interval: cfg.max_flush_interval.0, + }, )); let mut cfg_manager = BackupStreamConfigManager::new(sched, cfg.clone()); diff --git a/components/backup-stream/src/tempfiles.rs b/components/backup-stream/src/tempfiles.rs new file mode 100644 index 00000000000..add1ee67c12 --- /dev/null +++ b/components/backup-stream/src/tempfiles.rs @@ -0,0 +1,1006 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. +//! This mod provides the ability of managing the temporary files generated by +//! log backup. 
+ +use std::{ + collections::HashMap, + convert::identity, + fs::File as SyncOsFile, + path::{Path, PathBuf}, + pin::Pin, + sync::{ + atomic::{AtomicU8, AtomicUsize, Ordering}, + Arc, Mutex as BlockMutex, + }, + task::{ready, Context, Poll}, +}; + +use futures::TryFutureExt; +use kvproto::brpb::CompressionType; +use tikv_util::warn; +use tokio::{ + fs::File as OsFile, + io::{AsyncRead, AsyncWrite}, +}; + +use crate::{ + annotate, + errors::Result, + metrics::{ + IN_DISK_TEMP_FILE_SIZE, TEMP_FILE_COUNT, TEMP_FILE_MEMORY_USAGE, TEMP_FILE_SWAP_OUT_BYTES, + }, + utils::{CompressionWriter, ZstdCompressionWriter}, +}; + +#[derive(Debug)] +pub struct Config { + /// The max memory usage of the in memory file content. + pub cache_size: AtomicUsize, + /// The base directory for swapping out files. + pub swap_files: PathBuf, + /// The compression type applied for files. + pub content_compression: CompressionType, + /// Prevent files with size less than this being swapped out. + /// We perfer to swap larger files for reducing IOps. + pub minimal_swap_out_file_size: usize, + /// The buffer size for writting swap files. + /// Even some of files has been swapped out, when new content appended, + /// those content would be kept in memory before they reach a threshold. + /// This would help us to reduce the I/O system calls. 
+ pub write_buffer_size: usize, +} + +pub struct TempFilePool { + cfg: Config, + current: AtomicUsize, + files: BlockMutex, + + #[cfg(test)] + override_swapout: Option< + Box Pin> + Send + Sync + 'static>, + >, +} + +impl std::fmt::Debug for TempFilePool { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TempFilePool") + .field("cfg", &self.cfg) + .field("current", &self.current) + .finish() + } +} + +struct File { + content: Arc>, + writer_count: Arc, + reader_count: Arc, +} + +enum PersistentFile { + Plain(OsFile), + #[cfg(test)] + Dynamic(Pin>), + Closed, +} + +impl std::fmt::Debug for PersistentFile { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Plain(_) => f.debug_tuple("Plain").finish(), + #[cfg(test)] + Self::Dynamic(_) => f.debug_tuple("Dynamic").finish(), + Self::Closed => f.debug_tuple("Closed").finish(), + } + } +} + +#[derive(Debug)] +struct FileCore { + in_mem: Vec, + external_file: Option, + + /// self.mem[0..written] has been written to out file. + written: usize, + the_pool: Arc, + rel_path: PathBuf, +} + +pub enum ForWrite { + ZstdCompressed(ZstdCompressionWriter), + Plain(ForWriteCore), +} + +#[derive(Debug)] +pub struct ForWriteCore { + core: Arc>, + + rel_path: PathBuf, + file_writer_count: Arc, + done_result: Option>, +} + +#[derive(Debug)] +pub struct ForRead { + content: Arc>, + + myfile: Option, + read: usize, + file_reader_count: Arc, +} + +#[derive(Default)] +struct FileSet { + items: HashMap, +} + +impl TempFilePool { + pub fn new(cfg: Config) -> Result { + if let Ok(true) = std::fs::metadata(&cfg.swap_files).map(|x| x.is_dir()) { + warn!("find content in the swap file directory node. 
truncating them."; "dir" => %cfg.swap_files.display()); + std::fs::remove_dir_all(&cfg.swap_files)?; + } + std::fs::create_dir_all(&cfg.swap_files)?; + + let this = Self { + cfg, + current: AtomicUsize::new(0usize), + files: BlockMutex::default(), + + #[cfg(test)] + override_swapout: None, + }; + Ok(this) + } + + pub fn open_for_write(self: &Arc, p: &Path) -> std::io::Result { + use std::io::{Error, ErrorKind}; + let mut fs = self.files.lock().unwrap(); + let f = fs.items.entry(p.to_owned()).or_insert_with(|| { + TEMP_FILE_COUNT.inc(); + File { + content: Arc::new(BlockMutex::new(FileCore::new( + Arc::clone(self), + p.to_owned(), + ))), + writer_count: Arc::default(), + reader_count: Arc::default(), + } + }); + if f.reader_count.load(Ordering::SeqCst) > 0 { + return Err(Error::new( + ErrorKind::Other, + "open_for_write isn't allowed when there are concurrent reading.", + )); + } + let fr = ForWriteCore { + core: Arc::clone(&f.content), + file_writer_count: Arc::clone(&f.writer_count), + rel_path: p.to_owned(), + done_result: None, + }; + f.writer_count.fetch_add(1, Ordering::SeqCst); + match self.cfg.content_compression { + CompressionType::Unknown => Ok(ForWrite::Plain(fr)), + CompressionType::Zstd => Ok(ForWrite::ZstdCompressed(ZstdCompressionWriter::new(fr))), + unknown_compression => Err(Error::new( + ErrorKind::Unsupported, + format!( + "the compression method {:?} isn't supported for now.", + unknown_compression + ), + )), + } + } + + /// Open a file reference for reading. + /// Please notice that once a compression applied, this would yield the + /// compressed content (won't decompress them.) -- that is what "raw" + /// implies. + /// "But why there isn't a `open_for_read` which decompresses the content?" + /// "Because in our use case, we only need the raw content -- we just send + /// it to external storage." 
+ pub fn open_raw_for_read(&self, p: &Path) -> std::io::Result { + use std::io::{Error, ErrorKind}; + + let fs = self.files.lock().unwrap(); + let f = fs.items.get(p); + if f.is_none() { + return Err(Error::new( + ErrorKind::NotFound, + format!("file {} not found", p.display()), + )); + } + let f = f.unwrap(); + let refc = f.writer_count.load(Ordering::SeqCst); + if refc > 0 { + // NOTE: the current implementation doesn't allow us to write when there are + // readers, because once the writter swapped out the file, the reader may not + // notice that. Perhaps in the future, we can implement something + // like cursors to allow the reader be able to access consistent + // File snapshot even there are writers appending contents + // to the file. But that isn't needed for now. + return Err(Error::new( + ErrorKind::Other, + format!( + "open_for_read isn't allowed when there are concurrent writing (there are still {} reads for file {}.).", + refc, + p.display() + ), + )); + } + let st = f.content.lock().unwrap(); + let myfile = if st.external_file.is_some() { + Some(self.open_relative(p)?) + } else { + None + }; + f.reader_count.fetch_add(1, Ordering::SeqCst); + Ok(ForRead { + content: Arc::clone(&f.content), + myfile, + file_reader_count: Arc::clone(&f.reader_count), + read: 0, + }) + } + + /// Remove a file from the pool. + /// If there are still some reference to the file, the deletion may be + /// delaied until all reference to the file drop. + pub fn remove(&self, p: &Path) -> bool { + let mut files = self.files.lock().unwrap(); + let removed = files.items.remove(p).is_some(); + if removed { + TEMP_FILE_COUNT.dec(); + } + removed + } + + pub fn config(&self) -> &Config { + &self.cfg + } + + /// Create a file for writting. + /// This function is synchronous so we can call it easier in the polling + /// context. (Anyway, it is really hard to call an async function in the + /// polling context.) 
+ fn create_relative(&self, p: &Path) -> std::io::Result { + let abs_path = self.cfg.swap_files.join(p); + #[cfg(test)] + let pfile = match &self.override_swapout { + Some(f) => PersistentFile::Dynamic(f(&abs_path)), + None => { + let file = OsFile::from_std(SyncOsFile::create(&abs_path)?); + PersistentFile::Plain(file) + } + }; + #[cfg(not(test))] + let pfile = { + let file = OsFile::from_std(SyncOsFile::create(abs_path)?); + PersistentFile::Plain(file) + }; + Ok(pfile) + } + + /// Open a file by a relative path. + /// This will open a raw OS file for reading. The file content may be + /// compressed if the configuration requires. + fn open_relative(&self, p: &Path) -> std::io::Result { + let file = SyncOsFile::open(self.cfg.swap_files.join(p))?; + Ok(OsFile::from_std(file)) + } + + fn delete_relative(&self, p: &Path) -> std::io::Result<()> { + std::fs::remove_file(self.cfg.swap_files.join(p))?; + Ok(()) + } +} + +impl ForWrite { + pub fn path(&self) -> &Path { + match self { + ForWrite::ZstdCompressed(z) => z.get_ref().path(), + ForWrite::Plain(r) => r.path(), + } + } +} + +#[async_trait::async_trait] +impl CompressionWriter for ForWrite { + async fn done(&mut self) -> Result<()> { + match self { + ForWrite::ZstdCompressed(z) => { + z.done().await?; + z.get_mut().done().await + } + ForWrite::Plain(c) => c.done().await, + } + } +} + +impl ForWriteCore { + pub fn path(&self) -> &Path { + &self.rel_path + } + + pub async fn done(&mut self) -> Result<()> { + // Given we have blocked new writes after we have `done`, it is safe to skip + // flushing here. + if let Some(res) = &self.done_result { + return res + .as_ref() + .map_err(|err| annotate!(err, "impossible to retry `done`")) + .copied(); + } + let core_lock = self.core.clone(); + // FIXME: For now, it cannot be awaited directly because `content` should be + // guarded by a sync mutex. Given the `sync_all` is an async function, + // it is almost impossible to implement some `poll` like things based on + // it. 
We also cannot use an async mutex to guard the `content` : that will + // make implementing `AsyncRead` and `AsyncWrite` become very very hard. + let res = if core_lock.lock().unwrap().external_file.is_some() { + tokio::task::spawn_blocking(move || { + let mut st = core_lock.lock().unwrap(); + if let Some(ext_file) = st.external_file.replace(PersistentFile::Closed) { + tokio::runtime::Handle::current().block_on(ext_file.done())?; + } + Result::Ok(()) + }) + .map_err(|err| annotate!(err, "joining the background `done` job")) + .await + .and_then(identity) + } else { + Ok(()) + }; + + // Some of `done` implementations may take the ownership to `self`, it will be + // really hard and dirty to make them retryable. given `done` merely + // fails, and once it failed, it is possible to lose data, just store and always + // return the error, so the task eventually fail. + self.done_result = Some(res.as_ref().map_err(|err| err.to_string()).copied()); + self.file_writer_count.fetch_sub(1, Ordering::SeqCst); + res + } +} + +impl FileCore { + fn poll_swap_out_unpin(&mut self, cx: &mut Context<'_>) -> Poll> { + loop { + let to_write = &self.in_mem[self.written..]; + let buf_size = self.the_pool.cfg.write_buffer_size; + if to_write.is_empty() { + modify_and_update_cap_diff(&mut self.in_mem, &self.the_pool.current, |v| { + v.clear(); + v.shrink_to(buf_size); + }); + IN_DISK_TEMP_FILE_SIZE.observe(self.written as _); + self.written = 0; + return Ok(()).into(); + } + if self.external_file.is_none() { + self.external_file = Some(self.the_pool.create_relative(&self.rel_path)?); + } + let ext_file = Pin::new(self.external_file.as_mut().unwrap()); + let n = ready!(ext_file.poll_write(cx, to_write))?; + if n == 0 { + return Err(std::io::Error::new( + std::io::ErrorKind::WriteZero, + "during swapping out file", + )) + .into(); + } + TEMP_FILE_SWAP_OUT_BYTES.inc_by(n as _); + self.written += n; + } + } + + fn append_to_buffer(&mut self, bs: &[u8]) { + modify_and_update_cap_diff(&mut 
self.in_mem, &self.the_pool.current, |v| { + v.extend_from_slice(bs); + }) + } + + #[inline(always)] + fn max_cache_size(&self) -> usize { + fail::fail_point!("override_log_backup_max_cache_size", |v| { + v.and_then(|x| x.parse::().ok()) + .unwrap_or_else(|| self.the_pool.cfg.cache_size.load(Ordering::Acquire)) + }); + self.the_pool.cfg.cache_size.load(Ordering::Acquire) + } + + fn should_swap_out(&self, new_data_size: usize) -> bool { + let mem_use = self.the_pool.current.load(Ordering::Acquire); + // If this write will trigger a reallocation... + let realloc_exceeds_quota = self.in_mem.len() + new_data_size > self.in_mem.capacity() + // And the allocation will exceed the memory quota. + && mem_use + self.in_mem.capacity() > self.max_cache_size(); + // If the current file is large enough to be swapped out. + // (For now, We don't want to swap out small files. That may consume many IO + // operations.) + let file_large_enough = self.in_mem.len() > self.the_pool.cfg.minimal_swap_out_file_size; + // If a file has already been swapped out, after filling a tiny buffer in + // memory, append new content to that file directly. + let already_swapped_out = + self.external_file.is_some() && self.in_mem.len() > self.the_pool.cfg.write_buffer_size; + // If there is pending swapping operation (Say, we have done some partial + // write.), always trigger swap out for releasing the in memory buffer. 
+ let swapping = self.written > 0; + (realloc_exceeds_quota && file_large_enough) || already_swapped_out || swapping + } + + fn new(pool: Arc, rel_path: PathBuf) -> Self { + let cap = pool.cfg.write_buffer_size; + let v = Vec::with_capacity(cap); + pool.current.fetch_add(v.capacity(), Ordering::SeqCst); + Self { + in_mem: v, + external_file: None, + written: 0, + the_pool: pool, + rel_path, + } + } +} + +impl AsyncWrite for ForWriteCore { + fn poll_write( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + use std::io::{Error as IoErr, ErrorKind}; + if self.done_result.is_some() { + return Err(IoErr::new( + ErrorKind::BrokenPipe, + "the write part has been closed", + )) + .into(); + } + + let mut stat = self.core.lock().unwrap(); + + if stat.should_swap_out(buf.len()) { + ready!(stat.poll_swap_out_unpin(cx))?; + } + + stat.append_to_buffer(buf); + Ok(buf.len()).into() + } + + fn poll_flush( + self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + let mut stat = self.core.lock().unwrap(); + if let Some(f) = &mut stat.external_file { + ready!(Pin::new(f).poll_flush(cx))?; + } + Ok(()).into() + } + + fn poll_shutdown( + self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + let mut stat = self.core.lock().unwrap(); + if let Some(f) = &mut stat.external_file { + ready!(Pin::new(f).poll_shutdown(cx))?; + } + Ok(()).into() + } +} + +impl Drop for FileCore { + fn drop(&mut self) { + self.the_pool + .current + .fetch_sub(self.in_mem.capacity(), Ordering::SeqCst); + TEMP_FILE_MEMORY_USAGE.set(self.the_pool.current.load(Ordering::Acquire) as _); + if self.external_file.is_some() { + if let Err(err) = self.the_pool.delete_relative(&self.rel_path) { + warn!("failed to remove the file."; "file" => %self.rel_path.display(), "err" => %err); + } + } + } +} + +impl Drop for ForWriteCore { + fn drop(&mut self) { + if self.done_result.is_none() { + self.file_writer_count.fetch_sub(1, 
Ordering::SeqCst); + } + } +} + +impl Drop for ForRead { + fn drop(&mut self) { + self.file_reader_count.fetch_sub(1, Ordering::SeqCst); + } +} + +impl ForRead { + pub async fn len(&self) -> Result { + let len_in_file = if let Some(mf) = &self.myfile { + mf.metadata().await?.len() + } else { + 0 + }; + let st = self.content.lock().unwrap(); + let len_in_mem = st.in_mem.len() - st.written; + Ok(len_in_file + len_in_mem as u64) + } +} + +impl AsyncRead for ForRead { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> Poll> { + let this = self.get_mut(); + if this.read == 0 && this.myfile.is_some() { + let old = buf.remaining(); + let ext_file = Pin::new(this.myfile.as_mut().unwrap()); + ready!(ext_file.poll_read(cx, buf))?; + if buf.remaining() != old { + return Ok(()).into(); + } + } + let st = this.content.lock().unwrap(); + let rem = buf.remaining(); + let fill_len = Ord::min(st.in_mem.len() - this.read, rem); + let to_fill = &st.in_mem[this.read..this.read + fill_len]; + buf.put_slice(to_fill); + this.read += fill_len; + Ok(()).into() + } +} + +impl AsyncWrite for ForWrite { + fn poll_write( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + match self.get_mut() { + ForWrite::ZstdCompressed(c) => Pin::new(c).poll_write(cx, buf), + ForWrite::Plain(p) => Pin::new(p).poll_write(cx, buf), + } + } + + fn poll_flush( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + match self.get_mut() { + ForWrite::ZstdCompressed(c) => Pin::new(c).poll_flush(cx), + ForWrite::Plain(p) => Pin::new(p).poll_flush(cx), + } + } + + fn poll_shutdown( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + match self.get_mut() { + ForWrite::ZstdCompressed(c) => Pin::new(c).poll_shutdown(cx), + ForWrite::Plain(p) => Pin::new(p).poll_shutdown(cx), + } + } +} + +// NOTE: the implementation is exactly isomorphic to the implementation above. 
+// Perhaps we can implement AsyncWrite for Either where T, U : AsyncWrite. +impl AsyncWrite for PersistentFile { + fn poll_write( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + match self.get_mut() { + PersistentFile::Plain(f) => Pin::new(f).poll_write(cx, buf), + #[cfg(test)] + PersistentFile::Dynamic(d) => d.as_mut().poll_write(cx, buf), + PersistentFile::Closed => Err(std::io::Error::new( + std::io::ErrorKind::BrokenPipe, + "write to the tempfile has been marked done", + )) + .into(), + } + } + + fn poll_flush( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + match self.get_mut() { + PersistentFile::Plain(f) => Pin::new(f).poll_flush(cx), + #[cfg(test)] + PersistentFile::Dynamic(d) => d.as_mut().poll_flush(cx), + PersistentFile::Closed => Ok(()).into(), + } + } + + fn poll_shutdown( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + match self.get_mut() { + PersistentFile::Plain(f) => Pin::new(f).poll_shutdown(cx), + #[cfg(test)] + PersistentFile::Dynamic(d) => d.as_mut().poll_shutdown(cx), + PersistentFile::Closed => Ok(()).into(), + } + } +} + +impl PersistentFile { + async fn done(self) -> Result<()> { + match self { + PersistentFile::Plain(c) => { + // The current `sync` implementation of tokio file is spawning a new blocking + // thread. When we are spawning many blocking operations in the + // blocking threads, it is possible to dead lock (The current + // thread waiting for a thread that will be spawned after the + // current thread exits.) + // So we convert it to the std file and using the block version call. 
+ let std_file = c.into_std().await; + std_file.sync_all()?; + Ok(()) + } + #[cfg(test)] + PersistentFile::Dynamic(_) => Ok(()), + PersistentFile::Closed => Ok(()), + } + } +} + +#[inline(always)] +fn modify_and_update_cap_diff(v: &mut Vec, record: &AtomicUsize, f: impl FnOnce(&mut Vec)) { + let cap_old = v.capacity(); + f(v); + let cap_new = v.capacity(); + // when cap_new less than cap_old, the `diff` should be: + // `usize::MAX - (cap_old - cap_new)`. + // Then, + // `(record + diff) % usize::MAX` = + // `(record - (cap_old - cap_new)) + usize::MAX` = + // record - (cap_old - cap_new). + let diff = cap_new.wrapping_sub(cap_old); + if diff > 0 { + // `fetch_add` will wrap around when overflowing (instead of panicking). + record.fetch_add(diff, Ordering::Release); + // We are not going to use `AcqRel` at previous read, because there may be + // concurrent write to the variable and we may upload stale data. + TEMP_FILE_MEMORY_USAGE.set(record.load(Ordering::Acquire) as _) + } +} + +#[cfg(test)] +mod test { + use std::{ + io::Read, + mem::ManuallyDrop, + path::Path, + pin::Pin, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + }; + + use async_compression::tokio::bufread::ZstdDecoder; + use kvproto::brpb::CompressionType; + use tempfile::tempdir; + use tokio::io::{AsyncReadExt, AsyncWrite, AsyncWriteExt, BufReader}; + use walkdir::WalkDir; + + use super::{Config, TempFilePool}; + use crate::{tempfiles::ForWrite, utils::CompressionWriter}; + + fn rt_for_test() -> tokio::runtime::Runtime { + tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .enable_all() + .build() + .unwrap() + } + + fn simple_pool_with_modify(m: impl FnOnce(&mut Config)) -> Arc { + let mut cfg = Config { + cache_size: AtomicUsize::new(100000), + swap_files: std::env::temp_dir().join(format!( + "backup_stream::tempfiles::test::{}", + std::process::id() + )), + content_compression: CompressionType::Unknown, + minimal_swap_out_file_size: 8192, + write_buffer_size: 4096, + }; + 
m(&mut cfg); + Arc::new(TempFilePool::new(cfg).unwrap()) + } + + fn simple_pool_with_soft_max(soft_max: usize) -> Arc { + simple_pool_with_modify(|cfg| { + cfg.cache_size = AtomicUsize::new(soft_max); + cfg.minimal_swap_out_file_size = 8192.min(soft_max) + }) + } + + #[test] + fn test_read() { + let pool = simple_pool_with_soft_max(255); + let mut f = pool.open_for_write("hello.txt".as_ref()).unwrap(); + let rt = rt_for_test(); + rt.block_on(f.write(b"Hello, world.")).unwrap(); + drop(f); + let mut cur = pool.open_raw_for_read("hello.txt".as_ref()).unwrap(); + rt.block_on(rt.spawn(async move { + let mut buf = [0u8; 6]; + assert_eq!(cur.read(&mut buf[..]).await.unwrap(), 6); + assert_eq!(&buf, b"Hello,"); + let mut buf = [0u8; 6]; + assert_eq!( + cur.read(&mut buf[..]).await.unwrap(), + 6, + "{}", + buf.escape_ascii() + ); + assert_eq!(&buf, b" world"); + })) + .unwrap(); + } + + #[test] + fn test_swapout() { + let pool = simple_pool_with_modify(|cfg| { + cfg.cache_size = AtomicUsize::new(30); + cfg.minimal_swap_out_file_size = 30; + cfg.write_buffer_size = 30; + }); + let mut f = pool.open_for_write("world.txt".as_ref()).unwrap(); + let rt = rt_for_test(); + rt.block_on(f.write(b"Once the word count...")).unwrap(); + rt.block_on(f.write(b"Reachs 30. The content of files shall be swaped out to the disk.")) + .unwrap(); + rt.block_on(f.write(b"Isn't it? This swap will be finished in this call.")) + .unwrap(); + rt.block_on(f.done()).unwrap(); + let mut cur = pool.open_raw_for_read("world.txt".as_ref()).unwrap(); + let mut buf = vec![]; + rt.block_on(cur.read_to_end(&mut buf)).unwrap(); + let excepted = b"Once the word count...Reachs 30. The content of files shall be swaped out to the disk.Isn't it? This swap will be finished in this call."; + assert_eq!( + excepted, + buf.as_slice(), + "\n{}\n ## \n{}", + excepted.escape_ascii(), + buf.escape_ascii() + ); + + // The newly written bytes would be kept in memory. + let excepted = b"Once the word count...Reachs 30. 
The content of files shall be swaped out to the disk."; + let mut local_file = pool + .open_relative("world.txt".as_ref()) + .unwrap() + .try_into_std() + .unwrap(); + buf.clear(); + local_file.read_to_end(&mut buf).unwrap(); + assert_eq!( + excepted, + buf.as_slice(), + "\n{}\n ## \n{}", + excepted.escape_ascii(), + buf.escape_ascii() + ); + } + + #[test] + fn test_compression() { + let pool = simple_pool_with_modify(|cfg| { + cfg.content_compression = CompressionType::Zstd; + cfg.cache_size = AtomicUsize::new(15); + cfg.minimal_swap_out_file_size = 15; + }); + let file_name = "compression.bin"; + let rt = rt_for_test(); + let mut f = pool.open_for_write(file_name.as_ref()).unwrap(); + let content_to_write : [&[u8]; 4] = [ + b"Today, we are going to test the compression.", + b"Well, once swaped out, the current implementation will keep new content in the buffer.", + b"...until it reachs a constant. (That may be configuriable while you are reading this.)", + b"Meow!", + ]; + for content in content_to_write { + assert_eq!(rt.block_on(f.write(content)).unwrap(), content.len()); + match &mut f { + // Flush the compressed writer so we can test swapping out. 
+ ForWrite::ZstdCompressed(z) => rt.block_on(z.flush()).unwrap(), + ForWrite::Plain(_) => unreachable!(), + } + } + rt.block_on(f.done()).unwrap(); + + let r = pool.open_raw_for_read(file_name.as_ref()).unwrap(); + let mut buf = vec![]; + let mut dr = ZstdDecoder::new(BufReader::new(r)); + rt.block_on(dr.read_to_end(&mut buf)).unwrap(); + let required = content_to_write.join(&b""[..]); + assert_eq!(required, buf); + } + + #[test] + fn test_write_many_times() { + let mut pool = simple_pool_with_modify(|cfg| { + cfg.cache_size = AtomicUsize::new(15); + cfg.minimal_swap_out_file_size = 15; + }); + Arc::get_mut(&mut pool).unwrap().override_swapout = Some(Box::new(|p| { + println!("creating {}", p.display()); + Box::pin(ThrottleWrite(tokio::fs::File::from_std( + std::fs::File::create(p).unwrap(), + ))) + })); + struct ThrottleWrite(R); + impl AsyncWrite for ThrottleWrite { + fn poll_write( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + buf: &[u8], + ) -> std::task::Poll> { + let take = 2.min(buf.len()); + Pin::new(&mut self.0).poll_write(cx, &buf[..take]) + } + + fn poll_flush( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + Pin::new(&mut self.0).poll_flush(cx) + } + + fn poll_shutdown( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + Pin::new(&mut self.0).poll_shutdown(cx) + } + } + let file_name = "evil-os.txt"; + let rt = rt_for_test(); + let content_to_write: [&[u8]; 4] = [ + b"In this case, we are going to test over a evil OS.", + b"In that OS, every `write` system call only writes 2 bytes.", + b"That is a sort of hell... A nightmare of computer scientists.", + b"Thankfully we are just testing. 
May such OS never exist.", + ]; + + let mut f = pool.open_for_write(file_name.as_ref()).unwrap(); + for content in content_to_write { + assert_eq!(rt.block_on(f.write(content)).unwrap(), content.len()); + } + rt.block_on(f.done()).unwrap(); + let mut dr = pool.open_raw_for_read(file_name.as_ref()).unwrap(); + let mut buf = vec![]; + rt.block_on(dr.read_to_end(&mut buf)).unwrap(); + let required = content_to_write.join(&b""[..]); + assert_eq!(required, buf); + } + + #[test] + fn test_read_many_times() { + let pool = simple_pool_with_modify(|cfg| { + cfg.cache_size = AtomicUsize::new(15); + cfg.minimal_swap_out_file_size = 15; + }); + let file_name = "read many times.txt"; + let rt = rt_for_test(); + let mut f = pool.open_for_write(file_name.as_ref()).unwrap(); + let content_to_write: [&[u8]; 4] = [ + b"In this case, we are going to make sure that a file can be read many times after", + b"Before this file deleted, we should be able to read it many times.", + b"(Which is essential for retrying.)", + b"But when to delete them? You shall delete them after uploading them manually.", + ]; + + for content in content_to_write { + assert_eq!(rt.block_on(f.write(content)).unwrap(), content.len()); + } + rt.block_on(f.done()).unwrap(); + + let mut buf = vec![]; + for _ in 0..3 { + let mut r = pool.open_raw_for_read(file_name.as_ref()).unwrap(); + rt.block_on(r.read_to_end(&mut buf)).unwrap(); + assert_eq!(content_to_write.join(&b""[..]), buf.as_slice()); + buf.clear(); + } + pool.open_for_write(file_name.as_ref()) + .expect("should be able to write again once all reader exits"); + } + + fn assert_dir_empty(p: &Path) { + for file in WalkDir::new(p) { + let file = file.unwrap(); + if file.depth() > 0 { + panic!("file leaked: {}", file.path().display()); + } + } + } + + #[test] + fn test_not_leaked() { + // Open a distinct dir for this case. 
+ let tmp = tempdir().unwrap(); + let pool = simple_pool_with_modify(|cfg| { + cfg.cache_size = AtomicUsize::new(15); + cfg.minimal_swap_out_file_size = 15; + cfg.swap_files = tmp.path().to_owned(); + }); + let rt = rt_for_test(); + let content_to_write: [&[u8]; 4] = [ + b"This case tests whether the resource(Say, files, memory.) leaked.", + b"That is it, but I wanna write 4 sentences to keep every case aliged.", + b"What to write? Perhaps some poems or lyrics.", + b"But will that bring some copyright conflicts? Emmm, 4 sentences already, bye.", + ]; + let file_names = ["object-a.txt", "object-b.txt"]; + + let mut buf = vec![]; + for file_name in file_names { + let mut f = pool.open_for_write(file_name.as_ref()).unwrap(); + for content in content_to_write { + assert_eq!(rt.block_on(f.write(content)).unwrap(), content.len()); + } + rt.block_on(f.done()).unwrap(); + let mut r = pool.open_raw_for_read(file_name.as_ref()).unwrap(); + rt.block_on(r.read_to_end(&mut buf)).unwrap(); + assert_eq!(content_to_write.join(&b""[..]), buf.as_slice()); + buf.clear(); + } + for file_name in file_names { + assert!(pool.remove(file_name.as_ref())); + } + assert_eq!(pool.current.load(Ordering::SeqCst), 0); + assert_dir_empty(tmp.path()); + } + + #[test] + fn test_panic_not_leaked() { + let tmp = tempdir().unwrap(); + let pool = simple_pool_with_modify(|cfg| { + cfg.cache_size = AtomicUsize::new(15); + cfg.minimal_swap_out_file_size = 15; + cfg.swap_files = tmp.path().to_owned(); + }); + let rt = rt_for_test(); + let content_to_write: [&[u8]; 4] = [ + b"This case is pretty like the previous case, the different is in this case...", + b"We are going to simulating TiKV panic. That will be implemented by leak the pool itself.", + b"Emm, is there information need to be added? Nope. 
Well let me write you a random string.", + b"A cat in my dream, leaps across the fence around the yard.", + ]; + let mut f = pool.open_for_write("delete-me.txt".as_ref()).unwrap(); + for content in content_to_write { + assert_eq!(rt.block_on(f.write(content)).unwrap(), content.len()); + } + drop(f); + // TiKV panicked! + let _ = ManuallyDrop::new(pool); + + let pool = simple_pool_with_modify(|cfg| { + cfg.swap_files = tmp.path().to_owned(); + }); + assert_dir_empty(tmp.path()); + let mut f = pool.open_for_write("delete-me.txt".as_ref()).unwrap(); + for content in content_to_write { + assert_eq!(rt.block_on(f.write(content)).unwrap(), content.len()); + } + drop(f); + // Happy path. + drop(pool); + assert_dir_empty(tmp.path()); + } +} diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index d94ba59b2d5..1b150eaa1f0 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -598,18 +598,18 @@ pub fn is_overlapping(range: (&[u8], &[u8]), range2: (&[u8], &[u8])) -> bool { } /// read files asynchronously in sequence -pub struct FilesReader { - files: Vec, +pub struct FilesReader { + files: Vec, index: usize, } -impl FilesReader { - pub fn new(files: Vec) -> Self { +impl FilesReader { + pub fn new(files: Vec) -> Self { FilesReader { files, index: 0 } } } -impl AsyncRead for FilesReader { +impl AsyncRead for FilesReader { fn poll_read( self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -635,7 +635,7 @@ impl AsyncRead for FilesReader { #[async_trait::async_trait] pub trait CompressionWriter: AsyncWrite + Sync + Send { /// call the `File.sync_all()` to flush immediately to disk. - async fn done(mut self: Pin<&mut Self>) -> Result<()>; + async fn done(&mut self) -> Result<()>; } /// a writer dispatcher for different compression type. 
@@ -644,11 +644,11 @@ pub trait CompressionWriter: AsyncWrite + Sync + Send { pub async fn compression_writer_dispatcher( local_path: impl AsRef, compression_type: CompressionType, -) -> Result>> { +) -> Result> { let inner = BufWriter::with_capacity(128 * 1024, File::create(local_path.as_ref()).await?); match compression_type { - CompressionType::Unknown => Ok(Box::pin(NoneCompressionWriter::new(inner))), - CompressionType::Zstd => Ok(Box::pin(ZstdCompressionWriter::new(inner))), + CompressionType::Unknown => Ok(Box::new(NoneCompressionWriter::new(inner))), + CompressionType::Zstd => Ok(Box::new(ZstdCompressionWriter::new(inner))), _ => Err(Error::Other(box_err!(format!( "the compression type is unimplemented, compression type id {:?}", compression_type @@ -688,7 +688,7 @@ impl AsyncWrite for NoneCompressionWriter { #[async_trait::async_trait] impl CompressionWriter for NoneCompressionWriter { - async fn done(mut self: Pin<&mut Self>) -> Result<()> { + async fn done(&mut self) -> Result<()> { let bufwriter = &mut self.inner; bufwriter.flush().await?; bufwriter.get_ref().sync_all().await?; @@ -697,19 +697,27 @@ impl CompressionWriter for NoneCompressionWriter { } /// use zstd compression algorithm -pub struct ZstdCompressionWriter { - inner: ZstdEncoder>, +pub struct ZstdCompressionWriter { + inner: ZstdEncoder, } -impl ZstdCompressionWriter { - pub fn new(inner: BufWriter) -> Self { +impl ZstdCompressionWriter { + pub fn new(inner: R) -> Self { ZstdCompressionWriter { inner: ZstdEncoder::with_quality(inner, Level::Fastest), } } + + pub fn get_ref(&self) -> &R { + self.inner.get_ref() + } + + pub fn get_mut(&mut self) -> &mut R { + self.inner.get_mut() + } } -impl AsyncWrite for ZstdCompressionWriter { +impl AsyncWrite for ZstdCompressionWriter { fn poll_write( self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -729,13 +737,12 @@ impl AsyncWrite for ZstdCompressionWriter { } #[async_trait::async_trait] -impl CompressionWriter for ZstdCompressionWriter { - async fn 
done(mut self: Pin<&mut Self>) -> Result<()> { +impl CompressionWriter for ZstdCompressionWriter { + async fn done(&mut self) -> Result<()> { let encoder = &mut self.inner; encoder.shutdown().await?; let bufwriter = encoder.get_mut(); bufwriter.flush().await?; - bufwriter.get_ref().sync_all().await?; Ok(()) } } @@ -1132,7 +1139,7 @@ mod test { .await .unwrap(); writer.write_all(content.as_bytes()).await.unwrap(); - writer.as_mut().done().await.unwrap(); + writer.done().await.unwrap(); let mut reader = BufReader::new(File::open(path1).await.unwrap()); let mut read_content = String::new(); @@ -1145,7 +1152,7 @@ mod test { .await .unwrap(); writer.write_all(content.as_bytes()).await.unwrap(); - writer.as_mut().done().await.unwrap(); + writer.done().await.unwrap(); use async_compression::tokio::bufread::ZstdDecoder; let mut reader = ZstdDecoder::new(BufReader::new(File::open(path2).await.unwrap())); diff --git a/components/backup-stream/tests/suite.rs b/components/backup-stream/tests/suite.rs index 87ea608e178..68f53a4a65e 100644 --- a/components/backup-stream/tests/suite.rs +++ b/components/backup-stream/tests/suite.rs @@ -16,7 +16,7 @@ use backup_stream::{ MetadataClient, StreamTask, }, observer::BackupStreamObserver, - router::Router, + router::{Router, TaskSelector}, utils, BackupStreamResolver, Endpoint, GetCheckpointResult, RegionCheckpointOperation, RegionSet, Service, Task, }; @@ -784,25 +784,30 @@ impl Suite { } pub fn wait_for_flush(&self) { - use std::ffi::OsString; - std::fs::File::open(&self.temp_files) - .unwrap() - .sync_all() - .unwrap(); - for _ in 0..100 { - if !walkdir::WalkDir::new(&self.temp_files) - .into_iter() - .any(|x| x.unwrap().path().extension() == Some(&OsString::from("log"))) - { - return; + let (tx, rx) = std::sync::mpsc::channel(); + self.run(|| { + let tx = tx.clone(); + Task::Sync( + Box::new(move || { + tx.send(()).unwrap(); + }), + Box::new(move |r| { + let task_names = block_on(r.select_task(TaskSelector::All.reference())); + for 
task_name in task_names { + let tsk = block_on(r.get_task_info(&task_name)); + if tsk.unwrap().is_flushing() { + return false; + } + } + true + }), + ) + }); + for _ in self.endpoints.iter() { + // Receive messages from each store. + if rx.recv_timeout(Duration::from_secs(30)).is_err() { + panic!("the temp isn't empty after the deadline"); } - std::thread::sleep(Duration::from_secs(1)); - } - let v = walkdir::WalkDir::new(&self.temp_files) - .into_iter() - .collect::>(); - if !v.is_empty() { - panic!("the temp isn't empty after the deadline ({:?})", v) } } diff --git a/src/config/mod.rs b/src/config/mod.rs index f06f2936374..acede457e55 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2737,6 +2737,13 @@ pub struct BackupStreamConfig { pub temp_path: String, pub file_size_limit: ReadableSize, + + #[doc(hidden)] + #[serde(skip_serializing)] + #[online_config(skip)] + // Let's hide this config for now. + pub temp_file_memory_quota: ReadableSize, + #[online_config(skip)] pub initial_scan_pending_memory_quota: ReadableSize, #[online_config(skip)] @@ -2776,7 +2783,14 @@ impl Default for BackupStreamConfig { fn default() -> Self { let cpu_num = SysQuota::cpu_cores_quota(); let total_mem = SysQuota::memory_limit_in_bytes(); + let file_size_limit = ReadableSize::mb(256); + // Don't use too many memory. + let temp_file_quota = total_mem / 16; let quota_size = (total_mem as f64 * 0.1).min(ReadableSize::mb(512).0 as _); + // 2x of the max pending bytes. The extra buffer make us easier to keep all + // files in memory. 
+ let preferred_cache_size = file_size_limit.0 * 2; + let cache_size = ReadableSize(temp_file_quota.min(preferred_cache_size)); Self { min_ts_interval: ReadableDuration::secs(10), max_flush_interval: ReadableDuration::minutes(3), @@ -2785,9 +2799,10 @@ impl Default for BackupStreamConfig { enable: true, // TODO: may be use raft store directory temp_path: String::new(), - file_size_limit: ReadableSize::mb(256), + file_size_limit, initial_scan_pending_memory_quota: ReadableSize(quota_size as _), initial_scan_rate_limit: ReadableSize::mb(60), + temp_file_memory_quota: cache_size, } } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index e7208f98f4e..b1642263855 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -810,6 +810,7 @@ fn test_serde_custom_tikv_config() { initial_scan_pending_memory_quota: ReadableSize::kb(2), initial_scan_rate_limit: ReadableSize::mb(3), min_ts_interval: ReadableDuration::secs(2), + ..Default::default() }; value.import = ImportConfig { num_threads: 123, From 6ca4a629a1c88c11c29063704fcf511e26970f43 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 4 Jul 2023 14:59:15 +0800 Subject: [PATCH 0771/1149] raftstore-v2: check region epoch before response read index (#15046) close tikv/tikv#14699 raftstore-v2: check region epoch before response read index Signed-off-by: Neil Shen Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../raftstore-v2/src/operation/query/lease.rs | 22 ++++-- .../raftstore/test_split_region.rs | 69 ++++++++++++++++++- 2 files changed, 86 insertions(+), 5 deletions(-) diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index 1f470eb9c4e..84a8ad09ed3 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -10,13 +10,13 @@ use raft::{ }; use 
raftstore::{ store::{ - can_amend_read, + can_amend_read, cmd_resp, fsm::{apply::notify_stale_req, new_read_index_request}, metrics::RAFT_READ_INDEX_PENDING_COUNT, - msg::ReadCallback, + msg::{ErrorCallback, ReadCallback}, propose_read_index, should_renew_lease, simple_write::SimpleWriteEncoder, - util::LeaseState, + util::{check_req_region_epoch, LeaseState}, ReadDelegate, ReadIndexRequest, ReadProgress, Transport, }, Error, Result, @@ -186,7 +186,7 @@ impl Peer { ); RAFT_READ_INDEX_PENDING_COUNT.sub(read_index_req.cmds().len() as i64); let time = monotonic_raw_now(); - for (_, ch, mut read_index) in read_index_req.take_cmds().drain(..) { + for (req, ch, mut read_index) in read_index_req.take_cmds().drain(..) { ch.read_tracker().map(|tracker| { GLOBAL_TRACKERS.with_tracker(tracker, |t| { t.metrics.read_index_confirm_wait_nanos = (time - read_index_req.propose_time) @@ -197,6 +197,20 @@ impl Peer { }) }); + // Check region epoch before responding read index because region + // may be splitted or merged during read index. + if let Err(e) = check_req_region_epoch(&req, self.region(), true) { + debug!(self.logger, + "read index epoch not match"; + "region_id" => self.region_id(), + "err" => ?e, + ); + let mut response = cmd_resp::new_error(e); + cmd_resp::bind_term(&mut response, self.term()); + ch.report_error(response); + return; + } + // Key lock should not happen when read_index is running at the leader. // Because it only happens when concurrent read and write requests on the same // region on different TiKVs. 
diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 1310ca04a96..ee64d1a1b1d 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -20,12 +20,16 @@ use raftstore::{ store::{Bucket, BucketRange, Callback, WriteResponse}, Result, }; +use raftstore_v2::router::QueryResult; use test_raftstore::*; use test_raftstore_macro::test_case; +use test_raftstore_v2::Simulator as S2; use tikv::storage::{kv::SnapshotExt, Snapshot}; -use tikv_util::config::*; +use tikv_util::{config::*, future::block_on_timeout}; use txn_types::{Key, LastChange, PessimisticLock}; +use crate::tikv_util::HandyRwLock; + pub const REGION_MAX_SIZE: u64 = 50000; pub const REGION_SPLIT_SIZE: u64 = 30000; @@ -1431,3 +1435,66 @@ fn test_node_slow_split_does_not_prevent_leader_election() { cluster.must_put(b"k0", b"v0"); } + +// A filter that disable read index by heartbeat. +#[derive(Clone)] +struct EraseHeartbeatContext; + +impl Filter for EraseHeartbeatContext { + fn before(&self, msgs: &mut Vec) -> Result<()> { + for msg in msgs { + if msg.get_message().get_msg_type() == MessageType::MsgHeartbeat { + msg.mut_message().clear_context(); + } + } + Ok(()) + } +} + +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_split_during_read_index() { + let mut cluster = new_cluster(0, 3); + configure_for_lease_read(&mut cluster.cfg, None, Some(5000)); + cluster.cfg.raft_store.snap_wait_split_duration = ReadableDuration::hours(1); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + let region_id = cluster.run_conf_change(); + + pd_client.must_add_peer(region_id, new_peer(2, 2)); + pd_client.must_add_peer(region_id, new_peer(3, 3)); + + let region = cluster.get_region(b""); + + // Delay read index. 
+ cluster.add_recv_filter_on_node(2, Box::new(EraseHeartbeatContext)); + cluster.add_recv_filter_on_node(3, Box::new(EraseHeartbeatContext)); + let mut request = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![new_read_index_cmd()], + true, + ); + request.mut_header().set_peer(new_peer(1, 1)); + let (msg, sub) = raftstore_v2::router::PeerMsg::raft_query(request.clone()); + cluster + .sim + .rl() + .async_peer_msg_on_node(1, region.get_id(), msg) + .unwrap(); + + cluster.split_region(®ion, b"a", Callback::None); + + // Enable read index + cluster.clear_recv_filter_on_node(2); + cluster.clear_recv_filter_on_node(3); + + match block_on_timeout( + Box::pin(async { sub.result().await }), + Duration::from_secs(5), + ) { + Ok(Some(QueryResult::Response(resp))) if resp.get_header().has_error() => {} + other => { + panic!("{:?}", other); + } + } +} From 4c7dd8bb18aba3c71584a7e84d8458ab51f382ef Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 4 Jul 2023 18:44:14 +0800 Subject: [PATCH 0772/1149] raftstore-v2: adaptive manual flush rate (#14909) ref tikv/tikv#12842 Lift up manual flush rate when total flush rate is high. 
Signed-off-by: tabokie Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/engine_panic/src/misc.rs | 6 +- components/engine_rocks/src/misc.rs | 36 ++++++-- components/engine_traits/src/misc.rs | 19 +++- components/raftstore-v2/src/batch/store.rs | 101 +++++++++++++-------- components/raftstore/src/store/config.rs | 3 +- 5 files changed, 110 insertions(+), 55 deletions(-) diff --git a/components/engine_panic/src/misc.rs b/components/engine_panic/src/misc.rs index 114dc8a4853..8da5c48d3e6 100644 --- a/components/engine_panic/src/misc.rs +++ b/components/engine_panic/src/misc.rs @@ -37,7 +37,7 @@ impl MiscExt for PanicEngine { &self, wait: bool, age_threshold: Option, - ) -> Result<()> { + ) -> Result { panic!() } @@ -125,4 +125,8 @@ impl MiscExt for PanicEngine { ) -> Result> { panic!() } + + fn get_accumulated_flush_count_cf(cf: &str) -> Result { + panic!() + } } diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index 417431d0ffc..b1406cacdb8 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -9,8 +9,12 @@ use rocksdb::{FlushOptions, Range as RocksRange}; use tikv_util::{box_try, keybuilder::KeyBuilder}; use crate::{ - engine::RocksEngine, r2e, rocks_metrics::RocksStatisticsReporter, rocks_metrics_defs::*, - sst::RocksSstWriterBuilder, util, RocksSstWriter, + engine::RocksEngine, + r2e, + rocks_metrics::{RocksStatisticsReporter, STORE_ENGINE_EVENT_COUNTER_VEC}, + rocks_metrics_defs::*, + sst::RocksSstWriterBuilder, + util, RocksSstWriter, }; pub const MAX_DELETE_COUNT_BY_KEY: usize = 2048; @@ -170,7 +174,7 @@ impl MiscExt for RocksEngine { &self, wait: bool, age_threshold: Option, - ) -> Result<()> { + ) -> Result { let cfs = self.cf_names(); let mut handles = Vec::with_capacity(cfs.len()); for cf in cfs { @@ -191,12 +195,13 @@ impl MiscExt for RocksEngine { fopts.set_allow_write_stall(true); fopts.set_check_if_compaction_disabled(true); 
fopts.set_expected_oldest_key_time(time); - return self + self .as_inner() .flush_cf(handle, &fopts) - .map_err(r2e); + .map_err(r2e)?; + return Ok(true); } - Ok(()) + Ok(false) } fn delete_ranges_cf( @@ -436,6 +441,13 @@ impl MiscExt for RocksEngine { .as_inner() .get_approximate_active_memtable_stats_cf(handle)) } + + fn get_accumulated_flush_count_cf(cf: &str) -> Result { + let n = STORE_ENGINE_EVENT_COUNTER_VEC + .with_label_values(&["kv", cf, "flush"]) + .get(); + Ok(n) + } } #[cfg(test)] @@ -790,16 +802,20 @@ mod tests { assert_eq!(db.get_total_sst_files_size_cf("write").unwrap().unwrap(), 0); assert_eq!(db.get_total_sst_files_size_cf("lock").unwrap().unwrap(), 0); let now = std::time::SystemTime::now(); - db.flush_oldest_cf(true, Some(now - std::time::Duration::from_secs(5))) - .unwrap(); + assert!( + !db.flush_oldest_cf(true, Some(now - std::time::Duration::from_secs(5))) + .unwrap() + ); assert_eq!( db.get_total_sst_files_size_cf("default").unwrap().unwrap(), 0 ); assert_eq!(db.get_total_sst_files_size_cf("write").unwrap().unwrap(), 0); assert_eq!(db.get_total_sst_files_size_cf("lock").unwrap().unwrap(), 0); - db.flush_oldest_cf(true, Some(now - std::time::Duration::from_secs(1))) - .unwrap(); + assert!( + db.flush_oldest_cf(true, Some(now - std::time::Duration::from_secs(1))) + .unwrap() + ); assert_eq!(db.get_total_sst_files_size_cf("write").unwrap().unwrap(), 0); assert_eq!(db.get_total_sst_files_size_cf("lock").unwrap().unwrap(), 0); assert!(db.get_total_sst_files_size_cf("default").unwrap().unwrap() > 0); diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index 4494c32a356..7871b3b8ecc 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -75,11 +75,9 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt + WriteBatchExt { fn flush_cf(&self, cf: &str, wait: bool) -> Result<()>; - fn flush_oldest_cf( - &self, - wait: bool, - age_threshold: Option, - ) -> Result<()>; + /// 
Returns `false` if all memtables are created after `threshold`. + fn flush_oldest_cf(&self, wait: bool, threshold: Option) + -> Result; /// Returns whether there's data written through kv interface. fn delete_ranges_cfs( @@ -169,4 +167,15 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt + WriteBatchExt { } false } + + // Global method. + fn get_accumulated_flush_count_cf(cf: &str) -> Result; + + fn get_accumulated_flush_count() -> Result { + let mut n = 0; + for cf in crate::ALL_CFS { + n += Self::get_accumulated_flush_count_cf(cf)?; + } + Ok(n) + } } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 828525f688c..2c24fe53631 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -21,6 +21,7 @@ use crossbeam::channel::TrySendError; use encryption_export::DataKeyManager; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use file_system::{set_io_type, IoType, WithIoType}; +use futures::compat::Future01CompatExt; use kvproto::{disk_usage::DiskUsage, raft_serverpb::RaftMessage}; use pd_client::PdClient; use raft::{StateRole, INVALID_ID}; @@ -47,7 +48,7 @@ use tikv_util::{ log::SlogFormat, sys::SysQuota, time::{duration_to_sec, Instant as TiInstant, Limiter}, - timer::SteadyTimer, + timer::{SteadyTimer, GLOBAL_TIMER_HANDLE}, worker::{Builder, LazyWorker, Scheduler, Worker}, yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, Either, @@ -65,8 +66,8 @@ use crate::{ Error, Result, }; -const MIN_MANUAL_FLUSH_RATE: f64 = 0.3; -const MAX_MANUAL_FLUSH_PERIOD: Duration = Duration::from_secs(60); +const MIN_MANUAL_FLUSH_RATE: f64 = 0.2; +const MAX_MANUAL_FLUSH_PERIOD: Duration = Duration::from_secs(90); /// A per-thread context shared by the [`StoreFsm`] and multiple [`PeerFsm`]s. 
pub struct StoreContext { @@ -662,45 +663,71 @@ impl StoreSystem { let logger = self.logger.clone(); let router = router.clone(); let registry = tablet_registry.clone(); - let limiter = Limiter::new(MIN_MANUAL_FLUSH_RATE); - let mut max_rate = cfg.value().max_manual_flush_rate; - if max_rate < MIN_MANUAL_FLUSH_RATE { - max_rate = MIN_MANUAL_FLUSH_RATE; - } - worker.spawn_interval_task(cfg.value().raft_engine_purge_interval.0, move || { - let _guard = WithIoType::new(IoType::RewriteLog); - match raft_clone.manual_purge() { - Ok(mut regions) => { - if regions.is_empty() { - return; + let base_max_rate = cfg + .value() + .max_manual_flush_rate + .clamp(MIN_MANUAL_FLUSH_RATE, f64::INFINITY); + let mut last_flush = ( + EK::get_accumulated_flush_count().unwrap(), + TiInstant::now_coarse(), + ); + worker.spawn_interval_async_task(cfg.value().raft_engine_purge_interval.0, move || { + let regions = { + let _guard = WithIoType::new(IoType::RewriteLog); + match raft_clone.manual_purge() { + Err(e) => { + warn!(logger, "purge expired files"; "err" => %e); + Vec::new() + } + Ok(regions) => regions, + } + }; + // Lift up max rate if the background flush rate is high. + let flush_count = EK::get_accumulated_flush_count().unwrap(); + let now = TiInstant::now_coarse(); + let duration = now.saturating_duration_since(last_flush.1).as_secs_f64(); + let max_rate = if duration > 10.0 { + let total_flush_rate = (flush_count - last_flush.0) as f64 / duration; + last_flush = (flush_count, now); + base_max_rate.clamp(total_flush_rate, f64::INFINITY) + } else { + base_max_rate + }; + // Try to finish flush just in time. + let rate = regions.len() as f64 / MAX_MANUAL_FLUSH_PERIOD.as_secs_f64(); + let rate = rate.clamp(MIN_MANUAL_FLUSH_RATE, max_rate); + // Return early if there're too many regions. Otherwise even if we manage to + // compact regions, the space can't be reclaimed in time. 
+ let mut to_flush = (rate * MAX_MANUAL_FLUSH_PERIOD.as_secs_f64()) as usize; + // Skip tablets that are flushed elsewhere. + let threshold = std::time::SystemTime::now() - MAX_MANUAL_FLUSH_PERIOD; + for r in ®ions { + let _ = router.send(*r, PeerMsg::ForceCompactLog); + } + let registry = registry.clone(); + let logger = logger.clone(); + let limiter = Limiter::new(rate); + async move { + for r in regions { + if to_flush == 0 { + break; } - warn!(logger, "flushing oldest cf of regions {regions:?}"); - // Try to finish flush in 1m. - let rate = regions.len() as f64 / MAX_MANUAL_FLUSH_PERIOD.as_secs_f64(); - let rate = rate.clamp(MIN_MANUAL_FLUSH_RATE, max_rate); - limiter.set_speed_limit(rate); - // Return early if there're too many regions. - regions.truncate((rate * MAX_MANUAL_FLUSH_PERIOD.as_secs_f64()) as usize); - // Skip tablets that are flushed elsewhere. - let threshold = std::time::SystemTime::now() - Duration::from_secs(60 * 2); - for r in regions { - let _ = router.send(r, PeerMsg::ForceCompactLog); - if let Some(mut t) = registry.get(r) - && let Some(t) = t.latest() - { - if let Err(e) = t.flush_oldest_cf(true, Some(threshold)) { - warn!(logger, "failed to flush oldest cf"; "err" => %e); + if let Some(mut t) = registry.get(r) + && let Some(t) = t.latest() + { + match t.flush_oldest_cf(true, Some(threshold)) { + Err(e) => warn!(logger, "failed to flush oldest cf"; "err" => %e), + Ok(true) => { + to_flush -= 1; + let time = + std::time::Instant::now() + limiter.consume_duration(1); + let _ = GLOBAL_TIMER_HANDLE.delay(time).compat().await; } - } else { - continue; + _ => (), } - std::thread::sleep(limiter.consume_duration(1)); } } - Err(e) => { - warn!(logger, "purge expired files"; "err" => %e); - } - }; + } }); Some(worker) } else { diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 22c389099ab..46eda405767 100644 --- a/components/raftstore/src/store/config.rs +++ 
b/components/raftstore/src/store/config.rs @@ -94,7 +94,6 @@ pub struct Config { pub raft_log_reserve_max_ticks: usize, // Old logs in Raft engine needs to be purged peridically. pub raft_engine_purge_interval: ReadableDuration, - // TODO: make it auto adjusted based on background flush rate. #[doc(hidden)] #[online_config(hidden)] pub max_manual_flush_rate: f64, @@ -396,7 +395,7 @@ impl Default for Config { raft_log_gc_size_limit: None, raft_log_reserve_max_ticks: 6, raft_engine_purge_interval: ReadableDuration::secs(10), - max_manual_flush_rate: 1.0, + max_manual_flush_rate: 2.0, raft_entry_cache_life_time: ReadableDuration::secs(30), raft_reject_transfer_leader_duration: ReadableDuration::secs(3), split_region_check_tick_interval: ReadableDuration::secs(10), From 337582cfbb993bcde033f087dfafef77934f2514 Mon Sep 17 00:00:00 2001 From: qupeng Date: Wed, 5 Jul 2023 09:14:13 +0800 Subject: [PATCH 0773/1149] cdc: support EventFeedV2 RPC and introduce stream multiplexing (#14923) close tikv/tikv#14967 Signed-off-by: qupeng Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 12 +- components/cdc/src/delegate.rs | 4 + components/cdc/src/endpoint.rs | 729 ++++++++++++------ components/cdc/src/initializer.rs | 4 +- components/cdc/src/service.rs | 522 ++++++++----- .../cdc/tests/failpoints/test_endpoint.rs | 46 +- .../cdc/tests/failpoints/test_observe.rs | 63 +- components/cdc/tests/integrations/test_cdc.rs | 5 +- components/cdc/tests/mod.rs | 36 +- 9 files changed, 961 insertions(+), 460 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 13d4f1726e4..c84f3230d46 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2966,12 +2966,12 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#b8e6dcdd1030c1705883c1e3d41970fce62f5e46" +source = "git+https://github.com/pingcap/kvproto.git#05895f97d510500cb6651421f3d54efa8e1d6415" dependencies = [ "futures 0.3.15", 
"grpcio", "protobuf", - "protobuf-build 0.13.0", + "protobuf-build 0.14.1", "raft-proto", ] @@ -4402,7 +4402,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b2be70fa994657539e3c872cc54363c9bf28b0d7a7f774df70e9fd760df3bc4" dependencies = [ "bitflags", - "grpcio-compiler", "protobuf", "protobuf-codegen", "regex", @@ -4410,11 +4409,12 @@ dependencies = [ [[package]] name = "protobuf-build" -version = "0.14.0" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb3c02f54ecaf12572c1a60dbdb36b1f8f713a16105881143f2be84cca5bbe3" +checksum = "2df9942df2981178a930a72d442de47e2f0df18ad68e50a30f816f1848215ad0" dependencies = [ "bitflags", + "grpcio-compiler", "protobuf", "protobuf-codegen", "regex", @@ -4533,7 +4533,7 @@ source = "git+https://github.com/tikv/raft-rs?branch=master#f73766712a538c2f6eb1 dependencies = [ "bytes", "protobuf", - "protobuf-build 0.14.0", + "protobuf-build 0.14.1", ] [[package]] diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index a03eaae7ef7..7eb45480163 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -221,6 +221,9 @@ impl Downstream { pub fn get_conn_id(&self) -> ConnId { self.conn_id } + pub fn get_req_id(&self) -> u64 { + self.req_id + } } #[derive(Default)] @@ -710,6 +713,7 @@ impl Delegate { let event = Event { region_id, + request_id: downstream.get_req_id(), index, event: Some(Event_oneof_event::Entries(EventEntries { entries: entries_clone.into(), diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index eb6f488cdb9..9cd7367c7ca 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -61,7 +61,7 @@ use crate::{ initializer::Initializer, metrics::*, old_value::{OldValueCache, OldValueCallback}, - service::{Conn, ConnId, FeatureGate}, + service::{validate_kv_api, Conn, ConnId, FeatureGate}, CdcObserver, Error, }; @@ -74,10 +74,16 @@ const 
WARN_RESOLVED_TS_LAG_THRESHOLD: Duration = Duration::from_secs(600); const WARN_RESOLVED_TS_COUNT_THRESHOLD: usize = 10; pub enum Deregister { + Conn(ConnId), + Request { + conn_id: ConnId, + request_id: u64, + }, Downstream { + conn_id: ConnId, + request_id: u64, region_id: u64, downstream_id: DownstreamId, - conn_id: ConnId, err: Option, }, Delegate { @@ -85,7 +91,6 @@ pub enum Deregister { observe_id: ObserveId, err: Error, }, - Conn(ConnId), } impl_display_as_debug!(Deregister); @@ -94,16 +99,30 @@ impl fmt::Debug for Deregister { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut de = f.debug_struct("Deregister"); match self { + Deregister::Conn(ref conn_id) => de + .field("deregister", &"conn") + .field("conn_id", conn_id) + .finish(), + Deregister::Request { + ref conn_id, + ref request_id, + } => de + .field("deregister", &"request") + .field("conn_id", conn_id) + .field("request_id", request_id) + .finish(), Deregister::Downstream { + ref conn_id, + ref request_id, ref region_id, ref downstream_id, - ref conn_id, ref err, } => de .field("deregister", &"downstream") + .field("conn_id", conn_id) + .field("request_id", request_id) .field("region_id", region_id) .field("downstream_id", downstream_id) - .field("conn_id", conn_id) .field("err", err) .finish(), Deregister::Delegate { @@ -116,10 +135,6 @@ impl fmt::Debug for Deregister { .field("observe_id", observe_id) .field("err", err) .finish(), - Deregister::Conn(ref conn_id) => de - .field("deregister", &"conn") - .field("conn_id", conn_id) - .finish(), } } } @@ -136,12 +151,16 @@ pub enum Task { request: ChangeDataRequest, downstream: Downstream, conn_id: ConnId, - version: semver::Version, }, Deregister(Deregister), OpenConn { conn: Conn, }, + SetConnVersion { + conn_id: ConnId, + version: semver::Version, + explicit_features: Vec<&'static str>, + }, MultiBatch { multi: Vec, old_value_cb: OldValueCallback, @@ -188,7 +207,6 @@ impl fmt::Debug for Task { ref request, ref downstream, ref 
conn_id, - ref version, .. } => de .field("type", &"register") @@ -196,7 +214,6 @@ impl fmt::Debug for Task { .field("request", request) .field("id", &downstream.get_id()) .field("conn_id", conn_id) - .field("version", version) .finish(), Task::Deregister(deregister) => de .field("type", &"deregister") @@ -206,6 +223,16 @@ impl fmt::Debug for Task { .field("type", &"open_conn") .field("conn_id", &conn.get_id()) .finish(), + Task::SetConnVersion { + ref conn_id, + ref version, + ref explicit_features, + } => de + .field("type", &"set_conn_version") + .field("conn_id", conn_id) + .field("version", version) + .field("explicit_features", explicit_features) + .finish(), Task::MultiBatch { multi, .. } => de .field("type", &"multi_batch") .field("multi_batch", &multi.len()) @@ -513,94 +540,94 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, + ) { + let mut delegate = match self.capture_regions.entry(region_id) { + HashMapEntry::Vacant(_) => return, + HashMapEntry::Occupied(x) => x, + }; + if delegate.get_mut().unsubscribe(downstream_id, err) { + let observe_id = delegate.get().handle.id; + delegate.remove(); + self.deregister_observe(region_id, observe_id); + } + } + + fn deregister_observe(&mut self, region_id: u64, observe_id: ObserveId) { + let oid = self.observer.unsubscribe_region(region_id, observe_id); + assert!( + oid.is_some(), + "unsubscribe region {} failed, ObserveId {:?}", + region_id, + observe_id, + ); + } + fn on_deregister(&mut self, deregister: Deregister) { info!("cdc deregister"; "deregister" => ?deregister); fail_point!("cdc_before_handle_deregister", |_| {}); match deregister { + Deregister::Conn(conn_id) => { + let conn = self.connections.remove(&conn_id).unwrap(); + conn.iter_downstreams(|_, region_id, downstream_id, _| { + self.deregister_downstream(region_id, downstream_id, None); + }); + } + Deregister::Request { + conn_id, + request_id, + } => { + let conn = self.connections.get_mut(&conn_id).unwrap(); + for (region, downstream) in 
conn.unsubscribe_request(request_id) { + self.deregister_downstream(region, downstream, None); + } + } Deregister::Downstream { + conn_id, + request_id, region_id, downstream_id, - conn_id, err, } => { - // The downstream wants to deregister - let mut is_last = false; - if let Some(delegate) = self.capture_regions.get_mut(®ion_id) { - is_last = delegate.unsubscribe(downstream_id, err); - } - if let Some(conn) = self.connections.get_mut(&conn_id) { - if let Some(id) = conn.downstream_id(region_id) { - if downstream_id == id { - conn.unsubscribe(region_id); - } + let conn = match self.connections.get_mut(&conn_id) { + Some(conn) => conn, + None => return, + }; + if let Some(new_downstream_id) = conn.get_downstream(request_id, region_id) { + // To avoid ABA problem, we must check the unique DownstreamId. + if new_downstream_id == downstream_id { + conn.unsubscribe(request_id, region_id); + self.deregister_downstream(region_id, downstream_id, err); } } - if is_last { - let delegate = self.capture_regions.remove(®ion_id).unwrap(); - // Do not continue to observe the events of the region. - let id = delegate.handle.id; - let oid = self.observer.unsubscribe_region(region_id, id); - assert!( - oid.is_some(), - "unsubscribe region {} failed, ObserveId {:?}", - region_id, - id - ); - } } Deregister::Delegate { region_id, observe_id, err, } => { - // Something went wrong, deregister all downstreams of the region. - - // To avoid ABA problem, we must check the unique ObserveId. - let need_remove = self - .capture_regions - .get(®ion_id) - .map_or(false, |d| d.handle.id == observe_id); - if need_remove { - if let Some(mut delegate) = self.capture_regions.remove(®ion_id) { - delegate.stop(err); + let mut delegate = match self.capture_regions.entry(region_id) { + HashMapEntry::Vacant(_) => return, + HashMapEntry::Occupied(x) => { + // To avoid ABA problem, we must check the unique ObserveId. 
+ if x.get().handle.id != observe_id { + return; + } + x.remove() + } + }; + delegate.stop(err); + for downstream in delegate.downstreams() { + let request_id = downstream.get_req_id(); + for conn in &mut self.connections.values_mut() { + conn.unsubscribe(request_id, region_id); } - self.connections - .iter_mut() - .for_each(|(_, conn)| conn.unsubscribe(region_id)); - } - // Do not continue to observe the events of the region. - let oid = self.observer.unsubscribe_region(region_id, observe_id); - assert_eq!( - need_remove, - oid.is_some(), - "unsubscribe region {} failed, ObserveId {:?}", - region_id, - observe_id - ); - } - Deregister::Conn(conn_id) => { - // The connection is closed, deregister all downstreams of the connection. - if let Some(conn) = self.connections.remove(&conn_id) { - conn.take_downstreams().into_iter().for_each( - |(region_id, (downstream_id, _))| { - if let Some(delegate) = self.capture_regions.get_mut(®ion_id) { - delegate.unsubscribe(downstream_id, None); - if delegate.downstreams().is_empty() { - let delegate = self.capture_regions.remove(®ion_id).unwrap(); - // Do not continue to observe the events of the region. - let id = delegate.handle.id; - let oid = self.observer.unsubscribe_region(region_id, id); - assert!( - oid.is_some(), - "unsubscribe region {} failed, ObserveId {:?}", - region_id, - id - ); - } - } - }, - ); } + self.deregister_observe(region_id, delegate.handle.id); } } } @@ -610,33 +637,36 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint= FeatureGate::validate_cluster_id() && self.cluster_id != request_cluster_id { - let mut err_event = EventError::default(); - let mut err = ErrorClusterIdMismatch::default(); - err.set_current(self.cluster_id); - err.set_request(request_cluster_id); - err_event.set_cluster_id_mismatch(err); - - let _ = downstream.sink_error_event(region_id, err_event); - return; + // Check if the cluster id matches if supported. 
+ if conn.features().contains(FeatureGate::VALIDATE_CLUSTER_ID) { + let request_cluster_id = request.get_header().get_cluster_id(); + if self.cluster_id != request_cluster_id { + let mut err_event = EventError::default(); + let mut err = ErrorClusterIdMismatch::default(); + err.set_current(self.cluster_id); + err.set_request(request_cluster_id); + err_event.set_cluster_id_mismatch(err); + + let _ = downstream.sink_error_event(region_id, err_event); + return; + } } - if !FeatureGate::validate_kv_api(kv_api, api_version) { + if !validate_kv_api(kv_api, api_version) { error!("cdc RawKv is supported by api-version 2 only. TxnKv is not supported now."); let mut err_event = EventError::default(); let mut err = ErrorCompatibility::default(); @@ -656,16 +686,10 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, E: KvEngine, S: StoreRegionMeta> Endpoint region_id, "conn_id" => ?conn_id, - "req_id" => request.get_request_id(), + "req_id" => request_id, "downstream_id" => ?downstream_id); return; } @@ -692,7 +716,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint region_id, "conn_id" => ?conn.get_id(), - "req_id" => request.get_request_id(), + "req_id" => request_id, "observe_id" => ?observe_id, "downstream_id" => ?downstream_id); @@ -704,9 +728,9 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, E: KvEngine, S: StoreRegionMeta> Endpoint { CDC_SCAN_TASKS.with_label_values(&["abort"]).inc(); - error!("cdc initialize fail: {}", e; "region_id" => region_id); + error!( + "cdc initialize fail: {}", e; "region_id" => region_id, + "conn_id" => ?init.conn_id, "request_id" => init.request_id, + ); init.deregister_downstream(e) } } @@ -809,9 +836,10 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, E: KvEngine, S: StoreRegionMeta> Endpoint) { - let min_resolved_ts = min_resolved_ts.into_inner(); - let send_cdc_event = |regions: &HashSet, min_resolved_ts: u64, conn: &Conn| { - let downstream_regions = conn.get_downstreams(); + let send_cdc_event = |ts: u64, conn: &Conn, 
request_id: u64, regions: Vec| { let mut resolved_ts = ResolvedTs::default(); - resolved_ts.ts = min_resolved_ts; - resolved_ts.regions = Vec::with_capacity(downstream_regions.len()); - // Only send region ids that are captured by the connection. - for (region_id, (_, downstream_state)) in conn.get_downstreams() { - if regions.contains(region_id) && downstream_state.load().ready_for_advancing_ts() { - resolved_ts.regions.push(*region_id); - } - } - if resolved_ts.regions.is_empty() { - // Skip empty resolved ts message. - return; - } - // No need force send, as resolved ts messages is sent regularly. - // And errors can be ignored. + resolved_ts.ts = ts; + resolved_ts.request_id = request_id; + *resolved_ts.mut_regions() = regions; + let force_send = false; match conn .get_sink() @@ -939,64 +955,88 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint>::default(); + // one_way is fro STREAM_MULTIPLEXING disabled. + let mut one_way = HashMap::, Vec)>::default(); + for region_id in ®ions { + let d = match self.capture_regions.get(region_id) { + Some(d) => d, + None => continue, }; + for downstream in d.downstreams() { + if !downstream.get_state().load().ready_for_advancing_ts() { + continue; + } + let conn_id = downstream.get_conn_id(); + let features = self.connections.get(&conn_id).unwrap().features(); + if features.contains(FeatureGate::STREAM_MULTIPLEXING) { + multiplexing + .entry((conn_id, downstream.get_req_id())) + .or_insert_with(Default::default) + .push(*region_id); + } else { + let x = one_way.entry(conn_id).or_insert_with(Default::default); + x.0.push(downstream.get_req_id()); + x.1.push(*region_id); + } + } + } - if features.contains(FeatureGate::BATCH_RESOLVED_TS) { - send_cdc_event(®ions, min_resolved_ts, conn); + let min_resolved_ts = min_resolved_ts.into_inner(); + + for ((conn_id, request_id), regions) in multiplexing { + let conn = self.connections.get(&conn_id).unwrap(); + if conn.features().contains(FeatureGate::BATCH_RESOLVED_TS) { + 
send_cdc_event(min_resolved_ts, conn, request_id, regions); } else { - // Fallback to previous non-batch resolved ts event. - for region_id in ®ions { - self.broadcast_resolved_ts_compact(*region_id, min_resolved_ts, conn); + for region_id in regions { + self.broadcast_resolved_ts_compact( + conn, + request_id, + region_id, + min_resolved_ts, + ); + } + } + } + for (conn_id, reqs_regions) in one_way { + let conn = self.connections.get(&conn_id).unwrap(); + if conn.features().contains(FeatureGate::BATCH_RESOLVED_TS) { + send_cdc_event(min_resolved_ts, conn, 0, reqs_regions.1); + } else { + for i in 0..reqs_regions.0.len() { + self.broadcast_resolved_ts_compact( + conn, + reqs_regions.0[i], + reqs_regions.1[i], + min_resolved_ts, + ); } } } } - fn broadcast_resolved_ts_compact(&self, region_id: u64, resolved_ts: u64, conn: &Conn) { - let downstream_id = match conn.downstream_id(region_id) { - Some(downstream_id) => downstream_id, - // No such region registers in the connection. - None => { - debug!("cdc send resolved ts failed, no region downstream id found"; - "region_id" => region_id); - return; - } - }; - let delegate = match self.capture_regions.get(®ion_id) { - Some(delegate) => delegate, - // No such region registers in the endpoint. - None => { - info!("cdc send resolved ts failed, no region delegate found"; - "region_id" => region_id, "downstream_id" => ?downstream_id); - return; - } - }; - let downstream = match delegate.downstream(downstream_id) { - Some(downstream) => downstream, - // No such downstream registers in the delegate. 
- None => { - info!("cdc send resolved ts failed, no region downstream found"; - "region_id" => region_id, "downstream_id" => ?downstream_id); - return; - } - }; + fn broadcast_resolved_ts_compact( + &self, + conn: &Conn, + request_id: u64, + region_id: u64, + resolved_ts: u64, + ) { + let downstream_id = conn.get_downstream(request_id, region_id).unwrap(); + let delegate = self.capture_regions.get(®ion_id).unwrap(); + let downstream = delegate.downstream(downstream_id).unwrap(); if !downstream.get_state().load().ready_for_advancing_ts() { - // Only send resolved timestamp if the downstream is ready. return; } let resolved_ts_event = Event { region_id, + request_id, event: Some(Event_oneof_event::ResolvedTs(resolved_ts)), ..Default::default() }; - // No need force send, as resolved ts messages is sent regularly. - // And errors can be ignored. let force_send = false; let _ = downstream.sink_event(resolved_ts_event, force_send); } @@ -1107,6 +1147,16 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, + ) { + let conn = self.connections.get_mut(&conn_id).unwrap(); + conn.check_version_and_set_feature(version, explicit_features); + } } impl, E: KvEngine, S: StoreRegionMeta + Send> Runnable @@ -1127,8 +1177,7 @@ impl, E: KvEngine, S: StoreRegionMeta + Send> Runnable request, downstream, conn_id, - version, - } => self.on_register(request, downstream, conn_id, version), + } => self.on_register(request, downstream, conn_id), Task::ResolverReady { observe_id, resolver, @@ -1140,6 +1189,13 @@ impl, E: KvEngine, S: StoreRegionMeta + Send> Runnable old_value_cb, } => self.on_multi_batch(multi, old_value_cb), Task::OpenConn { conn } => self.on_open_conn(conn), + Task::SetConnVersion { + conn_id, + version, + explicit_features, + } => { + self.on_set_conn_version(conn_id, version, explicit_features); + } Task::RegisterMinTsEvent { leader_resolver, event_time, @@ -1285,6 +1341,14 @@ mod tests { recv_timeout, }; + fn set_conn_verion_task(conn_id: ConnId, version: 
semver::Version) -> Task { + Task::SetConnVersion { + conn_id, + version, + explicit_features: vec![], + } + } + struct TestEndpointSuite { // The order must ensure `endpoint` be dropped before other fields. endpoint: Endpoint, RocksEngine, StoreMeta>, @@ -1419,13 +1483,17 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); + suite.run(set_conn_verion_task( + conn_id, + FeatureGate::batch_resolved_ts(), + )); + let mut req_header = Header::default(); req_header.set_cluster_id(0); let mut req = ChangeDataRequest::default(); req.set_region_id(1); req.set_kv_api(ChangeDataRequestKvApi::TiDb); let region_epoch = req.get_region_epoch().clone(); - let version = FeatureGate::batch_resolved_ts(); // Compatibility error. let downstream = Downstream::new( @@ -1442,7 +1510,6 @@ mod tests { request: req.clone(), downstream, conn_id, - version: version.clone(), }); let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) .unwrap() @@ -1479,7 +1546,6 @@ mod tests { request: req.clone(), downstream, conn_id, - version: version.clone(), }); let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) .unwrap() @@ -1517,7 +1583,6 @@ mod tests { request: req, downstream, conn_id, - version, }); let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) .unwrap() @@ -1678,6 +1743,8 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); + suite.run(set_conn_verion_task(conn_id, semver::Version::new(0, 0, 0))); + let mut req_header = Header::default(); req_header.set_cluster_id(0); let mut req = ChangeDataRequest::default(); @@ -1696,7 +1763,6 @@ mod tests { request: req, downstream, conn_id, - version: semver::Version::new(0, 0, 0), }); assert_eq!(suite.endpoint.capture_regions.len(), 1); @@ -1726,10 +1792,16 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); 
suite.run(Task::OpenConn { conn }); + + // Enable batch resolved ts in the test. + let version = FeatureGate::batch_resolved_ts(); + suite.run(set_conn_verion_task(conn_id, version)); + let mut req_header = Header::default(); req_header.set_cluster_id(0); let mut req = ChangeDataRequest::default(); req.set_region_id(1); + req.set_request_id(1); let region_epoch = req.get_region_epoch().clone(); let downstream = Downstream::new( "".to_string(), @@ -1740,13 +1812,10 @@ mod tests { false, ObservedRange::default(), ); - // Enable batch resolved ts in the test. - let version = FeatureGate::batch_resolved_ts(); suite.run(Task::Register { request: req.clone(), downstream, conn_id, - version: version.clone(), }); assert_eq!(suite.endpoint.capture_regions.len(), 1); suite @@ -1755,10 +1824,11 @@ mod tests { .unwrap_err(); // duplicate request error. + req.set_request_id(1); let downstream = Downstream::new( "".to_string(), - region_epoch.clone(), - 2, + region_epoch, + 1, conn_id, ChangeDataRequestKvApi::TiDb, false, @@ -1768,14 +1838,13 @@ mod tests { request: req.clone(), downstream, conn_id, - version: version.clone(), }); let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) .unwrap() .unwrap(); if let CdcEvent::Event(mut e) = cdc_event.0 { assert_eq!(e.region_id, 1); - assert_eq!(e.request_id, 2); + assert_eq!(e.request_id, 1); let event = e.event.take().unwrap(); match event { Event_oneof_event::Error(err) => { @@ -1792,45 +1861,6 @@ mod tests { .recv_timeout(Duration::from_millis(100)) .unwrap_err(); - // Compatibility error. - let downstream = Downstream::new( - "".to_string(), - region_epoch, - 3, - conn_id, - ChangeDataRequestKvApi::TiDb, - false, - ObservedRange::default(), - ); - suite.run(Task::Register { - request: req, - downstream, - conn_id, - // The version that does not support batch resolved ts. 
- version: semver::Version::new(0, 0, 0), - }); - let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) - .unwrap() - .unwrap(); - if let CdcEvent::Event(mut e) = cdc_event.0 { - assert_eq!(e.region_id, 1); - assert_eq!(e.request_id, 3); - let event = e.event.take().unwrap(); - match event { - Event_oneof_event::Error(err) => { - assert!(err.has_compatibility()); - } - other => panic!("unknown event {:?}", other), - } - } else { - panic!("unknown cdc event {:?}", cdc_event); - } - assert_eq!(suite.endpoint.capture_regions.len(), 1); - suite - .task_rx - .recv_timeout(Duration::from_millis(100)) - .unwrap_err(); - // The first scan task of a region is initiated in register, and when it // fails, it should send a deregister region task, otherwise the region // delegate does not have resolver. @@ -1838,6 +1868,7 @@ mod tests { // Test non-exist region in raft router. let mut req = ChangeDataRequest::default(); req.set_region_id(100); + req.set_request_id(1); let region_epoch = req.get_region_epoch().clone(); let downstream = Downstream::new( "".to_string(), @@ -1853,7 +1884,6 @@ mod tests { request: req.clone(), downstream, conn_id, - version: version.clone(), }); // Region 100 is inserted into capture_regions. assert_eq!(suite.endpoint.capture_regions.len(), 2); @@ -1871,6 +1901,7 @@ mod tests { // Test errors on CaptureChange message. req.set_region_id(101); + req.set_request_id(1); suite.add_region(101, 100); let downstream = Downstream::new( "".to_string(), @@ -1885,7 +1916,6 @@ mod tests { request: req, downstream, conn_id, - version, }); // Drop CaptureChange message, it should cause scan task failure. let timeout = Duration::from_millis(100); @@ -1944,6 +1974,11 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); + + // Enable batch resolved ts in the test. 
+ let version = FeatureGate::batch_resolved_ts(); + suite.run(set_conn_verion_task(conn_id, version)); + let mut req_header = Header::default(); req_header.set_cluster_id(0); let mut req = ChangeDataRequest::default(); @@ -1959,13 +1994,10 @@ mod tests { ObservedRange::default(), ); downstream.get_state().store(DownstreamState::Normal); - // Enable batch resolved ts in the test. - let version = FeatureGate::batch_resolved_ts(); suite.run(Task::Register { request: req.clone(), downstream, conn_id, - version: version.clone(), }); let resolver = Resolver::new(1); let observe_id = suite.endpoint.capture_regions[&1].handle.id; @@ -2002,7 +2034,6 @@ mod tests { request: req.clone(), downstream, conn_id, - version, }); let resolver = Resolver::new(2); region.set_id(2); @@ -2033,7 +2064,10 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); + suite.run(set_conn_verion_task(conn_id, semver::Version::new(4, 0, 5))); + req.set_region_id(3); + req.set_request_id(3); let downstream = Downstream::new( "".to_string(), region_epoch, @@ -2049,7 +2083,6 @@ mod tests { request: req, downstream, conn_id, - version: semver::Version::new(4, 0, 5), }); let resolver = Resolver::new(3); region.set_id(3); @@ -2101,6 +2134,8 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); + suite.run(set_conn_verion_task(conn_id, semver::Version::new(0, 0, 0))); + let mut req_header = Header::default(); req_header.set_cluster_id(0); let mut req = ChangeDataRequest::default(); @@ -2120,16 +2155,16 @@ mod tests { request: req.clone(), downstream, conn_id, - version: semver::Version::new(0, 0, 0), }); assert_eq!(suite.endpoint.capture_regions.len(), 1); let mut err_header = ErrorHeader::default(); err_header.set_not_leader(Default::default()); let deregister = Deregister::Downstream { + conn_id, + request_id: 0, region_id: 1, downstream_id, - conn_id, err: 
Some(Error::request(err_header.clone())), }; suite.run(Task::Deregister(deregister)); @@ -2164,14 +2199,14 @@ mod tests { request: req.clone(), downstream, conn_id, - version: semver::Version::new(0, 0, 0), }); assert_eq!(suite.endpoint.capture_regions.len(), 1); let deregister = Deregister::Downstream { + conn_id, + request_id: 0, region_id: 1, downstream_id, - conn_id, err: Some(Error::request(err_header.clone())), }; suite.run(Task::Deregister(deregister)); @@ -2179,9 +2214,10 @@ mod tests { assert_eq!(suite.endpoint.capture_regions.len(), 1); let deregister = Deregister::Downstream { + conn_id, + request_id: 0, region_id: 1, downstream_id: new_downstream_id, - conn_id, err: Some(Error::request(err_header.clone())), }; suite.run(Task::Deregister(deregister)); @@ -2216,7 +2252,6 @@ mod tests { request: req, downstream, conn_id, - version: semver::Version::new(0, 0, 0), }); assert_eq!(suite.endpoint.capture_regions.len(), 1); let deregister = Deregister::Delegate { @@ -2251,6 +2286,8 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); + let version = FeatureGate::batch_resolved_ts(); + suite.run(set_conn_verion_task(conn_id, version)); for region_id in region_ids { suite.add_region(region_id, 100); @@ -2273,7 +2310,6 @@ mod tests { request: req.clone(), downstream, conn_id, - version: FeatureGate::batch_resolved_ts(), }); let resolver = Resolver::new(region_id); let observe_id = suite.endpoint.capture_regions[®ion_id].handle.id; @@ -2363,6 +2399,10 @@ mod tests { let conn_a = Conn::new(tx1, String::new()); let conn_id_a = conn_a.get_id(); suite.run(Task::OpenConn { conn: conn_a }); + suite.run(set_conn_verion_task( + conn_id_a, + semver::Version::new(0, 0, 0), + )); // Open conn b let (tx2, mut rx2) = channel::channel(1, quota); @@ -2370,6 +2410,10 @@ mod tests { let conn_b = Conn::new(tx2, String::new()); let conn_id_b = conn_b.get_id(); suite.run(Task::OpenConn { conn: conn_b }); + 
suite.run(set_conn_verion_task( + conn_id_b, + semver::Version::new(0, 0, 0), + )); // Register region 1 (epoch 2) at conn a. let mut req_header = Header::default(); @@ -2391,7 +2435,6 @@ mod tests { request: req.clone(), downstream, conn_id: conn_id_a, - version: semver::Version::new(0, 0, 0), }); assert_eq!(suite.endpoint.capture_regions.len(), 1); let observe_id = suite.endpoint.capture_regions[&1].handle.id; @@ -2416,7 +2459,6 @@ mod tests { request: req.clone(), downstream, conn_id: conn_id_b, - version: semver::Version::new(0, 0, 0), }); assert_eq!(suite.endpoint.capture_regions.len(), 1); @@ -2522,6 +2564,10 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); + // Enable batch resolved ts in the test. + let version = FeatureGate::batch_resolved_ts(); + suite.run(set_conn_verion_task(conn_id, version)); + let mut req_header = Header::default(); req_header.set_cluster_id(0); @@ -2536,7 +2582,7 @@ mod tests { let downstream = Downstream::new( "".to_string(), region_epoch.clone(), - id, + 0, conn_id, ChangeDataRequestKvApi::TiDb, false, @@ -2544,13 +2590,10 @@ mod tests { ); on_init_downstream(&downstream.get_state()); post_init_downstream(&downstream.get_state()); - // Enable batch resolved ts in the test. 
- let version = FeatureGate::batch_resolved_ts(); suite.run(Task::Register { request: req.clone(), downstream, conn_id, - version: version.clone(), }); let mut resolver = Resolver::new(id); @@ -2594,4 +2637,202 @@ mod tests { last_batch_count = event.resolved_ts().regions.len(); } } + + #[test] + fn test_register_deregister_with_multiplexing() { + let cfg = CdcConfig { + min_ts_interval: ReadableDuration(Duration::from_secs(60)), + ..Default::default() + }; + let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); + suite.add_region(1, 100); + let quota = crate::channel::MemoryQuota::new(usize::MAX); + let (tx, mut rx) = channel::channel(1, quota); + let mut rx = rx.drain(); + + let conn = Conn::new(tx, String::new()); + let conn_id = conn.get_id(); + suite.run(Task::OpenConn { conn }); + + let version = FeatureGate::batch_resolved_ts(); + suite.run(set_conn_verion_task(conn_id, version)); + + let mut req_header = Header::default(); + req_header.set_cluster_id(0); + let mut req = ChangeDataRequest::default(); + + req.set_region_id(1); + req.set_request_id(1); + let region_epoch = req.get_region_epoch().clone(); + let downstream = Downstream::new( + "".to_string(), + region_epoch.clone(), + 1, + conn_id, + ChangeDataRequestKvApi::TiDb, + false, + ObservedRange::default(), + ); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + }); + assert_eq!(suite.connections[&conn_id].downstreams_count(), 1); + + // Subscribe one region with a different request_id is allowed. + req.set_request_id(2); + let downstream = Downstream::new( + "".to_string(), + region_epoch.clone(), + 2, + conn_id, + ChangeDataRequestKvApi::TiDb, + false, + ObservedRange::default(), + ); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + }); + assert_eq!(suite.connections[&conn_id].downstreams_count(), 2); + + // Subscribe one region with a same request_id is not allowed. 
+ req.set_request_id(2); + let downstream = Downstream::new( + "".to_string(), + region_epoch.clone(), + 2, + conn_id, + ChangeDataRequestKvApi::TiDb, + false, + ObservedRange::default(), + ); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + }); + assert_eq!(suite.connections[&conn_id].downstreams_count(), 2); + let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) + .unwrap() + .unwrap(); + let check = matches!(cdc_event.0, CdcEvent::Event(e) if { + matches!(e.event, Some(Event_oneof_event::Error(ref err)) if { + err.has_duplicate_request() + }) + }); + assert!(check); + + // Deregister an unexist downstream. + suite.run(Task::Deregister(Deregister::Downstream { + conn_id, + request_id: 1, + region_id: 1, + downstream_id: DownstreamId::new(), + err: None, + })); + assert_eq!(suite.connections[&conn_id].downstreams_count(), 2); + + // Deregister an unexist delegate. + suite.run(Task::Deregister(Deregister::Delegate { + region_id: 1, + observe_id: ObserveId::new(), + err: Error::Rocks("test error".to_owned()), + })); + assert_eq!(suite.connections[&conn_id].downstreams_count(), 2); + + // Deregister an exist downstream. + let downstream_id = suite.capture_regions[&1].downstreams()[0].get_id(); + suite.run(Task::Deregister(Deregister::Downstream { + conn_id, + request_id: 1, + region_id: 1, + downstream_id, + err: Some(Error::Rocks("test error".to_owned())), + })); + assert_eq!(suite.connections[&conn_id].downstreams_count(), 1); + let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) + .unwrap() + .unwrap(); + let check = matches!(cdc_event.0, CdcEvent::Event(e) if { + matches!(e.event, Some(Event_oneof_event::Error(ref err)) if { + err.has_region_not_found() + }) + }); + assert!(check); + + // Subscribe one region with a different request_id is allowed. 
+ req.set_request_id(1); + let downstream = Downstream::new( + "".to_string(), + region_epoch.clone(), + 1, + conn_id, + ChangeDataRequestKvApi::TiDb, + false, + ObservedRange::default(), + ); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + }); + assert_eq!(suite.connections[&conn_id].downstreams_count(), 2); + + // Deregister an exist delegate. + let observe_id = suite.capture_regions[&1].handle.id; + suite.run(Task::Deregister(Deregister::Delegate { + region_id: 1, + observe_id, + err: Error::Rocks("test error".to_owned()), + })); + assert_eq!(suite.connections[&conn_id].downstreams_count(), 0); + assert_eq!(suite.capture_regions.len(), 0); + for _ in 0..2 { + let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) + .unwrap() + .unwrap(); + let check = matches!(cdc_event.0, CdcEvent::Event(e) if { + matches!(e.event, Some(Event_oneof_event::Error(ref err)) if { + err.has_region_not_found() + }) + }); + assert!(check); + } + + // Resubscribe the region. + for i in 1..=2 { + req.set_request_id(i as _); + let downstream = Downstream::new( + "".to_string(), + region_epoch.clone(), + i as _, + conn_id, + ChangeDataRequestKvApi::TiDb, + false, + ObservedRange::default(), + ); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + }); + assert_eq!(suite.connections[&conn_id].downstreams_count(), i); + } + + // Deregister the request. 
+ suite.run(Task::Deregister(Deregister::Request { + conn_id, + request_id: 1, + })); + assert_eq!(suite.connections[&conn_id].downstreams_count(), 1); + suite.run(Task::Deregister(Deregister::Request { + conn_id, + request_id: 2, + })); + assert_eq!(suite.connections[&conn_id].downstreams_count(), 0); + assert_eq!(suite.capture_regions.len(), 0); + } } diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 229be748060..d41b7ae2702 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -290,6 +290,7 @@ impl Initializer { self.sink_scan_events(entries, done).await?; } + fail_point!("before_post_incremental_scan"); if !post_init_downstream(&self.downstream_state) { return on_cancel(); } @@ -493,9 +494,10 @@ impl Initializer { } } else { Deregister::Downstream { + conn_id: self.conn_id, + request_id: self.request_id, region_id: self.region_id, downstream_id: self.downstream_id, - conn_id: self.conn_id, err: Some(err), } }; diff --git a/components/cdc/src/service.rs b/components/cdc/src/service.rs index 215f2cdebca..8dc30ec75e0 100644 --- a/components/cdc/src/service.rs +++ b/components/cdc/src/service.rs @@ -1,24 +1,18 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{ - collections::hash_map::Entry, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, - }, +use std::sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, }; -use collections::HashMap; +use collections::{HashMap, HashMapEntry}; use crossbeam::atomic::AtomicCell; -use futures::{ - future::{self, TryFutureExt}, - sink::SinkExt, - stream::TryStreamExt, -}; -use grpcio::{DuplexSink, Error as GrpcError, RequestStream, RpcContext, RpcStatus, RpcStatusCode}; +use futures::stream::TryStreamExt; +use grpcio::{DuplexSink, RequestStream, RpcContext, RpcStatus, RpcStatusCode}; use kvproto::{ cdcpb::{ - ChangeData, ChangeDataEvent, ChangeDataRequest, ChangeDataRequestKvApi, Compatibility, + ChangeData, ChangeDataEvent, ChangeDataRequest, ChangeDataRequestKvApi, + ChangeDataRequest_oneof_request, }, kvrpcpb::ApiVersion, }; @@ -32,6 +26,11 @@ use crate::{ static CONNECTION_ID_ALLOC: AtomicUsize = AtomicUsize::new(0); +pub fn validate_kv_api(kv_api: ChangeDataRequestKvApi, api_version: ApiVersion) -> bool { + kv_api == ChangeDataRequestKvApi::TiDb + || (kv_api == ChangeDataRequestKvApi::RawKv && api_version == ApiVersion::V2) +} + /// A unique identifier of a Connection. #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] pub struct ConnId(usize); @@ -48,84 +47,88 @@ impl Default for ConnId { } } +// FeatureGate checks whether a feature is enabled or not on client versions. +// +// NOTE: default features can't be disabled by clients. Clients can only enable +// features by specifying GRPC headers. See `EventFeedHeaders`. bitflags::bitflags! { pub struct FeatureGate: u8 { const BATCH_RESOLVED_TS = 0b00000001; - // Uncomment when its ready. - // const LargeTxn = 0b00000010; + const VALIDATE_CLUSTER_ID = 0b00000010; + const STREAM_MULTIPLEXING = 0b00000100; } } impl FeatureGate { - // Returns the first version (v4.0.8) that supports batch resolved ts. 
- pub fn batch_resolved_ts() -> semver::Version { - semver::Version::new(4, 0, 8) - } - - // Returns the first version (v5.3.0) that supports validate cluster id. - pub(crate) fn validate_cluster_id() -> semver::Version { - semver::Version::new(5, 3, 0) + fn default_features(version: &semver::Version) -> FeatureGate { + let mut features = FeatureGate::empty(); + if *version >= semver::Version::new(4, 0, 8) { + features.set(FeatureGate::BATCH_RESOLVED_TS, true); + } + if *version >= semver::Version::new(5, 3, 0) { + features.set(FeatureGate::VALIDATE_CLUSTER_ID, true); + } + features } - pub(crate) fn validate_kv_api(kv_api: ChangeDataRequestKvApi, api_version: ApiVersion) -> bool { - kv_api == ChangeDataRequestKvApi::TiDb - || (kv_api == ChangeDataRequestKvApi::RawKv && api_version == ApiVersion::V2) + /// Returns the first version (v4.0.8) that supports batch resolved ts. + pub fn batch_resolved_ts() -> semver::Version { + semver::Version::new(4, 0, 8) } } pub struct Conn { id: ConnId, sink: Sink, - // region id -> DownstreamId - downstreams: HashMap>)>, + downstreams: HashMap, peer: String, + + // Set when the connection established, or the first request received. version: Option<(semver::Version, FeatureGate)>, } +#[derive(PartialEq, Eq, Hash)] +struct DownstreamKey { + request_id: u64, + region_id: u64, +} + +#[derive(Clone)] +struct DownstreamValue { + id: DownstreamId, + state: Arc>, +} + impl Conn { pub fn new(sink: Sink, peer: String) -> Conn { Conn { id: ConnId::new(), sink, downstreams: HashMap::default(), - version: None, peer, + version: None, } } - // TODO refactor into Error::Version. 
- pub fn check_version_and_set_feature(&mut self, ver: semver::Version) -> Option { - match &self.version { - Some((version, _)) => { - if version == &ver { - None - } else { - error!("cdc different version on the same connection"; - "previous version" => ?version, "version" => ?ver, - "downstream" => ?self.peer, "conn_id" => ?self.id); - Some(Compatibility { - required_version: version.to_string(), - ..Default::default() - }) - } - } - None => { - let mut features = FeatureGate::empty(); - if FeatureGate::batch_resolved_ts() <= ver { - features.toggle(FeatureGate::BATCH_RESOLVED_TS); - } - info!("cdc connection version"; - "version" => ver.to_string(), "features" => ?features, "downstream" => ?self.peer); - self.version = Some((ver, features)); - None - } + pub fn check_version_and_set_feature( + &mut self, + version: semver::Version, + explicit_features: Vec<&'static str>, + ) { + let mut features = FeatureGate::default_features(&version); + if explicit_features.contains(&EventFeedHeaders::STREAM_MULTIPLEXING) { + features.set(FeatureGate::STREAM_MULTIPLEXING, true); + } else { + // NOTE: we can handle more explicit features here. + } + + if self.version.replace((version, features)).is_some() { + panic!("should never be some"); } - // Return Err(Compatibility) when TiKV reaches the next major release, - // so that we can remove feature gates. 
} - pub fn get_feature(&self) -> Option<&FeatureGate> { - self.version.as_ref().map(|(_, f)| f) + pub fn features(&self) -> &FeatureGate { + self.version.as_ref().map(|(_, f)| f).unwrap() } pub fn get_peer(&self) -> &str { @@ -136,43 +139,102 @@ impl Conn { self.id } - pub fn get_downstreams( - &self, - ) -> &HashMap>)> { - &self.downstreams - } - - pub fn take_downstreams( - self, - ) -> HashMap>)> { - self.downstreams - } - pub fn get_sink(&self) -> &Sink { &self.sink } + pub fn get_downstream(&self, request_id: u64, region_id: u64) -> Option { + let key = DownstreamKey { + request_id, + region_id, + }; + self.downstreams.get(&key).map(|v| v.id) + } + pub fn subscribe( &mut self, + request_id: u64, region_id: u64, downstream_id: DownstreamId, downstream_state: Arc>, - ) -> bool { - match self.downstreams.entry(region_id) { - Entry::Occupied(_) => false, - Entry::Vacant(v) => { - v.insert((downstream_id, downstream_state)); - true + ) -> Option { + let key = DownstreamKey { + request_id, + region_id, + }; + match self.downstreams.entry(key) { + HashMapEntry::Occupied(value) => Some(value.get().id), + HashMapEntry::Vacant(v) => { + v.insert(DownstreamValue { + id: downstream_id, + state: downstream_state, + }); + None + } + } + } + + pub fn unsubscribe(&mut self, request_id: u64, region_id: u64) -> Option { + let key = DownstreamKey { + request_id, + region_id, + }; + self.downstreams.remove(&key).map(|value| value.id) + } + + pub fn unsubscribe_request(&mut self, request_id: u64) -> Vec<(u64, DownstreamId)> { + let mut downstreams = Vec::new(); + self.downstreams.retain(|key, value| -> bool { + if key.request_id == request_id { + downstreams.push((key.region_id, value.id)); + return false; } + true + }); + downstreams + } + + pub fn iter_downstreams(&self, mut f: F) + where + F: FnMut(u64, u64, DownstreamId, &Arc>), + { + for (key, value) in &self.downstreams { + f(key.request_id, key.region_id, value.id, &value.state); } } - pub fn unsubscribe(&mut self, 
region_id: u64) { - self.downstreams.remove(®ion_id); + #[cfg(test)] + pub fn downstreams_count(&self) -> usize { + self.downstreams.len() } +} + +// Examaples for all available headers: +// * features -> feature_a,feature_b +#[derive(Debug, Default)] +struct EventFeedHeaders { + features: Vec<&'static str>, +} - pub fn downstream_id(&self, region_id: u64) -> Option { - self.downstreams.get(®ion_id).map(|x| x.0) +impl EventFeedHeaders { + const FEATURES_KEY: &str = "features"; + const STREAM_MULTIPLEXING: &str = "stream-multiplexing"; + const FEATURES: &'static [&'static str] = &[Self::STREAM_MULTIPLEXING]; + + fn parse_features(value: &[u8]) -> Result, String> { + let value = std::str::from_utf8(value).unwrap_or_default(); + let (mut features, mut unknowns) = (Vec::new(), Vec::new()); + for feature in value.split(',').map(|x| x.trim()) { + if let Some(i) = Self::FEATURES.iter().position(|x| *x == feature) { + features.push(Self::FEATURES[i]); + } else { + unknowns.push(feature); + } + } + if !unknowns.is_empty() { + return Err(unknowns.join(",")); + } + Ok(features) } } @@ -195,132 +257,238 @@ impl Service { memory_quota, } } -} -impl ChangeData for Service { - fn event_feed( + // Parse HTTP/2 headers. Only for `Self::event_feed_v2`. 
+ fn parse_headers(ctx: &RpcContext<'_>) -> Result { + let mut header = EventFeedHeaders::default(); + let metadata = ctx.request_headers(); + for i in 0..metadata.len() { + let (key, value) = metadata.get(i).unwrap(); + if key == EventFeedHeaders::FEATURES_KEY { + header.features = EventFeedHeaders::parse_features(value)?; + } + } + Ok(header) + } + + fn parse_version_from_request_header( + request: &ChangeDataRequest, + peer: &str, + ) -> semver::Version { + let version_field = request.get_header().get_ticdc_version(); + match semver::Version::parse(version_field) { + Ok(v) => v, + Err(e) => { + warn!( + "empty or invalid TiCDC version, please upgrading TiCDC"; + "version" => version_field, + "downstream" => ?peer, "region_id" => request.region_id, + "error" => ?e, + ); + semver::Version::new(0, 0, 0) + } + } + } + + fn set_conn_version( + scheduler: &Scheduler, + conn_id: ConnId, + version: semver::Version, + explicit_features: Vec<&'static str>, + ) -> Result<(), String> { + let task = Task::SetConnVersion { + conn_id, + version, + explicit_features, + }; + scheduler.schedule(task).map_err(|e| format!("{:?}", e)) + } + + fn handle_request( + scheduler: &Scheduler, + peer: &str, + request: ChangeDataRequest, + conn_id: ConnId, + ) -> Result<(), String> { + match request.request { + None | Some(ChangeDataRequest_oneof_request::Register(_)) => { + Self::handle_register(scheduler, peer, request, conn_id) + } + Some(ChangeDataRequest_oneof_request::Deregister(_)) => { + Self::handle_deregister(scheduler, request, conn_id) + } + _ => unreachable!(), + } + } + + fn handle_register( + scheduler: &Scheduler, + peer: &str, + request: ChangeDataRequest, + conn_id: ConnId, + ) -> Result<(), String> { + let observed_range = + match ObservedRange::new(request.start_key.clone(), request.end_key.clone()) { + Ok(observed_range) => observed_range, + Err(e) => { + warn!( + "cdc invalid observed start key or end key version"; + "downstream" => ?peer, "region_id" => 
 request.region_id, + "error" => ?e, + ); + ObservedRange::default() + } + }; + let downstream = Downstream::new( + peer.to_owned(), + request.get_region_epoch().clone(), + request.request_id, + conn_id, + request.kv_api, + request.filter_loop, + observed_range, + ); + let task = Task::Register { + request, + downstream, + conn_id, + }; + scheduler.schedule(task).map_err(|e| format!("{:?}", e)) + } + + fn handle_deregister( + scheduler: &Scheduler, + request: ChangeDataRequest, + conn_id: ConnId, + ) -> Result<(), String> { + let task = Task::Deregister(Deregister::Request { + conn_id, + request_id: request.request_id, + }); + scheduler.schedule(task).map_err(|e| format!("{:?}", e)) + } + + // Differences between `Self::event_feed` and `Self::event_feed_v2`: + // + // ### Why `v2` + // `v2` is expected to resolve this problem: the client's version is higher than + // the server's, in which case the `v1` compatibility check mechanism doesn't work. + // + // ### How `v2` + // In `v2`, the client tells its requested features to connected servers. If a + // server finds a client requires unavailable features, it can fail the + // connection with an UNIMPLEMENTED status code. + // + // ### Details about `v2` features + // * stream-multiplexing: a region can be subscribed multiple times in one + // `Conn` with different `request_id`. 
+ fn handle_event_feed( &mut self, ctx: RpcContext<'_>, stream: RequestStream, mut sink: DuplexSink, + event_feed_v2: bool, ) { + sink.enhance_batch(true); let (event_sink, mut event_drain) = channel(CDC_CHANNLE_CAPACITY, self.memory_quota.clone()); - let peer = ctx.peer(); - let conn = Conn::new(event_sink, peer.clone()); + let conn = Conn::new(event_sink, ctx.peer()); let conn_id = conn.get_id(); + let mut explicit_features = vec![]; - if let Err(status) = self - .scheduler - .schedule(Task::OpenConn { conn }) - .map_err(|e| { - RpcStatus::with_message(RpcStatusCode::INVALID_ARGUMENT, format!("{:?}", e)) - }) - { - error!("cdc connection initiate failed"; - "downstream" => ?peer, "error" => ?status); - ctx.spawn(sink.fail(status).unwrap_or_else(move |e| { - error!("cdc failed to send error"; - "downstream" => ?peer, "error" => ?e) - })); - return; - } - - let peer = ctx.peer(); - let scheduler = self.scheduler.clone(); - let recv_req = stream.try_for_each(move |request| { - let region_epoch = request.get_region_epoch().clone(); - let req_id = request.get_request_id(); - let req_kvapi = request.get_kv_api(); - let version = match semver::Version::parse(request.get_header().get_ticdc_version()) { - Ok(v) => v, + if event_feed_v2 { + let headers = match Self::parse_headers(&ctx) { + Ok(headers) => headers, Err(e) => { - warn!("empty or invalid TiCDC version, please upgrading TiCDC"; - "version" => request.get_header().get_ticdc_version(), - "downstream" => ?peer, - "error" => ?e); - semver::Version::new(0, 0, 0) + let peer = ctx.peer(); + error!("cdc connection with bad headers"; "downstream" => ?peer, "headers" => &e); + ctx.spawn(async move { + let status = RpcStatus::with_message(RpcStatusCode::UNIMPLEMENTED, e); + if let Err(e) = sink.fail(status).await { + error!("cdc failed to send error"; "downstream" => ?peer, "error" => ?e); + } + }); + return; } }; - let observed_range = - match ObservedRange::new(request.start_key.clone(), request.end_key.clone()) { - 
Ok(observed_range) => observed_range, - Err(e) => { - warn!("cdc invalid observed start key or end key version"; - "downstream" => ?peer, "error" => ?e); - ObservedRange::default() - } - }; - let downstream = Downstream::new( - peer.clone(), - region_epoch, - req_id, - conn_id, - req_kvapi, - request.filter_loop, - observed_range, - ); - let ret = scheduler - .schedule(Task::Register { - request, - downstream, - conn_id, - version, - }) - .map_err(|e| { - GrpcError::RpcFailure(RpcStatus::with_message( - RpcStatusCode::INVALID_ARGUMENT, - format!("{:?}", e), - )) - }); - future::ready(ret) - }); + explicit_features = headers.features; + } + info!("cdc connection created"; "downstream" => ctx.peer(), "features" => ?explicit_features); + + if let Err(e) = self.scheduler.schedule(Task::OpenConn { conn }) { + let peer = ctx.peer(); + error!("cdc connection initiate failed"; "downstream" => ?peer, "error" => ?e); + ctx.spawn(async move { + let status = RpcStatus::with_message(RpcStatusCode::UNKNOWN, format!("{:?}", e)); + if let Err(e) = sink.fail(status).await { + error!("cdc failed to send error"; "downstream" => ?peer, "error" => ?e); + } + }); + return; + } let peer = ctx.peer(); let scheduler = self.scheduler.clone(); - ctx.spawn(async move { - let res = recv_req.await; - // Unregister this downstream only. + let recv_req = async move { + let mut stream = stream.map_err(|e| format!("{:?}", e)); + if let Some(request) = stream.try_next().await? { + // Get version from the first request in the stream. + let version = Self::parse_version_from_request_header(&request, &peer); + Self::set_conn_version(&scheduler, conn_id, version, explicit_features)?; + Self::handle_request(&scheduler, &peer, request, conn_id)?; + } + while let Some(request) = stream.try_next().await? 
{ + Self::handle_request(&scheduler, &peer, request, conn_id)?; + } let deregister = Deregister::Conn(conn_id); if let Err(e) = scheduler.schedule(Task::Deregister(deregister)) { error!("cdc deregister failed"; "error" => ?e, "conn_id" => ?conn_id); } - match res { - Ok(()) => { - info!("cdc receive closed"; "downstream" => peer, "conn_id" => ?conn_id); - } - Err(e) => { - warn!("cdc receive failed"; "error" => ?e, "downstream" => peer, "conn_id" => ?conn_id); - } + Ok::<(), String>(()) + }; + + let peer = ctx.peer(); + ctx.spawn(async move { + if let Err(e) = recv_req.await { + warn!("cdc receive failed"; "error" => ?e, "downstream" => peer, "conn_id" => ?conn_id); + } else { + info!("cdc receive closed"; "downstream" => peer, "conn_id" => ?conn_id); } }); let peer = ctx.peer(); - let scheduler = self.scheduler.clone(); - ctx.spawn(async move { #[cfg(feature = "failpoints")] sleep_before_drain_change_event().await; - - let res = event_drain.forward(&mut sink).await; - // Unregister this downstream only. 
- let deregister = Deregister::Conn(conn_id); - if let Err(e) = scheduler.schedule(Task::Deregister(deregister)) { - error!("cdc deregister failed"; "error" => ?e); - } - match res { - Ok(_s) => { - info!("cdc send closed"; "downstream" => peer, "conn_id" => ?conn_id); - let _ = sink.close().await; - } - Err(e) => { - warn!("cdc send failed"; "error" => ?e, "downstream" => peer, "conn_id" => ?conn_id); - } + if let Err(e) = event_drain.forward(&mut sink).await { + warn!("cdc send failed"; "error" => ?e, "downstream" => peer, "conn_id" => ?conn_id); + } else { + info!("cdc send closed"; "downstream" => peer, "conn_id" => ?conn_id); } }); } } +impl ChangeData for Service { + fn event_feed( + &mut self, + ctx: RpcContext<'_>, + stream: RequestStream, + sink: DuplexSink, + ) { + self.handle_event_feed(ctx, stream, sink, false); + } + + fn event_feed_v2( + &mut self, + ctx: RpcContext<'_>, + stream: RequestStream, + sink: DuplexSink, + ) { + self.handle_event_feed(ctx, stream, sink, true); + } +} + #[cfg(feature = "failpoints")] async fn sleep_before_drain_change_event() { use std::time::{Duration, Instant}; @@ -341,7 +509,7 @@ async fn sleep_before_drain_change_event() { mod tests { use std::{sync::Arc, time::Duration}; - use futures::executor::block_on; + use futures::{executor::block_on, SinkExt}; use grpcio::{self, ChannelBuilder, EnvBuilder, Server, ServerBuilder, WriteFlags}; use kvproto::cdcpb::{create_change_data, ChangeDataClient, ResolvedTs}; diff --git a/components/cdc/tests/failpoints/test_endpoint.rs b/components/cdc/tests/failpoints/test_endpoint.rs index 3fdd6048971..f7cc387625d 100644 --- a/components/cdc/tests/failpoints/test_endpoint.rs +++ b/components/cdc/tests/failpoints/test_endpoint.rs @@ -17,7 +17,7 @@ use test_raftstore::*; use tikv_util::{debug, worker::Scheduler, HandyRwLock}; use txn_types::TimeStamp; -use crate::{new_event_feed, ClientReceiver, TestSuite, TestSuiteBuilder}; +use crate::{new_event_feed, new_event_feed_v2, ClientReceiver, 
TestSuite, TestSuiteBuilder}; #[test] fn test_cdc_double_scan_deregister() { @@ -525,3 +525,47 @@ fn test_cdc_rawkv_resolved_ts() { fail::remove(pause_write_fp); handle.join().unwrap(); } + +// Test one region can be subscribed multiple times in one stream with different +// `request_id`s. +#[test] +fn test_cdc_stream_multiplexing() { + let cluster = new_server_cluster(0, 2); + cluster.pd_client.disable_default_operator(); + let mut suite = TestSuiteBuilder::new().cluster(cluster).build(); + let rid = suite.cluster.get_region(&[]).id; + let (mut req_tx, _, receive_event) = new_event_feed_v2(suite.get_region_cdc_client(rid)); + + // Subscribe the region with request_id 1. + let mut req = suite.new_changedata_request(rid); + req.request_id = 1; + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + receive_event(false); + + // Subscribe the region with request_id 2. + fail::cfg("before_post_incremental_scan", "pause").unwrap(); + let mut req = suite.new_changedata_request(rid); + req.request_id = 2; + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + receive_event(false); + + // Request 2 can't receive a ResolvedTs, because it's not ready. + for _ in 0..10 { + let event = receive_event(true); + let req_id = event.get_resolved_ts().get_request_id(); + assert_eq!(req_id, 1); + } + + // After request 2 is ready, it must receive a ResolvedTs. 
+ fail::remove("before_post_incremental_scan"); + let mut request_2_ready = false; + for _ in 0..20 { + let event = receive_event(true); + let req_id = event.get_resolved_ts().get_request_id(); + if req_id == 2 { + request_2_ready = true; + break; + } + } + assert!(request_2_ready); +} diff --git a/components/cdc/tests/failpoints/test_observe.rs b/components/cdc/tests/failpoints/test_observe.rs index 480fcc4582f..4a34185de76 100644 --- a/components/cdc/tests/failpoints/test_observe.rs +++ b/components/cdc/tests/failpoints/test_observe.rs @@ -25,14 +25,18 @@ fn test_observe_duplicate_cmd() { } fn test_observe_duplicate_cmd_impl() { - let mut suite = TestSuite::new(3, F::TAG); + let mut suite = TestSuite::new(1, F::TAG); + suite.cluster.pd_client.disable_default_operator(); let region = suite.cluster.get_region(&[]); - let req = suite.new_changedata_request(region.get_id()); - let (mut req_tx, event_feed_wrap, receive_event) = + let mut req = suite.new_changedata_request(region.get_id()); + + req.request_id = 1; + let (mut req_tx_1, event_feed_wrap_1, receive_event_1) = new_event_feed(suite.get_region_cdc_client(region.get_id())); - block_on(req_tx.send((req.clone(), WriteFlags::default()))).unwrap(); - let mut events = receive_event(false).events.to_vec(); + block_on(req_tx_1.send((req.clone(), WriteFlags::default()))).unwrap(); + + let mut events = receive_event_1(false).events.to_vec(); assert_eq!(events.len(), 1); match events.pop().unwrap().event.unwrap() { Event_oneof_event::Entries(es) => { @@ -46,7 +50,6 @@ fn test_observe_duplicate_cmd_impl() { // If tikv enable ApiV2, txn key needs to start with 'x'; let (k, v) = ("xkey1".to_owned(), "value".to_owned()); - // Prewrite let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); let mut mutation = Mutation::default(); mutation.set_op(Op::Put); @@ -58,7 +61,7 @@ fn test_observe_duplicate_cmd_impl() { k.clone().into_bytes(), start_ts, ); - let mut events = receive_event(false).events.to_vec(); + let 
mut events = receive_event_1(false).events.to_vec(); assert_eq!(events.len(), 1); match events.pop().unwrap().event.unwrap() { Event_oneof_event::Entries(entries) => { @@ -67,31 +70,38 @@ fn test_observe_duplicate_cmd_impl() { } other => panic!("unknown event {:?}", other), } - let fp = "before_cdc_flush_apply"; - fail::cfg(fp, "pause").unwrap(); + + fail::cfg("before_cdc_flush_apply", "pause").unwrap(); // Async commit let commit_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); let commit_resp = suite.async_kv_commit(region.get_id(), vec![k.into_bytes()], start_ts, commit_ts); + sleep_ms(200); - // Close previous connection and open a new one twice time - let (mut req_tx, resp_rx) = suite - .get_region_cdc_client(region.get_id()) - .event_feed() - .unwrap(); - event_feed_wrap.replace(Some(resp_rx)); - block_on(req_tx.send((req.clone(), WriteFlags::default()))).unwrap(); - let (mut req_tx, resp_rx) = suite - .get_region_cdc_client(region.get_id()) - .event_feed() - .unwrap(); - event_feed_wrap.replace(Some(resp_rx)); - block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); - fail::remove(fp); + + // Open two new connections and close the old one. 
+ let (mut req_tx_2, event_feed_wrap_2, _) = + new_event_feed(suite.get_region_cdc_client(region.get_id())); + req.request_id = 2; + block_on(req_tx_2.send((req.clone(), WriteFlags::default()))).unwrap(); + + let (mut req_tx_3, event_feed_wrap_3, receive_event_3) = + new_event_feed(suite.get_region_cdc_client(region.get_id())); + req.request_id = 3; + block_on(req_tx_3.send((req, WriteFlags::default()))).unwrap(); + + sleep_ms(200); + drop(req_tx_1); + drop(req_tx_2); + drop(event_feed_wrap_1); + drop(event_feed_wrap_2); + + fail::remove("before_cdc_flush_apply"); + // Receive Commit response block_on(commit_resp).unwrap(); - let mut events = receive_event(false).events.to_vec(); + let mut events = receive_event_3(false).events.to_vec(); assert_eq!(events.len(), 1); match events.pop().unwrap().event.unwrap() { Event_oneof_event::Entries(es) => { @@ -110,7 +120,7 @@ fn test_observe_duplicate_cmd_impl() { loop { // Even if there is no write, // resolved ts should be advanced regularly. - let event = receive_event(true); + let event = receive_event_3(true); if let Some(resolved_ts) = event.resolved_ts.as_ref() { assert_ne!(0, resolved_ts.ts); counter += 1; @@ -120,7 +130,8 @@ fn test_observe_duplicate_cmd_impl() { } } - event_feed_wrap.replace(None); + drop(req_tx_3); + drop(event_feed_wrap_3); suite.stop(); } diff --git a/components/cdc/tests/integrations/test_cdc.rs b/components/cdc/tests/integrations/test_cdc.rs index 51d60a06f5b..c1ac1706d52 100644 --- a/components/cdc/tests/integrations/test_cdc.rs +++ b/components/cdc/tests/integrations/test_cdc.rs @@ -382,8 +382,7 @@ fn test_cdc_cluster_id_mismatch_impl() { let mut req = suite.new_changedata_request(1); req.mut_header().set_ticdc_version("5.3.0".into()); req.mut_header().set_cluster_id(DEFAULT_CLUSTER_ID + 1); - let (mut req_tx, event_feed_wrap, receive_event) = - new_event_feed(suite.get_region_cdc_client(1)); + let (mut req_tx, _, receive_event) = new_event_feed(suite.get_region_cdc_client(1)); 
block_on(req_tx.send((req.clone(), WriteFlags::default()))).unwrap(); // Assert mismatch. @@ -399,6 +398,8 @@ fn test_cdc_cluster_id_mismatch_impl() { // Low version request. req.mut_header().set_ticdc_version("4.0.8".into()); req.mut_header().set_cluster_id(DEFAULT_CLUSTER_ID + 1); + let (mut req_tx, event_feed_wrap, receive_event) = + new_event_feed(suite.get_region_cdc_client(1)); block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); let mut events = receive_event(false).events.to_vec(); assert_eq!(events.len(), 1); diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index f2663c79287..d2c4519d50d 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -12,7 +12,8 @@ use concurrency_manager::ConcurrencyManager; use engine_rocks::RocksEngine; use futures::executor::block_on; use grpcio::{ - ChannelBuilder, ClientDuplexReceiver, ClientDuplexSender, ClientUnaryReceiver, Environment, + CallOption, ChannelBuilder, ClientDuplexReceiver, ClientDuplexSender, ClientUnaryReceiver, + Environment, MetadataBuilder, }; use kvproto::{ cdcpb::{create_change_data, ChangeDataClient, ChangeDataEvent, ChangeDataRequest}, @@ -48,7 +49,6 @@ impl ClientReceiver { std::mem::replace(&mut *self.receiver.lock().unwrap(), rx) } } - #[allow(clippy::type_complexity)] pub fn new_event_feed( client: &ChangeDataClient, @@ -57,7 +57,37 @@ pub fn new_event_feed( ClientReceiver, Box ChangeDataEvent + Send>, ) { - let (req_tx, resp_rx) = client.event_feed().unwrap(); + create_event_feed(client, false) +} + +#[allow(clippy::type_complexity)] +pub fn new_event_feed_v2( + client: &ChangeDataClient, +) -> ( + ClientDuplexSender, + ClientReceiver, + Box ChangeDataEvent + Send>, +) { + create_event_feed(client, true) +} + +#[allow(clippy::type_complexity)] +fn create_event_feed( + client: &ChangeDataClient, + stream_multiplexing: bool, +) -> ( + ClientDuplexSender, + ClientReceiver, + Box ChangeDataEvent + Send>, +) { + let (req_tx, resp_rx) = if 
stream_multiplexing { + let mut metadata = MetadataBuilder::with_capacity(1); + metadata.add_str("features", "stream-multiplexing").unwrap(); + let opt = CallOption::default().headers(metadata.build()); + client.event_feed_v2_opt(opt).unwrap() + } else { + client.event_feed().unwrap() + }; let event_feed_wrap = Arc::new(Mutex::new(Some(resp_rx))); let event_feed_wrap_clone = event_feed_wrap.clone(); From 512a7fbdd3585583ed36c4d6e67df2bceebc9191 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 5 Jul 2023 11:12:43 +0800 Subject: [PATCH 0774/1149] sst_import: implement regional import mode for raftstore v2 (#15004) close tikv/tikv#14916 implement regional import mode for raftstore-v2. Signed-off-by: SpadeA-Tang Signed-off-by: Spade A --- Cargo.lock | 3 +- Cargo.toml | 2 +- components/raftstore-v2/src/fsm/store.rs | 2 +- .../src/operation/command/write/ingest.rs | 17 +- components/raftstore-v2/src/operation/mod.rs | 2 +- .../tests/integrations/cluster.rs | 1 + components/raftstore/src/store/fsm/apply.rs | 9 +- components/raftstore/src/store/fsm/peer.rs | 4 +- components/server/src/server.rs | 2 + components/server/src/server2.rs | 2 + components/sst_importer/Cargo.toml | 12 +- components/sst_importer/src/import_mode2.rs | 305 ++++++++++++++++++ components/sst_importer/src/lib.rs | 2 + components/sst_importer/src/sst_importer.rs | 134 ++++++-- components/sst_importer/src/sst_writer.rs | 2 +- components/test_raftstore-v2/src/node.rs | 1 + components/test_raftstore-v2/src/server.rs | 5 +- components/test_raftstore/src/node.rs | 4 +- components/test_raftstore/src/server.rs | 2 + src/import/sst_service.rs | 104 ++++-- tests/failpoints/cases/test_import_service.rs | 3 +- .../integrations/config/dynamic/raftstore.rs | 2 +- tests/integrations/import/test_sst_service.rs | 138 ++++++++ tests/integrations/import/util.rs | 55 ++-- .../integrations/raftstore/test_bootstrap.rs | 4 +- tests/integrations/server/kv_service.rs | 4 +- 26 
files changed, 735 insertions(+), 86 deletions(-) create mode 100644 components/sst_importer/src/import_mode2.rs diff --git a/Cargo.lock b/Cargo.lock index c84f3230d46..4eae3894b2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2966,7 +2966,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#05895f97d510500cb6651421f3d54efa8e1d6415" +source = "git+https://github.com/pingcap/kvproto.git#cd6769a15c69b48fd65ecb6d813b861156be4d61" dependencies = [ "futures 0.3.15", "grpcio", @@ -5815,6 +5815,7 @@ name = "sst_importer" version = "0.1.0" dependencies = [ "api_version", + "collections", "crc32fast", "dashmap", "encryption", diff --git a/Cargo.toml b/Cargo.toml index 0a33a3f0351..744ecdec44d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -328,7 +328,7 @@ resource_metering = { path = "components/resource_metering" } security = { path = "components/security" } server = { path = "components/server" } snap_recovery = { path = "components/snap_recovery" } -sst_importer = { path = "components/sst_importer" } +sst_importer = { path = "components/sst_importer", default-features = false } test_backup = { path = "components/test_backup" } test_coprocessor = { path = "components/test_coprocessor", default-features = false } example_coprocessor_plugin = { path = "components/test_coprocessor_plugin/example_plugin" } diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 0ee3c59812e..e7cc2b52b9e 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -42,7 +42,7 @@ pub struct StoreMeta { /// to avoid end key conflict. 
pub(crate) region_ranges: BTreeMap<(Vec, u64), u64>, /// region_id -> (region, initialized) - pub(crate) regions: HashMap, + pub regions: HashMap, } impl StoreMeta { diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index 20c7c92ee71..8715318db4e 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -9,6 +9,7 @@ use raftstore::{ Result, }; use slog::error; +use sst_importer::range_overlaps; use tikv_util::{box_try, slog_panic}; use crate::{ @@ -42,6 +43,7 @@ impl Store { if ssts.is_empty() { return Ok(()); } + let mut region_ssts: HashMap<_, Vec<_>> = HashMap::default(); for sst in ssts { region_ssts @@ -49,11 +51,22 @@ impl Store { .or_default() .push(sst); } + + let ranges = ctx.sst_importer.ranges_in_import(); for (region_id, ssts) in region_ssts { if let Err(TrySendError::Disconnected(msg)) = ctx.router.send(region_id, PeerMsg::CleanupImportSst(ssts.into())) && !ctx.router.is_shutdown() { - let PeerMsg::CleanupImportSst(ssts) = msg else { unreachable!() }; - let _ = ctx.schedulers.tablet.schedule(tablet::Task::CleanupImportSst(ssts)); + let PeerMsg::CleanupImportSst( ssts) = msg else { unreachable!() }; + let mut ssts = ssts.into_vec(); + ssts.retain(|sst| { + for range in &ranges { + if range_overlaps(range, sst.get_range()) { + return false; + } + } + true + }); + let _ = ctx.schedulers.tablet.schedule(tablet::Task::CleanupImportSst(ssts.into())); } } diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 4e6eacb8f28..663d051f7e1 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -49,7 +49,7 @@ pub mod test_util { pub fn create_tmp_importer() -> (TempDir, Arc) { let dir = TempDir::new().unwrap(); let importer = Arc::new( - SstImporter::new(&Default::default(), 
dir.path(), None, ApiVersion::V1).unwrap(), + SstImporter::new(&Default::default(), dir.path(), None, ApiVersion::V1, true).unwrap(), ); (dir, importer) } diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index f0669b03f9a..eceb756cc18 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -326,6 +326,7 @@ impl RunningState { path.join("importer"), key_manager.clone(), ApiVersion::V1, + true, ) .unwrap(), ); diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 48580a864bb..1566334bb87 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -5057,7 +5057,14 @@ mod tests { pub fn create_tmp_importer(path: &str) -> (TempDir, Arc) { let dir = Builder::new().prefix(path).tempdir().unwrap(); let importer = Arc::new( - SstImporter::new(&ImportConfig::default(), dir.path(), None, ApiVersion::V1).unwrap(), + SstImporter::new( + &ImportConfig::default(), + dir.path(), + None, + ApiVersion::V1, + false, + ) + .unwrap(), ); (dir, importer) } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 24ac4681d63..2bb67a55deb 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -5724,7 +5724,9 @@ where // When Lightning or BR is importing data to TiKV, their ingest-request may fail // because of region-epoch not matched. So we hope TiKV do not check region size // and split region during importing. 
- if self.ctx.importer.get_mode() == SwitchMode::Import { + if self.ctx.importer.get_mode() == SwitchMode::Import + || self.ctx.importer.region_in_import_mode(self.region()) + { return; } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 16d12e48aa9..6f46c09ded4 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -841,6 +841,7 @@ where import_path, self.core.encryption_key_manager.clone(), self.core.config.storage.api_version(), + false, ) .unwrap(); for (cf_name, compression_type) in &[ @@ -1031,6 +1032,7 @@ where engines.engine.clone(), LocalTablets::Singleton(engines.engines.kv.clone()), servers.importer.clone(), + None, ); let import_cfg_mgr = import_service.get_config_manager(); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 4587b6700fe..f7a155c0dc1 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -750,6 +750,7 @@ where import_path, self.core.encryption_key_manager.clone(), self.core.config.storage.api_version(), + true, ) .unwrap(); for (cf_name, compression_type) in &[ @@ -903,6 +904,7 @@ where engines.engine.clone(), LocalTablets::Registry(self.tablet_registry.as_ref().unwrap().clone()), servers.importer.clone(), + Some(self.router.as_ref().unwrap().store_meta().clone()), ); let import_cfg_mgr = import_service.get_config_manager(); diff --git a/components/sst_importer/Cargo.toml b/components/sst_importer/Cargo.toml index bb4e64657ce..d292b44606e 100644 --- a/components/sst_importer/Cargo.toml +++ b/components/sst_importer/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" publish = false [features] -default = ["cloud-aws", "cloud-gcp", "cloud-azure"] +default = ["cloud-aws", "cloud-gcp", "cloud-azure", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] cloud-aws = ["external_storage_export/cloud-aws"] cloud-gcp = ["external_storage_export/cloud-gcp"] cloud-azure = ["external_storage_export/cloud-azure"] @@ -15,9 
+15,19 @@ cloud-storage-dylib = ["external_storage_export/cloud-storage-dylib"] test-engines-rocksdb = [ "engine_test/test-engines-rocksdb", ] +test-engine-kv-rocksdb = [ + "engine_test/test-engine-kv-rocksdb" +] +test-engine-raft-raft-engine = [ + "engine_test/test-engine-raft-raft-engine" +] +test-engines-panic = [ + "engine_test/test-engines-panic", +] [dependencies] api_version = { workspace = true } +collections = { workspace = true } crc32fast = "1.2" dashmap = "5" encryption = { workspace = true } diff --git a/components/sst_importer/src/import_mode2.rs b/components/sst_importer/src/import_mode2.rs new file mode 100644 index 00000000000..c892d958c22 --- /dev/null +++ b/components/sst_importer/src/import_mode2.rs @@ -0,0 +1,305 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + sync::{Arc, Mutex}, + time::{Duration, Instant}, +}; + +use collections::{HashMap, HashSet}; +use futures_util::compat::Future01CompatExt; +use kvproto::{import_sstpb::Range, metapb::Region}; +use tikv_util::timer::GLOBAL_TIMER_HANDLE; +use tokio::runtime::Handle; + +use super::Config; + +#[derive(PartialEq, Eq, Hash, Clone)] +// implement hash so that it can be a key in HashMap +pub struct HashRange { + pub start_key: std::vec::Vec, + pub end_key: std::vec::Vec, +} + +impl From for HashRange { + fn from(key_range: Range) -> Self { + Self { + start_key: key_range.start, + end_key: key_range.end, + } + } +} + +struct ImportModeSwitcherInnerV2 { + timeout: Duration, + // range in import mode -> timeout to restore to normal mode + import_mode_ranges: HashMap, +} + +impl ImportModeSwitcherInnerV2 { + fn clear_import_mode_range(&mut self, range: HashRange) { + self.import_mode_ranges.remove(&range); + } +} + +#[derive(Clone)] +pub struct ImportModeSwitcherV2 { + inner: Arc>, +} + +impl ImportModeSwitcherV2 { + pub fn new(cfg: &Config) -> ImportModeSwitcherV2 { + let timeout = cfg.import_mode_timeout.0; + let inner = 
Arc::new(Mutex::new(ImportModeSwitcherInnerV2 { + timeout, + import_mode_ranges: HashMap::default(), + })); + ImportModeSwitcherV2 { inner } + } + + // Periodically perform timeout check to change import mode of some regions back + // to normal mode. + pub fn start(&self, executor: &Handle) { + // spawn a background future to put regions back into normal mode after timeout + let inner = self.inner.clone(); + let switcher = Arc::downgrade(&inner); + let timer_loop = async move { + let mut prev_range = None; + // loop until the switcher has been dropped + while let Some(switcher) = switcher.upgrade() { + let next_check = { + let now = Instant::now(); + let mut switcher = switcher.lock().unwrap(); + if let Some(range) = prev_range.take() { + if let Some(next_check) = switcher.import_mode_ranges.get(&range) { + if now >= *next_check { + switcher.clear_import_mode_range(range); + } + } + } + + let mut min_next_check = now + switcher.timeout; + for (range, next_check) in &switcher.import_mode_ranges { + if *next_check < min_next_check { + min_next_check = *next_check; + prev_range = Some(range.clone()); + } + } + min_next_check + }; + + let ok = GLOBAL_TIMER_HANDLE.delay(next_check).compat().await.is_ok(); + if !ok { + warn!("failed to delay with global timer"); + } + } + }; + executor.spawn(timer_loop); + } + + pub fn range_enter_import_mode(&self, range: Range) { + let range = HashRange::from(range); + let mut inner = self.inner.lock().unwrap(); + let next_check = Instant::now() + inner.timeout; + // if the range exists before, the timeout is updated + inner.import_mode_ranges.insert(range, next_check); + } + + pub fn clear_import_mode_range(&self, range: Range) { + let mut inner = self.inner.lock().unwrap(); + let range = HashRange::from(range); + inner.clear_import_mode_range(range); + } + + pub fn region_in_import_mode(&self, region: &Region) -> bool { + let inner = self.inner.lock().unwrap(); + for r in inner.import_mode_ranges.keys() { + if 
 region_overlap_with_range(r, region) { + return true; + } + } + false + } + + pub fn range_in_import_mode(&self, range: &Range) -> bool { + let inner = self.inner.lock().unwrap(); + for r in inner.import_mode_ranges.keys() { + if range_overlaps(r, range) { + return true; + } + } + false + } + + pub fn ranges_in_import(&self) -> HashSet { + let inner = self.inner.lock().unwrap(); + HashSet::from_iter(inner.import_mode_ranges.keys().into_iter().cloned()) + } +} + +fn region_overlap_with_range(range: &HashRange, region: &Region) -> bool { + (region.end_key.is_empty() || range.start_key < region.end_key) + && (range.end_key.is_empty() || region.start_key < range.end_key) +} + +pub fn range_overlaps(range1: &HashRange, range2: &Range) -> bool { + (range2.end.is_empty() || range1.start_key < range2.end) + && (range1.end_key.is_empty() || range2.start < range1.end_key) +} + +#[cfg(test)] +mod test { + use std::thread; + + use tikv_util::config::ReadableDuration; + + use super::*; + + #[test] + fn test_region_range_overlaps() { + let verify_overlap = |ranges1: &[(&str, &str)], ranges2: &[(&str, &str)], overlap: bool| { + for r in ranges1 { + let hash_range = HashRange { + start_key: r.0.as_bytes().to_vec(), + end_key: r.1.as_bytes().to_vec(), + }; + + for r2 in ranges2 { + let mut region = Region::default(); + region.set_start_key(r2.0.as_bytes().to_vec()); + region.set_end_key(r2.1.as_bytes().to_vec()); + + if overlap { + assert!(region_overlap_with_range(&hash_range, &region)); + } else { + assert!(!region_overlap_with_range(&hash_range, &region)); + } + + let mut range = Range::default(); + range.set_start(r2.0.as_bytes().to_vec()); + range.set_end(r2.1.as_bytes().to_vec()); + if overlap { + assert!(range_overlaps(&hash_range, &range)); + } else { + assert!(!range_overlaps(&hash_range, &range)); + } + } + } + }; + + let ranges1 = vec![("", ""), ("", "k10"), ("k01", ""), ("k01", "k08")]; + let ranges2 = vec![("", ""), ("k02", "k07"), ("k07", "k11"), ("k07", "")]; 
verify_overlap(&ranges1, &ranges2, true); + verify_overlap(&ranges2, &ranges1, true); + + let ranges1 = vec![("k10", "k20")]; + let ranges2 = vec![("", "k10"), ("k20", "k30"), ("k20", "")]; + verify_overlap(&ranges1, &ranges2, false); + verify_overlap(&ranges2, &ranges1, false); + } + + #[test] + fn test_region_import_mode() { + let cfg = Config::default(); + let switcher = ImportModeSwitcherV2::new(&cfg); + let mut regions = vec![]; + for i in 1..=5 { + let mut region = Region::default(); + region.set_id(i); + region.set_start_key(format!("k{:02}", (i - 1) * 10).into()); + region.set_end_key(format!("k{:02}", i * 10).into()); + regions.push(region); + } + + let mut key_range = Range::default(); + key_range.set_end(b"j".to_vec()); + switcher.range_enter_import_mode(key_range.clone()); + // no regions should be set in import mode + for i in 1..=5 { + assert!(!switcher.region_in_import_mode(®ions[i - 1])); + } + + let mut r = Range::default(); + r.set_end(b"k".to_vec()); + assert!(switcher.range_in_import_mode(&r)); + + // region 1 2 3 should be included + key_range.set_start(b"k09".to_vec()); + key_range.set_end(b"k21".to_vec()); + switcher.range_enter_import_mode(key_range.clone()); + for i in 1..=3 { + assert!(switcher.region_in_import_mode(®ions[i - 1])); + } + for i in 4..=5 { + assert!(!switcher.region_in_import_mode(®ions[i - 1])); + } + + let mut key_range2 = Range::default(); + // region 3 4 5 should be included + key_range2.set_start(b"k29".to_vec()); + key_range2.set_end(b"".to_vec()); + switcher.range_enter_import_mode(key_range2.clone()); + for i in 1..=5 { + assert!(switcher.region_in_import_mode(®ions[i - 1])); + } + + switcher.clear_import_mode_range(key_range); + for i in 1..=2 { + assert!(!switcher.region_in_import_mode(®ions[i - 1])); + } + for i in 3..5 { + assert!(switcher.region_in_import_mode(®ions[i - 1])); + } + + switcher.clear_import_mode_range(key_range2); + for i in 3..=5 { + assert!(!switcher.region_in_import_mode(®ions[i - 1])); + } + } 
+ + #[test] + fn test_import_mode_timeout() { + let cfg = Config { + import_mode_timeout: ReadableDuration::millis(300), + ..Config::default() + }; + + let threads = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + + let switcher = ImportModeSwitcherV2::new(&cfg); + let mut region = Region::default(); + region.set_id(1); + region.set_start_key(b"k1".to_vec()); + region.set_end_key(b"k3".to_vec()); + let mut region2 = Region::default(); + region2.set_id(2); + region2.set_start_key(b"k3".to_vec()); + region2.set_end_key(b"k5".to_vec()); + + let mut key_range = Range::default(); + key_range.set_start(b"k2".to_vec()); + key_range.set_end(b"k4".to_vec()); + switcher.range_enter_import_mode(key_range); + assert!(switcher.region_in_import_mode(®ion)); + assert!(switcher.region_in_import_mode(®ion2)); + + switcher.start(threads.handle()); + + thread::sleep(Duration::from_secs(1)); + threads.block_on(tokio::task::yield_now()); + + let mut key_range = Range::default(); + key_range.set_start(b"k4".to_vec()); + key_range.set_end(b"k5".to_vec()); + switcher.range_enter_import_mode(key_range); + + assert!(!switcher.region_in_import_mode(®ion)); + assert!(switcher.region_in_import_mode(®ion2)); + + thread::sleep(Duration::from_secs(1)); + threads.block_on(tokio::task::yield_now()); + assert!(!switcher.region_in_import_mode(®ion2)); + } +} diff --git a/components/sst_importer/src/lib.rs b/components/sst_importer/src/lib.rs index 07d9de4cff2..0cfc3bab774 100644 --- a/components/sst_importer/src/lib.rs +++ b/components/sst_importer/src/lib.rs @@ -20,6 +20,7 @@ mod util; #[macro_use] pub mod import_mode; mod caching; +pub mod import_mode2; pub mod metrics; pub mod sst_importer; @@ -27,6 +28,7 @@ pub use self::{ config::{Config, ConfigManager}, errors::{error_inc, Error, Result}, import_file::sst_meta_to_path, + import_mode2::range_overlaps, sst_importer::SstImporter, sst_writer::{RawSstWriter, TxnSstWriter}, util::{copy_sst_for_ingestion, 
prepare_sst_for_ingestion}, diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index d6d3dc3e46e..8412273fa0f 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -14,6 +14,7 @@ use std::{ time::Duration, }; +use collections::HashSet; use dashmap::{mapref::entry::Entry, DashMap}; use encryption::{to_engine_encryption_method, DataKeyManager}; use engine_rocks::{get_env, RocksSstReader}; @@ -28,8 +29,9 @@ use external_storage_export::{ use file_system::{get_io_rate_limiter, IoType, OpenOptions}; use kvproto::{ brpb::{CipherInfo, StorageBackend}, - import_sstpb::*, + import_sstpb::{Range, *}, kvrpcpb::ApiVersion, + metapb::Region, }; use tikv_util::{ codec::{ @@ -38,7 +40,7 @@ use tikv_util::{ }, sys::{thread::ThreadBuildWrapper, SysQuota}, time::{Instant, Limiter}, - HandyRwLock, + Either, HandyRwLock, }; use tokio::{ runtime::{Handle, Runtime}, @@ -50,6 +52,7 @@ use crate::{ caching::cache_map::{CacheMap, ShareOwned}, import_file::{ImportDir, ImportFile}, import_mode::{ImportModeSwitcher, RocksDbMetricsFn}, + import_mode2::{HashRange, ImportModeSwitcherV2}, metrics::*, sst_writer::{RawSstWriter, TxnSstWriter}, util, Config, ConfigManager as ImportConfigManager, Error, Result, @@ -152,7 +155,7 @@ impl CacheKvFile { pub struct SstImporter { dir: ImportDir, key_manager: Option>, - switcher: ImportModeSwitcher, + switcher: Either, // TODO: lift api_version as a type parameter. 
api_version: ApiVersion, compression_types: HashMap, @@ -171,8 +174,13 @@ impl SstImporter { root: P, key_manager: Option>, api_version: ApiVersion, + raft_kv_v2: bool, ) -> Result { - let switcher = ImportModeSwitcher::new(cfg); + let switcher = if raft_kv_v2 { + Either::Right(ImportModeSwitcherV2::new(cfg)) + } else { + Either::Left(ImportModeSwitcher::new(cfg)) + }; let cached_storage = CacheMap::default(); // We are going to run some background tasks here, (hyper needs to maintain the // connection, the cache map needs gc intervally.) so we must create a @@ -214,6 +222,48 @@ impl SstImporter { }) } + pub fn range_enter_import_mode(&self, range: Range) { + if let Either::Right(ref switcher) = self.switcher { + switcher.range_enter_import_mode(range) + } else { + unreachable!(); + } + } + + pub fn clear_import_mode_regions(&self, range: Range) { + if let Either::Right(ref switcher) = self.switcher { + switcher.clear_import_mode_range(range); + } else { + unreachable!(); + } + } + + // it always returns false for v1 + pub fn region_in_import_mode(&self, region: &Region) -> bool { + if let Either::Right(ref switcher) = self.switcher { + switcher.region_in_import_mode(region) + } else { + false + } + } + + // it always returns false for v1 + pub fn range_in_import_mode(&self, range: &Range) -> bool { + if let Either::Right(ref switcher) = self.switcher { + switcher.range_in_import_mode(range) + } else { + false + } + } + + pub fn ranges_in_import(&self) -> HashSet { + if let Either::Right(ref switcher) = self.switcher { + switcher.ranges_in_import() + } else { + unreachable!() + } + } + fn calcualte_usage_mem(mem_ratio: f64) -> u64 { ((SysQuota::memory_limit_in_bytes() as f64) * mem_ratio) as u64 } @@ -230,8 +280,11 @@ impl SstImporter { } } - pub fn start_switch_mode_check(&self, executor: &Handle, db: E) { - self.switcher.start(executor, db); + pub fn start_switch_mode_check(&self, executor: &Handle, db: Option) { + match &self.switcher { + Either::Left(switcher) 
=> switcher.start(executor, db.unwrap()), + Either::Right(switcher) => switcher.start(executor), + } } pub fn get_path(&self, meta: &SstMeta) -> PathBuf { @@ -365,15 +418,29 @@ impl SstImporter { } pub fn enter_normal_mode(&self, db: E, mf: RocksDbMetricsFn) -> Result { - self.switcher.enter_normal_mode(&db, mf) + if let Either::Left(ref switcher) = self.switcher { + switcher.enter_normal_mode(&db, mf) + } else { + unreachable!(); + } } pub fn enter_import_mode(&self, db: E, mf: RocksDbMetricsFn) -> Result { - self.switcher.enter_import_mode(&db, mf) + if let Either::Left(ref switcher) = self.switcher { + switcher.enter_import_mode(&db, mf) + } else { + unreachable!(); + } } pub fn get_mode(&self) -> SwitchMode { - self.switcher.get_mode() + if let Either::Left(ref switcher) = self.switcher { + switcher.get_mode() + } else { + // v2 should use region_in_import_mode/range_in_import_mode to check regional + // mode + SwitchMode::Normal + } } #[cfg(test)] @@ -1909,7 +1976,7 @@ mod tests { ..Default::default() }; let import_dir = tempfile::tempdir().unwrap(); - let importer = SstImporter::new(&cfg, import_dir, None, ApiVersion::V1).unwrap(); + let importer = SstImporter::new(&cfg, import_dir, None, ApiVersion::V1, false).unwrap(); let mem_limit_old = importer.mem_limit.load(Ordering::SeqCst); // create new config and get the diff config. 
@@ -1961,6 +2028,7 @@ mod tests { import_dir, Some(key_manager), ApiVersion::V1, + false, ) .unwrap(); let ext_storage = { @@ -2020,6 +2088,7 @@ mod tests { import_dir, Some(key_manager), ApiVersion::V1, + false, ) .unwrap(); let ext_storage = { @@ -2083,7 +2152,7 @@ mod tests { ..Default::default() }; let importer = - SstImporter::new(&cfg, import_dir, Some(key_manager), ApiVersion::V1).unwrap(); + SstImporter::new(&cfg, import_dir, Some(key_manager), ApiVersion::V1, false).unwrap(); let rewrite_rule = &new_rewrite_rule(b"", b"", 12345); let ext_storage = { importer.wrap_kms( @@ -2137,6 +2206,7 @@ mod tests { import_dir, Some(key_manager.clone()), ApiVersion::V1, + false, ) .unwrap(); @@ -2173,6 +2243,7 @@ mod tests { import_dir, Some(key_manager), ApiVersion::V1, + false, ) .unwrap(); @@ -2207,7 +2278,7 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1).unwrap(); + let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer @@ -2262,6 +2333,7 @@ mod tests { &importer_dir, Some(key_manager.clone()), ApiVersion::V1, + false, ) .unwrap(); @@ -2315,7 +2387,7 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1).unwrap(); + let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer @@ -2360,7 +2432,7 @@ mod tests { // performs the download. 
let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1).unwrap(); + let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); // creates a sample SST file. let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file_txn_default().unwrap(); @@ -2404,7 +2476,7 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1).unwrap(); + let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); // creates a sample SST file. let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file_txn_write().unwrap(); @@ -2470,7 +2542,8 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1).unwrap(); + let importer = + SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer @@ -2542,7 +2615,7 @@ mod tests { let (_ext_sst_dir, backend, mut meta) = create_sample_external_sst_file().unwrap(); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1).unwrap(); + let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); let db = create_sst_test_engine().unwrap(); // note: the range doesn't contain the DATA_PREFIX 'z'. 
meta.mut_range().set_start(b"t123_r02".to_vec()); @@ -2588,7 +2661,7 @@ mod tests { let (_ext_sst_dir, backend, mut meta) = create_sample_external_sst_file().unwrap(); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1).unwrap(); + let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); let db = create_sst_test_engine().unwrap(); meta.mut_range().set_start(b"t5_r02".to_vec()); meta.mut_range().set_end(b"t5_r12".to_vec()); @@ -2635,7 +2708,7 @@ mod tests { meta.set_uuid(vec![0u8; 16]); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1).unwrap(); + let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); let db = create_sst_test_engine().unwrap(); let backend = external_storage_export::make_local_backend(ext_sst_dir.path()); @@ -2660,7 +2733,7 @@ mod tests { let (_ext_sst_dir, backend, mut meta) = create_sample_external_sst_file().unwrap(); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1).unwrap(); + let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); let db = create_sst_test_engine().unwrap(); meta.mut_range().set_start(vec![b'x']); meta.mut_range().set_end(vec![b'y']); @@ -2686,7 +2759,7 @@ mod tests { let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file().unwrap(); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1).unwrap(); + let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); let db = create_sst_test_engine().unwrap(); let result = importer.download::( @@ -2723,7 +2796,7 @@ mod tests { // 
performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, api_version).unwrap(); + let importer = SstImporter::new(&cfg, &importer_dir, None, api_version, false).unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer @@ -2782,7 +2855,7 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, api_version).unwrap(); + let importer = SstImporter::new(&cfg, &importer_dir, None, api_version, false).unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer @@ -2837,7 +2910,7 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, api_version).unwrap(); + let importer = SstImporter::new(&cfg, &importer_dir, None, api_version, false).unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer @@ -2884,7 +2957,8 @@ mod tests { // performs the download. 
let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let mut importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1).unwrap(); + let mut importer = + SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); importer.set_compression_type(CF_DEFAULT, Some(SstCompressionType::Snappy)); let db = create_sst_test_engine().unwrap(); @@ -2916,7 +2990,8 @@ mod tests { let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let mut importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1).unwrap(); + let mut importer = + SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); importer.set_compression_type(CF_DEFAULT, Some(SstCompressionType::Zstd)); let db_path = importer_dir.path().join("db"); let db = new_test_engine(db_path.to_str().unwrap(), DATA_CFS); @@ -2965,7 +3040,7 @@ mod tests { fn test_import_support_download() { let import_dir = tempfile::tempdir().unwrap(); let importer = - SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1).unwrap(); + SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1, false).unwrap(); assert_eq!(importer.import_support_download(), false); let import_dir = tempfile::tempdir().unwrap(); @@ -2977,6 +3052,7 @@ mod tests { import_dir, None, ApiVersion::V1, + false, ) .unwrap(); assert_eq!(importer.import_support_download(), true); @@ -2987,7 +3063,7 @@ mod tests { // create importer object. let import_dir = tempfile::tempdir().unwrap(); let importer = - SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1).unwrap(); + SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1, false).unwrap(); assert_eq!(importer.mem_use.load(Ordering::SeqCst), 0); // test inc_mem_and_check() and dec_mem() successfully. 
@@ -3015,7 +3091,7 @@ mod tests { fn test_dashmap_lock() { let import_dir = tempfile::tempdir().unwrap(); let importer = - SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1).unwrap(); + SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1, false).unwrap(); let key = "file1"; let r = Arc::new(OnceCell::new()); diff --git a/components/sst_importer/src/sst_writer.rs b/components/sst_importer/src/sst_writer.rs index 70d30569557..0f9ac62f643 100644 --- a/components/sst_importer/src/sst_writer.rs +++ b/components/sst_importer/src/sst_writer.rs @@ -310,7 +310,7 @@ mod tests { let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, api_version).unwrap(); + let importer = SstImporter::new(&cfg, &importer_dir, None, api_version, false).unwrap(); let db_path = importer_dir.path().join("db"); let db = new_test_engine(db_path.to_str().unwrap(), DATA_CFS); (f(&importer, &db, meta).unwrap(), importer_dir) diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index d6d8838e1b7..2d33a98b35d 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -293,6 +293,7 @@ impl Simulator for NodeCluster { dir, key_manager.clone(), cfg.storage.api_version(), + true, ) .unwrap(), ) diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index b398d51ad4c..9b792d9f5bb 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -487,7 +487,8 @@ impl ServerCluster { let (res_tag_factory, collector_reg_handle, rsmeter_cleanup) = self.init_resource_metering(&cfg.resource_metering); - let check_leader_runner = CheckLeaderRunner::new(store_meta, coprocessor_host.clone()); + let check_leader_runner = + CheckLeaderRunner::new(store_meta.clone(), coprocessor_host.clone()); let check_leader_scheduler = 
bg_worker.start("check-leader", check_leader_runner); let mut lock_mgr = LockManager::new(&cfg.pessimistic_txn); @@ -533,6 +534,7 @@ impl ServerCluster { dir, key_manager.clone(), cfg.storage.api_version(), + true, ) .unwrap(), ) @@ -543,6 +545,7 @@ impl ServerCluster { raft_kv_v2, LocalTablets::Registry(tablet_registry.clone()), Arc::clone(&importer), + Some(store_meta), ); // Create deadlock service. diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 5221613f2b6..7188ab98d98 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -299,7 +299,9 @@ impl Simulator for NodeCluster { let importer = { let dir = Path::new(engines.kv.path()).join("import-sst"); - Arc::new(SstImporter::new(&cfg.import, dir, None, cfg.storage.api_version()).unwrap()) + Arc::new( + SstImporter::new(&cfg.import, dir, None, cfg.storage.api_version(), false).unwrap(), + ) }; let local_reader = LocalReader::new( diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 30ad3175b2f..ab2da452bcc 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -437,6 +437,7 @@ impl ServerCluster { dir, key_manager.clone(), cfg.storage.api_version(), + false, ) .unwrap(), ) @@ -447,6 +448,7 @@ impl ServerCluster { engine, LocalTablets::Singleton(engines.kv.clone()), Arc::clone(&importer), + None, ); // Create deadlock service. diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 60f45933556..99f920ceb94 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -1,7 +1,7 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - collections::{HashMap, VecDeque}, + collections::{HashMap, HashSet, VecDeque}, convert::identity, future::Future, path::PathBuf, @@ -9,7 +9,6 @@ use std::{ time::Duration, }; -use collections::HashSet; use engine_traits::{CompactExt, MiscExt, CF_DEFAULT, CF_WRITE}; use file_system::{set_io_type, IoType}; use futures::{sink::SinkExt, stream::TryStreamExt, FutureExt, TryFutureExt}; @@ -25,6 +24,7 @@ use kvproto::{ }, kvrpcpb::Context, }; +use raftstore_v2::StoreMeta; use sst_importer::{ error_inc, metrics::*, sst_importer::DownloadExt, sst_meta_to_path, Config, ConfigManager, Error, Result, SstImporter, @@ -116,6 +116,9 @@ pub struct ImportSstService { raft_entry_max_size: ReadableSize, writer: raft_writer::ThrottledTlsEngineWriter, + + // it's some iff multi-rocksdb is enabled + store_meta: Option>>>, } struct RequestCollector { @@ -296,6 +299,7 @@ impl ImportSstService { engine: E, tablets: LocalTablets, importer: Arc, + store_meta: Option>>>, ) -> Self { let props = tikv_util::thread_group::current_properties(); let eng = Mutex::new(engine.clone()); @@ -318,7 +322,9 @@ impl ImportSstService { .build() .unwrap(); if let LocalTablets::Singleton(tablet) = &tablets { - importer.start_switch_mode_check(threads.handle(), tablet.clone()); + importer.start_switch_mode_check(threads.handle(), Some(tablet.clone())); + } else { + importer.start_switch_mode_check::(threads.handle(), None); } let writer = raft_writer::ThrottledTlsEngineWriter::default(); @@ -342,6 +348,7 @@ impl ImportSstService { task_slots: Arc::new(Mutex::new(HashSet::default())), raft_entry_max_size, writer, + store_meta, } } @@ -402,7 +409,36 @@ impl ImportSstService { return Some(errorpb); } }; - if self.importer.get_mode() == SwitchMode::Normal + + let reject_error = |region_id: Option| -> Option { + let mut errorpb = errorpb::Error::default(); + let err = if let Some(id) = region_id { + format!("too many sst files are ingesting for region {}", id) + } else { + "too many sst files are 
ingesting".to_string() + }; + let mut server_is_busy_err = errorpb::ServerIsBusy::default(); + server_is_busy_err.set_reason(err.clone()); + errorpb.set_message(err); + errorpb.set_server_is_busy(server_is_busy_err); + Some(errorpb) + }; + + // store_meta being Some means it is v2 + if let Some(ref store_meta) = self.store_meta { + if let Some((region, _)) = store_meta.lock().unwrap().regions.get(®ion_id) { + if !self.importer.region_in_import_mode(region) + && tablet.ingest_maybe_slowdown_writes(CF_WRITE).expect("cf") + { + return reject_error(Some(region_id)); + } + } else { + let mut errorpb = errorpb::Error::default(); + errorpb.set_message(format!("region {} not found", region_id)); + errorpb.mut_region_not_found().set_region_id(region_id); + return Some(errorpb); + } + } else if self.importer.get_mode() == SwitchMode::Normal && tablet.ingest_maybe_slowdown_writes(CF_WRITE).expect("cf") { match tablet.get_sst_key_ranges(CF_WRITE, 0) { @@ -416,14 +452,9 @@ impl ImportSstService { error!("get sst key ranges failed"; "err" => ?e); } } - let mut errorpb = errorpb::Error::default(); - let err = "too many sst files are ingesting"; - let mut server_is_busy_err = errorpb::ServerIsBusy::default(); - server_is_busy_err.set_reason(err.to_string()); - errorpb.set_message(err.to_string()); - errorpb.set_server_is_busy(server_is_busy_err); - return Some(errorpb); + return reject_error(None); } + None } @@ -658,10 +689,23 @@ macro_rules! impl_write { } impl ImportSst for ImportSstService { + // Switch mode for v1 and v2 is quite different. + // + // For v1, once it enters import mode, all regions are in import mode as there's + // only one kv rocksdb. + // + // V2 is different. The switch mode with import mode request carries a range + // where only regions overlapped with the range can enter import mode. 
+ // And unlike v1, where some rocksdb configs will be changed when entering + // import mode, the config of the rocksdb will not change when entering import + // mode due to implementation complexity (a region's rocksdb can change + // overtime due to snapshot, split, and merge, which brings some + // implemention complexities). If it really needs, we will implement it in the + // future. fn switch_mode( &mut self, ctx: RpcContext<'_>, - req: SwitchModeRequest, + mut req: SwitchModeRequest, sink: UnarySink, ) { let label = "switch_mode"; @@ -672,17 +716,37 @@ impl ImportSst for ImportSstService { CONFIG_ROCKSDB_GAUGE.with_label_values(&[cf, name]).set(v); } - if let LocalTablets::Singleton(tablet) = &self.tablets { - match req.get_mode() { + match &self.tablets { + LocalTablets::Singleton(tablet) => match req.get_mode() { SwitchMode::Normal => self.importer.enter_normal_mode(tablet.clone(), mf), SwitchMode::Import => self.importer.enter_import_mode(tablet.clone(), mf), + }, + LocalTablets::Registry(_) => { + if req.get_mode() == SwitchMode::Import { + if req.has_range() { + let range = req.take_range(); + self.importer.range_enter_import_mode(range); + Ok(true) + } else { + Err(sst_importer::Error::Engine( + "partitioned-raft-kv only support switch mode with range set" + .into(), + )) + } + } else { + // case SwitchMode::Normal + if req.has_range() { + let range = req.take_range(); + self.importer.clear_import_mode_regions(range); + Ok(true) + } else { + Err(sst_importer::Error::Engine( + "partitioned-raft-kv only support switch mode with range set" + .into(), + )) + } + } } - } else if req.get_mode() != SwitchMode::Normal { - Err(sst_importer::Error::Engine( - "partitioned-raft-kv doesn't support import mode".into(), - )) - } else { - Ok(false) } }; match res { diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index 8d335666215..6f99f90895d 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ 
b/tests/failpoints/cases/test_import_service.rs @@ -253,7 +253,6 @@ fn test_ingest_file_twice_and_conflict() { #[test] fn test_ingest_sst_v2() { - let mut cluster = test_raftstore_v2::new_server_cluster(1, 1); let mut config = TikvConfig::default(); config.server.addr = "127.0.0.1:0".to_owned(); let cleanup_interval = Duration::from_millis(10); @@ -263,7 +262,7 @@ fn test_ingest_sst_v2() { config.raft_store.region_split_check_diff = Some(ReadableSize::kb(1)); config.server.grpc_concurrency = 1; - let (ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(Some(config), &mut cluster); + let (cluster, ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(Some(config)); let temp_dir = Builder::new().prefix("test_ingest_sst").tempdir().unwrap(); let sst_path = temp_dir.path().join("test.sst"); let sst_range = (0, 100); diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index 1748ad4c291..4a7ab76da8b 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -76,7 +76,7 @@ fn start_raftstore( .as_path() .display() .to_string(); - Arc::new(SstImporter::new(&cfg.import, p, None, cfg.storage.api_version()).unwrap()) + Arc::new(SstImporter::new(&cfg.import, p, None, cfg.storage.api_version(), false).unwrap()) }; let snap_mgr = { let p = dir diff --git a/tests/integrations/import/test_sst_service.rs b/tests/integrations/import/test_sst_service.rs index a47c817d2af..aa095b50c1e 100644 --- a/tests/integrations/import/test_sst_service.rs +++ b/tests/integrations/import/test_sst_service.rs @@ -1,5 +1,7 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
+use std::time::Duration; + use futures::{executor::block_on, stream::StreamExt}; use kvproto::{import_sstpb::*, kvrpcpb::Context, tikvpb::*}; use pd_client::PdClient; @@ -118,6 +120,86 @@ fn test_ingest_sst() { assert!(!resp.has_error(), "{:?}", resp.get_error()); } +fn switch_mode(import: &ImportSstClient, range: Range, mode: SwitchMode) { + let mut switch_req = SwitchModeRequest::default(); + switch_req.set_mode(mode); + switch_req.set_range(range); + let _ = import.switch_mode(&switch_req).unwrap(); +} + +#[test] +fn test_switch_mode_v2() { + let mut cfg = TikvConfig::default(); + cfg.server.grpc_concurrency = 1; + cfg.rocksdb.writecf.disable_auto_compactions = true; + cfg.raft_store.right_derive_when_split = true; + // cfg.rocksdb.writecf.level0_slowdown_writes_trigger = Some(2); + let (mut cluster, mut ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(Some(cfg)); + + let region = cluster.get_region(b""); + cluster.must_split(®ion, &[50]); + let region = cluster.get_region(&[50]); + ctx.set_region_epoch(region.get_region_epoch().clone()); + + let mut key_range = Range::default(); + key_range.set_start([50].to_vec()); + switch_mode(&import, key_range.clone(), SwitchMode::Import); + + let temp_dir = Builder::new().prefix("test_ingest_sst").tempdir().unwrap(); + + let upload_and_ingest = + |sst_range, import: &ImportSstClient, path_name, ctx: &Context| -> IngestResponse { + let sst_path = temp_dir.path().join(path_name); + let (mut meta, data) = gen_sst_file(sst_path, sst_range); + meta.set_cf_name("write".to_string()); + // Set region id and epoch. + meta.set_region_id(ctx.get_region_id()); + meta.set_region_epoch(ctx.get_region_epoch().clone()); + send_upload_sst(import, &meta, &data).unwrap(); + let mut ingest = IngestRequest::default(); + ingest.set_context(ctx.clone()); + ingest.set_sst(meta); + import.ingest(&ingest).unwrap() + }; + + // The first one will be ingested at the bottom level. 
And as the following ssts + // are overlapped with the previous one, they will all be ingested at level 0. + for i in 0..10 { + let resp = upload_and_ingest((50, 100), &import, format!("test{}.sst", i), &ctx); + assert!(!resp.has_error()); + } + + // For this region, it is not in the key range, so it is normal mode. + let region = cluster.get_region(&[20]); + let mut ctx2 = ctx.clone(); + ctx2.set_region_id(region.get_id()); + ctx2.set_region_epoch(region.get_region_epoch().clone()); + ctx2.set_peer(region.get_peers()[0].clone()); + for i in 0..6 { + let resp = upload_and_ingest((0, 49), &import, format!("test-{}.sst", i), &ctx2); + if i < 5 { + assert!(!resp.has_error()); + } else { + assert!(resp.get_error().has_server_is_busy()); + } + } + // Propose another switch mode request to let this region to ingest. + let mut key_range2 = Range::default(); + key_range2.set_end([50].to_vec()); + switch_mode(&import, key_range2.clone(), SwitchMode::Import); + let resp = upload_and_ingest((0, 49), &import, "test-6.sst".to_string(), &ctx2); + assert!(!resp.has_error()); + // switching back to normal should make further ingest be rejected + switch_mode(&import, key_range2, SwitchMode::Normal); + let resp = upload_and_ingest((0, 49), &import, "test-7.sst".to_string(), &ctx2); + assert!(resp.get_error().has_server_is_busy()); + + // switch back to normal, so region 1 also starts to reject + switch_mode(&import, key_range, SwitchMode::Normal); + let resp = upload_and_ingest((50, 100), &import, "test10".to_string(), &ctx); + assert!(resp.get_error().has_server_is_busy()); +} + #[test] fn test_upload_and_ingest_with_tde() { let (_tmp_dir, _cluster, ctx, tikv, import) = new_cluster_and_tikv_import_client_tde(); @@ -270,6 +352,62 @@ fn test_cleanup_sst() { check_sst_deleted(&import, &meta, &data); } +#[test] +fn test_cleanup_sst_v2() { + let (mut cluster, ctx, _, import) = open_cluster_and_tikv_import_client_v2(None); + + let temp_dir = 
Builder::new().prefix("test_cleanup_sst").tempdir().unwrap(); + + let sst_path = temp_dir.path().join("test_split.sst"); + let sst_range = (0, 100); + let (mut meta, data) = gen_sst_file(sst_path, sst_range); + meta.set_region_id(ctx.get_region_id()); + meta.set_region_epoch(ctx.get_region_epoch().clone()); + + send_upload_sst(&import, &meta, &data).unwrap(); + + // Can not upload the same file when it exists. + assert_to_string_contains!( + send_upload_sst(&import, &meta, &data).unwrap_err(), + "FileExists" + ); + + // The uploaded SST should be deleted if the region split. + let region = cluster.get_region(&[]); + cluster.must_split(®ion, &[100]); + + check_sst_deleted(&import, &meta, &data); + + // upload an SST of an unexisted region + let sst_path = temp_dir.path().join("test_non_exist.sst"); + let sst_range = (0, 100); + let (mut meta, data) = gen_sst_file(sst_path, sst_range); + meta.set_region_id(9999); + send_upload_sst(&import, &meta, &data).unwrap(); + // This should be cleanuped + check_sst_deleted(&import, &meta, &data); + + let mut key_range = Range::default(); + key_range.set_start([50].to_vec()); + key_range.set_start([70].to_vec()); + // switch to import so that the overlapped sst will not be cleanuped + switch_mode(&import, key_range.clone(), SwitchMode::Import); + let sst_path = temp_dir.path().join("test_non_exist1.sst"); + let sst_range = (60, 80); + let (mut meta, data) = gen_sst_file(sst_path, sst_range); + meta.set_region_id(9999); + send_upload_sst(&import, &meta, &data).unwrap(); + std::thread::sleep(Duration::from_millis(500)); + assert_to_string_contains!( + send_upload_sst(&import, &meta, &data).unwrap_err(), + "FileExists" + ); + + // switch back to normal mode + switch_mode(&import, key_range, SwitchMode::Normal); + check_sst_deleted(&import, &meta, &data); +} + #[test] fn test_ingest_sst_region_not_found() { let (_cluster, mut ctx_not_found, _, import) = new_cluster_and_tikv_import_client(); diff --git 
a/tests/integrations/import/util.rs b/tests/integrations/import/util.rs index cb1e0e336be..cc5d22d517d 100644 --- a/tests/integrations/import/util.rs +++ b/tests/integrations/import/util.rs @@ -8,6 +8,7 @@ use grpcio::{ChannelBuilder, Environment, Result, WriteFlags}; use kvproto::{import_sstpb::*, kvrpcpb::*, tikvpb::*}; use security::SecurityConfig; use test_raftstore::*; +use test_raftstore_v2::{Cluster as ClusterV2, ServerCluster as ServerClusterV2}; use tikv::config::TikvConfig; use tikv_util::HandyRwLock; use uuid::Uuid; @@ -34,6 +35,31 @@ pub fn new_cluster(cfg: TikvConfig) -> (Cluster, Context) { (cluster, ctx) } +pub fn new_cluster_v2( + cfg: TikvConfig, +) -> ( + ClusterV2, RocksEngine>, + Context, +) { + let count = 1; + let mut cluster = test_raftstore_v2::new_server_cluster(0, count); + cluster.cfg = Config { + tikv: cfg, + prefer_mem: true, + }; + cluster.run(); + + let region_id = 1; + let leader = cluster.leader_of_region(region_id).unwrap(); + let epoch = cluster.get_region_epoch(region_id); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_peer(leader); + ctx.set_region_epoch(epoch); + + (cluster, ctx) +} + pub fn open_cluster_and_tikv_import_client( cfg: Option, ) -> (Cluster, Context, TikvClient, ImportSstClient) { @@ -69,35 +95,24 @@ pub fn open_cluster_and_tikv_import_client( (cluster, ctx, tikv, import) } -#[allow(dead_code)] pub fn open_cluster_and_tikv_import_client_v2( cfg: Option, - cluster: &mut test_raftstore_v2::Cluster< - test_raftstore_v2::ServerCluster, - RocksEngine, - >, -) -> (Context, TikvClient, ImportSstClient) { +) -> ( + ClusterV2, RocksEngine>, + Context, + TikvClient, + ImportSstClient, +) { let cfg = cfg.unwrap_or_else(|| { let mut config = TikvConfig::default(); config.server.addr = "127.0.0.1:0".to_owned(); - let cleanup_interval = Duration::from_millis(10); + let cleanup_interval = Duration::from_millis(CLEANUP_SST_MILLIS); config.raft_store.cleanup_import_sst_interval.0 = cleanup_interval; 
config.server.grpc_concurrency = 1; config }); - cluster.cfg = Config { - tikv: cfg.clone(), - prefer_mem: true, - }; - cluster.run(); - let region_id = 1; - let leader = cluster.leader_of_region(region_id).unwrap(); - let epoch = cluster.get_region_epoch(region_id); - let mut ctx = Context::default(); - ctx.set_region_id(region_id); - ctx.set_peer(leader); - ctx.set_region_epoch(epoch); + let (cluster, ctx) = new_cluster_v2(cfg.clone()); let ch = { let env = Arc::new(Environment::new(1)); @@ -117,7 +132,7 @@ pub fn open_cluster_and_tikv_import_client_v2( let tikv = TikvClient::new(ch.clone()); let import = ImportSstClient::new(ch); - (ctx, tikv, import) + (cluster, ctx, tikv, import) } pub fn new_cluster_and_tikv_import_client() diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 1564aff0b3e..312cb7c9c5c 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -98,7 +98,9 @@ fn test_node_bootstrap_with_prepared_data() { let importer = { let dir = tmp_path.path().join("import-sst"); - Arc::new(SstImporter::new(&cfg.import, dir, None, cfg.storage.api_version()).unwrap()) + Arc::new( + SstImporter::new(&cfg.import, dir, None, cfg.storage.api_version(), false).unwrap(), + ) }; let (split_check_scheduler, _) = dummy_scheduler(); diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index f837500f981..fbf4f349877 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -1388,7 +1388,9 @@ fn test_double_run_node() { let coprocessor_host = CoprocessorHost::new(router, raftstore::coprocessor::Config::default()); let importer = { let dir = Path::new(MiscExt::path(&engines.kv)).join("import-sst"); - Arc::new(SstImporter::new(&ImportConfig::default(), dir, None, ApiVersion::V1).unwrap()) + Arc::new( + SstImporter::new(&ImportConfig::default(), dir, None, ApiVersion::V1, 
false).unwrap(), + ) }; let (split_check_scheduler, _) = dummy_scheduler(); From 48b18a66b8a469bea1a5621954b9dd5f078cdb4f Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 5 Jul 2023 12:56:13 +0800 Subject: [PATCH 0775/1149] raftstore-v2: optimize RocksDB config (#15055) ref tikv/tikv#12842 - Use latest data format - Enable ribbon filter for bottommost level - Limit compaction concurrency for defaultcf and writecf to 1, auto adjust when pending bytes is high - Set max-total-wal-size to 1 for v2 Signed-off-by: tabokie Co-authored-by: tonyxuqqi --- Cargo.lock | 6 +- Cargo.toml | 2 + components/engine_panic/src/cf_options.rs | 3 + components/engine_rocks/src/cf_options.rs | 9 ++ components/engine_traits/src/cf_options.rs | 1 + components/server/src/common.rs | 71 ++++++++++--- components/server/src/server.rs | 3 +- components/server/src/server2.rs | 3 +- etc/config-template.toml | 9 ++ src/config/configurable.rs | 2 +- src/config/mod.rs | 115 +++++++++++++++------ tests/integrations/config/mod.rs | 22 ++-- tests/integrations/config/test-custom.toml | 10 +- 13 files changed, 185 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4eae3894b2f..d43672f0487 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3095,7 +3095,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#489b049dc974f9b164b7e6150ee5d9466847f74f" +source = "git+https://github.com/tabokie/rust-rocksdb?branch=230703-thread-limiter#8cb107a4baf6feca81a88649d56e5e64df7239b1" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3114,7 +3114,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#489b049dc974f9b164b7e6150ee5d9466847f74f" +source = "git+https://github.com/tabokie/rust-rocksdb?branch=230703-thread-limiter#8cb107a4baf6feca81a88649d56e5e64df7239b1" dependencies = [ "bzip2-sys", "cc", @@ -5086,7 +5086,7 @@ dependencies = [ 
[[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#489b049dc974f9b164b7e6150ee5d9466847f74f" +source = "git+https://github.com/tabokie/rust-rocksdb?branch=230703-thread-limiter#8cb107a4baf6feca81a88649d56e5e64df7239b1" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/Cargo.toml b/Cargo.toml index 744ecdec44d..f3e037322b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -202,6 +202,8 @@ procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229 # After the PR to kvproto is merged, remember to comment this out and run `cargo update -p kvproto`. # [patch.'https://github.com/pingcap/kvproto'] # kvproto = { git = "https://github.com/your_github_id/kvproto", branch = "your_branch" } +[patch.'https://github.com/tikv/rust-rocksdb'] +rocksdb = { git = "https://github.com/tabokie/rust-rocksdb", branch = "230703-thread-limiter" } [workspace] # See https://github.com/rust-lang/rfcs/blob/master/text/2957-cargo-features2.md diff --git a/components/engine_panic/src/cf_options.rs b/components/engine_panic/src/cf_options.rs index cd4f7ee82d5..18fd78220b2 100644 --- a/components/engine_panic/src/cf_options.rs +++ b/components/engine_panic/src/cf_options.rs @@ -65,4 +65,7 @@ impl CfOptions for PanicCfOptions { fn set_sst_partitioner_factory(&mut self, factory: F) { panic!() } + fn set_max_compactions(&self, n: u32) -> Result<()> { + panic!() + } } diff --git a/components/engine_rocks/src/cf_options.rs b/components/engine_rocks/src/cf_options.rs index f2cc46d7a30..1162c67f210 100644 --- a/components/engine_rocks/src/cf_options.rs +++ b/components/engine_rocks/src/cf_options.rs @@ -121,4 +121,13 @@ impl CfOptions for RocksCfOptions { self.0 .set_sst_partitioner_factory(RocksSstPartitionerFactory(factory)); } + + fn set_max_compactions(&self, n: u32) -> Result<()> { + if let Some(limiter) = self.0.get_compaction_thread_limiter() { + limiter.set_limit(n); + } else { + return Err(box_err!("compaction 
thread limiter not found")); + } + Ok(()) + } } diff --git a/components/engine_traits/src/cf_options.rs b/components/engine_traits/src/cf_options.rs index 5fb85aedf95..1ed44825d37 100644 --- a/components/engine_traits/src/cf_options.rs +++ b/components/engine_traits/src/cf_options.rs @@ -30,4 +30,5 @@ pub trait CfOptions { fn get_disable_auto_compactions(&self) -> bool; fn get_disable_write_stall(&self) -> bool; fn set_sst_partitioner_factory(&mut self, factory: F); + fn set_max_compactions(&self, n: u32) -> Result<()>; } diff --git a/components/server/src/common.rs b/components/server/src/common.rs index 10da6ec9c74..0209906cc55 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -22,8 +22,8 @@ use engine_rocks::{ FlowInfo, RocksEngine, RocksStatistics, }; use engine_traits::{ - data_cf_offset, CachedTablet, CfOptionsExt, FlowControlFactorsExt, KvEngine, RaftEngine, - StatisticsReporter, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, + data_cf_offset, CachedTablet, CfOptions, CfOptionsExt, FlowControlFactorsExt, KvEngine, + RaftEngine, StatisticsReporter, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, }; use error_code::ErrorCodeExt; use file_system::{get_io_rate_limiter, set_io_rate_limiter, BytesFetcher, File, IoBudgetAdjustor}; @@ -478,6 +478,8 @@ pub fn check_system_config(config: &TikvConfig) { pub struct EnginesResourceInfo { tablet_registry: TabletRegistry, + // The initial value of max_compactions. For kvdb.defaultcf, kvdb.writecf only. 
+ base_max_compactions: [u32; 2], raft_engine: Option, latest_normalized_pending_bytes: AtomicU32, normalized_pending_bytes_collector: MovingAvgU32, @@ -487,12 +489,18 @@ impl EnginesResourceInfo { const SCALE_FACTOR: u64 = 100; pub fn new( + config: &TikvConfig, tablet_registry: TabletRegistry, raft_engine: Option, max_samples_to_preserve: usize, ) -> Self { + let base_max_compactions = [ + config.rocksdb.defaultcf.max_compactions.unwrap_or(0), + config.rocksdb.writecf.max_compactions.unwrap_or(0), + ]; EnginesResourceInfo { tablet_registry, + base_max_compactions, raft_engine, latest_normalized_pending_bytes: AtomicU32::new(0), normalized_pending_bytes_collector: MovingAvgU32::new(max_samples_to_preserve), @@ -530,15 +538,6 @@ impl EnginesResourceInfo { true }); - // todo(SpadeA): Now, there's a potential race condition problem where the - // tablet could be destroyed after the clone and before the fetching - // which could result in programme panic. It's okay now as the single global - // kv_engine will not be destroyed in normal operation and v2 is not - // ready for operation. Furthermore, this race condition is general to v2 as - // tablet clone is not a case exclusively happened here. We should - // propose another PR to tackle it such as destory tablet lazily in a GC - // thread. - for (_, cache) in cached_latest_tablets.iter_mut() { let Some(tablet) = cache.latest() else { continue }; for cf in &[CF_DEFAULT, CF_WRITE, CF_LOCK] { @@ -546,22 +545,62 @@ impl EnginesResourceInfo { } } - // Clear ensures that these tablets are not hold forever. 
- cached_latest_tablets.clear(); - let mut normalized_pending_bytes = 0; - for (pending, limit) in compaction_pending_bytes + for (i, (pending, limit)) in compaction_pending_bytes .iter() .zip(soft_pending_compaction_bytes_limit) + .enumerate() { if limit > 0 { normalized_pending_bytes = cmp::max( normalized_pending_bytes, (*pending * EnginesResourceInfo::SCALE_FACTOR / limit) as u32, - ) + ); + // kvdb defaultcf or writecf. + if (i == 1 || i == 2 ) + && let base = self.base_max_compactions[i-1] + && base > 0 + { + let level = *pending as f32 / limit as f32; + let delta = if level > 0.7 { + 2 + } else { + u32::from(level > 0.5) + }; + let cf = if i == 1 { + CF_DEFAULT + } else { + CF_WRITE + }; + if delta != 0 { + info!( + "adjusting `max-compactions`"; + "cf" => cf, + "n" => base + delta, + "pending_bytes" => *pending, + "soft_limit" => limit + ); + } + // We cannot get the current limit from limiter to avoid repeatedly setting the + // same value. But this operation is as simple as an atomic store. + cached_latest_tablets.iter_mut().any(|(_, tablet)| { + if let Some(latest) = tablet.latest() { + let opts = latest.get_options_cf(cf).unwrap(); + if let Err(e) = opts.set_max_compactions(base + delta) { + error!("failed to adjust `max-compactions`"; "err" => ?e); + } + true + } else { + false + } + }); + } } } + // Clear ensures that these tablets are not hold forever. 
+ cached_latest_tablets.clear(); + let (_, avg) = self .normalized_pending_bytes_collector .add(normalized_pending_bytes); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 6f46c09ded4..27b45e6973b 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1488,6 +1488,7 @@ impl TikvServer { engines.raft.register_config(cfg_controller); let engines_info = Arc::new(EnginesResourceInfo::new( + &self.core.config, reg, engines.raft.as_rocks_engine().cloned(), 180, // max_samples_to_preserve @@ -1594,7 +1595,7 @@ mod test { assert!(old_pending_compaction_bytes > new_pending_compaction_bytes); - let engines_info = Arc::new(EnginesResourceInfo::new(reg, None, 10)); + let engines_info = Arc::new(EnginesResourceInfo::new(&config, reg, None, 10)); let mut cached_latest_tablets = HashMap::default(); engines_info.update(Instant::now(), &mut cached_latest_tablets); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index f7a155c0dc1..6b1bc2c331d 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1401,6 +1401,7 @@ impl TikvServer { raft_engine.register_config(cfg_controller); let engines_info = Arc::new(EnginesResourceInfo::new( + &self.core.config, registry, raft_engine.as_rocks_engine().cloned(), 180, // max_samples_to_preserve @@ -1534,7 +1535,7 @@ mod test { assert!(old_pending_compaction_bytes > new_pending_compaction_bytes); - let engines_info = Arc::new(EnginesResourceInfo::new(reg, None, 10)); + let engines_info = Arc::new(EnginesResourceInfo::new(&config, reg, None, 10)); let mut cached_latest_tablets = HashMap::default(); engines_info.update(Instant::now(), &mut cached_latest_tablets); diff --git a/etc/config-template.toml b/etc/config-template.toml index 3e31375e6a9..818b8b64a44 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -567,6 +567,8 @@ # wal-size-limit = 0 ## Max RocksDB WAL size in total +## When 
storage.engine is "raft-kv", default value is 4GB. +## When storage.engine is "partitioned-raft-kv", default value is 1. # max-total-wal-size = "4GB" ## Dump statistics periodically in information logs. @@ -850,6 +852,13 @@ ## This option only affects newly written tables. When reading existing tables, ## the information about version is read from the footer. ## +## 5 -- Can be read by TiKV's versions since 6.3. Full and partitioned filters +## use a generally faster and more accurate Bloom filter implementation, with a +## different schema. +## +## When storage.engine is "raft-kv", default value is 2. +## When storage.engine is "partitioned-raft-kv", default value is 5. +## # format-version = 2 ## If enabled, prepopulate warm/hot blocks (data, uncompressed dict, index and diff --git a/src/config/configurable.rs b/src/config/configurable.rs index f006da501d2..6fe9409c1c0 100644 --- a/src/config/configurable.rs +++ b/src/config/configurable.rs @@ -4,7 +4,7 @@ use std::{error::Error, io::Write}; use engine_rocks::RocksEngine; use engine_traits::{ - CachedTablet, CfOptionsExt, DbOptions, DbOptionsExt, TabletRegistry, CF_DEFAULT, + CachedTablet, CfOptions, CfOptionsExt, DbOptions, DbOptionsExt, TabletRegistry, CF_DEFAULT, }; pub type ConfigRes = Result<(), Box>; diff --git a/src/config/mod.rs b/src/config/mod.rs index acede457e55..137ab56e1e1 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -374,12 +374,12 @@ macro_rules! cf_config { #[online_config(skip)] pub prepopulate_block_cache: PrepopulateBlockCache, #[online_config(skip)] - pub format_version: u32, + pub format_version: Option, #[serde(with = "rocks_config::checksum_serde")] #[online_config(skip)] pub checksum: ChecksumType, #[online_config(skip)] - pub max_compactions: u32, + pub max_compactions: Option, // `ttl == None` means using default setting in Rocksdb. // `ttl` in Rocksdb is 30 days as default. #[online_config(skip)] @@ -408,8 +408,7 @@ macro_rules! 
cf_config { ) .into()); } - if self.format_version > 5 { - // TODO: allow version 5 if we have another LTS capable of reading it? + if self.format_version.map_or(false, |v| v > 5) { return Err("format-version larger than 5 is unsupported".into()); } self.titan.validate()?; @@ -584,7 +583,7 @@ macro_rules! build_cf_opt { } block_base_opts.set_read_amp_bytes_per_bit($opt.read_amp_bytes_per_bit); block_base_opts.set_prepopulate_block_cache($opt.prepopulate_block_cache); - block_base_opts.set_format_version($opt.format_version); + block_base_opts.set_format_version($opt.format_version.unwrap_or(2)); block_base_opts.set_checksum($opt.checksum); block_base_opts.set_optimize_filters_for_memory($opt.optimize_filters_for_memory); let mut cf_opts = RocksCfOptions::default(); @@ -729,9 +728,9 @@ impl Default for DefaultCfConfig { bottommost_zstd_compression_dict_size: 0, bottommost_zstd_compression_sample_size: 0, prepopulate_block_cache: PrepopulateBlockCache::Disabled, - format_version: 2, + format_version: None, checksum: ChecksumType::CRC32c, - max_compactions: 0, + max_compactions: None, ttl: None, periodic_compaction_seconds: None, titan: TitanCfConfig::default(), @@ -897,9 +896,9 @@ impl Default for WriteCfConfig { bottommost_zstd_compression_dict_size: 0, bottommost_zstd_compression_sample_size: 0, prepopulate_block_cache: PrepopulateBlockCache::Disabled, - format_version: 2, + format_version: None, checksum: ChecksumType::CRC32c, - max_compactions: 0, + max_compactions: None, ttl: None, periodic_compaction_seconds: None, titan, @@ -1019,9 +1018,9 @@ impl Default for LockCfConfig { bottommost_zstd_compression_dict_size: 0, bottommost_zstd_compression_sample_size: 0, prepopulate_block_cache: PrepopulateBlockCache::Disabled, - format_version: 2, + format_version: None, checksum: ChecksumType::CRC32c, - max_compactions: 0, + max_compactions: None, ttl: None, periodic_compaction_seconds: None, titan, @@ -1116,9 +1115,9 @@ impl Default for RaftCfConfig { 
bottommost_zstd_compression_dict_size: 0, bottommost_zstd_compression_sample_size: 0, prepopulate_block_cache: PrepopulateBlockCache::Disabled, - format_version: 2, + format_version: None, checksum: ChecksumType::CRC32c, - max_compactions: 0, + max_compactions: None, ttl: None, periodic_compaction_seconds: None, titan, @@ -1201,7 +1200,7 @@ pub struct DbConfig { pub wal_ttl_seconds: u64, #[online_config(skip)] pub wal_size_limit: ReadableSize, - pub max_total_wal_size: ReadableSize, + pub max_total_wal_size: Option, pub max_background_jobs: i32, pub max_background_flushes: i32, #[online_config(skip)] @@ -1296,7 +1295,7 @@ impl Default for DbConfig { wal_dir: "".to_owned(), wal_ttl_seconds: 0, wal_size_limit: ReadableSize::kb(0), - max_total_wal_size: ReadableSize::gb(4), + max_total_wal_size: None, max_background_jobs: bg_job_limits.max_background_jobs as i32, max_background_flushes: bg_job_limits.max_background_flushes as i32, max_manifest_file_size: ReadableSize::mb(128), @@ -1342,8 +1341,13 @@ impl DbConfig { match engine { EngineType::RaftKv => { self.allow_concurrent_memtable_write.get_or_insert(true); + self.max_total_wal_size.get_or_insert(ReadableSize::gb(4)); self.defaultcf.enable_compaction_guard.get_or_insert(true); self.writecf.enable_compaction_guard.get_or_insert(true); + self.defaultcf.format_version.get_or_insert(2); + self.writecf.format_version.get_or_insert(2); + self.lockcf.format_version.get_or_insert(2); + self.raftcf.format_version.get_or_insert(2); } EngineType::RaftKv2 => { self.enable_multi_batch_write.get_or_insert(false); @@ -1355,6 +1359,7 @@ impl DbConfig { (total_mem * WRITE_BUFFER_MEMORY_LIMIT_RATE) as u64, WRITE_BUFFER_MEMORY_LIMIT_MAX, ))); + self.max_total_wal_size.get_or_insert(ReadableSize(1)); // In RaftKv2, every region uses its own rocksdb instance, it's actually the // even stricter compaction guard, so use the same output file size base. 
self.writecf @@ -1367,6 +1372,18 @@ impl DbConfig { self.writecf.disable_write_stall = true; self.lockcf.disable_write_stall = true; self.raftcf.disable_write_stall = true; + self.defaultcf.format_version.get_or_insert(5); + self.writecf.format_version.get_or_insert(5); + self.lockcf.format_version.get_or_insert(5); + self.raftcf.format_version.get_or_insert(5); + // According to FB, Ribbon filter is more cost-efficient for SST with 1h+ + // lifetime. We conservatively use it for L6. + self.defaultcf.ribbon_filter_above_level.get_or_insert(6); + self.writecf.ribbon_filter_above_level.get_or_insert(6); + // Initially only allow one compaction. Pace up when pending bytes is high. This + // strategy is consistent with single RocksDB. + self.defaultcf.max_compactions.get_or_insert(1); + self.writecf.max_compactions.get_or_insert(1); } } } @@ -1405,7 +1422,7 @@ impl DbConfig { } opts.set_wal_ttl_seconds(self.wal_ttl_seconds); opts.set_wal_size_limit_mb(self.wal_size_limit.as_mb()); - opts.set_max_total_wal_size(self.max_total_wal_size.0); + opts.set_max_total_wal_size(self.max_total_wal_size.unwrap_or(ReadableSize(0)).0); opts.set_max_background_jobs(self.max_background_jobs); // RocksDB will cap flush and compaction threads to at least one opts.set_max_background_flushes(self.max_background_flushes); @@ -1463,28 +1480,28 @@ impl DbConfig { pub fn build_cf_resources(&self, cache: Cache) -> CfResources { let mut compaction_thread_limiters = HashMap::new(); - if self.defaultcf.max_compactions > 0 { + if let Some(n) = self.defaultcf.max_compactions && n > 0 { compaction_thread_limiters.insert( CF_DEFAULT, - ConcurrentTaskLimiter::new(CF_DEFAULT, self.defaultcf.max_compactions), + ConcurrentTaskLimiter::new(CF_DEFAULT, n), ); } - if self.writecf.max_compactions > 0 { + if let Some(n) = self.writecf.max_compactions && n > 0 { compaction_thread_limiters.insert( CF_WRITE, - ConcurrentTaskLimiter::new(CF_WRITE, self.writecf.max_compactions), + 
ConcurrentTaskLimiter::new(CF_WRITE, n), ); } - if self.lockcf.max_compactions > 0 { + if let Some(n) = self.lockcf.max_compactions && n > 0 { compaction_thread_limiters.insert( CF_LOCK, - ConcurrentTaskLimiter::new(CF_LOCK, self.lockcf.max_compactions), + ConcurrentTaskLimiter::new(CF_LOCK, n), ); } - if self.raftcf.max_compactions > 0 { + if let Some(n) = self.raftcf.max_compactions && n > 0 { compaction_thread_limiters.insert( CF_RAFT, - ConcurrentTaskLimiter::new(CF_RAFT, self.raftcf.max_compactions), + ConcurrentTaskLimiter::new(CF_RAFT, n), ); } CfResources { @@ -1640,9 +1657,9 @@ impl Default for RaftDefaultCfConfig { bottommost_zstd_compression_dict_size: 0, bottommost_zstd_compression_sample_size: 0, prepopulate_block_cache: PrepopulateBlockCache::Disabled, - format_version: 2, + format_version: Some(2), checksum: ChecksumType::CRC32c, - max_compactions: 0, + max_compactions: None, ttl: None, periodic_compaction_seconds: None, titan: TitanCfConfig::default(), @@ -1652,8 +1669,8 @@ impl Default for RaftDefaultCfConfig { impl RaftDefaultCfConfig { pub fn build_opt(&self, cache: &Cache) -> RocksCfOptions { - let limiter = if self.max_compactions > 0 { - Some(ConcurrentTaskLimiter::new(CF_DEFAULT, self.max_compactions)) + let limiter = if let Some(n) = self.max_compactions && n > 0 { + Some(ConcurrentTaskLimiter::new(CF_DEFAULT, n)) } else { None }; @@ -3855,11 +3872,31 @@ impl TikvConfig { } } - if last_cfg.raftdb.defaultcf.format_version > 5 - || last_cfg.rocksdb.defaultcf.format_version > 5 - || last_cfg.rocksdb.writecf.format_version > 5 - || last_cfg.rocksdb.lockcf.format_version > 5 - || last_cfg.rocksdb.raftcf.format_version > 5 + if last_cfg + .raftdb + .defaultcf + .format_version + .map_or(false, |v| v > 5) + || last_cfg + .rocksdb + .defaultcf + .format_version + .map_or(false, |v| v > 5) + || last_cfg + .rocksdb + .writecf + .format_version + .map_or(false, |v| v > 5) + || last_cfg + .rocksdb + .lockcf + .format_version + .map_or(false, |v| v > 5) 
+ || last_cfg + .rocksdb + .raftcf + .format_version + .map_or(false, |v| v > 5) { return Err("format_version larger than 5 is unsupported".into()); } @@ -5897,6 +5934,7 @@ mod tests { cfg.raft_engine.mut_config().memory_limit = None; cfg.coprocessor_v2.coprocessor_plugin_directory = None; // Default is `None`, which is represented by not setting the key. cfg.rocksdb.write_buffer_limit = None; + cfg.rocksdb.max_total_wal_size = None; // cfg.rocksdb.defaultcf.enable_compaction_guard = None; cfg.rocksdb.writecf.enable_compaction_guard = None; @@ -5945,6 +5983,17 @@ mod tests { cfg.rocksdb.lockcf.periodic_compaction_seconds = None; cfg.rocksdb.raftcf.periodic_compaction_seconds = None; cfg.raftdb.defaultcf.periodic_compaction_seconds = None; + // + cfg.rocksdb.defaultcf.format_version = None; + cfg.rocksdb.writecf.format_version = None; + cfg.rocksdb.lockcf.format_version = None; + cfg.rocksdb.raftcf.format_version = None; + // + cfg.rocksdb.defaultcf.max_compactions = None; + cfg.rocksdb.writecf.max_compactions = None; + cfg.rocksdb.lockcf.max_compactions = None; + cfg.rocksdb.raftcf.max_compactions = None; + cfg.raftdb.defaultcf.max_compactions = None; cfg.coprocessor .optimize_for(default_cfg.storage.engine == EngineType::RaftKv2); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index b1642263855..01a408cce1b 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -295,7 +295,7 @@ fn test_serde_custom_tikv_config() { wal_dir: "/var".to_owned(), wal_ttl_seconds: 1, wal_size_limit: ReadableSize::kb(1), - max_total_wal_size: ReadableSize::gb(1), + max_total_wal_size: Some(ReadableSize::gb(1)), max_background_jobs: 12, max_background_flushes: 4, max_manifest_file_size: ReadableSize::mb(12), @@ -380,9 +380,9 @@ fn test_serde_custom_tikv_config() { bottommost_zstd_compression_dict_size: 1024, bottommost_zstd_compression_sample_size: 1024, prepopulate_block_cache: PrepopulateBlockCache::FlushOnly, - 
format_version: 5, + format_version: Some(0), checksum: ChecksumType::XXH3, - max_compactions: 3, + max_compactions: Some(3), ttl: None, periodic_compaction_seconds: None, }, @@ -453,9 +453,9 @@ fn test_serde_custom_tikv_config() { bottommost_zstd_compression_dict_size: 0, bottommost_zstd_compression_sample_size: 0, prepopulate_block_cache: PrepopulateBlockCache::FlushOnly, - format_version: 5, + format_version: Some(0), checksum: ChecksumType::XXH3, - max_compactions: 3, + max_compactions: Some(3), ttl: None, periodic_compaction_seconds: None, }, @@ -526,9 +526,9 @@ fn test_serde_custom_tikv_config() { bottommost_zstd_compression_dict_size: 0, bottommost_zstd_compression_sample_size: 0, prepopulate_block_cache: PrepopulateBlockCache::FlushOnly, - format_version: 5, + format_version: Some(0), checksum: ChecksumType::XXH3, - max_compactions: 3, + max_compactions: Some(3), ttl: None, periodic_compaction_seconds: None, }, @@ -599,9 +599,9 @@ fn test_serde_custom_tikv_config() { bottommost_zstd_compression_dict_size: 0, bottommost_zstd_compression_sample_size: 0, prepopulate_block_cache: PrepopulateBlockCache::FlushOnly, - format_version: 5, + format_version: Some(0), checksum: ChecksumType::XXH3, - max_compactions: 3, + max_compactions: Some(3), ttl: None, periodic_compaction_seconds: None, }, @@ -687,9 +687,9 @@ fn test_serde_custom_tikv_config() { bottommost_zstd_compression_dict_size: 0, bottommost_zstd_compression_sample_size: 0, prepopulate_block_cache: PrepopulateBlockCache::FlushOnly, - format_version: 5, + format_version: Some(0), checksum: ChecksumType::XXH3, - max_compactions: 3, + max_compactions: Some(3), ttl: None, periodic_compaction_seconds: None, }, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index a87d55c0ac7..df777784b86 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -338,7 +338,7 @@ enable-compaction-guard = false 
compaction-guard-min-output-file-size = "12MB" compaction-guard-max-output-file-size = "34MB" prepopulate-block-cache = "flush-only" -format-version = 5 +format-version = 0 checksum = "xxh3" max-compactions = 3 @@ -403,7 +403,7 @@ enable-compaction-guard = false compaction-guard-min-output-file-size = "12MB" compaction-guard-max-output-file-size = "34MB" prepopulate-block-cache = "flush-only" -format-version = 5 +format-version = 0 checksum = "xxh3" max-compactions = 3 @@ -455,7 +455,7 @@ enable-compaction-guard = true compaction-guard-min-output-file-size = "12MB" compaction-guard-max-output-file-size = "34MB" prepopulate-block-cache = "flush-only" -format-version = 5 +format-version = 0 checksum = "xxh3" max-compactions = 3 @@ -507,7 +507,7 @@ enable-compaction-guard = true compaction-guard-min-output-file-size = "12MB" compaction-guard-max-output-file-size = "34MB" prepopulate-block-cache = "flush-only" -format-version = 5 +format-version = 0 checksum = "xxh3" max-compactions = 3 @@ -591,7 +591,7 @@ enable-compaction-guard = true compaction-guard-min-output-file-size = "12MB" compaction-guard-max-output-file-size = "34MB" prepopulate-block-cache = "flush-only" -format-version = 5 +format-version = 0 checksum = "xxh3" max-compactions = 3 From f69f721e71fa860c53c6c01a1c75728dd8068af4 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 6 Jul 2023 12:06:44 +0800 Subject: [PATCH 0776/1149] raftstore-v2: support dynamic change apply pool size and store io size (#15060) ref tikv/tikv#14485 support dynamic change apply pool size and store io size Signed-off-by: SpadeA-Tang Signed-off-by: Spade A --- Cargo.lock | 2 +- components/raftstore-v2/src/batch/store.rs | 24 +++- components/raftstore-v2/src/fsm/apply.rs | 2 + .../raftstore-v2/src/worker/refresh_config.rs | 110 ++++++++++++++++-- components/raftstore/src/store/mod.rs | 11 +- components/raftstore/src/store/worker/mod.rs | 2 +- .../src/store/worker/refresh_config.rs | 18 
++- components/test_raftstore-v2/src/util.rs | 10 +- .../tikv_util/src/yatp_pool/future_pool.rs | 6 + .../integrations/raftstore/test_scale_pool.rs | 79 ++++++++++++- 10 files changed, 237 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d43672f0487..7f9b7d9b195 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7866,7 +7866,7 @@ checksum = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5" [[package]] name = "yatp" version = "0.0.1" -source = "git+https://github.com/tikv/yatp.git?branch=master#5523a9a6a4d0d6242bdb02b0a344f7ee1477b39b" +source = "git+https://github.com/tikv/yatp.git?branch=master#5572a78702572087cab8ddcdd1fe30e5bf76ae42" dependencies = [ "crossbeam-deque", "crossbeam-skiplist", diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 2c24fe53631..3240b954a87 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -35,8 +35,8 @@ use raftstore::{ local_metrics::RaftMetrics, util::LatencyInspector, AutoSplitController, Config, ReadRunner, ReadTask, RefreshConfigTask, SplitCheckRunner, - SplitCheckTask, StoreWriters, TabletSnapManager, Transport, WriteRouterContext, - WriteSenders, + SplitCheckTask, StoreWriters, StoreWritersContext, TabletSnapManager, Transport, + WriteRouterContext, WriteSenders, WriterContoller, }, }; use resource_metering::CollectorRegHandle; @@ -219,6 +219,8 @@ impl PollHandler Option { @@ -802,9 +804,9 @@ impl StoreSystem { let builder = StorePollerBuilder::new( cfg.clone(), store_id, - raft_engine, + raft_engine.clone(), tablet_registry, - trans, + trans.clone(), router.clone(), schedulers.clone(), self.logger.clone(), @@ -823,10 +825,24 @@ impl StoreSystem { let tag = format!("rs-{}", store_id); self.system.spawn(tag, builder.clone()); + let writer_control = WriterContoller::new( + StoreWritersContext { + store_id, + raft_engine, + kv_engine: None, + transfer: trans, + notifier: 
router.clone(), + cfg: cfg.clone(), + }, + workers.async_write.clone(), + ); + let apply_pool = builder.apply_pool.clone(); let refresh_config_runner = refresh_config::Runner::new( self.logger.clone(), router.router().clone(), self.system.build_pool_state(builder), + writer_control, + apply_pool, ); assert!(workers.refresh_config_worker.start(refresh_config_runner)); self.workers = Some(workers); diff --git a/components/raftstore-v2/src/fsm/apply.rs b/components/raftstore-v2/src/fsm/apply.rs index f966b67634a..e55c143a33a 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -8,6 +8,7 @@ use std::{ use batch_system::{Fsm, FsmScheduler, Mailbox}; use crossbeam::channel::TryRecvError; use engine_traits::{FlushState, KvEngine, SstApplyState, TabletRegistry}; +use fail::fail_point; use futures::{compat::Future01CompatExt, FutureExt, StreamExt}; use kvproto::{metapb, raft_serverpb::RegionLocalState}; use pd_client::BucketStat; @@ -141,6 +142,7 @@ impl ApplyFsm { } }; loop { + fail_point!("before_handle_tasks"); match task { // TODO: flush by buffer size. 
ApplyTask::CommittedEntries(ce) => self.apply.apply_committed_entries(ce).await, diff --git a/components/raftstore-v2/src/worker/refresh_config.rs b/components/raftstore-v2/src/worker/refresh_config.rs index 804cfcce60e..633a92a0e24 100644 --- a/components/raftstore-v2/src/worker/refresh_config.rs +++ b/components/raftstore-v2/src/worker/refresh_config.rs @@ -4,11 +4,16 @@ use std::{sync::Arc, thread}; use batch_system::{BatchRouter, Fsm, FsmTypes, HandlerBuilder, Poller, PoolState, Priority}; use file_system::{set_io_type, IoType}; -use raftstore::store::{BatchComponent, RefreshConfigTask}; +use raftstore::store::{BatchComponent, RefreshConfigTask, Transport, WriterContoller}; use slog::{error, info, warn, Logger}; -use tikv_util::{sys::thread::StdThreadBuildWrapper, thd_name, worker::Runnable}; +use tikv_util::{ + sys::thread::StdThreadBuildWrapper, thd_name, worker::Runnable, yatp_pool::FuturePool, +}; -use crate::fsm::{PeerFsm, StoreFsm}; +use crate::{ + fsm::{PeerFsm, StoreFsm}, + StoreRouter, +}; pub struct PoolController> { pub logger: Logger, @@ -76,29 +81,40 @@ where } } -pub struct Runner +pub struct Runner where EK: engine_traits::KvEngine, ER: engine_traits::RaftEngine, H: HandlerBuilder, StoreFsm>, + T: Transport + 'static, { logger: Logger, raft_pool: PoolController, StoreFsm, H>, + writer_ctrl: WriterContoller>, + apply_pool: FuturePool, } -impl Runner +impl Runner where EK: engine_traits::KvEngine, ER: engine_traits::RaftEngine, H: HandlerBuilder, StoreFsm>, + T: Transport + 'static, { pub fn new( logger: Logger, router: BatchRouter, StoreFsm>, raft_pool_state: PoolState, StoreFsm, H>, + writer_ctrl: WriterContoller>, + apply_pool: FuturePool, ) -> Self { let raft_pool = PoolController::new(logger.clone(), router, raft_pool_state); - Runner { logger, raft_pool } + Runner { + logger, + raft_pool, + writer_ctrl, + apply_pool, + } } fn resize_raft_pool(&mut self, size: usize) { @@ -117,13 +133,88 @@ where "to" => self.raft_pool.state.expected_pool_size 
); } + + fn resize_apply_pool(&mut self, size: usize) { + let current_pool_size = self.apply_pool.get_pool_size(); + if current_pool_size == size { + return; + } + + // It may not take effect immediately. See comments of + // ThreadPool::scale_workers. + // Also, the size will be clamped between min_thread_count and the max_pool_size + // set when the apply_pool is initialized. This is fine as max_pool_size + // is relatively a large value and there's no use case to set a value + // larger than that. + self.apply_pool.scale_pool_size(size); + let (min_thread_count, max_thread_count) = self.apply_pool.thread_count_limit(); + if size > max_thread_count || size < min_thread_count { + warn!( + self.logger, + "apply pool scale size is out of bound, and the size is clamped"; + "size" => size, + "min_thread_limit" => min_thread_count, + "max_thread_count" => max_thread_count, + ); + } else { + info!( + self.logger, + "resize apply pool"; + "from" => current_pool_size, + "to" => size + ); + } + } + + /// Resizes the count of background threads in store_writers. + fn resize_store_writers(&mut self, size: usize) { + // The resizing of store writers will not directly update the local + // cached store writers in each poller. Each poller will timely + // correct its local cached in its next `poller.begin()` after + // the resize operation completed. 
+ let current_size = self.writer_ctrl.expected_writers_size(); + self.writer_ctrl.set_expected_writers_size(size); + match current_size.cmp(&size) { + std::cmp::Ordering::Greater => { + if let Err(e) = self.writer_ctrl.mut_store_writers().decrease_to(size) { + error!( + self.logger, + "failed to decrease store writers size"; + "err_msg" => ?e + ); + } + } + std::cmp::Ordering::Less => { + let writer_meta = self.writer_ctrl.writer_meta().clone(); + if let Err(e) = self + .writer_ctrl + .mut_store_writers() + .increase_to(size, writer_meta) + { + error!( + self.logger, + "failed to increase store writers size"; + "err_msg" => ?e + ); + } + } + std::cmp::Ordering::Equal => return, + } + info!( + self.logger, + "resize store writers pool"; + "from" => current_size, + "to" => size + ); + } } -impl Runnable for Runner +impl Runnable for Runner where EK: engine_traits::KvEngine, ER: engine_traits::RaftEngine, H: HandlerBuilder, StoreFsm> + std::marker::Send, + T: Transport + 'static, { type Task = RefreshConfigTask; @@ -132,11 +223,10 @@ where RefreshConfigTask::ScalePool(component, size) => { match component { BatchComponent::Store => self.resize_raft_pool(size), - BatchComponent::Apply => { - unreachable!("v2 does not have apply batch system") - } + BatchComponent::Apply => self.resize_apply_pool(size), }; } + RefreshConfigTask::ScaleWriters(size) => self.resize_store_writers(size), _ => { warn!( self.logger, diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index c91cee3071a..8f421bd3133 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -33,8 +33,8 @@ pub use self::{ async_io::{ read::{AsyncReadNotifier, FetchedLogs, GenSnapRes, ReadRunner, ReadTask}, write::{ - write_to_db_for_test, PersistedNotifier, StoreWriters, Worker as WriteWorker, WriteMsg, - WriteTask, + write_to_db_for_test, PersistedNotifier, StoreWriters, StoreWritersContext, + Worker as WriteWorker, WriteMsg, 
WriteTask, }, write_router::{WriteRouter, WriteRouterContext, WriteSenders}, }, @@ -84,8 +84,9 @@ pub use self::{ ReadDelegate, ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, SplitInfo, StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, - BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, DEFAULT_BIG_REGION_BYTE_THRESHOLD, - DEFAULT_BIG_REGION_QPS_THRESHOLD, DEFAULT_BYTE_THRESHOLD, DEFAULT_QPS_THRESHOLD, - NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, REGION_CPU_OVERLOAD_THRESHOLD_RATIO, + WriterContoller, BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, + DEFAULT_BIG_REGION_BYTE_THRESHOLD, DEFAULT_BIG_REGION_QPS_THRESHOLD, + DEFAULT_BYTE_THRESHOLD, DEFAULT_QPS_THRESHOLD, NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, + REGION_CPU_OVERLOAD_THRESHOLD_RATIO, }, }; diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index 0ace0240091..084542c313d 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -38,7 +38,7 @@ pub use self::{ }, refresh_config::{ BatchComponent as RaftStoreBatchComponent, BatchComponent, Runner as RefreshConfigRunner, - Task as RefreshConfigTask, + Task as RefreshConfigTask, WriterContoller, }, region::{Runner as RegionRunner, Task as RegionTask}, split_check::{ diff --git a/components/raftstore/src/store/worker/refresh_config.rs b/components/raftstore/src/store/worker/refresh_config.rs index 6fcbd6a93e7..722a121850b 100644 --- a/components/raftstore/src/store/worker/refresh_config.rs +++ b/components/raftstore/src/store/worker/refresh_config.rs @@ -115,7 +115,7 @@ where } } -struct WriterContoller +pub struct WriterContoller where EK: engine_traits::KvEngine, ER: engine_traits::RaftEngine, @@ -145,6 +145,22 @@ where expected_writers_size: writers_size, } } + + pub fn expected_writers_size(&self) -> usize { + self.expected_writers_size + } + + pub fn 
set_expected_writers_size(&mut self, size: usize) { + self.expected_writers_size = size; + } + + pub fn mut_store_writers(&mut self) -> &mut StoreWriters { + &mut self.store_writers + } + + pub fn writer_meta(&self) -> &StoreWritersContext { + &self.writer_meta + } } #[derive(Debug, Clone, Copy)] diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index f352a30504a..0e8f67057a3 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -416,11 +416,19 @@ pub fn put_with_timeout, EK: KvEngine>( ) -> Result { let mut region = cluster.get_region(key); let region_id = region.get_id(); - let req = new_request( + let mut req = new_request( region_id, region.take_region_epoch(), vec![new_put_cf_cmd(CF_DEFAULT, key, value)], false, ); + req.mut_header().set_peer( + region + .get_peers() + .iter() + .find(|p| p.store_id == node_id) + .unwrap() + .clone(), + ); cluster.call_command_on_node(node_id, req, timeout) } diff --git a/components/tikv_util/src/yatp_pool/future_pool.rs b/components/tikv_util/src/yatp_pool/future_pool.rs index f010b508aaa..66595b0205c 100644 --- a/components/tikv_util/src/yatp_pool/future_pool.rs +++ b/components/tikv_util/src/yatp_pool/future_pool.rs @@ -108,6 +108,12 @@ impl FuturePool { { self.inner.spawn_handle(TrackedFuture::new(future)) } + + /// Return the min thread count and the max thread count that this pool can + /// scale to. 
+ pub fn thread_count_limit(&self) -> (usize, usize) { + self.inner.pool.thread_count_limit() + } } struct PoolInner { diff --git a/tests/integrations/raftstore/test_scale_pool.rs b/tests/integrations/raftstore/test_scale_pool.rs index 2187b4f4bee..e27e6939a2a 100644 --- a/tests/integrations/raftstore/test_scale_pool.rs +++ b/tests/integrations/raftstore/test_scale_pool.rs @@ -8,6 +8,7 @@ use std::{ use engine_traits::{MiscExt, Peekable}; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv::config::ConfigurableDb; use tikv_util::{ sys::thread::{self, Pid}, @@ -198,6 +199,72 @@ fn test_decrease_pool() { must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); } +#[test] +fn test_increase_apply_pool_v2() { + use test_raftstore_v2::*; + let mut cluster = new_node_cluster(0, 1); + cluster.pd_client.disable_default_operator(); + cluster.cfg.raft_store.apply_batch_system.pool_size = 1; + let _ = cluster.run_conf_change(); + std::thread::sleep(std::time::Duration::from_millis(200)); + + let region = cluster.get_region(b""); + cluster.must_split(®ion, b"k10"); + let region = cluster.get_region(b"k11"); + cluster.must_split(®ion, b"k20"); + let region = cluster.get_region(b"k21"); + cluster.must_split(®ion, b"k30"); + + fail::cfg("before_handle_tasks", "1*pause").unwrap(); + put_with_timeout(&mut cluster, 1, b"k35", b"val", Duration::from_secs(2)).unwrap_err(); + + { + let sim = cluster.sim.rl(); + let cfg_controller = sim.get_cfg_controller().unwrap(); + let change = { + let mut change = HashMap::new(); + change.insert("raftstore.apply-pool-size".to_owned(), "2".to_owned()); + change + }; + + cfg_controller.update(change).unwrap(); + std::thread::sleep(std::time::Duration::from_secs(1)); + } + + cluster.must_put(b"k05", b"val"); + cluster.must_put(b"k15", b"val"); + cluster.must_put(b"k25", b"val"); + + fail::remove("before_handle_tasks"); +} + +#[test] +fn test_decrease_apply_pool_v2() { + use test_raftstore_v2::*; + let mut cluster = new_node_cluster(0, 
1); + cluster.pd_client.disable_default_operator(); + cluster.cfg.raft_store.apply_batch_system.pool_size = 3; + let _ = cluster.run_conf_change(); + + { + let sim = cluster.sim.rl(); + let cfg_controller = sim.get_cfg_controller().unwrap(); + let change = { + let mut change = HashMap::new(); + change.insert("raftstore.apply-pool-size".to_owned(), "1".to_owned()); + change + }; + + cfg_controller.update(change).unwrap(); + std::thread::sleep(std::time::Duration::from_secs(1)); + } + + fail::cfg("before_handle_tasks", "1*pause").unwrap(); + put_with_timeout(&mut cluster, 1, b"k10", b"val", Duration::from_secs(2)).unwrap_err(); + + fail::remove("before_handle_tasks"); +} + #[test] fn test_decrease_pool_v2() { use test_raftstore_v2::*; @@ -265,9 +332,10 @@ fn get_async_writers_tids() -> Vec { writers_tids } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_increase_async_ios() { - let mut cluster = new_node_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.cfg.raft_store.store_io_pool_size = 1; cluster.pd_client.disable_default_operator(); cluster.run(); @@ -308,9 +376,10 @@ fn test_increase_async_ios() { must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_decrease_async_ios() { - let mut cluster = new_node_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.cfg.raft_store.store_io_pool_size = 4; cluster.pd_client.disable_default_operator(); cluster.run(); @@ -356,6 +425,7 @@ fn test_decrease_async_ios() { } #[test] +// v2 sets store_io_pool_size to 1 in `validate` if store_io_pool_size = 0. fn test_resize_async_ios_failed_1() { let mut cluster = new_node_cluster(0, 1); cluster.cfg.raft_store.store_io_pool_size = 2; @@ -398,6 +468,7 @@ fn test_resize_async_ios_failed_1() { } #[test] +// v2 sets store_io_pool_size to 1 in `validate` if store_io_pool_size = 0. 
fn test_resize_async_ios_failed_2() { let mut cluster = new_node_cluster(0, 1); cluster.cfg.raft_store.store_io_pool_size = 0; From a5f1a26d3bb5121a845f644b5eebff3d6c041278 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 6 Jul 2023 12:36:44 +0800 Subject: [PATCH 0777/1149] raftstore-v2, coprocessor: use snapshot sequence number for cache (#14997) ref tikv/tikv#12, ref tikv/tikv#13, ref tikv/tikv#14570 In raftstore v2, a snapshot no longer include applied index, so this PR replaces applied index with snapshot sequence number for coprocessor cache. See https://github.com/tikv/tikv/issues/14570 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 6 +++--- Cargo.toml | 6 ++++-- components/engine_panic/src/snapshot.rs | 7 +++++++ components/engine_rocks/src/snapshot.rs | 7 +++++++ components/engine_traits/src/lib.rs | 2 ++ components/engine_traits/src/snapshot.rs | 5 +++-- components/engine_traits/src/snapshot_misc.rs | 5 +++++ .../raftstore-v2/src/operation/query/local.rs | 2 ++ components/raftstore/src/store/region_snapshot.rs | 15 +++++++++++++++ components/tikv_kv/src/raftstore_impls.rs | 2 +- 10 files changed, 49 insertions(+), 8 deletions(-) create mode 100644 components/engine_traits/src/snapshot_misc.rs diff --git a/Cargo.lock b/Cargo.lock index 7f9b7d9b195..00ef5e5dd4f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3095,7 +3095,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tabokie/rust-rocksdb?branch=230703-thread-limiter#8cb107a4baf6feca81a88649d56e5e64df7239b1" +source = "git+https://github.com/tikv/rust-rocksdb.git#0a57dd2e2c3d1be9679a9ad6a3ed70dcb4b87eff" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3114,7 +3114,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tabokie/rust-rocksdb?branch=230703-thread-limiter#8cb107a4baf6feca81a88649d56e5e64df7239b1" 
+source = "git+https://github.com/tikv/rust-rocksdb.git#0a57dd2e2c3d1be9679a9ad6a3ed70dcb4b87eff" dependencies = [ "bzip2-sys", "cc", @@ -5086,7 +5086,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tabokie/rust-rocksdb?branch=230703-thread-limiter#8cb107a4baf6feca81a88649d56e5e64df7239b1" +source = "git+https://github.com/tikv/rust-rocksdb.git#0a57dd2e2c3d1be9679a9ad6a3ed70dcb4b87eff" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/Cargo.toml b/Cargo.toml index f3e037322b8..10650db8924 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -202,8 +202,10 @@ procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229 # After the PR to kvproto is merged, remember to comment this out and run `cargo update -p kvproto`. # [patch.'https://github.com/pingcap/kvproto'] # kvproto = { git = "https://github.com/your_github_id/kvproto", branch = "your_branch" } -[patch.'https://github.com/tikv/rust-rocksdb'] -rocksdb = { git = "https://github.com/tabokie/rust-rocksdb", branch = "230703-thread-limiter" } +# +# After the PR to rust-rocksdb is merged, remember to comment this out and run `cargo update -p rocksdb`. 
+# [patch.'https://github.com/tikv/rust-rocksdb'] +# rocksdb = { git = "https://github.com/your_github_id/rust-rocksdb", branch = "your_branch" } [workspace] # See https://github.com/rust-lang/rfcs/blob/master/text/2957-cargo-features2.md diff --git a/components/engine_panic/src/snapshot.rs b/components/engine_panic/src/snapshot.rs index f6cda5312cb..bb3e41d2aa3 100644 --- a/components/engine_panic/src/snapshot.rs +++ b/components/engine_panic/src/snapshot.rs @@ -4,6 +4,7 @@ use std::ops::Deref; use engine_traits::{ CfNamesExt, IterOptions, Iterable, Iterator, Peekable, ReadOptions, Result, Snapshot, + SnapshotMiscExt, }; use crate::{db_vector::PanicDbVector, engine::PanicEngine}; @@ -80,3 +81,9 @@ impl Iterator for PanicSnapshotIterator { panic!() } } + +impl SnapshotMiscExt for PanicSnapshot { + fn sequence_number(&self) -> u64 { + panic!() + } +} diff --git a/components/engine_rocks/src/snapshot.rs b/components/engine_rocks/src/snapshot.rs index 60a12c4ac6d..22cdea503ab 100644 --- a/components/engine_rocks/src/snapshot.rs +++ b/components/engine_rocks/src/snapshot.rs @@ -7,6 +7,7 @@ use std::{ use engine_traits::{ self, CfNamesExt, IterOptions, Iterable, Peekable, ReadOptions, Result, Snapshot, + SnapshotMiscExt, }; use rocksdb::{rocksdb_options::UnsafeSnap, DBIterator, DB}; @@ -103,3 +104,9 @@ impl CfNamesExt for RocksSnapshot { self.db.cf_names() } } + +impl SnapshotMiscExt for RocksSnapshot { + fn sequence_number(&self) -> u64 { + unsafe { self.snap.get_sequence_number() } + } +} diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 45a3d18fa7a..537e0addf41 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -288,6 +288,8 @@ mod misc; pub use misc::*; mod snapshot; pub use crate::snapshot::*; +mod snapshot_misc; +pub use crate::snapshot_misc::SnapshotMiscExt; mod sst; pub use crate::sst::*; mod write_batch; diff --git a/components/engine_traits/src/snapshot.rs 
b/components/engine_traits/src/snapshot.rs index a5829161e25..6ab2bb78af1 100644 --- a/components/engine_traits/src/snapshot.rs +++ b/components/engine_traits/src/snapshot.rs @@ -2,7 +2,7 @@ use std::fmt::Debug; -use crate::{iterable::Iterable, peekable::Peekable, CfNamesExt}; +use crate::{iterable::Iterable, peekable::Peekable, CfNamesExt, SnapshotMiscExt}; /// A consistent read-only view of the database. /// @@ -10,6 +10,7 @@ use crate::{iterable::Iterable, peekable::Peekable, CfNamesExt}; /// clonable, call `into_sync` to create a `SyncSnapshot`. pub trait Snapshot where - Self: 'static + Peekable + Iterable + CfNamesExt + Send + Sync + Sized + Debug, + Self: + 'static + Peekable + Iterable + CfNamesExt + SnapshotMiscExt + Send + Sync + Sized + Debug, { } diff --git a/components/engine_traits/src/snapshot_misc.rs b/components/engine_traits/src/snapshot_misc.rs new file mode 100644 index 00000000000..2cea3d58d81 --- /dev/null +++ b/components/engine_traits/src/snapshot_misc.rs @@ -0,0 +1,5 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +pub trait SnapshotMiscExt { + fn sequence_number(&self) -> u64; +} diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index d9af1c6594f..36dbb26e4c7 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -211,6 +211,7 @@ where Arc::new(delegate.cached_tablet.cache().snapshot()), region, ); + // Ensures the snapshot is acquired before getting the time atomic::fence(atomic::Ordering::Release); let snapshot_ts = monotonic_raw_now(); @@ -275,6 +276,7 @@ where } }; + snap.set_from_v2(); snap.txn_ext = Some(delegate.txn_ext.clone()); snap.term = NonZeroU64::new(delegate.term); snap.txn_extra_op = delegate.txn_extra_op.load(); diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index 4073b71c60d..41662df149f 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -35,6 +35,7 @@ pub struct RegionSnapshot { snap: Arc, region: Arc, apply_index: Arc, + from_v2: bool, pub term: Option, pub txn_extra_op: TxnExtraOp, // `None` means the snapshot does not provide peer related transaction extensions. @@ -68,6 +69,7 @@ where // Use 0 to indicate that the apply index is missing and we need to KvGet it, // since apply index must be >= RAFT_INIT_LOG_INDEX. 
apply_index: Arc::new(AtomicU64::new(0)), + from_v2: false, term: None, txn_extra_op: TxnExtraOp::Noop, txn_ext: None, @@ -85,6 +87,18 @@ where self.snap.as_ref() } + pub fn set_from_v2(&mut self) { + self.from_v2 = true; + } + + pub fn get_data_version(&self) -> Result { + if self.from_v2 { + Ok(self.snap.sequence_number()) + } else { + self.get_apply_index() + } + } + #[inline] pub fn set_apply_index(&self, apply_index: u64) { self.apply_index.store(apply_index, Ordering::SeqCst); @@ -168,6 +182,7 @@ where snap: self.snap.clone(), region: Arc::clone(&self.region), apply_index: Arc::clone(&self.apply_index), + from_v2: self.from_v2, term: self.term, txn_extra_op: self.txn_extra_op, txn_ext: self.txn_ext.clone(), diff --git a/components/tikv_kv/src/raftstore_impls.rs b/components/tikv_kv/src/raftstore_impls.rs index e89087e565f..977c34254f6 100644 --- a/components/tikv_kv/src/raftstore_impls.rs +++ b/components/tikv_kv/src/raftstore_impls.rs @@ -29,7 +29,7 @@ pub struct RegionSnapshotExt<'a, S: Snapshot> { impl<'a, S: Snapshot> SnapshotExt for RegionSnapshotExt<'a, S> { #[inline] fn get_data_version(&self) -> Option { - self.snapshot.get_apply_index().ok() + self.snapshot.get_data_version().ok() } fn is_max_ts_synced(&self) -> bool { From 7f098aec85990b7939821b777d4f05451dc66d2f Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 6 Jul 2023 15:09:45 +0800 Subject: [PATCH 0778/1149] raftstore: respond gc peer immediately if region state not found (#15057) ref pingcap/tiflash#7727, close tikv/tikv#15074 A peer may never be created if its parent peer skips split by applying a new snapshot. 
Signed-off-by: Neil Shen Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/life.rs | 18 +++-- components/test_raftstore/src/cluster.rs | 19 +++++ components/test_raftstore/src/lib.rs | 2 + tests/integrations/raftstore/test_life.rs | 90 ++++++++++++++++++++-- 4 files changed, 116 insertions(+), 13 deletions(-) diff --git a/components/raftstore/src/store/fsm/life.rs b/components/raftstore/src/store/fsm/life.rs index 59aa8b316f0..e95f8978338 100644 --- a/components/raftstore/src/store/fsm/life.rs +++ b/components/raftstore/src/store/fsm/life.rs @@ -5,6 +5,7 @@ use engine_traits::{KvEngine, CF_RAFT}; use kvproto::raft_serverpb::{ExtraMessageType, PeerState, RaftMessage, RegionLocalState}; +use tikv_util::warn; use crate::store::util::is_epoch_stale; @@ -68,12 +69,17 @@ pub fn handle_tombstone_message_on_learner( let region_state_key = keys::region_state_key(region_id); let local_state: RegionLocalState = match engine.get_msg_cf(CF_RAFT, ®ion_state_key) { Ok(Some(s)) => s, - e => panic!( - "[store {}] failed to get regions state of {:?}: {:?}", - store_id, - msg.get_region_id(), - e - ), + e => { + warn!( + "[store {}] failed to get regions state of {:?}: {:?}", + store_id, + msg.get_region_id(), + e + ); + // A peer may never be created if its parent peer skips split by + // applying a new snapshot. 
+ return build_peer_destroyed_report(&mut msg); + } }; if local_state.get_state() != PeerState::Tombstone { diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index c916ec7448e..23edf0efab1 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1277,6 +1277,25 @@ impl Cluster { ); } + pub fn wait_peer_state(&self, region_id: u64, store_id: u64, peer_state: PeerState) { + for _ in 0..100 { + if let Some(state) = self + .get_engine(store_id) + .get_msg_cf::( + engine_traits::CF_RAFT, + &keys::region_state_key(region_id), + ) + .unwrap() && state.get_state() == peer_state { + return; + } + sleep_ms(10); + } + panic!( + "[region {}] peer state still not reach {:?}", + region_id, peer_state + ); + } + pub fn wait_last_index( &mut self, region_id: u64, diff --git a/components/test_raftstore/src/lib.rs b/components/test_raftstore/src/lib.rs index 950581a6ce8..04dfbd24de1 100644 --- a/components/test_raftstore/src/lib.rs +++ b/components/test_raftstore/src/lib.rs @@ -1,5 +1,7 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. +#![feature(let_chains)] + #[macro_use] extern crate lazy_static; #[macro_use] diff --git a/tests/integrations/raftstore/test_life.rs b/tests/integrations/raftstore/test_life.rs index de394325f08..fff3f0dea62 100644 --- a/tests/integrations/raftstore/test_life.rs +++ b/tests/integrations/raftstore/test_life.rs @@ -1,19 +1,22 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - sync::{Arc, Mutex}, + sync::{mpsc::channel, Arc, Mutex}, time::Duration, }; -use kvproto::raft_serverpb::{PeerState, RaftMessage}; +use kvproto::raft_serverpb::{ExtraMessageType, PeerState, RaftMessage}; use raftstore::errors::Result; -use test_raftstore::{new_learner_peer, sleep_ms, Filter, FilterFactory, Simulator as S1}; +use test_raftstore::{ + new_learner_peer, new_peer, sleep_ms, Filter, FilterFactory, Simulator as S1, +}; use test_raftstore_v2::Simulator as S2; -use tikv_util::time::Instant; +use tikv_util::{time::Instant, HandyRwLock}; struct ForwardFactory { node_id: u64, chain_send: Arc, + keep_msg: bool, } impl FilterFactory for ForwardFactory { @@ -21,6 +24,7 @@ impl FilterFactory for ForwardFactory { vec![Box::new(ForwardFilter { node_id: self.node_id, chain_send: self.chain_send.clone(), + keep_msg: self.keep_msg, })] } } @@ -28,13 +32,22 @@ impl FilterFactory for ForwardFactory { struct ForwardFilter { node_id: u64, chain_send: Arc, + keep_msg: bool, } impl Filter for ForwardFilter { fn before(&self, msgs: &mut Vec) -> Result<()> { - for m in msgs.drain(..) { - if self.node_id == m.get_to_peer().get_store_id() { - (self.chain_send)(m); + if self.keep_msg { + for m in msgs { + if self.node_id == m.get_to_peer().get_store_id() { + (self.chain_send)(m.clone()); + } + } + } else { + for m in msgs.drain(..) 
{ + if self.node_id == m.get_to_peer().get_store_id() { + (self.chain_send)(m); + } } } Ok(()) @@ -91,6 +104,7 @@ fn test_gc_peer_tiflash_engine() { info!("send to trans2"; "msg" => ?m); let _ = trans2.lock().unwrap().send_raft_message(Box::new(m)); }), + keep_msg: false, }; cluster_v1.add_send_filter(factory1); // For cluster 2, it intercepts msgs sent to learner node, and then @@ -101,6 +115,7 @@ fn test_gc_peer_tiflash_engine() { info!("send to trans1"; "msg" => ?m); let _ = trans1.lock().unwrap().send_raft_message(m); }), + keep_msg: false, }; cluster_v2.add_send_filter(factory2); @@ -124,3 +139,64 @@ fn test_gc_peer_tiflash_engine() { } } } + +#[test] +fn test_gc_removed_peer() { + let mut cluster = test_raftstore::new_node_cluster(1, 2); + cluster.cfg.raft_store.enable_v2_compatible_learner = true; + cluster.pd_client.disable_default_operator(); + let region_id = cluster.run_conf_change(); + + let (tx, rx) = channel(); + let tx = Mutex::new(tx); + let factory = ForwardFactory { + node_id: 1, + chain_send: Arc::new(move |m| { + if m.get_extra_msg().get_type() == ExtraMessageType::MsgGcPeerResponse { + let _ = tx.lock().unwrap().send(m); + } + }), + keep_msg: true, + }; + cluster.add_send_filter(factory); + + let must_gc_peer = |to_peer: kvproto::metapb::Peer| { + let epoch = cluster.get_region_epoch(region_id); + let mut msg = RaftMessage::default(); + msg.set_is_tombstone(true); + msg.set_region_id(region_id); + msg.set_from_peer(new_peer(1, 1)); + msg.set_to_peer(to_peer.clone()); + msg.set_region_epoch(epoch.clone()); + let extra_msg = msg.mut_extra_msg(); + extra_msg.set_type(ExtraMessageType::MsgGcPeerRequest); + let check_peer = extra_msg.mut_check_gc_peer(); + check_peer.set_from_region_id(region_id); + check_peer.set_check_region_id(region_id); + check_peer.set_check_peer(to_peer.clone()); + check_peer.set_check_region_epoch(epoch); + + cluster.sim.wl().send_raft_msg(msg.clone()).unwrap(); + let gc_resp = 
rx.recv_timeout(Duration::from_secs(5)).unwrap(); + assert_eq!(gc_resp.get_region_id(), region_id); + assert_eq!(*gc_resp.get_from_peer(), to_peer); + }; + + // Mock gc a peer that has been removed before creation. + must_gc_peer(new_learner_peer(2, 5)); + + cluster + .pd_client + .must_add_peer(region_id, new_learner_peer(2, 4)); + // Make sure learner is created. + cluster.wait_peer_state(region_id, 2, PeerState::Normal); + + cluster + .pd_client + .must_remove_peer(region_id, new_learner_peer(2, 4)); + // Make sure learner is removed. + cluster.wait_peer_state(region_id, 2, PeerState::Tombstone); + + // Mock gc peer request. GC learner(2, 4). + must_gc_peer(new_learner_peer(2, 4)); +} From 14dd2296e2c27fc5efb15abcb93d058beca34f2f Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 7 Jul 2023 13:33:23 +0800 Subject: [PATCH 0779/1149] raftstore-v2: disable coprocessor cache (#15090) ref tikv/tikv#14997, close tikv/tikv#15091 Disable coprocessor cache temporarily because of issue #15091 Signed-off-by: Neil Shen --- components/raftstore-v2/src/operation/query/local.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 36dbb26e4c7..b5b6b676973 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -276,7 +276,8 @@ where } }; - snap.set_from_v2(); + // FIXME: Tests show it causing TiDB failed to start. 
+ // snap.set_from_v2(); snap.txn_ext = Some(delegate.txn_ext.clone()); snap.term = NonZeroU64::new(delegate.term); snap.txn_extra_op = delegate.txn_extra_op.load(); From d33dcabb6ce8f64ae6b934a67b8f1fc459b18359 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Fri, 7 Jul 2023 13:58:53 +0800 Subject: [PATCH 0780/1149] raftkv: Fix the problem that writing with failpoint raftkv_early_error_report enabled will cause TiKV panic (#15037) close tikv/tikv#15020 Fix the problem that writing with failpoint raftkv_early_error_report enabled will cause TiKV panic. We only wrap the `on_applied` callback with `must_call` when there's no error happening in-place in `async_write` function. Signed-off-by: MyonKeminta Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/server/raftkv/mod.rs | 89 +++++++++++++------------- src/storage/txn/scheduler.rs | 2 +- tests/failpoints/cases/test_storage.rs | 40 ++++++------ 3 files changed, 69 insertions(+), 62 deletions(-) diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index bbae97ea293..1dd7d9536c6 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -506,56 +506,59 @@ where self.schedule_txn_extra(txn_extra); let (tx, rx) = WriteResFeed::pair(); - let proposed_cb = if !WriteEvent::subscribed_proposed(subscribed) { - None - } else { - let tx = tx.clone(); - Some(Box::new(move || tx.notify_proposed()) as store::ExtCallback) - }; - let committed_cb = if !WriteEvent::subscribed_committed(subscribed) { - None - } else { - let tx = tx.clone(); - Some(Box::new(move || tx.notify_committed()) as store::ExtCallback) - }; - let applied_tx = tx.clone(); - let applied_cb = must_call( - Box::new(move |resp: WriteResponse| { - fail_point!("applied_cb_return_undetermined_err", |_| { - applied_tx.notify(Err(kv::Error::from(Error::Undetermined( - ASYNC_WRITE_CALLBACK_DROPPED_ERR_MSG.to_string(), - )))); - }); - let mut res = match 
on_write_result::(resp) { - Ok(CmdRes::Resp(_)) => { - fail_point!("raftkv_async_write_finish"); - Ok(()) - } - Ok(CmdRes::Snap(_)) => { - Err(box_err!("unexpect snapshot, should mutate instead.")) - } - Err(e) => Err(kv::Error::from(e)), - }; - if let Some(cb) = on_applied { - cb(&mut res); - } - applied_tx.notify(res); - }), - drop_on_applied_callback, - ); - - let cb = StoreCallback::write_ext(applied_cb, proposed_cb, committed_cb); - let extra_opts = RaftCmdExtraOpts { - deadline: batch.deadline, - disk_full_opt: batch.disk_full_opt, - }; if res.is_ok() { + let proposed_cb = if !WriteEvent::subscribed_proposed(subscribed) { + None + } else { + let tx = tx.clone(); + Some(Box::new(move || tx.notify_proposed()) as store::ExtCallback) + }; + let committed_cb = if !WriteEvent::subscribed_committed(subscribed) { + None + } else { + let tx = tx.clone(); + Some(Box::new(move || tx.notify_committed()) as store::ExtCallback) + }; + let applied_tx = tx.clone(); + let applied_cb = must_call( + Box::new(move |resp: WriteResponse| { + fail_point!("applied_cb_return_undetermined_err", |_| { + applied_tx.notify(Err(kv::Error::from(Error::Undetermined( + ASYNC_WRITE_CALLBACK_DROPPED_ERR_MSG.to_string(), + )))); + }); + let mut res = match on_write_result::(resp) { + Ok(CmdRes::Resp(_)) => { + fail_point!("raftkv_async_write_finish"); + Ok(()) + } + Ok(CmdRes::Snap(_)) => { + Err(box_err!("unexpect snapshot, should mutate instead.")) + } + Err(e) => Err(kv::Error::from(e)), + }; + if let Some(cb) = on_applied { + cb(&mut res); + } + applied_tx.notify(res); + }), + drop_on_applied_callback, + ); + + let cb = StoreCallback::write_ext(applied_cb, proposed_cb, committed_cb); + let extra_opts = RaftCmdExtraOpts { + deadline: batch.deadline, + disk_full_opt: batch.disk_full_opt, + }; res = self .router .send_command(cmd, cb, extra_opts) .map_err(kv::Error::from); } if res.is_err() { + // Note that `on_applied` is not called in this case. 
We send message to the + // channel here to notify the caller that the writing ended, like + // how the `applied_cb` does. tx.notify(res); } rx.inspect(move |ev| { diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index ad88d53532e..ff0aa886a0e 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -1508,7 +1508,7 @@ impl TxnScheduler { // sent to the raftstore. // // If some in-memory pessimistic locks need to be proposed, we will propose - // another TransferLeader command. Then, we can guarentee even if the proposed + // another TransferLeader command. Then, we can guarantee even if the proposed // locks don't include the locks deleted here, the response message of the // transfer leader command must be later than this write command because this // write command has been sent to the raftstore. Then, we don't need to worry diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 3a20e080736..dd57f28ab94 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -26,7 +26,6 @@ use resource_control::ResourceGroupManager; use test_raftstore::*; use tikv::{ config::{ConfigController, Module}, - server::raftkv::ASYNC_WRITE_CALLBACK_DROPPED_ERR_MSG, storage::{ self, config_manager::StorageConfigManger, @@ -42,7 +41,6 @@ use tikv::{ Error as StorageError, ErrorInner as StorageErrorInner, *, }, }; -use tikv_kv::ErrorInner::Undetermined; use tikv_util::{future::paired_future_callback, worker::dummy_scheduler, HandyRwLock}; use txn_types::{Key, Mutation, TimeStamp}; @@ -114,9 +112,6 @@ fn test_scheduler_leader_change_twice() { fn test_server_catching_api_error() { let raftkv_fp = "raftkv_early_error_report"; let mut cluster = new_server_cluster(0, 1); - // One scheduler worker thread would panic after processing the prewrite - // request because of undetermined error. 
- cluster.cfg.storage.scheduler_worker_pool_size = 2; cluster.run(); let region = cluster.get_region(b""); let leader = region.get_peers()[0].clone(); @@ -145,10 +140,12 @@ fn test_server_catching_api_error() { prewrite_req.primary_lock = b"k3".to_vec(); prewrite_req.start_version = 1; prewrite_req.lock_ttl = prewrite_req.start_version + 1; - let prewrite_err = client.kv_prewrite(&prewrite_req).unwrap_err(); - assert_eq!( - prewrite_err.to_string(), - "RpcFailure: 1-CANCELLED CANCELLED" + let prewrite_resp = client.kv_prewrite(&prewrite_req).unwrap(); + assert!(prewrite_resp.has_region_error(), "{:?}", prewrite_resp); + assert!( + prewrite_resp.get_region_error().has_region_not_found(), + "{:?}", + prewrite_resp ); must_get_none(&cluster.get_engine(1), b"k3"); @@ -157,9 +154,11 @@ fn test_server_catching_api_error() { put_req.key = b"k3".to_vec(); put_req.value = b"v3".to_vec(); let put_resp = client.raw_put(&put_req).unwrap(); - assert_eq!( - put_resp.get_error(), - Undetermined(ASYNC_WRITE_CALLBACK_DROPPED_ERR_MSG.to_string()).to_string() + assert!(put_resp.has_region_error(), "{:?}", put_resp); + assert!( + put_resp.get_region_error().has_region_not_found(), + "{:?}", + put_resp ); must_get_none(&cluster.get_engine(1), b"k3"); @@ -199,9 +198,11 @@ fn test_raftkv_early_error_report() { put_req.key = k.to_vec(); put_req.value = b"v".to_vec(); let put_resp = client.raw_put(&put_req).unwrap(); - assert_eq!( - put_resp.get_error(), - Undetermined(ASYNC_WRITE_CALLBACK_DROPPED_ERR_MSG.to_string()).to_string() + assert!(put_resp.has_region_error(), "{:?}", put_resp); + assert!( + put_resp.get_region_error().has_region_not_found(), + "{:?}", + put_resp ); must_get_none(&cluster.get_engine(1), k); } @@ -217,12 +218,15 @@ fn test_raftkv_early_error_report() { put_req.value = b"v".to_vec(); let put_resp = client.raw_put(&put_req).unwrap(); if ctx.get_region_id() == injected_region_id { - assert_eq!( - put_resp.get_error(), - 
Undetermined(ASYNC_WRITE_CALLBACK_DROPPED_ERR_MSG.to_string()).to_string() + assert!(put_resp.has_region_error(), "{:?}", put_resp); + assert!( + put_resp.get_region_error().has_region_not_found(), + "{:?}", + put_resp ); must_get_none(&cluster.get_engine(1), k); } else { + assert!(!put_resp.has_region_error(), "{:?}", put_resp); must_get_equal(&cluster.get_engine(1), k, b"v"); } } From 189e4c4676c231da06ab0960f289fa915169fd9d Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Thu, 6 Jul 2023 23:12:23 -0700 Subject: [PATCH 0781/1149] update default value for end_point_request_max_handle_duration (#15029) close tikv/tikv#15022 Update default end_point_request_max_handle_duration for big region size Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/config/mod.rs | 34 ++++++++++++++++++++ src/coprocessor/endpoint.rs | 6 ++-- src/server/config.rs | 36 ++++++++++++++++++---- tests/failpoints/cases/test_coprocessor.rs | 4 ++- tests/integrations/config/mod.rs | 2 +- 5 files changed, 71 insertions(+), 11 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 137ab56e1e1..cd590550b6f 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3433,6 +3433,8 @@ impl TikvConfig { .optimize_for(self.coprocessor.region_split_size()); self.raft_store .optimize_for(self.storage.engine == EngineType::RaftKv2); + self.server + .optimize_for(self.coprocessor.region_split_size()); if self.storage.engine == EngineType::RaftKv2 { self.raft_store.store_io_pool_size = cmp::max(self.raft_store.store_io_pool_size, 1); } @@ -5430,6 +5432,35 @@ mod tests { check_cfg(&cfg); } + #[test] + fn test_endpoint_config() { + let mut default_cfg = TikvConfig::default(); + default_cfg.storage.engine = EngineType::RaftKv; + default_cfg.validate().unwrap(); + assert_eq!( + default_cfg.server.end_point_request_max_handle_duration(), + ReadableDuration::secs(60) + ); + + let mut default_cfg = TikvConfig::default(); + default_cfg.storage.engine = 
EngineType::RaftKv2; + default_cfg.validate().unwrap(); + assert_eq!( + default_cfg.server.end_point_request_max_handle_duration(), + ReadableDuration::secs(1800) + ); + + let mut default_cfg = TikvConfig::default(); + default_cfg.storage.engine = EngineType::RaftKv2; + default_cfg.server.end_point_request_max_handle_duration = + Some(ReadableDuration::secs(900)); + default_cfg.validate().unwrap(); + assert_eq!( + default_cfg.server.end_point_request_max_handle_duration(), + ReadableDuration::secs(900) + ); + } + #[test] fn test_compatible_adjust_validate_equal() { // After calling many time of `compatible_adjust` and `validate` should has @@ -5910,6 +5941,9 @@ mod tests { default_cfg .coprocessor .optimize_for(default_cfg.storage.engine == EngineType::RaftKv2); + default_cfg + .server + .optimize_for(default_cfg.coprocessor.region_split_size()); default_cfg.security.redact_info_log = Some(false); default_cfg.coprocessor.region_max_size = Some(default_cfg.coprocessor.region_max_size()); default_cfg.coprocessor.region_max_keys = Some(default_cfg.coprocessor.region_max_keys()); diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 9339ae0bcfc..0cdd849b42c 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -103,7 +103,7 @@ impl Endpoint { batch_row_limit: cfg.end_point_batch_row_limit, stream_batch_row_limit: cfg.end_point_stream_batch_row_limit, stream_channel_size: cfg.end_point_stream_channel_size, - max_handle_duration: cfg.end_point_request_max_handle_duration.0, + max_handle_duration: cfg.end_point_request_max_handle_duration().0, slow_log_threshold: cfg.end_point_slow_log_threshold.0, quota_limiter, _phantom: Default::default(), @@ -1492,9 +1492,9 @@ mod tests { )); let config = Config { - end_point_request_max_handle_duration: ReadableDuration( + end_point_request_max_handle_duration: Some(ReadableDuration( (PAYLOAD_SMALL + PAYLOAD_LARGE) * 2, - ), + )), ..Default::default() }; diff --git a/src/server/config.rs 
b/src/server/config.rs index f84d93ef4a5..013d1a66238 100644 --- a/src/server/config.rs +++ b/src/server/config.rs @@ -139,7 +139,7 @@ pub struct Config { #[online_config(skip)] pub end_point_enable_batch_if_possible: bool, #[online_config(skip)] - pub end_point_request_max_handle_duration: ReadableDuration, + pub end_point_request_max_handle_duration: Option, #[online_config(skip)] pub end_point_max_concurrency: usize, #[serde(with = "perf_level_serde")] @@ -246,9 +246,7 @@ impl Default for Config { end_point_batch_row_limit: DEFAULT_ENDPOINT_BATCH_ROW_LIMIT, end_point_stream_batch_row_limit: DEFAULT_ENDPOINT_STREAM_BATCH_ROW_LIMIT, end_point_enable_batch_if_possible: true, - end_point_request_max_handle_duration: ReadableDuration::secs( - DEFAULT_ENDPOINT_REQUEST_MAX_HANDLE_SECS, - ), + end_point_request_max_handle_duration: None, end_point_max_concurrency: cmp::max(cpu_num as usize, MIN_ENDPOINT_MAX_CONCURRENCY), end_point_perf_level: PerfLevel::Uninitialized, snap_io_max_bytes_per_sec: ReadableSize(DEFAULT_SNAP_MAX_BYTES_PER_SEC), @@ -351,7 +349,7 @@ impl Config { return Err(box_err!("server.end-point-recursion-limit is too small")); } - if self.end_point_request_max_handle_duration.as_secs() + if self.end_point_request_max_handle_duration().as_secs() < DEFAULT_ENDPOINT_REQUEST_MAX_HANDLE_SECS { return Err(box_err!( @@ -405,6 +403,32 @@ impl Config { GrpcCompressionType::Gzip => CompressionAlgorithms::GRPC_COMPRESS_GZIP, } } + + pub fn end_point_request_max_handle_duration(&self) -> ReadableDuration { + if let Some(end_point_request_max_handle_duration) = + self.end_point_request_max_handle_duration + { + return end_point_request_max_handle_duration; + } + ReadableDuration::secs(DEFAULT_ENDPOINT_REQUEST_MAX_HANDLE_SECS) + } + + pub fn optimize_for(&mut self, region_size: ReadableSize) { + // It turns out for 256MB region size, 60s is typically enough. 
+ const THRESHOLD_SIZE: ReadableSize = ReadableSize::mb(256); + if region_size.0 < THRESHOLD_SIZE.0 { + self.end_point_request_max_handle_duration + .get_or_insert(ReadableDuration::secs( + DEFAULT_ENDPOINT_REQUEST_MAX_HANDLE_SECS, + )); + } else { + self.end_point_request_max_handle_duration + .get_or_insert(ReadableDuration::secs(cmp::min( + 1800, + region_size.0 / THRESHOLD_SIZE.0 * DEFAULT_ENDPOINT_REQUEST_MAX_HANDLE_SECS, + ))); + } + } } pub struct ServerConfigManager { @@ -514,7 +538,7 @@ mod tests { invalid_cfg.validate().unwrap_err(); let mut invalid_cfg = cfg.clone(); - invalid_cfg.end_point_request_max_handle_duration = ReadableDuration::secs(0); + invalid_cfg.end_point_request_max_handle_duration = Some(ReadableDuration::secs(0)); invalid_cfg.validate().unwrap_err(); invalid_cfg = Config::default(); diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index 366684269f1..84e11d91c61 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -63,7 +63,9 @@ fn test_deadline_3() { let (_, endpoint, _) = { let engine = tikv::storage::TestEngineBuilder::new().build().unwrap(); let cfg = tikv::server::Config { - end_point_request_max_handle_duration: tikv_util::config::ReadableDuration::secs(1), + end_point_request_max_handle_duration: Some(tikv_util::config::ReadableDuration::secs( + 1, + )), ..Default::default() }; init_data_with_details(Context::default(), engine, &product, &data, true, &cfg) diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 01a408cce1b..2ab5e4700dc 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -111,7 +111,7 @@ fn test_serde_custom_tikv_config() { end_point_batch_row_limit: 64, end_point_stream_batch_row_limit: 4096, end_point_enable_batch_if_possible: true, - end_point_request_max_handle_duration: ReadableDuration::secs(12), + end_point_request_max_handle_duration: 
Some(ReadableDuration::secs(12)), end_point_max_concurrency: 10, end_point_perf_level: PerfLevel::EnableTime, snap_io_max_bytes_per_sec: ReadableSize::mb(10), From 993eb2f610acfc4ee9e6c7b4f03eb63213f6d593 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Fri, 7 Jul 2023 17:30:42 +0800 Subject: [PATCH 0782/1149] raftstore-v2: fix issues of encryption and merge (#14820) ref tikv/tikv#12842, fix tikv/tikv#15052, fix tikv/tikv#15059 - Do not try to compact log when peer is destroyed - Check `apply_scheduler.is_some()` when handling CaptureChange - Fix encrypted dir deletion unable to traverse directory that was renamed - Fix a bug that encryption key can be lost when there're racing snapshots for a same region - Fix a bug that checkpoint of a to-be-destroyed tablet can cause deadlock Signed-off-by: tabokie --- Cargo.lock | 6 +- Cargo.toml | 3 +- cmd/tikv-ctl/Cargo.toml | 2 +- cmd/tikv-ctl/src/executor.rs | 2 +- cmd/tikv-ctl/src/main.rs | 10 +- components/backup-stream/Cargo.toml | 2 +- components/backup/Cargo.toml | 2 +- components/causal_ts/Cargo.toml | 2 +- components/cdc/Cargo.toml | 3 +- components/encryption/src/lib.rs | 2 + components/encryption/src/manager/mod.rs | 168 +++++++++++++++--- components/engine_panic/Cargo.toml | 2 +- components/engine_rocks/Cargo.toml | 2 +- components/engine_rocks/src/encryption.rs | 4 +- components/engine_test/src/lib.rs | 10 +- components/engine_traits/Cargo.toml | 2 +- components/engine_traits/src/encryption.rs | 7 +- .../engine_traits_tests/src/checkpoint.rs | 10 +- components/engine_traits_tests/src/ctor.rs | 2 +- components/error_code/Cargo.toml | 2 +- components/external_storage/export/Cargo.toml | 2 +- components/into_other/Cargo.toml | 2 +- components/raft_log_engine/Cargo.toml | 2 +- components/raft_log_engine/src/engine.rs | 21 ++- components/raftstore-v2/Cargo.toml | 2 +- components/raftstore-v2/src/fsm/peer.rs | 3 + .../operation/command/admin/compact_log.rs | 6 +- .../operation/command/admin/conf_change.rs | 9 +- 
.../src/operation/command/admin/flashback.rs | 6 +- .../src/operation/command/write/mod.rs | 11 +- components/raftstore-v2/src/operation/life.rs | 7 + .../src/operation/query/capture.rs | 14 +- .../raftstore-v2/src/operation/ready/mod.rs | 7 +- .../src/operation/ready/snapshot.rs | 4 +- components/raftstore-v2/src/router/message.rs | 2 +- components/raftstore-v2/src/worker/tablet.rs | 61 ++++++- .../tests/integrations/cluster.rs | 4 +- components/raftstore/Cargo.toml | 4 +- components/raftstore/src/store/peer.rs | 2 +- components/raftstore/src/store/snap.rs | 10 +- components/raftstore/src/store/util.rs | 10 +- components/raftstore/src/store/worker/read.rs | 1 + components/resolved_ts/Cargo.toml | 3 +- components/server/Cargo.toml | 4 +- components/server/src/server.rs | 21 ++- components/server/src/server2.rs | 13 +- components/snap_recovery/src/init_cluster.rs | 7 +- components/sst_importer/src/import_file.rs | 8 +- components/sst_importer/src/sst_importer.rs | 2 +- components/sst_importer/src/util.rs | 32 ++-- components/test_pd_client/Cargo.toml | 2 +- components/test_raftstore-v2/Cargo.toml | 10 +- components/test_raftstore-v2/src/util.rs | 4 +- components/test_raftstore/Cargo.toml | 4 +- components/test_raftstore/src/util.rs | 4 +- components/tikv_kv/Cargo.toml | 2 +- metrics/grafana/tikv_details.json | 2 +- src/config/mod.rs | 5 + src/server/debug2.rs | 4 +- src/server/engine_factory.rs | 16 +- src/server/service/kv.rs | 1 + src/server/tablet_snap.rs | 23 ++- tests/Cargo.toml | 4 +- 63 files changed, 428 insertions(+), 176 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 00ef5e5dd4f..49503f86612 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3095,7 +3095,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#0a57dd2e2c3d1be9679a9ad6a3ed70dcb4b87eff" +source = "git+https://github.com/tikv/rust-rocksdb.git#782bfc8309f266da634267d207760362b362aa61" dependencies = [ "bindgen 
0.65.1", "bzip2-sys", @@ -3114,7 +3114,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#0a57dd2e2c3d1be9679a9ad6a3ed70dcb4b87eff" +source = "git+https://github.com/tikv/rust-rocksdb.git#782bfc8309f266da634267d207760362b362aa61" dependencies = [ "bzip2-sys", "cc", @@ -5086,7 +5086,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#0a57dd2e2c3d1be9679a9ad6a3ed70dcb4b87eff" +source = "git+https://github.com/tikv/rust-rocksdb.git#782bfc8309f266da634267d207760362b362aa61" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/Cargo.toml b/Cargo.toml index 10650db8924..9141e5b770d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -112,7 +112,7 @@ pprof = { version = "0.11", default-features = false, features = ["flamegraph", prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" protobuf = { version = "2.8", features = ["bytes"] } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } raft_log_engine = { workspace = true } raftstore = { workspace = true, features = ["engine_rocks"] } raftstore-v2 = { workspace = true } @@ -358,6 +358,7 @@ tipb_helper = { path = "components/tipb_helper" } tracker = { path = "components/tracker" } txn_types = { path = "components/txn_types" } # External libs +raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } grpcio = { version = "0.10.4", default-features = false, features = ["openssl-vendored", "protobuf-codec", "nightly"] } grpcio-health = { version = "0.10.4", default-features = false, features = ["protobuf-codec"] } tipb = { git = "https://github.com/pingcap/tipb.git" } diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index 93a8c3c04d3..a36e72b3c64 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -70,7 +70,7 @@ 
log_wrappers = { workspace = true } pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } raft-engine = { git = "https://github.com/tikv/raft-engine.git" } raft-engine-ctl = { git = "https://github.com/tikv/raft-engine.git" } raft_log_engine = { workspace = true } diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index c26347f52e4..47ec632f957 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -101,7 +101,7 @@ pub fn new_debug_executor( .build_shared_rocks_env(key_manager.clone(), None /* io_rate_limiter */) .unwrap(); - let factory = KvEngineFactoryBuilder::new(env.clone(), cfg, cache) + let factory = KvEngineFactoryBuilder::new(env.clone(), cfg, cache, key_manager.clone()) .lite(true) .build(); diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index dda59ac14d3..6491ab241e8 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -1252,14 +1252,16 @@ fn flush_std_buffer_to_log( } fn read_cluster_id(config: &TikvConfig) -> Result { - let env = config - .build_shared_rocks_env(None, None) - .map_err(|e| format!("build_shared_rocks_env fail: {}", e))?; + let key_manager = + data_key_manager_from_config(&config.security.encryption, &config.storage.data_dir) + .unwrap() + .map(Arc::new); + let env = get_env(key_manager.clone(), None /* io_rate_limiter */).unwrap(); let cache = config .storage .block_cache .build_shared_cache(config.storage.engine); - let kv_engine = KvEngineFactoryBuilder::new(env, config, cache) + let kv_engine = KvEngineFactoryBuilder::new(env, config, cache, key_manager) .build() .create_shared_db(&config.storage.data_dir) .map_err(|e| format!("create_shared_db fail: {}", e))?; diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml 
index 0cb3814602c..373e138888e 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -62,7 +62,7 @@ pin-project = "1.0" prometheus = { version = "0.13", default-features = false, features = ["nightly"] } prometheus-static-metric = "0.5" protobuf = { version = "2.8", features = ["bytes"] } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } raftstore = { workspace = true } rand = "0.8.0" regex = "1" diff --git a/components/backup/Cargo.toml b/components/backup/Cargo.toml index 6cb4edfe7dc..8c3490d1bc0 100644 --- a/components/backup/Cargo.toml +++ b/components/backup/Cargo.toml @@ -58,7 +58,7 @@ log_wrappers = { workspace = true } online_config = { workspace = true } pd_client = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly"] } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } raftstore = { workspace = true } security = { workspace = true } serde = "1.0" diff --git a/components/causal_ts/Cargo.toml b/components/causal_ts/Cargo.toml index 9b7925371c5..fb049a22ca7 100644 --- a/components/causal_ts/Cargo.toml +++ b/components/causal_ts/Cargo.toml @@ -23,7 +23,7 @@ parking_lot = "0.12" pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } serde = "1.0" serde_derive = "1.0" slog = { workspace = true } diff --git a/components/cdc/Cargo.toml b/components/cdc/Cargo.toml index 1c695bb92bb..4a7ee57a193 100644 --- a/components/cdc/Cargo.toml +++ b/components/cdc/Cargo.toml @@ -50,7 +50,7 @@ pd_client = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly"] } prometheus-static-metric = "0.5" protobuf = { version = "2.8", 
features = ["bytes"] } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } raftstore = { workspace = true } resolved_ts = { workspace = true } security = { workspace = true } @@ -68,7 +68,6 @@ txn_types = { workspace = true } criterion = "0.3" engine_rocks = { workspace = true } engine_traits = { workspace = true } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } tempfile = "3.0" test_pd_client = { workspace = true } test_raftstore = { workspace = true } diff --git a/components/encryption/src/lib.rs b/components/encryption/src/lib.rs index d37d2945273..38c38108dc5 100644 --- a/components/encryption/src/lib.rs +++ b/components/encryption/src/lib.rs @@ -1,5 +1,7 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. +#![feature(let_chains)] + mod config; mod crypter; mod encrypted_file; diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index e340bebcc5a..b11152e1882 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -46,6 +46,8 @@ struct Dicts { // key id used to encrypt the encryption file dictionary. The content is encrypted // using master key. key_dict: Mutex, + // A lock used to protect key_dict rotation. + key_dict_file_lock: Mutex<()>, // Thread-safe version of current_key_id. Only when writing back to key_dict, // write it back to `key_dict`. Reader should always use this atomic, instead of // key_dict.current_key_id, since the latter can reflect an update-in-progress key. 
@@ -73,6 +75,7 @@ impl Dicts { current_key_id: 0, ..Default::default() }), + key_dict_file_lock: Mutex::new(()), current_key_id: AtomicU64::new(0), rotation_period, base: Path::new(path).to_owned(), @@ -115,6 +118,7 @@ impl Dicts { file_dict: Mutex::new(file_dict), file_dict_file: Mutex::new(file_dict_file), key_dict: Mutex::new(key_dict), + key_dict_file_lock: Mutex::default(), current_key_id, rotation_period, base: base.to_owned(), @@ -149,6 +153,8 @@ impl Dicts { } fn save_key_dict(&self, master_key: &dyn Backend) -> Result<()> { + // In reality we only call this function inside `run_background_rotate_work`. + let _lk = self.key_dict_file_lock.try_lock().unwrap(); let file = EncryptedFile::new(&self.base, KEY_DICT_NAME); let (keys_len, key_bytes) = { let mut key_dict = self.key_dict.lock().unwrap(); @@ -636,7 +642,7 @@ impl DataKeyManager { self.open_file_with_writer(path, file_writer, true /* create */) } - pub fn open_file_with_writer, W: std::io::Write>( + pub fn open_file_with_writer, W: io::Write>( &self, path: P, writer: W, @@ -809,10 +815,13 @@ impl DataKeyManager { if !scan.exists() { return Ok(()); } - let mut iter = walkdir::WalkDir::new(scan).into_iter().peekable(); + let mut iter = walkdir::WalkDir::new(scan) + .into_iter() + .filter(|e| e.as_ref().map_or(true, |e| !e.path().is_dir())) + .peekable(); while let Some(e) = iter.next() { let e = e?; - if e.path_is_symlink() { + if e.path().is_symlink() { return Err(io::Error::new( io::ErrorKind::Other, format!("unexpected symbolic link: {}", e.path().display()), @@ -838,6 +847,11 @@ impl DataKeyManager { pub fn encryption_method(&self) -> engine_traits::EncryptionMethod { crypter::to_engine_encryption_method(self.method) } + + /// For tests. 
+ pub fn file_count(&self) -> usize { + self.dicts.file_dict.lock().unwrap().files.len() + } } impl Drop for DataKeyManager { @@ -883,22 +897,26 @@ impl EncryptionKeyManager for DataKeyManager { Ok(encrypted_file) } - fn delete_file(&self, fname: &str) -> IoResult<()> { + // See comments of `remove_dir` for more details when using this with a + // directory. + fn delete_file(&self, fname: &str, physical_fname: Option<&str>) -> IoResult<()> { fail_point!("key_manager_fails_before_delete_file", |_| IoResult::Err( - std::io::ErrorKind::Other.into() + io::ErrorKind::Other.into() )); - // `RemoveDir` is not managed, but RocksDB may use `RenameFile` on a directory, - // which internally calls `LinkFile` and `DeleteFile`. - let path = Path::new(fname); - if path.is_dir() { - let mut iter = walkdir::WalkDir::new(path).into_iter().peekable(); - while let Some(e) = iter.next() { - self.dicts - .delete_file(e?.path().to_str().unwrap(), iter.peek().is_none())?; + if let Some(physical) = physical_fname { + let physical_path = Path::new(physical); + if physical_path.is_dir() { + self.remove_dir(Path::new(fname), Some(physical_path))?; + return Ok(()); } } else { - self.dicts.delete_file(fname, true)?; + let path = Path::new(fname); + if path.is_dir() { + self.remove_dir(path, None)?; + return Ok(()); + } } + self.dicts.delete_file(fname, true)?; Ok(()) } @@ -912,7 +930,7 @@ impl EncryptionKeyManager for DataKeyManager { .peekable(); while let Some(e) = iter.next() { let e = e?; - if e.path_is_symlink() { + if e.path().is_symlink() { return Err(io::Error::new( io::ErrorKind::Other, format!("unexpected symbolic link: {}", e.path().display()), @@ -935,6 +953,7 @@ impl EncryptionKeyManager for DataKeyManager { /// doesn't exist locally. It synchronizes log file in batch. It automatically /// reverts changes if caller aborts. pub struct DataKeyImporter<'a> { + start_time: SystemTime, manager: &'a DataKeyManager, // Added file names. 
file_additions: Vec, @@ -945,8 +964,11 @@ pub struct DataKeyImporter<'a> { #[allow(dead_code)] impl<'a> DataKeyImporter<'a> { + const EXPECTED_TIME_WINDOW_SECS: u64 = 120; + pub fn new(manager: &'a DataKeyManager) -> Self { Self { + start_time: SystemTime::now(), manager, file_additions: Vec::new(), key_additions: Vec::new(), @@ -954,14 +976,27 @@ impl<'a> DataKeyImporter<'a> { } } - pub fn add(&mut self, fname: &str, iv: Vec, new_key: DataKey) -> Result<()> { + pub fn add(&mut self, fname: &str, iv: Vec, mut new_key: DataKey) -> Result<()> { + // Needed for time window check. + new_key.creation_time = self + .start_time + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); let method = new_key.method; let mut key_id = None; { let mut key_dict = self.manager.dicts.key_dict.lock().unwrap(); for (id, data_key) in &key_dict.keys { if data_key.key == new_key.key { - key_id = Some(*id); + // If this key is created within the window, there's a risk it is created by + // another importer, and can be rollback-ed. + if new_key.creation_time.saturating_sub(data_key.creation_time) + > Self::EXPECTED_TIME_WINDOW_SECS + { + key_id = Some(*id); + } + break; } } if key_id.is_none() { @@ -973,6 +1008,7 @@ impl<'a> DataKeyImporter<'a> { key_id = Some(id); e.insert(new_key); self.key_additions.push(id); + info!("generate new ID for imported key"; "id" => id, "fname" => fname); break; } } @@ -1028,14 +1064,24 @@ impl<'a> DataKeyImporter<'a> { } pub fn rollback(&mut self) -> Result<()> { + if let Some(fname) = self.file_additions.first() { + info!("rollback imported file encryption info"; "sample_fname" => fname); + } assert!(!self.committed); let mut iter = self.file_additions.drain(..).peekable(); while let Some(f) = iter.next() { self.manager.dicts.delete_file(&f, iter.peek().is_none())?; } - if !self.key_additions.is_empty() { + // If the duration is longer than the window, we cannot delete keys because they + // may already be referenced by other files. 
+ // System time can drift, use 1s as safety padding. + if !self.key_additions.is_empty() + && let Ok(duration) = self.start_time.elapsed() + && duration.as_secs() < Self::EXPECTED_TIME_WINDOW_SECS - 1 + { for key_id in self.key_additions.drain(..) { let mut key_dict = self.manager.dicts.key_dict.lock().unwrap(); + info!("rollback one imported data key"; "key_id" => key_id); key_dict.keys.remove(&key_id); } let (tx, rx) = std::sync::mpsc::channel(); @@ -1078,7 +1124,7 @@ mod tests { }; lazy_static::lazy_static! { - static ref LOCK_FOR_GAUGE: Mutex = Mutex::new(1); + static ref LOCK_FOR_GAUGE: Mutex<()> = Mutex::new(()); } fn new_mock_backend() -> Box { @@ -1323,9 +1369,9 @@ mod tests { let new_file = manager.new_file("foo").unwrap(); let get_file = manager.get_file("foo").unwrap(); assert_eq!(new_file, get_file); - manager.delete_file("foo").unwrap(); - manager.delete_file("foo").unwrap(); - manager.delete_file("foo1").unwrap(); + manager.delete_file("foo", None).unwrap(); + manager.delete_file("foo", None).unwrap(); + manager.delete_file("foo1", None).unwrap(); // Must be plaintext if file not found. assert_eq!(manager.get_file_exists("foo").unwrap(), None,); @@ -1387,14 +1433,14 @@ mod tests { let file = manager.new_file("foo").unwrap(); manager.link_file("foo", "foo1").unwrap(); - manager.delete_file("foo").unwrap(); + manager.delete_file("foo", None).unwrap(); // Must be the same. 
let file1 = manager.get_file("foo1").unwrap(); assert_eq!(file1, file); manager.link_file("foo", "foo2").unwrap(); - manager.delete_file("foo").unwrap(); + manager.delete_file("foo", None).unwrap(); assert_eq!(manager.get_file_exists("foo").unwrap(), None); assert_eq!(manager.get_file_exists("foo2").unwrap(), None); @@ -1580,7 +1626,7 @@ mod tests { #[test] fn test_plaintext_encrypter_writer() { - use std::io::{Read, Write}; + use io::{Read, Write}; let _guard = LOCK_FOR_GAUGE.lock().unwrap(); let (key_path, _tmp_key_dir) = create_key_file("key"); @@ -1614,7 +1660,7 @@ mod tests { } fn generate_mock_file>(dkm: Option<&DataKeyManager>, path: P, content: &String) { - use std::io::Write; + use io::Write; match dkm { Some(manager) => { // Encryption enabled. Use DataKeyManager to manage file. @@ -1636,7 +1682,7 @@ mod tests { path: P, expected: &String, ) { - use std::io::Read; + use io::Read; match dkm { Some(manager) => { @@ -1741,7 +1787,7 @@ mod tests { manager .link_file(subdir.to_str().unwrap(), dstdir.to_str().unwrap()) .unwrap(); - manager.delete_file(subdir.to_str().unwrap()).unwrap(); + manager.delete_file(subdir.to_str().unwrap(), None).unwrap(); assert_eq!( manager @@ -1847,4 +1893,70 @@ mod tests { assert_eq!(manager.get_file("1").unwrap(), file0); assert_eq!(manager.get_file("2").unwrap().key, key2); } + + // Test two importer importing duplicate files. 
+ // issue-15052 + #[test] + fn test_import_keys_duplicate() { + let _guard = LOCK_FOR_GAUGE.lock().unwrap(); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let manager = new_key_manager_def(&tmp_dir, Some(EncryptionMethod::Aes192Ctr)).unwrap(); + + let (_, key) = generate_data_key(EncryptionMethod::Aes192Ctr); + let file0 = manager.new_file("0").unwrap(); + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); + let key = DataKey { + key, + method: EncryptionMethod::Aes192Ctr, + creation_time: now, + ..Default::default() + }; + + // Because of time window check, importer2 will create yet another key_id, so no + // conflict. + let mut importer1 = DataKeyImporter::new(&manager); + importer1.add("1", file0.iv.clone(), key.clone()).unwrap(); + let mut importer2 = DataKeyImporter::new(&manager); + importer2.add("2", file0.iv.clone(), key.clone()).unwrap(); + importer1.rollback().unwrap(); + importer2.commit().unwrap(); + assert_eq!(manager.get_file_exists("1").unwrap(), None); + assert_eq!(manager.get_file("2").unwrap().key, key.key); + + let mut importer1 = DataKeyImporter::new(&manager); + // Use a super old time. + importer1.start_time = SystemTime::now() - std::time::Duration::from_secs(1000000); + importer1.add("3", file0.iv.clone(), key.clone()).unwrap(); + let mut importer2 = DataKeyImporter::new(&manager); + importer2.add("4", file0.iv, key.clone()).unwrap(); + // This time, even though importer2 will use the same key_id, importer1 rollback + // cannot remove it. 
+ importer1.rollback().unwrap(); + importer2.commit().unwrap(); + assert_eq!(manager.get_file_exists("3").unwrap(), None); + assert_eq!(manager.get_file("4").unwrap().key, key.key); + } + + #[test] + fn test_trash_encrypted_dir() { + let tmp_dir = tempfile::Builder::new() + .prefix("test_trash_encrypted_dir") + .tempdir() + .unwrap(); + let manager = new_key_manager_def(&tmp_dir, Some(EncryptionMethod::Aes192Ctr)).unwrap(); + let data_path = tmp_dir.path(); + let sub_dir = data_path.join("sub_dir"); + file_system::create_dir_all(&sub_dir).unwrap(); + let file_path = sub_dir.join("f"); + file_system::File::create(&file_path).unwrap(); + manager.new_file(file_path.to_str().unwrap()).unwrap(); + file_system::create_dir_all(sub_dir.join("deep_dir")).unwrap(); + assert_eq!(manager.file_count(), 1); + + crate::trash_dir_all(&sub_dir, Some(&manager)).unwrap(); + assert_eq!(manager.file_count(), 0); + } } diff --git a/components/engine_panic/Cargo.toml b/components/engine_panic/Cargo.toml index 2fad106519d..f5da1dad550 100644 --- a/components/engine_panic/Cargo.toml +++ b/components/engine_panic/Cargo.toml @@ -11,7 +11,7 @@ testexport = [] [dependencies] engine_traits = { workspace = true } kvproto = { workspace = true } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } tikv_alloc = { workspace = true } # FIXME: Remove this dep from the engine_traits interface tikv_util = { workspace = true } diff --git a/components/engine_rocks/Cargo.toml b/components/engine_rocks/Cargo.toml index af8a44db44c..1d275b788c2 100644 --- a/components/engine_rocks/Cargo.toml +++ b/components/engine_rocks/Cargo.toml @@ -42,7 +42,7 @@ online_config = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" protobuf = "2" -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } regex = "1" serde = "1.0" serde_derive = 
"1.0" diff --git a/components/engine_rocks/src/encryption.rs b/components/engine_rocks/src/encryption.rs index 99d492c4792..4dbe3ab10d2 100644 --- a/components/engine_rocks/src/encryption.rs +++ b/components/engine_rocks/src/encryption.rs @@ -48,8 +48,8 @@ impl DBEncryptionKeyManager for WrappedEncryptionKeyMan .new_file(fname) .map(convert_file_encryption_info) } - fn delete_file(&self, fname: &str) -> Result<()> { - self.manager.delete_file(fname) + fn delete_file(&self, fname: &str, physical_fname: Option<&str>) -> Result<()> { + self.manager.delete_file(fname, physical_fname) } fn link_file(&self, src_fname: &str, dst_fname: &str) -> Result<()> { self.manager.link_file(src_fname, dst_fname) diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index 932a1bcb51a..dd56d9a5db4 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -104,8 +104,6 @@ pub mod kv { KvTestEngine::new_kv_engine_opt(path, db_opt, cfs_opts) } - const TOMBSTONE_SUFFIX: &str = ".tombstone"; - #[derive(Clone)] pub struct TestTabletFactory { db_opt: DbOptions, @@ -129,13 +127,7 @@ pub mod kv { } fn destroy_tablet(&self, _ctx: TabletContext, path: &Path) -> Result<()> { - let tombstone_path = path.with_extension(TOMBSTONE_SUFFIX); - let _ = std::fs::remove_dir_all(&tombstone_path); - std::fs::rename(path, &tombstone_path)?; - if let Some(m) = &self.db_opt.key_manager { - m.remove_dir(path, Some(&tombstone_path))?; - } - std::fs::remove_dir_all(tombstone_path)?; + encryption::trash_dir_all(path, self.db_opt.key_manager.as_deref())?; Ok(()) } diff --git a/components/engine_traits/Cargo.toml b/components/engine_traits/Cargo.toml index 53cd960244d..2d11b59f623 100644 --- a/components/engine_traits/Cargo.toml +++ b/components/engine_traits/Cargo.toml @@ -19,7 +19,7 @@ kvproto = { workspace = true } lazy_static = "1.0" log_wrappers = { workspace = true } protobuf = "2" -raft = { version = "0.7.0", default-features = false, features = 
["protobuf-codec"] } +raft = { workspace = true } serde = "1.0" slog = { workspace = true } slog-global = { workspace = true } diff --git a/components/engine_traits/src/encryption.rs b/components/engine_traits/src/encryption.rs index 16f29d16d75..7376e2d5592 100644 --- a/components/engine_traits/src/encryption.rs +++ b/components/engine_traits/src/encryption.rs @@ -8,7 +8,12 @@ use std::{ pub trait EncryptionKeyManager: Sync + Send { fn get_file(&self, fname: &str) -> Result; fn new_file(&self, fname: &str) -> Result; - fn delete_file(&self, fname: &str) -> Result<()>; + /// Can be used with both file and directory. + /// + /// `physical_fname` is a hint when `fname` was renamed physically. + /// Depending on the implementation, providing false negative or false + /// positive value may result in leaking encryption keys. + fn delete_file(&self, fname: &str, physical_fname: Option<&str>) -> Result<()>; fn link_file(&self, src_fname: &str, dst_fname: &str) -> Result<()>; } diff --git a/components/engine_traits_tests/src/checkpoint.rs b/components/engine_traits_tests/src/checkpoint.rs index ad85b8f85ed..e531e55a55e 100644 --- a/components/engine_traits_tests/src/checkpoint.rs +++ b/components/engine_traits_tests/src/checkpoint.rs @@ -4,7 +4,7 @@ use std::sync::Arc; -use encryption_export::data_key_manager_from_config; +use encryption_export::{data_key_manager_from_config, trash_dir_all}; use engine_test::{ ctor::{CfOptions, DbOptions, KvEngineConstructorExt}, kv::KvTestEngine, @@ -28,7 +28,7 @@ fn test_encrypted_checkpoint() { ); let mut db_opts = DbOptions::default(); - db_opts.set_key_manager(Some(key_manager)); + db_opts.set_key_manager(Some(key_manager.clone())); let cf_opts: Vec<_> = ALL_CFS.iter().map(|cf| (*cf, CfOptions::new())).collect(); let path1 = root_path.join("1").to_str().unwrap().to_owned(); @@ -46,4 +46,10 @@ fn test_encrypted_checkpoint() { db2.get_value_cf(CF_DEFAULT, b"foo").unwrap().unwrap(), b"bar" ); + drop(db1); + drop(db2); + // Match 
KvEngineFactory::destroy_tablet. + trash_dir_all(path1, Some(&key_manager)).unwrap(); + trash_dir_all(path2, Some(&key_manager)).unwrap(); + assert_eq!(key_manager.file_count(), 0); } diff --git a/components/engine_traits_tests/src/ctor.rs b/components/engine_traits_tests/src/ctor.rs index dce6a64dff2..5d987d64858 100644 --- a/components/engine_traits_tests/src/ctor.rs +++ b/components/engine_traits_tests/src/ctor.rs @@ -118,7 +118,7 @@ fn new_engine_opt_renamed_dir() { let new_path = root_path.join("new").to_str().unwrap().to_owned(); key_manager.link_file(&path, &new_path).unwrap(); fs::rename(&path, &new_path).unwrap(); - key_manager.delete_file(&path).unwrap(); + key_manager.delete_file(&path, Some(&new_path)).unwrap(); { let db = KvTestEngine::new_kv_engine_opt(&new_path, db_opts.clone(), cf_opts.clone()).unwrap(); diff --git a/components/error_code/Cargo.toml b/components/error_code/Cargo.toml index 307532c1bb6..04179f5bd53 100644 --- a/components/error_code/Cargo.toml +++ b/components/error_code/Cargo.toml @@ -16,6 +16,6 @@ path = "bin.rs" grpcio = { workspace = true } kvproto = { workspace = true } lazy_static = "1.3" -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } serde = { version = "1.0", features = ["derive"] } tikv_alloc = { workspace = true } diff --git a/components/external_storage/export/Cargo.toml b/components/external_storage/export/Cargo.toml index 48e911ad5c2..6537eaf8995 100644 --- a/components/external_storage/export/Cargo.toml +++ b/components/external_storage/export/Cargo.toml @@ -70,7 +70,7 @@ lazy_static = { optional = true, version = "1.3" } libloading = { optional = true, version = "0.7.0" } once_cell = { optional = true, version = "1.3.1" } protobuf = { optional = true, version = "2" } -slog-global = { optional = true, version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog-global = { optional = 
true, workspace = true } tikv_util = { workspace = true } tokio = { version = "1.5", features = ["time", "rt", "net"], optional = true } tokio-util = { version = "0.7", features = ["compat"], optional = true } diff --git a/components/into_other/Cargo.toml b/components/into_other/Cargo.toml index a7778fded0e..dac98342f20 100644 --- a/components/into_other/Cargo.toml +++ b/components/into_other/Cargo.toml @@ -7,4 +7,4 @@ publish = false [dependencies] engine_traits = { workspace = true } kvproto = { workspace = true } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } diff --git a/components/raft_log_engine/Cargo.toml b/components/raft_log_engine/Cargo.toml index 99540c13aa3..e643089a872 100644 --- a/components/raft_log_engine/Cargo.toml +++ b/components/raft_log_engine/Cargo.toml @@ -14,7 +14,7 @@ lazy_static = "1.4.0" num_cpus = "1" online_config = { workspace = true } protobuf = "2" -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } raft-engine = { git = "https://github.com/tikv/raft-engine.git", features = ["swap"] } serde = "1.0" serde_derive = "1.0" diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 418684715f9..1f19a161b09 100644 --- a/components/raft_log_engine/src/engine.rs +++ b/components/raft_log_engine/src/engine.rs @@ -192,7 +192,7 @@ impl FileSystem for ManagedFileSystem { fn delete>(&self, path: P) -> IoResult<()> { self.base_file_system.delete(path.as_ref())?; if let Some(ref manager) = self.key_manager { - manager.delete_file(path.as_ref().to_str().unwrap())?; + manager.delete_file(path.as_ref().to_str().unwrap(), None)?; } Ok(()) } @@ -206,8 +206,11 @@ impl FileSystem for ManagedFileSystem { let r = self .base_file_system .rename(src_path.as_ref(), dst_path.as_ref()); - let del_file = if r.is_ok() { src_str } else { dst_str }; - if let Err(e) = 
manager.delete_file(del_file) { + if r.is_ok() { + if let Err(e) = manager.delete_file(src_str, Some(dst_str)) { + warn!("fail to remove encryption metadata during 'rename'"; "err" => ?e); + } + } else if let Err(e) = manager.delete_file(dst_str, Some(src_str)) { warn!("fail to remove encryption metadata during 'rename'"; "err" => ?e); } r @@ -216,6 +219,7 @@ impl FileSystem for ManagedFileSystem { } } + // TODO: distinguish reuse to trash and from trash. fn reuse>(&self, src_path: P, dst_path: P) -> IoResult<()> { if let Some(ref manager) = self.key_manager { // Note: In contrast to `rename`, `reuse` will make sure the encryption @@ -229,9 +233,12 @@ impl FileSystem for ManagedFileSystem { let r = self .base_file_system .rename(src_path.as_ref(), dst_path.as_ref()); - let del_file = if r.is_ok() { src_str } else { dst_str }; - if let Err(e) = manager.delete_file(del_file) { - warn!("fail to remove encryption metadata during 'reuse'"; "err" => ?e); + if r.is_ok() { + if let Err(e) = manager.delete_file(src_str, Some(dst_str)) { + warn!("fail to remove encryption metadata during 'rename'"; "err" => ?e); + } + } else if let Err(e) = manager.delete_file(dst_str, Some(src_str)) { + warn!("fail to remove encryption metadata during 'rename'"; "err" => ?e); } r } else { @@ -253,7 +260,7 @@ impl FileSystem for ManagedFileSystem { fn delete_metadata>(&self, path: P) -> IoResult<()> { if let Some(ref manager) = self.key_manager { // Note: no error if the file doesn't exist. 
- manager.delete_file(path.as_ref().to_str().unwrap())?; + manager.delete_file(path.as_ref().to_str().unwrap(), None)?; } self.base_file_system.delete_metadata(path) } diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 15be69624e3..4279ed12726 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -49,7 +49,7 @@ parking_lot = "0.12" pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } raft-proto = { version = "0.7.0" } raftstore = { workspace = true } rand = "0.8.3" diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index ff1fccbe0cd..70348b34c04 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -215,6 +215,9 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, fn on_tick(&mut self, tick: PeerTick) { self.fsm.tick_registry[tick as usize] = false; + if !self.fsm.peer().serving() { + return; + } match tick { PeerTick::Raft => self.on_raft_tick(), PeerTick::PdHeartbeat => self.on_pd_heartbeat(), diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 4a79ee18b1f..8920ea97e1d 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -97,6 +97,8 @@ impl CompactLogContext { impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, T> { pub fn on_compact_log_tick(&mut self, force: bool) { + // Might read raft logs. 
+ debug_assert!(self.fsm.peer().serving()); if !self.fsm.peer().is_leader() { // `compact_cache_to` is called when apply, there is no need to call // `compact_to` here, snapshot generating has already been cancelled @@ -229,9 +231,7 @@ impl Peer { } // Create a compact log request and notify directly. - // TODO: move this into a function - let term = self.raft_group().raft.raft_log.term(compact_idx).unwrap(); - + let term = self.index_term(compact_idx); let mut req = new_admin_request(self.region_id(), self.peer().clone()); let mut admin = AdminRequest::default(); admin.set_cmd_type(AdminCmdType::CompactLog); diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index b2bea379299..530789c31f6 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -586,7 +586,14 @@ impl Apply { let mut removed_records: Vec<_> = self.region_state_mut().take_removed_records().into(); let mut merged_records: Vec<_> = self.region_state_mut().take_merged_records().into(); let updates = admin_req.get_update_gc_peers().get_peer_id(); - info!(self.logger, "update gc peer"; "index" => log_index, "updates" => ?updates, "gc_peers" => ?removed_records, "merged_peers" => ?merged_records); + info!( + self.logger, + "update gc peer"; + "index" => log_index, + "updates" => ?updates, + "gc_peers" => ?removed_records, + "merged_peers" => ?merged_records + ); removed_records.retain(|p| !updates.contains(&p.get_id())); merged_records.retain_mut(|r| { let mut sources: Vec<_> = r.take_source_peers().into(); diff --git a/components/raftstore-v2/src/operation/command/admin/flashback.rs b/components/raftstore-v2/src/operation/command/admin/flashback.rs index b7acdcc5f0a..7301736e380 100644 --- a/components/raftstore-v2/src/operation/command/admin/flashback.rs +++ 
b/components/raftstore-v2/src/operation/command/admin/flashback.rs @@ -95,9 +95,11 @@ impl Peer { res.region_state.mut_region().set_is_in_flashback(false); }) })(); - slog::debug!(self.logger, + slog::debug!( + self.logger, "flashback update region"; - "region" => ?res.region_state.get_region()); + "region" => ?res.region_state.get_region() + ); let region_id = self.region_id(); { let mut meta = store_ctx.store_meta.lock().unwrap(); diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 17ab24836cd..a9d8bd664fe 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -248,16 +248,8 @@ impl Apply { end_key )); } - // region key range has no data prefix, so we must use origin key to check. util::check_key_in_region(start_key, self.region())?; - let end_key = keys::data_end_key(end_key); - let region_end_key = keys::data_end_key(self.region().get_end_key()); - if end_key > region_end_key { - return Err(Error::KeyNotInRegion( - end_key.to_vec(), - self.region().clone(), - )); - } + util::check_key_in_region_inclusive(end_key, self.region())?; if cf.is_empty() { cf = CF_DEFAULT; @@ -268,6 +260,7 @@ impl Apply { } let start_key = keys::data_key(start_key); + let end_key = keys::data_end_key(end_key); let start = Instant::now_coarse(); // Use delete_files_in_range to drop as many sst files as possible, this diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index e2ad63ed62c..bf40429c9f1 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -345,6 +345,13 @@ impl Store { ER: RaftEngine, T: Transport, { + debug!( + self.logger(), + "store handle raft message"; + "message_type" => %util::MsgType(&msg), + "from_peer_id" => msg.get_from_peer().get_id(), + "to_peer_id" => msg.get_to_peer().get_id(), 
+ ); let region_id = msg.get_region_id(); // The message can be sent when the peer is being created, so try send it first. let mut msg = if let Err(TrySendError::Disconnected(PeerMsg::RaftMessage(m))) = diff --git a/components/raftstore-v2/src/operation/query/capture.rs b/components/raftstore-v2/src/operation/query/capture.rs index f83b7982cca..5dd43f14e19 100644 --- a/components/raftstore-v2/src/operation/query/capture.rs +++ b/components/raftstore-v2/src/operation/query/capture.rs @@ -52,14 +52,24 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: raftstore::store::Transport> pub fn on_capture_change(&mut self, capture_change: CaptureChange) { fail_point!("raft_on_capture_change"); - let apply_router = self.fsm.peer().apply_scheduler().unwrap().clone(); + let apply_scheduler = self.fsm.peer().apply_scheduler().cloned(); + let id = self.fsm.peer().region_id(); + let term = self.fsm.peer().term(); let (ch, _) = QueryResChannel::with_callback(Box::new(move |res| { if let QueryResult::Response(resp) = res && resp.get_header().has_error() { // Return error capture_change.snap_cb.report_error(resp.clone()); return; } - apply_router.send(ApplyTask::CaptureApply(capture_change)) + if let Some(scheduler) = apply_scheduler { + scheduler.send(ApplyTask::CaptureApply(capture_change)) + } else { + let mut resp = cmd_resp::err_resp(raftstore::Error::RegionNotFound(id), term); + resp.mut_header() + .mut_error() + .set_message("apply scheduler is None".to_owned()); + capture_change.snap_cb.report_error(resp); + } })); self.on_leader_callback(ch); } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index d505442f55a..60367ce8af8 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -490,7 +490,12 @@ impl Peer { let to_peer = match self.peer_from_cache(msg.to) { Some(p) => p, None => { - warn!(self.logger, "failed to look up recipient peer"; "to_peer" 
=> msg.to, "message_type" => ?msg.msg_type); + warn!( + self.logger, + "failed to look up recipient peer"; + "to_peer" => msg.to, + "message_type" => ?msg.msg_type + ); return None; } }; diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index e0625deb306..eac280158e3 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -169,7 +169,7 @@ pub fn install_tablet( } if let Err(e) = fs::rename(source, &target_path) { if let Some(m) = &key_manager { - m.delete_file(target_path.to_str().unwrap()).unwrap(); + m.remove_dir(&target_path, Some(source)).unwrap(); } panic!( "failed to rename tablet {} => {}: {:?}", @@ -179,7 +179,7 @@ pub fn install_tablet( ); } if let Some(m) = &key_manager { - m.delete_file(source.to_str().unwrap()).unwrap(); + m.remove_dir(source, Some(&target_path)).unwrap(); } true } diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 52b9099cdf0..0e72d83b823 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -142,7 +142,7 @@ pub struct UnsafeWrite { pub struct CaptureChange { pub observer: ChangeObserver, pub region_epoch: RegionEpoch, - // A callback accpets a snapshot. + // A callback accepts a snapshot. 
pub snap_cb: AnyResChannel, } diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index fe9b1f64fff..2759aa10477 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -604,7 +604,7 @@ where if r && let Some(cb) = cb.take() { cb(); } - r + !r }); } @@ -679,4 +679,63 @@ mod tests { runner.on_timeout(); assert!(!path.exists()); } + + #[test] + fn test_destroy_locked_tablet() { + let dir = Builder::new() + .prefix("test_destroy_locked_tablet") + .tempdir() + .unwrap(); + let factory = Box::new(TestTabletFactory::new( + DbOptions::default(), + vec![("default", CfOptions::default())], + )); + let registry = TabletRegistry::new(factory, dir.path()).unwrap(); + let logger = slog_global::borrow_global().new(slog::o!()); + let (_dir, importer) = create_tmp_importer(); + let snap_dir = dir.path().join("snap"); + let snap_mgr = TabletSnapManager::new(snap_dir, None).unwrap(); + let mut runner = Runner::new(registry.clone(), importer, snap_mgr, logger); + + let mut region = Region::default(); + let r_1 = 1; + region.set_id(r_1); + region.set_start_key(b"a".to_vec()); + region.set_end_key(b"b".to_vec()); + let tablet1 = registry + .load(TabletContext::new(®ion, Some(1)), true) + .unwrap() + .latest() + .unwrap() + .clone(); + let path1 = PathBuf::from(tablet1.path()); + let r_2 = 2; + region.set_id(r_2); + region.set_start_key(b"c".to_vec()); + region.set_end_key(b"d".to_vec()); + let tablet2 = registry + .load(TabletContext::new(®ion, Some(1)), true) + .unwrap() + .latest() + .unwrap() + .clone(); + let path2 = PathBuf::from(tablet2.path()); + + // both tablets are locked. 
+ runner.run(Task::prepare_destroy(tablet1, r_1, 10)); + runner.run(Task::prepare_destroy(tablet2, r_2, 10)); + runner.run(Task::destroy(r_1, 100)); + runner.run(Task::destroy(r_2, 100)); + assert!(path1.exists()); + assert!(path2.exists()); + + registry.remove(r_1); + runner.on_timeout(); + assert!(!path1.exists()); + assert!(path2.exists()); + + registry.remove(r_2); + runner.on_timeout(); + assert!(!path2.exists()); + } } diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index eceb756cc18..fcb379add1f 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -21,7 +21,7 @@ use engine_test::{ kv::{KvTestEngine, KvTestSnapshot, TestTabletFactory}, raft::RaftTestEngine, }; -use engine_traits::{EncryptionKeyManager, TabletContext, TabletRegistry, DATA_CFS}; +use engine_traits::{TabletContext, TabletRegistry, DATA_CFS}; use futures::executor::block_on; use kvproto::{ kvrpcpb::ApiVersion, @@ -662,7 +662,7 @@ impl Cluster { } std::fs::rename(&gen_path, &recv_path).unwrap(); if let Some(m) = from_snap_mgr.key_manager() { - m.delete_file(gen_path.to_str().unwrap()).unwrap(); + m.remove_dir(&gen_path, Some(&recv_path)).unwrap(); } assert!(recv_path.exists()); } diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 70712e61d7d..e09199bcaa4 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -52,7 +52,7 @@ fs2 = "0.4" futures = "0.3" futures-util = { version = "0.3.1", default-features = false, features = ["io"] } getset = "0.1" -grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } +grpcio-health = { workspace = true } into_other = { workspace = true } itertools = "0.10" keys = { workspace = true } @@ -69,7 +69,7 @@ pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } 
prometheus-static-metric = "0.5" protobuf = { version = "2.8", features = ["bytes"] } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } raft-proto = { version = "0.7.0", default-features = false } rand = "0.8.3" resource_control = { workspace = true } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 65f8aa0a8a6..43e090a8aa0 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -4696,7 +4696,7 @@ where if self.pending_merge_state.is_some() { return Err(Error::ProposalInMergingMode(self.region_id)); } - if self.raft_group.raft.pending_conf_index > self.get_store().applied_index() { + if self.raft_group.raft.has_pending_conf() { info!( "there is a pending conf change, try later"; "region_id" => self.region_id, diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index e3b1c594ea5..6fe21fe9750 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -900,7 +900,7 @@ impl Snapshot { } if let Some(ref mgr) = self.mgr.encryption_key_manager { for tmp_file_path in cf_file.tmp_file_paths() { - mgr.delete_file(&tmp_file_path)?; + mgr.delete_file(&tmp_file_path, None)?; } } } @@ -948,7 +948,7 @@ impl Snapshot { if file_exists(&file_path) { delete_file_if_exist(&file_path).unwrap(); if let Some(ref mgr) = self.mgr.encryption_key_manager { - mgr.delete_file(file_path.to_str().unwrap()).unwrap(); + mgr.delete_file(file_path.to_str().unwrap(), None).unwrap(); } file_id += 1; } else { @@ -998,7 +998,7 @@ impl Snapshot { } if let Some(ref mgr) = self.mgr.encryption_key_manager { for file_path in &file_paths { - mgr.delete_file(file_path).unwrap(); + mgr.delete_file(file_path, None).unwrap(); } } } @@ -1916,14 +1916,14 @@ impl SnapManagerCore { // because without metadata file, saved cf files are nothing. 
while let Err(e) = mgr.link_file(src, dst) { if e.kind() == ErrorKind::AlreadyExists { - mgr.delete_file(dst)?; + mgr.delete_file(dst, None)?; continue; } return Err(e.into()); } let r = file_system::rename(src, dst); let del_file = if r.is_ok() { src } else { dst }; - if let Err(e) = mgr.delete_file(del_file) { + if let Err(e) = mgr.delete_file(del_file, None) { warn!("fail to remove encryption metadata during 'rename_tmp_cf_file_for_send'"; "err" => ?e); } diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index b2fa44b6d4c..e17fa17e97e 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1086,7 +1086,7 @@ pub fn check_conf_change( let promoted_commit_index = after_progress.maximal_committed_index().0; let first_index = node.raft.raft_log.first_index(); if current_progress.is_singleton() // It's always safe if there is only one node in the cluster. - || promoted_commit_index + 1 >= first_index + || promoted_commit_index + 1 >= first_index { return Ok(()); } @@ -1096,10 +1096,12 @@ pub fn check_conf_change( .inc(); Err(box_err!( - "{:?}: before: {:?}, after: {:?}, first index {}, promoted commit index {}", + "{:?}: before: {:?}, {:?}; after: {:?}, {:?}; first index {}; promoted commit index {}", change_peers, - current_progress.conf().to_conf_state(), - after_progress.conf().to_conf_state(), + current_progress.conf(), + current_progress.iter().collect::>(), + after_progress.conf(), + current_progress.iter().collect::>(), first_index, promoted_commit_index )) diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 488d24ac134..ced50a9dc52 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -803,6 +803,7 @@ where "check term"; "delegate_term" => delegate.term, "header_term" => req.get_header().get_term(), + "tag" => &delegate.tag, ); 
TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.term_mismatch.inc()); return Err(e); diff --git a/components/resolved_ts/Cargo.toml b/components/resolved_ts/Cargo.toml index 61a0118be5d..c6c2695fada 100644 --- a/components/resolved_ts/Cargo.toml +++ b/components/resolved_ts/Cargo.toml @@ -38,7 +38,7 @@ online_config = { workspace = true } pd_client = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } raftstore = { workspace = true } security = { workspace = true } slog = { workspace = true } @@ -52,7 +52,6 @@ txn_types = { workspace = true } [dev-dependencies] engine_rocks = { workspace = true } panic_hook = { workspace = true } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } tempfile = "3.0" test_raftstore = { workspace = true } test_sst_importer = { workspace = true } diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index 0286a671fa1..dc8360bede0 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -53,7 +53,7 @@ file_system = { workspace = true } fs2 = "0.4" futures = "0.3" grpcio = { workspace = true } -grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } +grpcio-health = { workspace = true } hex = "0.4" keys = { workspace = true } kvproto = { workspace = true } @@ -63,7 +63,7 @@ log_wrappers = { workspace = true } pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } raft_log_engine = { workspace = true } raftstore = { workspace = true, features = ["engine_rocks"] } raftstore-v2 = { workspace = 
true } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 27b45e6973b..aa86709a7e1 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1457,13 +1457,18 @@ impl TikvServer { self.raft_statistics = raft_statistics; // Create kv engine. - let builder = KvEngineFactoryBuilder::new(env, &self.core.config, block_cache) - .compaction_event_sender(Arc::new(RaftRouterCompactedEventSender { - router: Mutex::new(self.router.clone()), - })) - .region_info_accessor(self.region_info_accessor.clone()) - .sst_recovery_sender(self.init_sst_recovery_sender()) - .flow_listener(flow_listener); + let builder = KvEngineFactoryBuilder::new( + env, + &self.core.config, + block_cache, + self.core.encryption_key_manager.clone(), + ) + .compaction_event_sender(Arc::new(RaftRouterCompactedEventSender { + router: Mutex::new(self.router.clone()), + })) + .region_info_accessor(self.region_info_accessor.clone()) + .sst_recovery_sender(self.init_sst_recovery_sender()) + .flow_listener(flow_listener); let factory = Box::new(builder.build()); let kv_engine = factory .create_shared_db(&self.core.store_path) @@ -1555,7 +1560,7 @@ mod test { .block_cache .build_shared_cache(config.storage.engine); - let factory = KvEngineFactoryBuilder::new(env, &config, cache).build(); + let factory = KvEngineFactoryBuilder::new(env, &config, cache, None).build(); let reg = TabletRegistry::new(Box::new(factory), path.path().join("tablets")).unwrap(); for i in 1..6 { diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 6b1bc2c331d..38b31755cda 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1372,9 +1372,14 @@ impl TikvServer { self.raft_statistics = raft_statistics; // Create kv engine. 
- let builder = KvEngineFactoryBuilder::new(env, &self.core.config, block_cache) - .sst_recovery_sender(self.init_sst_recovery_sender()) - .flow_listener(flow_listener); + let builder = KvEngineFactoryBuilder::new( + env, + &self.core.config, + block_cache, + self.core.encryption_key_manager.clone(), + ) + .sst_recovery_sender(self.init_sst_recovery_sender()) + .flow_listener(flow_listener); let mut node = NodeV2::new(&self.core.config.server, self.pd_client.clone(), None); node.try_bootstrap_store(&self.core.config.raft_store, &raft_engine) @@ -1495,7 +1500,7 @@ mod test { .block_cache .build_shared_cache(config.storage.engine); - let factory = KvEngineFactoryBuilder::new(env, &config, cache).build(); + let factory = KvEngineFactoryBuilder::new(env, &config, cache, None).build(); let reg = TabletRegistry::new(Box::new(factory), path.path().join("tablets")).unwrap(); for i in 1..6 { diff --git a/components/snap_recovery/src/init_cluster.rs b/components/snap_recovery/src/init_cluster.rs index 4e72a19d6a6..e200ec6ff44 100644 --- a/components/snap_recovery/src/init_cluster.rs +++ b/components/snap_recovery/src/init_cluster.rs @@ -325,9 +325,10 @@ pub fn create_local_engine_service( .build_shared_cache(config.storage.engine); // init rocksdb / kv db - let factory = KvEngineFactoryBuilder::new(env.clone(), config, block_cache) - .lite(true) - .build(); + let factory = + KvEngineFactoryBuilder::new(env.clone(), config, block_cache, key_manager.clone()) + .lite(true) + .build(); let kv_db = match factory.create_shared_db(&config.storage.data_dir) { Ok(db) => db, Err(e) => handle_engine_error(e), diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index a99c7c0f7e1..b270d26a411 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -66,7 +66,7 @@ impl ImportPath { key_manager.link_file(temp_str, save_str)?; let r = file_system::rename(&self.temp, &self.save); let del_file 
= if r.is_ok() { temp_str } else { save_str }; - if let Err(e) = key_manager.delete_file(del_file) { + if let Err(e) = key_manager.delete_file(del_file, None) { warn!("fail to remove encryption metadata during 'save'"; "file" => ?self, "err" => ?e); } @@ -154,7 +154,7 @@ impl ImportFile { manager.link_file(tmp_str, save_str)?; let r = file_system::rename(&self.path.temp, &self.path.save); let del_file = if r.is_ok() { tmp_str } else { save_str }; - if let Err(e) = manager.delete_file(del_file) { + if let Err(e) = manager.delete_file(del_file, None) { warn!("fail to remove encryption metadata during finishing importing files."; "err" => ?e); } @@ -170,7 +170,7 @@ impl ImportFile { let path = &self.path.temp; if path.exists() { if let Some(ref manager) = self.key_manager { - manager.delete_file(path.to_str().unwrap())?; + manager.delete_file(path.to_str().unwrap(), None)?; } file_system::remove_file(path)?; } @@ -281,7 +281,7 @@ impl ImportDir { if path.exists() { file_system::remove_file(path)?; if let Some(manager) = key_manager { - manager.delete_file(path.to_str().unwrap())?; + manager.delete_file(path.to_str().unwrap(), None)?; } } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 8412273fa0f..787e562082a 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -1215,7 +1215,7 @@ impl SstImporter { key_manager.link_file(temp_str, save_str)?; let r = file_system::rename(&path.temp, &path.save); let del_file = if r.is_ok() { temp_str } else { save_str }; - if let Err(e) = key_manager.delete_file(del_file) { + if let Err(e) = key_manager.delete_file(del_file, None) { warn!("fail to remove encryption metadata during 'do_download'"; "err" => ?e); } r?; diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index deb72675cf4..ff7526172d5 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs 
@@ -37,7 +37,7 @@ pub fn prepare_sst_for_ingestion, Q: AsRef>( // rocksdb is not atomic, thus the file may be deleted but key in key // manager is not. if let Some(key_manager) = encryption_key_manager { - key_manager.delete_file(clone)?; + key_manager.delete_file(clone, None)?; } #[cfg(unix)] @@ -73,21 +73,27 @@ pub fn copy_sst_for_ingestion, Q: AsRef>( clone: Q, encryption_key_manager: Option<&DataKeyManager>, ) -> Result<()> { - let path = path.as_ref().to_str().unwrap(); - let clone = clone.as_ref().to_str().unwrap(); - - if Path::new(clone).exists() { - file_system::remove_file(clone).map_err(|e| format!("remove {}: {:?}", clone, e))?; + let path = path.as_ref(); + let clone = clone.as_ref(); + if clone.exists() { + file_system::remove_file(clone) + .map_err(|e| format!("remove {}: {:?}", clone.display(), e))?; } // always try to remove the file from key manager because the clean up in // rocksdb is not atomic, thus the file may be deleted but key in key // manager is not. if let Some(key_manager) = encryption_key_manager { - key_manager.delete_file(clone)?; + key_manager.delete_file(clone.to_str().unwrap(), None)?; } - file_system::copy_and_sync(path, clone) - .map_err(|e| format!("copy from {} to {}: {:?}", path, clone, e))?; + file_system::copy_and_sync(path, clone).map_err(|e| { + format!( + "copy from {} to {}: {:?}", + path.display(), + clone.display(), + e + ) + })?; let mut pmts = file_system::metadata(clone)?.permissions(); if pmts.readonly() { @@ -96,9 +102,9 @@ pub fn copy_sst_for_ingestion, Q: AsRef>( } // sync clone dir - File::open(Path::new(clone).parent().unwrap())?.sync_all()?; + File::open(clone.parent().unwrap())?.sync_all()?; if let Some(key_manager) = encryption_key_manager { - key_manager.link_file(path, clone)?; + key_manager.link_file(path.to_str().unwrap(), clone.to_str().unwrap())?; } Ok(()) @@ -211,7 +217,9 @@ mod tests { // Since we are not using key_manager in db, simulate the db deleting the file // from key_manager. 
if let Some(manager) = key_manager { - manager.delete_file(sst_clone.to_str().unwrap()).unwrap(); + manager + .delete_file(sst_clone.to_str().unwrap(), None) + .unwrap(); } // The second ingestion will copy sst_path to sst_clone. diff --git a/components/test_pd_client/Cargo.toml b/components/test_pd_client/Cargo.toml index 975d4baff1b..f1256938535 100644 --- a/components/test_pd_client/Cargo.toml +++ b/components/test_pd_client/Cargo.toml @@ -13,7 +13,7 @@ keys = { workspace = true } kvproto = { workspace = true } log_wrappers = { workspace = true } pd_client = { workspace = true } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } slog = { workspace = true } slog-global = { workspace = true } tikv_util = { workspace = true } diff --git a/components/test_raftstore-v2/Cargo.toml b/components/test_raftstore-v2/Cargo.toml index 58294e58b34..5d60b7a82b8 100644 --- a/components/test_raftstore-v2/Cargo.toml +++ b/components/test_raftstore-v2/Cargo.toml @@ -38,14 +38,14 @@ fail = "0.5" file_system = { workspace = true } futures = "0.3" grpcio = { workspace = true } -grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } +grpcio-health = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } lazy_static = "1.3" log_wrappers = { workspace = true } pd_client = { workspace = true } protobuf = { version = "2.8", features = ["bytes"] } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } raftstore = { workspace = true, features = ["testexport"] } raftstore-v2 = { workspace = true, features = ["testexport"] } rand = "0.8" @@ -54,9 +54,9 @@ resource_control = { workspace = true } resource_metering = { workspace = true } security = { workspace = true } server = { workspace = true } -slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } +slog = { workspace = 
true } # better to not use slog-global, but pass in the logger -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog-global = { workspace = true } tempfile = "3.0" test_pd_client = { workspace = true } test_raftstore = { workspace = true } @@ -64,5 +64,5 @@ test_util = { workspace = true } tikv = { workspace = true } tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread"] } -tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } +tokio-timer = { workspace = true } txn_types = { workspace = true } diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index 0e8f67057a3..2809c2a01b0 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -75,8 +75,8 @@ pub fn create_test_engine( bootstrap_store(&raft_engine, cluster_id, store_id).unwrap(); } - let builder = - KvEngineFactoryBuilder::new(env, &cfg.tikv, cache).sst_recovery_sender(Some(scheduler)); + let builder = KvEngineFactoryBuilder::new(env, &cfg.tikv, cache, key_manager.clone()) + .sst_recovery_sender(Some(scheduler)); let factory = Box::new(builder.build()); let rocks_statistics = factory.rocks_statistics(); diff --git a/components/test_raftstore/Cargo.toml b/components/test_raftstore/Cargo.toml index 1b87aeac11b..ddd9ea33aaa 100644 --- a/components/test_raftstore/Cargo.toml +++ b/components/test_raftstore/Cargo.toml @@ -38,14 +38,14 @@ fail = "0.5" file_system = { workspace = true } futures = "0.3" grpcio = { workspace = true } -grpcio-health = { version = "0.10", default-features = false, features = ["protobuf-codec"] } +grpcio-health = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } lazy_static = "1.3" log_wrappers = { workspace = true } pd_client = { workspace = true } protobuf = { version = "2.8", features = ["bytes"] } -raft = { 
version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } raftstore = { workspace = true, features = ["testexport"] } rand = "0.8" resolved_ts = { workspace = true } diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 0c83ca6fec5..a497631c4fc 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -649,8 +649,8 @@ pub fn create_test_engine( let (raft_engine, raft_statistics) = RaftTestEngine::build(&cfg, &env, &key_manager, &cache); - let mut builder = - KvEngineFactoryBuilder::new(env, &cfg, cache).sst_recovery_sender(Some(scheduler)); + let mut builder = KvEngineFactoryBuilder::new(env, &cfg, cache, key_manager.clone()) + .sst_recovery_sender(Some(scheduler)); if let Some(router) = router { builder = builder.compaction_event_sender(Arc::new(RaftRouterCompactedEventSender { router: Mutex::new(router), diff --git a/components/tikv_kv/Cargo.toml b/components/tikv_kv/Cargo.toml index 1f6fd641cd3..7d517de2cba 100644 --- a/components/tikv_kv/Cargo.toml +++ b/components/tikv_kv/Cargo.toml @@ -41,7 +41,7 @@ log_wrappers = { workspace = true } pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } raftstore = { workspace = true } slog = { workspace = true } slog-global = { workspace = true } diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 706d00176e1..b025fd8bac0 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -2980,7 +2980,7 @@ "expr": "sum(rate(tikv_server_raft_append_rejects{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (instance)", "interval": "", "intervalFactor": 2, - "legendFormat": 
"{{instance}}-existed", + "legendFormat": "{{instance}}-memory", "queryType": "randomWalk", "refId": "A" } diff --git a/src/config/mod.rs b/src/config/mod.rs index cd590550b6f..d575827e699 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3472,6 +3472,11 @@ impl TikvConfig { enable-partitioned-raft-kv-compatible-learner, overwrite to false" ); } + if self.raft_store.use_delete_range { + return Err( + "partitioned-raft-kv doesn't support raftstore.use-delete-range=true.".into(), + ); + } } // Validate raftstore with other components. diff --git a/src/server/debug2.rs b/src/server/debug2.rs index 4c21731c41d..07e079799e0 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -1221,7 +1221,7 @@ mod tests { .build_shared_cache(cfg.storage.engine); let env = cfg.build_shared_rocks_env(None, None).unwrap(); - let factory = KvEngineFactoryBuilder::new(env, &cfg, cache).build(); + let factory = KvEngineFactoryBuilder::new(env, &cfg, cache, None).build(); let reg = TabletRegistry::new(Box::new(factory), path).unwrap(); let raft_engine = RaftLogEngine::new(cfg.raft_engine.config(), None, None).unwrap(); @@ -1487,7 +1487,7 @@ mod tests { .build_shared_cache(cfg.storage.engine); let env = cfg.build_shared_rocks_env(None, None).unwrap(); - let factory = KvEngineFactoryBuilder::new(env, &cfg, cache).build(); + let factory = KvEngineFactoryBuilder::new(env, &cfg, cache, None).build(); let reg = TabletRegistry::new(Box::new(factory), path).unwrap(); let raft_engine = RaftLogEngine::new(cfg.raft_engine.config(), None, None).unwrap(); diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index bf70a63acdb..f8560f7ed55 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -42,7 +42,12 @@ pub struct KvEngineFactoryBuilder { } impl KvEngineFactoryBuilder { - pub fn new(env: Arc, config: &TikvConfig, cache: Cache) -> Self { + pub fn new( + env: Arc, + config: &TikvConfig, + cache: Cache, + key_manager: Option>, + ) -> Self { 
Self { inner: FactoryInner { region_info_accessor: None, @@ -50,7 +55,7 @@ impl KvEngineFactoryBuilder { api_version: config.storage.api_version(), flow_listener: None, sst_recovery_sender: None, - encryption_key_manager: None, + encryption_key_manager: key_manager, db_resources: config.rocksdb.build_resources(env), cf_resources: config.rocksdb.build_cf_resources(cache), state_storage: None, @@ -83,11 +88,6 @@ impl KvEngineFactoryBuilder { self } - pub fn encryption_key_manager(mut self, m: Option>) -> Self { - self.inner.encryption_key_manager = m; - self - } - /// Set whether enable lite mode. /// /// In lite mode, most listener/filters will not be installed. @@ -290,7 +290,7 @@ mod tests { let dir = test_util::temp_dir(name, false); let env = cfg.build_shared_rocks_env(None, None).unwrap(); - let factory = KvEngineFactoryBuilder::new(env, &cfg, cache).build(); + let factory = KvEngineFactoryBuilder::new(env, &cfg, cache, None).build(); let reg = TabletRegistry::new(Box::new(factory), dir.path()).unwrap(); (dir, reg) } diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 0c319a73baa..c986c640b6d 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -2296,6 +2296,7 @@ fn needs_reject_raft_append(reject_messages_on_memory_ratio: f64) -> bool { if (raft_msg_usage + cached_entries + applying_entries) as f64 > usage as f64 * reject_messages_on_memory_ratio { + // FIXME: this doesn't output to logfile. 
debug!("need reject log append on memory limit"; "raft messages" => raft_msg_usage, "cached entries" => cached_entries, diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index e83339bfad5..fe0329ff9df 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -487,7 +487,12 @@ async fn recv_snap_imp<'a>( )); } let path = snap_mgr.tmp_recv_path(&context.key); - info!("begin to receive tablet snapshot files"; "file" => %path.display(), "region_id" => region_id); + info!( + "begin to receive tablet snapshot files"; + "file" => %path.display(), + "region_id" => region_id, + "temp_exists" => path.exists(), + ); if path.exists() { if let Some(m) = snap_mgr.key_manager() { m.remove_dir(&path, None)?; @@ -520,12 +525,19 @@ async fn recv_snap_imp<'a>( } fs::rename(&path, &final_path).map_err(|e| { if let Some(m) = snap_mgr.key_manager() { - let _ = m.delete_file(final_path.to_str().unwrap()); + if let Err(e) = m.remove_dir(&final_path, Some(&path)) { + error!( + "failed to clean up encryption keys after rename fails"; + "src" => %path.display(), + "dst" => %final_path.display(), + "err" => ?e, + ); + } } e })?; if let Some(m) = snap_mgr.key_manager() { - m.delete_file(path.to_str().unwrap())?; + m.remove_dir(&path, Some(&final_path))?; } Ok(context) } @@ -558,6 +570,7 @@ pub(crate) async fn recv_snap( match res { Ok(()) => sink.close().await?, Err(e) => { + info!("receive tablet snapshot aborted"; "err" => ?e); let status = RpcStatus::with_message(RpcStatusCode::UNKNOWN, format!("{:?}", e)); sink.fail(status).await?; } @@ -1020,12 +1033,12 @@ pub fn copy_tablet_snapshot( } fs::rename(&recv_path, &final_path).map_err(|e| { if let Some(m) = recver_snap_mgr.key_manager() { - let _ = m.delete_file(final_path.to_str().unwrap()); + let _ = m.remove_dir(&final_path, Some(&recv_path)); } e })?; if let Some(m) = recver_snap_mgr.key_manager() { - m.delete_file(recv_path.to_str().unwrap())?; + m.remove_dir(&recv_path, Some(&final_path))?; } Ok(()) diff 
--git a/tests/Cargo.toml b/tests/Cargo.toml index 2f74b4e2fe5..be3a6792798 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -82,7 +82,7 @@ fail = "0.5" file_system = { workspace = true } futures = "0.3" grpcio = { workspace = true } -grpcio-health = { version = "0.10", default-features = false } +grpcio-health = { workspace = true } kvproto = { workspace = true } libc = "0.2" log_wrappers = { workspace = true } @@ -91,7 +91,7 @@ online_config = { workspace = true } paste = "1.0" pd_client = { workspace = true } protobuf = { version = "2.8", features = ["bytes"] } -raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft = { workspace = true } raft_log_engine = { workspace = true } raftstore = { workspace = true } raftstore-v2 = { workspace = true } From 20afd1784ac92dc0667d0749893981a099a7042a Mon Sep 17 00:00:00 2001 From: Lucas Date: Mon, 10 Jul 2023 15:53:11 +0800 Subject: [PATCH 0783/1149] raftstore-v2: optimize the slowness detection on the Io-Hang scenario. (#15071) ref tikv/tikv#15070 This pr includes: * Polish the annotations in `raftstore-v2/src/worker/pd/slowness.rs`. * Optimize the detection and the interval of reporting statistics on the io hang case. --- .../raftstore-v2/src/worker/pd/slowness.rs | 44 +++++++++++-------- .../raftstore-v2/src/worker/pd/store.rs | 4 +- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/components/raftstore-v2/src/worker/pd/slowness.rs b/components/raftstore-v2/src/worker/pd/slowness.rs index c68c12b2b03..a715d06ae83 100644 --- a/components/raftstore-v2/src/worker/pd/slowness.rs +++ b/components/raftstore-v2/src/worker/pd/slowness.rs @@ -82,30 +82,36 @@ where } pub fn handle_slowness_stats_tick(&mut self) { - // The following code records a periodic "white noise", which helps - // mitigate any minor fluctuations in disk I/O or network I/O latency. - // After conducting extensive e2e testing, "100ms" has been determined - // to be the most suitable choice for it. 
- self.slowness_stats - .slow_cause - .record(100_000, Instant::now()); // 100ms - - // Handle timeout if last tick is not finished as expected. let mock_slowness_last_tick_unfinished = || { fail_point!("mock_slowness_last_tick_unfinished", |_| { true }); false }; - if mock_slowness_last_tick_unfinished() - || (!self.slowness_stats.last_tick_finished && self.is_store_heartbeat_delayed()) - { - // If the last slowness tick already reached abnormal state and was delayed for - // reporting by `store-heartbeat` to PD, we should report it here manually as a - // FAKE `store-heartbeat`. It's an assurance that the heartbeat to - // PD is not lost. Normally, this case rarely happens in - // raftstore-v2. - self.handle_fake_store_heartbeat(); + // Handle timeout if the last tick is not finished as expected. + if mock_slowness_last_tick_unfinished() || !self.slowness_stats.last_tick_finished { + // Record a sufficiently large interval to indicate potential write progress + // hanging on I/O. We use the store heartbeat interval as the default value. + self.slowness_stats.slow_cause.record( + self.store_heartbeat_interval.as_micros() as u64, + Instant::now(), + ); + + // If the last slowness tick already reached an abnormal state and was delayed + // for reporting by `store-heartbeat` to PD, we should manually report it here + // as a FAKE `store-heartbeat`. This ensures that the heartbeat to PD is not + // lost. Normally, this case rarely happens in raftstore-v2. + if self.is_store_heartbeat_delayed() { + self.handle_fake_store_heartbeat(); + } + } else { + // The following code records a periodic "white noise", which helps mitigate any + // minor fluctuations in disk I/O or network I/O latency. After + // extensive e2e testing, a duration of "100ms" has been determined + // to be the most suitable choice. + self.slowness_stats + .slow_cause + .record(100_000, Instant::now()); // 100ms } - // Move to next tick. + // Move to the next tick. 
self.slowness_stats.last_tick_finished = false; } diff --git a/components/raftstore-v2/src/worker/pd/store.rs b/components/raftstore-v2/src/worker/pd/store.rs index a17957b3347..8601e9ac1b2 100644 --- a/components/raftstore-v2/src/worker/pd/store.rs +++ b/components/raftstore-v2/src/worker/pd/store.rs @@ -323,8 +323,10 @@ where pub fn is_store_heartbeat_delayed(&self) -> bool { let now = UnixSecs::now(); let interval_second = now.into_inner() - self.store_stat.last_report_ts.into_inner(); - (interval_second >= self.store_heartbeat_interval.as_secs()) + let store_heartbeat_interval = std::cmp::max(self.store_heartbeat_interval.as_secs(), 1); + (interval_second >= store_heartbeat_interval) && (interval_second <= STORE_HEARTBEAT_DELAY_LIMIT) + && (interval_second % store_heartbeat_interval == 0) } pub fn handle_inspect_latency(&self, send_time: TiInstant, inspector: LatencyInspector) { From 8e216c875caf005de84194bb6450c64bcf8b530a Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 10 Jul 2023 16:17:12 +0800 Subject: [PATCH 0784/1149] raftstore-v2: include commit index for learner snapshot (#15027) close tikv/tikv#15026 Set commit index for learner snapshots. It's needed to address compatibility issues between v1 and v2 snapshots. 
See https://github.com/pingcap/tiflash/issues/7568#issuecomment-1576382311 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 2 +- .../raftstore-v2/src/operation/ready/mod.rs | 3 -- .../src/operation/ready/snapshot.rs | 37 +++++++++++++----- components/raftstore-v2/src/raft/storage.rs | 21 +++++----- src/import/sst_service.rs | 10 ++--- tests/integrations/import/test_sst_service.rs | 2 +- tests/integrations/raftstore/test_snap.rs | 38 +++++++++++++++++++ 7 files changed, 84 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 49503f86612..3b0863e6e14 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2966,7 +2966,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#cd6769a15c69b48fd65ecb6d813b861156be4d61" +source = "git+https://github.com/pingcap/kvproto.git#f32671394cc43c7558e23fb953872e6b09a3dac2" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 60367ce8af8..d669ba23c08 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -513,9 +513,6 @@ impl Peer { ); } - // Filling start and end key is only needed for being compatible with - // raftstore v1 learners (e.g. tiflash engine). - // // There could be two cases: // - Target peer already exists but has not established communication with // leader yet diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index eac280158e3..31862a41a21 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -5,7 +5,7 @@ //! # Snapshot State //! //! generator and apply snapshot works asynchronously. the snap_sate indicates -//! 
the curren snapshot state. +//! the current snapshot state. //! //! # Process Overview //! @@ -35,7 +35,10 @@ use engine_traits::{ ALL_CFS, }; use fail::fail_point; -use kvproto::raft_serverpb::{PeerState, RaftSnapshotData}; +use kvproto::{ + metapb::PeerRole, + raft_serverpb::{PeerState, RaftSnapshotData}, +}; use protobuf::Message; use raft::{eraftpb::Snapshot, StateRole}; use raftstore::{ @@ -47,7 +50,7 @@ use raftstore::{ }, }; use slog::{debug, error, info, warn}; -use tikv_util::{box_err, log::SlogFormat, slog_panic}; +use tikv_util::{box_err, log::SlogFormat, slog_panic, store::find_peer_by_id}; use crate::{ fsm::ApplyResReporter, @@ -59,7 +62,7 @@ use crate::{ }; /// Snapshot generating task state. -/// snaposhot send success: Relax --> Generating --> Generated --> Sending --> +/// snapshot send success: Relax --> Generating --> Generated --> Sending --> /// Relax snapshot send failed: Relax --> Generating --> Generated --> Sending /// snapshot send again: Sending --> Relax #[derive(Debug)] @@ -218,7 +221,11 @@ impl Peer { } pub fn on_snapshot_generated(&mut self, snapshot: GenSnapRes) { - if self.storage_mut().on_snapshot_generated(snapshot) { + let commit_index = self.raft_group().raft.raft_log.committed; + if self + .storage_mut() + .on_snapshot_generated(snapshot, commit_index) + { self.raft_group_mut().ping(); self.set_has_ready(); } @@ -367,7 +374,7 @@ impl Apply { // Send generate snapshot task to region worker. 
let (last_applied_index, last_applied_term) = self.apply_progress(); snap_task.index.store(last_applied_index, Ordering::SeqCst); - let gen_tablet_sanp_task = ReadTask::GenTabletSnapshot { + let gen_tablet_snap_task = ReadTask::GenTabletSnapshot { region_id: snap_task.region_id, to_peer: snap_task.to_peer, tablet: self.tablet().clone(), @@ -377,7 +384,7 @@ impl Apply { for_balance: snap_task.for_balance, canceled: snap_task.canceled.clone(), }; - if let Err(e) = self.read_scheduler().schedule(gen_tablet_sanp_task) { + if let Err(e) = self.read_scheduler().schedule(gen_tablet_snap_task) { error!( self.logger, "schedule snapshot failed"; @@ -578,12 +585,12 @@ impl Storage { /// Try to switch snap state to generated. only `Generating` can switch to /// `Generated`. /// TODO: make the snap state more clearer, the snapshot must be consumed. - pub fn on_snapshot_generated(&self, res: GenSnapRes) -> bool { + pub fn on_snapshot_generated(&self, res: GenSnapRes, commit_index: u64) -> bool { if res.is_none() { self.cancel_generating_snap(None); return false; } - let (snapshot, to_peer_id) = *res.unwrap(); + let (mut snapshot, to_peer_id) = *res.unwrap(); if let Some(state) = self.snap_states.borrow_mut().get_mut(&to_peer_id) { let SnapState::Generating { ref index, @@ -599,6 +606,16 @@ impl Storage { ); return false; } + // Set commit index for learner snapshots. It's needed to address + // compatibility issues between v1 and v2 snapshots. 
+ // See https://github.com/pingcap/tiflash/issues/7568#issuecomment-1576382311 + if let Some(p) = find_peer_by_id(self.region(), to_peer_id) && p.get_role() == PeerRole::Learner { + let mut snapshot_data = RaftSnapshotData::default(); + if snapshot_data.merge_from_bytes(snapshot.get_data()).is_ok() { + snapshot_data.mut_meta().set_commit_index_hint(commit_index); + snapshot.set_data(snapshot_data.write_to_bytes().unwrap().into()); + } + } *state = SnapState::Generated(Box::new(snapshot)); } true @@ -643,7 +660,7 @@ impl Storage { let old_last_index = self.entry_storage().last_index(); if self.entry_storage().first_index() <= old_last_index { // All states are rewritten in the following blocks. Stale states will be - // cleaned up by compact worker. Have to use raft write batch here becaue + // cleaned up by compact worker. Have to use raft write batch here because // raft log engine expects deletes before writes. let raft_engine = self.entry_storage().raft_engine(); if task.raft_wb.is_none() { diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 8bc4cc2d9c5..407681c8f20 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -333,9 +333,10 @@ mod tests { DATA_CFS, }; use kvproto::{ - metapb::{Peer, Region}, - raft_serverpb::PeerState, + metapb::Region, + raft_serverpb::{PeerState, RaftSnapshotData}, }; + use protobuf::Message; use raft::{Error as RaftError, StorageError}; use raftstore::{ coprocessor::CoprocessorHost, @@ -348,6 +349,7 @@ mod tests { use slog::o; use tempfile::TempDir; use tikv_util::{ + store::new_peer, worker::{dummy_scheduler, Worker}, yatp_pool::{DefaultTicker, YatpPoolBuilder}, }; @@ -390,10 +392,8 @@ mod tests { fn new_region() -> Region { let mut region = Region::default(); region.set_id(4); - let mut p = Peer::default(); - p.set_id(5); - p.set_store_id(6); - region.mut_peers().push(p); + region.mut_peers().push(new_peer(6, 5)); + 
region.mut_peers().push(new_peer(8, 7)); region.mut_region_epoch().set_version(2); region.mut_region_epoch().set_conf_ver(4); region @@ -548,7 +548,7 @@ mod tests { let gen_task = s.gen_snap_task.borrow_mut().take().unwrap(); apply.schedule_gen_snapshot(gen_task); let res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); - s.on_snapshot_generated(res); + s.on_snapshot_generated(res, 10); assert_eq!(s.snapshot(0, 8).unwrap_err(), unavailable); assert!(s.snap_states.borrow().get(&8).is_some()); let snap = match *s.snap_states.borrow().get(&to_peer_id).unwrap() { @@ -558,6 +558,9 @@ mod tests { assert_eq!(snap.get_metadata().get_index(), 5); assert_eq!(snap.get_metadata().get_term(), 5); assert_eq!(snap.get_data().is_empty(), false); + let mut snapshot_data = RaftSnapshotData::default(); + snapshot_data.merge_from_bytes(snap.get_data()).unwrap(); + assert_eq!(snapshot_data.get_meta().get_commit_index_hint(), 0); let snap_key = TabletSnapKey::from_region_snap(4, 7, &snap); let checkpointer_path = mgr.tablet_gen_path(&snap_key); assert!(checkpointer_path.exists()); @@ -586,8 +589,8 @@ mod tests { apply.set_apply_progress(10, 5); apply.schedule_gen_snapshot(gen_task_b); // on snapshot a and b - assert_eq!(s.on_snapshot_generated(res), false); + assert_eq!(s.on_snapshot_generated(res, 0), false); let res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); - assert_eq!(s.on_snapshot_generated(res), true); + assert_eq!(s.on_snapshot_generated(res, 0), true); } } diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 99f920ceb94..11f47a4a624 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -723,8 +723,8 @@ impl ImportSst for ImportSstService { }, LocalTablets::Registry(_) => { if req.get_mode() == SwitchMode::Import { - if req.has_range() { - let range = req.take_range(); + if !req.get_ranges().is_empty() { + let range = req.take_ranges().into_vec().swap_remove(0); self.importer.range_enter_import_mode(range); Ok(true) } else { @@ 
-735,8 +735,8 @@ impl ImportSst for ImportSstService { } } else { // case SwitchMode::Normal - if req.has_range() { - let range = req.take_range(); + if !req.get_ranges().is_empty() { + let range = req.take_ranges().into_vec().swap_remove(0); self.importer.clear_import_mode_regions(range); Ok(true) } else { @@ -1245,7 +1245,7 @@ fn write_needs_restore(write: &[u8]) -> bool { false } Err(err) => { - warn!("write cannot be parsed, skipping"; "err" => %err, + warn!("write cannot be parsed, skipping"; "err" => %err, "write" => %log_wrappers::Value::key(write)); false } diff --git a/tests/integrations/import/test_sst_service.rs b/tests/integrations/import/test_sst_service.rs index aa095b50c1e..7802b18b192 100644 --- a/tests/integrations/import/test_sst_service.rs +++ b/tests/integrations/import/test_sst_service.rs @@ -123,7 +123,7 @@ fn test_ingest_sst() { fn switch_mode(import: &ImportSstClient, range: Range, mode: SwitchMode) { let mut switch_req = SwitchModeRequest::default(); switch_req.set_mode(mode); - switch_req.set_range(range); + switch_req.set_ranges(vec![range].into()); let _ = import.switch_mode(&switch_req).unwrap(); } diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index fbe2a1d9cb2..9eda281e9e4 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -20,6 +20,7 @@ use kvproto::{ raft_serverpb::{RaftMessage, RaftSnapshotData}, tikvpb::TikvClient, }; +use protobuf::Message as M1; use raft::eraftpb::{Message, MessageType, Snapshot}; use raftstore::{ coprocessor::{ApplySnapshotObserver, BoxApplySnapshotObserver, Coprocessor, CoprocessorHost}, @@ -948,3 +949,40 @@ fn check_observer(observer: &MockApplySnapshotObserver, region_id: u64, snap_pat panic!("cannot find {:?} in observer", snap_path); } + +#[test] +fn test_v2_leaner_snapshot_commit_index() { + let mut cluster = test_raftstore_v2::new_node_cluster(0, 2); + let pd_client = cluster.pd_client.clone(); + 
pd_client.disable_default_operator(); + let r = cluster.run_conf_change(); + + let (tx, rx) = mpsc::channel(); + cluster.add_recv_filter_on_node( + 2, + Box::new(RecvSnapshotFilter { + notifier: Mutex::new(Some(tx)), + region_id: r, + }), + ); + + cluster.must_put(b"k1", b"v1"); + + // Set commit index for learner snapshots. It's needed to address + // compatibility issues between v1 and v2 snapshots. + // See https://github.com/pingcap/tiflash/issues/7568#issuecomment-1576382311 + pd_client.must_add_peer(r, new_learner_peer(2, 2)); + let msg = rx.recv_timeout(Duration::from_secs(5)).unwrap(); + let mut snapshot_data = RaftSnapshotData::default(); + snapshot_data + .merge_from_bytes(msg.get_message().get_snapshot().get_data()) + .unwrap(); + assert_ne!(snapshot_data.get_meta().get_commit_index_hint(), 0); + + cluster.must_put(b"k2", b"v2"); + + pd_client.must_add_peer(r, new_peer(2, 2)); + cluster.must_transfer_leader(1, new_peer(2, 2)); + + cluster.must_put(b"k3", b"v3"); +} From 4c7b045eaade8634d30f77fc078705cd125327b1 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Mon, 10 Jul 2023 21:15:42 +0800 Subject: [PATCH 0785/1149] raftstore: use future pool for snap-generator pool (#15079) close tikv/tikv#14940 raftstore: use future pool for snap-generator pool Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../raftstore/src/store/worker/region.rs | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 525b8ce15fc..e3b2dfaef76 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -28,14 +28,11 @@ use raft::eraftpb::Snapshot as RaftSnapshot; use tikv_util::{ box_err, box_try, config::VersionTrack, - defer, error, info, thd_name, + defer, error, info, time::{Instant, UnixSecs}, warn, worker::{Runnable, RunnableWithTimer}, -}; -use 
yatp::{ - pool::{Builder, ThreadPool}, - task::future::TaskCell, + yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, }; use super::metrics::*; @@ -373,7 +370,7 @@ where coprocessor_host: CoprocessorHost, router: R, pd_client: Option>, - pool: ThreadPool, + pool: FuturePool, } impl Runner @@ -407,8 +404,13 @@ where coprocessor_host, router, pd_client, - pool: Builder::new(thd_name!("snap-generator")) - .max_thread_count(cfg.value().snap_generator_pool_size) + pool: YatpPoolBuilder::new(DefaultTicker::default()) + .name_prefix("snap-generator") + .thread_count( + 1, + cfg.value().snap_generator_pool_size, + cfg.value().snap_generator_pool_size, + ) .build_future_pool(), } } @@ -858,7 +860,12 @@ where for_balance, allow_multi_files_snapshot, ); - }); + }).unwrap_or_else( + |e| { + error!("failed to generate snapshot"; "region_id" => region_id, "err" => ?e); + SNAP_COUNTER.generate.fail.inc(); + }, + ); } task @ Task::Apply { .. } => { fail_point!("on_region_worker_apply", true, |_| {}); @@ -887,10 +894,6 @@ where } } } - - fn shutdown(&mut self) { - self.pool.shutdown(); - } } impl RunnableWithTimer for Runner From 1d60d579a95f04fd68849693937e302131c876d2 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Tue, 11 Jul 2023 11:54:42 +0800 Subject: [PATCH 0786/1149] raftstore-v2: clean up import sst file only if flushed epoch is stale. (#15064) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#15018 1. delete sst by using flushed epoch not raft epoch to avoid replay this log(flushed epoch is the epoch in raft enginem, which is related with flushed admin index) 2. delete sst divide into two place: a. cf flush, the cf flushed index is bigger than the sst applied index b. CleanUpSstTick, the sst epoch is stable than the flushed epoch. 
Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/engine_traits/src/flush.rs | 77 +++++++-- components/engine_traits/src/lib.rs | 1 + components/raftstore-v2/src/fsm/peer.rs | 9 +- .../src/operation/command/write/ingest.rs | 46 ++---- .../src/operation/ready/apply_trace.rs | 45 +++++- .../src/operation/ready/async_writer.rs | 16 +- .../raftstore-v2/src/operation/ready/mod.rs | 6 +- .../src/operation/ready/snapshot.rs | 1 + components/raftstore-v2/src/raft/apply.rs | 4 +- components/raftstore-v2/src/raft/storage.rs | 18 +++ .../raftstore/src/store/async_io/write.rs | 7 +- tests/failpoints/cases/test_import_service.rs | 149 +++++++++++++++--- 12 files changed, 297 insertions(+), 82 deletions(-) diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index 4b7e0e6687d..eebf0e7c32a 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -13,13 +13,14 @@ //! be used as the start state. use std::{ - collections::{HashMap, LinkedList}, + collections::LinkedList, sync::{ atomic::{AtomicU64, Ordering}, Arc, Mutex, RwLock, }, }; +use kvproto::import_sstpb::SstMeta; use slog_global::info; use tikv_util::set_panic_mark; @@ -61,36 +62,62 @@ struct FlushProgress { /// if the flushed index greater than it . #[derive(Debug, Clone)] pub struct SstApplyState { - sst_map: Arc, u64>>>, + // vec from cf to Vec. 
+ ssts: Arc; DATA_CFS_LEN]>>, } impl Default for SstApplyState { fn default() -> Self { Self { - sst_map: Arc::new(RwLock::new(HashMap::new())), + ssts: Arc::new(RwLock::new(Default::default())), } } } +#[derive(Debug)] +pub struct SstApplyEntry { + pub applied_index: u64, + pub sst: SstMeta, +} + +impl SstApplyEntry { + pub fn new(applied_index: u64, sst: SstMeta) -> Self { + Self { applied_index, sst } + } +} + impl SstApplyState { #[inline] - pub fn registe_ssts(&self, uuids: Vec>, sst_applied_index: u64) { - let mut map = self.sst_map.write().unwrap(); - for uuid in uuids { - map.insert(uuid, sst_applied_index); + pub fn register_ssts(&self, applied_index: u64, ssts: Vec) { + let mut sst_list = self.ssts.write().unwrap(); + for sst in ssts { + let cf_index = data_cf_offset(sst.get_cf_name()); + let entry = SstApplyEntry::new(applied_index, sst); + sst_list.get_mut(cf_index).unwrap().push(entry); } } - /// Query the sst applied index. #[inline] - pub fn sst_applied_index(&self, uuid: &Vec) -> Option { - self.sst_map.read().unwrap().get(uuid).copied() + pub fn stale_ssts(&self, cf: &str, flushed_index: u64) -> Vec { + let sst_list = self.ssts.read().unwrap(); + let cf_index = data_cf_offset(cf); + if let Some(ssts) = sst_list.get(cf_index) { + return ssts + .iter() + .filter(|entry| entry.applied_index <= flushed_index) + .map(|entry| entry.sst.clone()) + .collect(); + } + vec![] } - pub fn delete_ssts(&self, uuids: Vec>) { - let mut map = self.sst_map.write().unwrap(); - for uuid in uuids { - map.remove(&uuid); + pub fn delete_ssts(&self, ssts: &Vec) { + let mut sst_list = self.ssts.write().unwrap(); + for sst in ssts { + let cf_index = data_cf_offset(sst.get_cf_name()); + if let Some(metas) = sst_list.get_mut(cf_index) { + metas.drain_filter(|entry| entry.sst.get_uuid() == sst.get_uuid()); + } } } } @@ -270,3 +297,25 @@ impl StateStorage for R { self.consume(&mut batch, true).unwrap(); } } + +#[cfg(test)] +mod test { + use std::vec; + + use 
kvproto::import_sstpb::SstMeta; + + use super::SstApplyState; + + #[test] + pub fn test_sst_apply_state() { + let stat = SstApplyState::default(); + let mut sst = SstMeta::default(); + sst.set_cf_name("write".to_owned()); + sst.set_uuid(vec![1, 2, 3, 4]); + stat.register_ssts(10, vec![sst.clone()]); + assert!(stat.stale_ssts("default", 10).is_empty()); + let sst = stat.stale_ssts("write", 10); + assert_eq!(sst[0].get_uuid(), vec![1, 2, 3, 4]); + stat.delete_ssts(&sst); + } +} diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 537e0addf41..e09b1b52733 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -254,6 +254,7 @@ #![feature(linked_list_cursors)] #![feature(let_chains)] #![feature(str_split_as_str)] +#![feature(drain_filter)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 70348b34c04..a6a81cf4cab 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -294,9 +294,12 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, tablet_index, flushed_index, } => { - self.fsm - .peer_mut() - .on_data_flushed(cf, tablet_index, flushed_index); + self.fsm.peer_mut().on_data_flushed( + self.store_ctx, + cf, + tablet_index, + flushed_index, + ); } PeerMsg::PeerUnreachable { to_peer_id } => { self.fsm.peer_mut().on_peer_unreachable(to_peer_id) diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index 8715318db4e..ff2e299abd0 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -8,7 +8,7 @@ use raftstore::{ store::{check_sst_for_ingestion, metrics::PEER_WRITE_CMD_COUNTER, util}, Result, }; -use slog::error; +use slog::{error, info}; use 
sst_importer::range_overlaps; use tikv_util::{box_try, slog_panic}; @@ -80,38 +80,27 @@ impl Peer { ctx: &mut StoreContext, ssts: Box<[SstMeta]>, ) { - let mut stale_ssts = Vec::from(ssts); - let epoch = self.region().get_region_epoch(); - stale_ssts.retain(|sst| { - fail::fail_point!("on_cleanup_import_sst", |_| true); - util::is_epoch_stale(sst.get_region_epoch(), epoch) - }); - - // some sst needs to be kept if the log didn't flush the disk. - let flushed_indexes = self.storage().apply_trace().flushed_indexes(); - stale_ssts.retain(|sst| { - let off = data_cf_offset(sst.get_cf_name()); - let uuid = sst.get_uuid().to_vec(); - let sst_index = self.sst_apply_state().sst_applied_index(&uuid); - if let Some(index) = sst_index { - return flushed_indexes.as_ref()[off] >= index; - } - true - }); + let mut stale_ssts: Vec = Vec::from(ssts); + let flushed_epoch = self.storage().flushed_epoch(); + stale_ssts.retain(|sst| util::is_epoch_stale(sst.get_region_epoch(), flushed_epoch)); fail::fail_point!("on_cleanup_import_sst_schedule"); if stale_ssts.is_empty() { return; } - let uuids = stale_ssts - .iter() - .map(|sst| sst.get_uuid().to_vec()) - .collect(); - self.sst_apply_state().delete_ssts(uuids); + info!( + self.logger, + "clean up import sst file by CleanupImportSst task"; + "flushed_epoch" => ?flushed_epoch, + "stale_ssts" => ?stale_ssts); + + self.sst_apply_state().delete_ssts(&stale_ssts); let _ = ctx .schedulers .tablet - .schedule(tablet::Task::CleanupImportSst(stale_ssts.into())); + .schedule(tablet::Task::CleanupImportSst( + stale_ssts.into_boxed_slice(), + )); } } @@ -156,12 +145,9 @@ impl Apply { if let Err(e) = self.sst_importer().ingest(&infos, self.tablet()) { slog_panic!(self.logger, "ingest fail"; "ssts" => ?ssts, "error" => ?e); } + let metas: Vec = infos.iter().map(|info| info.meta.clone()).collect(); + self.sst_apply_state().register_ssts(index, metas); } - let uuids = infos - .iter() - .map(|info| info.meta.get_uuid().to_vec()) - .collect::>(); - 
self.set_sst_applied_index(uuids, index); self.metrics.size_diff_hint += size; self.metrics.written_bytes += size as u64; diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index 9cf241d9ee6..d535b4beb42 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -44,7 +44,7 @@ use kvproto::{ raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState}, }; use raftstore::store::{ - ReadTask, TabletSnapManager, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, + util, ReadTask, TabletSnapManager, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }; use slog::{info, trace, Logger}; use tikv_util::{box_err, slog_panic, worker::Scheduler}; @@ -57,6 +57,7 @@ use crate::{ }, raft::{Peer, Storage}, router::PeerMsg, + worker::tablet, Result, StoreRouter, }; @@ -509,6 +510,8 @@ impl Storage { for cf in ALL_CFS { lb.put_flushed_index(region_id, cf, 0, 0).unwrap(); } + write_task.flushed_epoch = + Some(self.region_state().get_region().get_region_epoch().clone()); } pub fn record_apply_trace(&mut self, write_task: &mut WriteTask) { @@ -519,6 +522,20 @@ impl Storage { } let region_id = self.region().get_id(); let raft_engine = self.entry_storage().raft_engine(); + // must use the persistent epoch to avoid epoch rollback, the restart + // logic can see ApplyTrace::recover. self.epoch is not reliable because + // it maybe too newest, so the epoch maybe rollback after the node restarted. 
+ let epoch = raft_engine + .get_region_state(region_id, trace.admin.flushed) + .unwrap() + .unwrap() + .get_region() + .get_region_epoch() + .clone(); + if util::is_epoch_stale(self.flushed_epoch(), &epoch) { + write_task.flushed_epoch = Some(epoch); + } + let tablet_index = self.tablet_index(); let lb = write_task .extra_write @@ -533,7 +550,13 @@ impl Storage { } impl Peer { - pub fn on_data_flushed(&mut self, cf: &str, tablet_index: u64, index: u64) { + pub fn on_data_flushed( + &mut self, + ctx: &mut StoreContext, + cf: &str, + tablet_index: u64, + index: u64, + ) { trace!(self.logger, "data flushed"; "cf" => cf, "tablet_index" => tablet_index, "index" => index, "trace" => ?self.storage().apply_trace()); if tablet_index < self.storage().tablet_index() { // Stale tablet. @@ -543,6 +566,24 @@ impl Peer { let apply_trace = self.storage_mut().apply_trace_mut(); apply_trace.on_flush(cf, index); apply_trace.maybe_advance_admin_flushed(apply_index); + let stale_ssts = self.sst_apply_state().stale_ssts(cf, index); + if stale_ssts.is_empty() { + return; + } + info!( + self.logger, + "schedule delete stale ssts after flush"; + "stale_ssts" => ?stale_ssts, + "apply_index" => apply_index, + "cf" => cf, + "flushed_index" => index, + ); + let _ = ctx + .schedulers + .tablet + .schedule(tablet::Task::CleanupImportSst( + stale_ssts.into_boxed_slice(), + )); } pub fn on_data_modified(&mut self, modification: DataTrace) { diff --git a/components/raftstore-v2/src/operation/ready/async_writer.rs b/components/raftstore-v2/src/operation/ready/async_writer.rs index 733031b0ff5..c2a9427580a 100644 --- a/components/raftstore-v2/src/operation/ready/async_writer.rs +++ b/components/raftstore-v2/src/operation/ready/async_writer.rs @@ -3,7 +3,7 @@ use std::collections::VecDeque; use engine_traits::{KvEngine, RaftEngine}; -use kvproto::raft_serverpb::RaftMessage; +use kvproto::{metapb::RegionEpoch, raft_serverpb::RaftMessage}; use raftstore::store::{ local_metrics::RaftMetrics, Config, 
PersistedNotifier, WriteRouter, WriteRouterContext, WriteSenders, WriteTask, @@ -24,6 +24,7 @@ struct UnpersistedReady { max_empty_number: u64, raft_msgs: Vec>, has_snapshot: bool, + flushed_epoch: Option, } /// A writer that handles asynchronous writes. @@ -73,6 +74,7 @@ impl AsyncWriter { fn send(&mut self, ctx: &mut impl WriteRouterContext, task: WriteTask) { let ready_number = task.ready_number(); let has_snapshot = task.has_snapshot; + let flushed_epoch = task.flushed_epoch.clone(); self.write_router.send_write_msg( ctx, self.unpersisted_readies.back().map(|r| r.number), @@ -83,6 +85,7 @@ impl AsyncWriter { max_empty_number: ready_number, raft_msgs: vec![], has_snapshot, + flushed_epoch, }); } @@ -111,9 +114,9 @@ impl AsyncWriter { ctx: &mut impl WriteRouterContext, ready_number: u64, logger: &Logger, - ) -> (Vec>, bool) { + ) -> (Vec>, Option, bool) { if self.persisted_number >= ready_number { - return (vec![], false); + return (vec![], None, false); } let last_unpersisted = self.unpersisted_readies.back(); @@ -128,12 +131,14 @@ impl AsyncWriter { let mut raft_messages = vec![]; let mut has_snapshot = false; + let mut flushed_epoch = None; // There must be a match in `self.unpersisted_readies`. 
loop { let Some(v) = self.unpersisted_readies.pop_front() else { slog_panic!(logger, "ready number not found"; "ready_number" => ready_number); }; has_snapshot |= v.has_snapshot; + if v.number > ready_number { slog_panic!( logger, @@ -142,6 +147,9 @@ impl AsyncWriter { "ready_number" => ready_number ); } + if let Some(epoch) = v.flushed_epoch { + flushed_epoch = Some(epoch.clone()); + } if raft_messages.is_empty() { raft_messages = v.raft_msgs; } else { @@ -156,7 +164,7 @@ impl AsyncWriter { self.write_router .check_new_persisted(ctx, self.persisted_number); - (raft_messages, has_snapshot) + (raft_messages, flushed_epoch, has_snapshot) } pub fn persisted_number(&self) -> u64 { diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index d669ba23c08..e222f04c942 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -841,7 +841,7 @@ impl Peer { error!(self.logger, "peer id not matched"; "persisted_peer_id" => peer_id, "persisted_number" => ready_number); return; } - let (persisted_message, has_snapshot) = + let (persisted_message, flushed_epoch, has_snapshot) = self.async_writer .on_persisted(ctx, ready_number, &self.logger); for msgs in persisted_message { @@ -867,6 +867,10 @@ impl Peer { self.on_applied_snapshot(ctx); } + if let Some(flushed_epoch) = flushed_epoch { + self.storage_mut().set_flushed_epoch(&flushed_epoch); + } + self.storage_mut() .entry_storage_mut() .update_cache_persisted(persisted_index); diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 31862a41a21..92b9d3d17df 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -716,6 +716,7 @@ impl Storage { lb.put_flushed_index(region_id, cf, last_index, last_index) .unwrap(); } + task.flushed_epoch = 
Some(self.region_state().get_region().get_region_epoch().clone()); let (path, clean_split) = match self.split_init_mut() { // If index not match, the peer may accept a newer snapshot after split. diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index c72e8be1969..dc474a69de3 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -298,8 +298,8 @@ impl Apply { } #[inline] - pub fn set_sst_applied_index(&mut self, uuid: Vec>, apply_index: u64) { - self.sst_apply_state.registe_ssts(uuid, apply_index); + pub fn sst_apply_state(&self) -> &SstApplyState { + &self.sst_apply_state } #[inline] diff --git a/components/raftstore-v2/src/raft/storage.rs b/components/raftstore-v2/src/raft/storage.rs index 407681c8f20..3e5654c1b6d 100644 --- a/components/raftstore-v2/src/raft/storage.rs +++ b/components/raftstore-v2/src/raft/storage.rs @@ -9,6 +9,7 @@ use collections::HashMap; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ metapb, + metapb::RegionEpoch, raft_serverpb::{RaftApplyState, RaftLocalState, RegionLocalState}, }; use raft::{ @@ -46,6 +47,9 @@ pub struct Storage { split_init: Option>, /// The flushed index of all CFs. apply_trace: ApplyTrace, + // The flushed epoch means that the epoch has persisted into the raft engine. 
+ // raft epoch >= engine epoch >= flushed epoch + flushed_epoch: RegionEpoch, } impl Debug for Storage { @@ -129,6 +133,18 @@ impl Storage { pub fn has_dirty_data(&self) -> bool { self.has_dirty_data } + + #[inline] + pub fn set_flushed_epoch(&mut self, epoch: &RegionEpoch) { + if util::is_epoch_stale(&self.flushed_epoch, epoch) { + self.flushed_epoch = epoch.clone(); + } + } + + #[inline] + pub fn flushed_epoch(&self) -> &RegionEpoch { + &self.flushed_epoch + } } impl Storage { @@ -151,6 +167,7 @@ impl Storage { } }; let region = region_state.get_region(); + let epoch = region.get_region_epoch().clone(); let logger = logger.new(o!("region_id" => region.id, "peer_id" => peer.get_id())); let has_dirty_data = match engine.get_dirty_mark(region.get_id(), region_state.get_tablet_index()) { @@ -183,6 +200,7 @@ impl Storage { gen_snap_task: RefCell::new(Box::new(None)), split_init: None, apply_trace, + flushed_epoch: epoch, }) } diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 0da8d1546b5..eedd5052bbb 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -21,7 +21,10 @@ use engine_traits::{ use error_code::ErrorCodeExt; use fail::fail_point; use file_system::{set_io_type, IoType}; -use kvproto::raft_serverpb::{RaftLocalState, RaftMessage}; +use kvproto::{ + metapb::RegionEpoch, + raft_serverpb::{RaftLocalState, RaftMessage}, +}; use parking_lot::Mutex; use protobuf::Message; use raft::eraftpb::Entry; @@ -200,6 +203,7 @@ where pub messages: Vec, pub trackers: Vec, pub has_snapshot: bool, + pub flushed_epoch: Option, } impl WriteTask @@ -222,6 +226,7 @@ where trackers: vec![], persisted_cbs: Vec::new(), has_snapshot: false, + flushed_epoch: None, } } diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index 6f99f90895d..ad2cb99d6a8 100644 --- 
a/tests/failpoints/cases/test_import_service.rs +++ b/tests/failpoints/cases/test_import_service.rs @@ -9,7 +9,7 @@ use file_system::calc_crc32; use futures::{executor::block_on, stream, SinkExt}; use grpcio::{Result, WriteFlags}; use kvproto::import_sstpb::*; -use tempfile::Builder; +use tempfile::{Builder, TempDir}; use test_raftstore::Simulator; use test_sst_importer::*; use tikv::config::TikvConfig; @@ -252,7 +252,7 @@ fn test_ingest_file_twice_and_conflict() { } #[test] -fn test_ingest_sst_v2() { +fn test_delete_sst_v2_after_epoch_stale() { let mut config = TikvConfig::default(); config.server.addr = "127.0.0.1:0".to_owned(); let cleanup_interval = Duration::from_millis(10); @@ -262,13 +262,13 @@ fn test_ingest_sst_v2() { config.raft_store.region_split_check_diff = Some(ReadableSize::kb(1)); config.server.grpc_concurrency = 1; - let (cluster, ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(Some(config)); + let (mut cluster, ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(Some(config)); let temp_dir = Builder::new().prefix("test_ingest_sst").tempdir().unwrap(); let sst_path = temp_dir.path().join("test.sst"); let sst_range = (0, 100); let (mut meta, data) = gen_sst_file(sst_path, sst_range); - - // No region id and epoch. 
+ // disable data flushed + fail::cfg("on_flush_completed", "return()").unwrap(); send_upload_sst(&import, &meta, &data).unwrap(); let mut ingest = IngestRequest::default(); ingest.set_context(ctx.clone()); @@ -280,7 +280,6 @@ fn test_ingest_sst_v2() { let resp = import.ingest(&ingest).unwrap(); assert!(!resp.has_error(), "{:?}", resp.get_error()); - fail::cfg("on_cleanup_import_sst", "return").unwrap(); let (tx, rx) = channel::<()>(); let tx = Arc::new(Mutex::new(tx)); fail::cfg_callback("on_cleanup_import_sst_schedule", move || { @@ -288,16 +287,7 @@ fn test_ingest_sst_v2() { }) .unwrap(); rx.recv_timeout(std::time::Duration::from_secs(20)).unwrap(); - let mut count = 0; - for path in &cluster.paths { - let sst_dir = path.path().join("import-sst"); - for entry in std::fs::read_dir(sst_dir).unwrap() { - let entry = entry.unwrap(); - if entry.file_type().unwrap().is_file() { - count += 1; - } - } - } + assert_eq!(1, sst_file_count(&cluster.paths)); let (tx, rx) = channel::<()>(); let tx = Arc::new(Mutex::new(tx)); @@ -305,18 +295,127 @@ fn test_ingest_sst_v2() { tx.lock().unwrap().send(()).unwrap(); }) .unwrap(); - rx.recv_timeout(std::time::Duration::from_secs(20)).unwrap(); - - fail::remove("on_update_region_keys"); - fail::remove("on_cleanup_import_sst"); - fail::remove("on_cleanup_import_sst_schedule"); - assert_ne!(0, count); - - std::thread::sleep(std::time::Duration::from_secs(1)); - + rx.recv_timeout(std::time::Duration::from_millis(100)) + .unwrap(); + std::thread::sleep(std::time::Duration::from_millis(100)); let region_keys = cluster .pd_client .get_region_approximate_keys(ctx.get_region_id()) .unwrap(); assert_eq!(100, region_keys); + fail::remove("on_update_region_keys"); + + // test restart cluster + cluster.stop_node(1); + cluster.start().unwrap(); + let count = sst_file_count(&cluster.paths); + assert_eq!(1, count); + + // delete sts if the region epoch is stale. 
+ let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + let region = cluster.get_region(b"zk10"); + pd_client.must_split_region( + region, + kvproto::pdpb::CheckPolicy::Usekey, + vec![b"random_key1".to_vec()], + ); + let (tx, rx) = channel::<()>(); + let tx = Arc::new(Mutex::new(tx)); + fail::cfg_callback("on_cleanup_import_sst_schedule", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + rx.recv_timeout(std::time::Duration::from_millis(100)) + .unwrap(); + std::thread::sleep(std::time::Duration::from_millis(100)); + assert_eq!(0, sst_file_count(&cluster.paths)); + + // test restart cluster + cluster.stop_node(1); + cluster.start().unwrap(); + let count = sst_file_count(&cluster.paths); + assert_eq!(0, count); + fail::remove("on_flush_completed"); +} + +#[test] +fn test_delete_sst_after_applied_sst() { + // let mut cluster = test_raftstore_v2::new_server_cluster(1, 1); + let mut config = TikvConfig::default(); + config.server.addr = "127.0.0.1:0".to_owned(); + let cleanup_interval = Duration::from_millis(10); + config.raft_store.split_region_check_tick_interval.0 = cleanup_interval; + config.raft_store.pd_heartbeat_tick_interval.0 = cleanup_interval; + config.raft_store.region_split_check_diff = Some(ReadableSize::kb(1)); + config.server.grpc_concurrency = 1; + // disable data flushed + fail::cfg("on_flush_completed", "return()").unwrap(); + let (mut cluster, ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(Some(config)); + let temp_dir = Builder::new().prefix("test_ingest_sst").tempdir().unwrap(); + let sst_path = temp_dir.path().join("test.sst"); + let sst_range = (0, 100); + let (mut meta, data) = gen_sst_file(sst_path, sst_range); + // No region id and epoch. 
+ send_upload_sst(&import, &meta, &data).unwrap(); + let mut ingest = IngestRequest::default(); + ingest.set_context(ctx.clone()); + ingest.set_sst(meta.clone()); + meta.set_region_id(ctx.get_region_id()); + meta.set_region_epoch(ctx.get_region_epoch().clone()); + send_upload_sst(&import, &meta, &data).unwrap(); + ingest.set_sst(meta.clone()); + let resp = import.ingest(&ingest).unwrap(); + assert!(!resp.has_error(), "{:?}", resp.get_error()); + + // restart node + cluster.stop_node(1); + cluster.start().unwrap(); + let count = sst_file_count(&cluster.paths); + assert_eq!(1, count); + + // flush manual + fail::remove("on_flush_completed"); + let (tx, rx) = channel::<()>(); + let tx = Arc::new(Mutex::new(tx)); + fail::cfg_callback("on_flush_completed", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + for i in 0..count { + cluster.must_put(format!("k-{}", i).as_bytes(), b"v"); + } + cluster.flush_data(); + rx.recv_timeout(std::time::Duration::from_millis(100)) + .unwrap(); + fail::remove("on_flush_completed"); + std::thread::sleep(std::time::Duration::from_millis(100)); + let count = sst_file_count(&cluster.paths); + assert_eq!(0, count); + + cluster.stop_node(1); + cluster.start().unwrap(); +} + +fn sst_file_count(paths: &Vec) -> u64 { + let mut count = 0; + for path in paths { + let sst_dir = path.path().join("import-sst"); + for entry in std::fs::read_dir(sst_dir).unwrap() { + let entry = entry.unwrap(); + if entry + .path() + .file_name() + .and_then(|n| n.to_str()) + .unwrap() + .contains("0_0_0") + { + continue; + } + if entry.file_type().unwrap().is_file() { + count += 1; + } + } + } + count } From 88bd7f1fe2cbcac31d516f76b10f1f213375a06f Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 11 Jul 2023 15:05:13 +0800 Subject: [PATCH 0787/1149] sst_import: support multiple ranges in a single switch mode request (#15069) close tikv/tikv#14916 support multiple ranges in a single switch mode request 
Signed-off-by: Spade A --- components/sst_importer/src/import_mode2.rs | 72 +++++++++++++-------- components/sst_importer/src/sst_importer.rs | 8 +-- src/import/sst_service.rs | 8 +-- 3 files changed, 52 insertions(+), 36 deletions(-) diff --git a/components/sst_importer/src/import_mode2.rs b/components/sst_importer/src/import_mode2.rs index c892d958c22..70b7d7fac5e 100644 --- a/components/sst_importer/src/import_mode2.rs +++ b/components/sst_importer/src/import_mode2.rs @@ -63,13 +63,13 @@ impl ImportModeSwitcherV2 { let inner = self.inner.clone(); let switcher = Arc::downgrade(&inner); let timer_loop = async move { - let mut prev_range = None; + let mut prev_ranges = vec![]; // loop until the switcher has been dropped while let Some(switcher) = switcher.upgrade() { let next_check = { let now = Instant::now(); let mut switcher = switcher.lock().unwrap(); - if let Some(range) = prev_range.take() { + for range in prev_ranges.drain(..) { if let Some(next_check) = switcher.import_mode_ranges.get(&range) { if now >= *next_check { switcher.clear_import_mode_range(range); @@ -79,9 +79,12 @@ impl ImportModeSwitcherV2 { let mut min_next_check = now + switcher.timeout; for (range, next_check) in &switcher.import_mode_ranges { - if *next_check < min_next_check { - min_next_check = *next_check; - prev_range = Some(range.clone()); + if *next_check <= min_next_check { + if *next_check < min_next_check { + min_next_check = *next_check; + prev_ranges.clear(); + } + prev_ranges.push(range.clone()); } } min_next_check @@ -96,18 +99,22 @@ impl ImportModeSwitcherV2 { executor.spawn(timer_loop); } - pub fn range_enter_import_mode(&self, range: Range) { - let range = HashRange::from(range); + pub fn ranges_enter_import_mode(&self, ranges: Vec) { let mut inner = self.inner.lock().unwrap(); let next_check = Instant::now() + inner.timeout; - // if the range exists before, the timeout is updated - inner.import_mode_ranges.insert(range, next_check); + for range in ranges { + let range = 
HashRange::from(range); + // if the range exists before, the timeout is updated + inner.import_mode_ranges.insert(range, next_check); + } } - pub fn clear_import_mode_range(&self, range: Range) { + pub fn clear_import_mode_range(&self, ranges: Vec) { let mut inner = self.inner.lock().unwrap(); - let range = HashRange::from(range); - inner.clear_import_mode_range(range); + for range in ranges { + let range = HashRange::from(range); + inner.clear_import_mode_range(range); + } } pub fn region_in_import_mode(&self, region: &Region) -> bool { @@ -212,7 +219,7 @@ mod test { let mut key_range = Range::default(); key_range.set_end(b"j".to_vec()); - switcher.range_enter_import_mode(key_range.clone()); + switcher.ranges_enter_import_mode(vec![key_range.clone()]); // no regions should be set in import mode for i in 1..=5 { assert!(!switcher.region_in_import_mode(®ions[i - 1])); @@ -225,7 +232,7 @@ mod test { // region 1 2 3 should be included key_range.set_start(b"k09".to_vec()); key_range.set_end(b"k21".to_vec()); - switcher.range_enter_import_mode(key_range.clone()); + switcher.ranges_enter_import_mode(vec![key_range.clone()]); for i in 1..=3 { assert!(switcher.region_in_import_mode(®ions[i - 1])); } @@ -237,12 +244,12 @@ mod test { // region 3 4 5 should be included key_range2.set_start(b"k29".to_vec()); key_range2.set_end(b"".to_vec()); - switcher.range_enter_import_mode(key_range2.clone()); + switcher.ranges_enter_import_mode(vec![key_range2.clone()]); for i in 1..=5 { assert!(switcher.region_in_import_mode(®ions[i - 1])); } - switcher.clear_import_mode_range(key_range); + switcher.clear_import_mode_range(vec![key_range]); for i in 1..=2 { assert!(!switcher.region_in_import_mode(®ions[i - 1])); } @@ -250,7 +257,7 @@ mod test { assert!(switcher.region_in_import_mode(®ions[i - 1])); } - switcher.clear_import_mode_range(key_range2); + switcher.clear_import_mode_range(vec![key_range2]); for i in 3..=5 { assert!(!switcher.region_in_import_mode(®ions[i - 1])); } @@ -259,7 
+266,7 @@ mod test { #[test] fn test_import_mode_timeout() { let cfg = Config { - import_mode_timeout: ReadableDuration::millis(300), + import_mode_timeout: ReadableDuration::millis(700), ..Config::default() }; @@ -277,29 +284,38 @@ mod test { region2.set_id(2); region2.set_start_key(b"k3".to_vec()); region2.set_end_key(b"k5".to_vec()); + let mut region3 = Region::default(); + region3.set_id(3); + region3.set_start_key(b"k5".to_vec()); + region3.set_end_key(b"k7".to_vec()); let mut key_range = Range::default(); key_range.set_start(b"k2".to_vec()); key_range.set_end(b"k4".to_vec()); - switcher.range_enter_import_mode(key_range); + let mut key_range2 = Range::default(); + key_range2.set_start(b"k5".to_vec()); + key_range2.set_end(b"k8".to_vec()); + switcher.ranges_enter_import_mode(vec![key_range, key_range2.clone()]); assert!(switcher.region_in_import_mode(®ion)); assert!(switcher.region_in_import_mode(®ion2)); + assert!(switcher.region_in_import_mode(®ion3)); switcher.start(threads.handle()); - thread::sleep(Duration::from_secs(1)); - threads.block_on(tokio::task::yield_now()); + thread::sleep(Duration::from_millis(400)); + // renew the timeout of key_range2 + switcher.ranges_enter_import_mode(vec![key_range2]); + thread::sleep(Duration::from_millis(400)); - let mut key_range = Range::default(); - key_range.set_start(b"k4".to_vec()); - key_range.set_end(b"k5".to_vec()); - switcher.range_enter_import_mode(key_range); + threads.block_on(tokio::task::yield_now()); + // the range covering region and region2 should be cleared due to timeout. 
assert!(!switcher.region_in_import_mode(®ion)); - assert!(switcher.region_in_import_mode(®ion2)); + assert!(!switcher.region_in_import_mode(®ion2)); + assert!(switcher.region_in_import_mode(®ion3)); - thread::sleep(Duration::from_secs(1)); + thread::sleep(Duration::from_millis(400)); threads.block_on(tokio::task::yield_now()); - assert!(!switcher.region_in_import_mode(®ion2)); + assert!(!switcher.region_in_import_mode(®ion3)); } } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 787e562082a..e39c423469e 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -222,17 +222,17 @@ impl SstImporter { }) } - pub fn range_enter_import_mode(&self, range: Range) { + pub fn ranges_enter_import_mode(&self, ranges: Vec) { if let Either::Right(ref switcher) = self.switcher { - switcher.range_enter_import_mode(range) + switcher.ranges_enter_import_mode(ranges) } else { unreachable!(); } } - pub fn clear_import_mode_regions(&self, range: Range) { + pub fn clear_import_mode_regions(&self, ranges: Vec) { if let Either::Right(ref switcher) = self.switcher { - switcher.clear_import_mode_range(range); + switcher.clear_import_mode_range(ranges); } else { unreachable!(); } diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 11f47a4a624..3860eba5d38 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -724,8 +724,8 @@ impl ImportSst for ImportSstService { LocalTablets::Registry(_) => { if req.get_mode() == SwitchMode::Import { if !req.get_ranges().is_empty() { - let range = req.take_ranges().into_vec().swap_remove(0); - self.importer.range_enter_import_mode(range); + let ranges = req.take_ranges().to_vec(); + self.importer.ranges_enter_import_mode(ranges); Ok(true) } else { Err(sst_importer::Error::Engine( @@ -736,8 +736,8 @@ impl ImportSst for ImportSstService { } else { // case SwitchMode::Normal if !req.get_ranges().is_empty() { - 
let range = req.take_ranges().into_vec().swap_remove(0); - self.importer.clear_import_mode_regions(range); + let ranges = req.take_ranges().to_vec(); + self.importer.clear_import_mode_regions(ranges); Ok(true) } else { Err(sst_importer::Error::Engine( From f7b0a517e6d084e5288d298e2af61cfb20118a13 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 12 Jul 2023 16:33:44 +0800 Subject: [PATCH 0788/1149] raftstore: support dynamically scale snap-generator-pool-size (#15076) close tikv/tikv#14325 support dynamically scale snap-generator-pool-size Signed-off-by: Spade A --- components/raftstore/src/store/config.rs | 8 +- components/raftstore/src/store/fsm/store.rs | 5 + .../src/store/worker/refresh_config.rs | 38 ++++- .../raftstore/src/store/worker/region.rs | 7 +- components/test_raftstore-v2/src/node.rs | 10 +- .../test_raftstore/src/common-test.toml | 1 + components/test_raftstore/src/node.rs | 10 +- .../integrations/raftstore/test_scale_pool.rs | 154 ++++++++++++++++-- 8 files changed, 210 insertions(+), 23 deletions(-) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 46eda405767..0dc5c6b7925 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -218,7 +218,6 @@ pub struct Config { #[online_config(hidden)] pub use_delete_range: bool, - #[online_config(skip)] pub snap_generator_pool_size: usize, pub cleanup_import_sst_interval: ReadableDuration, @@ -1178,6 +1177,13 @@ impl ConfigManager for RaftstoreConfigManager { error!("raftstore configuration manager schedule to resize store-io-pool-size work task failed"; "err"=> ?e); } } + if let Some(ConfigValue::Usize(resize_reader_size)) = change.get("snap_generator_pool_size") + { + let resize_reader_task = RefreshConfigTask::ScaleAsyncReader(*resize_reader_size); + if let Err(e) = self.scheduler.schedule(resize_reader_task) { + error!("raftstore configuration manager schedule 
to resize snap-generator-pool-size work task failed"; "err"=> ?e); + } + } info!( "raftstore config changed"; "change" => ?change, diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 0af232db86f..62ddc0b43db 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -62,6 +62,7 @@ use tikv_util::{ timer::SteadyTimer, warn, worker::{LazyWorker, Scheduler, Worker}, + yatp_pool::FuturePool, Either, RingQueue, }; use time::{self, Timespec}; @@ -1594,6 +1595,7 @@ impl RaftBatchSystem { self.router(), Some(Arc::clone(&pd_client)), ); + let snap_generator_pool = region_runner.snap_generator_pool(); let region_scheduler = workers .region_worker .start_with_timer("snapshot-worker", region_runner); @@ -1681,6 +1683,7 @@ impl RaftBatchSystem { region_read_progress, health_service, causal_ts_provider, + snap_generator_pool, )?; Ok(()) } @@ -1698,6 +1701,7 @@ impl RaftBatchSystem { region_read_progress: RegionReadProgressRegistry, health_service: Option, causal_ts_provider: Option>, // used for rawkv apiv2 + snap_generator_pool: FuturePool, ) -> Result<()> { let cfg = builder.cfg.value().clone(); let store = builder.store.clone(); @@ -1770,6 +1774,7 @@ impl RaftBatchSystem { self.router.router.clone(), self.apply_system.build_pool_state(apply_builder), self.system.build_pool_state(raft_builder), + snap_generator_pool, ); assert!(workers.refresh_config_worker.start(refresh_config_runner)); diff --git a/components/raftstore/src/store/worker/refresh_config.rs b/components/raftstore/src/store/worker/refresh_config.rs index 722a121850b..dae9fafd4ef 100644 --- a/components/raftstore/src/store/worker/refresh_config.rs +++ b/components/raftstore/src/store/worker/refresh_config.rs @@ -8,7 +8,8 @@ use std::{ use batch_system::{BatchRouter, Fsm, FsmTypes, HandlerBuilder, Poller, PoolState, Priority}; use file_system::{set_io_type, IoType}; use tikv_util::{ - debug, error, info, 
safe_panic, sys::thread::StdThreadBuildWrapper, thd_name, worker::Runnable, + debug, error, info, safe_panic, sys::thread::StdThreadBuildWrapper, thd_name, warn, + worker::Runnable, yatp_pool::FuturePool, }; use crate::store::{ @@ -187,6 +188,7 @@ pub enum Task { ScalePool(BatchComponent, usize), ScaleBatchSize(BatchComponent, usize), ScaleWriters(usize), + ScaleAsyncReader(usize), } impl Display for Task { @@ -201,6 +203,9 @@ impl Display for Task { Task::ScaleWriters(size) => { write!(f, "Scale store_io_pool_size adjusts {} ", size) } + Task::ScaleAsyncReader(size) => { + write!(f, "Scale snap_generator_pool_size adjusts {} ", size) + } } } } @@ -216,6 +221,7 @@ where writer_ctrl: WriterContoller>, apply_pool: PoolController, ControlFsm, AH>, raft_pool: PoolController, StoreFsm, RH>, + snap_generator_pool: FuturePool, } impl Runner @@ -233,6 +239,7 @@ where raft_router: BatchRouter, StoreFsm>, apply_pool_state: PoolState, ControlFsm, AH>, raft_pool_state: PoolState, StoreFsm, RH>, + snap_generator_pool: FuturePool, ) -> Self { let writer_ctrl = WriterContoller::new(writer_meta, store_writers); let apply_pool = PoolController::new(apply_router, apply_pool_state); @@ -242,6 +249,7 @@ where writer_ctrl, apply_pool, raft_pool, + snap_generator_pool, } } @@ -308,6 +316,31 @@ where "to" => size ); } + + fn resize_snap_generator_read_pool(&mut self, size: usize) { + let current_pool_size = self.snap_generator_pool.get_pool_size(); + // It may not take effect immediately. See comments of + // ThreadPool::scale_workers. + // Also, the size will be clamped between min_thread_count and the max_pool_size + // set when the pool is initialized. This is fine as max_pool_size + // is relatively a large value. 
+ self.snap_generator_pool.scale_pool_size(size); + let (min_thread_count, max_thread_count) = self.snap_generator_pool.thread_count_limit(); + if size > max_thread_count || size < min_thread_count { + warn!( + "apply pool scale size is out of bound, and the size is clamped"; + "size" => size, + "min_thread_limit" => min_thread_count, + "max_thread_count" => max_thread_count, + ); + } else { + info!( + "resize apply pool"; + "from" => current_pool_size, + "to" => size, + ); + } + } } impl Runnable for Runner @@ -335,6 +368,9 @@ where } }, Task::ScaleWriters(size) => self.resize_store_writers(size), + Task::ScaleAsyncReader(size) => { + self.resize_snap_generator_read_pool(size); + } } } } diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index e3b2dfaef76..01f2411e849 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -51,6 +51,7 @@ use crate::{ }; const CLEANUP_MAX_REGION_COUNT: usize = 64; +const SNAP_GENERATOR_MAX_POOL_SIZE: usize = 16; const TIFLASH: &str = "tiflash"; const ENGINE: &str = "engine"; @@ -409,12 +410,16 @@ where .thread_count( 1, cfg.value().snap_generator_pool_size, - cfg.value().snap_generator_pool_size, + SNAP_GENERATOR_MAX_POOL_SIZE, ) .build_future_pool(), } } + pub fn snap_generator_pool(&self) -> FuturePool { + self.pool.clone() + } + fn region_state(&self, region_id: u64) -> Result { let region_key = keys::region_state_key(region_id); let region_state: RegionLocalState = diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 2d33a98b35d..d0e4c37cc49 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -151,7 +151,7 @@ pub struct NodeCluster { simulate_trans: HashMap>, concurrency_managers: HashMap, snap_mgrs: HashMap, - cfg_controller: Option, + cfg_controller: HashMap, } impl NodeCluster { @@ -163,7 +163,7 @@ impl 
NodeCluster { simulate_trans: HashMap::default(), concurrency_managers: HashMap::default(), snap_mgrs: HashMap::default(), - cfg_controller: None, + cfg_controller: HashMap::default(), } } @@ -171,8 +171,8 @@ impl NodeCluster { self.concurrency_managers.get(&node_id).unwrap().clone() } - pub fn get_cfg_controller(&self) -> Option<&ConfigController> { - self.cfg_controller.as_ref() + pub fn get_cfg_controller(&self, node_id: u64) -> Option<&ConfigController> { + self.cfg_controller.get(&node_id) } } @@ -369,7 +369,7 @@ impl Simulator for NodeCluster { self.nodes.insert(node_id, node); self.simulate_trans.insert(node_id, simulate_trans); - self.cfg_controller = Some(cfg_controller); + self.cfg_controller.insert(node_id, cfg_controller); Ok(node_id) } diff --git a/components/test_raftstore/src/common-test.toml b/components/test_raftstore/src/common-test.toml index 334291f7213..7eace3ac745 100644 --- a/components/test_raftstore/src/common-test.toml +++ b/components/test_raftstore/src/common-test.toml @@ -72,6 +72,7 @@ store-io-pool-size = 0 apply-pool-size = 1 store-pool-size = 1 +snap-generator-pool-size = 2 [coprocessor] [rocksdb] diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 7188ab98d98..44cff8d9413 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -155,7 +155,7 @@ pub struct NodeCluster { pd_client: Arc, nodes: HashMap>, snap_mgrs: HashMap, - cfg_controller: Option, + cfg_controller: HashMap, simulate_trans: HashMap, concurrency_managers: HashMap, #[allow(clippy::type_complexity)] @@ -169,7 +169,7 @@ impl NodeCluster { pd_client, nodes: HashMap::default(), snap_mgrs: HashMap::default(), - cfg_controller: None, + cfg_controller: HashMap::default(), simulate_trans: HashMap::default(), concurrency_managers: HashMap::default(), post_create_coprocessor_host: None, @@ -215,8 +215,8 @@ impl NodeCluster { self.concurrency_managers.get(&node_id).unwrap().clone() } - pub fn 
get_cfg_controller(&self) -> Option<&ConfigController> { - self.cfg_controller.as_ref() + pub fn get_cfg_controller(&self, node_id: u64) -> Option<&ConfigController> { + self.cfg_controller.get(&node_id) } } @@ -391,7 +391,7 @@ impl Simulator for NodeCluster { .routers .insert(node_id, SimulateTransport::new(router)); self.nodes.insert(node_id, node); - self.cfg_controller = Some(cfg_controller); + self.cfg_controller.insert(node_id, cfg_controller); self.simulate_trans.insert(node_id, simulate_trans); Ok(node_id) diff --git a/tests/integrations/raftstore/test_scale_pool.rs b/tests/integrations/raftstore/test_scale_pool.rs index e27e6939a2a..6d210c6c764 100644 --- a/tests/integrations/raftstore/test_scale_pool.rs +++ b/tests/integrations/raftstore/test_scale_pool.rs @@ -7,11 +7,14 @@ use std::{ }; use engine_traits::{MiscExt, Peekable}; +use raft::prelude::MessageType; use test_raftstore::*; use test_raftstore_macro::test_case; use tikv::config::ConfigurableDb; use tikv_util::{ + config::ReadableDuration, sys::thread::{self, Pid}, + time::Instant, HandyRwLock, }; @@ -33,7 +36,7 @@ fn test_increase_pool() { { let sim = cluster.sim.rl(); - let cfg_controller = sim.get_cfg_controller().unwrap(); + let cfg_controller = sim.get_cfg_controller(1).unwrap(); let change = { let mut change = HashMap::new(); @@ -88,7 +91,7 @@ fn test_increase_pool_v2() { { let sim = cluster.sim.rl(); - let cfg_controller = sim.get_cfg_controller().unwrap(); + let cfg_controller = sim.get_cfg_controller(1).unwrap(); let change = { let mut change = HashMap::new(); @@ -155,7 +158,7 @@ fn test_decrease_pool() { { let sim = cluster.sim.rl(); - let cfg_controller = sim.get_cfg_controller().unwrap(); + let cfg_controller = sim.get_cfg_controller(1).unwrap(); let change = { let mut change = HashMap::new(); change.insert("raftstore.store_pool_size".to_owned(), "1".to_owned()); @@ -220,7 +223,7 @@ fn test_increase_apply_pool_v2() { { let sim = cluster.sim.rl(); - let cfg_controller = 
sim.get_cfg_controller().unwrap(); + let cfg_controller = sim.get_cfg_controller(1).unwrap(); let change = { let mut change = HashMap::new(); change.insert("raftstore.apply-pool-size".to_owned(), "2".to_owned()); @@ -248,7 +251,7 @@ fn test_decrease_apply_pool_v2() { { let sim = cluster.sim.rl(); - let cfg_controller = sim.get_cfg_controller().unwrap(); + let cfg_controller = sim.get_cfg_controller(1).unwrap(); let change = { let mut change = HashMap::new(); change.insert("raftstore.apply-pool-size".to_owned(), "1".to_owned()); @@ -282,7 +285,7 @@ fn test_decrease_pool_v2() { { let sim = cluster.sim.rl(); - let cfg_controller = sim.get_cfg_controller().unwrap(); + let cfg_controller = sim.get_cfg_controller(1).unwrap(); let change = { let mut change = HashMap::new(); change.insert("raftstore.store_pool_size".to_owned(), "1".to_owned()); @@ -350,7 +353,7 @@ fn test_increase_async_ios() { // Update config, expand from 1 to 2 { let sim = cluster.sim.rl(); - let cfg_controller = sim.get_cfg_controller().unwrap(); + let cfg_controller = sim.get_cfg_controller(1).unwrap(); let change = { let mut change = HashMap::new(); @@ -394,7 +397,7 @@ fn test_decrease_async_ios() { // Update config, shrink from 4 to 1 { let sim = cluster.sim.rl(); - let cfg_controller = sim.get_cfg_controller().unwrap(); + let cfg_controller = sim.get_cfg_controller(1).unwrap(); let change = { let mut change = HashMap::new(); change.insert("raftstore.store-io-pool-size".to_owned(), "1".to_owned()); @@ -443,7 +446,7 @@ fn test_resize_async_ios_failed_1() { // sync-mode(async-ios == 0). { let sim = cluster.sim.rl(); - let cfg_controller = sim.get_cfg_controller().unwrap(); + let cfg_controller = sim.get_cfg_controller(1).unwrap(); let change = { let mut change = HashMap::new(); @@ -486,7 +489,7 @@ fn test_resize_async_ios_failed_2() { // async-mode(async-ios == 2). 
{ let sim = cluster.sim.rl(); - let cfg_controller = sim.get_cfg_controller().unwrap(); + let cfg_controller = sim.get_cfg_controller(1).unwrap(); let change = { let mut change = HashMap::new(); @@ -556,3 +559,134 @@ fn test_adjust_hight_priority_background_threads() { h.join().unwrap(); h2.join().unwrap(); } + +#[test] +fn test_increase_snap_generator_pool_size() { + let mut cluster = new_node_cluster(0, 3); + cluster.cfg.raft_store.right_derive_when_split = false; + cluster.cfg.raft_store.snap_generator_pool_size = 1; + cluster.cfg.raft_store.raft_log_gc_threshold = 20; + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(20); + cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration(Duration::from_millis(50)); + cluster.run(); + // wait for yatp threads to sleep + std::thread::sleep(Duration::from_millis(200)); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + let region = cluster.get_region(b""); + cluster.must_split(®ion, b"key0020"); + let id1 = cluster.get_region(b"").get_id(); + let id2 = cluster.get_region(b"key0020").get_id(); + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(id1, 2) + .msg_type(MessageType::MsgAppend) + .direction(Direction::Recv), + )); + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(id2, 2) + .msg_type(MessageType::MsgAppend) + .direction(Direction::Recv), + )); + + fail::cfg("before_region_gen_snap", "1*pause").unwrap(); + + for i in 0..20 { + let key = format!("key{:04}", i); + cluster.must_put(key.as_bytes(), b"val"); + } + + { + let sim = cluster.sim.rl(); + let cfg_controller = sim.get_cfg_controller(1).unwrap(); + let change = { + let mut change = HashMap::new(); + change.insert( + "raftstore.snap-generator-pool-size".to_owned(), + "2".to_owned(), + ); + change + }; + + cfg_controller.update(change).unwrap(); + std::thread::sleep(std::time::Duration::from_secs(1)); + } + + let engine = cluster.get_engine(2); + 
assert!(engine.get_value(b"zkey0001").unwrap().is_none()); + + for i in 20..40 { + let key = format!("key{:04}", i); + cluster.must_put(key.as_bytes(), b"val"); + } + + std::thread::sleep(std::time::Duration::from_millis(500)); + let t = Instant::now(); + while t.saturating_elapsed() < Duration::from_secs(1) { + let val = engine.get_value(b"zkey0030").unwrap(); + if val.is_some() { + assert_eq!(val.unwrap(), b"val"); + break; + } + } + assert!(engine.get_value(b"zkey0001").unwrap().is_none()); + + fail::remove("before_region_gen_snap"); +} + +#[test] +fn test_decrease_snap_generator_pool_size() { + let mut cluster = new_node_cluster(0, 3); + cluster.cfg.raft_store.right_derive_when_split = false; + cluster.cfg.raft_store.snap_generator_pool_size = 2; + cluster.cfg.raft_store.raft_log_gc_threshold = 20; + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(20); + cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration(Duration::from_millis(50)); + cluster.run(); + // wait for yatp threads to sleep + std::thread::sleep(Duration::from_millis(200)); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(1, 2) + .msg_type(MessageType::MsgAppend) + .direction(Direction::Recv), + )); + fail::cfg("before_region_gen_snap", "1*pause").unwrap(); + + { + let sim = cluster.sim.rl(); + let cfg_controller = sim.get_cfg_controller(1).unwrap(); + let change = { + let mut change = HashMap::new(); + change.insert( + "raftstore.snap-generator-pool-size".to_owned(), + "0".to_owned(), + ); + change + }; + cfg_controller.update(change).unwrap_err(); + + let change = { + let mut change = HashMap::new(); + change.insert( + "raftstore.snap-generator-pool-size".to_owned(), + "1".to_owned(), + ); + change + }; + + cfg_controller.update(change).unwrap(); + std::thread::sleep(std::time::Duration::from_secs(1)); + } + + for i in 0..20 { + let key = format!("key{:04}", i); + cluster.must_put(key.as_bytes(), 
b"val"); + } + + std::thread::sleep(std::time::Duration::from_secs(2)); + let engine = cluster.get_engine(2); + assert!(engine.get_value(b"zkey0001").unwrap().is_none()); + + fail::remove("before_region_gen_snap"); +} From 372da724b85421adca55cdc6b9a18b5129945a10 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 13 Jul 2023 01:54:43 +0800 Subject: [PATCH 0789/1149] resource_control: support auto adjust background quota limit (#14980) ref tikv/tikv#14900 Signed-off-by: glorv --- Cargo.lock | 1 + components/file_system/src/lib.rs | 3 +- components/resource_control/Cargo.toml | 1 + components/resource_control/src/future.rs | 8 +- components/resource_control/src/lib.rs | 1 + .../resource_control/src/resource_group.rs | 39 +- .../resource_control/src/resource_limiter.rs | 51 ++- components/resource_control/src/worker.rs | 425 ++++++++++++++++++ components/server/src/server.rs | 7 + components/server/src/server2.rs | 7 + 10 files changed, 517 insertions(+), 26 deletions(-) create mode 100644 components/resource_control/src/worker.rs diff --git a/Cargo.lock b/Cargo.lock index 3b0863e6e14..859c69b6a09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4995,6 +4995,7 @@ dependencies = [ "serde", "slog", "slog-global", + "strum 0.20.0", "test_pd", "test_pd_client", "tikv_util", diff --git a/components/file_system/src/lib.rs b/components/file_system/src/lib.rs index 7bf1a45dcb1..1c5577f361a 100644 --- a/components/file_system/src/lib.rs +++ b/components/file_system/src/lib.rs @@ -35,7 +35,8 @@ use std::{ pub use file::{File, OpenOptions}; pub use io_stats::{ - get_io_type, get_thread_io_bytes_total, init as init_io_stats_collector, set_io_type, + fetch_io_bytes, get_io_type, get_thread_io_bytes_total, init as init_io_stats_collector, + set_io_type, }; pub use metrics_manager::{BytesFetcher, MetricsManager}; use online_config::ConfigValue; diff --git a/components/resource_control/Cargo.toml b/components/resource_control/Cargo.toml index 1ddb633ab15..c554b5a32ab 100644 --- 
a/components/resource_control/Cargo.toml +++ b/components/resource_control/Cargo.toml @@ -27,6 +27,7 @@ protobuf = { version = "2.8", features = ["bytes"] } serde = { version = "1.0", features = ["derive"] } slog = { workspace = true } slog-global = { workspace = true } +strum = { version = "0.20", features = ["derive"] } test_pd = { workspace = true } test_pd_client = { workspace = true } tikv_util = { workspace = true } diff --git a/components/resource_control/src/future.rs b/components/resource_control/src/future.rs index 79e976c2699..0f0bd38c212 100644 --- a/components/resource_control/src/future.rs +++ b/components/resource_control/src/future.rs @@ -220,7 +220,7 @@ mod tests { use tikv_util::yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}; use super::*; - use crate::resource_limiter::GroupStatistics; + use crate::resource_limiter::{GroupStatistics, ResourceType::Io}; #[pin_project] struct NotifyFuture { @@ -275,7 +275,7 @@ mod tests { loop { i += 1; spawn_and_wait(&pool, empty(), resource_limiter.clone()); - stats = resource_limiter.io_limiter.get_statistics(); + stats = resource_limiter.get_limiter(Io).get_statistics(); assert_eq!(stats.total_consumed, i * 150); if stats.total_wait_dur_us > 0 { break; @@ -284,7 +284,7 @@ mod tests { let start = Instant::now(); spawn_and_wait(&pool, empty(), resource_limiter.clone()); - let new_stats = resource_limiter.io_limiter.get_statistics(); + let new_stats = resource_limiter.get_limiter(Io).get_statistics(); let delta = new_stats - stats; let dur = start.saturating_elapsed(); assert_eq!(delta.total_consumed, 150); @@ -296,7 +296,7 @@ mod tests { { fail::cfg("failed_to_get_thread_io_bytes_stats", "1*return").unwrap(); spawn_and_wait(&pool, empty(), resource_limiter.clone()); - assert_eq!(resource_limiter.io_limiter.get_statistics(), new_stats); + assert_eq!(resource_limiter.get_limiter(Io).get_statistics(), new_stats); fail::remove("failed_to_get_thread_io_bytes_stats"); } } diff --git 
a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index 2fbb25c3394..05330339163 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -24,6 +24,7 @@ pub mod channel; pub use channel::ResourceMetered; mod resource_limiter; +pub mod worker; #[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] #[serde(default)] diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 9c7d0e682f1..16109a5436a 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -57,7 +57,7 @@ pub enum ResourceConsumeType { /// ResourceGroupManager manages the metadata of each resource group. pub struct ResourceGroupManager { - resource_groups: DashMap, + pub(crate) resource_groups: DashMap, registry: RwLock>>, } @@ -127,6 +127,12 @@ impl ResourceGroupManager { } fn build_resource_limiter(rg: &PbResourceGroup) -> Option> { + #[cfg(test)] + { + if rg.name.contains("background") { + return Some(Arc::new(ResourceLimiter::new(f64::INFINITY, f64::INFINITY))); + } + } // TODO: only the "default" resource group support background tasks currently. if rg.name == DEFAULT_RESOURCE_GROUP_NAME { return Some(Arc::new(ResourceLimiter::new(f64::INFINITY, f64::INFINITY))); @@ -211,11 +217,22 @@ impl ResourceGroupManager { ); } } + + pub fn get_resource_limiter(&self, rg: &str) -> Option> { + if let Some(g) = self.resource_groups.get(rg) { + return g.limiter.clone(); + } + + // fallback to the default resource group if target group doesn't exist. 
+ self.resource_groups + .get(DEFAULT_RESOURCE_GROUP_NAME) + .and_then(|g| g.limiter.clone()) + } } pub(crate) struct ResourceGroup { - group: PbResourceGroup, - limiter: Option>, + pub group: PbResourceGroup, + pub limiter: Option>, } impl ResourceGroup { @@ -223,7 +240,6 @@ impl ResourceGroup { Self { group, limiter } } - #[cfg(test)] pub(crate) fn get_ru_quota(&self) -> u64 { assert!(self.group.has_r_u_settings()); self.group @@ -619,10 +635,10 @@ impl GroupPriorityTracker { #[cfg(test)] pub(crate) mod tests { - use rand::{thread_rng, RngCore}; use yatp::queue::Extras; use super::*; + use crate::resource_limiter::ResourceType::{Cpu, Io}; pub fn new_resource_group_ru(name: String, ru: u64, group_priority: u32) -> PbResourceGroup { new_resource_group(name, true, ru, ru, group_priority) @@ -741,10 +757,10 @@ pub(crate) mod tests { assert!(group1.limiter.is_none()); let default_group = resource_manager.get_resource_group("default").unwrap(); let limiter = default_group.limiter.as_ref().unwrap().clone(); - assert!(limiter.cpu_limiter.get_rate_limit().is_infinite()); - assert!(limiter.io_limiter.get_rate_limit().is_infinite()); - limiter.cpu_limiter.set_rate_limit(100.0); - limiter.io_limiter.set_rate_limit(200.0); + assert!(limiter.get_limiter(Cpu).get_rate_limit().is_infinite()); + assert!(limiter.get_limiter(Io).get_rate_limit().is_infinite()); + limiter.get_limiter(Cpu).set_rate_limit(100.0); + limiter.get_limiter(Io).set_rate_limit(200.0); drop(group1); drop(default_group); @@ -755,8 +771,8 @@ pub(crate) mod tests { assert_eq!(default_group.get_ru_quota(), 100); let new_limiter = default_group.limiter.as_ref().unwrap().clone(); // check rate_limiter is not changed. 
- assert_eq!(new_limiter.cpu_limiter.get_rate_limit(), 100.0); - assert_eq!(new_limiter.io_limiter.get_rate_limit(), 200.0); + assert_eq!(new_limiter.get_limiter(Cpu).get_rate_limit(), 100.0); + assert_eq!(new_limiter.get_limiter(Io).get_rate_limit(), 200.0); assert_eq!(&*new_limiter as *const _, &*limiter as *const _); } @@ -923,6 +939,7 @@ pub(crate) mod tests { #[cfg(feature = "failpoints")] #[test] fn test_reset_resource_group_vt_overflow() { + use rand::{thread_rng, RngCore}; let resource_manager = ResourceGroupManager::default(); let resource_ctl = resource_manager.derive_controller("test_write".into(), false); let mut rng = thread_rng(); diff --git a/components/resource_control/src/resource_limiter.rs b/components/resource_control/src/resource_limiter.rs index 1c41ea79893..29218da7b6b 100644 --- a/components/resource_control/src/resource_limiter.rs +++ b/components/resource_control/src/resource_limiter.rs @@ -1,33 +1,55 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ + fmt, sync::atomic::{AtomicU64, Ordering}, time::Duration, }; +use strum::EnumCount; use tikv_util::time::Limiter; +#[derive(Clone, Copy, Eq, PartialEq, EnumCount)] +#[repr(usize)] +pub enum ResourceType { + Cpu, + Io, +} + +impl fmt::Debug for ResourceType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + ResourceType::Cpu => write!(f, "cpu"), + ResourceType::Io => write!(f, "io"), + } + } +} + pub struct ResourceLimiter { - #[allow(dead_code)] - pub(crate) cpu_limiter: QuotaLimiter, - #[allow(dead_code)] - pub(crate) io_limiter: QuotaLimiter, + limiters: [QuotaLimiter; ResourceType::COUNT], } impl ResourceLimiter { pub fn new(cpu_limit: f64, io_limit: f64) -> Self { + let cpu_limiter = QuotaLimiter::new(cpu_limit); + let io_limiter = QuotaLimiter::new(io_limit); Self { - cpu_limiter: QuotaLimiter::new(cpu_limit), - io_limiter: QuotaLimiter::new(io_limit), + limiters: [cpu_limiter, io_limiter], } } #[allow(dead_code)] pub fn consume(&self, cpu_time: Duration, io_bytes: u64) -> Duration { - let cpu_dur = self.cpu_limiter.consume(cpu_time.as_micros() as u64); - let io_dur = self.io_limiter.consume(io_bytes); + let cpu_dur = + self.limiters[ResourceType::Cpu as usize].consume(cpu_time.as_micros() as u64); + let io_dur = self.limiters[ResourceType::Io as usize].consume(io_bytes); cpu_dur.max(io_dur) } + + #[inline] + pub(crate) fn get_limiter(&self, ty: ResourceType) -> &QuotaLimiter { + &self.limiters[ty as usize] + } } pub(crate) struct QuotaLimiter { @@ -46,12 +68,10 @@ impl QuotaLimiter { } } - #[cfg(test)] pub(crate) fn get_rate_limit(&self) -> f64 { self.limiter.speed_limit() } - #[cfg(test)] pub(crate) fn set_rate_limit(&self, mut limit: f64) { // treat 0 as infinity. 
if limit <= f64::EPSILON { @@ -97,3 +117,14 @@ impl std::ops::Sub for GroupStatistics { } } } + +impl std::ops::Div for GroupStatistics { + type Output = Self; + + fn div(self, rhs: f64) -> Self::Output { + Self { + total_consumed: (self.total_consumed as f64 / rhs) as u64, + total_wait_dur_us: (self.total_wait_dur_us as f64 / rhs) as u64, + } + } +} diff --git a/components/resource_control/src/worker.rs b/components/resource_control/src/worker.rs new file mode 100644 index 00000000000..111c00ca394 --- /dev/null +++ b/components/resource_control/src/worker.rs @@ -0,0 +1,425 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{array, collections::HashMap, io::Result as IoResult, sync::Arc, time::Duration}; + +use file_system::{fetch_io_bytes, IoBytes, IoType}; +use strum::EnumCount; +use tikv_util::{ + sys::{cpu_time::ProcessStat, SysQuota}, + time::Instant, + warn, +}; + +use crate::{ + resource_group::ResourceGroupManager, + resource_limiter::{GroupStatistics, ResourceLimiter, ResourceType}, +}; + +pub const BACKGROUND_LIMIT_ADJUST_DURATION: Duration = Duration::from_secs(10); + +const MICROS_PER_SEC: f64 = 1_000_000.0; + +pub struct ResourceUsageStats { + total_quota: f64, + current_used: f64, +} + +pub trait ResourceStatsProvider { + fn get_current_stats(&mut self, _t: ResourceType) -> IoResult; +} + +pub struct SysQuotaGetter { + process_stat: ProcessStat, + prev_io_stats: [IoBytes; IoType::COUNT], + prev_io_ts: Instant, + io_bandwidth: u64, +} + +impl ResourceStatsProvider for SysQuotaGetter { + fn get_current_stats(&mut self, ty: ResourceType) -> IoResult { + match ty { + ResourceType::Cpu => { + let total_quota = SysQuota::cpu_cores_quota(); + self.process_stat.cpu_usage().map(|u| ResourceUsageStats { + // cpu is measured in us. 
+ total_quota: total_quota * MICROS_PER_SEC, + current_used: u * MICROS_PER_SEC, + }) + } + ResourceType::Io => { + let mut stats = ResourceUsageStats { + total_quota: self.io_bandwidth as f64, + current_used: 0.0, + }; + let now = Instant::now_coarse(); + let dur = now.saturating_duration_since(self.prev_io_ts).as_secs_f64(); + if dur < 0.1 { + return Ok(stats); + } + let new_io_stats = fetch_io_bytes(); + let total_io_used = self + .prev_io_stats + .iter() + .zip(new_io_stats.iter()) + .map(|(s, new_s)| { + let delta = *new_s - *s; + delta.read + delta.write + }) + .sum::(); + self.prev_io_stats = new_io_stats; + self.prev_io_ts = now; + + stats.current_used = total_io_used as f64 / dur; + Ok(stats) + } + } + } +} + +pub struct GroupQuotaAdjustWorker { + prev_stats_by_group: [HashMap; ResourceType::COUNT], + last_adjust_time: Instant, + resource_ctl: Arc, + is_last_time_low_load: [bool; ResourceType::COUNT], + resource_quota_getter: R, +} + +impl GroupQuotaAdjustWorker { + pub fn new(resource_ctl: Arc, io_bandwidth: u64) -> Self { + let resource_quota_getter = SysQuotaGetter { + process_stat: ProcessStat::cur_proc_stat().unwrap(), + prev_io_stats: [IoBytes::default(); IoType::COUNT], + prev_io_ts: Instant::now_coarse(), + io_bandwidth, + }; + Self::with_quota_getter(resource_ctl, resource_quota_getter) + } +} + +impl GroupQuotaAdjustWorker { + pub fn with_quota_getter( + resource_ctl: Arc, + resource_quota_getter: R, + ) -> Self { + let prev_stats_by_group = array::from_fn(|_| HashMap::default()); + Self { + prev_stats_by_group, + last_adjust_time: Instant::now_coarse(), + resource_ctl, + resource_quota_getter, + is_last_time_low_load: array::from_fn(|_| false), + } + } + + pub fn adjust_quota(&mut self) { + let now = Instant::now_coarse(); + let dur_secs = now + .saturating_duration_since(self.last_adjust_time) + .as_secs_f64(); + // a conservative check, skip adjustment if the duration is too short. 
+ if dur_secs < 1.0 { + return; + } + self.last_adjust_time = now; + + let mut background_groups: Vec<_> = self + .resource_ctl + .resource_groups + .iter() + .filter_map(|kv| { + let g = kv.value(); + g.limiter.as_ref().map(|limiter| GroupStats { + name: g.group.name.clone(), + ru_quota: g.get_ru_quota() as f64, + limiter: limiter.clone(), + stats_per_sec: GroupStatistics::default(), + expect_cost_rate: 0.0, + }) + }) + .collect(); + if background_groups.is_empty() { + return; + } + + self.do_adjust(ResourceType::Cpu, dur_secs, &mut background_groups); + self.do_adjust(ResourceType::Io, dur_secs, &mut background_groups); + } + + fn do_adjust( + &mut self, + resource_type: ResourceType, + dur_secs: f64, + bg_group_stats: &mut [GroupStats], + ) { + let resource_stats = match self.resource_quota_getter.get_current_stats(resource_type) { + Ok(r) => r, + Err(e) => { + warn!("get resource statistics info failed, skip adjust"; "type" => ?resource_type, "err" => ?e); + return; + } + }; + // if total resource quota is unlimited, set all groups' limit to unlimited. 
+ if resource_stats.total_quota <= f64::EPSILON { + for g in bg_group_stats { + g.limiter + .get_limiter(resource_type) + .set_rate_limit(f64::INFINITY); + } + return; + } + + let mut total_ru_quota = 0.0; + let mut background_consumed_total = 0.0; + let mut has_wait = false; + for g in bg_group_stats.iter_mut() { + total_ru_quota += g.ru_quota; + let total_stats = g.limiter.get_limiter(resource_type).get_statistics(); + let stats_per_sec = (total_stats + - self.prev_stats_by_group[resource_type as usize] + .insert(g.name.clone(), total_stats) + .unwrap_or_default()) + / dur_secs; + background_consumed_total += stats_per_sec.total_consumed as f64; + g.stats_per_sec = stats_per_sec; + if stats_per_sec.total_wait_dur_us > 0 { + has_wait = true; + } + } + + // fast path if process cpu is low + let is_low_load = resource_stats.current_used <= (resource_stats.total_quota * 0.1); + if is_low_load && !has_wait && self.is_last_time_low_load[resource_type as usize] { + return; + } + self.is_last_time_low_load[resource_type as usize] = is_low_load; + + // the available resource for background tasks is defined as: + // (total_resource_quota - foreground_task_used). foreground_task_used + // resource is calculated by: (resource_current_total_used - + // background_consumed_total). We reserve 10% of the free resources for + // foreground tasks in case the fore ground traffics increases. 
+ let mut available_resource_rate = ((resource_stats.total_quota + - resource_stats.current_used + + background_consumed_total) + * 0.9) + .max(resource_stats.total_quota * 0.1); + let mut total_expected_cost = 0.0; + for g in bg_group_stats.iter_mut() { + let mut rate_limit = g.limiter.get_limiter(resource_type).get_rate_limit(); + if rate_limit.is_infinite() { + rate_limit = 0.0; + } + let group_expected_cost = g.stats_per_sec.total_consumed as f64 + + g.stats_per_sec.total_wait_dur_us as f64 / MICROS_PER_SEC * rate_limit; + g.expect_cost_rate = group_expected_cost; + total_expected_cost += group_expected_cost; + } + // sort groups by the expect_cost_rate per ru + bg_group_stats.sort_by(|g1, g2| { + (g1.expect_cost_rate / g1.ru_quota) + .partial_cmp(&(g2.expect_cost_rate / g2.ru_quota)) + .unwrap() + }); + + // quota is enough, group is allowed to got more resource then its share by ru. + // e.g. Given a totol resource of 10000, and ("name", ru_quota, expected_rate) + // of: (rg1, 2000, 3000), (rg2, 3000, 1000), (rg3, 5000, 5000) + // then after the previous sort, the order is rg2, rg3, rg1 and the handle order + // is rg1, rg3, rg2 so the final rate limit assigned is: (rg1, 3000), + // (rg3, 5833(7000/6*5)), (rg2, 1166(7000/6*1)) + if total_expected_cost <= available_resource_rate { + for g in bg_group_stats.iter().rev() { + let limit = g + .expect_cost_rate + .max(available_resource_rate / total_ru_quota * g.ru_quota); + g.limiter.get_limiter(resource_type).set_rate_limit(limit); + available_resource_rate -= limit; + total_ru_quota -= g.ru_quota; + } + return; + } + + // quota is not enough, assign by share + // e.g. 
Given a totol resource of 10000, and ("name", ru_quota, expected_rate) + // of: (rg1, 2000, 1000), (rg2, 3000, 5000), (rg3, 5000, 7000) + // then after the previous sort, the order is rg1, rg3, rg2, and handle order is + // rg1, rg3, rg2 so the final rate limit assigned is: (rg1, 1000), (rg3, + // 5250(9000/12*7)), (rg2, 3750(9000/12*5)) + for g in bg_group_stats { + let limit = g + .expect_cost_rate + .min(available_resource_rate / total_ru_quota * g.ru_quota); + g.limiter.get_limiter(resource_type).set_rate_limit(limit); + available_resource_rate -= limit; + total_ru_quota -= g.ru_quota; + } + } +} + +struct GroupStats { + name: String, + limiter: Arc, + ru_quota: f64, + stats_per_sec: GroupStatistics, + expect_cost_rate: f64, +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use super::*; + use crate::{resource_group::tests::new_resource_group_ru, resource_limiter::QuotaLimiter}; + + struct TestResourceStatsProvider { + cpu_total: f64, + cpu_used: f64, + io_total: f64, + io_used: f64, + } + + impl TestResourceStatsProvider { + fn new(cpu_total: f64, io_total: f64) -> Self { + Self { + cpu_total, + cpu_used: 0.0, + io_total, + io_used: 0.0, + } + } + } + + impl ResourceStatsProvider for TestResourceStatsProvider { + fn get_current_stats(&mut self, t: ResourceType) -> IoResult { + match t { + ResourceType::Cpu => Ok(ResourceUsageStats { + total_quota: self.cpu_total * 1_000_000.0, + current_used: self.cpu_used * 1_000_000.0, + }), + ResourceType::Io => Ok(ResourceUsageStats { + total_quota: self.io_total, + current_used: self.io_used, + }), + } + } + } + + #[test] + fn test_adjust_resource_limiter() { + let resource_ctl = Arc::new(ResourceGroupManager::default()); + let rg1 = new_resource_group_ru("test".into(), 1000, 14); + resource_ctl.add_resource_group(rg1); + assert!(resource_ctl.get_resource_limiter("test").is_none()); + + let test_provider = TestResourceStatsProvider::new(8.0, 10000.0); + let mut worker = + 
GroupQuotaAdjustWorker::with_quota_getter(resource_ctl.clone(), test_provider); + + let limiter = resource_ctl.get_resource_limiter("default").unwrap(); + assert!( + limiter + .get_limiter(ResourceType::Cpu) + .get_rate_limit() + .is_infinite() + ); + assert!( + limiter + .get_limiter(ResourceType::Io) + .get_rate_limit() + .is_infinite() + ); + + fn reset_quota_limiter(limiter: &QuotaLimiter) { + let limit = limiter.get_rate_limit(); + if limit.is_finite() { + limiter.set_rate_limit(f64::INFINITY); + limiter.set_rate_limit(limit); + } + } + + fn reset_limiter(limiter: &Arc) { + reset_quota_limiter(limiter.get_limiter(ResourceType::Cpu)); + reset_quota_limiter(limiter.get_limiter(ResourceType::Io)); + } + + let reset_quota = |worker: &mut GroupQuotaAdjustWorker, + cpu: f64, + io: f64, + dur: Duration| { + worker.resource_quota_getter.cpu_used = cpu; + worker.resource_quota_getter.io_used = io; + let now = Instant::now_coarse(); + worker.last_adjust_time = now - dur; + }; + + fn check(val: f64, expected: f64) { + assert!( + expected * 0.99 < val && val < expected * 1.01, + "actual: {}, expected: {}", + val, + expected + ); + } + + fn check_limiter(limiter: &Arc, cpu: f64, io: f64) { + check( + limiter.get_limiter(ResourceType::Cpu).get_rate_limit(), + cpu * 1_000_000.0, + ); + check(limiter.get_limiter(ResourceType::Io).get_rate_limit(), io); + reset_limiter(limiter); + } + + reset_quota(&mut worker, 0.0, 0.0, Duration::from_secs(1)); + worker.adjust_quota(); + check_limiter(&limiter, 7.2, 9000.0); + + reset_quota(&mut worker, 4.0, 2000.0, Duration::from_millis(500)); + worker.adjust_quota(); + check_limiter(&limiter, 7.2, 9000.0); + + reset_quota(&mut worker, 4.0, 2000.0, Duration::from_secs(1)); + worker.adjust_quota(); + check_limiter(&limiter, 3.6, 7200.0); + + reset_quota(&mut worker, 6.0, 4000.0, Duration::from_secs(1)); + limiter.consume(Duration::from_secs(2), 2000); + worker.adjust_quota(); + check_limiter(&limiter, 3.6, 7200.0); + + reset_quota(&mut 
worker, 8.0, 9500.0, Duration::from_secs(1)); + worker.adjust_quota(); + check_limiter(&limiter, 0.8, 1000.0); + + reset_quota(&mut worker, 7.5, 9500.0, Duration::from_secs(1)); + limiter.consume(Duration::from_secs(2), 2000); + worker.adjust_quota(); + check_limiter(&limiter, 2.25, 2250.0); + + reset_quota(&mut worker, 7.5, 9500.0, Duration::from_secs(5)); + limiter.consume(Duration::from_secs(10), 10000); + worker.adjust_quota(); + check_limiter(&limiter, 2.25, 2250.0); + + let default = new_resource_group_ru("default".into(), 2000, 8); + resource_ctl.add_resource_group(default); + let new_limiter = resource_ctl.get_resource_limiter("default").unwrap(); + assert_eq!(&*new_limiter as *const _, &*limiter as *const _); + + let bg = new_resource_group_ru("background".into(), 1000, 15); + resource_ctl.add_resource_group(bg); + let bg_limiter = resource_ctl.get_resource_limiter("background").unwrap(); + + reset_quota(&mut worker, 5.0, 7000.0, Duration::from_secs(1)); + worker.adjust_quota(); + check_limiter(&limiter, 1.8, 1800.0); + check_limiter(&bg_limiter, 0.9, 900.0); + + reset_quota(&mut worker, 6.0, 5000.0, Duration::from_secs(1)); + limiter.consume(Duration::from_millis(1200), 1200); + bg_limiter.consume(Duration::from_millis(1800), 1800); + worker.adjust_quota(); + check_limiter(&limiter, 2.4, 3600.0); + check_limiter(&bg_limiter, 2.1, 3600.0); + } +} diff --git a/components/server/src/server.rs b/components/server/src/server.rs index aa86709a7e1..bb89f6f2333 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -71,6 +71,7 @@ use raftstore::{ }; use resolved_ts::LeadershipResolver; use resource_control::{ + worker::{GroupQuotaAdjustWorker, BACKGROUND_LIMIT_ADJUST_DURATION}, ResourceGroupManager, ResourceManagerService, MIN_PRIORITY_UPDATE_INTERVAL, }; use security::SecurityManager; @@ -317,6 +318,12 @@ where background_worker.spawn_async_task(async move { resource_mgr_service.watch_resource_groups().await; }); + // spawn a task 
to auto adjust background quota limiter. + let io_bandwidth = config.storage.io_rate_limit.max_bytes_per_sec.0; + let mut worker = GroupQuotaAdjustWorker::new(mgr.clone(), io_bandwidth); + background_worker.spawn_interval_task(BACKGROUND_LIMIT_ADJUST_DURATION, move || { + worker.adjust_quota(); + }); Some(mgr) } else { None diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 38b31755cda..04976d42d1c 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -67,6 +67,7 @@ use raftstore_v2::{ StateStorage, }; use resource_control::{ + worker::{GroupQuotaAdjustWorker, BACKGROUND_LIMIT_ADJUST_DURATION}, ResourceGroupManager, ResourceManagerService, MIN_PRIORITY_UPDATE_INTERVAL, }; use security::SecurityManager; @@ -300,6 +301,12 @@ where background_worker.spawn_async_task(async move { resource_mgr_service.watch_resource_groups().await; }); + // spawn a task to auto adjust background quota limiter. + let io_bandwidth = config.storage.io_rate_limit.max_bytes_per_sec.0; + let mut worker = GroupQuotaAdjustWorker::new(mgr.clone(), io_bandwidth); + background_worker.spawn_interval_task(BACKGROUND_LIMIT_ADJUST_DURATION, move || { + worker.adjust_quota(); + }); Some(mgr) } else { None From 783f8e2f7e9c59936ce9a74df818f80ce6ce8802 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 13 Jul 2023 15:21:44 +0800 Subject: [PATCH 0790/1149] raftstore-v2: do not use zero sequence number for cache (#15104) ref tikv/tikv#15091 Sequence number can be zero if there is no write, and zero sequence number may lead to false cache hit when TiDB bootstrap. 
Signed-off-by: Neil Shen Co-authored-by: tonyxuqqi --- .../raftstore-v2/src/operation/query/local.rs | 3 +- .../raftstore/src/store/region_snapshot.rs | 6 +++- components/test_storage/src/util.rs | 28 ++++++++++++------- tests/integrations/coprocessor/test_select.rs | 27 ++++++++++++++---- 4 files changed, 46 insertions(+), 18 deletions(-) diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index b5b6b676973..36dbb26e4c7 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -276,8 +276,7 @@ where } }; - // FIXME: Tests show it causing TiDB failed to start. - // snap.set_from_v2(); + snap.set_from_v2(); snap.txn_ext = Some(delegate.txn_ext.clone()); snap.term = NonZeroU64::new(delegate.term); snap.txn_extra_op = delegate.txn_extra_op.load(); diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index 41662df149f..bc22dfbf586 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -93,7 +93,11 @@ where pub fn get_data_version(&self) -> Result { if self.from_v2 { - Ok(self.snap.sequence_number()) + if self.snap.sequence_number() != 0 { + Ok(self.snap.sequence_number()) + } else { + Err(box_err!("Snapshot sequence number 0")) + } } else { self.get_apply_index() } diff --git a/components/test_storage/src/util.rs b/components/test_storage/src/util.rs index 032fe24c60c..2b7d80ea013 100644 --- a/components/test_storage/src/util.rs +++ b/components/test_storage/src/util.rs @@ -7,21 +7,29 @@ use tikv_util::HandyRwLock; use super::*; +#[macro_export] +macro_rules! prepare_raft_engine { + ($cluster:expr, $key:expr) => {{ + $cluster.run(); + // make sure leader has been elected. 
+ assert_eq!($cluster.must_get(b""), None); + let region = $cluster.get_region($key.as_bytes()); + let leader = $cluster.leader_of_region(region.get_id()).unwrap(); + let engine = $cluster.sim.rl().storages[&leader.get_id()].clone(); + let mut ctx = Context::default(); + ctx.set_region_id(region.get_id()); + ctx.set_region_epoch(region.get_region_epoch().clone()); + ctx.set_peer(leader); + (engine, ctx) + }}; +} + pub fn new_raft_engine( count: usize, key: &str, ) -> (Cluster, SimulateEngine, Context) { let mut cluster = new_server_cluster(0, count); - cluster.run(); - // make sure leader has been elected. - assert_eq!(cluster.must_get(b""), None); - let region = cluster.get_region(key.as_bytes()); - let leader = cluster.leader_of_region(region.get_id()).unwrap(); - let engine = cluster.sim.rl().storages[&leader.get_id()].clone(); - let mut ctx = Context::default(); - ctx.set_region_id(region.get_id()); - ctx.set_region_epoch(region.get_region_epoch().clone()); - ctx.set_peer(leader); + let (engine, ctx) = prepare_raft_engine!(cluster, key); (cluster, engine, ctx) } diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index 02edf45cd31..9af28b6e3d6 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -10,7 +10,8 @@ use kvproto::{ use protobuf::Message; use raftstore::store::Bucket; use test_coprocessor::*; -use test_raftstore::{Cluster, ServerCluster}; +use test_raftstore::*; +use test_raftstore_macro::test_case; use test_storage::*; use tidb_query_datatype::{ codec::{datum, Datum}, @@ -1762,18 +1763,34 @@ fn test_snapshot_failed() { assert!(resp.get_region_error().has_store_not_match()); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_empty_data_cache_miss() { + let mut cluster = new_cluster(0, 1); + let (raft_engine, ctx) = prepare_raft_engine!(cluster, ""); + + let product = 
ProductTable::new(); + let (_, endpoint, _) = + init_data_with_engine_and_commit(ctx.clone(), raft_engine, &product, &[], false); + let mut req = DagSelect::from(&product).build_with(ctx, &[0]); + req.set_is_cache_enabled(true); + let resp = handle_request(&endpoint, req); + assert!(!resp.get_is_cache_hit()); +} + +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_cache() { + let mut cluster = new_cluster(0, 1); + let (raft_engine, ctx) = prepare_raft_engine!(cluster, ""); + let data = vec![ (1, Some("name:0"), 2), (2, Some("name:4"), 3), (4, Some("name:3"), 1), (5, Some("name:1"), 4), ]; - let product = ProductTable::new(); - let (_cluster, raft_engine, ctx) = new_raft_engine(1, ""); - let (_, endpoint, _) = init_data_with_engine_and_commit(ctx.clone(), raft_engine, &product, &data, true); From c73b6d32fc12386cf505eb1d3b613b5843a3a8f8 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Thu, 13 Jul 2023 19:56:14 -0700 Subject: [PATCH 0791/1149] cdc: Exponentialize resolved ts scan backoff (#15119) close tikv/tikv#15112 Make the scan task retry wait time exponential, so that less logs will be produced. 
Signed-off-by: Yang Zhang --- components/resolved_ts/src/scanner.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index b6f9f900fd2..0ca74bda29d 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -26,7 +26,7 @@ use crate::{ const DEFAULT_SCAN_BATCH_SIZE: usize = 1024; const GET_SNAPSHOT_RETRY_TIME: u32 = 3; -const GET_SNAPSHOT_RETRY_BACKOFF_STEP: Duration = Duration::from_millis(25); +const GET_SNAPSHOT_RETRY_BACKOFF_STEP: Duration = Duration::from_millis(100); pub type BeforeStartCallback = Box; pub type OnErrorCallback = Box; @@ -183,7 +183,9 @@ impl, E: KvEngine> ScannerPool { if retry_times != 0 { if let Err(e) = GLOBAL_TIMER_HANDLE .delay( - std::time::Instant::now() + retry_times * GET_SNAPSHOT_RETRY_BACKOFF_STEP, + std::time::Instant::now() + + GET_SNAPSHOT_RETRY_BACKOFF_STEP + .mul_f64(10_f64.powi(retry_times as i32 - 1)), ) .compat() .await From 0f284c18f159222a6610f5a4274a43a3b1373567 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Fri, 14 Jul 2023 15:20:14 +0800 Subject: [PATCH 0792/1149] resolved ts: update read progress when peer destroyed or merged (#15102) close tikv/tikv#15031 Signed-off-by: 3pointer Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/operation/life.rs | 1 + .../integrations/raftstore/test_stale_read.rs | 40 ++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index bf40429c9f1..05c505461dd 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -795,6 +795,7 @@ impl Peer { let mut meta = ctx.store_meta.lock().unwrap(); meta.remove_region(region_id); meta.readers.remove(®ion_id); + meta.region_read_progress.remove(®ion_id); 
ctx.tablet_registry.remove(region_id); } // Remove tablet first, otherwise in extreme cases, a new peer can be created diff --git a/tests/integrations/raftstore/test_stale_read.rs b/tests/integrations/raftstore/test_stale_read.rs index 9cbbc6ca8ba..466f2dff608 100644 --- a/tests/integrations/raftstore/test_stale_read.rs +++ b/tests/integrations/raftstore/test_stale_read.rs @@ -8,7 +8,7 @@ use kvproto::{ metapb::{Peer, Region}, tikvpb_grpc::TikvClient, }; -use test_raftstore::{new_mutation, new_peer, new_server_cluster, PeerClient}; +use test_raftstore::{must_get_equal, new_mutation, new_peer, new_server_cluster, PeerClient}; use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, time::Instant}; @@ -131,3 +131,41 @@ fn test_stale_read_resolved_ts_advance() { must_resolved_ts_advance(&left); must_resolved_ts_advance(&right); } + +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_resolved_ts_after_destroy_peer() { + let mut cluster = new_cluster(0, 3); + cluster.cfg.resolved_ts.enable = true; + cluster.cfg.resolved_ts.advance_ts_interval = ReadableDuration::millis(200); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + + let r1 = cluster.run_conf_change(); + // Now region 1 only has peer (1, 1); + let (key, value) = (b"k1", b"v1"); + + cluster.must_put(key, value); + assert_eq!(cluster.get(key), Some(value.to_vec())); + + // Add peer (2, 2) to region 1. + pd_client.must_add_peer(r1, new_peer(2, 2)); + must_get_equal(&cluster.get_engine(2), key, value); + + // Add peer (3, 3) to region 1. + pd_client.must_add_peer(r1, new_peer(3, 3)); + must_get_equal(&cluster.get_engine(3), key, value); + + // Transfer leader to peer (2, 2). + cluster.must_transfer_leader(1, new_peer(2, 2)); + + // Remove peer (1, 1) from region 1. + pd_client.must_remove_peer(r1, new_peer(1, 1)); + + // Make sure region 1 is removed from store 1. 
+ cluster.wait_destroy_and_clean(r1, new_peer(1, 1)); + + // Must not get destory peer's read progress + let meta = cluster.store_metas[&r1].lock().unwrap(); + assert_eq!(None, meta.region_read_progress.get_resolved_ts(&r1)) +} From 3e399f743751a3e7fc6921af22ee5db616c18163 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Fri, 14 Jul 2023 17:14:15 +0800 Subject: [PATCH 0793/1149] bucket polish and enable evict-cache-on-memory-ratio by default (#15096) close tikv/tikv#14356 1) avoid sending PeerTick::SplitRegionCheck for each write 2) reduce default bucket size from 96MB to 50MB to reduce chance of client side timeout 3) change the default value of evict-cache-on-memory-ratio to 0.1. Signed-off-by: tonyxuqqi Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- components/pd_client/src/lib.rs | 5 +++ .../raftstore-v2/src/operation/bucket.rs | 45 ++++++++++++++++--- .../src/operation/command/admin/split.rs | 5 ++- .../raftstore-v2/src/operation/command/mod.rs | 5 ++- .../raftstore/src/coprocessor/config.rs | 6 ++- components/raftstore/src/store/config.rs | 10 ++--- .../raftstore/src/store/worker/split_check.rs | 6 +-- 7 files changed, 65 insertions(+), 17 deletions(-) diff --git a/components/pd_client/src/lib.rs b/components/pd_client/src/lib.rs index e2d2ef750b8..d547cbf97b0 100644 --- a/components/pd_client/src/lib.rs +++ b/components/pd_client/src/lib.rs @@ -125,6 +125,11 @@ impl BucketMeta { self.keys.remove(idx); self.sizes.remove(idx); } + + // total size of the whole buckets + pub fn total_size(&self) -> u64 { + self.sizes.iter().sum() + } } #[derive(Debug, Clone)] diff --git a/components/raftstore-v2/src/operation/bucket.rs b/components/raftstore-v2/src/operation/bucket.rs index 317ed89ef8d..6bb5514c0a1 100644 --- a/components/raftstore-v2/src/operation/bucket.rs +++ b/components/raftstore-v2/src/operation/bucket.rs @@ -11,7 +11,7 @@ use raftstore::{ coprocessor::RegionChangeEvent, store::{util, Bucket, BucketRange, ReadProgress, 
SplitCheckTask, Transport}, }; -use slog::{error, warn}; +use slog::{error, info, warn}; use crate::{ batch::StoreContext, @@ -29,6 +29,9 @@ pub struct BucketStatsInfo { // the report bucket stat records the increment stats after last report pd. // it will be reset after report pd. report_bucket_stat: Option, + // last bucket count. + // BucketStat.meta is Arc so it cannot be used for last bucket count + last_bucket_count: usize, } impl BucketStatsInfo { @@ -104,6 +107,10 @@ impl BucketStatsInfo { } self.report_bucket_stat = buckets.clone(); self.bucket_stat = buckets; + self.last_bucket_count = self + .bucket_stat + .as_ref() + .map_or(0, |bucket_stat| bucket_stat.meta.keys.len() - 1); } #[inline] @@ -125,6 +132,11 @@ impl BucketStatsInfo { pub fn bucket_stat(&self) -> &Option { &self.bucket_stat } + + #[inline] + pub fn last_bucket_count(&self) -> usize { + self.last_bucket_count + } } impl Peer { @@ -161,7 +173,11 @@ impl Peer { let region = self.region(); let current_version = self.region_buckets_info().version(); + let next_bucket_version = gen_bucket_version(self.term(), current_version); + let mut is_first_refresh = true; + let mut change_bucket_version = false; let mut region_buckets: BucketStat; + // The region buckets reset after this region happened split or merge. // The message should be dropped if it's epoch is lower than the regions. // The bucket ranges is none when the region buckets is also none. 
@@ -170,13 +186,11 @@ impl Peer { if let (Some(bucket_ranges), Some(peer_region_buckets)) = (bucket_ranges, self.region_buckets_info().bucket_stat()) { + is_first_refresh = false; assert_eq!(buckets.len(), bucket_ranges.len()); let mut meta_idx = 0; region_buckets = peer_region_buckets.clone(); let mut meta = (*region_buckets.meta).clone(); - if !buckets.is_empty() { - meta.version = gen_bucket_version(self.term(), current_version); - } meta.region_epoch = region_epoch; for (bucket, bucket_range) in buckets.into_iter().zip(bucket_ranges) { // the bucket ranges maybe need to split or merge not all the meta keys, so it @@ -211,6 +225,7 @@ impl Peer { // bucket is too small region_buckets.left_merge(meta_idx); meta.left_merge(meta_idx); + change_bucket_version = true; continue; } } else { @@ -221,21 +236,30 @@ impl Peer { meta_idx += 1; region_buckets.split(meta_idx); meta.split(meta_idx, bucket_key); + change_bucket_version = true; } } meta_idx += 1; } + if self.region_buckets_info().last_bucket_count() != region_buckets.meta.keys.len() - 1 + { + change_bucket_version = true; + } + if change_bucket_version { + meta.version = next_bucket_version; + } region_buckets.meta = Arc::new(meta); } else { // when the region buckets is none, the exclusive buckets includes all the // bucket keys. assert_eq!(buckets.len(), 1); + change_bucket_version = true; let bucket_keys = buckets.pop().unwrap().keys; let bucket_count = bucket_keys.len() + 1; let mut meta = BucketMeta { region_id: self.region_id(), region_epoch, - version: gen_bucket_version(self.term(), current_version), + version: next_bucket_version, keys: bucket_keys, sizes: vec![store_ctx.coprocessor_host.cfg.region_bucket_size.0; bucket_count], }; @@ -246,6 +270,17 @@ impl Peer { } let buckets_count = region_buckets.meta.keys.len() - 1; + if change_bucket_version { + // TODO: we may need to make it debug once the coprocessor timeout is resolved. 
+ info!( + self.logger, + "refreshed region bucket info"; + "bucket_version" => next_bucket_version, + "buckets_count" => buckets_count, + "estimated_region_size" => region_buckets.meta.total_size(), + "first_refresh" => is_first_refresh, + ); + } store_ctx.coprocessor_host.on_region_changed( region, RegionChangeEvent::UpdateBuckets(buckets_count), diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 96e03f940a4..f3718813686 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -280,10 +280,11 @@ impl Peer { self.add_pending_tick(PeerTick::SplitRegionCheck); } - pub fn update_split_flow_control(&mut self, metrics: &ApplyMetrics) { + pub fn update_split_flow_control(&mut self, metrics: &ApplyMetrics, threshold: i64) { let control = self.split_flow_control_mut(); control.size_diff_hint += metrics.size_diff_hint; - if self.is_leader() { + let size_diff_hint = control.size_diff_hint; + if self.is_leader() && size_diff_hint >= threshold { self.add_pending_tick(PeerTick::SplitRegionCheck); } } diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 8c63ab1dc41..5d842549700 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -406,7 +406,10 @@ impl Peer { } self.region_buckets_info_mut() .add_bucket_flow(&apply_res.bucket_stat); - self.update_split_flow_control(&apply_res.metrics); + self.update_split_flow_control( + &apply_res.metrics, + ctx.cfg.region_split_check_diff().0 as i64, + ); self.update_stat(&apply_res.metrics); ctx.store_stat.engine_total_bytes_written += apply_res.metrics.written_bytes; ctx.store_stat.engine_total_keys_written += apply_res.metrics.written_keys; diff --git a/components/raftstore/src/coprocessor/config.rs 
b/components/raftstore/src/coprocessor/config.rs index c05a8e89a41..e1246e8d59d 100644 --- a/components/raftstore/src/coprocessor/config.rs +++ b/components/raftstore/src/coprocessor/config.rs @@ -76,7 +76,11 @@ pub const RAFTSTORE_V2_SPLIT_SIZE: ReadableSize = ReadableSize::gb(10); /// Default batch split limit. pub const BATCH_SPLIT_LIMIT: u64 = 10; -pub const DEFAULT_BUCKET_SIZE: ReadableSize = ReadableSize::mb(96); +// A bucket will be split only when its size is larger than 2x of +// DEFAULT_BUCKET_SIZE So the avg of the actual bucket size is 75MB, which is +// slightly less than region size We don't use 48MB size because it will enable +// the automatic bucket under default 96MB region size. +pub const DEFAULT_BUCKET_SIZE: ReadableSize = ReadableSize::mb(50); pub const DEFAULT_REGION_BUCKET_MERGE_SIZE_RATIO: f64 = 0.33; diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 0dc5c6b7925..44be73329f4 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -270,10 +270,10 @@ pub struct Config { /// `evict_cache_on_memory_ratio` * total. /// /// Set it to 0 can disable cache evict. - // By default it's 0.2. So for different system memory capacity, cache evict happens: - // * system=8G, memory_usage_limit=6G, evict=1.2G - // * system=16G, memory_usage_limit=12G, evict=2.4G - // * system=32G, memory_usage_limit=24G, evict=4.8G + // By default it's 0.1. 
So for different system memory capacity, cache evict happens: + // * system=8G, memory_usage_limit=6G, evict=0.6G + // * system=16G, memory_usage_limit=12G, evict=1.2G + // * system=32G, memory_usage_limit=24G, evict=2.4G pub evict_cache_on_memory_ratio: f64, pub cmd_batch: bool, @@ -451,7 +451,7 @@ impl Default for Config { apply_yield_duration: ReadableDuration::millis(500), apply_yield_write_size: ReadableSize::kb(32), perf_level: PerfLevel::Uninitialized, - evict_cache_on_memory_ratio: 0.0, + evict_cache_on_memory_ratio: 0.1, cmd_batch: true, cmd_batch_concurrent_ready_max_count: 1, raft_write_size_limit: ReadableSize::mb(1), diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index 1335ed5d5e8..4ff853f70a0 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -269,13 +269,13 @@ impl Runner { bucket.set_end_key(range.1.clone()); let bucket_entry = host.approximate_bucket_keys(&bucket, tablet)?; debug!( - "bucket_entry size {} keys count {}", + "bucket_entry size {} keys count {}, region_id {}", bucket_entry.size, - bucket_entry.keys.len() + bucket_entry.keys.len(), + region.get_id(), ); buckets.push(bucket_entry); } - self.on_buckets_created(&mut buckets, region, &ranges); self.refresh_region_buckets(buckets, region, bucket_ranges); Ok(()) From e116becf6835a6a4e3580ad7c310a63d0f428079 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Fri, 14 Jul 2023 17:27:44 +0800 Subject: [PATCH 0794/1149] config: fix unproper online config and check (#14935) close tikv/tikv#14877 Signed-off-by: 3pointer Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/sst_importer/src/config.rs | 3 +++ src/config/mod.rs | 9 --------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/components/sst_importer/src/config.rs b/components/sst_importer/src/config.rs index 7434c5cf0cd..7e83a07f2b2 
100644 --- a/components/sst_importer/src/config.rs +++ b/components/sst_importer/src/config.rs @@ -13,11 +13,14 @@ use tikv_util::{config::ReadableDuration, HandyRwLock}; #[serde(default)] #[serde(rename_all = "kebab-case")] pub struct Config { + #[online_config(skip)] pub num_threads: usize, + #[online_config(skip)] pub stream_channel_window: usize, /// The timeout for going back into normal mode from import mode. /// /// Default is 10m. + #[online_config(skip)] pub import_mode_timeout: ReadableDuration, /// the ratio of system memory used for import. pub memory_use_ratio: f64, diff --git a/src/config/mod.rs b/src/config/mod.rs index d575827e699..eff0da14ba3 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1569,15 +1569,6 @@ impl DbConfig { ) .into()); } - if self.max_sub_compactions == 0 - || self.max_sub_compactions as i32 > self.max_background_jobs - { - return Err(format!( - "max_sub_compactions should be greater than 0 and less than or equal to {:?}", - self.max_background_jobs, - ) - .into()); - } if self.max_background_flushes <= 0 || self.max_background_flushes > limit { return Err(format!( "max_background_flushes should be greater than 0 and less than or equal to {:?}", From 31a629950d05f893fdd72717938118ad11ed5d3f Mon Sep 17 00:00:00 2001 From: ShuNing Date: Fri, 14 Jul 2023 21:38:14 +0800 Subject: [PATCH 0795/1149] raftstore: allow DrAutoSync to majority commit log during SyncRecover phase (#15103) close tikv/tikv#14975 raftstore: do not enable group commit when replication mode is SyncRecover - during DR mode, the SyncRecover state should be committed in majority mode. 
otherwise, the qps may drop Signed-off-by: nolouch Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/peer.rs | 64 ++++++++++++++++--- .../raftstore/test_replication_mode.rs | 30 +++++++-- 2 files changed, 79 insertions(+), 15 deletions(-) diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 43e090a8aa0..5fcf0a75f69 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -1240,7 +1240,7 @@ where /// Updates replication mode. pub fn switch_replication_mode(&mut self, state: &Mutex) { self.replication_sync = false; - let mut guard = state.lock().unwrap(); + let guard = state.lock().unwrap(); let enable_group_commit = if guard.status().get_mode() == ReplicationMode::Majority { self.replication_mode_version = 0; self.dr_auto_sync_state = DrAutoSyncState::Async; @@ -1248,9 +1248,23 @@ where } else { self.dr_auto_sync_state = guard.status().get_dr_auto_sync().get_state(); self.replication_mode_version = guard.status().get_dr_auto_sync().state_id; - guard.status().get_dr_auto_sync().get_state() != DrAutoSyncState::Async + match guard.status().get_dr_auto_sync().get_state() { + // SyncRecover will enable group commit after it catches up logs. 
+ DrAutoSyncState::Async | DrAutoSyncState::SyncRecover => false, + _ => true, + } }; + drop(guard); + self.switch_group_commit(enable_group_commit, state); + } + + fn switch_group_commit( + &mut self, + enable_group_commit: bool, + state: &Mutex, + ) { if enable_group_commit { + let mut guard = state.lock().unwrap(); let ids = mem::replace( guard.calculate_commit_group( self.replication_mode_version, @@ -1261,13 +1275,11 @@ where drop(guard); self.raft_group.raft.clear_commit_group(); self.raft_group.raft.assign_commit_groups(&ids); - } else { - drop(guard); } self.raft_group .raft .enable_group_commit(enable_group_commit); - info!("switch replication mode"; "version" => self.replication_mode_version, "region_id" => self.region_id, "peer_id" => self.peer.id); + info!("switch replication mode"; "version" => self.replication_mode_version, "region_id" => self.region_id, "peer_id" => self.peer.id, "enable_group_commit" => enable_group_commit); } /// Register self to apply_scheduler so that the peer is then usable. @@ -5230,7 +5242,10 @@ where None } - fn region_replication_status(&mut self) -> Option { + fn region_replication_status( + &mut self, + ctx: &PollContext, + ) -> Option { if self.replication_mode_version == 0 { return None; } @@ -5240,7 +5255,8 @@ where }; let state = if !self.replication_sync { if self.dr_auto_sync_state != DrAutoSyncState::Async { - let res = self.raft_group.raft.check_group_commit_consistent(); + // use raft_log_gc_threshold, it's indicate the log is almost synced. 
+ let res = self.check_group_commit_consistent(ctx.cfg.raft_log_gc_threshold); if Some(true) != res { let mut buffer: SmallVec<[(u64, u64, u64); 5]> = SmallVec::new(); if self.get_store().applied_term() >= self.term() { @@ -5257,9 +5273,16 @@ where "status" => ?res, "region_id" => self.region_id, "peer_id" => self.peer.id, - "progress" => ?buffer + "progress" => ?buffer, + "dr_auto_sync_state" => ?self.dr_auto_sync_state, ); } else { + // Once the DR replicas catch up the log during the `SyncRecover` phase, we + // should enable group commit to promise `IntegrityOverLabel`. then safe + // to switch to the `Sync` phase. + if self.dr_auto_sync_state == DrAutoSyncState::SyncRecover { + self.switch_group_commit(true, &ctx.global_replication_state) + } self.replication_sync = true; } match res { @@ -5277,6 +5300,29 @@ where Some(status) } + pub fn check_group_commit_consistent(&mut self, allow_gap: u64) -> Option { + if !self.is_leader() || !self.raft_group.raft.apply_to_current_term() { + return None; + } + let original = self.raft_group.raft.group_commit(); + let res = { + // Hack: to check groups consistent we need to enable group commit first + // otherwise `maximal_committed_index` will return the committed index + // based on majorty instead of commit group + // TODO: remove outdated workaround after fixing raft interface, but old version + // keep this workaround. 
+ self.raft_group.raft.enable_group_commit(true); + let (index, mut group_consistent) = + self.raft_group.raft.mut_prs().maximal_committed_index(); + if self.raft_group.raft.raft_log.committed > index { + group_consistent &= self.raft_group.raft.raft_log.committed - index < allow_gap; + } + Some(group_consistent) + }; + self.raft_group.raft.enable_group_commit(original); + res + } + pub fn heartbeat_pd(&mut self, ctx: &PollContext) { let task = PdTask::Heartbeat(HeartbeatTask { term: self.term(), @@ -5288,7 +5334,7 @@ where written_keys: self.peer_stat.written_keys, approximate_size: self.approximate_size, approximate_keys: self.approximate_keys, - replication_status: self.region_replication_status(), + replication_status: self.region_replication_status(ctx), wait_data_peers: self.wait_data_peers.clone(), }); if let Err(e) = ctx.pd_scheduler.schedule(task) { diff --git a/tests/integrations/raftstore/test_replication_mode.rs b/tests/integrations/raftstore/test_replication_mode.rs index cb7de1fad35..367ac63aabb 100644 --- a/tests/integrations/raftstore/test_replication_mode.rs +++ b/tests/integrations/raftstore/test_replication_mode.rs @@ -13,7 +13,7 @@ fn prepare_cluster() -> Cluster { cluster.pd_client.disable_default_operator(); cluster.pd_client.configure_dr_auto_sync("zone"); cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); - cluster.cfg.raft_store.raft_log_gc_threshold = 10; + cluster.cfg.raft_store.raft_log_gc_threshold = 1; cluster.add_label(1, "zone", "ES"); cluster.add_label(2, "zone", "ES"); cluster.add_label(3, "zone", "WS"); @@ -290,11 +290,9 @@ fn test_switching_replication_mode() { .rl() .async_command_on_node(1, request, cb) .unwrap(); - assert_eq!( - rx.recv_timeout(Duration::from_millis(100)), - Err(future::RecvTimeoutError::Timeout) - ); - must_get_none(&cluster.get_engine(1), b"k3"); + // sync recover should not block write. ref https://github.com/tikv/tikv/issues/14975. 
+ assert_eq!(rx.recv_timeout(Duration::from_millis(100)).is_ok(), true); + must_get_equal(&cluster.get_engine(1), b"k3", b"v3"); let state = cluster.pd_client.region_replication_status(region.get_id()); assert_eq!(state.state_id, 3); assert_eq!(state.state, RegionReplicationState::SimpleMajority); @@ -305,6 +303,26 @@ fn test_switching_replication_mode() { let state = cluster.pd_client.region_replication_status(region.get_id()); assert_eq!(state.state_id, 3); assert_eq!(state.state, RegionReplicationState::IntegrityOverLabel); + + cluster.add_send_filter(IsolationFilterFactory::new(3)); + let mut request = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![new_put_cf_cmd("default", b"k4", b"v4")], + false, + ); + request.mut_header().set_peer(new_peer(1, 1)); + let (cb, mut rx) = make_cb(&request); + cluster + .sim + .rl() + .async_command_on_node(1, request, cb) + .unwrap(); + // already enable group commit. + assert_eq!( + rx.recv_timeout(Duration::from_millis(100)), + Err(future::RecvTimeoutError::Timeout) + ); } #[test] From fac3d728d23d03d866f8fee328aaaf635373df80 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 17 Jul 2023 09:54:45 +0800 Subject: [PATCH 0796/1149] raftstore,raftstore-v2: fix unsafe vote after start (#15085) close tikv/tikv#15035 A peer should not grant vote for an election timeout after node start, otherwise, it may cause two leaders holding leader lease. 
See https://github.com/tikv/tikv/issues/15035 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/batch/store.rs | 40 ++++++++- .../raftstore-v2/src/operation/ready/mod.rs | 15 +++- components/raftstore/src/store/config.rs | 16 ++++ components/raftstore/src/store/fsm/store.rs | 38 ++++++++- components/raftstore/src/store/metrics.rs | 1 + components/raftstore/src/store/peer.rs | 13 +++ components/test_raftstore/src/util.rs | 7 +- components/test_raftstore_macro/src/lib.rs | 2 +- tests/failpoints/cases/test_replica_read.rs | 3 +- .../failpoints/cases/test_transfer_leader.rs | 3 +- tests/integrations/config/mod.rs | 1 + .../integrations/raftstore/test_lease_read.rs | 82 ++++++++++++++++++- tests/integrations/raftstore/test_merge.rs | 1 - .../raftstore/test_replica_read.rs | 1 - .../raftstore/test_split_region.rs | 1 - 15 files changed, 206 insertions(+), 18 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 3240b954a87..39a28ea1a0e 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -47,7 +47,7 @@ use tikv_util::{ config::{Tracker, VersionTrack}, log::SlogFormat, sys::SysQuota, - time::{duration_to_sec, Instant as TiInstant, Limiter}, + time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant, Limiter}, timer::{SteadyTimer, GLOBAL_TIMER_HANDLE}, worker::{Builder, LazyWorker, Scheduler, Worker}, yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, @@ -77,7 +77,6 @@ pub struct StoreContext { pub coprocessor_host: CoprocessorHost, /// The transport for sending messages to peers on other stores. pub trans: T, - pub current_time: Option, pub has_ready: bool, pub raft_metrics: RaftMetrics, /// The latest configuration. @@ -97,6 +96,11 @@ pub struct StoreContext { /// A background pool used for high-priority works. 
pub high_priority_pool: FuturePool, + /// current_time from monotonic_raw_now. + pub current_time: Option, + /// unsafe_vote_deadline from monotonic_raw_now. + pub unsafe_vote_deadline: Option, + /// Disk usage for the store itself. pub self_disk_usage: DiskUsage, @@ -136,6 +140,23 @@ impl StoreContext { self.tick_batch[PeerTick::GcPeer as usize].wait_duration = 60 * cmp::min(Duration::from_secs(1), self.cfg.raft_base_tick_interval.0); } + + // Return None means it has passed unsafe vote period. + pub fn maybe_in_unsafe_vote_period(&mut self) -> Option { + if self.cfg.allow_unsafe_vote_after_start { + return None; + } + let deadline = TiInstant::Monotonic(self.unsafe_vote_deadline?); + let current_time = + TiInstant::Monotonic(*self.current_time.get_or_insert_with(monotonic_raw_now)); + let remain_duration = deadline.saturating_duration_since(current_time); + if remain_duration > Duration::ZERO { + Some(remain_duration) + } else { + self.unsafe_vote_deadline.take(); + None + } + } } /// A [`PollHandler`] that handles updates of [`StoreFsm`]s and [`PeerFsm`]s. 
@@ -341,6 +362,7 @@ struct StorePollerBuilder { global_stat: GlobalStoreStat, sst_importer: Arc, key_manager: Option>, + node_start_time: Timespec, // monotonic_raw_now } impl StorePollerBuilder { @@ -359,6 +381,7 @@ impl StorePollerBuilder { coprocessor_host: CoprocessorHost, sst_importer: Arc, key_manager: Option>, + node_start_time: Timespec, // monotonic_raw_now ) -> Self { let pool_size = cfg.value().apply_batch_system.pool_size; let max_pool_size = std::cmp::max( @@ -394,6 +417,7 @@ impl StorePollerBuilder { global_stat, sst_importer, key_manager, + node_start_time, } } @@ -512,11 +536,20 @@ where fn build(&mut self, _priority: batch_system::Priority) -> Self::Handler { let cfg = self.cfg.value().clone(); + let election_timeout = cfg.raft_base_tick_interval.0 + * if cfg.raft_min_election_timeout_ticks != 0 { + cfg.raft_min_election_timeout_ticks as u32 + } else { + cfg.raft_election_timeout_ticks as u32 + }; + let unsafe_vote_deadline = + Some(self.node_start_time + time::Duration::from_std(election_timeout).unwrap()); let mut poll_ctx = StoreContext { logger: self.logger.clone(), store_id: self.store_id, trans: self.trans.clone(), current_time: None, + unsafe_vote_deadline, has_ready: false, raft_metrics: RaftMetrics::new(cfg.waterfall_metrics), cfg, @@ -621,6 +654,7 @@ pub struct StoreSystem { schedulers: Option>, logger: Logger, shutdown: Arc, + node_start_time: Timespec, // monotonic_raw_now } impl StoreSystem { @@ -816,6 +850,7 @@ impl StoreSystem { coprocessor_host, sst_importer, key_manager, + self.node_start_time, ); self.schedulers = Some(schedulers); @@ -995,6 +1030,7 @@ where schedulers: None, logger: logger.clone(), shutdown: Arc::new(AtomicBool::new(false)), + node_start_time: monotonic_raw_now(), }; (StoreRouter { router, logger }, system) } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index e222f04c942..cfc522805bc 100644 --- 
a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -265,8 +265,19 @@ impl Peer { if !self.serving() { return; } - if util::is_vote_msg(msg.get_message()) && self.maybe_gc_sender(&msg) { - return; + if util::is_vote_msg(msg.get_message()) { + if self.maybe_gc_sender(&msg) { + return; + } + if let Some(remain) = ctx.maybe_in_unsafe_vote_period() { + debug!(self.logger, + "drop request vote for one election timeout after node starts"; + "from_peer_id" => msg.get_message().get_from(), + "remain_duration" => ?remain, + ); + ctx.raft_metrics.message_dropped.unsafe_vote.inc(); + return; + } } if msg.get_to_peer().get_store_id() != self.peer().get_store_id() { ctx.raft_metrics.message_dropped.mismatch_store_id.inc(); diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 44be73329f4..b01cfd8240b 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -197,6 +197,15 @@ pub struct Config { // It will be set to raft_store_max_leader_lease/4 by default. pub renew_leader_lease_advance_duration: ReadableDuration, + // Set true to allow handling request vote messages within one election time + // after TiKV start. + // + // Note: set to true may break leader lease. It should only be true in tests. + #[doc(hidden)] + #[serde(skip)] + #[online_config(skip)] + pub allow_unsafe_vote_after_start: bool, + // Right region derive origin region id when split. 
#[online_config(hidden)] pub right_derive_when_split: bool, @@ -483,6 +492,7 @@ impl Default for Config { report_min_resolved_ts_interval: ReadableDuration::secs(1), check_leader_lease_interval: ReadableDuration::secs(0), renew_leader_lease_advance_duration: ReadableDuration::secs(0), + allow_unsafe_vote_after_start: false, report_region_buckets_tick_interval: ReadableDuration::secs(10), max_snapshot_file_raw_size: ReadableSize::mb(100), unreachable_backoff: ReadableDuration::secs(10), @@ -608,6 +618,12 @@ impl Config { otherwise it may lead to inconsistency." ); } + if self.allow_unsafe_vote_after_start { + warn!( + "allow_unsafe_vote_after_start need to be false, otherwise \ + it may lead to inconsistency" + ); + } if self.raft_election_timeout_ticks <= self.raft_heartbeat_ticks { return Err(box_err!( diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 62ddc0b43db..f937505a82e 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -58,7 +58,7 @@ use tikv_util::{ store::{find_peer, region_on_stores}, sys as sys_util, sys::disk::{get_disk_status, DiskUsage}, - time::{duration_to_sec, Instant as TiInstant}, + time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant}, timer::SteadyTimer, warn, worker::{LazyWorker, Scheduler, Worker}, @@ -553,11 +553,13 @@ where pub pending_count: usize, pub ready_count: usize, pub has_ready: bool, + /// current_time from monotonic_raw_now. pub current_time: Option, + /// unsafe_vote_deadline from monotonic_raw_now. + pub unsafe_vote_deadline: Option, pub raft_perf_context: ER::PerfContext, pub kv_perf_context: EK::PerfContext, pub tick_batch: Vec, - pub node_start_time: Option, /// Disk usage for the store itself. 
pub self_disk_usage: DiskUsage, @@ -610,6 +612,23 @@ where self.tick_batch[PeerTick::RequestVoterReplicatedIndex as usize].wait_duration = self.cfg.raft_log_gc_tick_interval.0 * 2; } + + // Return None means it has passed unsafe vote period. + pub fn maybe_in_unsafe_vote_period(&mut self) -> Option { + if self.cfg.allow_unsafe_vote_after_start { + return None; + } + let deadline = TiInstant::Monotonic(self.unsafe_vote_deadline?); + let current_time = + TiInstant::Monotonic(*self.current_time.get_or_insert_with(monotonic_raw_now)); + let remain_duration = deadline.saturating_duration_since(current_time); + if remain_duration > Duration::ZERO { + Some(remain_duration) + } else { + self.unsafe_vote_deadline.take(); + None + } + } } impl PollContext @@ -1173,6 +1192,7 @@ pub struct RaftPollerBuilder { global_replication_state: Arc>, feature_gate: FeatureGate, write_senders: WriteSenders, + node_start_time: Timespec, // monotonic_raw_now } impl RaftPollerBuilder { @@ -1379,6 +1399,14 @@ where } else { None }; + let election_timeout = self.cfg.value().raft_base_tick_interval.0 + * if self.cfg.value().raft_min_election_timeout_ticks != 0 { + self.cfg.value().raft_min_election_timeout_ticks as u32 + } else { + self.cfg.value().raft_election_timeout_ticks as u32 + }; + let unsafe_vote_deadline = + Some(self.node_start_time + time::Duration::from_std(election_timeout).unwrap()); let mut ctx = PollContext { cfg: self.cfg.value().clone(), store: self.store.clone(), @@ -1407,6 +1435,7 @@ where ready_count: 0, has_ready: false, current_time: None, + unsafe_vote_deadline, raft_perf_context: ER::get_perf_context( self.cfg.value().perf_level, PerfContextKind::RaftstoreStore, @@ -1416,7 +1445,6 @@ where PerfContextKind::RaftstoreStore, ), tick_batch: vec![PeerTickBatch::default(); PeerTick::VARIANT_COUNT], - node_start_time: Some(TiInstant::now_coarse()), feature_gate: self.feature_gate.clone(), self_disk_usage: DiskUsage::Normal, store_disk_usages: Default::default(), @@ -1474,6 
+1502,7 @@ where global_replication_state: self.global_replication_state.clone(), feature_gate: self.feature_gate.clone(), write_senders: self.write_senders.clone(), + node_start_time: self.node_start_time, } } } @@ -1505,6 +1534,7 @@ pub struct RaftBatchSystem { router: RaftRouter, workers: Option>, store_writers: StoreWriters, + node_start_time: Timespec, // monotonic_raw_now } impl RaftBatchSystem { @@ -1669,6 +1699,7 @@ impl RaftBatchSystem { pending_create_peers: Arc::new(Mutex::new(HashMap::default())), feature_gate: pd_client.feature_gate().clone(), write_senders: self.store_writers.senders(), + node_start_time: self.node_start_time, }; let region_peers = builder.init()?; self.start_system::( @@ -1865,6 +1896,7 @@ pub fn create_raft_batch_system( .as_ref() .map(|m| m.derive_controller("store-writer".to_owned(), false)), ), + node_start_time: monotonic_raw_now(), }; (raft_router, system) } diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 870ce74b0f9..a5aa164e63e 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -180,6 +180,7 @@ make_static_metric! { disk_full, non_witness, recovery, + unsafe_vote, } pub label_enum ProposalType { diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 5fcf0a75f69..0036c4b4e0c 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -1969,6 +1969,19 @@ where } self.should_wake_up = state == LeaseState::Expired; } + } else if util::is_vote_msg(&m) { + // Only by passing an election timeout can peers handle request vote safely. 
+ // See https://github.com/tikv/tikv/issues/15035 + if let Some(remain) = ctx.maybe_in_unsafe_vote_period() { + debug!("drop request vote for one election timeout after node start"; + "region_id" => self.region_id, + "peer_id" => self.peer.get_id(), + "from_peer_id" => m.get_from(), + "remain_duration" => ?remain, + ); + ctx.raft_metrics.message_dropped.unsafe_vote.inc(); + return Ok(()); + } } let from_id = m.get_from(); diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index a497631c4fc..7fbb8cc2d28 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -151,13 +151,16 @@ lazy_static! { pub static ref TEST_CONFIG: TikvConfig = { let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); let common_test_cfg = manifest_dir.join("src/common-test.toml"); - TikvConfig::from_file(&common_test_cfg, None).unwrap_or_else(|e| { + let mut cfg = TikvConfig::from_file(&common_test_cfg, None).unwrap_or_else(|e| { panic!( "invalid auto generated configuration file {}, err {}", manifest_dir.display(), e ); - }) + }); + // To speed up leader transfer. + cfg.raft_store.allow_unsafe_vote_after_start = true; + cfg }; } diff --git a/components/test_raftstore_macro/src/lib.rs b/components/test_raftstore_macro/src/lib.rs index 3c8239d9f3b..39db5427bc2 100644 --- a/components/test_raftstore_macro/src/lib.rs +++ b/components/test_raftstore_macro/src/lib.rs @@ -75,7 +75,7 @@ fn render_test_cases(test_cases: Vec, fn_item: ItemFn) -> TokenStr 0, syn::parse( quote! 
{ - use #package::{util::*, #method as new_cluster}; + use #package::{util::*, #method as new_cluster, Simulator}; } .into(), ) diff --git a/tests/failpoints/cases/test_replica_read.rs b/tests/failpoints/cases/test_replica_read.rs index cf38dcc92ce..76caaf18490 100644 --- a/tests/failpoints/cases/test_replica_read.rs +++ b/tests/failpoints/cases/test_replica_read.rs @@ -11,9 +11,8 @@ use engine_traits::RaftEngineReadOnly; use futures::executor::block_on; use kvproto::raft_serverpb::{PeerState, RaftMessage, RegionLocalState}; use raft::eraftpb::MessageType; -use test_raftstore::{Simulator as S1, *}; +use test_raftstore::*; use test_raftstore_macro::test_case; -use test_raftstore_v2::Simulator as S2; use tikv::storage::config::EngineType; use tikv_util::{config::ReadableDuration, future::block_on_timeout, HandyRwLock}; use txn_types::{Key, Lock, LockType}; diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index b6d46ce5dd4..75eb62bab99 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -16,9 +16,8 @@ use grpcio::{ChannelBuilder, Environment}; use kvproto::{kvrpcpb::*, tikvpb::TikvClient}; use pd_client::PdClient; use raft::eraftpb::MessageType; -use test_raftstore::{Simulator as S1, *}; +use test_raftstore::*; use test_raftstore_macro::test_case; -use test_raftstore_v2::Simulator as S2; use tikv::storage::Snapshot; use tikv_util::{ config::{ReadableDuration, ReadableSize}, diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 2ab5e4700dc..75384f3ebe3 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -221,6 +221,7 @@ fn test_serde_custom_tikv_config() { consistency_check_interval: ReadableDuration::secs(12), report_region_flow_interval: ReadableDuration::minutes(12), raft_store_max_leader_lease: ReadableDuration::secs(12), + allow_unsafe_vote_after_start: false, 
right_derive_when_split: false, allow_remove_leader: true, merge_max_log_gap: 3, diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index 46af67a8cad..60c87fd4e00 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -10,7 +10,7 @@ use std::{ }; use engine_rocks::RocksSnapshot; -use kvproto::metapb; +use kvproto::{metapb, raft_serverpb::RaftMessage}; use more_asserts::assert_le; use pd_client::PdClient; use raft::eraftpb::{ConfChangeType, MessageType}; @@ -829,3 +829,83 @@ fn test_node_local_read_renew_lease() { thread::sleep(request_wait); } } + +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_lease_restart_during_isolation() { + let mut cluster = new_cluster(0, 3); + let election_timeout = configure_for_lease_read(&mut cluster.cfg, Some(500), Some(3)); + cluster.cfg.raft_store.allow_unsafe_vote_after_start = false; + cluster.run(); + sleep_ms(election_timeout.as_millis() as _); + let mut region; + let start = Instant::now_coarse(); + let key = b"k"; + loop { + region = cluster.get_region(key); + if region.get_peers().len() == 3 + && region + .get_peers() + .iter() + .all(|p| p.get_role() == metapb::PeerRole::Voter) + { + break; + } + if start.saturating_elapsed() > Duration::from_secs(5) { + panic!("timeout"); + } + } + + let region_id = region.get_id(); + let peer1 = find_peer(®ion, 1).unwrap(); + let peer2 = find_peer(®ion, 2).unwrap(); + + cluster.must_put(key, b"v0"); + cluster.must_transfer_leader(region_id, peer1.clone()); + must_read_on_peer(&mut cluster, peer1.clone(), region.clone(), key, b"v0"); + cluster.must_transfer_leader(region_id, peer2.clone()); + must_read_on_peer(&mut cluster, peer2.clone(), region.clone(), key, b"v0"); + + cluster.add_send_filter(IsolationFilterFactory::new(2)); + + // Restart node 3. 
+ cluster.stop_node(3); + cluster.run_node(3).unwrap(); + + // Let peer1 start election. + let mut timeout = RaftMessage::default(); + timeout.mut_message().set_to(peer1.get_id()); + timeout + .mut_message() + .set_msg_type(MessageType::MsgTimeoutNow); + timeout + .mut_message() + .set_msg_type(MessageType::MsgTimeoutNow); + timeout.set_region_id(region.get_id()); + timeout.set_from_peer(peer2.clone()); + timeout.set_to_peer(peer1.clone()); + timeout.set_region_epoch(region.get_region_epoch().clone()); + cluster.send_raft_msg(timeout).unwrap(); + + let (tx, rx) = mpsc::channel(); + let append_resp_notifier = Box::new(MessageTypeNotifier::new( + MessageType::MsgAppendResponse, + tx, + Arc::from(AtomicBool::new(true)), + )); + cluster.sim.wl().add_send_filter(3, append_resp_notifier); + let timeout = Duration::from_secs(5); + rx.recv_timeout(timeout).unwrap(); + + let mut put = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![new_put_cmd(key, b"v1")], + false, + ); + put.mut_header().set_peer(peer1.clone()); + let resp = cluster.call_command(put, timeout).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + must_read_on_peer(&mut cluster, peer1.clone(), region.clone(), key, b"v1"); + must_error_read_on_peer(&mut cluster, peer2.clone(), region.clone(), key, timeout); +} diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index fb9772c7189..afc0c9afab4 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -13,7 +13,6 @@ use raft::eraftpb::{ConfChangeType, MessageType}; use raftstore::store::{Callback, LocksStatus}; use test_raftstore::*; use test_raftstore_macro::test_case; -use test_raftstore_v2::Simulator as _; use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{config::*, HandyRwLock}; use txn_types::{Key, LastChange, PessimisticLock}; diff --git a/tests/integrations/raftstore/test_replica_read.rs 
b/tests/integrations/raftstore/test_replica_read.rs index 441e14cf3d8..50df1975dc3 100644 --- a/tests/integrations/raftstore/test_replica_read.rs +++ b/tests/integrations/raftstore/test_replica_read.rs @@ -19,7 +19,6 @@ use raft::eraftpb::MessageType; use raftstore::{store::ReadIndexContext, Result}; use test_raftstore::{Simulator as S1, *}; use test_raftstore_macro::test_case; -use test_raftstore_v2::Simulator as S2; use tikv_util::{config::*, future::block_on_timeout, time::Instant, HandyRwLock}; use txn_types::{Key, Lock, LockType}; use uuid::Uuid; diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index ee64d1a1b1d..04f7bb6044d 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -23,7 +23,6 @@ use raftstore::{ use raftstore_v2::router::QueryResult; use test_raftstore::*; use test_raftstore_macro::test_case; -use test_raftstore_v2::Simulator as S2; use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{config::*, future::block_on_timeout}; use txn_types::{Key, LastChange, PessimisticLock}; From ccdb2c8e3a489cf4d2bcfa56c8528e481d437eda Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Mon, 17 Jul 2023 13:25:54 +0800 Subject: [PATCH 0797/1149] fix issue template (#15138) --- .github/ISSUE_TEMPLATE/bug-report.md | 2 +- .github/ISSUE_TEMPLATE/feature-request.md | 2 +- .github/ISSUE_TEMPLATE/question.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md index 6b414a80ed1..a8f9d04cb2a 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -1,7 +1,7 @@ --- name: "\U0001F41B Bug Report" about: Something isn't working as expected -label: type/bug +labels: type/bug --- ## Bug Report diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md index 
ad62241e4d0..3a608c54e52 100644 --- a/.github/ISSUE_TEMPLATE/feature-request.md +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -1,7 +1,7 @@ --- name: "\U0001F680 Feature Request" about: As a user, I want to request a New Feature on the product. -label: type/feature-request +labels: type/feature-request --- ## Feature Request diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md index 2e86d240a9f..57cb175d52a 100644 --- a/.github/ISSUE_TEMPLATE/question.md +++ b/.github/ISSUE_TEMPLATE/question.md @@ -1,7 +1,7 @@ --- name: "\U0001F914 Question" about: Usage question that isn't answered in docs or discussion -label: "T: Question" +labels: "type/question" --- ## Question From de16ac00be4510138f7aef18b8ee1004a3f5083d Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 17 Jul 2023 14:54:45 +0800 Subject: [PATCH 0798/1149] tests: stable tests (#15129) close tikv/tikv#15114 * stables test_gc_removed_peer * stables test_node_split_during_read_index Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- tests/integrations/raftstore/test_life.rs | 26 ++++++++++++++++--- .../raftstore/test_split_region.rs | 4 +-- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/tests/integrations/raftstore/test_life.rs b/tests/integrations/raftstore/test_life.rs index fff3f0dea62..e940ca30a7c 100644 --- a/tests/integrations/raftstore/test_life.rs +++ b/tests/integrations/raftstore/test_life.rs @@ -160,7 +160,7 @@ fn test_gc_removed_peer() { }; cluster.add_send_filter(factory); - let must_gc_peer = |to_peer: kvproto::metapb::Peer| { + let check_gc_peer = |to_peer: kvproto::metapb::Peer, timeout| -> bool { let epoch = cluster.get_region_epoch(region_id); let mut msg = RaftMessage::default(); msg.set_is_tombstone(true); @@ -177,13 +177,19 @@ fn test_gc_removed_peer() { check_peer.set_check_region_epoch(epoch); cluster.sim.wl().send_raft_msg(msg.clone()).unwrap(); - let gc_resp = 
rx.recv_timeout(Duration::from_secs(5)).unwrap(); + let Ok(gc_resp) = rx.recv_timeout(timeout) else { + return false; + }; assert_eq!(gc_resp.get_region_id(), region_id); assert_eq!(*gc_resp.get_from_peer(), to_peer); + true }; // Mock gc a peer that has been removed before creation. - must_gc_peer(new_learner_peer(2, 5)); + assert!(check_gc_peer( + new_learner_peer(2, 5), + Duration::from_secs(5) + )); cluster .pd_client @@ -198,5 +204,17 @@ fn test_gc_removed_peer() { cluster.wait_peer_state(region_id, 2, PeerState::Tombstone); // Mock gc peer request. GC learner(2, 4). - must_gc_peer(new_learner_peer(2, 4)); + let start = Instant::now(); + loop { + if check_gc_peer(new_learner_peer(2, 4), Duration::from_millis(200)) { + return; + } + if start.saturating_elapsed() > Duration::from_secs(5) { + break; + } + } + assert!(check_gc_peer( + new_learner_peer(2, 4), + Duration::from_millis(200) + )); } diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 04f7bb6044d..d10645c44fa 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -1474,14 +1474,14 @@ fn test_node_split_during_read_index() { true, ); request.mut_header().set_peer(new_peer(1, 1)); - let (msg, sub) = raftstore_v2::router::PeerMsg::raft_query(request.clone()); + let (msg, sub) = raftstore_v2::router::PeerMsg::raft_query(request); cluster .sim .rl() .async_peer_msg_on_node(1, region.get_id(), msg) .unwrap(); - cluster.split_region(®ion, b"a", Callback::None); + cluster.must_split(®ion, b"a"); // Enable read index cluster.clear_recv_filter_on_node(2); From c27b43018ca83bd4103abf627c50293b841396bd Mon Sep 17 00:00:00 2001 From: Lucas Date: Mon, 17 Jul 2023 17:59:15 +0800 Subject: [PATCH 0799/1149] raftstore & raftstore-v2:control grpc server according to slowness. 
(#15088) close tikv/tikv#15086 Make raftstore & raftstore-v2 able to control the grpc service, according to the slowness. Signed-off-by: lucasliang Co-authored-by: tonyxuqqi --- Cargo.lock | 20 ++- Cargo.toml | 3 + cmd/tikv-server/src/main.rs | 7 +- components/raftstore-v2/Cargo.toml | 1 + components/raftstore-v2/src/batch/store.rs | 3 + components/raftstore-v2/src/worker/pd/mod.rs | 6 + .../raftstore-v2/src/worker/pd/store.rs | 24 ++++ .../tests/integrations/cluster.rs | 2 + components/raftstore/Cargo.toml | 1 + components/raftstore/src/store/fsm/peer.rs | 4 +- components/raftstore/src/store/fsm/store.rs | 5 + components/raftstore/src/store/worker/pd.rs | 49 +++++++ components/server/Cargo.toml | 2 + components/server/src/server.rs | 81 +++++++++-- components/server/src/server2.rs | 86 ++++++++++-- components/server/src/setup.rs | 4 + components/server/src/signal_handler.rs | 9 +- components/service/Cargo.toml | 11 ++ components/service/src/lib.rs | 4 + components/service/src/service_event.rs | 22 +++ components/service/src/service_manager.rs | 80 +++++++++++ components/test_raftstore-v2/Cargo.toml | 1 + components/test_raftstore-v2/src/node.rs | 4 + components/test_raftstore-v2/src/server.rs | 2 + components/test_raftstore/Cargo.toml | 1 + components/test_raftstore/src/node.rs | 2 + components/test_raftstore/src/server.rs | 2 + components/tikv_util/src/lib.rs | 13 ++ src/server/node.rs | 5 + src/server/raftkv2/node.rs | 5 + src/server/server.rs | 130 ++++++++++++++---- src/server/status_server/mod.rs | 97 +++++++++++++ tests/Cargo.toml | 2 + tests/failpoints/cases/test_kv_service.rs | 2 + .../integrations/config/dynamic/raftstore.rs | 2 + .../integrations/raftstore/test_bootstrap.rs | 2 + tests/integrations/server/kv_service.rs | 2 + tests/integrations/server/mod.rs | 1 + tests/integrations/server/server.rs | 82 +++++++++++ tests/integrations/server/status_server.rs | 2 + 40 files changed, 726 insertions(+), 55 deletions(-) create mode 100644 
components/service/Cargo.toml create mode 100644 components/service/src/lib.rs create mode 100644 components/service/src/service_event.rs create mode 100644 components/service/src/service_manager.rs create mode 100644 tests/integrations/server/server.rs diff --git a/Cargo.lock b/Cargo.lock index 859c69b6a09..ecdd12c9507 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2966,7 +2966,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#f32671394cc43c7558e23fb953872e6b09a3dac2" +source = "git+https://github.com/pingcap/kvproto.git#92075f193814dbc00f347b4589743a36bf624f34" dependencies = [ "futures 0.3.15", "grpcio", @@ -4614,6 +4614,7 @@ dependencies = [ "serde", "serde_derive", "serde_with", + "service", "slog", "slog-global", "smallvec", @@ -4664,6 +4665,7 @@ dependencies = [ "rand 0.8.5", "resource_control", "resource_metering", + "service", "slog", "slog-global", "smallvec", @@ -5550,6 +5552,7 @@ dependencies = [ "engine_rocks_helper", "engine_traits", "error_code", + "fail", "file_system", "fs2", "futures 0.3.15", @@ -5574,6 +5577,7 @@ dependencies = [ "resource_metering", "security", "serde_json", + "service", "signal-hook", "slog", "slog-global", @@ -5588,6 +5592,15 @@ dependencies = [ "yatp", ] +[[package]] +name = "service" +version = "0.0.1" +dependencies = [ + "atomic", + "crossbeam", + "tikv_util", +] + [[package]] name = "sha2" version = "0.9.1" @@ -6251,6 +6264,7 @@ dependencies = [ "resource_metering", "security", "server", + "service", "slog", "slog-global", "tempfile", @@ -6298,6 +6312,7 @@ dependencies = [ "resource_metering", "security", "server", + "service", "slog", "slog-global", "tempfile", @@ -6423,6 +6438,8 @@ dependencies = [ "resource_metering", "security", "serde_json", + "server", + "service", "slog", "slog-global", "sst_importer", @@ -6750,6 +6767,7 @@ dependencies = [ "serde_derive", "serde_ignored", "serde_json", + "service", "slog", "slog-global", "smallvec", diff --git 
a/Cargo.toml b/Cargo.toml index 9141e5b770d..a2bbb19e787 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -128,6 +128,7 @@ serde = { version = "1.0", features = ["derive"] } serde_derive = "1.0" serde_ignored = "0.1" serde_json = { version = "1.0", features = ["preserve_order"] } +service = { workspace = true } slog = { workspace = true } slog-global = { workspace = true } smallvec = "1.4" @@ -254,6 +255,7 @@ members = [ "components/resource_metering", "components/security", "components/server", + "components/service", "components/snap_recovery", "components/sst_importer", "components/test_backup", @@ -331,6 +333,7 @@ resource_control = { path = "components/resource_control" } resource_metering = { path = "components/resource_metering" } security = { path = "components/security" } server = { path = "components/server" } +service = { path = "components/service" } snap_recovery = { path = "components/snap_recovery" } sst_importer = { path = "components/sst_importer", default-features = false } test_backup = { path = "components/test_backup" } diff --git a/cmd/tikv-server/src/main.rs b/cmd/tikv-server/src/main.rs index e64afdf1868..9fdcad81c58 100644 --- a/cmd/tikv-server/src/main.rs +++ b/cmd/tikv-server/src/main.rs @@ -217,8 +217,11 @@ fn main() { process::exit(1) } + let (service_event_tx, service_event_rx) = tikv_util::mpsc::unbounded(); // pipe for controling service match config.storage.engine { - EngineType::RaftKv => server::server::run_tikv(config), - EngineType::RaftKv2 => server::server2::run_tikv(config), + EngineType::RaftKv => server::server::run_tikv(config, service_event_tx, service_event_rx), + EngineType::RaftKv2 => { + server::server2::run_tikv(config, service_event_tx, service_event_rx) + } } } diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 4279ed12726..d281c0eca69 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -55,6 +55,7 @@ raftstore = { workspace = true } rand = 
"0.8.3" resource_control = { workspace = true } resource_metering = { workspace = true } +service = { workspace = true } slog = "2.3" smallvec = "1.4" sst_importer = { workspace = true } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 39a28ea1a0e..44a623ed9f6 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -40,6 +40,7 @@ use raftstore::{ }, }; use resource_metering::CollectorRegHandle; +use service::service_manager::GrpcServiceManager; use slog::{warn, Logger}; use sst_importer::SstImporter; use tikv_util::{ @@ -678,6 +679,7 @@ impl StoreSystem { pd_worker: LazyWorker, sst_importer: Arc, key_manager: Option>, + grpc_service_mgr: GrpcServiceManager, ) -> Result<()> where T: Transport + 'static, @@ -793,6 +795,7 @@ impl StoreSystem { auto_split_controller, store_meta.lock().unwrap().region_read_progress.clone(), collector_reg_handle, + grpc_service_mgr, self.logger.clone(), self.shutdown.clone(), cfg.clone(), diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 1f4e09a1a71..4e97f49c261 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -19,6 +19,7 @@ use raftstore::store::{ WriteStats, NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, }; use resource_metering::{Collector, CollectorRegHandle, RawRecords}; +use service::service_manager::GrpcServiceManager; use slog::{error, warn, Logger}; use tikv_util::{ config::VersionTrack, @@ -217,6 +218,9 @@ where // For slowness detection slowness_stats: slowness::SlownessStatistics, + // For grpc server. 
+ grpc_service_manager: GrpcServiceManager, + logger: Logger, shutdown: Arc, cfg: Arc>, @@ -242,6 +246,7 @@ where auto_split_controller: AutoSplitController, region_read_progress: RegionReadProgressRegistry, collector_reg_handle: CollectorRegHandle, + grpc_service_manager: GrpcServiceManager, logger: Logger, shutdown: Arc, cfg: Arc>, @@ -279,6 +284,7 @@ where concurrency_manager, causal_ts_provider, slowness_stats, + grpc_service_manager, logger, shutdown, cfg, diff --git a/components/raftstore-v2/src/worker/pd/store.rs b/components/raftstore-v2/src/worker/pd/store.rs index 8601e9ac1b2..f55484b3910 100644 --- a/components/raftstore-v2/src/worker/pd/store.rs +++ b/components/raftstore-v2/src/worker/pd/store.rs @@ -231,6 +231,8 @@ where stats.set_cpu_usages(self.store_stat.store_cpu_usages.clone().into()); stats.set_read_io_rates(self.store_stat.store_read_io_rates.clone().into()); stats.set_write_io_rates(self.store_stat.store_write_io_rates.clone().into()); + // Update grpc server status + stats.set_is_grpc_paused(self.grpc_service_manager.is_paused()); let mut interval = pdpb::TimeInterval::default(); interval.set_start_timestamp(self.store_stat.last_report_ts.into_inner()); @@ -268,6 +270,7 @@ where let resp = self.pd_client.store_heartbeat(stats, None, None); let logger = self.logger.clone(); + let mut grpc_service_manager = self.grpc_service_manager.clone(); let f = async move { match resp.await { Ok(mut resp) => { @@ -282,6 +285,27 @@ where "Ignored AwakenRegions in raftstore-v2 as no hibernated regions in raftstore-v2" ); } + // Control grpc server. 
+ else if let Some(op) = resp.control_grpc.take() { + info!(logger, "forcely control grpc server"; + "is_grpc_server_paused" => grpc_service_manager.is_paused(), + "event" => ?op, + ); + match op.get_ctrl_event() { + pdpb::ControlGrpcEvent::Pause => { + if let Err(e) = grpc_service_manager.pause() { + warn!(logger, "failed to send service event to PAUSE grpc server"; + "err" => ?e); + } + } + pdpb::ControlGrpcEvent::Resume => { + if let Err(e) = grpc_service_manager.resume() { + warn!(logger, "failed to send service event to RESUME grpc server"; + "err" => ?e); + } + } + } + } } Err(e) => { error!(logger, "store heartbeat failed"; "err" => ?e); diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index fcb379add1f..710286f8e13 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -45,6 +45,7 @@ use raftstore_v2::{ Bootstrap, SimpleWriteEncoder, StateStorage, StoreSystem, }; use resource_metering::CollectorRegHandle; +use service::service_manager::GrpcServiceManager; use slog::{debug, o, Logger}; use sst_importer::SstImporter; use tempfile::TempDir; @@ -353,6 +354,7 @@ impl RunningState { pd_worker, importer, key_manager, + GrpcServiceManager::dummy(), ) .unwrap(); diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index e09199bcaa4..27380a52882 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -77,6 +77,7 @@ resource_metering = { workspace = true } serde = "1.0" serde_derive = "1.0" serde_with = "1.4" +service = { workspace = true } slog = { workspace = true } slog-global = { workspace = true } smallvec = "1.4" diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 2bb67a55deb..afcd35e2496 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2824,7 
+2824,9 @@ where } } // It's v2 only message and ignore does no harm. - ExtraMessageType::MsgGcPeerResponse | ExtraMessageType::MsgFlushMemtable => (), + ExtraMessageType::MsgGcPeerResponse + | ExtraMessageType::MsgFlushMemtable + | ExtraMessageType::MsgRefreshBuckets => (), } } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index f937505a82e..fd3ad648db7 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -45,6 +45,7 @@ use protobuf::Message; use raft::StateRole; use resource_control::{channel::unbounded, ResourceGroupManager}; use resource_metering::CollectorRegHandle; +use service::service_manager::GrpcServiceManager; use sst_importer::SstImporter; use tikv_alloc::trace::TraceEvent; use tikv_util::{ @@ -1576,6 +1577,7 @@ impl RaftBatchSystem { collector_reg_handle: CollectorRegHandle, health_service: Option, causal_ts_provider: Option>, // used for rawkv apiv2 + grpc_service_mgr: GrpcServiceManager, ) -> Result<()> { assert!(self.workers.is_none()); // TODO: we can get cluster meta regularly too later. 
@@ -1715,6 +1717,7 @@ impl RaftBatchSystem { health_service, causal_ts_provider, snap_generator_pool, + grpc_service_mgr, )?; Ok(()) } @@ -1733,6 +1736,7 @@ impl RaftBatchSystem { health_service: Option, causal_ts_provider: Option>, // used for rawkv apiv2 snap_generator_pool: FuturePool, + grpc_service_mgr: GrpcServiceManager, ) -> Result<()> { let cfg = builder.cfg.value().clone(); let store = builder.store.clone(); @@ -1824,6 +1828,7 @@ impl RaftBatchSystem { health_service, coprocessor_host, causal_ts_provider, + grpc_service_mgr, ); assert!(workers.pd_worker.start_with_timer(pd_runner)); diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index a16003c9bcf..51195d727a6 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -36,6 +36,7 @@ use pd_client::{metrics::*, BucketStat, Error, PdClient, RegionStat}; use prometheus::local::LocalHistogram; use raft::eraftpb::ConfChangeType; use resource_metering::{Collector, CollectorGuard, CollectorRegHandle, RawRecords}; +use service::service_manager::GrpcServiceManager; use tikv_util::{ box_err, debug, error, info, metrics::ThreadInfoStatistics, @@ -198,6 +199,7 @@ where min_resolved_ts: u64, }, ReportBuckets(BucketStat), + ControlGrpcServer(pdpb::ControlGrpcEvent), } pub struct StoreStat { @@ -428,6 +430,9 @@ where Task::ReportBuckets(ref buckets) => { write!(f, "report buckets: {:?}", buckets) } + Task::ControlGrpcServer(ref event) => { + write!(f, "control grpc server: {:?}", event) + } } } } @@ -950,6 +955,9 @@ where curr_health_status: ServingStatus, coprocessor_host: CoprocessorHost, causal_ts_provider: Option>, // used for rawkv apiv2 + + // Service manager for grpc service. 
+ grpc_service_manager: GrpcServiceManager, } impl Runner @@ -973,6 +981,7 @@ where health_service: Option, coprocessor_host: CoprocessorHost, causal_ts_provider: Option>, // used for rawkv apiv2 + grpc_service_manager: GrpcServiceManager, ) -> Runner { let store_heartbeat_interval = cfg.pd_store_heartbeat_tick_interval.0; let interval = store_heartbeat_interval / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT; @@ -1045,6 +1054,7 @@ where curr_health_status: ServingStatus::Serving, coprocessor_host, causal_ts_provider, + grpc_service_manager, } } @@ -1355,6 +1365,9 @@ where stats.set_slow_score(slow_score as u64); self.set_slow_trend_to_store_stats(&mut stats, total_query_num); + stats.set_is_grpc_paused(self.grpc_service_manager.is_paused()); + + let scheduler = self.scheduler.clone(); let router = self.router.clone(); let resp = self .pd_client @@ -1431,6 +1444,14 @@ where abnormal_stores: awaken_regions.get_abnormal_stores().to_vec(), }); } + // Control grpc server. + if let Some(op) = resp.control_grpc.take() { + if let Err(e) = + scheduler.schedule(Task::ControlGrpcServer(op.get_ctrl_event())) + { + warn!("fail to schedule control grpc task"; "err" => ?e); + } + } } Err(e) => { error!("store heartbeat failed"; "err" => ?e); @@ -1969,6 +1990,31 @@ where (interval_second >= self.store_heartbeat_interval.as_secs()) && (interval_second <= STORE_HEARTBEAT_DELAY_LIMIT) } + + fn handle_control_grpc_server(&mut self, event: pdpb::ControlGrpcEvent) { + info!("forcely control grpc server"; + "curr_health_status" => ?self.curr_health_status, + "event" => ?event, + ); + match event { + pdpb::ControlGrpcEvent::Pause => { + if let Err(e) = self.grpc_service_manager.pause() { + warn!("failed to send service event to PAUSE grpc server"; + "err" => ?e); + } else { + self.update_health_status(ServingStatus::NotServing); + } + } + pdpb::ControlGrpcEvent::Resume => { + if let Err(e) = self.grpc_service_manager.resume() { + warn!("failed to send service event to RESUME grpc server"; + 
"err" => ?e); + } else { + self.update_health_status(ServingStatus::Serving); + } + } + } + } } fn calculate_region_cpu_records( @@ -2222,6 +2268,9 @@ where Task::ReportBuckets(buckets) => { self.handle_report_region_buckets(buckets); } + Task::ControlGrpcServer(event) => { + self.handle_control_grpc_server(event); + } }; } diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index dc8360bede0..55da894c6e8 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -49,6 +49,7 @@ engine_rocks = { workspace = true } engine_rocks_helper = { workspace = true } engine_traits = { workspace = true } error_code = { workspace = true } +fail = "0.5" file_system = { workspace = true } fs2 = "0.4" futures = "0.3" @@ -73,6 +74,7 @@ resource_control = { workspace = true } resource_metering = { workspace = true } security = { workspace = true } serde_json = "1.0" +service = { workspace = true } slog = { workspace = true } slog-global = { workspace = true } snap_recovery = { workspace = true } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index bb89f6f2333..3ca4c7bbd1b 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -75,6 +75,7 @@ use resource_control::{ ResourceGroupManager, ResourceManagerService, MIN_PRIORITY_UPDATE_INTERVAL, }; use security::SecurityManager; +use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use snap_recovery::RecoveryService; use tikv::{ config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, @@ -110,6 +111,7 @@ use tikv::{ use tikv_util::{ check_environment_variables, config::VersionTrack, + mpsc as TikvMpsc, quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, sys::{disk, path_in_diff_mount_point, register_memory_usage_high_water, SysQuota}, thread_group::GroupProperties, @@ -129,9 +131,12 @@ use crate::{ }; #[inline] -fn run_impl(config: TikvConfig) { - let mut tikv = 
TikvServer::::init(config); - +fn run_impl( + config: TikvConfig, + service_event_tx: TikvMpsc::Sender, + service_event_rx: TikvMpsc::Receiver, +) { + let mut tikv = TikvServer::::init(config, service_event_tx.clone()); // Must be called after `TikvServer::init`. let memory_limit = tikv.core.config.memory_usage_limit.unwrap().0; let high_water = (tikv.core.config.memory_usage_high_water * memory_limit as f64) as u64; @@ -153,17 +158,45 @@ fn run_impl(config: TikvConfig) { tikv.run_status_server(); tikv.core.init_quota_tuning_task(tikv.quota_limiter.clone()); - signal_handler::wait_for_signal( - Some(tikv.engines.take().unwrap().engines), - tikv.kv_statistics.clone(), - tikv.raft_statistics.clone(), - ); + // Build a background worker for handling signals. + { + let engines = tikv.engines.take().unwrap().engines; + let kv_statistics = tikv.kv_statistics.clone(); + let raft_statistics = tikv.raft_statistics.clone(); + std::thread::spawn(move || { + signal_handler::wait_for_signal( + Some(engines), + kv_statistics, + raft_statistics, + Some(service_event_tx), + ) + }); + } + loop { + if let Ok(service_event) = service_event_rx.recv() { + match service_event { + ServiceEvent::PauseGrpc => { + tikv.pause(); + } + ServiceEvent::ResumeGrpc => { + tikv.resume(); + } + ServiceEvent::Exit => { + break; + } + } + } + } tikv.stop(); } /// Run a TiKV server. Returns when the server is shutdown by the user, in which /// case the server will be properly stopped. -pub fn run_tikv(config: TikvConfig) { +pub fn run_tikv( + config: TikvConfig, + service_event_tx: TikvMpsc::Sender, + service_event_rx: TikvMpsc::Receiver, +) { // Sets the global logger ASAP. // It is okay to use the config w/o `validate()`, // because `initial_logger()` handles various conditions. 
@@ -184,9 +217,9 @@ pub fn run_tikv(config: TikvConfig) { dispatch_api_version!(config.storage.api_version(), { if !config.raft_engine.enable { - run_impl::(config) + run_impl::(config, service_event_tx, service_event_rx) } else { - run_impl::(config) + run_impl::(config, service_event_tx, service_event_rx) } }) } @@ -220,6 +253,7 @@ struct TikvServer { causal_ts_provider: Option>, // used for rawkv apiv2 tablet_registry: Option>, br_snap_recovery_mode: bool, // use for br snapshot recovery + grpc_service_mgr: GrpcServiceManager, } struct TikvEngines { @@ -248,7 +282,7 @@ where ER: RaftEngine, F: KvFormat, { - fn init(mut config: TikvConfig) -> TikvServer { + fn init(mut config: TikvConfig, tx: TikvMpsc::Sender) -> TikvServer { tikv_util::thread_group::set_properties(Some(GroupProperties::default())); // It is okay use pd config and security config before `init_config`, // because these configs must be provided by command line, and only @@ -407,6 +441,7 @@ where causal_ts_provider, tablet_registry: None, br_snap_recovery_mode: is_recovering_marked, + grpc_service_mgr: GrpcServiceManager::new(tx), } } @@ -930,6 +965,7 @@ where self.concurrency_manager.clone(), collector_reg_handle, self.causal_ts_provider.clone(), + self.grpc_service_mgr.clone(), ) .unwrap_or_else(|e| fatal!("failed to start node: {}", e)); @@ -1397,6 +1433,7 @@ where self.engines.as_ref().unwrap().engine.raft_extension(), self.core.store_path.clone(), self.resource_manager.clone(), + self.grpc_service_mgr.clone(), ) { Ok(status_server) => Box::new(status_server), Err(e) => { @@ -1432,6 +1469,26 @@ where self.core.to_stop.into_iter().for_each(|s| s.stop()); } + + fn pause(&mut self) { + let server = self.servers.as_mut().unwrap(); + if let Err(e) = server.server.pause() { + warn!( + "failed to pause the server"; + "err" => ?e + ); + } + } + + fn resume(&mut self) { + let server = self.servers.as_mut().unwrap(); + if let Err(e) = server.server.resume() { + warn!( + "failed to resume the server"; + 
"err" => ?e + ); + } + } } impl TikvServer { diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 04976d42d1c..68aff0cf8c3 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -71,6 +71,7 @@ use resource_control::{ ResourceGroupManager, ResourceManagerService, MIN_PRIORITY_UPDATE_INTERVAL, }; use security::SecurityManager; +use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use tikv::{ config::{ loop_registry, ConfigController, ConfigurableDb, DbConfigManger, DbType, LogConfigManager, @@ -107,6 +108,7 @@ use tikv::{ use tikv_util::{ check_environment_variables, config::VersionTrack, + mpsc as TikvMpsc, quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, sys::{disk, path_in_diff_mount_point, register_memory_usage_high_water, SysQuota}, thread_group::GroupProperties, @@ -126,8 +128,12 @@ use crate::{ }; #[inline] -fn run_impl(config: TikvConfig) { - let mut tikv = TikvServer::::init::(config); +fn run_impl( + config: TikvConfig, + service_event_tx: TikvMpsc::Sender, + service_event_rx: TikvMpsc::Receiver, +) { + let mut tikv = TikvServer::::init::(config, service_event_tx.clone()); // Must be called after `TikvServer::init`. let memory_limit = tikv.core.config.memory_usage_limit.unwrap().0; @@ -149,18 +155,45 @@ fn run_impl(config: TikvConfig) { tikv.run_status_server(); tikv.core.init_quota_tuning_task(tikv.quota_limiter.clone()); - // TODO: support signal dump stats - signal_handler::wait_for_signal( - None as Option>, - tikv.kv_statistics.clone(), - tikv.raft_statistics.clone(), - ); + // Build a background worker for handling signals. 
+ { + let kv_statistics = tikv.kv_statistics.clone(); + let raft_statistics = tikv.raft_statistics.clone(); + // TODO: support signal dump stats + std::thread::spawn(move || { + signal_handler::wait_for_signal( + None as Option>, + kv_statistics, + raft_statistics, + Some(service_event_tx), + ) + }); + } + loop { + if let Ok(service_event) = service_event_rx.recv() { + match service_event { + ServiceEvent::PauseGrpc => { + tikv.pause(); + } + ServiceEvent::ResumeGrpc => { + tikv.resume(); + } + ServiceEvent::Exit => { + break; + } + } + } + } tikv.stop(); } /// Run a TiKV server. Returns when the server is shutdown by the user, in which /// case the server will be properly stopped. -pub fn run_tikv(config: TikvConfig) { +pub fn run_tikv( + config: TikvConfig, + service_event_tx: TikvMpsc::Sender, + service_event_rx: TikvMpsc::Receiver, +) { // Sets the global logger ASAP. // It is okay to use the config w/o `validate()`, // because `initial_logger()` handles various conditions. @@ -181,9 +214,9 @@ pub fn run_tikv(config: TikvConfig) { dispatch_api_version!(config.storage.api_version(), { if !config.raft_engine.enable { - run_impl::(config) + run_impl::(config, service_event_tx, service_event_rx) } else { - run_impl::(config) + run_impl::(config, service_event_tx, service_event_rx) } }) } @@ -219,6 +252,7 @@ struct TikvServer { resource_manager: Option>, causal_ts_provider: Option>, // used for rawkv apiv2 tablet_registry: Option>, + grpc_service_mgr: GrpcServiceManager, } struct TikvEngines { @@ -239,7 +273,10 @@ impl TikvServer where ER: RaftEngine, { - fn init(mut config: TikvConfig) -> TikvServer { + fn init( + mut config: TikvConfig, + tx: TikvMpsc::Sender, + ) -> TikvServer { tikv_util::thread_group::set_properties(Some(GroupProperties::default())); // It is okay use pd config and security config before `init_config`, // because these configs must be provided by command line, and only @@ -363,6 +400,7 @@ where resource_manager, causal_ts_provider, 
tablet_registry: None, + grpc_service_mgr: GrpcServiceManager::new(tx), } } @@ -834,6 +872,7 @@ where &state, importer.clone(), self.core.encryption_key_manager.clone(), + self.grpc_service_mgr.clone(), ) .unwrap_or_else(|e| fatal!("failed to start node: {}", e)); @@ -1242,6 +1281,7 @@ where self.engines.as_ref().unwrap().engine.raft_extension(), self.core.store_path.clone(), self.resource_manager.clone(), + self.grpc_service_mgr.clone(), ) { Ok(status_server) => Box::new(status_server), Err(e) => { @@ -1347,6 +1387,28 @@ where self.core.to_stop.into_iter().for_each(|s| s.stop()); } + + fn pause(&mut self) { + let server = self.servers.as_mut().unwrap(); + let r = server.server.pause(); + if let Err(e) = r { + warn!( + "failed to pause the server"; + "err" => ?e + ); + } + } + + fn resume(&mut self) { + let server = self.servers.as_mut().unwrap(); + let r = server.server.resume(); + if let Err(e) = r { + warn!( + "failed to resume the server"; + "err" => ?e + ); + } + } } impl TikvServer { diff --git a/components/server/src/setup.rs b/components/server/src/setup.rs index 5742eda8bc8..b758b9e39df 100644 --- a/components/server/src/setup.rs +++ b/components/server/src/setup.rs @@ -10,6 +10,7 @@ use std::{ use chrono::Local; use clap::ArgMatches; use collections::HashMap; +use fail; use tikv::config::{check_critical_config, persist_config, MetricConfig, TikvConfig}; use tikv_util::{self, config, logger}; @@ -75,6 +76,9 @@ fn make_engine_log_path(path: &str, sub_path: &str, filename: &str) -> String { #[allow(dead_code)] pub fn initial_logger(config: &TikvConfig) { + fail::fail_point!("mock_force_uninitial_logger", |_| { + LOG_INITIALIZED.store(false, Ordering::SeqCst); + }); let rocksdb_info_log_path = if !config.rocksdb.info_log_dir.is_empty() { make_engine_log_path(&config.rocksdb.info_log_dir, "", DEFAULT_ROCKSDB_LOG_FILE) } else { diff --git a/components/server/src/signal_handler.rs b/components/server/src/signal_handler.rs index 0977a1ed814..0644bb1b13a 100644 
--- a/components/server/src/signal_handler.rs +++ b/components/server/src/signal_handler.rs @@ -10,11 +10,12 @@ pub use self::imp::wait_for_signal; #[cfg(unix)] mod imp { use engine_traits::MiscExt; + use service::service_event::ServiceEvent; use signal_hook::{ consts::{SIGHUP, SIGINT, SIGTERM, SIGUSR1, SIGUSR2}, iterator::Signals, }; - use tikv_util::metrics; + use tikv_util::{metrics, mpsc as TikvMpsc}; use super::*; @@ -23,12 +24,18 @@ mod imp { engines: Option>, kv_statistics: Option>, raft_statistics: Option>, + service_event_tx: Option>, ) { let mut signals = Signals::new([SIGTERM, SIGINT, SIGHUP, SIGUSR1, SIGUSR2]).unwrap(); for signal in &mut signals { match signal { SIGTERM | SIGINT | SIGHUP => { info!("receive signal {}, stopping server...", signal); + if let Some(tx) = service_event_tx { + if let Err(e) = tx.send(ServiceEvent::Exit) { + warn!("failed to notify grpc server exit, {:?}", e); + } + } break; } SIGUSR1 => { diff --git a/components/service/Cargo.toml b/components/service/Cargo.toml new file mode 100644 index 00000000000..d21867f3f85 --- /dev/null +++ b/components/service/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "service" +version = "0.0.1" +license = "Apache-2.0" +edition = "2021" +publish = false + +[dependencies] +atomic = "0.5" +crossbeam = "0.8" +tikv_util = { workspace = true } diff --git a/components/service/src/lib.rs b/components/service/src/lib.rs new file mode 100644 index 00000000000..c07748bf408 --- /dev/null +++ b/components/service/src/lib.rs @@ -0,0 +1,4 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +pub mod service_event; +pub mod service_manager; diff --git a/components/service/src/service_event.rs b/components/service/src/service_event.rs new file mode 100644 index 00000000000..f9423f28df5 --- /dev/null +++ b/components/service/src/service_event.rs @@ -0,0 +1,22 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::fmt; + +/// Service Status enum +pub enum ServiceEvent { + // For grpc service. + PauseGrpc, + ResumeGrpc, + // ... + Exit, +} + +impl fmt::Debug for ServiceEvent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ServiceEvent::PauseGrpc => f.debug_tuple("PauseGrpc").finish(), + ServiceEvent::ResumeGrpc => f.debug_tuple("ResumeGrpc").finish(), + ServiceEvent::Exit => f.debug_tuple("Exit").finish(), + } + } +} diff --git a/components/service/src/service_manager.rs b/components/service/src/service_manager.rs new file mode 100644 index 00000000000..0b69fc00c93 --- /dev/null +++ b/components/service/src/service_manager.rs @@ -0,0 +1,80 @@ +use std::sync::{atomic::Ordering, Arc}; + +use atomic::Atomic; +use crossbeam::channel::SendError; +use tikv_util::mpsc; + +use crate::service_event::ServiceEvent; + +#[repr(u8)] +#[derive(Debug, Copy, Clone, PartialEq)] +enum GrpcServiceStatus { + Init, + Serving, + NotServing, +} + +#[derive(Clone)] +pub struct GrpcServiceManager { + status: Arc>, + service_router: mpsc::Sender, +} + +impl GrpcServiceManager { + fn build(router: mpsc::Sender, status: GrpcServiceStatus) -> Self { + Self { + status: Arc::new(Atomic::new(status)), + service_router: router, + } + } + + /// Generate a formal GrpcServiceManager. + pub fn new(router: mpsc::Sender) -> Self { + Self::build(router, GrpcServiceStatus::Serving) + } + + /// Only for test. + /// Generate a dummy GrpcServiceManager. + pub fn dummy() -> Self { + let (router, _) = mpsc::unbounded(); + Self::build(router, GrpcServiceStatus::Init) + } + + /// Send message to outer handler to notify PAUSE grpc server. + pub fn pause(&mut self) -> Result<(), SendError> { + if self.is_paused() { + // Already in PAUSE. 
+ return Ok(()); + } + let result = self.service_router.send(ServiceEvent::PauseGrpc); + if result.is_ok() { + self.status + .store(GrpcServiceStatus::NotServing, Ordering::Relaxed); + } + result + } + + /// Send message to outer handler to notify RESUME grpc server. + pub fn resume(&mut self) -> Result<(), SendError> { + if self.is_serving() { + // Already in RESUME. + return Ok(()); + } + let result = self.service_router.send(ServiceEvent::ResumeGrpc); + if result.is_ok() { + self.status + .store(GrpcServiceStatus::Serving, Ordering::Relaxed); + } + result + } + + #[inline] + pub fn is_paused(&self) -> bool { + self.status.load(Ordering::Relaxed) == GrpcServiceStatus::NotServing + } + + #[inline] + fn is_serving(&self) -> bool { + self.status.load(Ordering::Relaxed) == GrpcServiceStatus::Serving + } +} diff --git a/components/test_raftstore-v2/Cargo.toml b/components/test_raftstore-v2/Cargo.toml index 5d60b7a82b8..14d2192852a 100644 --- a/components/test_raftstore-v2/Cargo.toml +++ b/components/test_raftstore-v2/Cargo.toml @@ -54,6 +54,7 @@ resource_control = { workspace = true } resource_metering = { workspace = true } security = { workspace = true } server = { workspace = true } +service = { workspace = true } slog = { workspace = true } # better to not use slog-global, but pass in the logger slog-global = { workspace = true } diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index d0e4c37cc49..18d99ae8f4c 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -33,6 +33,7 @@ use raftstore_v2::{ }; use resource_control::ResourceGroupManager; use resource_metering::CollectorRegHandle; +use service::service_manager::GrpcServiceManager; use tempfile::TempDir; use test_pd_client::TestPdClient; use test_raftstore::{Config, Filter}; @@ -47,6 +48,7 @@ use tikv::{ use tikv_util::{ box_err, config::VersionTrack, + mpsc, worker::{Builder as WorkerBuilder, LazyWorker}, }; @@ 
-299,6 +301,7 @@ impl Simulator for NodeCluster { ) }; + let (sender, _) = mpsc::unbounded(); let bg_worker = WorkerBuilder::new("background").thread_count(2).create(); let state: Arc> = Arc::default(); node.start( @@ -318,6 +321,7 @@ impl Simulator for NodeCluster { &state, importer, key_manager, + GrpcServiceManager::new(sender), )?; assert!( raft_engine diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 9b792d9f5bb..7d11aebd784 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -43,6 +43,7 @@ use raftstore_v2::{router::RaftRouter, StateStorage, StoreMeta, StoreRouter}; use resource_control::ResourceGroupManager; use resource_metering::{CollectorRegHandle, ResourceTagFactory}; use security::SecurityManager; +use service::service_manager::GrpcServiceManager; use slog_global::debug; use tempfile::TempDir; use test_pd_client::TestPdClient; @@ -663,6 +664,7 @@ impl ServerCluster { &state, importer, key_manager, + GrpcServiceManager::dummy(), )?; assert!(node_id == 0 || node_id == node.id()); let node_id = node.id(); diff --git a/components/test_raftstore/Cargo.toml b/components/test_raftstore/Cargo.toml index ddd9ea33aaa..d48acc4e92b 100644 --- a/components/test_raftstore/Cargo.toml +++ b/components/test_raftstore/Cargo.toml @@ -53,6 +53,7 @@ resource_control = { workspace = true } resource_metering = { workspace = true } security = { workspace = true } server = { workspace = true } +service = { workspace = true } slog = { workspace = true } # better to not use slog-global, but pass in the logger slog-global = { workspace = true } diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 44cff8d9413..3f0e9512dc3 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -32,6 +32,7 @@ use raftstore::{ }; use resource_control::ResourceGroupManager; use 
resource_metering::CollectorRegHandle; +use service::service_manager::GrpcServiceManager; use tempfile::TempDir; use test_pd_client::TestPdClient; use tikv::{ @@ -333,6 +334,7 @@ impl Simulator for NodeCluster { cm, CollectorRegHandle::new_for_test(), None, + GrpcServiceManager::dummy(), )?; assert!( engines diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index ab2da452bcc..09aa5b5fa6a 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -45,6 +45,7 @@ use raftstore::{ use resource_control::ResourceGroupManager; use resource_metering::{CollectorRegHandle, ResourceTagFactory}; use security::SecurityManager; +use service::service_manager::GrpcServiceManager; use tempfile::TempDir; use test_pd_client::TestPdClient; use tikv::{ @@ -607,6 +608,7 @@ impl ServerCluster { concurrency_manager.clone(), collector_reg_handle, causal_ts_provider, + GrpcServiceManager::dummy(), )?; assert!(node_id == 0 || node_id == node.id()); let node_id = node.id(); diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index fd294a08d34..b8aa578a878 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -345,6 +345,19 @@ impl Either { _ => None, } } + + #[inline] + pub fn is_left(&self) -> bool { + match *self { + Either::Left(_) => true, + Either::Right(_) => false, + } + } + + #[inline] + pub fn is_right(&self) -> bool { + !self.is_left() + } } impl AsRef for Either diff --git a/src/server/node.rs b/src/server/node.rs index e36e980e1d3..d92d2423216 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -25,6 +25,7 @@ use raftstore::{ }, }; use resource_metering::CollectorRegHandle; +use service::service_manager::GrpcServiceManager; use tikv_util::{ config::VersionTrack, worker::{LazyWorker, Scheduler, Worker}, @@ -172,6 +173,7 @@ where concurrency_manager: ConcurrencyManager, collector_reg_handle: CollectorRegHandle, causal_ts_provider: 
Option>, // used for rawkv apiv2 + grpc_service_mgr: GrpcServiceManager, ) -> Result<()> where T: Transport + 'static, @@ -209,6 +211,7 @@ where concurrency_manager, collector_reg_handle, causal_ts_provider, + grpc_service_mgr, )?; Ok(()) @@ -456,6 +459,7 @@ where concurrency_manager: ConcurrencyManager, collector_reg_handle: CollectorRegHandle, causal_ts_provider: Option>, // used for rawkv apiv2 + grpc_service_mgr: GrpcServiceManager, ) -> Result<()> where T: Transport + 'static, @@ -489,6 +493,7 @@ where collector_reg_handle, self.health_service.clone(), causal_ts_provider, + grpc_service_mgr, )?; Ok(()) } diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index e32fa28fd2b..71b0bf115d9 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -17,6 +17,7 @@ use raftstore::{ }; use raftstore_v2::{router::RaftRouter, Bootstrap, PdTask, StoreRouter, StoreSystem}; use resource_metering::CollectorRegHandle; +use service::service_manager::GrpcServiceManager; use slog::{info, o, Logger}; use sst_importer::SstImporter; use tikv_util::{ @@ -106,6 +107,7 @@ where state: &Mutex, sst_importer: Arc, key_manager: Option>, + grpc_service_mgr: GrpcServiceManager, ) -> Result<()> where T: Transport + 'static, @@ -146,6 +148,7 @@ where store_cfg, sst_importer, key_manager, + grpc_service_mgr, )?; Ok(()) @@ -209,6 +212,7 @@ where store_cfg: Arc>, sst_importer: Arc, key_manager: Option>, + grpc_service_mgr: GrpcServiceManager, ) -> Result<()> where T: Transport + 'static, @@ -242,6 +246,7 @@ where pd_worker, sst_importer, key_manager, + grpc_service_mgr, )?; Ok(()) } diff --git a/src/server/server.rs b/src/server/server.rs index 26ce0b29bfc..6de8ceb48e2 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -53,6 +53,64 @@ pub const GRPC_THREAD_PREFIX: &str = "grpc-server"; pub const READPOOL_NORMAL_THREAD_PREFIX: &str = "store-read-norm"; pub const STATS_THREAD_PREFIX: &str = "transport-stats"; +pub trait GrpcBuilderFactory { + fn 
create_builder(&self, env: Arc) -> Result; +} + +struct BuilderFactory { + kv_service: S, + cfg: Arc>, + security_mgr: Arc, + health_service: HealthService, +} + +impl BuilderFactory +where + S: Tikv + Send + Clone + 'static, +{ + pub fn new( + kv_service: S, + cfg: Arc>, + security_mgr: Arc, + health_service: HealthService, + ) -> BuilderFactory { + BuilderFactory { + kv_service, + cfg, + security_mgr, + health_service, + } + } +} + +impl GrpcBuilderFactory for BuilderFactory +where + S: Tikv + Send + Clone + 'static, +{ + fn create_builder(&self, env: Arc) -> Result { + let addr = SocketAddr::from_str(&self.cfg.value().addr)?; + let ip: String = format!("{}", addr.ip()); + let mem_quota = ResourceQuota::new(Some("ServerMemQuota")) + .resize_memory(self.cfg.value().grpc_memory_pool_quota.0 as usize); + let channel_args = ChannelBuilder::new(Arc::clone(&env)) + .stream_initial_window_size(self.cfg.value().grpc_stream_initial_window_size.0 as i32) + .max_concurrent_stream(self.cfg.value().grpc_concurrent_stream) + .max_receive_message_len(-1) + .set_resource_quota(mem_quota) + .max_send_message_len(-1) + .http2_max_ping_strikes(i32::MAX) // For pings without data from clients. 
+ .keepalive_time(self.cfg.value().grpc_keepalive_time.into()) + .keepalive_timeout(self.cfg.value().grpc_keepalive_timeout.into()) + .build_args(); + + let sb = ServerBuilder::new(Arc::clone(&env)) + .channel_args(channel_args) + .register_service(create_tikv(self.kv_service.clone())) + .register_service(create_health(self.health_service.clone())); + Ok(self.security_mgr.bind(sb, &ip, addr.port())) + } +} + /// The TiKV server /// /// It hosts various internal components, including gRPC, the raftstore router @@ -79,6 +137,7 @@ pub struct Server { debug_thread_pool: Arc, health_service: HealthService, timer: Handle, + builder_factory: Box, } impl Server @@ -141,30 +200,17 @@ where cfg.value().reject_messages_on_memory_ratio, resource_manager, ); + let builder_factory = Box::new(BuilderFactory::new( + kv_service, + cfg.clone(), + security_mgr.clone(), + health_service.clone(), + )); let addr = SocketAddr::from_str(&cfg.value().addr)?; - let ip = format!("{}", addr.ip()); let mem_quota = ResourceQuota::new(Some("ServerMemQuota")) .resize_memory(cfg.value().grpc_memory_pool_quota.0 as usize); - let channel_args = ChannelBuilder::new(Arc::clone(&env)) - .stream_initial_window_size(cfg.value().grpc_stream_initial_window_size.0 as i32) - .max_concurrent_stream(cfg.value().grpc_concurrent_stream) - .max_receive_message_len(-1) - .set_resource_quota(mem_quota.clone()) - .max_send_message_len(-1) - .http2_max_ping_strikes(i32::MAX) // For pings without data from clients. 
- .keepalive_time(cfg.value().grpc_keepalive_time.into()) - .keepalive_timeout(cfg.value().grpc_keepalive_timeout.into()) - .build_args(); - - let builder = { - let mut sb = ServerBuilder::new(Arc::clone(&env)) - .channel_args(channel_args) - .register_service(create_tikv(kv_service)) - .register_service(create_health(health_service.clone())); - sb = security_mgr.bind(sb, &ip, addr.port()); - Either::Left(sb) - }; + let builder = Either::Left(builder_factory.create_builder(env.clone())?); let conn_builder = ConnectionBuilder::new( env.clone(), @@ -195,6 +241,7 @@ where debug_thread_pool, health_service, timer: GLOBAL_TIMER_HANDLE.clone(), + builder_factory, }; Ok(svr) @@ -248,6 +295,15 @@ where Ok(addr) } + fn start_grpc(&mut self) { + info!("listening on addr"; "addr" => &self.local_addr); + let mut grpc_server = self.builder_or_server.take().unwrap().right().unwrap(); + grpc_server.start(); + self.builder_or_server = Some(Either::Right(grpc_server)); + self.health_service + .set_serving_status("", ServingStatus::Serving); + } + /// Starts the TiKV server. /// Notice: Make sure call `build_and_bind` first. pub fn start( @@ -280,10 +336,7 @@ where } } - let mut grpc_server = self.builder_or_server.take().unwrap().right().unwrap(); - info!("listening on addr"; "addr" => &self.local_addr); - grpc_server.start(); - self.builder_or_server = Some(Either::Right(grpc_server)); + self.start_grpc(); // Note this should be called only after grpc server is started. let mut grpc_load_stats = { @@ -323,8 +376,6 @@ where option_env!("TIKV_BUILD_GIT_HASH").unwrap_or("None"), ]) .set(startup_ts as i64); - self.health_service - .set_serving_status("", ServingStatus::Serving); info!("TiKV is ready to serve"); Ok(()) @@ -344,6 +395,33 @@ where Ok(()) } + pub fn pause(&mut self) -> Result<()> { + let start = Instant::now(); + // Prepare the builder for resume grpc server. And if the builder cannot be + // created, then pause will be skipped. 
+ let builder = Either::Left(self.builder_factory.create_builder(self.env.clone())?); + if let Some(Either::Right(server)) = self.builder_or_server.take() { + drop(server); + } + self.health_service + .set_serving_status("", ServingStatus::NotServing); + self.builder_or_server = Some(builder); + info!("paused the grpc server"; "takes" => ?start.elapsed(),); + Ok(()) + } + + pub fn resume(&mut self) -> Result<()> { + if let Some(builder) = self.builder_or_server.as_ref() { + let start = Instant::now(); + assert!(builder.is_left()); + self.build_and_bind()?; + self.start_grpc(); + info!("resumed the grpc server"; "takes" => ?start.elapsed(),); + return Ok(()); + } + Err(Error::Other(box_err!("resume the grpc server is skipped."))) + } + // Return listening address, this may only be used for outer test // to get the real address because we may use "127.0.0.1:0" // in test to avoid port conflict. diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 2de6fee4a91..10623c54376 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -49,6 +49,7 @@ use resource_control::ResourceGroupManager; use security::{self, SecurityConfig}; use serde::Serialize; use serde_json::Value; +use service::service_manager::GrpcServiceManager; use tikv_kv::RaftExtension; use tikv_util::{ logger::set_log_level, @@ -93,6 +94,7 @@ pub struct StatusServer { security_config: Arc, store_path: PathBuf, resource_manager: Option>, + grpc_service_mgr: GrpcServiceManager, } impl StatusServer @@ -106,6 +108,7 @@ where router: R, store_path: PathBuf, resource_manager: Option>, + grpc_service_mgr: GrpcServiceManager, ) -> Result { let thread_pool = Builder::new_multi_thread() .enable_all() @@ -128,6 +131,7 @@ where security_config, store_path, resource_manager, + grpc_service_mgr, }) } @@ -440,6 +444,36 @@ impl StatusServer where R: 'static + Send + RaftExtension + Clone, { + async fn handle_pause_grpc( + mut grpc_service_mgr: GrpcServiceManager, + 
) -> hyper::Result> { + if let Err(err) = grpc_service_mgr.pause() { + return Ok(make_response( + StatusCode::INTERNAL_SERVER_ERROR, + format!("fails to pause grpc: {}", err), + )); + } + Ok(make_response( + StatusCode::OK, + "Successfully pause grpc service", + )) + } + + async fn handle_resume_grpc( + mut grpc_service_mgr: GrpcServiceManager, + ) -> hyper::Result> { + if let Err(err) = grpc_service_mgr.resume() { + return Ok(make_response( + StatusCode::INTERNAL_SERVER_ERROR, + format!("fails to resume grpc: {}", err), + )); + } + Ok(make_response( + StatusCode::OK, + "Successfully resume grpc service", + )) + } + pub async fn dump_region_meta(req: Request, router: R) -> hyper::Result> { lazy_static! { static ref REGION: Regex = Regex::new(r"/region/(?P\d+)").unwrap(); @@ -553,6 +587,7 @@ where let router = self.router.clone(); let store_path = self.store_path.clone(); let resource_manager = self.resource_manager.clone(); + let grpc_service_mgr = self.grpc_service_mgr.clone(); // Start to serve. let server = builder.serve(make_service_fn(move |conn: &C| { let x509 = conn.get_x509(); @@ -561,6 +596,7 @@ where let router = router.clone(); let store_path = store_path.clone(); let resource_manager = resource_manager.clone(); + let grpc_service_mgr = grpc_service_mgr.clone(); async move { // Create a status service. 
Ok::<_, hyper::Error>(service_fn(move |req: Request| { @@ -570,6 +606,7 @@ where let router = router.clone(); let store_path = store_path.clone(); let resource_manager = resource_manager.clone(); + let grpc_service_mgr = grpc_service_mgr.clone(); async move { let path = req.uri().path().to_owned(); let method = req.method().to_owned(); @@ -650,6 +687,12 @@ where (Method::GET, "/resource_groups") => { Self::handle_get_all_resource_groups(resource_manager.as_ref()) } + (Method::PUT, "/pause_grpc") => { + Self::handle_pause_grpc(grpc_service_mgr).await + } + (Method::PUT, "/resume_grpc") => { + Self::handle_resume_grpc(grpc_service_mgr).await + } _ => Ok(make_response(StatusCode::NOT_FOUND, "path not found")), } } @@ -1032,6 +1075,7 @@ mod tests { use openssl::ssl::{SslConnector, SslFiletype, SslMethod}; use raftstore::store::region_meta::RegionMeta; use security::SecurityConfig; + use service::service_manager::GrpcServiceManager; use test_util::new_security_cfg; use tikv_kv::RaftExtension; use tikv_util::logger::get_log_level; @@ -1061,6 +1105,7 @@ mod tests { MockRouter, temp_dir.path().to_path_buf(), None, + GrpcServiceManager::dummy(), ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1110,6 +1155,7 @@ mod tests { MockRouter, temp_dir.path().to_path_buf(), None, + GrpcServiceManager::dummy(), ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1156,6 +1202,7 @@ mod tests { MockRouter, temp_dir.path().to_path_buf(), None, + GrpcServiceManager::dummy(), ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1273,6 +1320,7 @@ mod tests { MockRouter, temp_dir.path().to_path_buf(), None, + GrpcServiceManager::dummy(), ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1318,6 +1366,7 @@ mod tests { MockRouter, temp_dir.path().to_path_buf(), None, + GrpcServiceManager::dummy(), ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1355,6 +1404,7 @@ mod tests { MockRouter, temp_dir.path().to_path_buf(), None, + GrpcServiceManager::dummy(), ) .unwrap(); let addr = 
"127.0.0.1:0".to_owned(); @@ -1429,6 +1479,7 @@ mod tests { MockRouter, temp_dir.path().to_path_buf(), None, + GrpcServiceManager::dummy(), ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1460,6 +1511,7 @@ mod tests { MockRouter, temp_dir.path().to_path_buf(), None, + GrpcServiceManager::dummy(), ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1494,6 +1546,7 @@ mod tests { MockRouter, temp_dir.path().to_path_buf(), None, + GrpcServiceManager::dummy(), ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1550,6 +1603,7 @@ mod tests { MockRouter, temp_dir.path().to_path_buf(), None, + GrpcServiceManager::dummy(), ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1605,6 +1659,7 @@ mod tests { MockRouter, temp_dir.path().to_path_buf(), None, + GrpcServiceManager::dummy(), ) .unwrap(); let addr = "127.0.0.1:0".to_owned(); @@ -1628,4 +1683,46 @@ mod tests { status_server.stop(); } } + + #[test] + fn test_control_grpc_service() { + let mut multi_rocks_cfg = TikvConfig::default(); + multi_rocks_cfg.storage.engine = EngineType::RaftKv2; + let cfgs = [TikvConfig::default(), multi_rocks_cfg]; + for cfg in IntoIterator::into_iter(cfgs) { + let temp_dir = tempfile::TempDir::new().unwrap(); + let mut status_server = StatusServer::new( + 1, + ConfigController::new(cfg), + Arc::new(SecurityConfig::default()), + MockRouter, + temp_dir.path().to_path_buf(), + None, + GrpcServiceManager::dummy(), + ) + .unwrap(); + let addr = "127.0.0.1:0".to_owned(); + let _ = status_server.start(addr); + for req in ["/pause_grpc", "/resume_grpc"] { + let client = Client::new(); + let uri = Uri::builder() + .scheme("http") + .authority(status_server.listening_addr().to_string().as_str()) + .path_and_query(req) + .build() + .unwrap(); + + let mut grpc_req = Request::default(); + *grpc_req.method_mut() = Method::PUT; + *grpc_req.uri_mut() = uri; + let handle = status_server.thread_pool.spawn(async move { + let res = client.request(grpc_req).await.unwrap(); + // Dummy grpc service 
manager, should return error. + assert_eq!(res.status(), StatusCode::INTERNAL_SERVER_ERROR); + }); + block_on(handle).unwrap(); + } + status_server.stop(); + } + } } diff --git a/tests/Cargo.toml b/tests/Cargo.toml index be3a6792798..6c2b96cf642 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -97,6 +97,8 @@ raftstore = { workspace = true } raftstore-v2 = { workspace = true } rand = "0.8.3" resource_control = { workspace = true } +server = { workspace = true } +service = { workspace = true } slog = { workspace = true } slog-global = { workspace = true } tempfile = "3.0" diff --git a/tests/failpoints/cases/test_kv_service.rs b/tests/failpoints/cases/test_kv_service.rs index a2615c1c7b2..f3831bb984b 100644 --- a/tests/failpoints/cases/test_kv_service.rs +++ b/tests/failpoints/cases/test_kv_service.rs @@ -100,4 +100,6 @@ fn test_undetermined_write_err() { .unwrap_err(); assert_eq!(err.to_string(), "RpcFailure: 1-CANCELLED CANCELLED",); fail::remove("applied_cb_return_undetermined_err"); + // The previous panic hasn't been captured. 
+ assert!(std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| drop(cluster))).is_err()); } diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index 4a7ab76da8b..9d507c9980d 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -20,6 +20,7 @@ use raftstore::{ Result, }; use resource_metering::CollectorRegHandle; +use service::service_manager::GrpcServiceManager; use tempfile::TempDir; use test_pd_client::TestPdClient; use tikv::{ @@ -112,6 +113,7 @@ fn start_raftstore( CollectorRegHandle::new_for_test(), None, None, + GrpcServiceManager::dummy(), ) .unwrap(); diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 312cb7c9c5c..e4df37c2e91 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -16,6 +16,7 @@ use raftstore::{ }; use raftstore_v2::router::PeerMsg; use resource_metering::CollectorRegHandle; +use service::service_manager::GrpcServiceManager; use tempfile::Builder; use test_pd_client::{bootstrap_with_first_region, TestPdClient}; use test_raftstore::*; @@ -119,6 +120,7 @@ fn test_node_bootstrap_with_prepared_data() { ConcurrencyManager::new(1.into()), CollectorRegHandle::new_for_test(), None, + GrpcServiceManager::dummy(), ) .unwrap(); assert!( diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index fbf4f349877..08f2bb6fa55 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -32,6 +32,7 @@ use raftstore::{ store::{fsm::store::StoreMeta, AutoSplitController, SnapManager}, }; use resource_metering::CollectorRegHandle; +use service::service_manager::GrpcServiceManager; use tempfile::Builder; use test_raftstore::*; use test_raftstore_macro::test_case; @@ -1409,6 +1410,7 @@ fn test_double_run_node() { 
ConcurrencyManager::new(1.into()), CollectorRegHandle::new_for_test(), None, + GrpcServiceManager::dummy(), ) .unwrap_err(); assert!(format!("{:?}", e).contains("already started"), "{:?}", e); diff --git a/tests/integrations/server/mod.rs b/tests/integrations/server/mod.rs index fb813106cce..3bf842d2b9a 100644 --- a/tests/integrations/server/mod.rs +++ b/tests/integrations/server/mod.rs @@ -6,6 +6,7 @@ mod kv_service; mod lock_manager; mod raft_client; mod security; +mod server; mod status_server; use std::sync::Arc; diff --git a/tests/integrations/server/server.rs b/tests/integrations/server/server.rs new file mode 100644 index 00000000000..58a5af3a89e --- /dev/null +++ b/tests/integrations/server/server.rs @@ -0,0 +1,82 @@ +use std::{sync::Arc, time::Duration}; + +use grpcio::*; +use grpcio_health::{proto::HealthCheckRequest, HealthClient, ServingStatus}; +use service::service_event::ServiceEvent; +use test_pd::Server as MockServer; +use tikv::config::TikvConfig; + +#[test] +fn test_restart_grpc_service() { + fail::cfg("mock_force_uninitial_logger", "return").unwrap(); + let check_heath_api = |max_retry, client: &HealthClient| { + let req = HealthCheckRequest { + service: "".to_string(), + ..Default::default() + }; + for i in 0..max_retry { + let r = client.check(&req); + if r.is_err() { + assert!(i != max_retry - 1); + std::thread::sleep(Duration::from_millis(500)); + continue; + } + let resp = r.unwrap(); + assert_eq!(ServingStatus::Serving, resp.status); + break; + } + }; + let (service_event_tx, service_event_rx) = tikv_util::mpsc::unbounded(); + let sender = service_event_tx.clone(); + let addr = format!("127.0.0.1:{}", test_util::alloc_port()); + let grpc_addr = addr.clone(); + let tikv_thread = std::thread::spawn(move || { + let dir = test_util::temp_dir("test_run_tikv_server", true); + let mut pd_server = MockServer::new(1); + let eps = pd_server.bind_addrs(); + let mut config = TikvConfig::default(); + config.server.addr = grpc_addr; + config.log.level = 
slog::Level::Critical.into(); + config.log.file.filename = "".to_string(); + config.storage.data_dir = dir.path().to_str().unwrap().to_string(); + config.pd.endpoints = vec![format!("{}:{}", eps[0].0, eps[0].1)]; + server::server::run_tikv(config, service_event_tx, service_event_rx); + + pd_server.stop(); + }); + + let env = Arc::new(Environment::new(1)); + let channel = ChannelBuilder::new(env).connect(&addr); + let client: HealthClient = HealthClient::new(channel); + let req = HealthCheckRequest { + service: "".to_string(), + ..Default::default() + }; + let max_retry = 30; + check_heath_api(max_retry, &client); + // PAUSE grpc service and validate. + { + let start = std::time::Instant::now(); + sender.send(ServiceEvent::PauseGrpc).unwrap(); + loop { + if start.elapsed() > Duration::from_secs(5) { + panic!(); + } + let resp = client.check(&req); + if resp.is_err() { + if let Err(Error::RpcFailure(status)) = resp { + assert_eq!(status.code(), RpcStatusCode::UNAVAILABLE); + } + break; + } + } + } + // RESUME grpc service and validate. 
+ { + sender.send(ServiceEvent::ResumeGrpc).unwrap(); + check_heath_api(max_retry, &client); + } + sender.send(ServiceEvent::Exit).unwrap(); + tikv_thread.join().unwrap(); + fail::remove("mock_force_uninitial_logger"); +} diff --git a/tests/integrations/server/status_server.rs b/tests/integrations/server/status_server.rs index 1e3963ffdb7..3e1fbd4a9e8 100644 --- a/tests/integrations/server/status_server.rs +++ b/tests/integrations/server/status_server.rs @@ -5,6 +5,7 @@ use std::{error::Error, net::SocketAddr, sync::Arc}; use hyper::{body, Client, StatusCode, Uri}; use raftstore::store::region_meta::RegionMeta; use security::SecurityConfig; +use service::service_manager::GrpcServiceManager; use test_raftstore::new_server_cluster; use tikv::{config::ConfigController, server::status_server::StatusServer}; @@ -46,6 +47,7 @@ fn test_region_meta_endpoint() { router, std::env::temp_dir(), None, + GrpcServiceManager::dummy(), ) .unwrap(); let addr = format!("127.0.0.1:{}", test_util::alloc_port()); From c16cab2c29ba36af51826e9b7d4e8bd950e0c15e Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 18 Jul 2023 16:06:46 +0800 Subject: [PATCH 0800/1149] encryption: fix key collision issue in tablet snapshot (#15098) close tikv/tikv#15059 - Properly clean up encryption keys when cleaning up snapshot cache file. - Allow overwriting stale encryption keys when importing remote keys. 
Signed-off-by: tabokie Co-authored-by: tonyxuqqi --- components/encryption/src/manager/mod.rs | 18 +++++++++++++++--- src/server/tablet_snap.rs | 13 +++++++++++-- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index b11152e1882..c47a127a1a5 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -1033,7 +1033,12 @@ impl<'a> DataKeyImporter<'a> { if let Entry::Vacant(e) = file_dict.files.entry(fname.to_owned()) { e.insert(file.clone()); } else { - return Err(box_err!("file name collides with existing file: {}", fname)); + // check for physical file. + if Path::new(fname).exists() { + return Err(box_err!("file name collides with existing file: {}", fname)); + } else { + warn!("overwriting existing unused encryption key"; "fname" => fname); + } } file_dict.files.len() as _ }; @@ -1825,10 +1830,17 @@ mod tests { let mut importer = DataKeyImporter::new(&manager); let file0 = manager.new_file("0").unwrap(); - // conflict + // conflict with actual file. + let f = tmp_dir.path().join("0").to_str().unwrap().to_owned(); + let _ = manager.new_file(&f).unwrap(); + File::create(&f).unwrap(); importer - .add("0", file0.iv.clone(), DataKey::default()) + .add(&f, file0.iv.clone(), DataKey::default()) .unwrap_err(); + // conflict with only key. + importer + .add("0", file0.iv.clone(), DataKey::default()) + .unwrap(); // same key importer .add( diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index fe0329ff9df..b8747d5b4b7 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -314,6 +314,9 @@ async fn cleanup_cache( } } fs::remove_file(entry.path())?; + if let Some(m) = key_manager { + m.delete_file(entry.path().to_str().unwrap(), None)?; + } } let mut missing = vec![]; loop { @@ -329,7 +332,10 @@ async fn cleanup_cache( continue; } // We should not write to the file directly as it's hard linked. 
- fs::remove_file(p)?; + fs::remove_file(&p)?; + if let Some(m) = key_manager { + m.delete_file(p.to_str().unwrap(), None)?; + } } missing.push(meta.file_name); } @@ -338,7 +344,10 @@ async fn cleanup_cache( } } for (_, p) in exists { - fs::remove_file(p)?; + fs::remove_file(&p)?; + if let Some(m) = key_manager { + m.delete_file(p.to_str().unwrap(), None)?; + } } let mut resp = TabletSnapshotResponse::default(); resp.mut_files().set_file_name(missing.clone().into()); From 4a7b2480536b3d0ad5b3d2810ec8aac3ba442064 Mon Sep 17 00:00:00 2001 From: Ping Yu Date: Tue, 18 Jul 2023 19:38:16 +0800 Subject: [PATCH 0801/1149] rawkv: fix ttl_checker for RawKV API v2 (#15143) close tikv/tikv#15142 rawkv: fix ttl_checker for RawKV API v2. Signed-off-by: Ping Yu Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../engine_rocks/src/mvcc_properties.rs | 4 +- components/engine_rocks/src/properties.rs | 5 + components/engine_rocks/src/ttl_properties.rs | 106 +++++++---- .../engine_traits/src/mvcc_properties.rs | 5 + .../engine_traits/src/ttl_properties.rs | 88 ++++++++- src/config/mod.rs | 12 +- src/server/gc_worker/compaction_filter.rs | 4 +- src/server/ttl/ttl_checker.rs | 2 +- src/server/ttl/ttl_compaction_filter.rs | 6 +- tests/failpoints/cases/test_ttl.rs | 170 +++++++++++------- 10 files changed, 287 insertions(+), 115 deletions(-) diff --git a/components/engine_rocks/src/mvcc_properties.rs b/components/engine_rocks/src/mvcc_properties.rs index ba1c61f0a88..99ac5fbe6b2 100644 --- a/components/engine_rocks/src/mvcc_properties.rs +++ b/components/engine_rocks/src/mvcc_properties.rs @@ -3,7 +3,7 @@ use engine_traits::{MvccProperties, MvccPropertiesExt, Result}; use txn_types::TimeStamp; -use crate::{decode_properties::DecodeProperties, RocksEngine, UserProperties}; +use crate::{decode_properties::DecodeProperties, RocksEngine, RocksTtlProperties, UserProperties}; pub const PROP_NUM_ERRORS: &str = "tikv.num_errors"; pub const PROP_MIN_TS: &str 
= "tikv.min_ts"; @@ -28,6 +28,7 @@ impl RocksMvccProperties { props.encode_u64(PROP_NUM_DELETES, mvcc_props.num_deletes); props.encode_u64(PROP_NUM_VERSIONS, mvcc_props.num_versions); props.encode_u64(PROP_MAX_ROW_VERSIONS, mvcc_props.max_row_versions); + RocksTtlProperties::encode_to(&mvcc_props.ttl, &mut props); props } @@ -43,6 +44,7 @@ impl RocksMvccProperties { .decode_u64(PROP_NUM_DELETES) .unwrap_or(res.num_versions - res.num_puts); res.max_row_versions = props.decode_u64(PROP_MAX_ROW_VERSIONS)?; + RocksTtlProperties::decode_from(&mut res.ttl, props); Ok(res) } } diff --git a/components/engine_rocks/src/properties.rs b/components/engine_rocks/src/properties.rs index d1158ac9c4e..87ccab9e5ab 100644 --- a/components/engine_rocks/src/properties.rs +++ b/components/engine_rocks/src/properties.rs @@ -461,6 +461,9 @@ impl TablePropertiesCollector for MvccPropertiesCollector { } else { self.props.num_deletes += 1; } + if let Some(expire_ts) = raw_value.expire_ts { + self.props.ttl.add(expire_ts); + } } Err(_) => { self.num_errors += 1; @@ -847,6 +850,8 @@ mod tests { assert_eq!(props.num_puts, 4); assert_eq!(props.num_versions, 7); assert_eq!(props.max_row_versions, 3); + assert_eq!(props.ttl.max_expire_ts, Some(u64::MAX)); + assert_eq!(props.ttl.min_expire_ts, Some(10)); } #[bench] diff --git a/components/engine_rocks/src/ttl_properties.rs b/components/engine_rocks/src/ttl_properties.rs index eb4641cc102..8e6021939bc 100644 --- a/components/engine_rocks/src/ttl_properties.rs +++ b/components/engine_rocks/src/ttl_properties.rs @@ -15,19 +15,30 @@ const PROP_MIN_EXPIRE_TS: &str = "tikv.min_expire_ts"; pub struct RocksTtlProperties; impl RocksTtlProperties { + pub fn encode_to(ttl_props: &TtlProperties, user_props: &mut UserProperties) { + if let Some(max_expire_ts) = ttl_props.max_expire_ts { + user_props.encode_u64(PROP_MAX_EXPIRE_TS, max_expire_ts); + } + if let Some(min_expire_ts) = ttl_props.min_expire_ts { + user_props.encode_u64(PROP_MIN_EXPIRE_TS, 
min_expire_ts); + } + } + pub fn encode(ttl_props: &TtlProperties) -> UserProperties { let mut props = UserProperties::new(); - props.encode_u64(PROP_MAX_EXPIRE_TS, ttl_props.max_expire_ts); - props.encode_u64(PROP_MIN_EXPIRE_TS, ttl_props.min_expire_ts); + Self::encode_to(ttl_props, &mut props); props } - pub fn decode(props: &T) -> Result { - let res = TtlProperties { - max_expire_ts: props.decode_u64(PROP_MAX_EXPIRE_TS)?, - min_expire_ts: props.decode_u64(PROP_MIN_EXPIRE_TS)?, - }; - Ok(res) + pub fn decode_from(ttl_props: &mut TtlProperties, props: &T) { + ttl_props.max_expire_ts = props.decode_u64(PROP_MAX_EXPIRE_TS).ok(); + ttl_props.min_expire_ts = props.decode_u64(PROP_MIN_EXPIRE_TS).ok(); + } + + pub fn decode(props: &T) -> TtlProperties { + let mut res = TtlProperties::default(); + Self::decode_from(&mut res, props); + res } } @@ -46,11 +57,10 @@ impl TtlPropertiesExt for RocksEngine { let mut res = Vec::new(); for (file_name, v) in collection.iter() { - let prop = match RocksTtlProperties::decode(v.user_collected_properties()) { - Ok(v) => v, - Err(_) => continue, - }; - res.push((file_name.to_string(), prop)); + let prop = RocksTtlProperties::decode(v.user_collected_properties()); + if prop.is_some() { + res.push((file_name.to_string(), prop)); + } } Ok(res) } @@ -81,12 +91,7 @@ impl TablePropertiesCollector for TtlPropertiesCollector { expire_ts: Some(expire_ts), .. 
}) => { - self.prop.max_expire_ts = std::cmp::max(self.prop.max_expire_ts, expire_ts); - if self.prop.min_expire_ts == 0 { - self.prop.min_expire_ts = expire_ts; - } else { - self.prop.min_expire_ts = std::cmp::min(self.prop.min_expire_ts, expire_ts); - } + self.prop.add(expire_ts); } Err(err) => { error!( @@ -101,9 +106,6 @@ impl TablePropertiesCollector for TtlPropertiesCollector { } fn finish(&mut self) -> HashMap, Vec> { - if self.prop.max_expire_ts == 0 && self.prop.min_expire_ts == 0 { - return HashMap::default(); - } RocksTtlProperties::encode(&self.prop).0 } } @@ -138,7 +140,7 @@ mod tests { } fn test_ttl_properties_impl() { - let get_properties = |case: &[(&'static str, u64)]| -> Result { + let get_properties = |case: &[(&'static str, u64)]| -> TtlProperties { let mut collector = TtlPropertiesCollector:: { prop: Default::default(), _phantom: PhantomData, @@ -165,6 +167,7 @@ mod tests { RocksTtlProperties::decode(&result) }; + // NOTE: expire_ts=0 is considered as no TTL in `ApiVersion::V1ttl` let case1 = [ ("zr\0a", 0), ("zr\0b", UnixSecs::now().into_inner()), @@ -172,24 +175,61 @@ mod tests { ("zr\0d", u64::MAX), ("zr\0e", 0), ]; - let props = get_properties(&case1).unwrap(); - assert_eq!(props.max_expire_ts, u64::MAX); + let props = get_properties(&case1); + assert_eq!(props.max_expire_ts, Some(u64::MAX)); match F::TAG { ApiVersion::V1 => unreachable!(), - ApiVersion::V1ttl => assert_eq!(props.min_expire_ts, 1), - // expire_ts = 0 is no longer a special case in API V2 - ApiVersion::V2 => assert_eq!(props.min_expire_ts, 0), + ApiVersion::V1ttl => assert_eq!(props.min_expire_ts, Some(1)), + ApiVersion::V2 => assert_eq!(props.min_expire_ts, Some(0)), } let case2 = [("zr\0a", 0)]; - get_properties(&case2).unwrap_err(); + match F::TAG { + ApiVersion::V1 => unreachable!(), + ApiVersion::V1ttl => assert!(get_properties(&case2).is_none()), + ApiVersion::V2 => assert_eq!(props.min_expire_ts, Some(0)), + } let case3 = []; - get_properties(&case3).unwrap_err(); + 
assert!(get_properties(&case3).is_none()); let case4 = [("zr\0a", 1)]; - let props = get_properties(&case4).unwrap(); - assert_eq!(props.max_expire_ts, 1); - assert_eq!(props.min_expire_ts, 1); + let props = get_properties(&case4); + assert_eq!(props.max_expire_ts, Some(1)); + assert_eq!(props.min_expire_ts, Some(1)); + } + + #[test] + fn test_ttl_properties_codec() { + let cases: Vec<(Option, Option, Vec<(&[u8], u64)>)> = vec![ + ( + Some(0), // min_expire_ts + Some(1), // max_expire_ts + vec![(b"tikv.min_expire_ts", 0), (b"tikv.max_expire_ts", 1)], // UserProperties + ), + (None, None, vec![]), + (Some(0), None, vec![(b"tikv.min_expire_ts", 0)]), + (None, Some(0), vec![(b"tikv.max_expire_ts", 0)]), + ]; + + for (i, (min_expire_ts, max_expire_ts, expect_user_props)) in cases.into_iter().enumerate() + { + let ttl_props = TtlProperties { + min_expire_ts, + max_expire_ts, + }; + let user_props = RocksTtlProperties::encode(&ttl_props); + let expect_user_props = UserProperties( + expect_user_props + .into_iter() + .map(|(name, value)| (name.to_vec(), value.to_be_bytes().to_vec())) + .collect::>(), + ); + assert_eq!(user_props.0, expect_user_props.0, "case {}", i); + + let decoded = RocksTtlProperties::decode(&user_props); + assert_eq!(decoded.max_expire_ts, ttl_props.max_expire_ts, "case {}", i); + assert_eq!(decoded.min_expire_ts, ttl_props.min_expire_ts, "case {}", i); + } } } diff --git a/components/engine_traits/src/mvcc_properties.rs b/components/engine_traits/src/mvcc_properties.rs index b3956fc2708..d1b3b406096 100644 --- a/components/engine_traits/src/mvcc_properties.rs +++ b/components/engine_traits/src/mvcc_properties.rs @@ -4,6 +4,8 @@ use std::cmp; use txn_types::TimeStamp; +use crate::TtlProperties; + #[derive(Clone, Debug)] pub struct MvccProperties { pub min_ts: TimeStamp, // The minimal timestamp. @@ -13,6 +15,7 @@ pub struct MvccProperties { pub num_deletes: u64, // The number of MVCC deletes of all rows. 
pub num_versions: u64, // The number of MVCC versions of all rows. pub max_row_versions: u64, // The maximal number of MVCC versions of a single row. + pub ttl: TtlProperties, // The ttl properties of all rows, for RawKV only. } impl MvccProperties { @@ -25,6 +28,7 @@ impl MvccProperties { num_deletes: 0, num_versions: 0, max_row_versions: 0, + ttl: TtlProperties::default(), } } @@ -36,6 +40,7 @@ impl MvccProperties { self.num_deletes += other.num_deletes; self.num_versions += other.num_versions; self.max_row_versions = cmp::max(self.max_row_versions, other.max_row_versions); + self.ttl.merge(&other.ttl); } } diff --git a/components/engine_traits/src/ttl_properties.rs b/components/engine_traits/src/ttl_properties.rs index 3d4ad7d84f9..24cbaf23b75 100644 --- a/components/engine_traits/src/ttl_properties.rs +++ b/components/engine_traits/src/ttl_properties.rs @@ -2,10 +2,42 @@ use crate::errors::Result; -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone)] pub struct TtlProperties { - pub max_expire_ts: u64, - pub min_expire_ts: u64, + pub max_expire_ts: Option, + pub min_expire_ts: Option, +} + +impl TtlProperties { + pub fn add(&mut self, expire_ts: u64) { + self.merge(&TtlProperties { + max_expire_ts: Some(expire_ts), + min_expire_ts: Some(expire_ts), + }); + } + + pub fn merge(&mut self, other: &TtlProperties) { + if let Some(max_expire_ts) = other.max_expire_ts { + self.max_expire_ts = Some(std::cmp::max( + self.max_expire_ts.unwrap_or(u64::MIN), + max_expire_ts, + )); + } + if let Some(min_expire_ts) = other.min_expire_ts { + self.min_expire_ts = Some(std::cmp::min( + self.min_expire_ts.unwrap_or(u64::MAX), + min_expire_ts, + )); + } + } + + pub fn is_some(&self) -> bool { + self.max_expire_ts.is_some() || self.min_expire_ts.is_some() + } + + pub fn is_none(&self) -> bool { + !self.is_some() + } } pub trait TtlPropertiesExt { @@ -16,3 +48,53 @@ pub trait TtlPropertiesExt { end_key: &[u8], ) -> Result>; } + +#[cfg(test)] +mod tests { + use super::*; + + 
#[test] + fn test_ttl_properties() { + let verify = |prop: &TtlProperties, min: Option, max: Option| { + assert_eq!(prop.min_expire_ts, min); + assert_eq!(prop.max_expire_ts, max); + }; + + // add + let mut prop1 = TtlProperties::default(); + assert!(prop1.is_none()); + prop1.add(10); + assert!(prop1.is_some()); + verify(&prop1, Some(10), Some(10)); + + // merge + { + let mut prop2 = TtlProperties::default(); + prop2.add(20); + verify(&prop2, Some(20), Some(20)); + + prop1.merge(&prop2); + verify(&prop1, Some(10), Some(20)); + } + + // none merge some + let mut prop3 = TtlProperties::default(); + prop3.merge(&prop1); + verify(&prop3, Some(10), Some(20)); + + // some merge none + { + let prop4 = TtlProperties::default(); + prop3.merge(&prop4); + verify(&prop3, Some(10), Some(20)); + } + + // add + { + prop3.add(30); + verify(&prop3, Some(10), Some(30)); + prop3.add(0); + verify(&prop3, Some(0), Some(30)); + } + } +} diff --git a/src/config/mod.rs b/src/config/mod.rs index eff0da14ba3..c4aa3c36d52 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -759,10 +759,6 @@ impl DefaultCfConfig { prop_size_index_distance: self.prop_size_index_distance, prop_keys_index_distance: self.prop_keys_index_distance, }; - cf_opts.add_table_properties_collector_factory( - "tikv.rawkv-mvcc-properties-collector", - RawMvccPropertiesCollectorFactory::default(), - ); cf_opts.add_table_properties_collector_factory("tikv.range-properties-collector", f); if let Some(factory) = filter_factory { match api_version { @@ -788,6 +784,10 @@ impl DefaultCfConfig { .unwrap(); } ApiVersion::V2 => { + cf_opts.add_table_properties_collector_factory( + "tikv.rawkv-mvcc-properties-collector", + RawMvccPropertiesCollectorFactory::default(), + ); let factory = StackingCompactionFilterFactory::new( factory.clone(), RawCompactionFilterFactory, @@ -818,6 +818,10 @@ impl DefaultCfConfig { .unwrap(); } ApiVersion::V2 => { + cf_opts.add_table_properties_collector_factory( + 
"tikv.rawkv-mvcc-properties-collector", + RawMvccPropertiesCollectorFactory::default(), + ); cf_opts .set_compaction_filter_factory( "apiv2_gc_compaction_filter_factory", diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index e7b17ca409e..665824a1bac 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -880,7 +880,7 @@ pub mod test_utils { self } - fn prepare_gc(&self, engine: &RocksEngine) { + pub fn prepare_gc(&self, engine: &RocksEngine) { let safe_point = Arc::new(AtomicU64::new(self.safe_point)); let cfg_tracker = { let mut cfg = GcConfig::default(); @@ -909,7 +909,7 @@ pub mod test_utils { }); } - fn post_gc(&mut self) { + pub fn post_gc(&mut self) { self.callbacks_on_drop.clear(); let mut gc_context = GC_CONTEXT.lock().unwrap(); let callbacks = &mut gc_context.as_mut().unwrap().callbacks_on_drop; diff --git a/src/server/ttl/ttl_checker.rs b/src/server/ttl/ttl_checker.rs index d1208472f02..baf1129ccb5 100644 --- a/src/server/ttl/ttl_checker.rs +++ b/src/server/ttl/ttl_checker.rs @@ -176,7 +176,7 @@ pub fn check_ttl_and_compact_files( return; } for (file_name, prop) in res { - if prop.max_expire_ts <= current_ts { + if prop.max_expire_ts.unwrap_or(u64::MAX) <= current_ts { files.push(file_name); } } diff --git a/src/server/ttl/ttl_compaction_filter.rs b/src/server/ttl/ttl_compaction_filter.rs index 7fdb3c686b7..06fc6981cf2 100644 --- a/src/server/ttl/ttl_compaction_filter.rs +++ b/src/server/ttl/ttl_compaction_filter.rs @@ -32,10 +32,8 @@ impl CompactionFilterFactory for TtlCompactionFilterFactory { for i in 0..context.file_numbers().len() { let table_props = context.table_properties(i); let user_props = table_props.user_collected_properties(); - if let Ok(props) = RocksTtlProperties::decode(user_props) { - if props.min_expire_ts != 0 { - min_expire_ts = std::cmp::min(min_expire_ts, props.min_expire_ts); - } + if let Some(m) = 
RocksTtlProperties::decode(user_props).min_expire_ts { + min_expire_ts = std::cmp::min(min_expire_ts, m); } } if min_expire_ts > current { diff --git a/tests/failpoints/cases/test_ttl.rs b/tests/failpoints/cases/test_ttl.rs index 026a21136ab..294b3b9481d 100644 --- a/tests/failpoints/cases/test_ttl.rs +++ b/tests/failpoints/cases/test_ttl.rs @@ -1,15 +1,18 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -use std::sync::mpsc::channel; +use std::{iter::Iterator as StdIterator, sync::mpsc::channel, time::Duration}; use api_version::{test_kv_format_impl, ApiV1Ttl, KvFormat, RawValue}; use engine_rocks::{raw::CompactOptions, util::get_cf_handle}; use engine_traits::{IterOptions, MiscExt, Peekable, SyncMutable, CF_DEFAULT}; use futures::executor::block_on; -use kvproto::kvrpcpb::Context; +use kvproto::{kvrpcpb, kvrpcpb::Context}; use tikv::{ config::DbConfig, - server::ttl::check_ttl_and_compact_files, + server::{ + gc_worker::{GcTask, TestGcRunner}, + ttl::check_ttl_and_compact_files, + }, storage::{ kv::{SnapContext, TestEngineBuilder}, lock_manager::MockLockManager, @@ -22,7 +25,7 @@ use txn_types::Key; #[test] fn test_ttl_checker() { - test_ttl_checker_impl::(); + test_kv_format_impl!(test_ttl_checker_impl); } fn test_ttl_checker_impl() { @@ -34,72 +37,99 @@ fn test_ttl_checker_impl() { .path(dir.path()) .api_version(F::TAG); let engine = builder.build_with_cfg(&cfg).unwrap(); - let kvdb = engine.get_rocksdb(); - let key1 = b"zr\0key1"; - let value1 = RawValue { - user_value: vec![0; 10], - expire_ts: Some(10), - is_delete: false, - }; - kvdb.put_cf(CF_DEFAULT, key1, &F::encode_raw_value_owned(value1)) - .unwrap(); - kvdb.flush_cf(CF_DEFAULT, true).unwrap(); - let key2 = b"zr\0key2"; - let value2 = RawValue { - user_value: vec![0; 10], - expire_ts: Some(120), - is_delete: false, - }; - kvdb.put_cf(CF_DEFAULT, key2, &F::encode_raw_value_owned(value2)) - .unwrap(); - let key3 = b"zr\0key3"; - let value3 = RawValue { - user_value: vec![0; 10], - 
expire_ts: Some(20), - is_delete: false, - }; - kvdb.put_cf(CF_DEFAULT, key3, &F::encode_raw_value_owned(value3)) - .unwrap(); - kvdb.flush_cf(CF_DEFAULT, true).unwrap(); - let key4 = b"zr\0key4"; - let value4 = RawValue { - user_value: vec![0; 10], - expire_ts: None, - is_delete: false, - }; - kvdb.put_cf(CF_DEFAULT, key4, &F::encode_raw_value_owned(value4)) - .unwrap(); - kvdb.flush_cf(CF_DEFAULT, true).unwrap(); - let key5 = b"zr\0key5"; - let value5 = RawValue { - user_value: vec![0; 10], - expire_ts: Some(10), - is_delete: false, - }; - kvdb.put_cf(CF_DEFAULT, key5, &F::encode_raw_value_owned(value5)) - .unwrap(); - kvdb.flush_cf(CF_DEFAULT, true).unwrap(); - - assert!(kvdb.get_value_cf(CF_DEFAULT, key1).unwrap().is_some()); - assert!(kvdb.get_value_cf(CF_DEFAULT, key2).unwrap().is_some()); - assert!(kvdb.get_value_cf(CF_DEFAULT, key3).unwrap().is_some()); - assert!(kvdb.get_value_cf(CF_DEFAULT, key4).unwrap().is_some()); - assert!(kvdb.get_value_cf(CF_DEFAULT, key5).unwrap().is_some()); - check_ttl_and_compact_files(&kvdb, b"zr\0key1", b"zr\0key25", false); - assert!(kvdb.get_value_cf(CF_DEFAULT, key1).unwrap().is_none()); - assert!(kvdb.get_value_cf(CF_DEFAULT, key2).unwrap().is_some()); - assert!(kvdb.get_value_cf(CF_DEFAULT, key3).unwrap().is_none()); - assert!(kvdb.get_value_cf(CF_DEFAULT, key4).unwrap().is_some()); - assert!(kvdb.get_value_cf(CF_DEFAULT, key5).unwrap().is_some()); + // Make all entries earlier than safe point. + // TTL expired entries can only be collected when commit_ts < safe_point. + let commit_ts = 100; + let mut gc_runner = TestGcRunner::new(200); + + let mut do_compact = |start_key: &[u8], end_key: &[u8], mut expect_keys: usize| { + gc_runner.prepare_gc(&kvdb); + check_ttl_and_compact_files(&kvdb, start_key, end_key, false); + + if F::TAG == kvrpcpb::ApiVersion::V2 { + while let Ok(Some(task)) = gc_runner.gc_receiver.recv_timeout(Duration::from_secs(3)) { + match task { + GcTask::RawGcKeys { keys, .. 
} => { + expect_keys = expect_keys.checked_sub(keys.len()).unwrap(); + + // Delete keys by `delete_cf` for simplicity. + // In real cases, all old MVCC versions of `key` should be deleted. + // See `GcRunner::raw_gc_keys`. + for key in keys { + let db_key = + keys::data_key(key.append_ts(commit_ts.into()).as_encoded()); + kvdb.delete_cf(CF_DEFAULT, &db_key).unwrap(); + } + + if expect_keys == 0 { + break; + } + } + _ => unreachable!(), + } + } + assert_eq!(expect_keys, 0); + } + + gc_runner.post_gc(); + }; - check_ttl_and_compact_files(&kvdb, b"zr\0key2", b"zr\0key6", false); - assert!(kvdb.get_value_cf(CF_DEFAULT, key1).unwrap().is_none()); - assert!(kvdb.get_value_cf(CF_DEFAULT, key2).unwrap().is_some()); - assert!(kvdb.get_value_cf(CF_DEFAULT, key3).unwrap().is_none()); - assert!(kvdb.get_value_cf(CF_DEFAULT, key4).unwrap().is_some()); - assert!(kvdb.get_value_cf(CF_DEFAULT, key5).unwrap().is_none()); + let cases: Vec< + Vec<(&[u8] /* key */, Option /* expire_ts */)>, /* a batch, will be written to + * individual sst file by + * `flush_cf` */ + > = vec![ + vec![(b"r\0key0", Some(10)), (b"r\0key1", Some(110))], + vec![(b"r\0key2", Some(120)), (b"r\0key3", Some(20))], + vec![(b"r\0key4", None)], + vec![(b"r\0key5", Some(10))], + ]; + let keys = cases + .into_iter() + .flat_map(|batch| { + let keys = batch + .into_iter() + .map(|(key, expire_ts)| { + let key = make_raw_key::(key, Some(commit_ts)); + let value = RawValue { + user_value: vec![0; 10], + expire_ts, + is_delete: false, + }; + kvdb.put_cf(CF_DEFAULT, &key, &F::encode_raw_value_owned(value)) + .unwrap(); + key + }) + .collect::>(); + kvdb.flush_cf(CF_DEFAULT, true).unwrap(); + keys + }) + .collect::>(); + + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[0]).unwrap().is_some()); + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[1]).unwrap().is_some()); + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[2]).unwrap().is_some()); + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[3]).unwrap().is_some()); + 
assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[4]).unwrap().is_some()); + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[5]).unwrap().is_some()); + + do_compact(b"zr\0key1", b"zr\0key25", 2); // cover key0 ~ key3 + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[0]).unwrap().is_none()); + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[1]).unwrap().is_some()); + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[2]).unwrap().is_some()); + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[3]).unwrap().is_none()); + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[4]).unwrap().is_some()); + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[5]).unwrap().is_some()); + + do_compact(b"zr\0key2", b"zr\0key6", 2); // cover key2 ~ key5 + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[0]).unwrap().is_none()); + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[1]).unwrap().is_some()); + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[2]).unwrap().is_some()); + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[3]).unwrap().is_none()); + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[4]).unwrap().is_some()); + assert!(kvdb.get_value_cf(CF_DEFAULT, &keys[5]).unwrap().is_none()); } #[test] @@ -446,3 +476,9 @@ fn test_stoarge_raw_batch_put_ttl_impl() { assert_eq!(res, Some(ttl)); } } + +fn make_raw_key(key: &[u8], ts: Option) -> Vec { + let encode_key = F::encode_raw_key(key, ts.map(Into::into)); + let res = keys::data_key(encode_key.as_encoded()); + res +} From c595cf51ecb002ed649944b7fa308982ad6e2f69 Mon Sep 17 00:00:00 2001 From: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Date: Wed, 19 Jul 2023 13:29:17 +0800 Subject: [PATCH 0802/1149] cloud: support to write/read azure blobs with customer provided key (#15025) close tikv/tikv#15024 support to write/read azure blobs with customer provided key Signed-off-by: Leavrth Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 16 +-- components/cloud/azure/src/azblob.rs | 200 +++++++++++++++++++++++++-- 2 files changed, 198 
insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ecdd12c9507..43cabfa1080 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -391,7 +391,7 @@ dependencies = [ [[package]] name = "azure_core" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#5014404dff3cd77faf37a7077ec67bb6a2cf1f4c" +source = "git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" dependencies = [ "async-trait", "base64 0.21.0", @@ -403,7 +403,7 @@ dependencies = [ "log", "paste", "pin-project", - "quick-xml 0.28.2", + "quick-xml 0.29.0", "rand 0.8.5", "reqwest", "rustc_version 0.4.0", @@ -417,7 +417,7 @@ dependencies = [ [[package]] name = "azure_identity" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#5014404dff3cd77faf37a7077ec67bb6a2cf1f4c" +source = "git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" dependencies = [ "async-lock", "async-trait", @@ -437,7 +437,7 @@ dependencies = [ [[package]] name = "azure_security_keyvault" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#5014404dff3cd77faf37a7077ec67bb6a2cf1f4c" +source = "git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" dependencies = [ "async-trait", "azure_core", @@ -452,7 +452,7 @@ dependencies = [ [[package]] name = "azure_storage" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#5014404dff3cd77faf37a7077ec67bb6a2cf1f4c" +source = "git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" dependencies = [ "RustyXML", "async-trait", @@ -474,7 +474,7 @@ dependencies = [ [[package]] name = "azure_storage_blobs" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#5014404dff3cd77faf37a7077ec67bb6a2cf1f4c" +source = "git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" dependencies = [ "RustyXML", 
"azure_core", @@ -4450,9 +4450,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.28.2" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce5e73202a820a31f8a0ee32ada5e21029c81fd9e3ebf668a40832e4219d9d1" +checksum = "81b9228215d82c7b61490fec1de287136b5de6f5700f6e58ea9ad61a7964ca51" dependencies = [ "memchr", "serde", diff --git a/components/cloud/azure/src/azblob.rs b/components/cloud/azure/src/azblob.rs index dbc52a898b0..f7327902724 100644 --- a/components/cloud/azure/src/azblob.rs +++ b/components/cloud/azure/src/azblob.rs @@ -13,7 +13,7 @@ use azure_core::{ }; use azure_identity::{ClientSecretCredential, TokenCredentialOptions}; use azure_storage::{prelude::*, ConnectionString, ConnectionStringBuilder}; -use azure_storage_blobs::prelude::*; +use azure_storage_blobs::{blob::operations::PutBlockBlobBuilder, prelude::*}; use cloud::blob::{ none_to_empty, BlobConfig, BlobStorage, BucketConf, PutResource, StringNonEmpty, }; @@ -24,8 +24,11 @@ use futures_util::{ stream::StreamExt, TryStreamExt, }; -pub use kvproto::brpb::{AzureBlobStorage as InputConfig, Bucket as InputBucket, CloudDynamic}; +pub use kvproto::brpb::{ + AzureBlobStorage as InputConfig, AzureCustomerKey, Bucket as InputBucket, CloudDynamic, +}; use oauth2::{ClientId, ClientSecret}; +use openssl::sha::Sha256; use tikv_util::{ debug, stream::{retry, RetryError}, @@ -53,6 +56,39 @@ struct CredentialInfo { client_secret: ClientSecret, } +#[derive(Clone, Debug)] +struct EncryptionCustomer { + encryption_key: String, + encryption_key_sha256: String, +} + +impl EncryptionCustomer { + fn new(encryption_key: &str) -> Self { + let mut hasher = Sha256::new(); + hasher.update(encryption_key.as_bytes()); + let encryption_key_sha256 = base64::encode(hasher.finish()); + EncryptionCustomer { + encryption_key: base64::encode(encryption_key), + encryption_key_sha256, + } + } +} + +impl From for EncryptionCustomer { + fn from(value: AzureCustomerKey) -> 
Self { + EncryptionCustomer { + encryption_key: value.encryption_key, + encryption_key_sha256: value.encryption_key_sha256, + } + } +} + +impl From for (String, String) { + fn from(value: EncryptionCustomer) -> (String, String) { + (value.encryption_key, value.encryption_key_sha256) + } +} + #[derive(Clone)] pub struct Config { bucket: BucketConf, @@ -64,6 +100,7 @@ pub struct Config { env_account_name: Option, env_shared_key: Option, encryption_scope: Option, + encryption_customer: Option, } impl std::fmt::Debug for Config { @@ -77,6 +114,7 @@ impl std::fmt::Debug for Config { .field("env_account_name", &self.env_account_name) .field("env_shared_key", &"?") .field("encryption_scope", &self.encryption_scope) + .field("encryption_customer_key", &"?") .finish() } } @@ -93,6 +131,7 @@ impl Config { env_account_name: Self::load_env_account_name(), env_shared_key: Self::load_env_shared_key(), encryption_scope: None, + encryption_customer: None, } } @@ -141,6 +180,9 @@ impl Config { encryption_scope: StringNonEmpty::opt( attrs.get("encryption_scope").unwrap_or(def).clone(), ), + encryption_customer: attrs + .get("encryption_key") + .map(|encryption_key| EncryptionCustomer::new(encryption_key)), }) } @@ -153,6 +195,11 @@ impl Config { region: None, }; + let encryption_customer = input + .encryption_key + .into_option() + .map(EncryptionCustomer::from); + Ok(Config { bucket, account_name: StringNonEmpty::opt(input.account_name), @@ -162,6 +209,7 @@ impl Config { env_account_name: Self::load_env_account_name(), env_shared_key: Self::load_env_shared_key(), encryption_scope: StringNonEmpty::opt(input.encryption_scope), + encryption_customer, }) } @@ -265,6 +313,7 @@ struct AzureUploader { storage_class: Option, encryption_scope: Option, + encryption_customer: Option, } impl AzureUploader { @@ -279,6 +328,7 @@ impl AzureUploader { config.bucket.storage_class.clone(), )), encryption_scope: config.encryption_scope.clone(), + encryption_customer: 
config.encryption_customer.clone(), } } @@ -313,14 +363,8 @@ impl AzureUploader { .blob_client(&self.name) .put_block_blob(data.to_vec()); - // the encryption scope and the access tier can not be both in the HTTP headers - let builder = if let Some(scope) = &self.encryption_scope { - builder.encryption_scope(scope.deref().clone()) - } else if let Some(tier) = self.storage_class { - builder.access_tier(tier) - } else { - builder - }; + let builder = self.adjust_put_builder(builder); + builder.await?; Ok(()) }) @@ -339,6 +383,26 @@ impl AzureUploader { } } + #[inline] + fn adjust_put_builder(&self, builder: PutBlockBlobBuilder) -> PutBlockBlobBuilder { + // the encryption scope and the access tier can not be both in the HTTP headers + if let Some(scope) = &self.encryption_scope { + return builder.encryption_scope(scope.deref().clone()); + } + + // the encryption customer provided key and the access tier can not be both in + // the HTTP headers + if let Some(key) = &self.encryption_customer { + return builder.encryption_key::<(String, String)>(key.clone().into()); + } + + if let Some(tier) = self.storage_class { + return builder.access_tier(tier); + } + + builder + } + fn get_timeout() -> Duration { CONNECTION_TIMEOUT } @@ -515,6 +579,8 @@ impl AzureStorage { } pub fn new(config: Config) -> io::Result { + Self::check_config(&config)?; + let account_name = config.get_account_name()?; let bucket = (*config.bucket.bucket).to_owned(); // priority: @@ -613,6 +679,38 @@ impl AzureStorage { } } + fn check_config(config: &Config) -> io::Result<()> { + if config.bucket.storage_class.is_some() { + if config.encryption_scope.is_some() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + concat!( + "Set Blob Tier cannot be used with customer-provided scope. ", + "Please don't supply the access-tier when use encryption-scope." 
+ ), + )); + } + if config.encryption_customer.is_some() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + concat!( + "Set Blob Tier cannot be used with customer-provided key. ", + "Please don't supply the access-tier when use encryption-key." + ), + )); + } + } else if config.encryption_scope.is_some() && config.encryption_customer.is_some() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + concat!( + "Undefined input: There are both encryption-scope and customer provided key. ", + "Please select only one to encrypt blobs." + ), + )); + } + Ok(()) + } + fn maybe_prefix_key(&self, key: &str) -> String { if let Some(prefix) = &self.config.bucket.prefix { return format!("{}/{}", prefix.trim_end_matches('/'), key); @@ -636,6 +734,12 @@ impl AzureStorage { blob_client.get() }; + let builder = if let Some(key) = &self.config.encryption_customer { + builder.encryption_key::<(String, String)>(key.clone().into()) + } else { + builder + }; + let mut chunk: Vec = vec![]; let mut stream = builder.into_stream(); while let Some(value) = stream.next().await { @@ -836,4 +940,80 @@ mod tests { cd.set_bucket(bucket); cd } + + #[test] + fn test_config_check() { + { + let mut input = InputConfig::default(); + input.set_bucket("test".to_owned()); + let config = Config::from_input(input).unwrap(); + AzureStorage::check_config(&config).unwrap(); + } + { + let mut input = InputConfig::default(); + input.set_bucket("test".to_owned()); + input.set_storage_class("Hot".to_owned()); + let config = Config::from_input(input).unwrap(); + AzureStorage::check_config(&config).unwrap(); + } + { + let mut input = InputConfig::default(); + input.set_bucket("test".to_owned()); + input.set_storage_class("Hot".to_owned()); + let mut encryption_key = AzureCustomerKey::default(); + encryption_key.set_encryption_key("test".to_owned()); + encryption_key.set_encryption_key_sha256("test".to_owned()); + input.set_encryption_key(encryption_key); + let config = 
Config::from_input(input).unwrap(); + assert!(AzureStorage::check_config(&config).is_err()); + } + { + let mut input = InputConfig::default(); + input.set_bucket("test".to_owned()); + input.set_storage_class("Hot".to_owned()); + input.set_encryption_scope("test".to_owned()); + let config = Config::from_input(input).unwrap(); + assert!(AzureStorage::check_config(&config).is_err()); + } + { + let mut input = InputConfig::default(); + input.set_bucket("test".to_owned()); + input.set_storage_class("Hot".to_owned()); + let mut encryption_key = AzureCustomerKey::default(); + encryption_key.set_encryption_key("test".to_owned()); + encryption_key.set_encryption_key_sha256("test".to_owned()); + input.set_encryption_key(encryption_key); + input.set_encryption_scope("test".to_owned()); + let config = Config::from_input(input).unwrap(); + assert!(AzureStorage::check_config(&config).is_err()); + } + { + let mut input = InputConfig::default(); + input.set_bucket("test".to_owned()); + let mut encryption_key = AzureCustomerKey::default(); + encryption_key.set_encryption_key("test".to_owned()); + encryption_key.set_encryption_key_sha256("test".to_owned()); + input.set_encryption_key(encryption_key); + let config = Config::from_input(input).unwrap(); + AzureStorage::check_config(&config).unwrap(); + } + { + let mut input = InputConfig::default(); + input.set_bucket("test".to_owned()); + input.set_encryption_scope("test".to_owned()); + let config = Config::from_input(input).unwrap(); + AzureStorage::check_config(&config).unwrap(); + } + { + let mut input = InputConfig::default(); + input.set_bucket("test".to_owned()); + let mut encryption_key = AzureCustomerKey::default(); + input.set_encryption_scope("test".to_owned()); + encryption_key.set_encryption_key("test".to_owned()); + encryption_key.set_encryption_key_sha256("test".to_owned()); + input.set_encryption_key(encryption_key); + let config = Config::from_input(input).unwrap(); + 
assert!(AzureStorage::check_config(&config).is_err()); + } + } } From 1f3e7530caf346e147508457db3bf8e1a96f52d0 Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Wed, 19 Jul 2023 15:48:47 +0800 Subject: [PATCH 0803/1149] raftstore: set availability_context (#15152) close tikv/tikv#15151 Set availability_context in raftstore v1 Signed-off-by: CalvinNeo Signed-off-by: Neil Shen Co-authored-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/peer.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index afcd35e2496..c9fa108e3c8 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2684,6 +2684,10 @@ where let mut resp = ExtraMessage::default(); resp.set_type(ExtraMessageType::MsgAvailabilityResponse); resp.wait_data = self.fsm.peer.wait_data; + let report = resp.mut_availability_context(); + report.set_from_region_id(self.region_id()); + report.set_from_region_epoch(self.region().get_region_epoch().clone()); + report.set_trimmed(true); self.fsm .peer .send_extra_message(resp, &mut self.ctx.trans, from); From 6e5c02207218cef13e1853426e6b18abe66b6f01 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 20 Jul 2023 01:24:53 +0800 Subject: [PATCH 0804/1149] resource_control: add background task control for br and coprocessor (#15100) ref tikv/tikv#14900 Signed-off-by: glorv --- Cargo.lock | 2 + components/backup/Cargo.toml | 1 + components/backup/src/endpoint.rs | 54 +++++++- components/external_storage/src/lib.rs | 5 +- components/resource_control/src/future.rs | 12 +- components/resource_control/src/lib.rs | 3 +- .../resource_control/src/resource_group.rs | 122 ++++++++++++++---- .../resource_control/src/resource_limiter.rs | 11 +- components/resource_control/src/worker.rs | 25 +++- components/server/src/server.rs | 3 + 
components/server/src/server2.rs | 3 + components/sst_importer/src/sst_importer.rs | 9 ++ components/test_backup/src/lib.rs | 1 + components/test_coprocessor/src/fixture.rs | 1 + components/test_raftstore-v2/src/server.rs | 2 + components/test_raftstore/src/server.rs | 2 + components/tidb_query_executors/Cargo.toml | 1 + components/tidb_query_executors/src/runner.rs | 20 ++- components/tikv_util/src/future.rs | 29 ++++- src/coprocessor/checksum.rs | 22 +++- src/coprocessor/dag/mod.rs | 7 + src/coprocessor/endpoint.rs | 29 ++++- src/coprocessor/statistics/analyze.rs | 33 ++++- src/import/sst_service.rs | 97 +++++++++----- src/server/server.rs | 1 + src/server/service/kv.rs | 4 +- src/server/status_server/mod.rs | 14 +- .../benches/coprocessor_executors/util/mod.rs | 1 + .../resource_metering/test_cpu.rs | 1 + .../resource_metering/test_read_keys.rs | 1 + 30 files changed, 419 insertions(+), 97 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 43cabfa1080..be27c6ee616 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -537,6 +537,7 @@ dependencies = [ "raft", "raftstore", "rand 0.8.5", + "resource_control", "security", "serde", "serde_derive", @@ -6621,6 +6622,7 @@ dependencies = [ "log_wrappers", "match-template", "protobuf", + "resource_control", "slog", "slog-global", "smallvec", diff --git a/components/backup/Cargo.toml b/components/backup/Cargo.toml index 8c3490d1bc0..225a88a3e8f 100644 --- a/components/backup/Cargo.toml +++ b/components/backup/Cargo.toml @@ -60,6 +60,7 @@ pd_client = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly"] } raft = { workspace = true } raftstore = { workspace = true } +resource_control = { workspace = true } security = { workspace = true } serde = "1.0" serde_derive = "1.0" diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 9913c668202..a4efc162092 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -5,7 +5,7 
@@ use std::{ cell::RefCell, fmt, sync::{atomic::*, mpsc, Arc, Mutex, RwLock}, - time::{SystemTime, UNIX_EPOCH}, + time::{Duration, SystemTime, UNIX_EPOCH}, }; use async_channel::SendError; @@ -24,6 +24,7 @@ use kvproto::{ use online_config::OnlineConfig; use raft::StateRole; use raftstore::coprocessor::RegionInfoProvider; +use resource_control::{with_resource_limiter, ResourceGroupManager, ResourceLimiter}; use tikv::{ config::BackupConfig, storage::{ @@ -35,7 +36,9 @@ use tikv::{ }, }; use tikv_util::{ - box_err, debug, error, error_unknown, impl_display_as_debug, info, + box_err, debug, error, error_unknown, + future::RescheduleChecker, + impl_display_as_debug, info, store::find_peer, time::{Instant, Limiter}, warn, @@ -53,6 +56,8 @@ use crate::{ }; const BACKUP_BATCH_LIMIT: usize = 1024; +// task yield duration when resource limit is on. +const TASK_YIELD_DURATION: Duration = Duration::from_millis(10); #[derive(Clone)] struct Request { @@ -71,6 +76,8 @@ struct Request { compression_level: i32, cipher: CipherInfo, replica_read: bool, + resource_group_name: String, + source_tag: String, } /// Backup Task. 
@@ -116,6 +123,10 @@ impl Task { cf: req.get_cf().to_owned(), })?; + let mut source_tag: String = req.get_context().get_request_source().into(); + if source_tag.is_empty() { + source_tag = "br".into(); + } let task = Task { request: Request { start_key: req.get_start_key().to_owned(), @@ -132,6 +143,12 @@ impl Task { compression_type: req.get_compression_type(), compression_level: req.get_compression_level(), replica_read: req.get_replica_read(), + resource_group_name: req + .get_context() + .get_resource_control_context() + .get_resource_group_name() + .to_owned(), + source_tag, cipher: req.cipher_info.unwrap_or_else(|| { let mut cipher = CipherInfo::default(); cipher.set_cipher_type(EncryptionMethod::Plaintext); @@ -200,6 +217,7 @@ struct InMemBackupFiles { start_version: TimeStamp, end_version: TimeStamp, region: Region, + limiter: Option>, } async fn save_backup_file_worker( @@ -210,7 +228,7 @@ async fn save_backup_file_worker( ) { while let Ok(msg) = rx.recv().await { let files = if msg.files.need_flush_keys() { - match msg.files.save(&storage).await { + match with_resource_limiter(msg.files.save(&storage), msg.limiter.clone()).await { Ok(mut split_files) => { let mut has_err = false; for file in split_files.iter_mut() { @@ -300,6 +318,7 @@ impl BackupRange { begin_ts: TimeStamp, saver: async_channel::Sender>, storage_name: &str, + resource_limiter: Option>, ) -> Result { assert!(!self.codec.is_raw_kv); @@ -381,6 +400,8 @@ impl BackupRange { .clone() .map_or_else(Vec::new, |k| k.into_raw().unwrap()); let mut writer = writer_builder.build(next_file_start_key.clone(), storage_name)?; + let mut reschedule_checker = + RescheduleChecker::new(tokio::task::yield_now, TASK_YIELD_DURATION); loop { if let Err(e) = scanner.scan_entries(&mut batch) { error!(?e; "backup scan entries failed"); @@ -410,6 +431,7 @@ impl BackupRange { start_version: begin_ts, end_version: backup_ts, region: self.region.clone(), + limiter: resource_limiter.clone(), }; 
send_to_worker_with_metrics(&saver, msg).await?; next_file_start_key = this_end_key; @@ -426,6 +448,9 @@ impl BackupRange { error_unknown!(?e; "backup build sst failed"); return Err(e); } + if resource_limiter.is_some() { + reschedule_checker.check().await; + } } drop(snap_store); let stat = scanner.take_statistics(); @@ -453,6 +478,7 @@ impl BackupRange { start_version: begin_ts, end_version: backup_ts, region: self.region.clone(), + limiter: resource_limiter.clone(), }; send_to_worker_with_metrics(&saver, msg).await?; @@ -586,6 +612,7 @@ impl BackupRange { start_version: TimeStamp::zero(), end_version: TimeStamp::zero(), region: self.region.clone(), + limiter: None, }; send_to_worker_with_metrics(&saver_tx, msg).await?; Ok(stat) @@ -684,6 +711,7 @@ pub struct Endpoint { softlimit: SoftLimitKeeper, api_version: ApiVersion, causal_ts_provider: Option>, // used in rawkv apiv2 only + resource_ctl: Option>, pub(crate) engine: E, pub(crate) region_info: R, @@ -838,6 +866,7 @@ impl Endpoint { concurrency_manager: ConcurrencyManager, api_version: ApiVersion, causal_ts_provider: Option>, + resource_ctl: Option>, ) -> Endpoint { let pool = ControlThreadPool::new(); let rt = utils::create_tokio_runtime(config.io_thread_size, "backup-io").unwrap(); @@ -856,6 +885,7 @@ impl Endpoint { concurrency_manager, api_version, causal_ts_provider, + resource_ctl, } } @@ -897,6 +927,9 @@ impl Endpoint { let batch_size = self.config_manager.0.read().unwrap().batch_size; let sst_max_size = self.config_manager.0.read().unwrap().sst_max_size.0; let limit = self.softlimit.limit(); + let resource_limiter = self.resource_ctl.as_ref().and_then(|r| { + r.get_resource_limiter(&request.resource_group_name, &request.source_tag) + }); self.pool.borrow_mut().spawn(async move { // Migrated to 2021 migration. 
This let statement is probably not needed, see @@ -982,8 +1015,7 @@ impl Endpoint { sst_max_size, request.cipher.clone(), ); - brange - .backup( + with_resource_limiter(brange.backup( writer_builder, engine, concurrency_manager.clone(), @@ -991,7 +1023,8 @@ impl Endpoint { start_ts, saver_tx.clone(), _backend.name(), - ) + resource_limiter.clone(), + ), resource_limiter.clone()) .await }; match stat { @@ -1422,6 +1455,7 @@ pub mod tests { concurrency_manager, api_version, causal_ts_provider, + None, ), ) } @@ -1566,6 +1600,8 @@ pub mod tests { compression_level: 0, cipher: CipherInfo::default(), replica_read: false, + resource_group_name: "".into(), + source_tag: "br".into(), }, resp: tx, }; @@ -1675,6 +1711,8 @@ pub mod tests { compression_level: 0, cipher: CipherInfo::default(), replica_read: false, + resource_group_name: "".into(), + source_tag: "br".into(), }, resp: tx, }; @@ -1704,6 +1742,8 @@ pub mod tests { compression_level: 0, cipher: CipherInfo::default(), replica_read: true, + resource_group_name: "".into(), + source_tag: "br".into(), }, resp: tx, }; @@ -1817,6 +1857,8 @@ pub mod tests { compression_level: 0, cipher: CipherInfo::default(), replica_read: false, + resource_group_name: "".into(), + source_tag: "br".into(), }, resp: tx, }; diff --git a/components/external_storage/src/lib.rs b/components/external_storage/src/lib.rs index 211a1b52ad6..dd021f14bf8 100644 --- a/components/external_storage/src/lib.rs +++ b/components/external_storage/src/lib.rs @@ -26,6 +26,7 @@ use futures_util::AsyncReadExt; use kvproto::brpb::CompressionType; use openssl::hash::{Hasher, MessageDigest}; use tikv_util::{ + future::RescheduleChecker, stream::READ_BUF_SIZE, time::{Instant, Limiter}, }; @@ -285,7 +286,8 @@ where format!("openssl hasher failed to init: {}", err), ) })?; - + let mut yield_checker = + RescheduleChecker::new(tokio::task::yield_now, Duration::from_millis(10)); loop { // separate the speed limiting from actual reading so it won't // affect the timeout 
calculation. @@ -306,6 +308,7 @@ where })?; } file_length += bytes_read as u64; + yield_checker.check().await; } if expected_length != 0 && expected_length != file_length { diff --git a/components/resource_control/src/future.rs b/components/resource_control/src/future.rs index 0f0bd38c212..a22433638b8 100644 --- a/components/resource_control/src/future.rs +++ b/components/resource_control/src/future.rs @@ -96,7 +96,6 @@ pub struct LimitedFuture { } impl LimitedFuture { - #[allow(dead_code)] pub fn new(f: F, resource_limiter: Arc) -> Self { Self { f, @@ -213,6 +212,17 @@ impl Future for OptionalFuture { } } +pub async fn with_resource_limiter( + f: F, + limiter: Option>, +) -> F::Output { + if let Some(limiter) = limiter { + LimitedFuture::new(f, limiter).await + } else { + f.await + } +} + #[cfg(test)] mod tests { use std::sync::mpsc::{channel, Sender}; diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index 05330339163..0989dc8fff3 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -12,7 +12,7 @@ pub use resource_group::{ }; mod future; -pub use future::ControlledFuture; +pub use future::{with_resource_limiter, ControlledFuture}; #[cfg(test)] extern crate test; @@ -24,6 +24,7 @@ pub mod channel; pub use channel::ResourceMetered; mod resource_limiter; +pub use resource_limiter::ResourceLimiter; pub mod worker; #[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 16109a5436a..ce5d3f20608 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -4,6 +4,7 @@ use std::{ borrow::Cow, cell::Cell, cmp::{max, min}, + collections::HashSet, sync::{ atomic::{AtomicBool, AtomicU64, Ordering}, Arc, Mutex, @@ -117,27 +118,27 @@ impl ResourceGroupManager { 
controller.add_resource_group(group_name.clone().into_bytes(), ru_quota, rg.priority); }); info!("add resource group"; "name"=> &rg.name, "ru" => rg.get_r_u_settings().get_r_u().get_settings().get_fill_rate()); - let limiter = match self.resource_groups.get(&rg.name) { - Some(g) => g.limiter.clone(), - None => Self::build_resource_limiter(&rg), - }; + // try to reuse the quota limit when update resource group settings. + let prev_limiter = self + .resource_groups + .get(&rg.name) + .and_then(|g| g.limiter.clone()); + let limiter = Self::build_resource_limiter(&rg, prev_limiter); self.resource_groups .insert(group_name, ResourceGroup::new(rg, limiter)); } - fn build_resource_limiter(rg: &PbResourceGroup) -> Option> { - #[cfg(test)] - { - if rg.name.contains("background") { - return Some(Arc::new(ResourceLimiter::new(f64::INFINITY, f64::INFINITY))); - } - } - // TODO: only the "default" resource group support background tasks currently. - if rg.name == DEFAULT_RESOURCE_GROUP_NAME { - return Some(Arc::new(ResourceLimiter::new(f64::INFINITY, f64::INFINITY))); + fn build_resource_limiter( + rg: &PbResourceGroup, + old_limiter: Option>, + ) -> Option> { + if !rg.get_background_settings().get_job_types().is_empty() { + old_limiter + .or_else(|| Some(Arc::new(ResourceLimiter::new(f64::INFINITY, f64::INFINITY)))) + } else { + None } - None } pub fn remove_resource_group(&self, name: &str) { @@ -218,26 +219,43 @@ impl ResourceGroupManager { } } - pub fn get_resource_limiter(&self, rg: &str) -> Option> { - if let Some(g) = self.resource_groups.get(rg) { - return g.limiter.clone(); + pub fn get_resource_limiter( + &self, + rg: &str, + request_source: &str, + ) -> Option> { + if let Some(group) = self.resource_groups.get(rg) { + if !group.fallback_default { + return group.get_resource_limiter(request_source); + } } - // fallback to the default resource group if target group doesn't exist. 
self.resource_groups .get(DEFAULT_RESOURCE_GROUP_NAME) - .and_then(|g| g.limiter.clone()) + .and_then(|g| g.get_resource_limiter(request_source)) } } pub(crate) struct ResourceGroup { pub group: PbResourceGroup, pub limiter: Option>, + background_source_types: HashSet, + // whether to fallback background resource control to `default` group. + fallback_default: bool, } impl ResourceGroup { fn new(group: PbResourceGroup, limiter: Option>) -> Self { - Self { group, limiter } + let background_source_types = + HashSet::from_iter(group.get_background_settings().get_job_types().to_owned()); + let fallback_default = + !group.has_background_settings() && group.name != DEFAULT_RESOURCE_GROUP_NAME; + Self { + group, + limiter, + background_source_types, + fallback_default, + } } pub(crate) fn get_ru_quota(&self) -> u64 { @@ -248,6 +266,22 @@ impl ResourceGroup { .get_settings() .get_fill_rate() } + + fn get_resource_limiter(&self, request_source: &str) -> Option> { + self.limiter.as_ref().and_then(|limiter| { + // the source task name is the last part of `request_source` separated by "_" + // the request_source is + // {extrenal|internal}_{tidb_req_source}_{source_task_name} + let source_task_name = request_source.rsplit('_').next().unwrap_or(""); + if !source_task_name.is_empty() + && self.background_source_types.contains(source_task_name) + { + Some(limiter.clone()) + } else { + None + } + }) + } } pub struct ResourceController { @@ -644,6 +678,18 @@ pub(crate) mod tests { new_resource_group(name, true, ru, ru, group_priority) } + pub fn new_background_resource_group_ru( + name: String, + ru: u64, + group_priority: u32, + task_types: Vec, + ) -> PbResourceGroup { + let mut rg = new_resource_group(name, true, ru, ru, group_priority); + rg.mut_background_settings() + .set_job_types(task_types.into()); + rg + } + pub fn new_resource_group( name: String, is_ru_mode: bool, @@ -755,6 +801,20 @@ pub(crate) mod tests { // test resource gorup resource limiter. 
let group1 = resource_manager.get_resource_group("test").unwrap(); assert!(group1.limiter.is_none()); + assert!( + resource_manager + .get_resource_group("default") + .unwrap() + .limiter + .is_none() + ); + let new_default = new_background_resource_group_ru( + "default".into(), + 10000, + MEDIUM_PRIORITY, + vec!["br".into()], + ); + resource_manager.add_resource_group(new_default); let default_group = resource_manager.get_resource_group("default").unwrap(); let limiter = default_group.limiter.as_ref().unwrap().clone(); assert!(limiter.get_limiter(Cpu).get_rate_limit().is_infinite()); @@ -764,9 +824,13 @@ pub(crate) mod tests { drop(group1); drop(default_group); - let new_default = new_resource_group_ru("default".into(), 100, LOW_PRIORITY); + let new_default = new_background_resource_group_ru( + "default".into(), + 100, + LOW_PRIORITY, + vec!["lightning".into()], + ); resource_manager.add_resource_group(new_default); - let default_group = resource_manager.get_resource_group("default").unwrap(); assert_eq!(default_group.get_ru_quota(), 100); let new_limiter = default_group.limiter.as_ref().unwrap().clone(); @@ -774,6 +838,18 @@ pub(crate) mod tests { assert_eq!(new_limiter.get_limiter(Cpu).get_rate_limit(), 100.0); assert_eq!(new_limiter.get_limiter(Io).get_rate_limit(), 200.0); assert_eq!(&*new_limiter as *const _, &*limiter as *const _); + drop(default_group); + + // remove background setting, quota limiter should be none. 
+ let new_default = new_resource_group_ru("default".into(), 100, LOW_PRIORITY); + resource_manager.add_resource_group(new_default); + assert!( + resource_manager + .get_resource_group("default") + .unwrap() + .limiter + .is_none() + ); } #[test] diff --git a/components/resource_control/src/resource_limiter.rs b/components/resource_control/src/resource_limiter.rs index 29218da7b6b..89972b1ecc1 100644 --- a/components/resource_control/src/resource_limiter.rs +++ b/components/resource_control/src/resource_limiter.rs @@ -29,6 +29,12 @@ pub struct ResourceLimiter { limiters: [QuotaLimiter; ResourceType::COUNT], } +impl std::fmt::Debug for ResourceLimiter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "ResourceLimiter(...)") + } +} + impl ResourceLimiter { pub fn new(cpu_limit: f64, io_limit: f64) -> Self { let cpu_limiter = QuotaLimiter::new(cpu_limit); @@ -38,7 +44,6 @@ impl ResourceLimiter { } } - #[allow(dead_code)] pub fn consume(&self, cpu_time: Duration, io_bytes: u64) -> Duration { let cpu_dur = self.limiters[ResourceType::Cpu as usize].consume(cpu_time.as_micros() as u64); @@ -53,10 +58,8 @@ impl ResourceLimiter { } pub(crate) struct QuotaLimiter { - #[allow(dead_code)] limiter: Limiter, // total waiting duration in us - #[allow(dead_code)] total_wait_dur_us: AtomicU64, } @@ -80,7 +83,6 @@ impl QuotaLimiter { self.limiter.set_speed_limit(limit); } - #[allow(dead_code)] pub fn get_statistics(&self) -> GroupStatistics { GroupStatistics { total_consumed: self.limiter.total_bytes_consumed() as u64, @@ -88,7 +90,6 @@ impl QuotaLimiter { } } - #[allow(dead_code)] fn consume(&self, value: u64) -> Duration { if value == 0 { return Duration::ZERO; diff --git a/components/resource_control/src/worker.rs b/components/resource_control/src/worker.rs index 111c00ca394..095fab34816 100644 --- a/components/resource_control/src/worker.rs +++ b/components/resource_control/src/worker.rs @@ -269,7 +269,7 @@ mod tests { use std::time::Duration; 
use super::*; - use crate::{resource_group::tests::new_resource_group_ru, resource_limiter::QuotaLimiter}; + use crate::{resource_group::tests::*, resource_limiter::QuotaLimiter}; struct TestResourceStatsProvider { cpu_total: f64, @@ -309,13 +309,21 @@ mod tests { let resource_ctl = Arc::new(ResourceGroupManager::default()); let rg1 = new_resource_group_ru("test".into(), 1000, 14); resource_ctl.add_resource_group(rg1); - assert!(resource_ctl.get_resource_limiter("test").is_none()); + assert!(resource_ctl.get_resource_limiter("test", "br").is_none()); let test_provider = TestResourceStatsProvider::new(8.0, 10000.0); let mut worker = GroupQuotaAdjustWorker::with_quota_getter(resource_ctl.clone(), test_provider); - let limiter = resource_ctl.get_resource_limiter("default").unwrap(); + let default_bg = + new_background_resource_group_ru("default".into(), 100000, 8, vec!["br".into()]); + resource_ctl.add_resource_group(default_bg); + assert!( + resource_ctl + .get_resource_limiter("default", "lightning") + .is_none() + ); + let limiter = resource_ctl.get_resource_limiter("default", "br").unwrap(); assert!( limiter .get_limiter(ResourceType::Cpu) @@ -401,14 +409,17 @@ mod tests { worker.adjust_quota(); check_limiter(&limiter, 2.25, 2250.0); - let default = new_resource_group_ru("default".into(), 2000, 8); + let default = + new_background_resource_group_ru("default".into(), 2000, 8, vec!["br".into()]); resource_ctl.add_resource_group(default); - let new_limiter = resource_ctl.get_resource_limiter("default").unwrap(); + let new_limiter = resource_ctl.get_resource_limiter("default", "br").unwrap(); assert_eq!(&*new_limiter as *const _, &*limiter as *const _); - let bg = new_resource_group_ru("background".into(), 1000, 15); + let bg = new_background_resource_group_ru("background".into(), 1000, 15, vec!["br".into()]); resource_ctl.add_resource_group(bg); - let bg_limiter = resource_ctl.get_resource_limiter("background").unwrap(); + let bg_limiter = resource_ctl + 
.get_resource_limiter("background", "br") + .unwrap(); reset_quota(&mut worker, 5.0, 7000.0, Duration::from_secs(1)); worker.adjust_quota(); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 3ca4c7bbd1b..c0f2fe949ee 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -800,6 +800,7 @@ where self.concurrency_manager.clone(), resource_tag_factory, self.quota_limiter.clone(), + self.resource_manager.clone(), ), coprocessor_v2::Endpoint::new(&self.core.config.coprocessor_v2), self.resolver.clone().unwrap(), @@ -1076,6 +1077,7 @@ where LocalTablets::Singleton(engines.engines.kv.clone()), servers.importer.clone(), None, + self.resource_manager.clone(), ); let import_cfg_mgr = import_service.get_config_manager(); @@ -1163,6 +1165,7 @@ where self.concurrency_manager.clone(), self.core.config.storage.api_version(), self.causal_ts_provider.clone(), + self.resource_manager.clone(), ); self.cfg_controller.as_mut().unwrap().register( tikv::config::Module::Backup, diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 68aff0cf8c3..23ae2652369 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -767,6 +767,7 @@ where self.concurrency_manager.clone(), resource_tag_factory, self.quota_limiter.clone(), + self.resource_manager.clone(), ), coprocessor_v2::Endpoint::new(&self.core.config.coprocessor_v2), self.resolver.clone().unwrap(), @@ -936,6 +937,7 @@ where self.concurrency_manager.clone(), self.core.config.storage.api_version(), self.causal_ts_provider.clone(), + self.resource_manager.clone(), ); self.cfg_controller.as_mut().unwrap().register( tikv::config::Module::Backup, @@ -951,6 +953,7 @@ where LocalTablets::Registry(self.tablet_registry.as_ref().unwrap().clone()), servers.importer.clone(), Some(self.router.as_ref().unwrap().store_meta().clone()), + self.resource_manager.clone(), ); let import_cfg_mgr = import_service.get_config_manager(); 
diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index e39c423469e..54b23dfdb78 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -38,6 +38,7 @@ use tikv_util::{ bytes::{decode_bytes_in_place, encode_bytes}, stream_event::{EventEncoder, EventIterator, Iterator as EIterator}, }, + future::RescheduleChecker, sys::{thread::ThreadBuildWrapper, SysQuota}, time::{Instant, Limiter}, Either, HandyRwLock, @@ -1251,6 +1252,9 @@ impl SstImporter { .build(path.save.to_str().unwrap()) .unwrap(); + let mut yield_check = + RescheduleChecker::new(tokio::task::yield_now, Duration::from_millis(10)); + let mut count = 0; while iter.valid()? { let mut old_key = Cow::Borrowed(keys::origin_key(iter.key())); let mut ts = None; @@ -1314,6 +1318,11 @@ impl SstImporter { } sst_writer.put(&data_key, &value)?; + count += 1; + if count >= 1024 { + count = 0; + yield_check.check().await; + } iter.next()?; if first_key.is_none() { first_key = Some(keys::origin_key(&data_key).to_vec()); diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index 34eb6e8aa9e..3a5800e989b 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -95,6 +95,7 @@ impl TestSuite { sim.get_concurrency_manager(*id), api_version, None, + None, ); let mut worker = bg_worker.lazy_build(format!("backup-{}", id)); worker.start(backup_endpoint); diff --git a/components/test_coprocessor/src/fixture.rs b/components/test_coprocessor/src/fixture.rs index 5e94d3e47fe..57446b8d4f9 100644 --- a/components/test_coprocessor/src/fixture.rs +++ b/components/test_coprocessor/src/fixture.rs @@ -176,6 +176,7 @@ fn init_data_with_details_impl( cm, ResourceTagFactory::new_for_test(), limiter.clone(), + None, ); (store, copr, limiter) } diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 7d11aebd784..6a5097f716a 
100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -547,6 +547,7 @@ impl ServerCluster { LocalTablets::Registry(tablet_registry.clone()), Arc::clone(&importer), Some(store_meta), + resource_manager.clone(), ); // Create deadlock service. @@ -569,6 +570,7 @@ impl ServerCluster { concurrency_manager.clone(), res_tag_factory, quota_limiter, + resource_manager.clone(), ); let copr_v2 = coprocessor_v2::Endpoint::new(&cfg.coprocessor_v2); let mut server = None; diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 09aa5b5fa6a..4a032bcd34e 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -450,6 +450,7 @@ impl ServerCluster { LocalTablets::Singleton(engines.kv.clone()), Arc::clone(&importer), None, + resource_manager.clone(), ); // Create deadlock service. @@ -479,6 +480,7 @@ impl ServerCluster { concurrency_manager.clone(), res_tag_factory, quota_limiter, + resource_manager.clone(), ); let copr_v2 = coprocessor_v2::Endpoint::new(&cfg.coprocessor_v2); let mut server = None; diff --git a/components/tidb_query_executors/Cargo.toml b/components/tidb_query_executors/Cargo.toml index 30fe64252ac..659ce2c0eb1 100644 --- a/components/tidb_query_executors/Cargo.toml +++ b/components/tidb_query_executors/Cargo.toml @@ -17,6 +17,7 @@ kvproto = { workspace = true } log_wrappers = { workspace = true } match-template = "0.0.1" protobuf = { version = "2.8", features = ["bytes"] } +resource_control = { workspace = true } slog = { workspace = true } slog-global = { workspace = true } smallvec = "1.4" diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 60359f22c55..c0d2f190c64 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -7,6 +7,7 @@ use fail::fail_point; use itertools::Itertools; use 
kvproto::coprocessor::KeyRange; use protobuf::Message; +use resource_control::{with_resource_limiter, ResourceLimiter}; use tidb_query_common::{ execute_stats::ExecSummary, metrics::*, @@ -79,6 +80,7 @@ pub struct BatchExecutorsRunner { paging_size: Option, quota_limiter: Arc, + resource_limiter: Option>, } // We assign a dummy type `()` so that we can omit the type when calling @@ -428,6 +430,7 @@ impl BatchExecutorsRunner { is_streaming: bool, paging_size: Option, quota_limiter: Arc, + resource_limiter: Option>, ) -> Result { let executors_len = req.get_executors().len(); let collect_exec_summary = req.get_collect_execution_summaries(); @@ -477,6 +480,7 @@ impl BatchExecutorsRunner { encode_type, paging_size, quota_limiter, + resource_limiter, }) } @@ -502,14 +506,18 @@ impl BatchExecutorsRunner { loop { let mut chunk = Chunk::default(); let mut sample = self.quota_limiter.new_sample(true); + let resource_limiter = self.resource_limiter.clone(); let (drained, record_len) = { let (cpu_time, res) = sample - .observe_cpu_async(self.internal_handle_request( - false, - batch_size, - &mut chunk, - &mut warnings, - &mut ctx, + .observe_cpu_async(with_resource_limiter( + self.internal_handle_request( + false, + batch_size, + &mut chunk, + &mut warnings, + &mut ctx, + ), + resource_limiter, )) .await; sample.add_cpu_time(cpu_time); diff --git a/components/tikv_util/src/future.rs b/components/tikv_util/src/future.rs index 875a8d97811..1eff166e48a 100644 --- a/components/tikv_util/src/future.rs +++ b/components/tikv_util/src/future.rs @@ -16,7 +16,11 @@ use futures::{ task::{self, ArcWake, Context, Poll}, }; -use crate::{callback::must_call, timer::GLOBAL_TIMER_HANDLE}; +use crate::{ + callback::must_call, + time::{Duration, Instant}, + timer::GLOBAL_TIMER_HANDLE, +}; /// Generates a paired future and callback so that when callback is being /// called, its result is automatically passed as a future result. 
@@ -232,6 +236,29 @@ where }) } +pub struct RescheduleChecker { + duration: Duration, + start: Instant, + future_builder: B, +} + +impl T> RescheduleChecker { + pub fn new(future_builder: B, duration: Duration) -> Self { + Self { + duration, + start: Instant::now_coarse(), + future_builder, + } + } + + pub async fn check(&mut self) { + if self.start.saturating_elapsed() >= self.duration { + (self.future_builder)().await; + self.start = Instant::now_coarse(); + } + } +} + #[cfg(test)] mod tests { use std::sync::atomic::AtomicUsize; diff --git a/src/coprocessor/checksum.rs b/src/coprocessor/checksum.rs index 3778f549427..0e6e46a9ce1 100644 --- a/src/coprocessor/checksum.rs +++ b/src/coprocessor/checksum.rs @@ -4,6 +4,7 @@ use api_version::{keyspace::KvPair, ApiV1}; use async_trait::async_trait; use kvproto::coprocessor::{KeyRange, Response}; use protobuf::Message; +use resource_control::{with_resource_limiter, ResourceLimiter}; use tidb_query_common::storage::{ scanner::{RangesScanner, RangesScannerOptions}, Range, @@ -20,6 +21,7 @@ use crate::{ pub struct ChecksumContext { req: ChecksumRequest, scanner: RangesScanner>, ApiV1>, + resource_limiter: Option>, } impl ChecksumContext { @@ -29,6 +31,7 @@ impl ChecksumContext { start_ts: u64, snap: S, req_ctx: &ReqContext, + resource_limiter: Option>, ) -> Result { let store = SnapshotStore::new( snap, @@ -49,13 +52,14 @@ impl ChecksumContext { is_key_only: false, is_scanned_range_aware: false, }); - Ok(Self { req, scanner }) + Ok(Self { + req, + scanner, + resource_limiter, + }) } -} -#[async_trait] -impl RequestHandler for ChecksumContext { - async fn handle_request(&mut self) -> Result> { + async fn do_handle_request(&mut self) -> Result> { let algorithm = self.req.get_algorithm(); if algorithm != ChecksumAlgorithm::Crc64Xor { return Err(box_err!("unknown checksum algorithm {:?}", algorithm)); @@ -95,6 +99,14 @@ impl RequestHandler for ChecksumContext { resp.set_data(data); Ok(resp.into()) } +} + +#[async_trait] +impl 
RequestHandler for ChecksumContext { + async fn handle_request(&mut self) -> Result> { + let limiter = self.resource_limiter.clone(); + with_resource_limiter(self.do_handle_request(), limiter).await + } fn collect_scan_statistics(&mut self, dest: &mut Statistics) { self.scanner.collect_storage_stats(dest) diff --git a/src/coprocessor/dag/mod.rs b/src/coprocessor/dag/mod.rs index 31a6df181d5..62a3a53511a 100644 --- a/src/coprocessor/dag/mod.rs +++ b/src/coprocessor/dag/mod.rs @@ -8,6 +8,7 @@ use api_version::KvFormat; use async_trait::async_trait; use kvproto::coprocessor::{KeyRange, Response}; use protobuf::Message; +use resource_control::ResourceLimiter; use tidb_query_common::{execute_stats::ExecSummary, storage::IntervalRange}; use tikv_alloc::trace::MemoryTraceGuard; use tipb::{DagRequest, SelectResponse, StreamResponse}; @@ -30,6 +31,7 @@ pub struct DagHandlerBuilder { is_cache_enabled: bool, paging_size: Option, quota_limiter: Arc, + resource_limiter: Option>, _phantom: PhantomData, } @@ -44,6 +46,7 @@ impl DagHandlerBuilder { is_cache_enabled: bool, paging_size: Option, quota_limiter: Arc, + resource_limiter: Option>, ) -> Self { DagHandlerBuilder { req, @@ -56,6 +59,7 @@ impl DagHandlerBuilder { is_cache_enabled, paging_size, quota_limiter, + resource_limiter, _phantom: PhantomData, } } @@ -79,6 +83,7 @@ impl DagHandlerBuilder { self.is_streaming, self.paging_size, self.quota_limiter, + self.resource_limiter, )? 
.into_boxed()) } @@ -101,6 +106,7 @@ impl BatchDagHandler { is_streaming: bool, paging_size: Option, quota_limiter: Arc, + resource_limiter: Option>, ) -> Result { Ok(Self { runner: tidb_query_executors::runner::BatchExecutorsRunner::from_request::<_, F>( @@ -112,6 +118,7 @@ impl BatchDagHandler { is_streaming, paging_size, quota_limiter, + resource_limiter, )?, data_version, }) diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 0cdd849b42c..cdebf83fdf6 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -14,7 +14,7 @@ use engine_traits::PerfLevel; use futures::{channel::mpsc, future::Either, prelude::*}; use kvproto::{coprocessor as coppb, errorpb, kvrpcpb}; use protobuf::{CodedInputStream, Message}; -use resource_control::TaskMetadata; +use resource_control::{ResourceGroupManager, TaskMetadata}; use resource_metering::{FutureExt, ResourceTagFactory, StreamExt}; use tidb_query_common::execute_stats::ExecSummary; use tikv_alloc::trace::MemoryTraceGuard; @@ -70,6 +70,7 @@ pub struct Endpoint { slow_log_threshold: Duration, quota_limiter: Arc, + resource_ctl: Option>, _phantom: PhantomData, } @@ -83,6 +84,7 @@ impl Endpoint { concurrency_manager: ConcurrencyManager, resource_tag_factory: ResourceTagFactory, quota_limiter: Arc, + resource_ctl: Option>, ) -> Self { // FIXME: When yatp is used, we need to limit coprocessor requests in progress // to avoid using too much memory. 
However, if there are a number of large @@ -106,6 +108,7 @@ impl Endpoint { max_handle_duration: cfg.end_point_request_max_handle_duration().0, slow_log_threshold: cfg.end_point_slow_log_threshold.0, quota_limiter, + resource_ctl, _phantom: Default::default(), } } @@ -187,6 +190,14 @@ impl Endpoint { let mut input = CodedInputStream::from_bytes(&data); input.set_recursion_limit(self.recursion_limit); + let resource_limiter = self.resource_ctl.as_ref().and_then(|r| { + r.get_resource_limiter( + context + .get_resource_control_context() + .get_resource_group_name(), + context.get_request_source(), + ) + }); let mut req_ctx: ReqContext; let builder: RequestHandlerBuilder; @@ -259,6 +270,7 @@ impl Endpoint { req.get_is_cache_enabled(), paging_size, quota_limiter, + resource_limiter, ) .data_version(data_version) .build() @@ -305,6 +317,7 @@ impl Endpoint { snap, req_ctx, quota_limiter, + resource_limiter, ) .map(|h| h.into_boxed()) }); @@ -350,6 +363,7 @@ impl Endpoint { start_ts, snap, req_ctx, + resource_limiter, ) .map(|h| h.into_boxed()) }); @@ -1030,6 +1044,7 @@ mod tests { cm, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), + None, ); // a normal request @@ -1071,6 +1086,7 @@ mod tests { cm, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), + None, ); copr.recursion_limit = 100; @@ -1109,6 +1125,7 @@ mod tests { cm, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), + None, ); let mut req = coppb::Request::default(); @@ -1132,6 +1149,7 @@ mod tests { cm, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), + None, ); let mut req = coppb::Request::default(); @@ -1180,6 +1198,7 @@ mod tests { cm, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), + None, ); let (tx, rx) = mpsc::channel(); @@ -1231,6 +1250,7 @@ mod tests { cm, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), + None, ); let handler_builder = @@ -1256,6 +1276,7 @@ mod tests { cm, 
ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), + None, ); // Fail immediately @@ -1309,6 +1330,7 @@ mod tests { cm, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), + None, ); let handler_builder = Box::new(|_, _: &_| Ok(StreamFixture::new(vec![]).into_boxed())); @@ -1337,6 +1359,7 @@ mod tests { cm, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), + None, ); // handler returns `finished == true` should not be called again. @@ -1436,6 +1459,7 @@ mod tests { cm, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), + None, ); let counter = Arc::new(atomic::AtomicIsize::new(0)); @@ -1505,6 +1529,7 @@ mod tests { cm, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), + None, ); let (tx, rx) = std::sync::mpsc::channel(); @@ -1884,6 +1909,7 @@ mod tests { cm, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), + None, ); { @@ -1949,6 +1975,7 @@ mod tests { cm, ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), + None, ); let mut req = coppb::Request::default(); req.mut_context().set_isolation_level(IsolationLevel::Si); diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index a49ac72398e..3d96240b26d 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -10,6 +10,7 @@ use kvproto::coprocessor::{KeyRange, Response}; use mur3::Hasher128; use protobuf::Message; use rand::{rngs::StdRng, Rng}; +use resource_control::{with_resource_limiter, ResourceLimiter}; use tidb_query_common::storage::{ scanner::{RangesScanner, RangesScannerOptions}, Range, @@ -51,6 +52,7 @@ pub struct AnalyzeContext { ranges: Vec, storage_stats: Statistics, quota_limiter: Arc, + resource_limiter: Option>, is_auto_analyze: bool, _phantom: PhantomData, } @@ -63,6 +65,7 @@ impl AnalyzeContext { snap: S, req_ctx: &ReqContext, quota_limiter: Arc, + resource_limiter: Option>, ) -> 
Result { let store = SnapshotStore::new( snap, @@ -81,6 +84,7 @@ impl AnalyzeContext { ranges, storage_stats: Statistics::default(), quota_limiter, + resource_limiter, is_auto_analyze, _phantom: PhantomData, }) @@ -232,10 +236,13 @@ impl RequestHandler for AnalyzeContext { is_key_only: true, is_scanned_range_aware: false, }); - let res = AnalyzeContext::handle_index( - req, - &mut scanner, - self.req.get_tp() == AnalyzeType::TypeCommonHandle, + let res = with_resource_limiter( + AnalyzeContext::handle_index( + req, + &mut scanner, + self.req.get_tp() == AnalyzeType::TypeCommonHandle, + ), + self.resource_limiter.clone(), ) .await; scanner.collect_storage_stats(&mut self.storage_stats); @@ -247,7 +254,11 @@ impl RequestHandler for AnalyzeContext { let storage = self.storage.take().unwrap(); let ranges = std::mem::take(&mut self.ranges); let mut builder = SampleBuilder::<_, F>::new(col_req, None, storage, ranges)?; - let res = AnalyzeContext::handle_column(&mut builder).await; + let res = with_resource_limiter( + AnalyzeContext::handle_column(&mut builder), + self.resource_limiter.clone(), + ) + .await; builder.data.collect_storage_stats(&mut self.storage_stats); res } @@ -260,7 +271,11 @@ impl RequestHandler for AnalyzeContext { let ranges = std::mem::take(&mut self.ranges); let mut builder = SampleBuilder::<_, F>::new(col_req, Some(idx_req), storage, ranges)?; - let res = AnalyzeContext::handle_mixed(&mut builder).await; + let res = with_resource_limiter( + AnalyzeContext::handle_mixed(&mut builder), + self.resource_limiter.clone(), + ) + .await; builder.data.collect_storage_stats(&mut self.storage_stats); res } @@ -278,7 +293,11 @@ impl RequestHandler for AnalyzeContext { self.is_auto_analyze, )?; - let res = AnalyzeContext::handle_full_sampling(&mut builder).await; + let res = with_resource_limiter( + AnalyzeContext::handle_full_sampling(&mut builder), + self.resource_limiter.clone(), + ) + .await; builder.data.collect_storage_stats(&mut self.storage_stats); res 
} diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 3860eba5d38..0c81873c130 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -25,6 +25,7 @@ use kvproto::{ kvrpcpb::Context, }; use raftstore_v2::StoreMeta; +use resource_control::{with_resource_limiter, ResourceGroupManager}; use sst_importer::{ error_inc, metrics::*, sst_importer::DownloadExt, sst_meta_to_path, Config, ConfigManager, Error, Result, SstImporter, @@ -119,6 +120,7 @@ pub struct ImportSstService { // it's some iff multi-rocksdb is enabled store_meta: Option>>>, + resource_manager: Option>, } struct RequestCollector { @@ -300,6 +302,7 @@ impl ImportSstService { tablets: LocalTablets, importer: Arc, store_meta: Option>>>, + resource_manager: Option>, ) -> Self { let props = tikv_util::thread_group::current_properties(); let eng = Mutex::new(engine.clone()); @@ -349,6 +352,7 @@ impl ImportSstService { raft_entry_max_size, writer, store_meta, + resource_manager, } } @@ -631,14 +635,25 @@ macro_rules! impl_write { let timer = Instant::now_coarse(); let label = stringify!($fn); + let resource_manager = self.resource_manager.clone(); let handle_task = async move { let res = async move { let first_req = rx.try_next().await?; - let meta = match first_req { - Some(r) => match r.chunk { - Some($chunk_ty::Meta(m)) => m, - _ => return Err(Error::InvalidChunk), - }, + let (meta, resource_limiter) = match first_req { + Some(r) => { + let limiter = resource_manager.as_ref().and_then(|m| { + m.get_resource_limiter( + r.get_context() + .get_resource_control_context() + .get_resource_group_name(), + r.get_context().get_request_source(), + ) + }); + match r.chunk { + Some($chunk_ty::Meta(m)) => (m, limiter), + _ => return Err(Error::InvalidChunk), + } + } _ => return Err(Error::InvalidChunk), }; let region_id = meta.get_region_id(); @@ -658,22 +673,33 @@ macro_rules! 
impl_write { return Err(Error::InvalidChunk); } }; - let writer = rx - .try_fold(writer, |mut writer, req| async move { - // Migrated to 2021 migration. This let statement is probably not - // needed, see https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html - let _ = &req; - let batch = match req.chunk { - Some($chunk_ty::Batch(b)) => b, - _ => return Err(Error::InvalidChunk), - }; - writer.write(batch)?; - Ok(writer) - }) + let (writer, resource_limiter) = rx + .try_fold( + (writer, resource_limiter), + |(mut writer, limiter), req| async move { + let batch = match req.chunk { + Some($chunk_ty::Batch(b)) => b, + _ => return Err(Error::InvalidChunk), + }; + let f = async { + writer.write(batch)?; + Ok(writer) + }; + with_resource_limiter(f, limiter.clone()) + .await + .map(|w| (w, limiter)) + }, + ) .await?; - let metas = writer.finish()?; - import.verify_checksum(&metas)?; + let finish_fn = async { + let metas = writer.finish()?; + import.verify_checksum(&metas)?; + Ok(metas) + }; + + let metas: Result<_> = with_resource_limiter(finish_fn, resource_limiter).await; + let metas = metas?; let mut resp = $resp_ty::default(); resp.set_metas(metas.into()); Ok(resp) @@ -884,6 +910,14 @@ impl ImportSst for ImportSstService { let region_id = req.get_sst().get_region_id(); let tablets = self.tablets.clone(); let start = Instant::now(); + let resource_limiter = self.resource_manager.as_ref().and_then(|r| { + r.get_resource_limiter( + req.get_context() + .get_resource_control_context() + .get_resource_group_name(), + req.get_context().get_request_source(), + ) + }); let handle_task = async move { // Records how long the download task waits to be scheduled. 
@@ -914,17 +948,20 @@ impl ImportSst for ImportSstService { } }; - let res = importer.download_ext::( - req.get_sst(), - req.get_storage_backend(), - req.get_name(), - req.get_rewrite_rule(), - cipher, - limiter, - tablet.into_owned(), - DownloadExt::default() - .cache_key(req.get_storage_cache_id()) - .req_type(req.get_request_type()), + let res = with_resource_limiter( + importer.download_ext::( + req.get_sst(), + req.get_storage_backend(), + req.get_name(), + req.get_rewrite_rule(), + cipher, + limiter, + tablet.into_owned(), + DownloadExt::default() + .cache_key(req.get_storage_cache_id()) + .req_type(req.get_request_type()), + ), + resource_limiter, ); let mut resp = DownloadResponse::default(); match res.await { diff --git a/src/server/server.rs b/src/server/server.rs index 6de8ceb48e2..948930ae7ae 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -637,6 +637,7 @@ mod tests { storage.get_concurrency_manager(), ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), + None, ); let copr_v2 = coprocessor_v2::Endpoint::new(&coprocessor_v2::Config::default()); let debug_thread_pool = Arc::new( diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index c986c640b6d..747e2aed8b8 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -476,9 +476,9 @@ impl Tikv for Service { ctx.spawn(task); } - fn coprocessor(&mut self, ctx: RpcContext<'_>, mut req: Request, sink: UnarySink) { + fn coprocessor(&mut self, ctx: RpcContext<'_>, req: Request, sink: UnarySink) { forward_unary!(self.proxy, coprocessor, ctx, req, sink); - let source = req.mut_context().take_request_source(); + let source = req.get_context().get_request_source().to_owned(); let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { resource_manager.consume_penalty(resource_control_ctx); diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 
10623c54376..679f21fdf6c 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -764,15 +764,21 @@ where } } +#[derive(Serialize)] +struct BackgroundSetting { + task_types: Vec, +} + #[derive(Serialize)] struct ResourceGroupSetting { name: String, ru: u64, priority: u32, burst_limit: i64, + background: BackgroundSetting, } -fn into_debug_request_group(rg: ResourceGroup) -> ResourceGroupSetting { +fn into_debug_request_group(mut rg: ResourceGroup) -> ResourceGroupSetting { ResourceGroupSetting { name: rg.name, ru: rg @@ -788,6 +794,12 @@ fn into_debug_request_group(rg: ResourceGroup) -> ResourceGroupSetting { .get_r_u() .get_settings() .get_burst_limit(), + background: BackgroundSetting { + task_types: rg + .background_settings + .as_mut() + .map_or(vec![], |s| s.take_job_types().into()), + }, } } diff --git a/tests/benches/coprocessor_executors/util/mod.rs b/tests/benches/coprocessor_executors/util/mod.rs index 0a5708c74ce..9737d6a2160 100644 --- a/tests/benches/coprocessor_executors/util/mod.rs +++ b/tests/benches/coprocessor_executors/util/mod.rs @@ -52,6 +52,7 @@ pub fn build_dag_handler( false, None, Arc::new(QuotaLimiter::default()), + None, ) .build() .unwrap() diff --git a/tests/integrations/resource_metering/test_cpu.rs b/tests/integrations/resource_metering/test_cpu.rs index 9ead51f5ef5..c15bf445ed3 100644 --- a/tests/integrations/resource_metering/test_cpu.rs +++ b/tests/integrations/resource_metering/test_cpu.rs @@ -229,6 +229,7 @@ fn setup_test_suite() -> (TestSuite, Store, Endpoint) cm, test_suite.get_tag_factory(), Arc::new(QuotaLimiter::default()), + None, ); (test_suite, store, endpoint) } diff --git a/tests/integrations/resource_metering/test_read_keys.rs b/tests/integrations/resource_metering/test_read_keys.rs index 35ef0e2ba88..f7a4ef86906 100644 --- a/tests/integrations/resource_metering/test_read_keys.rs +++ b/tests/integrations/resource_metering/test_read_keys.rs @@ -229,6 +229,7 @@ fn init_coprocessor_with_data( 
cm, tag_factory, Arc::new(QuotaLimiter::default()), + None, ) } From 2f2900a6ff1fa0b7c881718668c1714ecfdcbc2f Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 20 Jul 2023 12:54:47 +0800 Subject: [PATCH 0805/1149] raftstore-v2: fix issues related to background work (#15115) ref tikv/tikv#12842, fix tikv/tikv#15145 - Raise error when block-cache or write-buffer-limit is set too high. - Reduce write-buffer-limit ratio and max size. - Fix `max-compactions` adjusting doesn't consider level zero files. - Increase `max-manual-flush-rate` from 0.2 to 0.3. - Fix pre flush doesn't call callback when tablet is missing. - Move `pause_background_work` into background pool because it may block for a long time. - Only split clean tablet. - Schedule pre flush for commit merge in a dedicated merge because it blocks the source region. - Print rocksdb stats once 2h for v2. Signed-off-by: tabokie --- cmd/tikv-ctl/src/executor.rs | 2 +- cmd/tikv-ctl/src/main.rs | 5 +- components/encryption/src/manager/mod.rs | 19 ++- components/raftstore-v2/src/batch/store.rs | 2 +- .../operation/command/admin/merge/commit.rs | 1 + .../operation/command/admin/merge/prepare.rs | 1 + .../src/operation/command/admin/mod.rs | 3 + .../src/operation/command/admin/split.rs | 16 ++- .../raftstore-v2/src/operation/ready/mod.rs | 1 + components/raftstore-v2/src/worker/tablet.rs | 109 +++++++++++------- components/raftstore/src/store/config.rs | 2 +- components/server/src/common.rs | 64 +++++++--- components/server/src/raft_engine_switch.rs | 6 +- components/server/src/server.rs | 13 +-- components/server/src/server2.rs | 12 +- components/snap_recovery/src/init_cluster.rs | 5 +- components/test_raftstore-v2/src/util.rs | 6 +- components/test_raftstore/src/util.rs | 5 +- etc/config-template.toml | 7 +- src/config/mod.rs | 89 ++++++++------ src/server/debug2.rs | 10 +- src/server/engine_factory.rs | 5 +- src/storage/config.rs | 23 ++-- src/storage/kv/test_engine_builder.rs | 3 +- src/storage/mod.rs | 5 +- 
tests/integrations/config/mod.rs | 2 +- tests/integrations/storage/test_titan.rs | 5 +- 27 files changed, 235 insertions(+), 186 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 47ec632f957..f7783cd82a4 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -96,7 +96,7 @@ pub fn new_debug_executor( .unwrap() .map(Arc::new); - let cache = cfg.storage.block_cache.build_shared_cache(engine_type); + let cache = cfg.storage.block_cache.build_shared_cache(); let env = cfg .build_shared_rocks_env(key_manager.clone(), None /* io_rate_limiter */) .unwrap(); diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 6491ab241e8..91e689e4236 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -1257,10 +1257,7 @@ fn read_cluster_id(config: &TikvConfig) -> Result { .unwrap() .map(Arc::new); let env = get_env(key_manager.clone(), None /* io_rate_limiter */).unwrap(); - let cache = config - .storage - .block_cache - .build_shared_cache(config.storage.engine); + let cache = config.storage.block_cache.build_shared_cache(); let kv_engine = KvEngineFactoryBuilder::new(env, config, cache, key_manager) .build() .create_shared_db(&config.storage.data_dir) diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index c47a127a1a5..f3594e8a96b 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -852,10 +852,8 @@ impl DataKeyManager { pub fn file_count(&self) -> usize { self.dicts.file_dict.lock().unwrap().files.len() } -} -impl Drop for DataKeyManager { - fn drop(&mut self) { + fn shutdown_background_worker(&mut self) { if let Err(e) = self.rotate_tx.send(RotateTask::Terminate) { info!("failed to terminate background rotation, are we shutting down?"; "err" => %e); } @@ -865,6 +863,12 @@ impl Drop for DataKeyManager { } } +impl Drop for DataKeyManager { + fn drop(&mut self) { + 
self.shutdown_background_worker(); + } +} + impl EncryptionKeyManager for DataKeyManager { // Get key to open existing file. fn get_file(&self, fname: &str) -> IoResult { @@ -1455,11 +1459,12 @@ mod tests { fn test_key_manager_rotate() { let _guard = LOCK_FOR_GAUGE.lock().unwrap(); let tmp_dir = tempfile::TempDir::new().unwrap(); - let manager = new_key_manager_def(&tmp_dir, None).unwrap(); + let mut manager = new_key_manager_def(&tmp_dir, None).unwrap(); let (key_id, key) = { let (id, k) = manager.dicts.current_data_key(); (id, k) }; + manager.shutdown_background_worker(); // Do not rotate. let master_key = MockBackend::default(); @@ -1524,11 +1529,12 @@ mod tests { Box::new(FileBackend::new(key_path.as_path()).unwrap()) as Box; let tmp_dir = tempfile::TempDir::new().unwrap(); let previous = new_mock_backend() as Box; - let manager = new_key_manager(&tmp_dir, None, master_key_backend, previous).unwrap(); + let mut manager = new_key_manager(&tmp_dir, None, master_key_backend, previous).unwrap(); let (key_id, key) = { let (id, k) = manager.dicts.current_data_key(); (id, k) }; + manager.shutdown_background_worker(); let master_key_backend = Box::new(FileBackend::new(key_path.as_path()).unwrap()) as Box; @@ -1574,7 +1580,8 @@ mod tests { let master_key_backend = Box::new(file_backend); let tmp_dir = tempfile::TempDir::new().unwrap(); let previous = new_mock_backend() as Box; - let manager = new_key_manager(&tmp_dir, None, master_key_backend, previous).unwrap(); + let mut manager = new_key_manager(&tmp_dir, None, master_key_backend, previous).unwrap(); + manager.shutdown_background_worker(); let file_backend = FileBackend::new(key_path.as_path()).unwrap(); let master_key_backend = Box::new(file_backend); diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 44a623ed9f6..847cb3ca4fe 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -68,7 +68,7 @@ use crate::{ 
}; const MIN_MANUAL_FLUSH_RATE: f64 = 0.2; -const MAX_MANUAL_FLUSH_PERIOD: Duration = Duration::from_secs(90); +const MAX_MANUAL_FLUSH_PERIOD: Duration = Duration::from_secs(120); /// A per-thread context shared by the [`StoreFsm`] and multiple [`PeerFsm`]s. pub struct StoreContext { diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index 84ef4745288..708e9ef7ffb 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -230,6 +230,7 @@ impl Peer { self.start_pre_flush( store_ctx, "commit_merge", + true, &target.clone(), Box::new(move || { // If target peer is destroyed, life.rs is responsible for telling us to diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index f71670c2931..16f5d397ce7 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -416,6 +416,7 @@ impl Peer { self.start_pre_flush( store_ctx, "prepare_merge", + false, &self.region().clone(), Box::new(on_flush_finish), ); diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 9dbc3a06a87..0623f909786 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -192,6 +192,7 @@ impl Peer { self.start_pre_flush( ctx, "split", + false, &self.region().clone(), Box::new(on_flush_finish), ); @@ -261,6 +262,7 @@ impl Peer { &mut self, ctx: &mut StoreContext, reason: &'static str, + high_priority: bool, target: &Region, on_local_flushed: Box, ) { @@ -274,6 +276,7 @@ impl Peer { if let Err(e) = ctx.schedulers.tablet.schedule(crate::TabletTask::Flush { 
region_id: target_id, reason, + high_priority, threshold: Some(std::time::Duration::from_secs(10)), cb: Some(on_local_flushed), }) { diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index f3718813686..3fefc7c1006 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -55,7 +55,7 @@ use raftstore::{ Result, }; use slog::{error, info, warn}; -use tikv_util::{log::SlogFormat, slog_panic, time::Instant}; +use tikv_util::{box_err, log::SlogFormat, slog_panic, time::Instant}; use crate::{ batch::StoreContext, @@ -316,6 +316,15 @@ impl Peer { ))); return; } + if self.storage().has_dirty_data() { + // If we split dirty tablet, the same trim compaction will be repeated + // exponentially more times. + info!(self.logger, "tablet still dirty, skip split."); + ch.set_result(cmd_resp::new_error(Error::Other(box_err!( + "tablet is dirty" + )))); + return; + } if let Err(e) = util::validate_split_region( self.region_id(), self.peer_id(), @@ -360,6 +369,11 @@ impl Peer { return; } + if self.storage().has_dirty_data() { + info!(self.logger, "tablet still dirty, skip half split."); + return; + } + // Do not check the bucket ranges if we want to split the region with a given // key range, this is to avoid compatibility issues. 
let split_check_bucket_ranges = if !is_key_range { diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index cfc522805bc..12279651bf6 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -310,6 +310,7 @@ impl Peer { .schedule(crate::worker::tablet::Task::Flush { region_id: self.region().get_id(), reason: "unknown", + high_priority: false, threshold: Some(std::time::Duration::from_secs(10)), cb: None, }); diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index 2759aa10477..2118bb3da77 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -25,7 +25,8 @@ use tikv_util::{ Either, }; -const DEFAULT_BACKGROUND_POOL_SIZE: usize = 6; +const DEFAULT_HIGH_PRI_POOL_SIZE: usize = 1; +const DEFAULT_LOW_PRI_POOL_SIZE: usize = 6; pub enum Task { Trim { @@ -55,10 +56,11 @@ pub enum Task { Flush { region_id: u64, reason: &'static str, + high_priority: bool, /// Do not flush if the active memtable is just flushed within this /// threshold. threshold: Option, - /// Callback will be called if memtable is fresh. + /// Callback will be called regardless of whether the flush succeeds. cb: Option>, }, DeleteRange { @@ -108,12 +110,14 @@ impl Display for Task { Task::Flush { region_id, reason, + high_priority, threshold, cb: on_flush_finish, } => { write!( f, - "flush tablet for region_id {region_id}, reason {reason}, threshold {:?}, has_cb {}", + "flush tablet for region_id {region_id}, reason {reason}, high_priority \ + {high_priority}, threshold {:?}, has_cb {}", threshold, on_flush_finish.is_some(), ) @@ -241,7 +245,8 @@ pub struct Runner { // An independent pool to run tasks that are time-consuming but doesn't take CPU resources, // such as waiting for RocksDB compaction. 
- background_pool: FuturePool, + high_pri_pool: FuturePool, + low_pri_pool: FuturePool, } impl Runner { @@ -258,13 +263,13 @@ impl Runner { logger, waiting_destroy_tasks: HashMap::default(), pending_destroy_tasks: Vec::new(), - background_pool: YatpPoolBuilder::new(DefaultTicker::default()) + high_pri_pool: YatpPoolBuilder::new(DefaultTicker::default()) + .name_prefix("tablet-high") + .thread_count(0, DEFAULT_HIGH_PRI_POOL_SIZE, DEFAULT_HIGH_PRI_POOL_SIZE) + .build_future_pool(), + low_pri_pool: YatpPoolBuilder::new(DefaultTicker::default()) .name_prefix("tablet-bg") - .thread_count( - 0, - DEFAULT_BACKGROUND_POOL_SIZE, - DEFAULT_BACKGROUND_POOL_SIZE, - ) + .thread_count(0, DEFAULT_LOW_PRI_POOL_SIZE, DEFAULT_LOW_PRI_POOL_SIZE) .build_future_pool(), } } @@ -289,7 +294,7 @@ impl Runner { return; } let logger = self.logger.clone(); - self.background_pool + self.low_pri_pool .spawn(async move { let range1 = Range::new(&[], &start_key); let range2 = Range::new(&end_key, keys::DATA_MAX_KEY); @@ -331,8 +336,14 @@ impl Runner { Either::Left(tablet) => { // The tablet is about to be deleted, flush is a waste and will block destroy. let _ = tablet.set_db_options(&[("avoid_flush_during_shutdown", "true")]); - let _ = tablet.pause_background_work(); - PathBuf::from(tablet.path()) + // `pause_background_work` needs to wait for outstanding compactions. 
+ let path = PathBuf::from(tablet.path()); + self.low_pri_pool + .spawn(async move { + let _ = tablet.pause_background_work(); + }) + .unwrap(); + path } Either::Right(path) => path, } @@ -432,6 +443,7 @@ impl Runner { &self, region_id: u64, reason: &'static str, + high_priority: bool, threshold: Option, cb: Option>, ) { @@ -445,7 +457,10 @@ impl Runner { "region_id" => region_id, "reason" => reason, ); - return + if let Some(cb) = cb { + cb(); + } + return; }; let threshold = threshold.map(|t| std::time::SystemTime::now() - t); // The callback `cb` being some means it's the task sent from @@ -454,42 +469,47 @@ impl Runner { if let Some(cb) = cb { let logger = self.logger.clone(); let now = Instant::now(); - self.background_pool - .spawn(async move { - // sync flush for leader to let the flush happend before later checkpoint. - if threshold.is_none() || tablet.has_old_active_memtable(threshold.unwrap()) { - let r = tablet.flush_cfs(DATA_CFS, true); - let elapsed = now.saturating_elapsed(); - if let Err(e) = r { - warn!( - logger, - "flush memtable for leader failed"; - "region_id" => region_id, - "reason" => reason, - "err" => ?e, - ); - return; - } else { - info!( - logger, - "flush memtable for leader"; - "region_id" => region_id, - "reason" => reason, - "duration" => %ReadableDuration(elapsed), - ); - } + let pool = if high_priority + && self.low_pri_pool.get_running_task_count() > DEFAULT_LOW_PRI_POOL_SIZE / 2 + { + &self.high_pri_pool + } else { + &self.low_pri_pool + }; + pool.spawn(async move { + // sync flush for leader to let the flush happen before later checkpoint. 
+ if threshold.is_none() || tablet.has_old_active_memtable(threshold.unwrap()) { + let r = tablet.flush_cfs(DATA_CFS, true); + let elapsed = now.saturating_elapsed(); + if let Err(e) = r { + warn!( + logger, + "flush memtable for leader failed"; + "region_id" => region_id, + "reason" => reason, + "err" => ?e, + ); } else { info!( logger, - "skipped flush memtable for leader"; + "flush memtable for leader"; "region_id" => region_id, "reason" => reason, + "duration" => %ReadableDuration(elapsed), ); } - drop(tablet); - cb(); - }) - .unwrap(); + } else { + info!( + logger, + "skipped flush memtable for leader"; + "region_id" => region_id, + "reason" => reason, + ); + } + drop(tablet); + cb(); + }) + .unwrap(); } else if threshold.is_none() || tablet.has_old_active_memtable(threshold.unwrap()) { if let Err(e) = tablet.flush_cfs(DATA_CFS, false) { warn!( @@ -585,9 +605,10 @@ where Task::Flush { region_id, reason, + high_priority, threshold, cb, - } => self.flush_tablet(region_id, reason, threshold, cb), + } => self.flush_tablet(region_id, reason, high_priority, threshold, cb), delete_range @ Task::DeleteRange { .. 
} => self.delete_range(delete_range), Task::SnapGc(keys) => self.snap_gc(keys), } diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index b01cfd8240b..827ddc72727 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -403,7 +403,7 @@ impl Default for Config { raft_log_gc_size_limit: None, raft_log_reserve_max_ticks: 6, raft_engine_purge_interval: ReadableDuration::secs(10), - max_manual_flush_rate: 2.0, + max_manual_flush_rate: 3.0, raft_entry_cache_life_time: ReadableDuration::secs(30), raft_reject_transfer_leader_duration: ReadableDuration::secs(3), split_region_check_tick_interval: ReadableDuration::secs(10), diff --git a/components/server/src/common.rs b/components/server/src/common.rs index 0209906cc55..094c845016f 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -23,7 +23,7 @@ use engine_rocks::{ }; use engine_traits::{ data_cf_offset, CachedTablet, CfOptions, CfOptionsExt, FlowControlFactorsExt, KvEngine, - RaftEngine, StatisticsReporter, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, + RaftEngine, StatisticsReporter, TabletRegistry, CF_DEFAULT, DATA_CFS, }; use error_code::ErrorCodeExt; use file_system::{get_io_rate_limiter, set_io_rate_limiter, BytesFetcher, File, IoBudgetAdjustor}; @@ -478,8 +478,8 @@ pub fn check_system_config(config: &TikvConfig) { pub struct EnginesResourceInfo { tablet_registry: TabletRegistry, - // The initial value of max_compactions. For kvdb.defaultcf, kvdb.writecf only. - base_max_compactions: [u32; 2], + // The initial value of max_compactions. + base_max_compactions: [u32; 3], raft_engine: Option, latest_normalized_pending_bytes: AtomicU32, normalized_pending_bytes_collector: MovingAvgU32, @@ -494,8 +494,10 @@ impl EnginesResourceInfo { raft_engine: Option, max_samples_to_preserve: usize, ) -> Self { + // Match DATA_CFS. 
let base_max_compactions = [ config.rocksdb.defaultcf.max_compactions.unwrap_or(0), + config.rocksdb.lockcf.max_compactions.unwrap_or(0), config.rocksdb.writecf.max_compactions.unwrap_or(0), ]; EnginesResourceInfo { @@ -514,17 +516,34 @@ impl EnginesResourceInfo { ) { let mut compaction_pending_bytes = [0; DATA_CFS.len()]; let mut soft_pending_compaction_bytes_limit = [0; DATA_CFS.len()]; + // level0 file number ratio within [compaction trigger, slowdown trigger]. + let mut level0_ratio = [0.0f32; DATA_CFS.len()]; let mut fetch_engine_cf = |engine: &RocksEngine, cf: &str| { if let Ok(cf_opts) = engine.get_options_cf(cf) { + let offset = data_cf_offset(cf); if let Ok(Some(b)) = engine.get_cf_pending_compaction_bytes(cf) { - let offset = data_cf_offset(cf); compaction_pending_bytes[offset] += b; soft_pending_compaction_bytes_limit[offset] = cmp::max( cf_opts.get_soft_pending_compaction_bytes_limit(), soft_pending_compaction_bytes_limit[offset], ); } + if let Ok(Some(n)) = engine.get_cf_num_files_at_level(cf, 0) { + let level0 = n as f32; + let slowdown_trigger = cf_opts.get_level_zero_slowdown_writes_trigger() as f32; + let compaction_trigger = + cf_opts.get_level_zero_file_num_compaction_trigger() as f32; + let ratio = if slowdown_trigger > compaction_trigger { + (level0 - compaction_trigger) / (slowdown_trigger - compaction_trigger) + } else { + 1.0 + }; + + if ratio > level0_ratio[offset] { + level0_ratio[offset] = ratio; + } + } } }; @@ -540,7 +559,7 @@ impl EnginesResourceInfo { for (_, cache) in cached_latest_tablets.iter_mut() { let Some(tablet) = cache.latest() else { continue }; - for cf in &[CF_DEFAULT, CF_WRITE, CF_LOCK] { + for cf in DATA_CFS { fetch_engine_cf(tablet, cf); } } @@ -556,29 +575,44 @@ impl EnginesResourceInfo { normalized_pending_bytes, (*pending * EnginesResourceInfo::SCALE_FACTOR / limit) as u32, ); - // kvdb defaultcf or writecf. 
- if (i == 1 || i == 2 ) - && let base = self.base_max_compactions[i-1] - && base > 0 - { + let base = self.base_max_compactions[i]; + if base > 0 { let level = *pending as f32 / limit as f32; - let delta = if level > 0.7 { + // 50% -> 1, 70% -> 2, 85% -> 3, 95% -> 6, 98% -> 1024. + let delta1 = if level > 0.98 { + 1024 + } else if level > 0.95 { + cmp::min(SysQuota::cpu_cores_quota() as u32 - 2, 6) + } else if level > 0.85 { + 3 + } else if level > 0.7 { 2 } else { u32::from(level > 0.5) }; - let cf = if i == 1 { - CF_DEFAULT + // 20% -> 1, 60% -> 2, 80% -> 3, 90% -> 6, 98% -> 1024. + let delta2 = if level0_ratio[i] > 0.98 { + // effectively disable the limiter. + 1024 + } else if level0_ratio[i] > 0.9 { + cmp::min(SysQuota::cpu_cores_quota() as u32 - 2, 6) + } else if level0_ratio[i] > 0.8 { + 3 + } else if level0_ratio[i] > 0.6 { + 2 } else { - CF_WRITE + u32::from(level0_ratio[i] > 0.2) }; + let delta = cmp::max(delta1, delta2); + let cf = DATA_CFS[i]; if delta != 0 { info!( "adjusting `max-compactions`"; "cf" => cf, "n" => base + delta, "pending_bytes" => *pending, - "soft_limit" => limit + "soft_limit" => limit, + "level0_ratio" => level0_ratio[i], ); } // We cannot get the current limit from limiter to avoid repeatedly setting the diff --git a/components/server/src/raft_engine_switch.rs b/components/server/src/raft_engine_switch.rs index bf46f07eabd..729029d4c8f 100644 --- a/components/server/src/raft_engine_switch.rs +++ b/components/server/src/raft_engine_switch.rs @@ -237,10 +237,8 @@ mod tests { cfg.raft_store.raftdb_path = raftdb_path.to_str().unwrap().to_owned(); cfg.raftdb.wal_dir = raftdb_wal_path.to_str().unwrap().to_owned(); cfg.raft_engine.mut_config().dir = raft_engine_path.to_str().unwrap().to_owned(); - let cache = cfg - .storage - .block_cache - .build_shared_cache(cfg.storage.engine); + cfg.validate().unwrap(); + let cache = cfg.storage.block_cache.build_shared_cache(); // Dump logs from RocksEngine to RaftLogEngine. 
let raft_engine = RaftLogEngine::new( diff --git a/components/server/src/server.rs b/components/server/src/server.rs index c0f2fe949ee..5edb962d61a 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1499,12 +1499,7 @@ impl TikvServer { &mut self, flow_listener: engine_rocks::FlowListener, ) -> (Engines, Arc) { - let block_cache = self - .core - .config - .storage - .block_cache - .build_shared_cache(self.core.config.storage.engine); + let block_cache = self.core.config.storage.block_cache.build_shared_cache(); let env = self .core .config @@ -1622,10 +1617,8 @@ mod test { config.rocksdb.lockcf.soft_pending_compaction_bytes_limit = Some(ReadableSize(1)); let env = Arc::new(Env::default()); let path = Builder::new().prefix("test-update").tempdir().unwrap(); - let cache = config - .storage - .block_cache - .build_shared_cache(config.storage.engine); + config.validate().unwrap(); + let cache = config.storage.block_cache.build_shared_cache(); let factory = KvEngineFactoryBuilder::new(env, &config, cache, None).build(); let reg = TabletRegistry::new(Box::new(factory), path.path().join("tablets")).unwrap(); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 23ae2652369..88e9a76340c 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1419,12 +1419,7 @@ impl TikvServer { &mut self, flow_listener: engine_rocks::FlowListener, ) -> Arc { - let block_cache = self - .core - .config - .storage - .block_cache - .build_shared_cache(self.core.config.storage.engine); + let block_cache = self.core.config.storage.block_cache.build_shared_cache(); let env = self .core .config @@ -1567,10 +1562,7 @@ mod test { config.rocksdb.lockcf.soft_pending_compaction_bytes_limit = Some(ReadableSize(1)); let env = Arc::new(Env::default()); let path = Builder::new().prefix("test-update").tempdir().unwrap(); - let cache = config - .storage - .block_cache - 
.build_shared_cache(config.storage.engine); + let cache = config.storage.block_cache.build_shared_cache(); let factory = KvEngineFactoryBuilder::new(env, &config, cache, None).build(); let reg = TabletRegistry::new(Box::new(factory), path.path().join("tablets")).unwrap(); diff --git a/components/snap_recovery/src/init_cluster.rs b/components/snap_recovery/src/init_cluster.rs index e200ec6ff44..46275e0c610 100644 --- a/components/snap_recovery/src/init_cluster.rs +++ b/components/snap_recovery/src/init_cluster.rs @@ -319,10 +319,7 @@ pub fn create_local_engine_service( let env = config .build_shared_rocks_env(key_manager.clone(), None) .map_err(|e| format!("build shared rocks env: {}", e))?; - let block_cache = config - .storage - .block_cache - .build_shared_cache(config.storage.engine); + let block_cache = config.storage.block_cache.build_shared_cache(); // init rocksdb / kv db let factory = diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index 2809c2a01b0..7719b5e3557 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -22,7 +22,6 @@ use test_raftstore::{new_get_cmd, new_put_cf_cmd, new_request, new_snap_cmd, Con use tikv::{ server::KvEngineFactoryBuilder, storage::{ - config::EngineType, kv::{SnapContext, SnapshotExt}, point_key_range, Engine, Snapshot, }, @@ -57,10 +56,7 @@ pub fn create_test_engine( data_key_manager_from_config(&cfg.security.encryption, dir.path().to_str().unwrap()) .unwrap() .map(Arc::new); - let cache = cfg - .storage - .block_cache - .build_shared_cache(EngineType::RaftKv2); + let cache = cfg.storage.block_cache.build_shared_cache(); let env = cfg .build_shared_rocks_env(key_manager.clone(), limiter) .unwrap(); diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 7fbb8cc2d28..98e39c4fb13 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -639,10 +639,7 
@@ pub fn create_test_engine( data_key_manager_from_config(&cfg.security.encryption, dir.path().to_str().unwrap()) .unwrap() .map(Arc::new); - let cache = cfg - .storage - .block_cache - .build_shared_cache(cfg.storage.engine); + let cache = cfg.storage.block_cache.build_shared_cache(); let env = cfg .build_shared_rocks_env(key_manager.clone(), limiter) .unwrap(); diff --git a/etc/config-template.toml b/etc/config-template.toml index 818b8b64a44..b9cde90e633 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -299,7 +299,7 @@ ## Otherwise, the OOM problem might occur in TiKV. ## ## If it's not set, 45% of available system memory will be used. -# capacity = "1GB" +# capacity = "0B" [storage.flow-control] ## Flow controller is used to throttle the write rate at scheduler level, aiming @@ -572,7 +572,8 @@ # max-total-wal-size = "4GB" ## Dump statistics periodically in information logs. -## Same as RocksDB's default value (10 min). +## When storage.engine is "raft-kv", default value is 10m. +## When storage.engine is "partitioned-raft-kv", default value is 120m. # stats-dump-period = "10m" ## Refer to: https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ @@ -646,7 +647,7 @@ ## When storage.engine is "raft-kv", default is no limit. ## When storage.engine is "partitioned-raft-kv", default value is 25% of available system memory or ## 15GiB, whichever is smaller. -# write-buffer-limit = "1GB" +# write-buffer-limit = "0B" ## Options for `Titan`. [rocksdb.titan] diff --git a/src/config/mod.rs b/src/config/mod.rs index c4aa3c36d52..d28d73f7da4 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -106,9 +106,10 @@ pub const MIN_BLOCK_CACHE_SHARD_SIZE: usize = 128 * MIB as usize; /// Maximum of 15% of system memory can be used by Raft Engine. Normally its /// memory usage is much smaller than that. const RAFT_ENGINE_MEMORY_LIMIT_RATE: f64 = 0.15; -/// Tentative value. 
-const WRITE_BUFFER_MEMORY_LIMIT_RATE: f64 = 0.25; -const WRITE_BUFFER_MEMORY_LIMIT_MAX: u64 = ReadableSize::gb(15).0; + +const WRITE_BUFFER_MEMORY_LIMIT_RATE: f64 = 0.2; +// Too large will increase Raft Engine memory usage. +const WRITE_BUFFER_MEMORY_LIMIT_MAX: u64 = ReadableSize::gb(8).0; const LOCKCF_MIN_MEM: usize = 256 * MIB as usize; const LOCKCF_MAX_MEM: usize = GIB as usize; @@ -1217,7 +1218,7 @@ pub struct DbConfig { #[serde(skip_serializing)] pub enable_statistics: bool, #[online_config(skip)] - pub stats_dump_period: ReadableDuration, + pub stats_dump_period: Option, pub compaction_readahead_size: ReadableSize, #[online_config(skip)] pub info_log_max_size: ReadableSize, @@ -1306,7 +1307,7 @@ impl Default for DbConfig { create_if_missing: true, max_open_files: 40960, enable_statistics: true, - stats_dump_period: ReadableDuration::minutes(10), + stats_dump_period: None, compaction_readahead_size: ReadableSize::kb(0), info_log_max_size: ReadableSize::gb(1), info_log_roll_time: ReadableDuration::secs(0), @@ -1346,6 +1347,8 @@ impl DbConfig { EngineType::RaftKv => { self.allow_concurrent_memtable_write.get_or_insert(true); self.max_total_wal_size.get_or_insert(ReadableSize::gb(4)); + self.stats_dump_period + .get_or_insert(ReadableDuration::minutes(10)); self.defaultcf.enable_compaction_guard.get_or_insert(true); self.writecf.enable_compaction_guard.get_or_insert(true); self.defaultcf.format_version.get_or_insert(2); @@ -1364,6 +1367,8 @@ impl DbConfig { WRITE_BUFFER_MEMORY_LIMIT_MAX, ))); self.max_total_wal_size.get_or_insert(ReadableSize(1)); + self.stats_dump_period + .get_or_insert(ReadableDuration::minutes(120)); // In RaftKv2, every region uses its own rocksdb instance, it's actually the // even stricter compaction guard, so use the same output file size base. 
self.writecf @@ -1434,7 +1439,9 @@ impl DbConfig { opts.set_max_manifest_file_size(self.max_manifest_file_size.0); opts.create_if_missing(self.create_if_missing); opts.set_max_open_files(self.max_open_files); - opts.set_stats_dump_period_sec(self.stats_dump_period.as_secs() as usize); + opts.set_stats_dump_period_sec( + self.stats_dump_period.unwrap_or_default().as_secs() as usize + ); opts.set_compaction_readahead_size(self.compaction_readahead_size.0); opts.set_max_log_file_size(self.info_log_max_size.0); opts.set_log_file_time_to_roll(self.info_log_roll_time.as_secs()); @@ -3433,6 +3440,15 @@ impl TikvConfig { if self.storage.engine == EngineType::RaftKv2 { self.raft_store.store_io_pool_size = cmp::max(self.raft_store.store_io_pool_size, 1); } + if self.storage.block_cache.capacity.is_none() { + let total_mem = SysQuota::memory_limit_in_bytes(); + let capacity = if self.storage.engine == EngineType::RaftKv2 { + (total_mem as f64) * RAFTSTORE_V2_BLOCK_CACHE_RATE + } else { + (total_mem as f64) * BLOCK_CACHE_RATE + }; + self.storage.block_cache.capacity = Some(ReadableSize(capacity as u64)); + } // Validate for v2. if self.storage.engine == EngineType::RaftKv2 { @@ -3570,42 +3586,47 @@ impl TikvConfig { fill_cf_opts!(self.rocksdb.raftcf, flow_control_cfg); // Validate memory usage limit. + let block_cache_cap = self.storage.block_cache.capacity.unwrap(); + let write_buffer_cap = self.rocksdb.write_buffer_limit.unwrap_or(ReadableSize(0)); if let Some(memory_usage_limit) = self.memory_usage_limit { let total = SysQuota::memory_limit_in_bytes(); if memory_usage_limit.0 > total { // Explicitly exceeds system memory capacity is not allowed. return Err(format!( - "memory_usage_limit is greater than system memory capacity {}", + "memory_usage_limit is greater than system memory capacity ({})", total ) .into()); } } else { - // Adjust `memory_usage_limit` if necessary. 
- if let Some(cap) = self.storage.block_cache.capacity { - let limit = (cap.0 as f64 / BLOCK_CACHE_RATE * MEMORY_USAGE_LIMIT_RATE) as u64; - self.memory_usage_limit = Some(ReadableSize(limit)); - } else { - self.memory_usage_limit = Some(Self::suggested_memory_usage_limit()); + let mut limit = + (block_cache_cap.0 as f64 / BLOCK_CACHE_RATE * MEMORY_USAGE_LIMIT_RATE) as u64; + if self.storage.engine == EngineType::RaftKv2 { + limit = cmp::max( + limit, + (write_buffer_cap.0 as f64 / WRITE_BUFFER_MEMORY_LIMIT_RATE + * MEMORY_USAGE_LIMIT_RATE) as u64, + ); } + let limit = ReadableSize(cmp::min(limit, SysQuota::memory_limit_in_bytes())); + let default = Self::suggested_memory_usage_limit(); + if limit > default { + warn!( + "memory_usage_limit ({}) > recommanded ({}), maybe page cache isn't enough", + limit, default, + ); + } + self.memory_usage_limit = Some(limit); } - let mut limit = self.memory_usage_limit.unwrap(); - let total = ReadableSize(SysQuota::memory_limit_in_bytes()); - if limit.0 > total.0 { - warn!( - "memory_usage_limit:{:?} > total:{:?}, fallback to total", - limit, total, - ); - self.memory_usage_limit = Some(total); - limit = total; - } - - let default = Self::suggested_memory_usage_limit(); - if limit.0 > default.0 { - warn!( - "memory_usage_limit:{:?} > recommanded:{:?}, maybe page cache isn't enough", - limit, default, - ); + if block_cache_cap.0 + write_buffer_cap.0 > self.memory_usage_limit.unwrap().0 { + return Err(format!( + "The sum of `storage.block-cache.capacity` and `rocksdb.write-buffer-limit` \ + is greater than memory-usage-limit: {} + {} > {}", + block_cache_cap, + write_buffer_cap, + self.memory_usage_limit.unwrap(), + ) + .into()); } // Validate sub-components. 
@@ -4917,11 +4938,8 @@ mod tests { &cfg.storage.data_dir, Some(cfg.rocksdb.build_opt(&resource, cfg.storage.engine)), cfg.rocksdb.build_cf_opts( - &cfg.rocksdb.build_cf_resources( - cfg.storage - .block_cache - .build_shared_cache(cfg.storage.engine), - ), + &cfg.rocksdb + .build_cf_resources(cfg.storage.block_cache.build_shared_cache()), None, cfg.storage.api_version(), None, @@ -5969,6 +5987,7 @@ mod tests { cfg.coprocessor_v2.coprocessor_plugin_directory = None; // Default is `None`, which is represented by not setting the key. cfg.rocksdb.write_buffer_limit = None; cfg.rocksdb.max_total_wal_size = None; + cfg.rocksdb.stats_dump_period = None; // cfg.rocksdb.defaultcf.enable_compaction_guard = None; cfg.rocksdb.writecf.enable_compaction_guard = None; diff --git a/src/server/debug2.rs b/src/server/debug2.rs index 07e079799e0..e914b353760 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -1215,10 +1215,7 @@ mod tests { cfg.storage.data_dir = path.to_str().unwrap().to_string(); cfg.raft_store.raftdb_path = cfg.infer_raft_db_path(None).unwrap(); cfg.raft_engine.mut_config().dir = cfg.infer_raft_engine_path(None).unwrap(); - let cache = cfg - .storage - .block_cache - .build_shared_cache(cfg.storage.engine); + let cache = cfg.storage.block_cache.build_shared_cache(); let env = cfg.build_shared_rocks_env(None, None).unwrap(); let factory = KvEngineFactoryBuilder::new(env, &cfg, cache, None).build(); @@ -1481,10 +1478,7 @@ mod tests { cfg.raft_store.raftdb_path = cfg.infer_raft_db_path(None).unwrap(); cfg.raft_engine.mut_config().dir = cfg.infer_raft_engine_path(None).unwrap(); cfg.gc.enable_compaction_filter = false; - let cache = cfg - .storage - .block_cache - .build_shared_cache(cfg.storage.engine); + let cache = cfg.storage.block_cache.build_shared_cache(); let env = cfg.build_shared_rocks_env(None, None).unwrap(); let factory = KvEngineFactoryBuilder::new(env, &cfg, cache, None).build(); diff --git a/src/server/engine_factory.rs 
b/src/server/engine_factory.rs index f8560f7ed55..85de282b137 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -283,10 +283,7 @@ mod tests { e ); }); - let cache = cfg - .storage - .block_cache - .build_shared_cache(cfg.storage.engine); + let cache = cfg.storage.block_cache.build_shared_cache(); let dir = test_util::temp_dir(name, false); let env = cfg.build_shared_rocks_env(None, None).unwrap(); diff --git a/src/storage/config.rs b/src/storage/config.rs index 9fc052e0ee0..a40db2c424b 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -14,10 +14,7 @@ use tikv_util::{ sys::SysQuota, }; -use crate::config::{ - BLOCK_CACHE_RATE, DEFAULT_ROCKSDB_SUB_DIR, DEFAULT_TABLET_SUB_DIR, MIN_BLOCK_CACHE_SHARD_SIZE, - RAFTSTORE_V2_BLOCK_CACHE_RATE, -}; +use crate::config::{DEFAULT_ROCKSDB_SUB_DIR, DEFAULT_TABLET_SUB_DIR, MIN_BLOCK_CACHE_SHARD_SIZE}; pub const DEFAULT_DATA_DIR: &str = "./"; const DEFAULT_GC_RATIO_THRESHOLD: f64 = 1.1; @@ -34,6 +31,10 @@ const DEFAULT_SCHED_PENDING_WRITE_MB: u64 = 100; const DEFAULT_RESERVED_SPACE_GB: u64 = 5; const DEFAULT_RESERVED_RAFT_SPACE_GB: u64 = 1; +// Block cache capacity used when TikvConfig isn't validated. It should only +// occur in tests. 
+const FALLBACK_BLOCK_CACHE_CAPACITY: ReadableSize = ReadableSize::mb(128); + #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "kebab-case")] pub enum EngineType { @@ -276,21 +277,11 @@ impl BlockCacheConfig { } } - pub fn build_shared_cache(&self, engine_type: EngineType) -> Cache { + pub fn build_shared_cache(&self) -> Cache { if self.shared == Some(false) { warn!("storage.block-cache.shared is deprecated, cache is always shared."); } - let capacity = match self.capacity { - None => { - let total_mem = SysQuota::memory_limit_in_bytes(); - if engine_type == EngineType::RaftKv2 { - ((total_mem as f64) * RAFTSTORE_V2_BLOCK_CACHE_RATE) as usize - } else { - ((total_mem as f64) * BLOCK_CACHE_RATE) as usize - } - } - Some(c) => c.0 as usize, - }; + let capacity = self.capacity.unwrap_or(FALLBACK_BLOCK_CACHE_CAPACITY).0 as usize; let mut cache_opts = LRUCacheOptions::new(); cache_opts.set_capacity(capacity); cache_opts.set_num_shard_bits(self.adjust_shard_bits(capacity) as c_int); diff --git a/src/storage/kv/test_engine_builder.rs b/src/storage/kv/test_engine_builder.rs index c6a7cb7f20d..23a0bfcd594 100644 --- a/src/storage/kv/test_engine_builder.rs +++ b/src/storage/kv/test_engine_builder.rs @@ -96,8 +96,7 @@ impl TestEngineBuilder { if !enable_block_cache { cache_opt.capacity = Some(ReadableSize::kb(0)); } - let shared = - cfg_rocksdb.build_cf_resources(cache_opt.build_shared_cache(EngineType::RaftKv)); + let shared = cfg_rocksdb.build_cf_resources(cache_opt.build_shared_cache()); let cfs_opts = cfs .iter() .map(|cf| match *cf { diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 5aea4702cb5..36c1f648691 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -4223,9 +4223,8 @@ mod tests { let engine = { let path = "".to_owned(); let cfg_rocksdb = db_config; - let shared = cfg_rocksdb.build_cf_resources( - BlockCacheConfig::default().build_shared_cache(EngineType::RaftKv), - ); + let shared = + 
cfg_rocksdb.build_cf_resources(BlockCacheConfig::default().build_shared_cache()); let cfs_opts = vec![ ( CF_DEFAULT, diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 75384f3ebe3..b0c00c6b30b 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -303,7 +303,7 @@ fn test_serde_custom_tikv_config() { create_if_missing: false, max_open_files: 12_345, enable_statistics: true, - stats_dump_period: ReadableDuration::minutes(12), + stats_dump_period: Some(ReadableDuration::minutes(12)), compaction_readahead_size: ReadableSize::kb(1), info_log_max_size: ReadableSize::kb(1), info_log_roll_time: ReadableDuration::secs(12), diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index bea48ed8d59..9c3eeec0c83 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -149,10 +149,7 @@ fn test_delete_files_in_range_for_titan() { // Set configs and create engines let mut cfg = TikvConfig::default(); - let cache = cfg - .storage - .block_cache - .build_shared_cache(cfg.storage.engine); + let cache = cfg.storage.block_cache.build_shared_cache(); cfg.rocksdb.titan.enabled = true; cfg.rocksdb.titan.disable_gc = true; cfg.rocksdb.titan.purge_obsolete_files_period = ReadableDuration::secs(1); From aa4942715d97a4f9b35213ee9b9c48f6676e9937 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Thu, 20 Jul 2023 14:05:17 -0700 Subject: [PATCH 0806/1149] unsafe recovery: Clarify the benefits of using InvokeClosureOnDrop. (#15168) ref tikv/tikv#10483 Comment only change. 
Signed-off-by: Yang Zhang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/peer.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 0036c4b4e0c..c8401f4a830 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -534,11 +534,17 @@ pub enum ForceLeaderState { // learner // - exit joint state // - start_unsafe_recovery_report -// -// Intends to use RAII to sync unsafe recovery procedures between peers, in -// addition to that, it uses a closure to avoid having a raft router as a member -// variable, which is statically dispatched, thus needs to propagate the -// generics everywhere. + +// A wrapper of a closure that will be invoked when it is dropped. +// This design has two benefits: +// 1. Using a closure (dynamically dispatched), so that it can avoid having +// generic member fields like RaftRouter, thus avoid having Rust generic +// type explosion problem. +// 2. Invoke on drop, so that it can be easily and safely used (together with +// Arc) as a coordinator between all concerning peers. Each of the peers +// holds a reference to the same strcuture, and whoever finishes the task +// drops its reference. Once the last reference is dropped, indicating all +// the peers have finished their own tasks, the closure is invoked. pub struct InvokeClosureOnDrop(Box); impl fmt::Debug for InvokeClosureOnDrop { From aa3a0b014296b5ee42b88fd67f7ac564a01b60bb Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Fri, 21 Jul 2023 18:17:17 +0800 Subject: [PATCH 0807/1149] [Dynamic Region]: bucket key range should be updated after ingested sst (#15164) close tikv/tikv#15147 bucket write keys and bytes should be updated after ingested sst. So the bucket key range will be refreshed . 
Signed-off-by: bufferflies <1045931706@qq.com> --- components/pd_client/src/lib.rs | 12 +++ .../src/operation/command/write/ingest.rs | 4 +- components/raftstore/src/store/fsm/peer.rs | 4 + tests/failpoints/cases/test_import_service.rs | 93 +++++++++++++------ tests/integrations/import/test_sst_service.rs | 34 ++++++- 5 files changed, 115 insertions(+), 32 deletions(-) diff --git a/components/pd_client/src/lib.rs b/components/pd_client/src/lib.rs index d547cbf97b0..f7f57dd4bba 100644 --- a/components/pd_client/src/lib.rs +++ b/components/pd_client/src/lib.rs @@ -198,6 +198,18 @@ impl BucketStat { } } + // Notice: It's not evenly distributed, so we update all buckets after ingest + // sst. Generally, sst file size is region split size, and this region is + // empty region. + pub fn ingest_sst(&mut self, key_count: u64, value_size: u64) { + for stat in self.stats.mut_write_bytes() { + *stat += value_size; + } + for stat in self.stats.mut_write_keys() { + *stat += key_count; + } + } + pub fn split(&mut self, idx: usize) { assert!(idx != 0); // inherit the traffic stats for splited bucket diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index ff2e299abd0..7e8ed381ad0 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -148,7 +148,9 @@ impl Apply { let metas: Vec = infos.iter().map(|info| info.meta.clone()).collect(); self.sst_apply_state().register_ssts(index, metas); } - + if let Some(s) = self.buckets.as_mut() { + s.ingest_sst(keys, size as u64); + } self.metrics.size_diff_hint += size; self.metrics.written_bytes += size as u64; self.metrics.written_keys += keys; diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index c9fa108e3c8..a03662ef716 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ 
b/components/raftstore/src/store/fsm/peer.rs @@ -6472,6 +6472,10 @@ where Some(self.fsm.peer.approximate_size.unwrap_or_default() + size); self.fsm.peer.approximate_keys = Some(self.fsm.peer.approximate_keys.unwrap_or_default() + keys); + + if let Some(buckets) = &mut self.fsm.peer.region_buckets { + buckets.ingest_sst(keys, size); + } // The ingested file may be overlapped with the data in engine, so we need to // check it again to get the accurate value. self.fsm.peer.may_skip_split_check = false; diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index ad2cb99d6a8..f1314694e64 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ b/tests/failpoints/cases/test_import_service.rs @@ -257,9 +257,6 @@ fn test_delete_sst_v2_after_epoch_stale() { config.server.addr = "127.0.0.1:0".to_owned(); let cleanup_interval = Duration::from_millis(10); config.raft_store.cleanup_import_sst_interval.0 = cleanup_interval; - config.raft_store.split_region_check_tick_interval.0 = cleanup_interval; - config.raft_store.pd_heartbeat_tick_interval.0 = cleanup_interval; - config.raft_store.region_split_check_diff = Some(ReadableSize::kb(1)); config.server.grpc_concurrency = 1; let (mut cluster, ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(Some(config)); @@ -286,24 +283,9 @@ fn test_delete_sst_v2_after_epoch_stale() { tx.lock().unwrap().send(()).unwrap(); }) .unwrap(); - rx.recv_timeout(std::time::Duration::from_secs(20)).unwrap(); - assert_eq!(1, sst_file_count(&cluster.paths)); - - let (tx, rx) = channel::<()>(); - let tx = Arc::new(Mutex::new(tx)); - fail::cfg_callback("on_update_region_keys", move || { - tx.lock().unwrap().send(()).unwrap(); - }) - .unwrap(); rx.recv_timeout(std::time::Duration::from_millis(100)) .unwrap(); - std::thread::sleep(std::time::Duration::from_millis(100)); - let region_keys = cluster - .pd_client - .get_region_approximate_keys(ctx.get_region_id()) - .unwrap(); - assert_eq!(100, 
region_keys); - fail::remove("on_update_region_keys"); + assert_eq!(1, sst_file_count(&cluster.paths)); // test restart cluster cluster.stop_node(1); @@ -326,7 +308,7 @@ fn test_delete_sst_v2_after_epoch_stale() { tx.lock().unwrap().send(()).unwrap(); }) .unwrap(); - rx.recv_timeout(std::time::Duration::from_millis(100)) + rx.recv_timeout(std::time::Duration::from_millis(1000)) .unwrap(); std::thread::sleep(std::time::Duration::from_millis(100)); assert_eq!(0, sst_file_count(&cluster.paths)); @@ -341,17 +323,9 @@ fn test_delete_sst_v2_after_epoch_stale() { #[test] fn test_delete_sst_after_applied_sst() { - // let mut cluster = test_raftstore_v2::new_server_cluster(1, 1); - let mut config = TikvConfig::default(); - config.server.addr = "127.0.0.1:0".to_owned(); - let cleanup_interval = Duration::from_millis(10); - config.raft_store.split_region_check_tick_interval.0 = cleanup_interval; - config.raft_store.pd_heartbeat_tick_interval.0 = cleanup_interval; - config.raft_store.region_split_check_diff = Some(ReadableSize::kb(1)); - config.server.grpc_concurrency = 1; // disable data flushed fail::cfg("on_flush_completed", "return()").unwrap(); - let (mut cluster, ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(Some(config)); + let (mut cluster, ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(None); let temp_dir = Builder::new().prefix("test_ingest_sst").tempdir().unwrap(); let sst_path = temp_dir.path().join("test.sst"); let sst_range = (0, 100); @@ -397,6 +371,67 @@ fn test_delete_sst_after_applied_sst() { cluster.start().unwrap(); } +#[test] +fn test_split_buckets_after_ingest_sst_v2() { + let mut config = TikvConfig::default(); + config.server.addr = "127.0.0.1:0".to_owned(); + let cleanup_interval = Duration::from_millis(10); + config.raft_store.split_region_check_tick_interval.0 = cleanup_interval; + config.raft_store.pd_heartbeat_tick_interval.0 = cleanup_interval; + config.raft_store.report_region_buckets_tick_interval.0 = 
cleanup_interval; + config.coprocessor.enable_region_bucket = Some(true); + config.coprocessor.region_bucket_size = ReadableSize(200); + config.raft_store.region_split_check_diff = Some(ReadableSize(200)); + config.server.grpc_concurrency = 1; + + let (cluster, ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(Some(config)); + let temp_dir = Builder::new().prefix("test_ingest_sst").tempdir().unwrap(); + let sst_path = temp_dir.path().join("test.sst"); + let sst_range = (0, 255); + let (mut meta, data) = gen_sst_file(sst_path, sst_range); + send_upload_sst(&import, &meta, &data).unwrap(); + let mut ingest = IngestRequest::default(); + ingest.set_context(ctx.clone()); + ingest.set_sst(meta.clone()); + meta.set_region_id(ctx.get_region_id()); + meta.set_region_epoch(ctx.get_region_epoch().clone()); + send_upload_sst(&import, &meta, &data).unwrap(); + ingest.set_sst(meta.clone()); + + let resp = import.ingest(&ingest).unwrap(); + assert!(!resp.has_error(), "{:?}", resp.get_error()); + + let (tx, rx) = channel::<()>(); + let tx = Arc::new(Mutex::new(tx)); + fail::cfg_callback("on_update_region_keys", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + rx.recv_timeout(std::time::Duration::from_millis(100)) + .unwrap(); + + for _ in 0..10 { + let region_keys = cluster + .pd_client + .get_region_approximate_keys(ctx.get_region_id()) + .unwrap_or_default(); + if region_keys != 255 { + std::thread::sleep(std::time::Duration::from_millis(50)); + continue; + } + + let buckets = cluster + .pd_client + .get_buckets(ctx.get_region_id()) + .unwrap_or_default(); + if buckets.meta.keys.len() <= 2 { + std::thread::sleep(std::time::Duration::from_millis(50)); + } + return; + } + panic!("region keys is not 255 or buckets keys len less than 2") +} + fn sst_file_count(paths: &Vec) -> u64 { let mut count = 0; for path in paths { diff --git a/tests/integrations/import/test_sst_service.rs b/tests/integrations/import/test_sst_service.rs index 
7802b18b192..22ab9c7d7fe 100644 --- a/tests/integrations/import/test_sst_service.rs +++ b/tests/integrations/import/test_sst_service.rs @@ -8,6 +8,7 @@ use pd_client::PdClient; use tempfile::Builder; use test_sst_importer::*; use tikv::config::TikvConfig; +use tikv_util::config::ReadableSize; use super::util::*; @@ -87,13 +88,21 @@ fn test_write_and_ingest_with_tde() { #[test] fn test_ingest_sst() { let mut cfg = TikvConfig::default(); + let cleanup_interval = Duration::from_millis(10); + cfg.raft_store.split_region_check_tick_interval.0 = cleanup_interval; + cfg.raft_store.pd_heartbeat_tick_interval.0 = cleanup_interval; + cfg.raft_store.report_region_buckets_tick_interval.0 = cleanup_interval; + cfg.coprocessor.enable_region_bucket = Some(true); + cfg.coprocessor.region_bucket_size = ReadableSize(200); + cfg.raft_store.region_split_check_diff = Some(ReadableSize(200)); + cfg.server.addr = "127.0.0.1:0".to_owned(); cfg.server.grpc_concurrency = 1; - let (_cluster, ctx, _tikv, import) = open_cluster_and_tikv_import_client(Some(cfg)); + let (cluster, ctx, _tikv, import) = open_cluster_and_tikv_import_client(Some(cfg)); let temp_dir = Builder::new().prefix("test_ingest_sst").tempdir().unwrap(); let sst_path = temp_dir.path().join("test.sst"); - let sst_range = (0, 100); + let sst_range = (0, 255); let (mut meta, data) = gen_sst_file(sst_path, sst_range); // No region id and epoch. 
@@ -118,6 +127,27 @@ fn test_ingest_sst() { ingest.set_sst(meta); let resp = import.ingest(&ingest).unwrap(); assert!(!resp.has_error(), "{:?}", resp.get_error()); + + for _ in 0..10 { + let region_keys = cluster + .pd_client + .get_region_approximate_keys(ctx.get_region_id()) + .unwrap_or_default(); + if region_keys != 255 { + std::thread::sleep(std::time::Duration::from_millis(50)); + continue; + } + + let buckets = cluster + .pd_client + .get_buckets(ctx.get_region_id()) + .unwrap_or_default(); + if buckets.meta.keys.len() <= 2 { + std::thread::sleep(std::time::Duration::from_millis(50)); + } + return; + } + panic!("region keys is not 255 or buckets keys len less than 2") } fn switch_mode(import: &ImportSstClient, range: Range, mode: SwitchMode) { From d9087e61563a18a99f083cb074eaef289fae0a20 Mon Sep 17 00:00:00 2001 From: ekexium Date: Mon, 24 Jul 2023 12:17:50 +0800 Subject: [PATCH 0808/1149] ctl: add get_region_read_progress (#15099) ref tikv/tikv#15082 feat(tikv-ctl): add get_region_read_progress Signed-off-by: ekexium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 35 ++++- cmd/tikv-ctl/src/cmd.rs | 17 +++ cmd/tikv-ctl/src/executor.rs | 84 +++++++++++ cmd/tikv-ctl/src/main.rs | 21 ++- components/raftstore/src/store/util.rs | 20 +++ components/resolved_ts/src/endpoint.rs | 40 ++++++ components/resolved_ts/src/resolver.rs | 39 ++++++ components/server/src/server.rs | 22 ++- components/server/src/server2.rs | 21 +++ components/test_raftstore-v2/src/server.rs | 5 +- components/test_raftstore/src/server.rs | 8 +- src/server/raftkv2/mod.rs | 5 + src/server/service/debug.rs | 153 +++++++++++++++++++-- src/server/service/mod.rs | 2 +- 14 files changed, 451 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index be27c6ee616..9d59dc0fb40 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -280,6 +280,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "autotools" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8da1805e028a172334c3b680f93e71126f2327622faef2ec3d893c0a4ad77" +dependencies = [ + "cc", +] + [[package]] name = "aws" version = "0.0.1" @@ -2967,12 +2976,12 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#92075f193814dbc00f347b4589743a36bf624f34" +source = "git+https://github.com/pingcap/kvproto.git#770f7183434ca05887a3f63265b8745017130e1f" dependencies = [ "futures 0.3.15", "grpcio", "protobuf", - "protobuf-build 0.14.1", + "protobuf-build 0.15.1", "raft-proto", ] @@ -4413,11 +4422,24 @@ name = "protobuf-build" version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2df9942df2981178a930a72d442de47e2f0df18ad68e50a30f816f1848215ad0" +dependencies = [ + "bitflags", + "protobuf", + "protobuf-codegen", + "regex", +] + +[[package]] +name = "protobuf-build" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c852d9625b912c3e50480cdc701f60f49890b5d7ad46198dd583600f15e7c6ec" dependencies = [ "bitflags", "grpcio-compiler", "protobuf", "protobuf-codegen", + "protobuf-src", "regex", ] @@ -4440,6 +4462,15 @@ dependencies = [ "protobuf-codegen", ] +[[package]] +name = "protobuf-src" +version = "1.1.0+21.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7ac8852baeb3cc6fb83b93646fb93c0ffe5d14bf138c945ceb4b9948ee0e3c1" +dependencies = [ + "autotools", +] + [[package]] name = "quick-xml" version = "0.22.0" diff --git a/cmd/tikv-ctl/src/cmd.rs b/cmd/tikv-ctl/src/cmd.rs index 17c1a6dd8eb..8a432fc35df 100644 --- a/cmd/tikv-ctl/src/cmd.rs +++ b/cmd/tikv-ctl/src/cmd.rs @@ -627,6 +627,23 @@ pub enum Cmd { /// hex end key end: String, }, + GetRegionReadProgress { + #[structopt(short = "r", long)] + 
/// The target region id + region: u64, + + #[structopt(long)] + /// When specified, prints the locks associated with the transaction + /// that has the smallest 'start_ts' in the resolver, which is + /// preventing the 'resolved_ts' from advancing. + log: bool, + + #[structopt(long, requires = "log")] + /// The smallest start_ts of the target transaction. Namely, only the + /// transaction whose start_ts is greater than or equal to this value + /// can be recorded in TiKV logs. + min_start_ts: Option, + }, } #[derive(StructOpt)] diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index f7783cd82a4..1962cbe0e50 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -716,6 +716,8 @@ pub trait DebugExecutor { _start_ts: u64, _commit_ts: u64, ) -> Result<(), KeyRange>; + + fn get_region_read_progress(&self, region_id: u64, log: bool, min_start_ts: u64); } impl DebugExecutor for DebugClient { @@ -965,6 +967,78 @@ impl DebugExecutor for DebugClient { } } } + + fn get_region_read_progress(&self, region_id: u64, log: bool, min_start_ts: u64) { + let mut req = GetRegionReadProgressRequest::default(); + req.set_region_id(region_id); + req.set_log_locks(log); + req.set_min_start_ts(min_start_ts); + let opt = grpcio::CallOption::default().timeout(Duration::from_secs(10)); + let resp = self + .get_region_read_progress_opt(&req, opt) + .unwrap_or_else(|e| perror_and_exit("DebugClient::get_region_read_progress", e)); + if !resp.get_error().is_empty() { + println!("error: {}", resp.get_error()); + } + let fields = [ + ("Region read progress:", "".to_owned()), + ("exist", resp.get_region_read_progress_exist().to_string()), + ("safe_ts", resp.get_safe_ts().to_string()), + ("applied_index", resp.get_applied_index().to_string()), + ("read_state.ts", resp.get_read_state_ts().to_string()), + ( + "read_state.apply_index", + resp.get_read_state_apply_index().to_string(), + ), + ( + "pending front item (oldest) ts", + 
resp.get_pending_front_ts().to_string(), + ), + ( + "pending front item (oldest) applied index", + resp.get_pending_front_applied_index().to_string(), + ), + ( + "pending back item (latest) ts", + resp.get_pending_back_ts().to_string(), + ), + ( + "pending back item (latest) applied index", + resp.get_pending_back_applied_index().to_string(), + ), + ("paused", resp.get_region_read_progress_paused().to_string()), + ("discarding", resp.get_discard().to_string()), + // TODO: figure out the performance impact here before implementing it. + // ( + // "duration to last update_safe_ts", + // format!("{} ms", resp.get_duration_to_last_update_safe_ts_ms()), + // ), + // ( + // "duration to last consume_leader_info", + // format!("{} ms", resp.get_duration_to_last_consume_leader_ms()), + // ), + ("Resolver:", "".to_owned()), + ("exist", resp.get_resolver_exist().to_string()), + ("resolved_ts", resp.get_resolved_ts().to_string()), + ( + "tracked index", + resp.get_resolver_tracked_index().to_string(), + ), + ("number of locks", resp.get_num_locks().to_string()), + ( + "number of transactions", + resp.get_num_transactions().to_string(), + ), + ("stopped", resp.get_resolver_stopped().to_string()), + ]; + for (name, value) in &fields { + if value.is_empty() { + println!("{}", name); + } else { + println!(" {}: {}, ", name, value); + } + } + } } impl DebugExecutor for DebuggerImpl @@ -1217,6 +1291,11 @@ where ) -> Result<(), KeyRange> { unimplemented!("only available for remote mode"); } + + fn get_region_read_progress(&self, _region_id: u64, _log: bool, _min_start_ts: u64) { + println!("only available for remote mode"); + tikv_util::logger::exit_process_gracefully(-1); + } } fn handle_engine_error(err: EngineError) -> ! 
{ @@ -1429,4 +1508,9 @@ impl DebugExecutor for DebuggerImplV2 { ) -> Result<(), KeyRange> { unimplemented!("only available for remote mode"); } + + fn get_region_read_progress(&self, _region_id: u64, _log: bool, _min_start_ts: u64) { + println!("only available for remote mode"); + tikv_util::logger::exit_process_gracefully(-1); + } } diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 91e689e4236..c0d690a1593 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -6,11 +6,6 @@ #[macro_use] extern crate log; -mod cmd; -mod executor; -mod fork_readonly_tikv; -mod util; - use std::{ borrow::ToOwned, fs::{self, File, OpenOptions}, @@ -60,6 +55,11 @@ use txn_types::Key; use crate::{cmd::*, executor::*, util::*}; +mod cmd; +mod executor; +mod fork_readonly_tikv; +mod util; + fn main() { let opt = Opt::from_args(); @@ -653,6 +653,17 @@ fn main() { debug_executor.dump_cluster_info(); } Cmd::ResetToVersion { version } => debug_executor.reset_to_version(version), + Cmd::GetRegionReadProgress { + region, + log, + min_start_ts, + } => { + debug_executor.get_region_read_progress( + region, + log, + min_start_ts.unwrap_or_default(), + ); + } _ => { unreachable!() } diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index e17fa17e97e..d12ddcb4c4e 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1655,6 +1655,26 @@ impl RegionReadProgressCore { pub fn get_local_leader_info(&self) -> &LocalLeaderInfo { &self.leader_info } + + pub fn applied_index(&self) -> u64 { + self.applied_index + } + + pub fn paused(&self) -> bool { + self.pause + } + + pub fn pending_items(&self) -> &VecDeque { + &self.pending_items + } + + pub fn read_state(&self) -> &ReadState { + &self.read_state + } + + pub fn discarding(&self) -> bool { + self.discard + } } /// Represent the duration of all stages of raftstore recorded by one diff --git a/components/resolved_ts/src/endpoint.rs 
b/components/resolved_ts/src/endpoint.rs index 23be4a62fc5..3ce38874244 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -597,6 +597,29 @@ where self.store_id }) } + + fn handle_get_diagnosis_info( + &self, + region_id: u64, + log_locks: bool, + min_start_ts: u64, + callback: tikv::server::service::ResolvedTsDiagnosisCallback, + ) { + if let Some(r) = self.regions.get(®ion_id) { + if log_locks { + r.resolver.log_locks(min_start_ts); + } + callback(Some(( + r.resolver.stopped(), + r.resolver.resolved_ts().into_inner(), + r.resolver.tracked_index(), + r.resolver.num_locks(), + r.resolver.num_transactions(), + ))); + } else { + callback(None); + } + } } pub enum Task { @@ -632,6 +655,12 @@ pub enum Task { ChangeConfig { change: ConfigChange, }, + GetDiagnosisInfo { + region_id: u64, + log_locks: bool, + min_start_ts: u64, + callback: tikv::server::service::ResolvedTsDiagnosisCallback, + }, } impl fmt::Debug for Task { @@ -689,6 +718,11 @@ impl fmt::Debug for Task { .field("name", &"change_config") .field("change", &change) .finish(), + Task::GetDiagnosisInfo { region_id, .. 
} => de + .field("name", &"get_diagnosis_info") + .field("region_id", ®ion_id) + .field("callback", &"callback") + .finish(), } } } @@ -733,6 +767,12 @@ where apply_index, } => self.handle_scan_locks(region_id, observe_id, entries, apply_index), Task::ChangeConfig { change } => self.handle_change_config(change), + Task::GetDiagnosisInfo { + region_id, + log_locks, + min_start_ts, + callback, + } => self.handle_get_diagnosis_info(region_id, log_locks, min_start_ts, callback), } } } diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index b341c546940..d165c5cc05f 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -8,6 +8,8 @@ use txn_types::TimeStamp; use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; +const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; + // Resolver resolves timestamps that guarantee no more commit will happen before // the timestamp. pub struct Resolver { @@ -74,6 +76,14 @@ impl Resolver { self.resolved_ts } + pub fn tracked_index(&self) -> u64 { + self.tracked_index + } + + pub fn stopped(&self) -> bool { + self.stopped + } + pub fn size(&self) -> usize { self.locks_by_key.keys().map(|k| k.len()).sum::() + self @@ -190,6 +200,35 @@ impl Resolver { self.resolved_ts } + + pub(crate) fn log_locks(&self, min_start_ts: u64) { + // log lock with the minimum start_ts >= min_start_ts + if let Some((start_ts, keys)) = self + .lock_ts_heap + .range(TimeStamp::new(min_start_ts)..) 
+ .next() + { + let keys_for_log = keys + .iter() + .map(|key| log_wrappers::Value::key(key)) + .take(MAX_NUMBER_OF_LOCKS_IN_LOG) + .collect::>(); + info!( + "locks with the minimum start_ts in resolver"; + "region_id" => self.region_id, + "start_ts" => start_ts, + "sampled keys" => ?keys_for_log, + ); + } + } + + pub(crate) fn num_locks(&self) -> u64 { + self.locks_by_key.len() as u64 + } + + pub(crate) fn num_transactions(&self) -> u64 { + self.lock_ts_heap.len() as u64 + } } #[cfg(test)] diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 5edb962d61a..1afe058ca5d 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -69,7 +69,7 @@ use raftstore::{ }, RaftRouterCompactedEventSender, }; -use resolved_ts::LeadershipResolver; +use resolved_ts::{LeadershipResolver, Task}; use resource_control::{ worker::{GroupQuotaAdjustWorker, BACKGROUND_LIMIT_ADJUST_DURATION}, ResourceGroupManager, ResourceManagerService, MIN_PRIORITY_UPDATE_INTERVAL, @@ -253,6 +253,7 @@ struct TikvServer { causal_ts_provider: Option>, // used for rawkv apiv2 tablet_registry: Option>, br_snap_recovery_mode: bool, // use for br snapshot recovery + resolved_ts_scheduler: Option>, grpc_service_mgr: GrpcServiceManager, } @@ -441,6 +442,7 @@ where causal_ts_provider, tablet_registry: None, br_snap_recovery_mode: is_recovering_marked, + resolved_ts_scheduler: None, grpc_service_mgr: GrpcServiceManager::new(tx), } } @@ -1029,6 +1031,7 @@ where server.env(), self.security_mgr.clone(), ); + self.resolved_ts_scheduler = Some(rts_worker.scheduler()); rts_worker.start_with_timer(rts_endpoint); self.core.to_stop.push(rts_worker); } @@ -1095,10 +1098,27 @@ where .register(tikv::config::Module::Import, Box::new(import_cfg_mgr)); // Debug service. 
+ let resolved_ts_scheduler = Arc::new(self.resolved_ts_scheduler.clone()); let debug_service = DebugService::new( servers.debugger.clone(), servers.server.get_debug_thread_pool().clone(), engines.engine.raft_extension(), + self.engines.as_ref().unwrap().store_meta.clone(), + Arc::new( + move |region_id, log_locks, min_start_ts, callback| -> bool { + if let Some(s) = resolved_ts_scheduler.as_ref() { + let res = s.schedule(Task::GetDiagnosisInfo { + region_id, + log_locks, + min_start_ts, + callback, + }); + res.is_ok() + } else { + false + } + }, + ), ); info!("start register debug service"); if servers diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 88e9a76340c..b05d8ad8dfa 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -66,6 +66,7 @@ use raftstore_v2::{ router::{PeerMsg, RaftRouter}, StateStorage, }; +use resolved_ts::Task; use resource_control::{ worker::{GroupQuotaAdjustWorker, BACKGROUND_LIMIT_ADJUST_DURATION}, ResourceGroupManager, ResourceManagerService, MIN_PRIORITY_UPDATE_INTERVAL, @@ -252,6 +253,7 @@ struct TikvServer { resource_manager: Option>, causal_ts_provider: Option>, // used for rawkv apiv2 tablet_registry: Option>, + resolved_ts_scheduler: Option>, grpc_service_mgr: GrpcServiceManager, } @@ -400,6 +402,7 @@ where resource_manager, causal_ts_provider, tablet_registry: None, + resolved_ts_scheduler: None, grpc_service_mgr: GrpcServiceManager::new(tx), } } @@ -692,6 +695,7 @@ where self.env.clone(), self.security_mgr.clone(), ); + self.resolved_ts_scheduler = Some(rts_worker.scheduler()); rts_worker.start_with_timer(rts_endpoint); self.core.to_stop.push(rts_worker); } @@ -990,10 +994,27 @@ where debugger.set_raft_statistics(self.raft_statistics.clone()); // Debug service. 
+ let resolved_ts_scheduler = Arc::new(self.resolved_ts_scheduler.clone()); let debug_service = DebugService::new( debugger, servers.server.get_debug_thread_pool().clone(), engines.engine.raft_extension(), + self.router.as_ref().unwrap().store_meta().clone(), + Arc::new( + move |region_id, log_locks, min_start_ts, callback| -> bool { + if let Some(s) = resolved_ts_scheduler.as_ref() { + let res = s.schedule(Task::GetDiagnosisInfo { + region_id, + log_locks, + min_start_ts, + callback, + }); + res.is_ok() + } else { + false + } + }, + ), ); if servers .server diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 6a5097f716a..00a13d7f78c 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -1083,12 +1083,15 @@ pub fn must_new_cluster_and_debug_client() -> ( DebuggerImplV2::new(tablet_registry, raft_engine, ConfigController::default()); sim.pending_debug_service = Some(Box::new(move |cluster, debug_thread_handle| { - let raft_extension = cluster.storages.get(&1).unwrap().raft_extension(); + let raftkv = cluster.storages.get(&1).unwrap(); + let raft_extension = raftkv.raft_extension(); create_debug(DebugService::new( debugger.clone(), debug_thread_handle, raft_extension, + raftkv.raftkv.router().store_meta().clone(), + Arc::new(|_, _, _, _| false), )) })); } diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 4a032bcd34e..aad014a8834 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -500,7 +500,13 @@ impl ServerCluster { None, ); let debug_thread_handle = debug_thread_pool.handle().clone(); - let debug_service = DebugService::new(debugger, debug_thread_handle, extension); + let debug_service = DebugService::new( + debugger, + debug_thread_handle, + extension, + store_meta.clone(), + Arc::new(|_, _, _, _| false), + ); let apply_router = system.apply_router(); // 
Create node. diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 5935d542a37..a023ba158c5 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -128,6 +128,11 @@ impl RaftKv2 { pub fn set_txn_extra_scheduler(&mut self, txn_extra_scheduler: Arc) { self.txn_extra_scheduler = Some(txn_extra_scheduler); } + + // for test only + pub fn router(&self) -> &RaftRouter { + &self.router + } } impl tikv_kv::Engine for RaftKv2 { diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index 20543cf6736..c7d5175f5ed 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -1,5 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. +use std::sync::{Arc, Mutex}; + use futures::{ future::{Future, FutureExt, TryFutureExt}, sink::SinkExt, @@ -10,8 +12,9 @@ use grpcio::{ WriteFlags, }; use kvproto::debugpb::{self, *}; +use raftstore::store::fsm::store::StoreRegionMeta; use tikv_kv::RaftExtension; -use tikv_util::metrics; +use tikv_util::{future::paired_future_callback, metrics}; use tokio::runtime::Handle; use crate::server::debug::{Debugger, Error, Result}; @@ -38,30 +41,79 @@ fn error_to_grpc_error(tag: &'static str, e: Error) -> GrpcError { e } +pub type Callback = Box; +pub type ResolvedTsDiagnosisCallback = Callback< + Option<( + bool, // stopped + u64, // resolved_ts + u64, // tracked index + u64, // num_locks + u64, // num_transactions + )>, +>; +pub type ScheduleResolvedTsTask = Arc< + dyn Fn( + u64, // region id + bool, // log_locks + u64, // min_start_ts + ResolvedTsDiagnosisCallback, + ) -> bool + + Send + + Sync, +>; + /// Service handles the RPC messages for the `Debug` service. 
-#[derive(Clone)] -pub struct Service +pub struct Service where - T: RaftExtension, - D: Debugger, + T: RaftExtension + Clone, + D: Debugger + Clone, + S: StoreRegionMeta, { pool: Handle, debugger: D, raft_router: T, + store_meta: Arc>, + resolved_ts_scheduler: ScheduleResolvedTsTask, } -impl Service +impl Clone for Service where - T: RaftExtension, - D: Debugger, + T: RaftExtension + Clone, + D: Debugger + Clone, + S: StoreRegionMeta, +{ + fn clone(&self) -> Self { + Service { + pool: self.pool.clone(), + debugger: self.debugger.clone(), + raft_router: self.raft_router.clone(), + store_meta: self.store_meta.clone(), + resolved_ts_scheduler: self.resolved_ts_scheduler.clone(), + } + } +} + +impl Service +where + T: RaftExtension + Clone, + D: Debugger + Clone, + S: StoreRegionMeta, { /// Constructs a new `Service` with `Engines`, a `RaftExtension`, a /// `GcWorker` and a `RegionInfoAccessor`. - pub fn new(debugger: D, pool: Handle, raft_router: T) -> Self { + pub fn new( + debugger: D, + pool: Handle, + raft_router: T, + store_meta: Arc>, + resolved_ts_scheduler: ScheduleResolvedTsTask, + ) -> Self { Service { pool, debugger, raft_router, + store_meta, + resolved_ts_scheduler, } } @@ -86,10 +138,11 @@ where } } -impl debugpb::Debug for Service +impl debugpb::Debug for Service where T: RaftExtension + 'static, D: Debugger + Clone + Send + 'static, + S: StoreRegionMeta, { fn get(&mut self, ctx: RpcContext<'_>, mut req: GetRequest, sink: UnarySink) { const TAG: &str = "debug_get"; @@ -576,6 +629,86 @@ where self.handle_response(ctx, sink, f, "debug_flashback_to_version"); } + + fn get_region_read_progress( + &mut self, + ctx: RpcContext<'_>, + req: GetRegionReadProgressRequest, + sink: UnarySink, + ) { + let store_meta = self.store_meta.lock().unwrap(); + let rrp = store_meta.region_read_progress(); + let mut resp = GetRegionReadProgressResponse::default(); + rrp.with(|registry| { + let region = registry.get(&req.get_region_id()); + if let Some(r) = region { + 
resp.set_region_read_progress_exist(true); + resp.set_safe_ts(r.safe_ts()); + let core = r.get_core(); + resp.set_applied_index(core.applied_index()); + resp.set_region_read_progress_paused(core.paused()); + if let Some(back) = core.pending_items().back() { + resp.set_pending_back_ts(back.ts); + resp.set_pending_back_applied_index(back.idx); + } + if let Some(front) = core.pending_items().front() { + resp.set_pending_front_ts(front.ts); + resp.set_pending_front_applied_index(front.idx) + } + resp.set_read_state_ts(core.read_state().ts); + resp.set_read_state_apply_index(core.read_state().idx); + resp.set_discard(core.discarding()); + // TODO: set durations + // resp.set_duration_to_last_consume_leader_ms(); + // resp.set_duration_to_last_update_safe_ts_ms(); + } else { + resp.set_region_read_progress_exist(false); + } + }); + + // get from resolver + let (cb, f) = paired_future_callback(); + if (*self.resolved_ts_scheduler)( + req.get_region_id(), + req.get_log_locks(), + req.get_min_start_ts(), + cb, + ) { + let f = async move { + let res = f.await; + match res { + Err(e) => { + resp.set_error("get resolved-ts info failed".to_owned()); + error!("tikv-ctl get resolved-ts info failed"; "err" => ?e); + } + Ok(Some(( + stopped, + resolved_ts, + resolver_tracked_index, + num_locks, + num_transactions, + ))) => { + resp.set_resolver_exist(true); + resp.set_resolver_stopped(stopped); + resp.set_resolved_ts(resolved_ts); + resp.set_resolver_tracked_index(resolver_tracked_index); + resp.set_num_locks(num_locks); + resp.set_num_transactions(num_transactions); + } + Ok(None) => { + resp.set_resolver_exist(false); + } + } + + Ok(resp) + }; + self.handle_response(ctx, sink, f, "debug_get_region_read_progress"); + } else { + resp.set_error("resolved-ts is not enabled".to_owned()); + let f = async move { Ok(resp) }; + self.handle_response(ctx, sink, f, "debug_get_region_read_progress"); + } + } } mod region_size_response { diff --git a/src/server/service/mod.rs 
b/src/server/service/mod.rs index 793f4fd7906..00369a4ceae 100644 --- a/src/server/service/mod.rs +++ b/src/server/service/mod.rs @@ -6,7 +6,7 @@ pub mod diagnostics; mod kv; pub use self::{ - debug::Service as DebugService, + debug::{ResolvedTsDiagnosisCallback, Service as DebugService}, diagnostics::Service as DiagnosticsService, kv::{ batch_commands_request, batch_commands_response, future_flashback_to_version, From 47b4c474e73f6f51c19f137814c7daba78e9c093 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 24 Jul 2023 14:41:49 +0800 Subject: [PATCH 0809/1149] raftstore-v2: fix potential data loss during server close (#15173) close tikv/tikv#14390 fix potential data loss during server close Signed-off-by: SpadeA-Tang --- .../raftstore-v2/src/operation/command/mod.rs | 10 ++- .../src/operation/ready/apply_trace.rs | 52 ++++++++------ .../integrations/raftstore/test_bootstrap.rs | 72 ++++++++++++++++++- 3 files changed, 109 insertions(+), 25 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 5d842549700..430d4ff2242 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -23,6 +23,7 @@ use std::{ }; use engine_traits::{KvEngine, PerfContext, RaftEngine, WriteBatch, WriteOptions}; +use fail::fail_point; use kvproto::raft_cmdpb::{ AdminCmdType, CmdType, RaftCmdRequest, RaftCmdResponse, RaftRequestHeader, }; @@ -839,7 +840,14 @@ impl Apply { apply_res.metrics = mem::take(&mut self.metrics); apply_res.bucket_stat = self.buckets.clone(); let written_bytes = apply_res.metrics.written_bytes; - self.res_reporter().report(apply_res); + + let skip_report = || -> bool { + fail_point!("before_report_apply_res", |_| { true }); + false + }(); + if !skip_report { + self.res_reporter().report(apply_res); + } if let Some(buckets) = &mut self.buckets { buckets.clear_stats(); } 
diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index d535b4beb42..d4743448d07 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -612,51 +612,59 @@ impl Peer { })(); if let Some(tablet) = self.tablet().cloned() { + let applied_index = self.storage().entry_storage().applied_index(); + + let mut tried_count: usize = 0; + let mut flushed = false; // flush the oldest cf one by one until we are under the replay count threshold loop { let replay_count = self.storage().estimate_replay_count(); if replay_count < flush_threshold { + if flushed { + let admin_flush = self.storage_mut().apply_trace_mut().admin.flushed; + let (_, _, tablet_index) = ctx + .tablet_registry + .parse_tablet_name(Path::new(tablet.path())) + .unwrap(); + let mut lb = ctx.engine.log_batch(1); + lb.put_flushed_index(region_id, CF_RAFT, tablet_index, admin_flush) + .unwrap(); + ctx.engine.consume(&mut lb, true).unwrap(); + info!( + self.logger, + "flush before close flush admin for region"; + "admin_flush" => admin_flush, + ); + } break; } + info!( self.logger, "flush-before-close: replay count exceeds threshold, pick the oldest cf to flush"; "count" => replay_count, + "tried" => tried_count, ); + tried_count += 1; tablet.flush_oldest_cf(true, None).unwrap(); + flushed = true; let flush_state = self.flush_state().clone(); let mut apply_trace = self.storage_mut().apply_trace_mut(); - let mut max_flush_index = 0; let flushed_indexes = flush_state.as_ref().flushed_index(); for i in 0..flushed_indexes.len() { let flush_index = flushed_indexes[i].load(Ordering::SeqCst); let cf = offset_to_cf(i); apply_trace.on_flush(cf, flush_index); - max_flush_index = u64::max(max_flush_index, flush_index); } - apply_trace.maybe_advance_admin_flushed(max_flush_index); - let admin_flush = apply_trace.admin.flushed; - apply_trace.persisted_applied = 
admin_flush; - - if self.storage().estimate_replay_count() < flush_threshold { - let (_, _, tablet_index) = ctx - .tablet_registry - .parse_tablet_name(Path::new(tablet.path())) - .unwrap(); - let mut lb = ctx.engine.log_batch(1); - lb.put_flushed_index(region_id, CF_RAFT, tablet_index, admin_flush) - .unwrap(); - ctx.engine.consume(&mut lb, true).unwrap(); - info!( - self.logger, - "flush before close flush admin for region"; - "admin_flush" => admin_flush, - ); - break; - } + // We should use applied_index rather than flushed_index here. Memtable flush + // may be earlier than `on_apply_res` which means flushed_index can be larger + // than applied_index, and using flush_index can cause data loss which is + // described on the comment of `test_flush_index_exceed_last_modified`. + apply_trace.maybe_advance_admin_flushed(applied_index); + apply_trace.persisted_applied = apply_trace.admin.flushed; } } diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index e4df37c2e91..07fd692ca53 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -2,12 +2,13 @@ use std::{ path::Path, sync::{mpsc::sync_channel, Arc, Mutex}, + time::Duration, }; use concurrency_manager::ConcurrencyManager; use engine_traits::{ - Engines, Peekable, RaftEngine, RaftEngineReadOnly, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, - CF_WRITE, + DbOptionsExt, Engines, MiscExt, Peekable, RaftEngine, RaftEngineReadOnly, ALL_CFS, CF_DEFAULT, + CF_LOCK, CF_RAFT, CF_WRITE, }; use kvproto::{kvrpcpb::ApiVersion, metapb, raft_serverpb::RegionLocalState}; use raftstore::{ @@ -249,3 +250,70 @@ fn test_flush_before_stop() { }) .unwrap(); } + +// We cannot use a flushed index to call `maybe_advance_admin_flushed` +// consider a case: +// 1. lock `k` with index 6 +// 2. on_applied_res => lockcf's last_modified = 6 +// 3. flush lock cf => lockcf's flushed_index = 6 +// 4. 
batch {unlock `k`, write `k`} with index 7 +// (last_modified is updated in store but RocksDB is modified in apply. So, +// before on_apply_res, the last_modified is not updated.) +// +// flush-before-close: +// 5. pick write cf to flush => writecf's flushed_index = 7 +// +// 6. maybe_advance_admin_flushed(7): as lockcf's last_modified = flushed_index, +// it will not block advancing admin index +// 7. admin index 7 is persisted. => we may loss `unlock k` +#[test] +fn test_flush_index_exceed_last_modified() { + let mut cluster = test_raftstore_v2::new_node_cluster(0, 1); + cluster.run(); + + let key = b"key1"; + cluster.must_put_cf(CF_LOCK, key, b"v"); + cluster.must_put_cf(CF_WRITE, b"dummy", b"v"); + + fail::cfg("before_report_apply_res", "return").unwrap(); + let reg = cluster.tablet_registries.get(&1).unwrap(); + { + let mut cache = reg.get(1).unwrap(); + let tablet = cache.latest().unwrap(); + tablet + .set_db_options(&[("avoid_flush_during_shutdown", "true")]) + .unwrap(); + + // previous flush before strategy is flush oldest one by one, where freshness + // comparison is in second, so sleep a second + std::thread::sleep(Duration::from_millis(1000)); + tablet.flush_cf(CF_LOCK, true).unwrap(); + } + + cluster + .batch_put( + key, + vec![ + new_put_cf_cmd(CF_WRITE, key, b"value"), + new_delete_cmd(CF_LOCK, key), + ], + ) + .unwrap(); + + fail::cfg("flush_before_cluse_threshold", "return(1)").unwrap(); + let router = cluster.get_router(1).unwrap(); + let (tx, rx) = sync_channel(1); + let msg = PeerMsg::FlushBeforeClose { tx }; + router.force_send(1, msg).unwrap(); + rx.recv().unwrap(); + + assert!(cluster.get_cf(CF_WRITE, b"key1").is_some()); + assert!(cluster.get_cf(CF_LOCK, b"key1").is_none()); + cluster.stop_node(1); + + fail::remove("before_report_apply_res"); + cluster.start().unwrap(); + + assert!(cluster.get_cf(CF_WRITE, b"key1").is_some()); + assert!(cluster.get_cf(CF_LOCK, b"key1").is_none()); +} From 78b578444351ec0db648a423cac346e93cc30f1e Mon Sep 17 
00:00:00 2001 From: Neil Shen Date: Tue, 25 Jul 2023 00:52:32 +0800 Subject: [PATCH 0810/1149] raftstore-v2: implement force leader (#15171) ref tikv/tikv#15108 * Add UnsafeRecoveryHandle trait to abstract unsafe recovery APIs * Implement UnsafeRecoveryHandle for raftstore v1 router * Implement UnsafeRecoveryHandle for raftstore v2 router * Add unsafe recovery messages to v2 Store and v2 Peer * raftstore-v2: implement force leader Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/fsm/peer.rs | 15 + components/raftstore-v2/src/fsm/store.rs | 4 + .../operation/command/admin/conf_change.rs | 4 +- .../operation/command/admin/merge/rollback.rs | 2 + .../raftstore-v2/src/operation/command/mod.rs | 24 ++ components/raftstore-v2/src/operation/mod.rs | 1 + components/raftstore-v2/src/operation/pd.rs | 12 +- .../raftstore-v2/src/operation/query/mod.rs | 11 +- .../raftstore-v2/src/operation/ready/mod.rs | 18 + .../operation/unsafe_recovery/force_leader.rs | 350 ++++++++++++++++++ .../src/operation/unsafe_recovery/mod.rs | 4 + .../src/operation/unsafe_recovery/report.rs | 21 ++ components/raftstore-v2/src/raft/peer.rs | 40 +- components/raftstore-v2/src/router/imp.rs | 60 ++- components/raftstore-v2/src/router/message.rs | 20 + components/raftstore-v2/src/router/mod.rs | 2 +- components/raftstore-v2/src/worker/pd/mod.rs | 7 +- .../raftstore-v2/src/worker/pd/store.rs | 54 ++- components/raftstore/src/store/fsm/peer.rs | 33 +- components/raftstore/src/store/mod.rs | 8 +- components/raftstore/src/store/peer.rs | 85 ++--- .../raftstore/src/store/unsafe_recovery.rs | 82 ++++ components/raftstore/src/store/worker/pd.rs | 14 +- components/test_raftstore-v2/src/cluster.rs | 47 ++- src/server/tablet_snap.rs | 6 + .../raftstore/test_unsafe_recovery.rs | 179 +++++---- 26 files changed, 916 insertions(+), 187 deletions(-) create mode 100644 
components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs create mode 100644 components/raftstore-v2/src/operation/unsafe_recovery/mod.rs create mode 100644 components/raftstore-v2/src/operation/unsafe_recovery/report.rs create mode 100644 components/raftstore/src/store/unsafe_recovery.rs diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index a6a81cf4cab..eb937e4d8ca 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -362,6 +362,21 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerMsg::FlushBeforeClose { tx } => { self.fsm.peer_mut().flush_before_close(self.store_ctx, tx) } + PeerMsg::EnterForceLeaderState { + syncer, + failed_stores, + } => self.fsm.peer_mut().on_enter_pre_force_leader( + self.store_ctx, + syncer, + failed_stores, + ), + PeerMsg::ExitForceLeaderState => { + self.fsm.peer_mut().on_exit_force_leader(self.store_ctx) + } + PeerMsg::ExitForceLeaderStateCampaign => { + self.fsm.peer_mut().on_exit_force_leader_campaign() + } + PeerMsg::UnsafeRecoveryWaitApply(_) | PeerMsg::UnsafeRecoveryFillOutReport(_) => (), } } // TODO: instead of propose pending commands immediately, we should use timeout. 
diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index e7cc2b52b9e..c7998359066 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -318,6 +318,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { send_time, inspector, ), + StoreMsg::UnsafeRecoveryReport(report) => self + .fsm + .store + .on_unsafe_recovery_report(self.store_ctx, report), } } } diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 530789c31f6..8d746391c1f 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -104,7 +104,7 @@ impl Peer { self.peer(), changes.as_ref(), &cc, - false, + self.is_in_force_leader(), )?; // TODO: check if the new peer is already in history record. @@ -279,7 +279,7 @@ impl Apply { self.apply_single_change(kind, cp, &mut new_region) }; if let Err(e) = res { - error!(self.logger, "failed to apply conf change"; + error!(self.logger, "failed to apply conf change"; "changes" => ?changes, "legacy" => legacy, "original region" => ?region, "err" => ?e); diff --git a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs index ec602c61eb2..7b8d34ed0e7 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs @@ -65,6 +65,8 @@ impl Peer { admin.set_cmd_type(AdminCmdType::RollbackMerge); admin.mut_rollback_merge().set_commit(index); request.set_admin_request(admin); + // TODO: it should propose via on_admin_command, otherwise it may panic + // during force leader. 
if let Err(e) = self.propose(store_ctx, request.write_to_bytes().unwrap()) { error!(self.logger, "failed to propose RollbackMerge"; "err" => ?e); } diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 430d4ff2242..74e3df196c6 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -196,6 +196,17 @@ impl Peer { } return Err(e); } + if self.has_force_leader() { + metrics.invalid_proposal.force_leader.inc(); + // in force leader state, forbid requests to make the recovery + // progress less error-prone. + if !(admin_type.is_some() + && (admin_type.unwrap() == AdminCmdType::ChangePeer + || admin_type.unwrap() == AdminCmdType::ChangePeerV2)) + { + return Err(Error::RecoveryInProgress(self.region_id())); + } + } // Check whether the region is in the flashback state and the request could be // proposed. Skip the not prepared error because the // `self.region().is_in_flashback` may not be the latest right after applying @@ -238,6 +249,19 @@ impl Peer { data: Vec, proposal_ctx: Vec, ) -> Result { + // Should not propose normal in force leader state. + // In `pre_propose_raft_command`, it rejects all the requests expect + // conf-change if in force leader state. 
+ if self.has_force_leader() { + store_ctx.raft_metrics.invalid_proposal.force_leader.inc(); + panic!( + "[{}] {} propose normal in force leader state {:?}", + self.region_id(), + self.peer_id(), + self.force_leader() + ); + }; + store_ctx.raft_metrics.propose.normal.inc(); store_ctx .raft_metrics diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 663d051f7e1..8ce592dd753 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -8,6 +8,7 @@ mod pd; mod query; mod ready; mod txn_ext; +mod unsafe_recovery; pub use command::{ merge_source_path, AdminCmdResult, ApplyFlowControl, CatchUpLogs, CommittedEntries, diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 7ad82959fa8..817b3aa6eb6 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -22,7 +22,7 @@ use crate::{ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { #[inline] pub fn on_pd_store_heartbeat(&mut self) { - self.fsm.store.store_heartbeat_pd(self.store_ctx); + self.fsm.store.store_heartbeat_pd(self.store_ctx, None); self.schedule_tick( StoreTick::PdStoreHeartbeat, self.store_ctx.cfg.pd_store_heartbeat_tick_interval.0, @@ -31,8 +31,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { } impl Store { - pub fn store_heartbeat_pd(&self, ctx: &StoreContext) - where + pub fn store_heartbeat_pd( + &self, + ctx: &StoreContext, + report: Option, + ) where EK: KvEngine, ER: RaftEngine, { @@ -45,7 +48,6 @@ impl Store { } let snap_stats = ctx.snap_mgr.stats(); - // todo: imple snapshot status report stats.set_sending_snap_count(snap_stats.sending_count as u32); stats.set_receiving_snap_count(snap_stats.receiving_count as u32); stats.set_snapshot_stats(snap_stats.stats.into()); @@ -73,7 +75,7 @@ impl Store { ); stats.set_is_busy(false); // TODO: 
add query stats - let task = pd::Task::StoreHeartbeat { stats }; + let task = pd::Task::StoreHeartbeat { stats, report }; if let Err(e) = ctx.schedulers.pd.schedule(task) { error!(self.logger(), "notify pd failed"; "store_id" => self.store_id(), diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index e7f795dc230..eddbacd1dab 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -141,13 +141,9 @@ impl Peer { )); } - // TODO: add flashback_state check - // Check whether the store has the right peer to handle the request. let request = msg.get_requests(); - // TODO: add force leader - // ReadIndex can be processed on the replicas. let is_read_index_request = request.len() == 1 && request[0].get_cmd_type() == CmdType::ReadIndex; @@ -164,6 +160,13 @@ impl Peer { return Err(e); } + if self.has_force_leader() { + raft_metrics.invalid_proposal.force_leader.inc(); + // in force leader state, forbid requests to make the recovery + // progress less error-prone. + return Err(Error::RecoveryInProgress(self.region_id())); + } + // Check whether the peer is initialized. if !self.storage().is_initialized() { raft_metrics.invalid_proposal.region_not_initialized.inc(); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 12279651bf6..52d434414c4 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -229,6 +229,7 @@ impl Peer { return false; } self.retry_pending_reads(&store_ctx.cfg); + self.check_force_leader(store_ctx); self.raft_group_mut().tick() } @@ -886,6 +887,13 @@ impl Peer { self.storage_mut() .entry_storage_mut() .update_cache_persisted(persisted_index); + + if self.is_in_force_leader() { + // forward commit index, the committed entries will be applied in + // the next raft tick round. 
+ self.maybe_force_forward_commit_index(); + } + if !self.destroy_progress().started() { // We may need to check if there is persisted committed logs. self.set_has_ready(); @@ -1049,6 +1057,16 @@ impl Peer { } _ => {} } + + if self.is_in_force_leader() && ss.raft_state != StateRole::Leader { + // for some reason, it's not leader anymore + info!(self.logger, + "step down in force leader state"; + "state" => ?ss.raft_state, + ); + self.on_force_leader_fail(); + } + self.read_progress() .update_leader_info(ss.leader_id, term, self.region()); let target = self.refresh_leader_transferee(); diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs b/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs new file mode 100644 index 00000000000..ba7e391dbef --- /dev/null +++ b/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs @@ -0,0 +1,350 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::mem; + +use collections::HashSet; +use engine_traits::{KvEngine, RaftEngine}; +use raft::{eraftpb::MessageType, StateRole, Storage}; +use raftstore::store::{util::LeaseState, ForceLeaderState, UnsafeRecoveryForceLeaderSyncer}; +use slog::{info, warn}; +use tikv_util::time::Instant as TiInstant; + +use crate::{batch::StoreContext, raft::Peer, router::PeerMsg}; + +impl Peer { + pub fn on_enter_pre_force_leader( + &mut self, + ctx: &StoreContext, + syncer: UnsafeRecoveryForceLeaderSyncer, + failed_stores: HashSet, + ) { + match self.force_leader() { + Some(ForceLeaderState::PreForceLeader { .. }) => { + self.on_force_leader_fail(); + } + Some(ForceLeaderState::ForceLeader { .. }) => { + // already is a force leader, do nothing + return; + } + Some(ForceLeaderState::WaitTicks { .. 
}) => { + *self.force_leader_mut() = None; + } + None => {} + } + + if !self.storage().is_initialized() { + warn!(self.logger, + "Unsafe recovery, cannot force leader since this peer is not initialized"; + ); + return; + } + + let ticks = if self.is_leader() { + // wait two rounds of election timeout to trigger check quorum to + // step down the leader. + // Note: check quorum is triggered every `election_timeout` instead + // of `randomized_election_timeout` + Some( + self.raft_group().raft.election_timeout() * 2 + - self.raft_group().raft.election_elapsed, + ) + // When election timeout is triggered, leader_id is set to INVALID_ID. + // But learner(not promotable) is a exception here as it wouldn't tick + // election. + } else if self.raft_group().raft.promotable() && self.leader_id() != raft::INVALID_ID { + // wait one round of election timeout to make sure leader_id is invalid + if self.raft_group().raft.election_elapsed <= ctx.cfg.raft_election_timeout_ticks { + warn!( + self.logger, + "Unsafe recovery, reject pre force leader due to leader lease may not expired" + ); + return; + } + Some( + self.raft_group().raft.randomized_election_timeout() + - self.raft_group().raft.election_elapsed, + ) + } else { + None + }; + + if let Some(ticks) = ticks { + info!(self.logger, + "Unsafe recovery, enter wait ticks"; + "ticks" => ticks, + ); + *self.force_leader_mut() = Some(ForceLeaderState::WaitTicks { + syncer, + failed_stores, + ticks, + }); + self.set_has_ready(); + return; + } + + let expected_alive_voter = self.get_force_leader_expected_alive_voter(&failed_stores); + if !expected_alive_voter.is_empty() + && self + .raft_group() + .raft + .prs() + .has_quorum(&expected_alive_voter) + { + warn!(self.logger, + "Unsafe recovery, reject pre force leader due to has quorum"; + ); + return; + } + + info!(self.logger, + "Unsafe recovery, enter pre force leader state"; + "alive_voter" => ?expected_alive_voter, + ); + + // Do not use prevote as prevote won't set `vote` to 
itself. + // When PD issues force leader on two different peer, it may cause + // two force leader in same term. + self.raft_group_mut().raft.pre_vote = false; + // trigger vote request to all voters, will check the vote result in + // `check_force_leader` + if let Err(e) = self.raft_group_mut().campaign() { + warn!(self.logger, "Unsafe recovery, campaign failed"; "err" => ?e); + } + assert_eq!(self.raft_group().raft.state, StateRole::Candidate); + if !self + .raft_group() + .raft + .prs() + .votes() + .get(&self.peer_id()) + .unwrap() + { + warn!(self.logger, + "Unsafe recovery, pre force leader failed to campaign"; + ); + self.on_force_leader_fail(); + return; + } + + *self.force_leader_mut() = Some(ForceLeaderState::PreForceLeader { + syncer, + failed_stores, + }); + self.set_has_ready(); + } + + pub fn on_force_leader_fail(&mut self) { + self.raft_group_mut().raft.pre_vote = true; + self.raft_group_mut().raft.set_check_quorum(true); + *self.force_leader_mut() = None; + } + + fn on_enter_force_leader(&mut self) { + info!(self.logger, "Unsafe recovery, enter force leader state"); + assert_eq!(self.raft_group().raft.state, StateRole::Candidate); + + let failed_stores = match self.force_leader_mut().take() { + Some(ForceLeaderState::PreForceLeader { failed_stores, .. }) => failed_stores, + _ => unreachable!(), + }; + + let peer_ids: Vec<_> = self.voters().iter().collect(); + for peer_id in peer_ids { + let store_id = self + .region() + .get_peers() + .iter() + .find(|p| p.get_id() == peer_id) + .unwrap() + .get_store_id(); + if !failed_stores.contains(&store_id) { + continue; + } + + // make fake vote responses from peers on failed store. 
+ let mut msg = raft::eraftpb::Message::new(); + msg.msg_type = MessageType::MsgRequestVoteResponse; + msg.reject = false; + msg.term = self.term(); + msg.from = peer_id; + msg.to = self.peer_id(); + self.raft_group_mut().step(msg).unwrap(); + } + + // after receiving all votes, should become leader + assert!(self.is_leader()); + self.raft_group_mut().raft.set_check_quorum(false); + + *self.force_leader_mut() = Some(ForceLeaderState::ForceLeader { + time: TiInstant::now_coarse(), + failed_stores, + }); + self.set_has_ready(); + } + + pub fn on_exit_force_leader(&mut self, ctx: &StoreContext) { + if !self.has_force_leader() { + return; + } + + info!(self.logger, "exit force leader state"); + *self.force_leader_mut() = None; + // leader lease shouldn't be renewed in force leader state. + assert_eq!(self.leader_lease().inspect(None), LeaseState::Expired); + let term = self.term(); + self.raft_group_mut() + .raft + .become_follower(term, raft::INVALID_ID); + + self.raft_group_mut().raft.set_check_quorum(true); + self.raft_group_mut().raft.pre_vote = true; + if self.raft_group().raft.promotable() { + // Do not campaign directly here, otherwise on_role_changed() won't called for + // follower state + let _ = ctx + .router + .send(self.region_id(), PeerMsg::ExitForceLeaderStateCampaign); + } + self.set_has_ready(); + } + + pub fn on_exit_force_leader_campaign(&mut self) { + let _ = self.raft_group_mut().campaign(); + self.set_has_ready(); + } + + fn get_force_leader_expected_alive_voter(&self, failed_stores: &HashSet) -> HashSet { + let region = self.region(); + self.voters() + .iter() + .filter(|peer_id| { + let store_id = region + .get_peers() + .iter() + .find(|p| p.get_id() == *peer_id) + .unwrap() + .get_store_id(); + !failed_stores.contains(&store_id) + }) + .collect() + } + + pub fn check_force_leader(&mut self, ctx: &StoreContext) { + if let Some(ForceLeaderState::WaitTicks { + syncer, + failed_stores, + ticks, + }) = self.force_leader_mut() + { + if *ticks == 0 { 
+ let syncer_clone = syncer.clone(); + let s = mem::take(failed_stores); + self.on_enter_pre_force_leader(ctx, syncer_clone, s); + } else { + *ticks -= 1; + } + return; + }; + + let failed_stores = match self.force_leader() { + None => return, + Some(ForceLeaderState::ForceLeader { .. }) => { + if self.maybe_force_forward_commit_index() { + self.set_has_ready(); + } + return; + } + Some(ForceLeaderState::PreForceLeader { failed_stores, .. }) => failed_stores, + Some(ForceLeaderState::WaitTicks { .. }) => unreachable!(), + }; + + if self.raft_group().raft.election_elapsed + 1 < ctx.cfg.raft_election_timeout_ticks { + // wait as longer as it can to collect responses of request vote + return; + } + + let expected_alive_voter = self.get_force_leader_expected_alive_voter(failed_stores); + let check = || { + if self.raft_group().raft.state != StateRole::Candidate { + Err(format!( + "unexpected role {:?}", + self.raft_group().raft.state + )) + } else { + let mut granted = 0; + for (id, vote) in self.raft_group().raft.prs().votes() { + if expected_alive_voter.contains(id) { + if *vote { + granted += 1; + } else { + return Err(format!("receive reject response from {}", *id)); + } + } else if *id == self.peer_id() { + // self may be a learner + continue; + } else { + return Err(format!( + "receive unexpected vote from {} vote {}", + *id, *vote + )); + } + } + Ok(granted) + } + }; + + match check() { + Err(err) => { + warn!(self.logger, + "Unsafe recovery, pre force leader check failed"; + "alive_voter" => ?expected_alive_voter, + "reason" => err, + ); + self.on_force_leader_fail(); + } + Ok(granted) => { + info!(self.logger, + "Unsafe recovery, expected live voters:"; + "voters" => ?expected_alive_voter, + "granted" => granted, + ); + if granted == expected_alive_voter.len() { + self.on_enter_force_leader(); + } + } + } + } + + pub fn maybe_force_forward_commit_index(&mut self) -> bool { + let failed_stores = match self.force_leader() { + Some(ForceLeaderState::ForceLeader { 
failed_stores, .. }) => failed_stores, + _ => unreachable!(), + }; + + let region = self.region(); + let mut replicated_idx = self.raft_group().raft.raft_log.persisted; + for (peer_id, p) in self.raft_group().raft.prs().iter() { + let store_id = region + .get_peers() + .iter() + .find(|p| p.get_id() == *peer_id) + .unwrap() + .get_store_id(); + if failed_stores.contains(&store_id) { + continue; + } + if replicated_idx > p.matched { + replicated_idx = p.matched; + } + } + + if self.raft_group().store().term(replicated_idx).unwrap_or(0) < self.term() { + // do not commit logs of previous term directly + return false; + } + + let idx = std::cmp::max(self.raft_group().raft.raft_log.committed, replicated_idx); + self.raft_group_mut().raft.raft_log.committed = idx; + true + } +} diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/mod.rs b/components/raftstore-v2/src/operation/unsafe_recovery/mod.rs new file mode 100644 index 00000000000..c24c2838593 --- /dev/null +++ b/components/raftstore-v2/src/operation/unsafe_recovery/mod.rs @@ -0,0 +1,4 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +mod force_leader; +mod report; diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs new file mode 100644 index 00000000000..9ede7714b6d --- /dev/null +++ b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs @@ -0,0 +1,21 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::pdpb; +use raftstore::store::Transport; + +use crate::{batch::StoreContext, fsm::Store}; + +impl Store { + pub fn on_unsafe_recovery_report( + &self, + ctx: &StoreContext, + report: pdpb::StoreReport, + ) where + EK: KvEngine, + ER: RaftEngine, + T: Transport, + { + self.store_heartbeat_pd(ctx, Some(report)) + } +} diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index ef7363ee0c4..392bf82e5ec 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -23,8 +23,8 @@ use raftstore::{ fsm::ApplyMetrics, metrics::RAFT_PEER_PENDING_DURATION, util::{Lease, RegionReadProgress}, - Config, EntryStorage, PeerStat, ProposalQueue, ReadDelegate, ReadIndexQueue, ReadProgress, - TabletSnapManager, WriteTask, + Config, EntryStorage, ForceLeaderState, PeerStat, ProposalQueue, ReadDelegate, + ReadIndexQueue, ReadProgress, TabletSnapManager, WriteTask, }, }; use slog::{debug, info, Logger}; @@ -125,6 +125,16 @@ pub struct Peer { gc_peer_context: GcPeerContext, abnormal_peer_context: AbnormalPeerContext, + + /// Force leader state is only used in online recovery when the majority of + /// peers are missing. In this state, it forces one peer to become leader + /// out of accordance with Raft election rule, and forbids any + /// read/write proposals. With that, we can further propose remove + /// failed-nodes conf-change, to make the Raft group forms majority and + /// works normally later on. + /// + /// For details, see the comment of `ForceLeaderState`. + force_leader_state: Option, } impl Peer { @@ -215,6 +225,7 @@ impl Peer { pending_messages: vec![], gc_peer_context: GcPeerContext::default(), abnormal_peer_context: AbnormalPeerContext::default(), + force_leader_state: None, }; // If this region has only one peer and I am the one, campaign directly. 
@@ -639,10 +650,8 @@ impl Peer { self.raft_group.raft.term } - #[inline] - // TODO - pub fn has_force_leader(&self) -> bool { - false + pub fn voters(&self) -> raft::util::Union<'_> { + self.raft_group.raft.prs().conf().voters().ids() } pub fn serving(&self) -> bool { @@ -955,4 +964,23 @@ impl Peer { true }) } + + pub fn has_force_leader(&self) -> bool { + self.force_leader_state.is_some() + } + + pub fn is_in_force_leader(&self) -> bool { + matches!( + self.force_leader_state, + Some(ForceLeaderState::ForceLeader { .. }) + ) + } + + pub fn force_leader(&self) -> Option<&ForceLeaderState> { + self.force_leader_state.as_ref() + } + + pub fn force_leader_mut(&mut self) -> &mut Option { + &mut self.force_leader_state + } } diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 325e4ee4a1b..ee3399895d4 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -5,12 +5,14 @@ use std::{ sync::{Arc, Mutex}, }; +use collections::HashSet; use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{KvEngine, RaftEngine}; use futures::Future; use kvproto::{ kvrpcpb::ExtraOp, metapb::RegionEpoch, + pdpb, raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, raft_serverpb::RaftMessage, }; @@ -18,11 +20,16 @@ use raftstore::{ router::CdcHandle, store::{ fsm::ChangeObserver, AsyncReadNotifier, Callback, FetchedLogs, GenSnapRes, RegionSnapshot, + UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, + UnsafeRecoveryWaitApplySyncer, }, }; use slog::warn; +use tikv_util::box_err; -use super::{build_any_channel, message::CaptureChange, PeerMsg, QueryResChannel, QueryResult}; +use super::{ + build_any_channel, message::CaptureChange, PeerMsg, QueryResChannel, QueryResult, StoreMsg, +}; use crate::{batch::StoreRouter, operation::LocalReader, StoreMeta}; impl AsyncReadNotifier for StoreRouter { @@ -250,3 +257,54 @@ impl CdcHandle for RaftRouter { 
Ok(()) } } + +/// A wrapper of StoreRouter that is specialized for implementing +/// UnsafeRecoveryRouter. +pub struct UnsafeRecoveryRouter(Mutex>); + +impl UnsafeRecoveryRouter { + pub fn new(router: StoreRouter) -> UnsafeRecoveryRouter { + UnsafeRecoveryRouter(Mutex::new(router)) + } +} + +impl UnsafeRecoveryHandle for UnsafeRecoveryRouter { + fn send_enter_force_leader( + &self, + region_id: u64, + syncer: UnsafeRecoveryForceLeaderSyncer, + failed_stores: HashSet, + ) -> crate::Result<()> { + let router = self.0.lock().unwrap(); + router.check_send( + region_id, + PeerMsg::EnterForceLeaderState { + syncer, + failed_stores, + }, + ) + } + + fn broadcast_exit_force_leader(&self) { + let router = self.0.lock().unwrap(); + router.broadcast_normal(|| PeerMsg::ExitForceLeaderState); + } + + fn broadcast_wait_apply(&self, syncer: UnsafeRecoveryWaitApplySyncer) { + let router = self.0.lock().unwrap(); + router.broadcast_normal(|| PeerMsg::UnsafeRecoveryWaitApply(syncer.clone())); + } + + fn broadcast_fill_out_report(&self, syncer: UnsafeRecoveryFillOutReportSyncer) { + let router = self.0.lock().unwrap(); + router.broadcast_normal(|| PeerMsg::UnsafeRecoveryFillOutReport(syncer.clone())); + } + + fn send_report(&self, report: pdpb::StoreReport) -> crate::Result<()> { + let router = self.0.lock().unwrap(); + match router.force_send_control(StoreMsg::UnsafeRecoveryReport(report)) { + Ok(()) => Ok(()), + Err(SendError(_)) => Err(box_err!("fail to send unsafe recovery store report")), + } + } +} diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 0e72d83b823..e8477968020 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -3,16 +3,20 @@ // #[PerformanceCriticalPath] use std::sync::{mpsc::SyncSender, Arc}; +use collections::HashSet; use kvproto::{ import_sstpb::SstMeta, metapb, metapb::RegionEpoch, + pdpb, raft_cmdpb::{RaftCmdRequest, RaftRequestHeader}, 
raft_serverpb::RaftMessage, }; use raftstore::store::{ fsm::ChangeObserver, metrics::RaftEventDurationType, simple_write::SimpleWriteBinary, util::LatencyInspector, FetchedLogs, GenSnapRes, TabletSnapKey, + UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, + UnsafeRecoveryWaitApplySyncer, }; use resource_control::ResourceMetered; use tikv_util::time::Instant; @@ -246,6 +250,20 @@ pub enum PeerMsg { }, /// A message that used to check if a snapshot gc is happened. SnapGc(Box<[TabletSnapKey]>), + + /// Let a peer enters force leader state during unsafe recovery. + EnterForceLeaderState { + syncer: UnsafeRecoveryForceLeaderSyncer, + failed_stores: HashSet, + }, + /// Let a peer exits force leader state. + ExitForceLeaderState, + /// Let a peer campaign directly after exit force leader. + ExitForceLeaderStateCampaign, + /// Wait for a peer to apply to the latest commit index. + UnsafeRecoveryWaitApply(UnsafeRecoveryWaitApplySyncer), + /// Wait for a peer to fill its status to the report. + UnsafeRecoveryFillOutReport(UnsafeRecoveryFillOutReportSyncer), } impl ResourceMetered for PeerMsg {} @@ -346,6 +364,8 @@ pub enum StoreMsg { send_time: Instant, inspector: LatencyInspector, }, + /// Send a store report for unsafe recovery. 
+ UnsafeRecoveryReport(pdpb::StoreReport), } impl ResourceMetered for StoreMsg {} diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs index 2d0011c1ef0..7630e35c2a5 100644 --- a/components/raftstore-v2/src/router/mod.rs +++ b/components/raftstore-v2/src/router/mod.rs @@ -11,7 +11,7 @@ pub use self::response_channel::FlushChannel; #[cfg(feature = "testexport")] pub use self::response_channel::FlushSubscriber; pub use self::{ - imp::RaftRouter, + imp::{RaftRouter, UnsafeRecoveryRouter}, internal_message::ApplyRes, message::{PeerMsg, PeerTick, RaftRequest, StoreMsg, StoreTick}, response_channel::{ diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 4e97f49c261..f89ea75b604 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -47,7 +47,8 @@ pub enum Task { // In store.rs. StoreHeartbeat { stats: pdpb::StoreStats, - // TODO: StoreReport, StoreDrAutoSyncStatus + report: Option, + // TODO: StoreDrAutoSyncStatus }, UpdateStoreInfos { cpu_usages: RecordPairVec, @@ -303,8 +304,8 @@ where fn run(&mut self, task: Task) { self.maybe_schedule_heartbeat_receiver(); match task { - Task::StoreHeartbeat { stats } => { - self.handle_store_heartbeat(stats, false /* is_fake_hb */) + Task::StoreHeartbeat { stats, report } => { + self.handle_store_heartbeat(stats, false /* is_fake_hb */, report) } Task::UpdateStoreInfos { cpu_usages, diff --git a/components/raftstore-v2/src/worker/pd/store.rs b/components/raftstore-v2/src/worker/pd/store.rs index f55484b3910..14aa3c5ca9e 100644 --- a/components/raftstore-v2/src/worker/pd/store.rs +++ b/components/raftstore-v2/src/worker/pd/store.rs @@ -1,8 +1,8 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::cmp; +use std::{cmp, sync::Arc}; -use collections::HashMap; +use collections::{HashMap, HashSet}; use engine_traits::{KvEngine, RaftEngine}; use fail::fail_point; use kvproto::pdpb; @@ -14,7 +14,10 @@ use pd_client::{ PdClient, }; use prometheus::local::LocalHistogram; -use raftstore::store::{metrics::STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC, util::LatencyInspector}; +use raftstore::store::{ + metrics::STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC, util::LatencyInspector, + UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, +}; use slog::{error, info, warn}; use tikv_util::{ metrics::RecordPairVec, @@ -24,7 +27,7 @@ use tikv_util::{ }; use super::Runner; -use crate::router::StoreMsg; +use crate::router::{StoreMsg, UnsafeRecoveryRouter}; const HOTSPOT_REPORT_CAPACITY: usize = 1000; @@ -175,7 +178,12 @@ where ER: RaftEngine, T: PdClient + 'static, { - pub fn handle_store_heartbeat(&mut self, mut stats: pdpb::StoreStats, is_fake_hb: bool) { + pub fn handle_store_heartbeat( + &mut self, + mut stats: pdpb::StoreStats, + is_fake_hb: bool, + store_report: Option, + ) { let mut report_peers = HashMap::default(); for (region_id, region_peer) in &mut self.region_peers { let read_bytes = region_peer.read_bytes - region_peer.last_store_report_read_bytes; @@ -268,13 +276,43 @@ where // Update slowness statistics self.update_slowness_in_store_stats(&mut stats, last_query_sum); - let resp = self.pd_client.store_heartbeat(stats, None, None); + let resp = self.pd_client.store_heartbeat(stats, store_report, None); let logger = self.logger.clone(); + let router = self.router.clone(); let mut grpc_service_manager = self.grpc_service_manager.clone(); let f = async move { match resp.await { Ok(mut resp) => { - // TODO: unsafe recovery + // TODO: handle replication_status + + if let Some(plan) = resp.recovery_plan.take() { + let router = Arc::new(UnsafeRecoveryRouter::new(router)); + info!(logger, "Unsafe recovery, received a recovery plan"); + if plan.has_force_leader() { + let mut 
failed_stores = HashSet::default(); + for failed_store in plan.get_force_leader().get_failed_stores() { + failed_stores.insert(*failed_store); + } + let syncer = UnsafeRecoveryForceLeaderSyncer::new( + plan.get_step(), + router.clone(), + ); + for region in plan.get_force_leader().get_enter_force_leaders() { + if let Err(e) = router.send_enter_force_leader( + *region, + syncer.clone(), + failed_stores.clone(), + ) { + error!(logger, + "fail to send force leader message for recovery"; + "err" => ?e); + } + } + } + // else { + // TODO: handle creates/tombstone/demotes + // } + } // Attention, as Hibernate Region is eliminated in // raftstore-v2, followings just mock the awaken @@ -338,7 +376,7 @@ where // And here, the `is_fake_hb` should be marked with `True` to represent that // this heartbeat message is a fake one. - self.handle_store_heartbeat(stats, true); + self.handle_store_heartbeat(stats, true, None); warn!(self.logger, "scheduling store_heartbeat timeout, force report store slow score to pd."; "store_id" => self.store_id, ); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index a03662ef716..9c3fdb792a3 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -776,7 +776,7 @@ where return; } - if !self.fsm.peer.is_force_leader() { + if !self.fsm.peer.is_in_force_leader() { error!( "Unsafe recovery, demoting failed voters failed, since this peer is not forced leader"; "region_id" => self.region().get_id(), @@ -1821,7 +1821,9 @@ where info!( "Unsafe recovery, expected live voters:"; "voters" => ?expected_alive_voter, - "granted" => granted + "granted" => granted, + "region_id" => self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), ); if granted == expected_alive_voter.len() { self.on_enter_force_leader(); @@ -1966,17 +1968,15 @@ where self.register_check_long_uncommitted_tick(); } - if let Some(ForceLeaderState::ForceLeader { .. 
}) = self.fsm.peer.force_leader { - if r != StateRole::Leader { - // for some reason, it's not leader anymore - info!( - "step down in force leader state"; - "region_id" => self.fsm.region_id(), - "peer_id" => self.fsm.peer_id(), - "state" => ?r, - ); - self.on_force_leader_fail(); - } + if self.fsm.peer.is_in_force_leader() && r != StateRole::Leader { + // for some reason, it's not leader anymore + info!( + "step down in force leader state"; + "region_id" => self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + "state" => ?r, + ); + self.on_force_leader_fail(); } } } @@ -2205,7 +2205,7 @@ where let syncer_clone = syncer.clone(); let failed_voters_clone = failed_voters.clone(); self.fsm.peer.unsafe_recovery_state = None; - if !self.fsm.peer.is_force_leader() { + if !self.fsm.peer.is_in_force_leader() { error!( "Unsafe recovery, lost forced leadership after exiting joint state"; "region_id" => self.region().get_id(), @@ -2224,7 +2224,7 @@ where "region_id" => self.region().get_id(), "peer_id" => self.fsm.peer_id(), ); - if self.fsm.peer.is_force_leader() { + if self.fsm.peer.is_in_force_leader() { self.propose_raft_command_internal( exit_joint_request(self.region(), &self.fsm.peer.peer), Callback::::write(Box::new(|resp| { @@ -3922,7 +3922,8 @@ where // until new leader elected, but we can't revert this operation // because its result is already persisted in apply worker // TODO: should we transfer leader here? 
- let demote_self = is_learner(&self.fsm.peer.peer) && !self.fsm.peer.is_force_leader(); + let demote_self = + is_learner(&self.fsm.peer.peer) && !self.fsm.peer.is_in_force_leader(); if remove_self || demote_self { warn!( "Removing or demoting leader"; diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 8f421bd3133..beda74b41d4 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -25,6 +25,7 @@ mod replication_mode; pub mod simple_write; pub mod snap; mod txn_ext; +mod unsafe_recovery; mod worker; #[cfg(any(test, feature = "testexport"))] @@ -56,8 +57,10 @@ pub use self::{ }, peer::{ can_amend_read, get_sync_log_from_request, make_transfer_leader_response, - propose_read_index, should_renew_lease, Peer, PeerStat, ProposalContext, ProposalQueue, - RequestInspector, RequestPolicy, SnapshotRecoveryWaitApplySyncer, + propose_read_index, should_renew_lease, ForceLeaderState, Peer, PeerStat, ProposalContext, + ProposalQueue, RequestInspector, RequestPolicy, SnapshotRecoveryWaitApplySyncer, + UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, + UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryWaitApplySyncer, TRANSFER_LEADER_COMMAND_REPLY_CTX, }, peer_storage::{ @@ -76,6 +79,7 @@ pub use self::{ }, transport::{CasualRouter, ProposalRouter, SignificantRouter, StoreRouter, Transport}, txn_ext::{LocksStatus, PeerPessimisticLocks, PessimisticLockPair, TxnExt}, + unsafe_recovery::UnsafeRecoveryHandle, util::{RegionReadProgress, RegionReadProgressRegistry}, worker::{ metrics as worker_metrics, AutoSplitController, BatchComponent, Bucket, BucketRange, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index c8401f4a830..84c00548b70 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -89,14 +89,15 @@ use crate::{ async_io::{read::ReadTask, write::WriteMsg, write_router::WriteRouter}, 
fsm::{ apply::{self, CatchUpLogs}, - store::{PollContext, RaftRouter}, + store::PollContext, Apply, ApplyMetrics, ApplyTask, Proposal, }, hibernate_state::GroupState, memory::{needs_evict_entry_cache, MEMTRACE_RAFT_ENTRIES}, - msg::{CasualMessage, ErrorCallback, PeerMsg, RaftCommand, SignificantMsg, StoreMsg}, + msg::{CasualMessage, ErrorCallback, RaftCommand}, peer_storage::HandleSnapshotResult, txn_ext::LocksStatus, + unsafe_recovery::UnsafeRecoveryHandle, util::{admin_cmd_epoch_lookup, RegionReadProgress}, worker::{ HeartbeatTask, RaftlogGcTask, ReadDelegate, ReadExecutor, ReadProgress, RegionTask, @@ -545,7 +546,7 @@ pub enum ForceLeaderState { // holds a reference to the same strcuture, and whoever finishes the task // drops its reference. Once the last reference is dropped, indicating all // the peers have finished their own tasks, the closure is invoked. -pub struct InvokeClosureOnDrop(Box); +pub struct InvokeClosureOnDrop(Option>); impl fmt::Debug for InvokeClosureOnDrop { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -555,20 +556,20 @@ impl fmt::Debug for InvokeClosureOnDrop { impl Drop for InvokeClosureOnDrop { fn drop(&mut self) { - self.0(); + if let Some(on_drop) = self.0.take() { + on_drop(); + } } } -pub fn start_unsafe_recovery_report( - router: &RaftRouter, +pub fn start_unsafe_recovery_report( + router: Arc, report_id: u64, exit_force_leader: bool, ) { let wait_apply = UnsafeRecoveryWaitApplySyncer::new(report_id, router.clone(), exit_force_leader); - router.broadcast_normal(|| { - PeerMsg::SignificantMsg(SignificantMsg::UnsafeRecoveryWaitApply(wait_apply.clone())) - }); + router.broadcast_wait_apply(wait_apply); } // Propose a read index request to the raft group, return the request id and @@ -653,13 +654,11 @@ pub fn can_amend_read( pub struct UnsafeRecoveryForceLeaderSyncer(Arc); impl UnsafeRecoveryForceLeaderSyncer { - pub fn new(report_id: u64, router: RaftRouter) -> Self { - let thread_safe_router = Mutex::new(router); - let 
inner = InvokeClosureOnDrop(Box::new(move || { + pub fn new(report_id: u64, router: Arc) -> Self { + let inner = InvokeClosureOnDrop(Some(Box::new(move || { info!("Unsafe recovery, force leader finished."); - let router_ptr = thread_safe_router.lock().unwrap(); - start_unsafe_recovery_report(&*router_ptr, report_id, false); - })); + start_unsafe_recovery_report(router, report_id, false); + }))); UnsafeRecoveryForceLeaderSyncer(Arc::new(inner)) } } @@ -671,19 +670,17 @@ pub struct UnsafeRecoveryExecutePlanSyncer { } impl UnsafeRecoveryExecutePlanSyncer { - pub fn new(report_id: u64, router: RaftRouter) -> Self { - let thread_safe_router = Mutex::new(router); + pub fn new(report_id: u64, router: Arc) -> Self { let abort = Arc::new(Mutex::new(false)); let abort_clone = abort.clone(); - let closure = InvokeClosureOnDrop(Box::new(move || { + let closure = InvokeClosureOnDrop(Some(Box::new(move || { info!("Unsafe recovery, plan execution finished"); if *abort_clone.lock().unwrap() { warn!("Unsafe recovery, plan execution aborted"); return; } - let router_ptr = thread_safe_router.lock().unwrap(); - start_unsafe_recovery_report(&*router_ptr, report_id, true); - })); + start_unsafe_recovery_report(router, report_id, true); + }))); UnsafeRecoveryExecutePlanSyncer { _closure: Arc::new(closure), abort, @@ -706,7 +703,7 @@ impl SnapshotRecoveryWaitApplySyncer { let thread_safe_router = Mutex::new(sender); let abort = Arc::new(Mutex::new(false)); let abort_clone = abort.clone(); - let closure = InvokeClosureOnDrop(Box::new(move || { + let closure = InvokeClosureOnDrop(Some(Box::new(move || { info!("region {} wait apply finished", region_id); if *abort_clone.lock().unwrap() { warn!("wait apply aborted"); @@ -717,7 +714,7 @@ impl SnapshotRecoveryWaitApplySyncer { _ = router_ptr.send(region_id).map_err(|_| { warn!("reply waitapply states failure."); }); - })); + }))); SnapshotRecoveryWaitApplySyncer { _closure: Arc::new(closure), abort, @@ -738,32 +735,23 @@ pub struct 
UnsafeRecoveryWaitApplySyncer { impl UnsafeRecoveryWaitApplySyncer { pub fn new( report_id: u64, - router: RaftRouter, + router: Arc, exit_force_leader: bool, ) -> Self { - let thread_safe_router = Mutex::new(router); let abort = Arc::new(Mutex::new(false)); let abort_clone = abort.clone(); - let closure = InvokeClosureOnDrop(Box::new(move || { + let closure = InvokeClosureOnDrop(Some(Box::new(move || { info!("Unsafe recovery, wait apply finished"); if *abort_clone.lock().unwrap() { warn!("Unsafe recovery, wait apply aborted"); return; } - let router_ptr = thread_safe_router.lock().unwrap(); if exit_force_leader { - (*router_ptr).broadcast_normal(|| { - PeerMsg::SignificantMsg(SignificantMsg::ExitForceLeaderState) - }); + router.broadcast_exit_force_leader(); } - let fill_out_report = - UnsafeRecoveryFillOutReportSyncer::new(report_id, (*router_ptr).clone()); - (*router_ptr).broadcast_normal(|| { - PeerMsg::SignificantMsg(SignificantMsg::UnsafeRecoveryFillOutReport( - fill_out_report.clone(), - )) - }); - })); + let fill_out_report = UnsafeRecoveryFillOutReportSyncer::new(report_id, router.clone()); + router.broadcast_fill_out_report(fill_out_report); + }))); UnsafeRecoveryWaitApplySyncer { _closure: Arc::new(closure), abort, @@ -782,11 +770,10 @@ pub struct UnsafeRecoveryFillOutReportSyncer { } impl UnsafeRecoveryFillOutReportSyncer { - pub fn new(report_id: u64, router: RaftRouter) -> Self { - let thread_safe_router = Mutex::new(router); + pub fn new(report_id: u64, router: Arc) -> Self { let reports = Arc::new(Mutex::new(vec![])); let reports_clone = reports.clone(); - let closure = InvokeClosureOnDrop(Box::new(move || { + let closure = InvokeClosureOnDrop(Some(Box::new(move || { info!("Unsafe recovery, peer reports collected"); let mut store_report = pdpb::StoreReport::default(); { @@ -794,12 +781,10 @@ impl UnsafeRecoveryFillOutReportSyncer { store_report.set_peer_reports(mem::take(&mut *reports_ptr).into()); } store_report.set_step(report_id); - let 
router_ptr = thread_safe_router.lock().unwrap(); - if let Err(e) = (*router_ptr).send_control(StoreMsg::UnsafeRecoveryReport(store_report)) - { + if let Err(e) = router.send_report(store_report) { error!("Unsafe recovery, fail to schedule reporting"; "err" => ?e); } - })); + }))); UnsafeRecoveryFillOutReportSyncer { _closure: Arc::new(closure), reports, @@ -3279,7 +3264,7 @@ where let persist_index = self.raft_group.raft.raft_log.persisted; self.mut_store().update_cache_persisted(persist_index); - if let Some(ForceLeaderState::ForceLeader { .. }) = self.force_leader { + if self.is_in_force_leader() { // forward commit index, the committed entries will be applied in the next raft // base tick round self.maybe_force_forward_commit_index(); @@ -3322,7 +3307,7 @@ where self.report_commit_log_duration(pre_commit_index, &ctx.raft_metrics); let persist_index = self.raft_group.raft.raft_log.persisted; - if let Some(ForceLeaderState::ForceLeader { .. }) = self.force_leader { + if self.is_in_force_leader() { // forward commit index, the committed entries will be applied in the next raft // base tick round self.maybe_force_forward_commit_index(); @@ -4793,7 +4778,7 @@ where &self.peer, changes.as_ref(), &cc, - self.is_force_leader(), + self.is_in_force_leader(), )?; ctx.raft_metrics.propose.conf_change.inc(); @@ -5154,7 +5139,7 @@ where } #[inline] - pub fn is_force_leader(&self) -> bool { + pub fn is_in_force_leader(&self) -> bool { matches!( self.force_leader, Some(ForceLeaderState::ForceLeader { .. 
}) @@ -5166,7 +5151,7 @@ where &self.unsafe_recovery_state { if self.raft_group.raft.raft_log.applied >= *target_index || force { - if self.is_force_leader() { + if self.is_in_force_leader() { info!( "Unsafe recovery, finish wait apply"; "region_id" => self.region().get_id(), diff --git a/components/raftstore/src/store/unsafe_recovery.rs b/components/raftstore/src/store/unsafe_recovery.rs new file mode 100644 index 00000000000..163ade71c4a --- /dev/null +++ b/components/raftstore/src/store/unsafe_recovery.rs @@ -0,0 +1,82 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::sync::Mutex; + +use collections::HashSet; +use crossbeam::channel::SendError; +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::pdpb::StoreReport; +use tikv_util::box_err; + +use super::{ + peer::{ + UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, + UnsafeRecoveryWaitApplySyncer, + }, + PeerMsg, RaftRouter, SignificantMsg, SignificantRouter, StoreMsg, +}; +use crate::Result; + +/// A handle for PD to schedule online unsafe recovery commands back to +/// raftstore. 
+pub trait UnsafeRecoveryHandle: Sync + Send { + fn send_enter_force_leader( + &self, + region_id: u64, + syncer: UnsafeRecoveryForceLeaderSyncer, + failed_stores: HashSet, + ) -> Result<()>; + + fn broadcast_exit_force_leader(&self); + + fn broadcast_wait_apply(&self, syncer: UnsafeRecoveryWaitApplySyncer); + + fn broadcast_fill_out_report(&self, syncer: UnsafeRecoveryFillOutReportSyncer); + + fn send_report(&self, report: StoreReport) -> Result<()>; +} + +impl UnsafeRecoveryHandle for Mutex> { + fn send_enter_force_leader( + &self, + region_id: u64, + syncer: UnsafeRecoveryForceLeaderSyncer, + failed_stores: HashSet, + ) -> Result<()> { + let router = self.lock().unwrap(); + router.significant_send( + region_id, + SignificantMsg::EnterForceLeaderState { + syncer, + failed_stores, + }, + ) + } + + fn broadcast_exit_force_leader(&self) { + let router = self.lock().unwrap(); + router.broadcast_normal(|| PeerMsg::SignificantMsg(SignificantMsg::ExitForceLeaderState)); + } + + fn broadcast_wait_apply(&self, syncer: UnsafeRecoveryWaitApplySyncer) { + let router = self.lock().unwrap(); + router.broadcast_normal(|| { + PeerMsg::SignificantMsg(SignificantMsg::UnsafeRecoveryWaitApply(syncer.clone())) + }); + } + + fn broadcast_fill_out_report(&self, syncer: UnsafeRecoveryFillOutReportSyncer) { + let router = self.lock().unwrap(); + router.broadcast_normal(|| { + PeerMsg::SignificantMsg(SignificantMsg::UnsafeRecoveryFillOutReport(syncer.clone())) + }); + } + + fn send_report(&self, report: StoreReport) -> Result<()> { + let router = self.lock().unwrap(); + match router.force_send_control(StoreMsg::UnsafeRecoveryReport(report)) { + Ok(()) => Ok(()), + Err(SendError(_)) => Err(box_err!("fail to send unsafe recovery store report")), + } + } +} diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 51195d727a6..abfb34d4c75 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ 
b/components/raftstore/src/store/worker/pd.rs @@ -8,7 +8,7 @@ use std::{ sync::{ atomic::Ordering, mpsc::{self, Receiver, Sender}, - Arc, + Arc, Mutex, }, thread::{Builder, JoinHandle}, time::{Duration, Instant}, @@ -61,6 +61,7 @@ use crate::{ metrics::*, peer::{UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryForceLeaderSyncer}, transport::SignificantRouter, + unsafe_recovery::UnsafeRecoveryHandle, util::{is_epoch_stale, KeysInfoFormatter, LatencyInspector, RaftstoreDuration}, worker::{ split_controller::{SplitInfo, TOP_N}, @@ -1385,17 +1386,16 @@ where for failed_store in plan.get_force_leader().get_failed_stores() { failed_stores.insert(*failed_store); } + let router = Arc::new(Mutex::new(router.clone())); let syncer = UnsafeRecoveryForceLeaderSyncer::new( plan.get_step(), router.clone(), ); for region in plan.get_force_leader().get_enter_force_leaders() { - if let Err(e) = router.significant_send( + if let Err(e) = router.send_enter_force_leader( *region, - SignificantMsg::EnterForceLeaderState { - syncer: syncer.clone(), - failed_stores: failed_stores.clone(), - }, + syncer.clone(), + failed_stores.clone(), ) { error!("fail to send force leader message for recovery"; "err" => ?e); } @@ -1403,7 +1403,7 @@ where } else { let syncer = UnsafeRecoveryExecutePlanSyncer::new( plan.get_step(), - router.clone(), + Arc::new(Mutex::new(router.clone())), ); for create in plan.take_creates().into_iter() { if let Err(e) = diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 00ca2239c34..1e7e307696b 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -25,6 +25,7 @@ use kvproto::{ errorpb::Error as PbError, kvrpcpb::ApiVersion, metapb::{self, Buckets, PeerRole, RegionEpoch}, + pdpb, raft_cmdpb::{ AdminCmdType, AdminRequest, CmdType, RaftCmdRequest, RaftCmdResponse, RegionDetailResponse, Request, Response, StatusCmdType, @@ -44,7 +45,7 @@ use raftstore::{ Error, 
Result, }; use raftstore_v2::{ - router::{DebugInfoChannel, PeerMsg, QueryResult}, + router::{DebugInfoChannel, PeerMsg, QueryResult, StoreMsg, StoreTick}, write_initial_states, SimpleWriteEncoder, StoreMeta, StoreRouter, }; use resource_control::ResourceGroupManager; @@ -1769,6 +1770,50 @@ impl, EK: KvEngine> Cluster { debug!("all nodes are shut down."); } + pub fn must_send_store_heartbeat(&self, node_id: u64) { + let router = self.sim.rl().get_router(node_id).unwrap(); + router + .send_control(StoreMsg::Tick(StoreTick::PdStoreHeartbeat)) + .unwrap(); + } + + pub fn enter_force_leader(&mut self, region_id: u64, store_id: u64, failed_stores: Vec) { + let mut plan = pdpb::RecoveryPlan::default(); + let mut force_leader = pdpb::ForceLeader::default(); + force_leader.set_enter_force_leaders([region_id].to_vec()); + force_leader.set_failed_stores(failed_stores.to_vec()); + plan.set_force_leader(force_leader); + // Triggers the unsafe recovery plan execution. + self.pd_client.must_set_unsafe_recovery_plan(store_id, plan); + self.must_send_store_heartbeat(store_id); + } + + pub fn must_enter_force_leader( + &mut self, + region_id: u64, + store_id: u64, + failed_stores: Vec, + ) -> pdpb::StoreReport { + self.enter_force_leader(region_id, store_id, failed_stores); + let mut store_report = None; + for _ in 0..20 { + store_report = self.pd_client.must_get_store_report(store_id); + if store_report.is_some() { + break; + } + sleep_ms(100); + } + assert_ne!(store_report, None); + store_report.unwrap() + } + + pub fn exit_force_leader(&mut self, region_id: u64, store_id: u64) { + let router = self.sim.rl().get_router(store_id).unwrap(); + router + .send(region_id, PeerMsg::ExitForceLeaderState) + .unwrap(); + } + pub fn must_send_flashback_msg( &mut self, region_id: u64, diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index b8747d5b4b7..ca869f5c761 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -1040,6 +1040,12 @@ pub fn 
copy_tablet_snapshot( if let Some(m) = recver_snap_mgr.key_manager() { m.link_file(recv_path.to_str().unwrap(), final_path.to_str().unwrap())?; } + // Remove final path to make snapshot retryable. + if fs::remove_dir_all(&final_path).is_ok() { + if let Some(m) = recver_snap_mgr.key_manager() { + let _ = m.remove_dir(&final_path, None); + } + } fs::rename(&recv_path, &final_path).map_err(|e| { if let Some(m) = recver_snap_mgr.key_manager() { let _ = m.remove_dir(&final_path, Some(&recv_path)); diff --git a/tests/integrations/raftstore/test_unsafe_recovery.rs b/tests/integrations/raftstore/test_unsafe_recovery.rs index e3f22afe6d9..5e47af7dd5e 100644 --- a/tests/integrations/raftstore/test_unsafe_recovery.rs +++ b/tests/integrations/raftstore/test_unsafe_recovery.rs @@ -7,20 +7,23 @@ use kvproto::{metapb, pdpb}; use pd_client::PdClient; use raft::eraftpb::{ConfChangeType, MessageType}; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, store::find_peer, HandyRwLock}; -fn confirm_quorum_is_lost(cluster: &mut Cluster, region: &metapb::Region) { - let put = new_put_cmd(b"k2", b"v2"); - let req = new_request( - region.get_id(), - region.get_region_epoch().clone(), - vec![put], - true, - ); - // marjority is lost, can't propose command successfully. - cluster - .call_command_on_leader(req, Duration::from_millis(10)) - .unwrap_err(); +macro_rules! confirm_quorum_is_lost { + ($cluster:expr, $region:expr) => {{ + let put = new_put_cmd(b"k2", b"v2"); + let req = new_request( + $region.get_id(), + $region.get_region_epoch().clone(), + vec![put], + true, + ); + // majority is lost, can't propose command successfully. 
+ $cluster + .call_command_on_leader(req, Duration::from_millis(10)) + .unwrap_err(); + }}; } #[test] @@ -41,7 +44,7 @@ fn test_unsafe_recovery_demote_failed_voters() { cluster.stop_node(nodes[1]); cluster.stop_node(nodes[2]); - confirm_quorum_is_lost(&mut cluster, ®ion); + confirm_quorum_is_lost!(cluster, region); cluster.must_enter_force_leader(region.get_id(), nodes[0], vec![nodes[1], nodes[2]]); @@ -96,7 +99,7 @@ fn test_unsafe_recovery_demote_non_exist_voters() { cluster.stop_node(nodes[1]); cluster.stop_node(nodes[2]); - confirm_quorum_is_lost(&mut cluster, ®ion); + confirm_quorum_is_lost!(cluster, region); cluster.must_enter_force_leader(region.get_id(), nodes[0], vec![nodes[1], nodes[2]]); let mut plan = pdpb::RecoveryPlan::default(); @@ -172,7 +175,7 @@ fn test_unsafe_recovery_auto_promote_learner() { cluster.stop_node(nodes[1]); cluster.stop_node(nodes[2]); - confirm_quorum_is_lost(&mut cluster, ®ion); + confirm_quorum_is_lost!(cluster, region); cluster.must_enter_force_leader(region.get_id(), nodes[0], vec![nodes[1], nodes[2]]); let to_be_removed: Vec = region @@ -258,7 +261,7 @@ fn test_unsafe_recovery_already_in_joint_state() { cluster.stop_node(nodes[2]); cluster.must_wait_for_leader_expire(nodes[0], region.get_id()); - confirm_quorum_is_lost(&mut cluster, ®ion); + confirm_quorum_is_lost!(cluster, region); cluster.must_enter_force_leader(region.get_id(), nodes[0], vec![nodes[1], nodes[2]]); let to_be_removed: Vec = region @@ -356,7 +359,7 @@ fn test_unsafe_recovery_early_return_after_exit_joint_state() { cluster.stop_node(nodes[2]); cluster.must_wait_for_leader_expire(nodes[0], region.get_id()); - confirm_quorum_is_lost(&mut cluster, ®ion); + confirm_quorum_is_lost!(cluster, region); cluster.must_enter_force_leader(region.get_id(), nodes[0], vec![nodes[1], nodes[2]]); let to_be_removed: Vec = region @@ -434,33 +437,32 @@ fn test_unsafe_recovery_create_region() { assert_eq!(created, true); } -fn must_get_error_recovery_in_progress( - cluster: &mut 
Cluster, - region: &metapb::Region, - cmd: kvproto::raft_cmdpb::Request, -) { - let req = new_request( - region.get_id(), - region.get_region_epoch().clone(), - vec![cmd], - true, - ); - let resp = cluster - .call_command_on_leader(req, Duration::from_millis(100)) - .unwrap(); - assert_eq!( - resp.get_header().get_error().get_recovery_in_progress(), - &kvproto::errorpb::RecoveryInProgress { - region_id: region.get_id(), - ..Default::default() - } - ); +macro_rules! must_get_error_recovery_in_progress { + ($cluster:expr, $region:expr, $cmd:expr) => { + let req = new_request( + $region.get_id(), + $region.get_region_epoch().clone(), + vec![$cmd], + true, + ); + let resp = $cluster + .call_command_on_leader(req, Duration::from_millis(100)) + .unwrap(); + assert_eq!( + resp.get_header().get_error().get_recovery_in_progress(), + &kvproto::errorpb::RecoveryInProgress { + region_id: $region.get_id(), + ..Default::default() + } + ); + }; } // Test the case that two of three nodes fail and force leader on the rest node. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_force_leader_three_nodes() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.pd_client.disable_default_operator(); cluster.run(); @@ -476,7 +478,7 @@ fn test_force_leader_three_nodes() { cluster.stop_node(3); // quorum is lost, can't propose command successfully. 
- confirm_quorum_is_lost(&mut cluster, ®ion); + confirm_quorum_is_lost!(cluster, region); cluster.must_enter_force_leader(region.get_id(), 1, vec![2, 3]); // remove the peers on failed nodes @@ -488,13 +490,13 @@ fn test_force_leader_three_nodes() { .must_remove_peer(region.get_id(), find_peer(®ion, 3).unwrap().clone()); // forbid writes in force leader state let put = new_put_cmd(b"k3", b"v3"); - must_get_error_recovery_in_progress(&mut cluster, ®ion, put); + must_get_error_recovery_in_progress!(cluster, region, put); // forbid reads in force leader state let get = new_get_cmd(b"k1"); - must_get_error_recovery_in_progress(&mut cluster, ®ion, get); + must_get_error_recovery_in_progress!(cluster, region, get); // forbid read index in force leader state let read_index = new_read_index_cmd(); - must_get_error_recovery_in_progress(&mut cluster, ®ion, read_index); + must_get_error_recovery_in_progress!(cluster, region, read_index); cluster.exit_force_leader(region.get_id(), 1); // quorum is formed, can propose command successfully now @@ -506,7 +508,8 @@ fn test_force_leader_three_nodes() { // Test the case that three of five nodes fail and force leader on one of the // rest nodes. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_force_leader_five_nodes() { let mut cluster = new_node_cluster(0, 5); cluster.pd_client.disable_default_operator(); @@ -525,7 +528,7 @@ fn test_force_leader_five_nodes() { cluster.stop_node(5); // quorum is lost, can't propose command successfully. 
- confirm_quorum_is_lost(&mut cluster, ®ion); + confirm_quorum_is_lost!(cluster, region); cluster.must_enter_force_leader(region.get_id(), 1, vec![3, 4, 5]); // remove the peers on failed nodes @@ -540,13 +543,13 @@ fn test_force_leader_five_nodes() { .must_remove_peer(region.get_id(), find_peer(®ion, 5).unwrap().clone()); // forbid writes in force leader state let put = new_put_cmd(b"k3", b"v3"); - must_get_error_recovery_in_progress(&mut cluster, ®ion, put); + must_get_error_recovery_in_progress!(cluster, region, put); // forbid reads in force leader state let get = new_get_cmd(b"k1"); - must_get_error_recovery_in_progress(&mut cluster, ®ion, get); + must_get_error_recovery_in_progress!(cluster, region, get); // forbid read index in force leader state let read_index = new_read_index_cmd(); - must_get_error_recovery_in_progress(&mut cluster, ®ion, read_index); + must_get_error_recovery_in_progress!(cluster, region, read_index); cluster.exit_force_leader(region.get_id(), 1); @@ -559,9 +562,10 @@ fn test_force_leader_five_nodes() { // Test the case that three of five nodes fail and force leader on the rest node // which is a learner. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_force_leader_for_learner() { - let mut cluster = new_node_cluster(0, 5); + let mut cluster = new_cluster(0, 5); cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(10); cluster.cfg.raft_store.raft_election_timeout_ticks = 5; cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40); @@ -577,14 +581,17 @@ fn test_force_leader_for_learner() { cluster.must_transfer_leader(region.get_id(), peer_on_store5.clone()); let peer_on_store1 = find_peer(®ion, 1).unwrap(); + let new_learner = new_learner_peer( + peer_on_store1.get_store_id(), + cluster.pd_client.alloc_id().unwrap(), + ); // replace one peer with learner cluster .pd_client .must_remove_peer(region.get_id(), peer_on_store1.clone()); - cluster.pd_client.must_add_peer( - region.get_id(), - new_learner_peer(peer_on_store1.get_store_id(), peer_on_store1.get_id()), - ); + cluster + .pd_client + .must_add_peer(region.get_id(), new_learner.clone()); // Sleep 100 ms to wait for the new learner to be initialized. 
sleep_ms(100); @@ -594,7 +601,7 @@ fn test_force_leader_for_learner() { cluster.stop_node(4); cluster.stop_node(5); - confirm_quorum_is_lost(&mut cluster, ®ion); + confirm_quorum_is_lost!(cluster, region); // wait election timeout std::thread::sleep(Duration::from_millis( @@ -604,9 +611,10 @@ fn test_force_leader_for_learner() { )); cluster.must_enter_force_leader(region.get_id(), 1, vec![3, 4, 5]); // promote the learner first and remove the peers on failed nodes + let new_peer = new_peer(new_learner.get_store_id(), new_learner.get_id()); cluster .pd_client - .must_add_peer(region.get_id(), find_peer(®ion, 1).unwrap().clone()); + .must_add_peer(region.get_id(), new_peer.clone()); cluster .pd_client .must_remove_peer(region.get_id(), find_peer(®ion, 3).unwrap().clone()); @@ -623,7 +631,7 @@ fn test_force_leader_for_learner() { assert_eq!(cluster.must_get(b"k2"), None); assert_eq!(cluster.must_get(b"k3"), None); assert_eq!(cluster.must_get(b"k4"), Some(b"v4".to_vec())); - cluster.must_transfer_leader(region.get_id(), find_peer(®ion, 1).unwrap().clone()); + cluster.must_transfer_leader(region.get_id(), new_peer); } // Test the case that three of five nodes fail and force leader on a hibernated @@ -722,9 +730,10 @@ fn test_force_leader_on_hibernated_follower() { // Test the case that three of five nodes fail and force leader on the rest node // with triggering snapshot. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_force_leader_trigger_snapshot() { - let mut cluster = new_node_cluster(0, 5); + let mut cluster = new_cluster(0, 5); cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(10); cluster.cfg.raft_store.raft_election_timeout_ticks = 10; cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(90); @@ -821,9 +830,10 @@ fn test_force_leader_trigger_snapshot() { // Test the case that three of five nodes fail and force leader on the rest node // with uncommitted conf change. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_force_leader_with_uncommitted_conf_change() { - let mut cluster = new_node_cluster(0, 5); + let mut cluster = new_cluster(0, 5); cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(10); cluster.cfg.raft_store.raft_election_timeout_ticks = 10; cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(90); @@ -842,7 +852,7 @@ fn test_force_leader_with_uncommitted_conf_change() { cluster.stop_node(4); cluster.stop_node(5); - confirm_quorum_is_lost(&mut cluster, ®ion); + confirm_quorum_is_lost!(cluster, region); // an uncommitted conf-change let cmd = new_change_peer_request( @@ -858,7 +868,7 @@ fn test_force_leader_with_uncommitted_conf_change() { std::thread::sleep(Duration::from_millis( cluster.cfg.raft_store.raft_election_timeout_ticks as u64 * cluster.cfg.raft_store.raft_base_tick_interval.as_millis() - * 2, + * 8, )); cluster.must_enter_force_leader(region.get_id(), 1, vec![3, 4, 5]); // the uncommitted conf-change is committed successfully after being force @@ -891,9 +901,10 @@ fn test_force_leader_with_uncommitted_conf_change() { // is sent to b and break lease constrain, then b will reject a's heartbeat // while can vote for c. 
So c becomes leader and there are two leaders in the // group. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_force_leader_on_healthy_region() { - let mut cluster = new_node_cluster(0, 5); + let mut cluster = new_cluster(0, 5); cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(30); cluster.cfg.raft_store.raft_election_timeout_ticks = 5; cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40); @@ -930,9 +941,10 @@ fn test_force_leader_on_healthy_region() { // Test the case that three of five nodes fail and force leader on the one not // having latest log -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_force_leader_on_wrong_leader() { - let mut cluster = new_node_cluster(0, 5); + let mut cluster = new_cluster(0, 5); cluster.pd_client.disable_default_operator(); cluster.run(); @@ -957,7 +969,7 @@ fn test_force_leader_on_wrong_leader() { cluster.stop_node(1); cluster.run_node(1).unwrap(); - confirm_quorum_is_lost(&mut cluster, ®ion); + confirm_quorum_is_lost!(cluster, region); // try to force leader on peer of node2 which is stale cluster.must_enter_force_leader(region.get_id(), 2, vec![3, 4, 5]); @@ -978,9 +990,10 @@ fn test_force_leader_on_wrong_leader() { // Test the case that three of five nodes fail and force leader twice on // peers on different nodes -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_force_leader_twice_on_different_peers() { - let mut cluster = new_node_cluster(0, 5); + let mut cluster = new_cluster(0, 5); cluster.pd_client.disable_default_operator(); cluster.run(); @@ -1001,7 +1014,7 @@ fn test_force_leader_twice_on_different_peers() { cluster.run_node(1).unwrap(); cluster.stop_node(2); cluster.run_node(2).unwrap(); - confirm_quorum_is_lost(&mut cluster, ®ion); + confirm_quorum_is_lost!(cluster, 
region); cluster.must_enter_force_leader(region.get_id(), 1, vec![3, 4, 5]); // enter force leader on a different peer @@ -1044,9 +1057,10 @@ fn test_force_leader_twice_on_different_peers() { // Test the case that three of five nodes fail and force leader twice on // peer on the same node -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_force_leader_twice_on_same_peer() { - let mut cluster = new_node_cluster(0, 5); + let mut cluster = new_cluster(0, 5); cluster.pd_client.disable_default_operator(); cluster.run(); @@ -1089,9 +1103,10 @@ fn test_force_leader_twice_on_same_peer() { // Test the case that three of five nodes fail and force leader doesn't finish // in one election rounds due to network partition. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_force_leader_multiple_election_rounds() { - let mut cluster = new_node_cluster(0, 5); + let mut cluster = new_cluster(0, 5); cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(30); cluster.cfg.raft_store.raft_election_timeout_ticks = 5; cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40); @@ -1157,9 +1172,10 @@ fn test_force_leader_multiple_election_rounds() { // leader state before the peer(s) of the target region, thus proposes a no-op // entry (while becoming the leader) which is conflict with part of the catch up // logs, there will be data loss. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] fn test_unsafe_recovery_has_commit_merge() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); cluster.run(); @@ -1215,9 +1231,10 @@ fn test_unsafe_recovery_has_commit_merge() { assert!(has_commit_merge); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] fn test_unsafe_recovery_during_merge() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); cluster.run(); @@ -1260,7 +1277,7 @@ fn test_unsafe_recovery_during_merge() { cluster.stop_node(1); cluster.stop_node(3); - confirm_quorum_is_lost(&mut cluster, ®ion); + confirm_quorum_is_lost!(cluster, region); let report = cluster.must_enter_force_leader(right.get_id(), 2, vec![1, 3]); assert_eq!(report.get_peer_reports().len(), 1); From 6e04d90b94bab1e6a907563ee6e102e2fcea5024 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 25 Jul 2023 14:13:32 +0800 Subject: [PATCH 0811/1149] raftstore-v2: implement unsafe recovery wait apply (#15178) ref tikv/tikv#15108 raftstore-v2: implement unsafe recovery wait apply raftstore: move unsafe recovery structs to mod unsafe_recovery Signed-off-by: Neil Shen --- components/raftstore-v2/src/fsm/peer.rs | 8 +- .../raftstore-v2/src/operation/command/mod.rs | 2 + components/raftstore-v2/src/operation/life.rs | 5 +- .../raftstore-v2/src/operation/ready/mod.rs | 5 + .../src/operation/unsafe_recovery/report.rs | 100 +++++- components/raftstore-v2/src/raft/apply.rs | 6 + components/raftstore-v2/src/raft/peer.rs | 12 +- .../raftstore-v2/src/worker/pd/store.rs | 9 +- components/raftstore/src/store/fsm/peer.rs | 11 +- components/raftstore/src/store/mod.rs | 13 +- components/raftstore/src/store/msg.rs | 2 +- components/raftstore/src/store/peer.rs | 272 +---------------- 
.../raftstore/src/store/unsafe_recovery.rs | 285 +++++++++++++++++- components/raftstore/src/store/worker/pd.rs | 5 +- components/test_raftstore-v2/src/cluster.rs | 4 + .../failpoints/cases/test_unsafe_recovery.rs | 8 +- 16 files changed, 442 insertions(+), 305 deletions(-) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index eb937e4d8ca..5d19f8d7183 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -376,7 +376,13 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerMsg::ExitForceLeaderStateCampaign => { self.fsm.peer_mut().on_exit_force_leader_campaign() } - PeerMsg::UnsafeRecoveryWaitApply(_) | PeerMsg::UnsafeRecoveryFillOutReport(_) => (), + PeerMsg::UnsafeRecoveryWaitApply(syncer) => { + self.fsm.peer_mut().on_unsafe_recovery_wait_apply(syncer) + } + PeerMsg::UnsafeRecoveryFillOutReport(syncer) => self + .fsm + .peer_mut() + .on_unsafe_recovery_fill_out_report(syncer), } } // TODO: instead of propose pending commands immediately, we should use timeout. diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 74e3df196c6..a85c0bc549b 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -474,6 +474,7 @@ impl Peer { // We need to continue to apply after previous page is finished. 
self.set_has_ready(); } + self.check_unsafe_recovery_state(); } } @@ -582,6 +583,7 @@ impl Apply { fail::fail_point!("APPLY_COMMITTED_ENTRIES"); fail::fail_point!("on_handle_apply_1003", self.peer_id() == 1003, |_| {}); fail::fail_point!("on_handle_apply_2", self.peer_id() == 2, |_| {}); + fail::fail_point!("on_handle_apply_store_1", self.store_id() == 1, |_| {}); let now = std::time::Instant::now(); let apply_wait_time = APPLY_TASK_WAIT_TIME_HISTOGRAM.local(); for (e, ch) in ce.entry_and_proposals { diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 05c505461dd..a489331bc5b 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -767,10 +767,13 @@ impl Peer { if self.postponed_destroy() { return; } + // No need to wait for the apply anymore. + self.unsafe_recovery_maybe_finish_wait_apply(true); + + // Use extra write to ensure these writes are the last writes to raft engine. let raft_engine = self.entry_storage().raft_engine(); let mut region_state = self.storage().region_state().clone(); let region_id = region_state.get_region().get_id(); - // Use extra write to ensure these writes are the last writes to raft engine. let lb = write_task .extra_write .ensure_v2(|| raft_engine.log_batch(2)); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 52d434414c4..40c7305455d 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -878,6 +878,11 @@ impl Peer { // state need to update. 
if has_snapshot { self.on_applied_snapshot(ctx); + + if self.unsafe_recovery_state().is_some() { + debug!(self.logger, "unsafe recovery finishes applying a snapshot"); + self.unsafe_recovery_maybe_finish_wait_apply(false); + } } if let Some(flushed_epoch) = flushed_epoch { diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs index 9ede7714b6d..9371a2e3c26 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs @@ -1,10 +1,15 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. use engine_traits::{KvEngine, RaftEngine}; -use kvproto::pdpb; -use raftstore::store::Transport; +use kvproto::{pdpb, raft_serverpb::RegionLocalState}; +use raft::{GetEntriesContext, Storage, NO_LIMIT}; +use raftstore::store::{ + ProposalContext, Transport, UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryState, + UnsafeRecoveryWaitApplySyncer, +}; +use slog::{info, warn}; -use crate::{batch::StoreContext, fsm::Store}; +use crate::{batch::StoreContext, fsm::Store, raft::Peer}; impl Store { pub fn on_unsafe_recovery_report( @@ -19,3 +24,92 @@ impl Store { self.store_heartbeat_pd(ctx, Some(report)) } } + +impl Peer { + pub fn on_unsafe_recovery_wait_apply(&mut self, syncer: UnsafeRecoveryWaitApplySyncer) { + if self.unsafe_recovery_state().is_some() { + warn!(self.logger, + "Unsafe recovery, can't wait apply, another plan is executing in progress"; + ); + syncer.abort(); + return; + } + let target_index = if self.has_force_leader() { + // For regions that lose quorum (or regions have force leader), whatever has + // been proposed will be committed. Based on that fact, we simply use "last + // index" here to avoid implementing another "wait commit" process. 
+ self.raft_group().raft.raft_log.last_index() + } else { + self.raft_group().raft.raft_log.committed + }; + + *self.unsafe_recovery_state_mut() = Some(UnsafeRecoveryState::WaitApply { + target_index, + syncer, + }); + self.unsafe_recovery_maybe_finish_wait_apply(!self.serving()); + } + + pub fn unsafe_recovery_maybe_finish_wait_apply(&mut self, force: bool) { + if let Some(UnsafeRecoveryState::WaitApply { target_index, .. }) = + self.unsafe_recovery_state() + { + if self.raft_group().raft.raft_log.applied >= *target_index || force { + if self.is_in_force_leader() { + info!(self.logger, + "Unsafe recovery, finish wait apply"; + "target_index" => target_index, + "applied" => self.raft_group().raft.raft_log.applied, + "force" => force, + ); + } + *self.unsafe_recovery_state_mut() = None; + } + } + } + + pub fn on_unsafe_recovery_fill_out_report( + &mut self, + syncer: UnsafeRecoveryFillOutReportSyncer, + ) { + if !self.serving() { + return; + } + let mut self_report = pdpb::PeerReport::default(); + self_report.set_raft_state(self.storage().raft_state().clone()); + let mut region_local_state = RegionLocalState::default(); + region_local_state.set_region(self.region().clone()); + self_report.set_region_state(region_local_state); + self_report.set_is_force_leader(self.has_force_leader()); + match self.storage().entries( + self.storage().entry_storage().commit_index() + 1, + self.storage().entry_storage().last_index() + 1, + NO_LIMIT, + GetEntriesContext::empty(false), + ) { + Ok(entries) => { + for entry in entries { + let ctx = ProposalContext::from_bytes(&entry.context); + if ctx.contains(ProposalContext::COMMIT_MERGE) { + self_report.set_has_commit_merge(true); + break; + } + } + } + Err(e) => panic!("Unsafe recovery, fail to get uncommitted entries, {:?}", e), + } + syncer.report_for_self(self_report); + } + + pub fn check_unsafe_recovery_state(&mut self) { + match self.unsafe_recovery_state() { + Some(UnsafeRecoveryState::WaitApply { .. 
}) => { + self.unsafe_recovery_maybe_finish_wait_apply(false) + } + Some(UnsafeRecoveryState::DemoteFailedVoters { .. }) => { + // TODO: support demote. + } + Some(_) | None => {} + } + } +} diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index dc474a69de3..3e660c4549c 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -226,6 +226,12 @@ impl Apply { self.peer.get_id() } + #[allow(unused)] + #[inline] + pub fn store_id(&self) -> u64 { + self.peer.get_store_id() + } + /// The tablet can't be public yet, otherwise content of latest tablet /// doesn't matches its epoch in both readers and peer fsm. #[inline] diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 392bf82e5ec..d7ac62763e9 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -24,7 +24,7 @@ use raftstore::{ metrics::RAFT_PEER_PENDING_DURATION, util::{Lease, RegionReadProgress}, Config, EntryStorage, ForceLeaderState, PeerStat, ProposalQueue, ReadDelegate, - ReadIndexQueue, ReadProgress, TabletSnapManager, WriteTask, + ReadIndexQueue, ReadProgress, TabletSnapManager, UnsafeRecoveryState, WriteTask, }, }; use slog::{debug, info, Logger}; @@ -135,6 +135,7 @@ pub struct Peer { /// /// For details, see the comment of `ForceLeaderState`. force_leader_state: Option, + unsafe_recovery_state: Option, } impl Peer { @@ -226,6 +227,7 @@ impl Peer { gc_peer_context: GcPeerContext::default(), abnormal_peer_context: AbnormalPeerContext::default(), force_leader_state: None, + unsafe_recovery_state: None, }; // If this region has only one peer and I am the one, campaign directly. 
@@ -983,4 +985,12 @@ impl Peer { pub fn force_leader_mut(&mut self) -> &mut Option { &mut self.force_leader_state } + + pub fn unsafe_recovery_state(&self) -> Option<&UnsafeRecoveryState> { + self.unsafe_recovery_state.as_ref() + } + + pub fn unsafe_recovery_state_mut(&mut self) -> &mut Option { + &mut self.unsafe_recovery_state + } } diff --git a/components/raftstore-v2/src/worker/pd/store.rs b/components/raftstore-v2/src/worker/pd/store.rs index 14aa3c5ca9e..91c8f17fcf9 100644 --- a/components/raftstore-v2/src/worker/pd/store.rs +++ b/components/raftstore-v2/src/worker/pd/store.rs @@ -16,7 +16,7 @@ use pd_client::{ use prometheus::local::LocalHistogram; use raftstore::store::{ metrics::STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC, util::LatencyInspector, - UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, + UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, }; use slog::{error, info, warn}; use tikv_util::{ @@ -308,10 +308,11 @@ where "err" => ?e); } } + } else { + let _syncer = + UnsafeRecoveryExecutePlanSyncer::new(plan.get_step(), router); + // TODO: handle creates/tombstone/demotes } - // else { - // TODO: handle creates/tombstone/demotes - // } } // Attention, as Hibernate Region is eliminated in diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 9c3fdb792a3..60ab1809688 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -84,13 +84,16 @@ use crate::{ metrics::*, msg::{Callback, ExtCallback, InspectedRaftMessage}, peer::{ - ConsistencyState, ForceLeaderState, Peer, PersistSnapshotResult, SnapshotRecoveryState, - SnapshotRecoveryWaitApplySyncer, StaleState, UnsafeRecoveryExecutePlanSyncer, - UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, - UnsafeRecoveryState, UnsafeRecoveryWaitApplySyncer, TRANSFER_LEADER_COMMAND_REPLY_CTX, + ConsistencyState, Peer, PersistSnapshotResult, StaleState, + 
TRANSFER_LEADER_COMMAND_REPLY_CTX, }, region_meta::RegionMeta, transport::Transport, + unsafe_recovery::{ + ForceLeaderState, SnapshotRecoveryState, SnapshotRecoveryWaitApplySyncer, + UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, + UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryState, UnsafeRecoveryWaitApplySyncer, + }, util, util::{KeysInfoFormatter, LeaseState}, worker::{ diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index beda74b41d4..dad6cf69ed7 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -57,11 +57,8 @@ pub use self::{ }, peer::{ can_amend_read, get_sync_log_from_request, make_transfer_leader_response, - propose_read_index, should_renew_lease, ForceLeaderState, Peer, PeerStat, ProposalContext, - ProposalQueue, RequestInspector, RequestPolicy, SnapshotRecoveryWaitApplySyncer, - UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, - UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryWaitApplySyncer, - TRANSFER_LEADER_COMMAND_REPLY_CTX, + propose_read_index, should_renew_lease, Peer, PeerStat, ProposalContext, ProposalQueue, + RequestInspector, RequestPolicy, TRANSFER_LEADER_COMMAND_REPLY_CTX, }, peer_storage::{ clear_meta, do_snapshot, write_initial_apply_state, write_initial_raft_state, @@ -79,7 +76,11 @@ pub use self::{ }, transport::{CasualRouter, ProposalRouter, SignificantRouter, StoreRouter, Transport}, txn_ext::{LocksStatus, PeerPessimisticLocks, PessimisticLockPair, TxnExt}, - unsafe_recovery::UnsafeRecoveryHandle, + unsafe_recovery::{ + ForceLeaderState, SnapshotRecoveryWaitApplySyncer, UnsafeRecoveryExecutePlanSyncer, + UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, + UnsafeRecoveryState, UnsafeRecoveryWaitApplySyncer, + }, util::{RegionReadProgress, RegionReadProgressRegistry}, worker::{ metrics as worker_metrics, AutoSplitController, BatchComponent, Bucket, BucketRange, diff 
--git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index c36e9880694..1ed8934e0f0 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -31,7 +31,7 @@ use super::{local_metrics::TimeTracker, region_meta::RegionMeta, FetchedLogs, Re use crate::store::{ fsm::apply::{CatchUpLogs, ChangeObserver, TaskRes as ApplyTaskRes}, metrics::RaftEventDurationType, - peer::{ + unsafe_recovery::{ SnapshotRecoveryWaitApplySyncer, UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryWaitApplySyncer, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 84c00548b70..d4751d9ddfd 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -5,10 +5,9 @@ use std::{ cell::RefCell, cmp, collections::VecDeque, - fmt, mem, + mem, sync::{ atomic::{AtomicUsize, Ordering}, - mpsc::SyncSender, Arc, Mutex, }, time::{Duration, Instant}, @@ -29,7 +28,7 @@ use kvproto::{ errorpb, kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp}, metapb::{self, PeerRole}, - pdpb::{self, PeerStats}, + pdpb::PeerStats, raft_cmdpb::{ self, AdminCmdType, AdminResponse, CmdType, CommitMergeRequest, PutRequest, RaftCmdRequest, RaftCmdResponse, Request, TransferLeaderRequest, TransferLeaderResponse, @@ -97,7 +96,7 @@ use crate::{ msg::{CasualMessage, ErrorCallback, RaftCommand}, peer_storage::HandleSnapshotResult, txn_ext::LocksStatus, - unsafe_recovery::UnsafeRecoveryHandle, + unsafe_recovery::{ForceLeaderState, SnapshotRecoveryState, UnsafeRecoveryState}, util::{admin_cmd_epoch_lookup, RegionReadProgress}, worker::{ HeartbeatTask, RaftlogGcTask, ReadDelegate, ReadExecutor, ReadProgress, RegionTask, @@ -487,91 +486,6 @@ pub struct ReadyResult { pub has_write_ready: bool, } -#[derive(Debug)] -/// ForceLeader process would be: -/// - If it's hibernated, enter wait ticks state, and wake up the peer -/// - 
Enter pre force leader state, become candidate and send request vote to -/// all peers -/// - Wait for the responses of the request vote, no reject should be received. -/// - Enter force leader state, become leader without leader lease -/// - Execute recovery plan(some remove-peer commands) -/// - After the plan steps are all applied, exit force leader state -pub enum ForceLeaderState { - WaitTicks { - syncer: UnsafeRecoveryForceLeaderSyncer, - failed_stores: HashSet, - ticks: usize, - }, - PreForceLeader { - syncer: UnsafeRecoveryForceLeaderSyncer, - failed_stores: HashSet, - }, - ForceLeader { - time: TiInstant, - failed_stores: HashSet, - }, -} - -// Following shared states are used while reporting to PD for unsafe recovery -// and shared among all the regions per their life cycle. -// The work flow is like: -// 1. report phase -// - start_unsafe_recovery_report -// - broadcast wait-apply commands -// - wait for all the peers' apply indices meet their targets -// - broadcast fill out report commands -// - wait for all the peers fill out the reports for themselves -// - send a store report (through store heartbeat) -// 2. force leader phase -// - dispatch force leader commands -// - wait for all the peers that received the command become force leader -// - start_unsafe_recovery_report -// 3. plan execution phase -// - dispatch recovery plans -// - wait for all the creates, deletes and demotes to finish, for the -// demotes, procedures are: -// - exit joint state if it is already in joint state -// - demote failed voters, and promote self to be a voter if it is a -// learner -// - exit joint state -// - start_unsafe_recovery_report - -// A wrapper of a closure that will be invoked when it is dropped. -// This design has two benefits: -// 1. Using a closure (dynamically dispatched), so that it can avoid having -// generic member fields like RaftRouter, thus avoid having Rust generic -// type explosion problem. -// 2. 
Invoke on drop, so that it can be easily and safely used (together with -// Arc) as a coordinator between all concerning peers. Each of the peers -// holds a reference to the same strcuture, and whoever finishes the task -// drops its reference. Once the last reference is dropped, indicating all -// the peers have finished their own tasks, the closure is invoked. -pub struct InvokeClosureOnDrop(Option>); - -impl fmt::Debug for InvokeClosureOnDrop { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "InvokeClosureOnDrop") - } -} - -impl Drop for InvokeClosureOnDrop { - fn drop(&mut self) { - if let Some(on_drop) = self.0.take() { - on_drop(); - } - } -} - -pub fn start_unsafe_recovery_report( - router: Arc, - report_id: u64, - exit_force_leader: bool, -) { - let wait_apply = - UnsafeRecoveryWaitApplySyncer::new(report_id, router.clone(), exit_force_leader); - router.broadcast_wait_apply(wait_apply); -} - // Propose a read index request to the raft group, return the request id and // whether this request had dropped silently // #[RaftstoreCommon], copied from Peer::propose_read_index @@ -650,186 +564,6 @@ pub fn can_amend_read( false } -#[derive(Clone, Debug)] -pub struct UnsafeRecoveryForceLeaderSyncer(Arc); - -impl UnsafeRecoveryForceLeaderSyncer { - pub fn new(report_id: u64, router: Arc) -> Self { - let inner = InvokeClosureOnDrop(Some(Box::new(move || { - info!("Unsafe recovery, force leader finished."); - start_unsafe_recovery_report(router, report_id, false); - }))); - UnsafeRecoveryForceLeaderSyncer(Arc::new(inner)) - } -} - -#[derive(Clone, Debug)] -pub struct UnsafeRecoveryExecutePlanSyncer { - _closure: Arc, - abort: Arc>, -} - -impl UnsafeRecoveryExecutePlanSyncer { - pub fn new(report_id: u64, router: Arc) -> Self { - let abort = Arc::new(Mutex::new(false)); - let abort_clone = abort.clone(); - let closure = InvokeClosureOnDrop(Some(Box::new(move || { - info!("Unsafe recovery, plan execution finished"); - if 
*abort_clone.lock().unwrap() { - warn!("Unsafe recovery, plan execution aborted"); - return; - } - start_unsafe_recovery_report(router, report_id, true); - }))); - UnsafeRecoveryExecutePlanSyncer { - _closure: Arc::new(closure), - abort, - } - } - - pub fn abort(&self) { - *self.abort.lock().unwrap() = true; - } -} -// Syncer only send to leader in 2nd BR restore -#[derive(Clone, Debug)] -pub struct SnapshotRecoveryWaitApplySyncer { - _closure: Arc, - abort: Arc>, -} - -impl SnapshotRecoveryWaitApplySyncer { - pub fn new(region_id: u64, sender: SyncSender) -> Self { - let thread_safe_router = Mutex::new(sender); - let abort = Arc::new(Mutex::new(false)); - let abort_clone = abort.clone(); - let closure = InvokeClosureOnDrop(Some(Box::new(move || { - info!("region {} wait apply finished", region_id); - if *abort_clone.lock().unwrap() { - warn!("wait apply aborted"); - return; - } - let router_ptr = thread_safe_router.lock().unwrap(); - - _ = router_ptr.send(region_id).map_err(|_| { - warn!("reply waitapply states failure."); - }); - }))); - SnapshotRecoveryWaitApplySyncer { - _closure: Arc::new(closure), - abort, - } - } - - pub fn abort(&self) { - *self.abort.lock().unwrap() = true; - } -} - -#[derive(Clone, Debug)] -pub struct UnsafeRecoveryWaitApplySyncer { - _closure: Arc, - abort: Arc>, -} - -impl UnsafeRecoveryWaitApplySyncer { - pub fn new( - report_id: u64, - router: Arc, - exit_force_leader: bool, - ) -> Self { - let abort = Arc::new(Mutex::new(false)); - let abort_clone = abort.clone(); - let closure = InvokeClosureOnDrop(Some(Box::new(move || { - info!("Unsafe recovery, wait apply finished"); - if *abort_clone.lock().unwrap() { - warn!("Unsafe recovery, wait apply aborted"); - return; - } - if exit_force_leader { - router.broadcast_exit_force_leader(); - } - let fill_out_report = UnsafeRecoveryFillOutReportSyncer::new(report_id, router.clone()); - router.broadcast_fill_out_report(fill_out_report); - }))); - UnsafeRecoveryWaitApplySyncer { - _closure: 
Arc::new(closure), - abort, - } - } - - pub fn abort(&self) { - *self.abort.lock().unwrap() = true; - } -} - -#[derive(Clone, Debug)] -pub struct UnsafeRecoveryFillOutReportSyncer { - _closure: Arc, - reports: Arc>>, -} - -impl UnsafeRecoveryFillOutReportSyncer { - pub fn new(report_id: u64, router: Arc) -> Self { - let reports = Arc::new(Mutex::new(vec![])); - let reports_clone = reports.clone(); - let closure = InvokeClosureOnDrop(Some(Box::new(move || { - info!("Unsafe recovery, peer reports collected"); - let mut store_report = pdpb::StoreReport::default(); - { - let mut reports_ptr = reports_clone.lock().unwrap(); - store_report.set_peer_reports(mem::take(&mut *reports_ptr).into()); - } - store_report.set_step(report_id); - if let Err(e) = router.send_report(store_report) { - error!("Unsafe recovery, fail to schedule reporting"; "err" => ?e); - } - }))); - UnsafeRecoveryFillOutReportSyncer { - _closure: Arc::new(closure), - reports, - } - } - - pub fn report_for_self(&self, report: pdpb::PeerReport) { - let mut reports_ptr = self.reports.lock().unwrap(); - (*reports_ptr).push(report); - } -} - -pub enum SnapshotRecoveryState { - // This state is set by the leader peer fsm. Once set, it sync and check leader commit index - // and force forward to last index once follower appended and then it also is checked - // every time this peer applies a the last index, if the last index is met, this state is - // reset / droppeds. The syncer is droped and send the response to the invoker, triggers - // the next step of recovery process. - WaitLogApplyToLast { - target_index: u64, - syncer: SnapshotRecoveryWaitApplySyncer, - }, -} - -pub enum UnsafeRecoveryState { - // Stores the state that is necessary for the wait apply stage of unsafe recovery process. - // This state is set by the peer fsm. Once set, it is checked every time this peer applies a - // new entry or a snapshot, if the target index is met, this state is reset / droppeds. 
The - // syncer holds a reference counted inner object that is shared among all the peers, whose - // destructor triggers the next step of unsafe recovery report process. - WaitApply { - target_index: u64, - syncer: UnsafeRecoveryWaitApplySyncer, - }, - DemoteFailedVoters { - syncer: UnsafeRecoveryExecutePlanSyncer, - failed_voters: Vec, - target_index: u64, - // Failed regions may be stuck in joint state, if that is the case, we need to ask the - // region to exit joint state before proposing the demotion. - demote_after_exit: bool, - }, - Destroy(UnsafeRecoveryExecutePlanSyncer), -} - #[derive(Getters, MutGetters)] pub struct Peer where diff --git a/components/raftstore/src/store/unsafe_recovery.rs b/components/raftstore/src/store/unsafe_recovery.rs index 163ade71c4a..a9c92ae8030 100644 --- a/components/raftstore/src/store/unsafe_recovery.rs +++ b/components/raftstore/src/store/unsafe_recovery.rs @@ -1,20 +1,20 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use std::sync::Mutex; +use std::{ + fmt, mem, + sync::{mpsc::SyncSender, Arc, Mutex}, +}; use collections::HashSet; use crossbeam::channel::SendError; use engine_traits::{KvEngine, RaftEngine}; -use kvproto::pdpb::StoreReport; -use tikv_util::box_err; - -use super::{ - peer::{ - UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, - UnsafeRecoveryWaitApplySyncer, - }, - PeerMsg, RaftRouter, SignificantMsg, SignificantRouter, StoreMsg, +use kvproto::{ + metapb, + pdpb::{PeerReport, StoreReport}, }; +use tikv_util::{box_err, error, info, time::Instant as TiInstant, warn}; + +use super::{PeerMsg, RaftRouter, SignificantMsg, SignificantRouter, StoreMsg}; use crate::Result; /// A handle for PD to schedule online unsafe recovery commands back to @@ -80,3 +80,268 @@ impl UnsafeRecoveryHandle for Mutex, + ticks: usize, + }, + PreForceLeader { + syncer: UnsafeRecoveryForceLeaderSyncer, + failed_stores: HashSet, + }, + ForceLeader { + time: TiInstant, + failed_stores: HashSet, + }, 
+} + +// Following shared states are used while reporting to PD for unsafe recovery +// and shared among all the regions per their life cycle. +// The work flow is like: +// 1. report phase +// - start_unsafe_recovery_report +// - broadcast wait-apply commands +// - wait for all the peers' apply indices meet their targets +// - broadcast fill out report commands +// - wait for all the peers fill out the reports for themselves +// - send a store report (through store heartbeat) +// 2. force leader phase +// - dispatch force leader commands +// - wait for all the peers that received the command become force leader +// - start_unsafe_recovery_report +// 3. plan execution phase +// - dispatch recovery plans +// - wait for all the creates, deletes and demotes to finish, for the +// demotes, procedures are: +// - exit joint state if it is already in joint state +// - demote failed voters, and promote self to be a voter if it is a +// learner +// - exit joint state +// - start_unsafe_recovery_report + +// A wrapper of a closure that will be invoked when it is dropped. +// This design has two benefits: +// 1. Using a closure (dynamically dispatched), so that it can avoid having +// generic member fields like RaftRouter, thus avoid having Rust generic +// type explosion problem. +// 2. Invoke on drop, so that it can be easily and safely used (together with +// Arc) as a coordinator between all concerning peers. Each of the peers +// holds a reference to the same strcuture, and whoever finishes the task +// drops its reference. Once the last reference is dropped, indicating all +// the peers have finished their own tasks, the closure is invoked. 
+pub struct InvokeClosureOnDrop(Option>); + +impl fmt::Debug for InvokeClosureOnDrop { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "InvokeClosureOnDrop") + } +} + +impl Drop for InvokeClosureOnDrop { + fn drop(&mut self) { + if let Some(on_drop) = self.0.take() { + on_drop(); + } + } +} + +pub fn start_unsafe_recovery_report( + router: Arc, + report_id: u64, + exit_force_leader: bool, +) { + let wait_apply = + UnsafeRecoveryWaitApplySyncer::new(report_id, router.clone(), exit_force_leader); + router.broadcast_wait_apply(wait_apply); +} + +#[derive(Clone, Debug)] +pub struct UnsafeRecoveryForceLeaderSyncer(Arc); + +impl UnsafeRecoveryForceLeaderSyncer { + pub fn new(report_id: u64, router: Arc) -> Self { + let inner = InvokeClosureOnDrop(Some(Box::new(move || { + info!("Unsafe recovery, force leader finished."); + start_unsafe_recovery_report(router, report_id, false); + }))); + UnsafeRecoveryForceLeaderSyncer(Arc::new(inner)) + } +} + +#[derive(Clone, Debug)] +pub struct UnsafeRecoveryExecutePlanSyncer { + _closure: Arc, + abort: Arc>, +} + +impl UnsafeRecoveryExecutePlanSyncer { + pub fn new(report_id: u64, router: Arc) -> Self { + let abort = Arc::new(Mutex::new(false)); + let abort_clone = abort.clone(); + let closure = InvokeClosureOnDrop(Some(Box::new(move || { + info!("Unsafe recovery, plan execution finished"); + if *abort_clone.lock().unwrap() { + warn!("Unsafe recovery, plan execution aborted"); + return; + } + start_unsafe_recovery_report(router, report_id, true); + }))); + UnsafeRecoveryExecutePlanSyncer { + _closure: Arc::new(closure), + abort, + } + } + + pub fn abort(&self) { + *self.abort.lock().unwrap() = true; + } +} +// Syncer only send to leader in 2nd BR restore +#[derive(Clone, Debug)] +pub struct SnapshotRecoveryWaitApplySyncer { + _closure: Arc, + abort: Arc>, +} + +impl SnapshotRecoveryWaitApplySyncer { + pub fn new(region_id: u64, sender: SyncSender) -> Self { + let thread_safe_router = Mutex::new(sender); + let 
abort = Arc::new(Mutex::new(false)); + let abort_clone = abort.clone(); + let closure = InvokeClosureOnDrop(Some(Box::new(move || { + info!("region {} wait apply finished", region_id); + if *abort_clone.lock().unwrap() { + warn!("wait apply aborted"); + return; + } + let router_ptr = thread_safe_router.lock().unwrap(); + + _ = router_ptr.send(region_id).map_err(|_| { + warn!("reply waitapply states failure."); + }); + }))); + SnapshotRecoveryWaitApplySyncer { + _closure: Arc::new(closure), + abort, + } + } + + pub fn abort(&self) { + *self.abort.lock().unwrap() = true; + } +} + +#[derive(Clone, Debug)] +pub struct UnsafeRecoveryWaitApplySyncer { + _closure: Arc, + abort: Arc>, +} + +impl UnsafeRecoveryWaitApplySyncer { + pub fn new( + report_id: u64, + router: Arc, + exit_force_leader: bool, + ) -> Self { + let abort = Arc::new(Mutex::new(false)); + let abort_clone = abort.clone(); + let closure = InvokeClosureOnDrop(Some(Box::new(move || { + info!("Unsafe recovery, wait apply finished"); + if *abort_clone.lock().unwrap() { + warn!("Unsafe recovery, wait apply aborted"); + return; + } + if exit_force_leader { + router.broadcast_exit_force_leader(); + } + let fill_out_report = UnsafeRecoveryFillOutReportSyncer::new(report_id, router.clone()); + router.broadcast_fill_out_report(fill_out_report); + }))); + UnsafeRecoveryWaitApplySyncer { + _closure: Arc::new(closure), + abort, + } + } + + pub fn abort(&self) { + *self.abort.lock().unwrap() = true; + } +} + +#[derive(Clone, Debug)] +pub struct UnsafeRecoveryFillOutReportSyncer { + _closure: Arc, + reports: Arc>>, +} + +impl UnsafeRecoveryFillOutReportSyncer { + pub fn new(report_id: u64, router: Arc) -> Self { + let reports = Arc::new(Mutex::new(vec![])); + let reports_clone = reports.clone(); + let closure = InvokeClosureOnDrop(Some(Box::new(move || { + info!("Unsafe recovery, peer reports collected"); + let mut store_report = StoreReport::default(); + { + let mut reports_ptr = reports_clone.lock().unwrap(); + 
store_report.set_peer_reports(mem::take(&mut *reports_ptr).into()); + } + store_report.set_step(report_id); + if let Err(e) = router.send_report(store_report) { + error!("Unsafe recovery, fail to schedule reporting"; "err" => ?e); + } + }))); + UnsafeRecoveryFillOutReportSyncer { + _closure: Arc::new(closure), + reports, + } + } + + pub fn report_for_self(&self, report: PeerReport) { + let mut reports_ptr = self.reports.lock().unwrap(); + (*reports_ptr).push(report); + } +} + +pub enum SnapshotRecoveryState { + // This state is set by the leader peer fsm. Once set, it sync and check leader commit index + // and force forward to last index once follower appended and then it also is checked + // every time this peer applies a the last index, if the last index is met, this state is + // reset / droppeds. The syncer is droped and send the response to the invoker, triggers + // the next step of recovery process. + WaitLogApplyToLast { + target_index: u64, + syncer: SnapshotRecoveryWaitApplySyncer, + }, +} + +pub enum UnsafeRecoveryState { + // Stores the state that is necessary for the wait apply stage of unsafe recovery process. + // This state is set by the peer fsm. Once set, it is checked every time this peer applies a + // new entry or a snapshot, if the target index is met, this state is reset / droppeds. The + // syncer holds a reference counted inner object that is shared among all the peers, whose + // destructor triggers the next step of unsafe recovery report process. + WaitApply { + target_index: u64, + syncer: UnsafeRecoveryWaitApplySyncer, + }, + DemoteFailedVoters { + syncer: UnsafeRecoveryExecutePlanSyncer, + failed_voters: Vec, + target_index: u64, + // Failed regions may be stuck in joint state, if that is the case, we need to ask the + // region to exit joint state before proposing the demotion. 
+ demote_after_exit: bool, + }, + Destroy(UnsafeRecoveryExecutePlanSyncer), +} diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index abfb34d4c75..8e62cca120b 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -59,9 +59,10 @@ use crate::{ store::{ cmd_resp::new_error, metrics::*, - peer::{UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryForceLeaderSyncer}, transport::SignificantRouter, - unsafe_recovery::UnsafeRecoveryHandle, + unsafe_recovery::{ + UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, + }, util::{is_epoch_stale, KeysInfoFormatter, LatencyInspector, RaftstoreDuration}, worker::{ split_controller::{SplitInfo, TOP_N}, diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 1e7e307696b..edb95c36570 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -436,6 +436,10 @@ impl, EK: KvEngine> Cluster { self.cfg.server.cluster_id } + pub fn get_node_ids(&self) -> HashSet { + self.sim.rl().get_node_ids() + } + pub fn flush_data(&self) { for reg in self.tablet_registries.values() { reg.for_each_opened_tablet(|_, cached| -> bool { diff --git a/tests/failpoints/cases/test_unsafe_recovery.rs b/tests/failpoints/cases/test_unsafe_recovery.rs index f829aa4fced..a5c6bdf495a 100644 --- a/tests/failpoints/cases/test_unsafe_recovery.rs +++ b/tests/failpoints/cases/test_unsafe_recovery.rs @@ -6,11 +6,13 @@ use futures::executor::block_on; use kvproto::{metapb, pdpb}; use pd_client::PdClient; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, mpsc, store::find_peer}; -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_unsafe_recovery_send_report() { - let mut cluster = new_server_cluster(0, 3); + let mut 
cluster = new_cluster(0, 3); cluster.run(); let nodes = Vec::from_iter(cluster.get_node_ids()); assert_eq!(nodes.len(), 3); @@ -24,7 +26,7 @@ fn test_unsafe_recovery_send_report() { cluster.must_transfer_leader(region.get_id(), store2_peer); cluster.put(b"random_key1", b"random_val1").unwrap(); - // Blocks the raft apply process on store 1 entirely . + // Blocks the raft apply process on store 1 entirely. let (apply_triggered_tx, apply_triggered_rx) = mpsc::bounded::<()>(1); let (apply_released_tx, apply_released_rx) = mpsc::bounded::<()>(1); fail::cfg_callback("on_handle_apply_store_1", move || { From 557d04f7187a4ec58d1f2b7f0f4e3d75452ecb75 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 25 Jul 2023 16:46:33 +0800 Subject: [PATCH 0812/1149] raftstore-v2: optimize lock cf memtable size for v2 (#15141) ref tikv/tikv#12842 adjust the default size of the memtable of the lock cf Signed-off-by: Spade A Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/config/mod.rs | 23 +++++++++++++++-------- tests/integrations/config/mod.rs | 10 +++++----- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index d28d73f7da4..30f718eaa66 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -325,7 +325,7 @@ macro_rules! cf_config { #[serde(with = "rocks_config::compression_type_level_serde")] #[online_config(skip)] pub compression_per_level: [DBCompressionType; 7], - pub write_buffer_size: ReadableSize, + pub write_buffer_size: Option, pub max_write_buffer_number: i32, #[online_config(skip)] pub min_write_buffer_number_to_merge: i32, @@ -467,7 +467,7 @@ macro_rules! 
write_into_metrics { .set($cf.read_amp_bytes_per_bit.into()); $metrics .with_label_values(&[$tag, "write_buffer_size"]) - .set($cf.write_buffer_size.0 as f64); + .set($cf.write_buffer_size.unwrap().0 as f64); $metrics .with_label_values(&[$tag, "max_write_buffer_number"]) .set($cf.max_write_buffer_number.into()); @@ -605,7 +605,7 @@ macro_rules! build_cf_opt { $opt.bottommost_zstd_compression_sample_size, 1, // parallel_threads ); - cf_opts.set_write_buffer_size($opt.write_buffer_size.0); + cf_opts.set_write_buffer_size($opt.write_buffer_size.unwrap_or(ReadableSize::mb(32)).0); cf_opts.set_max_write_buffer_number($opt.max_write_buffer_number); cf_opts.set_min_write_buffer_number_to_merge($opt.min_write_buffer_number_to_merge); cf_opts.set_max_bytes_for_level_base($opt.max_bytes_for_level_base.0); @@ -700,7 +700,7 @@ impl Default for DefaultCfConfig { DBCompressionType::Zstd, DBCompressionType::Zstd, ], - write_buffer_size: ReadableSize::mb(128), + write_buffer_size: Some(ReadableSize::mb(128)), max_write_buffer_number: 5, min_write_buffer_number_to_merge: 1, max_bytes_for_level_base: ReadableSize::mb(512), @@ -872,7 +872,7 @@ impl Default for WriteCfConfig { DBCompressionType::Zstd, DBCompressionType::Zstd, ], - write_buffer_size: ReadableSize::mb(128), + write_buffer_size: Some(ReadableSize::mb(128)), max_write_buffer_number: 5, min_write_buffer_number_to_merge: 1, max_bytes_for_level_base: ReadableSize::mb(512), @@ -994,7 +994,7 @@ impl Default for LockCfConfig { ribbon_filter_above_level: None, read_amp_bytes_per_bit: 0, compression_per_level: [DBCompressionType::No; 7], - write_buffer_size: ReadableSize::mb(32), + write_buffer_size: None, max_write_buffer_number: 5, min_write_buffer_number_to_merge: 1, max_bytes_for_level_base: ReadableSize::mb(128), @@ -1091,7 +1091,7 @@ impl Default for RaftCfConfig { ribbon_filter_above_level: None, read_amp_bytes_per_bit: 0, compression_per_level: [DBCompressionType::No; 7], - write_buffer_size: ReadableSize::mb(128), + 
write_buffer_size: Some(ReadableSize::mb(128)), max_write_buffer_number: 5, min_write_buffer_number_to_merge: 1, max_bytes_for_level_base: ReadableSize::mb(128), @@ -1355,6 +1355,9 @@ impl DbConfig { self.writecf.format_version.get_or_insert(2); self.lockcf.format_version.get_or_insert(2); self.raftcf.format_version.get_or_insert(2); + if self.lockcf.write_buffer_size.is_none() { + self.lockcf.write_buffer_size = Some(ReadableSize::mb(32)); + } } EngineType::RaftKv2 => { self.enable_multi_batch_write.get_or_insert(false); @@ -1393,6 +1396,9 @@ impl DbConfig { // strategy is consistent with single RocksDB. self.defaultcf.max_compactions.get_or_insert(1); self.writecf.max_compactions.get_or_insert(1); + if self.lockcf.write_buffer_size.is_none() { + self.lockcf.write_buffer_size = Some(ReadableSize::mb(4)); + } } } } @@ -1630,7 +1636,7 @@ impl Default for RaftDefaultCfConfig { DBCompressionType::Zstd, DBCompressionType::Zstd, ], - write_buffer_size: ReadableSize::mb(128), + write_buffer_size: Some(ReadableSize::mb(128)), max_write_buffer_number: 5, min_write_buffer_number_to_merge: 1, max_bytes_for_level_base: ReadableSize::mb(512), @@ -5976,6 +5982,7 @@ mod tests { default_cfg.rocksdb.writecf.target_file_size_base = Some(ReadableSize::mb(8)); default_cfg.rocksdb.defaultcf.target_file_size_base = Some(ReadableSize::mb(8)); default_cfg.rocksdb.lockcf.target_file_size_base = Some(ReadableSize::mb(8)); + default_cfg.rocksdb.lockcf.write_buffer_size = Some(ReadableSize::mb(32)); default_cfg.raftdb.defaultcf.target_file_size_base = Some(ReadableSize::mb(8)); default_cfg.raft_store.region_compact_check_step = Some(100); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index b0c00c6b30b..579b95a75bb 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -351,7 +351,7 @@ fn test_serde_custom_tikv_config() { DBCompressionType::Zstd, DBCompressionType::Lz4, ], - write_buffer_size: ReadableSize::mb(1), + 
write_buffer_size: Some(ReadableSize::mb(1)), max_write_buffer_number: 12, min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), @@ -410,7 +410,7 @@ fn test_serde_custom_tikv_config() { DBCompressionType::Zstd, DBCompressionType::Lz4, ], - write_buffer_size: ReadableSize::mb(1), + write_buffer_size: Some(ReadableSize::mb(1)), max_write_buffer_number: 12, min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), @@ -483,7 +483,7 @@ fn test_serde_custom_tikv_config() { DBCompressionType::Zstd, DBCompressionType::Lz4, ], - write_buffer_size: ReadableSize::mb(1), + write_buffer_size: Some(ReadableSize::mb(1)), max_write_buffer_number: 12, min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), @@ -556,7 +556,7 @@ fn test_serde_custom_tikv_config() { DBCompressionType::Zstd, DBCompressionType::Lz4, ], - write_buffer_size: ReadableSize::mb(1), + write_buffer_size: Some(ReadableSize::mb(1)), max_write_buffer_number: 12, min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), @@ -658,7 +658,7 @@ fn test_serde_custom_tikv_config() { DBCompressionType::Zstd, DBCompressionType::Lz4, ], - write_buffer_size: ReadableSize::mb(1), + write_buffer_size: Some(ReadableSize::mb(1)), max_write_buffer_number: 12, min_write_buffer_number_to_merge: 12, max_bytes_for_level_base: ReadableSize::kb(12), From 0774262d1799785610846f45e9da7a949dc3382a Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Tue, 25 Jul 2023 21:07:33 +0800 Subject: [PATCH 0813/1149] encryption: fix offset inconsistency between crypter and file (#15092) close tikv/tikv#15080 Fix offset inconsistency between crypter and file that could cause data corruption when file I/O is interrupted. 
Signed-off-by: tabokie Co-authored-by: tonyxuqqi --- components/encryption/src/io.rs | 205 +++++++++++++++++++++----------- 1 file changed, 136 insertions(+), 69 deletions(-) diff --git a/components/encryption/src/io.rs b/components/encryption/src/io.rs index 4e4baf516cb..d7b7eb76b8a 100644 --- a/components/encryption/src/io.rs +++ b/components/encryption/src/io.rs @@ -255,7 +255,11 @@ impl Read for CrypterReader { fn read(&mut self, buf: &mut [u8]) -> IoResult { let count = self.reader.read(buf)?; if let Some(crypter) = self.crypter.as_mut() { - crypter.do_crypter_in_place(&mut buf[..count])?; + if let Err(e) = crypter.do_crypter_in_place(&mut buf[..count]) { + // FIXME: We can't recover from this without rollback `reader` to old offset. + // But that requires `Seek` which requires a wider refactor of user code. + panic!("`do_crypter_in_place` failed: {:?}", e); + } } Ok(count) } @@ -287,7 +291,9 @@ impl AsyncRead for CrypterReader { }; if let Some(crypter) = inner.crypter.as_mut() { if let Err(e) = crypter.do_crypter_in_place(&mut buf[..read_count]) { - return Poll::Ready(Err(e)); + // FIXME: We can't recover from this without rollback `reader` to old offset. + // But that requires `Seek` which requires a wider refactor of user code. + panic!("`do_crypter_in_place` failed: {:?}", e); } } Poll::Ready(Ok(read_count)) @@ -334,7 +340,10 @@ impl Write for CrypterWriter { if let Some(crypter) = self.crypter.as_mut() { let crypted = crypter.do_crypter(buf)?; debug_assert!(crypted.len() == buf.len()); - self.writer.write(crypted) + let r = self.writer.write(crypted); + let missing = buf.len() - r.as_ref().unwrap_or(&0); + crypter.lazy_reset_crypter(crypter.offset - missing as u64); + r } else { self.writer.write(buf) } @@ -392,6 +401,10 @@ struct CrypterCore { key: Vec, mode: Mode, initial_iv: Iv, + + // Used to ensure the atomicity of operation over a chunk of data. Only advance it when + // operation succeeds. 
+ offset: u64, crypter: Option, block_size: usize, @@ -405,9 +418,10 @@ impl CrypterCore { method, key: key.to_owned(), mode, + initial_iv: iv, + offset: 0, crypter: None, block_size: 0, - initial_iv: iv, buffer: Vec::new(), }) } @@ -418,6 +432,17 @@ impl CrypterCore { self.buffer.resize(size + self.block_size, 0); } + // Delay the reset to future operations that use crypter. Guarantees those + // operations can only succeed after crypter is properly reset. + pub fn lazy_reset_crypter(&mut self, offset: u64) { + if self.offset != offset { + self.crypter.take(); + self.offset = offset; + } + } + + // It has the same guarantee as `lazy_reset_crypter`. In addition, it attempts + // to reset immediately and returns any error. pub fn reset_crypter(&mut self, offset: u64) -> IoResult<()> { let mut iv = self.initial_iv; iv.add_offset(offset / AES_BLOCK_SIZE as u64)?; @@ -428,6 +453,7 @@ impl CrypterCore { self.reset_buffer(partial_offset); let crypter_count = crypter.update(&partial_block, &mut self.buffer)?; if crypter_count != partial_offset { + self.lazy_reset_crypter(offset); return Err(IoError::new( ErrorKind::Other, format!( @@ -436,6 +462,7 @@ impl CrypterCore { ), )); } + self.offset = offset; self.crypter = Some(crypter); self.block_size = cipher.block_size(); Ok(()) @@ -447,7 +474,7 @@ impl CrypterCore { /// this code needs to be updated. 
pub fn do_crypter_in_place(&mut self, buf: &mut [u8]) -> IoResult<()> { if self.crypter.is_none() { - self.reset_crypter(0)?; + self.reset_crypter(self.offset)?; } let count = buf.len(); self.reset_buffer(std::cmp::min(count, MAX_INPLACE_CRYPTION_SIZE)); @@ -458,6 +485,7 @@ impl CrypterCore { debug_assert!(self.buffer.len() >= target - encrypted); let crypter_count = crypter.update(&buf[encrypted..target], &mut self.buffer)?; if crypter_count != target - encrypted { + self.crypter.take(); return Err(IoError::new( ErrorKind::Other, format!( @@ -470,18 +498,20 @@ impl CrypterCore { buf[encrypted..target].copy_from_slice(&self.buffer[..crypter_count]); encrypted += crypter_count; } + self.offset += count as u64; Ok(()) } pub fn do_crypter(&mut self, buf: &[u8]) -> IoResult<&[u8]> { if self.crypter.is_none() { - self.reset_crypter(0)?; + self.reset_crypter(self.offset)?; } let count = buf.len(); self.reset_buffer(count); let crypter = self.crypter.as_mut().unwrap(); let crypter_count = crypter.update(buf, &mut self.buffer)?; if crypter_count != count { + self.crypter.take(); return Err(IoError::new( ErrorKind::Other, format!( @@ -490,6 +520,7 @@ impl CrypterCore { ), )); } + self.offset += count as u64; Ok(&self.buffer[..count]) } @@ -512,7 +543,6 @@ mod tests { use std::{cmp::min, io::Cursor}; use byteorder::{BigEndian, ByteOrder}; - use futures::AsyncReadExt; use rand::{rngs::OsRng, RngCore}; use super::*; @@ -525,6 +555,58 @@ mod tests { key } + struct DecoratedCursor { + cursor: Cursor>, + read_size: usize, + } + + impl DecoratedCursor { + fn new(buff: Vec, read_size: usize) -> DecoratedCursor { + Self { + cursor: Cursor::new(buff.to_vec()), + read_size, + } + } + + fn into_inner(self) -> Vec { + self.cursor.into_inner() + } + } + + impl AsyncRead for DecoratedCursor { + fn poll_read( + mut self: Pin<&mut Self>, + _: &mut Context<'_>, + buf: &mut [u8], + ) -> Poll> { + let len = min(self.read_size, buf.len()); + Poll::Ready(self.cursor.read(&mut buf[..len])) + } + 
} + + impl Read for DecoratedCursor { + fn read(&mut self, buf: &mut [u8]) -> IoResult { + let len = min(self.read_size, buf.len()); + self.cursor.read(&mut buf[..len]) + } + } + + impl Write for DecoratedCursor { + fn write(&mut self, buf: &[u8]) -> IoResult { + let len = min(self.read_size, buf.len()); + self.cursor.write(&buf[0..len]) + } + fn flush(&mut self) -> IoResult<()> { + self.cursor.flush() + } + } + + impl Seek for DecoratedCursor { + fn seek(&mut self, s: SeekFrom) -> IoResult { + self.cursor.seek(s) + } + } + #[test] fn test_decrypt_encrypted_text() { let methods = [ @@ -556,24 +638,30 @@ mod tests { let mut plaintext = vec![0; 1024]; OsRng.fill_bytes(&mut plaintext); - let buf = Vec::with_capacity(1024); - let mut encrypter = EncrypterWriter::new(buf, method, &key, iv).unwrap(); + let mut encrypter = EncrypterWriter::new( + DecoratedCursor::new(plaintext.clone(), 1), + method, + &key, + iv, + ) + .unwrap(); encrypter.write_all(&plaintext).unwrap(); - let buf = encrypter.finalize().unwrap(); + let encrypted = encrypter.finalize().unwrap().into_inner(); // Make sure it's properly encrypted. if method != EncryptionMethod::Plaintext { - assert_ne!(buf, plaintext); + assert_ne!(encrypted, plaintext); } else { - assert_eq!(buf, plaintext); + assert_eq!(encrypted, plaintext); } - let buf_reader = std::io::Cursor::new(buf); - let mut decrypter = DecrypterReader::new(buf_reader, method, &key, iv).unwrap(); + let mut decrypter = + DecrypterReader::new(DecoratedCursor::new(encrypted, 1), method, &key, iv) + .unwrap(); let mut piece = vec![0; 5]; // Read the first two blocks randomly. for i in 0..31 { assert_eq!(decrypter.seek(SeekFrom::Start(i as u64)).unwrap(), i as u64); - assert_eq!(decrypter.read(&mut piece).unwrap(), piece.len()); + decrypter.read_exact(&mut piece).unwrap(); assert_eq!(piece, plaintext[i..i + piece.len()]); } // Read the rest of the data sequentially. 
@@ -583,13 +671,14 @@ mod tests { cursor as u64 ); while cursor + piece.len() <= plaintext.len() { - assert_eq!(decrypter.read(&mut piece).unwrap(), piece.len()); + decrypter.read_exact(&mut piece).unwrap(); assert_eq!(piece, plaintext[cursor..cursor + piece.len()]); cursor += piece.len(); } let tail = plaintext.len() - cursor; - assert_eq!(decrypter.read(&mut piece).unwrap(), tail); - assert_eq!(piece[..tail], plaintext[cursor..cursor + tail]); + let mut short_piece = vec![0; tail]; + decrypter.read_exact(&mut short_piece).unwrap(); + assert_eq!(short_piece[..], plaintext[cursor..cursor + tail]); } } } @@ -609,9 +698,10 @@ mod tests { let sizes = [1024, 10240]; for method in methods { let key = generate_data_key(method); - let readable_text = std::io::Cursor::new(plaintext.clone()); let iv = Iv::new_ctr(); - let encrypter = EncrypterReader::new(readable_text, method, &key, iv).unwrap(); + let encrypter = + EncrypterReader::new(DecoratedCursor::new(plaintext.clone(), 1), method, &key, iv) + .unwrap(); let mut decrypter = DecrypterReader::new(encrypter, method, &key, iv).unwrap(); let mut read = vec![0; 10240]; for offset in offsets { @@ -621,7 +711,7 @@ mod tests { offset as u64 ); let actual_size = std::cmp::min(plaintext.len().saturating_sub(offset), size); - assert_eq!(decrypter.read(&mut read[..size]).unwrap(), actual_size); + decrypter.read_exact(&mut read[..actual_size]).unwrap(); if actual_size > 0 { assert_eq!(read[..actual_size], plaintext[offset..offset + actual_size]); } @@ -646,13 +736,14 @@ mod tests { let written = vec![0; 10240]; for method in methods { let key = generate_data_key(method); - let writable_text = std::io::Cursor::new(written.clone()); let iv = Iv::new_ctr(); - let encrypter = EncrypterWriter::new(writable_text, method, &key, iv).unwrap(); + let encrypter = + EncrypterWriter::new(DecoratedCursor::new(written.clone(), 1), method, &key, iv) + .unwrap(); let mut decrypter = DecrypterWriter::new(encrypter, method, &key, iv).unwrap(); // 
First write full data. assert_eq!(decrypter.seek(SeekFrom::Start(0)).unwrap(), 0); - assert_eq!(decrypter.write(&plaintext).unwrap(), plaintext.len()); + decrypter.write_all(&plaintext).unwrap(); // Then overwrite specific locations. for offset in offsets { for size in sizes { @@ -661,10 +752,9 @@ mod tests { offset as u64 ); let size = std::cmp::min(plaintext.len().saturating_sub(offset), size); - assert_eq!( - decrypter.write(&plaintext[offset..offset + size]).unwrap(), - size - ); + decrypter + .write_all(&plaintext[offset..offset + size]) + .unwrap(); } } let written = decrypter @@ -677,33 +767,8 @@ mod tests { } } - struct MockCursorReader { - cursor: Cursor>, - read_maxsize_once: usize, - } - - impl MockCursorReader { - fn new(buff: &mut [u8], size_once: usize) -> MockCursorReader { - Self { - cursor: Cursor::new(buff.to_vec()), - read_maxsize_once: size_once, - } - } - } - - impl AsyncRead for MockCursorReader { - fn poll_read( - mut self: Pin<&mut Self>, - _cx: &mut Context<'_>, - buf: &mut [u8], - ) -> Poll> { - let len = min(self.read_maxsize_once, buf.len()); - let r = self.cursor.read(&mut buf[..len]).unwrap(); - Poll::Ready(IoResult::Ok(r)) - } - } - async fn test_poll_read() { + use futures::AsyncReadExt; let methods = [ EncryptionMethod::Plaintext, EncryptionMethod::Aes128Ctr, @@ -720,38 +785,39 @@ mod tests { // encrypt plaintext into encrypt_text let read_once = 16; let mut encrypt_reader = EncrypterReader::new( - MockCursorReader::new(&mut plain_text[..], read_once), + DecoratedCursor::new(plain_text.clone(), read_once), method, &key[..], iv, ) .unwrap(); - let mut encrypt_text = [0; 20480]; + let mut encrypt_text = vec![0; 20480]; let mut encrypt_read_len = 0; loop { - let read_len = encrypt_reader - .read(&mut encrypt_text[encrypt_read_len..]) - .await - .unwrap(); + let read_len = + AsyncReadExt::read(&mut encrypt_reader, &mut encrypt_text[encrypt_read_len..]) + .await + .unwrap(); if read_len == 0 { break; } encrypt_read_len += read_len; } + 
encrypt_text.truncate(encrypt_read_len); if method == EncryptionMethod::Plaintext { - assert_eq!(encrypt_text[..encrypt_read_len], plain_text); + assert_eq!(encrypt_text, plain_text); } else { - assert_ne!(encrypt_text[..encrypt_read_len], plain_text); + assert_ne!(encrypt_text, plain_text); } // decrypt encrypt_text into decrypt_text - let mut decrypt_text = [0; 20480]; + let mut decrypt_text = vec![0; 20480]; let mut decrypt_read_len = 0; let read_once = 20; let mut decrypt_reader = DecrypterReader::new( - MockCursorReader::new(&mut encrypt_text[..encrypt_read_len], read_once), + DecoratedCursor::new(encrypt_text.clone(), read_once), method, &key[..], iv, @@ -759,17 +825,18 @@ mod tests { .unwrap(); loop { - let read_len = decrypt_reader - .read(&mut decrypt_text[decrypt_read_len..]) - .await - .unwrap(); + let read_len = + AsyncReadExt::read(&mut decrypt_reader, &mut decrypt_text[decrypt_read_len..]) + .await + .unwrap(); if read_len == 0 { break; } decrypt_read_len += read_len; } - assert_eq!(decrypt_text[..decrypt_read_len], plain_text); + decrypt_text.truncate(decrypt_read_len); + assert_eq!(decrypt_text, plain_text); } } From 5714e06a6703d1e349aa49d9c89f89dec62505d0 Mon Sep 17 00:00:00 2001 From: glorv Date: Wed, 26 Jul 2023 02:37:33 +0800 Subject: [PATCH 0814/1149] resource_control: reset the resource statistics when limiter is recreated (#15186) ref tikv/tikv#14900 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resource_control/src/future.rs | 8 +-- .../resource_control/src/resource_group.rs | 17 ++++- .../resource_control/src/resource_limiter.rs | 26 +++++-- components/resource_control/src/worker.rs | 71 +++++++++++++++++-- 4 files changed, 102 insertions(+), 20 deletions(-) diff --git a/components/resource_control/src/future.rs b/components/resource_control/src/future.rs index a22433638b8..080c90c9f2d 100644 --- a/components/resource_control/src/future.rs +++ 
b/components/resource_control/src/future.rs @@ -266,7 +266,7 @@ mod tests { .name_prefix("test") .build_future_pool(); - let resource_limiter = Arc::new(ResourceLimiter::new(f64::INFINITY, 1000.0)); + let resource_limiter = Arc::new(ResourceLimiter::new(f64::INFINITY, 1000.0, 0)); fn spawn_and_wait(pool: &FuturePool, f: F, limiter: Arc) where @@ -285,7 +285,7 @@ mod tests { loop { i += 1; spawn_and_wait(&pool, empty(), resource_limiter.clone()); - stats = resource_limiter.get_limiter(Io).get_statistics(); + stats = resource_limiter.get_limit_statistics(Io); assert_eq!(stats.total_consumed, i * 150); if stats.total_wait_dur_us > 0 { break; @@ -294,7 +294,7 @@ mod tests { let start = Instant::now(); spawn_and_wait(&pool, empty(), resource_limiter.clone()); - let new_stats = resource_limiter.get_limiter(Io).get_statistics(); + let new_stats = resource_limiter.get_limit_statistics(Io); let delta = new_stats - stats; let dur = start.saturating_elapsed(); assert_eq!(delta.total_consumed, 150); @@ -306,7 +306,7 @@ mod tests { { fail::cfg("failed_to_get_thread_io_bytes_stats", "1*return").unwrap(); spawn_and_wait(&pool, empty(), resource_limiter.clone()); - assert_eq!(resource_limiter.get_limiter(Io).get_statistics(), new_stats); + assert_eq!(resource_limiter.get_limit_statistics(Io), new_stats); fail::remove("failed_to_get_thread_io_bytes_stats"); } } diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index ce5d3f20608..faad2150923 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -60,6 +60,9 @@ pub enum ResourceConsumeType { pub struct ResourceGroupManager { pub(crate) resource_groups: DashMap, registry: RwLock>>, + // auto incremental version generator used for mark the background + // resource limiter has changed. 
+ version_generator: AtomicU64, } impl Default for ResourceGroupManager { @@ -67,6 +70,7 @@ impl Default for ResourceGroupManager { let manager = Self { resource_groups: Default::default(), registry: Default::default(), + version_generator: AtomicU64::new(0), }; // init the default resource group by default. @@ -123,19 +127,26 @@ impl ResourceGroupManager { .resource_groups .get(&rg.name) .and_then(|g| g.limiter.clone()); - let limiter = Self::build_resource_limiter(&rg, prev_limiter); + let limiter = self.build_resource_limiter(&rg, prev_limiter); self.resource_groups .insert(group_name, ResourceGroup::new(rg, limiter)); } fn build_resource_limiter( + &self, rg: &PbResourceGroup, old_limiter: Option>, ) -> Option> { if !rg.get_background_settings().get_job_types().is_empty() { - old_limiter - .or_else(|| Some(Arc::new(ResourceLimiter::new(f64::INFINITY, f64::INFINITY)))) + old_limiter.or_else(|| { + let version = self.version_generator.fetch_add(1, Ordering::Relaxed); + Some(Arc::new(ResourceLimiter::new( + f64::INFINITY, + f64::INFINITY, + version, + ))) + }) } else { None } diff --git a/components/resource_control/src/resource_limiter.rs b/components/resource_control/src/resource_limiter.rs index 89972b1ecc1..f20e60640fc 100644 --- a/components/resource_control/src/resource_limiter.rs +++ b/components/resource_control/src/resource_limiter.rs @@ -26,6 +26,7 @@ impl fmt::Debug for ResourceType { } pub struct ResourceLimiter { + version: u64, limiters: [QuotaLimiter; ResourceType::COUNT], } @@ -36,10 +37,11 @@ impl std::fmt::Debug for ResourceLimiter { } impl ResourceLimiter { - pub fn new(cpu_limit: f64, io_limit: f64) -> Self { + pub fn new(cpu_limit: f64, io_limit: f64, version: u64) -> Self { let cpu_limiter = QuotaLimiter::new(cpu_limit); let io_limiter = QuotaLimiter::new(io_limit); Self { + version, limiters: [cpu_limiter, io_limiter], } } @@ -55,6 +57,15 @@ impl ResourceLimiter { pub(crate) fn get_limiter(&self, ty: ResourceType) -> &QuotaLimiter { 
&self.limiters[ty as usize] } + + pub(crate) fn get_limit_statistics(&self, ty: ResourceType) -> GroupStatistics { + let (total_consumed, total_wait_dur_us) = self.limiters[ty as usize].get_statistics(); + GroupStatistics { + version: self.version, + total_consumed, + total_wait_dur_us, + } + } } pub(crate) struct QuotaLimiter { @@ -83,11 +94,11 @@ impl QuotaLimiter { self.limiter.set_speed_limit(limit); } - pub fn get_statistics(&self) -> GroupStatistics { - GroupStatistics { - total_consumed: self.limiter.total_bytes_consumed() as u64, - total_wait_dur_us: self.total_wait_dur_us.load(Ordering::Relaxed), - } + fn get_statistics(&self) -> (u64, u64) { + ( + self.limiter.total_bytes_consumed() as u64, + self.total_wait_dur_us.load(Ordering::Relaxed), + ) } fn consume(&self, value: u64) -> Duration { @@ -105,6 +116,7 @@ impl QuotaLimiter { #[derive(Default, Clone, PartialEq, Eq, Copy, Debug)] pub struct GroupStatistics { + pub version: u64, pub total_consumed: u64, pub total_wait_dur_us: u64, } @@ -113,6 +125,7 @@ impl std::ops::Sub for GroupStatistics { type Output = Self; fn sub(self, rhs: Self) -> Self::Output { Self { + version: self.version, total_consumed: self.total_consumed.saturating_sub(rhs.total_consumed), total_wait_dur_us: self.total_wait_dur_us.saturating_sub(rhs.total_wait_dur_us), } @@ -124,6 +137,7 @@ impl std::ops::Div for GroupStatistics { fn div(self, rhs: f64) -> Self::Output { Self { + version: self.version, total_consumed: (self.total_consumed as f64 / rhs) as u64, total_wait_dur_us: (self.total_wait_dur_us as f64 / rhs) as u64, } diff --git a/components/resource_control/src/worker.rs b/components/resource_control/src/worker.rs index 095fab34816..b58d37cd13a 100644 --- a/components/resource_control/src/worker.rs +++ b/components/resource_control/src/worker.rs @@ -1,6 +1,12 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{array, collections::HashMap, io::Result as IoResult, sync::Arc, time::Duration}; +use std::{ + array, + collections::{HashMap, HashSet}, + io::Result as IoResult, + sync::Arc, + time::Duration, +}; use file_system::{fetch_io_bytes, IoBytes, IoType}; use strum::EnumCount; @@ -143,6 +149,15 @@ impl GroupQuotaAdjustWorker { self.do_adjust(ResourceType::Cpu, dur_secs, &mut background_groups); self.do_adjust(ResourceType::Io, dur_secs, &mut background_groups); + + // clean up deleted group stats + if self.prev_stats_by_group[0].len() != background_groups.len() { + let name_set: HashSet<_> = + HashSet::from_iter(background_groups.iter().map(|g| &g.name)); + for stat_map in &mut self.prev_stats_by_group { + stat_map.retain(|k, _v| !name_set.contains(k)); + } + } } fn do_adjust( @@ -173,12 +188,18 @@ impl GroupQuotaAdjustWorker { let mut has_wait = false; for g in bg_group_stats.iter_mut() { total_ru_quota += g.ru_quota; - let total_stats = g.limiter.get_limiter(resource_type).get_statistics(); - let stats_per_sec = (total_stats - - self.prev_stats_by_group[resource_type as usize] - .insert(g.name.clone(), total_stats) - .unwrap_or_default()) - / dur_secs; + let total_stats = g.limiter.get_limit_statistics(resource_type); + let last_stats = self.prev_stats_by_group[resource_type as usize] + .insert(g.name.clone(), total_stats) + .unwrap_or_default(); + // version changes means this is a brand new limiter, so no need to sub the old + // statistics. 
+ let stats_delta = if total_stats.version == last_stats.version { + total_stats - last_stats + } else { + total_stats + }; + let stats_per_sec = stats_delta / dur_secs; background_consumed_total += stats_per_sec.total_consumed as f64; g.stats_per_sec = stats_per_sec; if stats_per_sec.total_wait_dur_us > 0 { @@ -432,5 +453,41 @@ mod tests { worker.adjust_quota(); check_limiter(&limiter, 2.4, 3600.0); check_limiter(&bg_limiter, 2.1, 3600.0); + + let bg = new_resource_group_ru("background".into(), 1000, 15); + resource_ctl.add_resource_group(bg); + + let new_bg = + new_background_resource_group_ru("background".into(), 1000, 15, vec!["br".into()]); + resource_ctl.add_resource_group(new_bg); + let new_bg_limiter = resource_ctl + .get_resource_limiter("background", "br") + .unwrap(); + assert_ne!(&*bg_limiter as *const _, &*new_bg_limiter as *const _); + assert!( + new_bg_limiter + .get_limit_statistics(ResourceType::Cpu) + .version + > bg_limiter.get_limit_statistics(ResourceType::Cpu).version + ); + let cpu_stats = new_bg_limiter.get_limit_statistics(ResourceType::Cpu); + assert_eq!(cpu_stats.total_consumed, 0); + assert_eq!(cpu_stats.total_wait_dur_us, 0); + let io_stats = new_bg_limiter.get_limit_statistics(ResourceType::Io); + assert_eq!(io_stats.total_consumed, 0); + assert_eq!(io_stats.total_wait_dur_us, 0); + + reset_quota(&mut worker, 0.0, 0.0, Duration::from_secs(1)); + worker.adjust_quota(); + check_limiter(&limiter, 4.8, 6000.0); + check_limiter(&new_bg_limiter, 2.4, 3000.0); + + reset_quota(&mut worker, 6.0, 5000.0, Duration::from_secs(1)); + limiter.consume(Duration::from_millis(1200), 1200); + new_bg_limiter.consume(Duration::from_millis(1800), 1800); + + worker.adjust_quota(); + check_limiter(&limiter, 2.4, 3600.0); + check_limiter(&new_bg_limiter, 2.1, 3600.0); } } From 93e439fbd3db12ca0e4b5afa2cba028f3c052ca7 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Wed, 26 Jul 2023 09:43:33 +0800 Subject: [PATCH 0815/1149] coprocessor: follower 
read response carry with the bucket's version (#15043) close tikv/tikv#15182 leader will sync the bucket version by extra message. Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../raftstore-v2/src/operation/bucket.rs | 58 ++++++++++++++++++- .../raftstore-v2/src/operation/ready/mod.rs | 4 ++ components/raftstore/src/store/fsm/peer.rs | 57 ++++++++++++++++-- components/raftstore/src/store/worker/read.rs | 6 ++ .../src/store/worker/split_controller.rs | 4 +- components/test_raftstore-v2/src/cluster.rs | 19 ++++-- components/test_storage/src/util.rs | 28 +++++++++ src/coprocessor/tracker.rs | 1 + tests/failpoints/cases/test_coprocessor.rs | 50 +++++++++++++++- tests/failpoints/cases/test_import_service.rs | 15 ++--- 10 files changed, 222 insertions(+), 20 deletions(-) diff --git a/components/raftstore-v2/src/operation/bucket.rs b/components/raftstore-v2/src/operation/bucket.rs index 6bb5514c0a1..de4abb76712 100644 --- a/components/raftstore-v2/src/operation/bucket.rs +++ b/components/raftstore-v2/src/operation/bucket.rs @@ -5,7 +5,10 @@ use std::sync::Arc; use engine_traits::{KvEngine, RaftEngine}; -use kvproto::metapb::{self, RegionEpoch}; +use kvproto::{ + metapb::{self, RegionEpoch}, + raft_serverpb::{ExtraMessageType, RaftMessage, RefreshBuckets}, +}; use pd_client::{BucketMeta, BucketStat}; use raftstore::{ coprocessor::RegionChangeEvent, @@ -141,7 +144,7 @@ impl BucketStatsInfo { impl Peer { #[inline] - pub fn on_refresh_region_buckets( + pub fn on_refresh_region_buckets( &mut self, store_ctx: &mut StoreContext, region_epoch: RegionEpoch, @@ -299,6 +302,57 @@ impl Peer { if let Some(apply_scheduler) = self.apply_scheduler() { apply_scheduler.send(ApplyTask::RefreshBucketStat(region_buckets.meta.clone())); } + let version = region_buckets.meta.version; + // Notify followers to flush their relevant memtables + let peers = self.region().get_peers().to_vec(); + if 
!self.is_leader() { + return; + } + for p in peers { + if p == *self.peer() || p.is_witness { + continue; + } + let mut msg = RaftMessage::default(); + msg.set_region_id(self.region_id()); + msg.set_from_peer(self.peer().clone()); + msg.set_to_peer(p.clone()); + msg.set_region_epoch(self.region().get_region_epoch().clone()); + let extra_msg = msg.mut_extra_msg(); + extra_msg.set_type(ExtraMessageType::MsgRefreshBuckets); + let mut refresh_buckets = RefreshBuckets::new(); + refresh_buckets.set_version(version); + extra_msg.set_refresh_buckets(refresh_buckets); + self.send_raft_message(store_ctx, msg); + } + } + + pub fn on_msg_refresh_buckets( + &mut self, + store_ctx: &mut StoreContext, + msg: &RaftMessage, + ) { + // leader should not receive this message + if self.is_leader() { + return; + } + let extra_msg = msg.get_extra_msg(); + let version = extra_msg.get_refresh_buckets().get_version(); + let region_epoch = msg.get_region_epoch(); + + let meta = BucketMeta { + region_id: self.region_id(), + version, + region_epoch: region_epoch.clone(), + keys: vec![], + sizes: vec![], + }; + + let mut store_meta = store_ctx.store_meta.lock().unwrap(); + if let Some(reader) = store_meta.readers.get_mut(&self.region_id()) { + reader + .0 + .update(ReadProgress::region_buckets(Arc::new(meta))); + } } #[inline] diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 40c7305455d..f77db54c5af 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -336,6 +336,10 @@ impl Peer { ); return; } + ExtraMessageType::MsgRefreshBuckets => { + self.on_msg_refresh_buckets(ctx, &msg); + return; + } _ => (), } } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 60ab1809688..7c037edf029 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -36,7 +36,7 
@@ use kvproto::{ }, raft_serverpb::{ ExtraMessage, ExtraMessageType, MergeState, PeerState, RaftMessage, RaftSnapshotData, - RaftTruncatedState, RegionLocalState, + RaftTruncatedState, RefreshBuckets, RegionLocalState, }, replication_modepb::{DrAutoSyncState, ReplicationMode}, }; @@ -2026,6 +2026,11 @@ where self.fsm.peer.region() } + #[inline] + fn peer(&self) -> &metapb::Peer { + &self.fsm.peer.peer + } + #[inline] fn store_id(&self) -> u64 { self.fsm.peer.peer.get_store_id() @@ -2831,9 +2836,8 @@ where } } // It's v2 only message and ignore does no harm. - ExtraMessageType::MsgGcPeerResponse - | ExtraMessageType::MsgFlushMemtable - | ExtraMessageType::MsgRefreshBuckets => (), + ExtraMessageType::MsgGcPeerResponse | ExtraMessageType::MsgFlushMemtable => (), + ExtraMessageType::MsgRefreshBuckets => self.on_msg_refresh_buckets(msg), } } @@ -6018,14 +6022,35 @@ where RegionChangeEvent::UpdateBuckets(buckets_count), self.fsm.peer.get_role(), ); - let old_region_buckets = self.fsm.peer.region_buckets.replace(region_buckets); + let old_region_buckets: Option = + self.fsm.peer.region_buckets.replace(region_buckets); self.fsm.peer.last_region_buckets = old_region_buckets; let mut store_meta = self.ctx.store_meta.lock().unwrap(); + let version = self.fsm.peer.region_buckets.as_ref().unwrap().meta.version; if let Some(reader) = store_meta.readers.get_mut(&self.fsm.region_id()) { reader.update(ReadProgress::region_buckets( self.fsm.peer.region_buckets.as_ref().unwrap().meta.clone(), )); } + + // Notify followers to refresh their buckets version + if self.fsm.peer.is_leader() { + let peers = self.region().get_peers().to_vec(); + for p in peers { + if &p == self.peer() || p.is_witness { + continue; + } + let mut extra_msg = ExtraMessage::default(); + extra_msg.set_type(ExtraMessageType::MsgRefreshBuckets); + let mut refresh_buckets = RefreshBuckets::new(); + refresh_buckets.set_version(version); + extra_msg.set_refresh_buckets(refresh_buckets); + self.fsm + .peer + 
.send_extra_message(extra_msg, &mut self.ctx.trans, &p); + } + } + debug!( "finished on_refresh_region_buckets"; "region_id" => self.fsm.region_id(), @@ -6040,6 +6065,28 @@ where ); } + pub fn on_msg_refresh_buckets(&mut self, msg: RaftMessage) { + // leader should not receive this message + if self.fsm.peer.is_leader() { + return; + } + let version = msg.get_extra_msg().get_refresh_buckets().get_version(); + let region_epoch = msg.get_region_epoch().clone(); + + let meta = BucketMeta { + region_id: self.region_id(), + version, + region_epoch, + keys: vec![], + sizes: vec![], + }; + + let mut store_meta = self.ctx.store_meta.lock().unwrap(); + if let Some(reader) = store_meta.readers.get_mut(&self.region_id()) { + reader.update(ReadProgress::region_buckets(Arc::new(meta))); + } + } + fn on_compaction_declined_bytes(&mut self, declined_bytes: u64) { self.fsm.peer.compaction_declined_bytes += declined_bytes; if self.fsm.peer.compaction_declined_bytes >= self.ctx.cfg.region_split_check_diff().0 { diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index ced50a9dc52..da4a9fb8376 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -499,6 +499,11 @@ impl ReadDelegate { self.leader_lease = leader_lease; } Progress::RegionBuckets(bucket_meta) => { + if let Some(meta) = &self.bucket_meta { + if meta.version >= bucket_meta.version { + return; + } + } self.bucket_meta = Some(bucket_meta); } Progress::WaitData(wait_data) => { @@ -561,6 +566,7 @@ impl ReadDelegate { pub fn check_stale_read_safe(&self, read_ts: u64) -> std::result::Result<(), RaftCmdResponse> { let safe_ts = self.read_progress.safe_ts(); + fail_point!("skip_check_stale_read_safe", |_| Ok(())); if safe_ts >= read_ts { return Ok(()); } diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 6d556d1c283..d432f264e01 
100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -450,7 +450,9 @@ impl ReadStats { .or_insert_with(|| RegionInfo::new(num)); region_info.flow.add(write); region_info.flow.add(data); - if let Some(buckets) = buckets { + // the bucket of the follower only have the version info and not needs to be + // recorded the hot bucket. + if let Some(buckets) = buckets && !buckets.sizes.is_empty() { let bucket_stat = self .region_buckets .entry(region_id) diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index edb95c36570..c70d72fce24 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -1673,12 +1673,23 @@ impl, EK: KvEngine> Cluster { pub fn refresh_region_bucket_keys( &mut self, - _region: &metapb::Region, - _buckets: Vec, - _bucket_ranges: Option>, + region: &metapb::Region, + buckets: Vec, + bucket_ranges: Option>, _expect_buckets: Option, ) -> u64 { - unimplemented!() + let leader = self.leader_of_region(region.get_id()).unwrap(); + let router = self.sim.rl().get_router(leader.get_store_id()).unwrap(); + let refresh_buckets_msg = PeerMsg::RefreshRegionBuckets { + region_epoch: region.get_region_epoch().clone(), + buckets, + bucket_ranges, + }; + + if let Err(e) = router.send(region.get_id(), refresh_buckets_msg) { + panic!("router send refresh buckets msg failed, error: {:?}", e,); + } + 0 } pub fn send_half_split_region_message( diff --git a/components/test_storage/src/util.rs b/components/test_storage/src/util.rs index 2b7d80ea013..e91125ba001 100644 --- a/components/test_storage/src/util.rs +++ b/components/test_storage/src/util.rs @@ -11,6 +11,13 @@ use super::*; macro_rules! prepare_raft_engine { ($cluster:expr, $key:expr) => {{ $cluster.run(); + leader_raft_engine!($cluster, $key) + }}; +} + +#[macro_export] +macro_rules! 
leader_raft_engine { + ($cluster:expr, $key:expr) => {{ // make sure leader has been elected. assert_eq!($cluster.must_get(b""), None); let region = $cluster.get_region($key.as_bytes()); @@ -24,6 +31,27 @@ macro_rules! prepare_raft_engine { }}; } +#[macro_export] +macro_rules! follower_raft_engine { + ($cluster:expr, $key:expr) => {{ + let mut ret = vec![]; + let region = $cluster.get_region($key.as_bytes()); + let leader = $cluster.leader_of_region(region.get_id()).unwrap(); + for peer in ®ion.peers { + if peer.get_id() != leader.get_id() { + let mut ctx = Context::default(); + ctx.set_stale_read(true); + ctx.set_region_id(region.get_id()); + ctx.set_region_epoch(region.get_region_epoch().clone()); + ctx.set_peer(peer.clone()); + let engine = $cluster.sim.rl().storages[&peer.get_id()].clone(); + ret.push((engine, ctx)); + } + } + ret + }}; +} + pub fn new_raft_engine( count: usize, key: &str, diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index 005d97dd4b5..18eaa0b6e98 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -476,6 +476,7 @@ mod tests { 116, 128, 0, 255, 255, 255, 255, 255, 255, 254, 0, 0, 0, 0, 0, 0, 0, 248, ], ]; + bucket.sizes = vec![10]; track.buckets = Some(Arc::new(bucket)); let mut stat = Statistics::default(); diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index 84e11d91c61..2fa3525eba2 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -1,15 +1,17 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::sync::Arc; +use std::{sync::Arc, thread, time::Duration}; use futures::executor::block_on; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ + coprocessor::Request, kvrpcpb::{Context, IsolationLevel}, tikvpb::TikvClient, }; use more_asserts::{assert_ge, assert_le}; use protobuf::Message; +use raftstore::store::Bucket; use test_coprocessor::*; use test_raftstore_macro::test_case; use test_storage::*; @@ -421,3 +423,49 @@ fn test_read_index_lock_checking_on_follower() { resp ); } + +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_follower_buckets() { + let mut cluster = new_cluster(0, 3); + cluster.run(); + fail::cfg("skip_check_stale_read_safe", "return()").unwrap(); + let product = ProductTable::new(); + let (raft_engine, ctx) = leader_raft_engine!(cluster, ""); + let (_, endpoint, _) = + init_data_with_engine_and_commit(ctx.clone(), raft_engine, &product, &[], true); + + let mut req = DagSelect::from(&product).build_with(ctx, &[0]); + let resp = handle_request(&endpoint, req.clone()); + assert_eq!(resp.get_latest_buckets_version(), 0); + + let mut bucket_key = product.get_record_range_all().get_start().to_owned(); + bucket_key.push(0); + let region = cluster.get_region(&bucket_key); + let bucket = Bucket { + keys: vec![bucket_key], + size: 1024, + }; + + cluster.refresh_region_bucket_keys(®ion, vec![bucket], None, None); + thread::sleep(Duration::from_millis(1000)); + let wait_refresh_buckets = |endpoint, req: Request, old_buckets_ver| { + let mut resp = Default::default(); + for _ in 0..10 { + resp = handle_request(&endpoint, req.clone()); + if resp.get_latest_buckets_version() != old_buckets_ver { + break; + } + thread::sleep(Duration::from_millis(100)); + } + assert_ne!(resp.get_latest_buckets_version(), old_buckets_ver); + }; + wait_refresh_buckets(endpoint, req.clone(), 0); + for (engine, ctx) in follower_raft_engine!(cluster, "") { + req.set_context(ctx.clone()); + let (_, 
endpoint, _) = + init_data_with_engine_and_commit(ctx.clone(), engine, &product, &[], true); + wait_refresh_buckets(endpoint, req.clone(), 0); + } + fail::remove("skip_check_stale_read_safe"); +} diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index f1314694e64..a2487456108 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ b/tests/failpoints/cases/test_import_service.rs @@ -296,19 +296,20 @@ fn test_delete_sst_v2_after_epoch_stale() { // delete sts if the region epoch is stale. let pd_client = cluster.pd_client.clone(); pd_client.disable_default_operator(); - let region = cluster.get_region(b"zk10"); - pd_client.must_split_region( - region, - kvproto::pdpb::CheckPolicy::Usekey, - vec![b"random_key1".to_vec()], - ); let (tx, rx) = channel::<()>(); let tx = Arc::new(Mutex::new(tx)); fail::cfg_callback("on_cleanup_import_sst_schedule", move || { tx.lock().unwrap().send(()).unwrap(); }) .unwrap(); - rx.recv_timeout(std::time::Duration::from_millis(1000)) + let region = cluster.get_region(b"zk10"); + pd_client.must_split_region( + region, + kvproto::pdpb::CheckPolicy::Usekey, + vec![b"zk10".to_vec()], + ); + + rx.recv_timeout(std::time::Duration::from_millis(100)) .unwrap(); std::thread::sleep(std::time::Duration::from_millis(100)); assert_eq!(0, sst_file_count(&cluster.paths)); From 6dd40ad67b476084d6697ad246970bfd5a023dbb Mon Sep 17 00:00:00 2001 From: ekexium Date: Wed, 26 Jul 2023 12:18:04 +0800 Subject: [PATCH 0816/1149] metrics: add min_safe_ts, min_safe_ts_region and min_safe_ts_gap (#15118) ref tikv/tikv#15082 metrics: add min_safe_ts, min_safe_ts_region and min_safe_ts_gap Signed-off-by: ekexium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resolved_ts/src/advance.rs | 10 +- components/resolved_ts/src/endpoint.rs | 71 +- components/resolved_ts/src/metrics.rs | 25 +- components/resolved_ts/src/resolver.rs | 2 +- 
metrics/grafana/tikv_details.json | 3326 +++++++++++++----------- 5 files changed, 1867 insertions(+), 1567 deletions(-) diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index 287eb93f841..4428ed01a35 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -45,7 +45,7 @@ use txn_types::TimeStamp; use crate::{endpoint::Task, metrics::*}; -const DEFAULT_CHECK_LEADER_TIMEOUT_DURATION: Duration = Duration::from_secs(5); // 5s +pub(crate) const DEFAULT_CHECK_LEADER_TIMEOUT_DURATION: Duration = Duration::from_secs(5); // 5s const DEFAULT_GRPC_GZIP_COMPRESSION_LEVEL: usize = 2; const DEFAULT_GRPC_MIN_MESSAGE_SIZE_TO_COMPRESS: usize = 4096; @@ -58,6 +58,9 @@ pub struct AdvanceTsWorker { /// The concurrency manager for transactions. It's needed for CDC to check /// locks when calculating resolved_ts. concurrency_manager: ConcurrencyManager, + + // cache the last pd tso, used to approximate the next timestamp w/o an actual TSO RPC + pub(crate) last_pd_tso: Arc>>, } impl AdvanceTsWorker { @@ -81,6 +84,7 @@ impl AdvanceTsWorker { advance_ts_interval, timer: SteadyTimer::default(), concurrency_manager, + last_pd_tso: Arc::new(std::sync::Mutex::new(None)), } } } @@ -103,9 +107,13 @@ impl AdvanceTsWorker { self.advance_ts_interval, )); + let last_pd_tso = self.last_pd_tso.clone(); let fut = async move { // Ignore get tso errors since we will retry every `advdance_ts_interval`. let mut min_ts = pd_client.get_tso().await.unwrap_or_default(); + if let Ok(mut last_pd_tso) = last_pd_tso.try_lock() { + *last_pd_tso = Some((min_ts, Instant::now())); + } // Sync with concurrency manager so that it can work correctly when // optimizations like async commit is enabled. 
diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 3ce38874244..6d4ebf9986b 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -35,13 +35,16 @@ use tokio::sync::Notify; use txn_types::{Key, TimeStamp}; use crate::{ - advance::{AdvanceTsWorker, LeadershipResolver}, + advance::{AdvanceTsWorker, LeadershipResolver, DEFAULT_CHECK_LEADER_TIMEOUT_DURATION}, cmd::{ChangeLog, ChangeRow}, metrics::*, resolver::Resolver, scanner::{ScanEntry, ScanMode, ScanTask, ScannerPool}, }; +/// grace period for logging safe-ts and resolved-ts gap in slow log +const SLOW_LOG_GRACE_PERIOD_MS: u64 = 1000; + enum ResolverStatus { Pending { tracked_index: u64, @@ -806,9 +809,18 @@ where let store_id = self.get_or_init_store_id(); let (mut oldest_ts, mut oldest_region, mut zero_ts_count) = (u64::MAX, 0, 0); let (mut oldest_leader_ts, mut oldest_leader_region) = (u64::MAX, 0); + let (mut oldest_safe_ts, mut oldest_safe_ts_region) = (u64::MAX, 0); self.region_read_progress.with(|registry| { for (region_id, read_progress) in registry { + let safe_ts = read_progress.safe_ts(); + if safe_ts > 0 && safe_ts < oldest_safe_ts { + oldest_safe_ts = safe_ts; + oldest_safe_ts_region = *region_id; + } + let (leader_info, leader_store_id) = read_progress.dump_leader_info(); + // this is maximum resolved-ts pushed to region_read_progress, namely candidates + // of safe_ts. It may not be the safe_ts yet let ts = leader_info.get_read_state().get_safe_ts(); if ts == 0 { zero_ts_count += 1; @@ -846,19 +858,62 @@ where } } } + // approximate a TSO from PD. It is better than local timestamp when clock skew + // exists. 
+ let now: u64 = self + .advance_worker + .last_pd_tso + .try_lock() + .map(|opt| { + opt.map(|(pd_ts, instant)| { + pd_ts.physical() + instant.saturating_elapsed().as_millis() as u64 + }) + .unwrap_or_else(|| TimeStamp::physical_now()) + }) + .unwrap_or_else(|_| TimeStamp::physical_now()); + + RTS_MIN_SAFE_TS.set(oldest_safe_ts as i64); + RTS_MIN_SAFE_TS_REGION.set(oldest_safe_ts_region as i64); + let safe_ts_gap = now.saturating_sub(TimeStamp::from(oldest_safe_ts).physical()); + if safe_ts_gap + > self.cfg.advance_ts_interval.as_millis() + + DEFAULT_CHECK_LEADER_TIMEOUT_DURATION.as_millis() as u64 + + SLOW_LOG_GRACE_PERIOD_MS + { + let mut lock_num = None; + let mut min_start_ts = None; + if let Some(ob) = self.regions.get(&oldest_safe_ts_region) { + min_start_ts = ob + .resolver + .locks() + .keys() + .next() + .cloned() + .map(TimeStamp::into_inner); + lock_num = Some(ob.resolver.locks_by_key.len()); + } + info!( + "the max gap of safe-ts is large"; + "gap" => safe_ts_gap, + "oldest safe-ts" => ?oldest_safe_ts, + "region id" => oldest_safe_ts_region, + "advance-ts-interval" => ?self.cfg.advance_ts_interval, + "lock num" => lock_num, + "min start ts" => min_start_ts, + ); + } + RTS_MIN_SAFE_TS_GAP.set(safe_ts_gap as i64); + RTS_MIN_RESOLVED_TS_REGION.set(oldest_region as i64); RTS_MIN_RESOLVED_TS.set(oldest_ts as i64); RTS_ZERO_RESOLVED_TS.set(zero_ts_count as i64); - RTS_MIN_RESOLVED_TS_GAP.set( - TimeStamp::physical_now().saturating_sub(TimeStamp::from(oldest_ts).physical()) as i64, - ); + RTS_MIN_RESOLVED_TS_GAP + .set(now.saturating_sub(TimeStamp::from(oldest_ts).physical()) as i64); RTS_MIN_LEADER_RESOLVED_TS_REGION.set(oldest_leader_region as i64); RTS_MIN_LEADER_RESOLVED_TS.set(oldest_leader_ts as i64); - RTS_MIN_LEADER_RESOLVED_TS_GAP.set( - TimeStamp::physical_now().saturating_sub(TimeStamp::from(oldest_leader_ts).physical()) - as i64, - ); + RTS_MIN_LEADER_RESOLVED_TS_GAP + .set(now.saturating_sub(TimeStamp::from(oldest_leader_ts).physical()) as i64); 
RTS_LOCK_HEAP_BYTES_GAUGE.set(lock_heap_size as i64); RTS_REGION_RESOLVE_STATUS_GAUGE_VEC diff --git a/components/resolved_ts/src/metrics.rs b/components/resolved_ts/src/metrics.rs index 3ec35685c36..979da747fb1 100644 --- a/components/resolved_ts/src/metrics.rs +++ b/components/resolved_ts/src/metrics.rs @@ -38,7 +38,7 @@ lazy_static! { .unwrap(); pub static ref RTS_MIN_RESOLVED_TS_GAP: IntGauge = register_int_gauge!( "tikv_resolved_ts_min_resolved_ts_gap_millis", - "The minimal (non-zero) resolved ts gap for observe regions" + "The minimal (non-zero) resolved ts gap for observed regions" ) .unwrap(); pub static ref RTS_RESOLVED_FAIL_ADVANCE_VEC: IntCounterVec = register_int_counter_vec!( @@ -66,22 +66,37 @@ lazy_static! { .unwrap(); pub static ref RTS_MIN_RESOLVED_TS: IntGauge = register_int_gauge!( "tikv_resolved_ts_min_resolved_ts", - "The minimal (non-zero) resolved ts for observe regions" + "The minimal (non-zero) resolved ts for observed regions" + ) + .unwrap(); + pub static ref RTS_MIN_SAFE_TS_REGION: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_safe_ts_region", + "The region which has minimal safe ts" + ) + .unwrap(); + pub static ref RTS_MIN_SAFE_TS: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_safe_ts", + "The minimal (non-zero) safe ts for observed regions" + ) + .unwrap(); + pub static ref RTS_MIN_SAFE_TS_GAP: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_safe_ts_gap_millis", + "The minimal (non-zero) safe ts gap for observed regions" ) .unwrap(); pub static ref RTS_ZERO_RESOLVED_TS: IntGauge = register_int_gauge!( "tikv_resolved_ts_zero_resolved_ts", - "The number of zero resolved ts for observe regions" + "The number of zero resolved ts for observed regions" ) .unwrap(); pub static ref RTS_LOCK_HEAP_BYTES_GAUGE: IntGauge = register_int_gauge!( "tikv_resolved_ts_lock_heap_bytes", - "Total bytes in memory of resolved-ts observe regions's lock heap" + "Total bytes in memory of resolved-ts observed regions's lock heap" ) 
.unwrap(); pub static ref RTS_REGION_RESOLVE_STATUS_GAUGE_VEC: IntGaugeVec = register_int_gauge_vec!( "tikv_resolved_ts_region_resolve_status", - "The status of resolved-ts observe regions", + "The status of resolved-ts observed regions", &["type"] ) .unwrap(); diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index d165c5cc05f..5e8f58e97e1 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -15,7 +15,7 @@ const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; pub struct Resolver { region_id: u64, // key -> start_ts - locks_by_key: HashMap, TimeStamp>, + pub(crate) locks_by_key: HashMap, TimeStamp>, // start_ts -> locked keys. lock_ts_heap: BTreeMap>>, // The timestamps that guarantees no more commit will happen before. diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index b025fd8bac0..060d2e83d01 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -14,7 +14,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "7.5.10" + "version": "7.5.11" }, { "type": "panel", @@ -70,7 +70,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1686724160525, + "iteration": 1689914850671, "links": [], "panels": [ { @@ -21249,7 +21249,7 @@ "h": 1, "w": 24, "x": 0, - "y": 28 + "y": 30 }, "id": 2755, "panels": [ @@ -21775,7 +21775,7 @@ "h": 1, "w": 24, "x": 0, - "y": 29 + "y": 31 }, "id": 2758, "panels": [ @@ -23115,7 +23115,7 @@ "h": 1, "w": 24, "x": 0, - "y": 30 + "y": 32 }, "id": 2759, "panels": [ @@ -24015,7 +24015,7 @@ "h": 1, "w": 24, "x": 0, - "y": 31 + "y": 33 }, "id": 2760, "panels": [ @@ -24427,7 +24427,7 @@ "h": 1, "w": 24, "x": 0, - "y": 32 + "y": 34 }, "id": 2757, "panels": [ @@ -25231,7 +25231,7 @@ "h": 1, "w": 24, "x": 0, - "y": 33 + "y": 35 }, "id": 3197, "panels": [ @@ -26325,7 +26325,7 @@ "h": 1, "w": 24, "x": 0, - "y": 34 + "y": 36 }, "id": 2761, "panels": [ @@ -26722,7 +26722,7 @@ 
"h": 1, "w": 24, "x": 0, - "y": 35 + "y": 37 }, "id": 2762, "panels": [ @@ -31462,7 +31462,7 @@ "h": 1, "w": 24, "x": 0, - "y": 36 + "y": 38 }, "id": 12802, "panels": [ @@ -32511,7 +32511,7 @@ "h": 1, "w": 24, "x": 0, - "y": 37 + "y": 39 }, "id": 3301, "panels": [ @@ -35520,7 +35520,7 @@ "h": 1, "w": 24, "x": 0, - "y": 38 + "y": 40 }, "id": 2820, "panels": [ @@ -36652,7 +36652,7 @@ "h": 1, "w": 24, "x": 0, - "y": 39 + "y": 41 }, "id": 23763573235, "panels": [ @@ -38082,15 +38082,1756 @@ "type": "row" }, { - "collapsed": true, + "collapsed": false, "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 40 + "y": 42 }, "id": 8389, + "panels": [], + "title": "Resolved-TS", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": " \tThe CPU utilization of resolved ts worker", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 43 + }, + "hiddenSeries": false, + "id": 8385, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"resolved_ts.*\"}[1m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": 
"{{instance}}", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Resolved TS Worker CPU", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": " \tThe CPU utilization of advance ts worker", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 43 + }, + "hiddenSeries": false, + "id": 9162, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"advance_ts.*\"}[1m])) by (instance)", + "format": "time_series", + 
"intervalFactor": 2, + "legendFormat": "{{instance}}-tso", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Advance ts Worker CPU", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": " \tThe CPU utilization of scan lock worker", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 43 + }, + "hiddenSeries": false, + "id": 9164, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"inc_scan.*\"}[1m])) by (instance)", + 
"format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}-scan", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Scan lock Worker CPU", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The gap between resolved ts (the maximum candidate of safe-ts) and current time.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 51 + }, + "hiddenSeries": false, + "id": 8387, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(tikv_resolved_ts_min_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 60 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Max gap of resolved-ts", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The gap between safe ts and current time", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 51 + }, + "hiddenSeries": false, + "id": 23763573805, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(tikv_resolved_ts_min_safe_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by 
(instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 60 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Max gap of safe-ts", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The region that has minimal resolved ts", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 59 + }, + "hiddenSeries": false, + "id": 23763572078, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(tikv_resolved_ts_min_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", 
instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Min Resolved TS Region", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The region that has minimal safe ts", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 59 + }, + "hiddenSeries": false, + "id": 23763573804, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": 
"sum(tikv_resolved_ts_min_safe_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Min Safe TS Region", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed when handle a check leader request", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 67 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 9168, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "expr": "sum(delta(tikv_resolved_ts_check_leader_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "format": "heatmap", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "metric": "", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Check leader duration", + "tooltip": { + "show": true, + "showHistogram": false + 
}, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The gap between resolved ts of leaders and current time", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 67 + }, + "hiddenSeries": false, + "id": 23763572077, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 60 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Max gap of resolved-ts in region leaders", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": 
"time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Bucketed histogram of region count in a check leader request", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 75 + }, + "hiddenSeries": false, + "id": 12308, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "tikv_snapshot_size_bucket", + "refId": "A", + "step": 40 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "99% CheckLeader request region count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": 
"individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The region that its leader has minimal resolved ts.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 75 + }, + "hiddenSeries": false, + "id": 23763572079, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Min Leader Resolved TS Region", + "tooltip": { + "msResolution": 
false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "Total bytes in memory of resolved-ts observe regions's lock heap", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 83 + }, + "hiddenSeries": false, + "id": 8379, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(tikv_resolved_ts_lock_heap_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Lock heap size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The status of resolved-ts observe regions", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 83 + }, + "hiddenSeries": false, + "id": 8377, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(tikv_resolved_ts_region_resolve_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Observe region status", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 
0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The count of fail to advance resolved-ts", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 91 + }, + "hiddenSeries": false, + "id": 9166, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(delta(tikv_resolved_ts_fail_advance_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}-{{reason}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Fail advance ts count", + 
"tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Bucketed histogram of the check leader request size", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 91 + }, + "hiddenSeries": false, + "id": 8383, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "tikv_snapshot_size_bucket", + "refId": "A", + "step": 40 + }, + { + "expr": "histogram_quantile(0.99, 
sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "{{instance}}-check-num", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "99% CheckLeader request size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "Total bytes of pending commands in the channel", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 99 + }, + "hiddenSeries": false, + "id": 8381, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"avg(tikv_resolved_ts_channel_penging_cmd_bytes_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pending command size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 107 + }, + "id": 2763, "panels": [ { "aliasColors": {}, @@ -38098,34 +39839,28 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of resolved ts worker", - "editable": true, - "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, - "w": 8, + "h": 7, + "w": 12, "x": 0, - "y": 38 + "y": 44 }, "hiddenSeries": false, - "id": 8385, + "id": 23763573729, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, - "min": false, + "min": true, "rightSide": true, "show": true, - "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -38139,7 +39874,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -38149,22 +39884,20 @@ "steppedLine": false, 
"targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"resolved_ts.*\"}[1m])) by (instance)", + "expr": "tikv_allocator_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", "format": "time_series", + "hide": false, "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "legendFormat": "{{type}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Resolved TS Worker CPU", + "title": "Allocator Stats", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -38179,1522 +39912,11 @@ }, "yaxes": [ { - "format": "percentunit", + "format": "decbytes", "label": null, "logBase": 1, "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of advance ts worker", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 38 - }, - "hiddenSeries": false, - "id": 9162, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, - 
"points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"advance_ts.*\"}[1m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-tso", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Advance ts Worker CPU", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of scan lock worker", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 38 - }, - "hiddenSeries": false, - "id": 9164, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": 
"7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"inc_scan.*\"}[1m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-scan", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Scan lock Worker CPU", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between resolved tso and current time", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 46 - }, - "hiddenSeries": false, - "id": 8387, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - 
"pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(tikv_resolved_ts_min_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 60 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Max Resolved TS gap", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between resolved tso of leaders and current time", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 46 - }, - "hiddenSeries": false, - "id": 23763572077, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.7", - 
"pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 60 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Max Leader Resolved TS gap", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The region that has minimal resolved ts", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 54 - }, - "hiddenSeries": false, - "id": 23763572078, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": 
"7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Min Resolved TS Region", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The region that its leader has minimal resolved ts.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 54 - }, - "hiddenSeries": false, - "id": 23763572079, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - 
"percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Min Leader Resolved TS Region", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when handle a check leader request", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 62 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 9168, - "legend": { - "show": false - }, - "links": [], - "reverseYBuckets": false, - "targets": [ - { - "expr": "sum(delta(tikv_resolved_ts_check_leader_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "metric": "", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Check leader duration", - "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The status of resolved-ts observe regions", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 62 - }, - "hiddenSeries": false, - "id": 8377, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(tikv_resolved_ts_region_resolve_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A", 
- "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Observe region status", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "description": "Bucketed histogram of region count in a check leader request", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 70 - }, - "hiddenSeries": false, - "id": 12308, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": 
"{{instance}}", - "metric": "tikv_snapshot_size_bucket", - "refId": "A", - "step": 40 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "99% CheckLeader request region count", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The count of fail to advance resolved-ts", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 70 - }, - "hiddenSeries": false, - "id": 9166, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": false, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(delta(tikv_resolved_ts_fail_advance_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by 
(instance, reason)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}}-{{reason}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Fail advance ts count", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "Total bytes in memory of resolved-ts observe regions's lock heap", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 78 - }, - "hiddenSeries": false, - "id": 8379, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg(tikv_resolved_ts_lock_heap_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by 
(instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Lock heap size", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "description": "Bucketed histogram of the check leader request size", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 78 - }, - "hiddenSeries": false, - "id": 8383, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - 
"format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_snapshot_size_bucket", - "refId": "A", - "step": 40 - }, - { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "{{instance}}-check-num", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "99% CheckLeader request size", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "Total bytes of pending commands in the channel", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 86 - }, - "hiddenSeries": false, - "id": 8381, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - 
"pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg(tikv_resolved_ts_channel_penging_cmd_bytes_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Pending command size", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "title": "Resolved-TS", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 41 - }, - "id": 2763, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 44 - }, - "hiddenSeries": false, - "id": 23763573729, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - 
"pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "tikv_allocator_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Allocator Stats", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, + "min": null, "show": true }, { @@ -40031,7 +40253,7 @@ "h": 1, "w": 24, "x": 0, - "y": 42 + "y": 108 }, "id": 3922, "panels": [ @@ -42969,7 +43191,7 @@ "h": 1, "w": 24, "x": 0, - "y": 43 + "y": 109 }, "id": 4466, "panels": [ @@ -43542,7 +43764,7 @@ "h": 1, "w": 24, "x": 0, - "y": 44 + "y": 110 }, "id": 13016, "panels": [ From 9f9283ebaa5c79cb5100660d8803f2e242042987 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 26 Jul 2023 15:12:03 +0800 Subject: [PATCH 0817/1149] snap_recovery: added some metrics (#15111) close tikv/tikv#15110 Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 3 ++ components/snap_recovery/Cargo.toml | 4 ++ components/snap_recovery/src/lib.rs | 1 + components/snap_recovery/src/metrics.rs | 41 +++++++++++++++++++ .../src/region_meta_collector.rs | 3 ++ components/snap_recovery/src/services.rs | 39 +++++++++++++----- 6 files changed, 80 insertions(+), 11 deletions(-) create mode 100644 components/snap_recovery/src/metrics.rs diff --git a/Cargo.lock b/Cargo.lock index 
9d59dc0fb40..e081bd06a8a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5795,8 +5795,11 @@ dependencies = [ "grpcio", "keys", "kvproto", + "lazy_static", "log", "pd_client", + "prometheus", + "prometheus-static-metric", "protobuf", "raft_log_engine", "raftstore", diff --git a/components/snap_recovery/Cargo.toml b/components/snap_recovery/Cargo.toml index 4768759b852..985c7323af3 100644 --- a/components/snap_recovery/Cargo.toml +++ b/components/snap_recovery/Cargo.toml @@ -15,8 +15,11 @@ futures = { version = "0.3", features = ["executor"] } grpcio = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } +lazy_static = "1.4" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } pd_client = { workspace = true } +prometheus = { version = "0.13", default_features = false, features = ["nightly"] } +prometheus-static-metric = "0.5" protobuf = { version = "2.8", features = ["bytes"] } raft_log_engine = { workspace = true } raftstore = { workspace = true } @@ -30,3 +33,4 @@ tikv_alloc = { workspace = true } tikv_util = { workspace = true } toml = "0.5" txn_types = { workspace = true } + diff --git a/components/snap_recovery/src/lib.rs b/components/snap_recovery/src/lib.rs index 2ed4a97c5d3..043cffb3c80 100644 --- a/components/snap_recovery/src/lib.rs +++ b/components/snap_recovery/src/lib.rs @@ -9,4 +9,5 @@ pub use init_cluster::{enter_snap_recovery_mode, start_recovery}; pub use services::RecoveryService; mod data_resolver; +mod metrics; mod region_meta_collector; diff --git a/components/snap_recovery/src/metrics.rs b/components/snap_recovery/src/metrics.rs new file mode 100644 index 00000000000..2999fe0798b --- /dev/null +++ b/components/snap_recovery/src/metrics.rs @@ -0,0 +1,41 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. + +use lazy_static::*; +use prometheus::*; +use prometheus_static_metric::*; + +lazy_static! 
{ + pub static ref REGION_EVENT_COUNTER: RegionEvent = register_static_int_counter_vec!( + RegionEvent, + "tikv_snap_restore_region_event", + "the total count of some events that each happened to one region. (But the counter counts all regions' events.)", + &["event"] + ) + .unwrap(); + + // NOTE: should we handle the concurrent case by adding a tid parameter? + pub static ref CURRENT_WAIT_APPLY_LEADER: IntGauge = register_int_gauge!( + "tikv_current_waiting_leader_apply", + "the current leader we are awaiting." + ).unwrap(); + + pub static ref CURRENT_WAIT_ELECTION_LEADER : IntGauge = register_int_gauge!( + "tikv_current_waiting_leader_election", + "the current leader we are awaiting." + ).unwrap(); + +} + +make_static_metric! { + pub label_enum RegionEventType { + collect_meta, + promote_to_leader, + keep_follower, + start_wait_leader_apply, + finish_wait_leader_apply, + } + + pub struct RegionEvent : IntCounter { + "event" => RegionEventType, + } +} diff --git a/components/snap_recovery/src/region_meta_collector.rs b/components/snap_recovery/src/region_meta_collector.rs index bb8f68dbc68..e3542d6691b 100644 --- a/components/snap_recovery/src/region_meta_collector.rs +++ b/components/snap_recovery/src/region_meta_collector.rs @@ -12,6 +12,8 @@ use kvproto::{ use thiserror::Error; use tikv_util::sys::thread::StdThreadBuildWrapper; +use crate::metrics::REGION_EVENT_COUNTER; + pub type Result = result::Result; #[allow(dead_code)] @@ -143,6 +145,7 @@ impl CollectWorker { // send to br let response = region_state.to_region_meta(); + REGION_EVENT_COUNTER.collect_meta.inc(); if let Err(e) = self.tx.unbounded_send(response) { warn!("send the region meta failure"; "err" => ?e); diff --git a/components/snap_recovery/src/services.rs b/components/snap_recovery/src/services.rs index 08201aebf3f..eb5c30a7ac2 100644 --- a/components/snap_recovery/src/services.rs +++ b/components/snap_recovery/src/services.rs @@ -35,7 +35,11 @@ use raftstore::{ use thiserror::Error; use 
tikv_util::sys::thread::{StdThreadBuildWrapper, ThreadBuildWrapper}; -use crate::{data_resolver::DataResolverManager, region_meta_collector::RegionMetaCollector}; +use crate::{ + data_resolver::DataResolverManager, + metrics::{CURRENT_WAIT_APPLY_LEADER, CURRENT_WAIT_ELECTION_LEADER, REGION_EVENT_COUNTER}, + region_meta_collector::RegionMetaCollector, +}; pub type Result = result::Result; @@ -203,7 +207,7 @@ impl RecoverData for RecoveryService { } .map(|res: Result<()>| match res { Ok(_) => { - info!("collect region meta done"); + debug!("collect region meta done"); } Err(e) => { error!("rcollect region meta failure"; "error" => ?e); @@ -229,7 +233,10 @@ impl RecoverData for RecoveryService { while let Some(req) = stream.next().await { let req = req.map_err(|e| eprintln!("rpc recv fail: {}", e)).unwrap(); if req.as_leader { + REGION_EVENT_COUNTER.promote_to_leader.inc(); leaders.push(req.region_id); + } else { + REGION_EVENT_COUNTER.keep_follower.inc(); } } @@ -242,8 +249,7 @@ impl RecoverData for RecoveryService { "err" => ?e); continue; } else { - info!("region starts to campaign"; - "region_id" => region_id); + debug!("region starts to campaign"; "region_id" => region_id); } let (tx, rx) = sync_channel(1); @@ -260,12 +266,15 @@ impl RecoverData for RecoveryService { rxs.push(Some(rx)); } + info!("send assign leader request done"; "count" => %leaders.len()); + // leader is campaign and be ensured as leader - for (_rid, rx) in leaders.iter().zip(rxs) { + for (rid, rx) in leaders.iter().zip(rxs) { if let Some(rx) = rx { + CURRENT_WAIT_ELECTION_LEADER.set(*rid as _); match rx.recv() { - Ok(_id) => { - info!("leader is assigned for region"); + Ok(id) => { + debug!("leader is assigned for region"; "region_id" => %id); } Err(e) => { error!("check leader failed"; "error" => ?e); @@ -273,14 +282,16 @@ impl RecoverData for RecoveryService { } } } + CURRENT_WAIT_ELECTION_LEADER.set(0); - info!("all region leader assigned done"); + info!("all region leader assigned done"; 
"count" => %leaders.len()); let now = Instant::now(); // wait apply to the last log let mut rx_apply = Vec::with_capacity(leaders.len()); for ®ion_id in &leaders { let (tx, rx) = sync_channel(1); + REGION_EVENT_COUNTER.start_wait_leader_apply.inc(); let wait_apply = SnapshotRecoveryWaitApplySyncer::new(region_id, tx.clone()); if let Err(e) = raft_router.significant_send( region_id, @@ -296,22 +307,25 @@ impl RecoverData for RecoveryService { } // leader apply to last log - for (_rid, rx) in leaders.iter().zip(rx_apply) { + for (rid, rx) in leaders.iter().zip(rx_apply) { if let Some(rx) = rx { + CURRENT_WAIT_APPLY_LEADER.set(*rid as _); match rx.recv() { Ok(region_id) => { - info!("leader apply to last log"; "error" => region_id); + debug!("leader apply to last log"; "region_id" => region_id); } Err(e) => { error!("leader failed to apply to last log"; "error" => ?e); } } + REGION_EVENT_COUNTER.finish_wait_leader_apply.inc(); } } + CURRENT_WAIT_APPLY_LEADER.set(0); info!( "all region leader apply to last log"; - "spent_time" => now.elapsed().as_secs(), + "spent_time" => now.elapsed().as_secs(), "count" => %leaders.len(), ); let mut resp = RecoverRegionResponse::default(); @@ -337,6 +351,9 @@ impl RecoverData for RecoveryService { info!("wait_apply start"); let task = async move { let now = Instant::now(); + // FIXME: this function will exit once the first region finished apply. + // BUT for the flashback resolve KV implementation, that is fine because the + // raft log stats is consistent. 
let (tx, rx) = sync_channel(1); RecoveryService::wait_apply_last(router, tx.clone()); match rx.recv() { From 5b3b36259391bcb92e418988316802f6806c499f Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Wed, 26 Jul 2023 15:57:04 +0800 Subject: [PATCH 0818/1149] Revert "RocksDB: change compaction-guard-min-output-file-size to 1MB" (#15211) close tikv/tikv#15177 Revert "RocksDB: change compaction-guard-min-output-file-size to 1MB" --- etc/config-template.toml | 4 ++-- src/config/mod.rs | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/etc/config-template.toml b/etc/config-template.toml index b9cde90e633..317336236d2 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -824,7 +824,7 @@ ## The lower bound of SST file size when compaction guard is enabled. The config prevent SST files ## being too small when compaction guard is enabled. -# compaction-guard-min-output-file-size = "1M" +# compaction-guard-min-output-file-size = "8M" ## The upper bound of SST file size when compaction guard is enabled. The config prevent SST files ## being too large when compaction guard is enabled. 
This config overrides target-file-size-base @@ -986,7 +986,7 @@ # dynamic-level-bytes = true # optimize-filters-for-hits = false # enable-compaction-guard = true -# compaction-guard-min-output-file-size = "1M" +# compaction-guard-min-output-file-size = "8M" # compaction-guard-max-output-file-size = "128M" # format-version = 2 # prepopulate-block-cache = "disabled" diff --git a/src/config/mod.rs b/src/config/mod.rs index 30f718eaa66..07580e07876 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -723,7 +723,7 @@ impl Default for DefaultCfConfig { prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, enable_doubly_skiplist: true, enable_compaction_guard: None, - compaction_guard_min_output_file_size: ReadableSize::mb(1), + compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Zstd, bottommost_zstd_compression_dict_size: 0, @@ -895,7 +895,7 @@ impl Default for WriteCfConfig { prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, enable_doubly_skiplist: true, enable_compaction_guard: None, - compaction_guard_min_output_file_size: ReadableSize::mb(1), + compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Zstd, bottommost_zstd_compression_dict_size: 0, @@ -1017,7 +1017,7 @@ impl Default for LockCfConfig { prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, enable_doubly_skiplist: true, enable_compaction_guard: None, - compaction_guard_min_output_file_size: ReadableSize::mb(1), + compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Disable, bottommost_zstd_compression_dict_size: 0, @@ -1114,7 +1114,7 @@ impl Default for RaftCfConfig { prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, 
enable_doubly_skiplist: true, enable_compaction_guard: None, - compaction_guard_min_output_file_size: ReadableSize::mb(1), + compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Disable, bottommost_zstd_compression_dict_size: 0, @@ -1659,7 +1659,7 @@ impl Default for RaftDefaultCfConfig { prop_keys_index_distance: DEFAULT_PROP_KEYS_INDEX_DISTANCE, enable_doubly_skiplist: true, enable_compaction_guard: None, - compaction_guard_min_output_file_size: ReadableSize::mb(1), + compaction_guard_min_output_file_size: ReadableSize::mb(8), compaction_guard_max_output_file_size: ReadableSize::mb(128), bottommost_level_compression: DBCompressionType::Disable, bottommost_zstd_compression_dict_size: 0, From 2a5adec17f6117ec70cedf366af26d8ebeb8d04b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 26 Jul 2023 19:43:34 +0800 Subject: [PATCH 0819/1149] snap-restore: added the config for temporarily disable check quorum (#15196) ref tikv/tikv#15122 Signed-off-by: hillium Co-authored-by: tonyxuqqi --- components/raftstore/src/store/config.rs | 11 +++++++++++ components/raftstore/src/store/peer.rs | 2 +- components/snap_recovery/src/init_cluster.rs | 8 ++++++++ tests/integrations/config/mod.rs | 1 + 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 827ddc72727..eca0b3c1ede 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -107,6 +107,16 @@ pub struct Config { #[online_config(skip)] pub raft_reject_transfer_leader_duration: ReadableDuration, + /// Whether to disable checking quorum for the raft group. This will make + /// leader lease unavailable. 
+ /// It cannot be changed in the config file, the only way to change it is + /// programmatically change the config structure during bootstrapping + /// the cluster. + #[doc(hidden)] + #[serde(skip)] + #[online_config(skip)] + pub unsafe_disable_check_quorum: bool, + // Interval (ms) to check region whether need to be split or not. pub split_region_check_tick_interval: ReadableDuration, /// When size change of region exceed the diff since last check, it @@ -501,6 +511,7 @@ impl Default for Config { // TODO: make its value reasonable check_request_snapshot_interval: ReadableDuration::minutes(1), enable_v2_compatible_learner: false, + unsafe_disable_check_quorum: false, } } } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index d4751d9ddfd..3ff09362c96 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -825,7 +825,7 @@ where max_size_per_msg: cfg.raft_max_size_per_msg.0, max_inflight_msgs: cfg.raft_max_inflight_msgs, applied: applied_index, - check_quorum: true, + check_quorum: !cfg.unsafe_disable_check_quorum, skip_bcast_commit: true, pre_vote: cfg.prevote, max_committed_size_per_ready: MAX_COMMITTED_SIZE_PER_READY, diff --git a/components/snap_recovery/src/init_cluster.rs b/components/snap_recovery/src/init_cluster.rs index 46275e0c610..84a10cc1338 100644 --- a/components/snap_recovery/src/init_cluster.rs +++ b/components/snap_recovery/src/init_cluster.rs @@ -85,6 +85,14 @@ pub fn enter_snap_recovery_mode(config: &mut TikvConfig) { config.raft_store.max_snapshot_file_raw_size = ReadableSize::gb(1); config.raft_store.hibernate_regions = false; + // Disable prevote so it is possible to regenerate leaders. + config.raft_store.prevote = false; + // Because we have increased the election tick to inf, once there is a leader, + // the follower will believe it holds an eternal lease. So, once the leader + // reboots, the followers will reject to vote for it again. 
+ // We need to disable the lease for avoiding that. + config.raft_store.unsafe_disable_check_quorum = true; + // disable auto compactions during the restore config.rocksdb.defaultcf.disable_auto_compactions = true; config.rocksdb.writecf.disable_auto_compactions = true; diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 579b95a75bb..801effcbb3d 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -267,6 +267,7 @@ fn test_serde_custom_tikv_config() { slow_trend_unsensitive_cause: 10.0, slow_trend_unsensitive_result: 0.5, enable_v2_compatible_learner: false, + unsafe_disable_check_quorum: false, }; value.pd = PdConfig::new(vec!["example.com:443".to_owned()]); let titan_cf_config = TitanCfConfig { From 7e8ae8d1f90a912ada4ca9b852e5617f515bb4db Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 26 Jul 2023 21:33:05 +0800 Subject: [PATCH 0820/1149] raftstore-v2: fix paused checkpoint and other issues (#15174) close tikv/tikv#15059, fix tikv/tikv#15181, fix tikv/tikv#15209 - Fix checkpoint being paused forever. - Abort prepare merge if target is merging another. - Fix block-cache size is set to sum of cf's cache size by default. - Fix tablet being asked to destroy twice after commit merge. - Fix life.rs using the wrong fields of `CommitMergeRequest` to rollback. - Disable RocksDB stats dumping altogether. - Shutdown high priority pool manually before drop. - Add catch up logs into the prepare merge state machine to fix some corner case panics. 
Signed-off-by: tabokie Co-authored-by: tonyxuqqi --- Cargo.lock | 6 +- components/engine_rocks/src/checkpoint.rs | 10 +- components/raftstore-v2/src/batch/store.rs | 19 +- .../operation/command/admin/merge/commit.rs | 104 ++++++---- .../src/operation/command/admin/merge/mod.rs | 5 +- .../operation/command/admin/merge/prepare.rs | 95 ++++++++- .../operation/command/admin/merge/rollback.rs | 42 ++-- .../src/operation/command/admin/mod.rs | 2 +- components/raftstore-v2/src/operation/life.rs | 28 ++- .../src/router/response_channel.rs | 18 +- components/raftstore-v2/src/worker/tablet.rs | 84 ++++++-- .../tests/failpoints/test_merge.rs | 188 +++++++++++++++++- .../tests/integrations/cluster.rs | 8 +- .../tikv_util/src/yatp_pool/future_pool.rs | 5 + etc/config-template.toml | 11 +- src/config/mod.rs | 64 ++---- tests/integrations/config/mod.rs | 18 +- 17 files changed, 538 insertions(+), 169 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e081bd06a8a..84164fe44f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3105,7 +3105,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#782bfc8309f266da634267d207760362b362aa61" +source = "git+https://github.com/tikv/rust-rocksdb.git#3dee8778c2666030c174952db67f39356d60e1dc" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3124,7 +3124,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#782bfc8309f266da634267d207760362b362aa61" +source = "git+https://github.com/tikv/rust-rocksdb.git#3dee8778c2666030c174952db67f39356d60e1dc" dependencies = [ "bzip2-sys", "cc", @@ -5121,7 +5121,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#782bfc8309f266da634267d207760362b362aa61" +source = "git+https://github.com/tikv/rust-rocksdb.git#3dee8778c2666030c174952db67f39356d60e1dc" dependencies = [ "libc 0.2.146", 
"librocksdb_sys", diff --git a/components/engine_rocks/src/checkpoint.rs b/components/engine_rocks/src/checkpoint.rs index 0f86aa29945..250e50e0a45 100644 --- a/components/engine_rocks/src/checkpoint.rs +++ b/components/engine_rocks/src/checkpoint.rs @@ -42,7 +42,7 @@ impl Checkpointer for RocksEngineCheckpointer { #[cfg(test)] mod tests { - use engine_traits::{Checkpointable, Checkpointer, Peekable, SyncMutable, ALL_CFS}; + use engine_traits::{Checkpointable, Checkpointer, MiscExt, Peekable, SyncMutable, ALL_CFS}; use tempfile::tempdir; use crate::util::new_engine; @@ -55,6 +55,14 @@ mod tests { engine.put(b"key", b"value").unwrap(); let mut check_pointer = engine.new_checkpointer().unwrap(); + + engine.pause_background_work().unwrap(); + let path2 = dir.path().join("checkpoint"); + check_pointer + .create_at(path2.as_path(), None, 0) + .unwrap_err(); + engine.continue_background_work().unwrap(); + let path2 = dir.path().join("checkpoint"); check_pointer.create_at(path2.as_path(), None, 0).unwrap(); let engine2 = new_engine(path2.as_path().to_str().unwrap(), ALL_CFS).unwrap(); diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 847cb3ca4fe..506b9d531c0 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -375,6 +375,7 @@ impl StorePollerBuilder { trans: T, router: StoreRouter, schedulers: Schedulers, + high_priority_pool: FuturePool, logger: Logger, store_meta: Arc>>, shutdown: Arc, @@ -394,11 +395,6 @@ impl StorePollerBuilder { .after_start(move || set_io_type(IoType::ForegroundWrite)) .name_prefix("apply") .build_future_pool(); - let high_priority_pool = YatpPoolBuilder::new(DefaultTicker::default()) - .thread_count(1, 1, 1) - .after_start(move || set_io_type(IoType::ForegroundWrite)) - .name_prefix("store-bg") - .build_future_pool(); let global_stat = GlobalStoreStat::default(); StorePollerBuilder { cfg, @@ -617,6 +613,10 @@ struct Workers { // 
Following is not maintained by raftstore itself. background: Worker, + + // A background pool used for high-priority works. We need to hold a reference to shut it down + // manually. + high_priority_pool: FuturePool, } impl Workers { @@ -630,8 +630,13 @@ impl Workers { async_write: StoreWriters::new(None), purge, cleanup_worker: Worker::new("cleanup-worker"), - background, refresh_config_worker: LazyWorker::new("refreash-config-worker"), + background, + high_priority_pool: YatpPoolBuilder::new(DefaultTicker::default()) + .thread_count(1, 1, 1) + .after_start(move || set_io_type(IoType::ForegroundWrite)) + .name_prefix("store-bg") + .build_future_pool(), } } @@ -645,6 +650,7 @@ impl Workers { if let Some(w) = self.purge { w.stop(); } + self.high_priority_pool.shutdown(); } } @@ -846,6 +852,7 @@ impl StoreSystem { trans.clone(), router.clone(), schedulers.clone(), + workers.high_priority_pool.clone(), self.logger.clone(), store_meta.clone(), self.shutdown.clone(), diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index 708e9ef7ffb..5bd92e3ea1c 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -81,7 +81,7 @@ use tikv_util::{ time::Instant, }; -use super::merge_source_path; +use super::{merge_source_path, PrepareStatus}; use crate::{ batch::StoreContext, fsm::ApplyResReporter, @@ -155,14 +155,7 @@ impl Peer { pub fn start_commit_merge(&mut self, store_ctx: &mut StoreContext) { fail::fail_point!("start_commit_merge"); assert!(self.applied_merge_state().is_some()); - // Target already committed `CommitMerge`. 
- if let Some(c) = &self.merge_context().unwrap().catch_up_logs { - assert!(self.catch_up_logs_ready(c)); - let c = self.merge_context_mut().catch_up_logs.take().unwrap(); - self.finish_catch_up_logs(store_ctx, c); - } else { - self.on_check_merge(store_ctx); - } + self.on_check_merge(store_ctx); } // Match v1::on_check_merge. @@ -179,6 +172,11 @@ impl Peer { &mut self, store_ctx: &mut StoreContext, ) { + fail::fail_point!( + "ask_target_peer_to_commit_merge_2", + self.region_id() == 2, + |_| {} + ); let state = self.applied_merge_state().unwrap(); let target = state.get_target(); let target_id = target.get_id(); @@ -267,13 +265,14 @@ impl Peer { let merge = req.get_admin_request().get_commit_merge(); assert!(merge.has_source_state() && merge.get_source_state().has_merge_state()); let source_region = merge.get_source_state().get_region(); + let source_id = source_region.get_id(); let region = self.region(); if let Some(r) = self .storage() .region_state() .get_merged_records() .iter() - .find(|p| p.get_source_region_id() == source_region.get_id()) + .find(|p| p.get_source_region_id() == source_id) { info!( self.logger, @@ -283,13 +282,12 @@ impl Peer { ); let index = commit_of_merge(req.get_admin_request().get_commit_merge()); // If target caught up by snapshot, the source checkpoint hasn't been used. 
- let source_path = - merge_source_path(&store_ctx.tablet_registry, source_region.get_id(), index); + let source_path = merge_source_path(&store_ctx.tablet_registry, source_id, index); if source_path.exists() { self.record_tombstone_tablet_path(store_ctx, source_path, r.get_index()); } let _ = store_ctx.router.force_send( - source_region.get_id(), + source_id, PeerMsg::AckCommitMerge { index, target_id: self.region_id(), @@ -305,7 +303,7 @@ impl Peer { let index = commit_of_merge(req.get_admin_request().get_commit_merge()); let _ = store_ctx .router - .force_send(source_region.get_id(), PeerMsg::RejectCommitMerge { index }); + .force_send(source_id, PeerMsg::RejectCommitMerge { index }); } else if expected_epoch == region.get_region_epoch() { assert!( util::is_sibling_regions(source_region, region), @@ -322,8 +320,33 @@ impl Peer { ); assert!(!self.storage().has_dirty_data()); if self.is_leader() { - let (ch, _) = CmdResChannel::pair(); - self.on_admin_command(store_ctx, req, ch); + let index = commit_of_merge(req.get_admin_request().get_commit_merge()); + if self.proposal_control().is_merging() { + // `on_admin_command` may delay our request indefinitely. It's better to check + // directly. 
+ info!( + self.logger, + "reject commit merge because of target is merging with another region"; + ); + } else { + let (ch, res) = CmdResChannel::pair(); + self.on_admin_command(store_ctx, req, ch); + if let Some(res) = res.take_result() + && res.get_header().has_error() + { + error!( + self.logger, + "failed to propose commit merge"; + "source" => source_id, + "res" => ?res, + ); + } else { + return; + } + } + let _ = store_ctx + .router + .force_send(source_id, PeerMsg::RejectCommitMerge { index }); } } else { info!( @@ -353,6 +376,7 @@ impl Apply { req: &AdminRequest, index: u64, ) -> Result<(AdminResponse, AdminCmdResult)> { + fail::fail_point!("apply_commit_merge"); PEER_ADMIN_CMD_COUNTER.commit_merge.all.inc(); self.flush(); @@ -574,7 +598,9 @@ impl Peer { } // Context would be empty if this peer hasn't applied PrepareMerge. - if let Some(cul) = self.merge_context().and_then(|c| c.catch_up_logs.as_ref()) { + if let Some(PrepareStatus::CatchUpLogs(cul)) = + self.merge_context().and_then(|c| c.prepare_status.as_ref()) + { slog_panic!( self.logger, "get conflicting catch_up_logs"; @@ -582,7 +608,15 @@ impl Peer { "current" => ?cul.merge, ); } - if !self.catch_up_logs_ready(&catch_up_logs) { + if let Some(state) = self.applied_merge_state() + && state.get_commit() == commit_of_merge(&catch_up_logs.merge) + { + assert_eq!( + state.get_target().get_id(), + catch_up_logs.target_region_id + ); + self.finish_catch_up_logs(store_ctx, catch_up_logs); + } else { // Directly append these logs to raft log and then commit them. 
match self.maybe_append_merge_entries(&catch_up_logs.merge) { Some(last_index) => { @@ -598,24 +632,7 @@ impl Peer { } } catch_up_logs.merge.clear_entries(); - self.merge_context_mut().catch_up_logs = Some(catch_up_logs); - } else { - self.finish_catch_up_logs(store_ctx, catch_up_logs); - } - } - - #[inline] - fn catch_up_logs_ready(&self, catch_up_logs: &CatchUpLogs) -> bool { - if let Some(state) = self.applied_merge_state() - && state.get_commit() == commit_of_merge(&catch_up_logs.merge) - { - assert_eq!( - state.get_target().get_id(), - catch_up_logs.target_region_id - ); - true - } else { - false + self.merge_context_mut().prepare_status = Some(PrepareStatus::CatchUpLogs(catch_up_logs)); } } @@ -683,7 +700,11 @@ impl Peer { } #[inline] - fn finish_catch_up_logs(&mut self, store_ctx: &mut StoreContext, c: CatchUpLogs) { + pub fn finish_catch_up_logs( + &mut self, + store_ctx: &mut StoreContext, + c: CatchUpLogs, + ) { let safe_ts = store_ctx .store_meta .lock() @@ -698,7 +719,6 @@ impl Peer { "failed to respond to merge target, are we shutting down?" ); } - self.take_merge_context(); self.mark_for_destroy(None); } } @@ -802,7 +822,13 @@ impl Peer { pub fn on_ack_commit_merge(&mut self, index: u64, target_id: u64) { // We don't check it against merge state because source peer might just restart // and haven't replayed `PrepareMerge` yet. 
- info!(self.logger, "destroy self on AckCommitMerge"; "index" => index, "target_id" => target_id); + info!( + self.logger, + "destroy self on AckCommitMerge"; + "index" => index, + "target_id" => target_id, + "prepare_status" => ?self.merge_context().and_then(|c| c.prepare_status.as_ref()), + ); self.take_merge_context(); self.mark_for_destroy(None); } diff --git a/components/raftstore-v2/src/operation/command/admin/merge/mod.rs b/components/raftstore-v2/src/operation/command/admin/merge/mod.rs index 253630c2bc1..78f909fb26e 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/mod.rs @@ -37,7 +37,6 @@ pub fn merge_source_path( #[derive(Default)] pub struct MergeContext { prepare_status: Option, - catch_up_logs: Option, } impl MergeContext { @@ -90,8 +89,8 @@ impl MergeContext { impl Peer { #[inline] pub fn update_merge_progress_on_became_follower(&mut self) { - if let Some(ctx) = self.merge_context() - && matches!(ctx.prepare_status, Some(PrepareStatus::WaitForFence { .. })) + if let Some(MergeContext { prepare_status: Some(status) }) = self.merge_context() + && matches!(status, PrepareStatus::WaitForTrimStatus { .. } | PrepareStatus::WaitForFence { .. 
}) { self.take_merge_context(); self.proposal_control_mut().set_pending_prepare_merge(false); diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index 16f5d397ce7..138f5d327fe 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -56,7 +56,7 @@ use tikv_util::{ }; use txn_types::WriteBatchFlags; -use super::merge_source_path; +use super::{merge_source_path, CatchUpLogs}; use crate::{ batch::StoreContext, fsm::ApplyResReporter, @@ -67,13 +67,35 @@ use crate::{ const TRIM_CHECK_TIMEOUT: Duration = Duration::from_secs(10); -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct PreProposeContext { pub min_matched: u64, lock_size_limit: usize, } +/// FSM Graph (forward): +/// +/// +-------+------------------+ +/// | | v +/// None -> (1) -> (2) ------> (4) -> None(destroyed) +/// | | | ^ +/// +-------+------+--> (3) ---+ +/// +/// - None->1: `start_check_trim_status` +/// - 1->2: `check_pessimistic_locks` +/// - *->3: `on_catch_up_logs` +/// - *->4: `on_apply_res_prepare_merge` / `Peer::new` +/// - 4->None: `on_ack_commit_merge` +/// +/// Additional backward paths to None: +/// +/// - 1->None: `maybe_clean_up_stale_merge_context` / +/// `merge_on_availability_response` +/// - 1/2->None: `update_merge_progress_on_became_follower` / +/// `propose_prepare_merge` +/// - *->None: `rollback_merge` pub enum PrepareStatus { + /// (1) WaitForTrimStatus { start_time: Instant, // Peers that we are not sure if trimmed. @@ -82,6 +104,7 @@ pub enum PrepareStatus { // callback. req: Option, }, + /// (2) /// When a fence is present, we (1) delay the PrepareMerge /// command `cmd` until all writes before `idx` are applied (2) reject all /// in-coming write proposals. 
@@ -99,11 +122,44 @@ pub enum PrepareStatus { ctx: PreProposeContext, req: Option, }, + /// (3) + /// Catch up logs after source has committed `PrepareMerge` and target has + /// committed `CommitMerge`. + CatchUpLogs(CatchUpLogs), + /// (4) /// In this state, all write proposals except for `RollbackMerge` will be /// rejected. Applied(MergeState), } +impl std::fmt::Debug for PrepareStatus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self { + Self::WaitForTrimStatus { + start_time, + pending_peers, + req, + } => f + .debug_struct("PrepareStatus::WaitForTrimStatus") + .field("start_time", start_time) + .field("pending_peers", pending_peers) + .field("req", req) + .finish(), + Self::WaitForFence { fence, ctx, req } => f + .debug_struct("PrepareStatus::WaitForFence") + .field("fence", fence) + .field("ctx", ctx) + .field("req", req) + .finish(), + Self::CatchUpLogs(cul) => cul.fmt(f), + Self::Applied(state) => f + .debug_struct("PrepareStatus::Applied") + .field("state", state) + .finish(), + } + } +} + #[derive(Debug)] pub struct PrepareMergeResult { region_state: RegionLocalState, @@ -343,9 +399,16 @@ impl Peer { } } - let status = &mut self.merge_context_mut().prepare_status; // Shouldn't enter this call if trim check is already underway. 
- assert!(status.is_none()); + let status = &mut self.merge_context_mut().prepare_status; + if status.is_some() { + let logger = self.logger.clone(); + panic!( + "expect empty prepare merge status {}: {:?}", + SlogFormat(&logger), + self.merge_context_mut().prepare_status + ); + } *status = Some(PrepareStatus::WaitForTrimStatus { start_time: Instant::now_coarse(), pending_peers, @@ -463,7 +526,16 @@ impl Peer { }; let last_index = self.raft_group().raft.raft_log.last_index(); if has_locks && self.entry_storage().applied_index() < last_index { - self.merge_context_mut().prepare_status = Some(PrepareStatus::WaitForFence { + let status = &mut self.merge_context_mut().prepare_status; + if !matches!(status, Some(PrepareStatus::WaitForTrimStatus { .. })) { + let logger = self.logger.clone(); + panic!( + "expect WaitForTrimStatus {}: {:?}", + SlogFormat(&logger), + self.merge_context_mut().prepare_status + ); + } + *status = Some(PrepareStatus::WaitForFence { fence: last_index, ctx, req: Some(mem::take(req)), @@ -486,6 +558,7 @@ impl Peer { .as_ref() .and_then(|c| c.prepare_status.as_ref()) { + Some(PrepareStatus::WaitForTrimStatus { .. }) | None => Ok(None), Some(PrepareStatus::WaitForFence { fence, ctx, .. 
}) => { if applied_index < *fence { info!( @@ -499,11 +572,13 @@ impl Peer { Ok(Some(ctx.clone())) } } + Some(PrepareStatus::CatchUpLogs(cul)) => { + Err(box_err!("catch up logs is in-progress: {:?}.", cul)) + } Some(PrepareStatus::Applied(state)) => Err(box_err!( "another merge is in-progress, merge_state: {:?}.", state )), - _ => Ok(None), } } @@ -727,7 +802,13 @@ impl Peer { self.proposal_control_mut() .enter_prepare_merge(res.state.get_commit()); - self.merge_context_mut().prepare_status = Some(PrepareStatus::Applied(res.state)); + if let Some(PrepareStatus::CatchUpLogs(cul)) = self + .merge_context_mut() + .prepare_status + .replace(PrepareStatus::Applied(res.state)) + { + self.finish_catch_up_logs(store_ctx, cul); + } self.start_commit_merge(store_ctx); } diff --git a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs index 7b8d34ed0e7..cb45fdcf1cf 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs @@ -8,7 +8,6 @@ use kvproto::{ raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse}, raft_serverpb::PeerState, }; -use protobuf::Message; use raftstore::{ coprocessor::RegionChangeReason, store::{fsm::new_admin_request, metrics::PEER_ADMIN_CMD_COUNTER, LocksStatus, Transport}, @@ -23,6 +22,7 @@ use crate::{ fsm::ApplyResReporter, operation::AdminCmdResult, raft::{Apply, Peer}, + router::CmdResChannel, }; #[derive(Debug)] @@ -38,25 +38,16 @@ impl Peer { store_ctx: &mut StoreContext, index: u64, ) { - if self - .merge_context() - .map_or(true, |c| c.prepare_merge_index() != Some(index)) - { + let self_index = self.merge_context().and_then(|c| c.prepare_merge_index()); + if self_index != Some(index) { + info!( + self.logger, + "ignore RejectCommitMerge due to index not match"; + "index" => index, + "self_index" => ?self_index, + ); return; } - 
self.propose_rollback_merge(store_ctx, index); - } - - pub fn propose_rollback_merge( - &mut self, - store_ctx: &mut StoreContext, - index: u64, - ) { - info!( - self.logger, - "rollback prepare merge"; - "index" => index, - ); let mut request = new_admin_request(self.region_id(), self.peer().clone()); request .mut_header() @@ -65,10 +56,16 @@ impl Peer { admin.set_cmd_type(AdminCmdType::RollbackMerge); admin.mut_rollback_merge().set_commit(index); request.set_admin_request(admin); - // TODO: it should propose via on_admin_command, otherwise it may panic - // during force leader. - if let Err(e) = self.propose(store_ctx, request.write_to_bytes().unwrap()) { - error!(self.logger, "failed to propose RollbackMerge"; "err" => ?e); + let (ch, res) = CmdResChannel::pair(); + self.on_admin_command(store_ctx, request, ch); + if let Some(res) = res.take_result() + && res.get_header().has_error() + { + error!( + self.logger, + "failed to propose rollback merge"; + "res" => ?res, + ); } } } @@ -80,6 +77,7 @@ impl Apply { req: &AdminRequest, _index: u64, ) -> Result<(AdminResponse, AdminCmdResult)> { + fail::fail_point!("apply_rollback_merge"); PEER_ADMIN_CMD_COUNTER.rollback_merge.all.inc(); if self.region_state().get_state() != PeerState::Merging { slog_panic!( diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 0623f909786..d59a564c696 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -222,7 +222,7 @@ impl Peer { } } AdminCmdType::CompactLog => self.propose_compact_log(ctx, req), - AdminCmdType::UpdateGcPeer => { + AdminCmdType::UpdateGcPeer | AdminCmdType::RollbackMerge => { let data = req.write_to_bytes().unwrap(); self.propose(ctx, data) } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index a489331bc5b..3f703bea24c 100644 --- 
a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -317,18 +317,38 @@ impl Store { // It will create the peer if it does not exist self.on_raft_message(ctx, raft_msg); - if let Err(SendError(PeerMsg::AskCommitMerge(req))) = ctx + let commit_merge = req.get_admin_request().get_commit_merge(); + // v2 specific. + assert!(commit_merge.has_source_state()); + let source_index = commit_merge + .get_source_state() + .get_merge_state() + .get_commit(); + let source_id = commit_merge.get_source_state().get_region().get_id(); + + if let Err(SendError(PeerMsg::AskCommitMerge(_))) = ctx .router .force_send(region_id, PeerMsg::AskCommitMerge(req)) { - let commit_merge = req.get_admin_request().get_commit_merge(); - let source_id = commit_merge.get_source().get_id(); let _ = ctx.router.force_send( source_id, PeerMsg::RejectCommitMerge { - index: commit_merge.get_commit(), + index: source_index, }, ); + info!( + self.logger(), + "Store rejects CommitMerge request"; + "source" => source_id, + "index" => source_index, + ); + } else { + info!( + self.logger(), + "Store forwards CommitMerge request to peer"; + "source" => source_id, + "index" => source_index, + ); } } diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index c300b6d8726..d23b2852a03 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -92,7 +92,7 @@ impl EventCore { } } - /// Set the result. + /// Set the result. Caller must guarantee result is set only once. /// /// After this call, no events should be notified. #[inline] @@ -103,6 +103,8 @@ impl EventCore { } *self.res.get() = Some(result); } + // FIXME: this is not safe. previous line can be reordered after this unless + // with a global barrier. 
let previous = self.event.fetch_or( fired_bit_of(PAYLOAD_EVENT) | fired_bit_of(CANCEL_EVENT), Ordering::AcqRel, @@ -236,6 +238,20 @@ impl BaseSubscriber { let e = self.core.event.load(Ordering::Relaxed); check_bit(e, fired_bit_of(PAYLOAD_EVENT)).is_some() } + + /// Synchronous version of `result`. It cannot be called concurrently with + /// another `take_result` or `result`. + #[inline] + pub fn take_result(&self) -> Option { + let e = self.core.event.load(Ordering::Relaxed); + if check_bit(e, fired_bit_of(PAYLOAD_EVENT)).is_some() { + let r = unsafe { (*self.core.res.get()).take() }; + assert!(r.is_some()); + r + } else { + None + } + } } unsafe impl Send for BaseSubscriber {} diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index 2118bb3da77..183bb33cd34 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -25,7 +25,7 @@ use tikv_util::{ Either, }; -const DEFAULT_HIGH_PRI_POOL_SIZE: usize = 1; +const DEFAULT_HIGH_PRI_POOL_SIZE: usize = 2; const DEFAULT_LOW_PRI_POOL_SIZE: usize = 6; pub enum Task { @@ -357,10 +357,11 @@ impl Runner { cb: Option>, ) { let path = self.pause_background_work(tablet); - self.waiting_destroy_tasks - .entry(region_id) - .or_default() - .push((path, wait_for_persisted, cb)); + let list = self.waiting_destroy_tasks.entry(region_id).or_default(); + if list.iter().any(|(p, ..)| p == &path) { + return; + } + list.push((path, wait_for_persisted, cb)); } fn destroy(&mut self, region_id: u64, persisted: u64) { @@ -390,12 +391,16 @@ impl Runner { /// Returns true if task is consumed. Failure is considered consumed. 
fn process_destroy_task(logger: &Logger, registry: &TabletRegistry, path: &Path) -> bool { match EK::locked(path.to_str().unwrap()) { - Err(e) => warn!( - logger, - "failed to check whether the tablet path is locked"; - "err" => ?e, - "path" => path.display(), - ), + Err(e) if !e.to_string().contains("No such file or directory") => { + warn!( + logger, + "failed to check whether the tablet path is locked"; + "err" => ?e, + "path" => path.display(), + ); + return false; + } + Err(_) => (), Ok(false) => { let (_, region_id, tablet_index) = registry.parse_tablet_name(path).unwrap_or(("", 0, 0)); @@ -414,13 +419,13 @@ impl Runner { "path" => path.display(), ) }); - return true; } Ok(true) => { debug!(logger, "ignore locked tablet"; "path" => path.display()); + return false; } } - false + true } fn cleanup_ssts(&self, ssts: Box<[SstMeta]>) { @@ -470,7 +475,7 @@ impl Runner { let logger = self.logger.clone(); let now = Instant::now(); let pool = if high_priority - && self.low_pri_pool.get_running_task_count() > DEFAULT_LOW_PRI_POOL_SIZE / 2 + && self.low_pri_pool.get_running_task_count() >= DEFAULT_LOW_PRI_POOL_SIZE - 2 { &self.high_pri_pool } else { @@ -759,4 +764,55 @@ mod tests { runner.on_timeout(); assert!(!path2.exists()); } + + #[test] + fn test_destroy_missing() { + let dir = Builder::new() + .prefix("test_destroy_missing") + .tempdir() + .unwrap(); + let factory = Box::new(TestTabletFactory::new( + DbOptions::default(), + vec![("default", CfOptions::default())], + )); + let registry = TabletRegistry::new(factory, dir.path()).unwrap(); + let logger = slog_global::borrow_global().new(slog::o!()); + let (_dir, importer) = create_tmp_importer(); + let snap_dir = dir.path().join("snap"); + let snap_mgr = TabletSnapManager::new(snap_dir, None).unwrap(); + let mut runner = Runner::new(registry.clone(), importer, snap_mgr, logger); + + let mut region = Region::default(); + let r_1 = 1; + region.set_id(r_1); + region.set_start_key(b"a".to_vec()); + 
region.set_end_key(b"b".to_vec()); + let tablet = registry + .load(TabletContext::new(&region, Some(1)), true) + .unwrap() + .latest() + .unwrap() + .clone(); + let path = PathBuf::from(tablet.path()); + // submit for destroy twice. + runner.run(Task::prepare_destroy(tablet.clone(), r_1, 10)); + runner.run(Task::destroy(r_1, 100)); + runner.run(Task::prepare_destroy(tablet, r_1, 10)); + runner.run(Task::destroy(r_1, 100)); + assert!(path.exists()); + registry.remove(r_1); + runner.on_timeout(); + assert!(!path.exists()); + assert!(runner.pending_destroy_tasks.is_empty()); + + // submit a non-existing path. + runner.run(Task::prepare_destroy_path( + dir.path().join("missing"), + r_1, + 200, + )); + runner.run(Task::destroy(r_1, 500)); + runner.on_timeout(); + assert!(runner.pending_destroy_tasks.is_empty()); + } } diff --git a/components/raftstore-v2/tests/failpoints/test_merge.rs b/components/raftstore-v2/tests/failpoints/test_merge.rs index 9321c06b5f5..890b8c5e27a 100644 --- a/components/raftstore-v2/tests/failpoints/test_merge.rs +++ b/components/raftstore-v2/tests/failpoints/test_merge.rs @@ -1,8 +1,12 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0.
-use std::{sync::Mutex, time::Duration}; +use std::{ + sync::{mpsc, Mutex}, + time::Duration, +}; use engine_traits::Peekable; +use raftstore_v2::router::{PeerMsg, PeerTick}; use tikv_util::store::new_peer; use crate::cluster::{ @@ -141,7 +145,7 @@ fn test_rollback() { let region_2_clone = region_2.clone(); fail::cfg_callback("start_commit_merge", move || { split_region( - &mut router_clone.lock().unwrap(), + &router_clone.lock().unwrap(), region_2_clone.clone(), peer_2.clone(), region_3_id, @@ -160,7 +164,185 @@ fn test_rollback() { let mut resp = Default::default(); for _ in 0..10 { resp = put( - &mut cluster.routers[0], + &cluster.routers[0], + region_1_id, + format!("k{}k2", region_1_id).as_bytes(), + ); + if !resp.get_header().has_error() { + return; + } + std::thread::sleep(Duration::from_millis(100)); + } + assert!(!resp.get_header().has_error(), "{:?}", resp); +} + +// Target is merging. +#[test] +fn test_merge_conflict_0() { + let mut cluster = Cluster::default(); + let store_id = cluster.node(0).id(); + let router = &mut cluster.routers[0]; + + let region_1 = router.region_detail(2); + let peer_1 = region_1.get_peers()[0].clone(); + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + let peer_2 = new_peer(store_id, peer_1.get_id() + 1); + let region_1_id = region_1.get_id(); + let region_2_id = region_1_id + 1; + let (region_1, region_2) = split_region( + router, + region_1, + peer_1.clone(), + region_2_id, + peer_2.clone(), + Some(format!("k{}k", region_1_id).as_bytes()), + Some(format!("k{}k", region_2_id).as_bytes()), + format!("k{}", region_2_id).as_bytes(), + format!("k{}", region_2_id).as_bytes(), + false, + ); + + let peer_3 = new_peer(store_id, peer_1.get_id() + 2); + let region_3_id = region_2_id + 1; + let (region_2, region_3) = split_region( + router, + region_2, + peer_2.clone(), + region_3_id, + peer_3, + Some(format!("k{}k", region_2_id).as_bytes()), + Some(format!("k{}k", region_3_id).as_bytes()), + format!("k{}", 
region_3_id).as_bytes(), + format!("k{}", region_3_id).as_bytes(), + false, + ); + + // pause merge progress of 2+3. + let fp = fail::FailGuard::new("apply_commit_merge", "pause"); + merge_region( + &cluster, + 0, + region_2.clone(), + peer_2, + region_3.clone(), + false, + ); + // start merging 1+2. it should be aborted. + let (tx, rx) = mpsc::channel(); + let tx = Mutex::new(tx); + fail::cfg_callback("apply_rollback_merge", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + let region_2 = cluster.routers[0].region_detail(region_2.get_id()); + merge_region(&cluster, 0, region_1, peer_1, region_2, false); + // wait for rollback. + rx.recv_timeout(std::time::Duration::from_secs(1)).unwrap(); + drop(fp); + fail::remove("apply_rollback_merge"); + + // Check region 1 is not merged and can serve writes. + let mut resp = Default::default(); + for _ in 0..10 { + resp = put( + &cluster.routers[0], + region_1_id, + format!("k{}k2", region_1_id).as_bytes(), + ); + if !resp.get_header().has_error() { + return; + } + std::thread::sleep(Duration::from_millis(100)); + } + assert!(!resp.get_header().has_error(), "{:?}", resp); + + // Ref https://github.com/tikv/yatp/issues/82, the nested future pool high_priority_pool can be + // leaked. We must wait for apply to finish. + cluster.routers[0].wait_applied_to_current_term(region_3.get_id(), Duration::from_secs(3)); +} + +// Target has been merged and destroyed. 
+#[test] +fn test_merge_conflict_1() { + let mut cluster = Cluster::default(); + let store_id = cluster.node(0).id(); + let router = &mut cluster.routers[0]; + + let region_1 = router.region_detail(2); + let peer_1 = region_1.get_peers()[0].clone(); + router.wait_applied_to_current_term(2, Duration::from_secs(3)); + let peer_2 = new_peer(store_id, peer_1.get_id() + 1); + let region_1_id = region_1.get_id(); + let region_2_id = region_1_id + 1; + let (region_1, region_2) = split_region( + router, + region_1, + peer_1.clone(), + region_2_id, + peer_2.clone(), + Some(format!("k{}k", region_1_id).as_bytes()), + Some(format!("k{}k", region_2_id).as_bytes()), + format!("k{}", region_2_id).as_bytes(), + format!("k{}", region_2_id).as_bytes(), + false, + ); + + let peer_3 = new_peer(store_id, peer_1.get_id() + 2); + let region_3_id = region_2_id + 1; + let (region_2, region_3) = split_region( + router, + region_2, + peer_2.clone(), + region_3_id, + peer_3, + Some(format!("k{}k", region_2_id).as_bytes()), + Some(format!("k{}k", region_3_id).as_bytes()), + format!("k{}", region_3_id).as_bytes(), + format!("k{}", region_3_id).as_bytes(), + false, + ); + + // pause merge progress of 1+2. + assert_eq!(region_1.get_id(), 2); + let fp = fail::FailGuard::new("ask_target_peer_to_commit_merge_2", "return"); + merge_region( + &cluster, + 0, + region_1.clone(), + peer_1, + region_2.clone(), + false, + ); + // merge 2+3. + merge_region( + &cluster, + 0, + region_2.clone(), + peer_2.clone(), + region_3, + true, + ); + assert_peer_not_exist(region_2.get_id(), peer_2.get_id(), &cluster.routers[0]); + // resume merging 1+2. it should be aborted. + let (tx, rx) = mpsc::channel(); + let tx = Mutex::new(tx); + fail::cfg_callback("apply_rollback_merge", move || { + tx.lock().unwrap().send(()).unwrap(); + fail::remove("apply_rollback_merge"); + }) + .unwrap(); + drop(fp); + cluster.routers[0] + .send(region_1.get_id(), PeerMsg::Tick(PeerTick::CheckMerge)) + .unwrap(); + // wait for rollback. 
+ rx.recv_timeout(std::time::Duration::from_secs(1)).unwrap(); + + // Check region 1 is not merged and can serve writes. + let mut resp = Default::default(); + for _ in 0..10 { + resp = put( + &cluster.routers[0], region_1_id, format!("k{}k2", region_1_id).as_bytes(), ); diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 710286f8e13..67b8eacd6a9 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -732,7 +732,7 @@ pub mod split_helper { req } - pub fn must_split(region_id: u64, req: RaftCmdRequest, router: &mut TestRouter) { + pub fn must_split(region_id: u64, req: RaftCmdRequest, router: &TestRouter) { let (msg, sub) = PeerMsg::admin_command(req); router.send(region_id, msg).unwrap(); block_on(sub.result()).unwrap(); @@ -742,7 +742,7 @@ pub mod split_helper { thread::sleep(Duration::from_secs(1)); } - pub fn put(router: &mut TestRouter, region_id: u64, key: &[u8]) -> RaftCmdResponse { + pub fn put(router: &TestRouter, region_id: u64, key: &[u8]) -> RaftCmdResponse { let header = Box::new(router.new_request_for(region_id).take_header()); let mut put = SimpleWriteEncoder::with_capacity(64); put.put(CF_DEFAULT, key, b"v1"); @@ -752,7 +752,7 @@ pub mod split_helper { // Split the region according to the parameters // return the updated original region pub fn split_region<'a>( - router: &'a mut TestRouter, + router: &'a TestRouter, region: metapb::Region, peer: metapb::Peer, split_region_id: u64, @@ -820,7 +820,7 @@ pub mod split_helper { // This is to simulate the case when the splitted peer's storage is not // initialized yet when refresh bucket happens pub fn split_region_and_refresh_bucket( - router: &mut TestRouter, + router: &TestRouter, region: metapb::Region, peer: metapb::Peer, split_region_id: u64, diff --git a/components/tikv_util/src/yatp_pool/future_pool.rs 
b/components/tikv_util/src/yatp_pool/future_pool.rs index 66595b0205c..827ffbbdce2 100644 --- a/components/tikv_util/src/yatp_pool/future_pool.rs +++ b/components/tikv_util/src/yatp_pool/future_pool.rs @@ -114,6 +114,11 @@ impl FuturePool { pub fn thread_count_limit(&self) -> (usize, usize) { self.inner.pool.thread_count_limit() } + + /// Cancel all pending futures and join all threads. + pub fn shutdown(&self) { + self.inner.pool.shutdown(); + } } struct PoolInner { diff --git a/etc/config-template.toml b/etc/config-template.toml index 317336236d2..18e155c0a3b 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -288,17 +288,12 @@ [storage.block-cache] ## Size of the shared block cache. Normally it should be tuned to 30%-50% of system's total memory. -## When the config is not set, it is decided by the sum of the following fields or their default -## value: -## * rocksdb.defaultcf.block-cache-size or 25% of system's total memory -## * rocksdb.writecf.block-cache-size or 15% of system's total memory -## * rocksdb.lockcf.block-cache-size or 2% of system's total memory -## * raftdb.defaultcf.block-cache-size or 2% of system's total memory ## ## To deploy multiple TiKV nodes on a single physical machine, configure this parameter explicitly. ## Otherwise, the OOM problem might occur in TiKV. ## -## If it's not set, 45% of available system memory will be used. +## When storage.engine is "raft-kv", default value is 45% of available system memory. +## When storage.engine is "partitioned-raft-kv", default value is 30% of available system memory. # capacity = "0B" [storage.flow-control] @@ -573,7 +568,7 @@ ## Dump statistics periodically in information logs. ## When storage.engine is "raft-kv", default value is 10m. -## When storage.engine is "partitioned-raft-kv", default value is 120m. +## When storage.engine is "partitioned-raft-kv", default value is 0. 
# stats-dump-period = "10m" ## Refer to: https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ diff --git a/src/config/mod.rs b/src/config/mod.rs index 07580e07876..0432bfab3a4 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -68,7 +68,7 @@ use serde::{ use serde_json::{to_value, Map, Value}; use tikv_util::{ config::{ - self, LogFormat, RaftDataStateMachine, ReadableDuration, ReadableSize, TomlWriter, GIB, MIB, + self, LogFormat, RaftDataStateMachine, ReadableDuration, ReadableSize, TomlWriter, MIB, }, logger::{get_level_by_string, get_string_by_level, set_log_level}, sys::SysQuota, @@ -111,10 +111,6 @@ const WRITE_BUFFER_MEMORY_LIMIT_RATE: f64 = 0.2; // Too large will increase Raft Engine memory usage. const WRITE_BUFFER_MEMORY_LIMIT_MAX: u64 = ReadableSize::gb(8).0; -const LOCKCF_MIN_MEM: usize = 256 * MIB as usize; -const LOCKCF_MAX_MEM: usize = GIB as usize; -const RAFT_MIN_MEM: usize = 256 * MIB as usize; -const RAFT_MAX_MEM: usize = 2 * GIB as usize; /// Configs that actually took effect in the last run pub const LAST_CONFIG_FILE: &str = "last_tikv.toml"; const TMP_CONFIG_FILE: &str = "tmp_tikv.toml"; @@ -130,18 +126,6 @@ fn bloom_filter_ratio(et: EngineType) -> f64 { } } -fn memory_limit_for_cf(is_raft_db: bool, cf: &str, total_mem: u64) -> ReadableSize { - let (ratio, min, max) = match (is_raft_db, cf) { - (true, CF_DEFAULT) => (0.02, RAFT_MIN_MEM, RAFT_MAX_MEM), - (false, CF_DEFAULT) => (0.25, 0, usize::MAX), - (false, CF_LOCK) => (0.02, LOCKCF_MIN_MEM, LOCKCF_MAX_MEM), - (false, CF_WRITE) => (0.15, 0, usize::MAX), - _ => unreachable!(), - }; - let size = ((total_mem as f64 * ratio) as usize).clamp(min, max); - ReadableSize::mb(size as u64 / MIB) -} - #[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] @@ -299,7 +283,8 @@ macro_rules! 
cf_config { pub struct $name { #[online_config(skip)] pub block_size: ReadableSize, - pub block_cache_size: ReadableSize, + // FIXME: deprecate it and update all tests related to it. + pub block_cache_size: Option, #[online_config(skip)] pub disable_block_cache: bool, #[online_config(skip)] @@ -426,7 +411,7 @@ macro_rules! write_into_metrics { .set($cf.block_size.0 as f64); $metrics .with_label_values(&[$tag, "block_cache_size"]) - .set($cf.block_cache_size.0 as f64); + .set($cf.block_cache_size.map(|s| s.0).unwrap_or_default() as f64); $metrics .with_label_values(&[$tag, "disable_block_cache"]) .set(($cf.disable_block_cache as i32).into()); @@ -675,11 +660,9 @@ cf_config!(DefaultCfConfig); impl Default for DefaultCfConfig { fn default() -> DefaultCfConfig { - let total_mem = SysQuota::memory_limit_in_bytes(); - DefaultCfConfig { block_size: ReadableSize::kb(32), - block_cache_size: memory_limit_for_cf(false, CF_DEFAULT, total_mem), + block_cache_size: None, disable_block_cache: false, cache_index_and_filter_blocks: true, pin_l0_filter_and_index_blocks: true, @@ -841,8 +824,6 @@ cf_config!(WriteCfConfig); impl Default for WriteCfConfig { fn default() -> WriteCfConfig { - let total_mem = SysQuota::memory_limit_in_bytes(); - // Setting blob_run_mode=read_only effectively disable Titan. let titan = TitanCfConfig { blob_run_mode: BlobRunMode::ReadOnly, @@ -851,7 +832,7 @@ impl Default for WriteCfConfig { WriteCfConfig { block_size: ReadableSize::kb(32), - block_cache_size: memory_limit_for_cf(false, CF_WRITE, total_mem), + block_cache_size: None, disable_block_cache: false, cache_index_and_filter_blocks: true, pin_l0_filter_and_index_blocks: true, @@ -971,8 +952,6 @@ cf_config!(LockCfConfig); impl Default for LockCfConfig { fn default() -> LockCfConfig { - let total_mem = SysQuota::memory_limit_in_bytes(); - // Setting blob_run_mode=read_only effectively disable Titan. 
let titan = TitanCfConfig { blob_run_mode: BlobRunMode::ReadOnly, @@ -981,7 +960,7 @@ impl Default for LockCfConfig { LockCfConfig { block_size: ReadableSize::kb(16), - block_cache_size: memory_limit_for_cf(false, CF_LOCK, total_mem), + block_cache_size: None, disable_block_cache: false, cache_index_and_filter_blocks: true, pin_l0_filter_and_index_blocks: true, @@ -1078,7 +1057,7 @@ impl Default for RaftCfConfig { }; RaftCfConfig { block_size: ReadableSize::kb(16), - block_cache_size: ReadableSize::mb(128), + block_cache_size: None, disable_block_cache: false, cache_index_and_filter_blocks: true, pin_l0_filter_and_index_blocks: true, @@ -1371,7 +1350,7 @@ impl DbConfig { ))); self.max_total_wal_size.get_or_insert(ReadableSize(1)); self.stats_dump_period - .get_or_insert(ReadableDuration::minutes(120)); + .get_or_insert(ReadableDuration::minutes(0)); // In RaftKv2, every region uses its own rocksdb instance, it's actually the // even stricter compaction guard, so use the same output file size base. self.writecf @@ -1611,11 +1590,9 @@ cf_config!(RaftDefaultCfConfig); impl Default for RaftDefaultCfConfig { fn default() -> RaftDefaultCfConfig { - let total_mem = SysQuota::memory_limit_in_bytes(); - RaftDefaultCfConfig { block_size: ReadableSize::kb(64), - block_cache_size: memory_limit_for_cf(true, CF_DEFAULT, total_mem), + block_cache_size: None, disable_block_cache: false, cache_index_and_filter_blocks: true, pin_l0_filter_and_index_blocks: true, @@ -3806,14 +3783,13 @@ impl TikvConfig { // When shared block cache is enabled, if its capacity is set, it overrides // individual block cache sizes. Otherwise use the sum of block cache // size of all column families as the shared cache size. 
- let cache_cfg = &mut self.storage.block_cache; - if cache_cfg.capacity.is_none() { - cache_cfg.capacity = Some(ReadableSize( - self.rocksdb.defaultcf.block_cache_size.0 - + self.rocksdb.writecf.block_cache_size.0 - + self.rocksdb.lockcf.block_cache_size.0 - + self.raftdb.defaultcf.block_cache_size.0, - )); + if let Some(a) = self.rocksdb.defaultcf.block_cache_size + && let Some(b) = self.rocksdb.writecf.block_cache_size + && let Some(c) = self.rocksdb.lockcf.block_cache_size + { + let d = self.raftdb.defaultcf.block_cache_size.map(|s| s.0).unwrap_or_default(); + let sum = a.0 + b.0 + c.0 + d; + self.storage.block_cache.capacity = Some(ReadableSize(sum)); } if self.backup.sst_max_size.0 < default_coprocessor.region_max_size().0 / 10 { warn!( @@ -4521,7 +4497,7 @@ mod tests { use test_util::assert_eq_debug; use tikv_kv::RocksEngine as RocksDBEngine; use tikv_util::{ - config::VersionTrack, + config::{VersionTrack, GIB}, logger::get_log_level, quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, sys::SysQuota, @@ -4840,7 +4816,7 @@ mod tests { let mut incoming = TikvConfig::default(); incoming.coprocessor.region_split_keys = Some(10000); incoming.gc.max_write_bytes_per_sec = ReadableSize::mb(100); - incoming.rocksdb.defaultcf.block_cache_size = ReadableSize::mb(500); + incoming.rocksdb.defaultcf.block_cache_size = Some(ReadableSize::mb(500)); incoming.storage.io_rate_limit.import_priority = file_system::IoPriority::High; let diff = old.diff(&incoming); let mut change = HashMap::new(); @@ -5106,7 +5082,7 @@ mod tests { cfg.rocksdb.max_background_flushes = 2; cfg.rocksdb.defaultcf.disable_auto_compactions = false; cfg.rocksdb.defaultcf.target_file_size_base = Some(ReadableSize::mb(64)); - cfg.rocksdb.defaultcf.block_cache_size = ReadableSize::mb(8); + cfg.rocksdb.defaultcf.block_cache_size = Some(ReadableSize::mb(8)); cfg.rocksdb.rate_bytes_per_sec = ReadableSize::mb(64); cfg.rocksdb.rate_limiter_auto_tuned = false; cfg.validate().unwrap(); diff --git 
a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 801effcbb3d..045da61e08c 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -331,7 +331,7 @@ fn test_serde_custom_tikv_config() { write_buffer_flush_oldest_first: true, defaultcf: DefaultCfConfig { block_size: ReadableSize::kb(12), - block_cache_size: ReadableSize::gb(12), + block_cache_size: Some(ReadableSize::gb(12)), disable_block_cache: false, cache_index_and_filter_blocks: false, pin_l0_filter_and_index_blocks: false, @@ -390,7 +390,7 @@ fn test_serde_custom_tikv_config() { }, writecf: WriteCfConfig { block_size: ReadableSize::kb(12), - block_cache_size: ReadableSize::gb(12), + block_cache_size: Some(ReadableSize::gb(12)), disable_block_cache: false, cache_index_and_filter_blocks: false, pin_l0_filter_and_index_blocks: false, @@ -463,7 +463,7 @@ fn test_serde_custom_tikv_config() { }, lockcf: LockCfConfig { block_size: ReadableSize::kb(12), - block_cache_size: ReadableSize::gb(12), + block_cache_size: Some(ReadableSize::gb(12)), disable_block_cache: false, cache_index_and_filter_blocks: false, pin_l0_filter_and_index_blocks: false, @@ -536,7 +536,7 @@ fn test_serde_custom_tikv_config() { }, raftcf: RaftCfConfig { block_size: ReadableSize::kb(12), - block_cache_size: ReadableSize::gb(12), + block_cache_size: Some(ReadableSize::gb(12)), disable_block_cache: false, cache_index_and_filter_blocks: false, pin_l0_filter_and_index_blocks: false, @@ -638,7 +638,7 @@ fn test_serde_custom_tikv_config() { wal_bytes_per_sync: ReadableSize::kb(32), defaultcf: RaftDefaultCfConfig { block_size: ReadableSize::kb(12), - block_cache_size: ReadableSize::gb(12), + block_cache_size: Some(ReadableSize::gb(12)), disable_block_cache: false, cache_index_and_filter_blocks: false, pin_l0_filter_and_index_blocks: false, @@ -912,10 +912,10 @@ fn test_block_cache_backward_compatible() { assert!(cfg.storage.block_cache.capacity.is_some()); assert_eq!( 
cfg.storage.block_cache.capacity.unwrap().0, - cfg.rocksdb.defaultcf.block_cache_size.0 - + cfg.rocksdb.writecf.block_cache_size.0 - + cfg.rocksdb.lockcf.block_cache_size.0 - + cfg.raftdb.defaultcf.block_cache_size.0 + cfg.rocksdb.defaultcf.block_cache_size.unwrap().0 + + cfg.rocksdb.writecf.block_cache_size.unwrap().0 + + cfg.rocksdb.lockcf.block_cache_size.unwrap().0 + + cfg.raftdb.defaultcf.block_cache_size.unwrap().0 ); } From ec65de6972091a3d4d2ce84c6afea9d20ec95e00 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 27 Jul 2023 12:01:34 +0800 Subject: [PATCH 0821/1149] resource_control: add background task control for txn scheduler (#15166) ref tikv/tikv#14900 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 1 - .../resource_control/src/resource_limiter.rs | 16 ++- components/resource_control/src/worker.rs | 32 +++--- components/server/src/server.rs | 1 + components/server/src/server2.rs | 1 + components/test_raftstore-v2/src/server.rs | 1 + components/test_raftstore/src/server.rs | 1 + components/tidb_query_executors/Cargo.toml | 1 - components/tidb_query_executors/src/runner.rs | 20 +--- components/tikv_util/src/quota_limiter.rs | 8 ++ src/coprocessor/checksum.rs | 22 +--- src/coprocessor/dag/mod.rs | 7 -- src/coprocessor/endpoint.rs | 32 ++++-- src/coprocessor/statistics/analyze.rs | 33 ++---- src/read_pool.rs | 56 +++++---- src/server/service/kv.rs | 26 ++--- src/storage/mod.rs | 106 +++++++++++++++++- src/storage/txn/scheduler.rs | 31 ++++- .../benches/coprocessor_executors/util/mod.rs | 1 - 19 files changed, 259 insertions(+), 137 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 84164fe44f8..8d10f0888e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6656,7 +6656,6 @@ dependencies = [ "log_wrappers", "match-template", "protobuf", - "resource_control", "slog", "slog-global", "smallvec", diff --git a/components/resource_control/src/resource_limiter.rs 
b/components/resource_control/src/resource_limiter.rs index f20e60640fc..c3e1fab9a49 100644 --- a/components/resource_control/src/resource_limiter.rs +++ b/components/resource_control/src/resource_limiter.rs @@ -3,11 +3,12 @@ use std::{ fmt, sync::atomic::{AtomicU64, Ordering}, - time::Duration, + time::{Duration, Instant}, }; +use futures::compat::Future01CompatExt; use strum::EnumCount; -use tikv_util::time::Limiter; +use tikv_util::{time::Limiter, timer::GLOBAL_TIMER_HANDLE}; #[derive(Clone, Copy, Eq, PartialEq, EnumCount)] #[repr(usize)] @@ -53,6 +54,17 @@ impl ResourceLimiter { cpu_dur.max(io_dur) } + pub async fn async_consume(&self, cpu_time: Duration, io_bytes: u64) -> Duration { + let dur = self.consume(cpu_time, io_bytes); + if !dur.is_zero() { + _ = GLOBAL_TIMER_HANDLE + .delay(Instant::now() + dur) + .compat() + .await; + } + dur + } + #[inline] pub(crate) fn get_limiter(&self, ty: ResourceType) -> &QuotaLimiter { &self.limiters[ty as usize] diff --git a/components/resource_control/src/worker.rs b/components/resource_control/src/worker.rs index b58d37cd13a..e2372d39f46 100644 --- a/components/resource_control/src/worker.rs +++ b/components/resource_control/src/worker.rs @@ -217,12 +217,12 @@ impl GroupQuotaAdjustWorker { // the available resource for background tasks is defined as: // (total_resource_quota - foreground_task_used). foreground_task_used // resource is calculated by: (resource_current_total_used - - // background_consumed_total). We reserve 10% of the free resources for + // background_consumed_total). We reserve 20% of the free resources for // foreground tasks in case the fore ground traffics increases. 
let mut available_resource_rate = ((resource_stats.total_quota - resource_stats.current_used + background_consumed_total) - * 0.9) + * 0.8) .max(resource_stats.total_quota * 0.1); let mut total_expected_cost = 0.0; for g in bg_group_stats.iter_mut() { @@ -401,20 +401,20 @@ mod tests { reset_quota(&mut worker, 0.0, 0.0, Duration::from_secs(1)); worker.adjust_quota(); - check_limiter(&limiter, 7.2, 9000.0); + check_limiter(&limiter, 6.4, 8000.0); reset_quota(&mut worker, 4.0, 2000.0, Duration::from_millis(500)); worker.adjust_quota(); - check_limiter(&limiter, 7.2, 9000.0); + check_limiter(&limiter, 6.4, 8000.0); reset_quota(&mut worker, 4.0, 2000.0, Duration::from_secs(1)); worker.adjust_quota(); - check_limiter(&limiter, 3.6, 7200.0); + check_limiter(&limiter, 3.2, 6400.0); reset_quota(&mut worker, 6.0, 4000.0, Duration::from_secs(1)); limiter.consume(Duration::from_secs(2), 2000); worker.adjust_quota(); - check_limiter(&limiter, 3.6, 7200.0); + check_limiter(&limiter, 3.2, 6400.0); reset_quota(&mut worker, 8.0, 9500.0, Duration::from_secs(1)); worker.adjust_quota(); @@ -423,12 +423,12 @@ mod tests { reset_quota(&mut worker, 7.5, 9500.0, Duration::from_secs(1)); limiter.consume(Duration::from_secs(2), 2000); worker.adjust_quota(); - check_limiter(&limiter, 2.25, 2250.0); + check_limiter(&limiter, 2.0, 2000.0); reset_quota(&mut worker, 7.5, 9500.0, Duration::from_secs(5)); limiter.consume(Duration::from_secs(10), 10000); worker.adjust_quota(); - check_limiter(&limiter, 2.25, 2250.0); + check_limiter(&limiter, 2.0, 2000.0); let default = new_background_resource_group_ru("default".into(), 2000, 8, vec!["br".into()]); @@ -444,15 +444,15 @@ mod tests { reset_quota(&mut worker, 5.0, 7000.0, Duration::from_secs(1)); worker.adjust_quota(); - check_limiter(&limiter, 1.8, 1800.0); - check_limiter(&bg_limiter, 0.9, 900.0); + check_limiter(&limiter, 1.6, 1600.0); + check_limiter(&bg_limiter, 0.8, 800.0); reset_quota(&mut worker, 6.0, 5000.0, Duration::from_secs(1)); 
limiter.consume(Duration::from_millis(1200), 1200); bg_limiter.consume(Duration::from_millis(1800), 1800); worker.adjust_quota(); - check_limiter(&limiter, 2.4, 3600.0); - check_limiter(&bg_limiter, 2.1, 3600.0); + check_limiter(&limiter, 2.4, 2800.0); + check_limiter(&bg_limiter, 1.6, 3600.0); let bg = new_resource_group_ru("background".into(), 1000, 15); resource_ctl.add_resource_group(bg); @@ -479,15 +479,15 @@ mod tests { reset_quota(&mut worker, 0.0, 0.0, Duration::from_secs(1)); worker.adjust_quota(); - check_limiter(&limiter, 4.8, 6000.0); - check_limiter(&new_bg_limiter, 2.4, 3000.0); + check_limiter(&limiter, 4.27, 5333.3); + check_limiter(&new_bg_limiter, 2.13, 2666.7); reset_quota(&mut worker, 6.0, 5000.0, Duration::from_secs(1)); limiter.consume(Duration::from_millis(1200), 1200); new_bg_limiter.consume(Duration::from_millis(1800), 1800); worker.adjust_quota(); - check_limiter(&limiter, 2.4, 3600.0); - check_limiter(&new_bg_limiter, 2.1, 3600.0); + check_limiter(&limiter, 2.4, 2800.0); + check_limiter(&new_bg_limiter, 1.6, 3600.0); } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 1afe058ca5d..9e6e6d07f06 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -648,6 +648,7 @@ where self.resource_manager .as_ref() .map(|m| m.derive_controller("scheduler-worker-pool".to_owned(), true)), + self.resource_manager.clone(), ) .unwrap_or_else(|e| fatal!("failed to create raft storage: {}", e)); cfg_controller.register( diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index b05d8ad8dfa..6a84e98be1e 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -561,6 +561,7 @@ where self.resource_manager .as_ref() .map(|m| m.derive_controller("scheduler-worker-pool".to_owned(), true)), + self.resource_manager.clone(), ) .unwrap_or_else(|e| fatal!("failed to create raft storage: {}", e)); cfg_controller.register( diff --git 
a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 00a13d7f78c..70c20943279 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -521,6 +521,7 @@ impl ServerCluster { resource_manager .as_ref() .map(|m| m.derive_controller("scheduler-worker-pool".to_owned(), true)), + resource_manager.clone(), )?; self.storages.insert(node_id, raft_kv_v2.clone()); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index aad014a8834..3e41584f596 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -424,6 +424,7 @@ impl ServerCluster { resource_manager .as_ref() .map(|m| m.derive_controller("scheduler-worker-pool".to_owned(), true)), + resource_manager.clone(), )?; self.storages.insert(node_id, raft_engine); diff --git a/components/tidb_query_executors/Cargo.toml b/components/tidb_query_executors/Cargo.toml index 659ce2c0eb1..30fe64252ac 100644 --- a/components/tidb_query_executors/Cargo.toml +++ b/components/tidb_query_executors/Cargo.toml @@ -17,7 +17,6 @@ kvproto = { workspace = true } log_wrappers = { workspace = true } match-template = "0.0.1" protobuf = { version = "2.8", features = ["bytes"] } -resource_control = { workspace = true } slog = { workspace = true } slog-global = { workspace = true } smallvec = "1.4" diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index c0d2f190c64..60359f22c55 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -7,7 +7,6 @@ use fail::fail_point; use itertools::Itertools; use kvproto::coprocessor::KeyRange; use protobuf::Message; -use resource_control::{with_resource_limiter, ResourceLimiter}; use tidb_query_common::{ execute_stats::ExecSummary, metrics::*, @@ -80,7 +79,6 @@ pub struct BatchExecutorsRunner { paging_size: Option, 
quota_limiter: Arc, - resource_limiter: Option>, } // We assign a dummy type `()` so that we can omit the type when calling @@ -430,7 +428,6 @@ impl BatchExecutorsRunner { is_streaming: bool, paging_size: Option, quota_limiter: Arc, - resource_limiter: Option>, ) -> Result { let executors_len = req.get_executors().len(); let collect_exec_summary = req.get_collect_execution_summaries(); @@ -480,7 +477,6 @@ impl BatchExecutorsRunner { encode_type, paging_size, quota_limiter, - resource_limiter, }) } @@ -506,18 +502,14 @@ impl BatchExecutorsRunner { loop { let mut chunk = Chunk::default(); let mut sample = self.quota_limiter.new_sample(true); - let resource_limiter = self.resource_limiter.clone(); let (drained, record_len) = { let (cpu_time, res) = sample - .observe_cpu_async(with_resource_limiter( - self.internal_handle_request( - false, - batch_size, - &mut chunk, - &mut warnings, - &mut ctx, - ), - resource_limiter, + .observe_cpu_async(self.internal_handle_request( + false, + batch_size, + &mut chunk, + &mut warnings, + &mut ctx, )) .await; sample.add_cpu_time(cpu_time); diff --git a/components/tikv_util/src/quota_limiter.rs b/components/tikv_util/src/quota_limiter.rs index ae2e52d40d9..f122087d1d0 100644 --- a/components/tikv_util/src/quota_limiter.rs +++ b/components/tikv_util/src/quota_limiter.rs @@ -129,6 +129,14 @@ impl<'a> Sample { pub fn add_cpu_time(&mut self, time: Duration) { self.cpu_time += time; } + + pub fn enable_cpu_limit(&mut self) { + self.enable_cpu_limit = true; + } + + pub fn cpu_time(&self) -> Duration { + self.cpu_time + } } pub struct CpuObserveGuard<'a> { diff --git a/src/coprocessor/checksum.rs b/src/coprocessor/checksum.rs index 0e6e46a9ce1..3778f549427 100644 --- a/src/coprocessor/checksum.rs +++ b/src/coprocessor/checksum.rs @@ -4,7 +4,6 @@ use api_version::{keyspace::KvPair, ApiV1}; use async_trait::async_trait; use kvproto::coprocessor::{KeyRange, Response}; use protobuf::Message; -use resource_control::{with_resource_limiter, 
ResourceLimiter}; use tidb_query_common::storage::{ scanner::{RangesScanner, RangesScannerOptions}, Range, @@ -21,7 +20,6 @@ use crate::{ pub struct ChecksumContext { req: ChecksumRequest, scanner: RangesScanner>, ApiV1>, - resource_limiter: Option>, } impl ChecksumContext { @@ -31,7 +29,6 @@ impl ChecksumContext { start_ts: u64, snap: S, req_ctx: &ReqContext, - resource_limiter: Option>, ) -> Result { let store = SnapshotStore::new( snap, @@ -52,14 +49,13 @@ impl ChecksumContext { is_key_only: false, is_scanned_range_aware: false, }); - Ok(Self { - req, - scanner, - resource_limiter, - }) + Ok(Self { req, scanner }) } +} - async fn do_handle_request(&mut self) -> Result> { +#[async_trait] +impl RequestHandler for ChecksumContext { + async fn handle_request(&mut self) -> Result> { let algorithm = self.req.get_algorithm(); if algorithm != ChecksumAlgorithm::Crc64Xor { return Err(box_err!("unknown checksum algorithm {:?}", algorithm)); @@ -99,14 +95,6 @@ impl ChecksumContext { resp.set_data(data); Ok(resp.into()) } -} - -#[async_trait] -impl RequestHandler for ChecksumContext { - async fn handle_request(&mut self) -> Result> { - let limiter = self.resource_limiter.clone(); - with_resource_limiter(self.do_handle_request(), limiter).await - } fn collect_scan_statistics(&mut self, dest: &mut Statistics) { self.scanner.collect_storage_stats(dest) diff --git a/src/coprocessor/dag/mod.rs b/src/coprocessor/dag/mod.rs index 62a3a53511a..31a6df181d5 100644 --- a/src/coprocessor/dag/mod.rs +++ b/src/coprocessor/dag/mod.rs @@ -8,7 +8,6 @@ use api_version::KvFormat; use async_trait::async_trait; use kvproto::coprocessor::{KeyRange, Response}; use protobuf::Message; -use resource_control::ResourceLimiter; use tidb_query_common::{execute_stats::ExecSummary, storage::IntervalRange}; use tikv_alloc::trace::MemoryTraceGuard; use tipb::{DagRequest, SelectResponse, StreamResponse}; @@ -31,7 +30,6 @@ pub struct DagHandlerBuilder { is_cache_enabled: bool, paging_size: Option, 
quota_limiter: Arc, - resource_limiter: Option>, _phantom: PhantomData, } @@ -46,7 +44,6 @@ impl DagHandlerBuilder { is_cache_enabled: bool, paging_size: Option, quota_limiter: Arc, - resource_limiter: Option>, ) -> Self { DagHandlerBuilder { req, @@ -59,7 +56,6 @@ impl DagHandlerBuilder { is_cache_enabled, paging_size, quota_limiter, - resource_limiter, _phantom: PhantomData, } } @@ -83,7 +79,6 @@ impl DagHandlerBuilder { self.is_streaming, self.paging_size, self.quota_limiter, - self.resource_limiter, )? .into_boxed()) } @@ -106,7 +101,6 @@ impl BatchDagHandler { is_streaming: bool, paging_size: Option, quota_limiter: Arc, - resource_limiter: Option>, ) -> Result { Ok(Self { runner: tidb_query_executors::runner::BatchExecutorsRunner::from_request::<_, F>( @@ -118,7 +112,6 @@ impl BatchDagHandler { is_streaming, paging_size, quota_limiter, - resource_limiter, )?, data_version, }) diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index cdebf83fdf6..c517e6fb08a 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -190,18 +190,9 @@ impl Endpoint { let mut input = CodedInputStream::from_bytes(&data); input.set_recursion_limit(self.recursion_limit); - let resource_limiter = self.resource_ctl.as_ref().and_then(|r| { - r.get_resource_limiter( - context - .get_resource_control_context() - .get_resource_group_name(), - context.get_request_source(), - ) - }); let mut req_ctx: ReqContext; let builder: RequestHandlerBuilder; - match req.get_tp() { REQ_TYPE_DAG => { let mut dag = DagRequest::default(); @@ -270,7 +261,6 @@ impl Endpoint { req.get_is_cache_enabled(), paging_size, quota_limiter, - resource_limiter, ) .data_version(data_version) .build() @@ -317,7 +307,6 @@ impl Endpoint { snap, req_ctx, quota_limiter, - resource_limiter, ) .map(|h| h.into_boxed()) }); @@ -363,7 +352,6 @@ impl Endpoint { start_ts, snap, req_ctx, - resource_limiter, ) .map(|h| h.into_boxed()) }); @@ -502,6 +490,15 @@ impl Endpoint { 
.resource_tag_factory .new_tag_with_key_ranges(&req_ctx.context, key_ranges); let metadata = TaskMetadata::from_ctx(req_ctx.context.get_resource_control_context()); + let resource_limiter = self.resource_ctl.as_ref().and_then(|r| { + r.get_resource_limiter( + req_ctx + .context + .get_resource_control_context() + .get_resource_group_name(), + req_ctx.context.get_request_source(), + ) + }); // box the tracker so that moving it is cheap. let tracker = Box::new(Tracker::new(req_ctx, self.slow_log_threshold)); @@ -513,6 +510,7 @@ impl Endpoint { priority, task_id, metadata, + resource_limiter, ) .map_err(|_| Error::MaxPendingTasksExceeded); async move { res.await? } @@ -737,6 +735,15 @@ impl Endpoint { let (tx, rx) = mpsc::channel::>(self.stream_channel_size); let priority = req_ctx.context.get_priority(); let metadata = TaskMetadata::from_ctx(req_ctx.context.get_resource_control_context()); + let resource_limiter = self.resource_ctl.as_ref().and_then(|r| { + r.get_resource_limiter( + req_ctx + .context + .get_resource_control_context() + .get_resource_group_name(), + req_ctx.context.get_request_source(), + ) + }); let key_ranges = req_ctx .ranges .iter() @@ -760,6 +767,7 @@ impl Endpoint { priority, task_id, metadata, + resource_limiter, ) .map_err(|_| Error::MaxPendingTasksExceeded)?; Ok(rx) diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index 3d96240b26d..a49ac72398e 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -10,7 +10,6 @@ use kvproto::coprocessor::{KeyRange, Response}; use mur3::Hasher128; use protobuf::Message; use rand::{rngs::StdRng, Rng}; -use resource_control::{with_resource_limiter, ResourceLimiter}; use tidb_query_common::storage::{ scanner::{RangesScanner, RangesScannerOptions}, Range, @@ -52,7 +51,6 @@ pub struct AnalyzeContext { ranges: Vec, storage_stats: Statistics, quota_limiter: Arc, - resource_limiter: Option>, is_auto_analyze: bool, _phantom: 
PhantomData, } @@ -65,7 +63,6 @@ impl AnalyzeContext { snap: S, req_ctx: &ReqContext, quota_limiter: Arc, - resource_limiter: Option>, ) -> Result { let store = SnapshotStore::new( snap, @@ -84,7 +81,6 @@ impl AnalyzeContext { ranges, storage_stats: Statistics::default(), quota_limiter, - resource_limiter, is_auto_analyze, _phantom: PhantomData, }) @@ -236,13 +232,10 @@ impl RequestHandler for AnalyzeContext { is_key_only: true, is_scanned_range_aware: false, }); - let res = with_resource_limiter( - AnalyzeContext::handle_index( - req, - &mut scanner, - self.req.get_tp() == AnalyzeType::TypeCommonHandle, - ), - self.resource_limiter.clone(), + let res = AnalyzeContext::handle_index( + req, + &mut scanner, + self.req.get_tp() == AnalyzeType::TypeCommonHandle, ) .await; scanner.collect_storage_stats(&mut self.storage_stats); @@ -254,11 +247,7 @@ impl RequestHandler for AnalyzeContext { let storage = self.storage.take().unwrap(); let ranges = std::mem::take(&mut self.ranges); let mut builder = SampleBuilder::<_, F>::new(col_req, None, storage, ranges)?; - let res = with_resource_limiter( - AnalyzeContext::handle_column(&mut builder), - self.resource_limiter.clone(), - ) - .await; + let res = AnalyzeContext::handle_column(&mut builder).await; builder.data.collect_storage_stats(&mut self.storage_stats); res } @@ -271,11 +260,7 @@ impl RequestHandler for AnalyzeContext { let ranges = std::mem::take(&mut self.ranges); let mut builder = SampleBuilder::<_, F>::new(col_req, Some(idx_req), storage, ranges)?; - let res = with_resource_limiter( - AnalyzeContext::handle_mixed(&mut builder), - self.resource_limiter.clone(), - ) - .await; + let res = AnalyzeContext::handle_mixed(&mut builder).await; builder.data.collect_storage_stats(&mut self.storage_stats); res } @@ -293,11 +278,7 @@ impl RequestHandler for AnalyzeContext { self.is_auto_analyze, )?; - let res = with_resource_limiter( - AnalyzeContext::handle_full_sampling(&mut builder), - self.resource_limiter.clone(), - ) - 
.await; + let res = AnalyzeContext::handle_full_sampling(&mut builder).await; builder.data.collect_storage_stats(&mut self.storage_stats); res } diff --git a/src/read_pool.rs b/src/read_pool.rs index 3f61374b07f..a5898ea4f63 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -16,7 +16,9 @@ use futures::{channel::oneshot, future::TryFutureExt}; use kvproto::{errorpb, kvrpcpb::CommandPri}; use online_config::{ConfigChange, ConfigManager, ConfigValue, Result as CfgResult}; use prometheus::{core::Metric, Histogram, IntCounter, IntGauge}; -use resource_control::{ControlledFuture, ResourceController, TaskMetadata}; +use resource_control::{ + with_resource_limiter, ControlledFuture, ResourceController, ResourceLimiter, TaskMetadata, +}; use thiserror::Error; use tikv_util::{ sys::{cpu_time::ProcessStat, SysQuota}, @@ -121,6 +123,7 @@ impl ReadPoolHandle { priority: CommandPri, task_id: u64, metadata: TaskMetadata<'_>, + resource_limiter: Option>, ) -> Result<(), ReadPoolError> where F: Future + Send + 'static, @@ -166,13 +169,16 @@ impl ReadPoolHandle { extras.set_metadata(metadata.to_vec()); let task_cell = if let Some(resource_ctl) = resource_ctl { TaskCell::new( - TrackedFuture::new(ControlledFuture::new( - async move { - f.await; - running_tasks.dec(); - }, - resource_ctl.clone(), - group_name, + TrackedFuture::new(with_resource_limiter( + ControlledFuture::new( + async move { + f.await; + running_tasks.dec(); + }, + resource_ctl.clone(), + group_name, + ), + resource_limiter, )), extras, ) @@ -197,6 +203,7 @@ impl ReadPoolHandle { priority: CommandPri, task_id: u64, metadata: TaskMetadata<'_>, + resource_limiter: Option>, ) -> impl Future> where F: Future + Send + 'static, @@ -211,6 +218,7 @@ impl ReadPoolHandle { priority, task_id, metadata, + resource_limiter, ); async move { res?; @@ -806,14 +814,14 @@ mod tests { let (task4, _tx4) = gen_task(); handle - .spawn(task1, CommandPri::Normal, 1, TaskMetadata::default()) + .spawn(task1, CommandPri::Normal, 1, 
TaskMetadata::default(), None) .unwrap(); handle - .spawn(task2, CommandPri::Normal, 2, TaskMetadata::default()) + .spawn(task2, CommandPri::Normal, 2, TaskMetadata::default(), None) .unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default()) { + match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } @@ -821,7 +829,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); handle - .spawn(task4, CommandPri::Normal, 4, TaskMetadata::default()) + .spawn(task4, CommandPri::Normal, 4, TaskMetadata::default(), None) .unwrap(); } @@ -855,14 +863,14 @@ mod tests { let (task5, _tx5) = gen_task(); handle - .spawn(task1, CommandPri::Normal, 1, TaskMetadata::default()) + .spawn(task1, CommandPri::Normal, 1, TaskMetadata::default(), None) .unwrap(); handle - .spawn(task2, CommandPri::Normal, 2, TaskMetadata::default()) + .spawn(task2, CommandPri::Normal, 2, TaskMetadata::default(), None) .unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default()) { + match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } @@ -871,11 +879,11 @@ mod tests { assert_eq!(handle.get_normal_pool_size(), 3); handle - .spawn(task4, CommandPri::Normal, 4, TaskMetadata::default()) + .spawn(task4, CommandPri::Normal, 4, TaskMetadata::default(), None) .unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task5, CommandPri::Normal, 5, TaskMetadata::default()) { + match handle.spawn(task5, CommandPri::Normal, 5, TaskMetadata::default(), None) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } @@ -911,14 +919,14 @@ mod tests { let (task5, _tx5) = gen_task(); handle - .spawn(task1, 
CommandPri::Normal, 1, TaskMetadata::default()) + .spawn(task1, CommandPri::Normal, 1, TaskMetadata::default(), None) .unwrap(); handle - .spawn(task2, CommandPri::Normal, 2, TaskMetadata::default()) + .spawn(task2, CommandPri::Normal, 2, TaskMetadata::default(), None) .unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default()) { + match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } @@ -931,11 +939,11 @@ mod tests { assert_eq!(handle.get_normal_pool_size(), 1); handle - .spawn(task4, CommandPri::Normal, 4, TaskMetadata::default()) + .spawn(task4, CommandPri::Normal, 4, TaskMetadata::default(), None) .unwrap(); thread::sleep(Duration::from_millis(300)); - match handle.spawn(task5, CommandPri::Normal, 5, TaskMetadata::default()) { + match handle.spawn(task5, CommandPri::Normal, 5, TaskMetadata::default(), None) { Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } @@ -1036,10 +1044,10 @@ mod tests { let (task2, tx2) = gen_task(); handle - .spawn(task1, CommandPri::Normal, 1, TaskMetadata::default()) + .spawn(task1, CommandPri::Normal, 1, TaskMetadata::default(), None) .unwrap(); handle - .spawn(task2, CommandPri::Normal, 2, TaskMetadata::default()) + .spawn(task2, CommandPri::Normal, 2, TaskMetadata::default(), None) .unwrap(); tx1.send(()).unwrap(); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 747e2aed8b8..e2bd23f6bb0 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -192,11 +192,11 @@ macro_rules! 
handle_request { handle_request!($fn_name, $future_name, $req_ty, $resp_ty, no_time_detail); }; ($fn_name: ident, $future_name: ident, $req_ty: ident, $resp_ty: ident, $time_detail: tt) => { - fn $fn_name(&mut self, ctx: RpcContext<'_>, mut req: $req_ty, sink: UnarySink<$resp_ty>) { + fn $fn_name(&mut self, ctx: RpcContext<'_>, req: $req_ty, sink: UnarySink<$resp_ty>) { forward_unary!(self.proxy, $fn_name, ctx, req, sink); let begin_instant = Instant::now(); - let source = req.mut_context().take_request_source(); + let source = req.get_context().get_request_source().to_owned(); let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { resource_manager.consume_penalty(resource_control_ctx); @@ -417,12 +417,12 @@ impl Tikv for Service { fn kv_prepare_flashback_to_version( &mut self, ctx: RpcContext<'_>, - mut req: PrepareFlashbackToVersionRequest, + req: PrepareFlashbackToVersionRequest, sink: UnarySink, ) { let begin_instant = Instant::now(); - let source = req.mut_context().take_request_source(); + let source = req.get_context().get_request_source().to_owned(); let resp = future_prepare_flashback_to_version(self.storage.clone(), req); let task = async move { let resp = resp.await?; @@ -448,12 +448,12 @@ impl Tikv for Service { fn kv_flashback_to_version( &mut self, ctx: RpcContext<'_>, - mut req: FlashbackToVersionRequest, + req: FlashbackToVersionRequest, sink: UnarySink, ) { let begin_instant = Instant::now(); - let source = req.mut_context().take_request_source(); + let source = req.get_context().get_request_source().to_owned(); let resp = future_flashback_to_version(self.storage.clone(), req); let task = async move { let resp = resp.await?; @@ -513,10 +513,10 @@ impl Tikv for Service { fn raw_coprocessor( &mut self, ctx: RpcContext<'_>, - mut req: RawCoprocessorRequest, + req: RawCoprocessorRequest, sink: UnarySink, ) { - let source = req.mut_context().take_request_source(); + let 
source = req.get_context().get_request_source().to_owned(); let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { resource_manager.consume_penalty(resource_control_ctx); @@ -561,7 +561,7 @@ impl Tikv for Service { assert!(!req.get_start_key().is_empty()); assert!(!req.get_end_key().is_empty()); - let source = req.mut_context().take_request_source(); + let source = req.get_context().get_request_source().to_owned(); let (cb, f) = paired_future_callback(); let res = self.gc_worker.unsafe_destroy_range( req.take_context(), @@ -1149,7 +1149,7 @@ fn handle_batch_commands_request( let resp = future::ok(batch_commands_response::Response::default()); response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::invalid, String::default()); }, - Some(batch_commands_request::request::Cmd::Get(mut req)) => { + Some(batch_commands_request::request::Cmd::Get(req)) => { let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = resource_manager { resource_manager.consume_penalty(resource_control_ctx); @@ -1163,14 +1163,14 @@ fn handle_batch_commands_request( batcher.as_mut().unwrap().add_get_request(req, id); } else { let begin_instant = Instant::now(); - let source = req.mut_context().take_request_source(); + let source = req.get_context().get_request_source().to_owned(); let resp = future_get(storage, req) .map_ok(oneof!(batch_commands_response::response::Cmd::Get)) .map_err(|_| GRPC_MSG_FAIL_COUNTER.kv_get.inc()); response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::kv_get, source); } }, - Some(batch_commands_request::request::Cmd::RawGet(mut req)) => { + Some(batch_commands_request::request::Cmd::RawGet(req)) => { let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = resource_manager { resource_manager.consume_penalty(resource_control_ctx); @@ 
-1184,7 +1184,7 @@ fn handle_batch_commands_request( batcher.as_mut().unwrap().add_raw_get_request(req, id); } else { let begin_instant = Instant::now(); - let source = req.mut_context().take_request_source(); + let source = req.get_context().get_request_source().to_owned(); let resp = future_raw_get(storage, req) .map_ok(oneof!(batch_commands_response::response::Cmd::RawGet)) .map_err(|_| GRPC_MSG_FAIL_COUNTER.raw_get.inc()); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 36c1f648691..2c5fe7dc750 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -90,7 +90,7 @@ use kvproto::{ use pd_client::FeatureGate; use raftstore::store::{util::build_key_range, ReadStats, TxnExt, WriteStats}; use rand::prelude::*; -use resource_control::{ResourceController, TaskMetadata}; +use resource_control::{ResourceController, ResourceGroupManager, ResourceLimiter, TaskMetadata}; use resource_metering::{FutureExt, ResourceTagFactory}; use tikv_kv::{OnAppliedCb, SnapshotExt}; use tikv_util::{ @@ -211,6 +211,7 @@ pub struct Storage { causal_ts_provider: Option>, quota_limiter: Arc, + resource_manager: Option>, _phantom: PhantomData, } @@ -235,6 +236,7 @@ impl Clone for Storage { causal_ts_provider: self.causal_ts_provider.clone(), resource_tag_factory: self.resource_tag_factory.clone(), quota_limiter: self.quota_limiter.clone(), + resource_manager: self.resource_manager.clone(), _phantom: PhantomData, } } @@ -273,6 +275,7 @@ impl Storage { feature_gate: FeatureGate, causal_ts_provider: Option>, resource_ctl: Option>, + resource_manager: Option>, ) -> Result { assert_eq!(config.api_version(), F::TAG, "Api version not match"); @@ -289,6 +292,7 @@ impl Storage { Arc::clone("a_limiter), feature_gate, resource_ctl, + resource_manager.clone(), ); info!("Storage started."); @@ -304,6 +308,7 @@ impl Storage { causal_ts_provider, resource_tag_factory, quota_limiter, + resource_manager, _phantom: PhantomData, }) } @@ -599,6 +604,12 @@ impl Storage { const CMD: CommandKind = 
CommandKind::get; let priority = ctx.get_priority(); let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + let resource_limiter = self.resource_manager.as_ref().and_then(|r| { + r.get_resource_limiter( + ctx.get_resource_control_context().get_resource_group_name(), + ctx.get_request_source(), + ) + }); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag_with_key_ranges( &ctx, @@ -735,6 +746,7 @@ impl Storage { priority, thread_rng().next_u64(), metadata, + resource_limiter, ) } @@ -756,6 +768,15 @@ impl Storage { let priority = requests[0].get_context().get_priority(); let metadata = TaskMetadata::from_ctx(requests[0].get_context().get_resource_control_context()); + let resource_limiter = self.resource_manager.as_ref().and_then(|r| { + r.get_resource_limiter( + requests[0] + .get_context() + .get_resource_control_context() + .get_resource_group_name(), + requests[0].get_context().get_request_source(), + ) + }); let concurrency_manager = self.concurrency_manager.clone(); let api_version = self.api_version; let busy_threshold = @@ -920,6 +941,7 @@ impl Storage { priority, thread_rng().next_u64(), metadata, + resource_limiter, ) } @@ -936,6 +958,12 @@ impl Storage { const CMD: CommandKind = CommandKind::batch_get; let priority = ctx.get_priority(); let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + let resource_limiter = self.resource_manager.as_ref().and_then(|r| { + r.get_resource_limiter( + ctx.get_resource_control_context().get_resource_group_name(), + ctx.get_request_source(), + ) + }); let priority_tag = get_priority_tag(priority); let key_ranges = keys .iter() @@ -1092,6 +1120,7 @@ impl Storage { priority, thread_rng().next_u64(), metadata, + resource_limiter, ) } @@ -1115,6 +1144,12 @@ impl Storage { const CMD: CommandKind = CommandKind::scan; let priority = ctx.get_priority(); let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + let 
resource_limiter = self.resource_manager.as_ref().and_then(|r| { + r.get_resource_limiter( + ctx.get_resource_control_context().get_resource_group_name(), + ctx.get_request_source(), + ) + }); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag_with_key_ranges( &ctx, @@ -1267,6 +1302,7 @@ impl Storage { priority, thread_rng().next_u64(), metadata, + resource_limiter, ) } @@ -1281,6 +1317,12 @@ impl Storage { const CMD: CommandKind = CommandKind::scan_lock; let priority = ctx.get_priority(); let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + let resource_limiter = self.resource_manager.as_ref().and_then(|r| { + r.get_resource_limiter( + ctx.get_resource_control_context().get_resource_group_name(), + ctx.get_request_source(), + ) + }); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag_with_key_ranges( &ctx, @@ -1411,6 +1453,7 @@ impl Storage { priority, thread_rng().next_u64(), metadata, + resource_limiter, ); async move { res.map_err(|_| Error::from(ErrorInner::SchedTooBusy)) @@ -1587,6 +1630,12 @@ impl Storage { const CMD: CommandKind = CommandKind::raw_get; let priority = ctx.get_priority(); let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + let resource_limiter = self.resource_manager.as_ref().and_then(|r| { + r.get_resource_limiter( + ctx.get_resource_control_context().get_resource_group_name(), + ctx.get_request_source(), + ) + }); let priority_tag = get_priority_tag(priority); let resource_tag = self .resource_tag_factory @@ -1652,6 +1701,7 @@ impl Storage { priority, thread_rng().next_u64(), metadata, + resource_limiter, ) } @@ -1666,6 +1716,15 @@ impl Storage { // all requests in a batch have the same region, epoch, term, replica_read let priority = gets[0].get_context().get_priority(); let metadata = TaskMetadata::from_ctx(gets[0].get_context().get_resource_control_context()); + let resource_limiter = 
self.resource_manager.as_ref().and_then(|r| { + r.get_resource_limiter( + gets[0] + .get_context() + .get_resource_control_context() + .get_resource_group_name(), + gets[0].get_context().get_request_source(), + ) + }); let priority_tag = get_priority_tag(priority); let api_version = self.api_version; let busy_threshold = Duration::from_millis(gets[0].get_context().busy_threshold_ms as u64); @@ -1782,6 +1841,7 @@ impl Storage { priority, thread_rng().next_u64(), metadata, + resource_limiter, ) } @@ -1795,6 +1855,12 @@ impl Storage { const CMD: CommandKind = CommandKind::raw_batch_get; let priority = ctx.get_priority(); let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + let resource_limiter = self.resource_manager.as_ref().and_then(|r| { + r.get_resource_limiter( + ctx.get_resource_control_context().get_resource_group_name(), + ctx.get_request_source(), + ) + }); let priority_tag = get_priority_tag(priority); let key_ranges = keys.iter().map(|k| (k.clone(), k.clone())).collect(); let resource_tag = self @@ -1878,6 +1944,7 @@ impl Storage { priority, thread_rng().next_u64(), metadata, + resource_limiter, ) } @@ -2291,6 +2358,12 @@ impl Storage { const CMD: CommandKind = CommandKind::raw_scan; let priority = ctx.get_priority(); let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + let resource_limiter = self.resource_manager.as_ref().and_then(|r| { + r.get_resource_limiter( + ctx.get_resource_control_context().get_resource_group_name(), + ctx.get_request_source(), + ) + }); let priority_tag = get_priority_tag(priority); let resource_tag = self.resource_tag_factory.new_tag(&ctx); let api_version = self.api_version; @@ -2402,6 +2475,7 @@ impl Storage { priority, thread_rng().next_u64(), metadata, + resource_limiter, ) } @@ -2418,6 +2492,12 @@ impl Storage { const CMD: CommandKind = CommandKind::raw_batch_scan; let priority = ctx.get_priority(); let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + 
let resource_limiter = self.resource_manager.as_ref().and_then(|r| { + r.get_resource_limiter( + ctx.get_resource_control_context().get_resource_group_name(), + ctx.get_request_source(), + ) + }); let priority_tag = get_priority_tag(priority); let key_ranges = ranges .iter() @@ -2557,6 +2637,7 @@ impl Storage { priority, thread_rng().next_u64(), metadata, + resource_limiter, ) } @@ -2570,6 +2651,12 @@ impl Storage { const CMD: CommandKind = CommandKind::raw_get_key_ttl; let priority = ctx.get_priority(); let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + let resource_limiter = self.resource_manager.as_ref().and_then(|r| { + r.get_resource_limiter( + ctx.get_resource_control_context().get_resource_group_name(), + ctx.get_request_source(), + ) + }); let priority_tag = get_priority_tag(priority); let resource_tag = self .resource_tag_factory @@ -2635,6 +2722,7 @@ impl Storage { priority, thread_rng().next_u64(), metadata, + resource_limiter, ) } @@ -2741,6 +2829,12 @@ impl Storage { const CMD: CommandKind = CommandKind::raw_checksum; let priority = ctx.get_priority(); let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); + let resource_limiter = self.resource_manager.as_ref().and_then(|r| { + r.get_resource_limiter( + ctx.get_resource_control_context().get_resource_group_name(), + ctx.get_request_source(), + ) + }); let priority_tag = get_priority_tag(priority); let key_ranges = ranges .iter() @@ -2816,6 +2910,7 @@ impl Storage { priority, thread_rng().next_u64(), metadata, + resource_limiter, ); async move { @@ -2831,6 +2926,7 @@ impl Storage { priority: CommandPri, task_id: u64, metadata: TaskMetadata<'_>, + resource_limiter: Option>, ) -> impl Future> where Fut: Future> + Send + 'static, @@ -2843,7 +2939,7 @@ impl Storage { } Either::Right( self.read_pool - .spawn_handle(future, priority, task_id, metadata) + .spawn_handle(future, priority, task_id, metadata, resource_limiter) .map_err(|_| 
Error::from(ErrorInner::SchedTooBusy)) .and_then(|res| future::ready(res)), ) @@ -3199,6 +3295,10 @@ impl TestStorageBuilder { Arc::new(QuotaLimiter::default()), latest_feature_gate(), ts_provider, + Some(Arc::new(ResourceController::new_for_test( + "test".to_owned(), + false, + ))), None, ) } @@ -3234,6 +3334,7 @@ impl TestStorageBuilder { "test".to_owned(), false, ))), + None, ) } @@ -3268,6 +3369,7 @@ impl TestStorageBuilder { latest_feature_gate(), None, Some(resource_controller), + None, ) } } diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index ff0aa886a0e..04116435612 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -47,7 +47,7 @@ use kvproto::{ use parking_lot::{Mutex, MutexGuard, RwLockWriteGuard}; use pd_client::{Feature, FeatureGate}; use raftstore::store::TxnExt; -use resource_control::{ResourceController, TaskMetadata}; +use resource_control::{ResourceController, ResourceGroupManager, TaskMetadata}; use resource_metering::{FutureExt, ResourceTagFactory}; use smallvec::{smallvec, SmallVec}; use tikv_kv::{Modify, Snapshot, SnapshotExt, WriteData, WriteEvent}; @@ -99,6 +99,10 @@ pub const DEFAULT_EXECUTION_DURATION_LIMIT: Duration = Duration::from_secs(24 * const IN_MEMORY_PESSIMISTIC_LOCK: Feature = Feature::require(6, 0, 0); pub const LAST_CHANGE_TS: Feature = Feature::require(6, 5, 0); +// we only do resource control in txn scheduler, so the cpu time tracked is much +// less than the actual cost, so we increase it by a factor. +const SCHEDULER_CPU_TIME_FACTOR: u32 = 5; + type SVec = SmallVec<[T; 4]>; /// Task is a running command. 
@@ -286,6 +290,7 @@ struct TxnSchedulerInner { lock_wait_queues: LockWaitQueues, quota_limiter: Arc, + resource_manager: Option>, feature_gate: FeatureGate, } @@ -441,6 +446,7 @@ impl TxnScheduler { quota_limiter: Arc, feature_gate: FeatureGate, resource_ctl: Option>, + resource_manager: Option>, ) -> Self { let t = Instant::now_coarse(); let mut task_slots = Vec::with_capacity(TASKS_SLOTS_NUM); @@ -475,6 +481,7 @@ impl TxnScheduler { resource_tag_factory, lock_wait_queues, quota_limiter, + resource_manager, feature_gate, }); @@ -1205,7 +1212,19 @@ impl TxnScheduler { let tracker = task.tracker; let scheduler = self.clone(); let quota_limiter = self.inner.quota_limiter.clone(); + let resource_limiter = self.inner.resource_manager.as_ref().and_then(|m| { + m.get_resource_limiter( + task.cmd + .ctx() + .get_resource_control_context() + .get_resource_group_name(), + task.cmd.ctx().get_request_source(), + ) + }); let mut sample = quota_limiter.new_sample(true); + if resource_limiter.is_some() { + sample.enable_cpu_limit(); + } let pessimistic_lock_mode = self.pessimistic_lock_mode(); let pipelined = task.cmd.can_be_pipelined() && pessimistic_lock_mode == PessimisticLockMode::Pipelined; @@ -1259,6 +1278,14 @@ impl TxnScheduler { // TODO: write bytes can be a bit inaccurate due to error requests or in-memory // pessimistic locks. sample.add_write_bytes(write_bytes); + // estimate the cpu time for write by the schdule cpu time and write bytes + let expected_dur = (sample.cpu_time() + Duration::from_micros(write_bytes as u64)) + * SCHEDULER_CPU_TIME_FACTOR; + if let Some(limiter) = resource_limiter { + limiter + .async_consume(expected_dur, write_bytes as u64) + .await; + } } let read_bytes = sched_details .stat @@ -1984,6 +2011,7 @@ mod tests { "test".to_owned(), true, ))), + None, ), engine, ) @@ -2339,6 +2367,7 @@ mod tests { "test".to_owned(), true, ))), + None, ); // Use sync mode if pipelined_pessimistic_lock is false. 
assert_eq!(scheduler.pessimistic_lock_mode(), PessimisticLockMode::Sync); diff --git a/tests/benches/coprocessor_executors/util/mod.rs b/tests/benches/coprocessor_executors/util/mod.rs index 9737d6a2160..0a5708c74ce 100644 --- a/tests/benches/coprocessor_executors/util/mod.rs +++ b/tests/benches/coprocessor_executors/util/mod.rs @@ -52,7 +52,6 @@ pub fn build_dag_handler( false, None, Arc::new(QuotaLimiter::default()), - None, ) .build() .unwrap() From 0ad33d428e85cb59d7381f791f47428713ce16e5 Mon Sep 17 00:00:00 2001 From: ekexium Date: Thu, 27 Jul 2023 13:42:05 +0800 Subject: [PATCH 0822/1149] metric: fix resolved-ts row (#15218) ref tikv/tikv#15082 Fix the unintentional changes in #15118: let resolved-ts row be collapsed by default, and put panels in it. Signed-off-by: ekexium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/grafana/tikv_details.json | 3487 +++++++++++++++-------------- 1 file changed, 1744 insertions(+), 1743 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 060d2e83d01..ce5571f9657 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -21249,7 +21249,7 @@ "h": 1, "w": 24, "x": 0, - "y": 30 + "y": 26 }, "id": 2755, "panels": [ @@ -21775,7 +21775,7 @@ "h": 1, "w": 24, "x": 0, - "y": 31 + "y": 27 }, "id": 2758, "panels": [ @@ -23115,7 +23115,7 @@ "h": 1, "w": 24, "x": 0, - "y": 32 + "y": 28 }, "id": 2759, "panels": [ @@ -24015,7 +24015,7 @@ "h": 1, "w": 24, "x": 0, - "y": 33 + "y": 29 }, "id": 2760, "panels": [ @@ -24427,7 +24427,7 @@ "h": 1, "w": 24, "x": 0, - "y": 34 + "y": 30 }, "id": 2757, "panels": [ @@ -25231,7 +25231,7 @@ "h": 1, "w": 24, "x": 0, - "y": 35 + "y": 31 }, "id": 3197, "panels": [ @@ -26325,7 +26325,7 @@ "h": 1, "w": 24, "x": 0, - "y": 36 + "y": 32 }, "id": 2761, "panels": [ @@ -26722,7 +26722,7 @@ "h": 1, "w": 24, "x": 0, - "y": 37 + "y": 33 }, "id": 2762, "panels": [ @@ -31462,7 +31462,7 @@ 
"h": 1, "w": 24, "x": 0, - "y": 38 + "y": 34 }, "id": 12802, "panels": [ @@ -32511,7 +32511,7 @@ "h": 1, "w": 24, "x": 0, - "y": 39 + "y": 35 }, "id": 3301, "panels": [ @@ -35520,7 +35520,7 @@ "h": 1, "w": 24, "x": 0, - "y": 40 + "y": 36 }, "id": 2820, "panels": [ @@ -36652,7 +36652,7 @@ "h": 1, "w": 24, "x": 0, - "y": 41 + "y": 37 }, "id": 23763573235, "panels": [ @@ -38082,1745 +38082,1746 @@ "type": "row" }, { - "collapsed": false, + "collapsed": true, "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 42 + "y": 38 }, "id": 8389, - "panels": [], - "title": "Resolved-TS", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of resolved ts worker", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 43 - }, - "hiddenSeries": false, - "id": 8385, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"resolved_ts.*\"}[1m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - } - ], - 
"thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Resolved TS Worker CPU", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of advance ts worker", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 43 - }, - "hiddenSeries": false, - "id": 9162, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"advance_ts.*\"}[1m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-tso", - "metric": "tikv_thread_cpu_seconds_total", - "refId": 
"A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Advance ts Worker CPU", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of scan lock worker", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 43 - }, - "hiddenSeries": false, - "id": 9164, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"inc_scan.*\"}[1m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-scan", - "metric": 
"tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Scan lock Worker CPU", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between resolved ts (the maximum candidate of safe-ts) and current time.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 51 - }, - "hiddenSeries": false, - "id": 8387, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(tikv_resolved_ts_min_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - 
"legendFormat": "{{instance}}", - "refId": "A", - "step": 60 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Max gap of resolved-ts", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "panels": [ { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between safe ts and current time", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 51 - }, - "hiddenSeries": false, - "id": 23763573805, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_safe_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - 
"legendFormat": "{{instance}}", - "refId": "A", - "step": 60 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Max gap of safe-ts", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": " \tThe CPU utilization of resolved ts worker", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 39 + }, + "hiddenSeries": false, + "id": 8385, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"resolved_ts.*\"}[1m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Resolved TS Worker CPU", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The region that has minimal resolved ts", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 59 - }, - "hiddenSeries": false, - "id": 23763572078, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "hide": 
false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Min Resolved TS Region", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The region that has minimal safe ts", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 59 - }, - "hiddenSeries": false, - "id": 23763573804, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_safe_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", - "format": 
"time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Min Safe TS Region", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when handle a check leader request", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 67 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 9168, - "legend": { - "show": false - }, - "links": [], - "reverseYBuckets": false, - "targets": [ - { - "expr": "sum(delta(tikv_resolved_ts_check_leader_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "metric": "", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Check leader duration", - "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 
0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between resolved ts of leaders and current time", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 67 - }, - "hiddenSeries": false, - "id": 23763572077, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 60 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Max gap of resolved-ts in region leaders", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "aliasColors": {}, + "bars": false, + "dashLength": 10, + 
"dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": " \tThe CPU utilization of advance ts worker", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 39 + }, + "hiddenSeries": false, + "id": 9162, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"advance_ts.*\"}[1m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}-tso", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Advance ts Worker CPU", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": 
false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": " \tThe CPU utilization of scan lock worker", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 39 + }, + "hiddenSeries": false, + "id": 9164, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"inc_scan.*\"}[1m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}-scan", + "metric": "tikv_thread_cpu_seconds_total", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Scan lock Worker CPU", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + 
"aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The gap between resolved ts (the maximum candidate of safe-ts) and current time.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 47 + }, + "hiddenSeries": false, + "id": 8387, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(tikv_resolved_ts_min_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 60 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Max gap of resolved-ts", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, 
+ "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The gap between safe ts and current time", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 47 + }, + "hiddenSeries": false, + "id": 23763573805, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(tikv_resolved_ts_min_safe_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 60 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Max gap of safe-ts", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + 
"dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The region that has minimal resolved ts", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 55 + }, + "hiddenSeries": false, + "id": 23763572078, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(tikv_resolved_ts_min_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Min Resolved TS Region", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": 
{}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The region that has minimal safe ts", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 55 + }, + "hiddenSeries": false, + "id": 23763573804, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(tikv_resolved_ts_min_safe_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Min Safe TS Region", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + 
"cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed when handle a check leader request", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 63 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 9168, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "expr": "sum(delta(tikv_resolved_ts_check_leader_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "format": "heatmap", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "metric": "", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Check leader duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The gap between resolved ts of leaders and current time", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 63 + }, + "hiddenSeries": false, + "id": 23763572077, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "max": true, + "min": false, + "rightSide": 
true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 60 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Max gap of resolved-ts in region leaders", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Bucketed histogram of region count in a check leader request", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 71 + }, + "hiddenSeries": false, + "id": 12308, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "max": true, + 
"min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "tikv_snapshot_size_bucket", + "refId": "A", + "step": 40 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "99% CheckLeader request region count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The region that its leader has minimal resolved ts.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 71 + }, + "hiddenSeries": false, + "id": 23763572079, + "legend": { + 
"alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Min Leader Resolved TS Region", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "Total bytes in memory of resolved-ts observe regions's lock heap", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 79 + }, 
+ "hiddenSeries": false, + "id": 8379, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(tikv_resolved_ts_lock_heap_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Lock heap size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The status of resolved-ts observe regions", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 79 + }, + "hiddenSeries": false, + "id": 8377, + "legend": { + 
"alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(tikv_resolved_ts_region_resolve_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Observe region status", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The count of fail to advance resolved-ts", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 87 + }, + "hiddenSeries": false, + "id": 9166, + "legend": { 
+ "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(delta(tikv_resolved_ts_fail_advance_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}-{{reason}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Fail advance ts count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Bucketed histogram of the check leader request size", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 87 + 
}, + "hiddenSeries": false, + "id": 8383, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "tikv_snapshot_size_bucket", + "refId": "A", + "step": 40 + }, + { + "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "{{instance}}-check-num", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "99% CheckLeader request size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, 
{ - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "description": "Bucketed histogram of region count in a check leader request", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 75 - }, - "hiddenSeries": false, - "id": 12308, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_snapshot_size_bucket", - "refId": "A", - "step": 40 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "99% CheckLeader request region count", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": 
"time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The region that its leader has minimal resolved ts.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 75 - }, - "hiddenSeries": false, - "id": 23763572079, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Min Leader Resolved TS Region", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - 
"type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "Total bytes in memory of resolved-ts observe regions's lock heap", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 83 - }, - "hiddenSeries": false, - "id": 8379, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg(tikv_resolved_ts_lock_heap_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Lock heap size", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": 
null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The status of resolved-ts observe regions", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 83 - }, - "hiddenSeries": false, - "id": 8377, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(tikv_resolved_ts_region_resolve_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Observe region status", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - 
"buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The count of fail to advance resolved-ts", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 91 - }, - "hiddenSeries": false, - "id": 9166, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": false, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(delta(tikv_resolved_ts_fail_advance_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}}-{{reason}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Fail advance ts count", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - 
"value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "description": "Bucketed histogram of the check leader request size", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 91 - }, - "hiddenSeries": false, - "id": 8383, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_snapshot_size_bucket", - "refId": "A", - "step": 40 - }, - { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "{{instance}}-check-num", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "99% CheckLeader request size", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "Total bytes of pending commands in the channel", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 99 - }, - "hiddenSeries": false, - "id": 8381, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg(tikv_resolved_ts_channel_penging_cmd_bytes_total{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Pending command size", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "Total bytes of pending commands in the channel", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 95 + }, + "hiddenSeries": false, + "id": 8381, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(tikv_resolved_ts_channel_penging_cmd_bytes_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + 
"intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pending command size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Resolved-TS", + "type": "row" }, { "collapsed": true, @@ -39829,7 +39830,7 @@ "h": 1, "w": 24, "x": 0, - "y": 107 + "y": 39 }, "id": 2763, "panels": [ @@ -40253,7 +40254,7 @@ "h": 1, "w": 24, "x": 0, - "y": 108 + "y": 40 }, "id": 3922, "panels": [ @@ -43191,7 +43192,7 @@ "h": 1, "w": 24, "x": 0, - "y": 109 + "y": 41 }, "id": 4466, "panels": [ @@ -43764,7 +43765,7 @@ "h": 1, "w": 24, "x": 0, - "y": 110 + "y": 42 }, "id": 13016, "panels": [ From cf255e36eaa7fca89e9c2afafe397cbceeacf073 Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 28 Jul 2023 11:15:05 +0800 Subject: [PATCH 0823/1149] resource_control: add some metrics for background task control (#15195) ref tikv/tikv#14900 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resource_control/src/future.rs | 2 +- components/resource_control/src/lib.rs | 2 ++ components/resource_control/src/metrics.rs | 33 +++++++++++++++++++ .../resource_control/src/resource_group.rs | 10 ++++-- .../resource_control/src/resource_limiter.rs | 26 +++++++++++---- components/resource_control/src/worker.rs | 10 ++++++ 6 files changed, 73 insertions(+), 10 deletions(-) create mode 100644 
components/resource_control/src/metrics.rs diff --git a/components/resource_control/src/future.rs b/components/resource_control/src/future.rs index 080c90c9f2d..93d0bb1001d 100644 --- a/components/resource_control/src/future.rs +++ b/components/resource_control/src/future.rs @@ -266,7 +266,7 @@ mod tests { .name_prefix("test") .build_future_pool(); - let resource_limiter = Arc::new(ResourceLimiter::new(f64::INFINITY, 1000.0, 0)); + let resource_limiter = Arc::new(ResourceLimiter::new("".into(), f64::INFINITY, 1000.0, 0)); fn spawn_and_wait(pool: &FuturePool, f: F, limiter: Arc) where diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index 0989dc8fff3..df6496e2ed2 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -25,6 +25,8 @@ pub use channel::ResourceMetered; mod resource_limiter; pub use resource_limiter::ResourceLimiter; + +mod metrics; pub mod worker; #[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] diff --git a/components/resource_control/src/metrics.rs b/components/resource_control/src/metrics.rs new file mode 100644 index 00000000000..16338f41c6c --- /dev/null +++ b/components/resource_control/src/metrics.rs @@ -0,0 +1,33 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use lazy_static::*; +use prometheus::*; + +lazy_static! 
{ + pub static ref BACKGROUND_QUOTA_LIMIT_VEC: IntGaugeVec = register_int_gauge_vec!( + "tikv_resource_control_background_quota_limiter", + "The quota limiter of background resource groups per resource type", + &["name", "type"] + ) + .unwrap(); + pub static ref BACKGROUND_RESOURCE_CONSUMPTION: IntCounterVec = register_int_counter_vec!( + "tikv_resource_control_background_resource_consumption", + "Total resource consumed of background resource groups per resource type", + &["name", "type"] + ) + .unwrap(); + pub static ref BACKGROUND_TASKS_WAIT_DURATION: IntCounterVec = register_int_counter_vec!( + "tikv_resource_control_background_task_wait_duration", + "Total wait duration of background tasks per resource group", + &["name"] + ) + .unwrap(); +} + +pub fn deregister_metrics(name: &str) { + for ty in ["cpu", "io"] { + _ = BACKGROUND_QUOTA_LIMIT_VEC.remove_label_values(&[name, ty]); + _ = BACKGROUND_RESOURCE_CONSUMPTION.remove_label_values(&[name, ty]); + } + _ = BACKGROUND_TASKS_WAIT_DURATION.remove_label_values(&[name]); +} diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index faad2150923..5aec1b32c46 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -25,7 +25,7 @@ use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard}; use tikv_util::{info, time::Instant}; use yatp::queue::priority::TaskPriorityProvider; -use crate::resource_limiter::ResourceLimiter; +use crate::{metrics::deregister_metrics, resource_limiter::ResourceLimiter}; // a read task cost at least 50us. 
const DEFAULT_PRIORITY_PER_READ_TASK: u64 = 50; @@ -142,6 +142,7 @@ impl ResourceGroupManager { old_limiter.or_else(|| { let version = self.version_generator.fetch_add(1, Ordering::Relaxed); Some(Arc::new(ResourceLimiter::new( + rg.name.clone(), f64::INFINITY, f64::INFINITY, version, @@ -157,8 +158,10 @@ impl ResourceGroupManager { self.registry.read().iter().for_each(|controller| { controller.remove_resource_group(group_name.as_bytes()); }); - info!("remove resource group"; "name"=> name); - self.resource_groups.remove(&group_name); + if self.resource_groups.remove(&group_name).is_some() { + deregister_metrics(name); + info!("remove resource group"; "name"=> name); + } } pub fn retain(&self, mut f: impl FnMut(&String, &PbResourceGroup) -> bool) { @@ -171,6 +174,7 @@ impl ResourceGroupManager { let ret = f(k, &v.group); if !ret { removed_names.push(k.clone()); + deregister_metrics(k); } ret }); diff --git a/components/resource_control/src/resource_limiter.rs b/components/resource_control/src/resource_limiter.rs index c3e1fab9a49..155b2224bb8 100644 --- a/components/resource_control/src/resource_limiter.rs +++ b/components/resource_control/src/resource_limiter.rs @@ -10,6 +10,8 @@ use futures::compat::Future01CompatExt; use strum::EnumCount; use tikv_util::{time::Limiter, timer::GLOBAL_TIMER_HANDLE}; +use crate::metrics::BACKGROUND_TASKS_WAIT_DURATION; + #[derive(Clone, Copy, Eq, PartialEq, EnumCount)] #[repr(usize)] pub enum ResourceType { @@ -17,16 +19,23 @@ pub enum ResourceType { Io, } -impl fmt::Debug for ResourceType { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl ResourceType { + pub fn as_str(&self) -> &str { match *self { - ResourceType::Cpu => write!(f, "cpu"), - ResourceType::Io => write!(f, "io"), + ResourceType::Cpu => "cpu", + ResourceType::Io => "io", } } } +impl fmt::Debug for ResourceType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.as_str()) + } +} + pub struct ResourceLimiter { + name: 
String, version: u64, limiters: [QuotaLimiter; ResourceType::COUNT], } @@ -38,10 +47,11 @@ impl std::fmt::Debug for ResourceLimiter { } impl ResourceLimiter { - pub fn new(cpu_limit: f64, io_limit: f64, version: u64) -> Self { + pub fn new(name: String, cpu_limit: f64, io_limit: f64, version: u64) -> Self { let cpu_limiter = QuotaLimiter::new(cpu_limit); let io_limiter = QuotaLimiter::new(io_limit); Self { + name, version, limiters: [cpu_limiter, io_limiter], } @@ -51,7 +61,11 @@ impl ResourceLimiter { let cpu_dur = self.limiters[ResourceType::Cpu as usize].consume(cpu_time.as_micros() as u64); let io_dur = self.limiters[ResourceType::Io as usize].consume(io_bytes); - cpu_dur.max(io_dur) + let wait_dur = cpu_dur.max(io_dur); + BACKGROUND_TASKS_WAIT_DURATION + .with_label_values(&[&self.name]) + .inc_by(wait_dur.as_micros() as u64); + wait_dur } pub async fn async_consume(&self, cpu_time: Duration, io_bytes: u64) -> Duration { diff --git a/components/resource_control/src/worker.rs b/components/resource_control/src/worker.rs index e2372d39f46..ca1a32347ff 100644 --- a/components/resource_control/src/worker.rs +++ b/components/resource_control/src/worker.rs @@ -17,6 +17,7 @@ use tikv_util::{ }; use crate::{ + metrics::*, resource_group::ResourceGroupManager, resource_limiter::{GroupStatistics, ResourceLimiter, ResourceType}, }; @@ -199,6 +200,9 @@ impl GroupQuotaAdjustWorker { } else { total_stats }; + BACKGROUND_RESOURCE_CONSUMPTION + .with_label_values(&[&g.name, resource_type.as_str()]) + .inc_by(stats_delta.total_consumed); let stats_per_sec = stats_delta / dur_secs; background_consumed_total += stats_per_sec.total_consumed as f64; g.stats_per_sec = stats_per_sec; @@ -254,6 +258,9 @@ impl GroupQuotaAdjustWorker { .expect_cost_rate .max(available_resource_rate / total_ru_quota * g.ru_quota); g.limiter.get_limiter(resource_type).set_rate_limit(limit); + BACKGROUND_QUOTA_LIMIT_VEC + .with_label_values(&[&g.name, resource_type.as_str()]) + .set(limit as i64); 
available_resource_rate -= limit; total_ru_quota -= g.ru_quota; } @@ -271,6 +278,9 @@ impl GroupQuotaAdjustWorker { .expect_cost_rate .min(available_resource_rate / total_ru_quota * g.ru_quota); g.limiter.get_limiter(resource_type).set_rate_limit(limit); + BACKGROUND_QUOTA_LIMIT_VEC + .with_label_values(&[&g.name, resource_type.as_str()]) + .set(limit as i64); available_resource_rate -= limit; total_ru_quota -= g.ru_quota; } From e8238777ea52c99aa7a47a0dbc1748af49fc1b87 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Fri, 28 Jul 2023 11:28:35 +0800 Subject: [PATCH 0824/1149] pd_client: reduce store heartbeat retires to prevent heartbeat storm (#15191) ref tikv/pd#6556, close tikv/tikv#15184 The store heartbeat will report periodically, no need to do retires - do not retry the store heartbeat - change `remain_reconnect_count` as `remain_request_count` - fix some metrics Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/pd_client/src/client.rs | 18 +++++++++------- components/pd_client/src/metrics.rs | 1 + components/pd_client/src/util.rs | 32 ++++++++++++++++++----------- 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 6aeecc3bf65..c54222ae87f 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -325,6 +325,8 @@ impl fmt::Debug for RpcClient { } const LEADER_CHANGE_RETRY: usize = 10; +// periodic request like store_heartbeat, we don't need to retry. 
+const NO_RETRY: usize = 1; impl PdClient for RpcClient { fn store_global_config( @@ -870,10 +872,14 @@ impl PdClient for RpcClient { }) }; Box::pin(async move { - let resp = handler.await?; - PD_REQUEST_HISTOGRAM_VEC - .store_heartbeat - .observe(timer.saturating_elapsed_secs()); + let resp = handler + .map(|res| { + PD_REQUEST_HISTOGRAM_VEC + .store_heartbeat + .observe(timer.saturating_elapsed_secs()); + res + }) + .await?; check_resp_header(resp.get_header())?; match feature_gate.set_version(resp.get_cluster_version()) { Err(_) => warn!("invalid cluster version: {}", resp.get_cluster_version()), @@ -884,9 +890,7 @@ impl PdClient for RpcClient { }) as PdFuture<_> }; - self.pd_client - .request(req, executor, LEADER_CHANGE_RETRY) - .execute() + self.pd_client.request(req, executor, NO_RETRY).execute() } fn report_batch_split(&self, regions: Vec) -> PdFuture<()> { diff --git a/components/pd_client/src/metrics.rs b/components/pd_client/src/metrics.rs index 77f2e990e0f..d92e334396a 100644 --- a/components/pd_client/src/metrics.rs +++ b/components/pd_client/src/metrics.rs @@ -33,6 +33,7 @@ make_static_metric! { store_heartbeat, tso, scan_regions, + get_members, meta_storage_put, meta_storage_get, diff --git a/components/pd_client/src/util.rs b/components/pd_client/src/util.rs index 6fd8aac679f..2e7567f6fa0 100644 --- a/components/pd_client/src/util.rs +++ b/components/pd_client/src/util.rs @@ -312,7 +312,7 @@ impl Client { F: FnMut(&Client, Req) -> PdFuture + Send + 'static, { Request { - remain_reconnect_count: retry, + remain_request_count: retry, request_sent: 0, client: self.clone(), req, @@ -404,7 +404,7 @@ impl Client { /// The context of sending requets. 
pub struct Request { - remain_reconnect_count: usize, + remain_request_count: usize, request_sent: usize, client: Arc, req: Req, @@ -419,15 +419,11 @@ where F: FnMut(&Client, Req) -> PdFuture + Send + 'static, { async fn reconnect_if_needed(&mut self) -> Result<()> { - debug!("reconnecting ..."; "remain" => self.remain_reconnect_count); - if self.request_sent < MAX_REQUEST_COUNT { + debug!("reconnecting ..."; "remain" => self.remain_request_count); + if self.request_sent < MAX_REQUEST_COUNT && self.request_sent < self.remain_request_count { return Ok(()); } - if self.remain_reconnect_count == 0 { - return Err(box_err!("request retry exceeds limit")); - } // Updating client. - self.remain_reconnect_count -= 1; // FIXME: should not block the core. debug!("(re)connecting PD client"); match self.client.reconnect(true).await { @@ -447,18 +443,22 @@ where } async fn send_and_receive(&mut self) -> Result { + if self.remain_request_count == 0 { + return Err(box_err!("request retry exceeds limit")); + } self.request_sent += 1; + self.remain_request_count -= 1; debug!("request sent: {}", self.request_sent); let r = self.req.clone(); (self.func)(&self.client, r).await } - fn should_not_retry(resp: &Result) -> bool { + fn should_not_retry(&self, resp: &Result) -> bool { match resp { Ok(_) => true, Err(err) => { // these errors are not caused by network, no need to retry - if err.retryable() { + if err.retryable() && self.remain_request_count > 0 { error!(?*err; "request failed, retry"); false } else { @@ -475,7 +475,7 @@ where loop { { let resp = self.send_and_receive().await; - if Self::should_not_retry(&resp) { + if self.should_not_retry(&resp) { return resp; } } @@ -621,10 +621,14 @@ impl PdConnector { }); let client = PdClientStub::new(channel.clone()); let option = CallOption::default().timeout(Duration::from_secs(REQUEST_TIMEOUT)); + let timer = Instant::now(); let response = client .get_members_async_opt(&GetMembersRequest::default(), option) .unwrap_or_else(|e| 
panic!("fail to request PD {} err {:?}", "get_members", e)) .await; + PD_REQUEST_HISTOGRAM_VEC + .get_members + .observe(timer.saturating_elapsed_secs()); match response { Ok(resp) => Ok((client, resp)), Err(e) => Err(Error::Grpc(e)), @@ -789,7 +793,7 @@ impl PdConnector { Ok((None, has_network_error)) } - pub async fn reconnect_leader( + async fn reconnect_leader( &self, leader: &Member, ) -> Result<(Option<(PdClientStub, String)>, bool)> { @@ -835,6 +839,7 @@ impl PdConnector { let client_urls = leader.get_client_urls(); for leader_url in client_urls { let target = TargetInfo::new(leader_url.clone(), &ep); + let timer = Instant::now(); let response = client .get_members_async_opt( &GetMembersRequest::default(), @@ -846,6 +851,9 @@ impl PdConnector { panic!("fail to request PD {} err {:?}", "get_members", e) }) .await; + PD_REQUEST_HISTOGRAM_VEC + .get_members + .observe(timer.saturating_elapsed_secs()); match response { Ok(_) => return Ok(Some((client, target))), Err(_) => continue, From cceebf942b0b35b03e5d1366b8841f0c8a7a32e6 Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 28 Jul 2023 11:43:35 +0800 Subject: [PATCH 0825/1149] resource_control: enable resource control for raft-v2 (#15214) ref tikv/tikv#13730 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/batch/store.rs | 16 ++++++++++++---- .../raftstore-v2/tests/integrations/cluster.rs | 10 ++++++++++ components/server/src/server2.rs | 9 ++++++++- components/test_raftstore-v2/src/node.rs | 11 +++++++++-- components/test_raftstore-v2/src/server.rs | 9 ++++++++- src/server/raftkv2/node.rs | 13 +++++++++++-- 6 files changed, 58 insertions(+), 10 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 506b9d531c0..4c142a43abf 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -39,6 +39,7 @@ use raftstore::{ 
WriteRouterContext, WriteSenders, WriterContoller, }, }; +use resource_control::ResourceController; use resource_metering::CollectorRegHandle; use service::service_manager::GrpcServiceManager; use slog::{warn, Logger}; @@ -620,14 +621,19 @@ struct Workers { } impl Workers { - fn new(background: Worker, pd: LazyWorker, purge: Option) -> Self { + fn new( + background: Worker, + pd: LazyWorker, + purge: Option, + resource_control: Option>, + ) -> Self { let checkpoint = Builder::new("checkpoint-worker").thread_count(2).create(); Self { async_read: Worker::new("async-read-worker"), pd, tablet: Worker::new("tablet-worker"), checkpoint, - async_write: StoreWriters::new(None), + async_write: StoreWriters::new(resource_control), purge, cleanup_worker: Worker::new("cleanup-worker"), refresh_config_worker: LazyWorker::new("refreash-config-worker"), @@ -686,6 +692,7 @@ impl StoreSystem { sst_importer: Arc, key_manager: Option>, grpc_service_mgr: GrpcServiceManager, + resource_ctl: Option>, ) -> Result<()> where T: Transport + 'static, @@ -778,7 +785,7 @@ impl StoreSystem { None }; - let mut workers = Workers::new(background, pd_worker, purge_worker); + let mut workers = Workers::new(background, pd_worker, purge_worker, resource_ctl); workers .async_write .spawn(store_id, raft_engine.clone(), None, router, &trans, &cfg)?; @@ -1026,6 +1033,7 @@ pub fn create_store_batch_system( cfg: &Config, store_id: u64, logger: Logger, + resource_ctl: Option>, ) -> (StoreRouter, StoreSystem) where EK: KvEngine, @@ -1033,7 +1041,7 @@ where { let (store_tx, store_fsm) = StoreFsm::new(cfg, store_id, logger.clone()); let (router, system) = - batch_system::create_system(&cfg.store_batch_system, store_tx, store_fsm, None); + batch_system::create_system(&cfg.store_batch_system, store_tx, store_fsm, resource_ctl); let system = StoreSystem { system, workers: None, diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 
67b8eacd6a9..5b3cc5feb93 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -44,6 +44,7 @@ use raftstore_v2::{ router::{DebugInfoChannel, FlushChannel, PeerMsg, QueryResult, RaftRouter, StoreMsg}, Bootstrap, SimpleWriteEncoder, StateStorage, StoreSystem, }; +use resource_control::{ResourceController, ResourceGroupManager}; use resource_metering::CollectorRegHandle; use service::service_manager::GrpcServiceManager; use slog::{debug, o, Logger}; @@ -263,6 +264,7 @@ impl RunningState { concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, logger: &Logger, + resource_ctl: Arc, ) -> (TestRouter, Self) { let encryption_cfg = test_util::new_file_security_config(path); let key_manager = Some(Arc::new( @@ -286,6 +288,7 @@ impl RunningState { &cfg.value(), store_id, logger.clone(), + Some(resource_ctl.clone()), ); let cf_opts = DATA_CFS .iter() @@ -355,6 +358,7 @@ impl RunningState { importer, key_manager, GrpcServiceManager::dummy(), + Some(resource_ctl), ) .unwrap(); @@ -385,6 +389,7 @@ pub struct TestNode { path: TempDir, running_state: Option, logger: Logger, + resource_manager: Arc, } impl TestNode { @@ -396,6 +401,7 @@ impl TestNode { path, running_state: None, logger, + resource_manager: Arc::new(ResourceGroupManager::default()), } } @@ -405,6 +411,9 @@ impl TestNode { cop_cfg: Arc>, trans: TestTransport, ) -> TestRouter { + let resource_ctl = self + .resource_manager + .derive_controller("test-raft".into(), false); let (router, state) = RunningState::new( &self.pd_client, self.path.path(), @@ -414,6 +423,7 @@ impl TestNode { ConcurrencyManager::new(1.into()), None, &self.logger, + resource_ctl, ); self.running_state = Some(state); router diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 6a84e98be1e..8793442434e 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1470,7 +1470,14 @@ impl TikvServer { 
.sst_recovery_sender(self.init_sst_recovery_sender()) .flow_listener(flow_listener); - let mut node = NodeV2::new(&self.core.config.server, self.pd_client.clone(), None); + let mut node = NodeV2::new( + &self.core.config.server, + self.pd_client.clone(), + None, + self.resource_manager + .as_ref() + .map(|r| r.derive_controller("raft-v2".into(), false)), + ); node.try_bootstrap_store(&self.core.config.raft_store, &raft_engine) .unwrap_or_else(|e| fatal!("failed to bootstrap store: {:?}", e)); assert_ne!(node.id(), 0); diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 18d99ae8f4c..d63ca0aa2f2 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -205,7 +205,7 @@ impl Simulator for NodeCluster { key_manager: Option>, raft_engine: RaftTestEngine, tablet_registry: TabletRegistry, - _resource_manager: &Option>, + resource_manager: &Option>, ) -> ServerResult { assert!(!self.nodes.contains_key(&node_id)); let pd_worker = LazyWorker::new("test-pd-worker"); @@ -222,7 +222,14 @@ impl Simulator for NodeCluster { ) .unwrap(); - let mut node = NodeV2::new(&cfg.server, self.pd_client.clone(), None); + let mut node = NodeV2::new( + &cfg.server, + self.pd_client.clone(), + None, + resource_manager + .as_ref() + .map(|r| r.derive_controller("raft-v2".into(), false)), + ); node.try_bootstrap_store(&raft_store, &raft_engine).unwrap(); assert_eq!(node.id(), node_id); diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 70c20943279..7b5d501a59f 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -386,7 +386,14 @@ impl ServerCluster { ) .unwrap(); - let mut node = NodeV2::new(&cfg.server, self.pd_client.clone(), None); + let mut node = NodeV2::new( + &cfg.server, + self.pd_client.clone(), + None, + resource_manager + .as_ref() + .map(|r| r.derive_controller("raft-v2".into(), 
false)), + ); node.try_bootstrap_store(&raft_store, &raft_engine).unwrap(); assert_eq!(node.id(), node_id); diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index 71b0bf115d9..d9b17c5d35c 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -16,6 +16,7 @@ use raftstore::{ }, }; use raftstore_v2::{router::RaftRouter, Bootstrap, PdTask, StoreRouter, StoreSystem}; +use resource_control::ResourceController; use resource_metering::CollectorRegHandle; use service::service_manager::GrpcServiceManager; use slog::{info, o, Logger}; @@ -36,6 +37,7 @@ pub struct NodeV2 { pd_client: Arc, logger: Logger, + resource_ctl: Option>, } impl NodeV2 @@ -49,6 +51,7 @@ where cfg: &crate::server::Config, pd_client: Arc, store: Option, + resource_ctl: Option>, ) -> NodeV2 { let store = init_store(store, cfg); @@ -59,6 +62,7 @@ where system: None, has_started: false, logger: slog_global::borrow_global().new(o!()), + resource_ctl, } } @@ -76,8 +80,12 @@ where .bootstrap_store()?; self.store.set_id(store_id); - let (router, system) = - raftstore_v2::create_store_batch_system(cfg, store_id, self.logger.clone()); + let (router, system) = raftstore_v2::create_store_batch_system( + cfg, + store_id, + self.logger.clone(), + self.resource_ctl.clone(), + ); self.system = Some((router, system)); Ok(()) } @@ -247,6 +255,7 @@ where sst_importer, key_manager, grpc_service_mgr, + self.resource_ctl.clone(), )?; Ok(()) } From c010c9a2b9cec5b7d247d8c8df9ee8768034426c Mon Sep 17 00:00:00 2001 From: JmPotato Date: Fri, 28 Jul 2023 11:58:35 +0800 Subject: [PATCH 0826/1149] storage: skip the lock and rollback record in scan_latest_user_keys (#15220) close tikv/tikv#15219 Skip the `WriteType:Lock` and `WriteType::Rollback` record in `scan_latest_user_keys`. 
Signed-off-by: JmPotato Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/storage/mvcc/reader/reader.rs | 36 +++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 3096a7376a5..48158eda946 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -594,7 +594,10 @@ impl MvccReader { Ok((locks, has_remain)) } - /// Scan the writes to get all the latest user keys. The return type is: + /// Scan the writes to get all the latest user keys. This scan will skip + /// `WriteType::Lock` and `WriteType::Rollback`, only return the key that + /// has a latest `WriteType::Put` or `WriteType::Delete` record. The return + /// type is: /// * `(Vec, has_remain)`. /// - `key` is the encoded user key without `commit_ts`. /// - `has_remain` indicates whether there MAY be remaining user keys that @@ -636,6 +639,15 @@ impl MvccReader { } let commit_ts = key.decode_ts()?; let user_key = key.truncate_ts()?; + // Skip the key if its latest write type is not `WriteType::Put` or + // `WriteType::Delete`. + match WriteRef::parse(cursor.value(&mut self.statistics.write))?.write_type { + WriteType::Put | WriteType::Delete => {} + WriteType::Lock | WriteType::Rollback => { + cursor.next(&mut self.statistics.write); + continue; + } + } // To make sure we only check each unique user key once and the filter returns // true. let is_same_user_key = cur_user_key.as_ref() == Some(&user_key); @@ -1860,13 +1872,27 @@ pub mod tests { 8, ); engine.commit(b"k3", 8, 9); - // Prewrite and rollback k4. + // Prewrite and commit k4. engine.prewrite( Mutation::make_put(Key::from_raw(b"k4"), b"v4@1".to_vec()), b"k4", 10, ); - engine.rollback(b"k4", 10); + engine.commit(b"k4", 10, 11); + // Prewrite and rollback k4. 
+ engine.prewrite( + Mutation::make_put(Key::from_raw(b"k4"), b"v4@2".to_vec()), + b"k4", + 12, + ); + engine.rollback(b"k4", 12); + // Prewrite and rollback k5. + engine.prewrite( + Mutation::make_put(Key::from_raw(b"k5"), b"v5@1".to_vec()), + b"k5", + 13, + ); + engine.rollback(b"k5", 13); // Current MVCC keys in `CF_WRITE` should be: // PUT k0 -> v0@999 @@ -1878,7 +1904,9 @@ pub mod tests { // PUT k3 -> v3@8 // ROLLBACK k3 -> v3@7 // PUT k3 -> v3@5 - // ROLLBACK k4 -> v4@1 + // ROLLBACK k4 -> v4@2 + // PUT k4 -> v4@1 + // ROLLBACK k5 -> v5@1 struct Case { start_key: Option, From 8707778729772851b6d25356de8ce1d60be94c5b Mon Sep 17 00:00:00 2001 From: tison Date: Fri, 28 Jul 2023 12:42:05 +0800 Subject: [PATCH 0827/1149] deps: catch up protobuf-build and its dependents to support compile with Apple M1 chip (#15229) close tikv/tikv#15228 Signed-off-by: tison Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 36 ++++--------------- components/tidb_query_executors/src/runner.rs | 3 ++ 2 files changed, 9 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8d10f0888e4..a9205592264 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2981,7 +2981,7 @@ dependencies = [ "futures 0.3.15", "grpcio", "protobuf", - "protobuf-build 0.15.1", + "protobuf-build", "raft-proto", ] @@ -4405,30 +4405,6 @@ dependencies = [ "hex 0.3.2", ] -[[package]] -name = "protobuf-build" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b2be70fa994657539e3c872cc54363c9bf28b0d7a7f774df70e9fd760df3bc4" -dependencies = [ - "bitflags", - "protobuf", - "protobuf-codegen", - "regex", -] - -[[package]] -name = "protobuf-build" -version = "0.14.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2df9942df2981178a930a72d442de47e2f0df18ad68e50a30f816f1848215ad0" -dependencies = [ - "bitflags", - "protobuf", - "protobuf-codegen", - "regex", -] - [[package]] name = 
"protobuf-build" version = "0.15.1" @@ -4502,7 +4478,7 @@ dependencies = [ [[package]] name = "raft" version = "0.7.0" -source = "git+https://github.com/tikv/raft-rs?branch=master#f73766712a538c2f6eb135b455297ad6c03fc58d" +source = "git+https://github.com/tikv/raft-rs?branch=master#9d360a3b0cdb691da8e500a4f73c457b605a1d73" dependencies = [ "bytes", "fxhash", @@ -4561,11 +4537,11 @@ dependencies = [ [[package]] name = "raft-proto" version = "0.7.0" -source = "git+https://github.com/tikv/raft-rs?branch=master#f73766712a538c2f6eb135b455297ad6c03fc58d" +source = "git+https://github.com/tikv/raft-rs?branch=master#9d360a3b0cdb691da8e500a4f73c457b605a1d73" dependencies = [ "bytes", "protobuf", - "protobuf-build 0.14.1", + "protobuf-build", ] [[package]] @@ -7114,12 +7090,12 @@ dependencies = [ [[package]] name = "tipb" version = "0.0.1" -source = "git+https://github.com/pingcap/tipb.git#955fbdc879517f16b7a2f5967f143b92a6ab03dd" +source = "git+https://github.com/pingcap/tipb.git#55b921cfdca1e29bcc29a83c1532bfdf53f88c51" dependencies = [ "futures 0.3.15", "grpcio", "protobuf", - "protobuf-build 0.13.0", + "protobuf-build", ] [[package]] diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 60359f22c55..7c410befb25 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -160,6 +160,9 @@ impl BatchExecutorsRunner<()> { ExecType::TypeExpand => { other_err!("Expand executor not implemented"); } + ExecType::TypeExpand2 => { + other_err!("Expand2 executor not implemented"); + } } } From 0cb091d3c9038c54cde7415af0b57ff2cb177a0f Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Fri, 28 Jul 2023 14:04:35 +0800 Subject: [PATCH 0828/1149] raftstore: Observe when applying snapshot is cancelled (#15226) close tikv/tikv#15227 Deliver message to observer when applying snapshot is cancelled Signed-off-by: CalvinNeo Co-authored-by: ti-chi-bot[bot] 
<108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/engine_traits/src/engine.rs | 8 +++++++- .../raftstore/src/coprocessor/dispatcher.rs | 19 +++++++++++++++++++ components/raftstore/src/coprocessor/mod.rs | 2 ++ .../raftstore/src/store/worker/region.rs | 18 ++++++++++++++---- 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index b3ee1c93b05..cc90f2ce075 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -63,7 +63,13 @@ pub trait KvEngine: /// Some KvEngines need to do some transforms before apply data from /// snapshot. These procedures can be batched in background if there are /// more than one incoming snapshots, thus not blocking applying thread. - fn can_apply_snapshot(&self, _is_timeout: bool, _new_batch: bool, _region_id: u64) -> bool { + fn can_apply_snapshot( + &self, + _is_timeout: bool, + _new_batch: bool, + _region_id: u64, + _queue_size: usize, + ) -> bool { true } diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index 0e45ef1d09d..d082013cd2c 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -676,6 +676,13 @@ impl CoprocessorHost { } } + pub fn cancel_apply_snapshot(&self, region_id: u64, peer_id: u64) { + for observer in &self.registry.apply_snapshot_observers { + let observer = observer.observer.inner(); + observer.cancel_apply_snapshot(region_id, peer_id); + } + } + pub fn new_split_checker_host<'a>( &'a self, region: &Region, @@ -910,6 +917,7 @@ mod tests { PrePersist = 24, PreWriteApplyState = 25, OnRaftMessage = 26, + CancelApplySnapshot = 27, } impl Coprocessor for TestCoprocessor {} @@ -1128,6 +1136,13 @@ mod tests { ); false } + + fn cancel_apply_snapshot(&self, _: u64, _: u64) { + self.called.fetch_add( + ObserverIndex::CancelApplySnapshot as 
usize, + Ordering::SeqCst, + ); + } } impl CmdObserver for TestCoprocessor { @@ -1317,6 +1332,10 @@ mod tests { host.on_raft_message(&msg); index += ObserverIndex::OnRaftMessage as usize; assert_all!([&ob.called], &[index]); + + host.cancel_apply_snapshot(region.get_id(), 0); + index += ObserverIndex::CancelApplySnapshot as usize; + assert_all!([&ob.called], &[index]); } #[test] diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 7c84b09ce7e..0592e23200b 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -206,6 +206,8 @@ pub trait ApplySnapshotObserver: Coprocessor { ) { } + fn cancel_apply_snapshot(&self, _: u64, _: u64) {} + /// We call pre_apply_snapshot only when one of the observer returns true. fn should_pre_apply_snapshot(&self) -> bool { false diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 01f2411e849..068904b2a67 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -528,6 +528,8 @@ where } Err(Error::Abort) => { warn!("applying snapshot is aborted"; "region_id" => region_id); + self.coprocessor_host + .cancel_apply_snapshot(region_id, peer_id); assert_eq!( status.swap(JOB_STATUS_CANCELLED, Ordering::SeqCst), JOB_STATUS_CANCELLING @@ -781,10 +783,12 @@ where } if let Some(Task::Apply { region_id, .. }) = self.pending_applies.front() { fail_point!("handle_new_pending_applies", |_| {}); - if !self - .engine - .can_apply_snapshot(is_timeout, new_batch, *region_id) - { + if !self.engine.can_apply_snapshot( + is_timeout, + new_batch, + *region_id, + self.pending_applies.len(), + ) { // KvEngine can't apply snapshot for other reasons. 
break; } @@ -1334,6 +1338,7 @@ pub(crate) mod tests { obs.pre_apply_hash.load(Ordering::SeqCst), obs.post_apply_hash.load(Ordering::SeqCst) ); + assert_eq!(obs.cancel_apply.load(Ordering::SeqCst), 0); // the pending apply task should be finished and snapshots are ingested. // note that when ingest sst, it may flush memtable if overlap, @@ -1463,6 +1468,7 @@ pub(crate) mod tests { pub post_apply_count: Arc, pub pre_apply_hash: Arc, pub post_apply_hash: Arc, + pub cancel_apply: Arc, } impl Coprocessor for MockApplySnapshotObserver {} @@ -1499,5 +1505,9 @@ pub(crate) mod tests { fn should_pre_apply_snapshot(&self) -> bool { true } + + fn cancel_apply_snapshot(&self, _: u64, _: u64) { + self.cancel_apply.fetch_add(1, Ordering::SeqCst); + } } } From 42882f8bcf4ce52ea93f93d325c000147ac2e137 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Fri, 28 Jul 2023 17:28:36 +0800 Subject: [PATCH 0829/1149] txn: Fix possible assertion failure in next_last_change_info (#15241) close tikv/tikv#15109 This PR updates the way to calculate the last_change_info, which is not very proper before. This also fixes a assertion failure in `next_last_change_info` that might happen when it executes in parallel with GC in compaction filter. Signed-off-by: MyonKeminta --- src/storage/txn/actions/common.rs | 3 +- tests/failpoints/cases/test_transaction.rs | 46 ++++++++++++++++++---- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/storage/txn/actions/common.rs b/src/storage/txn/actions/common.rs index 336302c7130..5afb177fd49 100644 --- a/src/storage/txn/actions/common.rs +++ b/src/storage/txn/actions/common.rs @@ -28,6 +28,7 @@ pub fn next_last_change_info( )), LastChange::NotExist => Ok(LastChange::NotExist), LastChange::Unknown => { + fail_point!("before_get_write_in_next_last_change_info"); // We do not know the last change info, probably // because it comes from an older version TiKV. 
To support data // from old TiKV, we iterate to the last change to find it. @@ -49,7 +50,7 @@ pub fn next_last_change_info( assert!(matches!(w.write_type, WriteType::Put)); Ok(LastChange::make_exist( last_change_ts, - stat.write.next as u64, + stat.write.next as u64 + 1, )) } } diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 4e3ee2f298e..14f4161c7ae 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -31,15 +31,19 @@ use test_raftstore::{ configure_for_lease_read, new_learner_peer, new_server_cluster, try_kv_prewrite, DropMessageFilter, }; -use tikv::storage::{ - self, - kv::SnapshotExt, - lock_manager::MockLockManager, - txn::tests::{ - must_acquire_pessimistic_lock, must_commit, must_pessimistic_prewrite_put, - must_pessimistic_prewrite_put_err, must_prewrite_put, must_prewrite_put_err, +use tikv::{ + server::gc_worker::gc_by_compact, + storage::{ + self, + kv::SnapshotExt, + lock_manager::MockLockManager, + txn::tests::{ + must_acquire_pessimistic_lock, must_acquire_pessimistic_lock_return_value, must_commit, + must_pessimistic_prewrite_put, must_pessimistic_prewrite_put_err, must_prewrite_put, + must_prewrite_put_err, must_rollback, + }, + Snapshot, TestEngineBuilder, TestStorageBuilderApiV1, }, - Snapshot, TestEngineBuilder, TestStorageBuilderApiV1, }; use tikv_util::{ store::{new_peer, peer::new_incoming_voter}, @@ -773,3 +777,29 @@ fn test_proposal_concurrent_with_conf_change_and_transfer_leader() { handle.join().unwrap(); } + +#[test] +fn test_next_last_change_info_called_when_gc() { + let mut engine = TestEngineBuilder::new().build().unwrap(); + let k = b"zk"; + + must_prewrite_put(&mut engine, k, b"v", k, 5); + must_commit(&mut engine, k, 5, 6); + + must_rollback(&mut engine, k, 10, true); + + fail::cfg("before_get_write_in_next_last_change_info", "pause").unwrap(); + + let mut engine2 = engine.clone(); + let h = thread::spawn(move || { + 
must_acquire_pessimistic_lock_return_value(&mut engine2, k, k, 30, 30, false) + }); + thread::sleep(Duration::from_millis(200)); + assert!(!h.is_finished()); + + gc_by_compact(&mut engine, &[], 20); + + fail::remove("before_get_write_in_next_last_change_info"); + + assert_eq!(h.join().unwrap().unwrap().as_slice(), b"v"); +} From 284ff3a9ca144619f5efaa357d6c287f1c669982 Mon Sep 17 00:00:00 2001 From: Hu# Date: Mon, 31 Jul 2023 17:49:07 +0800 Subject: [PATCH 0830/1149] resource_manager: report ru metrics to PD (#15030) ref tikv/tikv#14900 Signed-off-by: husharp --- Cargo.lock | 3 +- components/pd_client/src/client.rs | 45 +++ components/pd_client/src/lib.rs | 6 + components/pd_client/src/util.rs | 33 ++ components/resource_control/Cargo.toml | 1 + components/resource_control/src/future.rs | 11 +- components/resource_control/src/lib.rs | 34 ++ .../resource_control/src/resource_group.rs | 2 +- .../resource_control/src/resource_limiter.rs | 42 ++- components/resource_control/src/service.rs | 336 +++++++++++++++++- components/resource_control/src/worker.rs | 204 +++++++++-- components/server/src/server.rs | 28 +- components/server/src/server2.rs | 28 +- components/test_pd/src/mocker/mod.rs | 13 +- components/test_pd/src/server.rs | 34 +- src/storage/txn/scheduler.rs | 9 +- 16 files changed, 726 insertions(+), 103 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a9205592264..2b563cb3d52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2976,7 +2976,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#770f7183434ca05887a3f63265b8745017130e1f" +source = "git+https://github.com/pingcap/kvproto.git#ee4a4ff68ac385bd61ea9da868b4b6a6c0dadcec" dependencies = [ "futures 0.3.15", "grpcio", @@ -5003,6 +5003,7 @@ dependencies = [ "protobuf", "rand 0.8.5", "serde", + "serde_json", "slog", "slog-global", "strum 0.20.0", diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 
c54222ae87f..06ea6e9055d 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -27,6 +27,7 @@ use kvproto::{ metapb, pdpb::{self, Member}, replication_modepb::{RegionReplicationStatus, ReplicationStatus, StoreDrAutoSyncStatus}, + resource_manager::TokenBucketsRequest, }; use security::SecurityManager; use tikv_util::{ @@ -1178,6 +1179,50 @@ impl PdClient for RpcClient { .request(req, executor, LEADER_CHANGE_RETRY) .execute() } + + fn report_ru_metrics(&self, req: TokenBucketsRequest) -> PdFuture<()> { + let executor = |client: &Client, req: TokenBucketsRequest| { + let mut inner = client.inner.wl(); + if let Either::Left(ref mut left) = inner.rg_sender { + let sender = left.take().expect("expect report_ru_metrics sink"); + let (tx, rx) = mpsc::unbounded(); + inner.rg_sender = Either::Right(tx); + let resp = inner.rg_resp.take().unwrap(); + // Note that for now we don't care about the result of the response stream. + inner.client_stub.spawn(async { + resp.for_each(|_| future::ready(())).await; + debug!("report_ru_metrics stream exited"); + }); + inner.client_stub.spawn(async move { + let mut sender = sender.sink_map_err(Error::Grpc); + let result = sender + .send_all(&mut rx.map(|r| Ok((r, WriteFlags::default())))) + .await; + match result { + Ok(()) => { + sender.get_mut().cancel(); + info!("cancel report_ru_metrics sender"); + } + Err(e) => { + error!(?e; "failed to report_ru_metrics buckets"); + } + }; + }); + } + + let sender = inner + .rg_sender + .as_mut() + .right() + .expect("expect report_ru_metrics sender"); + let ret = sender + .unbounded_send(req) + .map_err(|e| Error::StreamDisconnect(e.into_send_error())); + Box::pin(future::ready(ret)) as PdFuture<_> + }; + + self.pd_client.request(req, executor, NO_RETRY).execute() + } } impl RpcClient { diff --git a/components/pd_client/src/lib.rs b/components/pd_client/src/lib.rs index f7f57dd4bba..7a9d2cd2a61 100644 --- a/components/pd_client/src/lib.rs +++ 
b/components/pd_client/src/lib.rs @@ -21,6 +21,7 @@ use futures::future::BoxFuture; use kvproto::{ metapb, pdpb, replication_modepb::{RegionReplicationStatus, ReplicationStatus, StoreDrAutoSyncStatus}, + resource_manager::TokenBucketsRequest, }; use pdpb::QueryStats; use tikv_util::time::{Instant, UnixSecs}; @@ -247,6 +248,7 @@ impl BucketStat { pub const INVALID_ID: u64 = 0; // TODO: Implementation of config registration for each module pub const RESOURCE_CONTROL_CONFIG_PATH: &str = "resource_group/settings"; +pub const RESOURCE_CONTROL_CONTROLLER_CONFIG_PATH: &str = "resource_group/controller"; /// PdClient communicates with Placement Driver (PD). /// Because now one PD only supports one cluster, so it is no need to pass @@ -538,6 +540,10 @@ pub trait PdClient: Send + Sync { fn report_region_buckets(&self, _bucket_stat: &BucketStat, _period: Duration) -> PdFuture<()> { unimplemented!(); } + + fn report_ru_metrics(&self, _req: TokenBucketsRequest) -> PdFuture<()> { + unimplemented!(); + } } const REQUEST_TIMEOUT: u64 = 2; // 2s diff --git a/components/pd_client/src/util.rs b/components/pd_client/src/util.rs index 2e7567f6fa0..5491a51c047 100644 --- a/components/pd_client/src/util.rs +++ b/components/pd_client/src/util.rs @@ -29,6 +29,9 @@ use kvproto::{ RegionHeartbeatRequest, RegionHeartbeatResponse, ReportBucketsRequest, ReportBucketsResponse, ResponseHeader, }, + resource_manager::{ + ResourceManagerClient as ResourceManagerStub, TokenBucketsRequest, TokenBucketsResponse, + }, }; use security::SecurityManager; use tikv_util::{ @@ -106,6 +109,12 @@ pub struct Inner { pub tso: TimestampOracle, pub meta_storage: MetaStorageStub, + pub rg_sender: Either< + Option>, + UnboundedSender, + >, + pub rg_resp: Option>, + last_try_reconnect: Instant, } @@ -186,6 +195,14 @@ impl Client { .unwrap_or_else(|e| panic!("fail to request PD {} err {:?}", "report_buckets", e)); let meta_storage = kvproto::meta_storagepb::MetaStorageClient::new(client_stub.client.channel().clone()); 
+ let resource_manager = kvproto::resource_manager::ResourceManagerClient::new( + client_stub.client.channel().clone(), + ); + let (rg_sender, rg_rx) = resource_manager + .acquire_token_buckets_opt(target.call_option()) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "acquire_token_buckets", e) + }); Client { timer: GLOBAL_TIMER_HANDLE.clone(), inner: RwLock::new(Inner { @@ -204,6 +221,8 @@ impl Client { last_try_reconnect: Instant::now(), tso, meta_storage, + rg_sender: Either::Left(Some(rg_sender)), + rg_resp: Some(rg_rx), }), feature_gate: FeatureGate::default(), enable_forwarding, @@ -246,9 +265,23 @@ impl Client { inner.buckets_resp = Some(buckets_resp); inner.meta_storage = MetaStorageStub::new(client_stub.client.channel().clone()); + let resource_manager = ResourceManagerStub::new(client_stub.client.channel().clone()); inner.client_stub = client_stub; inner.members = members; inner.tso = tso; + + let (rg_tx, rg_rx) = resource_manager + .acquire_token_buckets_opt(target.call_option()) + .unwrap_or_else(|e| { + panic!("fail to request PD {} err {:?}", "acquire_token_buckets", e) + }); + info!("acquire_token_buckets sender and receiver are stale, refreshing ..."); + // Try to cancel an unused token buckets sender. 
+ if let Either::Left(Some(ref mut r)) = inner.rg_sender { + r.cancel(); + } + inner.rg_sender = Either::Left(Some(rg_tx)); + inner.rg_resp = Some(rg_rx); if let Some(ref on_reconnect) = inner.on_reconnect { on_reconnect(); } diff --git a/components/resource_control/Cargo.toml b/components/resource_control/Cargo.toml index c554b5a32ab..21f5cad1514 100644 --- a/components/resource_control/Cargo.toml +++ b/components/resource_control/Cargo.toml @@ -25,6 +25,7 @@ pin-project = "1.0" prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" slog = { workspace = true } slog-global = { workspace = true } strum = { version = "0.20", features = ["derive"] } diff --git a/components/resource_control/src/future.rs b/components/resource_control/src/future.rs index 93d0bb1001d..fd98fc9a092 100644 --- a/components/resource_control/src/future.rs +++ b/components/resource_control/src/future.rs @@ -137,24 +137,21 @@ impl Future for LimitedFuture { let dur = start.saturating_elapsed(); let io_bytes = if let Some(last_io_bytes) = last_io_bytes { match get_thread_io_bytes_stats() { - Ok(io_bytes) => { - let delta = io_bytes - last_io_bytes; - delta.read + delta.write - } + Ok(io_bytes) => io_bytes - last_io_bytes, Err(e) => { warn!("load thread io bytes failed"; "err" => e); - 0 + IoBytes::default() } } } else { - 0 + IoBytes::default() }; let mut wait_dur = this.resource_limiter.consume(dur, io_bytes); if wait_dur == Duration::ZERO { return res; } if wait_dur > MAX_WAIT_DURATION { - warn!("limiter future wait too long"; "wait" => ?wait_dur, "io" => io_bytes, "cpu" => ?dur); + warn!("limiter future wait too long"; "wait" => ?wait_dur, "io_read" => io_bytes.read, "io_write" => io_bytes.write, "cpu" => ?dur); wait_dur = MAX_WAIT_DURATION; } *this.post_delay = Some( diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index 
df6496e2ed2..6cfd24914a1 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -2,7 +2,10 @@ #![feature(test)] #![feature(local_key_cell_methods)] +use std::sync::Arc; + use online_config::OnlineConfig; +use pd_client::RpcClient; use serde::{Deserialize, Serialize}; mod resource_group; @@ -25,6 +28,8 @@ pub use channel::ResourceMetered; mod resource_limiter; pub use resource_limiter::ResourceLimiter; +use tikv_util::worker::Worker; +use worker::{GroupQuotaAdjustWorker, BACKGROUND_LIMIT_ADJUST_DURATION}; mod metrics; pub mod worker; @@ -42,3 +47,32 @@ impl Default for Config { Self { enabled: true } } } + +pub fn start_periodic_tasks( + mgr: &Arc, + pd_client: Arc, + bg_worker: &Worker, + io_bandwidth: u64, +) { + let resource_mgr_service = ResourceManagerService::new(mgr.clone(), pd_client); + // spawn a task to periodically update the minimal virtual time of all resource + // groups. + let resource_mgr = mgr.clone(); + bg_worker.spawn_interval_task(MIN_PRIORITY_UPDATE_INTERVAL, move || { + resource_mgr.advance_min_virtual_time(); + }); + let mut resource_mgr_service_clone = resource_mgr_service.clone(); + // spawn a task to watch all resource groups update. + bg_worker.spawn_async_task(async move { + resource_mgr_service_clone.watch_resource_groups().await; + }); + // spawn a task to auto adjust background quota limiter. + let mut worker = GroupQuotaAdjustWorker::new(mgr.clone(), io_bandwidth); + bg_worker.spawn_interval_task(BACKGROUND_LIMIT_ADJUST_DURATION, move || { + worker.adjust_quota(); + }); + // spawn a task to periodically upload resource usage statistics to PD. 
+ bg_worker.spawn_async_task(async move { + resource_mgr_service.report_ru_metrics().await; + }); +} diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 5aec1b32c46..a356d30a7ac 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -481,7 +481,7 @@ impl ResourceController { // TODO: use different threshold for different resource type // needn't do update if the virtual different is less than 100ms/100KB. - if min_vt + 100_000 >= max_vt && max_vt < RESET_VT_THRESHOLD { + if min_vt >= max_vt.saturating_sub(100_000) && max_vt < RESET_VT_THRESHOLD { return; } diff --git a/components/resource_control/src/resource_limiter.rs b/components/resource_control/src/resource_limiter.rs index 155b2224bb8..8898b4eba23 100644 --- a/components/resource_control/src/resource_limiter.rs +++ b/components/resource_control/src/resource_limiter.rs @@ -6,6 +6,7 @@ use std::{ time::{Duration, Instant}, }; +use file_system::IoBytes; use futures::compat::Future01CompatExt; use strum::EnumCount; use tikv_util::{time::Limiter, timer::GLOBAL_TIMER_HANDLE}; @@ -57,10 +58,10 @@ impl ResourceLimiter { } } - pub fn consume(&self, cpu_time: Duration, io_bytes: u64) -> Duration { + pub fn consume(&self, cpu_time: Duration, io_bytes: IoBytes) -> Duration { let cpu_dur = self.limiters[ResourceType::Cpu as usize].consume(cpu_time.as_micros() as u64); - let io_dur = self.limiters[ResourceType::Io as usize].consume(io_bytes); + let io_dur = self.limiters[ResourceType::Io as usize].consume_io(io_bytes); let wait_dur = cpu_dur.max(io_dur); BACKGROUND_TASKS_WAIT_DURATION .with_label_values(&[&self.name]) @@ -68,7 +69,7 @@ impl ResourceLimiter { wait_dur } - pub async fn async_consume(&self, cpu_time: Duration, io_bytes: u64) -> Duration { + pub async fn async_consume(&self, cpu_time: Duration, io_bytes: IoBytes) -> Duration { let dur = self.consume(cpu_time, io_bytes); if 
!dur.is_zero() { _ = GLOBAL_TIMER_HANDLE @@ -85,11 +86,14 @@ impl ResourceLimiter { } pub(crate) fn get_limit_statistics(&self, ty: ResourceType) -> GroupStatistics { - let (total_consumed, total_wait_dur_us) = self.limiters[ty as usize].get_statistics(); + let (total_consumed, total_wait_dur_us, read_consumed, write_consumed) = + self.limiters[ty as usize].get_statistics(); GroupStatistics { version: self.version, total_consumed, total_wait_dur_us, + read_consumed, + write_consumed, } } } @@ -98,6 +102,8 @@ pub(crate) struct QuotaLimiter { limiter: Limiter, // total waiting duration in us total_wait_dur_us: AtomicU64, + read_bytes: AtomicU64, + write_bytes: AtomicU64, } impl QuotaLimiter { @@ -105,6 +111,8 @@ impl QuotaLimiter { Self { limiter: Limiter::new(limit), total_wait_dur_us: AtomicU64::new(0), + read_bytes: AtomicU64::new(0), + write_bytes: AtomicU64::new(0), } } @@ -120,10 +128,12 @@ impl QuotaLimiter { self.limiter.set_speed_limit(limit); } - fn get_statistics(&self) -> (u64, u64) { + fn get_statistics(&self) -> (u64, u64, u64, u64) { ( self.limiter.total_bytes_consumed() as u64, self.total_wait_dur_us.load(Ordering::Relaxed), + self.read_bytes.load(Ordering::Relaxed), + self.write_bytes.load(Ordering::Relaxed), ) } @@ -138,6 +148,22 @@ impl QuotaLimiter { } dur } + + fn consume_io(&self, value: IoBytes) -> Duration { + self.read_bytes.fetch_add(value.read, Ordering::Relaxed); + self.write_bytes.fetch_add(value.write, Ordering::Relaxed); + + let value = value.read + value.write; + if value == 0 { + return Duration::ZERO; + } + let dur = self.limiter.consume_duration(value as usize); + if dur != Duration::ZERO { + self.total_wait_dur_us + .fetch_add(dur.as_micros() as u64, Ordering::Relaxed); + } + dur + } } #[derive(Default, Clone, PartialEq, Eq, Copy, Debug)] @@ -145,6 +171,8 @@ pub struct GroupStatistics { pub version: u64, pub total_consumed: u64, pub total_wait_dur_us: u64, + pub read_consumed: u64, + pub write_consumed: u64, } impl std::ops::Sub 
for GroupStatistics { @@ -154,6 +182,8 @@ impl std::ops::Sub for GroupStatistics { version: self.version, total_consumed: self.total_consumed.saturating_sub(rhs.total_consumed), total_wait_dur_us: self.total_wait_dur_us.saturating_sub(rhs.total_wait_dur_us), + read_consumed: self.read_consumed.saturating_sub(rhs.read_consumed), + write_consumed: self.write_consumed.saturating_sub(rhs.write_consumed), } } } @@ -166,6 +196,8 @@ impl std::ops::Div for GroupStatistics { version: self.version, total_consumed: (self.total_consumed as f64 / rhs) as u64, total_wait_dur_us: (self.total_wait_dur_us as f64 / rhs) as u64, + read_consumed: (self.read_consumed as f64 / rhs) as u64, + write_consumed: (self.write_consumed as f64 / rhs) as u64, } } } diff --git a/components/resource_control/src/service.rs b/components/resource_control/src/service.rs index 929bb48525b..5ecac9d74c4 100644 --- a/components/resource_control/src/service.rs +++ b/components/resource_control/src/service.rs @@ -1,24 +1,36 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{collections::HashSet, sync::Arc, time::Duration}; +use std::{ + collections::{HashMap, HashSet}, + sync::Arc, + time::Duration, +}; use futures::{compat::Future01CompatExt, StreamExt}; -use kvproto::{pdpb::EventType, resource_manager::ResourceGroup}; -use pd_client::{Error as PdError, PdClient, RpcClient, RESOURCE_CONTROL_CONFIG_PATH}; -use tikv_util::{error, timer::GLOBAL_TIMER_HANDLE}; +use kvproto::{ + pdpb::EventType, + resource_manager::{ResourceGroup, TokenBucketRequest, TokenBucketsRequest}, +}; +use pd_client::{ + Error as PdError, PdClient, RpcClient, RESOURCE_CONTROL_CONFIG_PATH, + RESOURCE_CONTROL_CONTROLLER_CONFIG_PATH, +}; +use serde::{Deserialize, Serialize}; +use tikv_util::{error, info, timer::GLOBAL_TIMER_HANDLE}; -use crate::ResourceGroupManager; +use crate::{resource_limiter::ResourceType, ResourceGroupManager}; #[derive(Clone)] pub struct ResourceManagerService { manager: Arc, pd_client: Arc, - // record watch revision + // record watch revision. revision: i64, } impl ResourceManagerService { - /// Constructs a new `Service` with `ResourceGroupManager` and a `RpcClient` + /// Constructs a new `Service` with `ResourceGroupManager` and a + /// `RpcClient`. 
pub fn new( manager: Arc, pd_client: Arc, @@ -32,6 +44,7 @@ impl ResourceManagerService { } const RETRY_INTERVAL: Duration = Duration::from_secs(1); // to consistent with pd_client +const BACKGROUND_RU_REPORT_DURATION: Duration = Duration::from_secs(5); impl ResourceManagerService { pub async fn watch_resource_groups(&mut self) { @@ -139,12 +152,195 @@ impl ResourceManagerService { } } } + + async fn load_controller_config(&self) -> RequestUnitConfig { + loop { + match self + .pd_client + .load_global_config(RESOURCE_CONTROL_CONTROLLER_CONFIG_PATH.to_string()) + .await + { + Ok((items, _)) => { + if items.is_empty() { + error!("server does not save config, load config failed."); + let _ = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + RETRY_INTERVAL) + .compat() + .await; + continue; + } + match serde_json::from_slice::(items[0].get_payload()) { + Ok(c) => return c.request_unit, + Err(err) => { + error!("parse controller config failed"; "err" => ?err); + let _ = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + RETRY_INTERVAL) + .compat() + .await; + continue; + } + } + } + Err(err) => { + error!("failed to load controller config"; "err" => ?err); + let _ = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + RETRY_INTERVAL) + .compat() + .await; + continue; + } + } + } + } + + // report ru metrics periodically. + pub async fn report_ru_metrics(&self) { + let mut last_group_statistics_map: HashMap = HashMap::new(); + // load controller config firstly. 
+ let config = self.load_controller_config().await; + info!("load controller config"; "config" => ?config); + + loop { + let background_groups: Vec<_> = self + .manager + .resource_groups + .iter() + .filter_map(|kv| { + let g = kv.value(); + g.limiter.clone().map(|limiter| { + let io_statistics = limiter.get_limit_statistics(ResourceType::Io); + let cpu_statistics = limiter.get_limit_statistics(ResourceType::Cpu); + + ( + g.group.name.clone(), + ReportStatistic { + // io statistics and cpu statistics should have the same version. + version: io_statistics.version, + read_bytes_consumed: io_statistics.read_consumed, + write_bytes_consumed: io_statistics.write_consumed, + cpu_consumed: cpu_statistics.total_consumed, + }, + ) + }) + }) + .collect(); + + if background_groups.is_empty() { + let _ = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + BACKGROUND_RU_REPORT_DURATION) + .compat() + .await; + continue; + } + + let mut req = TokenBucketsRequest::default(); + let all_reqs = req.mut_requests(); + for (name, statistic) in background_groups.into_iter() { + // Non-existence or version change means this is a brand new limiter, so no need + // to sub the old statistics. + let (cpu_consumed, io_consumed) = if let Some(last_stats) = + last_group_statistics_map + .get(&name) + .filter(|stats| statistic.version == stats.version) + { + if statistic == *last_stats { + continue; + } + ( + statistic.cpu_consumed - last_stats.cpu_consumed, + ( + statistic.read_bytes_consumed - last_stats.read_bytes_consumed, + statistic.write_bytes_consumed - last_stats.write_bytes_consumed, + ), + ) + } else { + ( + statistic.cpu_consumed, + ( + statistic.read_bytes_consumed, + statistic.write_bytes_consumed, + ), + ) + }; + // replace the previous statistics. + last_group_statistics_map.insert(name.clone(), statistic); + // report ru statistics. 
+ let mut req = TokenBucketRequest::default(); + req.set_resource_group_name(name.clone()); + req.set_is_background(true); + let report_consumption = req.mut_consumption_since_last_request(); + + let read_total = config.read_cpu_ms_cost * cpu_consumed as f64 + + config.read_cost_per_byte * io_consumed.0 as f64; + let write_total = config.write_cost_per_byte * io_consumed.1 as f64; + + report_consumption.set_r_r_u(read_total); + report_consumption.set_w_r_u(write_total); + report_consumption.set_read_bytes(io_consumed.0 as f64); + report_consumption.set_write_bytes(io_consumed.1 as f64); + report_consumption.set_total_cpu_time_ms(cpu_consumed as f64); + + all_reqs.push(req); + } + + if !all_reqs.is_empty() { + if let Err(e) = self.pd_client.report_ru_metrics(req).await { + error!("report ru metrics failed"; "err" => ?e); + } + } + + let dur = if cfg!(feature = "failpoints") { + (|| { + fail::fail_point!("set_report_duration", |v| { + let dur = v + .expect("should provide delay time (in ms)") + .parse::() + .expect("should be number (in ms)"); + std::time::Duration::from_millis(dur) + }); + std::time::Duration::from_millis(100) + })() + } else { + BACKGROUND_RU_REPORT_DURATION + }; + + let _ = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + dur) + .compat() + .await; + } + } +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +struct RequestUnitConfig { + read_base_cost: f64, + read_cost_per_byte: f64, + write_base_cost: f64, + write_cost_per_byte: f64, + read_cpu_ms_cost: f64, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +struct ControllerConfig { + request_unit: RequestUnitConfig, +} + +#[derive(PartialEq, Eq, Debug, Clone)] +struct ReportStatistic { + version: u64, + read_bytes_consumed: u64, + write_bytes_consumed: u64, + cpu_consumed: u64, } #[cfg(test)] pub mod tests { use std::time::Duration; + use file_system::IoBytes; use futures::executor::block_on; use 
kvproto::pdpb::GlobalConfigItem; use pd_client::RpcClient; @@ -152,7 +348,9 @@ pub mod tests { use test_pd::{mocker::Service, util::*, Server as MockServer}; use tikv_util::{config::ReadableDuration, worker::Builder}; - use crate::resource_group::tests::{new_resource_group, new_resource_group_ru}; + use crate::resource_group::tests::{ + new_background_resource_group_ru, new_resource_group, new_resource_group_ru, + }; fn new_test_server_and_client( update_interval: ReadableDuration, @@ -192,6 +390,24 @@ pub mod tests { .unwrap(); } + fn store_controller_config(pd_client: Arc, config: ControllerConfig) { + let mut item = GlobalConfigItem::default(); + item.set_kind(EventType::Put); + item.set_name("controller_config".to_string()); + let buf = serde_json::to_vec(&config).unwrap(); + item.set_payload(buf); + + futures::executor::block_on(async move { + pd_client + .store_global_config( + RESOURCE_CONTROL_CONTROLLER_CONFIG_PATH.to_string(), + vec![item], + ) + .await + }) + .unwrap(); + } + use super::*; #[test] fn crud_config_test() { @@ -244,7 +460,7 @@ pub mod tests { background_worker.spawn_async_task(async move { s_clone.watch_resource_groups().await; }); - // Mock add + // Mock add. let group1 = new_resource_group_ru("TEST1".into(), 100, 0); add_resource_group(s.pd_client.clone(), group1); let group2 = new_resource_group_ru("TEST2".into(), 100, 0); @@ -254,16 +470,17 @@ pub mod tests { add_resource_group(s.pd_client.clone(), group2); wait_watch_ready(&s, 3); - // Mock delete + // Mock delete. delete_resource_group(s.pd_client.clone(), "TEST1"); - // Wait for watcher + // Wait for watcher. 
wait_watch_ready(&s, 2); let groups = s.manager.get_all_resource_groups(); assert_eq!(groups.len(), 2); assert!(s.manager.get_resource_group("TEST1").is_none()); let group = s.manager.get_resource_group("TEST2").unwrap(); assert_eq!(group.get_ru_quota(), 50); + server.stop(); } @@ -278,22 +495,107 @@ pub mod tests { background_worker.spawn_async_task(async move { s_clone.watch_resource_groups().await; }); - // Mock add + // Mock add. let group1 = new_resource_group_ru("TEST1".into(), 100, 0); add_resource_group(s.pd_client.clone(), group1); - // Mock reboot watch server + // Mock reboot watch server. let watch_global_config_fp = "watch_global_config_return"; fail::cfg(watch_global_config_fp, "return").unwrap(); std::thread::sleep(Duration::from_millis(100)); fail::remove(watch_global_config_fp); - // Mock add after rebooting will success - let group1 = new_resource_group_ru("TEST2".into(), 100, 0); - add_resource_group(s.pd_client.clone(), group1); - // Wait watcher update + // Mock add after rebooting will success. + let group2 = new_resource_group_ru("TEST2".into(), 100, 0); + add_resource_group(s.pd_client.clone(), group2); + // Wait watcher update. std::thread::sleep(Duration::from_secs(1)); let groups = s.manager.get_all_resource_groups(); assert_eq!(groups.len(), 3); server.stop(); } + + #[test] + fn load_controller_config_test() { + let (mut server, client) = new_test_server_and_client(ReadableDuration::millis(100)); + let resource_manager = ResourceGroupManager::default(); + + let s = ResourceManagerService::new(Arc::new(resource_manager), Arc::new(client)); + // Set controller config. + let cfg = ControllerConfig { + request_unit: RequestUnitConfig { + read_base_cost: 1. / 8., + read_cost_per_byte: 1. / (64. * 1024.), + write_base_cost: 1., + write_cost_per_byte: 1. / 1024., + read_cpu_ms_cost: 1. / 3., + }, + }; + store_controller_config(s.clone().pd_client, cfg); + let config = block_on(s.load_controller_config()); + assert_eq!(config.read_base_cost, 1. 
/ 8.); + + server.stop(); + } + + #[test] + fn report_ru_metrics_test() { + let (mut server, client) = new_test_server_and_client(ReadableDuration::millis(100)); + let resource_manager = ResourceGroupManager::default(); + + let s = ResourceManagerService::new(Arc::new(resource_manager), Arc::new(client)); + let bg = new_background_resource_group_ru("background".into(), 1000, 15, vec!["br".into()]); + s.manager.add_resource_group(bg); + + // Set controller config. + let cfg = ControllerConfig { + request_unit: RequestUnitConfig { + read_base_cost: 1. / 8., + read_cost_per_byte: 1. / (64. * 1024.), + write_base_cost: 1., + write_cost_per_byte: 1. / 1024., + read_cpu_ms_cost: 1. / 3., + }, + }; + store_controller_config(s.clone().pd_client, cfg); + + fail::cfg("set_report_duration", "return(10)").unwrap(); + let background_worker = Builder::new("background").thread_count(1).create(); + let s_clone = s.clone(); + background_worker.spawn_async_task(async move { + s_clone.report_ru_metrics().await; + }); + // Mock consume. + let bg_limiter = s.manager.get_resource_limiter("background", "br").unwrap(); + bg_limiter.consume( + Duration::from_secs(2), + IoBytes { + read: 1000, + write: 1000, + }, + ); + // Wait for report ru metrics. + std::thread::sleep(Duration::from_millis(100)); + // Mock update version. + let bg = new_resource_group_ru("background".into(), 1000, 15); + s.manager.add_resource_group(bg); + + let background_group = + new_background_resource_group_ru("background".into(), 500, 8, vec!["lightning".into()]); + s.manager.add_resource_group(background_group); + let new_bg_limiter = s + .manager + .get_resource_limiter("background", "lightning") + .unwrap(); + new_bg_limiter.consume( + Duration::from_secs(5), + IoBytes { + read: 2000, + write: 2000, + }, + ); + // Wait for report ru metrics. 
+ std::thread::sleep(Duration::from_millis(100)); + fail::remove("set_report_duration"); + server.stop(); + } } diff --git a/components/resource_control/src/worker.rs b/components/resource_control/src/worker.rs index ca1a32347ff..deb1b2e44de 100644 --- a/components/resource_control/src/worker.rs +++ b/components/resource_control/src/worker.rs @@ -324,8 +324,8 @@ mod tests { fn get_current_stats(&mut self, t: ResourceType) -> IoResult { match t { ResourceType::Cpu => Ok(ResourceUsageStats { - total_quota: self.cpu_total * 1_000_000.0, - current_used: self.cpu_used * 1_000_000.0, + total_quota: self.cpu_total * MICROS_PER_SEC, + current_used: self.cpu_used * MICROS_PER_SEC, }), ResourceType::Io => Ok(ResourceUsageStats { total_quota: self.io_total, @@ -400,45 +400,115 @@ mod tests { ); } - fn check_limiter(limiter: &Arc, cpu: f64, io: f64) { + fn check_limiter(limiter: &Arc, cpu: f64, io: IoBytes) { check( limiter.get_limiter(ResourceType::Cpu).get_rate_limit(), - cpu * 1_000_000.0, + cpu * MICROS_PER_SEC, + ); + check( + limiter.get_limiter(ResourceType::Io).get_rate_limit(), + (io.read + io.write) as f64, ); - check(limiter.get_limiter(ResourceType::Io).get_rate_limit(), io); reset_limiter(limiter); } reset_quota(&mut worker, 0.0, 0.0, Duration::from_secs(1)); worker.adjust_quota(); - check_limiter(&limiter, 6.4, 8000.0); + check_limiter( + &limiter, + 6.4, + IoBytes { + read: 4000, + write: 4000, + }, + ); reset_quota(&mut worker, 4.0, 2000.0, Duration::from_millis(500)); worker.adjust_quota(); - check_limiter(&limiter, 6.4, 8000.0); + check_limiter( + &limiter, + 6.4, + IoBytes { + read: 4000, + write: 4000, + }, + ); reset_quota(&mut worker, 4.0, 2000.0, Duration::from_secs(1)); worker.adjust_quota(); - check_limiter(&limiter, 3.2, 6400.0); + check_limiter( + &limiter, + 3.2, + IoBytes { + read: 3200, + write: 3200, + }, + ); reset_quota(&mut worker, 6.0, 4000.0, Duration::from_secs(1)); - limiter.consume(Duration::from_secs(2), 2000); + limiter.consume( + 
Duration::from_secs(2), + IoBytes { + read: 1000, + write: 1000, + }, + ); worker.adjust_quota(); - check_limiter(&limiter, 3.2, 6400.0); + check_limiter( + &limiter, + 3.2, + IoBytes { + read: 3200, + write: 3200, + }, + ); reset_quota(&mut worker, 8.0, 9500.0, Duration::from_secs(1)); worker.adjust_quota(); - check_limiter(&limiter, 0.8, 1000.0); + check_limiter( + &limiter, + 0.8, + IoBytes { + read: 500, + write: 500, + }, + ); reset_quota(&mut worker, 7.5, 9500.0, Duration::from_secs(1)); - limiter.consume(Duration::from_secs(2), 2000); + limiter.consume( + Duration::from_secs(2), + IoBytes { + read: 1000, + write: 1000, + }, + ); worker.adjust_quota(); - check_limiter(&limiter, 2.0, 2000.0); + check_limiter( + &limiter, + 2.0, + IoBytes { + read: 1000, + write: 1000, + }, + ); reset_quota(&mut worker, 7.5, 9500.0, Duration::from_secs(5)); - limiter.consume(Duration::from_secs(10), 10000); + limiter.consume( + Duration::from_secs(10), + IoBytes { + read: 5000, + write: 5000, + }, + ); worker.adjust_quota(); - check_limiter(&limiter, 2.0, 2000.0); + check_limiter( + &limiter, + 2.0, + IoBytes { + read: 1000, + write: 1000, + }, + ); let default = new_background_resource_group_ru("default".into(), 2000, 8, vec!["br".into()]); @@ -454,15 +524,55 @@ mod tests { reset_quota(&mut worker, 5.0, 7000.0, Duration::from_secs(1)); worker.adjust_quota(); - check_limiter(&limiter, 1.6, 1600.0); - check_limiter(&bg_limiter, 0.8, 800.0); + check_limiter( + &limiter, + 1.6, + IoBytes { + read: 800, + write: 800, + }, + ); + check_limiter( + &bg_limiter, + 0.8, + IoBytes { + read: 400, + write: 400, + }, + ); reset_quota(&mut worker, 6.0, 5000.0, Duration::from_secs(1)); - limiter.consume(Duration::from_millis(1200), 1200); - bg_limiter.consume(Duration::from_millis(1800), 1800); + limiter.consume( + Duration::from_millis(1200), + IoBytes { + read: 600, + write: 600, + }, + ); + bg_limiter.consume( + Duration::from_millis(1800), + IoBytes { + read: 900, + write: 900, + }, + ); 
worker.adjust_quota(); - check_limiter(&limiter, 2.4, 2800.0); - check_limiter(&bg_limiter, 1.6, 3600.0); + check_limiter( + &limiter, + 2.4, + IoBytes { + read: 1400, + write: 1400, + }, + ); + check_limiter( + &bg_limiter, + 1.6, + IoBytes { + read: 1800, + write: 1800, + }, + ); let bg = new_resource_group_ru("background".into(), 1000, 15); resource_ctl.add_resource_group(bg); @@ -489,15 +599,55 @@ mod tests { reset_quota(&mut worker, 0.0, 0.0, Duration::from_secs(1)); worker.adjust_quota(); - check_limiter(&limiter, 4.27, 5333.3); - check_limiter(&new_bg_limiter, 2.13, 2666.7); + check_limiter( + &limiter, + 4.27, + IoBytes { + read: 2667, + write: 2667, + }, + ); + check_limiter( + &new_bg_limiter, + 2.13, + IoBytes { + read: 1334, + write: 1334, + }, + ); reset_quota(&mut worker, 6.0, 5000.0, Duration::from_secs(1)); - limiter.consume(Duration::from_millis(1200), 1200); - new_bg_limiter.consume(Duration::from_millis(1800), 1800); + limiter.consume( + Duration::from_millis(1200), + IoBytes { + read: 600, + write: 600, + }, + ); + new_bg_limiter.consume( + Duration::from_millis(1800), + IoBytes { + read: 900, + write: 900, + }, + ); worker.adjust_quota(); - check_limiter(&limiter, 2.4, 2800.0); - check_limiter(&new_bg_limiter, 1.6, 3600.0); + check_limiter( + &limiter, + 2.4, + IoBytes { + read: 1400, + write: 1400, + }, + ); + check_limiter( + &new_bg_limiter, + 1.6, + IoBytes { + read: 1800, + write: 1800, + }, + ); } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 9e6e6d07f06..0b9bd59f236 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -70,10 +70,7 @@ use raftstore::{ RaftRouterCompactedEventSender, }; use resolved_ts::{LeadershipResolver, Task}; -use resource_control::{ - worker::{GroupQuotaAdjustWorker, BACKGROUND_LIMIT_ADJUST_DURATION}, - ResourceGroupManager, ResourceManagerService, MIN_PRIORITY_UPDATE_INTERVAL, -}; +use resource_control::ResourceGroupManager; use 
security::SecurityManager; use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use snap_recovery::RecoveryService; @@ -341,24 +338,13 @@ where let resource_manager = if config.resource_control.enabled { let mgr = Arc::new(ResourceGroupManager::default()); - let mut resource_mgr_service = - ResourceManagerService::new(mgr.clone(), pd_client.clone()); - // spawn a task to periodically update the minimal virtual time of all resource - // groups. - let resource_mgr = mgr.clone(); - background_worker.spawn_interval_task(MIN_PRIORITY_UPDATE_INTERVAL, move || { - resource_mgr.advance_min_virtual_time(); - }); - // spawn a task to watch all resource groups update. - background_worker.spawn_async_task(async move { - resource_mgr_service.watch_resource_groups().await; - }); - // spawn a task to auto adjust background quota limiter. let io_bandwidth = config.storage.io_rate_limit.max_bytes_per_sec.0; - let mut worker = GroupQuotaAdjustWorker::new(mgr.clone(), io_bandwidth); - background_worker.spawn_interval_task(BACKGROUND_LIMIT_ADJUST_DURATION, move || { - worker.adjust_quota(); - }); + resource_control::start_periodic_tasks( + &mgr, + pd_client.clone(), + &background_worker, + io_bandwidth, + ); Some(mgr) } else { None diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 8793442434e..8de11ee05f2 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -67,10 +67,7 @@ use raftstore_v2::{ StateStorage, }; use resolved_ts::Task; -use resource_control::{ - worker::{GroupQuotaAdjustWorker, BACKGROUND_LIMIT_ADJUST_DURATION}, - ResourceGroupManager, ResourceManagerService, MIN_PRIORITY_UPDATE_INTERVAL, -}; +use resource_control::ResourceGroupManager; use security::SecurityManager; use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use tikv::{ @@ -328,24 +325,13 @@ where let resource_manager = if config.resource_control.enabled { let mgr = 
Arc::new(ResourceGroupManager::default()); - let mut resource_mgr_service = - ResourceManagerService::new(mgr.clone(), pd_client.clone()); - // spawn a task to periodically update the minimal virtual time of all resource - // groups. - let resource_mgr = mgr.clone(); - background_worker.spawn_interval_task(MIN_PRIORITY_UPDATE_INTERVAL, move || { - resource_mgr.advance_min_virtual_time(); - }); - // spawn a task to watch all resource groups update. - background_worker.spawn_async_task(async move { - resource_mgr_service.watch_resource_groups().await; - }); - // spawn a task to auto adjust background quota limiter. let io_bandwidth = config.storage.io_rate_limit.max_bytes_per_sec.0; - let mut worker = GroupQuotaAdjustWorker::new(mgr.clone(), io_bandwidth); - background_worker.spawn_interval_task(BACKGROUND_LIMIT_ADJUST_DURATION, move || { - worker.adjust_quota(); - }); + resource_control::start_periodic_tasks( + &mgr, + pd_client.clone(), + &background_worker, + io_bandwidth, + ); Some(mgr) } else { None diff --git a/components/test_pd/src/mocker/mod.rs b/components/test_pd/src/mocker/mod.rs index f4b6dafb6b6..8350e3ede06 100644 --- a/components/test_pd/src/mocker/mod.rs +++ b/components/test_pd/src/mocker/mod.rs @@ -3,7 +3,11 @@ use std::result; use futures::executor::block_on; -use kvproto::{meta_storagepb as mpb, pdpb::*}; +use kvproto::{ + meta_storagepb as mpb, + pdpb::*, + resource_manager::{TokenBucketsRequest, TokenBucketsResponse}, +}; mod bootstrap; pub mod etcd; @@ -216,4 +220,11 @@ pub trait PdMocker { fn get_operator(&self, _: &GetOperatorRequest) -> Option> { None } + + fn report_ru_metrics(&self, req: &TokenBucketsRequest) -> Option> { + req.get_requests().iter().for_each(|r| { + assert_eq!(r.get_is_background(), true); + }); + None + } } diff --git a/components/test_pd/src/server.rs b/components/test_pd/src/server.rs index 615da206d2c..90a420fbba0 100644 --- a/components/test_pd/src/server.rs +++ b/components/test_pd/src/server.rs @@ -19,6 +19,8 @@ 
use grpcio::{ use kvproto::{ meta_storagepb_grpc::{create_meta_storage, MetaStorage}, pdpb::*, + resource_manager, + resource_manager_grpc::create_resource_manager, }; use pd_client::Error as PdError; use security::*; @@ -75,6 +77,7 @@ impl Server { pub fn start(&mut self, mgr: &SecurityManager, eps: Vec<(String, u16)>) { let pd = create_pd(self.mocker.clone()); let meta_store = create_meta_storage(self.mocker.clone()); + let resource_manager = create_resource_manager(self.mocker.clone()); let env = Arc::new( EnvBuilder::new() .cq_count(1) @@ -83,7 +86,8 @@ impl Server { ); let mut sb = ServerBuilder::new(env) .register_service(pd) - .register_service(meta_store); + .register_service(meta_store) + .register_service(resource_manager); for (host, port) in eps { sb = mgr.bind(sb, &host, port); } @@ -623,3 +627,31 @@ impl Pd for PdMock { unimplemented!() } } + +impl resource_manager::ResourceManager for PdMock { + fn acquire_token_buckets( + &mut self, + ctx: grpcio::RpcContext<'_>, + stream: grpcio::RequestStream, + sink: grpcio::DuplexSink, + ) { + let mock = self.clone(); + ctx.spawn(async move { + let mut stream = stream.map_err(PdError::from).try_filter_map(move |req| { + let resp = mock + .case + .as_ref() + .and_then(|case| case.report_ru_metrics(&req)) + .or_else(|| mock.default_handler.report_ru_metrics(&req)); + match resp { + None => future::ok(None), + Some(Ok(resp)) => future::ok(Some((resp, WriteFlags::default()))), + Some(Err(e)) => future::err(box_err!("{:?}", e)), + } + }); + let mut sink = sink.sink_map_err(PdError::from); + let _ = sink.send_all(&mut stream).await; + let _ = sink.close().await; + }); + } +} diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 04116435612..4df7033c21a 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -39,6 +39,7 @@ use collections::HashMap; use concurrency_manager::{ConcurrencyManager, KeyHandleGuard}; use crossbeam::utils::CachePadded; use 
engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; +use file_system::IoBytes; use futures::{compat::Future01CompatExt, StreamExt}; use kvproto::{ kvrpcpb::{self, CommandPri, Context, DiskFullOpt, ExtraOp}, @@ -1283,7 +1284,13 @@ impl TxnScheduler { * SCHEDULER_CPU_TIME_FACTOR; if let Some(limiter) = resource_limiter { limiter - .async_consume(expected_dur, write_bytes as u64) + .async_consume( + expected_dur, + IoBytes { + read: 0, + write: write_bytes as u64, + }, + ) .await; } } From bb7bd13977f210673fd78a44efe41b4cec3045ca Mon Sep 17 00:00:00 2001 From: Hu# Date: Tue, 1 Aug 2023 07:55:06 +0800 Subject: [PATCH 0831/1149] ctl: remove success flashback in retry loop (#15116) close tikv/tikv#15107 Signed-off-by: husharp --- cmd/tikv-ctl/src/executor.rs | 10 +- cmd/tikv-ctl/src/main.rs | 225 ++++++++++++++---------- components/test_raftstore/src/server.rs | 23 ++- tests/integrations/server/debugger.rs | 127 ++++++++++++- tests/integrations/server/kv_service.rs | 1 - 5 files changed, 277 insertions(+), 109 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 1962cbe0e50..19a65e9bd20 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -83,9 +83,9 @@ pub fn new_debug_executor( data_dir: Option<&str>, host: Option<&str>, mgr: Arc, -) -> Box { +) -> Box { if let Some(remote) = host { - return Box::new(new_debug_client(remote, mgr)) as Box; + return Box::new(new_debug_client(remote, mgr)) as Box<_>; } // TODO: perhaps we should allow user skip specifying data path. 
@@ -128,7 +128,7 @@ pub fn new_debug_executor( let debugger: DebuggerImpl<_, MockEngine, MockLockManager, ApiV1> = DebuggerImpl::new(Engines::new(kv_db, raft_db), cfg_controller, None); - Box::new(debugger) as Box + Box::new(debugger) as Box<_> } else { let mut config = cfg.raft_engine.config(); config.dir = cfg.infer_raft_engine_path(Some(data_dir)).unwrap(); @@ -146,14 +146,14 @@ pub fn new_debug_executor( let debugger: DebuggerImpl<_, MockEngine, MockLockManager, ApiV1> = DebuggerImpl::new(Engines::new(kv_db, raft_db), cfg_controller, None); - Box::new(debugger) as Box + Box::new(debugger) as Box<_> } EngineType::RaftKv2 => { let registry = TabletRegistry::new(Box::new(factory), Path::new(data_dir).join("tablets")) .unwrap_or_else(|e| fatal!("failed to create tablet registry {:?}", e)); let debugger = DebuggerImplV2::new(registry, raft_db, cfg_controller); - Box::new(debugger) as Box + Box::new(debugger) as Box<_> } } } diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index c0d690a1593..6baa1fe6c39 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -13,7 +13,7 @@ use std::{ path::Path, process, str, string::ToString, - sync::Arc, + sync::{Arc, Mutex, RwLock}, thread, time::Duration, u64, @@ -822,6 +822,10 @@ fn flashback_whole_cluster( start_key: Vec, end_key: Vec, ) { + println!( + "flashback whole cluster with version {} from {:?} to {:?}", + version, start_key, end_key + ); let pd_client = pd_client.clone(); let cfg = cfg.clone(); let runtime = tokio::runtime::Builder::new_multi_thread() @@ -831,43 +835,68 @@ fn flashback_whole_cluster( .unwrap(); block_on(runtime.spawn(async move { - // Prepare all regions for flashback. + // Pre-create the debug executors for all stores. 
+ let stores = match pd_client.get_all_stores(false) { + Ok(stores) => stores, + Err(e) => { + println!("failed to load all stores: {:?}", e); + return; + } + }; + let debuggers = Mutex::new(stores + .into_iter() + .map(|s| { + let addr = pd_client.get_store(s.get_id()).unwrap().address; + let cfg_inner = cfg.clone(); + let mgr = Arc::clone(&mgr); + let debug_executor = new_debug_executor(&cfg_inner, None, Some(&addr), mgr); + (s.get_id(), debug_executor) + } ) + .collect::>()); + // Prepare flashback. let start_ts = pd_client.get_tso().await.unwrap(); - let mut stores_leader = load_leaders_to_each_store(&pd_client, start_key, end_key); - // Need to retry if all regions are not finish prepare. + let key_range_to_prepare = RwLock::new(load_key_range(&pd_client, start_key, end_key)); + // Need to retry if all regions are not finish. + let mut key_range_to_finish = key_range_to_prepare.read().unwrap().clone(); loop { + // Traverse all regions and prepare flashback. let mut futures = Vec::default(); - stores_leader - .clone() - .into_iter() - .for_each(|(store_id, leaders)| { - let addr = pd_client.get_store(store_id).unwrap().address; - leaders - .into_iter() - .filter(|(_, region_id)| { - region_ids.is_empty() || region_ids.contains(region_id) - }) - .for_each(|(key_range, region_id)| { - // Prepare flashback region version by key range. - let key_range = build_key_range(&key_range.0, &key_range.1, false); - - let addr = addr.clone(); - let cfg_inner = cfg.clone(); - let mgr = Arc::clone(&mgr); - let f = async move { - let debug_executor = - new_debug_executor(&cfg_inner, None, Some(&addr), mgr); - debug_executor.flashback_to_version( - version, - region_id, - key_range, - start_ts.into_inner(), - 0, - ) - }; - futures.push(f); - }) - }); + let read_result = key_range_to_prepare.read().unwrap().clone(); + read_result.into_iter(). 
+ filter(|(_, (region_id, _))| { + region_ids.is_empty() || region_ids.contains(region_id) + }) + .for_each(|((start_key, end_key), (region_id, store_id))| { + let debuggers = &debuggers; + let key_range_to_prepare = &key_range_to_prepare; + let key_range = build_key_range(&start_key, &end_key, false); + let f = async move { + let debuggers = debuggers.lock().unwrap(); + match debuggers.get(&store_id).unwrap().flashback_to_version( + version, + region_id, + key_range, + start_ts.into_inner(), + 0, + ) { + Ok(_) => { + key_range_to_prepare + .write() + .unwrap() + .remove(&(start_key, end_key)); + Ok(()) + } + Err(err) => { + println!( + "prepare flashback region {} with start_ts {:?} to version {} failed, error: {:?}", + region_id, start_ts, version, err + ); + Err(err) + }, + } + }; + futures.push(f); + }); // Wait for finishing prepare flashback. match tokio::time::timeout( @@ -879,18 +908,18 @@ fn flashback_whole_cluster( Ok(res) => { if let Err(key_range) = res { // Retry specific key range to prepare flashback. - let retry_stores_leader = load_leaders_to_each_store( - &pd_client, - key_range.get_start_key().to_vec(), - key_range.get_end_key().to_vec(), - ); - // Need to update `stores_leader` to replace stale key range. - for (store_id, leaders) in retry_stores_leader { - let regions = stores_leader - .entry(store_id) - .or_insert_with(HashMap::default); - regions.extend(leaders); - } + let stale_key_range = (key_range.start_key.clone(), key_range.end_key.clone()); + let mut key_range_to_prepare = key_range_to_prepare.write().unwrap(); + // Remove stale key range. + key_range_to_prepare.remove(&stale_key_range); + key_range_to_finish.remove(&stale_key_range); + load_key_range(&pd_client, stale_key_range.0.clone(), stale_key_range.1.clone()) + .into_iter().for_each(|(key_range, region_info)| { + // Need to update `key_range_to_prepare` to replace stale key range. 
+ key_range_to_prepare.insert(key_range.clone(), region_info); + // Need to update `key_range_to_finish` as well. + key_range_to_finish.insert(key_range, region_info); + }); thread::sleep(Duration::from_micros(WAIT_APPLY_FLASHBACK_STATE)); continue; } @@ -906,41 +935,47 @@ fn flashback_whole_cluster( } } - // Start flashback for all regions. + // Flashback for all regions. let commit_ts = pd_client.get_tso().await.unwrap(); + let key_range_to_finish = RwLock::new(key_range_to_finish); loop { let mut futures = Vec::default(); - stores_leader - .clone() - .into_iter() - .for_each(|(store_id, leaders)| { - let addr = pd_client.get_store(store_id).unwrap().address; - leaders - .into_iter() - .filter(|(_, region_id)| { - region_ids.is_empty() || region_ids.contains(region_id) - }) - .for_each(|(key_range, region_id)| { - // Flashback region version by key range. - let key_range = build_key_range(&key_range.0, &key_range.1, false); - - let addr = addr.clone(); - let cfg_inner = cfg.clone(); - let mgr = Arc::clone(&mgr); - let f = async move { - let debug_executor = - new_debug_executor(&cfg_inner, None, Some(&addr), mgr); - debug_executor.flashback_to_version( - version, - region_id, - key_range, - start_ts.into_inner(), - commit_ts.into_inner(), - ) - }; - futures.push(f); - }) - }); + let read_result = key_range_to_finish.read().unwrap().clone(); + read_result.into_iter() + .filter(|(_, (region_id, _))| { + region_ids.is_empty() || region_ids.contains(region_id) + }) + .for_each(|((start_key, end_key), (region_id, store_id))| { + let debuggers = &debuggers; + let key_range_to_finish = &key_range_to_finish; + let key_range = build_key_range(&start_key, &end_key, false); + let f = async move { + let debuggers = debuggers.lock().unwrap(); + match debuggers.get(&store_id).unwrap().flashback_to_version( + version, + region_id, + key_range, + start_ts.into_inner(), + commit_ts.into_inner(), + ) { + Ok(_) => { + key_range_to_finish + .write() + .unwrap() + 
.remove(&(start_key, end_key)); + Ok(()) + } + Err(err) => { + println!( + "finish flashback region {} with start_ts {:?} to version {} failed, error: {:?}", + region_id, start_ts, version, err + ); + Err(err) + }, + } + }; + futures.push(f); + }); // Wait for finishing flashback to version. match tokio::time::timeout( @@ -971,29 +1006,25 @@ fn flashback_whole_cluster( println!("flashback all stores success!"); } -fn load_leaders_to_each_store( +// Load (region_id, leader's store id) in the cluster with key ranges. +fn load_key_range( pd_client: &RpcClient, start_key: Vec, end_key: Vec, -) -> HashMap, Vec), u64>> { +) -> HashMap<(Vec, Vec), (u64, u64)> { // Get all regions in the cluster. let res = pd_client.batch_load_regions(start_key, end_key); - // Put all regions in right stores. - let mut store_regions = HashMap::default(); - res.into_iter().for_each(|batch| { - batch.into_iter().for_each(|r| { - let store_id = r.get_leader().get_store_id(); - let regions = store_regions - .entry(store_id) - .or_insert_with(HashMap::default); - let mut cur_region = r.get_region().clone(); - regions.insert( - (cur_region.take_start_key(), cur_region.take_end_key()), - cur_region.get_id(), - ); - }); - }); - store_regions + res.into_iter() + .flatten() + .map(|r| { + let cur_region = r.get_region(); + let start_key = cur_region.get_start_key().to_owned(); + let end_key = cur_region.get_end_key().to_owned(); + let region_id = cur_region.get_id(); + let leader_store_id = r.get_leader().get_store_id(); + ((start_key, end_key), (region_id, leader_store_id)) + }) + .collect::>() } fn read_fail_file(path: &str) -> Vec<(String, String)> { diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 3e41584f596..487ef4e8b59 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -8,7 +8,7 @@ use std::{ usize, }; -use api_version::{dispatch_api_version, ApiV1, KvFormat}; +use 
api_version::{dispatch_api_version, KvFormat}; use causal_ts::CausalTsProviderImpl; use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; @@ -67,8 +67,7 @@ use tikv::{ }, storage::{ self, - kv::{FakeExtension, LocalTablets, MockEngine, SnapContext}, - lock_manager::MockLockManager, + kv::{FakeExtension, LocalTablets, SnapContext}, txn::flow_controller::{EngineFlowController, FlowController}, Engine, Storage, }, @@ -495,10 +494,10 @@ impl ServerCluster { .unwrap(), ); - let debugger: DebuggerImpl<_, MockEngine, MockLockManager, ApiV1> = DebuggerImpl::new( + let debugger = DebuggerImpl::new( engines.clone(), ConfigController::new(cfg.tikv.clone()), - None, + Some(store.clone()), ); let debug_thread_handle = debug_thread_pool.handle().clone(); let debug_service = DebugService::new( @@ -939,6 +938,20 @@ pub fn must_new_cluster_and_debug_client() -> (Cluster, DebugClie (cluster, client, leader.get_store_id()) } +pub fn must_new_cluster_kv_client_and_debug_client() +-> (Cluster, TikvClient, DebugClient, Context) { + let (cluster, leader, ctx) = must_new_cluster_mul(1); + + let env = Arc::new(Environment::new(1)); + let channel = + ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); + + let kv_client = TikvClient::new(channel.clone()); + let debug_client = DebugClient::new(channel); + + (cluster, kv_client, debug_client, ctx) +} + pub fn must_new_and_configure_cluster_and_kv_client( configure: impl FnMut(&mut Cluster), ) -> (Cluster, TikvClient, Context) { diff --git a/tests/integrations/server/debugger.rs b/tests/integrations/server/debugger.rs index e8a7bccb052..9eabb30b58c 100644 --- a/tests/integrations/server/debugger.rs +++ b/tests/integrations/server/debugger.rs @@ -4,7 +4,14 @@ use collections::{HashMap, HashSet}; use engine_rocks::{raw::Range, util::get_cf_handle}; use engine_traits::{CachedTablet, MiscExt, CF_WRITE}; use keys::{data_key, DATA_MAX_KEY}; -use kvproto::debugpb::Db; +use kvproto::{ + 
debugpb::{ + Db, FlashbackToVersionRequest, FlashbackToVersionResponse, GetAllRegionsInStoreRequest, + RegionInfoRequest, + }, + debugpb_grpc::DebugClient, +}; +use test_raftstore::{must_kv_read_equal, write_and_read_key}; use tikv::{ config::ConfigController, server::{debug::Debugger, debug2::DebuggerImplV2}, @@ -161,3 +168,121 @@ fn test_compact() { check_compact(b"z".to_vec(), b"z".to_vec(), regions_compacted.clone()); check_compact(b"z".to_vec(), b"{".to_vec(), regions_compacted); } + +#[test] +fn test_flashback_to_version() { + let (mut _cluster, kv_client, debug_client, ctx) = + test_raftstore::must_new_cluster_kv_client_and_debug_client(); + let mut ts = 0; + for i in 0..2000 { + let v = format!("value@{}", i).into_bytes(); + let k = format!("key@{}", i % 1000).into_bytes(); + write_and_read_key(&kv_client, &ctx, &mut ts, k.clone(), v.clone()); + } + + let req = GetAllRegionsInStoreRequest::default(); + let regions = debug_client.get_all_regions_in_store(&req).unwrap().regions; + println!("regions: {:?}", regions); + let flashback_version = 5; + // prepare flashback. + let res = flashback_to_version(&debug_client, regions.clone(), flashback_version, ts + 1, 0); + assert_eq!(res.is_ok(), true); + // finish flashback. 
+ let res = flashback_to_version(&debug_client, regions, flashback_version, ts + 1, ts + 2); + assert_eq!(res.is_ok(), true); + + ts += 2; + must_kv_read_equal(&kv_client, ctx, b"key@1".to_vec(), b"value@1".to_vec(), ts); +} + +#[test] +fn test_flashback_to_version_without_prepare() { + let (mut _cluster, kv_client, debug_client, ctx) = + test_raftstore::must_new_cluster_kv_client_and_debug_client(); + let mut ts = 0; + for i in 0..2000 { + let v = format!("value@{}", i).into_bytes(); + let k = format!("key@{}", i % 1000).into_bytes(); + write_and_read_key(&kv_client, &ctx, &mut ts, k.clone(), v.clone()); + } + + let req = GetAllRegionsInStoreRequest::default(); + let regions = debug_client.get_all_regions_in_store(&req).unwrap().regions; + // finish flashback. + match flashback_to_version(&debug_client, regions, 0, 1, 2).unwrap_err() { + grpcio::Error::RpcFailure(status) => { + assert_eq!(status.code(), grpcio::RpcStatusCode::UNKNOWN); + assert_eq!(status.message(), "not in flashback state"); + } + _ => panic!("expect not in flashback state"), + } +} + +#[test] +fn test_flashback_to_version_with_mismatch_ts() { + let (mut _cluster, kv_client, debug_client, ctx) = + test_raftstore::must_new_cluster_kv_client_and_debug_client(); + let mut ts = 0; + for i in 0..2000 { + let v = format!("value@{}", i).into_bytes(); + let k = format!("key@{}", i % 1000).into_bytes(); + write_and_read_key(&kv_client, &ctx, &mut ts, k.clone(), v.clone()); + } + + let req = GetAllRegionsInStoreRequest::default(); + let regions = debug_client.get_all_regions_in_store(&req).unwrap().regions; + let flashback_version = 5; + // prepare flashback. + let res = flashback_to_version(&debug_client, regions.clone(), flashback_version, ts + 1, 0); + assert_eq!(res.is_ok(), true); + + let res = flashback_to_version( + &debug_client, + regions.clone(), + flashback_version, + ts + 1, + ts + 3, + ); + assert_eq!(res.is_ok(), true); + + // use mismatch ts. 
+ match flashback_to_version(&debug_client, regions, flashback_version, ts + 2, ts + 3) + .unwrap_err() + { + grpcio::Error::RpcFailure(status) => { + assert_eq!(status.code(), grpcio::RpcStatusCode::UNKNOWN); + assert_eq!(status.message(), "not in flashback state"); + } + _ => panic!("expect not in flashback state"), + } +} + +fn flashback_to_version( + client: &DebugClient, + regions: Vec, + version: u64, + start_ts: u64, + commit_ts: u64, +) -> grpcio::Result { + for region_id in regions { + let mut req = RegionInfoRequest::default(); + req.set_region_id(region_id); + let r = client + .region_info(&req) + .unwrap() + .region_local_state + .unwrap() + .region + .take() + .unwrap(); + let mut req = FlashbackToVersionRequest::default(); + req.set_version(version); + req.set_region_id(region_id); + req.set_start_key(r.get_start_key().to_vec()); + req.set_end_key(r.get_end_key().to_vec()); + req.set_start_ts(start_ts); + req.set_commit_ts(commit_ts); + client.flashback_to_version(&req)?; + } + Ok(FlashbackToVersionResponse::default()) +} diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 08f2bb6fa55..6982f51503e 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -2541,7 +2541,6 @@ fn test_storage_with_quota_limiter_disable() { #[test_case(test_raftstore::must_new_and_configure_cluster_and_kv_client)] #[test_case(test_raftstore_v2::must_new_and_configure_cluster_and_kv_client)] fn test_commands_write_detail() { - test_util::init_log_for_test(); let (cluster, client, ctx) = new_cluster(|cluster| { cluster.cfg.pessimistic_txn.pipelined = false; cluster.cfg.pessimistic_txn.in_memory = false; From 4e3102e82a939e7de07fdb6b3ee57b8aa466860c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 1 Aug 2023 16:56:38 +0800 Subject: [PATCH 0832/1149] snap_recovery: added keep leader (#15124) close tikv/tikv#15122 
Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 2 + components/snap_recovery/Cargo.toml | 2 + components/snap_recovery/src/leader_keeper.rs | 260 ++++++++++++++++++ components/snap_recovery/src/lib.rs | 1 + components/snap_recovery/src/services.rs | 59 +--- 5 files changed, 278 insertions(+), 46 deletions(-) create mode 100644 components/snap_recovery/src/leader_keeper.rs diff --git a/Cargo.lock b/Cargo.lock index 2b563cb3d52..80d5ceaf5cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5770,6 +5770,7 @@ dependencies = [ "engine_traits", "futures 0.3.15", "grpcio", + "itertools", "keys", "kvproto", "lazy_static", @@ -5788,6 +5789,7 @@ dependencies = [ "tikv", "tikv_alloc", "tikv_util", + "tokio", "toml", "txn_types", ] diff --git a/components/snap_recovery/Cargo.toml b/components/snap_recovery/Cargo.toml index 985c7323af3..8b0b0ec4c3a 100644 --- a/components/snap_recovery/Cargo.toml +++ b/components/snap_recovery/Cargo.toml @@ -13,6 +13,7 @@ engine_rocks = { workspace = true } engine_traits = { workspace = true } futures = { version = "0.3", features = ["executor"] } grpcio = { workspace = true } +itertools = "0.10" keys = { workspace = true } kvproto = { workspace = true } lazy_static = "1.4" @@ -31,6 +32,7 @@ thiserror = "1.0" tikv = { workspace = true } tikv_alloc = { workspace = true } tikv_util = { workspace = true } +tokio = { version = "1.17", features = ["rt"] } toml = "0.5" txn_types = { workspace = true } diff --git a/components/snap_recovery/src/leader_keeper.rs b/components/snap_recovery/src/leader_keeper.rs new file mode 100644 index 00000000000..417d5becca3 --- /dev/null +++ b/components/snap_recovery/src/leader_keeper.rs @@ -0,0 +1,260 @@ +// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + collections::HashSet, + marker::PhantomData, + sync::Mutex, + time::{Duration, Instant}, +}; + +use engine_traits::KvEngine; +use futures::compat::Future01CompatExt; +use itertools::Itertools; +use raftstore::{ + errors::{Error, Result}, + store::{Callback, CasualMessage, CasualRouter, SignificantMsg, SignificantRouter}, +}; +use tikv_util::{future::paired_future_callback, timer::GLOBAL_TIMER_HANDLE}; + +pub struct LeaderKeeper { + router: Router, + not_leader: HashSet, + + _ek: PhantomData, +} + +#[derive(Default)] +pub struct StepResult { + pub failed_leader: Vec<(u64, Error)>, + pub campaign_failed: Vec<(u64, Error)>, +} + +fn ellipse(ts: &[T], max_len: usize) -> String { + if ts.len() < max_len { + return format!("{:?}", &ts); + } + format!("{:?} (and {} more)", &ts[..max_len], ts.len() - max_len) +} + +impl std::fmt::Debug for StepResult { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("StepResult") + .field( + "failed_leader", + &format_args!("{}", ellipse(&self.failed_leader, 8)), + ) + .field( + "campaign_failed", + &format_args!("{}", ellipse(&self.campaign_failed, 8)), + ) + .finish() + } +} + +impl LeaderKeeper +where + EK: KvEngine, + Router: CasualRouter + SignificantRouter + 'static, +{ + pub fn new(router: Router, to_keep: impl IntoIterator) -> Self { + Self { + router, + + not_leader: to_keep.into_iter().collect(), + _ek: PhantomData, + } + } + + pub async fn elect_and_wait_all_ready(&mut self) { + loop { + let now = Instant::now(); + let res = self.step().await; + info!("finished leader keeper stepping."; "result" => ?res, "take" => ?now.elapsed()); + GLOBAL_TIMER_HANDLE + .delay(now + Duration::from_secs(10)) + .compat() + .await + .expect("wrong with global timer, cannot stepping."); + if res.failed_leader.is_empty() { + return; + } + } + } + + pub async fn step(&mut self) -> StepResult { + const CONCURRENCY: usize = 256; + let r = Mutex::new(StepResult::default()); + let success = 
Mutex::new(HashSet::new()); + for batch in &self.not_leader.iter().chunks(CONCURRENCY) { + let tasks = batch.map(|region_id| async { + match self.check_leader(*region_id).await { + Ok(_) => { + success.lock().unwrap().insert(*region_id); + return; + } + Err(err) => r.lock().unwrap().failed_leader.push((*region_id, err)), + }; + + if let Err(err) = self.force_leader(*region_id) { + r.lock().unwrap().campaign_failed.push((*region_id, err)); + } + }); + futures::future::join_all(tasks).await; + } + success.lock().unwrap().iter().for_each(|i| { + debug_assert!(self.not_leader.remove(i)); + }); + r.into_inner().unwrap() + } + + async fn check_leader(&self, region_id: u64) -> Result<()> { + let (cb, fut) = paired_future_callback(); + let msg = SignificantMsg::LeaderCallback(Callback::::read(cb)); + self.router.significant_send(region_id, msg)?; + let resp = fut + .await + .map_err(|_err| Error::Other("canceled by store".into()))?; + let header = resp.response.get_header(); + if header.has_error() { + return Err(Error::Other(box_err!( + "got error: {:?}", + header.get_error() + ))); + } + Ok(()) + } + + fn force_leader(&self, region_id: u64) -> Result<()> { + let msg = CasualMessage::Campaign; + self.router.send(region_id, msg)?; + // We have nothing to do... + Ok(()) + } +} + +#[cfg(test)] +mod test { + use std::{cell::RefCell, collections::HashSet}; + + use engine_rocks::RocksEngine; + use engine_traits::KvEngine; + use futures::executor::block_on; + use kvproto::raft_cmdpb; + use raftstore::store::{CasualRouter, SignificantRouter}; + + use super::LeaderKeeper; + + #[derive(Default)] + struct MockStore { + regions: HashSet, + leaders: RefCell>, + } + + impl LeaderKeeper { + fn mut_router(&mut self) -> &mut Router { + &mut self.router + } + } + + // impl SignificantRouter for MockStore, which only handles `LeaderCallback`, + // return success when source region is leader, otherwise fill the error in + // header. 
+ impl SignificantRouter for MockStore { + fn significant_send( + &self, + region_id: u64, + msg: raftstore::store::SignificantMsg, + ) -> raftstore::errors::Result<()> { + match msg { + raftstore::store::SignificantMsg::LeaderCallback(cb) => { + let mut resp = raft_cmdpb::RaftCmdResponse::default(); + let mut header = raft_cmdpb::RaftResponseHeader::default(); + if !self.leaders.borrow().contains(®ion_id) { + let mut err = kvproto::errorpb::Error::new(); + err.set_not_leader(kvproto::errorpb::NotLeader::new()); + header.set_error(err); + } + resp.set_header(header); + cb.invoke_with_response(resp); + Ok(()) + } + _ => panic!("unexpected msg"), + } + } + } + + // impl CasualRouter for MockStore, which only handles `Campaign`, + // add the region to leaders list when handling it. + impl CasualRouter for MockStore { + fn send( + &self, + region_id: u64, + msg: raftstore::store::CasualMessage, + ) -> raftstore::errors::Result<()> { + match msg { + raftstore::store::CasualMessage::Campaign => { + if !self.regions.contains(®ion_id) { + return Err(raftstore::Error::RegionNotFound(region_id)); + } + self.leaders.borrow_mut().insert(region_id); + Ok(()) + } + _ => panic!("unexpected msg"), + } + } + } + + #[test] + fn test_basic() { + let leaders = vec![1, 2, 3]; + let mut store = MockStore::default(); + store.regions = leaders.iter().copied().collect(); + let mut lk = LeaderKeeper::::new(store, leaders); + let res = block_on(lk.step()); + assert_eq!(res.failed_leader.len(), 3); + assert_eq!(res.campaign_failed.len(), 0); + } + + #[test] + fn test_failure() { + let leaders = vec![1, 2, 3]; + let mut store = MockStore::default(); + store.regions = leaders.iter().copied().collect(); + let mut lk = LeaderKeeper::::new(store, vec![1, 2, 3, 4]); + let res = block_on(lk.step()); + assert_eq!(res.failed_leader.len(), 4); + assert_eq!(res.campaign_failed.len(), 1); + let res = block_on(lk.step()); + assert_eq!(res.failed_leader.len(), 1); + assert_eq!(res.campaign_failed.len(), 
1); + lk.mut_router().regions.insert(4); + let res = block_on(lk.step()); + assert_eq!(res.failed_leader.len(), 1); + assert_eq!(res.campaign_failed.len(), 0); + let res = block_on(lk.step()); + assert_eq!(res.failed_leader.len(), 0); + assert_eq!(res.campaign_failed.len(), 0); + } + + #[test] + fn test_many_regions() { + let leaders = std::iter::repeat_with({ + let mut x = 0; + move || { + x += 1; + x + } + }) + .take(2049) + .collect::>(); + let mut store = MockStore::default(); + store.regions = leaders.iter().copied().collect(); + let mut lk = LeaderKeeper::::new(store, leaders); + let res = block_on(lk.step()); + assert_eq!(res.failed_leader.len(), 2049); + assert_eq!(res.campaign_failed.len(), 0); + let res = block_on(lk.step()); + assert_eq!(res.failed_leader.len(), 0); + assert_eq!(res.campaign_failed.len(), 0); + } +} diff --git a/components/snap_recovery/src/lib.rs b/components/snap_recovery/src/lib.rs index 043cffb3c80..0baefb5eabe 100644 --- a/components/snap_recovery/src/lib.rs +++ b/components/snap_recovery/src/lib.rs @@ -9,5 +9,6 @@ pub use init_cluster::{enter_snap_recovery_mode, start_recovery}; pub use services::RecoveryService; mod data_resolver; +mod leader_keeper; mod metrics; mod region_meta_collector; diff --git a/components/snap_recovery/src/services.rs b/components/snap_recovery/src/services.rs index eb5c30a7ac2..10f82d64917 100644 --- a/components/snap_recovery/src/services.rs +++ b/components/snap_recovery/src/services.rs @@ -27,7 +27,7 @@ use raftstore::{ router::RaftStoreRouter, store::{ fsm::RaftRouter, - msg::{Callback, CasualMessage, PeerMsg, SignificantMsg}, + msg::{PeerMsg, SignificantMsg}, transport::SignificantRouter, SnapshotRecoveryWaitApplySyncer, }, @@ -37,7 +37,8 @@ use tikv_util::sys::thread::{StdThreadBuildWrapper, ThreadBuildWrapper}; use crate::{ data_resolver::DataResolverManager, - metrics::{CURRENT_WAIT_APPLY_LEADER, CURRENT_WAIT_ELECTION_LEADER, REGION_EVENT_COUNTER}, + leader_keeper::LeaderKeeper, + 
metrics::{CURRENT_WAIT_APPLY_LEADER, REGION_EVENT_COUNTER}, region_meta_collector::RegionMetaCollector, }; @@ -240,50 +241,16 @@ impl RecoverData for RecoveryService { } } - let mut rxs = Vec::with_capacity(leaders.len()); - for ®ion_id in &leaders { - if let Err(e) = raft_router.send_casual_msg(region_id, CasualMessage::Campaign) { - // TODO: retry may necessay - warn!("region fails to campaign: "; - "region_id" => region_id, - "err" => ?e); - continue; - } else { - debug!("region starts to campaign"; "region_id" => region_id); - } - - let (tx, rx) = sync_channel(1); - let callback = Callback::read(Box::new(move |_| { - if tx.send(1).is_err() { - error!("response failed"; "region_id" => region_id); - } - })); - if let Err(e) = raft_router - .significant_send(region_id, SignificantMsg::LeaderCallback(callback)) - { - warn!("LeaderCallback failed"; "err" => ?e, "region_id" => region_id); - } - rxs.push(Some(rx)); - } - - info!("send assign leader request done"; "count" => %leaders.len()); - - // leader is campaign and be ensured as leader - for (rid, rx) in leaders.iter().zip(rxs) { - if let Some(rx) = rx { - CURRENT_WAIT_ELECTION_LEADER.set(*rid as _); - match rx.recv() { - Ok(id) => { - debug!("leader is assigned for region"; "region_id" => %id); - } - Err(e) => { - error!("check leader failed"; "error" => ?e); - } - } - } - } - CURRENT_WAIT_ELECTION_LEADER.set(0); - + let mut lk = LeaderKeeper::new(raft_router.clone(), leaders.clone()); + // We must use the tokio runtime here because there isn't a `block_in_place` + // like thing in the futures executor. It simply panics when block + // on the block_on context. + // It is also impossible to directly `await` here, because that will make + // borrowing to the raft router crosses the await point. 
+ tokio::runtime::Builder::new_current_thread() + .build() + .expect("failed to build temporary tokio runtime.") + .block_on(lk.elect_and_wait_all_ready()); info!("all region leader assigned done"; "count" => %leaders.len()); let now = Instant::now(); From 7a3163f3d1fe9ff4a86f108dbc2101d4513c39a9 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 3 Aug 2023 17:32:38 +0800 Subject: [PATCH 0833/1149] raftstore-v2: support unsafe recovery create and destroy peer (#15185) ref tikv/tikv#15108 * raftstore-v2: support unsafe recovery create peer Reuse split mechanism to create peer. Because in v2 the only way to create and initialize to peer is via a snapshot message. * raftstore-v2: support unsafe recovery destroy peer Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/fsm/peer.rs | 7 + components/raftstore-v2/src/fsm/store.rs | 19 ++- .../src/operation/command/admin/split.rs | 4 +- components/raftstore-v2/src/operation/life.rs | 45 ++++-- .../raftstore-v2/src/operation/ready/mod.rs | 2 +- .../src/operation/unsafe_recovery/create.rs | 137 +++++++++++++++++ .../src/operation/unsafe_recovery/destroy.rs | 22 +++ .../src/operation/unsafe_recovery/mod.rs | 2 + .../src/operation/unsafe_recovery/report.rs | 3 + components/raftstore-v2/src/router/imp.rs | 31 +++- components/raftstore-v2/src/router/message.rs | 13 +- .../raftstore-v2/src/worker/pd/store.rs | 31 +++- .../raftstore/src/store/unsafe_recovery.rs | 41 +++++ components/raftstore/src/store/worker/pd.rs | 22 +-- components/test_raftstore-v2/src/cluster.rs | 41 +++++ .../failpoints/cases/test_unsafe_recovery.rs | 88 ----------- .../raftstore/test_unsafe_recovery.rs | 141 +++++++++++++++++- 17 files changed, 511 insertions(+), 138 deletions(-) create mode 100644 components/raftstore-v2/src/operation/unsafe_recovery/create.rs create mode 100644 components/raftstore-v2/src/operation/unsafe_recovery/destroy.rs diff --git 
a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 5d19f8d7183..0e854f268e1 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -383,6 +383,13 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, .fsm .peer_mut() .on_unsafe_recovery_fill_out_report(syncer), + PeerMsg::UnsafeRecoveryWaitInitialized(syncer) => self + .fsm + .peer_mut() + .on_unsafe_recovery_wait_initialized(syncer), + PeerMsg::UnsafeRecoveryDestroy(syncer) => { + self.fsm.peer_mut().on_unsafe_recovery_destroy_peer(syncer) + } } } // TODO: instead of propose pending commands immediately, we should use timeout. diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index c7998359066..2c3a220c969 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -297,8 +297,19 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { match msg { StoreMsg::Start => self.on_start(), StoreMsg::Tick(tick) => self.on_tick(tick), - StoreMsg::RaftMessage(msg) => self.fsm.store.on_raft_message(self.store_ctx, msg), - StoreMsg::SplitInit(msg) => self.fsm.store.on_split_init(self.store_ctx, msg), + StoreMsg::RaftMessage(msg) => { + self.fsm.store.on_raft_message(self.store_ctx, msg); + } + StoreMsg::SplitInit(msg) => { + // For normal region split, it must not skip sending + // SplitInit message, otherwise it requests a snapshot from + // leader which is expensive. 
+ self.fsm.store.on_split_init( + self.store_ctx, + msg, + false, // skip_if_exists + ) + } StoreMsg::StoreUnreachable { to_store_id } => self .fsm .store @@ -322,6 +333,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { .fsm .store .on_unsafe_recovery_report(self.store_ctx, report), + StoreMsg::UnsafeRecoveryCreatePeer { region, syncer } => self + .fsm + .store + .on_unsafe_recovery_create_peer(self.store_ctx, region, syncer), } } } diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 3fefc7c1006..93a14ed4124 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -92,8 +92,8 @@ pub struct SplitInit { /// In-memory pessimistic locks that should be inherited from parent region pub locks: PeerPessimisticLocks, - approximate_size: Option, - approximate_keys: Option, + pub approximate_size: Option, + pub approximate_keys: Option, } impl SplitInit { diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 3f703bea24c..c5d64acd27f 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -252,6 +252,7 @@ impl Store { &mut self, ctx: &mut StoreContext, msg: Box, + skip_if_exists: bool, ) where EK: KvEngine, ER: RaftEngine, @@ -286,13 +287,17 @@ impl Store { })(); // It will create the peer if it does not exist - self.on_raft_message(ctx, raft_msg); + let create = self.on_raft_message(ctx, raft_msg); + if !create && skip_if_exists { + warn!(self.logger(), "skip sending SplitInit"; "msg" => ?msg); + return; + } if let Err(SendError(m)) = ctx.router.force_send(region_id, PeerMsg::SplitInit(msg)) { warn!( self.logger(), - "Split peer is destroyed before sending the intialization msg"; - "split init msg" => ?m, + "split peer is destroyed before sending the 
initialization msg"; + "msg" => ?m, ); report_split_init_finish(ctx, derived_region_id, region_id, true); } @@ -355,12 +360,17 @@ impl Store { /// When a message's recipient doesn't exist, it will be redirected to /// store. Store is responsible for checking if it's neccessary to create /// a peer to handle the message. + /// + /// Return true if the peer is created by the message, false indicates + /// either the message is invalid or the peer had already been created + /// before the message. #[inline] pub fn on_raft_message( &mut self, ctx: &mut StoreContext, msg: Box, - ) where + ) -> bool + where EK: KvEngine, ER: RaftEngine, T: Transport, @@ -379,7 +389,7 @@ impl Store { { m } else { - return; + return false; }; let from_peer = msg.get_from_peer(); let to_peer = msg.get_to_peer(); @@ -394,22 +404,22 @@ impl Store { ); if to_peer.store_id != self.store_id() { ctx.raft_metrics.message_dropped.mismatch_store_id.inc(); - return; + return false; } if !msg.has_region_epoch() { ctx.raft_metrics.message_dropped.mismatch_region_epoch.inc(); - return; + return false; } if msg.has_merge_target() { // Target tombstone peer doesn't exist, so ignore it. 
ctx.raft_metrics.message_dropped.stale_msg.inc(); - return; + return false; } let destroyed = match check_if_to_peer_destroyed(&ctx.engine, &msg, self.store_id()) { Ok(d) => d, Err(e) => { error!(self.logger(), "failed to get region state"; "region_id" => region_id, "err" => ?e); - return; + return false; } }; if destroyed { @@ -417,7 +427,7 @@ impl Store { if let Some(msg) = build_peer_destroyed_report(&mut msg) { let _ = ctx.trans.send(msg); } - return; + return false; } if msg.has_extra_msg() { let extra_msg = msg.get_extra_msg(); @@ -429,11 +439,11 @@ impl Store { forward_destroy_to_source_peer(&msg, |m| { let _ = ctx.router.send_raft_message(m.into()); }); - return; + return false; } } ctx.raft_metrics.message_dropped.region_tombstone_peer.inc(); - return; + return false; } // If it's not destroyed, and the message is a tombstone message, create the // peer and destroy immediately to leave a tombstone record. @@ -477,7 +487,7 @@ impl Store { Ok(p) => p, res => { error!(self.logger(), "failed to create peer"; "region_id" => region_id, "peer_id" => to_peer.id, "err" => ?res.err()); - return; + return false; } }; ctx.store_meta @@ -502,6 +512,7 @@ impl Store { // handling its first readiness. let _ = ctx.router.send(region_id, PeerMsg::RaftMessage(msg)); } + true } pub fn on_update_latency_inspectors( @@ -741,11 +752,14 @@ impl Peer { self.maybe_schedule_gc_peer_tick(); } - /// A peer can be destroyed in three cases: + /// A peer can be destroyed in four cases: + /// /// 1. Received a gc message; /// 2. Received a message whose target peer's ID is larger than this; /// 3. Applied a conf remove self command. - /// In all cases, the peer will be destroyed asynchronousely in next + /// 4. Received UnsafeRecoveryDestroy message. + /// + /// In all cases, the peer will be destroyed asynchronously in next /// handle_raft_ready. /// `triggered_msg` will be sent to store fsm after destroy is finished. 
/// Should set the message only when the target peer is supposed to be @@ -789,6 +803,7 @@ impl Peer { } // No need to wait for the apply anymore. self.unsafe_recovery_maybe_finish_wait_apply(true); + self.unsafe_recovery_maybe_finish_wait_initialized(true); // Use extra write to ensure these writes are the last writes to raft engine. let raft_engine = self.entry_storage().raft_engine(); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index f77db54c5af..2a393a884dc 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -885,7 +885,7 @@ impl Peer { if self.unsafe_recovery_state().is_some() { debug!(self.logger, "unsafe recovery finishes applying a snapshot"); - self.unsafe_recovery_maybe_finish_wait_apply(false); + self.check_unsafe_recovery_state(); } } diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/create.rs b/components/raftstore-v2/src/operation/unsafe_recovery/create.rs new file mode 100644 index 00000000000..9f710a90fea --- /dev/null +++ b/components/raftstore-v2/src/operation/unsafe_recovery/create.rs @@ -0,0 +1,137 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::collections::Bound::{Excluded, Unbounded}; + +use crossbeam::channel::SendError; +use engine_traits::{KvEngine, RaftEngine, TabletContext}; +use keys::{data_end_key, data_key, enc_start_key}; +use kvproto::metapb::Region; +use raftstore::store::{ + PeerPessimisticLocks, Transport, UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryState, + RAFT_INIT_LOG_INDEX, +}; +use slog::{error, info, warn}; + +use crate::{ + batch::StoreContext, + fsm::Store, + operation::{command::temp_split_path, SplitInit}, + raft::Peer, + router::PeerMsg, +}; + +impl Store { + // Reuse split mechanism to create peer. Because in v2 the only way to + // create and initialize to peer is via a snapshot message. 
+ pub fn on_unsafe_recovery_create_peer( + &mut self, + ctx: &mut StoreContext, + region: Region, + syncer: UnsafeRecoveryExecutePlanSyncer, + ) where + EK: KvEngine, + ER: RaftEngine, + T: Transport, + { + info!(self.logger(), "Unsafe recovery, creating a peer"; "peer" => ?region); + // Check if the peer has been created already. + let meta = ctx.store_meta.lock().unwrap(); + if let Some((_, id)) = meta + .region_ranges + .range(( + Excluded((data_key(region.get_start_key()), u64::MAX)), + Unbounded::<(Vec, u64)>, + )) + .next() + { + let (exist_region, _) = &meta.regions[id]; + if enc_start_key(exist_region) < data_end_key(region.get_end_key()) { + if exist_region.get_id() == region.get_id() { + warn!(self.logger(), + "Unsafe recovery, region has already been created"; + "region" => ?region, + "exist_region" => ?exist_region, + ); + return; + } else { + error!(self.logger(), + "Unsafe recovery, region to be created overlaps with an existing region"; + "region" => ?region, + "exist_region" => ?exist_region, + ); + return; + } + } + } + drop(meta); + + // Create an empty split tablet. + let region_id = region.get_id(); + let path = temp_split_path(&ctx.tablet_registry, region_id); + let tctx = TabletContext::new(®ion, Some(RAFT_INIT_LOG_INDEX)); + // TODO: make the follow line can recover from abort. + if let Err(e) = ctx + .tablet_registry + .tablet_factory() + .open_tablet(tctx, &path) + { + error!(self.logger(), + "Unsafe recovery, region to be created due to fail to open tablet"; + "region" => ?region, + "error" => ?e, + ); + return; + } + + let split_init = Box::new(SplitInit { + region, + derived_leader: false, + derived_region_id: 0, // No derived region. + check_split: false, + scheduled: false, + approximate_size: None, + approximate_keys: None, + locks: PeerPessimisticLocks::default(), + }); + // Skip sending SplitInit if there exists a peer, because a peer can not + // handle concurrent SplitInit messages. 
+ self.on_split_init(ctx, split_init, true /* skip_if_exists */); + + let wait = PeerMsg::UnsafeRecoveryWaitInitialized(syncer); + if let Err(SendError(_)) = ctx.router.force_send(region_id, wait) { + warn!( + self.logger(), + "Unsafe recovery, created peer is destroyed before sending wait msg"; + "region_id" => region_id, + ); + } + } +} + +impl Peer { + pub fn on_unsafe_recovery_wait_initialized(&mut self, syncer: UnsafeRecoveryExecutePlanSyncer) { + if self.unsafe_recovery_state().is_some() { + warn!(self.logger, + "Unsafe recovery, can't wait initialize, another plan is executing in progress"; + ); + syncer.abort(); + return; + } + + *self.unsafe_recovery_state_mut() = Some(UnsafeRecoveryState::WaitInitialize(syncer)); + self.unsafe_recovery_maybe_finish_wait_initialized(!self.serving()); + } + + pub fn unsafe_recovery_maybe_finish_wait_initialized(&mut self, force: bool) { + if let Some(UnsafeRecoveryState::WaitInitialize(_)) = self.unsafe_recovery_state() { + if self.storage().is_initialized() || force { + info!(self.logger, + "Unsafe recovery, finish wait initialize"; + "tablet_index" => self.storage().tablet_index(), + "force" => force, + ); + *self.unsafe_recovery_state_mut() = None; + } + } + } +} diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/destroy.rs b/components/raftstore-v2/src/operation/unsafe_recovery/destroy.rs new file mode 100644 index 00000000000..66f048f31d5 --- /dev/null +++ b/components/raftstore-v2/src/operation/unsafe_recovery/destroy.rs @@ -0,0 +1,22 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{KvEngine, RaftEngine}; +use raftstore::store::{UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryState}; +use slog::warn; + +use crate::raft::Peer; + +impl Peer { + pub fn on_unsafe_recovery_destroy_peer(&mut self, syncer: UnsafeRecoveryExecutePlanSyncer) { + if self.unsafe_recovery_state().is_some() { + warn!(self.logger, + "Unsafe recovery, can't destroy, another plan is executing in progress"; + ); + syncer.abort(); + return; + } + // Syncer will be dropped after peer finishing destroy process. + *self.unsafe_recovery_state_mut() = Some(UnsafeRecoveryState::Destroy(syncer)); + self.mark_for_destroy(None); + } +} diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/mod.rs b/components/raftstore-v2/src/operation/unsafe_recovery/mod.rs index c24c2838593..e7fba1c5c66 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/mod.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/mod.rs @@ -1,4 +1,6 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. +mod create; +mod destroy; mod force_leader; mod report; diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs index 9371a2e3c26..172252c8e61 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs @@ -106,6 +106,9 @@ impl Peer { Some(UnsafeRecoveryState::WaitApply { .. }) => { self.unsafe_recovery_maybe_finish_wait_apply(false) } + Some(UnsafeRecoveryState::WaitInitialize { .. }) => { + self.unsafe_recovery_maybe_finish_wait_initialized(false) + } Some(UnsafeRecoveryState::DemoteFailedVoters { .. }) => { // TODO: support demote. 
} diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index ee3399895d4..5b516b82e55 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -11,7 +11,7 @@ use engine_traits::{KvEngine, RaftEngine}; use futures::Future; use kvproto::{ kvrpcpb::ExtraOp, - metapb::RegionEpoch, + metapb::{Region, RegionEpoch}, pdpb, raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, raft_serverpb::RaftMessage, @@ -20,8 +20,8 @@ use raftstore::{ router::CdcHandle, store::{ fsm::ChangeObserver, AsyncReadNotifier, Callback, FetchedLogs, GenSnapRes, RegionSnapshot, - UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, - UnsafeRecoveryWaitApplySyncer, + UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, + UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, UnsafeRecoveryWaitApplySyncer, }, }; use slog::warn; @@ -290,6 +290,31 @@ impl UnsafeRecoveryHandle for UnsafeRecoveryRouter router.broadcast_normal(|| PeerMsg::ExitForceLeaderState); } + fn send_create_peer( + &self, + region: Region, + syncer: UnsafeRecoveryExecutePlanSyncer, + ) -> crate::Result<()> { + let router = self.0.lock().unwrap(); + match router.force_send_control(StoreMsg::UnsafeRecoveryCreatePeer { region, syncer }) { + Ok(()) => Ok(()), + Err(SendError(_)) => Err(box_err!("fail to send unsafe recovery create peer")), + } + } + + fn send_destroy_peer( + &self, + region_id: u64, + syncer: UnsafeRecoveryExecutePlanSyncer, + ) -> crate::Result<()> { + let router = self.0.lock().unwrap(); + match router.check_send(region_id, PeerMsg::UnsafeRecoveryDestroy(syncer)) { + // The peer may be destroy already. 
+ Err(crate::Error::RegionNotFound(_)) => Ok(()), + res => res, + } + } + fn broadcast_wait_apply(&self, syncer: UnsafeRecoveryWaitApplySyncer) { let router = self.0.lock().unwrap(); router.broadcast_normal(|| PeerMsg::UnsafeRecoveryWaitApply(syncer.clone())); diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index e8477968020..3a7da6a9168 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -15,8 +15,8 @@ use kvproto::{ use raftstore::store::{ fsm::ChangeObserver, metrics::RaftEventDurationType, simple_write::SimpleWriteBinary, util::LatencyInspector, FetchedLogs, GenSnapRes, TabletSnapKey, - UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, - UnsafeRecoveryWaitApplySyncer, + UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, + UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryWaitApplySyncer, }; use resource_control::ResourceMetered; use tikv_util::time::Instant; @@ -264,6 +264,10 @@ pub enum PeerMsg { UnsafeRecoveryWaitApply(UnsafeRecoveryWaitApplySyncer), /// Wait for a peer to fill its status to the report. UnsafeRecoveryFillOutReport(UnsafeRecoveryFillOutReportSyncer), + /// Wait for a peer to be initialized. + UnsafeRecoveryWaitInitialized(UnsafeRecoveryExecutePlanSyncer), + /// Destroy a peer. + UnsafeRecoveryDestroy(UnsafeRecoveryExecutePlanSyncer), } impl ResourceMetered for PeerMsg {} @@ -366,6 +370,11 @@ pub enum StoreMsg { }, /// Send a store report for unsafe recovery. UnsafeRecoveryReport(pdpb::StoreReport), + /// Create a peer for unsafe recovery. 
+ UnsafeRecoveryCreatePeer { + region: metapb::Region, + syncer: UnsafeRecoveryExecutePlanSyncer, + }, } impl ResourceMetered for StoreMsg {} diff --git a/components/raftstore-v2/src/worker/pd/store.rs b/components/raftstore-v2/src/worker/pd/store.rs index 91c8f17fcf9..9bc602a5a4f 100644 --- a/components/raftstore-v2/src/worker/pd/store.rs +++ b/components/raftstore-v2/src/worker/pd/store.rs @@ -285,8 +285,8 @@ where Ok(mut resp) => { // TODO: handle replication_status - if let Some(plan) = resp.recovery_plan.take() { - let router = Arc::new(UnsafeRecoveryRouter::new(router)); + if let Some(mut plan) = resp.recovery_plan.take() { + let handle = Arc::new(UnsafeRecoveryRouter::new(router)); info!(logger, "Unsafe recovery, received a recovery plan"); if plan.has_force_leader() { let mut failed_stores = HashSet::default(); @@ -295,10 +295,10 @@ where } let syncer = UnsafeRecoveryForceLeaderSyncer::new( plan.get_step(), - router.clone(), + handle.clone(), ); for region in plan.get_force_leader().get_enter_force_leaders() { - if let Err(e) = router.send_enter_force_leader( + if let Err(e) = handle.send_enter_force_leader( *region, syncer.clone(), failed_stores.clone(), @@ -309,9 +309,26 @@ where } } } else { - let _syncer = - UnsafeRecoveryExecutePlanSyncer::new(plan.get_step(), router); - // TODO: handle creates/tombstone/demotes + let syncer = UnsafeRecoveryExecutePlanSyncer::new( + plan.get_step(), + handle.clone(), + ); + for create in plan.take_creates().into_iter() { + if let Err(e) = handle.send_create_peer(create, syncer.clone()) { + error!(logger, + "fail to send create peer message for recovery"; + "err" => ?e); + } + } + for tombstone in plan.take_tombstones().into_iter() { + if let Err(e) = handle.send_destroy_peer(tombstone, syncer.clone()) + { + error!(logger, + "fail to send destroy peer message for recovery"; + "err" => ?e); + } + } + // TODO: handle demotes } } diff --git a/components/raftstore/src/store/unsafe_recovery.rs 
b/components/raftstore/src/store/unsafe_recovery.rs index a9c92ae8030..e125c25d18b 100644 --- a/components/raftstore/src/store/unsafe_recovery.rs +++ b/components/raftstore/src/store/unsafe_recovery.rs @@ -29,6 +29,18 @@ pub trait UnsafeRecoveryHandle: Sync + Send { fn broadcast_exit_force_leader(&self); + fn send_create_peer( + &self, + region: metapb::Region, + syncer: UnsafeRecoveryExecutePlanSyncer, + ) -> Result<()>; + + fn send_destroy_peer( + &self, + region_id: u64, + syncer: UnsafeRecoveryExecutePlanSyncer, + ) -> Result<()>; + fn broadcast_wait_apply(&self, syncer: UnsafeRecoveryWaitApplySyncer); fn broadcast_fill_out_report(&self, syncer: UnsafeRecoveryFillOutReportSyncer); @@ -58,6 +70,34 @@ impl UnsafeRecoveryHandle for Mutex Result<()> { + let router = self.lock().unwrap(); + match router.force_send_control(StoreMsg::UnsafeRecoveryCreatePeer { + syncer, + create: region, + }) { + Ok(()) => Ok(()), + Err(SendError(_)) => Err(box_err!("fail to send unsafe recovery create peer")), + } + } + + fn send_destroy_peer( + &self, + region_id: u64, + syncer: UnsafeRecoveryExecutePlanSyncer, + ) -> Result<()> { + let router = self.lock().unwrap(); + match router.significant_send(region_id, SignificantMsg::UnsafeRecoveryDestroy(syncer)) { + // The peer may be destroy already. 
+ Err(crate::Error::RegionNotFound(_)) => Ok(()), + res => res, + } + } + fn broadcast_wait_apply(&self, syncer: UnsafeRecoveryWaitApplySyncer) { let router = self.lock().unwrap(); router.broadcast_normal(|| { @@ -344,4 +384,5 @@ pub enum UnsafeRecoveryState { demote_after_exit: bool, }, Destroy(UnsafeRecoveryExecutePlanSyncer), + WaitInitialize(UnsafeRecoveryExecutePlanSyncer), } diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 8e62cca120b..5fa7a6b7290 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1382,18 +1382,18 @@ where } if let Some(mut plan) = resp.recovery_plan.take() { info!("Unsafe recovery, received a recovery plan"); + let handle = Arc::new(Mutex::new(router.clone())); if plan.has_force_leader() { let mut failed_stores = HashSet::default(); for failed_store in plan.get_force_leader().get_failed_stores() { failed_stores.insert(*failed_store); } - let router = Arc::new(Mutex::new(router.clone())); let syncer = UnsafeRecoveryForceLeaderSyncer::new( plan.get_step(), - router.clone(), + handle.clone(), ); for region in plan.get_force_leader().get_enter_force_leaders() { - if let Err(e) = router.send_enter_force_leader( + if let Err(e) = handle.send_enter_force_leader( *region, syncer.clone(), failed_stores.clone(), @@ -1404,24 +1404,16 @@ where } else { let syncer = UnsafeRecoveryExecutePlanSyncer::new( plan.get_step(), - Arc::new(Mutex::new(router.clone())), + handle.clone(), ); for create in plan.take_creates().into_iter() { - if let Err(e) = - router.send_control(StoreMsg::UnsafeRecoveryCreatePeer { - syncer: syncer.clone(), - create, - }) - { + if let Err(e) = handle.send_create_peer(create, syncer.clone()) { error!("fail to send create peer message for recovery"; "err" => ?e); } } for delete in plan.take_tombstones().into_iter() { - if let Err(e) = router.significant_send( - delete, - 
SignificantMsg::UnsafeRecoveryDestroy(syncer.clone()), - ) { - error!("fail to send delete peer message for recovery"; "err" => ?e); + if let Err(e) = handle.send_destroy_peer(delete, syncer.clone()) { + error!("fail to send destroy peer message for recovery"; "err" => ?e); } } for mut demote in plan.take_demotes().into_iter() { diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index c70d72fce24..2a5bb733962 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -1659,6 +1659,30 @@ impl, EK: KvEngine> Cluster { } } + pub fn must_remove_region(&mut self, store_id: u64, region_id: u64) { + let timer = Instant::now(); + loop { + let peer = new_peer(store_id, 0); + let find_leader = new_status_request(region_id, peer, new_region_leader_cmd()); + let resp = self + .call_command(find_leader, Duration::from_secs(5)) + .unwrap(); + + if is_error_response(&resp) { + assert!( + resp.get_header().get_error().has_region_not_found(), + "unexpected error resp: {:?}", + resp + ); + break; + } + if timer.saturating_elapsed() > Duration::from_secs(60) { + panic!("region {} is not removed after 60s.", region_id); + } + thread::sleep(Duration::from_millis(100)); + } + } + pub fn get_snap_dir(&self, node_id: u64) -> String { self.sim.rl().get_snap_dir(node_id) } @@ -1785,6 +1809,23 @@ impl, EK: KvEngine> Cluster { debug!("all nodes are shut down."); } + pub fn must_wait_for_leader_expire(&self, node_id: u64, region_id: u64) { + let timer = Instant::now_coarse(); + while timer.saturating_elapsed() < Duration::from_secs(5) { + if self + .query_leader(node_id, region_id, Duration::from_secs(1)) + .is_none() + { + return; + } + sleep_ms(100); + } + panic!( + "region {}'s replica in store {} still has a valid leader after 5 secs", + region_id, node_id + ); + } + pub fn must_send_store_heartbeat(&self, node_id: u64) { let router = self.sim.rl().get_router(node_id).unwrap(); router 
diff --git a/tests/failpoints/cases/test_unsafe_recovery.rs b/tests/failpoints/cases/test_unsafe_recovery.rs index a5c6bdf495a..f8370f1a1c0 100644 --- a/tests/failpoints/cases/test_unsafe_recovery.rs +++ b/tests/failpoints/cases/test_unsafe_recovery.rs @@ -363,91 +363,3 @@ fn test_unsafe_recovery_demotion_reentrancy() { assert_eq!(demoted, true); fail::remove("on_handle_apply_store_1"); } - -#[test] -fn test_unsafe_recovery_create_destroy_reentrancy() { - let mut cluster = new_server_cluster(0, 3); - cluster.run(); - let nodes = Vec::from_iter(cluster.get_node_ids()); - assert_eq!(nodes.len(), 3); - - let pd_client = Arc::clone(&cluster.pd_client); - pd_client.disable_default_operator(); - let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); - - // Makes the leadership definite. - let store2_peer = find_peer(®ion, nodes[1]).unwrap().to_owned(); - cluster.must_transfer_leader(region.get_id(), store2_peer); - cluster.put(b"random_key1", b"random_val1").unwrap(); - - // Split the region into 2, and remove one of them, so that we can test both - // region peer list update and region creation. - pd_client.must_split_region( - region, - pdpb::CheckPolicy::Usekey, - vec![b"random_key1".to_vec()], - ); - let region1 = pd_client.get_region(b"random_key".as_ref()).unwrap(); - let region2 = pd_client.get_region(b"random_key1".as_ref()).unwrap(); - let region1_store0_peer = find_peer(®ion1, nodes[0]).unwrap().to_owned(); - pd_client.must_remove_peer(region1.get_id(), region1_store0_peer); - cluster.must_remove_region(nodes[0], region1.get_id()); - - // Makes the group lose its quorum. - cluster.stop_node(nodes[1]); - cluster.stop_node(nodes[2]); - { - let put = new_put_cmd(b"k2", b"v2"); - let req = new_request( - region2.get_id(), - region2.get_region_epoch().clone(), - vec![put], - true, - ); - // marjority is lost, can't propose command successfully. 
- cluster - .call_command_on_leader(req, Duration::from_millis(10)) - .unwrap_err(); - } - - cluster.must_enter_force_leader(region2.get_id(), nodes[0], vec![nodes[1], nodes[2]]); - - // Construct recovery plan. - let mut plan = pdpb::RecoveryPlan::default(); - - let mut create = metapb::Region::default(); - create.set_id(101); - create.set_end_key(b"random_key1".to_vec()); - let mut peer = metapb::Peer::default(); - peer.set_id(102); - peer.set_store_id(nodes[0]); - create.mut_peers().push(peer); - plan.mut_creates().push(create); - - plan.mut_tombstones().push(region2.get_id()); - - pd_client.must_set_unsafe_recovery_plan(nodes[0], plan.clone()); - cluster.must_send_store_heartbeat(nodes[0]); - sleep_ms(100); - pd_client.must_set_unsafe_recovery_plan(nodes[0], plan.clone()); - cluster.must_send_store_heartbeat(nodes[0]); - - // Store reports are sent once the entries are applied. - let mut store_report = None; - for _ in 0..20 { - store_report = pd_client.must_get_store_report(nodes[0]); - if store_report.is_some() { - break; - } - sleep_ms(100); - } - assert_ne!(store_report, None); - let report = store_report.unwrap(); - let peer_reports = report.get_peer_reports(); - assert_eq!(peer_reports.len(), 1); - let reported_region = peer_reports[0].get_region_state().get_region(); - assert_eq!(reported_region.get_id(), 101); - assert_eq!(reported_region.get_peers().len(), 1); - assert_eq!(reported_region.get_peers()[0].get_id(), 102); - fail::remove("on_handle_apply_store_1"); -} diff --git a/tests/integrations/raftstore/test_unsafe_recovery.rs b/tests/integrations/raftstore/test_unsafe_recovery.rs index 5e47af7dd5e..2f5f9f04991 100644 --- a/tests/integrations/raftstore/test_unsafe_recovery.rs +++ b/tests/integrations/raftstore/test_unsafe_recovery.rs @@ -393,9 +393,55 @@ fn test_unsafe_recovery_early_return_after_exit_joint_state() { assert_eq!(demoted, true); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] 
+#[test_case(test_raftstore_v2::new_node_cluster)] fn test_unsafe_recovery_create_region() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + // Disable default max peer number check. + pd_client.disable_default_operator(); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let store0_peer = find_peer(®ion, nodes[0]).unwrap().to_owned(); + + // Removes the bootstrap region, since it overlaps with any regions we create. + pd_client.must_remove_peer(region.get_id(), store0_peer); + cluster.must_remove_region(nodes[0], region.get_id()); + + cluster.stop_node(nodes[1]); + cluster.stop_node(nodes[2]); + cluster.must_wait_for_leader_expire(nodes[0], region.get_id()); + + let mut create = metapb::Region::default(); + create.set_id(101); + create.set_start_key(b"anykey".to_vec()); + let mut peer = metapb::Peer::default(); + peer.set_id(102); + peer.set_store_id(nodes[0]); + create.mut_peers().push(peer); + let mut plan = pdpb::RecoveryPlan::default(); + plan.mut_creates().push(create); + pd_client.must_set_unsafe_recovery_plan(nodes[0], plan); + cluster.must_send_store_heartbeat(nodes[0]); + let mut created = false; + for _ in 1..11 { + let region = pd_client.get_region(b"anykey1").unwrap(); + if region.get_id() == 101 { + created = true; + } + sleep_ms(200); + } + assert_eq!(created, true); +} + +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_unsafe_recovery_create_region_reentrancy() { + let mut cluster = new_cluster(0, 3); cluster.run(); let nodes = Vec::from_iter(cluster.get_node_ids()); assert_eq!(nodes.len(), 3); @@ -407,7 +453,7 @@ fn test_unsafe_recovery_create_region() { let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); let store0_peer = find_peer(®ion, 
nodes[0]).unwrap().to_owned(); - // Removes the boostrap region, since it overlaps with any regions we create. + // Removes the bootstrap region, since it overlaps with any regions we create. pd_client.must_remove_peer(region.get_id(), store0_peer); cluster.must_remove_region(nodes[0], region.get_id()); @@ -423,6 +469,7 @@ fn test_unsafe_recovery_create_region() { peer.set_store_id(nodes[0]); create.mut_peers().push(peer); let mut plan = pdpb::RecoveryPlan::default(); + plan.mut_creates().push(create.clone()); plan.mut_creates().push(create); pd_client.must_set_unsafe_recovery_plan(nodes[0], plan); cluster.must_send_store_heartbeat(nodes[0]); @@ -437,6 +484,94 @@ fn test_unsafe_recovery_create_region() { assert_eq!(created, true); } +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_unsafe_recovery_create_destroy_reentrancy() { + let mut cluster = new_cluster(0, 3); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + + // Makes the leadership definite. + let store2_peer = find_peer(®ion, nodes[1]).unwrap().to_owned(); + cluster.must_transfer_leader(region.get_id(), store2_peer); + cluster.put(b"random_key1", b"random_val1").unwrap(); + + // Split the region into 2, and remove one of them, so that we can test both + // region peer list update and region creation. 
+ pd_client.must_split_region( + region, + pdpb::CheckPolicy::Usekey, + vec![b"random_key1".to_vec()], + ); + let region1 = pd_client.get_region(b"random_key".as_ref()).unwrap(); + let region2 = pd_client.get_region(b"random_key1".as_ref()).unwrap(); + let region1_store0_peer = find_peer(®ion1, nodes[0]).unwrap().to_owned(); + pd_client.must_remove_peer(region1.get_id(), region1_store0_peer); + cluster.must_remove_region(nodes[0], region1.get_id()); + + // Makes the group lose its quorum. + cluster.stop_node(nodes[1]); + cluster.stop_node(nodes[2]); + { + let put = new_put_cmd(b"k2", b"v2"); + let req = new_request( + region2.get_id(), + region2.get_region_epoch().clone(), + vec![put], + true, + ); + // marjority is lost, can't propose command successfully. + cluster + .call_command_on_leader(req, Duration::from_millis(10)) + .unwrap_err(); + } + + cluster.must_enter_force_leader(region2.get_id(), nodes[0], vec![nodes[1], nodes[2]]); + + // Construct recovery plan. + let mut plan = pdpb::RecoveryPlan::default(); + + let mut create = metapb::Region::default(); + create.set_id(101); + create.set_end_key(b"random_key1".to_vec()); + let mut peer = metapb::Peer::default(); + peer.set_id(102); + peer.set_store_id(nodes[0]); + create.mut_peers().push(peer); + plan.mut_creates().push(create); + + plan.mut_tombstones().push(region2.get_id()); + + pd_client.must_set_unsafe_recovery_plan(nodes[0], plan.clone()); + cluster.must_send_store_heartbeat(nodes[0]); + sleep_ms(100); + pd_client.must_set_unsafe_recovery_plan(nodes[0], plan.clone()); + cluster.must_send_store_heartbeat(nodes[0]); + + // Store reports are sent once the entries are applied. 
+ let mut store_report = None; + for _ in 0..20 { + store_report = pd_client.must_get_store_report(nodes[0]); + if store_report.is_some() { + break; + } + sleep_ms(100); + } + assert_ne!(store_report, None); + let report = store_report.unwrap(); + let peer_reports = report.get_peer_reports(); + assert_eq!(peer_reports.len(), 1); + let reported_region = peer_reports[0].get_region_state().get_region(); + assert_eq!(reported_region.get_id(), 101); + assert_eq!(reported_region.get_peers().len(), 1); + assert_eq!(reported_region.get_peers()[0].get_id(), 102); +} + macro_rules! must_get_error_recovery_in_progress { ($cluster:expr, $region:expr, $cmd:expr) => { let req = new_request( From 1e47e9a2a987127a2371457d2c1379d62a54de11 Mon Sep 17 00:00:00 2001 From: lijie Date: Thu, 3 Aug 2023 19:09:23 +0800 Subject: [PATCH 0834/1149] chore: bump version to 7.4.0-alpha (#15280) Signed-off-by: lijie --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 80d5ceaf5cc..5a196de0c3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6688,7 +6688,7 @@ dependencies = [ [[package]] name = "tikv" -version = "7.3.0-alpha" +version = "7.4.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index a2bbb19e787..d4d296214d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tikv" -version = "7.3.0-alpha" +version = "7.4.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From 9816906ff5133527691a93cd1b83a9d7e0858a17 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 7 Aug 2023 11:49:10 +0800 Subject: [PATCH 0835/1149] raftstore-v2: support unsafe recovery demote (#15213) ref tikv/tikv#15108 * raftstore-v2: support unsafe recovery demote * raftstore-v2: do not step force leader on confchange Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] 
<108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: Connor --- components/raftstore-v2/src/fsm/peer.rs | 11 ++ .../operation/command/admin/conf_change.rs | 3 +- .../raftstore-v2/src/operation/command/mod.rs | 2 +- .../raftstore-v2/src/operation/ready/mod.rs | 2 +- .../src/operation/unsafe_recovery/demote.rs | 142 ++++++++++++++++++ .../src/operation/unsafe_recovery/mod.rs | 1 + .../src/operation/unsafe_recovery/report.rs | 6 +- components/raftstore-v2/src/router/imp.rs | 18 ++- components/raftstore-v2/src/router/message.rs | 5 + .../src/router/response_channel.rs | 12 +- .../raftstore-v2/src/worker/pd/store.rs | 12 +- components/raftstore/src/store/fsm/peer.rs | 65 +------- components/raftstore/src/store/mod.rs | 3 +- .../raftstore/src/store/unsafe_recovery.rs | 83 +++++++++- components/raftstore/src/store/worker/pd.rs | 11 +- .../failpoints/cases/test_unsafe_recovery.rs | 53 ++++--- .../raftstore/test_unsafe_recovery.rs | 50 +++--- 17 files changed, 354 insertions(+), 125 deletions(-) create mode 100644 components/raftstore-v2/src/operation/unsafe_recovery/demote.rs diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 0e854f268e1..f6b9217ecbf 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -390,6 +390,17 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerMsg::UnsafeRecoveryDestroy(syncer) => { self.fsm.peer_mut().on_unsafe_recovery_destroy_peer(syncer) } + PeerMsg::UnsafeRecoveryDemoteFailedVoters { + failed_voters, + syncer, + } => self + .fsm + .peer_mut() + .on_unsafe_recovery_pre_demote_failed_voters( + self.store_ctx, + syncer, + failed_voters, + ), } } // TODO: instead of propose pending commands immediately, we should use timeout. 
diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 8d746391c1f..8c8e0e78d1e 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -160,7 +160,8 @@ impl Peer { "region" => ?self.region(), ); self.region_heartbeat_pd(ctx); - let demote_self = tikv_util::store::is_learner(self.peer()); + let demote_self = + tikv_util::store::is_learner(self.peer()) && !self.is_in_force_leader(); if remove_self || demote_self { warn!(self.logger, "removing or demoting leader"; "remove" => remove_self, "demote" => demote_self); let term = self.term(); diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index a85c0bc549b..0fd88cc987b 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -474,7 +474,7 @@ impl Peer { // We need to continue to apply after previous page is finished. 
self.set_has_ready(); } - self.check_unsafe_recovery_state(); + self.check_unsafe_recovery_state(ctx); } } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 2a393a884dc..b985fd69c27 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -885,7 +885,7 @@ impl Peer { if self.unsafe_recovery_state().is_some() { debug!(self.logger, "unsafe recovery finishes applying a snapshot"); - self.check_unsafe_recovery_state(); + self.check_unsafe_recovery_state(ctx); } } diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs new file mode 100644 index 00000000000..131a5b2109f --- /dev/null +++ b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs @@ -0,0 +1,142 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, RaftEngine}; +use kvproto::metapb; +use raftstore::store::{ + demote_failed_voters_request, exit_joint_request, Transport, UnsafeRecoveryExecutePlanSyncer, + UnsafeRecoveryState, +}; +use slog::{error, info, warn}; + +use crate::{batch::StoreContext, raft::Peer, router::CmdResChannel}; + +impl Peer { + pub fn on_unsafe_recovery_pre_demote_failed_voters( + &mut self, + ctx: &mut StoreContext, + syncer: UnsafeRecoveryExecutePlanSyncer, + failed_voters: Vec, + ) { + if self.unsafe_recovery_state().is_some() { + warn!(self.logger, + "Unsafe recovery, demote failed voters has already been initiated"; + ); + syncer.abort(); + return; + } + + if !self.is_in_force_leader() { + error!(self.logger, + "Unsafe recovery, demoting failed voters failed, since this peer is not forced leader"; + ); + return; + } + + if self.in_joint_state() { + info!(self.logger, + "Unsafe recovery, already in joint state, exit first"; + ); + let exit_joint = exit_joint_request(self.region(), self.peer()); + let 
(ch, sub) = CmdResChannel::pair(); + self.on_admin_command(ctx, exit_joint, ch); + if let Some(resp) = sub.try_result() && resp.get_header().has_error() { + error!(self.logger, + "Unsafe recovery, fail to exit residual joint state"; + "err" => ?resp.get_header().get_error(), + ); + return; + } + *self.unsafe_recovery_state_mut() = Some(UnsafeRecoveryState::DemoteFailedVoters { + syncer, + failed_voters, + target_index: self.raft_group().raft.raft_log.last_index(), + demote_after_exit: true, + }); + } else { + self.unsafe_recovery_demote_failed_voters(ctx, failed_voters, syncer); + } + } + + pub fn unsafe_recovery_demote_failed_voters( + &mut self, + ctx: &mut StoreContext, + failed_voters: Vec, + syncer: UnsafeRecoveryExecutePlanSyncer, + ) { + if let Some(req) = demote_failed_voters_request(self.region(), self.peer(), failed_voters) { + info!(self.logger, + "Unsafe recovery, demoting failed voters"; + "req" => ?req); + let (ch, sub) = CmdResChannel::pair(); + self.on_admin_command(ctx, req, ch); + if let Some(resp) = sub.try_result() && resp.get_header().has_error() { + error!(self.logger, + "Unsafe recovery, fail to finish demotion"; + "err" => ?resp.get_header().get_error(), + ); + return; + } + *self.unsafe_recovery_state_mut() = Some(UnsafeRecoveryState::DemoteFailedVoters { + syncer, + failed_voters: vec![], // No longer needed since here. 
+ target_index: self.raft_group().raft.raft_log.last_index(), + demote_after_exit: false, + }); + } else { + warn!(self.logger, + "Unsafe recovery, no need to demote failed voters"; + "region" => ?self.region(), + ); + } + } + + pub fn unsafe_recovery_maybe_finish_demote_failed_voters( + &mut self, + ctx: &mut StoreContext, + ) { + let Some(UnsafeRecoveryState::DemoteFailedVoters { + syncer, + failed_voters, + target_index, + demote_after_exit, + }) = self.unsafe_recovery_state() else { return }; + + if self.raft_group().raft.raft_log.applied < *target_index { + return; + } + + if *demote_after_exit { + let syncer_clone = syncer.clone(); + let failed_voters_clone = failed_voters.clone(); + *self.unsafe_recovery_state_mut() = None; + if !self.is_in_force_leader() { + error!(self.logger, + "Unsafe recovery, lost forced leadership after exiting joint state"; + ); + return; + } + self.unsafe_recovery_demote_failed_voters(ctx, failed_voters_clone, syncer_clone); + } else { + if self.in_joint_state() { + info!(self.logger, "Unsafe recovery, exiting joint state"); + if self.is_in_force_leader() { + let exit_joint = exit_joint_request(self.region(), self.peer()); + let (ch, sub) = CmdResChannel::pair(); + self.on_admin_command(ctx, exit_joint, ch); + if let Some(resp) = sub.try_result() && resp.get_header().has_error() { + error!(self.logger, + "Unsafe recovery, fail to exit joint state"; + "err" => ?resp.get_header().get_error(), + ); + } + } else { + error!(self.logger, + "Unsafe recovery, lost forced leadership while trying to exit joint state"; + ); + } + } + + *self.unsafe_recovery_state_mut() = None; + } + } +} diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/mod.rs b/components/raftstore-v2/src/operation/unsafe_recovery/mod.rs index e7fba1c5c66..2c6c1816a15 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/mod.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/mod.rs @@ -1,6 +1,7 @@ // Copyright 2023 TiKV Project 
Authors. Licensed under Apache-2.0. mod create; +mod demote; mod destroy; mod force_leader; mod report; diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs index 172252c8e61..1e1365ddf9f 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs @@ -101,7 +101,7 @@ impl Peer { syncer.report_for_self(self_report); } - pub fn check_unsafe_recovery_state(&mut self) { + pub fn check_unsafe_recovery_state(&mut self, ctx: &mut StoreContext) { match self.unsafe_recovery_state() { Some(UnsafeRecoveryState::WaitApply { .. }) => { self.unsafe_recovery_maybe_finish_wait_apply(false) @@ -110,9 +110,9 @@ impl Peer { self.unsafe_recovery_maybe_finish_wait_initialized(false) } Some(UnsafeRecoveryState::DemoteFailedVoters { .. }) => { - // TODO: support demote. + self.unsafe_recovery_maybe_finish_demote_failed_voters(ctx) } - Some(_) | None => {} + Some(UnsafeRecoveryState::Destroy(_)) | None => {} } } } diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 5b516b82e55..9c6cca96ae4 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -11,7 +11,7 @@ use engine_traits::{KvEngine, RaftEngine}; use futures::Future; use kvproto::{ kvrpcpb::ExtraOp, - metapb::{Region, RegionEpoch}, + metapb::{Peer, Region, RegionEpoch}, pdpb, raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, raft_serverpb::RaftMessage, @@ -315,6 +315,22 @@ impl UnsafeRecoveryHandle for UnsafeRecoveryRouter } } + fn send_demote_peers( + &self, + region_id: u64, + failed_voters: Vec, + syncer: UnsafeRecoveryExecutePlanSyncer, + ) -> crate::Result<()> { + let router = self.0.lock().unwrap(); + router.check_send( + region_id, + PeerMsg::UnsafeRecoveryDemoteFailedVoters { + syncer, + failed_voters, + }, + ) + } + fn broadcast_wait_apply(&self, 
syncer: UnsafeRecoveryWaitApplySyncer) { let router = self.0.lock().unwrap(); router.broadcast_normal(|| PeerMsg::UnsafeRecoveryWaitApply(syncer.clone())); diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 3a7da6a9168..f09314b4f17 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -268,6 +268,11 @@ pub enum PeerMsg { UnsafeRecoveryWaitInitialized(UnsafeRecoveryExecutePlanSyncer), /// Destroy a peer. UnsafeRecoveryDestroy(UnsafeRecoveryExecutePlanSyncer), + // Demote failed voter peers. + UnsafeRecoveryDemoteFailedVoters { + failed_voters: Vec, + syncer: UnsafeRecoveryExecutePlanSyncer, + }, } impl ResourceMetered for PeerMsg {} diff --git a/components/raftstore-v2/src/router/response_channel.rs b/components/raftstore-v2/src/router/response_channel.rs index d23b2852a03..4f47f971670 100644 --- a/components/raftstore-v2/src/router/response_channel.rs +++ b/components/raftstore-v2/src/router/response_channel.rs @@ -240,7 +240,7 @@ impl BaseSubscriber { } /// Synchronous version of `result`. It cannot be called concurrently with - /// another `take_result` or `result`. + /// another `try_result`, `take_result` or `result`. #[inline] pub fn take_result(&self) -> Option { let e = self.core.event.load(Ordering::Relaxed); @@ -252,6 +252,16 @@ impl BaseSubscriber { None } } + + /// Return an reference of the result. It be called concurrently with + /// other `try_result`. 
+ pub fn try_result(&self) -> Option<&Res> { + if self.has_result() { + unsafe { (*self.core.res.get()).as_ref() } + } else { + None + } + } } unsafe impl Send for BaseSubscriber {} diff --git a/components/raftstore-v2/src/worker/pd/store.rs b/components/raftstore-v2/src/worker/pd/store.rs index 9bc602a5a4f..f6aaa7f6ab0 100644 --- a/components/raftstore-v2/src/worker/pd/store.rs +++ b/components/raftstore-v2/src/worker/pd/store.rs @@ -328,7 +328,17 @@ where "err" => ?e); } } - // TODO: handle demotes + for mut demote in plan.take_demotes().into_iter() { + if let Err(e) = handle.send_demote_peers( + demote.get_region_id(), + demote.take_failed_voters().into_vec(), + syncer.clone(), + ) { + error!(logger, + "fail to send update peer list message for recovery"; + "err" => ?e); + } + } } } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 7c037edf029..c22cb001369 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -9,7 +9,7 @@ use std::{ Bound::{Excluded, Unbounded}, VecDeque, }, - iter::{FromIterator, Iterator}, + iter::Iterator, mem, sync::{Arc, Mutex}, time::{Duration, Instant}, @@ -71,6 +71,7 @@ use crate::{ coprocessor::{RegionChangeEvent, RegionChangeReason}, store::{ cmd_resp::{bind_term, new_error}, + demote_failed_voters_request, entry_storage::MAX_WARMED_UP_CACHE_KEEP_TIME, fsm::{ apply, @@ -90,15 +91,16 @@ use crate::{ region_meta::RegionMeta, transport::Transport, unsafe_recovery::{ - ForceLeaderState, SnapshotRecoveryState, SnapshotRecoveryWaitApplySyncer, - UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, - UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryState, UnsafeRecoveryWaitApplySyncer, + exit_joint_request, ForceLeaderState, SnapshotRecoveryState, + SnapshotRecoveryWaitApplySyncer, UnsafeRecoveryExecutePlanSyncer, + UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, + UnsafeRecoveryState, 
UnsafeRecoveryWaitApplySyncer, }, util, util::{KeysInfoFormatter, LeaseState}, worker::{ - new_change_peer_v2_request, Bucket, BucketRange, CleanupTask, ConsistencyCheckTask, - GcSnapshotTask, RaftlogGcTask, ReadDelegate, ReadProgress, RegionTask, SplitCheckTask, + Bucket, BucketRange, CleanupTask, ConsistencyCheckTask, GcSnapshotTask, RaftlogGcTask, + ReadDelegate, ReadProgress, RegionTask, SplitCheckTask, }, CasualMessage, Config, LocksStatus, MergeResultKind, PdTask, PeerMsg, PeerTick, ProposalContext, RaftCmdExtraOpts, RaftCommand, RaftlogFetchResult, ReadCallback, ReadTask, @@ -6805,57 +6807,6 @@ fn new_compact_log_request( request } -fn demote_failed_voters_request( - region: &metapb::Region, - peer: &metapb::Peer, - failed_voters: Vec, -) -> Option { - let failed_voter_ids = HashSet::from_iter(failed_voters.iter().map(|voter| voter.get_id())); - let mut req = new_admin_request(region.get_id(), peer.clone()); - req.mut_header() - .set_region_epoch(region.get_region_epoch().clone()); - let mut change_peer_reqs: Vec = region - .get_peers() - .iter() - .filter_map(|peer| { - if failed_voter_ids.contains(&peer.get_id()) - && peer.get_role() == metapb::PeerRole::Voter - { - let mut peer_clone = peer.clone(); - peer_clone.set_role(metapb::PeerRole::Learner); - let mut cp = pdpb::ChangePeer::default(); - cp.set_change_type(ConfChangeType::AddLearnerNode); - cp.set_peer(peer_clone); - return Some(cp); - } - None - }) - .collect(); - - // Promote self if it is a learner. 
- if peer.get_role() == metapb::PeerRole::Learner { - let mut cp = pdpb::ChangePeer::default(); - cp.set_change_type(ConfChangeType::AddNode); - let mut promote = peer.clone(); - promote.set_role(metapb::PeerRole::Voter); - cp.set_peer(promote); - change_peer_reqs.push(cp); - } - if change_peer_reqs.is_empty() { - return None; - } - req.set_admin_request(new_change_peer_v2_request(change_peer_reqs)); - Some(req) -} - -fn exit_joint_request(region: &metapb::Region, peer: &metapb::Peer) -> RaftCmdRequest { - let mut req = new_admin_request(region.get_id(), peer.clone()); - req.mut_header() - .set_region_epoch(region.get_region_epoch().clone()); - req.set_admin_request(new_change_peer_v2_request(vec![])); - req -} - impl<'a, EK, ER, T: Transport> PeerFsmDelegate<'a, EK, ER, T> where EK: KvEngine, diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index dad6cf69ed7..c0164c88b04 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -77,7 +77,8 @@ pub use self::{ transport::{CasualRouter, ProposalRouter, SignificantRouter, StoreRouter, Transport}, txn_ext::{LocksStatus, PeerPessimisticLocks, PessimisticLockPair, TxnExt}, unsafe_recovery::{ - ForceLeaderState, SnapshotRecoveryWaitApplySyncer, UnsafeRecoveryExecutePlanSyncer, + demote_failed_voters_request, exit_joint_request, ForceLeaderState, + SnapshotRecoveryWaitApplySyncer, UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, UnsafeRecoveryState, UnsafeRecoveryWaitApplySyncer, }, diff --git a/components/raftstore/src/store/unsafe_recovery.rs b/components/raftstore/src/store/unsafe_recovery.rs index e125c25d18b..92baeda00a1 100644 --- a/components/raftstore/src/store/unsafe_recovery.rs +++ b/components/raftstore/src/store/unsafe_recovery.rs @@ -10,11 +10,16 @@ use crossbeam::channel::SendError; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ metapb, - 
pdpb::{PeerReport, StoreReport}, + pdpb::{ChangePeer, PeerReport, StoreReport}, + raft_cmdpb::RaftCmdRequest, }; +use raft::eraftpb::ConfChangeType; use tikv_util::{box_err, error, info, time::Instant as TiInstant, warn}; -use super::{PeerMsg, RaftRouter, SignificantMsg, SignificantRouter, StoreMsg}; +use super::{ + fsm::new_admin_request, worker::new_change_peer_v2_request, PeerMsg, RaftRouter, + SignificantMsg, SignificantRouter, StoreMsg, +}; use crate::Result; /// A handle for PD to schedule online unsafe recovery commands back to @@ -41,6 +46,13 @@ pub trait UnsafeRecoveryHandle: Sync + Send { syncer: UnsafeRecoveryExecutePlanSyncer, ) -> Result<()>; + fn send_demote_peers( + &self, + region_id: u64, + failed_voters: Vec, + syncer: UnsafeRecoveryExecutePlanSyncer, + ) -> Result<()>; + fn broadcast_wait_apply(&self, syncer: UnsafeRecoveryWaitApplySyncer); fn broadcast_fill_out_report(&self, syncer: UnsafeRecoveryFillOutReportSyncer); @@ -98,6 +110,22 @@ impl UnsafeRecoveryHandle for Mutex, + syncer: UnsafeRecoveryExecutePlanSyncer, + ) -> Result<()> { + let router = self.lock().unwrap(); + router.significant_send( + region_id, + SignificantMsg::UnsafeRecoveryDemoteFailedVoters { + syncer, + failed_voters, + }, + ) + } + fn broadcast_wait_apply(&self, syncer: UnsafeRecoveryWaitApplySyncer) { let router = self.lock().unwrap(); router.broadcast_normal(|| { @@ -386,3 +414,54 @@ pub enum UnsafeRecoveryState { Destroy(UnsafeRecoveryExecutePlanSyncer), WaitInitialize(UnsafeRecoveryExecutePlanSyncer), } + +pub fn exit_joint_request(region: &metapb::Region, peer: &metapb::Peer) -> RaftCmdRequest { + let mut req = new_admin_request(region.get_id(), peer.clone()); + req.mut_header() + .set_region_epoch(region.get_region_epoch().clone()); + req.set_admin_request(new_change_peer_v2_request(vec![])); + req +} + +pub fn demote_failed_voters_request( + region: &metapb::Region, + peer: &metapb::Peer, + failed_voters: Vec, +) -> Option { + let failed_voter_ids = 
HashSet::from_iter(failed_voters.iter().map(|voter| voter.get_id())); + let mut req = new_admin_request(region.get_id(), peer.clone()); + req.mut_header() + .set_region_epoch(region.get_region_epoch().clone()); + let mut change_peer_reqs: Vec = region + .get_peers() + .iter() + .filter_map(|peer| { + if failed_voter_ids.contains(&peer.get_id()) + && peer.get_role() == metapb::PeerRole::Voter + { + let mut peer_clone = peer.clone(); + peer_clone.set_role(metapb::PeerRole::Learner); + let mut cp = ChangePeer::default(); + cp.set_change_type(ConfChangeType::AddLearnerNode); + cp.set_peer(peer_clone); + return Some(cp); + } + None + }) + .collect(); + + // Promote self if it is a learner. + if peer.get_role() == metapb::PeerRole::Learner { + let mut cp = ChangePeer::default(); + cp.set_change_type(ConfChangeType::AddNode); + let mut promote = peer.clone(); + promote.set_role(metapb::PeerRole::Voter); + cp.set_peer(promote); + change_peer_reqs.push(cp); + } + if change_peer_reqs.is_empty() { + return None; + } + req.set_admin_request(new_change_peer_v2_request(change_peer_reqs)); + Some(req) +} diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 5fa7a6b7290..8ae8d7fc5ed 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -59,7 +59,6 @@ use crate::{ store::{ cmd_resp::new_error, metrics::*, - transport::SignificantRouter, unsafe_recovery::{ UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, }, @@ -69,7 +68,7 @@ use crate::{ AutoSplitController, ReadStats, SplitConfigChange, WriteStats, }, Callback, CasualMessage, Config, PeerMsg, RaftCmdExtraOpts, RaftCommand, RaftRouter, - RegionReadProgressRegistry, SignificantMsg, SnapManager, StoreInfo, StoreMsg, TxnExt, + RegionReadProgressRegistry, SnapManager, StoreInfo, StoreMsg, TxnExt, }, }; @@ -1417,12 +1416,10 @@ where } } for mut demote in plan.take_demotes().into_iter() { - 
if let Err(e) = router.significant_send( + if let Err(e) = handle.send_demote_peers( demote.get_region_id(), - SignificantMsg::UnsafeRecoveryDemoteFailedVoters { - syncer: syncer.clone(), - failed_voters: demote.take_failed_voters().into_vec(), - }, + demote.take_failed_voters().into_vec(), + syncer.clone(), ) { error!("fail to send update peer list message for recovery"; "err" => ?e); } diff --git a/tests/failpoints/cases/test_unsafe_recovery.rs b/tests/failpoints/cases/test_unsafe_recovery.rs index f8370f1a1c0..e9c70cef73b 100644 --- a/tests/failpoints/cases/test_unsafe_recovery.rs +++ b/tests/failpoints/cases/test_unsafe_recovery.rs @@ -73,9 +73,10 @@ fn test_unsafe_recovery_send_report() { fail::remove("on_handle_apply_store_1"); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_unsafe_recovery_execution_result_report() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // Prolong force leader time. cluster.run(); let nodes = Vec::from_iter(cluster.get_node_ids()); @@ -191,9 +192,10 @@ fn test_unsafe_recovery_execution_result_report() { fail::remove("on_handle_apply_store_1"); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_unsafe_recovery_wait_for_snapshot_apply() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(8); cluster.cfg.raft_store.merge_max_log_gap = 3; cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(10); @@ -206,26 +208,24 @@ fn test_unsafe_recovery_wait_for_snapshot_apply() { let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); // Makes the leadership definite. 
- let store2_peer = find_peer(®ion, nodes[1]).unwrap().to_owned(); - cluster.must_transfer_leader(region.get_id(), store2_peer); + let store0_peer = find_peer(®ion, nodes[0]).unwrap().to_owned(); + cluster.must_transfer_leader(region.get_id(), store0_peer.clone()); cluster.stop_node(nodes[1]); - let (raft_gc_triggered_tx, raft_gc_triggered_rx) = mpsc::bounded::<()>(1); - let (raft_gc_finished_tx, raft_gc_finished_rx) = mpsc::bounded::<()>(1); - fail::cfg_callback("worker_gc_raft_log", move || { - let _ = raft_gc_triggered_rx.recv(); - }) - .unwrap(); - fail::cfg_callback("worker_gc_raft_log_finished", move || { - let _ = raft_gc_finished_tx.send(()); - }) - .unwrap(); - (0..10).for_each(|_| cluster.must_put(b"random_k", b"random_v")); - // Unblock raft log GC. - drop(raft_gc_triggered_tx); - // Wait until logs are GCed. - raft_gc_finished_rx - .recv_timeout(Duration::from_secs(3)) - .unwrap(); + + // Compact logs to force requesting snapshot after clearing send filters. + let state = cluster.truncated_state(region.get_id(), store0_peer.get_store_id()); + // Write some data to trigger snapshot. + for i in 100..150 { + let key = format!("k{}", i); + let value = format!("v{}", i); + cluster.must_put(key.as_bytes(), value.as_bytes()); + } + cluster.wait_log_truncated( + region.get_id(), + store0_peer.get_store_id(), + state.get_index() + 40, + ); + // Makes the group lose its quorum. 
cluster.stop_node(nodes[2]); @@ -269,14 +269,13 @@ fn test_unsafe_recovery_wait_for_snapshot_apply() { } assert_ne!(store_report, None); - fail::remove("worker_gc_raft_log"); - fail::remove("worker_gc_raft_log_finished"); fail::remove("region_apply_snap"); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_unsafe_recovery_demotion_reentrancy() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40); cluster.run(); let nodes = Vec::from_iter(cluster.get_node_ids()); diff --git a/tests/integrations/raftstore/test_unsafe_recovery.rs b/tests/integrations/raftstore/test_unsafe_recovery.rs index 2f5f9f04991..fae17bd3689 100644 --- a/tests/integrations/raftstore/test_unsafe_recovery.rs +++ b/tests/integrations/raftstore/test_unsafe_recovery.rs @@ -26,9 +26,10 @@ macro_rules! confirm_quorum_is_lost { }}; } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_unsafe_recovery_demote_failed_voters() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.run(); let nodes = Vec::from_iter(cluster.get_node_ids()); assert_eq!(nodes.len(), 3); @@ -81,9 +82,10 @@ fn test_unsafe_recovery_demote_failed_voters() { } // Demote non-exist voters will not work, but TiKV should still report to PD. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_unsafe_recovery_demote_non_exist_voters() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.run(); let nodes = Vec::from_iter(cluster.get_node_ids()); assert_eq!(nodes.len(), 3); @@ -146,9 +148,10 @@ fn test_unsafe_recovery_demote_non_exist_voters() { assert_eq!(demoted, false); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_unsafe_recovery_auto_promote_learner() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.run(); let nodes = Vec::from_iter(cluster.get_node_ids()); assert_eq!(nodes.len(), 3); @@ -168,7 +171,7 @@ fn test_unsafe_recovery_auto_promote_learner() { .must_remove_peer(region.get_id(), peer_on_store0.clone()); cluster.pd_client.must_add_peer( region.get_id(), - new_learner_peer(nodes[0], peer_on_store0.get_id()), + new_learner_peer(nodes[0], cluster.pd_client.alloc_id().unwrap()), ); // Sleep 100 ms to wait for the new learner to be initialized. 
sleep_ms(100); @@ -219,9 +222,10 @@ fn test_unsafe_recovery_auto_promote_learner() { assert!(promoted); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_unsafe_recovery_already_in_joint_state() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.run(); let nodes = Vec::from_iter(cluster.get_node_ids()); assert_eq!(nodes.len(), 3); @@ -238,10 +242,10 @@ fn test_unsafe_recovery_already_in_joint_state() { cluster .pd_client .must_remove_peer(region.get_id(), peer_on_store2.clone()); - cluster.pd_client.must_add_peer( - region.get_id(), - new_learner_peer(nodes[2], peer_on_store2.get_id()), - ); + let new_peer_id = cluster.pd_client.alloc_id().unwrap(); + cluster + .pd_client + .must_add_peer(region.get_id(), new_learner_peer(nodes[2], new_peer_id)); // Wait the new learner to be initialized. sleep_ms(100); pd_client.must_joint_confchange( @@ -251,10 +255,7 @@ fn test_unsafe_recovery_already_in_joint_state() { ConfChangeType::AddLearnerNode, new_learner_peer(nodes[0], peer_on_store0.get_id()), ), - ( - ConfChangeType::AddNode, - new_peer(nodes[2], peer_on_store2.get_id()), - ), + (ConfChangeType::AddNode, new_peer(nodes[2], new_peer_id)), ], ); cluster.stop_node(nodes[1]); @@ -264,6 +265,7 @@ fn test_unsafe_recovery_already_in_joint_state() { confirm_quorum_is_lost!(cluster, region); cluster.must_enter_force_leader(region.get_id(), nodes[0], vec![nodes[1], nodes[2]]); + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); let to_be_removed: Vec = region .get_peers() .iter() @@ -308,9 +310,10 @@ fn test_unsafe_recovery_already_in_joint_state() { // Tests whether unsafe recovery behaves correctly when the failed region is // already in the middle of a joint state, once exit, it recovers itself without // any further demotions. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_unsafe_recovery_early_return_after_exit_joint_state() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.run(); let nodes = Vec::from_iter(cluster.get_node_ids()); assert_eq!(nodes.len(), 3); @@ -329,16 +332,18 @@ fn test_unsafe_recovery_early_return_after_exit_joint_state() { cluster .pd_client .must_remove_peer(region.get_id(), peer_on_store0.clone()); + let new_peer_id_store0 = cluster.pd_client.alloc_id().unwrap(); cluster.pd_client.must_add_peer( region.get_id(), - new_learner_peer(nodes[0], peer_on_store0.get_id()), + new_learner_peer(nodes[0], new_peer_id_store0), ); + let new_peer_id_store2 = cluster.pd_client.alloc_id().unwrap(); cluster .pd_client .must_remove_peer(region.get_id(), peer_on_store2.clone()); cluster.pd_client.must_add_peer( region.get_id(), - new_learner_peer(nodes[2], peer_on_store2.get_id()), + new_learner_peer(nodes[2], new_peer_id_store2), ); // Wait the new learner to be initialized. 
sleep_ms(100); @@ -347,7 +352,7 @@ fn test_unsafe_recovery_early_return_after_exit_joint_state() { vec![ ( ConfChangeType::AddNode, - new_peer(nodes[0], peer_on_store0.get_id()), + new_peer(nodes[0], new_peer_id_store0), ), ( ConfChangeType::AddLearnerNode, @@ -362,6 +367,7 @@ fn test_unsafe_recovery_early_return_after_exit_joint_state() { confirm_quorum_is_lost!(cluster, region); cluster.must_enter_force_leader(region.get_id(), nodes[0], vec![nodes[1], nodes[2]]); + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); let to_be_removed: Vec = region .get_peers() .iter() From b9e5e376252c1760b60a21f47822b7e6510d8434 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 7 Aug 2023 14:28:10 +0800 Subject: [PATCH 0836/1149] Update Code of Conduct (#15287) ref tikv/tikv#15273 Signed-off-by: Neil Shen --- CODE_OF_CONDUCT.md | 58 +++------------------------------------------- 1 file changed, 3 insertions(+), 55 deletions(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 9459a436976..48f6704c6d3 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,57 +1,5 @@ -## CNCF Community Code of Conduct v1.0 +# Code of Conduct -This project follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/master/code-of-conduct.md). +We follow the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/main/code-of-conduct.md). 
-Other languages available: -- [Chinese/中文](https://github.com/cncf/foundation/blob/master/code-of-conduct-languages/zh.md) -- [German/Deutsch](https://github.com/cncf/foundation/blob/master/code-of-conduct-languages/de.md) -- [Spanish/Español](https://github.com/cncf/foundation/blob/master/code-of-conduct-languages/es.md) -- [French/Français](https://github.com/cncf/foundation/blob/master/code-of-conduct-languages/fr.md) -- [Italian/Italiano](https://github.com/cncf/foundation/blob/master/code-of-conduct-languages/it.md) -- [Japanese/日本語](https://github.com/cncf/foundation/blob/master/code-of-conduct-languages/jp.md) -- [Korean/한국어](https://github.com/cncf/foundation/blob/master/code-of-conduct-languages/ko.md) -- [Ukrainian/Українська](https://github.com/cncf/foundation/blob/master/code-of-conduct-languages/uk.md) -- [Russian/Русский](https://github.com/cncf/foundation/blob/master/code-of-conduct-languages/ru.md) -- [Portuguese/Português](https://github.com/cncf/foundation/blob/master/code-of-conduct-languages/pt.md) - -### Contributor Code of Conduct - -As contributors and maintainers of this project, and in the interest of fostering -an open and welcoming community, we pledge to respect all people who contribute -through reporting issues, posting feature requests, updating documentation, -submitting pull requests or patches, and other activities. - -We are committed to making participation in this project a harassment-free experience for -everyone, regardless of level of experience, gender, gender identity and expression, -sexual orientation, disability, personal appearance, body size, race, ethnicity, age, -religion, or nationality. 
- -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery -* Personal attacks -* Trolling or insulting/derogatory comments -* Public or private harassment -* Publishing other's private information, such as physical or electronic addresses, - without explicit permission -* Other unethical or unprofessional conduct. - -Project maintainers have the right and responsibility to remove, edit, or reject -comments, commits, code, wiki edits, issues, and other contributions that are not -aligned to this Code of Conduct. By adopting this Code of Conduct, project maintainers -commit themselves to fairly and consistently applying these principles to every aspect -of managing this project. Project maintainers who do not follow or enforce the Code of -Conduct may be permanently removed from the project team. - -This code of conduct applies both within project spaces and in public spaces -when an individual is representing the project or its community. - -Instances of abusive, harassing, or otherwise unacceptable behavior in Kubernetes may be reported by contacting the [Kubernetes Code of Conduct Committee](https://git.k8s.io/community/committee-code-of-conduct) via . For other projects, please contact a CNCF project maintainer or our mediator, Mishi Choudhary . - -This Code of Conduct is adapted from the Contributor Covenant -(http://contributor-covenant.org), version 1.2.0, available at -http://contributor-covenant.org/version/1/2/0/ - -### CNCF Events Code of Conduct - -CNCF events are governed by the Linux Foundation [Code of Conduct](https://events.linuxfoundation.org/code-of-conduct/) available on the event page. This is designed to be compatible with the above policy and also includes more details on responding to incidents. +Please contact the [CNCF Code of Conduct Committee](mailto:conduct@cncf.io) in order to report violations of the Code of Conduct. 
From 9b1a816f1229c122b2c322f2d297de2217820fb1 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 9 Aug 2023 15:16:55 +0800 Subject: [PATCH 0837/1149] raftstore: trigger compaction when no valid split key can be found (#15284) close tikv/tikv#15282 trigger compaction when no valid split key can be found Signed-off-by: SpadeA-Tang --- .../src/coprocessor/split_observer.rs | 8 +- components/raftstore/src/store/fsm/store.rs | 12 ++- components/raftstore/src/store/peer.rs | 81 +++++++++++++++++-- .../raftstore/src/store/worker/compact.rs | 1 + components/server/src/server.rs | 1 + components/test_raftstore/src/node.rs | 3 +- components/test_raftstore/src/server.rs | 3 +- src/server/node.rs | 6 +- tests/failpoints/cases/test_split_region.rs | 55 ++++++++++++- .../integrations/config/dynamic/raftstore.rs | 3 +- .../integrations/raftstore/test_bootstrap.rs | 3 +- tests/integrations/server/kv_service.rs | 3 +- 12 files changed, 164 insertions(+), 15 deletions(-) diff --git a/components/raftstore/src/coprocessor/split_observer.rs b/components/raftstore/src/coprocessor/split_observer.rs index 7f844f4b069..e84058d41dc 100644 --- a/components/raftstore/src/coprocessor/split_observer.rs +++ b/components/raftstore/src/coprocessor/split_observer.rs @@ -10,6 +10,8 @@ use tikv_util::{box_err, box_try, codec::bytes, error, warn}; use super::{AdminObserver, Coprocessor, ObserverContext, Result as CopResult}; use crate::{store::util, Error}; +pub const NO_VALID_SPLIT_KEY: &str = "no valid key found for split."; + pub fn strip_timestamp_if_exists(mut key: Vec) -> Vec { let mut slice = key.as_slice(); let strip_len = match bytes::decode_bytes(&mut slice, false) { @@ -35,6 +37,9 @@ pub fn is_valid_split_key(key: &[u8], index: usize, region: &Region) -> bool { } if let Err(Error::KeyNotInRegion(..)) = util::check_key_in_region_exclusive(key, region) { + // use this to distinguish whether the key is at the edge or outside of the + // region. 
+ let equal_start_key = key == region.get_start_key(); warn!( "skip invalid split key: key is not in region"; "key" => log_wrappers::Value::key(key), @@ -42,6 +47,7 @@ pub fn is_valid_split_key(key: &[u8], index: usize, region: &Region) -> bool { "start_key" => log_wrappers::Value::key(region.get_start_key()), "end_key" => log_wrappers::Value::key(region.get_end_key()), "index" => index, + "equal_start_key" => equal_start_key, ); return false; } @@ -90,7 +96,7 @@ impl SplitObserver { .collect::>(); if ajusted_splits.is_empty() { - Err("no valid key found for split.".to_owned()) + Err(NO_VALID_SPLIT_KEY.to_owned()) } else { // Rewrite the splits. *splits = ajusted_splits; diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index fd3ad648db7..b674a2379e9 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -10,7 +10,10 @@ use std::{ }, mem, ops::{Deref, DerefMut}, - sync::{atomic::Ordering, Arc, Mutex}, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, Mutex, + }, time::{Duration, Instant}, u64, }; @@ -571,6 +574,8 @@ where pub write_senders: WriteSenders, pub sync_write_worker: Option, T>>, pub pending_latency_inspect: Vec, + + pub safe_point: Arc, } impl PollContext @@ -1194,6 +1199,7 @@ pub struct RaftPollerBuilder { feature_gate: FeatureGate, write_senders: WriteSenders, node_start_time: Timespec, // monotonic_raw_now + safe_point: Arc, } impl RaftPollerBuilder { @@ -1452,6 +1458,7 @@ where write_senders: self.write_senders.clone(), sync_write_worker, pending_latency_inspect: vec![], + safe_point: self.safe_point.clone(), }; ctx.update_ticks_timeout(); let tag = format!("[store {}]", ctx.store.get_id()); @@ -1504,6 +1511,7 @@ where feature_gate: self.feature_gate.clone(), write_senders: self.write_senders.clone(), node_start_time: self.node_start_time, + safe_point: self.safe_point.clone(), } } } @@ -1578,6 +1586,7 @@ impl RaftBatchSystem { health_service: 
Option, causal_ts_provider: Option>, // used for rawkv apiv2 grpc_service_mgr: GrpcServiceManager, + safe_point: Arc, ) -> Result<()> { assert!(self.workers.is_none()); // TODO: we can get cluster meta regularly too later. @@ -1702,6 +1711,7 @@ impl RaftBatchSystem { feature_gate: pd_client.feature_gate().clone(), write_senders: self.store_writers.senders(), node_start_time: self.node_start_time, + safe_point, }; let region_peers = builder.init()?; self.start_system::( diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 3ff09362c96..d64257aa54f 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -19,11 +19,13 @@ use bytes::Bytes; use collections::{HashMap, HashSet}; use crossbeam::{atomic::AtomicCell, channel::TrySendError}; use engine_traits::{ - Engines, KvEngine, PerfContext, RaftEngine, Snapshot, WriteBatch, WriteOptions, CF_LOCK, + Engines, KvEngine, PerfContext, RaftEngine, Snapshot, WriteBatch, WriteOptions, CF_DEFAULT, + CF_LOCK, CF_WRITE, }; use error_code::ErrorCodeExt; use fail::fail_point; use getset::{Getters, MutGetters}; +use keys::{enc_end_key, enc_start_key}; use kvproto::{ errorpb, kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp}, @@ -81,7 +83,10 @@ use super::{ DestroyPeerJob, LocalReadContext, }; use crate::{ - coprocessor::{CoprocessorHost, RegionChangeEvent, RegionChangeReason, RoleChange}, + coprocessor::{ + split_observer::NO_VALID_SPLIT_KEY, CoprocessorHost, RegionChangeEvent, RegionChangeReason, + RoleChange, + }, errors::RAFTSTORE_IS_BUSY, router::RaftStoreRouter, store::{ @@ -99,8 +104,8 @@ use crate::{ unsafe_recovery::{ForceLeaderState, SnapshotRecoveryState, UnsafeRecoveryState}, util::{admin_cmd_epoch_lookup, RegionReadProgress}, worker::{ - HeartbeatTask, RaftlogGcTask, ReadDelegate, ReadExecutor, ReadProgress, RegionTask, - SplitCheckTask, + CleanupTask, CompactTask, HeartbeatTask, RaftlogGcTask, ReadDelegate, ReadExecutor, + ReadProgress, 
RegionTask, SplitCheckTask, }, Callback, Config, GlobalReplicationState, PdTask, ReadCallback, ReadIndexContext, ReadResponse, TxnExt, WriteCallback, RAFT_INIT_LOG_INDEX, @@ -782,6 +787,8 @@ where pub lead_transferee: u64, pub unsafe_recovery_state: Option, pub snapshot_recovery_state: Option, + + last_record_safe_point: u64, } impl Peer @@ -916,6 +923,7 @@ where REGION_READ_PROGRESS_CAP, peer_id, )), + last_record_safe_point: 0, memtrace_raft_entries: 0, write_router: WriteRouter::new(tag), unpersisted_readies: VecDeque::default(), @@ -4090,7 +4098,70 @@ where poll_ctx: &mut PollContext, req: &mut RaftCmdRequest, ) -> Result { - poll_ctx.coprocessor_host.pre_propose(self.region(), req)?; + poll_ctx + .coprocessor_host + .pre_propose(self.region(), req) + .map_err(|e| { + // If the error of prepropose contains str `NO_VALID_SPLIT_KEY`, it may mean the + // split_key of the split request is the region start key which + // means we may have so many potential duplicate mvcc versions + // that we can not manage to get a valid split key. So, we + // trigger a compaction to handle it. 
+ if e.to_string().contains(NO_VALID_SPLIT_KEY) { + let safe_ts = (|| { + fail::fail_point!("safe_point_inject", |t| { + t.unwrap().parse::().unwrap() + }); + poll_ctx.safe_point.load(Ordering::Relaxed) + })(); + if safe_ts <= self.last_record_safe_point { + debug!( + "skip schedule compact range due to safe_point not updated"; + "region_id" => self.region_id, + "safe_point" => safe_ts, + ); + return e; + } + + let start_key = enc_start_key(self.region()); + let end_key = enc_end_key(self.region()); + + let mut all_scheduled = true; + for cf in [CF_WRITE, CF_DEFAULT] { + let task = CompactTask::Compact { + cf_name: String::from(cf), + start_key: Some(start_key.clone()), + end_key: Some(end_key.clone()), + }; + + if let Err(e) = poll_ctx + .cleanup_scheduler + .schedule(CleanupTask::Compact(task)) + { + error!( + "schedule compact range task failed"; + "region_id" => self.region_id, + "cf" => ?cf, + "err" => ?e, + ); + all_scheduled = false; + break; + } + } + + if all_scheduled { + info!( + "schedule compact range due to no valid split keys"; + "region_id" => self.region_id, + "safe_point" => safe_ts, + "region_start_key" => log_wrappers::Value::key(&start_key), + "region_end_key" => log_wrappers::Value::key(&end_key), + ); + self.last_record_safe_point = safe_ts; + } + } + e + })?; let mut ctx = ProposalContext::empty(); if get_sync_log_from_request(req) { diff --git a/components/raftstore/src/store/worker/compact.rs b/components/raftstore/src/store/worker/compact.rs index 4448e26a5b3..f97f5d6fa34 100644 --- a/components/raftstore/src/store/worker/compact.rs +++ b/components/raftstore/src/store/worker/compact.rs @@ -102,6 +102,7 @@ where start_key: Option<&[u8]>, end_key: Option<&[u8]>, ) -> Result<(), Error> { + fail_point!("on_compact_range_cf"); let timer = Instant::now(); let compact_range_timer = COMPACT_RANGE_CF .with_label_values(&[cf_name]) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 0b9bd59f236..0dcef5c0082 100644 
--- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -956,6 +956,7 @@ where collector_reg_handle, self.causal_ts_provider.clone(), self.grpc_service_mgr.clone(), + safe_point.clone(), ) .unwrap_or_else(|e| fatal!("failed to start node: {}", e)); diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 3f0e9512dc3..f429f27ff8b 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -2,7 +2,7 @@ use std::{ path::Path, - sync::{Arc, Mutex, RwLock}, + sync::{atomic::AtomicU64, Arc, Mutex, RwLock}, }; use collections::{HashMap, HashSet}; @@ -335,6 +335,7 @@ impl Simulator for NodeCluster { CollectorRegHandle::new_for_test(), None, GrpcServiceManager::dummy(), + Arc::new(AtomicU64::new(0)), )?; assert!( engines diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 487ef4e8b59..8d26bae968d 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -2,7 +2,7 @@ use std::{ path::Path, - sync::{Arc, Mutex, RwLock}, + sync::{atomic::AtomicU64, Arc, Mutex, RwLock}, thread, time::Duration, usize, @@ -617,6 +617,7 @@ impl ServerCluster { collector_reg_handle, causal_ts_provider, GrpcServiceManager::dummy(), + Arc::new(AtomicU64::new(0)), )?; assert!(node_id == 0 || node_id == node.id()); let node_id = node.id(); diff --git a/src/server/node.rs b/src/server/node.rs index d92d2423216..228f679ed14 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -1,7 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - sync::{Arc, Mutex}, + sync::{atomic::AtomicU64, Arc, Mutex}, thread, time::Duration, }; @@ -174,6 +174,7 @@ where collector_reg_handle: CollectorRegHandle, causal_ts_provider: Option>, // used for rawkv apiv2 grpc_service_mgr: GrpcServiceManager, + safe_point: Arc, ) -> Result<()> where T: Transport + 'static, @@ -212,6 +213,7 @@ where collector_reg_handle, causal_ts_provider, grpc_service_mgr, + safe_point, )?; Ok(()) @@ -460,6 +462,7 @@ where collector_reg_handle: CollectorRegHandle, causal_ts_provider: Option>, // used for rawkv apiv2 grpc_service_mgr: GrpcServiceManager, + safe_point: Arc, ) -> Result<()> where T: Transport + 'static, @@ -494,6 +497,7 @@ where self.health_service.clone(), causal_ts_provider, grpc_service_mgr, + safe_point, )?; Ok(()) } diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index e5c1828a2dd..3520de4e3ad 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -3,7 +3,7 @@ use std::{ sync::{ atomic::{AtomicBool, Ordering}, - mpsc::{self, channel}, + mpsc::{self, channel, sync_channel}, Arc, Mutex, }, thread, @@ -39,7 +39,7 @@ use tikv_util::{ time::Instant, HandyRwLock, }; -use txn_types::{Key, LastChange, PessimisticLock}; +use txn_types::{Key, LastChange, PessimisticLock, TimeStamp}; #[test] fn test_follower_slow_split() { @@ -1356,3 +1356,54 @@ fn test_not_reset_has_dirty_data_due_to_slow_split() { cluster.must_put(b"k00001", b"val"); } + +#[test] +fn test_split_region_with_no_valid_split_keys() { + let mut cluster = test_raftstore::new_node_cluster(0, 3); + cluster.cfg.coprocessor.region_split_size = Some(ReadableSize::kb(1)); + cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(500); + cluster.run(); + + let (tx, rx) = sync_channel(5); + fail::cfg_callback("on_compact_range_cf", move || { + tx.send(true).unwrap(); + }) + .unwrap(); + + let safe_point_inject = "safe_point_inject"; 
+ fail::cfg(safe_point_inject, "return(100)").unwrap(); + + let mut raw_key = String::new(); + let _ = (0..250) + .map(|i: u8| { + raw_key.push(i as char); + }) + .collect::>(); + for i in 0..20 { + let key = Key::from_raw(raw_key.as_bytes()); + let key = key.append_ts(TimeStamp::new(i)); + cluster.must_put_cf(CF_WRITE, key.as_encoded(), b"val"); + } + + // one for default cf, one for write cf + rx.recv_timeout(Duration::from_secs(5)).unwrap(); + rx.recv_timeout(Duration::from_secs(5)).unwrap(); + + for i in 0..20 { + let key = Key::from_raw(raw_key.as_bytes()); + let key = key.append_ts(TimeStamp::new(i)); + cluster.must_put_cf(CF_WRITE, key.as_encoded(), b"val"); + } + // at most one compaction will be triggered for each safe_point + rx.try_recv().unwrap_err(); + + fail::cfg(safe_point_inject, "return(200)").unwrap(); + for i in 0..20 { + let key = Key::from_raw(raw_key.as_bytes()); + let key = key.append_ts(TimeStamp::new(i)); + cluster.must_put_cf(CF_WRITE, key.as_encoded(), b"val"); + } + rx.recv_timeout(Duration::from_secs(5)).unwrap(); + rx.recv_timeout(Duration::from_secs(5)).unwrap(); + rx.try_recv().unwrap_err(); +} diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index 9d507c9980d..4d6551ea27c 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -2,7 +2,7 @@ use std::{ iter::FromIterator, - sync::{mpsc, Arc, Mutex}, + sync::{atomic::AtomicU64, mpsc, Arc, Mutex}, time::Duration, }; @@ -114,6 +114,7 @@ fn start_raftstore( None, None, GrpcServiceManager::dummy(), + Arc::new(AtomicU64::new(0)), ) .unwrap(); diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 07fd692ca53..b43a3d00d16 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -1,7 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ path::Path, - sync::{mpsc::sync_channel, Arc, Mutex}, + sync::{atomic::AtomicU64, mpsc::sync_channel, Arc, Mutex}, time::Duration, }; @@ -122,6 +122,7 @@ fn test_node_bootstrap_with_prepared_data() { CollectorRegHandle::new_for_test(), None, GrpcServiceManager::dummy(), + Arc::new(AtomicU64::new(0)), ) .unwrap(); assert!( diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 6982f51503e..24b6a87bfa5 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -3,7 +3,7 @@ use std::{ char::from_u32, path::Path, - sync::*, + sync::{atomic::AtomicU64, *}, thread, time::{Duration, Instant}, }; @@ -1411,6 +1411,7 @@ fn test_double_run_node() { CollectorRegHandle::new_for_test(), None, GrpcServiceManager::dummy(), + Arc::new(AtomicU64::new(0)), ) .unwrap_err(); assert!(format!("{:?}", e).contains("already started"), "{:?}", e); From 099314f3f8185f9d56ccb2dcab57fd83db79383e Mon Sep 17 00:00:00 2001 From: 3pointer Date: Wed, 9 Aug 2023 19:28:55 +0800 Subject: [PATCH 0838/1149] resolved_ts: update leader info after conf change in raftstore v2 (#15249) ref tikv/tikv#15192 Signed-off-by: 3pointer Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../operation/command/admin/conf_change.rs | 3 +++ .../integrations/raftstore/test_stale_read.rs | 22 ++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 8c8e0e78d1e..9d58be13b04 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -197,6 +197,9 @@ impl Peer { .lock() .unwrap() .set_region(self.region(), true, &self.logger); + // Update leader's peer list after conf change. 
+ self.read_progress() + .update_leader_info(self.leader_id(), self.term(), self.region()); ctx.coprocessor_host.on_region_changed( self.region(), RegionChangeEvent::Update(RegionChangeReason::ChangePeer), diff --git a/tests/integrations/raftstore/test_stale_read.rs b/tests/integrations/raftstore/test_stale_read.rs index 466f2dff608..24e13003f7e 100644 --- a/tests/integrations/raftstore/test_stale_read.rs +++ b/tests/integrations/raftstore/test_stale_read.rs @@ -8,7 +8,7 @@ use kvproto::{ metapb::{Peer, Region}, tikvpb_grpc::TikvClient, }; -use test_raftstore::{must_get_equal, new_mutation, new_peer, new_server_cluster, PeerClient}; +use test_raftstore::{must_get_equal, new_mutation, new_peer, PeerClient}; use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, time::Instant}; @@ -78,11 +78,13 @@ fn test_stale_read_with_ts0() { #[test_case(test_raftstore::new_server_cluster)] #[test_case(test_raftstore_v2::new_server_cluster)] fn test_stale_read_resolved_ts_advance() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.resolved_ts.enable = true; cluster.cfg.resolved_ts.advance_ts_interval = ReadableDuration::millis(200); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); - cluster.run(); + cluster.run_conf_change(); let cluster = RefCell::new(cluster); let must_resolved_ts_advance = |region: &Region| { @@ -112,12 +114,26 @@ fn test_stale_read_resolved_ts_advance() { } } }; + // Now region 1 only has peer (1, 1); + let (key, value) = (b"k1", b"v1"); + + cluster.borrow_mut().must_put(key, value); + assert_eq!(cluster.borrow_mut().get(key), Some(value.to_vec())); // Make sure resolved ts advances. let region = cluster.borrow().get_region(&[]); must_resolved_ts_advance(®ion); + // Add peer (2, 2) to region 1. + pd_client.must_add_peer(region.id, new_peer(2, 2)); + must_get_equal(&cluster.borrow().get_engine(2), key, value); + + // Test conf change. 
+ let region = cluster.borrow().get_region(&[]); + must_resolved_ts_advance(®ion); + // Test transfer leader. + let region = cluster.borrow().get_region(&[]); cluster .borrow_mut() .must_transfer_leader(region.get_id(), region.get_peers()[1].clone()); From 2d2e8038ae1d13d76d40f1d83f0b2b3e7610b98b Mon Sep 17 00:00:00 2001 From: joker53-1 <56860465+joker53-1@users.noreply.github.com> Date: Thu, 10 Aug 2023 12:24:25 +0800 Subject: [PATCH 0839/1149] Modify the return value of the scan function under the RawStorage trait. (#15247) close tikv/tikv#15246 Modify the return value of the scan function under the RawStorage trait. * fix the scan function only return Value. Signed-off-by: joker53-1 <1205637346@qq.com> Co-authored-by: Andy Lok --- .../coprocessor_plugin_api/src/storage_api.rs | 2 +- src/coprocessor_v2/raw_storage_impl.rs | 26 +++++++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/components/coprocessor_plugin_api/src/storage_api.rs b/components/coprocessor_plugin_api/src/storage_api.rs index 08c09ca4a48..99f139885be 100644 --- a/components/coprocessor_plugin_api/src/storage_api.rs +++ b/components/coprocessor_plugin_api/src/storage_api.rs @@ -33,7 +33,7 @@ pub trait RawStorage { /// Same as [`RawStorage::get()`], but accepts a `key_range` such that /// values for keys in `[key_range.start, key_range.end)` are retrieved. /// The upper bound of the `key_range` is exclusive. - async fn scan(&self, key_range: Range) -> PluginResult>; + async fn scan(&self, key_range: Range) -> PluginResult>; /// Inserts a new key-value pair into the storage on the current node. 
async fn put(&self, key: Key, value: Value) -> PluginResult<()>; diff --git a/src/coprocessor_v2/raw_storage_impl.rs b/src/coprocessor_v2/raw_storage_impl.rs index 9a57b4c8624..d453c23819b 100644 --- a/src/coprocessor_v2/raw_storage_impl.rs +++ b/src/coprocessor_v2/raw_storage_impl.rs @@ -60,7 +60,7 @@ impl RawStorage for RawStorageImpl<'_, E Ok(kv_pairs) } - async fn scan(&self, key_range: Range) -> PluginResult> { + async fn scan(&self, key_range: Range) -> PluginResult> { let ctx = self.context.clone(); let key_only = false; let reverse = false; @@ -78,7 +78,7 @@ impl RawStorage for RawStorageImpl<'_, E let v = res.await.map_err(PluginErrorShim::from)?; let values = extract_kv_pairs(Ok(v)) .into_iter() - .map(|kv| kv.value) + .map(|kv| (kv.key, kv.value)) .collect(); Ok(values) } @@ -317,6 +317,13 @@ mod test { // Full scan let r = raw_storage.scan(full_scan.clone()).await.unwrap(); assert_eq!(r.len(), 3); + assert_eq!( + r, + keys.clone() + .into_iter() + .zip(values.clone()) + .collect::, Vec)>>() + ); // Batch delete (one non-existent) raw_storage @@ -331,6 +338,14 @@ mod test { .unwrap(); let r = raw_storage.scan(full_scan.clone()).await.unwrap(); assert_eq!(r.len(), 1); + assert_eq!( + r, + keys.clone() + .into_iter() + .skip(2) + .zip(values.clone().skip(2)) + .collect::, Vec)>>() + ); // Batch put (one overwrite) raw_storage @@ -339,6 +354,13 @@ mod test { .unwrap(); let r = raw_storage.scan(full_scan.clone()).await.unwrap(); assert_eq!(r.len(), 3); + assert_eq!( + r, + keys.clone() + .into_iter() + .zip(values.clone()) + .collect::, Vec)>>() + ); // Delete range (all) raw_storage.delete_range(full_scan.clone()).await.unwrap(); From c7c4f12c51d1d2800d3d116ecb7f8778f13e3cac Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 10 Aug 2023 14:46:56 +0800 Subject: [PATCH 0840/1149] raftstore-v2: fix error handling during merge trim check (#15243) ref tikv/tikv#12842 - Restore merge status when there's an error between trim check and propose. 
- Fix a case where snapshot just before prepare merge can cause rollback. - Mute an assertion that fails in v1 bulk_insert testing. - Use SkipUntil in compaction filter, together with the newly introduced UnsafeFilter API, this should remove any tombstones regardless of the context. Signed-off-by: tabokie --- Cargo.lock | 6 +- components/engine_rocks/src/util.rs | 88 +++++++++++++++++-- .../operation/command/admin/merge/prepare.rs | 80 ++++++++++++----- components/raftstore-v2/src/operation/life.rs | 24 ++++- .../src/operation/ready/snapshot.rs | 8 +- .../raftstore/src/store/entry_storage.rs | 15 +++- components/server/src/server.rs | 4 +- components/server/src/server2.rs | 4 +- 8 files changed, 186 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5a196de0c3e..b247dbafffa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3105,7 +3105,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#3dee8778c2666030c174952db67f39356d60e1dc" +source = "git+https://github.com/tikv/rust-rocksdb.git#0c78f4072d766b152e83b25d3068b5c72b5feca1" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3124,7 +3124,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#3dee8778c2666030c174952db67f39356d60e1dc" +source = "git+https://github.com/tikv/rust-rocksdb.git#0c78f4072d766b152e83b25d3068b5c72b5feca1" dependencies = [ "bzip2-sys", "cc", @@ -5098,7 +5098,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#3dee8778c2666030c174952db67f39356d60e1dc" +source = "git+https://github.com/tikv/rust-rocksdb.git#0c78f4072d766b152e83b25d3068b5c72b5feca1" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/components/engine_rocks/src/util.rs b/components/engine_rocks/src/util.rs index 52b1364c3ce..225cd1d7f06 100644 --- 
a/components/engine_rocks/src/util.rs +++ b/components/engine_rocks/src/util.rs @@ -436,7 +436,7 @@ pub struct StackingCompactionFilter { } impl CompactionFilter for StackingCompactionFilter { - fn featured_filter( + fn unsafe_filter( &mut self, level: usize, key: &[u8], @@ -445,12 +445,12 @@ impl CompactionFilter for StackingComp value_type: CompactionFilterValueType, ) -> CompactionFilterDecision { if let Some(outer) = self.outer.as_mut() - && let r = outer.featured_filter(level, key, seqno, value, value_type) + && let r = outer.unsafe_filter(level, key, seqno, value, value_type) && !matches!(r, CompactionFilterDecision::Keep) { r } else if let Some(inner) = self.inner.as_mut() { - inner.featured_filter(level, key, seqno, value, value_type) + inner.unsafe_filter(level, key, seqno, value, value_type) } else { CompactionFilterDecision::Keep } @@ -489,7 +489,7 @@ impl CompactionFilterFactory for RangeCompactionFilterFactory { pub struct RangeCompactionFilter(Arc); impl CompactionFilter for RangeCompactionFilter { - fn featured_filter( + fn unsafe_filter( &mut self, _level: usize, key: &[u8], @@ -497,8 +497,11 @@ impl CompactionFilter for RangeCompactionFilter { _value: &[u8], _value_type: CompactionFilterValueType, ) -> CompactionFilterDecision { - if key < self.0.start_key.as_ref() || key >= self.0.end_key.as_ref() { - CompactionFilterDecision::Remove + if key < self.0.start_key.as_ref() { + CompactionFilterDecision::RemoveAndSkipUntil(self.0.start_key.to_vec()) + } else if key >= self.0.end_key.as_ref() { + assert!(key < keys::DATA_MAX_KEY); + CompactionFilterDecision::RemoveAndSkipUntil(keys::DATA_MAX_KEY.to_vec()) } else { CompactionFilterDecision::Keep } @@ -507,7 +510,9 @@ impl CompactionFilter for RangeCompactionFilter { #[cfg(test)] mod tests { - use engine_traits::{CfOptionsExt, Peekable, SyncMutable, CF_DEFAULT}; + use engine_traits::{ + CfOptionsExt, FlowControlFactorsExt, Iterable, MiscExt, Peekable, SyncMutable, CF_DEFAULT, + }; use rocksdb::DB; use 
tempfile::Builder; @@ -608,4 +613,73 @@ mod tests { let tmp_cf_opts = db.get_options_cf("cf_dynamic_level_bytes").unwrap(); assert!(tmp_cf_opts.get_level_compaction_dynamic_level_bytes()); } + + #[test] + fn test_range_filter() { + let path = Builder::new() + .prefix("test_range_filter") + .tempdir() + .unwrap(); + let path_str = path.path().to_str().unwrap(); + + let mut cf_opts = RocksCfOptions::default(); + cf_opts + .set_compaction_filter_factory( + "range", + RangeCompactionFilterFactory::new( + b"b".to_vec().into_boxed_slice(), + b"c".to_vec().into_boxed_slice(), + ), + ) + .unwrap(); + let cfs_opts = vec![(CF_DEFAULT, cf_opts)]; + let db = new_engine_opt(path_str, RocksDbOptions::default(), cfs_opts).unwrap(); + + // in-range keys. + db.put(b"b1", b"").unwrap(); + db.put(b"c2", b"").unwrap(); + db.flush_cf(CF_DEFAULT, true).unwrap(); + assert_eq!( + db.get_cf_num_files_at_level(CF_DEFAULT, 0).unwrap(), + Some(1) + ); + + // put then delete. + db.put(b"a1", b"").unwrap(); + // avoid merging put and delete. + let _iter = db.iterator(CF_DEFAULT).unwrap(); + db.delete(b"a1").unwrap(); + db.delete(b"a1").unwrap(); + db.put(b"c1", b"").unwrap(); + let _iter = db.iterator(CF_DEFAULT).unwrap(); + db.delete(b"c1").unwrap(); + db.delete(b"c1").unwrap(); + db.flush_cf(CF_DEFAULT, true).unwrap(); + assert_eq!( + db.get_cf_num_files_at_level(CF_DEFAULT, 0).unwrap(), + Some(1) + ); + + // multiple puts. + db.put(b"a2", b"").unwrap(); + db.put(b"a2", b"").unwrap(); + db.put(b"c2", b"").unwrap(); + db.put(b"c2", b"").unwrap(); + db.flush_cf(CF_DEFAULT, true).unwrap(); + assert_eq!( + db.get_cf_num_files_at_level(CF_DEFAULT, 0).unwrap(), + Some(1) + ); + + // multiple deletes. 
+ db.delete(b"a3").unwrap(); + db.delete(b"a3").unwrap(); + db.delete(b"c3").unwrap(); + db.delete(b"c3").unwrap(); + db.flush_cf(CF_DEFAULT, true).unwrap(); + assert_eq!( + db.get_cf_num_files_at_level(CF_DEFAULT, 0).unwrap(), + Some(1) + ); + } } diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index 138f5d327fe..d3d1896287c 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -50,7 +50,7 @@ use raftstore::{ store::{metrics::PEER_ADMIN_CMD_COUNTER, util, LocksStatus, ProposalContext, Transport}, Error, Result, }; -use slog::{debug, error, info, warn}; +use slog::{debug, error, info}; use tikv_util::{ box_err, log::SlogFormat, slog_panic, store::region_on_same_stores, time::Instant, }; @@ -65,7 +65,7 @@ use crate::{ router::{CmdResChannel, PeerMsg, RaftRequest}, }; -const TRIM_CHECK_TIMEOUT: Duration = Duration::from_secs(10); +const TRIM_CHECK_TIMEOUT: Duration = Duration::from_secs(30); #[derive(Clone, Debug)] pub struct PreProposeContext { @@ -81,7 +81,7 @@ pub struct PreProposeContext { /// | | | ^ /// +-------+------+--> (3) ---+ /// -/// - None->1: `start_check_trim_status` +/// - None->1: `start_check_trim_status` / `already_checked_trim_status` /// - 1->2: `check_pessimistic_locks` /// - *->3: `on_catch_up_logs` /// - *->4: `on_apply_res_prepare_merge` / `Peer::new` @@ -182,22 +182,26 @@ impl Peer { // (3) `check_pessimistic_locks` // Check 1 and 3 are async, they yield by returning // `Error::PendingPrepareMerge`. + // + // Error handling notes: + // - `WaitForTrimStatus` can only become `WaitForFence` when both check 2&3 + // succeed. We need to clean up the status if any of them failed, otherwise we + // still can't serve other merge requests (not until status is cleaned up in + // `maybe_clean_up_stale_merge_context`). 
+ // - `WaitForFence` will always reach proposing phase when committed logs are + // applied. No intermediate error. let pre_propose = if let Some(r) = self.already_checked_pessimistic_locks()? { r - } else if self.already_checked_trim_status()? { - if !WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()) - .contains(WriteBatchFlags::PRE_FLUSH_FINISHED) - { - // We will always schedule flush (`merge_on_availability_response`) when trim - // status passed. - warn!( - self.logger, - "flush should already be scheduled for prepare merge" - ); - return Err(Error::PendingPrepareMerge); - } - let r = self.check_logs_before_prepare_merge(store_ctx)?; - self.check_pessimistic_locks(r, &mut req)? + } else if self.already_checked_trim_status(&req)? { + self.check_logs_before_prepare_merge(store_ctx) + .and_then(|r| self.check_pessimistic_locks(r, &mut req)) + .map_err(|e| { + if !matches!(e, Error::PendingPrepareMerge) { + info!(self.logger, "fail to advance to `WaitForFence`"; "err" => ?e); + self.take_merge_context(); + } + e + })? } else { return self.start_check_trim_status(store_ctx, &mut req); }; @@ -468,12 +472,13 @@ impl Peer { let logger = self.logger.clone(); let on_flush_finish = move || { let (ch, _) = CmdResChannel::pair(); - if let Err(e) = mailbox.try_send(PeerMsg::AdminCommand(RaftRequest::new(req, ch))) { + if let Err(e) = mailbox.force_send(PeerMsg::AdminCommand(RaftRequest::new(req, ch))) { error!( logger, "send PrepareMerge request failed after pre-flush finished"; "err" => ?e, ); + // We rely on `maybe_clean_up_stale_merge_context` to clean this up. 
} }; self.start_pre_flush( @@ -487,20 +492,36 @@ impl Peer { } } - fn already_checked_trim_status(&mut self) -> Result { + fn already_checked_trim_status(&mut self, req: &RaftCmdRequest) -> Result { + let flushed = WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()) + .contains(WriteBatchFlags::PRE_FLUSH_FINISHED); match self .merge_context() .as_ref() .and_then(|c| c.prepare_status.as_ref()) { Some(PrepareStatus::WaitForTrimStatus { pending_peers, .. }) => { - if pending_peers.is_empty() { + // We should wait for the request sent from flush callback. + if pending_peers.is_empty() && flushed { Ok(true) } else { Err(Error::PendingPrepareMerge) } } - None => Ok(false), + None => { + // Pre-flush can only be triggered when the request has checked trim status. + if flushed { + self.merge_context_mut().prepare_status = + Some(PrepareStatus::WaitForTrimStatus { + start_time: Instant::now_coarse(), + pending_peers: HashMap::default(), + req: None, + }); + Ok(true) + } else { + Ok(false) + } + } // Shouldn't reach here after calling `already_checked_pessimistic_locks` first. _ => unreachable!(), } @@ -582,17 +603,23 @@ impl Peer { } } + // Free up memory if trim check failed. #[inline] pub fn maybe_clean_up_stale_merge_context(&mut self) { // Check if there's a stale trim check. Ideally this should be implemented as a // tick. But this is simpler. + // We do not check `req.is_some()` here. When req is taken for propose in flush + // callback, it will either trigger a state transition to `WaitForFence` or + // abort. If we see the state here, it means the req never made it to + // `propose_prepare_merge`. + // If the req is still inflight and reaches `propose_prepare_merge` later, + // `already_checked_trim_status` will restore the status. if let Some(PrepareStatus::WaitForTrimStatus { - start_time, req, .. + start_time, .. 
}) = self .merge_context() .as_ref() .and_then(|c| c.prepare_status.as_ref()) - && req.is_some() && start_time.saturating_elapsed() > TRIM_CHECK_TIMEOUT { info!(self.logger, "cancel merge because trim check timed out"); @@ -722,6 +749,13 @@ impl Apply { PEER_ADMIN_CMD_COUNTER.prepare_merge.success.inc(); + info!( + self.logger, + "execute PrepareMerge"; + "index" => log_index, + "target_region" => ?prepare_merge.get_target(), + ); + let _ = self.flush(); let reg = self.tablet_registry(); let path = merge_source_path(reg, self.region_id(), log_index); diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index c5d64acd27f..52f00d137f8 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -782,10 +782,26 @@ impl Peer { // If it's marked as tombstone, then it must be changed by conf change. In // this case, all following entries are skipped so applied_index never equals // to last_applying_index. - (self.storage().region_state().get_state() != PeerState::Tombstone - && entry_storage.applied_index() != last_applying_index) - // Wait for critical commands like split. - || self.has_pending_tombstone_tablets() + if self.storage().region_state().get_state() != PeerState::Tombstone + && entry_storage.applied_index() != last_applying_index + { + info!( + self.logger, + "postpone destroy because there're pending apply logs"; + "applied" => entry_storage.applied_index(), + "last_applying" => last_applying_index, + ); + return true; + } + // Wait for critical commands like split. + if self.has_pending_tombstone_tablets() { + info!( + self.logger, + "postpone destroy because there're pending tombstone tablets" + ); + return true; + } + false } /// Start the destroy progress. 
It will write `Tombstone` state diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 92b9d3d17df..55f5c0b6379 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -301,8 +301,14 @@ impl Peer { self.storage_mut().on_applied_snapshot(); self.raft_group_mut().advance_apply_to(snapshot_index); - if self.proposal_control().is_merging() { + if self.proposal_control().has_applied_prepare_merge() { // After applying a snapshot, merge is rollbacked implicitly. + info!( + self.logger, + "rollback merge after applying snapshot"; + "index" => snapshot_index, + "region" => ?self.region(), + ); self.rollback_merge(ctx); } let read_tablet = SharedReadTablet::new(tablet.clone()); diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index f5226961a6c..c91c68538dd 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -1177,7 +1177,20 @@ impl EntryStorage { } else { range.1 == self.last_index() + 1 }; - assert!(is_valid, "the warmup range should still be valid"); + // FIXME: the assertion below doesn't hold. 
+ // assert!(is_valid, "the warmup range should still be valid"); + if !is_valid { + error!( + "unexpected warmup state"; + "region_id" => self.region_id, + "peer_id" => self.peer_id, + "cache_first" => ?self.entry_cache_first_index(), + "last_index" => self.last_index(), + "warmup_state_high" => range.1, + "last_entry_index" => index, + ); + return false; + } entries.truncate((range.1 - range.0) as usize); self.cache.prepend(entries); WARM_UP_ENTRY_CACHE_COUNTER.finished.inc(); diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 0dcef5c0082..c11df6cd502 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1641,7 +1641,7 @@ mod test { // Prepare some data for two tablets of the same region. So we can test whether // we fetch the bytes from the latest one. for i in 1..21 { - tablet.put_cf(CF_DEFAULT, b"key", b"val").unwrap(); + tablet.put_cf(CF_DEFAULT, b"zkey", b"val").unwrap(); if i % 2 == 0 { tablet.flush_cf(CF_DEFAULT, true).unwrap(); } @@ -1656,7 +1656,7 @@ mod test { tablet = cached.latest().unwrap(); for i in 1..11 { - tablet.put_cf(CF_DEFAULT, b"key", b"val").unwrap(); + tablet.put_cf(CF_DEFAULT, b"zkey", b"val").unwrap(); if i % 2 == 0 { tablet.flush_cf(CF_DEFAULT, true).unwrap(); } diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 8de11ee05f2..d4f65b8ce88 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1592,7 +1592,7 @@ mod test { // Prepare some data for two tablets of the same region. So we can test whether // we fetch the bytes from the latest one. 
for i in 1..21 { - tablet.put_cf(CF_DEFAULT, b"key", b"val").unwrap(); + tablet.put_cf(CF_DEFAULT, b"zkey", b"val").unwrap(); if i % 2 == 0 { tablet.flush_cf(CF_DEFAULT, true).unwrap(); } @@ -1607,7 +1607,7 @@ mod test { tablet = cached.latest().unwrap(); for i in 1..11 { - tablet.put_cf(CF_DEFAULT, b"key", b"val").unwrap(); + tablet.put_cf(CF_DEFAULT, b"zkey", b"val").unwrap(); if i % 2 == 0 { tablet.flush_cf(CF_DEFAULT, true).unwrap(); } From 2dd419923c1a3eeef2098248c159fa9d55a49220 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 10 Aug 2023 16:03:26 +0800 Subject: [PATCH 0841/1149] raftkv: add admin command failed log (#15295) ref tikv/tikv#15258 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/server/raftkv/mod.rs | 32 ++++++++++++++++++++++------- src/server/raftkv2/mod.rs | 42 +++++++++++++++++++++++++++------------ 2 files changed, 54 insertions(+), 20 deletions(-) diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 1dd7d9536c6..f5f4d77aa9f 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -23,7 +23,7 @@ use std::{ use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; use engine_traits::{CfName, KvEngine, MvccProperties, Snapshot}; -use futures::{future::BoxFuture, task::AtomicWaker, Future, Stream, StreamExt}; +use futures::{future::BoxFuture, task::AtomicWaker, Future, Stream, StreamExt, TryFutureExt}; use kvproto::{ errorpb, kvrpcpb::{Context, IsolationLevel}, @@ -195,18 +195,36 @@ fn exec_admin>( router: &S, req: RaftCmdRequest, ) -> BoxFuture<'static, kv::Result<()>> { + let region_id = req.get_header().get_region_id(); + let peer_id = req.get_header().get_peer().get_id(); + let term = req.get_header().get_term(); + let epoch = req.get_header().get_region_epoch().clone(); + let admin_type = req.get_admin_request().get_cmd_type(); let (cb, f) = paired_future_callback(); let res = router.send_command( req, 
raftstore::store::Callback::write(cb), RaftCmdExtraOpts::default(), ); - Box::pin(async move { - res?; - let mut resp = box_try!(f.await); - check_raft_cmd_response(&mut resp.response)?; - Ok(()) - }) + Box::pin( + async move { + res?; + let mut resp = box_try!(f.await); + check_raft_cmd_response(&mut resp.response)?; + Ok(()) + } + .map_err(move |e| { + warn!("failed to execute admin command"; + "err" => ?e, + "admin_type" => ?admin_type, + "term" => term, + "region_epoch" => ?epoch, + "peer_id" => peer_id, + "region_id" => region_id, + ); + e + }), + ) } pub fn drop_snapshot_callback() -> kv::Result { diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index a023ba158c5..dacc90a91f0 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -12,7 +12,7 @@ use std::{ use collections::HashSet; use engine_traits::{KvEngine, RaftEngine, CF_LOCK}; -use futures::{future::BoxFuture, Future, Stream, StreamExt}; +use futures::{future::BoxFuture, Future, Stream, StreamExt, TryFutureExt}; use kvproto::{ kvrpcpb::Context, raft_cmdpb::{AdminCmdType, CmdType, RaftCmdRequest, Request}, @@ -356,19 +356,35 @@ fn exec_admin( req: RaftCmdRequest, ) -> BoxFuture<'static, tikv_kv::Result<()>> { let region_id = req.get_header().get_region_id(); + let peer_id = req.get_header().get_peer().get_id(); + let term = req.get_header().get_term(); + let epoch = req.get_header().get_region_epoch().clone(); let admin_type = req.get_admin_request().get_cmd_type(); let (msg, sub) = PeerMsg::admin_command(req); let res = router.check_send(region_id, msg); - Box::pin(async move { - res?; - let mut resp = sub.result().await.ok_or_else(|| -> tikv_kv::Error { - box_err!( - "region {} exec_admin {:?} without response", - region_id, - admin_type - ) - })?; - check_raft_cmd_response(&mut resp)?; - Ok(()) - }) + Box::pin( + async move { + res?; + let mut resp = sub.result().await.ok_or_else(|| -> tikv_kv::Error { + box_err!( + "region {} exec_admin {:?} without response", + 
region_id, + admin_type + ) + })?; + check_raft_cmd_response(&mut resp)?; + Ok(()) + } + .map_err(move |e| { + warn!("failed to execute admin command"; + "err" => ?e, + "admin_type" => ?admin_type, + "term" => term, + "region_epoch" => ?epoch, + "peer_id" => peer_id, + "region_id" => region_id, + ); + e + }), + ) } From 9dc7450ef22abde48e14e61fc16d4f22b137d450 Mon Sep 17 00:00:00 2001 From: you06 Date: Fri, 11 Aug 2023 10:09:26 +0800 Subject: [PATCH 0842/1149] add error type MismatchPeerId (#15117) close tikv/tikv#15288 Add error type for "mismatch peer id". Signed-off-by: you06 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/error_code/src/raftstore.rs | 1 + components/raftstore/src/errors.rs | 16 +++++++++++++++ components/raftstore/src/store/util.rs | 28 +++++++++++++++++++++----- 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/components/error_code/src/raftstore.rs b/components/error_code/src/raftstore.rs index 35dfe564ef0..7a2de2403e3 100644 --- a/components/error_code/src/raftstore.rs +++ b/components/error_code/src/raftstore.rs @@ -33,6 +33,7 @@ define_error_codes!( FLASHBACK_IN_PROGRESS => ("FlashbackInProgress", "", ""), FLASHBACK_NOT_PREPARED => ("FlashbackNotPrepared", "", ""), IS_WITNESS => ("IsWitness", "", ""), + MISMATCH_PEER_ID => ("MismatchPeerId", "", ""), SNAP_ABORT => ("SnapAbort", "", ""), SNAP_TOO_MANY => ("SnapTooMany", "", ""), diff --git a/components/raftstore/src/errors.rs b/components/raftstore/src/errors.rs index 5deef832723..d1597a77121 100644 --- a/components/raftstore/src/errors.rs +++ b/components/raftstore/src/errors.rs @@ -143,6 +143,12 @@ pub enum Error { #[error("peer is a witness of region {0}")] IsWitness(u64), + + #[error("mismatch peer id {} != {}", .request_peer_id, .store_peer_id)] + MismatchPeerId { + request_peer_id: u64, + store_peer_id: u64, + }, } pub type Result = result::Result; @@ -272,6 +278,15 @@ impl From for errorpb::Error { 
e.set_region_id(region_id); errorpb.set_is_witness(e); } + Error::MismatchPeerId { + request_peer_id, + store_peer_id, + } => { + let mut e = errorpb::MismatchPeerId::default(); + e.set_request_peer_id(request_peer_id); + e.set_store_peer_id(store_peer_id); + errorpb.set_mismatch_peer_id(e); + } _ => {} }; @@ -329,6 +344,7 @@ impl ErrorCodeExt for Error { Error::DeadlineExceeded => error_code::raftstore::DEADLINE_EXCEEDED, Error::PendingPrepareMerge => error_code::raftstore::PENDING_PREPARE_MERGE, Error::IsWitness(..) => error_code::raftstore::IS_WITNESS, + Error::MismatchPeerId { .. } => error_code::raftstore::MISMATCH_PEER_ID, Error::Other(_) | Error::RegionNotRegistered { .. } => error_code::raftstore::UNKNOWN, } diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index d12ddcb4c4e..d3dc0b3e920 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -423,11 +423,10 @@ pub fn check_peer_id(header: &RaftRequestHeader, peer_id: u64) -> Result<()> { if header.get_peer().get_id() == peer_id { Ok(()) } else { - Err(box_err!( - "mismatch peer id {} != {}", - header.get_peer().get_id(), - peer_id - )) + Err(Error::MismatchPeerId { + request_peer_id: header.get_peer().get_id(), + store_peer_id: peer_id, + }) } } @@ -2417,4 +2416,23 @@ mod tests { *region.get_peers(), ); } + + #[test] + fn test_peer_id_mismatch() { + use kvproto::errorpb::{Error, MismatchPeerId}; + let mut header = RaftRequestHeader::default(); + let mut peer = Peer::default(); + peer.set_id(1); + header.set_peer(peer); + // match + check_peer_id(&header, 1).unwrap(); + // mismatch + let err = check_peer_id(&header, 2).unwrap_err(); + let region_err: Error = err.into(); + assert!(region_err.has_mismatch_peer_id()); + let mut mismatch_err = MismatchPeerId::default(); + mismatch_err.set_request_peer_id(1); + mismatch_err.set_store_peer_id(2); + assert_eq!(region_err.get_mismatch_peer_id(), &mismatch_err) + } } From 
b43ff0f1c3dcbd582772f2a052b7089be8364feb Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 11 Aug 2023 17:28:57 +0800 Subject: [PATCH 0843/1149] oneline_config: CamelCase encoder name (#15311) close tikv/tikv#15310 Rust uses CamelCase for struct names. And add a test for #[doc(hidden)] field. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/online_config/online_config_derive/src/lib.rs | 2 +- components/online_config/src/lib.rs | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/components/online_config/online_config_derive/src/lib.rs b/components/online_config/online_config_derive/src/lib.rs index 5518aa0e5e6..bb37aad5924 100644 --- a/components/online_config/online_config_derive/src/lib.rs +++ b/components/online_config/online_config_derive/src/lib.rs @@ -30,7 +30,7 @@ fn generate_token(ast: DeriveInput) -> std::result::Result { // Avoid naming conflict let mut hasher = DefaultHasher::new(); format!("{}", &name).hash(&mut hasher); - format!("{}_encoder_{:x}", name, hasher.finish()).as_str() + format!("{}Encoder{:x}", name, hasher.finish()).as_str() }, Span::call_site(), ); diff --git a/components/online_config/src/lib.rs b/components/online_config/src/lib.rs index 8be3c2087b4..45694305a5f 100644 --- a/components/online_config/src/lib.rs +++ b/components/online_config/src/lib.rs @@ -140,6 +140,8 @@ mod tests { #[derive(Clone, OnlineConfig, Debug, Default, PartialEq)] pub struct TestConfig { + // Test doc hidden fields support online config change. 
+ #[doc(hidden)] field1: usize, field2: String, optional_field1: Option, From 7f53c909592e048af759d9c1281ceda502ce0250 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Mon, 14 Aug 2023 10:07:58 +0800 Subject: [PATCH 0844/1149] replica_read: reject read index if the log gap is large (#15270) close tikv/tikv#15200 reject read index if the applied index + raft_log_gc_count_limit <= read_index Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../raftstore-v2/src/operation/query/mod.rs | 13 +++++- .../src/operation/query/replica.rs | 27 ++++++++++++ components/raftstore/src/store/config.rs | 10 +++++ components/raftstore/src/store/peer.rs | 39 +++++++++++++++- tests/failpoints/cases/test_replica_read.rs | 1 - tests/integrations/config/mod.rs | 1 + .../raftstore/test_replica_read.rs | 44 +++++++++++++++++++ 7 files changed, 132 insertions(+), 3 deletions(-) diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index eddbacd1dab..2f1b1cd0138 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -282,10 +282,21 @@ impl Peer { && read.cmds()[0].0.get_requests().len() == 1 && read.cmds()[0].0.get_requests()[0].get_cmd_type() == CmdType::ReadIndex; + let read_index = read.read_index.unwrap(); if is_read_index_request { self.respond_read_index(&mut read); - } else if self.ready_to_handle_unsafe_replica_read(read.read_index.unwrap()) { + } else if self.ready_to_handle_unsafe_replica_read(read_index) { self.respond_replica_read(&mut read); + } else if self.storage().apply_state().get_applied_index() + + ctx.cfg.follower_read_max_log_gap() + <= read_index + { + let mut response = cmd_resp::new_error(Error::ReadIndexNotReady { + region_id: self.region_id(), + reason: "applied index fail behind read index too long", + }); + cmd_resp::bind_term(&mut 
response, self.term()); + self.respond_replica_read_error(&mut read, response); } else { // TODO: `ReadIndex` requests could be blocked. self.pending_reads_mut().push_front(read); diff --git a/components/raftstore-v2/src/operation/query/replica.rs b/components/raftstore-v2/src/operation/query/replica.rs index 1d64250c34b..a8659a913bc 100644 --- a/components/raftstore-v2/src/operation/query/replica.rs +++ b/components/raftstore-v2/src/operation/query/replica.rs @@ -128,4 +128,31 @@ impl Peer { } } } + + pub(crate) fn respond_replica_read_error( + &self, + read_index_req: &mut ReadIndexRequest, + response: RaftCmdResponse, + ) { + debug!( + self.logger, + "handle replica reads with a read index failed"; + "request_id" => ?read_index_req.id, + "response" => ?response, + ); + RAFT_READ_INDEX_PENDING_COUNT.sub(read_index_req.cmds().len() as i64); + let time = monotonic_raw_now(); + for (_, ch, _) in read_index_req.take_cmds().drain(..) { + ch.read_tracker().map(|tracker| { + GLOBAL_TRACKERS.with_tracker(tracker, |t| { + t.metrics.read_index_confirm_wait_nanos = (time - read_index_req.propose_time) + .to_std() + .unwrap() + .as_nanos() + as u64; + }) + }); + ch.report_error(response.clone()); + } + } } diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index eca0b3c1ede..817be7eb969 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -87,6 +87,11 @@ pub struct Config { // When the approximate size of raft log entries exceed this value, // gc will be forced trigger. pub raft_log_gc_size_limit: Option, + // follower will reject this follower request to avoid falling behind leader too far, + // when the read index is ahead of the sum between the applied index and + // follower_read_max_log_gap, + #[doc(hidden)] + pub follower_read_max_log_gap: u64, // Old Raft logs could be reserved if `raft_log_gc_threshold` is not reached. 
// GC them after ticks `raft_log_reserve_max_ticks` times. #[doc(hidden)] @@ -411,6 +416,7 @@ impl Default for Config { raft_log_gc_threshold: 50, raft_log_gc_count_limit: None, raft_log_gc_size_limit: None, + follower_read_max_log_gap: 100, raft_log_reserve_max_ticks: 6, raft_engine_purge_interval: ReadableDuration::secs(10), max_manual_flush_rate: 3.0, @@ -567,6 +573,10 @@ impl Config { self.raft_log_gc_size_limit.unwrap() } + pub fn follower_read_max_log_gap(&self) -> u64 { + self.follower_read_max_log_gap + } + pub fn region_compact_check_step(&self) -> u64 { self.region_compact_check_step.unwrap() } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index d64257aa54f..c1ed078d93f 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -3158,6 +3158,33 @@ where } } + pub(crate) fn respond_replica_read_error( + &self, + read_index_req: &mut ReadIndexRequest>, + response: RaftCmdResponse, + ) { + debug!( + "handle replica reads with a read index failed"; + "request_id" => ?read_index_req.id, + "response" => ?response, + "peer_id" => self.peer_id(), + ); + RAFT_READ_INDEX_PENDING_COUNT.sub(read_index_req.cmds().len() as i64); + let time = monotonic_raw_now(); + for (_, ch, _) in read_index_req.take_cmds().drain(..) { + ch.read_tracker().map(|tracker| { + GLOBAL_TRACKERS.with_tracker(tracker, |t| { + t.metrics.read_index_confirm_wait_nanos = (time - read_index_req.propose_time) + .to_std() + .unwrap() + .as_nanos() + as u64; + }) + }); + ch.report_error(response.clone()); + } + } + /// Responses to the ready read index request on the replica, the replica is /// not a leader. 
fn post_pending_read_index_on_replica(&mut self, ctx: &mut PollContext) { @@ -3185,10 +3212,20 @@ where && read.cmds()[0].0.get_requests().len() == 1 && read.cmds()[0].0.get_requests()[0].get_cmd_type() == CmdType::ReadIndex; + let read_index = read.read_index.unwrap(); if is_read_index_request { self.response_read(&mut read, ctx, false); - } else if self.ready_to_handle_unsafe_replica_read(read.read_index.unwrap()) { + } else if self.ready_to_handle_unsafe_replica_read(read_index) { self.response_read(&mut read, ctx, true); + } else if self.get_store().applied_index() + ctx.cfg.follower_read_max_log_gap() + <= read_index + { + let mut response = cmd_resp::new_error(Error::ReadIndexNotReady { + region_id: self.region_id, + reason: "applied index fail behind read index too long", + }); + cmd_resp::bind_term(&mut response, self.term()); + self.respond_replica_read_error(&mut read, response); } else { // TODO: `ReadIndex` requests could be blocked. self.pending_reads.push_front(read); diff --git a/tests/failpoints/cases/test_replica_read.rs b/tests/failpoints/cases/test_replica_read.rs index 76caaf18490..773d721da8b 100644 --- a/tests/failpoints/cases/test_replica_read.rs +++ b/tests/failpoints/cases/test_replica_read.rs @@ -336,7 +336,6 @@ fn test_read_after_cleanup_range_for_snap() { fail::remove("pause_on_peer_collect_message"); must_get_none(&cluster.get_engine(3), b"k0"); // Should not receive resp - rx1.recv_timeout(Duration::from_millis(500)).unwrap_err(); fail::remove("apply_snap_cleanup_range"); rx1.recv_timeout(Duration::from_secs(5)).unwrap(); } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 045da61e08c..8ead30e03ff 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -186,6 +186,7 @@ fn test_serde_custom_tikv_config() { raft_log_gc_threshold: 12, raft_log_gc_count_limit: Some(12), raft_log_gc_size_limit: Some(ReadableSize::kb(1)), + follower_read_max_log_gap: 100, 
raft_log_reserve_max_ticks: 100, raft_engine_purge_interval: ReadableDuration::minutes(20), max_manual_flush_rate: 5.0, diff --git a/tests/integrations/raftstore/test_replica_read.rs b/tests/integrations/raftstore/test_replica_read.rs index 50df1975dc3..bd3c10657c2 100644 --- a/tests/integrations/raftstore/test_replica_read.rs +++ b/tests/integrations/raftstore/test_replica_read.rs @@ -595,3 +595,47 @@ fn test_malformed_read_index() { let resp = block_on_timeout(resp, Duration::from_secs(10)).unwrap(); assert_eq!(resp.get_responses()[0].get_get().get_value(), b"v1"); } + +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_replica_read_with_pending_peer() { + let mut cluster = new_cluster(0, 3); + + cluster.cfg.tikv.raft_store.raft_log_gc_count_limit = Some(100); + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(100)); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + let r = cluster.run_conf_change(); + assert_eq!(r, 1); + pd_client.must_add_peer(1, new_peer(2, 2)); + pd_client.must_add_peer(1, new_peer(3, 3)); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + + // Make sure the peer 3 exists + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + + // Make sure the peer 3 is pending + let new_region = cluster.get_region(b"k1"); + let filter = Box::new( + RegionPacketFilter::new(new_region.get_id(), 3) + .direction(Direction::Recv) + .msg_type(MessageType::MsgAppend) + .msg_type(MessageType::MsgSnapshot), + ); + cluster.sim.wl().add_recv_filter(3, filter); + cluster.must_put(b"k1", b"v2"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + for i in 0..200 { + cluster.must_put(format!("k{}", i).as_bytes(), b"v2"); + } + + let new_region = cluster.get_region(b"k1"); + let resp_ch = async_read_on_peer(&mut cluster, new_peer(3, 3), new_region, b"k1", true, true); + + let response = block_on_timeout(resp_ch, 
Duration::from_secs(3)).unwrap(); + assert!(response.get_header().get_error().has_read_index_not_ready()); +} From e8ea0a9e517675665877f5d7c23c037f1749eaa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 14 Aug 2023 14:27:29 +0800 Subject: [PATCH 0845/1149] snap_restore: fix snapshot recovery leader election (#15297) close tikv/tikv#15296 Signed-off-by: hillium Co-authored-by: qupeng --- components/snap_recovery/src/init_cluster.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/snap_recovery/src/init_cluster.rs b/components/snap_recovery/src/init_cluster.rs index 84a10cc1338..4dfd868f376 100644 --- a/components/snap_recovery/src/init_cluster.rs +++ b/components/snap_recovery/src/init_cluster.rs @@ -92,6 +92,8 @@ pub fn enter_snap_recovery_mode(config: &mut TikvConfig) { // reboots, the followers will reject to vote for it again. // We need to disable the lease for avoiding that. config.raft_store.unsafe_disable_check_quorum = true; + // The election is fully controlled by the restore procedure of BR. 
+ config.raft_store.allow_unsafe_vote_after_start = true; // disable auto compactions during the restore config.rocksdb.defaultcf.disable_auto_compactions = true; From 09239a7ca87b592f01454686bad1a084bd067546 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 14 Aug 2023 14:40:58 +0800 Subject: [PATCH 0846/1149] snap_recovery: make snap io better during snap restore (#15277) ref tikv/tikv#15276 Signed-off-by: hillium Co-authored-by: qupeng Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/snap_recovery/src/init_cluster.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/components/snap_recovery/src/init_cluster.rs b/components/snap_recovery/src/init_cluster.rs index 4dfd868f376..7ece321d9dd 100644 --- a/components/snap_recovery/src/init_cluster.rs +++ b/components/snap_recovery/src/init_cluster.rs @@ -81,6 +81,12 @@ pub fn enter_snap_recovery_mode(config: &mut TikvConfig) { config.raft_store.snap_generator_pool_size = 20; // applied snapshot mem size config.raft_store.snap_apply_batch_size = ReadableSize::gb(1); + + // unlimit the snapshot I/O. + config.server.snap_io_max_bytes_per_sec = ReadableSize::gb(16); + config.server.concurrent_recv_snap_limit = 256; + config.server.concurrent_send_snap_limit = 256; + // max snapshot file size, if larger than it, file be splitted. config.raft_store.max_snapshot_file_raw_size = ReadableSize::gb(1); config.raft_store.hibernate_regions = false; From 14dd8cce96e064a029e58de8cf832c35d44e09a5 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 15 Aug 2023 10:42:29 +0800 Subject: [PATCH 0847/1149] [raftstore-v2] add split log (#15248) ref tikv/tikv#15056 Add logs for split region so that we know the time a split takes end-to-end: from the split proposed to the last splitt region finishes the initialization. 
Signed-off-by: tonyxuqqi --- .../src/operation/command/admin/split.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 93a14ed4124..c744c1b9161 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -788,6 +788,11 @@ impl Peer { _ => unreachable!(), } } + info!( + self.logger, + "on_apply_res_split"; + "new_ids" => ?new_ids, + ); self.split_trace_mut().push((res.tablet_index, new_ids)); let region_state = self.storage().region_state().clone(); self.state_changes_mut() @@ -908,7 +913,14 @@ impl Peer { let mut found = false; for (_, ids) in self.split_trace_mut() { if ids.remove(®ion_id) { + let ids_len = ids.len(); found = true; + info!( + self.logger, + "region init finished after split"; + "split_region_id" => region_id, + "remaining_region_count" => ids_len, + ); break; } } From 4fb70fbee527916f7e74552d3e270911c2b8334e Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 16 Aug 2023 10:33:31 +0800 Subject: [PATCH 0848/1149] config: disable rocksdb ribbon filter (#15331) close tikv/tikv#15324 Ribbon filter may cause OOM when compacting a sst with many entries. See tikv/tikv#15324 Signed-off-by: Neil Shen --- etc/config-template.toml | 24 ++++++++++----------- src/config/mod.rs | 45 +++++++++++++++++++++++++++++----------- 2 files changed, 45 insertions(+), 24 deletions(-) diff --git a/etc/config-template.toml b/etc/config-template.toml index 18e155c0a3b..4d580db0a5b 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -154,7 +154,7 @@ # advertise-addr = "" ## Status address. -## This is used for reporting the status of TiKV directly through +## This is used for reporting the status of TiKV directly through ## the HTTP address. 
Notice that there is a risk of leaking status ## information if this port is exposed to the public. ## Empty string means disabling it. @@ -302,7 +302,7 @@ ## * throttle at scheduler, so raftstore and apply won't be blocked anymore ## * better control on the throttle rate to avoid QPS drop under heavy write ## -## Support change dynamically. +## Support change dynamically. ## When enabled, it disables kvdb's write stall and raftdb's write stall(except memtable) and vice versa. # enable = true @@ -312,12 +312,12 @@ ## When the number of SST files of level-0 of kvdb reaches the threshold, the flow controller begins to work # l0-files-threshold = 20 -## When the number of pending compaction bytes of kvdb reaches the threshold, the flow controller begins to -## reject some write requests with `ServerIsBusy` error. +## When the number of pending compaction bytes of kvdb reaches the threshold, the flow controller begins to +## reject some write requests with `ServerIsBusy` error. # soft-pending-compaction-bytes-limit = "192GB" -## When the number of pending compaction bytes of kvdb reaches the threshold, the flow controller begins to -## reject all write requests with `ServerIsBusy` error. +## When the number of pending compaction bytes of kvdb reaches the threshold, the flow controller begins to +## reject all write requests with `ServerIsBusy` error. # hard-pending-compaction-bytes-limit = "1024GB" [storage.io-rate-limit] @@ -708,7 +708,7 @@ ## lower levels. When this is set, `block-based-bloom-filter` will be ignored. ## Only effective for `format-version` >= 5. ## Disabled by default. -# ribbon-filter-above-level = 0 +## ribbon-filter-above-level = 0 # level0-file-num-compaction-trigger = 4 @@ -1190,7 +1190,7 @@ ## Plaintext as master key means no master key is given and only applicable when ## encryption is not enabled, i.e. data-encryption-method = "plaintext". This type doesn't ## have sub-config items. 
Example: -## +## ## [security.encryption.master-key] ## type = "plaintext" ## @@ -1214,7 +1214,7 @@ ## ## Supply a custom encryption key stored in a file. It is recommended NOT to use in production, ## as it breaks the purpose of encryption at rest, unless the file is stored in tempfs. -## The file must contain a 256-bits (32 bytes, regardless of key length implied by +## The file must contain a 256-bits (32 bytes, regardless of key length implied by ## data-encryption-method) key encoded as hex string and end with newline ("\n"). Example: ## ## [security.encryption.master-key] @@ -1252,16 +1252,16 @@ ## Automatically reduce the number of backup threads when the current workload is high, ## in order to reduce impact on the cluster's performance during back up. -# enable-auto-tune = true +# enable-auto-tune = true -[log-backup] +[log-backup] ## Number of threads to perform backup stream tasks. ## The default value is CPU_NUM * 0.5, and limited to [2, 12]. # num-threads = 8 ## enable this feature. TiKV will starts watch related tasks in PD. and backup kv changes to storage accodring to task. ## The default value is false. -# enable = true +# enable = true [backup.hadoop] ## let TiKV know how to find the hdfs shell command. diff --git a/src/config/mod.rs b/src/config/mod.rs index 0432bfab3a4..d81b50a88f7 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -397,6 +397,14 @@ macro_rules! cf_config { if self.format_version.map_or(false, |v| v > 5) { return Err("format-version larger than 5 is unsupported".into()); } + if self.ribbon_filter_above_level.is_some() + && self.format_version.map_or(true, |v| v < 5) + { + return Err( + "ribbon-filter-above-level is only supported when format-version >= 5" + .into(), + ); + } self.titan.validate()?; Ok(()) } @@ -517,6 +525,11 @@ macro_rules! 
write_into_metrics { $metrics .with_label_values(&[$tag, "enable_doubly_skiplist"]) .set(($cf.enable_doubly_skiplist as i32).into()); + $metrics + .with_label_values(&[$tag, "format_version"]) + .set($cf.format_version.unwrap_or(2) as f64); + + // Titan specific metrics. $metrics .with_label_values(&[$tag, "titan_min_blob_size"]) .set($cf.titan.min_blob_size.0 as f64); @@ -672,6 +685,8 @@ impl Default for DefaultCfConfig { whole_key_filtering: true, bloom_filter_bits_per_key: 10, block_based_bloom_filter: false, + // Ribbon filter causes memory surge during compaction, + // so disable it by default. ribbon_filter_above_level: None, read_amp_bytes_per_bit: 0, compression_per_level: [ @@ -842,6 +857,8 @@ impl Default for WriteCfConfig { whole_key_filtering: false, bloom_filter_bits_per_key: 10, block_based_bloom_filter: false, + // Ribbon filter causes memory surge during compaction, + // so disable it by default. ribbon_filter_above_level: None, read_amp_bytes_per_bit: 0, compression_per_level: [ @@ -970,6 +987,8 @@ impl Default for LockCfConfig { whole_key_filtering: true, bloom_filter_bits_per_key: 10, block_based_bloom_filter: false, + // Ribbon filter causes memory surge during compaction, + // so disable it by default. ribbon_filter_above_level: None, read_amp_bytes_per_bit: 0, compression_per_level: [DBCompressionType::No; 7], @@ -1067,6 +1086,8 @@ impl Default for RaftCfConfig { whole_key_filtering: true, bloom_filter_bits_per_key: 10, block_based_bloom_filter: false, + // Ribbon filter causes memory surge during compaction, + // so disable it by default. 
ribbon_filter_above_level: None, read_amp_bytes_per_bit: 0, compression_per_level: [DBCompressionType::No; 7], @@ -1330,10 +1351,6 @@ impl DbConfig { .get_or_insert(ReadableDuration::minutes(10)); self.defaultcf.enable_compaction_guard.get_or_insert(true); self.writecf.enable_compaction_guard.get_or_insert(true); - self.defaultcf.format_version.get_or_insert(2); - self.writecf.format_version.get_or_insert(2); - self.lockcf.format_version.get_or_insert(2); - self.raftcf.format_version.get_or_insert(2); if self.lockcf.write_buffer_size.is_none() { self.lockcf.write_buffer_size = Some(ReadableSize::mb(32)); } @@ -1363,14 +1380,6 @@ impl DbConfig { self.writecf.disable_write_stall = true; self.lockcf.disable_write_stall = true; self.raftcf.disable_write_stall = true; - self.defaultcf.format_version.get_or_insert(5); - self.writecf.format_version.get_or_insert(5); - self.lockcf.format_version.get_or_insert(5); - self.raftcf.format_version.get_or_insert(5); - // According to FB, Ribbon filter is more cost-efficient for SST with 1h+ - // lifetime. We conservatively use it for L6. - self.defaultcf.ribbon_filter_above_level.get_or_insert(6); - self.writecf.ribbon_filter_above_level.get_or_insert(6); // Initially only allow one compaction. Pace up when pending bytes is high. This // strategy is consistent with single RocksDB. self.defaultcf.max_compactions.get_or_insert(1); @@ -1602,6 +1611,8 @@ impl Default for RaftDefaultCfConfig { whole_key_filtering: true, bloom_filter_bits_per_key: 10, block_based_bloom_filter: false, + // Ribbon filter causes memory surge during compaction, + // so disable it by default. ribbon_filter_above_level: None, read_amp_bytes_per_bit: 0, compression_per_level: [ @@ -5635,6 +5646,16 @@ mod tests { cfg.storage.engine = EngineType::RaftKv2; cfg.validate().unwrap(); assert!(!cfg.raft_store.enable_v2_compatible_learner); + + // Ribbon filter and format version. 
+ let mut cfg = TikvConfig::default(); + cfg.rocksdb.writecf.ribbon_filter_above_level = Some(6); + cfg.rocksdb.writecf.format_version = None; + cfg.validate().unwrap_err(); + cfg.rocksdb.writecf.format_version = Some(3); + cfg.validate().unwrap_err(); + cfg.rocksdb.writecf.format_version = Some(5); + cfg.validate().unwrap(); } #[test] From df13dd6c2498903e5d3eb54062bb54ab88f80968 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 16 Aug 2023 14:38:01 +0800 Subject: [PATCH 0849/1149] log_backup: save storage safe point asynchronously (#15314) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ref tikv/tikv#15279 Also, the timeout for etcd implementation was reduced to 5s. Signed-off-by: hillium Signed-off-by: 山岚 <36239017+YuJuncen@users.noreply.github.com> --- Cargo.lock | 2 + components/backup-stream/Cargo.toml | 2 + components/backup-stream/src/endpoint.rs | 38 +++++----- .../src/metadata/store/lazy_etcd.rs | 2 +- .../backup-stream/tests/failpoints/mod.rs | 2 +- .../backup-stream/tests/integration/mod.rs | 20 +++--- components/backup-stream/tests/suite.rs | 71 +++++++++++-------- 7 files changed, 77 insertions(+), 60 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b247dbafffa..c4465f73def 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -579,6 +579,7 @@ dependencies = [ "dashmap", "engine_panic", "engine_rocks", + "engine_test", "engine_traits", "error_code", "etcd-client", @@ -612,6 +613,7 @@ dependencies = [ "tempdir", "tempfile", "test_pd", + "test_pd_client", "test_raftstore", "test_util", "thiserror", diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 373e138888e..8c1edc89a48 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -87,12 +87,14 @@ yatp = { workspace = true } [dev-dependencies] async-trait = "0.1" engine_panic = { workspace = true } +engine_test = { 
workspace = true } grpcio = { workspace = true } hex = "0.4" protobuf = { version = "2.8", features = ["bytes"] } tempdir = "0.3" tempfile = "3.0" test_pd = { workspace = true } +test_pd_client = { workspace = true } test_raftstore = { workspace = true } test_util = { workspace = true } url = "2" diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index ed8905423bc..657c3fe5b61 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -1,11 +1,11 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{collections::HashSet, fmt, marker::PhantomData, sync::Arc, time::Duration}; +use std::{any::Any, collections::HashSet, fmt, marker::PhantomData, sync::Arc, time::Duration}; use concurrency_manager::ConcurrencyManager; use engine_traits::KvEngine; use error_code::ErrorCodeExt; -use futures::FutureExt; +use futures::{stream::AbortHandle, FutureExt}; use kvproto::{ brpb::{StreamBackupError, StreamBackupTaskInfo}, metapb::Region, @@ -59,10 +59,6 @@ const SLOW_EVENT_THRESHOLD: f64 = 120.0; /// CHECKPOINT_SAFEPOINT_TTL_IF_ERROR specifies the safe point TTL(24 hour) if /// task has fatal error. const CHECKPOINT_SAFEPOINT_TTL_IF_ERROR: u64 = 24; -/// The timeout for tick updating the checkpoint. -/// Generally, it would take ~100ms. -/// 5s would be enough for it. -const TICK_UPDATE_TIMEOUT: Duration = Duration::from_secs(5); pub struct Endpoint { // Note: those fields are more like a shared context between components. @@ -78,7 +74,8 @@ pub struct Endpoint { pub(crate) subs: SubscriptionTracer, pub(crate) concurrency_manager: ConcurrencyManager, - range_router: Router, + // Note: some of fields are public so test cases are able to access them. + pub range_router: Router, observer: BackupStreamObserver, pool: Runtime, initial_scan_memory_quota: PendingMemoryQuota, @@ -89,6 +86,12 @@ pub struct Endpoint { // however probably it would be useful in the future. 
config: BackupStreamConfig, checkpoint_mgr: CheckpointManager, + + // Runtime status: + /// The handle to abort last save storage safe point. + /// This is used for simulating an asynchronous background worker. + /// Each time we spawn a task, once time goes by, we abort that task. + pub abort_last_storage_save: Option, } impl Endpoint @@ -181,6 +184,7 @@ where failover_time: None, config, checkpoint_mgr, + abort_last_storage_save: None, }; ep.pool.spawn(ep.min_ts_worker()); ep @@ -845,15 +849,13 @@ where } } - fn on_update_global_checkpoint(&self, task: String) { - let _guard = self.pool.handle().enter(); - let result = self.pool.block_on(tokio::time::timeout( - TICK_UPDATE_TIMEOUT, - self.update_global_checkpoint(task), - )); - if let Err(err) = result { - warn!("log backup update global checkpoint timed out"; "err" => %err) + fn on_update_global_checkpoint(&mut self, task: String) { + if let Some(handle) = self.abort_last_storage_save.take() { + handle.abort(); } + let (fut, handle) = futures::future::abortable(self.update_global_checkpoint(task)); + self.pool.spawn(fut); + self.abort_last_storage_save = Some(handle); } fn on_update_change_config(&mut self, cfg: BackupStreamConfig) { @@ -890,7 +892,7 @@ where self.on_update_change_config(cfg); } Task::Sync(cb, mut cond) => { - if cond(&self.range_router) { + if cond(self) { cb() } else { let sched = self.scheduler.clone(); @@ -1106,7 +1108,9 @@ pub enum Task { // Run the closure if ... Box, // This returns `true`. - Box bool + Send>, + // The argument should be `self`, but there are too many generic argument for `self`... + // So let the caller in test cases downcast this to the type they need manually... + Box bool + Send>, ), /// Mark the store as a failover store. /// This would prevent store from updating its checkpoint ts for a while. 
diff --git a/components/backup-stream/src/metadata/store/lazy_etcd.rs b/components/backup-stream/src/metadata/store/lazy_etcd.rs index 3b697dae9b9..7dacf45e697 100644 --- a/components/backup-stream/src/metadata/store/lazy_etcd.rs +++ b/components/backup-stream/src/metadata/store/lazy_etcd.rs @@ -25,7 +25,7 @@ use super::{ }; use crate::errors::{ContextualResultExt, Result}; -const RPC_TIMEOUT: Duration = Duration::from_secs(30); +const RPC_TIMEOUT: Duration = Duration::from_secs(5); #[derive(Clone)] pub struct LazyEtcdClient(Arc>); diff --git a/components/backup-stream/tests/failpoints/mod.rs b/components/backup-stream/tests/failpoints/mod.rs index c4b28f4686f..ff9b9f82ba1 100644 --- a/components/backup-stream/tests/failpoints/mod.rs +++ b/components/backup-stream/tests/failpoints/mod.rs @@ -208,7 +208,7 @@ mod all { .unwrap(); suite.sync(); - suite.wait_with(move |r| block_on(r.get_task_info("retry_abort")).is_ok()); + suite.wait_with_router(move |r| block_on(r.get_task_info("retry_abort")).is_ok()); let items = run_async_test(suite.write_records(0, 128, 1)); suite.force_flush_files("retry_abort"); suite.wait_for_flush(); diff --git a/components/backup-stream/tests/integration/mod.rs b/components/backup-stream/tests/integration/mod.rs index fc92a751825..a209572c6d8 100644 --- a/components/backup-stream/tests/integration/mod.rs +++ b/components/backup-stream/tests/integration/mod.rs @@ -231,21 +231,19 @@ mod all { .schedule(Task::UpdateGlobalCheckpoint("greenwoods".to_owned())) .unwrap(); let start = Instant::now(); - let (tx, rx) = tokio::sync::oneshot::channel(); - sched - .schedule(Task::Sync( - Box::new(move || { - tx.send(Instant::now()).unwrap(); - }), - Box::new(|_| true), - )) - .unwrap(); - let end = run_async_test(rx).unwrap(); + let (tx, rx) = std::sync::mpsc::channel(); + suite.wait_with(move |ep| { + tx.send((Instant::now(), ep.abort_last_storage_save.is_some())) + .unwrap(); + true + }); + let (end, has_abort) = rx.recv().unwrap(); assert!( - end - 
start < Duration::from_secs(10), + end - start < Duration::from_secs(2), "take = {:?}", end - start ); + assert!(has_abort); } /// This test case tests whether we correctly handle the pessimistic locks. diff --git a/components/backup-stream/tests/suite.rs b/components/backup-stream/tests/suite.rs index 68f53a4a65e..e1df628d76b 100644 --- a/components/backup-stream/tests/suite.rs +++ b/components/backup-stream/tests/suite.rs @@ -31,10 +31,14 @@ use kvproto::{ }; use pd_client::PdClient; use protobuf::parse_from_bytes; -use raftstore::router::CdcRaftRouter; +use raftstore::{ + router::{CdcRaftRouter, ServerRaftStoreRouter}, + RegionInfoAccessor, +}; use resolved_ts::LeadershipResolver; use tempdir::TempDir; -use test_raftstore::{new_server_cluster, Cluster, ServerCluster}; +use test_pd_client::TestPdClient; +use test_raftstore::{new_server_cluster, Cluster, ServerCluster, SimulateTransport}; use test_util::retry; use tikv::config::BackupStreamConfig; use tikv_util::{ @@ -49,6 +53,18 @@ use tikv_util::{ use txn_types::{Key, TimeStamp, WriteRef}; use walkdir::WalkDir; +pub type TestEndpoint = Endpoint< + ErrorStore, + RegionInfoAccessor, + engine_test::kv::KvTestEngine, + CdcRaftRouter< + SimulateTransport< + ServerRaftStoreRouter, + >, + >, + TestPdClient, +>; + pub fn mutation(k: Vec, v: Vec) -> Mutation { mutation_op(k, v, Op::Put) } @@ -391,7 +407,7 @@ impl Suite { )) .unwrap(); let name = name.to_owned(); - self.wait_with(move |r| block_on(r.get_task_info(&name)).is_ok()) + self.wait_with_router(move |r| block_on(r.get_task_info(&name)).is_ok()) } /// This function tries to calculate the global checkpoint from the flush @@ -762,19 +778,25 @@ impl Suite { } pub fn sync(&self) { - self.wait_with(|_| true) + self.wait_with_router(|_| true) } - pub fn wait_with(&self, cond: impl FnMut(&Router) -> bool + Send + 'static + Clone) { + pub fn wait_with(&self, cond: impl FnMut(&mut TestEndpoint) -> bool + Send + 'static + Clone) { self.endpoints .iter() .map({ move |(_, 
wkr)| { let (tx, rx) = std::sync::mpsc::channel(); + let mut cond = cond.clone(); wkr.scheduler() .schedule(Task::Sync( Box::new(move || tx.send(()).unwrap()), - Box::new(cond.clone()), + Box::new(move |this| { + let ep = this + .downcast_mut::() + .expect("`Sync` with wrong type"); + cond(ep) + }), )) .unwrap(); rx @@ -783,32 +805,21 @@ impl Suite { .for_each(|rx| rx.recv().unwrap()) } + pub fn wait_with_router(&self, mut cond: impl FnMut(&Router) -> bool + Send + 'static + Clone) { + self.wait_with(move |ep| cond(&ep.range_router)) + } + pub fn wait_for_flush(&self) { - let (tx, rx) = std::sync::mpsc::channel(); - self.run(|| { - let tx = tx.clone(); - Task::Sync( - Box::new(move || { - tx.send(()).unwrap(); - }), - Box::new(move |r| { - let task_names = block_on(r.select_task(TaskSelector::All.reference())); - for task_name in task_names { - let tsk = block_on(r.get_task_info(&task_name)); - if tsk.unwrap().is_flushing() { - return false; - } - } - true - }), - ) - }); - for _ in self.endpoints.iter() { - // Receive messages from each store. 
- if rx.recv_timeout(Duration::from_secs(30)).is_err() { - panic!("the temp isn't empty after the deadline"); + self.wait_with_router(move |r| { + let task_names = block_on(r.select_task(TaskSelector::All.reference())); + for task_name in task_names { + let tsk = block_on(r.get_task_info(&task_name)); + if tsk.unwrap().is_flushing() { + return false; + } } - } + true + }); } pub fn must_shuffle_leader(&mut self, region_id: u64) { From fe6fc7341edd748dd69ad64d31f4499e67039434 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 16 Aug 2023 22:00:30 +0800 Subject: [PATCH 0850/1149] *: replace select!{ timeout } with block_on_timeout (#15341) close tikv/tikv#15340 Signed-off-by: Neil Shen Co-authored-by: tonyxuqqi --- components/cdc/src/channel.rs | 19 ++-------- components/cdc/src/service.rs | 7 ++-- components/test_raftstore-v2/src/cluster.rs | 33 +++++------------ components/test_raftstore-v2/src/util.rs | 4 +-- components/tikv_kv/src/lib.rs | 35 ++++++------------- components/tikv_util/src/future.rs | 9 +++-- components/tikv_util/src/worker/pool.rs | 20 ++++------- .../raftstore/test_split_region.rs | 5 +-- 8 files changed, 38 insertions(+), 94 deletions(-) diff --git a/components/cdc/src/channel.rs b/components/cdc/src/channel.rs index 595632c306e..b11799d87c1 100644 --- a/components/cdc/src/channel.rs +++ b/components/cdc/src/channel.rs @@ -20,7 +20,7 @@ use futures::{ use grpcio::WriteFlags; use kvproto::cdcpb::{ChangeDataEvent, Event, ResolvedTs}; use protobuf::Message; -use tikv_util::{impl_display_as_debug, time::Instant, warn}; +use tikv_util::{future::block_on_timeout, impl_display_as_debug, time::Instant, warn}; use crate::metrics::*; @@ -435,22 +435,7 @@ pub fn recv_timeout(s: &mut S, dur: std::time::Duration) -> Result + Unpin, { - poll_timeout(&mut s.next(), dur) -} - -pub fn poll_timeout(fut: &mut F, dur: std::time::Duration) -> Result -where - F: std::future::Future + Unpin, -{ - use futures::FutureExt; - let mut timeout = 
futures_timer::Delay::new(dur).fuse(); - let mut f = fut.fuse(); - futures::executor::block_on(async { - futures::select! { - () = timeout => Err(()), - item = f => Ok(item), - } - }) + block_on_timeout(s.next(), dur) } #[cfg(test)] diff --git a/components/cdc/src/service.rs b/components/cdc/src/service.rs index 8dc30ec75e0..d07b5283380 100644 --- a/components/cdc/src/service.rs +++ b/components/cdc/src/service.rs @@ -512,9 +512,10 @@ mod tests { use futures::{executor::block_on, SinkExt}; use grpcio::{self, ChannelBuilder, EnvBuilder, Server, ServerBuilder, WriteFlags}; use kvproto::cdcpb::{create_change_data, ChangeDataClient, ResolvedTs}; + use tikv_util::future::block_on_timeout; use super::*; - use crate::channel::{poll_timeout, recv_timeout, CdcEvent}; + use crate::channel::{recv_timeout, CdcEvent}; fn new_rpc_suite(capacity: usize) -> (Server, ChangeDataClient, ReceiverWrapper) { let memory_quota = MemoryQuota::new(capacity); @@ -565,7 +566,7 @@ mod tests { let mut window_size = 0; loop { if matches!( - poll_timeout(&mut send(), Duration::from_millis(100)), + block_on_timeout(send(), Duration::from_millis(100)), Err(_) | Ok(Err(_)) ) { // Window is filled and flow control in sink is triggered. 
@@ -586,7 +587,7 @@ mod tests { .unwrap() .unwrap() .unwrap(); - poll_timeout(&mut send(), Duration::from_millis(100)) + block_on_timeout(send(), Duration::from_millis(100)) .unwrap() .unwrap(); // gRPC client may update window size after receiving a message, diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 2a5bb733962..84d8d0fcca5 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -17,9 +17,7 @@ use engine_traits::{ TabletRegistry, CF_DEFAULT, }; use file_system::IoRateLimiter; -use futures::{ - compat::Future01CompatExt, executor::block_on, future::BoxFuture, select, Future, FutureExt, -}; +use futures::{executor::block_on, future::BoxFuture, Future}; use keys::{data_key, validate_data_key, DATA_PREFIX_KEY}; use kvproto::{ errorpb::Error as PbError, @@ -65,7 +63,6 @@ use tikv_util::{ safe_panic, thread_group::GroupProperties, time::{Instant, ThreadReadId}, - timer::GLOBAL_TIMER_HANDLE, warn, worker::LazyWorker, HandyRwLock, @@ -109,17 +106,9 @@ pub trait Simulator { fn read(&mut self, request: RaftCmdRequest, timeout: Duration) -> Result { let node_id = request.get_header().get_peer().get_store_id(); - let timeout_f = GLOBAL_TIMER_HANDLE - .delay(std::time::Instant::now() + timeout) - .compat(); - futures::executor::block_on(async move { - futures::select! { - res = self.async_read(node_id, request).fuse() => res, - e = timeout_f.fuse() => { - Err(Error::Timeout(format!("request timeout for {:?}: {:?}", timeout,e))) - }, - } - }) + let f = self.async_read(node_id, request); + block_on_timeout(f, timeout) + .map_err(|e| Error::Timeout(format!("request timeout for {:?}: {:?}", timeout, e)))? 
} fn async_read( @@ -223,8 +212,7 @@ pub trait Simulator { } } - let mut fut = Box::pin(sub.result()); - match block_on_timeout(fut.as_mut(), timeout) + match block_on_timeout(sub.result(), timeout) .map_err(|e| Error::Timeout(format!("request timeout for {:?}: {:?}", timeout, e)))? { Some(QueryResult::Read(_)) => unreachable!(), @@ -289,14 +277,9 @@ pub trait Simulator { } } - let timeout_f = GLOBAL_TIMER_HANDLE.delay(std::time::Instant::now() + timeout); - block_on(async move { - select! { - // todo: unwrap? - res = sub.result().fuse() => Ok(res.unwrap()), - _ = timeout_f.compat().fuse() => Err(Error::Timeout(format!("request timeout for {:?}", timeout))), - } - }) + Ok(block_on_timeout(sub.result(), timeout) + .map_err(|e| Error::Timeout(format!("request timeout for {:?}: {:?}", timeout, e)))? + .unwrap()) } fn async_command_on_node( diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index 7719b5e3557..ff0362fb4a4 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -266,7 +266,7 @@ pub fn batch_read_on_peer, EK: KvEngine>( request.mut_header().set_peer(peer.clone()); let snap = cluster.sim.wl().async_snapshot(node_id, request); let resp = block_on_timeout( - Box::pin(async move { + async move { match snap.await { Ok(snap) => ReadResponse { response: Default::default(), @@ -279,7 +279,7 @@ pub fn batch_read_on_peer, EK: KvEngine>( txn_extra_op: Default::default(), }, } - }), + }, Duration::from_secs(1), ) .unwrap(); diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 55c9f66d9fd..25f58352750 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -40,7 +40,7 @@ use engine_traits::{ TabletRegistry, WriteBatch, CF_DEFAULT, CF_LOCK, }; use error_code::{self, ErrorCode, ErrorCodeExt}; -use futures::{compat::Future01CompatExt, future::BoxFuture, prelude::*}; +use futures::{future::BoxFuture, prelude::*}; use 
into_other::IntoOther; use kvproto::{ errorpb::Error as ErrorHeader, @@ -51,7 +51,7 @@ use kvproto::{ use pd_client::BucketMeta; use raftstore::store::{PessimisticLockPair, TxnExt}; use thiserror::Error; -use tikv_util::{deadline::Deadline, escape, time::ThreadReadId, timer::GLOBAL_TIMER_HANDLE}; +use tikv_util::{deadline::Deadline, escape, future::block_on_timeout, time::ThreadReadId}; use tracker::with_tls_tracker; use txn_types::{Key, PessimisticLock, TimeStamp, TxnExtra, Value}; @@ -376,34 +376,19 @@ pub trait Engine: Send + Clone + 'static { fn write(&self, ctx: &Context, batch: WriteData) -> Result<()> { let f = write(self, ctx, batch, None); - let timeout = GLOBAL_TIMER_HANDLE - .delay(Instant::now() + DEFAULT_TIMEOUT) - .compat(); - - futures::executor::block_on(async move { - futures::select! { - res = f.fuse() => { - if let Some(res) = res { - return res; - } - }, - _ = timeout.fuse() => (), - }; - Err(Error::from(ErrorInner::Timeout(DEFAULT_TIMEOUT))) - }) + let res = block_on_timeout(f, DEFAULT_TIMEOUT) + .map_err(|_| Error::from(ErrorInner::Timeout(DEFAULT_TIMEOUT)))?; + if let Some(res) = res { + return res; + } + Err(Error::from(ErrorInner::Timeout(DEFAULT_TIMEOUT))) } fn release_snapshot(&mut self) {} fn snapshot(&mut self, ctx: SnapContext<'_>) -> Result { - let deadline = Instant::now() + DEFAULT_TIMEOUT; - let timeout = GLOBAL_TIMER_HANDLE.delay(deadline).compat(); - futures::executor::block_on(async move { - futures::select! { - res = self.async_snapshot(ctx).fuse() => res, - _ = timeout.fuse() => Err(Error::from(ErrorInner::Timeout(DEFAULT_TIMEOUT))), - } - }) + block_on_timeout(self.async_snapshot(ctx), DEFAULT_TIMEOUT) + .map_err(|_| Error::from(ErrorInner::Timeout(DEFAULT_TIMEOUT)))? 
} fn put(&self, ctx: &Context, key: Key, value: Value) -> Result<()> { diff --git a/components/tikv_util/src/future.rs b/components/tikv_util/src/future.rs index 1eff166e48a..6b4e608b341 100644 --- a/components/tikv_util/src/future.rs +++ b/components/tikv_util/src/future.rs @@ -1,7 +1,6 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. use std::{ - borrow::BorrowMut, cell::UnsafeCell, sync::{ atomic::{AtomicU8, Ordering}, @@ -216,10 +215,9 @@ pub fn try_poll(f: impl Future) -> Option { // Run a future with a timeout on the current thread. Returns Err if times out. #[allow(clippy::result_unit_err)] -pub fn block_on_timeout(mut fut: B, dur: std::time::Duration) -> Result +pub fn block_on_timeout(fut: F, dur: std::time::Duration) -> Result where - F: std::future::Future + Unpin, - B: BorrowMut, + F: std::future::Future, { use futures_util::compat::Future01CompatExt; @@ -227,7 +225,8 @@ where .delay(std::time::Instant::now() + dur) .compat() .fuse(); - let mut f = fut.borrow_mut().fuse(); + futures::pin_mut!(fut); + let mut f = fut.fuse(); futures::executor::block_on(async { futures::select! 
{ _ = timeout => Err(()), diff --git a/components/tikv_util/src/worker/pool.rs b/components/tikv_util/src/worker/pool.rs index 26dbf495f54..c3919e42619 100644 --- a/components/tikv_util/src/worker/pool.rs +++ b/components/tikv_util/src/worker/pool.rs @@ -16,7 +16,7 @@ use futures::{ channel::mpsc::{unbounded, UnboundedReceiver, UnboundedSender}, compat::{Future01CompatExt, Stream01CompatExt}, executor::block_on, - future::{self, FutureExt}, + future::FutureExt, stream::StreamExt, }; use prometheus::IntGauge; @@ -24,7 +24,7 @@ use yatp::{Remote, ThreadPool}; use super::metrics::*; use crate::{ - future::poll_future_notify, + future::{block_on_timeout, poll_future_notify}, timer::GLOBAL_TIMER_HANDLE, yatp_pool::{DefaultTicker, YatpPoolBuilder}, }; @@ -243,18 +243,12 @@ impl ReceiverWrapper { &mut self, timeout: Duration, ) -> Result, std::sync::mpsc::RecvTimeoutError> { - let deadline = Instant::now() + timeout; - let delay = GLOBAL_TIMER_HANDLE.delay(deadline).compat(); - let ret = future::select(self.inner.next(), delay); - match block_on(ret) { - future::Either::Left((msg, _)) => { - if let Some(Msg::Task(t)) = msg { - return Ok(Some(t)); - } - Ok(None) - } - future::Either::Right(_) => Err(std::sync::mpsc::RecvTimeoutError::Timeout), + let msg = block_on_timeout(self.inner.next(), timeout) + .map_err(|_| std::sync::mpsc::RecvTimeoutError::Timeout)?; + if let Some(Msg::Task(t)) = msg { + return Ok(Some(t)); } + Ok(None) } } diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index d10645c44fa..c0f75487998 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -1487,10 +1487,7 @@ fn test_node_split_during_read_index() { cluster.clear_recv_filter_on_node(2); cluster.clear_recv_filter_on_node(3); - match block_on_timeout( - Box::pin(async { sub.result().await }), - Duration::from_secs(5), - ) { + match block_on_timeout(sub.result(), 
Duration::from_secs(5)) { Ok(Some(QueryResult::Response(resp))) if resp.get_header().has_error() => {} other => { panic!("{:?}", other); From a3d38e61a657794e86b0baefdd0e6769acf1a065 Mon Sep 17 00:00:00 2001 From: YangKeao Date: Thu, 17 Aug 2023 14:41:00 +0800 Subject: [PATCH 0851/1149] charset: support utf8mb4_0900_ai_ci collation (#15329) close tikv/tikv#15328 Signed-off-by: Yang Keao Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../src/codec/collation/collator/mod.rs | 31 +- .../data_0400.rs} | 101 +- .../collator/utf8mb4_uca/data_0900.rs | 12337 ++++++++++++++++ .../collation/collator/utf8mb4_uca/mod.rs | 114 + .../src/codec/collation/mod.rs | 1 + .../tidb_query_datatype/src/def/field_type.rs | 4 +- .../tidb_query_expr/src/impl_compare.rs | 14 + components/tidb_query_expr/src/impl_like.rs | 1 + 8 files changed, 12511 insertions(+), 92 deletions(-) rename components/tidb_query_datatype/src/codec/collation/collator/{utf8mb4_unicode_ci.rs => utf8mb4_uca/data_0400.rs} (99%) create mode 100644 components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/data_0900.rs create mode 100644 components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/mod.rs diff --git a/components/tidb_query_datatype/src/codec/collation/collator/mod.rs b/components/tidb_query_datatype/src/codec/collation/collator/mod.rs index bac55eabea7..20a89090535 100644 --- a/components/tidb_query_datatype/src/codec/collation/collator/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/collator/mod.rs @@ -5,7 +5,7 @@ mod gbk_collation; mod latin1_bin; mod utf8mb4_binary; mod utf8mb4_general_ci; -mod utf8mb4_unicode_ci; +mod utf8mb4_uca; use std::{ cmp::Ordering, @@ -19,7 +19,7 @@ pub use gbk_collation::*; pub use latin1_bin::*; pub use utf8mb4_binary::*; pub use utf8mb4_general_ci::*; -pub use utf8mb4_unicode_ci::*; +pub use utf8mb4_uca::*; use super::{charset::*, Collator}; use crate::codec::Result; @@ -43,6 +43,7 @@ mod 
tests { (Collation::Latin1Bin, 4), (Collation::GbkBin, 5), (Collation::GbkChineseCi, 6), + (Collation::Utf8Mb40900AiCi, 7), ]; let cases = vec![ // (sa, sb, [Utf8Mb4Bin, Utf8Mb4BinNoPadding, Utf8Mb4GeneralCi, Utf8Mb4UnicodeCi, @@ -58,6 +59,7 @@ mod tests { Ordering::Equal, Ordering::Equal, Ordering::Equal, + Ordering::Equal, ], ), ( @@ -71,6 +73,7 @@ mod tests { Ordering::Equal, Ordering::Equal, Ordering::Equal, + Ordering::Less, ], ), ( @@ -84,6 +87,7 @@ mod tests { Ordering::Greater, Ordering::Greater, Ordering::Equal, + Ordering::Less, ], ), ( @@ -97,6 +101,7 @@ mod tests { Ordering::Greater, Ordering::Greater, Ordering::Greater, + Ordering::Greater, ], ), ( @@ -110,6 +115,7 @@ mod tests { Ordering::Less, Ordering::Less, Ordering::Less, + Ordering::Less, ], ), ( @@ -123,6 +129,7 @@ mod tests { Ordering::Less, Ordering::Less, Ordering::Less, + Ordering::Equal, ], ), ( @@ -136,6 +143,7 @@ mod tests { Ordering::Less, Ordering::Less, Ordering::Less, + Ordering::Greater, ], ), ( @@ -149,6 +157,7 @@ mod tests { Ordering::Greater, Ordering::Less, Ordering::Less, + Ordering::Equal, ], ), ( @@ -162,6 +171,7 @@ mod tests { Ordering::Less, Ordering::Greater, Ordering::Greater, + Ordering::Less, ], ), ( @@ -175,6 +185,7 @@ mod tests { Ordering::Less, Ordering::Less, Ordering::Less, + Ordering::Less, ], ), ]; @@ -231,6 +242,7 @@ mod tests { (Collation::Latin1Bin, 4), (Collation::GbkBin, 5), (Collation::GbkChineseCi, 6), + (Collation::Utf8Mb40900AiCi, 7), ]; let cases = vec![ // (str, [Utf8Mb4Bin, Utf8Mb4BinNoPadding, Utf8Mb4GeneralCi, Utf8Mb4UnicodeCi, Latin1, @@ -245,6 +257,7 @@ mod tests { vec![0x61], vec![0x61], vec![0x41], + vec![0x1C, 0x47], ], ), ( @@ -257,6 +270,7 @@ mod tests { vec![0x41], vec![0x41], vec![0x41], + vec![0x1C, 0x47, 0x2, 0x9], ], ), ( @@ -269,6 +283,7 @@ mod tests { vec![0x41], vec![0x41], vec![0x41], + vec![0x1C, 0x47], ], ), ( @@ -281,6 +296,7 @@ mod tests { vec![0xF0, 0x9F, 0x98, 0x83], vec![0x3F], vec![0x3F], + vec![0x15, 0xFE], ], ), ( @@ -321,6 
+337,12 @@ mod tests { 0x46, 0x4f, 0x4f, 0x20, 0x3f, 0x20, 0x42, 0x41, 0x52, 0x20, 0x3f, 0x20, 0x42, 0x41, 0x5a, 0x20, 0x3f, 0x20, 0x51, 0x55, 0x58, ], + vec![ + 0x1C, 0xE5, 0x1D, 0xDD, 0x1D, 0xDD, 0x2, 0x9, 0x5, 0x84, 0x2, 0x9, 0x1C, + 0x60, 0x1C, 0x47, 0x1E, 0x33, 0x2, 0x9, 0xE, 0xF0, 0x2, 0x9, 0x1C, 0x60, + 0x1C, 0x47, 0x1F, 0x21, 0x2, 0x9, 0x9, 0x1B, 0x2, 0x9, 0x1E, 0x21, 0x1E, + 0xB5, 0x1E, 0xFF, + ], ], ), ( @@ -336,6 +358,10 @@ mod tests { vec![0xEF, 0xB7, 0xBB], vec![0x3f], vec![0x3f], + vec![ + 0x23, 0x25, 0x23, 0x9C, 0x2, 0x9, 0x23, 0x25, 0x23, 0x9C, 0x23, 0xB, 0x23, + 0x9C, 0x23, 0xB1, + ], ], ), ( @@ -348,6 +374,7 @@ mod tests { vec![0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87], vec![0xD6, 0xD0, 0xCE, 0xC4], vec![0xD3, 0x21, 0xC1, 0xAD], + vec![0xFB, 0x40, 0xCE, 0x2D, 0xFB, 0x40, 0xE5, 0x87], ], ), ]; diff --git a/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_unicode_ci.rs b/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/data_0400.rs similarity index 99% rename from components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_unicode_ci.rs rename to components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/data_0400.rs index 5a529d48144..b117170a70a 100644 --- a/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_unicode_ci.rs +++ b/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/data_0400.rs @@ -1,20 +1,22 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use super::*; +// Created from https://www.unicode.org/Public/UCA/4.0.0/allkeys-4.0.0.txt -/// Collator for `utf8mb4_unicode_ci` collation with padding behavior (trims -/// right spaces). 
-#[derive(Debug)] -pub struct CollatorUtf8Mb4UnicodeCi; +use super::{super::PADDING_SPACE, UnicodeVersion}; -impl Collator for CollatorUtf8Mb4UnicodeCi { - type Charset = CharsetUtf8mb4; - type Weight = u128; +static LONG_RUNE: u64 = 0xFFFD; - const IS_CASE_INSENSITIVE: bool = true; +#[derive(Debug)] +pub struct Unicode0400 {} + +impl UnicodeVersion for Unicode0400 { + #[inline] + fn preprocess(s: &str) -> &str { + s.trim_end_matches(PADDING_SPACE) + } #[inline] - fn char_weight(ch: char) -> Self::Weight { + fn char_weight(ch: char) -> u128 { let r = ch as usize; if r > 0xFFFF { return 0xFFFD; @@ -27,87 +29,8 @@ impl Collator for CollatorUtf8Mb4UnicodeCi { u as u128 } - - #[inline] - fn write_sort_key(writer: &mut W, bstr: &[u8]) -> Result { - let s = str::from_utf8(bstr)?.trim_end_matches(PADDING_SPACE); - let mut n = 0; - for ch in s.chars() { - let mut weight = Self::char_weight(ch); - while weight != 0 { - writer.write_u16_be((weight & 0xFFFF) as u16)?; - n += 1; - weight >>= 16 - } - } - Ok(n * std::mem::size_of::()) - } - - #[inline] - fn sort_compare(a: &[u8], b: &[u8]) -> Result { - let mut ca = str::from_utf8(a)?.trim_end_matches(PADDING_SPACE).chars(); - let mut cb = str::from_utf8(b)?.trim_end_matches(PADDING_SPACE).chars(); - let mut an = 0; - let mut bn = 0; - - loop { - if an == 0 { - for ach in &mut ca { - an = Self::char_weight(ach); - if an != 0 { - break; - } - } - } - - if bn == 0 { - for bch in &mut cb { - bn = Self::char_weight(bch); - if bn != 0 { - break; - } - } - } - - if an == 0 || bn == 0 { - return Ok(an.cmp(&bn)); - } - - if an == bn { - an = 0; - bn = 0; - continue; - } - - while an != 0 && bn != 0 { - if (an ^ bn) & 0xFFFF == 0 { - an >>= 16; - bn >>= 16; - } else { - return Ok((an & 0xFFFF).cmp(&(bn & 0xFFFF))); - } - } - } - } - - #[inline] - fn sort_hash(state: &mut H, bstr: &[u8]) -> Result<()> { - let s = str::from_utf8(bstr)?.trim_end_matches(PADDING_SPACE); - for ch in s.chars() { - let mut weight = Self::char_weight(ch); - 
while weight != 0 { - (weight & 0xFFFF).hash(state); - weight >>= 16; - } - } - Ok(()) - } } -// Created from https://www.unicode.org/Public/UCA/4.0.0/allkeys-4.0.0.txt - -static LONG_RUNE: u64 = 0xFFFD; - #[inline] fn map_long_rune(r: usize) -> u128 { match r { diff --git a/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/data_0900.rs b/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/data_0900.rs new file mode 100644 index 00000000000..974eb8103c7 --- /dev/null +++ b/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/data_0900.rs @@ -0,0 +1,12337 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +// Created from https://www.unicode.org/Public/UCA/4.0.0/allkeys-4.0.0.txt + +use super::UnicodeVersion; + +static LONG_RUNE: u64 = 0xFFFD; + +#[derive(Debug)] +pub struct Unicode0900 {} + +impl UnicodeVersion for Unicode0900 { + #[inline] + fn preprocess(s: &str) -> &str { + s + } + + #[inline] + fn char_weight(ch: char) -> u128 { + let r = ch as usize; + if r > UNICODE_CI_TABLE.len() { + return (r as u128 >> 15) + 0xFBC0 + (((r as u128 & 0x7FFF) | 0x8000) << 16); + } + + let u = UNICODE_CI_TABLE[r]; + if u == LONG_RUNE { + return map_long_rune(r); + } + + u as u128 + } +} + +#[inline] +fn map_long_rune(r: usize) -> u128 { + match r { + 0x321D => 0x000003183CD43C773C013C7B3C000317, + 0x321E => 0x0000000003183C803C073C7B3C000317, + 0x327C => 0x0000000000003C7B3BF53CE03C733C03, + 0x3307 => 0x0000000000003D6E1C0E3D623D673D5E, + 0x3315 => 0x0000000000003D7B3D823D623D863D61, + 0x3316 => 0x000000003D843D6E1C0E3D7C3D863D61, + 0x3317 => 0x0000000000003D6E3D6C3D873D863D61, + 0x3319 => 0x0000000000003D8B3D6E3D7B3D823D62, + 0x331A => 0x0000000000003D863D5B3D683D843D62, + 0x3320 => 0x0000000000003D7B1C0E3D6B3D8B3D65, + 0x332B => 0x0000000000003D6E3D8B3D681C0E3D74, + 0x332E => 0x0000000000003D843D6E3D673D5A3D75, + 0x3332 => 0x0000000000003D6E3D6C3D823D5A3D76, + 0x3334 => 
0x0000000000003D843D5E3D663D6C3D76, + 0x3336 => 0x0000000000003D841C0E3D6A3D623D77, + 0x3347 => 0x0000000000003D8B3D813D663D8B3D79, + 0x334A => 0x0000000000003D841C0E3D743D833D7A, + 0x3356 => 0x0000000000003D8B3D633D6E3D8B3D85, + 0x337F => 0xF93EFB40CF1AFB40DF0FFB40E82AFB40, + 0x33AE => 0x0000000000001E7106251C8F1C471E33, + 0x33AF => 0x000000001C3F1E7106251C8F1C471E33, + 0xFDFA => 0x23B1239C239C230B020923C5239C2364, + 0xFDFB => 0x23B1239C230B239C23250209239C2325, + 0xFFFD => 0x0000000000000000000000000000FFFD, + 0x1F19C => 0x00001E331C7A1E7102091C8F1DB91C3F, + 0x1F1A8 => 0x000000001E711CAA1E3302091D321D18, + 0x1F1A9 => 0x1E711E711CAA1D771E711E711DDD1D77, + + _ => 0xFFFD, + } +} + +#[rustfmt::skip] +static UNICODE_CI_TABLE: [u64; 0x2CEA1] = [ + + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x201, 0x202, 0x203, 0x204, 0x205, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x209, 0x260, 0x30C, 0x398, 0x1C12, 0x399, 0x396, 0x305, 0x317, 0x318, 0x38F, 0x616, 0x222, + 0x20D, 0x277, 0x394, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x239, 0x234, + 0x61A, 0x61B, 0x61C, 0x266, 0x38E, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, + 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, + 0x1F21, 0x319, 0x395, 0x31A, 0x485, 0x20B, 0x482, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, + 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, + 0x1EFF, 0x1F0B, 0x1F21, 0x31B, 0x61E, 0x31C, 0x620, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x206, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x209, 0x261, 0x1C11, 0x1C13, 0x1C10, + 0x1C14, 0x61F, 0x389, 0x489, 0x584, 0x1C47, 0x315, 0x61D, 0x0, 0x585, 0x486, 0x4F6, 0x617, 0x1C3F, 0x1C40, + 0x483, 
0x1FCB, 0x38B, 0x28B, 0x48C, 0x1C3E, 0x1DDD, 0x316, 0x1C4106261C3E, 0x1C3F06261C3E, 0x1C4106261C40, 0x267, 0x1C47, 0x1C47, 0x1C47, + 0x1C47, 0x1C47, 0x1C47, 0x1CAA1C47, 0x1C7A, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1D32, 0x1D32, 0x1D32, 0x1D32, 0x1C8F, 0x1DB9, + 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x619, 0x1DDD, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1F0B, 0x1F50, 0x1E711E71, 0x1C47, + 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1CAA1C47, 0x1C7A, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1D32, 0x1D32, 0x1D32, 0x1D32, + 0x1C8F, 0x1DB9, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x618, 0x1DDD, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1F0B, 0x1F50, + 0x1F0B, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C7A, 0x1C7A, 0x1C7A, 0x1C7A, 0x1C7A, 0x1C7A, 0x1C7A, 0x1C7A, + 0x1C8F, 0x1C8F, 0x1C8F, 0x1C8F, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CF4, + 0x1CF4, 0x1CF4, 0x1CF4, 0x1CF4, 0x1CF4, 0x1CF4, 0x1CF4, 0x1D18, 0x1D18, 0x1D18, 0x1D18, 0x1D32, 0x1D32, 0x1D32, 0x1D32, + 0x1D32, 0x1D32, 0x1D32, 0x1D32, 0x1D32, 0x1D36, 0x1D4C1D32, 0x1D4C1D32, 0x1D4C, 0x1D4C, 0x1D65, 0x1D65, 0x1E2F, 0x1D77, 0x1D77, + 0x1D77, 0x1D77, 0x1D77, 0x1D77, 0x1D77, 0x1D77, 0x1D77, 0x1D77, 0x1DB9, 0x1DB9, 0x1DB9, 0x1DB9, 0x1DB9, 0x1DB9, 0x1DB91F7E, + 0x1DD8, 0x1DD8, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1CAA1DDD, 0x1CAA1DDD, 0x1E33, 0x1E33, 0x1E33, 0x1E33, 0x1E33, + 0x1E33, 0x1E71, 0x1E71, 0x1E71, 0x1E71, 0x1E71, 0x1E71, 0x1E71, 0x1E71, 0x1E95, 0x1E95, 0x1E95, 0x1E95, 0x1E9A, 0x1E9A, + 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EF5, 0x1EF5, 0x1F0B, + 0x1F0B, 0x1F0B, 0x1F21, 0x1F21, 0x1F21, 0x1F21, 0x1F21, 0x1F21, 0x1E71, 0x1C68, 0x1C71, 0x1C75, 0x1C75, 0x1F71, 0x1F71, + 0x1DF0, 0x1C85, 0x1C85, 0x1C97, 0x1C9B, 0x1CA0, 0x1CA0, 0x1EF51F21, 0x1CB8, 0x1CBD, 0x1CC2, 0x1CEE, 0x1CEE, 0x1D06, 0x1D10, + 0x1D20, 0x1D47, 0x1D41, 0x1D6B, 0x1D6B, 0x1D82, 0x1DA2, 0x1ED4, 0x1DC4, 0x1DC8, 0x1DFD, 0x1DDD, 0x1DDD, 0x1D14, 0x1D14, + 
0x1E15, 0x1E15, 0x1E38, 0x1F69, 0x1F69, 0x1E82, 0x1E88, 0x1EA0, 0x1EA4, 0x1EA4, 0x1EA8, 0x1EB5, 0x1EB5, 0x1EDE, 0x1EEA, + 0x1F17, 0x1F17, 0x1F26, 0x1F26, 0x1F3E, 0x1F43, 0x1F43, 0x1F48, 0x1F62, 0x1F6D, 0x1F6D, 0x1E711E95, 0x1F56, 0x1F99, 0x1F9D, + 0x1FA1, 0x1FA5, 0x1F211C8F, 0x1F211C8F, 0x1F211C8F, 0x1D4C1D77, 0x1D4C1D77, 0x1D4C1D77, 0x1D4C1DB9, 0x1D4C1DB9, 0x1D4C1DB9, 0x1C47, 0x1C47, 0x1D32, 0x1D32, + 0x1DDD, 0x1DDD, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1CB8, 0x1C47, 0x1C47, + 0x1C47, 0x1C47, 0x1CAA1C47, 0x1CAA1C47, 0x1D01, 0x1D01, 0x1CF4, 0x1CF4, 0x1D65, 0x1D65, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1F3E, + 0x1F3E, 0x1D4C, 0x1F211C8F, 0x1F211C8F, 0x1F211C8F, 0x1CF4, 0x1CF4, 0x1D20, 0x1F56, 0x1DB9, 0x1DB9, 0x1C47, 0x1C47, 0x1CAA1C47, 0x1CAA1C47, + 0x1DDD, 0x1DDD, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1D32, 0x1D32, 0x1D32, 0x1D32, 0x1DDD, + 0x1DDD, 0x1DDD, 0x1DDD, 0x1E33, 0x1E33, 0x1E33, 0x1E33, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1E71, 0x1E71, 0x1E95, 0x1E95, + 0x1F1D, 0x1F1D, 0x1D18, 0x1D18, 0x1DC8, 0x1CA4, 0x1E07, 0x1E07, 0x1F2C, 0x1F2C, 0x1C47, 0x1C47, 0x1CAA, 0x1CAA, 0x1DDD, + 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1F0B, 0x1F0B, 0x1D98, 0x1DD2, 0x1EAC, 0x1D50, 0x1C601C8F, 0x1E0C1E21, + 0x1C4C, 0x1C7F, 0x1C7F, 0x1D82, 0x1E9E, 0x1E7C, 0x1F38, 0x1F79, 0x1F79, 0x1C68, 0x1EC0, 0x1EF1, 0x1CB1, 0x1CB1, 0x1D55, + 0x1D55, 0x1E2B, 0x1E2B, 0x1E3F, 0x1E3F, 0x1F13, 0x1F13, 0x1C51, 0x1C55, 0x1C5B, 0x1C71, 0x1DF0, 0x1C89, 0x1C97, 0x1C9B, + 0x1CC7, 0x1CBD, 0x1CCB, 0x1CC2, 0x1CCF, 0x1CD5, 0x1CD9, 0x1D5D, 0x1D06, 0x1CF8, 0x1CFD, 0x1D10, 0x1CE1, 0x1EC8, 0x1D25, + 0x1D2C, 0x1D41, 0x1D47, 0x1D3A, 0x1D87, 0x1D8D, 0x1D93, 0x1D9D, 0x1ED4, 0x1EDA, 0x1DB1, 0x1DC4, 0x1DCE, 0x1DBD, 0x1DFD, + 0x1DE4, 0x1E02, 0x1E1C, 0x1E44, 0x1E49, 0x1E4E, 0x1E53, 0x1E57, 0x1E5C, 0x1E61, 0x1E38, 0x1E6A, 0x1E78, 0x1E82, 0x1D61, + 0x1E8C, 0x1E91, 0x1EB1, 0x1EA8, 0x1EC0, 0x1EDE, 0x1EEA, 0x1EF1, 0x1EFB, 0x1DA6, 0x1F0F, 
0x1F30, 0x1F34, 0x1F3E, 0x1F4C, + 0x1F75, 0x1F84, 0x1F95, 0x1FA9, 0x1FAD, 0x1C64, 0x1CDD, 0x1D0A, 0x1D1C, 0x1D59, 0x1D73, 0x1D7B, 0x1E27, 0x1F8D, 0x1F91, + 0x1F211C8F, 0x1F3E1C8F, 0x1F341C8F, 0x1E711E95, 0x1E821E95, 0x1C891E95, 0x1DD81CE5, 0x1E711D77, 0x1F211D77, 0x1FB1, 0x1FB5, 0x1ECC, 0x1ED0, 0x1D18, 0x1D25, + 0x1D4C, 0x1E33, 0x1E44, 0x1E4E, 0x1E6A, 0x1EF5, 0x1F0B, 0x493, 0x495, 0x1D30, 0x1F7E, 0x1D31, 0x1F80, 0x1F88, 0x1F7D, + 0x1F89, 0x496, 0x497, 0x498, 0x499, 0x49A, 0x49B, 0x49C, 0x49D, 0x49E, 0x49F, 0x4A0, 0x4A1, 0x4A2, 0x4A3, + 0x1BF8, 0x1BF9, 0x4A4, 0x4A5, 0x4A6, 0x4A7, 0x4A8, 0x4A9, 0x487, 0x488, 0x48A, 0x48D, 0x484, 0x48B, 0x4AA, + 0x4AB, 0x1D10, 0x1D77, 0x1E71, 0x1EFF, 0x1F84, 0x4AC, 0x4AD, 0x4AE, 0x4AF, 0x4B0, 0x4B1, 0x4B2, 0x4B3, 0x4B4, + 0x1F7F, 0x4B5, 0x4B6, 0x4B7, 0x4B8, 0x4B9, 0x4BA, 0x4BB, 0x4BC, 0x4BD, 0x4BE, 0x4BF, 0x4C0, 0x4C1, 0x4C2, + 0x4C3, 0x4C4, 0x4C5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1C47, 0x1CAA, 0x1D32, + 0x1DDD, 0x1EB5, 0x1C7A, 0x1C8F, 0x1D18, 0x1DAA, 0x1E33, 0x1E95, 0x1EE3, 0x1EFF, 0x1FC3, 0x1FC3, 0x1FE4, 0x1FE4, 0x493, + 0x494, 0x1FC0, 0x1FC0, 0x8378FBC0, 0x8379FBC0, 0x1FC6, 0x1FD9, 0x1FD8, 0x1FDA, 0x234, 0x1FC7, 0x8380FBC0, 0x8381FBC0, 0x8382FBC0, 0x8383FBC0, + 0x483, 0x489, 0x1FB9, 0x28B, 0x1FBE, 0x1FC4, 0x1FC6, 0x838BFBC0, 0x1FCE, 0x838DFBC0, 0x1FDC, 0x1FE1, 0x1FC6, 0x1FB9, 0x1FBA, + 0x1FBB, 0x1FBD, 0x1FBE, 0x1FC2, 0x1FC4, 0x1FC5, 0x1FC6, 0x1FC8, 0x1FC9, 0x1FCB, 0x1FCC, 0x1FCD, 0x1FCE, 0x1FCF, 0x1FD4, + 0x83A2FBC0, 0x1FD7, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF, 
0x1FE1, 0x1FC6, 0x1FDC, 0x1FB9, 0x1FBE, 0x1FC4, 0x1FC6, 0x1FDC, + 0x1FB9, 0x1FBA, 0x1FBB, 0x1FBD, 0x1FBE, 0x1FC2, 0x1FC4, 0x1FC5, 0x1FC6, 0x1FC8, 0x1FC9, 0x1FCB, 0x1FCC, 0x1FCD, 0x1FCE, + 0x1FCF, 0x1FD4, 0x1FD7, 0x1FD7, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF, 0x1FE1, 0x1FC6, 0x1FDC, 0x1FCE, 0x1FDC, 0x1FE1, + 0x1FC61FB91FC8, 0x1FBA, 0x1FC5, 0x1FDC, 0x1FDC, 0x1FDC, 0x1FDD, 0x1FCF, 0x1FC61FB91FC8, 0x1FD3, 0x1FD3, 0x1FC1, 0x1FC1, 0x1FBF, 0x1FBF, + 0x1FD2, 0x1FD2, 0x1FE3, 0x1FE3, 0x2005, 0x2005, 0x200A, 0x200A, 0x200B, 0x200B, 0x200E, 0x200E, 0x2015, 0x2015, 0x2018, + 0x2018, 0x201C, 0x201C, 0x1FC8, 0x1FD4, 0x1FD7, 0x1FC7, 0x1FC5, 0x1FBE, 0x611, 0x1FE5, 0x1FE5, 0x1FD7, 0x1FD1, 0x1FD1, + 0x1FD6, 0x1FD9, 0x1FD8, 0x1FDA, 0x205A, 0x205A, 0x2050, 0x2036, 0x205E, 0x2074, 0x2088, 0x2088, 0x2091, 0x20BC, 0x20E2, + 0x2119, 0x2096, 0x2080, 0x211D, 0x217F, 0x2022, 0x202E, 0x2032, 0x2036, 0x204A, 0x205A, 0x2062, 0x206C, 0x2080, 0x208D, + 0x2096, 0x20B0, 0x20C3, 0x20CC, 0x20E7, 0x20EF, 0x20FC, 0x2105, 0x210E, 0x211D, 0x212E, 0x2132, 0x2159, 0x2164, 0x2183, + 0x2188, 0x218F, 0x2194, 0x2198, 0x21A5, 0x21A9, 0x21AF, 0x2022, 0x202E, 0x2032, 0x2036, 0x204A, 0x205A, 0x2062, 0x206C, + 0x2080, 0x208D, 0x2096, 0x20B0, 0x20C3, 0x20CC, 0x20E7, 0x20EF, 0x20FC, 0x2105, 0x210E, 0x211D, 0x212E, 0x2132, 0x2159, + 0x2164, 0x2183, 0x2188, 0x218F, 0x2194, 0x2198, 0x21A5, 0x21A9, 0x21AF, 0x205A, 0x205A, 0x2050, 0x2036, 0x205E, 0x2074, + 0x2088, 0x2088, 0x2091, 0x20BC, 0x20E2, 0x2119, 0x2096, 0x2080, 0x211D, 0x217F, 0x2148, 0x2148, 0x21A0, 0x21A0, 0x21B4, + 0x21B4, 0x21B8, 0x21B8, 0x21C2, 0x21C2, 0x21BD, 0x21BD, 0x21C7, 0x21C7, 0x21CB, 0x21CB, 0x21CF, 0x21CF, 0x21D3, 0x21D3, + 0x21D7, 0x21D7, 0x21D7, 0x21D7, 0x212A, 0x212A, 0x2155, 0x2155, 0x2151, 0x2151, 0x214C, 0x214C, 0x20F8, 0x20F8, 0x4F7, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2084, 0x2084, 0x219C, 0x219C, 0x2100, 0x2100, 0x2036, 0x2036, + 0x203A, 0x203A, 0x2042, 0x2042, 0x2068, 0x2068, 0x2056, 0x2056, 0x209A, 0x209A, 0x20AA, 0x20AA, 0x20A6, 
0x20A6, 0x20A2, + 0x20A2, 0x20D5, 0x20D5, 0x20DE, 0x20DE, 0x20F4, 0x20F4, 0x21DC, 0x21DC, 0x210A, 0x210A, 0x2114, 0x2114, 0x2121, 0x2121, + 0x2125, 0x2125, 0x213E, 0x213E, 0x215F, 0x215F, 0x216A, 0x216A, 0x2172, 0x2172, 0x2142, 0x2142, 0x2177, 0x2177, 0x217B, + 0x217B, 0x21E1, 0x2062, 0x2062, 0x209E, 0x209E, 0x20B5, 0x20B5, 0x20D9, 0x20D9, 0x20D1, 0x20D1, 0x216E, 0x216E, 0x20C7, + 0x20C7, 0x21E1, 0x2022, 0x2022, 0x2022, 0x2022, 0x202A, 0x202A, 0x205A, 0x205A, 0x2026, 0x2026, 0x2026, 0x2026, 0x2062, + 0x2062, 0x206C, 0x206C, 0x2079, 0x2079, 0x2080, 0x2080, 0x2080, 0x2080, 0x20E7, 0x20E7, 0x20EB, 0x20EB, 0x20EB, 0x20EB, + 0x21A5, 0x21A5, 0x211D, 0x211D, 0x211D, 0x211D, 0x211D, 0x211D, 0x2164, 0x2164, 0x2046, 0x2046, 0x2194, 0x2194, 0x203E, + 0x203E, 0x2136, 0x2136, 0x213A, 0x213A, 0x204E, 0x204E, 0x2055, 0x2055, 0x2071, 0x2071, 0x207E, 0x207E, 0x20C1, 0x20C1, + 0x20E6, 0x20E6, 0x2109, 0x2109, 0x2113, 0x2113, 0x2072, 0x2072, 0x20BA, 0x20BA, 0x20C2, 0x20C2, 0x2104, 0x2104, 0x21B3, + 0x21B3, 0x20AF, 0x20AF, 0x21E0, 0x21E0, 0x20AE, 0x20AE, 0x20BB, 0x20BB, 0x20DD, 0x20DD, 0x20F3, 0x20F3, 0x2146, 0x2146, + 0x20D0, 0x20D0, 0x2066, 0x2066, 0x2168, 0x2168, 0x20B9, 0x20B9, 0x8530FBC0, 0x2290, 0x2291, 0x2292, 0x2293, 0x2294, 0x2295, + 0x2296, 0x2297, 0x2298, 0x2299, 0x229A, 0x229B, 0x229C, 0x229D, 0x229E, 0x229F, 0x22A0, 0x22A1, 0x22A2, 0x22A3, 0x22A4, + 0x22A5, 0x22A6, 0x22A7, 0x22A8, 0x22A9, 0x22AA, 0x22AB, 0x22AC, 0x22AD, 0x22AE, 0x22AF, 0x22B0, 0x22B1, 0x22B2, 0x22B3, + 0x22B4, 0x22B5, 0x8557FBC0, 0x8558FBC0, 0x22B6, 0x3CB, 0x3CC, 0x262, 0x226, 0x269, 0x3CD, 0x8560FBC0, 0x2290, 0x2291, 0x2292, + 0x2293, 0x2294, 0x2295, 0x2296, 0x2297, 0x2298, 0x2299, 0x229A, 0x229B, 0x229C, 0x229D, 0x229E, 0x229F, 0x22A0, 0x22A1, + 0x22A2, 0x22A3, 0x22A4, 0x22A5, 0x22A6, 0x22A7, 0x22A8, 0x22A9, 0x22AA, 0x22AB, 0x22AC, 0x22AD, 0x22AE, 0x22AF, 0x22B0, + 0x22B1, 0x22B2, 0x22B3, 0x22B4, 0x22B5, 0x22B12294, 0x8588FBC0, 0x23A, 0x20E, 0x858BFBC0, 0x858CFBC0, 0x4F8, 0x4F9, 0x1C15, 0x8590FBC0, + 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x3CE, 0x0, 0x3CF, 0x0, 0x0, 0x3D0, 0x0, 0x0, 0x3D1, 0x0, 0x85C8FBC0, 0x85C9FBC0, 0x85CAFBC0, 0x85CBFBC0, 0x85CCFBC0, + 0x85CDFBC0, 0x85CEFBC0, 0x85CFFBC0, 0x22B7, 0x22B8, 0x22B9, 0x22BA, 0x22BB, 0x22BC, 0x22BD, 0x22BE, 0x22BF, 0x22C0, 0x22C1, 0x22C1, + 0x22C2, 0x22C3, 0x22C3, 0x22C4, 0x22C4, 0x22C5, 0x22C6, 0x22C7, 0x22C7, 0x22C8, 0x22C8, 0x22C9, 0x22CA, 0x22CB, 0x22CC, + 0x85EBFBC0, 0x85ECFBC0, 0x85EDFBC0, 0x85EEFBC0, 0x85EFFBC0, 0x22BC22BC, 0x22C022BC, 0x22C022C0, 0x3D2, 0x3D3, 0x85F5FBC0, 0x85F6FBC0, 0x85F7FBC0, 0x85F8FBC0, 0x85F9FBC0, + 0x85FAFBC0, 0x85FBFBC0, 0x85FCFBC0, 0x85FDFBC0, 0x85FEFBC0, 0x85FFFBC0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x62D, 0x62F, 0x4FA, + 0x39C, 0x39E, 0x1C16, 0x227, 0x228, 0x4FD, 0x4FE, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x235, 0x0, 0x861DFBC0, 0x23B, 0x26A, 0x23CF, 0x22FD, 0x22FE, 0x22FF, 0x2302, 0x2303, 0x2307, + 0x230B, 0x230D, 0x231C, 0x231D, 0x231E, 0x2325, 0x232C, 0x232D, 0x2337, 0x2338, 0x2346, 0x2347, 0x2359, 0x235A, 0x2364, + 0x2365, 0x236A, 0x236B, 0x236E, 0x236F, 0x2398, 0x2399, 0x23CC, 0x23CD, 0x23CE, 0x0, 0x2376, 0x2382, 0x2387, 0x239C, + 0x23A3, 0x23A7, 0x23B1, 0x23B7, 0x23C5, 0x23C6, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1C3D, 0x1C3E, 0x1C3F, + 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x39A, 0x229, 0x22A, 0x392, 0x230C, 0x2381, 0x0, 0x2301, + 0x2300, 0x2304, 0x22FD, 0x22FD230B, 0x22FD23B7, 0x22FD23BB, 0x22FD23C6, 0x231F, 0x2320, 0x230E, 0x2321, 0x2322, 0x230F, 0x2323, 0x2310, + 0x232E, 0x232F, 0x2326, 0x2327, 0x2330, 0x2328, 0x232A, 0x2339, 0x233A, 0x233B, 0x233C, 0x233D, 0x233E, 0x2340, 0x2341, + 0x2342, 0x2348, 0x2349, 0x234A, 0x234B, 0x234C, 0x234D, 0x234E, 0x234F, 0x2350, 0x235B, 0x235C, 
0x235D, 0x2366, 0x2368, + 0x236C, 0x2370, 0x2377, 0x2378, 0x237A, 0x237B, 0x237D, 0x237E, 0x2383, 0x2385, 0x2388, 0x2389, 0x238A, 0x238B, 0x238D, + 0x238E, 0x2390, 0x2392, 0x2393, 0x2394, 0x2395, 0x2396, 0x239D, 0x239E, 0x239F, 0x23A0, 0x23AD, 0x23A8, 0x23AA, 0x23AB, + 0x23AC, 0x23B2, 0x2329, 0x23B6, 0x23B3, 0x23B3, 0x23B4, 0x23B8, 0x23B9, 0x23BA, 0x23BB, 0x23BC, 0x23BD, 0x23BE, 0x23BF, + 0x23C7, 0x23C8, 0x23C9, 0x23C1, 0x23CA, 0x23CB, 0x23D4, 0x23D4, 0x279, 0x23B6, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x4FF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x23B7, 0x23C6, 0x0, 0x0, 0x500, + 0x0, 0x0, 0x0, 0x0, 0x2343, 0x2351, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, + 0x1C46, 0x235E, 0x2369, 0x2371, 0x22FD, 0x23A3, 0x23B5, 0x2C4, 0x27A, 0x27B, 0x23C, 0x23D, 0x23E, 0x23F, 0x240, + 0x241, 0x26B, 0x3D4, 0x3D5, 0x3D6, 0x3D7, 0x870EFBC0, 0x0, 0x23D7, 0x0, 0x23D8, 0x23D9, 0x23D9, 0x23DB, 0x23DA, + 0x23DC, 0x23DD, 0x23DE, 0x23E0, 0x23E1, 0x23E1, 0x23E2, 0x23E3, 0x23E4, 0x23E6, 0x23E7, 0x23E8, 0x23E9, 0x23E9, 0x23EA, + 0x23EB, 0x23EB, 0x23ED, 0x23EE, 0x23EF, 0x23F0, 0x23F1, 0x23D8, 0x23D9, 0x23DB, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x874BFBC0, 0x874CFBC0, 0x23DF, 0x23E5, 0x23EC, 0x2311, 0x2312, 0x2313, + 0x2314, 0x2315, 0x2316, 0x2318, 0x2331, 0x2332, 0x2344, 0x2345, 0x2352, 0x235F, 0x2372, 0x2373, 0x2374, 0x237F, 0x2380, + 0x2397, 0x239A, 0x239B, 0x23A4, 0x23A5, 0x23AE, 0x23AF, 0x23B0, 0x23A1, 0x2353, 0x2354, 0x2360, 0x2333, 0x2334, 0x2361, + 0x2355, 0x2335, 0x2305, 0x2306, 0x23D0, 0x23D1, 0x23D2, 0x23C2, 0x23C3, 0x23D5, 0x23D6, 0x2336, 0x2362, 0x2363, 0x238C, + 0x240B, 0x240E, 0x240F, 0x2410, 0x2412, 0x2413, 0x2414, 0x2415, 0x2418, 0x241A, 0x241B, 0x241C, 0x241E, 0x2422, 0x2423, + 0x2425, 0x2426, 0x242A, 0x242B, 0x242C, 0x242D, 0x242E, 0x242F, 0x2430, 0x241F, 0x240C, 0x240D, 0x241D, 0x2411, 0x2427, + 0x2428, 0x2429, 0x2420, 0x2421, 0x2416, 0x2417, 
0x2424, 0x2419, 0x2432, 0x2433, 0x2434, 0x2435, 0x2436, 0x2437, 0x2438, + 0x2439, 0x243A, 0x243B, 0x243C, 0x2431, 0x87B2FBC0, 0x87B3FBC0, 0x87B4FBC0, 0x87B5FBC0, 0x87B6FBC0, 0x87B7FBC0, 0x87B8FBC0, 0x87B9FBC0, 0x87BAFBC0, 0x87BBFBC0, + 0x87BCFBC0, 0x87BDFBC0, 0x87BEFBC0, 0x87BFFBC0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x243D, + 0x243E, 0x243F, 0x2440, 0x2441, 0x2442, 0x2443, 0x2444, 0x2445, 0x2446, 0x2447, 0x2448, 0x2449, 0x244A, 0x244B, 0x244C, + 0x244D, 0x244E, 0x244F, 0x2450, 0x2451, 0x2452, 0x2453, 0x2454, 0x2455, 0x2456, 0x2457, 0x2458, 0x2459, 0x245A, 0x2449, + 0x244A, 0x244C, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x245B, 0x245C, 0x512, 0x2C5, + 0x22B, 0x263, 0x0, 0x87FBFBC0, 0x87FCFBC0, 0x87FDFBC0, 0x87FEFBC0, 0x87FFFBC0, 0x22E3, 0x22E4, 0x22E5, 0x22E6, 0x22E7, 0x22E8, 0x22E9, + 0x22EA, 0x22EB, 0x22EC, 0x22ED, 0x22EE, 0x22EF, 0x22F0, 0x22F1, 0x22F2, 0x22F3, 0x22F4, 0x22F5, 0x22F6, 0x22F7, 0x22F8, + 0x22F9, 0x22FA, 0x0, 0x0, 0x22FB, 0x22FC, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x882EFBC0, 0x882FFBC0, 0x242, 0x243, 0x244, 0x245, + 0x246, 0x247, 0x248, 0x249, 0x24A, 0x24B, 0x24C, 0x24D, 0x24E, 0x24F, 0x250, 0x883FFBC0, 0x23F2, 0x23F3, 0x23F4, + 0x23F5, 0x23F6, 0x23F7, 0x23F8, 0x23F9, 0x23FA, 0x23FB, 0x23FC, 0x23FD, 0x23FE, 0x23FF, 0x2400, 0x2401, 0x2402, 0x2403, + 0x2404, 0x2405, 0x2406, 0x2407, 0x2408, 0x2409, 0x240A, 0x0, 0x0, 0x0, 0x885CFBC0, 0x885DFBC0, 0x3D8, 0x885FFBC0, 0x8860FBC0, + 0x8861FBC0, 0x8862FBC0, 0x8863FBC0, 0x8864FBC0, 0x8865FBC0, 0x8866FBC0, 0x8867FBC0, 0x8868FBC0, 0x8869FBC0, 0x886AFBC0, 0x886BFBC0, 0x886CFBC0, 0x886DFBC0, 0x886EFBC0, 0x886FFBC0, + 0x8870FBC0, 0x8871FBC0, 0x8872FBC0, 0x8873FBC0, 0x8874FBC0, 0x8875FBC0, 0x8876FBC0, 0x8877FBC0, 0x8878FBC0, 0x8879FBC0, 0x887AFBC0, 0x887BFBC0, 0x887CFBC0, 0x887DFBC0, 0x887EFBC0, + 0x887FFBC0, 0x8880FBC0, 0x8881FBC0, 0x8882FBC0, 0x8883FBC0, 0x8884FBC0, 0x8885FBC0, 0x8886FBC0, 0x8887FBC0, 
0x8888FBC0, 0x8889FBC0, 0x888AFBC0, 0x888BFBC0, 0x888CFBC0, 0x888DFBC0, + 0x888EFBC0, 0x888FFBC0, 0x8890FBC0, 0x8891FBC0, 0x8892FBC0, 0x8893FBC0, 0x8894FBC0, 0x8895FBC0, 0x8896FBC0, 0x8897FBC0, 0x8898FBC0, 0x8899FBC0, 0x889AFBC0, 0x889BFBC0, 0x889CFBC0, + 0x889DFBC0, 0x889EFBC0, 0x889FFBC0, 0x2317, 0x2319, 0x232B, 0x236D, 0x237C, 0x2386, 0x23A2, 0x23A6, 0x2308, 0x2309, 0x2356, 0x23C4, + 0x230A, 0x230B, 0x233F, 0x2367, 0x2391, 0x23C0, 0x2357, 0x2375, 0x238F, 0x88B5FBC0, 0x231A, 0x231B, 0x2324, 0x2358, 0x23D3, + 0x2379, 0x2384, 0x23A9, 0x88BEFBC0, 0x88BFFBC0, 0x88C0FBC0, 0x88C1FBC0, 0x88C2FBC0, 0x88C3FBC0, 0x88C4FBC0, 0x88C5FBC0, 0x88C6FBC0, 0x88C7FBC0, 0x88C8FBC0, 0x88C9FBC0, + 0x88CAFBC0, 0x88CBFBC0, 0x88CCFBC0, 0x88CDFBC0, 0x88CEFBC0, 0x88CFFBC0, 0x88D0FBC0, 0x88D1FBC0, 0x88D2FBC0, 0x88D3FBC0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x265E, 0x265F, + 0x2660, 0x2666, 0x2667, 0x2668, 0x2669, 0x266A, 0x266C, 0x266E, 0x266F, 0x2670, 0x2671, 0x2672, 0x2673, 0x2674, 0x2675, + 0x2676, 0x2677, 0x2678, 0x267A, 0x267B, 0x267C, 0x267D, 0x267E, 0x2681, 0x2682, 0x2683, 0x2684, 0x2686, 0x2688, 0x2689, + 0x268A, 0x268B, 0x268C, 0x268D, 0x268E, 0x268E, 0x268F, 0x2690, 0x2691, 0x2693, 0x2694, 0x2695, 0x2697, 0x2697, 0x2698, + 0x2699, 0x2699, 0x269A, 0x269B, 0x269C, 0x269D, 0x269E, 0x26A7, 0x26A8, 0x0, 0x269F, 0x26A6, 0x26AC, 0x26AD, 0x26AE, + 0x26AF, 0x26B0, 0x26B1, 0x26B4, 0x26B6, 0x26B7, 0x26B9, 0x26BA, 0x26BB, 0x26BC, 0x26BD, 0x26BE, 0x26B8, 0x26A9, 0x265B, + 0x0, 0x0, 0x0, 0x0, 0x26B5, 0x26AA, 0x26AB, 0x2676, 0x2677, 0x2678, 0x267E, 0x2686, 0x2688, 0x2690, 0x2695, + 0x266B, 0x266D, 0x26B2, 0x26B3, 0x28E, 0x28F, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, + 0x1C46, 0x3E7, 0x1BFA, 0x265D, 0x2661, 0x2662, 0x2663, 0x2664, 0x2665, 0x2685, 0x267F, 
0x2696, 0x2679, 0x2680, 0x26A0, + 0x2687, 0x2692, 0x26BF, 0x0, 0x0, 0x0, 0x8984FBC0, 0x26C0, 0x26C1, 0x26C2, 0x26C3, 0x26C4, 0x26C5, 0x26C6, 0x26C8, + 0x898DFBC0, 0x898EFBC0, 0x26CA, 0x26CB, 0x8991FBC0, 0x8992FBC0, 0x26CC, 0x26CD, 0x26CE, 0x26CF, 0x26D0, 0x26D1, 0x26D2, 0x26D3, 0x26D4, + 0x26D5, 0x26D6, 0x26D7, 0x26D8, 0x26D9, 0x26DA, 0x26DB, 0x26DC, 0x26DD, 0x26DE, 0x26DF, 0x26E0, 0x26E1, 0x89A9FBC0, 0x26E2, + 0x26E3, 0x26E4, 0x26E5, 0x26E6, 0x26E7, 0x26E8, 0x89B1FBC0, 0x26EA, 0x89B3FBC0, 0x89B4FBC0, 0x89B5FBC0, 0x26EC, 0x26ED, 0x26EE, 0x26EF, + 0x89BAFBC0, 0x89BBFBC0, 0x0, 0x26F0, 0x26F1, 0x26F2, 0x26F3, 0x26F4, 0x26F5, 0x26F6, 0x26F7, 0x89C5FBC0, 0x89C6FBC0, 0x26FA, 0x26FB, + 0x89C9FBC0, 0x89CAFBC0, 0x26FC, 0x26FD, 0x26FE, 0x26FE26DD, 0x89CFFBC0, 0x89D0FBC0, 0x89D1FBC0, 0x89D2FBC0, 0x89D3FBC0, 0x89D4FBC0, 0x89D5FBC0, 0x89D6FBC0, 0x26FF, + 0x89D8FBC0, 0x89D9FBC0, 0x89DAFBC0, 0x89DBFBC0, 0x26DA, 0x26DB, 0x89DEFBC0, 0x26E7, 0x26C7, 0x26C9, 0x26F8, 0x26F9, 0x89E4FBC0, 0x89E5FBC0, 0x1C3D, + 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x26E9, 0x26EB, 0x1C17, 0x1C18, 0x1A96, 0x1A97, + 0x1A98, 0x1A99, 0x1A9A, 0x1A9B, 0x513, 0x1C19, 0x89FCFBC0, 0x89FDFBC0, 0x89FEFBC0, 0x89FFFBC0, 0x8A00FBC0, 0x0, 0x0, 0x0, 0x8A04FBC0, + 0x2705, 0x2706, 0x270A, 0x270B, 0x2702, 0x2703, 0x8A0BFBC0, 0x8A0CFBC0, 0x8A0DFBC0, 0x8A0EFBC0, 0x270C, 0x2707, 0x8A11FBC0, 0x8A12FBC0, 0x2704, + 0x2708, 0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719, 0x271A, 0x271B, 0x271C, 0x271D, + 0x271E, 0x271F, 0x2720, 0x2721, 0x2722, 0x2723, 0x8A29FBC0, 0x2724, 0x2725, 0x2726, 0x2727, 0x2728, 0x2729, 0x272B, 0x8A31FBC0, + 0x272C, 0x272C, 0x8A34FBC0, 0x272D, 0x270D, 0x8A37FBC0, 0x270D, 0x270E, 0x8A3AFBC0, 0x8A3BFBC0, 0x0, 0x8A3DFBC0, 0x272F, 0x2730, 0x2731, + 0x2732, 0x2733, 0x8A43FBC0, 0x8A44FBC0, 0x8A45FBC0, 0x8A46FBC0, 0x2734, 0x2735, 0x8A49FBC0, 0x8A4AFBC0, 0x2736, 0x2737, 0x2738, 0x8A4EFBC0, 0x8A4FFBC0, + 0x8A50FBC0, 0x270F, 0x8A52FBC0, 0x8A53FBC0, 
0x8A54FBC0, 0x8A55FBC0, 0x8A56FBC0, 0x8A57FBC0, 0x8A58FBC0, 0x2711, 0x2712, 0x2717, 0x272E, 0x8A5DFBC0, 0x2725, + 0x8A5FFBC0, 0x8A60FBC0, 0x8A61FBC0, 0x8A62FBC0, 0x8A63FBC0, 0x8A64FBC0, 0x8A65FBC0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, + 0x1C45, 0x1C46, 0x0, 0x0, 0x2709, 0x2701, 0x2700, 0x272A, 0x8A76FBC0, 0x8A77FBC0, 0x8A78FBC0, 0x8A79FBC0, 0x8A7AFBC0, 0x8A7BFBC0, 0x8A7CFBC0, + 0x8A7DFBC0, 0x8A7EFBC0, 0x8A7FFBC0, 0x8A80FBC0, 0x0, 0x0, 0x0, 0x8A84FBC0, 0x273A, 0x273B, 0x273C, 0x273D, 0x273E, 0x273F, 0x2740, + 0x2742, 0x2744, 0x8A8EFBC0, 0x2745, 0x2746, 0x2747, 0x8A92FBC0, 0x2748, 0x2749, 0x274A, 0x274B, 0x274C, 0x274D, 0x274E, 0x274F, + 0x2750, 0x2751, 0x2753, 0x2754, 0x2755, 0x2756, 0x2757, 0x2758, 0x2759, 0x275A, 0x275B, 0x275C, 0x275D, 0x275E, 0x8AA9FBC0, + 0x275F, 0x2760, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x8AB1FBC0, 0x2766, 0x276C, 0x8AB4FBC0, 0x2767, 0x2768, 0x2769, 0x276A, + 0x276B, 0x8ABAFBC0, 0x8ABBFBC0, 0x0, 0x276D, 0x276E, 0x276F, 0x2770, 0x2771, 0x2772, 0x2773, 0x2774, 0x2777, 0x8AC6FBC0, 0x2778, + 0x2779, 0x277A, 0x8ACAFBC0, 0x277B, 0x277C, 0x277D, 0x8ACEFBC0, 0x8ACFFBC0, 0x2739, 0x8AD1FBC0, 0x8AD2FBC0, 0x8AD3FBC0, 0x8AD4FBC0, 0x8AD5FBC0, 0x8AD6FBC0, + 0x8AD7FBC0, 0x8AD8FBC0, 0x8AD9FBC0, 0x8ADAFBC0, 0x8ADBFBC0, 0x8ADCFBC0, 0x8ADDFBC0, 0x8ADEFBC0, 0x8ADFFBC0, 0x2741, 0x2743, 0x2775, 0x2776, 0x8AE4FBC0, 0x8AE5FBC0, + 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x3EC, 0x1C1A, 0x8AF2FBC0, 0x8AF3FBC0, 0x8AF4FBC0, + 0x8AF5FBC0, 0x8AF6FBC0, 0x8AF7FBC0, 0x8AF8FBC0, 0x2752, 0x8AFAFBC0, 0x8AFBFBC0, 0x8AFCFBC0, 0x8AFDFBC0, 0x8AFEFBC0, 0x8AFFFBC0, 0x8B00FBC0, 0x0, 0x0, 0x0, + 0x8B04FBC0, 0x277E, 0x277F, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2786, 0x8B0DFBC0, 0x8B0EFBC0, 0x2788, 0x2789, 0x8B11FBC0, 0x8B12FBC0, + 0x278A, 0x278B, 0x278C, 0x278D, 0x278E, 0x278F, 0x2790, 0x2791, 0x2792, 0x2793, 0x2794, 0x2795, 0x2796, 0x2797, 0x2798, + 0x2799, 0x279A, 0x279B, 0x279C, 0x279D, 0x279E, 0x279F, 
0x8B29FBC0, 0x27A0, 0x27A1, 0x27A2, 0x27A3, 0x27A4, 0x27A5, 0x27A7, + 0x8B31FBC0, 0x27A8, 0x27A9, 0x8B34FBC0, 0x27AA, 0x27AC, 0x27AD, 0x27AE, 0x27AF, 0x8B3AFBC0, 0x8B3BFBC0, 0x0, 0x27B0, 0x27B1, 0x27B2, + 0x27B3, 0x27B4, 0x27B5, 0x27B6, 0x27B7, 0x8B45FBC0, 0x8B46FBC0, 0x27BA, 0x27BB, 0x8B49FBC0, 0x8B4AFBC0, 0x27BC, 0x27BD, 0x27BE, 0x8B4EFBC0, + 0x8B4FFBC0, 0x8B50FBC0, 0x8B51FBC0, 0x8B52FBC0, 0x8B53FBC0, 0x8B54FBC0, 0x8B55FBC0, 0x27BF, 0x27C0, 0x8B58FBC0, 0x8B59FBC0, 0x8B5AFBC0, 0x8B5BFBC0, 0x2798, 0x2799, + 0x8B5EFBC0, 0x27A6, 0x2785, 0x2787, 0x27B8, 0x27B9, 0x8B64FBC0, 0x8B65FBC0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, + 0x1C44, 0x1C45, 0x1C46, 0x514, 0x27AB, 0x1A9C, 0x1A9D, 0x1A9E, 0x1A9F, 0x1AA0, 0x1AA1, 0x8B78FBC0, 0x8B79FBC0, 0x8B7AFBC0, 0x8B7BFBC0, + 0x8B7CFBC0, 0x8B7DFBC0, 0x8B7EFBC0, 0x8B7FFBC0, 0x8B80FBC0, 0x8B81FBC0, 0x0, 0x27CE, 0x8B84FBC0, 0x27C2, 0x27C3, 0x27C4, 0x27C5, 0x27C6, 0x27C7, + 0x8B8BFBC0, 0x8B8CFBC0, 0x8B8DFBC0, 0x27C8, 0x27C9, 0x27CA, 0x8B91FBC0, 0x27CB, 0x27CC, 0x27CD, 0x27CF, 0x8B96FBC0, 0x8B97FBC0, 0x8B98FBC0, 0x27D0, + 0x27D1, 0x8B9BFBC0, 0x27E1, 0x8B9DFBC0, 0x27D2, 0x27D3, 0x8BA0FBC0, 0x8BA1FBC0, 0x8BA2FBC0, 0x27D4, 0x27D5, 0x8BA5FBC0, 0x8BA6FBC0, 0x8BA7FBC0, 0x27D6, + 0x27E0, 0x27D7, 0x8BABFBC0, 0x8BACFBC0, 0x8BADFBC0, 0x27D8, 0x27D9, 0x27DA, 0x27DF, 0x27DB, 0x27DE, 0x27DD, 0x27DC, 0x27E2, 0x27E3, + 0x27E4, 0x27E5, 0x8BBAFBC0, 0x8BBBFBC0, 0x8BBCFBC0, 0x8BBDFBC0, 0x27E6, 0x27E7, 0x27E8, 0x27E9, 0x27EA, 0x8BC3FBC0, 0x8BC4FBC0, 0x8BC5FBC0, 0x27EB, + 0x27EC, 0x27ED, 0x8BC9FBC0, 0x27EE, 0x27EF, 0x27F0, 0x27F1, 0x8BCEFBC0, 0x8BCFFBC0, 0x27C1, 0x8BD1FBC0, 0x8BD2FBC0, 0x8BD3FBC0, 0x8BD4FBC0, 0x8BD5FBC0, + 0x8BD6FBC0, 0x27F2, 0x8BD8FBC0, 0x8BD9FBC0, 0x8BDAFBC0, 0x8BDBFBC0, 0x8BDCFBC0, 0x8BDDFBC0, 0x8BDEFBC0, 0x8BDFFBC0, 0x8BE0FBC0, 0x8BE1FBC0, 0x8BE2FBC0, 0x8BE3FBC0, 0x8BE4FBC0, + 0x8BE5FBC0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1AA8, 0x1AA9, 0x1AAA, 0x515, + 0x516, 0x517, 0x518, 
0x519, 0x51A, 0x1C1C, 0x51B, 0x8BFBFBC0, 0x8BFCFBC0, 0x8BFDFBC0, 0x8BFEFBC0, 0x8BFFFBC0, 0x0, 0x0, 0x0, + 0x0, 0x8C04FBC0, 0x27F3, 0x27F4, 0x27F5, 0x27F6, 0x27F7, 0x27F8, 0x27F9, 0x27FB, 0x8C0DFBC0, 0x27FD, 0x27FE, 0x27FF, 0x8C11FBC0, + 0x2800, 0x2801, 0x2802, 0x2803, 0x2804, 0x2805, 0x2806, 0x2807, 0x2808, 0x280A, 0x280B, 0x280D, 0x280E, 0x280F, 0x2810, + 0x2811, 0x2812, 0x2813, 0x2814, 0x2815, 0x2816, 0x2817, 0x2818, 0x8C29FBC0, 0x2819, 0x281A, 0x281B, 0x281C, 0x281D, 0x281E, + 0x281F, 0x2820, 0x2821, 0x2827, 0x2828, 0x2822, 0x2823, 0x2824, 0x2825, 0x2826, 0x8C3AFBC0, 0x8C3BFBC0, 0x8C3CFBC0, 0x282A, 0x282B, + 0x282C, 0x282D, 0x282E, 0x282F, 0x2830, 0x2831, 0x8C45FBC0, 0x2834, 0x2835, 0x2836, 0x8C49FBC0, 0x2837, 0x2838, 0x2839, 0x283A, + 0x8C4EFBC0, 0x8C4FFBC0, 0x8C50FBC0, 0x8C51FBC0, 0x8C52FBC0, 0x8C53FBC0, 0x8C54FBC0, 0x283B, 0x283C, 0x8C57FBC0, 0x2809, 0x280C, 0x2829, 0x8C5BFBC0, 0x8C5CFBC0, + 0x8C5DFBC0, 0x8C5EFBC0, 0x8C5FFBC0, 0x27FA, 0x27FC, 0x2832, 0x2833, 0x8C64FBC0, 0x8C65FBC0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, + 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x8C70FBC0, 0x8C71FBC0, 0x8C72FBC0, 0x8C73FBC0, 0x8C74FBC0, 0x8C75FBC0, 0x8C76FBC0, 0x8C77FBC0, 0x1C3D, 0x1C3E, 0x1C3F, + 0x1C40, 0x1C3E, 0x1C3F, 0x1C40, 0x51C, 0x2874, 0x0, 0x0, 0x0, 0x8C84FBC0, 0x283D, 0x283E, 0x283F, 0x2840, 0x2841, + 0x2842, 0x2843, 0x2845, 0x8C8DFBC0, 0x2847, 0x2848, 0x2849, 0x8C91FBC0, 0x284A, 0x284B, 0x284C, 0x284D, 0x284E, 0x284F, 0x2850, + 0x2851, 0x2852, 0x2853, 0x2854, 0x2855, 0x2856, 0x2857, 0x2858, 0x2859, 0x285A, 0x285B, 0x285C, 0x285D, 0x285E, 0x285F, + 0x2860, 0x8CA9FBC0, 0x2861, 0x2862, 0x2863, 0x2864, 0x2865, 0x2866, 0x2867, 0x2868, 0x2869, 0x286F, 0x8CB4FBC0, 0x286A, 0x286B, + 0x286C, 0x286D, 0x286E, 0x8CBAFBC0, 0x8CBBFBC0, 0x0, 0x2871, 0x2875, 0x2876, 0x2877, 0x2878, 0x2879, 0x287A, 0x287B, 0x8CC5FBC0, + 0x287E, 0x287F, 0x2880, 0x8CC9FBC0, 0x2881, 0x2882, 0x2883, 0x2884, 0x8CCEFBC0, 0x8CCFFBC0, 0x8CD0FBC0, 0x8CD1FBC0, 0x8CD2FBC0, 0x8CD3FBC0, 0x8CD4FBC0, + 0x2885, 
0x2886, 0x8CD7FBC0, 0x8CD8FBC0, 0x8CD9FBC0, 0x8CDAFBC0, 0x8CDBFBC0, 0x8CDCFBC0, 0x8CDDFBC0, 0x2870, 0x8CDFFBC0, 0x2844, 0x2846, 0x287C, 0x287D, + 0x8CE4FBC0, 0x8CE5FBC0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x8CF0FBC0, 0x2872, 0x2873, + 0x8CF3FBC0, 0x8CF4FBC0, 0x8CF5FBC0, 0x8CF6FBC0, 0x8CF7FBC0, 0x8CF8FBC0, 0x8CF9FBC0, 0x8CFAFBC0, 0x8CFBFBC0, 0x8CFCFBC0, 0x8CFDFBC0, 0x8CFEFBC0, 0x8CFFFBC0, 0x8D00FBC0, 0x0, + 0x0, 0x0, 0x8D04FBC0, 0x2887, 0x2888, 0x2889, 0x288A, 0x288C, 0x288D, 0x288E, 0x2890, 0x8D0DFBC0, 0x2892, 0x2893, 0x2894, + 0x8D11FBC0, 0x2895, 0x2896, 0x2897, 0x2898, 0x2899, 0x289A, 0x289B, 0x289C, 0x289D, 0x289E, 0x289F, 0x28A0, 0x28A1, 0x28A2, + 0x28A3, 0x28A4, 0x28A5, 0x28A6, 0x28A7, 0x28A8, 0x28A9, 0x28AA, 0x28AB, 0x28AC, 0x28AD, 0x28AE, 0x28AF, 0x28B0, 0x28B1, + 0x28B2, 0x28B3, 0x28BC, 0x28B4, 0x28BA, 0x28BB, 0x28B5, 0x28B6, 0x28B7, 0x28B8, 0x28B9, 0x28BD, 0x8D3BFBC0, 0x8D3CFBC0, 0x28BE, + 0x28BF, 0x28C0, 0x28C1, 0x28C2, 0x28C3, 0x28C4, 0x28C5, 0x8D45FBC0, 0x28C8, 0x28C9, 0x28CA, 0x8D49FBC0, 0x28CB, 0x28CC, 0x28CD, + 0x28CF, 0x28CF28B3, 0x51D, 0x8D50FBC0, 0x8D51FBC0, 0x8D52FBC0, 0x8D53FBC0, 0x28CF28B1, 0x28CF28B2, 0x28CF28BB, 0x28CE, 0x1AAB, 0x1AAC, 0x1AAD, 0x1AAE, + 0x1AAF, 0x1AB0, 0x1AB1, 0x288B, 0x288F, 0x2891, 0x28C6, 0x28C7, 0x8D64FBC0, 0x8D65FBC0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, + 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1AB2, 0x1AB3, 0x1AB4, 0x1AB5, 0x1AB6, 0x1AB7, 0x1AB8, 0x1AB9, 0x1ABA, 0x51E, + 0x28CF28A6, 0x28CF28AB, 0x28CF28B3, 0x28CF28B4, 0x28CF28BA, 0x28CF2898, 0x8D80FBC0, 0x8D81FBC0, 0x0, 0x0, 0x8D84FBC0, 0x28D0, 0x28D1, 0x28D2, 0x28D3, + 0x28D4, 0x28D5, 0x28D6, 0x28D7, 0x28D8, 0x28D9, 0x28DA, 0x28DB, 0x28DC, 0x28DD, 0x28DE, 0x28DF, 0x28E0, 0x28E1, 0x8D97FBC0, + 0x8D98FBC0, 0x8D99FBC0, 0x28E2, 0x28E3, 0x28E4, 0x28E5, 0x28E6, 0x28E7, 0x28E8, 0x28E9, 0x28EA, 0x28EB, 0x28EC, 0x28ED, 0x28EE, + 0x28EF, 0x28F0, 0x28F1, 0x28F2, 0x28F3, 0x28F4, 0x28F5, 0x28F6, 0x28F7, 0x28F8, 0x28F9, 0x8DB2FBC0, 
0x28FA, 0x28FB, 0x28FC, + 0x28FD, 0x28FE, 0x28FF, 0x2900, 0x2901, 0x2902, 0x8DBCFBC0, 0x2903, 0x8DBEFBC0, 0x8DBFFBC0, 0x2904, 0x2905, 0x2906, 0x2907, 0x2908, + 0x2909, 0x290A, 0x8DC7FBC0, 0x8DC8FBC0, 0x8DC9FBC0, 0x291C, 0x8DCBFBC0, 0x8DCCFBC0, 0x8DCDFBC0, 0x8DCEFBC0, 0x290B, 0x290C, 0x290D, 0x290E, 0x290F, + 0x2910, 0x8DD5FBC0, 0x2911, 0x8DD7FBC0, 0x2912, 0x2916, 0x2917, 0x2918, 0x2919, 0x291A, 0x291B, 0x2914, 0x8DE0FBC0, 0x8DE1FBC0, 0x8DE2FBC0, + 0x8DE3FBC0, 0x8DE4FBC0, 0x8DE5FBC0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x8DF0FBC0, 0x8DF1FBC0, + 0x2913, 0x2915, 0x3ED, 0x8DF5FBC0, 0x8DF6FBC0, 0x8DF7FBC0, 0x8DF8FBC0, 0x8DF9FBC0, 0x8DFAFBC0, 0x8DFBFBC0, 0x8DFCFBC0, 0x8DFDFBC0, 0x8DFEFBC0, 0x8DFFFBC0, 0x8E00FBC0, + 0x2D73, 0x2D74, 0x2D75, 0x2D76, 0x2D77, 0x2D78, 0x2D79, 0x2D7A, 0x2D7B, 0x2D7C, 0x2D7D, 0x2D7E, 0x2D7F, 0x2D80, 0x2D81, + 0x2D82, 0x2D83, 0x2D84, 0x2D85, 0x2D86, 0x2D87, 0x2D88, 0x2D89, 0x2D8A, 0x2D8B, 0x2D8C, 0x2D8D, 0x2D8E, 0x2D8F, 0x2D90, + 0x2D91, 0x2D92, 0x2D93, 0x2D94, 0x2D95, 0x2D96, 0x2D97, 0x2D98, 0x2D99, 0x2D9A, 0x2D9B, 0x2D9C, 0x2D9D, 0x2D9E, 0x2D9F, + 0x2DA0, 0x2DA1, 0x2DA2, 0x2DA3, 0x2DA4, 0x2DA5, 0x2DA6, 0x2DA7, 0x2DA8, 0x2DA9, 0x2DAA, 0x2DAB, 0x2DAC, 0x8E3BFBC0, 0x8E3CFBC0, + 0x8E3DFBC0, 0x8E3EFBC0, 0x1C1D, 0x2DAD, 0x2DAE, 0x2DAF, 0x2DB0, 0x2DB1, 0x2DB2, 0x1BFB, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x3EE, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x3EF, + 0x3F0, 0x8E5CFBC0, 0x8E5DFBC0, 0x8E5EFBC0, 0x8E5FFBC0, 0x8E60FBC0, 0x8E61FBC0, 0x8E62FBC0, 0x8E63FBC0, 0x8E64FBC0, 0x8E65FBC0, 0x8E66FBC0, 0x8E67FBC0, 0x8E68FBC0, 0x8E69FBC0, + 0x8E6AFBC0, 0x8E6BFBC0, 0x8E6CFBC0, 0x8E6DFBC0, 0x8E6EFBC0, 0x8E6FFBC0, 0x8E70FBC0, 0x8E71FBC0, 0x8E72FBC0, 0x8E73FBC0, 0x8E74FBC0, 0x8E75FBC0, 0x8E76FBC0, 0x8E77FBC0, 0x8E78FBC0, + 0x8E79FBC0, 0x8E7AFBC0, 0x8E7BFBC0, 0x8E7CFBC0, 0x8E7DFBC0, 0x8E7EFBC0, 0x8E7FFBC0, 0x8E80FBC0, 0x2DB4, 0x2DB5, 0x8E83FBC0, 0x2DB6, 0x8E85FBC0, 0x8E86FBC0, 
0x2DB7, + 0x2DB8, 0x8E89FBC0, 0x2DBA, 0x8E8BFBC0, 0x8E8CFBC0, 0x2DBC, 0x8E8EFBC0, 0x8E8FFBC0, 0x8E90FBC0, 0x8E91FBC0, 0x8E92FBC0, 0x8E93FBC0, 0x2DBD, 0x2DBE, 0x2DBF, + 0x2DC0, 0x8E98FBC0, 0x2DC1, 0x2DC2, 0x2DC3, 0x2DC4, 0x2DC5, 0x2DC6, 0x2DC7, 0x8EA0FBC0, 0x2DC8, 0x2DC9, 0x2DCA, 0x8EA4FBC0, 0x2DCB, + 0x8EA6FBC0, 0x2DCC, 0x8EA8FBC0, 0x8EA9FBC0, 0x2DB9, 0x2DCD, 0x8EACFBC0, 0x2DCE, 0x2DCF, 0x2DD0, 0x2DD1, 0x2DD2, 0x2DD3, 0x2DD4, 0x2DD5, + 0x2DD6, 0x2DD7, 0x2DD8, 0x2DD9, 0x2DDA, 0x8EBAFBC0, 0x2DDB, 0x2DDC, 0x2DDD, 0x8EBEFBC0, 0x8EBFFBC0, 0x2DDE, 0x2DDF, 0x2DE0, 0x2DE1, + 0x2DE2, 0x8EC5FBC0, 0x1BFC, 0x8EC7FBC0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x8ECEFBC0, 0x8ECFFBC0, 0x1C3D, 0x1C3E, 0x1C3F, + 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x8EDAFBC0, 0x8EDBFBC0, 0x2DC12DCD, 0x2DC82DCD, 0x2DB3, 0x2DBB, 0x8EE0FBC0, 0x8EE1FBC0, + 0x8EE2FBC0, 0x8EE3FBC0, 0x8EE4FBC0, 0x8EE5FBC0, 0x8EE6FBC0, 0x8EE7FBC0, 0x8EE8FBC0, 0x8EE9FBC0, 0x8EEAFBC0, 0x8EEBFBC0, 0x8EECFBC0, 0x8EEDFBC0, 0x8EEEFBC0, 0x8EEFFBC0, 0x8EF0FBC0, + 0x8EF1FBC0, 0x8EF2FBC0, 0x8EF3FBC0, 0x8EF4FBC0, 0x8EF5FBC0, 0x8EF6FBC0, 0x8EF7FBC0, 0x8EF8FBC0, 0x8EF9FBC0, 0x8EFAFBC0, 0x8EFBFBC0, 0x8EFCFBC0, 0x8EFDFBC0, 0x8EFEFBC0, 0x8EFFFBC0, + 0x2E832E6C, 0x526, 0x527, 0x528, 0x3F3, 0x3F4, 0x3F5, 0x3F6, 0x3F7, 0x3F8, 0x3F9, 0x3FC, 0x3FC, 0x3FD, 0x3FE, + 0x3FF, 0x400, 0x401, 0x402, 0x529, 0x258, 0x52A, 0x52B, 0x52C, 0x0, 0x0, 0x52D, 0x52E, 0x52F, 0x530, + 0x531, 0x532, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C3E, 0x1C3F, 0x1C40, + 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C3D, 0x533, 0x0, 0x534, 0x0, 0x535, 0x0, 0x31D, 0x31E, + 0x31F, 0x320, 0x0, 0x0, 0x2E26, 0x2E29, 0x2E2B, 0x2E6B2E2B, 0x2E2D, 0x2E2F, 0x2E31, 0x2E33, 0x8F48FBC0, 0x2E35, 0x2E37, + 0x2E39, 0x2E3B, 0x2E6B2E3B, 0x2E3D, 0x2E3F, 0x2E41, 0x2E43, 0x2E6B2E43, 0x2E45, 0x2E47, 0x2E49, 0x2E4B, 0x2E6B2E4B, 0x2E4D, 0x2E4F, + 0x2E51, 0x2E53, 0x2E6B2E53, 0x2E55, 0x2E57, 0x2E59, 0x2E5B, 0x2E5D, 0x2E5F, 0x2E62, 0x2E64, 
0x2E66, 0x2E68, 0x2E6A, 0x2E6C, + 0x2E672E26, 0x2E5F, 0x2E28, 0x2E61, 0x8F6DFBC0, 0x8F6EFBC0, 0x8F6FFBC0, 0x8F70FBC0, 0x2E76, 0x2E77, 0x2E78, 0x2E7B, 0x2E7C, 0x2E7D, 0x2E7E, + 0x2E7F, 0x2E80, 0x2E81, 0x2E82, 0x2E83, 0x2E84, 0x0, 0x0, 0x2E79, 0x2E7A, 0x0, 0x0, 0x2E85, 0x403, 0x0, + 0x0, 0x2E6E, 0x2E70, 0x2E74, 0x2E75, 0x2E72, 0x2E6F, 0x2E71, 0x2E73, 0x2E27, 0x2E2A, 0x2E2C, 0x2E6B2E2C, 0x2E2E, 0x2E30, + 0x2E32, 0x2E34, 0x8F98FBC0, 0x2E36, 0x2E38, 0x2E3A, 0x2E3C, 0x2E6B2E3C, 0x2E3E, 0x2E40, 0x2E42, 0x2E44, 0x2E6B2E44, 0x2E46, 0x2E48, + 0x2E4A, 0x2E4C, 0x2E6B2E4C, 0x2E4E, 0x2E50, 0x2E52, 0x2E54, 0x2E6B2E54, 0x2E56, 0x2E58, 0x2E5A, 0x2E5C, 0x2E5E, 0x2E60, 0x2E63, + 0x2E65, 0x2E67, 0x2E69, 0x2E6B, 0x2E6D, 0x2E672E27, 0x2E56, 0x2E5E, 0x2E60, 0x8FBDFBC0, 0x536, 0x537, 0x538, 0x539, 0x53A, + 0x53B, 0x53C, 0x53D, 0x0, 0x53E, 0x53F, 0x540, 0x541, 0x542, 0x543, 0x8FCDFBC0, 0x544, 0x545, 0x3FA, 0x3FB, + 0x404, 0x405, 0x406, 0x546, 0x547, 0x548, 0x549, 0x407, 0x408, 0x8FDBFBC0, 0x8FDCFBC0, 0x8FDDFBC0, 0x8FDEFBC0, 0x8FDFFBC0, 0x8FE0FBC0, + 0x8FE1FBC0, 0x8FE2FBC0, 0x8FE3FBC0, 0x8FE4FBC0, 0x8FE5FBC0, 0x8FE6FBC0, 0x8FE7FBC0, 0x8FE8FBC0, 0x8FE9FBC0, 0x8FEAFBC0, 0x8FEBFBC0, 0x8FECFBC0, 0x8FEDFBC0, 0x8FEEFBC0, 0x8FEFFBC0, + 0x8FF0FBC0, 0x8FF1FBC0, 0x8FF2FBC0, 0x8FF3FBC0, 0x8FF4FBC0, 0x8FF5FBC0, 0x8FF6FBC0, 0x8FF7FBC0, 0x8FF8FBC0, 0x8FF9FBC0, 0x8FFAFBC0, 0x8FFBFBC0, 0x8FFCFBC0, 0x8FFDFBC0, 0x8FFEFBC0, + 0x8FFFFBC0, 0x3035, 0x3037, 0x3039, 0x303D, 0x3040, 0x3042, 0x3045, 0x3049, 0x304E, 0x3055, 0x3059, 0x305A, 0x305C, 0x305E, + 0x3061, 0x3064, 0x3068, 0x3069, 0x306A, 0x306D, 0x3070, 0x3074, 0x3075, 0x307B, 0x307E, 0x3081, 0x3083, 0x3085, 0x3089, + 0x308B, 0x3092, 0x3094, 0x309A, 0x30A1, 0x30A2, 0x30A3, 0x30A4, 0x30A5, 0x30A6, 0x30AB, 0x30AC, 0x30AD, 0x30AE, 0x30AF, + 0x30AF, 0x30B3, 0x30B5, 0x30B7, 0x30BA, 0x30BF, 0x30C3, 0x30B6, 0x30C5, 0x30C1, 0x0, 0x0, 0x0, 0x30CB, 0x30CC, + 0x3084, 0x3088, 0x308C, 0x3097, 0x309230CB3092, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 
0x1C45, 0x1C46, + 0x299, 0x29A, 0x40E, 0x40F, 0x410, 0x411, 0x308F, 0x3090, 0x30A7, 0x30A8, 0x30A9, 0x30AA, 0x30BB, 0x30BC, 0x30BD, + 0x30BE, 0x3041, 0x304F, 0x309C, 0x309D, 0x3073, 0x3082, 0x308A, 0x3053, 0x30C6, 0x30CD, 0x30CE, 0x3091, 0x30A0, 0x30C7, + 0x30C8, 0x30CF, 0x30D0, 0x30D1, 0x30D2, 0x30D3, 0x3065, 0x309E, 0x309F, 0x30B4, 0x30B1, 0x30B8, 0x30B9, 0x3036, 0x3038, + 0x303A, 0x3043, 0x304C, 0x3056, 0x306B, 0x3071, 0x3076, 0x3077, 0x307C, 0x308E, 0x3095, 0x308D, 0x30B0, 0x30C0, 0x30C2, + 0x30CA, 0x30D4, 0x30D6, 0x30D9, 0x30DA, 0x30D5, 0x30D7, 0x30D8, 0x3079, 0x30DB, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, + 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x30DC, 0x30DD, 0x30B2, 0x30C4, 0x54B, 0x54C, 0x223B, 0x223D, 0x223F, 0x2241, + 0x2243, 0x2245, 0x2247, 0x224B, 0x224D, 0x224F, 0x2251, 0x2253, 0x2255, 0x2259, 0x225B, 0x225D, 0x225F, 0x2261, 0x2263, + 0x2267, 0x2269, 0x226B, 0x226D, 0x226F, 0x2271, 0x2273, 0x2275, 0x2277, 0x2279, 0x227B, 0x227D, 0x2281, 0x2283, 0x2249, + 0x2257, 0x2265, 0x227F, 0x2285, 0x90C6FBC0, 0x2288, 0x90C8FBC0, 0x90C9FBC0, 0x90CAFBC0, 0x90CBFBC0, 0x90CCFBC0, 0x228D, 0x90CEFBC0, 0x90CFFBC0, 0x223A, + 0x223C, 0x223E, 0x2240, 0x2242, 0x2244, 0x2246, 0x224A, 0x224C, 0x224E, 0x2250, 0x2252, 0x2254, 0x2258, 0x225A, 0x225C, + 0x225E, 0x2260, 0x2262, 0x2266, 0x2268, 0x226A, 0x226C, 0x226E, 0x2270, 0x2272, 0x2274, 0x2276, 0x2278, 0x227A, 0x227C, + 0x2280, 0x2282, 0x2248, 0x2256, 0x2264, 0x227E, 0x2284, 0x2286, 0x2287, 0x2289, 0x228A, 0x228B, 0x2C6, 0x2254, 0x228C, + 0x228E, 0x228F, 0x3BF5, 0x3BF6, 0x3BF7, 0x3BF8, 0x3BF9, 0x3BFA, 0x3BFB, 0x3BFC, 0x3BFD, 0x3BFE, 0x3BFF, 0x3C00, 0x3C01, + 0x3C02, 0x3C03, 0x3C04, 0x3C05, 0x3C06, 0x3C07, 0x3C08, 0x3C09, 0x3C0A, 0x3C0B, 0x3C0C, 0x3C0D, 0x3C0E, 0x3C0F, 0x3C10, + 0x3C11, 0x3C12, 0x3C13, 0x3C14, 0x3C15, 0x3C16, 0x3C17, 0x3C18, 0x3C19, 0x3C1A, 0x3C1B, 0x3C1C, 0x3C1D, 0x3C1E, 0x3C1F, + 0x3C20, 0x3C21, 0x3C22, 0x3C23, 0x3C24, 0x3C25, 0x3C26, 0x3C27, 0x3C28, 0x3C29, 0x3C2A, 0x3C2B, 0x3C2C, 0x3C2D, 0x3C2E, + 
0x3C2F, 0x3C30, 0x3C31, 0x3C32, 0x3C33, 0x3C34, 0x3C35, 0x3C36, 0x3C37, 0x3C38, 0x3C39, 0x3C3A, 0x3C3B, 0x3C3C, 0x3C3D, + 0x3C3E, 0x3C3F, 0x3C40, 0x3C41, 0x3C42, 0x3C43, 0x3C44, 0x3C45, 0x3C46, 0x3C47, 0x3C48, 0x3C49, 0x3C4A, 0x3C4B, 0x3C4C, + 0x3C4D, 0x3C4E, 0x3C4F, 0x3C50, 0x3C51, 0x3C52, 0x3C53, 0x3C71, 0x3C72, 0x3C73, 0x3C74, 0x3C75, 0x3C76, 0x3C77, 0x3C78, + 0x3C79, 0x3C7A, 0x3C7B, 0x3C7C, 0x3C7D, 0x3C7E, 0x3C7F, 0x3C80, 0x3C81, 0x3C82, 0x3C83, 0x3C84, 0x3C85, 0x3C86, 0x3C87, + 0x3C88, 0x3C89, 0x3C8A, 0x3C8B, 0x3C8C, 0x3C8D, 0x3C8E, 0x3C8F, 0x3C90, 0x3C91, 0x3C92, 0x3C93, 0x3C94, 0x3C95, 0x3C96, + 0x3C97, 0x3C98, 0x3C99, 0x3C9A, 0x3C9B, 0x3C9C, 0x3C9D, 0x3C9E, 0x3C9F, 0x3CA0, 0x3CA1, 0x3CA2, 0x3CA3, 0x3CA4, 0x3CA5, + 0x3CA6, 0x3CA7, 0x3CA8, 0x3CA9, 0x3CAA, 0x3CAB, 0x3CAC, 0x3CAD, 0x3CAE, 0x3CAF, 0x3CB0, 0x3CB1, 0x3CB2, 0x3CB3, 0x3CB4, + 0x3CB5, 0x3CB6, 0x3CB7, 0x3CB8, 0x3CB9, 0x3CD1, 0x3CD2, 0x3CD3, 0x3CD4, 0x3CD5, 0x3CD6, 0x3CD7, 0x3CD8, 0x3CD9, 0x3CDA, + 0x3CDB, 0x3CDC, 0x3CDD, 0x3CDE, 0x3CDF, 0x3CE0, 0x3CE1, 0x3CE2, 0x3CE3, 0x3CE4, 0x3CE5, 0x3CE6, 0x3CE7, 0x3CE8, 0x3CE9, + 0x3CEA, 0x3CEB, 0x3CEC, 0x3CED, 0x3CEE, 0x3CEF, 0x3CF0, 0x3CF1, 0x3CF2, 0x3CF3, 0x3CF4, 0x3CF5, 0x3CF6, 0x3CF7, 0x3CF8, + 0x3CF9, 0x3CFA, 0x3CFB, 0x3CFC, 0x3CFD, 0x3CFE, 0x3CFF, 0x3D00, 0x3D01, 0x3D02, 0x3D03, 0x3D04, 0x3D05, 0x3D06, 0x3D07, + 0x3D08, 0x3D09, 0x3D0A, 0x3D0B, 0x3D0C, 0x3D0D, 0x3D0E, 0x3D0F, 0x3D10, 0x3D11, 0x3D12, 0x3D13, 0x3D14, 0x3D15, 0x3D16, + 0x3D17, 0x3D18, 0x3D19, 0x3D1A, 0x3D1B, 0x3D1C, 0x3D1D, 0x3D1E, 0x3D1F, 0x3D20, 0x3D21, 0x3D22, 0x3D23, 0x3D24, 0x3D25, + 0x3D26, 0x3D27, 0x3D28, 0x2496, 0x2497, 0x2498, 0x2499, 0x249A, 0x249B, 0x249C, 0x249D, 0x249E, 0x249F, 0x24A0, 0x24A1, + 0x24A2, 0x24A3, 0x24A4, 0x24A5, 0x24A7, 0x24A8, 0x24A9, 0x24AA, 0x24AB, 0x24AC, 0x24AD, 0x24AE, 0x24AF, 0x24B0, 0x24B1, + 0x24B2, 0x24B3, 0x24B4, 0x24B5, 0x24B6, 0x24BC, 0x24BD, 0x24BE, 0x24BF, 0x24C0, 0x24C1, 0x24C2, 0x24C3, 0x24C4, 0x24C5, + 0x24C6, 0x24C7, 0x24C8, 0x24C9, 0x24CA, 0x24CB, 
0x24CD, 0x24CE, 0x24CF, 0x24D0, 0x24D1, 0x24D2, 0x24D3, 0x24D4, 0x24DC, + 0x24DD, 0x24DE, 0x24DF, 0x24E0, 0x24E1, 0x24E2, 0x24E3, 0x24E5, 0x24E6, 0x24E7, 0x24E8, 0x24E9, 0x24EA, 0x24EB, 0x24EC, + 0x24ED, 0x9249FBC0, 0x24EE, 0x24EF, 0x24F0, 0x24F1, 0x924EFBC0, 0x924FFBC0, 0x24F2, 0x24F3, 0x24F4, 0x24F5, 0x24F6, 0x24F7, 0x24F8, + 0x9257FBC0, 0x24F9, 0x9259FBC0, 0x24FA, 0x24FB, 0x24FC, 0x24FD, 0x925EFBC0, 0x925FFBC0, 0x24FE, 0x24FF, 0x2500, 0x2501, 0x2502, 0x2503, + 0x2504, 0x2505, 0x250B, 0x250C, 0x250D, 0x250E, 0x250F, 0x2510, 0x2511, 0x2512, 0x2513, 0x2514, 0x2515, 0x2516, 0x2517, + 0x2518, 0x2519, 0x251A, 0x251C, 0x251D, 0x251E, 0x251F, 0x2520, 0x2521, 0x2522, 0x2523, 0x2525, 0x2526, 0x2527, 0x2528, + 0x2529, 0x252A, 0x252B, 0x252C, 0x252D, 0x9289FBC0, 0x252E, 0x252F, 0x2530, 0x2531, 0x928EFBC0, 0x928FFBC0, 0x2532, 0x2533, 0x2534, + 0x2535, 0x2536, 0x2537, 0x2538, 0x2539, 0x253B, 0x253C, 0x253D, 0x253E, 0x253F, 0x2540, 0x2541, 0x2542, 0x2544, 0x2545, + 0x2546, 0x2547, 0x2548, 0x2549, 0x254A, 0x254B, 0x254D, 0x254E, 0x254F, 0x2550, 0x2551, 0x2552, 0x2553, 0x2554, 0x2555, + 0x92B1FBC0, 0x2556, 0x2557, 0x2558, 0x2559, 0x92B6FBC0, 0x92B7FBC0, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E, 0x255F, 0x2560, 0x92BFFBC0, + 0x2561, 0x92C1FBC0, 0x2562, 0x2563, 0x2564, 0x2565, 0x92C6FBC0, 0x92C7FBC0, 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, + 0x256D, 0x256E, 0x256F, 0x2570, 0x2571, 0x2572, 0x2573, 0x2574, 0x92D7FBC0, 0x2575, 0x2576, 0x2577, 0x2578, 0x2579, 0x257A, + 0x257B, 0x257C, 0x2584, 0x2585, 0x2586, 0x2587, 0x2588, 0x2589, 0x258A, 0x258B, 0x258C, 0x258D, 0x258E, 0x258F, 0x2590, + 0x2591, 0x2592, 0x2593, 0x2594, 0x2595, 0x2596, 0x2597, 0x2598, 0x2599, 0x259A, 0x259B, 0x25A3, 0x25A4, 0x25A5, 0x25A6, + 0x25A7, 0x25A8, 0x25A9, 0x25AA, 0x25AC, 0x25AD, 0x25AE, 0x25AF, 0x25B0, 0x25B1, 0x25B2, 0x25B3, 0x25B5, 0x25B6, 0x25B7, + 0x25B8, 0x25B9, 0x25BA, 0x25BB, 0x25BC, 0x25BD, 0x9311FBC0, 0x25BE, 0x25BF, 0x25C0, 0x25C1, 0x9316FBC0, 0x9317FBC0, 0x25C2, 0x25C3, + 0x25C4, 
0x25C5, 0x25C6, 0x25C7, 0x25C8, 0x25C9, 0x25CE, 0x25CF, 0x25D0, 0x25D1, 0x25D2, 0x25D3, 0x25D4, 0x25D5, 0x25D7, + 0x25D8, 0x25D9, 0x25DA, 0x25DB, 0x25DC, 0x25DD, 0x25DE, 0x25E7, 0x25E8, 0x25E9, 0x25EA, 0x25EB, 0x25EC, 0x25ED, 0x25EE, + 0x25F0, 0x25F1, 0x25F2, 0x25F3, 0x25F4, 0x25F5, 0x25F6, 0x25F7, 0x25FF, 0x2600, 0x2601, 0x2602, 0x2603, 0x2604, 0x2605, + 0x2606, 0x2607, 0x2608, 0x2609, 0x260A, 0x260B, 0x260C, 0x260D, 0x260E, 0x2613, 0x2614, 0x2615, 0x2616, 0x2617, 0x2618, + 0x2619, 0x261A, 0x2620, 0x2621, 0x2622, 0x935BFBC0, 0x935CFBC0, 0x0, 0x0, 0x0, 0x2C7, 0x251, 0x27C, 0x252, 0x253, + 0x254, 0x255, 0x26C, 0x2C8, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1ABB, 0x1ABC, + 0x1ABD, 0x1ABE, 0x1ABF, 0x1AC0, 0x1AC1, 0x1AC2, 0x1AC3, 0x1AC4, 0x1AC5, 0x937DFBC0, 0x937EFBC0, 0x937FFBC0, 0x24B7, 0x24B8, 0x24B9, + 0x24BA, 0x2506, 0x2507, 0x2508, 0x2509, 0x260F, 0x2610, 0x2611, 0x2612, 0x261B, 0x261C, 0x261D, 0x261E, 0x4C6, 0x4C7, + 0x4C8, 0x4C9, 0x4CA, 0x4CB, 0x4CC, 0x4CD, 0x4CE, 0x4CF, 0x939AFBC0, 0x939BFBC0, 0x939CFBC0, 0x939DFBC0, 0x939EFBC0, 0x939FFBC0, 0x337F, + 0x3380, 0x3381, 0x3382, 0x3383, 0x3384, 0x3385, 0x3386, 0x3387, 0x3388, 0x3389, 0x338A, 0x338B, 0x338C, 0x338D, 0x338E, + 0x338F, 0x3390, 0x3391, 0x3392, 0x3393, 0x3394, 0x3395, 0x3396, 0x3397, 0x3398, 0x3399, 0x339A, 0x339B, 0x339C, 0x339D, + 0x339E, 0x339F, 0x33A0, 0x33A1, 0x33A2, 0x33A3, 0x33A4, 0x33A5, 0x33A6, 0x33A7, 0x33A8, 0x33A9, 0x33AA, 0x33AB, 0x33AC, + 0x33AD, 0x33AE, 0x33AF, 0x33B0, 0x33B1, 0x33B2, 0x33B3, 0x33B4, 0x33B5, 0x33B6, 0x33B7, 0x33B8, 0x33B9, 0x33BA, 0x33BB, + 0x33BC, 0x33BD, 0x33BE, 0x33BF, 0x33C0, 0x33C1, 0x33C2, 0x33C3, 0x33C4, 0x33C5, 0x33C6, 0x33C7, 0x33C8, 0x33C9, 0x33CA, + 0x33CB, 0x33CC, 0x33CD, 0x33CE, 0x33CF, 0x33D0, 0x33D1, 0x33D2, 0x33D3, 0x33D4, 0x93F6FBC0, 0x93F7FBC0, 0x33CF, 0x33D0, 0x33D1, + 0x33D2, 0x33D3, 0x33D4, 0x93FEFBC0, 0x93FFFBC0, 0x20F, 0x33F9, 0x33FA, 0x33FB, 0x33FC, 0x33FD, 0x33FE, 0x33FF, 0x3400, 0x3401, + 0x3402, 0x3403, 0x3404, 
0x3405, 0x3406, 0x3407, 0x3408, 0x3409, 0x340A, 0x340B, 0x340C, 0x340D, 0x340E, 0x340F, 0x3410, + 0x3411, 0x3412, 0x3413, 0x3414, 0x3415, 0x3416, 0x3417, 0x3418, 0x3419, 0x341A, 0x341B, 0x341C, 0x341D, 0x341E, 0x341F, + 0x3420, 0x3421, 0x3422, 0x3423, 0x3424, 0x3425, 0x3426, 0x3427, 0x3428, 0x3429, 0x342A, 0x342B, 0x342C, 0x342D, 0x342E, + 0x342F, 0x3430, 0x3431, 0x3432, 0x3433, 0x3434, 0x3435, 0x3436, 0x3437, 0x3438, 0x3439, 0x343A, 0x343B, 0x343C, 0x343D, + 0x343E, 0x343F, 0x3440, 0x3441, 0x3442, 0x3443, 0x3444, 0x3445, 0x3446, 0x3447, 0x3448, 0x3449, 0x344A, 0x344B, 0x344C, + 0x344D, 0x344E, 0x344F, 0x3450, 0x3451, 0x3452, 0x3453, 0x3454, 0x3455, 0x3456, 0x3457, 0x3458, 0x3459, 0x345A, 0x345B, + 0x345C, 0x345D, 0x345E, 0x345F, 0x3460, 0x3461, 0x3462, 0x3463, 0x3464, 0x3465, 0x3466, 0x3467, 0x3468, 0x3469, 0x346A, + 0x346B, 0x346C, 0x346D, 0x346E, 0x346F, 0x3470, 0x3471, 0x3472, 0x3473, 0x3474, 0x3475, 0x3476, 0x3477, 0x3478, 0x3479, + 0x347A, 0x347B, 0x347C, 0x347D, 0x347E, 0x347F, 0x3480, 0x3481, 0x3482, 0x3483, 0x3484, 0x3485, 0x3486, 0x3487, 0x3488, + 0x3489, 0x348A, 0x348B, 0x348C, 0x348D, 0x348E, 0x348F, 0x3490, 0x3491, 0x3492, 0x3493, 0x3494, 0x3495, 0x3496, 0x3497, + 0x3498, 0x3499, 0x349A, 0x349B, 0x349C, 0x349D, 0x349E, 0x349F, 0x34A0, 0x34A1, 0x34A2, 0x34A3, 0x34A4, 0x34A5, 0x34A6, + 0x34A7, 0x34A8, 0x34A9, 0x34AA, 0x34AB, 0x34AC, 0x34AD, 0x34AE, 0x34AF, 0x34B0, 0x34B1, 0x34B2, 0x34B3, 0x34B4, 0x34B5, + 0x34B6, 0x34B7, 0x34B8, 0x34B9, 0x34BA, 0x34BB, 0x34BC, 0x34BD, 0x34BE, 0x34BF, 0x34C0, 0x34C1, 0x34C2, 0x34C3, 0x34C4, + 0x34C5, 0x34C6, 0x34C7, 0x34C8, 0x34C9, 0x34CA, 0x34CB, 0x34CC, 0x34CD, 0x34CE, 0x34CF, 0x34D0, 0x34D1, 0x34D2, 0x34D3, + 0x34D4, 0x34D5, 0x34D6, 0x34D7, 0x34D8, 0x34D9, 0x34DA, 0x34DB, 0x34DC, 0x34DD, 0x34DE, 0x34DF, 0x34E0, 0x34E1, 0x34E2, + 0x34E3, 0x34E4, 0x34E5, 0x34E6, 0x34E7, 0x34E8, 0x34E9, 0x34EA, 0x34EB, 0x34EC, 0x34ED, 0x34EE, 0x34EF, 0x34F0, 0x34F1, + 0x34F2, 0x34F3, 0x34F4, 0x34F5, 0x34F6, 0x34F7, 0x34F8, 0x34F9, 0x34FA, 
0x34FB, 0x34FC, 0x34FD, 0x34FE, 0x34FF, 0x3500, + 0x3501, 0x3502, 0x3503, 0x3504, 0x3505, 0x3506, 0x3507, 0x3508, 0x3509, 0x350A, 0x350B, 0x350C, 0x350D, 0x350E, 0x350F, + 0x3510, 0x3511, 0x3512, 0x3513, 0x3514, 0x3515, 0x3516, 0x3517, 0x3518, 0x3519, 0x351A, 0x351B, 0x351C, 0x351D, 0x351E, + 0x351F, 0x3520, 0x3521, 0x3522, 0x3523, 0x3524, 0x3525, 0x3526, 0x3527, 0x3528, 0x3529, 0x352A, 0x352B, 0x352C, 0x352D, + 0x352E, 0x352F, 0x3530, 0x3531, 0x3532, 0x3533, 0x3534, 0x3535, 0x3536, 0x3537, 0x3538, 0x3539, 0x353A, 0x353B, 0x353C, + 0x353D, 0x353E, 0x353F, 0x3540, 0x3541, 0x3542, 0x3543, 0x3544, 0x3545, 0x3546, 0x3547, 0x3548, 0x3549, 0x354A, 0x354B, + 0x354C, 0x354D, 0x354E, 0x354F, 0x3550, 0x3551, 0x3552, 0x3553, 0x3554, 0x3555, 0x3556, 0x3557, 0x3558, 0x3559, 0x355A, + 0x355B, 0x355C, 0x355D, 0x355E, 0x355F, 0x3560, 0x3561, 0x3562, 0x3563, 0x3564, 0x3565, 0x3566, 0x3567, 0x3568, 0x3569, + 0x356A, 0x356B, 0x356C, 0x356D, 0x356E, 0x356F, 0x3570, 0x3571, 0x3572, 0x3573, 0x35A6, 0x3574, 0x3576, 0x3577, 0x3578, + 0x3579, 0x357A, 0x357B, 0x357C, 0x357D, 0x357E, 0x357F, 0x3580, 0x3581, 0x3582, 0x3583, 0x3584, 0x3585, 0x3587, 0x3588, + 0x3589, 0x358A, 0x358B, 0x358C, 0x358D, 0x358E, 0x3595, 0x3596, 0x3597, 0x3598, 0x3599, 0x359A, 0x359B, 0x359C, 0x359D, + 0x359E, 0x359F, 0x35A0, 0x35A1, 0x35A2, 0x35A3, 0x35A4, 0x35A5, 0x35A7, 0x35A8, 0x35A9, 0x35AA, 0x35AB, 0x35AC, 0x35AD, + 0x35AE, 0x35AF, 0x35B0, 0x35B1, 0x35B2, 0x35B3, 0x35B4, 0x35B5, 0x35B6, 0x35B7, 0x35B8, 0x35B9, 0x35BA, 0x35BB, 0x35BC, + 0x35BD, 0x35BE, 0x35BF, 0x35C0, 0x35C1, 0x35C2, 0x35C3, 0x35C4, 0x35C5, 0x35C6, 0x35C7, 0x35C8, 0x35C9, 0x35CA, 0x35CB, + 0x35CC, 0x35CD, 0x35CE, 0x35CF, 0x35D0, 0x35D1, 0x35D2, 0x35D3, 0x35D4, 0x35D5, 0x35D6, 0x35D7, 0x35D8, 0x35D9, 0x35DA, + 0x35DB, 0x35DC, 0x35DD, 0x35DE, 0x35DF, 0x35E0, 0x35E1, 0x35E2, 0x35E3, 0x35E4, 0x35E5, 0x35E6, 0x35E7, 0x35E8, 0x35E9, + 0x35EA, 0x35EB, 0x35EC, 0x35ED, 0x35EE, 0x35EF, 0x35F0, 0x35F1, 0x35F2, 0x35F3, 0x35F4, 0x35F5, 0x35F6, 0x35F7, 0x35F8, 
+ 0x35F9, 0x35FA, 0x35FB, 0x35FC, 0x35FD, 0x35FE, 0x35FF, 0x3600, 0x3601, 0x3602, 0x3603, 0x3604, 0x3605, 0x3606, 0x3607, + 0x3608, 0x3609, 0x360A, 0x360B, 0x360C, 0x360D, 0x360E, 0x360F, 0x3610, 0x3611, 0x3612, 0x3613, 0x3614, 0x3615, 0x3616, + 0x3617, 0x3618, 0x3619, 0x361A, 0x361B, 0x361C, 0x361D, 0x361E, 0x361F, 0x3620, 0x3621, 0x3622, 0x3623, 0x3624, 0x3625, + 0x3626, 0x3627, 0x3628, 0x3629, 0x362A, 0x362B, 0x362C, 0x362D, 0x362E, 0x362F, 0x3630, 0x3631, 0x3632, 0x3633, 0x3634, + 0x3635, 0x3636, 0x3637, 0x3638, 0x3639, 0x363A, 0x363B, 0x363C, 0x363D, 0x363E, 0x363F, 0x3640, 0x3641, 0x3642, 0x3643, + 0x3644, 0x3645, 0x3646, 0x3647, 0x3648, 0x3649, 0x364A, 0x364B, 0x364C, 0x364D, 0x364E, 0x364F, 0x3650, 0x3651, 0x3652, + 0x3653, 0x3654, 0x3655, 0x3656, 0x3657, 0x3658, 0x3659, 0x365A, 0x365B, 0x365C, 0x365D, 0x365E, 0x365F, 0x3660, 0x3661, + 0x3662, 0x3663, 0x3664, 0x3665, 0x3666, 0x3667, 0x3668, 0x3669, 0x366A, 0x366B, 0x366C, 0x41E, 0x27F, 0x3575, 0x3586, + 0x358F, 0x3590, 0x3591, 0x3592, 0x3593, 0x3594, 0x366D, 0x366E, 0x366F, 0x3670, 0x3671, 0x3672, 0x3673, 0x3674, 0x3675, + 0x209, 0x36BC, 0x36BD, 0x36BE, 0x36BF, 0x36C0, 0x36C1, 0x36C2, 0x36C3, 0x36C4, 0x36C5, 0x36C6, 0x36C7, 0x36C8, 0x36C9, + 0x36CA, 0x36CB, 0x36CC, 0x36CD, 0x36CE, 0x36CF, 0x36D0, 0x36D1, 0x36D2, 0x36D3, 0x36D4, 0x36D5, 0x321, 0x322, 0x969DFBC0, + 0x969EFBC0, 0x969FFBC0, 0x36D6, 0x36D6, 0x36D7, 0x36FB, 0x36D7, 0x36D7, 0x36D8, 0x36D8, 0x36D9, 0x36D9, 0x36F7, 0x36F9, 0x36D9, + 0x36D9, 0x36D9, 0x36DB, 0x36DC, 0x36DD, 0x36DE, 0x36DE, 0x36DE, 0x36DE, 0x36DE, 0x36E0, 0x36FE, 0x36E1, 0x36E2, 0x36E2, + 0x36E2, 0x36E2, 0x36E3, 0x36E3, 0x36E3, 0x36E4, 0x36E4, 0x36E6, 0x36E6, 0x36E7, 0x36E7, 0x36E8, 0x36E9, 0x36EA, 0x36EB, + 0x36EB, 0x36EB, 0x36EB, 0x36EB, 0x36ED, 0x36ED, 0x36ED, 0x36EE, 0x36EE, 0x36EE, 0x36E9, 0x36EF, 0x36F1, 0x36F1, 0x36F1, + 0x36F2, 0x36F2, 0x36F3, 0x36F3, 0x36F4, 0x36F5, 0x36FC, 0x3700, 0x3701, 0x36FD, 0x36FF, 0x3702, 0x3703, 0x3703, 0x3703, + 0x36E1, 0x36EB, 0x25C, 0x25D, 0x25E, 
0x36F236E7, 0x36F136F1, 0x36D836D8, 0x36DF, 0x36EC, 0x36F6, 0x36DA, 0x36E5, 0x36F0, 0x36F8, + 0x36FA, 0x96F9FBC0, 0x96FAFBC0, 0x96FBFBC0, 0x96FCFBC0, 0x96FDFBC0, 0x96FEFBC0, 0x96FFFBC0, 0x2F66, 0x2F67, 0x2F68, 0x2F69, 0x2F6A, 0x2F6B, 0x2F6C, + 0x2F6D, 0x2F6E, 0x2F6F, 0x2F70, 0x2F71, 0x2F72, 0x970DFBC0, 0x2F73, 0x2F74, 0x2F75, 0x2F76, 0x2F77, 0x2F78, 0x2F79, 0x9715FBC0, + 0x9716FBC0, 0x9717FBC0, 0x9718FBC0, 0x9719FBC0, 0x971AFBC0, 0x971BFBC0, 0x971CFBC0, 0x971DFBC0, 0x971EFBC0, 0x971FFBC0, 0x2F7A, 0x2F7B, 0x2F7C, 0x2F7D, 0x2F7E, + 0x2F7F, 0x2F80, 0x2F81, 0x2F82, 0x2F83, 0x2F84, 0x2F85, 0x2F86, 0x2F87, 0x2F88, 0x2F89, 0x2F8A, 0x2F8B, 0x2F8C, 0x2F8D, + 0x2F8E, 0x296, 0x297, 0x9737FBC0, 0x9738FBC0, 0x9739FBC0, 0x973AFBC0, 0x973BFBC0, 0x973CFBC0, 0x973DFBC0, 0x973EFBC0, 0x973FFBC0, 0x2F8F, 0x2F90, 0x2F91, + 0x2F92, 0x2F93, 0x2F94, 0x2F95, 0x2F96, 0x2F97, 0x2F98, 0x2F99, 0x2F9A, 0x2F9B, 0x2F9C, 0x2F9D, 0x2F9E, 0x2F9F, 0x2FA0, + 0x2FA1, 0x2FA2, 0x9754FBC0, 0x9755FBC0, 0x9756FBC0, 0x9757FBC0, 0x9758FBC0, 0x9759FBC0, 0x975AFBC0, 0x975BFBC0, 0x975CFBC0, 0x975DFBC0, 0x975EFBC0, 0x975FFBC0, 0x2FA3, + 0x2FA4, 0x2FA5, 0x2FA6, 0x2FA7, 0x2FA8, 0x2FA9, 0x2FAA, 0x2FAB, 0x2FAC, 0x2FAD, 0x2FAE, 0x2FAF, 0x976DFBC0, 0x2FB0, 0x2FB1, + 0x2FB2, 0x9771FBC0, 0x2FB3, 0x2FB4, 0x9774FBC0, 0x9775FBC0, 0x9776FBC0, 0x9777FBC0, 0x9778FBC0, 0x9779FBC0, 0x977AFBC0, 0x977BFBC0, 0x977CFBC0, 0x977DFBC0, 0x977EFBC0, + 0x977FFBC0, 0x3116, 0x3117, 0x3118, 0x3119, 0x311A, 0x311B, 0x311C, 0x311D, 0x311E, 0x311F, 0x3120, 0x3121, 0x3122, 0x3123, + 0x3124, 0x3125, 0x3126, 0x3127, 0x3128, 0x3129, 0x312A, 0x312B, 0x312C, 0x312D, 0x312E, 0x312F, 0x3130, 0x3131, 0x3132, + 0x3133, 0x3134, 0x3135, 0x3136, 0x3137, 0x3138, 0x313A, 0x313B, 0x313C, 0x313D, 0x313E, 0x313F, 0x3140, 0x3141, 0x3142, + 0x3143, 0x3144, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314A, 0x0, 0x0, 0x314B, 0x314C, 0x314D, 0x314E, 0x314F, + 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156, 0x3157, 0x3158, 0x3159, 0x315A, 0x0, 0x0, 0x0, 0x0, + 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x315B, 0x0, 0x29B, 0x29C, 0x259, 0x1BFD, 0x412, + 0x413, 0x414, 0x1C1E, 0x3139, 0x0, 0x97DEFBC0, 0x97DFFBC0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, + 0x1C45, 0x1C46, 0x97EAFBC0, 0x97EBFBC0, 0x97ECFBC0, 0x97EDFBC0, 0x97EEFBC0, 0x97EFFBC0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, + 0x1C44, 0x1C45, 0x1C46, 0x97FAFBC0, 0x97FBFBC0, 0x97FCFBC0, 0x97FDFBC0, 0x97FEFBC0, 0x97FFFBC0, 0x3D9, 0x278, 0x22C, 0x27D, 0x256, 0x257, + 0x211, 0x212, 0x22D, 0x27E, 0x0, 0x0, 0x0, 0x0, 0x0, 0x980FFBC0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, + 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x981AFBC0, 0x981BFBC0, 0x981CFBC0, 0x981DFBC0, 0x981EFBC0, 0x981FFBC0, 0x32E0, 0x32E2, 0x32E5, 0x32EB, + 0x32ED, 0x32F0, 0x32F2, 0x32F5, 0x32F6, 0x32F7, 0x32FC, 0x32FE, 0x3301, 0x3303, 0x3308, 0x330A, 0x330B, 0x330C, 0x3313, + 0x3316, 0x3319, 0x331E, 0x3322, 0x3325, 0x3327, 0x3329, 0x332C, 0x3331, 0x3332, 0x3335, 0x3339, 0x333C, 0x333D, 0x333E, + 0x333F, 0x32DF, 0x32E3, 0x32E6, 0x32EC, 0x32EE, 0x32F1, 0x32F3, 0x32F8, 0x32FD, 0x32FF, 0x3302, 0x3304, 0x3309, 0x3314, + 0x3317, 0x331A, 0x331F, 0x3333, 0x3323, 0x3328, 0x332D, 0x3337, 0x333A, 0x3340, 0x3341, 0x331C, 0x32E4, 0x32E7, 0x32EA, + 0x32F4, 0x32EF, 0x32F9, 0x332E, 0x3305, 0x3307, 0x3300, 0x330D, 0x3315, 0x3318, 0x3320, 0x332A, 0x3338, 0x333B, 0x3334, + 0x3336, 0x3342, 0x331B, 0x3324, 0x32E8, 0x332F, 0x3326, 0x332B, 0x3321, 0x9878FBC0, 0x9879FBC0, 0x987AFBC0, 0x987BFBC0, 0x987CFBC0, 0x987DFBC0, + 0x987EFBC0, 0x987FFBC0, 0x32D8, 0x32D9, 0x32DA, 0x32DB, 0x32DC, 0x32DD, 0x32DE, 0x32E1, 0x32E9, 0x3330, 0x32FA, 0x331D, 0x3343, + 0x3345, 0x3346, 0x3348, 0x3349, 0x334C, 0x334E, 0x334F, 0x3351, 0x3353, 0x3355, 0x3356, 0x334A, 0x3354, 0x3306, 0x32FB, + 0x330E, 0x330F, 0x3344, 0x3347, 0x334B, 0x334D, 0x3310, 0x3352, 0x3311, 0x3312, 0x3357, 0x3358, 0x3350, 0x335A, 0x3359, + 0x98ABFBC0, 0x98ACFBC0, 0x98ADFBC0, 0x98AEFBC0, 0x98AFFBC0, 0x3676, 0x3677, 0x3678, 0x3679, 0x367A, 0x367B, 
0x367C, 0x367D, 0x367E, 0x367F, + 0x3680, 0x3681, 0x3682, 0x3683, 0x3684, 0x3685, 0x3686, 0x3687, 0x3688, 0x3689, 0x368A, 0x368B, 0x368C, 0x368D, 0x368E, + 0x368F, 0x3690, 0x3691, 0x3692, 0x3693, 0x3694, 0x3695, 0x3696, 0x3697, 0x3698, 0x3699, 0x369A, 0x369B, 0x369C, 0x369D, + 0x369E, 0x369F, 0x36A0, 0x36A1, 0x36A2, 0x36A3, 0x36A4, 0x36A5, 0x36A6, 0x36A7, 0x36A8, 0x36A9, 0x36AA, 0x36AB, 0x36AC, + 0x36AD, 0x36AE, 0x36AF, 0x36B0, 0x36B1, 0x36B2, 0x36B3, 0x36B4, 0x36B5, 0x36B6, 0x36B7, 0x36B8, 0x36B9, 0x36BA, 0x36BB, + 0x98F6FBC0, 0x98F7FBC0, 0x98F8FBC0, 0x98F9FBC0, 0x98FAFBC0, 0x98FBFBC0, 0x98FCFBC0, 0x98FDFBC0, 0x98FEFBC0, 0x98FFFBC0, 0x2F34, 0x2F35, 0x2F36, 0x2F37, 0x2F38, + 0x2F39, 0x2F3A, 0x2F3B, 0x2F3C, 0x2F3D, 0x2F3E, 0x2F3F, 0x2F40, 0x2F41, 0x2F42, 0x2F43, 0x2F44, 0x2F45, 0x2F46, 0x2F47, + 0x2F48, 0x2F49, 0x2F4A, 0x2F4B, 0x2F4C, 0x2F4D, 0x2F4E, 0x2F4F, 0x2F50, 0x2F5A2F3C, 0x2F5B2F3F, 0x991FFBC0, 0x2F51, 0x2F52, 0x2F53, + 0x2F54, 0x2F55, 0x2F56, 0x2F57, 0x2F58, 0x2F59, 0x2F5A, 0x2F5B, 0x2F5C, 0x992CFBC0, 0x992DFBC0, 0x992EFBC0, 0x992FFBC0, 0x2F5D, 0x2F5E, + 0x2F5F, 0x2F60, 0x2F61, 0x2F62, 0x2F63, 0x2F64, 0x2F65, 0x0, 0x0, 0x0, 0x993CFBC0, 0x993DFBC0, 0x993EFBC0, 0x993FFBC0, 0x54A, + 0x9941FBC0, 0x9942FBC0, 0x9943FBC0, 0x264, 0x26D, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, + 0x315C, 0x315D, 0x315E, 0x315F, 0x3160, 0x3161, 0x3162, 0x3163, 0x3164, 0x3165, 0x3166, 0x3167, 0x3168, 0x3169, 0x316A, + 0x316B, 0x316C, 0x316D, 0x316E, 0x316F, 0x3170, 0x3171, 0x3172, 0x3173, 0x3174, 0x3175, 0x3176, 0x3177, 0x3178, 0x3179, + 0x996EFBC0, 0x996FFBC0, 0x317A, 0x317B, 0x317C, 0x317D, 0x317E, 0x9975FBC0, 0x9976FBC0, 0x9977FBC0, 0x9978FBC0, 0x9979FBC0, 0x997AFBC0, 0x997BFBC0, 0x997CFBC0, + 0x997DFBC0, 0x997EFBC0, 0x997FFBC0, 0x317F, 0x3180, 0x3181, 0x3182, 0x3183, 0x3184, 0x3185, 0x3186, 0x3187, 0x3188, 0x3189, 0x318A, + 0x318B, 0x318C, 0x318D, 0x318E, 0x318F, 0x3190, 0x3191, 0x3192, 0x3193, 0x3194, 0x3195, 0x3196, 0x3197, 0x3198, 0x3199, + 
0x319A, 0x319B, 0x319C, 0x319D, 0x319E, 0x319F, 0x31A0, 0x31A1, 0x31A2, 0x31A3, 0x31A4, 0x31A5, 0x31A6, 0x31A7, 0x31A8, + 0x31A9, 0x31AA, 0x99ACFBC0, 0x99ADFBC0, 0x99AEFBC0, 0x99AFFBC0, 0x31AB, 0x31AC, 0x31AD, 0x31AE, 0x31AF, 0x31B0, 0x31B1, 0x31B2, 0x31B3, + 0x31B4, 0x31B5, 0x31B6, 0x31B7, 0x31B8, 0x31B9, 0x31BA, 0x31BB, 0x31BC, 0x31BD, 0x31BE, 0x31BF, 0x31C0, 0x31C1, 0x31C2, + 0x31C3, 0x31C4, 0x99CAFBC0, 0x99CBFBC0, 0x99CCFBC0, 0x99CDFBC0, 0x99CEFBC0, 0x99CFFBC0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, + 0x1C44, 0x1C45, 0x1C46, 0x1C3E, 0x99DBFBC0, 0x99DCFBC0, 0x99DDFBC0, 0x31B1319B, 0x31BC31B1319B, 0x551, 0x552, 0x553, 0x554, 0x555, 0x556, + 0x557, 0x558, 0x559, 0x55A, 0x55B, 0x55C, 0x55D, 0x55E, 0x55F, 0x560, 0x561, 0x562, 0x563, 0x564, 0x565, + 0x566, 0x567, 0x568, 0x569, 0x56A, 0x56B, 0x56C, 0x56D, 0x56E, 0x56F, 0x570, 0x2FB5, 0x2FB6, 0x2FB7, 0x2FB8, + 0x2FB9, 0x2FBA, 0x2FBB, 0x2FBC, 0x2FBD, 0x2FBE, 0x2FBF, 0x2FC0, 0x2FC1, 0x2FC2, 0x2FC3, 0x2FC4, 0x2FC5, 0x2FC6, 0x2FC7, + 0x2FC8, 0x2FC9, 0x2FCA, 0x2FCB, 0x2FCC, 0x2FCD, 0x2FCE, 0x2FCF, 0x2FD0, 0x9A1CFBC0, 0x9A1DFBC0, 0x2C9, 0x2CA, 0x31C5, 0x31C6, + 0x31C7, 0x31C8, 0x31C9, 0x31CA, 0x31CB, 0x31CC, 0x31CD, 0x31CE, 0x31CF, 0x31D0, 0x31D1, 0x31D2, 0x31D3, 0x31D4, 0x31D5, + 0x31D6, 0x31D7, 0x31D8, 0x31D9, 0x31DA, 0x31DB, 0x31DC, 0x31DD, 0x31DE, 0x31DF, 0x31E0, 0x31E1, 0x31E2, 0x31E3, 0x31E4, + 0x31E5, 0x31E6, 0x31E7, 0x31E8, 0x31E9, 0x31EA, 0x31EB, 0x31EC, 0x31ED, 0x31EE, 0x31EF, 0x31F0, 0x31F1, 0x31FA, 0x31FB, + 0x31FC, 0x31FD, 0x31FE, 0x31FF, 0x31F2, 0x31EB321131EB, 0x31F4, 0x31F5, 0x31F6, 0x31CB, 0x31CB, 0x31E0, 0x31E0, 0x31F7, 0x31F8, + 0x31F9, 0x9A5FFBC0, 0x3211, 0x3200, 0x3202, 0x3203, 0x3203, 0x3204, 0x3205, 0x3206, 0x3207, 0x3208, 0x3209, 0x31F3, 0x3201, + 0x3210, 0x320A, 0x320B, 0x320D, 0x320E, 0x320F, 0x320C, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x9A7DFBC0, 0x9A7EFBC0, 0x0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x9A8AFBC0, + 0x9A8BFBC0, 
0x9A8CFBC0, 0x9A8DFBC0, 0x9A8EFBC0, 0x9A8FFBC0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, + 0x9A9AFBC0, 0x9A9BFBC0, 0x9A9CFBC0, 0x9A9DFBC0, 0x9A9EFBC0, 0x9A9FFBC0, 0x415, 0x416, 0x417, 0x418, 0x419, 0x41A, 0x41B, 0x1BFE, 0x29D, + 0x29E, 0x29F, 0x2A0, 0x41C, 0x41D, 0x9AAEFBC0, 0x9AAFFBC0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9ABFFBC0, 0x9AC0FBC0, 0x9AC1FBC0, 0x9AC2FBC0, 0x9AC3FBC0, 0x9AC4FBC0, 0x9AC5FBC0, 0x9AC6FBC0, + 0x9AC7FBC0, 0x9AC8FBC0, 0x9AC9FBC0, 0x9ACAFBC0, 0x9ACBFBC0, 0x9ACCFBC0, 0x9ACDFBC0, 0x9ACEFBC0, 0x9ACFFBC0, 0x9AD0FBC0, 0x9AD1FBC0, 0x9AD2FBC0, 0x9AD3FBC0, 0x9AD4FBC0, 0x9AD5FBC0, + 0x9AD6FBC0, 0x9AD7FBC0, 0x9AD8FBC0, 0x9AD9FBC0, 0x9ADAFBC0, 0x9ADBFBC0, 0x9ADCFBC0, 0x9ADDFBC0, 0x9ADEFBC0, 0x9ADFFBC0, 0x9AE0FBC0, 0x9AE1FBC0, 0x9AE2FBC0, 0x9AE3FBC0, 0x9AE4FBC0, + 0x9AE5FBC0, 0x9AE6FBC0, 0x9AE7FBC0, 0x9AE8FBC0, 0x9AE9FBC0, 0x9AEAFBC0, 0x9AEBFBC0, 0x9AECFBC0, 0x9AEDFBC0, 0x9AEEFBC0, 0x9AEFFBC0, 0x9AF0FBC0, 0x9AF1FBC0, 0x9AF2FBC0, 0x9AF3FBC0, + 0x9AF4FBC0, 0x9AF5FBC0, 0x9AF6FBC0, 0x9AF7FBC0, 0x9AF8FBC0, 0x9AF9FBC0, 0x9AFAFBC0, 0x9AFBFBC0, 0x9AFCFBC0, 0x9AFDFBC0, 0x9AFEFBC0, 0x9AFFFBC0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x3257, 0x3258, 0x3259, 0x325A, 0x325B, 0x325C, 0x325D, 0x325E, 0x325F, 0x3260, 0x3261, 0x3262, 0x3263, + 0x3264, 0x3265, 0x3268, 0x3269, 0x326A, 0x326B, 0x326C, 0x326D, 0x326E, 0x326F, 0x3270, 0x3271, 0x3272, 0x3273, 0x3274, + 0x3275, 0x3276, 0x3278, 0x3279, 0x327A, 0x327B, 0x327C, 0x327E, 0x327F, 0x3280, 0x3281, 0x3282, 0x3283, 0x3284, 0x3285, + 0x3287, 0x3288, 0x3289, 0x328C, 0x0, 0x328D, 0x328E, 0x328F, 0x3290, 0x3291, 0x3292, 0x3293, 0x3294, 0x3295, 0x3296, + 0x3297, 0x3298, 0x3299, 0x329A, 0x329B, 0x329C, 0x3266, 0x3267, 0x3277, 0x327D, 0x3286, 0x328A, 0x328B, 0x9B4CFBC0, 0x9B4DFBC0, + 0x9B4EFBC0, 0x9B4FFBC0, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x2CB, 0x2CC, 0x280, + 0x25A, 0x2A1, 0x2A2, 0x210, 0x571, 0x572, 0x573, 
0x574, 0x575, 0x576, 0x577, 0x578, 0x579, 0x57A, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x57B, 0x57C, 0x57D, 0x57E, 0x57F, 0x580, 0x581, + 0x582, 0x583, 0x9B7DFBC0, 0x9B7EFBC0, 0x9B7FFBC0, 0x0, 0x0, 0x0, 0x2C98, 0x2C99, 0x2C9A, 0x2C9B, 0x2C9C, 0x2C9D, 0x2C9E, + 0x2C9F, 0x2CA1, 0x2CA2, 0x2CA3, 0x2CA4, 0x2CA5, 0x2CA6, 0x2CA7, 0x2CA8, 0x2CA9, 0x2CAA, 0x2CAB, 0x2CAC, 0x2CAD, 0x2CAE, + 0x2CB0, 0x2CB2, 0x2CB4, 0x2CB7, 0x2CBA, 0x2CBC, 0x2CBD, 0x2CBF, 0x2CB3, 0x2CB5, 0x2CB8, 0x2CC0, 0x2CC1, 0x2CC2, 0x2CC3, + 0x2CC4, 0x2CC5, 0x2CC6, 0x2CC7, 0x2CB1, 0x2CBB, 0x2CA0, 0x2CBE, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, + 0x1C44, 0x1C45, 0x1C46, 0x2C98, 0x2CB6, 0x2CB9, 0x2CAF, 0x2C9F, 0x2CB0, 0x2FD1, 0x2FD1, 0x2FD2, 0x2FD2, 0x2FD2, 0x2FD3, + 0x2FD3, 0x2FD4, 0x2FD4, 0x2FD5, 0x2FD5, 0x2FD6, 0x2FD6, 0x2FD6, 0x2FD7, 0x2FD7, 0x2FD8, 0x2FD9, 0x2FDA, 0x2FDA, 0x2FDB, + 0x2FDB, 0x2FDC, 0x2FDC, 0x2FDD, 0x2FDD, 0x2FDD, 0x2FDE, 0x2FDE, 0x2FDF, 0x2FE0, 0x2FE0, 0x2FE1, 0x2FE2, 0x2FE3, 0x2FE4, + 0x2FE5, 0x2FE6, 0x0, 0x2FE7, 0x2FE7, 0x2FE8, 0x2FE9, 0x2FE9, 0x2FEA, 0x2FEA, 0x2FEB, 0x2FEB, 0x2FEC, 0x2FED, 0x2FEE, + 0x2FEF, 0x9BF4FBC0, 0x9BF5FBC0, 0x9BF6FBC0, 0x9BF7FBC0, 0x9BF8FBC0, 0x9BF9FBC0, 0x9BFAFBC0, 0x9BFBFBC0, 0x42A, 0x42B, 0x42C, 0x42D, 0x2EC6, 0x2EC7, + 0x2EC8, 0x2EC9, 0x2ECA, 0x2ECB, 0x2ECC, 0x2ECD, 0x2ECE, 0x2ECF, 0x2ED3, 0x2ED4, 0x2ED5, 0x2ED6, 0x2ED7, 0x2ED8, 0x2ED9, + 0x2EDA, 0x2EDB, 0x2EDC, 0x2EDD, 0x2EDE, 0x2EDF, 0x2EE0, 0x2EE1, 0x2EE2, 0x2EE3, 0x2EE5, 0x2EE7, 0x2EE8, 0x2EE9, 0x2EEA, + 0x2EEB, 0x2EEC, 0x2EED, 0x2EEE, 0x2EE4, 0x2EE6, 0x2EF0, 0x2EF1, 0x2EF2, 0x2EF3, 0x2EF4, 0x2EF5, 0x2EF6, 0x2EF7, 0x2EF8, + 0x2EF9, 0x2EFA, 0x2EFB, 0x2EFC, 0x2EFD, 0x2EFE, 0x2EFF, 0x2EEF, 0x0, 0x9C38FBC0, 0x9C39FBC0, 0x9C3AFBC0, 0x292, 0x293, 0x40B, + 0x40C, 0x40D, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x9C4AFBC0, 0x9C4BFBC0, 0x9C4CFBC0, + 0x2ED0, 0x2ED1, 0x2ED2, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 
0x1C46, 0x335B, 0x335C, + 0x335D, 0x335E, 0x335F, 0x3360, 0x3361, 0x3362, 0x3363, 0x3364, 0x3365, 0x3366, 0x3367, 0x3368, 0x3369, 0x336A, 0x336B, + 0x336C, 0x336D, 0x336E, 0x336F, 0x3370, 0x3371, 0x3372, 0x3373, 0x3374, 0x3375, 0x3376, 0x3377, 0x3378, 0x3379, 0x337A, + 0x337B, 0x337C, 0x337D, 0x337E, 0x2C2, 0x2C3, 0x2032, 0x204A, 0x20E7, 0x2105, 0x210E, 0x210E, 0x218F, 0x21A0, 0x2129, + 0x9C89FBC0, 0x9C8AFBC0, 0x9C8BFBC0, 0x9C8CFBC0, 0x9C8DFBC0, 0x9C8EFBC0, 0x9C8FFBC0, 0x9C90FBC0, 0x9C91FBC0, 0x9C92FBC0, 0x9C93FBC0, 0x9C94FBC0, 0x9C95FBC0, 0x9C96FBC0, 0x9C97FBC0, + 0x9C98FBC0, 0x9C99FBC0, 0x9C9AFBC0, 0x9C9BFBC0, 0x9C9CFBC0, 0x9C9DFBC0, 0x9C9EFBC0, 0x9C9FFBC0, 0x9CA0FBC0, 0x9CA1FBC0, 0x9CA2FBC0, 0x9CA3FBC0, 0x9CA4FBC0, 0x9CA5FBC0, 0x9CA6FBC0, + 0x9CA7FBC0, 0x9CA8FBC0, 0x9CA9FBC0, 0x9CAAFBC0, 0x9CABFBC0, 0x9CACFBC0, 0x9CADFBC0, 0x9CAEFBC0, 0x9CAFFBC0, 0x9CB0FBC0, 0x9CB1FBC0, 0x9CB2FBC0, 0x9CB3FBC0, 0x9CB4FBC0, 0x9CB5FBC0, + 0x9CB6FBC0, 0x9CB7FBC0, 0x9CB8FBC0, 0x9CB9FBC0, 0x9CBAFBC0, 0x9CBBFBC0, 0x9CBCFBC0, 0x9CBDFBC0, 0x9CBEFBC0, 0x9CBFFBC0, 0x41F, 0x420, 0x421, 0x422, 0x423, + 0x424, 0x425, 0x426, 0x9CC8FBC0, 0x9CC9FBC0, 0x9CCAFBC0, 0x9CCBFBC0, 0x9CCCFBC0, 0x9CCDFBC0, 0x9CCEFBC0, 0x9CCFFBC0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x26A1, 0x26A1, 0x26A1, 0x26A1, 0x0, 0x26A1, 0x26A1, 0x26A1, 0x26A1, + 0x0, 0x0, 0x0, 0x26A2, 0x26A3, 0x9CF7FBC0, 0x0, 0x0, 0x9CFAFBC0, 0x9CFBFBC0, 0x9CFCFBC0, 0x9CFDFBC0, 0x9CFEFBC0, 0x9CFFFBC0, 0x1C4B, + 0x1C4E, 0x1C4F, 0x1C6D, 0x1C7E, 0x1C93, 0x1C94, 0x1CAE, 0x1CD4, 0x1D40, 0x1D54, 0x1D69, 0x1D80, 0x1DAE, 0x1DC2, 0x1DE1, + 0x1DF4, 0x1DE2, 0x1DF5, 0x1DEE, 0x1DE8, 0x1E0B, 0x1DFA, 0x1DFB, 0x1E10, 0x1E3E, 0x1E48, 0x1E99, 0x1EB9, 0x1EBB, 0x1EBC, + 0x1ED9, 0x1EE7, 0x1EF9, 0x1F25, 0x1F42, 0x1F8A, 0x1F8B, 0x1FBC, 0x1FCA, 0x1FD0, 0x1FD5, 0x1FE0, 0x20B4, 0x1C47, 0x1CAA1C47, + 0x1C60, 0x1C6C, 0x1C8F, 0x1CAA, 0x1CB8, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 
0x1D77, 0x1DAA, 0x1DB9, 0x1DC1, 0x1DDD, + 0x1E07, 0x1E0C, 0x1E33, 0x1E95, 0x1EB5, 0x1EF5, 0x1C47, 0x1C51, 0x1C55, 0x1C4F, 0x1C60, 0x1C8F, 0x1CAA, 0x1CBD, 0x1CC2, + 0x1CD4, 0x1CF4, 0x1D40, 0x1D65, 0x1DAA, 0x1DD8, 0x1DDD, 0x1DF0, 0x1DFA, 0x1DFB, 0x1E0C, 0x1E95, 0x1EB5, 0x1EBB, 0x1ED4, + 0x1EE3, 0x1F8B, 0x1FBA, 0x1FBB, 0x1FBD, 0x1FDD, 0x1FDE, 0x1D32, 0x1E33, 0x1EB5, 0x1EE3, 0x1FBA, 0x1FBB, 0x1FD4, 0x1FDD, + 0x1FDE, 0x1EBD, 0x1C6E, 0x1C95, 0x1CEC, 0x1DAF, 0x1DC3, 0x1E13, 0x1E43, 0x1E60, 0x1E76, 0x1E9F, 0x1F2A, 0x1D0E, 0x20CC, + 0x1CF4, 0x1D181E95, 0x1D45, 0x1D4B, 0x1E11, 0x1EC5, 0x1EE2, 0x1C70, 0x1C96, 0x1CED, 0x1D05, 0x1D6A, 0x1D92, 0x1DB0, 0x1DCD, + 0x1E14, 0x1E4D, 0x1E77, 0x1E87, 0x1EE9, 0x1F03, 0x1F2B, 0x1C4D, 0x1C5A, 0x1C9F, 0x1CB5, 0x1CC6, 0x1CD3, 0x1CC1, 0x1D46, + 0x1DF7, 0x1E90, 0x1EC6, 0x1F47, 0x1C5B, 0x1C7A, 0x1C89, 0x1C8F, 0x1CCF, 0x1CE5, 0x1D5D, 0x1CF8, 0x1EC8, 0x1D41, 0x1D47, + 0x1D3A, 0x1D45, 0x1D59, 0x1D93, 0x1D92, 0x1D7B, 0x1DB1, 0x1EDA, 0x1DC4, 0x1DCE, 0x1DBD, 0x1DFD, 0x1E1C, 0x1E78, 0x1E82, + 0x1EA0, 0x1EC0, 0x1EDE, 0x1EB9, 0x1EEA, 0x1EF1, 0x1F21, 0x1F30, 0x1F34, 0x1F3E, 0x1FC5, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1E33, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1F60, + 0x1C47, 0x1CAA1C47, 0x1DDD1C47, 0x1EE31C47, 0x1C7A, 0x1C8F, 0x1C8F, 0x1CF4, 0x1CFD, 0x1D65, 0x1D77, 0x1D7B, 0x1DAE, 0x1DB9, 0x1DBD, + 0x1E38, 0x1E3D, 0x1E71, 0x1E71, 0x1F21, 0x1C55, 0x1C60, 0x1C79, 0x1CBD, 0x1CE5, 0x1D8B, 0x1DDD, 0x1E0C, 0x1E82, 0x1EB5, + 0x1EF5, 0x1C47, 0x1DDD, 0x1EB5, 0x0, 0x9DF6FBC0, 0x9DF7FBC0, 0x9DF8FBC0, 0x9DF9FBC0, 0x9DFAFBC0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x1C47, 0x1C47, 0x1C60, 0x1C60, 0x1C60, 0x1C60, 0x1C60, 0x1C60, 0x1C7A, 0x1C7A, 0x1C8F, 0x1C8F, 0x1C8F, 0x1C8F, 0x1C8F, + 0x1C8F, 0x1C8F, 0x1C8F, 0x1C8F, 0x1C8F, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, + 0x1CE5, 0x1CE5, 0x1CF4, 0x1CF4, 0x1D18, 0x1D18, 0x1D18, 0x1D18, 0x1D18, 0x1D18, 0x1D18, 0x1D18, 0x1D18, 0x1D18, 0x1D32, + 0x1D32, 0x1D32, 0x1D32, 0x1D65, 
0x1D65, 0x1D65, 0x1D65, 0x1D65, 0x1D65, 0x1D77, 0x1D77, 0x1D77, 0x1D77, 0x1D77, 0x1D77, + 0x1D77, 0x1D77, 0x1DAA, 0x1DAA, 0x1DAA, 0x1DAA, 0x1DAA, 0x1DAA, 0x1DB9, 0x1DB9, 0x1DB9, 0x1DB9, 0x1DB9, 0x1DB9, 0x1DB9, + 0x1DB9, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1E0C, 0x1E0C, 0x1E0C, 0x1E0C, 0x1E33, 0x1E33, + 0x1E33, 0x1E33, 0x1E33, 0x1E33, 0x1E33, 0x1E33, 0x1E71, 0x1E71, 0x1E71, 0x1E71, 0x1E71, 0x1E71, 0x1E71, 0x1E71, 0x1E71, + 0x1E71, 0x1E95, 0x1E95, 0x1E95, 0x1E95, 0x1E95, 0x1E95, 0x1E95, 0x1E95, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, + 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EE3, 0x1EE3, 0x1EE3, 0x1EE3, 0x1EF5, 0x1EF5, 0x1EF5, 0x1EF5, 0x1EF5, 0x1EF5, 0x1EF5, + 0x1EF5, 0x1EF5, 0x1EF5, 0x1EFF, 0x1EFF, 0x1EFF, 0x1EFF, 0x1F0B, 0x1F0B, 0x1F21, 0x1F21, 0x1F21, 0x1F21, 0x1F21, 0x1F21, + 0x1D18, 0x1E95, 0x1EF5, 0x1F0B, 0x1F801C47, 0x1E71, 0x1E80, 0x1E81, 0x1E711E71, 0x1CA9, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, + 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1C47, + 0x1C47, 0x1C47, 0x1C47, 0x1C47, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, + 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1CAA, 0x1D32, 0x1D32, 0x1D32, 0x1D32, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, + 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, 0x1DDD, + 0x1DDD, 0x1DDD, 0x1DDD, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, 0x1EB5, + 0x1EB5, 0x1EB5, 0x1F0B, 0x1F0B, 0x1F0B, 0x1F0B, 0x1F0B, 0x1F0B, 0x1F0B, 0x1F0B, 0x1D771D77, 0x1D771D77, 0x1EF0, 0x1EF0, 0x1F1B, + 0x1F1B, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, + 0x1FB9, 0x1FB9, 0x1FBE, 0x1FBE, 0x1FBE, 0x1FBE, 0x1FBE, 0x1FBE, 0x9F16FBC0, 0x9F17FBC0, 0x1FBE, 0x1FBE, 0x1FBE, 0x1FBE, 0x1FBE, + 0x1FBE, 0x9F1EFBC0, 0x9F1FFBC0, 0x1FC4, 0x1FC4, 0x1FC4, 
0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, + 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC6, 0x1FC6, 0x1FC6, 0x1FC6, 0x1FC6, 0x1FC6, 0x1FC6, 0x1FC6, 0x1FC6, 0x1FC6, 0x1FC6, + 0x1FC6, 0x1FC6, 0x1FC6, 0x1FC6, 0x1FC6, 0x1FCE, 0x1FCE, 0x1FCE, 0x1FCE, 0x1FCE, 0x1FCE, 0x9F46FBC0, 0x9F47FBC0, 0x1FCE, 0x1FCE, + 0x1FCE, 0x1FCE, 0x1FCE, 0x1FCE, 0x9F4EFBC0, 0x9F4FFBC0, 0x1FDC, 0x1FDC, 0x1FDC, 0x1FDC, 0x1FDC, 0x1FDC, 0x1FDC, 0x1FDC, 0x9F58FBC0, + 0x1FDC, 0x9F5AFBC0, 0x1FDC, 0x9F5CFBC0, 0x1FDC, 0x9F5EFBC0, 0x1FDC, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, + 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FB9, 0x1FB9, 0x1FBE, 0x1FBE, 0x1FC4, 0x1FC4, 0x1FC6, + 0x1FC6, 0x1FCE, 0x1FCE, 0x1FDC, 0x1FDC, 0x1FE1, 0x1FE1, 0x9F7EFBC0, 0x9F7FFBC0, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, + 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, + 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FC4, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, + 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FE1, 0x1FB9, 0x1FB9, 0x1FB9, + 0x1FB9, 0x1FB9, 0x9FB5FBC0, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x1FB9, 0x48E, 0x1FC6, 0x48E, 0x490, 0x489, + 0x1FC4, 0x1FC4, 0x1FC4, 0x9FC5FBC0, 0x1FC4, 0x1FC4, 0x1FBE, 0x1FBE, 0x1FC4, 0x1FC4, 0x1FC4, 0x48E, 0x48E, 0x48E, 0x1FC6, + 0x1FC6, 0x1FC6, 0x1FC6, 0x9FD4FBC0, 0x9FD5FBC0, 0x1FC6, 0x1FC6, 0x1FC6, 0x1FC6, 0x1FC6, 0x1FC6, 0x9FDCFBC0, 0x48F, 0x48F, 0x48F, + 0x1FDC, 0x1FDC, 0x1FDC, 0x1FDC, 0x1FD4, 0x1FD4, 0x1FDC, 0x1FDC, 0x1FDC, 0x1FDC, 0x1FDC, 0x1FDC, 0x1FD4, 0x489, 0x489, + 0x482, 0x9FF0FBC0, 0x9FF1FBC0, 0x1FE1, 0x1FE1, 0x1FE1, 0x9FF5FBC0, 0x1FE1, 0x1FE1, 0x1FCE, 0x1FCE, 0x1FE1, 0x1FE1, 0x1FE1, 0x483, + 0x48F, 0x9FFFFBC0, 0x209, 0x209, 0x209, 0x209, 0x209, 0x209, 0x209, 0x209, 0x209, 0x209, 0x209, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x213, 0x213, 0x214, 0x215, 0x216, 
0x217, 0x383, 0x20C, 0x306, 0x307, 0x308, 0x309, + 0x30D, 0x30E, 0x30F, 0x310, 0x39F, 0x3A0, 0x3A4, 0x3A5, 0x277, 0x2770277, 0x27702770277, 0x3A6, 0x207, 0x208, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x209, 0x39B, 0x39D, 0x3AA, 0x3AA03AA, 0x3AA03AA03AA, 0x3AB, 0x3AB03AB, 0x3AB03AB03AB, 0x3AE, 0x30A, + 0x30B, 0x3AF, 0x2600260, 0x275, 0x20A, 0x3B0, 0x3B2, 0x3B4, 0x3B5, 0x3A7, 0x626, 0x323, 0x324, 0x2660266, 0x2600266, + 0x2660260, 0x397, 0x38C, 0x3A8, 0x3A9, 0x390, 0x236, 0x3B3, 0x391, 0x622, 0x21A, 0x3B1, 0x2E8, 0x2E9, 0x3AA03AA03AA03AA, + 0x2EA, 0x2EB, 0x2EC, 0x2ED, 0x2EE, 0x2EF, 0x2F0, 0x209, 0x0, 0x0, 0x0, 0x0, 0x0, 0xA065FBC0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1C3D, 0x1D32, 0xA072FBC0, 0xA073FBC0, 0x1C41, 0x1C42, + 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x616, 0x621, 0x61B, 0x317, 0x318, 0x1DB9, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, + 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x616, 0x621, 0x61B, 0x317, 0x318, 0xA08FFBC0, 0x1C47, 0x1CAA, 0x1DDD, 0x1EFF, + 0x1CBD, 0x1D18, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1E0C, 0x1E71, 0x1E95, 0xA09DFBC0, 0xA09EFBC0, 0xA09FFBC0, 0x1C1F, 0x1C20, 0x1C21, + 0x1C22, 0x1C23, 0x1C24, 0x1C25, 0x1C26, 0x1E711E33, 0x1C27, 0x1C28, 0x1C29, 0x1C2A, 0x1C2B, 0x1C2C, 0x1C2D, 0x1C2E, 0x1C2F, + 0x1C30, 0x1C31, 0x1C32, 0x1C33, 0x1C34, 0x1C35, 0x1C36, 0x1C37, 0x1C38, 0x1C39, 0x1C3A, 0x1C3B, 0x1C3C, 0xA0BFFBC0, 0xA0C0FBC0, + 0xA0C1FBC0, 0xA0C2FBC0, 0xA0C3FBC0, 0xA0C4FBC0, 0xA0C5FBC0, 0xA0C6FBC0, 0xA0C7FBC0, 0xA0C8FBC0, 0xA0C9FBC0, 0xA0CAFBC0, 0xA0CBFBC0, 0xA0CCFBC0, 0xA0CDFBC0, 0xA0CEFBC0, 0xA0CFFBC0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0xA0F1FBC0, 0xA0F2FBC0, 0xA0F3FBC0, 0xA0F4FBC0, 0xA0F5FBC0, 0xA0F6FBC0, 0xA0F7FBC0, 0xA0F8FBC0, 0xA0F9FBC0, 0xA0FAFBC0, 0xA0FBFBC0, 0xA0FCFBC0, + 0xA0FDFBC0, 0xA0FEFBC0, 0xA0FFFBC0, 0x1C7A03941C47, 0x1E7103941C47, 0x1C7A, 0x1C7A04F6, 0x586, 0x1DDD03941C7A, 0x1EB503941C7A, 0x1CC2, 
0x587, 0x1CE504F6, 0x1CF4, 0x1D18, + 0x1D18, 0x1D18, 0x1D18, 0x1D18, 0x1D32, 0x1D32, 0x1D77, 0x1D77, 0x588, 0x1DB9, 0x1DDD1DB9, 0x589, 0x58A, 0x1E0C, 0x1E21, + 0x1E33, 0x1E33, 0x1E33, 0x58B, 0x58C, 0x1DAA1E71, 0x1D771CAA1E95, 0x1DAA1E95, 0x58D, 0x1F21, 0x58E, 0x1FE1, 0x58F, 0x1F21, 0x590, + 0x1D65, 0x1C47, 0x1C60, 0x1C7A, 0x591, 0x1CAA, 0x1CAA, 0x1CE5, 0x1CF2, 0x1DAA, 0x1DDD, 0x22B7, 0x22B8, 0x22B9, 0x22BA, + 0x1D32, 0x592, 0x1EFF1C471CE5, 0x1FCF, 0x1FBB, 0x1FBB, 0x1FCF, 0x615, 0x593, 0x594, 0x595, 0x596, 0x1C8F, 0x1C8F, 0x1CAA, + 0x1D32, 0x1D4C, 0x597, 0x6B1, 0x598, 0x1E7103941C47, 0x1CF2, 0x599, 0x1C4406261C3E, 0x1C4606261C3E, 0x1C3D1C3E06261C3E, 0x1C4006261C3E, 0x1C4006261C3F, 0x1C4206261C3E, 0x1C4206261C3F, + 0x1C4206261C40, 0x1C4206261C41, 0x1C4306261C3E, 0x1C4306261C42, 0x1C4506261C3E, 0x1C4506261C40, 0x1C4506261C42, 0x1C4506261C44, 0x6261C3E, 0x1D32, 0x1D321D32, 0x1D321D321D32, 0x1EE31D32, 0x1EE3, 0x1D321EE3, + 0x1D321D321EE3, 0x1D321D321D321EE3, 0x1EFF1D32, 0x1EFF, 0x1D321EFF, 0x1D321D321EFF, 0x1D77, 0x1C7A, 0x1C8F, 0x1DAA, 0x1D32, 0x1D321D32, 0x1D321D321D32, 0x1EE31D32, 0x1EE3, + 0x1D321EE3, 0x1D321D321EE3, 0x1D321D321D321EE3, 0x1EFF1D32, 0x1EFF, 0x1D321EFF, 0x1D321D321EFF, 0x1D77, 0x1C7A, 0x1C8F, 0x1DAA, 0x1AC6, 0x1AC7, 0x1AC8, 0x1C8D, + 0x1C8D, 0x1C43, 0x1AC9, 0x1ACA, 0x1ACB, 0x1C4006261C3D, 0x59A, 0x59B, 0xA18CFBC0, 0xA18DFBC0, 0xA18EFBC0, 0xA18FFBC0, 0x59C, 0x59E, 0x59D, + 0x59F, 0x5A0, 0x5A1, 0x5A2, 0x5A3, 0x5A4, 0x5A5, 0x59C, 0x59D, 0x5A6, 0x5A7, 0x5A8, 0x5A9, 0x5AA, 0x5AB, + 0x5AC, 0x5AD, 0x5AE, 0x5AF, 0x5B0, 0x5B1, 0x5B2, 0x5B3, 0x5B4, 0x5B5, 0x5B6, 0x5B7, 0x5A0, 0x5B8, 0x5B9, + 0x5BA, 0x5BB, 0x5BC, 0x5BD, 0x5BE, 0x5BF, 0x5C0, 0x5C1, 0x5C2, 0x5C3, 0x5C4, 0x5C5, 0x5C6, 0x5C7, 0x5C8, + 0x5C9, 0x5CA, 0x5CB, 0x5CC, 0x5CD, 0x5CE, 0x5CF, 0x5D0, 0x5D1, 0x5D2, 0x5D3, 0x5D4, 0x5D5, 0x5D6, 0x5DA, + 0x5D8, 0x5D6, 0x5D7, 0x5D8, 0x5D9, 0x5DA, 0x5DB, 0x5DC, 0x5DD, 0x5DE, 0x5DF, 0x5E0, 0x5E1, 0x5E2, 0x5E3, + 0x5E4, 0x5E5, 0x5E6, 0x5E7, 0x5E8, 0x5E9, 0x5EA, 0x5EB, 
0x5EC, 0x5ED, 0x5EE, 0x5EF, 0x5F0, 0x5F1, 0x5F2, + 0x5F3, 0x5F4, 0x5F5, 0x5F6, 0x5F7, 0x5F8, 0x5F9, 0x5FA, 0x5FB, 0x5FC, 0x5FD, 0x5FE, 0x5FF, 0x600, 0x601, + 0x602, 0x603, 0x604, 0x605, 0x606, 0x607, 0x608, 0x609, 0x609, 0x60A, 0x60B, 0x60C, 0x60D, 0x60D, 0x60E, + 0x60F, 0x60F, 0x610, 0x612, 0x613, 0x614, 0x615, 0x621, 0x623, 0x624, 0x625, 0x627, 0x628, 0x629, 0x62A, + 0x62B, 0x62C, 0x62E, 0x630, 0x631, 0x632, 0x633, 0x634, 0x635, 0x636, 0x636, 0x637, 0x637, 0x638, 0x639, + 0x63A, 0x63B, 0x63C, 0x63C063C, 0x63C063C063C, 0x63D, 0x63D063D, 0x63D063D063D, 0x63E, 0x63F, 0x640, 0x641, 0x642, 0x643, 0x644, + 0x645, 0x646, 0x647, 0x648, 0x649, 0x64A, 0x64B, 0x64C, 0x64D, 0x649, 0x64E, 0x64F, 0x64F, 0x650, 0x651, + 0x650, 0x652, 0x652, 0x653, 0x654, 0x655, 0x656, 0x657, 0x658, 0x659, 0x65A, 0x65B, 0x65C, 0x65D, 0x65E, + 0x65F, 0x660, 0x661, 0x662, 0x663, 0x664, 0x665, 0x666, 0x667, 0x668, 0x61B, 0x669, 0x669, 0x66A, 0x66B, + 0x66C, 0x66D, 0x66E, 0x66F, 0x670, 0x671, 0x672, 0x673, 0x656, 0x61A, 0x61C, 0x66B, 0x66C, 0x674, 0x675, + 0x674, 0x675, 0x676, 0x677, 0x676, 0x677, 0x678, 0x679, 0x67A, 0x67B, 0x67C, 0x67D, 0x678, 0x679, 0x67E, + 0x67F, 0x67E, 0x67F, 0x680, 0x681, 0x680, 0x681, 0x682, 0x683, 0x684, 0x685, 0x686, 0x687, 0x688, 0x689, + 0x68A, 0x68B, 0x68C, 0x68D, 0x68E, 0x68F, 0x690, 0x691, 0x692, 0x693, 0x694, 0x695, 0x696, 0x697, 0x698, + 0x699, 0x69A, 0x69B, 0x69C, 0x69D, 0x69E, 0x69F, 0x6A0, 0x6A1, 0x6A2, 0x6A3, 0x69A, 0x6A0, 0x6A1, 0x6A3, + 0x6A4, 0x6A5, 0x6A6, 0x6A7, 0x6A8, 0x6A9, 0x6AA, 0x6AB, 0x6AC, 0x6AD, 0x6AE, 0x6AF, 0x6B0, 0x6B2, 0x6B3, + 0x6B4, 0x6B5, 0x6B6, 0x6B7, 0x6B8, 0x6B9, 0x6BA, 0x6BB, 0x6BC, 0x6BD, 0x6BE, 0x6BF, 0x6C0, 0x6C1, 0x6C2, + 0x6C3, 0x6C4, 0x6C5, 0x6C6, 0x6C7, 0x6C8, 0x6C9, 0x6CA, 0x6CB, 0x6CC, 0x6CD, 0x6CE, 0x6CF, 0x6D0, 0x6D1, + 0x6D2, 0x6D3, 0x6D4, 0x67A, 0x67B, 0x689, 0x68A, 0x6D5, 0x6D6, 0x6D7, 0x6D8, 0x6D9, 0x6DA, 0x6A6, 0x6A7, + 0x6A8, 0x6A9, 0x6DB, 0x6DC, 0x6DD, 0x6DE, 0x6DF, 0x6E0, 0x6E1, 0x6E2, 0x6E3, 0x6E4, 0x6E5, 0x6E6, 0x6E7, 
+ 0x6E8, 0x6E9, 0x6EA, 0x6EB, 0x6EC, 0x6ED, 0x6EE, 0x6EF, 0x6F0, 0x6F1, 0x6F2, 0x6F3, 0x6F4, 0x325, 0x326, + 0x327, 0x328, 0x6F5, 0x6F6, 0x6F7, 0x6F8, 0x6F9, 0x6FA, 0x6FB, 0x6FC, 0x6FD, 0x6FE, 0x6FF, 0x700, 0x701, + 0x702, 0x703, 0x704, 0x705, 0x706, 0x707, 0x708, 0x709, 0x70A, 0x70B, 0x70C, 0x70D, 0x70E, 0x70F, 0x710, + 0x711, 0x36F, 0x370, 0x712, 0x713, 0x714, 0x715, 0x716, 0x717, 0x718, 0x719, 0x71A, 0x71B, 0x71C, 0x71D, + 0x71E, 0x71F, 0x720, 0x721, 0x722, 0x723, 0x724, 0x725, 0x726, 0x727, 0x728, 0x729, 0x72A, 0x72B, 0x72C, + 0x72D, 0x72E, 0x72F, 0x730, 0x731, 0x732, 0x733, 0x734, 0x735, 0x736, 0x737, 0x738, 0x739, 0x73A, 0x73B, + 0x73C, 0x73D, 0x73E, 0x73F, 0x740, 0x741, 0x742, 0x743, 0x744, 0x745, 0x746, 0x747, 0x748, 0x749, 0x74A, + 0x74B, 0x74C, 0x74D, 0x74E, 0x74F, 0x750, 0x751, 0x752, 0x753, 0x754, 0x755, 0x756, 0x757, 0x758, 0x759, + 0x75A, 0x75B, 0x75C, 0x75D, 0x75E, 0x75F, 0x760, 0x761, 0x762, 0x763, 0x764, 0x765, 0x766, 0x767, 0x768, + 0x769, 0x76A, 0x76B, 0x76C, 0x76D, 0x76E, 0x76F, 0x770, 0x771, 0x772, 0x773, 0x774, 0x775, 0x776, 0x777, + 0x778, 0x779, 0x77A, 0x77B, 0x77C, 0x77D, 0x77E, 0x77F, 0x780, 0x781, 0x782, 0x783, 0x784, 0x785, 0x786, + 0x787, 0x788, 0x789, 0x78A, 0x78B, 0x78C, 0x78D, 0x78E, 0x78F, 0x790, 0x791, 0x792, 0x793, 0x794, 0x795, + 0x796, 0x797, 0x798, 0x799, 0x79A, 0x79B, 0x79C, 0x79D, 0x79E, 0x79F, 0x7A0, 0x7A1, 0x7A2, 0x7A3, 0x7A4, + 0x7A5, 0x7A6, 0x7A7, 0x7A8, 0x7A9, 0x7AA, 0x7AB, 0x7AC, 0x7AD, 0x7AE, 0x7AF, 0x7B0, 0x7B1, 0x7B2, 0x7B3, + 0x7B4, 0x7B5, 0x7B6, 0x7B7, 0x7B8, 0x7B9, 0x7BA, 0x7BB, 0x7BC, 0x7BD, 0x7BE, 0x7BF, 0x7C0, 0x7C1, 0x7C2, + 0x7C3, 0x7C4, 0x7C5, 0x7C6, 0x7C7, 0x7C8, 0x7C9, 0x7CA, 0x7CB, 0x7CC, 0x7CD, 0x7CE, 0x7CF, 0x7D0, 0x7D1, + 0x7D2, 0x7D3, 0x7D4, 0x7D5, 0x7D6, 0x7D7, 0x7D8, 0x7D9, 0x7DA, 0x7DB, 0x7DC, 0x7DD, 0x7DE, 0x7DF, 0x7E0, + 0x7E1, 0x7E2, 0x7E3, 0x7E4, 0x7E5, 0xA3FFFBC0, 0x7E6, 0x7E7, 0x7E8, 0x7E9, 0x7EA, 0x7EB, 0x7EC, 0x7ED, 0x7EE, + 0x7EF, 0x7F0, 0x7F1, 0x7F2, 0x7F3, 0x7F4, 0x7F5, 0x7F6, 0x7F7, 
0x7F8, 0x7F9, 0x7FA, 0x7FB, 0x7FC, 0x7FD, + 0x7FE, 0x7FF, 0x800, 0x801, 0x802, 0x803, 0x804, 0x805, 0x806, 0x807, 0x808, 0x809, 0x80A, 0x80B, 0x80C, + 0xA427FBC0, 0xA428FBC0, 0xA429FBC0, 0xA42AFBC0, 0xA42BFBC0, 0xA42CFBC0, 0xA42DFBC0, 0xA42EFBC0, 0xA42FFBC0, 0xA430FBC0, 0xA431FBC0, 0xA432FBC0, 0xA433FBC0, 0xA434FBC0, 0xA435FBC0, + 0xA436FBC0, 0xA437FBC0, 0xA438FBC0, 0xA439FBC0, 0xA43AFBC0, 0xA43BFBC0, 0xA43CFBC0, 0xA43DFBC0, 0xA43EFBC0, 0xA43FFBC0, 0x80D, 0x80E, 0x80F, 0x810, 0x811, + 0x812, 0x813, 0x814, 0x815, 0x816, 0x817, 0xA44BFBC0, 0xA44CFBC0, 0xA44DFBC0, 0xA44EFBC0, 0xA44FFBC0, 0xA450FBC0, 0xA451FBC0, 0xA452FBC0, 0xA453FBC0, + 0xA454FBC0, 0xA455FBC0, 0xA456FBC0, 0xA457FBC0, 0xA458FBC0, 0xA459FBC0, 0xA45AFBC0, 0xA45BFBC0, 0xA45CFBC0, 0xA45DFBC0, 0xA45EFBC0, 0xA45FFBC0, 0x1C3E, 0x1C3F, 0x1C40, + 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C3D1C3E, 0x1C3E1C3E, 0x1C3F1C3E, 0x1C401C3E, 0x1C411C3E, 0x1C421C3E, 0x1C431C3E, 0x1C441C3E, 0x1C451C3E, + 0x1C461C3E, 0x1C3D1C3F, 0x3181C3E0317, 0x3181C3F0317, 0x3181C400317, 0x3181C410317, 0x3181C420317, 0x3181C430317, 0x3181C440317, 0x3181C450317, 0x3181C460317, 0x3181C3D1C3E0317, 0x3181C3E1C3E0317, 0x3181C3F1C3E0317, 0x3181C401C3E0317, + 0x3181C411C3E0317, 0x3181C421C3E0317, 0x3181C431C3E0317, 0x3181C441C3E0317, 0x3181C451C3E0317, 0x3181C461C3E0317, 0x3181C3D1C3F0317, 0x2771C3E, 0x2771C3F, 0x2771C40, 0x2771C41, 0x2771C42, 0x2771C43, 0x2771C44, 0x2771C45, + 0x2771C46, 0x2771C3D1C3E, 0x2771C3E1C3E, 0x2771C3F1C3E, 0x2771C401C3E, 0x2771C411C3E, 0x2771C421C3E, 0x2771C431C3E, 0x2771C441C3E, 0x2771C451C3E, 0x2771C461C3E, 0x2771C3D1C3F, 0x3181C470317, 0x3181C600317, 0x3181C7A0317, + 0x3181C8F0317, 0x3181CAA0317, 0x3181CE50317, 0x3181CF40317, 0x3181D180317, 0x3181D320317, 0x3181D4C0317, 0x3181D650317, 0x3181D770317, 0x3181DAA0317, 0x3181DB90317, 0x3181DDD0317, 0x3181E0C0317, 0x3181E210317, 0x3181E330317, + 0x3181E710317, 0x3181E950317, 0x3181EB50317, 0x3181EE30317, 0x3181EF50317, 0x3181EFF0317, 0x3181F0B0317, 0x3181F210317, 
0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, + 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, + 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, + 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, + 0x1C3D, 0x1C3E1C3E, 0x1C3F1C3E, 0x1C401C3E, 0x1C411C3E, 0x1C421C3E, 0x1C431C3E, 0x1C441C3E, 0x1C451C3E, 0x1C461C3E, 0x1C3D1C3F, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, + 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C3D1C3E, 0x1C3D, 0x818, 0x819, 0x81A, 0x81B, 0x81C, 0x81D, 0x81E, 0x81F, + 0x820, 0x821, 0x822, 0x823, 0x824, 0x825, 0x826, 0x827, 0x828, 0x829, 0x82A, 0x82B, 0x82C, 0x82D, 0x82E, + 0x82F, 0x830, 0x831, 0x832, 0x833, 0x834, 0x835, 0x836, 0x837, 0x838, 0x839, 0x83A, 0x83B, 0x83C, 0x83D, + 0x83E, 0x83F, 0x840, 0x841, 0x842, 0x843, 0x844, 0x845, 0x846, 0x847, 0x848, 0x849, 0x84A, 0x84B, 0x84C, + 0x84D, 0x84E, 0x84F, 0x850, 0x851, 0x852, 0x853, 0x854, 0x855, 0x856, 0x857, 0x858, 0x859, 0x85A, 0x85B, + 0x85C, 0x85D, 0x85E, 0x85F, 0x860, 0x861, 0x862, 0x863, 0x864, 0x865, 0x866, 0x867, 0x868, 0x869, 0x86A, + 0x86B, 0x86C, 0x86D, 0x86E, 0x86F, 0x870, 0x871, 0x872, 0x873, 0x874, 0x875, 0x876, 0x877, 0x878, 0x879, + 0x87A, 0x87B, 0x87C, 0x87D, 0x87E, 0x87F, 0x880, 0x881, 0x882, 0x883, 0x884, 0x885, 0x886, 0x887, 0x888, + 0x889, 0x88A, 0x88B, 0x88C, 0x88D, 0x88E, 0x88F, 0x890, 0x891, 0x892, 0x893, 0x894, 0x895, 0x896, 0x897, + 0x898, 0x899, 0x89A, 0x89B, 0x89C, 0x89D, 0x89E, 0x89F, 0x8A0, 0x8A1, 0x8A2, 0x8A3, 0x8A4, 0x8A5, 0x8A6, + 0x8A7, 0x8A8, 0x8A9, 0x8AA, 0x8AB, 0x8AC, 0x8AD, 0x8AE, 0x8AF, 0x8B0, 0x8B1, 0x8B2, 0x8B3, 0x8B4, 0x8B5, + 0x8B6, 0x8B7, 0x8B8, 0x8B9, 0x8BA, 0x8BB, 0x8BC, 0x8BD, 0x8BE, 0x8BF, 0x8C0, 0x8C1, 0x8C2, 0x8C3, 0x8C4, + 0x8C5, 0x8C6, 0x8C7, 0x8C8, 0x8C9, 0x8CA, 0x8CB, 0x8CC, 0x8CD, 0x8CE, 0x8CF, 0x8D0, 0x8D1, 0x8D2, 0x8D3, + 0x8D4, 
0x8D5, 0x8D6, 0x8D7, 0x8D8, 0x8D9, 0x8DA, 0x8DB, 0x8DC, 0x8DD, 0x8DE, 0x8DF, 0x8E0, 0x8E1, 0x8E2, + 0x8E3, 0x8E4, 0x8E5, 0x8E6, 0x8E7, 0x8E8, 0x8E9, 0x8EA, 0x8EB, 0x8EC, 0x8ED, 0x8EE, 0x8EF, 0x8F0, 0x8F1, + 0x8F2, 0x8F3, 0x8F4, 0x8F5, 0x8F6, 0x8F7, 0x8F8, 0x8F9, 0x8FA, 0x8FB, 0x8FC, 0x8FD, 0x8FE, 0x8FF, 0x900, + 0x901, 0x902, 0x903, 0x904, 0x905, 0x906, 0x907, 0x908, 0x909, 0x90A, 0x90B, 0x90C, 0x90D, 0x90E, 0x90F, + 0x910, 0x911, 0x912, 0x913, 0x914, 0x915, 0x916, 0x917, 0x918, 0x919, 0x91A, 0x91B, 0x91C, 0x91D, 0x91E, + 0x91F, 0x920, 0x921, 0x922, 0x923, 0x924, 0x925, 0x926, 0x927, 0x928, 0x929, 0x92A, 0x92B, 0x92C, 0x92D, + 0x92E, 0x92F, 0x930, 0x931, 0x932, 0x933, 0x934, 0x935, 0x936, 0x937, 0x938, 0x939, 0x93A, 0x93B, 0x93C, + 0x93D, 0x93E, 0x93F, 0x940, 0x941, 0x942, 0x943, 0x944, 0x945, 0x946, 0x947, 0xEA2, 0xEA3, 0xEA4, 0xEA5, + 0xEA6, 0xEA7, 0xEA8, 0xEA9, 0x948, 0x949, 0x94A, 0x94B, 0x94C, 0x94D, 0x94E, 0x94F, 0x950, 0x951, 0x952, + 0x953, 0x954, 0x955, 0x956, 0x957, 0x958, 0x959, 0x95A, 0x95B, 0x95C, 0x95D, 0x95E, 0x95F, 0x960, 0x961, + 0x962, 0x963, 0x964, 0x965, 0x966, 0x967, 0x968, 0x969, 0x96A, 0x96B, 0x96C, 0x96D, 0x96E, 0x96F, 0x970, + 0x971, 0x972, 0x973, 0x974, 0x975, 0x976, 0x977, 0x978, 0x979, 0x97A, 0x97B, 0x97C, 0x10F3, 0x10F4, 0x10F5, + 0x97D, 0x97E, 0x97F, 0x980, 0x981, 0x982, 0x983, 0x984, 0x985, 0x986, 0x987, 0x988, 0x989, 0x98A, 0x98B, + 0x98C, 0x98D, 0x98E, 0x98F, 0x990, 0x991, 0x992, 0x993, 0x994, 0x995, 0x996, 0xE9C, 0xE9D, 0xE9E, 0xE9F, + 0xEA0, 0xEA1, 0x997, 0x998, 0x999, 0x99A, 0x99B, 0x99C, 0x99D, 0x99E, 0x99F, 0x9A0, 0x9A1, 0x9A2, 0x9A3, + 0x9A4, 0x9A5, 0x9A6, 0x9A7, 0x9A8, 0x9A9, 0x9AA, 0x9AB, 0x9AC, 0x9AD, 0x9AE, 0x9AF, 0x9B0, 0x9B1, 0x9B2, + 0x9B3, 0x9B4, 0x9B5, 0x9B6, 0x9B7, 0x9B8, 0x9B9, 0x9BA, 0x9BB, 0x9BC, 0x9BD, 0x9BE, 0x9BF, 0x9C0, 0x9C1, + 0x9C2, 0x9C3, 0x9C4, 0x9C5, 0x9C6, 0x9C7, 0x9C8, 0x9C9, 0x9CA, 0x9CB, 0x9CC, 0x9CD, 0x9CE, 0x9CF, 0x9D0, + 0x9D1, 0x9D2, 0x9D3, 0x9D4, 0x9D5, 0x9D6, 0x9D7, 0x9D8, 0x9D9, 0x9DA, 0x9DB, 
0x9DC, 0x9DD, 0x9DE, 0x9DF, + 0x9E0, 0x9E1, 0x9E2, 0x9E3, 0x9E4, 0x9E5, 0x9E6, 0x9E7, 0x9E8, 0x9E9, 0x9EA, 0x9EB, 0x9EC, 0x9ED, 0x9EE, + 0x9EF, 0x9F0, 0x9F1, 0x9F2, 0x9F3, 0x9F4, 0x9F5, 0x9F6, 0x9F7, 0x9F8, 0x9F9, 0x9FA, 0x9FB, 0x9FC, 0x9FD, + 0x9FE, 0x9FF, 0xA00, 0xA01, 0xA02, 0xA03, 0xA04, 0xA05, 0xA06, 0xA21, 0xA22, 0xA23, 0xA24, 0xA25, 0xA26, + 0xA27, 0xA28, 0xA29, 0xA2A, 0xA2B, 0xA2C, 0xA2D, 0xA2E, 0xA2F, 0xA30, 0xA31, 0xA32, 0xA33, 0xA34, 0xA35, + 0xA36, 0xA37, 0xA38, 0xA39, 0xA3A, 0xA3B, 0xA3C, 0xA3D, 0xA3E, 0xA3F, 0xA40, 0xA41, 0xA42, 0xA43, 0xA44, + 0xA45, 0xA46, 0xA47, 0xA48, 0xA49, 0xA4A, 0xA4B, 0xA4C, 0xA4D, 0xA4E, 0xA4F, 0xA50, 0xA51, 0xA52, 0xA53, + 0xA54, 0xA55, 0xA56, 0xA57, 0xA58, 0xA59, 0xA5A, 0xA5B, 0xA5C, 0xA5D, 0xA5E, 0xA5F, 0xA60, 0xA61, 0xA62, + 0xA63, 0xA64, 0xA65, 0xA66, 0xA67, 0xA68, 0xA69, 0xA6A, 0xA6B, 0xA6C, 0xA6D, 0xA6E, 0xA6F, 0xA70, 0xA71, + 0xA72, 0xA73, 0xA74, 0xA75, 0xA76, 0xA77, 0xA78, 0xA79, 0xA7A, 0xA7B, 0xA7C, 0xA7D, 0xA7E, 0xA7F, 0xA80, + 0xA81, 0xA82, 0xA83, 0xA84, 0xA85, 0xA86, 0xA87, 0xA88, 0x34D, 0x34E, 0x34F, 0x350, 0x351, 0x352, 0x353, + 0x354, 0x355, 0x356, 0x357, 0x358, 0x359, 0x35A, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, + 0x1C46, 0x1C3D1C3E, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C3D1C3E, 0x1C3E, 0x1C3F, 0x1C40, + 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C3D1C3E, 0xA89, 0xA8A, 0xA8B, 0xA8C, 0xA8D, 0xA8E, 0xA8F, 0xA90, + 0xA91, 0xA92, 0xA93, 0xA94, 0xA95, 0xA96, 0xA97, 0xA98, 0xA99, 0xA9A, 0xA9B, 0xA9C, 0xA9D, 0xA9E, 0xA9F, + 0xAA0, 0xAA1, 0xAA2, 0xAA3, 0xAA4, 0xAA5, 0xAA6, 0xAA7, 0xAA8, 0xAA9, 0xAAA, 0xAAB, 0xAAC, 0xAAD, 0xAAE, + 0xAAF, 0xAB0, 0xAB1, 0xAB2, 0xAB3, 0xAB4, 0xAB5, 0xAB6, 0xAB7, 0xAB8, 0xAB9, 0x341, 0x342, 0xABA, 0xABB, + 0xABC, 0xABD, 0xABE, 0xABF, 0xAC0, 0xAC1, 0xAC2, 0xAC3, 0xAC4, 0xAC5, 0xAC6, 0xAC7, 0xAC8, 0xAC9, 0xACA, + 0xACB, 0xACC, 0xACD, 0xACE, 0xACF, 0xAD0, 0xAD1, 0xAD2, 0xAD3, 0xAD4, 0xAD5, 0xAD6, 0xAD7, 0xAD8, 0x343, + 
0x344, 0x345, 0x346, 0x347, 0x348, 0x349, 0x34A, 0x34B, 0x34C, 0xAD9, 0xADA, 0xADB, 0xADC, 0xADD, 0xADE, + 0xADF, 0xAE0, 0xAE1, 0xAE2, 0xAE3, 0xAE4, 0xAE5, 0xAE6, 0xAE7, 0xAE8, 0xD9C, 0xD9D, 0xD9E, 0xD9F, 0xDA0, + 0xDA1, 0xDA2, 0xDA3, 0xDA4, 0xDA5, 0xDA6, 0xDA7, 0xDA8, 0xDA9, 0xDAA, 0xDAB, 0xDAC, 0xDAD, 0xDAE, 0xDAF, + 0xDB0, 0xDB1, 0xDB2, 0xDB3, 0xDB4, 0xDB5, 0xDB6, 0xDB7, 0xDB8, 0xDB9, 0xDBA, 0xDBB, 0xDBC, 0xDBD, 0xDBE, + 0xDBF, 0xDC0, 0xDC1, 0xDC2, 0xDC3, 0xDC4, 0xDC5, 0xDC6, 0xDC7, 0xDC8, 0xDC9, 0xDCA, 0xDCB, 0xDCC, 0xDCD, + 0xDCE, 0xDCF, 0xDD0, 0xDD1, 0xDD2, 0xDD3, 0xDD4, 0xDD5, 0xDD6, 0xDD7, 0xDD8, 0xDD9, 0xDDA, 0xDDB, 0xDDC, + 0xDDD, 0xDDE, 0xDDF, 0xDE0, 0xDE1, 0xDE2, 0xDE3, 0xDE4, 0xDE5, 0xDE6, 0xDE7, 0xDE8, 0xDE9, 0xDEA, 0xDEB, + 0xDEC, 0xDED, 0xDEE, 0xDEF, 0xDF0, 0xDF1, 0xDF2, 0xDF3, 0xDF4, 0xDF5, 0xDF6, 0xDF7, 0xDF8, 0xDF9, 0xDFA, + 0xDFB, 0xDFC, 0xDFD, 0xDFE, 0xDFF, 0xE00, 0xE01, 0xE02, 0xE03, 0xE04, 0xE05, 0xE06, 0xE07, 0xE08, 0xE09, + 0xE0A, 0xE0B, 0xE0C, 0xE0D, 0xE0E, 0xE0F, 0xE10, 0xE11, 0xE12, 0xE13, 0xE14, 0xE15, 0xE16, 0xE17, 0xE18, + 0xE19, 0xE1A, 0xE1B, 0xE1C, 0xE1D, 0xE1E, 0xE1F, 0xE20, 0xE21, 0xE22, 0xE23, 0xE24, 0xE25, 0xE26, 0xE27, + 0xE28, 0xE29, 0xE2A, 0xE2B, 0xE2C, 0xE2D, 0xE2E, 0xE2F, 0xE30, 0xE31, 0xE32, 0xE33, 0xE34, 0xE35, 0xE36, + 0xE37, 0xE38, 0xE39, 0xE3A, 0xE3B, 0xE3C, 0xE3D, 0xE3E, 0xE3F, 0xE40, 0xE41, 0xE42, 0xE43, 0xE44, 0xE45, + 0xE46, 0xE47, 0xE48, 0xE49, 0xE4A, 0xE4B, 0xE4C, 0xE4D, 0xE4E, 0xE4F, 0xE50, 0xE51, 0xE52, 0xE53, 0xE54, + 0xE55, 0xE56, 0xE57, 0xE58, 0xE59, 0xE5A, 0xE5B, 0xE5C, 0xE5D, 0xE5E, 0xE5F, 0xE60, 0xE61, 0xE62, 0xE63, + 0xE64, 0xE65, 0xE66, 0xE67, 0xE68, 0xE69, 0xE6A, 0xE6B, 0xE6C, 0xE6D, 0xE6E, 0xE6F, 0xE70, 0xE71, 0xE72, + 0xE73, 0xE74, 0xE75, 0xE76, 0xE77, 0xE78, 0xE79, 0xE7A, 0xE7B, 0xE7C, 0xE7D, 0xE7E, 0xE7F, 0xE80, 0xE81, + 0xE82, 0xE83, 0xE84, 0xE85, 0xE86, 0xE87, 0xE88, 0xE89, 0xE8A, 0xE8B, 0xE8C, 0xE8D, 0xE8E, 0xE8F, 0xE90, + 0xE91, 0xE92, 0xE93, 0xE94, 0xE95, 0xE96, 0xE97, 0xE98, 0xE99, 0xE9A, 
0xE9B, 0xAE9, 0xAEA, 0xAEB, 0xAEC, + 0xAED, 0xAEE, 0xAEF, 0xAF0, 0xAF1, 0xAF2, 0xAF3, 0xAF4, 0xAF5, 0xAF6, 0xAF7, 0xAF8, 0xAF9, 0xAFA, 0xAFB, + 0xAFC, 0xAFD, 0xAFE, 0xAFF, 0xB00, 0xB01, 0xB02, 0xB03, 0xB04, 0xB05, 0xB06, 0xB07, 0xB08, 0xB09, 0xB0A, + 0xB0B, 0xB0C, 0xB0D, 0xB0E, 0xB0F, 0xB10, 0xB11, 0xB12, 0xB13, 0xB14, 0xB15, 0xB16, 0xB17, 0xB18, 0xB19, + 0xB1A, 0xB1B, 0xB1C, 0xB1D, 0xB1E, 0xB1F, 0xB20, 0xB21, 0xB22, 0xB23, 0xB24, 0xB25, 0xB26, 0xB27, 0xB28, + 0xB29, 0xB2A, 0xB2B, 0xB2C, 0xB2D, 0xB2E, 0xB2F, 0xB30, 0xB31, 0xB32, 0xB33, 0xB34, 0xB35, 0xB36, 0xB37, + 0xB38, 0xB39, 0xB3A, 0xB3B, 0xB3C, 0xB3D, 0xB3E, 0xB3F, 0xB40, 0xB41, 0xB42, 0xB43, 0xB44, 0xB45, 0xB46, + 0xB47, 0xB48, 0xB49, 0xB4A, 0xB4B, 0xB4C, 0xB4D, 0xB4E, 0xB4F, 0xB50, 0xB51, 0xB52, 0xB53, 0xB54, 0xB55, + 0xB56, 0xB57, 0xB58, 0xB59, 0xB5A, 0xB5B, 0xB5C, 0xB5D, 0xB5E, 0xB5F, 0xB60, 0xB61, 0xB62, 0xB63, 0xB64, + 0xB65, 0xB66, 0xB67, 0xB68, 0xB69, 0xB6A, 0xB6B, 0x32B, 0x32C, 0x32D, 0x32E, 0x32F, 0x330, 0x331, 0x332, + 0x333, 0x334, 0x335, 0x336, 0x337, 0x338, 0x339, 0x33A, 0x33B, 0x33C, 0x33D, 0x33E, 0x33F, 0x340, 0xB6C, + 0xB6D, 0xB6E, 0xB6F, 0xB70, 0xB71, 0xB72, 0xB73, 0xB74, 0xB75, 0xB76, 0xB77, 0xB78, 0xB79, 0xB7A, 0xB7B, + 0xB7C, 0xB7D, 0xB7E, 0xB7F, 0xB80, 0xB81, 0xB82, 0xB83, 0xB84, 0xB85, 0xB86, 0xB87, 0xB88, 0xB89, 0xB8A, + 0xB8B, 0xB8C, 0xB8D, 0xB8E, 0xB8F, 0xB90, 0xB91, 0xB92, 0xB93, 0xB94, 0xB95, 0xB96, 0xB97, 0xB98, 0xB99, + 0xB9A, 0xB9B, 0xB9C, 0xB9D, 0xB9E, 0xB9F, 0xBA0, 0xBA1, 0xBA2, 0xBA3, 0xBA4, 0xBA5, 0xBA6, 0xBA7, 0xBA8, + 0xBA9, 0xBAA, 0x385, 0x386, 0x387, 0x388, 0xBAB, 0xBAC, 0xBAD, 0xBAE, 0xBAF, 0xBB0, 0xBB1, 0xBB2, 0xBB3, + 0xBB4, 0xBB5, 0xBB6, 0xBB7, 0xBB8, 0xBB9, 0xBBA, 0xBBB, 0xBBC, 0xBBD, 0xBBE, 0xBBF, 0xBC0, 0xBC1, 0xBC2, + 0xBC3, 0xBC4, 0xBC5, 0xBC6, 0xBC7, 0xBC8, 0xBC9, 0xBCA, 0x329, 0x32A, 0xBCB, 0xBCC, 0xBCD, 0xBCE, 0xBCF, + 0xBD0, 0xBD1, 0xBD2, 0xBD3, 0xBD4, 0xBD5, 0xBD6, 0xBD7, 0xBD8, 0x63C063C063C063C, 0xBD9, 0xBDA, 0xBDB, 0xBDC, 0xBDD, + 0xBDE, 0xBDF, 0xBE0, 
0xBE1, 0xBE2, 0xBE3, 0xBE4, 0xBE5, 0xBE6, 0xBE7, 0xBE8, 0xBE9, 0xBEA, 0xBEB, 0xBEC, + 0xBED, 0xBEE, 0xBEF, 0xBF0, 0xBF1, 0xBF2, 0xBF3, 0xBF4, 0xBF5, 0xBF6, 0xBF7, 0xBF8, 0xBF9, 0xBFA, 0xBFB, + 0xBFC, 0xBFD, 0xBFE, 0xBFF, 0xC00, 0xC01, 0xC02, 0xC03, 0xC04, 0xC05, 0xC06, 0xC07, 0xC08, 0xC09, 0xC0A, + 0xC0B, 0xC0C, 0xC0D, 0xC0E, 0xC0F, 0xC10, 0xC11, 0xC12, 0xC13, 0xC14, 0xC15, 0xC16, 0xC17, 0xC18, 0xC19, + 0xC1A, 0xC1B, 0xC1C, 0xC1D, 0xC1E, 0xC1F, 0xC20, 0xC21, 0xC22, 0xC23, 0xC24, 0xC25, 0xC26, 0xC27, 0xC28, + 0xC29, 0xC2A, 0xC2B, 0xC2C, 0xC2D, 0xC2E, 0xC2F, 0xC30, 0xC31, 0xC32, 0xC33, 0xC34, 0xC35, 0xC36, 0xC37, + 0xC38, 0xC39, 0xC3A, 0xC3B, 0xC3C, 0xC3D, 0xC3E, 0xC3F, 0x61B02390239, 0x61B061B, 0x61B061B061B, 0xC40, 0xC41, 0xC42, 0xC43, + 0xC44, 0xC45, 0xC46, 0xC47, 0xC48, 0xC49, 0xC4A, 0xC4B, 0xC4C, 0xC4D, 0xC4E, 0xC4F, 0xC50, 0xC51, 0xC52, + 0xC53, 0xC54, 0xC55, 0xC56, 0xC57, 0xC58, 0xC59, 0xC5A, 0xC5B, 0xC5C, 0xC5D, 0xC5E, 0xC5F, 0xC60, 0xC61, + 0xC62, 0xC63, 0xC64, 0xC65, 0xC66, 0xC67, 0xC68, 0xC69, 0xC6A, 0xC6B, 0xC6C, 0xC6D, 0xC6E, 0xC6F, 0xC70, + 0xC71, 0xC72, 0xC73, 0xC74, 0xC75, 0xC76, 0xC77, 0xC78, 0xC79, 0xC7A, 0xC7B, 0xC7C, 0xC7D, 0xC7E, 0xC7F, + 0xC80, 0xC81, 0xC82, 0xC83, 0xC84, 0xC85, 0xC86, 0xC87, 0xC88, 0xC89, 0xC8A, 0xC8B, 0xC8C, 0xC8D, 0xC8E, + 0xC8F, 0xC90, 0xC91, 0xC92, 0xC93, 0xC94, 0xC95, 0xC96, 0xC97, 0xC98, 0xC99, 0xC9A, 0xC9B, 0xC9C, 0xC9D, + 0xC9E, 0xC9F, 0xCA0, 0xCA1, 0xCA2, 0xCA3, 0xCA4, 0xCA5, 0xCA5, 0xCA6, 0xCA7, 0xCA8, 0xCA9, 0xCAA, 0xCAB, + 0xCAC, 0xCAD, 0xCAE, 0xCAF, 0xCB0, 0xCB1, 0xCB2, 0xCB3, 0xCB4, 0xCB5, 0xCB6, 0xCB7, 0xCB8, 0xCB9, 0xCBA, + 0xCBB, 0xCBC, 0xCBD, 0xCBE, 0xCBF, 0xCC0, 0xCC1, 0xCC2, 0xCC3, 0xCC4, 0xCC5, 0xCC6, 0xCC7, 0xCC8, 0xCC9, + 0xCCA, 0xCCB, 0xCCC, 0xCCD, 0xCCE, 0xCCF, 0xCD0, 0xCD1, 0xCD2, 0xCD3, 0xCD4, 0xCD5, 0xCD6, 0xCD7, 0xCD8, + 0xCD9, 0xCDA, 0xCDB, 0xCDC, 0xCDD, 0xCDE, 0xCDF, 0xCE0, 0xCE1, 0xCE2, 0xCE3, 0xCE4, 0xCE5, 0xCE6, 0xCE7, + 0xCE8, 0xCE9, 0xCEA, 0xCEB, 0xCEC, 0xCED, 0xCEE, 0xCEF, 0xCF0, 0xCF1, 
0xCF2, 0xCF3, 0xCF4, 0xCF5, 0xCF6, + 0xCF7, 0xCF8, 0xCF9, 0xCFA, 0xCFB, 0xCFC, 0xCFD, 0xCFE, 0xCFF, 0xD00, 0xD01, 0xD02, 0xD03, 0xD04, 0xD05, + 0xD06, 0xD07, 0xD08, 0xD09, 0xD0A, 0xD0B, 0xD0C, 0xD0D, 0xD0E, 0xD0F, 0xD10, 0xD11, 0xD12, 0xD13, 0xD14, + 0xD15, 0xD16, 0xD17, 0xD18, 0xD19, 0xD1A, 0xD1B, 0xD1C, 0xD1D, 0xD1E, 0xD1F, 0xD20, 0xD21, 0xD22, 0xD23, + 0xD24, 0xD25, 0xD26, 0xD27, 0xD28, 0xD29, 0xD2A, 0xD2B, 0xD2C, 0xD2D, 0xD2E, 0xD2F, 0xD30, 0xD31, 0xD32, + 0xD33, 0xD34, 0xD35, 0xD36, 0xD37, 0xD38, 0xD39, 0xD3A, 0xD3B, 0xAB74FBC0, 0xAB75FBC0, 0xD3C, 0xD3D, 0xD3E, 0xD3F, + 0xD40, 0xD41, 0xD42, 0xD43, 0xD44, 0xD45, 0xD46, 0xD47, 0xD48, 0xD49, 0xD4A, 0xD4B, 0xD4C, 0xD4D, 0xD4E, + 0xD4F, 0xD50, 0xD51, 0xD52, 0xD53, 0xD54, 0xD55, 0xD56, 0xD57, 0xD58, 0xD59, 0xD5A, 0xD5B, 0xAB96FBC0, 0xAB97FBC0, + 0xD5C, 0xD5D, 0xD5E, 0xD5F, 0xD60, 0xD61, 0xD62, 0xD63, 0xD64, 0xD65, 0xD66, 0xD67, 0xD68, 0xD69, 0xD6A, + 0xD6B, 0xD6C, 0xD6D, 0xD6E, 0xD6F, 0xD70, 0xD71, 0xD72, 0xD73, 0xD74, 0xD75, 0xD76, 0xD77, 0xD78, 0xD79, + 0xD7A, 0xD7B, 0xD7C, 0xD7D, 0xABBAFBC0, 0xABBBFBC0, 0xABBCFBC0, 0xD7E, 0xD7F, 0xD80, 0xD81, 0xD82, 0xD83, 0xD84, 0xD85, + 0xD86, 0xD87, 0xD88, 0xD89, 0xABC9FBC0, 0xD8A, 0xD8B, 0xD8C, 0xD8D, 0xD8E, 0xD8F, 0xD90, 0xD91, 0xABD2FBC0, 0xABD3FBC0, + 0xABD4FBC0, 0xABD5FBC0, 0xABD6FBC0, 0xABD7FBC0, 0xABD8FBC0, 0xABD9FBC0, 0xABDAFBC0, 0xABDBFBC0, 0xABDCFBC0, 0xABDDFBC0, 0xABDEFBC0, 0xABDFFBC0, 0xABE0FBC0, 0xABE1FBC0, 0xABE2FBC0, + 0xABE3FBC0, 0xABE4FBC0, 0xABE5FBC0, 0xABE6FBC0, 0xABE7FBC0, 0xABE8FBC0, 0xABE9FBC0, 0xABEAFBC0, 0xABEBFBC0, 0xD92, 0xD93, 0xD94, 0xD95, 0xABF0FBC0, 0xABF1FBC0, + 0xABF2FBC0, 0xABF3FBC0, 0xABF4FBC0, 0xABF5FBC0, 0xABF6FBC0, 0xABF7FBC0, 0xABF8FBC0, 0xABF9FBC0, 0xABFAFBC0, 0xABFBFBC0, 0xABFCFBC0, 0xABFDFBC0, 0xABFEFBC0, 0xABFFFBC0, 0x21E5, + 0x21E6, 0x21E7, 0x21E8, 0x21E9, 0x21EA, 0x21EB, 0x21EC, 0x21ED, 0x21EE, 0x21EF, 0x21F0, 0x21F1, 0x21F2, 0x21F3, 0x21F4, + 0x21F5, 0x21F6, 0x21F7, 0x21F8, 0x21F9, 0x21FA, 0x21FB, 0x21FC, 0x21FD, 0x21FE, 0x21FF, 
0x2200, 0x2201, 0x2202, 0x2203, + 0x2204, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220A, 0x220B, 0x220C, 0x220D, 0x220E, 0x220F, 0x2210, 0x2211, 0x2212, + 0x2213, 0xAC2FFBC0, 0x21E5, 0x21E6, 0x21E7, 0x21E8, 0x21E9, 0x21EA, 0x21EB, 0x21EC, 0x21ED, 0x21EE, 0x21EF, 0x21F0, 0x21F1, + 0x21F2, 0x21F3, 0x21F4, 0x21F5, 0x21F6, 0x21F7, 0x21F8, 0x21F9, 0x21FA, 0x21FB, 0x21FC, 0x21FD, 0x21FE, 0x21FF, 0x2200, + 0x2201, 0x2202, 0x2203, 0x2204, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220A, 0x220B, 0x220C, 0x220D, 0x220E, 0x220F, + 0x2210, 0x2211, 0x2212, 0x2213, 0xAC5FFBC0, 0x1D86, 0x1D86, 0x1D87, 0x1E11, 0x1E57, 0x1C4C, 0x1E9E, 0x1D29, 0x1D29, 0x1D6F, + 0x1D6F, 0x1F3C, 0x1F3C, 0x1C55, 0x1DB1, 0x1C51, 0x1C5B, 0x1EEE, 0x1EFA, 0x1EFA, 0x1EEF, 0x1D2A, 0x1D2A, 0x1E20, 0x1CB7, + 0x1E52, 0x1DFC, 0x1CBC, 0x1D4C, 0x1EE3, 0x1E7C, 0x1F38, 0x1FE6, 0x1FE6, 0x1FE7, 0x1FE7, 0x1FE8, 0x1FE8, 0x1FE9, 0x1FE9, + 0x1FEA, 0x1FEA, 0x1FEC, 0x1FEC, 0x1FED, 0x1FED, 0x1FEE, 0x1FEE, 0x1FEF, 0x1FEF, 0x1FF0, 0x1FF0, 0x1FF1, 0x1FF1, 0x1FF3, + 0x1FF3, 0x1FF4, 0x1FF4, 0x1FF5, 0x1FF5, 0x1FF8, 0x1FF8, 0x1FF9, 0x1FF9, 0x1FFA, 0x1FFA, 0x1FFB, 0x1FFB, 0x1FFC, 0x1FFC, + 0x1FFD, 0x1FFD, 0x1FFE, 0x1FFE, 0x1FFF, 0x1FFF, 0x2000, 0x2000, 0x2001, 0x2001, 0x2002, 0x2002, 0x201D, 0x201D, 0x201E, + 0x201E, 0x1FEB, 0x1FEB, 0x1FF2, 0x1FF2, 0x1FF6, 0x1FF6, 0x1FF7, 0x1FF7, 0x2003, 0x2003, 0x2004, 0x2004, 0x2007, 0x2007, + 0x2008, 0x2008, 0x2009, 0x2009, 0x200D, 0x200D, 0x200F, 0x200F, 0x2010, 0x2010, 0x2011, 0x2011, 0x2012, 0x2012, 0x2013, + 0x2013, 0x2014, 0x2014, 0x2017, 0x2017, 0x2019, 0x2019, 0x201A, 0x201A, 0x201B, 0x201B, 0x201F, 0x201F, 0x2020, 0x2020, + 0x2021, 0x2021, 0x1FF01FE61FF1, 0xD96, 0xD97, 0xD98, 0xD99, 0xD9A, 0xD9B, 0x2006, 0x2006, 0x2016, 0x2016, 0x0, 0x0, + 0x0, 0x200C, 0x200C, 0xACF4FBC0, 0xACF5FBC0, 0xACF6FBC0, 0xACF7FBC0, 0xACF8FBC0, 0x281, 0x26E, 0x26F, 0x2F6, 0x1AE2, 0x282, 0x2F7, + 0x223B, 0x223D, 0x223F, 0x2241, 0x2243, 0x2245, 0x2247, 0x224B, 0x224D, 0x224F, 0x2251, 0x2253, 0x2255, 0x2259, 0x225B, 
+ 0x225D, 0x225F, 0x2261, 0x2263, 0x2267, 0x2269, 0x226B, 0x226D, 0x226F, 0x2271, 0x2273, 0x2275, 0x2277, 0x2279, 0x227B, + 0x227D, 0x2281, 0x2283, 0x2249, 0x2257, 0x2265, 0x227F, 0x2285, 0xAD26FBC0, 0x2288, 0xAD28FBC0, 0xAD29FBC0, 0xAD2AFBC0, 0xAD2BFBC0, 0xAD2CFBC0, + 0x228D, 0xAD2EFBC0, 0xAD2FFBC0, 0x245D, 0x245E, 0x245F, 0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466, 0x2467, 0x2468, + 0x246A, 0x246B, 0x246C, 0x246D, 0x246E, 0x246F, 0x2470, 0x2471, 0x2472, 0x2473, 0x2474, 0x2475, 0x2476, 0x2477, 0x2478, + 0x2479, 0x247A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2483, 0x2484, 0x2485, 0x2486, 0x2487, 0x2488, + 0x2489, 0x248A, 0x248B, 0x248C, 0x248D, 0x248E, 0x248F, 0x2490, 0x2491, 0x2492, 0x2493, 0x2494, 0x2469, 0x2482, 0xAD68FBC0, + 0xAD69FBC0, 0xAD6AFBC0, 0xAD6BFBC0, 0xAD6CFBC0, 0xAD6DFBC0, 0xAD6EFBC0, 0x2495, 0x427, 0xAD71FBC0, 0xAD72FBC0, 0xAD73FBC0, 0xAD74FBC0, 0xAD75FBC0, 0xAD76FBC0, 0xAD77FBC0, + 0xAD78FBC0, 0xAD79FBC0, 0xAD7AFBC0, 0xAD7BFBC0, 0xAD7CFBC0, 0xAD7DFBC0, 0xAD7EFBC0, 0x0, 0x24A6, 0x24BB, 0x24CC, 0x24D5, 0x24E4, 0x250A, 0x251B, + 0x2524, 0x253A, 0x2543, 0x254C, 0x257D, 0x259C, 0x25AB, 0x25B4, 0x25D6, 0x25DF, 0x25EF, 0x261F, 0x25CA, 0x25CB, 0x25CC, + 0x25CD, 0xAD97FBC0, 0xAD98FBC0, 0xAD99FBC0, 0xAD9AFBC0, 0xAD9BFBC0, 0xAD9CFBC0, 0xAD9DFBC0, 0xAD9EFBC0, 0xAD9FFBC0, 0x2623, 0x2624, 0x2625, 0x2626, 0x2627, + 0x2628, 0x2629, 0xADA7FBC0, 0x262A, 0x262B, 0x262C, 0x262D, 0x262E, 0x262F, 0x2630, 0xADAFFBC0, 0x2631, 0x2632, 0x2633, 0x2634, + 0x2635, 0x2636, 0x2637, 0xADB7FBC0, 0x2638, 0x2639, 0x263A, 0x263B, 0x263C, 0x263D, 0x263E, 0xADBFFBC0, 0x263F, 0x2640, 0x2641, + 0x2642, 0x2643, 0x2644, 0x2645, 0xADC7FBC0, 0x2646, 0x2647, 0x2648, 0x2649, 0x264A, 0x264B, 0x264C, 0xADCFFBC0, 0x264D, 0x264E, + 0x264F, 0x2650, 0x2651, 0x2652, 0x2653, 0xADD7FBC0, 0x2654, 0x2655, 0x2656, 0x2657, 0x2658, 0x2659, 0x265A, 0xADDFFBC0, 0x202E, + 0x2032, 0x2036, 0x204A, 0x2062, 0x206C, 0x2096, 0x20B0, 0x20C3, 0x20CC, 0x20E7, 0x20EF, 0x20FC, 0x2105, 
0x210E, 0x2132, + 0x2159, 0x2164, 0x2183, 0x2188, 0x21D3, 0x210E2105, 0x2022, 0x205A, 0x2095, 0x2129, 0x21A0, 0x21A9, 0x21AE, 0x21B8, 0x21BD, + 0x21C7, 0x3B6, 0x3B7, 0x35B, 0x35C, 0x35D, 0x35E, 0x3B8, 0x3B9, 0x3BA, 0x35F, 0x360, 0x3BB, 0x361, 0x362, + 0x3BC, 0x3BD, 0x3BE, 0x3BF, 0x3C0, 0x3C1, 0x3C2, 0x3C3, 0x3C4, 0x21C, 0x276, 0x2F8, 0x3C5, 0x3C6, 0x363, + 0x364, 0x3C7, 0x3C8, 0x365, 0x366, 0x367, 0x368, 0x369, 0x36A, 0x36B, 0x36C, 0x36D, 0x36E, 0x2F1, 0x2F2, + 0x2F3, 0x2F4, 0x268, 0x218D, 0x283, 0x28C, 0x224, 0x28D, 0x223, 0x237, 0x3A1, 0x3A2, 0x3A3, 0x38A, 0x218, + 0x219, 0x284, 0x2F5, 0x384, 0x38D, 0x21D, 0x225, 0x311, 0x21B, 0x3C9, 0xAE45FBC0, 0xAE46FBC0, 0xAE47FBC0, 0xAE48FBC0, 0xAE49FBC0, + 0xAE4AFBC0, 0xAE4BFBC0, 0xAE4CFBC0, 0xAE4DFBC0, 0xAE4EFBC0, 0xAE4FFBC0, 0xAE50FBC0, 0xAE51FBC0, 0xAE52FBC0, 0xAE53FBC0, 0xAE54FBC0, 0xAE55FBC0, 0xAE56FBC0, 0xAE57FBC0, 0xAE58FBC0, + 0xAE59FBC0, 0xAE5AFBC0, 0xAE5BFBC0, 0xAE5CFBC0, 0xAE5DFBC0, 0xAE5EFBC0, 0xAE5FFBC0, 0xAE60FBC0, 0xAE61FBC0, 0xAE62FBC0, 0xAE63FBC0, 0xAE64FBC0, 0xAE65FBC0, 0xAE66FBC0, 0xAE67FBC0, + 0xAE68FBC0, 0xAE69FBC0, 0xAE6AFBC0, 0xAE6BFBC0, 0xAE6CFBC0, 0xAE6DFBC0, 0xAE6EFBC0, 0xAE6FFBC0, 0xAE70FBC0, 0xAE71FBC0, 0xAE72FBC0, 0xAE73FBC0, 0xAE74FBC0, 0xAE75FBC0, 0xAE76FBC0, + 0xAE77FBC0, 0xAE78FBC0, 0xAE79FBC0, 0xAE7AFBC0, 0xAE7BFBC0, 0xAE7CFBC0, 0xAE7DFBC0, 0xAE7EFBC0, 0xAE7FFBC0, 0xCE36FB40, 0xD382FB40, 0xCE5BFB40, 0xCE5AFB40, 0xCE59FB40, 0xCEBBFB40, + 0xD182FB40, 0xD1E0FB40, 0xD200FB40, 0xD202FB40, 0xD35CFB40, 0xD369FB40, 0xDC0FFB40, 0xDC0FFB40, 0xDC22FB40, 0xDC23FB40, 0xDC22FB40, 0xDC23FB40, 0xDDF3FB40, 0xDE7AFB40, 0xDF51FB40, + 0xDF50FB40, 0xDFC4FB40, 0xDFC3FB40, 0xE24CFB40, 0xE535FB40, 0xAE9AFBC0, 0xE5E1FB40, 0xE5E5FB40, 0xE708FB40, 0xEB7AFB40, 0xEBCDFB40, 0xEC11FB40, 0xEC35FB40, 0xEC3AFB40, 0xF06CFB40, + 0xF22BFB40, 0xF22BFB40, 0xCE2CFB40, 0xF25BFB40, 0xF2ADFB40, 0xF38BFB40, 0xF58BFB40, 0xF6EEFB40, 0xF93AFB40, 0xF93BFB40, 0xFAF9FB40, 0xFCF9FB40, 0xFE9FFB40, 0xFF53FB40, 0xFF52FB40, + 0xFF53FB40, 
0xFF53FB40, 0xFF52FB40, 0xFF8AFB40, 0xFF8AFB40, 0xFF8BFB40, 0x8002FB41, 0x8080FB41, 0x807FFB41, 0x8089FB41, 0x81FCFB41, 0x8279FB41, 0x8279FB41, 0x8279FB41, 0x864EFB41, + 0x8864FB41, 0x8980FB41, 0x897FFB41, 0x89C1FB41, 0x89D2FB41, 0x89D2FB41, 0x8BA0FB41, 0x8D1DFB41, 0x8DB3FB41, 0x8F66FB41, 0x8FB6FB41, 0x8FB6FB41, 0x8FB6FB41, 0x9091FB41, 0x9485FB41, + 0x9577FB41, 0x9578FB41, 0x957FFB41, 0x95E8FB41, 0x961CFB41, 0x961DFB41, 0x96E8FB41, 0x9752FB41, 0x97E6FB41, 0x9875FB41, 0x98CEFB41, 0x98DEFB41, 0x98DFFB41, 0x98E0FB41, 0x98E0FB41, + 0x9963FB41, 0x9996FB41, 0x9A6CFB41, 0x9AA8FB41, 0x9B3CFB41, 0x9C7CFB41, 0x9E1FFB41, 0x9E75FB41, 0x9EA6FB41, 0x9EC4FB41, 0x9EFEFB41, 0x9F4AFB41, 0x9F50FB41, 0x9F52FB41, 0x9F7FFB41, + 0x9F8DFB41, 0x9F99FB41, 0x9F9CFB41, 0x9F9CFB41, 0x9F9FFB41, 0xAEF4FBC0, 0xAEF5FBC0, 0xAEF6FBC0, 0xAEF7FBC0, 0xAEF8FBC0, 0xAEF9FBC0, 0xAEFAFBC0, 0xAEFBFBC0, 0xAEFCFBC0, 0xAEFDFBC0, + 0xAEFEFBC0, 0xAEFFFBC0, 0xCE00FB40, 0xCE28FB40, 0xCE36FB40, 0xCE3FFB40, 0xCE59FB40, 0xCE85FB40, 0xCE8CFB40, 0xCEA0FB40, 0xCEBAFB40, 0xD13FFB40, 0xD165FB40, 0xD16BFB40, 0xD182FB40, + 0xD196FB40, 0xD1ABFB40, 0xD1E0FB40, 0xD1F5FB40, 0xD200FB40, 0xD29BFB40, 0xD2F9FB40, 0xD315FB40, 0xD31AFB40, 0xD338FB40, 0xD341FB40, 0xD35CFB40, 0xD369FB40, 0xD382FB40, 0xD3B6FB40, + 0xD3C8FB40, 0xD3E3FB40, 0xD6D7FB40, 0xD71FFB40, 0xD8EBFB40, 0xD902FB40, 0xD90AFB40, 0xD915FB40, 0xD927FB40, 0xD973FB40, 0xDB50FB40, 0xDB80FB40, 0xDBF8FB40, 0xDC0FFB40, 0xDC22FB40, + 0xDC38FB40, 0xDC6EFB40, 0xDC71FB40, 0xDDDBFB40, 0xDDE5FB40, 0xDDF1FB40, 0xDDFEFB40, 0xDE72FB40, 0xDE7AFB40, 0xDE7FFB40, 0xDEF4FB40, 0xDEFEFB40, 0xDF0BFB40, 0xDF13FB40, 0xDF50FB40, + 0xDF61FB40, 0xDF73FB40, 0xDFC3FB40, 0xE208FB40, 0xE236FB40, 0xE24BFB40, 0xE52FFB40, 0xE534FB40, 0xE587FB40, 0xE597FB40, 0xE5A4FB40, 0xE5B9FB40, 0xE5E0FB40, 0xE5E5FB40, 0xE6F0FB40, + 0xE708FB40, 0xE728FB40, 0xEB20FB40, 0xEB62FB40, 0xEB79FB40, 0xEBB3FB40, 0xEBCBFB40, 0xEBD4FB40, 0xEBDBFB40, 0xEC0FFB40, 0xEC14FB40, 0xEC34FB40, 0xF06BFB40, 0xF22AFB40, 0xF236FB40, + 
0xF23BFB40, 0xF23FFB40, 0xF247FB40, 0xF259FB40, 0xF25BFB40, 0xF2ACFB40, 0xF384FB40, 0xF389FB40, 0xF4DCFB40, 0xF4E6FB40, 0xF518FB40, 0xF51FFB40, 0xF528FB40, 0xF530FB40, 0xF58BFB40, + 0xF592FB40, 0xF676FB40, 0xF67DFB40, 0xF6AEFB40, 0xF6BFFB40, 0xF6EEFB40, 0xF7DBFB40, 0xF7E2FB40, 0xF7F3FB40, 0xF93AFB40, 0xF9B8FB40, 0xF9BEFB40, 0xFA74FB40, 0xFACBFB40, 0xFAF9FB40, + 0xFC73FB40, 0xFCF8FB40, 0xFF36FB40, 0xFF51FB40, 0xFF8AFB40, 0xFFBDFB40, 0x8001FB41, 0x800CFB41, 0x8012FB41, 0x8033FB41, 0x807FFB41, 0x8089FB41, 0x81E3FB41, 0x81EAFB41, 0x81F3FB41, + 0x81FCFB41, 0x820CFB41, 0x821BFB41, 0x821FFB41, 0x826EFB41, 0x8272FB41, 0x8278FB41, 0x864DFB41, 0x866BFB41, 0x8840FB41, 0x884CFB41, 0x8863FB41, 0x897EFB41, 0x898BFB41, 0x89D2FB41, + 0x8A00FB41, 0x8C37FB41, 0x8C46FB41, 0x8C55FB41, 0x8C78FB41, 0x8C9DFB41, 0x8D64FB41, 0x8D70FB41, 0x8DB3FB41, 0x8EABFB41, 0x8ECAFB41, 0x8F9BFB41, 0x8FB0FB41, 0x8FB5FB41, 0x9091FB41, + 0x9149FB41, 0x91C6FB41, 0x91CCFB41, 0x91D1FB41, 0x9577FB41, 0x9580FB41, 0x961CFB41, 0x96B6FB41, 0x96B9FB41, 0x96E8FB41, 0x9751FB41, 0x975EFB41, 0x9762FB41, 0x9769FB41, 0x97CBFB41, + 0x97EDFB41, 0x97F3FB41, 0x9801FB41, 0x98A8FB41, 0x98DBFB41, 0x98DFFB41, 0x9996FB41, 0x9999FB41, 0x99ACFB41, 0x9AA8FB41, 0x9AD8FB41, 0x9ADFFB41, 0x9B25FB41, 0x9B2FFB41, 0x9B32FB41, + 0x9B3CFB41, 0x9B5AFB41, 0x9CE5FB41, 0x9E75FB41, 0x9E7FFB41, 0x9EA5FB41, 0x9EBBFB41, 0x9EC3FB41, 0x9ECDFB41, 0x9ED1FB41, 0x9EF9FB41, 0x9EFDFB41, 0x9F0EFB41, 0x9F13FB41, 0x9F20FB41, + 0x9F3BFB41, 0x9F4AFB41, 0x9F52FB41, 0x9F8DFB41, 0x9F9CFB41, 0x9FA0FB41, 0xAFD6FBC0, 0xAFD7FBC0, 0xAFD8FBC0, 0xAFD9FBC0, 0xAFDAFBC0, 0xAFDBFBC0, 0xAFDCFBC0, 0xAFDDFBC0, 0xAFDEFBC0, + 0xAFDFFBC0, 0xAFE0FBC0, 0xAFE1FBC0, 0xAFE2FBC0, 0xAFE3FBC0, 0xAFE4FBC0, 0xAFE5FBC0, 0xAFE6FBC0, 0xAFE7FBC0, 0xAFE8FBC0, 0xAFE9FBC0, 0xAFEAFBC0, 0xAFEBFBC0, 0xAFECFBC0, 0xAFEDFBC0, + 0xAFEEFBC0, 0xAFEFFBC0, 0x1A5B, 0x1A5C, 0x1A5D, 0x1A5E, 0x1A5F, 0x1A60, 0x1A61, 0x1A62, 0x1A63, 0x1A64, 0x1A65, 0x1A66, 0xAFFCFBC0, + 0xAFFDFBC0, 0xAFFEFBC0, 0xAFFFFBC0, 0x209, 
0x231, 0x28A, 0x3AC, 0x1A8B, 0x1C07, 0x3D7C3D66, 0x1C3D, 0x36F, 0x370, 0x371, 0x372, + 0x373, 0x374, 0x375, 0x376, 0x377, 0x378, 0x1A8C, 0x1A8D, 0x379, 0x37A, 0x37B, 0x37C, 0x37D, 0x37E, 0x37F, + 0x380, 0x21E, 0x312, 0x313, 0x314, 0x1A8E, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x21F, 0x1C0A, 0x1C0A, 0x1C0B, 0x1C0B, 0x1C0C, 0x1A8C, 0x1A8F, 0xD341FB40, + 0xD344FB40, 0xD345FB40, 0x1C08, 0x3D673D79, 0x3AD, 0x1A90, 0x1A91, 0xB040FBC0, 0x3D5A, 0x3D5A, 0x3D5B, 0x3D5B, 0x3D5C, 0x3D5C, 0x3D5E, + 0x3D5E, 0x3D5F, 0x3D5F, 0x3D60, 0x3D60, 0x3D61, 0x3D61, 0x3D62, 0x3D62, 0x3D63, 0x3D63, 0x3D64, 0x3D64, 0x3D65, 0x3D65, + 0x3D66, 0x3D66, 0x3D67, 0x3D67, 0x3D68, 0x3D68, 0x3D69, 0x3D69, 0x3D6A, 0x3D6A, 0x3D6B, 0x3D6B, 0x3D6C, 0x3D6C, 0x3D6C, + 0x3D6D, 0x3D6D, 0x3D6E, 0x3D6E, 0x3D6F, 0x3D70, 0x3D71, 0x3D72, 0x3D73, 0x3D74, 0x3D74, 0x3D74, 0x3D75, 0x3D75, 0x3D75, + 0x3D76, 0x3D76, 0x3D76, 0x3D77, 0x3D77, 0x3D77, 0x3D78, 0x3D78, 0x3D78, 0x3D79, 0x3D7A, 0x3D7B, 0x3D7C, 0x3D7D, 0x3D7E, + 0x3D7E, 0x3D7F, 0x3D7F, 0x3D81, 0x3D81, 0x3D82, 0x3D83, 0x3D84, 0x3D85, 0x3D86, 0x3D87, 0x3D87, 0x3D88, 0x3D89, 0x3D8A, + 0x3D8B, 0x3D5C, 0x3D60, 0x3D63, 0xB097FBC0, 0xB098FBC0, 0x0, 0x0, 0x491, 0x492, 0x1C0D, 0x1C0D, 0x3D833D81, 0x220, 0x3D5A, + 0x3D5A, 0x3D5B, 0x3D5B, 0x3D5C, 0x3D5C, 0x3D5E, 0x3D5E, 0x3D5F, 0x3D5F, 0x3D60, 0x3D60, 0x3D61, 0x3D61, 0x3D62, 0x3D62, + 0x3D63, 0x3D63, 0x3D64, 0x3D64, 0x3D65, 0x3D65, 0x3D66, 0x3D66, 0x3D67, 0x3D67, 0x3D68, 0x3D68, 0x3D69, 0x3D69, 0x3D6A, + 0x3D6A, 0x3D6B, 0x3D6B, 0x3D6C, 0x3D6C, 0x3D6C, 0x3D6D, 0x3D6D, 0x3D6E, 0x3D6E, 0x3D6F, 0x3D70, 0x3D71, 0x3D72, 0x3D73, + 0x3D74, 0x3D74, 0x3D74, 0x3D75, 0x3D75, 0x3D75, 0x3D76, 0x3D76, 0x3D76, 0x3D77, 0x3D77, 0x3D77, 0x3D78, 0x3D78, 0x3D78, + 0x3D79, 0x3D7A, 0x3D7B, 0x3D7C, 0x3D7D, 0x3D7E, 0x3D7E, 0x3D7F, 0x3D7F, 0x3D81, 0x3D81, 0x3D82, 0x3D83, 0x3D84, 0x3D85, + 0x3D86, 0x3D87, 0x3D87, 0x3D88, 0x3D89, 0x3D8A, 0x3D8B, 0x3D5C, 0x3D60, 0x3D63, 0x3D87, 0x3D88, 
0x3D89, 0x3D8A, 0x221, + 0x1C0E, 0x1C0F, 0x1C0F, 0x3D6E3D64, 0xB100FBC0, 0xB101FBC0, 0xB102FBC0, 0xB103FBC0, 0xB104FBC0, 0x3D8C, 0x3D8D, 0x3D8E, 0x3D8F, 0x3D91, 0x3D92, + 0x3D93, 0x3D94, 0x3D95, 0x3D96, 0x3D99, 0x3D9A, 0x3D9B, 0x3D9C, 0x3D9E, 0x3D9F, 0x3DA0, 0x3DA1, 0x3DA2, 0x3DA3, 0x3DA4, + 0x3DA8, 0x3DA9, 0x3DAB, 0x3DAC, 0x3DAE, 0x3DAF, 0x3DB0, 0x3DB1, 0x3DB2, 0x3DB3, 0x3DB4, 0x3DB6, 0x3DBA, 0x3DBB, 0x3DBC, + 0x3DBD, 0x3D90, 0x3D97, 0x3D9D, 0x3DBE, 0xB12EFBC0, 0xB12FFBC0, 0xB130FBC0, 0x3BF5, 0x3BF6, 0x3CD3, 0x3BF7, 0x3CD5, 0x3CD6, 0x3BF8, + 0x3BF9, 0x3BFA, 0x3CD9, 0x3CDA, 0x3CDB, 0x3CDC, 0x3CDD, 0x3CDE, 0x3C0F, 0x3BFB, 0x3BFC, 0x3BFD, 0x3C16, 0x3BFE, 0x3BFF, + 0x3C00, 0x3C01, 0x3C02, 0x3C03, 0x3C04, 0x3C05, 0x3C06, 0x3C07, 0x3C73, 0x3C74, 0x3C75, 0x3C76, 0x3C77, 0x3C78, 0x3C79, + 0x3C7A, 0x3C7B, 0x3C7C, 0x3C7D, 0x3C7E, 0x3C7F, 0x3C80, 0x3C81, 0x3C82, 0x3C83, 0x3C84, 0x3C85, 0x3C86, 0x3C87, 0x3C72, + 0x3C09, 0x3C0A, 0x3CF0, 0x3CF1, 0x3CF5, 0x3CF7, 0x3CFC, 0x3D00, 0x3D02, 0x3C11, 0x3D06, 0x3D08, 0x3C12, 0x3C13, 0x3C15, + 0x3C17, 0x3C18, 0x3C1C, 0x3C1E, 0x3C20, 0x3C21, 0x3C22, 0x3C23, 0x3C24, 0x3C27, 0x3C2B, 0x3C35, 0x3C3C, 0x3C41, 0x3D1A, + 0x3D1B, 0x3C4C, 0x3C4D, 0x3C4E, 0x3C96, 0x3C97, 0x3C9A, 0x3CA3, 0x3CA4, 0x3CA6, 0x3CB0, 0x3CB3, 0xB18FFBC0, 0x1A92, 0x1A93, + 0xCE00FB40, 0xCE8CFB40, 0xCE09FB40, 0xD6DBFB40, 0xCE0AFB40, 0xCE2DFB40, 0xCE0BFB40, 0xF532FB40, 0xCE59FB40, 0xCE19FB40, 0xCE01FB40, 0xD929FB40, 0xD730FB40, 0xCEBAFB40, 0x3D8C, + 0x3DA2, 0x3D9A, 0x3D95, 0x3DAD, 0x3DAD, 0x3DAA, 0x3DA9, 0x3DBC, 0x3DA8, 0x3DBB, 0x3DBC, 0x3DB9, 0x3D98, 0x3DAE, 0x3DB0, + 0x3DB7, 0x3DB8, 0x3DB5, 0x3DBB, 0x3D8D, 0x3D92, 0x3D96, 0x3D99, 0x3DA5, 0x3DA6, 0x3DA7, 0xB1BBFBC0, 0xB1BCFBC0, 0xB1BDFBC0, 0xB1BEFBC0, + 0xB1BFFBC0, 0x1A67, 0x1A68, 0x1A69, 0x1A6A, 0x1A6B, 0x1A6C, 0x1A6D, 0x1A6E, 0x1A6F, 0x1A70, 0x1A71, 0x1A72, 0x1A73, 0x1A74, + 0x1A75, 0x1A76, 0x1A77, 0x1A78, 0x1A79, 0x1A7A, 0x1A7B, 0x1A7C, 0x1A7D, 0x1A7E, 0x1A7F, 0x1A80, 0x1A81, 0x1A82, 0x1A83, + 0x1A84, 0x1A85, 0x1A86, 
0x1A87, 0x1A88, 0x1A89, 0x1A8A, 0xB1E4FBC0, 0xB1E5FBC0, 0xB1E6FBC0, 0xB1E7FBC0, 0xB1E8FBC0, 0xB1E9FBC0, 0xB1EAFBC0, 0xB1EBFBC0, + 0xB1ECFBC0, 0xB1EDFBC0, 0xB1EEFBC0, 0xB1EFFBC0, 0x3D62, 0x3D66, 0x3D67, 0x3D6E, 0x3D71, 0x3D74, 0x3D75, 0x3D76, 0x3D77, 0x3D78, 0x3D7B, + 0x3D82, 0x3D83, 0x3D84, 0x3D85, 0x3D86, 0x3183BF50317, 0x3183BF70317, 0x3183BF80317, 0x3183BFA0317, 0x3183BFB0317, 0x3183BFC0317, 0x3183BFE0317, 0x3183C000317, 0x3183C010317, 0x3183C030317, + 0x3183C040317, 0x3183C050317, 0x3183C060317, 0x3183C070317, 0x3183C733BF50317, 0x3183C733BF70317, 0x3183C733BF80317, 0x3183C733BFA0317, 0x3183C733BFB0317, 0x3183C733BFC0317, 0x3183C733BFE0317, 0x3183C733C000317, 0x3183C733C010317, 0x3183C733C030317, 0x3183C733C040317, + 0x3183C733C050317, 0x3183C733C060317, 0x3183C733C070317, 0x3183C803C010317, 0xFFFD, 0xFFFD, 0xB21FFBC0, 0x318CE00FB400317, 0x318CE8CFB400317, 0x318CE09FB400317, 0x318D6DBFB400317, 0x318CE94FB400317, 0x318D16DFB400317, 0x318CE03FB400317, 0x318D16BFB400317, + 0x318CE5DFB400317, 0x318D341FB400317, 0x318E708FB400317, 0x318F06BFB400317, 0x318EC34FB400317, 0x318E728FB400317, 0x31891D1FB410317, 0x318D71FFB400317, 0x318E5E5FB400317, 0x318E82AFB400317, 0x318E709FB400317, 0x318F93EFB400317, 0x318D40DFB400317, 0x318F279FB400317, 0x3188CA1FB410317, + 0x318F95DFB400317, 0x318D2B4FB400317, 0x318CEE3FB400317, 0x318D47CFB400317, 0x318DB66FB400317, 0x318F6E3FB400317, 0x318CF01FB400317, 0x3188CC7FB410317, 0x318D354FB400317, 0x318F96DFB400317, 0x318CF11FB400317, 0x31881EAFB410317, 0x31881F3FB410317, 0xD54FFB40, 0xDE7CFB40, + 0xE587FB40, 0xFB8FFB40, 0x1C3D1C3E, 0x1C3D1C3F, 0x1C3D1C40, 0x1C3D1C41, 0x1C3D1C42, 0x1C3D1C43, 0x1C3D1C44, 0x1C3D1C45, 0x1CAA1E951E0C, 0x1C3E1C3F, 0x1C3F1C3F, 0x1C401C3F, 0x1C411C3F, + 0x1C421C3F, 0x1C431C3F, 0x1C441C3F, 0x1C451C3F, 0x1C461C3F, 0x1C3D1C40, 0x1C3E1C40, 0x1C3F1C40, 0x1C401C40, 0x1C411C40, 0x1C421C40, 0x3BF5, 0x3BF7, 0x3BF8, 0x3BFA, + 0x3BFB, 0x3BFC, 0x3BFE, 0x3C00, 0x3C01, 0x3C03, 0x3C04, 0x3C05, 0x3C06, 0x3C07, 0x3C733BF5, 
0x3C733BF7, 0x3C733BF8, 0x3C733BFA, 0x3C733BFB, + 0x3C733BFC, 0x3C733BFE, 0x3C733C00, 0x3C733C01, 0x3C733C03, 0x3C733C04, 0x3C733C05, 0x3C733C06, 0x3C733C07, 0xFFFD, 0x3C863C003C803C01, 0x3C803C00, 0x1A94, 0xCE00FB40, 0xCE8CFB40, + 0xCE09FB40, 0xD6DBFB40, 0xCE94FB40, 0xD16DFB40, 0xCE03FB40, 0xD16BFB40, 0xCE5DFB40, 0xD341FB40, 0xE708FB40, 0xF06BFB40, 0xEC34FB40, 0xE728FB40, 0x91D1FB41, 0xD71FFB40, 0xE5E5FB40, + 0xE82AFB40, 0xE709FB40, 0xF93EFB40, 0xD40DFB40, 0xF279FB40, 0x8CA1FB41, 0xF95DFB40, 0xD2B4FB40, 0xF9D8FB40, 0xF537FB40, 0xD973FB40, 0x9069FB41, 0xD12AFB40, 0xD370FB40, 0xECE8FB40, + 0x9805FB41, 0xCF11FB40, 0xD199FB40, 0xEB63FB40, 0xCE0AFB40, 0xCE2DFB40, 0xCE0BFB40, 0xDDE6FB40, 0xD3F3FB40, 0xD33BFB40, 0xDB97FB40, 0xDB66FB40, 0xF6E3FB40, 0xCF01FB40, 0x8CC7FB41, + 0xD354FB40, 0xD91CFB40, 0x1C431C40, 0x1C441C40, 0x1C451C40, 0x1C461C40, 0x1C3D1C41, 0x1C3E1C41, 0x1C3F1C41, 0x1C401C41, 0x1C411C41, 0x1C421C41, 0x1C431C41, 0x1C441C41, 0x1C451C41, + 0x1C461C41, 0x1C3D1C42, 0xE708FB401C3E, 0xE708FB401C3F, 0xE708FB401C40, 0xE708FB401C41, 0xE708FB401C42, 0xE708FB401C43, 0xE708FB401C44, 0xE708FB401C45, 0xE708FB401C46, 0xE708FB401C3D1C3E, 0xE708FB401C3E1C3E, 0xE708FB401C3F1C3E, 0x1CF41D18, + 0x1CF41E331CAA, 0x1EE31CAA, 0x1C8F1E951D77, 0x3D5A, 0x3D5B, 0x3D5C, 0x3D5E, 0x3D5F, 0x3D60, 0x3D61, 0x3D62, 0x3D63, 0x3D64, 0x3D65, 0x3D66, + 0x3D67, 0x3D68, 0x3D69, 0x3D6A, 0x3D6B, 0x3D6C, 0x3D6D, 0x3D6E, 0x3D6F, 0x3D70, 0x3D71, 0x3D72, 0x3D73, 0x3D74, 0x3D75, + 0x3D76, 0x3D77, 0x3D78, 0x3D79, 0x3D7A, 0x3D7B, 0x3D7C, 0x3D7D, 0x3D7E, 0x3D7F, 0x3D81, 0x3D82, 0x3D83, 0x3D84, 0x3D85, + 0x3D86, 0x3D87, 0x3D88, 0x3D89, 0x3D8A, 0xB2FFFBC0, 0x3D6E1C0E3D743D5A, 0x3D5A3D763D843D5A, 0x3D5A3D773D8B3D5A, 0x3D841C0E3D5A, 0x3D623D8B3D703D5B, 0x3D6B3D8B3D5B, 0x3D8B3D5F3D5C, 0xFFFD, 0x1C0E3D601C0E3D5E, + 0x3D673D8B3D5F, 0x3D7B1C0E3D5F, 0x3D833D5B3D60, 0x3D6E3D6C3D823D60, 0x1C0E3D833D863D60, 0x3D8B3D863D60, 0x3D793D8B3D60, 0x3D603D61, 0x1C0E3D703D61, 0x1C0E3D833D7F3D61, 0x1C0E3D6A3D843D61, 0x3D863D61, 
0xFFFD, 0xFFFD, 0xFFFD, + 0x3D7B3D823D62, 0xFFFD, 0xFFFD, 0x3D721C0E3D863D62, 0x3D671C0E3D63, 0x3D6F3D843D64, 0x3D781C0E3D64, 0x3D843D623D5B3D65, 0xFFFD, 0x3D623D8B3D833D66, 0x3D6B3D8B3D68, 0x3D6E3D8B3D68, 0x3D671C0E3D6A, 0x3D663D6D, 0x3D843D6E, + 0x3D8B3D6E, 0x3D733D6F, 0x3D6E3D6C3D73, 0x3D6C3D5B3D74, 0xFFFD, 0x3D6C1C0E3D74, 0x3D843D851C0E3D74, 0xFFFD, 0x3D843D623D75, 0x3D643D75, 0x3D843D75, 0xFFFD, 0x3D6E1C0E3D5B3D76, 0xFFFD, 0x3D8B3D823D76, + 0xFFFD, 0x3D693D77, 0x3D753D703D77, 0x3D6C3D843D77, 0x3D673D8B3D77, 0x3D661C0E3D77, 0x3D6A1C0E3D77, 0x3D6E3D8B3D5B3D78, 0x3D6E3D843D78, 0x3D8B3D78, 0x3D6E3D8B3D78, 0x3D841C0E3D78, 0x3D8B1C0E3D78, 0x3D863D623D5B3D79, 0x3D843D5B3D79, + 0x3D743D6C3D79, 0x3D623D843D79, 0xFFFD, 0x3D8B3D863D623D7A, 0x3D833D7A, 0xFFFD, 0x3D603D7C, 0x3D8B3D6E3D603D7C, 0x3D843D6E1C0E3D7C, 0x3D6E1C0E3D7E, 0x3D841C0E3D7E, 0x3D8B3D5A3D7F, 0x3D843D6E3D6C3D83, 0x3D823D83, 0x1C0E3D753D84, + 0x3D843D761C0E3D84, 0x3D7B3D85, 0xFFFD, 0x3D6E3D6C3D87, 0xF0B9FB401C3D, 0xF0B9FB401C3E, 0xF0B9FB401C3F, 0xF0B9FB401C40, 0xF0B9FB401C41, 0xF0B9FB401C42, 0xF0B9FB401C43, 0xF0B9FB401C44, 0xF0B9FB401C45, 0xF0B9FB401C46, 0xF0B9FB401C3D1C3E, + 0xF0B9FB401C3E1C3E, 0xF0B9FB401C3F1C3E, 0xF0B9FB401C401C3E, 0xF0B9FB401C411C3E, 0xF0B9FB401C421C3E, 0xF0B9FB401C431C3E, 0xF0B9FB401C441C3E, 0xF0B9FB401C451C3E, 0xF0B9FB401C461C3E, 0xF0B9FB401C3D1C3F, 0xF0B9FB401C3E1C3F, 0xF0B9FB401C3F1C3F, 0xF0B9FB401C401C3F, 0xF0B9FB401C411C3F, 0x1C471E0C1D18, + 0x1C471C8F, 0x1EB51C47, 0x1E331C471C60, 0x1EE31DDD, 0x1C7A1E0C, 0x1DAA1C8F, 0x1C3F1DAA1C8F, 0x1C401DAA1C8F, 0x1EB51D32, 0xE210FB40DE73FB40, 0xD48CFB40E62DFB40, 0xEB63FB40D927FB40, 0xECBBFB40E60EFB40, 0xFFFD, 0x1C471E0C, + 0x1C471DB9, 0x1C471FCB, 0x1C471DAA, 0x1C471D65, 0x1C601D65, 0x1C601DAA, 0x1C601CF4, 0x1D771C471C7A, 0x1D771C471C7A1D65, 0x1CE51E0C, 0x1CE51DB9, 0x1CE51FCB, 0x1CF41FCB, 0x1CF41DAA, 0x1CF41D65, + 0x1F211D18, 0x1F211D181D65, 0x1F211D181DAA, 0x1F211D181CF4, 0x1F211D181E95, 0x1D771FCB, 0x1D771DAA, 0x1D771C8F, 0x1D771D65, 
0x1DAA1CE5, 0x1DAA1DB9, 0x1DAA1FCB, 0x1DAA1DAA, 0x1DAA1C7A, 0x1DAA1D65, + 0x1C3F1DAA1DAA, 0x1C3F1DAA1C7A, 0x1C3F1DAA, 0x1C3F1DAA1D65, 0x1C401DAA1DAA, 0x1C401DAA1C7A, 0x1C401DAA, 0x1C401DAA1D65, 0x1E7106251DAA, 0x1C3F1E7106251DAA, 0x1C471E0C, 0x1C471E0C1D65, 0x1C471E0C1DAA, 0x1C471E0C1CF4, 0x1C8F1C471E33, + 0xFFFD, 0xFFFD, 0x1E711E0C, 0x1E711DB9, 0x1E711FCB, 0x1E711DAA, 0x1EE31E0C, 0x1EE31DB9, 0x1EE31FCB, 0x1EE31DAA, 0x1EE31D65, 0x1EE31DAA, 0x1EF51E0C, 0x1EF51DB9, 0x1EF51FCB, + 0x1EF51DAA, 0x1EF51D65, 0x1EF51DAA, 0x1FE11D65, 0x1FE11DAA, 0x2771DAA02771C47, 0x1E211C60, 0x1C7A1C7A, 0x1C8F1C7A, 0x1CF41D6506251C7A, 0x2771DDD1C7A, 0x1C601C8F, 0x1F0B1CF4, 0x1C471D18, 0x1E0C1D18, + 0x1DB91D32, 0x1D651D65, 0x1DAA1D65, 0x1E951D65, 0x1DAA1D77, 0x1DB91D77, 0x1CF41DDD1D77, 0x1EFF1D77, 0x1C601DAA, 0x1D771D321DAA, 0x1D771DDD1DAA, 0x1D181E0C, 0x2771DAA02771E0C, 0x1DAA1E0C1E0C, 0x1E331E0C, + 0x1E331E71, 0x1EE31E71, 0x1C601EF5, 0x1DAA06251EE3, 0x1DAA06251C47, 0xE5E5FB401C3E, 0xE5E5FB401C3F, 0xE5E5FB401C40, 0xE5E5FB401C41, 0xE5E5FB401C42, 0xE5E5FB401C43, 0xE5E5FB401C44, 0xE5E5FB401C45, 0xE5E5FB401C46, 0xE5E5FB401C3D1C3E, + 0xE5E5FB401C3E1C3E, 0xE5E5FB401C3F1C3E, 0xE5E5FB401C401C3E, 0xE5E5FB401C411C3E, 0xE5E5FB401C421C3E, 0xE5E5FB401C431C3E, 0xE5E5FB401C441C3E, 0xE5E5FB401C451C3E, 0xE5E5FB401C461C3E, 0xE5E5FB401C3D1C3F, 0xE5E5FB401C3E1C3F, 0xE5E5FB401C3F1C3F, 0xE5E5FB401C401C3F, 0xE5E5FB401C411C3F, 0xE5E5FB401C421C3F, + 0xE5E5FB401C431C3F, 0xE5E5FB401C441C3F, 0xE5E5FB401C451C3F, 0xE5E5FB401C461C3F, 0xE5E5FB401C3D1C40, 0xE5E5FB401C3E1C40, 0x1D771C471CF4, 0xB400FB80, 0xB401FB80, 0xB402FB80, 0xB403FB80, 0xB404FB80, 0xB405FB80, 0xB406FB80, 0xB407FB80, + 0xB408FB80, 0xB409FB80, 0xB40AFB80, 0xB40BFB80, 0xB40CFB80, 0xB40DFB80, 0xB40EFB80, 0xB40FFB80, 0xB410FB80, 0xB411FB80, 0xB412FB80, 0xB413FB80, 0xB414FB80, 0xB415FB80, 0xB416FB80, + 0xB417FB80, 0xB418FB80, 0xB419FB80, 0xB41AFB80, 0xB41BFB80, 0xB41CFB80, 0xB41DFB80, 0xB41EFB80, 0xB41FFB80, 0xB420FB80, 0xB421FB80, 0xB422FB80, 0xB423FB80, 
0xB424FB80, 0xB425FB80, + 0xB426FB80, 0xB427FB80, 0xB428FB80, 0xB429FB80, 0xB42AFB80, 0xB42BFB80, 0xB42CFB80, 0xB42DFB80, 0xB42EFB80, 0xB42FFB80, 0xB430FB80, 0xB431FB80, 0xB432FB80, 0xB433FB80, 0xB434FB80, + 0xB435FB80, 0xB436FB80, 0xB437FB80, 0xB438FB80, 0xB439FB80, 0xB43AFB80, 0xB43BFB80, 0xB43CFB80, 0xB43DFB80, 0xB43EFB80, 0xB43FFB80, 0xB440FB80, 0xB441FB80, 0xB442FB80, 0xB443FB80, + 0xB444FB80, 0xB445FB80, 0xB446FB80, 0xB447FB80, 0xB448FB80, 0xB449FB80, 0xB44AFB80, 0xB44BFB80, 0xB44CFB80, 0xB44DFB80, 0xB44EFB80, 0xB44FFB80, 0xB450FB80, 0xB451FB80, 0xB452FB80, + 0xB453FB80, 0xB454FB80, 0xB455FB80, 0xB456FB80, 0xB457FB80, 0xB458FB80, 0xB459FB80, 0xB45AFB80, 0xB45BFB80, 0xB45CFB80, 0xB45DFB80, 0xB45EFB80, 0xB45FFB80, 0xB460FB80, 0xB461FB80, + 0xB462FB80, 0xB463FB80, 0xB464FB80, 0xB465FB80, 0xB466FB80, 0xB467FB80, 0xB468FB80, 0xB469FB80, 0xB46AFB80, 0xB46BFB80, 0xB46CFB80, 0xB46DFB80, 0xB46EFB80, 0xB46FFB80, 0xB470FB80, + 0xB471FB80, 0xB472FB80, 0xB473FB80, 0xB474FB80, 0xB475FB80, 0xB476FB80, 0xB477FB80, 0xB478FB80, 0xB479FB80, 0xB47AFB80, 0xB47BFB80, 0xB47CFB80, 0xB47DFB80, 0xB47EFB80, 0xB47FFB80, + 0xB480FB80, 0xB481FB80, 0xB482FB80, 0xB483FB80, 0xB484FB80, 0xB485FB80, 0xB486FB80, 0xB487FB80, 0xB488FB80, 0xB489FB80, 0xB48AFB80, 0xB48BFB80, 0xB48CFB80, 0xB48DFB80, 0xB48EFB80, + 0xB48FFB80, 0xB490FB80, 0xB491FB80, 0xB492FB80, 0xB493FB80, 0xB494FB80, 0xB495FB80, 0xB496FB80, 0xB497FB80, 0xB498FB80, 0xB499FB80, 0xB49AFB80, 0xB49BFB80, 0xB49CFB80, 0xB49DFB80, + 0xB49EFB80, 0xB49FFB80, 0xB4A0FB80, 0xB4A1FB80, 0xB4A2FB80, 0xB4A3FB80, 0xB4A4FB80, 0xB4A5FB80, 0xB4A6FB80, 0xB4A7FB80, 0xB4A8FB80, 0xB4A9FB80, 0xB4AAFB80, 0xB4ABFB80, 0xB4ACFB80, + 0xB4ADFB80, 0xB4AEFB80, 0xB4AFFB80, 0xB4B0FB80, 0xB4B1FB80, 0xB4B2FB80, 0xB4B3FB80, 0xB4B4FB80, 0xB4B5FB80, 0xB4B6FB80, 0xB4B7FB80, 0xB4B8FB80, 0xB4B9FB80, 0xB4BAFB80, 0xB4BBFB80, + 0xB4BCFB80, 0xB4BDFB80, 0xB4BEFB80, 0xB4BFFB80, 0xB4C0FB80, 0xB4C1FB80, 0xB4C2FB80, 0xB4C3FB80, 0xB4C4FB80, 0xB4C5FB80, 0xB4C6FB80, 0xB4C7FB80, 
0xB4C8FB80, 0xB4C9FB80, 0xB4CAFB80, + 0xB4CBFB80, 0xB4CCFB80, 0xB4CDFB80, 0xB4CEFB80, 0xB4CFFB80, 0xB4D0FB80, 0xB4D1FB80, 0xB4D2FB80, 0xB4D3FB80, 0xB4D4FB80, 0xB4D5FB80, 0xB4D6FB80, 0xB4D7FB80, 0xB4D8FB80, 0xB4D9FB80, + 0xB4DAFB80, 0xB4DBFB80, 0xB4DCFB80, 0xB4DDFB80, 0xB4DEFB80, 0xB4DFFB80, 0xB4E0FB80, 0xB4E1FB80, 0xB4E2FB80, 0xB4E3FB80, 0xB4E4FB80, 0xB4E5FB80, 0xB4E6FB80, 0xB4E7FB80, 0xB4E8FB80, + 0xB4E9FB80, 0xB4EAFB80, 0xB4EBFB80, 0xB4ECFB80, 0xB4EDFB80, 0xB4EEFB80, 0xB4EFFB80, 0xB4F0FB80, 0xB4F1FB80, 0xB4F2FB80, 0xB4F3FB80, 0xB4F4FB80, 0xB4F5FB80, 0xB4F6FB80, 0xB4F7FB80, + 0xB4F8FB80, 0xB4F9FB80, 0xB4FAFB80, 0xB4FBFB80, 0xB4FCFB80, 0xB4FDFB80, 0xB4FEFB80, 0xB4FFFB80, 0xB500FB80, 0xB501FB80, 0xB502FB80, 0xB503FB80, 0xB504FB80, 0xB505FB80, 0xB506FB80, + 0xB507FB80, 0xB508FB80, 0xB509FB80, 0xB50AFB80, 0xB50BFB80, 0xB50CFB80, 0xB50DFB80, 0xB50EFB80, 0xB50FFB80, 0xB510FB80, 0xB511FB80, 0xB512FB80, 0xB513FB80, 0xB514FB80, 0xB515FB80, + 0xB516FB80, 0xB517FB80, 0xB518FB80, 0xB519FB80, 0xB51AFB80, 0xB51BFB80, 0xB51CFB80, 0xB51DFB80, 0xB51EFB80, 0xB51FFB80, 0xB520FB80, 0xB521FB80, 0xB522FB80, 0xB523FB80, 0xB524FB80, + 0xB525FB80, 0xB526FB80, 0xB527FB80, 0xB528FB80, 0xB529FB80, 0xB52AFB80, 0xB52BFB80, 0xB52CFB80, 0xB52DFB80, 0xB52EFB80, 0xB52FFB80, 0xB530FB80, 0xB531FB80, 0xB532FB80, 0xB533FB80, + 0xB534FB80, 0xB535FB80, 0xB536FB80, 0xB537FB80, 0xB538FB80, 0xB539FB80, 0xB53AFB80, 0xB53BFB80, 0xB53CFB80, 0xB53DFB80, 0xB53EFB80, 0xB53FFB80, 0xB540FB80, 0xB541FB80, 0xB542FB80, + 0xB543FB80, 0xB544FB80, 0xB545FB80, 0xB546FB80, 0xB547FB80, 0xB548FB80, 0xB549FB80, 0xB54AFB80, 0xB54BFB80, 0xB54CFB80, 0xB54DFB80, 0xB54EFB80, 0xB54FFB80, 0xB550FB80, 0xB551FB80, + 0xB552FB80, 0xB553FB80, 0xB554FB80, 0xB555FB80, 0xB556FB80, 0xB557FB80, 0xB558FB80, 0xB559FB80, 0xB55AFB80, 0xB55BFB80, 0xB55CFB80, 0xB55DFB80, 0xB55EFB80, 0xB55FFB80, 0xB560FB80, + 0xB561FB80, 0xB562FB80, 0xB563FB80, 0xB564FB80, 0xB565FB80, 0xB566FB80, 0xB567FB80, 0xB568FB80, 0xB569FB80, 0xB56AFB80, 0xB56BFB80, 
0xB56CFB80, 0xB56DFB80, 0xB56EFB80, 0xB56FFB80, + 0xB570FB80, 0xB571FB80, 0xB572FB80, 0xB573FB80, 0xB574FB80, 0xB575FB80, 0xB576FB80, 0xB577FB80, 0xB578FB80, 0xB579FB80, 0xB57AFB80, 0xB57BFB80, 0xB57CFB80, 0xB57DFB80, 0xB57EFB80, + 0xB57FFB80, 0xB580FB80, 0xB581FB80, 0xB582FB80, 0xB583FB80, 0xB584FB80, 0xB585FB80, 0xB586FB80, 0xB587FB80, 0xB588FB80, 0xB589FB80, 0xB58AFB80, 0xB58BFB80, 0xB58CFB80, 0xB58DFB80, + 0xB58EFB80, 0xB58FFB80, 0xB590FB80, 0xB591FB80, 0xB592FB80, 0xB593FB80, 0xB594FB80, 0xB595FB80, 0xB596FB80, 0xB597FB80, 0xB598FB80, 0xB599FB80, 0xB59AFB80, 0xB59BFB80, 0xB59CFB80, + 0xB59DFB80, 0xB59EFB80, 0xB59FFB80, 0xB5A0FB80, 0xB5A1FB80, 0xB5A2FB80, 0xB5A3FB80, 0xB5A4FB80, 0xB5A5FB80, 0xB5A6FB80, 0xB5A7FB80, 0xB5A8FB80, 0xB5A9FB80, 0xB5AAFB80, 0xB5ABFB80, + 0xB5ACFB80, 0xB5ADFB80, 0xB5AEFB80, 0xB5AFFB80, 0xB5B0FB80, 0xB5B1FB80, 0xB5B2FB80, 0xB5B3FB80, 0xB5B4FB80, 0xB5B5FB80, 0xB5B6FB80, 0xB5B7FB80, 0xB5B8FB80, 0xB5B9FB80, 0xB5BAFB80, + 0xB5BBFB80, 0xB5BCFB80, 0xB5BDFB80, 0xB5BEFB80, 0xB5BFFB80, 0xB5C0FB80, 0xB5C1FB80, 0xB5C2FB80, 0xB5C3FB80, 0xB5C4FB80, 0xB5C5FB80, 0xB5C6FB80, 0xB5C7FB80, 0xB5C8FB80, 0xB5C9FB80, + 0xB5CAFB80, 0xB5CBFB80, 0xB5CCFB80, 0xB5CDFB80, 0xB5CEFB80, 0xB5CFFB80, 0xB5D0FB80, 0xB5D1FB80, 0xB5D2FB80, 0xB5D3FB80, 0xB5D4FB80, 0xB5D5FB80, 0xB5D6FB80, 0xB5D7FB80, 0xB5D8FB80, + 0xB5D9FB80, 0xB5DAFB80, 0xB5DBFB80, 0xB5DCFB80, 0xB5DDFB80, 0xB5DEFB80, 0xB5DFFB80, 0xB5E0FB80, 0xB5E1FB80, 0xB5E2FB80, 0xB5E3FB80, 0xB5E4FB80, 0xB5E5FB80, 0xB5E6FB80, 0xB5E7FB80, + 0xB5E8FB80, 0xB5E9FB80, 0xB5EAFB80, 0xB5EBFB80, 0xB5ECFB80, 0xB5EDFB80, 0xB5EEFB80, 0xB5EFFB80, 0xB5F0FB80, 0xB5F1FB80, 0xB5F2FB80, 0xB5F3FB80, 0xB5F4FB80, 0xB5F5FB80, 0xB5F6FB80, + 0xB5F7FB80, 0xB5F8FB80, 0xB5F9FB80, 0xB5FAFB80, 0xB5FBFB80, 0xB5FCFB80, 0xB5FDFB80, 0xB5FEFB80, 0xB5FFFB80, 0xB600FB80, 0xB601FB80, 0xB602FB80, 0xB603FB80, 0xB604FB80, 0xB605FB80, + 0xB606FB80, 0xB607FB80, 0xB608FB80, 0xB609FB80, 0xB60AFB80, 0xB60BFB80, 0xB60CFB80, 0xB60DFB80, 0xB60EFB80, 0xB60FFB80, 
0xB610FB80, 0xB611FB80, 0xB612FB80, 0xB613FB80, 0xB614FB80, + 0xB615FB80, 0xB616FB80, 0xB617FB80, 0xB618FB80, 0xB619FB80, 0xB61AFB80, 0xB61BFB80, 0xB61CFB80, 0xB61DFB80, 0xB61EFB80, 0xB61FFB80, 0xB620FB80, 0xB621FB80, 0xB622FB80, 0xB623FB80, + 0xB624FB80, 0xB625FB80, 0xB626FB80, 0xB627FB80, 0xB628FB80, 0xB629FB80, 0xB62AFB80, 0xB62BFB80, 0xB62CFB80, 0xB62DFB80, 0xB62EFB80, 0xB62FFB80, 0xB630FB80, 0xB631FB80, 0xB632FB80, + 0xB633FB80, 0xB634FB80, 0xB635FB80, 0xB636FB80, 0xB637FB80, 0xB638FB80, 0xB639FB80, 0xB63AFB80, 0xB63BFB80, 0xB63CFB80, 0xB63DFB80, 0xB63EFB80, 0xB63FFB80, 0xB640FB80, 0xB641FB80, + 0xB642FB80, 0xB643FB80, 0xB644FB80, 0xB645FB80, 0xB646FB80, 0xB647FB80, 0xB648FB80, 0xB649FB80, 0xB64AFB80, 0xB64BFB80, 0xB64CFB80, 0xB64DFB80, 0xB64EFB80, 0xB64FFB80, 0xB650FB80, + 0xB651FB80, 0xB652FB80, 0xB653FB80, 0xB654FB80, 0xB655FB80, 0xB656FB80, 0xB657FB80, 0xB658FB80, 0xB659FB80, 0xB65AFB80, 0xB65BFB80, 0xB65CFB80, 0xB65DFB80, 0xB65EFB80, 0xB65FFB80, + 0xB660FB80, 0xB661FB80, 0xB662FB80, 0xB663FB80, 0xB664FB80, 0xB665FB80, 0xB666FB80, 0xB667FB80, 0xB668FB80, 0xB669FB80, 0xB66AFB80, 0xB66BFB80, 0xB66CFB80, 0xB66DFB80, 0xB66EFB80, + 0xB66FFB80, 0xB670FB80, 0xB671FB80, 0xB672FB80, 0xB673FB80, 0xB674FB80, 0xB675FB80, 0xB676FB80, 0xB677FB80, 0xB678FB80, 0xB679FB80, 0xB67AFB80, 0xB67BFB80, 0xB67CFB80, 0xB67DFB80, + 0xB67EFB80, 0xB67FFB80, 0xB680FB80, 0xB681FB80, 0xB682FB80, 0xB683FB80, 0xB684FB80, 0xB685FB80, 0xB686FB80, 0xB687FB80, 0xB688FB80, 0xB689FB80, 0xB68AFB80, 0xB68BFB80, 0xB68CFB80, + 0xB68DFB80, 0xB68EFB80, 0xB68FFB80, 0xB690FB80, 0xB691FB80, 0xB692FB80, 0xB693FB80, 0xB694FB80, 0xB695FB80, 0xB696FB80, 0xB697FB80, 0xB698FB80, 0xB699FB80, 0xB69AFB80, 0xB69BFB80, + 0xB69CFB80, 0xB69DFB80, 0xB69EFB80, 0xB69FFB80, 0xB6A0FB80, 0xB6A1FB80, 0xB6A2FB80, 0xB6A3FB80, 0xB6A4FB80, 0xB6A5FB80, 0xB6A6FB80, 0xB6A7FB80, 0xB6A8FB80, 0xB6A9FB80, 0xB6AAFB80, + 0xB6ABFB80, 0xB6ACFB80, 0xB6ADFB80, 0xB6AEFB80, 0xB6AFFB80, 0xB6B0FB80, 0xB6B1FB80, 0xB6B2FB80, 0xB6B3FB80, 
0xB6B4FB80, 0xB6B5FB80, 0xB6B6FB80, 0xB6B7FB80, 0xB6B8FB80, 0xB6B9FB80, + 0xB6BAFB80, 0xB6BBFB80, 0xB6BCFB80, 0xB6BDFB80, 0xB6BEFB80, 0xB6BFFB80, 0xB6C0FB80, 0xB6C1FB80, 0xB6C2FB80, 0xB6C3FB80, 0xB6C4FB80, 0xB6C5FB80, 0xB6C6FB80, 0xB6C7FB80, 0xB6C8FB80, + 0xB6C9FB80, 0xB6CAFB80, 0xB6CBFB80, 0xB6CCFB80, 0xB6CDFB80, 0xB6CEFB80, 0xB6CFFB80, 0xB6D0FB80, 0xB6D1FB80, 0xB6D2FB80, 0xB6D3FB80, 0xB6D4FB80, 0xB6D5FB80, 0xB6D6FB80, 0xB6D7FB80, + 0xB6D8FB80, 0xB6D9FB80, 0xB6DAFB80, 0xB6DBFB80, 0xB6DCFB80, 0xB6DDFB80, 0xB6DEFB80, 0xB6DFFB80, 0xB6E0FB80, 0xB6E1FB80, 0xB6E2FB80, 0xB6E3FB80, 0xB6E4FB80, 0xB6E5FB80, 0xB6E6FB80, + 0xB6E7FB80, 0xB6E8FB80, 0xB6E9FB80, 0xB6EAFB80, 0xB6EBFB80, 0xB6ECFB80, 0xB6EDFB80, 0xB6EEFB80, 0xB6EFFB80, 0xB6F0FB80, 0xB6F1FB80, 0xB6F2FB80, 0xB6F3FB80, 0xB6F4FB80, 0xB6F5FB80, + 0xB6F6FB80, 0xB6F7FB80, 0xB6F8FB80, 0xB6F9FB80, 0xB6FAFB80, 0xB6FBFB80, 0xB6FCFB80, 0xB6FDFB80, 0xB6FEFB80, 0xB6FFFB80, 0xB700FB80, 0xB701FB80, 0xB702FB80, 0xB703FB80, 0xB704FB80, + 0xB705FB80, 0xB706FB80, 0xB707FB80, 0xB708FB80, 0xB709FB80, 0xB70AFB80, 0xB70BFB80, 0xB70CFB80, 0xB70DFB80, 0xB70EFB80, 0xB70FFB80, 0xB710FB80, 0xB711FB80, 0xB712FB80, 0xB713FB80, + 0xB714FB80, 0xB715FB80, 0xB716FB80, 0xB717FB80, 0xB718FB80, 0xB719FB80, 0xB71AFB80, 0xB71BFB80, 0xB71CFB80, 0xB71DFB80, 0xB71EFB80, 0xB71FFB80, 0xB720FB80, 0xB721FB80, 0xB722FB80, + 0xB723FB80, 0xB724FB80, 0xB725FB80, 0xB726FB80, 0xB727FB80, 0xB728FB80, 0xB729FB80, 0xB72AFB80, 0xB72BFB80, 0xB72CFB80, 0xB72DFB80, 0xB72EFB80, 0xB72FFB80, 0xB730FB80, 0xB731FB80, + 0xB732FB80, 0xB733FB80, 0xB734FB80, 0xB735FB80, 0xB736FB80, 0xB737FB80, 0xB738FB80, 0xB739FB80, 0xB73AFB80, 0xB73BFB80, 0xB73CFB80, 0xB73DFB80, 0xB73EFB80, 0xB73FFB80, 0xB740FB80, + 0xB741FB80, 0xB742FB80, 0xB743FB80, 0xB744FB80, 0xB745FB80, 0xB746FB80, 0xB747FB80, 0xB748FB80, 0xB749FB80, 0xB74AFB80, 0xB74BFB80, 0xB74CFB80, 0xB74DFB80, 0xB74EFB80, 0xB74FFB80, + 0xB750FB80, 0xB751FB80, 0xB752FB80, 0xB753FB80, 0xB754FB80, 0xB755FB80, 0xB756FB80, 0xB757FB80, 
0xB758FB80, 0xB759FB80, 0xB75AFB80, 0xB75BFB80, 0xB75CFB80, 0xB75DFB80, 0xB75EFB80, + 0xB75FFB80, 0xB760FB80, 0xB761FB80, 0xB762FB80, 0xB763FB80, 0xB764FB80, 0xB765FB80, 0xB766FB80, 0xB767FB80, 0xB768FB80, 0xB769FB80, 0xB76AFB80, 0xB76BFB80, 0xB76CFB80, 0xB76DFB80, + 0xB76EFB80, 0xB76FFB80, 0xB770FB80, 0xB771FB80, 0xB772FB80, 0xB773FB80, 0xB774FB80, 0xB775FB80, 0xB776FB80, 0xB777FB80, 0xB778FB80, 0xB779FB80, 0xB77AFB80, 0xB77BFB80, 0xB77CFB80, + 0xB77DFB80, 0xB77EFB80, 0xB77FFB80, 0xB780FB80, 0xB781FB80, 0xB782FB80, 0xB783FB80, 0xB784FB80, 0xB785FB80, 0xB786FB80, 0xB787FB80, 0xB788FB80, 0xB789FB80, 0xB78AFB80, 0xB78BFB80, + 0xB78CFB80, 0xB78DFB80, 0xB78EFB80, 0xB78FFB80, 0xB790FB80, 0xB791FB80, 0xB792FB80, 0xB793FB80, 0xB794FB80, 0xB795FB80, 0xB796FB80, 0xB797FB80, 0xB798FB80, 0xB799FB80, 0xB79AFB80, + 0xB79BFB80, 0xB79CFB80, 0xB79DFB80, 0xB79EFB80, 0xB79FFB80, 0xB7A0FB80, 0xB7A1FB80, 0xB7A2FB80, 0xB7A3FB80, 0xB7A4FB80, 0xB7A5FB80, 0xB7A6FB80, 0xB7A7FB80, 0xB7A8FB80, 0xB7A9FB80, + 0xB7AAFB80, 0xB7ABFB80, 0xB7ACFB80, 0xB7ADFB80, 0xB7AEFB80, 0xB7AFFB80, 0xB7B0FB80, 0xB7B1FB80, 0xB7B2FB80, 0xB7B3FB80, 0xB7B4FB80, 0xB7B5FB80, 0xB7B6FB80, 0xB7B7FB80, 0xB7B8FB80, + 0xB7B9FB80, 0xB7BAFB80, 0xB7BBFB80, 0xB7BCFB80, 0xB7BDFB80, 0xB7BEFB80, 0xB7BFFB80, 0xB7C0FB80, 0xB7C1FB80, 0xB7C2FB80, 0xB7C3FB80, 0xB7C4FB80, 0xB7C5FB80, 0xB7C6FB80, 0xB7C7FB80, + 0xB7C8FB80, 0xB7C9FB80, 0xB7CAFB80, 0xB7CBFB80, 0xB7CCFB80, 0xB7CDFB80, 0xB7CEFB80, 0xB7CFFB80, 0xB7D0FB80, 0xB7D1FB80, 0xB7D2FB80, 0xB7D3FB80, 0xB7D4FB80, 0xB7D5FB80, 0xB7D6FB80, + 0xB7D7FB80, 0xB7D8FB80, 0xB7D9FB80, 0xB7DAFB80, 0xB7DBFB80, 0xB7DCFB80, 0xB7DDFB80, 0xB7DEFB80, 0xB7DFFB80, 0xB7E0FB80, 0xB7E1FB80, 0xB7E2FB80, 0xB7E3FB80, 0xB7E4FB80, 0xB7E5FB80, + 0xB7E6FB80, 0xB7E7FB80, 0xB7E8FB80, 0xB7E9FB80, 0xB7EAFB80, 0xB7EBFB80, 0xB7ECFB80, 0xB7EDFB80, 0xB7EEFB80, 0xB7EFFB80, 0xB7F0FB80, 0xB7F1FB80, 0xB7F2FB80, 0xB7F3FB80, 0xB7F4FB80, + 0xB7F5FB80, 0xB7F6FB80, 0xB7F7FB80, 0xB7F8FB80, 0xB7F9FB80, 0xB7FAFB80, 0xB7FBFB80, 
0xB7FCFB80, 0xB7FDFB80, 0xB7FEFB80, 0xB7FFFB80, 0xB800FB80, 0xB801FB80, 0xB802FB80, 0xB803FB80, + 0xB804FB80, 0xB805FB80, 0xB806FB80, 0xB807FB80, 0xB808FB80, 0xB809FB80, 0xB80AFB80, 0xB80BFB80, 0xB80CFB80, 0xB80DFB80, 0xB80EFB80, 0xB80FFB80, 0xB810FB80, 0xB811FB80, 0xB812FB80, + 0xB813FB80, 0xB814FB80, 0xB815FB80, 0xB816FB80, 0xB817FB80, 0xB818FB80, 0xB819FB80, 0xB81AFB80, 0xB81BFB80, 0xB81CFB80, 0xB81DFB80, 0xB81EFB80, 0xB81FFB80, 0xB820FB80, 0xB821FB80, + 0xB822FB80, 0xB823FB80, 0xB824FB80, 0xB825FB80, 0xB826FB80, 0xB827FB80, 0xB828FB80, 0xB829FB80, 0xB82AFB80, 0xB82BFB80, 0xB82CFB80, 0xB82DFB80, 0xB82EFB80, 0xB82FFB80, 0xB830FB80, + 0xB831FB80, 0xB832FB80, 0xB833FB80, 0xB834FB80, 0xB835FB80, 0xB836FB80, 0xB837FB80, 0xB838FB80, 0xB839FB80, 0xB83AFB80, 0xB83BFB80, 0xB83CFB80, 0xB83DFB80, 0xB83EFB80, 0xB83FFB80, + 0xB840FB80, 0xB841FB80, 0xB842FB80, 0xB843FB80, 0xB844FB80, 0xB845FB80, 0xB846FB80, 0xB847FB80, 0xB848FB80, 0xB849FB80, 0xB84AFB80, 0xB84BFB80, 0xB84CFB80, 0xB84DFB80, 0xB84EFB80, + 0xB84FFB80, 0xB850FB80, 0xB851FB80, 0xB852FB80, 0xB853FB80, 0xB854FB80, 0xB855FB80, 0xB856FB80, 0xB857FB80, 0xB858FB80, 0xB859FB80, 0xB85AFB80, 0xB85BFB80, 0xB85CFB80, 0xB85DFB80, + 0xB85EFB80, 0xB85FFB80, 0xB860FB80, 0xB861FB80, 0xB862FB80, 0xB863FB80, 0xB864FB80, 0xB865FB80, 0xB866FB80, 0xB867FB80, 0xB868FB80, 0xB869FB80, 0xB86AFB80, 0xB86BFB80, 0xB86CFB80, + 0xB86DFB80, 0xB86EFB80, 0xB86FFB80, 0xB870FB80, 0xB871FB80, 0xB872FB80, 0xB873FB80, 0xB874FB80, 0xB875FB80, 0xB876FB80, 0xB877FB80, 0xB878FB80, 0xB879FB80, 0xB87AFB80, 0xB87BFB80, + 0xB87CFB80, 0xB87DFB80, 0xB87EFB80, 0xB87FFB80, 0xB880FB80, 0xB881FB80, 0xB882FB80, 0xB883FB80, 0xB884FB80, 0xB885FB80, 0xB886FB80, 0xB887FB80, 0xB888FB80, 0xB889FB80, 0xB88AFB80, + 0xB88BFB80, 0xB88CFB80, 0xB88DFB80, 0xB88EFB80, 0xB88FFB80, 0xB890FB80, 0xB891FB80, 0xB892FB80, 0xB893FB80, 0xB894FB80, 0xB895FB80, 0xB896FB80, 0xB897FB80, 0xB898FB80, 0xB899FB80, + 0xB89AFB80, 0xB89BFB80, 0xB89CFB80, 0xB89DFB80, 0xB89EFB80, 0xB89FFB80, 
0xB8A0FB80, 0xB8A1FB80, 0xB8A2FB80, 0xB8A3FB80, 0xB8A4FB80, 0xB8A5FB80, 0xB8A6FB80, 0xB8A7FB80, 0xB8A8FB80, + 0xB8A9FB80, 0xB8AAFB80, 0xB8ABFB80, 0xB8ACFB80, 0xB8ADFB80, 0xB8AEFB80, 0xB8AFFB80, 0xB8B0FB80, 0xB8B1FB80, 0xB8B2FB80, 0xB8B3FB80, 0xB8B4FB80, 0xB8B5FB80, 0xB8B6FB80, 0xB8B7FB80, + 0xB8B8FB80, 0xB8B9FB80, 0xB8BAFB80, 0xB8BBFB80, 0xB8BCFB80, 0xB8BDFB80, 0xB8BEFB80, 0xB8BFFB80, 0xB8C0FB80, 0xB8C1FB80, 0xB8C2FB80, 0xB8C3FB80, 0xB8C4FB80, 0xB8C5FB80, 0xB8C6FB80, + 0xB8C7FB80, 0xB8C8FB80, 0xB8C9FB80, 0xB8CAFB80, 0xB8CBFB80, 0xB8CCFB80, 0xB8CDFB80, 0xB8CEFB80, 0xB8CFFB80, 0xB8D0FB80, 0xB8D1FB80, 0xB8D2FB80, 0xB8D3FB80, 0xB8D4FB80, 0xB8D5FB80, + 0xB8D6FB80, 0xB8D7FB80, 0xB8D8FB80, 0xB8D9FB80, 0xB8DAFB80, 0xB8DBFB80, 0xB8DCFB80, 0xB8DDFB80, 0xB8DEFB80, 0xB8DFFB80, 0xB8E0FB80, 0xB8E1FB80, 0xB8E2FB80, 0xB8E3FB80, 0xB8E4FB80, + 0xB8E5FB80, 0xB8E6FB80, 0xB8E7FB80, 0xB8E8FB80, 0xB8E9FB80, 0xB8EAFB80, 0xB8EBFB80, 0xB8ECFB80, 0xB8EDFB80, 0xB8EEFB80, 0xB8EFFB80, 0xB8F0FB80, 0xB8F1FB80, 0xB8F2FB80, 0xB8F3FB80, + 0xB8F4FB80, 0xB8F5FB80, 0xB8F6FB80, 0xB8F7FB80, 0xB8F8FB80, 0xB8F9FB80, 0xB8FAFB80, 0xB8FBFB80, 0xB8FCFB80, 0xB8FDFB80, 0xB8FEFB80, 0xB8FFFB80, 0xB900FB80, 0xB901FB80, 0xB902FB80, + 0xB903FB80, 0xB904FB80, 0xB905FB80, 0xB906FB80, 0xB907FB80, 0xB908FB80, 0xB909FB80, 0xB90AFB80, 0xB90BFB80, 0xB90CFB80, 0xB90DFB80, 0xB90EFB80, 0xB90FFB80, 0xB910FB80, 0xB911FB80, + 0xB912FB80, 0xB913FB80, 0xB914FB80, 0xB915FB80, 0xB916FB80, 0xB917FB80, 0xB918FB80, 0xB919FB80, 0xB91AFB80, 0xB91BFB80, 0xB91CFB80, 0xB91DFB80, 0xB91EFB80, 0xB91FFB80, 0xB920FB80, + 0xB921FB80, 0xB922FB80, 0xB923FB80, 0xB924FB80, 0xB925FB80, 0xB926FB80, 0xB927FB80, 0xB928FB80, 0xB929FB80, 0xB92AFB80, 0xB92BFB80, 0xB92CFB80, 0xB92DFB80, 0xB92EFB80, 0xB92FFB80, + 0xB930FB80, 0xB931FB80, 0xB932FB80, 0xB933FB80, 0xB934FB80, 0xB935FB80, 0xB936FB80, 0xB937FB80, 0xB938FB80, 0xB939FB80, 0xB93AFB80, 0xB93BFB80, 0xB93CFB80, 0xB93DFB80, 0xB93EFB80, + 0xB93FFB80, 0xB940FB80, 0xB941FB80, 0xB942FB80, 0xB943FB80, 
0xB944FB80, 0xB945FB80, 0xB946FB80, 0xB947FB80, 0xB948FB80, 0xB949FB80, 0xB94AFB80, 0xB94BFB80, 0xB94CFB80, 0xB94DFB80, + 0xB94EFB80, 0xB94FFB80, 0xB950FB80, 0xB951FB80, 0xB952FB80, 0xB953FB80, 0xB954FB80, 0xB955FB80, 0xB956FB80, 0xB957FB80, 0xB958FB80, 0xB959FB80, 0xB95AFB80, 0xB95BFB80, 0xB95CFB80, + 0xB95DFB80, 0xB95EFB80, 0xB95FFB80, 0xB960FB80, 0xB961FB80, 0xB962FB80, 0xB963FB80, 0xB964FB80, 0xB965FB80, 0xB966FB80, 0xB967FB80, 0xB968FB80, 0xB969FB80, 0xB96AFB80, 0xB96BFB80, + 0xB96CFB80, 0xB96DFB80, 0xB96EFB80, 0xB96FFB80, 0xB970FB80, 0xB971FB80, 0xB972FB80, 0xB973FB80, 0xB974FB80, 0xB975FB80, 0xB976FB80, 0xB977FB80, 0xB978FB80, 0xB979FB80, 0xB97AFB80, + 0xB97BFB80, 0xB97CFB80, 0xB97DFB80, 0xB97EFB80, 0xB97FFB80, 0xB980FB80, 0xB981FB80, 0xB982FB80, 0xB983FB80, 0xB984FB80, 0xB985FB80, 0xB986FB80, 0xB987FB80, 0xB988FB80, 0xB989FB80, + 0xB98AFB80, 0xB98BFB80, 0xB98CFB80, 0xB98DFB80, 0xB98EFB80, 0xB98FFB80, 0xB990FB80, 0xB991FB80, 0xB992FB80, 0xB993FB80, 0xB994FB80, 0xB995FB80, 0xB996FB80, 0xB997FB80, 0xB998FB80, + 0xB999FB80, 0xB99AFB80, 0xB99BFB80, 0xB99CFB80, 0xB99DFB80, 0xB99EFB80, 0xB99FFB80, 0xB9A0FB80, 0xB9A1FB80, 0xB9A2FB80, 0xB9A3FB80, 0xB9A4FB80, 0xB9A5FB80, 0xB9A6FB80, 0xB9A7FB80, + 0xB9A8FB80, 0xB9A9FB80, 0xB9AAFB80, 0xB9ABFB80, 0xB9ACFB80, 0xB9ADFB80, 0xB9AEFB80, 0xB9AFFB80, 0xB9B0FB80, 0xB9B1FB80, 0xB9B2FB80, 0xB9B3FB80, 0xB9B4FB80, 0xB9B5FB80, 0xB9B6FB80, + 0xB9B7FB80, 0xB9B8FB80, 0xB9B9FB80, 0xB9BAFB80, 0xB9BBFB80, 0xB9BCFB80, 0xB9BDFB80, 0xB9BEFB80, 0xB9BFFB80, 0xB9C0FB80, 0xB9C1FB80, 0xB9C2FB80, 0xB9C3FB80, 0xB9C4FB80, 0xB9C5FB80, + 0xB9C6FB80, 0xB9C7FB80, 0xB9C8FB80, 0xB9C9FB80, 0xB9CAFB80, 0xB9CBFB80, 0xB9CCFB80, 0xB9CDFB80, 0xB9CEFB80, 0xB9CFFB80, 0xB9D0FB80, 0xB9D1FB80, 0xB9D2FB80, 0xB9D3FB80, 0xB9D4FB80, + 0xB9D5FB80, 0xB9D6FB80, 0xB9D7FB80, 0xB9D8FB80, 0xB9D9FB80, 0xB9DAFB80, 0xB9DBFB80, 0xB9DCFB80, 0xB9DDFB80, 0xB9DEFB80, 0xB9DFFB80, 0xB9E0FB80, 0xB9E1FB80, 0xB9E2FB80, 0xB9E3FB80, + 0xB9E4FB80, 0xB9E5FB80, 0xB9E6FB80, 0xB9E7FB80, 
0xB9E8FB80, 0xB9E9FB80, 0xB9EAFB80, 0xB9EBFB80, 0xB9ECFB80, 0xB9EDFB80, 0xB9EEFB80, 0xB9EFFB80, 0xB9F0FB80, 0xB9F1FB80, 0xB9F2FB80, + 0xB9F3FB80, 0xB9F4FB80, 0xB9F5FB80, 0xB9F6FB80, 0xB9F7FB80, 0xB9F8FB80, 0xB9F9FB80, 0xB9FAFB80, 0xB9FBFB80, 0xB9FCFB80, 0xB9FDFB80, 0xB9FEFB80, 0xB9FFFB80, 0xBA00FB80, 0xBA01FB80, + 0xBA02FB80, 0xBA03FB80, 0xBA04FB80, 0xBA05FB80, 0xBA06FB80, 0xBA07FB80, 0xBA08FB80, 0xBA09FB80, 0xBA0AFB80, 0xBA0BFB80, 0xBA0CFB80, 0xBA0DFB80, 0xBA0EFB80, 0xBA0FFB80, 0xBA10FB80, + 0xBA11FB80, 0xBA12FB80, 0xBA13FB80, 0xBA14FB80, 0xBA15FB80, 0xBA16FB80, 0xBA17FB80, 0xBA18FB80, 0xBA19FB80, 0xBA1AFB80, 0xBA1BFB80, 0xBA1CFB80, 0xBA1DFB80, 0xBA1EFB80, 0xBA1FFB80, + 0xBA20FB80, 0xBA21FB80, 0xBA22FB80, 0xBA23FB80, 0xBA24FB80, 0xBA25FB80, 0xBA26FB80, 0xBA27FB80, 0xBA28FB80, 0xBA29FB80, 0xBA2AFB80, 0xBA2BFB80, 0xBA2CFB80, 0xBA2DFB80, 0xBA2EFB80, + 0xBA2FFB80, 0xBA30FB80, 0xBA31FB80, 0xBA32FB80, 0xBA33FB80, 0xBA34FB80, 0xBA35FB80, 0xBA36FB80, 0xBA37FB80, 0xBA38FB80, 0xBA39FB80, 0xBA3AFB80, 0xBA3BFB80, 0xBA3CFB80, 0xBA3DFB80, + 0xBA3EFB80, 0xBA3FFB80, 0xBA40FB80, 0xBA41FB80, 0xBA42FB80, 0xBA43FB80, 0xBA44FB80, 0xBA45FB80, 0xBA46FB80, 0xBA47FB80, 0xBA48FB80, 0xBA49FB80, 0xBA4AFB80, 0xBA4BFB80, 0xBA4CFB80, + 0xBA4DFB80, 0xBA4EFB80, 0xBA4FFB80, 0xBA50FB80, 0xBA51FB80, 0xBA52FB80, 0xBA53FB80, 0xBA54FB80, 0xBA55FB80, 0xBA56FB80, 0xBA57FB80, 0xBA58FB80, 0xBA59FB80, 0xBA5AFB80, 0xBA5BFB80, + 0xBA5CFB80, 0xBA5DFB80, 0xBA5EFB80, 0xBA5FFB80, 0xBA60FB80, 0xBA61FB80, 0xBA62FB80, 0xBA63FB80, 0xBA64FB80, 0xBA65FB80, 0xBA66FB80, 0xBA67FB80, 0xBA68FB80, 0xBA69FB80, 0xBA6AFB80, + 0xBA6BFB80, 0xBA6CFB80, 0xBA6DFB80, 0xBA6EFB80, 0xBA6FFB80, 0xBA70FB80, 0xBA71FB80, 0xBA72FB80, 0xBA73FB80, 0xBA74FB80, 0xBA75FB80, 0xBA76FB80, 0xBA77FB80, 0xBA78FB80, 0xBA79FB80, + 0xBA7AFB80, 0xBA7BFB80, 0xBA7CFB80, 0xBA7DFB80, 0xBA7EFB80, 0xBA7FFB80, 0xBA80FB80, 0xBA81FB80, 0xBA82FB80, 0xBA83FB80, 0xBA84FB80, 0xBA85FB80, 0xBA86FB80, 0xBA87FB80, 0xBA88FB80, + 0xBA89FB80, 0xBA8AFB80, 0xBA8BFB80, 
0xBA8CFB80, 0xBA8DFB80, 0xBA8EFB80, 0xBA8FFB80, 0xBA90FB80, 0xBA91FB80, 0xBA92FB80, 0xBA93FB80, 0xBA94FB80, 0xBA95FB80, 0xBA96FB80, 0xBA97FB80, + 0xBA98FB80, 0xBA99FB80, 0xBA9AFB80, 0xBA9BFB80, 0xBA9CFB80, 0xBA9DFB80, 0xBA9EFB80, 0xBA9FFB80, 0xBAA0FB80, 0xBAA1FB80, 0xBAA2FB80, 0xBAA3FB80, 0xBAA4FB80, 0xBAA5FB80, 0xBAA6FB80, + 0xBAA7FB80, 0xBAA8FB80, 0xBAA9FB80, 0xBAAAFB80, 0xBAABFB80, 0xBAACFB80, 0xBAADFB80, 0xBAAEFB80, 0xBAAFFB80, 0xBAB0FB80, 0xBAB1FB80, 0xBAB2FB80, 0xBAB3FB80, 0xBAB4FB80, 0xBAB5FB80, + 0xBAB6FB80, 0xBAB7FB80, 0xBAB8FB80, 0xBAB9FB80, 0xBABAFB80, 0xBABBFB80, 0xBABCFB80, 0xBABDFB80, 0xBABEFB80, 0xBABFFB80, 0xBAC0FB80, 0xBAC1FB80, 0xBAC2FB80, 0xBAC3FB80, 0xBAC4FB80, + 0xBAC5FB80, 0xBAC6FB80, 0xBAC7FB80, 0xBAC8FB80, 0xBAC9FB80, 0xBACAFB80, 0xBACBFB80, 0xBACCFB80, 0xBACDFB80, 0xBACEFB80, 0xBACFFB80, 0xBAD0FB80, 0xBAD1FB80, 0xBAD2FB80, 0xBAD3FB80, + 0xBAD4FB80, 0xBAD5FB80, 0xBAD6FB80, 0xBAD7FB80, 0xBAD8FB80, 0xBAD9FB80, 0xBADAFB80, 0xBADBFB80, 0xBADCFB80, 0xBADDFB80, 0xBADEFB80, 0xBADFFB80, 0xBAE0FB80, 0xBAE1FB80, 0xBAE2FB80, + 0xBAE3FB80, 0xBAE4FB80, 0xBAE5FB80, 0xBAE6FB80, 0xBAE7FB80, 0xBAE8FB80, 0xBAE9FB80, 0xBAEAFB80, 0xBAEBFB80, 0xBAECFB80, 0xBAEDFB80, 0xBAEEFB80, 0xBAEFFB80, 0xBAF0FB80, 0xBAF1FB80, + 0xBAF2FB80, 0xBAF3FB80, 0xBAF4FB80, 0xBAF5FB80, 0xBAF6FB80, 0xBAF7FB80, 0xBAF8FB80, 0xBAF9FB80, 0xBAFAFB80, 0xBAFBFB80, 0xBAFCFB80, 0xBAFDFB80, 0xBAFEFB80, 0xBAFFFB80, 0xBB00FB80, + 0xBB01FB80, 0xBB02FB80, 0xBB03FB80, 0xBB04FB80, 0xBB05FB80, 0xBB06FB80, 0xBB07FB80, 0xBB08FB80, 0xBB09FB80, 0xBB0AFB80, 0xBB0BFB80, 0xBB0CFB80, 0xBB0DFB80, 0xBB0EFB80, 0xBB0FFB80, + 0xBB10FB80, 0xBB11FB80, 0xBB12FB80, 0xBB13FB80, 0xBB14FB80, 0xBB15FB80, 0xBB16FB80, 0xBB17FB80, 0xBB18FB80, 0xBB19FB80, 0xBB1AFB80, 0xBB1BFB80, 0xBB1CFB80, 0xBB1DFB80, 0xBB1EFB80, + 0xBB1FFB80, 0xBB20FB80, 0xBB21FB80, 0xBB22FB80, 0xBB23FB80, 0xBB24FB80, 0xBB25FB80, 0xBB26FB80, 0xBB27FB80, 0xBB28FB80, 0xBB29FB80, 0xBB2AFB80, 0xBB2BFB80, 0xBB2CFB80, 0xBB2DFB80, + 0xBB2EFB80, 0xBB2FFB80, 
0xBB30FB80, 0xBB31FB80, 0xBB32FB80, 0xBB33FB80, 0xBB34FB80, 0xBB35FB80, 0xBB36FB80, 0xBB37FB80, 0xBB38FB80, 0xBB39FB80, 0xBB3AFB80, 0xBB3BFB80, 0xBB3CFB80, + 0xBB3DFB80, 0xBB3EFB80, 0xBB3FFB80, 0xBB40FB80, 0xBB41FB80, 0xBB42FB80, 0xBB43FB80, 0xBB44FB80, 0xBB45FB80, 0xBB46FB80, 0xBB47FB80, 0xBB48FB80, 0xBB49FB80, 0xBB4AFB80, 0xBB4BFB80, + 0xBB4CFB80, 0xBB4DFB80, 0xBB4EFB80, 0xBB4FFB80, 0xBB50FB80, 0xBB51FB80, 0xBB52FB80, 0xBB53FB80, 0xBB54FB80, 0xBB55FB80, 0xBB56FB80, 0xBB57FB80, 0xBB58FB80, 0xBB59FB80, 0xBB5AFB80, + 0xBB5BFB80, 0xBB5CFB80, 0xBB5DFB80, 0xBB5EFB80, 0xBB5FFB80, 0xBB60FB80, 0xBB61FB80, 0xBB62FB80, 0xBB63FB80, 0xBB64FB80, 0xBB65FB80, 0xBB66FB80, 0xBB67FB80, 0xBB68FB80, 0xBB69FB80, + 0xBB6AFB80, 0xBB6BFB80, 0xBB6CFB80, 0xBB6DFB80, 0xBB6EFB80, 0xBB6FFB80, 0xBB70FB80, 0xBB71FB80, 0xBB72FB80, 0xBB73FB80, 0xBB74FB80, 0xBB75FB80, 0xBB76FB80, 0xBB77FB80, 0xBB78FB80, + 0xBB79FB80, 0xBB7AFB80, 0xBB7BFB80, 0xBB7CFB80, 0xBB7DFB80, 0xBB7EFB80, 0xBB7FFB80, 0xBB80FB80, 0xBB81FB80, 0xBB82FB80, 0xBB83FB80, 0xBB84FB80, 0xBB85FB80, 0xBB86FB80, 0xBB87FB80, + 0xBB88FB80, 0xBB89FB80, 0xBB8AFB80, 0xBB8BFB80, 0xBB8CFB80, 0xBB8DFB80, 0xBB8EFB80, 0xBB8FFB80, 0xBB90FB80, 0xBB91FB80, 0xBB92FB80, 0xBB93FB80, 0xBB94FB80, 0xBB95FB80, 0xBB96FB80, + 0xBB97FB80, 0xBB98FB80, 0xBB99FB80, 0xBB9AFB80, 0xBB9BFB80, 0xBB9CFB80, 0xBB9DFB80, 0xBB9EFB80, 0xBB9FFB80, 0xBBA0FB80, 0xBBA1FB80, 0xBBA2FB80, 0xBBA3FB80, 0xBBA4FB80, 0xBBA5FB80, + 0xBBA6FB80, 0xBBA7FB80, 0xBBA8FB80, 0xBBA9FB80, 0xBBAAFB80, 0xBBABFB80, 0xBBACFB80, 0xBBADFB80, 0xBBAEFB80, 0xBBAFFB80, 0xBBB0FB80, 0xBBB1FB80, 0xBBB2FB80, 0xBBB3FB80, 0xBBB4FB80, + 0xBBB5FB80, 0xBBB6FB80, 0xBBB7FB80, 0xBBB8FB80, 0xBBB9FB80, 0xBBBAFB80, 0xBBBBFB80, 0xBBBCFB80, 0xBBBDFB80, 0xBBBEFB80, 0xBBBFFB80, 0xBBC0FB80, 0xBBC1FB80, 0xBBC2FB80, 0xBBC3FB80, + 0xBBC4FB80, 0xBBC5FB80, 0xBBC6FB80, 0xBBC7FB80, 0xBBC8FB80, 0xBBC9FB80, 0xBBCAFB80, 0xBBCBFB80, 0xBBCCFB80, 0xBBCDFB80, 0xBBCEFB80, 0xBBCFFB80, 0xBBD0FB80, 0xBBD1FB80, 0xBBD2FB80, + 0xBBD3FB80, 
0xBBD4FB80, 0xBBD5FB80, 0xBBD6FB80, 0xBBD7FB80, 0xBBD8FB80, 0xBBD9FB80, 0xBBDAFB80, 0xBBDBFB80, 0xBBDCFB80, 0xBBDDFB80, 0xBBDEFB80, 0xBBDFFB80, 0xBBE0FB80, 0xBBE1FB80, + 0xBBE2FB80, 0xBBE3FB80, 0xBBE4FB80, 0xBBE5FB80, 0xBBE6FB80, 0xBBE7FB80, 0xBBE8FB80, 0xBBE9FB80, 0xBBEAFB80, 0xBBEBFB80, 0xBBECFB80, 0xBBEDFB80, 0xBBEEFB80, 0xBBEFFB80, 0xBBF0FB80, + 0xBBF1FB80, 0xBBF2FB80, 0xBBF3FB80, 0xBBF4FB80, 0xBBF5FB80, 0xBBF6FB80, 0xBBF7FB80, 0xBBF8FB80, 0xBBF9FB80, 0xBBFAFB80, 0xBBFBFB80, 0xBBFCFB80, 0xBBFDFB80, 0xBBFEFB80, 0xBBFFFB80, + 0xBC00FB80, 0xBC01FB80, 0xBC02FB80, 0xBC03FB80, 0xBC04FB80, 0xBC05FB80, 0xBC06FB80, 0xBC07FB80, 0xBC08FB80, 0xBC09FB80, 0xBC0AFB80, 0xBC0BFB80, 0xBC0CFB80, 0xBC0DFB80, 0xBC0EFB80, + 0xBC0FFB80, 0xBC10FB80, 0xBC11FB80, 0xBC12FB80, 0xBC13FB80, 0xBC14FB80, 0xBC15FB80, 0xBC16FB80, 0xBC17FB80, 0xBC18FB80, 0xBC19FB80, 0xBC1AFB80, 0xBC1BFB80, 0xBC1CFB80, 0xBC1DFB80, + 0xBC1EFB80, 0xBC1FFB80, 0xBC20FB80, 0xBC21FB80, 0xBC22FB80, 0xBC23FB80, 0xBC24FB80, 0xBC25FB80, 0xBC26FB80, 0xBC27FB80, 0xBC28FB80, 0xBC29FB80, 0xBC2AFB80, 0xBC2BFB80, 0xBC2CFB80, + 0xBC2DFB80, 0xBC2EFB80, 0xBC2FFB80, 0xBC30FB80, 0xBC31FB80, 0xBC32FB80, 0xBC33FB80, 0xBC34FB80, 0xBC35FB80, 0xBC36FB80, 0xBC37FB80, 0xBC38FB80, 0xBC39FB80, 0xBC3AFB80, 0xBC3BFB80, + 0xBC3CFB80, 0xBC3DFB80, 0xBC3EFB80, 0xBC3FFB80, 0xBC40FB80, 0xBC41FB80, 0xBC42FB80, 0xBC43FB80, 0xBC44FB80, 0xBC45FB80, 0xBC46FB80, 0xBC47FB80, 0xBC48FB80, 0xBC49FB80, 0xBC4AFB80, + 0xBC4BFB80, 0xBC4CFB80, 0xBC4DFB80, 0xBC4EFB80, 0xBC4FFB80, 0xBC50FB80, 0xBC51FB80, 0xBC52FB80, 0xBC53FB80, 0xBC54FB80, 0xBC55FB80, 0xBC56FB80, 0xBC57FB80, 0xBC58FB80, 0xBC59FB80, + 0xBC5AFB80, 0xBC5BFB80, 0xBC5CFB80, 0xBC5DFB80, 0xBC5EFB80, 0xBC5FFB80, 0xBC60FB80, 0xBC61FB80, 0xBC62FB80, 0xBC63FB80, 0xBC64FB80, 0xBC65FB80, 0xBC66FB80, 0xBC67FB80, 0xBC68FB80, + 0xBC69FB80, 0xBC6AFB80, 0xBC6BFB80, 0xBC6CFB80, 0xBC6DFB80, 0xBC6EFB80, 0xBC6FFB80, 0xBC70FB80, 0xBC71FB80, 0xBC72FB80, 0xBC73FB80, 0xBC74FB80, 0xBC75FB80, 0xBC76FB80, 0xBC77FB80, + 
0xBC78FB80, 0xBC79FB80, 0xBC7AFB80, 0xBC7BFB80, 0xBC7CFB80, 0xBC7DFB80, 0xBC7EFB80, 0xBC7FFB80, 0xBC80FB80, 0xBC81FB80, 0xBC82FB80, 0xBC83FB80, 0xBC84FB80, 0xBC85FB80, 0xBC86FB80, + 0xBC87FB80, 0xBC88FB80, 0xBC89FB80, 0xBC8AFB80, 0xBC8BFB80, 0xBC8CFB80, 0xBC8DFB80, 0xBC8EFB80, 0xBC8FFB80, 0xBC90FB80, 0xBC91FB80, 0xBC92FB80, 0xBC93FB80, 0xBC94FB80, 0xBC95FB80, + 0xBC96FB80, 0xBC97FB80, 0xBC98FB80, 0xBC99FB80, 0xBC9AFB80, 0xBC9BFB80, 0xBC9CFB80, 0xBC9DFB80, 0xBC9EFB80, 0xBC9FFB80, 0xBCA0FB80, 0xBCA1FB80, 0xBCA2FB80, 0xBCA3FB80, 0xBCA4FB80, + 0xBCA5FB80, 0xBCA6FB80, 0xBCA7FB80, 0xBCA8FB80, 0xBCA9FB80, 0xBCAAFB80, 0xBCABFB80, 0xBCACFB80, 0xBCADFB80, 0xBCAEFB80, 0xBCAFFB80, 0xBCB0FB80, 0xBCB1FB80, 0xBCB2FB80, 0xBCB3FB80, + 0xBCB4FB80, 0xBCB5FB80, 0xBCB6FB80, 0xBCB7FB80, 0xBCB8FB80, 0xBCB9FB80, 0xBCBAFB80, 0xBCBBFB80, 0xBCBCFB80, 0xBCBDFB80, 0xBCBEFB80, 0xBCBFFB80, 0xBCC0FB80, 0xBCC1FB80, 0xBCC2FB80, + 0xBCC3FB80, 0xBCC4FB80, 0xBCC5FB80, 0xBCC6FB80, 0xBCC7FB80, 0xBCC8FB80, 0xBCC9FB80, 0xBCCAFB80, 0xBCCBFB80, 0xBCCCFB80, 0xBCCDFB80, 0xBCCEFB80, 0xBCCFFB80, 0xBCD0FB80, 0xBCD1FB80, + 0xBCD2FB80, 0xBCD3FB80, 0xBCD4FB80, 0xBCD5FB80, 0xBCD6FB80, 0xBCD7FB80, 0xBCD8FB80, 0xBCD9FB80, 0xBCDAFB80, 0xBCDBFB80, 0xBCDCFB80, 0xBCDDFB80, 0xBCDEFB80, 0xBCDFFB80, 0xBCE0FB80, + 0xBCE1FB80, 0xBCE2FB80, 0xBCE3FB80, 0xBCE4FB80, 0xBCE5FB80, 0xBCE6FB80, 0xBCE7FB80, 0xBCE8FB80, 0xBCE9FB80, 0xBCEAFB80, 0xBCEBFB80, 0xBCECFB80, 0xBCEDFB80, 0xBCEEFB80, 0xBCEFFB80, + 0xBCF0FB80, 0xBCF1FB80, 0xBCF2FB80, 0xBCF3FB80, 0xBCF4FB80, 0xBCF5FB80, 0xBCF6FB80, 0xBCF7FB80, 0xBCF8FB80, 0xBCF9FB80, 0xBCFAFB80, 0xBCFBFB80, 0xBCFCFB80, 0xBCFDFB80, 0xBCFEFB80, + 0xBCFFFB80, 0xBD00FB80, 0xBD01FB80, 0xBD02FB80, 0xBD03FB80, 0xBD04FB80, 0xBD05FB80, 0xBD06FB80, 0xBD07FB80, 0xBD08FB80, 0xBD09FB80, 0xBD0AFB80, 0xBD0BFB80, 0xBD0CFB80, 0xBD0DFB80, + 0xBD0EFB80, 0xBD0FFB80, 0xBD10FB80, 0xBD11FB80, 0xBD12FB80, 0xBD13FB80, 0xBD14FB80, 0xBD15FB80, 0xBD16FB80, 0xBD17FB80, 0xBD18FB80, 0xBD19FB80, 0xBD1AFB80, 0xBD1BFB80, 0xBD1CFB80, 
+ 0xBD1DFB80, 0xBD1EFB80, 0xBD1FFB80, 0xBD20FB80, 0xBD21FB80, 0xBD22FB80, 0xBD23FB80, 0xBD24FB80, 0xBD25FB80, 0xBD26FB80, 0xBD27FB80, 0xBD28FB80, 0xBD29FB80, 0xBD2AFB80, 0xBD2BFB80, + 0xBD2CFB80, 0xBD2DFB80, 0xBD2EFB80, 0xBD2FFB80, 0xBD30FB80, 0xBD31FB80, 0xBD32FB80, 0xBD33FB80, 0xBD34FB80, 0xBD35FB80, 0xBD36FB80, 0xBD37FB80, 0xBD38FB80, 0xBD39FB80, 0xBD3AFB80, + 0xBD3BFB80, 0xBD3CFB80, 0xBD3DFB80, 0xBD3EFB80, 0xBD3FFB80, 0xBD40FB80, 0xBD41FB80, 0xBD42FB80, 0xBD43FB80, 0xBD44FB80, 0xBD45FB80, 0xBD46FB80, 0xBD47FB80, 0xBD48FB80, 0xBD49FB80, + 0xBD4AFB80, 0xBD4BFB80, 0xBD4CFB80, 0xBD4DFB80, 0xBD4EFB80, 0xBD4FFB80, 0xBD50FB80, 0xBD51FB80, 0xBD52FB80, 0xBD53FB80, 0xBD54FB80, 0xBD55FB80, 0xBD56FB80, 0xBD57FB80, 0xBD58FB80, + 0xBD59FB80, 0xBD5AFB80, 0xBD5BFB80, 0xBD5CFB80, 0xBD5DFB80, 0xBD5EFB80, 0xBD5FFB80, 0xBD60FB80, 0xBD61FB80, 0xBD62FB80, 0xBD63FB80, 0xBD64FB80, 0xBD65FB80, 0xBD66FB80, 0xBD67FB80, + 0xBD68FB80, 0xBD69FB80, 0xBD6AFB80, 0xBD6BFB80, 0xBD6CFB80, 0xBD6DFB80, 0xBD6EFB80, 0xBD6FFB80, 0xBD70FB80, 0xBD71FB80, 0xBD72FB80, 0xBD73FB80, 0xBD74FB80, 0xBD75FB80, 0xBD76FB80, + 0xBD77FB80, 0xBD78FB80, 0xBD79FB80, 0xBD7AFB80, 0xBD7BFB80, 0xBD7CFB80, 0xBD7DFB80, 0xBD7EFB80, 0xBD7FFB80, 0xBD80FB80, 0xBD81FB80, 0xBD82FB80, 0xBD83FB80, 0xBD84FB80, 0xBD85FB80, + 0xBD86FB80, 0xBD87FB80, 0xBD88FB80, 0xBD89FB80, 0xBD8AFB80, 0xBD8BFB80, 0xBD8CFB80, 0xBD8DFB80, 0xBD8EFB80, 0xBD8FFB80, 0xBD90FB80, 0xBD91FB80, 0xBD92FB80, 0xBD93FB80, 0xBD94FB80, + 0xBD95FB80, 0xBD96FB80, 0xBD97FB80, 0xBD98FB80, 0xBD99FB80, 0xBD9AFB80, 0xBD9BFB80, 0xBD9CFB80, 0xBD9DFB80, 0xBD9EFB80, 0xBD9FFB80, 0xBDA0FB80, 0xBDA1FB80, 0xBDA2FB80, 0xBDA3FB80, + 0xBDA4FB80, 0xBDA5FB80, 0xBDA6FB80, 0xBDA7FB80, 0xBDA8FB80, 0xBDA9FB80, 0xBDAAFB80, 0xBDABFB80, 0xBDACFB80, 0xBDADFB80, 0xBDAEFB80, 0xBDAFFB80, 0xBDB0FB80, 0xBDB1FB80, 0xBDB2FB80, + 0xBDB3FB80, 0xBDB4FB80, 0xBDB5FB80, 0xBDB6FB80, 0xBDB7FB80, 0xBDB8FB80, 0xBDB9FB80, 0xBDBAFB80, 0xBDBBFB80, 0xBDBCFB80, 0xBDBDFB80, 0xBDBEFB80, 0xBDBFFB80, 0xBDC0FB80, 
0xBDC1FB80, + 0xBDC2FB80, 0xBDC3FB80, 0xBDC4FB80, 0xBDC5FB80, 0xBDC6FB80, 0xBDC7FB80, 0xBDC8FB80, 0xBDC9FB80, 0xBDCAFB80, 0xBDCBFB80, 0xBDCCFB80, 0xBDCDFB80, 0xBDCEFB80, 0xBDCFFB80, 0xBDD0FB80, + 0xBDD1FB80, 0xBDD2FB80, 0xBDD3FB80, 0xBDD4FB80, 0xBDD5FB80, 0xBDD6FB80, 0xBDD7FB80, 0xBDD8FB80, 0xBDD9FB80, 0xBDDAFB80, 0xBDDBFB80, 0xBDDCFB80, 0xBDDDFB80, 0xBDDEFB80, 0xBDDFFB80, + 0xBDE0FB80, 0xBDE1FB80, 0xBDE2FB80, 0xBDE3FB80, 0xBDE4FB80, 0xBDE5FB80, 0xBDE6FB80, 0xBDE7FB80, 0xBDE8FB80, 0xBDE9FB80, 0xBDEAFB80, 0xBDEBFB80, 0xBDECFB80, 0xBDEDFB80, 0xBDEEFB80, + 0xBDEFFB80, 0xBDF0FB80, 0xBDF1FB80, 0xBDF2FB80, 0xBDF3FB80, 0xBDF4FB80, 0xBDF5FB80, 0xBDF6FB80, 0xBDF7FB80, 0xBDF8FB80, 0xBDF9FB80, 0xBDFAFB80, 0xBDFBFB80, 0xBDFCFB80, 0xBDFDFB80, + 0xBDFEFB80, 0xBDFFFB80, 0xBE00FB80, 0xBE01FB80, 0xBE02FB80, 0xBE03FB80, 0xBE04FB80, 0xBE05FB80, 0xBE06FB80, 0xBE07FB80, 0xBE08FB80, 0xBE09FB80, 0xBE0AFB80, 0xBE0BFB80, 0xBE0CFB80, + 0xBE0DFB80, 0xBE0EFB80, 0xBE0FFB80, 0xBE10FB80, 0xBE11FB80, 0xBE12FB80, 0xBE13FB80, 0xBE14FB80, 0xBE15FB80, 0xBE16FB80, 0xBE17FB80, 0xBE18FB80, 0xBE19FB80, 0xBE1AFB80, 0xBE1BFB80, + 0xBE1CFB80, 0xBE1DFB80, 0xBE1EFB80, 0xBE1FFB80, 0xBE20FB80, 0xBE21FB80, 0xBE22FB80, 0xBE23FB80, 0xBE24FB80, 0xBE25FB80, 0xBE26FB80, 0xBE27FB80, 0xBE28FB80, 0xBE29FB80, 0xBE2AFB80, + 0xBE2BFB80, 0xBE2CFB80, 0xBE2DFB80, 0xBE2EFB80, 0xBE2FFB80, 0xBE30FB80, 0xBE31FB80, 0xBE32FB80, 0xBE33FB80, 0xBE34FB80, 0xBE35FB80, 0xBE36FB80, 0xBE37FB80, 0xBE38FB80, 0xBE39FB80, + 0xBE3AFB80, 0xBE3BFB80, 0xBE3CFB80, 0xBE3DFB80, 0xBE3EFB80, 0xBE3FFB80, 0xBE40FB80, 0xBE41FB80, 0xBE42FB80, 0xBE43FB80, 0xBE44FB80, 0xBE45FB80, 0xBE46FB80, 0xBE47FB80, 0xBE48FB80, + 0xBE49FB80, 0xBE4AFB80, 0xBE4BFB80, 0xBE4CFB80, 0xBE4DFB80, 0xBE4EFB80, 0xBE4FFB80, 0xBE50FB80, 0xBE51FB80, 0xBE52FB80, 0xBE53FB80, 0xBE54FB80, 0xBE55FB80, 0xBE56FB80, 0xBE57FB80, + 0xBE58FB80, 0xBE59FB80, 0xBE5AFB80, 0xBE5BFB80, 0xBE5CFB80, 0xBE5DFB80, 0xBE5EFB80, 0xBE5FFB80, 0xBE60FB80, 0xBE61FB80, 0xBE62FB80, 0xBE63FB80, 0xBE64FB80, 
0xBE65FB80, 0xBE66FB80, + 0xBE67FB80, 0xBE68FB80, 0xBE69FB80, 0xBE6AFB80, 0xBE6BFB80, 0xBE6CFB80, 0xBE6DFB80, 0xBE6EFB80, 0xBE6FFB80, 0xBE70FB80, 0xBE71FB80, 0xBE72FB80, 0xBE73FB80, 0xBE74FB80, 0xBE75FB80, + 0xBE76FB80, 0xBE77FB80, 0xBE78FB80, 0xBE79FB80, 0xBE7AFB80, 0xBE7BFB80, 0xBE7CFB80, 0xBE7DFB80, 0xBE7EFB80, 0xBE7FFB80, 0xBE80FB80, 0xBE81FB80, 0xBE82FB80, 0xBE83FB80, 0xBE84FB80, + 0xBE85FB80, 0xBE86FB80, 0xBE87FB80, 0xBE88FB80, 0xBE89FB80, 0xBE8AFB80, 0xBE8BFB80, 0xBE8CFB80, 0xBE8DFB80, 0xBE8EFB80, 0xBE8FFB80, 0xBE90FB80, 0xBE91FB80, 0xBE92FB80, 0xBE93FB80, + 0xBE94FB80, 0xBE95FB80, 0xBE96FB80, 0xBE97FB80, 0xBE98FB80, 0xBE99FB80, 0xBE9AFB80, 0xBE9BFB80, 0xBE9CFB80, 0xBE9DFB80, 0xBE9EFB80, 0xBE9FFB80, 0xBEA0FB80, 0xBEA1FB80, 0xBEA2FB80, + 0xBEA3FB80, 0xBEA4FB80, 0xBEA5FB80, 0xBEA6FB80, 0xBEA7FB80, 0xBEA8FB80, 0xBEA9FB80, 0xBEAAFB80, 0xBEABFB80, 0xBEACFB80, 0xBEADFB80, 0xBEAEFB80, 0xBEAFFB80, 0xBEB0FB80, 0xBEB1FB80, + 0xBEB2FB80, 0xBEB3FB80, 0xBEB4FB80, 0xBEB5FB80, 0xBEB6FB80, 0xBEB7FB80, 0xBEB8FB80, 0xBEB9FB80, 0xBEBAFB80, 0xBEBBFB80, 0xBEBCFB80, 0xBEBDFB80, 0xBEBEFB80, 0xBEBFFB80, 0xBEC0FB80, + 0xBEC1FB80, 0xBEC2FB80, 0xBEC3FB80, 0xBEC4FB80, 0xBEC5FB80, 0xBEC6FB80, 0xBEC7FB80, 0xBEC8FB80, 0xBEC9FB80, 0xBECAFB80, 0xBECBFB80, 0xBECCFB80, 0xBECDFB80, 0xBECEFB80, 0xBECFFB80, + 0xBED0FB80, 0xBED1FB80, 0xBED2FB80, 0xBED3FB80, 0xBED4FB80, 0xBED5FB80, 0xBED6FB80, 0xBED7FB80, 0xBED8FB80, 0xBED9FB80, 0xBEDAFB80, 0xBEDBFB80, 0xBEDCFB80, 0xBEDDFB80, 0xBEDEFB80, + 0xBEDFFB80, 0xBEE0FB80, 0xBEE1FB80, 0xBEE2FB80, 0xBEE3FB80, 0xBEE4FB80, 0xBEE5FB80, 0xBEE6FB80, 0xBEE7FB80, 0xBEE8FB80, 0xBEE9FB80, 0xBEEAFB80, 0xBEEBFB80, 0xBEECFB80, 0xBEEDFB80, + 0xBEEEFB80, 0xBEEFFB80, 0xBEF0FB80, 0xBEF1FB80, 0xBEF2FB80, 0xBEF3FB80, 0xBEF4FB80, 0xBEF5FB80, 0xBEF6FB80, 0xBEF7FB80, 0xBEF8FB80, 0xBEF9FB80, 0xBEFAFB80, 0xBEFBFB80, 0xBEFCFB80, + 0xBEFDFB80, 0xBEFEFB80, 0xBEFFFB80, 0xBF00FB80, 0xBF01FB80, 0xBF02FB80, 0xBF03FB80, 0xBF04FB80, 0xBF05FB80, 0xBF06FB80, 0xBF07FB80, 0xBF08FB80, 
0xBF09FB80, 0xBF0AFB80, 0xBF0BFB80, + 0xBF0CFB80, 0xBF0DFB80, 0xBF0EFB80, 0xBF0FFB80, 0xBF10FB80, 0xBF11FB80, 0xBF12FB80, 0xBF13FB80, 0xBF14FB80, 0xBF15FB80, 0xBF16FB80, 0xBF17FB80, 0xBF18FB80, 0xBF19FB80, 0xBF1AFB80, + 0xBF1BFB80, 0xBF1CFB80, 0xBF1DFB80, 0xBF1EFB80, 0xBF1FFB80, 0xBF20FB80, 0xBF21FB80, 0xBF22FB80, 0xBF23FB80, 0xBF24FB80, 0xBF25FB80, 0xBF26FB80, 0xBF27FB80, 0xBF28FB80, 0xBF29FB80, + 0xBF2AFB80, 0xBF2BFB80, 0xBF2CFB80, 0xBF2DFB80, 0xBF2EFB80, 0xBF2FFB80, 0xBF30FB80, 0xBF31FB80, 0xBF32FB80, 0xBF33FB80, 0xBF34FB80, 0xBF35FB80, 0xBF36FB80, 0xBF37FB80, 0xBF38FB80, + 0xBF39FB80, 0xBF3AFB80, 0xBF3BFB80, 0xBF3CFB80, 0xBF3DFB80, 0xBF3EFB80, 0xBF3FFB80, 0xBF40FB80, 0xBF41FB80, 0xBF42FB80, 0xBF43FB80, 0xBF44FB80, 0xBF45FB80, 0xBF46FB80, 0xBF47FB80, + 0xBF48FB80, 0xBF49FB80, 0xBF4AFB80, 0xBF4BFB80, 0xBF4CFB80, 0xBF4DFB80, 0xBF4EFB80, 0xBF4FFB80, 0xBF50FB80, 0xBF51FB80, 0xBF52FB80, 0xBF53FB80, 0xBF54FB80, 0xBF55FB80, 0xBF56FB80, + 0xBF57FB80, 0xBF58FB80, 0xBF59FB80, 0xBF5AFB80, 0xBF5BFB80, 0xBF5CFB80, 0xBF5DFB80, 0xBF5EFB80, 0xBF5FFB80, 0xBF60FB80, 0xBF61FB80, 0xBF62FB80, 0xBF63FB80, 0xBF64FB80, 0xBF65FB80, + 0xBF66FB80, 0xBF67FB80, 0xBF68FB80, 0xBF69FB80, 0xBF6AFB80, 0xBF6BFB80, 0xBF6CFB80, 0xBF6DFB80, 0xBF6EFB80, 0xBF6FFB80, 0xBF70FB80, 0xBF71FB80, 0xBF72FB80, 0xBF73FB80, 0xBF74FB80, + 0xBF75FB80, 0xBF76FB80, 0xBF77FB80, 0xBF78FB80, 0xBF79FB80, 0xBF7AFB80, 0xBF7BFB80, 0xBF7CFB80, 0xBF7DFB80, 0xBF7EFB80, 0xBF7FFB80, 0xBF80FB80, 0xBF81FB80, 0xBF82FB80, 0xBF83FB80, + 0xBF84FB80, 0xBF85FB80, 0xBF86FB80, 0xBF87FB80, 0xBF88FB80, 0xBF89FB80, 0xBF8AFB80, 0xBF8BFB80, 0xBF8CFB80, 0xBF8DFB80, 0xBF8EFB80, 0xBF8FFB80, 0xBF90FB80, 0xBF91FB80, 0xBF92FB80, + 0xBF93FB80, 0xBF94FB80, 0xBF95FB80, 0xBF96FB80, 0xBF97FB80, 0xBF98FB80, 0xBF99FB80, 0xBF9AFB80, 0xBF9BFB80, 0xBF9CFB80, 0xBF9DFB80, 0xBF9EFB80, 0xBF9FFB80, 0xBFA0FB80, 0xBFA1FB80, + 0xBFA2FB80, 0xBFA3FB80, 0xBFA4FB80, 0xBFA5FB80, 0xBFA6FB80, 0xBFA7FB80, 0xBFA8FB80, 0xBFA9FB80, 0xBFAAFB80, 0xBFABFB80, 0xBFACFB80, 
0xBFADFB80, 0xBFAEFB80, 0xBFAFFB80, 0xBFB0FB80, + 0xBFB1FB80, 0xBFB2FB80, 0xBFB3FB80, 0xBFB4FB80, 0xBFB5FB80, 0xBFB6FB80, 0xBFB7FB80, 0xBFB8FB80, 0xBFB9FB80, 0xBFBAFB80, 0xBFBBFB80, 0xBFBCFB80, 0xBFBDFB80, 0xBFBEFB80, 0xBFBFFB80, + 0xBFC0FB80, 0xBFC1FB80, 0xBFC2FB80, 0xBFC3FB80, 0xBFC4FB80, 0xBFC5FB80, 0xBFC6FB80, 0xBFC7FB80, 0xBFC8FB80, 0xBFC9FB80, 0xBFCAFB80, 0xBFCBFB80, 0xBFCCFB80, 0xBFCDFB80, 0xBFCEFB80, + 0xBFCFFB80, 0xBFD0FB80, 0xBFD1FB80, 0xBFD2FB80, 0xBFD3FB80, 0xBFD4FB80, 0xBFD5FB80, 0xBFD6FB80, 0xBFD7FB80, 0xBFD8FB80, 0xBFD9FB80, 0xBFDAFB80, 0xBFDBFB80, 0xBFDCFB80, 0xBFDDFB80, + 0xBFDEFB80, 0xBFDFFB80, 0xBFE0FB80, 0xBFE1FB80, 0xBFE2FB80, 0xBFE3FB80, 0xBFE4FB80, 0xBFE5FB80, 0xBFE6FB80, 0xBFE7FB80, 0xBFE8FB80, 0xBFE9FB80, 0xBFEAFB80, 0xBFEBFB80, 0xBFECFB80, + 0xBFEDFB80, 0xBFEEFB80, 0xBFEFFB80, 0xBFF0FB80, 0xBFF1FB80, 0xBFF2FB80, 0xBFF3FB80, 0xBFF4FB80, 0xBFF5FB80, 0xBFF6FB80, 0xBFF7FB80, 0xBFF8FB80, 0xBFF9FB80, 0xBFFAFB80, 0xBFFBFB80, + 0xBFFCFB80, 0xBFFDFB80, 0xBFFEFB80, 0xBFFFFB80, 0xC000FB80, 0xC001FB80, 0xC002FB80, 0xC003FB80, 0xC004FB80, 0xC005FB80, 0xC006FB80, 0xC007FB80, 0xC008FB80, 0xC009FB80, 0xC00AFB80, + 0xC00BFB80, 0xC00CFB80, 0xC00DFB80, 0xC00EFB80, 0xC00FFB80, 0xC010FB80, 0xC011FB80, 0xC012FB80, 0xC013FB80, 0xC014FB80, 0xC015FB80, 0xC016FB80, 0xC017FB80, 0xC018FB80, 0xC019FB80, + 0xC01AFB80, 0xC01BFB80, 0xC01CFB80, 0xC01DFB80, 0xC01EFB80, 0xC01FFB80, 0xC020FB80, 0xC021FB80, 0xC022FB80, 0xC023FB80, 0xC024FB80, 0xC025FB80, 0xC026FB80, 0xC027FB80, 0xC028FB80, + 0xC029FB80, 0xC02AFB80, 0xC02BFB80, 0xC02CFB80, 0xC02DFB80, 0xC02EFB80, 0xC02FFB80, 0xC030FB80, 0xC031FB80, 0xC032FB80, 0xC033FB80, 0xC034FB80, 0xC035FB80, 0xC036FB80, 0xC037FB80, + 0xC038FB80, 0xC039FB80, 0xC03AFB80, 0xC03BFB80, 0xC03CFB80, 0xC03DFB80, 0xC03EFB80, 0xC03FFB80, 0xC040FB80, 0xC041FB80, 0xC042FB80, 0xC043FB80, 0xC044FB80, 0xC045FB80, 0xC046FB80, + 0xC047FB80, 0xC048FB80, 0xC049FB80, 0xC04AFB80, 0xC04BFB80, 0xC04CFB80, 0xC04DFB80, 0xC04EFB80, 0xC04FFB80, 0xC050FB80, 
0xC051FB80, 0xC052FB80, 0xC053FB80, 0xC054FB80, 0xC055FB80, + 0xC056FB80, 0xC057FB80, 0xC058FB80, 0xC059FB80, 0xC05AFB80, 0xC05BFB80, 0xC05CFB80, 0xC05DFB80, 0xC05EFB80, 0xC05FFB80, 0xC060FB80, 0xC061FB80, 0xC062FB80, 0xC063FB80, 0xC064FB80, + 0xC065FB80, 0xC066FB80, 0xC067FB80, 0xC068FB80, 0xC069FB80, 0xC06AFB80, 0xC06BFB80, 0xC06CFB80, 0xC06DFB80, 0xC06EFB80, 0xC06FFB80, 0xC070FB80, 0xC071FB80, 0xC072FB80, 0xC073FB80, + 0xC074FB80, 0xC075FB80, 0xC076FB80, 0xC077FB80, 0xC078FB80, 0xC079FB80, 0xC07AFB80, 0xC07BFB80, 0xC07CFB80, 0xC07DFB80, 0xC07EFB80, 0xC07FFB80, 0xC080FB80, 0xC081FB80, 0xC082FB80, + 0xC083FB80, 0xC084FB80, 0xC085FB80, 0xC086FB80, 0xC087FB80, 0xC088FB80, 0xC089FB80, 0xC08AFB80, 0xC08BFB80, 0xC08CFB80, 0xC08DFB80, 0xC08EFB80, 0xC08FFB80, 0xC090FB80, 0xC091FB80, + 0xC092FB80, 0xC093FB80, 0xC094FB80, 0xC095FB80, 0xC096FB80, 0xC097FB80, 0xC098FB80, 0xC099FB80, 0xC09AFB80, 0xC09BFB80, 0xC09CFB80, 0xC09DFB80, 0xC09EFB80, 0xC09FFB80, 0xC0A0FB80, + 0xC0A1FB80, 0xC0A2FB80, 0xC0A3FB80, 0xC0A4FB80, 0xC0A5FB80, 0xC0A6FB80, 0xC0A7FB80, 0xC0A8FB80, 0xC0A9FB80, 0xC0AAFB80, 0xC0ABFB80, 0xC0ACFB80, 0xC0ADFB80, 0xC0AEFB80, 0xC0AFFB80, + 0xC0B0FB80, 0xC0B1FB80, 0xC0B2FB80, 0xC0B3FB80, 0xC0B4FB80, 0xC0B5FB80, 0xC0B6FB80, 0xC0B7FB80, 0xC0B8FB80, 0xC0B9FB80, 0xC0BAFB80, 0xC0BBFB80, 0xC0BCFB80, 0xC0BDFB80, 0xC0BEFB80, + 0xC0BFFB80, 0xC0C0FB80, 0xC0C1FB80, 0xC0C2FB80, 0xC0C3FB80, 0xC0C4FB80, 0xC0C5FB80, 0xC0C6FB80, 0xC0C7FB80, 0xC0C8FB80, 0xC0C9FB80, 0xC0CAFB80, 0xC0CBFB80, 0xC0CCFB80, 0xC0CDFB80, + 0xC0CEFB80, 0xC0CFFB80, 0xC0D0FB80, 0xC0D1FB80, 0xC0D2FB80, 0xC0D3FB80, 0xC0D4FB80, 0xC0D5FB80, 0xC0D6FB80, 0xC0D7FB80, 0xC0D8FB80, 0xC0D9FB80, 0xC0DAFB80, 0xC0DBFB80, 0xC0DCFB80, + 0xC0DDFB80, 0xC0DEFB80, 0xC0DFFB80, 0xC0E0FB80, 0xC0E1FB80, 0xC0E2FB80, 0xC0E3FB80, 0xC0E4FB80, 0xC0E5FB80, 0xC0E6FB80, 0xC0E7FB80, 0xC0E8FB80, 0xC0E9FB80, 0xC0EAFB80, 0xC0EBFB80, + 0xC0ECFB80, 0xC0EDFB80, 0xC0EEFB80, 0xC0EFFB80, 0xC0F0FB80, 0xC0F1FB80, 0xC0F2FB80, 0xC0F3FB80, 0xC0F4FB80, 
0xC0F5FB80, 0xC0F6FB80, 0xC0F7FB80, 0xC0F8FB80, 0xC0F9FB80, 0xC0FAFB80, + 0xC0FBFB80, 0xC0FCFB80, 0xC0FDFB80, 0xC0FEFB80, 0xC0FFFB80, 0xC100FB80, 0xC101FB80, 0xC102FB80, 0xC103FB80, 0xC104FB80, 0xC105FB80, 0xC106FB80, 0xC107FB80, 0xC108FB80, 0xC109FB80, + 0xC10AFB80, 0xC10BFB80, 0xC10CFB80, 0xC10DFB80, 0xC10EFB80, 0xC10FFB80, 0xC110FB80, 0xC111FB80, 0xC112FB80, 0xC113FB80, 0xC114FB80, 0xC115FB80, 0xC116FB80, 0xC117FB80, 0xC118FB80, + 0xC119FB80, 0xC11AFB80, 0xC11BFB80, 0xC11CFB80, 0xC11DFB80, 0xC11EFB80, 0xC11FFB80, 0xC120FB80, 0xC121FB80, 0xC122FB80, 0xC123FB80, 0xC124FB80, 0xC125FB80, 0xC126FB80, 0xC127FB80, + 0xC128FB80, 0xC129FB80, 0xC12AFB80, 0xC12BFB80, 0xC12CFB80, 0xC12DFB80, 0xC12EFB80, 0xC12FFB80, 0xC130FB80, 0xC131FB80, 0xC132FB80, 0xC133FB80, 0xC134FB80, 0xC135FB80, 0xC136FB80, + 0xC137FB80, 0xC138FB80, 0xC139FB80, 0xC13AFB80, 0xC13BFB80, 0xC13CFB80, 0xC13DFB80, 0xC13EFB80, 0xC13FFB80, 0xC140FB80, 0xC141FB80, 0xC142FB80, 0xC143FB80, 0xC144FB80, 0xC145FB80, + 0xC146FB80, 0xC147FB80, 0xC148FB80, 0xC149FB80, 0xC14AFB80, 0xC14BFB80, 0xC14CFB80, 0xC14DFB80, 0xC14EFB80, 0xC14FFB80, 0xC150FB80, 0xC151FB80, 0xC152FB80, 0xC153FB80, 0xC154FB80, + 0xC155FB80, 0xC156FB80, 0xC157FB80, 0xC158FB80, 0xC159FB80, 0xC15AFB80, 0xC15BFB80, 0xC15CFB80, 0xC15DFB80, 0xC15EFB80, 0xC15FFB80, 0xC160FB80, 0xC161FB80, 0xC162FB80, 0xC163FB80, + 0xC164FB80, 0xC165FB80, 0xC166FB80, 0xC167FB80, 0xC168FB80, 0xC169FB80, 0xC16AFB80, 0xC16BFB80, 0xC16CFB80, 0xC16DFB80, 0xC16EFB80, 0xC16FFB80, 0xC170FB80, 0xC171FB80, 0xC172FB80, + 0xC173FB80, 0xC174FB80, 0xC175FB80, 0xC176FB80, 0xC177FB80, 0xC178FB80, 0xC179FB80, 0xC17AFB80, 0xC17BFB80, 0xC17CFB80, 0xC17DFB80, 0xC17EFB80, 0xC17FFB80, 0xC180FB80, 0xC181FB80, + 0xC182FB80, 0xC183FB80, 0xC184FB80, 0xC185FB80, 0xC186FB80, 0xC187FB80, 0xC188FB80, 0xC189FB80, 0xC18AFB80, 0xC18BFB80, 0xC18CFB80, 0xC18DFB80, 0xC18EFB80, 0xC18FFB80, 0xC190FB80, + 0xC191FB80, 0xC192FB80, 0xC193FB80, 0xC194FB80, 0xC195FB80, 0xC196FB80, 0xC197FB80, 0xC198FB80, 
0xC199FB80, 0xC19AFB80, 0xC19BFB80, 0xC19CFB80, 0xC19DFB80, 0xC19EFB80, 0xC19FFB80, + 0xC1A0FB80, 0xC1A1FB80, 0xC1A2FB80, 0xC1A3FB80, 0xC1A4FB80, 0xC1A5FB80, 0xC1A6FB80, 0xC1A7FB80, 0xC1A8FB80, 0xC1A9FB80, 0xC1AAFB80, 0xC1ABFB80, 0xC1ACFB80, 0xC1ADFB80, 0xC1AEFB80, + 0xC1AFFB80, 0xC1B0FB80, 0xC1B1FB80, 0xC1B2FB80, 0xC1B3FB80, 0xC1B4FB80, 0xC1B5FB80, 0xC1B6FB80, 0xC1B7FB80, 0xC1B8FB80, 0xC1B9FB80, 0xC1BAFB80, 0xC1BBFB80, 0xC1BCFB80, 0xC1BDFB80, + 0xC1BEFB80, 0xC1BFFB80, 0xC1C0FB80, 0xC1C1FB80, 0xC1C2FB80, 0xC1C3FB80, 0xC1C4FB80, 0xC1C5FB80, 0xC1C6FB80, 0xC1C7FB80, 0xC1C8FB80, 0xC1C9FB80, 0xC1CAFB80, 0xC1CBFB80, 0xC1CCFB80, + 0xC1CDFB80, 0xC1CEFB80, 0xC1CFFB80, 0xC1D0FB80, 0xC1D1FB80, 0xC1D2FB80, 0xC1D3FB80, 0xC1D4FB80, 0xC1D5FB80, 0xC1D6FB80, 0xC1D7FB80, 0xC1D8FB80, 0xC1D9FB80, 0xC1DAFB80, 0xC1DBFB80, + 0xC1DCFB80, 0xC1DDFB80, 0xC1DEFB80, 0xC1DFFB80, 0xC1E0FB80, 0xC1E1FB80, 0xC1E2FB80, 0xC1E3FB80, 0xC1E4FB80, 0xC1E5FB80, 0xC1E6FB80, 0xC1E7FB80, 0xC1E8FB80, 0xC1E9FB80, 0xC1EAFB80, + 0xC1EBFB80, 0xC1ECFB80, 0xC1EDFB80, 0xC1EEFB80, 0xC1EFFB80, 0xC1F0FB80, 0xC1F1FB80, 0xC1F2FB80, 0xC1F3FB80, 0xC1F4FB80, 0xC1F5FB80, 0xC1F6FB80, 0xC1F7FB80, 0xC1F8FB80, 0xC1F9FB80, + 0xC1FAFB80, 0xC1FBFB80, 0xC1FCFB80, 0xC1FDFB80, 0xC1FEFB80, 0xC1FFFB80, 0xC200FB80, 0xC201FB80, 0xC202FB80, 0xC203FB80, 0xC204FB80, 0xC205FB80, 0xC206FB80, 0xC207FB80, 0xC208FB80, + 0xC209FB80, 0xC20AFB80, 0xC20BFB80, 0xC20CFB80, 0xC20DFB80, 0xC20EFB80, 0xC20FFB80, 0xC210FB80, 0xC211FB80, 0xC212FB80, 0xC213FB80, 0xC214FB80, 0xC215FB80, 0xC216FB80, 0xC217FB80, + 0xC218FB80, 0xC219FB80, 0xC21AFB80, 0xC21BFB80, 0xC21CFB80, 0xC21DFB80, 0xC21EFB80, 0xC21FFB80, 0xC220FB80, 0xC221FB80, 0xC222FB80, 0xC223FB80, 0xC224FB80, 0xC225FB80, 0xC226FB80, + 0xC227FB80, 0xC228FB80, 0xC229FB80, 0xC22AFB80, 0xC22BFB80, 0xC22CFB80, 0xC22DFB80, 0xC22EFB80, 0xC22FFB80, 0xC230FB80, 0xC231FB80, 0xC232FB80, 0xC233FB80, 0xC234FB80, 0xC235FB80, + 0xC236FB80, 0xC237FB80, 0xC238FB80, 0xC239FB80, 0xC23AFB80, 0xC23BFB80, 0xC23CFB80, 
0xC23DFB80, 0xC23EFB80, 0xC23FFB80, 0xC240FB80, 0xC241FB80, 0xC242FB80, 0xC243FB80, 0xC244FB80, + 0xC245FB80, 0xC246FB80, 0xC247FB80, 0xC248FB80, 0xC249FB80, 0xC24AFB80, 0xC24BFB80, 0xC24CFB80, 0xC24DFB80, 0xC24EFB80, 0xC24FFB80, 0xC250FB80, 0xC251FB80, 0xC252FB80, 0xC253FB80, + 0xC254FB80, 0xC255FB80, 0xC256FB80, 0xC257FB80, 0xC258FB80, 0xC259FB80, 0xC25AFB80, 0xC25BFB80, 0xC25CFB80, 0xC25DFB80, 0xC25EFB80, 0xC25FFB80, 0xC260FB80, 0xC261FB80, 0xC262FB80, + 0xC263FB80, 0xC264FB80, 0xC265FB80, 0xC266FB80, 0xC267FB80, 0xC268FB80, 0xC269FB80, 0xC26AFB80, 0xC26BFB80, 0xC26CFB80, 0xC26DFB80, 0xC26EFB80, 0xC26FFB80, 0xC270FB80, 0xC271FB80, + 0xC272FB80, 0xC273FB80, 0xC274FB80, 0xC275FB80, 0xC276FB80, 0xC277FB80, 0xC278FB80, 0xC279FB80, 0xC27AFB80, 0xC27BFB80, 0xC27CFB80, 0xC27DFB80, 0xC27EFB80, 0xC27FFB80, 0xC280FB80, + 0xC281FB80, 0xC282FB80, 0xC283FB80, 0xC284FB80, 0xC285FB80, 0xC286FB80, 0xC287FB80, 0xC288FB80, 0xC289FB80, 0xC28AFB80, 0xC28BFB80, 0xC28CFB80, 0xC28DFB80, 0xC28EFB80, 0xC28FFB80, + 0xC290FB80, 0xC291FB80, 0xC292FB80, 0xC293FB80, 0xC294FB80, 0xC295FB80, 0xC296FB80, 0xC297FB80, 0xC298FB80, 0xC299FB80, 0xC29AFB80, 0xC29BFB80, 0xC29CFB80, 0xC29DFB80, 0xC29EFB80, + 0xC29FFB80, 0xC2A0FB80, 0xC2A1FB80, 0xC2A2FB80, 0xC2A3FB80, 0xC2A4FB80, 0xC2A5FB80, 0xC2A6FB80, 0xC2A7FB80, 0xC2A8FB80, 0xC2A9FB80, 0xC2AAFB80, 0xC2ABFB80, 0xC2ACFB80, 0xC2ADFB80, + 0xC2AEFB80, 0xC2AFFB80, 0xC2B0FB80, 0xC2B1FB80, 0xC2B2FB80, 0xC2B3FB80, 0xC2B4FB80, 0xC2B5FB80, 0xC2B6FB80, 0xC2B7FB80, 0xC2B8FB80, 0xC2B9FB80, 0xC2BAFB80, 0xC2BBFB80, 0xC2BCFB80, + 0xC2BDFB80, 0xC2BEFB80, 0xC2BFFB80, 0xC2C0FB80, 0xC2C1FB80, 0xC2C2FB80, 0xC2C3FB80, 0xC2C4FB80, 0xC2C5FB80, 0xC2C6FB80, 0xC2C7FB80, 0xC2C8FB80, 0xC2C9FB80, 0xC2CAFB80, 0xC2CBFB80, + 0xC2CCFB80, 0xC2CDFB80, 0xC2CEFB80, 0xC2CFFB80, 0xC2D0FB80, 0xC2D1FB80, 0xC2D2FB80, 0xC2D3FB80, 0xC2D4FB80, 0xC2D5FB80, 0xC2D6FB80, 0xC2D7FB80, 0xC2D8FB80, 0xC2D9FB80, 0xC2DAFB80, + 0xC2DBFB80, 0xC2DCFB80, 0xC2DDFB80, 0xC2DEFB80, 0xC2DFFB80, 0xC2E0FB80, 
0xC2E1FB80, 0xC2E2FB80, 0xC2E3FB80, 0xC2E4FB80, 0xC2E5FB80, 0xC2E6FB80, 0xC2E7FB80, 0xC2E8FB80, 0xC2E9FB80, + 0xC2EAFB80, 0xC2EBFB80, 0xC2ECFB80, 0xC2EDFB80, 0xC2EEFB80, 0xC2EFFB80, 0xC2F0FB80, 0xC2F1FB80, 0xC2F2FB80, 0xC2F3FB80, 0xC2F4FB80, 0xC2F5FB80, 0xC2F6FB80, 0xC2F7FB80, 0xC2F8FB80, + 0xC2F9FB80, 0xC2FAFB80, 0xC2FBFB80, 0xC2FCFB80, 0xC2FDFB80, 0xC2FEFB80, 0xC2FFFB80, 0xC300FB80, 0xC301FB80, 0xC302FB80, 0xC303FB80, 0xC304FB80, 0xC305FB80, 0xC306FB80, 0xC307FB80, + 0xC308FB80, 0xC309FB80, 0xC30AFB80, 0xC30BFB80, 0xC30CFB80, 0xC30DFB80, 0xC30EFB80, 0xC30FFB80, 0xC310FB80, 0xC311FB80, 0xC312FB80, 0xC313FB80, 0xC314FB80, 0xC315FB80, 0xC316FB80, + 0xC317FB80, 0xC318FB80, 0xC319FB80, 0xC31AFB80, 0xC31BFB80, 0xC31CFB80, 0xC31DFB80, 0xC31EFB80, 0xC31FFB80, 0xC320FB80, 0xC321FB80, 0xC322FB80, 0xC323FB80, 0xC324FB80, 0xC325FB80, + 0xC326FB80, 0xC327FB80, 0xC328FB80, 0xC329FB80, 0xC32AFB80, 0xC32BFB80, 0xC32CFB80, 0xC32DFB80, 0xC32EFB80, 0xC32FFB80, 0xC330FB80, 0xC331FB80, 0xC332FB80, 0xC333FB80, 0xC334FB80, + 0xC335FB80, 0xC336FB80, 0xC337FB80, 0xC338FB80, 0xC339FB80, 0xC33AFB80, 0xC33BFB80, 0xC33CFB80, 0xC33DFB80, 0xC33EFB80, 0xC33FFB80, 0xC340FB80, 0xC341FB80, 0xC342FB80, 0xC343FB80, + 0xC344FB80, 0xC345FB80, 0xC346FB80, 0xC347FB80, 0xC348FB80, 0xC349FB80, 0xC34AFB80, 0xC34BFB80, 0xC34CFB80, 0xC34DFB80, 0xC34EFB80, 0xC34FFB80, 0xC350FB80, 0xC351FB80, 0xC352FB80, + 0xC353FB80, 0xC354FB80, 0xC355FB80, 0xC356FB80, 0xC357FB80, 0xC358FB80, 0xC359FB80, 0xC35AFB80, 0xC35BFB80, 0xC35CFB80, 0xC35DFB80, 0xC35EFB80, 0xC35FFB80, 0xC360FB80, 0xC361FB80, + 0xC362FB80, 0xC363FB80, 0xC364FB80, 0xC365FB80, 0xC366FB80, 0xC367FB80, 0xC368FB80, 0xC369FB80, 0xC36AFB80, 0xC36BFB80, 0xC36CFB80, 0xC36DFB80, 0xC36EFB80, 0xC36FFB80, 0xC370FB80, + 0xC371FB80, 0xC372FB80, 0xC373FB80, 0xC374FB80, 0xC375FB80, 0xC376FB80, 0xC377FB80, 0xC378FB80, 0xC379FB80, 0xC37AFB80, 0xC37BFB80, 0xC37CFB80, 0xC37DFB80, 0xC37EFB80, 0xC37FFB80, + 0xC380FB80, 0xC381FB80, 0xC382FB80, 0xC383FB80, 0xC384FB80, 
0xC385FB80, 0xC386FB80, 0xC387FB80, 0xC388FB80, 0xC389FB80, 0xC38AFB80, 0xC38BFB80, 0xC38CFB80, 0xC38DFB80, 0xC38EFB80, + 0xC38FFB80, 0xC390FB80, 0xC391FB80, 0xC392FB80, 0xC393FB80, 0xC394FB80, 0xC395FB80, 0xC396FB80, 0xC397FB80, 0xC398FB80, 0xC399FB80, 0xC39AFB80, 0xC39BFB80, 0xC39CFB80, 0xC39DFB80, + 0xC39EFB80, 0xC39FFB80, 0xC3A0FB80, 0xC3A1FB80, 0xC3A2FB80, 0xC3A3FB80, 0xC3A4FB80, 0xC3A5FB80, 0xC3A6FB80, 0xC3A7FB80, 0xC3A8FB80, 0xC3A9FB80, 0xC3AAFB80, 0xC3ABFB80, 0xC3ACFB80, + 0xC3ADFB80, 0xC3AEFB80, 0xC3AFFB80, 0xC3B0FB80, 0xC3B1FB80, 0xC3B2FB80, 0xC3B3FB80, 0xC3B4FB80, 0xC3B5FB80, 0xC3B6FB80, 0xC3B7FB80, 0xC3B8FB80, 0xC3B9FB80, 0xC3BAFB80, 0xC3BBFB80, + 0xC3BCFB80, 0xC3BDFB80, 0xC3BEFB80, 0xC3BFFB80, 0xC3C0FB80, 0xC3C1FB80, 0xC3C2FB80, 0xC3C3FB80, 0xC3C4FB80, 0xC3C5FB80, 0xC3C6FB80, 0xC3C7FB80, 0xC3C8FB80, 0xC3C9FB80, 0xC3CAFB80, + 0xC3CBFB80, 0xC3CCFB80, 0xC3CDFB80, 0xC3CEFB80, 0xC3CFFB80, 0xC3D0FB80, 0xC3D1FB80, 0xC3D2FB80, 0xC3D3FB80, 0xC3D4FB80, 0xC3D5FB80, 0xC3D6FB80, 0xC3D7FB80, 0xC3D8FB80, 0xC3D9FB80, + 0xC3DAFB80, 0xC3DBFB80, 0xC3DCFB80, 0xC3DDFB80, 0xC3DEFB80, 0xC3DFFB80, 0xC3E0FB80, 0xC3E1FB80, 0xC3E2FB80, 0xC3E3FB80, 0xC3E4FB80, 0xC3E5FB80, 0xC3E6FB80, 0xC3E7FB80, 0xC3E8FB80, + 0xC3E9FB80, 0xC3EAFB80, 0xC3EBFB80, 0xC3ECFB80, 0xC3EDFB80, 0xC3EEFB80, 0xC3EFFB80, 0xC3F0FB80, 0xC3F1FB80, 0xC3F2FB80, 0xC3F3FB80, 0xC3F4FB80, 0xC3F5FB80, 0xC3F6FB80, 0xC3F7FB80, + 0xC3F8FB80, 0xC3F9FB80, 0xC3FAFB80, 0xC3FBFB80, 0xC3FCFB80, 0xC3FDFB80, 0xC3FEFB80, 0xC3FFFB80, 0xC400FB80, 0xC401FB80, 0xC402FB80, 0xC403FB80, 0xC404FB80, 0xC405FB80, 0xC406FB80, + 0xC407FB80, 0xC408FB80, 0xC409FB80, 0xC40AFB80, 0xC40BFB80, 0xC40CFB80, 0xC40DFB80, 0xC40EFB80, 0xC40FFB80, 0xC410FB80, 0xC411FB80, 0xC412FB80, 0xC413FB80, 0xC414FB80, 0xC415FB80, + 0xC416FB80, 0xC417FB80, 0xC418FB80, 0xC419FB80, 0xC41AFB80, 0xC41BFB80, 0xC41CFB80, 0xC41DFB80, 0xC41EFB80, 0xC41FFB80, 0xC420FB80, 0xC421FB80, 0xC422FB80, 0xC423FB80, 0xC424FB80, + 0xC425FB80, 0xC426FB80, 0xC427FB80, 0xC428FB80, 
0xC429FB80, 0xC42AFB80, 0xC42BFB80, 0xC42CFB80, 0xC42DFB80, 0xC42EFB80, 0xC42FFB80, 0xC430FB80, 0xC431FB80, 0xC432FB80, 0xC433FB80, + 0xC434FB80, 0xC435FB80, 0xC436FB80, 0xC437FB80, 0xC438FB80, 0xC439FB80, 0xC43AFB80, 0xC43BFB80, 0xC43CFB80, 0xC43DFB80, 0xC43EFB80, 0xC43FFB80, 0xC440FB80, 0xC441FB80, 0xC442FB80, + 0xC443FB80, 0xC444FB80, 0xC445FB80, 0xC446FB80, 0xC447FB80, 0xC448FB80, 0xC449FB80, 0xC44AFB80, 0xC44BFB80, 0xC44CFB80, 0xC44DFB80, 0xC44EFB80, 0xC44FFB80, 0xC450FB80, 0xC451FB80, + 0xC452FB80, 0xC453FB80, 0xC454FB80, 0xC455FB80, 0xC456FB80, 0xC457FB80, 0xC458FB80, 0xC459FB80, 0xC45AFB80, 0xC45BFB80, 0xC45CFB80, 0xC45DFB80, 0xC45EFB80, 0xC45FFB80, 0xC460FB80, + 0xC461FB80, 0xC462FB80, 0xC463FB80, 0xC464FB80, 0xC465FB80, 0xC466FB80, 0xC467FB80, 0xC468FB80, 0xC469FB80, 0xC46AFB80, 0xC46BFB80, 0xC46CFB80, 0xC46DFB80, 0xC46EFB80, 0xC46FFB80, + 0xC470FB80, 0xC471FB80, 0xC472FB80, 0xC473FB80, 0xC474FB80, 0xC475FB80, 0xC476FB80, 0xC477FB80, 0xC478FB80, 0xC479FB80, 0xC47AFB80, 0xC47BFB80, 0xC47CFB80, 0xC47DFB80, 0xC47EFB80, + 0xC47FFB80, 0xC480FB80, 0xC481FB80, 0xC482FB80, 0xC483FB80, 0xC484FB80, 0xC485FB80, 0xC486FB80, 0xC487FB80, 0xC488FB80, 0xC489FB80, 0xC48AFB80, 0xC48BFB80, 0xC48CFB80, 0xC48DFB80, + 0xC48EFB80, 0xC48FFB80, 0xC490FB80, 0xC491FB80, 0xC492FB80, 0xC493FB80, 0xC494FB80, 0xC495FB80, 0xC496FB80, 0xC497FB80, 0xC498FB80, 0xC499FB80, 0xC49AFB80, 0xC49BFB80, 0xC49CFB80, + 0xC49DFB80, 0xC49EFB80, 0xC49FFB80, 0xC4A0FB80, 0xC4A1FB80, 0xC4A2FB80, 0xC4A3FB80, 0xC4A4FB80, 0xC4A5FB80, 0xC4A6FB80, 0xC4A7FB80, 0xC4A8FB80, 0xC4A9FB80, 0xC4AAFB80, 0xC4ABFB80, + 0xC4ACFB80, 0xC4ADFB80, 0xC4AEFB80, 0xC4AFFB80, 0xC4B0FB80, 0xC4B1FB80, 0xC4B2FB80, 0xC4B3FB80, 0xC4B4FB80, 0xC4B5FB80, 0xC4B6FB80, 0xC4B7FB80, 0xC4B8FB80, 0xC4B9FB80, 0xC4BAFB80, + 0xC4BBFB80, 0xC4BCFB80, 0xC4BDFB80, 0xC4BEFB80, 0xC4BFFB80, 0xC4C0FB80, 0xC4C1FB80, 0xC4C2FB80, 0xC4C3FB80, 0xC4C4FB80, 0xC4C5FB80, 0xC4C6FB80, 0xC4C7FB80, 0xC4C8FB80, 0xC4C9FB80, + 0xC4CAFB80, 0xC4CBFB80, 0xC4CCFB80, 
0xC4CDFB80, 0xC4CEFB80, 0xC4CFFB80, 0xC4D0FB80, 0xC4D1FB80, 0xC4D2FB80, 0xC4D3FB80, 0xC4D4FB80, 0xC4D5FB80, 0xC4D6FB80, 0xC4D7FB80, 0xC4D8FB80, + 0xC4D9FB80, 0xC4DAFB80, 0xC4DBFB80, 0xC4DCFB80, 0xC4DDFB80, 0xC4DEFB80, 0xC4DFFB80, 0xC4E0FB80, 0xC4E1FB80, 0xC4E2FB80, 0xC4E3FB80, 0xC4E4FB80, 0xC4E5FB80, 0xC4E6FB80, 0xC4E7FB80, + 0xC4E8FB80, 0xC4E9FB80, 0xC4EAFB80, 0xC4EBFB80, 0xC4ECFB80, 0xC4EDFB80, 0xC4EEFB80, 0xC4EFFB80, 0xC4F0FB80, 0xC4F1FB80, 0xC4F2FB80, 0xC4F3FB80, 0xC4F4FB80, 0xC4F5FB80, 0xC4F6FB80, + 0xC4F7FB80, 0xC4F8FB80, 0xC4F9FB80, 0xC4FAFB80, 0xC4FBFB80, 0xC4FCFB80, 0xC4FDFB80, 0xC4FEFB80, 0xC4FFFB80, 0xC500FB80, 0xC501FB80, 0xC502FB80, 0xC503FB80, 0xC504FB80, 0xC505FB80, + 0xC506FB80, 0xC507FB80, 0xC508FB80, 0xC509FB80, 0xC50AFB80, 0xC50BFB80, 0xC50CFB80, 0xC50DFB80, 0xC50EFB80, 0xC50FFB80, 0xC510FB80, 0xC511FB80, 0xC512FB80, 0xC513FB80, 0xC514FB80, + 0xC515FB80, 0xC516FB80, 0xC517FB80, 0xC518FB80, 0xC519FB80, 0xC51AFB80, 0xC51BFB80, 0xC51CFB80, 0xC51DFB80, 0xC51EFB80, 0xC51FFB80, 0xC520FB80, 0xC521FB80, 0xC522FB80, 0xC523FB80, + 0xC524FB80, 0xC525FB80, 0xC526FB80, 0xC527FB80, 0xC528FB80, 0xC529FB80, 0xC52AFB80, 0xC52BFB80, 0xC52CFB80, 0xC52DFB80, 0xC52EFB80, 0xC52FFB80, 0xC530FB80, 0xC531FB80, 0xC532FB80, + 0xC533FB80, 0xC534FB80, 0xC535FB80, 0xC536FB80, 0xC537FB80, 0xC538FB80, 0xC539FB80, 0xC53AFB80, 0xC53BFB80, 0xC53CFB80, 0xC53DFB80, 0xC53EFB80, 0xC53FFB80, 0xC540FB80, 0xC541FB80, + 0xC542FB80, 0xC543FB80, 0xC544FB80, 0xC545FB80, 0xC546FB80, 0xC547FB80, 0xC548FB80, 0xC549FB80, 0xC54AFB80, 0xC54BFB80, 0xC54CFB80, 0xC54DFB80, 0xC54EFB80, 0xC54FFB80, 0xC550FB80, + 0xC551FB80, 0xC552FB80, 0xC553FB80, 0xC554FB80, 0xC555FB80, 0xC556FB80, 0xC557FB80, 0xC558FB80, 0xC559FB80, 0xC55AFB80, 0xC55BFB80, 0xC55CFB80, 0xC55DFB80, 0xC55EFB80, 0xC55FFB80, + 0xC560FB80, 0xC561FB80, 0xC562FB80, 0xC563FB80, 0xC564FB80, 0xC565FB80, 0xC566FB80, 0xC567FB80, 0xC568FB80, 0xC569FB80, 0xC56AFB80, 0xC56BFB80, 0xC56CFB80, 0xC56DFB80, 0xC56EFB80, + 0xC56FFB80, 0xC570FB80, 
0xC571FB80, 0xC572FB80, 0xC573FB80, 0xC574FB80, 0xC575FB80, 0xC576FB80, 0xC577FB80, 0xC578FB80, 0xC579FB80, 0xC57AFB80, 0xC57BFB80, 0xC57CFB80, 0xC57DFB80, + 0xC57EFB80, 0xC57FFB80, 0xC580FB80, 0xC581FB80, 0xC582FB80, 0xC583FB80, 0xC584FB80, 0xC585FB80, 0xC586FB80, 0xC587FB80, 0xC588FB80, 0xC589FB80, 0xC58AFB80, 0xC58BFB80, 0xC58CFB80, + 0xC58DFB80, 0xC58EFB80, 0xC58FFB80, 0xC590FB80, 0xC591FB80, 0xC592FB80, 0xC593FB80, 0xC594FB80, 0xC595FB80, 0xC596FB80, 0xC597FB80, 0xC598FB80, 0xC599FB80, 0xC59AFB80, 0xC59BFB80, + 0xC59CFB80, 0xC59DFB80, 0xC59EFB80, 0xC59FFB80, 0xC5A0FB80, 0xC5A1FB80, 0xC5A2FB80, 0xC5A3FB80, 0xC5A4FB80, 0xC5A5FB80, 0xC5A6FB80, 0xC5A7FB80, 0xC5A8FB80, 0xC5A9FB80, 0xC5AAFB80, + 0xC5ABFB80, 0xC5ACFB80, 0xC5ADFB80, 0xC5AEFB80, 0xC5AFFB80, 0xC5B0FB80, 0xC5B1FB80, 0xC5B2FB80, 0xC5B3FB80, 0xC5B4FB80, 0xC5B5FB80, 0xC5B6FB80, 0xC5B7FB80, 0xC5B8FB80, 0xC5B9FB80, + 0xC5BAFB80, 0xC5BBFB80, 0xC5BCFB80, 0xC5BDFB80, 0xC5BEFB80, 0xC5BFFB80, 0xC5C0FB80, 0xC5C1FB80, 0xC5C2FB80, 0xC5C3FB80, 0xC5C4FB80, 0xC5C5FB80, 0xC5C6FB80, 0xC5C7FB80, 0xC5C8FB80, + 0xC5C9FB80, 0xC5CAFB80, 0xC5CBFB80, 0xC5CCFB80, 0xC5CDFB80, 0xC5CEFB80, 0xC5CFFB80, 0xC5D0FB80, 0xC5D1FB80, 0xC5D2FB80, 0xC5D3FB80, 0xC5D4FB80, 0xC5D5FB80, 0xC5D6FB80, 0xC5D7FB80, + 0xC5D8FB80, 0xC5D9FB80, 0xC5DAFB80, 0xC5DBFB80, 0xC5DCFB80, 0xC5DDFB80, 0xC5DEFB80, 0xC5DFFB80, 0xC5E0FB80, 0xC5E1FB80, 0xC5E2FB80, 0xC5E3FB80, 0xC5E4FB80, 0xC5E5FB80, 0xC5E6FB80, + 0xC5E7FB80, 0xC5E8FB80, 0xC5E9FB80, 0xC5EAFB80, 0xC5EBFB80, 0xC5ECFB80, 0xC5EDFB80, 0xC5EEFB80, 0xC5EFFB80, 0xC5F0FB80, 0xC5F1FB80, 0xC5F2FB80, 0xC5F3FB80, 0xC5F4FB80, 0xC5F5FB80, + 0xC5F6FB80, 0xC5F7FB80, 0xC5F8FB80, 0xC5F9FB80, 0xC5FAFB80, 0xC5FBFB80, 0xC5FCFB80, 0xC5FDFB80, 0xC5FEFB80, 0xC5FFFB80, 0xC600FB80, 0xC601FB80, 0xC602FB80, 0xC603FB80, 0xC604FB80, + 0xC605FB80, 0xC606FB80, 0xC607FB80, 0xC608FB80, 0xC609FB80, 0xC60AFB80, 0xC60BFB80, 0xC60CFB80, 0xC60DFB80, 0xC60EFB80, 0xC60FFB80, 0xC610FB80, 0xC611FB80, 0xC612FB80, 0xC613FB80, + 0xC614FB80, 
0xC615FB80, 0xC616FB80, 0xC617FB80, 0xC618FB80, 0xC619FB80, 0xC61AFB80, 0xC61BFB80, 0xC61CFB80, 0xC61DFB80, 0xC61EFB80, 0xC61FFB80, 0xC620FB80, 0xC621FB80, 0xC622FB80, + 0xC623FB80, 0xC624FB80, 0xC625FB80, 0xC626FB80, 0xC627FB80, 0xC628FB80, 0xC629FB80, 0xC62AFB80, 0xC62BFB80, 0xC62CFB80, 0xC62DFB80, 0xC62EFB80, 0xC62FFB80, 0xC630FB80, 0xC631FB80, + 0xC632FB80, 0xC633FB80, 0xC634FB80, 0xC635FB80, 0xC636FB80, 0xC637FB80, 0xC638FB80, 0xC639FB80, 0xC63AFB80, 0xC63BFB80, 0xC63CFB80, 0xC63DFB80, 0xC63EFB80, 0xC63FFB80, 0xC640FB80, + 0xC641FB80, 0xC642FB80, 0xC643FB80, 0xC644FB80, 0xC645FB80, 0xC646FB80, 0xC647FB80, 0xC648FB80, 0xC649FB80, 0xC64AFB80, 0xC64BFB80, 0xC64CFB80, 0xC64DFB80, 0xC64EFB80, 0xC64FFB80, + 0xC650FB80, 0xC651FB80, 0xC652FB80, 0xC653FB80, 0xC654FB80, 0xC655FB80, 0xC656FB80, 0xC657FB80, 0xC658FB80, 0xC659FB80, 0xC65AFB80, 0xC65BFB80, 0xC65CFB80, 0xC65DFB80, 0xC65EFB80, + 0xC65FFB80, 0xC660FB80, 0xC661FB80, 0xC662FB80, 0xC663FB80, 0xC664FB80, 0xC665FB80, 0xC666FB80, 0xC667FB80, 0xC668FB80, 0xC669FB80, 0xC66AFB80, 0xC66BFB80, 0xC66CFB80, 0xC66DFB80, + 0xC66EFB80, 0xC66FFB80, 0xC670FB80, 0xC671FB80, 0xC672FB80, 0xC673FB80, 0xC674FB80, 0xC675FB80, 0xC676FB80, 0xC677FB80, 0xC678FB80, 0xC679FB80, 0xC67AFB80, 0xC67BFB80, 0xC67CFB80, + 0xC67DFB80, 0xC67EFB80, 0xC67FFB80, 0xC680FB80, 0xC681FB80, 0xC682FB80, 0xC683FB80, 0xC684FB80, 0xC685FB80, 0xC686FB80, 0xC687FB80, 0xC688FB80, 0xC689FB80, 0xC68AFB80, 0xC68BFB80, + 0xC68CFB80, 0xC68DFB80, 0xC68EFB80, 0xC68FFB80, 0xC690FB80, 0xC691FB80, 0xC692FB80, 0xC693FB80, 0xC694FB80, 0xC695FB80, 0xC696FB80, 0xC697FB80, 0xC698FB80, 0xC699FB80, 0xC69AFB80, + 0xC69BFB80, 0xC69CFB80, 0xC69DFB80, 0xC69EFB80, 0xC69FFB80, 0xC6A0FB80, 0xC6A1FB80, 0xC6A2FB80, 0xC6A3FB80, 0xC6A4FB80, 0xC6A5FB80, 0xC6A6FB80, 0xC6A7FB80, 0xC6A8FB80, 0xC6A9FB80, + 0xC6AAFB80, 0xC6ABFB80, 0xC6ACFB80, 0xC6ADFB80, 0xC6AEFB80, 0xC6AFFB80, 0xC6B0FB80, 0xC6B1FB80, 0xC6B2FB80, 0xC6B3FB80, 0xC6B4FB80, 0xC6B5FB80, 0xC6B6FB80, 0xC6B7FB80, 0xC6B8FB80, + 
0xC6B9FB80, 0xC6BAFB80, 0xC6BBFB80, 0xC6BCFB80, 0xC6BDFB80, 0xC6BEFB80, 0xC6BFFB80, 0xC6C0FB80, 0xC6C1FB80, 0xC6C2FB80, 0xC6C3FB80, 0xC6C4FB80, 0xC6C5FB80, 0xC6C6FB80, 0xC6C7FB80, + 0xC6C8FB80, 0xC6C9FB80, 0xC6CAFB80, 0xC6CBFB80, 0xC6CCFB80, 0xC6CDFB80, 0xC6CEFB80, 0xC6CFFB80, 0xC6D0FB80, 0xC6D1FB80, 0xC6D2FB80, 0xC6D3FB80, 0xC6D4FB80, 0xC6D5FB80, 0xC6D6FB80, + 0xC6D7FB80, 0xC6D8FB80, 0xC6D9FB80, 0xC6DAFB80, 0xC6DBFB80, 0xC6DCFB80, 0xC6DDFB80, 0xC6DEFB80, 0xC6DFFB80, 0xC6E0FB80, 0xC6E1FB80, 0xC6E2FB80, 0xC6E3FB80, 0xC6E4FB80, 0xC6E5FB80, + 0xC6E6FB80, 0xC6E7FB80, 0xC6E8FB80, 0xC6E9FB80, 0xC6EAFB80, 0xC6EBFB80, 0xC6ECFB80, 0xC6EDFB80, 0xC6EEFB80, 0xC6EFFB80, 0xC6F0FB80, 0xC6F1FB80, 0xC6F2FB80, 0xC6F3FB80, 0xC6F4FB80, + 0xC6F5FB80, 0xC6F6FB80, 0xC6F7FB80, 0xC6F8FB80, 0xC6F9FB80, 0xC6FAFB80, 0xC6FBFB80, 0xC6FCFB80, 0xC6FDFB80, 0xC6FEFB80, 0xC6FFFB80, 0xC700FB80, 0xC701FB80, 0xC702FB80, 0xC703FB80, + 0xC704FB80, 0xC705FB80, 0xC706FB80, 0xC707FB80, 0xC708FB80, 0xC709FB80, 0xC70AFB80, 0xC70BFB80, 0xC70CFB80, 0xC70DFB80, 0xC70EFB80, 0xC70FFB80, 0xC710FB80, 0xC711FB80, 0xC712FB80, + 0xC713FB80, 0xC714FB80, 0xC715FB80, 0xC716FB80, 0xC717FB80, 0xC718FB80, 0xC719FB80, 0xC71AFB80, 0xC71BFB80, 0xC71CFB80, 0xC71DFB80, 0xC71EFB80, 0xC71FFB80, 0xC720FB80, 0xC721FB80, + 0xC722FB80, 0xC723FB80, 0xC724FB80, 0xC725FB80, 0xC726FB80, 0xC727FB80, 0xC728FB80, 0xC729FB80, 0xC72AFB80, 0xC72BFB80, 0xC72CFB80, 0xC72DFB80, 0xC72EFB80, 0xC72FFB80, 0xC730FB80, + 0xC731FB80, 0xC732FB80, 0xC733FB80, 0xC734FB80, 0xC735FB80, 0xC736FB80, 0xC737FB80, 0xC738FB80, 0xC739FB80, 0xC73AFB80, 0xC73BFB80, 0xC73CFB80, 0xC73DFB80, 0xC73EFB80, 0xC73FFB80, + 0xC740FB80, 0xC741FB80, 0xC742FB80, 0xC743FB80, 0xC744FB80, 0xC745FB80, 0xC746FB80, 0xC747FB80, 0xC748FB80, 0xC749FB80, 0xC74AFB80, 0xC74BFB80, 0xC74CFB80, 0xC74DFB80, 0xC74EFB80, + 0xC74FFB80, 0xC750FB80, 0xC751FB80, 0xC752FB80, 0xC753FB80, 0xC754FB80, 0xC755FB80, 0xC756FB80, 0xC757FB80, 0xC758FB80, 0xC759FB80, 0xC75AFB80, 0xC75BFB80, 0xC75CFB80, 0xC75DFB80, 
+ 0xC75EFB80, 0xC75FFB80, 0xC760FB80, 0xC761FB80, 0xC762FB80, 0xC763FB80, 0xC764FB80, 0xC765FB80, 0xC766FB80, 0xC767FB80, 0xC768FB80, 0xC769FB80, 0xC76AFB80, 0xC76BFB80, 0xC76CFB80, + 0xC76DFB80, 0xC76EFB80, 0xC76FFB80, 0xC770FB80, 0xC771FB80, 0xC772FB80, 0xC773FB80, 0xC774FB80, 0xC775FB80, 0xC776FB80, 0xC777FB80, 0xC778FB80, 0xC779FB80, 0xC77AFB80, 0xC77BFB80, + 0xC77CFB80, 0xC77DFB80, 0xC77EFB80, 0xC77FFB80, 0xC780FB80, 0xC781FB80, 0xC782FB80, 0xC783FB80, 0xC784FB80, 0xC785FB80, 0xC786FB80, 0xC787FB80, 0xC788FB80, 0xC789FB80, 0xC78AFB80, + 0xC78BFB80, 0xC78CFB80, 0xC78DFB80, 0xC78EFB80, 0xC78FFB80, 0xC790FB80, 0xC791FB80, 0xC792FB80, 0xC793FB80, 0xC794FB80, 0xC795FB80, 0xC796FB80, 0xC797FB80, 0xC798FB80, 0xC799FB80, + 0xC79AFB80, 0xC79BFB80, 0xC79CFB80, 0xC79DFB80, 0xC79EFB80, 0xC79FFB80, 0xC7A0FB80, 0xC7A1FB80, 0xC7A2FB80, 0xC7A3FB80, 0xC7A4FB80, 0xC7A5FB80, 0xC7A6FB80, 0xC7A7FB80, 0xC7A8FB80, + 0xC7A9FB80, 0xC7AAFB80, 0xC7ABFB80, 0xC7ACFB80, 0xC7ADFB80, 0xC7AEFB80, 0xC7AFFB80, 0xC7B0FB80, 0xC7B1FB80, 0xC7B2FB80, 0xC7B3FB80, 0xC7B4FB80, 0xC7B5FB80, 0xC7B6FB80, 0xC7B7FB80, + 0xC7B8FB80, 0xC7B9FB80, 0xC7BAFB80, 0xC7BBFB80, 0xC7BCFB80, 0xC7BDFB80, 0xC7BEFB80, 0xC7BFFB80, 0xC7C0FB80, 0xC7C1FB80, 0xC7C2FB80, 0xC7C3FB80, 0xC7C4FB80, 0xC7C5FB80, 0xC7C6FB80, + 0xC7C7FB80, 0xC7C8FB80, 0xC7C9FB80, 0xC7CAFB80, 0xC7CBFB80, 0xC7CCFB80, 0xC7CDFB80, 0xC7CEFB80, 0xC7CFFB80, 0xC7D0FB80, 0xC7D1FB80, 0xC7D2FB80, 0xC7D3FB80, 0xC7D4FB80, 0xC7D5FB80, + 0xC7D6FB80, 0xC7D7FB80, 0xC7D8FB80, 0xC7D9FB80, 0xC7DAFB80, 0xC7DBFB80, 0xC7DCFB80, 0xC7DDFB80, 0xC7DEFB80, 0xC7DFFB80, 0xC7E0FB80, 0xC7E1FB80, 0xC7E2FB80, 0xC7E3FB80, 0xC7E4FB80, + 0xC7E5FB80, 0xC7E6FB80, 0xC7E7FB80, 0xC7E8FB80, 0xC7E9FB80, 0xC7EAFB80, 0xC7EBFB80, 0xC7ECFB80, 0xC7EDFB80, 0xC7EEFB80, 0xC7EFFB80, 0xC7F0FB80, 0xC7F1FB80, 0xC7F2FB80, 0xC7F3FB80, + 0xC7F4FB80, 0xC7F5FB80, 0xC7F6FB80, 0xC7F7FB80, 0xC7F8FB80, 0xC7F9FB80, 0xC7FAFB80, 0xC7FBFB80, 0xC7FCFB80, 0xC7FDFB80, 0xC7FEFB80, 0xC7FFFB80, 0xC800FB80, 0xC801FB80, 
0xC802FB80, + 0xC803FB80, 0xC804FB80, 0xC805FB80, 0xC806FB80, 0xC807FB80, 0xC808FB80, 0xC809FB80, 0xC80AFB80, 0xC80BFB80, 0xC80CFB80, 0xC80DFB80, 0xC80EFB80, 0xC80FFB80, 0xC810FB80, 0xC811FB80, + 0xC812FB80, 0xC813FB80, 0xC814FB80, 0xC815FB80, 0xC816FB80, 0xC817FB80, 0xC818FB80, 0xC819FB80, 0xC81AFB80, 0xC81BFB80, 0xC81CFB80, 0xC81DFB80, 0xC81EFB80, 0xC81FFB80, 0xC820FB80, + 0xC821FB80, 0xC822FB80, 0xC823FB80, 0xC824FB80, 0xC825FB80, 0xC826FB80, 0xC827FB80, 0xC828FB80, 0xC829FB80, 0xC82AFB80, 0xC82BFB80, 0xC82CFB80, 0xC82DFB80, 0xC82EFB80, 0xC82FFB80, + 0xC830FB80, 0xC831FB80, 0xC832FB80, 0xC833FB80, 0xC834FB80, 0xC835FB80, 0xC836FB80, 0xC837FB80, 0xC838FB80, 0xC839FB80, 0xC83AFB80, 0xC83BFB80, 0xC83CFB80, 0xC83DFB80, 0xC83EFB80, + 0xC83FFB80, 0xC840FB80, 0xC841FB80, 0xC842FB80, 0xC843FB80, 0xC844FB80, 0xC845FB80, 0xC846FB80, 0xC847FB80, 0xC848FB80, 0xC849FB80, 0xC84AFB80, 0xC84BFB80, 0xC84CFB80, 0xC84DFB80, + 0xC84EFB80, 0xC84FFB80, 0xC850FB80, 0xC851FB80, 0xC852FB80, 0xC853FB80, 0xC854FB80, 0xC855FB80, 0xC856FB80, 0xC857FB80, 0xC858FB80, 0xC859FB80, 0xC85AFB80, 0xC85BFB80, 0xC85CFB80, + 0xC85DFB80, 0xC85EFB80, 0xC85FFB80, 0xC860FB80, 0xC861FB80, 0xC862FB80, 0xC863FB80, 0xC864FB80, 0xC865FB80, 0xC866FB80, 0xC867FB80, 0xC868FB80, 0xC869FB80, 0xC86AFB80, 0xC86BFB80, + 0xC86CFB80, 0xC86DFB80, 0xC86EFB80, 0xC86FFB80, 0xC870FB80, 0xC871FB80, 0xC872FB80, 0xC873FB80, 0xC874FB80, 0xC875FB80, 0xC876FB80, 0xC877FB80, 0xC878FB80, 0xC879FB80, 0xC87AFB80, + 0xC87BFB80, 0xC87CFB80, 0xC87DFB80, 0xC87EFB80, 0xC87FFB80, 0xC880FB80, 0xC881FB80, 0xC882FB80, 0xC883FB80, 0xC884FB80, 0xC885FB80, 0xC886FB80, 0xC887FB80, 0xC888FB80, 0xC889FB80, + 0xC88AFB80, 0xC88BFB80, 0xC88CFB80, 0xC88DFB80, 0xC88EFB80, 0xC88FFB80, 0xC890FB80, 0xC891FB80, 0xC892FB80, 0xC893FB80, 0xC894FB80, 0xC895FB80, 0xC896FB80, 0xC897FB80, 0xC898FB80, + 0xC899FB80, 0xC89AFB80, 0xC89BFB80, 0xC89CFB80, 0xC89DFB80, 0xC89EFB80, 0xC89FFB80, 0xC8A0FB80, 0xC8A1FB80, 0xC8A2FB80, 0xC8A3FB80, 0xC8A4FB80, 0xC8A5FB80, 
0xC8A6FB80, 0xC8A7FB80, + 0xC8A8FB80, 0xC8A9FB80, 0xC8AAFB80, 0xC8ABFB80, 0xC8ACFB80, 0xC8ADFB80, 0xC8AEFB80, 0xC8AFFB80, 0xC8B0FB80, 0xC8B1FB80, 0xC8B2FB80, 0xC8B3FB80, 0xC8B4FB80, 0xC8B5FB80, 0xC8B6FB80, + 0xC8B7FB80, 0xC8B8FB80, 0xC8B9FB80, 0xC8BAFB80, 0xC8BBFB80, 0xC8BCFB80, 0xC8BDFB80, 0xC8BEFB80, 0xC8BFFB80, 0xC8C0FB80, 0xC8C1FB80, 0xC8C2FB80, 0xC8C3FB80, 0xC8C4FB80, 0xC8C5FB80, + 0xC8C6FB80, 0xC8C7FB80, 0xC8C8FB80, 0xC8C9FB80, 0xC8CAFB80, 0xC8CBFB80, 0xC8CCFB80, 0xC8CDFB80, 0xC8CEFB80, 0xC8CFFB80, 0xC8D0FB80, 0xC8D1FB80, 0xC8D2FB80, 0xC8D3FB80, 0xC8D4FB80, + 0xC8D5FB80, 0xC8D6FB80, 0xC8D7FB80, 0xC8D8FB80, 0xC8D9FB80, 0xC8DAFB80, 0xC8DBFB80, 0xC8DCFB80, 0xC8DDFB80, 0xC8DEFB80, 0xC8DFFB80, 0xC8E0FB80, 0xC8E1FB80, 0xC8E2FB80, 0xC8E3FB80, + 0xC8E4FB80, 0xC8E5FB80, 0xC8E6FB80, 0xC8E7FB80, 0xC8E8FB80, 0xC8E9FB80, 0xC8EAFB80, 0xC8EBFB80, 0xC8ECFB80, 0xC8EDFB80, 0xC8EEFB80, 0xC8EFFB80, 0xC8F0FB80, 0xC8F1FB80, 0xC8F2FB80, + 0xC8F3FB80, 0xC8F4FB80, 0xC8F5FB80, 0xC8F6FB80, 0xC8F7FB80, 0xC8F8FB80, 0xC8F9FB80, 0xC8FAFB80, 0xC8FBFB80, 0xC8FCFB80, 0xC8FDFB80, 0xC8FEFB80, 0xC8FFFB80, 0xC900FB80, 0xC901FB80, + 0xC902FB80, 0xC903FB80, 0xC904FB80, 0xC905FB80, 0xC906FB80, 0xC907FB80, 0xC908FB80, 0xC909FB80, 0xC90AFB80, 0xC90BFB80, 0xC90CFB80, 0xC90DFB80, 0xC90EFB80, 0xC90FFB80, 0xC910FB80, + 0xC911FB80, 0xC912FB80, 0xC913FB80, 0xC914FB80, 0xC915FB80, 0xC916FB80, 0xC917FB80, 0xC918FB80, 0xC919FB80, 0xC91AFB80, 0xC91BFB80, 0xC91CFB80, 0xC91DFB80, 0xC91EFB80, 0xC91FFB80, + 0xC920FB80, 0xC921FB80, 0xC922FB80, 0xC923FB80, 0xC924FB80, 0xC925FB80, 0xC926FB80, 0xC927FB80, 0xC928FB80, 0xC929FB80, 0xC92AFB80, 0xC92BFB80, 0xC92CFB80, 0xC92DFB80, 0xC92EFB80, + 0xC92FFB80, 0xC930FB80, 0xC931FB80, 0xC932FB80, 0xC933FB80, 0xC934FB80, 0xC935FB80, 0xC936FB80, 0xC937FB80, 0xC938FB80, 0xC939FB80, 0xC93AFB80, 0xC93BFB80, 0xC93CFB80, 0xC93DFB80, + 0xC93EFB80, 0xC93FFB80, 0xC940FB80, 0xC941FB80, 0xC942FB80, 0xC943FB80, 0xC944FB80, 0xC945FB80, 0xC946FB80, 0xC947FB80, 0xC948FB80, 0xC949FB80, 
0xC94AFB80, 0xC94BFB80, 0xC94CFB80, + 0xC94DFB80, 0xC94EFB80, 0xC94FFB80, 0xC950FB80, 0xC951FB80, 0xC952FB80, 0xC953FB80, 0xC954FB80, 0xC955FB80, 0xC956FB80, 0xC957FB80, 0xC958FB80, 0xC959FB80, 0xC95AFB80, 0xC95BFB80, + 0xC95CFB80, 0xC95DFB80, 0xC95EFB80, 0xC95FFB80, 0xC960FB80, 0xC961FB80, 0xC962FB80, 0xC963FB80, 0xC964FB80, 0xC965FB80, 0xC966FB80, 0xC967FB80, 0xC968FB80, 0xC969FB80, 0xC96AFB80, + 0xC96BFB80, 0xC96CFB80, 0xC96DFB80, 0xC96EFB80, 0xC96FFB80, 0xC970FB80, 0xC971FB80, 0xC972FB80, 0xC973FB80, 0xC974FB80, 0xC975FB80, 0xC976FB80, 0xC977FB80, 0xC978FB80, 0xC979FB80, + 0xC97AFB80, 0xC97BFB80, 0xC97CFB80, 0xC97DFB80, 0xC97EFB80, 0xC97FFB80, 0xC980FB80, 0xC981FB80, 0xC982FB80, 0xC983FB80, 0xC984FB80, 0xC985FB80, 0xC986FB80, 0xC987FB80, 0xC988FB80, + 0xC989FB80, 0xC98AFB80, 0xC98BFB80, 0xC98CFB80, 0xC98DFB80, 0xC98EFB80, 0xC98FFB80, 0xC990FB80, 0xC991FB80, 0xC992FB80, 0xC993FB80, 0xC994FB80, 0xC995FB80, 0xC996FB80, 0xC997FB80, + 0xC998FB80, 0xC999FB80, 0xC99AFB80, 0xC99BFB80, 0xC99CFB80, 0xC99DFB80, 0xC99EFB80, 0xC99FFB80, 0xC9A0FB80, 0xC9A1FB80, 0xC9A2FB80, 0xC9A3FB80, 0xC9A4FB80, 0xC9A5FB80, 0xC9A6FB80, + 0xC9A7FB80, 0xC9A8FB80, 0xC9A9FB80, 0xC9AAFB80, 0xC9ABFB80, 0xC9ACFB80, 0xC9ADFB80, 0xC9AEFB80, 0xC9AFFB80, 0xC9B0FB80, 0xC9B1FB80, 0xC9B2FB80, 0xC9B3FB80, 0xC9B4FB80, 0xC9B5FB80, + 0xC9B6FB80, 0xC9B7FB80, 0xC9B8FB80, 0xC9B9FB80, 0xC9BAFB80, 0xC9BBFB80, 0xC9BCFB80, 0xC9BDFB80, 0xC9BEFB80, 0xC9BFFB80, 0xC9C0FB80, 0xC9C1FB80, 0xC9C2FB80, 0xC9C3FB80, 0xC9C4FB80, + 0xC9C5FB80, 0xC9C6FB80, 0xC9C7FB80, 0xC9C8FB80, 0xC9C9FB80, 0xC9CAFB80, 0xC9CBFB80, 0xC9CCFB80, 0xC9CDFB80, 0xC9CEFB80, 0xC9CFFB80, 0xC9D0FB80, 0xC9D1FB80, 0xC9D2FB80, 0xC9D3FB80, + 0xC9D4FB80, 0xC9D5FB80, 0xC9D6FB80, 0xC9D7FB80, 0xC9D8FB80, 0xC9D9FB80, 0xC9DAFB80, 0xC9DBFB80, 0xC9DCFB80, 0xC9DDFB80, 0xC9DEFB80, 0xC9DFFB80, 0xC9E0FB80, 0xC9E1FB80, 0xC9E2FB80, + 0xC9E3FB80, 0xC9E4FB80, 0xC9E5FB80, 0xC9E6FB80, 0xC9E7FB80, 0xC9E8FB80, 0xC9E9FB80, 0xC9EAFB80, 0xC9EBFB80, 0xC9ECFB80, 0xC9EDFB80, 
0xC9EEFB80, 0xC9EFFB80, 0xC9F0FB80, 0xC9F1FB80, + 0xC9F2FB80, 0xC9F3FB80, 0xC9F4FB80, 0xC9F5FB80, 0xC9F6FB80, 0xC9F7FB80, 0xC9F8FB80, 0xC9F9FB80, 0xC9FAFB80, 0xC9FBFB80, 0xC9FCFB80, 0xC9FDFB80, 0xC9FEFB80, 0xC9FFFB80, 0xCA00FB80, + 0xCA01FB80, 0xCA02FB80, 0xCA03FB80, 0xCA04FB80, 0xCA05FB80, 0xCA06FB80, 0xCA07FB80, 0xCA08FB80, 0xCA09FB80, 0xCA0AFB80, 0xCA0BFB80, 0xCA0CFB80, 0xCA0DFB80, 0xCA0EFB80, 0xCA0FFB80, + 0xCA10FB80, 0xCA11FB80, 0xCA12FB80, 0xCA13FB80, 0xCA14FB80, 0xCA15FB80, 0xCA16FB80, 0xCA17FB80, 0xCA18FB80, 0xCA19FB80, 0xCA1AFB80, 0xCA1BFB80, 0xCA1CFB80, 0xCA1DFB80, 0xCA1EFB80, + 0xCA1FFB80, 0xCA20FB80, 0xCA21FB80, 0xCA22FB80, 0xCA23FB80, 0xCA24FB80, 0xCA25FB80, 0xCA26FB80, 0xCA27FB80, 0xCA28FB80, 0xCA29FB80, 0xCA2AFB80, 0xCA2BFB80, 0xCA2CFB80, 0xCA2DFB80, + 0xCA2EFB80, 0xCA2FFB80, 0xCA30FB80, 0xCA31FB80, 0xCA32FB80, 0xCA33FB80, 0xCA34FB80, 0xCA35FB80, 0xCA36FB80, 0xCA37FB80, 0xCA38FB80, 0xCA39FB80, 0xCA3AFB80, 0xCA3BFB80, 0xCA3CFB80, + 0xCA3DFB80, 0xCA3EFB80, 0xCA3FFB80, 0xCA40FB80, 0xCA41FB80, 0xCA42FB80, 0xCA43FB80, 0xCA44FB80, 0xCA45FB80, 0xCA46FB80, 0xCA47FB80, 0xCA48FB80, 0xCA49FB80, 0xCA4AFB80, 0xCA4BFB80, + 0xCA4CFB80, 0xCA4DFB80, 0xCA4EFB80, 0xCA4FFB80, 0xCA50FB80, 0xCA51FB80, 0xCA52FB80, 0xCA53FB80, 0xCA54FB80, 0xCA55FB80, 0xCA56FB80, 0xCA57FB80, 0xCA58FB80, 0xCA59FB80, 0xCA5AFB80, + 0xCA5BFB80, 0xCA5CFB80, 0xCA5DFB80, 0xCA5EFB80, 0xCA5FFB80, 0xCA60FB80, 0xCA61FB80, 0xCA62FB80, 0xCA63FB80, 0xCA64FB80, 0xCA65FB80, 0xCA66FB80, 0xCA67FB80, 0xCA68FB80, 0xCA69FB80, + 0xCA6AFB80, 0xCA6BFB80, 0xCA6CFB80, 0xCA6DFB80, 0xCA6EFB80, 0xCA6FFB80, 0xCA70FB80, 0xCA71FB80, 0xCA72FB80, 0xCA73FB80, 0xCA74FB80, 0xCA75FB80, 0xCA76FB80, 0xCA77FB80, 0xCA78FB80, + 0xCA79FB80, 0xCA7AFB80, 0xCA7BFB80, 0xCA7CFB80, 0xCA7DFB80, 0xCA7EFB80, 0xCA7FFB80, 0xCA80FB80, 0xCA81FB80, 0xCA82FB80, 0xCA83FB80, 0xCA84FB80, 0xCA85FB80, 0xCA86FB80, 0xCA87FB80, + 0xCA88FB80, 0xCA89FB80, 0xCA8AFB80, 0xCA8BFB80, 0xCA8CFB80, 0xCA8DFB80, 0xCA8EFB80, 0xCA8FFB80, 0xCA90FB80, 0xCA91FB80, 
0xCA92FB80, 0xCA93FB80, 0xCA94FB80, 0xCA95FB80, 0xCA96FB80, + 0xCA97FB80, 0xCA98FB80, 0xCA99FB80, 0xCA9AFB80, 0xCA9BFB80, 0xCA9CFB80, 0xCA9DFB80, 0xCA9EFB80, 0xCA9FFB80, 0xCAA0FB80, 0xCAA1FB80, 0xCAA2FB80, 0xCAA3FB80, 0xCAA4FB80, 0xCAA5FB80, + 0xCAA6FB80, 0xCAA7FB80, 0xCAA8FB80, 0xCAA9FB80, 0xCAAAFB80, 0xCAABFB80, 0xCAACFB80, 0xCAADFB80, 0xCAAEFB80, 0xCAAFFB80, 0xCAB0FB80, 0xCAB1FB80, 0xCAB2FB80, 0xCAB3FB80, 0xCAB4FB80, + 0xCAB5FB80, 0xCAB6FB80, 0xCAB7FB80, 0xCAB8FB80, 0xCAB9FB80, 0xCABAFB80, 0xCABBFB80, 0xCABCFB80, 0xCABDFB80, 0xCABEFB80, 0xCABFFB80, 0xCAC0FB80, 0xCAC1FB80, 0xCAC2FB80, 0xCAC3FB80, + 0xCAC4FB80, 0xCAC5FB80, 0xCAC6FB80, 0xCAC7FB80, 0xCAC8FB80, 0xCAC9FB80, 0xCACAFB80, 0xCACBFB80, 0xCACCFB80, 0xCACDFB80, 0xCACEFB80, 0xCACFFB80, 0xCAD0FB80, 0xCAD1FB80, 0xCAD2FB80, + 0xCAD3FB80, 0xCAD4FB80, 0xCAD5FB80, 0xCAD6FB80, 0xCAD7FB80, 0xCAD8FB80, 0xCAD9FB80, 0xCADAFB80, 0xCADBFB80, 0xCADCFB80, 0xCADDFB80, 0xCADEFB80, 0xCADFFB80, 0xCAE0FB80, 0xCAE1FB80, + 0xCAE2FB80, 0xCAE3FB80, 0xCAE4FB80, 0xCAE5FB80, 0xCAE6FB80, 0xCAE7FB80, 0xCAE8FB80, 0xCAE9FB80, 0xCAEAFB80, 0xCAEBFB80, 0xCAECFB80, 0xCAEDFB80, 0xCAEEFB80, 0xCAEFFB80, 0xCAF0FB80, + 0xCAF1FB80, 0xCAF2FB80, 0xCAF3FB80, 0xCAF4FB80, 0xCAF5FB80, 0xCAF6FB80, 0xCAF7FB80, 0xCAF8FB80, 0xCAF9FB80, 0xCAFAFB80, 0xCAFBFB80, 0xCAFCFB80, 0xCAFDFB80, 0xCAFEFB80, 0xCAFFFB80, + 0xCB00FB80, 0xCB01FB80, 0xCB02FB80, 0xCB03FB80, 0xCB04FB80, 0xCB05FB80, 0xCB06FB80, 0xCB07FB80, 0xCB08FB80, 0xCB09FB80, 0xCB0AFB80, 0xCB0BFB80, 0xCB0CFB80, 0xCB0DFB80, 0xCB0EFB80, + 0xCB0FFB80, 0xCB10FB80, 0xCB11FB80, 0xCB12FB80, 0xCB13FB80, 0xCB14FB80, 0xCB15FB80, 0xCB16FB80, 0xCB17FB80, 0xCB18FB80, 0xCB19FB80, 0xCB1AFB80, 0xCB1BFB80, 0xCB1CFB80, 0xCB1DFB80, + 0xCB1EFB80, 0xCB1FFB80, 0xCB20FB80, 0xCB21FB80, 0xCB22FB80, 0xCB23FB80, 0xCB24FB80, 0xCB25FB80, 0xCB26FB80, 0xCB27FB80, 0xCB28FB80, 0xCB29FB80, 0xCB2AFB80, 0xCB2BFB80, 0xCB2CFB80, + 0xCB2DFB80, 0xCB2EFB80, 0xCB2FFB80, 0xCB30FB80, 0xCB31FB80, 0xCB32FB80, 0xCB33FB80, 0xCB34FB80, 0xCB35FB80, 
0xCB36FB80, 0xCB37FB80, 0xCB38FB80, 0xCB39FB80, 0xCB3AFB80, 0xCB3BFB80, + 0xCB3CFB80, 0xCB3DFB80, 0xCB3EFB80, 0xCB3FFB80, 0xCB40FB80, 0xCB41FB80, 0xCB42FB80, 0xCB43FB80, 0xCB44FB80, 0xCB45FB80, 0xCB46FB80, 0xCB47FB80, 0xCB48FB80, 0xCB49FB80, 0xCB4AFB80, + 0xCB4BFB80, 0xCB4CFB80, 0xCB4DFB80, 0xCB4EFB80, 0xCB4FFB80, 0xCB50FB80, 0xCB51FB80, 0xCB52FB80, 0xCB53FB80, 0xCB54FB80, 0xCB55FB80, 0xCB56FB80, 0xCB57FB80, 0xCB58FB80, 0xCB59FB80, + 0xCB5AFB80, 0xCB5BFB80, 0xCB5CFB80, 0xCB5DFB80, 0xCB5EFB80, 0xCB5FFB80, 0xCB60FB80, 0xCB61FB80, 0xCB62FB80, 0xCB63FB80, 0xCB64FB80, 0xCB65FB80, 0xCB66FB80, 0xCB67FB80, 0xCB68FB80, + 0xCB69FB80, 0xCB6AFB80, 0xCB6BFB80, 0xCB6CFB80, 0xCB6DFB80, 0xCB6EFB80, 0xCB6FFB80, 0xCB70FB80, 0xCB71FB80, 0xCB72FB80, 0xCB73FB80, 0xCB74FB80, 0xCB75FB80, 0xCB76FB80, 0xCB77FB80, + 0xCB78FB80, 0xCB79FB80, 0xCB7AFB80, 0xCB7BFB80, 0xCB7CFB80, 0xCB7DFB80, 0xCB7EFB80, 0xCB7FFB80, 0xCB80FB80, 0xCB81FB80, 0xCB82FB80, 0xCB83FB80, 0xCB84FB80, 0xCB85FB80, 0xCB86FB80, + 0xCB87FB80, 0xCB88FB80, 0xCB89FB80, 0xCB8AFB80, 0xCB8BFB80, 0xCB8CFB80, 0xCB8DFB80, 0xCB8EFB80, 0xCB8FFB80, 0xCB90FB80, 0xCB91FB80, 0xCB92FB80, 0xCB93FB80, 0xCB94FB80, 0xCB95FB80, + 0xCB96FB80, 0xCB97FB80, 0xCB98FB80, 0xCB99FB80, 0xCB9AFB80, 0xCB9BFB80, 0xCB9CFB80, 0xCB9DFB80, 0xCB9EFB80, 0xCB9FFB80, 0xCBA0FB80, 0xCBA1FB80, 0xCBA2FB80, 0xCBA3FB80, 0xCBA4FB80, + 0xCBA5FB80, 0xCBA6FB80, 0xCBA7FB80, 0xCBA8FB80, 0xCBA9FB80, 0xCBAAFB80, 0xCBABFB80, 0xCBACFB80, 0xCBADFB80, 0xCBAEFB80, 0xCBAFFB80, 0xCBB0FB80, 0xCBB1FB80, 0xCBB2FB80, 0xCBB3FB80, + 0xCBB4FB80, 0xCBB5FB80, 0xCBB6FB80, 0xCBB7FB80, 0xCBB8FB80, 0xCBB9FB80, 0xCBBAFB80, 0xCBBBFB80, 0xCBBCFB80, 0xCBBDFB80, 0xCBBEFB80, 0xCBBFFB80, 0xCBC0FB80, 0xCBC1FB80, 0xCBC2FB80, + 0xCBC3FB80, 0xCBC4FB80, 0xCBC5FB80, 0xCBC6FB80, 0xCBC7FB80, 0xCBC8FB80, 0xCBC9FB80, 0xCBCAFB80, 0xCBCBFB80, 0xCBCCFB80, 0xCBCDFB80, 0xCBCEFB80, 0xCBCFFB80, 0xCBD0FB80, 0xCBD1FB80, + 0xCBD2FB80, 0xCBD3FB80, 0xCBD4FB80, 0xCBD5FB80, 0xCBD6FB80, 0xCBD7FB80, 0xCBD8FB80, 0xCBD9FB80, 
0xCBDAFB80, 0xCBDBFB80, 0xCBDCFB80, 0xCBDDFB80, 0xCBDEFB80, 0xCBDFFB80, 0xCBE0FB80, + 0xCBE1FB80, 0xCBE2FB80, 0xCBE3FB80, 0xCBE4FB80, 0xCBE5FB80, 0xCBE6FB80, 0xCBE7FB80, 0xCBE8FB80, 0xCBE9FB80, 0xCBEAFB80, 0xCBEBFB80, 0xCBECFB80, 0xCBEDFB80, 0xCBEEFB80, 0xCBEFFB80, + 0xCBF0FB80, 0xCBF1FB80, 0xCBF2FB80, 0xCBF3FB80, 0xCBF4FB80, 0xCBF5FB80, 0xCBF6FB80, 0xCBF7FB80, 0xCBF8FB80, 0xCBF9FB80, 0xCBFAFB80, 0xCBFBFB80, 0xCBFCFB80, 0xCBFDFB80, 0xCBFEFB80, + 0xCBFFFB80, 0xCC00FB80, 0xCC01FB80, 0xCC02FB80, 0xCC03FB80, 0xCC04FB80, 0xCC05FB80, 0xCC06FB80, 0xCC07FB80, 0xCC08FB80, 0xCC09FB80, 0xCC0AFB80, 0xCC0BFB80, 0xCC0CFB80, 0xCC0DFB80, + 0xCC0EFB80, 0xCC0FFB80, 0xCC10FB80, 0xCC11FB80, 0xCC12FB80, 0xCC13FB80, 0xCC14FB80, 0xCC15FB80, 0xCC16FB80, 0xCC17FB80, 0xCC18FB80, 0xCC19FB80, 0xCC1AFB80, 0xCC1BFB80, 0xCC1CFB80, + 0xCC1DFB80, 0xCC1EFB80, 0xCC1FFB80, 0xCC20FB80, 0xCC21FB80, 0xCC22FB80, 0xCC23FB80, 0xCC24FB80, 0xCC25FB80, 0xCC26FB80, 0xCC27FB80, 0xCC28FB80, 0xCC29FB80, 0xCC2AFB80, 0xCC2BFB80, + 0xCC2CFB80, 0xCC2DFB80, 0xCC2EFB80, 0xCC2FFB80, 0xCC30FB80, 0xCC31FB80, 0xCC32FB80, 0xCC33FB80, 0xCC34FB80, 0xCC35FB80, 0xCC36FB80, 0xCC37FB80, 0xCC38FB80, 0xCC39FB80, 0xCC3AFB80, + 0xCC3BFB80, 0xCC3CFB80, 0xCC3DFB80, 0xCC3EFB80, 0xCC3FFB80, 0xCC40FB80, 0xCC41FB80, 0xCC42FB80, 0xCC43FB80, 0xCC44FB80, 0xCC45FB80, 0xCC46FB80, 0xCC47FB80, 0xCC48FB80, 0xCC49FB80, + 0xCC4AFB80, 0xCC4BFB80, 0xCC4CFB80, 0xCC4DFB80, 0xCC4EFB80, 0xCC4FFB80, 0xCC50FB80, 0xCC51FB80, 0xCC52FB80, 0xCC53FB80, 0xCC54FB80, 0xCC55FB80, 0xCC56FB80, 0xCC57FB80, 0xCC58FB80, + 0xCC59FB80, 0xCC5AFB80, 0xCC5BFB80, 0xCC5CFB80, 0xCC5DFB80, 0xCC5EFB80, 0xCC5FFB80, 0xCC60FB80, 0xCC61FB80, 0xCC62FB80, 0xCC63FB80, 0xCC64FB80, 0xCC65FB80, 0xCC66FB80, 0xCC67FB80, + 0xCC68FB80, 0xCC69FB80, 0xCC6AFB80, 0xCC6BFB80, 0xCC6CFB80, 0xCC6DFB80, 0xCC6EFB80, 0xCC6FFB80, 0xCC70FB80, 0xCC71FB80, 0xCC72FB80, 0xCC73FB80, 0xCC74FB80, 0xCC75FB80, 0xCC76FB80, + 0xCC77FB80, 0xCC78FB80, 0xCC79FB80, 0xCC7AFB80, 0xCC7BFB80, 0xCC7CFB80, 0xCC7DFB80, 
0xCC7EFB80, 0xCC7FFB80, 0xCC80FB80, 0xCC81FB80, 0xCC82FB80, 0xCC83FB80, 0xCC84FB80, 0xCC85FB80, + 0xCC86FB80, 0xCC87FB80, 0xCC88FB80, 0xCC89FB80, 0xCC8AFB80, 0xCC8BFB80, 0xCC8CFB80, 0xCC8DFB80, 0xCC8EFB80, 0xCC8FFB80, 0xCC90FB80, 0xCC91FB80, 0xCC92FB80, 0xCC93FB80, 0xCC94FB80, + 0xCC95FB80, 0xCC96FB80, 0xCC97FB80, 0xCC98FB80, 0xCC99FB80, 0xCC9AFB80, 0xCC9BFB80, 0xCC9CFB80, 0xCC9DFB80, 0xCC9EFB80, 0xCC9FFB80, 0xCCA0FB80, 0xCCA1FB80, 0xCCA2FB80, 0xCCA3FB80, + 0xCCA4FB80, 0xCCA5FB80, 0xCCA6FB80, 0xCCA7FB80, 0xCCA8FB80, 0xCCA9FB80, 0xCCAAFB80, 0xCCABFB80, 0xCCACFB80, 0xCCADFB80, 0xCCAEFB80, 0xCCAFFB80, 0xCCB0FB80, 0xCCB1FB80, 0xCCB2FB80, + 0xCCB3FB80, 0xCCB4FB80, 0xCCB5FB80, 0xCCB6FB80, 0xCCB7FB80, 0xCCB8FB80, 0xCCB9FB80, 0xCCBAFB80, 0xCCBBFB80, 0xCCBCFB80, 0xCCBDFB80, 0xCCBEFB80, 0xCCBFFB80, 0xCCC0FB80, 0xCCC1FB80, + 0xCCC2FB80, 0xCCC3FB80, 0xCCC4FB80, 0xCCC5FB80, 0xCCC6FB80, 0xCCC7FB80, 0xCCC8FB80, 0xCCC9FB80, 0xCCCAFB80, 0xCCCBFB80, 0xCCCCFB80, 0xCCCDFB80, 0xCCCEFB80, 0xCCCFFB80, 0xCCD0FB80, + 0xCCD1FB80, 0xCCD2FB80, 0xCCD3FB80, 0xCCD4FB80, 0xCCD5FB80, 0xCCD6FB80, 0xCCD7FB80, 0xCCD8FB80, 0xCCD9FB80, 0xCCDAFB80, 0xCCDBFB80, 0xCCDCFB80, 0xCCDDFB80, 0xCCDEFB80, 0xCCDFFB80, + 0xCCE0FB80, 0xCCE1FB80, 0xCCE2FB80, 0xCCE3FB80, 0xCCE4FB80, 0xCCE5FB80, 0xCCE6FB80, 0xCCE7FB80, 0xCCE8FB80, 0xCCE9FB80, 0xCCEAFB80, 0xCCEBFB80, 0xCCECFB80, 0xCCEDFB80, 0xCCEEFB80, + 0xCCEFFB80, 0xCCF0FB80, 0xCCF1FB80, 0xCCF2FB80, 0xCCF3FB80, 0xCCF4FB80, 0xCCF5FB80, 0xCCF6FB80, 0xCCF7FB80, 0xCCF8FB80, 0xCCF9FB80, 0xCCFAFB80, 0xCCFBFB80, 0xCCFCFB80, 0xCCFDFB80, + 0xCCFEFB80, 0xCCFFFB80, 0xCD00FB80, 0xCD01FB80, 0xCD02FB80, 0xCD03FB80, 0xCD04FB80, 0xCD05FB80, 0xCD06FB80, 0xCD07FB80, 0xCD08FB80, 0xCD09FB80, 0xCD0AFB80, 0xCD0BFB80, 0xCD0CFB80, + 0xCD0DFB80, 0xCD0EFB80, 0xCD0FFB80, 0xCD10FB80, 0xCD11FB80, 0xCD12FB80, 0xCD13FB80, 0xCD14FB80, 0xCD15FB80, 0xCD16FB80, 0xCD17FB80, 0xCD18FB80, 0xCD19FB80, 0xCD1AFB80, 0xCD1BFB80, + 0xCD1CFB80, 0xCD1DFB80, 0xCD1EFB80, 0xCD1FFB80, 0xCD20FB80, 0xCD21FB80, 
0xCD22FB80, 0xCD23FB80, 0xCD24FB80, 0xCD25FB80, 0xCD26FB80, 0xCD27FB80, 0xCD28FB80, 0xCD29FB80, 0xCD2AFB80, + 0xCD2BFB80, 0xCD2CFB80, 0xCD2DFB80, 0xCD2EFB80, 0xCD2FFB80, 0xCD30FB80, 0xCD31FB80, 0xCD32FB80, 0xCD33FB80, 0xCD34FB80, 0xCD35FB80, 0xCD36FB80, 0xCD37FB80, 0xCD38FB80, 0xCD39FB80, + 0xCD3AFB80, 0xCD3BFB80, 0xCD3CFB80, 0xCD3DFB80, 0xCD3EFB80, 0xCD3FFB80, 0xCD40FB80, 0xCD41FB80, 0xCD42FB80, 0xCD43FB80, 0xCD44FB80, 0xCD45FB80, 0xCD46FB80, 0xCD47FB80, 0xCD48FB80, + 0xCD49FB80, 0xCD4AFB80, 0xCD4BFB80, 0xCD4CFB80, 0xCD4DFB80, 0xCD4EFB80, 0xCD4FFB80, 0xCD50FB80, 0xCD51FB80, 0xCD52FB80, 0xCD53FB80, 0xCD54FB80, 0xCD55FB80, 0xCD56FB80, 0xCD57FB80, + 0xCD58FB80, 0xCD59FB80, 0xCD5AFB80, 0xCD5BFB80, 0xCD5CFB80, 0xCD5DFB80, 0xCD5EFB80, 0xCD5FFB80, 0xCD60FB80, 0xCD61FB80, 0xCD62FB80, 0xCD63FB80, 0xCD64FB80, 0xCD65FB80, 0xCD66FB80, + 0xCD67FB80, 0xCD68FB80, 0xCD69FB80, 0xCD6AFB80, 0xCD6BFB80, 0xCD6CFB80, 0xCD6DFB80, 0xCD6EFB80, 0xCD6FFB80, 0xCD70FB80, 0xCD71FB80, 0xCD72FB80, 0xCD73FB80, 0xCD74FB80, 0xCD75FB80, + 0xCD76FB80, 0xCD77FB80, 0xCD78FB80, 0xCD79FB80, 0xCD7AFB80, 0xCD7BFB80, 0xCD7CFB80, 0xCD7DFB80, 0xCD7EFB80, 0xCD7FFB80, 0xCD80FB80, 0xCD81FB80, 0xCD82FB80, 0xCD83FB80, 0xCD84FB80, + 0xCD85FB80, 0xCD86FB80, 0xCD87FB80, 0xCD88FB80, 0xCD89FB80, 0xCD8AFB80, 0xCD8BFB80, 0xCD8CFB80, 0xCD8DFB80, 0xCD8EFB80, 0xCD8FFB80, 0xCD90FB80, 0xCD91FB80, 0xCD92FB80, 0xCD93FB80, + 0xCD94FB80, 0xCD95FB80, 0xCD96FB80, 0xCD97FB80, 0xCD98FB80, 0xCD99FB80, 0xCD9AFB80, 0xCD9BFB80, 0xCD9CFB80, 0xCD9DFB80, 0xCD9EFB80, 0xCD9FFB80, 0xCDA0FB80, 0xCDA1FB80, 0xCDA2FB80, + 0xCDA3FB80, 0xCDA4FB80, 0xCDA5FB80, 0xCDA6FB80, 0xCDA7FB80, 0xCDA8FB80, 0xCDA9FB80, 0xCDAAFB80, 0xCDABFB80, 0xCDACFB80, 0xCDADFB80, 0xCDAEFB80, 0xCDAFFB80, 0xCDB0FB80, 0xCDB1FB80, + 0xCDB2FB80, 0xCDB3FB80, 0xCDB4FB80, 0xCDB5FB80, 0xCDB6FBC0, 0xCDB7FBC0, 0xCDB8FBC0, 0xCDB9FBC0, 0xCDBAFBC0, 0xCDBBFBC0, 0xCDBCFBC0, 0xCDBDFBC0, 0xCDBEFBC0, 0xCDBFFBC0, 0xEAA, + 0xEAB, 0xEAC, 0xEAD, 0xEAE, 0xEAF, 0xEB0, 0xEB1, 0xEB2, 0xEB3, 0xEB4, 
0xEB5, 0xEB6, 0xEB7, 0xEB8, 0xEB9, + 0xEBA, 0xEBB, 0xEBC, 0xEBD, 0xEBE, 0xEBF, 0xEC0, 0xEC1, 0xEC2, 0xEC3, 0xEC4, 0xEC5, 0xEC6, 0xEC7, 0xEC8, + 0xEC9, 0xECA, 0xECB, 0xECC, 0xECD, 0xECE, 0xECF, 0xED0, 0xED1, 0xED2, 0xED3, 0xED4, 0xED5, 0xED6, 0xED7, + 0xED8, 0xED9, 0xEDA, 0xEDB, 0xEDC, 0xEDD, 0xEDE, 0xEDF, 0xEE0, 0xEE1, 0xEE2, 0xEE3, 0xEE4, 0xEE5, 0xEE6, + 0xEE7, 0xEE8, 0xEE9, 0xCE00FB40, 0xCE01FB40, 0xCE02FB40, 0xCE03FB40, 0xCE04FB40, 0xCE05FB40, 0xCE06FB40, 0xCE07FB40, 0xCE08FB40, 0xCE09FB40, 0xCE0AFB40, 0xCE0BFB40, + 0xCE0CFB40, 0xCE0DFB40, 0xCE0EFB40, 0xCE0FFB40, 0xCE10FB40, 0xCE11FB40, 0xCE12FB40, 0xCE13FB40, 0xCE14FB40, 0xCE15FB40, 0xCE16FB40, 0xCE17FB40, 0xCE18FB40, 0xCE19FB40, 0xCE1AFB40, + 0xCE1BFB40, 0xCE1CFB40, 0xCE1DFB40, 0xCE1EFB40, 0xCE1FFB40, 0xCE20FB40, 0xCE21FB40, 0xCE22FB40, 0xCE23FB40, 0xCE24FB40, 0xCE25FB40, 0xCE26FB40, 0xCE27FB40, 0xCE28FB40, 0xCE29FB40, + 0xCE2AFB40, 0xCE2BFB40, 0xCE2CFB40, 0xCE2DFB40, 0xCE2EFB40, 0xCE2FFB40, 0xCE30FB40, 0xCE31FB40, 0xCE32FB40, 0xCE33FB40, 0xCE34FB40, 0xCE35FB40, 0xCE36FB40, 0xCE37FB40, 0xCE38FB40, + 0xCE39FB40, 0xCE3AFB40, 0xCE3BFB40, 0xCE3CFB40, 0xCE3DFB40, 0xCE3EFB40, 0xCE3FFB40, 0xCE40FB40, 0xCE41FB40, 0xCE42FB40, 0xCE43FB40, 0xCE44FB40, 0xCE45FB40, 0xCE46FB40, 0xCE47FB40, + 0xCE48FB40, 0xCE49FB40, 0xCE4AFB40, 0xCE4BFB40, 0xCE4CFB40, 0xCE4DFB40, 0xCE4EFB40, 0xCE4FFB40, 0xCE50FB40, 0xCE51FB40, 0xCE52FB40, 0xCE53FB40, 0xCE54FB40, 0xCE55FB40, 0xCE56FB40, + 0xCE57FB40, 0xCE58FB40, 0xCE59FB40, 0xCE5AFB40, 0xCE5BFB40, 0xCE5CFB40, 0xCE5DFB40, 0xCE5EFB40, 0xCE5FFB40, 0xCE60FB40, 0xCE61FB40, 0xCE62FB40, 0xCE63FB40, 0xCE64FB40, 0xCE65FB40, + 0xCE66FB40, 0xCE67FB40, 0xCE68FB40, 0xCE69FB40, 0xCE6AFB40, 0xCE6BFB40, 0xCE6CFB40, 0xCE6DFB40, 0xCE6EFB40, 0xCE6FFB40, 0xCE70FB40, 0xCE71FB40, 0xCE72FB40, 0xCE73FB40, 0xCE74FB40, + 0xCE75FB40, 0xCE76FB40, 0xCE77FB40, 0xCE78FB40, 0xCE79FB40, 0xCE7AFB40, 0xCE7BFB40, 0xCE7CFB40, 0xCE7DFB40, 0xCE7EFB40, 0xCE7FFB40, 0xCE80FB40, 0xCE81FB40, 0xCE82FB40, 0xCE83FB40, + 0xCE84FB40, 
0xCE85FB40, 0xCE86FB40, 0xCE87FB40, 0xCE88FB40, 0xCE89FB40, 0xCE8AFB40, 0xCE8BFB40, 0xCE8CFB40, 0xCE8DFB40, 0xCE8EFB40, 0xCE8FFB40, 0xCE90FB40, 0xCE91FB40, 0xCE92FB40, + 0xCE93FB40, 0xCE94FB40, 0xCE95FB40, 0xCE96FB40, 0xCE97FB40, 0xCE98FB40, 0xCE99FB40, 0xCE9AFB40, 0xCE9BFB40, 0xCE9CFB40, 0xCE9DFB40, 0xCE9EFB40, 0xCE9FFB40, 0xCEA0FB40, 0xCEA1FB40, + 0xCEA2FB40, 0xCEA3FB40, 0xCEA4FB40, 0xCEA5FB40, 0xCEA6FB40, 0xCEA7FB40, 0xCEA8FB40, 0xCEA9FB40, 0xCEAAFB40, 0xCEABFB40, 0xCEACFB40, 0xCEADFB40, 0xCEAEFB40, 0xCEAFFB40, 0xCEB0FB40, + 0xCEB1FB40, 0xCEB2FB40, 0xCEB3FB40, 0xCEB4FB40, 0xCEB5FB40, 0xCEB6FB40, 0xCEB7FB40, 0xCEB8FB40, 0xCEB9FB40, 0xCEBAFB40, 0xCEBBFB40, 0xCEBCFB40, 0xCEBDFB40, 0xCEBEFB40, 0xCEBFFB40, + 0xCEC0FB40, 0xCEC1FB40, 0xCEC2FB40, 0xCEC3FB40, 0xCEC4FB40, 0xCEC5FB40, 0xCEC6FB40, 0xCEC7FB40, 0xCEC8FB40, 0xCEC9FB40, 0xCECAFB40, 0xCECBFB40, 0xCECCFB40, 0xCECDFB40, 0xCECEFB40, + 0xCECFFB40, 0xCED0FB40, 0xCED1FB40, 0xCED2FB40, 0xCED3FB40, 0xCED4FB40, 0xCED5FB40, 0xCED6FB40, 0xCED7FB40, 0xCED8FB40, 0xCED9FB40, 0xCEDAFB40, 0xCEDBFB40, 0xCEDCFB40, 0xCEDDFB40, + 0xCEDEFB40, 0xCEDFFB40, 0xCEE0FB40, 0xCEE1FB40, 0xCEE2FB40, 0xCEE3FB40, 0xCEE4FB40, 0xCEE5FB40, 0xCEE6FB40, 0xCEE7FB40, 0xCEE8FB40, 0xCEE9FB40, 0xCEEAFB40, 0xCEEBFB40, 0xCEECFB40, + 0xCEEDFB40, 0xCEEEFB40, 0xCEEFFB40, 0xCEF0FB40, 0xCEF1FB40, 0xCEF2FB40, 0xCEF3FB40, 0xCEF4FB40, 0xCEF5FB40, 0xCEF6FB40, 0xCEF7FB40, 0xCEF8FB40, 0xCEF9FB40, 0xCEFAFB40, 0xCEFBFB40, + 0xCEFCFB40, 0xCEFDFB40, 0xCEFEFB40, 0xCEFFFB40, 0xCF00FB40, 0xCF01FB40, 0xCF02FB40, 0xCF03FB40, 0xCF04FB40, 0xCF05FB40, 0xCF06FB40, 0xCF07FB40, 0xCF08FB40, 0xCF09FB40, 0xCF0AFB40, + 0xCF0BFB40, 0xCF0CFB40, 0xCF0DFB40, 0xCF0EFB40, 0xCF0FFB40, 0xCF10FB40, 0xCF11FB40, 0xCF12FB40, 0xCF13FB40, 0xCF14FB40, 0xCF15FB40, 0xCF16FB40, 0xCF17FB40, 0xCF18FB40, 0xCF19FB40, + 0xCF1AFB40, 0xCF1BFB40, 0xCF1CFB40, 0xCF1DFB40, 0xCF1EFB40, 0xCF1FFB40, 0xCF20FB40, 0xCF21FB40, 0xCF22FB40, 0xCF23FB40, 0xCF24FB40, 0xCF25FB40, 0xCF26FB40, 0xCF27FB40, 0xCF28FB40, + 
0xCF29FB40, 0xCF2AFB40, 0xCF2BFB40, 0xCF2CFB40, 0xCF2DFB40, 0xCF2EFB40, 0xCF2FFB40, 0xCF30FB40, 0xCF31FB40, 0xCF32FB40, 0xCF33FB40, 0xCF34FB40, 0xCF35FB40, 0xCF36FB40, 0xCF37FB40, + 0xCF38FB40, 0xCF39FB40, 0xCF3AFB40, 0xCF3BFB40, 0xCF3CFB40, 0xCF3DFB40, 0xCF3EFB40, 0xCF3FFB40, 0xCF40FB40, 0xCF41FB40, 0xCF42FB40, 0xCF43FB40, 0xCF44FB40, 0xCF45FB40, 0xCF46FB40, + 0xCF47FB40, 0xCF48FB40, 0xCF49FB40, 0xCF4AFB40, 0xCF4BFB40, 0xCF4CFB40, 0xCF4DFB40, 0xCF4EFB40, 0xCF4FFB40, 0xCF50FB40, 0xCF51FB40, 0xCF52FB40, 0xCF53FB40, 0xCF54FB40, 0xCF55FB40, + 0xCF56FB40, 0xCF57FB40, 0xCF58FB40, 0xCF59FB40, 0xCF5AFB40, 0xCF5BFB40, 0xCF5CFB40, 0xCF5DFB40, 0xCF5EFB40, 0xCF5FFB40, 0xCF60FB40, 0xCF61FB40, 0xCF62FB40, 0xCF63FB40, 0xCF64FB40, + 0xCF65FB40, 0xCF66FB40, 0xCF67FB40, 0xCF68FB40, 0xCF69FB40, 0xCF6AFB40, 0xCF6BFB40, 0xCF6CFB40, 0xCF6DFB40, 0xCF6EFB40, 0xCF6FFB40, 0xCF70FB40, 0xCF71FB40, 0xCF72FB40, 0xCF73FB40, + 0xCF74FB40, 0xCF75FB40, 0xCF76FB40, 0xCF77FB40, 0xCF78FB40, 0xCF79FB40, 0xCF7AFB40, 0xCF7BFB40, 0xCF7CFB40, 0xCF7DFB40, 0xCF7EFB40, 0xCF7FFB40, 0xCF80FB40, 0xCF81FB40, 0xCF82FB40, + 0xCF83FB40, 0xCF84FB40, 0xCF85FB40, 0xCF86FB40, 0xCF87FB40, 0xCF88FB40, 0xCF89FB40, 0xCF8AFB40, 0xCF8BFB40, 0xCF8CFB40, 0xCF8DFB40, 0xCF8EFB40, 0xCF8FFB40, 0xCF90FB40, 0xCF91FB40, + 0xCF92FB40, 0xCF93FB40, 0xCF94FB40, 0xCF95FB40, 0xCF96FB40, 0xCF97FB40, 0xCF98FB40, 0xCF99FB40, 0xCF9AFB40, 0xCF9BFB40, 0xCF9CFB40, 0xCF9DFB40, 0xCF9EFB40, 0xCF9FFB40, 0xCFA0FB40, + 0xCFA1FB40, 0xCFA2FB40, 0xCFA3FB40, 0xCFA4FB40, 0xCFA5FB40, 0xCFA6FB40, 0xCFA7FB40, 0xCFA8FB40, 0xCFA9FB40, 0xCFAAFB40, 0xCFABFB40, 0xCFACFB40, 0xCFADFB40, 0xCFAEFB40, 0xCFAFFB40, + 0xCFB0FB40, 0xCFB1FB40, 0xCFB2FB40, 0xCFB3FB40, 0xCFB4FB40, 0xCFB5FB40, 0xCFB6FB40, 0xCFB7FB40, 0xCFB8FB40, 0xCFB9FB40, 0xCFBAFB40, 0xCFBBFB40, 0xCFBCFB40, 0xCFBDFB40, 0xCFBEFB40, + 0xCFBFFB40, 0xCFC0FB40, 0xCFC1FB40, 0xCFC2FB40, 0xCFC3FB40, 0xCFC4FB40, 0xCFC5FB40, 0xCFC6FB40, 0xCFC7FB40, 0xCFC8FB40, 0xCFC9FB40, 0xCFCAFB40, 0xCFCBFB40, 0xCFCCFB40, 0xCFCDFB40, 
+ 0xCFCEFB40, 0xCFCFFB40, 0xCFD0FB40, 0xCFD1FB40, 0xCFD2FB40, 0xCFD3FB40, 0xCFD4FB40, 0xCFD5FB40, 0xCFD6FB40, 0xCFD7FB40, 0xCFD8FB40, 0xCFD9FB40, 0xCFDAFB40, 0xCFDBFB40, 0xCFDCFB40, + 0xCFDDFB40, 0xCFDEFB40, 0xCFDFFB40, 0xCFE0FB40, 0xCFE1FB40, 0xCFE2FB40, 0xCFE3FB40, 0xCFE4FB40, 0xCFE5FB40, 0xCFE6FB40, 0xCFE7FB40, 0xCFE8FB40, 0xCFE9FB40, 0xCFEAFB40, 0xCFEBFB40, + 0xCFECFB40, 0xCFEDFB40, 0xCFEEFB40, 0xCFEFFB40, 0xCFF0FB40, 0xCFF1FB40, 0xCFF2FB40, 0xCFF3FB40, 0xCFF4FB40, 0xCFF5FB40, 0xCFF6FB40, 0xCFF7FB40, 0xCFF8FB40, 0xCFF9FB40, 0xCFFAFB40, + 0xCFFBFB40, 0xCFFCFB40, 0xCFFDFB40, 0xCFFEFB40, 0xCFFFFB40, 0xD000FB40, 0xD001FB40, 0xD002FB40, 0xD003FB40, 0xD004FB40, 0xD005FB40, 0xD006FB40, 0xD007FB40, 0xD008FB40, 0xD009FB40, + 0xD00AFB40, 0xD00BFB40, 0xD00CFB40, 0xD00DFB40, 0xD00EFB40, 0xD00FFB40, 0xD010FB40, 0xD011FB40, 0xD012FB40, 0xD013FB40, 0xD014FB40, 0xD015FB40, 0xD016FB40, 0xD017FB40, 0xD018FB40, + 0xD019FB40, 0xD01AFB40, 0xD01BFB40, 0xD01CFB40, 0xD01DFB40, 0xD01EFB40, 0xD01FFB40, 0xD020FB40, 0xD021FB40, 0xD022FB40, 0xD023FB40, 0xD024FB40, 0xD025FB40, 0xD026FB40, 0xD027FB40, + 0xD028FB40, 0xD029FB40, 0xD02AFB40, 0xD02BFB40, 0xD02CFB40, 0xD02DFB40, 0xD02EFB40, 0xD02FFB40, 0xD030FB40, 0xD031FB40, 0xD032FB40, 0xD033FB40, 0xD034FB40, 0xD035FB40, 0xD036FB40, + 0xD037FB40, 0xD038FB40, 0xD039FB40, 0xD03AFB40, 0xD03BFB40, 0xD03CFB40, 0xD03DFB40, 0xD03EFB40, 0xD03FFB40, 0xD040FB40, 0xD041FB40, 0xD042FB40, 0xD043FB40, 0xD044FB40, 0xD045FB40, + 0xD046FB40, 0xD047FB40, 0xD048FB40, 0xD049FB40, 0xD04AFB40, 0xD04BFB40, 0xD04CFB40, 0xD04DFB40, 0xD04EFB40, 0xD04FFB40, 0xD050FB40, 0xD051FB40, 0xD052FB40, 0xD053FB40, 0xD054FB40, + 0xD055FB40, 0xD056FB40, 0xD057FB40, 0xD058FB40, 0xD059FB40, 0xD05AFB40, 0xD05BFB40, 0xD05CFB40, 0xD05DFB40, 0xD05EFB40, 0xD05FFB40, 0xD060FB40, 0xD061FB40, 0xD062FB40, 0xD063FB40, + 0xD064FB40, 0xD065FB40, 0xD066FB40, 0xD067FB40, 0xD068FB40, 0xD069FB40, 0xD06AFB40, 0xD06BFB40, 0xD06CFB40, 0xD06DFB40, 0xD06EFB40, 0xD06FFB40, 0xD070FB40, 0xD071FB40, 
0xD072FB40, + 0xD073FB40, 0xD074FB40, 0xD075FB40, 0xD076FB40, 0xD077FB40, 0xD078FB40, 0xD079FB40, 0xD07AFB40, 0xD07BFB40, 0xD07CFB40, 0xD07DFB40, 0xD07EFB40, 0xD07FFB40, 0xD080FB40, 0xD081FB40, + 0xD082FB40, 0xD083FB40, 0xD084FB40, 0xD085FB40, 0xD086FB40, 0xD087FB40, 0xD088FB40, 0xD089FB40, 0xD08AFB40, 0xD08BFB40, 0xD08CFB40, 0xD08DFB40, 0xD08EFB40, 0xD08FFB40, 0xD090FB40, + 0xD091FB40, 0xD092FB40, 0xD093FB40, 0xD094FB40, 0xD095FB40, 0xD096FB40, 0xD097FB40, 0xD098FB40, 0xD099FB40, 0xD09AFB40, 0xD09BFB40, 0xD09CFB40, 0xD09DFB40, 0xD09EFB40, 0xD09FFB40, + 0xD0A0FB40, 0xD0A1FB40, 0xD0A2FB40, 0xD0A3FB40, 0xD0A4FB40, 0xD0A5FB40, 0xD0A6FB40, 0xD0A7FB40, 0xD0A8FB40, 0xD0A9FB40, 0xD0AAFB40, 0xD0ABFB40, 0xD0ACFB40, 0xD0ADFB40, 0xD0AEFB40, + 0xD0AFFB40, 0xD0B0FB40, 0xD0B1FB40, 0xD0B2FB40, 0xD0B3FB40, 0xD0B4FB40, 0xD0B5FB40, 0xD0B6FB40, 0xD0B7FB40, 0xD0B8FB40, 0xD0B9FB40, 0xD0BAFB40, 0xD0BBFB40, 0xD0BCFB40, 0xD0BDFB40, + 0xD0BEFB40, 0xD0BFFB40, 0xD0C0FB40, 0xD0C1FB40, 0xD0C2FB40, 0xD0C3FB40, 0xD0C4FB40, 0xD0C5FB40, 0xD0C6FB40, 0xD0C7FB40, 0xD0C8FB40, 0xD0C9FB40, 0xD0CAFB40, 0xD0CBFB40, 0xD0CCFB40, + 0xD0CDFB40, 0xD0CEFB40, 0xD0CFFB40, 0xD0D0FB40, 0xD0D1FB40, 0xD0D2FB40, 0xD0D3FB40, 0xD0D4FB40, 0xD0D5FB40, 0xD0D6FB40, 0xD0D7FB40, 0xD0D8FB40, 0xD0D9FB40, 0xD0DAFB40, 0xD0DBFB40, + 0xD0DCFB40, 0xD0DDFB40, 0xD0DEFB40, 0xD0DFFB40, 0xD0E0FB40, 0xD0E1FB40, 0xD0E2FB40, 0xD0E3FB40, 0xD0E4FB40, 0xD0E5FB40, 0xD0E6FB40, 0xD0E7FB40, 0xD0E8FB40, 0xD0E9FB40, 0xD0EAFB40, + 0xD0EBFB40, 0xD0ECFB40, 0xD0EDFB40, 0xD0EEFB40, 0xD0EFFB40, 0xD0F0FB40, 0xD0F1FB40, 0xD0F2FB40, 0xD0F3FB40, 0xD0F4FB40, 0xD0F5FB40, 0xD0F6FB40, 0xD0F7FB40, 0xD0F8FB40, 0xD0F9FB40, + 0xD0FAFB40, 0xD0FBFB40, 0xD0FCFB40, 0xD0FDFB40, 0xD0FEFB40, 0xD0FFFB40, 0xD100FB40, 0xD101FB40, 0xD102FB40, 0xD103FB40, 0xD104FB40, 0xD105FB40, 0xD106FB40, 0xD107FB40, 0xD108FB40, + 0xD109FB40, 0xD10AFB40, 0xD10BFB40, 0xD10CFB40, 0xD10DFB40, 0xD10EFB40, 0xD10FFB40, 0xD110FB40, 0xD111FB40, 0xD112FB40, 0xD113FB40, 0xD114FB40, 0xD115FB40, 
0xD116FB40, 0xD117FB40, + 0xD118FB40, 0xD119FB40, 0xD11AFB40, 0xD11BFB40, 0xD11CFB40, 0xD11DFB40, 0xD11EFB40, 0xD11FFB40, 0xD120FB40, 0xD121FB40, 0xD122FB40, 0xD123FB40, 0xD124FB40, 0xD125FB40, 0xD126FB40, + 0xD127FB40, 0xD128FB40, 0xD129FB40, 0xD12AFB40, 0xD12BFB40, 0xD12CFB40, 0xD12DFB40, 0xD12EFB40, 0xD12FFB40, 0xD130FB40, 0xD131FB40, 0xD132FB40, 0xD133FB40, 0xD134FB40, 0xD135FB40, + 0xD136FB40, 0xD137FB40, 0xD138FB40, 0xD139FB40, 0xD13AFB40, 0xD13BFB40, 0xD13CFB40, 0xD13DFB40, 0xD13EFB40, 0xD13FFB40, 0xD140FB40, 0xD141FB40, 0xD142FB40, 0xD143FB40, 0xD144FB40, + 0xD145FB40, 0xD146FB40, 0xD147FB40, 0xD148FB40, 0xD149FB40, 0xD14AFB40, 0xD14BFB40, 0xD14CFB40, 0xD14DFB40, 0xD14EFB40, 0xD14FFB40, 0xD150FB40, 0xD151FB40, 0xD152FB40, 0xD153FB40, + 0xD154FB40, 0xD155FB40, 0xD156FB40, 0xD157FB40, 0xD158FB40, 0xD159FB40, 0xD15AFB40, 0xD15BFB40, 0xD15CFB40, 0xD15DFB40, 0xD15EFB40, 0xD15FFB40, 0xD160FB40, 0xD161FB40, 0xD162FB40, + 0xD163FB40, 0xD164FB40, 0xD165FB40, 0xD166FB40, 0xD167FB40, 0xD168FB40, 0xD169FB40, 0xD16AFB40, 0xD16BFB40, 0xD16CFB40, 0xD16DFB40, 0xD16EFB40, 0xD16FFB40, 0xD170FB40, 0xD171FB40, + 0xD172FB40, 0xD173FB40, 0xD174FB40, 0xD175FB40, 0xD176FB40, 0xD177FB40, 0xD178FB40, 0xD179FB40, 0xD17AFB40, 0xD17BFB40, 0xD17CFB40, 0xD17DFB40, 0xD17EFB40, 0xD17FFB40, 0xD180FB40, + 0xD181FB40, 0xD182FB40, 0xD183FB40, 0xD184FB40, 0xD185FB40, 0xD186FB40, 0xD187FB40, 0xD188FB40, 0xD189FB40, 0xD18AFB40, 0xD18BFB40, 0xD18CFB40, 0xD18DFB40, 0xD18EFB40, 0xD18FFB40, + 0xD190FB40, 0xD191FB40, 0xD192FB40, 0xD193FB40, 0xD194FB40, 0xD195FB40, 0xD196FB40, 0xD197FB40, 0xD198FB40, 0xD199FB40, 0xD19AFB40, 0xD19BFB40, 0xD19CFB40, 0xD19DFB40, 0xD19EFB40, + 0xD19FFB40, 0xD1A0FB40, 0xD1A1FB40, 0xD1A2FB40, 0xD1A3FB40, 0xD1A4FB40, 0xD1A5FB40, 0xD1A6FB40, 0xD1A7FB40, 0xD1A8FB40, 0xD1A9FB40, 0xD1AAFB40, 0xD1ABFB40, 0xD1ACFB40, 0xD1ADFB40, + 0xD1AEFB40, 0xD1AFFB40, 0xD1B0FB40, 0xD1B1FB40, 0xD1B2FB40, 0xD1B3FB40, 0xD1B4FB40, 0xD1B5FB40, 0xD1B6FB40, 0xD1B7FB40, 0xD1B8FB40, 0xD1B9FB40, 
0xD1BAFB40, 0xD1BBFB40, 0xD1BCFB40, + 0xD1BDFB40, 0xD1BEFB40, 0xD1BFFB40, 0xD1C0FB40, 0xD1C1FB40, 0xD1C2FB40, 0xD1C3FB40, 0xD1C4FB40, 0xD1C5FB40, 0xD1C6FB40, 0xD1C7FB40, 0xD1C8FB40, 0xD1C9FB40, 0xD1CAFB40, 0xD1CBFB40, + 0xD1CCFB40, 0xD1CDFB40, 0xD1CEFB40, 0xD1CFFB40, 0xD1D0FB40, 0xD1D1FB40, 0xD1D2FB40, 0xD1D3FB40, 0xD1D4FB40, 0xD1D5FB40, 0xD1D6FB40, 0xD1D7FB40, 0xD1D8FB40, 0xD1D9FB40, 0xD1DAFB40, + 0xD1DBFB40, 0xD1DCFB40, 0xD1DDFB40, 0xD1DEFB40, 0xD1DFFB40, 0xD1E0FB40, 0xD1E1FB40, 0xD1E2FB40, 0xD1E3FB40, 0xD1E4FB40, 0xD1E5FB40, 0xD1E6FB40, 0xD1E7FB40, 0xD1E8FB40, 0xD1E9FB40, + 0xD1EAFB40, 0xD1EBFB40, 0xD1ECFB40, 0xD1EDFB40, 0xD1EEFB40, 0xD1EFFB40, 0xD1F0FB40, 0xD1F1FB40, 0xD1F2FB40, 0xD1F3FB40, 0xD1F4FB40, 0xD1F5FB40, 0xD1F6FB40, 0xD1F7FB40, 0xD1F8FB40, + 0xD1F9FB40, 0xD1FAFB40, 0xD1FBFB40, 0xD1FCFB40, 0xD1FDFB40, 0xD1FEFB40, 0xD1FFFB40, 0xD200FB40, 0xD201FB40, 0xD202FB40, 0xD203FB40, 0xD204FB40, 0xD205FB40, 0xD206FB40, 0xD207FB40, + 0xD208FB40, 0xD209FB40, 0xD20AFB40, 0xD20BFB40, 0xD20CFB40, 0xD20DFB40, 0xD20EFB40, 0xD20FFB40, 0xD210FB40, 0xD211FB40, 0xD212FB40, 0xD213FB40, 0xD214FB40, 0xD215FB40, 0xD216FB40, + 0xD217FB40, 0xD218FB40, 0xD219FB40, 0xD21AFB40, 0xD21BFB40, 0xD21CFB40, 0xD21DFB40, 0xD21EFB40, 0xD21FFB40, 0xD220FB40, 0xD221FB40, 0xD222FB40, 0xD223FB40, 0xD224FB40, 0xD225FB40, + 0xD226FB40, 0xD227FB40, 0xD228FB40, 0xD229FB40, 0xD22AFB40, 0xD22BFB40, 0xD22CFB40, 0xD22DFB40, 0xD22EFB40, 0xD22FFB40, 0xD230FB40, 0xD231FB40, 0xD232FB40, 0xD233FB40, 0xD234FB40, + 0xD235FB40, 0xD236FB40, 0xD237FB40, 0xD238FB40, 0xD239FB40, 0xD23AFB40, 0xD23BFB40, 0xD23CFB40, 0xD23DFB40, 0xD23EFB40, 0xD23FFB40, 0xD240FB40, 0xD241FB40, 0xD242FB40, 0xD243FB40, + 0xD244FB40, 0xD245FB40, 0xD246FB40, 0xD247FB40, 0xD248FB40, 0xD249FB40, 0xD24AFB40, 0xD24BFB40, 0xD24CFB40, 0xD24DFB40, 0xD24EFB40, 0xD24FFB40, 0xD250FB40, 0xD251FB40, 0xD252FB40, + 0xD253FB40, 0xD254FB40, 0xD255FB40, 0xD256FB40, 0xD257FB40, 0xD258FB40, 0xD259FB40, 0xD25AFB40, 0xD25BFB40, 0xD25CFB40, 0xD25DFB40, 
0xD25EFB40, 0xD25FFB40, 0xD260FB40, 0xD261FB40, + 0xD262FB40, 0xD263FB40, 0xD264FB40, 0xD265FB40, 0xD266FB40, 0xD267FB40, 0xD268FB40, 0xD269FB40, 0xD26AFB40, 0xD26BFB40, 0xD26CFB40, 0xD26DFB40, 0xD26EFB40, 0xD26FFB40, 0xD270FB40, + 0xD271FB40, 0xD272FB40, 0xD273FB40, 0xD274FB40, 0xD275FB40, 0xD276FB40, 0xD277FB40, 0xD278FB40, 0xD279FB40, 0xD27AFB40, 0xD27BFB40, 0xD27CFB40, 0xD27DFB40, 0xD27EFB40, 0xD27FFB40, + 0xD280FB40, 0xD281FB40, 0xD282FB40, 0xD283FB40, 0xD284FB40, 0xD285FB40, 0xD286FB40, 0xD287FB40, 0xD288FB40, 0xD289FB40, 0xD28AFB40, 0xD28BFB40, 0xD28CFB40, 0xD28DFB40, 0xD28EFB40, + 0xD28FFB40, 0xD290FB40, 0xD291FB40, 0xD292FB40, 0xD293FB40, 0xD294FB40, 0xD295FB40, 0xD296FB40, 0xD297FB40, 0xD298FB40, 0xD299FB40, 0xD29AFB40, 0xD29BFB40, 0xD29CFB40, 0xD29DFB40, + 0xD29EFB40, 0xD29FFB40, 0xD2A0FB40, 0xD2A1FB40, 0xD2A2FB40, 0xD2A3FB40, 0xD2A4FB40, 0xD2A5FB40, 0xD2A6FB40, 0xD2A7FB40, 0xD2A8FB40, 0xD2A9FB40, 0xD2AAFB40, 0xD2ABFB40, 0xD2ACFB40, + 0xD2ADFB40, 0xD2AEFB40, 0xD2AFFB40, 0xD2B0FB40, 0xD2B1FB40, 0xD2B2FB40, 0xD2B3FB40, 0xD2B4FB40, 0xD2B5FB40, 0xD2B6FB40, 0xD2B7FB40, 0xD2B8FB40, 0xD2B9FB40, 0xD2BAFB40, 0xD2BBFB40, + 0xD2BCFB40, 0xD2BDFB40, 0xD2BEFB40, 0xD2BFFB40, 0xD2C0FB40, 0xD2C1FB40, 0xD2C2FB40, 0xD2C3FB40, 0xD2C4FB40, 0xD2C5FB40, 0xD2C6FB40, 0xD2C7FB40, 0xD2C8FB40, 0xD2C9FB40, 0xD2CAFB40, + 0xD2CBFB40, 0xD2CCFB40, 0xD2CDFB40, 0xD2CEFB40, 0xD2CFFB40, 0xD2D0FB40, 0xD2D1FB40, 0xD2D2FB40, 0xD2D3FB40, 0xD2D4FB40, 0xD2D5FB40, 0xD2D6FB40, 0xD2D7FB40, 0xD2D8FB40, 0xD2D9FB40, + 0xD2DAFB40, 0xD2DBFB40, 0xD2DCFB40, 0xD2DDFB40, 0xD2DEFB40, 0xD2DFFB40, 0xD2E0FB40, 0xD2E1FB40, 0xD2E2FB40, 0xD2E3FB40, 0xD2E4FB40, 0xD2E5FB40, 0xD2E6FB40, 0xD2E7FB40, 0xD2E8FB40, + 0xD2E9FB40, 0xD2EAFB40, 0xD2EBFB40, 0xD2ECFB40, 0xD2EDFB40, 0xD2EEFB40, 0xD2EFFB40, 0xD2F0FB40, 0xD2F1FB40, 0xD2F2FB40, 0xD2F3FB40, 0xD2F4FB40, 0xD2F5FB40, 0xD2F6FB40, 0xD2F7FB40, + 0xD2F8FB40, 0xD2F9FB40, 0xD2FAFB40, 0xD2FBFB40, 0xD2FCFB40, 0xD2FDFB40, 0xD2FEFB40, 0xD2FFFB40, 0xD300FB40, 0xD301FB40, 
0xD302FB40, 0xD303FB40, 0xD304FB40, 0xD305FB40, 0xD306FB40, + 0xD307FB40, 0xD308FB40, 0xD309FB40, 0xD30AFB40, 0xD30BFB40, 0xD30CFB40, 0xD30DFB40, 0xD30EFB40, 0xD30FFB40, 0xD310FB40, 0xD311FB40, 0xD312FB40, 0xD313FB40, 0xD314FB40, 0xD315FB40, + 0xD316FB40, 0xD317FB40, 0xD318FB40, 0xD319FB40, 0xD31AFB40, 0xD31BFB40, 0xD31CFB40, 0xD31DFB40, 0xD31EFB40, 0xD31FFB40, 0xD320FB40, 0xD321FB40, 0xD322FB40, 0xD323FB40, 0xD324FB40, + 0xD325FB40, 0xD326FB40, 0xD327FB40, 0xD328FB40, 0xD329FB40, 0xD32AFB40, 0xD32BFB40, 0xD32CFB40, 0xD32DFB40, 0xD32EFB40, 0xD32FFB40, 0xD330FB40, 0xD331FB40, 0xD332FB40, 0xD333FB40, + 0xD334FB40, 0xD335FB40, 0xD336FB40, 0xD337FB40, 0xD338FB40, 0xD339FB40, 0xD33AFB40, 0xD33BFB40, 0xD33CFB40, 0xD33DFB40, 0xD33EFB40, 0xD33FFB40, 0xD340FB40, 0xD341FB40, 0xD342FB40, + 0xD343FB40, 0xD344FB40, 0xD345FB40, 0xD346FB40, 0xD347FB40, 0xD348FB40, 0xD349FB40, 0xD34AFB40, 0xD34BFB40, 0xD34CFB40, 0xD34DFB40, 0xD34EFB40, 0xD34FFB40, 0xD350FB40, 0xD351FB40, + 0xD352FB40, 0xD353FB40, 0xD354FB40, 0xD355FB40, 0xD356FB40, 0xD357FB40, 0xD358FB40, 0xD359FB40, 0xD35AFB40, 0xD35BFB40, 0xD35CFB40, 0xD35DFB40, 0xD35EFB40, 0xD35FFB40, 0xD360FB40, + 0xD361FB40, 0xD362FB40, 0xD363FB40, 0xD364FB40, 0xD365FB40, 0xD366FB40, 0xD367FB40, 0xD368FB40, 0xD369FB40, 0xD36AFB40, 0xD36BFB40, 0xD36CFB40, 0xD36DFB40, 0xD36EFB40, 0xD36FFB40, + 0xD370FB40, 0xD371FB40, 0xD372FB40, 0xD373FB40, 0xD374FB40, 0xD375FB40, 0xD376FB40, 0xD377FB40, 0xD378FB40, 0xD379FB40, 0xD37AFB40, 0xD37BFB40, 0xD37CFB40, 0xD37DFB40, 0xD37EFB40, + 0xD37FFB40, 0xD380FB40, 0xD381FB40, 0xD382FB40, 0xD383FB40, 0xD384FB40, 0xD385FB40, 0xD386FB40, 0xD387FB40, 0xD388FB40, 0xD389FB40, 0xD38AFB40, 0xD38BFB40, 0xD38CFB40, 0xD38DFB40, + 0xD38EFB40, 0xD38FFB40, 0xD390FB40, 0xD391FB40, 0xD392FB40, 0xD393FB40, 0xD394FB40, 0xD395FB40, 0xD396FB40, 0xD397FB40, 0xD398FB40, 0xD399FB40, 0xD39AFB40, 0xD39BFB40, 0xD39CFB40, + 0xD39DFB40, 0xD39EFB40, 0xD39FFB40, 0xD3A0FB40, 0xD3A1FB40, 0xD3A2FB40, 0xD3A3FB40, 0xD3A4FB40, 0xD3A5FB40, 
0xD3A6FB40, 0xD3A7FB40, 0xD3A8FB40, 0xD3A9FB40, 0xD3AAFB40, 0xD3ABFB40, + 0xD3ACFB40, 0xD3ADFB40, 0xD3AEFB40, 0xD3AFFB40, 0xD3B0FB40, 0xD3B1FB40, 0xD3B2FB40, 0xD3B3FB40, 0xD3B4FB40, 0xD3B5FB40, 0xD3B6FB40, 0xD3B7FB40, 0xD3B8FB40, 0xD3B9FB40, 0xD3BAFB40, + 0xD3BBFB40, 0xD3BCFB40, 0xD3BDFB40, 0xD3BEFB40, 0xD3BFFB40, 0xD3C0FB40, 0xD3C1FB40, 0xD3C2FB40, 0xD3C3FB40, 0xD3C4FB40, 0xD3C5FB40, 0xD3C6FB40, 0xD3C7FB40, 0xD3C8FB40, 0xD3C9FB40, + 0xD3CAFB40, 0xD3CBFB40, 0xD3CCFB40, 0xD3CDFB40, 0xD3CEFB40, 0xD3CFFB40, 0xD3D0FB40, 0xD3D1FB40, 0xD3D2FB40, 0xD3D3FB40, 0xD3D4FB40, 0xD3D5FB40, 0xD3D6FB40, 0xD3D7FB40, 0xD3D8FB40, + 0xD3D9FB40, 0xD3DAFB40, 0xD3DBFB40, 0xD3DCFB40, 0xD3DDFB40, 0xD3DEFB40, 0xD3DFFB40, 0xD3E0FB40, 0xD3E1FB40, 0xD3E2FB40, 0xD3E3FB40, 0xD3E4FB40, 0xD3E5FB40, 0xD3E6FB40, 0xD3E7FB40, + 0xD3E8FB40, 0xD3E9FB40, 0xD3EAFB40, 0xD3EBFB40, 0xD3ECFB40, 0xD3EDFB40, 0xD3EEFB40, 0xD3EFFB40, 0xD3F0FB40, 0xD3F1FB40, 0xD3F2FB40, 0xD3F3FB40, 0xD3F4FB40, 0xD3F5FB40, 0xD3F6FB40, + 0xD3F7FB40, 0xD3F8FB40, 0xD3F9FB40, 0xD3FAFB40, 0xD3FBFB40, 0xD3FCFB40, 0xD3FDFB40, 0xD3FEFB40, 0xD3FFFB40, 0xD400FB40, 0xD401FB40, 0xD402FB40, 0xD403FB40, 0xD404FB40, 0xD405FB40, + 0xD406FB40, 0xD407FB40, 0xD408FB40, 0xD409FB40, 0xD40AFB40, 0xD40BFB40, 0xD40CFB40, 0xD40DFB40, 0xD40EFB40, 0xD40FFB40, 0xD410FB40, 0xD411FB40, 0xD412FB40, 0xD413FB40, 0xD414FB40, + 0xD415FB40, 0xD416FB40, 0xD417FB40, 0xD418FB40, 0xD419FB40, 0xD41AFB40, 0xD41BFB40, 0xD41CFB40, 0xD41DFB40, 0xD41EFB40, 0xD41FFB40, 0xD420FB40, 0xD421FB40, 0xD422FB40, 0xD423FB40, + 0xD424FB40, 0xD425FB40, 0xD426FB40, 0xD427FB40, 0xD428FB40, 0xD429FB40, 0xD42AFB40, 0xD42BFB40, 0xD42CFB40, 0xD42DFB40, 0xD42EFB40, 0xD42FFB40, 0xD430FB40, 0xD431FB40, 0xD432FB40, + 0xD433FB40, 0xD434FB40, 0xD435FB40, 0xD436FB40, 0xD437FB40, 0xD438FB40, 0xD439FB40, 0xD43AFB40, 0xD43BFB40, 0xD43CFB40, 0xD43DFB40, 0xD43EFB40, 0xD43FFB40, 0xD440FB40, 0xD441FB40, + 0xD442FB40, 0xD443FB40, 0xD444FB40, 0xD445FB40, 0xD446FB40, 0xD447FB40, 0xD448FB40, 0xD449FB40, 
0xD44AFB40, 0xD44BFB40, 0xD44CFB40, 0xD44DFB40, 0xD44EFB40, 0xD44FFB40, 0xD450FB40, + 0xD451FB40, 0xD452FB40, 0xD453FB40, 0xD454FB40, 0xD455FB40, 0xD456FB40, 0xD457FB40, 0xD458FB40, 0xD459FB40, 0xD45AFB40, 0xD45BFB40, 0xD45CFB40, 0xD45DFB40, 0xD45EFB40, 0xD45FFB40, + 0xD460FB40, 0xD461FB40, 0xD462FB40, 0xD463FB40, 0xD464FB40, 0xD465FB40, 0xD466FB40, 0xD467FB40, 0xD468FB40, 0xD469FB40, 0xD46AFB40, 0xD46BFB40, 0xD46CFB40, 0xD46DFB40, 0xD46EFB40, + 0xD46FFB40, 0xD470FB40, 0xD471FB40, 0xD472FB40, 0xD473FB40, 0xD474FB40, 0xD475FB40, 0xD476FB40, 0xD477FB40, 0xD478FB40, 0xD479FB40, 0xD47AFB40, 0xD47BFB40, 0xD47CFB40, 0xD47DFB40, + 0xD47EFB40, 0xD47FFB40, 0xD480FB40, 0xD481FB40, 0xD482FB40, 0xD483FB40, 0xD484FB40, 0xD485FB40, 0xD486FB40, 0xD487FB40, 0xD488FB40, 0xD489FB40, 0xD48AFB40, 0xD48BFB40, 0xD48CFB40, + 0xD48DFB40, 0xD48EFB40, 0xD48FFB40, 0xD490FB40, 0xD491FB40, 0xD492FB40, 0xD493FB40, 0xD494FB40, 0xD495FB40, 0xD496FB40, 0xD497FB40, 0xD498FB40, 0xD499FB40, 0xD49AFB40, 0xD49BFB40, + 0xD49CFB40, 0xD49DFB40, 0xD49EFB40, 0xD49FFB40, 0xD4A0FB40, 0xD4A1FB40, 0xD4A2FB40, 0xD4A3FB40, 0xD4A4FB40, 0xD4A5FB40, 0xD4A6FB40, 0xD4A7FB40, 0xD4A8FB40, 0xD4A9FB40, 0xD4AAFB40, + 0xD4ABFB40, 0xD4ACFB40, 0xD4ADFB40, 0xD4AEFB40, 0xD4AFFB40, 0xD4B0FB40, 0xD4B1FB40, 0xD4B2FB40, 0xD4B3FB40, 0xD4B4FB40, 0xD4B5FB40, 0xD4B6FB40, 0xD4B7FB40, 0xD4B8FB40, 0xD4B9FB40, + 0xD4BAFB40, 0xD4BBFB40, 0xD4BCFB40, 0xD4BDFB40, 0xD4BEFB40, 0xD4BFFB40, 0xD4C0FB40, 0xD4C1FB40, 0xD4C2FB40, 0xD4C3FB40, 0xD4C4FB40, 0xD4C5FB40, 0xD4C6FB40, 0xD4C7FB40, 0xD4C8FB40, + 0xD4C9FB40, 0xD4CAFB40, 0xD4CBFB40, 0xD4CCFB40, 0xD4CDFB40, 0xD4CEFB40, 0xD4CFFB40, 0xD4D0FB40, 0xD4D1FB40, 0xD4D2FB40, 0xD4D3FB40, 0xD4D4FB40, 0xD4D5FB40, 0xD4D6FB40, 0xD4D7FB40, + 0xD4D8FB40, 0xD4D9FB40, 0xD4DAFB40, 0xD4DBFB40, 0xD4DCFB40, 0xD4DDFB40, 0xD4DEFB40, 0xD4DFFB40, 0xD4E0FB40, 0xD4E1FB40, 0xD4E2FB40, 0xD4E3FB40, 0xD4E4FB40, 0xD4E5FB40, 0xD4E6FB40, + 0xD4E7FB40, 0xD4E8FB40, 0xD4E9FB40, 0xD4EAFB40, 0xD4EBFB40, 0xD4ECFB40, 0xD4EDFB40, 
0xD4EEFB40, 0xD4EFFB40, 0xD4F0FB40, 0xD4F1FB40, 0xD4F2FB40, 0xD4F3FB40, 0xD4F4FB40, 0xD4F5FB40, + 0xD4F6FB40, 0xD4F7FB40, 0xD4F8FB40, 0xD4F9FB40, 0xD4FAFB40, 0xD4FBFB40, 0xD4FCFB40, 0xD4FDFB40, 0xD4FEFB40, 0xD4FFFB40, 0xD500FB40, 0xD501FB40, 0xD502FB40, 0xD503FB40, 0xD504FB40, + 0xD505FB40, 0xD506FB40, 0xD507FB40, 0xD508FB40, 0xD509FB40, 0xD50AFB40, 0xD50BFB40, 0xD50CFB40, 0xD50DFB40, 0xD50EFB40, 0xD50FFB40, 0xD510FB40, 0xD511FB40, 0xD512FB40, 0xD513FB40, + 0xD514FB40, 0xD515FB40, 0xD516FB40, 0xD517FB40, 0xD518FB40, 0xD519FB40, 0xD51AFB40, 0xD51BFB40, 0xD51CFB40, 0xD51DFB40, 0xD51EFB40, 0xD51FFB40, 0xD520FB40, 0xD521FB40, 0xD522FB40, + 0xD523FB40, 0xD524FB40, 0xD525FB40, 0xD526FB40, 0xD527FB40, 0xD528FB40, 0xD529FB40, 0xD52AFB40, 0xD52BFB40, 0xD52CFB40, 0xD52DFB40, 0xD52EFB40, 0xD52FFB40, 0xD530FB40, 0xD531FB40, + 0xD532FB40, 0xD533FB40, 0xD534FB40, 0xD535FB40, 0xD536FB40, 0xD537FB40, 0xD538FB40, 0xD539FB40, 0xD53AFB40, 0xD53BFB40, 0xD53CFB40, 0xD53DFB40, 0xD53EFB40, 0xD53FFB40, 0xD540FB40, + 0xD541FB40, 0xD542FB40, 0xD543FB40, 0xD544FB40, 0xD545FB40, 0xD546FB40, 0xD547FB40, 0xD548FB40, 0xD549FB40, 0xD54AFB40, 0xD54BFB40, 0xD54CFB40, 0xD54DFB40, 0xD54EFB40, 0xD54FFB40, + 0xD550FB40, 0xD551FB40, 0xD552FB40, 0xD553FB40, 0xD554FB40, 0xD555FB40, 0xD556FB40, 0xD557FB40, 0xD558FB40, 0xD559FB40, 0xD55AFB40, 0xD55BFB40, 0xD55CFB40, 0xD55DFB40, 0xD55EFB40, + 0xD55FFB40, 0xD560FB40, 0xD561FB40, 0xD562FB40, 0xD563FB40, 0xD564FB40, 0xD565FB40, 0xD566FB40, 0xD567FB40, 0xD568FB40, 0xD569FB40, 0xD56AFB40, 0xD56BFB40, 0xD56CFB40, 0xD56DFB40, + 0xD56EFB40, 0xD56FFB40, 0xD570FB40, 0xD571FB40, 0xD572FB40, 0xD573FB40, 0xD574FB40, 0xD575FB40, 0xD576FB40, 0xD577FB40, 0xD578FB40, 0xD579FB40, 0xD57AFB40, 0xD57BFB40, 0xD57CFB40, + 0xD57DFB40, 0xD57EFB40, 0xD57FFB40, 0xD580FB40, 0xD581FB40, 0xD582FB40, 0xD583FB40, 0xD584FB40, 0xD585FB40, 0xD586FB40, 0xD587FB40, 0xD588FB40, 0xD589FB40, 0xD58AFB40, 0xD58BFB40, + 0xD58CFB40, 0xD58DFB40, 0xD58EFB40, 0xD58FFB40, 0xD590FB40, 0xD591FB40, 
0xD592FB40, 0xD593FB40, 0xD594FB40, 0xD595FB40, 0xD596FB40, 0xD597FB40, 0xD598FB40, 0xD599FB40, 0xD59AFB40, + 0xD59BFB40, 0xD59CFB40, 0xD59DFB40, 0xD59EFB40, 0xD59FFB40, 0xD5A0FB40, 0xD5A1FB40, 0xD5A2FB40, 0xD5A3FB40, 0xD5A4FB40, 0xD5A5FB40, 0xD5A6FB40, 0xD5A7FB40, 0xD5A8FB40, 0xD5A9FB40, + 0xD5AAFB40, 0xD5ABFB40, 0xD5ACFB40, 0xD5ADFB40, 0xD5AEFB40, 0xD5AFFB40, 0xD5B0FB40, 0xD5B1FB40, 0xD5B2FB40, 0xD5B3FB40, 0xD5B4FB40, 0xD5B5FB40, 0xD5B6FB40, 0xD5B7FB40, 0xD5B8FB40, + 0xD5B9FB40, 0xD5BAFB40, 0xD5BBFB40, 0xD5BCFB40, 0xD5BDFB40, 0xD5BEFB40, 0xD5BFFB40, 0xD5C0FB40, 0xD5C1FB40, 0xD5C2FB40, 0xD5C3FB40, 0xD5C4FB40, 0xD5C5FB40, 0xD5C6FB40, 0xD5C7FB40, + 0xD5C8FB40, 0xD5C9FB40, 0xD5CAFB40, 0xD5CBFB40, 0xD5CCFB40, 0xD5CDFB40, 0xD5CEFB40, 0xD5CFFB40, 0xD5D0FB40, 0xD5D1FB40, 0xD5D2FB40, 0xD5D3FB40, 0xD5D4FB40, 0xD5D5FB40, 0xD5D6FB40, + 0xD5D7FB40, 0xD5D8FB40, 0xD5D9FB40, 0xD5DAFB40, 0xD5DBFB40, 0xD5DCFB40, 0xD5DDFB40, 0xD5DEFB40, 0xD5DFFB40, 0xD5E0FB40, 0xD5E1FB40, 0xD5E2FB40, 0xD5E3FB40, 0xD5E4FB40, 0xD5E5FB40, + 0xD5E6FB40, 0xD5E7FB40, 0xD5E8FB40, 0xD5E9FB40, 0xD5EAFB40, 0xD5EBFB40, 0xD5ECFB40, 0xD5EDFB40, 0xD5EEFB40, 0xD5EFFB40, 0xD5F0FB40, 0xD5F1FB40, 0xD5F2FB40, 0xD5F3FB40, 0xD5F4FB40, + 0xD5F5FB40, 0xD5F6FB40, 0xD5F7FB40, 0xD5F8FB40, 0xD5F9FB40, 0xD5FAFB40, 0xD5FBFB40, 0xD5FCFB40, 0xD5FDFB40, 0xD5FEFB40, 0xD5FFFB40, 0xD600FB40, 0xD601FB40, 0xD602FB40, 0xD603FB40, + 0xD604FB40, 0xD605FB40, 0xD606FB40, 0xD607FB40, 0xD608FB40, 0xD609FB40, 0xD60AFB40, 0xD60BFB40, 0xD60CFB40, 0xD60DFB40, 0xD60EFB40, 0xD60FFB40, 0xD610FB40, 0xD611FB40, 0xD612FB40, + 0xD613FB40, 0xD614FB40, 0xD615FB40, 0xD616FB40, 0xD617FB40, 0xD618FB40, 0xD619FB40, 0xD61AFB40, 0xD61BFB40, 0xD61CFB40, 0xD61DFB40, 0xD61EFB40, 0xD61FFB40, 0xD620FB40, 0xD621FB40, + 0xD622FB40, 0xD623FB40, 0xD624FB40, 0xD625FB40, 0xD626FB40, 0xD627FB40, 0xD628FB40, 0xD629FB40, 0xD62AFB40, 0xD62BFB40, 0xD62CFB40, 0xD62DFB40, 0xD62EFB40, 0xD62FFB40, 0xD630FB40, + 0xD631FB40, 0xD632FB40, 0xD633FB40, 0xD634FB40, 0xD635FB40, 
0xD636FB40, 0xD637FB40, 0xD638FB40, 0xD639FB40, 0xD63AFB40, 0xD63BFB40, 0xD63CFB40, 0xD63DFB40, 0xD63EFB40, 0xD63FFB40, + 0xD640FB40, 0xD641FB40, 0xD642FB40, 0xD643FB40, 0xD644FB40, 0xD645FB40, 0xD646FB40, 0xD647FB40, 0xD648FB40, 0xD649FB40, 0xD64AFB40, 0xD64BFB40, 0xD64CFB40, 0xD64DFB40, 0xD64EFB40, + 0xD64FFB40, 0xD650FB40, 0xD651FB40, 0xD652FB40, 0xD653FB40, 0xD654FB40, 0xD655FB40, 0xD656FB40, 0xD657FB40, 0xD658FB40, 0xD659FB40, 0xD65AFB40, 0xD65BFB40, 0xD65CFB40, 0xD65DFB40, + 0xD65EFB40, 0xD65FFB40, 0xD660FB40, 0xD661FB40, 0xD662FB40, 0xD663FB40, 0xD664FB40, 0xD665FB40, 0xD666FB40, 0xD667FB40, 0xD668FB40, 0xD669FB40, 0xD66AFB40, 0xD66BFB40, 0xD66CFB40, + 0xD66DFB40, 0xD66EFB40, 0xD66FFB40, 0xD670FB40, 0xD671FB40, 0xD672FB40, 0xD673FB40, 0xD674FB40, 0xD675FB40, 0xD676FB40, 0xD677FB40, 0xD678FB40, 0xD679FB40, 0xD67AFB40, 0xD67BFB40, + 0xD67CFB40, 0xD67DFB40, 0xD67EFB40, 0xD67FFB40, 0xD680FB40, 0xD681FB40, 0xD682FB40, 0xD683FB40, 0xD684FB40, 0xD685FB40, 0xD686FB40, 0xD687FB40, 0xD688FB40, 0xD689FB40, 0xD68AFB40, + 0xD68BFB40, 0xD68CFB40, 0xD68DFB40, 0xD68EFB40, 0xD68FFB40, 0xD690FB40, 0xD691FB40, 0xD692FB40, 0xD693FB40, 0xD694FB40, 0xD695FB40, 0xD696FB40, 0xD697FB40, 0xD698FB40, 0xD699FB40, + 0xD69AFB40, 0xD69BFB40, 0xD69CFB40, 0xD69DFB40, 0xD69EFB40, 0xD69FFB40, 0xD6A0FB40, 0xD6A1FB40, 0xD6A2FB40, 0xD6A3FB40, 0xD6A4FB40, 0xD6A5FB40, 0xD6A6FB40, 0xD6A7FB40, 0xD6A8FB40, + 0xD6A9FB40, 0xD6AAFB40, 0xD6ABFB40, 0xD6ACFB40, 0xD6ADFB40, 0xD6AEFB40, 0xD6AFFB40, 0xD6B0FB40, 0xD6B1FB40, 0xD6B2FB40, 0xD6B3FB40, 0xD6B4FB40, 0xD6B5FB40, 0xD6B6FB40, 0xD6B7FB40, + 0xD6B8FB40, 0xD6B9FB40, 0xD6BAFB40, 0xD6BBFB40, 0xD6BCFB40, 0xD6BDFB40, 0xD6BEFB40, 0xD6BFFB40, 0xD6C0FB40, 0xD6C1FB40, 0xD6C2FB40, 0xD6C3FB40, 0xD6C4FB40, 0xD6C5FB40, 0xD6C6FB40, + 0xD6C7FB40, 0xD6C8FB40, 0xD6C9FB40, 0xD6CAFB40, 0xD6CBFB40, 0xD6CCFB40, 0xD6CDFB40, 0xD6CEFB40, 0xD6CFFB40, 0xD6D0FB40, 0xD6D1FB40, 0xD6D2FB40, 0xD6D3FB40, 0xD6D4FB40, 0xD6D5FB40, + 0xD6D6FB40, 0xD6D7FB40, 0xD6D8FB40, 0xD6D9FB40, 
0xD6DAFB40, 0xD6DBFB40, 0xD6DCFB40, 0xD6DDFB40, 0xD6DEFB40, 0xD6DFFB40, 0xD6E0FB40, 0xD6E1FB40, 0xD6E2FB40, 0xD6E3FB40, 0xD6E4FB40, + 0xD6E5FB40, 0xD6E6FB40, 0xD6E7FB40, 0xD6E8FB40, 0xD6E9FB40, 0xD6EAFB40, 0xD6EBFB40, 0xD6ECFB40, 0xD6EDFB40, 0xD6EEFB40, 0xD6EFFB40, 0xD6F0FB40, 0xD6F1FB40, 0xD6F2FB40, 0xD6F3FB40, + 0xD6F4FB40, 0xD6F5FB40, 0xD6F6FB40, 0xD6F7FB40, 0xD6F8FB40, 0xD6F9FB40, 0xD6FAFB40, 0xD6FBFB40, 0xD6FCFB40, 0xD6FDFB40, 0xD6FEFB40, 0xD6FFFB40, 0xD700FB40, 0xD701FB40, 0xD702FB40, + 0xD703FB40, 0xD704FB40, 0xD705FB40, 0xD706FB40, 0xD707FB40, 0xD708FB40, 0xD709FB40, 0xD70AFB40, 0xD70BFB40, 0xD70CFB40, 0xD70DFB40, 0xD70EFB40, 0xD70FFB40, 0xD710FB40, 0xD711FB40, + 0xD712FB40, 0xD713FB40, 0xD714FB40, 0xD715FB40, 0xD716FB40, 0xD717FB40, 0xD718FB40, 0xD719FB40, 0xD71AFB40, 0xD71BFB40, 0xD71CFB40, 0xD71DFB40, 0xD71EFB40, 0xD71FFB40, 0xD720FB40, + 0xD721FB40, 0xD722FB40, 0xD723FB40, 0xD724FB40, 0xD725FB40, 0xD726FB40, 0xD727FB40, 0xD728FB40, 0xD729FB40, 0xD72AFB40, 0xD72BFB40, 0xD72CFB40, 0xD72DFB40, 0xD72EFB40, 0xD72FFB40, + 0xD730FB40, 0xD731FB40, 0xD732FB40, 0xD733FB40, 0xD734FB40, 0xD735FB40, 0xD736FB40, 0xD737FB40, 0xD738FB40, 0xD739FB40, 0xD73AFB40, 0xD73BFB40, 0xD73CFB40, 0xD73DFB40, 0xD73EFB40, + 0xD73FFB40, 0xD740FB40, 0xD741FB40, 0xD742FB40, 0xD743FB40, 0xD744FB40, 0xD745FB40, 0xD746FB40, 0xD747FB40, 0xD748FB40, 0xD749FB40, 0xD74AFB40, 0xD74BFB40, 0xD74CFB40, 0xD74DFB40, + 0xD74EFB40, 0xD74FFB40, 0xD750FB40, 0xD751FB40, 0xD752FB40, 0xD753FB40, 0xD754FB40, 0xD755FB40, 0xD756FB40, 0xD757FB40, 0xD758FB40, 0xD759FB40, 0xD75AFB40, 0xD75BFB40, 0xD75CFB40, + 0xD75DFB40, 0xD75EFB40, 0xD75FFB40, 0xD760FB40, 0xD761FB40, 0xD762FB40, 0xD763FB40, 0xD764FB40, 0xD765FB40, 0xD766FB40, 0xD767FB40, 0xD768FB40, 0xD769FB40, 0xD76AFB40, 0xD76BFB40, + 0xD76CFB40, 0xD76DFB40, 0xD76EFB40, 0xD76FFB40, 0xD770FB40, 0xD771FB40, 0xD772FB40, 0xD773FB40, 0xD774FB40, 0xD775FB40, 0xD776FB40, 0xD777FB40, 0xD778FB40, 0xD779FB40, 0xD77AFB40, + 0xD77BFB40, 0xD77CFB40, 0xD77DFB40, 
0xD77EFB40, 0xD77FFB40, 0xD780FB40, 0xD781FB40, 0xD782FB40, 0xD783FB40, 0xD784FB40, 0xD785FB40, 0xD786FB40, 0xD787FB40, 0xD788FB40, 0xD789FB40, + 0xD78AFB40, 0xD78BFB40, 0xD78CFB40, 0xD78DFB40, 0xD78EFB40, 0xD78FFB40, 0xD790FB40, 0xD791FB40, 0xD792FB40, 0xD793FB40, 0xD794FB40, 0xD795FB40, 0xD796FB40, 0xD797FB40, 0xD798FB40, + 0xD799FB40, 0xD79AFB40, 0xD79BFB40, 0xD79CFB40, 0xD79DFB40, 0xD79EFB40, 0xD79FFB40, 0xD7A0FB40, 0xD7A1FB40, 0xD7A2FB40, 0xD7A3FB40, 0xD7A4FB40, 0xD7A5FB40, 0xD7A6FB40, 0xD7A7FB40, + 0xD7A8FB40, 0xD7A9FB40, 0xD7AAFB40, 0xD7ABFB40, 0xD7ACFB40, 0xD7ADFB40, 0xD7AEFB40, 0xD7AFFB40, 0xD7B0FB40, 0xD7B1FB40, 0xD7B2FB40, 0xD7B3FB40, 0xD7B4FB40, 0xD7B5FB40, 0xD7B6FB40, + 0xD7B7FB40, 0xD7B8FB40, 0xD7B9FB40, 0xD7BAFB40, 0xD7BBFB40, 0xD7BCFB40, 0xD7BDFB40, 0xD7BEFB40, 0xD7BFFB40, 0xD7C0FB40, 0xD7C1FB40, 0xD7C2FB40, 0xD7C3FB40, 0xD7C4FB40, 0xD7C5FB40, + 0xD7C6FB40, 0xD7C7FB40, 0xD7C8FB40, 0xD7C9FB40, 0xD7CAFB40, 0xD7CBFB40, 0xD7CCFB40, 0xD7CDFB40, 0xD7CEFB40, 0xD7CFFB40, 0xD7D0FB40, 0xD7D1FB40, 0xD7D2FB40, 0xD7D3FB40, 0xD7D4FB40, + 0xD7D5FB40, 0xD7D6FB40, 0xD7D7FB40, 0xD7D8FB40, 0xD7D9FB40, 0xD7DAFB40, 0xD7DBFB40, 0xD7DCFB40, 0xD7DDFB40, 0xD7DEFB40, 0xD7DFFB40, 0xD7E0FB40, 0xD7E1FB40, 0xD7E2FB40, 0xD7E3FB40, + 0xD7E4FB40, 0xD7E5FB40, 0xD7E6FB40, 0xD7E7FB40, 0xD7E8FB40, 0xD7E9FB40, 0xD7EAFB40, 0xD7EBFB40, 0xD7ECFB40, 0xD7EDFB40, 0xD7EEFB40, 0xD7EFFB40, 0xD7F0FB40, 0xD7F1FB40, 0xD7F2FB40, + 0xD7F3FB40, 0xD7F4FB40, 0xD7F5FB40, 0xD7F6FB40, 0xD7F7FB40, 0xD7F8FB40, 0xD7F9FB40, 0xD7FAFB40, 0xD7FBFB40, 0xD7FCFB40, 0xD7FDFB40, 0xD7FEFB40, 0xD7FFFB40, 0xD800FB40, 0xD801FB40, + 0xD802FB40, 0xD803FB40, 0xD804FB40, 0xD805FB40, 0xD806FB40, 0xD807FB40, 0xD808FB40, 0xD809FB40, 0xD80AFB40, 0xD80BFB40, 0xD80CFB40, 0xD80DFB40, 0xD80EFB40, 0xD80FFB40, 0xD810FB40, + 0xD811FB40, 0xD812FB40, 0xD813FB40, 0xD814FB40, 0xD815FB40, 0xD816FB40, 0xD817FB40, 0xD818FB40, 0xD819FB40, 0xD81AFB40, 0xD81BFB40, 0xD81CFB40, 0xD81DFB40, 0xD81EFB40, 0xD81FFB40, + 0xD820FB40, 0xD821FB40, 
0xD822FB40, 0xD823FB40, 0xD824FB40, 0xD825FB40, 0xD826FB40, 0xD827FB40, 0xD828FB40, 0xD829FB40, 0xD82AFB40, 0xD82BFB40, 0xD82CFB40, 0xD82DFB40, 0xD82EFB40, + 0xD82FFB40, 0xD830FB40, 0xD831FB40, 0xD832FB40, 0xD833FB40, 0xD834FB40, 0xD835FB40, 0xD836FB40, 0xD837FB40, 0xD838FB40, 0xD839FB40, 0xD83AFB40, 0xD83BFB40, 0xD83CFB40, 0xD83DFB40, + 0xD83EFB40, 0xD83FFB40, 0xD840FB40, 0xD841FB40, 0xD842FB40, 0xD843FB40, 0xD844FB40, 0xD845FB40, 0xD846FB40, 0xD847FB40, 0xD848FB40, 0xD849FB40, 0xD84AFB40, 0xD84BFB40, 0xD84CFB40, + 0xD84DFB40, 0xD84EFB40, 0xD84FFB40, 0xD850FB40, 0xD851FB40, 0xD852FB40, 0xD853FB40, 0xD854FB40, 0xD855FB40, 0xD856FB40, 0xD857FB40, 0xD858FB40, 0xD859FB40, 0xD85AFB40, 0xD85BFB40, + 0xD85CFB40, 0xD85DFB40, 0xD85EFB40, 0xD85FFB40, 0xD860FB40, 0xD861FB40, 0xD862FB40, 0xD863FB40, 0xD864FB40, 0xD865FB40, 0xD866FB40, 0xD867FB40, 0xD868FB40, 0xD869FB40, 0xD86AFB40, + 0xD86BFB40, 0xD86CFB40, 0xD86DFB40, 0xD86EFB40, 0xD86FFB40, 0xD870FB40, 0xD871FB40, 0xD872FB40, 0xD873FB40, 0xD874FB40, 0xD875FB40, 0xD876FB40, 0xD877FB40, 0xD878FB40, 0xD879FB40, + 0xD87AFB40, 0xD87BFB40, 0xD87CFB40, 0xD87DFB40, 0xD87EFB40, 0xD87FFB40, 0xD880FB40, 0xD881FB40, 0xD882FB40, 0xD883FB40, 0xD884FB40, 0xD885FB40, 0xD886FB40, 0xD887FB40, 0xD888FB40, + 0xD889FB40, 0xD88AFB40, 0xD88BFB40, 0xD88CFB40, 0xD88DFB40, 0xD88EFB40, 0xD88FFB40, 0xD890FB40, 0xD891FB40, 0xD892FB40, 0xD893FB40, 0xD894FB40, 0xD895FB40, 0xD896FB40, 0xD897FB40, + 0xD898FB40, 0xD899FB40, 0xD89AFB40, 0xD89BFB40, 0xD89CFB40, 0xD89DFB40, 0xD89EFB40, 0xD89FFB40, 0xD8A0FB40, 0xD8A1FB40, 0xD8A2FB40, 0xD8A3FB40, 0xD8A4FB40, 0xD8A5FB40, 0xD8A6FB40, + 0xD8A7FB40, 0xD8A8FB40, 0xD8A9FB40, 0xD8AAFB40, 0xD8ABFB40, 0xD8ACFB40, 0xD8ADFB40, 0xD8AEFB40, 0xD8AFFB40, 0xD8B0FB40, 0xD8B1FB40, 0xD8B2FB40, 0xD8B3FB40, 0xD8B4FB40, 0xD8B5FB40, + 0xD8B6FB40, 0xD8B7FB40, 0xD8B8FB40, 0xD8B9FB40, 0xD8BAFB40, 0xD8BBFB40, 0xD8BCFB40, 0xD8BDFB40, 0xD8BEFB40, 0xD8BFFB40, 0xD8C0FB40, 0xD8C1FB40, 0xD8C2FB40, 0xD8C3FB40, 0xD8C4FB40, + 0xD8C5FB40, 
0xD8C6FB40, 0xD8C7FB40, 0xD8C8FB40, 0xD8C9FB40, 0xD8CAFB40, 0xD8CBFB40, 0xD8CCFB40, 0xD8CDFB40, 0xD8CEFB40, 0xD8CFFB40, 0xD8D0FB40, 0xD8D1FB40, 0xD8D2FB40, 0xD8D3FB40, + 0xD8D4FB40, 0xD8D5FB40, 0xD8D6FB40, 0xD8D7FB40, 0xD8D8FB40, 0xD8D9FB40, 0xD8DAFB40, 0xD8DBFB40, 0xD8DCFB40, 0xD8DDFB40, 0xD8DEFB40, 0xD8DFFB40, 0xD8E0FB40, 0xD8E1FB40, 0xD8E2FB40, + 0xD8E3FB40, 0xD8E4FB40, 0xD8E5FB40, 0xD8E6FB40, 0xD8E7FB40, 0xD8E8FB40, 0xD8E9FB40, 0xD8EAFB40, 0xD8EBFB40, 0xD8ECFB40, 0xD8EDFB40, 0xD8EEFB40, 0xD8EFFB40, 0xD8F0FB40, 0xD8F1FB40, + 0xD8F2FB40, 0xD8F3FB40, 0xD8F4FB40, 0xD8F5FB40, 0xD8F6FB40, 0xD8F7FB40, 0xD8F8FB40, 0xD8F9FB40, 0xD8FAFB40, 0xD8FBFB40, 0xD8FCFB40, 0xD8FDFB40, 0xD8FEFB40, 0xD8FFFB40, 0xD900FB40, + 0xD901FB40, 0xD902FB40, 0xD903FB40, 0xD904FB40, 0xD905FB40, 0xD906FB40, 0xD907FB40, 0xD908FB40, 0xD909FB40, 0xD90AFB40, 0xD90BFB40, 0xD90CFB40, 0xD90DFB40, 0xD90EFB40, 0xD90FFB40, + 0xD910FB40, 0xD911FB40, 0xD912FB40, 0xD913FB40, 0xD914FB40, 0xD915FB40, 0xD916FB40, 0xD917FB40, 0xD918FB40, 0xD919FB40, 0xD91AFB40, 0xD91BFB40, 0xD91CFB40, 0xD91DFB40, 0xD91EFB40, + 0xD91FFB40, 0xD920FB40, 0xD921FB40, 0xD922FB40, 0xD923FB40, 0xD924FB40, 0xD925FB40, 0xD926FB40, 0xD927FB40, 0xD928FB40, 0xD929FB40, 0xD92AFB40, 0xD92BFB40, 0xD92CFB40, 0xD92DFB40, + 0xD92EFB40, 0xD92FFB40, 0xD930FB40, 0xD931FB40, 0xD932FB40, 0xD933FB40, 0xD934FB40, 0xD935FB40, 0xD936FB40, 0xD937FB40, 0xD938FB40, 0xD939FB40, 0xD93AFB40, 0xD93BFB40, 0xD93CFB40, + 0xD93DFB40, 0xD93EFB40, 0xD93FFB40, 0xD940FB40, 0xD941FB40, 0xD942FB40, 0xD943FB40, 0xD944FB40, 0xD945FB40, 0xD946FB40, 0xD947FB40, 0xD948FB40, 0xD949FB40, 0xD94AFB40, 0xD94BFB40, + 0xD94CFB40, 0xD94DFB40, 0xD94EFB40, 0xD94FFB40, 0xD950FB40, 0xD951FB40, 0xD952FB40, 0xD953FB40, 0xD954FB40, 0xD955FB40, 0xD956FB40, 0xD957FB40, 0xD958FB40, 0xD959FB40, 0xD95AFB40, + 0xD95BFB40, 0xD95CFB40, 0xD95DFB40, 0xD95EFB40, 0xD95FFB40, 0xD960FB40, 0xD961FB40, 0xD962FB40, 0xD963FB40, 0xD964FB40, 0xD965FB40, 0xD966FB40, 0xD967FB40, 0xD968FB40, 0xD969FB40, + 
0xD96AFB40, 0xD96BFB40, 0xD96CFB40, 0xD96DFB40, 0xD96EFB40, 0xD96FFB40, 0xD970FB40, 0xD971FB40, 0xD972FB40, 0xD973FB40, 0xD974FB40, 0xD975FB40, 0xD976FB40, 0xD977FB40, 0xD978FB40, + 0xD979FB40, 0xD97AFB40, 0xD97BFB40, 0xD97CFB40, 0xD97DFB40, 0xD97EFB40, 0xD97FFB40, 0xD980FB40, 0xD981FB40, 0xD982FB40, 0xD983FB40, 0xD984FB40, 0xD985FB40, 0xD986FB40, 0xD987FB40, + 0xD988FB40, 0xD989FB40, 0xD98AFB40, 0xD98BFB40, 0xD98CFB40, 0xD98DFB40, 0xD98EFB40, 0xD98FFB40, 0xD990FB40, 0xD991FB40, 0xD992FB40, 0xD993FB40, 0xD994FB40, 0xD995FB40, 0xD996FB40, + 0xD997FB40, 0xD998FB40, 0xD999FB40, 0xD99AFB40, 0xD99BFB40, 0xD99CFB40, 0xD99DFB40, 0xD99EFB40, 0xD99FFB40, 0xD9A0FB40, 0xD9A1FB40, 0xD9A2FB40, 0xD9A3FB40, 0xD9A4FB40, 0xD9A5FB40, + 0xD9A6FB40, 0xD9A7FB40, 0xD9A8FB40, 0xD9A9FB40, 0xD9AAFB40, 0xD9ABFB40, 0xD9ACFB40, 0xD9ADFB40, 0xD9AEFB40, 0xD9AFFB40, 0xD9B0FB40, 0xD9B1FB40, 0xD9B2FB40, 0xD9B3FB40, 0xD9B4FB40, + 0xD9B5FB40, 0xD9B6FB40, 0xD9B7FB40, 0xD9B8FB40, 0xD9B9FB40, 0xD9BAFB40, 0xD9BBFB40, 0xD9BCFB40, 0xD9BDFB40, 0xD9BEFB40, 0xD9BFFB40, 0xD9C0FB40, 0xD9C1FB40, 0xD9C2FB40, 0xD9C3FB40, + 0xD9C4FB40, 0xD9C5FB40, 0xD9C6FB40, 0xD9C7FB40, 0xD9C8FB40, 0xD9C9FB40, 0xD9CAFB40, 0xD9CBFB40, 0xD9CCFB40, 0xD9CDFB40, 0xD9CEFB40, 0xD9CFFB40, 0xD9D0FB40, 0xD9D1FB40, 0xD9D2FB40, + 0xD9D3FB40, 0xD9D4FB40, 0xD9D5FB40, 0xD9D6FB40, 0xD9D7FB40, 0xD9D8FB40, 0xD9D9FB40, 0xD9DAFB40, 0xD9DBFB40, 0xD9DCFB40, 0xD9DDFB40, 0xD9DEFB40, 0xD9DFFB40, 0xD9E0FB40, 0xD9E1FB40, + 0xD9E2FB40, 0xD9E3FB40, 0xD9E4FB40, 0xD9E5FB40, 0xD9E6FB40, 0xD9E7FB40, 0xD9E8FB40, 0xD9E9FB40, 0xD9EAFB40, 0xD9EBFB40, 0xD9ECFB40, 0xD9EDFB40, 0xD9EEFB40, 0xD9EFFB40, 0xD9F0FB40, + 0xD9F1FB40, 0xD9F2FB40, 0xD9F3FB40, 0xD9F4FB40, 0xD9F5FB40, 0xD9F6FB40, 0xD9F7FB40, 0xD9F8FB40, 0xD9F9FB40, 0xD9FAFB40, 0xD9FBFB40, 0xD9FCFB40, 0xD9FDFB40, 0xD9FEFB40, 0xD9FFFB40, + 0xDA00FB40, 0xDA01FB40, 0xDA02FB40, 0xDA03FB40, 0xDA04FB40, 0xDA05FB40, 0xDA06FB40, 0xDA07FB40, 0xDA08FB40, 0xDA09FB40, 0xDA0AFB40, 0xDA0BFB40, 0xDA0CFB40, 0xDA0DFB40, 0xDA0EFB40, 
+ 0xDA0FFB40, 0xDA10FB40, 0xDA11FB40, 0xDA12FB40, 0xDA13FB40, 0xDA14FB40, 0xDA15FB40, 0xDA16FB40, 0xDA17FB40, 0xDA18FB40, 0xDA19FB40, 0xDA1AFB40, 0xDA1BFB40, 0xDA1CFB40, 0xDA1DFB40, + 0xDA1EFB40, 0xDA1FFB40, 0xDA20FB40, 0xDA21FB40, 0xDA22FB40, 0xDA23FB40, 0xDA24FB40, 0xDA25FB40, 0xDA26FB40, 0xDA27FB40, 0xDA28FB40, 0xDA29FB40, 0xDA2AFB40, 0xDA2BFB40, 0xDA2CFB40, + 0xDA2DFB40, 0xDA2EFB40, 0xDA2FFB40, 0xDA30FB40, 0xDA31FB40, 0xDA32FB40, 0xDA33FB40, 0xDA34FB40, 0xDA35FB40, 0xDA36FB40, 0xDA37FB40, 0xDA38FB40, 0xDA39FB40, 0xDA3AFB40, 0xDA3BFB40, + 0xDA3CFB40, 0xDA3DFB40, 0xDA3EFB40, 0xDA3FFB40, 0xDA40FB40, 0xDA41FB40, 0xDA42FB40, 0xDA43FB40, 0xDA44FB40, 0xDA45FB40, 0xDA46FB40, 0xDA47FB40, 0xDA48FB40, 0xDA49FB40, 0xDA4AFB40, + 0xDA4BFB40, 0xDA4CFB40, 0xDA4DFB40, 0xDA4EFB40, 0xDA4FFB40, 0xDA50FB40, 0xDA51FB40, 0xDA52FB40, 0xDA53FB40, 0xDA54FB40, 0xDA55FB40, 0xDA56FB40, 0xDA57FB40, 0xDA58FB40, 0xDA59FB40, + 0xDA5AFB40, 0xDA5BFB40, 0xDA5CFB40, 0xDA5DFB40, 0xDA5EFB40, 0xDA5FFB40, 0xDA60FB40, 0xDA61FB40, 0xDA62FB40, 0xDA63FB40, 0xDA64FB40, 0xDA65FB40, 0xDA66FB40, 0xDA67FB40, 0xDA68FB40, + 0xDA69FB40, 0xDA6AFB40, 0xDA6BFB40, 0xDA6CFB40, 0xDA6DFB40, 0xDA6EFB40, 0xDA6FFB40, 0xDA70FB40, 0xDA71FB40, 0xDA72FB40, 0xDA73FB40, 0xDA74FB40, 0xDA75FB40, 0xDA76FB40, 0xDA77FB40, + 0xDA78FB40, 0xDA79FB40, 0xDA7AFB40, 0xDA7BFB40, 0xDA7CFB40, 0xDA7DFB40, 0xDA7EFB40, 0xDA7FFB40, 0xDA80FB40, 0xDA81FB40, 0xDA82FB40, 0xDA83FB40, 0xDA84FB40, 0xDA85FB40, 0xDA86FB40, + 0xDA87FB40, 0xDA88FB40, 0xDA89FB40, 0xDA8AFB40, 0xDA8BFB40, 0xDA8CFB40, 0xDA8DFB40, 0xDA8EFB40, 0xDA8FFB40, 0xDA90FB40, 0xDA91FB40, 0xDA92FB40, 0xDA93FB40, 0xDA94FB40, 0xDA95FB40, + 0xDA96FB40, 0xDA97FB40, 0xDA98FB40, 0xDA99FB40, 0xDA9AFB40, 0xDA9BFB40, 0xDA9CFB40, 0xDA9DFB40, 0xDA9EFB40, 0xDA9FFB40, 0xDAA0FB40, 0xDAA1FB40, 0xDAA2FB40, 0xDAA3FB40, 0xDAA4FB40, + 0xDAA5FB40, 0xDAA6FB40, 0xDAA7FB40, 0xDAA8FB40, 0xDAA9FB40, 0xDAAAFB40, 0xDAABFB40, 0xDAACFB40, 0xDAADFB40, 0xDAAEFB40, 0xDAAFFB40, 0xDAB0FB40, 0xDAB1FB40, 0xDAB2FB40, 
0xDAB3FB40, + 0xDAB4FB40, 0xDAB5FB40, 0xDAB6FB40, 0xDAB7FB40, 0xDAB8FB40, 0xDAB9FB40, 0xDABAFB40, 0xDABBFB40, 0xDABCFB40, 0xDABDFB40, 0xDABEFB40, 0xDABFFB40, 0xDAC0FB40, 0xDAC1FB40, 0xDAC2FB40, + 0xDAC3FB40, 0xDAC4FB40, 0xDAC5FB40, 0xDAC6FB40, 0xDAC7FB40, 0xDAC8FB40, 0xDAC9FB40, 0xDACAFB40, 0xDACBFB40, 0xDACCFB40, 0xDACDFB40, 0xDACEFB40, 0xDACFFB40, 0xDAD0FB40, 0xDAD1FB40, + 0xDAD2FB40, 0xDAD3FB40, 0xDAD4FB40, 0xDAD5FB40, 0xDAD6FB40, 0xDAD7FB40, 0xDAD8FB40, 0xDAD9FB40, 0xDADAFB40, 0xDADBFB40, 0xDADCFB40, 0xDADDFB40, 0xDADEFB40, 0xDADFFB40, 0xDAE0FB40, + 0xDAE1FB40, 0xDAE2FB40, 0xDAE3FB40, 0xDAE4FB40, 0xDAE5FB40, 0xDAE6FB40, 0xDAE7FB40, 0xDAE8FB40, 0xDAE9FB40, 0xDAEAFB40, 0xDAEBFB40, 0xDAECFB40, 0xDAEDFB40, 0xDAEEFB40, 0xDAEFFB40, + 0xDAF0FB40, 0xDAF1FB40, 0xDAF2FB40, 0xDAF3FB40, 0xDAF4FB40, 0xDAF5FB40, 0xDAF6FB40, 0xDAF7FB40, 0xDAF8FB40, 0xDAF9FB40, 0xDAFAFB40, 0xDAFBFB40, 0xDAFCFB40, 0xDAFDFB40, 0xDAFEFB40, + 0xDAFFFB40, 0xDB00FB40, 0xDB01FB40, 0xDB02FB40, 0xDB03FB40, 0xDB04FB40, 0xDB05FB40, 0xDB06FB40, 0xDB07FB40, 0xDB08FB40, 0xDB09FB40, 0xDB0AFB40, 0xDB0BFB40, 0xDB0CFB40, 0xDB0DFB40, + 0xDB0EFB40, 0xDB0FFB40, 0xDB10FB40, 0xDB11FB40, 0xDB12FB40, 0xDB13FB40, 0xDB14FB40, 0xDB15FB40, 0xDB16FB40, 0xDB17FB40, 0xDB18FB40, 0xDB19FB40, 0xDB1AFB40, 0xDB1BFB40, 0xDB1CFB40, + 0xDB1DFB40, 0xDB1EFB40, 0xDB1FFB40, 0xDB20FB40, 0xDB21FB40, 0xDB22FB40, 0xDB23FB40, 0xDB24FB40, 0xDB25FB40, 0xDB26FB40, 0xDB27FB40, 0xDB28FB40, 0xDB29FB40, 0xDB2AFB40, 0xDB2BFB40, + 0xDB2CFB40, 0xDB2DFB40, 0xDB2EFB40, 0xDB2FFB40, 0xDB30FB40, 0xDB31FB40, 0xDB32FB40, 0xDB33FB40, 0xDB34FB40, 0xDB35FB40, 0xDB36FB40, 0xDB37FB40, 0xDB38FB40, 0xDB39FB40, 0xDB3AFB40, + 0xDB3BFB40, 0xDB3CFB40, 0xDB3DFB40, 0xDB3EFB40, 0xDB3FFB40, 0xDB40FB40, 0xDB41FB40, 0xDB42FB40, 0xDB43FB40, 0xDB44FB40, 0xDB45FB40, 0xDB46FB40, 0xDB47FB40, 0xDB48FB40, 0xDB49FB40, + 0xDB4AFB40, 0xDB4BFB40, 0xDB4CFB40, 0xDB4DFB40, 0xDB4EFB40, 0xDB4FFB40, 0xDB50FB40, 0xDB51FB40, 0xDB52FB40, 0xDB53FB40, 0xDB54FB40, 0xDB55FB40, 0xDB56FB40, 
0xDB57FB40, 0xDB58FB40, + 0xDB59FB40, 0xDB5AFB40, 0xDB5BFB40, 0xDB5CFB40, 0xDB5DFB40, 0xDB5EFB40, 0xDB5FFB40, 0xDB60FB40, 0xDB61FB40, 0xDB62FB40, 0xDB63FB40, 0xDB64FB40, 0xDB65FB40, 0xDB66FB40, 0xDB67FB40, + 0xDB68FB40, 0xDB69FB40, 0xDB6AFB40, 0xDB6BFB40, 0xDB6CFB40, 0xDB6DFB40, 0xDB6EFB40, 0xDB6FFB40, 0xDB70FB40, 0xDB71FB40, 0xDB72FB40, 0xDB73FB40, 0xDB74FB40, 0xDB75FB40, 0xDB76FB40, + 0xDB77FB40, 0xDB78FB40, 0xDB79FB40, 0xDB7AFB40, 0xDB7BFB40, 0xDB7CFB40, 0xDB7DFB40, 0xDB7EFB40, 0xDB7FFB40, 0xDB80FB40, 0xDB81FB40, 0xDB82FB40, 0xDB83FB40, 0xDB84FB40, 0xDB85FB40, + 0xDB86FB40, 0xDB87FB40, 0xDB88FB40, 0xDB89FB40, 0xDB8AFB40, 0xDB8BFB40, 0xDB8CFB40, 0xDB8DFB40, 0xDB8EFB40, 0xDB8FFB40, 0xDB90FB40, 0xDB91FB40, 0xDB92FB40, 0xDB93FB40, 0xDB94FB40, + 0xDB95FB40, 0xDB96FB40, 0xDB97FB40, 0xDB98FB40, 0xDB99FB40, 0xDB9AFB40, 0xDB9BFB40, 0xDB9CFB40, 0xDB9DFB40, 0xDB9EFB40, 0xDB9FFB40, 0xDBA0FB40, 0xDBA1FB40, 0xDBA2FB40, 0xDBA3FB40, + 0xDBA4FB40, 0xDBA5FB40, 0xDBA6FB40, 0xDBA7FB40, 0xDBA8FB40, 0xDBA9FB40, 0xDBAAFB40, 0xDBABFB40, 0xDBACFB40, 0xDBADFB40, 0xDBAEFB40, 0xDBAFFB40, 0xDBB0FB40, 0xDBB1FB40, 0xDBB2FB40, + 0xDBB3FB40, 0xDBB4FB40, 0xDBB5FB40, 0xDBB6FB40, 0xDBB7FB40, 0xDBB8FB40, 0xDBB9FB40, 0xDBBAFB40, 0xDBBBFB40, 0xDBBCFB40, 0xDBBDFB40, 0xDBBEFB40, 0xDBBFFB40, 0xDBC0FB40, 0xDBC1FB40, + 0xDBC2FB40, 0xDBC3FB40, 0xDBC4FB40, 0xDBC5FB40, 0xDBC6FB40, 0xDBC7FB40, 0xDBC8FB40, 0xDBC9FB40, 0xDBCAFB40, 0xDBCBFB40, 0xDBCCFB40, 0xDBCDFB40, 0xDBCEFB40, 0xDBCFFB40, 0xDBD0FB40, + 0xDBD1FB40, 0xDBD2FB40, 0xDBD3FB40, 0xDBD4FB40, 0xDBD5FB40, 0xDBD6FB40, 0xDBD7FB40, 0xDBD8FB40, 0xDBD9FB40, 0xDBDAFB40, 0xDBDBFB40, 0xDBDCFB40, 0xDBDDFB40, 0xDBDEFB40, 0xDBDFFB40, + 0xDBE0FB40, 0xDBE1FB40, 0xDBE2FB40, 0xDBE3FB40, 0xDBE4FB40, 0xDBE5FB40, 0xDBE6FB40, 0xDBE7FB40, 0xDBE8FB40, 0xDBE9FB40, 0xDBEAFB40, 0xDBEBFB40, 0xDBECFB40, 0xDBEDFB40, 0xDBEEFB40, + 0xDBEFFB40, 0xDBF0FB40, 0xDBF1FB40, 0xDBF2FB40, 0xDBF3FB40, 0xDBF4FB40, 0xDBF5FB40, 0xDBF6FB40, 0xDBF7FB40, 0xDBF8FB40, 0xDBF9FB40, 0xDBFAFB40, 
0xDBFBFB40, 0xDBFCFB40, 0xDBFDFB40, + 0xDBFEFB40, 0xDBFFFB40, 0xDC00FB40, 0xDC01FB40, 0xDC02FB40, 0xDC03FB40, 0xDC04FB40, 0xDC05FB40, 0xDC06FB40, 0xDC07FB40, 0xDC08FB40, 0xDC09FB40, 0xDC0AFB40, 0xDC0BFB40, 0xDC0CFB40, + 0xDC0DFB40, 0xDC0EFB40, 0xDC0FFB40, 0xDC10FB40, 0xDC11FB40, 0xDC12FB40, 0xDC13FB40, 0xDC14FB40, 0xDC15FB40, 0xDC16FB40, 0xDC17FB40, 0xDC18FB40, 0xDC19FB40, 0xDC1AFB40, 0xDC1BFB40, + 0xDC1CFB40, 0xDC1DFB40, 0xDC1EFB40, 0xDC1FFB40, 0xDC20FB40, 0xDC21FB40, 0xDC22FB40, 0xDC23FB40, 0xDC24FB40, 0xDC25FB40, 0xDC26FB40, 0xDC27FB40, 0xDC28FB40, 0xDC29FB40, 0xDC2AFB40, + 0xDC2BFB40, 0xDC2CFB40, 0xDC2DFB40, 0xDC2EFB40, 0xDC2FFB40, 0xDC30FB40, 0xDC31FB40, 0xDC32FB40, 0xDC33FB40, 0xDC34FB40, 0xDC35FB40, 0xDC36FB40, 0xDC37FB40, 0xDC38FB40, 0xDC39FB40, + 0xDC3AFB40, 0xDC3BFB40, 0xDC3CFB40, 0xDC3DFB40, 0xDC3EFB40, 0xDC3FFB40, 0xDC40FB40, 0xDC41FB40, 0xDC42FB40, 0xDC43FB40, 0xDC44FB40, 0xDC45FB40, 0xDC46FB40, 0xDC47FB40, 0xDC48FB40, + 0xDC49FB40, 0xDC4AFB40, 0xDC4BFB40, 0xDC4CFB40, 0xDC4DFB40, 0xDC4EFB40, 0xDC4FFB40, 0xDC50FB40, 0xDC51FB40, 0xDC52FB40, 0xDC53FB40, 0xDC54FB40, 0xDC55FB40, 0xDC56FB40, 0xDC57FB40, + 0xDC58FB40, 0xDC59FB40, 0xDC5AFB40, 0xDC5BFB40, 0xDC5CFB40, 0xDC5DFB40, 0xDC5EFB40, 0xDC5FFB40, 0xDC60FB40, 0xDC61FB40, 0xDC62FB40, 0xDC63FB40, 0xDC64FB40, 0xDC65FB40, 0xDC66FB40, + 0xDC67FB40, 0xDC68FB40, 0xDC69FB40, 0xDC6AFB40, 0xDC6BFB40, 0xDC6CFB40, 0xDC6DFB40, 0xDC6EFB40, 0xDC6FFB40, 0xDC70FB40, 0xDC71FB40, 0xDC72FB40, 0xDC73FB40, 0xDC74FB40, 0xDC75FB40, + 0xDC76FB40, 0xDC77FB40, 0xDC78FB40, 0xDC79FB40, 0xDC7AFB40, 0xDC7BFB40, 0xDC7CFB40, 0xDC7DFB40, 0xDC7EFB40, 0xDC7FFB40, 0xDC80FB40, 0xDC81FB40, 0xDC82FB40, 0xDC83FB40, 0xDC84FB40, + 0xDC85FB40, 0xDC86FB40, 0xDC87FB40, 0xDC88FB40, 0xDC89FB40, 0xDC8AFB40, 0xDC8BFB40, 0xDC8CFB40, 0xDC8DFB40, 0xDC8EFB40, 0xDC8FFB40, 0xDC90FB40, 0xDC91FB40, 0xDC92FB40, 0xDC93FB40, + 0xDC94FB40, 0xDC95FB40, 0xDC96FB40, 0xDC97FB40, 0xDC98FB40, 0xDC99FB40, 0xDC9AFB40, 0xDC9BFB40, 0xDC9CFB40, 0xDC9DFB40, 0xDC9EFB40, 
0xDC9FFB40, 0xDCA0FB40, 0xDCA1FB40, 0xDCA2FB40, + 0xDCA3FB40, 0xDCA4FB40, 0xDCA5FB40, 0xDCA6FB40, 0xDCA7FB40, 0xDCA8FB40, 0xDCA9FB40, 0xDCAAFB40, 0xDCABFB40, 0xDCACFB40, 0xDCADFB40, 0xDCAEFB40, 0xDCAFFB40, 0xDCB0FB40, 0xDCB1FB40, + 0xDCB2FB40, 0xDCB3FB40, 0xDCB4FB40, 0xDCB5FB40, 0xDCB6FB40, 0xDCB7FB40, 0xDCB8FB40, 0xDCB9FB40, 0xDCBAFB40, 0xDCBBFB40, 0xDCBCFB40, 0xDCBDFB40, 0xDCBEFB40, 0xDCBFFB40, 0xDCC0FB40, + 0xDCC1FB40, 0xDCC2FB40, 0xDCC3FB40, 0xDCC4FB40, 0xDCC5FB40, 0xDCC6FB40, 0xDCC7FB40, 0xDCC8FB40, 0xDCC9FB40, 0xDCCAFB40, 0xDCCBFB40, 0xDCCCFB40, 0xDCCDFB40, 0xDCCEFB40, 0xDCCFFB40, + 0xDCD0FB40, 0xDCD1FB40, 0xDCD2FB40, 0xDCD3FB40, 0xDCD4FB40, 0xDCD5FB40, 0xDCD6FB40, 0xDCD7FB40, 0xDCD8FB40, 0xDCD9FB40, 0xDCDAFB40, 0xDCDBFB40, 0xDCDCFB40, 0xDCDDFB40, 0xDCDEFB40, + 0xDCDFFB40, 0xDCE0FB40, 0xDCE1FB40, 0xDCE2FB40, 0xDCE3FB40, 0xDCE4FB40, 0xDCE5FB40, 0xDCE6FB40, 0xDCE7FB40, 0xDCE8FB40, 0xDCE9FB40, 0xDCEAFB40, 0xDCEBFB40, 0xDCECFB40, 0xDCEDFB40, + 0xDCEEFB40, 0xDCEFFB40, 0xDCF0FB40, 0xDCF1FB40, 0xDCF2FB40, 0xDCF3FB40, 0xDCF4FB40, 0xDCF5FB40, 0xDCF6FB40, 0xDCF7FB40, 0xDCF8FB40, 0xDCF9FB40, 0xDCFAFB40, 0xDCFBFB40, 0xDCFCFB40, + 0xDCFDFB40, 0xDCFEFB40, 0xDCFFFB40, 0xDD00FB40, 0xDD01FB40, 0xDD02FB40, 0xDD03FB40, 0xDD04FB40, 0xDD05FB40, 0xDD06FB40, 0xDD07FB40, 0xDD08FB40, 0xDD09FB40, 0xDD0AFB40, 0xDD0BFB40, + 0xDD0CFB40, 0xDD0DFB40, 0xDD0EFB40, 0xDD0FFB40, 0xDD10FB40, 0xDD11FB40, 0xDD12FB40, 0xDD13FB40, 0xDD14FB40, 0xDD15FB40, 0xDD16FB40, 0xDD17FB40, 0xDD18FB40, 0xDD19FB40, 0xDD1AFB40, + 0xDD1BFB40, 0xDD1CFB40, 0xDD1DFB40, 0xDD1EFB40, 0xDD1FFB40, 0xDD20FB40, 0xDD21FB40, 0xDD22FB40, 0xDD23FB40, 0xDD24FB40, 0xDD25FB40, 0xDD26FB40, 0xDD27FB40, 0xDD28FB40, 0xDD29FB40, + 0xDD2AFB40, 0xDD2BFB40, 0xDD2CFB40, 0xDD2DFB40, 0xDD2EFB40, 0xDD2FFB40, 0xDD30FB40, 0xDD31FB40, 0xDD32FB40, 0xDD33FB40, 0xDD34FB40, 0xDD35FB40, 0xDD36FB40, 0xDD37FB40, 0xDD38FB40, + 0xDD39FB40, 0xDD3AFB40, 0xDD3BFB40, 0xDD3CFB40, 0xDD3DFB40, 0xDD3EFB40, 0xDD3FFB40, 0xDD40FB40, 0xDD41FB40, 0xDD42FB40, 
0xDD43FB40, 0xDD44FB40, 0xDD45FB40, 0xDD46FB40, 0xDD47FB40, + 0xDD48FB40, 0xDD49FB40, 0xDD4AFB40, 0xDD4BFB40, 0xDD4CFB40, 0xDD4DFB40, 0xDD4EFB40, 0xDD4FFB40, 0xDD50FB40, 0xDD51FB40, 0xDD52FB40, 0xDD53FB40, 0xDD54FB40, 0xDD55FB40, 0xDD56FB40, + 0xDD57FB40, 0xDD58FB40, 0xDD59FB40, 0xDD5AFB40, 0xDD5BFB40, 0xDD5CFB40, 0xDD5DFB40, 0xDD5EFB40, 0xDD5FFB40, 0xDD60FB40, 0xDD61FB40, 0xDD62FB40, 0xDD63FB40, 0xDD64FB40, 0xDD65FB40, + 0xDD66FB40, 0xDD67FB40, 0xDD68FB40, 0xDD69FB40, 0xDD6AFB40, 0xDD6BFB40, 0xDD6CFB40, 0xDD6DFB40, 0xDD6EFB40, 0xDD6FFB40, 0xDD70FB40, 0xDD71FB40, 0xDD72FB40, 0xDD73FB40, 0xDD74FB40, + 0xDD75FB40, 0xDD76FB40, 0xDD77FB40, 0xDD78FB40, 0xDD79FB40, 0xDD7AFB40, 0xDD7BFB40, 0xDD7CFB40, 0xDD7DFB40, 0xDD7EFB40, 0xDD7FFB40, 0xDD80FB40, 0xDD81FB40, 0xDD82FB40, 0xDD83FB40, + 0xDD84FB40, 0xDD85FB40, 0xDD86FB40, 0xDD87FB40, 0xDD88FB40, 0xDD89FB40, 0xDD8AFB40, 0xDD8BFB40, 0xDD8CFB40, 0xDD8DFB40, 0xDD8EFB40, 0xDD8FFB40, 0xDD90FB40, 0xDD91FB40, 0xDD92FB40, + 0xDD93FB40, 0xDD94FB40, 0xDD95FB40, 0xDD96FB40, 0xDD97FB40, 0xDD98FB40, 0xDD99FB40, 0xDD9AFB40, 0xDD9BFB40, 0xDD9CFB40, 0xDD9DFB40, 0xDD9EFB40, 0xDD9FFB40, 0xDDA0FB40, 0xDDA1FB40, + 0xDDA2FB40, 0xDDA3FB40, 0xDDA4FB40, 0xDDA5FB40, 0xDDA6FB40, 0xDDA7FB40, 0xDDA8FB40, 0xDDA9FB40, 0xDDAAFB40, 0xDDABFB40, 0xDDACFB40, 0xDDADFB40, 0xDDAEFB40, 0xDDAFFB40, 0xDDB0FB40, + 0xDDB1FB40, 0xDDB2FB40, 0xDDB3FB40, 0xDDB4FB40, 0xDDB5FB40, 0xDDB6FB40, 0xDDB7FB40, 0xDDB8FB40, 0xDDB9FB40, 0xDDBAFB40, 0xDDBBFB40, 0xDDBCFB40, 0xDDBDFB40, 0xDDBEFB40, 0xDDBFFB40, + 0xDDC0FB40, 0xDDC1FB40, 0xDDC2FB40, 0xDDC3FB40, 0xDDC4FB40, 0xDDC5FB40, 0xDDC6FB40, 0xDDC7FB40, 0xDDC8FB40, 0xDDC9FB40, 0xDDCAFB40, 0xDDCBFB40, 0xDDCCFB40, 0xDDCDFB40, 0xDDCEFB40, + 0xDDCFFB40, 0xDDD0FB40, 0xDDD1FB40, 0xDDD2FB40, 0xDDD3FB40, 0xDDD4FB40, 0xDDD5FB40, 0xDDD6FB40, 0xDDD7FB40, 0xDDD8FB40, 0xDDD9FB40, 0xDDDAFB40, 0xDDDBFB40, 0xDDDCFB40, 0xDDDDFB40, + 0xDDDEFB40, 0xDDDFFB40, 0xDDE0FB40, 0xDDE1FB40, 0xDDE2FB40, 0xDDE3FB40, 0xDDE4FB40, 0xDDE5FB40, 0xDDE6FB40, 
0xDDE7FB40, 0xDDE8FB40, 0xDDE9FB40, 0xDDEAFB40, 0xDDEBFB40, 0xDDECFB40, + 0xDDEDFB40, 0xDDEEFB40, 0xDDEFFB40, 0xDDF0FB40, 0xDDF1FB40, 0xDDF2FB40, 0xDDF3FB40, 0xDDF4FB40, 0xDDF5FB40, 0xDDF6FB40, 0xDDF7FB40, 0xDDF8FB40, 0xDDF9FB40, 0xDDFAFB40, 0xDDFBFB40, + 0xDDFCFB40, 0xDDFDFB40, 0xDDFEFB40, 0xDDFFFB40, 0xDE00FB40, 0xDE01FB40, 0xDE02FB40, 0xDE03FB40, 0xDE04FB40, 0xDE05FB40, 0xDE06FB40, 0xDE07FB40, 0xDE08FB40, 0xDE09FB40, 0xDE0AFB40, + 0xDE0BFB40, 0xDE0CFB40, 0xDE0DFB40, 0xDE0EFB40, 0xDE0FFB40, 0xDE10FB40, 0xDE11FB40, 0xDE12FB40, 0xDE13FB40, 0xDE14FB40, 0xDE15FB40, 0xDE16FB40, 0xDE17FB40, 0xDE18FB40, 0xDE19FB40, + 0xDE1AFB40, 0xDE1BFB40, 0xDE1CFB40, 0xDE1DFB40, 0xDE1EFB40, 0xDE1FFB40, 0xDE20FB40, 0xDE21FB40, 0xDE22FB40, 0xDE23FB40, 0xDE24FB40, 0xDE25FB40, 0xDE26FB40, 0xDE27FB40, 0xDE28FB40, + 0xDE29FB40, 0xDE2AFB40, 0xDE2BFB40, 0xDE2CFB40, 0xDE2DFB40, 0xDE2EFB40, 0xDE2FFB40, 0xDE30FB40, 0xDE31FB40, 0xDE32FB40, 0xDE33FB40, 0xDE34FB40, 0xDE35FB40, 0xDE36FB40, 0xDE37FB40, + 0xDE38FB40, 0xDE39FB40, 0xDE3AFB40, 0xDE3BFB40, 0xDE3CFB40, 0xDE3DFB40, 0xDE3EFB40, 0xDE3FFB40, 0xDE40FB40, 0xDE41FB40, 0xDE42FB40, 0xDE43FB40, 0xDE44FB40, 0xDE45FB40, 0xDE46FB40, + 0xDE47FB40, 0xDE48FB40, 0xDE49FB40, 0xDE4AFB40, 0xDE4BFB40, 0xDE4CFB40, 0xDE4DFB40, 0xDE4EFB40, 0xDE4FFB40, 0xDE50FB40, 0xDE51FB40, 0xDE52FB40, 0xDE53FB40, 0xDE54FB40, 0xDE55FB40, + 0xDE56FB40, 0xDE57FB40, 0xDE58FB40, 0xDE59FB40, 0xDE5AFB40, 0xDE5BFB40, 0xDE5CFB40, 0xDE5DFB40, 0xDE5EFB40, 0xDE5FFB40, 0xDE60FB40, 0xDE61FB40, 0xDE62FB40, 0xDE63FB40, 0xDE64FB40, + 0xDE65FB40, 0xDE66FB40, 0xDE67FB40, 0xDE68FB40, 0xDE69FB40, 0xDE6AFB40, 0xDE6BFB40, 0xDE6CFB40, 0xDE6DFB40, 0xDE6EFB40, 0xDE6FFB40, 0xDE70FB40, 0xDE71FB40, 0xDE72FB40, 0xDE73FB40, + 0xDE74FB40, 0xDE75FB40, 0xDE76FB40, 0xDE77FB40, 0xDE78FB40, 0xDE79FB40, 0xDE7AFB40, 0xDE7BFB40, 0xDE7CFB40, 0xDE7DFB40, 0xDE7EFB40, 0xDE7FFB40, 0xDE80FB40, 0xDE81FB40, 0xDE82FB40, + 0xDE83FB40, 0xDE84FB40, 0xDE85FB40, 0xDE86FB40, 0xDE87FB40, 0xDE88FB40, 0xDE89FB40, 0xDE8AFB40, 
0xDE8BFB40, 0xDE8CFB40, 0xDE8DFB40, 0xDE8EFB40, 0xDE8FFB40, 0xDE90FB40, 0xDE91FB40, + 0xDE92FB40, 0xDE93FB40, 0xDE94FB40, 0xDE95FB40, 0xDE96FB40, 0xDE97FB40, 0xDE98FB40, 0xDE99FB40, 0xDE9AFB40, 0xDE9BFB40, 0xDE9CFB40, 0xDE9DFB40, 0xDE9EFB40, 0xDE9FFB40, 0xDEA0FB40, + 0xDEA1FB40, 0xDEA2FB40, 0xDEA3FB40, 0xDEA4FB40, 0xDEA5FB40, 0xDEA6FB40, 0xDEA7FB40, 0xDEA8FB40, 0xDEA9FB40, 0xDEAAFB40, 0xDEABFB40, 0xDEACFB40, 0xDEADFB40, 0xDEAEFB40, 0xDEAFFB40, + 0xDEB0FB40, 0xDEB1FB40, 0xDEB2FB40, 0xDEB3FB40, 0xDEB4FB40, 0xDEB5FB40, 0xDEB6FB40, 0xDEB7FB40, 0xDEB8FB40, 0xDEB9FB40, 0xDEBAFB40, 0xDEBBFB40, 0xDEBCFB40, 0xDEBDFB40, 0xDEBEFB40, + 0xDEBFFB40, 0xDEC0FB40, 0xDEC1FB40, 0xDEC2FB40, 0xDEC3FB40, 0xDEC4FB40, 0xDEC5FB40, 0xDEC6FB40, 0xDEC7FB40, 0xDEC8FB40, 0xDEC9FB40, 0xDECAFB40, 0xDECBFB40, 0xDECCFB40, 0xDECDFB40, + 0xDECEFB40, 0xDECFFB40, 0xDED0FB40, 0xDED1FB40, 0xDED2FB40, 0xDED3FB40, 0xDED4FB40, 0xDED5FB40, 0xDED6FB40, 0xDED7FB40, 0xDED8FB40, 0xDED9FB40, 0xDEDAFB40, 0xDEDBFB40, 0xDEDCFB40, + 0xDEDDFB40, 0xDEDEFB40, 0xDEDFFB40, 0xDEE0FB40, 0xDEE1FB40, 0xDEE2FB40, 0xDEE3FB40, 0xDEE4FB40, 0xDEE5FB40, 0xDEE6FB40, 0xDEE7FB40, 0xDEE8FB40, 0xDEE9FB40, 0xDEEAFB40, 0xDEEBFB40, + 0xDEECFB40, 0xDEEDFB40, 0xDEEEFB40, 0xDEEFFB40, 0xDEF0FB40, 0xDEF1FB40, 0xDEF2FB40, 0xDEF3FB40, 0xDEF4FB40, 0xDEF5FB40, 0xDEF6FB40, 0xDEF7FB40, 0xDEF8FB40, 0xDEF9FB40, 0xDEFAFB40, + 0xDEFBFB40, 0xDEFCFB40, 0xDEFDFB40, 0xDEFEFB40, 0xDEFFFB40, 0xDF00FB40, 0xDF01FB40, 0xDF02FB40, 0xDF03FB40, 0xDF04FB40, 0xDF05FB40, 0xDF06FB40, 0xDF07FB40, 0xDF08FB40, 0xDF09FB40, + 0xDF0AFB40, 0xDF0BFB40, 0xDF0CFB40, 0xDF0DFB40, 0xDF0EFB40, 0xDF0FFB40, 0xDF10FB40, 0xDF11FB40, 0xDF12FB40, 0xDF13FB40, 0xDF14FB40, 0xDF15FB40, 0xDF16FB40, 0xDF17FB40, 0xDF18FB40, + 0xDF19FB40, 0xDF1AFB40, 0xDF1BFB40, 0xDF1CFB40, 0xDF1DFB40, 0xDF1EFB40, 0xDF1FFB40, 0xDF20FB40, 0xDF21FB40, 0xDF22FB40, 0xDF23FB40, 0xDF24FB40, 0xDF25FB40, 0xDF26FB40, 0xDF27FB40, + 0xDF28FB40, 0xDF29FB40, 0xDF2AFB40, 0xDF2BFB40, 0xDF2CFB40, 0xDF2DFB40, 0xDF2EFB40, 
0xDF2FFB40, 0xDF30FB40, 0xDF31FB40, 0xDF32FB40, 0xDF33FB40, 0xDF34FB40, 0xDF35FB40, 0xDF36FB40, + 0xDF37FB40, 0xDF38FB40, 0xDF39FB40, 0xDF3AFB40, 0xDF3BFB40, 0xDF3CFB40, 0xDF3DFB40, 0xDF3EFB40, 0xDF3FFB40, 0xDF40FB40, 0xDF41FB40, 0xDF42FB40, 0xDF43FB40, 0xDF44FB40, 0xDF45FB40, + 0xDF46FB40, 0xDF47FB40, 0xDF48FB40, 0xDF49FB40, 0xDF4AFB40, 0xDF4BFB40, 0xDF4CFB40, 0xDF4DFB40, 0xDF4EFB40, 0xDF4FFB40, 0xDF50FB40, 0xDF51FB40, 0xDF52FB40, 0xDF53FB40, 0xDF54FB40, + 0xDF55FB40, 0xDF56FB40, 0xDF57FB40, 0xDF58FB40, 0xDF59FB40, 0xDF5AFB40, 0xDF5BFB40, 0xDF5CFB40, 0xDF5DFB40, 0xDF5EFB40, 0xDF5FFB40, 0xDF60FB40, 0xDF61FB40, 0xDF62FB40, 0xDF63FB40, + 0xDF64FB40, 0xDF65FB40, 0xDF66FB40, 0xDF67FB40, 0xDF68FB40, 0xDF69FB40, 0xDF6AFB40, 0xDF6BFB40, 0xDF6CFB40, 0xDF6DFB40, 0xDF6EFB40, 0xDF6FFB40, 0xDF70FB40, 0xDF71FB40, 0xDF72FB40, + 0xDF73FB40, 0xDF74FB40, 0xDF75FB40, 0xDF76FB40, 0xDF77FB40, 0xDF78FB40, 0xDF79FB40, 0xDF7AFB40, 0xDF7BFB40, 0xDF7CFB40, 0xDF7DFB40, 0xDF7EFB40, 0xDF7FFB40, 0xDF80FB40, 0xDF81FB40, + 0xDF82FB40, 0xDF83FB40, 0xDF84FB40, 0xDF85FB40, 0xDF86FB40, 0xDF87FB40, 0xDF88FB40, 0xDF89FB40, 0xDF8AFB40, 0xDF8BFB40, 0xDF8CFB40, 0xDF8DFB40, 0xDF8EFB40, 0xDF8FFB40, 0xDF90FB40, + 0xDF91FB40, 0xDF92FB40, 0xDF93FB40, 0xDF94FB40, 0xDF95FB40, 0xDF96FB40, 0xDF97FB40, 0xDF98FB40, 0xDF99FB40, 0xDF9AFB40, 0xDF9BFB40, 0xDF9CFB40, 0xDF9DFB40, 0xDF9EFB40, 0xDF9FFB40, + 0xDFA0FB40, 0xDFA1FB40, 0xDFA2FB40, 0xDFA3FB40, 0xDFA4FB40, 0xDFA5FB40, 0xDFA6FB40, 0xDFA7FB40, 0xDFA8FB40, 0xDFA9FB40, 0xDFAAFB40, 0xDFABFB40, 0xDFACFB40, 0xDFADFB40, 0xDFAEFB40, + 0xDFAFFB40, 0xDFB0FB40, 0xDFB1FB40, 0xDFB2FB40, 0xDFB3FB40, 0xDFB4FB40, 0xDFB5FB40, 0xDFB6FB40, 0xDFB7FB40, 0xDFB8FB40, 0xDFB9FB40, 0xDFBAFB40, 0xDFBBFB40, 0xDFBCFB40, 0xDFBDFB40, + 0xDFBEFB40, 0xDFBFFB40, 0xDFC0FB40, 0xDFC1FB40, 0xDFC2FB40, 0xDFC3FB40, 0xDFC4FB40, 0xDFC5FB40, 0xDFC6FB40, 0xDFC7FB40, 0xDFC8FB40, 0xDFC9FB40, 0xDFCAFB40, 0xDFCBFB40, 0xDFCCFB40, + 0xDFCDFB40, 0xDFCEFB40, 0xDFCFFB40, 0xDFD0FB40, 0xDFD1FB40, 0xDFD2FB40, 
0xDFD3FB40, 0xDFD4FB40, 0xDFD5FB40, 0xDFD6FB40, 0xDFD7FB40, 0xDFD8FB40, 0xDFD9FB40, 0xDFDAFB40, 0xDFDBFB40, + 0xDFDCFB40, 0xDFDDFB40, 0xDFDEFB40, 0xDFDFFB40, 0xDFE0FB40, 0xDFE1FB40, 0xDFE2FB40, 0xDFE3FB40, 0xDFE4FB40, 0xDFE5FB40, 0xDFE6FB40, 0xDFE7FB40, 0xDFE8FB40, 0xDFE9FB40, 0xDFEAFB40, + 0xDFEBFB40, 0xDFECFB40, 0xDFEDFB40, 0xDFEEFB40, 0xDFEFFB40, 0xDFF0FB40, 0xDFF1FB40, 0xDFF2FB40, 0xDFF3FB40, 0xDFF4FB40, 0xDFF5FB40, 0xDFF6FB40, 0xDFF7FB40, 0xDFF8FB40, 0xDFF9FB40, + 0xDFFAFB40, 0xDFFBFB40, 0xDFFCFB40, 0xDFFDFB40, 0xDFFEFB40, 0xDFFFFB40, 0xE000FB40, 0xE001FB40, 0xE002FB40, 0xE003FB40, 0xE004FB40, 0xE005FB40, 0xE006FB40, 0xE007FB40, 0xE008FB40, + 0xE009FB40, 0xE00AFB40, 0xE00BFB40, 0xE00CFB40, 0xE00DFB40, 0xE00EFB40, 0xE00FFB40, 0xE010FB40, 0xE011FB40, 0xE012FB40, 0xE013FB40, 0xE014FB40, 0xE015FB40, 0xE016FB40, 0xE017FB40, + 0xE018FB40, 0xE019FB40, 0xE01AFB40, 0xE01BFB40, 0xE01CFB40, 0xE01DFB40, 0xE01EFB40, 0xE01FFB40, 0xE020FB40, 0xE021FB40, 0xE022FB40, 0xE023FB40, 0xE024FB40, 0xE025FB40, 0xE026FB40, + 0xE027FB40, 0xE028FB40, 0xE029FB40, 0xE02AFB40, 0xE02BFB40, 0xE02CFB40, 0xE02DFB40, 0xE02EFB40, 0xE02FFB40, 0xE030FB40, 0xE031FB40, 0xE032FB40, 0xE033FB40, 0xE034FB40, 0xE035FB40, + 0xE036FB40, 0xE037FB40, 0xE038FB40, 0xE039FB40, 0xE03AFB40, 0xE03BFB40, 0xE03CFB40, 0xE03DFB40, 0xE03EFB40, 0xE03FFB40, 0xE040FB40, 0xE041FB40, 0xE042FB40, 0xE043FB40, 0xE044FB40, + 0xE045FB40, 0xE046FB40, 0xE047FB40, 0xE048FB40, 0xE049FB40, 0xE04AFB40, 0xE04BFB40, 0xE04CFB40, 0xE04DFB40, 0xE04EFB40, 0xE04FFB40, 0xE050FB40, 0xE051FB40, 0xE052FB40, 0xE053FB40, + 0xE054FB40, 0xE055FB40, 0xE056FB40, 0xE057FB40, 0xE058FB40, 0xE059FB40, 0xE05AFB40, 0xE05BFB40, 0xE05CFB40, 0xE05DFB40, 0xE05EFB40, 0xE05FFB40, 0xE060FB40, 0xE061FB40, 0xE062FB40, + 0xE063FB40, 0xE064FB40, 0xE065FB40, 0xE066FB40, 0xE067FB40, 0xE068FB40, 0xE069FB40, 0xE06AFB40, 0xE06BFB40, 0xE06CFB40, 0xE06DFB40, 0xE06EFB40, 0xE06FFB40, 0xE070FB40, 0xE071FB40, + 0xE072FB40, 0xE073FB40, 0xE074FB40, 0xE075FB40, 0xE076FB40, 
0xE077FB40, 0xE078FB40, 0xE079FB40, 0xE07AFB40, 0xE07BFB40, 0xE07CFB40, 0xE07DFB40, 0xE07EFB40, 0xE07FFB40, 0xE080FB40, + 0xE081FB40, 0xE082FB40, 0xE083FB40, 0xE084FB40, 0xE085FB40, 0xE086FB40, 0xE087FB40, 0xE088FB40, 0xE089FB40, 0xE08AFB40, 0xE08BFB40, 0xE08CFB40, 0xE08DFB40, 0xE08EFB40, 0xE08FFB40, + 0xE090FB40, 0xE091FB40, 0xE092FB40, 0xE093FB40, 0xE094FB40, 0xE095FB40, 0xE096FB40, 0xE097FB40, 0xE098FB40, 0xE099FB40, 0xE09AFB40, 0xE09BFB40, 0xE09CFB40, 0xE09DFB40, 0xE09EFB40, + 0xE09FFB40, 0xE0A0FB40, 0xE0A1FB40, 0xE0A2FB40, 0xE0A3FB40, 0xE0A4FB40, 0xE0A5FB40, 0xE0A6FB40, 0xE0A7FB40, 0xE0A8FB40, 0xE0A9FB40, 0xE0AAFB40, 0xE0ABFB40, 0xE0ACFB40, 0xE0ADFB40, + 0xE0AEFB40, 0xE0AFFB40, 0xE0B0FB40, 0xE0B1FB40, 0xE0B2FB40, 0xE0B3FB40, 0xE0B4FB40, 0xE0B5FB40, 0xE0B6FB40, 0xE0B7FB40, 0xE0B8FB40, 0xE0B9FB40, 0xE0BAFB40, 0xE0BBFB40, 0xE0BCFB40, + 0xE0BDFB40, 0xE0BEFB40, 0xE0BFFB40, 0xE0C0FB40, 0xE0C1FB40, 0xE0C2FB40, 0xE0C3FB40, 0xE0C4FB40, 0xE0C5FB40, 0xE0C6FB40, 0xE0C7FB40, 0xE0C8FB40, 0xE0C9FB40, 0xE0CAFB40, 0xE0CBFB40, + 0xE0CCFB40, 0xE0CDFB40, 0xE0CEFB40, 0xE0CFFB40, 0xE0D0FB40, 0xE0D1FB40, 0xE0D2FB40, 0xE0D3FB40, 0xE0D4FB40, 0xE0D5FB40, 0xE0D6FB40, 0xE0D7FB40, 0xE0D8FB40, 0xE0D9FB40, 0xE0DAFB40, + 0xE0DBFB40, 0xE0DCFB40, 0xE0DDFB40, 0xE0DEFB40, 0xE0DFFB40, 0xE0E0FB40, 0xE0E1FB40, 0xE0E2FB40, 0xE0E3FB40, 0xE0E4FB40, 0xE0E5FB40, 0xE0E6FB40, 0xE0E7FB40, 0xE0E8FB40, 0xE0E9FB40, + 0xE0EAFB40, 0xE0EBFB40, 0xE0ECFB40, 0xE0EDFB40, 0xE0EEFB40, 0xE0EFFB40, 0xE0F0FB40, 0xE0F1FB40, 0xE0F2FB40, 0xE0F3FB40, 0xE0F4FB40, 0xE0F5FB40, 0xE0F6FB40, 0xE0F7FB40, 0xE0F8FB40, + 0xE0F9FB40, 0xE0FAFB40, 0xE0FBFB40, 0xE0FCFB40, 0xE0FDFB40, 0xE0FEFB40, 0xE0FFFB40, 0xE100FB40, 0xE101FB40, 0xE102FB40, 0xE103FB40, 0xE104FB40, 0xE105FB40, 0xE106FB40, 0xE107FB40, + 0xE108FB40, 0xE109FB40, 0xE10AFB40, 0xE10BFB40, 0xE10CFB40, 0xE10DFB40, 0xE10EFB40, 0xE10FFB40, 0xE110FB40, 0xE111FB40, 0xE112FB40, 0xE113FB40, 0xE114FB40, 0xE115FB40, 0xE116FB40, + 0xE117FB40, 0xE118FB40, 0xE119FB40, 0xE11AFB40, 
0xE11BFB40, 0xE11CFB40, 0xE11DFB40, 0xE11EFB40, 0xE11FFB40, 0xE120FB40, 0xE121FB40, 0xE122FB40, 0xE123FB40, 0xE124FB40, 0xE125FB40, + 0xE126FB40, 0xE127FB40, 0xE128FB40, 0xE129FB40, 0xE12AFB40, 0xE12BFB40, 0xE12CFB40, 0xE12DFB40, 0xE12EFB40, 0xE12FFB40, 0xE130FB40, 0xE131FB40, 0xE132FB40, 0xE133FB40, 0xE134FB40, + 0xE135FB40, 0xE136FB40, 0xE137FB40, 0xE138FB40, 0xE139FB40, 0xE13AFB40, 0xE13BFB40, 0xE13CFB40, 0xE13DFB40, 0xE13EFB40, 0xE13FFB40, 0xE140FB40, 0xE141FB40, 0xE142FB40, 0xE143FB40, + 0xE144FB40, 0xE145FB40, 0xE146FB40, 0xE147FB40, 0xE148FB40, 0xE149FB40, 0xE14AFB40, 0xE14BFB40, 0xE14CFB40, 0xE14DFB40, 0xE14EFB40, 0xE14FFB40, 0xE150FB40, 0xE151FB40, 0xE152FB40, + 0xE153FB40, 0xE154FB40, 0xE155FB40, 0xE156FB40, 0xE157FB40, 0xE158FB40, 0xE159FB40, 0xE15AFB40, 0xE15BFB40, 0xE15CFB40, 0xE15DFB40, 0xE15EFB40, 0xE15FFB40, 0xE160FB40, 0xE161FB40, + 0xE162FB40, 0xE163FB40, 0xE164FB40, 0xE165FB40, 0xE166FB40, 0xE167FB40, 0xE168FB40, 0xE169FB40, 0xE16AFB40, 0xE16BFB40, 0xE16CFB40, 0xE16DFB40, 0xE16EFB40, 0xE16FFB40, 0xE170FB40, + 0xE171FB40, 0xE172FB40, 0xE173FB40, 0xE174FB40, 0xE175FB40, 0xE176FB40, 0xE177FB40, 0xE178FB40, 0xE179FB40, 0xE17AFB40, 0xE17BFB40, 0xE17CFB40, 0xE17DFB40, 0xE17EFB40, 0xE17FFB40, + 0xE180FB40, 0xE181FB40, 0xE182FB40, 0xE183FB40, 0xE184FB40, 0xE185FB40, 0xE186FB40, 0xE187FB40, 0xE188FB40, 0xE189FB40, 0xE18AFB40, 0xE18BFB40, 0xE18CFB40, 0xE18DFB40, 0xE18EFB40, + 0xE18FFB40, 0xE190FB40, 0xE191FB40, 0xE192FB40, 0xE193FB40, 0xE194FB40, 0xE195FB40, 0xE196FB40, 0xE197FB40, 0xE198FB40, 0xE199FB40, 0xE19AFB40, 0xE19BFB40, 0xE19CFB40, 0xE19DFB40, + 0xE19EFB40, 0xE19FFB40, 0xE1A0FB40, 0xE1A1FB40, 0xE1A2FB40, 0xE1A3FB40, 0xE1A4FB40, 0xE1A5FB40, 0xE1A6FB40, 0xE1A7FB40, 0xE1A8FB40, 0xE1A9FB40, 0xE1AAFB40, 0xE1ABFB40, 0xE1ACFB40, + 0xE1ADFB40, 0xE1AEFB40, 0xE1AFFB40, 0xE1B0FB40, 0xE1B1FB40, 0xE1B2FB40, 0xE1B3FB40, 0xE1B4FB40, 0xE1B5FB40, 0xE1B6FB40, 0xE1B7FB40, 0xE1B8FB40, 0xE1B9FB40, 0xE1BAFB40, 0xE1BBFB40, + 0xE1BCFB40, 0xE1BDFB40, 0xE1BEFB40, 
0xE1BFFB40, 0xE1C0FB40, 0xE1C1FB40, 0xE1C2FB40, 0xE1C3FB40, 0xE1C4FB40, 0xE1C5FB40, 0xE1C6FB40, 0xE1C7FB40, 0xE1C8FB40, 0xE1C9FB40, 0xE1CAFB40, + 0xE1CBFB40, 0xE1CCFB40, 0xE1CDFB40, 0xE1CEFB40, 0xE1CFFB40, 0xE1D0FB40, 0xE1D1FB40, 0xE1D2FB40, 0xE1D3FB40, 0xE1D4FB40, 0xE1D5FB40, 0xE1D6FB40, 0xE1D7FB40, 0xE1D8FB40, 0xE1D9FB40, + 0xE1DAFB40, 0xE1DBFB40, 0xE1DCFB40, 0xE1DDFB40, 0xE1DEFB40, 0xE1DFFB40, 0xE1E0FB40, 0xE1E1FB40, 0xE1E2FB40, 0xE1E3FB40, 0xE1E4FB40, 0xE1E5FB40, 0xE1E6FB40, 0xE1E7FB40, 0xE1E8FB40, + 0xE1E9FB40, 0xE1EAFB40, 0xE1EBFB40, 0xE1ECFB40, 0xE1EDFB40, 0xE1EEFB40, 0xE1EFFB40, 0xE1F0FB40, 0xE1F1FB40, 0xE1F2FB40, 0xE1F3FB40, 0xE1F4FB40, 0xE1F5FB40, 0xE1F6FB40, 0xE1F7FB40, + 0xE1F8FB40, 0xE1F9FB40, 0xE1FAFB40, 0xE1FBFB40, 0xE1FCFB40, 0xE1FDFB40, 0xE1FEFB40, 0xE1FFFB40, 0xE200FB40, 0xE201FB40, 0xE202FB40, 0xE203FB40, 0xE204FB40, 0xE205FB40, 0xE206FB40, + 0xE207FB40, 0xE208FB40, 0xE209FB40, 0xE20AFB40, 0xE20BFB40, 0xE20CFB40, 0xE20DFB40, 0xE20EFB40, 0xE20FFB40, 0xE210FB40, 0xE211FB40, 0xE212FB40, 0xE213FB40, 0xE214FB40, 0xE215FB40, + 0xE216FB40, 0xE217FB40, 0xE218FB40, 0xE219FB40, 0xE21AFB40, 0xE21BFB40, 0xE21CFB40, 0xE21DFB40, 0xE21EFB40, 0xE21FFB40, 0xE220FB40, 0xE221FB40, 0xE222FB40, 0xE223FB40, 0xE224FB40, + 0xE225FB40, 0xE226FB40, 0xE227FB40, 0xE228FB40, 0xE229FB40, 0xE22AFB40, 0xE22BFB40, 0xE22CFB40, 0xE22DFB40, 0xE22EFB40, 0xE22FFB40, 0xE230FB40, 0xE231FB40, 0xE232FB40, 0xE233FB40, + 0xE234FB40, 0xE235FB40, 0xE236FB40, 0xE237FB40, 0xE238FB40, 0xE239FB40, 0xE23AFB40, 0xE23BFB40, 0xE23CFB40, 0xE23DFB40, 0xE23EFB40, 0xE23FFB40, 0xE240FB40, 0xE241FB40, 0xE242FB40, + 0xE243FB40, 0xE244FB40, 0xE245FB40, 0xE246FB40, 0xE247FB40, 0xE248FB40, 0xE249FB40, 0xE24AFB40, 0xE24BFB40, 0xE24CFB40, 0xE24DFB40, 0xE24EFB40, 0xE24FFB40, 0xE250FB40, 0xE251FB40, + 0xE252FB40, 0xE253FB40, 0xE254FB40, 0xE255FB40, 0xE256FB40, 0xE257FB40, 0xE258FB40, 0xE259FB40, 0xE25AFB40, 0xE25BFB40, 0xE25CFB40, 0xE25DFB40, 0xE25EFB40, 0xE25FFB40, 0xE260FB40, + 0xE261FB40, 0xE262FB40, 
0xE263FB40, 0xE264FB40, 0xE265FB40, 0xE266FB40, 0xE267FB40, 0xE268FB40, 0xE269FB40, 0xE26AFB40, 0xE26BFB40, 0xE26CFB40, 0xE26DFB40, 0xE26EFB40, 0xE26FFB40, + 0xE270FB40, 0xE271FB40, 0xE272FB40, 0xE273FB40, 0xE274FB40, 0xE275FB40, 0xE276FB40, 0xE277FB40, 0xE278FB40, 0xE279FB40, 0xE27AFB40, 0xE27BFB40, 0xE27CFB40, 0xE27DFB40, 0xE27EFB40, + 0xE27FFB40, 0xE280FB40, 0xE281FB40, 0xE282FB40, 0xE283FB40, 0xE284FB40, 0xE285FB40, 0xE286FB40, 0xE287FB40, 0xE288FB40, 0xE289FB40, 0xE28AFB40, 0xE28BFB40, 0xE28CFB40, 0xE28DFB40, + 0xE28EFB40, 0xE28FFB40, 0xE290FB40, 0xE291FB40, 0xE292FB40, 0xE293FB40, 0xE294FB40, 0xE295FB40, 0xE296FB40, 0xE297FB40, 0xE298FB40, 0xE299FB40, 0xE29AFB40, 0xE29BFB40, 0xE29CFB40, + 0xE29DFB40, 0xE29EFB40, 0xE29FFB40, 0xE2A0FB40, 0xE2A1FB40, 0xE2A2FB40, 0xE2A3FB40, 0xE2A4FB40, 0xE2A5FB40, 0xE2A6FB40, 0xE2A7FB40, 0xE2A8FB40, 0xE2A9FB40, 0xE2AAFB40, 0xE2ABFB40, + 0xE2ACFB40, 0xE2ADFB40, 0xE2AEFB40, 0xE2AFFB40, 0xE2B0FB40, 0xE2B1FB40, 0xE2B2FB40, 0xE2B3FB40, 0xE2B4FB40, 0xE2B5FB40, 0xE2B6FB40, 0xE2B7FB40, 0xE2B8FB40, 0xE2B9FB40, 0xE2BAFB40, + 0xE2BBFB40, 0xE2BCFB40, 0xE2BDFB40, 0xE2BEFB40, 0xE2BFFB40, 0xE2C0FB40, 0xE2C1FB40, 0xE2C2FB40, 0xE2C3FB40, 0xE2C4FB40, 0xE2C5FB40, 0xE2C6FB40, 0xE2C7FB40, 0xE2C8FB40, 0xE2C9FB40, + 0xE2CAFB40, 0xE2CBFB40, 0xE2CCFB40, 0xE2CDFB40, 0xE2CEFB40, 0xE2CFFB40, 0xE2D0FB40, 0xE2D1FB40, 0xE2D2FB40, 0xE2D3FB40, 0xE2D4FB40, 0xE2D5FB40, 0xE2D6FB40, 0xE2D7FB40, 0xE2D8FB40, + 0xE2D9FB40, 0xE2DAFB40, 0xE2DBFB40, 0xE2DCFB40, 0xE2DDFB40, 0xE2DEFB40, 0xE2DFFB40, 0xE2E0FB40, 0xE2E1FB40, 0xE2E2FB40, 0xE2E3FB40, 0xE2E4FB40, 0xE2E5FB40, 0xE2E6FB40, 0xE2E7FB40, + 0xE2E8FB40, 0xE2E9FB40, 0xE2EAFB40, 0xE2EBFB40, 0xE2ECFB40, 0xE2EDFB40, 0xE2EEFB40, 0xE2EFFB40, 0xE2F0FB40, 0xE2F1FB40, 0xE2F2FB40, 0xE2F3FB40, 0xE2F4FB40, 0xE2F5FB40, 0xE2F6FB40, + 0xE2F7FB40, 0xE2F8FB40, 0xE2F9FB40, 0xE2FAFB40, 0xE2FBFB40, 0xE2FCFB40, 0xE2FDFB40, 0xE2FEFB40, 0xE2FFFB40, 0xE300FB40, 0xE301FB40, 0xE302FB40, 0xE303FB40, 0xE304FB40, 0xE305FB40, + 0xE306FB40, 
0xE307FB40, 0xE308FB40, 0xE309FB40, 0xE30AFB40, 0xE30BFB40, 0xE30CFB40, 0xE30DFB40, 0xE30EFB40, 0xE30FFB40, 0xE310FB40, 0xE311FB40, 0xE312FB40, 0xE313FB40, 0xE314FB40, + 0xE315FB40, 0xE316FB40, 0xE317FB40, 0xE318FB40, 0xE319FB40, 0xE31AFB40, 0xE31BFB40, 0xE31CFB40, 0xE31DFB40, 0xE31EFB40, 0xE31FFB40, 0xE320FB40, 0xE321FB40, 0xE322FB40, 0xE323FB40, + 0xE324FB40, 0xE325FB40, 0xE326FB40, 0xE327FB40, 0xE328FB40, 0xE329FB40, 0xE32AFB40, 0xE32BFB40, 0xE32CFB40, 0xE32DFB40, 0xE32EFB40, 0xE32FFB40, 0xE330FB40, 0xE331FB40, 0xE332FB40, + 0xE333FB40, 0xE334FB40, 0xE335FB40, 0xE336FB40, 0xE337FB40, 0xE338FB40, 0xE339FB40, 0xE33AFB40, 0xE33BFB40, 0xE33CFB40, 0xE33DFB40, 0xE33EFB40, 0xE33FFB40, 0xE340FB40, 0xE341FB40, + 0xE342FB40, 0xE343FB40, 0xE344FB40, 0xE345FB40, 0xE346FB40, 0xE347FB40, 0xE348FB40, 0xE349FB40, 0xE34AFB40, 0xE34BFB40, 0xE34CFB40, 0xE34DFB40, 0xE34EFB40, 0xE34FFB40, 0xE350FB40, + 0xE351FB40, 0xE352FB40, 0xE353FB40, 0xE354FB40, 0xE355FB40, 0xE356FB40, 0xE357FB40, 0xE358FB40, 0xE359FB40, 0xE35AFB40, 0xE35BFB40, 0xE35CFB40, 0xE35DFB40, 0xE35EFB40, 0xE35FFB40, + 0xE360FB40, 0xE361FB40, 0xE362FB40, 0xE363FB40, 0xE364FB40, 0xE365FB40, 0xE366FB40, 0xE367FB40, 0xE368FB40, 0xE369FB40, 0xE36AFB40, 0xE36BFB40, 0xE36CFB40, 0xE36DFB40, 0xE36EFB40, + 0xE36FFB40, 0xE370FB40, 0xE371FB40, 0xE372FB40, 0xE373FB40, 0xE374FB40, 0xE375FB40, 0xE376FB40, 0xE377FB40, 0xE378FB40, 0xE379FB40, 0xE37AFB40, 0xE37BFB40, 0xE37CFB40, 0xE37DFB40, + 0xE37EFB40, 0xE37FFB40, 0xE380FB40, 0xE381FB40, 0xE382FB40, 0xE383FB40, 0xE384FB40, 0xE385FB40, 0xE386FB40, 0xE387FB40, 0xE388FB40, 0xE389FB40, 0xE38AFB40, 0xE38BFB40, 0xE38CFB40, + 0xE38DFB40, 0xE38EFB40, 0xE38FFB40, 0xE390FB40, 0xE391FB40, 0xE392FB40, 0xE393FB40, 0xE394FB40, 0xE395FB40, 0xE396FB40, 0xE397FB40, 0xE398FB40, 0xE399FB40, 0xE39AFB40, 0xE39BFB40, + 0xE39CFB40, 0xE39DFB40, 0xE39EFB40, 0xE39FFB40, 0xE3A0FB40, 0xE3A1FB40, 0xE3A2FB40, 0xE3A3FB40, 0xE3A4FB40, 0xE3A5FB40, 0xE3A6FB40, 0xE3A7FB40, 0xE3A8FB40, 0xE3A9FB40, 0xE3AAFB40, + 
0xE3ABFB40, 0xE3ACFB40, 0xE3ADFB40, 0xE3AEFB40, 0xE3AFFB40, 0xE3B0FB40, 0xE3B1FB40, 0xE3B2FB40, 0xE3B3FB40, 0xE3B4FB40, 0xE3B5FB40, 0xE3B6FB40, 0xE3B7FB40, 0xE3B8FB40, 0xE3B9FB40, + 0xE3BAFB40, 0xE3BBFB40, 0xE3BCFB40, 0xE3BDFB40, 0xE3BEFB40, 0xE3BFFB40, 0xE3C0FB40, 0xE3C1FB40, 0xE3C2FB40, 0xE3C3FB40, 0xE3C4FB40, 0xE3C5FB40, 0xE3C6FB40, 0xE3C7FB40, 0xE3C8FB40, + 0xE3C9FB40, 0xE3CAFB40, 0xE3CBFB40, 0xE3CCFB40, 0xE3CDFB40, 0xE3CEFB40, 0xE3CFFB40, 0xE3D0FB40, 0xE3D1FB40, 0xE3D2FB40, 0xE3D3FB40, 0xE3D4FB40, 0xE3D5FB40, 0xE3D6FB40, 0xE3D7FB40, + 0xE3D8FB40, 0xE3D9FB40, 0xE3DAFB40, 0xE3DBFB40, 0xE3DCFB40, 0xE3DDFB40, 0xE3DEFB40, 0xE3DFFB40, 0xE3E0FB40, 0xE3E1FB40, 0xE3E2FB40, 0xE3E3FB40, 0xE3E4FB40, 0xE3E5FB40, 0xE3E6FB40, + 0xE3E7FB40, 0xE3E8FB40, 0xE3E9FB40, 0xE3EAFB40, 0xE3EBFB40, 0xE3ECFB40, 0xE3EDFB40, 0xE3EEFB40, 0xE3EFFB40, 0xE3F0FB40, 0xE3F1FB40, 0xE3F2FB40, 0xE3F3FB40, 0xE3F4FB40, 0xE3F5FB40, + 0xE3F6FB40, 0xE3F7FB40, 0xE3F8FB40, 0xE3F9FB40, 0xE3FAFB40, 0xE3FBFB40, 0xE3FCFB40, 0xE3FDFB40, 0xE3FEFB40, 0xE3FFFB40, 0xE400FB40, 0xE401FB40, 0xE402FB40, 0xE403FB40, 0xE404FB40, + 0xE405FB40, 0xE406FB40, 0xE407FB40, 0xE408FB40, 0xE409FB40, 0xE40AFB40, 0xE40BFB40, 0xE40CFB40, 0xE40DFB40, 0xE40EFB40, 0xE40FFB40, 0xE410FB40, 0xE411FB40, 0xE412FB40, 0xE413FB40, + 0xE414FB40, 0xE415FB40, 0xE416FB40, 0xE417FB40, 0xE418FB40, 0xE419FB40, 0xE41AFB40, 0xE41BFB40, 0xE41CFB40, 0xE41DFB40, 0xE41EFB40, 0xE41FFB40, 0xE420FB40, 0xE421FB40, 0xE422FB40, + 0xE423FB40, 0xE424FB40, 0xE425FB40, 0xE426FB40, 0xE427FB40, 0xE428FB40, 0xE429FB40, 0xE42AFB40, 0xE42BFB40, 0xE42CFB40, 0xE42DFB40, 0xE42EFB40, 0xE42FFB40, 0xE430FB40, 0xE431FB40, + 0xE432FB40, 0xE433FB40, 0xE434FB40, 0xE435FB40, 0xE436FB40, 0xE437FB40, 0xE438FB40, 0xE439FB40, 0xE43AFB40, 0xE43BFB40, 0xE43CFB40, 0xE43DFB40, 0xE43EFB40, 0xE43FFB40, 0xE440FB40, + 0xE441FB40, 0xE442FB40, 0xE443FB40, 0xE444FB40, 0xE445FB40, 0xE446FB40, 0xE447FB40, 0xE448FB40, 0xE449FB40, 0xE44AFB40, 0xE44BFB40, 0xE44CFB40, 0xE44DFB40, 0xE44EFB40, 0xE44FFB40, 
+ 0xE450FB40, 0xE451FB40, 0xE452FB40, 0xE453FB40, 0xE454FB40, 0xE455FB40, 0xE456FB40, 0xE457FB40, 0xE458FB40, 0xE459FB40, 0xE45AFB40, 0xE45BFB40, 0xE45CFB40, 0xE45DFB40, 0xE45EFB40, + 0xE45FFB40, 0xE460FB40, 0xE461FB40, 0xE462FB40, 0xE463FB40, 0xE464FB40, 0xE465FB40, 0xE466FB40, 0xE467FB40, 0xE468FB40, 0xE469FB40, 0xE46AFB40, 0xE46BFB40, 0xE46CFB40, 0xE46DFB40, + 0xE46EFB40, 0xE46FFB40, 0xE470FB40, 0xE471FB40, 0xE472FB40, 0xE473FB40, 0xE474FB40, 0xE475FB40, 0xE476FB40, 0xE477FB40, 0xE478FB40, 0xE479FB40, 0xE47AFB40, 0xE47BFB40, 0xE47CFB40, + 0xE47DFB40, 0xE47EFB40, 0xE47FFB40, 0xE480FB40, 0xE481FB40, 0xE482FB40, 0xE483FB40, 0xE484FB40, 0xE485FB40, 0xE486FB40, 0xE487FB40, 0xE488FB40, 0xE489FB40, 0xE48AFB40, 0xE48BFB40, + 0xE48CFB40, 0xE48DFB40, 0xE48EFB40, 0xE48FFB40, 0xE490FB40, 0xE491FB40, 0xE492FB40, 0xE493FB40, 0xE494FB40, 0xE495FB40, 0xE496FB40, 0xE497FB40, 0xE498FB40, 0xE499FB40, 0xE49AFB40, + 0xE49BFB40, 0xE49CFB40, 0xE49DFB40, 0xE49EFB40, 0xE49FFB40, 0xE4A0FB40, 0xE4A1FB40, 0xE4A2FB40, 0xE4A3FB40, 0xE4A4FB40, 0xE4A5FB40, 0xE4A6FB40, 0xE4A7FB40, 0xE4A8FB40, 0xE4A9FB40, + 0xE4AAFB40, 0xE4ABFB40, 0xE4ACFB40, 0xE4ADFB40, 0xE4AEFB40, 0xE4AFFB40, 0xE4B0FB40, 0xE4B1FB40, 0xE4B2FB40, 0xE4B3FB40, 0xE4B4FB40, 0xE4B5FB40, 0xE4B6FB40, 0xE4B7FB40, 0xE4B8FB40, + 0xE4B9FB40, 0xE4BAFB40, 0xE4BBFB40, 0xE4BCFB40, 0xE4BDFB40, 0xE4BEFB40, 0xE4BFFB40, 0xE4C0FB40, 0xE4C1FB40, 0xE4C2FB40, 0xE4C3FB40, 0xE4C4FB40, 0xE4C5FB40, 0xE4C6FB40, 0xE4C7FB40, + 0xE4C8FB40, 0xE4C9FB40, 0xE4CAFB40, 0xE4CBFB40, 0xE4CCFB40, 0xE4CDFB40, 0xE4CEFB40, 0xE4CFFB40, 0xE4D0FB40, 0xE4D1FB40, 0xE4D2FB40, 0xE4D3FB40, 0xE4D4FB40, 0xE4D5FB40, 0xE4D6FB40, + 0xE4D7FB40, 0xE4D8FB40, 0xE4D9FB40, 0xE4DAFB40, 0xE4DBFB40, 0xE4DCFB40, 0xE4DDFB40, 0xE4DEFB40, 0xE4DFFB40, 0xE4E0FB40, 0xE4E1FB40, 0xE4E2FB40, 0xE4E3FB40, 0xE4E4FB40, 0xE4E5FB40, + 0xE4E6FB40, 0xE4E7FB40, 0xE4E8FB40, 0xE4E9FB40, 0xE4EAFB40, 0xE4EBFB40, 0xE4ECFB40, 0xE4EDFB40, 0xE4EEFB40, 0xE4EFFB40, 0xE4F0FB40, 0xE4F1FB40, 0xE4F2FB40, 0xE4F3FB40, 
0xE4F4FB40, + 0xE4F5FB40, 0xE4F6FB40, 0xE4F7FB40, 0xE4F8FB40, 0xE4F9FB40, 0xE4FAFB40, 0xE4FBFB40, 0xE4FCFB40, 0xE4FDFB40, 0xE4FEFB40, 0xE4FFFB40, 0xE500FB40, 0xE501FB40, 0xE502FB40, 0xE503FB40, + 0xE504FB40, 0xE505FB40, 0xE506FB40, 0xE507FB40, 0xE508FB40, 0xE509FB40, 0xE50AFB40, 0xE50BFB40, 0xE50CFB40, 0xE50DFB40, 0xE50EFB40, 0xE50FFB40, 0xE510FB40, 0xE511FB40, 0xE512FB40, + 0xE513FB40, 0xE514FB40, 0xE515FB40, 0xE516FB40, 0xE517FB40, 0xE518FB40, 0xE519FB40, 0xE51AFB40, 0xE51BFB40, 0xE51CFB40, 0xE51DFB40, 0xE51EFB40, 0xE51FFB40, 0xE520FB40, 0xE521FB40, + 0xE522FB40, 0xE523FB40, 0xE524FB40, 0xE525FB40, 0xE526FB40, 0xE527FB40, 0xE528FB40, 0xE529FB40, 0xE52AFB40, 0xE52BFB40, 0xE52CFB40, 0xE52DFB40, 0xE52EFB40, 0xE52FFB40, 0xE530FB40, + 0xE531FB40, 0xE532FB40, 0xE533FB40, 0xE534FB40, 0xE535FB40, 0xE536FB40, 0xE537FB40, 0xE538FB40, 0xE539FB40, 0xE53AFB40, 0xE53BFB40, 0xE53CFB40, 0xE53DFB40, 0xE53EFB40, 0xE53FFB40, + 0xE540FB40, 0xE541FB40, 0xE542FB40, 0xE543FB40, 0xE544FB40, 0xE545FB40, 0xE546FB40, 0xE547FB40, 0xE548FB40, 0xE549FB40, 0xE54AFB40, 0xE54BFB40, 0xE54CFB40, 0xE54DFB40, 0xE54EFB40, + 0xE54FFB40, 0xE550FB40, 0xE551FB40, 0xE552FB40, 0xE553FB40, 0xE554FB40, 0xE555FB40, 0xE556FB40, 0xE557FB40, 0xE558FB40, 0xE559FB40, 0xE55AFB40, 0xE55BFB40, 0xE55CFB40, 0xE55DFB40, + 0xE55EFB40, 0xE55FFB40, 0xE560FB40, 0xE561FB40, 0xE562FB40, 0xE563FB40, 0xE564FB40, 0xE565FB40, 0xE566FB40, 0xE567FB40, 0xE568FB40, 0xE569FB40, 0xE56AFB40, 0xE56BFB40, 0xE56CFB40, + 0xE56DFB40, 0xE56EFB40, 0xE56FFB40, 0xE570FB40, 0xE571FB40, 0xE572FB40, 0xE573FB40, 0xE574FB40, 0xE575FB40, 0xE576FB40, 0xE577FB40, 0xE578FB40, 0xE579FB40, 0xE57AFB40, 0xE57BFB40, + 0xE57CFB40, 0xE57DFB40, 0xE57EFB40, 0xE57FFB40, 0xE580FB40, 0xE581FB40, 0xE582FB40, 0xE583FB40, 0xE584FB40, 0xE585FB40, 0xE586FB40, 0xE587FB40, 0xE588FB40, 0xE589FB40, 0xE58AFB40, + 0xE58BFB40, 0xE58CFB40, 0xE58DFB40, 0xE58EFB40, 0xE58FFB40, 0xE590FB40, 0xE591FB40, 0xE592FB40, 0xE593FB40, 0xE594FB40, 0xE595FB40, 0xE596FB40, 0xE597FB40, 
0xE598FB40, 0xE599FB40, + 0xE59AFB40, 0xE59BFB40, 0xE59CFB40, 0xE59DFB40, 0xE59EFB40, 0xE59FFB40, 0xE5A0FB40, 0xE5A1FB40, 0xE5A2FB40, 0xE5A3FB40, 0xE5A4FB40, 0xE5A5FB40, 0xE5A6FB40, 0xE5A7FB40, 0xE5A8FB40, + 0xE5A9FB40, 0xE5AAFB40, 0xE5ABFB40, 0xE5ACFB40, 0xE5ADFB40, 0xE5AEFB40, 0xE5AFFB40, 0xE5B0FB40, 0xE5B1FB40, 0xE5B2FB40, 0xE5B3FB40, 0xE5B4FB40, 0xE5B5FB40, 0xE5B6FB40, 0xE5B7FB40, + 0xE5B8FB40, 0xE5B9FB40, 0xE5BAFB40, 0xE5BBFB40, 0xE5BCFB40, 0xE5BDFB40, 0xE5BEFB40, 0xE5BFFB40, 0xE5C0FB40, 0xE5C1FB40, 0xE5C2FB40, 0xE5C3FB40, 0xE5C4FB40, 0xE5C5FB40, 0xE5C6FB40, + 0xE5C7FB40, 0xE5C8FB40, 0xE5C9FB40, 0xE5CAFB40, 0xE5CBFB40, 0xE5CCFB40, 0xE5CDFB40, 0xE5CEFB40, 0xE5CFFB40, 0xE5D0FB40, 0xE5D1FB40, 0xE5D2FB40, 0xE5D3FB40, 0xE5D4FB40, 0xE5D5FB40, + 0xE5D6FB40, 0xE5D7FB40, 0xE5D8FB40, 0xE5D9FB40, 0xE5DAFB40, 0xE5DBFB40, 0xE5DCFB40, 0xE5DDFB40, 0xE5DEFB40, 0xE5DFFB40, 0xE5E0FB40, 0xE5E1FB40, 0xE5E2FB40, 0xE5E3FB40, 0xE5E4FB40, + 0xE5E5FB40, 0xE5E6FB40, 0xE5E7FB40, 0xE5E8FB40, 0xE5E9FB40, 0xE5EAFB40, 0xE5EBFB40, 0xE5ECFB40, 0xE5EDFB40, 0xE5EEFB40, 0xE5EFFB40, 0xE5F0FB40, 0xE5F1FB40, 0xE5F2FB40, 0xE5F3FB40, + 0xE5F4FB40, 0xE5F5FB40, 0xE5F6FB40, 0xE5F7FB40, 0xE5F8FB40, 0xE5F9FB40, 0xE5FAFB40, 0xE5FBFB40, 0xE5FCFB40, 0xE5FDFB40, 0xE5FEFB40, 0xE5FFFB40, 0xE600FB40, 0xE601FB40, 0xE602FB40, + 0xE603FB40, 0xE604FB40, 0xE605FB40, 0xE606FB40, 0xE607FB40, 0xE608FB40, 0xE609FB40, 0xE60AFB40, 0xE60BFB40, 0xE60CFB40, 0xE60DFB40, 0xE60EFB40, 0xE60FFB40, 0xE610FB40, 0xE611FB40, + 0xE612FB40, 0xE613FB40, 0xE614FB40, 0xE615FB40, 0xE616FB40, 0xE617FB40, 0xE618FB40, 0xE619FB40, 0xE61AFB40, 0xE61BFB40, 0xE61CFB40, 0xE61DFB40, 0xE61EFB40, 0xE61FFB40, 0xE620FB40, + 0xE621FB40, 0xE622FB40, 0xE623FB40, 0xE624FB40, 0xE625FB40, 0xE626FB40, 0xE627FB40, 0xE628FB40, 0xE629FB40, 0xE62AFB40, 0xE62BFB40, 0xE62CFB40, 0xE62DFB40, 0xE62EFB40, 0xE62FFB40, + 0xE630FB40, 0xE631FB40, 0xE632FB40, 0xE633FB40, 0xE634FB40, 0xE635FB40, 0xE636FB40, 0xE637FB40, 0xE638FB40, 0xE639FB40, 0xE63AFB40, 0xE63BFB40, 
0xE63CFB40, 0xE63DFB40, 0xE63EFB40, + 0xE63FFB40, 0xE640FB40, 0xE641FB40, 0xE642FB40, 0xE643FB40, 0xE644FB40, 0xE645FB40, 0xE646FB40, 0xE647FB40, 0xE648FB40, 0xE649FB40, 0xE64AFB40, 0xE64BFB40, 0xE64CFB40, 0xE64DFB40, + 0xE64EFB40, 0xE64FFB40, 0xE650FB40, 0xE651FB40, 0xE652FB40, 0xE653FB40, 0xE654FB40, 0xE655FB40, 0xE656FB40, 0xE657FB40, 0xE658FB40, 0xE659FB40, 0xE65AFB40, 0xE65BFB40, 0xE65CFB40, + 0xE65DFB40, 0xE65EFB40, 0xE65FFB40, 0xE660FB40, 0xE661FB40, 0xE662FB40, 0xE663FB40, 0xE664FB40, 0xE665FB40, 0xE666FB40, 0xE667FB40, 0xE668FB40, 0xE669FB40, 0xE66AFB40, 0xE66BFB40, + 0xE66CFB40, 0xE66DFB40, 0xE66EFB40, 0xE66FFB40, 0xE670FB40, 0xE671FB40, 0xE672FB40, 0xE673FB40, 0xE674FB40, 0xE675FB40, 0xE676FB40, 0xE677FB40, 0xE678FB40, 0xE679FB40, 0xE67AFB40, + 0xE67BFB40, 0xE67CFB40, 0xE67DFB40, 0xE67EFB40, 0xE67FFB40, 0xE680FB40, 0xE681FB40, 0xE682FB40, 0xE683FB40, 0xE684FB40, 0xE685FB40, 0xE686FB40, 0xE687FB40, 0xE688FB40, 0xE689FB40, + 0xE68AFB40, 0xE68BFB40, 0xE68CFB40, 0xE68DFB40, 0xE68EFB40, 0xE68FFB40, 0xE690FB40, 0xE691FB40, 0xE692FB40, 0xE693FB40, 0xE694FB40, 0xE695FB40, 0xE696FB40, 0xE697FB40, 0xE698FB40, + 0xE699FB40, 0xE69AFB40, 0xE69BFB40, 0xE69CFB40, 0xE69DFB40, 0xE69EFB40, 0xE69FFB40, 0xE6A0FB40, 0xE6A1FB40, 0xE6A2FB40, 0xE6A3FB40, 0xE6A4FB40, 0xE6A5FB40, 0xE6A6FB40, 0xE6A7FB40, + 0xE6A8FB40, 0xE6A9FB40, 0xE6AAFB40, 0xE6ABFB40, 0xE6ACFB40, 0xE6ADFB40, 0xE6AEFB40, 0xE6AFFB40, 0xE6B0FB40, 0xE6B1FB40, 0xE6B2FB40, 0xE6B3FB40, 0xE6B4FB40, 0xE6B5FB40, 0xE6B6FB40, + 0xE6B7FB40, 0xE6B8FB40, 0xE6B9FB40, 0xE6BAFB40, 0xE6BBFB40, 0xE6BCFB40, 0xE6BDFB40, 0xE6BEFB40, 0xE6BFFB40, 0xE6C0FB40, 0xE6C1FB40, 0xE6C2FB40, 0xE6C3FB40, 0xE6C4FB40, 0xE6C5FB40, + 0xE6C6FB40, 0xE6C7FB40, 0xE6C8FB40, 0xE6C9FB40, 0xE6CAFB40, 0xE6CBFB40, 0xE6CCFB40, 0xE6CDFB40, 0xE6CEFB40, 0xE6CFFB40, 0xE6D0FB40, 0xE6D1FB40, 0xE6D2FB40, 0xE6D3FB40, 0xE6D4FB40, + 0xE6D5FB40, 0xE6D6FB40, 0xE6D7FB40, 0xE6D8FB40, 0xE6D9FB40, 0xE6DAFB40, 0xE6DBFB40, 0xE6DCFB40, 0xE6DDFB40, 0xE6DEFB40, 0xE6DFFB40, 
0xE6E0FB40, 0xE6E1FB40, 0xE6E2FB40, 0xE6E3FB40, + 0xE6E4FB40, 0xE6E5FB40, 0xE6E6FB40, 0xE6E7FB40, 0xE6E8FB40, 0xE6E9FB40, 0xE6EAFB40, 0xE6EBFB40, 0xE6ECFB40, 0xE6EDFB40, 0xE6EEFB40, 0xE6EFFB40, 0xE6F0FB40, 0xE6F1FB40, 0xE6F2FB40, + 0xE6F3FB40, 0xE6F4FB40, 0xE6F5FB40, 0xE6F6FB40, 0xE6F7FB40, 0xE6F8FB40, 0xE6F9FB40, 0xE6FAFB40, 0xE6FBFB40, 0xE6FCFB40, 0xE6FDFB40, 0xE6FEFB40, 0xE6FFFB40, 0xE700FB40, 0xE701FB40, + 0xE702FB40, 0xE703FB40, 0xE704FB40, 0xE705FB40, 0xE706FB40, 0xE707FB40, 0xE708FB40, 0xE709FB40, 0xE70AFB40, 0xE70BFB40, 0xE70CFB40, 0xE70DFB40, 0xE70EFB40, 0xE70FFB40, 0xE710FB40, + 0xE711FB40, 0xE712FB40, 0xE713FB40, 0xE714FB40, 0xE715FB40, 0xE716FB40, 0xE717FB40, 0xE718FB40, 0xE719FB40, 0xE71AFB40, 0xE71BFB40, 0xE71CFB40, 0xE71DFB40, 0xE71EFB40, 0xE71FFB40, + 0xE720FB40, 0xE721FB40, 0xE722FB40, 0xE723FB40, 0xE724FB40, 0xE725FB40, 0xE726FB40, 0xE727FB40, 0xE728FB40, 0xE729FB40, 0xE72AFB40, 0xE72BFB40, 0xE72CFB40, 0xE72DFB40, 0xE72EFB40, + 0xE72FFB40, 0xE730FB40, 0xE731FB40, 0xE732FB40, 0xE733FB40, 0xE734FB40, 0xE735FB40, 0xE736FB40, 0xE737FB40, 0xE738FB40, 0xE739FB40, 0xE73AFB40, 0xE73BFB40, 0xE73CFB40, 0xE73DFB40, + 0xE73EFB40, 0xE73FFB40, 0xE740FB40, 0xE741FB40, 0xE742FB40, 0xE743FB40, 0xE744FB40, 0xE745FB40, 0xE746FB40, 0xE747FB40, 0xE748FB40, 0xE749FB40, 0xE74AFB40, 0xE74BFB40, 0xE74CFB40, + 0xE74DFB40, 0xE74EFB40, 0xE74FFB40, 0xE750FB40, 0xE751FB40, 0xE752FB40, 0xE753FB40, 0xE754FB40, 0xE755FB40, 0xE756FB40, 0xE757FB40, 0xE758FB40, 0xE759FB40, 0xE75AFB40, 0xE75BFB40, + 0xE75CFB40, 0xE75DFB40, 0xE75EFB40, 0xE75FFB40, 0xE760FB40, 0xE761FB40, 0xE762FB40, 0xE763FB40, 0xE764FB40, 0xE765FB40, 0xE766FB40, 0xE767FB40, 0xE768FB40, 0xE769FB40, 0xE76AFB40, + 0xE76BFB40, 0xE76CFB40, 0xE76DFB40, 0xE76EFB40, 0xE76FFB40, 0xE770FB40, 0xE771FB40, 0xE772FB40, 0xE773FB40, 0xE774FB40, 0xE775FB40, 0xE776FB40, 0xE777FB40, 0xE778FB40, 0xE779FB40, + 0xE77AFB40, 0xE77BFB40, 0xE77CFB40, 0xE77DFB40, 0xE77EFB40, 0xE77FFB40, 0xE780FB40, 0xE781FB40, 0xE782FB40, 0xE783FB40, 
0xE784FB40, 0xE785FB40, 0xE786FB40, 0xE787FB40, 0xE788FB40, + 0xE789FB40, 0xE78AFB40, 0xE78BFB40, 0xE78CFB40, 0xE78DFB40, 0xE78EFB40, 0xE78FFB40, 0xE790FB40, 0xE791FB40, 0xE792FB40, 0xE793FB40, 0xE794FB40, 0xE795FB40, 0xE796FB40, 0xE797FB40, + 0xE798FB40, 0xE799FB40, 0xE79AFB40, 0xE79BFB40, 0xE79CFB40, 0xE79DFB40, 0xE79EFB40, 0xE79FFB40, 0xE7A0FB40, 0xE7A1FB40, 0xE7A2FB40, 0xE7A3FB40, 0xE7A4FB40, 0xE7A5FB40, 0xE7A6FB40, + 0xE7A7FB40, 0xE7A8FB40, 0xE7A9FB40, 0xE7AAFB40, 0xE7ABFB40, 0xE7ACFB40, 0xE7ADFB40, 0xE7AEFB40, 0xE7AFFB40, 0xE7B0FB40, 0xE7B1FB40, 0xE7B2FB40, 0xE7B3FB40, 0xE7B4FB40, 0xE7B5FB40, + 0xE7B6FB40, 0xE7B7FB40, 0xE7B8FB40, 0xE7B9FB40, 0xE7BAFB40, 0xE7BBFB40, 0xE7BCFB40, 0xE7BDFB40, 0xE7BEFB40, 0xE7BFFB40, 0xE7C0FB40, 0xE7C1FB40, 0xE7C2FB40, 0xE7C3FB40, 0xE7C4FB40, + 0xE7C5FB40, 0xE7C6FB40, 0xE7C7FB40, 0xE7C8FB40, 0xE7C9FB40, 0xE7CAFB40, 0xE7CBFB40, 0xE7CCFB40, 0xE7CDFB40, 0xE7CEFB40, 0xE7CFFB40, 0xE7D0FB40, 0xE7D1FB40, 0xE7D2FB40, 0xE7D3FB40, + 0xE7D4FB40, 0xE7D5FB40, 0xE7D6FB40, 0xE7D7FB40, 0xE7D8FB40, 0xE7D9FB40, 0xE7DAFB40, 0xE7DBFB40, 0xE7DCFB40, 0xE7DDFB40, 0xE7DEFB40, 0xE7DFFB40, 0xE7E0FB40, 0xE7E1FB40, 0xE7E2FB40, + 0xE7E3FB40, 0xE7E4FB40, 0xE7E5FB40, 0xE7E6FB40, 0xE7E7FB40, 0xE7E8FB40, 0xE7E9FB40, 0xE7EAFB40, 0xE7EBFB40, 0xE7ECFB40, 0xE7EDFB40, 0xE7EEFB40, 0xE7EFFB40, 0xE7F0FB40, 0xE7F1FB40, + 0xE7F2FB40, 0xE7F3FB40, 0xE7F4FB40, 0xE7F5FB40, 0xE7F6FB40, 0xE7F7FB40, 0xE7F8FB40, 0xE7F9FB40, 0xE7FAFB40, 0xE7FBFB40, 0xE7FCFB40, 0xE7FDFB40, 0xE7FEFB40, 0xE7FFFB40, 0xE800FB40, + 0xE801FB40, 0xE802FB40, 0xE803FB40, 0xE804FB40, 0xE805FB40, 0xE806FB40, 0xE807FB40, 0xE808FB40, 0xE809FB40, 0xE80AFB40, 0xE80BFB40, 0xE80CFB40, 0xE80DFB40, 0xE80EFB40, 0xE80FFB40, + 0xE810FB40, 0xE811FB40, 0xE812FB40, 0xE813FB40, 0xE814FB40, 0xE815FB40, 0xE816FB40, 0xE817FB40, 0xE818FB40, 0xE819FB40, 0xE81AFB40, 0xE81BFB40, 0xE81CFB40, 0xE81DFB40, 0xE81EFB40, + 0xE81FFB40, 0xE820FB40, 0xE821FB40, 0xE822FB40, 0xE823FB40, 0xE824FB40, 0xE825FB40, 0xE826FB40, 0xE827FB40, 
0xE828FB40, 0xE829FB40, 0xE82AFB40, 0xE82BFB40, 0xE82CFB40, 0xE82DFB40, + 0xE82EFB40, 0xE82FFB40, 0xE830FB40, 0xE831FB40, 0xE832FB40, 0xE833FB40, 0xE834FB40, 0xE835FB40, 0xE836FB40, 0xE837FB40, 0xE838FB40, 0xE839FB40, 0xE83AFB40, 0xE83BFB40, 0xE83CFB40, + 0xE83DFB40, 0xE83EFB40, 0xE83FFB40, 0xE840FB40, 0xE841FB40, 0xE842FB40, 0xE843FB40, 0xE844FB40, 0xE845FB40, 0xE846FB40, 0xE847FB40, 0xE848FB40, 0xE849FB40, 0xE84AFB40, 0xE84BFB40, + 0xE84CFB40, 0xE84DFB40, 0xE84EFB40, 0xE84FFB40, 0xE850FB40, 0xE851FB40, 0xE852FB40, 0xE853FB40, 0xE854FB40, 0xE855FB40, 0xE856FB40, 0xE857FB40, 0xE858FB40, 0xE859FB40, 0xE85AFB40, + 0xE85BFB40, 0xE85CFB40, 0xE85DFB40, 0xE85EFB40, 0xE85FFB40, 0xE860FB40, 0xE861FB40, 0xE862FB40, 0xE863FB40, 0xE864FB40, 0xE865FB40, 0xE866FB40, 0xE867FB40, 0xE868FB40, 0xE869FB40, + 0xE86AFB40, 0xE86BFB40, 0xE86CFB40, 0xE86DFB40, 0xE86EFB40, 0xE86FFB40, 0xE870FB40, 0xE871FB40, 0xE872FB40, 0xE873FB40, 0xE874FB40, 0xE875FB40, 0xE876FB40, 0xE877FB40, 0xE878FB40, + 0xE879FB40, 0xE87AFB40, 0xE87BFB40, 0xE87CFB40, 0xE87DFB40, 0xE87EFB40, 0xE87FFB40, 0xE880FB40, 0xE881FB40, 0xE882FB40, 0xE883FB40, 0xE884FB40, 0xE885FB40, 0xE886FB40, 0xE887FB40, + 0xE888FB40, 0xE889FB40, 0xE88AFB40, 0xE88BFB40, 0xE88CFB40, 0xE88DFB40, 0xE88EFB40, 0xE88FFB40, 0xE890FB40, 0xE891FB40, 0xE892FB40, 0xE893FB40, 0xE894FB40, 0xE895FB40, 0xE896FB40, + 0xE897FB40, 0xE898FB40, 0xE899FB40, 0xE89AFB40, 0xE89BFB40, 0xE89CFB40, 0xE89DFB40, 0xE89EFB40, 0xE89FFB40, 0xE8A0FB40, 0xE8A1FB40, 0xE8A2FB40, 0xE8A3FB40, 0xE8A4FB40, 0xE8A5FB40, + 0xE8A6FB40, 0xE8A7FB40, 0xE8A8FB40, 0xE8A9FB40, 0xE8AAFB40, 0xE8ABFB40, 0xE8ACFB40, 0xE8ADFB40, 0xE8AEFB40, 0xE8AFFB40, 0xE8B0FB40, 0xE8B1FB40, 0xE8B2FB40, 0xE8B3FB40, 0xE8B4FB40, + 0xE8B5FB40, 0xE8B6FB40, 0xE8B7FB40, 0xE8B8FB40, 0xE8B9FB40, 0xE8BAFB40, 0xE8BBFB40, 0xE8BCFB40, 0xE8BDFB40, 0xE8BEFB40, 0xE8BFFB40, 0xE8C0FB40, 0xE8C1FB40, 0xE8C2FB40, 0xE8C3FB40, + 0xE8C4FB40, 0xE8C5FB40, 0xE8C6FB40, 0xE8C7FB40, 0xE8C8FB40, 0xE8C9FB40, 0xE8CAFB40, 0xE8CBFB40, 
0xE8CCFB40, 0xE8CDFB40, 0xE8CEFB40, 0xE8CFFB40, 0xE8D0FB40, 0xE8D1FB40, 0xE8D2FB40, + 0xE8D3FB40, 0xE8D4FB40, 0xE8D5FB40, 0xE8D6FB40, 0xE8D7FB40, 0xE8D8FB40, 0xE8D9FB40, 0xE8DAFB40, 0xE8DBFB40, 0xE8DCFB40, 0xE8DDFB40, 0xE8DEFB40, 0xE8DFFB40, 0xE8E0FB40, 0xE8E1FB40, + 0xE8E2FB40, 0xE8E3FB40, 0xE8E4FB40, 0xE8E5FB40, 0xE8E6FB40, 0xE8E7FB40, 0xE8E8FB40, 0xE8E9FB40, 0xE8EAFB40, 0xE8EBFB40, 0xE8ECFB40, 0xE8EDFB40, 0xE8EEFB40, 0xE8EFFB40, 0xE8F0FB40, + 0xE8F1FB40, 0xE8F2FB40, 0xE8F3FB40, 0xE8F4FB40, 0xE8F5FB40, 0xE8F6FB40, 0xE8F7FB40, 0xE8F8FB40, 0xE8F9FB40, 0xE8FAFB40, 0xE8FBFB40, 0xE8FCFB40, 0xE8FDFB40, 0xE8FEFB40, 0xE8FFFB40, + 0xE900FB40, 0xE901FB40, 0xE902FB40, 0xE903FB40, 0xE904FB40, 0xE905FB40, 0xE906FB40, 0xE907FB40, 0xE908FB40, 0xE909FB40, 0xE90AFB40, 0xE90BFB40, 0xE90CFB40, 0xE90DFB40, 0xE90EFB40, + 0xE90FFB40, 0xE910FB40, 0xE911FB40, 0xE912FB40, 0xE913FB40, 0xE914FB40, 0xE915FB40, 0xE916FB40, 0xE917FB40, 0xE918FB40, 0xE919FB40, 0xE91AFB40, 0xE91BFB40, 0xE91CFB40, 0xE91DFB40, + 0xE91EFB40, 0xE91FFB40, 0xE920FB40, 0xE921FB40, 0xE922FB40, 0xE923FB40, 0xE924FB40, 0xE925FB40, 0xE926FB40, 0xE927FB40, 0xE928FB40, 0xE929FB40, 0xE92AFB40, 0xE92BFB40, 0xE92CFB40, + 0xE92DFB40, 0xE92EFB40, 0xE92FFB40, 0xE930FB40, 0xE931FB40, 0xE932FB40, 0xE933FB40, 0xE934FB40, 0xE935FB40, 0xE936FB40, 0xE937FB40, 0xE938FB40, 0xE939FB40, 0xE93AFB40, 0xE93BFB40, + 0xE93CFB40, 0xE93DFB40, 0xE93EFB40, 0xE93FFB40, 0xE940FB40, 0xE941FB40, 0xE942FB40, 0xE943FB40, 0xE944FB40, 0xE945FB40, 0xE946FB40, 0xE947FB40, 0xE948FB40, 0xE949FB40, 0xE94AFB40, + 0xE94BFB40, 0xE94CFB40, 0xE94DFB40, 0xE94EFB40, 0xE94FFB40, 0xE950FB40, 0xE951FB40, 0xE952FB40, 0xE953FB40, 0xE954FB40, 0xE955FB40, 0xE956FB40, 0xE957FB40, 0xE958FB40, 0xE959FB40, + 0xE95AFB40, 0xE95BFB40, 0xE95CFB40, 0xE95DFB40, 0xE95EFB40, 0xE95FFB40, 0xE960FB40, 0xE961FB40, 0xE962FB40, 0xE963FB40, 0xE964FB40, 0xE965FB40, 0xE966FB40, 0xE967FB40, 0xE968FB40, + 0xE969FB40, 0xE96AFB40, 0xE96BFB40, 0xE96CFB40, 0xE96DFB40, 0xE96EFB40, 0xE96FFB40, 
0xE970FB40, 0xE971FB40, 0xE972FB40, 0xE973FB40, 0xE974FB40, 0xE975FB40, 0xE976FB40, 0xE977FB40, + 0xE978FB40, 0xE979FB40, 0xE97AFB40, 0xE97BFB40, 0xE97CFB40, 0xE97DFB40, 0xE97EFB40, 0xE97FFB40, 0xE980FB40, 0xE981FB40, 0xE982FB40, 0xE983FB40, 0xE984FB40, 0xE985FB40, 0xE986FB40, + 0xE987FB40, 0xE988FB40, 0xE989FB40, 0xE98AFB40, 0xE98BFB40, 0xE98CFB40, 0xE98DFB40, 0xE98EFB40, 0xE98FFB40, 0xE990FB40, 0xE991FB40, 0xE992FB40, 0xE993FB40, 0xE994FB40, 0xE995FB40, + 0xE996FB40, 0xE997FB40, 0xE998FB40, 0xE999FB40, 0xE99AFB40, 0xE99BFB40, 0xE99CFB40, 0xE99DFB40, 0xE99EFB40, 0xE99FFB40, 0xE9A0FB40, 0xE9A1FB40, 0xE9A2FB40, 0xE9A3FB40, 0xE9A4FB40, + 0xE9A5FB40, 0xE9A6FB40, 0xE9A7FB40, 0xE9A8FB40, 0xE9A9FB40, 0xE9AAFB40, 0xE9ABFB40, 0xE9ACFB40, 0xE9ADFB40, 0xE9AEFB40, 0xE9AFFB40, 0xE9B0FB40, 0xE9B1FB40, 0xE9B2FB40, 0xE9B3FB40, + 0xE9B4FB40, 0xE9B5FB40, 0xE9B6FB40, 0xE9B7FB40, 0xE9B8FB40, 0xE9B9FB40, 0xE9BAFB40, 0xE9BBFB40, 0xE9BCFB40, 0xE9BDFB40, 0xE9BEFB40, 0xE9BFFB40, 0xE9C0FB40, 0xE9C1FB40, 0xE9C2FB40, + 0xE9C3FB40, 0xE9C4FB40, 0xE9C5FB40, 0xE9C6FB40, 0xE9C7FB40, 0xE9C8FB40, 0xE9C9FB40, 0xE9CAFB40, 0xE9CBFB40, 0xE9CCFB40, 0xE9CDFB40, 0xE9CEFB40, 0xE9CFFB40, 0xE9D0FB40, 0xE9D1FB40, + 0xE9D2FB40, 0xE9D3FB40, 0xE9D4FB40, 0xE9D5FB40, 0xE9D6FB40, 0xE9D7FB40, 0xE9D8FB40, 0xE9D9FB40, 0xE9DAFB40, 0xE9DBFB40, 0xE9DCFB40, 0xE9DDFB40, 0xE9DEFB40, 0xE9DFFB40, 0xE9E0FB40, + 0xE9E1FB40, 0xE9E2FB40, 0xE9E3FB40, 0xE9E4FB40, 0xE9E5FB40, 0xE9E6FB40, 0xE9E7FB40, 0xE9E8FB40, 0xE9E9FB40, 0xE9EAFB40, 0xE9EBFB40, 0xE9ECFB40, 0xE9EDFB40, 0xE9EEFB40, 0xE9EFFB40, + 0xE9F0FB40, 0xE9F1FB40, 0xE9F2FB40, 0xE9F3FB40, 0xE9F4FB40, 0xE9F5FB40, 0xE9F6FB40, 0xE9F7FB40, 0xE9F8FB40, 0xE9F9FB40, 0xE9FAFB40, 0xE9FBFB40, 0xE9FCFB40, 0xE9FDFB40, 0xE9FEFB40, + 0xE9FFFB40, 0xEA00FB40, 0xEA01FB40, 0xEA02FB40, 0xEA03FB40, 0xEA04FB40, 0xEA05FB40, 0xEA06FB40, 0xEA07FB40, 0xEA08FB40, 0xEA09FB40, 0xEA0AFB40, 0xEA0BFB40, 0xEA0CFB40, 0xEA0DFB40, + 0xEA0EFB40, 0xEA0FFB40, 0xEA10FB40, 0xEA11FB40, 0xEA12FB40, 0xEA13FB40, 
0xEA14FB40, 0xEA15FB40, 0xEA16FB40, 0xEA17FB40, 0xEA18FB40, 0xEA19FB40, 0xEA1AFB40, 0xEA1BFB40, 0xEA1CFB40, + 0xEA1DFB40, 0xEA1EFB40, 0xEA1FFB40, 0xEA20FB40, 0xEA21FB40, 0xEA22FB40, 0xEA23FB40, 0xEA24FB40, 0xEA25FB40, 0xEA26FB40, 0xEA27FB40, 0xEA28FB40, 0xEA29FB40, 0xEA2AFB40, 0xEA2BFB40, + 0xEA2CFB40, 0xEA2DFB40, 0xEA2EFB40, 0xEA2FFB40, 0xEA30FB40, 0xEA31FB40, 0xEA32FB40, 0xEA33FB40, 0xEA34FB40, 0xEA35FB40, 0xEA36FB40, 0xEA37FB40, 0xEA38FB40, 0xEA39FB40, 0xEA3AFB40, + 0xEA3BFB40, 0xEA3CFB40, 0xEA3DFB40, 0xEA3EFB40, 0xEA3FFB40, 0xEA40FB40, 0xEA41FB40, 0xEA42FB40, 0xEA43FB40, 0xEA44FB40, 0xEA45FB40, 0xEA46FB40, 0xEA47FB40, 0xEA48FB40, 0xEA49FB40, + 0xEA4AFB40, 0xEA4BFB40, 0xEA4CFB40, 0xEA4DFB40, 0xEA4EFB40, 0xEA4FFB40, 0xEA50FB40, 0xEA51FB40, 0xEA52FB40, 0xEA53FB40, 0xEA54FB40, 0xEA55FB40, 0xEA56FB40, 0xEA57FB40, 0xEA58FB40, + 0xEA59FB40, 0xEA5AFB40, 0xEA5BFB40, 0xEA5CFB40, 0xEA5DFB40, 0xEA5EFB40, 0xEA5FFB40, 0xEA60FB40, 0xEA61FB40, 0xEA62FB40, 0xEA63FB40, 0xEA64FB40, 0xEA65FB40, 0xEA66FB40, 0xEA67FB40, + 0xEA68FB40, 0xEA69FB40, 0xEA6AFB40, 0xEA6BFB40, 0xEA6CFB40, 0xEA6DFB40, 0xEA6EFB40, 0xEA6FFB40, 0xEA70FB40, 0xEA71FB40, 0xEA72FB40, 0xEA73FB40, 0xEA74FB40, 0xEA75FB40, 0xEA76FB40, + 0xEA77FB40, 0xEA78FB40, 0xEA79FB40, 0xEA7AFB40, 0xEA7BFB40, 0xEA7CFB40, 0xEA7DFB40, 0xEA7EFB40, 0xEA7FFB40, 0xEA80FB40, 0xEA81FB40, 0xEA82FB40, 0xEA83FB40, 0xEA84FB40, 0xEA85FB40, + 0xEA86FB40, 0xEA87FB40, 0xEA88FB40, 0xEA89FB40, 0xEA8AFB40, 0xEA8BFB40, 0xEA8CFB40, 0xEA8DFB40, 0xEA8EFB40, 0xEA8FFB40, 0xEA90FB40, 0xEA91FB40, 0xEA92FB40, 0xEA93FB40, 0xEA94FB40, + 0xEA95FB40, 0xEA96FB40, 0xEA97FB40, 0xEA98FB40, 0xEA99FB40, 0xEA9AFB40, 0xEA9BFB40, 0xEA9CFB40, 0xEA9DFB40, 0xEA9EFB40, 0xEA9FFB40, 0xEAA0FB40, 0xEAA1FB40, 0xEAA2FB40, 0xEAA3FB40, + 0xEAA4FB40, 0xEAA5FB40, 0xEAA6FB40, 0xEAA7FB40, 0xEAA8FB40, 0xEAA9FB40, 0xEAAAFB40, 0xEAABFB40, 0xEAACFB40, 0xEAADFB40, 0xEAAEFB40, 0xEAAFFB40, 0xEAB0FB40, 0xEAB1FB40, 0xEAB2FB40, + 0xEAB3FB40, 0xEAB4FB40, 0xEAB5FB40, 0xEAB6FB40, 0xEAB7FB40, 
0xEAB8FB40, 0xEAB9FB40, 0xEABAFB40, 0xEABBFB40, 0xEABCFB40, 0xEABDFB40, 0xEABEFB40, 0xEABFFB40, 0xEAC0FB40, 0xEAC1FB40, + 0xEAC2FB40, 0xEAC3FB40, 0xEAC4FB40, 0xEAC5FB40, 0xEAC6FB40, 0xEAC7FB40, 0xEAC8FB40, 0xEAC9FB40, 0xEACAFB40, 0xEACBFB40, 0xEACCFB40, 0xEACDFB40, 0xEACEFB40, 0xEACFFB40, 0xEAD0FB40, + 0xEAD1FB40, 0xEAD2FB40, 0xEAD3FB40, 0xEAD4FB40, 0xEAD5FB40, 0xEAD6FB40, 0xEAD7FB40, 0xEAD8FB40, 0xEAD9FB40, 0xEADAFB40, 0xEADBFB40, 0xEADCFB40, 0xEADDFB40, 0xEADEFB40, 0xEADFFB40, + 0xEAE0FB40, 0xEAE1FB40, 0xEAE2FB40, 0xEAE3FB40, 0xEAE4FB40, 0xEAE5FB40, 0xEAE6FB40, 0xEAE7FB40, 0xEAE8FB40, 0xEAE9FB40, 0xEAEAFB40, 0xEAEBFB40, 0xEAECFB40, 0xEAEDFB40, 0xEAEEFB40, + 0xEAEFFB40, 0xEAF0FB40, 0xEAF1FB40, 0xEAF2FB40, 0xEAF3FB40, 0xEAF4FB40, 0xEAF5FB40, 0xEAF6FB40, 0xEAF7FB40, 0xEAF8FB40, 0xEAF9FB40, 0xEAFAFB40, 0xEAFBFB40, 0xEAFCFB40, 0xEAFDFB40, + 0xEAFEFB40, 0xEAFFFB40, 0xEB00FB40, 0xEB01FB40, 0xEB02FB40, 0xEB03FB40, 0xEB04FB40, 0xEB05FB40, 0xEB06FB40, 0xEB07FB40, 0xEB08FB40, 0xEB09FB40, 0xEB0AFB40, 0xEB0BFB40, 0xEB0CFB40, + 0xEB0DFB40, 0xEB0EFB40, 0xEB0FFB40, 0xEB10FB40, 0xEB11FB40, 0xEB12FB40, 0xEB13FB40, 0xEB14FB40, 0xEB15FB40, 0xEB16FB40, 0xEB17FB40, 0xEB18FB40, 0xEB19FB40, 0xEB1AFB40, 0xEB1BFB40, + 0xEB1CFB40, 0xEB1DFB40, 0xEB1EFB40, 0xEB1FFB40, 0xEB20FB40, 0xEB21FB40, 0xEB22FB40, 0xEB23FB40, 0xEB24FB40, 0xEB25FB40, 0xEB26FB40, 0xEB27FB40, 0xEB28FB40, 0xEB29FB40, 0xEB2AFB40, + 0xEB2BFB40, 0xEB2CFB40, 0xEB2DFB40, 0xEB2EFB40, 0xEB2FFB40, 0xEB30FB40, 0xEB31FB40, 0xEB32FB40, 0xEB33FB40, 0xEB34FB40, 0xEB35FB40, 0xEB36FB40, 0xEB37FB40, 0xEB38FB40, 0xEB39FB40, + 0xEB3AFB40, 0xEB3BFB40, 0xEB3CFB40, 0xEB3DFB40, 0xEB3EFB40, 0xEB3FFB40, 0xEB40FB40, 0xEB41FB40, 0xEB42FB40, 0xEB43FB40, 0xEB44FB40, 0xEB45FB40, 0xEB46FB40, 0xEB47FB40, 0xEB48FB40, + 0xEB49FB40, 0xEB4AFB40, 0xEB4BFB40, 0xEB4CFB40, 0xEB4DFB40, 0xEB4EFB40, 0xEB4FFB40, 0xEB50FB40, 0xEB51FB40, 0xEB52FB40, 0xEB53FB40, 0xEB54FB40, 0xEB55FB40, 0xEB56FB40, 0xEB57FB40, + 0xEB58FB40, 0xEB59FB40, 0xEB5AFB40, 0xEB5BFB40, 
0xEB5CFB40, 0xEB5DFB40, 0xEB5EFB40, 0xEB5FFB40, 0xEB60FB40, 0xEB61FB40, 0xEB62FB40, 0xEB63FB40, 0xEB64FB40, 0xEB65FB40, 0xEB66FB40, + 0xEB67FB40, 0xEB68FB40, 0xEB69FB40, 0xEB6AFB40, 0xEB6BFB40, 0xEB6CFB40, 0xEB6DFB40, 0xEB6EFB40, 0xEB6FFB40, 0xEB70FB40, 0xEB71FB40, 0xEB72FB40, 0xEB73FB40, 0xEB74FB40, 0xEB75FB40, + 0xEB76FB40, 0xEB77FB40, 0xEB78FB40, 0xEB79FB40, 0xEB7AFB40, 0xEB7BFB40, 0xEB7CFB40, 0xEB7DFB40, 0xEB7EFB40, 0xEB7FFB40, 0xEB80FB40, 0xEB81FB40, 0xEB82FB40, 0xEB83FB40, 0xEB84FB40, + 0xEB85FB40, 0xEB86FB40, 0xEB87FB40, 0xEB88FB40, 0xEB89FB40, 0xEB8AFB40, 0xEB8BFB40, 0xEB8CFB40, 0xEB8DFB40, 0xEB8EFB40, 0xEB8FFB40, 0xEB90FB40, 0xEB91FB40, 0xEB92FB40, 0xEB93FB40, + 0xEB94FB40, 0xEB95FB40, 0xEB96FB40, 0xEB97FB40, 0xEB98FB40, 0xEB99FB40, 0xEB9AFB40, 0xEB9BFB40, 0xEB9CFB40, 0xEB9DFB40, 0xEB9EFB40, 0xEB9FFB40, 0xEBA0FB40, 0xEBA1FB40, 0xEBA2FB40, + 0xEBA3FB40, 0xEBA4FB40, 0xEBA5FB40, 0xEBA6FB40, 0xEBA7FB40, 0xEBA8FB40, 0xEBA9FB40, 0xEBAAFB40, 0xEBABFB40, 0xEBACFB40, 0xEBADFB40, 0xEBAEFB40, 0xEBAFFB40, 0xEBB0FB40, 0xEBB1FB40, + 0xEBB2FB40, 0xEBB3FB40, 0xEBB4FB40, 0xEBB5FB40, 0xEBB6FB40, 0xEBB7FB40, 0xEBB8FB40, 0xEBB9FB40, 0xEBBAFB40, 0xEBBBFB40, 0xEBBCFB40, 0xEBBDFB40, 0xEBBEFB40, 0xEBBFFB40, 0xEBC0FB40, + 0xEBC1FB40, 0xEBC2FB40, 0xEBC3FB40, 0xEBC4FB40, 0xEBC5FB40, 0xEBC6FB40, 0xEBC7FB40, 0xEBC8FB40, 0xEBC9FB40, 0xEBCAFB40, 0xEBCBFB40, 0xEBCCFB40, 0xEBCDFB40, 0xEBCEFB40, 0xEBCFFB40, + 0xEBD0FB40, 0xEBD1FB40, 0xEBD2FB40, 0xEBD3FB40, 0xEBD4FB40, 0xEBD5FB40, 0xEBD6FB40, 0xEBD7FB40, 0xEBD8FB40, 0xEBD9FB40, 0xEBDAFB40, 0xEBDBFB40, 0xEBDCFB40, 0xEBDDFB40, 0xEBDEFB40, + 0xEBDFFB40, 0xEBE0FB40, 0xEBE1FB40, 0xEBE2FB40, 0xEBE3FB40, 0xEBE4FB40, 0xEBE5FB40, 0xEBE6FB40, 0xEBE7FB40, 0xEBE8FB40, 0xEBE9FB40, 0xEBEAFB40, 0xEBEBFB40, 0xEBECFB40, 0xEBEDFB40, + 0xEBEEFB40, 0xEBEFFB40, 0xEBF0FB40, 0xEBF1FB40, 0xEBF2FB40, 0xEBF3FB40, 0xEBF4FB40, 0xEBF5FB40, 0xEBF6FB40, 0xEBF7FB40, 0xEBF8FB40, 0xEBF9FB40, 0xEBFAFB40, 0xEBFBFB40, 0xEBFCFB40, + 0xEBFDFB40, 0xEBFEFB40, 0xEBFFFB40, 
0xEC00FB40, 0xEC01FB40, 0xEC02FB40, 0xEC03FB40, 0xEC04FB40, 0xEC05FB40, 0xEC06FB40, 0xEC07FB40, 0xEC08FB40, 0xEC09FB40, 0xEC0AFB40, 0xEC0BFB40, + 0xEC0CFB40, 0xEC0DFB40, 0xEC0EFB40, 0xEC0FFB40, 0xEC10FB40, 0xEC11FB40, 0xEC12FB40, 0xEC13FB40, 0xEC14FB40, 0xEC15FB40, 0xEC16FB40, 0xEC17FB40, 0xEC18FB40, 0xEC19FB40, 0xEC1AFB40, + 0xEC1BFB40, 0xEC1CFB40, 0xEC1DFB40, 0xEC1EFB40, 0xEC1FFB40, 0xEC20FB40, 0xEC21FB40, 0xEC22FB40, 0xEC23FB40, 0xEC24FB40, 0xEC25FB40, 0xEC26FB40, 0xEC27FB40, 0xEC28FB40, 0xEC29FB40, + 0xEC2AFB40, 0xEC2BFB40, 0xEC2CFB40, 0xEC2DFB40, 0xEC2EFB40, 0xEC2FFB40, 0xEC30FB40, 0xEC31FB40, 0xEC32FB40, 0xEC33FB40, 0xEC34FB40, 0xEC35FB40, 0xEC36FB40, 0xEC37FB40, 0xEC38FB40, + 0xEC39FB40, 0xEC3AFB40, 0xEC3BFB40, 0xEC3CFB40, 0xEC3DFB40, 0xEC3EFB40, 0xEC3FFB40, 0xEC40FB40, 0xEC41FB40, 0xEC42FB40, 0xEC43FB40, 0xEC44FB40, 0xEC45FB40, 0xEC46FB40, 0xEC47FB40, + 0xEC48FB40, 0xEC49FB40, 0xEC4AFB40, 0xEC4BFB40, 0xEC4CFB40, 0xEC4DFB40, 0xEC4EFB40, 0xEC4FFB40, 0xEC50FB40, 0xEC51FB40, 0xEC52FB40, 0xEC53FB40, 0xEC54FB40, 0xEC55FB40, 0xEC56FB40, + 0xEC57FB40, 0xEC58FB40, 0xEC59FB40, 0xEC5AFB40, 0xEC5BFB40, 0xEC5CFB40, 0xEC5DFB40, 0xEC5EFB40, 0xEC5FFB40, 0xEC60FB40, 0xEC61FB40, 0xEC62FB40, 0xEC63FB40, 0xEC64FB40, 0xEC65FB40, + 0xEC66FB40, 0xEC67FB40, 0xEC68FB40, 0xEC69FB40, 0xEC6AFB40, 0xEC6BFB40, 0xEC6CFB40, 0xEC6DFB40, 0xEC6EFB40, 0xEC6FFB40, 0xEC70FB40, 0xEC71FB40, 0xEC72FB40, 0xEC73FB40, 0xEC74FB40, + 0xEC75FB40, 0xEC76FB40, 0xEC77FB40, 0xEC78FB40, 0xEC79FB40, 0xEC7AFB40, 0xEC7BFB40, 0xEC7CFB40, 0xEC7DFB40, 0xEC7EFB40, 0xEC7FFB40, 0xEC80FB40, 0xEC81FB40, 0xEC82FB40, 0xEC83FB40, + 0xEC84FB40, 0xEC85FB40, 0xEC86FB40, 0xEC87FB40, 0xEC88FB40, 0xEC89FB40, 0xEC8AFB40, 0xEC8BFB40, 0xEC8CFB40, 0xEC8DFB40, 0xEC8EFB40, 0xEC8FFB40, 0xEC90FB40, 0xEC91FB40, 0xEC92FB40, + 0xEC93FB40, 0xEC94FB40, 0xEC95FB40, 0xEC96FB40, 0xEC97FB40, 0xEC98FB40, 0xEC99FB40, 0xEC9AFB40, 0xEC9BFB40, 0xEC9CFB40, 0xEC9DFB40, 0xEC9EFB40, 0xEC9FFB40, 0xECA0FB40, 0xECA1FB40, + 0xECA2FB40, 0xECA3FB40, 
0xECA4FB40, 0xECA5FB40, 0xECA6FB40, 0xECA7FB40, 0xECA8FB40, 0xECA9FB40, 0xECAAFB40, 0xECABFB40, 0xECACFB40, 0xECADFB40, 0xECAEFB40, 0xECAFFB40, 0xECB0FB40, + 0xECB1FB40, 0xECB2FB40, 0xECB3FB40, 0xECB4FB40, 0xECB5FB40, 0xECB6FB40, 0xECB7FB40, 0xECB8FB40, 0xECB9FB40, 0xECBAFB40, 0xECBBFB40, 0xECBCFB40, 0xECBDFB40, 0xECBEFB40, 0xECBFFB40, + 0xECC0FB40, 0xECC1FB40, 0xECC2FB40, 0xECC3FB40, 0xECC4FB40, 0xECC5FB40, 0xECC6FB40, 0xECC7FB40, 0xECC8FB40, 0xECC9FB40, 0xECCAFB40, 0xECCBFB40, 0xECCCFB40, 0xECCDFB40, 0xECCEFB40, + 0xECCFFB40, 0xECD0FB40, 0xECD1FB40, 0xECD2FB40, 0xECD3FB40, 0xECD4FB40, 0xECD5FB40, 0xECD6FB40, 0xECD7FB40, 0xECD8FB40, 0xECD9FB40, 0xECDAFB40, 0xECDBFB40, 0xECDCFB40, 0xECDDFB40, + 0xECDEFB40, 0xECDFFB40, 0xECE0FB40, 0xECE1FB40, 0xECE2FB40, 0xECE3FB40, 0xECE4FB40, 0xECE5FB40, 0xECE6FB40, 0xECE7FB40, 0xECE8FB40, 0xECE9FB40, 0xECEAFB40, 0xECEBFB40, 0xECECFB40, + 0xECEDFB40, 0xECEEFB40, 0xECEFFB40, 0xECF0FB40, 0xECF1FB40, 0xECF2FB40, 0xECF3FB40, 0xECF4FB40, 0xECF5FB40, 0xECF6FB40, 0xECF7FB40, 0xECF8FB40, 0xECF9FB40, 0xECFAFB40, 0xECFBFB40, + 0xECFCFB40, 0xECFDFB40, 0xECFEFB40, 0xECFFFB40, 0xED00FB40, 0xED01FB40, 0xED02FB40, 0xED03FB40, 0xED04FB40, 0xED05FB40, 0xED06FB40, 0xED07FB40, 0xED08FB40, 0xED09FB40, 0xED0AFB40, + 0xED0BFB40, 0xED0CFB40, 0xED0DFB40, 0xED0EFB40, 0xED0FFB40, 0xED10FB40, 0xED11FB40, 0xED12FB40, 0xED13FB40, 0xED14FB40, 0xED15FB40, 0xED16FB40, 0xED17FB40, 0xED18FB40, 0xED19FB40, + 0xED1AFB40, 0xED1BFB40, 0xED1CFB40, 0xED1DFB40, 0xED1EFB40, 0xED1FFB40, 0xED20FB40, 0xED21FB40, 0xED22FB40, 0xED23FB40, 0xED24FB40, 0xED25FB40, 0xED26FB40, 0xED27FB40, 0xED28FB40, + 0xED29FB40, 0xED2AFB40, 0xED2BFB40, 0xED2CFB40, 0xED2DFB40, 0xED2EFB40, 0xED2FFB40, 0xED30FB40, 0xED31FB40, 0xED32FB40, 0xED33FB40, 0xED34FB40, 0xED35FB40, 0xED36FB40, 0xED37FB40, + 0xED38FB40, 0xED39FB40, 0xED3AFB40, 0xED3BFB40, 0xED3CFB40, 0xED3DFB40, 0xED3EFB40, 0xED3FFB40, 0xED40FB40, 0xED41FB40, 0xED42FB40, 0xED43FB40, 0xED44FB40, 0xED45FB40, 0xED46FB40, + 0xED47FB40, 
0xED48FB40, 0xED49FB40, 0xED4AFB40, 0xED4BFB40, 0xED4CFB40, 0xED4DFB40, 0xED4EFB40, 0xED4FFB40, 0xED50FB40, 0xED51FB40, 0xED52FB40, 0xED53FB40, 0xED54FB40, 0xED55FB40, + 0xED56FB40, 0xED57FB40, 0xED58FB40, 0xED59FB40, 0xED5AFB40, 0xED5BFB40, 0xED5CFB40, 0xED5DFB40, 0xED5EFB40, 0xED5FFB40, 0xED60FB40, 0xED61FB40, 0xED62FB40, 0xED63FB40, 0xED64FB40, + 0xED65FB40, 0xED66FB40, 0xED67FB40, 0xED68FB40, 0xED69FB40, 0xED6AFB40, 0xED6BFB40, 0xED6CFB40, 0xED6DFB40, 0xED6EFB40, 0xED6FFB40, 0xED70FB40, 0xED71FB40, 0xED72FB40, 0xED73FB40, + 0xED74FB40, 0xED75FB40, 0xED76FB40, 0xED77FB40, 0xED78FB40, 0xED79FB40, 0xED7AFB40, 0xED7BFB40, 0xED7CFB40, 0xED7DFB40, 0xED7EFB40, 0xED7FFB40, 0xED80FB40, 0xED81FB40, 0xED82FB40, + 0xED83FB40, 0xED84FB40, 0xED85FB40, 0xED86FB40, 0xED87FB40, 0xED88FB40, 0xED89FB40, 0xED8AFB40, 0xED8BFB40, 0xED8CFB40, 0xED8DFB40, 0xED8EFB40, 0xED8FFB40, 0xED90FB40, 0xED91FB40, + 0xED92FB40, 0xED93FB40, 0xED94FB40, 0xED95FB40, 0xED96FB40, 0xED97FB40, 0xED98FB40, 0xED99FB40, 0xED9AFB40, 0xED9BFB40, 0xED9CFB40, 0xED9DFB40, 0xED9EFB40, 0xED9FFB40, 0xEDA0FB40, + 0xEDA1FB40, 0xEDA2FB40, 0xEDA3FB40, 0xEDA4FB40, 0xEDA5FB40, 0xEDA6FB40, 0xEDA7FB40, 0xEDA8FB40, 0xEDA9FB40, 0xEDAAFB40, 0xEDABFB40, 0xEDACFB40, 0xEDADFB40, 0xEDAEFB40, 0xEDAFFB40, + 0xEDB0FB40, 0xEDB1FB40, 0xEDB2FB40, 0xEDB3FB40, 0xEDB4FB40, 0xEDB5FB40, 0xEDB6FB40, 0xEDB7FB40, 0xEDB8FB40, 0xEDB9FB40, 0xEDBAFB40, 0xEDBBFB40, 0xEDBCFB40, 0xEDBDFB40, 0xEDBEFB40, + 0xEDBFFB40, 0xEDC0FB40, 0xEDC1FB40, 0xEDC2FB40, 0xEDC3FB40, 0xEDC4FB40, 0xEDC5FB40, 0xEDC6FB40, 0xEDC7FB40, 0xEDC8FB40, 0xEDC9FB40, 0xEDCAFB40, 0xEDCBFB40, 0xEDCCFB40, 0xEDCDFB40, + 0xEDCEFB40, 0xEDCFFB40, 0xEDD0FB40, 0xEDD1FB40, 0xEDD2FB40, 0xEDD3FB40, 0xEDD4FB40, 0xEDD5FB40, 0xEDD6FB40, 0xEDD7FB40, 0xEDD8FB40, 0xEDD9FB40, 0xEDDAFB40, 0xEDDBFB40, 0xEDDCFB40, + 0xEDDDFB40, 0xEDDEFB40, 0xEDDFFB40, 0xEDE0FB40, 0xEDE1FB40, 0xEDE2FB40, 0xEDE3FB40, 0xEDE4FB40, 0xEDE5FB40, 0xEDE6FB40, 0xEDE7FB40, 0xEDE8FB40, 0xEDE9FB40, 0xEDEAFB40, 0xEDEBFB40, + 
0xEDECFB40, 0xEDEDFB40, 0xEDEEFB40, 0xEDEFFB40, 0xEDF0FB40, 0xEDF1FB40, 0xEDF2FB40, 0xEDF3FB40, 0xEDF4FB40, 0xEDF5FB40, 0xEDF6FB40, 0xEDF7FB40, 0xEDF8FB40, 0xEDF9FB40, 0xEDFAFB40, + 0xEDFBFB40, 0xEDFCFB40, 0xEDFDFB40, 0xEDFEFB40, 0xEDFFFB40, 0xEE00FB40, 0xEE01FB40, 0xEE02FB40, 0xEE03FB40, 0xEE04FB40, 0xEE05FB40, 0xEE06FB40, 0xEE07FB40, 0xEE08FB40, 0xEE09FB40, + 0xEE0AFB40, 0xEE0BFB40, 0xEE0CFB40, 0xEE0DFB40, 0xEE0EFB40, 0xEE0FFB40, 0xEE10FB40, 0xEE11FB40, 0xEE12FB40, 0xEE13FB40, 0xEE14FB40, 0xEE15FB40, 0xEE16FB40, 0xEE17FB40, 0xEE18FB40, + 0xEE19FB40, 0xEE1AFB40, 0xEE1BFB40, 0xEE1CFB40, 0xEE1DFB40, 0xEE1EFB40, 0xEE1FFB40, 0xEE20FB40, 0xEE21FB40, 0xEE22FB40, 0xEE23FB40, 0xEE24FB40, 0xEE25FB40, 0xEE26FB40, 0xEE27FB40, + 0xEE28FB40, 0xEE29FB40, 0xEE2AFB40, 0xEE2BFB40, 0xEE2CFB40, 0xEE2DFB40, 0xEE2EFB40, 0xEE2FFB40, 0xEE30FB40, 0xEE31FB40, 0xEE32FB40, 0xEE33FB40, 0xEE34FB40, 0xEE35FB40, 0xEE36FB40, + 0xEE37FB40, 0xEE38FB40, 0xEE39FB40, 0xEE3AFB40, 0xEE3BFB40, 0xEE3CFB40, 0xEE3DFB40, 0xEE3EFB40, 0xEE3FFB40, 0xEE40FB40, 0xEE41FB40, 0xEE42FB40, 0xEE43FB40, 0xEE44FB40, 0xEE45FB40, + 0xEE46FB40, 0xEE47FB40, 0xEE48FB40, 0xEE49FB40, 0xEE4AFB40, 0xEE4BFB40, 0xEE4CFB40, 0xEE4DFB40, 0xEE4EFB40, 0xEE4FFB40, 0xEE50FB40, 0xEE51FB40, 0xEE52FB40, 0xEE53FB40, 0xEE54FB40, + 0xEE55FB40, 0xEE56FB40, 0xEE57FB40, 0xEE58FB40, 0xEE59FB40, 0xEE5AFB40, 0xEE5BFB40, 0xEE5CFB40, 0xEE5DFB40, 0xEE5EFB40, 0xEE5FFB40, 0xEE60FB40, 0xEE61FB40, 0xEE62FB40, 0xEE63FB40, + 0xEE64FB40, 0xEE65FB40, 0xEE66FB40, 0xEE67FB40, 0xEE68FB40, 0xEE69FB40, 0xEE6AFB40, 0xEE6BFB40, 0xEE6CFB40, 0xEE6DFB40, 0xEE6EFB40, 0xEE6FFB40, 0xEE70FB40, 0xEE71FB40, 0xEE72FB40, + 0xEE73FB40, 0xEE74FB40, 0xEE75FB40, 0xEE76FB40, 0xEE77FB40, 0xEE78FB40, 0xEE79FB40, 0xEE7AFB40, 0xEE7BFB40, 0xEE7CFB40, 0xEE7DFB40, 0xEE7EFB40, 0xEE7FFB40, 0xEE80FB40, 0xEE81FB40, + 0xEE82FB40, 0xEE83FB40, 0xEE84FB40, 0xEE85FB40, 0xEE86FB40, 0xEE87FB40, 0xEE88FB40, 0xEE89FB40, 0xEE8AFB40, 0xEE8BFB40, 0xEE8CFB40, 0xEE8DFB40, 0xEE8EFB40, 0xEE8FFB40, 0xEE90FB40, 
+ 0xEE91FB40, 0xEE92FB40, 0xEE93FB40, 0xEE94FB40, 0xEE95FB40, 0xEE96FB40, 0xEE97FB40, 0xEE98FB40, 0xEE99FB40, 0xEE9AFB40, 0xEE9BFB40, 0xEE9CFB40, 0xEE9DFB40, 0xEE9EFB40, 0xEE9FFB40, + 0xEEA0FB40, 0xEEA1FB40, 0xEEA2FB40, 0xEEA3FB40, 0xEEA4FB40, 0xEEA5FB40, 0xEEA6FB40, 0xEEA7FB40, 0xEEA8FB40, 0xEEA9FB40, 0xEEAAFB40, 0xEEABFB40, 0xEEACFB40, 0xEEADFB40, 0xEEAEFB40, + 0xEEAFFB40, 0xEEB0FB40, 0xEEB1FB40, 0xEEB2FB40, 0xEEB3FB40, 0xEEB4FB40, 0xEEB5FB40, 0xEEB6FB40, 0xEEB7FB40, 0xEEB8FB40, 0xEEB9FB40, 0xEEBAFB40, 0xEEBBFB40, 0xEEBCFB40, 0xEEBDFB40, + 0xEEBEFB40, 0xEEBFFB40, 0xEEC0FB40, 0xEEC1FB40, 0xEEC2FB40, 0xEEC3FB40, 0xEEC4FB40, 0xEEC5FB40, 0xEEC6FB40, 0xEEC7FB40, 0xEEC8FB40, 0xEEC9FB40, 0xEECAFB40, 0xEECBFB40, 0xEECCFB40, + 0xEECDFB40, 0xEECEFB40, 0xEECFFB40, 0xEED0FB40, 0xEED1FB40, 0xEED2FB40, 0xEED3FB40, 0xEED4FB40, 0xEED5FB40, 0xEED6FB40, 0xEED7FB40, 0xEED8FB40, 0xEED9FB40, 0xEEDAFB40, 0xEEDBFB40, + 0xEEDCFB40, 0xEEDDFB40, 0xEEDEFB40, 0xEEDFFB40, 0xEEE0FB40, 0xEEE1FB40, 0xEEE2FB40, 0xEEE3FB40, 0xEEE4FB40, 0xEEE5FB40, 0xEEE6FB40, 0xEEE7FB40, 0xEEE8FB40, 0xEEE9FB40, 0xEEEAFB40, + 0xEEEBFB40, 0xEEECFB40, 0xEEEDFB40, 0xEEEEFB40, 0xEEEFFB40, 0xEEF0FB40, 0xEEF1FB40, 0xEEF2FB40, 0xEEF3FB40, 0xEEF4FB40, 0xEEF5FB40, 0xEEF6FB40, 0xEEF7FB40, 0xEEF8FB40, 0xEEF9FB40, + 0xEEFAFB40, 0xEEFBFB40, 0xEEFCFB40, 0xEEFDFB40, 0xEEFEFB40, 0xEEFFFB40, 0xEF00FB40, 0xEF01FB40, 0xEF02FB40, 0xEF03FB40, 0xEF04FB40, 0xEF05FB40, 0xEF06FB40, 0xEF07FB40, 0xEF08FB40, + 0xEF09FB40, 0xEF0AFB40, 0xEF0BFB40, 0xEF0CFB40, 0xEF0DFB40, 0xEF0EFB40, 0xEF0FFB40, 0xEF10FB40, 0xEF11FB40, 0xEF12FB40, 0xEF13FB40, 0xEF14FB40, 0xEF15FB40, 0xEF16FB40, 0xEF17FB40, + 0xEF18FB40, 0xEF19FB40, 0xEF1AFB40, 0xEF1BFB40, 0xEF1CFB40, 0xEF1DFB40, 0xEF1EFB40, 0xEF1FFB40, 0xEF20FB40, 0xEF21FB40, 0xEF22FB40, 0xEF23FB40, 0xEF24FB40, 0xEF25FB40, 0xEF26FB40, + 0xEF27FB40, 0xEF28FB40, 0xEF29FB40, 0xEF2AFB40, 0xEF2BFB40, 0xEF2CFB40, 0xEF2DFB40, 0xEF2EFB40, 0xEF2FFB40, 0xEF30FB40, 0xEF31FB40, 0xEF32FB40, 0xEF33FB40, 0xEF34FB40, 
0xEF35FB40, + 0xEF36FB40, 0xEF37FB40, 0xEF38FB40, 0xEF39FB40, 0xEF3AFB40, 0xEF3BFB40, 0xEF3CFB40, 0xEF3DFB40, 0xEF3EFB40, 0xEF3FFB40, 0xEF40FB40, 0xEF41FB40, 0xEF42FB40, 0xEF43FB40, 0xEF44FB40, + 0xEF45FB40, 0xEF46FB40, 0xEF47FB40, 0xEF48FB40, 0xEF49FB40, 0xEF4AFB40, 0xEF4BFB40, 0xEF4CFB40, 0xEF4DFB40, 0xEF4EFB40, 0xEF4FFB40, 0xEF50FB40, 0xEF51FB40, 0xEF52FB40, 0xEF53FB40, + 0xEF54FB40, 0xEF55FB40, 0xEF56FB40, 0xEF57FB40, 0xEF58FB40, 0xEF59FB40, 0xEF5AFB40, 0xEF5BFB40, 0xEF5CFB40, 0xEF5DFB40, 0xEF5EFB40, 0xEF5FFB40, 0xEF60FB40, 0xEF61FB40, 0xEF62FB40, + 0xEF63FB40, 0xEF64FB40, 0xEF65FB40, 0xEF66FB40, 0xEF67FB40, 0xEF68FB40, 0xEF69FB40, 0xEF6AFB40, 0xEF6BFB40, 0xEF6CFB40, 0xEF6DFB40, 0xEF6EFB40, 0xEF6FFB40, 0xEF70FB40, 0xEF71FB40, + 0xEF72FB40, 0xEF73FB40, 0xEF74FB40, 0xEF75FB40, 0xEF76FB40, 0xEF77FB40, 0xEF78FB40, 0xEF79FB40, 0xEF7AFB40, 0xEF7BFB40, 0xEF7CFB40, 0xEF7DFB40, 0xEF7EFB40, 0xEF7FFB40, 0xEF80FB40, + 0xEF81FB40, 0xEF82FB40, 0xEF83FB40, 0xEF84FB40, 0xEF85FB40, 0xEF86FB40, 0xEF87FB40, 0xEF88FB40, 0xEF89FB40, 0xEF8AFB40, 0xEF8BFB40, 0xEF8CFB40, 0xEF8DFB40, 0xEF8EFB40, 0xEF8FFB40, + 0xEF90FB40, 0xEF91FB40, 0xEF92FB40, 0xEF93FB40, 0xEF94FB40, 0xEF95FB40, 0xEF96FB40, 0xEF97FB40, 0xEF98FB40, 0xEF99FB40, 0xEF9AFB40, 0xEF9BFB40, 0xEF9CFB40, 0xEF9DFB40, 0xEF9EFB40, + 0xEF9FFB40, 0xEFA0FB40, 0xEFA1FB40, 0xEFA2FB40, 0xEFA3FB40, 0xEFA4FB40, 0xEFA5FB40, 0xEFA6FB40, 0xEFA7FB40, 0xEFA8FB40, 0xEFA9FB40, 0xEFAAFB40, 0xEFABFB40, 0xEFACFB40, 0xEFADFB40, + 0xEFAEFB40, 0xEFAFFB40, 0xEFB0FB40, 0xEFB1FB40, 0xEFB2FB40, 0xEFB3FB40, 0xEFB4FB40, 0xEFB5FB40, 0xEFB6FB40, 0xEFB7FB40, 0xEFB8FB40, 0xEFB9FB40, 0xEFBAFB40, 0xEFBBFB40, 0xEFBCFB40, + 0xEFBDFB40, 0xEFBEFB40, 0xEFBFFB40, 0xEFC0FB40, 0xEFC1FB40, 0xEFC2FB40, 0xEFC3FB40, 0xEFC4FB40, 0xEFC5FB40, 0xEFC6FB40, 0xEFC7FB40, 0xEFC8FB40, 0xEFC9FB40, 0xEFCAFB40, 0xEFCBFB40, + 0xEFCCFB40, 0xEFCDFB40, 0xEFCEFB40, 0xEFCFFB40, 0xEFD0FB40, 0xEFD1FB40, 0xEFD2FB40, 0xEFD3FB40, 0xEFD4FB40, 0xEFD5FB40, 0xEFD6FB40, 0xEFD7FB40, 0xEFD8FB40, 
0xEFD9FB40, 0xEFDAFB40, + 0xEFDBFB40, 0xEFDCFB40, 0xEFDDFB40, 0xEFDEFB40, 0xEFDFFB40, 0xEFE0FB40, 0xEFE1FB40, 0xEFE2FB40, 0xEFE3FB40, 0xEFE4FB40, 0xEFE5FB40, 0xEFE6FB40, 0xEFE7FB40, 0xEFE8FB40, 0xEFE9FB40, + 0xEFEAFB40, 0xEFEBFB40, 0xEFECFB40, 0xEFEDFB40, 0xEFEEFB40, 0xEFEFFB40, 0xEFF0FB40, 0xEFF1FB40, 0xEFF2FB40, 0xEFF3FB40, 0xEFF4FB40, 0xEFF5FB40, 0xEFF6FB40, 0xEFF7FB40, 0xEFF8FB40, + 0xEFF9FB40, 0xEFFAFB40, 0xEFFBFB40, 0xEFFCFB40, 0xEFFDFB40, 0xEFFEFB40, 0xEFFFFB40, 0xF000FB40, 0xF001FB40, 0xF002FB40, 0xF003FB40, 0xF004FB40, 0xF005FB40, 0xF006FB40, 0xF007FB40, + 0xF008FB40, 0xF009FB40, 0xF00AFB40, 0xF00BFB40, 0xF00CFB40, 0xF00DFB40, 0xF00EFB40, 0xF00FFB40, 0xF010FB40, 0xF011FB40, 0xF012FB40, 0xF013FB40, 0xF014FB40, 0xF015FB40, 0xF016FB40, + 0xF017FB40, 0xF018FB40, 0xF019FB40, 0xF01AFB40, 0xF01BFB40, 0xF01CFB40, 0xF01DFB40, 0xF01EFB40, 0xF01FFB40, 0xF020FB40, 0xF021FB40, 0xF022FB40, 0xF023FB40, 0xF024FB40, 0xF025FB40, + 0xF026FB40, 0xF027FB40, 0xF028FB40, 0xF029FB40, 0xF02AFB40, 0xF02BFB40, 0xF02CFB40, 0xF02DFB40, 0xF02EFB40, 0xF02FFB40, 0xF030FB40, 0xF031FB40, 0xF032FB40, 0xF033FB40, 0xF034FB40, + 0xF035FB40, 0xF036FB40, 0xF037FB40, 0xF038FB40, 0xF039FB40, 0xF03AFB40, 0xF03BFB40, 0xF03CFB40, 0xF03DFB40, 0xF03EFB40, 0xF03FFB40, 0xF040FB40, 0xF041FB40, 0xF042FB40, 0xF043FB40, + 0xF044FB40, 0xF045FB40, 0xF046FB40, 0xF047FB40, 0xF048FB40, 0xF049FB40, 0xF04AFB40, 0xF04BFB40, 0xF04CFB40, 0xF04DFB40, 0xF04EFB40, 0xF04FFB40, 0xF050FB40, 0xF051FB40, 0xF052FB40, + 0xF053FB40, 0xF054FB40, 0xF055FB40, 0xF056FB40, 0xF057FB40, 0xF058FB40, 0xF059FB40, 0xF05AFB40, 0xF05BFB40, 0xF05CFB40, 0xF05DFB40, 0xF05EFB40, 0xF05FFB40, 0xF060FB40, 0xF061FB40, + 0xF062FB40, 0xF063FB40, 0xF064FB40, 0xF065FB40, 0xF066FB40, 0xF067FB40, 0xF068FB40, 0xF069FB40, 0xF06AFB40, 0xF06BFB40, 0xF06CFB40, 0xF06DFB40, 0xF06EFB40, 0xF06FFB40, 0xF070FB40, + 0xF071FB40, 0xF072FB40, 0xF073FB40, 0xF074FB40, 0xF075FB40, 0xF076FB40, 0xF077FB40, 0xF078FB40, 0xF079FB40, 0xF07AFB40, 0xF07BFB40, 0xF07CFB40, 
0xF07DFB40, 0xF07EFB40, 0xF07FFB40, + 0xF080FB40, 0xF081FB40, 0xF082FB40, 0xF083FB40, 0xF084FB40, 0xF085FB40, 0xF086FB40, 0xF087FB40, 0xF088FB40, 0xF089FB40, 0xF08AFB40, 0xF08BFB40, 0xF08CFB40, 0xF08DFB40, 0xF08EFB40, + 0xF08FFB40, 0xF090FB40, 0xF091FB40, 0xF092FB40, 0xF093FB40, 0xF094FB40, 0xF095FB40, 0xF096FB40, 0xF097FB40, 0xF098FB40, 0xF099FB40, 0xF09AFB40, 0xF09BFB40, 0xF09CFB40, 0xF09DFB40, + 0xF09EFB40, 0xF09FFB40, 0xF0A0FB40, 0xF0A1FB40, 0xF0A2FB40, 0xF0A3FB40, 0xF0A4FB40, 0xF0A5FB40, 0xF0A6FB40, 0xF0A7FB40, 0xF0A8FB40, 0xF0A9FB40, 0xF0AAFB40, 0xF0ABFB40, 0xF0ACFB40, + 0xF0ADFB40, 0xF0AEFB40, 0xF0AFFB40, 0xF0B0FB40, 0xF0B1FB40, 0xF0B2FB40, 0xF0B3FB40, 0xF0B4FB40, 0xF0B5FB40, 0xF0B6FB40, 0xF0B7FB40, 0xF0B8FB40, 0xF0B9FB40, 0xF0BAFB40, 0xF0BBFB40, + 0xF0BCFB40, 0xF0BDFB40, 0xF0BEFB40, 0xF0BFFB40, 0xF0C0FB40, 0xF0C1FB40, 0xF0C2FB40, 0xF0C3FB40, 0xF0C4FB40, 0xF0C5FB40, 0xF0C6FB40, 0xF0C7FB40, 0xF0C8FB40, 0xF0C9FB40, 0xF0CAFB40, + 0xF0CBFB40, 0xF0CCFB40, 0xF0CDFB40, 0xF0CEFB40, 0xF0CFFB40, 0xF0D0FB40, 0xF0D1FB40, 0xF0D2FB40, 0xF0D3FB40, 0xF0D4FB40, 0xF0D5FB40, 0xF0D6FB40, 0xF0D7FB40, 0xF0D8FB40, 0xF0D9FB40, + 0xF0DAFB40, 0xF0DBFB40, 0xF0DCFB40, 0xF0DDFB40, 0xF0DEFB40, 0xF0DFFB40, 0xF0E0FB40, 0xF0E1FB40, 0xF0E2FB40, 0xF0E3FB40, 0xF0E4FB40, 0xF0E5FB40, 0xF0E6FB40, 0xF0E7FB40, 0xF0E8FB40, + 0xF0E9FB40, 0xF0EAFB40, 0xF0EBFB40, 0xF0ECFB40, 0xF0EDFB40, 0xF0EEFB40, 0xF0EFFB40, 0xF0F0FB40, 0xF0F1FB40, 0xF0F2FB40, 0xF0F3FB40, 0xF0F4FB40, 0xF0F5FB40, 0xF0F6FB40, 0xF0F7FB40, + 0xF0F8FB40, 0xF0F9FB40, 0xF0FAFB40, 0xF0FBFB40, 0xF0FCFB40, 0xF0FDFB40, 0xF0FEFB40, 0xF0FFFB40, 0xF100FB40, 0xF101FB40, 0xF102FB40, 0xF103FB40, 0xF104FB40, 0xF105FB40, 0xF106FB40, + 0xF107FB40, 0xF108FB40, 0xF109FB40, 0xF10AFB40, 0xF10BFB40, 0xF10CFB40, 0xF10DFB40, 0xF10EFB40, 0xF10FFB40, 0xF110FB40, 0xF111FB40, 0xF112FB40, 0xF113FB40, 0xF114FB40, 0xF115FB40, + 0xF116FB40, 0xF117FB40, 0xF118FB40, 0xF119FB40, 0xF11AFB40, 0xF11BFB40, 0xF11CFB40, 0xF11DFB40, 0xF11EFB40, 0xF11FFB40, 0xF120FB40, 
0xF121FB40, 0xF122FB40, 0xF123FB40, 0xF124FB40, + 0xF125FB40, 0xF126FB40, 0xF127FB40, 0xF128FB40, 0xF129FB40, 0xF12AFB40, 0xF12BFB40, 0xF12CFB40, 0xF12DFB40, 0xF12EFB40, 0xF12FFB40, 0xF130FB40, 0xF131FB40, 0xF132FB40, 0xF133FB40, + 0xF134FB40, 0xF135FB40, 0xF136FB40, 0xF137FB40, 0xF138FB40, 0xF139FB40, 0xF13AFB40, 0xF13BFB40, 0xF13CFB40, 0xF13DFB40, 0xF13EFB40, 0xF13FFB40, 0xF140FB40, 0xF141FB40, 0xF142FB40, + 0xF143FB40, 0xF144FB40, 0xF145FB40, 0xF146FB40, 0xF147FB40, 0xF148FB40, 0xF149FB40, 0xF14AFB40, 0xF14BFB40, 0xF14CFB40, 0xF14DFB40, 0xF14EFB40, 0xF14FFB40, 0xF150FB40, 0xF151FB40, + 0xF152FB40, 0xF153FB40, 0xF154FB40, 0xF155FB40, 0xF156FB40, 0xF157FB40, 0xF158FB40, 0xF159FB40, 0xF15AFB40, 0xF15BFB40, 0xF15CFB40, 0xF15DFB40, 0xF15EFB40, 0xF15FFB40, 0xF160FB40, + 0xF161FB40, 0xF162FB40, 0xF163FB40, 0xF164FB40, 0xF165FB40, 0xF166FB40, 0xF167FB40, 0xF168FB40, 0xF169FB40, 0xF16AFB40, 0xF16BFB40, 0xF16CFB40, 0xF16DFB40, 0xF16EFB40, 0xF16FFB40, + 0xF170FB40, 0xF171FB40, 0xF172FB40, 0xF173FB40, 0xF174FB40, 0xF175FB40, 0xF176FB40, 0xF177FB40, 0xF178FB40, 0xF179FB40, 0xF17AFB40, 0xF17BFB40, 0xF17CFB40, 0xF17DFB40, 0xF17EFB40, + 0xF17FFB40, 0xF180FB40, 0xF181FB40, 0xF182FB40, 0xF183FB40, 0xF184FB40, 0xF185FB40, 0xF186FB40, 0xF187FB40, 0xF188FB40, 0xF189FB40, 0xF18AFB40, 0xF18BFB40, 0xF18CFB40, 0xF18DFB40, + 0xF18EFB40, 0xF18FFB40, 0xF190FB40, 0xF191FB40, 0xF192FB40, 0xF193FB40, 0xF194FB40, 0xF195FB40, 0xF196FB40, 0xF197FB40, 0xF198FB40, 0xF199FB40, 0xF19AFB40, 0xF19BFB40, 0xF19CFB40, + 0xF19DFB40, 0xF19EFB40, 0xF19FFB40, 0xF1A0FB40, 0xF1A1FB40, 0xF1A2FB40, 0xF1A3FB40, 0xF1A4FB40, 0xF1A5FB40, 0xF1A6FB40, 0xF1A7FB40, 0xF1A8FB40, 0xF1A9FB40, 0xF1AAFB40, 0xF1ABFB40, + 0xF1ACFB40, 0xF1ADFB40, 0xF1AEFB40, 0xF1AFFB40, 0xF1B0FB40, 0xF1B1FB40, 0xF1B2FB40, 0xF1B3FB40, 0xF1B4FB40, 0xF1B5FB40, 0xF1B6FB40, 0xF1B7FB40, 0xF1B8FB40, 0xF1B9FB40, 0xF1BAFB40, + 0xF1BBFB40, 0xF1BCFB40, 0xF1BDFB40, 0xF1BEFB40, 0xF1BFFB40, 0xF1C0FB40, 0xF1C1FB40, 0xF1C2FB40, 0xF1C3FB40, 0xF1C4FB40, 
0xF1C5FB40, 0xF1C6FB40, 0xF1C7FB40, 0xF1C8FB40, 0xF1C9FB40, + 0xF1CAFB40, 0xF1CBFB40, 0xF1CCFB40, 0xF1CDFB40, 0xF1CEFB40, 0xF1CFFB40, 0xF1D0FB40, 0xF1D1FB40, 0xF1D2FB40, 0xF1D3FB40, 0xF1D4FB40, 0xF1D5FB40, 0xF1D6FB40, 0xF1D7FB40, 0xF1D8FB40, + 0xF1D9FB40, 0xF1DAFB40, 0xF1DBFB40, 0xF1DCFB40, 0xF1DDFB40, 0xF1DEFB40, 0xF1DFFB40, 0xF1E0FB40, 0xF1E1FB40, 0xF1E2FB40, 0xF1E3FB40, 0xF1E4FB40, 0xF1E5FB40, 0xF1E6FB40, 0xF1E7FB40, + 0xF1E8FB40, 0xF1E9FB40, 0xF1EAFB40, 0xF1EBFB40, 0xF1ECFB40, 0xF1EDFB40, 0xF1EEFB40, 0xF1EFFB40, 0xF1F0FB40, 0xF1F1FB40, 0xF1F2FB40, 0xF1F3FB40, 0xF1F4FB40, 0xF1F5FB40, 0xF1F6FB40, + 0xF1F7FB40, 0xF1F8FB40, 0xF1F9FB40, 0xF1FAFB40, 0xF1FBFB40, 0xF1FCFB40, 0xF1FDFB40, 0xF1FEFB40, 0xF1FFFB40, 0xF200FB40, 0xF201FB40, 0xF202FB40, 0xF203FB40, 0xF204FB40, 0xF205FB40, + 0xF206FB40, 0xF207FB40, 0xF208FB40, 0xF209FB40, 0xF20AFB40, 0xF20BFB40, 0xF20CFB40, 0xF20DFB40, 0xF20EFB40, 0xF20FFB40, 0xF210FB40, 0xF211FB40, 0xF212FB40, 0xF213FB40, 0xF214FB40, + 0xF215FB40, 0xF216FB40, 0xF217FB40, 0xF218FB40, 0xF219FB40, 0xF21AFB40, 0xF21BFB40, 0xF21CFB40, 0xF21DFB40, 0xF21EFB40, 0xF21FFB40, 0xF220FB40, 0xF221FB40, 0xF222FB40, 0xF223FB40, + 0xF224FB40, 0xF225FB40, 0xF226FB40, 0xF227FB40, 0xF228FB40, 0xF229FB40, 0xF22AFB40, 0xF22BFB40, 0xF22CFB40, 0xF22DFB40, 0xF22EFB40, 0xF22FFB40, 0xF230FB40, 0xF231FB40, 0xF232FB40, + 0xF233FB40, 0xF234FB40, 0xF235FB40, 0xF236FB40, 0xF237FB40, 0xF238FB40, 0xF239FB40, 0xF23AFB40, 0xF23BFB40, 0xF23CFB40, 0xF23DFB40, 0xF23EFB40, 0xF23FFB40, 0xF240FB40, 0xF241FB40, + 0xF242FB40, 0xF243FB40, 0xF244FB40, 0xF245FB40, 0xF246FB40, 0xF247FB40, 0xF248FB40, 0xF249FB40, 0xF24AFB40, 0xF24BFB40, 0xF24CFB40, 0xF24DFB40, 0xF24EFB40, 0xF24FFB40, 0xF250FB40, + 0xF251FB40, 0xF252FB40, 0xF253FB40, 0xF254FB40, 0xF255FB40, 0xF256FB40, 0xF257FB40, 0xF258FB40, 0xF259FB40, 0xF25AFB40, 0xF25BFB40, 0xF25CFB40, 0xF25DFB40, 0xF25EFB40, 0xF25FFB40, + 0xF260FB40, 0xF261FB40, 0xF262FB40, 0xF263FB40, 0xF264FB40, 0xF265FB40, 0xF266FB40, 0xF267FB40, 0xF268FB40, 
0xF269FB40, 0xF26AFB40, 0xF26BFB40, 0xF26CFB40, 0xF26DFB40, 0xF26EFB40, + 0xF26FFB40, 0xF270FB40, 0xF271FB40, 0xF272FB40, 0xF273FB40, 0xF274FB40, 0xF275FB40, 0xF276FB40, 0xF277FB40, 0xF278FB40, 0xF279FB40, 0xF27AFB40, 0xF27BFB40, 0xF27CFB40, 0xF27DFB40, + 0xF27EFB40, 0xF27FFB40, 0xF280FB40, 0xF281FB40, 0xF282FB40, 0xF283FB40, 0xF284FB40, 0xF285FB40, 0xF286FB40, 0xF287FB40, 0xF288FB40, 0xF289FB40, 0xF28AFB40, 0xF28BFB40, 0xF28CFB40, + 0xF28DFB40, 0xF28EFB40, 0xF28FFB40, 0xF290FB40, 0xF291FB40, 0xF292FB40, 0xF293FB40, 0xF294FB40, 0xF295FB40, 0xF296FB40, 0xF297FB40, 0xF298FB40, 0xF299FB40, 0xF29AFB40, 0xF29BFB40, + 0xF29CFB40, 0xF29DFB40, 0xF29EFB40, 0xF29FFB40, 0xF2A0FB40, 0xF2A1FB40, 0xF2A2FB40, 0xF2A3FB40, 0xF2A4FB40, 0xF2A5FB40, 0xF2A6FB40, 0xF2A7FB40, 0xF2A8FB40, 0xF2A9FB40, 0xF2AAFB40, + 0xF2ABFB40, 0xF2ACFB40, 0xF2ADFB40, 0xF2AEFB40, 0xF2AFFB40, 0xF2B0FB40, 0xF2B1FB40, 0xF2B2FB40, 0xF2B3FB40, 0xF2B4FB40, 0xF2B5FB40, 0xF2B6FB40, 0xF2B7FB40, 0xF2B8FB40, 0xF2B9FB40, + 0xF2BAFB40, 0xF2BBFB40, 0xF2BCFB40, 0xF2BDFB40, 0xF2BEFB40, 0xF2BFFB40, 0xF2C0FB40, 0xF2C1FB40, 0xF2C2FB40, 0xF2C3FB40, 0xF2C4FB40, 0xF2C5FB40, 0xF2C6FB40, 0xF2C7FB40, 0xF2C8FB40, + 0xF2C9FB40, 0xF2CAFB40, 0xF2CBFB40, 0xF2CCFB40, 0xF2CDFB40, 0xF2CEFB40, 0xF2CFFB40, 0xF2D0FB40, 0xF2D1FB40, 0xF2D2FB40, 0xF2D3FB40, 0xF2D4FB40, 0xF2D5FB40, 0xF2D6FB40, 0xF2D7FB40, + 0xF2D8FB40, 0xF2D9FB40, 0xF2DAFB40, 0xF2DBFB40, 0xF2DCFB40, 0xF2DDFB40, 0xF2DEFB40, 0xF2DFFB40, 0xF2E0FB40, 0xF2E1FB40, 0xF2E2FB40, 0xF2E3FB40, 0xF2E4FB40, 0xF2E5FB40, 0xF2E6FB40, + 0xF2E7FB40, 0xF2E8FB40, 0xF2E9FB40, 0xF2EAFB40, 0xF2EBFB40, 0xF2ECFB40, 0xF2EDFB40, 0xF2EEFB40, 0xF2EFFB40, 0xF2F0FB40, 0xF2F1FB40, 0xF2F2FB40, 0xF2F3FB40, 0xF2F4FB40, 0xF2F5FB40, + 0xF2F6FB40, 0xF2F7FB40, 0xF2F8FB40, 0xF2F9FB40, 0xF2FAFB40, 0xF2FBFB40, 0xF2FCFB40, 0xF2FDFB40, 0xF2FEFB40, 0xF2FFFB40, 0xF300FB40, 0xF301FB40, 0xF302FB40, 0xF303FB40, 0xF304FB40, + 0xF305FB40, 0xF306FB40, 0xF307FB40, 0xF308FB40, 0xF309FB40, 0xF30AFB40, 0xF30BFB40, 0xF30CFB40, 
0xF30DFB40, 0xF30EFB40, 0xF30FFB40, 0xF310FB40, 0xF311FB40, 0xF312FB40, 0xF313FB40, + 0xF314FB40, 0xF315FB40, 0xF316FB40, 0xF317FB40, 0xF318FB40, 0xF319FB40, 0xF31AFB40, 0xF31BFB40, 0xF31CFB40, 0xF31DFB40, 0xF31EFB40, 0xF31FFB40, 0xF320FB40, 0xF321FB40, 0xF322FB40, + 0xF323FB40, 0xF324FB40, 0xF325FB40, 0xF326FB40, 0xF327FB40, 0xF328FB40, 0xF329FB40, 0xF32AFB40, 0xF32BFB40, 0xF32CFB40, 0xF32DFB40, 0xF32EFB40, 0xF32FFB40, 0xF330FB40, 0xF331FB40, + 0xF332FB40, 0xF333FB40, 0xF334FB40, 0xF335FB40, 0xF336FB40, 0xF337FB40, 0xF338FB40, 0xF339FB40, 0xF33AFB40, 0xF33BFB40, 0xF33CFB40, 0xF33DFB40, 0xF33EFB40, 0xF33FFB40, 0xF340FB40, + 0xF341FB40, 0xF342FB40, 0xF343FB40, 0xF344FB40, 0xF345FB40, 0xF346FB40, 0xF347FB40, 0xF348FB40, 0xF349FB40, 0xF34AFB40, 0xF34BFB40, 0xF34CFB40, 0xF34DFB40, 0xF34EFB40, 0xF34FFB40, + 0xF350FB40, 0xF351FB40, 0xF352FB40, 0xF353FB40, 0xF354FB40, 0xF355FB40, 0xF356FB40, 0xF357FB40, 0xF358FB40, 0xF359FB40, 0xF35AFB40, 0xF35BFB40, 0xF35CFB40, 0xF35DFB40, 0xF35EFB40, + 0xF35FFB40, 0xF360FB40, 0xF361FB40, 0xF362FB40, 0xF363FB40, 0xF364FB40, 0xF365FB40, 0xF366FB40, 0xF367FB40, 0xF368FB40, 0xF369FB40, 0xF36AFB40, 0xF36BFB40, 0xF36CFB40, 0xF36DFB40, + 0xF36EFB40, 0xF36FFB40, 0xF370FB40, 0xF371FB40, 0xF372FB40, 0xF373FB40, 0xF374FB40, 0xF375FB40, 0xF376FB40, 0xF377FB40, 0xF378FB40, 0xF379FB40, 0xF37AFB40, 0xF37BFB40, 0xF37CFB40, + 0xF37DFB40, 0xF37EFB40, 0xF37FFB40, 0xF380FB40, 0xF381FB40, 0xF382FB40, 0xF383FB40, 0xF384FB40, 0xF385FB40, 0xF386FB40, 0xF387FB40, 0xF388FB40, 0xF389FB40, 0xF38AFB40, 0xF38BFB40, + 0xF38CFB40, 0xF38DFB40, 0xF38EFB40, 0xF38FFB40, 0xF390FB40, 0xF391FB40, 0xF392FB40, 0xF393FB40, 0xF394FB40, 0xF395FB40, 0xF396FB40, 0xF397FB40, 0xF398FB40, 0xF399FB40, 0xF39AFB40, + 0xF39BFB40, 0xF39CFB40, 0xF39DFB40, 0xF39EFB40, 0xF39FFB40, 0xF3A0FB40, 0xF3A1FB40, 0xF3A2FB40, 0xF3A3FB40, 0xF3A4FB40, 0xF3A5FB40, 0xF3A6FB40, 0xF3A7FB40, 0xF3A8FB40, 0xF3A9FB40, + 0xF3AAFB40, 0xF3ABFB40, 0xF3ACFB40, 0xF3ADFB40, 0xF3AEFB40, 0xF3AFFB40, 0xF3B0FB40, 
0xF3B1FB40, 0xF3B2FB40, 0xF3B3FB40, 0xF3B4FB40, 0xF3B5FB40, 0xF3B6FB40, 0xF3B7FB40, 0xF3B8FB40, + 0xF3B9FB40, 0xF3BAFB40, 0xF3BBFB40, 0xF3BCFB40, 0xF3BDFB40, 0xF3BEFB40, 0xF3BFFB40, 0xF3C0FB40, 0xF3C1FB40, 0xF3C2FB40, 0xF3C3FB40, 0xF3C4FB40, 0xF3C5FB40, 0xF3C6FB40, 0xF3C7FB40, + 0xF3C8FB40, 0xF3C9FB40, 0xF3CAFB40, 0xF3CBFB40, 0xF3CCFB40, 0xF3CDFB40, 0xF3CEFB40, 0xF3CFFB40, 0xF3D0FB40, 0xF3D1FB40, 0xF3D2FB40, 0xF3D3FB40, 0xF3D4FB40, 0xF3D5FB40, 0xF3D6FB40, + 0xF3D7FB40, 0xF3D8FB40, 0xF3D9FB40, 0xF3DAFB40, 0xF3DBFB40, 0xF3DCFB40, 0xF3DDFB40, 0xF3DEFB40, 0xF3DFFB40, 0xF3E0FB40, 0xF3E1FB40, 0xF3E2FB40, 0xF3E3FB40, 0xF3E4FB40, 0xF3E5FB40, + 0xF3E6FB40, 0xF3E7FB40, 0xF3E8FB40, 0xF3E9FB40, 0xF3EAFB40, 0xF3EBFB40, 0xF3ECFB40, 0xF3EDFB40, 0xF3EEFB40, 0xF3EFFB40, 0xF3F0FB40, 0xF3F1FB40, 0xF3F2FB40, 0xF3F3FB40, 0xF3F4FB40, + 0xF3F5FB40, 0xF3F6FB40, 0xF3F7FB40, 0xF3F8FB40, 0xF3F9FB40, 0xF3FAFB40, 0xF3FBFB40, 0xF3FCFB40, 0xF3FDFB40, 0xF3FEFB40, 0xF3FFFB40, 0xF400FB40, 0xF401FB40, 0xF402FB40, 0xF403FB40, + 0xF404FB40, 0xF405FB40, 0xF406FB40, 0xF407FB40, 0xF408FB40, 0xF409FB40, 0xF40AFB40, 0xF40BFB40, 0xF40CFB40, 0xF40DFB40, 0xF40EFB40, 0xF40FFB40, 0xF410FB40, 0xF411FB40, 0xF412FB40, + 0xF413FB40, 0xF414FB40, 0xF415FB40, 0xF416FB40, 0xF417FB40, 0xF418FB40, 0xF419FB40, 0xF41AFB40, 0xF41BFB40, 0xF41CFB40, 0xF41DFB40, 0xF41EFB40, 0xF41FFB40, 0xF420FB40, 0xF421FB40, + 0xF422FB40, 0xF423FB40, 0xF424FB40, 0xF425FB40, 0xF426FB40, 0xF427FB40, 0xF428FB40, 0xF429FB40, 0xF42AFB40, 0xF42BFB40, 0xF42CFB40, 0xF42DFB40, 0xF42EFB40, 0xF42FFB40, 0xF430FB40, + 0xF431FB40, 0xF432FB40, 0xF433FB40, 0xF434FB40, 0xF435FB40, 0xF436FB40, 0xF437FB40, 0xF438FB40, 0xF439FB40, 0xF43AFB40, 0xF43BFB40, 0xF43CFB40, 0xF43DFB40, 0xF43EFB40, 0xF43FFB40, + 0xF440FB40, 0xF441FB40, 0xF442FB40, 0xF443FB40, 0xF444FB40, 0xF445FB40, 0xF446FB40, 0xF447FB40, 0xF448FB40, 0xF449FB40, 0xF44AFB40, 0xF44BFB40, 0xF44CFB40, 0xF44DFB40, 0xF44EFB40, + 0xF44FFB40, 0xF450FB40, 0xF451FB40, 0xF452FB40, 0xF453FB40, 0xF454FB40, 
0xF455FB40, 0xF456FB40, 0xF457FB40, 0xF458FB40, 0xF459FB40, 0xF45AFB40, 0xF45BFB40, 0xF45CFB40, 0xF45DFB40, + 0xF45EFB40, 0xF45FFB40, 0xF460FB40, 0xF461FB40, 0xF462FB40, 0xF463FB40, 0xF464FB40, 0xF465FB40, 0xF466FB40, 0xF467FB40, 0xF468FB40, 0xF469FB40, 0xF46AFB40, 0xF46BFB40, 0xF46CFB40, + 0xF46DFB40, 0xF46EFB40, 0xF46FFB40, 0xF470FB40, 0xF471FB40, 0xF472FB40, 0xF473FB40, 0xF474FB40, 0xF475FB40, 0xF476FB40, 0xF477FB40, 0xF478FB40, 0xF479FB40, 0xF47AFB40, 0xF47BFB40, + 0xF47CFB40, 0xF47DFB40, 0xF47EFB40, 0xF47FFB40, 0xF480FB40, 0xF481FB40, 0xF482FB40, 0xF483FB40, 0xF484FB40, 0xF485FB40, 0xF486FB40, 0xF487FB40, 0xF488FB40, 0xF489FB40, 0xF48AFB40, + 0xF48BFB40, 0xF48CFB40, 0xF48DFB40, 0xF48EFB40, 0xF48FFB40, 0xF490FB40, 0xF491FB40, 0xF492FB40, 0xF493FB40, 0xF494FB40, 0xF495FB40, 0xF496FB40, 0xF497FB40, 0xF498FB40, 0xF499FB40, + 0xF49AFB40, 0xF49BFB40, 0xF49CFB40, 0xF49DFB40, 0xF49EFB40, 0xF49FFB40, 0xF4A0FB40, 0xF4A1FB40, 0xF4A2FB40, 0xF4A3FB40, 0xF4A4FB40, 0xF4A5FB40, 0xF4A6FB40, 0xF4A7FB40, 0xF4A8FB40, + 0xF4A9FB40, 0xF4AAFB40, 0xF4ABFB40, 0xF4ACFB40, 0xF4ADFB40, 0xF4AEFB40, 0xF4AFFB40, 0xF4B0FB40, 0xF4B1FB40, 0xF4B2FB40, 0xF4B3FB40, 0xF4B4FB40, 0xF4B5FB40, 0xF4B6FB40, 0xF4B7FB40, + 0xF4B8FB40, 0xF4B9FB40, 0xF4BAFB40, 0xF4BBFB40, 0xF4BCFB40, 0xF4BDFB40, 0xF4BEFB40, 0xF4BFFB40, 0xF4C0FB40, 0xF4C1FB40, 0xF4C2FB40, 0xF4C3FB40, 0xF4C4FB40, 0xF4C5FB40, 0xF4C6FB40, + 0xF4C7FB40, 0xF4C8FB40, 0xF4C9FB40, 0xF4CAFB40, 0xF4CBFB40, 0xF4CCFB40, 0xF4CDFB40, 0xF4CEFB40, 0xF4CFFB40, 0xF4D0FB40, 0xF4D1FB40, 0xF4D2FB40, 0xF4D3FB40, 0xF4D4FB40, 0xF4D5FB40, + 0xF4D6FB40, 0xF4D7FB40, 0xF4D8FB40, 0xF4D9FB40, 0xF4DAFB40, 0xF4DBFB40, 0xF4DCFB40, 0xF4DDFB40, 0xF4DEFB40, 0xF4DFFB40, 0xF4E0FB40, 0xF4E1FB40, 0xF4E2FB40, 0xF4E3FB40, 0xF4E4FB40, + 0xF4E5FB40, 0xF4E6FB40, 0xF4E7FB40, 0xF4E8FB40, 0xF4E9FB40, 0xF4EAFB40, 0xF4EBFB40, 0xF4ECFB40, 0xF4EDFB40, 0xF4EEFB40, 0xF4EFFB40, 0xF4F0FB40, 0xF4F1FB40, 0xF4F2FB40, 0xF4F3FB40, + 0xF4F4FB40, 0xF4F5FB40, 0xF4F6FB40, 0xF4F7FB40, 0xF4F8FB40, 
0xF4F9FB40, 0xF4FAFB40, 0xF4FBFB40, 0xF4FCFB40, 0xF4FDFB40, 0xF4FEFB40, 0xF4FFFB40, 0xF500FB40, 0xF501FB40, 0xF502FB40, + 0xF503FB40, 0xF504FB40, 0xF505FB40, 0xF506FB40, 0xF507FB40, 0xF508FB40, 0xF509FB40, 0xF50AFB40, 0xF50BFB40, 0xF50CFB40, 0xF50DFB40, 0xF50EFB40, 0xF50FFB40, 0xF510FB40, 0xF511FB40, + 0xF512FB40, 0xF513FB40, 0xF514FB40, 0xF515FB40, 0xF516FB40, 0xF517FB40, 0xF518FB40, 0xF519FB40, 0xF51AFB40, 0xF51BFB40, 0xF51CFB40, 0xF51DFB40, 0xF51EFB40, 0xF51FFB40, 0xF520FB40, + 0xF521FB40, 0xF522FB40, 0xF523FB40, 0xF524FB40, 0xF525FB40, 0xF526FB40, 0xF527FB40, 0xF528FB40, 0xF529FB40, 0xF52AFB40, 0xF52BFB40, 0xF52CFB40, 0xF52DFB40, 0xF52EFB40, 0xF52FFB40, + 0xF530FB40, 0xF531FB40, 0xF532FB40, 0xF533FB40, 0xF534FB40, 0xF535FB40, 0xF536FB40, 0xF537FB40, 0xF538FB40, 0xF539FB40, 0xF53AFB40, 0xF53BFB40, 0xF53CFB40, 0xF53DFB40, 0xF53EFB40, + 0xF53FFB40, 0xF540FB40, 0xF541FB40, 0xF542FB40, 0xF543FB40, 0xF544FB40, 0xF545FB40, 0xF546FB40, 0xF547FB40, 0xF548FB40, 0xF549FB40, 0xF54AFB40, 0xF54BFB40, 0xF54CFB40, 0xF54DFB40, + 0xF54EFB40, 0xF54FFB40, 0xF550FB40, 0xF551FB40, 0xF552FB40, 0xF553FB40, 0xF554FB40, 0xF555FB40, 0xF556FB40, 0xF557FB40, 0xF558FB40, 0xF559FB40, 0xF55AFB40, 0xF55BFB40, 0xF55CFB40, + 0xF55DFB40, 0xF55EFB40, 0xF55FFB40, 0xF560FB40, 0xF561FB40, 0xF562FB40, 0xF563FB40, 0xF564FB40, 0xF565FB40, 0xF566FB40, 0xF567FB40, 0xF568FB40, 0xF569FB40, 0xF56AFB40, 0xF56BFB40, + 0xF56CFB40, 0xF56DFB40, 0xF56EFB40, 0xF56FFB40, 0xF570FB40, 0xF571FB40, 0xF572FB40, 0xF573FB40, 0xF574FB40, 0xF575FB40, 0xF576FB40, 0xF577FB40, 0xF578FB40, 0xF579FB40, 0xF57AFB40, + 0xF57BFB40, 0xF57CFB40, 0xF57DFB40, 0xF57EFB40, 0xF57FFB40, 0xF580FB40, 0xF581FB40, 0xF582FB40, 0xF583FB40, 0xF584FB40, 0xF585FB40, 0xF586FB40, 0xF587FB40, 0xF588FB40, 0xF589FB40, + 0xF58AFB40, 0xF58BFB40, 0xF58CFB40, 0xF58DFB40, 0xF58EFB40, 0xF58FFB40, 0xF590FB40, 0xF591FB40, 0xF592FB40, 0xF593FB40, 0xF594FB40, 0xF595FB40, 0xF596FB40, 0xF597FB40, 0xF598FB40, + 0xF599FB40, 0xF59AFB40, 0xF59BFB40, 0xF59CFB40, 
0xF59DFB40, 0xF59EFB40, 0xF59FFB40, 0xF5A0FB40, 0xF5A1FB40, 0xF5A2FB40, 0xF5A3FB40, 0xF5A4FB40, 0xF5A5FB40, 0xF5A6FB40, 0xF5A7FB40, + 0xF5A8FB40, 0xF5A9FB40, 0xF5AAFB40, 0xF5ABFB40, 0xF5ACFB40, 0xF5ADFB40, 0xF5AEFB40, 0xF5AFFB40, 0xF5B0FB40, 0xF5B1FB40, 0xF5B2FB40, 0xF5B3FB40, 0xF5B4FB40, 0xF5B5FB40, 0xF5B6FB40, + 0xF5B7FB40, 0xF5B8FB40, 0xF5B9FB40, 0xF5BAFB40, 0xF5BBFB40, 0xF5BCFB40, 0xF5BDFB40, 0xF5BEFB40, 0xF5BFFB40, 0xF5C0FB40, 0xF5C1FB40, 0xF5C2FB40, 0xF5C3FB40, 0xF5C4FB40, 0xF5C5FB40, + 0xF5C6FB40, 0xF5C7FB40, 0xF5C8FB40, 0xF5C9FB40, 0xF5CAFB40, 0xF5CBFB40, 0xF5CCFB40, 0xF5CDFB40, 0xF5CEFB40, 0xF5CFFB40, 0xF5D0FB40, 0xF5D1FB40, 0xF5D2FB40, 0xF5D3FB40, 0xF5D4FB40, + 0xF5D5FB40, 0xF5D6FB40, 0xF5D7FB40, 0xF5D8FB40, 0xF5D9FB40, 0xF5DAFB40, 0xF5DBFB40, 0xF5DCFB40, 0xF5DDFB40, 0xF5DEFB40, 0xF5DFFB40, 0xF5E0FB40, 0xF5E1FB40, 0xF5E2FB40, 0xF5E3FB40, + 0xF5E4FB40, 0xF5E5FB40, 0xF5E6FB40, 0xF5E7FB40, 0xF5E8FB40, 0xF5E9FB40, 0xF5EAFB40, 0xF5EBFB40, 0xF5ECFB40, 0xF5EDFB40, 0xF5EEFB40, 0xF5EFFB40, 0xF5F0FB40, 0xF5F1FB40, 0xF5F2FB40, + 0xF5F3FB40, 0xF5F4FB40, 0xF5F5FB40, 0xF5F6FB40, 0xF5F7FB40, 0xF5F8FB40, 0xF5F9FB40, 0xF5FAFB40, 0xF5FBFB40, 0xF5FCFB40, 0xF5FDFB40, 0xF5FEFB40, 0xF5FFFB40, 0xF600FB40, 0xF601FB40, + 0xF602FB40, 0xF603FB40, 0xF604FB40, 0xF605FB40, 0xF606FB40, 0xF607FB40, 0xF608FB40, 0xF609FB40, 0xF60AFB40, 0xF60BFB40, 0xF60CFB40, 0xF60DFB40, 0xF60EFB40, 0xF60FFB40, 0xF610FB40, + 0xF611FB40, 0xF612FB40, 0xF613FB40, 0xF614FB40, 0xF615FB40, 0xF616FB40, 0xF617FB40, 0xF618FB40, 0xF619FB40, 0xF61AFB40, 0xF61BFB40, 0xF61CFB40, 0xF61DFB40, 0xF61EFB40, 0xF61FFB40, + 0xF620FB40, 0xF621FB40, 0xF622FB40, 0xF623FB40, 0xF624FB40, 0xF625FB40, 0xF626FB40, 0xF627FB40, 0xF628FB40, 0xF629FB40, 0xF62AFB40, 0xF62BFB40, 0xF62CFB40, 0xF62DFB40, 0xF62EFB40, + 0xF62FFB40, 0xF630FB40, 0xF631FB40, 0xF632FB40, 0xF633FB40, 0xF634FB40, 0xF635FB40, 0xF636FB40, 0xF637FB40, 0xF638FB40, 0xF639FB40, 0xF63AFB40, 0xF63BFB40, 0xF63CFB40, 0xF63DFB40, + 0xF63EFB40, 0xF63FFB40, 0xF640FB40, 
0xF641FB40, 0xF642FB40, 0xF643FB40, 0xF644FB40, 0xF645FB40, 0xF646FB40, 0xF647FB40, 0xF648FB40, 0xF649FB40, 0xF64AFB40, 0xF64BFB40, 0xF64CFB40, + 0xF64DFB40, 0xF64EFB40, 0xF64FFB40, 0xF650FB40, 0xF651FB40, 0xF652FB40, 0xF653FB40, 0xF654FB40, 0xF655FB40, 0xF656FB40, 0xF657FB40, 0xF658FB40, 0xF659FB40, 0xF65AFB40, 0xF65BFB40, + 0xF65CFB40, 0xF65DFB40, 0xF65EFB40, 0xF65FFB40, 0xF660FB40, 0xF661FB40, 0xF662FB40, 0xF663FB40, 0xF664FB40, 0xF665FB40, 0xF666FB40, 0xF667FB40, 0xF668FB40, 0xF669FB40, 0xF66AFB40, + 0xF66BFB40, 0xF66CFB40, 0xF66DFB40, 0xF66EFB40, 0xF66FFB40, 0xF670FB40, 0xF671FB40, 0xF672FB40, 0xF673FB40, 0xF674FB40, 0xF675FB40, 0xF676FB40, 0xF677FB40, 0xF678FB40, 0xF679FB40, + 0xF67AFB40, 0xF67BFB40, 0xF67CFB40, 0xF67DFB40, 0xF67EFB40, 0xF67FFB40, 0xF680FB40, 0xF681FB40, 0xF682FB40, 0xF683FB40, 0xF684FB40, 0xF685FB40, 0xF686FB40, 0xF687FB40, 0xF688FB40, + 0xF689FB40, 0xF68AFB40, 0xF68BFB40, 0xF68CFB40, 0xF68DFB40, 0xF68EFB40, 0xF68FFB40, 0xF690FB40, 0xF691FB40, 0xF692FB40, 0xF693FB40, 0xF694FB40, 0xF695FB40, 0xF696FB40, 0xF697FB40, + 0xF698FB40, 0xF699FB40, 0xF69AFB40, 0xF69BFB40, 0xF69CFB40, 0xF69DFB40, 0xF69EFB40, 0xF69FFB40, 0xF6A0FB40, 0xF6A1FB40, 0xF6A2FB40, 0xF6A3FB40, 0xF6A4FB40, 0xF6A5FB40, 0xF6A6FB40, + 0xF6A7FB40, 0xF6A8FB40, 0xF6A9FB40, 0xF6AAFB40, 0xF6ABFB40, 0xF6ACFB40, 0xF6ADFB40, 0xF6AEFB40, 0xF6AFFB40, 0xF6B0FB40, 0xF6B1FB40, 0xF6B2FB40, 0xF6B3FB40, 0xF6B4FB40, 0xF6B5FB40, + 0xF6B6FB40, 0xF6B7FB40, 0xF6B8FB40, 0xF6B9FB40, 0xF6BAFB40, 0xF6BBFB40, 0xF6BCFB40, 0xF6BDFB40, 0xF6BEFB40, 0xF6BFFB40, 0xF6C0FB40, 0xF6C1FB40, 0xF6C2FB40, 0xF6C3FB40, 0xF6C4FB40, + 0xF6C5FB40, 0xF6C6FB40, 0xF6C7FB40, 0xF6C8FB40, 0xF6C9FB40, 0xF6CAFB40, 0xF6CBFB40, 0xF6CCFB40, 0xF6CDFB40, 0xF6CEFB40, 0xF6CFFB40, 0xF6D0FB40, 0xF6D1FB40, 0xF6D2FB40, 0xF6D3FB40, + 0xF6D4FB40, 0xF6D5FB40, 0xF6D6FB40, 0xF6D7FB40, 0xF6D8FB40, 0xF6D9FB40, 0xF6DAFB40, 0xF6DBFB40, 0xF6DCFB40, 0xF6DDFB40, 0xF6DEFB40, 0xF6DFFB40, 0xF6E0FB40, 0xF6E1FB40, 0xF6E2FB40, + 0xF6E3FB40, 0xF6E4FB40, 
0xF6E5FB40, 0xF6E6FB40, 0xF6E7FB40, 0xF6E8FB40, 0xF6E9FB40, 0xF6EAFB40, 0xF6EBFB40, 0xF6ECFB40, 0xF6EDFB40, 0xF6EEFB40, 0xF6EFFB40, 0xF6F0FB40, 0xF6F1FB40, + 0xF6F2FB40, 0xF6F3FB40, 0xF6F4FB40, 0xF6F5FB40, 0xF6F6FB40, 0xF6F7FB40, 0xF6F8FB40, 0xF6F9FB40, 0xF6FAFB40, 0xF6FBFB40, 0xF6FCFB40, 0xF6FDFB40, 0xF6FEFB40, 0xF6FFFB40, 0xF700FB40, + 0xF701FB40, 0xF702FB40, 0xF703FB40, 0xF704FB40, 0xF705FB40, 0xF706FB40, 0xF707FB40, 0xF708FB40, 0xF709FB40, 0xF70AFB40, 0xF70BFB40, 0xF70CFB40, 0xF70DFB40, 0xF70EFB40, 0xF70FFB40, + 0xF710FB40, 0xF711FB40, 0xF712FB40, 0xF713FB40, 0xF714FB40, 0xF715FB40, 0xF716FB40, 0xF717FB40, 0xF718FB40, 0xF719FB40, 0xF71AFB40, 0xF71BFB40, 0xF71CFB40, 0xF71DFB40, 0xF71EFB40, + 0xF71FFB40, 0xF720FB40, 0xF721FB40, 0xF722FB40, 0xF723FB40, 0xF724FB40, 0xF725FB40, 0xF726FB40, 0xF727FB40, 0xF728FB40, 0xF729FB40, 0xF72AFB40, 0xF72BFB40, 0xF72CFB40, 0xF72DFB40, + 0xF72EFB40, 0xF72FFB40, 0xF730FB40, 0xF731FB40, 0xF732FB40, 0xF733FB40, 0xF734FB40, 0xF735FB40, 0xF736FB40, 0xF737FB40, 0xF738FB40, 0xF739FB40, 0xF73AFB40, 0xF73BFB40, 0xF73CFB40, + 0xF73DFB40, 0xF73EFB40, 0xF73FFB40, 0xF740FB40, 0xF741FB40, 0xF742FB40, 0xF743FB40, 0xF744FB40, 0xF745FB40, 0xF746FB40, 0xF747FB40, 0xF748FB40, 0xF749FB40, 0xF74AFB40, 0xF74BFB40, + 0xF74CFB40, 0xF74DFB40, 0xF74EFB40, 0xF74FFB40, 0xF750FB40, 0xF751FB40, 0xF752FB40, 0xF753FB40, 0xF754FB40, 0xF755FB40, 0xF756FB40, 0xF757FB40, 0xF758FB40, 0xF759FB40, 0xF75AFB40, + 0xF75BFB40, 0xF75CFB40, 0xF75DFB40, 0xF75EFB40, 0xF75FFB40, 0xF760FB40, 0xF761FB40, 0xF762FB40, 0xF763FB40, 0xF764FB40, 0xF765FB40, 0xF766FB40, 0xF767FB40, 0xF768FB40, 0xF769FB40, + 0xF76AFB40, 0xF76BFB40, 0xF76CFB40, 0xF76DFB40, 0xF76EFB40, 0xF76FFB40, 0xF770FB40, 0xF771FB40, 0xF772FB40, 0xF773FB40, 0xF774FB40, 0xF775FB40, 0xF776FB40, 0xF777FB40, 0xF778FB40, + 0xF779FB40, 0xF77AFB40, 0xF77BFB40, 0xF77CFB40, 0xF77DFB40, 0xF77EFB40, 0xF77FFB40, 0xF780FB40, 0xF781FB40, 0xF782FB40, 0xF783FB40, 0xF784FB40, 0xF785FB40, 0xF786FB40, 0xF787FB40, + 0xF788FB40, 
0xF789FB40, 0xF78AFB40, 0xF78BFB40, 0xF78CFB40, 0xF78DFB40, 0xF78EFB40, 0xF78FFB40, 0xF790FB40, 0xF791FB40, 0xF792FB40, 0xF793FB40, 0xF794FB40, 0xF795FB40, 0xF796FB40, + 0xF797FB40, 0xF798FB40, 0xF799FB40, 0xF79AFB40, 0xF79BFB40, 0xF79CFB40, 0xF79DFB40, 0xF79EFB40, 0xF79FFB40, 0xF7A0FB40, 0xF7A1FB40, 0xF7A2FB40, 0xF7A3FB40, 0xF7A4FB40, 0xF7A5FB40, + 0xF7A6FB40, 0xF7A7FB40, 0xF7A8FB40, 0xF7A9FB40, 0xF7AAFB40, 0xF7ABFB40, 0xF7ACFB40, 0xF7ADFB40, 0xF7AEFB40, 0xF7AFFB40, 0xF7B0FB40, 0xF7B1FB40, 0xF7B2FB40, 0xF7B3FB40, 0xF7B4FB40, + 0xF7B5FB40, 0xF7B6FB40, 0xF7B7FB40, 0xF7B8FB40, 0xF7B9FB40, 0xF7BAFB40, 0xF7BBFB40, 0xF7BCFB40, 0xF7BDFB40, 0xF7BEFB40, 0xF7BFFB40, 0xF7C0FB40, 0xF7C1FB40, 0xF7C2FB40, 0xF7C3FB40, + 0xF7C4FB40, 0xF7C5FB40, 0xF7C6FB40, 0xF7C7FB40, 0xF7C8FB40, 0xF7C9FB40, 0xF7CAFB40, 0xF7CBFB40, 0xF7CCFB40, 0xF7CDFB40, 0xF7CEFB40, 0xF7CFFB40, 0xF7D0FB40, 0xF7D1FB40, 0xF7D2FB40, + 0xF7D3FB40, 0xF7D4FB40, 0xF7D5FB40, 0xF7D6FB40, 0xF7D7FB40, 0xF7D8FB40, 0xF7D9FB40, 0xF7DAFB40, 0xF7DBFB40, 0xF7DCFB40, 0xF7DDFB40, 0xF7DEFB40, 0xF7DFFB40, 0xF7E0FB40, 0xF7E1FB40, + 0xF7E2FB40, 0xF7E3FB40, 0xF7E4FB40, 0xF7E5FB40, 0xF7E6FB40, 0xF7E7FB40, 0xF7E8FB40, 0xF7E9FB40, 0xF7EAFB40, 0xF7EBFB40, 0xF7ECFB40, 0xF7EDFB40, 0xF7EEFB40, 0xF7EFFB40, 0xF7F0FB40, + 0xF7F1FB40, 0xF7F2FB40, 0xF7F3FB40, 0xF7F4FB40, 0xF7F5FB40, 0xF7F6FB40, 0xF7F7FB40, 0xF7F8FB40, 0xF7F9FB40, 0xF7FAFB40, 0xF7FBFB40, 0xF7FCFB40, 0xF7FDFB40, 0xF7FEFB40, 0xF7FFFB40, + 0xF800FB40, 0xF801FB40, 0xF802FB40, 0xF803FB40, 0xF804FB40, 0xF805FB40, 0xF806FB40, 0xF807FB40, 0xF808FB40, 0xF809FB40, 0xF80AFB40, 0xF80BFB40, 0xF80CFB40, 0xF80DFB40, 0xF80EFB40, + 0xF80FFB40, 0xF810FB40, 0xF811FB40, 0xF812FB40, 0xF813FB40, 0xF814FB40, 0xF815FB40, 0xF816FB40, 0xF817FB40, 0xF818FB40, 0xF819FB40, 0xF81AFB40, 0xF81BFB40, 0xF81CFB40, 0xF81DFB40, + 0xF81EFB40, 0xF81FFB40, 0xF820FB40, 0xF821FB40, 0xF822FB40, 0xF823FB40, 0xF824FB40, 0xF825FB40, 0xF826FB40, 0xF827FB40, 0xF828FB40, 0xF829FB40, 0xF82AFB40, 0xF82BFB40, 0xF82CFB40, + 
0xF82DFB40, 0xF82EFB40, 0xF82FFB40, 0xF830FB40, 0xF831FB40, 0xF832FB40, 0xF833FB40, 0xF834FB40, 0xF835FB40, 0xF836FB40, 0xF837FB40, 0xF838FB40, 0xF839FB40, 0xF83AFB40, 0xF83BFB40, + 0xF83CFB40, 0xF83DFB40, 0xF83EFB40, 0xF83FFB40, 0xF840FB40, 0xF841FB40, 0xF842FB40, 0xF843FB40, 0xF844FB40, 0xF845FB40, 0xF846FB40, 0xF847FB40, 0xF848FB40, 0xF849FB40, 0xF84AFB40, + 0xF84BFB40, 0xF84CFB40, 0xF84DFB40, 0xF84EFB40, 0xF84FFB40, 0xF850FB40, 0xF851FB40, 0xF852FB40, 0xF853FB40, 0xF854FB40, 0xF855FB40, 0xF856FB40, 0xF857FB40, 0xF858FB40, 0xF859FB40, + 0xF85AFB40, 0xF85BFB40, 0xF85CFB40, 0xF85DFB40, 0xF85EFB40, 0xF85FFB40, 0xF860FB40, 0xF861FB40, 0xF862FB40, 0xF863FB40, 0xF864FB40, 0xF865FB40, 0xF866FB40, 0xF867FB40, 0xF868FB40, + 0xF869FB40, 0xF86AFB40, 0xF86BFB40, 0xF86CFB40, 0xF86DFB40, 0xF86EFB40, 0xF86FFB40, 0xF870FB40, 0xF871FB40, 0xF872FB40, 0xF873FB40, 0xF874FB40, 0xF875FB40, 0xF876FB40, 0xF877FB40, + 0xF878FB40, 0xF879FB40, 0xF87AFB40, 0xF87BFB40, 0xF87CFB40, 0xF87DFB40, 0xF87EFB40, 0xF87FFB40, 0xF880FB40, 0xF881FB40, 0xF882FB40, 0xF883FB40, 0xF884FB40, 0xF885FB40, 0xF886FB40, + 0xF887FB40, 0xF888FB40, 0xF889FB40, 0xF88AFB40, 0xF88BFB40, 0xF88CFB40, 0xF88DFB40, 0xF88EFB40, 0xF88FFB40, 0xF890FB40, 0xF891FB40, 0xF892FB40, 0xF893FB40, 0xF894FB40, 0xF895FB40, + 0xF896FB40, 0xF897FB40, 0xF898FB40, 0xF899FB40, 0xF89AFB40, 0xF89BFB40, 0xF89CFB40, 0xF89DFB40, 0xF89EFB40, 0xF89FFB40, 0xF8A0FB40, 0xF8A1FB40, 0xF8A2FB40, 0xF8A3FB40, 0xF8A4FB40, + 0xF8A5FB40, 0xF8A6FB40, 0xF8A7FB40, 0xF8A8FB40, 0xF8A9FB40, 0xF8AAFB40, 0xF8ABFB40, 0xF8ACFB40, 0xF8ADFB40, 0xF8AEFB40, 0xF8AFFB40, 0xF8B0FB40, 0xF8B1FB40, 0xF8B2FB40, 0xF8B3FB40, + 0xF8B4FB40, 0xF8B5FB40, 0xF8B6FB40, 0xF8B7FB40, 0xF8B8FB40, 0xF8B9FB40, 0xF8BAFB40, 0xF8BBFB40, 0xF8BCFB40, 0xF8BDFB40, 0xF8BEFB40, 0xF8BFFB40, 0xF8C0FB40, 0xF8C1FB40, 0xF8C2FB40, + 0xF8C3FB40, 0xF8C4FB40, 0xF8C5FB40, 0xF8C6FB40, 0xF8C7FB40, 0xF8C8FB40, 0xF8C9FB40, 0xF8CAFB40, 0xF8CBFB40, 0xF8CCFB40, 0xF8CDFB40, 0xF8CEFB40, 0xF8CFFB40, 0xF8D0FB40, 0xF8D1FB40, 
+ 0xF8D2FB40, 0xF8D3FB40, 0xF8D4FB40, 0xF8D5FB40, 0xF8D6FB40, 0xF8D7FB40, 0xF8D8FB40, 0xF8D9FB40, 0xF8DAFB40, 0xF8DBFB40, 0xF8DCFB40, 0xF8DDFB40, 0xF8DEFB40, 0xF8DFFB40, 0xF8E0FB40, + 0xF8E1FB40, 0xF8E2FB40, 0xF8E3FB40, 0xF8E4FB40, 0xF8E5FB40, 0xF8E6FB40, 0xF8E7FB40, 0xF8E8FB40, 0xF8E9FB40, 0xF8EAFB40, 0xF8EBFB40, 0xF8ECFB40, 0xF8EDFB40, 0xF8EEFB40, 0xF8EFFB40, + 0xF8F0FB40, 0xF8F1FB40, 0xF8F2FB40, 0xF8F3FB40, 0xF8F4FB40, 0xF8F5FB40, 0xF8F6FB40, 0xF8F7FB40, 0xF8F8FB40, 0xF8F9FB40, 0xF8FAFB40, 0xF8FBFB40, 0xF8FCFB40, 0xF8FDFB40, 0xF8FEFB40, + 0xF8FFFB40, 0xF900FB40, 0xF901FB40, 0xF902FB40, 0xF903FB40, 0xF904FB40, 0xF905FB40, 0xF906FB40, 0xF907FB40, 0xF908FB40, 0xF909FB40, 0xF90AFB40, 0xF90BFB40, 0xF90CFB40, 0xF90DFB40, + 0xF90EFB40, 0xF90FFB40, 0xF910FB40, 0xF911FB40, 0xF912FB40, 0xF913FB40, 0xF914FB40, 0xF915FB40, 0xF916FB40, 0xF917FB40, 0xF918FB40, 0xF919FB40, 0xF91AFB40, 0xF91BFB40, 0xF91CFB40, + 0xF91DFB40, 0xF91EFB40, 0xF91FFB40, 0xF920FB40, 0xF921FB40, 0xF922FB40, 0xF923FB40, 0xF924FB40, 0xF925FB40, 0xF926FB40, 0xF927FB40, 0xF928FB40, 0xF929FB40, 0xF92AFB40, 0xF92BFB40, + 0xF92CFB40, 0xF92DFB40, 0xF92EFB40, 0xF92FFB40, 0xF930FB40, 0xF931FB40, 0xF932FB40, 0xF933FB40, 0xF934FB40, 0xF935FB40, 0xF936FB40, 0xF937FB40, 0xF938FB40, 0xF939FB40, 0xF93AFB40, + 0xF93BFB40, 0xF93CFB40, 0xF93DFB40, 0xF93EFB40, 0xF93FFB40, 0xF940FB40, 0xF941FB40, 0xF942FB40, 0xF943FB40, 0xF944FB40, 0xF945FB40, 0xF946FB40, 0xF947FB40, 0xF948FB40, 0xF949FB40, + 0xF94AFB40, 0xF94BFB40, 0xF94CFB40, 0xF94DFB40, 0xF94EFB40, 0xF94FFB40, 0xF950FB40, 0xF951FB40, 0xF952FB40, 0xF953FB40, 0xF954FB40, 0xF955FB40, 0xF956FB40, 0xF957FB40, 0xF958FB40, + 0xF959FB40, 0xF95AFB40, 0xF95BFB40, 0xF95CFB40, 0xF95DFB40, 0xF95EFB40, 0xF95FFB40, 0xF960FB40, 0xF961FB40, 0xF962FB40, 0xF963FB40, 0xF964FB40, 0xF965FB40, 0xF966FB40, 0xF967FB40, + 0xF968FB40, 0xF969FB40, 0xF96AFB40, 0xF96BFB40, 0xF96CFB40, 0xF96DFB40, 0xF96EFB40, 0xF96FFB40, 0xF970FB40, 0xF971FB40, 0xF972FB40, 0xF973FB40, 0xF974FB40, 0xF975FB40, 
0xF976FB40, + 0xF977FB40, 0xF978FB40, 0xF979FB40, 0xF97AFB40, 0xF97BFB40, 0xF97CFB40, 0xF97DFB40, 0xF97EFB40, 0xF97FFB40, 0xF980FB40, 0xF981FB40, 0xF982FB40, 0xF983FB40, 0xF984FB40, 0xF985FB40, + 0xF986FB40, 0xF987FB40, 0xF988FB40, 0xF989FB40, 0xF98AFB40, 0xF98BFB40, 0xF98CFB40, 0xF98DFB40, 0xF98EFB40, 0xF98FFB40, 0xF990FB40, 0xF991FB40, 0xF992FB40, 0xF993FB40, 0xF994FB40, + 0xF995FB40, 0xF996FB40, 0xF997FB40, 0xF998FB40, 0xF999FB40, 0xF99AFB40, 0xF99BFB40, 0xF99CFB40, 0xF99DFB40, 0xF99EFB40, 0xF99FFB40, 0xF9A0FB40, 0xF9A1FB40, 0xF9A2FB40, 0xF9A3FB40, + 0xF9A4FB40, 0xF9A5FB40, 0xF9A6FB40, 0xF9A7FB40, 0xF9A8FB40, 0xF9A9FB40, 0xF9AAFB40, 0xF9ABFB40, 0xF9ACFB40, 0xF9ADFB40, 0xF9AEFB40, 0xF9AFFB40, 0xF9B0FB40, 0xF9B1FB40, 0xF9B2FB40, + 0xF9B3FB40, 0xF9B4FB40, 0xF9B5FB40, 0xF9B6FB40, 0xF9B7FB40, 0xF9B8FB40, 0xF9B9FB40, 0xF9BAFB40, 0xF9BBFB40, 0xF9BCFB40, 0xF9BDFB40, 0xF9BEFB40, 0xF9BFFB40, 0xF9C0FB40, 0xF9C1FB40, + 0xF9C2FB40, 0xF9C3FB40, 0xF9C4FB40, 0xF9C5FB40, 0xF9C6FB40, 0xF9C7FB40, 0xF9C8FB40, 0xF9C9FB40, 0xF9CAFB40, 0xF9CBFB40, 0xF9CCFB40, 0xF9CDFB40, 0xF9CEFB40, 0xF9CFFB40, 0xF9D0FB40, + 0xF9D1FB40, 0xF9D2FB40, 0xF9D3FB40, 0xF9D4FB40, 0xF9D5FB40, 0xF9D6FB40, 0xF9D7FB40, 0xF9D8FB40, 0xF9D9FB40, 0xF9DAFB40, 0xF9DBFB40, 0xF9DCFB40, 0xF9DDFB40, 0xF9DEFB40, 0xF9DFFB40, + 0xF9E0FB40, 0xF9E1FB40, 0xF9E2FB40, 0xF9E3FB40, 0xF9E4FB40, 0xF9E5FB40, 0xF9E6FB40, 0xF9E7FB40, 0xF9E8FB40, 0xF9E9FB40, 0xF9EAFB40, 0xF9EBFB40, 0xF9ECFB40, 0xF9EDFB40, 0xF9EEFB40, + 0xF9EFFB40, 0xF9F0FB40, 0xF9F1FB40, 0xF9F2FB40, 0xF9F3FB40, 0xF9F4FB40, 0xF9F5FB40, 0xF9F6FB40, 0xF9F7FB40, 0xF9F8FB40, 0xF9F9FB40, 0xF9FAFB40, 0xF9FBFB40, 0xF9FCFB40, 0xF9FDFB40, + 0xF9FEFB40, 0xF9FFFB40, 0xFA00FB40, 0xFA01FB40, 0xFA02FB40, 0xFA03FB40, 0xFA04FB40, 0xFA05FB40, 0xFA06FB40, 0xFA07FB40, 0xFA08FB40, 0xFA09FB40, 0xFA0AFB40, 0xFA0BFB40, 0xFA0CFB40, + 0xFA0DFB40, 0xFA0EFB40, 0xFA0FFB40, 0xFA10FB40, 0xFA11FB40, 0xFA12FB40, 0xFA13FB40, 0xFA14FB40, 0xFA15FB40, 0xFA16FB40, 0xFA17FB40, 0xFA18FB40, 0xFA19FB40, 
0xFA1AFB40, 0xFA1BFB40, + 0xFA1CFB40, 0xFA1DFB40, 0xFA1EFB40, 0xFA1FFB40, 0xFA20FB40, 0xFA21FB40, 0xFA22FB40, 0xFA23FB40, 0xFA24FB40, 0xFA25FB40, 0xFA26FB40, 0xFA27FB40, 0xFA28FB40, 0xFA29FB40, 0xFA2AFB40, + 0xFA2BFB40, 0xFA2CFB40, 0xFA2DFB40, 0xFA2EFB40, 0xFA2FFB40, 0xFA30FB40, 0xFA31FB40, 0xFA32FB40, 0xFA33FB40, 0xFA34FB40, 0xFA35FB40, 0xFA36FB40, 0xFA37FB40, 0xFA38FB40, 0xFA39FB40, + 0xFA3AFB40, 0xFA3BFB40, 0xFA3CFB40, 0xFA3DFB40, 0xFA3EFB40, 0xFA3FFB40, 0xFA40FB40, 0xFA41FB40, 0xFA42FB40, 0xFA43FB40, 0xFA44FB40, 0xFA45FB40, 0xFA46FB40, 0xFA47FB40, 0xFA48FB40, + 0xFA49FB40, 0xFA4AFB40, 0xFA4BFB40, 0xFA4CFB40, 0xFA4DFB40, 0xFA4EFB40, 0xFA4FFB40, 0xFA50FB40, 0xFA51FB40, 0xFA52FB40, 0xFA53FB40, 0xFA54FB40, 0xFA55FB40, 0xFA56FB40, 0xFA57FB40, + 0xFA58FB40, 0xFA59FB40, 0xFA5AFB40, 0xFA5BFB40, 0xFA5CFB40, 0xFA5DFB40, 0xFA5EFB40, 0xFA5FFB40, 0xFA60FB40, 0xFA61FB40, 0xFA62FB40, 0xFA63FB40, 0xFA64FB40, 0xFA65FB40, 0xFA66FB40, + 0xFA67FB40, 0xFA68FB40, 0xFA69FB40, 0xFA6AFB40, 0xFA6BFB40, 0xFA6CFB40, 0xFA6DFB40, 0xFA6EFB40, 0xFA6FFB40, 0xFA70FB40, 0xFA71FB40, 0xFA72FB40, 0xFA73FB40, 0xFA74FB40, 0xFA75FB40, + 0xFA76FB40, 0xFA77FB40, 0xFA78FB40, 0xFA79FB40, 0xFA7AFB40, 0xFA7BFB40, 0xFA7CFB40, 0xFA7DFB40, 0xFA7EFB40, 0xFA7FFB40, 0xFA80FB40, 0xFA81FB40, 0xFA82FB40, 0xFA83FB40, 0xFA84FB40, + 0xFA85FB40, 0xFA86FB40, 0xFA87FB40, 0xFA88FB40, 0xFA89FB40, 0xFA8AFB40, 0xFA8BFB40, 0xFA8CFB40, 0xFA8DFB40, 0xFA8EFB40, 0xFA8FFB40, 0xFA90FB40, 0xFA91FB40, 0xFA92FB40, 0xFA93FB40, + 0xFA94FB40, 0xFA95FB40, 0xFA96FB40, 0xFA97FB40, 0xFA98FB40, 0xFA99FB40, 0xFA9AFB40, 0xFA9BFB40, 0xFA9CFB40, 0xFA9DFB40, 0xFA9EFB40, 0xFA9FFB40, 0xFAA0FB40, 0xFAA1FB40, 0xFAA2FB40, + 0xFAA3FB40, 0xFAA4FB40, 0xFAA5FB40, 0xFAA6FB40, 0xFAA7FB40, 0xFAA8FB40, 0xFAA9FB40, 0xFAAAFB40, 0xFAABFB40, 0xFAACFB40, 0xFAADFB40, 0xFAAEFB40, 0xFAAFFB40, 0xFAB0FB40, 0xFAB1FB40, + 0xFAB2FB40, 0xFAB3FB40, 0xFAB4FB40, 0xFAB5FB40, 0xFAB6FB40, 0xFAB7FB40, 0xFAB8FB40, 0xFAB9FB40, 0xFABAFB40, 0xFABBFB40, 0xFABCFB40, 0xFABDFB40, 
0xFABEFB40, 0xFABFFB40, 0xFAC0FB40, + 0xFAC1FB40, 0xFAC2FB40, 0xFAC3FB40, 0xFAC4FB40, 0xFAC5FB40, 0xFAC6FB40, 0xFAC7FB40, 0xFAC8FB40, 0xFAC9FB40, 0xFACAFB40, 0xFACBFB40, 0xFACCFB40, 0xFACDFB40, 0xFACEFB40, 0xFACFFB40, + 0xFAD0FB40, 0xFAD1FB40, 0xFAD2FB40, 0xFAD3FB40, 0xFAD4FB40, 0xFAD5FB40, 0xFAD6FB40, 0xFAD7FB40, 0xFAD8FB40, 0xFAD9FB40, 0xFADAFB40, 0xFADBFB40, 0xFADCFB40, 0xFADDFB40, 0xFADEFB40, + 0xFADFFB40, 0xFAE0FB40, 0xFAE1FB40, 0xFAE2FB40, 0xFAE3FB40, 0xFAE4FB40, 0xFAE5FB40, 0xFAE6FB40, 0xFAE7FB40, 0xFAE8FB40, 0xFAE9FB40, 0xFAEAFB40, 0xFAEBFB40, 0xFAECFB40, 0xFAEDFB40, + 0xFAEEFB40, 0xFAEFFB40, 0xFAF0FB40, 0xFAF1FB40, 0xFAF2FB40, 0xFAF3FB40, 0xFAF4FB40, 0xFAF5FB40, 0xFAF6FB40, 0xFAF7FB40, 0xFAF8FB40, 0xFAF9FB40, 0xFAFAFB40, 0xFAFBFB40, 0xFAFCFB40, + 0xFAFDFB40, 0xFAFEFB40, 0xFAFFFB40, 0xFB00FB40, 0xFB01FB40, 0xFB02FB40, 0xFB03FB40, 0xFB04FB40, 0xFB05FB40, 0xFB06FB40, 0xFB07FB40, 0xFB08FB40, 0xFB09FB40, 0xFB0AFB40, 0xFB0BFB40, + 0xFB0CFB40, 0xFB0DFB40, 0xFB0EFB40, 0xFB0FFB40, 0xFB10FB40, 0xFB11FB40, 0xFB12FB40, 0xFB13FB40, 0xFB14FB40, 0xFB15FB40, 0xFB16FB40, 0xFB17FB40, 0xFB18FB40, 0xFB19FB40, 0xFB1AFB40, + 0xFB1BFB40, 0xFB1CFB40, 0xFB1DFB40, 0xFB1EFB40, 0xFB1FFB40, 0xFB20FB40, 0xFB21FB40, 0xFB22FB40, 0xFB23FB40, 0xFB24FB40, 0xFB25FB40, 0xFB26FB40, 0xFB27FB40, 0xFB28FB40, 0xFB29FB40, + 0xFB2AFB40, 0xFB2BFB40, 0xFB2CFB40, 0xFB2DFB40, 0xFB2EFB40, 0xFB2FFB40, 0xFB30FB40, 0xFB31FB40, 0xFB32FB40, 0xFB33FB40, 0xFB34FB40, 0xFB35FB40, 0xFB36FB40, 0xFB37FB40, 0xFB38FB40, + 0xFB39FB40, 0xFB3AFB40, 0xFB3BFB40, 0xFB3CFB40, 0xFB3DFB40, 0xFB3EFB40, 0xFB3FFB40, 0xFB40FB40, 0xFB41FB40, 0xFB42FB40, 0xFB43FB40, 0xFB44FB40, 0xFB45FB40, 0xFB46FB40, 0xFB47FB40, + 0xFB48FB40, 0xFB49FB40, 0xFB4AFB40, 0xFB4BFB40, 0xFB4CFB40, 0xFB4DFB40, 0xFB4EFB40, 0xFB4FFB40, 0xFB50FB40, 0xFB51FB40, 0xFB52FB40, 0xFB53FB40, 0xFB54FB40, 0xFB55FB40, 0xFB56FB40, + 0xFB57FB40, 0xFB58FB40, 0xFB59FB40, 0xFB5AFB40, 0xFB5BFB40, 0xFB5CFB40, 0xFB5DFB40, 0xFB5EFB40, 0xFB5FFB40, 0xFB60FB40, 0xFB61FB40, 
0xFB62FB40, 0xFB63FB40, 0xFB64FB40, 0xFB65FB40, + 0xFB66FB40, 0xFB67FB40, 0xFB68FB40, 0xFB69FB40, 0xFB6AFB40, 0xFB6BFB40, 0xFB6CFB40, 0xFB6DFB40, 0xFB6EFB40, 0xFB6FFB40, 0xFB70FB40, 0xFB71FB40, 0xFB72FB40, 0xFB73FB40, 0xFB74FB40, + 0xFB75FB40, 0xFB76FB40, 0xFB77FB40, 0xFB78FB40, 0xFB79FB40, 0xFB7AFB40, 0xFB7BFB40, 0xFB7CFB40, 0xFB7DFB40, 0xFB7EFB40, 0xFB7FFB40, 0xFB80FB40, 0xFB81FB40, 0xFB82FB40, 0xFB83FB40, + 0xFB84FB40, 0xFB85FB40, 0xFB86FB40, 0xFB87FB40, 0xFB88FB40, 0xFB89FB40, 0xFB8AFB40, 0xFB8BFB40, 0xFB8CFB40, 0xFB8DFB40, 0xFB8EFB40, 0xFB8FFB40, 0xFB90FB40, 0xFB91FB40, 0xFB92FB40, + 0xFB93FB40, 0xFB94FB40, 0xFB95FB40, 0xFB96FB40, 0xFB97FB40, 0xFB98FB40, 0xFB99FB40, 0xFB9AFB40, 0xFB9BFB40, 0xFB9CFB40, 0xFB9DFB40, 0xFB9EFB40, 0xFB9FFB40, 0xFBA0FB40, 0xFBA1FB40, + 0xFBA2FB40, 0xFBA3FB40, 0xFBA4FB40, 0xFBA5FB40, 0xFBA6FB40, 0xFBA7FB40, 0xFBA8FB40, 0xFBA9FB40, 0xFBAAFB40, 0xFBABFB40, 0xFBACFB40, 0xFBADFB40, 0xFBAEFB40, 0xFBAFFB40, 0xFBB0FB40, + 0xFBB1FB40, 0xFBB2FB40, 0xFBB3FB40, 0xFBB4FB40, 0xFBB5FB40, 0xFBB6FB40, 0xFBB7FB40, 0xFBB8FB40, 0xFBB9FB40, 0xFBBAFB40, 0xFBBBFB40, 0xFBBCFB40, 0xFBBDFB40, 0xFBBEFB40, 0xFBBFFB40, + 0xFBC0FB40, 0xFBC1FB40, 0xFBC2FB40, 0xFBC3FB40, 0xFBC4FB40, 0xFBC5FB40, 0xFBC6FB40, 0xFBC7FB40, 0xFBC8FB40, 0xFBC9FB40, 0xFBCAFB40, 0xFBCBFB40, 0xFBCCFB40, 0xFBCDFB40, 0xFBCEFB40, + 0xFBCFFB40, 0xFBD0FB40, 0xFBD1FB40, 0xFBD2FB40, 0xFBD3FB40, 0xFBD4FB40, 0xFBD5FB40, 0xFBD6FB40, 0xFBD7FB40, 0xFBD8FB40, 0xFBD9FB40, 0xFBDAFB40, 0xFBDBFB40, 0xFBDCFB40, 0xFBDDFB40, + 0xFBDEFB40, 0xFBDFFB40, 0xFBE0FB40, 0xFBE1FB40, 0xFBE2FB40, 0xFBE3FB40, 0xFBE4FB40, 0xFBE5FB40, 0xFBE6FB40, 0xFBE7FB40, 0xFBE8FB40, 0xFBE9FB40, 0xFBEAFB40, 0xFBEBFB40, 0xFBECFB40, + 0xFBEDFB40, 0xFBEEFB40, 0xFBEFFB40, 0xFBF0FB40, 0xFBF1FB40, 0xFBF2FB40, 0xFBF3FB40, 0xFBF4FB40, 0xFBF5FB40, 0xFBF6FB40, 0xFBF7FB40, 0xFBF8FB40, 0xFBF9FB40, 0xFBFAFB40, 0xFBFBFB40, + 0xFBFCFB40, 0xFBFDFB40, 0xFBFEFB40, 0xFBFFFB40, 0xFC00FB40, 0xFC01FB40, 0xFC02FB40, 0xFC03FB40, 0xFC04FB40, 0xFC05FB40, 
0xFC06FB40, 0xFC07FB40, 0xFC08FB40, 0xFC09FB40, 0xFC0AFB40, + 0xFC0BFB40, 0xFC0CFB40, 0xFC0DFB40, 0xFC0EFB40, 0xFC0FFB40, 0xFC10FB40, 0xFC11FB40, 0xFC12FB40, 0xFC13FB40, 0xFC14FB40, 0xFC15FB40, 0xFC16FB40, 0xFC17FB40, 0xFC18FB40, 0xFC19FB40, + 0xFC1AFB40, 0xFC1BFB40, 0xFC1CFB40, 0xFC1DFB40, 0xFC1EFB40, 0xFC1FFB40, 0xFC20FB40, 0xFC21FB40, 0xFC22FB40, 0xFC23FB40, 0xFC24FB40, 0xFC25FB40, 0xFC26FB40, 0xFC27FB40, 0xFC28FB40, + 0xFC29FB40, 0xFC2AFB40, 0xFC2BFB40, 0xFC2CFB40, 0xFC2DFB40, 0xFC2EFB40, 0xFC2FFB40, 0xFC30FB40, 0xFC31FB40, 0xFC32FB40, 0xFC33FB40, 0xFC34FB40, 0xFC35FB40, 0xFC36FB40, 0xFC37FB40, + 0xFC38FB40, 0xFC39FB40, 0xFC3AFB40, 0xFC3BFB40, 0xFC3CFB40, 0xFC3DFB40, 0xFC3EFB40, 0xFC3FFB40, 0xFC40FB40, 0xFC41FB40, 0xFC42FB40, 0xFC43FB40, 0xFC44FB40, 0xFC45FB40, 0xFC46FB40, + 0xFC47FB40, 0xFC48FB40, 0xFC49FB40, 0xFC4AFB40, 0xFC4BFB40, 0xFC4CFB40, 0xFC4DFB40, 0xFC4EFB40, 0xFC4FFB40, 0xFC50FB40, 0xFC51FB40, 0xFC52FB40, 0xFC53FB40, 0xFC54FB40, 0xFC55FB40, + 0xFC56FB40, 0xFC57FB40, 0xFC58FB40, 0xFC59FB40, 0xFC5AFB40, 0xFC5BFB40, 0xFC5CFB40, 0xFC5DFB40, 0xFC5EFB40, 0xFC5FFB40, 0xFC60FB40, 0xFC61FB40, 0xFC62FB40, 0xFC63FB40, 0xFC64FB40, + 0xFC65FB40, 0xFC66FB40, 0xFC67FB40, 0xFC68FB40, 0xFC69FB40, 0xFC6AFB40, 0xFC6BFB40, 0xFC6CFB40, 0xFC6DFB40, 0xFC6EFB40, 0xFC6FFB40, 0xFC70FB40, 0xFC71FB40, 0xFC72FB40, 0xFC73FB40, + 0xFC74FB40, 0xFC75FB40, 0xFC76FB40, 0xFC77FB40, 0xFC78FB40, 0xFC79FB40, 0xFC7AFB40, 0xFC7BFB40, 0xFC7CFB40, 0xFC7DFB40, 0xFC7EFB40, 0xFC7FFB40, 0xFC80FB40, 0xFC81FB40, 0xFC82FB40, + 0xFC83FB40, 0xFC84FB40, 0xFC85FB40, 0xFC86FB40, 0xFC87FB40, 0xFC88FB40, 0xFC89FB40, 0xFC8AFB40, 0xFC8BFB40, 0xFC8CFB40, 0xFC8DFB40, 0xFC8EFB40, 0xFC8FFB40, 0xFC90FB40, 0xFC91FB40, + 0xFC92FB40, 0xFC93FB40, 0xFC94FB40, 0xFC95FB40, 0xFC96FB40, 0xFC97FB40, 0xFC98FB40, 0xFC99FB40, 0xFC9AFB40, 0xFC9BFB40, 0xFC9CFB40, 0xFC9DFB40, 0xFC9EFB40, 0xFC9FFB40, 0xFCA0FB40, + 0xFCA1FB40, 0xFCA2FB40, 0xFCA3FB40, 0xFCA4FB40, 0xFCA5FB40, 0xFCA6FB40, 0xFCA7FB40, 0xFCA8FB40, 0xFCA9FB40, 
0xFCAAFB40, 0xFCABFB40, 0xFCACFB40, 0xFCADFB40, 0xFCAEFB40, 0xFCAFFB40, + 0xFCB0FB40, 0xFCB1FB40, 0xFCB2FB40, 0xFCB3FB40, 0xFCB4FB40, 0xFCB5FB40, 0xFCB6FB40, 0xFCB7FB40, 0xFCB8FB40, 0xFCB9FB40, 0xFCBAFB40, 0xFCBBFB40, 0xFCBCFB40, 0xFCBDFB40, 0xFCBEFB40, + 0xFCBFFB40, 0xFCC0FB40, 0xFCC1FB40, 0xFCC2FB40, 0xFCC3FB40, 0xFCC4FB40, 0xFCC5FB40, 0xFCC6FB40, 0xFCC7FB40, 0xFCC8FB40, 0xFCC9FB40, 0xFCCAFB40, 0xFCCBFB40, 0xFCCCFB40, 0xFCCDFB40, + 0xFCCEFB40, 0xFCCFFB40, 0xFCD0FB40, 0xFCD1FB40, 0xFCD2FB40, 0xFCD3FB40, 0xFCD4FB40, 0xFCD5FB40, 0xFCD6FB40, 0xFCD7FB40, 0xFCD8FB40, 0xFCD9FB40, 0xFCDAFB40, 0xFCDBFB40, 0xFCDCFB40, + 0xFCDDFB40, 0xFCDEFB40, 0xFCDFFB40, 0xFCE0FB40, 0xFCE1FB40, 0xFCE2FB40, 0xFCE3FB40, 0xFCE4FB40, 0xFCE5FB40, 0xFCE6FB40, 0xFCE7FB40, 0xFCE8FB40, 0xFCE9FB40, 0xFCEAFB40, 0xFCEBFB40, + 0xFCECFB40, 0xFCEDFB40, 0xFCEEFB40, 0xFCEFFB40, 0xFCF0FB40, 0xFCF1FB40, 0xFCF2FB40, 0xFCF3FB40, 0xFCF4FB40, 0xFCF5FB40, 0xFCF6FB40, 0xFCF7FB40, 0xFCF8FB40, 0xFCF9FB40, 0xFCFAFB40, + 0xFCFBFB40, 0xFCFCFB40, 0xFCFDFB40, 0xFCFEFB40, 0xFCFFFB40, 0xFD00FB40, 0xFD01FB40, 0xFD02FB40, 0xFD03FB40, 0xFD04FB40, 0xFD05FB40, 0xFD06FB40, 0xFD07FB40, 0xFD08FB40, 0xFD09FB40, + 0xFD0AFB40, 0xFD0BFB40, 0xFD0CFB40, 0xFD0DFB40, 0xFD0EFB40, 0xFD0FFB40, 0xFD10FB40, 0xFD11FB40, 0xFD12FB40, 0xFD13FB40, 0xFD14FB40, 0xFD15FB40, 0xFD16FB40, 0xFD17FB40, 0xFD18FB40, + 0xFD19FB40, 0xFD1AFB40, 0xFD1BFB40, 0xFD1CFB40, 0xFD1DFB40, 0xFD1EFB40, 0xFD1FFB40, 0xFD20FB40, 0xFD21FB40, 0xFD22FB40, 0xFD23FB40, 0xFD24FB40, 0xFD25FB40, 0xFD26FB40, 0xFD27FB40, + 0xFD28FB40, 0xFD29FB40, 0xFD2AFB40, 0xFD2BFB40, 0xFD2CFB40, 0xFD2DFB40, 0xFD2EFB40, 0xFD2FFB40, 0xFD30FB40, 0xFD31FB40, 0xFD32FB40, 0xFD33FB40, 0xFD34FB40, 0xFD35FB40, 0xFD36FB40, + 0xFD37FB40, 0xFD38FB40, 0xFD39FB40, 0xFD3AFB40, 0xFD3BFB40, 0xFD3CFB40, 0xFD3DFB40, 0xFD3EFB40, 0xFD3FFB40, 0xFD40FB40, 0xFD41FB40, 0xFD42FB40, 0xFD43FB40, 0xFD44FB40, 0xFD45FB40, + 0xFD46FB40, 0xFD47FB40, 0xFD48FB40, 0xFD49FB40, 0xFD4AFB40, 0xFD4BFB40, 0xFD4CFB40, 0xFD4DFB40, 
0xFD4EFB40, 0xFD4FFB40, 0xFD50FB40, 0xFD51FB40, 0xFD52FB40, 0xFD53FB40, 0xFD54FB40, + 0xFD55FB40, 0xFD56FB40, 0xFD57FB40, 0xFD58FB40, 0xFD59FB40, 0xFD5AFB40, 0xFD5BFB40, 0xFD5CFB40, 0xFD5DFB40, 0xFD5EFB40, 0xFD5FFB40, 0xFD60FB40, 0xFD61FB40, 0xFD62FB40, 0xFD63FB40, + 0xFD64FB40, 0xFD65FB40, 0xFD66FB40, 0xFD67FB40, 0xFD68FB40, 0xFD69FB40, 0xFD6AFB40, 0xFD6BFB40, 0xFD6CFB40, 0xFD6DFB40, 0xFD6EFB40, 0xFD6FFB40, 0xFD70FB40, 0xFD71FB40, 0xFD72FB40, + 0xFD73FB40, 0xFD74FB40, 0xFD75FB40, 0xFD76FB40, 0xFD77FB40, 0xFD78FB40, 0xFD79FB40, 0xFD7AFB40, 0xFD7BFB40, 0xFD7CFB40, 0xFD7DFB40, 0xFD7EFB40, 0xFD7FFB40, 0xFD80FB40, 0xFD81FB40, + 0xFD82FB40, 0xFD83FB40, 0xFD84FB40, 0xFD85FB40, 0xFD86FB40, 0xFD87FB40, 0xFD88FB40, 0xFD89FB40, 0xFD8AFB40, 0xFD8BFB40, 0xFD8CFB40, 0xFD8DFB40, 0xFD8EFB40, 0xFD8FFB40, 0xFD90FB40, + 0xFD91FB40, 0xFD92FB40, 0xFD93FB40, 0xFD94FB40, 0xFD95FB40, 0xFD96FB40, 0xFD97FB40, 0xFD98FB40, 0xFD99FB40, 0xFD9AFB40, 0xFD9BFB40, 0xFD9CFB40, 0xFD9DFB40, 0xFD9EFB40, 0xFD9FFB40, + 0xFDA0FB40, 0xFDA1FB40, 0xFDA2FB40, 0xFDA3FB40, 0xFDA4FB40, 0xFDA5FB40, 0xFDA6FB40, 0xFDA7FB40, 0xFDA8FB40, 0xFDA9FB40, 0xFDAAFB40, 0xFDABFB40, 0xFDACFB40, 0xFDADFB40, 0xFDAEFB40, + 0xFDAFFB40, 0xFDB0FB40, 0xFDB1FB40, 0xFDB2FB40, 0xFDB3FB40, 0xFDB4FB40, 0xFDB5FB40, 0xFDB6FB40, 0xFDB7FB40, 0xFDB8FB40, 0xFDB9FB40, 0xFDBAFB40, 0xFDBBFB40, 0xFDBCFB40, 0xFDBDFB40, + 0xFDBEFB40, 0xFDBFFB40, 0xFDC0FB40, 0xFDC1FB40, 0xFDC2FB40, 0xFDC3FB40, 0xFDC4FB40, 0xFDC5FB40, 0xFDC6FB40, 0xFDC7FB40, 0xFDC8FB40, 0xFDC9FB40, 0xFDCAFB40, 0xFDCBFB40, 0xFDCCFB40, + 0xFDCDFB40, 0xFDCEFB40, 0xFDCFFB40, 0xFDD0FB40, 0xFDD1FB40, 0xFDD2FB40, 0xFDD3FB40, 0xFDD4FB40, 0xFDD5FB40, 0xFDD6FB40, 0xFDD7FB40, 0xFDD8FB40, 0xFDD9FB40, 0xFDDAFB40, 0xFDDBFB40, + 0xFDDCFB40, 0xFDDDFB40, 0xFDDEFB40, 0xFDDFFB40, 0xFDE0FB40, 0xFDE1FB40, 0xFDE2FB40, 0xFDE3FB40, 0xFDE4FB40, 0xFDE5FB40, 0xFDE6FB40, 0xFDE7FB40, 0xFDE8FB40, 0xFDE9FB40, 0xFDEAFB40, + 0xFDEBFB40, 0xFDECFB40, 0xFDEDFB40, 0xFDEEFB40, 0xFDEFFB40, 0xFDF0FB40, 0xFDF1FB40, 
0xFDF2FB40, 0xFDF3FB40, 0xFDF4FB40, 0xFDF5FB40, 0xFDF6FB40, 0xFDF7FB40, 0xFDF8FB40, 0xFDF9FB40, + 0xFDFAFB40, 0xFDFBFB40, 0xFDFCFB40, 0xFDFDFB40, 0xFDFEFB40, 0xFDFFFB40, 0xFE00FB40, 0xFE01FB40, 0xFE02FB40, 0xFE03FB40, 0xFE04FB40, 0xFE05FB40, 0xFE06FB40, 0xFE07FB40, 0xFE08FB40, + 0xFE09FB40, 0xFE0AFB40, 0xFE0BFB40, 0xFE0CFB40, 0xFE0DFB40, 0xFE0EFB40, 0xFE0FFB40, 0xFE10FB40, 0xFE11FB40, 0xFE12FB40, 0xFE13FB40, 0xFE14FB40, 0xFE15FB40, 0xFE16FB40, 0xFE17FB40, + 0xFE18FB40, 0xFE19FB40, 0xFE1AFB40, 0xFE1BFB40, 0xFE1CFB40, 0xFE1DFB40, 0xFE1EFB40, 0xFE1FFB40, 0xFE20FB40, 0xFE21FB40, 0xFE22FB40, 0xFE23FB40, 0xFE24FB40, 0xFE25FB40, 0xFE26FB40, + 0xFE27FB40, 0xFE28FB40, 0xFE29FB40, 0xFE2AFB40, 0xFE2BFB40, 0xFE2CFB40, 0xFE2DFB40, 0xFE2EFB40, 0xFE2FFB40, 0xFE30FB40, 0xFE31FB40, 0xFE32FB40, 0xFE33FB40, 0xFE34FB40, 0xFE35FB40, + 0xFE36FB40, 0xFE37FB40, 0xFE38FB40, 0xFE39FB40, 0xFE3AFB40, 0xFE3BFB40, 0xFE3CFB40, 0xFE3DFB40, 0xFE3EFB40, 0xFE3FFB40, 0xFE40FB40, 0xFE41FB40, 0xFE42FB40, 0xFE43FB40, 0xFE44FB40, + 0xFE45FB40, 0xFE46FB40, 0xFE47FB40, 0xFE48FB40, 0xFE49FB40, 0xFE4AFB40, 0xFE4BFB40, 0xFE4CFB40, 0xFE4DFB40, 0xFE4EFB40, 0xFE4FFB40, 0xFE50FB40, 0xFE51FB40, 0xFE52FB40, 0xFE53FB40, + 0xFE54FB40, 0xFE55FB40, 0xFE56FB40, 0xFE57FB40, 0xFE58FB40, 0xFE59FB40, 0xFE5AFB40, 0xFE5BFB40, 0xFE5CFB40, 0xFE5DFB40, 0xFE5EFB40, 0xFE5FFB40, 0xFE60FB40, 0xFE61FB40, 0xFE62FB40, + 0xFE63FB40, 0xFE64FB40, 0xFE65FB40, 0xFE66FB40, 0xFE67FB40, 0xFE68FB40, 0xFE69FB40, 0xFE6AFB40, 0xFE6BFB40, 0xFE6CFB40, 0xFE6DFB40, 0xFE6EFB40, 0xFE6FFB40, 0xFE70FB40, 0xFE71FB40, + 0xFE72FB40, 0xFE73FB40, 0xFE74FB40, 0xFE75FB40, 0xFE76FB40, 0xFE77FB40, 0xFE78FB40, 0xFE79FB40, 0xFE7AFB40, 0xFE7BFB40, 0xFE7CFB40, 0xFE7DFB40, 0xFE7EFB40, 0xFE7FFB40, 0xFE80FB40, + 0xFE81FB40, 0xFE82FB40, 0xFE83FB40, 0xFE84FB40, 0xFE85FB40, 0xFE86FB40, 0xFE87FB40, 0xFE88FB40, 0xFE89FB40, 0xFE8AFB40, 0xFE8BFB40, 0xFE8CFB40, 0xFE8DFB40, 0xFE8EFB40, 0xFE8FFB40, + 0xFE90FB40, 0xFE91FB40, 0xFE92FB40, 0xFE93FB40, 0xFE94FB40, 0xFE95FB40, 
0xFE96FB40, 0xFE97FB40, 0xFE98FB40, 0xFE99FB40, 0xFE9AFB40, 0xFE9BFB40, 0xFE9CFB40, 0xFE9DFB40, 0xFE9EFB40, + 0xFE9FFB40, 0xFEA0FB40, 0xFEA1FB40, 0xFEA2FB40, 0xFEA3FB40, 0xFEA4FB40, 0xFEA5FB40, 0xFEA6FB40, 0xFEA7FB40, 0xFEA8FB40, 0xFEA9FB40, 0xFEAAFB40, 0xFEABFB40, 0xFEACFB40, 0xFEADFB40, + 0xFEAEFB40, 0xFEAFFB40, 0xFEB0FB40, 0xFEB1FB40, 0xFEB2FB40, 0xFEB3FB40, 0xFEB4FB40, 0xFEB5FB40, 0xFEB6FB40, 0xFEB7FB40, 0xFEB8FB40, 0xFEB9FB40, 0xFEBAFB40, 0xFEBBFB40, 0xFEBCFB40, + 0xFEBDFB40, 0xFEBEFB40, 0xFEBFFB40, 0xFEC0FB40, 0xFEC1FB40, 0xFEC2FB40, 0xFEC3FB40, 0xFEC4FB40, 0xFEC5FB40, 0xFEC6FB40, 0xFEC7FB40, 0xFEC8FB40, 0xFEC9FB40, 0xFECAFB40, 0xFECBFB40, + 0xFECCFB40, 0xFECDFB40, 0xFECEFB40, 0xFECFFB40, 0xFED0FB40, 0xFED1FB40, 0xFED2FB40, 0xFED3FB40, 0xFED4FB40, 0xFED5FB40, 0xFED6FB40, 0xFED7FB40, 0xFED8FB40, 0xFED9FB40, 0xFEDAFB40, + 0xFEDBFB40, 0xFEDCFB40, 0xFEDDFB40, 0xFEDEFB40, 0xFEDFFB40, 0xFEE0FB40, 0xFEE1FB40, 0xFEE2FB40, 0xFEE3FB40, 0xFEE4FB40, 0xFEE5FB40, 0xFEE6FB40, 0xFEE7FB40, 0xFEE8FB40, 0xFEE9FB40, + 0xFEEAFB40, 0xFEEBFB40, 0xFEECFB40, 0xFEEDFB40, 0xFEEEFB40, 0xFEEFFB40, 0xFEF0FB40, 0xFEF1FB40, 0xFEF2FB40, 0xFEF3FB40, 0xFEF4FB40, 0xFEF5FB40, 0xFEF6FB40, 0xFEF7FB40, 0xFEF8FB40, + 0xFEF9FB40, 0xFEFAFB40, 0xFEFBFB40, 0xFEFCFB40, 0xFEFDFB40, 0xFEFEFB40, 0xFEFFFB40, 0xFF00FB40, 0xFF01FB40, 0xFF02FB40, 0xFF03FB40, 0xFF04FB40, 0xFF05FB40, 0xFF06FB40, 0xFF07FB40, + 0xFF08FB40, 0xFF09FB40, 0xFF0AFB40, 0xFF0BFB40, 0xFF0CFB40, 0xFF0DFB40, 0xFF0EFB40, 0xFF0FFB40, 0xFF10FB40, 0xFF11FB40, 0xFF12FB40, 0xFF13FB40, 0xFF14FB40, 0xFF15FB40, 0xFF16FB40, + 0xFF17FB40, 0xFF18FB40, 0xFF19FB40, 0xFF1AFB40, 0xFF1BFB40, 0xFF1CFB40, 0xFF1DFB40, 0xFF1EFB40, 0xFF1FFB40, 0xFF20FB40, 0xFF21FB40, 0xFF22FB40, 0xFF23FB40, 0xFF24FB40, 0xFF25FB40, + 0xFF26FB40, 0xFF27FB40, 0xFF28FB40, 0xFF29FB40, 0xFF2AFB40, 0xFF2BFB40, 0xFF2CFB40, 0xFF2DFB40, 0xFF2EFB40, 0xFF2FFB40, 0xFF30FB40, 0xFF31FB40, 0xFF32FB40, 0xFF33FB40, 0xFF34FB40, + 0xFF35FB40, 0xFF36FB40, 0xFF37FB40, 0xFF38FB40, 0xFF39FB40, 
0xFF3AFB40, 0xFF3BFB40, 0xFF3CFB40, 0xFF3DFB40, 0xFF3EFB40, 0xFF3FFB40, 0xFF40FB40, 0xFF41FB40, 0xFF42FB40, 0xFF43FB40, + 0xFF44FB40, 0xFF45FB40, 0xFF46FB40, 0xFF47FB40, 0xFF48FB40, 0xFF49FB40, 0xFF4AFB40, 0xFF4BFB40, 0xFF4CFB40, 0xFF4DFB40, 0xFF4EFB40, 0xFF4FFB40, 0xFF50FB40, 0xFF51FB40, 0xFF52FB40, + 0xFF53FB40, 0xFF54FB40, 0xFF55FB40, 0xFF56FB40, 0xFF57FB40, 0xFF58FB40, 0xFF59FB40, 0xFF5AFB40, 0xFF5BFB40, 0xFF5CFB40, 0xFF5DFB40, 0xFF5EFB40, 0xFF5FFB40, 0xFF60FB40, 0xFF61FB40, + 0xFF62FB40, 0xFF63FB40, 0xFF64FB40, 0xFF65FB40, 0xFF66FB40, 0xFF67FB40, 0xFF68FB40, 0xFF69FB40, 0xFF6AFB40, 0xFF6BFB40, 0xFF6CFB40, 0xFF6DFB40, 0xFF6EFB40, 0xFF6FFB40, 0xFF70FB40, + 0xFF71FB40, 0xFF72FB40, 0xFF73FB40, 0xFF74FB40, 0xFF75FB40, 0xFF76FB40, 0xFF77FB40, 0xFF78FB40, 0xFF79FB40, 0xFF7AFB40, 0xFF7BFB40, 0xFF7CFB40, 0xFF7DFB40, 0xFF7EFB40, 0xFF7FFB40, + 0xFF80FB40, 0xFF81FB40, 0xFF82FB40, 0xFF83FB40, 0xFF84FB40, 0xFF85FB40, 0xFF86FB40, 0xFF87FB40, 0xFF88FB40, 0xFF89FB40, 0xFF8AFB40, 0xFF8BFB40, 0xFF8CFB40, 0xFF8DFB40, 0xFF8EFB40, + 0xFF8FFB40, 0xFF90FB40, 0xFF91FB40, 0xFF92FB40, 0xFF93FB40, 0xFF94FB40, 0xFF95FB40, 0xFF96FB40, 0xFF97FB40, 0xFF98FB40, 0xFF99FB40, 0xFF9AFB40, 0xFF9BFB40, 0xFF9CFB40, 0xFF9DFB40, + 0xFF9EFB40, 0xFF9FFB40, 0xFFA0FB40, 0xFFA1FB40, 0xFFA2FB40, 0xFFA3FB40, 0xFFA4FB40, 0xFFA5FB40, 0xFFA6FB40, 0xFFA7FB40, 0xFFA8FB40, 0xFFA9FB40, 0xFFAAFB40, 0xFFABFB40, 0xFFACFB40, + 0xFFADFB40, 0xFFAEFB40, 0xFFAFFB40, 0xFFB0FB40, 0xFFB1FB40, 0xFFB2FB40, 0xFFB3FB40, 0xFFB4FB40, 0xFFB5FB40, 0xFFB6FB40, 0xFFB7FB40, 0xFFB8FB40, 0xFFB9FB40, 0xFFBAFB40, 0xFFBBFB40, + 0xFFBCFB40, 0xFFBDFB40, 0xFFBEFB40, 0xFFBFFB40, 0xFFC0FB40, 0xFFC1FB40, 0xFFC2FB40, 0xFFC3FB40, 0xFFC4FB40, 0xFFC5FB40, 0xFFC6FB40, 0xFFC7FB40, 0xFFC8FB40, 0xFFC9FB40, 0xFFCAFB40, + 0xFFCBFB40, 0xFFCCFB40, 0xFFCDFB40, 0xFFCEFB40, 0xFFCFFB40, 0xFFD0FB40, 0xFFD1FB40, 0xFFD2FB40, 0xFFD3FB40, 0xFFD4FB40, 0xFFD5FB40, 0xFFD6FB40, 0xFFD7FB40, 0xFFD8FB40, 0xFFD9FB40, + 0xFFDAFB40, 0xFFDBFB40, 0xFFDCFB40, 0xFFDDFB40, 
0xFFDEFB40, 0xFFDFFB40, 0xFFE0FB40, 0xFFE1FB40, 0xFFE2FB40, 0xFFE3FB40, 0xFFE4FB40, 0xFFE5FB40, 0xFFE6FB40, 0xFFE7FB40, 0xFFE8FB40, + 0xFFE9FB40, 0xFFEAFB40, 0xFFEBFB40, 0xFFECFB40, 0xFFEDFB40, 0xFFEEFB40, 0xFFEFFB40, 0xFFF0FB40, 0xFFF1FB40, 0xFFF2FB40, 0xFFF3FB40, 0xFFF4FB40, 0xFFF5FB40, 0xFFF6FB40, 0xFFF7FB40, + 0xFFF8FB40, 0xFFF9FB40, 0xFFFAFB40, 0xFFFBFB40, 0xFFFCFB40, 0xFFFDFB40, 0xFFFEFB40, 0xFFFFFB40, 0x8000FB41, 0x8001FB41, 0x8002FB41, 0x8003FB41, 0x8004FB41, 0x8005FB41, 0x8006FB41, + 0x8007FB41, 0x8008FB41, 0x8009FB41, 0x800AFB41, 0x800BFB41, 0x800CFB41, 0x800DFB41, 0x800EFB41, 0x800FFB41, 0x8010FB41, 0x8011FB41, 0x8012FB41, 0x8013FB41, 0x8014FB41, 0x8015FB41, + 0x8016FB41, 0x8017FB41, 0x8018FB41, 0x8019FB41, 0x801AFB41, 0x801BFB41, 0x801CFB41, 0x801DFB41, 0x801EFB41, 0x801FFB41, 0x8020FB41, 0x8021FB41, 0x8022FB41, 0x8023FB41, 0x8024FB41, + 0x8025FB41, 0x8026FB41, 0x8027FB41, 0x8028FB41, 0x8029FB41, 0x802AFB41, 0x802BFB41, 0x802CFB41, 0x802DFB41, 0x802EFB41, 0x802FFB41, 0x8030FB41, 0x8031FB41, 0x8032FB41, 0x8033FB41, + 0x8034FB41, 0x8035FB41, 0x8036FB41, 0x8037FB41, 0x8038FB41, 0x8039FB41, 0x803AFB41, 0x803BFB41, 0x803CFB41, 0x803DFB41, 0x803EFB41, 0x803FFB41, 0x8040FB41, 0x8041FB41, 0x8042FB41, + 0x8043FB41, 0x8044FB41, 0x8045FB41, 0x8046FB41, 0x8047FB41, 0x8048FB41, 0x8049FB41, 0x804AFB41, 0x804BFB41, 0x804CFB41, 0x804DFB41, 0x804EFB41, 0x804FFB41, 0x8050FB41, 0x8051FB41, + 0x8052FB41, 0x8053FB41, 0x8054FB41, 0x8055FB41, 0x8056FB41, 0x8057FB41, 0x8058FB41, 0x8059FB41, 0x805AFB41, 0x805BFB41, 0x805CFB41, 0x805DFB41, 0x805EFB41, 0x805FFB41, 0x8060FB41, + 0x8061FB41, 0x8062FB41, 0x8063FB41, 0x8064FB41, 0x8065FB41, 0x8066FB41, 0x8067FB41, 0x8068FB41, 0x8069FB41, 0x806AFB41, 0x806BFB41, 0x806CFB41, 0x806DFB41, 0x806EFB41, 0x806FFB41, + 0x8070FB41, 0x8071FB41, 0x8072FB41, 0x8073FB41, 0x8074FB41, 0x8075FB41, 0x8076FB41, 0x8077FB41, 0x8078FB41, 0x8079FB41, 0x807AFB41, 0x807BFB41, 0x807CFB41, 0x807DFB41, 0x807EFB41, + 0x807FFB41, 0x8080FB41, 0x8081FB41, 
0x8082FB41, 0x8083FB41, 0x8084FB41, 0x8085FB41, 0x8086FB41, 0x8087FB41, 0x8088FB41, 0x8089FB41, 0x808AFB41, 0x808BFB41, 0x808CFB41, 0x808DFB41, + 0x808EFB41, 0x808FFB41, 0x8090FB41, 0x8091FB41, 0x8092FB41, 0x8093FB41, 0x8094FB41, 0x8095FB41, 0x8096FB41, 0x8097FB41, 0x8098FB41, 0x8099FB41, 0x809AFB41, 0x809BFB41, 0x809CFB41, + 0x809DFB41, 0x809EFB41, 0x809FFB41, 0x80A0FB41, 0x80A1FB41, 0x80A2FB41, 0x80A3FB41, 0x80A4FB41, 0x80A5FB41, 0x80A6FB41, 0x80A7FB41, 0x80A8FB41, 0x80A9FB41, 0x80AAFB41, 0x80ABFB41, + 0x80ACFB41, 0x80ADFB41, 0x80AEFB41, 0x80AFFB41, 0x80B0FB41, 0x80B1FB41, 0x80B2FB41, 0x80B3FB41, 0x80B4FB41, 0x80B5FB41, 0x80B6FB41, 0x80B7FB41, 0x80B8FB41, 0x80B9FB41, 0x80BAFB41, + 0x80BBFB41, 0x80BCFB41, 0x80BDFB41, 0x80BEFB41, 0x80BFFB41, 0x80C0FB41, 0x80C1FB41, 0x80C2FB41, 0x80C3FB41, 0x80C4FB41, 0x80C5FB41, 0x80C6FB41, 0x80C7FB41, 0x80C8FB41, 0x80C9FB41, + 0x80CAFB41, 0x80CBFB41, 0x80CCFB41, 0x80CDFB41, 0x80CEFB41, 0x80CFFB41, 0x80D0FB41, 0x80D1FB41, 0x80D2FB41, 0x80D3FB41, 0x80D4FB41, 0x80D5FB41, 0x80D6FB41, 0x80D7FB41, 0x80D8FB41, + 0x80D9FB41, 0x80DAFB41, 0x80DBFB41, 0x80DCFB41, 0x80DDFB41, 0x80DEFB41, 0x80DFFB41, 0x80E0FB41, 0x80E1FB41, 0x80E2FB41, 0x80E3FB41, 0x80E4FB41, 0x80E5FB41, 0x80E6FB41, 0x80E7FB41, + 0x80E8FB41, 0x80E9FB41, 0x80EAFB41, 0x80EBFB41, 0x80ECFB41, 0x80EDFB41, 0x80EEFB41, 0x80EFFB41, 0x80F0FB41, 0x80F1FB41, 0x80F2FB41, 0x80F3FB41, 0x80F4FB41, 0x80F5FB41, 0x80F6FB41, + 0x80F7FB41, 0x80F8FB41, 0x80F9FB41, 0x80FAFB41, 0x80FBFB41, 0x80FCFB41, 0x80FDFB41, 0x80FEFB41, 0x80FFFB41, 0x8100FB41, 0x8101FB41, 0x8102FB41, 0x8103FB41, 0x8104FB41, 0x8105FB41, + 0x8106FB41, 0x8107FB41, 0x8108FB41, 0x8109FB41, 0x810AFB41, 0x810BFB41, 0x810CFB41, 0x810DFB41, 0x810EFB41, 0x810FFB41, 0x8110FB41, 0x8111FB41, 0x8112FB41, 0x8113FB41, 0x8114FB41, + 0x8115FB41, 0x8116FB41, 0x8117FB41, 0x8118FB41, 0x8119FB41, 0x811AFB41, 0x811BFB41, 0x811CFB41, 0x811DFB41, 0x811EFB41, 0x811FFB41, 0x8120FB41, 0x8121FB41, 0x8122FB41, 0x8123FB41, + 0x8124FB41, 0x8125FB41, 
0x8126FB41, 0x8127FB41, 0x8128FB41, 0x8129FB41, 0x812AFB41, 0x812BFB41, 0x812CFB41, 0x812DFB41, 0x812EFB41, 0x812FFB41, 0x8130FB41, 0x8131FB41, 0x8132FB41, + 0x8133FB41, 0x8134FB41, 0x8135FB41, 0x8136FB41, 0x8137FB41, 0x8138FB41, 0x8139FB41, 0x813AFB41, 0x813BFB41, 0x813CFB41, 0x813DFB41, 0x813EFB41, 0x813FFB41, 0x8140FB41, 0x8141FB41, + 0x8142FB41, 0x8143FB41, 0x8144FB41, 0x8145FB41, 0x8146FB41, 0x8147FB41, 0x8148FB41, 0x8149FB41, 0x814AFB41, 0x814BFB41, 0x814CFB41, 0x814DFB41, 0x814EFB41, 0x814FFB41, 0x8150FB41, + 0x8151FB41, 0x8152FB41, 0x8153FB41, 0x8154FB41, 0x8155FB41, 0x8156FB41, 0x8157FB41, 0x8158FB41, 0x8159FB41, 0x815AFB41, 0x815BFB41, 0x815CFB41, 0x815DFB41, 0x815EFB41, 0x815FFB41, + 0x8160FB41, 0x8161FB41, 0x8162FB41, 0x8163FB41, 0x8164FB41, 0x8165FB41, 0x8166FB41, 0x8167FB41, 0x8168FB41, 0x8169FB41, 0x816AFB41, 0x816BFB41, 0x816CFB41, 0x816DFB41, 0x816EFB41, + 0x816FFB41, 0x8170FB41, 0x8171FB41, 0x8172FB41, 0x8173FB41, 0x8174FB41, 0x8175FB41, 0x8176FB41, 0x8177FB41, 0x8178FB41, 0x8179FB41, 0x817AFB41, 0x817BFB41, 0x817CFB41, 0x817DFB41, + 0x817EFB41, 0x817FFB41, 0x8180FB41, 0x8181FB41, 0x8182FB41, 0x8183FB41, 0x8184FB41, 0x8185FB41, 0x8186FB41, 0x8187FB41, 0x8188FB41, 0x8189FB41, 0x818AFB41, 0x818BFB41, 0x818CFB41, + 0x818DFB41, 0x818EFB41, 0x818FFB41, 0x8190FB41, 0x8191FB41, 0x8192FB41, 0x8193FB41, 0x8194FB41, 0x8195FB41, 0x8196FB41, 0x8197FB41, 0x8198FB41, 0x8199FB41, 0x819AFB41, 0x819BFB41, + 0x819CFB41, 0x819DFB41, 0x819EFB41, 0x819FFB41, 0x81A0FB41, 0x81A1FB41, 0x81A2FB41, 0x81A3FB41, 0x81A4FB41, 0x81A5FB41, 0x81A6FB41, 0x81A7FB41, 0x81A8FB41, 0x81A9FB41, 0x81AAFB41, + 0x81ABFB41, 0x81ACFB41, 0x81ADFB41, 0x81AEFB41, 0x81AFFB41, 0x81B0FB41, 0x81B1FB41, 0x81B2FB41, 0x81B3FB41, 0x81B4FB41, 0x81B5FB41, 0x81B6FB41, 0x81B7FB41, 0x81B8FB41, 0x81B9FB41, + 0x81BAFB41, 0x81BBFB41, 0x81BCFB41, 0x81BDFB41, 0x81BEFB41, 0x81BFFB41, 0x81C0FB41, 0x81C1FB41, 0x81C2FB41, 0x81C3FB41, 0x81C4FB41, 0x81C5FB41, 0x81C6FB41, 0x81C7FB41, 0x81C8FB41, + 0x81C9FB41, 
0x81CAFB41, 0x81CBFB41, 0x81CCFB41, 0x81CDFB41, 0x81CEFB41, 0x81CFFB41, 0x81D0FB41, 0x81D1FB41, 0x81D2FB41, 0x81D3FB41, 0x81D4FB41, 0x81D5FB41, 0x81D6FB41, 0x81D7FB41, + 0x81D8FB41, 0x81D9FB41, 0x81DAFB41, 0x81DBFB41, 0x81DCFB41, 0x81DDFB41, 0x81DEFB41, 0x81DFFB41, 0x81E0FB41, 0x81E1FB41, 0x81E2FB41, 0x81E3FB41, 0x81E4FB41, 0x81E5FB41, 0x81E6FB41, + 0x81E7FB41, 0x81E8FB41, 0x81E9FB41, 0x81EAFB41, 0x81EBFB41, 0x81ECFB41, 0x81EDFB41, 0x81EEFB41, 0x81EFFB41, 0x81F0FB41, 0x81F1FB41, 0x81F2FB41, 0x81F3FB41, 0x81F4FB41, 0x81F5FB41, + 0x81F6FB41, 0x81F7FB41, 0x81F8FB41, 0x81F9FB41, 0x81FAFB41, 0x81FBFB41, 0x81FCFB41, 0x81FDFB41, 0x81FEFB41, 0x81FFFB41, 0x8200FB41, 0x8201FB41, 0x8202FB41, 0x8203FB41, 0x8204FB41, + 0x8205FB41, 0x8206FB41, 0x8207FB41, 0x8208FB41, 0x8209FB41, 0x820AFB41, 0x820BFB41, 0x820CFB41, 0x820DFB41, 0x820EFB41, 0x820FFB41, 0x8210FB41, 0x8211FB41, 0x8212FB41, 0x8213FB41, + 0x8214FB41, 0x8215FB41, 0x8216FB41, 0x8217FB41, 0x8218FB41, 0x8219FB41, 0x821AFB41, 0x821BFB41, 0x821CFB41, 0x821DFB41, 0x821EFB41, 0x821FFB41, 0x8220FB41, 0x8221FB41, 0x8222FB41, + 0x8223FB41, 0x8224FB41, 0x8225FB41, 0x8226FB41, 0x8227FB41, 0x8228FB41, 0x8229FB41, 0x822AFB41, 0x822BFB41, 0x822CFB41, 0x822DFB41, 0x822EFB41, 0x822FFB41, 0x8230FB41, 0x8231FB41, + 0x8232FB41, 0x8233FB41, 0x8234FB41, 0x8235FB41, 0x8236FB41, 0x8237FB41, 0x8238FB41, 0x8239FB41, 0x823AFB41, 0x823BFB41, 0x823CFB41, 0x823DFB41, 0x823EFB41, 0x823FFB41, 0x8240FB41, + 0x8241FB41, 0x8242FB41, 0x8243FB41, 0x8244FB41, 0x8245FB41, 0x8246FB41, 0x8247FB41, 0x8248FB41, 0x8249FB41, 0x824AFB41, 0x824BFB41, 0x824CFB41, 0x824DFB41, 0x824EFB41, 0x824FFB41, + 0x8250FB41, 0x8251FB41, 0x8252FB41, 0x8253FB41, 0x8254FB41, 0x8255FB41, 0x8256FB41, 0x8257FB41, 0x8258FB41, 0x8259FB41, 0x825AFB41, 0x825BFB41, 0x825CFB41, 0x825DFB41, 0x825EFB41, + 0x825FFB41, 0x8260FB41, 0x8261FB41, 0x8262FB41, 0x8263FB41, 0x8264FB41, 0x8265FB41, 0x8266FB41, 0x8267FB41, 0x8268FB41, 0x8269FB41, 0x826AFB41, 0x826BFB41, 0x826CFB41, 0x826DFB41, + 
0x826EFB41, 0x826FFB41, 0x8270FB41, 0x8271FB41, 0x8272FB41, 0x8273FB41, 0x8274FB41, 0x8275FB41, 0x8276FB41, 0x8277FB41, 0x8278FB41, 0x8279FB41, 0x827AFB41, 0x827BFB41, 0x827CFB41, + 0x827DFB41, 0x827EFB41, 0x827FFB41, 0x8280FB41, 0x8281FB41, 0x8282FB41, 0x8283FB41, 0x8284FB41, 0x8285FB41, 0x8286FB41, 0x8287FB41, 0x8288FB41, 0x8289FB41, 0x828AFB41, 0x828BFB41, + 0x828CFB41, 0x828DFB41, 0x828EFB41, 0x828FFB41, 0x8290FB41, 0x8291FB41, 0x8292FB41, 0x8293FB41, 0x8294FB41, 0x8295FB41, 0x8296FB41, 0x8297FB41, 0x8298FB41, 0x8299FB41, 0x829AFB41, + 0x829BFB41, 0x829CFB41, 0x829DFB41, 0x829EFB41, 0x829FFB41, 0x82A0FB41, 0x82A1FB41, 0x82A2FB41, 0x82A3FB41, 0x82A4FB41, 0x82A5FB41, 0x82A6FB41, 0x82A7FB41, 0x82A8FB41, 0x82A9FB41, + 0x82AAFB41, 0x82ABFB41, 0x82ACFB41, 0x82ADFB41, 0x82AEFB41, 0x82AFFB41, 0x82B0FB41, 0x82B1FB41, 0x82B2FB41, 0x82B3FB41, 0x82B4FB41, 0x82B5FB41, 0x82B6FB41, 0x82B7FB41, 0x82B8FB41, + 0x82B9FB41, 0x82BAFB41, 0x82BBFB41, 0x82BCFB41, 0x82BDFB41, 0x82BEFB41, 0x82BFFB41, 0x82C0FB41, 0x82C1FB41, 0x82C2FB41, 0x82C3FB41, 0x82C4FB41, 0x82C5FB41, 0x82C6FB41, 0x82C7FB41, + 0x82C8FB41, 0x82C9FB41, 0x82CAFB41, 0x82CBFB41, 0x82CCFB41, 0x82CDFB41, 0x82CEFB41, 0x82CFFB41, 0x82D0FB41, 0x82D1FB41, 0x82D2FB41, 0x82D3FB41, 0x82D4FB41, 0x82D5FB41, 0x82D6FB41, + 0x82D7FB41, 0x82D8FB41, 0x82D9FB41, 0x82DAFB41, 0x82DBFB41, 0x82DCFB41, 0x82DDFB41, 0x82DEFB41, 0x82DFFB41, 0x82E0FB41, 0x82E1FB41, 0x82E2FB41, 0x82E3FB41, 0x82E4FB41, 0x82E5FB41, + 0x82E6FB41, 0x82E7FB41, 0x82E8FB41, 0x82E9FB41, 0x82EAFB41, 0x82EBFB41, 0x82ECFB41, 0x82EDFB41, 0x82EEFB41, 0x82EFFB41, 0x82F0FB41, 0x82F1FB41, 0x82F2FB41, 0x82F3FB41, 0x82F4FB41, + 0x82F5FB41, 0x82F6FB41, 0x82F7FB41, 0x82F8FB41, 0x82F9FB41, 0x82FAFB41, 0x82FBFB41, 0x82FCFB41, 0x82FDFB41, 0x82FEFB41, 0x82FFFB41, 0x8300FB41, 0x8301FB41, 0x8302FB41, 0x8303FB41, + 0x8304FB41, 0x8305FB41, 0x8306FB41, 0x8307FB41, 0x8308FB41, 0x8309FB41, 0x830AFB41, 0x830BFB41, 0x830CFB41, 0x830DFB41, 0x830EFB41, 0x830FFB41, 0x8310FB41, 0x8311FB41, 0x8312FB41, 
+ 0x8313FB41, 0x8314FB41, 0x8315FB41, 0x8316FB41, 0x8317FB41, 0x8318FB41, 0x8319FB41, 0x831AFB41, 0x831BFB41, 0x831CFB41, 0x831DFB41, 0x831EFB41, 0x831FFB41, 0x8320FB41, 0x8321FB41, + 0x8322FB41, 0x8323FB41, 0x8324FB41, 0x8325FB41, 0x8326FB41, 0x8327FB41, 0x8328FB41, 0x8329FB41, 0x832AFB41, 0x832BFB41, 0x832CFB41, 0x832DFB41, 0x832EFB41, 0x832FFB41, 0x8330FB41, + 0x8331FB41, 0x8332FB41, 0x8333FB41, 0x8334FB41, 0x8335FB41, 0x8336FB41, 0x8337FB41, 0x8338FB41, 0x8339FB41, 0x833AFB41, 0x833BFB41, 0x833CFB41, 0x833DFB41, 0x833EFB41, 0x833FFB41, + 0x8340FB41, 0x8341FB41, 0x8342FB41, 0x8343FB41, 0x8344FB41, 0x8345FB41, 0x8346FB41, 0x8347FB41, 0x8348FB41, 0x8349FB41, 0x834AFB41, 0x834BFB41, 0x834CFB41, 0x834DFB41, 0x834EFB41, + 0x834FFB41, 0x8350FB41, 0x8351FB41, 0x8352FB41, 0x8353FB41, 0x8354FB41, 0x8355FB41, 0x8356FB41, 0x8357FB41, 0x8358FB41, 0x8359FB41, 0x835AFB41, 0x835BFB41, 0x835CFB41, 0x835DFB41, + 0x835EFB41, 0x835FFB41, 0x8360FB41, 0x8361FB41, 0x8362FB41, 0x8363FB41, 0x8364FB41, 0x8365FB41, 0x8366FB41, 0x8367FB41, 0x8368FB41, 0x8369FB41, 0x836AFB41, 0x836BFB41, 0x836CFB41, + 0x836DFB41, 0x836EFB41, 0x836FFB41, 0x8370FB41, 0x8371FB41, 0x8372FB41, 0x8373FB41, 0x8374FB41, 0x8375FB41, 0x8376FB41, 0x8377FB41, 0x8378FB41, 0x8379FB41, 0x837AFB41, 0x837BFB41, + 0x837CFB41, 0x837DFB41, 0x837EFB41, 0x837FFB41, 0x8380FB41, 0x8381FB41, 0x8382FB41, 0x8383FB41, 0x8384FB41, 0x8385FB41, 0x8386FB41, 0x8387FB41, 0x8388FB41, 0x8389FB41, 0x838AFB41, + 0x838BFB41, 0x838CFB41, 0x838DFB41, 0x838EFB41, 0x838FFB41, 0x8390FB41, 0x8391FB41, 0x8392FB41, 0x8393FB41, 0x8394FB41, 0x8395FB41, 0x8396FB41, 0x8397FB41, 0x8398FB41, 0x8399FB41, + 0x839AFB41, 0x839BFB41, 0x839CFB41, 0x839DFB41, 0x839EFB41, 0x839FFB41, 0x83A0FB41, 0x83A1FB41, 0x83A2FB41, 0x83A3FB41, 0x83A4FB41, 0x83A5FB41, 0x83A6FB41, 0x83A7FB41, 0x83A8FB41, + 0x83A9FB41, 0x83AAFB41, 0x83ABFB41, 0x83ACFB41, 0x83ADFB41, 0x83AEFB41, 0x83AFFB41, 0x83B0FB41, 0x83B1FB41, 0x83B2FB41, 0x83B3FB41, 0x83B4FB41, 0x83B5FB41, 0x83B6FB41, 
0x83B7FB41, + 0x83B8FB41, 0x83B9FB41, 0x83BAFB41, 0x83BBFB41, 0x83BCFB41, 0x83BDFB41, 0x83BEFB41, 0x83BFFB41, 0x83C0FB41, 0x83C1FB41, 0x83C2FB41, 0x83C3FB41, 0x83C4FB41, 0x83C5FB41, 0x83C6FB41, + 0x83C7FB41, 0x83C8FB41, 0x83C9FB41, 0x83CAFB41, 0x83CBFB41, 0x83CCFB41, 0x83CDFB41, 0x83CEFB41, 0x83CFFB41, 0x83D0FB41, 0x83D1FB41, 0x83D2FB41, 0x83D3FB41, 0x83D4FB41, 0x83D5FB41, + 0x83D6FB41, 0x83D7FB41, 0x83D8FB41, 0x83D9FB41, 0x83DAFB41, 0x83DBFB41, 0x83DCFB41, 0x83DDFB41, 0x83DEFB41, 0x83DFFB41, 0x83E0FB41, 0x83E1FB41, 0x83E2FB41, 0x83E3FB41, 0x83E4FB41, + 0x83E5FB41, 0x83E6FB41, 0x83E7FB41, 0x83E8FB41, 0x83E9FB41, 0x83EAFB41, 0x83EBFB41, 0x83ECFB41, 0x83EDFB41, 0x83EEFB41, 0x83EFFB41, 0x83F0FB41, 0x83F1FB41, 0x83F2FB41, 0x83F3FB41, + 0x83F4FB41, 0x83F5FB41, 0x83F6FB41, 0x83F7FB41, 0x83F8FB41, 0x83F9FB41, 0x83FAFB41, 0x83FBFB41, 0x83FCFB41, 0x83FDFB41, 0x83FEFB41, 0x83FFFB41, 0x8400FB41, 0x8401FB41, 0x8402FB41, + 0x8403FB41, 0x8404FB41, 0x8405FB41, 0x8406FB41, 0x8407FB41, 0x8408FB41, 0x8409FB41, 0x840AFB41, 0x840BFB41, 0x840CFB41, 0x840DFB41, 0x840EFB41, 0x840FFB41, 0x8410FB41, 0x8411FB41, + 0x8412FB41, 0x8413FB41, 0x8414FB41, 0x8415FB41, 0x8416FB41, 0x8417FB41, 0x8418FB41, 0x8419FB41, 0x841AFB41, 0x841BFB41, 0x841CFB41, 0x841DFB41, 0x841EFB41, 0x841FFB41, 0x8420FB41, + 0x8421FB41, 0x8422FB41, 0x8423FB41, 0x8424FB41, 0x8425FB41, 0x8426FB41, 0x8427FB41, 0x8428FB41, 0x8429FB41, 0x842AFB41, 0x842BFB41, 0x842CFB41, 0x842DFB41, 0x842EFB41, 0x842FFB41, + 0x8430FB41, 0x8431FB41, 0x8432FB41, 0x8433FB41, 0x8434FB41, 0x8435FB41, 0x8436FB41, 0x8437FB41, 0x8438FB41, 0x8439FB41, 0x843AFB41, 0x843BFB41, 0x843CFB41, 0x843DFB41, 0x843EFB41, + 0x843FFB41, 0x8440FB41, 0x8441FB41, 0x8442FB41, 0x8443FB41, 0x8444FB41, 0x8445FB41, 0x8446FB41, 0x8447FB41, 0x8448FB41, 0x8449FB41, 0x844AFB41, 0x844BFB41, 0x844CFB41, 0x844DFB41, + 0x844EFB41, 0x844FFB41, 0x8450FB41, 0x8451FB41, 0x8452FB41, 0x8453FB41, 0x8454FB41, 0x8455FB41, 0x8456FB41, 0x8457FB41, 0x8458FB41, 0x8459FB41, 0x845AFB41, 
0x845BFB41, 0x845CFB41, + 0x845DFB41, 0x845EFB41, 0x845FFB41, 0x8460FB41, 0x8461FB41, 0x8462FB41, 0x8463FB41, 0x8464FB41, 0x8465FB41, 0x8466FB41, 0x8467FB41, 0x8468FB41, 0x8469FB41, 0x846AFB41, 0x846BFB41, + 0x846CFB41, 0x846DFB41, 0x846EFB41, 0x846FFB41, 0x8470FB41, 0x8471FB41, 0x8472FB41, 0x8473FB41, 0x8474FB41, 0x8475FB41, 0x8476FB41, 0x8477FB41, 0x8478FB41, 0x8479FB41, 0x847AFB41, + 0x847BFB41, 0x847CFB41, 0x847DFB41, 0x847EFB41, 0x847FFB41, 0x8480FB41, 0x8481FB41, 0x8482FB41, 0x8483FB41, 0x8484FB41, 0x8485FB41, 0x8486FB41, 0x8487FB41, 0x8488FB41, 0x8489FB41, + 0x848AFB41, 0x848BFB41, 0x848CFB41, 0x848DFB41, 0x848EFB41, 0x848FFB41, 0x8490FB41, 0x8491FB41, 0x8492FB41, 0x8493FB41, 0x8494FB41, 0x8495FB41, 0x8496FB41, 0x8497FB41, 0x8498FB41, + 0x8499FB41, 0x849AFB41, 0x849BFB41, 0x849CFB41, 0x849DFB41, 0x849EFB41, 0x849FFB41, 0x84A0FB41, 0x84A1FB41, 0x84A2FB41, 0x84A3FB41, 0x84A4FB41, 0x84A5FB41, 0x84A6FB41, 0x84A7FB41, + 0x84A8FB41, 0x84A9FB41, 0x84AAFB41, 0x84ABFB41, 0x84ACFB41, 0x84ADFB41, 0x84AEFB41, 0x84AFFB41, 0x84B0FB41, 0x84B1FB41, 0x84B2FB41, 0x84B3FB41, 0x84B4FB41, 0x84B5FB41, 0x84B6FB41, + 0x84B7FB41, 0x84B8FB41, 0x84B9FB41, 0x84BAFB41, 0x84BBFB41, 0x84BCFB41, 0x84BDFB41, 0x84BEFB41, 0x84BFFB41, 0x84C0FB41, 0x84C1FB41, 0x84C2FB41, 0x84C3FB41, 0x84C4FB41, 0x84C5FB41, + 0x84C6FB41, 0x84C7FB41, 0x84C8FB41, 0x84C9FB41, 0x84CAFB41, 0x84CBFB41, 0x84CCFB41, 0x84CDFB41, 0x84CEFB41, 0x84CFFB41, 0x84D0FB41, 0x84D1FB41, 0x84D2FB41, 0x84D3FB41, 0x84D4FB41, + 0x84D5FB41, 0x84D6FB41, 0x84D7FB41, 0x84D8FB41, 0x84D9FB41, 0x84DAFB41, 0x84DBFB41, 0x84DCFB41, 0x84DDFB41, 0x84DEFB41, 0x84DFFB41, 0x84E0FB41, 0x84E1FB41, 0x84E2FB41, 0x84E3FB41, + 0x84E4FB41, 0x84E5FB41, 0x84E6FB41, 0x84E7FB41, 0x84E8FB41, 0x84E9FB41, 0x84EAFB41, 0x84EBFB41, 0x84ECFB41, 0x84EDFB41, 0x84EEFB41, 0x84EFFB41, 0x84F0FB41, 0x84F1FB41, 0x84F2FB41, + 0x84F3FB41, 0x84F4FB41, 0x84F5FB41, 0x84F6FB41, 0x84F7FB41, 0x84F8FB41, 0x84F9FB41, 0x84FAFB41, 0x84FBFB41, 0x84FCFB41, 0x84FDFB41, 0x84FEFB41, 
0x84FFFB41, 0x8500FB41, 0x8501FB41, + 0x8502FB41, 0x8503FB41, 0x8504FB41, 0x8505FB41, 0x8506FB41, 0x8507FB41, 0x8508FB41, 0x8509FB41, 0x850AFB41, 0x850BFB41, 0x850CFB41, 0x850DFB41, 0x850EFB41, 0x850FFB41, 0x8510FB41, + 0x8511FB41, 0x8512FB41, 0x8513FB41, 0x8514FB41, 0x8515FB41, 0x8516FB41, 0x8517FB41, 0x8518FB41, 0x8519FB41, 0x851AFB41, 0x851BFB41, 0x851CFB41, 0x851DFB41, 0x851EFB41, 0x851FFB41, + 0x8520FB41, 0x8521FB41, 0x8522FB41, 0x8523FB41, 0x8524FB41, 0x8525FB41, 0x8526FB41, 0x8527FB41, 0x8528FB41, 0x8529FB41, 0x852AFB41, 0x852BFB41, 0x852CFB41, 0x852DFB41, 0x852EFB41, + 0x852FFB41, 0x8530FB41, 0x8531FB41, 0x8532FB41, 0x8533FB41, 0x8534FB41, 0x8535FB41, 0x8536FB41, 0x8537FB41, 0x8538FB41, 0x8539FB41, 0x853AFB41, 0x853BFB41, 0x853CFB41, 0x853DFB41, + 0x853EFB41, 0x853FFB41, 0x8540FB41, 0x8541FB41, 0x8542FB41, 0x8543FB41, 0x8544FB41, 0x8545FB41, 0x8546FB41, 0x8547FB41, 0x8548FB41, 0x8549FB41, 0x854AFB41, 0x854BFB41, 0x854CFB41, + 0x854DFB41, 0x854EFB41, 0x854FFB41, 0x8550FB41, 0x8551FB41, 0x8552FB41, 0x8553FB41, 0x8554FB41, 0x8555FB41, 0x8556FB41, 0x8557FB41, 0x8558FB41, 0x8559FB41, 0x855AFB41, 0x855BFB41, + 0x855CFB41, 0x855DFB41, 0x855EFB41, 0x855FFB41, 0x8560FB41, 0x8561FB41, 0x8562FB41, 0x8563FB41, 0x8564FB41, 0x8565FB41, 0x8566FB41, 0x8567FB41, 0x8568FB41, 0x8569FB41, 0x856AFB41, + 0x856BFB41, 0x856CFB41, 0x856DFB41, 0x856EFB41, 0x856FFB41, 0x8570FB41, 0x8571FB41, 0x8572FB41, 0x8573FB41, 0x8574FB41, 0x8575FB41, 0x8576FB41, 0x8577FB41, 0x8578FB41, 0x8579FB41, + 0x857AFB41, 0x857BFB41, 0x857CFB41, 0x857DFB41, 0x857EFB41, 0x857FFB41, 0x8580FB41, 0x8581FB41, 0x8582FB41, 0x8583FB41, 0x8584FB41, 0x8585FB41, 0x8586FB41, 0x8587FB41, 0x8588FB41, + 0x8589FB41, 0x858AFB41, 0x858BFB41, 0x858CFB41, 0x858DFB41, 0x858EFB41, 0x858FFB41, 0x8590FB41, 0x8591FB41, 0x8592FB41, 0x8593FB41, 0x8594FB41, 0x8595FB41, 0x8596FB41, 0x8597FB41, + 0x8598FB41, 0x8599FB41, 0x859AFB41, 0x859BFB41, 0x859CFB41, 0x859DFB41, 0x859EFB41, 0x859FFB41, 0x85A0FB41, 0x85A1FB41, 0x85A2FB41, 
0x85A3FB41, 0x85A4FB41, 0x85A5FB41, 0x85A6FB41, + 0x85A7FB41, 0x85A8FB41, 0x85A9FB41, 0x85AAFB41, 0x85ABFB41, 0x85ACFB41, 0x85ADFB41, 0x85AEFB41, 0x85AFFB41, 0x85B0FB41, 0x85B1FB41, 0x85B2FB41, 0x85B3FB41, 0x85B4FB41, 0x85B5FB41, + 0x85B6FB41, 0x85B7FB41, 0x85B8FB41, 0x85B9FB41, 0x85BAFB41, 0x85BBFB41, 0x85BCFB41, 0x85BDFB41, 0x85BEFB41, 0x85BFFB41, 0x85C0FB41, 0x85C1FB41, 0x85C2FB41, 0x85C3FB41, 0x85C4FB41, + 0x85C5FB41, 0x85C6FB41, 0x85C7FB41, 0x85C8FB41, 0x85C9FB41, 0x85CAFB41, 0x85CBFB41, 0x85CCFB41, 0x85CDFB41, 0x85CEFB41, 0x85CFFB41, 0x85D0FB41, 0x85D1FB41, 0x85D2FB41, 0x85D3FB41, + 0x85D4FB41, 0x85D5FB41, 0x85D6FB41, 0x85D7FB41, 0x85D8FB41, 0x85D9FB41, 0x85DAFB41, 0x85DBFB41, 0x85DCFB41, 0x85DDFB41, 0x85DEFB41, 0x85DFFB41, 0x85E0FB41, 0x85E1FB41, 0x85E2FB41, + 0x85E3FB41, 0x85E4FB41, 0x85E5FB41, 0x85E6FB41, 0x85E7FB41, 0x85E8FB41, 0x85E9FB41, 0x85EAFB41, 0x85EBFB41, 0x85ECFB41, 0x85EDFB41, 0x85EEFB41, 0x85EFFB41, 0x85F0FB41, 0x85F1FB41, + 0x85F2FB41, 0x85F3FB41, 0x85F4FB41, 0x85F5FB41, 0x85F6FB41, 0x85F7FB41, 0x85F8FB41, 0x85F9FB41, 0x85FAFB41, 0x85FBFB41, 0x85FCFB41, 0x85FDFB41, 0x85FEFB41, 0x85FFFB41, 0x8600FB41, + 0x8601FB41, 0x8602FB41, 0x8603FB41, 0x8604FB41, 0x8605FB41, 0x8606FB41, 0x8607FB41, 0x8608FB41, 0x8609FB41, 0x860AFB41, 0x860BFB41, 0x860CFB41, 0x860DFB41, 0x860EFB41, 0x860FFB41, + 0x8610FB41, 0x8611FB41, 0x8612FB41, 0x8613FB41, 0x8614FB41, 0x8615FB41, 0x8616FB41, 0x8617FB41, 0x8618FB41, 0x8619FB41, 0x861AFB41, 0x861BFB41, 0x861CFB41, 0x861DFB41, 0x861EFB41, + 0x861FFB41, 0x8620FB41, 0x8621FB41, 0x8622FB41, 0x8623FB41, 0x8624FB41, 0x8625FB41, 0x8626FB41, 0x8627FB41, 0x8628FB41, 0x8629FB41, 0x862AFB41, 0x862BFB41, 0x862CFB41, 0x862DFB41, + 0x862EFB41, 0x862FFB41, 0x8630FB41, 0x8631FB41, 0x8632FB41, 0x8633FB41, 0x8634FB41, 0x8635FB41, 0x8636FB41, 0x8637FB41, 0x8638FB41, 0x8639FB41, 0x863AFB41, 0x863BFB41, 0x863CFB41, + 0x863DFB41, 0x863EFB41, 0x863FFB41, 0x8640FB41, 0x8641FB41, 0x8642FB41, 0x8643FB41, 0x8644FB41, 0x8645FB41, 0x8646FB41, 
0x8647FB41, 0x8648FB41, 0x8649FB41, 0x864AFB41, 0x864BFB41, + 0x864CFB41, 0x864DFB41, 0x864EFB41, 0x864FFB41, 0x8650FB41, 0x8651FB41, 0x8652FB41, 0x8653FB41, 0x8654FB41, 0x8655FB41, 0x8656FB41, 0x8657FB41, 0x8658FB41, 0x8659FB41, 0x865AFB41, + 0x865BFB41, 0x865CFB41, 0x865DFB41, 0x865EFB41, 0x865FFB41, 0x8660FB41, 0x8661FB41, 0x8662FB41, 0x8663FB41, 0x8664FB41, 0x8665FB41, 0x8666FB41, 0x8667FB41, 0x8668FB41, 0x8669FB41, + 0x866AFB41, 0x866BFB41, 0x866CFB41, 0x866DFB41, 0x866EFB41, 0x866FFB41, 0x8670FB41, 0x8671FB41, 0x8672FB41, 0x8673FB41, 0x8674FB41, 0x8675FB41, 0x8676FB41, 0x8677FB41, 0x8678FB41, + 0x8679FB41, 0x867AFB41, 0x867BFB41, 0x867CFB41, 0x867DFB41, 0x867EFB41, 0x867FFB41, 0x8680FB41, 0x8681FB41, 0x8682FB41, 0x8683FB41, 0x8684FB41, 0x8685FB41, 0x8686FB41, 0x8687FB41, + 0x8688FB41, 0x8689FB41, 0x868AFB41, 0x868BFB41, 0x868CFB41, 0x868DFB41, 0x868EFB41, 0x868FFB41, 0x8690FB41, 0x8691FB41, 0x8692FB41, 0x8693FB41, 0x8694FB41, 0x8695FB41, 0x8696FB41, + 0x8697FB41, 0x8698FB41, 0x8699FB41, 0x869AFB41, 0x869BFB41, 0x869CFB41, 0x869DFB41, 0x869EFB41, 0x869FFB41, 0x86A0FB41, 0x86A1FB41, 0x86A2FB41, 0x86A3FB41, 0x86A4FB41, 0x86A5FB41, + 0x86A6FB41, 0x86A7FB41, 0x86A8FB41, 0x86A9FB41, 0x86AAFB41, 0x86ABFB41, 0x86ACFB41, 0x86ADFB41, 0x86AEFB41, 0x86AFFB41, 0x86B0FB41, 0x86B1FB41, 0x86B2FB41, 0x86B3FB41, 0x86B4FB41, + 0x86B5FB41, 0x86B6FB41, 0x86B7FB41, 0x86B8FB41, 0x86B9FB41, 0x86BAFB41, 0x86BBFB41, 0x86BCFB41, 0x86BDFB41, 0x86BEFB41, 0x86BFFB41, 0x86C0FB41, 0x86C1FB41, 0x86C2FB41, 0x86C3FB41, + 0x86C4FB41, 0x86C5FB41, 0x86C6FB41, 0x86C7FB41, 0x86C8FB41, 0x86C9FB41, 0x86CAFB41, 0x86CBFB41, 0x86CCFB41, 0x86CDFB41, 0x86CEFB41, 0x86CFFB41, 0x86D0FB41, 0x86D1FB41, 0x86D2FB41, + 0x86D3FB41, 0x86D4FB41, 0x86D5FB41, 0x86D6FB41, 0x86D7FB41, 0x86D8FB41, 0x86D9FB41, 0x86DAFB41, 0x86DBFB41, 0x86DCFB41, 0x86DDFB41, 0x86DEFB41, 0x86DFFB41, 0x86E0FB41, 0x86E1FB41, + 0x86E2FB41, 0x86E3FB41, 0x86E4FB41, 0x86E5FB41, 0x86E6FB41, 0x86E7FB41, 0x86E8FB41, 0x86E9FB41, 0x86EAFB41, 
0x86EBFB41, 0x86ECFB41, 0x86EDFB41, 0x86EEFB41, 0x86EFFB41, 0x86F0FB41, + 0x86F1FB41, 0x86F2FB41, 0x86F3FB41, 0x86F4FB41, 0x86F5FB41, 0x86F6FB41, 0x86F7FB41, 0x86F8FB41, 0x86F9FB41, 0x86FAFB41, 0x86FBFB41, 0x86FCFB41, 0x86FDFB41, 0x86FEFB41, 0x86FFFB41, + 0x8700FB41, 0x8701FB41, 0x8702FB41, 0x8703FB41, 0x8704FB41, 0x8705FB41, 0x8706FB41, 0x8707FB41, 0x8708FB41, 0x8709FB41, 0x870AFB41, 0x870BFB41, 0x870CFB41, 0x870DFB41, 0x870EFB41, + 0x870FFB41, 0x8710FB41, 0x8711FB41, 0x8712FB41, 0x8713FB41, 0x8714FB41, 0x8715FB41, 0x8716FB41, 0x8717FB41, 0x8718FB41, 0x8719FB41, 0x871AFB41, 0x871BFB41, 0x871CFB41, 0x871DFB41, + 0x871EFB41, 0x871FFB41, 0x8720FB41, 0x8721FB41, 0x8722FB41, 0x8723FB41, 0x8724FB41, 0x8725FB41, 0x8726FB41, 0x8727FB41, 0x8728FB41, 0x8729FB41, 0x872AFB41, 0x872BFB41, 0x872CFB41, + 0x872DFB41, 0x872EFB41, 0x872FFB41, 0x8730FB41, 0x8731FB41, 0x8732FB41, 0x8733FB41, 0x8734FB41, 0x8735FB41, 0x8736FB41, 0x8737FB41, 0x8738FB41, 0x8739FB41, 0x873AFB41, 0x873BFB41, + 0x873CFB41, 0x873DFB41, 0x873EFB41, 0x873FFB41, 0x8740FB41, 0x8741FB41, 0x8742FB41, 0x8743FB41, 0x8744FB41, 0x8745FB41, 0x8746FB41, 0x8747FB41, 0x8748FB41, 0x8749FB41, 0x874AFB41, + 0x874BFB41, 0x874CFB41, 0x874DFB41, 0x874EFB41, 0x874FFB41, 0x8750FB41, 0x8751FB41, 0x8752FB41, 0x8753FB41, 0x8754FB41, 0x8755FB41, 0x8756FB41, 0x8757FB41, 0x8758FB41, 0x8759FB41, + 0x875AFB41, 0x875BFB41, 0x875CFB41, 0x875DFB41, 0x875EFB41, 0x875FFB41, 0x8760FB41, 0x8761FB41, 0x8762FB41, 0x8763FB41, 0x8764FB41, 0x8765FB41, 0x8766FB41, 0x8767FB41, 0x8768FB41, + 0x8769FB41, 0x876AFB41, 0x876BFB41, 0x876CFB41, 0x876DFB41, 0x876EFB41, 0x876FFB41, 0x8770FB41, 0x8771FB41, 0x8772FB41, 0x8773FB41, 0x8774FB41, 0x8775FB41, 0x8776FB41, 0x8777FB41, + 0x8778FB41, 0x8779FB41, 0x877AFB41, 0x877BFB41, 0x877CFB41, 0x877DFB41, 0x877EFB41, 0x877FFB41, 0x8780FB41, 0x8781FB41, 0x8782FB41, 0x8783FB41, 0x8784FB41, 0x8785FB41, 0x8786FB41, + 0x8787FB41, 0x8788FB41, 0x8789FB41, 0x878AFB41, 0x878BFB41, 0x878CFB41, 0x878DFB41, 0x878EFB41, 
0x878FFB41, 0x8790FB41, 0x8791FB41, 0x8792FB41, 0x8793FB41, 0x8794FB41, 0x8795FB41, + 0x8796FB41, 0x8797FB41, 0x8798FB41, 0x8799FB41, 0x879AFB41, 0x879BFB41, 0x879CFB41, 0x879DFB41, 0x879EFB41, 0x879FFB41, 0x87A0FB41, 0x87A1FB41, 0x87A2FB41, 0x87A3FB41, 0x87A4FB41, + 0x87A5FB41, 0x87A6FB41, 0x87A7FB41, 0x87A8FB41, 0x87A9FB41, 0x87AAFB41, 0x87ABFB41, 0x87ACFB41, 0x87ADFB41, 0x87AEFB41, 0x87AFFB41, 0x87B0FB41, 0x87B1FB41, 0x87B2FB41, 0x87B3FB41, + 0x87B4FB41, 0x87B5FB41, 0x87B6FB41, 0x87B7FB41, 0x87B8FB41, 0x87B9FB41, 0x87BAFB41, 0x87BBFB41, 0x87BCFB41, 0x87BDFB41, 0x87BEFB41, 0x87BFFB41, 0x87C0FB41, 0x87C1FB41, 0x87C2FB41, + 0x87C3FB41, 0x87C4FB41, 0x87C5FB41, 0x87C6FB41, 0x87C7FB41, 0x87C8FB41, 0x87C9FB41, 0x87CAFB41, 0x87CBFB41, 0x87CCFB41, 0x87CDFB41, 0x87CEFB41, 0x87CFFB41, 0x87D0FB41, 0x87D1FB41, + 0x87D2FB41, 0x87D3FB41, 0x87D4FB41, 0x87D5FB41, 0x87D6FB41, 0x87D7FB41, 0x87D8FB41, 0x87D9FB41, 0x87DAFB41, 0x87DBFB41, 0x87DCFB41, 0x87DDFB41, 0x87DEFB41, 0x87DFFB41, 0x87E0FB41, + 0x87E1FB41, 0x87E2FB41, 0x87E3FB41, 0x87E4FB41, 0x87E5FB41, 0x87E6FB41, 0x87E7FB41, 0x87E8FB41, 0x87E9FB41, 0x87EAFB41, 0x87EBFB41, 0x87ECFB41, 0x87EDFB41, 0x87EEFB41, 0x87EFFB41, + 0x87F0FB41, 0x87F1FB41, 0x87F2FB41, 0x87F3FB41, 0x87F4FB41, 0x87F5FB41, 0x87F6FB41, 0x87F7FB41, 0x87F8FB41, 0x87F9FB41, 0x87FAFB41, 0x87FBFB41, 0x87FCFB41, 0x87FDFB41, 0x87FEFB41, + 0x87FFFB41, 0x8800FB41, 0x8801FB41, 0x8802FB41, 0x8803FB41, 0x8804FB41, 0x8805FB41, 0x8806FB41, 0x8807FB41, 0x8808FB41, 0x8809FB41, 0x880AFB41, 0x880BFB41, 0x880CFB41, 0x880DFB41, + 0x880EFB41, 0x880FFB41, 0x8810FB41, 0x8811FB41, 0x8812FB41, 0x8813FB41, 0x8814FB41, 0x8815FB41, 0x8816FB41, 0x8817FB41, 0x8818FB41, 0x8819FB41, 0x881AFB41, 0x881BFB41, 0x881CFB41, + 0x881DFB41, 0x881EFB41, 0x881FFB41, 0x8820FB41, 0x8821FB41, 0x8822FB41, 0x8823FB41, 0x8824FB41, 0x8825FB41, 0x8826FB41, 0x8827FB41, 0x8828FB41, 0x8829FB41, 0x882AFB41, 0x882BFB41, + 0x882CFB41, 0x882DFB41, 0x882EFB41, 0x882FFB41, 0x8830FB41, 0x8831FB41, 0x8832FB41, 
0x8833FB41, 0x8834FB41, 0x8835FB41, 0x8836FB41, 0x8837FB41, 0x8838FB41, 0x8839FB41, 0x883AFB41, + 0x883BFB41, 0x883CFB41, 0x883DFB41, 0x883EFB41, 0x883FFB41, 0x8840FB41, 0x8841FB41, 0x8842FB41, 0x8843FB41, 0x8844FB41, 0x8845FB41, 0x8846FB41, 0x8847FB41, 0x8848FB41, 0x8849FB41, + 0x884AFB41, 0x884BFB41, 0x884CFB41, 0x884DFB41, 0x884EFB41, 0x884FFB41, 0x8850FB41, 0x8851FB41, 0x8852FB41, 0x8853FB41, 0x8854FB41, 0x8855FB41, 0x8856FB41, 0x8857FB41, 0x8858FB41, + 0x8859FB41, 0x885AFB41, 0x885BFB41, 0x885CFB41, 0x885DFB41, 0x885EFB41, 0x885FFB41, 0x8860FB41, 0x8861FB41, 0x8862FB41, 0x8863FB41, 0x8864FB41, 0x8865FB41, 0x8866FB41, 0x8867FB41, + 0x8868FB41, 0x8869FB41, 0x886AFB41, 0x886BFB41, 0x886CFB41, 0x886DFB41, 0x886EFB41, 0x886FFB41, 0x8870FB41, 0x8871FB41, 0x8872FB41, 0x8873FB41, 0x8874FB41, 0x8875FB41, 0x8876FB41, + 0x8877FB41, 0x8878FB41, 0x8879FB41, 0x887AFB41, 0x887BFB41, 0x887CFB41, 0x887DFB41, 0x887EFB41, 0x887FFB41, 0x8880FB41, 0x8881FB41, 0x8882FB41, 0x8883FB41, 0x8884FB41, 0x8885FB41, + 0x8886FB41, 0x8887FB41, 0x8888FB41, 0x8889FB41, 0x888AFB41, 0x888BFB41, 0x888CFB41, 0x888DFB41, 0x888EFB41, 0x888FFB41, 0x8890FB41, 0x8891FB41, 0x8892FB41, 0x8893FB41, 0x8894FB41, + 0x8895FB41, 0x8896FB41, 0x8897FB41, 0x8898FB41, 0x8899FB41, 0x889AFB41, 0x889BFB41, 0x889CFB41, 0x889DFB41, 0x889EFB41, 0x889FFB41, 0x88A0FB41, 0x88A1FB41, 0x88A2FB41, 0x88A3FB41, + 0x88A4FB41, 0x88A5FB41, 0x88A6FB41, 0x88A7FB41, 0x88A8FB41, 0x88A9FB41, 0x88AAFB41, 0x88ABFB41, 0x88ACFB41, 0x88ADFB41, 0x88AEFB41, 0x88AFFB41, 0x88B0FB41, 0x88B1FB41, 0x88B2FB41, + 0x88B3FB41, 0x88B4FB41, 0x88B5FB41, 0x88B6FB41, 0x88B7FB41, 0x88B8FB41, 0x88B9FB41, 0x88BAFB41, 0x88BBFB41, 0x88BCFB41, 0x88BDFB41, 0x88BEFB41, 0x88BFFB41, 0x88C0FB41, 0x88C1FB41, + 0x88C2FB41, 0x88C3FB41, 0x88C4FB41, 0x88C5FB41, 0x88C6FB41, 0x88C7FB41, 0x88C8FB41, 0x88C9FB41, 0x88CAFB41, 0x88CBFB41, 0x88CCFB41, 0x88CDFB41, 0x88CEFB41, 0x88CFFB41, 0x88D0FB41, + 0x88D1FB41, 0x88D2FB41, 0x88D3FB41, 0x88D4FB41, 0x88D5FB41, 0x88D6FB41, 
0x88D7FB41, 0x88D8FB41, 0x88D9FB41, 0x88DAFB41, 0x88DBFB41, 0x88DCFB41, 0x88DDFB41, 0x88DEFB41, 0x88DFFB41, + 0x88E0FB41, 0x88E1FB41, 0x88E2FB41, 0x88E3FB41, 0x88E4FB41, 0x88E5FB41, 0x88E6FB41, 0x88E7FB41, 0x88E8FB41, 0x88E9FB41, 0x88EAFB41, 0x88EBFB41, 0x88ECFB41, 0x88EDFB41, 0x88EEFB41, + 0x88EFFB41, 0x88F0FB41, 0x88F1FB41, 0x88F2FB41, 0x88F3FB41, 0x88F4FB41, 0x88F5FB41, 0x88F6FB41, 0x88F7FB41, 0x88F8FB41, 0x88F9FB41, 0x88FAFB41, 0x88FBFB41, 0x88FCFB41, 0x88FDFB41, + 0x88FEFB41, 0x88FFFB41, 0x8900FB41, 0x8901FB41, 0x8902FB41, 0x8903FB41, 0x8904FB41, 0x8905FB41, 0x8906FB41, 0x8907FB41, 0x8908FB41, 0x8909FB41, 0x890AFB41, 0x890BFB41, 0x890CFB41, + 0x890DFB41, 0x890EFB41, 0x890FFB41, 0x8910FB41, 0x8911FB41, 0x8912FB41, 0x8913FB41, 0x8914FB41, 0x8915FB41, 0x8916FB41, 0x8917FB41, 0x8918FB41, 0x8919FB41, 0x891AFB41, 0x891BFB41, + 0x891CFB41, 0x891DFB41, 0x891EFB41, 0x891FFB41, 0x8920FB41, 0x8921FB41, 0x8922FB41, 0x8923FB41, 0x8924FB41, 0x8925FB41, 0x8926FB41, 0x8927FB41, 0x8928FB41, 0x8929FB41, 0x892AFB41, + 0x892BFB41, 0x892CFB41, 0x892DFB41, 0x892EFB41, 0x892FFB41, 0x8930FB41, 0x8931FB41, 0x8932FB41, 0x8933FB41, 0x8934FB41, 0x8935FB41, 0x8936FB41, 0x8937FB41, 0x8938FB41, 0x8939FB41, + 0x893AFB41, 0x893BFB41, 0x893CFB41, 0x893DFB41, 0x893EFB41, 0x893FFB41, 0x8940FB41, 0x8941FB41, 0x8942FB41, 0x8943FB41, 0x8944FB41, 0x8945FB41, 0x8946FB41, 0x8947FB41, 0x8948FB41, + 0x8949FB41, 0x894AFB41, 0x894BFB41, 0x894CFB41, 0x894DFB41, 0x894EFB41, 0x894FFB41, 0x8950FB41, 0x8951FB41, 0x8952FB41, 0x8953FB41, 0x8954FB41, 0x8955FB41, 0x8956FB41, 0x8957FB41, + 0x8958FB41, 0x8959FB41, 0x895AFB41, 0x895BFB41, 0x895CFB41, 0x895DFB41, 0x895EFB41, 0x895FFB41, 0x8960FB41, 0x8961FB41, 0x8962FB41, 0x8963FB41, 0x8964FB41, 0x8965FB41, 0x8966FB41, + 0x8967FB41, 0x8968FB41, 0x8969FB41, 0x896AFB41, 0x896BFB41, 0x896CFB41, 0x896DFB41, 0x896EFB41, 0x896FFB41, 0x8970FB41, 0x8971FB41, 0x8972FB41, 0x8973FB41, 0x8974FB41, 0x8975FB41, + 0x8976FB41, 0x8977FB41, 0x8978FB41, 0x8979FB41, 0x897AFB41, 
0x897BFB41, 0x897CFB41, 0x897DFB41, 0x897EFB41, 0x897FFB41, 0x8980FB41, 0x8981FB41, 0x8982FB41, 0x8983FB41, 0x8984FB41, + 0x8985FB41, 0x8986FB41, 0x8987FB41, 0x8988FB41, 0x8989FB41, 0x898AFB41, 0x898BFB41, 0x898CFB41, 0x898DFB41, 0x898EFB41, 0x898FFB41, 0x8990FB41, 0x8991FB41, 0x8992FB41, 0x8993FB41, + 0x8994FB41, 0x8995FB41, 0x8996FB41, 0x8997FB41, 0x8998FB41, 0x8999FB41, 0x899AFB41, 0x899BFB41, 0x899CFB41, 0x899DFB41, 0x899EFB41, 0x899FFB41, 0x89A0FB41, 0x89A1FB41, 0x89A2FB41, + 0x89A3FB41, 0x89A4FB41, 0x89A5FB41, 0x89A6FB41, 0x89A7FB41, 0x89A8FB41, 0x89A9FB41, 0x89AAFB41, 0x89ABFB41, 0x89ACFB41, 0x89ADFB41, 0x89AEFB41, 0x89AFFB41, 0x89B0FB41, 0x89B1FB41, + 0x89B2FB41, 0x89B3FB41, 0x89B4FB41, 0x89B5FB41, 0x89B6FB41, 0x89B7FB41, 0x89B8FB41, 0x89B9FB41, 0x89BAFB41, 0x89BBFB41, 0x89BCFB41, 0x89BDFB41, 0x89BEFB41, 0x89BFFB41, 0x89C0FB41, + 0x89C1FB41, 0x89C2FB41, 0x89C3FB41, 0x89C4FB41, 0x89C5FB41, 0x89C6FB41, 0x89C7FB41, 0x89C8FB41, 0x89C9FB41, 0x89CAFB41, 0x89CBFB41, 0x89CCFB41, 0x89CDFB41, 0x89CEFB41, 0x89CFFB41, + 0x89D0FB41, 0x89D1FB41, 0x89D2FB41, 0x89D3FB41, 0x89D4FB41, 0x89D5FB41, 0x89D6FB41, 0x89D7FB41, 0x89D8FB41, 0x89D9FB41, 0x89DAFB41, 0x89DBFB41, 0x89DCFB41, 0x89DDFB41, 0x89DEFB41, + 0x89DFFB41, 0x89E0FB41, 0x89E1FB41, 0x89E2FB41, 0x89E3FB41, 0x89E4FB41, 0x89E5FB41, 0x89E6FB41, 0x89E7FB41, 0x89E8FB41, 0x89E9FB41, 0x89EAFB41, 0x89EBFB41, 0x89ECFB41, 0x89EDFB41, + 0x89EEFB41, 0x89EFFB41, 0x89F0FB41, 0x89F1FB41, 0x89F2FB41, 0x89F3FB41, 0x89F4FB41, 0x89F5FB41, 0x89F6FB41, 0x89F7FB41, 0x89F8FB41, 0x89F9FB41, 0x89FAFB41, 0x89FBFB41, 0x89FCFB41, + 0x89FDFB41, 0x89FEFB41, 0x89FFFB41, 0x8A00FB41, 0x8A01FB41, 0x8A02FB41, 0x8A03FB41, 0x8A04FB41, 0x8A05FB41, 0x8A06FB41, 0x8A07FB41, 0x8A08FB41, 0x8A09FB41, 0x8A0AFB41, 0x8A0BFB41, + 0x8A0CFB41, 0x8A0DFB41, 0x8A0EFB41, 0x8A0FFB41, 0x8A10FB41, 0x8A11FB41, 0x8A12FB41, 0x8A13FB41, 0x8A14FB41, 0x8A15FB41, 0x8A16FB41, 0x8A17FB41, 0x8A18FB41, 0x8A19FB41, 0x8A1AFB41, + 0x8A1BFB41, 0x8A1CFB41, 0x8A1DFB41, 0x8A1EFB41, 
0x8A1FFB41, 0x8A20FB41, 0x8A21FB41, 0x8A22FB41, 0x8A23FB41, 0x8A24FB41, 0x8A25FB41, 0x8A26FB41, 0x8A27FB41, 0x8A28FB41, 0x8A29FB41, + 0x8A2AFB41, 0x8A2BFB41, 0x8A2CFB41, 0x8A2DFB41, 0x8A2EFB41, 0x8A2FFB41, 0x8A30FB41, 0x8A31FB41, 0x8A32FB41, 0x8A33FB41, 0x8A34FB41, 0x8A35FB41, 0x8A36FB41, 0x8A37FB41, 0x8A38FB41, + 0x8A39FB41, 0x8A3AFB41, 0x8A3BFB41, 0x8A3CFB41, 0x8A3DFB41, 0x8A3EFB41, 0x8A3FFB41, 0x8A40FB41, 0x8A41FB41, 0x8A42FB41, 0x8A43FB41, 0x8A44FB41, 0x8A45FB41, 0x8A46FB41, 0x8A47FB41, + 0x8A48FB41, 0x8A49FB41, 0x8A4AFB41, 0x8A4BFB41, 0x8A4CFB41, 0x8A4DFB41, 0x8A4EFB41, 0x8A4FFB41, 0x8A50FB41, 0x8A51FB41, 0x8A52FB41, 0x8A53FB41, 0x8A54FB41, 0x8A55FB41, 0x8A56FB41, + 0x8A57FB41, 0x8A58FB41, 0x8A59FB41, 0x8A5AFB41, 0x8A5BFB41, 0x8A5CFB41, 0x8A5DFB41, 0x8A5EFB41, 0x8A5FFB41, 0x8A60FB41, 0x8A61FB41, 0x8A62FB41, 0x8A63FB41, 0x8A64FB41, 0x8A65FB41, + 0x8A66FB41, 0x8A67FB41, 0x8A68FB41, 0x8A69FB41, 0x8A6AFB41, 0x8A6BFB41, 0x8A6CFB41, 0x8A6DFB41, 0x8A6EFB41, 0x8A6FFB41, 0x8A70FB41, 0x8A71FB41, 0x8A72FB41, 0x8A73FB41, 0x8A74FB41, + 0x8A75FB41, 0x8A76FB41, 0x8A77FB41, 0x8A78FB41, 0x8A79FB41, 0x8A7AFB41, 0x8A7BFB41, 0x8A7CFB41, 0x8A7DFB41, 0x8A7EFB41, 0x8A7FFB41, 0x8A80FB41, 0x8A81FB41, 0x8A82FB41, 0x8A83FB41, + 0x8A84FB41, 0x8A85FB41, 0x8A86FB41, 0x8A87FB41, 0x8A88FB41, 0x8A89FB41, 0x8A8AFB41, 0x8A8BFB41, 0x8A8CFB41, 0x8A8DFB41, 0x8A8EFB41, 0x8A8FFB41, 0x8A90FB41, 0x8A91FB41, 0x8A92FB41, + 0x8A93FB41, 0x8A94FB41, 0x8A95FB41, 0x8A96FB41, 0x8A97FB41, 0x8A98FB41, 0x8A99FB41, 0x8A9AFB41, 0x8A9BFB41, 0x8A9CFB41, 0x8A9DFB41, 0x8A9EFB41, 0x8A9FFB41, 0x8AA0FB41, 0x8AA1FB41, + 0x8AA2FB41, 0x8AA3FB41, 0x8AA4FB41, 0x8AA5FB41, 0x8AA6FB41, 0x8AA7FB41, 0x8AA8FB41, 0x8AA9FB41, 0x8AAAFB41, 0x8AABFB41, 0x8AACFB41, 0x8AADFB41, 0x8AAEFB41, 0x8AAFFB41, 0x8AB0FB41, + 0x8AB1FB41, 0x8AB2FB41, 0x8AB3FB41, 0x8AB4FB41, 0x8AB5FB41, 0x8AB6FB41, 0x8AB7FB41, 0x8AB8FB41, 0x8AB9FB41, 0x8ABAFB41, 0x8ABBFB41, 0x8ABCFB41, 0x8ABDFB41, 0x8ABEFB41, 0x8ABFFB41, + 0x8AC0FB41, 0x8AC1FB41, 0x8AC2FB41, 
0x8AC3FB41, 0x8AC4FB41, 0x8AC5FB41, 0x8AC6FB41, 0x8AC7FB41, 0x8AC8FB41, 0x8AC9FB41, 0x8ACAFB41, 0x8ACBFB41, 0x8ACCFB41, 0x8ACDFB41, 0x8ACEFB41, + 0x8ACFFB41, 0x8AD0FB41, 0x8AD1FB41, 0x8AD2FB41, 0x8AD3FB41, 0x8AD4FB41, 0x8AD5FB41, 0x8AD6FB41, 0x8AD7FB41, 0x8AD8FB41, 0x8AD9FB41, 0x8ADAFB41, 0x8ADBFB41, 0x8ADCFB41, 0x8ADDFB41, + 0x8ADEFB41, 0x8ADFFB41, 0x8AE0FB41, 0x8AE1FB41, 0x8AE2FB41, 0x8AE3FB41, 0x8AE4FB41, 0x8AE5FB41, 0x8AE6FB41, 0x8AE7FB41, 0x8AE8FB41, 0x8AE9FB41, 0x8AEAFB41, 0x8AEBFB41, 0x8AECFB41, + 0x8AEDFB41, 0x8AEEFB41, 0x8AEFFB41, 0x8AF0FB41, 0x8AF1FB41, 0x8AF2FB41, 0x8AF3FB41, 0x8AF4FB41, 0x8AF5FB41, 0x8AF6FB41, 0x8AF7FB41, 0x8AF8FB41, 0x8AF9FB41, 0x8AFAFB41, 0x8AFBFB41, + 0x8AFCFB41, 0x8AFDFB41, 0x8AFEFB41, 0x8AFFFB41, 0x8B00FB41, 0x8B01FB41, 0x8B02FB41, 0x8B03FB41, 0x8B04FB41, 0x8B05FB41, 0x8B06FB41, 0x8B07FB41, 0x8B08FB41, 0x8B09FB41, 0x8B0AFB41, + 0x8B0BFB41, 0x8B0CFB41, 0x8B0DFB41, 0x8B0EFB41, 0x8B0FFB41, 0x8B10FB41, 0x8B11FB41, 0x8B12FB41, 0x8B13FB41, 0x8B14FB41, 0x8B15FB41, 0x8B16FB41, 0x8B17FB41, 0x8B18FB41, 0x8B19FB41, + 0x8B1AFB41, 0x8B1BFB41, 0x8B1CFB41, 0x8B1DFB41, 0x8B1EFB41, 0x8B1FFB41, 0x8B20FB41, 0x8B21FB41, 0x8B22FB41, 0x8B23FB41, 0x8B24FB41, 0x8B25FB41, 0x8B26FB41, 0x8B27FB41, 0x8B28FB41, + 0x8B29FB41, 0x8B2AFB41, 0x8B2BFB41, 0x8B2CFB41, 0x8B2DFB41, 0x8B2EFB41, 0x8B2FFB41, 0x8B30FB41, 0x8B31FB41, 0x8B32FB41, 0x8B33FB41, 0x8B34FB41, 0x8B35FB41, 0x8B36FB41, 0x8B37FB41, + 0x8B38FB41, 0x8B39FB41, 0x8B3AFB41, 0x8B3BFB41, 0x8B3CFB41, 0x8B3DFB41, 0x8B3EFB41, 0x8B3FFB41, 0x8B40FB41, 0x8B41FB41, 0x8B42FB41, 0x8B43FB41, 0x8B44FB41, 0x8B45FB41, 0x8B46FB41, + 0x8B47FB41, 0x8B48FB41, 0x8B49FB41, 0x8B4AFB41, 0x8B4BFB41, 0x8B4CFB41, 0x8B4DFB41, 0x8B4EFB41, 0x8B4FFB41, 0x8B50FB41, 0x8B51FB41, 0x8B52FB41, 0x8B53FB41, 0x8B54FB41, 0x8B55FB41, + 0x8B56FB41, 0x8B57FB41, 0x8B58FB41, 0x8B59FB41, 0x8B5AFB41, 0x8B5BFB41, 0x8B5CFB41, 0x8B5DFB41, 0x8B5EFB41, 0x8B5FFB41, 0x8B60FB41, 0x8B61FB41, 0x8B62FB41, 0x8B63FB41, 0x8B64FB41, + 0x8B65FB41, 0x8B66FB41, 
0x8B67FB41, 0x8B68FB41, 0x8B69FB41, 0x8B6AFB41, 0x8B6BFB41, 0x8B6CFB41, 0x8B6DFB41, 0x8B6EFB41, 0x8B6FFB41, 0x8B70FB41, 0x8B71FB41, 0x8B72FB41, 0x8B73FB41, + 0x8B74FB41, 0x8B75FB41, 0x8B76FB41, 0x8B77FB41, 0x8B78FB41, 0x8B79FB41, 0x8B7AFB41, 0x8B7BFB41, 0x8B7CFB41, 0x8B7DFB41, 0x8B7EFB41, 0x8B7FFB41, 0x8B80FB41, 0x8B81FB41, 0x8B82FB41, + 0x8B83FB41, 0x8B84FB41, 0x8B85FB41, 0x8B86FB41, 0x8B87FB41, 0x8B88FB41, 0x8B89FB41, 0x8B8AFB41, 0x8B8BFB41, 0x8B8CFB41, 0x8B8DFB41, 0x8B8EFB41, 0x8B8FFB41, 0x8B90FB41, 0x8B91FB41, + 0x8B92FB41, 0x8B93FB41, 0x8B94FB41, 0x8B95FB41, 0x8B96FB41, 0x8B97FB41, 0x8B98FB41, 0x8B99FB41, 0x8B9AFB41, 0x8B9BFB41, 0x8B9CFB41, 0x8B9DFB41, 0x8B9EFB41, 0x8B9FFB41, 0x8BA0FB41, + 0x8BA1FB41, 0x8BA2FB41, 0x8BA3FB41, 0x8BA4FB41, 0x8BA5FB41, 0x8BA6FB41, 0x8BA7FB41, 0x8BA8FB41, 0x8BA9FB41, 0x8BAAFB41, 0x8BABFB41, 0x8BACFB41, 0x8BADFB41, 0x8BAEFB41, 0x8BAFFB41, + 0x8BB0FB41, 0x8BB1FB41, 0x8BB2FB41, 0x8BB3FB41, 0x8BB4FB41, 0x8BB5FB41, 0x8BB6FB41, 0x8BB7FB41, 0x8BB8FB41, 0x8BB9FB41, 0x8BBAFB41, 0x8BBBFB41, 0x8BBCFB41, 0x8BBDFB41, 0x8BBEFB41, + 0x8BBFFB41, 0x8BC0FB41, 0x8BC1FB41, 0x8BC2FB41, 0x8BC3FB41, 0x8BC4FB41, 0x8BC5FB41, 0x8BC6FB41, 0x8BC7FB41, 0x8BC8FB41, 0x8BC9FB41, 0x8BCAFB41, 0x8BCBFB41, 0x8BCCFB41, 0x8BCDFB41, + 0x8BCEFB41, 0x8BCFFB41, 0x8BD0FB41, 0x8BD1FB41, 0x8BD2FB41, 0x8BD3FB41, 0x8BD4FB41, 0x8BD5FB41, 0x8BD6FB41, 0x8BD7FB41, 0x8BD8FB41, 0x8BD9FB41, 0x8BDAFB41, 0x8BDBFB41, 0x8BDCFB41, + 0x8BDDFB41, 0x8BDEFB41, 0x8BDFFB41, 0x8BE0FB41, 0x8BE1FB41, 0x8BE2FB41, 0x8BE3FB41, 0x8BE4FB41, 0x8BE5FB41, 0x8BE6FB41, 0x8BE7FB41, 0x8BE8FB41, 0x8BE9FB41, 0x8BEAFB41, 0x8BEBFB41, + 0x8BECFB41, 0x8BEDFB41, 0x8BEEFB41, 0x8BEFFB41, 0x8BF0FB41, 0x8BF1FB41, 0x8BF2FB41, 0x8BF3FB41, 0x8BF4FB41, 0x8BF5FB41, 0x8BF6FB41, 0x8BF7FB41, 0x8BF8FB41, 0x8BF9FB41, 0x8BFAFB41, + 0x8BFBFB41, 0x8BFCFB41, 0x8BFDFB41, 0x8BFEFB41, 0x8BFFFB41, 0x8C00FB41, 0x8C01FB41, 0x8C02FB41, 0x8C03FB41, 0x8C04FB41, 0x8C05FB41, 0x8C06FB41, 0x8C07FB41, 0x8C08FB41, 0x8C09FB41, + 0x8C0AFB41, 
0x8C0BFB41, 0x8C0CFB41, 0x8C0DFB41, 0x8C0EFB41, 0x8C0FFB41, 0x8C10FB41, 0x8C11FB41, 0x8C12FB41, 0x8C13FB41, 0x8C14FB41, 0x8C15FB41, 0x8C16FB41, 0x8C17FB41, 0x8C18FB41, + 0x8C19FB41, 0x8C1AFB41, 0x8C1BFB41, 0x8C1CFB41, 0x8C1DFB41, 0x8C1EFB41, 0x8C1FFB41, 0x8C20FB41, 0x8C21FB41, 0x8C22FB41, 0x8C23FB41, 0x8C24FB41, 0x8C25FB41, 0x8C26FB41, 0x8C27FB41, + 0x8C28FB41, 0x8C29FB41, 0x8C2AFB41, 0x8C2BFB41, 0x8C2CFB41, 0x8C2DFB41, 0x8C2EFB41, 0x8C2FFB41, 0x8C30FB41, 0x8C31FB41, 0x8C32FB41, 0x8C33FB41, 0x8C34FB41, 0x8C35FB41, 0x8C36FB41, + 0x8C37FB41, 0x8C38FB41, 0x8C39FB41, 0x8C3AFB41, 0x8C3BFB41, 0x8C3CFB41, 0x8C3DFB41, 0x8C3EFB41, 0x8C3FFB41, 0x8C40FB41, 0x8C41FB41, 0x8C42FB41, 0x8C43FB41, 0x8C44FB41, 0x8C45FB41, + 0x8C46FB41, 0x8C47FB41, 0x8C48FB41, 0x8C49FB41, 0x8C4AFB41, 0x8C4BFB41, 0x8C4CFB41, 0x8C4DFB41, 0x8C4EFB41, 0x8C4FFB41, 0x8C50FB41, 0x8C51FB41, 0x8C52FB41, 0x8C53FB41, 0x8C54FB41, + 0x8C55FB41, 0x8C56FB41, 0x8C57FB41, 0x8C58FB41, 0x8C59FB41, 0x8C5AFB41, 0x8C5BFB41, 0x8C5CFB41, 0x8C5DFB41, 0x8C5EFB41, 0x8C5FFB41, 0x8C60FB41, 0x8C61FB41, 0x8C62FB41, 0x8C63FB41, + 0x8C64FB41, 0x8C65FB41, 0x8C66FB41, 0x8C67FB41, 0x8C68FB41, 0x8C69FB41, 0x8C6AFB41, 0x8C6BFB41, 0x8C6CFB41, 0x8C6DFB41, 0x8C6EFB41, 0x8C6FFB41, 0x8C70FB41, 0x8C71FB41, 0x8C72FB41, + 0x8C73FB41, 0x8C74FB41, 0x8C75FB41, 0x8C76FB41, 0x8C77FB41, 0x8C78FB41, 0x8C79FB41, 0x8C7AFB41, 0x8C7BFB41, 0x8C7CFB41, 0x8C7DFB41, 0x8C7EFB41, 0x8C7FFB41, 0x8C80FB41, 0x8C81FB41, + 0x8C82FB41, 0x8C83FB41, 0x8C84FB41, 0x8C85FB41, 0x8C86FB41, 0x8C87FB41, 0x8C88FB41, 0x8C89FB41, 0x8C8AFB41, 0x8C8BFB41, 0x8C8CFB41, 0x8C8DFB41, 0x8C8EFB41, 0x8C8FFB41, 0x8C90FB41, + 0x8C91FB41, 0x8C92FB41, 0x8C93FB41, 0x8C94FB41, 0x8C95FB41, 0x8C96FB41, 0x8C97FB41, 0x8C98FB41, 0x8C99FB41, 0x8C9AFB41, 0x8C9BFB41, 0x8C9CFB41, 0x8C9DFB41, 0x8C9EFB41, 0x8C9FFB41, + 0x8CA0FB41, 0x8CA1FB41, 0x8CA2FB41, 0x8CA3FB41, 0x8CA4FB41, 0x8CA5FB41, 0x8CA6FB41, 0x8CA7FB41, 0x8CA8FB41, 0x8CA9FB41, 0x8CAAFB41, 0x8CABFB41, 0x8CACFB41, 0x8CADFB41, 0x8CAEFB41, + 
0x8CAFFB41, 0x8CB0FB41, 0x8CB1FB41, 0x8CB2FB41, 0x8CB3FB41, 0x8CB4FB41, 0x8CB5FB41, 0x8CB6FB41, 0x8CB7FB41, 0x8CB8FB41, 0x8CB9FB41, 0x8CBAFB41, 0x8CBBFB41, 0x8CBCFB41, 0x8CBDFB41, + 0x8CBEFB41, 0x8CBFFB41, 0x8CC0FB41, 0x8CC1FB41, 0x8CC2FB41, 0x8CC3FB41, 0x8CC4FB41, 0x8CC5FB41, 0x8CC6FB41, 0x8CC7FB41, 0x8CC8FB41, 0x8CC9FB41, 0x8CCAFB41, 0x8CCBFB41, 0x8CCCFB41, + 0x8CCDFB41, 0x8CCEFB41, 0x8CCFFB41, 0x8CD0FB41, 0x8CD1FB41, 0x8CD2FB41, 0x8CD3FB41, 0x8CD4FB41, 0x8CD5FB41, 0x8CD6FB41, 0x8CD7FB41, 0x8CD8FB41, 0x8CD9FB41, 0x8CDAFB41, 0x8CDBFB41, + 0x8CDCFB41, 0x8CDDFB41, 0x8CDEFB41, 0x8CDFFB41, 0x8CE0FB41, 0x8CE1FB41, 0x8CE2FB41, 0x8CE3FB41, 0x8CE4FB41, 0x8CE5FB41, 0x8CE6FB41, 0x8CE7FB41, 0x8CE8FB41, 0x8CE9FB41, 0x8CEAFB41, + 0x8CEBFB41, 0x8CECFB41, 0x8CEDFB41, 0x8CEEFB41, 0x8CEFFB41, 0x8CF0FB41, 0x8CF1FB41, 0x8CF2FB41, 0x8CF3FB41, 0x8CF4FB41, 0x8CF5FB41, 0x8CF6FB41, 0x8CF7FB41, 0x8CF8FB41, 0x8CF9FB41, + 0x8CFAFB41, 0x8CFBFB41, 0x8CFCFB41, 0x8CFDFB41, 0x8CFEFB41, 0x8CFFFB41, 0x8D00FB41, 0x8D01FB41, 0x8D02FB41, 0x8D03FB41, 0x8D04FB41, 0x8D05FB41, 0x8D06FB41, 0x8D07FB41, 0x8D08FB41, + 0x8D09FB41, 0x8D0AFB41, 0x8D0BFB41, 0x8D0CFB41, 0x8D0DFB41, 0x8D0EFB41, 0x8D0FFB41, 0x8D10FB41, 0x8D11FB41, 0x8D12FB41, 0x8D13FB41, 0x8D14FB41, 0x8D15FB41, 0x8D16FB41, 0x8D17FB41, + 0x8D18FB41, 0x8D19FB41, 0x8D1AFB41, 0x8D1BFB41, 0x8D1CFB41, 0x8D1DFB41, 0x8D1EFB41, 0x8D1FFB41, 0x8D20FB41, 0x8D21FB41, 0x8D22FB41, 0x8D23FB41, 0x8D24FB41, 0x8D25FB41, 0x8D26FB41, + 0x8D27FB41, 0x8D28FB41, 0x8D29FB41, 0x8D2AFB41, 0x8D2BFB41, 0x8D2CFB41, 0x8D2DFB41, 0x8D2EFB41, 0x8D2FFB41, 0x8D30FB41, 0x8D31FB41, 0x8D32FB41, 0x8D33FB41, 0x8D34FB41, 0x8D35FB41, + 0x8D36FB41, 0x8D37FB41, 0x8D38FB41, 0x8D39FB41, 0x8D3AFB41, 0x8D3BFB41, 0x8D3CFB41, 0x8D3DFB41, 0x8D3EFB41, 0x8D3FFB41, 0x8D40FB41, 0x8D41FB41, 0x8D42FB41, 0x8D43FB41, 0x8D44FB41, + 0x8D45FB41, 0x8D46FB41, 0x8D47FB41, 0x8D48FB41, 0x8D49FB41, 0x8D4AFB41, 0x8D4BFB41, 0x8D4CFB41, 0x8D4DFB41, 0x8D4EFB41, 0x8D4FFB41, 0x8D50FB41, 0x8D51FB41, 0x8D52FB41, 0x8D53FB41, 
+ 0x8D54FB41, 0x8D55FB41, 0x8D56FB41, 0x8D57FB41, 0x8D58FB41, 0x8D59FB41, 0x8D5AFB41, 0x8D5BFB41, 0x8D5CFB41, 0x8D5DFB41, 0x8D5EFB41, 0x8D5FFB41, 0x8D60FB41, 0x8D61FB41, 0x8D62FB41, + 0x8D63FB41, 0x8D64FB41, 0x8D65FB41, 0x8D66FB41, 0x8D67FB41, 0x8D68FB41, 0x8D69FB41, 0x8D6AFB41, 0x8D6BFB41, 0x8D6CFB41, 0x8D6DFB41, 0x8D6EFB41, 0x8D6FFB41, 0x8D70FB41, 0x8D71FB41, + 0x8D72FB41, 0x8D73FB41, 0x8D74FB41, 0x8D75FB41, 0x8D76FB41, 0x8D77FB41, 0x8D78FB41, 0x8D79FB41, 0x8D7AFB41, 0x8D7BFB41, 0x8D7CFB41, 0x8D7DFB41, 0x8D7EFB41, 0x8D7FFB41, 0x8D80FB41, + 0x8D81FB41, 0x8D82FB41, 0x8D83FB41, 0x8D84FB41, 0x8D85FB41, 0x8D86FB41, 0x8D87FB41, 0x8D88FB41, 0x8D89FB41, 0x8D8AFB41, 0x8D8BFB41, 0x8D8CFB41, 0x8D8DFB41, 0x8D8EFB41, 0x8D8FFB41, + 0x8D90FB41, 0x8D91FB41, 0x8D92FB41, 0x8D93FB41, 0x8D94FB41, 0x8D95FB41, 0x8D96FB41, 0x8D97FB41, 0x8D98FB41, 0x8D99FB41, 0x8D9AFB41, 0x8D9BFB41, 0x8D9CFB41, 0x8D9DFB41, 0x8D9EFB41, + 0x8D9FFB41, 0x8DA0FB41, 0x8DA1FB41, 0x8DA2FB41, 0x8DA3FB41, 0x8DA4FB41, 0x8DA5FB41, 0x8DA6FB41, 0x8DA7FB41, 0x8DA8FB41, 0x8DA9FB41, 0x8DAAFB41, 0x8DABFB41, 0x8DACFB41, 0x8DADFB41, + 0x8DAEFB41, 0x8DAFFB41, 0x8DB0FB41, 0x8DB1FB41, 0x8DB2FB41, 0x8DB3FB41, 0x8DB4FB41, 0x8DB5FB41, 0x8DB6FB41, 0x8DB7FB41, 0x8DB8FB41, 0x8DB9FB41, 0x8DBAFB41, 0x8DBBFB41, 0x8DBCFB41, + 0x8DBDFB41, 0x8DBEFB41, 0x8DBFFB41, 0x8DC0FB41, 0x8DC1FB41, 0x8DC2FB41, 0x8DC3FB41, 0x8DC4FB41, 0x8DC5FB41, 0x8DC6FB41, 0x8DC7FB41, 0x8DC8FB41, 0x8DC9FB41, 0x8DCAFB41, 0x8DCBFB41, + 0x8DCCFB41, 0x8DCDFB41, 0x8DCEFB41, 0x8DCFFB41, 0x8DD0FB41, 0x8DD1FB41, 0x8DD2FB41, 0x8DD3FB41, 0x8DD4FB41, 0x8DD5FB41, 0x8DD6FB41, 0x8DD7FB41, 0x8DD8FB41, 0x8DD9FB41, 0x8DDAFB41, + 0x8DDBFB41, 0x8DDCFB41, 0x8DDDFB41, 0x8DDEFB41, 0x8DDFFB41, 0x8DE0FB41, 0x8DE1FB41, 0x8DE2FB41, 0x8DE3FB41, 0x8DE4FB41, 0x8DE5FB41, 0x8DE6FB41, 0x8DE7FB41, 0x8DE8FB41, 0x8DE9FB41, + 0x8DEAFB41, 0x8DEBFB41, 0x8DECFB41, 0x8DEDFB41, 0x8DEEFB41, 0x8DEFFB41, 0x8DF0FB41, 0x8DF1FB41, 0x8DF2FB41, 0x8DF3FB41, 0x8DF4FB41, 0x8DF5FB41, 0x8DF6FB41, 0x8DF7FB41, 
0x8DF8FB41, + 0x8DF9FB41, 0x8DFAFB41, 0x8DFBFB41, 0x8DFCFB41, 0x8DFDFB41, 0x8DFEFB41, 0x8DFFFB41, 0x8E00FB41, 0x8E01FB41, 0x8E02FB41, 0x8E03FB41, 0x8E04FB41, 0x8E05FB41, 0x8E06FB41, 0x8E07FB41, + 0x8E08FB41, 0x8E09FB41, 0x8E0AFB41, 0x8E0BFB41, 0x8E0CFB41, 0x8E0DFB41, 0x8E0EFB41, 0x8E0FFB41, 0x8E10FB41, 0x8E11FB41, 0x8E12FB41, 0x8E13FB41, 0x8E14FB41, 0x8E15FB41, 0x8E16FB41, + 0x8E17FB41, 0x8E18FB41, 0x8E19FB41, 0x8E1AFB41, 0x8E1BFB41, 0x8E1CFB41, 0x8E1DFB41, 0x8E1EFB41, 0x8E1FFB41, 0x8E20FB41, 0x8E21FB41, 0x8E22FB41, 0x8E23FB41, 0x8E24FB41, 0x8E25FB41, + 0x8E26FB41, 0x8E27FB41, 0x8E28FB41, 0x8E29FB41, 0x8E2AFB41, 0x8E2BFB41, 0x8E2CFB41, 0x8E2DFB41, 0x8E2EFB41, 0x8E2FFB41, 0x8E30FB41, 0x8E31FB41, 0x8E32FB41, 0x8E33FB41, 0x8E34FB41, + 0x8E35FB41, 0x8E36FB41, 0x8E37FB41, 0x8E38FB41, 0x8E39FB41, 0x8E3AFB41, 0x8E3BFB41, 0x8E3CFB41, 0x8E3DFB41, 0x8E3EFB41, 0x8E3FFB41, 0x8E40FB41, 0x8E41FB41, 0x8E42FB41, 0x8E43FB41, + 0x8E44FB41, 0x8E45FB41, 0x8E46FB41, 0x8E47FB41, 0x8E48FB41, 0x8E49FB41, 0x8E4AFB41, 0x8E4BFB41, 0x8E4CFB41, 0x8E4DFB41, 0x8E4EFB41, 0x8E4FFB41, 0x8E50FB41, 0x8E51FB41, 0x8E52FB41, + 0x8E53FB41, 0x8E54FB41, 0x8E55FB41, 0x8E56FB41, 0x8E57FB41, 0x8E58FB41, 0x8E59FB41, 0x8E5AFB41, 0x8E5BFB41, 0x8E5CFB41, 0x8E5DFB41, 0x8E5EFB41, 0x8E5FFB41, 0x8E60FB41, 0x8E61FB41, + 0x8E62FB41, 0x8E63FB41, 0x8E64FB41, 0x8E65FB41, 0x8E66FB41, 0x8E67FB41, 0x8E68FB41, 0x8E69FB41, 0x8E6AFB41, 0x8E6BFB41, 0x8E6CFB41, 0x8E6DFB41, 0x8E6EFB41, 0x8E6FFB41, 0x8E70FB41, + 0x8E71FB41, 0x8E72FB41, 0x8E73FB41, 0x8E74FB41, 0x8E75FB41, 0x8E76FB41, 0x8E77FB41, 0x8E78FB41, 0x8E79FB41, 0x8E7AFB41, 0x8E7BFB41, 0x8E7CFB41, 0x8E7DFB41, 0x8E7EFB41, 0x8E7FFB41, + 0x8E80FB41, 0x8E81FB41, 0x8E82FB41, 0x8E83FB41, 0x8E84FB41, 0x8E85FB41, 0x8E86FB41, 0x8E87FB41, 0x8E88FB41, 0x8E89FB41, 0x8E8AFB41, 0x8E8BFB41, 0x8E8CFB41, 0x8E8DFB41, 0x8E8EFB41, + 0x8E8FFB41, 0x8E90FB41, 0x8E91FB41, 0x8E92FB41, 0x8E93FB41, 0x8E94FB41, 0x8E95FB41, 0x8E96FB41, 0x8E97FB41, 0x8E98FB41, 0x8E99FB41, 0x8E9AFB41, 0x8E9BFB41, 
0x8E9CFB41, 0x8E9DFB41, + 0x8E9EFB41, 0x8E9FFB41, 0x8EA0FB41, 0x8EA1FB41, 0x8EA2FB41, 0x8EA3FB41, 0x8EA4FB41, 0x8EA5FB41, 0x8EA6FB41, 0x8EA7FB41, 0x8EA8FB41, 0x8EA9FB41, 0x8EAAFB41, 0x8EABFB41, 0x8EACFB41, + 0x8EADFB41, 0x8EAEFB41, 0x8EAFFB41, 0x8EB0FB41, 0x8EB1FB41, 0x8EB2FB41, 0x8EB3FB41, 0x8EB4FB41, 0x8EB5FB41, 0x8EB6FB41, 0x8EB7FB41, 0x8EB8FB41, 0x8EB9FB41, 0x8EBAFB41, 0x8EBBFB41, + 0x8EBCFB41, 0x8EBDFB41, 0x8EBEFB41, 0x8EBFFB41, 0x8EC0FB41, 0x8EC1FB41, 0x8EC2FB41, 0x8EC3FB41, 0x8EC4FB41, 0x8EC5FB41, 0x8EC6FB41, 0x8EC7FB41, 0x8EC8FB41, 0x8EC9FB41, 0x8ECAFB41, + 0x8ECBFB41, 0x8ECCFB41, 0x8ECDFB41, 0x8ECEFB41, 0x8ECFFB41, 0x8ED0FB41, 0x8ED1FB41, 0x8ED2FB41, 0x8ED3FB41, 0x8ED4FB41, 0x8ED5FB41, 0x8ED6FB41, 0x8ED7FB41, 0x8ED8FB41, 0x8ED9FB41, + 0x8EDAFB41, 0x8EDBFB41, 0x8EDCFB41, 0x8EDDFB41, 0x8EDEFB41, 0x8EDFFB41, 0x8EE0FB41, 0x8EE1FB41, 0x8EE2FB41, 0x8EE3FB41, 0x8EE4FB41, 0x8EE5FB41, 0x8EE6FB41, 0x8EE7FB41, 0x8EE8FB41, + 0x8EE9FB41, 0x8EEAFB41, 0x8EEBFB41, 0x8EECFB41, 0x8EEDFB41, 0x8EEEFB41, 0x8EEFFB41, 0x8EF0FB41, 0x8EF1FB41, 0x8EF2FB41, 0x8EF3FB41, 0x8EF4FB41, 0x8EF5FB41, 0x8EF6FB41, 0x8EF7FB41, + 0x8EF8FB41, 0x8EF9FB41, 0x8EFAFB41, 0x8EFBFB41, 0x8EFCFB41, 0x8EFDFB41, 0x8EFEFB41, 0x8EFFFB41, 0x8F00FB41, 0x8F01FB41, 0x8F02FB41, 0x8F03FB41, 0x8F04FB41, 0x8F05FB41, 0x8F06FB41, + 0x8F07FB41, 0x8F08FB41, 0x8F09FB41, 0x8F0AFB41, 0x8F0BFB41, 0x8F0CFB41, 0x8F0DFB41, 0x8F0EFB41, 0x8F0FFB41, 0x8F10FB41, 0x8F11FB41, 0x8F12FB41, 0x8F13FB41, 0x8F14FB41, 0x8F15FB41, + 0x8F16FB41, 0x8F17FB41, 0x8F18FB41, 0x8F19FB41, 0x8F1AFB41, 0x8F1BFB41, 0x8F1CFB41, 0x8F1DFB41, 0x8F1EFB41, 0x8F1FFB41, 0x8F20FB41, 0x8F21FB41, 0x8F22FB41, 0x8F23FB41, 0x8F24FB41, + 0x8F25FB41, 0x8F26FB41, 0x8F27FB41, 0x8F28FB41, 0x8F29FB41, 0x8F2AFB41, 0x8F2BFB41, 0x8F2CFB41, 0x8F2DFB41, 0x8F2EFB41, 0x8F2FFB41, 0x8F30FB41, 0x8F31FB41, 0x8F32FB41, 0x8F33FB41, + 0x8F34FB41, 0x8F35FB41, 0x8F36FB41, 0x8F37FB41, 0x8F38FB41, 0x8F39FB41, 0x8F3AFB41, 0x8F3BFB41, 0x8F3CFB41, 0x8F3DFB41, 0x8F3EFB41, 0x8F3FFB41, 
0x8F40FB41, 0x8F41FB41, 0x8F42FB41, + 0x8F43FB41, 0x8F44FB41, 0x8F45FB41, 0x8F46FB41, 0x8F47FB41, 0x8F48FB41, 0x8F49FB41, 0x8F4AFB41, 0x8F4BFB41, 0x8F4CFB41, 0x8F4DFB41, 0x8F4EFB41, 0x8F4FFB41, 0x8F50FB41, 0x8F51FB41, + 0x8F52FB41, 0x8F53FB41, 0x8F54FB41, 0x8F55FB41, 0x8F56FB41, 0x8F57FB41, 0x8F58FB41, 0x8F59FB41, 0x8F5AFB41, 0x8F5BFB41, 0x8F5CFB41, 0x8F5DFB41, 0x8F5EFB41, 0x8F5FFB41, 0x8F60FB41, + 0x8F61FB41, 0x8F62FB41, 0x8F63FB41, 0x8F64FB41, 0x8F65FB41, 0x8F66FB41, 0x8F67FB41, 0x8F68FB41, 0x8F69FB41, 0x8F6AFB41, 0x8F6BFB41, 0x8F6CFB41, 0x8F6DFB41, 0x8F6EFB41, 0x8F6FFB41, + 0x8F70FB41, 0x8F71FB41, 0x8F72FB41, 0x8F73FB41, 0x8F74FB41, 0x8F75FB41, 0x8F76FB41, 0x8F77FB41, 0x8F78FB41, 0x8F79FB41, 0x8F7AFB41, 0x8F7BFB41, 0x8F7CFB41, 0x8F7DFB41, 0x8F7EFB41, + 0x8F7FFB41, 0x8F80FB41, 0x8F81FB41, 0x8F82FB41, 0x8F83FB41, 0x8F84FB41, 0x8F85FB41, 0x8F86FB41, 0x8F87FB41, 0x8F88FB41, 0x8F89FB41, 0x8F8AFB41, 0x8F8BFB41, 0x8F8CFB41, 0x8F8DFB41, + 0x8F8EFB41, 0x8F8FFB41, 0x8F90FB41, 0x8F91FB41, 0x8F92FB41, 0x8F93FB41, 0x8F94FB41, 0x8F95FB41, 0x8F96FB41, 0x8F97FB41, 0x8F98FB41, 0x8F99FB41, 0x8F9AFB41, 0x8F9BFB41, 0x8F9CFB41, + 0x8F9DFB41, 0x8F9EFB41, 0x8F9FFB41, 0x8FA0FB41, 0x8FA1FB41, 0x8FA2FB41, 0x8FA3FB41, 0x8FA4FB41, 0x8FA5FB41, 0x8FA6FB41, 0x8FA7FB41, 0x8FA8FB41, 0x8FA9FB41, 0x8FAAFB41, 0x8FABFB41, + 0x8FACFB41, 0x8FADFB41, 0x8FAEFB41, 0x8FAFFB41, 0x8FB0FB41, 0x8FB1FB41, 0x8FB2FB41, 0x8FB3FB41, 0x8FB4FB41, 0x8FB5FB41, 0x8FB6FB41, 0x8FB7FB41, 0x8FB8FB41, 0x8FB9FB41, 0x8FBAFB41, + 0x8FBBFB41, 0x8FBCFB41, 0x8FBDFB41, 0x8FBEFB41, 0x8FBFFB41, 0x8FC0FB41, 0x8FC1FB41, 0x8FC2FB41, 0x8FC3FB41, 0x8FC4FB41, 0x8FC5FB41, 0x8FC6FB41, 0x8FC7FB41, 0x8FC8FB41, 0x8FC9FB41, + 0x8FCAFB41, 0x8FCBFB41, 0x8FCCFB41, 0x8FCDFB41, 0x8FCEFB41, 0x8FCFFB41, 0x8FD0FB41, 0x8FD1FB41, 0x8FD2FB41, 0x8FD3FB41, 0x8FD4FB41, 0x8FD5FB41, 0x8FD6FB41, 0x8FD7FB41, 0x8FD8FB41, + 0x8FD9FB41, 0x8FDAFB41, 0x8FDBFB41, 0x8FDCFB41, 0x8FDDFB41, 0x8FDEFB41, 0x8FDFFB41, 0x8FE0FB41, 0x8FE1FB41, 0x8FE2FB41, 0x8FE3FB41, 
0x8FE4FB41, 0x8FE5FB41, 0x8FE6FB41, 0x8FE7FB41, + 0x8FE8FB41, 0x8FE9FB41, 0x8FEAFB41, 0x8FEBFB41, 0x8FECFB41, 0x8FEDFB41, 0x8FEEFB41, 0x8FEFFB41, 0x8FF0FB41, 0x8FF1FB41, 0x8FF2FB41, 0x8FF3FB41, 0x8FF4FB41, 0x8FF5FB41, 0x8FF6FB41, + 0x8FF7FB41, 0x8FF8FB41, 0x8FF9FB41, 0x8FFAFB41, 0x8FFBFB41, 0x8FFCFB41, 0x8FFDFB41, 0x8FFEFB41, 0x8FFFFB41, 0x9000FB41, 0x9001FB41, 0x9002FB41, 0x9003FB41, 0x9004FB41, 0x9005FB41, + 0x9006FB41, 0x9007FB41, 0x9008FB41, 0x9009FB41, 0x900AFB41, 0x900BFB41, 0x900CFB41, 0x900DFB41, 0x900EFB41, 0x900FFB41, 0x9010FB41, 0x9011FB41, 0x9012FB41, 0x9013FB41, 0x9014FB41, + 0x9015FB41, 0x9016FB41, 0x9017FB41, 0x9018FB41, 0x9019FB41, 0x901AFB41, 0x901BFB41, 0x901CFB41, 0x901DFB41, 0x901EFB41, 0x901FFB41, 0x9020FB41, 0x9021FB41, 0x9022FB41, 0x9023FB41, + 0x9024FB41, 0x9025FB41, 0x9026FB41, 0x9027FB41, 0x9028FB41, 0x9029FB41, 0x902AFB41, 0x902BFB41, 0x902CFB41, 0x902DFB41, 0x902EFB41, 0x902FFB41, 0x9030FB41, 0x9031FB41, 0x9032FB41, + 0x9033FB41, 0x9034FB41, 0x9035FB41, 0x9036FB41, 0x9037FB41, 0x9038FB41, 0x9039FB41, 0x903AFB41, 0x903BFB41, 0x903CFB41, 0x903DFB41, 0x903EFB41, 0x903FFB41, 0x9040FB41, 0x9041FB41, + 0x9042FB41, 0x9043FB41, 0x9044FB41, 0x9045FB41, 0x9046FB41, 0x9047FB41, 0x9048FB41, 0x9049FB41, 0x904AFB41, 0x904BFB41, 0x904CFB41, 0x904DFB41, 0x904EFB41, 0x904FFB41, 0x9050FB41, + 0x9051FB41, 0x9052FB41, 0x9053FB41, 0x9054FB41, 0x9055FB41, 0x9056FB41, 0x9057FB41, 0x9058FB41, 0x9059FB41, 0x905AFB41, 0x905BFB41, 0x905CFB41, 0x905DFB41, 0x905EFB41, 0x905FFB41, + 0x9060FB41, 0x9061FB41, 0x9062FB41, 0x9063FB41, 0x9064FB41, 0x9065FB41, 0x9066FB41, 0x9067FB41, 0x9068FB41, 0x9069FB41, 0x906AFB41, 0x906BFB41, 0x906CFB41, 0x906DFB41, 0x906EFB41, + 0x906FFB41, 0x9070FB41, 0x9071FB41, 0x9072FB41, 0x9073FB41, 0x9074FB41, 0x9075FB41, 0x9076FB41, 0x9077FB41, 0x9078FB41, 0x9079FB41, 0x907AFB41, 0x907BFB41, 0x907CFB41, 0x907DFB41, + 0x907EFB41, 0x907FFB41, 0x9080FB41, 0x9081FB41, 0x9082FB41, 0x9083FB41, 0x9084FB41, 0x9085FB41, 0x9086FB41, 0x9087FB41, 
0x9088FB41, 0x9089FB41, 0x908AFB41, 0x908BFB41, 0x908CFB41, + 0x908DFB41, 0x908EFB41, 0x908FFB41, 0x9090FB41, 0x9091FB41, 0x9092FB41, 0x9093FB41, 0x9094FB41, 0x9095FB41, 0x9096FB41, 0x9097FB41, 0x9098FB41, 0x9099FB41, 0x909AFB41, 0x909BFB41, + 0x909CFB41, 0x909DFB41, 0x909EFB41, 0x909FFB41, 0x90A0FB41, 0x90A1FB41, 0x90A2FB41, 0x90A3FB41, 0x90A4FB41, 0x90A5FB41, 0x90A6FB41, 0x90A7FB41, 0x90A8FB41, 0x90A9FB41, 0x90AAFB41, + 0x90ABFB41, 0x90ACFB41, 0x90ADFB41, 0x90AEFB41, 0x90AFFB41, 0x90B0FB41, 0x90B1FB41, 0x90B2FB41, 0x90B3FB41, 0x90B4FB41, 0x90B5FB41, 0x90B6FB41, 0x90B7FB41, 0x90B8FB41, 0x90B9FB41, + 0x90BAFB41, 0x90BBFB41, 0x90BCFB41, 0x90BDFB41, 0x90BEFB41, 0x90BFFB41, 0x90C0FB41, 0x90C1FB41, 0x90C2FB41, 0x90C3FB41, 0x90C4FB41, 0x90C5FB41, 0x90C6FB41, 0x90C7FB41, 0x90C8FB41, + 0x90C9FB41, 0x90CAFB41, 0x90CBFB41, 0x90CCFB41, 0x90CDFB41, 0x90CEFB41, 0x90CFFB41, 0x90D0FB41, 0x90D1FB41, 0x90D2FB41, 0x90D3FB41, 0x90D4FB41, 0x90D5FB41, 0x90D6FB41, 0x90D7FB41, + 0x90D8FB41, 0x90D9FB41, 0x90DAFB41, 0x90DBFB41, 0x90DCFB41, 0x90DDFB41, 0x90DEFB41, 0x90DFFB41, 0x90E0FB41, 0x90E1FB41, 0x90E2FB41, 0x90E3FB41, 0x90E4FB41, 0x90E5FB41, 0x90E6FB41, + 0x90E7FB41, 0x90E8FB41, 0x90E9FB41, 0x90EAFB41, 0x90EBFB41, 0x90ECFB41, 0x90EDFB41, 0x90EEFB41, 0x90EFFB41, 0x90F0FB41, 0x90F1FB41, 0x90F2FB41, 0x90F3FB41, 0x90F4FB41, 0x90F5FB41, + 0x90F6FB41, 0x90F7FB41, 0x90F8FB41, 0x90F9FB41, 0x90FAFB41, 0x90FBFB41, 0x90FCFB41, 0x90FDFB41, 0x90FEFB41, 0x90FFFB41, 0x9100FB41, 0x9101FB41, 0x9102FB41, 0x9103FB41, 0x9104FB41, + 0x9105FB41, 0x9106FB41, 0x9107FB41, 0x9108FB41, 0x9109FB41, 0x910AFB41, 0x910BFB41, 0x910CFB41, 0x910DFB41, 0x910EFB41, 0x910FFB41, 0x9110FB41, 0x9111FB41, 0x9112FB41, 0x9113FB41, + 0x9114FB41, 0x9115FB41, 0x9116FB41, 0x9117FB41, 0x9118FB41, 0x9119FB41, 0x911AFB41, 0x911BFB41, 0x911CFB41, 0x911DFB41, 0x911EFB41, 0x911FFB41, 0x9120FB41, 0x9121FB41, 0x9122FB41, + 0x9123FB41, 0x9124FB41, 0x9125FB41, 0x9126FB41, 0x9127FB41, 0x9128FB41, 0x9129FB41, 0x912AFB41, 0x912BFB41, 
0x912CFB41, 0x912DFB41, 0x912EFB41, 0x912FFB41, 0x9130FB41, 0x9131FB41, + 0x9132FB41, 0x9133FB41, 0x9134FB41, 0x9135FB41, 0x9136FB41, 0x9137FB41, 0x9138FB41, 0x9139FB41, 0x913AFB41, 0x913BFB41, 0x913CFB41, 0x913DFB41, 0x913EFB41, 0x913FFB41, 0x9140FB41, + 0x9141FB41, 0x9142FB41, 0x9143FB41, 0x9144FB41, 0x9145FB41, 0x9146FB41, 0x9147FB41, 0x9148FB41, 0x9149FB41, 0x914AFB41, 0x914BFB41, 0x914CFB41, 0x914DFB41, 0x914EFB41, 0x914FFB41, + 0x9150FB41, 0x9151FB41, 0x9152FB41, 0x9153FB41, 0x9154FB41, 0x9155FB41, 0x9156FB41, 0x9157FB41, 0x9158FB41, 0x9159FB41, 0x915AFB41, 0x915BFB41, 0x915CFB41, 0x915DFB41, 0x915EFB41, + 0x915FFB41, 0x9160FB41, 0x9161FB41, 0x9162FB41, 0x9163FB41, 0x9164FB41, 0x9165FB41, 0x9166FB41, 0x9167FB41, 0x9168FB41, 0x9169FB41, 0x916AFB41, 0x916BFB41, 0x916CFB41, 0x916DFB41, + 0x916EFB41, 0x916FFB41, 0x9170FB41, 0x9171FB41, 0x9172FB41, 0x9173FB41, 0x9174FB41, 0x9175FB41, 0x9176FB41, 0x9177FB41, 0x9178FB41, 0x9179FB41, 0x917AFB41, 0x917BFB41, 0x917CFB41, + 0x917DFB41, 0x917EFB41, 0x917FFB41, 0x9180FB41, 0x9181FB41, 0x9182FB41, 0x9183FB41, 0x9184FB41, 0x9185FB41, 0x9186FB41, 0x9187FB41, 0x9188FB41, 0x9189FB41, 0x918AFB41, 0x918BFB41, + 0x918CFB41, 0x918DFB41, 0x918EFB41, 0x918FFB41, 0x9190FB41, 0x9191FB41, 0x9192FB41, 0x9193FB41, 0x9194FB41, 0x9195FB41, 0x9196FB41, 0x9197FB41, 0x9198FB41, 0x9199FB41, 0x919AFB41, + 0x919BFB41, 0x919CFB41, 0x919DFB41, 0x919EFB41, 0x919FFB41, 0x91A0FB41, 0x91A1FB41, 0x91A2FB41, 0x91A3FB41, 0x91A4FB41, 0x91A5FB41, 0x91A6FB41, 0x91A7FB41, 0x91A8FB41, 0x91A9FB41, + 0x91AAFB41, 0x91ABFB41, 0x91ACFB41, 0x91ADFB41, 0x91AEFB41, 0x91AFFB41, 0x91B0FB41, 0x91B1FB41, 0x91B2FB41, 0x91B3FB41, 0x91B4FB41, 0x91B5FB41, 0x91B6FB41, 0x91B7FB41, 0x91B8FB41, + 0x91B9FB41, 0x91BAFB41, 0x91BBFB41, 0x91BCFB41, 0x91BDFB41, 0x91BEFB41, 0x91BFFB41, 0x91C0FB41, 0x91C1FB41, 0x91C2FB41, 0x91C3FB41, 0x91C4FB41, 0x91C5FB41, 0x91C6FB41, 0x91C7FB41, + 0x91C8FB41, 0x91C9FB41, 0x91CAFB41, 0x91CBFB41, 0x91CCFB41, 0x91CDFB41, 0x91CEFB41, 0x91CFFB41, 
0x91D0FB41, 0x91D1FB41, 0x91D2FB41, 0x91D3FB41, 0x91D4FB41, 0x91D5FB41, 0x91D6FB41, + 0x91D7FB41, 0x91D8FB41, 0x91D9FB41, 0x91DAFB41, 0x91DBFB41, 0x91DCFB41, 0x91DDFB41, 0x91DEFB41, 0x91DFFB41, 0x91E0FB41, 0x91E1FB41, 0x91E2FB41, 0x91E3FB41, 0x91E4FB41, 0x91E5FB41, + 0x91E6FB41, 0x91E7FB41, 0x91E8FB41, 0x91E9FB41, 0x91EAFB41, 0x91EBFB41, 0x91ECFB41, 0x91EDFB41, 0x91EEFB41, 0x91EFFB41, 0x91F0FB41, 0x91F1FB41, 0x91F2FB41, 0x91F3FB41, 0x91F4FB41, + 0x91F5FB41, 0x91F6FB41, 0x91F7FB41, 0x91F8FB41, 0x91F9FB41, 0x91FAFB41, 0x91FBFB41, 0x91FCFB41, 0x91FDFB41, 0x91FEFB41, 0x91FFFB41, 0x9200FB41, 0x9201FB41, 0x9202FB41, 0x9203FB41, + 0x9204FB41, 0x9205FB41, 0x9206FB41, 0x9207FB41, 0x9208FB41, 0x9209FB41, 0x920AFB41, 0x920BFB41, 0x920CFB41, 0x920DFB41, 0x920EFB41, 0x920FFB41, 0x9210FB41, 0x9211FB41, 0x9212FB41, + 0x9213FB41, 0x9214FB41, 0x9215FB41, 0x9216FB41, 0x9217FB41, 0x9218FB41, 0x9219FB41, 0x921AFB41, 0x921BFB41, 0x921CFB41, 0x921DFB41, 0x921EFB41, 0x921FFB41, 0x9220FB41, 0x9221FB41, + 0x9222FB41, 0x9223FB41, 0x9224FB41, 0x9225FB41, 0x9226FB41, 0x9227FB41, 0x9228FB41, 0x9229FB41, 0x922AFB41, 0x922BFB41, 0x922CFB41, 0x922DFB41, 0x922EFB41, 0x922FFB41, 0x9230FB41, + 0x9231FB41, 0x9232FB41, 0x9233FB41, 0x9234FB41, 0x9235FB41, 0x9236FB41, 0x9237FB41, 0x9238FB41, 0x9239FB41, 0x923AFB41, 0x923BFB41, 0x923CFB41, 0x923DFB41, 0x923EFB41, 0x923FFB41, + 0x9240FB41, 0x9241FB41, 0x9242FB41, 0x9243FB41, 0x9244FB41, 0x9245FB41, 0x9246FB41, 0x9247FB41, 0x9248FB41, 0x9249FB41, 0x924AFB41, 0x924BFB41, 0x924CFB41, 0x924DFB41, 0x924EFB41, + 0x924FFB41, 0x9250FB41, 0x9251FB41, 0x9252FB41, 0x9253FB41, 0x9254FB41, 0x9255FB41, 0x9256FB41, 0x9257FB41, 0x9258FB41, 0x9259FB41, 0x925AFB41, 0x925BFB41, 0x925CFB41, 0x925DFB41, + 0x925EFB41, 0x925FFB41, 0x9260FB41, 0x9261FB41, 0x9262FB41, 0x9263FB41, 0x9264FB41, 0x9265FB41, 0x9266FB41, 0x9267FB41, 0x9268FB41, 0x9269FB41, 0x926AFB41, 0x926BFB41, 0x926CFB41, + 0x926DFB41, 0x926EFB41, 0x926FFB41, 0x9270FB41, 0x9271FB41, 0x9272FB41, 0x9273FB41, 
0x9274FB41, 0x9275FB41, 0x9276FB41, 0x9277FB41, 0x9278FB41, 0x9279FB41, 0x927AFB41, 0x927BFB41, + 0x927CFB41, 0x927DFB41, 0x927EFB41, 0x927FFB41, 0x9280FB41, 0x9281FB41, 0x9282FB41, 0x9283FB41, 0x9284FB41, 0x9285FB41, 0x9286FB41, 0x9287FB41, 0x9288FB41, 0x9289FB41, 0x928AFB41, + 0x928BFB41, 0x928CFB41, 0x928DFB41, 0x928EFB41, 0x928FFB41, 0x9290FB41, 0x9291FB41, 0x9292FB41, 0x9293FB41, 0x9294FB41, 0x9295FB41, 0x9296FB41, 0x9297FB41, 0x9298FB41, 0x9299FB41, + 0x929AFB41, 0x929BFB41, 0x929CFB41, 0x929DFB41, 0x929EFB41, 0x929FFB41, 0x92A0FB41, 0x92A1FB41, 0x92A2FB41, 0x92A3FB41, 0x92A4FB41, 0x92A5FB41, 0x92A6FB41, 0x92A7FB41, 0x92A8FB41, + 0x92A9FB41, 0x92AAFB41, 0x92ABFB41, 0x92ACFB41, 0x92ADFB41, 0x92AEFB41, 0x92AFFB41, 0x92B0FB41, 0x92B1FB41, 0x92B2FB41, 0x92B3FB41, 0x92B4FB41, 0x92B5FB41, 0x92B6FB41, 0x92B7FB41, + 0x92B8FB41, 0x92B9FB41, 0x92BAFB41, 0x92BBFB41, 0x92BCFB41, 0x92BDFB41, 0x92BEFB41, 0x92BFFB41, 0x92C0FB41, 0x92C1FB41, 0x92C2FB41, 0x92C3FB41, 0x92C4FB41, 0x92C5FB41, 0x92C6FB41, + 0x92C7FB41, 0x92C8FB41, 0x92C9FB41, 0x92CAFB41, 0x92CBFB41, 0x92CCFB41, 0x92CDFB41, 0x92CEFB41, 0x92CFFB41, 0x92D0FB41, 0x92D1FB41, 0x92D2FB41, 0x92D3FB41, 0x92D4FB41, 0x92D5FB41, + 0x92D6FB41, 0x92D7FB41, 0x92D8FB41, 0x92D9FB41, 0x92DAFB41, 0x92DBFB41, 0x92DCFB41, 0x92DDFB41, 0x92DEFB41, 0x92DFFB41, 0x92E0FB41, 0x92E1FB41, 0x92E2FB41, 0x92E3FB41, 0x92E4FB41, + 0x92E5FB41, 0x92E6FB41, 0x92E7FB41, 0x92E8FB41, 0x92E9FB41, 0x92EAFB41, 0x92EBFB41, 0x92ECFB41, 0x92EDFB41, 0x92EEFB41, 0x92EFFB41, 0x92F0FB41, 0x92F1FB41, 0x92F2FB41, 0x92F3FB41, + 0x92F4FB41, 0x92F5FB41, 0x92F6FB41, 0x92F7FB41, 0x92F8FB41, 0x92F9FB41, 0x92FAFB41, 0x92FBFB41, 0x92FCFB41, 0x92FDFB41, 0x92FEFB41, 0x92FFFB41, 0x9300FB41, 0x9301FB41, 0x9302FB41, + 0x9303FB41, 0x9304FB41, 0x9305FB41, 0x9306FB41, 0x9307FB41, 0x9308FB41, 0x9309FB41, 0x930AFB41, 0x930BFB41, 0x930CFB41, 0x930DFB41, 0x930EFB41, 0x930FFB41, 0x9310FB41, 0x9311FB41, + 0x9312FB41, 0x9313FB41, 0x9314FB41, 0x9315FB41, 0x9316FB41, 0x9317FB41, 
0x9318FB41, 0x9319FB41, 0x931AFB41, 0x931BFB41, 0x931CFB41, 0x931DFB41, 0x931EFB41, 0x931FFB41, 0x9320FB41, + 0x9321FB41, 0x9322FB41, 0x9323FB41, 0x9324FB41, 0x9325FB41, 0x9326FB41, 0x9327FB41, 0x9328FB41, 0x9329FB41, 0x932AFB41, 0x932BFB41, 0x932CFB41, 0x932DFB41, 0x932EFB41, 0x932FFB41, + 0x9330FB41, 0x9331FB41, 0x9332FB41, 0x9333FB41, 0x9334FB41, 0x9335FB41, 0x9336FB41, 0x9337FB41, 0x9338FB41, 0x9339FB41, 0x933AFB41, 0x933BFB41, 0x933CFB41, 0x933DFB41, 0x933EFB41, + 0x933FFB41, 0x9340FB41, 0x9341FB41, 0x9342FB41, 0x9343FB41, 0x9344FB41, 0x9345FB41, 0x9346FB41, 0x9347FB41, 0x9348FB41, 0x9349FB41, 0x934AFB41, 0x934BFB41, 0x934CFB41, 0x934DFB41, + 0x934EFB41, 0x934FFB41, 0x9350FB41, 0x9351FB41, 0x9352FB41, 0x9353FB41, 0x9354FB41, 0x9355FB41, 0x9356FB41, 0x9357FB41, 0x9358FB41, 0x9359FB41, 0x935AFB41, 0x935BFB41, 0x935CFB41, + 0x935DFB41, 0x935EFB41, 0x935FFB41, 0x9360FB41, 0x9361FB41, 0x9362FB41, 0x9363FB41, 0x9364FB41, 0x9365FB41, 0x9366FB41, 0x9367FB41, 0x9368FB41, 0x9369FB41, 0x936AFB41, 0x936BFB41, + 0x936CFB41, 0x936DFB41, 0x936EFB41, 0x936FFB41, 0x9370FB41, 0x9371FB41, 0x9372FB41, 0x9373FB41, 0x9374FB41, 0x9375FB41, 0x9376FB41, 0x9377FB41, 0x9378FB41, 0x9379FB41, 0x937AFB41, + 0x937BFB41, 0x937CFB41, 0x937DFB41, 0x937EFB41, 0x937FFB41, 0x9380FB41, 0x9381FB41, 0x9382FB41, 0x9383FB41, 0x9384FB41, 0x9385FB41, 0x9386FB41, 0x9387FB41, 0x9388FB41, 0x9389FB41, + 0x938AFB41, 0x938BFB41, 0x938CFB41, 0x938DFB41, 0x938EFB41, 0x938FFB41, 0x9390FB41, 0x9391FB41, 0x9392FB41, 0x9393FB41, 0x9394FB41, 0x9395FB41, 0x9396FB41, 0x9397FB41, 0x9398FB41, + 0x9399FB41, 0x939AFB41, 0x939BFB41, 0x939CFB41, 0x939DFB41, 0x939EFB41, 0x939FFB41, 0x93A0FB41, 0x93A1FB41, 0x93A2FB41, 0x93A3FB41, 0x93A4FB41, 0x93A5FB41, 0x93A6FB41, 0x93A7FB41, + 0x93A8FB41, 0x93A9FB41, 0x93AAFB41, 0x93ABFB41, 0x93ACFB41, 0x93ADFB41, 0x93AEFB41, 0x93AFFB41, 0x93B0FB41, 0x93B1FB41, 0x93B2FB41, 0x93B3FB41, 0x93B4FB41, 0x93B5FB41, 0x93B6FB41, + 0x93B7FB41, 0x93B8FB41, 0x93B9FB41, 0x93BAFB41, 0x93BBFB41, 
0x93BCFB41, 0x93BDFB41, 0x93BEFB41, 0x93BFFB41, 0x93C0FB41, 0x93C1FB41, 0x93C2FB41, 0x93C3FB41, 0x93C4FB41, 0x93C5FB41, + 0x93C6FB41, 0x93C7FB41, 0x93C8FB41, 0x93C9FB41, 0x93CAFB41, 0x93CBFB41, 0x93CCFB41, 0x93CDFB41, 0x93CEFB41, 0x93CFFB41, 0x93D0FB41, 0x93D1FB41, 0x93D2FB41, 0x93D3FB41, 0x93D4FB41, + 0x93D5FB41, 0x93D6FB41, 0x93D7FB41, 0x93D8FB41, 0x93D9FB41, 0x93DAFB41, 0x93DBFB41, 0x93DCFB41, 0x93DDFB41, 0x93DEFB41, 0x93DFFB41, 0x93E0FB41, 0x93E1FB41, 0x93E2FB41, 0x93E3FB41, + 0x93E4FB41, 0x93E5FB41, 0x93E6FB41, 0x93E7FB41, 0x93E8FB41, 0x93E9FB41, 0x93EAFB41, 0x93EBFB41, 0x93ECFB41, 0x93EDFB41, 0x93EEFB41, 0x93EFFB41, 0x93F0FB41, 0x93F1FB41, 0x93F2FB41, + 0x93F3FB41, 0x93F4FB41, 0x93F5FB41, 0x93F6FB41, 0x93F7FB41, 0x93F8FB41, 0x93F9FB41, 0x93FAFB41, 0x93FBFB41, 0x93FCFB41, 0x93FDFB41, 0x93FEFB41, 0x93FFFB41, 0x9400FB41, 0x9401FB41, + 0x9402FB41, 0x9403FB41, 0x9404FB41, 0x9405FB41, 0x9406FB41, 0x9407FB41, 0x9408FB41, 0x9409FB41, 0x940AFB41, 0x940BFB41, 0x940CFB41, 0x940DFB41, 0x940EFB41, 0x940FFB41, 0x9410FB41, + 0x9411FB41, 0x9412FB41, 0x9413FB41, 0x9414FB41, 0x9415FB41, 0x9416FB41, 0x9417FB41, 0x9418FB41, 0x9419FB41, 0x941AFB41, 0x941BFB41, 0x941CFB41, 0x941DFB41, 0x941EFB41, 0x941FFB41, + 0x9420FB41, 0x9421FB41, 0x9422FB41, 0x9423FB41, 0x9424FB41, 0x9425FB41, 0x9426FB41, 0x9427FB41, 0x9428FB41, 0x9429FB41, 0x942AFB41, 0x942BFB41, 0x942CFB41, 0x942DFB41, 0x942EFB41, + 0x942FFB41, 0x9430FB41, 0x9431FB41, 0x9432FB41, 0x9433FB41, 0x9434FB41, 0x9435FB41, 0x9436FB41, 0x9437FB41, 0x9438FB41, 0x9439FB41, 0x943AFB41, 0x943BFB41, 0x943CFB41, 0x943DFB41, + 0x943EFB41, 0x943FFB41, 0x9440FB41, 0x9441FB41, 0x9442FB41, 0x9443FB41, 0x9444FB41, 0x9445FB41, 0x9446FB41, 0x9447FB41, 0x9448FB41, 0x9449FB41, 0x944AFB41, 0x944BFB41, 0x944CFB41, + 0x944DFB41, 0x944EFB41, 0x944FFB41, 0x9450FB41, 0x9451FB41, 0x9452FB41, 0x9453FB41, 0x9454FB41, 0x9455FB41, 0x9456FB41, 0x9457FB41, 0x9458FB41, 0x9459FB41, 0x945AFB41, 0x945BFB41, + 0x945CFB41, 0x945DFB41, 0x945EFB41, 0x945FFB41, 
0x9460FB41, 0x9461FB41, 0x9462FB41, 0x9463FB41, 0x9464FB41, 0x9465FB41, 0x9466FB41, 0x9467FB41, 0x9468FB41, 0x9469FB41, 0x946AFB41, + 0x946BFB41, 0x946CFB41, 0x946DFB41, 0x946EFB41, 0x946FFB41, 0x9470FB41, 0x9471FB41, 0x9472FB41, 0x9473FB41, 0x9474FB41, 0x9475FB41, 0x9476FB41, 0x9477FB41, 0x9478FB41, 0x9479FB41, + 0x947AFB41, 0x947BFB41, 0x947CFB41, 0x947DFB41, 0x947EFB41, 0x947FFB41, 0x9480FB41, 0x9481FB41, 0x9482FB41, 0x9483FB41, 0x9484FB41, 0x9485FB41, 0x9486FB41, 0x9487FB41, 0x9488FB41, + 0x9489FB41, 0x948AFB41, 0x948BFB41, 0x948CFB41, 0x948DFB41, 0x948EFB41, 0x948FFB41, 0x9490FB41, 0x9491FB41, 0x9492FB41, 0x9493FB41, 0x9494FB41, 0x9495FB41, 0x9496FB41, 0x9497FB41, + 0x9498FB41, 0x9499FB41, 0x949AFB41, 0x949BFB41, 0x949CFB41, 0x949DFB41, 0x949EFB41, 0x949FFB41, 0x94A0FB41, 0x94A1FB41, 0x94A2FB41, 0x94A3FB41, 0x94A4FB41, 0x94A5FB41, 0x94A6FB41, + 0x94A7FB41, 0x94A8FB41, 0x94A9FB41, 0x94AAFB41, 0x94ABFB41, 0x94ACFB41, 0x94ADFB41, 0x94AEFB41, 0x94AFFB41, 0x94B0FB41, 0x94B1FB41, 0x94B2FB41, 0x94B3FB41, 0x94B4FB41, 0x94B5FB41, + 0x94B6FB41, 0x94B7FB41, 0x94B8FB41, 0x94B9FB41, 0x94BAFB41, 0x94BBFB41, 0x94BCFB41, 0x94BDFB41, 0x94BEFB41, 0x94BFFB41, 0x94C0FB41, 0x94C1FB41, 0x94C2FB41, 0x94C3FB41, 0x94C4FB41, + 0x94C5FB41, 0x94C6FB41, 0x94C7FB41, 0x94C8FB41, 0x94C9FB41, 0x94CAFB41, 0x94CBFB41, 0x94CCFB41, 0x94CDFB41, 0x94CEFB41, 0x94CFFB41, 0x94D0FB41, 0x94D1FB41, 0x94D2FB41, 0x94D3FB41, + 0x94D4FB41, 0x94D5FB41, 0x94D6FB41, 0x94D7FB41, 0x94D8FB41, 0x94D9FB41, 0x94DAFB41, 0x94DBFB41, 0x94DCFB41, 0x94DDFB41, 0x94DEFB41, 0x94DFFB41, 0x94E0FB41, 0x94E1FB41, 0x94E2FB41, + 0x94E3FB41, 0x94E4FB41, 0x94E5FB41, 0x94E6FB41, 0x94E7FB41, 0x94E8FB41, 0x94E9FB41, 0x94EAFB41, 0x94EBFB41, 0x94ECFB41, 0x94EDFB41, 0x94EEFB41, 0x94EFFB41, 0x94F0FB41, 0x94F1FB41, + 0x94F2FB41, 0x94F3FB41, 0x94F4FB41, 0x94F5FB41, 0x94F6FB41, 0x94F7FB41, 0x94F8FB41, 0x94F9FB41, 0x94FAFB41, 0x94FBFB41, 0x94FCFB41, 0x94FDFB41, 0x94FEFB41, 0x94FFFB41, 0x9500FB41, + 0x9501FB41, 0x9502FB41, 0x9503FB41, 
0x9504FB41, 0x9505FB41, 0x9506FB41, 0x9507FB41, 0x9508FB41, 0x9509FB41, 0x950AFB41, 0x950BFB41, 0x950CFB41, 0x950DFB41, 0x950EFB41, 0x950FFB41, + 0x9510FB41, 0x9511FB41, 0x9512FB41, 0x9513FB41, 0x9514FB41, 0x9515FB41, 0x9516FB41, 0x9517FB41, 0x9518FB41, 0x9519FB41, 0x951AFB41, 0x951BFB41, 0x951CFB41, 0x951DFB41, 0x951EFB41, + 0x951FFB41, 0x9520FB41, 0x9521FB41, 0x9522FB41, 0x9523FB41, 0x9524FB41, 0x9525FB41, 0x9526FB41, 0x9527FB41, 0x9528FB41, 0x9529FB41, 0x952AFB41, 0x952BFB41, 0x952CFB41, 0x952DFB41, + 0x952EFB41, 0x952FFB41, 0x9530FB41, 0x9531FB41, 0x9532FB41, 0x9533FB41, 0x9534FB41, 0x9535FB41, 0x9536FB41, 0x9537FB41, 0x9538FB41, 0x9539FB41, 0x953AFB41, 0x953BFB41, 0x953CFB41, + 0x953DFB41, 0x953EFB41, 0x953FFB41, 0x9540FB41, 0x9541FB41, 0x9542FB41, 0x9543FB41, 0x9544FB41, 0x9545FB41, 0x9546FB41, 0x9547FB41, 0x9548FB41, 0x9549FB41, 0x954AFB41, 0x954BFB41, + 0x954CFB41, 0x954DFB41, 0x954EFB41, 0x954FFB41, 0x9550FB41, 0x9551FB41, 0x9552FB41, 0x9553FB41, 0x9554FB41, 0x9555FB41, 0x9556FB41, 0x9557FB41, 0x9558FB41, 0x9559FB41, 0x955AFB41, + 0x955BFB41, 0x955CFB41, 0x955DFB41, 0x955EFB41, 0x955FFB41, 0x9560FB41, 0x9561FB41, 0x9562FB41, 0x9563FB41, 0x9564FB41, 0x9565FB41, 0x9566FB41, 0x9567FB41, 0x9568FB41, 0x9569FB41, + 0x956AFB41, 0x956BFB41, 0x956CFB41, 0x956DFB41, 0x956EFB41, 0x956FFB41, 0x9570FB41, 0x9571FB41, 0x9572FB41, 0x9573FB41, 0x9574FB41, 0x9575FB41, 0x9576FB41, 0x9577FB41, 0x9578FB41, + 0x9579FB41, 0x957AFB41, 0x957BFB41, 0x957CFB41, 0x957DFB41, 0x957EFB41, 0x957FFB41, 0x9580FB41, 0x9581FB41, 0x9582FB41, 0x9583FB41, 0x9584FB41, 0x9585FB41, 0x9586FB41, 0x9587FB41, + 0x9588FB41, 0x9589FB41, 0x958AFB41, 0x958BFB41, 0x958CFB41, 0x958DFB41, 0x958EFB41, 0x958FFB41, 0x9590FB41, 0x9591FB41, 0x9592FB41, 0x9593FB41, 0x9594FB41, 0x9595FB41, 0x9596FB41, + 0x9597FB41, 0x9598FB41, 0x9599FB41, 0x959AFB41, 0x959BFB41, 0x959CFB41, 0x959DFB41, 0x959EFB41, 0x959FFB41, 0x95A0FB41, 0x95A1FB41, 0x95A2FB41, 0x95A3FB41, 0x95A4FB41, 0x95A5FB41, + 0x95A6FB41, 0x95A7FB41, 
0x95A8FB41, 0x95A9FB41, 0x95AAFB41, 0x95ABFB41, 0x95ACFB41, 0x95ADFB41, 0x95AEFB41, 0x95AFFB41, 0x95B0FB41, 0x95B1FB41, 0x95B2FB41, 0x95B3FB41, 0x95B4FB41, + 0x95B5FB41, 0x95B6FB41, 0x95B7FB41, 0x95B8FB41, 0x95B9FB41, 0x95BAFB41, 0x95BBFB41, 0x95BCFB41, 0x95BDFB41, 0x95BEFB41, 0x95BFFB41, 0x95C0FB41, 0x95C1FB41, 0x95C2FB41, 0x95C3FB41, + 0x95C4FB41, 0x95C5FB41, 0x95C6FB41, 0x95C7FB41, 0x95C8FB41, 0x95C9FB41, 0x95CAFB41, 0x95CBFB41, 0x95CCFB41, 0x95CDFB41, 0x95CEFB41, 0x95CFFB41, 0x95D0FB41, 0x95D1FB41, 0x95D2FB41, + 0x95D3FB41, 0x95D4FB41, 0x95D5FB41, 0x95D6FB41, 0x95D7FB41, 0x95D8FB41, 0x95D9FB41, 0x95DAFB41, 0x95DBFB41, 0x95DCFB41, 0x95DDFB41, 0x95DEFB41, 0x95DFFB41, 0x95E0FB41, 0x95E1FB41, + 0x95E2FB41, 0x95E3FB41, 0x95E4FB41, 0x95E5FB41, 0x95E6FB41, 0x95E7FB41, 0x95E8FB41, 0x95E9FB41, 0x95EAFB41, 0x95EBFB41, 0x95ECFB41, 0x95EDFB41, 0x95EEFB41, 0x95EFFB41, 0x95F0FB41, + 0x95F1FB41, 0x95F2FB41, 0x95F3FB41, 0x95F4FB41, 0x95F5FB41, 0x95F6FB41, 0x95F7FB41, 0x95F8FB41, 0x95F9FB41, 0x95FAFB41, 0x95FBFB41, 0x95FCFB41, 0x95FDFB41, 0x95FEFB41, 0x95FFFB41, + 0x9600FB41, 0x9601FB41, 0x9602FB41, 0x9603FB41, 0x9604FB41, 0x9605FB41, 0x9606FB41, 0x9607FB41, 0x9608FB41, 0x9609FB41, 0x960AFB41, 0x960BFB41, 0x960CFB41, 0x960DFB41, 0x960EFB41, + 0x960FFB41, 0x9610FB41, 0x9611FB41, 0x9612FB41, 0x9613FB41, 0x9614FB41, 0x9615FB41, 0x9616FB41, 0x9617FB41, 0x9618FB41, 0x9619FB41, 0x961AFB41, 0x961BFB41, 0x961CFB41, 0x961DFB41, + 0x961EFB41, 0x961FFB41, 0x9620FB41, 0x9621FB41, 0x9622FB41, 0x9623FB41, 0x9624FB41, 0x9625FB41, 0x9626FB41, 0x9627FB41, 0x9628FB41, 0x9629FB41, 0x962AFB41, 0x962BFB41, 0x962CFB41, + 0x962DFB41, 0x962EFB41, 0x962FFB41, 0x9630FB41, 0x9631FB41, 0x9632FB41, 0x9633FB41, 0x9634FB41, 0x9635FB41, 0x9636FB41, 0x9637FB41, 0x9638FB41, 0x9639FB41, 0x963AFB41, 0x963BFB41, + 0x963CFB41, 0x963DFB41, 0x963EFB41, 0x963FFB41, 0x9640FB41, 0x9641FB41, 0x9642FB41, 0x9643FB41, 0x9644FB41, 0x9645FB41, 0x9646FB41, 0x9647FB41, 0x9648FB41, 0x9649FB41, 0x964AFB41, + 0x964BFB41, 
0x964CFB41, 0x964DFB41, 0x964EFB41, 0x964FFB41, 0x9650FB41, 0x9651FB41, 0x9652FB41, 0x9653FB41, 0x9654FB41, 0x9655FB41, 0x9656FB41, 0x9657FB41, 0x9658FB41, 0x9659FB41, + 0x965AFB41, 0x965BFB41, 0x965CFB41, 0x965DFB41, 0x965EFB41, 0x965FFB41, 0x9660FB41, 0x9661FB41, 0x9662FB41, 0x9663FB41, 0x9664FB41, 0x9665FB41, 0x9666FB41, 0x9667FB41, 0x9668FB41, + 0x9669FB41, 0x966AFB41, 0x966BFB41, 0x966CFB41, 0x966DFB41, 0x966EFB41, 0x966FFB41, 0x9670FB41, 0x9671FB41, 0x9672FB41, 0x9673FB41, 0x9674FB41, 0x9675FB41, 0x9676FB41, 0x9677FB41, + 0x9678FB41, 0x9679FB41, 0x967AFB41, 0x967BFB41, 0x967CFB41, 0x967DFB41, 0x967EFB41, 0x967FFB41, 0x9680FB41, 0x9681FB41, 0x9682FB41, 0x9683FB41, 0x9684FB41, 0x9685FB41, 0x9686FB41, + 0x9687FB41, 0x9688FB41, 0x9689FB41, 0x968AFB41, 0x968BFB41, 0x968CFB41, 0x968DFB41, 0x968EFB41, 0x968FFB41, 0x9690FB41, 0x9691FB41, 0x9692FB41, 0x9693FB41, 0x9694FB41, 0x9695FB41, + 0x9696FB41, 0x9697FB41, 0x9698FB41, 0x9699FB41, 0x969AFB41, 0x969BFB41, 0x969CFB41, 0x969DFB41, 0x969EFB41, 0x969FFB41, 0x96A0FB41, 0x96A1FB41, 0x96A2FB41, 0x96A3FB41, 0x96A4FB41, + 0x96A5FB41, 0x96A6FB41, 0x96A7FB41, 0x96A8FB41, 0x96A9FB41, 0x96AAFB41, 0x96ABFB41, 0x96ACFB41, 0x96ADFB41, 0x96AEFB41, 0x96AFFB41, 0x96B0FB41, 0x96B1FB41, 0x96B2FB41, 0x96B3FB41, + 0x96B4FB41, 0x96B5FB41, 0x96B6FB41, 0x96B7FB41, 0x96B8FB41, 0x96B9FB41, 0x96BAFB41, 0x96BBFB41, 0x96BCFB41, 0x96BDFB41, 0x96BEFB41, 0x96BFFB41, 0x96C0FB41, 0x96C1FB41, 0x96C2FB41, + 0x96C3FB41, 0x96C4FB41, 0x96C5FB41, 0x96C6FB41, 0x96C7FB41, 0x96C8FB41, 0x96C9FB41, 0x96CAFB41, 0x96CBFB41, 0x96CCFB41, 0x96CDFB41, 0x96CEFB41, 0x96CFFB41, 0x96D0FB41, 0x96D1FB41, + 0x96D2FB41, 0x96D3FB41, 0x96D4FB41, 0x96D5FB41, 0x96D6FB41, 0x96D7FB41, 0x96D8FB41, 0x96D9FB41, 0x96DAFB41, 0x96DBFB41, 0x96DCFB41, 0x96DDFB41, 0x96DEFB41, 0x96DFFB41, 0x96E0FB41, + 0x96E1FB41, 0x96E2FB41, 0x96E3FB41, 0x96E4FB41, 0x96E5FB41, 0x96E6FB41, 0x96E7FB41, 0x96E8FB41, 0x96E9FB41, 0x96EAFB41, 0x96EBFB41, 0x96ECFB41, 0x96EDFB41, 0x96EEFB41, 0x96EFFB41, + 
0x96F0FB41, 0x96F1FB41, 0x96F2FB41, 0x96F3FB41, 0x96F4FB41, 0x96F5FB41, 0x96F6FB41, 0x96F7FB41, 0x96F8FB41, 0x96F9FB41, 0x96FAFB41, 0x96FBFB41, 0x96FCFB41, 0x96FDFB41, 0x96FEFB41, + 0x96FFFB41, 0x9700FB41, 0x9701FB41, 0x9702FB41, 0x9703FB41, 0x9704FB41, 0x9705FB41, 0x9706FB41, 0x9707FB41, 0x9708FB41, 0x9709FB41, 0x970AFB41, 0x970BFB41, 0x970CFB41, 0x970DFB41, + 0x970EFB41, 0x970FFB41, 0x9710FB41, 0x9711FB41, 0x9712FB41, 0x9713FB41, 0x9714FB41, 0x9715FB41, 0x9716FB41, 0x9717FB41, 0x9718FB41, 0x9719FB41, 0x971AFB41, 0x971BFB41, 0x971CFB41, + 0x971DFB41, 0x971EFB41, 0x971FFB41, 0x9720FB41, 0x9721FB41, 0x9722FB41, 0x9723FB41, 0x9724FB41, 0x9725FB41, 0x9726FB41, 0x9727FB41, 0x9728FB41, 0x9729FB41, 0x972AFB41, 0x972BFB41, + 0x972CFB41, 0x972DFB41, 0x972EFB41, 0x972FFB41, 0x9730FB41, 0x9731FB41, 0x9732FB41, 0x9733FB41, 0x9734FB41, 0x9735FB41, 0x9736FB41, 0x9737FB41, 0x9738FB41, 0x9739FB41, 0x973AFB41, + 0x973BFB41, 0x973CFB41, 0x973DFB41, 0x973EFB41, 0x973FFB41, 0x9740FB41, 0x9741FB41, 0x9742FB41, 0x9743FB41, 0x9744FB41, 0x9745FB41, 0x9746FB41, 0x9747FB41, 0x9748FB41, 0x9749FB41, + 0x974AFB41, 0x974BFB41, 0x974CFB41, 0x974DFB41, 0x974EFB41, 0x974FFB41, 0x9750FB41, 0x9751FB41, 0x9752FB41, 0x9753FB41, 0x9754FB41, 0x9755FB41, 0x9756FB41, 0x9757FB41, 0x9758FB41, + 0x9759FB41, 0x975AFB41, 0x975BFB41, 0x975CFB41, 0x975DFB41, 0x975EFB41, 0x975FFB41, 0x9760FB41, 0x9761FB41, 0x9762FB41, 0x9763FB41, 0x9764FB41, 0x9765FB41, 0x9766FB41, 0x9767FB41, + 0x9768FB41, 0x9769FB41, 0x976AFB41, 0x976BFB41, 0x976CFB41, 0x976DFB41, 0x976EFB41, 0x976FFB41, 0x9770FB41, 0x9771FB41, 0x9772FB41, 0x9773FB41, 0x9774FB41, 0x9775FB41, 0x9776FB41, + 0x9777FB41, 0x9778FB41, 0x9779FB41, 0x977AFB41, 0x977BFB41, 0x977CFB41, 0x977DFB41, 0x977EFB41, 0x977FFB41, 0x9780FB41, 0x9781FB41, 0x9782FB41, 0x9783FB41, 0x9784FB41, 0x9785FB41, + 0x9786FB41, 0x9787FB41, 0x9788FB41, 0x9789FB41, 0x978AFB41, 0x978BFB41, 0x978CFB41, 0x978DFB41, 0x978EFB41, 0x978FFB41, 0x9790FB41, 0x9791FB41, 0x9792FB41, 0x9793FB41, 0x9794FB41, 
+ 0x9795FB41, 0x9796FB41, 0x9797FB41, 0x9798FB41, 0x9799FB41, 0x979AFB41, 0x979BFB41, 0x979CFB41, 0x979DFB41, 0x979EFB41, 0x979FFB41, 0x97A0FB41, 0x97A1FB41, 0x97A2FB41, 0x97A3FB41, + 0x97A4FB41, 0x97A5FB41, 0x97A6FB41, 0x97A7FB41, 0x97A8FB41, 0x97A9FB41, 0x97AAFB41, 0x97ABFB41, 0x97ACFB41, 0x97ADFB41, 0x97AEFB41, 0x97AFFB41, 0x97B0FB41, 0x97B1FB41, 0x97B2FB41, + 0x97B3FB41, 0x97B4FB41, 0x97B5FB41, 0x97B6FB41, 0x97B7FB41, 0x97B8FB41, 0x97B9FB41, 0x97BAFB41, 0x97BBFB41, 0x97BCFB41, 0x97BDFB41, 0x97BEFB41, 0x97BFFB41, 0x97C0FB41, 0x97C1FB41, + 0x97C2FB41, 0x97C3FB41, 0x97C4FB41, 0x97C5FB41, 0x97C6FB41, 0x97C7FB41, 0x97C8FB41, 0x97C9FB41, 0x97CAFB41, 0x97CBFB41, 0x97CCFB41, 0x97CDFB41, 0x97CEFB41, 0x97CFFB41, 0x97D0FB41, + 0x97D1FB41, 0x97D2FB41, 0x97D3FB41, 0x97D4FB41, 0x97D5FB41, 0x97D6FB41, 0x97D7FB41, 0x97D8FB41, 0x97D9FB41, 0x97DAFB41, 0x97DBFB41, 0x97DCFB41, 0x97DDFB41, 0x97DEFB41, 0x97DFFB41, + 0x97E0FB41, 0x97E1FB41, 0x97E2FB41, 0x97E3FB41, 0x97E4FB41, 0x97E5FB41, 0x97E6FB41, 0x97E7FB41, 0x97E8FB41, 0x97E9FB41, 0x97EAFB41, 0x97EBFB41, 0x97ECFB41, 0x97EDFB41, 0x97EEFB41, + 0x97EFFB41, 0x97F0FB41, 0x97F1FB41, 0x97F2FB41, 0x97F3FB41, 0x97F4FB41, 0x97F5FB41, 0x97F6FB41, 0x97F7FB41, 0x97F8FB41, 0x97F9FB41, 0x97FAFB41, 0x97FBFB41, 0x97FCFB41, 0x97FDFB41, + 0x97FEFB41, 0x97FFFB41, 0x9800FB41, 0x9801FB41, 0x9802FB41, 0x9803FB41, 0x9804FB41, 0x9805FB41, 0x9806FB41, 0x9807FB41, 0x9808FB41, 0x9809FB41, 0x980AFB41, 0x980BFB41, 0x980CFB41, + 0x980DFB41, 0x980EFB41, 0x980FFB41, 0x9810FB41, 0x9811FB41, 0x9812FB41, 0x9813FB41, 0x9814FB41, 0x9815FB41, 0x9816FB41, 0x9817FB41, 0x9818FB41, 0x9819FB41, 0x981AFB41, 0x981BFB41, + 0x981CFB41, 0x981DFB41, 0x981EFB41, 0x981FFB41, 0x9820FB41, 0x9821FB41, 0x9822FB41, 0x9823FB41, 0x9824FB41, 0x9825FB41, 0x9826FB41, 0x9827FB41, 0x9828FB41, 0x9829FB41, 0x982AFB41, + 0x982BFB41, 0x982CFB41, 0x982DFB41, 0x982EFB41, 0x982FFB41, 0x9830FB41, 0x9831FB41, 0x9832FB41, 0x9833FB41, 0x9834FB41, 0x9835FB41, 0x9836FB41, 0x9837FB41, 0x9838FB41, 
0x9839FB41, + 0x983AFB41, 0x983BFB41, 0x983CFB41, 0x983DFB41, 0x983EFB41, 0x983FFB41, 0x9840FB41, 0x9841FB41, 0x9842FB41, 0x9843FB41, 0x9844FB41, 0x9845FB41, 0x9846FB41, 0x9847FB41, 0x9848FB41, + 0x9849FB41, 0x984AFB41, 0x984BFB41, 0x984CFB41, 0x984DFB41, 0x984EFB41, 0x984FFB41, 0x9850FB41, 0x9851FB41, 0x9852FB41, 0x9853FB41, 0x9854FB41, 0x9855FB41, 0x9856FB41, 0x9857FB41, + 0x9858FB41, 0x9859FB41, 0x985AFB41, 0x985BFB41, 0x985CFB41, 0x985DFB41, 0x985EFB41, 0x985FFB41, 0x9860FB41, 0x9861FB41, 0x9862FB41, 0x9863FB41, 0x9864FB41, 0x9865FB41, 0x9866FB41, + 0x9867FB41, 0x9868FB41, 0x9869FB41, 0x986AFB41, 0x986BFB41, 0x986CFB41, 0x986DFB41, 0x986EFB41, 0x986FFB41, 0x9870FB41, 0x9871FB41, 0x9872FB41, 0x9873FB41, 0x9874FB41, 0x9875FB41, + 0x9876FB41, 0x9877FB41, 0x9878FB41, 0x9879FB41, 0x987AFB41, 0x987BFB41, 0x987CFB41, 0x987DFB41, 0x987EFB41, 0x987FFB41, 0x9880FB41, 0x9881FB41, 0x9882FB41, 0x9883FB41, 0x9884FB41, + 0x9885FB41, 0x9886FB41, 0x9887FB41, 0x9888FB41, 0x9889FB41, 0x988AFB41, 0x988BFB41, 0x988CFB41, 0x988DFB41, 0x988EFB41, 0x988FFB41, 0x9890FB41, 0x9891FB41, 0x9892FB41, 0x9893FB41, + 0x9894FB41, 0x9895FB41, 0x9896FB41, 0x9897FB41, 0x9898FB41, 0x9899FB41, 0x989AFB41, 0x989BFB41, 0x989CFB41, 0x989DFB41, 0x989EFB41, 0x989FFB41, 0x98A0FB41, 0x98A1FB41, 0x98A2FB41, + 0x98A3FB41, 0x98A4FB41, 0x98A5FB41, 0x98A6FB41, 0x98A7FB41, 0x98A8FB41, 0x98A9FB41, 0x98AAFB41, 0x98ABFB41, 0x98ACFB41, 0x98ADFB41, 0x98AEFB41, 0x98AFFB41, 0x98B0FB41, 0x98B1FB41, + 0x98B2FB41, 0x98B3FB41, 0x98B4FB41, 0x98B5FB41, 0x98B6FB41, 0x98B7FB41, 0x98B8FB41, 0x98B9FB41, 0x98BAFB41, 0x98BBFB41, 0x98BCFB41, 0x98BDFB41, 0x98BEFB41, 0x98BFFB41, 0x98C0FB41, + 0x98C1FB41, 0x98C2FB41, 0x98C3FB41, 0x98C4FB41, 0x98C5FB41, 0x98C6FB41, 0x98C7FB41, 0x98C8FB41, 0x98C9FB41, 0x98CAFB41, 0x98CBFB41, 0x98CCFB41, 0x98CDFB41, 0x98CEFB41, 0x98CFFB41, + 0x98D0FB41, 0x98D1FB41, 0x98D2FB41, 0x98D3FB41, 0x98D4FB41, 0x98D5FB41, 0x98D6FB41, 0x98D7FB41, 0x98D8FB41, 0x98D9FB41, 0x98DAFB41, 0x98DBFB41, 0x98DCFB41, 
0x98DDFB41, 0x98DEFB41, + 0x98DFFB41, 0x98E0FB41, 0x98E1FB41, 0x98E2FB41, 0x98E3FB41, 0x98E4FB41, 0x98E5FB41, 0x98E6FB41, 0x98E7FB41, 0x98E8FB41, 0x98E9FB41, 0x98EAFB41, 0x98EBFB41, 0x98ECFB41, 0x98EDFB41, + 0x98EEFB41, 0x98EFFB41, 0x98F0FB41, 0x98F1FB41, 0x98F2FB41, 0x98F3FB41, 0x98F4FB41, 0x98F5FB41, 0x98F6FB41, 0x98F7FB41, 0x98F8FB41, 0x98F9FB41, 0x98FAFB41, 0x98FBFB41, 0x98FCFB41, + 0x98FDFB41, 0x98FEFB41, 0x98FFFB41, 0x9900FB41, 0x9901FB41, 0x9902FB41, 0x9903FB41, 0x9904FB41, 0x9905FB41, 0x9906FB41, 0x9907FB41, 0x9908FB41, 0x9909FB41, 0x990AFB41, 0x990BFB41, + 0x990CFB41, 0x990DFB41, 0x990EFB41, 0x990FFB41, 0x9910FB41, 0x9911FB41, 0x9912FB41, 0x9913FB41, 0x9914FB41, 0x9915FB41, 0x9916FB41, 0x9917FB41, 0x9918FB41, 0x9919FB41, 0x991AFB41, + 0x991BFB41, 0x991CFB41, 0x991DFB41, 0x991EFB41, 0x991FFB41, 0x9920FB41, 0x9921FB41, 0x9922FB41, 0x9923FB41, 0x9924FB41, 0x9925FB41, 0x9926FB41, 0x9927FB41, 0x9928FB41, 0x9929FB41, + 0x992AFB41, 0x992BFB41, 0x992CFB41, 0x992DFB41, 0x992EFB41, 0x992FFB41, 0x9930FB41, 0x9931FB41, 0x9932FB41, 0x9933FB41, 0x9934FB41, 0x9935FB41, 0x9936FB41, 0x9937FB41, 0x9938FB41, + 0x9939FB41, 0x993AFB41, 0x993BFB41, 0x993CFB41, 0x993DFB41, 0x993EFB41, 0x993FFB41, 0x9940FB41, 0x9941FB41, 0x9942FB41, 0x9943FB41, 0x9944FB41, 0x9945FB41, 0x9946FB41, 0x9947FB41, + 0x9948FB41, 0x9949FB41, 0x994AFB41, 0x994BFB41, 0x994CFB41, 0x994DFB41, 0x994EFB41, 0x994FFB41, 0x9950FB41, 0x9951FB41, 0x9952FB41, 0x9953FB41, 0x9954FB41, 0x9955FB41, 0x9956FB41, + 0x9957FB41, 0x9958FB41, 0x9959FB41, 0x995AFB41, 0x995BFB41, 0x995CFB41, 0x995DFB41, 0x995EFB41, 0x995FFB41, 0x9960FB41, 0x9961FB41, 0x9962FB41, 0x9963FB41, 0x9964FB41, 0x9965FB41, + 0x9966FB41, 0x9967FB41, 0x9968FB41, 0x9969FB41, 0x996AFB41, 0x996BFB41, 0x996CFB41, 0x996DFB41, 0x996EFB41, 0x996FFB41, 0x9970FB41, 0x9971FB41, 0x9972FB41, 0x9973FB41, 0x9974FB41, + 0x9975FB41, 0x9976FB41, 0x9977FB41, 0x9978FB41, 0x9979FB41, 0x997AFB41, 0x997BFB41, 0x997CFB41, 0x997DFB41, 0x997EFB41, 0x997FFB41, 0x9980FB41, 
0x9981FB41, 0x9982FB41, 0x9983FB41, + 0x9984FB41, 0x9985FB41, 0x9986FB41, 0x9987FB41, 0x9988FB41, 0x9989FB41, 0x998AFB41, 0x998BFB41, 0x998CFB41, 0x998DFB41, 0x998EFB41, 0x998FFB41, 0x9990FB41, 0x9991FB41, 0x9992FB41, + 0x9993FB41, 0x9994FB41, 0x9995FB41, 0x9996FB41, 0x9997FB41, 0x9998FB41, 0x9999FB41, 0x999AFB41, 0x999BFB41, 0x999CFB41, 0x999DFB41, 0x999EFB41, 0x999FFB41, 0x99A0FB41, 0x99A1FB41, + 0x99A2FB41, 0x99A3FB41, 0x99A4FB41, 0x99A5FB41, 0x99A6FB41, 0x99A7FB41, 0x99A8FB41, 0x99A9FB41, 0x99AAFB41, 0x99ABFB41, 0x99ACFB41, 0x99ADFB41, 0x99AEFB41, 0x99AFFB41, 0x99B0FB41, + 0x99B1FB41, 0x99B2FB41, 0x99B3FB41, 0x99B4FB41, 0x99B5FB41, 0x99B6FB41, 0x99B7FB41, 0x99B8FB41, 0x99B9FB41, 0x99BAFB41, 0x99BBFB41, 0x99BCFB41, 0x99BDFB41, 0x99BEFB41, 0x99BFFB41, + 0x99C0FB41, 0x99C1FB41, 0x99C2FB41, 0x99C3FB41, 0x99C4FB41, 0x99C5FB41, 0x99C6FB41, 0x99C7FB41, 0x99C8FB41, 0x99C9FB41, 0x99CAFB41, 0x99CBFB41, 0x99CCFB41, 0x99CDFB41, 0x99CEFB41, + 0x99CFFB41, 0x99D0FB41, 0x99D1FB41, 0x99D2FB41, 0x99D3FB41, 0x99D4FB41, 0x99D5FB41, 0x99D6FB41, 0x99D7FB41, 0x99D8FB41, 0x99D9FB41, 0x99DAFB41, 0x99DBFB41, 0x99DCFB41, 0x99DDFB41, + 0x99DEFB41, 0x99DFFB41, 0x99E0FB41, 0x99E1FB41, 0x99E2FB41, 0x99E3FB41, 0x99E4FB41, 0x99E5FB41, 0x99E6FB41, 0x99E7FB41, 0x99E8FB41, 0x99E9FB41, 0x99EAFB41, 0x99EBFB41, 0x99ECFB41, + 0x99EDFB41, 0x99EEFB41, 0x99EFFB41, 0x99F0FB41, 0x99F1FB41, 0x99F2FB41, 0x99F3FB41, 0x99F4FB41, 0x99F5FB41, 0x99F6FB41, 0x99F7FB41, 0x99F8FB41, 0x99F9FB41, 0x99FAFB41, 0x99FBFB41, + 0x99FCFB41, 0x99FDFB41, 0x99FEFB41, 0x99FFFB41, 0x9A00FB41, 0x9A01FB41, 0x9A02FB41, 0x9A03FB41, 0x9A04FB41, 0x9A05FB41, 0x9A06FB41, 0x9A07FB41, 0x9A08FB41, 0x9A09FB41, 0x9A0AFB41, + 0x9A0BFB41, 0x9A0CFB41, 0x9A0DFB41, 0x9A0EFB41, 0x9A0FFB41, 0x9A10FB41, 0x9A11FB41, 0x9A12FB41, 0x9A13FB41, 0x9A14FB41, 0x9A15FB41, 0x9A16FB41, 0x9A17FB41, 0x9A18FB41, 0x9A19FB41, + 0x9A1AFB41, 0x9A1BFB41, 0x9A1CFB41, 0x9A1DFB41, 0x9A1EFB41, 0x9A1FFB41, 0x9A20FB41, 0x9A21FB41, 0x9A22FB41, 0x9A23FB41, 0x9A24FB41, 
0x9A25FB41, 0x9A26FB41, 0x9A27FB41, 0x9A28FB41, + 0x9A29FB41, 0x9A2AFB41, 0x9A2BFB41, 0x9A2CFB41, 0x9A2DFB41, 0x9A2EFB41, 0x9A2FFB41, 0x9A30FB41, 0x9A31FB41, 0x9A32FB41, 0x9A33FB41, 0x9A34FB41, 0x9A35FB41, 0x9A36FB41, 0x9A37FB41, + 0x9A38FB41, 0x9A39FB41, 0x9A3AFB41, 0x9A3BFB41, 0x9A3CFB41, 0x9A3DFB41, 0x9A3EFB41, 0x9A3FFB41, 0x9A40FB41, 0x9A41FB41, 0x9A42FB41, 0x9A43FB41, 0x9A44FB41, 0x9A45FB41, 0x9A46FB41, + 0x9A47FB41, 0x9A48FB41, 0x9A49FB41, 0x9A4AFB41, 0x9A4BFB41, 0x9A4CFB41, 0x9A4DFB41, 0x9A4EFB41, 0x9A4FFB41, 0x9A50FB41, 0x9A51FB41, 0x9A52FB41, 0x9A53FB41, 0x9A54FB41, 0x9A55FB41, + 0x9A56FB41, 0x9A57FB41, 0x9A58FB41, 0x9A59FB41, 0x9A5AFB41, 0x9A5BFB41, 0x9A5CFB41, 0x9A5DFB41, 0x9A5EFB41, 0x9A5FFB41, 0x9A60FB41, 0x9A61FB41, 0x9A62FB41, 0x9A63FB41, 0x9A64FB41, + 0x9A65FB41, 0x9A66FB41, 0x9A67FB41, 0x9A68FB41, 0x9A69FB41, 0x9A6AFB41, 0x9A6BFB41, 0x9A6CFB41, 0x9A6DFB41, 0x9A6EFB41, 0x9A6FFB41, 0x9A70FB41, 0x9A71FB41, 0x9A72FB41, 0x9A73FB41, + 0x9A74FB41, 0x9A75FB41, 0x9A76FB41, 0x9A77FB41, 0x9A78FB41, 0x9A79FB41, 0x9A7AFB41, 0x9A7BFB41, 0x9A7CFB41, 0x9A7DFB41, 0x9A7EFB41, 0x9A7FFB41, 0x9A80FB41, 0x9A81FB41, 0x9A82FB41, + 0x9A83FB41, 0x9A84FB41, 0x9A85FB41, 0x9A86FB41, 0x9A87FB41, 0x9A88FB41, 0x9A89FB41, 0x9A8AFB41, 0x9A8BFB41, 0x9A8CFB41, 0x9A8DFB41, 0x9A8EFB41, 0x9A8FFB41, 0x9A90FB41, 0x9A91FB41, + 0x9A92FB41, 0x9A93FB41, 0x9A94FB41, 0x9A95FB41, 0x9A96FB41, 0x9A97FB41, 0x9A98FB41, 0x9A99FB41, 0x9A9AFB41, 0x9A9BFB41, 0x9A9CFB41, 0x9A9DFB41, 0x9A9EFB41, 0x9A9FFB41, 0x9AA0FB41, + 0x9AA1FB41, 0x9AA2FB41, 0x9AA3FB41, 0x9AA4FB41, 0x9AA5FB41, 0x9AA6FB41, 0x9AA7FB41, 0x9AA8FB41, 0x9AA9FB41, 0x9AAAFB41, 0x9AABFB41, 0x9AACFB41, 0x9AADFB41, 0x9AAEFB41, 0x9AAFFB41, + 0x9AB0FB41, 0x9AB1FB41, 0x9AB2FB41, 0x9AB3FB41, 0x9AB4FB41, 0x9AB5FB41, 0x9AB6FB41, 0x9AB7FB41, 0x9AB8FB41, 0x9AB9FB41, 0x9ABAFB41, 0x9ABBFB41, 0x9ABCFB41, 0x9ABDFB41, 0x9ABEFB41, + 0x9ABFFB41, 0x9AC0FB41, 0x9AC1FB41, 0x9AC2FB41, 0x9AC3FB41, 0x9AC4FB41, 0x9AC5FB41, 0x9AC6FB41, 0x9AC7FB41, 0x9AC8FB41, 
0x9AC9FB41, 0x9ACAFB41, 0x9ACBFB41, 0x9ACCFB41, 0x9ACDFB41, + 0x9ACEFB41, 0x9ACFFB41, 0x9AD0FB41, 0x9AD1FB41, 0x9AD2FB41, 0x9AD3FB41, 0x9AD4FB41, 0x9AD5FB41, 0x9AD6FB41, 0x9AD7FB41, 0x9AD8FB41, 0x9AD9FB41, 0x9ADAFB41, 0x9ADBFB41, 0x9ADCFB41, + 0x9ADDFB41, 0x9ADEFB41, 0x9ADFFB41, 0x9AE0FB41, 0x9AE1FB41, 0x9AE2FB41, 0x9AE3FB41, 0x9AE4FB41, 0x9AE5FB41, 0x9AE6FB41, 0x9AE7FB41, 0x9AE8FB41, 0x9AE9FB41, 0x9AEAFB41, 0x9AEBFB41, + 0x9AECFB41, 0x9AEDFB41, 0x9AEEFB41, 0x9AEFFB41, 0x9AF0FB41, 0x9AF1FB41, 0x9AF2FB41, 0x9AF3FB41, 0x9AF4FB41, 0x9AF5FB41, 0x9AF6FB41, 0x9AF7FB41, 0x9AF8FB41, 0x9AF9FB41, 0x9AFAFB41, + 0x9AFBFB41, 0x9AFCFB41, 0x9AFDFB41, 0x9AFEFB41, 0x9AFFFB41, 0x9B00FB41, 0x9B01FB41, 0x9B02FB41, 0x9B03FB41, 0x9B04FB41, 0x9B05FB41, 0x9B06FB41, 0x9B07FB41, 0x9B08FB41, 0x9B09FB41, + 0x9B0AFB41, 0x9B0BFB41, 0x9B0CFB41, 0x9B0DFB41, 0x9B0EFB41, 0x9B0FFB41, 0x9B10FB41, 0x9B11FB41, 0x9B12FB41, 0x9B13FB41, 0x9B14FB41, 0x9B15FB41, 0x9B16FB41, 0x9B17FB41, 0x9B18FB41, + 0x9B19FB41, 0x9B1AFB41, 0x9B1BFB41, 0x9B1CFB41, 0x9B1DFB41, 0x9B1EFB41, 0x9B1FFB41, 0x9B20FB41, 0x9B21FB41, 0x9B22FB41, 0x9B23FB41, 0x9B24FB41, 0x9B25FB41, 0x9B26FB41, 0x9B27FB41, + 0x9B28FB41, 0x9B29FB41, 0x9B2AFB41, 0x9B2BFB41, 0x9B2CFB41, 0x9B2DFB41, 0x9B2EFB41, 0x9B2FFB41, 0x9B30FB41, 0x9B31FB41, 0x9B32FB41, 0x9B33FB41, 0x9B34FB41, 0x9B35FB41, 0x9B36FB41, + 0x9B37FB41, 0x9B38FB41, 0x9B39FB41, 0x9B3AFB41, 0x9B3BFB41, 0x9B3CFB41, 0x9B3DFB41, 0x9B3EFB41, 0x9B3FFB41, 0x9B40FB41, 0x9B41FB41, 0x9B42FB41, 0x9B43FB41, 0x9B44FB41, 0x9B45FB41, + 0x9B46FB41, 0x9B47FB41, 0x9B48FB41, 0x9B49FB41, 0x9B4AFB41, 0x9B4BFB41, 0x9B4CFB41, 0x9B4DFB41, 0x9B4EFB41, 0x9B4FFB41, 0x9B50FB41, 0x9B51FB41, 0x9B52FB41, 0x9B53FB41, 0x9B54FB41, + 0x9B55FB41, 0x9B56FB41, 0x9B57FB41, 0x9B58FB41, 0x9B59FB41, 0x9B5AFB41, 0x9B5BFB41, 0x9B5CFB41, 0x9B5DFB41, 0x9B5EFB41, 0x9B5FFB41, 0x9B60FB41, 0x9B61FB41, 0x9B62FB41, 0x9B63FB41, + 0x9B64FB41, 0x9B65FB41, 0x9B66FB41, 0x9B67FB41, 0x9B68FB41, 0x9B69FB41, 0x9B6AFB41, 0x9B6BFB41, 0x9B6CFB41, 
0x9B6DFB41, 0x9B6EFB41, 0x9B6FFB41, 0x9B70FB41, 0x9B71FB41, 0x9B72FB41, + 0x9B73FB41, 0x9B74FB41, 0x9B75FB41, 0x9B76FB41, 0x9B77FB41, 0x9B78FB41, 0x9B79FB41, 0x9B7AFB41, 0x9B7BFB41, 0x9B7CFB41, 0x9B7DFB41, 0x9B7EFB41, 0x9B7FFB41, 0x9B80FB41, 0x9B81FB41, + 0x9B82FB41, 0x9B83FB41, 0x9B84FB41, 0x9B85FB41, 0x9B86FB41, 0x9B87FB41, 0x9B88FB41, 0x9B89FB41, 0x9B8AFB41, 0x9B8BFB41, 0x9B8CFB41, 0x9B8DFB41, 0x9B8EFB41, 0x9B8FFB41, 0x9B90FB41, + 0x9B91FB41, 0x9B92FB41, 0x9B93FB41, 0x9B94FB41, 0x9B95FB41, 0x9B96FB41, 0x9B97FB41, 0x9B98FB41, 0x9B99FB41, 0x9B9AFB41, 0x9B9BFB41, 0x9B9CFB41, 0x9B9DFB41, 0x9B9EFB41, 0x9B9FFB41, + 0x9BA0FB41, 0x9BA1FB41, 0x9BA2FB41, 0x9BA3FB41, 0x9BA4FB41, 0x9BA5FB41, 0x9BA6FB41, 0x9BA7FB41, 0x9BA8FB41, 0x9BA9FB41, 0x9BAAFB41, 0x9BABFB41, 0x9BACFB41, 0x9BADFB41, 0x9BAEFB41, + 0x9BAFFB41, 0x9BB0FB41, 0x9BB1FB41, 0x9BB2FB41, 0x9BB3FB41, 0x9BB4FB41, 0x9BB5FB41, 0x9BB6FB41, 0x9BB7FB41, 0x9BB8FB41, 0x9BB9FB41, 0x9BBAFB41, 0x9BBBFB41, 0x9BBCFB41, 0x9BBDFB41, + 0x9BBEFB41, 0x9BBFFB41, 0x9BC0FB41, 0x9BC1FB41, 0x9BC2FB41, 0x9BC3FB41, 0x9BC4FB41, 0x9BC5FB41, 0x9BC6FB41, 0x9BC7FB41, 0x9BC8FB41, 0x9BC9FB41, 0x9BCAFB41, 0x9BCBFB41, 0x9BCCFB41, + 0x9BCDFB41, 0x9BCEFB41, 0x9BCFFB41, 0x9BD0FB41, 0x9BD1FB41, 0x9BD2FB41, 0x9BD3FB41, 0x9BD4FB41, 0x9BD5FB41, 0x9BD6FB41, 0x9BD7FB41, 0x9BD8FB41, 0x9BD9FB41, 0x9BDAFB41, 0x9BDBFB41, + 0x9BDCFB41, 0x9BDDFB41, 0x9BDEFB41, 0x9BDFFB41, 0x9BE0FB41, 0x9BE1FB41, 0x9BE2FB41, 0x9BE3FB41, 0x9BE4FB41, 0x9BE5FB41, 0x9BE6FB41, 0x9BE7FB41, 0x9BE8FB41, 0x9BE9FB41, 0x9BEAFB41, + 0x9BEBFB41, 0x9BECFB41, 0x9BEDFB41, 0x9BEEFB41, 0x9BEFFB41, 0x9BF0FB41, 0x9BF1FB41, 0x9BF2FB41, 0x9BF3FB41, 0x9BF4FB41, 0x9BF5FB41, 0x9BF6FB41, 0x9BF7FB41, 0x9BF8FB41, 0x9BF9FB41, + 0x9BFAFB41, 0x9BFBFB41, 0x9BFCFB41, 0x9BFDFB41, 0x9BFEFB41, 0x9BFFFB41, 0x9C00FB41, 0x9C01FB41, 0x9C02FB41, 0x9C03FB41, 0x9C04FB41, 0x9C05FB41, 0x9C06FB41, 0x9C07FB41, 0x9C08FB41, + 0x9C09FB41, 0x9C0AFB41, 0x9C0BFB41, 0x9C0CFB41, 0x9C0DFB41, 0x9C0EFB41, 0x9C0FFB41, 0x9C10FB41, 
0x9C11FB41, 0x9C12FB41, 0x9C13FB41, 0x9C14FB41, 0x9C15FB41, 0x9C16FB41, 0x9C17FB41, + 0x9C18FB41, 0x9C19FB41, 0x9C1AFB41, 0x9C1BFB41, 0x9C1CFB41, 0x9C1DFB41, 0x9C1EFB41, 0x9C1FFB41, 0x9C20FB41, 0x9C21FB41, 0x9C22FB41, 0x9C23FB41, 0x9C24FB41, 0x9C25FB41, 0x9C26FB41, + 0x9C27FB41, 0x9C28FB41, 0x9C29FB41, 0x9C2AFB41, 0x9C2BFB41, 0x9C2CFB41, 0x9C2DFB41, 0x9C2EFB41, 0x9C2FFB41, 0x9C30FB41, 0x9C31FB41, 0x9C32FB41, 0x9C33FB41, 0x9C34FB41, 0x9C35FB41, + 0x9C36FB41, 0x9C37FB41, 0x9C38FB41, 0x9C39FB41, 0x9C3AFB41, 0x9C3BFB41, 0x9C3CFB41, 0x9C3DFB41, 0x9C3EFB41, 0x9C3FFB41, 0x9C40FB41, 0x9C41FB41, 0x9C42FB41, 0x9C43FB41, 0x9C44FB41, + 0x9C45FB41, 0x9C46FB41, 0x9C47FB41, 0x9C48FB41, 0x9C49FB41, 0x9C4AFB41, 0x9C4BFB41, 0x9C4CFB41, 0x9C4DFB41, 0x9C4EFB41, 0x9C4FFB41, 0x9C50FB41, 0x9C51FB41, 0x9C52FB41, 0x9C53FB41, + 0x9C54FB41, 0x9C55FB41, 0x9C56FB41, 0x9C57FB41, 0x9C58FB41, 0x9C59FB41, 0x9C5AFB41, 0x9C5BFB41, 0x9C5CFB41, 0x9C5DFB41, 0x9C5EFB41, 0x9C5FFB41, 0x9C60FB41, 0x9C61FB41, 0x9C62FB41, + 0x9C63FB41, 0x9C64FB41, 0x9C65FB41, 0x9C66FB41, 0x9C67FB41, 0x9C68FB41, 0x9C69FB41, 0x9C6AFB41, 0x9C6BFB41, 0x9C6CFB41, 0x9C6DFB41, 0x9C6EFB41, 0x9C6FFB41, 0x9C70FB41, 0x9C71FB41, + 0x9C72FB41, 0x9C73FB41, 0x9C74FB41, 0x9C75FB41, 0x9C76FB41, 0x9C77FB41, 0x9C78FB41, 0x9C79FB41, 0x9C7AFB41, 0x9C7BFB41, 0x9C7CFB41, 0x9C7DFB41, 0x9C7EFB41, 0x9C7FFB41, 0x9C80FB41, + 0x9C81FB41, 0x9C82FB41, 0x9C83FB41, 0x9C84FB41, 0x9C85FB41, 0x9C86FB41, 0x9C87FB41, 0x9C88FB41, 0x9C89FB41, 0x9C8AFB41, 0x9C8BFB41, 0x9C8CFB41, 0x9C8DFB41, 0x9C8EFB41, 0x9C8FFB41, + 0x9C90FB41, 0x9C91FB41, 0x9C92FB41, 0x9C93FB41, 0x9C94FB41, 0x9C95FB41, 0x9C96FB41, 0x9C97FB41, 0x9C98FB41, 0x9C99FB41, 0x9C9AFB41, 0x9C9BFB41, 0x9C9CFB41, 0x9C9DFB41, 0x9C9EFB41, + 0x9C9FFB41, 0x9CA0FB41, 0x9CA1FB41, 0x9CA2FB41, 0x9CA3FB41, 0x9CA4FB41, 0x9CA5FB41, 0x9CA6FB41, 0x9CA7FB41, 0x9CA8FB41, 0x9CA9FB41, 0x9CAAFB41, 0x9CABFB41, 0x9CACFB41, 0x9CADFB41, + 0x9CAEFB41, 0x9CAFFB41, 0x9CB0FB41, 0x9CB1FB41, 0x9CB2FB41, 0x9CB3FB41, 0x9CB4FB41, 
0x9CB5FB41, 0x9CB6FB41, 0x9CB7FB41, 0x9CB8FB41, 0x9CB9FB41, 0x9CBAFB41, 0x9CBBFB41, 0x9CBCFB41, + 0x9CBDFB41, 0x9CBEFB41, 0x9CBFFB41, 0x9CC0FB41, 0x9CC1FB41, 0x9CC2FB41, 0x9CC3FB41, 0x9CC4FB41, 0x9CC5FB41, 0x9CC6FB41, 0x9CC7FB41, 0x9CC8FB41, 0x9CC9FB41, 0x9CCAFB41, 0x9CCBFB41, + 0x9CCCFB41, 0x9CCDFB41, 0x9CCEFB41, 0x9CCFFB41, 0x9CD0FB41, 0x9CD1FB41, 0x9CD2FB41, 0x9CD3FB41, 0x9CD4FB41, 0x9CD5FB41, 0x9CD6FB41, 0x9CD7FB41, 0x9CD8FB41, 0x9CD9FB41, 0x9CDAFB41, + 0x9CDBFB41, 0x9CDCFB41, 0x9CDDFB41, 0x9CDEFB41, 0x9CDFFB41, 0x9CE0FB41, 0x9CE1FB41, 0x9CE2FB41, 0x9CE3FB41, 0x9CE4FB41, 0x9CE5FB41, 0x9CE6FB41, 0x9CE7FB41, 0x9CE8FB41, 0x9CE9FB41, + 0x9CEAFB41, 0x9CEBFB41, 0x9CECFB41, 0x9CEDFB41, 0x9CEEFB41, 0x9CEFFB41, 0x9CF0FB41, 0x9CF1FB41, 0x9CF2FB41, 0x9CF3FB41, 0x9CF4FB41, 0x9CF5FB41, 0x9CF6FB41, 0x9CF7FB41, 0x9CF8FB41, + 0x9CF9FB41, 0x9CFAFB41, 0x9CFBFB41, 0x9CFCFB41, 0x9CFDFB41, 0x9CFEFB41, 0x9CFFFB41, 0x9D00FB41, 0x9D01FB41, 0x9D02FB41, 0x9D03FB41, 0x9D04FB41, 0x9D05FB41, 0x9D06FB41, 0x9D07FB41, + 0x9D08FB41, 0x9D09FB41, 0x9D0AFB41, 0x9D0BFB41, 0x9D0CFB41, 0x9D0DFB41, 0x9D0EFB41, 0x9D0FFB41, 0x9D10FB41, 0x9D11FB41, 0x9D12FB41, 0x9D13FB41, 0x9D14FB41, 0x9D15FB41, 0x9D16FB41, + 0x9D17FB41, 0x9D18FB41, 0x9D19FB41, 0x9D1AFB41, 0x9D1BFB41, 0x9D1CFB41, 0x9D1DFB41, 0x9D1EFB41, 0x9D1FFB41, 0x9D20FB41, 0x9D21FB41, 0x9D22FB41, 0x9D23FB41, 0x9D24FB41, 0x9D25FB41, + 0x9D26FB41, 0x9D27FB41, 0x9D28FB41, 0x9D29FB41, 0x9D2AFB41, 0x9D2BFB41, 0x9D2CFB41, 0x9D2DFB41, 0x9D2EFB41, 0x9D2FFB41, 0x9D30FB41, 0x9D31FB41, 0x9D32FB41, 0x9D33FB41, 0x9D34FB41, + 0x9D35FB41, 0x9D36FB41, 0x9D37FB41, 0x9D38FB41, 0x9D39FB41, 0x9D3AFB41, 0x9D3BFB41, 0x9D3CFB41, 0x9D3DFB41, 0x9D3EFB41, 0x9D3FFB41, 0x9D40FB41, 0x9D41FB41, 0x9D42FB41, 0x9D43FB41, + 0x9D44FB41, 0x9D45FB41, 0x9D46FB41, 0x9D47FB41, 0x9D48FB41, 0x9D49FB41, 0x9D4AFB41, 0x9D4BFB41, 0x9D4CFB41, 0x9D4DFB41, 0x9D4EFB41, 0x9D4FFB41, 0x9D50FB41, 0x9D51FB41, 0x9D52FB41, + 0x9D53FB41, 0x9D54FB41, 0x9D55FB41, 0x9D56FB41, 0x9D57FB41, 0x9D58FB41, 
0x9D59FB41, 0x9D5AFB41, 0x9D5BFB41, 0x9D5CFB41, 0x9D5DFB41, 0x9D5EFB41, 0x9D5FFB41, 0x9D60FB41, 0x9D61FB41, + 0x9D62FB41, 0x9D63FB41, 0x9D64FB41, 0x9D65FB41, 0x9D66FB41, 0x9D67FB41, 0x9D68FB41, 0x9D69FB41, 0x9D6AFB41, 0x9D6BFB41, 0x9D6CFB41, 0x9D6DFB41, 0x9D6EFB41, 0x9D6FFB41, 0x9D70FB41, + 0x9D71FB41, 0x9D72FB41, 0x9D73FB41, 0x9D74FB41, 0x9D75FB41, 0x9D76FB41, 0x9D77FB41, 0x9D78FB41, 0x9D79FB41, 0x9D7AFB41, 0x9D7BFB41, 0x9D7CFB41, 0x9D7DFB41, 0x9D7EFB41, 0x9D7FFB41, + 0x9D80FB41, 0x9D81FB41, 0x9D82FB41, 0x9D83FB41, 0x9D84FB41, 0x9D85FB41, 0x9D86FB41, 0x9D87FB41, 0x9D88FB41, 0x9D89FB41, 0x9D8AFB41, 0x9D8BFB41, 0x9D8CFB41, 0x9D8DFB41, 0x9D8EFB41, + 0x9D8FFB41, 0x9D90FB41, 0x9D91FB41, 0x9D92FB41, 0x9D93FB41, 0x9D94FB41, 0x9D95FB41, 0x9D96FB41, 0x9D97FB41, 0x9D98FB41, 0x9D99FB41, 0x9D9AFB41, 0x9D9BFB41, 0x9D9CFB41, 0x9D9DFB41, + 0x9D9EFB41, 0x9D9FFB41, 0x9DA0FB41, 0x9DA1FB41, 0x9DA2FB41, 0x9DA3FB41, 0x9DA4FB41, 0x9DA5FB41, 0x9DA6FB41, 0x9DA7FB41, 0x9DA8FB41, 0x9DA9FB41, 0x9DAAFB41, 0x9DABFB41, 0x9DACFB41, + 0x9DADFB41, 0x9DAEFB41, 0x9DAFFB41, 0x9DB0FB41, 0x9DB1FB41, 0x9DB2FB41, 0x9DB3FB41, 0x9DB4FB41, 0x9DB5FB41, 0x9DB6FB41, 0x9DB7FB41, 0x9DB8FB41, 0x9DB9FB41, 0x9DBAFB41, 0x9DBBFB41, + 0x9DBCFB41, 0x9DBDFB41, 0x9DBEFB41, 0x9DBFFB41, 0x9DC0FB41, 0x9DC1FB41, 0x9DC2FB41, 0x9DC3FB41, 0x9DC4FB41, 0x9DC5FB41, 0x9DC6FB41, 0x9DC7FB41, 0x9DC8FB41, 0x9DC9FB41, 0x9DCAFB41, + 0x9DCBFB41, 0x9DCCFB41, 0x9DCDFB41, 0x9DCEFB41, 0x9DCFFB41, 0x9DD0FB41, 0x9DD1FB41, 0x9DD2FB41, 0x9DD3FB41, 0x9DD4FB41, 0x9DD5FB41, 0x9DD6FB41, 0x9DD7FB41, 0x9DD8FB41, 0x9DD9FB41, + 0x9DDAFB41, 0x9DDBFB41, 0x9DDCFB41, 0x9DDDFB41, 0x9DDEFB41, 0x9DDFFB41, 0x9DE0FB41, 0x9DE1FB41, 0x9DE2FB41, 0x9DE3FB41, 0x9DE4FB41, 0x9DE5FB41, 0x9DE6FB41, 0x9DE7FB41, 0x9DE8FB41, + 0x9DE9FB41, 0x9DEAFB41, 0x9DEBFB41, 0x9DECFB41, 0x9DEDFB41, 0x9DEEFB41, 0x9DEFFB41, 0x9DF0FB41, 0x9DF1FB41, 0x9DF2FB41, 0x9DF3FB41, 0x9DF4FB41, 0x9DF5FB41, 0x9DF6FB41, 0x9DF7FB41, + 0x9DF8FB41, 0x9DF9FB41, 0x9DFAFB41, 0x9DFBFB41, 0x9DFCFB41, 
0x9DFDFB41, 0x9DFEFB41, 0x9DFFFB41, 0x9E00FB41, 0x9E01FB41, 0x9E02FB41, 0x9E03FB41, 0x9E04FB41, 0x9E05FB41, 0x9E06FB41, + 0x9E07FB41, 0x9E08FB41, 0x9E09FB41, 0x9E0AFB41, 0x9E0BFB41, 0x9E0CFB41, 0x9E0DFB41, 0x9E0EFB41, 0x9E0FFB41, 0x9E10FB41, 0x9E11FB41, 0x9E12FB41, 0x9E13FB41, 0x9E14FB41, 0x9E15FB41, + 0x9E16FB41, 0x9E17FB41, 0x9E18FB41, 0x9E19FB41, 0x9E1AFB41, 0x9E1BFB41, 0x9E1CFB41, 0x9E1DFB41, 0x9E1EFB41, 0x9E1FFB41, 0x9E20FB41, 0x9E21FB41, 0x9E22FB41, 0x9E23FB41, 0x9E24FB41, + 0x9E25FB41, 0x9E26FB41, 0x9E27FB41, 0x9E28FB41, 0x9E29FB41, 0x9E2AFB41, 0x9E2BFB41, 0x9E2CFB41, 0x9E2DFB41, 0x9E2EFB41, 0x9E2FFB41, 0x9E30FB41, 0x9E31FB41, 0x9E32FB41, 0x9E33FB41, + 0x9E34FB41, 0x9E35FB41, 0x9E36FB41, 0x9E37FB41, 0x9E38FB41, 0x9E39FB41, 0x9E3AFB41, 0x9E3BFB41, 0x9E3CFB41, 0x9E3DFB41, 0x9E3EFB41, 0x9E3FFB41, 0x9E40FB41, 0x9E41FB41, 0x9E42FB41, + 0x9E43FB41, 0x9E44FB41, 0x9E45FB41, 0x9E46FB41, 0x9E47FB41, 0x9E48FB41, 0x9E49FB41, 0x9E4AFB41, 0x9E4BFB41, 0x9E4CFB41, 0x9E4DFB41, 0x9E4EFB41, 0x9E4FFB41, 0x9E50FB41, 0x9E51FB41, + 0x9E52FB41, 0x9E53FB41, 0x9E54FB41, 0x9E55FB41, 0x9E56FB41, 0x9E57FB41, 0x9E58FB41, 0x9E59FB41, 0x9E5AFB41, 0x9E5BFB41, 0x9E5CFB41, 0x9E5DFB41, 0x9E5EFB41, 0x9E5FFB41, 0x9E60FB41, + 0x9E61FB41, 0x9E62FB41, 0x9E63FB41, 0x9E64FB41, 0x9E65FB41, 0x9E66FB41, 0x9E67FB41, 0x9E68FB41, 0x9E69FB41, 0x9E6AFB41, 0x9E6BFB41, 0x9E6CFB41, 0x9E6DFB41, 0x9E6EFB41, 0x9E6FFB41, + 0x9E70FB41, 0x9E71FB41, 0x9E72FB41, 0x9E73FB41, 0x9E74FB41, 0x9E75FB41, 0x9E76FB41, 0x9E77FB41, 0x9E78FB41, 0x9E79FB41, 0x9E7AFB41, 0x9E7BFB41, 0x9E7CFB41, 0x9E7DFB41, 0x9E7EFB41, + 0x9E7FFB41, 0x9E80FB41, 0x9E81FB41, 0x9E82FB41, 0x9E83FB41, 0x9E84FB41, 0x9E85FB41, 0x9E86FB41, 0x9E87FB41, 0x9E88FB41, 0x9E89FB41, 0x9E8AFB41, 0x9E8BFB41, 0x9E8CFB41, 0x9E8DFB41, + 0x9E8EFB41, 0x9E8FFB41, 0x9E90FB41, 0x9E91FB41, 0x9E92FB41, 0x9E93FB41, 0x9E94FB41, 0x9E95FB41, 0x9E96FB41, 0x9E97FB41, 0x9E98FB41, 0x9E99FB41, 0x9E9AFB41, 0x9E9BFB41, 0x9E9CFB41, + 0x9E9DFB41, 0x9E9EFB41, 0x9E9FFB41, 0x9EA0FB41, 
0x9EA1FB41, 0x9EA2FB41, 0x9EA3FB41, 0x9EA4FB41, 0x9EA5FB41, 0x9EA6FB41, 0x9EA7FB41, 0x9EA8FB41, 0x9EA9FB41, 0x9EAAFB41, 0x9EABFB41, + 0x9EACFB41, 0x9EADFB41, 0x9EAEFB41, 0x9EAFFB41, 0x9EB0FB41, 0x9EB1FB41, 0x9EB2FB41, 0x9EB3FB41, 0x9EB4FB41, 0x9EB5FB41, 0x9EB6FB41, 0x9EB7FB41, 0x9EB8FB41, 0x9EB9FB41, 0x9EBAFB41, + 0x9EBBFB41, 0x9EBCFB41, 0x9EBDFB41, 0x9EBEFB41, 0x9EBFFB41, 0x9EC0FB41, 0x9EC1FB41, 0x9EC2FB41, 0x9EC3FB41, 0x9EC4FB41, 0x9EC5FB41, 0x9EC6FB41, 0x9EC7FB41, 0x9EC8FB41, 0x9EC9FB41, + 0x9ECAFB41, 0x9ECBFB41, 0x9ECCFB41, 0x9ECDFB41, 0x9ECEFB41, 0x9ECFFB41, 0x9ED0FB41, 0x9ED1FB41, 0x9ED2FB41, 0x9ED3FB41, 0x9ED4FB41, 0x9ED5FB41, 0x9ED6FB41, 0x9ED7FB41, 0x9ED8FB41, + 0x9ED9FB41, 0x9EDAFB41, 0x9EDBFB41, 0x9EDCFB41, 0x9EDDFB41, 0x9EDEFB41, 0x9EDFFB41, 0x9EE0FB41, 0x9EE1FB41, 0x9EE2FB41, 0x9EE3FB41, 0x9EE4FB41, 0x9EE5FB41, 0x9EE6FB41, 0x9EE7FB41, + 0x9EE8FB41, 0x9EE9FB41, 0x9EEAFB41, 0x9EEBFB41, 0x9EECFB41, 0x9EEDFB41, 0x9EEEFB41, 0x9EEFFB41, 0x9EF0FB41, 0x9EF1FB41, 0x9EF2FB41, 0x9EF3FB41, 0x9EF4FB41, 0x9EF5FB41, 0x9EF6FB41, + 0x9EF7FB41, 0x9EF8FB41, 0x9EF9FB41, 0x9EFAFB41, 0x9EFBFB41, 0x9EFCFB41, 0x9EFDFB41, 0x9EFEFB41, 0x9EFFFB41, 0x9F00FB41, 0x9F01FB41, 0x9F02FB41, 0x9F03FB41, 0x9F04FB41, 0x9F05FB41, + 0x9F06FB41, 0x9F07FB41, 0x9F08FB41, 0x9F09FB41, 0x9F0AFB41, 0x9F0BFB41, 0x9F0CFB41, 0x9F0DFB41, 0x9F0EFB41, 0x9F0FFB41, 0x9F10FB41, 0x9F11FB41, 0x9F12FB41, 0x9F13FB41, 0x9F14FB41, + 0x9F15FB41, 0x9F16FB41, 0x9F17FB41, 0x9F18FB41, 0x9F19FB41, 0x9F1AFB41, 0x9F1BFB41, 0x9F1CFB41, 0x9F1DFB41, 0x9F1EFB41, 0x9F1FFB41, 0x9F20FB41, 0x9F21FB41, 0x9F22FB41, 0x9F23FB41, + 0x9F24FB41, 0x9F25FB41, 0x9F26FB41, 0x9F27FB41, 0x9F28FB41, 0x9F29FB41, 0x9F2AFB41, 0x9F2BFB41, 0x9F2CFB41, 0x9F2DFB41, 0x9F2EFB41, 0x9F2FFB41, 0x9F30FB41, 0x9F31FB41, 0x9F32FB41, + 0x9F33FB41, 0x9F34FB41, 0x9F35FB41, 0x9F36FB41, 0x9F37FB41, 0x9F38FB41, 0x9F39FB41, 0x9F3AFB41, 0x9F3BFB41, 0x9F3CFB41, 0x9F3DFB41, 0x9F3EFB41, 0x9F3FFB41, 0x9F40FB41, 0x9F41FB41, + 0x9F42FB41, 0x9F43FB41, 0x9F44FB41, 
0x9F45FB41, 0x9F46FB41, 0x9F47FB41, 0x9F48FB41, 0x9F49FB41, 0x9F4AFB41, 0x9F4BFB41, 0x9F4CFB41, 0x9F4DFB41, 0x9F4EFB41, 0x9F4FFB41, 0x9F50FB41, + 0x9F51FB41, 0x9F52FB41, 0x9F53FB41, 0x9F54FB41, 0x9F55FB41, 0x9F56FB41, 0x9F57FB41, 0x9F58FB41, 0x9F59FB41, 0x9F5AFB41, 0x9F5BFB41, 0x9F5CFB41, 0x9F5DFB41, 0x9F5EFB41, 0x9F5FFB41, + 0x9F60FB41, 0x9F61FB41, 0x9F62FB41, 0x9F63FB41, 0x9F64FB41, 0x9F65FB41, 0x9F66FB41, 0x9F67FB41, 0x9F68FB41, 0x9F69FB41, 0x9F6AFB41, 0x9F6BFB41, 0x9F6CFB41, 0x9F6DFB41, 0x9F6EFB41, + 0x9F6FFB41, 0x9F70FB41, 0x9F71FB41, 0x9F72FB41, 0x9F73FB41, 0x9F74FB41, 0x9F75FB41, 0x9F76FB41, 0x9F77FB41, 0x9F78FB41, 0x9F79FB41, 0x9F7AFB41, 0x9F7BFB41, 0x9F7CFB41, 0x9F7DFB41, + 0x9F7EFB41, 0x9F7FFB41, 0x9F80FB41, 0x9F81FB41, 0x9F82FB41, 0x9F83FB41, 0x9F84FB41, 0x9F85FB41, 0x9F86FB41, 0x9F87FB41, 0x9F88FB41, 0x9F89FB41, 0x9F8AFB41, 0x9F8BFB41, 0x9F8CFB41, + 0x9F8DFB41, 0x9F8EFB41, 0x9F8FFB41, 0x9F90FB41, 0x9F91FB41, 0x9F92FB41, 0x9F93FB41, 0x9F94FB41, 0x9F95FB41, 0x9F96FB41, 0x9F97FB41, 0x9F98FB41, 0x9F99FB41, 0x9F9AFB41, 0x9F9BFB41, + 0x9F9CFB41, 0x9F9DFB41, 0x9F9EFB41, 0x9F9FFB41, 0x9FA0FB41, 0x9FA1FB41, 0x9FA2FB41, 0x9FA3FB41, 0x9FA4FB41, 0x9FA5FB41, 0x9FA6FB41, 0x9FA7FB41, 0x9FA8FB41, 0x9FA9FB41, 0x9FAAFB41, + 0x9FABFB41, 0x9FACFB41, 0x9FADFB41, 0x9FAEFB41, 0x9FAFFB41, 0x9FB0FB41, 0x9FB1FB41, 0x9FB2FB41, 0x9FB3FB41, 0x9FB4FB41, 0x9FB5FB41, 0x9FB6FB41, 0x9FB7FB41, 0x9FB8FB41, 0x9FB9FB41, + 0x9FBAFB41, 0x9FBBFB41, 0x9FBCFB41, 0x9FBDFB41, 0x9FBEFB41, 0x9FBFFB41, 0x9FC0FB41, 0x9FC1FB41, 0x9FC2FB41, 0x9FC3FB41, 0x9FC4FB41, 0x9FC5FB41, 0x9FC6FB41, 0x9FC7FB41, 0x9FC8FB41, + 0x9FC9FB41, 0x9FCAFB41, 0x9FCBFB41, 0x9FCCFB41, 0x9FCDFB41, 0x9FCEFB41, 0x9FCFFB41, 0x9FD0FB41, 0x9FD1FB41, 0x9FD2FB41, 0x9FD3FB41, 0x9FD4FB41, 0x9FD5FB41, 0x9FD6FBC1, 0x9FD7FBC1, + 0x9FD8FBC1, 0x9FD9FBC1, 0x9FDAFBC1, 0x9FDBFBC1, 0x9FDCFBC1, 0x9FDDFBC1, 0x9FDEFBC1, 0x9FDFFBC1, 0x9FE0FBC1, 0x9FE1FBC1, 0x9FE2FBC1, 0x9FE3FBC1, 0x9FE4FBC1, 0x9FE5FBC1, 0x9FE6FBC1, + 0x9FE7FBC1, 0x9FE8FBC1, 
0x9FE9FBC1, 0x9FEAFBC1, 0x9FEBFBC1, 0x9FECFBC1, 0x9FEDFBC1, 0x9FEEFBC1, 0x9FEFFBC1, 0x9FF0FBC1, 0x9FF1FBC1, 0x9FF2FBC1, 0x9FF3FBC1, 0x9FF4FBC1, 0x9FF5FBC1, + 0x9FF6FBC1, 0x9FF7FBC1, 0x9FF8FBC1, 0x9FF9FBC1, 0x9FFAFBC1, 0x9FFBFBC1, 0x9FFCFBC1, 0x9FFDFBC1, 0x9FFEFBC1, 0x9FFFFBC1, 0x3DBF, 0x3DC0, 0x3DC1, 0x3DC2, 0x3DC3, + 0x3DC4, 0x3DC5, 0x3DC6, 0x3DC7, 0x3DC8, 0x3DC9, 0x3DCA, 0x3DCB, 0x3DCC, 0x3DCD, 0x3DCE, 0x3DCF, 0x3DD0, 0x3DD1, 0x3DD2, + 0x3DD3, 0x3DD4, 0x3DD5, 0x3DD6, 0x3DD7, 0x3DD8, 0x3DD9, 0x3DDA, 0x3DDB, 0x3DDC, 0x3DDD, 0x3DDE, 0x3DDF, 0x3DE0, 0x3DE1, + 0x3DE2, 0x3DE3, 0x3DE4, 0x3DE5, 0x3DE6, 0x3DE7, 0x3DE8, 0x3DE9, 0x3DEA, 0x3DEB, 0x3DEC, 0x3DED, 0x3DEE, 0x3DEF, 0x3DF0, + 0x3DF1, 0x3DF2, 0x3DF3, 0x3DF4, 0x3DF5, 0x3DF6, 0x3DF7, 0x3DF8, 0x3DF9, 0x3DFA, 0x3DFB, 0x3DFC, 0x3DFD, 0x3DFE, 0x3DFF, + 0x3E00, 0x3E01, 0x3E02, 0x3E03, 0x3E04, 0x3E05, 0x3E06, 0x3E07, 0x3E08, 0x3E09, 0x3E0A, 0x3E0B, 0x3E0C, 0x3E0D, 0x3E0E, + 0x3E0F, 0x3E10, 0x3E11, 0x3E12, 0x3E13, 0x3E14, 0x3E15, 0x3E16, 0x3E17, 0x3E18, 0x3E19, 0x3E1A, 0x3E1B, 0x3E1C, 0x3E1D, + 0x3E1E, 0x3E1F, 0x3E20, 0x3E21, 0x3E22, 0x3E23, 0x3E24, 0x3E25, 0x3E26, 0x3E27, 0x3E28, 0x3E29, 0x3E2A, 0x3E2B, 0x3E2C, + 0x3E2D, 0x3E2E, 0x3E2F, 0x3E30, 0x3E31, 0x3E32, 0x3E33, 0x3E34, 0x3E35, 0x3E36, 0x3E37, 0x3E38, 0x3E39, 0x3E3A, 0x3E3B, + 0x3E3C, 0x3E3D, 0x3E3E, 0x3E3F, 0x3E40, 0x3E41, 0x3E42, 0x3E43, 0x3E44, 0x3E45, 0x3E46, 0x3E47, 0x3E48, 0x3E49, 0x3E4A, + 0x3E4B, 0x3E4C, 0x3E4D, 0x3E4E, 0x3E4F, 0x3E50, 0x3E51, 0x3E52, 0x3E53, 0x3E54, 0x3E55, 0x3E56, 0x3E57, 0x3E58, 0x3E59, + 0x3E5A, 0x3E5B, 0x3E5C, 0x3E5D, 0x3E5E, 0x3E5F, 0x3E60, 0x3E61, 0x3E62, 0x3E63, 0x3E64, 0x3E65, 0x3E66, 0x3E67, 0x3E68, + 0x3E69, 0x3E6A, 0x3E6B, 0x3E6C, 0x3E6D, 0x3E6E, 0x3E6F, 0x3E70, 0x3E71, 0x3E72, 0x3E73, 0x3E74, 0x3E75, 0x3E76, 0x3E77, + 0x3E78, 0x3E79, 0x3E7A, 0x3E7B, 0x3E7C, 0x3E7D, 0x3E7E, 0x3E7F, 0x3E80, 0x3E81, 0x3E82, 0x3E83, 0x3E84, 0x3E85, 0x3E86, + 0x3E87, 0x3E88, 0x3E89, 0x3E8A, 0x3E8B, 0x3E8C, 0x3E8D, 0x3E8E, 0x3E8F, 0x3E90, 0x3E91, 
0x3E92, 0x3E93, 0x3E94, 0x3E95, + 0x3E96, 0x3E97, 0x3E98, 0x3E99, 0x3E9A, 0x3E9B, 0x3E9C, 0x3E9D, 0x3E9E, 0x3E9F, 0x3EA0, 0x3EA1, 0x3EA2, 0x3EA3, 0x3EA4, + 0x3EA5, 0x3EA6, 0x3EA7, 0x3EA8, 0x3EA9, 0x3EAA, 0x3EAB, 0x3EAC, 0x3EAD, 0x3EAE, 0x3EAF, 0x3EB0, 0x3EB1, 0x3EB2, 0x3EB3, + 0x3EB4, 0x3EB5, 0x3EB6, 0x3EB7, 0x3EB8, 0x3EB9, 0x3EBA, 0x3EBB, 0x3EBC, 0x3EBD, 0x3EBE, 0x3EBF, 0x3EC0, 0x3EC1, 0x3EC2, + 0x3EC3, 0x3EC4, 0x3EC5, 0x3EC6, 0x3EC7, 0x3EC8, 0x3EC9, 0x3ECA, 0x3ECB, 0x3ECC, 0x3ECD, 0x3ECE, 0x3ECF, 0x3ED0, 0x3ED1, + 0x3ED2, 0x3ED3, 0x3ED4, 0x3ED5, 0x3ED6, 0x3ED7, 0x3ED8, 0x3ED9, 0x3EDA, 0x3EDB, 0x3EDC, 0x3EDD, 0x3EDE, 0x3EDF, 0x3EE0, + 0x3EE1, 0x3EE2, 0x3EE3, 0x3EE4, 0x3EE5, 0x3EE6, 0x3EE7, 0x3EE8, 0x3EE9, 0x3EEA, 0x3EEB, 0x3EEC, 0x3EED, 0x3EEE, 0x3EEF, + 0x3EF0, 0x3EF1, 0x3EF2, 0x3EF3, 0x3EF4, 0x3EF5, 0x3EF6, 0x3EF7, 0x3EF8, 0x3EF9, 0x3EFA, 0x3EFB, 0x3EFC, 0x3EFD, 0x3EFE, + 0x3EFF, 0x3F00, 0x3F01, 0x3F02, 0x3F03, 0x3F04, 0x3F05, 0x3F06, 0x3F07, 0x3F08, 0x3F09, 0x3F0A, 0x3F0B, 0x3F0C, 0x3F0D, + 0x3F0E, 0x3F0F, 0x3F10, 0x3F11, 0x3F12, 0x3F13, 0x3F14, 0x3F15, 0x3F16, 0x3F17, 0x3F18, 0x3F19, 0x3F1A, 0x3F1B, 0x3F1C, + 0x3F1D, 0x3F1E, 0x3F1F, 0x3F20, 0x3F21, 0x3F22, 0x3F23, 0x3F24, 0x3F25, 0x3F26, 0x3F27, 0x3F28, 0x3F29, 0x3F2A, 0x3F2B, + 0x3F2C, 0x3F2D, 0x3F2E, 0x3F2F, 0x3F30, 0x3F31, 0x3F32, 0x3F33, 0x3F34, 0x3F35, 0x3F36, 0x3F37, 0x3F38, 0x3F39, 0x3F3A, + 0x3F3B, 0x3F3C, 0x3F3D, 0x3F3E, 0x3F3F, 0x3F40, 0x3F41, 0x3F42, 0x3F43, 0x3F44, 0x3F45, 0x3F46, 0x3F47, 0x3F48, 0x3F49, + 0x3F4A, 0x3F4B, 0x3F4C, 0x3F4D, 0x3F4E, 0x3F4F, 0x3F50, 0x3F51, 0x3F52, 0x3F53, 0x3F54, 0x3F55, 0x3F56, 0x3F57, 0x3F58, + 0x3F59, 0x3F5A, 0x3F5B, 0x3F5C, 0x3F5D, 0x3F5E, 0x3F5F, 0x3F60, 0x3F61, 0x3F62, 0x3F63, 0x3F64, 0x3F65, 0x3F66, 0x3F67, + 0x3F68, 0x3F69, 0x3F6A, 0x3F6B, 0x3F6C, 0x3F6D, 0x3F6E, 0x3F6F, 0x3F70, 0x3F71, 0x3F72, 0x3F73, 0x3F74, 0x3F75, 0x3F76, + 0x3F77, 0x3F78, 0x3F79, 0x3F7A, 0x3F7B, 0x3F7C, 0x3F7D, 0x3F7E, 0x3F7F, 0x3F80, 0x3F81, 0x3F82, 0x3F83, 0x3F84, 0x3F85, + 0x3F86, 
0x3F87, 0x3F88, 0x3F89, 0x3F8A, 0x3F8B, 0x3F8C, 0x3F8D, 0x3F8E, 0x3F8F, 0x3F90, 0x3F91, 0x3F92, 0x3F93, 0x3F94, + 0x3F95, 0x3F96, 0x3F97, 0x3F98, 0x3F99, 0x3F9A, 0x3F9B, 0x3F9C, 0x3F9D, 0x3F9E, 0x3F9F, 0x3FA0, 0x3FA1, 0x3FA2, 0x3FA3, + 0x3FA4, 0x3FA5, 0x3FA6, 0x3FA7, 0x3FA8, 0x3FA9, 0x3FAA, 0x3FAB, 0x3FAC, 0x3FAD, 0x3FAE, 0x3FAF, 0x3FB0, 0x3FB1, 0x3FB2, + 0x3FB3, 0x3FB4, 0x3FB5, 0x3FB6, 0x3FB7, 0x3FB8, 0x3FB9, 0x3FBA, 0x3FBB, 0x3FBC, 0x3FBD, 0x3FBE, 0x3FBF, 0x3FC0, 0x3FC1, + 0x3FC2, 0x3FC3, 0x3FC4, 0x3FC5, 0x3FC6, 0x3FC7, 0x3FC8, 0x3FC9, 0x3FCA, 0x3FCB, 0x3FCC, 0x3FCD, 0x3FCE, 0x3FCF, 0x3FD0, + 0x3FD1, 0x3FD2, 0x3FD3, 0x3FD4, 0x3FD5, 0x3FD6, 0x3FD7, 0x3FD8, 0x3FD9, 0x3FDA, 0x3FDB, 0x3FDC, 0x3FDD, 0x3FDE, 0x3FDF, + 0x3FE0, 0x3FE1, 0x3FE2, 0x3FE3, 0x3FE4, 0x3FE5, 0x3FE6, 0x3FE7, 0x3FE8, 0x3FE9, 0x3FEA, 0x3FEB, 0x3FEC, 0x3FED, 0x3FEE, + 0x3FEF, 0x3FF0, 0x3FF1, 0x3FF2, 0x3FF3, 0x3FF4, 0x3FF5, 0x3FF6, 0x3FF7, 0x3FF8, 0x3FF9, 0x3FFA, 0x3FFB, 0x3FFC, 0x3FFD, + 0x3FFE, 0x3FFF, 0x4000, 0x4001, 0x4002, 0x4003, 0x4004, 0x4005, 0x4006, 0x4007, 0x4008, 0x4009, 0x400A, 0x400B, 0x400C, + 0x400D, 0x400E, 0x400F, 0x4010, 0x4011, 0x4012, 0x4013, 0x4014, 0x4015, 0x4016, 0x4017, 0x4018, 0x4019, 0x401A, 0x401B, + 0x401C, 0x401D, 0x401E, 0x401F, 0x4020, 0x4021, 0x4022, 0x4023, 0x4024, 0x4025, 0x4026, 0x4027, 0x4028, 0x4029, 0x402A, + 0x402B, 0x402C, 0x402D, 0x402E, 0x402F, 0x4030, 0x4031, 0x4032, 0x4033, 0x4034, 0x4035, 0x4036, 0x4037, 0x4038, 0x4039, + 0x403A, 0x403B, 0x403C, 0x403D, 0x403E, 0x403F, 0x4040, 0x4041, 0x4042, 0x4043, 0x4044, 0x4045, 0x4046, 0x4047, 0x4048, + 0x4049, 0x404A, 0x404B, 0x404C, 0x404D, 0x404E, 0x404F, 0x4050, 0x4051, 0x4052, 0x4053, 0x4054, 0x4055, 0x4056, 0x4057, + 0x4058, 0x4059, 0x405A, 0x405B, 0x405C, 0x405D, 0x405E, 0x405F, 0x4060, 0x4061, 0x4062, 0x4063, 0x4064, 0x4065, 0x4066, + 0x4067, 0x4068, 0x4069, 0x406A, 0x406B, 0x406C, 0x406D, 0x406E, 0x406F, 0x4070, 0x4071, 0x4072, 0x4073, 0x4074, 0x4075, + 0x4076, 0x4077, 0x4078, 0x4079, 0x407A, 0x407B, 0x407C, 
0x407D, 0x407E, 0x407F, 0x4080, 0x4081, 0x4082, 0x4083, 0x4084, + 0x4085, 0x4086, 0x4087, 0x4088, 0x4089, 0x408A, 0x408B, 0x408C, 0x408D, 0x408E, 0x408F, 0x4090, 0x4091, 0x4092, 0x4093, + 0x4094, 0x4095, 0x4096, 0x4097, 0x4098, 0x4099, 0x409A, 0x409B, 0x409C, 0x409D, 0x409E, 0x409F, 0x40A0, 0x40A1, 0x40A2, + 0x40A3, 0x40A4, 0x40A5, 0x40A6, 0x40A7, 0x40A8, 0x40A9, 0x40AA, 0x40AB, 0x40AC, 0x40AD, 0x40AE, 0x40AF, 0x40B0, 0x40B1, + 0x40B2, 0x40B3, 0x40B4, 0x40B5, 0x40B6, 0x40B7, 0x40B8, 0x40B9, 0x40BA, 0x40BB, 0x40BC, 0x40BD, 0x40BE, 0x40BF, 0x40C0, + 0x40C1, 0x40C2, 0x40C3, 0x40C4, 0x40C5, 0x40C6, 0x40C7, 0x40C8, 0x40C9, 0x40CA, 0x40CB, 0x40CC, 0x40CD, 0x40CE, 0x40CF, + 0x40D0, 0x40D1, 0x40D2, 0x40D3, 0x40D4, 0x40D5, 0x40D6, 0x40D7, 0x40D8, 0x40D9, 0x40DA, 0x40DB, 0x40DC, 0x40DD, 0x40DE, + 0x40DF, 0x40E0, 0x40E1, 0x40E2, 0x40E3, 0x40E4, 0x40E5, 0x40E6, 0x40E7, 0x40E8, 0x40E9, 0x40EA, 0x40EB, 0x40EC, 0x40ED, + 0x40EE, 0x40EF, 0x40F0, 0x40F1, 0x40F2, 0x40F3, 0x40F4, 0x40F5, 0x40F6, 0x40F7, 0x40F8, 0x40F9, 0x40FA, 0x40FB, 0x40FC, + 0x40FD, 0x40FE, 0x40FF, 0x4100, 0x4101, 0x4102, 0x4103, 0x4104, 0x4105, 0x4106, 0x4107, 0x4108, 0x4109, 0x410A, 0x410B, + 0x410C, 0x410D, 0x410E, 0x410F, 0x4110, 0x4111, 0x4112, 0x4113, 0x4114, 0x4115, 0x4116, 0x4117, 0x4118, 0x4119, 0x411A, + 0x411B, 0x411C, 0x411D, 0x411E, 0x411F, 0x4120, 0x4121, 0x4122, 0x4123, 0x4124, 0x4125, 0x4126, 0x4127, 0x4128, 0x4129, + 0x412A, 0x412B, 0x412C, 0x412D, 0x412E, 0x412F, 0x4130, 0x4131, 0x4132, 0x4133, 0x4134, 0x4135, 0x4136, 0x4137, 0x4138, + 0x4139, 0x413A, 0x413B, 0x413C, 0x413D, 0x413E, 0x413F, 0x4140, 0x4141, 0x4142, 0x4143, 0x4144, 0x4145, 0x4146, 0x4147, + 0x4148, 0x4149, 0x414A, 0x414B, 0x414C, 0x414D, 0x414E, 0x414F, 0x4150, 0x4151, 0x4152, 0x4153, 0x4154, 0x4155, 0x4156, + 0x4157, 0x4158, 0x4159, 0x415A, 0x415B, 0x415C, 0x415D, 0x415E, 0x415F, 0x4160, 0x4161, 0x4162, 0x4163, 0x4164, 0x4165, + 0x4166, 0x4167, 0x4168, 0x4169, 0x416A, 0x416B, 0x416C, 0x416D, 0x416E, 0x416F, 0x4170, 0x4171, 0x4172, 
0x4173, 0x4174, + 0x4175, 0x4176, 0x4177, 0x4178, 0x4179, 0x417A, 0x417B, 0x417C, 0x417D, 0x417E, 0x417F, 0x4180, 0x4181, 0x4182, 0x4183, + 0x4184, 0x4185, 0x4186, 0x4187, 0x4188, 0x4189, 0x418A, 0x418B, 0x418C, 0x418D, 0x418E, 0x418F, 0x4190, 0x4191, 0x4192, + 0x4193, 0x4194, 0x4195, 0x4196, 0x4197, 0x4198, 0x4199, 0x419A, 0x419B, 0x419C, 0x419D, 0x419E, 0x419F, 0x41A0, 0x41A1, + 0x41A2, 0x41A3, 0x41A4, 0x41A5, 0x41A6, 0x41A7, 0x41A8, 0x41A9, 0x41AA, 0x41AB, 0x41AC, 0x41AD, 0x41AE, 0x41AF, 0x41B0, + 0x41B1, 0x41B2, 0x41B3, 0x41B4, 0x41B5, 0x41B6, 0x41B7, 0x41B8, 0x41B9, 0x41BA, 0x41BB, 0x41BC, 0x41BD, 0x41BE, 0x41BF, + 0x41C0, 0x41C1, 0x41C2, 0x41C3, 0x41C4, 0x41C5, 0x41C6, 0x41C7, 0x41C8, 0x41C9, 0x41CA, 0x41CB, 0x41CC, 0x41CD, 0x41CE, + 0x41CF, 0x41D0, 0x41D1, 0x41D2, 0x41D3, 0x41D4, 0x41D5, 0x41D6, 0x41D7, 0x41D8, 0x41D9, 0x41DA, 0x41DB, 0x41DC, 0x41DD, + 0x41DE, 0x41DF, 0x41E0, 0x41E1, 0x41E2, 0x41E3, 0x41E4, 0x41E5, 0x41E6, 0x41E7, 0x41E8, 0x41E9, 0x41EA, 0x41EB, 0x41EC, + 0x41ED, 0x41EE, 0x41EF, 0x41F0, 0x41F1, 0x41F2, 0x41F3, 0x41F4, 0x41F5, 0x41F6, 0x41F7, 0x41F8, 0x41F9, 0x41FA, 0x41FB, + 0x41FC, 0x41FD, 0x41FE, 0x41FF, 0x4200, 0x4201, 0x4202, 0x4203, 0x4204, 0x4205, 0x4206, 0x4207, 0x4208, 0x4209, 0x420A, + 0x420B, 0x420C, 0x420D, 0x420E, 0x420F, 0x4210, 0x4211, 0x4212, 0x4213, 0x4214, 0x4215, 0x4216, 0x4217, 0x4218, 0x4219, + 0x421A, 0x421B, 0x421C, 0x421D, 0x421E, 0x421F, 0x4220, 0x4221, 0x4222, 0x4223, 0x4224, 0x4225, 0x4226, 0x4227, 0x4228, + 0x4229, 0x422A, 0x422B, 0x422C, 0x422D, 0x422E, 0x422F, 0x4230, 0x4231, 0x4232, 0x4233, 0x4234, 0x4235, 0x4236, 0x4237, + 0x4238, 0x4239, 0x423A, 0x423B, 0x423C, 0x423D, 0x423E, 0x423F, 0x4240, 0x4241, 0x4242, 0x4243, 0x4244, 0x4245, 0x4246, + 0x4247, 0x4248, 0x4249, 0x424A, 0x424B, 0xA48DFBC1, 0xA48EFBC1, 0xA48FFBC1, 0xF41, 0xF42, 0xF43, 0xF44, 0xF45, 0xF46, 0xF47, + 0xF48, 0xF49, 0xF4A, 0xF4B, 0xF4C, 0xF4D, 0xF4E, 0xF4F, 0xF50, 0xF51, 0xF52, 0xF53, 0xF54, 0xF55, 0xF56, + 0xF57, 0xF58, 0xF59, 0xF5A, 0xF5B, 
0xF5C, 0xF5D, 0xF5E, 0xF5F, 0xF60, 0xF61, 0xF62, 0xF63, 0xF64, 0xF65, + 0xF66, 0xF67, 0xF68, 0xF69, 0xF6A, 0xF6B, 0xF6C, 0xF6D, 0xF6E, 0xF6F, 0xF70, 0xF71, 0xF72, 0xF73, 0xF74, + 0xF75, 0xF76, 0xF77, 0xA4C7FBC1, 0xA4C8FBC1, 0xA4C9FBC1, 0xA4CAFBC1, 0xA4CBFBC1, 0xA4CCFBC1, 0xA4CDFBC1, 0xA4CEFBC1, 0xA4CFFBC1, 0x4252, 0x4253, 0x4254, + 0x4255, 0x4256, 0x4257, 0x4258, 0x4259, 0x425A, 0x425B, 0x425C, 0x425D, 0x425E, 0x425F, 0x4260, 0x4261, 0x4262, 0x4263, + 0x4264, 0x4265, 0x4266, 0x4267, 0x4268, 0x4269, 0x426A, 0x426B, 0x426E, 0x426C, 0x426F, 0x426D, 0x4270, 0x4271, 0x4272, + 0x4273, 0x4274, 0x4275, 0x4276, 0x4277, 0x4278, 0x4279, 0x424C, 0x424D, 0x424E, 0x424F, 0x4251, 0x4250, 0x22E, 0x285, + 0x375A, 0x375B, 0x375C, 0x375D, 0x375E, 0x375F, 0x3760, 0x3761, 0x3762, 0x3763, 0x3764, 0x3765, 0x3766, 0x3767, 0x3768, + 0x3769, 0x376A, 0x376B, 0x376C, 0x376D, 0x376E, 0x376F, 0x3770, 0x3771, 0x3772, 0x3773, 0x3774, 0x3775, 0x3776, 0x3777, + 0x3778, 0x3779, 0x377A, 0x377B, 0x377C, 0x377D, 0x377E, 0x377F, 0x3780, 0x3781, 0x3782, 0x3783, 0x3784, 0x3785, 0x3786, + 0x3787, 0x3788, 0x3789, 0x378A, 0x378B, 0x378C, 0x378D, 0x378E, 0x378F, 0x3790, 0x3791, 0x3792, 0x3793, 0x3794, 0x3795, + 0x3796, 0x3797, 0x3798, 0x3799, 0x379A, 0x379B, 0x379C, 0x379D, 0x379E, 0x379F, 0x37A0, 0x37A1, 0x37A2, 0x37A3, 0x37A4, + 0x37A5, 0x37A6, 0x37A7, 0x37A8, 0x37A9, 0x37AA, 0x37AB, 0x37AC, 0x37AD, 0x37AE, 0x37AF, 0x37B0, 0x37B1, 0x37B2, 0x37B3, + 0x37B4, 0x37B5, 0x37B6, 0x37B7, 0x37B8, 0x37B9, 0x37BA, 0x37BB, 0x37BC, 0x37BD, 0x37BE, 0x37BF, 0x37C0, 0x37C1, 0x37C2, + 0x37C3, 0x37C4, 0x37C5, 0x37C6, 0x37C7, 0x37C8, 0x37C9, 0x37CA, 0x37CB, 0x37CC, 0x37CD, 0x37CE, 0x37CF, 0x37D0, 0x37D1, + 0x37D2, 0x37D3, 0x37D4, 0x37D5, 0x37D6, 0x37D7, 0x37D8, 0x37D9, 0x37DA, 0x37DB, 0x37DC, 0x37DD, 0x37DE, 0x37DF, 0x37E0, + 0x37E1, 0x37E2, 0x37E3, 0x37E4, 0x37E5, 0x37E6, 0x37E7, 0x37E8, 0x37E9, 0x37EA, 0x37EB, 0x37EC, 0x37ED, 0x37EE, 0x37EF, + 0x37F0, 0x37F1, 0x37F2, 0x37F3, 0x37F4, 0x37F5, 0x37F6, 0x37F7, 0x37F8, 0x37F9, 
0x37FA, 0x37FB, 0x37FC, 0x37FD, 0x37FE, + 0x37FF, 0x3800, 0x3801, 0x3802, 0x3803, 0x3804, 0x3805, 0x3806, 0x3807, 0x3808, 0x3809, 0x380A, 0x380B, 0x380C, 0x380D, + 0x380E, 0x380F, 0x3810, 0x3811, 0x3812, 0x3813, 0x3814, 0x3815, 0x3816, 0x3817, 0x3818, 0x3819, 0x381A, 0x381B, 0x381C, + 0x381D, 0x381E, 0x381F, 0x3820, 0x3821, 0x3822, 0x3823, 0x3824, 0x3825, 0x3826, 0x3827, 0x3828, 0x3829, 0x382A, 0x382B, + 0x382C, 0x382D, 0x382E, 0x382F, 0x3830, 0x3831, 0x3832, 0x3833, 0x3834, 0x3835, 0x3836, 0x3837, 0x3838, 0x3839, 0x383A, + 0x383B, 0x383C, 0x383D, 0x383E, 0x383F, 0x3840, 0x3841, 0x3842, 0x3843, 0x3844, 0x3845, 0x3846, 0x3847, 0x3848, 0x3849, + 0x384A, 0x384B, 0x384C, 0x384D, 0x384E, 0x384F, 0x3850, 0x3851, 0x3852, 0x3853, 0x3854, 0x3855, 0x3856, 0x3857, 0x3858, + 0x3859, 0x385A, 0x385B, 0x385C, 0x385D, 0x385E, 0x385F, 0x3860, 0x3861, 0x3862, 0x3863, 0x3864, 0x3865, 0x3866, 0x22F, + 0x286, 0x270, 0x37B2, 0x37C4, 0x37E1, 0x38653766, 0x38653778, 0x3865378D, 0x386637A1, 0x386537AC, 0x386637B2, 0x386637B4, 0x386537BA, 0x386537DF, 0x3865380E, + 0x38653825, 0x3866382B, 0x38653832, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x37C8, 0x382B, + 0xA62CFBC1, 0xA62DFBC1, 0xA62EFBC1, 0xA62FFBC1, 0xA630FBC1, 0xA631FBC1, 0xA632FBC1, 0xA633FBC1, 0xA634FBC1, 0xA635FBC1, 0xA636FBC1, 0xA637FBC1, 0xA638FBC1, 0xA639FBC1, 0xA63AFBC1, + 0xA63BFBC1, 0xA63CFBC1, 0xA63DFBC1, 0xA63EFBC1, 0xA63FFBC1, 0x2070, 0x2070, 0x2073, 0x2073, 0x2078, 0x2078, 0x208C, 0x208C, 0x2095, 0x2095, + 0x2129, 0x2129, 0x2150, 0x2150, 0x218C, 0x218C, 0x2193, 0x2193, 0x21A4, 0x21A4, 0x21AD, 0x21AD, 0x21AE, 0x21AE, 0x21BC, + 0x21BC, 0x21C1, 0x21C1, 0x21C6, 0x21C6, 0x21DB, 0x21DB, 0x215D, 0x215D, 0x2054, 0x2054, 0x20C0, 0x20C0, 0x20CB, 0x20CB, + 0x20E7, 0x20E7, 0x20E7, 0x20E7, 0x20E7, 0x20E7, 0x20E7, 0x0, 0x0, 0x0, 0x0, 0x393, 0x205E, 0x2080, 0x2088, + 0x211D, 0x218F, 0x2194, 0x2198, 0x2148, 0x0, 0x0, 0x3CA, 0x218E, 0x204F, 0x204F, 0x207F, 0x207F, 0x2067, 0x2067, + 0x2176, 0x2176, 
0x207D, 0x207D, 0x2118, 0x2118, 0x2112, 0x2112, 0x215E, 0x215E, 0x2163, 0x2163, 0x2169, 0x2169, 0x2147, + 0x2147, 0x2187, 0x2187, 0x20E7, 0x20E7, 0x20E7, 0x20E7, 0x218F, 0x2198, 0x212E, 0x21B4, 0x3867, 0x3868, 0x3869, 0x386A, + 0x386B, 0x386C, 0x386D, 0x386E, 0x386F, 0x3870, 0x3871, 0x3872, 0x3873, 0x3874, 0x3875, 0x3876, 0x3877, 0x3878, 0x3879, + 0x387A, 0x387B, 0x387C, 0x387D, 0x387E, 0x387F, 0x3880, 0x3881, 0x3882, 0x3883, 0x3884, 0x3885, 0x3886, 0x3887, 0x3888, + 0x3889, 0x388A, 0x388B, 0x388C, 0x388D, 0x388E, 0x388F, 0x3890, 0x3891, 0x3892, 0x3893, 0x3894, 0x3895, 0x3896, 0x3897, + 0x3898, 0x3899, 0x389A, 0x389B, 0x389C, 0x389D, 0x389E, 0x389F, 0x38A0, 0x38A1, 0x38A2, 0x38A3, 0x38A4, 0x38A5, 0x38A6, + 0x38A7, 0x38A8, 0x38A9, 0x38AA, 0x38AB, 0x38AC, 0x38AD, 0x38AE, 0x38AF, 0x38B0, 0x38B1, 0x38B2, 0x38B3, 0x38B4, 0x38B5, + 0x38B6, 0x0, 0x0, 0x2D7, 0x287, 0x25F, 0x230, 0x238, 0x271, 0xA6F8FBC1, 0xA6F9FBC1, 0xA6FAFBC1, 0xA6FBFBC1, 0xA6FCFBC1, 0xA6FDFBC1, + 0xA6FEFBC1, 0xA6FFFBC1, 0x4D0, 0x4D1, 0x4D2, 0x4D3, 0x4D4, 0x4D5, 0x4D6, 0x4D7, 0x4D8, 0x4D9, 0x4DA, 0x4DB, 0x4DC, + 0x4DD, 0x4DE, 0x4DF, 0x4E0, 0x4E1, 0x4E2, 0x4E3, 0x4E4, 0x4E5, 0x4E6, 0x4E7, 0x4E8, 0x4E9, 0x4EA, 0x4EB, + 0x4EC, 0x4ED, 0x4EE, 0x4EF, 0x4F0, 0x4F1, 0x1F81, 0x1F81, 0x1F8C, 0x1F8C, 0x1D2B, 0x1D2B, 0x1F211E95, 0x1F211E95, 0x1F66, + 0x1F66, 0x1F67, 0x1F67, 0x1F68, 0x1F68, 0x1CE9, 0x1E75, 0x1C471C47, 0x1C471C47, 0x1DDD1C47, 0x1DDD1C47, 0x1EB51C47, 0x1EB51C47, 0x1EE31C47, 0x1EE31C47, + 0x1EE31C47, 0x1EE31C47, 0x1F0B1C47, 0x1F0B1C47, 0x1C8E, 0x1C8E, 0x1D70, 0x1D70, 0x1D71, 0x1D71, 0x1D72, 0x1D72, 0x1D7F, 0x1D7F, 0x1D81, + 0x1D81, 0x1E01, 0x1E01, 0x1DF9, 0x1DF9, 0x1DDD1DDD, 0x1DDD1DDD, 0x1E12, 0x1E12, 0x1E19, 0x1E19, 0x1E1A, 0x1E1A, 0x1E25, 0x1E25, + 0x1E26, 0x1E26, 0x1E3D, 0x1E3D, 0x1E70, 0x1E70, 0x1EE8, 0x1EE8, 0x1F0B1EE3, 0x1F0B1EE3, 0x1F3D, 0x1F3D, 0x1F54, 0x1F54, 0x1F55, + 0x1F55, 0x1F5A, 0x1F5A, 0x1F5E, 0x1F5E, 0x1F5F, 0x1F5F, 0x1F60, 0x1F60, 0x1F60, 0x1CA8, 0x1D9C, 0x1DB8, 0x1DD7, 0x1E6E, + 
0x1E6F, 0x1EB0, 0x1F61, 0x1C8F, 0x1C8F, 0x1CE5, 0x1CE5, 0x1CF4, 0x1D0F, 0x1D0F, 0x1DA1, 0x1DA1, 0x1E33, 0x1E33, 0x1E71, + 0x1E71, 0x1E95, 0x1E95, 0x4F2, 0x4F3, 0x4F4, 0x1F82, 0x1F82, 0x1EC8, 0x1D97, 0x1F83, 0x1DCC, 0x1DCC, 0x1C83, 0x1C83, + 0x1C84, 0x1D24, 0x1C6F, 0x1C6F, 0x1CEB, 0x1CEB, 0x1C47, 0x1C47, 0x1DDD, 0x1DDD, 0x1EB5, 0x1EB5, 0x1CF4, 0x1CF4, 0x1D65, + 0x1D65, 0x1DB9, 0x1DB9, 0x1E33, 0x1E33, 0x1E71, 0x1E71, 0x1D25, 0x1CCF, 0x1CF8, 0x1D8D, 0x1D3A, 0xA7AFFBC1, 0x1D73, 0x1EB1, + 0x1D59, 0x1F08, 0x1C79, 0x1C79, 0x1E06, 0x1E06, 0xA7B8FBC1, 0xA7B9FBC1, 0xA7BAFBC1, 0xA7BBFBC1, 0xA7BCFBC1, 0xA7BDFBC1, 0xA7BEFBC1, 0xA7BFFBC1, 0xA7C0FBC1, + 0xA7C1FBC1, 0xA7C2FBC1, 0xA7C3FBC1, 0xA7C4FBC1, 0xA7C5FBC1, 0xA7C6FBC1, 0xA7C7FBC1, 0xA7C8FBC1, 0xA7C9FBC1, 0xA7CAFBC1, 0xA7CBFBC1, 0xA7CCFBC1, 0xA7CDFBC1, 0xA7CEFBC1, 0xA7CFFBC1, + 0xA7D0FBC1, 0xA7D1FBC1, 0xA7D2FBC1, 0xA7D3FBC1, 0xA7D4FBC1, 0xA7D5FBC1, 0xA7D6FBC1, 0xA7D7FBC1, 0xA7D8FBC1, 0xA7D9FBC1, 0xA7DAFBC1, 0xA7DBFBC1, 0xA7DCFBC1, 0xA7DDFBC1, 0xA7DEFBC1, + 0xA7DFFBC1, 0xA7E0FBC1, 0xA7E1FBC1, 0xA7E2FBC1, 0xA7E3FBC1, 0xA7E4FBC1, 0xA7E5FBC1, 0xA7E6FBC1, 0xA7E7FBC1, 0xA7E8FBC1, 0xA7E9FBC1, 0xA7EAFBC1, 0xA7EBFBC1, 0xA7ECFBC1, 0xA7EDFBC1, + 0xA7EEFBC1, 0xA7EFFBC1, 0xA7F0FBC1, 0xA7F1FBC1, 0xA7F2FBC1, 0xA7F3FBC1, 0xA7F4FBC1, 0xA7F5FBC1, 0xA7F6FBC1, 0x1D3F, 0x1D18, 0x1CAA1DDD, 0x1ED8, 0x1CF3, 0x1E1B, + 0x1DB6, 0x1D3E, 0x1DB7, 0x295C, 0x295D, 0x295E, 0x295F, 0x2960, 0x2961, 0x2962, 0x2963, 0x2964, 0x2965, 0x2966, 0x0, + 0x2967, 0x2968, 0x2969, 0x296A, 0x296B, 0x296C, 0x296D, 0x296E, 0x296F, 0x2970, 0x2971, 0x2972, 0x2973, 0x2974, 0x2975, + 0x2976, 0x2977, 0x2978, 0x2979, 0x297A, 0x297B, 0x297C, 0x297D, 0x297E, 0x297F, 0x2980, 0x2981, 0x2982, 0x51F, 0x520, + 0x521, 0x522, 0xA82CFBC1, 0xA82DFBC1, 0xA82EFBC1, 0xA82FFBC1, 0x1AA2, 0x1AA3, 0x1AA4, 0x1AA5, 0x1AA6, 0x1AA7, 0x523, 0x524, 0x1C1B, + 0x525, 0xA83AFBC1, 0xA83BFBC1, 0xA83CFBC1, 0xA83DFBC1, 0xA83EFBC1, 0xA83FFBC1, 0x2F00, 0x2F01, 0x2F02, 0x2F03, 0x2F04, 0x2F05, 0x2F06, 0x2F07, + 
0x2F0C, 0x2F0D, 0x2F0E, 0x2F0F, 0x2F10, 0x2F11, 0x2F12, 0x2F13, 0x2F14, 0x2F15, 0x2F16, 0x2F17, 0x2F19, 0x2F1A, 0x2F1B, + 0x2F1C, 0x2F1F, 0x2F22, 0x2F23, 0x2F25, 0x2F26, 0x2F29, 0x2F2E, 0x2F2F, 0x2F30, 0x2F31, 0x2F2A, 0x2F2B, 0x2F2C, 0x2F2D, + 0x2F32, 0x2F18, 0x2F1D, 0x2F08, 0x2F09, 0x2F0A, 0x2F0B, 0x2F1E, 0x2F24, 0x2F27, 0x2F28, 0x2F20, 0x2F21, 0x2F33, 0x428, + 0x429, 0x294, 0x295, 0xA878FBC1, 0xA879FBC1, 0xA87AFBC1, 0xA87BFBC1, 0xA87CFBC1, 0xA87DFBC1, 0xA87EFBC1, 0xA87FFBC1, 0x0, 0x0, 0x2983, 0x2984, + 0x2985, 0x2986, 0x2987, 0x2988, 0x2989, 0x298A, 0x298B, 0x298C, 0x298D, 0x298E, 0x298F, 0x2990, 0x2991, 0x2992, 0x2993, + 0x2994, 0x2995, 0x2996, 0x2997, 0x2998, 0x2999, 0x299A, 0x299B, 0x299C, 0x299D, 0x299E, 0x299F, 0x29A0, 0x29A1, 0x29A2, + 0x29A3, 0x29A4, 0x29A5, 0x29A6, 0x29A7, 0x29A8, 0x29A9, 0x29AA, 0x29AB, 0x29AC, 0x29AD, 0x29AE, 0x29AF, 0x29B0, 0x29B1, + 0x29B2, 0x29B3, 0x29B4, 0x29B5, 0x29B6, 0x29B7, 0x29B8, 0x29B9, 0x29BA, 0x29BB, 0x29BC, 0x29BD, 0x29BE, 0x29BF, 0x29C0, + 0x29C1, 0x29C2, 0x29C3, 0x29C4, 0x29C5, 0x0, 0xA8C6FBC1, 0xA8C7FBC1, 0xA8C8FBC1, 0xA8C9FBC1, 0xA8CAFBC1, 0xA8CBFBC1, 0xA8CCFBC1, 0xA8CDFBC1, 0x290, + 0x291, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0xA8DAFBC1, 0xA8DBFBC1, 0xA8DCFBC1, 0xA8DDFBC1, + 0xA8DEFBC1, 0xA8DFFBC1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x26A4, 0x26A4, 0x26A4, 0x26A4, 0x26A4, 0x26A4, 0x3E8, 0x3E9, 0x3EA, 0x26A5, + 0x3EB, 0x265C, 0xA8FEFBC1, 0xA8FFFBC1, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x3014, + 0x3015, 0x3016, 0x3017, 0x3018, 0x3019, 0x301A, 0x301B, 0x301C, 0x301D, 0x301E, 0x301F, 0x3020, 0x3021, 0x3022, 0x3023, + 0x3024, 0x3025, 0x3026, 0x3027, 0x3028, 0x3029, 0x302A, 0x302B, 0x302C, 0x302D, 0x302E, 0x302F, 0x3030, 0x3031, 0x3032, + 0x3033, 0x3034, 0x0, 0x0, 0x0, 0x42E, 0x298, 0x2FF0, 0x2FF1, 0x2FF2, 0x2FF3, 0x2FF4, 0x2FF5, 0x2FF6, 0x2FF7, + 0x2FF8, 0x2FF9, 0x2FFA, 0x2FFB, 
0x2FFC, 0x2FFD, 0x2FFE, 0x2FFF, 0x3000, 0x3001, 0x3002, 0x3003, 0x3004, 0x3005, 0x3006, + 0x3007, 0x3008, 0x3009, 0x300A, 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3010, 0x3011, 0x3012, 0x3013, 0xA954FBC1, 0xA955FBC1, + 0xA956FBC1, 0xA957FBC1, 0xA958FBC1, 0xA959FBC1, 0xA95AFBC1, 0xA95BFBC1, 0xA95CFBC1, 0xA95DFBC1, 0xA95EFBC1, 0x2D8, 0x3C54, 0x3C55, 0x3C56, 0x3C57, 0x3C58, + 0x3C59, 0x3C5A, 0x3C5B, 0x3C5C, 0x3C5D, 0x3C5E, 0x3C5F, 0x3C60, 0x3C61, 0x3C62, 0x3C63, 0x3C64, 0x3C65, 0x3C66, 0x3C67, + 0x3C68, 0x3C69, 0x3C6A, 0x3C6B, 0x3C6C, 0x3C6D, 0x3C6E, 0x3C6F, 0x3C70, 0xA97DFBC1, 0xA97EFBC1, 0xA97FFBC1, 0x0, 0x0, 0x0, + 0x0, 0x329D, 0x329E, 0x329F, 0x32A0, 0x32A1, 0x32A2, 0x32A3, 0x32A4, 0x32A5, 0x32A6, 0x32A7, 0x32A8, 0x32A9, 0x32AA, + 0x32AB, 0x32AC, 0x32AD, 0x32AE, 0x32AF, 0x32B0, 0x32B1, 0x32B2, 0x32B3, 0x32B4, 0x32B5, 0x32B6, 0x32B7, 0x32B8, 0x32B9, + 0x32BA, 0x32BB, 0x32BC, 0x32BD, 0x32BE, 0x32BF, 0x32C0, 0x32C1, 0x32C2, 0x32C3, 0x32C5, 0x32C5, 0x32C7, 0x32C8, 0x32C9, + 0x32CA, 0x32CB, 0x32CC, 0x0, 0x32CD, 0x32D6, 0x32CF, 0x32D0, 0x32D1, 0x32D2, 0x32D4, 0x32D5, 0x32CE, 0x32D3, 0x32C4, + 0x32C6, 0x32D7, 0x2CD, 0x2CE, 0x2CF, 0x2D0, 0x2D1, 0x2D2, 0x25B, 0x2A3, 0x2A4, 0x2D3, 0x2D4, 0x2D5, 0x2D6, + 0xA9CEFBC1, 0x1BFF, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0xA9DAFBC1, 0xA9DBFBC1, 0xA9DCFBC1, + 0xA9DDFBC1, 0x42F, 0x430, 0x303E, 0x3046, 0x3050, 0x3066, 0x307F, 0x30C9, 0x1C00, 0x3058, 0x307A, 0x303C, 0x303F, 0x304B, + 0x3052, 0x3060, 0x3063, 0x3067, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x309B, + 0x306C, 0x306F, 0x307D, 0x3080, 0xA9FFFBC1, 0x3212, 0x3213, 0x3214, 0x3215, 0x3216, 0x3217, 0x3218, 0x3219, 0x321A, 0x321B, + 0x321C, 0x321D, 0x321E, 0x321F, 0x3220, 0x3221, 0x3222, 0x3223, 0x3224, 0x3225, 0x3226, 0x3227, 0x3228, 0x3229, 0x322A, + 0x322B, 0x322C, 0x322D, 0x322E, 0x322F, 0x3230, 0x3231, 0x3232, 0x3233, 0x3234, 0x3235, 0x3236, 0x3237, 0x3238, 0x3239, + 0x323A, 0x323F, 0x3240, 
0x3241, 0x3242, 0x3243, 0x3244, 0x3245, 0x3246, 0x3247, 0x3248, 0x323B, 0x323C, 0x323D, 0x323E, + 0xAA37FBC1, 0xAA38FBC1, 0xAA39FBC1, 0xAA3AFBC1, 0xAA3BFBC1, 0xAA3CFBC1, 0xAA3DFBC1, 0xAA3EFBC1, 0xAA3FFBC1, 0x3249, 0x324A, 0x324B, 0x324C, 0x324D, 0x324E, + 0x324F, 0x3250, 0x3251, 0x3252, 0x3253, 0x3254, 0x3255, 0x3256, 0xAA4EFBC1, 0xAA4FFBC1, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, + 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0xAA5AFBC1, 0xAA5BFBC1, 0x431, 0x2A5, 0x2A6, 0x2A7, 0x303B, 0x3044, 0x3047, 0x304A, + 0x3051, 0x3057, 0x305B, 0x305D, 0x305F, 0x3062, 0x306E, 0x3072, 0x3093, 0x3096, 0x3098, 0x3078, 0x1C01, 0x3099, 0x304D, + 0x3086, 0x30E1, 0x30E2, 0x30E3, 0x54D, 0x54E, 0x54F, 0x3087, 0x30DE, 0x30DF, 0x30E0, 0x3048, 0x3054, 0x2DE3, 0x2DE4, + 0x2DE5, 0x2DE6, 0x2DE7, 0x2DE8, 0x2DE9, 0x2DEA, 0x2DEB, 0x2DEC, 0x2DED, 0x2DEE, 0x2DEF, 0x2DF0, 0x2DF1, 0x2DF2, 0x2DF3, + 0x2DF4, 0x2DF5, 0x2DF6, 0x2DF7, 0x2DF8, 0x2DF9, 0x2DFA, 0x2DFB, 0x2DFC, 0x2DFD, 0x2DFE, 0x2DFF, 0x2E00, 0x2E01, 0x2E02, + 0x2E03, 0x2E04, 0x2E05, 0x2E06, 0x2E07, 0x2E08, 0x2E09, 0x2E0A, 0x2E0B, 0x2E0C, 0x2E0D, 0x2E0E, 0x2E0F, 0x2E10, 0x2E11, + 0x2E12, 0x2E13, 0x2E14, 0x2E15, 0x2E16, 0x2E17, 0x2E18, 0x2E19, 0x2E1A, 0x2E1B, 0x2E1C, 0x2E1D, 0x2E1E, 0x2E1F, 0x2E20, + 0x2E21, 0x0, 0x2E22, 0x0, 0x2E23, 0xAAC3FBC1, 0xAAC4FBC1, 0xAAC5FBC1, 0xAAC6FBC1, 0xAAC7FBC1, 0xAAC8FBC1, 0xAAC9FBC1, 0xAACAFBC1, 0xAACBFBC1, 0xAACCFBC1, + 0xAACDFBC1, 0xAACEFBC1, 0xAACFFBC1, 0xAAD0FBC1, 0xAAD1FBC1, 0xAAD2FBC1, 0xAAD3FBC1, 0xAAD4FBC1, 0xAAD5FBC1, 0xAAD6FBC1, 0xAAD7FBC1, 0xAAD8FBC1, 0xAAD9FBC1, 0xAADAFBC1, 0x2E24, + 0x2E25, 0x1C02, 0x3F1, 0x3F2, 0x2939, 0x293A, 0x293B, 0x293C, 0x293D, 0x293E, 0x293F, 0x2940, 0x2941, 0x2942, 0x2943, + 0x294C, 0x294D, 0x294E, 0x294F, 0x2950, 0x2A8, 0x272, 0x291D, 0x1C03, 0x1C04, 0x2951, 0x295B, 0xAAF7FBC1, 0xAAF8FBC1, 0xAAF9FBC1, + 0xAAFAFBC1, 0xAAFBFBC1, 0xAAFCFBC1, 0xAAFDFBC1, 0xAAFEFBC1, 0xAAFFFBC1, 0xAB00FBC1, 0x24D6, 0x24D7, 0x24D8, 0x24D9, 0x24DA, 0x24DB, 0xAB07FBC1, 0xAB08FBC1, + 0x259D, 
0x259E, 0x259F, 0x25A0, 0x25A1, 0x25A2, 0xAB0FFBC1, 0xAB10FBC1, 0x257E, 0x257F, 0x2580, 0x2581, 0x2582, 0x2583, 0xAB17FBC1, + 0xAB18FBC1, 0xAB19FBC1, 0xAB1AFBC1, 0xAB1BFBC1, 0xAB1CFBC1, 0xAB1DFBC1, 0xAB1EFBC1, 0xAB1FFBC1, 0x25E0, 0x25E1, 0x25E2, 0x25E3, 0x25E4, 0x25E5, 0x25E6, + 0xAB27FBC1, 0x25F8, 0x25F9, 0x25FA, 0x25FB, 0x25FC, 0x25FD, 0x25FE, 0xAB2FFBC1, 0x1C59, 0x1C50, 0x1CAF, 0x1CB0, 0x1CB6, 0x1CEA, + 0x1CFC, 0x1D91, 0x1D8B, 0x1D8C, 0x1DB5, 0x1DD6, 0x1DDC, 0x1DE3, 0x1DEF, 0x1DF6, 0x1DEB, 0x1DE9, 0x1DEA, 0x1DEC, 0x1DED, + 0x1E37, 0x1E3C, 0x1E65, 0x1E66, 0x1E5B, 0x1E67, 0x1E68, 0x1E69, 0x1E86, 0x1EBA, 0x1EC4, 0x1EBE, 0x1EBF, 0x1EC7, 0x1F08, + 0x1F09, 0x1F0A, 0x1F04, 0x1F05, 0x1F06, 0x1F07, 0x1F1C, 0x4F5, 0x1D2B, 0x1D91, 0x1D87, 0x1EC7, 0x1F5B, 0x1F5C, 0x1DF8, + 0x1F5D, 0x1C5F, 0x1FE2, 0xAB66FBC1, 0xAB67FBC1, 0xAB68FBC1, 0xAB69FBC1, 0xAB6AFBC1, 0xAB6BFBC1, 0xAB6CFBC1, 0xAB6DFBC1, 0xAB6EFBC1, 0xAB6FFBC1, 0x337F, 0x3380, + 0x3381, 0x3382, 0x3383, 0x3384, 0x3385, 0x3386, 0x3387, 0x3388, 0x3389, 0x338A, 0x338B, 0x338C, 0x338D, 0x338E, 0x338F, + 0x3390, 0x3391, 0x3392, 0x3393, 0x3394, 0x3395, 0x3396, 0x3397, 0x3398, 0x3399, 0x339A, 0x339B, 0x339C, 0x339D, 0x339E, + 0x339F, 0x33A0, 0x33A1, 0x33A2, 0x33A3, 0x33A4, 0x33A5, 0x33A6, 0x33A7, 0x33A8, 0x33A9, 0x33AA, 0x33AB, 0x33AC, 0x33AD, + 0x33AE, 0x33AF, 0x33B0, 0x33B1, 0x33B2, 0x33B3, 0x33B4, 0x33B5, 0x33B6, 0x33B7, 0x33B8, 0x33B9, 0x33BA, 0x33BB, 0x33BC, + 0x33BD, 0x33BE, 0x33BF, 0x33C0, 0x33C1, 0x33C2, 0x33C3, 0x33C4, 0x33C5, 0x33C6, 0x33C7, 0x33C8, 0x33C9, 0x33CA, 0x33CB, + 0x33CC, 0x33CD, 0x33CE, 0x291E, 0x291F, 0x2920, 0x2921, 0x2922, 0x2923, 0x2924, 0x2925, 0x2926, 0x2927, 0x2928, 0x2929, + 0x292A, 0x292B, 0x292C, 0x292D, 0x292E, 0x292F, 0x2930, 0x2931, 0x2932, 0x2933, 0x2934, 0x2935, 0x2936, 0x2937, 0x2938, + 0x2952, 0x2953, 0x2954, 0x2955, 0x2956, 0x2957, 0x2958, 0x2959, 0x2944, 0x2945, 0x2946, 0x2947, 0x2948, 0x2949, 0x294A, + 0x294B, 0x2A9, 0x0, 0x295A, 0xABEEFBC1, 0xABEFFBC1, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 
0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, + 0x1C46, 0xABFAFBC1, 0xABFBFBC1, 0xABFCFBC1, 0xABFDFBC1, 0xABFEFBC1, 0xABFFFBC1, 0x3C733BF5, 0x3CD13C733BF5, 0x3CD23C733BF5, 0x3CD33C733BF5, 0x3CD43C733BF5, 0x3CD53C733BF5, 0x3CD63C733BF5, 0x3CD73C733BF5, + 0x3CD83C733BF5, 0x3CD93C733BF5, 0x3CDA3C733BF5, 0x3CDB3C733BF5, 0x3CDC3C733BF5, 0x3CDD3C733BF5, 0x3CDE3C733BF5, 0x3CDF3C733BF5, 0x3CE03C733BF5, 0x3CE13C733BF5, 0x3CE23C733BF5, 0x3CE33C733BF5, 0x3CE43C733BF5, 0x3CE53C733BF5, 0x3CE63C733BF5, + 0x3CE73C733BF5, 0x3CE83C733BF5, 0x3CE93C733BF5, 0x3CEA3C733BF5, 0x3CEB3C733BF5, 0x3C743BF5, 0x3CD13C743BF5, 0x3CD23C743BF5, 0x3CD33C743BF5, 0x3CD43C743BF5, 0x3CD53C743BF5, 0x3CD63C743BF5, 0x3CD73C743BF5, 0x3CD83C743BF5, 0x3CD93C743BF5, + 0x3CDA3C743BF5, 0x3CDB3C743BF5, 0x3CDC3C743BF5, 0x3CDD3C743BF5, 0x3CDE3C743BF5, 0x3CDF3C743BF5, 0x3CE03C743BF5, 0x3CE13C743BF5, 0x3CE23C743BF5, 0x3CE33C743BF5, 0x3CE43C743BF5, 0x3CE53C743BF5, 0x3CE63C743BF5, 0x3CE73C743BF5, 0x3CE83C743BF5, + 0x3CE93C743BF5, 0x3CEA3C743BF5, 0x3CEB3C743BF5, 0x3C753BF5, 0x3CD13C753BF5, 0x3CD23C753BF5, 0x3CD33C753BF5, 0x3CD43C753BF5, 0x3CD53C753BF5, 0x3CD63C753BF5, 0x3CD73C753BF5, 0x3CD83C753BF5, 0x3CD93C753BF5, 0x3CDA3C753BF5, 0x3CDB3C753BF5, + 0x3CDC3C753BF5, 0x3CDD3C753BF5, 0x3CDE3C753BF5, 0x3CDF3C753BF5, 0x3CE03C753BF5, 0x3CE13C753BF5, 0x3CE23C753BF5, 0x3CE33C753BF5, 0x3CE43C753BF5, 0x3CE53C753BF5, 0x3CE63C753BF5, 0x3CE73C753BF5, 0x3CE83C753BF5, 0x3CE93C753BF5, 0x3CEA3C753BF5, + 0x3CEB3C753BF5, 0x3C763BF5, 0x3CD13C763BF5, 0x3CD23C763BF5, 0x3CD33C763BF5, 0x3CD43C763BF5, 0x3CD53C763BF5, 0x3CD63C763BF5, 0x3CD73C763BF5, 0x3CD83C763BF5, 0x3CD93C763BF5, 0x3CDA3C763BF5, 0x3CDB3C763BF5, 0x3CDC3C763BF5, 0x3CDD3C763BF5, + 0x3CDE3C763BF5, 0x3CDF3C763BF5, 0x3CE03C763BF5, 0x3CE13C763BF5, 0x3CE23C763BF5, 0x3CE33C763BF5, 0x3CE43C763BF5, 0x3CE53C763BF5, 0x3CE63C763BF5, 0x3CE73C763BF5, 0x3CE83C763BF5, 0x3CE93C763BF5, 0x3CEA3C763BF5, 0x3CEB3C763BF5, 0x3C773BF5, + 0x3CD13C773BF5, 0x3CD23C773BF5, 0x3CD33C773BF5, 0x3CD43C773BF5, 
0x3CD53C773BF5, 0x3CD63C773BF5, 0x3CD73C773BF5, 0x3CD83C773BF5, 0x3CD93C773BF5, 0x3CDA3C773BF5, 0x3CDB3C773BF5, 0x3CDC3C773BF5, 0x3CDD3C773BF5, 0x3CDE3C773BF5, 0x3CDF3C773BF5, + 0x3CE03C773BF5, 0x3CE13C773BF5, 0x3CE23C773BF5, 0x3CE33C773BF5, 0x3CE43C773BF5, 0x3CE53C773BF5, 0x3CE63C773BF5, 0x3CE73C773BF5, 0x3CE83C773BF5, 0x3CE93C773BF5, 0x3CEA3C773BF5, 0x3CEB3C773BF5, 0x3C783BF5, 0x3CD13C783BF5, 0x3CD23C783BF5, + 0x3CD33C783BF5, 0x3CD43C783BF5, 0x3CD53C783BF5, 0x3CD63C783BF5, 0x3CD73C783BF5, 0x3CD83C783BF5, 0x3CD93C783BF5, 0x3CDA3C783BF5, 0x3CDB3C783BF5, 0x3CDC3C783BF5, 0x3CDD3C783BF5, 0x3CDE3C783BF5, 0x3CDF3C783BF5, 0x3CE03C783BF5, 0x3CE13C783BF5, + 0x3CE23C783BF5, 0x3CE33C783BF5, 0x3CE43C783BF5, 0x3CE53C783BF5, 0x3CE63C783BF5, 0x3CE73C783BF5, 0x3CE83C783BF5, 0x3CE93C783BF5, 0x3CEA3C783BF5, 0x3CEB3C783BF5, 0x3C793BF5, 0x3CD13C793BF5, 0x3CD23C793BF5, 0x3CD33C793BF5, 0x3CD43C793BF5, + 0x3CD53C793BF5, 0x3CD63C793BF5, 0x3CD73C793BF5, 0x3CD83C793BF5, 0x3CD93C793BF5, 0x3CDA3C793BF5, 0x3CDB3C793BF5, 0x3CDC3C793BF5, 0x3CDD3C793BF5, 0x3CDE3C793BF5, 0x3CDF3C793BF5, 0x3CE03C793BF5, 0x3CE13C793BF5, 0x3CE23C793BF5, 0x3CE33C793BF5, + 0x3CE43C793BF5, 0x3CE53C793BF5, 0x3CE63C793BF5, 0x3CE73C793BF5, 0x3CE83C793BF5, 0x3CE93C793BF5, 0x3CEA3C793BF5, 0x3CEB3C793BF5, 0x3C7A3BF5, 0x3CD13C7A3BF5, 0x3CD23C7A3BF5, 0x3CD33C7A3BF5, 0x3CD43C7A3BF5, 0x3CD53C7A3BF5, 0x3CD63C7A3BF5, + 0x3CD73C7A3BF5, 0x3CD83C7A3BF5, 0x3CD93C7A3BF5, 0x3CDA3C7A3BF5, 0x3CDB3C7A3BF5, 0x3CDC3C7A3BF5, 0x3CDD3C7A3BF5, 0x3CDE3C7A3BF5, 0x3CDF3C7A3BF5, 0x3CE03C7A3BF5, 0x3CE13C7A3BF5, 0x3CE23C7A3BF5, 0x3CE33C7A3BF5, 0x3CE43C7A3BF5, 0x3CE53C7A3BF5, + 0x3CE63C7A3BF5, 0x3CE73C7A3BF5, 0x3CE83C7A3BF5, 0x3CE93C7A3BF5, 0x3CEA3C7A3BF5, 0x3CEB3C7A3BF5, 0x3C7B3BF5, 0x3CD13C7B3BF5, 0x3CD23C7B3BF5, 0x3CD33C7B3BF5, 0x3CD43C7B3BF5, 0x3CD53C7B3BF5, 0x3CD63C7B3BF5, 0x3CD73C7B3BF5, 0x3CD83C7B3BF5, + 0x3CD93C7B3BF5, 0x3CDA3C7B3BF5, 0x3CDB3C7B3BF5, 0x3CDC3C7B3BF5, 0x3CDD3C7B3BF5, 0x3CDE3C7B3BF5, 0x3CDF3C7B3BF5, 0x3CE03C7B3BF5, 0x3CE13C7B3BF5, 
0x3CE23C7B3BF5, 0x3CE33C7B3BF5, 0x3CE43C7B3BF5, 0x3CE53C7B3BF5, 0x3CE63C7B3BF5, 0x3CE73C7B3BF5, + 0x3CE83C7B3BF5, 0x3CE93C7B3BF5, 0x3CEA3C7B3BF5, 0x3CEB3C7B3BF5, 0x3C7C3BF5, 0x3CD13C7C3BF5, 0x3CD23C7C3BF5, 0x3CD33C7C3BF5, 0x3CD43C7C3BF5, 0x3CD53C7C3BF5, 0x3CD63C7C3BF5, 0x3CD73C7C3BF5, 0x3CD83C7C3BF5, 0x3CD93C7C3BF5, 0x3CDA3C7C3BF5, + 0x3CDB3C7C3BF5, 0x3CDC3C7C3BF5, 0x3CDD3C7C3BF5, 0x3CDE3C7C3BF5, 0x3CDF3C7C3BF5, 0x3CE03C7C3BF5, 0x3CE13C7C3BF5, 0x3CE23C7C3BF5, 0x3CE33C7C3BF5, 0x3CE43C7C3BF5, 0x3CE53C7C3BF5, 0x3CE63C7C3BF5, 0x3CE73C7C3BF5, 0x3CE83C7C3BF5, 0x3CE93C7C3BF5, + 0x3CEA3C7C3BF5, 0x3CEB3C7C3BF5, 0x3C7D3BF5, 0x3CD13C7D3BF5, 0x3CD23C7D3BF5, 0x3CD33C7D3BF5, 0x3CD43C7D3BF5, 0x3CD53C7D3BF5, 0x3CD63C7D3BF5, 0x3CD73C7D3BF5, 0x3CD83C7D3BF5, 0x3CD93C7D3BF5, 0x3CDA3C7D3BF5, 0x3CDB3C7D3BF5, 0x3CDC3C7D3BF5, + 0x3CDD3C7D3BF5, 0x3CDE3C7D3BF5, 0x3CDF3C7D3BF5, 0x3CE03C7D3BF5, 0x3CE13C7D3BF5, 0x3CE23C7D3BF5, 0x3CE33C7D3BF5, 0x3CE43C7D3BF5, 0x3CE53C7D3BF5, 0x3CE63C7D3BF5, 0x3CE73C7D3BF5, 0x3CE83C7D3BF5, 0x3CE93C7D3BF5, 0x3CEA3C7D3BF5, 0x3CEB3C7D3BF5, + 0x3C7E3BF5, 0x3CD13C7E3BF5, 0x3CD23C7E3BF5, 0x3CD33C7E3BF5, 0x3CD43C7E3BF5, 0x3CD53C7E3BF5, 0x3CD63C7E3BF5, 0x3CD73C7E3BF5, 0x3CD83C7E3BF5, 0x3CD93C7E3BF5, 0x3CDA3C7E3BF5, 0x3CDB3C7E3BF5, 0x3CDC3C7E3BF5, 0x3CDD3C7E3BF5, 0x3CDE3C7E3BF5, + 0x3CDF3C7E3BF5, 0x3CE03C7E3BF5, 0x3CE13C7E3BF5, 0x3CE23C7E3BF5, 0x3CE33C7E3BF5, 0x3CE43C7E3BF5, 0x3CE53C7E3BF5, 0x3CE63C7E3BF5, 0x3CE73C7E3BF5, 0x3CE83C7E3BF5, 0x3CE93C7E3BF5, 0x3CEA3C7E3BF5, 0x3CEB3C7E3BF5, 0x3C7F3BF5, 0x3CD13C7F3BF5, + 0x3CD23C7F3BF5, 0x3CD33C7F3BF5, 0x3CD43C7F3BF5, 0x3CD53C7F3BF5, 0x3CD63C7F3BF5, 0x3CD73C7F3BF5, 0x3CD83C7F3BF5, 0x3CD93C7F3BF5, 0x3CDA3C7F3BF5, 0x3CDB3C7F3BF5, 0x3CDC3C7F3BF5, 0x3CDD3C7F3BF5, 0x3CDE3C7F3BF5, 0x3CDF3C7F3BF5, 0x3CE03C7F3BF5, + 0x3CE13C7F3BF5, 0x3CE23C7F3BF5, 0x3CE33C7F3BF5, 0x3CE43C7F3BF5, 0x3CE53C7F3BF5, 0x3CE63C7F3BF5, 0x3CE73C7F3BF5, 0x3CE83C7F3BF5, 0x3CE93C7F3BF5, 0x3CEA3C7F3BF5, 0x3CEB3C7F3BF5, 0x3C803BF5, 0x3CD13C803BF5, 0x3CD23C803BF5, 
0x3CD33C803BF5, + 0x3CD43C803BF5, 0x3CD53C803BF5, 0x3CD63C803BF5, 0x3CD73C803BF5, 0x3CD83C803BF5, 0x3CD93C803BF5, 0x3CDA3C803BF5, 0x3CDB3C803BF5, 0x3CDC3C803BF5, 0x3CDD3C803BF5, 0x3CDE3C803BF5, 0x3CDF3C803BF5, 0x3CE03C803BF5, 0x3CE13C803BF5, 0x3CE23C803BF5, + 0x3CE33C803BF5, 0x3CE43C803BF5, 0x3CE53C803BF5, 0x3CE63C803BF5, 0x3CE73C803BF5, 0x3CE83C803BF5, 0x3CE93C803BF5, 0x3CEA3C803BF5, 0x3CEB3C803BF5, 0x3C813BF5, 0x3CD13C813BF5, 0x3CD23C813BF5, 0x3CD33C813BF5, 0x3CD43C813BF5, 0x3CD53C813BF5, + 0x3CD63C813BF5, 0x3CD73C813BF5, 0x3CD83C813BF5, 0x3CD93C813BF5, 0x3CDA3C813BF5, 0x3CDB3C813BF5, 0x3CDC3C813BF5, 0x3CDD3C813BF5, 0x3CDE3C813BF5, 0x3CDF3C813BF5, 0x3CE03C813BF5, 0x3CE13C813BF5, 0x3CE23C813BF5, 0x3CE33C813BF5, 0x3CE43C813BF5, + 0x3CE53C813BF5, 0x3CE63C813BF5, 0x3CE73C813BF5, 0x3CE83C813BF5, 0x3CE93C813BF5, 0x3CEA3C813BF5, 0x3CEB3C813BF5, 0x3C823BF5, 0x3CD13C823BF5, 0x3CD23C823BF5, 0x3CD33C823BF5, 0x3CD43C823BF5, 0x3CD53C823BF5, 0x3CD63C823BF5, 0x3CD73C823BF5, + 0x3CD83C823BF5, 0x3CD93C823BF5, 0x3CDA3C823BF5, 0x3CDB3C823BF5, 0x3CDC3C823BF5, 0x3CDD3C823BF5, 0x3CDE3C823BF5, 0x3CDF3C823BF5, 0x3CE03C823BF5, 0x3CE13C823BF5, 0x3CE23C823BF5, 0x3CE33C823BF5, 0x3CE43C823BF5, 0x3CE53C823BF5, 0x3CE63C823BF5, + 0x3CE73C823BF5, 0x3CE83C823BF5, 0x3CE93C823BF5, 0x3CEA3C823BF5, 0x3CEB3C823BF5, 0x3C833BF5, 0x3CD13C833BF5, 0x3CD23C833BF5, 0x3CD33C833BF5, 0x3CD43C833BF5, 0x3CD53C833BF5, 0x3CD63C833BF5, 0x3CD73C833BF5, 0x3CD83C833BF5, 0x3CD93C833BF5, + 0x3CDA3C833BF5, 0x3CDB3C833BF5, 0x3CDC3C833BF5, 0x3CDD3C833BF5, 0x3CDE3C833BF5, 0x3CDF3C833BF5, 0x3CE03C833BF5, 0x3CE13C833BF5, 0x3CE23C833BF5, 0x3CE33C833BF5, 0x3CE43C833BF5, 0x3CE53C833BF5, 0x3CE63C833BF5, 0x3CE73C833BF5, 0x3CE83C833BF5, + 0x3CE93C833BF5, 0x3CEA3C833BF5, 0x3CEB3C833BF5, 0x3C843BF5, 0x3CD13C843BF5, 0x3CD23C843BF5, 0x3CD33C843BF5, 0x3CD43C843BF5, 0x3CD53C843BF5, 0x3CD63C843BF5, 0x3CD73C843BF5, 0x3CD83C843BF5, 0x3CD93C843BF5, 0x3CDA3C843BF5, 0x3CDB3C843BF5, + 0x3CDC3C843BF5, 0x3CDD3C843BF5, 0x3CDE3C843BF5, 
0x3CDF3C843BF5, 0x3CE03C843BF5, 0x3CE13C843BF5, 0x3CE23C843BF5, 0x3CE33C843BF5, 0x3CE43C843BF5, 0x3CE53C843BF5, 0x3CE63C843BF5, 0x3CE73C843BF5, 0x3CE83C843BF5, 0x3CE93C843BF5, 0x3CEA3C843BF5, + 0x3CEB3C843BF5, 0x3C853BF5, 0x3CD13C853BF5, 0x3CD23C853BF5, 0x3CD33C853BF5, 0x3CD43C853BF5, 0x3CD53C853BF5, 0x3CD63C853BF5, 0x3CD73C853BF5, 0x3CD83C853BF5, 0x3CD93C853BF5, 0x3CDA3C853BF5, 0x3CDB3C853BF5, 0x3CDC3C853BF5, 0x3CDD3C853BF5, + 0x3CDE3C853BF5, 0x3CDF3C853BF5, 0x3CE03C853BF5, 0x3CE13C853BF5, 0x3CE23C853BF5, 0x3CE33C853BF5, 0x3CE43C853BF5, 0x3CE53C853BF5, 0x3CE63C853BF5, 0x3CE73C853BF5, 0x3CE83C853BF5, 0x3CE93C853BF5, 0x3CEA3C853BF5, 0x3CEB3C853BF5, 0x3C863BF5, + 0x3CD13C863BF5, 0x3CD23C863BF5, 0x3CD33C863BF5, 0x3CD43C863BF5, 0x3CD53C863BF5, 0x3CD63C863BF5, 0x3CD73C863BF5, 0x3CD83C863BF5, 0x3CD93C863BF5, 0x3CDA3C863BF5, 0x3CDB3C863BF5, 0x3CDC3C863BF5, 0x3CDD3C863BF5, 0x3CDE3C863BF5, 0x3CDF3C863BF5, + 0x3CE03C863BF5, 0x3CE13C863BF5, 0x3CE23C863BF5, 0x3CE33C863BF5, 0x3CE43C863BF5, 0x3CE53C863BF5, 0x3CE63C863BF5, 0x3CE73C863BF5, 0x3CE83C863BF5, 0x3CE93C863BF5, 0x3CEA3C863BF5, 0x3CEB3C863BF5, 0x3C873BF5, 0x3CD13C873BF5, 0x3CD23C873BF5, + 0x3CD33C873BF5, 0x3CD43C873BF5, 0x3CD53C873BF5, 0x3CD63C873BF5, 0x3CD73C873BF5, 0x3CD83C873BF5, 0x3CD93C873BF5, 0x3CDA3C873BF5, 0x3CDB3C873BF5, 0x3CDC3C873BF5, 0x3CDD3C873BF5, 0x3CDE3C873BF5, 0x3CDF3C873BF5, 0x3CE03C873BF5, 0x3CE13C873BF5, + 0x3CE23C873BF5, 0x3CE33C873BF5, 0x3CE43C873BF5, 0x3CE53C873BF5, 0x3CE63C873BF5, 0x3CE73C873BF5, 0x3CE83C873BF5, 0x3CE93C873BF5, 0x3CEA3C873BF5, 0x3CEB3C873BF5, 0x3C733BF6, 0x3CD13C733BF6, 0x3CD23C733BF6, 0x3CD33C733BF6, 0x3CD43C733BF6, + 0x3CD53C733BF6, 0x3CD63C733BF6, 0x3CD73C733BF6, 0x3CD83C733BF6, 0x3CD93C733BF6, 0x3CDA3C733BF6, 0x3CDB3C733BF6, 0x3CDC3C733BF6, 0x3CDD3C733BF6, 0x3CDE3C733BF6, 0x3CDF3C733BF6, 0x3CE03C733BF6, 0x3CE13C733BF6, 0x3CE23C733BF6, 0x3CE33C733BF6, + 0x3CE43C733BF6, 0x3CE53C733BF6, 0x3CE63C733BF6, 0x3CE73C733BF6, 0x3CE83C733BF6, 0x3CE93C733BF6, 0x3CEA3C733BF6, 0x3CEB3C733BF6, 
0x3C743BF6, 0x3CD13C743BF6, 0x3CD23C743BF6, 0x3CD33C743BF6, 0x3CD43C743BF6, 0x3CD53C743BF6, 0x3CD63C743BF6, + 0x3CD73C743BF6, 0x3CD83C743BF6, 0x3CD93C743BF6, 0x3CDA3C743BF6, 0x3CDB3C743BF6, 0x3CDC3C743BF6, 0x3CDD3C743BF6, 0x3CDE3C743BF6, 0x3CDF3C743BF6, 0x3CE03C743BF6, 0x3CE13C743BF6, 0x3CE23C743BF6, 0x3CE33C743BF6, 0x3CE43C743BF6, 0x3CE53C743BF6, + 0x3CE63C743BF6, 0x3CE73C743BF6, 0x3CE83C743BF6, 0x3CE93C743BF6, 0x3CEA3C743BF6, 0x3CEB3C743BF6, 0x3C753BF6, 0x3CD13C753BF6, 0x3CD23C753BF6, 0x3CD33C753BF6, 0x3CD43C753BF6, 0x3CD53C753BF6, 0x3CD63C753BF6, 0x3CD73C753BF6, 0x3CD83C753BF6, + 0x3CD93C753BF6, 0x3CDA3C753BF6, 0x3CDB3C753BF6, 0x3CDC3C753BF6, 0x3CDD3C753BF6, 0x3CDE3C753BF6, 0x3CDF3C753BF6, 0x3CE03C753BF6, 0x3CE13C753BF6, 0x3CE23C753BF6, 0x3CE33C753BF6, 0x3CE43C753BF6, 0x3CE53C753BF6, 0x3CE63C753BF6, 0x3CE73C753BF6, + 0x3CE83C753BF6, 0x3CE93C753BF6, 0x3CEA3C753BF6, 0x3CEB3C753BF6, 0x3C763BF6, 0x3CD13C763BF6, 0x3CD23C763BF6, 0x3CD33C763BF6, 0x3CD43C763BF6, 0x3CD53C763BF6, 0x3CD63C763BF6, 0x3CD73C763BF6, 0x3CD83C763BF6, 0x3CD93C763BF6, 0x3CDA3C763BF6, + 0x3CDB3C763BF6, 0x3CDC3C763BF6, 0x3CDD3C763BF6, 0x3CDE3C763BF6, 0x3CDF3C763BF6, 0x3CE03C763BF6, 0x3CE13C763BF6, 0x3CE23C763BF6, 0x3CE33C763BF6, 0x3CE43C763BF6, 0x3CE53C763BF6, 0x3CE63C763BF6, 0x3CE73C763BF6, 0x3CE83C763BF6, 0x3CE93C763BF6, + 0x3CEA3C763BF6, 0x3CEB3C763BF6, 0x3C773BF6, 0x3CD13C773BF6, 0x3CD23C773BF6, 0x3CD33C773BF6, 0x3CD43C773BF6, 0x3CD53C773BF6, 0x3CD63C773BF6, 0x3CD73C773BF6, 0x3CD83C773BF6, 0x3CD93C773BF6, 0x3CDA3C773BF6, 0x3CDB3C773BF6, 0x3CDC3C773BF6, + 0x3CDD3C773BF6, 0x3CDE3C773BF6, 0x3CDF3C773BF6, 0x3CE03C773BF6, 0x3CE13C773BF6, 0x3CE23C773BF6, 0x3CE33C773BF6, 0x3CE43C773BF6, 0x3CE53C773BF6, 0x3CE63C773BF6, 0x3CE73C773BF6, 0x3CE83C773BF6, 0x3CE93C773BF6, 0x3CEA3C773BF6, 0x3CEB3C773BF6, + 0x3C783BF6, 0x3CD13C783BF6, 0x3CD23C783BF6, 0x3CD33C783BF6, 0x3CD43C783BF6, 0x3CD53C783BF6, 0x3CD63C783BF6, 0x3CD73C783BF6, 0x3CD83C783BF6, 0x3CD93C783BF6, 0x3CDA3C783BF6, 0x3CDB3C783BF6, 0x3CDC3C783BF6, 
0x3CDD3C783BF6, 0x3CDE3C783BF6, + 0x3CDF3C783BF6, 0x3CE03C783BF6, 0x3CE13C783BF6, 0x3CE23C783BF6, 0x3CE33C783BF6, 0x3CE43C783BF6, 0x3CE53C783BF6, 0x3CE63C783BF6, 0x3CE73C783BF6, 0x3CE83C783BF6, 0x3CE93C783BF6, 0x3CEA3C783BF6, 0x3CEB3C783BF6, 0x3C793BF6, 0x3CD13C793BF6, + 0x3CD23C793BF6, 0x3CD33C793BF6, 0x3CD43C793BF6, 0x3CD53C793BF6, 0x3CD63C793BF6, 0x3CD73C793BF6, 0x3CD83C793BF6, 0x3CD93C793BF6, 0x3CDA3C793BF6, 0x3CDB3C793BF6, 0x3CDC3C793BF6, 0x3CDD3C793BF6, 0x3CDE3C793BF6, 0x3CDF3C793BF6, 0x3CE03C793BF6, + 0x3CE13C793BF6, 0x3CE23C793BF6, 0x3CE33C793BF6, 0x3CE43C793BF6, 0x3CE53C793BF6, 0x3CE63C793BF6, 0x3CE73C793BF6, 0x3CE83C793BF6, 0x3CE93C793BF6, 0x3CEA3C793BF6, 0x3CEB3C793BF6, 0x3C7A3BF6, 0x3CD13C7A3BF6, 0x3CD23C7A3BF6, 0x3CD33C7A3BF6, + 0x3CD43C7A3BF6, 0x3CD53C7A3BF6, 0x3CD63C7A3BF6, 0x3CD73C7A3BF6, 0x3CD83C7A3BF6, 0x3CD93C7A3BF6, 0x3CDA3C7A3BF6, 0x3CDB3C7A3BF6, 0x3CDC3C7A3BF6, 0x3CDD3C7A3BF6, 0x3CDE3C7A3BF6, 0x3CDF3C7A3BF6, 0x3CE03C7A3BF6, 0x3CE13C7A3BF6, 0x3CE23C7A3BF6, + 0x3CE33C7A3BF6, 0x3CE43C7A3BF6, 0x3CE53C7A3BF6, 0x3CE63C7A3BF6, 0x3CE73C7A3BF6, 0x3CE83C7A3BF6, 0x3CE93C7A3BF6, 0x3CEA3C7A3BF6, 0x3CEB3C7A3BF6, 0x3C7B3BF6, 0x3CD13C7B3BF6, 0x3CD23C7B3BF6, 0x3CD33C7B3BF6, 0x3CD43C7B3BF6, 0x3CD53C7B3BF6, + 0x3CD63C7B3BF6, 0x3CD73C7B3BF6, 0x3CD83C7B3BF6, 0x3CD93C7B3BF6, 0x3CDA3C7B3BF6, 0x3CDB3C7B3BF6, 0x3CDC3C7B3BF6, 0x3CDD3C7B3BF6, 0x3CDE3C7B3BF6, 0x3CDF3C7B3BF6, 0x3CE03C7B3BF6, 0x3CE13C7B3BF6, 0x3CE23C7B3BF6, 0x3CE33C7B3BF6, 0x3CE43C7B3BF6, + 0x3CE53C7B3BF6, 0x3CE63C7B3BF6, 0x3CE73C7B3BF6, 0x3CE83C7B3BF6, 0x3CE93C7B3BF6, 0x3CEA3C7B3BF6, 0x3CEB3C7B3BF6, 0x3C7C3BF6, 0x3CD13C7C3BF6, 0x3CD23C7C3BF6, 0x3CD33C7C3BF6, 0x3CD43C7C3BF6, 0x3CD53C7C3BF6, 0x3CD63C7C3BF6, 0x3CD73C7C3BF6, + 0x3CD83C7C3BF6, 0x3CD93C7C3BF6, 0x3CDA3C7C3BF6, 0x3CDB3C7C3BF6, 0x3CDC3C7C3BF6, 0x3CDD3C7C3BF6, 0x3CDE3C7C3BF6, 0x3CDF3C7C3BF6, 0x3CE03C7C3BF6, 0x3CE13C7C3BF6, 0x3CE23C7C3BF6, 0x3CE33C7C3BF6, 0x3CE43C7C3BF6, 0x3CE53C7C3BF6, 0x3CE63C7C3BF6, + 0x3CE73C7C3BF6, 0x3CE83C7C3BF6, 
0x3CE93C7C3BF6, 0x3CEA3C7C3BF6, 0x3CEB3C7C3BF6, 0x3C7D3BF6, 0x3CD13C7D3BF6, 0x3CD23C7D3BF6, 0x3CD33C7D3BF6, 0x3CD43C7D3BF6, 0x3CD53C7D3BF6, 0x3CD63C7D3BF6, 0x3CD73C7D3BF6, 0x3CD83C7D3BF6, 0x3CD93C7D3BF6, + 0x3CDA3C7D3BF6, 0x3CDB3C7D3BF6, 0x3CDC3C7D3BF6, 0x3CDD3C7D3BF6, 0x3CDE3C7D3BF6, 0x3CDF3C7D3BF6, 0x3CE03C7D3BF6, 0x3CE13C7D3BF6, 0x3CE23C7D3BF6, 0x3CE33C7D3BF6, 0x3CE43C7D3BF6, 0x3CE53C7D3BF6, 0x3CE63C7D3BF6, 0x3CE73C7D3BF6, 0x3CE83C7D3BF6, + 0x3CE93C7D3BF6, 0x3CEA3C7D3BF6, 0x3CEB3C7D3BF6, 0x3C7E3BF6, 0x3CD13C7E3BF6, 0x3CD23C7E3BF6, 0x3CD33C7E3BF6, 0x3CD43C7E3BF6, 0x3CD53C7E3BF6, 0x3CD63C7E3BF6, 0x3CD73C7E3BF6, 0x3CD83C7E3BF6, 0x3CD93C7E3BF6, 0x3CDA3C7E3BF6, 0x3CDB3C7E3BF6, + 0x3CDC3C7E3BF6, 0x3CDD3C7E3BF6, 0x3CDE3C7E3BF6, 0x3CDF3C7E3BF6, 0x3CE03C7E3BF6, 0x3CE13C7E3BF6, 0x3CE23C7E3BF6, 0x3CE33C7E3BF6, 0x3CE43C7E3BF6, 0x3CE53C7E3BF6, 0x3CE63C7E3BF6, 0x3CE73C7E3BF6, 0x3CE83C7E3BF6, 0x3CE93C7E3BF6, 0x3CEA3C7E3BF6, + 0x3CEB3C7E3BF6, 0x3C7F3BF6, 0x3CD13C7F3BF6, 0x3CD23C7F3BF6, 0x3CD33C7F3BF6, 0x3CD43C7F3BF6, 0x3CD53C7F3BF6, 0x3CD63C7F3BF6, 0x3CD73C7F3BF6, 0x3CD83C7F3BF6, 0x3CD93C7F3BF6, 0x3CDA3C7F3BF6, 0x3CDB3C7F3BF6, 0x3CDC3C7F3BF6, 0x3CDD3C7F3BF6, + 0x3CDE3C7F3BF6, 0x3CDF3C7F3BF6, 0x3CE03C7F3BF6, 0x3CE13C7F3BF6, 0x3CE23C7F3BF6, 0x3CE33C7F3BF6, 0x3CE43C7F3BF6, 0x3CE53C7F3BF6, 0x3CE63C7F3BF6, 0x3CE73C7F3BF6, 0x3CE83C7F3BF6, 0x3CE93C7F3BF6, 0x3CEA3C7F3BF6, 0x3CEB3C7F3BF6, 0x3C803BF6, + 0x3CD13C803BF6, 0x3CD23C803BF6, 0x3CD33C803BF6, 0x3CD43C803BF6, 0x3CD53C803BF6, 0x3CD63C803BF6, 0x3CD73C803BF6, 0x3CD83C803BF6, 0x3CD93C803BF6, 0x3CDA3C803BF6, 0x3CDB3C803BF6, 0x3CDC3C803BF6, 0x3CDD3C803BF6, 0x3CDE3C803BF6, 0x3CDF3C803BF6, + 0x3CE03C803BF6, 0x3CE13C803BF6, 0x3CE23C803BF6, 0x3CE33C803BF6, 0x3CE43C803BF6, 0x3CE53C803BF6, 0x3CE63C803BF6, 0x3CE73C803BF6, 0x3CE83C803BF6, 0x3CE93C803BF6, 0x3CEA3C803BF6, 0x3CEB3C803BF6, 0x3C813BF6, 0x3CD13C813BF6, 0x3CD23C813BF6, + 0x3CD33C813BF6, 0x3CD43C813BF6, 0x3CD53C813BF6, 0x3CD63C813BF6, 0x3CD73C813BF6, 0x3CD83C813BF6, 0x3CD93C813BF6, 
0x3CDA3C813BF6, 0x3CDB3C813BF6, 0x3CDC3C813BF6, 0x3CDD3C813BF6, 0x3CDE3C813BF6, 0x3CDF3C813BF6, 0x3CE03C813BF6, 0x3CE13C813BF6, + 0x3CE23C813BF6, 0x3CE33C813BF6, 0x3CE43C813BF6, 0x3CE53C813BF6, 0x3CE63C813BF6, 0x3CE73C813BF6, 0x3CE83C813BF6, 0x3CE93C813BF6, 0x3CEA3C813BF6, 0x3CEB3C813BF6, 0x3C823BF6, 0x3CD13C823BF6, 0x3CD23C823BF6, 0x3CD33C823BF6, 0x3CD43C823BF6, + 0x3CD53C823BF6, 0x3CD63C823BF6, 0x3CD73C823BF6, 0x3CD83C823BF6, 0x3CD93C823BF6, 0x3CDA3C823BF6, 0x3CDB3C823BF6, 0x3CDC3C823BF6, 0x3CDD3C823BF6, 0x3CDE3C823BF6, 0x3CDF3C823BF6, 0x3CE03C823BF6, 0x3CE13C823BF6, 0x3CE23C823BF6, 0x3CE33C823BF6, + 0x3CE43C823BF6, 0x3CE53C823BF6, 0x3CE63C823BF6, 0x3CE73C823BF6, 0x3CE83C823BF6, 0x3CE93C823BF6, 0x3CEA3C823BF6, 0x3CEB3C823BF6, 0x3C833BF6, 0x3CD13C833BF6, 0x3CD23C833BF6, 0x3CD33C833BF6, 0x3CD43C833BF6, 0x3CD53C833BF6, 0x3CD63C833BF6, + 0x3CD73C833BF6, 0x3CD83C833BF6, 0x3CD93C833BF6, 0x3CDA3C833BF6, 0x3CDB3C833BF6, 0x3CDC3C833BF6, 0x3CDD3C833BF6, 0x3CDE3C833BF6, 0x3CDF3C833BF6, 0x3CE03C833BF6, 0x3CE13C833BF6, 0x3CE23C833BF6, 0x3CE33C833BF6, 0x3CE43C833BF6, 0x3CE53C833BF6, + 0x3CE63C833BF6, 0x3CE73C833BF6, 0x3CE83C833BF6, 0x3CE93C833BF6, 0x3CEA3C833BF6, 0x3CEB3C833BF6, 0x3C843BF6, 0x3CD13C843BF6, 0x3CD23C843BF6, 0x3CD33C843BF6, 0x3CD43C843BF6, 0x3CD53C843BF6, 0x3CD63C843BF6, 0x3CD73C843BF6, 0x3CD83C843BF6, + 0x3CD93C843BF6, 0x3CDA3C843BF6, 0x3CDB3C843BF6, 0x3CDC3C843BF6, 0x3CDD3C843BF6, 0x3CDE3C843BF6, 0x3CDF3C843BF6, 0x3CE03C843BF6, 0x3CE13C843BF6, 0x3CE23C843BF6, 0x3CE33C843BF6, 0x3CE43C843BF6, 0x3CE53C843BF6, 0x3CE63C843BF6, 0x3CE73C843BF6, + 0x3CE83C843BF6, 0x3CE93C843BF6, 0x3CEA3C843BF6, 0x3CEB3C843BF6, 0x3C853BF6, 0x3CD13C853BF6, 0x3CD23C853BF6, 0x3CD33C853BF6, 0x3CD43C853BF6, 0x3CD53C853BF6, 0x3CD63C853BF6, 0x3CD73C853BF6, 0x3CD83C853BF6, 0x3CD93C853BF6, 0x3CDA3C853BF6, + 0x3CDB3C853BF6, 0x3CDC3C853BF6, 0x3CDD3C853BF6, 0x3CDE3C853BF6, 0x3CDF3C853BF6, 0x3CE03C853BF6, 0x3CE13C853BF6, 0x3CE23C853BF6, 0x3CE33C853BF6, 0x3CE43C853BF6, 0x3CE53C853BF6, 0x3CE63C853BF6, 
0x3CE73C853BF6, 0x3CE83C853BF6, 0x3CE93C853BF6, + 0x3CEA3C853BF6, 0x3CEB3C853BF6, 0x3C863BF6, 0x3CD13C863BF6, 0x3CD23C863BF6, 0x3CD33C863BF6, 0x3CD43C863BF6, 0x3CD53C863BF6, 0x3CD63C863BF6, 0x3CD73C863BF6, 0x3CD83C863BF6, 0x3CD93C863BF6, 0x3CDA3C863BF6, 0x3CDB3C863BF6, 0x3CDC3C863BF6, + 0x3CDD3C863BF6, 0x3CDE3C863BF6, 0x3CDF3C863BF6, 0x3CE03C863BF6, 0x3CE13C863BF6, 0x3CE23C863BF6, 0x3CE33C863BF6, 0x3CE43C863BF6, 0x3CE53C863BF6, 0x3CE63C863BF6, 0x3CE73C863BF6, 0x3CE83C863BF6, 0x3CE93C863BF6, 0x3CEA3C863BF6, 0x3CEB3C863BF6, + 0x3C873BF6, 0x3CD13C873BF6, 0x3CD23C873BF6, 0x3CD33C873BF6, 0x3CD43C873BF6, 0x3CD53C873BF6, 0x3CD63C873BF6, 0x3CD73C873BF6, 0x3CD83C873BF6, 0x3CD93C873BF6, 0x3CDA3C873BF6, 0x3CDB3C873BF6, 0x3CDC3C873BF6, 0x3CDD3C873BF6, 0x3CDE3C873BF6, + 0x3CDF3C873BF6, 0x3CE03C873BF6, 0x3CE13C873BF6, 0x3CE23C873BF6, 0x3CE33C873BF6, 0x3CE43C873BF6, 0x3CE53C873BF6, 0x3CE63C873BF6, 0x3CE73C873BF6, 0x3CE83C873BF6, 0x3CE93C873BF6, 0x3CEA3C873BF6, 0x3CEB3C873BF6, 0x3C733BF7, 0x3CD13C733BF7, + 0x3CD23C733BF7, 0x3CD33C733BF7, 0x3CD43C733BF7, 0x3CD53C733BF7, 0x3CD63C733BF7, 0x3CD73C733BF7, 0x3CD83C733BF7, 0x3CD93C733BF7, 0x3CDA3C733BF7, 0x3CDB3C733BF7, 0x3CDC3C733BF7, 0x3CDD3C733BF7, 0x3CDE3C733BF7, 0x3CDF3C733BF7, 0x3CE03C733BF7, + 0x3CE13C733BF7, 0x3CE23C733BF7, 0x3CE33C733BF7, 0x3CE43C733BF7, 0x3CE53C733BF7, 0x3CE63C733BF7, 0x3CE73C733BF7, 0x3CE83C733BF7, 0x3CE93C733BF7, 0x3CEA3C733BF7, 0x3CEB3C733BF7, 0x3C743BF7, 0x3CD13C743BF7, 0x3CD23C743BF7, 0x3CD33C743BF7, + 0x3CD43C743BF7, 0x3CD53C743BF7, 0x3CD63C743BF7, 0x3CD73C743BF7, 0x3CD83C743BF7, 0x3CD93C743BF7, 0x3CDA3C743BF7, 0x3CDB3C743BF7, 0x3CDC3C743BF7, 0x3CDD3C743BF7, 0x3CDE3C743BF7, 0x3CDF3C743BF7, 0x3CE03C743BF7, 0x3CE13C743BF7, 0x3CE23C743BF7, + 0x3CE33C743BF7, 0x3CE43C743BF7, 0x3CE53C743BF7, 0x3CE63C743BF7, 0x3CE73C743BF7, 0x3CE83C743BF7, 0x3CE93C743BF7, 0x3CEA3C743BF7, 0x3CEB3C743BF7, 0x3C753BF7, 0x3CD13C753BF7, 0x3CD23C753BF7, 0x3CD33C753BF7, 0x3CD43C753BF7, 0x3CD53C753BF7, + 0x3CD63C753BF7, 0x3CD73C753BF7, 
0x3CD83C753BF7, 0x3CD93C753BF7, 0x3CDA3C753BF7, 0x3CDB3C753BF7, 0x3CDC3C753BF7, 0x3CDD3C753BF7, 0x3CDE3C753BF7, 0x3CDF3C753BF7, 0x3CE03C753BF7, 0x3CE13C753BF7, 0x3CE23C753BF7, 0x3CE33C753BF7, 0x3CE43C753BF7, + 0x3CE53C753BF7, 0x3CE63C753BF7, 0x3CE73C753BF7, 0x3CE83C753BF7, 0x3CE93C753BF7, 0x3CEA3C753BF7, 0x3CEB3C753BF7, 0x3C763BF7, 0x3CD13C763BF7, 0x3CD23C763BF7, 0x3CD33C763BF7, 0x3CD43C763BF7, 0x3CD53C763BF7, 0x3CD63C763BF7, 0x3CD73C763BF7, + 0x3CD83C763BF7, 0x3CD93C763BF7, 0x3CDA3C763BF7, 0x3CDB3C763BF7, 0x3CDC3C763BF7, 0x3CDD3C763BF7, 0x3CDE3C763BF7, 0x3CDF3C763BF7, 0x3CE03C763BF7, 0x3CE13C763BF7, 0x3CE23C763BF7, 0x3CE33C763BF7, 0x3CE43C763BF7, 0x3CE53C763BF7, 0x3CE63C763BF7, + 0x3CE73C763BF7, 0x3CE83C763BF7, 0x3CE93C763BF7, 0x3CEA3C763BF7, 0x3CEB3C763BF7, 0x3C773BF7, 0x3CD13C773BF7, 0x3CD23C773BF7, 0x3CD33C773BF7, 0x3CD43C773BF7, 0x3CD53C773BF7, 0x3CD63C773BF7, 0x3CD73C773BF7, 0x3CD83C773BF7, 0x3CD93C773BF7, + 0x3CDA3C773BF7, 0x3CDB3C773BF7, 0x3CDC3C773BF7, 0x3CDD3C773BF7, 0x3CDE3C773BF7, 0x3CDF3C773BF7, 0x3CE03C773BF7, 0x3CE13C773BF7, 0x3CE23C773BF7, 0x3CE33C773BF7, 0x3CE43C773BF7, 0x3CE53C773BF7, 0x3CE63C773BF7, 0x3CE73C773BF7, 0x3CE83C773BF7, + 0x3CE93C773BF7, 0x3CEA3C773BF7, 0x3CEB3C773BF7, 0x3C783BF7, 0x3CD13C783BF7, 0x3CD23C783BF7, 0x3CD33C783BF7, 0x3CD43C783BF7, 0x3CD53C783BF7, 0x3CD63C783BF7, 0x3CD73C783BF7, 0x3CD83C783BF7, 0x3CD93C783BF7, 0x3CDA3C783BF7, 0x3CDB3C783BF7, + 0x3CDC3C783BF7, 0x3CDD3C783BF7, 0x3CDE3C783BF7, 0x3CDF3C783BF7, 0x3CE03C783BF7, 0x3CE13C783BF7, 0x3CE23C783BF7, 0x3CE33C783BF7, 0x3CE43C783BF7, 0x3CE53C783BF7, 0x3CE63C783BF7, 0x3CE73C783BF7, 0x3CE83C783BF7, 0x3CE93C783BF7, 0x3CEA3C783BF7, + 0x3CEB3C783BF7, 0x3C793BF7, 0x3CD13C793BF7, 0x3CD23C793BF7, 0x3CD33C793BF7, 0x3CD43C793BF7, 0x3CD53C793BF7, 0x3CD63C793BF7, 0x3CD73C793BF7, 0x3CD83C793BF7, 0x3CD93C793BF7, 0x3CDA3C793BF7, 0x3CDB3C793BF7, 0x3CDC3C793BF7, 0x3CDD3C793BF7, + 0x3CDE3C793BF7, 0x3CDF3C793BF7, 0x3CE03C793BF7, 0x3CE13C793BF7, 0x3CE23C793BF7, 0x3CE33C793BF7, 0x3CE43C793BF7, 
0x3CE53C793BF7, 0x3CE63C793BF7, 0x3CE73C793BF7, 0x3CE83C793BF7, 0x3CE93C793BF7, 0x3CEA3C793BF7, 0x3CEB3C793BF7, 0x3C7A3BF7, + 0x3CD13C7A3BF7, 0x3CD23C7A3BF7, 0x3CD33C7A3BF7, 0x3CD43C7A3BF7, 0x3CD53C7A3BF7, 0x3CD63C7A3BF7, 0x3CD73C7A3BF7, 0x3CD83C7A3BF7, 0x3CD93C7A3BF7, 0x3CDA3C7A3BF7, 0x3CDB3C7A3BF7, 0x3CDC3C7A3BF7, 0x3CDD3C7A3BF7, 0x3CDE3C7A3BF7, 0x3CDF3C7A3BF7, + 0x3CE03C7A3BF7, 0x3CE13C7A3BF7, 0x3CE23C7A3BF7, 0x3CE33C7A3BF7, 0x3CE43C7A3BF7, 0x3CE53C7A3BF7, 0x3CE63C7A3BF7, 0x3CE73C7A3BF7, 0x3CE83C7A3BF7, 0x3CE93C7A3BF7, 0x3CEA3C7A3BF7, 0x3CEB3C7A3BF7, 0x3C7B3BF7, 0x3CD13C7B3BF7, 0x3CD23C7B3BF7, + 0x3CD33C7B3BF7, 0x3CD43C7B3BF7, 0x3CD53C7B3BF7, 0x3CD63C7B3BF7, 0x3CD73C7B3BF7, 0x3CD83C7B3BF7, 0x3CD93C7B3BF7, 0x3CDA3C7B3BF7, 0x3CDB3C7B3BF7, 0x3CDC3C7B3BF7, 0x3CDD3C7B3BF7, 0x3CDE3C7B3BF7, 0x3CDF3C7B3BF7, 0x3CE03C7B3BF7, 0x3CE13C7B3BF7, + 0x3CE23C7B3BF7, 0x3CE33C7B3BF7, 0x3CE43C7B3BF7, 0x3CE53C7B3BF7, 0x3CE63C7B3BF7, 0x3CE73C7B3BF7, 0x3CE83C7B3BF7, 0x3CE93C7B3BF7, 0x3CEA3C7B3BF7, 0x3CEB3C7B3BF7, 0x3C7C3BF7, 0x3CD13C7C3BF7, 0x3CD23C7C3BF7, 0x3CD33C7C3BF7, 0x3CD43C7C3BF7, + 0x3CD53C7C3BF7, 0x3CD63C7C3BF7, 0x3CD73C7C3BF7, 0x3CD83C7C3BF7, 0x3CD93C7C3BF7, 0x3CDA3C7C3BF7, 0x3CDB3C7C3BF7, 0x3CDC3C7C3BF7, 0x3CDD3C7C3BF7, 0x3CDE3C7C3BF7, 0x3CDF3C7C3BF7, 0x3CE03C7C3BF7, 0x3CE13C7C3BF7, 0x3CE23C7C3BF7, 0x3CE33C7C3BF7, + 0x3CE43C7C3BF7, 0x3CE53C7C3BF7, 0x3CE63C7C3BF7, 0x3CE73C7C3BF7, 0x3CE83C7C3BF7, 0x3CE93C7C3BF7, 0x3CEA3C7C3BF7, 0x3CEB3C7C3BF7, 0x3C7D3BF7, 0x3CD13C7D3BF7, 0x3CD23C7D3BF7, 0x3CD33C7D3BF7, 0x3CD43C7D3BF7, 0x3CD53C7D3BF7, 0x3CD63C7D3BF7, + 0x3CD73C7D3BF7, 0x3CD83C7D3BF7, 0x3CD93C7D3BF7, 0x3CDA3C7D3BF7, 0x3CDB3C7D3BF7, 0x3CDC3C7D3BF7, 0x3CDD3C7D3BF7, 0x3CDE3C7D3BF7, 0x3CDF3C7D3BF7, 0x3CE03C7D3BF7, 0x3CE13C7D3BF7, 0x3CE23C7D3BF7, 0x3CE33C7D3BF7, 0x3CE43C7D3BF7, 0x3CE53C7D3BF7, + 0x3CE63C7D3BF7, 0x3CE73C7D3BF7, 0x3CE83C7D3BF7, 0x3CE93C7D3BF7, 0x3CEA3C7D3BF7, 0x3CEB3C7D3BF7, 0x3C7E3BF7, 0x3CD13C7E3BF7, 0x3CD23C7E3BF7, 0x3CD33C7E3BF7, 0x3CD43C7E3BF7, 0x3CD53C7E3BF7, 
0x3CD63C7E3BF7, 0x3CD73C7E3BF7, 0x3CD83C7E3BF7, + 0x3CD93C7E3BF7, 0x3CDA3C7E3BF7, 0x3CDB3C7E3BF7, 0x3CDC3C7E3BF7, 0x3CDD3C7E3BF7, 0x3CDE3C7E3BF7, 0x3CDF3C7E3BF7, 0x3CE03C7E3BF7, 0x3CE13C7E3BF7, 0x3CE23C7E3BF7, 0x3CE33C7E3BF7, 0x3CE43C7E3BF7, 0x3CE53C7E3BF7, 0x3CE63C7E3BF7, 0x3CE73C7E3BF7, + 0x3CE83C7E3BF7, 0x3CE93C7E3BF7, 0x3CEA3C7E3BF7, 0x3CEB3C7E3BF7, 0x3C7F3BF7, 0x3CD13C7F3BF7, 0x3CD23C7F3BF7, 0x3CD33C7F3BF7, 0x3CD43C7F3BF7, 0x3CD53C7F3BF7, 0x3CD63C7F3BF7, 0x3CD73C7F3BF7, 0x3CD83C7F3BF7, 0x3CD93C7F3BF7, 0x3CDA3C7F3BF7, + 0x3CDB3C7F3BF7, 0x3CDC3C7F3BF7, 0x3CDD3C7F3BF7, 0x3CDE3C7F3BF7, 0x3CDF3C7F3BF7, 0x3CE03C7F3BF7, 0x3CE13C7F3BF7, 0x3CE23C7F3BF7, 0x3CE33C7F3BF7, 0x3CE43C7F3BF7, 0x3CE53C7F3BF7, 0x3CE63C7F3BF7, 0x3CE73C7F3BF7, 0x3CE83C7F3BF7, 0x3CE93C7F3BF7, + 0x3CEA3C7F3BF7, 0x3CEB3C7F3BF7, 0x3C803BF7, 0x3CD13C803BF7, 0x3CD23C803BF7, 0x3CD33C803BF7, 0x3CD43C803BF7, 0x3CD53C803BF7, 0x3CD63C803BF7, 0x3CD73C803BF7, 0x3CD83C803BF7, 0x3CD93C803BF7, 0x3CDA3C803BF7, 0x3CDB3C803BF7, 0x3CDC3C803BF7, + 0x3CDD3C803BF7, 0x3CDE3C803BF7, 0x3CDF3C803BF7, 0x3CE03C803BF7, 0x3CE13C803BF7, 0x3CE23C803BF7, 0x3CE33C803BF7, 0x3CE43C803BF7, 0x3CE53C803BF7, 0x3CE63C803BF7, 0x3CE73C803BF7, 0x3CE83C803BF7, 0x3CE93C803BF7, 0x3CEA3C803BF7, 0x3CEB3C803BF7, + 0x3C813BF7, 0x3CD13C813BF7, 0x3CD23C813BF7, 0x3CD33C813BF7, 0x3CD43C813BF7, 0x3CD53C813BF7, 0x3CD63C813BF7, 0x3CD73C813BF7, 0x3CD83C813BF7, 0x3CD93C813BF7, 0x3CDA3C813BF7, 0x3CDB3C813BF7, 0x3CDC3C813BF7, 0x3CDD3C813BF7, 0x3CDE3C813BF7, + 0x3CDF3C813BF7, 0x3CE03C813BF7, 0x3CE13C813BF7, 0x3CE23C813BF7, 0x3CE33C813BF7, 0x3CE43C813BF7, 0x3CE53C813BF7, 0x3CE63C813BF7, 0x3CE73C813BF7, 0x3CE83C813BF7, 0x3CE93C813BF7, 0x3CEA3C813BF7, 0x3CEB3C813BF7, 0x3C823BF7, 0x3CD13C823BF7, + 0x3CD23C823BF7, 0x3CD33C823BF7, 0x3CD43C823BF7, 0x3CD53C823BF7, 0x3CD63C823BF7, 0x3CD73C823BF7, 0x3CD83C823BF7, 0x3CD93C823BF7, 0x3CDA3C823BF7, 0x3CDB3C823BF7, 0x3CDC3C823BF7, 0x3CDD3C823BF7, 0x3CDE3C823BF7, 0x3CDF3C823BF7, 0x3CE03C823BF7, + 0x3CE13C823BF7, 
0x3CE23C823BF7, 0x3CE33C823BF7, 0x3CE43C823BF7, 0x3CE53C823BF7, 0x3CE63C823BF7, 0x3CE73C823BF7, 0x3CE83C823BF7, 0x3CE93C823BF7, 0x3CEA3C823BF7, 0x3CEB3C823BF7, 0x3C833BF7, 0x3CD13C833BF7, 0x3CD23C833BF7, 0x3CD33C833BF7, + 0x3CD43C833BF7, 0x3CD53C833BF7, 0x3CD63C833BF7, 0x3CD73C833BF7, 0x3CD83C833BF7, 0x3CD93C833BF7, 0x3CDA3C833BF7, 0x3CDB3C833BF7, 0x3CDC3C833BF7, 0x3CDD3C833BF7, 0x3CDE3C833BF7, 0x3CDF3C833BF7, 0x3CE03C833BF7, 0x3CE13C833BF7, 0x3CE23C833BF7, + 0x3CE33C833BF7, 0x3CE43C833BF7, 0x3CE53C833BF7, 0x3CE63C833BF7, 0x3CE73C833BF7, 0x3CE83C833BF7, 0x3CE93C833BF7, 0x3CEA3C833BF7, 0x3CEB3C833BF7, 0x3C843BF7, 0x3CD13C843BF7, 0x3CD23C843BF7, 0x3CD33C843BF7, 0x3CD43C843BF7, 0x3CD53C843BF7, + 0x3CD63C843BF7, 0x3CD73C843BF7, 0x3CD83C843BF7, 0x3CD93C843BF7, 0x3CDA3C843BF7, 0x3CDB3C843BF7, 0x3CDC3C843BF7, 0x3CDD3C843BF7, 0x3CDE3C843BF7, 0x3CDF3C843BF7, 0x3CE03C843BF7, 0x3CE13C843BF7, 0x3CE23C843BF7, 0x3CE33C843BF7, 0x3CE43C843BF7, + 0x3CE53C843BF7, 0x3CE63C843BF7, 0x3CE73C843BF7, 0x3CE83C843BF7, 0x3CE93C843BF7, 0x3CEA3C843BF7, 0x3CEB3C843BF7, 0x3C853BF7, 0x3CD13C853BF7, 0x3CD23C853BF7, 0x3CD33C853BF7, 0x3CD43C853BF7, 0x3CD53C853BF7, 0x3CD63C853BF7, 0x3CD73C853BF7, + 0x3CD83C853BF7, 0x3CD93C853BF7, 0x3CDA3C853BF7, 0x3CDB3C853BF7, 0x3CDC3C853BF7, 0x3CDD3C853BF7, 0x3CDE3C853BF7, 0x3CDF3C853BF7, 0x3CE03C853BF7, 0x3CE13C853BF7, 0x3CE23C853BF7, 0x3CE33C853BF7, 0x3CE43C853BF7, 0x3CE53C853BF7, 0x3CE63C853BF7, + 0x3CE73C853BF7, 0x3CE83C853BF7, 0x3CE93C853BF7, 0x3CEA3C853BF7, 0x3CEB3C853BF7, 0x3C863BF7, 0x3CD13C863BF7, 0x3CD23C863BF7, 0x3CD33C863BF7, 0x3CD43C863BF7, 0x3CD53C863BF7, 0x3CD63C863BF7, 0x3CD73C863BF7, 0x3CD83C863BF7, 0x3CD93C863BF7, + 0x3CDA3C863BF7, 0x3CDB3C863BF7, 0x3CDC3C863BF7, 0x3CDD3C863BF7, 0x3CDE3C863BF7, 0x3CDF3C863BF7, 0x3CE03C863BF7, 0x3CE13C863BF7, 0x3CE23C863BF7, 0x3CE33C863BF7, 0x3CE43C863BF7, 0x3CE53C863BF7, 0x3CE63C863BF7, 0x3CE73C863BF7, 0x3CE83C863BF7, + 0x3CE93C863BF7, 0x3CEA3C863BF7, 0x3CEB3C863BF7, 0x3C873BF7, 0x3CD13C873BF7, 0x3CD23C873BF7, 
0x3CD33C873BF7, 0x3CD43C873BF7, 0x3CD53C873BF7, 0x3CD63C873BF7, 0x3CD73C873BF7, 0x3CD83C873BF7, 0x3CD93C873BF7, 0x3CDA3C873BF7, 0x3CDB3C873BF7, + 0x3CDC3C873BF7, 0x3CDD3C873BF7, 0x3CDE3C873BF7, 0x3CDF3C873BF7, 0x3CE03C873BF7, 0x3CE13C873BF7, 0x3CE23C873BF7, 0x3CE33C873BF7, 0x3CE43C873BF7, 0x3CE53C873BF7, 0x3CE63C873BF7, 0x3CE73C873BF7, 0x3CE83C873BF7, 0x3CE93C873BF7, 0x3CEA3C873BF7, + 0x3CEB3C873BF7, 0x3C733BF8, 0x3CD13C733BF8, 0x3CD23C733BF8, 0x3CD33C733BF8, 0x3CD43C733BF8, 0x3CD53C733BF8, 0x3CD63C733BF8, 0x3CD73C733BF8, 0x3CD83C733BF8, 0x3CD93C733BF8, 0x3CDA3C733BF8, 0x3CDB3C733BF8, 0x3CDC3C733BF8, 0x3CDD3C733BF8, + 0x3CDE3C733BF8, 0x3CDF3C733BF8, 0x3CE03C733BF8, 0x3CE13C733BF8, 0x3CE23C733BF8, 0x3CE33C733BF8, 0x3CE43C733BF8, 0x3CE53C733BF8, 0x3CE63C733BF8, 0x3CE73C733BF8, 0x3CE83C733BF8, 0x3CE93C733BF8, 0x3CEA3C733BF8, 0x3CEB3C733BF8, 0x3C743BF8, + 0x3CD13C743BF8, 0x3CD23C743BF8, 0x3CD33C743BF8, 0x3CD43C743BF8, 0x3CD53C743BF8, 0x3CD63C743BF8, 0x3CD73C743BF8, 0x3CD83C743BF8, 0x3CD93C743BF8, 0x3CDA3C743BF8, 0x3CDB3C743BF8, 0x3CDC3C743BF8, 0x3CDD3C743BF8, 0x3CDE3C743BF8, 0x3CDF3C743BF8, + 0x3CE03C743BF8, 0x3CE13C743BF8, 0x3CE23C743BF8, 0x3CE33C743BF8, 0x3CE43C743BF8, 0x3CE53C743BF8, 0x3CE63C743BF8, 0x3CE73C743BF8, 0x3CE83C743BF8, 0x3CE93C743BF8, 0x3CEA3C743BF8, 0x3CEB3C743BF8, 0x3C753BF8, 0x3CD13C753BF8, 0x3CD23C753BF8, + 0x3CD33C753BF8, 0x3CD43C753BF8, 0x3CD53C753BF8, 0x3CD63C753BF8, 0x3CD73C753BF8, 0x3CD83C753BF8, 0x3CD93C753BF8, 0x3CDA3C753BF8, 0x3CDB3C753BF8, 0x3CDC3C753BF8, 0x3CDD3C753BF8, 0x3CDE3C753BF8, 0x3CDF3C753BF8, 0x3CE03C753BF8, 0x3CE13C753BF8, + 0x3CE23C753BF8, 0x3CE33C753BF8, 0x3CE43C753BF8, 0x3CE53C753BF8, 0x3CE63C753BF8, 0x3CE73C753BF8, 0x3CE83C753BF8, 0x3CE93C753BF8, 0x3CEA3C753BF8, 0x3CEB3C753BF8, 0x3C763BF8, 0x3CD13C763BF8, 0x3CD23C763BF8, 0x3CD33C763BF8, 0x3CD43C763BF8, + 0x3CD53C763BF8, 0x3CD63C763BF8, 0x3CD73C763BF8, 0x3CD83C763BF8, 0x3CD93C763BF8, 0x3CDA3C763BF8, 0x3CDB3C763BF8, 0x3CDC3C763BF8, 0x3CDD3C763BF8, 0x3CDE3C763BF8, 0x3CDF3C763BF8, 
0x3CE03C763BF8, 0x3CE13C763BF8, 0x3CE23C763BF8, 0x3CE33C763BF8, + 0x3CE43C763BF8, 0x3CE53C763BF8, 0x3CE63C763BF8, 0x3CE73C763BF8, 0x3CE83C763BF8, 0x3CE93C763BF8, 0x3CEA3C763BF8, 0x3CEB3C763BF8, 0x3C773BF8, 0x3CD13C773BF8, 0x3CD23C773BF8, 0x3CD33C773BF8, 0x3CD43C773BF8, 0x3CD53C773BF8, 0x3CD63C773BF8, + 0x3CD73C773BF8, 0x3CD83C773BF8, 0x3CD93C773BF8, 0x3CDA3C773BF8, 0x3CDB3C773BF8, 0x3CDC3C773BF8, 0x3CDD3C773BF8, 0x3CDE3C773BF8, 0x3CDF3C773BF8, 0x3CE03C773BF8, 0x3CE13C773BF8, 0x3CE23C773BF8, 0x3CE33C773BF8, 0x3CE43C773BF8, 0x3CE53C773BF8, + 0x3CE63C773BF8, 0x3CE73C773BF8, 0x3CE83C773BF8, 0x3CE93C773BF8, 0x3CEA3C773BF8, 0x3CEB3C773BF8, 0x3C783BF8, 0x3CD13C783BF8, 0x3CD23C783BF8, 0x3CD33C783BF8, 0x3CD43C783BF8, 0x3CD53C783BF8, 0x3CD63C783BF8, 0x3CD73C783BF8, 0x3CD83C783BF8, + 0x3CD93C783BF8, 0x3CDA3C783BF8, 0x3CDB3C783BF8, 0x3CDC3C783BF8, 0x3CDD3C783BF8, 0x3CDE3C783BF8, 0x3CDF3C783BF8, 0x3CE03C783BF8, 0x3CE13C783BF8, 0x3CE23C783BF8, 0x3CE33C783BF8, 0x3CE43C783BF8, 0x3CE53C783BF8, 0x3CE63C783BF8, 0x3CE73C783BF8, + 0x3CE83C783BF8, 0x3CE93C783BF8, 0x3CEA3C783BF8, 0x3CEB3C783BF8, 0x3C793BF8, 0x3CD13C793BF8, 0x3CD23C793BF8, 0x3CD33C793BF8, 0x3CD43C793BF8, 0x3CD53C793BF8, 0x3CD63C793BF8, 0x3CD73C793BF8, 0x3CD83C793BF8, 0x3CD93C793BF8, 0x3CDA3C793BF8, + 0x3CDB3C793BF8, 0x3CDC3C793BF8, 0x3CDD3C793BF8, 0x3CDE3C793BF8, 0x3CDF3C793BF8, 0x3CE03C793BF8, 0x3CE13C793BF8, 0x3CE23C793BF8, 0x3CE33C793BF8, 0x3CE43C793BF8, 0x3CE53C793BF8, 0x3CE63C793BF8, 0x3CE73C793BF8, 0x3CE83C793BF8, 0x3CE93C793BF8, + 0x3CEA3C793BF8, 0x3CEB3C793BF8, 0x3C7A3BF8, 0x3CD13C7A3BF8, 0x3CD23C7A3BF8, 0x3CD33C7A3BF8, 0x3CD43C7A3BF8, 0x3CD53C7A3BF8, 0x3CD63C7A3BF8, 0x3CD73C7A3BF8, 0x3CD83C7A3BF8, 0x3CD93C7A3BF8, 0x3CDA3C7A3BF8, 0x3CDB3C7A3BF8, 0x3CDC3C7A3BF8, + 0x3CDD3C7A3BF8, 0x3CDE3C7A3BF8, 0x3CDF3C7A3BF8, 0x3CE03C7A3BF8, 0x3CE13C7A3BF8, 0x3CE23C7A3BF8, 0x3CE33C7A3BF8, 0x3CE43C7A3BF8, 0x3CE53C7A3BF8, 0x3CE63C7A3BF8, 0x3CE73C7A3BF8, 0x3CE83C7A3BF8, 0x3CE93C7A3BF8, 0x3CEA3C7A3BF8, 0x3CEB3C7A3BF8, + 0x3C7B3BF8, 
0x3CD13C7B3BF8, 0x3CD23C7B3BF8, 0x3CD33C7B3BF8, 0x3CD43C7B3BF8, 0x3CD53C7B3BF8, 0x3CD63C7B3BF8, 0x3CD73C7B3BF8, 0x3CD83C7B3BF8, 0x3CD93C7B3BF8, 0x3CDA3C7B3BF8, 0x3CDB3C7B3BF8, 0x3CDC3C7B3BF8, 0x3CDD3C7B3BF8, 0x3CDE3C7B3BF8, + 0x3CDF3C7B3BF8, 0x3CE03C7B3BF8, 0x3CE13C7B3BF8, 0x3CE23C7B3BF8, 0x3CE33C7B3BF8, 0x3CE43C7B3BF8, 0x3CE53C7B3BF8, 0x3CE63C7B3BF8, 0x3CE73C7B3BF8, 0x3CE83C7B3BF8, 0x3CE93C7B3BF8, 0x3CEA3C7B3BF8, 0x3CEB3C7B3BF8, 0x3C7C3BF8, 0x3CD13C7C3BF8, + 0x3CD23C7C3BF8, 0x3CD33C7C3BF8, 0x3CD43C7C3BF8, 0x3CD53C7C3BF8, 0x3CD63C7C3BF8, 0x3CD73C7C3BF8, 0x3CD83C7C3BF8, 0x3CD93C7C3BF8, 0x3CDA3C7C3BF8, 0x3CDB3C7C3BF8, 0x3CDC3C7C3BF8, 0x3CDD3C7C3BF8, 0x3CDE3C7C3BF8, 0x3CDF3C7C3BF8, 0x3CE03C7C3BF8, + 0x3CE13C7C3BF8, 0x3CE23C7C3BF8, 0x3CE33C7C3BF8, 0x3CE43C7C3BF8, 0x3CE53C7C3BF8, 0x3CE63C7C3BF8, 0x3CE73C7C3BF8, 0x3CE83C7C3BF8, 0x3CE93C7C3BF8, 0x3CEA3C7C3BF8, 0x3CEB3C7C3BF8, 0x3C7D3BF8, 0x3CD13C7D3BF8, 0x3CD23C7D3BF8, 0x3CD33C7D3BF8, + 0x3CD43C7D3BF8, 0x3CD53C7D3BF8, 0x3CD63C7D3BF8, 0x3CD73C7D3BF8, 0x3CD83C7D3BF8, 0x3CD93C7D3BF8, 0x3CDA3C7D3BF8, 0x3CDB3C7D3BF8, 0x3CDC3C7D3BF8, 0x3CDD3C7D3BF8, 0x3CDE3C7D3BF8, 0x3CDF3C7D3BF8, 0x3CE03C7D3BF8, 0x3CE13C7D3BF8, 0x3CE23C7D3BF8, + 0x3CE33C7D3BF8, 0x3CE43C7D3BF8, 0x3CE53C7D3BF8, 0x3CE63C7D3BF8, 0x3CE73C7D3BF8, 0x3CE83C7D3BF8, 0x3CE93C7D3BF8, 0x3CEA3C7D3BF8, 0x3CEB3C7D3BF8, 0x3C7E3BF8, 0x3CD13C7E3BF8, 0x3CD23C7E3BF8, 0x3CD33C7E3BF8, 0x3CD43C7E3BF8, 0x3CD53C7E3BF8, + 0x3CD63C7E3BF8, 0x3CD73C7E3BF8, 0x3CD83C7E3BF8, 0x3CD93C7E3BF8, 0x3CDA3C7E3BF8, 0x3CDB3C7E3BF8, 0x3CDC3C7E3BF8, 0x3CDD3C7E3BF8, 0x3CDE3C7E3BF8, 0x3CDF3C7E3BF8, 0x3CE03C7E3BF8, 0x3CE13C7E3BF8, 0x3CE23C7E3BF8, 0x3CE33C7E3BF8, 0x3CE43C7E3BF8, + 0x3CE53C7E3BF8, 0x3CE63C7E3BF8, 0x3CE73C7E3BF8, 0x3CE83C7E3BF8, 0x3CE93C7E3BF8, 0x3CEA3C7E3BF8, 0x3CEB3C7E3BF8, 0x3C7F3BF8, 0x3CD13C7F3BF8, 0x3CD23C7F3BF8, 0x3CD33C7F3BF8, 0x3CD43C7F3BF8, 0x3CD53C7F3BF8, 0x3CD63C7F3BF8, 0x3CD73C7F3BF8, + 0x3CD83C7F3BF8, 0x3CD93C7F3BF8, 0x3CDA3C7F3BF8, 0x3CDB3C7F3BF8, 0x3CDC3C7F3BF8, 0x3CDD3C7F3BF8, 
0x3CDE3C7F3BF8, 0x3CDF3C7F3BF8, 0x3CE03C7F3BF8, 0x3CE13C7F3BF8, 0x3CE23C7F3BF8, 0x3CE33C7F3BF8, 0x3CE43C7F3BF8, 0x3CE53C7F3BF8, 0x3CE63C7F3BF8, + 0x3CE73C7F3BF8, 0x3CE83C7F3BF8, 0x3CE93C7F3BF8, 0x3CEA3C7F3BF8, 0x3CEB3C7F3BF8, 0x3C803BF8, 0x3CD13C803BF8, 0x3CD23C803BF8, 0x3CD33C803BF8, 0x3CD43C803BF8, 0x3CD53C803BF8, 0x3CD63C803BF8, 0x3CD73C803BF8, 0x3CD83C803BF8, 0x3CD93C803BF8, + 0x3CDA3C803BF8, 0x3CDB3C803BF8, 0x3CDC3C803BF8, 0x3CDD3C803BF8, 0x3CDE3C803BF8, 0x3CDF3C803BF8, 0x3CE03C803BF8, 0x3CE13C803BF8, 0x3CE23C803BF8, 0x3CE33C803BF8, 0x3CE43C803BF8, 0x3CE53C803BF8, 0x3CE63C803BF8, 0x3CE73C803BF8, 0x3CE83C803BF8, + 0x3CE93C803BF8, 0x3CEA3C803BF8, 0x3CEB3C803BF8, 0x3C813BF8, 0x3CD13C813BF8, 0x3CD23C813BF8, 0x3CD33C813BF8, 0x3CD43C813BF8, 0x3CD53C813BF8, 0x3CD63C813BF8, 0x3CD73C813BF8, 0x3CD83C813BF8, 0x3CD93C813BF8, 0x3CDA3C813BF8, 0x3CDB3C813BF8, + 0x3CDC3C813BF8, 0x3CDD3C813BF8, 0x3CDE3C813BF8, 0x3CDF3C813BF8, 0x3CE03C813BF8, 0x3CE13C813BF8, 0x3CE23C813BF8, 0x3CE33C813BF8, 0x3CE43C813BF8, 0x3CE53C813BF8, 0x3CE63C813BF8, 0x3CE73C813BF8, 0x3CE83C813BF8, 0x3CE93C813BF8, 0x3CEA3C813BF8, + 0x3CEB3C813BF8, 0x3C823BF8, 0x3CD13C823BF8, 0x3CD23C823BF8, 0x3CD33C823BF8, 0x3CD43C823BF8, 0x3CD53C823BF8, 0x3CD63C823BF8, 0x3CD73C823BF8, 0x3CD83C823BF8, 0x3CD93C823BF8, 0x3CDA3C823BF8, 0x3CDB3C823BF8, 0x3CDC3C823BF8, 0x3CDD3C823BF8, + 0x3CDE3C823BF8, 0x3CDF3C823BF8, 0x3CE03C823BF8, 0x3CE13C823BF8, 0x3CE23C823BF8, 0x3CE33C823BF8, 0x3CE43C823BF8, 0x3CE53C823BF8, 0x3CE63C823BF8, 0x3CE73C823BF8, 0x3CE83C823BF8, 0x3CE93C823BF8, 0x3CEA3C823BF8, 0x3CEB3C823BF8, 0x3C833BF8, + 0x3CD13C833BF8, 0x3CD23C833BF8, 0x3CD33C833BF8, 0x3CD43C833BF8, 0x3CD53C833BF8, 0x3CD63C833BF8, 0x3CD73C833BF8, 0x3CD83C833BF8, 0x3CD93C833BF8, 0x3CDA3C833BF8, 0x3CDB3C833BF8, 0x3CDC3C833BF8, 0x3CDD3C833BF8, 0x3CDE3C833BF8, 0x3CDF3C833BF8, + 0x3CE03C833BF8, 0x3CE13C833BF8, 0x3CE23C833BF8, 0x3CE33C833BF8, 0x3CE43C833BF8, 0x3CE53C833BF8, 0x3CE63C833BF8, 0x3CE73C833BF8, 0x3CE83C833BF8, 0x3CE93C833BF8, 0x3CEA3C833BF8, 
0x3CEB3C833BF8, 0x3C843BF8, 0x3CD13C843BF8, 0x3CD23C843BF8, + 0x3CD33C843BF8, 0x3CD43C843BF8, 0x3CD53C843BF8, 0x3CD63C843BF8, 0x3CD73C843BF8, 0x3CD83C843BF8, 0x3CD93C843BF8, 0x3CDA3C843BF8, 0x3CDB3C843BF8, 0x3CDC3C843BF8, 0x3CDD3C843BF8, 0x3CDE3C843BF8, 0x3CDF3C843BF8, 0x3CE03C843BF8, 0x3CE13C843BF8, + 0x3CE23C843BF8, 0x3CE33C843BF8, 0x3CE43C843BF8, 0x3CE53C843BF8, 0x3CE63C843BF8, 0x3CE73C843BF8, 0x3CE83C843BF8, 0x3CE93C843BF8, 0x3CEA3C843BF8, 0x3CEB3C843BF8, 0x3C853BF8, 0x3CD13C853BF8, 0x3CD23C853BF8, 0x3CD33C853BF8, 0x3CD43C853BF8, + 0x3CD53C853BF8, 0x3CD63C853BF8, 0x3CD73C853BF8, 0x3CD83C853BF8, 0x3CD93C853BF8, 0x3CDA3C853BF8, 0x3CDB3C853BF8, 0x3CDC3C853BF8, 0x3CDD3C853BF8, 0x3CDE3C853BF8, 0x3CDF3C853BF8, 0x3CE03C853BF8, 0x3CE13C853BF8, 0x3CE23C853BF8, 0x3CE33C853BF8, + 0x3CE43C853BF8, 0x3CE53C853BF8, 0x3CE63C853BF8, 0x3CE73C853BF8, 0x3CE83C853BF8, 0x3CE93C853BF8, 0x3CEA3C853BF8, 0x3CEB3C853BF8, 0x3C863BF8, 0x3CD13C863BF8, 0x3CD23C863BF8, 0x3CD33C863BF8, 0x3CD43C863BF8, 0x3CD53C863BF8, 0x3CD63C863BF8, + 0x3CD73C863BF8, 0x3CD83C863BF8, 0x3CD93C863BF8, 0x3CDA3C863BF8, 0x3CDB3C863BF8, 0x3CDC3C863BF8, 0x3CDD3C863BF8, 0x3CDE3C863BF8, 0x3CDF3C863BF8, 0x3CE03C863BF8, 0x3CE13C863BF8, 0x3CE23C863BF8, 0x3CE33C863BF8, 0x3CE43C863BF8, 0x3CE53C863BF8, + 0x3CE63C863BF8, 0x3CE73C863BF8, 0x3CE83C863BF8, 0x3CE93C863BF8, 0x3CEA3C863BF8, 0x3CEB3C863BF8, 0x3C873BF8, 0x3CD13C873BF8, 0x3CD23C873BF8, 0x3CD33C873BF8, 0x3CD43C873BF8, 0x3CD53C873BF8, 0x3CD63C873BF8, 0x3CD73C873BF8, 0x3CD83C873BF8, + 0x3CD93C873BF8, 0x3CDA3C873BF8, 0x3CDB3C873BF8, 0x3CDC3C873BF8, 0x3CDD3C873BF8, 0x3CDE3C873BF8, 0x3CDF3C873BF8, 0x3CE03C873BF8, 0x3CE13C873BF8, 0x3CE23C873BF8, 0x3CE33C873BF8, 0x3CE43C873BF8, 0x3CE53C873BF8, 0x3CE63C873BF8, 0x3CE73C873BF8, + 0x3CE83C873BF8, 0x3CE93C873BF8, 0x3CEA3C873BF8, 0x3CEB3C873BF8, 0x3C733BF9, 0x3CD13C733BF9, 0x3CD23C733BF9, 0x3CD33C733BF9, 0x3CD43C733BF9, 0x3CD53C733BF9, 0x3CD63C733BF9, 0x3CD73C733BF9, 0x3CD83C733BF9, 0x3CD93C733BF9, 0x3CDA3C733BF9, + 0x3CDB3C733BF9, 
0x3CDC3C733BF9, 0x3CDD3C733BF9, 0x3CDE3C733BF9, 0x3CDF3C733BF9, 0x3CE03C733BF9, 0x3CE13C733BF9, 0x3CE23C733BF9, 0x3CE33C733BF9, 0x3CE43C733BF9, 0x3CE53C733BF9, 0x3CE63C733BF9, 0x3CE73C733BF9, 0x3CE83C733BF9, 0x3CE93C733BF9, + 0x3CEA3C733BF9, 0x3CEB3C733BF9, 0x3C743BF9, 0x3CD13C743BF9, 0x3CD23C743BF9, 0x3CD33C743BF9, 0x3CD43C743BF9, 0x3CD53C743BF9, 0x3CD63C743BF9, 0x3CD73C743BF9, 0x3CD83C743BF9, 0x3CD93C743BF9, 0x3CDA3C743BF9, 0x3CDB3C743BF9, 0x3CDC3C743BF9, + 0x3CDD3C743BF9, 0x3CDE3C743BF9, 0x3CDF3C743BF9, 0x3CE03C743BF9, 0x3CE13C743BF9, 0x3CE23C743BF9, 0x3CE33C743BF9, 0x3CE43C743BF9, 0x3CE53C743BF9, 0x3CE63C743BF9, 0x3CE73C743BF9, 0x3CE83C743BF9, 0x3CE93C743BF9, 0x3CEA3C743BF9, 0x3CEB3C743BF9, + 0x3C753BF9, 0x3CD13C753BF9, 0x3CD23C753BF9, 0x3CD33C753BF9, 0x3CD43C753BF9, 0x3CD53C753BF9, 0x3CD63C753BF9, 0x3CD73C753BF9, 0x3CD83C753BF9, 0x3CD93C753BF9, 0x3CDA3C753BF9, 0x3CDB3C753BF9, 0x3CDC3C753BF9, 0x3CDD3C753BF9, 0x3CDE3C753BF9, + 0x3CDF3C753BF9, 0x3CE03C753BF9, 0x3CE13C753BF9, 0x3CE23C753BF9, 0x3CE33C753BF9, 0x3CE43C753BF9, 0x3CE53C753BF9, 0x3CE63C753BF9, 0x3CE73C753BF9, 0x3CE83C753BF9, 0x3CE93C753BF9, 0x3CEA3C753BF9, 0x3CEB3C753BF9, 0x3C763BF9, 0x3CD13C763BF9, + 0x3CD23C763BF9, 0x3CD33C763BF9, 0x3CD43C763BF9, 0x3CD53C763BF9, 0x3CD63C763BF9, 0x3CD73C763BF9, 0x3CD83C763BF9, 0x3CD93C763BF9, 0x3CDA3C763BF9, 0x3CDB3C763BF9, 0x3CDC3C763BF9, 0x3CDD3C763BF9, 0x3CDE3C763BF9, 0x3CDF3C763BF9, 0x3CE03C763BF9, + 0x3CE13C763BF9, 0x3CE23C763BF9, 0x3CE33C763BF9, 0x3CE43C763BF9, 0x3CE53C763BF9, 0x3CE63C763BF9, 0x3CE73C763BF9, 0x3CE83C763BF9, 0x3CE93C763BF9, 0x3CEA3C763BF9, 0x3CEB3C763BF9, 0x3C773BF9, 0x3CD13C773BF9, 0x3CD23C773BF9, 0x3CD33C773BF9, + 0x3CD43C773BF9, 0x3CD53C773BF9, 0x3CD63C773BF9, 0x3CD73C773BF9, 0x3CD83C773BF9, 0x3CD93C773BF9, 0x3CDA3C773BF9, 0x3CDB3C773BF9, 0x3CDC3C773BF9, 0x3CDD3C773BF9, 0x3CDE3C773BF9, 0x3CDF3C773BF9, 0x3CE03C773BF9, 0x3CE13C773BF9, 0x3CE23C773BF9, + 0x3CE33C773BF9, 0x3CE43C773BF9, 0x3CE53C773BF9, 0x3CE63C773BF9, 0x3CE73C773BF9, 0x3CE83C773BF9, 
0x3CE93C773BF9, 0x3CEA3C773BF9, 0x3CEB3C773BF9, 0x3C783BF9, 0x3CD13C783BF9, 0x3CD23C783BF9, 0x3CD33C783BF9, 0x3CD43C783BF9, 0x3CD53C783BF9, + 0x3CD63C783BF9, 0x3CD73C783BF9, 0x3CD83C783BF9, 0x3CD93C783BF9, 0x3CDA3C783BF9, 0x3CDB3C783BF9, 0x3CDC3C783BF9, 0x3CDD3C783BF9, 0x3CDE3C783BF9, 0x3CDF3C783BF9, 0x3CE03C783BF9, 0x3CE13C783BF9, 0x3CE23C783BF9, 0x3CE33C783BF9, 0x3CE43C783BF9, + 0x3CE53C783BF9, 0x3CE63C783BF9, 0x3CE73C783BF9, 0x3CE83C783BF9, 0x3CE93C783BF9, 0x3CEA3C783BF9, 0x3CEB3C783BF9, 0x3C793BF9, 0x3CD13C793BF9, 0x3CD23C793BF9, 0x3CD33C793BF9, 0x3CD43C793BF9, 0x3CD53C793BF9, 0x3CD63C793BF9, 0x3CD73C793BF9, + 0x3CD83C793BF9, 0x3CD93C793BF9, 0x3CDA3C793BF9, 0x3CDB3C793BF9, 0x3CDC3C793BF9, 0x3CDD3C793BF9, 0x3CDE3C793BF9, 0x3CDF3C793BF9, 0x3CE03C793BF9, 0x3CE13C793BF9, 0x3CE23C793BF9, 0x3CE33C793BF9, 0x3CE43C793BF9, 0x3CE53C793BF9, 0x3CE63C793BF9, + 0x3CE73C793BF9, 0x3CE83C793BF9, 0x3CE93C793BF9, 0x3CEA3C793BF9, 0x3CEB3C793BF9, 0x3C7A3BF9, 0x3CD13C7A3BF9, 0x3CD23C7A3BF9, 0x3CD33C7A3BF9, 0x3CD43C7A3BF9, 0x3CD53C7A3BF9, 0x3CD63C7A3BF9, 0x3CD73C7A3BF9, 0x3CD83C7A3BF9, 0x3CD93C7A3BF9, + 0x3CDA3C7A3BF9, 0x3CDB3C7A3BF9, 0x3CDC3C7A3BF9, 0x3CDD3C7A3BF9, 0x3CDE3C7A3BF9, 0x3CDF3C7A3BF9, 0x3CE03C7A3BF9, 0x3CE13C7A3BF9, 0x3CE23C7A3BF9, 0x3CE33C7A3BF9, 0x3CE43C7A3BF9, 0x3CE53C7A3BF9, 0x3CE63C7A3BF9, 0x3CE73C7A3BF9, 0x3CE83C7A3BF9, + 0x3CE93C7A3BF9, 0x3CEA3C7A3BF9, 0x3CEB3C7A3BF9, 0x3C7B3BF9, 0x3CD13C7B3BF9, 0x3CD23C7B3BF9, 0x3CD33C7B3BF9, 0x3CD43C7B3BF9, 0x3CD53C7B3BF9, 0x3CD63C7B3BF9, 0x3CD73C7B3BF9, 0x3CD83C7B3BF9, 0x3CD93C7B3BF9, 0x3CDA3C7B3BF9, 0x3CDB3C7B3BF9, + 0x3CDC3C7B3BF9, 0x3CDD3C7B3BF9, 0x3CDE3C7B3BF9, 0x3CDF3C7B3BF9, 0x3CE03C7B3BF9, 0x3CE13C7B3BF9, 0x3CE23C7B3BF9, 0x3CE33C7B3BF9, 0x3CE43C7B3BF9, 0x3CE53C7B3BF9, 0x3CE63C7B3BF9, 0x3CE73C7B3BF9, 0x3CE83C7B3BF9, 0x3CE93C7B3BF9, 0x3CEA3C7B3BF9, + 0x3CEB3C7B3BF9, 0x3C7C3BF9, 0x3CD13C7C3BF9, 0x3CD23C7C3BF9, 0x3CD33C7C3BF9, 0x3CD43C7C3BF9, 0x3CD53C7C3BF9, 0x3CD63C7C3BF9, 0x3CD73C7C3BF9, 0x3CD83C7C3BF9, 0x3CD93C7C3BF9, 
0x3CDA3C7C3BF9, 0x3CDB3C7C3BF9, 0x3CDC3C7C3BF9, 0x3CDD3C7C3BF9, + 0x3CDE3C7C3BF9, 0x3CDF3C7C3BF9, 0x3CE03C7C3BF9, 0x3CE13C7C3BF9, 0x3CE23C7C3BF9, 0x3CE33C7C3BF9, 0x3CE43C7C3BF9, 0x3CE53C7C3BF9, 0x3CE63C7C3BF9, 0x3CE73C7C3BF9, 0x3CE83C7C3BF9, 0x3CE93C7C3BF9, 0x3CEA3C7C3BF9, 0x3CEB3C7C3BF9, 0x3C7D3BF9, + 0x3CD13C7D3BF9, 0x3CD23C7D3BF9, 0x3CD33C7D3BF9, 0x3CD43C7D3BF9, 0x3CD53C7D3BF9, 0x3CD63C7D3BF9, 0x3CD73C7D3BF9, 0x3CD83C7D3BF9, 0x3CD93C7D3BF9, 0x3CDA3C7D3BF9, 0x3CDB3C7D3BF9, 0x3CDC3C7D3BF9, 0x3CDD3C7D3BF9, 0x3CDE3C7D3BF9, 0x3CDF3C7D3BF9, + 0x3CE03C7D3BF9, 0x3CE13C7D3BF9, 0x3CE23C7D3BF9, 0x3CE33C7D3BF9, 0x3CE43C7D3BF9, 0x3CE53C7D3BF9, 0x3CE63C7D3BF9, 0x3CE73C7D3BF9, 0x3CE83C7D3BF9, 0x3CE93C7D3BF9, 0x3CEA3C7D3BF9, 0x3CEB3C7D3BF9, 0x3C7E3BF9, 0x3CD13C7E3BF9, 0x3CD23C7E3BF9, + 0x3CD33C7E3BF9, 0x3CD43C7E3BF9, 0x3CD53C7E3BF9, 0x3CD63C7E3BF9, 0x3CD73C7E3BF9, 0x3CD83C7E3BF9, 0x3CD93C7E3BF9, 0x3CDA3C7E3BF9, 0x3CDB3C7E3BF9, 0x3CDC3C7E3BF9, 0x3CDD3C7E3BF9, 0x3CDE3C7E3BF9, 0x3CDF3C7E3BF9, 0x3CE03C7E3BF9, 0x3CE13C7E3BF9, + 0x3CE23C7E3BF9, 0x3CE33C7E3BF9, 0x3CE43C7E3BF9, 0x3CE53C7E3BF9, 0x3CE63C7E3BF9, 0x3CE73C7E3BF9, 0x3CE83C7E3BF9, 0x3CE93C7E3BF9, 0x3CEA3C7E3BF9, 0x3CEB3C7E3BF9, 0x3C7F3BF9, 0x3CD13C7F3BF9, 0x3CD23C7F3BF9, 0x3CD33C7F3BF9, 0x3CD43C7F3BF9, + 0x3CD53C7F3BF9, 0x3CD63C7F3BF9, 0x3CD73C7F3BF9, 0x3CD83C7F3BF9, 0x3CD93C7F3BF9, 0x3CDA3C7F3BF9, 0x3CDB3C7F3BF9, 0x3CDC3C7F3BF9, 0x3CDD3C7F3BF9, 0x3CDE3C7F3BF9, 0x3CDF3C7F3BF9, 0x3CE03C7F3BF9, 0x3CE13C7F3BF9, 0x3CE23C7F3BF9, 0x3CE33C7F3BF9, + 0x3CE43C7F3BF9, 0x3CE53C7F3BF9, 0x3CE63C7F3BF9, 0x3CE73C7F3BF9, 0x3CE83C7F3BF9, 0x3CE93C7F3BF9, 0x3CEA3C7F3BF9, 0x3CEB3C7F3BF9, 0x3C803BF9, 0x3CD13C803BF9, 0x3CD23C803BF9, 0x3CD33C803BF9, 0x3CD43C803BF9, 0x3CD53C803BF9, 0x3CD63C803BF9, + 0x3CD73C803BF9, 0x3CD83C803BF9, 0x3CD93C803BF9, 0x3CDA3C803BF9, 0x3CDB3C803BF9, 0x3CDC3C803BF9, 0x3CDD3C803BF9, 0x3CDE3C803BF9, 0x3CDF3C803BF9, 0x3CE03C803BF9, 0x3CE13C803BF9, 0x3CE23C803BF9, 0x3CE33C803BF9, 0x3CE43C803BF9, 0x3CE53C803BF9, + 
0x3CE63C803BF9, 0x3CE73C803BF9, 0x3CE83C803BF9, 0x3CE93C803BF9, 0x3CEA3C803BF9, 0x3CEB3C803BF9, 0x3C813BF9, 0x3CD13C813BF9, 0x3CD23C813BF9, 0x3CD33C813BF9, 0x3CD43C813BF9, 0x3CD53C813BF9, 0x3CD63C813BF9, 0x3CD73C813BF9, 0x3CD83C813BF9, + 0x3CD93C813BF9, 0x3CDA3C813BF9, 0x3CDB3C813BF9, 0x3CDC3C813BF9, 0x3CDD3C813BF9, 0x3CDE3C813BF9, 0x3CDF3C813BF9, 0x3CE03C813BF9, 0x3CE13C813BF9, 0x3CE23C813BF9, 0x3CE33C813BF9, 0x3CE43C813BF9, 0x3CE53C813BF9, 0x3CE63C813BF9, 0x3CE73C813BF9, + 0x3CE83C813BF9, 0x3CE93C813BF9, 0x3CEA3C813BF9, 0x3CEB3C813BF9, 0x3C823BF9, 0x3CD13C823BF9, 0x3CD23C823BF9, 0x3CD33C823BF9, 0x3CD43C823BF9, 0x3CD53C823BF9, 0x3CD63C823BF9, 0x3CD73C823BF9, 0x3CD83C823BF9, 0x3CD93C823BF9, 0x3CDA3C823BF9, + 0x3CDB3C823BF9, 0x3CDC3C823BF9, 0x3CDD3C823BF9, 0x3CDE3C823BF9, 0x3CDF3C823BF9, 0x3CE03C823BF9, 0x3CE13C823BF9, 0x3CE23C823BF9, 0x3CE33C823BF9, 0x3CE43C823BF9, 0x3CE53C823BF9, 0x3CE63C823BF9, 0x3CE73C823BF9, 0x3CE83C823BF9, 0x3CE93C823BF9, + 0x3CEA3C823BF9, 0x3CEB3C823BF9, 0x3C833BF9, 0x3CD13C833BF9, 0x3CD23C833BF9, 0x3CD33C833BF9, 0x3CD43C833BF9, 0x3CD53C833BF9, 0x3CD63C833BF9, 0x3CD73C833BF9, 0x3CD83C833BF9, 0x3CD93C833BF9, 0x3CDA3C833BF9, 0x3CDB3C833BF9, 0x3CDC3C833BF9, + 0x3CDD3C833BF9, 0x3CDE3C833BF9, 0x3CDF3C833BF9, 0x3CE03C833BF9, 0x3CE13C833BF9, 0x3CE23C833BF9, 0x3CE33C833BF9, 0x3CE43C833BF9, 0x3CE53C833BF9, 0x3CE63C833BF9, 0x3CE73C833BF9, 0x3CE83C833BF9, 0x3CE93C833BF9, 0x3CEA3C833BF9, 0x3CEB3C833BF9, + 0x3C843BF9, 0x3CD13C843BF9, 0x3CD23C843BF9, 0x3CD33C843BF9, 0x3CD43C843BF9, 0x3CD53C843BF9, 0x3CD63C843BF9, 0x3CD73C843BF9, 0x3CD83C843BF9, 0x3CD93C843BF9, 0x3CDA3C843BF9, 0x3CDB3C843BF9, 0x3CDC3C843BF9, 0x3CDD3C843BF9, 0x3CDE3C843BF9, + 0x3CDF3C843BF9, 0x3CE03C843BF9, 0x3CE13C843BF9, 0x3CE23C843BF9, 0x3CE33C843BF9, 0x3CE43C843BF9, 0x3CE53C843BF9, 0x3CE63C843BF9, 0x3CE73C843BF9, 0x3CE83C843BF9, 0x3CE93C843BF9, 0x3CEA3C843BF9, 0x3CEB3C843BF9, 0x3C853BF9, 0x3CD13C853BF9, + 0x3CD23C853BF9, 0x3CD33C853BF9, 0x3CD43C853BF9, 0x3CD53C853BF9, 0x3CD63C853BF9, 
0x3CD73C853BF9, 0x3CD83C853BF9, 0x3CD93C853BF9, 0x3CDA3C853BF9, 0x3CDB3C853BF9, 0x3CDC3C853BF9, 0x3CDD3C853BF9, 0x3CDE3C853BF9, 0x3CDF3C853BF9, 0x3CE03C853BF9, + 0x3CE13C853BF9, 0x3CE23C853BF9, 0x3CE33C853BF9, 0x3CE43C853BF9, 0x3CE53C853BF9, 0x3CE63C853BF9, 0x3CE73C853BF9, 0x3CE83C853BF9, 0x3CE93C853BF9, 0x3CEA3C853BF9, 0x3CEB3C853BF9, 0x3C863BF9, 0x3CD13C863BF9, 0x3CD23C863BF9, 0x3CD33C863BF9, + 0x3CD43C863BF9, 0x3CD53C863BF9, 0x3CD63C863BF9, 0x3CD73C863BF9, 0x3CD83C863BF9, 0x3CD93C863BF9, 0x3CDA3C863BF9, 0x3CDB3C863BF9, 0x3CDC3C863BF9, 0x3CDD3C863BF9, 0x3CDE3C863BF9, 0x3CDF3C863BF9, 0x3CE03C863BF9, 0x3CE13C863BF9, 0x3CE23C863BF9, + 0x3CE33C863BF9, 0x3CE43C863BF9, 0x3CE53C863BF9, 0x3CE63C863BF9, 0x3CE73C863BF9, 0x3CE83C863BF9, 0x3CE93C863BF9, 0x3CEA3C863BF9, 0x3CEB3C863BF9, 0x3C873BF9, 0x3CD13C873BF9, 0x3CD23C873BF9, 0x3CD33C873BF9, 0x3CD43C873BF9, 0x3CD53C873BF9, + 0x3CD63C873BF9, 0x3CD73C873BF9, 0x3CD83C873BF9, 0x3CD93C873BF9, 0x3CDA3C873BF9, 0x3CDB3C873BF9, 0x3CDC3C873BF9, 0x3CDD3C873BF9, 0x3CDE3C873BF9, 0x3CDF3C873BF9, 0x3CE03C873BF9, 0x3CE13C873BF9, 0x3CE23C873BF9, 0x3CE33C873BF9, 0x3CE43C873BF9, + 0x3CE53C873BF9, 0x3CE63C873BF9, 0x3CE73C873BF9, 0x3CE83C873BF9, 0x3CE93C873BF9, 0x3CEA3C873BF9, 0x3CEB3C873BF9, 0x3C733BFA, 0x3CD13C733BFA, 0x3CD23C733BFA, 0x3CD33C733BFA, 0x3CD43C733BFA, 0x3CD53C733BFA, 0x3CD63C733BFA, 0x3CD73C733BFA, + 0x3CD83C733BFA, 0x3CD93C733BFA, 0x3CDA3C733BFA, 0x3CDB3C733BFA, 0x3CDC3C733BFA, 0x3CDD3C733BFA, 0x3CDE3C733BFA, 0x3CDF3C733BFA, 0x3CE03C733BFA, 0x3CE13C733BFA, 0x3CE23C733BFA, 0x3CE33C733BFA, 0x3CE43C733BFA, 0x3CE53C733BFA, 0x3CE63C733BFA, + 0x3CE73C733BFA, 0x3CE83C733BFA, 0x3CE93C733BFA, 0x3CEA3C733BFA, 0x3CEB3C733BFA, 0x3C743BFA, 0x3CD13C743BFA, 0x3CD23C743BFA, 0x3CD33C743BFA, 0x3CD43C743BFA, 0x3CD53C743BFA, 0x3CD63C743BFA, 0x3CD73C743BFA, 0x3CD83C743BFA, 0x3CD93C743BFA, + 0x3CDA3C743BFA, 0x3CDB3C743BFA, 0x3CDC3C743BFA, 0x3CDD3C743BFA, 0x3CDE3C743BFA, 0x3CDF3C743BFA, 0x3CE03C743BFA, 0x3CE13C743BFA, 0x3CE23C743BFA, 0x3CE33C743BFA, 
0x3CE43C743BFA, 0x3CE53C743BFA, 0x3CE63C743BFA, 0x3CE73C743BFA, 0x3CE83C743BFA, + 0x3CE93C743BFA, 0x3CEA3C743BFA, 0x3CEB3C743BFA, 0x3C753BFA, 0x3CD13C753BFA, 0x3CD23C753BFA, 0x3CD33C753BFA, 0x3CD43C753BFA, 0x3CD53C753BFA, 0x3CD63C753BFA, 0x3CD73C753BFA, 0x3CD83C753BFA, 0x3CD93C753BFA, 0x3CDA3C753BFA, 0x3CDB3C753BFA, + 0x3CDC3C753BFA, 0x3CDD3C753BFA, 0x3CDE3C753BFA, 0x3CDF3C753BFA, 0x3CE03C753BFA, 0x3CE13C753BFA, 0x3CE23C753BFA, 0x3CE33C753BFA, 0x3CE43C753BFA, 0x3CE53C753BFA, 0x3CE63C753BFA, 0x3CE73C753BFA, 0x3CE83C753BFA, 0x3CE93C753BFA, 0x3CEA3C753BFA, + 0x3CEB3C753BFA, 0x3C763BFA, 0x3CD13C763BFA, 0x3CD23C763BFA, 0x3CD33C763BFA, 0x3CD43C763BFA, 0x3CD53C763BFA, 0x3CD63C763BFA, 0x3CD73C763BFA, 0x3CD83C763BFA, 0x3CD93C763BFA, 0x3CDA3C763BFA, 0x3CDB3C763BFA, 0x3CDC3C763BFA, 0x3CDD3C763BFA, + 0x3CDE3C763BFA, 0x3CDF3C763BFA, 0x3CE03C763BFA, 0x3CE13C763BFA, 0x3CE23C763BFA, 0x3CE33C763BFA, 0x3CE43C763BFA, 0x3CE53C763BFA, 0x3CE63C763BFA, 0x3CE73C763BFA, 0x3CE83C763BFA, 0x3CE93C763BFA, 0x3CEA3C763BFA, 0x3CEB3C763BFA, 0x3C773BFA, + 0x3CD13C773BFA, 0x3CD23C773BFA, 0x3CD33C773BFA, 0x3CD43C773BFA, 0x3CD53C773BFA, 0x3CD63C773BFA, 0x3CD73C773BFA, 0x3CD83C773BFA, 0x3CD93C773BFA, 0x3CDA3C773BFA, 0x3CDB3C773BFA, 0x3CDC3C773BFA, 0x3CDD3C773BFA, 0x3CDE3C773BFA, 0x3CDF3C773BFA, + 0x3CE03C773BFA, 0x3CE13C773BFA, 0x3CE23C773BFA, 0x3CE33C773BFA, 0x3CE43C773BFA, 0x3CE53C773BFA, 0x3CE63C773BFA, 0x3CE73C773BFA, 0x3CE83C773BFA, 0x3CE93C773BFA, 0x3CEA3C773BFA, 0x3CEB3C773BFA, 0x3C783BFA, 0x3CD13C783BFA, 0x3CD23C783BFA, + 0x3CD33C783BFA, 0x3CD43C783BFA, 0x3CD53C783BFA, 0x3CD63C783BFA, 0x3CD73C783BFA, 0x3CD83C783BFA, 0x3CD93C783BFA, 0x3CDA3C783BFA, 0x3CDB3C783BFA, 0x3CDC3C783BFA, 0x3CDD3C783BFA, 0x3CDE3C783BFA, 0x3CDF3C783BFA, 0x3CE03C783BFA, 0x3CE13C783BFA, + 0x3CE23C783BFA, 0x3CE33C783BFA, 0x3CE43C783BFA, 0x3CE53C783BFA, 0x3CE63C783BFA, 0x3CE73C783BFA, 0x3CE83C783BFA, 0x3CE93C783BFA, 0x3CEA3C783BFA, 0x3CEB3C783BFA, 0x3C793BFA, 0x3CD13C793BFA, 0x3CD23C793BFA, 0x3CD33C793BFA, 0x3CD43C793BFA, + 
0x3CD53C793BFA, 0x3CD63C793BFA, 0x3CD73C793BFA, 0x3CD83C793BFA, 0x3CD93C793BFA, 0x3CDA3C793BFA, 0x3CDB3C793BFA, 0x3CDC3C793BFA, 0x3CDD3C793BFA, 0x3CDE3C793BFA, 0x3CDF3C793BFA, 0x3CE03C793BFA, 0x3CE13C793BFA, 0x3CE23C793BFA, 0x3CE33C793BFA, + 0x3CE43C793BFA, 0x3CE53C793BFA, 0x3CE63C793BFA, 0x3CE73C793BFA, 0x3CE83C793BFA, 0x3CE93C793BFA, 0x3CEA3C793BFA, 0x3CEB3C793BFA, 0x3C7A3BFA, 0x3CD13C7A3BFA, 0x3CD23C7A3BFA, 0x3CD33C7A3BFA, 0x3CD43C7A3BFA, 0x3CD53C7A3BFA, 0x3CD63C7A3BFA, + 0x3CD73C7A3BFA, 0x3CD83C7A3BFA, 0x3CD93C7A3BFA, 0x3CDA3C7A3BFA, 0x3CDB3C7A3BFA, 0x3CDC3C7A3BFA, 0x3CDD3C7A3BFA, 0x3CDE3C7A3BFA, 0x3CDF3C7A3BFA, 0x3CE03C7A3BFA, 0x3CE13C7A3BFA, 0x3CE23C7A3BFA, 0x3CE33C7A3BFA, 0x3CE43C7A3BFA, 0x3CE53C7A3BFA, + 0x3CE63C7A3BFA, 0x3CE73C7A3BFA, 0x3CE83C7A3BFA, 0x3CE93C7A3BFA, 0x3CEA3C7A3BFA, 0x3CEB3C7A3BFA, 0x3C7B3BFA, 0x3CD13C7B3BFA, 0x3CD23C7B3BFA, 0x3CD33C7B3BFA, 0x3CD43C7B3BFA, 0x3CD53C7B3BFA, 0x3CD63C7B3BFA, 0x3CD73C7B3BFA, 0x3CD83C7B3BFA, + 0x3CD93C7B3BFA, 0x3CDA3C7B3BFA, 0x3CDB3C7B3BFA, 0x3CDC3C7B3BFA, 0x3CDD3C7B3BFA, 0x3CDE3C7B3BFA, 0x3CDF3C7B3BFA, 0x3CE03C7B3BFA, 0x3CE13C7B3BFA, 0x3CE23C7B3BFA, 0x3CE33C7B3BFA, 0x3CE43C7B3BFA, 0x3CE53C7B3BFA, 0x3CE63C7B3BFA, 0x3CE73C7B3BFA, + 0x3CE83C7B3BFA, 0x3CE93C7B3BFA, 0x3CEA3C7B3BFA, 0x3CEB3C7B3BFA, 0x3C7C3BFA, 0x3CD13C7C3BFA, 0x3CD23C7C3BFA, 0x3CD33C7C3BFA, 0x3CD43C7C3BFA, 0x3CD53C7C3BFA, 0x3CD63C7C3BFA, 0x3CD73C7C3BFA, 0x3CD83C7C3BFA, 0x3CD93C7C3BFA, 0x3CDA3C7C3BFA, + 0x3CDB3C7C3BFA, 0x3CDC3C7C3BFA, 0x3CDD3C7C3BFA, 0x3CDE3C7C3BFA, 0x3CDF3C7C3BFA, 0x3CE03C7C3BFA, 0x3CE13C7C3BFA, 0x3CE23C7C3BFA, 0x3CE33C7C3BFA, 0x3CE43C7C3BFA, 0x3CE53C7C3BFA, 0x3CE63C7C3BFA, 0x3CE73C7C3BFA, 0x3CE83C7C3BFA, 0x3CE93C7C3BFA, + 0x3CEA3C7C3BFA, 0x3CEB3C7C3BFA, 0x3C7D3BFA, 0x3CD13C7D3BFA, 0x3CD23C7D3BFA, 0x3CD33C7D3BFA, 0x3CD43C7D3BFA, 0x3CD53C7D3BFA, 0x3CD63C7D3BFA, 0x3CD73C7D3BFA, 0x3CD83C7D3BFA, 0x3CD93C7D3BFA, 0x3CDA3C7D3BFA, 0x3CDB3C7D3BFA, 0x3CDC3C7D3BFA, + 0x3CDD3C7D3BFA, 0x3CDE3C7D3BFA, 0x3CDF3C7D3BFA, 0x3CE03C7D3BFA, 0x3CE13C7D3BFA, 
0x3CE23C7D3BFA, 0x3CE33C7D3BFA, 0x3CE43C7D3BFA, 0x3CE53C7D3BFA, 0x3CE63C7D3BFA, 0x3CE73C7D3BFA, 0x3CE83C7D3BFA, 0x3CE93C7D3BFA, 0x3CEA3C7D3BFA, 0x3CEB3C7D3BFA, + 0x3C7E3BFA, 0x3CD13C7E3BFA, 0x3CD23C7E3BFA, 0x3CD33C7E3BFA, 0x3CD43C7E3BFA, 0x3CD53C7E3BFA, 0x3CD63C7E3BFA, 0x3CD73C7E3BFA, 0x3CD83C7E3BFA, 0x3CD93C7E3BFA, 0x3CDA3C7E3BFA, 0x3CDB3C7E3BFA, 0x3CDC3C7E3BFA, 0x3CDD3C7E3BFA, 0x3CDE3C7E3BFA, + 0x3CDF3C7E3BFA, 0x3CE03C7E3BFA, 0x3CE13C7E3BFA, 0x3CE23C7E3BFA, 0x3CE33C7E3BFA, 0x3CE43C7E3BFA, 0x3CE53C7E3BFA, 0x3CE63C7E3BFA, 0x3CE73C7E3BFA, 0x3CE83C7E3BFA, 0x3CE93C7E3BFA, 0x3CEA3C7E3BFA, 0x3CEB3C7E3BFA, 0x3C7F3BFA, 0x3CD13C7F3BFA, + 0x3CD23C7F3BFA, 0x3CD33C7F3BFA, 0x3CD43C7F3BFA, 0x3CD53C7F3BFA, 0x3CD63C7F3BFA, 0x3CD73C7F3BFA, 0x3CD83C7F3BFA, 0x3CD93C7F3BFA, 0x3CDA3C7F3BFA, 0x3CDB3C7F3BFA, 0x3CDC3C7F3BFA, 0x3CDD3C7F3BFA, 0x3CDE3C7F3BFA, 0x3CDF3C7F3BFA, 0x3CE03C7F3BFA, + 0x3CE13C7F3BFA, 0x3CE23C7F3BFA, 0x3CE33C7F3BFA, 0x3CE43C7F3BFA, 0x3CE53C7F3BFA, 0x3CE63C7F3BFA, 0x3CE73C7F3BFA, 0x3CE83C7F3BFA, 0x3CE93C7F3BFA, 0x3CEA3C7F3BFA, 0x3CEB3C7F3BFA, 0x3C803BFA, 0x3CD13C803BFA, 0x3CD23C803BFA, 0x3CD33C803BFA, + 0x3CD43C803BFA, 0x3CD53C803BFA, 0x3CD63C803BFA, 0x3CD73C803BFA, 0x3CD83C803BFA, 0x3CD93C803BFA, 0x3CDA3C803BFA, 0x3CDB3C803BFA, 0x3CDC3C803BFA, 0x3CDD3C803BFA, 0x3CDE3C803BFA, 0x3CDF3C803BFA, 0x3CE03C803BFA, 0x3CE13C803BFA, 0x3CE23C803BFA, + 0x3CE33C803BFA, 0x3CE43C803BFA, 0x3CE53C803BFA, 0x3CE63C803BFA, 0x3CE73C803BFA, 0x3CE83C803BFA, 0x3CE93C803BFA, 0x3CEA3C803BFA, 0x3CEB3C803BFA, 0x3C813BFA, 0x3CD13C813BFA, 0x3CD23C813BFA, 0x3CD33C813BFA, 0x3CD43C813BFA, 0x3CD53C813BFA, + 0x3CD63C813BFA, 0x3CD73C813BFA, 0x3CD83C813BFA, 0x3CD93C813BFA, 0x3CDA3C813BFA, 0x3CDB3C813BFA, 0x3CDC3C813BFA, 0x3CDD3C813BFA, 0x3CDE3C813BFA, 0x3CDF3C813BFA, 0x3CE03C813BFA, 0x3CE13C813BFA, 0x3CE23C813BFA, 0x3CE33C813BFA, 0x3CE43C813BFA, + 0x3CE53C813BFA, 0x3CE63C813BFA, 0x3CE73C813BFA, 0x3CE83C813BFA, 0x3CE93C813BFA, 0x3CEA3C813BFA, 0x3CEB3C813BFA, 0x3C823BFA, 0x3CD13C823BFA, 0x3CD23C823BFA, 
0x3CD33C823BFA, 0x3CD43C823BFA, 0x3CD53C823BFA, 0x3CD63C823BFA, 0x3CD73C823BFA, + 0x3CD83C823BFA, 0x3CD93C823BFA, 0x3CDA3C823BFA, 0x3CDB3C823BFA, 0x3CDC3C823BFA, 0x3CDD3C823BFA, 0x3CDE3C823BFA, 0x3CDF3C823BFA, 0x3CE03C823BFA, 0x3CE13C823BFA, 0x3CE23C823BFA, 0x3CE33C823BFA, 0x3CE43C823BFA, 0x3CE53C823BFA, 0x3CE63C823BFA, + 0x3CE73C823BFA, 0x3CE83C823BFA, 0x3CE93C823BFA, 0x3CEA3C823BFA, 0x3CEB3C823BFA, 0x3C833BFA, 0x3CD13C833BFA, 0x3CD23C833BFA, 0x3CD33C833BFA, 0x3CD43C833BFA, 0x3CD53C833BFA, 0x3CD63C833BFA, 0x3CD73C833BFA, 0x3CD83C833BFA, 0x3CD93C833BFA, + 0x3CDA3C833BFA, 0x3CDB3C833BFA, 0x3CDC3C833BFA, 0x3CDD3C833BFA, 0x3CDE3C833BFA, 0x3CDF3C833BFA, 0x3CE03C833BFA, 0x3CE13C833BFA, 0x3CE23C833BFA, 0x3CE33C833BFA, 0x3CE43C833BFA, 0x3CE53C833BFA, 0x3CE63C833BFA, 0x3CE73C833BFA, 0x3CE83C833BFA, + 0x3CE93C833BFA, 0x3CEA3C833BFA, 0x3CEB3C833BFA, 0x3C843BFA, 0x3CD13C843BFA, 0x3CD23C843BFA, 0x3CD33C843BFA, 0x3CD43C843BFA, 0x3CD53C843BFA, 0x3CD63C843BFA, 0x3CD73C843BFA, 0x3CD83C843BFA, 0x3CD93C843BFA, 0x3CDA3C843BFA, 0x3CDB3C843BFA, + 0x3CDC3C843BFA, 0x3CDD3C843BFA, 0x3CDE3C843BFA, 0x3CDF3C843BFA, 0x3CE03C843BFA, 0x3CE13C843BFA, 0x3CE23C843BFA, 0x3CE33C843BFA, 0x3CE43C843BFA, 0x3CE53C843BFA, 0x3CE63C843BFA, 0x3CE73C843BFA, 0x3CE83C843BFA, 0x3CE93C843BFA, 0x3CEA3C843BFA, + 0x3CEB3C843BFA, 0x3C853BFA, 0x3CD13C853BFA, 0x3CD23C853BFA, 0x3CD33C853BFA, 0x3CD43C853BFA, 0x3CD53C853BFA, 0x3CD63C853BFA, 0x3CD73C853BFA, 0x3CD83C853BFA, 0x3CD93C853BFA, 0x3CDA3C853BFA, 0x3CDB3C853BFA, 0x3CDC3C853BFA, 0x3CDD3C853BFA, + 0x3CDE3C853BFA, 0x3CDF3C853BFA, 0x3CE03C853BFA, 0x3CE13C853BFA, 0x3CE23C853BFA, 0x3CE33C853BFA, 0x3CE43C853BFA, 0x3CE53C853BFA, 0x3CE63C853BFA, 0x3CE73C853BFA, 0x3CE83C853BFA, 0x3CE93C853BFA, 0x3CEA3C853BFA, 0x3CEB3C853BFA, 0x3C863BFA, + 0x3CD13C863BFA, 0x3CD23C863BFA, 0x3CD33C863BFA, 0x3CD43C863BFA, 0x3CD53C863BFA, 0x3CD63C863BFA, 0x3CD73C863BFA, 0x3CD83C863BFA, 0x3CD93C863BFA, 0x3CDA3C863BFA, 0x3CDB3C863BFA, 0x3CDC3C863BFA, 0x3CDD3C863BFA, 0x3CDE3C863BFA, 0x3CDF3C863BFA, 
+ 0x3CE03C863BFA, 0x3CE13C863BFA, 0x3CE23C863BFA, 0x3CE33C863BFA, 0x3CE43C863BFA, 0x3CE53C863BFA, 0x3CE63C863BFA, 0x3CE73C863BFA, 0x3CE83C863BFA, 0x3CE93C863BFA, 0x3CEA3C863BFA, 0x3CEB3C863BFA, 0x3C873BFA, 0x3CD13C873BFA, 0x3CD23C873BFA, + 0x3CD33C873BFA, 0x3CD43C873BFA, 0x3CD53C873BFA, 0x3CD63C873BFA, 0x3CD73C873BFA, 0x3CD83C873BFA, 0x3CD93C873BFA, 0x3CDA3C873BFA, 0x3CDB3C873BFA, 0x3CDC3C873BFA, 0x3CDD3C873BFA, 0x3CDE3C873BFA, 0x3CDF3C873BFA, 0x3CE03C873BFA, 0x3CE13C873BFA, + 0x3CE23C873BFA, 0x3CE33C873BFA, 0x3CE43C873BFA, 0x3CE53C873BFA, 0x3CE63C873BFA, 0x3CE73C873BFA, 0x3CE83C873BFA, 0x3CE93C873BFA, 0x3CEA3C873BFA, 0x3CEB3C873BFA, 0x3C733BFB, 0x3CD13C733BFB, 0x3CD23C733BFB, 0x3CD33C733BFB, 0x3CD43C733BFB, + 0x3CD53C733BFB, 0x3CD63C733BFB, 0x3CD73C733BFB, 0x3CD83C733BFB, 0x3CD93C733BFB, 0x3CDA3C733BFB, 0x3CDB3C733BFB, 0x3CDC3C733BFB, 0x3CDD3C733BFB, 0x3CDE3C733BFB, 0x3CDF3C733BFB, 0x3CE03C733BFB, 0x3CE13C733BFB, 0x3CE23C733BFB, 0x3CE33C733BFB, + 0x3CE43C733BFB, 0x3CE53C733BFB, 0x3CE63C733BFB, 0x3CE73C733BFB, 0x3CE83C733BFB, 0x3CE93C733BFB, 0x3CEA3C733BFB, 0x3CEB3C733BFB, 0x3C743BFB, 0x3CD13C743BFB, 0x3CD23C743BFB, 0x3CD33C743BFB, 0x3CD43C743BFB, 0x3CD53C743BFB, 0x3CD63C743BFB, + 0x3CD73C743BFB, 0x3CD83C743BFB, 0x3CD93C743BFB, 0x3CDA3C743BFB, 0x3CDB3C743BFB, 0x3CDC3C743BFB, 0x3CDD3C743BFB, 0x3CDE3C743BFB, 0x3CDF3C743BFB, 0x3CE03C743BFB, 0x3CE13C743BFB, 0x3CE23C743BFB, 0x3CE33C743BFB, 0x3CE43C743BFB, 0x3CE53C743BFB, + 0x3CE63C743BFB, 0x3CE73C743BFB, 0x3CE83C743BFB, 0x3CE93C743BFB, 0x3CEA3C743BFB, 0x3CEB3C743BFB, 0x3C753BFB, 0x3CD13C753BFB, 0x3CD23C753BFB, 0x3CD33C753BFB, 0x3CD43C753BFB, 0x3CD53C753BFB, 0x3CD63C753BFB, 0x3CD73C753BFB, 0x3CD83C753BFB, + 0x3CD93C753BFB, 0x3CDA3C753BFB, 0x3CDB3C753BFB, 0x3CDC3C753BFB, 0x3CDD3C753BFB, 0x3CDE3C753BFB, 0x3CDF3C753BFB, 0x3CE03C753BFB, 0x3CE13C753BFB, 0x3CE23C753BFB, 0x3CE33C753BFB, 0x3CE43C753BFB, 0x3CE53C753BFB, 0x3CE63C753BFB, 0x3CE73C753BFB, + 0x3CE83C753BFB, 0x3CE93C753BFB, 0x3CEA3C753BFB, 0x3CEB3C753BFB, 0x3C763BFB, 
0x3CD13C763BFB, 0x3CD23C763BFB, 0x3CD33C763BFB, 0x3CD43C763BFB, 0x3CD53C763BFB, 0x3CD63C763BFB, 0x3CD73C763BFB, 0x3CD83C763BFB, 0x3CD93C763BFB, 0x3CDA3C763BFB, + 0x3CDB3C763BFB, 0x3CDC3C763BFB, 0x3CDD3C763BFB, 0x3CDE3C763BFB, 0x3CDF3C763BFB, 0x3CE03C763BFB, 0x3CE13C763BFB, 0x3CE23C763BFB, 0x3CE33C763BFB, 0x3CE43C763BFB, 0x3CE53C763BFB, 0x3CE63C763BFB, 0x3CE73C763BFB, 0x3CE83C763BFB, 0x3CE93C763BFB, + 0x3CEA3C763BFB, 0x3CEB3C763BFB, 0x3C773BFB, 0x3CD13C773BFB, 0x3CD23C773BFB, 0x3CD33C773BFB, 0x3CD43C773BFB, 0x3CD53C773BFB, 0x3CD63C773BFB, 0x3CD73C773BFB, 0x3CD83C773BFB, 0x3CD93C773BFB, 0x3CDA3C773BFB, 0x3CDB3C773BFB, 0x3CDC3C773BFB, + 0x3CDD3C773BFB, 0x3CDE3C773BFB, 0x3CDF3C773BFB, 0x3CE03C773BFB, 0x3CE13C773BFB, 0x3CE23C773BFB, 0x3CE33C773BFB, 0x3CE43C773BFB, 0x3CE53C773BFB, 0x3CE63C773BFB, 0x3CE73C773BFB, 0x3CE83C773BFB, 0x3CE93C773BFB, 0x3CEA3C773BFB, 0x3CEB3C773BFB, + 0x3C783BFB, 0x3CD13C783BFB, 0x3CD23C783BFB, 0x3CD33C783BFB, 0x3CD43C783BFB, 0x3CD53C783BFB, 0x3CD63C783BFB, 0x3CD73C783BFB, 0x3CD83C783BFB, 0x3CD93C783BFB, 0x3CDA3C783BFB, 0x3CDB3C783BFB, 0x3CDC3C783BFB, 0x3CDD3C783BFB, 0x3CDE3C783BFB, + 0x3CDF3C783BFB, 0x3CE03C783BFB, 0x3CE13C783BFB, 0x3CE23C783BFB, 0x3CE33C783BFB, 0x3CE43C783BFB, 0x3CE53C783BFB, 0x3CE63C783BFB, 0x3CE73C783BFB, 0x3CE83C783BFB, 0x3CE93C783BFB, 0x3CEA3C783BFB, 0x3CEB3C783BFB, 0x3C793BFB, 0x3CD13C793BFB, + 0x3CD23C793BFB, 0x3CD33C793BFB, 0x3CD43C793BFB, 0x3CD53C793BFB, 0x3CD63C793BFB, 0x3CD73C793BFB, 0x3CD83C793BFB, 0x3CD93C793BFB, 0x3CDA3C793BFB, 0x3CDB3C793BFB, 0x3CDC3C793BFB, 0x3CDD3C793BFB, 0x3CDE3C793BFB, 0x3CDF3C793BFB, 0x3CE03C793BFB, + 0x3CE13C793BFB, 0x3CE23C793BFB, 0x3CE33C793BFB, 0x3CE43C793BFB, 0x3CE53C793BFB, 0x3CE63C793BFB, 0x3CE73C793BFB, 0x3CE83C793BFB, 0x3CE93C793BFB, 0x3CEA3C793BFB, 0x3CEB3C793BFB, 0x3C7A3BFB, 0x3CD13C7A3BFB, 0x3CD23C7A3BFB, 0x3CD33C7A3BFB, + 0x3CD43C7A3BFB, 0x3CD53C7A3BFB, 0x3CD63C7A3BFB, 0x3CD73C7A3BFB, 0x3CD83C7A3BFB, 0x3CD93C7A3BFB, 0x3CDA3C7A3BFB, 0x3CDB3C7A3BFB, 0x3CDC3C7A3BFB, 0x3CDD3C7A3BFB, 
0x3CDE3C7A3BFB, 0x3CDF3C7A3BFB, 0x3CE03C7A3BFB, 0x3CE13C7A3BFB, 0x3CE23C7A3BFB, + 0x3CE33C7A3BFB, 0x3CE43C7A3BFB, 0x3CE53C7A3BFB, 0x3CE63C7A3BFB, 0x3CE73C7A3BFB, 0x3CE83C7A3BFB, 0x3CE93C7A3BFB, 0x3CEA3C7A3BFB, 0x3CEB3C7A3BFB, 0x3C7B3BFB, 0x3CD13C7B3BFB, 0x3CD23C7B3BFB, 0x3CD33C7B3BFB, 0x3CD43C7B3BFB, 0x3CD53C7B3BFB, + 0x3CD63C7B3BFB, 0x3CD73C7B3BFB, 0x3CD83C7B3BFB, 0x3CD93C7B3BFB, 0x3CDA3C7B3BFB, 0x3CDB3C7B3BFB, 0x3CDC3C7B3BFB, 0x3CDD3C7B3BFB, 0x3CDE3C7B3BFB, 0x3CDF3C7B3BFB, 0x3CE03C7B3BFB, 0x3CE13C7B3BFB, 0x3CE23C7B3BFB, 0x3CE33C7B3BFB, 0x3CE43C7B3BFB, + 0x3CE53C7B3BFB, 0x3CE63C7B3BFB, 0x3CE73C7B3BFB, 0x3CE83C7B3BFB, 0x3CE93C7B3BFB, 0x3CEA3C7B3BFB, 0x3CEB3C7B3BFB, 0x3C7C3BFB, 0x3CD13C7C3BFB, 0x3CD23C7C3BFB, 0x3CD33C7C3BFB, 0x3CD43C7C3BFB, 0x3CD53C7C3BFB, 0x3CD63C7C3BFB, 0x3CD73C7C3BFB, + 0x3CD83C7C3BFB, 0x3CD93C7C3BFB, 0x3CDA3C7C3BFB, 0x3CDB3C7C3BFB, 0x3CDC3C7C3BFB, 0x3CDD3C7C3BFB, 0x3CDE3C7C3BFB, 0x3CDF3C7C3BFB, 0x3CE03C7C3BFB, 0x3CE13C7C3BFB, 0x3CE23C7C3BFB, 0x3CE33C7C3BFB, 0x3CE43C7C3BFB, 0x3CE53C7C3BFB, 0x3CE63C7C3BFB, + 0x3CE73C7C3BFB, 0x3CE83C7C3BFB, 0x3CE93C7C3BFB, 0x3CEA3C7C3BFB, 0x3CEB3C7C3BFB, 0x3C7D3BFB, 0x3CD13C7D3BFB, 0x3CD23C7D3BFB, 0x3CD33C7D3BFB, 0x3CD43C7D3BFB, 0x3CD53C7D3BFB, 0x3CD63C7D3BFB, 0x3CD73C7D3BFB, 0x3CD83C7D3BFB, 0x3CD93C7D3BFB, + 0x3CDA3C7D3BFB, 0x3CDB3C7D3BFB, 0x3CDC3C7D3BFB, 0x3CDD3C7D3BFB, 0x3CDE3C7D3BFB, 0x3CDF3C7D3BFB, 0x3CE03C7D3BFB, 0x3CE13C7D3BFB, 0x3CE23C7D3BFB, 0x3CE33C7D3BFB, 0x3CE43C7D3BFB, 0x3CE53C7D3BFB, 0x3CE63C7D3BFB, 0x3CE73C7D3BFB, 0x3CE83C7D3BFB, + 0x3CE93C7D3BFB, 0x3CEA3C7D3BFB, 0x3CEB3C7D3BFB, 0x3C7E3BFB, 0x3CD13C7E3BFB, 0x3CD23C7E3BFB, 0x3CD33C7E3BFB, 0x3CD43C7E3BFB, 0x3CD53C7E3BFB, 0x3CD63C7E3BFB, 0x3CD73C7E3BFB, 0x3CD83C7E3BFB, 0x3CD93C7E3BFB, 0x3CDA3C7E3BFB, 0x3CDB3C7E3BFB, + 0x3CDC3C7E3BFB, 0x3CDD3C7E3BFB, 0x3CDE3C7E3BFB, 0x3CDF3C7E3BFB, 0x3CE03C7E3BFB, 0x3CE13C7E3BFB, 0x3CE23C7E3BFB, 0x3CE33C7E3BFB, 0x3CE43C7E3BFB, 0x3CE53C7E3BFB, 0x3CE63C7E3BFB, 0x3CE73C7E3BFB, 0x3CE83C7E3BFB, 0x3CE93C7E3BFB, 0x3CEA3C7E3BFB, 
+ 0x3CEB3C7E3BFB, 0x3C7F3BFB, 0x3CD13C7F3BFB, 0x3CD23C7F3BFB, 0x3CD33C7F3BFB, 0x3CD43C7F3BFB, 0x3CD53C7F3BFB, 0x3CD63C7F3BFB, 0x3CD73C7F3BFB, 0x3CD83C7F3BFB, 0x3CD93C7F3BFB, 0x3CDA3C7F3BFB, 0x3CDB3C7F3BFB, 0x3CDC3C7F3BFB, 0x3CDD3C7F3BFB, + 0x3CDE3C7F3BFB, 0x3CDF3C7F3BFB, 0x3CE03C7F3BFB, 0x3CE13C7F3BFB, 0x3CE23C7F3BFB, 0x3CE33C7F3BFB, 0x3CE43C7F3BFB, 0x3CE53C7F3BFB, 0x3CE63C7F3BFB, 0x3CE73C7F3BFB, 0x3CE83C7F3BFB, 0x3CE93C7F3BFB, 0x3CEA3C7F3BFB, 0x3CEB3C7F3BFB, 0x3C803BFB, + 0x3CD13C803BFB, 0x3CD23C803BFB, 0x3CD33C803BFB, 0x3CD43C803BFB, 0x3CD53C803BFB, 0x3CD63C803BFB, 0x3CD73C803BFB, 0x3CD83C803BFB, 0x3CD93C803BFB, 0x3CDA3C803BFB, 0x3CDB3C803BFB, 0x3CDC3C803BFB, 0x3CDD3C803BFB, 0x3CDE3C803BFB, 0x3CDF3C803BFB, + 0x3CE03C803BFB, 0x3CE13C803BFB, 0x3CE23C803BFB, 0x3CE33C803BFB, 0x3CE43C803BFB, 0x3CE53C803BFB, 0x3CE63C803BFB, 0x3CE73C803BFB, 0x3CE83C803BFB, 0x3CE93C803BFB, 0x3CEA3C803BFB, 0x3CEB3C803BFB, 0x3C813BFB, 0x3CD13C813BFB, 0x3CD23C813BFB, + 0x3CD33C813BFB, 0x3CD43C813BFB, 0x3CD53C813BFB, 0x3CD63C813BFB, 0x3CD73C813BFB, 0x3CD83C813BFB, 0x3CD93C813BFB, 0x3CDA3C813BFB, 0x3CDB3C813BFB, 0x3CDC3C813BFB, 0x3CDD3C813BFB, 0x3CDE3C813BFB, 0x3CDF3C813BFB, 0x3CE03C813BFB, 0x3CE13C813BFB, + 0x3CE23C813BFB, 0x3CE33C813BFB, 0x3CE43C813BFB, 0x3CE53C813BFB, 0x3CE63C813BFB, 0x3CE73C813BFB, 0x3CE83C813BFB, 0x3CE93C813BFB, 0x3CEA3C813BFB, 0x3CEB3C813BFB, 0x3C823BFB, 0x3CD13C823BFB, 0x3CD23C823BFB, 0x3CD33C823BFB, 0x3CD43C823BFB, + 0x3CD53C823BFB, 0x3CD63C823BFB, 0x3CD73C823BFB, 0x3CD83C823BFB, 0x3CD93C823BFB, 0x3CDA3C823BFB, 0x3CDB3C823BFB, 0x3CDC3C823BFB, 0x3CDD3C823BFB, 0x3CDE3C823BFB, 0x3CDF3C823BFB, 0x3CE03C823BFB, 0x3CE13C823BFB, 0x3CE23C823BFB, 0x3CE33C823BFB, + 0x3CE43C823BFB, 0x3CE53C823BFB, 0x3CE63C823BFB, 0x3CE73C823BFB, 0x3CE83C823BFB, 0x3CE93C823BFB, 0x3CEA3C823BFB, 0x3CEB3C823BFB, 0x3C833BFB, 0x3CD13C833BFB, 0x3CD23C833BFB, 0x3CD33C833BFB, 0x3CD43C833BFB, 0x3CD53C833BFB, 0x3CD63C833BFB, + 0x3CD73C833BFB, 0x3CD83C833BFB, 0x3CD93C833BFB, 0x3CDA3C833BFB, 0x3CDB3C833BFB, 
0x3CDC3C833BFB, 0x3CDD3C833BFB, 0x3CDE3C833BFB, 0x3CDF3C833BFB, 0x3CE03C833BFB, 0x3CE13C833BFB, 0x3CE23C833BFB, 0x3CE33C833BFB, 0x3CE43C833BFB, 0x3CE53C833BFB, + 0x3CE63C833BFB, 0x3CE73C833BFB, 0x3CE83C833BFB, 0x3CE93C833BFB, 0x3CEA3C833BFB, 0x3CEB3C833BFB, 0x3C843BFB, 0x3CD13C843BFB, 0x3CD23C843BFB, 0x3CD33C843BFB, 0x3CD43C843BFB, 0x3CD53C843BFB, 0x3CD63C843BFB, 0x3CD73C843BFB, 0x3CD83C843BFB, + 0x3CD93C843BFB, 0x3CDA3C843BFB, 0x3CDB3C843BFB, 0x3CDC3C843BFB, 0x3CDD3C843BFB, 0x3CDE3C843BFB, 0x3CDF3C843BFB, 0x3CE03C843BFB, 0x3CE13C843BFB, 0x3CE23C843BFB, 0x3CE33C843BFB, 0x3CE43C843BFB, 0x3CE53C843BFB, 0x3CE63C843BFB, 0x3CE73C843BFB, + 0x3CE83C843BFB, 0x3CE93C843BFB, 0x3CEA3C843BFB, 0x3CEB3C843BFB, 0x3C853BFB, 0x3CD13C853BFB, 0x3CD23C853BFB, 0x3CD33C853BFB, 0x3CD43C853BFB, 0x3CD53C853BFB, 0x3CD63C853BFB, 0x3CD73C853BFB, 0x3CD83C853BFB, 0x3CD93C853BFB, 0x3CDA3C853BFB, + 0x3CDB3C853BFB, 0x3CDC3C853BFB, 0x3CDD3C853BFB, 0x3CDE3C853BFB, 0x3CDF3C853BFB, 0x3CE03C853BFB, 0x3CE13C853BFB, 0x3CE23C853BFB, 0x3CE33C853BFB, 0x3CE43C853BFB, 0x3CE53C853BFB, 0x3CE63C853BFB, 0x3CE73C853BFB, 0x3CE83C853BFB, 0x3CE93C853BFB, + 0x3CEA3C853BFB, 0x3CEB3C853BFB, 0x3C863BFB, 0x3CD13C863BFB, 0x3CD23C863BFB, 0x3CD33C863BFB, 0x3CD43C863BFB, 0x3CD53C863BFB, 0x3CD63C863BFB, 0x3CD73C863BFB, 0x3CD83C863BFB, 0x3CD93C863BFB, 0x3CDA3C863BFB, 0x3CDB3C863BFB, 0x3CDC3C863BFB, + 0x3CDD3C863BFB, 0x3CDE3C863BFB, 0x3CDF3C863BFB, 0x3CE03C863BFB, 0x3CE13C863BFB, 0x3CE23C863BFB, 0x3CE33C863BFB, 0x3CE43C863BFB, 0x3CE53C863BFB, 0x3CE63C863BFB, 0x3CE73C863BFB, 0x3CE83C863BFB, 0x3CE93C863BFB, 0x3CEA3C863BFB, 0x3CEB3C863BFB, + 0x3C873BFB, 0x3CD13C873BFB, 0x3CD23C873BFB, 0x3CD33C873BFB, 0x3CD43C873BFB, 0x3CD53C873BFB, 0x3CD63C873BFB, 0x3CD73C873BFB, 0x3CD83C873BFB, 0x3CD93C873BFB, 0x3CDA3C873BFB, 0x3CDB3C873BFB, 0x3CDC3C873BFB, 0x3CDD3C873BFB, 0x3CDE3C873BFB, + 0x3CDF3C873BFB, 0x3CE03C873BFB, 0x3CE13C873BFB, 0x3CE23C873BFB, 0x3CE33C873BFB, 0x3CE43C873BFB, 0x3CE53C873BFB, 0x3CE63C873BFB, 0x3CE73C873BFB, 0x3CE83C873BFB, 
0x3CE93C873BFB, 0x3CEA3C873BFB, 0x3CEB3C873BFB, 0x3C733BFC, 0x3CD13C733BFC, + 0x3CD23C733BFC, 0x3CD33C733BFC, 0x3CD43C733BFC, 0x3CD53C733BFC, 0x3CD63C733BFC, 0x3CD73C733BFC, 0x3CD83C733BFC, 0x3CD93C733BFC, 0x3CDA3C733BFC, 0x3CDB3C733BFC, 0x3CDC3C733BFC, 0x3CDD3C733BFC, 0x3CDE3C733BFC, 0x3CDF3C733BFC, 0x3CE03C733BFC, + 0x3CE13C733BFC, 0x3CE23C733BFC, 0x3CE33C733BFC, 0x3CE43C733BFC, 0x3CE53C733BFC, 0x3CE63C733BFC, 0x3CE73C733BFC, 0x3CE83C733BFC, 0x3CE93C733BFC, 0x3CEA3C733BFC, 0x3CEB3C733BFC, 0x3C743BFC, 0x3CD13C743BFC, 0x3CD23C743BFC, 0x3CD33C743BFC, + 0x3CD43C743BFC, 0x3CD53C743BFC, 0x3CD63C743BFC, 0x3CD73C743BFC, 0x3CD83C743BFC, 0x3CD93C743BFC, 0x3CDA3C743BFC, 0x3CDB3C743BFC, 0x3CDC3C743BFC, 0x3CDD3C743BFC, 0x3CDE3C743BFC, 0x3CDF3C743BFC, 0x3CE03C743BFC, 0x3CE13C743BFC, 0x3CE23C743BFC, + 0x3CE33C743BFC, 0x3CE43C743BFC, 0x3CE53C743BFC, 0x3CE63C743BFC, 0x3CE73C743BFC, 0x3CE83C743BFC, 0x3CE93C743BFC, 0x3CEA3C743BFC, 0x3CEB3C743BFC, 0x3C753BFC, 0x3CD13C753BFC, 0x3CD23C753BFC, 0x3CD33C753BFC, 0x3CD43C753BFC, 0x3CD53C753BFC, + 0x3CD63C753BFC, 0x3CD73C753BFC, 0x3CD83C753BFC, 0x3CD93C753BFC, 0x3CDA3C753BFC, 0x3CDB3C753BFC, 0x3CDC3C753BFC, 0x3CDD3C753BFC, 0x3CDE3C753BFC, 0x3CDF3C753BFC, 0x3CE03C753BFC, 0x3CE13C753BFC, 0x3CE23C753BFC, 0x3CE33C753BFC, 0x3CE43C753BFC, + 0x3CE53C753BFC, 0x3CE63C753BFC, 0x3CE73C753BFC, 0x3CE83C753BFC, 0x3CE93C753BFC, 0x3CEA3C753BFC, 0x3CEB3C753BFC, 0x3C763BFC, 0x3CD13C763BFC, 0x3CD23C763BFC, 0x3CD33C763BFC, 0x3CD43C763BFC, 0x3CD53C763BFC, 0x3CD63C763BFC, 0x3CD73C763BFC, + 0x3CD83C763BFC, 0x3CD93C763BFC, 0x3CDA3C763BFC, 0x3CDB3C763BFC, 0x3CDC3C763BFC, 0x3CDD3C763BFC, 0x3CDE3C763BFC, 0x3CDF3C763BFC, 0x3CE03C763BFC, 0x3CE13C763BFC, 0x3CE23C763BFC, 0x3CE33C763BFC, 0x3CE43C763BFC, 0x3CE53C763BFC, 0x3CE63C763BFC, + 0x3CE73C763BFC, 0x3CE83C763BFC, 0x3CE93C763BFC, 0x3CEA3C763BFC, 0x3CEB3C763BFC, 0x3C773BFC, 0x3CD13C773BFC, 0x3CD23C773BFC, 0x3CD33C773BFC, 0x3CD43C773BFC, 0x3CD53C773BFC, 0x3CD63C773BFC, 0x3CD73C773BFC, 0x3CD83C773BFC, 0x3CD93C773BFC, + 
0x3CDA3C773BFC, 0x3CDB3C773BFC, 0x3CDC3C773BFC, 0x3CDD3C773BFC, 0x3CDE3C773BFC, 0x3CDF3C773BFC, 0x3CE03C773BFC, 0x3CE13C773BFC, 0x3CE23C773BFC, 0x3CE33C773BFC, 0x3CE43C773BFC, 0x3CE53C773BFC, 0x3CE63C773BFC, 0x3CE73C773BFC, 0x3CE83C773BFC, + 0x3CE93C773BFC, 0x3CEA3C773BFC, 0x3CEB3C773BFC, 0x3C783BFC, 0x3CD13C783BFC, 0x3CD23C783BFC, 0x3CD33C783BFC, 0x3CD43C783BFC, 0x3CD53C783BFC, 0x3CD63C783BFC, 0x3CD73C783BFC, 0x3CD83C783BFC, 0x3CD93C783BFC, 0x3CDA3C783BFC, 0x3CDB3C783BFC, + 0x3CDC3C783BFC, 0x3CDD3C783BFC, 0x3CDE3C783BFC, 0x3CDF3C783BFC, 0x3CE03C783BFC, 0x3CE13C783BFC, 0x3CE23C783BFC, 0x3CE33C783BFC, 0x3CE43C783BFC, 0x3CE53C783BFC, 0x3CE63C783BFC, 0x3CE73C783BFC, 0x3CE83C783BFC, 0x3CE93C783BFC, 0x3CEA3C783BFC, + 0x3CEB3C783BFC, 0x3C793BFC, 0x3CD13C793BFC, 0x3CD23C793BFC, 0x3CD33C793BFC, 0x3CD43C793BFC, 0x3CD53C793BFC, 0x3CD63C793BFC, 0x3CD73C793BFC, 0x3CD83C793BFC, 0x3CD93C793BFC, 0x3CDA3C793BFC, 0x3CDB3C793BFC, 0x3CDC3C793BFC, 0x3CDD3C793BFC, + 0x3CDE3C793BFC, 0x3CDF3C793BFC, 0x3CE03C793BFC, 0x3CE13C793BFC, 0x3CE23C793BFC, 0x3CE33C793BFC, 0x3CE43C793BFC, 0x3CE53C793BFC, 0x3CE63C793BFC, 0x3CE73C793BFC, 0x3CE83C793BFC, 0x3CE93C793BFC, 0x3CEA3C793BFC, 0x3CEB3C793BFC, 0x3C7A3BFC, + 0x3CD13C7A3BFC, 0x3CD23C7A3BFC, 0x3CD33C7A3BFC, 0x3CD43C7A3BFC, 0x3CD53C7A3BFC, 0x3CD63C7A3BFC, 0x3CD73C7A3BFC, 0x3CD83C7A3BFC, 0x3CD93C7A3BFC, 0x3CDA3C7A3BFC, 0x3CDB3C7A3BFC, 0x3CDC3C7A3BFC, 0x3CDD3C7A3BFC, 0x3CDE3C7A3BFC, 0x3CDF3C7A3BFC, + 0x3CE03C7A3BFC, 0x3CE13C7A3BFC, 0x3CE23C7A3BFC, 0x3CE33C7A3BFC, 0x3CE43C7A3BFC, 0x3CE53C7A3BFC, 0x3CE63C7A3BFC, 0x3CE73C7A3BFC, 0x3CE83C7A3BFC, 0x3CE93C7A3BFC, 0x3CEA3C7A3BFC, 0x3CEB3C7A3BFC, 0x3C7B3BFC, 0x3CD13C7B3BFC, 0x3CD23C7B3BFC, + 0x3CD33C7B3BFC, 0x3CD43C7B3BFC, 0x3CD53C7B3BFC, 0x3CD63C7B3BFC, 0x3CD73C7B3BFC, 0x3CD83C7B3BFC, 0x3CD93C7B3BFC, 0x3CDA3C7B3BFC, 0x3CDB3C7B3BFC, 0x3CDC3C7B3BFC, 0x3CDD3C7B3BFC, 0x3CDE3C7B3BFC, 0x3CDF3C7B3BFC, 0x3CE03C7B3BFC, 0x3CE13C7B3BFC, + 0x3CE23C7B3BFC, 0x3CE33C7B3BFC, 0x3CE43C7B3BFC, 0x3CE53C7B3BFC, 0x3CE63C7B3BFC, 
0x3CE73C7B3BFC, 0x3CE83C7B3BFC, 0x3CE93C7B3BFC, 0x3CEA3C7B3BFC, 0x3CEB3C7B3BFC, 0x3C7C3BFC, 0x3CD13C7C3BFC, 0x3CD23C7C3BFC, 0x3CD33C7C3BFC, 0x3CD43C7C3BFC, + 0x3CD53C7C3BFC, 0x3CD63C7C3BFC, 0x3CD73C7C3BFC, 0x3CD83C7C3BFC, 0x3CD93C7C3BFC, 0x3CDA3C7C3BFC, 0x3CDB3C7C3BFC, 0x3CDC3C7C3BFC, 0x3CDD3C7C3BFC, 0x3CDE3C7C3BFC, 0x3CDF3C7C3BFC, 0x3CE03C7C3BFC, 0x3CE13C7C3BFC, 0x3CE23C7C3BFC, 0x3CE33C7C3BFC, + 0x3CE43C7C3BFC, 0x3CE53C7C3BFC, 0x3CE63C7C3BFC, 0x3CE73C7C3BFC, 0x3CE83C7C3BFC, 0x3CE93C7C3BFC, 0x3CEA3C7C3BFC, 0x3CEB3C7C3BFC, 0x3C7D3BFC, 0x3CD13C7D3BFC, 0x3CD23C7D3BFC, 0x3CD33C7D3BFC, 0x3CD43C7D3BFC, 0x3CD53C7D3BFC, 0x3CD63C7D3BFC, + 0x3CD73C7D3BFC, 0x3CD83C7D3BFC, 0x3CD93C7D3BFC, 0x3CDA3C7D3BFC, 0x3CDB3C7D3BFC, 0x3CDC3C7D3BFC, 0x3CDD3C7D3BFC, 0x3CDE3C7D3BFC, 0x3CDF3C7D3BFC, 0x3CE03C7D3BFC, 0x3CE13C7D3BFC, 0x3CE23C7D3BFC, 0x3CE33C7D3BFC, 0x3CE43C7D3BFC, 0x3CE53C7D3BFC, + 0x3CE63C7D3BFC, 0x3CE73C7D3BFC, 0x3CE83C7D3BFC, 0x3CE93C7D3BFC, 0x3CEA3C7D3BFC, 0x3CEB3C7D3BFC, 0x3C7E3BFC, 0x3CD13C7E3BFC, 0x3CD23C7E3BFC, 0x3CD33C7E3BFC, 0x3CD43C7E3BFC, 0x3CD53C7E3BFC, 0x3CD63C7E3BFC, 0x3CD73C7E3BFC, 0x3CD83C7E3BFC, + 0x3CD93C7E3BFC, 0x3CDA3C7E3BFC, 0x3CDB3C7E3BFC, 0x3CDC3C7E3BFC, 0x3CDD3C7E3BFC, 0x3CDE3C7E3BFC, 0x3CDF3C7E3BFC, 0x3CE03C7E3BFC, 0x3CE13C7E3BFC, 0x3CE23C7E3BFC, 0x3CE33C7E3BFC, 0x3CE43C7E3BFC, 0x3CE53C7E3BFC, 0x3CE63C7E3BFC, 0x3CE73C7E3BFC, + 0x3CE83C7E3BFC, 0x3CE93C7E3BFC, 0x3CEA3C7E3BFC, 0x3CEB3C7E3BFC, 0x3C7F3BFC, 0x3CD13C7F3BFC, 0x3CD23C7F3BFC, 0x3CD33C7F3BFC, 0x3CD43C7F3BFC, 0x3CD53C7F3BFC, 0x3CD63C7F3BFC, 0x3CD73C7F3BFC, 0x3CD83C7F3BFC, 0x3CD93C7F3BFC, 0x3CDA3C7F3BFC, + 0x3CDB3C7F3BFC, 0x3CDC3C7F3BFC, 0x3CDD3C7F3BFC, 0x3CDE3C7F3BFC, 0x3CDF3C7F3BFC, 0x3CE03C7F3BFC, 0x3CE13C7F3BFC, 0x3CE23C7F3BFC, 0x3CE33C7F3BFC, 0x3CE43C7F3BFC, 0x3CE53C7F3BFC, 0x3CE63C7F3BFC, 0x3CE73C7F3BFC, 0x3CE83C7F3BFC, 0x3CE93C7F3BFC, + 0x3CEA3C7F3BFC, 0x3CEB3C7F3BFC, 0x3C803BFC, 0x3CD13C803BFC, 0x3CD23C803BFC, 0x3CD33C803BFC, 0x3CD43C803BFC, 0x3CD53C803BFC, 0x3CD63C803BFC, 0x3CD73C803BFC, 
0x3CD83C803BFC, 0x3CD93C803BFC, 0x3CDA3C803BFC, 0x3CDB3C803BFC, 0x3CDC3C803BFC, + 0x3CDD3C803BFC, 0x3CDE3C803BFC, 0x3CDF3C803BFC, 0x3CE03C803BFC, 0x3CE13C803BFC, 0x3CE23C803BFC, 0x3CE33C803BFC, 0x3CE43C803BFC, 0x3CE53C803BFC, 0x3CE63C803BFC, 0x3CE73C803BFC, 0x3CE83C803BFC, 0x3CE93C803BFC, 0x3CEA3C803BFC, 0x3CEB3C803BFC, + 0x3C813BFC, 0x3CD13C813BFC, 0x3CD23C813BFC, 0x3CD33C813BFC, 0x3CD43C813BFC, 0x3CD53C813BFC, 0x3CD63C813BFC, 0x3CD73C813BFC, 0x3CD83C813BFC, 0x3CD93C813BFC, 0x3CDA3C813BFC, 0x3CDB3C813BFC, 0x3CDC3C813BFC, 0x3CDD3C813BFC, 0x3CDE3C813BFC, + 0x3CDF3C813BFC, 0x3CE03C813BFC, 0x3CE13C813BFC, 0x3CE23C813BFC, 0x3CE33C813BFC, 0x3CE43C813BFC, 0x3CE53C813BFC, 0x3CE63C813BFC, 0x3CE73C813BFC, 0x3CE83C813BFC, 0x3CE93C813BFC, 0x3CEA3C813BFC, 0x3CEB3C813BFC, 0x3C823BFC, 0x3CD13C823BFC, + 0x3CD23C823BFC, 0x3CD33C823BFC, 0x3CD43C823BFC, 0x3CD53C823BFC, 0x3CD63C823BFC, 0x3CD73C823BFC, 0x3CD83C823BFC, 0x3CD93C823BFC, 0x3CDA3C823BFC, 0x3CDB3C823BFC, 0x3CDC3C823BFC, 0x3CDD3C823BFC, 0x3CDE3C823BFC, 0x3CDF3C823BFC, 0x3CE03C823BFC, + 0x3CE13C823BFC, 0x3CE23C823BFC, 0x3CE33C823BFC, 0x3CE43C823BFC, 0x3CE53C823BFC, 0x3CE63C823BFC, 0x3CE73C823BFC, 0x3CE83C823BFC, 0x3CE93C823BFC, 0x3CEA3C823BFC, 0x3CEB3C823BFC, 0x3C833BFC, 0x3CD13C833BFC, 0x3CD23C833BFC, 0x3CD33C833BFC, + 0x3CD43C833BFC, 0x3CD53C833BFC, 0x3CD63C833BFC, 0x3CD73C833BFC, 0x3CD83C833BFC, 0x3CD93C833BFC, 0x3CDA3C833BFC, 0x3CDB3C833BFC, 0x3CDC3C833BFC, 0x3CDD3C833BFC, 0x3CDE3C833BFC, 0x3CDF3C833BFC, 0x3CE03C833BFC, 0x3CE13C833BFC, 0x3CE23C833BFC, + 0x3CE33C833BFC, 0x3CE43C833BFC, 0x3CE53C833BFC, 0x3CE63C833BFC, 0x3CE73C833BFC, 0x3CE83C833BFC, 0x3CE93C833BFC, 0x3CEA3C833BFC, 0x3CEB3C833BFC, 0x3C843BFC, 0x3CD13C843BFC, 0x3CD23C843BFC, 0x3CD33C843BFC, 0x3CD43C843BFC, 0x3CD53C843BFC, + 0x3CD63C843BFC, 0x3CD73C843BFC, 0x3CD83C843BFC, 0x3CD93C843BFC, 0x3CDA3C843BFC, 0x3CDB3C843BFC, 0x3CDC3C843BFC, 0x3CDD3C843BFC, 0x3CDE3C843BFC, 0x3CDF3C843BFC, 0x3CE03C843BFC, 0x3CE13C843BFC, 0x3CE23C843BFC, 0x3CE33C843BFC, 0x3CE43C843BFC, 
+ 0x3CE53C843BFC, 0x3CE63C843BFC, 0x3CE73C843BFC, 0x3CE83C843BFC, 0x3CE93C843BFC, 0x3CEA3C843BFC, 0x3CEB3C843BFC, 0x3C853BFC, 0x3CD13C853BFC, 0x3CD23C853BFC, 0x3CD33C853BFC, 0x3CD43C853BFC, 0x3CD53C853BFC, 0x3CD63C853BFC, 0x3CD73C853BFC, + 0x3CD83C853BFC, 0x3CD93C853BFC, 0x3CDA3C853BFC, 0x3CDB3C853BFC, 0x3CDC3C853BFC, 0x3CDD3C853BFC, 0x3CDE3C853BFC, 0x3CDF3C853BFC, 0x3CE03C853BFC, 0x3CE13C853BFC, 0x3CE23C853BFC, 0x3CE33C853BFC, 0x3CE43C853BFC, 0x3CE53C853BFC, 0x3CE63C853BFC, + 0x3CE73C853BFC, 0x3CE83C853BFC, 0x3CE93C853BFC, 0x3CEA3C853BFC, 0x3CEB3C853BFC, 0x3C863BFC, 0x3CD13C863BFC, 0x3CD23C863BFC, 0x3CD33C863BFC, 0x3CD43C863BFC, 0x3CD53C863BFC, 0x3CD63C863BFC, 0x3CD73C863BFC, 0x3CD83C863BFC, 0x3CD93C863BFC, + 0x3CDA3C863BFC, 0x3CDB3C863BFC, 0x3CDC3C863BFC, 0x3CDD3C863BFC, 0x3CDE3C863BFC, 0x3CDF3C863BFC, 0x3CE03C863BFC, 0x3CE13C863BFC, 0x3CE23C863BFC, 0x3CE33C863BFC, 0x3CE43C863BFC, 0x3CE53C863BFC, 0x3CE63C863BFC, 0x3CE73C863BFC, 0x3CE83C863BFC, + 0x3CE93C863BFC, 0x3CEA3C863BFC, 0x3CEB3C863BFC, 0x3C873BFC, 0x3CD13C873BFC, 0x3CD23C873BFC, 0x3CD33C873BFC, 0x3CD43C873BFC, 0x3CD53C873BFC, 0x3CD63C873BFC, 0x3CD73C873BFC, 0x3CD83C873BFC, 0x3CD93C873BFC, 0x3CDA3C873BFC, 0x3CDB3C873BFC, + 0x3CDC3C873BFC, 0x3CDD3C873BFC, 0x3CDE3C873BFC, 0x3CDF3C873BFC, 0x3CE03C873BFC, 0x3CE13C873BFC, 0x3CE23C873BFC, 0x3CE33C873BFC, 0x3CE43C873BFC, 0x3CE53C873BFC, 0x3CE63C873BFC, 0x3CE73C873BFC, 0x3CE83C873BFC, 0x3CE93C873BFC, 0x3CEA3C873BFC, + 0x3CEB3C873BFC, 0x3C733BFD, 0x3CD13C733BFD, 0x3CD23C733BFD, 0x3CD33C733BFD, 0x3CD43C733BFD, 0x3CD53C733BFD, 0x3CD63C733BFD, 0x3CD73C733BFD, 0x3CD83C733BFD, 0x3CD93C733BFD, 0x3CDA3C733BFD, 0x3CDB3C733BFD, 0x3CDC3C733BFD, 0x3CDD3C733BFD, + 0x3CDE3C733BFD, 0x3CDF3C733BFD, 0x3CE03C733BFD, 0x3CE13C733BFD, 0x3CE23C733BFD, 0x3CE33C733BFD, 0x3CE43C733BFD, 0x3CE53C733BFD, 0x3CE63C733BFD, 0x3CE73C733BFD, 0x3CE83C733BFD, 0x3CE93C733BFD, 0x3CEA3C733BFD, 0x3CEB3C733BFD, 0x3C743BFD, + 0x3CD13C743BFD, 0x3CD23C743BFD, 0x3CD33C743BFD, 0x3CD43C743BFD, 0x3CD53C743BFD, 
0x3CD63C743BFD, 0x3CD73C743BFD, 0x3CD83C743BFD, 0x3CD93C743BFD, 0x3CDA3C743BFD, 0x3CDB3C743BFD, 0x3CDC3C743BFD, 0x3CDD3C743BFD, 0x3CDE3C743BFD, 0x3CDF3C743BFD, + 0x3CE03C743BFD, 0x3CE13C743BFD, 0x3CE23C743BFD, 0x3CE33C743BFD, 0x3CE43C743BFD, 0x3CE53C743BFD, 0x3CE63C743BFD, 0x3CE73C743BFD, 0x3CE83C743BFD, 0x3CE93C743BFD, 0x3CEA3C743BFD, 0x3CEB3C743BFD, 0x3C753BFD, 0x3CD13C753BFD, 0x3CD23C753BFD, + 0x3CD33C753BFD, 0x3CD43C753BFD, 0x3CD53C753BFD, 0x3CD63C753BFD, 0x3CD73C753BFD, 0x3CD83C753BFD, 0x3CD93C753BFD, 0x3CDA3C753BFD, 0x3CDB3C753BFD, 0x3CDC3C753BFD, 0x3CDD3C753BFD, 0x3CDE3C753BFD, 0x3CDF3C753BFD, 0x3CE03C753BFD, 0x3CE13C753BFD, + 0x3CE23C753BFD, 0x3CE33C753BFD, 0x3CE43C753BFD, 0x3CE53C753BFD, 0x3CE63C753BFD, 0x3CE73C753BFD, 0x3CE83C753BFD, 0x3CE93C753BFD, 0x3CEA3C753BFD, 0x3CEB3C753BFD, 0x3C763BFD, 0x3CD13C763BFD, 0x3CD23C763BFD, 0x3CD33C763BFD, 0x3CD43C763BFD, + 0x3CD53C763BFD, 0x3CD63C763BFD, 0x3CD73C763BFD, 0x3CD83C763BFD, 0x3CD93C763BFD, 0x3CDA3C763BFD, 0x3CDB3C763BFD, 0x3CDC3C763BFD, 0x3CDD3C763BFD, 0x3CDE3C763BFD, 0x3CDF3C763BFD, 0x3CE03C763BFD, 0x3CE13C763BFD, 0x3CE23C763BFD, 0x3CE33C763BFD, + 0x3CE43C763BFD, 0x3CE53C763BFD, 0x3CE63C763BFD, 0x3CE73C763BFD, 0x3CE83C763BFD, 0x3CE93C763BFD, 0x3CEA3C763BFD, 0x3CEB3C763BFD, 0x3C773BFD, 0x3CD13C773BFD, 0x3CD23C773BFD, 0x3CD33C773BFD, 0x3CD43C773BFD, 0x3CD53C773BFD, 0x3CD63C773BFD, + 0x3CD73C773BFD, 0x3CD83C773BFD, 0x3CD93C773BFD, 0x3CDA3C773BFD, 0x3CDB3C773BFD, 0x3CDC3C773BFD, 0x3CDD3C773BFD, 0x3CDE3C773BFD, 0x3CDF3C773BFD, 0x3CE03C773BFD, 0x3CE13C773BFD, 0x3CE23C773BFD, 0x3CE33C773BFD, 0x3CE43C773BFD, 0x3CE53C773BFD, + 0x3CE63C773BFD, 0x3CE73C773BFD, 0x3CE83C773BFD, 0x3CE93C773BFD, 0x3CEA3C773BFD, 0x3CEB3C773BFD, 0x3C783BFD, 0x3CD13C783BFD, 0x3CD23C783BFD, 0x3CD33C783BFD, 0x3CD43C783BFD, 0x3CD53C783BFD, 0x3CD63C783BFD, 0x3CD73C783BFD, 0x3CD83C783BFD, + 0x3CD93C783BFD, 0x3CDA3C783BFD, 0x3CDB3C783BFD, 0x3CDC3C783BFD, 0x3CDD3C783BFD, 0x3CDE3C783BFD, 0x3CDF3C783BFD, 0x3CE03C783BFD, 0x3CE13C783BFD, 0x3CE23C783BFD, 
0x3CE33C783BFD, 0x3CE43C783BFD, 0x3CE53C783BFD, 0x3CE63C783BFD, 0x3CE73C783BFD, + 0x3CE83C783BFD, 0x3CE93C783BFD, 0x3CEA3C783BFD, 0x3CEB3C783BFD, 0x3C793BFD, 0x3CD13C793BFD, 0x3CD23C793BFD, 0x3CD33C793BFD, 0x3CD43C793BFD, 0x3CD53C793BFD, 0x3CD63C793BFD, 0x3CD73C793BFD, 0x3CD83C793BFD, 0x3CD93C793BFD, 0x3CDA3C793BFD, + 0x3CDB3C793BFD, 0x3CDC3C793BFD, 0x3CDD3C793BFD, 0x3CDE3C793BFD, 0x3CDF3C793BFD, 0x3CE03C793BFD, 0x3CE13C793BFD, 0x3CE23C793BFD, 0x3CE33C793BFD, 0x3CE43C793BFD, 0x3CE53C793BFD, 0x3CE63C793BFD, 0x3CE73C793BFD, 0x3CE83C793BFD, 0x3CE93C793BFD, + 0x3CEA3C793BFD, 0x3CEB3C793BFD, 0x3C7A3BFD, 0x3CD13C7A3BFD, 0x3CD23C7A3BFD, 0x3CD33C7A3BFD, 0x3CD43C7A3BFD, 0x3CD53C7A3BFD, 0x3CD63C7A3BFD, 0x3CD73C7A3BFD, 0x3CD83C7A3BFD, 0x3CD93C7A3BFD, 0x3CDA3C7A3BFD, 0x3CDB3C7A3BFD, 0x3CDC3C7A3BFD, + 0x3CDD3C7A3BFD, 0x3CDE3C7A3BFD, 0x3CDF3C7A3BFD, 0x3CE03C7A3BFD, 0x3CE13C7A3BFD, 0x3CE23C7A3BFD, 0x3CE33C7A3BFD, 0x3CE43C7A3BFD, 0x3CE53C7A3BFD, 0x3CE63C7A3BFD, 0x3CE73C7A3BFD, 0x3CE83C7A3BFD, 0x3CE93C7A3BFD, 0x3CEA3C7A3BFD, 0x3CEB3C7A3BFD, + 0x3C7B3BFD, 0x3CD13C7B3BFD, 0x3CD23C7B3BFD, 0x3CD33C7B3BFD, 0x3CD43C7B3BFD, 0x3CD53C7B3BFD, 0x3CD63C7B3BFD, 0x3CD73C7B3BFD, 0x3CD83C7B3BFD, 0x3CD93C7B3BFD, 0x3CDA3C7B3BFD, 0x3CDB3C7B3BFD, 0x3CDC3C7B3BFD, 0x3CDD3C7B3BFD, 0x3CDE3C7B3BFD, + 0x3CDF3C7B3BFD, 0x3CE03C7B3BFD, 0x3CE13C7B3BFD, 0x3CE23C7B3BFD, 0x3CE33C7B3BFD, 0x3CE43C7B3BFD, 0x3CE53C7B3BFD, 0x3CE63C7B3BFD, 0x3CE73C7B3BFD, 0x3CE83C7B3BFD, 0x3CE93C7B3BFD, 0x3CEA3C7B3BFD, 0x3CEB3C7B3BFD, 0x3C7C3BFD, 0x3CD13C7C3BFD, + 0x3CD23C7C3BFD, 0x3CD33C7C3BFD, 0x3CD43C7C3BFD, 0x3CD53C7C3BFD, 0x3CD63C7C3BFD, 0x3CD73C7C3BFD, 0x3CD83C7C3BFD, 0x3CD93C7C3BFD, 0x3CDA3C7C3BFD, 0x3CDB3C7C3BFD, 0x3CDC3C7C3BFD, 0x3CDD3C7C3BFD, 0x3CDE3C7C3BFD, 0x3CDF3C7C3BFD, 0x3CE03C7C3BFD, + 0x3CE13C7C3BFD, 0x3CE23C7C3BFD, 0x3CE33C7C3BFD, 0x3CE43C7C3BFD, 0x3CE53C7C3BFD, 0x3CE63C7C3BFD, 0x3CE73C7C3BFD, 0x3CE83C7C3BFD, 0x3CE93C7C3BFD, 0x3CEA3C7C3BFD, 0x3CEB3C7C3BFD, 0x3C7D3BFD, 0x3CD13C7D3BFD, 0x3CD23C7D3BFD, 0x3CD33C7D3BFD, + 
0x3CD43C7D3BFD, 0x3CD53C7D3BFD, 0x3CD63C7D3BFD, 0x3CD73C7D3BFD, 0x3CD83C7D3BFD, 0x3CD93C7D3BFD, 0x3CDA3C7D3BFD, 0x3CDB3C7D3BFD, 0x3CDC3C7D3BFD, 0x3CDD3C7D3BFD, 0x3CDE3C7D3BFD, 0x3CDF3C7D3BFD, 0x3CE03C7D3BFD, 0x3CE13C7D3BFD, 0x3CE23C7D3BFD, + 0x3CE33C7D3BFD, 0x3CE43C7D3BFD, 0x3CE53C7D3BFD, 0x3CE63C7D3BFD, 0x3CE73C7D3BFD, 0x3CE83C7D3BFD, 0x3CE93C7D3BFD, 0x3CEA3C7D3BFD, 0x3CEB3C7D3BFD, 0x3C7E3BFD, 0x3CD13C7E3BFD, 0x3CD23C7E3BFD, 0x3CD33C7E3BFD, 0x3CD43C7E3BFD, 0x3CD53C7E3BFD, + 0x3CD63C7E3BFD, 0x3CD73C7E3BFD, 0x3CD83C7E3BFD, 0x3CD93C7E3BFD, 0x3CDA3C7E3BFD, 0x3CDB3C7E3BFD, 0x3CDC3C7E3BFD, 0x3CDD3C7E3BFD, 0x3CDE3C7E3BFD, 0x3CDF3C7E3BFD, 0x3CE03C7E3BFD, 0x3CE13C7E3BFD, 0x3CE23C7E3BFD, 0x3CE33C7E3BFD, 0x3CE43C7E3BFD, + 0x3CE53C7E3BFD, 0x3CE63C7E3BFD, 0x3CE73C7E3BFD, 0x3CE83C7E3BFD, 0x3CE93C7E3BFD, 0x3CEA3C7E3BFD, 0x3CEB3C7E3BFD, 0x3C7F3BFD, 0x3CD13C7F3BFD, 0x3CD23C7F3BFD, 0x3CD33C7F3BFD, 0x3CD43C7F3BFD, 0x3CD53C7F3BFD, 0x3CD63C7F3BFD, 0x3CD73C7F3BFD, + 0x3CD83C7F3BFD, 0x3CD93C7F3BFD, 0x3CDA3C7F3BFD, 0x3CDB3C7F3BFD, 0x3CDC3C7F3BFD, 0x3CDD3C7F3BFD, 0x3CDE3C7F3BFD, 0x3CDF3C7F3BFD, 0x3CE03C7F3BFD, 0x3CE13C7F3BFD, 0x3CE23C7F3BFD, 0x3CE33C7F3BFD, 0x3CE43C7F3BFD, 0x3CE53C7F3BFD, 0x3CE63C7F3BFD, + 0x3CE73C7F3BFD, 0x3CE83C7F3BFD, 0x3CE93C7F3BFD, 0x3CEA3C7F3BFD, 0x3CEB3C7F3BFD, 0x3C803BFD, 0x3CD13C803BFD, 0x3CD23C803BFD, 0x3CD33C803BFD, 0x3CD43C803BFD, 0x3CD53C803BFD, 0x3CD63C803BFD, 0x3CD73C803BFD, 0x3CD83C803BFD, 0x3CD93C803BFD, + 0x3CDA3C803BFD, 0x3CDB3C803BFD, 0x3CDC3C803BFD, 0x3CDD3C803BFD, 0x3CDE3C803BFD, 0x3CDF3C803BFD, 0x3CE03C803BFD, 0x3CE13C803BFD, 0x3CE23C803BFD, 0x3CE33C803BFD, 0x3CE43C803BFD, 0x3CE53C803BFD, 0x3CE63C803BFD, 0x3CE73C803BFD, 0x3CE83C803BFD, + 0x3CE93C803BFD, 0x3CEA3C803BFD, 0x3CEB3C803BFD, 0x3C813BFD, 0x3CD13C813BFD, 0x3CD23C813BFD, 0x3CD33C813BFD, 0x3CD43C813BFD, 0x3CD53C813BFD, 0x3CD63C813BFD, 0x3CD73C813BFD, 0x3CD83C813BFD, 0x3CD93C813BFD, 0x3CDA3C813BFD, 0x3CDB3C813BFD, + 0x3CDC3C813BFD, 0x3CDD3C813BFD, 0x3CDE3C813BFD, 0x3CDF3C813BFD, 0x3CE03C813BFD, 
0x3CE13C813BFD, 0x3CE23C813BFD, 0x3CE33C813BFD, 0x3CE43C813BFD, 0x3CE53C813BFD, 0x3CE63C813BFD, 0x3CE73C813BFD, 0x3CE83C813BFD, 0x3CE93C813BFD, 0x3CEA3C813BFD, + 0x3CEB3C813BFD, 0x3C823BFD, 0x3CD13C823BFD, 0x3CD23C823BFD, 0x3CD33C823BFD, 0x3CD43C823BFD, 0x3CD53C823BFD, 0x3CD63C823BFD, 0x3CD73C823BFD, 0x3CD83C823BFD, 0x3CD93C823BFD, 0x3CDA3C823BFD, 0x3CDB3C823BFD, 0x3CDC3C823BFD, 0x3CDD3C823BFD, + 0x3CDE3C823BFD, 0x3CDF3C823BFD, 0x3CE03C823BFD, 0x3CE13C823BFD, 0x3CE23C823BFD, 0x3CE33C823BFD, 0x3CE43C823BFD, 0x3CE53C823BFD, 0x3CE63C823BFD, 0x3CE73C823BFD, 0x3CE83C823BFD, 0x3CE93C823BFD, 0x3CEA3C823BFD, 0x3CEB3C823BFD, 0x3C833BFD, + 0x3CD13C833BFD, 0x3CD23C833BFD, 0x3CD33C833BFD, 0x3CD43C833BFD, 0x3CD53C833BFD, 0x3CD63C833BFD, 0x3CD73C833BFD, 0x3CD83C833BFD, 0x3CD93C833BFD, 0x3CDA3C833BFD, 0x3CDB3C833BFD, 0x3CDC3C833BFD, 0x3CDD3C833BFD, 0x3CDE3C833BFD, 0x3CDF3C833BFD, + 0x3CE03C833BFD, 0x3CE13C833BFD, 0x3CE23C833BFD, 0x3CE33C833BFD, 0x3CE43C833BFD, 0x3CE53C833BFD, 0x3CE63C833BFD, 0x3CE73C833BFD, 0x3CE83C833BFD, 0x3CE93C833BFD, 0x3CEA3C833BFD, 0x3CEB3C833BFD, 0x3C843BFD, 0x3CD13C843BFD, 0x3CD23C843BFD, + 0x3CD33C843BFD, 0x3CD43C843BFD, 0x3CD53C843BFD, 0x3CD63C843BFD, 0x3CD73C843BFD, 0x3CD83C843BFD, 0x3CD93C843BFD, 0x3CDA3C843BFD, 0x3CDB3C843BFD, 0x3CDC3C843BFD, 0x3CDD3C843BFD, 0x3CDE3C843BFD, 0x3CDF3C843BFD, 0x3CE03C843BFD, 0x3CE13C843BFD, + 0x3CE23C843BFD, 0x3CE33C843BFD, 0x3CE43C843BFD, 0x3CE53C843BFD, 0x3CE63C843BFD, 0x3CE73C843BFD, 0x3CE83C843BFD, 0x3CE93C843BFD, 0x3CEA3C843BFD, 0x3CEB3C843BFD, 0x3C853BFD, 0x3CD13C853BFD, 0x3CD23C853BFD, 0x3CD33C853BFD, 0x3CD43C853BFD, + 0x3CD53C853BFD, 0x3CD63C853BFD, 0x3CD73C853BFD, 0x3CD83C853BFD, 0x3CD93C853BFD, 0x3CDA3C853BFD, 0x3CDB3C853BFD, 0x3CDC3C853BFD, 0x3CDD3C853BFD, 0x3CDE3C853BFD, 0x3CDF3C853BFD, 0x3CE03C853BFD, 0x3CE13C853BFD, 0x3CE23C853BFD, 0x3CE33C853BFD, + 0x3CE43C853BFD, 0x3CE53C853BFD, 0x3CE63C853BFD, 0x3CE73C853BFD, 0x3CE83C853BFD, 0x3CE93C853BFD, 0x3CEA3C853BFD, 0x3CEB3C853BFD, 0x3C863BFD, 0x3CD13C863BFD, 
0x3CD23C863BFD, 0x3CD33C863BFD, 0x3CD43C863BFD, 0x3CD53C863BFD, 0x3CD63C863BFD, + 0x3CD73C863BFD, 0x3CD83C863BFD, 0x3CD93C863BFD, 0x3CDA3C863BFD, 0x3CDB3C863BFD, 0x3CDC3C863BFD, 0x3CDD3C863BFD, 0x3CDE3C863BFD, 0x3CDF3C863BFD, 0x3CE03C863BFD, 0x3CE13C863BFD, 0x3CE23C863BFD, 0x3CE33C863BFD, 0x3CE43C863BFD, 0x3CE53C863BFD, + 0x3CE63C863BFD, 0x3CE73C863BFD, 0x3CE83C863BFD, 0x3CE93C863BFD, 0x3CEA3C863BFD, 0x3CEB3C863BFD, 0x3C873BFD, 0x3CD13C873BFD, 0x3CD23C873BFD, 0x3CD33C873BFD, 0x3CD43C873BFD, 0x3CD53C873BFD, 0x3CD63C873BFD, 0x3CD73C873BFD, 0x3CD83C873BFD, + 0x3CD93C873BFD, 0x3CDA3C873BFD, 0x3CDB3C873BFD, 0x3CDC3C873BFD, 0x3CDD3C873BFD, 0x3CDE3C873BFD, 0x3CDF3C873BFD, 0x3CE03C873BFD, 0x3CE13C873BFD, 0x3CE23C873BFD, 0x3CE33C873BFD, 0x3CE43C873BFD, 0x3CE53C873BFD, 0x3CE63C873BFD, 0x3CE73C873BFD, + 0x3CE83C873BFD, 0x3CE93C873BFD, 0x3CEA3C873BFD, 0x3CEB3C873BFD, 0x3C733BFE, 0x3CD13C733BFE, 0x3CD23C733BFE, 0x3CD33C733BFE, 0x3CD43C733BFE, 0x3CD53C733BFE, 0x3CD63C733BFE, 0x3CD73C733BFE, 0x3CD83C733BFE, 0x3CD93C733BFE, 0x3CDA3C733BFE, + 0x3CDB3C733BFE, 0x3CDC3C733BFE, 0x3CDD3C733BFE, 0x3CDE3C733BFE, 0x3CDF3C733BFE, 0x3CE03C733BFE, 0x3CE13C733BFE, 0x3CE23C733BFE, 0x3CE33C733BFE, 0x3CE43C733BFE, 0x3CE53C733BFE, 0x3CE63C733BFE, 0x3CE73C733BFE, 0x3CE83C733BFE, 0x3CE93C733BFE, + 0x3CEA3C733BFE, 0x3CEB3C733BFE, 0x3C743BFE, 0x3CD13C743BFE, 0x3CD23C743BFE, 0x3CD33C743BFE, 0x3CD43C743BFE, 0x3CD53C743BFE, 0x3CD63C743BFE, 0x3CD73C743BFE, 0x3CD83C743BFE, 0x3CD93C743BFE, 0x3CDA3C743BFE, 0x3CDB3C743BFE, 0x3CDC3C743BFE, + 0x3CDD3C743BFE, 0x3CDE3C743BFE, 0x3CDF3C743BFE, 0x3CE03C743BFE, 0x3CE13C743BFE, 0x3CE23C743BFE, 0x3CE33C743BFE, 0x3CE43C743BFE, 0x3CE53C743BFE, 0x3CE63C743BFE, 0x3CE73C743BFE, 0x3CE83C743BFE, 0x3CE93C743BFE, 0x3CEA3C743BFE, 0x3CEB3C743BFE, + 0x3C753BFE, 0x3CD13C753BFE, 0x3CD23C753BFE, 0x3CD33C753BFE, 0x3CD43C753BFE, 0x3CD53C753BFE, 0x3CD63C753BFE, 0x3CD73C753BFE, 0x3CD83C753BFE, 0x3CD93C753BFE, 0x3CDA3C753BFE, 0x3CDB3C753BFE, 0x3CDC3C753BFE, 0x3CDD3C753BFE, 0x3CDE3C753BFE, 
+ 0x3CDF3C753BFE, 0x3CE03C753BFE, 0x3CE13C753BFE, 0x3CE23C753BFE, 0x3CE33C753BFE, 0x3CE43C753BFE, 0x3CE53C753BFE, 0x3CE63C753BFE, 0x3CE73C753BFE, 0x3CE83C753BFE, 0x3CE93C753BFE, 0x3CEA3C753BFE, 0x3CEB3C753BFE, 0x3C763BFE, 0x3CD13C763BFE, + 0x3CD23C763BFE, 0x3CD33C763BFE, 0x3CD43C763BFE, 0x3CD53C763BFE, 0x3CD63C763BFE, 0x3CD73C763BFE, 0x3CD83C763BFE, 0x3CD93C763BFE, 0x3CDA3C763BFE, 0x3CDB3C763BFE, 0x3CDC3C763BFE, 0x3CDD3C763BFE, 0x3CDE3C763BFE, 0x3CDF3C763BFE, 0x3CE03C763BFE, + 0x3CE13C763BFE, 0x3CE23C763BFE, 0x3CE33C763BFE, 0x3CE43C763BFE, 0x3CE53C763BFE, 0x3CE63C763BFE, 0x3CE73C763BFE, 0x3CE83C763BFE, 0x3CE93C763BFE, 0x3CEA3C763BFE, 0x3CEB3C763BFE, 0x3C773BFE, 0x3CD13C773BFE, 0x3CD23C773BFE, 0x3CD33C773BFE, + 0x3CD43C773BFE, 0x3CD53C773BFE, 0x3CD63C773BFE, 0x3CD73C773BFE, 0x3CD83C773BFE, 0x3CD93C773BFE, 0x3CDA3C773BFE, 0x3CDB3C773BFE, 0x3CDC3C773BFE, 0x3CDD3C773BFE, 0x3CDE3C773BFE, 0x3CDF3C773BFE, 0x3CE03C773BFE, 0x3CE13C773BFE, 0x3CE23C773BFE, + 0x3CE33C773BFE, 0x3CE43C773BFE, 0x3CE53C773BFE, 0x3CE63C773BFE, 0x3CE73C773BFE, 0x3CE83C773BFE, 0x3CE93C773BFE, 0x3CEA3C773BFE, 0x3CEB3C773BFE, 0x3C783BFE, 0x3CD13C783BFE, 0x3CD23C783BFE, 0x3CD33C783BFE, 0x3CD43C783BFE, 0x3CD53C783BFE, + 0x3CD63C783BFE, 0x3CD73C783BFE, 0x3CD83C783BFE, 0x3CD93C783BFE, 0x3CDA3C783BFE, 0x3CDB3C783BFE, 0x3CDC3C783BFE, 0x3CDD3C783BFE, 0x3CDE3C783BFE, 0x3CDF3C783BFE, 0x3CE03C783BFE, 0x3CE13C783BFE, 0x3CE23C783BFE, 0x3CE33C783BFE, 0x3CE43C783BFE, + 0x3CE53C783BFE, 0x3CE63C783BFE, 0x3CE73C783BFE, 0x3CE83C783BFE, 0x3CE93C783BFE, 0x3CEA3C783BFE, 0x3CEB3C783BFE, 0x3C793BFE, 0x3CD13C793BFE, 0x3CD23C793BFE, 0x3CD33C793BFE, 0x3CD43C793BFE, 0x3CD53C793BFE, 0x3CD63C793BFE, 0x3CD73C793BFE, + 0x3CD83C793BFE, 0x3CD93C793BFE, 0x3CDA3C793BFE, 0x3CDB3C793BFE, 0x3CDC3C793BFE, 0x3CDD3C793BFE, 0x3CDE3C793BFE, 0x3CDF3C793BFE, 0x3CE03C793BFE, 0x3CE13C793BFE, 0x3CE23C793BFE, 0x3CE33C793BFE, 0x3CE43C793BFE, 0x3CE53C793BFE, 0x3CE63C793BFE, + 0x3CE73C793BFE, 0x3CE83C793BFE, 0x3CE93C793BFE, 0x3CEA3C793BFE, 
0x3CEB3C793BFE, 0x3C7A3BFE, 0x3CD13C7A3BFE, 0x3CD23C7A3BFE, 0x3CD33C7A3BFE, 0x3CD43C7A3BFE, 0x3CD53C7A3BFE, 0x3CD63C7A3BFE, 0x3CD73C7A3BFE, 0x3CD83C7A3BFE, 0x3CD93C7A3BFE, + 0x3CDA3C7A3BFE, 0x3CDB3C7A3BFE, 0x3CDC3C7A3BFE, 0x3CDD3C7A3BFE, 0x3CDE3C7A3BFE, 0x3CDF3C7A3BFE, 0x3CE03C7A3BFE, 0x3CE13C7A3BFE, 0x3CE23C7A3BFE, 0x3CE33C7A3BFE, 0x3CE43C7A3BFE, 0x3CE53C7A3BFE, 0x3CE63C7A3BFE, 0x3CE73C7A3BFE, 0x3CE83C7A3BFE, + 0x3CE93C7A3BFE, 0x3CEA3C7A3BFE, 0x3CEB3C7A3BFE, 0x3C7B3BFE, 0x3CD13C7B3BFE, 0x3CD23C7B3BFE, 0x3CD33C7B3BFE, 0x3CD43C7B3BFE, 0x3CD53C7B3BFE, 0x3CD63C7B3BFE, 0x3CD73C7B3BFE, 0x3CD83C7B3BFE, 0x3CD93C7B3BFE, 0x3CDA3C7B3BFE, 0x3CDB3C7B3BFE, + 0x3CDC3C7B3BFE, 0x3CDD3C7B3BFE, 0x3CDE3C7B3BFE, 0x3CDF3C7B3BFE, 0x3CE03C7B3BFE, 0x3CE13C7B3BFE, 0x3CE23C7B3BFE, 0x3CE33C7B3BFE, 0x3CE43C7B3BFE, 0x3CE53C7B3BFE, 0x3CE63C7B3BFE, 0x3CE73C7B3BFE, 0x3CE83C7B3BFE, 0x3CE93C7B3BFE, 0x3CEA3C7B3BFE, + 0x3CEB3C7B3BFE, 0x3C7C3BFE, 0x3CD13C7C3BFE, 0x3CD23C7C3BFE, 0x3CD33C7C3BFE, 0x3CD43C7C3BFE, 0x3CD53C7C3BFE, 0x3CD63C7C3BFE, 0x3CD73C7C3BFE, 0x3CD83C7C3BFE, 0x3CD93C7C3BFE, 0x3CDA3C7C3BFE, 0x3CDB3C7C3BFE, 0x3CDC3C7C3BFE, 0x3CDD3C7C3BFE, + 0x3CDE3C7C3BFE, 0x3CDF3C7C3BFE, 0x3CE03C7C3BFE, 0x3CE13C7C3BFE, 0x3CE23C7C3BFE, 0x3CE33C7C3BFE, 0x3CE43C7C3BFE, 0x3CE53C7C3BFE, 0x3CE63C7C3BFE, 0x3CE73C7C3BFE, 0x3CE83C7C3BFE, 0x3CE93C7C3BFE, 0x3CEA3C7C3BFE, 0x3CEB3C7C3BFE, 0x3C7D3BFE, + 0x3CD13C7D3BFE, 0x3CD23C7D3BFE, 0x3CD33C7D3BFE, 0x3CD43C7D3BFE, 0x3CD53C7D3BFE, 0x3CD63C7D3BFE, 0x3CD73C7D3BFE, 0x3CD83C7D3BFE, 0x3CD93C7D3BFE, 0x3CDA3C7D3BFE, 0x3CDB3C7D3BFE, 0x3CDC3C7D3BFE, 0x3CDD3C7D3BFE, 0x3CDE3C7D3BFE, 0x3CDF3C7D3BFE, + 0x3CE03C7D3BFE, 0x3CE13C7D3BFE, 0x3CE23C7D3BFE, 0x3CE33C7D3BFE, 0x3CE43C7D3BFE, 0x3CE53C7D3BFE, 0x3CE63C7D3BFE, 0x3CE73C7D3BFE, 0x3CE83C7D3BFE, 0x3CE93C7D3BFE, 0x3CEA3C7D3BFE, 0x3CEB3C7D3BFE, 0x3C7E3BFE, 0x3CD13C7E3BFE, 0x3CD23C7E3BFE, + 0x3CD33C7E3BFE, 0x3CD43C7E3BFE, 0x3CD53C7E3BFE, 0x3CD63C7E3BFE, 0x3CD73C7E3BFE, 0x3CD83C7E3BFE, 0x3CD93C7E3BFE, 0x3CDA3C7E3BFE, 0x3CDB3C7E3BFE, 
0x3CDC3C7E3BFE, 0x3CDD3C7E3BFE, 0x3CDE3C7E3BFE, 0x3CDF3C7E3BFE, 0x3CE03C7E3BFE, 0x3CE13C7E3BFE, + 0x3CE23C7E3BFE, 0x3CE33C7E3BFE, 0x3CE43C7E3BFE, 0x3CE53C7E3BFE, 0x3CE63C7E3BFE, 0x3CE73C7E3BFE, 0x3CE83C7E3BFE, 0x3CE93C7E3BFE, 0x3CEA3C7E3BFE, 0x3CEB3C7E3BFE, 0x3C7F3BFE, 0x3CD13C7F3BFE, 0x3CD23C7F3BFE, 0x3CD33C7F3BFE, 0x3CD43C7F3BFE, + 0x3CD53C7F3BFE, 0x3CD63C7F3BFE, 0x3CD73C7F3BFE, 0x3CD83C7F3BFE, 0x3CD93C7F3BFE, 0x3CDA3C7F3BFE, 0x3CDB3C7F3BFE, 0x3CDC3C7F3BFE, 0x3CDD3C7F3BFE, 0x3CDE3C7F3BFE, 0x3CDF3C7F3BFE, 0x3CE03C7F3BFE, 0x3CE13C7F3BFE, 0x3CE23C7F3BFE, 0x3CE33C7F3BFE, + 0x3CE43C7F3BFE, 0x3CE53C7F3BFE, 0x3CE63C7F3BFE, 0x3CE73C7F3BFE, 0x3CE83C7F3BFE, 0x3CE93C7F3BFE, 0x3CEA3C7F3BFE, 0x3CEB3C7F3BFE, 0x3C803BFE, 0x3CD13C803BFE, 0x3CD23C803BFE, 0x3CD33C803BFE, 0x3CD43C803BFE, 0x3CD53C803BFE, 0x3CD63C803BFE, + 0x3CD73C803BFE, 0x3CD83C803BFE, 0x3CD93C803BFE, 0x3CDA3C803BFE, 0x3CDB3C803BFE, 0x3CDC3C803BFE, 0x3CDD3C803BFE, 0x3CDE3C803BFE, 0x3CDF3C803BFE, 0x3CE03C803BFE, 0x3CE13C803BFE, 0x3CE23C803BFE, 0x3CE33C803BFE, 0x3CE43C803BFE, 0x3CE53C803BFE, + 0x3CE63C803BFE, 0x3CE73C803BFE, 0x3CE83C803BFE, 0x3CE93C803BFE, 0x3CEA3C803BFE, 0x3CEB3C803BFE, 0x3C813BFE, 0x3CD13C813BFE, 0x3CD23C813BFE, 0x3CD33C813BFE, 0x3CD43C813BFE, 0x3CD53C813BFE, 0x3CD63C813BFE, 0x3CD73C813BFE, 0x3CD83C813BFE, + 0x3CD93C813BFE, 0x3CDA3C813BFE, 0x3CDB3C813BFE, 0x3CDC3C813BFE, 0x3CDD3C813BFE, 0x3CDE3C813BFE, 0x3CDF3C813BFE, 0x3CE03C813BFE, 0x3CE13C813BFE, 0x3CE23C813BFE, 0x3CE33C813BFE, 0x3CE43C813BFE, 0x3CE53C813BFE, 0x3CE63C813BFE, 0x3CE73C813BFE, + 0x3CE83C813BFE, 0x3CE93C813BFE, 0x3CEA3C813BFE, 0x3CEB3C813BFE, 0x3C823BFE, 0x3CD13C823BFE, 0x3CD23C823BFE, 0x3CD33C823BFE, 0x3CD43C823BFE, 0x3CD53C823BFE, 0x3CD63C823BFE, 0x3CD73C823BFE, 0x3CD83C823BFE, 0x3CD93C823BFE, 0x3CDA3C823BFE, + 0x3CDB3C823BFE, 0x3CDC3C823BFE, 0x3CDD3C823BFE, 0x3CDE3C823BFE, 0x3CDF3C823BFE, 0x3CE03C823BFE, 0x3CE13C823BFE, 0x3CE23C823BFE, 0x3CE33C823BFE, 0x3CE43C823BFE, 0x3CE53C823BFE, 0x3CE63C823BFE, 0x3CE73C823BFE, 0x3CE83C823BFE, 
0x3CE93C823BFE, + 0x3CEA3C823BFE, 0x3CEB3C823BFE, 0x3C833BFE, 0x3CD13C833BFE, 0x3CD23C833BFE, 0x3CD33C833BFE, 0x3CD43C833BFE, 0x3CD53C833BFE, 0x3CD63C833BFE, 0x3CD73C833BFE, 0x3CD83C833BFE, 0x3CD93C833BFE, 0x3CDA3C833BFE, 0x3CDB3C833BFE, 0x3CDC3C833BFE, + 0x3CDD3C833BFE, 0x3CDE3C833BFE, 0x3CDF3C833BFE, 0x3CE03C833BFE, 0x3CE13C833BFE, 0x3CE23C833BFE, 0x3CE33C833BFE, 0x3CE43C833BFE, 0x3CE53C833BFE, 0x3CE63C833BFE, 0x3CE73C833BFE, 0x3CE83C833BFE, 0x3CE93C833BFE, 0x3CEA3C833BFE, 0x3CEB3C833BFE, + 0x3C843BFE, 0x3CD13C843BFE, 0x3CD23C843BFE, 0x3CD33C843BFE, 0x3CD43C843BFE, 0x3CD53C843BFE, 0x3CD63C843BFE, 0x3CD73C843BFE, 0x3CD83C843BFE, 0x3CD93C843BFE, 0x3CDA3C843BFE, 0x3CDB3C843BFE, 0x3CDC3C843BFE, 0x3CDD3C843BFE, 0x3CDE3C843BFE, + 0x3CDF3C843BFE, 0x3CE03C843BFE, 0x3CE13C843BFE, 0x3CE23C843BFE, 0x3CE33C843BFE, 0x3CE43C843BFE, 0x3CE53C843BFE, 0x3CE63C843BFE, 0x3CE73C843BFE, 0x3CE83C843BFE, 0x3CE93C843BFE, 0x3CEA3C843BFE, 0x3CEB3C843BFE, 0x3C853BFE, 0x3CD13C853BFE, + 0x3CD23C853BFE, 0x3CD33C853BFE, 0x3CD43C853BFE, 0x3CD53C853BFE, 0x3CD63C853BFE, 0x3CD73C853BFE, 0x3CD83C853BFE, 0x3CD93C853BFE, 0x3CDA3C853BFE, 0x3CDB3C853BFE, 0x3CDC3C853BFE, 0x3CDD3C853BFE, 0x3CDE3C853BFE, 0x3CDF3C853BFE, 0x3CE03C853BFE, + 0x3CE13C853BFE, 0x3CE23C853BFE, 0x3CE33C853BFE, 0x3CE43C853BFE, 0x3CE53C853BFE, 0x3CE63C853BFE, 0x3CE73C853BFE, 0x3CE83C853BFE, 0x3CE93C853BFE, 0x3CEA3C853BFE, 0x3CEB3C853BFE, 0x3C863BFE, 0x3CD13C863BFE, 0x3CD23C863BFE, 0x3CD33C863BFE, + 0x3CD43C863BFE, 0x3CD53C863BFE, 0x3CD63C863BFE, 0x3CD73C863BFE, 0x3CD83C863BFE, 0x3CD93C863BFE, 0x3CDA3C863BFE, 0x3CDB3C863BFE, 0x3CDC3C863BFE, 0x3CDD3C863BFE, 0x3CDE3C863BFE, 0x3CDF3C863BFE, 0x3CE03C863BFE, 0x3CE13C863BFE, 0x3CE23C863BFE, + 0x3CE33C863BFE, 0x3CE43C863BFE, 0x3CE53C863BFE, 0x3CE63C863BFE, 0x3CE73C863BFE, 0x3CE83C863BFE, 0x3CE93C863BFE, 0x3CEA3C863BFE, 0x3CEB3C863BFE, 0x3C873BFE, 0x3CD13C873BFE, 0x3CD23C873BFE, 0x3CD33C873BFE, 0x3CD43C873BFE, 0x3CD53C873BFE, + 0x3CD63C873BFE, 0x3CD73C873BFE, 0x3CD83C873BFE, 0x3CD93C873BFE, 
0x3CDA3C873BFE, 0x3CDB3C873BFE, 0x3CDC3C873BFE, 0x3CDD3C873BFE, 0x3CDE3C873BFE, 0x3CDF3C873BFE, 0x3CE03C873BFE, 0x3CE13C873BFE, 0x3CE23C873BFE, 0x3CE33C873BFE, 0x3CE43C873BFE, + 0x3CE53C873BFE, 0x3CE63C873BFE, 0x3CE73C873BFE, 0x3CE83C873BFE, 0x3CE93C873BFE, 0x3CEA3C873BFE, 0x3CEB3C873BFE, 0x3C733BFF, 0x3CD13C733BFF, 0x3CD23C733BFF, 0x3CD33C733BFF, 0x3CD43C733BFF, 0x3CD53C733BFF, 0x3CD63C733BFF, 0x3CD73C733BFF, + 0x3CD83C733BFF, 0x3CD93C733BFF, 0x3CDA3C733BFF, 0x3CDB3C733BFF, 0x3CDC3C733BFF, 0x3CDD3C733BFF, 0x3CDE3C733BFF, 0x3CDF3C733BFF, 0x3CE03C733BFF, 0x3CE13C733BFF, 0x3CE23C733BFF, 0x3CE33C733BFF, 0x3CE43C733BFF, 0x3CE53C733BFF, 0x3CE63C733BFF, + 0x3CE73C733BFF, 0x3CE83C733BFF, 0x3CE93C733BFF, 0x3CEA3C733BFF, 0x3CEB3C733BFF, 0x3C743BFF, 0x3CD13C743BFF, 0x3CD23C743BFF, 0x3CD33C743BFF, 0x3CD43C743BFF, 0x3CD53C743BFF, 0x3CD63C743BFF, 0x3CD73C743BFF, 0x3CD83C743BFF, 0x3CD93C743BFF, + 0x3CDA3C743BFF, 0x3CDB3C743BFF, 0x3CDC3C743BFF, 0x3CDD3C743BFF, 0x3CDE3C743BFF, 0x3CDF3C743BFF, 0x3CE03C743BFF, 0x3CE13C743BFF, 0x3CE23C743BFF, 0x3CE33C743BFF, 0x3CE43C743BFF, 0x3CE53C743BFF, 0x3CE63C743BFF, 0x3CE73C743BFF, 0x3CE83C743BFF, + 0x3CE93C743BFF, 0x3CEA3C743BFF, 0x3CEB3C743BFF, 0x3C753BFF, 0x3CD13C753BFF, 0x3CD23C753BFF, 0x3CD33C753BFF, 0x3CD43C753BFF, 0x3CD53C753BFF, 0x3CD63C753BFF, 0x3CD73C753BFF, 0x3CD83C753BFF, 0x3CD93C753BFF, 0x3CDA3C753BFF, 0x3CDB3C753BFF, + 0x3CDC3C753BFF, 0x3CDD3C753BFF, 0x3CDE3C753BFF, 0x3CDF3C753BFF, 0x3CE03C753BFF, 0x3CE13C753BFF, 0x3CE23C753BFF, 0x3CE33C753BFF, 0x3CE43C753BFF, 0x3CE53C753BFF, 0x3CE63C753BFF, 0x3CE73C753BFF, 0x3CE83C753BFF, 0x3CE93C753BFF, 0x3CEA3C753BFF, + 0x3CEB3C753BFF, 0x3C763BFF, 0x3CD13C763BFF, 0x3CD23C763BFF, 0x3CD33C763BFF, 0x3CD43C763BFF, 0x3CD53C763BFF, 0x3CD63C763BFF, 0x3CD73C763BFF, 0x3CD83C763BFF, 0x3CD93C763BFF, 0x3CDA3C763BFF, 0x3CDB3C763BFF, 0x3CDC3C763BFF, 0x3CDD3C763BFF, + 0x3CDE3C763BFF, 0x3CDF3C763BFF, 0x3CE03C763BFF, 0x3CE13C763BFF, 0x3CE23C763BFF, 0x3CE33C763BFF, 0x3CE43C763BFF, 0x3CE53C763BFF, 0x3CE63C763BFF, 
0x3CE73C763BFF, 0x3CE83C763BFF, 0x3CE93C763BFF, 0x3CEA3C763BFF, 0x3CEB3C763BFF, 0x3C773BFF, + 0x3CD13C773BFF, 0x3CD23C773BFF, 0x3CD33C773BFF, 0x3CD43C773BFF, 0x3CD53C773BFF, 0x3CD63C773BFF, 0x3CD73C773BFF, 0x3CD83C773BFF, 0x3CD93C773BFF, 0x3CDA3C773BFF, 0x3CDB3C773BFF, 0x3CDC3C773BFF, 0x3CDD3C773BFF, 0x3CDE3C773BFF, 0x3CDF3C773BFF, + 0x3CE03C773BFF, 0x3CE13C773BFF, 0x3CE23C773BFF, 0x3CE33C773BFF, 0x3CE43C773BFF, 0x3CE53C773BFF, 0x3CE63C773BFF, 0x3CE73C773BFF, 0x3CE83C773BFF, 0x3CE93C773BFF, 0x3CEA3C773BFF, 0x3CEB3C773BFF, 0x3C783BFF, 0x3CD13C783BFF, 0x3CD23C783BFF, + 0x3CD33C783BFF, 0x3CD43C783BFF, 0x3CD53C783BFF, 0x3CD63C783BFF, 0x3CD73C783BFF, 0x3CD83C783BFF, 0x3CD93C783BFF, 0x3CDA3C783BFF, 0x3CDB3C783BFF, 0x3CDC3C783BFF, 0x3CDD3C783BFF, 0x3CDE3C783BFF, 0x3CDF3C783BFF, 0x3CE03C783BFF, 0x3CE13C783BFF, + 0x3CE23C783BFF, 0x3CE33C783BFF, 0x3CE43C783BFF, 0x3CE53C783BFF, 0x3CE63C783BFF, 0x3CE73C783BFF, 0x3CE83C783BFF, 0x3CE93C783BFF, 0x3CEA3C783BFF, 0x3CEB3C783BFF, 0x3C793BFF, 0x3CD13C793BFF, 0x3CD23C793BFF, 0x3CD33C793BFF, 0x3CD43C793BFF, + 0x3CD53C793BFF, 0x3CD63C793BFF, 0x3CD73C793BFF, 0x3CD83C793BFF, 0x3CD93C793BFF, 0x3CDA3C793BFF, 0x3CDB3C793BFF, 0x3CDC3C793BFF, 0x3CDD3C793BFF, 0x3CDE3C793BFF, 0x3CDF3C793BFF, 0x3CE03C793BFF, 0x3CE13C793BFF, 0x3CE23C793BFF, 0x3CE33C793BFF, + 0x3CE43C793BFF, 0x3CE53C793BFF, 0x3CE63C793BFF, 0x3CE73C793BFF, 0x3CE83C793BFF, 0x3CE93C793BFF, 0x3CEA3C793BFF, 0x3CEB3C793BFF, 0x3C7A3BFF, 0x3CD13C7A3BFF, 0x3CD23C7A3BFF, 0x3CD33C7A3BFF, 0x3CD43C7A3BFF, 0x3CD53C7A3BFF, 0x3CD63C7A3BFF, + 0x3CD73C7A3BFF, 0x3CD83C7A3BFF, 0x3CD93C7A3BFF, 0x3CDA3C7A3BFF, 0x3CDB3C7A3BFF, 0x3CDC3C7A3BFF, 0x3CDD3C7A3BFF, 0x3CDE3C7A3BFF, 0x3CDF3C7A3BFF, 0x3CE03C7A3BFF, 0x3CE13C7A3BFF, 0x3CE23C7A3BFF, 0x3CE33C7A3BFF, 0x3CE43C7A3BFF, 0x3CE53C7A3BFF, + 0x3CE63C7A3BFF, 0x3CE73C7A3BFF, 0x3CE83C7A3BFF, 0x3CE93C7A3BFF, 0x3CEA3C7A3BFF, 0x3CEB3C7A3BFF, 0x3C7B3BFF, 0x3CD13C7B3BFF, 0x3CD23C7B3BFF, 0x3CD33C7B3BFF, 0x3CD43C7B3BFF, 0x3CD53C7B3BFF, 0x3CD63C7B3BFF, 0x3CD73C7B3BFF, 
0x3CD83C7B3BFF, + 0x3CD93C7B3BFF, 0x3CDA3C7B3BFF, 0x3CDB3C7B3BFF, 0x3CDC3C7B3BFF, 0x3CDD3C7B3BFF, 0x3CDE3C7B3BFF, 0x3CDF3C7B3BFF, 0x3CE03C7B3BFF, 0x3CE13C7B3BFF, 0x3CE23C7B3BFF, 0x3CE33C7B3BFF, 0x3CE43C7B3BFF, 0x3CE53C7B3BFF, 0x3CE63C7B3BFF, 0x3CE73C7B3BFF, + 0x3CE83C7B3BFF, 0x3CE93C7B3BFF, 0x3CEA3C7B3BFF, 0x3CEB3C7B3BFF, 0x3C7C3BFF, 0x3CD13C7C3BFF, 0x3CD23C7C3BFF, 0x3CD33C7C3BFF, 0x3CD43C7C3BFF, 0x3CD53C7C3BFF, 0x3CD63C7C3BFF, 0x3CD73C7C3BFF, 0x3CD83C7C3BFF, 0x3CD93C7C3BFF, 0x3CDA3C7C3BFF, + 0x3CDB3C7C3BFF, 0x3CDC3C7C3BFF, 0x3CDD3C7C3BFF, 0x3CDE3C7C3BFF, 0x3CDF3C7C3BFF, 0x3CE03C7C3BFF, 0x3CE13C7C3BFF, 0x3CE23C7C3BFF, 0x3CE33C7C3BFF, 0x3CE43C7C3BFF, 0x3CE53C7C3BFF, 0x3CE63C7C3BFF, 0x3CE73C7C3BFF, 0x3CE83C7C3BFF, 0x3CE93C7C3BFF, + 0x3CEA3C7C3BFF, 0x3CEB3C7C3BFF, 0x3C7D3BFF, 0x3CD13C7D3BFF, 0x3CD23C7D3BFF, 0x3CD33C7D3BFF, 0x3CD43C7D3BFF, 0x3CD53C7D3BFF, 0x3CD63C7D3BFF, 0x3CD73C7D3BFF, 0x3CD83C7D3BFF, 0x3CD93C7D3BFF, 0x3CDA3C7D3BFF, 0x3CDB3C7D3BFF, 0x3CDC3C7D3BFF, + 0x3CDD3C7D3BFF, 0x3CDE3C7D3BFF, 0x3CDF3C7D3BFF, 0x3CE03C7D3BFF, 0x3CE13C7D3BFF, 0x3CE23C7D3BFF, 0x3CE33C7D3BFF, 0x3CE43C7D3BFF, 0x3CE53C7D3BFF, 0x3CE63C7D3BFF, 0x3CE73C7D3BFF, 0x3CE83C7D3BFF, 0x3CE93C7D3BFF, 0x3CEA3C7D3BFF, 0x3CEB3C7D3BFF, + 0x3C7E3BFF, 0x3CD13C7E3BFF, 0x3CD23C7E3BFF, 0x3CD33C7E3BFF, 0x3CD43C7E3BFF, 0x3CD53C7E3BFF, 0x3CD63C7E3BFF, 0x3CD73C7E3BFF, 0x3CD83C7E3BFF, 0x3CD93C7E3BFF, 0x3CDA3C7E3BFF, 0x3CDB3C7E3BFF, 0x3CDC3C7E3BFF, 0x3CDD3C7E3BFF, 0x3CDE3C7E3BFF, + 0x3CDF3C7E3BFF, 0x3CE03C7E3BFF, 0x3CE13C7E3BFF, 0x3CE23C7E3BFF, 0x3CE33C7E3BFF, 0x3CE43C7E3BFF, 0x3CE53C7E3BFF, 0x3CE63C7E3BFF, 0x3CE73C7E3BFF, 0x3CE83C7E3BFF, 0x3CE93C7E3BFF, 0x3CEA3C7E3BFF, 0x3CEB3C7E3BFF, 0x3C7F3BFF, 0x3CD13C7F3BFF, + 0x3CD23C7F3BFF, 0x3CD33C7F3BFF, 0x3CD43C7F3BFF, 0x3CD53C7F3BFF, 0x3CD63C7F3BFF, 0x3CD73C7F3BFF, 0x3CD83C7F3BFF, 0x3CD93C7F3BFF, 0x3CDA3C7F3BFF, 0x3CDB3C7F3BFF, 0x3CDC3C7F3BFF, 0x3CDD3C7F3BFF, 0x3CDE3C7F3BFF, 0x3CDF3C7F3BFF, 0x3CE03C7F3BFF, + 0x3CE13C7F3BFF, 0x3CE23C7F3BFF, 0x3CE33C7F3BFF, 
0x3CE43C7F3BFF, 0x3CE53C7F3BFF, 0x3CE63C7F3BFF, 0x3CE73C7F3BFF, 0x3CE83C7F3BFF, 0x3CE93C7F3BFF, 0x3CEA3C7F3BFF, 0x3CEB3C7F3BFF, 0x3C803BFF, 0x3CD13C803BFF, 0x3CD23C803BFF, 0x3CD33C803BFF, + 0x3CD43C803BFF, 0x3CD53C803BFF, 0x3CD63C803BFF, 0x3CD73C803BFF, 0x3CD83C803BFF, 0x3CD93C803BFF, 0x3CDA3C803BFF, 0x3CDB3C803BFF, 0x3CDC3C803BFF, 0x3CDD3C803BFF, 0x3CDE3C803BFF, 0x3CDF3C803BFF, 0x3CE03C803BFF, 0x3CE13C803BFF, 0x3CE23C803BFF, + 0x3CE33C803BFF, 0x3CE43C803BFF, 0x3CE53C803BFF, 0x3CE63C803BFF, 0x3CE73C803BFF, 0x3CE83C803BFF, 0x3CE93C803BFF, 0x3CEA3C803BFF, 0x3CEB3C803BFF, 0x3C813BFF, 0x3CD13C813BFF, 0x3CD23C813BFF, 0x3CD33C813BFF, 0x3CD43C813BFF, 0x3CD53C813BFF, + 0x3CD63C813BFF, 0x3CD73C813BFF, 0x3CD83C813BFF, 0x3CD93C813BFF, 0x3CDA3C813BFF, 0x3CDB3C813BFF, 0x3CDC3C813BFF, 0x3CDD3C813BFF, 0x3CDE3C813BFF, 0x3CDF3C813BFF, 0x3CE03C813BFF, 0x3CE13C813BFF, 0x3CE23C813BFF, 0x3CE33C813BFF, 0x3CE43C813BFF, + 0x3CE53C813BFF, 0x3CE63C813BFF, 0x3CE73C813BFF, 0x3CE83C813BFF, 0x3CE93C813BFF, 0x3CEA3C813BFF, 0x3CEB3C813BFF, 0x3C823BFF, 0x3CD13C823BFF, 0x3CD23C823BFF, 0x3CD33C823BFF, 0x3CD43C823BFF, 0x3CD53C823BFF, 0x3CD63C823BFF, 0x3CD73C823BFF, + 0x3CD83C823BFF, 0x3CD93C823BFF, 0x3CDA3C823BFF, 0x3CDB3C823BFF, 0x3CDC3C823BFF, 0x3CDD3C823BFF, 0x3CDE3C823BFF, 0x3CDF3C823BFF, 0x3CE03C823BFF, 0x3CE13C823BFF, 0x3CE23C823BFF, 0x3CE33C823BFF, 0x3CE43C823BFF, 0x3CE53C823BFF, 0x3CE63C823BFF, + 0x3CE73C823BFF, 0x3CE83C823BFF, 0x3CE93C823BFF, 0x3CEA3C823BFF, 0x3CEB3C823BFF, 0x3C833BFF, 0x3CD13C833BFF, 0x3CD23C833BFF, 0x3CD33C833BFF, 0x3CD43C833BFF, 0x3CD53C833BFF, 0x3CD63C833BFF, 0x3CD73C833BFF, 0x3CD83C833BFF, 0x3CD93C833BFF, + 0x3CDA3C833BFF, 0x3CDB3C833BFF, 0x3CDC3C833BFF, 0x3CDD3C833BFF, 0x3CDE3C833BFF, 0x3CDF3C833BFF, 0x3CE03C833BFF, 0x3CE13C833BFF, 0x3CE23C833BFF, 0x3CE33C833BFF, 0x3CE43C833BFF, 0x3CE53C833BFF, 0x3CE63C833BFF, 0x3CE73C833BFF, 0x3CE83C833BFF, + 0x3CE93C833BFF, 0x3CEA3C833BFF, 0x3CEB3C833BFF, 0x3C843BFF, 0x3CD13C843BFF, 0x3CD23C843BFF, 0x3CD33C843BFF, 0x3CD43C843BFF, 
0x3CD53C843BFF, 0x3CD63C843BFF, 0x3CD73C843BFF, 0x3CD83C843BFF, 0x3CD93C843BFF, 0x3CDA3C843BFF, 0x3CDB3C843BFF, + 0x3CDC3C843BFF, 0x3CDD3C843BFF, 0x3CDE3C843BFF, 0x3CDF3C843BFF, 0x3CE03C843BFF, 0x3CE13C843BFF, 0x3CE23C843BFF, 0x3CE33C843BFF, 0x3CE43C843BFF, 0x3CE53C843BFF, 0x3CE63C843BFF, 0x3CE73C843BFF, 0x3CE83C843BFF, 0x3CE93C843BFF, 0x3CEA3C843BFF, + 0x3CEB3C843BFF, 0x3C853BFF, 0x3CD13C853BFF, 0x3CD23C853BFF, 0x3CD33C853BFF, 0x3CD43C853BFF, 0x3CD53C853BFF, 0x3CD63C853BFF, 0x3CD73C853BFF, 0x3CD83C853BFF, 0x3CD93C853BFF, 0x3CDA3C853BFF, 0x3CDB3C853BFF, 0x3CDC3C853BFF, 0x3CDD3C853BFF, + 0x3CDE3C853BFF, 0x3CDF3C853BFF, 0x3CE03C853BFF, 0x3CE13C853BFF, 0x3CE23C853BFF, 0x3CE33C853BFF, 0x3CE43C853BFF, 0x3CE53C853BFF, 0x3CE63C853BFF, 0x3CE73C853BFF, 0x3CE83C853BFF, 0x3CE93C853BFF, 0x3CEA3C853BFF, 0x3CEB3C853BFF, 0x3C863BFF, + 0x3CD13C863BFF, 0x3CD23C863BFF, 0x3CD33C863BFF, 0x3CD43C863BFF, 0x3CD53C863BFF, 0x3CD63C863BFF, 0x3CD73C863BFF, 0x3CD83C863BFF, 0x3CD93C863BFF, 0x3CDA3C863BFF, 0x3CDB3C863BFF, 0x3CDC3C863BFF, 0x3CDD3C863BFF, 0x3CDE3C863BFF, 0x3CDF3C863BFF, + 0x3CE03C863BFF, 0x3CE13C863BFF, 0x3CE23C863BFF, 0x3CE33C863BFF, 0x3CE43C863BFF, 0x3CE53C863BFF, 0x3CE63C863BFF, 0x3CE73C863BFF, 0x3CE83C863BFF, 0x3CE93C863BFF, 0x3CEA3C863BFF, 0x3CEB3C863BFF, 0x3C873BFF, 0x3CD13C873BFF, 0x3CD23C873BFF, + 0x3CD33C873BFF, 0x3CD43C873BFF, 0x3CD53C873BFF, 0x3CD63C873BFF, 0x3CD73C873BFF, 0x3CD83C873BFF, 0x3CD93C873BFF, 0x3CDA3C873BFF, 0x3CDB3C873BFF, 0x3CDC3C873BFF, 0x3CDD3C873BFF, 0x3CDE3C873BFF, 0x3CDF3C873BFF, 0x3CE03C873BFF, 0x3CE13C873BFF, + 0x3CE23C873BFF, 0x3CE33C873BFF, 0x3CE43C873BFF, 0x3CE53C873BFF, 0x3CE63C873BFF, 0x3CE73C873BFF, 0x3CE83C873BFF, 0x3CE93C873BFF, 0x3CEA3C873BFF, 0x3CEB3C873BFF, 0x3C733C00, 0x3CD13C733C00, 0x3CD23C733C00, 0x3CD33C733C00, 0x3CD43C733C00, + 0x3CD53C733C00, 0x3CD63C733C00, 0x3CD73C733C00, 0x3CD83C733C00, 0x3CD93C733C00, 0x3CDA3C733C00, 0x3CDB3C733C00, 0x3CDC3C733C00, 0x3CDD3C733C00, 0x3CDE3C733C00, 0x3CDF3C733C00, 0x3CE03C733C00, 0x3CE13C733C00, 
0x3CE23C733C00, 0x3CE33C733C00, + 0x3CE43C733C00, 0x3CE53C733C00, 0x3CE63C733C00, 0x3CE73C733C00, 0x3CE83C733C00, 0x3CE93C733C00, 0x3CEA3C733C00, 0x3CEB3C733C00, 0x3C743C00, 0x3CD13C743C00, 0x3CD23C743C00, 0x3CD33C743C00, 0x3CD43C743C00, 0x3CD53C743C00, 0x3CD63C743C00, + 0x3CD73C743C00, 0x3CD83C743C00, 0x3CD93C743C00, 0x3CDA3C743C00, 0x3CDB3C743C00, 0x3CDC3C743C00, 0x3CDD3C743C00, 0x3CDE3C743C00, 0x3CDF3C743C00, 0x3CE03C743C00, 0x3CE13C743C00, 0x3CE23C743C00, 0x3CE33C743C00, 0x3CE43C743C00, 0x3CE53C743C00, + 0x3CE63C743C00, 0x3CE73C743C00, 0x3CE83C743C00, 0x3CE93C743C00, 0x3CEA3C743C00, 0x3CEB3C743C00, 0x3C753C00, 0x3CD13C753C00, 0x3CD23C753C00, 0x3CD33C753C00, 0x3CD43C753C00, 0x3CD53C753C00, 0x3CD63C753C00, 0x3CD73C753C00, 0x3CD83C753C00, + 0x3CD93C753C00, 0x3CDA3C753C00, 0x3CDB3C753C00, 0x3CDC3C753C00, 0x3CDD3C753C00, 0x3CDE3C753C00, 0x3CDF3C753C00, 0x3CE03C753C00, 0x3CE13C753C00, 0x3CE23C753C00, 0x3CE33C753C00, 0x3CE43C753C00, 0x3CE53C753C00, 0x3CE63C753C00, 0x3CE73C753C00, + 0x3CE83C753C00, 0x3CE93C753C00, 0x3CEA3C753C00, 0x3CEB3C753C00, 0x3C763C00, 0x3CD13C763C00, 0x3CD23C763C00, 0x3CD33C763C00, 0x3CD43C763C00, 0x3CD53C763C00, 0x3CD63C763C00, 0x3CD73C763C00, 0x3CD83C763C00, 0x3CD93C763C00, 0x3CDA3C763C00, + 0x3CDB3C763C00, 0x3CDC3C763C00, 0x3CDD3C763C00, 0x3CDE3C763C00, 0x3CDF3C763C00, 0x3CE03C763C00, 0x3CE13C763C00, 0x3CE23C763C00, 0x3CE33C763C00, 0x3CE43C763C00, 0x3CE53C763C00, 0x3CE63C763C00, 0x3CE73C763C00, 0x3CE83C763C00, 0x3CE93C763C00, + 0x3CEA3C763C00, 0x3CEB3C763C00, 0x3C773C00, 0x3CD13C773C00, 0x3CD23C773C00, 0x3CD33C773C00, 0x3CD43C773C00, 0x3CD53C773C00, 0x3CD63C773C00, 0x3CD73C773C00, 0x3CD83C773C00, 0x3CD93C773C00, 0x3CDA3C773C00, 0x3CDB3C773C00, 0x3CDC3C773C00, + 0x3CDD3C773C00, 0x3CDE3C773C00, 0x3CDF3C773C00, 0x3CE03C773C00, 0x3CE13C773C00, 0x3CE23C773C00, 0x3CE33C773C00, 0x3CE43C773C00, 0x3CE53C773C00, 0x3CE63C773C00, 0x3CE73C773C00, 0x3CE83C773C00, 0x3CE93C773C00, 0x3CEA3C773C00, 0x3CEB3C773C00, + 0x3C783C00, 0x3CD13C783C00, 0x3CD23C783C00, 
0x3CD33C783C00, 0x3CD43C783C00, 0x3CD53C783C00, 0x3CD63C783C00, 0x3CD73C783C00, 0x3CD83C783C00, 0x3CD93C783C00, 0x3CDA3C783C00, 0x3CDB3C783C00, 0x3CDC3C783C00, 0x3CDD3C783C00, 0x3CDE3C783C00, + 0x3CDF3C783C00, 0x3CE03C783C00, 0x3CE13C783C00, 0x3CE23C783C00, 0x3CE33C783C00, 0x3CE43C783C00, 0x3CE53C783C00, 0x3CE63C783C00, 0x3CE73C783C00, 0x3CE83C783C00, 0x3CE93C783C00, 0x3CEA3C783C00, 0x3CEB3C783C00, 0x3C793C00, 0x3CD13C793C00, + 0x3CD23C793C00, 0x3CD33C793C00, 0x3CD43C793C00, 0x3CD53C793C00, 0x3CD63C793C00, 0x3CD73C793C00, 0x3CD83C793C00, 0x3CD93C793C00, 0x3CDA3C793C00, 0x3CDB3C793C00, 0x3CDC3C793C00, 0x3CDD3C793C00, 0x3CDE3C793C00, 0x3CDF3C793C00, 0x3CE03C793C00, + 0x3CE13C793C00, 0x3CE23C793C00, 0x3CE33C793C00, 0x3CE43C793C00, 0x3CE53C793C00, 0x3CE63C793C00, 0x3CE73C793C00, 0x3CE83C793C00, 0x3CE93C793C00, 0x3CEA3C793C00, 0x3CEB3C793C00, 0x3C7A3C00, 0x3CD13C7A3C00, 0x3CD23C7A3C00, 0x3CD33C7A3C00, + 0x3CD43C7A3C00, 0x3CD53C7A3C00, 0x3CD63C7A3C00, 0x3CD73C7A3C00, 0x3CD83C7A3C00, 0x3CD93C7A3C00, 0x3CDA3C7A3C00, 0x3CDB3C7A3C00, 0x3CDC3C7A3C00, 0x3CDD3C7A3C00, 0x3CDE3C7A3C00, 0x3CDF3C7A3C00, 0x3CE03C7A3C00, 0x3CE13C7A3C00, 0x3CE23C7A3C00, + 0x3CE33C7A3C00, 0x3CE43C7A3C00, 0x3CE53C7A3C00, 0x3CE63C7A3C00, 0x3CE73C7A3C00, 0x3CE83C7A3C00, 0x3CE93C7A3C00, 0x3CEA3C7A3C00, 0x3CEB3C7A3C00, 0x3C7B3C00, 0x3CD13C7B3C00, 0x3CD23C7B3C00, 0x3CD33C7B3C00, 0x3CD43C7B3C00, 0x3CD53C7B3C00, + 0x3CD63C7B3C00, 0x3CD73C7B3C00, 0x3CD83C7B3C00, 0x3CD93C7B3C00, 0x3CDA3C7B3C00, 0x3CDB3C7B3C00, 0x3CDC3C7B3C00, 0x3CDD3C7B3C00, 0x3CDE3C7B3C00, 0x3CDF3C7B3C00, 0x3CE03C7B3C00, 0x3CE13C7B3C00, 0x3CE23C7B3C00, 0x3CE33C7B3C00, 0x3CE43C7B3C00, + 0x3CE53C7B3C00, 0x3CE63C7B3C00, 0x3CE73C7B3C00, 0x3CE83C7B3C00, 0x3CE93C7B3C00, 0x3CEA3C7B3C00, 0x3CEB3C7B3C00, 0x3C7C3C00, 0x3CD13C7C3C00, 0x3CD23C7C3C00, 0x3CD33C7C3C00, 0x3CD43C7C3C00, 0x3CD53C7C3C00, 0x3CD63C7C3C00, 0x3CD73C7C3C00, + 0x3CD83C7C3C00, 0x3CD93C7C3C00, 0x3CDA3C7C3C00, 0x3CDB3C7C3C00, 0x3CDC3C7C3C00, 0x3CDD3C7C3C00, 0x3CDE3C7C3C00, 0x3CDF3C7C3C00, 
0x3CE03C7C3C00, 0x3CE13C7C3C00, 0x3CE23C7C3C00, 0x3CE33C7C3C00, 0x3CE43C7C3C00, 0x3CE53C7C3C00, 0x3CE63C7C3C00, + 0x3CE73C7C3C00, 0x3CE83C7C3C00, 0x3CE93C7C3C00, 0x3CEA3C7C3C00, 0x3CEB3C7C3C00, 0x3C7D3C00, 0x3CD13C7D3C00, 0x3CD23C7D3C00, 0x3CD33C7D3C00, 0x3CD43C7D3C00, 0x3CD53C7D3C00, 0x3CD63C7D3C00, 0x3CD73C7D3C00, 0x3CD83C7D3C00, 0x3CD93C7D3C00, + 0x3CDA3C7D3C00, 0x3CDB3C7D3C00, 0x3CDC3C7D3C00, 0x3CDD3C7D3C00, 0x3CDE3C7D3C00, 0x3CDF3C7D3C00, 0x3CE03C7D3C00, 0x3CE13C7D3C00, 0x3CE23C7D3C00, 0x3CE33C7D3C00, 0x3CE43C7D3C00, 0x3CE53C7D3C00, 0x3CE63C7D3C00, 0x3CE73C7D3C00, 0x3CE83C7D3C00, + 0x3CE93C7D3C00, 0x3CEA3C7D3C00, 0x3CEB3C7D3C00, 0x3C7E3C00, 0x3CD13C7E3C00, 0x3CD23C7E3C00, 0x3CD33C7E3C00, 0x3CD43C7E3C00, 0x3CD53C7E3C00, 0x3CD63C7E3C00, 0x3CD73C7E3C00, 0x3CD83C7E3C00, 0x3CD93C7E3C00, 0x3CDA3C7E3C00, 0x3CDB3C7E3C00, + 0x3CDC3C7E3C00, 0x3CDD3C7E3C00, 0x3CDE3C7E3C00, 0x3CDF3C7E3C00, 0x3CE03C7E3C00, 0x3CE13C7E3C00, 0x3CE23C7E3C00, 0x3CE33C7E3C00, 0x3CE43C7E3C00, 0x3CE53C7E3C00, 0x3CE63C7E3C00, 0x3CE73C7E3C00, 0x3CE83C7E3C00, 0x3CE93C7E3C00, 0x3CEA3C7E3C00, + 0x3CEB3C7E3C00, 0x3C7F3C00, 0x3CD13C7F3C00, 0x3CD23C7F3C00, 0x3CD33C7F3C00, 0x3CD43C7F3C00, 0x3CD53C7F3C00, 0x3CD63C7F3C00, 0x3CD73C7F3C00, 0x3CD83C7F3C00, 0x3CD93C7F3C00, 0x3CDA3C7F3C00, 0x3CDB3C7F3C00, 0x3CDC3C7F3C00, 0x3CDD3C7F3C00, + 0x3CDE3C7F3C00, 0x3CDF3C7F3C00, 0x3CE03C7F3C00, 0x3CE13C7F3C00, 0x3CE23C7F3C00, 0x3CE33C7F3C00, 0x3CE43C7F3C00, 0x3CE53C7F3C00, 0x3CE63C7F3C00, 0x3CE73C7F3C00, 0x3CE83C7F3C00, 0x3CE93C7F3C00, 0x3CEA3C7F3C00, 0x3CEB3C7F3C00, 0x3C803C00, + 0x3CD13C803C00, 0x3CD23C803C00, 0x3CD33C803C00, 0x3CD43C803C00, 0x3CD53C803C00, 0x3CD63C803C00, 0x3CD73C803C00, 0x3CD83C803C00, 0x3CD93C803C00, 0x3CDA3C803C00, 0x3CDB3C803C00, 0x3CDC3C803C00, 0x3CDD3C803C00, 0x3CDE3C803C00, 0x3CDF3C803C00, + 0x3CE03C803C00, 0x3CE13C803C00, 0x3CE23C803C00, 0x3CE33C803C00, 0x3CE43C803C00, 0x3CE53C803C00, 0x3CE63C803C00, 0x3CE73C803C00, 0x3CE83C803C00, 0x3CE93C803C00, 0x3CEA3C803C00, 0x3CEB3C803C00, 0x3C813C00, 
0x3CD13C813C00, 0x3CD23C813C00, + 0x3CD33C813C00, 0x3CD43C813C00, 0x3CD53C813C00, 0x3CD63C813C00, 0x3CD73C813C00, 0x3CD83C813C00, 0x3CD93C813C00, 0x3CDA3C813C00, 0x3CDB3C813C00, 0x3CDC3C813C00, 0x3CDD3C813C00, 0x3CDE3C813C00, 0x3CDF3C813C00, 0x3CE03C813C00, 0x3CE13C813C00, + 0x3CE23C813C00, 0x3CE33C813C00, 0x3CE43C813C00, 0x3CE53C813C00, 0x3CE63C813C00, 0x3CE73C813C00, 0x3CE83C813C00, 0x3CE93C813C00, 0x3CEA3C813C00, 0x3CEB3C813C00, 0x3C823C00, 0x3CD13C823C00, 0x3CD23C823C00, 0x3CD33C823C00, 0x3CD43C823C00, + 0x3CD53C823C00, 0x3CD63C823C00, 0x3CD73C823C00, 0x3CD83C823C00, 0x3CD93C823C00, 0x3CDA3C823C00, 0x3CDB3C823C00, 0x3CDC3C823C00, 0x3CDD3C823C00, 0x3CDE3C823C00, 0x3CDF3C823C00, 0x3CE03C823C00, 0x3CE13C823C00, 0x3CE23C823C00, 0x3CE33C823C00, + 0x3CE43C823C00, 0x3CE53C823C00, 0x3CE63C823C00, 0x3CE73C823C00, 0x3CE83C823C00, 0x3CE93C823C00, 0x3CEA3C823C00, 0x3CEB3C823C00, 0x3C833C00, 0x3CD13C833C00, 0x3CD23C833C00, 0x3CD33C833C00, 0x3CD43C833C00, 0x3CD53C833C00, 0x3CD63C833C00, + 0x3CD73C833C00, 0x3CD83C833C00, 0x3CD93C833C00, 0x3CDA3C833C00, 0x3CDB3C833C00, 0x3CDC3C833C00, 0x3CDD3C833C00, 0x3CDE3C833C00, 0x3CDF3C833C00, 0x3CE03C833C00, 0x3CE13C833C00, 0x3CE23C833C00, 0x3CE33C833C00, 0x3CE43C833C00, 0x3CE53C833C00, + 0x3CE63C833C00, 0x3CE73C833C00, 0x3CE83C833C00, 0x3CE93C833C00, 0x3CEA3C833C00, 0x3CEB3C833C00, 0x3C843C00, 0x3CD13C843C00, 0x3CD23C843C00, 0x3CD33C843C00, 0x3CD43C843C00, 0x3CD53C843C00, 0x3CD63C843C00, 0x3CD73C843C00, 0x3CD83C843C00, + 0x3CD93C843C00, 0x3CDA3C843C00, 0x3CDB3C843C00, 0x3CDC3C843C00, 0x3CDD3C843C00, 0x3CDE3C843C00, 0x3CDF3C843C00, 0x3CE03C843C00, 0x3CE13C843C00, 0x3CE23C843C00, 0x3CE33C843C00, 0x3CE43C843C00, 0x3CE53C843C00, 0x3CE63C843C00, 0x3CE73C843C00, + 0x3CE83C843C00, 0x3CE93C843C00, 0x3CEA3C843C00, 0x3CEB3C843C00, 0x3C853C00, 0x3CD13C853C00, 0x3CD23C853C00, 0x3CD33C853C00, 0x3CD43C853C00, 0x3CD53C853C00, 0x3CD63C853C00, 0x3CD73C853C00, 0x3CD83C853C00, 0x3CD93C853C00, 0x3CDA3C853C00, + 0x3CDB3C853C00, 0x3CDC3C853C00, 
0x3CDD3C853C00, 0x3CDE3C853C00, 0x3CDF3C853C00, 0x3CE03C853C00, 0x3CE13C853C00, 0x3CE23C853C00, 0x3CE33C853C00, 0x3CE43C853C00, 0x3CE53C853C00, 0x3CE63C853C00, 0x3CE73C853C00, 0x3CE83C853C00, 0x3CE93C853C00, + 0x3CEA3C853C00, 0x3CEB3C853C00, 0x3C863C00, 0x3CD13C863C00, 0x3CD23C863C00, 0x3CD33C863C00, 0x3CD43C863C00, 0x3CD53C863C00, 0x3CD63C863C00, 0x3CD73C863C00, 0x3CD83C863C00, 0x3CD93C863C00, 0x3CDA3C863C00, 0x3CDB3C863C00, 0x3CDC3C863C00, + 0x3CDD3C863C00, 0x3CDE3C863C00, 0x3CDF3C863C00, 0x3CE03C863C00, 0x3CE13C863C00, 0x3CE23C863C00, 0x3CE33C863C00, 0x3CE43C863C00, 0x3CE53C863C00, 0x3CE63C863C00, 0x3CE73C863C00, 0x3CE83C863C00, 0x3CE93C863C00, 0x3CEA3C863C00, 0x3CEB3C863C00, + 0x3C873C00, 0x3CD13C873C00, 0x3CD23C873C00, 0x3CD33C873C00, 0x3CD43C873C00, 0x3CD53C873C00, 0x3CD63C873C00, 0x3CD73C873C00, 0x3CD83C873C00, 0x3CD93C873C00, 0x3CDA3C873C00, 0x3CDB3C873C00, 0x3CDC3C873C00, 0x3CDD3C873C00, 0x3CDE3C873C00, + 0x3CDF3C873C00, 0x3CE03C873C00, 0x3CE13C873C00, 0x3CE23C873C00, 0x3CE33C873C00, 0x3CE43C873C00, 0x3CE53C873C00, 0x3CE63C873C00, 0x3CE73C873C00, 0x3CE83C873C00, 0x3CE93C873C00, 0x3CEA3C873C00, 0x3CEB3C873C00, 0x3C733C01, 0x3CD13C733C01, + 0x3CD23C733C01, 0x3CD33C733C01, 0x3CD43C733C01, 0x3CD53C733C01, 0x3CD63C733C01, 0x3CD73C733C01, 0x3CD83C733C01, 0x3CD93C733C01, 0x3CDA3C733C01, 0x3CDB3C733C01, 0x3CDC3C733C01, 0x3CDD3C733C01, 0x3CDE3C733C01, 0x3CDF3C733C01, 0x3CE03C733C01, + 0x3CE13C733C01, 0x3CE23C733C01, 0x3CE33C733C01, 0x3CE43C733C01, 0x3CE53C733C01, 0x3CE63C733C01, 0x3CE73C733C01, 0x3CE83C733C01, 0x3CE93C733C01, 0x3CEA3C733C01, 0x3CEB3C733C01, 0x3C743C01, 0x3CD13C743C01, 0x3CD23C743C01, 0x3CD33C743C01, + 0x3CD43C743C01, 0x3CD53C743C01, 0x3CD63C743C01, 0x3CD73C743C01, 0x3CD83C743C01, 0x3CD93C743C01, 0x3CDA3C743C01, 0x3CDB3C743C01, 0x3CDC3C743C01, 0x3CDD3C743C01, 0x3CDE3C743C01, 0x3CDF3C743C01, 0x3CE03C743C01, 0x3CE13C743C01, 0x3CE23C743C01, + 0x3CE33C743C01, 0x3CE43C743C01, 0x3CE53C743C01, 0x3CE63C743C01, 0x3CE73C743C01, 0x3CE83C743C01, 0x3CE93C743C01, 
0x3CEA3C743C01, 0x3CEB3C743C01, 0x3C753C01, 0x3CD13C753C01, 0x3CD23C753C01, 0x3CD33C753C01, 0x3CD43C753C01, 0x3CD53C753C01, + 0x3CD63C753C01, 0x3CD73C753C01, 0x3CD83C753C01, 0x3CD93C753C01, 0x3CDA3C753C01, 0x3CDB3C753C01, 0x3CDC3C753C01, 0x3CDD3C753C01, 0x3CDE3C753C01, 0x3CDF3C753C01, 0x3CE03C753C01, 0x3CE13C753C01, 0x3CE23C753C01, 0x3CE33C753C01, 0x3CE43C753C01, + 0x3CE53C753C01, 0x3CE63C753C01, 0x3CE73C753C01, 0x3CE83C753C01, 0x3CE93C753C01, 0x3CEA3C753C01, 0x3CEB3C753C01, 0x3C763C01, 0x3CD13C763C01, 0x3CD23C763C01, 0x3CD33C763C01, 0x3CD43C763C01, 0x3CD53C763C01, 0x3CD63C763C01, 0x3CD73C763C01, + 0x3CD83C763C01, 0x3CD93C763C01, 0x3CDA3C763C01, 0x3CDB3C763C01, 0x3CDC3C763C01, 0x3CDD3C763C01, 0x3CDE3C763C01, 0x3CDF3C763C01, 0x3CE03C763C01, 0x3CE13C763C01, 0x3CE23C763C01, 0x3CE33C763C01, 0x3CE43C763C01, 0x3CE53C763C01, 0x3CE63C763C01, + 0x3CE73C763C01, 0x3CE83C763C01, 0x3CE93C763C01, 0x3CEA3C763C01, 0x3CEB3C763C01, 0x3C773C01, 0x3CD13C773C01, 0x3CD23C773C01, 0x3CD33C773C01, 0x3CD43C773C01, 0x3CD53C773C01, 0x3CD63C773C01, 0x3CD73C773C01, 0x3CD83C773C01, 0x3CD93C773C01, + 0x3CDA3C773C01, 0x3CDB3C773C01, 0x3CDC3C773C01, 0x3CDD3C773C01, 0x3CDE3C773C01, 0x3CDF3C773C01, 0x3CE03C773C01, 0x3CE13C773C01, 0x3CE23C773C01, 0x3CE33C773C01, 0x3CE43C773C01, 0x3CE53C773C01, 0x3CE63C773C01, 0x3CE73C773C01, 0x3CE83C773C01, + 0x3CE93C773C01, 0x3CEA3C773C01, 0x3CEB3C773C01, 0x3C783C01, 0x3CD13C783C01, 0x3CD23C783C01, 0x3CD33C783C01, 0x3CD43C783C01, 0x3CD53C783C01, 0x3CD63C783C01, 0x3CD73C783C01, 0x3CD83C783C01, 0x3CD93C783C01, 0x3CDA3C783C01, 0x3CDB3C783C01, + 0x3CDC3C783C01, 0x3CDD3C783C01, 0x3CDE3C783C01, 0x3CDF3C783C01, 0x3CE03C783C01, 0x3CE13C783C01, 0x3CE23C783C01, 0x3CE33C783C01, 0x3CE43C783C01, 0x3CE53C783C01, 0x3CE63C783C01, 0x3CE73C783C01, 0x3CE83C783C01, 0x3CE93C783C01, 0x3CEA3C783C01, + 0x3CEB3C783C01, 0x3C793C01, 0x3CD13C793C01, 0x3CD23C793C01, 0x3CD33C793C01, 0x3CD43C793C01, 0x3CD53C793C01, 0x3CD63C793C01, 0x3CD73C793C01, 0x3CD83C793C01, 0x3CD93C793C01, 0x3CDA3C793C01, 
0x3CDB3C793C01, 0x3CDC3C793C01, 0x3CDD3C793C01, + 0x3CDE3C793C01, 0x3CDF3C793C01, 0x3CE03C793C01, 0x3CE13C793C01, 0x3CE23C793C01, 0x3CE33C793C01, 0x3CE43C793C01, 0x3CE53C793C01, 0x3CE63C793C01, 0x3CE73C793C01, 0x3CE83C793C01, 0x3CE93C793C01, 0x3CEA3C793C01, 0x3CEB3C793C01, 0x3C7A3C01, + 0x3CD13C7A3C01, 0x3CD23C7A3C01, 0x3CD33C7A3C01, 0x3CD43C7A3C01, 0x3CD53C7A3C01, 0x3CD63C7A3C01, 0x3CD73C7A3C01, 0x3CD83C7A3C01, 0x3CD93C7A3C01, 0x3CDA3C7A3C01, 0x3CDB3C7A3C01, 0x3CDC3C7A3C01, 0x3CDD3C7A3C01, 0x3CDE3C7A3C01, 0x3CDF3C7A3C01, + 0x3CE03C7A3C01, 0x3CE13C7A3C01, 0x3CE23C7A3C01, 0x3CE33C7A3C01, 0x3CE43C7A3C01, 0x3CE53C7A3C01, 0x3CE63C7A3C01, 0x3CE73C7A3C01, 0x3CE83C7A3C01, 0x3CE93C7A3C01, 0x3CEA3C7A3C01, 0x3CEB3C7A3C01, 0x3C7B3C01, 0x3CD13C7B3C01, 0x3CD23C7B3C01, + 0x3CD33C7B3C01, 0x3CD43C7B3C01, 0x3CD53C7B3C01, 0x3CD63C7B3C01, 0x3CD73C7B3C01, 0x3CD83C7B3C01, 0x3CD93C7B3C01, 0x3CDA3C7B3C01, 0x3CDB3C7B3C01, 0x3CDC3C7B3C01, 0x3CDD3C7B3C01, 0x3CDE3C7B3C01, 0x3CDF3C7B3C01, 0x3CE03C7B3C01, 0x3CE13C7B3C01, + 0x3CE23C7B3C01, 0x3CE33C7B3C01, 0x3CE43C7B3C01, 0x3CE53C7B3C01, 0x3CE63C7B3C01, 0x3CE73C7B3C01, 0x3CE83C7B3C01, 0x3CE93C7B3C01, 0x3CEA3C7B3C01, 0x3CEB3C7B3C01, 0x3C7C3C01, 0x3CD13C7C3C01, 0x3CD23C7C3C01, 0x3CD33C7C3C01, 0x3CD43C7C3C01, + 0x3CD53C7C3C01, 0x3CD63C7C3C01, 0x3CD73C7C3C01, 0x3CD83C7C3C01, 0x3CD93C7C3C01, 0x3CDA3C7C3C01, 0x3CDB3C7C3C01, 0x3CDC3C7C3C01, 0x3CDD3C7C3C01, 0x3CDE3C7C3C01, 0x3CDF3C7C3C01, 0x3CE03C7C3C01, 0x3CE13C7C3C01, 0x3CE23C7C3C01, 0x3CE33C7C3C01, + 0x3CE43C7C3C01, 0x3CE53C7C3C01, 0x3CE63C7C3C01, 0x3CE73C7C3C01, 0x3CE83C7C3C01, 0x3CE93C7C3C01, 0x3CEA3C7C3C01, 0x3CEB3C7C3C01, 0x3C7D3C01, 0x3CD13C7D3C01, 0x3CD23C7D3C01, 0x3CD33C7D3C01, 0x3CD43C7D3C01, 0x3CD53C7D3C01, 0x3CD63C7D3C01, + 0x3CD73C7D3C01, 0x3CD83C7D3C01, 0x3CD93C7D3C01, 0x3CDA3C7D3C01, 0x3CDB3C7D3C01, 0x3CDC3C7D3C01, 0x3CDD3C7D3C01, 0x3CDE3C7D3C01, 0x3CDF3C7D3C01, 0x3CE03C7D3C01, 0x3CE13C7D3C01, 0x3CE23C7D3C01, 0x3CE33C7D3C01, 0x3CE43C7D3C01, 0x3CE53C7D3C01, + 0x3CE63C7D3C01, 
0x3CE73C7D3C01, 0x3CE83C7D3C01, 0x3CE93C7D3C01, 0x3CEA3C7D3C01, 0x3CEB3C7D3C01, 0x3C7E3C01, 0x3CD13C7E3C01, 0x3CD23C7E3C01, 0x3CD33C7E3C01, 0x3CD43C7E3C01, 0x3CD53C7E3C01, 0x3CD63C7E3C01, 0x3CD73C7E3C01, 0x3CD83C7E3C01, + 0x3CD93C7E3C01, 0x3CDA3C7E3C01, 0x3CDB3C7E3C01, 0x3CDC3C7E3C01, 0x3CDD3C7E3C01, 0x3CDE3C7E3C01, 0x3CDF3C7E3C01, 0x3CE03C7E3C01, 0x3CE13C7E3C01, 0x3CE23C7E3C01, 0x3CE33C7E3C01, 0x3CE43C7E3C01, 0x3CE53C7E3C01, 0x3CE63C7E3C01, 0x3CE73C7E3C01, + 0x3CE83C7E3C01, 0x3CE93C7E3C01, 0x3CEA3C7E3C01, 0x3CEB3C7E3C01, 0x3C7F3C01, 0x3CD13C7F3C01, 0x3CD23C7F3C01, 0x3CD33C7F3C01, 0x3CD43C7F3C01, 0x3CD53C7F3C01, 0x3CD63C7F3C01, 0x3CD73C7F3C01, 0x3CD83C7F3C01, 0x3CD93C7F3C01, 0x3CDA3C7F3C01, + 0x3CDB3C7F3C01, 0x3CDC3C7F3C01, 0x3CDD3C7F3C01, 0x3CDE3C7F3C01, 0x3CDF3C7F3C01, 0x3CE03C7F3C01, 0x3CE13C7F3C01, 0x3CE23C7F3C01, 0x3CE33C7F3C01, 0x3CE43C7F3C01, 0x3CE53C7F3C01, 0x3CE63C7F3C01, 0x3CE73C7F3C01, 0x3CE83C7F3C01, 0x3CE93C7F3C01, + 0x3CEA3C7F3C01, 0x3CEB3C7F3C01, 0x3C803C01, 0x3CD13C803C01, 0x3CD23C803C01, 0x3CD33C803C01, 0x3CD43C803C01, 0x3CD53C803C01, 0x3CD63C803C01, 0x3CD73C803C01, 0x3CD83C803C01, 0x3CD93C803C01, 0x3CDA3C803C01, 0x3CDB3C803C01, 0x3CDC3C803C01, + 0x3CDD3C803C01, 0x3CDE3C803C01, 0x3CDF3C803C01, 0x3CE03C803C01, 0x3CE13C803C01, 0x3CE23C803C01, 0x3CE33C803C01, 0x3CE43C803C01, 0x3CE53C803C01, 0x3CE63C803C01, 0x3CE73C803C01, 0x3CE83C803C01, 0x3CE93C803C01, 0x3CEA3C803C01, 0x3CEB3C803C01, + 0x3C813C01, 0x3CD13C813C01, 0x3CD23C813C01, 0x3CD33C813C01, 0x3CD43C813C01, 0x3CD53C813C01, 0x3CD63C813C01, 0x3CD73C813C01, 0x3CD83C813C01, 0x3CD93C813C01, 0x3CDA3C813C01, 0x3CDB3C813C01, 0x3CDC3C813C01, 0x3CDD3C813C01, 0x3CDE3C813C01, + 0x3CDF3C813C01, 0x3CE03C813C01, 0x3CE13C813C01, 0x3CE23C813C01, 0x3CE33C813C01, 0x3CE43C813C01, 0x3CE53C813C01, 0x3CE63C813C01, 0x3CE73C813C01, 0x3CE83C813C01, 0x3CE93C813C01, 0x3CEA3C813C01, 0x3CEB3C813C01, 0x3C823C01, 0x3CD13C823C01, + 0x3CD23C823C01, 0x3CD33C823C01, 0x3CD43C823C01, 0x3CD53C823C01, 0x3CD63C823C01, 0x3CD73C823C01, 
0x3CD83C823C01, 0x3CD93C823C01, 0x3CDA3C823C01, 0x3CDB3C823C01, 0x3CDC3C823C01, 0x3CDD3C823C01, 0x3CDE3C823C01, 0x3CDF3C823C01, 0x3CE03C823C01, + 0x3CE13C823C01, 0x3CE23C823C01, 0x3CE33C823C01, 0x3CE43C823C01, 0x3CE53C823C01, 0x3CE63C823C01, 0x3CE73C823C01, 0x3CE83C823C01, 0x3CE93C823C01, 0x3CEA3C823C01, 0x3CEB3C823C01, 0x3C833C01, 0x3CD13C833C01, 0x3CD23C833C01, 0x3CD33C833C01, + 0x3CD43C833C01, 0x3CD53C833C01, 0x3CD63C833C01, 0x3CD73C833C01, 0x3CD83C833C01, 0x3CD93C833C01, 0x3CDA3C833C01, 0x3CDB3C833C01, 0x3CDC3C833C01, 0x3CDD3C833C01, 0x3CDE3C833C01, 0x3CDF3C833C01, 0x3CE03C833C01, 0x3CE13C833C01, 0x3CE23C833C01, + 0x3CE33C833C01, 0x3CE43C833C01, 0x3CE53C833C01, 0x3CE63C833C01, 0x3CE73C833C01, 0x3CE83C833C01, 0x3CE93C833C01, 0x3CEA3C833C01, 0x3CEB3C833C01, 0x3C843C01, 0x3CD13C843C01, 0x3CD23C843C01, 0x3CD33C843C01, 0x3CD43C843C01, 0x3CD53C843C01, + 0x3CD63C843C01, 0x3CD73C843C01, 0x3CD83C843C01, 0x3CD93C843C01, 0x3CDA3C843C01, 0x3CDB3C843C01, 0x3CDC3C843C01, 0x3CDD3C843C01, 0x3CDE3C843C01, 0x3CDF3C843C01, 0x3CE03C843C01, 0x3CE13C843C01, 0x3CE23C843C01, 0x3CE33C843C01, 0x3CE43C843C01, + 0x3CE53C843C01, 0x3CE63C843C01, 0x3CE73C843C01, 0x3CE83C843C01, 0x3CE93C843C01, 0x3CEA3C843C01, 0x3CEB3C843C01, 0x3C853C01, 0x3CD13C853C01, 0x3CD23C853C01, 0x3CD33C853C01, 0x3CD43C853C01, 0x3CD53C853C01, 0x3CD63C853C01, 0x3CD73C853C01, + 0x3CD83C853C01, 0x3CD93C853C01, 0x3CDA3C853C01, 0x3CDB3C853C01, 0x3CDC3C853C01, 0x3CDD3C853C01, 0x3CDE3C853C01, 0x3CDF3C853C01, 0x3CE03C853C01, 0x3CE13C853C01, 0x3CE23C853C01, 0x3CE33C853C01, 0x3CE43C853C01, 0x3CE53C853C01, 0x3CE63C853C01, + 0x3CE73C853C01, 0x3CE83C853C01, 0x3CE93C853C01, 0x3CEA3C853C01, 0x3CEB3C853C01, 0x3C863C01, 0x3CD13C863C01, 0x3CD23C863C01, 0x3CD33C863C01, 0x3CD43C863C01, 0x3CD53C863C01, 0x3CD63C863C01, 0x3CD73C863C01, 0x3CD83C863C01, 0x3CD93C863C01, + 0x3CDA3C863C01, 0x3CDB3C863C01, 0x3CDC3C863C01, 0x3CDD3C863C01, 0x3CDE3C863C01, 0x3CDF3C863C01, 0x3CE03C863C01, 0x3CE13C863C01, 0x3CE23C863C01, 0x3CE33C863C01, 0x3CE43C863C01, 
0x3CE53C863C01, 0x3CE63C863C01, 0x3CE73C863C01, 0x3CE83C863C01, + 0x3CE93C863C01, 0x3CEA3C863C01, 0x3CEB3C863C01, 0x3C873C01, 0x3CD13C873C01, 0x3CD23C873C01, 0x3CD33C873C01, 0x3CD43C873C01, 0x3CD53C873C01, 0x3CD63C873C01, 0x3CD73C873C01, 0x3CD83C873C01, 0x3CD93C873C01, 0x3CDA3C873C01, 0x3CDB3C873C01, + 0x3CDC3C873C01, 0x3CDD3C873C01, 0x3CDE3C873C01, 0x3CDF3C873C01, 0x3CE03C873C01, 0x3CE13C873C01, 0x3CE23C873C01, 0x3CE33C873C01, 0x3CE43C873C01, 0x3CE53C873C01, 0x3CE63C873C01, 0x3CE73C873C01, 0x3CE83C873C01, 0x3CE93C873C01, 0x3CEA3C873C01, + 0x3CEB3C873C01, 0x3C733C02, 0x3CD13C733C02, 0x3CD23C733C02, 0x3CD33C733C02, 0x3CD43C733C02, 0x3CD53C733C02, 0x3CD63C733C02, 0x3CD73C733C02, 0x3CD83C733C02, 0x3CD93C733C02, 0x3CDA3C733C02, 0x3CDB3C733C02, 0x3CDC3C733C02, 0x3CDD3C733C02, + 0x3CDE3C733C02, 0x3CDF3C733C02, 0x3CE03C733C02, 0x3CE13C733C02, 0x3CE23C733C02, 0x3CE33C733C02, 0x3CE43C733C02, 0x3CE53C733C02, 0x3CE63C733C02, 0x3CE73C733C02, 0x3CE83C733C02, 0x3CE93C733C02, 0x3CEA3C733C02, 0x3CEB3C733C02, 0x3C743C02, + 0x3CD13C743C02, 0x3CD23C743C02, 0x3CD33C743C02, 0x3CD43C743C02, 0x3CD53C743C02, 0x3CD63C743C02, 0x3CD73C743C02, 0x3CD83C743C02, 0x3CD93C743C02, 0x3CDA3C743C02, 0x3CDB3C743C02, 0x3CDC3C743C02, 0x3CDD3C743C02, 0x3CDE3C743C02, 0x3CDF3C743C02, + 0x3CE03C743C02, 0x3CE13C743C02, 0x3CE23C743C02, 0x3CE33C743C02, 0x3CE43C743C02, 0x3CE53C743C02, 0x3CE63C743C02, 0x3CE73C743C02, 0x3CE83C743C02, 0x3CE93C743C02, 0x3CEA3C743C02, 0x3CEB3C743C02, 0x3C753C02, 0x3CD13C753C02, 0x3CD23C753C02, + 0x3CD33C753C02, 0x3CD43C753C02, 0x3CD53C753C02, 0x3CD63C753C02, 0x3CD73C753C02, 0x3CD83C753C02, 0x3CD93C753C02, 0x3CDA3C753C02, 0x3CDB3C753C02, 0x3CDC3C753C02, 0x3CDD3C753C02, 0x3CDE3C753C02, 0x3CDF3C753C02, 0x3CE03C753C02, 0x3CE13C753C02, + 0x3CE23C753C02, 0x3CE33C753C02, 0x3CE43C753C02, 0x3CE53C753C02, 0x3CE63C753C02, 0x3CE73C753C02, 0x3CE83C753C02, 0x3CE93C753C02, 0x3CEA3C753C02, 0x3CEB3C753C02, 0x3C763C02, 0x3CD13C763C02, 0x3CD23C763C02, 0x3CD33C763C02, 0x3CD43C763C02, + 0x3CD53C763C02, 
0x3CD63C763C02, 0x3CD73C763C02, 0x3CD83C763C02, 0x3CD93C763C02, 0x3CDA3C763C02, 0x3CDB3C763C02, 0x3CDC3C763C02, 0x3CDD3C763C02, 0x3CDE3C763C02, 0x3CDF3C763C02, 0x3CE03C763C02, 0x3CE13C763C02, 0x3CE23C763C02, 0x3CE33C763C02, + 0x3CE43C763C02, 0x3CE53C763C02, 0x3CE63C763C02, 0x3CE73C763C02, 0x3CE83C763C02, 0x3CE93C763C02, 0x3CEA3C763C02, 0x3CEB3C763C02, 0x3C773C02, 0x3CD13C773C02, 0x3CD23C773C02, 0x3CD33C773C02, 0x3CD43C773C02, 0x3CD53C773C02, 0x3CD63C773C02, + 0x3CD73C773C02, 0x3CD83C773C02, 0x3CD93C773C02, 0x3CDA3C773C02, 0x3CDB3C773C02, 0x3CDC3C773C02, 0x3CDD3C773C02, 0x3CDE3C773C02, 0x3CDF3C773C02, 0x3CE03C773C02, 0x3CE13C773C02, 0x3CE23C773C02, 0x3CE33C773C02, 0x3CE43C773C02, 0x3CE53C773C02, + 0x3CE63C773C02, 0x3CE73C773C02, 0x3CE83C773C02, 0x3CE93C773C02, 0x3CEA3C773C02, 0x3CEB3C773C02, 0x3C783C02, 0x3CD13C783C02, 0x3CD23C783C02, 0x3CD33C783C02, 0x3CD43C783C02, 0x3CD53C783C02, 0x3CD63C783C02, 0x3CD73C783C02, 0x3CD83C783C02, + 0x3CD93C783C02, 0x3CDA3C783C02, 0x3CDB3C783C02, 0x3CDC3C783C02, 0x3CDD3C783C02, 0x3CDE3C783C02, 0x3CDF3C783C02, 0x3CE03C783C02, 0x3CE13C783C02, 0x3CE23C783C02, 0x3CE33C783C02, 0x3CE43C783C02, 0x3CE53C783C02, 0x3CE63C783C02, 0x3CE73C783C02, + 0x3CE83C783C02, 0x3CE93C783C02, 0x3CEA3C783C02, 0x3CEB3C783C02, 0x3C793C02, 0x3CD13C793C02, 0x3CD23C793C02, 0x3CD33C793C02, 0x3CD43C793C02, 0x3CD53C793C02, 0x3CD63C793C02, 0x3CD73C793C02, 0x3CD83C793C02, 0x3CD93C793C02, 0x3CDA3C793C02, + 0x3CDB3C793C02, 0x3CDC3C793C02, 0x3CDD3C793C02, 0x3CDE3C793C02, 0x3CDF3C793C02, 0x3CE03C793C02, 0x3CE13C793C02, 0x3CE23C793C02, 0x3CE33C793C02, 0x3CE43C793C02, 0x3CE53C793C02, 0x3CE63C793C02, 0x3CE73C793C02, 0x3CE83C793C02, 0x3CE93C793C02, + 0x3CEA3C793C02, 0x3CEB3C793C02, 0x3C7A3C02, 0x3CD13C7A3C02, 0x3CD23C7A3C02, 0x3CD33C7A3C02, 0x3CD43C7A3C02, 0x3CD53C7A3C02, 0x3CD63C7A3C02, 0x3CD73C7A3C02, 0x3CD83C7A3C02, 0x3CD93C7A3C02, 0x3CDA3C7A3C02, 0x3CDB3C7A3C02, 0x3CDC3C7A3C02, + 0x3CDD3C7A3C02, 0x3CDE3C7A3C02, 0x3CDF3C7A3C02, 0x3CE03C7A3C02, 0x3CE13C7A3C02, 0x3CE23C7A3C02, 
0x3CE33C7A3C02, 0x3CE43C7A3C02, 0x3CE53C7A3C02, 0x3CE63C7A3C02, 0x3CE73C7A3C02, 0x3CE83C7A3C02, 0x3CE93C7A3C02, 0x3CEA3C7A3C02, 0x3CEB3C7A3C02, + 0x3C7B3C02, 0x3CD13C7B3C02, 0x3CD23C7B3C02, 0x3CD33C7B3C02, 0x3CD43C7B3C02, 0x3CD53C7B3C02, 0x3CD63C7B3C02, 0x3CD73C7B3C02, 0x3CD83C7B3C02, 0x3CD93C7B3C02, 0x3CDA3C7B3C02, 0x3CDB3C7B3C02, 0x3CDC3C7B3C02, 0x3CDD3C7B3C02, 0x3CDE3C7B3C02, + 0x3CDF3C7B3C02, 0x3CE03C7B3C02, 0x3CE13C7B3C02, 0x3CE23C7B3C02, 0x3CE33C7B3C02, 0x3CE43C7B3C02, 0x3CE53C7B3C02, 0x3CE63C7B3C02, 0x3CE73C7B3C02, 0x3CE83C7B3C02, 0x3CE93C7B3C02, 0x3CEA3C7B3C02, 0x3CEB3C7B3C02, 0x3C7C3C02, 0x3CD13C7C3C02, + 0x3CD23C7C3C02, 0x3CD33C7C3C02, 0x3CD43C7C3C02, 0x3CD53C7C3C02, 0x3CD63C7C3C02, 0x3CD73C7C3C02, 0x3CD83C7C3C02, 0x3CD93C7C3C02, 0x3CDA3C7C3C02, 0x3CDB3C7C3C02, 0x3CDC3C7C3C02, 0x3CDD3C7C3C02, 0x3CDE3C7C3C02, 0x3CDF3C7C3C02, 0x3CE03C7C3C02, + 0x3CE13C7C3C02, 0x3CE23C7C3C02, 0x3CE33C7C3C02, 0x3CE43C7C3C02, 0x3CE53C7C3C02, 0x3CE63C7C3C02, 0x3CE73C7C3C02, 0x3CE83C7C3C02, 0x3CE93C7C3C02, 0x3CEA3C7C3C02, 0x3CEB3C7C3C02, 0x3C7D3C02, 0x3CD13C7D3C02, 0x3CD23C7D3C02, 0x3CD33C7D3C02, + 0x3CD43C7D3C02, 0x3CD53C7D3C02, 0x3CD63C7D3C02, 0x3CD73C7D3C02, 0x3CD83C7D3C02, 0x3CD93C7D3C02, 0x3CDA3C7D3C02, 0x3CDB3C7D3C02, 0x3CDC3C7D3C02, 0x3CDD3C7D3C02, 0x3CDE3C7D3C02, 0x3CDF3C7D3C02, 0x3CE03C7D3C02, 0x3CE13C7D3C02, 0x3CE23C7D3C02, + 0x3CE33C7D3C02, 0x3CE43C7D3C02, 0x3CE53C7D3C02, 0x3CE63C7D3C02, 0x3CE73C7D3C02, 0x3CE83C7D3C02, 0x3CE93C7D3C02, 0x3CEA3C7D3C02, 0x3CEB3C7D3C02, 0x3C7E3C02, 0x3CD13C7E3C02, 0x3CD23C7E3C02, 0x3CD33C7E3C02, 0x3CD43C7E3C02, 0x3CD53C7E3C02, + 0x3CD63C7E3C02, 0x3CD73C7E3C02, 0x3CD83C7E3C02, 0x3CD93C7E3C02, 0x3CDA3C7E3C02, 0x3CDB3C7E3C02, 0x3CDC3C7E3C02, 0x3CDD3C7E3C02, 0x3CDE3C7E3C02, 0x3CDF3C7E3C02, 0x3CE03C7E3C02, 0x3CE13C7E3C02, 0x3CE23C7E3C02, 0x3CE33C7E3C02, 0x3CE43C7E3C02, + 0x3CE53C7E3C02, 0x3CE63C7E3C02, 0x3CE73C7E3C02, 0x3CE83C7E3C02, 0x3CE93C7E3C02, 0x3CEA3C7E3C02, 0x3CEB3C7E3C02, 0x3C7F3C02, 0x3CD13C7F3C02, 0x3CD23C7F3C02, 0x3CD33C7F3C02, 
0x3CD43C7F3C02, 0x3CD53C7F3C02, 0x3CD63C7F3C02, 0x3CD73C7F3C02, + 0x3CD83C7F3C02, 0x3CD93C7F3C02, 0x3CDA3C7F3C02, 0x3CDB3C7F3C02, 0x3CDC3C7F3C02, 0x3CDD3C7F3C02, 0x3CDE3C7F3C02, 0x3CDF3C7F3C02, 0x3CE03C7F3C02, 0x3CE13C7F3C02, 0x3CE23C7F3C02, 0x3CE33C7F3C02, 0x3CE43C7F3C02, 0x3CE53C7F3C02, 0x3CE63C7F3C02, + 0x3CE73C7F3C02, 0x3CE83C7F3C02, 0x3CE93C7F3C02, 0x3CEA3C7F3C02, 0x3CEB3C7F3C02, 0x3C803C02, 0x3CD13C803C02, 0x3CD23C803C02, 0x3CD33C803C02, 0x3CD43C803C02, 0x3CD53C803C02, 0x3CD63C803C02, 0x3CD73C803C02, 0x3CD83C803C02, 0x3CD93C803C02, + 0x3CDA3C803C02, 0x3CDB3C803C02, 0x3CDC3C803C02, 0x3CDD3C803C02, 0x3CDE3C803C02, 0x3CDF3C803C02, 0x3CE03C803C02, 0x3CE13C803C02, 0x3CE23C803C02, 0x3CE33C803C02, 0x3CE43C803C02, 0x3CE53C803C02, 0x3CE63C803C02, 0x3CE73C803C02, 0x3CE83C803C02, + 0x3CE93C803C02, 0x3CEA3C803C02, 0x3CEB3C803C02, 0x3C813C02, 0x3CD13C813C02, 0x3CD23C813C02, 0x3CD33C813C02, 0x3CD43C813C02, 0x3CD53C813C02, 0x3CD63C813C02, 0x3CD73C813C02, 0x3CD83C813C02, 0x3CD93C813C02, 0x3CDA3C813C02, 0x3CDB3C813C02, + 0x3CDC3C813C02, 0x3CDD3C813C02, 0x3CDE3C813C02, 0x3CDF3C813C02, 0x3CE03C813C02, 0x3CE13C813C02, 0x3CE23C813C02, 0x3CE33C813C02, 0x3CE43C813C02, 0x3CE53C813C02, 0x3CE63C813C02, 0x3CE73C813C02, 0x3CE83C813C02, 0x3CE93C813C02, 0x3CEA3C813C02, + 0x3CEB3C813C02, 0x3C823C02, 0x3CD13C823C02, 0x3CD23C823C02, 0x3CD33C823C02, 0x3CD43C823C02, 0x3CD53C823C02, 0x3CD63C823C02, 0x3CD73C823C02, 0x3CD83C823C02, 0x3CD93C823C02, 0x3CDA3C823C02, 0x3CDB3C823C02, 0x3CDC3C823C02, 0x3CDD3C823C02, + 0x3CDE3C823C02, 0x3CDF3C823C02, 0x3CE03C823C02, 0x3CE13C823C02, 0x3CE23C823C02, 0x3CE33C823C02, 0x3CE43C823C02, 0x3CE53C823C02, 0x3CE63C823C02, 0x3CE73C823C02, 0x3CE83C823C02, 0x3CE93C823C02, 0x3CEA3C823C02, 0x3CEB3C823C02, 0x3C833C02, + 0x3CD13C833C02, 0x3CD23C833C02, 0x3CD33C833C02, 0x3CD43C833C02, 0x3CD53C833C02, 0x3CD63C833C02, 0x3CD73C833C02, 0x3CD83C833C02, 0x3CD93C833C02, 0x3CDA3C833C02, 0x3CDB3C833C02, 0x3CDC3C833C02, 0x3CDD3C833C02, 0x3CDE3C833C02, 0x3CDF3C833C02, + 
0x3CE03C833C02, 0x3CE13C833C02, 0x3CE23C833C02, 0x3CE33C833C02, 0x3CE43C833C02, 0x3CE53C833C02, 0x3CE63C833C02, 0x3CE73C833C02, 0x3CE83C833C02, 0x3CE93C833C02, 0x3CEA3C833C02, 0x3CEB3C833C02, 0x3C843C02, 0x3CD13C843C02, 0x3CD23C843C02, + 0x3CD33C843C02, 0x3CD43C843C02, 0x3CD53C843C02, 0x3CD63C843C02, 0x3CD73C843C02, 0x3CD83C843C02, 0x3CD93C843C02, 0x3CDA3C843C02, 0x3CDB3C843C02, 0x3CDC3C843C02, 0x3CDD3C843C02, 0x3CDE3C843C02, 0x3CDF3C843C02, 0x3CE03C843C02, 0x3CE13C843C02, + 0x3CE23C843C02, 0x3CE33C843C02, 0x3CE43C843C02, 0x3CE53C843C02, 0x3CE63C843C02, 0x3CE73C843C02, 0x3CE83C843C02, 0x3CE93C843C02, 0x3CEA3C843C02, 0x3CEB3C843C02, 0x3C853C02, 0x3CD13C853C02, 0x3CD23C853C02, 0x3CD33C853C02, 0x3CD43C853C02, + 0x3CD53C853C02, 0x3CD63C853C02, 0x3CD73C853C02, 0x3CD83C853C02, 0x3CD93C853C02, 0x3CDA3C853C02, 0x3CDB3C853C02, 0x3CDC3C853C02, 0x3CDD3C853C02, 0x3CDE3C853C02, 0x3CDF3C853C02, 0x3CE03C853C02, 0x3CE13C853C02, 0x3CE23C853C02, 0x3CE33C853C02, + 0x3CE43C853C02, 0x3CE53C853C02, 0x3CE63C853C02, 0x3CE73C853C02, 0x3CE83C853C02, 0x3CE93C853C02, 0x3CEA3C853C02, 0x3CEB3C853C02, 0x3C863C02, 0x3CD13C863C02, 0x3CD23C863C02, 0x3CD33C863C02, 0x3CD43C863C02, 0x3CD53C863C02, 0x3CD63C863C02, + 0x3CD73C863C02, 0x3CD83C863C02, 0x3CD93C863C02, 0x3CDA3C863C02, 0x3CDB3C863C02, 0x3CDC3C863C02, 0x3CDD3C863C02, 0x3CDE3C863C02, 0x3CDF3C863C02, 0x3CE03C863C02, 0x3CE13C863C02, 0x3CE23C863C02, 0x3CE33C863C02, 0x3CE43C863C02, 0x3CE53C863C02, + 0x3CE63C863C02, 0x3CE73C863C02, 0x3CE83C863C02, 0x3CE93C863C02, 0x3CEA3C863C02, 0x3CEB3C863C02, 0x3C873C02, 0x3CD13C873C02, 0x3CD23C873C02, 0x3CD33C873C02, 0x3CD43C873C02, 0x3CD53C873C02, 0x3CD63C873C02, 0x3CD73C873C02, 0x3CD83C873C02, + 0x3CD93C873C02, 0x3CDA3C873C02, 0x3CDB3C873C02, 0x3CDC3C873C02, 0x3CDD3C873C02, 0x3CDE3C873C02, 0x3CDF3C873C02, 0x3CE03C873C02, 0x3CE13C873C02, 0x3CE23C873C02, 0x3CE33C873C02, 0x3CE43C873C02, 0x3CE53C873C02, 0x3CE63C873C02, 0x3CE73C873C02, + 0x3CE83C873C02, 0x3CE93C873C02, 0x3CEA3C873C02, 0x3CEB3C873C02, 0x3C733C03, 
0x3CD13C733C03, 0x3CD23C733C03, 0x3CD33C733C03, 0x3CD43C733C03, 0x3CD53C733C03, 0x3CD63C733C03, 0x3CD73C733C03, 0x3CD83C733C03, 0x3CD93C733C03, 0x3CDA3C733C03, + 0x3CDB3C733C03, 0x3CDC3C733C03, 0x3CDD3C733C03, 0x3CDE3C733C03, 0x3CDF3C733C03, 0x3CE03C733C03, 0x3CE13C733C03, 0x3CE23C733C03, 0x3CE33C733C03, 0x3CE43C733C03, 0x3CE53C733C03, 0x3CE63C733C03, 0x3CE73C733C03, 0x3CE83C733C03, 0x3CE93C733C03, + 0x3CEA3C733C03, 0x3CEB3C733C03, 0x3C743C03, 0x3CD13C743C03, 0x3CD23C743C03, 0x3CD33C743C03, 0x3CD43C743C03, 0x3CD53C743C03, 0x3CD63C743C03, 0x3CD73C743C03, 0x3CD83C743C03, 0x3CD93C743C03, 0x3CDA3C743C03, 0x3CDB3C743C03, 0x3CDC3C743C03, + 0x3CDD3C743C03, 0x3CDE3C743C03, 0x3CDF3C743C03, 0x3CE03C743C03, 0x3CE13C743C03, 0x3CE23C743C03, 0x3CE33C743C03, 0x3CE43C743C03, 0x3CE53C743C03, 0x3CE63C743C03, 0x3CE73C743C03, 0x3CE83C743C03, 0x3CE93C743C03, 0x3CEA3C743C03, 0x3CEB3C743C03, + 0x3C753C03, 0x3CD13C753C03, 0x3CD23C753C03, 0x3CD33C753C03, 0x3CD43C753C03, 0x3CD53C753C03, 0x3CD63C753C03, 0x3CD73C753C03, 0x3CD83C753C03, 0x3CD93C753C03, 0x3CDA3C753C03, 0x3CDB3C753C03, 0x3CDC3C753C03, 0x3CDD3C753C03, 0x3CDE3C753C03, + 0x3CDF3C753C03, 0x3CE03C753C03, 0x3CE13C753C03, 0x3CE23C753C03, 0x3CE33C753C03, 0x3CE43C753C03, 0x3CE53C753C03, 0x3CE63C753C03, 0x3CE73C753C03, 0x3CE83C753C03, 0x3CE93C753C03, 0x3CEA3C753C03, 0x3CEB3C753C03, 0x3C763C03, 0x3CD13C763C03, + 0x3CD23C763C03, 0x3CD33C763C03, 0x3CD43C763C03, 0x3CD53C763C03, 0x3CD63C763C03, 0x3CD73C763C03, 0x3CD83C763C03, 0x3CD93C763C03, 0x3CDA3C763C03, 0x3CDB3C763C03, 0x3CDC3C763C03, 0x3CDD3C763C03, 0x3CDE3C763C03, 0x3CDF3C763C03, 0x3CE03C763C03, + 0x3CE13C763C03, 0x3CE23C763C03, 0x3CE33C763C03, 0x3CE43C763C03, 0x3CE53C763C03, 0x3CE63C763C03, 0x3CE73C763C03, 0x3CE83C763C03, 0x3CE93C763C03, 0x3CEA3C763C03, 0x3CEB3C763C03, 0x3C773C03, 0x3CD13C773C03, 0x3CD23C773C03, 0x3CD33C773C03, + 0x3CD43C773C03, 0x3CD53C773C03, 0x3CD63C773C03, 0x3CD73C773C03, 0x3CD83C773C03, 0x3CD93C773C03, 0x3CDA3C773C03, 0x3CDB3C773C03, 0x3CDC3C773C03, 0x3CDD3C773C03, 
0x3CDE3C773C03, 0x3CDF3C773C03, 0x3CE03C773C03, 0x3CE13C773C03, 0x3CE23C773C03, + 0x3CE33C773C03, 0x3CE43C773C03, 0x3CE53C773C03, 0x3CE63C773C03, 0x3CE73C773C03, 0x3CE83C773C03, 0x3CE93C773C03, 0x3CEA3C773C03, 0x3CEB3C773C03, 0x3C783C03, 0x3CD13C783C03, 0x3CD23C783C03, 0x3CD33C783C03, 0x3CD43C783C03, 0x3CD53C783C03, + 0x3CD63C783C03, 0x3CD73C783C03, 0x3CD83C783C03, 0x3CD93C783C03, 0x3CDA3C783C03, 0x3CDB3C783C03, 0x3CDC3C783C03, 0x3CDD3C783C03, 0x3CDE3C783C03, 0x3CDF3C783C03, 0x3CE03C783C03, 0x3CE13C783C03, 0x3CE23C783C03, 0x3CE33C783C03, 0x3CE43C783C03, + 0x3CE53C783C03, 0x3CE63C783C03, 0x3CE73C783C03, 0x3CE83C783C03, 0x3CE93C783C03, 0x3CEA3C783C03, 0x3CEB3C783C03, 0x3C793C03, 0x3CD13C793C03, 0x3CD23C793C03, 0x3CD33C793C03, 0x3CD43C793C03, 0x3CD53C793C03, 0x3CD63C793C03, 0x3CD73C793C03, + 0x3CD83C793C03, 0x3CD93C793C03, 0x3CDA3C793C03, 0x3CDB3C793C03, 0x3CDC3C793C03, 0x3CDD3C793C03, 0x3CDE3C793C03, 0x3CDF3C793C03, 0x3CE03C793C03, 0x3CE13C793C03, 0x3CE23C793C03, 0x3CE33C793C03, 0x3CE43C793C03, 0x3CE53C793C03, 0x3CE63C793C03, + 0x3CE73C793C03, 0x3CE83C793C03, 0x3CE93C793C03, 0x3CEA3C793C03, 0x3CEB3C793C03, 0x3C7A3C03, 0x3CD13C7A3C03, 0x3CD23C7A3C03, 0x3CD33C7A3C03, 0x3CD43C7A3C03, 0x3CD53C7A3C03, 0x3CD63C7A3C03, 0x3CD73C7A3C03, 0x3CD83C7A3C03, 0x3CD93C7A3C03, + 0x3CDA3C7A3C03, 0x3CDB3C7A3C03, 0x3CDC3C7A3C03, 0x3CDD3C7A3C03, 0x3CDE3C7A3C03, 0x3CDF3C7A3C03, 0x3CE03C7A3C03, 0x3CE13C7A3C03, 0x3CE23C7A3C03, 0x3CE33C7A3C03, 0x3CE43C7A3C03, 0x3CE53C7A3C03, 0x3CE63C7A3C03, 0x3CE73C7A3C03, 0x3CE83C7A3C03, + 0x3CE93C7A3C03, 0x3CEA3C7A3C03, 0x3CEB3C7A3C03, 0x3C7B3C03, 0x3CD13C7B3C03, 0x3CD23C7B3C03, 0x3CD33C7B3C03, 0x3CD43C7B3C03, 0x3CD53C7B3C03, 0x3CD63C7B3C03, 0x3CD73C7B3C03, 0x3CD83C7B3C03, 0x3CD93C7B3C03, 0x3CDA3C7B3C03, 0x3CDB3C7B3C03, + 0x3CDC3C7B3C03, 0x3CDD3C7B3C03, 0x3CDE3C7B3C03, 0x3CDF3C7B3C03, 0x3CE03C7B3C03, 0x3CE13C7B3C03, 0x3CE23C7B3C03, 0x3CE33C7B3C03, 0x3CE43C7B3C03, 0x3CE53C7B3C03, 0x3CE63C7B3C03, 0x3CE73C7B3C03, 0x3CE83C7B3C03, 0x3CE93C7B3C03, 0x3CEA3C7B3C03, 
+ 0x3CEB3C7B3C03, 0x3C7C3C03, 0x3CD13C7C3C03, 0x3CD23C7C3C03, 0x3CD33C7C3C03, 0x3CD43C7C3C03, 0x3CD53C7C3C03, 0x3CD63C7C3C03, 0x3CD73C7C3C03, 0x3CD83C7C3C03, 0x3CD93C7C3C03, 0x3CDA3C7C3C03, 0x3CDB3C7C3C03, 0x3CDC3C7C3C03, 0x3CDD3C7C3C03, + 0x3CDE3C7C3C03, 0x3CDF3C7C3C03, 0x3CE03C7C3C03, 0x3CE13C7C3C03, 0x3CE23C7C3C03, 0x3CE33C7C3C03, 0x3CE43C7C3C03, 0x3CE53C7C3C03, 0x3CE63C7C3C03, 0x3CE73C7C3C03, 0x3CE83C7C3C03, 0x3CE93C7C3C03, 0x3CEA3C7C3C03, 0x3CEB3C7C3C03, 0x3C7D3C03, + 0x3CD13C7D3C03, 0x3CD23C7D3C03, 0x3CD33C7D3C03, 0x3CD43C7D3C03, 0x3CD53C7D3C03, 0x3CD63C7D3C03, 0x3CD73C7D3C03, 0x3CD83C7D3C03, 0x3CD93C7D3C03, 0x3CDA3C7D3C03, 0x3CDB3C7D3C03, 0x3CDC3C7D3C03, 0x3CDD3C7D3C03, 0x3CDE3C7D3C03, 0x3CDF3C7D3C03, + 0x3CE03C7D3C03, 0x3CE13C7D3C03, 0x3CE23C7D3C03, 0x3CE33C7D3C03, 0x3CE43C7D3C03, 0x3CE53C7D3C03, 0x3CE63C7D3C03, 0x3CE73C7D3C03, 0x3CE83C7D3C03, 0x3CE93C7D3C03, 0x3CEA3C7D3C03, 0x3CEB3C7D3C03, 0x3C7E3C03, 0x3CD13C7E3C03, 0x3CD23C7E3C03, + 0x3CD33C7E3C03, 0x3CD43C7E3C03, 0x3CD53C7E3C03, 0x3CD63C7E3C03, 0x3CD73C7E3C03, 0x3CD83C7E3C03, 0x3CD93C7E3C03, 0x3CDA3C7E3C03, 0x3CDB3C7E3C03, 0x3CDC3C7E3C03, 0x3CDD3C7E3C03, 0x3CDE3C7E3C03, 0x3CDF3C7E3C03, 0x3CE03C7E3C03, 0x3CE13C7E3C03, + 0x3CE23C7E3C03, 0x3CE33C7E3C03, 0x3CE43C7E3C03, 0x3CE53C7E3C03, 0x3CE63C7E3C03, 0x3CE73C7E3C03, 0x3CE83C7E3C03, 0x3CE93C7E3C03, 0x3CEA3C7E3C03, 0x3CEB3C7E3C03, 0x3C7F3C03, 0x3CD13C7F3C03, 0x3CD23C7F3C03, 0x3CD33C7F3C03, 0x3CD43C7F3C03, + 0x3CD53C7F3C03, 0x3CD63C7F3C03, 0x3CD73C7F3C03, 0x3CD83C7F3C03, 0x3CD93C7F3C03, 0x3CDA3C7F3C03, 0x3CDB3C7F3C03, 0x3CDC3C7F3C03, 0x3CDD3C7F3C03, 0x3CDE3C7F3C03, 0x3CDF3C7F3C03, 0x3CE03C7F3C03, 0x3CE13C7F3C03, 0x3CE23C7F3C03, 0x3CE33C7F3C03, + 0x3CE43C7F3C03, 0x3CE53C7F3C03, 0x3CE63C7F3C03, 0x3CE73C7F3C03, 0x3CE83C7F3C03, 0x3CE93C7F3C03, 0x3CEA3C7F3C03, 0x3CEB3C7F3C03, 0x3C803C03, 0x3CD13C803C03, 0x3CD23C803C03, 0x3CD33C803C03, 0x3CD43C803C03, 0x3CD53C803C03, 0x3CD63C803C03, + 0x3CD73C803C03, 0x3CD83C803C03, 0x3CD93C803C03, 0x3CDA3C803C03, 0x3CDB3C803C03, 
0x3CDC3C803C03, 0x3CDD3C803C03, 0x3CDE3C803C03, 0x3CDF3C803C03, 0x3CE03C803C03, 0x3CE13C803C03, 0x3CE23C803C03, 0x3CE33C803C03, 0x3CE43C803C03, 0x3CE53C803C03, + 0x3CE63C803C03, 0x3CE73C803C03, 0x3CE83C803C03, 0x3CE93C803C03, 0x3CEA3C803C03, 0x3CEB3C803C03, 0x3C813C03, 0x3CD13C813C03, 0x3CD23C813C03, 0x3CD33C813C03, 0x3CD43C813C03, 0x3CD53C813C03, 0x3CD63C813C03, 0x3CD73C813C03, 0x3CD83C813C03, + 0x3CD93C813C03, 0x3CDA3C813C03, 0x3CDB3C813C03, 0x3CDC3C813C03, 0x3CDD3C813C03, 0x3CDE3C813C03, 0x3CDF3C813C03, 0x3CE03C813C03, 0x3CE13C813C03, 0x3CE23C813C03, 0x3CE33C813C03, 0x3CE43C813C03, 0x3CE53C813C03, 0x3CE63C813C03, 0x3CE73C813C03, + 0x3CE83C813C03, 0x3CE93C813C03, 0x3CEA3C813C03, 0x3CEB3C813C03, 0x3C823C03, 0x3CD13C823C03, 0x3CD23C823C03, 0x3CD33C823C03, 0x3CD43C823C03, 0x3CD53C823C03, 0x3CD63C823C03, 0x3CD73C823C03, 0x3CD83C823C03, 0x3CD93C823C03, 0x3CDA3C823C03, + 0x3CDB3C823C03, 0x3CDC3C823C03, 0x3CDD3C823C03, 0x3CDE3C823C03, 0x3CDF3C823C03, 0x3CE03C823C03, 0x3CE13C823C03, 0x3CE23C823C03, 0x3CE33C823C03, 0x3CE43C823C03, 0x3CE53C823C03, 0x3CE63C823C03, 0x3CE73C823C03, 0x3CE83C823C03, 0x3CE93C823C03, + 0x3CEA3C823C03, 0x3CEB3C823C03, 0x3C833C03, 0x3CD13C833C03, 0x3CD23C833C03, 0x3CD33C833C03, 0x3CD43C833C03, 0x3CD53C833C03, 0x3CD63C833C03, 0x3CD73C833C03, 0x3CD83C833C03, 0x3CD93C833C03, 0x3CDA3C833C03, 0x3CDB3C833C03, 0x3CDC3C833C03, + 0x3CDD3C833C03, 0x3CDE3C833C03, 0x3CDF3C833C03, 0x3CE03C833C03, 0x3CE13C833C03, 0x3CE23C833C03, 0x3CE33C833C03, 0x3CE43C833C03, 0x3CE53C833C03, 0x3CE63C833C03, 0x3CE73C833C03, 0x3CE83C833C03, 0x3CE93C833C03, 0x3CEA3C833C03, 0x3CEB3C833C03, + 0x3C843C03, 0x3CD13C843C03, 0x3CD23C843C03, 0x3CD33C843C03, 0x3CD43C843C03, 0x3CD53C843C03, 0x3CD63C843C03, 0x3CD73C843C03, 0x3CD83C843C03, 0x3CD93C843C03, 0x3CDA3C843C03, 0x3CDB3C843C03, 0x3CDC3C843C03, 0x3CDD3C843C03, 0x3CDE3C843C03, + 0x3CDF3C843C03, 0x3CE03C843C03, 0x3CE13C843C03, 0x3CE23C843C03, 0x3CE33C843C03, 0x3CE43C843C03, 0x3CE53C843C03, 0x3CE63C843C03, 0x3CE73C843C03, 0x3CE83C843C03, 
0x3CE93C843C03, 0x3CEA3C843C03, 0x3CEB3C843C03, 0x3C853C03, 0x3CD13C853C03, + 0x3CD23C853C03, 0x3CD33C853C03, 0x3CD43C853C03, 0x3CD53C853C03, 0x3CD63C853C03, 0x3CD73C853C03, 0x3CD83C853C03, 0x3CD93C853C03, 0x3CDA3C853C03, 0x3CDB3C853C03, 0x3CDC3C853C03, 0x3CDD3C853C03, 0x3CDE3C853C03, 0x3CDF3C853C03, 0x3CE03C853C03, + 0x3CE13C853C03, 0x3CE23C853C03, 0x3CE33C853C03, 0x3CE43C853C03, 0x3CE53C853C03, 0x3CE63C853C03, 0x3CE73C853C03, 0x3CE83C853C03, 0x3CE93C853C03, 0x3CEA3C853C03, 0x3CEB3C853C03, 0x3C863C03, 0x3CD13C863C03, 0x3CD23C863C03, 0x3CD33C863C03, + 0x3CD43C863C03, 0x3CD53C863C03, 0x3CD63C863C03, 0x3CD73C863C03, 0x3CD83C863C03, 0x3CD93C863C03, 0x3CDA3C863C03, 0x3CDB3C863C03, 0x3CDC3C863C03, 0x3CDD3C863C03, 0x3CDE3C863C03, 0x3CDF3C863C03, 0x3CE03C863C03, 0x3CE13C863C03, 0x3CE23C863C03, + 0x3CE33C863C03, 0x3CE43C863C03, 0x3CE53C863C03, 0x3CE63C863C03, 0x3CE73C863C03, 0x3CE83C863C03, 0x3CE93C863C03, 0x3CEA3C863C03, 0x3CEB3C863C03, 0x3C873C03, 0x3CD13C873C03, 0x3CD23C873C03, 0x3CD33C873C03, 0x3CD43C873C03, 0x3CD53C873C03, + 0x3CD63C873C03, 0x3CD73C873C03, 0x3CD83C873C03, 0x3CD93C873C03, 0x3CDA3C873C03, 0x3CDB3C873C03, 0x3CDC3C873C03, 0x3CDD3C873C03, 0x3CDE3C873C03, 0x3CDF3C873C03, 0x3CE03C873C03, 0x3CE13C873C03, 0x3CE23C873C03, 0x3CE33C873C03, 0x3CE43C873C03, + 0x3CE53C873C03, 0x3CE63C873C03, 0x3CE73C873C03, 0x3CE83C873C03, 0x3CE93C873C03, 0x3CEA3C873C03, 0x3CEB3C873C03, 0x3C733C04, 0x3CD13C733C04, 0x3CD23C733C04, 0x3CD33C733C04, 0x3CD43C733C04, 0x3CD53C733C04, 0x3CD63C733C04, 0x3CD73C733C04, + 0x3CD83C733C04, 0x3CD93C733C04, 0x3CDA3C733C04, 0x3CDB3C733C04, 0x3CDC3C733C04, 0x3CDD3C733C04, 0x3CDE3C733C04, 0x3CDF3C733C04, 0x3CE03C733C04, 0x3CE13C733C04, 0x3CE23C733C04, 0x3CE33C733C04, 0x3CE43C733C04, 0x3CE53C733C04, 0x3CE63C733C04, + 0x3CE73C733C04, 0x3CE83C733C04, 0x3CE93C733C04, 0x3CEA3C733C04, 0x3CEB3C733C04, 0x3C743C04, 0x3CD13C743C04, 0x3CD23C743C04, 0x3CD33C743C04, 0x3CD43C743C04, 0x3CD53C743C04, 0x3CD63C743C04, 0x3CD73C743C04, 0x3CD83C743C04, 0x3CD93C743C04, + 
0x3CDA3C743C04, 0x3CDB3C743C04, 0x3CDC3C743C04, 0x3CDD3C743C04, 0x3CDE3C743C04, 0x3CDF3C743C04, 0x3CE03C743C04, 0x3CE13C743C04, 0x3CE23C743C04, 0x3CE33C743C04, 0x3CE43C743C04, 0x3CE53C743C04, 0x3CE63C743C04, 0x3CE73C743C04, 0x3CE83C743C04, + 0x3CE93C743C04, 0x3CEA3C743C04, 0x3CEB3C743C04, 0x3C753C04, 0x3CD13C753C04, 0x3CD23C753C04, 0x3CD33C753C04, 0x3CD43C753C04, 0x3CD53C753C04, 0x3CD63C753C04, 0x3CD73C753C04, 0x3CD83C753C04, 0x3CD93C753C04, 0x3CDA3C753C04, 0x3CDB3C753C04, + 0x3CDC3C753C04, 0x3CDD3C753C04, 0x3CDE3C753C04, 0x3CDF3C753C04, 0x3CE03C753C04, 0x3CE13C753C04, 0x3CE23C753C04, 0x3CE33C753C04, 0x3CE43C753C04, 0x3CE53C753C04, 0x3CE63C753C04, 0x3CE73C753C04, 0x3CE83C753C04, 0x3CE93C753C04, 0x3CEA3C753C04, + 0x3CEB3C753C04, 0x3C763C04, 0x3CD13C763C04, 0x3CD23C763C04, 0x3CD33C763C04, 0x3CD43C763C04, 0x3CD53C763C04, 0x3CD63C763C04, 0x3CD73C763C04, 0x3CD83C763C04, 0x3CD93C763C04, 0x3CDA3C763C04, 0x3CDB3C763C04, 0x3CDC3C763C04, 0x3CDD3C763C04, + 0x3CDE3C763C04, 0x3CDF3C763C04, 0x3CE03C763C04, 0x3CE13C763C04, 0x3CE23C763C04, 0x3CE33C763C04, 0x3CE43C763C04, 0x3CE53C763C04, 0x3CE63C763C04, 0x3CE73C763C04, 0x3CE83C763C04, 0x3CE93C763C04, 0x3CEA3C763C04, 0x3CEB3C763C04, 0x3C773C04, + 0x3CD13C773C04, 0x3CD23C773C04, 0x3CD33C773C04, 0x3CD43C773C04, 0x3CD53C773C04, 0x3CD63C773C04, 0x3CD73C773C04, 0x3CD83C773C04, 0x3CD93C773C04, 0x3CDA3C773C04, 0x3CDB3C773C04, 0x3CDC3C773C04, 0x3CDD3C773C04, 0x3CDE3C773C04, 0x3CDF3C773C04, + 0x3CE03C773C04, 0x3CE13C773C04, 0x3CE23C773C04, 0x3CE33C773C04, 0x3CE43C773C04, 0x3CE53C773C04, 0x3CE63C773C04, 0x3CE73C773C04, 0x3CE83C773C04, 0x3CE93C773C04, 0x3CEA3C773C04, 0x3CEB3C773C04, 0x3C783C04, 0x3CD13C783C04, 0x3CD23C783C04, + 0x3CD33C783C04, 0x3CD43C783C04, 0x3CD53C783C04, 0x3CD63C783C04, 0x3CD73C783C04, 0x3CD83C783C04, 0x3CD93C783C04, 0x3CDA3C783C04, 0x3CDB3C783C04, 0x3CDC3C783C04, 0x3CDD3C783C04, 0x3CDE3C783C04, 0x3CDF3C783C04, 0x3CE03C783C04, 0x3CE13C783C04, + 0x3CE23C783C04, 0x3CE33C783C04, 0x3CE43C783C04, 0x3CE53C783C04, 0x3CE63C783C04, 
0x3CE73C783C04, 0x3CE83C783C04, 0x3CE93C783C04, 0x3CEA3C783C04, 0x3CEB3C783C04, 0x3C793C04, 0x3CD13C793C04, 0x3CD23C793C04, 0x3CD33C793C04, 0x3CD43C793C04, + 0x3CD53C793C04, 0x3CD63C793C04, 0x3CD73C793C04, 0x3CD83C793C04, 0x3CD93C793C04, 0x3CDA3C793C04, 0x3CDB3C793C04, 0x3CDC3C793C04, 0x3CDD3C793C04, 0x3CDE3C793C04, 0x3CDF3C793C04, 0x3CE03C793C04, 0x3CE13C793C04, 0x3CE23C793C04, 0x3CE33C793C04, + 0x3CE43C793C04, 0x3CE53C793C04, 0x3CE63C793C04, 0x3CE73C793C04, 0x3CE83C793C04, 0x3CE93C793C04, 0x3CEA3C793C04, 0x3CEB3C793C04, 0x3C7A3C04, 0x3CD13C7A3C04, 0x3CD23C7A3C04, 0x3CD33C7A3C04, 0x3CD43C7A3C04, 0x3CD53C7A3C04, 0x3CD63C7A3C04, + 0x3CD73C7A3C04, 0x3CD83C7A3C04, 0x3CD93C7A3C04, 0x3CDA3C7A3C04, 0x3CDB3C7A3C04, 0x3CDC3C7A3C04, 0x3CDD3C7A3C04, 0x3CDE3C7A3C04, 0x3CDF3C7A3C04, 0x3CE03C7A3C04, 0x3CE13C7A3C04, 0x3CE23C7A3C04, 0x3CE33C7A3C04, 0x3CE43C7A3C04, 0x3CE53C7A3C04, + 0x3CE63C7A3C04, 0x3CE73C7A3C04, 0x3CE83C7A3C04, 0x3CE93C7A3C04, 0x3CEA3C7A3C04, 0x3CEB3C7A3C04, 0x3C7B3C04, 0x3CD13C7B3C04, 0x3CD23C7B3C04, 0x3CD33C7B3C04, 0x3CD43C7B3C04, 0x3CD53C7B3C04, 0x3CD63C7B3C04, 0x3CD73C7B3C04, 0x3CD83C7B3C04, + 0x3CD93C7B3C04, 0x3CDA3C7B3C04, 0x3CDB3C7B3C04, 0x3CDC3C7B3C04, 0x3CDD3C7B3C04, 0x3CDE3C7B3C04, 0x3CDF3C7B3C04, 0x3CE03C7B3C04, 0x3CE13C7B3C04, 0x3CE23C7B3C04, 0x3CE33C7B3C04, 0x3CE43C7B3C04, 0x3CE53C7B3C04, 0x3CE63C7B3C04, 0x3CE73C7B3C04, + 0x3CE83C7B3C04, 0x3CE93C7B3C04, 0x3CEA3C7B3C04, 0x3CEB3C7B3C04, 0x3C7C3C04, 0x3CD13C7C3C04, 0x3CD23C7C3C04, 0x3CD33C7C3C04, 0x3CD43C7C3C04, 0x3CD53C7C3C04, 0x3CD63C7C3C04, 0x3CD73C7C3C04, 0x3CD83C7C3C04, 0x3CD93C7C3C04, 0x3CDA3C7C3C04, + 0x3CDB3C7C3C04, 0x3CDC3C7C3C04, 0x3CDD3C7C3C04, 0x3CDE3C7C3C04, 0x3CDF3C7C3C04, 0x3CE03C7C3C04, 0x3CE13C7C3C04, 0x3CE23C7C3C04, 0x3CE33C7C3C04, 0x3CE43C7C3C04, 0x3CE53C7C3C04, 0x3CE63C7C3C04, 0x3CE73C7C3C04, 0x3CE83C7C3C04, 0x3CE93C7C3C04, + 0x3CEA3C7C3C04, 0x3CEB3C7C3C04, 0x3C7D3C04, 0x3CD13C7D3C04, 0x3CD23C7D3C04, 0x3CD33C7D3C04, 0x3CD43C7D3C04, 0x3CD53C7D3C04, 0x3CD63C7D3C04, 0x3CD73C7D3C04, 
0x3CD83C7D3C04, 0x3CD93C7D3C04, 0x3CDA3C7D3C04, 0x3CDB3C7D3C04, 0x3CDC3C7D3C04, + 0x3CDD3C7D3C04, 0x3CDE3C7D3C04, 0x3CDF3C7D3C04, 0x3CE03C7D3C04, 0x3CE13C7D3C04, 0x3CE23C7D3C04, 0x3CE33C7D3C04, 0x3CE43C7D3C04, 0x3CE53C7D3C04, 0x3CE63C7D3C04, 0x3CE73C7D3C04, 0x3CE83C7D3C04, 0x3CE93C7D3C04, 0x3CEA3C7D3C04, 0x3CEB3C7D3C04, + 0x3C7E3C04, 0x3CD13C7E3C04, 0x3CD23C7E3C04, 0x3CD33C7E3C04, 0x3CD43C7E3C04, 0x3CD53C7E3C04, 0x3CD63C7E3C04, 0x3CD73C7E3C04, 0x3CD83C7E3C04, 0x3CD93C7E3C04, 0x3CDA3C7E3C04, 0x3CDB3C7E3C04, 0x3CDC3C7E3C04, 0x3CDD3C7E3C04, 0x3CDE3C7E3C04, + 0x3CDF3C7E3C04, 0x3CE03C7E3C04, 0x3CE13C7E3C04, 0x3CE23C7E3C04, 0x3CE33C7E3C04, 0x3CE43C7E3C04, 0x3CE53C7E3C04, 0x3CE63C7E3C04, 0x3CE73C7E3C04, 0x3CE83C7E3C04, 0x3CE93C7E3C04, 0x3CEA3C7E3C04, 0x3CEB3C7E3C04, 0x3C7F3C04, 0x3CD13C7F3C04, + 0x3CD23C7F3C04, 0x3CD33C7F3C04, 0x3CD43C7F3C04, 0x3CD53C7F3C04, 0x3CD63C7F3C04, 0x3CD73C7F3C04, 0x3CD83C7F3C04, 0x3CD93C7F3C04, 0x3CDA3C7F3C04, 0x3CDB3C7F3C04, 0x3CDC3C7F3C04, 0x3CDD3C7F3C04, 0x3CDE3C7F3C04, 0x3CDF3C7F3C04, 0x3CE03C7F3C04, + 0x3CE13C7F3C04, 0x3CE23C7F3C04, 0x3CE33C7F3C04, 0x3CE43C7F3C04, 0x3CE53C7F3C04, 0x3CE63C7F3C04, 0x3CE73C7F3C04, 0x3CE83C7F3C04, 0x3CE93C7F3C04, 0x3CEA3C7F3C04, 0x3CEB3C7F3C04, 0x3C803C04, 0x3CD13C803C04, 0x3CD23C803C04, 0x3CD33C803C04, + 0x3CD43C803C04, 0x3CD53C803C04, 0x3CD63C803C04, 0x3CD73C803C04, 0x3CD83C803C04, 0x3CD93C803C04, 0x3CDA3C803C04, 0x3CDB3C803C04, 0x3CDC3C803C04, 0x3CDD3C803C04, 0x3CDE3C803C04, 0x3CDF3C803C04, 0x3CE03C803C04, 0x3CE13C803C04, 0x3CE23C803C04, + 0x3CE33C803C04, 0x3CE43C803C04, 0x3CE53C803C04, 0x3CE63C803C04, 0x3CE73C803C04, 0x3CE83C803C04, 0x3CE93C803C04, 0x3CEA3C803C04, 0x3CEB3C803C04, 0x3C813C04, 0x3CD13C813C04, 0x3CD23C813C04, 0x3CD33C813C04, 0x3CD43C813C04, 0x3CD53C813C04, + 0x3CD63C813C04, 0x3CD73C813C04, 0x3CD83C813C04, 0x3CD93C813C04, 0x3CDA3C813C04, 0x3CDB3C813C04, 0x3CDC3C813C04, 0x3CDD3C813C04, 0x3CDE3C813C04, 0x3CDF3C813C04, 0x3CE03C813C04, 0x3CE13C813C04, 0x3CE23C813C04, 0x3CE33C813C04, 0x3CE43C813C04, 
+ 0x3CE53C813C04, 0x3CE63C813C04, 0x3CE73C813C04, 0x3CE83C813C04, 0x3CE93C813C04, 0x3CEA3C813C04, 0x3CEB3C813C04, 0x3C823C04, 0x3CD13C823C04, 0x3CD23C823C04, 0x3CD33C823C04, 0x3CD43C823C04, 0x3CD53C823C04, 0x3CD63C823C04, 0x3CD73C823C04, + 0x3CD83C823C04, 0x3CD93C823C04, 0x3CDA3C823C04, 0x3CDB3C823C04, 0x3CDC3C823C04, 0x3CDD3C823C04, 0x3CDE3C823C04, 0x3CDF3C823C04, 0x3CE03C823C04, 0x3CE13C823C04, 0x3CE23C823C04, 0x3CE33C823C04, 0x3CE43C823C04, 0x3CE53C823C04, 0x3CE63C823C04, + 0x3CE73C823C04, 0x3CE83C823C04, 0x3CE93C823C04, 0x3CEA3C823C04, 0x3CEB3C823C04, 0x3C833C04, 0x3CD13C833C04, 0x3CD23C833C04, 0x3CD33C833C04, 0x3CD43C833C04, 0x3CD53C833C04, 0x3CD63C833C04, 0x3CD73C833C04, 0x3CD83C833C04, 0x3CD93C833C04, + 0x3CDA3C833C04, 0x3CDB3C833C04, 0x3CDC3C833C04, 0x3CDD3C833C04, 0x3CDE3C833C04, 0x3CDF3C833C04, 0x3CE03C833C04, 0x3CE13C833C04, 0x3CE23C833C04, 0x3CE33C833C04, 0x3CE43C833C04, 0x3CE53C833C04, 0x3CE63C833C04, 0x3CE73C833C04, 0x3CE83C833C04, + 0x3CE93C833C04, 0x3CEA3C833C04, 0x3CEB3C833C04, 0x3C843C04, 0x3CD13C843C04, 0x3CD23C843C04, 0x3CD33C843C04, 0x3CD43C843C04, 0x3CD53C843C04, 0x3CD63C843C04, 0x3CD73C843C04, 0x3CD83C843C04, 0x3CD93C843C04, 0x3CDA3C843C04, 0x3CDB3C843C04, + 0x3CDC3C843C04, 0x3CDD3C843C04, 0x3CDE3C843C04, 0x3CDF3C843C04, 0x3CE03C843C04, 0x3CE13C843C04, 0x3CE23C843C04, 0x3CE33C843C04, 0x3CE43C843C04, 0x3CE53C843C04, 0x3CE63C843C04, 0x3CE73C843C04, 0x3CE83C843C04, 0x3CE93C843C04, 0x3CEA3C843C04, + 0x3CEB3C843C04, 0x3C853C04, 0x3CD13C853C04, 0x3CD23C853C04, 0x3CD33C853C04, 0x3CD43C853C04, 0x3CD53C853C04, 0x3CD63C853C04, 0x3CD73C853C04, 0x3CD83C853C04, 0x3CD93C853C04, 0x3CDA3C853C04, 0x3CDB3C853C04, 0x3CDC3C853C04, 0x3CDD3C853C04, + 0x3CDE3C853C04, 0x3CDF3C853C04, 0x3CE03C853C04, 0x3CE13C853C04, 0x3CE23C853C04, 0x3CE33C853C04, 0x3CE43C853C04, 0x3CE53C853C04, 0x3CE63C853C04, 0x3CE73C853C04, 0x3CE83C853C04, 0x3CE93C853C04, 0x3CEA3C853C04, 0x3CEB3C853C04, 0x3C863C04, + 0x3CD13C863C04, 0x3CD23C863C04, 0x3CD33C863C04, 0x3CD43C863C04, 0x3CD53C863C04, 
0x3CD63C863C04, 0x3CD73C863C04, 0x3CD83C863C04, 0x3CD93C863C04, 0x3CDA3C863C04, 0x3CDB3C863C04, 0x3CDC3C863C04, 0x3CDD3C863C04, 0x3CDE3C863C04, 0x3CDF3C863C04, + 0x3CE03C863C04, 0x3CE13C863C04, 0x3CE23C863C04, 0x3CE33C863C04, 0x3CE43C863C04, 0x3CE53C863C04, 0x3CE63C863C04, 0x3CE73C863C04, 0x3CE83C863C04, 0x3CE93C863C04, 0x3CEA3C863C04, 0x3CEB3C863C04, 0x3C873C04, 0x3CD13C873C04, 0x3CD23C873C04, + 0x3CD33C873C04, 0x3CD43C873C04, 0x3CD53C873C04, 0x3CD63C873C04, 0x3CD73C873C04, 0x3CD83C873C04, 0x3CD93C873C04, 0x3CDA3C873C04, 0x3CDB3C873C04, 0x3CDC3C873C04, 0x3CDD3C873C04, 0x3CDE3C873C04, 0x3CDF3C873C04, 0x3CE03C873C04, 0x3CE13C873C04, + 0x3CE23C873C04, 0x3CE33C873C04, 0x3CE43C873C04, 0x3CE53C873C04, 0x3CE63C873C04, 0x3CE73C873C04, 0x3CE83C873C04, 0x3CE93C873C04, 0x3CEA3C873C04, 0x3CEB3C873C04, 0x3C733C05, 0x3CD13C733C05, 0x3CD23C733C05, 0x3CD33C733C05, 0x3CD43C733C05, + 0x3CD53C733C05, 0x3CD63C733C05, 0x3CD73C733C05, 0x3CD83C733C05, 0x3CD93C733C05, 0x3CDA3C733C05, 0x3CDB3C733C05, 0x3CDC3C733C05, 0x3CDD3C733C05, 0x3CDE3C733C05, 0x3CDF3C733C05, 0x3CE03C733C05, 0x3CE13C733C05, 0x3CE23C733C05, 0x3CE33C733C05, + 0x3CE43C733C05, 0x3CE53C733C05, 0x3CE63C733C05, 0x3CE73C733C05, 0x3CE83C733C05, 0x3CE93C733C05, 0x3CEA3C733C05, 0x3CEB3C733C05, 0x3C743C05, 0x3CD13C743C05, 0x3CD23C743C05, 0x3CD33C743C05, 0x3CD43C743C05, 0x3CD53C743C05, 0x3CD63C743C05, + 0x3CD73C743C05, 0x3CD83C743C05, 0x3CD93C743C05, 0x3CDA3C743C05, 0x3CDB3C743C05, 0x3CDC3C743C05, 0x3CDD3C743C05, 0x3CDE3C743C05, 0x3CDF3C743C05, 0x3CE03C743C05, 0x3CE13C743C05, 0x3CE23C743C05, 0x3CE33C743C05, 0x3CE43C743C05, 0x3CE53C743C05, + 0x3CE63C743C05, 0x3CE73C743C05, 0x3CE83C743C05, 0x3CE93C743C05, 0x3CEA3C743C05, 0x3CEB3C743C05, 0x3C753C05, 0x3CD13C753C05, 0x3CD23C753C05, 0x3CD33C753C05, 0x3CD43C753C05, 0x3CD53C753C05, 0x3CD63C753C05, 0x3CD73C753C05, 0x3CD83C753C05, + 0x3CD93C753C05, 0x3CDA3C753C05, 0x3CDB3C753C05, 0x3CDC3C753C05, 0x3CDD3C753C05, 0x3CDE3C753C05, 0x3CDF3C753C05, 0x3CE03C753C05, 0x3CE13C753C05, 0x3CE23C753C05, 
0x3CE33C753C05, 0x3CE43C753C05, 0x3CE53C753C05, 0x3CE63C753C05, 0x3CE73C753C05, + 0x3CE83C753C05, 0x3CE93C753C05, 0x3CEA3C753C05, 0x3CEB3C753C05, 0x3C763C05, 0x3CD13C763C05, 0x3CD23C763C05, 0x3CD33C763C05, 0x3CD43C763C05, 0x3CD53C763C05, 0x3CD63C763C05, 0x3CD73C763C05, 0x3CD83C763C05, 0x3CD93C763C05, 0x3CDA3C763C05, + 0x3CDB3C763C05, 0x3CDC3C763C05, 0x3CDD3C763C05, 0x3CDE3C763C05, 0x3CDF3C763C05, 0x3CE03C763C05, 0x3CE13C763C05, 0x3CE23C763C05, 0x3CE33C763C05, 0x3CE43C763C05, 0x3CE53C763C05, 0x3CE63C763C05, 0x3CE73C763C05, 0x3CE83C763C05, 0x3CE93C763C05, + 0x3CEA3C763C05, 0x3CEB3C763C05, 0x3C773C05, 0x3CD13C773C05, 0x3CD23C773C05, 0x3CD33C773C05, 0x3CD43C773C05, 0x3CD53C773C05, 0x3CD63C773C05, 0x3CD73C773C05, 0x3CD83C773C05, 0x3CD93C773C05, 0x3CDA3C773C05, 0x3CDB3C773C05, 0x3CDC3C773C05, + 0x3CDD3C773C05, 0x3CDE3C773C05, 0x3CDF3C773C05, 0x3CE03C773C05, 0x3CE13C773C05, 0x3CE23C773C05, 0x3CE33C773C05, 0x3CE43C773C05, 0x3CE53C773C05, 0x3CE63C773C05, 0x3CE73C773C05, 0x3CE83C773C05, 0x3CE93C773C05, 0x3CEA3C773C05, 0x3CEB3C773C05, + 0x3C783C05, 0x3CD13C783C05, 0x3CD23C783C05, 0x3CD33C783C05, 0x3CD43C783C05, 0x3CD53C783C05, 0x3CD63C783C05, 0x3CD73C783C05, 0x3CD83C783C05, 0x3CD93C783C05, 0x3CDA3C783C05, 0x3CDB3C783C05, 0x3CDC3C783C05, 0x3CDD3C783C05, 0x3CDE3C783C05, + 0x3CDF3C783C05, 0x3CE03C783C05, 0x3CE13C783C05, 0x3CE23C783C05, 0x3CE33C783C05, 0x3CE43C783C05, 0x3CE53C783C05, 0x3CE63C783C05, 0x3CE73C783C05, 0x3CE83C783C05, 0x3CE93C783C05, 0x3CEA3C783C05, 0x3CEB3C783C05, 0x3C793C05, 0x3CD13C793C05, + 0x3CD23C793C05, 0x3CD33C793C05, 0x3CD43C793C05, 0x3CD53C793C05, 0x3CD63C793C05, 0x3CD73C793C05, 0x3CD83C793C05, 0x3CD93C793C05, 0x3CDA3C793C05, 0x3CDB3C793C05, 0x3CDC3C793C05, 0x3CDD3C793C05, 0x3CDE3C793C05, 0x3CDF3C793C05, 0x3CE03C793C05, + 0x3CE13C793C05, 0x3CE23C793C05, 0x3CE33C793C05, 0x3CE43C793C05, 0x3CE53C793C05, 0x3CE63C793C05, 0x3CE73C793C05, 0x3CE83C793C05, 0x3CE93C793C05, 0x3CEA3C793C05, 0x3CEB3C793C05, 0x3C7A3C05, 0x3CD13C7A3C05, 0x3CD23C7A3C05, 0x3CD33C7A3C05, + 
0x3CD43C7A3C05, 0x3CD53C7A3C05, 0x3CD63C7A3C05, 0x3CD73C7A3C05, 0x3CD83C7A3C05, 0x3CD93C7A3C05, 0x3CDA3C7A3C05, 0x3CDB3C7A3C05, 0x3CDC3C7A3C05, 0x3CDD3C7A3C05, 0x3CDE3C7A3C05, 0x3CDF3C7A3C05, 0x3CE03C7A3C05, 0x3CE13C7A3C05, 0x3CE23C7A3C05, + 0x3CE33C7A3C05, 0x3CE43C7A3C05, 0x3CE53C7A3C05, 0x3CE63C7A3C05, 0x3CE73C7A3C05, 0x3CE83C7A3C05, 0x3CE93C7A3C05, 0x3CEA3C7A3C05, 0x3CEB3C7A3C05, 0x3C7B3C05, 0x3CD13C7B3C05, 0x3CD23C7B3C05, 0x3CD33C7B3C05, 0x3CD43C7B3C05, 0x3CD53C7B3C05, + 0x3CD63C7B3C05, 0x3CD73C7B3C05, 0x3CD83C7B3C05, 0x3CD93C7B3C05, 0x3CDA3C7B3C05, 0x3CDB3C7B3C05, 0x3CDC3C7B3C05, 0x3CDD3C7B3C05, 0x3CDE3C7B3C05, 0x3CDF3C7B3C05, 0x3CE03C7B3C05, 0x3CE13C7B3C05, 0x3CE23C7B3C05, 0x3CE33C7B3C05, 0x3CE43C7B3C05, + 0x3CE53C7B3C05, 0x3CE63C7B3C05, 0x3CE73C7B3C05, 0x3CE83C7B3C05, 0x3CE93C7B3C05, 0x3CEA3C7B3C05, 0x3CEB3C7B3C05, 0x3C7C3C05, 0x3CD13C7C3C05, 0x3CD23C7C3C05, 0x3CD33C7C3C05, 0x3CD43C7C3C05, 0x3CD53C7C3C05, 0x3CD63C7C3C05, 0x3CD73C7C3C05, + 0x3CD83C7C3C05, 0x3CD93C7C3C05, 0x3CDA3C7C3C05, 0x3CDB3C7C3C05, 0x3CDC3C7C3C05, 0x3CDD3C7C3C05, 0x3CDE3C7C3C05, 0x3CDF3C7C3C05, 0x3CE03C7C3C05, 0x3CE13C7C3C05, 0x3CE23C7C3C05, 0x3CE33C7C3C05, 0x3CE43C7C3C05, 0x3CE53C7C3C05, 0x3CE63C7C3C05, + 0x3CE73C7C3C05, 0x3CE83C7C3C05, 0x3CE93C7C3C05, 0x3CEA3C7C3C05, 0x3CEB3C7C3C05, 0x3C7D3C05, 0x3CD13C7D3C05, 0x3CD23C7D3C05, 0x3CD33C7D3C05, 0x3CD43C7D3C05, 0x3CD53C7D3C05, 0x3CD63C7D3C05, 0x3CD73C7D3C05, 0x3CD83C7D3C05, 0x3CD93C7D3C05, + 0x3CDA3C7D3C05, 0x3CDB3C7D3C05, 0x3CDC3C7D3C05, 0x3CDD3C7D3C05, 0x3CDE3C7D3C05, 0x3CDF3C7D3C05, 0x3CE03C7D3C05, 0x3CE13C7D3C05, 0x3CE23C7D3C05, 0x3CE33C7D3C05, 0x3CE43C7D3C05, 0x3CE53C7D3C05, 0x3CE63C7D3C05, 0x3CE73C7D3C05, 0x3CE83C7D3C05, + 0x3CE93C7D3C05, 0x3CEA3C7D3C05, 0x3CEB3C7D3C05, 0x3C7E3C05, 0x3CD13C7E3C05, 0x3CD23C7E3C05, 0x3CD33C7E3C05, 0x3CD43C7E3C05, 0x3CD53C7E3C05, 0x3CD63C7E3C05, 0x3CD73C7E3C05, 0x3CD83C7E3C05, 0x3CD93C7E3C05, 0x3CDA3C7E3C05, 0x3CDB3C7E3C05, + 0x3CDC3C7E3C05, 0x3CDD3C7E3C05, 0x3CDE3C7E3C05, 0x3CDF3C7E3C05, 0x3CE03C7E3C05, 
0x3CE13C7E3C05, 0x3CE23C7E3C05, 0x3CE33C7E3C05, 0x3CE43C7E3C05, 0x3CE53C7E3C05, 0x3CE63C7E3C05, 0x3CE73C7E3C05, 0x3CE83C7E3C05, 0x3CE93C7E3C05, 0x3CEA3C7E3C05, + 0x3CEB3C7E3C05, 0x3C7F3C05, 0x3CD13C7F3C05, 0x3CD23C7F3C05, 0x3CD33C7F3C05, 0x3CD43C7F3C05, 0x3CD53C7F3C05, 0x3CD63C7F3C05, 0x3CD73C7F3C05, 0x3CD83C7F3C05, 0x3CD93C7F3C05, 0x3CDA3C7F3C05, 0x3CDB3C7F3C05, 0x3CDC3C7F3C05, 0x3CDD3C7F3C05, + 0x3CDE3C7F3C05, 0x3CDF3C7F3C05, 0x3CE03C7F3C05, 0x3CE13C7F3C05, 0x3CE23C7F3C05, 0x3CE33C7F3C05, 0x3CE43C7F3C05, 0x3CE53C7F3C05, 0x3CE63C7F3C05, 0x3CE73C7F3C05, 0x3CE83C7F3C05, 0x3CE93C7F3C05, 0x3CEA3C7F3C05, 0x3CEB3C7F3C05, 0x3C803C05, + 0x3CD13C803C05, 0x3CD23C803C05, 0x3CD33C803C05, 0x3CD43C803C05, 0x3CD53C803C05, 0x3CD63C803C05, 0x3CD73C803C05, 0x3CD83C803C05, 0x3CD93C803C05, 0x3CDA3C803C05, 0x3CDB3C803C05, 0x3CDC3C803C05, 0x3CDD3C803C05, 0x3CDE3C803C05, 0x3CDF3C803C05, + 0x3CE03C803C05, 0x3CE13C803C05, 0x3CE23C803C05, 0x3CE33C803C05, 0x3CE43C803C05, 0x3CE53C803C05, 0x3CE63C803C05, 0x3CE73C803C05, 0x3CE83C803C05, 0x3CE93C803C05, 0x3CEA3C803C05, 0x3CEB3C803C05, 0x3C813C05, 0x3CD13C813C05, 0x3CD23C813C05, + 0x3CD33C813C05, 0x3CD43C813C05, 0x3CD53C813C05, 0x3CD63C813C05, 0x3CD73C813C05, 0x3CD83C813C05, 0x3CD93C813C05, 0x3CDA3C813C05, 0x3CDB3C813C05, 0x3CDC3C813C05, 0x3CDD3C813C05, 0x3CDE3C813C05, 0x3CDF3C813C05, 0x3CE03C813C05, 0x3CE13C813C05, + 0x3CE23C813C05, 0x3CE33C813C05, 0x3CE43C813C05, 0x3CE53C813C05, 0x3CE63C813C05, 0x3CE73C813C05, 0x3CE83C813C05, 0x3CE93C813C05, 0x3CEA3C813C05, 0x3CEB3C813C05, 0x3C823C05, 0x3CD13C823C05, 0x3CD23C823C05, 0x3CD33C823C05, 0x3CD43C823C05, + 0x3CD53C823C05, 0x3CD63C823C05, 0x3CD73C823C05, 0x3CD83C823C05, 0x3CD93C823C05, 0x3CDA3C823C05, 0x3CDB3C823C05, 0x3CDC3C823C05, 0x3CDD3C823C05, 0x3CDE3C823C05, 0x3CDF3C823C05, 0x3CE03C823C05, 0x3CE13C823C05, 0x3CE23C823C05, 0x3CE33C823C05, + 0x3CE43C823C05, 0x3CE53C823C05, 0x3CE63C823C05, 0x3CE73C823C05, 0x3CE83C823C05, 0x3CE93C823C05, 0x3CEA3C823C05, 0x3CEB3C823C05, 0x3C833C05, 0x3CD13C833C05, 
0x3CD23C833C05, 0x3CD33C833C05, 0x3CD43C833C05, 0x3CD53C833C05, 0x3CD63C833C05, + 0x3CD73C833C05, 0x3CD83C833C05, 0x3CD93C833C05, 0x3CDA3C833C05, 0x3CDB3C833C05, 0x3CDC3C833C05, 0x3CDD3C833C05, 0x3CDE3C833C05, 0x3CDF3C833C05, 0x3CE03C833C05, 0x3CE13C833C05, 0x3CE23C833C05, 0x3CE33C833C05, 0x3CE43C833C05, 0x3CE53C833C05, + 0x3CE63C833C05, 0x3CE73C833C05, 0x3CE83C833C05, 0x3CE93C833C05, 0x3CEA3C833C05, 0x3CEB3C833C05, 0x3C843C05, 0x3CD13C843C05, 0x3CD23C843C05, 0x3CD33C843C05, 0x3CD43C843C05, 0x3CD53C843C05, 0x3CD63C843C05, 0x3CD73C843C05, 0x3CD83C843C05, + 0x3CD93C843C05, 0x3CDA3C843C05, 0x3CDB3C843C05, 0x3CDC3C843C05, 0x3CDD3C843C05, 0x3CDE3C843C05, 0x3CDF3C843C05, 0x3CE03C843C05, 0x3CE13C843C05, 0x3CE23C843C05, 0x3CE33C843C05, 0x3CE43C843C05, 0x3CE53C843C05, 0x3CE63C843C05, 0x3CE73C843C05, + 0x3CE83C843C05, 0x3CE93C843C05, 0x3CEA3C843C05, 0x3CEB3C843C05, 0x3C853C05, 0x3CD13C853C05, 0x3CD23C853C05, 0x3CD33C853C05, 0x3CD43C853C05, 0x3CD53C853C05, 0x3CD63C853C05, 0x3CD73C853C05, 0x3CD83C853C05, 0x3CD93C853C05, 0x3CDA3C853C05, + 0x3CDB3C853C05, 0x3CDC3C853C05, 0x3CDD3C853C05, 0x3CDE3C853C05, 0x3CDF3C853C05, 0x3CE03C853C05, 0x3CE13C853C05, 0x3CE23C853C05, 0x3CE33C853C05, 0x3CE43C853C05, 0x3CE53C853C05, 0x3CE63C853C05, 0x3CE73C853C05, 0x3CE83C853C05, 0x3CE93C853C05, + 0x3CEA3C853C05, 0x3CEB3C853C05, 0x3C863C05, 0x3CD13C863C05, 0x3CD23C863C05, 0x3CD33C863C05, 0x3CD43C863C05, 0x3CD53C863C05, 0x3CD63C863C05, 0x3CD73C863C05, 0x3CD83C863C05, 0x3CD93C863C05, 0x3CDA3C863C05, 0x3CDB3C863C05, 0x3CDC3C863C05, + 0x3CDD3C863C05, 0x3CDE3C863C05, 0x3CDF3C863C05, 0x3CE03C863C05, 0x3CE13C863C05, 0x3CE23C863C05, 0x3CE33C863C05, 0x3CE43C863C05, 0x3CE53C863C05, 0x3CE63C863C05, 0x3CE73C863C05, 0x3CE83C863C05, 0x3CE93C863C05, 0x3CEA3C863C05, 0x3CEB3C863C05, + 0x3C873C05, 0x3CD13C873C05, 0x3CD23C873C05, 0x3CD33C873C05, 0x3CD43C873C05, 0x3CD53C873C05, 0x3CD63C873C05, 0x3CD73C873C05, 0x3CD83C873C05, 0x3CD93C873C05, 0x3CDA3C873C05, 0x3CDB3C873C05, 0x3CDC3C873C05, 0x3CDD3C873C05, 0x3CDE3C873C05, 
+ 0x3CDF3C873C05, 0x3CE03C873C05, 0x3CE13C873C05, 0x3CE23C873C05, 0x3CE33C873C05, 0x3CE43C873C05, 0x3CE53C873C05, 0x3CE63C873C05, 0x3CE73C873C05, 0x3CE83C873C05, 0x3CE93C873C05, 0x3CEA3C873C05, 0x3CEB3C873C05, 0x3C733C06, 0x3CD13C733C06, + 0x3CD23C733C06, 0x3CD33C733C06, 0x3CD43C733C06, 0x3CD53C733C06, 0x3CD63C733C06, 0x3CD73C733C06, 0x3CD83C733C06, 0x3CD93C733C06, 0x3CDA3C733C06, 0x3CDB3C733C06, 0x3CDC3C733C06, 0x3CDD3C733C06, 0x3CDE3C733C06, 0x3CDF3C733C06, 0x3CE03C733C06, + 0x3CE13C733C06, 0x3CE23C733C06, 0x3CE33C733C06, 0x3CE43C733C06, 0x3CE53C733C06, 0x3CE63C733C06, 0x3CE73C733C06, 0x3CE83C733C06, 0x3CE93C733C06, 0x3CEA3C733C06, 0x3CEB3C733C06, 0x3C743C06, 0x3CD13C743C06, 0x3CD23C743C06, 0x3CD33C743C06, + 0x3CD43C743C06, 0x3CD53C743C06, 0x3CD63C743C06, 0x3CD73C743C06, 0x3CD83C743C06, 0x3CD93C743C06, 0x3CDA3C743C06, 0x3CDB3C743C06, 0x3CDC3C743C06, 0x3CDD3C743C06, 0x3CDE3C743C06, 0x3CDF3C743C06, 0x3CE03C743C06, 0x3CE13C743C06, 0x3CE23C743C06, + 0x3CE33C743C06, 0x3CE43C743C06, 0x3CE53C743C06, 0x3CE63C743C06, 0x3CE73C743C06, 0x3CE83C743C06, 0x3CE93C743C06, 0x3CEA3C743C06, 0x3CEB3C743C06, 0x3C753C06, 0x3CD13C753C06, 0x3CD23C753C06, 0x3CD33C753C06, 0x3CD43C753C06, 0x3CD53C753C06, + 0x3CD63C753C06, 0x3CD73C753C06, 0x3CD83C753C06, 0x3CD93C753C06, 0x3CDA3C753C06, 0x3CDB3C753C06, 0x3CDC3C753C06, 0x3CDD3C753C06, 0x3CDE3C753C06, 0x3CDF3C753C06, 0x3CE03C753C06, 0x3CE13C753C06, 0x3CE23C753C06, 0x3CE33C753C06, 0x3CE43C753C06, + 0x3CE53C753C06, 0x3CE63C753C06, 0x3CE73C753C06, 0x3CE83C753C06, 0x3CE93C753C06, 0x3CEA3C753C06, 0x3CEB3C753C06, 0x3C763C06, 0x3CD13C763C06, 0x3CD23C763C06, 0x3CD33C763C06, 0x3CD43C763C06, 0x3CD53C763C06, 0x3CD63C763C06, 0x3CD73C763C06, + 0x3CD83C763C06, 0x3CD93C763C06, 0x3CDA3C763C06, 0x3CDB3C763C06, 0x3CDC3C763C06, 0x3CDD3C763C06, 0x3CDE3C763C06, 0x3CDF3C763C06, 0x3CE03C763C06, 0x3CE13C763C06, 0x3CE23C763C06, 0x3CE33C763C06, 0x3CE43C763C06, 0x3CE53C763C06, 0x3CE63C763C06, + 0x3CE73C763C06, 0x3CE83C763C06, 0x3CE93C763C06, 0x3CEA3C763C06, 
0x3CEB3C763C06, 0x3C773C06, 0x3CD13C773C06, 0x3CD23C773C06, 0x3CD33C773C06, 0x3CD43C773C06, 0x3CD53C773C06, 0x3CD63C773C06, 0x3CD73C773C06, 0x3CD83C773C06, 0x3CD93C773C06, + 0x3CDA3C773C06, 0x3CDB3C773C06, 0x3CDC3C773C06, 0x3CDD3C773C06, 0x3CDE3C773C06, 0x3CDF3C773C06, 0x3CE03C773C06, 0x3CE13C773C06, 0x3CE23C773C06, 0x3CE33C773C06, 0x3CE43C773C06, 0x3CE53C773C06, 0x3CE63C773C06, 0x3CE73C773C06, 0x3CE83C773C06, + 0x3CE93C773C06, 0x3CEA3C773C06, 0x3CEB3C773C06, 0x3C783C06, 0x3CD13C783C06, 0x3CD23C783C06, 0x3CD33C783C06, 0x3CD43C783C06, 0x3CD53C783C06, 0x3CD63C783C06, 0x3CD73C783C06, 0x3CD83C783C06, 0x3CD93C783C06, 0x3CDA3C783C06, 0x3CDB3C783C06, + 0x3CDC3C783C06, 0x3CDD3C783C06, 0x3CDE3C783C06, 0x3CDF3C783C06, 0x3CE03C783C06, 0x3CE13C783C06, 0x3CE23C783C06, 0x3CE33C783C06, 0x3CE43C783C06, 0x3CE53C783C06, 0x3CE63C783C06, 0x3CE73C783C06, 0x3CE83C783C06, 0x3CE93C783C06, 0x3CEA3C783C06, + 0x3CEB3C783C06, 0x3C793C06, 0x3CD13C793C06, 0x3CD23C793C06, 0x3CD33C793C06, 0x3CD43C793C06, 0x3CD53C793C06, 0x3CD63C793C06, 0x3CD73C793C06, 0x3CD83C793C06, 0x3CD93C793C06, 0x3CDA3C793C06, 0x3CDB3C793C06, 0x3CDC3C793C06, 0x3CDD3C793C06, + 0x3CDE3C793C06, 0x3CDF3C793C06, 0x3CE03C793C06, 0x3CE13C793C06, 0x3CE23C793C06, 0x3CE33C793C06, 0x3CE43C793C06, 0x3CE53C793C06, 0x3CE63C793C06, 0x3CE73C793C06, 0x3CE83C793C06, 0x3CE93C793C06, 0x3CEA3C793C06, 0x3CEB3C793C06, 0x3C7A3C06, + 0x3CD13C7A3C06, 0x3CD23C7A3C06, 0x3CD33C7A3C06, 0x3CD43C7A3C06, 0x3CD53C7A3C06, 0x3CD63C7A3C06, 0x3CD73C7A3C06, 0x3CD83C7A3C06, 0x3CD93C7A3C06, 0x3CDA3C7A3C06, 0x3CDB3C7A3C06, 0x3CDC3C7A3C06, 0x3CDD3C7A3C06, 0x3CDE3C7A3C06, 0x3CDF3C7A3C06, + 0x3CE03C7A3C06, 0x3CE13C7A3C06, 0x3CE23C7A3C06, 0x3CE33C7A3C06, 0x3CE43C7A3C06, 0x3CE53C7A3C06, 0x3CE63C7A3C06, 0x3CE73C7A3C06, 0x3CE83C7A3C06, 0x3CE93C7A3C06, 0x3CEA3C7A3C06, 0x3CEB3C7A3C06, 0x3C7B3C06, 0x3CD13C7B3C06, 0x3CD23C7B3C06, + 0x3CD33C7B3C06, 0x3CD43C7B3C06, 0x3CD53C7B3C06, 0x3CD63C7B3C06, 0x3CD73C7B3C06, 0x3CD83C7B3C06, 0x3CD93C7B3C06, 0x3CDA3C7B3C06, 0x3CDB3C7B3C06, 
0x3CDC3C7B3C06, 0x3CDD3C7B3C06, 0x3CDE3C7B3C06, 0x3CDF3C7B3C06, 0x3CE03C7B3C06, 0x3CE13C7B3C06, + 0x3CE23C7B3C06, 0x3CE33C7B3C06, 0x3CE43C7B3C06, 0x3CE53C7B3C06, 0x3CE63C7B3C06, 0x3CE73C7B3C06, 0x3CE83C7B3C06, 0x3CE93C7B3C06, 0x3CEA3C7B3C06, 0x3CEB3C7B3C06, 0x3C7C3C06, 0x3CD13C7C3C06, 0x3CD23C7C3C06, 0x3CD33C7C3C06, 0x3CD43C7C3C06, + 0x3CD53C7C3C06, 0x3CD63C7C3C06, 0x3CD73C7C3C06, 0x3CD83C7C3C06, 0x3CD93C7C3C06, 0x3CDA3C7C3C06, 0x3CDB3C7C3C06, 0x3CDC3C7C3C06, 0x3CDD3C7C3C06, 0x3CDE3C7C3C06, 0x3CDF3C7C3C06, 0x3CE03C7C3C06, 0x3CE13C7C3C06, 0x3CE23C7C3C06, 0x3CE33C7C3C06, + 0x3CE43C7C3C06, 0x3CE53C7C3C06, 0x3CE63C7C3C06, 0x3CE73C7C3C06, 0x3CE83C7C3C06, 0x3CE93C7C3C06, 0x3CEA3C7C3C06, 0x3CEB3C7C3C06, 0x3C7D3C06, 0x3CD13C7D3C06, 0x3CD23C7D3C06, 0x3CD33C7D3C06, 0x3CD43C7D3C06, 0x3CD53C7D3C06, 0x3CD63C7D3C06, + 0x3CD73C7D3C06, 0x3CD83C7D3C06, 0x3CD93C7D3C06, 0x3CDA3C7D3C06, 0x3CDB3C7D3C06, 0x3CDC3C7D3C06, 0x3CDD3C7D3C06, 0x3CDE3C7D3C06, 0x3CDF3C7D3C06, 0x3CE03C7D3C06, 0x3CE13C7D3C06, 0x3CE23C7D3C06, 0x3CE33C7D3C06, 0x3CE43C7D3C06, 0x3CE53C7D3C06, + 0x3CE63C7D3C06, 0x3CE73C7D3C06, 0x3CE83C7D3C06, 0x3CE93C7D3C06, 0x3CEA3C7D3C06, 0x3CEB3C7D3C06, 0x3C7E3C06, 0x3CD13C7E3C06, 0x3CD23C7E3C06, 0x3CD33C7E3C06, 0x3CD43C7E3C06, 0x3CD53C7E3C06, 0x3CD63C7E3C06, 0x3CD73C7E3C06, 0x3CD83C7E3C06, + 0x3CD93C7E3C06, 0x3CDA3C7E3C06, 0x3CDB3C7E3C06, 0x3CDC3C7E3C06, 0x3CDD3C7E3C06, 0x3CDE3C7E3C06, 0x3CDF3C7E3C06, 0x3CE03C7E3C06, 0x3CE13C7E3C06, 0x3CE23C7E3C06, 0x3CE33C7E3C06, 0x3CE43C7E3C06, 0x3CE53C7E3C06, 0x3CE63C7E3C06, 0x3CE73C7E3C06, + 0x3CE83C7E3C06, 0x3CE93C7E3C06, 0x3CEA3C7E3C06, 0x3CEB3C7E3C06, 0x3C7F3C06, 0x3CD13C7F3C06, 0x3CD23C7F3C06, 0x3CD33C7F3C06, 0x3CD43C7F3C06, 0x3CD53C7F3C06, 0x3CD63C7F3C06, 0x3CD73C7F3C06, 0x3CD83C7F3C06, 0x3CD93C7F3C06, 0x3CDA3C7F3C06, + 0x3CDB3C7F3C06, 0x3CDC3C7F3C06, 0x3CDD3C7F3C06, 0x3CDE3C7F3C06, 0x3CDF3C7F3C06, 0x3CE03C7F3C06, 0x3CE13C7F3C06, 0x3CE23C7F3C06, 0x3CE33C7F3C06, 0x3CE43C7F3C06, 0x3CE53C7F3C06, 0x3CE63C7F3C06, 0x3CE73C7F3C06, 0x3CE83C7F3C06, 
0x3CE93C7F3C06, + 0x3CEA3C7F3C06, 0x3CEB3C7F3C06, 0x3C803C06, 0x3CD13C803C06, 0x3CD23C803C06, 0x3CD33C803C06, 0x3CD43C803C06, 0x3CD53C803C06, 0x3CD63C803C06, 0x3CD73C803C06, 0x3CD83C803C06, 0x3CD93C803C06, 0x3CDA3C803C06, 0x3CDB3C803C06, 0x3CDC3C803C06, + 0x3CDD3C803C06, 0x3CDE3C803C06, 0x3CDF3C803C06, 0x3CE03C803C06, 0x3CE13C803C06, 0x3CE23C803C06, 0x3CE33C803C06, 0x3CE43C803C06, 0x3CE53C803C06, 0x3CE63C803C06, 0x3CE73C803C06, 0x3CE83C803C06, 0x3CE93C803C06, 0x3CEA3C803C06, 0x3CEB3C803C06, + 0x3C813C06, 0x3CD13C813C06, 0x3CD23C813C06, 0x3CD33C813C06, 0x3CD43C813C06, 0x3CD53C813C06, 0x3CD63C813C06, 0x3CD73C813C06, 0x3CD83C813C06, 0x3CD93C813C06, 0x3CDA3C813C06, 0x3CDB3C813C06, 0x3CDC3C813C06, 0x3CDD3C813C06, 0x3CDE3C813C06, + 0x3CDF3C813C06, 0x3CE03C813C06, 0x3CE13C813C06, 0x3CE23C813C06, 0x3CE33C813C06, 0x3CE43C813C06, 0x3CE53C813C06, 0x3CE63C813C06, 0x3CE73C813C06, 0x3CE83C813C06, 0x3CE93C813C06, 0x3CEA3C813C06, 0x3CEB3C813C06, 0x3C823C06, 0x3CD13C823C06, + 0x3CD23C823C06, 0x3CD33C823C06, 0x3CD43C823C06, 0x3CD53C823C06, 0x3CD63C823C06, 0x3CD73C823C06, 0x3CD83C823C06, 0x3CD93C823C06, 0x3CDA3C823C06, 0x3CDB3C823C06, 0x3CDC3C823C06, 0x3CDD3C823C06, 0x3CDE3C823C06, 0x3CDF3C823C06, 0x3CE03C823C06, + 0x3CE13C823C06, 0x3CE23C823C06, 0x3CE33C823C06, 0x3CE43C823C06, 0x3CE53C823C06, 0x3CE63C823C06, 0x3CE73C823C06, 0x3CE83C823C06, 0x3CE93C823C06, 0x3CEA3C823C06, 0x3CEB3C823C06, 0x3C833C06, 0x3CD13C833C06, 0x3CD23C833C06, 0x3CD33C833C06, + 0x3CD43C833C06, 0x3CD53C833C06, 0x3CD63C833C06, 0x3CD73C833C06, 0x3CD83C833C06, 0x3CD93C833C06, 0x3CDA3C833C06, 0x3CDB3C833C06, 0x3CDC3C833C06, 0x3CDD3C833C06, 0x3CDE3C833C06, 0x3CDF3C833C06, 0x3CE03C833C06, 0x3CE13C833C06, 0x3CE23C833C06, + 0x3CE33C833C06, 0x3CE43C833C06, 0x3CE53C833C06, 0x3CE63C833C06, 0x3CE73C833C06, 0x3CE83C833C06, 0x3CE93C833C06, 0x3CEA3C833C06, 0x3CEB3C833C06, 0x3C843C06, 0x3CD13C843C06, 0x3CD23C843C06, 0x3CD33C843C06, 0x3CD43C843C06, 0x3CD53C843C06, + 0x3CD63C843C06, 0x3CD73C843C06, 0x3CD83C843C06, 0x3CD93C843C06, 
0x3CDA3C843C06, 0x3CDB3C843C06, 0x3CDC3C843C06, 0x3CDD3C843C06, 0x3CDE3C843C06, 0x3CDF3C843C06, 0x3CE03C843C06, 0x3CE13C843C06, 0x3CE23C843C06, 0x3CE33C843C06, 0x3CE43C843C06, + 0x3CE53C843C06, 0x3CE63C843C06, 0x3CE73C843C06, 0x3CE83C843C06, 0x3CE93C843C06, 0x3CEA3C843C06, 0x3CEB3C843C06, 0x3C853C06, 0x3CD13C853C06, 0x3CD23C853C06, 0x3CD33C853C06, 0x3CD43C853C06, 0x3CD53C853C06, 0x3CD63C853C06, 0x3CD73C853C06, + 0x3CD83C853C06, 0x3CD93C853C06, 0x3CDA3C853C06, 0x3CDB3C853C06, 0x3CDC3C853C06, 0x3CDD3C853C06, 0x3CDE3C853C06, 0x3CDF3C853C06, 0x3CE03C853C06, 0x3CE13C853C06, 0x3CE23C853C06, 0x3CE33C853C06, 0x3CE43C853C06, 0x3CE53C853C06, 0x3CE63C853C06, + 0x3CE73C853C06, 0x3CE83C853C06, 0x3CE93C853C06, 0x3CEA3C853C06, 0x3CEB3C853C06, 0x3C863C06, 0x3CD13C863C06, 0x3CD23C863C06, 0x3CD33C863C06, 0x3CD43C863C06, 0x3CD53C863C06, 0x3CD63C863C06, 0x3CD73C863C06, 0x3CD83C863C06, 0x3CD93C863C06, + 0x3CDA3C863C06, 0x3CDB3C863C06, 0x3CDC3C863C06, 0x3CDD3C863C06, 0x3CDE3C863C06, 0x3CDF3C863C06, 0x3CE03C863C06, 0x3CE13C863C06, 0x3CE23C863C06, 0x3CE33C863C06, 0x3CE43C863C06, 0x3CE53C863C06, 0x3CE63C863C06, 0x3CE73C863C06, 0x3CE83C863C06, + 0x3CE93C863C06, 0x3CEA3C863C06, 0x3CEB3C863C06, 0x3C873C06, 0x3CD13C873C06, 0x3CD23C873C06, 0x3CD33C873C06, 0x3CD43C873C06, 0x3CD53C873C06, 0x3CD63C873C06, 0x3CD73C873C06, 0x3CD83C873C06, 0x3CD93C873C06, 0x3CDA3C873C06, 0x3CDB3C873C06, + 0x3CDC3C873C06, 0x3CDD3C873C06, 0x3CDE3C873C06, 0x3CDF3C873C06, 0x3CE03C873C06, 0x3CE13C873C06, 0x3CE23C873C06, 0x3CE33C873C06, 0x3CE43C873C06, 0x3CE53C873C06, 0x3CE63C873C06, 0x3CE73C873C06, 0x3CE83C873C06, 0x3CE93C873C06, 0x3CEA3C873C06, + 0x3CEB3C873C06, 0x3C733C07, 0x3CD13C733C07, 0x3CD23C733C07, 0x3CD33C733C07, 0x3CD43C733C07, 0x3CD53C733C07, 0x3CD63C733C07, 0x3CD73C733C07, 0x3CD83C733C07, 0x3CD93C733C07, 0x3CDA3C733C07, 0x3CDB3C733C07, 0x3CDC3C733C07, 0x3CDD3C733C07, + 0x3CDE3C733C07, 0x3CDF3C733C07, 0x3CE03C733C07, 0x3CE13C733C07, 0x3CE23C733C07, 0x3CE33C733C07, 0x3CE43C733C07, 0x3CE53C733C07, 0x3CE63C733C07, 
0x3CE73C733C07, 0x3CE83C733C07, 0x3CE93C733C07, 0x3CEA3C733C07, 0x3CEB3C733C07, 0x3C743C07, + 0x3CD13C743C07, 0x3CD23C743C07, 0x3CD33C743C07, 0x3CD43C743C07, 0x3CD53C743C07, 0x3CD63C743C07, 0x3CD73C743C07, 0x3CD83C743C07, 0x3CD93C743C07, 0x3CDA3C743C07, 0x3CDB3C743C07, 0x3CDC3C743C07, 0x3CDD3C743C07, 0x3CDE3C743C07, 0x3CDF3C743C07, + 0x3CE03C743C07, 0x3CE13C743C07, 0x3CE23C743C07, 0x3CE33C743C07, 0x3CE43C743C07, 0x3CE53C743C07, 0x3CE63C743C07, 0x3CE73C743C07, 0x3CE83C743C07, 0x3CE93C743C07, 0x3CEA3C743C07, 0x3CEB3C743C07, 0x3C753C07, 0x3CD13C753C07, 0x3CD23C753C07, + 0x3CD33C753C07, 0x3CD43C753C07, 0x3CD53C753C07, 0x3CD63C753C07, 0x3CD73C753C07, 0x3CD83C753C07, 0x3CD93C753C07, 0x3CDA3C753C07, 0x3CDB3C753C07, 0x3CDC3C753C07, 0x3CDD3C753C07, 0x3CDE3C753C07, 0x3CDF3C753C07, 0x3CE03C753C07, 0x3CE13C753C07, + 0x3CE23C753C07, 0x3CE33C753C07, 0x3CE43C753C07, 0x3CE53C753C07, 0x3CE63C753C07, 0x3CE73C753C07, 0x3CE83C753C07, 0x3CE93C753C07, 0x3CEA3C753C07, 0x3CEB3C753C07, 0x3C763C07, 0x3CD13C763C07, 0x3CD23C763C07, 0x3CD33C763C07, 0x3CD43C763C07, + 0x3CD53C763C07, 0x3CD63C763C07, 0x3CD73C763C07, 0x3CD83C763C07, 0x3CD93C763C07, 0x3CDA3C763C07, 0x3CDB3C763C07, 0x3CDC3C763C07, 0x3CDD3C763C07, 0x3CDE3C763C07, 0x3CDF3C763C07, 0x3CE03C763C07, 0x3CE13C763C07, 0x3CE23C763C07, 0x3CE33C763C07, + 0x3CE43C763C07, 0x3CE53C763C07, 0x3CE63C763C07, 0x3CE73C763C07, 0x3CE83C763C07, 0x3CE93C763C07, 0x3CEA3C763C07, 0x3CEB3C763C07, 0x3C773C07, 0x3CD13C773C07, 0x3CD23C773C07, 0x3CD33C773C07, 0x3CD43C773C07, 0x3CD53C773C07, 0x3CD63C773C07, + 0x3CD73C773C07, 0x3CD83C773C07, 0x3CD93C773C07, 0x3CDA3C773C07, 0x3CDB3C773C07, 0x3CDC3C773C07, 0x3CDD3C773C07, 0x3CDE3C773C07, 0x3CDF3C773C07, 0x3CE03C773C07, 0x3CE13C773C07, 0x3CE23C773C07, 0x3CE33C773C07, 0x3CE43C773C07, 0x3CE53C773C07, + 0x3CE63C773C07, 0x3CE73C773C07, 0x3CE83C773C07, 0x3CE93C773C07, 0x3CEA3C773C07, 0x3CEB3C773C07, 0x3C783C07, 0x3CD13C783C07, 0x3CD23C783C07, 0x3CD33C783C07, 0x3CD43C783C07, 0x3CD53C783C07, 0x3CD63C783C07, 0x3CD73C783C07, 
0x3CD83C783C07, + 0x3CD93C783C07, 0x3CDA3C783C07, 0x3CDB3C783C07, 0x3CDC3C783C07, 0x3CDD3C783C07, 0x3CDE3C783C07, 0x3CDF3C783C07, 0x3CE03C783C07, 0x3CE13C783C07, 0x3CE23C783C07, 0x3CE33C783C07, 0x3CE43C783C07, 0x3CE53C783C07, 0x3CE63C783C07, 0x3CE73C783C07, + 0x3CE83C783C07, 0x3CE93C783C07, 0x3CEA3C783C07, 0x3CEB3C783C07, 0x3C793C07, 0x3CD13C793C07, 0x3CD23C793C07, 0x3CD33C793C07, 0x3CD43C793C07, 0x3CD53C793C07, 0x3CD63C793C07, 0x3CD73C793C07, 0x3CD83C793C07, 0x3CD93C793C07, 0x3CDA3C793C07, + 0x3CDB3C793C07, 0x3CDC3C793C07, 0x3CDD3C793C07, 0x3CDE3C793C07, 0x3CDF3C793C07, 0x3CE03C793C07, 0x3CE13C793C07, 0x3CE23C793C07, 0x3CE33C793C07, 0x3CE43C793C07, 0x3CE53C793C07, 0x3CE63C793C07, 0x3CE73C793C07, 0x3CE83C793C07, 0x3CE93C793C07, + 0x3CEA3C793C07, 0x3CEB3C793C07, 0x3C7A3C07, 0x3CD13C7A3C07, 0x3CD23C7A3C07, 0x3CD33C7A3C07, 0x3CD43C7A3C07, 0x3CD53C7A3C07, 0x3CD63C7A3C07, 0x3CD73C7A3C07, 0x3CD83C7A3C07, 0x3CD93C7A3C07, 0x3CDA3C7A3C07, 0x3CDB3C7A3C07, 0x3CDC3C7A3C07, + 0x3CDD3C7A3C07, 0x3CDE3C7A3C07, 0x3CDF3C7A3C07, 0x3CE03C7A3C07, 0x3CE13C7A3C07, 0x3CE23C7A3C07, 0x3CE33C7A3C07, 0x3CE43C7A3C07, 0x3CE53C7A3C07, 0x3CE63C7A3C07, 0x3CE73C7A3C07, 0x3CE83C7A3C07, 0x3CE93C7A3C07, 0x3CEA3C7A3C07, 0x3CEB3C7A3C07, + 0x3C7B3C07, 0x3CD13C7B3C07, 0x3CD23C7B3C07, 0x3CD33C7B3C07, 0x3CD43C7B3C07, 0x3CD53C7B3C07, 0x3CD63C7B3C07, 0x3CD73C7B3C07, 0x3CD83C7B3C07, 0x3CD93C7B3C07, 0x3CDA3C7B3C07, 0x3CDB3C7B3C07, 0x3CDC3C7B3C07, 0x3CDD3C7B3C07, 0x3CDE3C7B3C07, + 0x3CDF3C7B3C07, 0x3CE03C7B3C07, 0x3CE13C7B3C07, 0x3CE23C7B3C07, 0x3CE33C7B3C07, 0x3CE43C7B3C07, 0x3CE53C7B3C07, 0x3CE63C7B3C07, 0x3CE73C7B3C07, 0x3CE83C7B3C07, 0x3CE93C7B3C07, 0x3CEA3C7B3C07, 0x3CEB3C7B3C07, 0x3C7C3C07, 0x3CD13C7C3C07, + 0x3CD23C7C3C07, 0x3CD33C7C3C07, 0x3CD43C7C3C07, 0x3CD53C7C3C07, 0x3CD63C7C3C07, 0x3CD73C7C3C07, 0x3CD83C7C3C07, 0x3CD93C7C3C07, 0x3CDA3C7C3C07, 0x3CDB3C7C3C07, 0x3CDC3C7C3C07, 0x3CDD3C7C3C07, 0x3CDE3C7C3C07, 0x3CDF3C7C3C07, 0x3CE03C7C3C07, + 0x3CE13C7C3C07, 0x3CE23C7C3C07, 0x3CE33C7C3C07, 
0x3CE43C7C3C07, 0x3CE53C7C3C07, 0x3CE63C7C3C07, 0x3CE73C7C3C07, 0x3CE83C7C3C07, 0x3CE93C7C3C07, 0x3CEA3C7C3C07, 0x3CEB3C7C3C07, 0x3C7D3C07, 0x3CD13C7D3C07, 0x3CD23C7D3C07, 0x3CD33C7D3C07, + 0x3CD43C7D3C07, 0x3CD53C7D3C07, 0x3CD63C7D3C07, 0x3CD73C7D3C07, 0x3CD83C7D3C07, 0x3CD93C7D3C07, 0x3CDA3C7D3C07, 0x3CDB3C7D3C07, 0x3CDC3C7D3C07, 0x3CDD3C7D3C07, 0x3CDE3C7D3C07, 0x3CDF3C7D3C07, 0x3CE03C7D3C07, 0x3CE13C7D3C07, 0x3CE23C7D3C07, + 0x3CE33C7D3C07, 0x3CE43C7D3C07, 0x3CE53C7D3C07, 0x3CE63C7D3C07, 0x3CE73C7D3C07, 0x3CE83C7D3C07, 0x3CE93C7D3C07, 0x3CEA3C7D3C07, 0x3CEB3C7D3C07, 0x3C7E3C07, 0x3CD13C7E3C07, 0x3CD23C7E3C07, 0x3CD33C7E3C07, 0x3CD43C7E3C07, 0x3CD53C7E3C07, + 0x3CD63C7E3C07, 0x3CD73C7E3C07, 0x3CD83C7E3C07, 0x3CD93C7E3C07, 0x3CDA3C7E3C07, 0x3CDB3C7E3C07, 0x3CDC3C7E3C07, 0x3CDD3C7E3C07, 0x3CDE3C7E3C07, 0x3CDF3C7E3C07, 0x3CE03C7E3C07, 0x3CE13C7E3C07, 0x3CE23C7E3C07, 0x3CE33C7E3C07, 0x3CE43C7E3C07, + 0x3CE53C7E3C07, 0x3CE63C7E3C07, 0x3CE73C7E3C07, 0x3CE83C7E3C07, 0x3CE93C7E3C07, 0x3CEA3C7E3C07, 0x3CEB3C7E3C07, 0x3C7F3C07, 0x3CD13C7F3C07, 0x3CD23C7F3C07, 0x3CD33C7F3C07, 0x3CD43C7F3C07, 0x3CD53C7F3C07, 0x3CD63C7F3C07, 0x3CD73C7F3C07, + 0x3CD83C7F3C07, 0x3CD93C7F3C07, 0x3CDA3C7F3C07, 0x3CDB3C7F3C07, 0x3CDC3C7F3C07, 0x3CDD3C7F3C07, 0x3CDE3C7F3C07, 0x3CDF3C7F3C07, 0x3CE03C7F3C07, 0x3CE13C7F3C07, 0x3CE23C7F3C07, 0x3CE33C7F3C07, 0x3CE43C7F3C07, 0x3CE53C7F3C07, 0x3CE63C7F3C07, + 0x3CE73C7F3C07, 0x3CE83C7F3C07, 0x3CE93C7F3C07, 0x3CEA3C7F3C07, 0x3CEB3C7F3C07, 0x3C803C07, 0x3CD13C803C07, 0x3CD23C803C07, 0x3CD33C803C07, 0x3CD43C803C07, 0x3CD53C803C07, 0x3CD63C803C07, 0x3CD73C803C07, 0x3CD83C803C07, 0x3CD93C803C07, + 0x3CDA3C803C07, 0x3CDB3C803C07, 0x3CDC3C803C07, 0x3CDD3C803C07, 0x3CDE3C803C07, 0x3CDF3C803C07, 0x3CE03C803C07, 0x3CE13C803C07, 0x3CE23C803C07, 0x3CE33C803C07, 0x3CE43C803C07, 0x3CE53C803C07, 0x3CE63C803C07, 0x3CE73C803C07, 0x3CE83C803C07, + 0x3CE93C803C07, 0x3CEA3C803C07, 0x3CEB3C803C07, 0x3C813C07, 0x3CD13C813C07, 0x3CD23C813C07, 0x3CD33C813C07, 0x3CD43C813C07, 
0x3CD53C813C07, 0x3CD63C813C07, 0x3CD73C813C07, 0x3CD83C813C07, 0x3CD93C813C07, 0x3CDA3C813C07, 0x3CDB3C813C07, + 0x3CDC3C813C07, 0x3CDD3C813C07, 0x3CDE3C813C07, 0x3CDF3C813C07, 0x3CE03C813C07, 0x3CE13C813C07, 0x3CE23C813C07, 0x3CE33C813C07, 0x3CE43C813C07, 0x3CE53C813C07, 0x3CE63C813C07, 0x3CE73C813C07, 0x3CE83C813C07, 0x3CE93C813C07, 0x3CEA3C813C07, + 0x3CEB3C813C07, 0x3C823C07, 0x3CD13C823C07, 0x3CD23C823C07, 0x3CD33C823C07, 0x3CD43C823C07, 0x3CD53C823C07, 0x3CD63C823C07, 0x3CD73C823C07, 0x3CD83C823C07, 0x3CD93C823C07, 0x3CDA3C823C07, 0x3CDB3C823C07, 0x3CDC3C823C07, 0x3CDD3C823C07, + 0x3CDE3C823C07, 0x3CDF3C823C07, 0x3CE03C823C07, 0x3CE13C823C07, 0x3CE23C823C07, 0x3CE33C823C07, 0x3CE43C823C07, 0x3CE53C823C07, 0x3CE63C823C07, 0x3CE73C823C07, 0x3CE83C823C07, 0x3CE93C823C07, 0x3CEA3C823C07, 0x3CEB3C823C07, 0x3C833C07, + 0x3CD13C833C07, 0x3CD23C833C07, 0x3CD33C833C07, 0x3CD43C833C07, 0x3CD53C833C07, 0x3CD63C833C07, 0x3CD73C833C07, 0x3CD83C833C07, 0x3CD93C833C07, 0x3CDA3C833C07, 0x3CDB3C833C07, 0x3CDC3C833C07, 0x3CDD3C833C07, 0x3CDE3C833C07, 0x3CDF3C833C07, + 0x3CE03C833C07, 0x3CE13C833C07, 0x3CE23C833C07, 0x3CE33C833C07, 0x3CE43C833C07, 0x3CE53C833C07, 0x3CE63C833C07, 0x3CE73C833C07, 0x3CE83C833C07, 0x3CE93C833C07, 0x3CEA3C833C07, 0x3CEB3C833C07, 0x3C843C07, 0x3CD13C843C07, 0x3CD23C843C07, + 0x3CD33C843C07, 0x3CD43C843C07, 0x3CD53C843C07, 0x3CD63C843C07, 0x3CD73C843C07, 0x3CD83C843C07, 0x3CD93C843C07, 0x3CDA3C843C07, 0x3CDB3C843C07, 0x3CDC3C843C07, 0x3CDD3C843C07, 0x3CDE3C843C07, 0x3CDF3C843C07, 0x3CE03C843C07, 0x3CE13C843C07, + 0x3CE23C843C07, 0x3CE33C843C07, 0x3CE43C843C07, 0x3CE53C843C07, 0x3CE63C843C07, 0x3CE73C843C07, 0x3CE83C843C07, 0x3CE93C843C07, 0x3CEA3C843C07, 0x3CEB3C843C07, 0x3C853C07, 0x3CD13C853C07, 0x3CD23C853C07, 0x3CD33C853C07, 0x3CD43C853C07, + 0x3CD53C853C07, 0x3CD63C853C07, 0x3CD73C853C07, 0x3CD83C853C07, 0x3CD93C853C07, 0x3CDA3C853C07, 0x3CDB3C853C07, 0x3CDC3C853C07, 0x3CDD3C853C07, 0x3CDE3C853C07, 0x3CDF3C853C07, 0x3CE03C853C07, 0x3CE13C853C07, 
0x3CE23C853C07, 0x3CE33C853C07, + 0x3CE43C853C07, 0x3CE53C853C07, 0x3CE63C853C07, 0x3CE73C853C07, 0x3CE83C853C07, 0x3CE93C853C07, 0x3CEA3C853C07, 0x3CEB3C853C07, 0x3C863C07, 0x3CD13C863C07, 0x3CD23C863C07, 0x3CD33C863C07, 0x3CD43C863C07, 0x3CD53C863C07, 0x3CD63C863C07, + 0x3CD73C863C07, 0x3CD83C863C07, 0x3CD93C863C07, 0x3CDA3C863C07, 0x3CDB3C863C07, 0x3CDC3C863C07, 0x3CDD3C863C07, 0x3CDE3C863C07, 0x3CDF3C863C07, 0x3CE03C863C07, 0x3CE13C863C07, 0x3CE23C863C07, 0x3CE33C863C07, 0x3CE43C863C07, 0x3CE53C863C07, + 0x3CE63C863C07, 0x3CE73C863C07, 0x3CE83C863C07, 0x3CE93C863C07, 0x3CEA3C863C07, 0x3CEB3C863C07, 0x3C873C07, 0x3CD13C873C07, 0x3CD23C873C07, 0x3CD33C873C07, 0x3CD43C873C07, 0x3CD53C873C07, 0x3CD63C873C07, 0x3CD73C873C07, 0x3CD83C873C07, + 0x3CD93C873C07, 0x3CDA3C873C07, 0x3CDB3C873C07, 0x3CDC3C873C07, 0x3CDD3C873C07, 0x3CDE3C873C07, 0x3CDF3C873C07, 0x3CE03C873C07, 0x3CE13C873C07, 0x3CE23C873C07, 0x3CE33C873C07, 0x3CE43C873C07, 0x3CE53C873C07, 0x3CE63C873C07, 0x3CE73C873C07, + 0x3CE83C873C07, 0x3CE93C873C07, 0x3CEA3C873C07, 0x3CEB3C873C07, 0x3C733C08, 0x3CD13C733C08, 0x3CD23C733C08, 0x3CD33C733C08, 0x3CD43C733C08, 0x3CD53C733C08, 0x3CD63C733C08, 0x3CD73C733C08, 0x3CD83C733C08, 0x3CD93C733C08, 0x3CDA3C733C08, + 0x3CDB3C733C08, 0x3CBA, 0x3CBB, 0x3CBC, 0x3CBD, 0x3CBE, 0x3CBF, 0x3CC0, 0x3CC1, 0x3CC2, 0x3CC3, 0x3CC4, 0x3CC5, 0x3CC6, 0x3CC7, + 0x3CC8, 0x3CC9, 0x3CCA, 0x3CCB, 0x3CCC, 0x3CCD, 0x3CCE, 0x3CCF, 0x3CD0, 0xD7C7FBC1, 0xD7C8FBC1, 0xD7C9FBC1, 0xD7CAFBC1, 0x3D29, 0x3D2A, + 0x3D2B, 0x3D2C, 0x3D2D, 0x3D2E, 0x3D2F, 0x3D30, 0x3D31, 0x3D32, 0x3D33, 0x3D34, 0x3D35, 0x3D36, 0x3D37, 0x3D38, 0x3D39, + 0x3D3A, 0x3D3B, 0x3D3C, 0x3D3D, 0x3D3E, 0x3D3F, 0x3D40, 0x3D41, 0x3D42, 0x3D43, 0x3D44, 0x3D45, 0x3D46, 0x3D47, 0x3D48, + 0x3D49, 0x3D4A, 0x3D4B, 0x3D4C, 0x3D4D, 0x3D4E, 0x3D4F, 0x3D50, 0x3D51, 0x3D52, 0x3D53, 0x3D54, 0x3D55, 0x3D56, 0x3D57, + 0x3D58, 0x3D59, 0xD7FCFBC1, 0xD7FDFBC1, 0xD7FEFBC1, 0xD7FFFBC1, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 
0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 
0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 
0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, + 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xE000FBC1, + 0xE001FBC1, 0xE002FBC1, 0xE003FBC1, 0xE004FBC1, 0xE005FBC1, 0xE006FBC1, 0xE007FBC1, 0xE008FBC1, 0xE009FBC1, 0xE00AFBC1, 0xE00BFBC1, 0xE00CFBC1, 0xE00DFBC1, 0xE00EFBC1, 0xE00FFBC1, + 0xE010FBC1, 0xE011FBC1, 0xE012FBC1, 0xE013FBC1, 0xE014FBC1, 0xE015FBC1, 0xE016FBC1, 0xE017FBC1, 0xE018FBC1, 0xE019FBC1, 0xE01AFBC1, 0xE01BFBC1, 0xE01CFBC1, 0xE01DFBC1, 0xE01EFBC1, + 0xE01FFBC1, 0xE020FBC1, 0xE021FBC1, 0xE022FBC1, 0xE023FBC1, 0xE024FBC1, 0xE025FBC1, 0xE026FBC1, 0xE027FBC1, 0xE028FBC1, 0xE029FBC1, 0xE02AFBC1, 0xE02BFBC1, 0xE02CFBC1, 0xE02DFBC1, + 0xE02EFBC1, 0xE02FFBC1, 0xE030FBC1, 0xE031FBC1, 0xE032FBC1, 0xE033FBC1, 0xE034FBC1, 0xE035FBC1, 0xE036FBC1, 0xE037FBC1, 0xE038FBC1, 0xE039FBC1, 0xE03AFBC1, 0xE03BFBC1, 0xE03CFBC1, + 0xE03DFBC1, 0xE03EFBC1, 0xE03FFBC1, 0xE040FBC1, 0xE041FBC1, 0xE042FBC1, 0xE043FBC1, 0xE044FBC1, 0xE045FBC1, 0xE046FBC1, 0xE047FBC1, 0xE048FBC1, 0xE049FBC1, 0xE04AFBC1, 0xE04BFBC1, + 0xE04CFBC1, 0xE04DFBC1, 0xE04EFBC1, 0xE04FFBC1, 0xE050FBC1, 0xE051FBC1, 0xE052FBC1, 0xE053FBC1, 0xE054FBC1, 0xE055FBC1, 0xE056FBC1, 0xE057FBC1, 0xE058FBC1, 0xE059FBC1, 0xE05AFBC1, + 0xE05BFBC1, 0xE05CFBC1, 0xE05DFBC1, 0xE05EFBC1, 0xE05FFBC1, 0xE060FBC1, 0xE061FBC1, 0xE062FBC1, 0xE063FBC1, 0xE064FBC1, 0xE065FBC1, 0xE066FBC1, 0xE067FBC1, 0xE068FBC1, 0xE069FBC1, + 0xE06AFBC1, 0xE06BFBC1, 0xE06CFBC1, 0xE06DFBC1, 0xE06EFBC1, 0xE06FFBC1, 0xE070FBC1, 0xE071FBC1, 
0xE072FBC1, 0xE073FBC1, 0xE074FBC1, 0xE075FBC1, 0xE076FBC1, 0xE077FBC1, 0xE078FBC1, + 0xE079FBC1, 0xE07AFBC1, 0xE07BFBC1, 0xE07CFBC1, 0xE07DFBC1, 0xE07EFBC1, 0xE07FFBC1, 0xE080FBC1, 0xE081FBC1, 0xE082FBC1, 0xE083FBC1, 0xE084FBC1, 0xE085FBC1, 0xE086FBC1, 0xE087FBC1, + 0xE088FBC1, 0xE089FBC1, 0xE08AFBC1, 0xE08BFBC1, 0xE08CFBC1, 0xE08DFBC1, 0xE08EFBC1, 0xE08FFBC1, 0xE090FBC1, 0xE091FBC1, 0xE092FBC1, 0xE093FBC1, 0xE094FBC1, 0xE095FBC1, 0xE096FBC1, + 0xE097FBC1, 0xE098FBC1, 0xE099FBC1, 0xE09AFBC1, 0xE09BFBC1, 0xE09CFBC1, 0xE09DFBC1, 0xE09EFBC1, 0xE09FFBC1, 0xE0A0FBC1, 0xE0A1FBC1, 0xE0A2FBC1, 0xE0A3FBC1, 0xE0A4FBC1, 0xE0A5FBC1, + 0xE0A6FBC1, 0xE0A7FBC1, 0xE0A8FBC1, 0xE0A9FBC1, 0xE0AAFBC1, 0xE0ABFBC1, 0xE0ACFBC1, 0xE0ADFBC1, 0xE0AEFBC1, 0xE0AFFBC1, 0xE0B0FBC1, 0xE0B1FBC1, 0xE0B2FBC1, 0xE0B3FBC1, 0xE0B4FBC1, + 0xE0B5FBC1, 0xE0B6FBC1, 0xE0B7FBC1, 0xE0B8FBC1, 0xE0B9FBC1, 0xE0BAFBC1, 0xE0BBFBC1, 0xE0BCFBC1, 0xE0BDFBC1, 0xE0BEFBC1, 0xE0BFFBC1, 0xE0C0FBC1, 0xE0C1FBC1, 0xE0C2FBC1, 0xE0C3FBC1, + 0xE0C4FBC1, 0xE0C5FBC1, 0xE0C6FBC1, 0xE0C7FBC1, 0xE0C8FBC1, 0xE0C9FBC1, 0xE0CAFBC1, 0xE0CBFBC1, 0xE0CCFBC1, 0xE0CDFBC1, 0xE0CEFBC1, 0xE0CFFBC1, 0xE0D0FBC1, 0xE0D1FBC1, 0xE0D2FBC1, + 0xE0D3FBC1, 0xE0D4FBC1, 0xE0D5FBC1, 0xE0D6FBC1, 0xE0D7FBC1, 0xE0D8FBC1, 0xE0D9FBC1, 0xE0DAFBC1, 0xE0DBFBC1, 0xE0DCFBC1, 0xE0DDFBC1, 0xE0DEFBC1, 0xE0DFFBC1, 0xE0E0FBC1, 0xE0E1FBC1, + 0xE0E2FBC1, 0xE0E3FBC1, 0xE0E4FBC1, 0xE0E5FBC1, 0xE0E6FBC1, 0xE0E7FBC1, 0xE0E8FBC1, 0xE0E9FBC1, 0xE0EAFBC1, 0xE0EBFBC1, 0xE0ECFBC1, 0xE0EDFBC1, 0xE0EEFBC1, 0xE0EFFBC1, 0xE0F0FBC1, + 0xE0F1FBC1, 0xE0F2FBC1, 0xE0F3FBC1, 0xE0F4FBC1, 0xE0F5FBC1, 0xE0F6FBC1, 0xE0F7FBC1, 0xE0F8FBC1, 0xE0F9FBC1, 0xE0FAFBC1, 0xE0FBFBC1, 0xE0FCFBC1, 0xE0FDFBC1, 0xE0FEFBC1, 0xE0FFFBC1, + 0xE100FBC1, 0xE101FBC1, 0xE102FBC1, 0xE103FBC1, 0xE104FBC1, 0xE105FBC1, 0xE106FBC1, 0xE107FBC1, 0xE108FBC1, 0xE109FBC1, 0xE10AFBC1, 0xE10BFBC1, 0xE10CFBC1, 0xE10DFBC1, 0xE10EFBC1, + 0xE10FFBC1, 0xE110FBC1, 0xE111FBC1, 0xE112FBC1, 0xE113FBC1, 0xE114FBC1, 0xE115FBC1, 
0xE116FBC1, 0xE117FBC1, 0xE118FBC1, 0xE119FBC1, 0xE11AFBC1, 0xE11BFBC1, 0xE11CFBC1, 0xE11DFBC1, + 0xE11EFBC1, 0xE11FFBC1, 0xE120FBC1, 0xE121FBC1, 0xE122FBC1, 0xE123FBC1, 0xE124FBC1, 0xE125FBC1, 0xE126FBC1, 0xE127FBC1, 0xE128FBC1, 0xE129FBC1, 0xE12AFBC1, 0xE12BFBC1, 0xE12CFBC1, + 0xE12DFBC1, 0xE12EFBC1, 0xE12FFBC1, 0xE130FBC1, 0xE131FBC1, 0xE132FBC1, 0xE133FBC1, 0xE134FBC1, 0xE135FBC1, 0xE136FBC1, 0xE137FBC1, 0xE138FBC1, 0xE139FBC1, 0xE13AFBC1, 0xE13BFBC1, + 0xE13CFBC1, 0xE13DFBC1, 0xE13EFBC1, 0xE13FFBC1, 0xE140FBC1, 0xE141FBC1, 0xE142FBC1, 0xE143FBC1, 0xE144FBC1, 0xE145FBC1, 0xE146FBC1, 0xE147FBC1, 0xE148FBC1, 0xE149FBC1, 0xE14AFBC1, + 0xE14BFBC1, 0xE14CFBC1, 0xE14DFBC1, 0xE14EFBC1, 0xE14FFBC1, 0xE150FBC1, 0xE151FBC1, 0xE152FBC1, 0xE153FBC1, 0xE154FBC1, 0xE155FBC1, 0xE156FBC1, 0xE157FBC1, 0xE158FBC1, 0xE159FBC1, + 0xE15AFBC1, 0xE15BFBC1, 0xE15CFBC1, 0xE15DFBC1, 0xE15EFBC1, 0xE15FFBC1, 0xE160FBC1, 0xE161FBC1, 0xE162FBC1, 0xE163FBC1, 0xE164FBC1, 0xE165FBC1, 0xE166FBC1, 0xE167FBC1, 0xE168FBC1, + 0xE169FBC1, 0xE16AFBC1, 0xE16BFBC1, 0xE16CFBC1, 0xE16DFBC1, 0xE16EFBC1, 0xE16FFBC1, 0xE170FBC1, 0xE171FBC1, 0xE172FBC1, 0xE173FBC1, 0xE174FBC1, 0xE175FBC1, 0xE176FBC1, 0xE177FBC1, + 0xE178FBC1, 0xE179FBC1, 0xE17AFBC1, 0xE17BFBC1, 0xE17CFBC1, 0xE17DFBC1, 0xE17EFBC1, 0xE17FFBC1, 0xE180FBC1, 0xE181FBC1, 0xE182FBC1, 0xE183FBC1, 0xE184FBC1, 0xE185FBC1, 0xE186FBC1, + 0xE187FBC1, 0xE188FBC1, 0xE189FBC1, 0xE18AFBC1, 0xE18BFBC1, 0xE18CFBC1, 0xE18DFBC1, 0xE18EFBC1, 0xE18FFBC1, 0xE190FBC1, 0xE191FBC1, 0xE192FBC1, 0xE193FBC1, 0xE194FBC1, 0xE195FBC1, + 0xE196FBC1, 0xE197FBC1, 0xE198FBC1, 0xE199FBC1, 0xE19AFBC1, 0xE19BFBC1, 0xE19CFBC1, 0xE19DFBC1, 0xE19EFBC1, 0xE19FFBC1, 0xE1A0FBC1, 0xE1A1FBC1, 0xE1A2FBC1, 0xE1A3FBC1, 0xE1A4FBC1, + 0xE1A5FBC1, 0xE1A6FBC1, 0xE1A7FBC1, 0xE1A8FBC1, 0xE1A9FBC1, 0xE1AAFBC1, 0xE1ABFBC1, 0xE1ACFBC1, 0xE1ADFBC1, 0xE1AEFBC1, 0xE1AFFBC1, 0xE1B0FBC1, 0xE1B1FBC1, 0xE1B2FBC1, 0xE1B3FBC1, + 0xE1B4FBC1, 0xE1B5FBC1, 0xE1B6FBC1, 0xE1B7FBC1, 0xE1B8FBC1, 0xE1B9FBC1, 
0xE1BAFBC1, 0xE1BBFBC1, 0xE1BCFBC1, 0xE1BDFBC1, 0xE1BEFBC1, 0xE1BFFBC1, 0xE1C0FBC1, 0xE1C1FBC1, 0xE1C2FBC1, + 0xE1C3FBC1, 0xE1C4FBC1, 0xE1C5FBC1, 0xE1C6FBC1, 0xE1C7FBC1, 0xE1C8FBC1, 0xE1C9FBC1, 0xE1CAFBC1, 0xE1CBFBC1, 0xE1CCFBC1, 0xE1CDFBC1, 0xE1CEFBC1, 0xE1CFFBC1, 0xE1D0FBC1, 0xE1D1FBC1, + 0xE1D2FBC1, 0xE1D3FBC1, 0xE1D4FBC1, 0xE1D5FBC1, 0xE1D6FBC1, 0xE1D7FBC1, 0xE1D8FBC1, 0xE1D9FBC1, 0xE1DAFBC1, 0xE1DBFBC1, 0xE1DCFBC1, 0xE1DDFBC1, 0xE1DEFBC1, 0xE1DFFBC1, 0xE1E0FBC1, + 0xE1E1FBC1, 0xE1E2FBC1, 0xE1E3FBC1, 0xE1E4FBC1, 0xE1E5FBC1, 0xE1E6FBC1, 0xE1E7FBC1, 0xE1E8FBC1, 0xE1E9FBC1, 0xE1EAFBC1, 0xE1EBFBC1, 0xE1ECFBC1, 0xE1EDFBC1, 0xE1EEFBC1, 0xE1EFFBC1, + 0xE1F0FBC1, 0xE1F1FBC1, 0xE1F2FBC1, 0xE1F3FBC1, 0xE1F4FBC1, 0xE1F5FBC1, 0xE1F6FBC1, 0xE1F7FBC1, 0xE1F8FBC1, 0xE1F9FBC1, 0xE1FAFBC1, 0xE1FBFBC1, 0xE1FCFBC1, 0xE1FDFBC1, 0xE1FEFBC1, + 0xE1FFFBC1, 0xE200FBC1, 0xE201FBC1, 0xE202FBC1, 0xE203FBC1, 0xE204FBC1, 0xE205FBC1, 0xE206FBC1, 0xE207FBC1, 0xE208FBC1, 0xE209FBC1, 0xE20AFBC1, 0xE20BFBC1, 0xE20CFBC1, 0xE20DFBC1, + 0xE20EFBC1, 0xE20FFBC1, 0xE210FBC1, 0xE211FBC1, 0xE212FBC1, 0xE213FBC1, 0xE214FBC1, 0xE215FBC1, 0xE216FBC1, 0xE217FBC1, 0xE218FBC1, 0xE219FBC1, 0xE21AFBC1, 0xE21BFBC1, 0xE21CFBC1, + 0xE21DFBC1, 0xE21EFBC1, 0xE21FFBC1, 0xE220FBC1, 0xE221FBC1, 0xE222FBC1, 0xE223FBC1, 0xE224FBC1, 0xE225FBC1, 0xE226FBC1, 0xE227FBC1, 0xE228FBC1, 0xE229FBC1, 0xE22AFBC1, 0xE22BFBC1, + 0xE22CFBC1, 0xE22DFBC1, 0xE22EFBC1, 0xE22FFBC1, 0xE230FBC1, 0xE231FBC1, 0xE232FBC1, 0xE233FBC1, 0xE234FBC1, 0xE235FBC1, 0xE236FBC1, 0xE237FBC1, 0xE238FBC1, 0xE239FBC1, 0xE23AFBC1, + 0xE23BFBC1, 0xE23CFBC1, 0xE23DFBC1, 0xE23EFBC1, 0xE23FFBC1, 0xE240FBC1, 0xE241FBC1, 0xE242FBC1, 0xE243FBC1, 0xE244FBC1, 0xE245FBC1, 0xE246FBC1, 0xE247FBC1, 0xE248FBC1, 0xE249FBC1, + 0xE24AFBC1, 0xE24BFBC1, 0xE24CFBC1, 0xE24DFBC1, 0xE24EFBC1, 0xE24FFBC1, 0xE250FBC1, 0xE251FBC1, 0xE252FBC1, 0xE253FBC1, 0xE254FBC1, 0xE255FBC1, 0xE256FBC1, 0xE257FBC1, 0xE258FBC1, + 0xE259FBC1, 0xE25AFBC1, 0xE25BFBC1, 0xE25CFBC1, 0xE25DFBC1, 
0xE25EFBC1, 0xE25FFBC1, 0xE260FBC1, 0xE261FBC1, 0xE262FBC1, 0xE263FBC1, 0xE264FBC1, 0xE265FBC1, 0xE266FBC1, 0xE267FBC1, + 0xE268FBC1, 0xE269FBC1, 0xE26AFBC1, 0xE26BFBC1, 0xE26CFBC1, 0xE26DFBC1, 0xE26EFBC1, 0xE26FFBC1, 0xE270FBC1, 0xE271FBC1, 0xE272FBC1, 0xE273FBC1, 0xE274FBC1, 0xE275FBC1, 0xE276FBC1, + 0xE277FBC1, 0xE278FBC1, 0xE279FBC1, 0xE27AFBC1, 0xE27BFBC1, 0xE27CFBC1, 0xE27DFBC1, 0xE27EFBC1, 0xE27FFBC1, 0xE280FBC1, 0xE281FBC1, 0xE282FBC1, 0xE283FBC1, 0xE284FBC1, 0xE285FBC1, + 0xE286FBC1, 0xE287FBC1, 0xE288FBC1, 0xE289FBC1, 0xE28AFBC1, 0xE28BFBC1, 0xE28CFBC1, 0xE28DFBC1, 0xE28EFBC1, 0xE28FFBC1, 0xE290FBC1, 0xE291FBC1, 0xE292FBC1, 0xE293FBC1, 0xE294FBC1, + 0xE295FBC1, 0xE296FBC1, 0xE297FBC1, 0xE298FBC1, 0xE299FBC1, 0xE29AFBC1, 0xE29BFBC1, 0xE29CFBC1, 0xE29DFBC1, 0xE29EFBC1, 0xE29FFBC1, 0xE2A0FBC1, 0xE2A1FBC1, 0xE2A2FBC1, 0xE2A3FBC1, + 0xE2A4FBC1, 0xE2A5FBC1, 0xE2A6FBC1, 0xE2A7FBC1, 0xE2A8FBC1, 0xE2A9FBC1, 0xE2AAFBC1, 0xE2ABFBC1, 0xE2ACFBC1, 0xE2ADFBC1, 0xE2AEFBC1, 0xE2AFFBC1, 0xE2B0FBC1, 0xE2B1FBC1, 0xE2B2FBC1, + 0xE2B3FBC1, 0xE2B4FBC1, 0xE2B5FBC1, 0xE2B6FBC1, 0xE2B7FBC1, 0xE2B8FBC1, 0xE2B9FBC1, 0xE2BAFBC1, 0xE2BBFBC1, 0xE2BCFBC1, 0xE2BDFBC1, 0xE2BEFBC1, 0xE2BFFBC1, 0xE2C0FBC1, 0xE2C1FBC1, + 0xE2C2FBC1, 0xE2C3FBC1, 0xE2C4FBC1, 0xE2C5FBC1, 0xE2C6FBC1, 0xE2C7FBC1, 0xE2C8FBC1, 0xE2C9FBC1, 0xE2CAFBC1, 0xE2CBFBC1, 0xE2CCFBC1, 0xE2CDFBC1, 0xE2CEFBC1, 0xE2CFFBC1, 0xE2D0FBC1, + 0xE2D1FBC1, 0xE2D2FBC1, 0xE2D3FBC1, 0xE2D4FBC1, 0xE2D5FBC1, 0xE2D6FBC1, 0xE2D7FBC1, 0xE2D8FBC1, 0xE2D9FBC1, 0xE2DAFBC1, 0xE2DBFBC1, 0xE2DCFBC1, 0xE2DDFBC1, 0xE2DEFBC1, 0xE2DFFBC1, + 0xE2E0FBC1, 0xE2E1FBC1, 0xE2E2FBC1, 0xE2E3FBC1, 0xE2E4FBC1, 0xE2E5FBC1, 0xE2E6FBC1, 0xE2E7FBC1, 0xE2E8FBC1, 0xE2E9FBC1, 0xE2EAFBC1, 0xE2EBFBC1, 0xE2ECFBC1, 0xE2EDFBC1, 0xE2EEFBC1, + 0xE2EFFBC1, 0xE2F0FBC1, 0xE2F1FBC1, 0xE2F2FBC1, 0xE2F3FBC1, 0xE2F4FBC1, 0xE2F5FBC1, 0xE2F6FBC1, 0xE2F7FBC1, 0xE2F8FBC1, 0xE2F9FBC1, 0xE2FAFBC1, 0xE2FBFBC1, 0xE2FCFBC1, 0xE2FDFBC1, + 0xE2FEFBC1, 0xE2FFFBC1, 0xE300FBC1, 0xE301FBC1, 
0xE302FBC1, 0xE303FBC1, 0xE304FBC1, 0xE305FBC1, 0xE306FBC1, 0xE307FBC1, 0xE308FBC1, 0xE309FBC1, 0xE30AFBC1, 0xE30BFBC1, 0xE30CFBC1, + 0xE30DFBC1, 0xE30EFBC1, 0xE30FFBC1, 0xE310FBC1, 0xE311FBC1, 0xE312FBC1, 0xE313FBC1, 0xE314FBC1, 0xE315FBC1, 0xE316FBC1, 0xE317FBC1, 0xE318FBC1, 0xE319FBC1, 0xE31AFBC1, 0xE31BFBC1, + 0xE31CFBC1, 0xE31DFBC1, 0xE31EFBC1, 0xE31FFBC1, 0xE320FBC1, 0xE321FBC1, 0xE322FBC1, 0xE323FBC1, 0xE324FBC1, 0xE325FBC1, 0xE326FBC1, 0xE327FBC1, 0xE328FBC1, 0xE329FBC1, 0xE32AFBC1, + 0xE32BFBC1, 0xE32CFBC1, 0xE32DFBC1, 0xE32EFBC1, 0xE32FFBC1, 0xE330FBC1, 0xE331FBC1, 0xE332FBC1, 0xE333FBC1, 0xE334FBC1, 0xE335FBC1, 0xE336FBC1, 0xE337FBC1, 0xE338FBC1, 0xE339FBC1, + 0xE33AFBC1, 0xE33BFBC1, 0xE33CFBC1, 0xE33DFBC1, 0xE33EFBC1, 0xE33FFBC1, 0xE340FBC1, 0xE341FBC1, 0xE342FBC1, 0xE343FBC1, 0xE344FBC1, 0xE345FBC1, 0xE346FBC1, 0xE347FBC1, 0xE348FBC1, + 0xE349FBC1, 0xE34AFBC1, 0xE34BFBC1, 0xE34CFBC1, 0xE34DFBC1, 0xE34EFBC1, 0xE34FFBC1, 0xE350FBC1, 0xE351FBC1, 0xE352FBC1, 0xE353FBC1, 0xE354FBC1, 0xE355FBC1, 0xE356FBC1, 0xE357FBC1, + 0xE358FBC1, 0xE359FBC1, 0xE35AFBC1, 0xE35BFBC1, 0xE35CFBC1, 0xE35DFBC1, 0xE35EFBC1, 0xE35FFBC1, 0xE360FBC1, 0xE361FBC1, 0xE362FBC1, 0xE363FBC1, 0xE364FBC1, 0xE365FBC1, 0xE366FBC1, + 0xE367FBC1, 0xE368FBC1, 0xE369FBC1, 0xE36AFBC1, 0xE36BFBC1, 0xE36CFBC1, 0xE36DFBC1, 0xE36EFBC1, 0xE36FFBC1, 0xE370FBC1, 0xE371FBC1, 0xE372FBC1, 0xE373FBC1, 0xE374FBC1, 0xE375FBC1, + 0xE376FBC1, 0xE377FBC1, 0xE378FBC1, 0xE379FBC1, 0xE37AFBC1, 0xE37BFBC1, 0xE37CFBC1, 0xE37DFBC1, 0xE37EFBC1, 0xE37FFBC1, 0xE380FBC1, 0xE381FBC1, 0xE382FBC1, 0xE383FBC1, 0xE384FBC1, + 0xE385FBC1, 0xE386FBC1, 0xE387FBC1, 0xE388FBC1, 0xE389FBC1, 0xE38AFBC1, 0xE38BFBC1, 0xE38CFBC1, 0xE38DFBC1, 0xE38EFBC1, 0xE38FFBC1, 0xE390FBC1, 0xE391FBC1, 0xE392FBC1, 0xE393FBC1, + 0xE394FBC1, 0xE395FBC1, 0xE396FBC1, 0xE397FBC1, 0xE398FBC1, 0xE399FBC1, 0xE39AFBC1, 0xE39BFBC1, 0xE39CFBC1, 0xE39DFBC1, 0xE39EFBC1, 0xE39FFBC1, 0xE3A0FBC1, 0xE3A1FBC1, 0xE3A2FBC1, + 0xE3A3FBC1, 0xE3A4FBC1, 0xE3A5FBC1, 
0xE3A6FBC1, 0xE3A7FBC1, 0xE3A8FBC1, 0xE3A9FBC1, 0xE3AAFBC1, 0xE3ABFBC1, 0xE3ACFBC1, 0xE3ADFBC1, 0xE3AEFBC1, 0xE3AFFBC1, 0xE3B0FBC1, 0xE3B1FBC1, + 0xE3B2FBC1, 0xE3B3FBC1, 0xE3B4FBC1, 0xE3B5FBC1, 0xE3B6FBC1, 0xE3B7FBC1, 0xE3B8FBC1, 0xE3B9FBC1, 0xE3BAFBC1, 0xE3BBFBC1, 0xE3BCFBC1, 0xE3BDFBC1, 0xE3BEFBC1, 0xE3BFFBC1, 0xE3C0FBC1, + 0xE3C1FBC1, 0xE3C2FBC1, 0xE3C3FBC1, 0xE3C4FBC1, 0xE3C5FBC1, 0xE3C6FBC1, 0xE3C7FBC1, 0xE3C8FBC1, 0xE3C9FBC1, 0xE3CAFBC1, 0xE3CBFBC1, 0xE3CCFBC1, 0xE3CDFBC1, 0xE3CEFBC1, 0xE3CFFBC1, + 0xE3D0FBC1, 0xE3D1FBC1, 0xE3D2FBC1, 0xE3D3FBC1, 0xE3D4FBC1, 0xE3D5FBC1, 0xE3D6FBC1, 0xE3D7FBC1, 0xE3D8FBC1, 0xE3D9FBC1, 0xE3DAFBC1, 0xE3DBFBC1, 0xE3DCFBC1, 0xE3DDFBC1, 0xE3DEFBC1, + 0xE3DFFBC1, 0xE3E0FBC1, 0xE3E1FBC1, 0xE3E2FBC1, 0xE3E3FBC1, 0xE3E4FBC1, 0xE3E5FBC1, 0xE3E6FBC1, 0xE3E7FBC1, 0xE3E8FBC1, 0xE3E9FBC1, 0xE3EAFBC1, 0xE3EBFBC1, 0xE3ECFBC1, 0xE3EDFBC1, + 0xE3EEFBC1, 0xE3EFFBC1, 0xE3F0FBC1, 0xE3F1FBC1, 0xE3F2FBC1, 0xE3F3FBC1, 0xE3F4FBC1, 0xE3F5FBC1, 0xE3F6FBC1, 0xE3F7FBC1, 0xE3F8FBC1, 0xE3F9FBC1, 0xE3FAFBC1, 0xE3FBFBC1, 0xE3FCFBC1, + 0xE3FDFBC1, 0xE3FEFBC1, 0xE3FFFBC1, 0xE400FBC1, 0xE401FBC1, 0xE402FBC1, 0xE403FBC1, 0xE404FBC1, 0xE405FBC1, 0xE406FBC1, 0xE407FBC1, 0xE408FBC1, 0xE409FBC1, 0xE40AFBC1, 0xE40BFBC1, + 0xE40CFBC1, 0xE40DFBC1, 0xE40EFBC1, 0xE40FFBC1, 0xE410FBC1, 0xE411FBC1, 0xE412FBC1, 0xE413FBC1, 0xE414FBC1, 0xE415FBC1, 0xE416FBC1, 0xE417FBC1, 0xE418FBC1, 0xE419FBC1, 0xE41AFBC1, + 0xE41BFBC1, 0xE41CFBC1, 0xE41DFBC1, 0xE41EFBC1, 0xE41FFBC1, 0xE420FBC1, 0xE421FBC1, 0xE422FBC1, 0xE423FBC1, 0xE424FBC1, 0xE425FBC1, 0xE426FBC1, 0xE427FBC1, 0xE428FBC1, 0xE429FBC1, + 0xE42AFBC1, 0xE42BFBC1, 0xE42CFBC1, 0xE42DFBC1, 0xE42EFBC1, 0xE42FFBC1, 0xE430FBC1, 0xE431FBC1, 0xE432FBC1, 0xE433FBC1, 0xE434FBC1, 0xE435FBC1, 0xE436FBC1, 0xE437FBC1, 0xE438FBC1, + 0xE439FBC1, 0xE43AFBC1, 0xE43BFBC1, 0xE43CFBC1, 0xE43DFBC1, 0xE43EFBC1, 0xE43FFBC1, 0xE440FBC1, 0xE441FBC1, 0xE442FBC1, 0xE443FBC1, 0xE444FBC1, 0xE445FBC1, 0xE446FBC1, 0xE447FBC1, + 0xE448FBC1, 0xE449FBC1, 
0xE44AFBC1, 0xE44BFBC1, 0xE44CFBC1, 0xE44DFBC1, 0xE44EFBC1, 0xE44FFBC1, 0xE450FBC1, 0xE451FBC1, 0xE452FBC1, 0xE453FBC1, 0xE454FBC1, 0xE455FBC1, 0xE456FBC1, + 0xE457FBC1, 0xE458FBC1, 0xE459FBC1, 0xE45AFBC1, 0xE45BFBC1, 0xE45CFBC1, 0xE45DFBC1, 0xE45EFBC1, 0xE45FFBC1, 0xE460FBC1, 0xE461FBC1, 0xE462FBC1, 0xE463FBC1, 0xE464FBC1, 0xE465FBC1, + 0xE466FBC1, 0xE467FBC1, 0xE468FBC1, 0xE469FBC1, 0xE46AFBC1, 0xE46BFBC1, 0xE46CFBC1, 0xE46DFBC1, 0xE46EFBC1, 0xE46FFBC1, 0xE470FBC1, 0xE471FBC1, 0xE472FBC1, 0xE473FBC1, 0xE474FBC1, + 0xE475FBC1, 0xE476FBC1, 0xE477FBC1, 0xE478FBC1, 0xE479FBC1, 0xE47AFBC1, 0xE47BFBC1, 0xE47CFBC1, 0xE47DFBC1, 0xE47EFBC1, 0xE47FFBC1, 0xE480FBC1, 0xE481FBC1, 0xE482FBC1, 0xE483FBC1, + 0xE484FBC1, 0xE485FBC1, 0xE486FBC1, 0xE487FBC1, 0xE488FBC1, 0xE489FBC1, 0xE48AFBC1, 0xE48BFBC1, 0xE48CFBC1, 0xE48DFBC1, 0xE48EFBC1, 0xE48FFBC1, 0xE490FBC1, 0xE491FBC1, 0xE492FBC1, + 0xE493FBC1, 0xE494FBC1, 0xE495FBC1, 0xE496FBC1, 0xE497FBC1, 0xE498FBC1, 0xE499FBC1, 0xE49AFBC1, 0xE49BFBC1, 0xE49CFBC1, 0xE49DFBC1, 0xE49EFBC1, 0xE49FFBC1, 0xE4A0FBC1, 0xE4A1FBC1, + 0xE4A2FBC1, 0xE4A3FBC1, 0xE4A4FBC1, 0xE4A5FBC1, 0xE4A6FBC1, 0xE4A7FBC1, 0xE4A8FBC1, 0xE4A9FBC1, 0xE4AAFBC1, 0xE4ABFBC1, 0xE4ACFBC1, 0xE4ADFBC1, 0xE4AEFBC1, 0xE4AFFBC1, 0xE4B0FBC1, + 0xE4B1FBC1, 0xE4B2FBC1, 0xE4B3FBC1, 0xE4B4FBC1, 0xE4B5FBC1, 0xE4B6FBC1, 0xE4B7FBC1, 0xE4B8FBC1, 0xE4B9FBC1, 0xE4BAFBC1, 0xE4BBFBC1, 0xE4BCFBC1, 0xE4BDFBC1, 0xE4BEFBC1, 0xE4BFFBC1, + 0xE4C0FBC1, 0xE4C1FBC1, 0xE4C2FBC1, 0xE4C3FBC1, 0xE4C4FBC1, 0xE4C5FBC1, 0xE4C6FBC1, 0xE4C7FBC1, 0xE4C8FBC1, 0xE4C9FBC1, 0xE4CAFBC1, 0xE4CBFBC1, 0xE4CCFBC1, 0xE4CDFBC1, 0xE4CEFBC1, + 0xE4CFFBC1, 0xE4D0FBC1, 0xE4D1FBC1, 0xE4D2FBC1, 0xE4D3FBC1, 0xE4D4FBC1, 0xE4D5FBC1, 0xE4D6FBC1, 0xE4D7FBC1, 0xE4D8FBC1, 0xE4D9FBC1, 0xE4DAFBC1, 0xE4DBFBC1, 0xE4DCFBC1, 0xE4DDFBC1, + 0xE4DEFBC1, 0xE4DFFBC1, 0xE4E0FBC1, 0xE4E1FBC1, 0xE4E2FBC1, 0xE4E3FBC1, 0xE4E4FBC1, 0xE4E5FBC1, 0xE4E6FBC1, 0xE4E7FBC1, 0xE4E8FBC1, 0xE4E9FBC1, 0xE4EAFBC1, 0xE4EBFBC1, 0xE4ECFBC1, + 0xE4EDFBC1, 
0xE4EEFBC1, 0xE4EFFBC1, 0xE4F0FBC1, 0xE4F1FBC1, 0xE4F2FBC1, 0xE4F3FBC1, 0xE4F4FBC1, 0xE4F5FBC1, 0xE4F6FBC1, 0xE4F7FBC1, 0xE4F8FBC1, 0xE4F9FBC1, 0xE4FAFBC1, 0xE4FBFBC1, + 0xE4FCFBC1, 0xE4FDFBC1, 0xE4FEFBC1, 0xE4FFFBC1, 0xE500FBC1, 0xE501FBC1, 0xE502FBC1, 0xE503FBC1, 0xE504FBC1, 0xE505FBC1, 0xE506FBC1, 0xE507FBC1, 0xE508FBC1, 0xE509FBC1, 0xE50AFBC1, + 0xE50BFBC1, 0xE50CFBC1, 0xE50DFBC1, 0xE50EFBC1, 0xE50FFBC1, 0xE510FBC1, 0xE511FBC1, 0xE512FBC1, 0xE513FBC1, 0xE514FBC1, 0xE515FBC1, 0xE516FBC1, 0xE517FBC1, 0xE518FBC1, 0xE519FBC1, + 0xE51AFBC1, 0xE51BFBC1, 0xE51CFBC1, 0xE51DFBC1, 0xE51EFBC1, 0xE51FFBC1, 0xE520FBC1, 0xE521FBC1, 0xE522FBC1, 0xE523FBC1, 0xE524FBC1, 0xE525FBC1, 0xE526FBC1, 0xE527FBC1, 0xE528FBC1, + 0xE529FBC1, 0xE52AFBC1, 0xE52BFBC1, 0xE52CFBC1, 0xE52DFBC1, 0xE52EFBC1, 0xE52FFBC1, 0xE530FBC1, 0xE531FBC1, 0xE532FBC1, 0xE533FBC1, 0xE534FBC1, 0xE535FBC1, 0xE536FBC1, 0xE537FBC1, + 0xE538FBC1, 0xE539FBC1, 0xE53AFBC1, 0xE53BFBC1, 0xE53CFBC1, 0xE53DFBC1, 0xE53EFBC1, 0xE53FFBC1, 0xE540FBC1, 0xE541FBC1, 0xE542FBC1, 0xE543FBC1, 0xE544FBC1, 0xE545FBC1, 0xE546FBC1, + 0xE547FBC1, 0xE548FBC1, 0xE549FBC1, 0xE54AFBC1, 0xE54BFBC1, 0xE54CFBC1, 0xE54DFBC1, 0xE54EFBC1, 0xE54FFBC1, 0xE550FBC1, 0xE551FBC1, 0xE552FBC1, 0xE553FBC1, 0xE554FBC1, 0xE555FBC1, + 0xE556FBC1, 0xE557FBC1, 0xE558FBC1, 0xE559FBC1, 0xE55AFBC1, 0xE55BFBC1, 0xE55CFBC1, 0xE55DFBC1, 0xE55EFBC1, 0xE55FFBC1, 0xE560FBC1, 0xE561FBC1, 0xE562FBC1, 0xE563FBC1, 0xE564FBC1, + 0xE565FBC1, 0xE566FBC1, 0xE567FBC1, 0xE568FBC1, 0xE569FBC1, 0xE56AFBC1, 0xE56BFBC1, 0xE56CFBC1, 0xE56DFBC1, 0xE56EFBC1, 0xE56FFBC1, 0xE570FBC1, 0xE571FBC1, 0xE572FBC1, 0xE573FBC1, + 0xE574FBC1, 0xE575FBC1, 0xE576FBC1, 0xE577FBC1, 0xE578FBC1, 0xE579FBC1, 0xE57AFBC1, 0xE57BFBC1, 0xE57CFBC1, 0xE57DFBC1, 0xE57EFBC1, 0xE57FFBC1, 0xE580FBC1, 0xE581FBC1, 0xE582FBC1, + 0xE583FBC1, 0xE584FBC1, 0xE585FBC1, 0xE586FBC1, 0xE587FBC1, 0xE588FBC1, 0xE589FBC1, 0xE58AFBC1, 0xE58BFBC1, 0xE58CFBC1, 0xE58DFBC1, 0xE58EFBC1, 0xE58FFBC1, 0xE590FBC1, 0xE591FBC1, + 
0xE592FBC1, 0xE593FBC1, 0xE594FBC1, 0xE595FBC1, 0xE596FBC1, 0xE597FBC1, 0xE598FBC1, 0xE599FBC1, 0xE59AFBC1, 0xE59BFBC1, 0xE59CFBC1, 0xE59DFBC1, 0xE59EFBC1, 0xE59FFBC1, 0xE5A0FBC1, + 0xE5A1FBC1, 0xE5A2FBC1, 0xE5A3FBC1, 0xE5A4FBC1, 0xE5A5FBC1, 0xE5A6FBC1, 0xE5A7FBC1, 0xE5A8FBC1, 0xE5A9FBC1, 0xE5AAFBC1, 0xE5ABFBC1, 0xE5ACFBC1, 0xE5ADFBC1, 0xE5AEFBC1, 0xE5AFFBC1, + 0xE5B0FBC1, 0xE5B1FBC1, 0xE5B2FBC1, 0xE5B3FBC1, 0xE5B4FBC1, 0xE5B5FBC1, 0xE5B6FBC1, 0xE5B7FBC1, 0xE5B8FBC1, 0xE5B9FBC1, 0xE5BAFBC1, 0xE5BBFBC1, 0xE5BCFBC1, 0xE5BDFBC1, 0xE5BEFBC1, + 0xE5BFFBC1, 0xE5C0FBC1, 0xE5C1FBC1, 0xE5C2FBC1, 0xE5C3FBC1, 0xE5C4FBC1, 0xE5C5FBC1, 0xE5C6FBC1, 0xE5C7FBC1, 0xE5C8FBC1, 0xE5C9FBC1, 0xE5CAFBC1, 0xE5CBFBC1, 0xE5CCFBC1, 0xE5CDFBC1, + 0xE5CEFBC1, 0xE5CFFBC1, 0xE5D0FBC1, 0xE5D1FBC1, 0xE5D2FBC1, 0xE5D3FBC1, 0xE5D4FBC1, 0xE5D5FBC1, 0xE5D6FBC1, 0xE5D7FBC1, 0xE5D8FBC1, 0xE5D9FBC1, 0xE5DAFBC1, 0xE5DBFBC1, 0xE5DCFBC1, + 0xE5DDFBC1, 0xE5DEFBC1, 0xE5DFFBC1, 0xE5E0FBC1, 0xE5E1FBC1, 0xE5E2FBC1, 0xE5E3FBC1, 0xE5E4FBC1, 0xE5E5FBC1, 0xE5E6FBC1, 0xE5E7FBC1, 0xE5E8FBC1, 0xE5E9FBC1, 0xE5EAFBC1, 0xE5EBFBC1, + 0xE5ECFBC1, 0xE5EDFBC1, 0xE5EEFBC1, 0xE5EFFBC1, 0xE5F0FBC1, 0xE5F1FBC1, 0xE5F2FBC1, 0xE5F3FBC1, 0xE5F4FBC1, 0xE5F5FBC1, 0xE5F6FBC1, 0xE5F7FBC1, 0xE5F8FBC1, 0xE5F9FBC1, 0xE5FAFBC1, + 0xE5FBFBC1, 0xE5FCFBC1, 0xE5FDFBC1, 0xE5FEFBC1, 0xE5FFFBC1, 0xE600FBC1, 0xE601FBC1, 0xE602FBC1, 0xE603FBC1, 0xE604FBC1, 0xE605FBC1, 0xE606FBC1, 0xE607FBC1, 0xE608FBC1, 0xE609FBC1, + 0xE60AFBC1, 0xE60BFBC1, 0xE60CFBC1, 0xE60DFBC1, 0xE60EFBC1, 0xE60FFBC1, 0xE610FBC1, 0xE611FBC1, 0xE612FBC1, 0xE613FBC1, 0xE614FBC1, 0xE615FBC1, 0xE616FBC1, 0xE617FBC1, 0xE618FBC1, + 0xE619FBC1, 0xE61AFBC1, 0xE61BFBC1, 0xE61CFBC1, 0xE61DFBC1, 0xE61EFBC1, 0xE61FFBC1, 0xE620FBC1, 0xE621FBC1, 0xE622FBC1, 0xE623FBC1, 0xE624FBC1, 0xE625FBC1, 0xE626FBC1, 0xE627FBC1, + 0xE628FBC1, 0xE629FBC1, 0xE62AFBC1, 0xE62BFBC1, 0xE62CFBC1, 0xE62DFBC1, 0xE62EFBC1, 0xE62FFBC1, 0xE630FBC1, 0xE631FBC1, 0xE632FBC1, 0xE633FBC1, 0xE634FBC1, 0xE635FBC1, 0xE636FBC1, 
+ 0xE637FBC1, 0xE638FBC1, 0xE639FBC1, 0xE63AFBC1, 0xE63BFBC1, 0xE63CFBC1, 0xE63DFBC1, 0xE63EFBC1, 0xE63FFBC1, 0xE640FBC1, 0xE641FBC1, 0xE642FBC1, 0xE643FBC1, 0xE644FBC1, 0xE645FBC1, + 0xE646FBC1, 0xE647FBC1, 0xE648FBC1, 0xE649FBC1, 0xE64AFBC1, 0xE64BFBC1, 0xE64CFBC1, 0xE64DFBC1, 0xE64EFBC1, 0xE64FFBC1, 0xE650FBC1, 0xE651FBC1, 0xE652FBC1, 0xE653FBC1, 0xE654FBC1, + 0xE655FBC1, 0xE656FBC1, 0xE657FBC1, 0xE658FBC1, 0xE659FBC1, 0xE65AFBC1, 0xE65BFBC1, 0xE65CFBC1, 0xE65DFBC1, 0xE65EFBC1, 0xE65FFBC1, 0xE660FBC1, 0xE661FBC1, 0xE662FBC1, 0xE663FBC1, + 0xE664FBC1, 0xE665FBC1, 0xE666FBC1, 0xE667FBC1, 0xE668FBC1, 0xE669FBC1, 0xE66AFBC1, 0xE66BFBC1, 0xE66CFBC1, 0xE66DFBC1, 0xE66EFBC1, 0xE66FFBC1, 0xE670FBC1, 0xE671FBC1, 0xE672FBC1, + 0xE673FBC1, 0xE674FBC1, 0xE675FBC1, 0xE676FBC1, 0xE677FBC1, 0xE678FBC1, 0xE679FBC1, 0xE67AFBC1, 0xE67BFBC1, 0xE67CFBC1, 0xE67DFBC1, 0xE67EFBC1, 0xE67FFBC1, 0xE680FBC1, 0xE681FBC1, + 0xE682FBC1, 0xE683FBC1, 0xE684FBC1, 0xE685FBC1, 0xE686FBC1, 0xE687FBC1, 0xE688FBC1, 0xE689FBC1, 0xE68AFBC1, 0xE68BFBC1, 0xE68CFBC1, 0xE68DFBC1, 0xE68EFBC1, 0xE68FFBC1, 0xE690FBC1, + 0xE691FBC1, 0xE692FBC1, 0xE693FBC1, 0xE694FBC1, 0xE695FBC1, 0xE696FBC1, 0xE697FBC1, 0xE698FBC1, 0xE699FBC1, 0xE69AFBC1, 0xE69BFBC1, 0xE69CFBC1, 0xE69DFBC1, 0xE69EFBC1, 0xE69FFBC1, + 0xE6A0FBC1, 0xE6A1FBC1, 0xE6A2FBC1, 0xE6A3FBC1, 0xE6A4FBC1, 0xE6A5FBC1, 0xE6A6FBC1, 0xE6A7FBC1, 0xE6A8FBC1, 0xE6A9FBC1, 0xE6AAFBC1, 0xE6ABFBC1, 0xE6ACFBC1, 0xE6ADFBC1, 0xE6AEFBC1, + 0xE6AFFBC1, 0xE6B0FBC1, 0xE6B1FBC1, 0xE6B2FBC1, 0xE6B3FBC1, 0xE6B4FBC1, 0xE6B5FBC1, 0xE6B6FBC1, 0xE6B7FBC1, 0xE6B8FBC1, 0xE6B9FBC1, 0xE6BAFBC1, 0xE6BBFBC1, 0xE6BCFBC1, 0xE6BDFBC1, + 0xE6BEFBC1, 0xE6BFFBC1, 0xE6C0FBC1, 0xE6C1FBC1, 0xE6C2FBC1, 0xE6C3FBC1, 0xE6C4FBC1, 0xE6C5FBC1, 0xE6C6FBC1, 0xE6C7FBC1, 0xE6C8FBC1, 0xE6C9FBC1, 0xE6CAFBC1, 0xE6CBFBC1, 0xE6CCFBC1, + 0xE6CDFBC1, 0xE6CEFBC1, 0xE6CFFBC1, 0xE6D0FBC1, 0xE6D1FBC1, 0xE6D2FBC1, 0xE6D3FBC1, 0xE6D4FBC1, 0xE6D5FBC1, 0xE6D6FBC1, 0xE6D7FBC1, 0xE6D8FBC1, 0xE6D9FBC1, 0xE6DAFBC1, 
0xE6DBFBC1, + 0xE6DCFBC1, 0xE6DDFBC1, 0xE6DEFBC1, 0xE6DFFBC1, 0xE6E0FBC1, 0xE6E1FBC1, 0xE6E2FBC1, 0xE6E3FBC1, 0xE6E4FBC1, 0xE6E5FBC1, 0xE6E6FBC1, 0xE6E7FBC1, 0xE6E8FBC1, 0xE6E9FBC1, 0xE6EAFBC1, + 0xE6EBFBC1, 0xE6ECFBC1, 0xE6EDFBC1, 0xE6EEFBC1, 0xE6EFFBC1, 0xE6F0FBC1, 0xE6F1FBC1, 0xE6F2FBC1, 0xE6F3FBC1, 0xE6F4FBC1, 0xE6F5FBC1, 0xE6F6FBC1, 0xE6F7FBC1, 0xE6F8FBC1, 0xE6F9FBC1, + 0xE6FAFBC1, 0xE6FBFBC1, 0xE6FCFBC1, 0xE6FDFBC1, 0xE6FEFBC1, 0xE6FFFBC1, 0xE700FBC1, 0xE701FBC1, 0xE702FBC1, 0xE703FBC1, 0xE704FBC1, 0xE705FBC1, 0xE706FBC1, 0xE707FBC1, 0xE708FBC1, + 0xE709FBC1, 0xE70AFBC1, 0xE70BFBC1, 0xE70CFBC1, 0xE70DFBC1, 0xE70EFBC1, 0xE70FFBC1, 0xE710FBC1, 0xE711FBC1, 0xE712FBC1, 0xE713FBC1, 0xE714FBC1, 0xE715FBC1, 0xE716FBC1, 0xE717FBC1, + 0xE718FBC1, 0xE719FBC1, 0xE71AFBC1, 0xE71BFBC1, 0xE71CFBC1, 0xE71DFBC1, 0xE71EFBC1, 0xE71FFBC1, 0xE720FBC1, 0xE721FBC1, 0xE722FBC1, 0xE723FBC1, 0xE724FBC1, 0xE725FBC1, 0xE726FBC1, + 0xE727FBC1, 0xE728FBC1, 0xE729FBC1, 0xE72AFBC1, 0xE72BFBC1, 0xE72CFBC1, 0xE72DFBC1, 0xE72EFBC1, 0xE72FFBC1, 0xE730FBC1, 0xE731FBC1, 0xE732FBC1, 0xE733FBC1, 0xE734FBC1, 0xE735FBC1, + 0xE736FBC1, 0xE737FBC1, 0xE738FBC1, 0xE739FBC1, 0xE73AFBC1, 0xE73BFBC1, 0xE73CFBC1, 0xE73DFBC1, 0xE73EFBC1, 0xE73FFBC1, 0xE740FBC1, 0xE741FBC1, 0xE742FBC1, 0xE743FBC1, 0xE744FBC1, + 0xE745FBC1, 0xE746FBC1, 0xE747FBC1, 0xE748FBC1, 0xE749FBC1, 0xE74AFBC1, 0xE74BFBC1, 0xE74CFBC1, 0xE74DFBC1, 0xE74EFBC1, 0xE74FFBC1, 0xE750FBC1, 0xE751FBC1, 0xE752FBC1, 0xE753FBC1, + 0xE754FBC1, 0xE755FBC1, 0xE756FBC1, 0xE757FBC1, 0xE758FBC1, 0xE759FBC1, 0xE75AFBC1, 0xE75BFBC1, 0xE75CFBC1, 0xE75DFBC1, 0xE75EFBC1, 0xE75FFBC1, 0xE760FBC1, 0xE761FBC1, 0xE762FBC1, + 0xE763FBC1, 0xE764FBC1, 0xE765FBC1, 0xE766FBC1, 0xE767FBC1, 0xE768FBC1, 0xE769FBC1, 0xE76AFBC1, 0xE76BFBC1, 0xE76CFBC1, 0xE76DFBC1, 0xE76EFBC1, 0xE76FFBC1, 0xE770FBC1, 0xE771FBC1, + 0xE772FBC1, 0xE773FBC1, 0xE774FBC1, 0xE775FBC1, 0xE776FBC1, 0xE777FBC1, 0xE778FBC1, 0xE779FBC1, 0xE77AFBC1, 0xE77BFBC1, 0xE77CFBC1, 0xE77DFBC1, 0xE77EFBC1, 
0xE77FFBC1, 0xE780FBC1, + 0xE781FBC1, 0xE782FBC1, 0xE783FBC1, 0xE784FBC1, 0xE785FBC1, 0xE786FBC1, 0xE787FBC1, 0xE788FBC1, 0xE789FBC1, 0xE78AFBC1, 0xE78BFBC1, 0xE78CFBC1, 0xE78DFBC1, 0xE78EFBC1, 0xE78FFBC1, + 0xE790FBC1, 0xE791FBC1, 0xE792FBC1, 0xE793FBC1, 0xE794FBC1, 0xE795FBC1, 0xE796FBC1, 0xE797FBC1, 0xE798FBC1, 0xE799FBC1, 0xE79AFBC1, 0xE79BFBC1, 0xE79CFBC1, 0xE79DFBC1, 0xE79EFBC1, + 0xE79FFBC1, 0xE7A0FBC1, 0xE7A1FBC1, 0xE7A2FBC1, 0xE7A3FBC1, 0xE7A4FBC1, 0xE7A5FBC1, 0xE7A6FBC1, 0xE7A7FBC1, 0xE7A8FBC1, 0xE7A9FBC1, 0xE7AAFBC1, 0xE7ABFBC1, 0xE7ACFBC1, 0xE7ADFBC1, + 0xE7AEFBC1, 0xE7AFFBC1, 0xE7B0FBC1, 0xE7B1FBC1, 0xE7B2FBC1, 0xE7B3FBC1, 0xE7B4FBC1, 0xE7B5FBC1, 0xE7B6FBC1, 0xE7B7FBC1, 0xE7B8FBC1, 0xE7B9FBC1, 0xE7BAFBC1, 0xE7BBFBC1, 0xE7BCFBC1, + 0xE7BDFBC1, 0xE7BEFBC1, 0xE7BFFBC1, 0xE7C0FBC1, 0xE7C1FBC1, 0xE7C2FBC1, 0xE7C3FBC1, 0xE7C4FBC1, 0xE7C5FBC1, 0xE7C6FBC1, 0xE7C7FBC1, 0xE7C8FBC1, 0xE7C9FBC1, 0xE7CAFBC1, 0xE7CBFBC1, + 0xE7CCFBC1, 0xE7CDFBC1, 0xE7CEFBC1, 0xE7CFFBC1, 0xE7D0FBC1, 0xE7D1FBC1, 0xE7D2FBC1, 0xE7D3FBC1, 0xE7D4FBC1, 0xE7D5FBC1, 0xE7D6FBC1, 0xE7D7FBC1, 0xE7D8FBC1, 0xE7D9FBC1, 0xE7DAFBC1, + 0xE7DBFBC1, 0xE7DCFBC1, 0xE7DDFBC1, 0xE7DEFBC1, 0xE7DFFBC1, 0xE7E0FBC1, 0xE7E1FBC1, 0xE7E2FBC1, 0xE7E3FBC1, 0xE7E4FBC1, 0xE7E5FBC1, 0xE7E6FBC1, 0xE7E7FBC1, 0xE7E8FBC1, 0xE7E9FBC1, + 0xE7EAFBC1, 0xE7EBFBC1, 0xE7ECFBC1, 0xE7EDFBC1, 0xE7EEFBC1, 0xE7EFFBC1, 0xE7F0FBC1, 0xE7F1FBC1, 0xE7F2FBC1, 0xE7F3FBC1, 0xE7F4FBC1, 0xE7F5FBC1, 0xE7F6FBC1, 0xE7F7FBC1, 0xE7F8FBC1, + 0xE7F9FBC1, 0xE7FAFBC1, 0xE7FBFBC1, 0xE7FCFBC1, 0xE7FDFBC1, 0xE7FEFBC1, 0xE7FFFBC1, 0xE800FBC1, 0xE801FBC1, 0xE802FBC1, 0xE803FBC1, 0xE804FBC1, 0xE805FBC1, 0xE806FBC1, 0xE807FBC1, + 0xE808FBC1, 0xE809FBC1, 0xE80AFBC1, 0xE80BFBC1, 0xE80CFBC1, 0xE80DFBC1, 0xE80EFBC1, 0xE80FFBC1, 0xE810FBC1, 0xE811FBC1, 0xE812FBC1, 0xE813FBC1, 0xE814FBC1, 0xE815FBC1, 0xE816FBC1, + 0xE817FBC1, 0xE818FBC1, 0xE819FBC1, 0xE81AFBC1, 0xE81BFBC1, 0xE81CFBC1, 0xE81DFBC1, 0xE81EFBC1, 0xE81FFBC1, 0xE820FBC1, 0xE821FBC1, 0xE822FBC1, 
0xE823FBC1, 0xE824FBC1, 0xE825FBC1, + 0xE826FBC1, 0xE827FBC1, 0xE828FBC1, 0xE829FBC1, 0xE82AFBC1, 0xE82BFBC1, 0xE82CFBC1, 0xE82DFBC1, 0xE82EFBC1, 0xE82FFBC1, 0xE830FBC1, 0xE831FBC1, 0xE832FBC1, 0xE833FBC1, 0xE834FBC1, + 0xE835FBC1, 0xE836FBC1, 0xE837FBC1, 0xE838FBC1, 0xE839FBC1, 0xE83AFBC1, 0xE83BFBC1, 0xE83CFBC1, 0xE83DFBC1, 0xE83EFBC1, 0xE83FFBC1, 0xE840FBC1, 0xE841FBC1, 0xE842FBC1, 0xE843FBC1, + 0xE844FBC1, 0xE845FBC1, 0xE846FBC1, 0xE847FBC1, 0xE848FBC1, 0xE849FBC1, 0xE84AFBC1, 0xE84BFBC1, 0xE84CFBC1, 0xE84DFBC1, 0xE84EFBC1, 0xE84FFBC1, 0xE850FBC1, 0xE851FBC1, 0xE852FBC1, + 0xE853FBC1, 0xE854FBC1, 0xE855FBC1, 0xE856FBC1, 0xE857FBC1, 0xE858FBC1, 0xE859FBC1, 0xE85AFBC1, 0xE85BFBC1, 0xE85CFBC1, 0xE85DFBC1, 0xE85EFBC1, 0xE85FFBC1, 0xE860FBC1, 0xE861FBC1, + 0xE862FBC1, 0xE863FBC1, 0xE864FBC1, 0xE865FBC1, 0xE866FBC1, 0xE867FBC1, 0xE868FBC1, 0xE869FBC1, 0xE86AFBC1, 0xE86BFBC1, 0xE86CFBC1, 0xE86DFBC1, 0xE86EFBC1, 0xE86FFBC1, 0xE870FBC1, + 0xE871FBC1, 0xE872FBC1, 0xE873FBC1, 0xE874FBC1, 0xE875FBC1, 0xE876FBC1, 0xE877FBC1, 0xE878FBC1, 0xE879FBC1, 0xE87AFBC1, 0xE87BFBC1, 0xE87CFBC1, 0xE87DFBC1, 0xE87EFBC1, 0xE87FFBC1, + 0xE880FBC1, 0xE881FBC1, 0xE882FBC1, 0xE883FBC1, 0xE884FBC1, 0xE885FBC1, 0xE886FBC1, 0xE887FBC1, 0xE888FBC1, 0xE889FBC1, 0xE88AFBC1, 0xE88BFBC1, 0xE88CFBC1, 0xE88DFBC1, 0xE88EFBC1, + 0xE88FFBC1, 0xE890FBC1, 0xE891FBC1, 0xE892FBC1, 0xE893FBC1, 0xE894FBC1, 0xE895FBC1, 0xE896FBC1, 0xE897FBC1, 0xE898FBC1, 0xE899FBC1, 0xE89AFBC1, 0xE89BFBC1, 0xE89CFBC1, 0xE89DFBC1, + 0xE89EFBC1, 0xE89FFBC1, 0xE8A0FBC1, 0xE8A1FBC1, 0xE8A2FBC1, 0xE8A3FBC1, 0xE8A4FBC1, 0xE8A5FBC1, 0xE8A6FBC1, 0xE8A7FBC1, 0xE8A8FBC1, 0xE8A9FBC1, 0xE8AAFBC1, 0xE8ABFBC1, 0xE8ACFBC1, + 0xE8ADFBC1, 0xE8AEFBC1, 0xE8AFFBC1, 0xE8B0FBC1, 0xE8B1FBC1, 0xE8B2FBC1, 0xE8B3FBC1, 0xE8B4FBC1, 0xE8B5FBC1, 0xE8B6FBC1, 0xE8B7FBC1, 0xE8B8FBC1, 0xE8B9FBC1, 0xE8BAFBC1, 0xE8BBFBC1, + 0xE8BCFBC1, 0xE8BDFBC1, 0xE8BEFBC1, 0xE8BFFBC1, 0xE8C0FBC1, 0xE8C1FBC1, 0xE8C2FBC1, 0xE8C3FBC1, 0xE8C4FBC1, 0xE8C5FBC1, 0xE8C6FBC1, 
0xE8C7FBC1, 0xE8C8FBC1, 0xE8C9FBC1, 0xE8CAFBC1, + 0xE8CBFBC1, 0xE8CCFBC1, 0xE8CDFBC1, 0xE8CEFBC1, 0xE8CFFBC1, 0xE8D0FBC1, 0xE8D1FBC1, 0xE8D2FBC1, 0xE8D3FBC1, 0xE8D4FBC1, 0xE8D5FBC1, 0xE8D6FBC1, 0xE8D7FBC1, 0xE8D8FBC1, 0xE8D9FBC1, + 0xE8DAFBC1, 0xE8DBFBC1, 0xE8DCFBC1, 0xE8DDFBC1, 0xE8DEFBC1, 0xE8DFFBC1, 0xE8E0FBC1, 0xE8E1FBC1, 0xE8E2FBC1, 0xE8E3FBC1, 0xE8E4FBC1, 0xE8E5FBC1, 0xE8E6FBC1, 0xE8E7FBC1, 0xE8E8FBC1, + 0xE8E9FBC1, 0xE8EAFBC1, 0xE8EBFBC1, 0xE8ECFBC1, 0xE8EDFBC1, 0xE8EEFBC1, 0xE8EFFBC1, 0xE8F0FBC1, 0xE8F1FBC1, 0xE8F2FBC1, 0xE8F3FBC1, 0xE8F4FBC1, 0xE8F5FBC1, 0xE8F6FBC1, 0xE8F7FBC1, + 0xE8F8FBC1, 0xE8F9FBC1, 0xE8FAFBC1, 0xE8FBFBC1, 0xE8FCFBC1, 0xE8FDFBC1, 0xE8FEFBC1, 0xE8FFFBC1, 0xE900FBC1, 0xE901FBC1, 0xE902FBC1, 0xE903FBC1, 0xE904FBC1, 0xE905FBC1, 0xE906FBC1, + 0xE907FBC1, 0xE908FBC1, 0xE909FBC1, 0xE90AFBC1, 0xE90BFBC1, 0xE90CFBC1, 0xE90DFBC1, 0xE90EFBC1, 0xE90FFBC1, 0xE910FBC1, 0xE911FBC1, 0xE912FBC1, 0xE913FBC1, 0xE914FBC1, 0xE915FBC1, + 0xE916FBC1, 0xE917FBC1, 0xE918FBC1, 0xE919FBC1, 0xE91AFBC1, 0xE91BFBC1, 0xE91CFBC1, 0xE91DFBC1, 0xE91EFBC1, 0xE91FFBC1, 0xE920FBC1, 0xE921FBC1, 0xE922FBC1, 0xE923FBC1, 0xE924FBC1, + 0xE925FBC1, 0xE926FBC1, 0xE927FBC1, 0xE928FBC1, 0xE929FBC1, 0xE92AFBC1, 0xE92BFBC1, 0xE92CFBC1, 0xE92DFBC1, 0xE92EFBC1, 0xE92FFBC1, 0xE930FBC1, 0xE931FBC1, 0xE932FBC1, 0xE933FBC1, + 0xE934FBC1, 0xE935FBC1, 0xE936FBC1, 0xE937FBC1, 0xE938FBC1, 0xE939FBC1, 0xE93AFBC1, 0xE93BFBC1, 0xE93CFBC1, 0xE93DFBC1, 0xE93EFBC1, 0xE93FFBC1, 0xE940FBC1, 0xE941FBC1, 0xE942FBC1, + 0xE943FBC1, 0xE944FBC1, 0xE945FBC1, 0xE946FBC1, 0xE947FBC1, 0xE948FBC1, 0xE949FBC1, 0xE94AFBC1, 0xE94BFBC1, 0xE94CFBC1, 0xE94DFBC1, 0xE94EFBC1, 0xE94FFBC1, 0xE950FBC1, 0xE951FBC1, + 0xE952FBC1, 0xE953FBC1, 0xE954FBC1, 0xE955FBC1, 0xE956FBC1, 0xE957FBC1, 0xE958FBC1, 0xE959FBC1, 0xE95AFBC1, 0xE95BFBC1, 0xE95CFBC1, 0xE95DFBC1, 0xE95EFBC1, 0xE95FFBC1, 0xE960FBC1, + 0xE961FBC1, 0xE962FBC1, 0xE963FBC1, 0xE964FBC1, 0xE965FBC1, 0xE966FBC1, 0xE967FBC1, 0xE968FBC1, 0xE969FBC1, 0xE96AFBC1, 
0xE96BFBC1, 0xE96CFBC1, 0xE96DFBC1, 0xE96EFBC1, 0xE96FFBC1, + 0xE970FBC1, 0xE971FBC1, 0xE972FBC1, 0xE973FBC1, 0xE974FBC1, 0xE975FBC1, 0xE976FBC1, 0xE977FBC1, 0xE978FBC1, 0xE979FBC1, 0xE97AFBC1, 0xE97BFBC1, 0xE97CFBC1, 0xE97DFBC1, 0xE97EFBC1, + 0xE97FFBC1, 0xE980FBC1, 0xE981FBC1, 0xE982FBC1, 0xE983FBC1, 0xE984FBC1, 0xE985FBC1, 0xE986FBC1, 0xE987FBC1, 0xE988FBC1, 0xE989FBC1, 0xE98AFBC1, 0xE98BFBC1, 0xE98CFBC1, 0xE98DFBC1, + 0xE98EFBC1, 0xE98FFBC1, 0xE990FBC1, 0xE991FBC1, 0xE992FBC1, 0xE993FBC1, 0xE994FBC1, 0xE995FBC1, 0xE996FBC1, 0xE997FBC1, 0xE998FBC1, 0xE999FBC1, 0xE99AFBC1, 0xE99BFBC1, 0xE99CFBC1, + 0xE99DFBC1, 0xE99EFBC1, 0xE99FFBC1, 0xE9A0FBC1, 0xE9A1FBC1, 0xE9A2FBC1, 0xE9A3FBC1, 0xE9A4FBC1, 0xE9A5FBC1, 0xE9A6FBC1, 0xE9A7FBC1, 0xE9A8FBC1, 0xE9A9FBC1, 0xE9AAFBC1, 0xE9ABFBC1, + 0xE9ACFBC1, 0xE9ADFBC1, 0xE9AEFBC1, 0xE9AFFBC1, 0xE9B0FBC1, 0xE9B1FBC1, 0xE9B2FBC1, 0xE9B3FBC1, 0xE9B4FBC1, 0xE9B5FBC1, 0xE9B6FBC1, 0xE9B7FBC1, 0xE9B8FBC1, 0xE9B9FBC1, 0xE9BAFBC1, + 0xE9BBFBC1, 0xE9BCFBC1, 0xE9BDFBC1, 0xE9BEFBC1, 0xE9BFFBC1, 0xE9C0FBC1, 0xE9C1FBC1, 0xE9C2FBC1, 0xE9C3FBC1, 0xE9C4FBC1, 0xE9C5FBC1, 0xE9C6FBC1, 0xE9C7FBC1, 0xE9C8FBC1, 0xE9C9FBC1, + 0xE9CAFBC1, 0xE9CBFBC1, 0xE9CCFBC1, 0xE9CDFBC1, 0xE9CEFBC1, 0xE9CFFBC1, 0xE9D0FBC1, 0xE9D1FBC1, 0xE9D2FBC1, 0xE9D3FBC1, 0xE9D4FBC1, 0xE9D5FBC1, 0xE9D6FBC1, 0xE9D7FBC1, 0xE9D8FBC1, + 0xE9D9FBC1, 0xE9DAFBC1, 0xE9DBFBC1, 0xE9DCFBC1, 0xE9DDFBC1, 0xE9DEFBC1, 0xE9DFFBC1, 0xE9E0FBC1, 0xE9E1FBC1, 0xE9E2FBC1, 0xE9E3FBC1, 0xE9E4FBC1, 0xE9E5FBC1, 0xE9E6FBC1, 0xE9E7FBC1, + 0xE9E8FBC1, 0xE9E9FBC1, 0xE9EAFBC1, 0xE9EBFBC1, 0xE9ECFBC1, 0xE9EDFBC1, 0xE9EEFBC1, 0xE9EFFBC1, 0xE9F0FBC1, 0xE9F1FBC1, 0xE9F2FBC1, 0xE9F3FBC1, 0xE9F4FBC1, 0xE9F5FBC1, 0xE9F6FBC1, + 0xE9F7FBC1, 0xE9F8FBC1, 0xE9F9FBC1, 0xE9FAFBC1, 0xE9FBFBC1, 0xE9FCFBC1, 0xE9FDFBC1, 0xE9FEFBC1, 0xE9FFFBC1, 0xEA00FBC1, 0xEA01FBC1, 0xEA02FBC1, 0xEA03FBC1, 0xEA04FBC1, 0xEA05FBC1, + 0xEA06FBC1, 0xEA07FBC1, 0xEA08FBC1, 0xEA09FBC1, 0xEA0AFBC1, 0xEA0BFBC1, 0xEA0CFBC1, 0xEA0DFBC1, 0xEA0EFBC1, 
0xEA0FFBC1, 0xEA10FBC1, 0xEA11FBC1, 0xEA12FBC1, 0xEA13FBC1, 0xEA14FBC1, + 0xEA15FBC1, 0xEA16FBC1, 0xEA17FBC1, 0xEA18FBC1, 0xEA19FBC1, 0xEA1AFBC1, 0xEA1BFBC1, 0xEA1CFBC1, 0xEA1DFBC1, 0xEA1EFBC1, 0xEA1FFBC1, 0xEA20FBC1, 0xEA21FBC1, 0xEA22FBC1, 0xEA23FBC1, + 0xEA24FBC1, 0xEA25FBC1, 0xEA26FBC1, 0xEA27FBC1, 0xEA28FBC1, 0xEA29FBC1, 0xEA2AFBC1, 0xEA2BFBC1, 0xEA2CFBC1, 0xEA2DFBC1, 0xEA2EFBC1, 0xEA2FFBC1, 0xEA30FBC1, 0xEA31FBC1, 0xEA32FBC1, + 0xEA33FBC1, 0xEA34FBC1, 0xEA35FBC1, 0xEA36FBC1, 0xEA37FBC1, 0xEA38FBC1, 0xEA39FBC1, 0xEA3AFBC1, 0xEA3BFBC1, 0xEA3CFBC1, 0xEA3DFBC1, 0xEA3EFBC1, 0xEA3FFBC1, 0xEA40FBC1, 0xEA41FBC1, + 0xEA42FBC1, 0xEA43FBC1, 0xEA44FBC1, 0xEA45FBC1, 0xEA46FBC1, 0xEA47FBC1, 0xEA48FBC1, 0xEA49FBC1, 0xEA4AFBC1, 0xEA4BFBC1, 0xEA4CFBC1, 0xEA4DFBC1, 0xEA4EFBC1, 0xEA4FFBC1, 0xEA50FBC1, + 0xEA51FBC1, 0xEA52FBC1, 0xEA53FBC1, 0xEA54FBC1, 0xEA55FBC1, 0xEA56FBC1, 0xEA57FBC1, 0xEA58FBC1, 0xEA59FBC1, 0xEA5AFBC1, 0xEA5BFBC1, 0xEA5CFBC1, 0xEA5DFBC1, 0xEA5EFBC1, 0xEA5FFBC1, + 0xEA60FBC1, 0xEA61FBC1, 0xEA62FBC1, 0xEA63FBC1, 0xEA64FBC1, 0xEA65FBC1, 0xEA66FBC1, 0xEA67FBC1, 0xEA68FBC1, 0xEA69FBC1, 0xEA6AFBC1, 0xEA6BFBC1, 0xEA6CFBC1, 0xEA6DFBC1, 0xEA6EFBC1, + 0xEA6FFBC1, 0xEA70FBC1, 0xEA71FBC1, 0xEA72FBC1, 0xEA73FBC1, 0xEA74FBC1, 0xEA75FBC1, 0xEA76FBC1, 0xEA77FBC1, 0xEA78FBC1, 0xEA79FBC1, 0xEA7AFBC1, 0xEA7BFBC1, 0xEA7CFBC1, 0xEA7DFBC1, + 0xEA7EFBC1, 0xEA7FFBC1, 0xEA80FBC1, 0xEA81FBC1, 0xEA82FBC1, 0xEA83FBC1, 0xEA84FBC1, 0xEA85FBC1, 0xEA86FBC1, 0xEA87FBC1, 0xEA88FBC1, 0xEA89FBC1, 0xEA8AFBC1, 0xEA8BFBC1, 0xEA8CFBC1, + 0xEA8DFBC1, 0xEA8EFBC1, 0xEA8FFBC1, 0xEA90FBC1, 0xEA91FBC1, 0xEA92FBC1, 0xEA93FBC1, 0xEA94FBC1, 0xEA95FBC1, 0xEA96FBC1, 0xEA97FBC1, 0xEA98FBC1, 0xEA99FBC1, 0xEA9AFBC1, 0xEA9BFBC1, + 0xEA9CFBC1, 0xEA9DFBC1, 0xEA9EFBC1, 0xEA9FFBC1, 0xEAA0FBC1, 0xEAA1FBC1, 0xEAA2FBC1, 0xEAA3FBC1, 0xEAA4FBC1, 0xEAA5FBC1, 0xEAA6FBC1, 0xEAA7FBC1, 0xEAA8FBC1, 0xEAA9FBC1, 0xEAAAFBC1, + 0xEAABFBC1, 0xEAACFBC1, 0xEAADFBC1, 0xEAAEFBC1, 0xEAAFFBC1, 0xEAB0FBC1, 0xEAB1FBC1, 0xEAB2FBC1, 
0xEAB3FBC1, 0xEAB4FBC1, 0xEAB5FBC1, 0xEAB6FBC1, 0xEAB7FBC1, 0xEAB8FBC1, 0xEAB9FBC1, + 0xEABAFBC1, 0xEABBFBC1, 0xEABCFBC1, 0xEABDFBC1, 0xEABEFBC1, 0xEABFFBC1, 0xEAC0FBC1, 0xEAC1FBC1, 0xEAC2FBC1, 0xEAC3FBC1, 0xEAC4FBC1, 0xEAC5FBC1, 0xEAC6FBC1, 0xEAC7FBC1, 0xEAC8FBC1, + 0xEAC9FBC1, 0xEACAFBC1, 0xEACBFBC1, 0xEACCFBC1, 0xEACDFBC1, 0xEACEFBC1, 0xEACFFBC1, 0xEAD0FBC1, 0xEAD1FBC1, 0xEAD2FBC1, 0xEAD3FBC1, 0xEAD4FBC1, 0xEAD5FBC1, 0xEAD6FBC1, 0xEAD7FBC1, + 0xEAD8FBC1, 0xEAD9FBC1, 0xEADAFBC1, 0xEADBFBC1, 0xEADCFBC1, 0xEADDFBC1, 0xEADEFBC1, 0xEADFFBC1, 0xEAE0FBC1, 0xEAE1FBC1, 0xEAE2FBC1, 0xEAE3FBC1, 0xEAE4FBC1, 0xEAE5FBC1, 0xEAE6FBC1, + 0xEAE7FBC1, 0xEAE8FBC1, 0xEAE9FBC1, 0xEAEAFBC1, 0xEAEBFBC1, 0xEAECFBC1, 0xEAEDFBC1, 0xEAEEFBC1, 0xEAEFFBC1, 0xEAF0FBC1, 0xEAF1FBC1, 0xEAF2FBC1, 0xEAF3FBC1, 0xEAF4FBC1, 0xEAF5FBC1, + 0xEAF6FBC1, 0xEAF7FBC1, 0xEAF8FBC1, 0xEAF9FBC1, 0xEAFAFBC1, 0xEAFBFBC1, 0xEAFCFBC1, 0xEAFDFBC1, 0xEAFEFBC1, 0xEAFFFBC1, 0xEB00FBC1, 0xEB01FBC1, 0xEB02FBC1, 0xEB03FBC1, 0xEB04FBC1, + 0xEB05FBC1, 0xEB06FBC1, 0xEB07FBC1, 0xEB08FBC1, 0xEB09FBC1, 0xEB0AFBC1, 0xEB0BFBC1, 0xEB0CFBC1, 0xEB0DFBC1, 0xEB0EFBC1, 0xEB0FFBC1, 0xEB10FBC1, 0xEB11FBC1, 0xEB12FBC1, 0xEB13FBC1, + 0xEB14FBC1, 0xEB15FBC1, 0xEB16FBC1, 0xEB17FBC1, 0xEB18FBC1, 0xEB19FBC1, 0xEB1AFBC1, 0xEB1BFBC1, 0xEB1CFBC1, 0xEB1DFBC1, 0xEB1EFBC1, 0xEB1FFBC1, 0xEB20FBC1, 0xEB21FBC1, 0xEB22FBC1, + 0xEB23FBC1, 0xEB24FBC1, 0xEB25FBC1, 0xEB26FBC1, 0xEB27FBC1, 0xEB28FBC1, 0xEB29FBC1, 0xEB2AFBC1, 0xEB2BFBC1, 0xEB2CFBC1, 0xEB2DFBC1, 0xEB2EFBC1, 0xEB2FFBC1, 0xEB30FBC1, 0xEB31FBC1, + 0xEB32FBC1, 0xEB33FBC1, 0xEB34FBC1, 0xEB35FBC1, 0xEB36FBC1, 0xEB37FBC1, 0xEB38FBC1, 0xEB39FBC1, 0xEB3AFBC1, 0xEB3BFBC1, 0xEB3CFBC1, 0xEB3DFBC1, 0xEB3EFBC1, 0xEB3FFBC1, 0xEB40FBC1, + 0xEB41FBC1, 0xEB42FBC1, 0xEB43FBC1, 0xEB44FBC1, 0xEB45FBC1, 0xEB46FBC1, 0xEB47FBC1, 0xEB48FBC1, 0xEB49FBC1, 0xEB4AFBC1, 0xEB4BFBC1, 0xEB4CFBC1, 0xEB4DFBC1, 0xEB4EFBC1, 0xEB4FFBC1, + 0xEB50FBC1, 0xEB51FBC1, 0xEB52FBC1, 0xEB53FBC1, 0xEB54FBC1, 0xEB55FBC1, 0xEB56FBC1, 
0xEB57FBC1, 0xEB58FBC1, 0xEB59FBC1, 0xEB5AFBC1, 0xEB5BFBC1, 0xEB5CFBC1, 0xEB5DFBC1, 0xEB5EFBC1, + 0xEB5FFBC1, 0xEB60FBC1, 0xEB61FBC1, 0xEB62FBC1, 0xEB63FBC1, 0xEB64FBC1, 0xEB65FBC1, 0xEB66FBC1, 0xEB67FBC1, 0xEB68FBC1, 0xEB69FBC1, 0xEB6AFBC1, 0xEB6BFBC1, 0xEB6CFBC1, 0xEB6DFBC1, + 0xEB6EFBC1, 0xEB6FFBC1, 0xEB70FBC1, 0xEB71FBC1, 0xEB72FBC1, 0xEB73FBC1, 0xEB74FBC1, 0xEB75FBC1, 0xEB76FBC1, 0xEB77FBC1, 0xEB78FBC1, 0xEB79FBC1, 0xEB7AFBC1, 0xEB7BFBC1, 0xEB7CFBC1, + 0xEB7DFBC1, 0xEB7EFBC1, 0xEB7FFBC1, 0xEB80FBC1, 0xEB81FBC1, 0xEB82FBC1, 0xEB83FBC1, 0xEB84FBC1, 0xEB85FBC1, 0xEB86FBC1, 0xEB87FBC1, 0xEB88FBC1, 0xEB89FBC1, 0xEB8AFBC1, 0xEB8BFBC1, + 0xEB8CFBC1, 0xEB8DFBC1, 0xEB8EFBC1, 0xEB8FFBC1, 0xEB90FBC1, 0xEB91FBC1, 0xEB92FBC1, 0xEB93FBC1, 0xEB94FBC1, 0xEB95FBC1, 0xEB96FBC1, 0xEB97FBC1, 0xEB98FBC1, 0xEB99FBC1, 0xEB9AFBC1, + 0xEB9BFBC1, 0xEB9CFBC1, 0xEB9DFBC1, 0xEB9EFBC1, 0xEB9FFBC1, 0xEBA0FBC1, 0xEBA1FBC1, 0xEBA2FBC1, 0xEBA3FBC1, 0xEBA4FBC1, 0xEBA5FBC1, 0xEBA6FBC1, 0xEBA7FBC1, 0xEBA8FBC1, 0xEBA9FBC1, + 0xEBAAFBC1, 0xEBABFBC1, 0xEBACFBC1, 0xEBADFBC1, 0xEBAEFBC1, 0xEBAFFBC1, 0xEBB0FBC1, 0xEBB1FBC1, 0xEBB2FBC1, 0xEBB3FBC1, 0xEBB4FBC1, 0xEBB5FBC1, 0xEBB6FBC1, 0xEBB7FBC1, 0xEBB8FBC1, + 0xEBB9FBC1, 0xEBBAFBC1, 0xEBBBFBC1, 0xEBBCFBC1, 0xEBBDFBC1, 0xEBBEFBC1, 0xEBBFFBC1, 0xEBC0FBC1, 0xEBC1FBC1, 0xEBC2FBC1, 0xEBC3FBC1, 0xEBC4FBC1, 0xEBC5FBC1, 0xEBC6FBC1, 0xEBC7FBC1, + 0xEBC8FBC1, 0xEBC9FBC1, 0xEBCAFBC1, 0xEBCBFBC1, 0xEBCCFBC1, 0xEBCDFBC1, 0xEBCEFBC1, 0xEBCFFBC1, 0xEBD0FBC1, 0xEBD1FBC1, 0xEBD2FBC1, 0xEBD3FBC1, 0xEBD4FBC1, 0xEBD5FBC1, 0xEBD6FBC1, + 0xEBD7FBC1, 0xEBD8FBC1, 0xEBD9FBC1, 0xEBDAFBC1, 0xEBDBFBC1, 0xEBDCFBC1, 0xEBDDFBC1, 0xEBDEFBC1, 0xEBDFFBC1, 0xEBE0FBC1, 0xEBE1FBC1, 0xEBE2FBC1, 0xEBE3FBC1, 0xEBE4FBC1, 0xEBE5FBC1, + 0xEBE6FBC1, 0xEBE7FBC1, 0xEBE8FBC1, 0xEBE9FBC1, 0xEBEAFBC1, 0xEBEBFBC1, 0xEBECFBC1, 0xEBEDFBC1, 0xEBEEFBC1, 0xEBEFFBC1, 0xEBF0FBC1, 0xEBF1FBC1, 0xEBF2FBC1, 0xEBF3FBC1, 0xEBF4FBC1, + 0xEBF5FBC1, 0xEBF6FBC1, 0xEBF7FBC1, 0xEBF8FBC1, 0xEBF9FBC1, 0xEBFAFBC1, 
0xEBFBFBC1, 0xEBFCFBC1, 0xEBFDFBC1, 0xEBFEFBC1, 0xEBFFFBC1, 0xEC00FBC1, 0xEC01FBC1, 0xEC02FBC1, 0xEC03FBC1, + 0xEC04FBC1, 0xEC05FBC1, 0xEC06FBC1, 0xEC07FBC1, 0xEC08FBC1, 0xEC09FBC1, 0xEC0AFBC1, 0xEC0BFBC1, 0xEC0CFBC1, 0xEC0DFBC1, 0xEC0EFBC1, 0xEC0FFBC1, 0xEC10FBC1, 0xEC11FBC1, 0xEC12FBC1, + 0xEC13FBC1, 0xEC14FBC1, 0xEC15FBC1, 0xEC16FBC1, 0xEC17FBC1, 0xEC18FBC1, 0xEC19FBC1, 0xEC1AFBC1, 0xEC1BFBC1, 0xEC1CFBC1, 0xEC1DFBC1, 0xEC1EFBC1, 0xEC1FFBC1, 0xEC20FBC1, 0xEC21FBC1, + 0xEC22FBC1, 0xEC23FBC1, 0xEC24FBC1, 0xEC25FBC1, 0xEC26FBC1, 0xEC27FBC1, 0xEC28FBC1, 0xEC29FBC1, 0xEC2AFBC1, 0xEC2BFBC1, 0xEC2CFBC1, 0xEC2DFBC1, 0xEC2EFBC1, 0xEC2FFBC1, 0xEC30FBC1, + 0xEC31FBC1, 0xEC32FBC1, 0xEC33FBC1, 0xEC34FBC1, 0xEC35FBC1, 0xEC36FBC1, 0xEC37FBC1, 0xEC38FBC1, 0xEC39FBC1, 0xEC3AFBC1, 0xEC3BFBC1, 0xEC3CFBC1, 0xEC3DFBC1, 0xEC3EFBC1, 0xEC3FFBC1, + 0xEC40FBC1, 0xEC41FBC1, 0xEC42FBC1, 0xEC43FBC1, 0xEC44FBC1, 0xEC45FBC1, 0xEC46FBC1, 0xEC47FBC1, 0xEC48FBC1, 0xEC49FBC1, 0xEC4AFBC1, 0xEC4BFBC1, 0xEC4CFBC1, 0xEC4DFBC1, 0xEC4EFBC1, + 0xEC4FFBC1, 0xEC50FBC1, 0xEC51FBC1, 0xEC52FBC1, 0xEC53FBC1, 0xEC54FBC1, 0xEC55FBC1, 0xEC56FBC1, 0xEC57FBC1, 0xEC58FBC1, 0xEC59FBC1, 0xEC5AFBC1, 0xEC5BFBC1, 0xEC5CFBC1, 0xEC5DFBC1, + 0xEC5EFBC1, 0xEC5FFBC1, 0xEC60FBC1, 0xEC61FBC1, 0xEC62FBC1, 0xEC63FBC1, 0xEC64FBC1, 0xEC65FBC1, 0xEC66FBC1, 0xEC67FBC1, 0xEC68FBC1, 0xEC69FBC1, 0xEC6AFBC1, 0xEC6BFBC1, 0xEC6CFBC1, + 0xEC6DFBC1, 0xEC6EFBC1, 0xEC6FFBC1, 0xEC70FBC1, 0xEC71FBC1, 0xEC72FBC1, 0xEC73FBC1, 0xEC74FBC1, 0xEC75FBC1, 0xEC76FBC1, 0xEC77FBC1, 0xEC78FBC1, 0xEC79FBC1, 0xEC7AFBC1, 0xEC7BFBC1, + 0xEC7CFBC1, 0xEC7DFBC1, 0xEC7EFBC1, 0xEC7FFBC1, 0xEC80FBC1, 0xEC81FBC1, 0xEC82FBC1, 0xEC83FBC1, 0xEC84FBC1, 0xEC85FBC1, 0xEC86FBC1, 0xEC87FBC1, 0xEC88FBC1, 0xEC89FBC1, 0xEC8AFBC1, + 0xEC8BFBC1, 0xEC8CFBC1, 0xEC8DFBC1, 0xEC8EFBC1, 0xEC8FFBC1, 0xEC90FBC1, 0xEC91FBC1, 0xEC92FBC1, 0xEC93FBC1, 0xEC94FBC1, 0xEC95FBC1, 0xEC96FBC1, 0xEC97FBC1, 0xEC98FBC1, 0xEC99FBC1, + 0xEC9AFBC1, 0xEC9BFBC1, 0xEC9CFBC1, 0xEC9DFBC1, 0xEC9EFBC1, 
0xEC9FFBC1, 0xECA0FBC1, 0xECA1FBC1, 0xECA2FBC1, 0xECA3FBC1, 0xECA4FBC1, 0xECA5FBC1, 0xECA6FBC1, 0xECA7FBC1, 0xECA8FBC1, + 0xECA9FBC1, 0xECAAFBC1, 0xECABFBC1, 0xECACFBC1, 0xECADFBC1, 0xECAEFBC1, 0xECAFFBC1, 0xECB0FBC1, 0xECB1FBC1, 0xECB2FBC1, 0xECB3FBC1, 0xECB4FBC1, 0xECB5FBC1, 0xECB6FBC1, 0xECB7FBC1, + 0xECB8FBC1, 0xECB9FBC1, 0xECBAFBC1, 0xECBBFBC1, 0xECBCFBC1, 0xECBDFBC1, 0xECBEFBC1, 0xECBFFBC1, 0xECC0FBC1, 0xECC1FBC1, 0xECC2FBC1, 0xECC3FBC1, 0xECC4FBC1, 0xECC5FBC1, 0xECC6FBC1, + 0xECC7FBC1, 0xECC8FBC1, 0xECC9FBC1, 0xECCAFBC1, 0xECCBFBC1, 0xECCCFBC1, 0xECCDFBC1, 0xECCEFBC1, 0xECCFFBC1, 0xECD0FBC1, 0xECD1FBC1, 0xECD2FBC1, 0xECD3FBC1, 0xECD4FBC1, 0xECD5FBC1, + 0xECD6FBC1, 0xECD7FBC1, 0xECD8FBC1, 0xECD9FBC1, 0xECDAFBC1, 0xECDBFBC1, 0xECDCFBC1, 0xECDDFBC1, 0xECDEFBC1, 0xECDFFBC1, 0xECE0FBC1, 0xECE1FBC1, 0xECE2FBC1, 0xECE3FBC1, 0xECE4FBC1, + 0xECE5FBC1, 0xECE6FBC1, 0xECE7FBC1, 0xECE8FBC1, 0xECE9FBC1, 0xECEAFBC1, 0xECEBFBC1, 0xECECFBC1, 0xECEDFBC1, 0xECEEFBC1, 0xECEFFBC1, 0xECF0FBC1, 0xECF1FBC1, 0xECF2FBC1, 0xECF3FBC1, + 0xECF4FBC1, 0xECF5FBC1, 0xECF6FBC1, 0xECF7FBC1, 0xECF8FBC1, 0xECF9FBC1, 0xECFAFBC1, 0xECFBFBC1, 0xECFCFBC1, 0xECFDFBC1, 0xECFEFBC1, 0xECFFFBC1, 0xED00FBC1, 0xED01FBC1, 0xED02FBC1, + 0xED03FBC1, 0xED04FBC1, 0xED05FBC1, 0xED06FBC1, 0xED07FBC1, 0xED08FBC1, 0xED09FBC1, 0xED0AFBC1, 0xED0BFBC1, 0xED0CFBC1, 0xED0DFBC1, 0xED0EFBC1, 0xED0FFBC1, 0xED10FBC1, 0xED11FBC1, + 0xED12FBC1, 0xED13FBC1, 0xED14FBC1, 0xED15FBC1, 0xED16FBC1, 0xED17FBC1, 0xED18FBC1, 0xED19FBC1, 0xED1AFBC1, 0xED1BFBC1, 0xED1CFBC1, 0xED1DFBC1, 0xED1EFBC1, 0xED1FFBC1, 0xED20FBC1, + 0xED21FBC1, 0xED22FBC1, 0xED23FBC1, 0xED24FBC1, 0xED25FBC1, 0xED26FBC1, 0xED27FBC1, 0xED28FBC1, 0xED29FBC1, 0xED2AFBC1, 0xED2BFBC1, 0xED2CFBC1, 0xED2DFBC1, 0xED2EFBC1, 0xED2FFBC1, + 0xED30FBC1, 0xED31FBC1, 0xED32FBC1, 0xED33FBC1, 0xED34FBC1, 0xED35FBC1, 0xED36FBC1, 0xED37FBC1, 0xED38FBC1, 0xED39FBC1, 0xED3AFBC1, 0xED3BFBC1, 0xED3CFBC1, 0xED3DFBC1, 0xED3EFBC1, + 0xED3FFBC1, 0xED40FBC1, 0xED41FBC1, 0xED42FBC1, 
0xED43FBC1, 0xED44FBC1, 0xED45FBC1, 0xED46FBC1, 0xED47FBC1, 0xED48FBC1, 0xED49FBC1, 0xED4AFBC1, 0xED4BFBC1, 0xED4CFBC1, 0xED4DFBC1, + 0xED4EFBC1, 0xED4FFBC1, 0xED50FBC1, 0xED51FBC1, 0xED52FBC1, 0xED53FBC1, 0xED54FBC1, 0xED55FBC1, 0xED56FBC1, 0xED57FBC1, 0xED58FBC1, 0xED59FBC1, 0xED5AFBC1, 0xED5BFBC1, 0xED5CFBC1, + 0xED5DFBC1, 0xED5EFBC1, 0xED5FFBC1, 0xED60FBC1, 0xED61FBC1, 0xED62FBC1, 0xED63FBC1, 0xED64FBC1, 0xED65FBC1, 0xED66FBC1, 0xED67FBC1, 0xED68FBC1, 0xED69FBC1, 0xED6AFBC1, 0xED6BFBC1, + 0xED6CFBC1, 0xED6DFBC1, 0xED6EFBC1, 0xED6FFBC1, 0xED70FBC1, 0xED71FBC1, 0xED72FBC1, 0xED73FBC1, 0xED74FBC1, 0xED75FBC1, 0xED76FBC1, 0xED77FBC1, 0xED78FBC1, 0xED79FBC1, 0xED7AFBC1, + 0xED7BFBC1, 0xED7CFBC1, 0xED7DFBC1, 0xED7EFBC1, 0xED7FFBC1, 0xED80FBC1, 0xED81FBC1, 0xED82FBC1, 0xED83FBC1, 0xED84FBC1, 0xED85FBC1, 0xED86FBC1, 0xED87FBC1, 0xED88FBC1, 0xED89FBC1, + 0xED8AFBC1, 0xED8BFBC1, 0xED8CFBC1, 0xED8DFBC1, 0xED8EFBC1, 0xED8FFBC1, 0xED90FBC1, 0xED91FBC1, 0xED92FBC1, 0xED93FBC1, 0xED94FBC1, 0xED95FBC1, 0xED96FBC1, 0xED97FBC1, 0xED98FBC1, + 0xED99FBC1, 0xED9AFBC1, 0xED9BFBC1, 0xED9CFBC1, 0xED9DFBC1, 0xED9EFBC1, 0xED9FFBC1, 0xEDA0FBC1, 0xEDA1FBC1, 0xEDA2FBC1, 0xEDA3FBC1, 0xEDA4FBC1, 0xEDA5FBC1, 0xEDA6FBC1, 0xEDA7FBC1, + 0xEDA8FBC1, 0xEDA9FBC1, 0xEDAAFBC1, 0xEDABFBC1, 0xEDACFBC1, 0xEDADFBC1, 0xEDAEFBC1, 0xEDAFFBC1, 0xEDB0FBC1, 0xEDB1FBC1, 0xEDB2FBC1, 0xEDB3FBC1, 0xEDB4FBC1, 0xEDB5FBC1, 0xEDB6FBC1, + 0xEDB7FBC1, 0xEDB8FBC1, 0xEDB9FBC1, 0xEDBAFBC1, 0xEDBBFBC1, 0xEDBCFBC1, 0xEDBDFBC1, 0xEDBEFBC1, 0xEDBFFBC1, 0xEDC0FBC1, 0xEDC1FBC1, 0xEDC2FBC1, 0xEDC3FBC1, 0xEDC4FBC1, 0xEDC5FBC1, + 0xEDC6FBC1, 0xEDC7FBC1, 0xEDC8FBC1, 0xEDC9FBC1, 0xEDCAFBC1, 0xEDCBFBC1, 0xEDCCFBC1, 0xEDCDFBC1, 0xEDCEFBC1, 0xEDCFFBC1, 0xEDD0FBC1, 0xEDD1FBC1, 0xEDD2FBC1, 0xEDD3FBC1, 0xEDD4FBC1, + 0xEDD5FBC1, 0xEDD6FBC1, 0xEDD7FBC1, 0xEDD8FBC1, 0xEDD9FBC1, 0xEDDAFBC1, 0xEDDBFBC1, 0xEDDCFBC1, 0xEDDDFBC1, 0xEDDEFBC1, 0xEDDFFBC1, 0xEDE0FBC1, 0xEDE1FBC1, 0xEDE2FBC1, 0xEDE3FBC1, + 0xEDE4FBC1, 0xEDE5FBC1, 0xEDE6FBC1, 
0xEDE7FBC1, 0xEDE8FBC1, 0xEDE9FBC1, 0xEDEAFBC1, 0xEDEBFBC1, 0xEDECFBC1, 0xEDEDFBC1, 0xEDEEFBC1, 0xEDEFFBC1, 0xEDF0FBC1, 0xEDF1FBC1, 0xEDF2FBC1, + 0xEDF3FBC1, 0xEDF4FBC1, 0xEDF5FBC1, 0xEDF6FBC1, 0xEDF7FBC1, 0xEDF8FBC1, 0xEDF9FBC1, 0xEDFAFBC1, 0xEDFBFBC1, 0xEDFCFBC1, 0xEDFDFBC1, 0xEDFEFBC1, 0xEDFFFBC1, 0xEE00FBC1, 0xEE01FBC1, + 0xEE02FBC1, 0xEE03FBC1, 0xEE04FBC1, 0xEE05FBC1, 0xEE06FBC1, 0xEE07FBC1, 0xEE08FBC1, 0xEE09FBC1, 0xEE0AFBC1, 0xEE0BFBC1, 0xEE0CFBC1, 0xEE0DFBC1, 0xEE0EFBC1, 0xEE0FFBC1, 0xEE10FBC1, + 0xEE11FBC1, 0xEE12FBC1, 0xEE13FBC1, 0xEE14FBC1, 0xEE15FBC1, 0xEE16FBC1, 0xEE17FBC1, 0xEE18FBC1, 0xEE19FBC1, 0xEE1AFBC1, 0xEE1BFBC1, 0xEE1CFBC1, 0xEE1DFBC1, 0xEE1EFBC1, 0xEE1FFBC1, + 0xEE20FBC1, 0xEE21FBC1, 0xEE22FBC1, 0xEE23FBC1, 0xEE24FBC1, 0xEE25FBC1, 0xEE26FBC1, 0xEE27FBC1, 0xEE28FBC1, 0xEE29FBC1, 0xEE2AFBC1, 0xEE2BFBC1, 0xEE2CFBC1, 0xEE2DFBC1, 0xEE2EFBC1, + 0xEE2FFBC1, 0xEE30FBC1, 0xEE31FBC1, 0xEE32FBC1, 0xEE33FBC1, 0xEE34FBC1, 0xEE35FBC1, 0xEE36FBC1, 0xEE37FBC1, 0xEE38FBC1, 0xEE39FBC1, 0xEE3AFBC1, 0xEE3BFBC1, 0xEE3CFBC1, 0xEE3DFBC1, + 0xEE3EFBC1, 0xEE3FFBC1, 0xEE40FBC1, 0xEE41FBC1, 0xEE42FBC1, 0xEE43FBC1, 0xEE44FBC1, 0xEE45FBC1, 0xEE46FBC1, 0xEE47FBC1, 0xEE48FBC1, 0xEE49FBC1, 0xEE4AFBC1, 0xEE4BFBC1, 0xEE4CFBC1, + 0xEE4DFBC1, 0xEE4EFBC1, 0xEE4FFBC1, 0xEE50FBC1, 0xEE51FBC1, 0xEE52FBC1, 0xEE53FBC1, 0xEE54FBC1, 0xEE55FBC1, 0xEE56FBC1, 0xEE57FBC1, 0xEE58FBC1, 0xEE59FBC1, 0xEE5AFBC1, 0xEE5BFBC1, + 0xEE5CFBC1, 0xEE5DFBC1, 0xEE5EFBC1, 0xEE5FFBC1, 0xEE60FBC1, 0xEE61FBC1, 0xEE62FBC1, 0xEE63FBC1, 0xEE64FBC1, 0xEE65FBC1, 0xEE66FBC1, 0xEE67FBC1, 0xEE68FBC1, 0xEE69FBC1, 0xEE6AFBC1, + 0xEE6BFBC1, 0xEE6CFBC1, 0xEE6DFBC1, 0xEE6EFBC1, 0xEE6FFBC1, 0xEE70FBC1, 0xEE71FBC1, 0xEE72FBC1, 0xEE73FBC1, 0xEE74FBC1, 0xEE75FBC1, 0xEE76FBC1, 0xEE77FBC1, 0xEE78FBC1, 0xEE79FBC1, + 0xEE7AFBC1, 0xEE7BFBC1, 0xEE7CFBC1, 0xEE7DFBC1, 0xEE7EFBC1, 0xEE7FFBC1, 0xEE80FBC1, 0xEE81FBC1, 0xEE82FBC1, 0xEE83FBC1, 0xEE84FBC1, 0xEE85FBC1, 0xEE86FBC1, 0xEE87FBC1, 0xEE88FBC1, + 0xEE89FBC1, 0xEE8AFBC1, 
0xEE8BFBC1, 0xEE8CFBC1, 0xEE8DFBC1, 0xEE8EFBC1, 0xEE8FFBC1, 0xEE90FBC1, 0xEE91FBC1, 0xEE92FBC1, 0xEE93FBC1, 0xEE94FBC1, 0xEE95FBC1, 0xEE96FBC1, 0xEE97FBC1, + 0xEE98FBC1, 0xEE99FBC1, 0xEE9AFBC1, 0xEE9BFBC1, 0xEE9CFBC1, 0xEE9DFBC1, 0xEE9EFBC1, 0xEE9FFBC1, 0xEEA0FBC1, 0xEEA1FBC1, 0xEEA2FBC1, 0xEEA3FBC1, 0xEEA4FBC1, 0xEEA5FBC1, 0xEEA6FBC1, + 0xEEA7FBC1, 0xEEA8FBC1, 0xEEA9FBC1, 0xEEAAFBC1, 0xEEABFBC1, 0xEEACFBC1, 0xEEADFBC1, 0xEEAEFBC1, 0xEEAFFBC1, 0xEEB0FBC1, 0xEEB1FBC1, 0xEEB2FBC1, 0xEEB3FBC1, 0xEEB4FBC1, 0xEEB5FBC1, + 0xEEB6FBC1, 0xEEB7FBC1, 0xEEB8FBC1, 0xEEB9FBC1, 0xEEBAFBC1, 0xEEBBFBC1, 0xEEBCFBC1, 0xEEBDFBC1, 0xEEBEFBC1, 0xEEBFFBC1, 0xEEC0FBC1, 0xEEC1FBC1, 0xEEC2FBC1, 0xEEC3FBC1, 0xEEC4FBC1, + 0xEEC5FBC1, 0xEEC6FBC1, 0xEEC7FBC1, 0xEEC8FBC1, 0xEEC9FBC1, 0xEECAFBC1, 0xEECBFBC1, 0xEECCFBC1, 0xEECDFBC1, 0xEECEFBC1, 0xEECFFBC1, 0xEED0FBC1, 0xEED1FBC1, 0xEED2FBC1, 0xEED3FBC1, + 0xEED4FBC1, 0xEED5FBC1, 0xEED6FBC1, 0xEED7FBC1, 0xEED8FBC1, 0xEED9FBC1, 0xEEDAFBC1, 0xEEDBFBC1, 0xEEDCFBC1, 0xEEDDFBC1, 0xEEDEFBC1, 0xEEDFFBC1, 0xEEE0FBC1, 0xEEE1FBC1, 0xEEE2FBC1, + 0xEEE3FBC1, 0xEEE4FBC1, 0xEEE5FBC1, 0xEEE6FBC1, 0xEEE7FBC1, 0xEEE8FBC1, 0xEEE9FBC1, 0xEEEAFBC1, 0xEEEBFBC1, 0xEEECFBC1, 0xEEEDFBC1, 0xEEEEFBC1, 0xEEEFFBC1, 0xEEF0FBC1, 0xEEF1FBC1, + 0xEEF2FBC1, 0xEEF3FBC1, 0xEEF4FBC1, 0xEEF5FBC1, 0xEEF6FBC1, 0xEEF7FBC1, 0xEEF8FBC1, 0xEEF9FBC1, 0xEEFAFBC1, 0xEEFBFBC1, 0xEEFCFBC1, 0xEEFDFBC1, 0xEEFEFBC1, 0xEEFFFBC1, 0xEF00FBC1, + 0xEF01FBC1, 0xEF02FBC1, 0xEF03FBC1, 0xEF04FBC1, 0xEF05FBC1, 0xEF06FBC1, 0xEF07FBC1, 0xEF08FBC1, 0xEF09FBC1, 0xEF0AFBC1, 0xEF0BFBC1, 0xEF0CFBC1, 0xEF0DFBC1, 0xEF0EFBC1, 0xEF0FFBC1, + 0xEF10FBC1, 0xEF11FBC1, 0xEF12FBC1, 0xEF13FBC1, 0xEF14FBC1, 0xEF15FBC1, 0xEF16FBC1, 0xEF17FBC1, 0xEF18FBC1, 0xEF19FBC1, 0xEF1AFBC1, 0xEF1BFBC1, 0xEF1CFBC1, 0xEF1DFBC1, 0xEF1EFBC1, + 0xEF1FFBC1, 0xEF20FBC1, 0xEF21FBC1, 0xEF22FBC1, 0xEF23FBC1, 0xEF24FBC1, 0xEF25FBC1, 0xEF26FBC1, 0xEF27FBC1, 0xEF28FBC1, 0xEF29FBC1, 0xEF2AFBC1, 0xEF2BFBC1, 0xEF2CFBC1, 0xEF2DFBC1, + 0xEF2EFBC1, 
0xEF2FFBC1, 0xEF30FBC1, 0xEF31FBC1, 0xEF32FBC1, 0xEF33FBC1, 0xEF34FBC1, 0xEF35FBC1, 0xEF36FBC1, 0xEF37FBC1, 0xEF38FBC1, 0xEF39FBC1, 0xEF3AFBC1, 0xEF3BFBC1, 0xEF3CFBC1, + 0xEF3DFBC1, 0xEF3EFBC1, 0xEF3FFBC1, 0xEF40FBC1, 0xEF41FBC1, 0xEF42FBC1, 0xEF43FBC1, 0xEF44FBC1, 0xEF45FBC1, 0xEF46FBC1, 0xEF47FBC1, 0xEF48FBC1, 0xEF49FBC1, 0xEF4AFBC1, 0xEF4BFBC1, + 0xEF4CFBC1, 0xEF4DFBC1, 0xEF4EFBC1, 0xEF4FFBC1, 0xEF50FBC1, 0xEF51FBC1, 0xEF52FBC1, 0xEF53FBC1, 0xEF54FBC1, 0xEF55FBC1, 0xEF56FBC1, 0xEF57FBC1, 0xEF58FBC1, 0xEF59FBC1, 0xEF5AFBC1, + 0xEF5BFBC1, 0xEF5CFBC1, 0xEF5DFBC1, 0xEF5EFBC1, 0xEF5FFBC1, 0xEF60FBC1, 0xEF61FBC1, 0xEF62FBC1, 0xEF63FBC1, 0xEF64FBC1, 0xEF65FBC1, 0xEF66FBC1, 0xEF67FBC1, 0xEF68FBC1, 0xEF69FBC1, + 0xEF6AFBC1, 0xEF6BFBC1, 0xEF6CFBC1, 0xEF6DFBC1, 0xEF6EFBC1, 0xEF6FFBC1, 0xEF70FBC1, 0xEF71FBC1, 0xEF72FBC1, 0xEF73FBC1, 0xEF74FBC1, 0xEF75FBC1, 0xEF76FBC1, 0xEF77FBC1, 0xEF78FBC1, + 0xEF79FBC1, 0xEF7AFBC1, 0xEF7BFBC1, 0xEF7CFBC1, 0xEF7DFBC1, 0xEF7EFBC1, 0xEF7FFBC1, 0xEF80FBC1, 0xEF81FBC1, 0xEF82FBC1, 0xEF83FBC1, 0xEF84FBC1, 0xEF85FBC1, 0xEF86FBC1, 0xEF87FBC1, + 0xEF88FBC1, 0xEF89FBC1, 0xEF8AFBC1, 0xEF8BFBC1, 0xEF8CFBC1, 0xEF8DFBC1, 0xEF8EFBC1, 0xEF8FFBC1, 0xEF90FBC1, 0xEF91FBC1, 0xEF92FBC1, 0xEF93FBC1, 0xEF94FBC1, 0xEF95FBC1, 0xEF96FBC1, + 0xEF97FBC1, 0xEF98FBC1, 0xEF99FBC1, 0xEF9AFBC1, 0xEF9BFBC1, 0xEF9CFBC1, 0xEF9DFBC1, 0xEF9EFBC1, 0xEF9FFBC1, 0xEFA0FBC1, 0xEFA1FBC1, 0xEFA2FBC1, 0xEFA3FBC1, 0xEFA4FBC1, 0xEFA5FBC1, + 0xEFA6FBC1, 0xEFA7FBC1, 0xEFA8FBC1, 0xEFA9FBC1, 0xEFAAFBC1, 0xEFABFBC1, 0xEFACFBC1, 0xEFADFBC1, 0xEFAEFBC1, 0xEFAFFBC1, 0xEFB0FBC1, 0xEFB1FBC1, 0xEFB2FBC1, 0xEFB3FBC1, 0xEFB4FBC1, + 0xEFB5FBC1, 0xEFB6FBC1, 0xEFB7FBC1, 0xEFB8FBC1, 0xEFB9FBC1, 0xEFBAFBC1, 0xEFBBFBC1, 0xEFBCFBC1, 0xEFBDFBC1, 0xEFBEFBC1, 0xEFBFFBC1, 0xEFC0FBC1, 0xEFC1FBC1, 0xEFC2FBC1, 0xEFC3FBC1, + 0xEFC4FBC1, 0xEFC5FBC1, 0xEFC6FBC1, 0xEFC7FBC1, 0xEFC8FBC1, 0xEFC9FBC1, 0xEFCAFBC1, 0xEFCBFBC1, 0xEFCCFBC1, 0xEFCDFBC1, 0xEFCEFBC1, 0xEFCFFBC1, 0xEFD0FBC1, 0xEFD1FBC1, 0xEFD2FBC1, + 
0xEFD3FBC1, 0xEFD4FBC1, 0xEFD5FBC1, 0xEFD6FBC1, 0xEFD7FBC1, 0xEFD8FBC1, 0xEFD9FBC1, 0xEFDAFBC1, 0xEFDBFBC1, 0xEFDCFBC1, 0xEFDDFBC1, 0xEFDEFBC1, 0xEFDFFBC1, 0xEFE0FBC1, 0xEFE1FBC1, + 0xEFE2FBC1, 0xEFE3FBC1, 0xEFE4FBC1, 0xEFE5FBC1, 0xEFE6FBC1, 0xEFE7FBC1, 0xEFE8FBC1, 0xEFE9FBC1, 0xEFEAFBC1, 0xEFEBFBC1, 0xEFECFBC1, 0xEFEDFBC1, 0xEFEEFBC1, 0xEFEFFBC1, 0xEFF0FBC1, + 0xEFF1FBC1, 0xEFF2FBC1, 0xEFF3FBC1, 0xEFF4FBC1, 0xEFF5FBC1, 0xEFF6FBC1, 0xEFF7FBC1, 0xEFF8FBC1, 0xEFF9FBC1, 0xEFFAFBC1, 0xEFFBFBC1, 0xEFFCFBC1, 0xEFFDFBC1, 0xEFFEFBC1, 0xEFFFFBC1, + 0xF000FBC1, 0xF001FBC1, 0xF002FBC1, 0xF003FBC1, 0xF004FBC1, 0xF005FBC1, 0xF006FBC1, 0xF007FBC1, 0xF008FBC1, 0xF009FBC1, 0xF00AFBC1, 0xF00BFBC1, 0xF00CFBC1, 0xF00DFBC1, 0xF00EFBC1, + 0xF00FFBC1, 0xF010FBC1, 0xF011FBC1, 0xF012FBC1, 0xF013FBC1, 0xF014FBC1, 0xF015FBC1, 0xF016FBC1, 0xF017FBC1, 0xF018FBC1, 0xF019FBC1, 0xF01AFBC1, 0xF01BFBC1, 0xF01CFBC1, 0xF01DFBC1, + 0xF01EFBC1, 0xF01FFBC1, 0xF020FBC1, 0xF021FBC1, 0xF022FBC1, 0xF023FBC1, 0xF024FBC1, 0xF025FBC1, 0xF026FBC1, 0xF027FBC1, 0xF028FBC1, 0xF029FBC1, 0xF02AFBC1, 0xF02BFBC1, 0xF02CFBC1, + 0xF02DFBC1, 0xF02EFBC1, 0xF02FFBC1, 0xF030FBC1, 0xF031FBC1, 0xF032FBC1, 0xF033FBC1, 0xF034FBC1, 0xF035FBC1, 0xF036FBC1, 0xF037FBC1, 0xF038FBC1, 0xF039FBC1, 0xF03AFBC1, 0xF03BFBC1, + 0xF03CFBC1, 0xF03DFBC1, 0xF03EFBC1, 0xF03FFBC1, 0xF040FBC1, 0xF041FBC1, 0xF042FBC1, 0xF043FBC1, 0xF044FBC1, 0xF045FBC1, 0xF046FBC1, 0xF047FBC1, 0xF048FBC1, 0xF049FBC1, 0xF04AFBC1, + 0xF04BFBC1, 0xF04CFBC1, 0xF04DFBC1, 0xF04EFBC1, 0xF04FFBC1, 0xF050FBC1, 0xF051FBC1, 0xF052FBC1, 0xF053FBC1, 0xF054FBC1, 0xF055FBC1, 0xF056FBC1, 0xF057FBC1, 0xF058FBC1, 0xF059FBC1, + 0xF05AFBC1, 0xF05BFBC1, 0xF05CFBC1, 0xF05DFBC1, 0xF05EFBC1, 0xF05FFBC1, 0xF060FBC1, 0xF061FBC1, 0xF062FBC1, 0xF063FBC1, 0xF064FBC1, 0xF065FBC1, 0xF066FBC1, 0xF067FBC1, 0xF068FBC1, + 0xF069FBC1, 0xF06AFBC1, 0xF06BFBC1, 0xF06CFBC1, 0xF06DFBC1, 0xF06EFBC1, 0xF06FFBC1, 0xF070FBC1, 0xF071FBC1, 0xF072FBC1, 0xF073FBC1, 0xF074FBC1, 0xF075FBC1, 0xF076FBC1, 0xF077FBC1, 
+ 0xF078FBC1, 0xF079FBC1, 0xF07AFBC1, 0xF07BFBC1, 0xF07CFBC1, 0xF07DFBC1, 0xF07EFBC1, 0xF07FFBC1, 0xF080FBC1, 0xF081FBC1, 0xF082FBC1, 0xF083FBC1, 0xF084FBC1, 0xF085FBC1, 0xF086FBC1, + 0xF087FBC1, 0xF088FBC1, 0xF089FBC1, 0xF08AFBC1, 0xF08BFBC1, 0xF08CFBC1, 0xF08DFBC1, 0xF08EFBC1, 0xF08FFBC1, 0xF090FBC1, 0xF091FBC1, 0xF092FBC1, 0xF093FBC1, 0xF094FBC1, 0xF095FBC1, + 0xF096FBC1, 0xF097FBC1, 0xF098FBC1, 0xF099FBC1, 0xF09AFBC1, 0xF09BFBC1, 0xF09CFBC1, 0xF09DFBC1, 0xF09EFBC1, 0xF09FFBC1, 0xF0A0FBC1, 0xF0A1FBC1, 0xF0A2FBC1, 0xF0A3FBC1, 0xF0A4FBC1, + 0xF0A5FBC1, 0xF0A6FBC1, 0xF0A7FBC1, 0xF0A8FBC1, 0xF0A9FBC1, 0xF0AAFBC1, 0xF0ABFBC1, 0xF0ACFBC1, 0xF0ADFBC1, 0xF0AEFBC1, 0xF0AFFBC1, 0xF0B0FBC1, 0xF0B1FBC1, 0xF0B2FBC1, 0xF0B3FBC1, + 0xF0B4FBC1, 0xF0B5FBC1, 0xF0B6FBC1, 0xF0B7FBC1, 0xF0B8FBC1, 0xF0B9FBC1, 0xF0BAFBC1, 0xF0BBFBC1, 0xF0BCFBC1, 0xF0BDFBC1, 0xF0BEFBC1, 0xF0BFFBC1, 0xF0C0FBC1, 0xF0C1FBC1, 0xF0C2FBC1, + 0xF0C3FBC1, 0xF0C4FBC1, 0xF0C5FBC1, 0xF0C6FBC1, 0xF0C7FBC1, 0xF0C8FBC1, 0xF0C9FBC1, 0xF0CAFBC1, 0xF0CBFBC1, 0xF0CCFBC1, 0xF0CDFBC1, 0xF0CEFBC1, 0xF0CFFBC1, 0xF0D0FBC1, 0xF0D1FBC1, + 0xF0D2FBC1, 0xF0D3FBC1, 0xF0D4FBC1, 0xF0D5FBC1, 0xF0D6FBC1, 0xF0D7FBC1, 0xF0D8FBC1, 0xF0D9FBC1, 0xF0DAFBC1, 0xF0DBFBC1, 0xF0DCFBC1, 0xF0DDFBC1, 0xF0DEFBC1, 0xF0DFFBC1, 0xF0E0FBC1, + 0xF0E1FBC1, 0xF0E2FBC1, 0xF0E3FBC1, 0xF0E4FBC1, 0xF0E5FBC1, 0xF0E6FBC1, 0xF0E7FBC1, 0xF0E8FBC1, 0xF0E9FBC1, 0xF0EAFBC1, 0xF0EBFBC1, 0xF0ECFBC1, 0xF0EDFBC1, 0xF0EEFBC1, 0xF0EFFBC1, + 0xF0F0FBC1, 0xF0F1FBC1, 0xF0F2FBC1, 0xF0F3FBC1, 0xF0F4FBC1, 0xF0F5FBC1, 0xF0F6FBC1, 0xF0F7FBC1, 0xF0F8FBC1, 0xF0F9FBC1, 0xF0FAFBC1, 0xF0FBFBC1, 0xF0FCFBC1, 0xF0FDFBC1, 0xF0FEFBC1, + 0xF0FFFBC1, 0xF100FBC1, 0xF101FBC1, 0xF102FBC1, 0xF103FBC1, 0xF104FBC1, 0xF105FBC1, 0xF106FBC1, 0xF107FBC1, 0xF108FBC1, 0xF109FBC1, 0xF10AFBC1, 0xF10BFBC1, 0xF10CFBC1, 0xF10DFBC1, + 0xF10EFBC1, 0xF10FFBC1, 0xF110FBC1, 0xF111FBC1, 0xF112FBC1, 0xF113FBC1, 0xF114FBC1, 0xF115FBC1, 0xF116FBC1, 0xF117FBC1, 0xF118FBC1, 0xF119FBC1, 0xF11AFBC1, 0xF11BFBC1, 
0xF11CFBC1, + 0xF11DFBC1, 0xF11EFBC1, 0xF11FFBC1, 0xF120FBC1, 0xF121FBC1, 0xF122FBC1, 0xF123FBC1, 0xF124FBC1, 0xF125FBC1, 0xF126FBC1, 0xF127FBC1, 0xF128FBC1, 0xF129FBC1, 0xF12AFBC1, 0xF12BFBC1, + 0xF12CFBC1, 0xF12DFBC1, 0xF12EFBC1, 0xF12FFBC1, 0xF130FBC1, 0xF131FBC1, 0xF132FBC1, 0xF133FBC1, 0xF134FBC1, 0xF135FBC1, 0xF136FBC1, 0xF137FBC1, 0xF138FBC1, 0xF139FBC1, 0xF13AFBC1, + 0xF13BFBC1, 0xF13CFBC1, 0xF13DFBC1, 0xF13EFBC1, 0xF13FFBC1, 0xF140FBC1, 0xF141FBC1, 0xF142FBC1, 0xF143FBC1, 0xF144FBC1, 0xF145FBC1, 0xF146FBC1, 0xF147FBC1, 0xF148FBC1, 0xF149FBC1, + 0xF14AFBC1, 0xF14BFBC1, 0xF14CFBC1, 0xF14DFBC1, 0xF14EFBC1, 0xF14FFBC1, 0xF150FBC1, 0xF151FBC1, 0xF152FBC1, 0xF153FBC1, 0xF154FBC1, 0xF155FBC1, 0xF156FBC1, 0xF157FBC1, 0xF158FBC1, + 0xF159FBC1, 0xF15AFBC1, 0xF15BFBC1, 0xF15CFBC1, 0xF15DFBC1, 0xF15EFBC1, 0xF15FFBC1, 0xF160FBC1, 0xF161FBC1, 0xF162FBC1, 0xF163FBC1, 0xF164FBC1, 0xF165FBC1, 0xF166FBC1, 0xF167FBC1, + 0xF168FBC1, 0xF169FBC1, 0xF16AFBC1, 0xF16BFBC1, 0xF16CFBC1, 0xF16DFBC1, 0xF16EFBC1, 0xF16FFBC1, 0xF170FBC1, 0xF171FBC1, 0xF172FBC1, 0xF173FBC1, 0xF174FBC1, 0xF175FBC1, 0xF176FBC1, + 0xF177FBC1, 0xF178FBC1, 0xF179FBC1, 0xF17AFBC1, 0xF17BFBC1, 0xF17CFBC1, 0xF17DFBC1, 0xF17EFBC1, 0xF17FFBC1, 0xF180FBC1, 0xF181FBC1, 0xF182FBC1, 0xF183FBC1, 0xF184FBC1, 0xF185FBC1, + 0xF186FBC1, 0xF187FBC1, 0xF188FBC1, 0xF189FBC1, 0xF18AFBC1, 0xF18BFBC1, 0xF18CFBC1, 0xF18DFBC1, 0xF18EFBC1, 0xF18FFBC1, 0xF190FBC1, 0xF191FBC1, 0xF192FBC1, 0xF193FBC1, 0xF194FBC1, + 0xF195FBC1, 0xF196FBC1, 0xF197FBC1, 0xF198FBC1, 0xF199FBC1, 0xF19AFBC1, 0xF19BFBC1, 0xF19CFBC1, 0xF19DFBC1, 0xF19EFBC1, 0xF19FFBC1, 0xF1A0FBC1, 0xF1A1FBC1, 0xF1A2FBC1, 0xF1A3FBC1, + 0xF1A4FBC1, 0xF1A5FBC1, 0xF1A6FBC1, 0xF1A7FBC1, 0xF1A8FBC1, 0xF1A9FBC1, 0xF1AAFBC1, 0xF1ABFBC1, 0xF1ACFBC1, 0xF1ADFBC1, 0xF1AEFBC1, 0xF1AFFBC1, 0xF1B0FBC1, 0xF1B1FBC1, 0xF1B2FBC1, + 0xF1B3FBC1, 0xF1B4FBC1, 0xF1B5FBC1, 0xF1B6FBC1, 0xF1B7FBC1, 0xF1B8FBC1, 0xF1B9FBC1, 0xF1BAFBC1, 0xF1BBFBC1, 0xF1BCFBC1, 0xF1BDFBC1, 0xF1BEFBC1, 0xF1BFFBC1, 
0xF1C0FBC1, 0xF1C1FBC1, + 0xF1C2FBC1, 0xF1C3FBC1, 0xF1C4FBC1, 0xF1C5FBC1, 0xF1C6FBC1, 0xF1C7FBC1, 0xF1C8FBC1, 0xF1C9FBC1, 0xF1CAFBC1, 0xF1CBFBC1, 0xF1CCFBC1, 0xF1CDFBC1, 0xF1CEFBC1, 0xF1CFFBC1, 0xF1D0FBC1, + 0xF1D1FBC1, 0xF1D2FBC1, 0xF1D3FBC1, 0xF1D4FBC1, 0xF1D5FBC1, 0xF1D6FBC1, 0xF1D7FBC1, 0xF1D8FBC1, 0xF1D9FBC1, 0xF1DAFBC1, 0xF1DBFBC1, 0xF1DCFBC1, 0xF1DDFBC1, 0xF1DEFBC1, 0xF1DFFBC1, + 0xF1E0FBC1, 0xF1E1FBC1, 0xF1E2FBC1, 0xF1E3FBC1, 0xF1E4FBC1, 0xF1E5FBC1, 0xF1E6FBC1, 0xF1E7FBC1, 0xF1E8FBC1, 0xF1E9FBC1, 0xF1EAFBC1, 0xF1EBFBC1, 0xF1ECFBC1, 0xF1EDFBC1, 0xF1EEFBC1, + 0xF1EFFBC1, 0xF1F0FBC1, 0xF1F1FBC1, 0xF1F2FBC1, 0xF1F3FBC1, 0xF1F4FBC1, 0xF1F5FBC1, 0xF1F6FBC1, 0xF1F7FBC1, 0xF1F8FBC1, 0xF1F9FBC1, 0xF1FAFBC1, 0xF1FBFBC1, 0xF1FCFBC1, 0xF1FDFBC1, + 0xF1FEFBC1, 0xF1FFFBC1, 0xF200FBC1, 0xF201FBC1, 0xF202FBC1, 0xF203FBC1, 0xF204FBC1, 0xF205FBC1, 0xF206FBC1, 0xF207FBC1, 0xF208FBC1, 0xF209FBC1, 0xF20AFBC1, 0xF20BFBC1, 0xF20CFBC1, + 0xF20DFBC1, 0xF20EFBC1, 0xF20FFBC1, 0xF210FBC1, 0xF211FBC1, 0xF212FBC1, 0xF213FBC1, 0xF214FBC1, 0xF215FBC1, 0xF216FBC1, 0xF217FBC1, 0xF218FBC1, 0xF219FBC1, 0xF21AFBC1, 0xF21BFBC1, + 0xF21CFBC1, 0xF21DFBC1, 0xF21EFBC1, 0xF21FFBC1, 0xF220FBC1, 0xF221FBC1, 0xF222FBC1, 0xF223FBC1, 0xF224FBC1, 0xF225FBC1, 0xF226FBC1, 0xF227FBC1, 0xF228FBC1, 0xF229FBC1, 0xF22AFBC1, + 0xF22BFBC1, 0xF22CFBC1, 0xF22DFBC1, 0xF22EFBC1, 0xF22FFBC1, 0xF230FBC1, 0xF231FBC1, 0xF232FBC1, 0xF233FBC1, 0xF234FBC1, 0xF235FBC1, 0xF236FBC1, 0xF237FBC1, 0xF238FBC1, 0xF239FBC1, + 0xF23AFBC1, 0xF23BFBC1, 0xF23CFBC1, 0xF23DFBC1, 0xF23EFBC1, 0xF23FFBC1, 0xF240FBC1, 0xF241FBC1, 0xF242FBC1, 0xF243FBC1, 0xF244FBC1, 0xF245FBC1, 0xF246FBC1, 0xF247FBC1, 0xF248FBC1, + 0xF249FBC1, 0xF24AFBC1, 0xF24BFBC1, 0xF24CFBC1, 0xF24DFBC1, 0xF24EFBC1, 0xF24FFBC1, 0xF250FBC1, 0xF251FBC1, 0xF252FBC1, 0xF253FBC1, 0xF254FBC1, 0xF255FBC1, 0xF256FBC1, 0xF257FBC1, + 0xF258FBC1, 0xF259FBC1, 0xF25AFBC1, 0xF25BFBC1, 0xF25CFBC1, 0xF25DFBC1, 0xF25EFBC1, 0xF25FFBC1, 0xF260FBC1, 0xF261FBC1, 0xF262FBC1, 0xF263FBC1, 
0xF264FBC1, 0xF265FBC1, 0xF266FBC1, + 0xF267FBC1, 0xF268FBC1, 0xF269FBC1, 0xF26AFBC1, 0xF26BFBC1, 0xF26CFBC1, 0xF26DFBC1, 0xF26EFBC1, 0xF26FFBC1, 0xF270FBC1, 0xF271FBC1, 0xF272FBC1, 0xF273FBC1, 0xF274FBC1, 0xF275FBC1, + 0xF276FBC1, 0xF277FBC1, 0xF278FBC1, 0xF279FBC1, 0xF27AFBC1, 0xF27BFBC1, 0xF27CFBC1, 0xF27DFBC1, 0xF27EFBC1, 0xF27FFBC1, 0xF280FBC1, 0xF281FBC1, 0xF282FBC1, 0xF283FBC1, 0xF284FBC1, + 0xF285FBC1, 0xF286FBC1, 0xF287FBC1, 0xF288FBC1, 0xF289FBC1, 0xF28AFBC1, 0xF28BFBC1, 0xF28CFBC1, 0xF28DFBC1, 0xF28EFBC1, 0xF28FFBC1, 0xF290FBC1, 0xF291FBC1, 0xF292FBC1, 0xF293FBC1, + 0xF294FBC1, 0xF295FBC1, 0xF296FBC1, 0xF297FBC1, 0xF298FBC1, 0xF299FBC1, 0xF29AFBC1, 0xF29BFBC1, 0xF29CFBC1, 0xF29DFBC1, 0xF29EFBC1, 0xF29FFBC1, 0xF2A0FBC1, 0xF2A1FBC1, 0xF2A2FBC1, + 0xF2A3FBC1, 0xF2A4FBC1, 0xF2A5FBC1, 0xF2A6FBC1, 0xF2A7FBC1, 0xF2A8FBC1, 0xF2A9FBC1, 0xF2AAFBC1, 0xF2ABFBC1, 0xF2ACFBC1, 0xF2ADFBC1, 0xF2AEFBC1, 0xF2AFFBC1, 0xF2B0FBC1, 0xF2B1FBC1, + 0xF2B2FBC1, 0xF2B3FBC1, 0xF2B4FBC1, 0xF2B5FBC1, 0xF2B6FBC1, 0xF2B7FBC1, 0xF2B8FBC1, 0xF2B9FBC1, 0xF2BAFBC1, 0xF2BBFBC1, 0xF2BCFBC1, 0xF2BDFBC1, 0xF2BEFBC1, 0xF2BFFBC1, 0xF2C0FBC1, + 0xF2C1FBC1, 0xF2C2FBC1, 0xF2C3FBC1, 0xF2C4FBC1, 0xF2C5FBC1, 0xF2C6FBC1, 0xF2C7FBC1, 0xF2C8FBC1, 0xF2C9FBC1, 0xF2CAFBC1, 0xF2CBFBC1, 0xF2CCFBC1, 0xF2CDFBC1, 0xF2CEFBC1, 0xF2CFFBC1, + 0xF2D0FBC1, 0xF2D1FBC1, 0xF2D2FBC1, 0xF2D3FBC1, 0xF2D4FBC1, 0xF2D5FBC1, 0xF2D6FBC1, 0xF2D7FBC1, 0xF2D8FBC1, 0xF2D9FBC1, 0xF2DAFBC1, 0xF2DBFBC1, 0xF2DCFBC1, 0xF2DDFBC1, 0xF2DEFBC1, + 0xF2DFFBC1, 0xF2E0FBC1, 0xF2E1FBC1, 0xF2E2FBC1, 0xF2E3FBC1, 0xF2E4FBC1, 0xF2E5FBC1, 0xF2E6FBC1, 0xF2E7FBC1, 0xF2E8FBC1, 0xF2E9FBC1, 0xF2EAFBC1, 0xF2EBFBC1, 0xF2ECFBC1, 0xF2EDFBC1, + 0xF2EEFBC1, 0xF2EFFBC1, 0xF2F0FBC1, 0xF2F1FBC1, 0xF2F2FBC1, 0xF2F3FBC1, 0xF2F4FBC1, 0xF2F5FBC1, 0xF2F6FBC1, 0xF2F7FBC1, 0xF2F8FBC1, 0xF2F9FBC1, 0xF2FAFBC1, 0xF2FBFBC1, 0xF2FCFBC1, + 0xF2FDFBC1, 0xF2FEFBC1, 0xF2FFFBC1, 0xF300FBC1, 0xF301FBC1, 0xF302FBC1, 0xF303FBC1, 0xF304FBC1, 0xF305FBC1, 0xF306FBC1, 0xF307FBC1, 
0xF308FBC1, 0xF309FBC1, 0xF30AFBC1, 0xF30BFBC1, + 0xF30CFBC1, 0xF30DFBC1, 0xF30EFBC1, 0xF30FFBC1, 0xF310FBC1, 0xF311FBC1, 0xF312FBC1, 0xF313FBC1, 0xF314FBC1, 0xF315FBC1, 0xF316FBC1, 0xF317FBC1, 0xF318FBC1, 0xF319FBC1, 0xF31AFBC1, + 0xF31BFBC1, 0xF31CFBC1, 0xF31DFBC1, 0xF31EFBC1, 0xF31FFBC1, 0xF320FBC1, 0xF321FBC1, 0xF322FBC1, 0xF323FBC1, 0xF324FBC1, 0xF325FBC1, 0xF326FBC1, 0xF327FBC1, 0xF328FBC1, 0xF329FBC1, + 0xF32AFBC1, 0xF32BFBC1, 0xF32CFBC1, 0xF32DFBC1, 0xF32EFBC1, 0xF32FFBC1, 0xF330FBC1, 0xF331FBC1, 0xF332FBC1, 0xF333FBC1, 0xF334FBC1, 0xF335FBC1, 0xF336FBC1, 0xF337FBC1, 0xF338FBC1, + 0xF339FBC1, 0xF33AFBC1, 0xF33BFBC1, 0xF33CFBC1, 0xF33DFBC1, 0xF33EFBC1, 0xF33FFBC1, 0xF340FBC1, 0xF341FBC1, 0xF342FBC1, 0xF343FBC1, 0xF344FBC1, 0xF345FBC1, 0xF346FBC1, 0xF347FBC1, + 0xF348FBC1, 0xF349FBC1, 0xF34AFBC1, 0xF34BFBC1, 0xF34CFBC1, 0xF34DFBC1, 0xF34EFBC1, 0xF34FFBC1, 0xF350FBC1, 0xF351FBC1, 0xF352FBC1, 0xF353FBC1, 0xF354FBC1, 0xF355FBC1, 0xF356FBC1, + 0xF357FBC1, 0xF358FBC1, 0xF359FBC1, 0xF35AFBC1, 0xF35BFBC1, 0xF35CFBC1, 0xF35DFBC1, 0xF35EFBC1, 0xF35FFBC1, 0xF360FBC1, 0xF361FBC1, 0xF362FBC1, 0xF363FBC1, 0xF364FBC1, 0xF365FBC1, + 0xF366FBC1, 0xF367FBC1, 0xF368FBC1, 0xF369FBC1, 0xF36AFBC1, 0xF36BFBC1, 0xF36CFBC1, 0xF36DFBC1, 0xF36EFBC1, 0xF36FFBC1, 0xF370FBC1, 0xF371FBC1, 0xF372FBC1, 0xF373FBC1, 0xF374FBC1, + 0xF375FBC1, 0xF376FBC1, 0xF377FBC1, 0xF378FBC1, 0xF379FBC1, 0xF37AFBC1, 0xF37BFBC1, 0xF37CFBC1, 0xF37DFBC1, 0xF37EFBC1, 0xF37FFBC1, 0xF380FBC1, 0xF381FBC1, 0xF382FBC1, 0xF383FBC1, + 0xF384FBC1, 0xF385FBC1, 0xF386FBC1, 0xF387FBC1, 0xF388FBC1, 0xF389FBC1, 0xF38AFBC1, 0xF38BFBC1, 0xF38CFBC1, 0xF38DFBC1, 0xF38EFBC1, 0xF38FFBC1, 0xF390FBC1, 0xF391FBC1, 0xF392FBC1, + 0xF393FBC1, 0xF394FBC1, 0xF395FBC1, 0xF396FBC1, 0xF397FBC1, 0xF398FBC1, 0xF399FBC1, 0xF39AFBC1, 0xF39BFBC1, 0xF39CFBC1, 0xF39DFBC1, 0xF39EFBC1, 0xF39FFBC1, 0xF3A0FBC1, 0xF3A1FBC1, + 0xF3A2FBC1, 0xF3A3FBC1, 0xF3A4FBC1, 0xF3A5FBC1, 0xF3A6FBC1, 0xF3A7FBC1, 0xF3A8FBC1, 0xF3A9FBC1, 0xF3AAFBC1, 0xF3ABFBC1, 
0xF3ACFBC1, 0xF3ADFBC1, 0xF3AEFBC1, 0xF3AFFBC1, 0xF3B0FBC1, + 0xF3B1FBC1, 0xF3B2FBC1, 0xF3B3FBC1, 0xF3B4FBC1, 0xF3B5FBC1, 0xF3B6FBC1, 0xF3B7FBC1, 0xF3B8FBC1, 0xF3B9FBC1, 0xF3BAFBC1, 0xF3BBFBC1, 0xF3BCFBC1, 0xF3BDFBC1, 0xF3BEFBC1, 0xF3BFFBC1, + 0xF3C0FBC1, 0xF3C1FBC1, 0xF3C2FBC1, 0xF3C3FBC1, 0xF3C4FBC1, 0xF3C5FBC1, 0xF3C6FBC1, 0xF3C7FBC1, 0xF3C8FBC1, 0xF3C9FBC1, 0xF3CAFBC1, 0xF3CBFBC1, 0xF3CCFBC1, 0xF3CDFBC1, 0xF3CEFBC1, + 0xF3CFFBC1, 0xF3D0FBC1, 0xF3D1FBC1, 0xF3D2FBC1, 0xF3D3FBC1, 0xF3D4FBC1, 0xF3D5FBC1, 0xF3D6FBC1, 0xF3D7FBC1, 0xF3D8FBC1, 0xF3D9FBC1, 0xF3DAFBC1, 0xF3DBFBC1, 0xF3DCFBC1, 0xF3DDFBC1, + 0xF3DEFBC1, 0xF3DFFBC1, 0xF3E0FBC1, 0xF3E1FBC1, 0xF3E2FBC1, 0xF3E3FBC1, 0xF3E4FBC1, 0xF3E5FBC1, 0xF3E6FBC1, 0xF3E7FBC1, 0xF3E8FBC1, 0xF3E9FBC1, 0xF3EAFBC1, 0xF3EBFBC1, 0xF3ECFBC1, + 0xF3EDFBC1, 0xF3EEFBC1, 0xF3EFFBC1, 0xF3F0FBC1, 0xF3F1FBC1, 0xF3F2FBC1, 0xF3F3FBC1, 0xF3F4FBC1, 0xF3F5FBC1, 0xF3F6FBC1, 0xF3F7FBC1, 0xF3F8FBC1, 0xF3F9FBC1, 0xF3FAFBC1, 0xF3FBFBC1, + 0xF3FCFBC1, 0xF3FDFBC1, 0xF3FEFBC1, 0xF3FFFBC1, 0xF400FBC1, 0xF401FBC1, 0xF402FBC1, 0xF403FBC1, 0xF404FBC1, 0xF405FBC1, 0xF406FBC1, 0xF407FBC1, 0xF408FBC1, 0xF409FBC1, 0xF40AFBC1, + 0xF40BFBC1, 0xF40CFBC1, 0xF40DFBC1, 0xF40EFBC1, 0xF40FFBC1, 0xF410FBC1, 0xF411FBC1, 0xF412FBC1, 0xF413FBC1, 0xF414FBC1, 0xF415FBC1, 0xF416FBC1, 0xF417FBC1, 0xF418FBC1, 0xF419FBC1, + 0xF41AFBC1, 0xF41BFBC1, 0xF41CFBC1, 0xF41DFBC1, 0xF41EFBC1, 0xF41FFBC1, 0xF420FBC1, 0xF421FBC1, 0xF422FBC1, 0xF423FBC1, 0xF424FBC1, 0xF425FBC1, 0xF426FBC1, 0xF427FBC1, 0xF428FBC1, + 0xF429FBC1, 0xF42AFBC1, 0xF42BFBC1, 0xF42CFBC1, 0xF42DFBC1, 0xF42EFBC1, 0xF42FFBC1, 0xF430FBC1, 0xF431FBC1, 0xF432FBC1, 0xF433FBC1, 0xF434FBC1, 0xF435FBC1, 0xF436FBC1, 0xF437FBC1, + 0xF438FBC1, 0xF439FBC1, 0xF43AFBC1, 0xF43BFBC1, 0xF43CFBC1, 0xF43DFBC1, 0xF43EFBC1, 0xF43FFBC1, 0xF440FBC1, 0xF441FBC1, 0xF442FBC1, 0xF443FBC1, 0xF444FBC1, 0xF445FBC1, 0xF446FBC1, + 0xF447FBC1, 0xF448FBC1, 0xF449FBC1, 0xF44AFBC1, 0xF44BFBC1, 0xF44CFBC1, 0xF44DFBC1, 0xF44EFBC1, 0xF44FFBC1, 
0xF450FBC1, 0xF451FBC1, 0xF452FBC1, 0xF453FBC1, 0xF454FBC1, 0xF455FBC1, + 0xF456FBC1, 0xF457FBC1, 0xF458FBC1, 0xF459FBC1, 0xF45AFBC1, 0xF45BFBC1, 0xF45CFBC1, 0xF45DFBC1, 0xF45EFBC1, 0xF45FFBC1, 0xF460FBC1, 0xF461FBC1, 0xF462FBC1, 0xF463FBC1, 0xF464FBC1, + 0xF465FBC1, 0xF466FBC1, 0xF467FBC1, 0xF468FBC1, 0xF469FBC1, 0xF46AFBC1, 0xF46BFBC1, 0xF46CFBC1, 0xF46DFBC1, 0xF46EFBC1, 0xF46FFBC1, 0xF470FBC1, 0xF471FBC1, 0xF472FBC1, 0xF473FBC1, + 0xF474FBC1, 0xF475FBC1, 0xF476FBC1, 0xF477FBC1, 0xF478FBC1, 0xF479FBC1, 0xF47AFBC1, 0xF47BFBC1, 0xF47CFBC1, 0xF47DFBC1, 0xF47EFBC1, 0xF47FFBC1, 0xF480FBC1, 0xF481FBC1, 0xF482FBC1, + 0xF483FBC1, 0xF484FBC1, 0xF485FBC1, 0xF486FBC1, 0xF487FBC1, 0xF488FBC1, 0xF489FBC1, 0xF48AFBC1, 0xF48BFBC1, 0xF48CFBC1, 0xF48DFBC1, 0xF48EFBC1, 0xF48FFBC1, 0xF490FBC1, 0xF491FBC1, + 0xF492FBC1, 0xF493FBC1, 0xF494FBC1, 0xF495FBC1, 0xF496FBC1, 0xF497FBC1, 0xF498FBC1, 0xF499FBC1, 0xF49AFBC1, 0xF49BFBC1, 0xF49CFBC1, 0xF49DFBC1, 0xF49EFBC1, 0xF49FFBC1, 0xF4A0FBC1, + 0xF4A1FBC1, 0xF4A2FBC1, 0xF4A3FBC1, 0xF4A4FBC1, 0xF4A5FBC1, 0xF4A6FBC1, 0xF4A7FBC1, 0xF4A8FBC1, 0xF4A9FBC1, 0xF4AAFBC1, 0xF4ABFBC1, 0xF4ACFBC1, 0xF4ADFBC1, 0xF4AEFBC1, 0xF4AFFBC1, + 0xF4B0FBC1, 0xF4B1FBC1, 0xF4B2FBC1, 0xF4B3FBC1, 0xF4B4FBC1, 0xF4B5FBC1, 0xF4B6FBC1, 0xF4B7FBC1, 0xF4B8FBC1, 0xF4B9FBC1, 0xF4BAFBC1, 0xF4BBFBC1, 0xF4BCFBC1, 0xF4BDFBC1, 0xF4BEFBC1, + 0xF4BFFBC1, 0xF4C0FBC1, 0xF4C1FBC1, 0xF4C2FBC1, 0xF4C3FBC1, 0xF4C4FBC1, 0xF4C5FBC1, 0xF4C6FBC1, 0xF4C7FBC1, 0xF4C8FBC1, 0xF4C9FBC1, 0xF4CAFBC1, 0xF4CBFBC1, 0xF4CCFBC1, 0xF4CDFBC1, + 0xF4CEFBC1, 0xF4CFFBC1, 0xF4D0FBC1, 0xF4D1FBC1, 0xF4D2FBC1, 0xF4D3FBC1, 0xF4D4FBC1, 0xF4D5FBC1, 0xF4D6FBC1, 0xF4D7FBC1, 0xF4D8FBC1, 0xF4D9FBC1, 0xF4DAFBC1, 0xF4DBFBC1, 0xF4DCFBC1, + 0xF4DDFBC1, 0xF4DEFBC1, 0xF4DFFBC1, 0xF4E0FBC1, 0xF4E1FBC1, 0xF4E2FBC1, 0xF4E3FBC1, 0xF4E4FBC1, 0xF4E5FBC1, 0xF4E6FBC1, 0xF4E7FBC1, 0xF4E8FBC1, 0xF4E9FBC1, 0xF4EAFBC1, 0xF4EBFBC1, + 0xF4ECFBC1, 0xF4EDFBC1, 0xF4EEFBC1, 0xF4EFFBC1, 0xF4F0FBC1, 0xF4F1FBC1, 0xF4F2FBC1, 0xF4F3FBC1, 
0xF4F4FBC1, 0xF4F5FBC1, 0xF4F6FBC1, 0xF4F7FBC1, 0xF4F8FBC1, 0xF4F9FBC1, 0xF4FAFBC1, + 0xF4FBFBC1, 0xF4FCFBC1, 0xF4FDFBC1, 0xF4FEFBC1, 0xF4FFFBC1, 0xF500FBC1, 0xF501FBC1, 0xF502FBC1, 0xF503FBC1, 0xF504FBC1, 0xF505FBC1, 0xF506FBC1, 0xF507FBC1, 0xF508FBC1, 0xF509FBC1, + 0xF50AFBC1, 0xF50BFBC1, 0xF50CFBC1, 0xF50DFBC1, 0xF50EFBC1, 0xF50FFBC1, 0xF510FBC1, 0xF511FBC1, 0xF512FBC1, 0xF513FBC1, 0xF514FBC1, 0xF515FBC1, 0xF516FBC1, 0xF517FBC1, 0xF518FBC1, + 0xF519FBC1, 0xF51AFBC1, 0xF51BFBC1, 0xF51CFBC1, 0xF51DFBC1, 0xF51EFBC1, 0xF51FFBC1, 0xF520FBC1, 0xF521FBC1, 0xF522FBC1, 0xF523FBC1, 0xF524FBC1, 0xF525FBC1, 0xF526FBC1, 0xF527FBC1, + 0xF528FBC1, 0xF529FBC1, 0xF52AFBC1, 0xF52BFBC1, 0xF52CFBC1, 0xF52DFBC1, 0xF52EFBC1, 0xF52FFBC1, 0xF530FBC1, 0xF531FBC1, 0xF532FBC1, 0xF533FBC1, 0xF534FBC1, 0xF535FBC1, 0xF536FBC1, + 0xF537FBC1, 0xF538FBC1, 0xF539FBC1, 0xF53AFBC1, 0xF53BFBC1, 0xF53CFBC1, 0xF53DFBC1, 0xF53EFBC1, 0xF53FFBC1, 0xF540FBC1, 0xF541FBC1, 0xF542FBC1, 0xF543FBC1, 0xF544FBC1, 0xF545FBC1, + 0xF546FBC1, 0xF547FBC1, 0xF548FBC1, 0xF549FBC1, 0xF54AFBC1, 0xF54BFBC1, 0xF54CFBC1, 0xF54DFBC1, 0xF54EFBC1, 0xF54FFBC1, 0xF550FBC1, 0xF551FBC1, 0xF552FBC1, 0xF553FBC1, 0xF554FBC1, + 0xF555FBC1, 0xF556FBC1, 0xF557FBC1, 0xF558FBC1, 0xF559FBC1, 0xF55AFBC1, 0xF55BFBC1, 0xF55CFBC1, 0xF55DFBC1, 0xF55EFBC1, 0xF55FFBC1, 0xF560FBC1, 0xF561FBC1, 0xF562FBC1, 0xF563FBC1, + 0xF564FBC1, 0xF565FBC1, 0xF566FBC1, 0xF567FBC1, 0xF568FBC1, 0xF569FBC1, 0xF56AFBC1, 0xF56BFBC1, 0xF56CFBC1, 0xF56DFBC1, 0xF56EFBC1, 0xF56FFBC1, 0xF570FBC1, 0xF571FBC1, 0xF572FBC1, + 0xF573FBC1, 0xF574FBC1, 0xF575FBC1, 0xF576FBC1, 0xF577FBC1, 0xF578FBC1, 0xF579FBC1, 0xF57AFBC1, 0xF57BFBC1, 0xF57CFBC1, 0xF57DFBC1, 0xF57EFBC1, 0xF57FFBC1, 0xF580FBC1, 0xF581FBC1, + 0xF582FBC1, 0xF583FBC1, 0xF584FBC1, 0xF585FBC1, 0xF586FBC1, 0xF587FBC1, 0xF588FBC1, 0xF589FBC1, 0xF58AFBC1, 0xF58BFBC1, 0xF58CFBC1, 0xF58DFBC1, 0xF58EFBC1, 0xF58FFBC1, 0xF590FBC1, + 0xF591FBC1, 0xF592FBC1, 0xF593FBC1, 0xF594FBC1, 0xF595FBC1, 0xF596FBC1, 0xF597FBC1, 
0xF598FBC1, 0xF599FBC1, 0xF59AFBC1, 0xF59BFBC1, 0xF59CFBC1, 0xF59DFBC1, 0xF59EFBC1, 0xF59FFBC1, + 0xF5A0FBC1, 0xF5A1FBC1, 0xF5A2FBC1, 0xF5A3FBC1, 0xF5A4FBC1, 0xF5A5FBC1, 0xF5A6FBC1, 0xF5A7FBC1, 0xF5A8FBC1, 0xF5A9FBC1, 0xF5AAFBC1, 0xF5ABFBC1, 0xF5ACFBC1, 0xF5ADFBC1, 0xF5AEFBC1, + 0xF5AFFBC1, 0xF5B0FBC1, 0xF5B1FBC1, 0xF5B2FBC1, 0xF5B3FBC1, 0xF5B4FBC1, 0xF5B5FBC1, 0xF5B6FBC1, 0xF5B7FBC1, 0xF5B8FBC1, 0xF5B9FBC1, 0xF5BAFBC1, 0xF5BBFBC1, 0xF5BCFBC1, 0xF5BDFBC1, + 0xF5BEFBC1, 0xF5BFFBC1, 0xF5C0FBC1, 0xF5C1FBC1, 0xF5C2FBC1, 0xF5C3FBC1, 0xF5C4FBC1, 0xF5C5FBC1, 0xF5C6FBC1, 0xF5C7FBC1, 0xF5C8FBC1, 0xF5C9FBC1, 0xF5CAFBC1, 0xF5CBFBC1, 0xF5CCFBC1, + 0xF5CDFBC1, 0xF5CEFBC1, 0xF5CFFBC1, 0xF5D0FBC1, 0xF5D1FBC1, 0xF5D2FBC1, 0xF5D3FBC1, 0xF5D4FBC1, 0xF5D5FBC1, 0xF5D6FBC1, 0xF5D7FBC1, 0xF5D8FBC1, 0xF5D9FBC1, 0xF5DAFBC1, 0xF5DBFBC1, + 0xF5DCFBC1, 0xF5DDFBC1, 0xF5DEFBC1, 0xF5DFFBC1, 0xF5E0FBC1, 0xF5E1FBC1, 0xF5E2FBC1, 0xF5E3FBC1, 0xF5E4FBC1, 0xF5E5FBC1, 0xF5E6FBC1, 0xF5E7FBC1, 0xF5E8FBC1, 0xF5E9FBC1, 0xF5EAFBC1, + 0xF5EBFBC1, 0xF5ECFBC1, 0xF5EDFBC1, 0xF5EEFBC1, 0xF5EFFBC1, 0xF5F0FBC1, 0xF5F1FBC1, 0xF5F2FBC1, 0xF5F3FBC1, 0xF5F4FBC1, 0xF5F5FBC1, 0xF5F6FBC1, 0xF5F7FBC1, 0xF5F8FBC1, 0xF5F9FBC1, + 0xF5FAFBC1, 0xF5FBFBC1, 0xF5FCFBC1, 0xF5FDFBC1, 0xF5FEFBC1, 0xF5FFFBC1, 0xF600FBC1, 0xF601FBC1, 0xF602FBC1, 0xF603FBC1, 0xF604FBC1, 0xF605FBC1, 0xF606FBC1, 0xF607FBC1, 0xF608FBC1, + 0xF609FBC1, 0xF60AFBC1, 0xF60BFBC1, 0xF60CFBC1, 0xF60DFBC1, 0xF60EFBC1, 0xF60FFBC1, 0xF610FBC1, 0xF611FBC1, 0xF612FBC1, 0xF613FBC1, 0xF614FBC1, 0xF615FBC1, 0xF616FBC1, 0xF617FBC1, + 0xF618FBC1, 0xF619FBC1, 0xF61AFBC1, 0xF61BFBC1, 0xF61CFBC1, 0xF61DFBC1, 0xF61EFBC1, 0xF61FFBC1, 0xF620FBC1, 0xF621FBC1, 0xF622FBC1, 0xF623FBC1, 0xF624FBC1, 0xF625FBC1, 0xF626FBC1, + 0xF627FBC1, 0xF628FBC1, 0xF629FBC1, 0xF62AFBC1, 0xF62BFBC1, 0xF62CFBC1, 0xF62DFBC1, 0xF62EFBC1, 0xF62FFBC1, 0xF630FBC1, 0xF631FBC1, 0xF632FBC1, 0xF633FBC1, 0xF634FBC1, 0xF635FBC1, + 0xF636FBC1, 0xF637FBC1, 0xF638FBC1, 0xF639FBC1, 0xF63AFBC1, 0xF63BFBC1, 
0xF63CFBC1, 0xF63DFBC1, 0xF63EFBC1, 0xF63FFBC1, 0xF640FBC1, 0xF641FBC1, 0xF642FBC1, 0xF643FBC1, 0xF644FBC1, + 0xF645FBC1, 0xF646FBC1, 0xF647FBC1, 0xF648FBC1, 0xF649FBC1, 0xF64AFBC1, 0xF64BFBC1, 0xF64CFBC1, 0xF64DFBC1, 0xF64EFBC1, 0xF64FFBC1, 0xF650FBC1, 0xF651FBC1, 0xF652FBC1, 0xF653FBC1, + 0xF654FBC1, 0xF655FBC1, 0xF656FBC1, 0xF657FBC1, 0xF658FBC1, 0xF659FBC1, 0xF65AFBC1, 0xF65BFBC1, 0xF65CFBC1, 0xF65DFBC1, 0xF65EFBC1, 0xF65FFBC1, 0xF660FBC1, 0xF661FBC1, 0xF662FBC1, + 0xF663FBC1, 0xF664FBC1, 0xF665FBC1, 0xF666FBC1, 0xF667FBC1, 0xF668FBC1, 0xF669FBC1, 0xF66AFBC1, 0xF66BFBC1, 0xF66CFBC1, 0xF66DFBC1, 0xF66EFBC1, 0xF66FFBC1, 0xF670FBC1, 0xF671FBC1, + 0xF672FBC1, 0xF673FBC1, 0xF674FBC1, 0xF675FBC1, 0xF676FBC1, 0xF677FBC1, 0xF678FBC1, 0xF679FBC1, 0xF67AFBC1, 0xF67BFBC1, 0xF67CFBC1, 0xF67DFBC1, 0xF67EFBC1, 0xF67FFBC1, 0xF680FBC1, + 0xF681FBC1, 0xF682FBC1, 0xF683FBC1, 0xF684FBC1, 0xF685FBC1, 0xF686FBC1, 0xF687FBC1, 0xF688FBC1, 0xF689FBC1, 0xF68AFBC1, 0xF68BFBC1, 0xF68CFBC1, 0xF68DFBC1, 0xF68EFBC1, 0xF68FFBC1, + 0xF690FBC1, 0xF691FBC1, 0xF692FBC1, 0xF693FBC1, 0xF694FBC1, 0xF695FBC1, 0xF696FBC1, 0xF697FBC1, 0xF698FBC1, 0xF699FBC1, 0xF69AFBC1, 0xF69BFBC1, 0xF69CFBC1, 0xF69DFBC1, 0xF69EFBC1, + 0xF69FFBC1, 0xF6A0FBC1, 0xF6A1FBC1, 0xF6A2FBC1, 0xF6A3FBC1, 0xF6A4FBC1, 0xF6A5FBC1, 0xF6A6FBC1, 0xF6A7FBC1, 0xF6A8FBC1, 0xF6A9FBC1, 0xF6AAFBC1, 0xF6ABFBC1, 0xF6ACFBC1, 0xF6ADFBC1, + 0xF6AEFBC1, 0xF6AFFBC1, 0xF6B0FBC1, 0xF6B1FBC1, 0xF6B2FBC1, 0xF6B3FBC1, 0xF6B4FBC1, 0xF6B5FBC1, 0xF6B6FBC1, 0xF6B7FBC1, 0xF6B8FBC1, 0xF6B9FBC1, 0xF6BAFBC1, 0xF6BBFBC1, 0xF6BCFBC1, + 0xF6BDFBC1, 0xF6BEFBC1, 0xF6BFFBC1, 0xF6C0FBC1, 0xF6C1FBC1, 0xF6C2FBC1, 0xF6C3FBC1, 0xF6C4FBC1, 0xF6C5FBC1, 0xF6C6FBC1, 0xF6C7FBC1, 0xF6C8FBC1, 0xF6C9FBC1, 0xF6CAFBC1, 0xF6CBFBC1, + 0xF6CCFBC1, 0xF6CDFBC1, 0xF6CEFBC1, 0xF6CFFBC1, 0xF6D0FBC1, 0xF6D1FBC1, 0xF6D2FBC1, 0xF6D3FBC1, 0xF6D4FBC1, 0xF6D5FBC1, 0xF6D6FBC1, 0xF6D7FBC1, 0xF6D8FBC1, 0xF6D9FBC1, 0xF6DAFBC1, + 0xF6DBFBC1, 0xF6DCFBC1, 0xF6DDFBC1, 0xF6DEFBC1, 0xF6DFFBC1, 
0xF6E0FBC1, 0xF6E1FBC1, 0xF6E2FBC1, 0xF6E3FBC1, 0xF6E4FBC1, 0xF6E5FBC1, 0xF6E6FBC1, 0xF6E7FBC1, 0xF6E8FBC1, 0xF6E9FBC1, + 0xF6EAFBC1, 0xF6EBFBC1, 0xF6ECFBC1, 0xF6EDFBC1, 0xF6EEFBC1, 0xF6EFFBC1, 0xF6F0FBC1, 0xF6F1FBC1, 0xF6F2FBC1, 0xF6F3FBC1, 0xF6F4FBC1, 0xF6F5FBC1, 0xF6F6FBC1, 0xF6F7FBC1, 0xF6F8FBC1, + 0xF6F9FBC1, 0xF6FAFBC1, 0xF6FBFBC1, 0xF6FCFBC1, 0xF6FDFBC1, 0xF6FEFBC1, 0xF6FFFBC1, 0xF700FBC1, 0xF701FBC1, 0xF702FBC1, 0xF703FBC1, 0xF704FBC1, 0xF705FBC1, 0xF706FBC1, 0xF707FBC1, + 0xF708FBC1, 0xF709FBC1, 0xF70AFBC1, 0xF70BFBC1, 0xF70CFBC1, 0xF70DFBC1, 0xF70EFBC1, 0xF70FFBC1, 0xF710FBC1, 0xF711FBC1, 0xF712FBC1, 0xF713FBC1, 0xF714FBC1, 0xF715FBC1, 0xF716FBC1, + 0xF717FBC1, 0xF718FBC1, 0xF719FBC1, 0xF71AFBC1, 0xF71BFBC1, 0xF71CFBC1, 0xF71DFBC1, 0xF71EFBC1, 0xF71FFBC1, 0xF720FBC1, 0xF721FBC1, 0xF722FBC1, 0xF723FBC1, 0xF724FBC1, 0xF725FBC1, + 0xF726FBC1, 0xF727FBC1, 0xF728FBC1, 0xF729FBC1, 0xF72AFBC1, 0xF72BFBC1, 0xF72CFBC1, 0xF72DFBC1, 0xF72EFBC1, 0xF72FFBC1, 0xF730FBC1, 0xF731FBC1, 0xF732FBC1, 0xF733FBC1, 0xF734FBC1, + 0xF735FBC1, 0xF736FBC1, 0xF737FBC1, 0xF738FBC1, 0xF739FBC1, 0xF73AFBC1, 0xF73BFBC1, 0xF73CFBC1, 0xF73DFBC1, 0xF73EFBC1, 0xF73FFBC1, 0xF740FBC1, 0xF741FBC1, 0xF742FBC1, 0xF743FBC1, + 0xF744FBC1, 0xF745FBC1, 0xF746FBC1, 0xF747FBC1, 0xF748FBC1, 0xF749FBC1, 0xF74AFBC1, 0xF74BFBC1, 0xF74CFBC1, 0xF74DFBC1, 0xF74EFBC1, 0xF74FFBC1, 0xF750FBC1, 0xF751FBC1, 0xF752FBC1, + 0xF753FBC1, 0xF754FBC1, 0xF755FBC1, 0xF756FBC1, 0xF757FBC1, 0xF758FBC1, 0xF759FBC1, 0xF75AFBC1, 0xF75BFBC1, 0xF75CFBC1, 0xF75DFBC1, 0xF75EFBC1, 0xF75FFBC1, 0xF760FBC1, 0xF761FBC1, + 0xF762FBC1, 0xF763FBC1, 0xF764FBC1, 0xF765FBC1, 0xF766FBC1, 0xF767FBC1, 0xF768FBC1, 0xF769FBC1, 0xF76AFBC1, 0xF76BFBC1, 0xF76CFBC1, 0xF76DFBC1, 0xF76EFBC1, 0xF76FFBC1, 0xF770FBC1, + 0xF771FBC1, 0xF772FBC1, 0xF773FBC1, 0xF774FBC1, 0xF775FBC1, 0xF776FBC1, 0xF777FBC1, 0xF778FBC1, 0xF779FBC1, 0xF77AFBC1, 0xF77BFBC1, 0xF77CFBC1, 0xF77DFBC1, 0xF77EFBC1, 0xF77FFBC1, + 0xF780FBC1, 0xF781FBC1, 0xF782FBC1, 0xF783FBC1, 
0xF784FBC1, 0xF785FBC1, 0xF786FBC1, 0xF787FBC1, 0xF788FBC1, 0xF789FBC1, 0xF78AFBC1, 0xF78BFBC1, 0xF78CFBC1, 0xF78DFBC1, 0xF78EFBC1, + 0xF78FFBC1, 0xF790FBC1, 0xF791FBC1, 0xF792FBC1, 0xF793FBC1, 0xF794FBC1, 0xF795FBC1, 0xF796FBC1, 0xF797FBC1, 0xF798FBC1, 0xF799FBC1, 0xF79AFBC1, 0xF79BFBC1, 0xF79CFBC1, 0xF79DFBC1, + 0xF79EFBC1, 0xF79FFBC1, 0xF7A0FBC1, 0xF7A1FBC1, 0xF7A2FBC1, 0xF7A3FBC1, 0xF7A4FBC1, 0xF7A5FBC1, 0xF7A6FBC1, 0xF7A7FBC1, 0xF7A8FBC1, 0xF7A9FBC1, 0xF7AAFBC1, 0xF7ABFBC1, 0xF7ACFBC1, + 0xF7ADFBC1, 0xF7AEFBC1, 0xF7AFFBC1, 0xF7B0FBC1, 0xF7B1FBC1, 0xF7B2FBC1, 0xF7B3FBC1, 0xF7B4FBC1, 0xF7B5FBC1, 0xF7B6FBC1, 0xF7B7FBC1, 0xF7B8FBC1, 0xF7B9FBC1, 0xF7BAFBC1, 0xF7BBFBC1, + 0xF7BCFBC1, 0xF7BDFBC1, 0xF7BEFBC1, 0xF7BFFBC1, 0xF7C0FBC1, 0xF7C1FBC1, 0xF7C2FBC1, 0xF7C3FBC1, 0xF7C4FBC1, 0xF7C5FBC1, 0xF7C6FBC1, 0xF7C7FBC1, 0xF7C8FBC1, 0xF7C9FBC1, 0xF7CAFBC1, + 0xF7CBFBC1, 0xF7CCFBC1, 0xF7CDFBC1, 0xF7CEFBC1, 0xF7CFFBC1, 0xF7D0FBC1, 0xF7D1FBC1, 0xF7D2FBC1, 0xF7D3FBC1, 0xF7D4FBC1, 0xF7D5FBC1, 0xF7D6FBC1, 0xF7D7FBC1, 0xF7D8FBC1, 0xF7D9FBC1, + 0xF7DAFBC1, 0xF7DBFBC1, 0xF7DCFBC1, 0xF7DDFBC1, 0xF7DEFBC1, 0xF7DFFBC1, 0xF7E0FBC1, 0xF7E1FBC1, 0xF7E2FBC1, 0xF7E3FBC1, 0xF7E4FBC1, 0xF7E5FBC1, 0xF7E6FBC1, 0xF7E7FBC1, 0xF7E8FBC1, + 0xF7E9FBC1, 0xF7EAFBC1, 0xF7EBFBC1, 0xF7ECFBC1, 0xF7EDFBC1, 0xF7EEFBC1, 0xF7EFFBC1, 0xF7F0FBC1, 0xF7F1FBC1, 0xF7F2FBC1, 0xF7F3FBC1, 0xF7F4FBC1, 0xF7F5FBC1, 0xF7F6FBC1, 0xF7F7FBC1, + 0xF7F8FBC1, 0xF7F9FBC1, 0xF7FAFBC1, 0xF7FBFBC1, 0xF7FCFBC1, 0xF7FDFBC1, 0xF7FEFBC1, 0xF7FFFBC1, 0xF800FBC1, 0xF801FBC1, 0xF802FBC1, 0xF803FBC1, 0xF804FBC1, 0xF805FBC1, 0xF806FBC1, + 0xF807FBC1, 0xF808FBC1, 0xF809FBC1, 0xF80AFBC1, 0xF80BFBC1, 0xF80CFBC1, 0xF80DFBC1, 0xF80EFBC1, 0xF80FFBC1, 0xF810FBC1, 0xF811FBC1, 0xF812FBC1, 0xF813FBC1, 0xF814FBC1, 0xF815FBC1, + 0xF816FBC1, 0xF817FBC1, 0xF818FBC1, 0xF819FBC1, 0xF81AFBC1, 0xF81BFBC1, 0xF81CFBC1, 0xF81DFBC1, 0xF81EFBC1, 0xF81FFBC1, 0xF820FBC1, 0xF821FBC1, 0xF822FBC1, 0xF823FBC1, 0xF824FBC1, + 0xF825FBC1, 0xF826FBC1, 0xF827FBC1, 
0xF828FBC1, 0xF829FBC1, 0xF82AFBC1, 0xF82BFBC1, 0xF82CFBC1, 0xF82DFBC1, 0xF82EFBC1, 0xF82FFBC1, 0xF830FBC1, 0xF831FBC1, 0xF832FBC1, 0xF833FBC1, + 0xF834FBC1, 0xF835FBC1, 0xF836FBC1, 0xF837FBC1, 0xF838FBC1, 0xF839FBC1, 0xF83AFBC1, 0xF83BFBC1, 0xF83CFBC1, 0xF83DFBC1, 0xF83EFBC1, 0xF83FFBC1, 0xF840FBC1, 0xF841FBC1, 0xF842FBC1, + 0xF843FBC1, 0xF844FBC1, 0xF845FBC1, 0xF846FBC1, 0xF847FBC1, 0xF848FBC1, 0xF849FBC1, 0xF84AFBC1, 0xF84BFBC1, 0xF84CFBC1, 0xF84DFBC1, 0xF84EFBC1, 0xF84FFBC1, 0xF850FBC1, 0xF851FBC1, + 0xF852FBC1, 0xF853FBC1, 0xF854FBC1, 0xF855FBC1, 0xF856FBC1, 0xF857FBC1, 0xF858FBC1, 0xF859FBC1, 0xF85AFBC1, 0xF85BFBC1, 0xF85CFBC1, 0xF85DFBC1, 0xF85EFBC1, 0xF85FFBC1, 0xF860FBC1, + 0xF861FBC1, 0xF862FBC1, 0xF863FBC1, 0xF864FBC1, 0xF865FBC1, 0xF866FBC1, 0xF867FBC1, 0xF868FBC1, 0xF869FBC1, 0xF86AFBC1, 0xF86BFBC1, 0xF86CFBC1, 0xF86DFBC1, 0xF86EFBC1, 0xF86FFBC1, + 0xF870FBC1, 0xF871FBC1, 0xF872FBC1, 0xF873FBC1, 0xF874FBC1, 0xF875FBC1, 0xF876FBC1, 0xF877FBC1, 0xF878FBC1, 0xF879FBC1, 0xF87AFBC1, 0xF87BFBC1, 0xF87CFBC1, 0xF87DFBC1, 0xF87EFBC1, + 0xF87FFBC1, 0xF880FBC1, 0xF881FBC1, 0xF882FBC1, 0xF883FBC1, 0xF884FBC1, 0xF885FBC1, 0xF886FBC1, 0xF887FBC1, 0xF888FBC1, 0xF889FBC1, 0xF88AFBC1, 0xF88BFBC1, 0xF88CFBC1, 0xF88DFBC1, + 0xF88EFBC1, 0xF88FFBC1, 0xF890FBC1, 0xF891FBC1, 0xF892FBC1, 0xF893FBC1, 0xF894FBC1, 0xF895FBC1, 0xF896FBC1, 0xF897FBC1, 0xF898FBC1, 0xF899FBC1, 0xF89AFBC1, 0xF89BFBC1, 0xF89CFBC1, + 0xF89DFBC1, 0xF89EFBC1, 0xF89FFBC1, 0xF8A0FBC1, 0xF8A1FBC1, 0xF8A2FBC1, 0xF8A3FBC1, 0xF8A4FBC1, 0xF8A5FBC1, 0xF8A6FBC1, 0xF8A7FBC1, 0xF8A8FBC1, 0xF8A9FBC1, 0xF8AAFBC1, 0xF8ABFBC1, + 0xF8ACFBC1, 0xF8ADFBC1, 0xF8AEFBC1, 0xF8AFFBC1, 0xF8B0FBC1, 0xF8B1FBC1, 0xF8B2FBC1, 0xF8B3FBC1, 0xF8B4FBC1, 0xF8B5FBC1, 0xF8B6FBC1, 0xF8B7FBC1, 0xF8B8FBC1, 0xF8B9FBC1, 0xF8BAFBC1, + 0xF8BBFBC1, 0xF8BCFBC1, 0xF8BDFBC1, 0xF8BEFBC1, 0xF8BFFBC1, 0xF8C0FBC1, 0xF8C1FBC1, 0xF8C2FBC1, 0xF8C3FBC1, 0xF8C4FBC1, 0xF8C5FBC1, 0xF8C6FBC1, 0xF8C7FBC1, 0xF8C8FBC1, 0xF8C9FBC1, + 0xF8CAFBC1, 0xF8CBFBC1, 
0xF8CCFBC1, 0xF8CDFBC1, 0xF8CEFBC1, 0xF8CFFBC1, 0xF8D0FBC1, 0xF8D1FBC1, 0xF8D2FBC1, 0xF8D3FBC1, 0xF8D4FBC1, 0xF8D5FBC1, 0xF8D6FBC1, 0xF8D7FBC1, 0xF8D8FBC1, + 0xF8D9FBC1, 0xF8DAFBC1, 0xF8DBFBC1, 0xF8DCFBC1, 0xF8DDFBC1, 0xF8DEFBC1, 0xF8DFFBC1, 0xF8E0FBC1, 0xF8E1FBC1, 0xF8E2FBC1, 0xF8E3FBC1, 0xF8E4FBC1, 0xF8E5FBC1, 0xF8E6FBC1, 0xF8E7FBC1, + 0xF8E8FBC1, 0xF8E9FBC1, 0xF8EAFBC1, 0xF8EBFBC1, 0xF8ECFBC1, 0xF8EDFBC1, 0xF8EEFBC1, 0xF8EFFBC1, 0xF8F0FBC1, 0xF8F1FBC1, 0xF8F2FBC1, 0xF8F3FBC1, 0xF8F4FBC1, 0xF8F5FBC1, 0xF8F6FBC1, + 0xF8F7FBC1, 0xF8F8FBC1, 0xF8F9FBC1, 0xF8FAFBC1, 0xF8FBFBC1, 0xF8FCFBC1, 0xF8FDFBC1, 0xF8FEFBC1, 0xF8FFFBC1, 0x8C48FB41, 0xE6F4FB40, 0x8ECAFB41, 0x8CC8FB41, 0xEED1FB40, 0xCE32FB40, + 0xD3E5FB40, 0x9F9CFB41, 0x9F9CFB41, 0xD951FB40, 0x91D1FB41, 0xD587FB40, 0xD948FB40, 0xE1F6FB40, 0xF669FB40, 0xFF85FB40, 0x863FFB41, 0x87BAFB41, 0x88F8FB41, 0x908FFB41, 0xEA02FB40, + 0xED1BFB40, 0xF0D9FB40, 0xF3DEFB40, 0x843DFB41, 0x916AFB41, 0x99F1FB41, 0xCE82FB40, 0xD375FB40, 0xEB04FB40, 0xF21BFB40, 0x862DFB41, 0x9E1EFB41, 0xDD50FB40, 0xEFEBFB40, 0x85CDFB41, + 0x8964FB41, 0xE2C9FB40, 0x81D8FB41, 0x881FFB41, 0xDECAFB40, 0xE717FB40, 0xED6AFB40, 0xF2FCFB40, 0x90CEFB41, 0xCF86FB40, 0xD1B7FB40, 0xD2DEFB40, 0xE4C4FB40, 0xEAD3FB40, 0xF210FB40, + 0xF6E7FB40, 0x8001FB41, 0x8606FB41, 0x865CFB41, 0x8DEFFB41, 0x9732FB41, 0x9B6FFB41, 0x9DFAFB41, 0xF88CFB40, 0xF97FFB40, 0xFDA0FB40, 0x83C9FB41, 0x9304FB41, 0x9E7FFB41, 0x8AD6FB41, + 0xD8DFFB40, 0xDF04FB40, 0xFC60FB40, 0x807EFB41, 0xF262FB40, 0xF8CAFB40, 0x8CC2FB41, 0x96F7FB41, 0xD8D8FB40, 0xDC62FB40, 0xEA13FB40, 0xEDDAFB40, 0xEF0FFB40, 0xFD2FFB40, 0xFE37FB40, + 0x964BFB41, 0xD2D2FB40, 0x808BFB41, 0xD1DCFB40, 0xD1CCFB40, 0xFA1CFB40, 0xFDBEFB40, 0x83F1FB41, 0x9675FB41, 0x8B80FB41, 0xE2CFFB40, 0xEA02FB40, 0x8AFEFB41, 0xCE39FB40, 0xDBE7FB40, + 0xE012FB40, 0xF387FB40, 0xF570FB40, 0xD317FB40, 0xF8FBFB40, 0xCFBFFB40, 0xDFA9FB40, 0xCE0DFB40, 0xECCCFB40, 0xE578FB40, 0xFD22FB40, 0xD3C3FB40, 0xD85EFB40, 0xF701FB40, 0x8449FB41, + 0x8AAAFB41, 
0xEBBAFB40, 0x8FB0FB41, 0xEC88FB40, 0xE2FEFB40, 0x82E5FB41, 0xE3A0FB40, 0xF565FB40, 0xCEAEFB40, 0xD169FB40, 0xD1C9FB40, 0xE881FB40, 0xFCE7FB40, 0x826FFB41, 0x8AD2FB41, + 0x91CFFB41, 0xD2F5FB40, 0xD442FB40, 0xD973FB40, 0xDEECFB40, 0xE5C5FB40, 0xEFFEFB40, 0xF92AFB40, 0x95ADFB41, 0x9A6AFB41, 0x9E97FB41, 0x9ECEFB41, 0xD29BFB40, 0xE6C6FB40, 0xEB77FB40, + 0x8F62FB41, 0xDE74FB40, 0xE190FB40, 0xE200FB40, 0xE49AFB40, 0xEF23FB40, 0xF149FB40, 0xF489FB40, 0xF9CAFB40, 0xFDF4FB40, 0x806FFB41, 0x8F26FB41, 0x84EEFB41, 0x9023FB41, 0x934AFB41, + 0xD217FB40, 0xD2A3FB40, 0xD4BDFB40, 0xF0C8FB40, 0x88C2FB41, 0x8AAAFB41, 0xDEC9FB40, 0xDFF5FB40, 0xE37BFB40, 0xEBAEFB40, 0xFC3EFB40, 0xF375FB40, 0xCEE4FB40, 0xD6F9FB40, 0xDBE7FB40, + 0xDDBAFB40, 0xE01CFB40, 0xF3B2FB40, 0xF469FB40, 0xFF9AFB40, 0x8046FB41, 0x9234FB41, 0x96F6FB41, 0x9748FB41, 0x9818FB41, 0xCF8BFB40, 0xF9AEFB40, 0x91B4FB41, 0x96B8FB41, 0xE0E1FB40, + 0xCE86FB40, 0xD0DAFB40, 0xDBEEFB40, 0xDC3FFB40, 0xE599FB40, 0xEA02FB40, 0xF1CEFB40, 0xF642FB40, 0x84FCFB41, 0x907CFB41, 0x9F8DFB41, 0xE688FB40, 0x962EFB41, 0xD289FB40, 0xE77BFB40, + 0xE7F3FB40, 0xED41FB40, 0xEE9CFB40, 0xF409FB40, 0xF559FB40, 0xF86BFB40, 0xFD10FB40, 0x985EFB41, 0xD16DFB40, 0xE22EFB40, 0x9678FB41, 0xD02BFB40, 0xDD19FB40, 0xEDEAFB40, 0x8F2AFB41, + 0xDF8BFB40, 0xE144FB40, 0xE817FB40, 0xF387FB40, 0x9686FB41, 0xD229FB40, 0xD40FFB40, 0xDC65FB40, 0xE613FB40, 0xE74EFB40, 0xE8A8FB40, 0xECE5FB40, 0xF406FB40, 0xF5E2FB40, 0xFF79FB40, + 0x88CFFB41, 0x88E1FB41, 0x91CCFB41, 0x96E2FB41, 0xD33FFB40, 0xEEBAFB40, 0xD41DFB40, 0xF1D0FB40, 0xF498FB40, 0x85FAFB41, 0x96A3FB41, 0x9C57FB41, 0x9E9FFB41, 0xE797FB40, 0xEDCBFB40, + 0x81E8FB41, 0xFACBFB40, 0xFB20FB40, 0xFC92FB40, 0xF2C0FB40, 0xF099FB40, 0x8B58FB41, 0xCEC0FB40, 0x8336FB41, 0xD23AFB40, 0xD207FB40, 0xDEA6FB40, 0xE2D3FB40, 0xFCD6FB40, 0xDB85FB40, + 0xED1EFB40, 0xE6B4FB40, 0x8F3BFB41, 0x884CFB41, 0x964DFB41, 0x898BFB41, 0xDED3FB40, 0xD140FB40, 0xD5C0FB40, 0xFA0EFB41, 0xFA0FFB41, 0xD85AFB40, 0xFA11FB41, 0xE674FB40, 0xFA13FB41, + 
0xFA14FB41, 0xD1DEFB40, 0xF32AFB40, 0xF6CAFB40, 0xF93CFB40, 0xF95EFB40, 0xF965FB40, 0xF98FFB40, 0x9756FB41, 0xFCBEFB40, 0xFFBDFB40, 0xFA1FFB41, 0x8612FB41, 0xFA21FB41, 0x8AF8FB41, + 0xFA23FB41, 0xFA24FB41, 0x9038FB41, 0x90FDFB41, 0xFA27FB41, 0xFA28FB41, 0xFA29FB41, 0x98EFFB41, 0x98FCFB41, 0x9928FB41, 0x9DB4FB41, 0x90DEFB41, 0x96B7FB41, 0xCFAEFB40, 0xD0E7FB40, + 0xD14DFB40, 0xD2C9FB40, 0xD2E4FB40, 0xD351FB40, 0xD59DFB40, 0xD606FB40, 0xD668FB40, 0xD840FB40, 0xD8A8FB40, 0xDC64FB40, 0xDC6EFB40, 0xE094FB40, 0xE168FB40, 0xE18EFB40, 0xE1F2FB40, + 0xE54FFB40, 0xE5E2FB40, 0xE691FB40, 0xE885FB40, 0xED77FB40, 0xEE1AFB40, 0xEF22FB40, 0xF16EFB40, 0xF22BFB40, 0xF422FB40, 0xF891FB40, 0xF93EFB40, 0xF949FB40, 0xF948FB40, 0xF950FB40, + 0xF956FB40, 0xF95DFB40, 0xF98DFB40, 0xF98EFB40, 0xFA40FB40, 0xFA81FB40, 0xFBC0FB40, 0xFDF4FB40, 0xFE09FB40, 0xFE41FB40, 0xFF72FB40, 0x8005FB41, 0x81EDFB41, 0x8279FB41, 0x8279FB41, + 0x8457FB41, 0x8910FB41, 0x8996FB41, 0x8B01FB41, 0x8B39FB41, 0x8CD3FB41, 0x8D08FB41, 0x8FB6FB41, 0x9038FB41, 0x96E3FB41, 0x97FFFB41, 0x983BFB41, 0xE075FB40, 0xC2EEFB84, 0x8218FB41, + 0xFA6EFBC1, 0xFA6FFBC1, 0xCE26FB40, 0xD1B5FB40, 0xD168FB40, 0xCF80FB40, 0xD145FB40, 0xD180FB40, 0xD2C7FB40, 0xD2FAFB40, 0xD59DFB40, 0xD555FB40, 0xD599FB40, 0xD5E2FB40, 0xD85AFB40, + 0xD8B3FB40, 0xD944FB40, 0xD954FB40, 0xDA62FB40, 0xDB28FB40, 0xDED2FB40, 0xDED9FB40, 0xDF69FB40, 0xDFADFB40, 0xE0D8FB40, 0xE14EFB40, 0xE108FB40, 0xE18EFB40, 0xE160FB40, 0xE1F2FB40, + 0xE234FB40, 0xE3C4FB40, 0xE41CFB40, 0xE452FB40, 0xE556FB40, 0xE674FB40, 0xE717FB40, 0xE71BFB40, 0xE756FB40, 0xEB79FB40, 0xEBBAFB40, 0xED41FB40, 0xEEDBFB40, 0xEECBFB40, 0xEF22FB40, + 0xF01EFB40, 0xF16EFB40, 0xF7A7FB40, 0xF235FB40, 0xF2AFFB40, 0xF32AFB40, 0xF471FB40, 0xF506FB40, 0xF53BFB40, 0xF61DFB40, 0xF61FFB40, 0xF6CAFB40, 0xF6DBFB40, 0xF6F4FB40, 0xF74AFB40, + 0xF740FB40, 0xF8CCFB40, 0xFAB1FB40, 0xFBC0FB40, 0xFC7BFB40, 0xFD5BFB40, 0xFDF4FB40, 0xFF3EFB40, 0x8005FB41, 0x8352FB41, 0x83EFFB41, 0x8779FB41, 0x8941FB41, 0x8986FB41, 0x8996FB41, 
+ 0x8ABFFB41, 0x8AF8FB41, 0x8ACBFB41, 0x8B01FB41, 0x8AFEFB41, 0x8AEDFB41, 0x8B39FB41, 0x8B8AFB41, 0x8D08FB41, 0x8F38FB41, 0x9072FB41, 0x9199FB41, 0x9276FB41, 0x967CFB41, 0x96E3FB41, + 0x9756FB41, 0x97DBFB41, 0x97FFFB41, 0x980BFB41, 0x983BFB41, 0x9B12FB41, 0x9F9CFB41, 0xA84AFB84, 0xA844FB84, 0xB3D5FB84, 0xBB9DFB80, 0xC018FB80, 0xC039FB80, 0xD249FB84, 0xDCD0FB84, + 0xFED3FB84, 0x9F43FB41, 0x9F8EFB41, 0xFADAFBC1, 0xFADBFBC1, 0xFADCFBC1, 0xFADDFBC1, 0xFADEFBC1, 0xFADFFBC1, 0xFAE0FBC1, 0xFAE1FBC1, 0xFAE2FBC1, 0xFAE3FBC1, 0xFAE4FBC1, 0xFAE5FBC1, + 0xFAE6FBC1, 0xFAE7FBC1, 0xFAE8FBC1, 0xFAE9FBC1, 0xFAEAFBC1, 0xFAEBFBC1, 0xFAECFBC1, 0xFAEDFBC1, 0xFAEEFBC1, 0xFAEFFBC1, 0xFAF0FBC1, 0xFAF1FBC1, 0xFAF2FBC1, 0xFAF3FBC1, 0xFAF4FBC1, + 0xFAF5FBC1, 0xFAF6FBC1, 0xFAF7FBC1, 0xFAF8FBC1, 0xFAF9FBC1, 0xFAFAFBC1, 0xFAFBFBC1, 0xFAFCFBC1, 0xFAFDFBC1, 0xFAFEFBC1, 0xFAFFFBC1, 0x1CE51CE5, 0x1D321CE5, 0x1D771CE5, 0x1D321CE51CE5, + 0x1D771CE51CE5, 0x1E951E71, 0x1E951E71, 0xFB07FBC1, 0xFB08FBC1, 0xFB09FBC1, 0xFB0AFBC1, 0xFB0BFBC1, 0xFB0CFBC1, 0xFB0DFBC1, 0xFB0EFBC1, 0xFB0FFBC1, 0xFB10FBC1, 0xFB11FBC1, 0xFB12FBC1, + 0x22A522A3, 0x229422A3, 0x229A22A3, 0x22A522AD, 0x229C22A3, 0xFB18FBC1, 0xFB19FBC1, 0xFB1AFBC1, 0xFB1BFBC1, 0xFB1CFBC1, 0x22C0, 0x0, 0x22C022C0, 0x22C6, 0x22B7, + 0x22BA, 0x22BB, 0x22C1, 0x22C2, 0x22C3, 0x22CA, 0x22CC, 0x616, 0x22CB, 0x22CB, 0x22CB, 0x22CB, 0x22B7, 0x22B7, 0x22B7, + 0x22B8, 0x22B9, 0x22BA, 0x22BB, 0x22BC, 0x22BD, 0xFB37FBC1, 0x22BF, 0x22C0, 0x22C1, 0x22C1, 0x22C2, 0xFB3DFBC1, 0x22C3, 0xFB3FFBC1, + 0x22C4, 0x22C5, 0xFB42FBC1, 0x22C7, 0x22C7, 0xFB45FBC1, 0x22C8, 0x22C9, 0x22CA, 0x22CB, 0x22CC, 0x22BC, 0x22B8, 0x22C1, 0x22C7, + 0x22C222B7, 0x2301, 0x2301, 0x230E, 0x230E, 0x230E, 0x230E, 0x230F, 0x230F, 0x230F, 0x230F, 0x2310, 0x2310, 0x2310, 0x2310, + 0x2320, 0x2320, 0x2320, 0x2320, 0x2323, 0x2323, 0x2323, 0x2323, 0x231F, 0x231F, 0x231F, 0x231F, 0x237B, 0x237B, 0x237B, + 0x237B, 0x237E, 0x237E, 0x237E, 0x237E, 0x2327, 0x2327, 0x2327, 0x2327, 0x2326, 0x2326, 0x2326, 
0x2326, 0x2328, 0x2328, + 0x2328, 0x2328, 0x232A, 0x232A, 0x232A, 0x232A, 0x233E, 0x233E, 0x233D, 0x233D, 0x2340, 0x2340, 0x2339, 0x2339, 0x234F, + 0x234F, 0x2348, 0x2348, 0x2388, 0x2388, 0x2388, 0x2388, 0x2390, 0x2390, 0x2390, 0x2390, 0x2395, 0x2395, 0x2395, 0x2395, + 0x2393, 0x2393, 0x2393, 0x2393, 0x23A8, 0x23A8, 0x23AA, 0x23AA, 0x23AA, 0x23AA, 0x23B6, 0x23B6, 0x23B3, 0x23B3, 0x23B3, + 0x23B3, 0x23B2, 0x23B2, 0x23B2, 0x23B2, 0x23D4, 0x23D4, 0x23D4, 0x23D4, 0x502, 0x503, 0x504, 0x505, 0x506, 0x507, + 0x508, 0x509, 0x50A, 0x50B, 0x50C, 0x50D, 0x50E, 0x50F, 0x510, 0x511, 0xFBC2FBC1, 0xFBC3FBC1, 0xFBC4FBC1, 0xFBC5FBC1, 0xFBC6FBC1, + 0xFBC7FBC1, 0xFBC8FBC1, 0xFBC9FBC1, 0xFBCAFBC1, 0xFBCBFBC1, 0xFBCCFBC1, 0xFBCDFBC1, 0xFBCEFBC1, 0xFBCFFBC1, 0xFBD0FBC1, 0xFBD1FBC1, 0xFBD2FBC1, 0x238D, 0x238D, 0x238D, + 0x238D, 0x23BB, 0x23BB, 0x23BA, 0x23BA, 0x23BC, 0x23BC, 0x22FD23BB, 0x23BF, 0x23BF, 0x23B9, 0x23B9, 0x23BD, 0x23BD, 0x23CA, + 0x23CA, 0x23CA, 0x23CA, 0x23C5, 0x23C5, 0x230B2307, 0x230B2307, 0x23B62307, 0x23B62307, 0x23B72307, 0x23B72307, 0x23BB2307, 0x23BB2307, 0x23BA2307, 0x23BA2307, + 0x23BC2307, 0x23BC2307, 0x23CA2307, 0x23CA2307, 0x23CA2307, 0x23C52307, 0x23C52307, 0x23C52307, 0x23C7, 0x23C7, 0x23C7, 0x23C7, 0x23252307, 0x232C2307, 0x23A32307, + 0x23C52307, 0x23C62307, 0x2325230D, 0x232C230D, 0x232D230D, 0x23A3230D, 0x23C5230D, 0x23C6230D, 0x2325231D, 0x232C231D, 0x232D231D, 0x23A3231D, 0x23C5231D, 0x23C6231D, 0x2325231E, + 0x23A3231E, 0x23C5231E, 0x23C6231E, 0x232C2325, 0x23A32325, 0x2325232C, 0x23A3232C, 0x2325232D, 0x232C232D, 0x23A3232D, 0x23252359, 0x232C2359, 0x232D2359, 0x23A32359, 0x232C2364, + 0x23A32364, 0x23252365, 0x232C2365, 0x232D2365, 0x23A32365, 0x232C236A, 0x23A3236A, 0x23A3236B, 0x2325236E, 0x23A3236E, 0x2325236F, 0x23A3236F, 0x23252376, 0x232C2376, 0x232D2376, + 0x23A32376, 0x23C52376, 0x23C62376, 0x232C2382, 0x23A32382, 0x23C52382, 0x23C62382, 0x230B2387, 0x23252387, 0x232C2387, 0x232D2387, 0x239C2387, 0x23A32387, 0x23C52387, 0x23C62387, + 
0x2325239C, 0x232C239C, 0x232D239C, 0x23A3239C, 0x23C5239C, 0x23C6239C, 0x232523A3, 0x232C23A3, 0x232D23A3, 0x23A323A3, 0x23C523A3, 0x23C623A3, 0x232523A7, 0x232C23A7, 0x232D23A7, + 0x23A323A7, 0x23C523A7, 0x23C623A7, 0x232523B1, 0x23A323B1, 0x23C523B1, 0x23C623B1, 0x232523C6, 0x232C23C6, 0x232D23C6, 0x23A323C6, 0x23C523C6, 0x23C623C6, 0x2338, 0x2346, + 0x23C5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x23462307, 0x23472307, 0x23A32307, 0x23A72307, 0x23C52307, 0x23C62307, 0x2346230D, 0x2347230D, + 0x23A3230D, 0x23A7230D, 0x23C5230D, 0x23C6230D, 0x2346231D, 0x2347231D, 0x23A3231D, 0x23A7231D, 0x23C5231D, 0x23C6231D, 0x2346231E, 0x2347231E, 0x23A3231E, 0x23A7231E, 0x23C5231E, + 0x23C6231E, 0x23C52376, 0x23C62376, 0x23C52382, 0x23C62382, 0x230B2387, 0x239C2387, 0x23A32387, 0x23C52387, 0x23C62387, 0x23A3239C, 0x23C5239C, 0x23C6239C, 0x230B23A3, 0x23A323A3, + 0x234623A7, 0x234723A7, 0x23A323A7, 0x23A723A7, 0x23C523A7, 0x23C623A7, 0x23C5, 0x234623C6, 0x234723C6, 0x23A323C6, 0x23A723C6, 0x23C523C6, 0x23C623C6, 0x23252307, 0x232C2307, + 0x232D2307, 0x23A32307, 0x23B12307, 0x2325230D, 0x232C230D, 0x232D230D, 0x23A3230D, 0x23B1230D, 0x2325231D, 0x232C231D, 0x232D231D, 0x23A3231D, 0x23B1231D, 0x23A3231E, 0x232C2325, + 0x23A32325, 0x2325232C, 0x23A3232C, 0x2325232D, 0x23A3232D, 0x23252359, 0x232C2359, 0x232D2359, 0x23A32359, 0x232C2364, 0x232D2364, 0x23A32364, 0x23252365, 0x232C2365, 0x232D2365, + 0x23A32365, 0x232C236A, 0x23A3236B, 0x2325236E, 0x23A3236E, 0x2325236F, 0x23A3236F, 0x23252376, 0x232C2376, 0x232D2376, 0x23A32376, 0x232C2382, 0x23A32382, 0x23252387, 0x232C2387, + 0x232D2387, 0x239C2387, 0x23A32387, 0x2325239C, 0x232C239C, 0x232D239C, 0x23A3239C, 0x23B1239C, 0x232523A3, 0x232C23A3, 0x232D23A3, 0x23A323A3, 0x232523A7, 0x232C23A7, 0x232D23A7, + 0x23A323A7, 0x23B123A7, 0x232523B1, 0x23A323B1, 0x23B1, 0x232523C6, 0x232C23C6, 0x232D23C6, 0x23A323C6, 0x23B123C6, 0x23A32307, 0x23B12307, 0x23A3230D, 0x23B1230D, 0x23A3231D, + 0x23B1231D, 0x23A3231E, 0x23B1231E, 0x23A32359, 0x23B12359, 
0x23A3235A, 0x23B1235A, 0x239C2387, 0x23A32387, 0x23A3239C, 0x23A323A7, 0x23B123A7, 0x23A323C6, 0x23B123C6, 0x0, + 0x0, 0x0, 0x23C5236A, 0x23C6236A, 0x23C5236E, 0x23C6236E, 0x23C5236F, 0x23C6236F, 0x23C52359, 0x23C62359, 0x23C5235A, 0x23C6235A, 0x23C5232C, 0x23C6232C, 0x23C52325, + 0x23C62325, 0x23C5232D, 0x23C6232D, 0x23C52364, 0x23C62364, 0x23C52365, 0x23C62365, 0x2325235A, 0x232C235A, 0x232D235A, 0x23A3235A, 0x2346235A, 0x23462359, 0x23462364, 0x23462365, + 0x23C5236A, 0x23C6236A, 0x23C5236E, 0x23C6236E, 0x23C5236F, 0x23C6236F, 0x23C52359, 0x23C62359, 0x23C5235A, 0x23C6235A, 0x23C5232C, 0x23C6232C, 0x23C52325, 0x23C62325, 0x23C5232D, + 0x23C6232D, 0x23C52364, 0x23C62364, 0x23C52365, 0x23C62365, 0x2325235A, 0x232C235A, 0x232D235A, 0x23A3235A, 0x2346235A, 0x23462359, 0x23462364, 0x23462365, 0x2325235A, 0x232C235A, + 0x232D235A, 0x23A3235A, 0x23B12359, 0x23B1235A, 0x23A3236A, 0x23252359, 0x232C2359, 0x232D2359, 0x2325235A, 0x232C235A, 0x232D235A, 0x23A3236A, 0x23A3236B, 0x230B, 0x230B, + 0x381, 0x382, 0xFD40FBC1, 0xFD41FBC1, 0xFD42FBC1, 0xFD43FBC1, 0xFD44FBC1, 0xFD45FBC1, 0xFD46FBC1, 0xFD47FBC1, 0xFD48FBC1, 0xFD49FBC1, 0xFD4AFBC1, 0xFD4BFBC1, 0xFD4CFBC1, + 0xFD4DFBC1, 0xFD4EFBC1, 0xFD4FFBC1, 0x23A32325231D, 0x2325232C231D, 0x2325232C231D, 0x23A3232C231D, 0x23A3232D231D, 0x232523A3231D, 0x232C23A3231D, 0x232D23A3231D, 0x232C23A32325, 0x232C23A32325, 0x23C623A3232C, 0x23C523A3232C, + 0x2325232C2359, 0x232C23252359, 0x23C523252359, 0x232C23A32359, 0x232C23A32359, 0x232523A32359, 0x23A323A32359, 0x23A323A32359, 0x232C232C2364, 0x232C232C2364, 0x23A323A32364, 0x23A3232C235A, 0x23A3232C235A, 0x23C62325235A, 0x232D23A3235A, + 0x232D23A3235A, 0x23A323A3235A, 0x23A323A3235A, 0x23C5232C2365, 0x23A3232D2365, 0x23A3232D2365, 0x232C23A3236A, 0x232C23A3236A, 0x23A323A3236A, 0x23C623A3236A, 0x23A32325236E, 0x23A323A3236E, 0x23A323A3236E, 0x23C523A3236E, 0x23A323A3236F, + 0x23C623A3236F, 0x23C523A3236F, 0x23A3232D2376, 0x23A3232D2376, 0x232C23A32382, 0x23A323A32382, 
0x23A3232C239C, 0x23C6232C239C, 0x23C5232C239C, 0x23252325239C, 0x23252325239C, 0x23A3232D239C, 0x23A3232D239C, 0x232C23A3239C, 0x232C23A3239C, + 0x2325232C23A3, 0x23A3232C23A3, 0x23C6232C23A3, 0x232C232523A3, 0x23A3232523A3, 0x2325232D23A3, 0x23A3232D23A3, 0xFD90FBC1, 0xFD91FBC1, 0x232D232523A3, 0x232523A323B1, 0x23A323A323B1, 0x23A3232C23A7, 0x23C5232C23A7, 0x23A3232523A7, + 0x23A3232523A7, 0x23C5232523A7, 0x23C623A323A7, 0x23C523A323A7, 0x23A323A323C6, 0x23A323A323C6, 0x23C6232D230D, 0x23C62325231D, 0x23C52325231D, 0x23C6232D231D, 0x23C5232D231D, 0x23C623A3231D, 0x23C523A3231D, 0x23C623A32325, 0x23C5232C2325, + 0x23C523A32325, 0x23C5232D2359, 0x23C6232C2364, 0x23C6232C235A, 0x23C6232C2365, 0x23C62325239C, 0x23C623A3239C, 0x23C6232C23C6, 0x23C6232523C6, 0x23C623A323C6, 0x23C623A323A3, 0x23C623A32382, 0x23C6232C23A7, 0x232C23A32382, 0x23A3232C239C, + 0x23C623A3236E, 0x23C623A32387, 0x232C232523A7, 0x23C6232D23A3, 0x23A32325239C, 0x23A323A32387, 0x23A32325239C, 0x232C232523A7, 0x23C6232C2325, 0x23C62325232C, 0x23C6232523A3, 0x23C623A32376, 0x23C6232C230D, 0x23A323A32387, 0x23A32325236E, + 0x23A323A32364, 0x23C6232D2359, 0x23C6232523A7, 0xFDC8FBC1, 0xFDC9FBC1, 0xFDCAFBC1, 0xFDCBFBC1, 0xFDCCFBC1, 0xFDCDFBC1, 0xFDCEFBC1, 0xFDCFFBC1, 0xFDD0FBC1, 0xFDD1FBC1, 0xFDD2FBC1, 0xFDD3FBC1, + 0xFDD4FBC1, 0xFDD5FBC1, 0xFDD6FBC1, 0xFDD7FBC1, 0xFDD8FBC1, 0xFDD9FBC1, 0xFDDAFBC1, 0xFDDBFBC1, 0xFDDCFBC1, 0xFDDDFBC1, 0xFDDEFBC1, 0xFDDFFBC1, 0xFDE0FBC1, 0xFDE1FBC1, 0xFDE2FBC1, + 0xFDE3FBC1, 0xFDE4FBC1, 0xFDE5FBC1, 0xFDE6FBC1, 0xFDE7FBC1, 0xFDE8FBC1, 0xFDE9FBC1, 0xFDEAFBC1, 0xFDEBFBC1, 0xFDECFBC1, 0xFDEDFBC1, 0xFDEEFBC1, 0xFDEFFBC1, 0x23D4239C2364, 0x23D4239C2382, + 0x23B1239C239C230B, 0x2346230D2387230B, 0x233723A3232C23A3, 0x23A3236E239C2364, 0x239C23B723592346, 0x23B123C6239C236E, 0x23A3239C235923B7, 0x23C5239C2364, 0xFFFD, 0xFFFD, 0x239C230B23C72346, 0x501, 0xFDFEFBC1, 0xFDFFFBC1, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x222, 0x231, 0x28A, 
0x239, 0x234, 0x260, 0x266, 0x37B, 0x37C, 0x27702770277, 0xFE1AFBC1, 0xFE1BFBC1, 0xFE1CFBC1, 0xFE1DFBC1, 0xFE1EFBC1, + 0xFE1FFBC1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x2770277, 0x216, 0x215, 0x20B, 0x20B, 0x317, 0x318, 0x31B, 0x31C, 0x379, 0x37A, 0x377, 0x378, + 0x371, 0x372, 0x36F, 0x370, 0x373, 0x374, 0x375, 0x376, 0x232, 0x233, 0x319, 0x31A, 0x20A, 0x20A, 0x20A, + 0x20A, 0x20B, 0x20B, 0x20B, 0x222, 0x231, 0x277, 0xFE53FBC1, 0x234, 0x239, 0x266, 0x260, 0x216, 0x317, 0x318, + 0x31B, 0x31C, 0x379, 0x37A, 0x398, 0x396, 0x38F, 0x616, 0x20D, 0x61A, 0x61C, 0x61B, 0xFE67FBC1, 0x395, 0x1C12, + 0x399, 0x38E, 0xFE6CFBC1, 0xFE6DFBC1, 0xFE6EFBC1, 0xFE6FFBC1, 0x0, 0x0, 0x0, 0x0, 0x0, 0xFE75FBC1, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x22FD, 0x22FE, 0x22FE, 0x22FF, 0x22FF, 0x2302, 0x2302, 0x2303, + 0x2303, 0x2307, 0x2307, 0x2307, 0x2307, 0x230B, 0x230B, 0x230D, 0x230D, 0x230D, 0x230D, 0x231C, 0x231C, 0x231D, 0x231D, + 0x231D, 0x231D, 0x231E, 0x231E, 0x231E, 0x231E, 0x2325, 0x2325, 0x2325, 0x2325, 0x232C, 0x232C, 0x232C, 0x232C, 0x232D, + 0x232D, 0x232D, 0x232D, 0x2337, 0x2337, 0x2338, 0x2338, 0x2346, 0x2346, 0x2347, 0x2347, 0x2359, 0x2359, 0x2359, 0x2359, + 0x235A, 0x235A, 0x235A, 0x235A, 0x2364, 0x2364, 0x2364, 0x2364, 0x2365, 0x2365, 0x2365, 0x2365, 0x236A, 0x236A, 0x236A, + 0x236A, 0x236B, 0x236B, 0x236B, 0x236B, 0x236E, 0x236E, 0x236E, 0x236E, 0x236F, 0x236F, 0x236F, 0x236F, 0x2376, 0x2376, + 0x2376, 0x2376, 0x2382, 0x2382, 0x2382, 0x2382, 0x2387, 0x2387, 0x2387, 0x2387, 0x239C, 0x239C, 0x239C, 0x239C, 0x23A3, + 0x23A3, 0x23A3, 0x23A3, 0x23A7, 0x23A7, 0x23A7, 0x23A7, 0x23B1, 0x23B1, 0x23B1, 0x23B1, 0x23B7, 0x23B7, 0x23C5, 0x23C5, + 0x23C6, 0x23C6, 0x23C6, 0x23C6, 0x22FE239C, 0x22FE239C, 0x22FF239C, 0x22FF239C, 0x2303239C, 0x2303239C, 0x230B239C, 0x230B239C, 0xFEFDFBC1, 0xFEFEFBC1, 0x0, + 0xFF00FBC1, 0x260, 0x30C, 0x398, 0x1C12, 0x399, 0x396, 0x305, 0x317, 0x318, 0x38F, 0x616, 0x222, 0x20D, 0x277, + 0x394, 0x1C3D, 
0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x239, 0x234, 0x61A, 0x61B, + 0x61C, 0x266, 0x38E, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, + 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x319, + 0x395, 0x31A, 0x485, 0x20B, 0x482, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, + 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, + 0x1F21, 0x31B, 0x61E, 0x31C, 0x620, 0x32D, 0x32E, 0x28A, 0x373, 0x374, 0x231, 0x221, 0x3D8A, 0x3D5A, 0x3D5B, + 0x3D5C, 0x3D5E, 0x3D5F, 0x3D7E, 0x3D7F, 0x3D81, 0x3D6C, 0x1C0E, 0x3D5A, 0x3D5B, 0x3D5C, 0x3D5E, 0x3D5F, 0x3D60, 0x3D61, + 0x3D62, 0x3D63, 0x3D64, 0x3D65, 0x3D66, 0x3D67, 0x3D68, 0x3D69, 0x3D6A, 0x3D6B, 0x3D6C, 0x3D6D, 0x3D6E, 0x3D6F, 0x3D70, + 0x3D71, 0x3D72, 0x3D73, 0x3D74, 0x3D75, 0x3D76, 0x3D77, 0x3D78, 0x3D79, 0x3D7A, 0x3D7B, 0x3D7C, 0x3D7D, 0x3D7E, 0x3D7F, + 0x3D81, 0x3D82, 0x3D83, 0x3D84, 0x3D85, 0x3D86, 0x3D87, 0x3D8B, 0x0, 0x0, 0x3C72, 0x3BF5, 0x3BF6, 0x3CD3, 0x3BF7, + 0x3CD5, 0x3CD6, 0x3BF8, 0x3BF9, 0x3BFA, 0x3CD9, 0x3CDA, 0x3CDB, 0x3CDC, 0x3CDD, 0x3CDE, 0x3C0F, 0x3BFB, 0x3BFC, 0x3BFD, + 0x3C16, 0x3BFE, 0x3BFF, 0x3C00, 0x3C01, 0x3C02, 0x3C03, 0x3C04, 0x3C05, 0x3C06, 0x3C07, 0xFFBFFBC1, 0xFFC0FBC1, 0xFFC1FBC1, 0x3C73, + 0x3C74, 0x3C75, 0x3C76, 0x3C77, 0x3C78, 0xFFC8FBC1, 0xFFC9FBC1, 0x3C79, 0x3C7A, 0x3C7B, 0x3C7C, 0x3C7D, 0x3C7E, 0xFFD0FBC1, 0xFFD1FBC1, + 0x3C7F, 0x3C80, 0x3C81, 0x3C82, 0x3C83, 0x3C84, 0xFFD8FBC1, 0xFFD9FBC1, 0x3C85, 0x3C86, 0x3C87, 0xFFDDFBC1, 0xFFDEFBC1, 0xFFDFFBC1, 0x1C11, + 0x1C13, 0x61D, 0x486, 0x61F, 0x1C14, 0x1C27, 0xFFE7FBC1, 0x81A, 0x59C, 0x59E, 0x59D, 0x59F, 0x8B8, 0x8E3, 0xFFEFFBC1, + 0xFFF0FBC1, 0xFFF1FBC1, 0xFFF2FBC1, 0xFFF3FBC1, 0xFFF4FBC1, 0xFFF5FBC1, 0xFFF6FBC1, 0xFFF7FBC1, 0xFFF8FBC1, 0x0, 0x0, 0x0, 0x1A95, 0xFFFD, 0xFFFEFBC1, + 0xFFFFFBC1, 
0x45E7, 0x45E8, 0x45E9, 0x45EA, 0x45EB, 0x45EC, 0x45ED, 0x45EE, 0x45EF, 0x45F0, 0x45F1, 0x45F2, 0x800CFBC2, 0x45F3, + 0x45F4, 0x45F5, 0x45F6, 0x45F7, 0x45F8, 0x45F9, 0x45FA, 0x45FB, 0x45FC, 0x45FD, 0x45FE, 0x45FF, 0x4600, 0x4601, 0x4602, + 0x4603, 0x4604, 0x4605, 0x4606, 0x4607, 0x4608, 0x4609, 0x460A, 0x460B, 0x460C, 0x8027FBC2, 0x460D, 0x460E, 0x460F, 0x4610, + 0x4611, 0x4612, 0x4613, 0x4614, 0x4615, 0x4616, 0x4617, 0x4618, 0x4619, 0x461A, 0x461B, 0x461C, 0x461D, 0x461E, 0x461F, + 0x803BFBC2, 0x4620, 0x4621, 0x803EFBC2, 0x4622, 0x4623, 0x4624, 0x4625, 0x4626, 0x4627, 0x4628, 0x4629, 0x462A, 0x462B, 0x462C, + 0x462D, 0x462E, 0x462F, 0x4630, 0x804EFBC2, 0x804FFBC2, 0x4631, 0x4632, 0x4633, 0x4634, 0x4635, 0x4636, 0x4637, 0x4638, 0x4639, + 0x463A, 0x463B, 0x463C, 0x463D, 0x463E, 0x805EFBC2, 0x805FFBC2, 0x8060FBC2, 0x8061FBC2, 0x8062FBC2, 0x8063FBC2, 0x8064FBC2, 0x8065FBC2, 0x8066FBC2, 0x8067FBC2, + 0x8068FBC2, 0x8069FBC2, 0x806AFBC2, 0x806BFBC2, 0x806CFBC2, 0x806DFBC2, 0x806EFBC2, 0x806FFBC2, 0x8070FBC2, 0x8071FBC2, 0x8072FBC2, 0x8073FBC2, 0x8074FBC2, 0x8075FBC2, 0x8076FBC2, + 0x8077FBC2, 0x8078FBC2, 0x8079FBC2, 0x807AFBC2, 0x807BFBC2, 0x807CFBC2, 0x807DFBC2, 0x807EFBC2, 0x807FFBC2, 0x463F, 0x4640, 0x4641, 0x4642, 0x4643, 0x4644, + 0x4645, 0x4646, 0x4647, 0x4648, 0x4649, 0x464A, 0x464B, 0x464C, 0x464D, 0x464E, 0x464F, 0x4650, 0x4651, 0x4652, 0x4653, + 0x4654, 0x4655, 0x4656, 0x4657, 0x4658, 0x4659, 0x465A, 0x465B, 0x465C, 0x465D, 0x465E, 0x465F, 0x4660, 0x4661, 0x4662, + 0x4663, 0x4664, 0x4665, 0x4666, 0x4667, 0x4668, 0x4669, 0x466A, 0x466B, 0x466C, 0x466D, 0x466E, 0x466F, 0x4670, 0x4671, + 0x4672, 0x4673, 0x4674, 0x4675, 0x4676, 0x4677, 0x4678, 0x4679, 0x467A, 0x467B, 0x467C, 0x467D, 0x467E, 0x467F, 0x4680, + 0x4681, 0x4682, 0x4683, 0x4684, 0x4685, 0x4686, 0x4687, 0x4688, 0x4689, 0x468A, 0x468B, 0x468C, 0x468D, 0x468E, 0x468F, + 0x4690, 0x4691, 0x4692, 0x4693, 0x4694, 0x4695, 0x4696, 0x4697, 0x4698, 0x4699, 0x469A, 0x469B, 0x469C, 0x469D, 0x469E, + 0x469F, 0x46A0, 
0x46A1, 0x46A2, 0x46A3, 0x46A4, 0x46A5, 0x46A6, 0x46A7, 0x46A8, 0x46A9, 0x46AA, 0x46AB, 0x46AC, 0x46AD, + 0x46AE, 0x46AF, 0x46B0, 0x46B1, 0x46B2, 0x46B3, 0x46B4, 0x46B5, 0x46B6, 0x46B7, 0x46B8, 0x46B9, 0x80FBFBC2, 0x80FCFBC2, 0x80FDFBC2, + 0x80FEFBC2, 0x80FFFBC2, 0x2FA, 0x2FB, 0x2FC, 0x8103FBC2, 0x8104FBC2, 0x8105FBC2, 0x8106FBC2, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, + 0x1C44, 0x1C45, 0x1C46, 0x1AE9, 0x1AEA, 0x1AEB, 0x1AEC, 0x1AED, 0x1AEE, 0x1AEF, 0x1AF0, 0x1AF1, 0x1AF2, 0x1AF3, 0x1AF4, + 0x1AF5, 0x1AF6, 0x1AF7, 0x1AF8, 0x1AF9, 0x1AFA, 0x1AFB, 0x1AFC, 0x1AFD, 0x1AFE, 0x1AFF, 0x1B00, 0x1B01, 0x1B02, 0x1B03, + 0x1B04, 0x1B05, 0x1B06, 0x1B07, 0x1B08, 0x1B09, 0x1B0A, 0x1B0B, 0x1B0C, 0x8134FBC2, 0x8135FBC2, 0x8136FBC2, 0xF78, 0xF79, 0xF7A, + 0xF7B, 0xF7C, 0xF7D, 0xF7E, 0xF7F, 0xF80, 0x1B0D, 0x1B0E, 0x1C3E, 0x1C42, 0x1B0F, 0x1B10, 0x1B11, 0x1B12, 0x1C42, + 0x1B13, 0x1B14, 0x1B15, 0x1B16, 0x1B17, 0x1B18, 0x1C42, 0x1B19, 0x1B1A, 0x1B1B, 0x1B1C, 0x1B1D, 0x1B1E, 0x1B1F, 0x1B20, + 0x1C3E, 0x1C3E, 0x1C3E, 0x1C3F, 0x1C3F, 0x1C3F, 0x1C3F, 0x1C42, 0x1B21, 0x1B22, 0x1B23, 0x1B24, 0x1B25, 0x1B26, 0x1B27, + 0x1B28, 0x1B29, 0x1B2A, 0x1B2B, 0x1B2C, 0x1B2D, 0x1B2E, 0x1B2F, 0x1B30, 0x1B31, 0x1B32, 0x1B33, 0x1C42, 0x1B34, 0x1B35, + 0x1B36, 0x1B37, 0x1B38, 0xF81, 0xF82, 0xF83, 0xF84, 0xF85, 0xF86, 0xF87, 0xF88, 0xF89, 0xF8A, 0xF8B, 0xF8C, + 0xF8D, 0xF8E, 0xF8F, 0xF90, 0xF91, 0x1C3D, 0x1B39, 0xF92, 0xF93, 0xF94, 0x818FFBC2, 0xF95, 0xF96, 0xF97, 0xF98, + 0xF99, 0xF9A, 0xF9B, 0xF9C, 0xF9D, 0xF9E, 0xF9F, 0xFA0, 0x819CFBC2, 0x819DFBC2, 0x819EFBC2, 0x819FFBC2, 0xFA1, 0x81A1FBC2, 0x81A2FBC2, + 0x81A3FBC2, 0x81A4FBC2, 0x81A5FBC2, 0x81A6FBC2, 0x81A7FBC2, 0x81A8FBC2, 0x81A9FBC2, 0x81AAFBC2, 0x81ABFBC2, 0x81ACFBC2, 0x81ADFBC2, 0x81AEFBC2, 0x81AFFBC2, 0x81B0FBC2, 0x81B1FBC2, + 0x81B2FBC2, 0x81B3FBC2, 0x81B4FBC2, 0x81B5FBC2, 0x81B6FBC2, 0x81B7FBC2, 0x81B8FBC2, 0x81B9FBC2, 0x81BAFBC2, 0x81BBFBC2, 0x81BCFBC2, 0x81BDFBC2, 0x81BEFBC2, 0x81BFFBC2, 0x81C0FBC2, + 0x81C1FBC2, 0x81C2FBC2, 
0x81C3FBC2, 0x81C4FBC2, 0x81C5FBC2, 0x81C6FBC2, 0x81C7FBC2, 0x81C8FBC2, 0x81C9FBC2, 0x81CAFBC2, 0x81CBFBC2, 0x81CCFBC2, 0x81CDFBC2, 0x81CEFBC2, 0x81CFFBC2, + 0xFA2, 0xFA3, 0xFA4, 0xFA5, 0xFA6, 0xFA7, 0xFA8, 0xFA9, 0xFAA, 0xFAB, 0xFAC, 0xFAD, 0xFAE, 0xFAF, 0xFB0, + 0xFB1, 0xFB2, 0xFB3, 0xFB4, 0xFB5, 0xFB6, 0xFB7, 0xFB8, 0xFB9, 0xFBA, 0xFBB, 0xFBC, 0xFBD, 0xFBE, 0xFBF, + 0xFC0, 0xFC1, 0xFC2, 0xFC3, 0xFC4, 0xFC5, 0xFC6, 0xFC7, 0xFC8, 0xFC9, 0xFCA, 0xFCB, 0xFCC, 0xFCD, 0xFCE, + 0x0, 0x81FEFBC2, 0x81FFFBC2, 0x8200FBC2, 0x8201FBC2, 0x8202FBC2, 0x8203FBC2, 0x8204FBC2, 0x8205FBC2, 0x8206FBC2, 0x8207FBC2, 0x8208FBC2, 0x8209FBC2, 0x820AFBC2, 0x820BFBC2, + 0x820CFBC2, 0x820DFBC2, 0x820EFBC2, 0x820FFBC2, 0x8210FBC2, 0x8211FBC2, 0x8212FBC2, 0x8213FBC2, 0x8214FBC2, 0x8215FBC2, 0x8216FBC2, 0x8217FBC2, 0x8218FBC2, 0x8219FBC2, 0x821AFBC2, + 0x821BFBC2, 0x821CFBC2, 0x821DFBC2, 0x821EFBC2, 0x821FFBC2, 0x8220FBC2, 0x8221FBC2, 0x8222FBC2, 0x8223FBC2, 0x8224FBC2, 0x8225FBC2, 0x8226FBC2, 0x8227FBC2, 0x8228FBC2, 0x8229FBC2, + 0x822AFBC2, 0x822BFBC2, 0x822CFBC2, 0x822DFBC2, 0x822EFBC2, 0x822FFBC2, 0x8230FBC2, 0x8231FBC2, 0x8232FBC2, 0x8233FBC2, 0x8234FBC2, 0x8235FBC2, 0x8236FBC2, 0x8237FBC2, 0x8238FBC2, + 0x8239FBC2, 0x823AFBC2, 0x823BFBC2, 0x823CFBC2, 0x823DFBC2, 0x823EFBC2, 0x823FFBC2, 0x8240FBC2, 0x8241FBC2, 0x8242FBC2, 0x8243FBC2, 0x8244FBC2, 0x8245FBC2, 0x8246FBC2, 0x8247FBC2, + 0x8248FBC2, 0x8249FBC2, 0x824AFBC2, 0x824BFBC2, 0x824CFBC2, 0x824DFBC2, 0x824EFBC2, 0x824FFBC2, 0x8250FBC2, 0x8251FBC2, 0x8252FBC2, 0x8253FBC2, 0x8254FBC2, 0x8255FBC2, 0x8256FBC2, + 0x8257FBC2, 0x8258FBC2, 0x8259FBC2, 0x825AFBC2, 0x825BFBC2, 0x825CFBC2, 0x825DFBC2, 0x825EFBC2, 0x825FFBC2, 0x8260FBC2, 0x8261FBC2, 0x8262FBC2, 0x8263FBC2, 0x8264FBC2, 0x8265FBC2, + 0x8266FBC2, 0x8267FBC2, 0x8268FBC2, 0x8269FBC2, 0x826AFBC2, 0x826BFBC2, 0x826CFBC2, 0x826DFBC2, 0x826EFBC2, 0x826FFBC2, 0x8270FBC2, 0x8271FBC2, 0x8272FBC2, 0x8273FBC2, 0x8274FBC2, + 0x8275FBC2, 0x8276FBC2, 0x8277FBC2, 0x8278FBC2, 0x8279FBC2, 0x827AFBC2, 
0x827BFBC2, 0x827CFBC2, 0x827DFBC2, 0x827EFBC2, 0x827FFBC2, 0x43AF, 0x43B0, 0x43B1, 0x43B2, + 0x43B3, 0x43B4, 0x43B5, 0x43B6, 0x43B7, 0x43B8, 0x43B9, 0x43BA, 0x43BB, 0x43BC, 0x43BD, 0x43BE, 0x43BF, 0x43C0, 0x43C1, + 0x43C2, 0x43C3, 0x43C4, 0x43C5, 0x43C6, 0x43C7, 0x43C8, 0x43C9, 0x43CA, 0x43CB, 0x829DFBC2, 0x829EFBC2, 0x829FFBC2, 0x43CC, 0x43CD, + 0x43CE, 0x43CF, 0x43D0, 0x43D1, 0x43D2, 0x43D3, 0x43D4, 0x43D5, 0x43D6, 0x43D7, 0x43D8, 0x43D9, 0x43DA, 0x43DB, 0x43DC, + 0x43DD, 0x43DE, 0x43DF, 0x43E0, 0x43E1, 0x43E2, 0x43E3, 0x43E4, 0x43E5, 0x43E6, 0x43E7, 0x43E8, 0x43E9, 0x43EA, 0x43EB, + 0x43EC, 0x43ED, 0x43EE, 0x43EF, 0x43F0, 0x43F1, 0x43F2, 0x43F3, 0x43F4, 0x43F5, 0x43F6, 0x43F7, 0x43F8, 0x43F9, 0x43FA, + 0x43FB, 0x43FC, 0x82D1FBC2, 0x82D2FBC2, 0x82D3FBC2, 0x82D4FBC2, 0x82D5FBC2, 0x82D6FBC2, 0x82D7FBC2, 0x82D8FBC2, 0x82D9FBC2, 0x82DAFBC2, 0x82DBFBC2, 0x82DCFBC2, 0x82DDFBC2, + 0x82DEFBC2, 0x82DFFBC2, 0x0, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1B3A, 0x1B3B, 0x1B3C, + 0x1B3D, 0x1B3E, 0x1B3F, 0x1B40, 0x1B41, 0x1B42, 0x1B43, 0x1B44, 0x1B45, 0x1B46, 0x1B47, 0x1B48, 0x1B49, 0x1B4A, 0x1B4B, + 0x82FCFBC2, 0x82FDFBC2, 0x82FEFBC2, 0x82FFFBC2, 0x4417, 0x4418, 0x4419, 0x441A, 0x441B, 0x441C, 0x441D, 0x441E, 0x441F, 0x4420, 0x4421, + 0x4422, 0x4423, 0x4424, 0x4425, 0x4427, 0x4428, 0x4429, 0x442A, 0x442B, 0x442C, 0x442D, 0x442E, 0x442F, 0x4430, 0x4431, + 0x4432, 0x4433, 0x4434, 0x4435, 0x4436, 0x4426, 0x1C3E, 0x1C42, 0x1AE3, 0x1AE4, 0x8324FBC2, 0x8325FBC2, 0x8326FBC2, 0x8327FBC2, 0x8328FBC2, + 0x8329FBC2, 0x832AFBC2, 0x832BFBC2, 0x832CFBC2, 0x832DFBC2, 0x832EFBC2, 0x832FFBC2, 0x4437, 0x4438, 0x4439, 0x443A, 0x443B, 0x443C, 0x443D, 0x443E, + 0x443F, 0x4440, 0x4441, 0x4442, 0x4443, 0x4444, 0x4445, 0x4446, 0x4447, 0x4448, 0x4449, 0x444A, 0x444B, 0x444C, 0x444D, + 0x444E, 0x444F, 0x4450, 0x4451, 0x834BFBC2, 0x834CFBC2, 0x834DFBC2, 0x834EFBC2, 0x834FFBC2, 0x2214, 0x2215, 0x2216, 0x2217, 0x2218, 0x2219, + 0x221A, 0x221B, 0x221C, 0x221D, 0x221E, 
0x221F, 0x2220, 0x2221, 0x2222, 0x2223, 0x2224, 0x2225, 0x2226, 0x2227, 0x2228, + 0x2229, 0x222A, 0x222B, 0x222C, 0x222D, 0x222E, 0x222F, 0x2230, 0x2231, 0x2232, 0x2233, 0x2234, 0x2235, 0x2236, 0x2237, + 0x2238, 0x2239, 0x2214, 0x2217, 0x221B, 0x2221, 0x2225, 0x837BFBC2, 0x837CFBC2, 0x837DFBC2, 0x837EFBC2, 0x837FFBC2, 0x496B, 0x496C, 0x496D, + 0x496E, 0x496F, 0x4970, 0x4971, 0x4972, 0x4973, 0x4974, 0x4975, 0x4976, 0x4977, 0x4978, 0x4979, 0x497A, 0x497B, 0x497C, + 0x497D, 0x497E, 0x497F, 0x4980, 0x4981, 0x4982, 0x4983, 0x4984, 0x4985, 0x4986, 0x4987, 0x4988, 0x839EFBC2, 0x2FD, 0x4989, + 0x498A, 0x498B, 0x498C, 0x498D, 0x498E, 0x498F, 0x4990, 0x4991, 0x4992, 0x4993, 0x4994, 0x4995, 0x4996, 0x4997, 0x4998, + 0x4999, 0x499A, 0x499B, 0x499C, 0x499D, 0x499E, 0x499F, 0x49A0, 0x49A1, 0x49A2, 0x49A3, 0x49A4, 0x49A5, 0x49A6, 0x49A7, + 0x49A8, 0x49A9, 0x49AA, 0x49AB, 0x49AC, 0x83C4FBC2, 0x83C5FBC2, 0x83C6FBC2, 0x83C7FBC2, 0x49AD, 0x49AE, 0x49AF, 0x49B0, 0x49B1, 0x49B2, + 0x49B3, 0x49B4, 0x2FE, 0x1C3E, 0x1C3F, 0x1B4C, 0x1B4D, 0x1B4E, 0x83D6FBC2, 0x83D7FBC2, 0x83D8FBC2, 0x83D9FBC2, 0x83DAFBC2, 0x83DBFBC2, 0x83DCFBC2, + 0x83DDFBC2, 0x83DEFBC2, 0x83DFFBC2, 0x83E0FBC2, 0x83E1FBC2, 0x83E2FBC2, 0x83E3FBC2, 0x83E4FBC2, 0x83E5FBC2, 0x83E6FBC2, 0x83E7FBC2, 0x83E8FBC2, 0x83E9FBC2, 0x83EAFBC2, 0x83EBFBC2, + 0x83ECFBC2, 0x83EDFBC2, 0x83EEFBC2, 0x83EFFBC2, 0x83F0FBC2, 0x83F1FBC2, 0x83F2FBC2, 0x83F3FBC2, 0x83F4FBC2, 0x83F5FBC2, 0x83F6FBC2, 0x83F7FBC2, 0x83F8FBC2, 0x83F9FBC2, 0x83FAFBC2, + 0x83FBFBC2, 0x83FCFBC2, 0x83FDFBC2, 0x83FEFBC2, 0x83FFFBC2, 0x4452, 0x4453, 0x4454, 0x4455, 0x4456, 0x4457, 0x4458, 0x4459, 0x445A, 0x445B, + 0x445C, 0x445D, 0x445E, 0x445F, 0x4460, 0x4461, 0x4462, 0x4463, 0x4464, 0x4465, 0x4466, 0x4467, 0x4468, 0x4469, 0x446A, + 0x446B, 0x446C, 0x446D, 0x446E, 0x446F, 0x4470, 0x4471, 0x4472, 0x4473, 0x4474, 0x4475, 0x4476, 0x4477, 0x4478, 0x4479, + 0x4452, 0x4453, 0x4454, 0x4455, 0x4456, 0x4457, 0x4458, 0x4459, 0x445A, 0x445B, 0x445C, 0x445D, 0x445E, 0x445F, 0x4460, + 
0x4461, 0x4462, 0x4463, 0x4464, 0x4465, 0x4466, 0x4467, 0x4468, 0x4469, 0x446A, 0x446B, 0x446C, 0x446D, 0x446E, 0x446F, + 0x4470, 0x4471, 0x4472, 0x4473, 0x4474, 0x4475, 0x4476, 0x4477, 0x4478, 0x4479, 0x447A, 0x447B, 0x447C, 0x447D, 0x447E, + 0x447F, 0x4480, 0x4481, 0x4482, 0x4483, 0x4484, 0x4485, 0x4486, 0x4487, 0x4488, 0x4489, 0x448A, 0x448B, 0x448C, 0x448D, + 0x448E, 0x448F, 0x4490, 0x4491, 0x4492, 0x4493, 0x4494, 0x4495, 0x4496, 0x4497, 0x4498, 0x4499, 0x449A, 0x449B, 0x449C, + 0x449D, 0x449E, 0x449F, 0x44A0, 0x44A1, 0x44A2, 0x44A3, 0x44A4, 0x44A5, 0x44A6, 0x44A7, 0x44A8, 0x44A9, 0x4535, 0x4536, + 0x4537, 0x4538, 0x4539, 0x453A, 0x453B, 0x453C, 0x453D, 0x453E, 0x453F, 0x4540, 0x4541, 0x4542, 0x4543, 0x4544, 0x4545, + 0x4546, 0x4547, 0x4548, 0x4549, 0x454A, 0x454B, 0x454C, 0x454D, 0x454E, 0x454F, 0x4550, 0x4551, 0x4552, 0x849EFBC2, 0x849FFBC2, + 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x84AAFBC2, 0x84ABFBC2, 0x84ACFBC2, 0x84ADFBC2, 0x84AEFBC2, + 0x84AFFBC2, 0x33D5, 0x33D6, 0x33D7, 0x33D8, 0x33D9, 0x33DA, 0x33DB, 0x33DC, 0x33DD, 0x33DE, 0x33DF, 0x33E0, 0x33E1, 0x33E2, + 0x33E3, 0x33E4, 0x33E5, 0x33E6, 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x33EB, 0x33EC, 0x33ED, 0x33EE, 0x33EF, 0x33F0, 0x33F1, + 0x33F2, 0x33F3, 0x33F4, 0x33F5, 0x33F6, 0x33F7, 0x33F8, 0x84D4FBC2, 0x84D5FBC2, 0x84D6FBC2, 0x84D7FBC2, 0x33D5, 0x33D6, 0x33D7, 0x33D8, + 0x33D9, 0x33DA, 0x33DB, 0x33DC, 0x33DD, 0x33DE, 0x33DF, 0x33E0, 0x33E1, 0x33E2, 0x33E3, 0x33E4, 0x33E5, 0x33E6, 0x33E7, + 0x33E8, 0x33E9, 0x33EA, 0x33EB, 0x33EC, 0x33ED, 0x33EE, 0x33EF, 0x33F0, 0x33F1, 0x33F2, 0x33F3, 0x33F4, 0x33F5, 0x33F6, + 0x33F7, 0x33F8, 0x84FCFBC2, 0x84FDFBC2, 0x84FEFBC2, 0x84FFFBC2, 0x4553, 0x4554, 0x4555, 0x4556, 0x4557, 0x4558, 0x4559, 0x455A, 0x455B, + 0x455C, 0x455D, 0x455E, 0x455F, 0x4560, 0x4561, 0x4562, 0x4563, 0x4564, 0x4565, 0x4566, 0x4567, 0x4568, 0x4569, 0x456A, + 0x456B, 0x456C, 0x456D, 0x456E, 0x456F, 0x4570, 0x4571, 0x4572, 0x4573, 0x4574, 0x4575, 0x4576, 0x4577, 
0x4578, 0x4579, + 0x457A, 0x8528FBC2, 0x8529FBC2, 0x852AFBC2, 0x852BFBC2, 0x852CFBC2, 0x852DFBC2, 0x852EFBC2, 0x852FFBC2, 0x457B, 0x457C, 0x457D, 0x457E, 0x457F, 0x4580, + 0x4581, 0x4582, 0x4583, 0x4584, 0x4585, 0x4586, 0x4587, 0x4588, 0x4589, 0x458A, 0x458B, 0x458C, 0x458D, 0x458E, 0x458F, + 0x4590, 0x4591, 0x4592, 0x4593, 0x4594, 0x4595, 0x4596, 0x4597, 0x4598, 0x4599, 0x459A, 0x459B, 0x459C, 0x459D, 0x459E, + 0x459F, 0x45A0, 0x45A1, 0x45A2, 0x45A3, 0x45A4, 0x45A5, 0x45A6, 0x45A7, 0x45A8, 0x45A9, 0x45AA, 0x45AB, 0x45AC, 0x45AD, + 0x45AE, 0x8564FBC2, 0x8565FBC2, 0x8566FBC2, 0x8567FBC2, 0x8568FBC2, 0x8569FBC2, 0x856AFBC2, 0x856BFBC2, 0x856CFBC2, 0x856DFBC2, 0x856EFBC2, 0x432, 0x8570FBC2, 0x8571FBC2, + 0x8572FBC2, 0x8573FBC2, 0x8574FBC2, 0x8575FBC2, 0x8576FBC2, 0x8577FBC2, 0x8578FBC2, 0x8579FBC2, 0x857AFBC2, 0x857BFBC2, 0x857CFBC2, 0x857DFBC2, 0x857EFBC2, 0x857FFBC2, 0x8580FBC2, + 0x8581FBC2, 0x8582FBC2, 0x8583FBC2, 0x8584FBC2, 0x8585FBC2, 0x8586FBC2, 0x8587FBC2, 0x8588FBC2, 0x8589FBC2, 0x858AFBC2, 0x858BFBC2, 0x858CFBC2, 0x858DFBC2, 0x858EFBC2, 0x858FFBC2, + 0x8590FBC2, 0x8591FBC2, 0x8592FBC2, 0x8593FBC2, 0x8594FBC2, 0x8595FBC2, 0x8596FBC2, 0x8597FBC2, 0x8598FBC2, 0x8599FBC2, 0x859AFBC2, 0x859BFBC2, 0x859CFBC2, 0x859DFBC2, 0x859EFBC2, + 0x859FFBC2, 0x85A0FBC2, 0x85A1FBC2, 0x85A2FBC2, 0x85A3FBC2, 0x85A4FBC2, 0x85A5FBC2, 0x85A6FBC2, 0x85A7FBC2, 0x85A8FBC2, 0x85A9FBC2, 0x85AAFBC2, 0x85ABFBC2, 0x85ACFBC2, 0x85ADFBC2, + 0x85AEFBC2, 0x85AFFBC2, 0x85B0FBC2, 0x85B1FBC2, 0x85B2FBC2, 0x85B3FBC2, 0x85B4FBC2, 0x85B5FBC2, 0x85B6FBC2, 0x85B7FBC2, 0x85B8FBC2, 0x85B9FBC2, 0x85BAFBC2, 0x85BBFBC2, 0x85BCFBC2, + 0x85BDFBC2, 0x85BEFBC2, 0x85BFFBC2, 0x85C0FBC2, 0x85C1FBC2, 0x85C2FBC2, 0x85C3FBC2, 0x85C4FBC2, 0x85C5FBC2, 0x85C6FBC2, 0x85C7FBC2, 0x85C8FBC2, 0x85C9FBC2, 0x85CAFBC2, 0x85CBFBC2, + 0x85CCFBC2, 0x85CDFBC2, 0x85CEFBC2, 0x85CFFBC2, 0x85D0FBC2, 0x85D1FBC2, 0x85D2FBC2, 0x85D3FBC2, 0x85D4FBC2, 0x85D5FBC2, 0x85D6FBC2, 0x85D7FBC2, 0x85D8FBC2, 0x85D9FBC2, 0x85DAFBC2, + 0x85DBFBC2, 
0x85DCFBC2, 0x85DDFBC2, 0x85DEFBC2, 0x85DFFBC2, 0x85E0FBC2, 0x85E1FBC2, 0x85E2FBC2, 0x85E3FBC2, 0x85E4FBC2, 0x85E5FBC2, 0x85E6FBC2, 0x85E7FBC2, 0x85E8FBC2, 0x85E9FBC2, + 0x85EAFBC2, 0x85EBFBC2, 0x85ECFBC2, 0x85EDFBC2, 0x85EEFBC2, 0x85EFFBC2, 0x85F0FBC2, 0x85F1FBC2, 0x85F2FBC2, 0x85F3FBC2, 0x85F4FBC2, 0x85F5FBC2, 0x85F6FBC2, 0x85F7FBC2, 0x85F8FBC2, + 0x85F9FBC2, 0x85FAFBC2, 0x85FBFBC2, 0x85FCFBC2, 0x85FDFBC2, 0x85FEFBC2, 0x85FFFBC2, 0x46BA, 0x46BB, 0x46BC, 0x46BD, 0x46BE, 0x46BF, 0x46C0, 0x46C1, + 0x46C2, 0x46C3, 0x46C4, 0x46C5, 0x46C6, 0x46C7, 0x46C8, 0x46C9, 0x46CA, 0x46CB, 0x46CC, 0x46CD, 0x46CE, 0x46CF, 0x46D0, + 0x46D1, 0x46D2, 0x46D3, 0x46D4, 0x46D5, 0x46D6, 0x46D7, 0x46D8, 0x46D9, 0x46DA, 0x46DB, 0x46DC, 0x46DD, 0x46DE, 0x46DF, + 0x46E0, 0x46E1, 0x46E2, 0x46E3, 0x46E4, 0x46E5, 0x46E6, 0x46E7, 0x46E8, 0x46E9, 0x46EA, 0x46EB, 0x46EC, 0x46ED, 0x46EE, + 0x46EF, 0x46F0, 0x46F1, 0x46F2, 0x46F3, 0x46F4, 0x46F5, 0x46F6, 0x46F7, 0x46F8, 0x46F9, 0x46FA, 0x46FB, 0x46FC, 0x46FD, + 0x46FE, 0x46FF, 0x4700, 0x4701, 0x4702, 0x4703, 0x4704, 0x4705, 0x4706, 0x4707, 0x4708, 0x4709, 0x470A, 0x470B, 0x470C, + 0x470D, 0x470E, 0x470F, 0x4710, 0x4711, 0x4712, 0x4713, 0x4714, 0x4715, 0x4716, 0x4717, 0x4718, 0x4719, 0x471A, 0x471B, + 0x471C, 0x471D, 0x471E, 0x471F, 0x4720, 0x4721, 0x4722, 0x4723, 0x4724, 0x4725, 0x4726, 0x4727, 0x4728, 0x4729, 0x472A, + 0x472B, 0x472C, 0x472D, 0x472E, 0x472F, 0x4730, 0x4731, 0x4732, 0x4733, 0x4734, 0x4735, 0x4736, 0x4737, 0x4738, 0x4739, + 0x473A, 0x473B, 0x473C, 0x473D, 0x473E, 0x473F, 0x4740, 0x4741, 0x4742, 0x4743, 0x4744, 0x4745, 0x4746, 0x4747, 0x4748, + 0x4749, 0x474A, 0x474B, 0x474C, 0x474D, 0x474E, 0x474F, 0x4750, 0x4751, 0x4752, 0x4753, 0x4754, 0x4755, 0x4756, 0x4757, + 0x4758, 0x4759, 0x475A, 0x475B, 0x475C, 0x475D, 0x475E, 0x475F, 0x4760, 0x4761, 0x4762, 0x4763, 0x4764, 0x4765, 0x4766, + 0x4767, 0x4768, 0x4769, 0x476A, 0x476B, 0x476C, 0x476D, 0x476E, 0x476F, 0x4770, 0x4771, 0x4772, 0x4773, 0x4774, 0x4775, + 0x4776, 0x4777, 0x4778, 0x4779, 
0x477A, 0x477B, 0x477C, 0x477D, 0x477E, 0x477F, 0x4780, 0x4781, 0x4782, 0x4783, 0x4784, + 0x4785, 0x4786, 0x4787, 0x4788, 0x4789, 0x478A, 0x478B, 0x478C, 0x478D, 0x478E, 0x478F, 0x4790, 0x4791, 0x4792, 0x4793, + 0x4794, 0x4795, 0x4796, 0x4797, 0x4798, 0x4799, 0x479A, 0x479B, 0x479C, 0x479D, 0x479E, 0x479F, 0x47A0, 0x47A1, 0x47A2, + 0x47A3, 0x47A4, 0x47A5, 0x47A6, 0x47A7, 0x47A8, 0x47A9, 0x47AA, 0x47AB, 0x47AC, 0x47AD, 0x47AE, 0x47AF, 0x47B0, 0x47B1, + 0x47B2, 0x47B3, 0x47B4, 0x47B5, 0x47B6, 0x47B7, 0x47B8, 0x47B9, 0x47BA, 0x47BB, 0x47BC, 0x47BD, 0x47BE, 0x47BF, 0x47C0, + 0x47C1, 0x47C2, 0x47C3, 0x47C4, 0x47C5, 0x47C6, 0x47C7, 0x47C8, 0x47C9, 0x47CA, 0x47CB, 0x47CC, 0x47CD, 0x47CE, 0x47CF, + 0x47D0, 0x47D1, 0x47D2, 0x47D3, 0x47D4, 0x47D5, 0x47D6, 0x47D7, 0x47D8, 0x47D9, 0x47DA, 0x47DB, 0x47DC, 0x47DD, 0x47DE, + 0x47DF, 0x47E0, 0x47E1, 0x47E2, 0x47E3, 0x47E4, 0x47E5, 0x47E6, 0x47E7, 0x47E8, 0x47E9, 0x47EA, 0x47EB, 0x47EC, 0x47ED, + 0x47EE, 0x47EF, 0x47F0, 0x8737FBC2, 0x8738FBC2, 0x8739FBC2, 0x873AFBC2, 0x873BFBC2, 0x873CFBC2, 0x873DFBC2, 0x873EFBC2, 0x873FFBC2, 0x47F1, 0x47F2, 0x47F3, + 0x47F4, 0x47F5, 0x47F6, 0x47F7, 0x47F8, 0x47F9, 0x47FA, 0x47FB, 0x47FC, 0x47FD, 0x47FE, 0x47FF, 0x4800, 0x4801, 0x4802, + 0x4803, 0x4804, 0x4805, 0x4806, 0x8756FBC2, 0x8757FBC2, 0x8758FBC2, 0x8759FBC2, 0x875AFBC2, 0x875BFBC2, 0x875CFBC2, 0x875DFBC2, 0x875EFBC2, 0x875FFBC2, 0x4807, + 0x4808, 0x4809, 0x480A, 0x480B, 0x480C, 0x480D, 0x480E, 0x8768FBC2, 0x8769FBC2, 0x876AFBC2, 0x876BFBC2, 0x876CFBC2, 0x876DFBC2, 0x876EFBC2, 0x876FFBC2, + 0x8770FBC2, 0x8771FBC2, 0x8772FBC2, 0x8773FBC2, 0x8774FBC2, 0x8775FBC2, 0x8776FBC2, 0x8777FBC2, 0x8778FBC2, 0x8779FBC2, 0x877AFBC2, 0x877BFBC2, 0x877CFBC2, 0x877DFBC2, 0x877EFBC2, + 0x877FFBC2, 0x8780FBC2, 0x8781FBC2, 0x8782FBC2, 0x8783FBC2, 0x8784FBC2, 0x8785FBC2, 0x8786FBC2, 0x8787FBC2, 0x8788FBC2, 0x8789FBC2, 0x878AFBC2, 0x878BFBC2, 0x878CFBC2, 0x878DFBC2, + 0x878EFBC2, 0x878FFBC2, 0x8790FBC2, 0x8791FBC2, 0x8792FBC2, 0x8793FBC2, 0x8794FBC2, 0x8795FBC2, 
0x8796FBC2, 0x8797FBC2, 0x8798FBC2, 0x8799FBC2, 0x879AFBC2, 0x879BFBC2, 0x879CFBC2, + 0x879DFBC2, 0x879EFBC2, 0x879FFBC2, 0x87A0FBC2, 0x87A1FBC2, 0x87A2FBC2, 0x87A3FBC2, 0x87A4FBC2, 0x87A5FBC2, 0x87A6FBC2, 0x87A7FBC2, 0x87A8FBC2, 0x87A9FBC2, 0x87AAFBC2, 0x87ABFBC2, + 0x87ACFBC2, 0x87ADFBC2, 0x87AEFBC2, 0x87AFFBC2, 0x87B0FBC2, 0x87B1FBC2, 0x87B2FBC2, 0x87B3FBC2, 0x87B4FBC2, 0x87B5FBC2, 0x87B6FBC2, 0x87B7FBC2, 0x87B8FBC2, 0x87B9FBC2, 0x87BAFBC2, + 0x87BBFBC2, 0x87BCFBC2, 0x87BDFBC2, 0x87BEFBC2, 0x87BFFBC2, 0x87C0FBC2, 0x87C1FBC2, 0x87C2FBC2, 0x87C3FBC2, 0x87C4FBC2, 0x87C5FBC2, 0x87C6FBC2, 0x87C7FBC2, 0x87C8FBC2, 0x87C9FBC2, + 0x87CAFBC2, 0x87CBFBC2, 0x87CCFBC2, 0x87CDFBC2, 0x87CEFBC2, 0x87CFFBC2, 0x87D0FBC2, 0x87D1FBC2, 0x87D2FBC2, 0x87D3FBC2, 0x87D4FBC2, 0x87D5FBC2, 0x87D6FBC2, 0x87D7FBC2, 0x87D8FBC2, + 0x87D9FBC2, 0x87DAFBC2, 0x87DBFBC2, 0x87DCFBC2, 0x87DDFBC2, 0x87DEFBC2, 0x87DFFBC2, 0x87E0FBC2, 0x87E1FBC2, 0x87E2FBC2, 0x87E3FBC2, 0x87E4FBC2, 0x87E5FBC2, 0x87E6FBC2, 0x87E7FBC2, + 0x87E8FBC2, 0x87E9FBC2, 0x87EAFBC2, 0x87EBFBC2, 0x87ECFBC2, 0x87EDFBC2, 0x87EEFBC2, 0x87EFFBC2, 0x87F0FBC2, 0x87F1FBC2, 0x87F2FBC2, 0x87F3FBC2, 0x87F4FBC2, 0x87F5FBC2, 0x87F6FBC2, + 0x87F7FBC2, 0x87F8FBC2, 0x87F9FBC2, 0x87FAFBC2, 0x87FBFBC2, 0x87FCFBC2, 0x87FDFBC2, 0x87FEFBC2, 0x87FFFBC2, 0x480F, 0x4810, 0x4811, 0x4812, 0x4813, 0x4814, + 0x8806FBC2, 0x8807FBC2, 0x4815, 0x8809FBC2, 0x4816, 0x4817, 0x4818, 0x4819, 0x481A, 0x481B, 0x481C, 0x481D, 0x481E, 0x481F, 0x4820, + 0x4821, 0x4822, 0x4823, 0x4824, 0x4825, 0x4826, 0x4827, 0x4828, 0x4829, 0x482A, 0x482B, 0x482C, 0x482D, 0x482E, 0x482F, + 0x4830, 0x4831, 0x4832, 0x4833, 0x4834, 0x4835, 0x4836, 0x4837, 0x4838, 0x4839, 0x483A, 0x483B, 0x483C, 0x483D, 0x483E, + 0x483F, 0x4840, 0x4841, 0x8836FBC2, 0x4842, 0x4843, 0x8839FBC2, 0x883AFBC2, 0x883BFBC2, 0x4844, 0x883DFBC2, 0x883EFBC2, 0x4845, 0x48F6, 0x48F7, + 0x48F8, 0x48F9, 0x48FA, 0x48FB, 0x48FC, 0x48FD, 0x48FE, 0x48FF, 0x4900, 0x4901, 0x4902, 0x4903, 0x4904, 0x4905, 0x4906, + 0x4907, 0x4908, 
0x4909, 0x490A, 0x490B, 0x8856FBC2, 0x2D9, 0x1C3E, 0x1C3F, 0x1C40, 0x1B5E, 0x1B5F, 0x1B60, 0x1B61, 0x1B62, + 0x48B5, 0x48B6, 0x48B7, 0x48B8, 0x48B9, 0x48BA, 0x48BB, 0x48BC, 0x48BD, 0x48BE, 0x48BF, 0x48C0, 0x48C1, 0x48C2, 0x48C2, + 0x48C3, 0x48C4, 0x48C5, 0x48C6, 0x48C7, 0x48C8, 0x48C9, 0x48CA, 0xFCF, 0xFD0, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, + 0x1B4F, 0x1B50, 0x48CB, 0x48CB, 0x48CC, 0x48CC, 0x48CD, 0x48CE, 0x48CF, 0x48CF, 0x48D0, 0x48D1, 0x48D2, 0x48D3, 0x48D4, + 0x48D4, 0x48D5, 0x48D5, 0x48D6, 0x48D6, 0x48D7, 0x48D7, 0x48D8, 0x48D8, 0x48D9, 0x48DA, 0x48DB, 0x48DC, 0x48DD, 0x48DE, + 0x48DF, 0x48DF, 0x48E0, 0x889FFBC2, 0x88A0FBC2, 0x88A1FBC2, 0x88A2FBC2, 0x88A3FBC2, 0x88A4FBC2, 0x88A5FBC2, 0x88A6FBC2, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, + 0x1C41, 0x1C42, 0x1B51, 0x1B52, 0x1B53, 0x88B0FBC2, 0x88B1FBC2, 0x88B2FBC2, 0x88B3FBC2, 0x88B4FBC2, 0x88B5FBC2, 0x88B6FBC2, 0x88B7FBC2, 0x88B8FBC2, 0x88B9FBC2, + 0x88BAFBC2, 0x88BBFBC2, 0x88BCFBC2, 0x88BDFBC2, 0x88BEFBC2, 0x88BFFBC2, 0x88C0FBC2, 0x88C1FBC2, 0x88C2FBC2, 0x88C3FBC2, 0x88C4FBC2, 0x88C5FBC2, 0x88C6FBC2, 0x88C7FBC2, 0x88C8FBC2, + 0x88C9FBC2, 0x88CAFBC2, 0x88CBFBC2, 0x88CCFBC2, 0x88CDFBC2, 0x88CEFBC2, 0x88CFFBC2, 0x88D0FBC2, 0x88D1FBC2, 0x88D2FBC2, 0x88D3FBC2, 0x88D4FBC2, 0x88D5FBC2, 0x88D6FBC2, 0x88D7FBC2, + 0x88D8FBC2, 0x88D9FBC2, 0x88DAFBC2, 0x88DBFBC2, 0x88DCFBC2, 0x88DDFBC2, 0x88DEFBC2, 0x88DFFBC2, 0x48E1, 0x48E2, 0x48E3, 0x48E4, 0x48E5, 0x48E6, 0x48E7, + 0x48E8, 0x48E9, 0x48EA, 0x48EB, 0x48EC, 0x48ED, 0x48EE, 0x48EF, 0x48F0, 0x48F1, 0x48F2, 0x48F3, 0x88F3FBC2, 0x48F4, 0x48F5, + 0x88F6FBC2, 0x88F7FBC2, 0x88F8FBC2, 0x88F9FBC2, 0x88FAFBC2, 0x1C3E, 0x1C42, 0x1B54, 0x1B55, 0x1B56, 0x22CD, 0x22CE, 0x22CF, 0x22D0, 0x22D1, + 0x22D2, 0x22D3, 0x22D4, 0x22D5, 0x22D6, 0x22D7, 0x22D8, 0x22D9, 0x22DA, 0x22DB, 0x22DC, 0x22DD, 0x22DE, 0x22DF, 0x22E0, + 0x22E1, 0x22E2, 0x1C3E, 0x1B5B, 0x1B5C, 0x1B5D, 0x1C3F, 0x1C40, 0x891CFBC2, 0x891DFBC2, 0x891EFBC2, 0x2FF, 0x43FD, 0x43FE, 0x43FF, + 0x4400, 0x4401, 0x4402, 0x4403, 0x4404, 0x4405, 
0x4406, 0x4407, 0x4408, 0x4409, 0x440A, 0x440B, 0x440C, 0x440D, 0x440E, + 0x440F, 0x4410, 0x4411, 0x4412, 0x4413, 0x4414, 0x4415, 0x4416, 0x893AFBC2, 0x893BFBC2, 0x893CFBC2, 0x893DFBC2, 0x893EFBC2, 0x2F9, 0x8940FBC2, + 0x8941FBC2, 0x8942FBC2, 0x8943FBC2, 0x8944FBC2, 0x8945FBC2, 0x8946FBC2, 0x8947FBC2, 0x8948FBC2, 0x8949FBC2, 0x894AFBC2, 0x894BFBC2, 0x894CFBC2, 0x894DFBC2, 0x894EFBC2, 0x894FFBC2, + 0x8950FBC2, 0x8951FBC2, 0x8952FBC2, 0x8953FBC2, 0x8954FBC2, 0x8955FBC2, 0x8956FBC2, 0x8957FBC2, 0x8958FBC2, 0x8959FBC2, 0x895AFBC2, 0x895BFBC2, 0x895CFBC2, 0x895DFBC2, 0x895EFBC2, + 0x895FFBC2, 0x8960FBC2, 0x8961FBC2, 0x8962FBC2, 0x8963FBC2, 0x8964FBC2, 0x8965FBC2, 0x8966FBC2, 0x8967FBC2, 0x8968FBC2, 0x8969FBC2, 0x896AFBC2, 0x896BFBC2, 0x896CFBC2, 0x896DFBC2, + 0x896EFBC2, 0x896FFBC2, 0x8970FBC2, 0x8971FBC2, 0x8972FBC2, 0x8973FBC2, 0x8974FBC2, 0x8975FBC2, 0x8976FBC2, 0x8977FBC2, 0x8978FBC2, 0x8979FBC2, 0x897AFBC2, 0x897BFBC2, 0x897CFBC2, + 0x897DFBC2, 0x897EFBC2, 0x897FFBC2, 0x5242, 0x5243, 0x5244, 0x5245, 0x5246, 0x5247, 0x5248, 0x5248, 0x5249, 0x524A, 0x524B, 0x524B, + 0x524C, 0x524C, 0x524D, 0x524D, 0x524E, 0x524F, 0x5250, 0x5251, 0x5251, 0x5252, 0x5253, 0x5254, 0x5255, 0x5255, 0x5256, + 0x5256, 0x5257, 0x5258, 0x525B, 0x525C, 0x5242, 0x5243, 0x5244, 0x5245, 0x5246, 0x5247, 0x5248, 0x5249, 0x524A, 0x524B, + 0x524C, 0x524D, 0x524E, 0x524F, 0x5250, 0x5251, 0x5251, 0x5252, 0x5253, 0x5254, 0x5255, 0x5256, 0x5257, 0x5258, 0x89B8FBC2, + 0x89B9FBC2, 0x89BAFBC2, 0x89BBFBC2, 0x1BDD, 0x1BD2, 0x5259, 0x525A, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, + 0x1C46, 0x1BA7, 0x1BA8, 0x1BA9, 0x1BAA, 0x1BAB, 0x1BAC, 0x1BAD, 0x89D0FBC2, 0x89D1FBC2, 0x1BAE, 0x1BAF, 0x1BB0, 0x1BB1, 0x1BB2, + 0x1BB3, 0x1BB4, 0x1BB5, 0x1BB6, 0x1BB7, 0x1BB8, 0x1BB9, 0x1BBA, 0x1BBB, 0x1BBC, 0x1BBD, 0x1BBE, 0x1BBF, 0x1BC0, 0x1BC1, + 0x1BC2, 0x1BC3, 0x1BC4, 0x1BC5, 0x1BC6, 0x1BC7, 0x1BC8, 0x1BC9, 0x1BCA, 0x1BCB, 0x1BCC, 0x1BCD, 0x1BCE, 0x1BCF, 0x1BD0, + 0x1BD1, 0x1BD3, 0x1BD4, 0x1BD5, 0x1BD6, 
0x1BD7, 0x1BD8, 0x1BD9, 0x1BDA, 0x1BDB, 0x1BDC, 0x2D0D, 0x2D0E, 0x2D0F, 0x2D10, + 0x8A04FBC2, 0x2D11, 0x2D12, 0x8A07FBC2, 0x8A08FBC2, 0x8A09FBC2, 0x8A0AFBC2, 0x8A0BFBC2, 0x2D13, 0x0, 0x0, 0x0, 0x2D14, 0x2D15, 0x2D16, + 0x2D17, 0x8A14FBC2, 0x2D18, 0x2D19, 0x2D1A, 0x8A18FBC2, 0x2D1B, 0x2D1C, 0x2D1D, 0x2D1E, 0x2D1F, 0x2D20, 0x2D21, 0x2D22, 0x2D23, + 0x2D24, 0x2D25, 0x2D26, 0x2D27, 0x2D28, 0x2D29, 0x2D2A, 0x2D2B, 0x2D2C, 0x2D2D, 0x2D2E, 0x2D2F, 0x2D30, 0x2D31, 0x2D32, + 0x2D33, 0x2D34, 0x2D35, 0x8A34FBC2, 0x8A35FBC2, 0x8A36FBC2, 0x8A37FBC2, 0x0, 0x0, 0x0, 0x8A3BFBC2, 0x8A3CFBC2, 0x8A3DFBC2, 0x8A3EFBC2, 0x2D36, + 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1B7E, 0x1B7F, 0x1B80, 0x1B81, 0x8A48FBC2, 0x8A49FBC2, 0x8A4AFBC2, 0x8A4BFBC2, 0x8A4CFBC2, 0x8A4DFBC2, 0x8A4EFBC2, + 0x8A4FFBC2, 0x438, 0x439, 0x43A, 0x43B, 0x43C, 0x43D, 0x2AA, 0x2AB, 0x43E, 0x8A59FBC2, 0x8A5AFBC2, 0x8A5BFBC2, 0x8A5CFBC2, 0x8A5DFBC2, + 0x8A5EFBC2, 0x8A5FFBC2, 0x4846, 0x4847, 0x4848, 0x4849, 0x484A, 0x484B, 0x484C, 0x484D, 0x484E, 0x484F, 0x4850, 0x4851, 0x4852, + 0x4853, 0x4854, 0x4855, 0x4856, 0x4857, 0x4858, 0x4859, 0x485A, 0x485B, 0x485C, 0x485D, 0x485E, 0x485F, 0x4860, 0x4861, + 0x4862, 0x1C3E, 0x1B57, 0x1B58, 0x4863, 0x4864, 0x4865, 0x4866, 0x4867, 0x4868, 0x4869, 0x486A, 0x486B, 0x486C, 0x486D, + 0x486E, 0x486F, 0x4870, 0x4871, 0x4872, 0x4873, 0x4874, 0x4875, 0x4876, 0x4877, 0x4878, 0x4879, 0x487A, 0x487B, 0x487C, + 0x487D, 0x487E, 0x487F, 0x1C3E, 0x1B59, 0x1B5A, 0x8AA0FBC2, 0x8AA1FBC2, 0x8AA2FBC2, 0x8AA3FBC2, 0x8AA4FBC2, 0x8AA5FBC2, 0x8AA6FBC2, 0x8AA7FBC2, 0x8AA8FBC2, + 0x8AA9FBC2, 0x8AAAFBC2, 0x8AABFBC2, 0x8AACFBC2, 0x8AADFBC2, 0x8AAEFBC2, 0x8AAFFBC2, 0x8AB0FBC2, 0x8AB1FBC2, 0x8AB2FBC2, 0x8AB3FBC2, 0x8AB4FBC2, 0x8AB5FBC2, 0x8AB6FBC2, 0x8AB7FBC2, + 0x8AB8FBC2, 0x8AB9FBC2, 0x8ABAFBC2, 0x8ABBFBC2, 0x8ABCFBC2, 0x8ABDFBC2, 0x8ABEFBC2, 0x8ABFFBC2, 0x4947, 0x4948, 0x4949, 0x494A, 0x494B, 0x494C, 0x494D, + 0x494E, 0x494E, 0x494F, 0x4950, 0x4951, 0x4952, 0x4953, 0x4954, 0x4955, 0x4956, 0x4957, 0x4958, 0x4959, 
0x495A, 0x495B, + 0x495C, 0x495D, 0x495E, 0x495F, 0x4960, 0x4961, 0x4962, 0x4963, 0x4964, 0x4965, 0x4966, 0x4967, 0x4968, 0x4969, 0x496A, + 0x0, 0x0, 0x8AE7FBC2, 0x8AE8FBC2, 0x8AE9FBC2, 0x8AEAFBC2, 0x1C3E, 0x1C42, 0x1B63, 0x1B64, 0x1B65, 0x443, 0x444, 0x445, 0x446, + 0x447, 0x448, 0x449, 0x8AF7FBC2, 0x8AF8FBC2, 0x8AF9FBC2, 0x8AFAFBC2, 0x8AFBFBC2, 0x8AFCFBC2, 0x8AFDFBC2, 0x8AFEFBC2, 0x8AFFFBC2, 0x4880, 0x4881, 0x4882, + 0x4883, 0x4884, 0x4885, 0x4886, 0x4887, 0x4888, 0x4889, 0x488A, 0x488B, 0x488C, 0x488D, 0x488E, 0x488F, 0x4890, 0x4891, + 0x4892, 0x4893, 0x4894, 0x4895, 0x4896, 0x4897, 0x4898, 0x4899, 0x489A, 0x489B, 0x489C, 0x489D, 0x489E, 0x489F, 0x48A0, + 0x48A1, 0x48A2, 0x48A3, 0x48A4, 0x48A5, 0x48A6, 0x48A7, 0x48A8, 0x48A9, 0x48AA, 0x48AB, 0x48AC, 0x48AD, 0x48AD, 0x48AE, + 0x48AF, 0x48B0, 0x48B1, 0x48B2, 0x48B3, 0x48B4, 0x8B36FBC2, 0x8B37FBC2, 0x8B38FBC2, 0x442, 0x2DA, 0x2DB, 0x2DC, 0x2DD, 0x2DE, + 0x2DF, 0x490C, 0x490D, 0x490E, 0x490F, 0x4910, 0x4911, 0x4912, 0x4913, 0x4914, 0x4915, 0x4916, 0x4917, 0x4918, 0x4919, + 0x491A, 0x491B, 0x491C, 0x491D, 0x491E, 0x491F, 0x4920, 0x4921, 0x8B56FBC2, 0x8B57FBC2, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1B66, + 0x1B67, 0x1B68, 0x1B69, 0x4922, 0x4923, 0x4924, 0x4925, 0x4926, 0x4927, 0x4928, 0x4929, 0x492A, 0x492B, 0x492C, 0x492D, + 0x492E, 0x492F, 0x4930, 0x4931, 0x4932, 0x4933, 0x4934, 0x8B73FBC2, 0x8B74FBC2, 0x8B75FBC2, 0x8B76FBC2, 0x8B77FBC2, 0x1C3E, 0x1C3F, 0x1C40, + 0x1C41, 0x1B6A, 0x1B6B, 0x1B6C, 0x1B6D, 0x4935, 0x4936, 0x4937, 0x4938, 0x4939, 0x493A, 0x493B, 0x493C, 0x493D, 0x493E, + 0x493F, 0x4940, 0x4941, 0x4942, 0x4943, 0x4944, 0x4945, 0x4946, 0x8B92FBC2, 0x8B93FBC2, 0x8B94FBC2, 0x8B95FBC2, 0x8B96FBC2, 0x8B97FBC2, 0x8B98FBC2, + 0x44A, 0x44B, 0x44C, 0x44D, 0x8B9DFBC2, 0x8B9EFBC2, 0x8B9FFBC2, 0x8BA0FBC2, 0x8BA1FBC2, 0x8BA2FBC2, 0x8BA3FBC2, 0x8BA4FBC2, 0x8BA5FBC2, 0x8BA6FBC2, 0x8BA7FBC2, + 0x8BA8FBC2, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1B6E, 0x1B6F, 0x1B70, 0x8BB0FBC2, 0x8BB1FBC2, 0x8BB2FBC2, 0x8BB3FBC2, 0x8BB4FBC2, 
0x8BB5FBC2, 0x8BB6FBC2, + 0x8BB7FBC2, 0x8BB8FBC2, 0x8BB9FBC2, 0x8BBAFBC2, 0x8BBBFBC2, 0x8BBCFBC2, 0x8BBDFBC2, 0x8BBEFBC2, 0x8BBFFBC2, 0x8BC0FBC2, 0x8BC1FBC2, 0x8BC2FBC2, 0x8BC3FBC2, 0x8BC4FBC2, 0x8BC5FBC2, + 0x8BC6FBC2, 0x8BC7FBC2, 0x8BC8FBC2, 0x8BC9FBC2, 0x8BCAFBC2, 0x8BCBFBC2, 0x8BCCFBC2, 0x8BCDFBC2, 0x8BCEFBC2, 0x8BCFFBC2, 0x8BD0FBC2, 0x8BD1FBC2, 0x8BD2FBC2, 0x8BD3FBC2, 0x8BD4FBC2, + 0x8BD5FBC2, 0x8BD6FBC2, 0x8BD7FBC2, 0x8BD8FBC2, 0x8BD9FBC2, 0x8BDAFBC2, 0x8BDBFBC2, 0x8BDCFBC2, 0x8BDDFBC2, 0x8BDEFBC2, 0x8BDFFBC2, 0x8BE0FBC2, 0x8BE1FBC2, 0x8BE2FBC2, 0x8BE3FBC2, + 0x8BE4FBC2, 0x8BE5FBC2, 0x8BE6FBC2, 0x8BE7FBC2, 0x8BE8FBC2, 0x8BE9FBC2, 0x8BEAFBC2, 0x8BEBFBC2, 0x8BECFBC2, 0x8BEDFBC2, 0x8BEEFBC2, 0x8BEFFBC2, 0x8BF0FBC2, 0x8BF1FBC2, 0x8BF2FBC2, + 0x8BF3FBC2, 0x8BF4FBC2, 0x8BF5FBC2, 0x8BF6FBC2, 0x8BF7FBC2, 0x8BF8FBC2, 0x8BF9FBC2, 0x8BFAFBC2, 0x8BFBFBC2, 0x8BFCFBC2, 0x8BFDFBC2, 0x8BFEFBC2, 0x8BFFFBC2, 0x372D, 0x372D, + 0x372E, 0x372F, 0x372F, 0x3730, 0x3731, 0x3732, 0x3732, 0x3733, 0x3733, 0x3734, 0x3734, 0x3735, 0x3735, 0x3736, 0x3736, + 0x3737, 0x3737, 0x3738, 0x3739, 0x3739, 0x373A, 0x373A, 0x373B, 0x373B, 0x373C, 0x373C, 0x373D, 0x373D, 0x373E, 0x373E, + 0x373F, 0x3740, 0x3741, 0x3742, 0x3743, 0x3743, 0x3744, 0x3744, 0x3745, 0x3745, 0x3746, 0x3746, 0x3747, 0x3748, 0x3748, + 0x3749, 0x374A, 0x374B, 0x374C, 0x374C, 0x374D, 0x374D, 0x374E, 0x374E, 0x374F, 0x374F, 0x3750, 0x3750, 0x3751, 0x3752, + 0x3753, 0x3754, 0x3754, 0x3755, 0x3755, 0x3756, 0x3756, 0x3757, 0x3757, 0x3758, 0x3759, 0x8C49FBC2, 0x8C4AFBC2, 0x8C4BFBC2, 0x8C4CFBC2, + 0x8C4DFBC2, 0x8C4EFBC2, 0x8C4FFBC2, 0x8C50FBC2, 0x8C51FBC2, 0x8C52FBC2, 0x8C53FBC2, 0x8C54FBC2, 0x8C55FBC2, 0x8C56FBC2, 0x8C57FBC2, 0x8C58FBC2, 0x8C59FBC2, 0x8C5AFBC2, 0x8C5BFBC2, + 0x8C5CFBC2, 0x8C5DFBC2, 0x8C5EFBC2, 0x8C5FFBC2, 0x8C60FBC2, 0x8C61FBC2, 0x8C62FBC2, 0x8C63FBC2, 0x8C64FBC2, 0x8C65FBC2, 0x8C66FBC2, 0x8C67FBC2, 0x8C68FBC2, 0x8C69FBC2, 0x8C6AFBC2, + 0x8C6BFBC2, 0x8C6CFBC2, 0x8C6DFBC2, 0x8C6EFBC2, 0x8C6FFBC2, 0x8C70FBC2, 
0x8C71FBC2, 0x8C72FBC2, 0x8C73FBC2, 0x8C74FBC2, 0x8C75FBC2, 0x8C76FBC2, 0x8C77FBC2, 0x8C78FBC2, 0x8C79FBC2, + 0x8C7AFBC2, 0x8C7BFBC2, 0x8C7CFBC2, 0x8C7DFBC2, 0x8C7EFBC2, 0x8C7FFBC2, 0x3704, 0x3704, 0x3705, 0x3706, 0x3707, 0x3708, 0x3709, 0x370A, 0x370B, + 0x370C, 0x370C, 0x370C, 0x370D, 0x370E, 0x370F, 0x3710, 0x3711, 0x3711, 0x3712, 0x3713, 0x3714, 0x3715, 0x3716, 0x3717, + 0x3718, 0x3719, 0x371A, 0x371B, 0x371B, 0x371C, 0x371C, 0x371C, 0x371D, 0x371E, 0x371F, 0x371F, 0x3720, 0x3721, 0x3722, + 0x3723, 0x3724, 0x3725, 0x3726, 0x3726, 0x3727, 0x3727, 0x3728, 0x3729, 0x372A, 0x372B, 0x372C, 0x8CB3FBC2, 0x8CB4FBC2, 0x8CB5FBC2, + 0x8CB6FBC2, 0x8CB7FBC2, 0x8CB8FBC2, 0x8CB9FBC2, 0x8CBAFBC2, 0x8CBBFBC2, 0x8CBCFBC2, 0x8CBDFBC2, 0x8CBEFBC2, 0x8CBFFBC2, 0x3704, 0x3704, 0x3705, 0x3706, 0x3707, + 0x3708, 0x3709, 0x370A, 0x370B, 0x370C, 0x370C, 0x370C, 0x370D, 0x370E, 0x370F, 0x3710, 0x3711, 0x3711, 0x3712, 0x3713, + 0x3714, 0x3715, 0x3716, 0x3717, 0x3718, 0x3719, 0x371A, 0x371B, 0x371B, 0x371C, 0x371C, 0x371C, 0x371D, 0x371E, 0x371F, + 0x371F, 0x3720, 0x3721, 0x3722, 0x3723, 0x3724, 0x3725, 0x3726, 0x3726, 0x3727, 0x3727, 0x3728, 0x3729, 0x372A, 0x372B, + 0x372C, 0x8CF3FBC2, 0x8CF4FBC2, 0x8CF5FBC2, 0x8CF6FBC2, 0x8CF7FBC2, 0x8CF8FBC2, 0x8CF9FBC2, 0x1C3E, 0x1C42, 0x1AE5, 0x1AE6, 0x1AE7, 0x1AE8, 0x8D00FBC2, + 0x8D01FBC2, 0x8D02FBC2, 0x8D03FBC2, 0x8D04FBC2, 0x8D05FBC2, 0x8D06FBC2, 0x8D07FBC2, 0x8D08FBC2, 0x8D09FBC2, 0x8D0AFBC2, 0x8D0BFBC2, 0x8D0CFBC2, 0x8D0DFBC2, 0x8D0EFBC2, 0x8D0FFBC2, + 0x8D10FBC2, 0x8D11FBC2, 0x8D12FBC2, 0x8D13FBC2, 0x8D14FBC2, 0x8D15FBC2, 0x8D16FBC2, 0x8D17FBC2, 0x8D18FBC2, 0x8D19FBC2, 0x8D1AFBC2, 0x8D1BFBC2, 0x8D1CFBC2, 0x8D1DFBC2, 0x8D1EFBC2, + 0x8D1FFBC2, 0x8D20FBC2, 0x8D21FBC2, 0x8D22FBC2, 0x8D23FBC2, 0x8D24FBC2, 0x8D25FBC2, 0x8D26FBC2, 0x8D27FBC2, 0x8D28FBC2, 0x8D29FBC2, 0x8D2AFBC2, 0x8D2BFBC2, 0x8D2CFBC2, 0x8D2DFBC2, + 0x8D2EFBC2, 0x8D2FFBC2, 0x8D30FBC2, 0x8D31FBC2, 0x8D32FBC2, 0x8D33FBC2, 0x8D34FBC2, 0x8D35FBC2, 0x8D36FBC2, 0x8D37FBC2, 0x8D38FBC2, 
0x8D39FBC2, 0x8D3AFBC2, 0x8D3BFBC2, 0x8D3CFBC2, + 0x8D3DFBC2, 0x8D3EFBC2, 0x8D3FFBC2, 0x8D40FBC2, 0x8D41FBC2, 0x8D42FBC2, 0x8D43FBC2, 0x8D44FBC2, 0x8D45FBC2, 0x8D46FBC2, 0x8D47FBC2, 0x8D48FBC2, 0x8D49FBC2, 0x8D4AFBC2, 0x8D4BFBC2, + 0x8D4CFBC2, 0x8D4DFBC2, 0x8D4EFBC2, 0x8D4FFBC2, 0x8D50FBC2, 0x8D51FBC2, 0x8D52FBC2, 0x8D53FBC2, 0x8D54FBC2, 0x8D55FBC2, 0x8D56FBC2, 0x8D57FBC2, 0x8D58FBC2, 0x8D59FBC2, 0x8D5AFBC2, + 0x8D5BFBC2, 0x8D5CFBC2, 0x8D5DFBC2, 0x8D5EFBC2, 0x8D5FFBC2, 0x8D60FBC2, 0x8D61FBC2, 0x8D62FBC2, 0x8D63FBC2, 0x8D64FBC2, 0x8D65FBC2, 0x8D66FBC2, 0x8D67FBC2, 0x8D68FBC2, 0x8D69FBC2, + 0x8D6AFBC2, 0x8D6BFBC2, 0x8D6CFBC2, 0x8D6DFBC2, 0x8D6EFBC2, 0x8D6FFBC2, 0x8D70FBC2, 0x8D71FBC2, 0x8D72FBC2, 0x8D73FBC2, 0x8D74FBC2, 0x8D75FBC2, 0x8D76FBC2, 0x8D77FBC2, 0x8D78FBC2, + 0x8D79FBC2, 0x8D7AFBC2, 0x8D7BFBC2, 0x8D7CFBC2, 0x8D7DFBC2, 0x8D7EFBC2, 0x8D7FFBC2, 0x8D80FBC2, 0x8D81FBC2, 0x8D82FBC2, 0x8D83FBC2, 0x8D84FBC2, 0x8D85FBC2, 0x8D86FBC2, 0x8D87FBC2, + 0x8D88FBC2, 0x8D89FBC2, 0x8D8AFBC2, 0x8D8BFBC2, 0x8D8CFBC2, 0x8D8DFBC2, 0x8D8EFBC2, 0x8D8FFBC2, 0x8D90FBC2, 0x8D91FBC2, 0x8D92FBC2, 0x8D93FBC2, 0x8D94FBC2, 0x8D95FBC2, 0x8D96FBC2, + 0x8D97FBC2, 0x8D98FBC2, 0x8D99FBC2, 0x8D9AFBC2, 0x8D9BFBC2, 0x8D9CFBC2, 0x8D9DFBC2, 0x8D9EFBC2, 0x8D9FFBC2, 0x8DA0FBC2, 0x8DA1FBC2, 0x8DA2FBC2, 0x8DA3FBC2, 0x8DA4FBC2, 0x8DA5FBC2, + 0x8DA6FBC2, 0x8DA7FBC2, 0x8DA8FBC2, 0x8DA9FBC2, 0x8DAAFBC2, 0x8DABFBC2, 0x8DACFBC2, 0x8DADFBC2, 0x8DAEFBC2, 0x8DAFFBC2, 0x8DB0FBC2, 0x8DB1FBC2, 0x8DB2FBC2, 0x8DB3FBC2, 0x8DB4FBC2, + 0x8DB5FBC2, 0x8DB6FBC2, 0x8DB7FBC2, 0x8DB8FBC2, 0x8DB9FBC2, 0x8DBAFBC2, 0x8DBBFBC2, 0x8DBCFBC2, 0x8DBDFBC2, 0x8DBEFBC2, 0x8DBFFBC2, 0x8DC0FBC2, 0x8DC1FBC2, 0x8DC2FBC2, 0x8DC3FBC2, + 0x8DC4FBC2, 0x8DC5FBC2, 0x8DC6FBC2, 0x8DC7FBC2, 0x8DC8FBC2, 0x8DC9FBC2, 0x8DCAFBC2, 0x8DCBFBC2, 0x8DCCFBC2, 0x8DCDFBC2, 0x8DCEFBC2, 0x8DCFFBC2, 0x8DD0FBC2, 0x8DD1FBC2, 0x8DD2FBC2, + 0x8DD3FBC2, 0x8DD4FBC2, 0x8DD5FBC2, 0x8DD6FBC2, 0x8DD7FBC2, 0x8DD8FBC2, 0x8DD9FBC2, 0x8DDAFBC2, 0x8DDBFBC2, 0x8DDCFBC2, 
0x8DDDFBC2, 0x8DDEFBC2, 0x8DDFFBC2, 0x8DE0FBC2, 0x8DE1FBC2, + 0x8DE2FBC2, 0x8DE3FBC2, 0x8DE4FBC2, 0x8DE5FBC2, 0x8DE6FBC2, 0x8DE7FBC2, 0x8DE8FBC2, 0x8DE9FBC2, 0x8DEAFBC2, 0x8DEBFBC2, 0x8DECFBC2, 0x8DEDFBC2, 0x8DEEFBC2, 0x8DEFFBC2, 0x8DF0FBC2, + 0x8DF1FBC2, 0x8DF2FBC2, 0x8DF3FBC2, 0x8DF4FBC2, 0x8DF5FBC2, 0x8DF6FBC2, 0x8DF7FBC2, 0x8DF8FBC2, 0x8DF9FBC2, 0x8DFAFBC2, 0x8DFBFBC2, 0x8DFCFBC2, 0x8DFDFBC2, 0x8DFEFBC2, 0x8DFFFBC2, + 0x8E00FBC2, 0x8E01FBC2, 0x8E02FBC2, 0x8E03FBC2, 0x8E04FBC2, 0x8E05FBC2, 0x8E06FBC2, 0x8E07FBC2, 0x8E08FBC2, 0x8E09FBC2, 0x8E0AFBC2, 0x8E0BFBC2, 0x8E0CFBC2, 0x8E0DFBC2, 0x8E0EFBC2, + 0x8E0FFBC2, 0x8E10FBC2, 0x8E11FBC2, 0x8E12FBC2, 0x8E13FBC2, 0x8E14FBC2, 0x8E15FBC2, 0x8E16FBC2, 0x8E17FBC2, 0x8E18FBC2, 0x8E19FBC2, 0x8E1AFBC2, 0x8E1BFBC2, 0x8E1CFBC2, 0x8E1DFBC2, + 0x8E1EFBC2, 0x8E1FFBC2, 0x8E20FBC2, 0x8E21FBC2, 0x8E22FBC2, 0x8E23FBC2, 0x8E24FBC2, 0x8E25FBC2, 0x8E26FBC2, 0x8E27FBC2, 0x8E28FBC2, 0x8E29FBC2, 0x8E2AFBC2, 0x8E2BFBC2, 0x8E2CFBC2, + 0x8E2DFBC2, 0x8E2EFBC2, 0x8E2FFBC2, 0x8E30FBC2, 0x8E31FBC2, 0x8E32FBC2, 0x8E33FBC2, 0x8E34FBC2, 0x8E35FBC2, 0x8E36FBC2, 0x8E37FBC2, 0x8E38FBC2, 0x8E39FBC2, 0x8E3AFBC2, 0x8E3BFBC2, + 0x8E3CFBC2, 0x8E3DFBC2, 0x8E3EFBC2, 0x8E3FFBC2, 0x8E40FBC2, 0x8E41FBC2, 0x8E42FBC2, 0x8E43FBC2, 0x8E44FBC2, 0x8E45FBC2, 0x8E46FBC2, 0x8E47FBC2, 0x8E48FBC2, 0x8E49FBC2, 0x8E4AFBC2, + 0x8E4BFBC2, 0x8E4CFBC2, 0x8E4DFBC2, 0x8E4EFBC2, 0x8E4FFBC2, 0x8E50FBC2, 0x8E51FBC2, 0x8E52FBC2, 0x8E53FBC2, 0x8E54FBC2, 0x8E55FBC2, 0x8E56FBC2, 0x8E57FBC2, 0x8E58FBC2, 0x8E59FBC2, + 0x8E5AFBC2, 0x8E5BFBC2, 0x8E5CFBC2, 0x8E5DFBC2, 0x8E5EFBC2, 0x8E5FFBC2, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, + 0x1ACC, 0x1ACD, 0x1ACE, 0x1ACF, 0x1AD0, 0x1AD1, 0x1AD2, 0x1AD3, 0x1AD4, 0x1AD5, 0x1AD6, 0x1AD7, 0x1AD8, 0x1AD9, 0x1ADA, + 0x1ADB, 0x1ADC, 0x1ADD, 0x1ADE, 0x1ADF, 0x1AE0, 0x1AE1, 0x8E7FFBC2, 0x8E80FBC2, 0x8E81FBC2, 0x8E82FBC2, 0x8E83FBC2, 0x8E84FBC2, 0x8E85FBC2, 0x8E86FBC2, + 0x8E87FBC2, 0x8E88FBC2, 0x8E89FBC2, 0x8E8AFBC2, 0x8E8BFBC2, 
0x8E8CFBC2, 0x8E8DFBC2, 0x8E8EFBC2, 0x8E8FFBC2, 0x8E90FBC2, 0x8E91FBC2, 0x8E92FBC2, 0x8E93FBC2, 0x8E94FBC2, 0x8E95FBC2, + 0x8E96FBC2, 0x8E97FBC2, 0x8E98FBC2, 0x8E99FBC2, 0x8E9AFBC2, 0x8E9BFBC2, 0x8E9CFBC2, 0x8E9DFBC2, 0x8E9EFBC2, 0x8E9FFBC2, 0x8EA0FBC2, 0x8EA1FBC2, 0x8EA2FBC2, 0x8EA3FBC2, 0x8EA4FBC2, + 0x8EA5FBC2, 0x8EA6FBC2, 0x8EA7FBC2, 0x8EA8FBC2, 0x8EA9FBC2, 0x8EAAFBC2, 0x8EABFBC2, 0x8EACFBC2, 0x8EADFBC2, 0x8EAEFBC2, 0x8EAFFBC2, 0x8EB0FBC2, 0x8EB1FBC2, 0x8EB2FBC2, 0x8EB3FBC2, + 0x8EB4FBC2, 0x8EB5FBC2, 0x8EB6FBC2, 0x8EB7FBC2, 0x8EB8FBC2, 0x8EB9FBC2, 0x8EBAFBC2, 0x8EBBFBC2, 0x8EBCFBC2, 0x8EBDFBC2, 0x8EBEFBC2, 0x8EBFFBC2, 0x8EC0FBC2, 0x8EC1FBC2, 0x8EC2FBC2, + 0x8EC3FBC2, 0x8EC4FBC2, 0x8EC5FBC2, 0x8EC6FBC2, 0x8EC7FBC2, 0x8EC8FBC2, 0x8EC9FBC2, 0x8ECAFBC2, 0x8ECBFBC2, 0x8ECCFBC2, 0x8ECDFBC2, 0x8ECEFBC2, 0x8ECFFBC2, 0x8ED0FBC2, 0x8ED1FBC2, + 0x8ED2FBC2, 0x8ED3FBC2, 0x8ED4FBC2, 0x8ED5FBC2, 0x8ED6FBC2, 0x8ED7FBC2, 0x8ED8FBC2, 0x8ED9FBC2, 0x8EDAFBC2, 0x8EDBFBC2, 0x8EDCFBC2, 0x8EDDFBC2, 0x8EDEFBC2, 0x8EDFFBC2, 0x8EE0FBC2, + 0x8EE1FBC2, 0x8EE2FBC2, 0x8EE3FBC2, 0x8EE4FBC2, 0x8EE5FBC2, 0x8EE6FBC2, 0x8EE7FBC2, 0x8EE8FBC2, 0x8EE9FBC2, 0x8EEAFBC2, 0x8EEBFBC2, 0x8EECFBC2, 0x8EEDFBC2, 0x8EEEFBC2, 0x8EEFFBC2, + 0x8EF0FBC2, 0x8EF1FBC2, 0x8EF2FBC2, 0x8EF3FBC2, 0x8EF4FBC2, 0x8EF5FBC2, 0x8EF6FBC2, 0x8EF7FBC2, 0x8EF8FBC2, 0x8EF9FBC2, 0x8EFAFBC2, 0x8EFBFBC2, 0x8EFCFBC2, 0x8EFDFBC2, 0x8EFEFBC2, + 0x8EFFFBC2, 0x8F00FBC2, 0x8F01FBC2, 0x8F02FBC2, 0x8F03FBC2, 0x8F04FBC2, 0x8F05FBC2, 0x8F06FBC2, 0x8F07FBC2, 0x8F08FBC2, 0x8F09FBC2, 0x8F0AFBC2, 0x8F0BFBC2, 0x8F0CFBC2, 0x8F0DFBC2, + 0x8F0EFBC2, 0x8F0FFBC2, 0x8F10FBC2, 0x8F11FBC2, 0x8F12FBC2, 0x8F13FBC2, 0x8F14FBC2, 0x8F15FBC2, 0x8F16FBC2, 0x8F17FBC2, 0x8F18FBC2, 0x8F19FBC2, 0x8F1AFBC2, 0x8F1BFBC2, 0x8F1CFBC2, + 0x8F1DFBC2, 0x8F1EFBC2, 0x8F1FFBC2, 0x8F20FBC2, 0x8F21FBC2, 0x8F22FBC2, 0x8F23FBC2, 0x8F24FBC2, 0x8F25FBC2, 0x8F26FBC2, 0x8F27FBC2, 0x8F28FBC2, 0x8F29FBC2, 0x8F2AFBC2, 0x8F2BFBC2, + 0x8F2CFBC2, 0x8F2DFBC2, 0x8F2EFBC2, 0x8F2FFBC2, 
0x8F30FBC2, 0x8F31FBC2, 0x8F32FBC2, 0x8F33FBC2, 0x8F34FBC2, 0x8F35FBC2, 0x8F36FBC2, 0x8F37FBC2, 0x8F38FBC2, 0x8F39FBC2, 0x8F3AFBC2, + 0x8F3BFBC2, 0x8F3CFBC2, 0x8F3DFBC2, 0x8F3EFBC2, 0x8F3FFBC2, 0x8F40FBC2, 0x8F41FBC2, 0x8F42FBC2, 0x8F43FBC2, 0x8F44FBC2, 0x8F45FBC2, 0x8F46FBC2, 0x8F47FBC2, 0x8F48FBC2, 0x8F49FBC2, + 0x8F4AFBC2, 0x8F4BFBC2, 0x8F4CFBC2, 0x8F4DFBC2, 0x8F4EFBC2, 0x8F4FFBC2, 0x8F50FBC2, 0x8F51FBC2, 0x8F52FBC2, 0x8F53FBC2, 0x8F54FBC2, 0x8F55FBC2, 0x8F56FBC2, 0x8F57FBC2, 0x8F58FBC2, + 0x8F59FBC2, 0x8F5AFBC2, 0x8F5BFBC2, 0x8F5CFBC2, 0x8F5DFBC2, 0x8F5EFBC2, 0x8F5FFBC2, 0x8F60FBC2, 0x8F61FBC2, 0x8F62FBC2, 0x8F63FBC2, 0x8F64FBC2, 0x8F65FBC2, 0x8F66FBC2, 0x8F67FBC2, + 0x8F68FBC2, 0x8F69FBC2, 0x8F6AFBC2, 0x8F6BFBC2, 0x8F6CFBC2, 0x8F6DFBC2, 0x8F6EFBC2, 0x8F6FFBC2, 0x8F70FBC2, 0x8F71FBC2, 0x8F72FBC2, 0x8F73FBC2, 0x8F74FBC2, 0x8F75FBC2, 0x8F76FBC2, + 0x8F77FBC2, 0x8F78FBC2, 0x8F79FBC2, 0x8F7AFBC2, 0x8F7BFBC2, 0x8F7CFBC2, 0x8F7DFBC2, 0x8F7EFBC2, 0x8F7FFBC2, 0x8F80FBC2, 0x8F81FBC2, 0x8F82FBC2, 0x8F83FBC2, 0x8F84FBC2, 0x8F85FBC2, + 0x8F86FBC2, 0x8F87FBC2, 0x8F88FBC2, 0x8F89FBC2, 0x8F8AFBC2, 0x8F8BFBC2, 0x8F8CFBC2, 0x8F8DFBC2, 0x8F8EFBC2, 0x8F8FFBC2, 0x8F90FBC2, 0x8F91FBC2, 0x8F92FBC2, 0x8F93FBC2, 0x8F94FBC2, + 0x8F95FBC2, 0x8F96FBC2, 0x8F97FBC2, 0x8F98FBC2, 0x8F99FBC2, 0x8F9AFBC2, 0x8F9BFBC2, 0x8F9CFBC2, 0x8F9DFBC2, 0x8F9EFBC2, 0x8F9FFBC2, 0x8FA0FBC2, 0x8FA1FBC2, 0x8FA2FBC2, 0x8FA3FBC2, + 0x8FA4FBC2, 0x8FA5FBC2, 0x8FA6FBC2, 0x8FA7FBC2, 0x8FA8FBC2, 0x8FA9FBC2, 0x8FAAFBC2, 0x8FABFBC2, 0x8FACFBC2, 0x8FADFBC2, 0x8FAEFBC2, 0x8FAFFBC2, 0x8FB0FBC2, 0x8FB1FBC2, 0x8FB2FBC2, + 0x8FB3FBC2, 0x8FB4FBC2, 0x8FB5FBC2, 0x8FB6FBC2, 0x8FB7FBC2, 0x8FB8FBC2, 0x8FB9FBC2, 0x8FBAFBC2, 0x8FBBFBC2, 0x8FBCFBC2, 0x8FBDFBC2, 0x8FBEFBC2, 0x8FBFFBC2, 0x8FC0FBC2, 0x8FC1FBC2, + 0x8FC2FBC2, 0x8FC3FBC2, 0x8FC4FBC2, 0x8FC5FBC2, 0x8FC6FBC2, 0x8FC7FBC2, 0x8FC8FBC2, 0x8FC9FBC2, 0x8FCAFBC2, 0x8FCBFBC2, 0x8FCCFBC2, 0x8FCDFBC2, 0x8FCEFBC2, 0x8FCFFBC2, 0x8FD0FBC2, + 0x8FD1FBC2, 0x8FD2FBC2, 0x8FD3FBC2, 
0x8FD4FBC2, 0x8FD5FBC2, 0x8FD6FBC2, 0x8FD7FBC2, 0x8FD8FBC2, 0x8FD9FBC2, 0x8FDAFBC2, 0x8FDBFBC2, 0x8FDCFBC2, 0x8FDDFBC2, 0x8FDEFBC2, 0x8FDFFBC2, + 0x8FE0FBC2, 0x8FE1FBC2, 0x8FE2FBC2, 0x8FE3FBC2, 0x8FE4FBC2, 0x8FE5FBC2, 0x8FE6FBC2, 0x8FE7FBC2, 0x8FE8FBC2, 0x8FE9FBC2, 0x8FEAFBC2, 0x8FEBFBC2, 0x8FECFBC2, 0x8FEDFBC2, 0x8FEEFBC2, + 0x8FEFFBC2, 0x8FF0FBC2, 0x8FF1FBC2, 0x8FF2FBC2, 0x8FF3FBC2, 0x8FF4FBC2, 0x8FF5FBC2, 0x8FF6FBC2, 0x8FF7FBC2, 0x8FF8FBC2, 0x8FF9FBC2, 0x8FFAFBC2, 0x8FFBFBC2, 0x8FFCFBC2, 0x8FFDFBC2, + 0x8FFEFBC2, 0x8FFFFBC2, 0x0, 0x0, 0x0, 0x2CF7, 0x2CF8, 0x2CC8, 0x2CC9, 0x2CCA, 0x2CCB, 0x2CCC, 0x2CCD, 0x2CCE, 0x2CCF, + 0x2CD0, 0x2CD1, 0x2CD2, 0x2CD3, 0x2CD4, 0x2CD5, 0x2CD6, 0x2CD7, 0x2CD8, 0x2CD9, 0x2CDA, 0x2CDB, 0x2CDC, 0x2CDD, 0x2CDE, + 0x2CDF, 0x2CE0, 0x2CE1, 0x2CE2, 0x2CE3, 0x2CE4, 0x2CE5, 0x2CE6, 0x2CE7, 0x2CE8, 0x2CE9, 0x2CEA, 0x2CEB, 0x2CEC, 0x2CED, + 0x2CEE, 0x2CEF, 0x2CF0, 0x2CF1, 0x2CF2, 0x2CF3, 0x2CF4, 0x2CF5, 0x2CF6, 0x2CF9, 0x2CFA, 0x2CFB, 0x2CFC, 0x2CFD, 0x2CFE, + 0x2CFF, 0x2D00, 0x2D01, 0x2D02, 0x2D03, 0x2D04, 0x2D05, 0x2D06, 0x2D07, 0x2D08, 0x2D09, 0x2D0A, 0x2D0B, 0x2AC, 0x2AD, + 0x433, 0x434, 0x435, 0x436, 0x437, 0x904EFBC2, 0x904FFBC2, 0x9050FBC2, 0x9051FBC2, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, + 0x1C44, 0x1C45, 0x1C46, 0x1B73, 0x1B74, 0x1B75, 0x1B76, 0x1B77, 0x1B78, 0x1B79, 0x1B7A, 0x1B7B, 0x1B7C, 0x1B7D, 0x1C3D, + 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x9070FBC2, 0x9071FBC2, 0x9072FBC2, 0x9073FBC2, 0x9074FBC2, 0x9075FBC2, + 0x9076FBC2, 0x9077FBC2, 0x9078FBC2, 0x9079FBC2, 0x907AFBC2, 0x907BFBC2, 0x907CFBC2, 0x907DFBC2, 0x907EFBC2, 0x2D0C, 0x0, 0x0, 0x0, 0x29C6, 0x29C7, + 0x29C8, 0x29C9, 0x29CA, 0x29CB, 0x29CC, 0x29CD, 0x29CE, 0x29CF, 0x29D0, 0x29D1, 0x29D2, 0x29D3, 0x29D4, 0x29D5, 0x29D6, + 0x29D7, 0x29D8, 0x29D9, 0x29DA, 0x29DB, 0x29DC, 0x29DC, 0x29DD, 0x29DD, 0x29DE, 0x29DF, 0x29E0, 0x29E1, 0x29E2, 0x29E3, + 0x29E4, 0x29E5, 0x29E6, 0x29E7, 0x29E8, 0x29E9, 0x29EA, 0x29EB, 0x29E6, 0x29EC, 0x29ED, 
0x29EE, 0x29EF, 0x29F0, 0x29F1, + 0x29F2, 0x29F3, 0x29F4, 0x29F5, 0x29F6, 0x29F7, 0x29F8, 0x29F9, 0x0, 0x44E, 0x44F, 0x0, 0x2E0, 0x2E1, 0x2AE, + 0x2AF, 0x90C2FBC2, 0x90C3FBC2, 0x90C4FBC2, 0x90C5FBC2, 0x90C6FBC2, 0x90C7FBC2, 0x90C8FBC2, 0x90C9FBC2, 0x90CAFBC2, 0x90CBFBC2, 0x90CCFBC2, 0x90CDFBC2, 0x90CEFBC2, 0x90CFFBC2, + 0x45AF, 0x45B0, 0x45B1, 0x45B2, 0x45B3, 0x45B4, 0x45B5, 0x45B6, 0x45B7, 0x45B8, 0x45B9, 0x45BA, 0x45BB, 0x45BC, 0x45BD, + 0x45BE, 0x45BF, 0x45C0, 0x45C1, 0x45C2, 0x45C3, 0x45C4, 0x45C5, 0x45C6, 0x45C7, 0x90E9FBC2, 0x90EAFBC2, 0x90EBFBC2, 0x90ECFBC2, 0x90EDFBC2, + 0x90EEFBC2, 0x90EFFBC2, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x90FAFBC2, 0x90FBFBC2, 0x90FCFBC2, + 0x90FDFBC2, 0x90FEFBC2, 0x90FFFBC2, 0x0, 0x0, 0x0, 0x30E4, 0x30E5, 0x30E6, 0x30E7, 0x30E8, 0x30E9, 0x30EA, 0x30EB, 0x30EC, + 0x30ED, 0x30EE, 0x30EF, 0x30F0, 0x30F1, 0x30F2, 0x30F3, 0x30F4, 0x30F5, 0x30F6, 0x30F7, 0x30F8, 0x30F9, 0x30FA, 0x30FB, + 0x30FC, 0x30FD, 0x30FE, 0x30FF, 0x3100, 0x3101, 0x3102, 0x3103, 0x3104, 0x3105, 0x3106, 0x3107, 0x3108, 0x3109, 0x310A, + 0x310B, 0x310C, 0x310D, 0x310E, 0x310F, 0x3110, 0x3111, 0x3112, 0x3113, 0x3114, 0x3115, 0x9135FBC2, 0x1C3D, 0x1C3E, 0x1C3F, + 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x2E2, 0x2B0, 0x2B1, 0x273, 0x9144FBC2, 0x9145FBC2, 0x9146FBC2, 0x9147FBC2, + 0x9148FBC2, 0x9149FBC2, 0x914AFBC2, 0x914BFBC2, 0x914CFBC2, 0x914DFBC2, 0x914EFBC2, 0x914FFBC2, 0x29FA, 0x29FB, 0x29FC, 0x29FD, 0x29FE, 0x29FF, 0x2A00, + 0x2A01, 0x2A02, 0x2A03, 0x2A04, 0x2A05, 0x2A06, 0x2A07, 0x2A08, 0x2A09, 0x2A0A, 0x2A0B, 0x2A0C, 0x2A0D, 0x2A0E, 0x2A0F, + 0x2A10, 0x2A11, 0x2A12, 0x2A13, 0x2A14, 0x2A15, 0x2A16, 0x2A17, 0x2A18, 0x2A19, 0x2A1B, 0x2A1C, 0x2A1D, 0x0, 0x450, + 0x451, 0x2A1A, 0x9177FBC2, 0x9178FBC2, 0x9179FBC2, 0x917AFBC2, 0x917BFBC2, 0x917CFBC2, 0x917DFBC2, 0x917EFBC2, 0x917FFBC2, 0x0, 0x0, 0x0, 0x2A20, + 0x2A21, 0x2A22, 0x2A23, 0x2A24, 0x2A25, 0x2A26, 0x2A27, 0x2A28, 0x2A29, 0x2A2A, 0x2A2B, 0x2A2C, 
0x2A2D, 0x2A2E, 0x2A2F, + 0x2A30, 0x2A31, 0x2A32, 0x2A33, 0x2A34, 0x2A35, 0x2A36, 0x2A37, 0x2A38, 0x2A39, 0x2A3A, 0x2A3B, 0x2A3C, 0x2A3D, 0x2A3E, + 0x2A3F, 0x2A40, 0x2A41, 0x2A42, 0x2A43, 0x2A44, 0x2A45, 0x2A46, 0x2A47, 0x2A48, 0x2A49, 0x2A4A, 0x2A4B, 0x2A4C, 0x2A4D, + 0x2A4E, 0x2A4F, 0x2A54, 0x2A55, 0x2A56, 0x2A57, 0x2A58, 0x2A59, 0x2A5A, 0x2A5B, 0x2A5C, 0x2A5D, 0x2A5E, 0x2A5F, 0x2A60, + 0x2A61, 0x2A50, 0x2A51, 0x2A52, 0x2A1E, 0x2B2, 0x2B3, 0x453, 0x2E3, 0x454, 0x0, 0x0, 0x0, 0x452, 0x91CEFBC2, + 0x91CFFBC2, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x2A1F, 0x455, 0x2A53, 0x456, + 0x2E4, 0x2E5, 0x91E0FBC2, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1B8C, 0x1B8D, 0x1B8E, + 0x1B8F, 0x1B90, 0x1B91, 0x1B92, 0x1B93, 0x1B94, 0x1B95, 0x1B96, 0x91F5FBC2, 0x91F6FBC2, 0x91F7FBC2, 0x91F8FBC2, 0x91F9FBC2, 0x91FAFBC2, 0x91FBFBC2, + 0x91FCFBC2, 0x91FDFBC2, 0x91FEFBC2, 0x91FFFBC2, 0x2A62, 0x2A63, 0x2A64, 0x2A65, 0x2A66, 0x2A67, 0x2A68, 0x2A69, 0x2A6A, 0x2A6B, 0x2A6C, + 0x2A6D, 0x2A6E, 0x2A6F, 0x2A70, 0x2A71, 0x2A72, 0x2A73, 0x9212FBC2, 0x2A74, 0x2A75, 0x2A76, 0x2A77, 0x2A78, 0x2A79, 0x2A7A, + 0x2A7B, 0x2A7C, 0x2A7D, 0x2A7E, 0x2A7F, 0x2A80, 0x2A81, 0x2A82, 0x2A83, 0x2A84, 0x2A85, 0x2A86, 0x2A87, 0x2A88, 0x2A89, + 0x2A8A, 0x2A8B, 0x2A8C, 0x2A8D, 0x2A8E, 0x2A8F, 0x2A90, 0x2A91, 0x2A92, 0x2A93, 0x2A94, 0x0, 0x2A95, 0x0, 0x0, + 0x2B4, 0x2B5, 0x457, 0x458, 0x459, 0x45A, 0x0, 0x923FFBC2, 0x9240FBC2, 0x9241FBC2, 0x9242FBC2, 0x9243FBC2, 0x9244FBC2, 0x9245FBC2, 0x9246FBC2, + 0x9247FBC2, 0x9248FBC2, 0x9249FBC2, 0x924AFBC2, 0x924BFBC2, 0x924CFBC2, 0x924DFBC2, 0x924EFBC2, 0x924FFBC2, 0x9250FBC2, 0x9251FBC2, 0x9252FBC2, 0x9253FBC2, 0x9254FBC2, 0x9255FBC2, + 0x9256FBC2, 0x9257FBC2, 0x9258FBC2, 0x9259FBC2, 0x925AFBC2, 0x925BFBC2, 0x925CFBC2, 0x925DFBC2, 0x925EFBC2, 0x925FFBC2, 0x9260FBC2, 0x9261FBC2, 0x9262FBC2, 0x9263FBC2, 0x9264FBC2, + 0x9265FBC2, 0x9266FBC2, 0x9267FBC2, 0x9268FBC2, 0x9269FBC2, 0x926AFBC2, 0x926BFBC2, 
0x926CFBC2, 0x926DFBC2, 0x926EFBC2, 0x926FFBC2, 0x9270FBC2, 0x9271FBC2, 0x9272FBC2, 0x9273FBC2, + 0x9274FBC2, 0x9275FBC2, 0x9276FBC2, 0x9277FBC2, 0x9278FBC2, 0x9279FBC2, 0x927AFBC2, 0x927BFBC2, 0x927CFBC2, 0x927DFBC2, 0x927EFBC2, 0x927FFBC2, 0x2ACF, 0x2AD0, 0x2AD1, + 0x2AD2, 0x2AD5, 0x2AD6, 0x2AD7, 0x9287FBC2, 0x2AD8, 0x9289FBC2, 0x2AD9, 0x2ADA, 0x2ADB, 0x2ADC, 0x928EFBC2, 0x2ADD, 0x2ADE, 0x2ADF, + 0x2AE0, 0x2AE1, 0x2AE2, 0x2AE3, 0x2AE4, 0x2AE5, 0x2AE6, 0x2AE7, 0x2AE8, 0x2AE9, 0x2AEA, 0x2AEB, 0x929EFBC2, 0x2AEC, 0x2AED, + 0x2AEE, 0x2AEF, 0x2AF0, 0x2AF1, 0x2AD3, 0x2AD4, 0x2AF2, 0x2AF3, 0x2E6, 0x92AAFBC2, 0x92ABFBC2, 0x92ACFBC2, 0x92ADFBC2, 0x92AEFBC2, 0x92AFFBC2, + 0x2A96, 0x2A97, 0x2A98, 0x2A99, 0x2A9A, 0x2A9B, 0x2A9C, 0x2A9D, 0x2A9E, 0x2A9F, 0x2AA0, 0x2AA1, 0x2AA2, 0x2AA3, 0x2AA4, + 0x2AA5, 0x2AA6, 0x2AA7, 0x2AA8, 0x2AA9, 0x2AAA, 0x2AAB, 0x2AAC, 0x2AAD, 0x2AAE, 0x2AAF, 0x2AB0, 0x2AB1, 0x2AB2, 0x2AB3, + 0x2AB4, 0x2AB5, 0x2AB6, 0x2AB7, 0x2AB8, 0x2AB9, 0x2ABA, 0x2ABB, 0x2ABC, 0x2ABD, 0x2ABE, 0x2ABF, 0x2AC0, 0x2AC1, 0x2AC2, + 0x2AC3, 0x2AC4, 0x0, 0x2AC5, 0x2AC6, 0x2AC7, 0x2AC8, 0x2AC9, 0x2ACA, 0x2ACB, 0x2ACC, 0x2ACD, 0x0, 0x2ACE, 0x92EBFBC2, + 0x92ECFBC2, 0x92EDFBC2, 0x92EEFBC2, 0x92EFFBC2, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x92FAFBC2, + 0x92FBFBC2, 0x92FCFBC2, 0x92FDFBC2, 0x92FEFBC2, 0x92FFFBC2, 0x0, 0x0, 0x0, 0x0, 0x9304FBC2, 0x2AF5, 0x2AF6, 0x2AF7, 0x2AF8, 0x2AF9, + 0x2AFA, 0x2AFB, 0x2AFD, 0x930DFBC2, 0x930EFBC2, 0x2AFF, 0x2B00, 0x9311FBC2, 0x9312FBC2, 0x2B01, 0x2B02, 0x2B03, 0x2B04, 0x2B05, 0x2B06, + 0x2B07, 0x2B08, 0x2B09, 0x2B0A, 0x2B0B, 0x2B0C, 0x2B0D, 0x2B0E, 0x2B0F, 0x2B10, 0x2B11, 0x2B12, 0x2B13, 0x2B14, 0x2B15, + 0x2B16, 0x9329FBC2, 0x2B17, 0x2B18, 0x2B19, 0x2B1A, 0x2B1B, 0x2B1C, 0x2B1D, 0x9331FBC2, 0x2B1E, 0x2B1F, 0x9334FBC2, 0x2B20, 0x2B21, + 0x2B22, 0x2B23, 0x2B24, 0x933AFBC2, 0x933BFBC2, 0x0, 0x2B25, 0x2B28, 0x2B29, 0x2B2A, 0x2B2B, 0x2B2C, 0x2B2D, 0x2B2E, 0x9345FBC2, + 0x9346FBC2, 0x2B31, 0x2B32, 0x9349FBC2, 
0x934AFBC2, 0x2B33, 0x2B34, 0x2B35, 0x934EFBC2, 0x934FFBC2, 0x2AF4, 0x9351FBC2, 0x9352FBC2, 0x9353FBC2, 0x9354FBC2, + 0x9355FBC2, 0x9356FBC2, 0x2B36, 0x9358FBC2, 0x9359FBC2, 0x935AFBC2, 0x935BFBC2, 0x935CFBC2, 0x2B37, 0x2B26, 0x2B27, 0x2AFC, 0x2AFE, 0x2B2F, 0x2B30, + 0x9364FBC2, 0x9365FBC2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x936DFBC2, 0x936EFBC2, 0x936FFBC2, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x9375FBC2, 0x9376FBC2, 0x9377FBC2, 0x9378FBC2, 0x9379FBC2, 0x937AFBC2, 0x937BFBC2, 0x937CFBC2, 0x937DFBC2, 0x937EFBC2, 0x937FFBC2, 0x9380FBC2, 0x9381FBC2, + 0x9382FBC2, 0x9383FBC2, 0x9384FBC2, 0x9385FBC2, 0x9386FBC2, 0x9387FBC2, 0x9388FBC2, 0x9389FBC2, 0x938AFBC2, 0x938BFBC2, 0x938CFBC2, 0x938DFBC2, 0x938EFBC2, 0x938FFBC2, 0x9390FBC2, + 0x9391FBC2, 0x9392FBC2, 0x9393FBC2, 0x9394FBC2, 0x9395FBC2, 0x9396FBC2, 0x9397FBC2, 0x9398FBC2, 0x9399FBC2, 0x939AFBC2, 0x939BFBC2, 0x939CFBC2, 0x939DFBC2, 0x939EFBC2, 0x939FFBC2, + 0x93A0FBC2, 0x93A1FBC2, 0x93A2FBC2, 0x93A3FBC2, 0x93A4FBC2, 0x93A5FBC2, 0x93A6FBC2, 0x93A7FBC2, 0x93A8FBC2, 0x93A9FBC2, 0x93AAFBC2, 0x93ABFBC2, 0x93ACFBC2, 0x93ADFBC2, 0x93AEFBC2, + 0x93AFFBC2, 0x93B0FBC2, 0x93B1FBC2, 0x93B2FBC2, 0x93B3FBC2, 0x93B4FBC2, 0x93B5FBC2, 0x93B6FBC2, 0x93B7FBC2, 0x93B8FBC2, 0x93B9FBC2, 0x93BAFBC2, 0x93BBFBC2, 0x93BCFBC2, 0x93BDFBC2, + 0x93BEFBC2, 0x93BFFBC2, 0x93C0FBC2, 0x93C1FBC2, 0x93C2FBC2, 0x93C3FBC2, 0x93C4FBC2, 0x93C5FBC2, 0x93C6FBC2, 0x93C7FBC2, 0x93C8FBC2, 0x93C9FBC2, 0x93CAFBC2, 0x93CBFBC2, 0x93CCFBC2, + 0x93CDFBC2, 0x93CEFBC2, 0x93CFFBC2, 0x93D0FBC2, 0x93D1FBC2, 0x93D2FBC2, 0x93D3FBC2, 0x93D4FBC2, 0x93D5FBC2, 0x93D6FBC2, 0x93D7FBC2, 0x93D8FBC2, 0x93D9FBC2, 0x93DAFBC2, 0x93DBFBC2, + 0x93DCFBC2, 0x93DDFBC2, 0x93DEFBC2, 0x93DFFBC2, 0x93E0FBC2, 0x93E1FBC2, 0x93E2FBC2, 0x93E3FBC2, 0x93E4FBC2, 0x93E5FBC2, 0x93E6FBC2, 0x93E7FBC2, 0x93E8FBC2, 0x93E9FBC2, 0x93EAFBC2, + 0x93EBFBC2, 0x93ECFBC2, 0x93EDFBC2, 0x93EEFBC2, 0x93EFFBC2, 0x93F0FBC2, 0x93F1FBC2, 0x93F2FBC2, 0x93F3FBC2, 0x93F4FBC2, 0x93F5FBC2, 0x93F6FBC2, 0x93F7FBC2, 0x93F8FBC2, 
0x93F9FBC2, + 0x93FAFBC2, 0x93FBFBC2, 0x93FCFBC2, 0x93FDFBC2, 0x93FEFBC2, 0x93FFFBC2, 0x2B3A, 0x2B3B, 0x2B3C, 0x2B3D, 0x2B3E, 0x2B3F, 0x2B40, 0x2B41, 0x2B42, + 0x2B43, 0x2B44, 0x2B45, 0x2B46, 0x2B47, 0x2B48, 0x2B49, 0x2B4A, 0x2B4B, 0x2B4C, 0x2B4D, 0x2B4E, 0x2B4F, 0x2B50, 0x2B51, + 0x2B52, 0x2B53, 0x2B54, 0x2B55, 0x2B56, 0x2B57, 0x2B58, 0x2B59, 0x2B5A, 0x2B5B, 0x2B5C, 0x2B5D, 0x2B5E, 0x2B5F, 0x2B60, + 0x2B61, 0x2B62, 0x2B63, 0x2B64, 0x2B65, 0x2B66, 0x2B67, 0x2B68, 0x2B69, 0x2B6A, 0x2B6B, 0x2B6C, 0x2B6D, 0x2B6E, 0x2B71, + 0x2B72, 0x2B73, 0x2B74, 0x2B75, 0x2B76, 0x2B77, 0x2B78, 0x2B79, 0x2B7A, 0x2B7B, 0x2B7C, 0x2B7D, 0x2B7E, 0x0, 0x0, + 0x0, 0x0, 0x2B6F, 0x2B70, 0x2B38, 0x2B39, 0x2B6, 0x2B7, 0x45B, 0x45C, 0x45D, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, + 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x945AFBC2, 0x45E, 0x945CFBC2, 0x45F, 0x945EFBC2, 0x945FFBC2, 0x9460FBC2, 0x9461FBC2, 0x9462FBC2, + 0x9463FBC2, 0x9464FBC2, 0x9465FBC2, 0x9466FBC2, 0x9467FBC2, 0x9468FBC2, 0x9469FBC2, 0x946AFBC2, 0x946BFBC2, 0x946CFBC2, 0x946DFBC2, 0x946EFBC2, 0x946FFBC2, 0x9470FBC2, 0x9471FBC2, + 0x9472FBC2, 0x9473FBC2, 0x9474FBC2, 0x9475FBC2, 0x9476FBC2, 0x9477FBC2, 0x9478FBC2, 0x9479FBC2, 0x947AFBC2, 0x947BFBC2, 0x947CFBC2, 0x947DFBC2, 0x947EFBC2, 0x947FFBC2, 0x2B80, + 0x2B81, 0x2B82, 0x2B83, 0x2B84, 0x2B85, 0x2B86, 0x2B87, 0x2B88, 0x2B89, 0x2B8A, 0x2B8B, 0x2B8C, 0x2B8D, 0x2B8E, 0x2B8F, + 0x2B90, 0x2B91, 0x2B92, 0x2B93, 0x2B94, 0x2B95, 0x2B96, 0x2B97, 0x2B98, 0x2B99, 0x2B9A, 0x2B9B, 0x2B9C, 0x2B9D, 0x2B9E, + 0x2B9F, 0x2BA0, 0x2BA1, 0x2BA2, 0x2BA3, 0x2BA4, 0x2BA5, 0x2BA6, 0x2BA7, 0x2BA8, 0x2BA9, 0x2BAA, 0x2BAB, 0x2BAC, 0x2BAD, + 0x2BAE, 0x2BAF, 0x2BB2, 0x2BB3, 0x2BB4, 0x2BB5, 0x2BB6, 0x2BB7, 0x2BB8, 0x2BB9, 0x2BBA, 0x2BBB, 0x2BBC, 0x2BBD, 0x2BBE, + 0x2BBF, 0x2BC0, 0x0, 0x0, 0x0, 0x2BC1, 0x0, 0x2BB0, 0x2BB1, 0x460, 0x2B7F, 0x94C8FBC2, 0x94C9FBC2, 0x94CAFBC2, 0x94CBFBC2, + 0x94CCFBC2, 0x94CDFBC2, 0x94CEFBC2, 0x94CFFBC2, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 
0x1C46, 0x94DAFBC2, + 0x94DBFBC2, 0x94DCFBC2, 0x94DDFBC2, 0x94DEFBC2, 0x94DFFBC2, 0x94E0FBC2, 0x94E1FBC2, 0x94E2FBC2, 0x94E3FBC2, 0x94E4FBC2, 0x94E5FBC2, 0x94E6FBC2, 0x94E7FBC2, 0x94E8FBC2, 0x94E9FBC2, + 0x94EAFBC2, 0x94EBFBC2, 0x94ECFBC2, 0x94EDFBC2, 0x94EEFBC2, 0x94EFFBC2, 0x94F0FBC2, 0x94F1FBC2, 0x94F2FBC2, 0x94F3FBC2, 0x94F4FBC2, 0x94F5FBC2, 0x94F6FBC2, 0x94F7FBC2, 0x94F8FBC2, + 0x94F9FBC2, 0x94FAFBC2, 0x94FBFBC2, 0x94FCFBC2, 0x94FDFBC2, 0x94FEFBC2, 0x94FFFBC2, 0x9500FBC2, 0x9501FBC2, 0x9502FBC2, 0x9503FBC2, 0x9504FBC2, 0x9505FBC2, 0x9506FBC2, 0x9507FBC2, + 0x9508FBC2, 0x9509FBC2, 0x950AFBC2, 0x950BFBC2, 0x950CFBC2, 0x950DFBC2, 0x950EFBC2, 0x950FFBC2, 0x9510FBC2, 0x9511FBC2, 0x9512FBC2, 0x9513FBC2, 0x9514FBC2, 0x9515FBC2, 0x9516FBC2, + 0x9517FBC2, 0x9518FBC2, 0x9519FBC2, 0x951AFBC2, 0x951BFBC2, 0x951CFBC2, 0x951DFBC2, 0x951EFBC2, 0x951FFBC2, 0x9520FBC2, 0x9521FBC2, 0x9522FBC2, 0x9523FBC2, 0x9524FBC2, 0x9525FBC2, + 0x9526FBC2, 0x9527FBC2, 0x9528FBC2, 0x9529FBC2, 0x952AFBC2, 0x952BFBC2, 0x952CFBC2, 0x952DFBC2, 0x952EFBC2, 0x952FFBC2, 0x9530FBC2, 0x9531FBC2, 0x9532FBC2, 0x9533FBC2, 0x9534FBC2, + 0x9535FBC2, 0x9536FBC2, 0x9537FBC2, 0x9538FBC2, 0x9539FBC2, 0x953AFBC2, 0x953BFBC2, 0x953CFBC2, 0x953DFBC2, 0x953EFBC2, 0x953FFBC2, 0x9540FBC2, 0x9541FBC2, 0x9542FBC2, 0x9543FBC2, + 0x9544FBC2, 0x9545FBC2, 0x9546FBC2, 0x9547FBC2, 0x9548FBC2, 0x9549FBC2, 0x954AFBC2, 0x954BFBC2, 0x954CFBC2, 0x954DFBC2, 0x954EFBC2, 0x954FFBC2, 0x9550FBC2, 0x9551FBC2, 0x9552FBC2, + 0x9553FBC2, 0x9554FBC2, 0x9555FBC2, 0x9556FBC2, 0x9557FBC2, 0x9558FBC2, 0x9559FBC2, 0x955AFBC2, 0x955BFBC2, 0x955CFBC2, 0x955DFBC2, 0x955EFBC2, 0x955FFBC2, 0x9560FBC2, 0x9561FBC2, + 0x9562FBC2, 0x9563FBC2, 0x9564FBC2, 0x9565FBC2, 0x9566FBC2, 0x9567FBC2, 0x9568FBC2, 0x9569FBC2, 0x956AFBC2, 0x956BFBC2, 0x956CFBC2, 0x956DFBC2, 0x956EFBC2, 0x956FFBC2, 0x9570FBC2, + 0x9571FBC2, 0x9572FBC2, 0x9573FBC2, 0x9574FBC2, 0x9575FBC2, 0x9576FBC2, 0x9577FBC2, 0x9578FBC2, 0x9579FBC2, 0x957AFBC2, 0x957BFBC2, 0x957CFBC2, 0x957DFBC2, 
0x957EFBC2, 0x957FFBC2, + 0x2BC2, 0x2BC3, 0x2BC4, 0x2BC5, 0x2BC6, 0x2BC7, 0x2BC8, 0x2BC9, 0x2BCA, 0x2BCB, 0x2BCC, 0x2BCD, 0x2BCE, 0x2BCF, 0x2BD0, + 0x2BD1, 0x2BD2, 0x2BD3, 0x2BD4, 0x2BD5, 0x2BD6, 0x2BD7, 0x2BD8, 0x2BD9, 0x2BDA, 0x2BDB, 0x2BDC, 0x2BDD, 0x2BDE, 0x2BDF, + 0x2BE0, 0x2BE1, 0x2BE2, 0x2BE3, 0x2BE4, 0x2BE5, 0x2BE6, 0x2BE7, 0x2BE8, 0x2BE9, 0x2BEA, 0x2BEB, 0x2BEC, 0x2BED, 0x2BEE, + 0x2BEF, 0x2BF0, 0x2BF1, 0x2BF2, 0x2BF3, 0x2BF4, 0x2BF5, 0x2BF6, 0x2BF7, 0x95B6FBC2, 0x95B7FBC2, 0x2BF8, 0x2BF9, 0x2BFA, 0x2BFB, + 0x0, 0x0, 0x0, 0x2BFC, 0x0, 0x461, 0x2B8, 0x2B9, 0x462, 0x463, 0x464, 0x465, 0x466, 0x467, 0x468, + 0x469, 0x46A, 0x46B, 0x46C, 0x46D, 0x46E, 0x46F, 0x470, 0x471, 0x472, 0x473, 0x474, 0x475, 0x2BC4, 0x2BC4, + 0x2BC5, 0x2BC6, 0x2BF4, 0x2BF5, 0x95DEFBC2, 0x95DFFBC2, 0x95E0FBC2, 0x95E1FBC2, 0x95E2FBC2, 0x95E3FBC2, 0x95E4FBC2, 0x95E5FBC2, 0x95E6FBC2, 0x95E7FBC2, 0x95E8FBC2, + 0x95E9FBC2, 0x95EAFBC2, 0x95EBFBC2, 0x95ECFBC2, 0x95EDFBC2, 0x95EEFBC2, 0x95EFFBC2, 0x95F0FBC2, 0x95F1FBC2, 0x95F2FBC2, 0x95F3FBC2, 0x95F4FBC2, 0x95F5FBC2, 0x95F6FBC2, 0x95F7FBC2, + 0x95F8FBC2, 0x95F9FBC2, 0x95FAFBC2, 0x95FBFBC2, 0x95FCFBC2, 0x95FDFBC2, 0x95FEFBC2, 0x95FFFBC2, 0x2BFD, 0x2BFE, 0x2BFF, 0x2C00, 0x2C01, 0x2C02, 0x2C03, + 0x2C04, 0x2C05, 0x2C06, 0x2C07, 0x2C08, 0x2C09, 0x2C0A, 0x2C0B, 0x2C0C, 0x2C0D, 0x2C0E, 0x2C0F, 0x2C10, 0x2C11, 0x2C12, + 0x2C13, 0x2C14, 0x2C15, 0x2C16, 0x2C17, 0x2C18, 0x2C19, 0x2C1A, 0x2C1B, 0x2C1C, 0x2C1D, 0x2C1E, 0x2C1F, 0x2C20, 0x2C21, + 0x2C22, 0x2C23, 0x2C24, 0x2C25, 0x2C26, 0x2C27, 0x2C28, 0x2C29, 0x2C2A, 0x2C2B, 0x2C2C, 0x2C2D, 0x2C2E, 0x2C2F, 0x2C30, + 0x2C31, 0x2C32, 0x2C33, 0x2C34, 0x2C35, 0x2C36, 0x2C37, 0x2C38, 0x2C39, 0x0, 0x0, 0x2C3A, 0x0, 0x2BA, 0x2BB, + 0x476, 0x2C3B, 0x9645FBC2, 0x9646FBC2, 0x9647FBC2, 0x9648FBC2, 0x9649FBC2, 0x964AFBC2, 0x964BFBC2, 0x964CFBC2, 0x964DFBC2, 0x964EFBC2, 0x964FFBC2, 0x1C3D, 0x1C3E, + 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x965AFBC2, 0x965BFBC2, 0x965CFBC2, 0x965DFBC2, 0x965EFBC2, 
0x965FFBC2, 0x3DA, + 0x3DB, 0x3DC, 0x3DD, 0x3DE, 0x3DF, 0x3E0, 0x3E1, 0x3E2, 0x3E3, 0x3E4, 0x3E5, 0x3E6, 0x966DFBC2, 0x966EFBC2, 0x966FFBC2, + 0x9670FBC2, 0x9671FBC2, 0x9672FBC2, 0x9673FBC2, 0x9674FBC2, 0x9675FBC2, 0x9676FBC2, 0x9677FBC2, 0x9678FBC2, 0x9679FBC2, 0x967AFBC2, 0x967BFBC2, 0x967CFBC2, 0x967DFBC2, 0x967EFBC2, + 0x967FFBC2, 0x2C3C, 0x2C3D, 0x2C3E, 0x2C3F, 0x2C40, 0x2C41, 0x2C42, 0x2C43, 0x2C44, 0x2C45, 0x2C49, 0x2C4A, 0x2C4B, 0x2C4C, + 0x2C4D, 0x2C4E, 0x2C4F, 0x2C50, 0x2C51, 0x2C52, 0x2C53, 0x2C54, 0x2C55, 0x2C56, 0x2C57, 0x2C58, 0x2C59, 0x2C5A, 0x2C5B, + 0x2C5C, 0x2C5D, 0x2C5E, 0x2C5F, 0x2C60, 0x2C61, 0x2C62, 0x2C63, 0x2C64, 0x2C65, 0x2C47, 0x2C46, 0x2C48, 0x2C66, 0x0, + 0x0, 0x2C67, 0x2C68, 0x2C69, 0x2C6A, 0x2C6B, 0x2C6C, 0x2C6D, 0x2C6E, 0x2C6F, 0x2C70, 0x0, 0x96B8FBC2, 0x96B9FBC2, 0x96BAFBC2, + 0x96BBFBC2, 0x96BCFBC2, 0x96BDFBC2, 0x96BEFBC2, 0x96BFFBC2, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, + 0x96CAFBC2, 0x96CBFBC2, 0x96CCFBC2, 0x96CDFBC2, 0x96CEFBC2, 0x96CFFBC2, 0x96D0FBC2, 0x96D1FBC2, 0x96D2FBC2, 0x96D3FBC2, 0x96D4FBC2, 0x96D5FBC2, 0x96D6FBC2, 0x96D7FBC2, 0x96D8FBC2, + 0x96D9FBC2, 0x96DAFBC2, 0x96DBFBC2, 0x96DCFBC2, 0x96DDFBC2, 0x96DEFBC2, 0x96DFFBC2, 0x96E0FBC2, 0x96E1FBC2, 0x96E2FBC2, 0x96E3FBC2, 0x96E4FBC2, 0x96E5FBC2, 0x96E6FBC2, 0x96E7FBC2, + 0x96E8FBC2, 0x96E9FBC2, 0x96EAFBC2, 0x96EBFBC2, 0x96ECFBC2, 0x96EDFBC2, 0x96EEFBC2, 0x96EFFBC2, 0x96F0FBC2, 0x96F1FBC2, 0x96F2FBC2, 0x96F3FBC2, 0x96F4FBC2, 0x96F5FBC2, 0x96F6FBC2, + 0x96F7FBC2, 0x96F8FBC2, 0x96F9FBC2, 0x96FAFBC2, 0x96FBFBC2, 0x96FCFBC2, 0x96FDFBC2, 0x96FEFBC2, 0x96FFFBC2, 0x2C71, 0x2C72, 0x2C73, 0x2C74, 0x2C75, 0x2C75, + 0x2C76, 0x2C77, 0x2C78, 0x2C79, 0x2C7A, 0x2C7B, 0x2C7C, 0x2C7D, 0x2C7E, 0x2C7F, 0x2C80, 0x2C81, 0x2C82, 0x2C83, 0x2C84, + 0x2C85, 0x2C85, 0x2C86, 0x2C87, 0x2C88, 0x971AFBC2, 0x971BFBC2, 0x971CFBC2, 0x2C95, 0x2C96, 0x2C97, 0x2C89, 0x2C8A, 0x2C8B, 0x2C8C, + 0x2C8D, 0x2C8E, 0x2C8F, 0x2C90, 0x2C91, 0x2C92, 0x2C93, 0x2C94, 0x972CFBC2, 
0x972DFBC2, 0x972EFBC2, 0x972FFBC2, 0x1C3D, 0x1C3E, 0x1C3F, + 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1B71, 0x1B72, 0x2BC, 0x2BD, 0x2E7, 0x550, 0x9740FBC2, 0x9741FBC2, + 0x9742FBC2, 0x9743FBC2, 0x9744FBC2, 0x9745FBC2, 0x9746FBC2, 0x9747FBC2, 0x9748FBC2, 0x9749FBC2, 0x974AFBC2, 0x974BFBC2, 0x974CFBC2, 0x974DFBC2, 0x974EFBC2, 0x974FFBC2, 0x9750FBC2, + 0x9751FBC2, 0x9752FBC2, 0x9753FBC2, 0x9754FBC2, 0x9755FBC2, 0x9756FBC2, 0x9757FBC2, 0x9758FBC2, 0x9759FBC2, 0x975AFBC2, 0x975BFBC2, 0x975CFBC2, 0x975DFBC2, 0x975EFBC2, 0x975FFBC2, + 0x9760FBC2, 0x9761FBC2, 0x9762FBC2, 0x9763FBC2, 0x9764FBC2, 0x9765FBC2, 0x9766FBC2, 0x9767FBC2, 0x9768FBC2, 0x9769FBC2, 0x976AFBC2, 0x976BFBC2, 0x976CFBC2, 0x976DFBC2, 0x976EFBC2, + 0x976FFBC2, 0x9770FBC2, 0x9771FBC2, 0x9772FBC2, 0x9773FBC2, 0x9774FBC2, 0x9775FBC2, 0x9776FBC2, 0x9777FBC2, 0x9778FBC2, 0x9779FBC2, 0x977AFBC2, 0x977BFBC2, 0x977CFBC2, 0x977DFBC2, + 0x977EFBC2, 0x977FFBC2, 0x9780FBC2, 0x9781FBC2, 0x9782FBC2, 0x9783FBC2, 0x9784FBC2, 0x9785FBC2, 0x9786FBC2, 0x9787FBC2, 0x9788FBC2, 0x9789FBC2, 0x978AFBC2, 0x978BFBC2, 0x978CFBC2, + 0x978DFBC2, 0x978EFBC2, 0x978FFBC2, 0x9790FBC2, 0x9791FBC2, 0x9792FBC2, 0x9793FBC2, 0x9794FBC2, 0x9795FBC2, 0x9796FBC2, 0x9797FBC2, 0x9798FBC2, 0x9799FBC2, 0x979AFBC2, 0x979BFBC2, + 0x979CFBC2, 0x979DFBC2, 0x979EFBC2, 0x979FFBC2, 0x97A0FBC2, 0x97A1FBC2, 0x97A2FBC2, 0x97A3FBC2, 0x97A4FBC2, 0x97A5FBC2, 0x97A6FBC2, 0x97A7FBC2, 0x97A8FBC2, 0x97A9FBC2, 0x97AAFBC2, + 0x97ABFBC2, 0x97ACFBC2, 0x97ADFBC2, 0x97AEFBC2, 0x97AFFBC2, 0x97B0FBC2, 0x97B1FBC2, 0x97B2FBC2, 0x97B3FBC2, 0x97B4FBC2, 0x97B5FBC2, 0x97B6FBC2, 0x97B7FBC2, 0x97B8FBC2, 0x97B9FBC2, + 0x97BAFBC2, 0x97BBFBC2, 0x97BCFBC2, 0x97BDFBC2, 0x97BEFBC2, 0x97BFFBC2, 0x97C0FBC2, 0x97C1FBC2, 0x97C2FBC2, 0x97C3FBC2, 0x97C4FBC2, 0x97C5FBC2, 0x97C6FBC2, 0x97C7FBC2, 0x97C8FBC2, + 0x97C9FBC2, 0x97CAFBC2, 0x97CBFBC2, 0x97CCFBC2, 0x97CDFBC2, 0x97CEFBC2, 0x97CFFBC2, 0x97D0FBC2, 0x97D1FBC2, 0x97D2FBC2, 0x97D3FBC2, 0x97D4FBC2, 0x97D5FBC2, 0x97D6FBC2, 
0x97D7FBC2, + 0x97D8FBC2, 0x97D9FBC2, 0x97DAFBC2, 0x97DBFBC2, 0x97DCFBC2, 0x97DDFBC2, 0x97DEFBC2, 0x97DFFBC2, 0x97E0FBC2, 0x97E1FBC2, 0x97E2FBC2, 0x97E3FBC2, 0x97E4FBC2, 0x97E5FBC2, 0x97E6FBC2, + 0x97E7FBC2, 0x97E8FBC2, 0x97E9FBC2, 0x97EAFBC2, 0x97EBFBC2, 0x97ECFBC2, 0x97EDFBC2, 0x97EEFBC2, 0x97EFFBC2, 0x97F0FBC2, 0x97F1FBC2, 0x97F2FBC2, 0x97F3FBC2, 0x97F4FBC2, 0x97F5FBC2, + 0x97F6FBC2, 0x97F7FBC2, 0x97F8FBC2, 0x97F9FBC2, 0x97FAFBC2, 0x97FBFBC2, 0x97FCFBC2, 0x97FDFBC2, 0x97FEFBC2, 0x97FFFBC2, 0x9800FBC2, 0x9801FBC2, 0x9802FBC2, 0x9803FBC2, 0x9804FBC2, + 0x9805FBC2, 0x9806FBC2, 0x9807FBC2, 0x9808FBC2, 0x9809FBC2, 0x980AFBC2, 0x980BFBC2, 0x980CFBC2, 0x980DFBC2, 0x980EFBC2, 0x980FFBC2, 0x9810FBC2, 0x9811FBC2, 0x9812FBC2, 0x9813FBC2, + 0x9814FBC2, 0x9815FBC2, 0x9816FBC2, 0x9817FBC2, 0x9818FBC2, 0x9819FBC2, 0x981AFBC2, 0x981BFBC2, 0x981CFBC2, 0x981DFBC2, 0x981EFBC2, 0x981FFBC2, 0x9820FBC2, 0x9821FBC2, 0x9822FBC2, + 0x9823FBC2, 0x9824FBC2, 0x9825FBC2, 0x9826FBC2, 0x9827FBC2, 0x9828FBC2, 0x9829FBC2, 0x982AFBC2, 0x982BFBC2, 0x982CFBC2, 0x982DFBC2, 0x982EFBC2, 0x982FFBC2, 0x9830FBC2, 0x9831FBC2, + 0x9832FBC2, 0x9833FBC2, 0x9834FBC2, 0x9835FBC2, 0x9836FBC2, 0x9837FBC2, 0x9838FBC2, 0x9839FBC2, 0x983AFBC2, 0x983BFBC2, 0x983CFBC2, 0x983DFBC2, 0x983EFBC2, 0x983FFBC2, 0x9840FBC2, + 0x9841FBC2, 0x9842FBC2, 0x9843FBC2, 0x9844FBC2, 0x9845FBC2, 0x9846FBC2, 0x9847FBC2, 0x9848FBC2, 0x9849FBC2, 0x984AFBC2, 0x984BFBC2, 0x984CFBC2, 0x984DFBC2, 0x984EFBC2, 0x984FFBC2, + 0x9850FBC2, 0x9851FBC2, 0x9852FBC2, 0x9853FBC2, 0x9854FBC2, 0x9855FBC2, 0x9856FBC2, 0x9857FBC2, 0x9858FBC2, 0x9859FBC2, 0x985AFBC2, 0x985BFBC2, 0x985CFBC2, 0x985DFBC2, 0x985EFBC2, + 0x985FFBC2, 0x9860FBC2, 0x9861FBC2, 0x9862FBC2, 0x9863FBC2, 0x9864FBC2, 0x9865FBC2, 0x9866FBC2, 0x9867FBC2, 0x9868FBC2, 0x9869FBC2, 0x986AFBC2, 0x986BFBC2, 0x986CFBC2, 0x986DFBC2, + 0x986EFBC2, 0x986FFBC2, 0x9870FBC2, 0x9871FBC2, 0x9872FBC2, 0x9873FBC2, 0x9874FBC2, 0x9875FBC2, 0x9876FBC2, 0x9877FBC2, 0x9878FBC2, 0x9879FBC2, 0x987AFBC2, 
0x987BFBC2, 0x987CFBC2, + 0x987DFBC2, 0x987EFBC2, 0x987FFBC2, 0x9880FBC2, 0x9881FBC2, 0x9882FBC2, 0x9883FBC2, 0x9884FBC2, 0x9885FBC2, 0x9886FBC2, 0x9887FBC2, 0x9888FBC2, 0x9889FBC2, 0x988AFBC2, 0x988BFBC2, + 0x988CFBC2, 0x988DFBC2, 0x988EFBC2, 0x988FFBC2, 0x9890FBC2, 0x9891FBC2, 0x9892FBC2, 0x9893FBC2, 0x9894FBC2, 0x9895FBC2, 0x9896FBC2, 0x9897FBC2, 0x9898FBC2, 0x9899FBC2, 0x989AFBC2, + 0x989BFBC2, 0x989CFBC2, 0x989DFBC2, 0x989EFBC2, 0x989FFBC2, 0x42FC, 0x42FD, 0x42FE, 0x42FF, 0x4300, 0x4301, 0x4302, 0x4303, 0x4304, 0x4305, + 0x4306, 0x4307, 0x4308, 0x4309, 0x430A, 0x430B, 0x430C, 0x430D, 0x430E, 0x430F, 0x4310, 0x4311, 0x4312, 0x4313, 0x4314, + 0x4315, 0x4316, 0x4317, 0x4318, 0x4319, 0x431A, 0x431B, 0x42FC, 0x42FD, 0x42FE, 0x42FF, 0x4300, 0x4301, 0x4302, 0x4303, + 0x4304, 0x4305, 0x4306, 0x4307, 0x4308, 0x4309, 0x430A, 0x430B, 0x430C, 0x430D, 0x430E, 0x430F, 0x4310, 0x4311, 0x4312, + 0x4313, 0x4314, 0x4315, 0x4316, 0x4317, 0x4318, 0x4319, 0x431A, 0x431B, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, + 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1B97, 0x1B98, 0x1B99, 0x1B9A, 0x1B9B, 0x1B9C, 0x1B9D, 0x1B9E, 0x1B9F, 0x98F3FBC2, 0x98F4FBC2, + 0x98F5FBC2, 0x98F6FBC2, 0x98F7FBC2, 0x98F8FBC2, 0x98F9FBC2, 0x98FAFBC2, 0x98FBFBC2, 0x98FCFBC2, 0x98FDFBC2, 0x98FEFBC2, 0x42FB, 0x9900FBC2, 0x9901FBC2, 0x9902FBC2, 0x9903FBC2, + 0x9904FBC2, 0x9905FBC2, 0x9906FBC2, 0x9907FBC2, 0x9908FBC2, 0x9909FBC2, 0x990AFBC2, 0x990BFBC2, 0x990CFBC2, 0x990DFBC2, 0x990EFBC2, 0x990FFBC2, 0x9910FBC2, 0x9911FBC2, 0x9912FBC2, + 0x9913FBC2, 0x9914FBC2, 0x9915FBC2, 0x9916FBC2, 0x9917FBC2, 0x9918FBC2, 0x9919FBC2, 0x991AFBC2, 0x991BFBC2, 0x991CFBC2, 0x991DFBC2, 0x991EFBC2, 0x991FFBC2, 0x9920FBC2, 0x9921FBC2, + 0x9922FBC2, 0x9923FBC2, 0x9924FBC2, 0x9925FBC2, 0x9926FBC2, 0x9927FBC2, 0x9928FBC2, 0x9929FBC2, 0x992AFBC2, 0x992BFBC2, 0x992CFBC2, 0x992DFBC2, 0x992EFBC2, 0x992FFBC2, 0x9930FBC2, + 0x9931FBC2, 0x9932FBC2, 0x9933FBC2, 0x9934FBC2, 0x9935FBC2, 0x9936FBC2, 0x9937FBC2, 0x9938FBC2, 0x9939FBC2, 0x993AFBC2, 
0x993BFBC2, 0x993CFBC2, 0x993DFBC2, 0x993EFBC2, 0x993FFBC2, + 0x9940FBC2, 0x9941FBC2, 0x9942FBC2, 0x9943FBC2, 0x9944FBC2, 0x9945FBC2, 0x9946FBC2, 0x9947FBC2, 0x9948FBC2, 0x9949FBC2, 0x994AFBC2, 0x994BFBC2, 0x994CFBC2, 0x994DFBC2, 0x994EFBC2, + 0x994FFBC2, 0x9950FBC2, 0x9951FBC2, 0x9952FBC2, 0x9953FBC2, 0x9954FBC2, 0x9955FBC2, 0x9956FBC2, 0x9957FBC2, 0x9958FBC2, 0x9959FBC2, 0x995AFBC2, 0x995BFBC2, 0x995CFBC2, 0x995DFBC2, + 0x995EFBC2, 0x995FFBC2, 0x9960FBC2, 0x9961FBC2, 0x9962FBC2, 0x9963FBC2, 0x9964FBC2, 0x9965FBC2, 0x9966FBC2, 0x9967FBC2, 0x9968FBC2, 0x9969FBC2, 0x996AFBC2, 0x996BFBC2, 0x996CFBC2, + 0x996DFBC2, 0x996EFBC2, 0x996FFBC2, 0x9970FBC2, 0x9971FBC2, 0x9972FBC2, 0x9973FBC2, 0x9974FBC2, 0x9975FBC2, 0x9976FBC2, 0x9977FBC2, 0x9978FBC2, 0x9979FBC2, 0x997AFBC2, 0x997BFBC2, + 0x997CFBC2, 0x997DFBC2, 0x997EFBC2, 0x997FFBC2, 0x9980FBC2, 0x9981FBC2, 0x9982FBC2, 0x9983FBC2, 0x9984FBC2, 0x9985FBC2, 0x9986FBC2, 0x9987FBC2, 0x9988FBC2, 0x9989FBC2, 0x998AFBC2, + 0x998BFBC2, 0x998CFBC2, 0x998DFBC2, 0x998EFBC2, 0x998FFBC2, 0x9990FBC2, 0x9991FBC2, 0x9992FBC2, 0x9993FBC2, 0x9994FBC2, 0x9995FBC2, 0x9996FBC2, 0x9997FBC2, 0x9998FBC2, 0x9999FBC2, + 0x999AFBC2, 0x999BFBC2, 0x999CFBC2, 0x999DFBC2, 0x999EFBC2, 0x999FFBC2, 0x99A0FBC2, 0x99A1FBC2, 0x99A2FBC2, 0x99A3FBC2, 0x99A4FBC2, 0x99A5FBC2, 0x99A6FBC2, 0x99A7FBC2, 0x99A8FBC2, + 0x99A9FBC2, 0x99AAFBC2, 0x99ABFBC2, 0x99ACFBC2, 0x99ADFBC2, 0x99AEFBC2, 0x99AFFBC2, 0x99B0FBC2, 0x99B1FBC2, 0x99B2FBC2, 0x99B3FBC2, 0x99B4FBC2, 0x99B5FBC2, 0x99B6FBC2, 0x99B7FBC2, + 0x99B8FBC2, 0x99B9FBC2, 0x99BAFBC2, 0x99BBFBC2, 0x99BCFBC2, 0x99BDFBC2, 0x99BEFBC2, 0x99BFFBC2, 0x99C0FBC2, 0x99C1FBC2, 0x99C2FBC2, 0x99C3FBC2, 0x99C4FBC2, 0x99C5FBC2, 0x99C6FBC2, + 0x99C7FBC2, 0x99C8FBC2, 0x99C9FBC2, 0x99CAFBC2, 0x99CBFBC2, 0x99CCFBC2, 0x99CDFBC2, 0x99CEFBC2, 0x99CFFBC2, 0x99D0FBC2, 0x99D1FBC2, 0x99D2FBC2, 0x99D3FBC2, 0x99D4FBC2, 0x99D5FBC2, + 0x99D6FBC2, 0x99D7FBC2, 0x99D8FBC2, 0x99D9FBC2, 0x99DAFBC2, 0x99DBFBC2, 0x99DCFBC2, 0x99DDFBC2, 0x99DEFBC2, 
0x99DFFBC2, 0x99E0FBC2, 0x99E1FBC2, 0x99E2FBC2, 0x99E3FBC2, 0x99E4FBC2, + 0x99E5FBC2, 0x99E6FBC2, 0x99E7FBC2, 0x99E8FBC2, 0x99E9FBC2, 0x99EAFBC2, 0x99EBFBC2, 0x99ECFBC2, 0x99EDFBC2, 0x99EEFBC2, 0x99EFFBC2, 0x99F0FBC2, 0x99F1FBC2, 0x99F2FBC2, 0x99F3FBC2, + 0x99F4FBC2, 0x99F5FBC2, 0x99F6FBC2, 0x99F7FBC2, 0x99F8FBC2, 0x99F9FBC2, 0x99FAFBC2, 0x99FBFBC2, 0x99FCFBC2, 0x99FDFBC2, 0x99FEFBC2, 0x99FFFBC2, 0x9A00FBC2, 0x9A01FBC2, 0x9A02FBC2, + 0x9A03FBC2, 0x9A04FBC2, 0x9A05FBC2, 0x9A06FBC2, 0x9A07FBC2, 0x9A08FBC2, 0x9A09FBC2, 0x9A0AFBC2, 0x9A0BFBC2, 0x9A0CFBC2, 0x9A0DFBC2, 0x9A0EFBC2, 0x9A0FFBC2, 0x9A10FBC2, 0x9A11FBC2, + 0x9A12FBC2, 0x9A13FBC2, 0x9A14FBC2, 0x9A15FBC2, 0x9A16FBC2, 0x9A17FBC2, 0x9A18FBC2, 0x9A19FBC2, 0x9A1AFBC2, 0x9A1BFBC2, 0x9A1CFBC2, 0x9A1DFBC2, 0x9A1EFBC2, 0x9A1FFBC2, 0x9A20FBC2, + 0x9A21FBC2, 0x9A22FBC2, 0x9A23FBC2, 0x9A24FBC2, 0x9A25FBC2, 0x9A26FBC2, 0x9A27FBC2, 0x9A28FBC2, 0x9A29FBC2, 0x9A2AFBC2, 0x9A2BFBC2, 0x9A2CFBC2, 0x9A2DFBC2, 0x9A2EFBC2, 0x9A2FFBC2, + 0x9A30FBC2, 0x9A31FBC2, 0x9A32FBC2, 0x9A33FBC2, 0x9A34FBC2, 0x9A35FBC2, 0x9A36FBC2, 0x9A37FBC2, 0x9A38FBC2, 0x9A39FBC2, 0x9A3AFBC2, 0x9A3BFBC2, 0x9A3CFBC2, 0x9A3DFBC2, 0x9A3EFBC2, + 0x9A3FFBC2, 0x9A40FBC2, 0x9A41FBC2, 0x9A42FBC2, 0x9A43FBC2, 0x9A44FBC2, 0x9A45FBC2, 0x9A46FBC2, 0x9A47FBC2, 0x9A48FBC2, 0x9A49FBC2, 0x9A4AFBC2, 0x9A4BFBC2, 0x9A4CFBC2, 0x9A4DFBC2, + 0x9A4EFBC2, 0x9A4FFBC2, 0x9A50FBC2, 0x9A51FBC2, 0x9A52FBC2, 0x9A53FBC2, 0x9A54FBC2, 0x9A55FBC2, 0x9A56FBC2, 0x9A57FBC2, 0x9A58FBC2, 0x9A59FBC2, 0x9A5AFBC2, 0x9A5BFBC2, 0x9A5CFBC2, + 0x9A5DFBC2, 0x9A5EFBC2, 0x9A5FFBC2, 0x9A60FBC2, 0x9A61FBC2, 0x9A62FBC2, 0x9A63FBC2, 0x9A64FBC2, 0x9A65FBC2, 0x9A66FBC2, 0x9A67FBC2, 0x9A68FBC2, 0x9A69FBC2, 0x9A6AFBC2, 0x9A6BFBC2, + 0x9A6CFBC2, 0x9A6DFBC2, 0x9A6EFBC2, 0x9A6FFBC2, 0x9A70FBC2, 0x9A71FBC2, 0x9A72FBC2, 0x9A73FBC2, 0x9A74FBC2, 0x9A75FBC2, 0x9A76FBC2, 0x9A77FBC2, 0x9A78FBC2, 0x9A79FBC2, 0x9A7AFBC2, + 0x9A7BFBC2, 0x9A7CFBC2, 0x9A7DFBC2, 0x9A7EFBC2, 0x9A7FFBC2, 0x9A80FBC2, 0x9A81FBC2, 0x9A82FBC2, 
0x9A83FBC2, 0x9A84FBC2, 0x9A85FBC2, 0x9A86FBC2, 0x9A87FBC2, 0x9A88FBC2, 0x9A89FBC2, + 0x9A8AFBC2, 0x9A8BFBC2, 0x9A8CFBC2, 0x9A8DFBC2, 0x9A8EFBC2, 0x9A8FFBC2, 0x9A90FBC2, 0x9A91FBC2, 0x9A92FBC2, 0x9A93FBC2, 0x9A94FBC2, 0x9A95FBC2, 0x9A96FBC2, 0x9A97FBC2, 0x9A98FBC2, + 0x9A99FBC2, 0x9A9AFBC2, 0x9A9BFBC2, 0x9A9CFBC2, 0x9A9DFBC2, 0x9A9EFBC2, 0x9A9FFBC2, 0x9AA0FBC2, 0x9AA1FBC2, 0x9AA2FBC2, 0x9AA3FBC2, 0x9AA4FBC2, 0x9AA5FBC2, 0x9AA6FBC2, 0x9AA7FBC2, + 0x9AA8FBC2, 0x9AA9FBC2, 0x9AAAFBC2, 0x9AABFBC2, 0x9AACFBC2, 0x9AADFBC2, 0x9AAEFBC2, 0x9AAFFBC2, 0x9AB0FBC2, 0x9AB1FBC2, 0x9AB2FBC2, 0x9AB3FBC2, 0x9AB4FBC2, 0x9AB5FBC2, 0x9AB6FBC2, + 0x9AB7FBC2, 0x9AB8FBC2, 0x9AB9FBC2, 0x9ABAFBC2, 0x9ABBFBC2, 0x9ABCFBC2, 0x9ABDFBC2, 0x9ABEFBC2, 0x9ABFFBC2, 0x4323, 0x4324, 0x4325, 0x4326, 0x4327, 0x4328, + 0x4329, 0x432A, 0x432B, 0x432C, 0x432D, 0x432E, 0x432F, 0x4330, 0x4331, 0x4332, 0x4333, 0x4334, 0x4335, 0x4336, 0x4337, + 0x431C, 0x431D, 0x431E, 0x431F, 0x4320, 0x4321, 0x4322, 0x4338, 0x4339, 0x433F, 0x433A, 0x433B, 0x433C, 0x433D, 0x433E, + 0x4340, 0x4346, 0x4344, 0x4352, 0x4347, 0x4345, 0x4353, 0x434E, 0x434C, 0x434F, 0x434D, 0x4341, 0x4354, 0x4343, 0x4342, + 0x434A, 0x4348, 0x4350, 0x434B, 0x4349, 0x4351, 0x9AF9FBC2, 0x9AFAFBC2, 0x9AFBFBC2, 0x9AFCFBC2, 0x9AFDFBC2, 0x9AFEFBC2, 0x9AFFFBC2, 0x9B00FBC2, 0x9B01FBC2, + 0x9B02FBC2, 0x9B03FBC2, 0x9B04FBC2, 0x9B05FBC2, 0x9B06FBC2, 0x9B07FBC2, 0x9B08FBC2, 0x9B09FBC2, 0x9B0AFBC2, 0x9B0BFBC2, 0x9B0CFBC2, 0x9B0DFBC2, 0x9B0EFBC2, 0x9B0FFBC2, 0x9B10FBC2, + 0x9B11FBC2, 0x9B12FBC2, 0x9B13FBC2, 0x9B14FBC2, 0x9B15FBC2, 0x9B16FBC2, 0x9B17FBC2, 0x9B18FBC2, 0x9B19FBC2, 0x9B1AFBC2, 0x9B1BFBC2, 0x9B1CFBC2, 0x9B1DFBC2, 0x9B1EFBC2, 0x9B1FFBC2, + 0x9B20FBC2, 0x9B21FBC2, 0x9B22FBC2, 0x9B23FBC2, 0x9B24FBC2, 0x9B25FBC2, 0x9B26FBC2, 0x9B27FBC2, 0x9B28FBC2, 0x9B29FBC2, 0x9B2AFBC2, 0x9B2BFBC2, 0x9B2CFBC2, 0x9B2DFBC2, 0x9B2EFBC2, + 0x9B2FFBC2, 0x9B30FBC2, 0x9B31FBC2, 0x9B32FBC2, 0x9B33FBC2, 0x9B34FBC2, 0x9B35FBC2, 0x9B36FBC2, 0x9B37FBC2, 0x9B38FBC2, 0x9B39FBC2, 
0x9B3AFBC2, 0x9B3BFBC2, 0x9B3CFBC2, 0x9B3DFBC2, + 0x9B3EFBC2, 0x9B3FFBC2, 0x9B40FBC2, 0x9B41FBC2, 0x9B42FBC2, 0x9B43FBC2, 0x9B44FBC2, 0x9B45FBC2, 0x9B46FBC2, 0x9B47FBC2, 0x9B48FBC2, 0x9B49FBC2, 0x9B4AFBC2, 0x9B4BFBC2, 0x9B4CFBC2, + 0x9B4DFBC2, 0x9B4EFBC2, 0x9B4FFBC2, 0x9B50FBC2, 0x9B51FBC2, 0x9B52FBC2, 0x9B53FBC2, 0x9B54FBC2, 0x9B55FBC2, 0x9B56FBC2, 0x9B57FBC2, 0x9B58FBC2, 0x9B59FBC2, 0x9B5AFBC2, 0x9B5BFBC2, + 0x9B5CFBC2, 0x9B5DFBC2, 0x9B5EFBC2, 0x9B5FFBC2, 0x9B60FBC2, 0x9B61FBC2, 0x9B62FBC2, 0x9B63FBC2, 0x9B64FBC2, 0x9B65FBC2, 0x9B66FBC2, 0x9B67FBC2, 0x9B68FBC2, 0x9B69FBC2, 0x9B6AFBC2, + 0x9B6BFBC2, 0x9B6CFBC2, 0x9B6DFBC2, 0x9B6EFBC2, 0x9B6FFBC2, 0x9B70FBC2, 0x9B71FBC2, 0x9B72FBC2, 0x9B73FBC2, 0x9B74FBC2, 0x9B75FBC2, 0x9B76FBC2, 0x9B77FBC2, 0x9B78FBC2, 0x9B79FBC2, + 0x9B7AFBC2, 0x9B7BFBC2, 0x9B7CFBC2, 0x9B7DFBC2, 0x9B7EFBC2, 0x9B7FFBC2, 0x9B80FBC2, 0x9B81FBC2, 0x9B82FBC2, 0x9B83FBC2, 0x9B84FBC2, 0x9B85FBC2, 0x9B86FBC2, 0x9B87FBC2, 0x9B88FBC2, + 0x9B89FBC2, 0x9B8AFBC2, 0x9B8BFBC2, 0x9B8CFBC2, 0x9B8DFBC2, 0x9B8EFBC2, 0x9B8FFBC2, 0x9B90FBC2, 0x9B91FBC2, 0x9B92FBC2, 0x9B93FBC2, 0x9B94FBC2, 0x9B95FBC2, 0x9B96FBC2, 0x9B97FBC2, + 0x9B98FBC2, 0x9B99FBC2, 0x9B9AFBC2, 0x9B9BFBC2, 0x9B9CFBC2, 0x9B9DFBC2, 0x9B9EFBC2, 0x9B9FFBC2, 0x9BA0FBC2, 0x9BA1FBC2, 0x9BA2FBC2, 0x9BA3FBC2, 0x9BA4FBC2, 0x9BA5FBC2, 0x9BA6FBC2, + 0x9BA7FBC2, 0x9BA8FBC2, 0x9BA9FBC2, 0x9BAAFBC2, 0x9BABFBC2, 0x9BACFBC2, 0x9BADFBC2, 0x9BAEFBC2, 0x9BAFFBC2, 0x9BB0FBC2, 0x9BB1FBC2, 0x9BB2FBC2, 0x9BB3FBC2, 0x9BB4FBC2, 0x9BB5FBC2, + 0x9BB6FBC2, 0x9BB7FBC2, 0x9BB8FBC2, 0x9BB9FBC2, 0x9BBAFBC2, 0x9BBBFBC2, 0x9BBCFBC2, 0x9BBDFBC2, 0x9BBEFBC2, 0x9BBFFBC2, 0x9BC0FBC2, 0x9BC1FBC2, 0x9BC2FBC2, 0x9BC3FBC2, 0x9BC4FBC2, + 0x9BC5FBC2, 0x9BC6FBC2, 0x9BC7FBC2, 0x9BC8FBC2, 0x9BC9FBC2, 0x9BCAFBC2, 0x9BCBFBC2, 0x9BCCFBC2, 0x9BCDFBC2, 0x9BCEFBC2, 0x9BCFFBC2, 0x9BD0FBC2, 0x9BD1FBC2, 0x9BD2FBC2, 0x9BD3FBC2, + 0x9BD4FBC2, 0x9BD5FBC2, 0x9BD6FBC2, 0x9BD7FBC2, 0x9BD8FBC2, 0x9BD9FBC2, 0x9BDAFBC2, 0x9BDBFBC2, 0x9BDCFBC2, 0x9BDDFBC2, 
0x9BDEFBC2, 0x9BDFFBC2, 0x9BE0FBC2, 0x9BE1FBC2, 0x9BE2FBC2, + 0x9BE3FBC2, 0x9BE4FBC2, 0x9BE5FBC2, 0x9BE6FBC2, 0x9BE7FBC2, 0x9BE8FBC2, 0x9BE9FBC2, 0x9BEAFBC2, 0x9BEBFBC2, 0x9BECFBC2, 0x9BEDFBC2, 0x9BEEFBC2, 0x9BEFFBC2, 0x9BF0FBC2, 0x9BF1FBC2, + 0x9BF2FBC2, 0x9BF3FBC2, 0x9BF4FBC2, 0x9BF5FBC2, 0x9BF6FBC2, 0x9BF7FBC2, 0x9BF8FBC2, 0x9BF9FBC2, 0x9BFAFBC2, 0x9BFBFBC2, 0x9BFCFBC2, 0x9BFDFBC2, 0x9BFEFBC2, 0x9BFFFBC2, 0x2D37, + 0x2D38, 0x2D39, 0x2D3A, 0x2D3B, 0x2D3C, 0x2D3D, 0x2D3E, 0x2D3F, 0x9C09FBC2, 0x2D40, 0x2D41, 0x2D42, 0x2D43, 0x2D44, 0x2D45, + 0x2D46, 0x2D47, 0x2D48, 0x2D49, 0x2D4A, 0x2D4B, 0x2D4C, 0x2D4D, 0x2D4E, 0x2D4F, 0x2D50, 0x2D51, 0x2D52, 0x2D53, 0x2D54, + 0x2D55, 0x2D56, 0x2D57, 0x2D58, 0x2D59, 0x2D5A, 0x2D5B, 0x2D5C, 0x2D5D, 0x2D5E, 0x2D5F, 0x2D60, 0x2D61, 0x2D62, 0x2D63, + 0x2D64, 0x2D66, 0x2D67, 0x2D68, 0x2D69, 0x2D6A, 0x2D6B, 0x2D6C, 0x2D6D, 0x9C37FBC2, 0x2D6E, 0x2D6F, 0x2D70, 0x2D71, 0x0, + 0x0, 0x0, 0x2D72, 0x2D65, 0x2BE, 0x2BF, 0x43F, 0x440, 0x441, 0x9C46FBC2, 0x9C47FBC2, 0x9C48FBC2, 0x9C49FBC2, 0x9C4AFBC2, 0x9C4BFBC2, + 0x9C4CFBC2, 0x9C4DFBC2, 0x9C4EFBC2, 0x9C4FFBC2, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C3E, + 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1B82, 0x1B83, 0x1B84, 0x1B85, 0x1B86, 0x1B87, 0x1B88, + 0x1B89, 0x1B8A, 0x1B8B, 0x9C6DFBC2, 0x9C6EFBC2, 0x9C6FFBC2, 0x409, 0x40A, 0x2E86, 0x2E88, 0x2E8A, 0x2E8C, 0x2E8E, 0x2E90, 0x2E92, + 0x2E94, 0x2E96, 0x2E98, 0x2E9A, 0x2E9C, 0x2E9E, 0x2EA0, 0x2EA2, 0x2EA4, 0x2EA6, 0x2EA8, 0x2EAA, 0x2EAC, 0x2EAE, 0x2EB0, + 0x2EB2, 0x2EB3, 0x2EB5, 0x2EB7, 0x2EB9, 0x2EBB, 0x2EBD, 0x2EBF, 0x9C90FBC2, 0x9C91FBC2, 0x2E87, 0x2E89, 0x2E8B, 0x2E8D, 0x2E8F, + 0x2E91, 0x2E93, 0x2E95, 0x2E97, 0x2E99, 0x2E9B, 0x2E9D, 0x2E9F, 0x2EA1, 0x2EA3, 0x2EA5, 0x2EA7, 0x2EA9, 0x2EAB, 0x2EAD, + 0x2EAF, 0x2EB1, 0x9CA8FBC2, 0x2EB4, 0x2EB6, 0x2EB8, 0x2EBA, 0x2EBC, 0x2EBE, 0x2EC0, 0x2EC1, 0x2EC2, 0x2EC3, 0x2EC4, 0x2EC5, + 0x0, 0x0, 0x9CB7FBC2, 0x9CB8FBC2, 0x9CB9FBC2, 0x9CBAFBC2, 
0x9CBBFBC2, 0x9CBCFBC2, 0x9CBDFBC2, 0x9CBEFBC2, 0x9CBFFBC2, 0x9CC0FBC2, 0x9CC1FBC2, 0x9CC2FBC2, 0x9CC3FBC2, + 0x9CC4FBC2, 0x9CC5FBC2, 0x9CC6FBC2, 0x9CC7FBC2, 0x9CC8FBC2, 0x9CC9FBC2, 0x9CCAFBC2, 0x9CCBFBC2, 0x9CCCFBC2, 0x9CCDFBC2, 0x9CCEFBC2, 0x9CCFFBC2, 0x9CD0FBC2, 0x9CD1FBC2, 0x9CD2FBC2, + 0x9CD3FBC2, 0x9CD4FBC2, 0x9CD5FBC2, 0x9CD6FBC2, 0x9CD7FBC2, 0x9CD8FBC2, 0x9CD9FBC2, 0x9CDAFBC2, 0x9CDBFBC2, 0x9CDCFBC2, 0x9CDDFBC2, 0x9CDEFBC2, 0x9CDFFBC2, 0x9CE0FBC2, 0x9CE1FBC2, + 0x9CE2FBC2, 0x9CE3FBC2, 0x9CE4FBC2, 0x9CE5FBC2, 0x9CE6FBC2, 0x9CE7FBC2, 0x9CE8FBC2, 0x9CE9FBC2, 0x9CEAFBC2, 0x9CEBFBC2, 0x9CECFBC2, 0x9CEDFBC2, 0x9CEEFBC2, 0x9CEFFBC2, 0x9CF0FBC2, + 0x9CF1FBC2, 0x9CF2FBC2, 0x9CF3FBC2, 0x9CF4FBC2, 0x9CF5FBC2, 0x9CF6FBC2, 0x9CF7FBC2, 0x9CF8FBC2, 0x9CF9FBC2, 0x9CFAFBC2, 0x9CFBFBC2, 0x9CFCFBC2, 0x9CFDFBC2, 0x9CFEFBC2, 0x9CFFFBC2, + 0x9D00FBC2, 0x9D01FBC2, 0x9D02FBC2, 0x9D03FBC2, 0x9D04FBC2, 0x9D05FBC2, 0x9D06FBC2, 0x9D07FBC2, 0x9D08FBC2, 0x9D09FBC2, 0x9D0AFBC2, 0x9D0BFBC2, 0x9D0CFBC2, 0x9D0DFBC2, 0x9D0EFBC2, + 0x9D0FFBC2, 0x9D10FBC2, 0x9D11FBC2, 0x9D12FBC2, 0x9D13FBC2, 0x9D14FBC2, 0x9D15FBC2, 0x9D16FBC2, 0x9D17FBC2, 0x9D18FBC2, 0x9D19FBC2, 0x9D1AFBC2, 0x9D1BFBC2, 0x9D1CFBC2, 0x9D1DFBC2, + 0x9D1EFBC2, 0x9D1FFBC2, 0x9D20FBC2, 0x9D21FBC2, 0x9D22FBC2, 0x9D23FBC2, 0x9D24FBC2, 0x9D25FBC2, 0x9D26FBC2, 0x9D27FBC2, 0x9D28FBC2, 0x9D29FBC2, 0x9D2AFBC2, 0x9D2BFBC2, 0x9D2CFBC2, + 0x9D2DFBC2, 0x9D2EFBC2, 0x9D2FFBC2, 0x9D30FBC2, 0x9D31FBC2, 0x9D32FBC2, 0x9D33FBC2, 0x9D34FBC2, 0x9D35FBC2, 0x9D36FBC2, 0x9D37FBC2, 0x9D38FBC2, 0x9D39FBC2, 0x9D3AFBC2, 0x9D3BFBC2, + 0x9D3CFBC2, 0x9D3DFBC2, 0x9D3EFBC2, 0x9D3FFBC2, 0x9D40FBC2, 0x9D41FBC2, 0x9D42FBC2, 0x9D43FBC2, 0x9D44FBC2, 0x9D45FBC2, 0x9D46FBC2, 0x9D47FBC2, 0x9D48FBC2, 0x9D49FBC2, 0x9D4AFBC2, + 0x9D4BFBC2, 0x9D4CFBC2, 0x9D4DFBC2, 0x9D4EFBC2, 0x9D4FFBC2, 0x9D50FBC2, 0x9D51FBC2, 0x9D52FBC2, 0x9D53FBC2, 0x9D54FBC2, 0x9D55FBC2, 0x9D56FBC2, 0x9D57FBC2, 0x9D58FBC2, 0x9D59FBC2, + 0x9D5AFBC2, 0x9D5BFBC2, 0x9D5CFBC2, 0x9D5DFBC2, 0x9D5EFBC2, 
0x9D5FFBC2, 0x9D60FBC2, 0x9D61FBC2, 0x9D62FBC2, 0x9D63FBC2, 0x9D64FBC2, 0x9D65FBC2, 0x9D66FBC2, 0x9D67FBC2, 0x9D68FBC2, + 0x9D69FBC2, 0x9D6AFBC2, 0x9D6BFBC2, 0x9D6CFBC2, 0x9D6DFBC2, 0x9D6EFBC2, 0x9D6FFBC2, 0x9D70FBC2, 0x9D71FBC2, 0x9D72FBC2, 0x9D73FBC2, 0x9D74FBC2, 0x9D75FBC2, 0x9D76FBC2, 0x9D77FBC2, + 0x9D78FBC2, 0x9D79FBC2, 0x9D7AFBC2, 0x9D7BFBC2, 0x9D7CFBC2, 0x9D7DFBC2, 0x9D7EFBC2, 0x9D7FFBC2, 0x9D80FBC2, 0x9D81FBC2, 0x9D82FBC2, 0x9D83FBC2, 0x9D84FBC2, 0x9D85FBC2, 0x9D86FBC2, + 0x9D87FBC2, 0x9D88FBC2, 0x9D89FBC2, 0x9D8AFBC2, 0x9D8BFBC2, 0x9D8CFBC2, 0x9D8DFBC2, 0x9D8EFBC2, 0x9D8FFBC2, 0x9D90FBC2, 0x9D91FBC2, 0x9D92FBC2, 0x9D93FBC2, 0x9D94FBC2, 0x9D95FBC2, + 0x9D96FBC2, 0x9D97FBC2, 0x9D98FBC2, 0x9D99FBC2, 0x9D9AFBC2, 0x9D9BFBC2, 0x9D9CFBC2, 0x9D9DFBC2, 0x9D9EFBC2, 0x9D9FFBC2, 0x9DA0FBC2, 0x9DA1FBC2, 0x9DA2FBC2, 0x9DA3FBC2, 0x9DA4FBC2, + 0x9DA5FBC2, 0x9DA6FBC2, 0x9DA7FBC2, 0x9DA8FBC2, 0x9DA9FBC2, 0x9DAAFBC2, 0x9DABFBC2, 0x9DACFBC2, 0x9DADFBC2, 0x9DAEFBC2, 0x9DAFFBC2, 0x9DB0FBC2, 0x9DB1FBC2, 0x9DB2FBC2, 0x9DB3FBC2, + 0x9DB4FBC2, 0x9DB5FBC2, 0x9DB6FBC2, 0x9DB7FBC2, 0x9DB8FBC2, 0x9DB9FBC2, 0x9DBAFBC2, 0x9DBBFBC2, 0x9DBCFBC2, 0x9DBDFBC2, 0x9DBEFBC2, 0x9DBFFBC2, 0x9DC0FBC2, 0x9DC1FBC2, 0x9DC2FBC2, + 0x9DC3FBC2, 0x9DC4FBC2, 0x9DC5FBC2, 0x9DC6FBC2, 0x9DC7FBC2, 0x9DC8FBC2, 0x9DC9FBC2, 0x9DCAFBC2, 0x9DCBFBC2, 0x9DCCFBC2, 0x9DCDFBC2, 0x9DCEFBC2, 0x9DCFFBC2, 0x9DD0FBC2, 0x9DD1FBC2, + 0x9DD2FBC2, 0x9DD3FBC2, 0x9DD4FBC2, 0x9DD5FBC2, 0x9DD6FBC2, 0x9DD7FBC2, 0x9DD8FBC2, 0x9DD9FBC2, 0x9DDAFBC2, 0x9DDBFBC2, 0x9DDCFBC2, 0x9DDDFBC2, 0x9DDEFBC2, 0x9DDFFBC2, 0x9DE0FBC2, + 0x9DE1FBC2, 0x9DE2FBC2, 0x9DE3FBC2, 0x9DE4FBC2, 0x9DE5FBC2, 0x9DE6FBC2, 0x9DE7FBC2, 0x9DE8FBC2, 0x9DE9FBC2, 0x9DEAFBC2, 0x9DEBFBC2, 0x9DECFBC2, 0x9DEDFBC2, 0x9DEEFBC2, 0x9DEFFBC2, + 0x9DF0FBC2, 0x9DF1FBC2, 0x9DF2FBC2, 0x9DF3FBC2, 0x9DF4FBC2, 0x9DF5FBC2, 0x9DF6FBC2, 0x9DF7FBC2, 0x9DF8FBC2, 0x9DF9FBC2, 0x9DFAFBC2, 0x9DFBFBC2, 0x9DFCFBC2, 0x9DFDFBC2, 0x9DFEFBC2, + 0x9DFFFBC2, 0x9E00FBC2, 0x9E01FBC2, 0x9E02FBC2, 
0x9E03FBC2, 0x9E04FBC2, 0x9E05FBC2, 0x9E06FBC2, 0x9E07FBC2, 0x9E08FBC2, 0x9E09FBC2, 0x9E0AFBC2, 0x9E0BFBC2, 0x9E0CFBC2, 0x9E0DFBC2, + 0x9E0EFBC2, 0x9E0FFBC2, 0x9E10FBC2, 0x9E11FBC2, 0x9E12FBC2, 0x9E13FBC2, 0x9E14FBC2, 0x9E15FBC2, 0x9E16FBC2, 0x9E17FBC2, 0x9E18FBC2, 0x9E19FBC2, 0x9E1AFBC2, 0x9E1BFBC2, 0x9E1CFBC2, + 0x9E1DFBC2, 0x9E1EFBC2, 0x9E1FFBC2, 0x9E20FBC2, 0x9E21FBC2, 0x9E22FBC2, 0x9E23FBC2, 0x9E24FBC2, 0x9E25FBC2, 0x9E26FBC2, 0x9E27FBC2, 0x9E28FBC2, 0x9E29FBC2, 0x9E2AFBC2, 0x9E2BFBC2, + 0x9E2CFBC2, 0x9E2DFBC2, 0x9E2EFBC2, 0x9E2FFBC2, 0x9E30FBC2, 0x9E31FBC2, 0x9E32FBC2, 0x9E33FBC2, 0x9E34FBC2, 0x9E35FBC2, 0x9E36FBC2, 0x9E37FBC2, 0x9E38FBC2, 0x9E39FBC2, 0x9E3AFBC2, + 0x9E3BFBC2, 0x9E3CFBC2, 0x9E3DFBC2, 0x9E3EFBC2, 0x9E3FFBC2, 0x9E40FBC2, 0x9E41FBC2, 0x9E42FBC2, 0x9E43FBC2, 0x9E44FBC2, 0x9E45FBC2, 0x9E46FBC2, 0x9E47FBC2, 0x9E48FBC2, 0x9E49FBC2, + 0x9E4AFBC2, 0x9E4BFBC2, 0x9E4CFBC2, 0x9E4DFBC2, 0x9E4EFBC2, 0x9E4FFBC2, 0x9E50FBC2, 0x9E51FBC2, 0x9E52FBC2, 0x9E53FBC2, 0x9E54FBC2, 0x9E55FBC2, 0x9E56FBC2, 0x9E57FBC2, 0x9E58FBC2, + 0x9E59FBC2, 0x9E5AFBC2, 0x9E5BFBC2, 0x9E5CFBC2, 0x9E5DFBC2, 0x9E5EFBC2, 0x9E5FFBC2, 0x9E60FBC2, 0x9E61FBC2, 0x9E62FBC2, 0x9E63FBC2, 0x9E64FBC2, 0x9E65FBC2, 0x9E66FBC2, 0x9E67FBC2, + 0x9E68FBC2, 0x9E69FBC2, 0x9E6AFBC2, 0x9E6BFBC2, 0x9E6CFBC2, 0x9E6DFBC2, 0x9E6EFBC2, 0x9E6FFBC2, 0x9E70FBC2, 0x9E71FBC2, 0x9E72FBC2, 0x9E73FBC2, 0x9E74FBC2, 0x9E75FBC2, 0x9E76FBC2, + 0x9E77FBC2, 0x9E78FBC2, 0x9E79FBC2, 0x9E7AFBC2, 0x9E7BFBC2, 0x9E7CFBC2, 0x9E7DFBC2, 0x9E7EFBC2, 0x9E7FFBC2, 0x9E80FBC2, 0x9E81FBC2, 0x9E82FBC2, 0x9E83FBC2, 0x9E84FBC2, 0x9E85FBC2, + 0x9E86FBC2, 0x9E87FBC2, 0x9E88FBC2, 0x9E89FBC2, 0x9E8AFBC2, 0x9E8BFBC2, 0x9E8CFBC2, 0x9E8DFBC2, 0x9E8EFBC2, 0x9E8FFBC2, 0x9E90FBC2, 0x9E91FBC2, 0x9E92FBC2, 0x9E93FBC2, 0x9E94FBC2, + 0x9E95FBC2, 0x9E96FBC2, 0x9E97FBC2, 0x9E98FBC2, 0x9E99FBC2, 0x9E9AFBC2, 0x9E9BFBC2, 0x9E9CFBC2, 0x9E9DFBC2, 0x9E9EFBC2, 0x9E9FFBC2, 0x9EA0FBC2, 0x9EA1FBC2, 0x9EA2FBC2, 0x9EA3FBC2, + 0x9EA4FBC2, 0x9EA5FBC2, 0x9EA6FBC2, 
0x9EA7FBC2, 0x9EA8FBC2, 0x9EA9FBC2, 0x9EAAFBC2, 0x9EABFBC2, 0x9EACFBC2, 0x9EADFBC2, 0x9EAEFBC2, 0x9EAFFBC2, 0x9EB0FBC2, 0x9EB1FBC2, 0x9EB2FBC2, + 0x9EB3FBC2, 0x9EB4FBC2, 0x9EB5FBC2, 0x9EB6FBC2, 0x9EB7FBC2, 0x9EB8FBC2, 0x9EB9FBC2, 0x9EBAFBC2, 0x9EBBFBC2, 0x9EBCFBC2, 0x9EBDFBC2, 0x9EBEFBC2, 0x9EBFFBC2, 0x9EC0FBC2, 0x9EC1FBC2, + 0x9EC2FBC2, 0x9EC3FBC2, 0x9EC4FBC2, 0x9EC5FBC2, 0x9EC6FBC2, 0x9EC7FBC2, 0x9EC8FBC2, 0x9EC9FBC2, 0x9ECAFBC2, 0x9ECBFBC2, 0x9ECCFBC2, 0x9ECDFBC2, 0x9ECEFBC2, 0x9ECFFBC2, 0x9ED0FBC2, + 0x9ED1FBC2, 0x9ED2FBC2, 0x9ED3FBC2, 0x9ED4FBC2, 0x9ED5FBC2, 0x9ED6FBC2, 0x9ED7FBC2, 0x9ED8FBC2, 0x9ED9FBC2, 0x9EDAFBC2, 0x9EDBFBC2, 0x9EDCFBC2, 0x9EDDFBC2, 0x9EDEFBC2, 0x9EDFFBC2, + 0x9EE0FBC2, 0x9EE1FBC2, 0x9EE2FBC2, 0x9EE3FBC2, 0x9EE4FBC2, 0x9EE5FBC2, 0x9EE6FBC2, 0x9EE7FBC2, 0x9EE8FBC2, 0x9EE9FBC2, 0x9EEAFBC2, 0x9EEBFBC2, 0x9EECFBC2, 0x9EEDFBC2, 0x9EEEFBC2, + 0x9EEFFBC2, 0x9EF0FBC2, 0x9EF1FBC2, 0x9EF2FBC2, 0x9EF3FBC2, 0x9EF4FBC2, 0x9EF5FBC2, 0x9EF6FBC2, 0x9EF7FBC2, 0x9EF8FBC2, 0x9EF9FBC2, 0x9EFAFBC2, 0x9EFBFBC2, 0x9EFCFBC2, 0x9EFDFBC2, + 0x9EFEFBC2, 0x9EFFFBC2, 0x9F00FBC2, 0x9F01FBC2, 0x9F02FBC2, 0x9F03FBC2, 0x9F04FBC2, 0x9F05FBC2, 0x9F06FBC2, 0x9F07FBC2, 0x9F08FBC2, 0x9F09FBC2, 0x9F0AFBC2, 0x9F0BFBC2, 0x9F0CFBC2, + 0x9F0DFBC2, 0x9F0EFBC2, 0x9F0FFBC2, 0x9F10FBC2, 0x9F11FBC2, 0x9F12FBC2, 0x9F13FBC2, 0x9F14FBC2, 0x9F15FBC2, 0x9F16FBC2, 0x9F17FBC2, 0x9F18FBC2, 0x9F19FBC2, 0x9F1AFBC2, 0x9F1BFBC2, + 0x9F1CFBC2, 0x9F1DFBC2, 0x9F1EFBC2, 0x9F1FFBC2, 0x9F20FBC2, 0x9F21FBC2, 0x9F22FBC2, 0x9F23FBC2, 0x9F24FBC2, 0x9F25FBC2, 0x9F26FBC2, 0x9F27FBC2, 0x9F28FBC2, 0x9F29FBC2, 0x9F2AFBC2, + 0x9F2BFBC2, 0x9F2CFBC2, 0x9F2DFBC2, 0x9F2EFBC2, 0x9F2FFBC2, 0x9F30FBC2, 0x9F31FBC2, 0x9F32FBC2, 0x9F33FBC2, 0x9F34FBC2, 0x9F35FBC2, 0x9F36FBC2, 0x9F37FBC2, 0x9F38FBC2, 0x9F39FBC2, + 0x9F3AFBC2, 0x9F3BFBC2, 0x9F3CFBC2, 0x9F3DFBC2, 0x9F3EFBC2, 0x9F3FFBC2, 0x9F40FBC2, 0x9F41FBC2, 0x9F42FBC2, 0x9F43FBC2, 0x9F44FBC2, 0x9F45FBC2, 0x9F46FBC2, 0x9F47FBC2, 0x9F48FBC2, + 0x9F49FBC2, 0x9F4AFBC2, 
0x9F4BFBC2, 0x9F4CFBC2, 0x9F4DFBC2, 0x9F4EFBC2, 0x9F4FFBC2, 0x9F50FBC2, 0x9F51FBC2, 0x9F52FBC2, 0x9F53FBC2, 0x9F54FBC2, 0x9F55FBC2, 0x9F56FBC2, 0x9F57FBC2, + 0x9F58FBC2, 0x9F59FBC2, 0x9F5AFBC2, 0x9F5BFBC2, 0x9F5CFBC2, 0x9F5DFBC2, 0x9F5EFBC2, 0x9F5FFBC2, 0x9F60FBC2, 0x9F61FBC2, 0x9F62FBC2, 0x9F63FBC2, 0x9F64FBC2, 0x9F65FBC2, 0x9F66FBC2, + 0x9F67FBC2, 0x9F68FBC2, 0x9F69FBC2, 0x9F6AFBC2, 0x9F6BFBC2, 0x9F6CFBC2, 0x9F6DFBC2, 0x9F6EFBC2, 0x9F6FFBC2, 0x9F70FBC2, 0x9F71FBC2, 0x9F72FBC2, 0x9F73FBC2, 0x9F74FBC2, 0x9F75FBC2, + 0x9F76FBC2, 0x9F77FBC2, 0x9F78FBC2, 0x9F79FBC2, 0x9F7AFBC2, 0x9F7BFBC2, 0x9F7CFBC2, 0x9F7DFBC2, 0x9F7EFBC2, 0x9F7FFBC2, 0x9F80FBC2, 0x9F81FBC2, 0x9F82FBC2, 0x9F83FBC2, 0x9F84FBC2, + 0x9F85FBC2, 0x9F86FBC2, 0x9F87FBC2, 0x9F88FBC2, 0x9F89FBC2, 0x9F8AFBC2, 0x9F8BFBC2, 0x9F8CFBC2, 0x9F8DFBC2, 0x9F8EFBC2, 0x9F8FFBC2, 0x9F90FBC2, 0x9F91FBC2, 0x9F92FBC2, 0x9F93FBC2, + 0x9F94FBC2, 0x9F95FBC2, 0x9F96FBC2, 0x9F97FBC2, 0x9F98FBC2, 0x9F99FBC2, 0x9F9AFBC2, 0x9F9BFBC2, 0x9F9CFBC2, 0x9F9DFBC2, 0x9F9EFBC2, 0x9F9FFBC2, 0x9FA0FBC2, 0x9FA1FBC2, 0x9FA2FBC2, + 0x9FA3FBC2, 0x9FA4FBC2, 0x9FA5FBC2, 0x9FA6FBC2, 0x9FA7FBC2, 0x9FA8FBC2, 0x9FA9FBC2, 0x9FAAFBC2, 0x9FABFBC2, 0x9FACFBC2, 0x9FADFBC2, 0x9FAEFBC2, 0x9FAFFBC2, 0x9FB0FBC2, 0x9FB1FBC2, + 0x9FB2FBC2, 0x9FB3FBC2, 0x9FB4FBC2, 0x9FB5FBC2, 0x9FB6FBC2, 0x9FB7FBC2, 0x9FB8FBC2, 0x9FB9FBC2, 0x9FBAFBC2, 0x9FBBFBC2, 0x9FBCFBC2, 0x9FBDFBC2, 0x9FBEFBC2, 0x9FBFFBC2, 0x9FC0FBC2, + 0x9FC1FBC2, 0x9FC2FBC2, 0x9FC3FBC2, 0x9FC4FBC2, 0x9FC5FBC2, 0x9FC6FBC2, 0x9FC7FBC2, 0x9FC8FBC2, 0x9FC9FBC2, 0x9FCAFBC2, 0x9FCBFBC2, 0x9FCCFBC2, 0x9FCDFBC2, 0x9FCEFBC2, 0x9FCFFBC2, + 0x9FD0FBC2, 0x9FD1FBC2, 0x9FD2FBC2, 0x9FD3FBC2, 0x9FD4FBC2, 0x9FD5FBC2, 0x9FD6FBC2, 0x9FD7FBC2, 0x9FD8FBC2, 0x9FD9FBC2, 0x9FDAFBC2, 0x9FDBFBC2, 0x9FDCFBC2, 0x9FDDFBC2, 0x9FDEFBC2, + 0x9FDFFBC2, 0x9FE0FBC2, 0x9FE1FBC2, 0x9FE2FBC2, 0x9FE3FBC2, 0x9FE4FBC2, 0x9FE5FBC2, 0x9FE6FBC2, 0x9FE7FBC2, 0x9FE8FBC2, 0x9FE9FBC2, 0x9FEAFBC2, 0x9FEBFBC2, 0x9FECFBC2, 0x9FEDFBC2, + 0x9FEEFBC2, 
0x9FEFFBC2, 0x9FF0FBC2, 0x9FF1FBC2, 0x9FF2FBC2, 0x9FF3FBC2, 0x9FF4FBC2, 0x9FF5FBC2, 0x9FF6FBC2, 0x9FF7FBC2, 0x9FF8FBC2, 0x9FF9FBC2, 0x9FFAFBC2, 0x9FFBFBC2, 0x9FFCFBC2, + 0x9FFDFBC2, 0x9FFEFBC2, 0x9FFFFBC2, 0x49B5, 0x49B6, 0x49B7, 0x49B8, 0x49B9, 0x49BA, 0x49BB, 0x49BC, 0x49BD, 0x49BE, 0x49BF, 0x49C0, + 0x49C1, 0x49C2, 0x49C3, 0x49C4, 0x49C5, 0x49C6, 0x49C7, 0x49C8, 0x49C9, 0x49CA, 0x49CB, 0x49CC, 0x49CD, 0x49CE, 0x49CF, + 0x49D0, 0x49D1, 0x49D2, 0x49D3, 0x49D4, 0x49D5, 0x49D6, 0x49D7, 0x49D8, 0x49D9, 0x49DA, 0x49DB, 0x49DC, 0x49DD, 0x49DE, + 0x49DF, 0x49E0, 0x49E1, 0x49E2, 0x49E3, 0x49E4, 0x49E5, 0x49E6, 0x49E7, 0x49E8, 0x49E9, 0x49EA, 0x49EB, 0x49EC, 0x49ED, + 0x49EE, 0x49EF, 0x49F0, 0x49F1, 0x49F2, 0x49F3, 0x49F4, 0x49F5, 0x49F6, 0x49F7, 0x49F8, 0x49F9, 0x49FA, 0x49FB, 0x49FC, + 0x49FD, 0x49FE, 0x49FF, 0x4A00, 0x4A01, 0x4A02, 0x4A03, 0x4A04, 0x4A05, 0x4A06, 0x4A07, 0x4A08, 0x4A09, 0x4A0A, 0x4A0B, + 0x4A0C, 0x4A0D, 0x4A0E, 0x4A0F, 0x4A10, 0x4A11, 0x4A12, 0x4A13, 0x4A14, 0x4A15, 0x4A16, 0x4A17, 0x4A18, 0x4A19, 0x4A1A, + 0x4A1B, 0x4A1C, 0x4A1D, 0x4A1E, 0x4A1F, 0x4A20, 0x4A21, 0x4A22, 0x4A23, 0x4A24, 0x4A25, 0x4A26, 0x4A27, 0x4A28, 0x4A29, + 0x4A2A, 0x4A2B, 0x4A2C, 0x4A2D, 0x4A2E, 0x4A2F, 0x4A30, 0x4A31, 0x4A32, 0x4A33, 0x4A34, 0x4A35, 0x4A36, 0x4A37, 0x4A38, + 0x4A39, 0x4A3A, 0x4A3B, 0x4A3C, 0x4A3D, 0x4A3E, 0x4A3F, 0x4A40, 0x4A41, 0x4A42, 0x4A43, 0x4A44, 0x4A45, 0x4A46, 0x4A47, + 0x4A48, 0x4A49, 0x4A4A, 0x4A4B, 0x4A4C, 0x4A4D, 0x4A4E, 0x4A4F, 0x4A50, 0x4A51, 0x4A52, 0x4A53, 0x4A54, 0x4A55, 0x4A56, + 0x4A57, 0x4A58, 0x4A59, 0x4A5A, 0x4A5B, 0x4A5C, 0x4A5D, 0x4A5E, 0x4A5F, 0x4A60, 0x4A61, 0x4A62, 0x4A63, 0x4A64, 0x4A65, + 0x4A66, 0x4A67, 0x4A68, 0x4A69, 0x4A6A, 0x4A6B, 0x4A6C, 0x4A6D, 0x4A6E, 0x4A6F, 0x4A70, 0x4A71, 0x4A72, 0x4A73, 0x4A74, + 0x4A75, 0x4A76, 0x4A77, 0x4A78, 0x4A79, 0x4A7A, 0x4A7B, 0x4A7C, 0x4A7D, 0x4A7E, 0x4A7F, 0x4A80, 0x4A81, 0x4A82, 0x4A83, + 0x4A84, 0x4A85, 0x4A86, 0x4A87, 0x4A88, 0x4A89, 0x4A8A, 0x4A8B, 0x4A8C, 0x4A8D, 0x4A8E, 0x4A8F, 0x4A90, 
0x4A91, 0x4A92, + 0x4A93, 0x4A94, 0x4A95, 0x4A96, 0x4A97, 0x4A98, 0x4A99, 0x4A9A, 0x4A9B, 0x4A9C, 0x4A9D, 0x4A9E, 0x4A9F, 0x4AA0, 0x4AA1, + 0x4AA2, 0x4AA3, 0x4AA4, 0x4AA5, 0x4AA6, 0x4AA7, 0x4AA8, 0x4AA9, 0x4AAA, 0x4AAB, 0x4AAC, 0x4AAD, 0x4AAE, 0x4AAF, 0x4AB0, + 0x4AB1, 0x4AB2, 0x4AB3, 0x4AB4, 0x4AB5, 0x4AB6, 0x4AB7, 0x4AB8, 0x4AB9, 0x4ABA, 0x4ABB, 0x4ABC, 0x4ABD, 0x4ABE, 0x4ABF, + 0x4AC0, 0x4AC1, 0x4AC2, 0x4AC3, 0x4AC4, 0x4AC5, 0x4AC6, 0x4AC7, 0x4AC8, 0x4AC9, 0x4ACA, 0x4ACB, 0x4ACC, 0x4ACD, 0x4ACE, + 0x4ACF, 0x4AD0, 0x4AD1, 0x4AD2, 0x4AD3, 0x4AD4, 0x4AD5, 0x4AD6, 0x4AD7, 0x4AD8, 0x4AD9, 0x4ADA, 0x4ADB, 0x4ADC, 0x4ADD, + 0x4ADE, 0x4ADF, 0x4AE0, 0x4AE1, 0x4AE2, 0x4AE3, 0x4AE4, 0x4AE5, 0x4AE6, 0x4AE7, 0x4AE8, 0x4AE9, 0x4AEA, 0x4AEB, 0x4AEC, + 0x4AED, 0x4AEE, 0x4AEF, 0x4AF0, 0x4AF1, 0x4AF2, 0x4AF3, 0x4AF4, 0x4AF5, 0x4AF6, 0x4AF7, 0x4AF8, 0x4AF9, 0x4AFA, 0x4AFB, + 0x4AFC, 0x4AFD, 0x4AFE, 0x4AFF, 0x4B00, 0x4B01, 0x4B02, 0x4B03, 0x4B04, 0x4B05, 0x4B06, 0x4B07, 0x4B08, 0x4B09, 0x4B0A, + 0x4B0B, 0x4B0C, 0x4B0D, 0x4B0E, 0x4B0F, 0x4B10, 0x4B11, 0x4B12, 0x4B13, 0x4B14, 0x4B15, 0x4B16, 0x4B17, 0x4B18, 0x4B19, + 0x4B1A, 0x4B1B, 0x4B1C, 0x4B1D, 0x4B1E, 0x4B1F, 0x4B20, 0x4B21, 0x4B22, 0x4B23, 0x4B24, 0x4B25, 0x4B26, 0x4B27, 0x4B28, + 0x4B29, 0x4B2A, 0x4B2B, 0x4B2C, 0x4B2D, 0x4B2E, 0x4B2F, 0x4B30, 0x4B31, 0x4B32, 0x4B33, 0x4B34, 0x4B35, 0x4B36, 0x4B37, + 0x4B38, 0x4B39, 0x4B3A, 0x4B3B, 0x4B3C, 0x4B3D, 0x4B3E, 0x4B3F, 0x4B40, 0x4B41, 0x4B42, 0x4B43, 0x4B44, 0x4B45, 0x4B46, + 0x4B47, 0x4B48, 0x4B49, 0x4B4A, 0x4B4B, 0x4B4C, 0x4B4D, 0x4B4E, 0x4B4F, 0x4B50, 0x4B51, 0x4B52, 0x4B53, 0x4B54, 0x4B55, + 0x4B56, 0x4B57, 0x4B58, 0x4B59, 0x4B5A, 0x4B5B, 0x4B5C, 0x4B5D, 0x4B5E, 0x4B5F, 0x4B60, 0x4B61, 0x4B62, 0x4B63, 0x4B64, + 0x4B65, 0x4B66, 0x4B67, 0x4B68, 0x4B69, 0x4B6A, 0x4B6B, 0x4B6C, 0x4B6D, 0x4B6E, 0x4B6F, 0x4B70, 0x4B71, 0x4B72, 0x4B73, + 0x4B74, 0x4B75, 0x4B76, 0x4B77, 0x4B78, 0x4B79, 0x4B7A, 0x4B7B, 0x4B7C, 0x4B7D, 0x4B7E, 0x4B7F, 0x4B80, 0x4B81, 0x4B82, + 0x4B83, 0x4B84, 0x4B85, 
0x4B86, 0x4B87, 0x4B88, 0x4B89, 0x4B8A, 0x4B8B, 0x4B8C, 0x4B8D, 0x4B8E, 0x4B8F, 0x4B90, 0x4B91, + 0x4B92, 0x4B93, 0x4B94, 0x4B95, 0x4B96, 0x4B97, 0x4B98, 0x4B99, 0x4B9A, 0x4B9B, 0x4B9C, 0x4B9D, 0x4B9E, 0x4B9F, 0x4BA0, + 0x4BA1, 0x4BA2, 0x4BA3, 0x4BA4, 0x4BA5, 0x4BA6, 0x4BA7, 0x4BA8, 0x4BA9, 0x4BAA, 0x4BAB, 0x4BAC, 0x4BAD, 0x4BAE, 0x4BAF, + 0x4BB0, 0x4BB1, 0x4BB2, 0x4BB3, 0x4BB4, 0x4BB5, 0x4BB6, 0x4BB7, 0x4BB8, 0x4BB9, 0x4BBA, 0x4BBB, 0x4BBC, 0x4BBD, 0x4BBE, + 0x4BBF, 0x4BC0, 0x4BC1, 0x4BC2, 0x4BC3, 0x4BC4, 0x4BC5, 0x4BC6, 0x4BC7, 0x4BC8, 0x4BC9, 0x4BCA, 0x4BCB, 0x4BCC, 0x4BCD, + 0x4BCE, 0x4BCF, 0x4BD0, 0x4BD1, 0x4BD2, 0x4BD3, 0x4BD4, 0x4BD5, 0x4BD6, 0x4BD7, 0x4BD8, 0x4BD9, 0x4BDA, 0x4BDB, 0x4BDC, + 0x4BDD, 0x4BDE, 0x4BDF, 0x4BE0, 0x4BE1, 0x4BE2, 0x4BE3, 0x4BE4, 0x4BE5, 0x4BE6, 0x4BE7, 0x4BE8, 0x4BE9, 0x4BEA, 0x4BEB, + 0x4BEC, 0x4BED, 0x4BEE, 0x4BEF, 0x4BF0, 0x4BF1, 0x4BF2, 0x4BF3, 0x4BF4, 0x4BF5, 0x4BF6, 0x4BF7, 0x4BF8, 0x4BF9, 0x4BFA, + 0x4BFB, 0x4BFC, 0x4BFD, 0x4BFE, 0x4BFF, 0x4C00, 0x4C01, 0x4C02, 0x4C03, 0x4C04, 0x4C05, 0x4C06, 0x4C07, 0x4C08, 0x4C09, + 0x4C0A, 0x4C0B, 0x4C0C, 0x4C0D, 0x4C0E, 0x4C0F, 0x4C10, 0x4C11, 0x4C12, 0x4C13, 0x4C14, 0x4C15, 0x4C16, 0x4C17, 0x4C1A, + 0x4C1B, 0x4C1C, 0x4C1D, 0x4C1E, 0x4C1F, 0x4C20, 0x4C21, 0x4C22, 0x4C23, 0x4C24, 0x4C25, 0x4C26, 0x4C27, 0x4C28, 0x4C29, + 0x4C2A, 0x4C2B, 0x4C2C, 0x4C2D, 0x4C2E, 0x4C2F, 0x4C30, 0x4C31, 0x4C32, 0x4C33, 0x4C34, 0x4C35, 0x4C36, 0x4C37, 0x4C38, + 0x4C39, 0x4C3A, 0x4C3B, 0x4C3C, 0x4C3D, 0x4C3E, 0x4C3F, 0x4C40, 0x4C41, 0x4C42, 0x4C43, 0x4C44, 0x4C45, 0x4C46, 0x4C47, + 0x4C48, 0x4C49, 0x4C4A, 0x4C4B, 0x4C4C, 0x4C4D, 0x4C4E, 0x4C4F, 0x4C50, 0x4C51, 0x4C52, 0x4C53, 0x4C54, 0x4C55, 0x4C56, + 0x4C57, 0x4C58, 0x4C59, 0x4C5A, 0x4C5B, 0x4C5C, 0x4C5D, 0x4C5E, 0x4C5F, 0x4C60, 0x4C61, 0x4C62, 0x4C63, 0x4C64, 0x4C65, + 0x4C66, 0x4C67, 0x4C68, 0x4C69, 0x4C6A, 0x4C6B, 0x4C6C, 0x4C6D, 0x4C6E, 0x4C6F, 0x4C70, 0x4C71, 0x4C72, 0x4C73, 0x4C74, + 0x4C75, 0x4C76, 0x4C77, 0x4C78, 0x4C79, 0x4C7A, 0x4C7B, 0x4C7C, 0x4C7D, 
0x4C7E, 0x4C7F, 0x4C80, 0x4C81, 0x4C82, 0x4C83, + 0x4C84, 0x4C85, 0x4C86, 0x4C87, 0x4C88, 0x4C89, 0x4C8A, 0x4C18, 0x4C19, 0x4C8B, 0x4C8C, 0x4C8D, 0x4C8E, 0x4C8F, 0x4C90, + 0x4C91, 0x4C92, 0x4C93, 0x4C94, 0x4C95, 0x4C96, 0x4C97, 0x4C98, 0x4C99, 0x4C9A, 0x4C9B, 0x4C9C, 0x4C9D, 0x4C9E, 0x4C9F, + 0x4CA0, 0x4CA1, 0x4CA2, 0x4CA3, 0x4CA4, 0x4CA5, 0x4CA6, 0x4CA7, 0x4CA8, 0x4CA9, 0x4CAA, 0x4CAB, 0x4CAC, 0x4CAD, 0x4CAE, + 0x4CAF, 0x4CB0, 0x4CB1, 0x4CB2, 0x4CB3, 0x4CB4, 0x4CB5, 0x4CB6, 0x4CB7, 0x4CB8, 0x4CB9, 0x4CBA, 0x4CBB, 0x4CBC, 0x4CBD, + 0x4CBE, 0x4CBF, 0x4CC0, 0x4CC1, 0x4CC2, 0x4CC3, 0x4CC4, 0x4CC5, 0x4CC6, 0x4CC7, 0x4CC8, 0x4CC9, 0x4CCA, 0x4CCB, 0x4CCC, + 0x4CCD, 0x4CCE, 0x4CCF, 0x4CD0, 0x4CD1, 0x4CD2, 0x4CD3, 0x4CD4, 0x4CD5, 0x4CD6, 0x4CD7, 0x4CD8, 0x4CD9, 0x4CDA, 0x4CDB, + 0x4CDC, 0x4CDD, 0x4CDE, 0x4CDF, 0x4CE0, 0x4CE1, 0x4CE2, 0x4CE3, 0x4CE4, 0x4CE5, 0x4CE6, 0x4CE7, 0x4CE8, 0x4CE9, 0x4CEA, + 0x4CEB, 0x4CEC, 0x4CED, 0x4CEE, 0x4CEF, 0x4CF0, 0x4CF1, 0x4CF2, 0x4CF3, 0x4CF4, 0x4CF5, 0x4CF6, 0x4CF7, 0x4CF8, 0x4CF9, + 0x4CFA, 0x4CFB, 0x4CFC, 0x4CFD, 0x4CFE, 0x4CFF, 0x4D00, 0x4D01, 0x4D02, 0x4D03, 0x4D04, 0x4D05, 0x4D06, 0x4D07, 0x4D08, + 0x4D09, 0x4D0A, 0x4D0B, 0x4D0C, 0x4D0D, 0x4D0E, 0x4D0F, 0x4D10, 0x4D11, 0x4D12, 0x4D13, 0x4D14, 0x4D15, 0x4D16, 0x4D17, + 0x4D18, 0x4D19, 0x4D1A, 0x4D1B, 0x4D1C, 0x4D1D, 0x4D1E, 0x4D1F, 0x4D20, 0x4D21, 0x4D22, 0x4D23, 0x4D24, 0x4D25, 0x4D26, + 0x4D27, 0x4D28, 0x4D29, 0x4D2A, 0x4D2B, 0x4D2C, 0x4D2D, 0x4D2E, 0x4D2F, 0x4D30, 0x4D31, 0x4D32, 0x4D33, 0x4D34, 0x4D35, + 0x4D36, 0x4D37, 0x4D38, 0x4D39, 0x4D3A, 0x4D3B, 0x4D3C, 0x4D3D, 0x4D3E, 0x4D3F, 0x4D40, 0x4D41, 0x4D42, 0x4D43, 0x4D44, + 0x4D45, 0x4D46, 0x4D47, 0x4D48, 0x4D49, 0x4D4A, 0x4D4B, 0x4D4C, 0x4D4D, 0x4D4E, 0xA39AFBC2, 0xA39BFBC2, 0xA39CFBC2, 0xA39DFBC2, 0xA39EFBC2, + 0xA39FFBC2, 0xA3A0FBC2, 0xA3A1FBC2, 0xA3A2FBC2, 0xA3A3FBC2, 0xA3A4FBC2, 0xA3A5FBC2, 0xA3A6FBC2, 0xA3A7FBC2, 0xA3A8FBC2, 0xA3A9FBC2, 0xA3AAFBC2, 0xA3ABFBC2, 0xA3ACFBC2, 0xA3ADFBC2, + 0xA3AEFBC2, 0xA3AFFBC2, 0xA3B0FBC2, 
0xA3B1FBC2, 0xA3B2FBC2, 0xA3B3FBC2, 0xA3B4FBC2, 0xA3B5FBC2, 0xA3B6FBC2, 0xA3B7FBC2, 0xA3B8FBC2, 0xA3B9FBC2, 0xA3BAFBC2, 0xA3BBFBC2, 0xA3BCFBC2, + 0xA3BDFBC2, 0xA3BEFBC2, 0xA3BFFBC2, 0xA3C0FBC2, 0xA3C1FBC2, 0xA3C2FBC2, 0xA3C3FBC2, 0xA3C4FBC2, 0xA3C5FBC2, 0xA3C6FBC2, 0xA3C7FBC2, 0xA3C8FBC2, 0xA3C9FBC2, 0xA3CAFBC2, 0xA3CBFBC2, + 0xA3CCFBC2, 0xA3CDFBC2, 0xA3CEFBC2, 0xA3CFFBC2, 0xA3D0FBC2, 0xA3D1FBC2, 0xA3D2FBC2, 0xA3D3FBC2, 0xA3D4FBC2, 0xA3D5FBC2, 0xA3D6FBC2, 0xA3D7FBC2, 0xA3D8FBC2, 0xA3D9FBC2, 0xA3DAFBC2, + 0xA3DBFBC2, 0xA3DCFBC2, 0xA3DDFBC2, 0xA3DEFBC2, 0xA3DFFBC2, 0xA3E0FBC2, 0xA3E1FBC2, 0xA3E2FBC2, 0xA3E3FBC2, 0xA3E4FBC2, 0xA3E5FBC2, 0xA3E6FBC2, 0xA3E7FBC2, 0xA3E8FBC2, 0xA3E9FBC2, + 0xA3EAFBC2, 0xA3EBFBC2, 0xA3ECFBC2, 0xA3EDFBC2, 0xA3EEFBC2, 0xA3EFFBC2, 0xA3F0FBC2, 0xA3F1FBC2, 0xA3F2FBC2, 0xA3F3FBC2, 0xA3F4FBC2, 0xA3F5FBC2, 0xA3F6FBC2, 0xA3F7FBC2, 0xA3F8FBC2, + 0xA3F9FBC2, 0xA3FAFBC2, 0xA3FBFBC2, 0xA3FCFBC2, 0xA3FDFBC2, 0xA3FEFBC2, 0xA3FFFBC2, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, + 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C3E, 0x1C3F, + 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C3F, 0x1C40, 0x1C40, + 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C3E, 0x1C3F, 0x1C40, 0x1C40, 0x1C41, 0x1C42, 0x1BDE, 0x1BDF, 0x1C3E, + 0x1C3F, 0x1C40, 0x1C40, 0x1C41, 0x1C42, 0x1C40, 0x1C40, 0x1C41, 0x1C41, 0x1C41, 0x1C41, 0x1C43, 0x1C44, 0x1C44, 0x1C44, + 0x1C45, 0x1C45, 0x1C46, 0x1C46, 0x1C46, 0x1C46, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, + 0x1C41, 0x1C42, 0x1C42, 0x1C3F, 0x1C40, 0x1C3E, 0x1C3F, 0x1BE0, 0x1BE1, 0x1BE2, 0x1BE3, 0x1BE4, 0x1BE5, 0x1BE6, 0x1BE7, + 0x1BE8, 0x1BE9, 0x1BEA, 0x1BEB, 0x1BEC, 0x1BED, 0x1BEE, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0xA46FFBC2, 0x300, + 0x301, 0x302, 0x303, 0x304, 0xA475FBC2, 0xA476FBC2, 0xA477FBC2, 0xA478FBC2, 0xA479FBC2, 0xA47AFBC2, 0xA47BFBC2, 
0xA47CFBC2, 0xA47DFBC2, 0xA47EFBC2, 0xA47FFBC2, + 0x4D4F, 0x4D50, 0x4D51, 0x4D52, 0x4D53, 0x4D54, 0x4D55, 0x4D56, 0x4D57, 0x4D58, 0x4D59, 0x4D5A, 0x4D5B, 0x4D5C, 0x4D5D, + 0x4D5E, 0x4D5F, 0x4D60, 0x4D61, 0x4D62, 0x4D63, 0x4D64, 0x4D65, 0x4D66, 0x4D67, 0x4D68, 0x4D69, 0x4D6A, 0x4D6B, 0x4D6C, + 0x4D6D, 0x4D6E, 0x4D6F, 0x4D70, 0x4D71, 0x4D72, 0x4D73, 0x4D74, 0x4D75, 0x4D76, 0x4D77, 0x4D78, 0x4D79, 0x4D7A, 0x4D7B, + 0x4D7C, 0x4D7D, 0x4D7E, 0x4D7F, 0x4D80, 0x4D81, 0x4D82, 0x4D83, 0x4D84, 0x4D85, 0x4D86, 0x4D87, 0x4D88, 0x4D89, 0x4D8A, + 0x4D8B, 0x4D8C, 0x4D8D, 0x4D8E, 0x4D8F, 0x4D90, 0x4D91, 0x4D92, 0x4D93, 0x4D94, 0x4D95, 0x4D96, 0x4D97, 0x4D98, 0x4D99, + 0x4D9A, 0x4D9B, 0x4D9C, 0x4D9D, 0x4D9E, 0x4D9F, 0x4DA0, 0x4DA1, 0x4DA2, 0x4DA3, 0x4DA4, 0x4DA5, 0x4DA6, 0x4DA7, 0x4DA8, + 0x4DA9, 0x4DAA, 0x4DAB, 0x4DAC, 0x4DAD, 0x4DAE, 0x4DAF, 0x4DB0, 0x4DB1, 0x4DB2, 0x4DB3, 0x4DB4, 0x4DB5, 0x4DB6, 0x4DB7, + 0x4DB8, 0x4DB9, 0x4DBA, 0x4DBB, 0x4DBC, 0x4DBD, 0x4DBE, 0x4DBF, 0x4DC0, 0x4DC1, 0x4DC2, 0x4DC3, 0x4DC4, 0x4DC5, 0x4DC6, + 0x4DC7, 0x4DC8, 0x4DC9, 0x4DCA, 0x4DCB, 0x4DCC, 0x4DCD, 0x4DCE, 0x4DCF, 0x4DD0, 0x4DD1, 0x4DD2, 0x4DD3, 0x4DD4, 0x4DD5, + 0x4DD6, 0x4DD7, 0x4DD8, 0x4DD9, 0x4DDA, 0x4DDB, 0x4DDC, 0x4DDD, 0x4DDE, 0x4DDF, 0x4DE0, 0x4DE1, 0x4DE2, 0x4DE3, 0x4DE4, + 0x4DE5, 0x4DE6, 0x4DE7, 0x4DE8, 0x4DE9, 0x4DEA, 0x4DEB, 0x4DEC, 0x4DED, 0x4DEE, 0x4DEF, 0x4DF0, 0x4DF1, 0x4DF2, 0x4DF3, + 0x4DF4, 0x4DF5, 0x4DF6, 0x4DF7, 0x4DF8, 0x4DF9, 0x4DFA, 0x4DFB, 0x4DFC, 0x4DFD, 0x4DFE, 0x4DFF, 0x4E00, 0x4E01, 0x4E02, + 0x4E03, 0x4E04, 0x4E05, 0x4E06, 0x4E07, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x4E0C, 0x4E0D, 0x4E0E, 0x4E0F, 0x4E10, 0x4E11, + 0x4E12, 0xA544FBC2, 0xA545FBC2, 0xA546FBC2, 0xA547FBC2, 0xA548FBC2, 0xA549FBC2, 0xA54AFBC2, 0xA54BFBC2, 0xA54CFBC2, 0xA54DFBC2, 0xA54EFBC2, 0xA54FFBC2, 0xA550FBC2, 0xA551FBC2, + 0xA552FBC2, 0xA553FBC2, 0xA554FBC2, 0xA555FBC2, 0xA556FBC2, 0xA557FBC2, 0xA558FBC2, 0xA559FBC2, 0xA55AFBC2, 0xA55BFBC2, 0xA55CFBC2, 0xA55DFBC2, 0xA55EFBC2, 0xA55FFBC2, 0xA560FBC2, + 
0xA561FBC2, 0xA562FBC2, 0xA563FBC2, 0xA564FBC2, 0xA565FBC2, 0xA566FBC2, 0xA567FBC2, 0xA568FBC2, 0xA569FBC2, 0xA56AFBC2, 0xA56BFBC2, 0xA56CFBC2, 0xA56DFBC2, 0xA56EFBC2, 0xA56FFBC2, + 0xA570FBC2, 0xA571FBC2, 0xA572FBC2, 0xA573FBC2, 0xA574FBC2, 0xA575FBC2, 0xA576FBC2, 0xA577FBC2, 0xA578FBC2, 0xA579FBC2, 0xA57AFBC2, 0xA57BFBC2, 0xA57CFBC2, 0xA57DFBC2, 0xA57EFBC2, + 0xA57FFBC2, 0xA580FBC2, 0xA581FBC2, 0xA582FBC2, 0xA583FBC2, 0xA584FBC2, 0xA585FBC2, 0xA586FBC2, 0xA587FBC2, 0xA588FBC2, 0xA589FBC2, 0xA58AFBC2, 0xA58BFBC2, 0xA58CFBC2, 0xA58DFBC2, + 0xA58EFBC2, 0xA58FFBC2, 0xA590FBC2, 0xA591FBC2, 0xA592FBC2, 0xA593FBC2, 0xA594FBC2, 0xA595FBC2, 0xA596FBC2, 0xA597FBC2, 0xA598FBC2, 0xA599FBC2, 0xA59AFBC2, 0xA59BFBC2, 0xA59CFBC2, + 0xA59DFBC2, 0xA59EFBC2, 0xA59FFBC2, 0xA5A0FBC2, 0xA5A1FBC2, 0xA5A2FBC2, 0xA5A3FBC2, 0xA5A4FBC2, 0xA5A5FBC2, 0xA5A6FBC2, 0xA5A7FBC2, 0xA5A8FBC2, 0xA5A9FBC2, 0xA5AAFBC2, 0xA5ABFBC2, + 0xA5ACFBC2, 0xA5ADFBC2, 0xA5AEFBC2, 0xA5AFFBC2, 0xA5B0FBC2, 0xA5B1FBC2, 0xA5B2FBC2, 0xA5B3FBC2, 0xA5B4FBC2, 0xA5B5FBC2, 0xA5B6FBC2, 0xA5B7FBC2, 0xA5B8FBC2, 0xA5B9FBC2, 0xA5BAFBC2, + 0xA5BBFBC2, 0xA5BCFBC2, 0xA5BDFBC2, 0xA5BEFBC2, 0xA5BFFBC2, 0xA5C0FBC2, 0xA5C1FBC2, 0xA5C2FBC2, 0xA5C3FBC2, 0xA5C4FBC2, 0xA5C5FBC2, 0xA5C6FBC2, 0xA5C7FBC2, 0xA5C8FBC2, 0xA5C9FBC2, + 0xA5CAFBC2, 0xA5CBFBC2, 0xA5CCFBC2, 0xA5CDFBC2, 0xA5CEFBC2, 0xA5CFFBC2, 0xA5D0FBC2, 0xA5D1FBC2, 0xA5D2FBC2, 0xA5D3FBC2, 0xA5D4FBC2, 0xA5D5FBC2, 0xA5D6FBC2, 0xA5D7FBC2, 0xA5D8FBC2, + 0xA5D9FBC2, 0xA5DAFBC2, 0xA5DBFBC2, 0xA5DCFBC2, 0xA5DDFBC2, 0xA5DEFBC2, 0xA5DFFBC2, 0xA5E0FBC2, 0xA5E1FBC2, 0xA5E2FBC2, 0xA5E3FBC2, 0xA5E4FBC2, 0xA5E5FBC2, 0xA5E6FBC2, 0xA5E7FBC2, + 0xA5E8FBC2, 0xA5E9FBC2, 0xA5EAFBC2, 0xA5EBFBC2, 0xA5ECFBC2, 0xA5EDFBC2, 0xA5EEFBC2, 0xA5EFFBC2, 0xA5F0FBC2, 0xA5F1FBC2, 0xA5F2FBC2, 0xA5F3FBC2, 0xA5F4FBC2, 0xA5F5FBC2, 0xA5F6FBC2, + 0xA5F7FBC2, 0xA5F8FBC2, 0xA5F9FBC2, 0xA5FAFBC2, 0xA5FBFBC2, 0xA5FCFBC2, 0xA5FDFBC2, 0xA5FEFBC2, 0xA5FFFBC2, 0xA600FBC2, 0xA601FBC2, 0xA602FBC2, 0xA603FBC2, 0xA604FBC2, 0xA605FBC2, 
+ 0xA606FBC2, 0xA607FBC2, 0xA608FBC2, 0xA609FBC2, 0xA60AFBC2, 0xA60BFBC2, 0xA60CFBC2, 0xA60DFBC2, 0xA60EFBC2, 0xA60FFBC2, 0xA610FBC2, 0xA611FBC2, 0xA612FBC2, 0xA613FBC2, 0xA614FBC2, + 0xA615FBC2, 0xA616FBC2, 0xA617FBC2, 0xA618FBC2, 0xA619FBC2, 0xA61AFBC2, 0xA61BFBC2, 0xA61CFBC2, 0xA61DFBC2, 0xA61EFBC2, 0xA61FFBC2, 0xA620FBC2, 0xA621FBC2, 0xA622FBC2, 0xA623FBC2, + 0xA624FBC2, 0xA625FBC2, 0xA626FBC2, 0xA627FBC2, 0xA628FBC2, 0xA629FBC2, 0xA62AFBC2, 0xA62BFBC2, 0xA62CFBC2, 0xA62DFBC2, 0xA62EFBC2, 0xA62FFBC2, 0xA630FBC2, 0xA631FBC2, 0xA632FBC2, + 0xA633FBC2, 0xA634FBC2, 0xA635FBC2, 0xA636FBC2, 0xA637FBC2, 0xA638FBC2, 0xA639FBC2, 0xA63AFBC2, 0xA63BFBC2, 0xA63CFBC2, 0xA63DFBC2, 0xA63EFBC2, 0xA63FFBC2, 0xA640FBC2, 0xA641FBC2, + 0xA642FBC2, 0xA643FBC2, 0xA644FBC2, 0xA645FBC2, 0xA646FBC2, 0xA647FBC2, 0xA648FBC2, 0xA649FBC2, 0xA64AFBC2, 0xA64BFBC2, 0xA64CFBC2, 0xA64DFBC2, 0xA64EFBC2, 0xA64FFBC2, 0xA650FBC2, + 0xA651FBC2, 0xA652FBC2, 0xA653FBC2, 0xA654FBC2, 0xA655FBC2, 0xA656FBC2, 0xA657FBC2, 0xA658FBC2, 0xA659FBC2, 0xA65AFBC2, 0xA65BFBC2, 0xA65CFBC2, 0xA65DFBC2, 0xA65EFBC2, 0xA65FFBC2, + 0xA660FBC2, 0xA661FBC2, 0xA662FBC2, 0xA663FBC2, 0xA664FBC2, 0xA665FBC2, 0xA666FBC2, 0xA667FBC2, 0xA668FBC2, 0xA669FBC2, 0xA66AFBC2, 0xA66BFBC2, 0xA66CFBC2, 0xA66DFBC2, 0xA66EFBC2, + 0xA66FFBC2, 0xA670FBC2, 0xA671FBC2, 0xA672FBC2, 0xA673FBC2, 0xA674FBC2, 0xA675FBC2, 0xA676FBC2, 0xA677FBC2, 0xA678FBC2, 0xA679FBC2, 0xA67AFBC2, 0xA67BFBC2, 0xA67CFBC2, 0xA67DFBC2, + 0xA67EFBC2, 0xA67FFBC2, 0xA680FBC2, 0xA681FBC2, 0xA682FBC2, 0xA683FBC2, 0xA684FBC2, 0xA685FBC2, 0xA686FBC2, 0xA687FBC2, 0xA688FBC2, 0xA689FBC2, 0xA68AFBC2, 0xA68BFBC2, 0xA68CFBC2, + 0xA68DFBC2, 0xA68EFBC2, 0xA68FFBC2, 0xA690FBC2, 0xA691FBC2, 0xA692FBC2, 0xA693FBC2, 0xA694FBC2, 0xA695FBC2, 0xA696FBC2, 0xA697FBC2, 0xA698FBC2, 0xA699FBC2, 0xA69AFBC2, 0xA69BFBC2, + 0xA69CFBC2, 0xA69DFBC2, 0xA69EFBC2, 0xA69FFBC2, 0xA6A0FBC2, 0xA6A1FBC2, 0xA6A2FBC2, 0xA6A3FBC2, 0xA6A4FBC2, 0xA6A5FBC2, 0xA6A6FBC2, 0xA6A7FBC2, 0xA6A8FBC2, 0xA6A9FBC2, 
0xA6AAFBC2, + 0xA6ABFBC2, 0xA6ACFBC2, 0xA6ADFBC2, 0xA6AEFBC2, 0xA6AFFBC2, 0xA6B0FBC2, 0xA6B1FBC2, 0xA6B2FBC2, 0xA6B3FBC2, 0xA6B4FBC2, 0xA6B5FBC2, 0xA6B6FBC2, 0xA6B7FBC2, 0xA6B8FBC2, 0xA6B9FBC2, + 0xA6BAFBC2, 0xA6BBFBC2, 0xA6BCFBC2, 0xA6BDFBC2, 0xA6BEFBC2, 0xA6BFFBC2, 0xA6C0FBC2, 0xA6C1FBC2, 0xA6C2FBC2, 0xA6C3FBC2, 0xA6C4FBC2, 0xA6C5FBC2, 0xA6C6FBC2, 0xA6C7FBC2, 0xA6C8FBC2, + 0xA6C9FBC2, 0xA6CAFBC2, 0xA6CBFBC2, 0xA6CCFBC2, 0xA6CDFBC2, 0xA6CEFBC2, 0xA6CFFBC2, 0xA6D0FBC2, 0xA6D1FBC2, 0xA6D2FBC2, 0xA6D3FBC2, 0xA6D4FBC2, 0xA6D5FBC2, 0xA6D6FBC2, 0xA6D7FBC2, + 0xA6D8FBC2, 0xA6D9FBC2, 0xA6DAFBC2, 0xA6DBFBC2, 0xA6DCFBC2, 0xA6DDFBC2, 0xA6DEFBC2, 0xA6DFFBC2, 0xA6E0FBC2, 0xA6E1FBC2, 0xA6E2FBC2, 0xA6E3FBC2, 0xA6E4FBC2, 0xA6E5FBC2, 0xA6E6FBC2, + 0xA6E7FBC2, 0xA6E8FBC2, 0xA6E9FBC2, 0xA6EAFBC2, 0xA6EBFBC2, 0xA6ECFBC2, 0xA6EDFBC2, 0xA6EEFBC2, 0xA6EFFBC2, 0xA6F0FBC2, 0xA6F1FBC2, 0xA6F2FBC2, 0xA6F3FBC2, 0xA6F4FBC2, 0xA6F5FBC2, + 0xA6F6FBC2, 0xA6F7FBC2, 0xA6F8FBC2, 0xA6F9FBC2, 0xA6FAFBC2, 0xA6FBFBC2, 0xA6FCFBC2, 0xA6FDFBC2, 0xA6FEFBC2, 0xA6FFFBC2, 0xA700FBC2, 0xA701FBC2, 0xA702FBC2, 0xA703FBC2, 0xA704FBC2, + 0xA705FBC2, 0xA706FBC2, 0xA707FBC2, 0xA708FBC2, 0xA709FBC2, 0xA70AFBC2, 0xA70BFBC2, 0xA70CFBC2, 0xA70DFBC2, 0xA70EFBC2, 0xA70FFBC2, 0xA710FBC2, 0xA711FBC2, 0xA712FBC2, 0xA713FBC2, + 0xA714FBC2, 0xA715FBC2, 0xA716FBC2, 0xA717FBC2, 0xA718FBC2, 0xA719FBC2, 0xA71AFBC2, 0xA71BFBC2, 0xA71CFBC2, 0xA71DFBC2, 0xA71EFBC2, 0xA71FFBC2, 0xA720FBC2, 0xA721FBC2, 0xA722FBC2, + 0xA723FBC2, 0xA724FBC2, 0xA725FBC2, 0xA726FBC2, 0xA727FBC2, 0xA728FBC2, 0xA729FBC2, 0xA72AFBC2, 0xA72BFBC2, 0xA72CFBC2, 0xA72DFBC2, 0xA72EFBC2, 0xA72FFBC2, 0xA730FBC2, 0xA731FBC2, + 0xA732FBC2, 0xA733FBC2, 0xA734FBC2, 0xA735FBC2, 0xA736FBC2, 0xA737FBC2, 0xA738FBC2, 0xA739FBC2, 0xA73AFBC2, 0xA73BFBC2, 0xA73CFBC2, 0xA73DFBC2, 0xA73EFBC2, 0xA73FFBC2, 0xA740FBC2, + 0xA741FBC2, 0xA742FBC2, 0xA743FBC2, 0xA744FBC2, 0xA745FBC2, 0xA746FBC2, 0xA747FBC2, 0xA748FBC2, 0xA749FBC2, 0xA74AFBC2, 0xA74BFBC2, 0xA74CFBC2, 0xA74DFBC2, 
0xA74EFBC2, 0xA74FFBC2, + 0xA750FBC2, 0xA751FBC2, 0xA752FBC2, 0xA753FBC2, 0xA754FBC2, 0xA755FBC2, 0xA756FBC2, 0xA757FBC2, 0xA758FBC2, 0xA759FBC2, 0xA75AFBC2, 0xA75BFBC2, 0xA75CFBC2, 0xA75DFBC2, 0xA75EFBC2, + 0xA75FFBC2, 0xA760FBC2, 0xA761FBC2, 0xA762FBC2, 0xA763FBC2, 0xA764FBC2, 0xA765FBC2, 0xA766FBC2, 0xA767FBC2, 0xA768FBC2, 0xA769FBC2, 0xA76AFBC2, 0xA76BFBC2, 0xA76CFBC2, 0xA76DFBC2, + 0xA76EFBC2, 0xA76FFBC2, 0xA770FBC2, 0xA771FBC2, 0xA772FBC2, 0xA773FBC2, 0xA774FBC2, 0xA775FBC2, 0xA776FBC2, 0xA777FBC2, 0xA778FBC2, 0xA779FBC2, 0xA77AFBC2, 0xA77BFBC2, 0xA77CFBC2, + 0xA77DFBC2, 0xA77EFBC2, 0xA77FFBC2, 0xA780FBC2, 0xA781FBC2, 0xA782FBC2, 0xA783FBC2, 0xA784FBC2, 0xA785FBC2, 0xA786FBC2, 0xA787FBC2, 0xA788FBC2, 0xA789FBC2, 0xA78AFBC2, 0xA78BFBC2, + 0xA78CFBC2, 0xA78DFBC2, 0xA78EFBC2, 0xA78FFBC2, 0xA790FBC2, 0xA791FBC2, 0xA792FBC2, 0xA793FBC2, 0xA794FBC2, 0xA795FBC2, 0xA796FBC2, 0xA797FBC2, 0xA798FBC2, 0xA799FBC2, 0xA79AFBC2, + 0xA79BFBC2, 0xA79CFBC2, 0xA79DFBC2, 0xA79EFBC2, 0xA79FFBC2, 0xA7A0FBC2, 0xA7A1FBC2, 0xA7A2FBC2, 0xA7A3FBC2, 0xA7A4FBC2, 0xA7A5FBC2, 0xA7A6FBC2, 0xA7A7FBC2, 0xA7A8FBC2, 0xA7A9FBC2, + 0xA7AAFBC2, 0xA7ABFBC2, 0xA7ACFBC2, 0xA7ADFBC2, 0xA7AEFBC2, 0xA7AFFBC2, 0xA7B0FBC2, 0xA7B1FBC2, 0xA7B2FBC2, 0xA7B3FBC2, 0xA7B4FBC2, 0xA7B5FBC2, 0xA7B6FBC2, 0xA7B7FBC2, 0xA7B8FBC2, + 0xA7B9FBC2, 0xA7BAFBC2, 0xA7BBFBC2, 0xA7BCFBC2, 0xA7BDFBC2, 0xA7BEFBC2, 0xA7BFFBC2, 0xA7C0FBC2, 0xA7C1FBC2, 0xA7C2FBC2, 0xA7C3FBC2, 0xA7C4FBC2, 0xA7C5FBC2, 0xA7C6FBC2, 0xA7C7FBC2, + 0xA7C8FBC2, 0xA7C9FBC2, 0xA7CAFBC2, 0xA7CBFBC2, 0xA7CCFBC2, 0xA7CDFBC2, 0xA7CEFBC2, 0xA7CFFBC2, 0xA7D0FBC2, 0xA7D1FBC2, 0xA7D2FBC2, 0xA7D3FBC2, 0xA7D4FBC2, 0xA7D5FBC2, 0xA7D6FBC2, + 0xA7D7FBC2, 0xA7D8FBC2, 0xA7D9FBC2, 0xA7DAFBC2, 0xA7DBFBC2, 0xA7DCFBC2, 0xA7DDFBC2, 0xA7DEFBC2, 0xA7DFFBC2, 0xA7E0FBC2, 0xA7E1FBC2, 0xA7E2FBC2, 0xA7E3FBC2, 0xA7E4FBC2, 0xA7E5FBC2, + 0xA7E6FBC2, 0xA7E7FBC2, 0xA7E8FBC2, 0xA7E9FBC2, 0xA7EAFBC2, 0xA7EBFBC2, 0xA7ECFBC2, 0xA7EDFBC2, 0xA7EEFBC2, 0xA7EFFBC2, 0xA7F0FBC2, 0xA7F1FBC2, 
0xA7F2FBC2, 0xA7F3FBC2, 0xA7F4FBC2, + 0xA7F5FBC2, 0xA7F6FBC2, 0xA7F7FBC2, 0xA7F8FBC2, 0xA7F9FBC2, 0xA7FAFBC2, 0xA7FBFBC2, 0xA7FCFBC2, 0xA7FDFBC2, 0xA7FEFBC2, 0xA7FFFBC2, 0xA800FBC2, 0xA801FBC2, 0xA802FBC2, 0xA803FBC2, + 0xA804FBC2, 0xA805FBC2, 0xA806FBC2, 0xA807FBC2, 0xA808FBC2, 0xA809FBC2, 0xA80AFBC2, 0xA80BFBC2, 0xA80CFBC2, 0xA80DFBC2, 0xA80EFBC2, 0xA80FFBC2, 0xA810FBC2, 0xA811FBC2, 0xA812FBC2, + 0xA813FBC2, 0xA814FBC2, 0xA815FBC2, 0xA816FBC2, 0xA817FBC2, 0xA818FBC2, 0xA819FBC2, 0xA81AFBC2, 0xA81BFBC2, 0xA81CFBC2, 0xA81DFBC2, 0xA81EFBC2, 0xA81FFBC2, 0xA820FBC2, 0xA821FBC2, + 0xA822FBC2, 0xA823FBC2, 0xA824FBC2, 0xA825FBC2, 0xA826FBC2, 0xA827FBC2, 0xA828FBC2, 0xA829FBC2, 0xA82AFBC2, 0xA82BFBC2, 0xA82CFBC2, 0xA82DFBC2, 0xA82EFBC2, 0xA82FFBC2, 0xA830FBC2, + 0xA831FBC2, 0xA832FBC2, 0xA833FBC2, 0xA834FBC2, 0xA835FBC2, 0xA836FBC2, 0xA837FBC2, 0xA838FBC2, 0xA839FBC2, 0xA83AFBC2, 0xA83BFBC2, 0xA83CFBC2, 0xA83DFBC2, 0xA83EFBC2, 0xA83FFBC2, + 0xA840FBC2, 0xA841FBC2, 0xA842FBC2, 0xA843FBC2, 0xA844FBC2, 0xA845FBC2, 0xA846FBC2, 0xA847FBC2, 0xA848FBC2, 0xA849FBC2, 0xA84AFBC2, 0xA84BFBC2, 0xA84CFBC2, 0xA84DFBC2, 0xA84EFBC2, + 0xA84FFBC2, 0xA850FBC2, 0xA851FBC2, 0xA852FBC2, 0xA853FBC2, 0xA854FBC2, 0xA855FBC2, 0xA856FBC2, 0xA857FBC2, 0xA858FBC2, 0xA859FBC2, 0xA85AFBC2, 0xA85BFBC2, 0xA85CFBC2, 0xA85DFBC2, + 0xA85EFBC2, 0xA85FFBC2, 0xA860FBC2, 0xA861FBC2, 0xA862FBC2, 0xA863FBC2, 0xA864FBC2, 0xA865FBC2, 0xA866FBC2, 0xA867FBC2, 0xA868FBC2, 0xA869FBC2, 0xA86AFBC2, 0xA86BFBC2, 0xA86CFBC2, + 0xA86DFBC2, 0xA86EFBC2, 0xA86FFBC2, 0xA870FBC2, 0xA871FBC2, 0xA872FBC2, 0xA873FBC2, 0xA874FBC2, 0xA875FBC2, 0xA876FBC2, 0xA877FBC2, 0xA878FBC2, 0xA879FBC2, 0xA87AFBC2, 0xA87BFBC2, + 0xA87CFBC2, 0xA87DFBC2, 0xA87EFBC2, 0xA87FFBC2, 0xA880FBC2, 0xA881FBC2, 0xA882FBC2, 0xA883FBC2, 0xA884FBC2, 0xA885FBC2, 0xA886FBC2, 0xA887FBC2, 0xA888FBC2, 0xA889FBC2, 0xA88AFBC2, + 0xA88BFBC2, 0xA88CFBC2, 0xA88DFBC2, 0xA88EFBC2, 0xA88FFBC2, 0xA890FBC2, 0xA891FBC2, 0xA892FBC2, 0xA893FBC2, 0xA894FBC2, 0xA895FBC2, 
0xA896FBC2, 0xA897FBC2, 0xA898FBC2, 0xA899FBC2, + 0xA89AFBC2, 0xA89BFBC2, 0xA89CFBC2, 0xA89DFBC2, 0xA89EFBC2, 0xA89FFBC2, 0xA8A0FBC2, 0xA8A1FBC2, 0xA8A2FBC2, 0xA8A3FBC2, 0xA8A4FBC2, 0xA8A5FBC2, 0xA8A6FBC2, 0xA8A7FBC2, 0xA8A8FBC2, + 0xA8A9FBC2, 0xA8AAFBC2, 0xA8ABFBC2, 0xA8ACFBC2, 0xA8ADFBC2, 0xA8AEFBC2, 0xA8AFFBC2, 0xA8B0FBC2, 0xA8B1FBC2, 0xA8B2FBC2, 0xA8B3FBC2, 0xA8B4FBC2, 0xA8B5FBC2, 0xA8B6FBC2, 0xA8B7FBC2, + 0xA8B8FBC2, 0xA8B9FBC2, 0xA8BAFBC2, 0xA8BBFBC2, 0xA8BCFBC2, 0xA8BDFBC2, 0xA8BEFBC2, 0xA8BFFBC2, 0xA8C0FBC2, 0xA8C1FBC2, 0xA8C2FBC2, 0xA8C3FBC2, 0xA8C4FBC2, 0xA8C5FBC2, 0xA8C6FBC2, + 0xA8C7FBC2, 0xA8C8FBC2, 0xA8C9FBC2, 0xA8CAFBC2, 0xA8CBFBC2, 0xA8CCFBC2, 0xA8CDFBC2, 0xA8CEFBC2, 0xA8CFFBC2, 0xA8D0FBC2, 0xA8D1FBC2, 0xA8D2FBC2, 0xA8D3FBC2, 0xA8D4FBC2, 0xA8D5FBC2, + 0xA8D6FBC2, 0xA8D7FBC2, 0xA8D8FBC2, 0xA8D9FBC2, 0xA8DAFBC2, 0xA8DBFBC2, 0xA8DCFBC2, 0xA8DDFBC2, 0xA8DEFBC2, 0xA8DFFBC2, 0xA8E0FBC2, 0xA8E1FBC2, 0xA8E2FBC2, 0xA8E3FBC2, 0xA8E4FBC2, + 0xA8E5FBC2, 0xA8E6FBC2, 0xA8E7FBC2, 0xA8E8FBC2, 0xA8E9FBC2, 0xA8EAFBC2, 0xA8EBFBC2, 0xA8ECFBC2, 0xA8EDFBC2, 0xA8EEFBC2, 0xA8EFFBC2, 0xA8F0FBC2, 0xA8F1FBC2, 0xA8F2FBC2, 0xA8F3FBC2, + 0xA8F4FBC2, 0xA8F5FBC2, 0xA8F6FBC2, 0xA8F7FBC2, 0xA8F8FBC2, 0xA8F9FBC2, 0xA8FAFBC2, 0xA8FBFBC2, 0xA8FCFBC2, 0xA8FDFBC2, 0xA8FEFBC2, 0xA8FFFBC2, 0xA900FBC2, 0xA901FBC2, 0xA902FBC2, + 0xA903FBC2, 0xA904FBC2, 0xA905FBC2, 0xA906FBC2, 0xA907FBC2, 0xA908FBC2, 0xA909FBC2, 0xA90AFBC2, 0xA90BFBC2, 0xA90CFBC2, 0xA90DFBC2, 0xA90EFBC2, 0xA90FFBC2, 0xA910FBC2, 0xA911FBC2, + 0xA912FBC2, 0xA913FBC2, 0xA914FBC2, 0xA915FBC2, 0xA916FBC2, 0xA917FBC2, 0xA918FBC2, 0xA919FBC2, 0xA91AFBC2, 0xA91BFBC2, 0xA91CFBC2, 0xA91DFBC2, 0xA91EFBC2, 0xA91FFBC2, 0xA920FBC2, + 0xA921FBC2, 0xA922FBC2, 0xA923FBC2, 0xA924FBC2, 0xA925FBC2, 0xA926FBC2, 0xA927FBC2, 0xA928FBC2, 0xA929FBC2, 0xA92AFBC2, 0xA92BFBC2, 0xA92CFBC2, 0xA92DFBC2, 0xA92EFBC2, 0xA92FFBC2, + 0xA930FBC2, 0xA931FBC2, 0xA932FBC2, 0xA933FBC2, 0xA934FBC2, 0xA935FBC2, 0xA936FBC2, 0xA937FBC2, 0xA938FBC2, 0xA939FBC2, 
0xA93AFBC2, 0xA93BFBC2, 0xA93CFBC2, 0xA93DFBC2, 0xA93EFBC2, + 0xA93FFBC2, 0xA940FBC2, 0xA941FBC2, 0xA942FBC2, 0xA943FBC2, 0xA944FBC2, 0xA945FBC2, 0xA946FBC2, 0xA947FBC2, 0xA948FBC2, 0xA949FBC2, 0xA94AFBC2, 0xA94BFBC2, 0xA94CFBC2, 0xA94DFBC2, + 0xA94EFBC2, 0xA94FFBC2, 0xA950FBC2, 0xA951FBC2, 0xA952FBC2, 0xA953FBC2, 0xA954FBC2, 0xA955FBC2, 0xA956FBC2, 0xA957FBC2, 0xA958FBC2, 0xA959FBC2, 0xA95AFBC2, 0xA95BFBC2, 0xA95CFBC2, + 0xA95DFBC2, 0xA95EFBC2, 0xA95FFBC2, 0xA960FBC2, 0xA961FBC2, 0xA962FBC2, 0xA963FBC2, 0xA964FBC2, 0xA965FBC2, 0xA966FBC2, 0xA967FBC2, 0xA968FBC2, 0xA969FBC2, 0xA96AFBC2, 0xA96BFBC2, + 0xA96CFBC2, 0xA96DFBC2, 0xA96EFBC2, 0xA96FFBC2, 0xA970FBC2, 0xA971FBC2, 0xA972FBC2, 0xA973FBC2, 0xA974FBC2, 0xA975FBC2, 0xA976FBC2, 0xA977FBC2, 0xA978FBC2, 0xA979FBC2, 0xA97AFBC2, + 0xA97BFBC2, 0xA97CFBC2, 0xA97DFBC2, 0xA97EFBC2, 0xA97FFBC2, 0xA980FBC2, 0xA981FBC2, 0xA982FBC2, 0xA983FBC2, 0xA984FBC2, 0xA985FBC2, 0xA986FBC2, 0xA987FBC2, 0xA988FBC2, 0xA989FBC2, + 0xA98AFBC2, 0xA98BFBC2, 0xA98CFBC2, 0xA98DFBC2, 0xA98EFBC2, 0xA98FFBC2, 0xA990FBC2, 0xA991FBC2, 0xA992FBC2, 0xA993FBC2, 0xA994FBC2, 0xA995FBC2, 0xA996FBC2, 0xA997FBC2, 0xA998FBC2, + 0xA999FBC2, 0xA99AFBC2, 0xA99BFBC2, 0xA99CFBC2, 0xA99DFBC2, 0xA99EFBC2, 0xA99FFBC2, 0xA9A0FBC2, 0xA9A1FBC2, 0xA9A2FBC2, 0xA9A3FBC2, 0xA9A4FBC2, 0xA9A5FBC2, 0xA9A6FBC2, 0xA9A7FBC2, + 0xA9A8FBC2, 0xA9A9FBC2, 0xA9AAFBC2, 0xA9ABFBC2, 0xA9ACFBC2, 0xA9ADFBC2, 0xA9AEFBC2, 0xA9AFFBC2, 0xA9B0FBC2, 0xA9B1FBC2, 0xA9B2FBC2, 0xA9B3FBC2, 0xA9B4FBC2, 0xA9B5FBC2, 0xA9B6FBC2, + 0xA9B7FBC2, 0xA9B8FBC2, 0xA9B9FBC2, 0xA9BAFBC2, 0xA9BBFBC2, 0xA9BCFBC2, 0xA9BDFBC2, 0xA9BEFBC2, 0xA9BFFBC2, 0xA9C0FBC2, 0xA9C1FBC2, 0xA9C2FBC2, 0xA9C3FBC2, 0xA9C4FBC2, 0xA9C5FBC2, + 0xA9C6FBC2, 0xA9C7FBC2, 0xA9C8FBC2, 0xA9C9FBC2, 0xA9CAFBC2, 0xA9CBFBC2, 0xA9CCFBC2, 0xA9CDFBC2, 0xA9CEFBC2, 0xA9CFFBC2, 0xA9D0FBC2, 0xA9D1FBC2, 0xA9D2FBC2, 0xA9D3FBC2, 0xA9D4FBC2, + 0xA9D5FBC2, 0xA9D6FBC2, 0xA9D7FBC2, 0xA9D8FBC2, 0xA9D9FBC2, 0xA9DAFBC2, 0xA9DBFBC2, 0xA9DCFBC2, 0xA9DDFBC2, 
0xA9DEFBC2, 0xA9DFFBC2, 0xA9E0FBC2, 0xA9E1FBC2, 0xA9E2FBC2, 0xA9E3FBC2, + 0xA9E4FBC2, 0xA9E5FBC2, 0xA9E6FBC2, 0xA9E7FBC2, 0xA9E8FBC2, 0xA9E9FBC2, 0xA9EAFBC2, 0xA9EBFBC2, 0xA9ECFBC2, 0xA9EDFBC2, 0xA9EEFBC2, 0xA9EFFBC2, 0xA9F0FBC2, 0xA9F1FBC2, 0xA9F2FBC2, + 0xA9F3FBC2, 0xA9F4FBC2, 0xA9F5FBC2, 0xA9F6FBC2, 0xA9F7FBC2, 0xA9F8FBC2, 0xA9F9FBC2, 0xA9FAFBC2, 0xA9FBFBC2, 0xA9FCFBC2, 0xA9FDFBC2, 0xA9FEFBC2, 0xA9FFFBC2, 0xAA00FBC2, 0xAA01FBC2, + 0xAA02FBC2, 0xAA03FBC2, 0xAA04FBC2, 0xAA05FBC2, 0xAA06FBC2, 0xAA07FBC2, 0xAA08FBC2, 0xAA09FBC2, 0xAA0AFBC2, 0xAA0BFBC2, 0xAA0CFBC2, 0xAA0DFBC2, 0xAA0EFBC2, 0xAA0FFBC2, 0xAA10FBC2, + 0xAA11FBC2, 0xAA12FBC2, 0xAA13FBC2, 0xAA14FBC2, 0xAA15FBC2, 0xAA16FBC2, 0xAA17FBC2, 0xAA18FBC2, 0xAA19FBC2, 0xAA1AFBC2, 0xAA1BFBC2, 0xAA1CFBC2, 0xAA1DFBC2, 0xAA1EFBC2, 0xAA1FFBC2, + 0xAA20FBC2, 0xAA21FBC2, 0xAA22FBC2, 0xAA23FBC2, 0xAA24FBC2, 0xAA25FBC2, 0xAA26FBC2, 0xAA27FBC2, 0xAA28FBC2, 0xAA29FBC2, 0xAA2AFBC2, 0xAA2BFBC2, 0xAA2CFBC2, 0xAA2DFBC2, 0xAA2EFBC2, + 0xAA2FFBC2, 0xAA30FBC2, 0xAA31FBC2, 0xAA32FBC2, 0xAA33FBC2, 0xAA34FBC2, 0xAA35FBC2, 0xAA36FBC2, 0xAA37FBC2, 0xAA38FBC2, 0xAA39FBC2, 0xAA3AFBC2, 0xAA3BFBC2, 0xAA3CFBC2, 0xAA3DFBC2, + 0xAA3EFBC2, 0xAA3FFBC2, 0xAA40FBC2, 0xAA41FBC2, 0xAA42FBC2, 0xAA43FBC2, 0xAA44FBC2, 0xAA45FBC2, 0xAA46FBC2, 0xAA47FBC2, 0xAA48FBC2, 0xAA49FBC2, 0xAA4AFBC2, 0xAA4BFBC2, 0xAA4CFBC2, + 0xAA4DFBC2, 0xAA4EFBC2, 0xAA4FFBC2, 0xAA50FBC2, 0xAA51FBC2, 0xAA52FBC2, 0xAA53FBC2, 0xAA54FBC2, 0xAA55FBC2, 0xAA56FBC2, 0xAA57FBC2, 0xAA58FBC2, 0xAA59FBC2, 0xAA5AFBC2, 0xAA5BFBC2, + 0xAA5CFBC2, 0xAA5DFBC2, 0xAA5EFBC2, 0xAA5FFBC2, 0xAA60FBC2, 0xAA61FBC2, 0xAA62FBC2, 0xAA63FBC2, 0xAA64FBC2, 0xAA65FBC2, 0xAA66FBC2, 0xAA67FBC2, 0xAA68FBC2, 0xAA69FBC2, 0xAA6AFBC2, + 0xAA6BFBC2, 0xAA6CFBC2, 0xAA6DFBC2, 0xAA6EFBC2, 0xAA6FFBC2, 0xAA70FBC2, 0xAA71FBC2, 0xAA72FBC2, 0xAA73FBC2, 0xAA74FBC2, 0xAA75FBC2, 0xAA76FBC2, 0xAA77FBC2, 0xAA78FBC2, 0xAA79FBC2, + 0xAA7AFBC2, 0xAA7BFBC2, 0xAA7CFBC2, 0xAA7DFBC2, 0xAA7EFBC2, 0xAA7FFBC2, 0xAA80FBC2, 0xAA81FBC2, 
0xAA82FBC2, 0xAA83FBC2, 0xAA84FBC2, 0xAA85FBC2, 0xAA86FBC2, 0xAA87FBC2, 0xAA88FBC2, + 0xAA89FBC2, 0xAA8AFBC2, 0xAA8BFBC2, 0xAA8CFBC2, 0xAA8DFBC2, 0xAA8EFBC2, 0xAA8FFBC2, 0xAA90FBC2, 0xAA91FBC2, 0xAA92FBC2, 0xAA93FBC2, 0xAA94FBC2, 0xAA95FBC2, 0xAA96FBC2, 0xAA97FBC2, + 0xAA98FBC2, 0xAA99FBC2, 0xAA9AFBC2, 0xAA9BFBC2, 0xAA9CFBC2, 0xAA9DFBC2, 0xAA9EFBC2, 0xAA9FFBC2, 0xAAA0FBC2, 0xAAA1FBC2, 0xAAA2FBC2, 0xAAA3FBC2, 0xAAA4FBC2, 0xAAA5FBC2, 0xAAA6FBC2, + 0xAAA7FBC2, 0xAAA8FBC2, 0xAAA9FBC2, 0xAAAAFBC2, 0xAAABFBC2, 0xAAACFBC2, 0xAAADFBC2, 0xAAAEFBC2, 0xAAAFFBC2, 0xAAB0FBC2, 0xAAB1FBC2, 0xAAB2FBC2, 0xAAB3FBC2, 0xAAB4FBC2, 0xAAB5FBC2, + 0xAAB6FBC2, 0xAAB7FBC2, 0xAAB8FBC2, 0xAAB9FBC2, 0xAABAFBC2, 0xAABBFBC2, 0xAABCFBC2, 0xAABDFBC2, 0xAABEFBC2, 0xAABFFBC2, 0xAAC0FBC2, 0xAAC1FBC2, 0xAAC2FBC2, 0xAAC3FBC2, 0xAAC4FBC2, + 0xAAC5FBC2, 0xAAC6FBC2, 0xAAC7FBC2, 0xAAC8FBC2, 0xAAC9FBC2, 0xAACAFBC2, 0xAACBFBC2, 0xAACCFBC2, 0xAACDFBC2, 0xAACEFBC2, 0xAACFFBC2, 0xAAD0FBC2, 0xAAD1FBC2, 0xAAD2FBC2, 0xAAD3FBC2, + 0xAAD4FBC2, 0xAAD5FBC2, 0xAAD6FBC2, 0xAAD7FBC2, 0xAAD8FBC2, 0xAAD9FBC2, 0xAADAFBC2, 0xAADBFBC2, 0xAADCFBC2, 0xAADDFBC2, 0xAADEFBC2, 0xAADFFBC2, 0xAAE0FBC2, 0xAAE1FBC2, 0xAAE2FBC2, + 0xAAE3FBC2, 0xAAE4FBC2, 0xAAE5FBC2, 0xAAE6FBC2, 0xAAE7FBC2, 0xAAE8FBC2, 0xAAE9FBC2, 0xAAEAFBC2, 0xAAEBFBC2, 0xAAECFBC2, 0xAAEDFBC2, 0xAAEEFBC2, 0xAAEFFBC2, 0xAAF0FBC2, 0xAAF1FBC2, + 0xAAF2FBC2, 0xAAF3FBC2, 0xAAF4FBC2, 0xAAF5FBC2, 0xAAF6FBC2, 0xAAF7FBC2, 0xAAF8FBC2, 0xAAF9FBC2, 0xAAFAFBC2, 0xAAFBFBC2, 0xAAFCFBC2, 0xAAFDFBC2, 0xAAFEFBC2, 0xAAFFFBC2, 0xAB00FBC2, + 0xAB01FBC2, 0xAB02FBC2, 0xAB03FBC2, 0xAB04FBC2, 0xAB05FBC2, 0xAB06FBC2, 0xAB07FBC2, 0xAB08FBC2, 0xAB09FBC2, 0xAB0AFBC2, 0xAB0BFBC2, 0xAB0CFBC2, 0xAB0DFBC2, 0xAB0EFBC2, 0xAB0FFBC2, + 0xAB10FBC2, 0xAB11FBC2, 0xAB12FBC2, 0xAB13FBC2, 0xAB14FBC2, 0xAB15FBC2, 0xAB16FBC2, 0xAB17FBC2, 0xAB18FBC2, 0xAB19FBC2, 0xAB1AFBC2, 0xAB1BFBC2, 0xAB1CFBC2, 0xAB1DFBC2, 0xAB1EFBC2, + 0xAB1FFBC2, 0xAB20FBC2, 0xAB21FBC2, 0xAB22FBC2, 0xAB23FBC2, 0xAB24FBC2, 0xAB25FBC2, 
0xAB26FBC2, 0xAB27FBC2, 0xAB28FBC2, 0xAB29FBC2, 0xAB2AFBC2, 0xAB2BFBC2, 0xAB2CFBC2, 0xAB2DFBC2, + 0xAB2EFBC2, 0xAB2FFBC2, 0xAB30FBC2, 0xAB31FBC2, 0xAB32FBC2, 0xAB33FBC2, 0xAB34FBC2, 0xAB35FBC2, 0xAB36FBC2, 0xAB37FBC2, 0xAB38FBC2, 0xAB39FBC2, 0xAB3AFBC2, 0xAB3BFBC2, 0xAB3CFBC2, + 0xAB3DFBC2, 0xAB3EFBC2, 0xAB3FFBC2, 0xAB40FBC2, 0xAB41FBC2, 0xAB42FBC2, 0xAB43FBC2, 0xAB44FBC2, 0xAB45FBC2, 0xAB46FBC2, 0xAB47FBC2, 0xAB48FBC2, 0xAB49FBC2, 0xAB4AFBC2, 0xAB4BFBC2, + 0xAB4CFBC2, 0xAB4DFBC2, 0xAB4EFBC2, 0xAB4FFBC2, 0xAB50FBC2, 0xAB51FBC2, 0xAB52FBC2, 0xAB53FBC2, 0xAB54FBC2, 0xAB55FBC2, 0xAB56FBC2, 0xAB57FBC2, 0xAB58FBC2, 0xAB59FBC2, 0xAB5AFBC2, + 0xAB5BFBC2, 0xAB5CFBC2, 0xAB5DFBC2, 0xAB5EFBC2, 0xAB5FFBC2, 0xAB60FBC2, 0xAB61FBC2, 0xAB62FBC2, 0xAB63FBC2, 0xAB64FBC2, 0xAB65FBC2, 0xAB66FBC2, 0xAB67FBC2, 0xAB68FBC2, 0xAB69FBC2, + 0xAB6AFBC2, 0xAB6BFBC2, 0xAB6CFBC2, 0xAB6DFBC2, 0xAB6EFBC2, 0xAB6FFBC2, 0xAB70FBC2, 0xAB71FBC2, 0xAB72FBC2, 0xAB73FBC2, 0xAB74FBC2, 0xAB75FBC2, 0xAB76FBC2, 0xAB77FBC2, 0xAB78FBC2, + 0xAB79FBC2, 0xAB7AFBC2, 0xAB7BFBC2, 0xAB7CFBC2, 0xAB7DFBC2, 0xAB7EFBC2, 0xAB7FFBC2, 0xAB80FBC2, 0xAB81FBC2, 0xAB82FBC2, 0xAB83FBC2, 0xAB84FBC2, 0xAB85FBC2, 0xAB86FBC2, 0xAB87FBC2, + 0xAB88FBC2, 0xAB89FBC2, 0xAB8AFBC2, 0xAB8BFBC2, 0xAB8CFBC2, 0xAB8DFBC2, 0xAB8EFBC2, 0xAB8FFBC2, 0xAB90FBC2, 0xAB91FBC2, 0xAB92FBC2, 0xAB93FBC2, 0xAB94FBC2, 0xAB95FBC2, 0xAB96FBC2, + 0xAB97FBC2, 0xAB98FBC2, 0xAB99FBC2, 0xAB9AFBC2, 0xAB9BFBC2, 0xAB9CFBC2, 0xAB9DFBC2, 0xAB9EFBC2, 0xAB9FFBC2, 0xABA0FBC2, 0xABA1FBC2, 0xABA2FBC2, 0xABA3FBC2, 0xABA4FBC2, 0xABA5FBC2, + 0xABA6FBC2, 0xABA7FBC2, 0xABA8FBC2, 0xABA9FBC2, 0xABAAFBC2, 0xABABFBC2, 0xABACFBC2, 0xABADFBC2, 0xABAEFBC2, 0xABAFFBC2, 0xABB0FBC2, 0xABB1FBC2, 0xABB2FBC2, 0xABB3FBC2, 0xABB4FBC2, + 0xABB5FBC2, 0xABB6FBC2, 0xABB7FBC2, 0xABB8FBC2, 0xABB9FBC2, 0xABBAFBC2, 0xABBBFBC2, 0xABBCFBC2, 0xABBDFBC2, 0xABBEFBC2, 0xABBFFBC2, 0xABC0FBC2, 0xABC1FBC2, 0xABC2FBC2, 0xABC3FBC2, + 0xABC4FBC2, 0xABC5FBC2, 0xABC6FBC2, 0xABC7FBC2, 0xABC8FBC2, 0xABC9FBC2, 
0xABCAFBC2, 0xABCBFBC2, 0xABCCFBC2, 0xABCDFBC2, 0xABCEFBC2, 0xABCFFBC2, 0xABD0FBC2, 0xABD1FBC2, 0xABD2FBC2, + 0xABD3FBC2, 0xABD4FBC2, 0xABD5FBC2, 0xABD6FBC2, 0xABD7FBC2, 0xABD8FBC2, 0xABD9FBC2, 0xABDAFBC2, 0xABDBFBC2, 0xABDCFBC2, 0xABDDFBC2, 0xABDEFBC2, 0xABDFFBC2, 0xABE0FBC2, 0xABE1FBC2, + 0xABE2FBC2, 0xABE3FBC2, 0xABE4FBC2, 0xABE5FBC2, 0xABE6FBC2, 0xABE7FBC2, 0xABE8FBC2, 0xABE9FBC2, 0xABEAFBC2, 0xABEBFBC2, 0xABECFBC2, 0xABEDFBC2, 0xABEEFBC2, 0xABEFFBC2, 0xABF0FBC2, + 0xABF1FBC2, 0xABF2FBC2, 0xABF3FBC2, 0xABF4FBC2, 0xABF5FBC2, 0xABF6FBC2, 0xABF7FBC2, 0xABF8FBC2, 0xABF9FBC2, 0xABFAFBC2, 0xABFBFBC2, 0xABFCFBC2, 0xABFDFBC2, 0xABFEFBC2, 0xABFFFBC2, + 0xAC00FBC2, 0xAC01FBC2, 0xAC02FBC2, 0xAC03FBC2, 0xAC04FBC2, 0xAC05FBC2, 0xAC06FBC2, 0xAC07FBC2, 0xAC08FBC2, 0xAC09FBC2, 0xAC0AFBC2, 0xAC0BFBC2, 0xAC0CFBC2, 0xAC0DFBC2, 0xAC0EFBC2, + 0xAC0FFBC2, 0xAC10FBC2, 0xAC11FBC2, 0xAC12FBC2, 0xAC13FBC2, 0xAC14FBC2, 0xAC15FBC2, 0xAC16FBC2, 0xAC17FBC2, 0xAC18FBC2, 0xAC19FBC2, 0xAC1AFBC2, 0xAC1BFBC2, 0xAC1CFBC2, 0xAC1DFBC2, + 0xAC1EFBC2, 0xAC1FFBC2, 0xAC20FBC2, 0xAC21FBC2, 0xAC22FBC2, 0xAC23FBC2, 0xAC24FBC2, 0xAC25FBC2, 0xAC26FBC2, 0xAC27FBC2, 0xAC28FBC2, 0xAC29FBC2, 0xAC2AFBC2, 0xAC2BFBC2, 0xAC2CFBC2, + 0xAC2DFBC2, 0xAC2EFBC2, 0xAC2FFBC2, 0xAC30FBC2, 0xAC31FBC2, 0xAC32FBC2, 0xAC33FBC2, 0xAC34FBC2, 0xAC35FBC2, 0xAC36FBC2, 0xAC37FBC2, 0xAC38FBC2, 0xAC39FBC2, 0xAC3AFBC2, 0xAC3BFBC2, + 0xAC3CFBC2, 0xAC3DFBC2, 0xAC3EFBC2, 0xAC3FFBC2, 0xAC40FBC2, 0xAC41FBC2, 0xAC42FBC2, 0xAC43FBC2, 0xAC44FBC2, 0xAC45FBC2, 0xAC46FBC2, 0xAC47FBC2, 0xAC48FBC2, 0xAC49FBC2, 0xAC4AFBC2, + 0xAC4BFBC2, 0xAC4CFBC2, 0xAC4DFBC2, 0xAC4EFBC2, 0xAC4FFBC2, 0xAC50FBC2, 0xAC51FBC2, 0xAC52FBC2, 0xAC53FBC2, 0xAC54FBC2, 0xAC55FBC2, 0xAC56FBC2, 0xAC57FBC2, 0xAC58FBC2, 0xAC59FBC2, + 0xAC5AFBC2, 0xAC5BFBC2, 0xAC5CFBC2, 0xAC5DFBC2, 0xAC5EFBC2, 0xAC5FFBC2, 0xAC60FBC2, 0xAC61FBC2, 0xAC62FBC2, 0xAC63FBC2, 0xAC64FBC2, 0xAC65FBC2, 0xAC66FBC2, 0xAC67FBC2, 0xAC68FBC2, + 0xAC69FBC2, 0xAC6AFBC2, 0xAC6BFBC2, 0xAC6CFBC2, 0xAC6DFBC2, 
0xAC6EFBC2, 0xAC6FFBC2, 0xAC70FBC2, 0xAC71FBC2, 0xAC72FBC2, 0xAC73FBC2, 0xAC74FBC2, 0xAC75FBC2, 0xAC76FBC2, 0xAC77FBC2, + 0xAC78FBC2, 0xAC79FBC2, 0xAC7AFBC2, 0xAC7BFBC2, 0xAC7CFBC2, 0xAC7DFBC2, 0xAC7EFBC2, 0xAC7FFBC2, 0xAC80FBC2, 0xAC81FBC2, 0xAC82FBC2, 0xAC83FBC2, 0xAC84FBC2, 0xAC85FBC2, 0xAC86FBC2, + 0xAC87FBC2, 0xAC88FBC2, 0xAC89FBC2, 0xAC8AFBC2, 0xAC8BFBC2, 0xAC8CFBC2, 0xAC8DFBC2, 0xAC8EFBC2, 0xAC8FFBC2, 0xAC90FBC2, 0xAC91FBC2, 0xAC92FBC2, 0xAC93FBC2, 0xAC94FBC2, 0xAC95FBC2, + 0xAC96FBC2, 0xAC97FBC2, 0xAC98FBC2, 0xAC99FBC2, 0xAC9AFBC2, 0xAC9BFBC2, 0xAC9CFBC2, 0xAC9DFBC2, 0xAC9EFBC2, 0xAC9FFBC2, 0xACA0FBC2, 0xACA1FBC2, 0xACA2FBC2, 0xACA3FBC2, 0xACA4FBC2, + 0xACA5FBC2, 0xACA6FBC2, 0xACA7FBC2, 0xACA8FBC2, 0xACA9FBC2, 0xACAAFBC2, 0xACABFBC2, 0xACACFBC2, 0xACADFBC2, 0xACAEFBC2, 0xACAFFBC2, 0xACB0FBC2, 0xACB1FBC2, 0xACB2FBC2, 0xACB3FBC2, + 0xACB4FBC2, 0xACB5FBC2, 0xACB6FBC2, 0xACB7FBC2, 0xACB8FBC2, 0xACB9FBC2, 0xACBAFBC2, 0xACBBFBC2, 0xACBCFBC2, 0xACBDFBC2, 0xACBEFBC2, 0xACBFFBC2, 0xACC0FBC2, 0xACC1FBC2, 0xACC2FBC2, + 0xACC3FBC2, 0xACC4FBC2, 0xACC5FBC2, 0xACC6FBC2, 0xACC7FBC2, 0xACC8FBC2, 0xACC9FBC2, 0xACCAFBC2, 0xACCBFBC2, 0xACCCFBC2, 0xACCDFBC2, 0xACCEFBC2, 0xACCFFBC2, 0xACD0FBC2, 0xACD1FBC2, + 0xACD2FBC2, 0xACD3FBC2, 0xACD4FBC2, 0xACD5FBC2, 0xACD6FBC2, 0xACD7FBC2, 0xACD8FBC2, 0xACD9FBC2, 0xACDAFBC2, 0xACDBFBC2, 0xACDCFBC2, 0xACDDFBC2, 0xACDEFBC2, 0xACDFFBC2, 0xACE0FBC2, + 0xACE1FBC2, 0xACE2FBC2, 0xACE3FBC2, 0xACE4FBC2, 0xACE5FBC2, 0xACE6FBC2, 0xACE7FBC2, 0xACE8FBC2, 0xACE9FBC2, 0xACEAFBC2, 0xACEBFBC2, 0xACECFBC2, 0xACEDFBC2, 0xACEEFBC2, 0xACEFFBC2, + 0xACF0FBC2, 0xACF1FBC2, 0xACF2FBC2, 0xACF3FBC2, 0xACF4FBC2, 0xACF5FBC2, 0xACF6FBC2, 0xACF7FBC2, 0xACF8FBC2, 0xACF9FBC2, 0xACFAFBC2, 0xACFBFBC2, 0xACFCFBC2, 0xACFDFBC2, 0xACFEFBC2, + 0xACFFFBC2, 0xAD00FBC2, 0xAD01FBC2, 0xAD02FBC2, 0xAD03FBC2, 0xAD04FBC2, 0xAD05FBC2, 0xAD06FBC2, 0xAD07FBC2, 0xAD08FBC2, 0xAD09FBC2, 0xAD0AFBC2, 0xAD0BFBC2, 0xAD0CFBC2, 0xAD0DFBC2, + 0xAD0EFBC2, 0xAD0FFBC2, 0xAD10FBC2, 0xAD11FBC2, 
0xAD12FBC2, 0xAD13FBC2, 0xAD14FBC2, 0xAD15FBC2, 0xAD16FBC2, 0xAD17FBC2, 0xAD18FBC2, 0xAD19FBC2, 0xAD1AFBC2, 0xAD1BFBC2, 0xAD1CFBC2, + 0xAD1DFBC2, 0xAD1EFBC2, 0xAD1FFBC2, 0xAD20FBC2, 0xAD21FBC2, 0xAD22FBC2, 0xAD23FBC2, 0xAD24FBC2, 0xAD25FBC2, 0xAD26FBC2, 0xAD27FBC2, 0xAD28FBC2, 0xAD29FBC2, 0xAD2AFBC2, 0xAD2BFBC2, + 0xAD2CFBC2, 0xAD2DFBC2, 0xAD2EFBC2, 0xAD2FFBC2, 0xAD30FBC2, 0xAD31FBC2, 0xAD32FBC2, 0xAD33FBC2, 0xAD34FBC2, 0xAD35FBC2, 0xAD36FBC2, 0xAD37FBC2, 0xAD38FBC2, 0xAD39FBC2, 0xAD3AFBC2, + 0xAD3BFBC2, 0xAD3CFBC2, 0xAD3DFBC2, 0xAD3EFBC2, 0xAD3FFBC2, 0xAD40FBC2, 0xAD41FBC2, 0xAD42FBC2, 0xAD43FBC2, 0xAD44FBC2, 0xAD45FBC2, 0xAD46FBC2, 0xAD47FBC2, 0xAD48FBC2, 0xAD49FBC2, + 0xAD4AFBC2, 0xAD4BFBC2, 0xAD4CFBC2, 0xAD4DFBC2, 0xAD4EFBC2, 0xAD4FFBC2, 0xAD50FBC2, 0xAD51FBC2, 0xAD52FBC2, 0xAD53FBC2, 0xAD54FBC2, 0xAD55FBC2, 0xAD56FBC2, 0xAD57FBC2, 0xAD58FBC2, + 0xAD59FBC2, 0xAD5AFBC2, 0xAD5BFBC2, 0xAD5CFBC2, 0xAD5DFBC2, 0xAD5EFBC2, 0xAD5FFBC2, 0xAD60FBC2, 0xAD61FBC2, 0xAD62FBC2, 0xAD63FBC2, 0xAD64FBC2, 0xAD65FBC2, 0xAD66FBC2, 0xAD67FBC2, + 0xAD68FBC2, 0xAD69FBC2, 0xAD6AFBC2, 0xAD6BFBC2, 0xAD6CFBC2, 0xAD6DFBC2, 0xAD6EFBC2, 0xAD6FFBC2, 0xAD70FBC2, 0xAD71FBC2, 0xAD72FBC2, 0xAD73FBC2, 0xAD74FBC2, 0xAD75FBC2, 0xAD76FBC2, + 0xAD77FBC2, 0xAD78FBC2, 0xAD79FBC2, 0xAD7AFBC2, 0xAD7BFBC2, 0xAD7CFBC2, 0xAD7DFBC2, 0xAD7EFBC2, 0xAD7FFBC2, 0xAD80FBC2, 0xAD81FBC2, 0xAD82FBC2, 0xAD83FBC2, 0xAD84FBC2, 0xAD85FBC2, + 0xAD86FBC2, 0xAD87FBC2, 0xAD88FBC2, 0xAD89FBC2, 0xAD8AFBC2, 0xAD8BFBC2, 0xAD8CFBC2, 0xAD8DFBC2, 0xAD8EFBC2, 0xAD8FFBC2, 0xAD90FBC2, 0xAD91FBC2, 0xAD92FBC2, 0xAD93FBC2, 0xAD94FBC2, + 0xAD95FBC2, 0xAD96FBC2, 0xAD97FBC2, 0xAD98FBC2, 0xAD99FBC2, 0xAD9AFBC2, 0xAD9BFBC2, 0xAD9CFBC2, 0xAD9DFBC2, 0xAD9EFBC2, 0xAD9FFBC2, 0xADA0FBC2, 0xADA1FBC2, 0xADA2FBC2, 0xADA3FBC2, + 0xADA4FBC2, 0xADA5FBC2, 0xADA6FBC2, 0xADA7FBC2, 0xADA8FBC2, 0xADA9FBC2, 0xADAAFBC2, 0xADABFBC2, 0xADACFBC2, 0xADADFBC2, 0xADAEFBC2, 0xADAFFBC2, 0xADB0FBC2, 0xADB1FBC2, 0xADB2FBC2, + 0xADB3FBC2, 0xADB4FBC2, 0xADB5FBC2, 
0xADB6FBC2, 0xADB7FBC2, 0xADB8FBC2, 0xADB9FBC2, 0xADBAFBC2, 0xADBBFBC2, 0xADBCFBC2, 0xADBDFBC2, 0xADBEFBC2, 0xADBFFBC2, 0xADC0FBC2, 0xADC1FBC2, + 0xADC2FBC2, 0xADC3FBC2, 0xADC4FBC2, 0xADC5FBC2, 0xADC6FBC2, 0xADC7FBC2, 0xADC8FBC2, 0xADC9FBC2, 0xADCAFBC2, 0xADCBFBC2, 0xADCCFBC2, 0xADCDFBC2, 0xADCEFBC2, 0xADCFFBC2, 0xADD0FBC2, + 0xADD1FBC2, 0xADD2FBC2, 0xADD3FBC2, 0xADD4FBC2, 0xADD5FBC2, 0xADD6FBC2, 0xADD7FBC2, 0xADD8FBC2, 0xADD9FBC2, 0xADDAFBC2, 0xADDBFBC2, 0xADDCFBC2, 0xADDDFBC2, 0xADDEFBC2, 0xADDFFBC2, + 0xADE0FBC2, 0xADE1FBC2, 0xADE2FBC2, 0xADE3FBC2, 0xADE4FBC2, 0xADE5FBC2, 0xADE6FBC2, 0xADE7FBC2, 0xADE8FBC2, 0xADE9FBC2, 0xADEAFBC2, 0xADEBFBC2, 0xADECFBC2, 0xADEDFBC2, 0xADEEFBC2, + 0xADEFFBC2, 0xADF0FBC2, 0xADF1FBC2, 0xADF2FBC2, 0xADF3FBC2, 0xADF4FBC2, 0xADF5FBC2, 0xADF6FBC2, 0xADF7FBC2, 0xADF8FBC2, 0xADF9FBC2, 0xADFAFBC2, 0xADFBFBC2, 0xADFCFBC2, 0xADFDFBC2, + 0xADFEFBC2, 0xADFFFBC2, 0xAE00FBC2, 0xAE01FBC2, 0xAE02FBC2, 0xAE03FBC2, 0xAE04FBC2, 0xAE05FBC2, 0xAE06FBC2, 0xAE07FBC2, 0xAE08FBC2, 0xAE09FBC2, 0xAE0AFBC2, 0xAE0BFBC2, 0xAE0CFBC2, + 0xAE0DFBC2, 0xAE0EFBC2, 0xAE0FFBC2, 0xAE10FBC2, 0xAE11FBC2, 0xAE12FBC2, 0xAE13FBC2, 0xAE14FBC2, 0xAE15FBC2, 0xAE16FBC2, 0xAE17FBC2, 0xAE18FBC2, 0xAE19FBC2, 0xAE1AFBC2, 0xAE1BFBC2, + 0xAE1CFBC2, 0xAE1DFBC2, 0xAE1EFBC2, 0xAE1FFBC2, 0xAE20FBC2, 0xAE21FBC2, 0xAE22FBC2, 0xAE23FBC2, 0xAE24FBC2, 0xAE25FBC2, 0xAE26FBC2, 0xAE27FBC2, 0xAE28FBC2, 0xAE29FBC2, 0xAE2AFBC2, + 0xAE2BFBC2, 0xAE2CFBC2, 0xAE2DFBC2, 0xAE2EFBC2, 0xAE2FFBC2, 0xAE30FBC2, 0xAE31FBC2, 0xAE32FBC2, 0xAE33FBC2, 0xAE34FBC2, 0xAE35FBC2, 0xAE36FBC2, 0xAE37FBC2, 0xAE38FBC2, 0xAE39FBC2, + 0xAE3AFBC2, 0xAE3BFBC2, 0xAE3CFBC2, 0xAE3DFBC2, 0xAE3EFBC2, 0xAE3FFBC2, 0xAE40FBC2, 0xAE41FBC2, 0xAE42FBC2, 0xAE43FBC2, 0xAE44FBC2, 0xAE45FBC2, 0xAE46FBC2, 0xAE47FBC2, 0xAE48FBC2, + 0xAE49FBC2, 0xAE4AFBC2, 0xAE4BFBC2, 0xAE4CFBC2, 0xAE4DFBC2, 0xAE4EFBC2, 0xAE4FFBC2, 0xAE50FBC2, 0xAE51FBC2, 0xAE52FBC2, 0xAE53FBC2, 0xAE54FBC2, 0xAE55FBC2, 0xAE56FBC2, 0xAE57FBC2, + 0xAE58FBC2, 0xAE59FBC2, 
0xAE5AFBC2, 0xAE5BFBC2, 0xAE5CFBC2, 0xAE5DFBC2, 0xAE5EFBC2, 0xAE5FFBC2, 0xAE60FBC2, 0xAE61FBC2, 0xAE62FBC2, 0xAE63FBC2, 0xAE64FBC2, 0xAE65FBC2, 0xAE66FBC2, + 0xAE67FBC2, 0xAE68FBC2, 0xAE69FBC2, 0xAE6AFBC2, 0xAE6BFBC2, 0xAE6CFBC2, 0xAE6DFBC2, 0xAE6EFBC2, 0xAE6FFBC2, 0xAE70FBC2, 0xAE71FBC2, 0xAE72FBC2, 0xAE73FBC2, 0xAE74FBC2, 0xAE75FBC2, + 0xAE76FBC2, 0xAE77FBC2, 0xAE78FBC2, 0xAE79FBC2, 0xAE7AFBC2, 0xAE7BFBC2, 0xAE7CFBC2, 0xAE7DFBC2, 0xAE7EFBC2, 0xAE7FFBC2, 0xAE80FBC2, 0xAE81FBC2, 0xAE82FBC2, 0xAE83FBC2, 0xAE84FBC2, + 0xAE85FBC2, 0xAE86FBC2, 0xAE87FBC2, 0xAE88FBC2, 0xAE89FBC2, 0xAE8AFBC2, 0xAE8BFBC2, 0xAE8CFBC2, 0xAE8DFBC2, 0xAE8EFBC2, 0xAE8FFBC2, 0xAE90FBC2, 0xAE91FBC2, 0xAE92FBC2, 0xAE93FBC2, + 0xAE94FBC2, 0xAE95FBC2, 0xAE96FBC2, 0xAE97FBC2, 0xAE98FBC2, 0xAE99FBC2, 0xAE9AFBC2, 0xAE9BFBC2, 0xAE9CFBC2, 0xAE9DFBC2, 0xAE9EFBC2, 0xAE9FFBC2, 0xAEA0FBC2, 0xAEA1FBC2, 0xAEA2FBC2, + 0xAEA3FBC2, 0xAEA4FBC2, 0xAEA5FBC2, 0xAEA6FBC2, 0xAEA7FBC2, 0xAEA8FBC2, 0xAEA9FBC2, 0xAEAAFBC2, 0xAEABFBC2, 0xAEACFBC2, 0xAEADFBC2, 0xAEAEFBC2, 0xAEAFFBC2, 0xAEB0FBC2, 0xAEB1FBC2, + 0xAEB2FBC2, 0xAEB3FBC2, 0xAEB4FBC2, 0xAEB5FBC2, 0xAEB6FBC2, 0xAEB7FBC2, 0xAEB8FBC2, 0xAEB9FBC2, 0xAEBAFBC2, 0xAEBBFBC2, 0xAEBCFBC2, 0xAEBDFBC2, 0xAEBEFBC2, 0xAEBFFBC2, 0xAEC0FBC2, + 0xAEC1FBC2, 0xAEC2FBC2, 0xAEC3FBC2, 0xAEC4FBC2, 0xAEC5FBC2, 0xAEC6FBC2, 0xAEC7FBC2, 0xAEC8FBC2, 0xAEC9FBC2, 0xAECAFBC2, 0xAECBFBC2, 0xAECCFBC2, 0xAECDFBC2, 0xAECEFBC2, 0xAECFFBC2, + 0xAED0FBC2, 0xAED1FBC2, 0xAED2FBC2, 0xAED3FBC2, 0xAED4FBC2, 0xAED5FBC2, 0xAED6FBC2, 0xAED7FBC2, 0xAED8FBC2, 0xAED9FBC2, 0xAEDAFBC2, 0xAEDBFBC2, 0xAEDCFBC2, 0xAEDDFBC2, 0xAEDEFBC2, + 0xAEDFFBC2, 0xAEE0FBC2, 0xAEE1FBC2, 0xAEE2FBC2, 0xAEE3FBC2, 0xAEE4FBC2, 0xAEE5FBC2, 0xAEE6FBC2, 0xAEE7FBC2, 0xAEE8FBC2, 0xAEE9FBC2, 0xAEEAFBC2, 0xAEEBFBC2, 0xAEECFBC2, 0xAEEDFBC2, + 0xAEEEFBC2, 0xAEEFFBC2, 0xAEF0FBC2, 0xAEF1FBC2, 0xAEF2FBC2, 0xAEF3FBC2, 0xAEF4FBC2, 0xAEF5FBC2, 0xAEF6FBC2, 0xAEF7FBC2, 0xAEF8FBC2, 0xAEF9FBC2, 0xAEFAFBC2, 0xAEFBFBC2, 0xAEFCFBC2, + 0xAEFDFBC2, 
0xAEFEFBC2, 0xAEFFFBC2, 0xAF00FBC2, 0xAF01FBC2, 0xAF02FBC2, 0xAF03FBC2, 0xAF04FBC2, 0xAF05FBC2, 0xAF06FBC2, 0xAF07FBC2, 0xAF08FBC2, 0xAF09FBC2, 0xAF0AFBC2, 0xAF0BFBC2, + 0xAF0CFBC2, 0xAF0DFBC2, 0xAF0EFBC2, 0xAF0FFBC2, 0xAF10FBC2, 0xAF11FBC2, 0xAF12FBC2, 0xAF13FBC2, 0xAF14FBC2, 0xAF15FBC2, 0xAF16FBC2, 0xAF17FBC2, 0xAF18FBC2, 0xAF19FBC2, 0xAF1AFBC2, + 0xAF1BFBC2, 0xAF1CFBC2, 0xAF1DFBC2, 0xAF1EFBC2, 0xAF1FFBC2, 0xAF20FBC2, 0xAF21FBC2, 0xAF22FBC2, 0xAF23FBC2, 0xAF24FBC2, 0xAF25FBC2, 0xAF26FBC2, 0xAF27FBC2, 0xAF28FBC2, 0xAF29FBC2, + 0xAF2AFBC2, 0xAF2BFBC2, 0xAF2CFBC2, 0xAF2DFBC2, 0xAF2EFBC2, 0xAF2FFBC2, 0xAF30FBC2, 0xAF31FBC2, 0xAF32FBC2, 0xAF33FBC2, 0xAF34FBC2, 0xAF35FBC2, 0xAF36FBC2, 0xAF37FBC2, 0xAF38FBC2, + 0xAF39FBC2, 0xAF3AFBC2, 0xAF3BFBC2, 0xAF3CFBC2, 0xAF3DFBC2, 0xAF3EFBC2, 0xAF3FFBC2, 0xAF40FBC2, 0xAF41FBC2, 0xAF42FBC2, 0xAF43FBC2, 0xAF44FBC2, 0xAF45FBC2, 0xAF46FBC2, 0xAF47FBC2, + 0xAF48FBC2, 0xAF49FBC2, 0xAF4AFBC2, 0xAF4BFBC2, 0xAF4CFBC2, 0xAF4DFBC2, 0xAF4EFBC2, 0xAF4FFBC2, 0xAF50FBC2, 0xAF51FBC2, 0xAF52FBC2, 0xAF53FBC2, 0xAF54FBC2, 0xAF55FBC2, 0xAF56FBC2, + 0xAF57FBC2, 0xAF58FBC2, 0xAF59FBC2, 0xAF5AFBC2, 0xAF5BFBC2, 0xAF5CFBC2, 0xAF5DFBC2, 0xAF5EFBC2, 0xAF5FFBC2, 0xAF60FBC2, 0xAF61FBC2, 0xAF62FBC2, 0xAF63FBC2, 0xAF64FBC2, 0xAF65FBC2, + 0xAF66FBC2, 0xAF67FBC2, 0xAF68FBC2, 0xAF69FBC2, 0xAF6AFBC2, 0xAF6BFBC2, 0xAF6CFBC2, 0xAF6DFBC2, 0xAF6EFBC2, 0xAF6FFBC2, 0xAF70FBC2, 0xAF71FBC2, 0xAF72FBC2, 0xAF73FBC2, 0xAF74FBC2, + 0xAF75FBC2, 0xAF76FBC2, 0xAF77FBC2, 0xAF78FBC2, 0xAF79FBC2, 0xAF7AFBC2, 0xAF7BFBC2, 0xAF7CFBC2, 0xAF7DFBC2, 0xAF7EFBC2, 0xAF7FFBC2, 0xAF80FBC2, 0xAF81FBC2, 0xAF82FBC2, 0xAF83FBC2, + 0xAF84FBC2, 0xAF85FBC2, 0xAF86FBC2, 0xAF87FBC2, 0xAF88FBC2, 0xAF89FBC2, 0xAF8AFBC2, 0xAF8BFBC2, 0xAF8CFBC2, 0xAF8DFBC2, 0xAF8EFBC2, 0xAF8FFBC2, 0xAF90FBC2, 0xAF91FBC2, 0xAF92FBC2, + 0xAF93FBC2, 0xAF94FBC2, 0xAF95FBC2, 0xAF96FBC2, 0xAF97FBC2, 0xAF98FBC2, 0xAF99FBC2, 0xAF9AFBC2, 0xAF9BFBC2, 0xAF9CFBC2, 0xAF9DFBC2, 0xAF9EFBC2, 0xAF9FFBC2, 0xAFA0FBC2, 0xAFA1FBC2, + 
0xAFA2FBC2, 0xAFA3FBC2, 0xAFA4FBC2, 0xAFA5FBC2, 0xAFA6FBC2, 0xAFA7FBC2, 0xAFA8FBC2, 0xAFA9FBC2, 0xAFAAFBC2, 0xAFABFBC2, 0xAFACFBC2, 0xAFADFBC2, 0xAFAEFBC2, 0xAFAFFBC2, 0xAFB0FBC2, + 0xAFB1FBC2, 0xAFB2FBC2, 0xAFB3FBC2, 0xAFB4FBC2, 0xAFB5FBC2, 0xAFB6FBC2, 0xAFB7FBC2, 0xAFB8FBC2, 0xAFB9FBC2, 0xAFBAFBC2, 0xAFBBFBC2, 0xAFBCFBC2, 0xAFBDFBC2, 0xAFBEFBC2, 0xAFBFFBC2, + 0xAFC0FBC2, 0xAFC1FBC2, 0xAFC2FBC2, 0xAFC3FBC2, 0xAFC4FBC2, 0xAFC5FBC2, 0xAFC6FBC2, 0xAFC7FBC2, 0xAFC8FBC2, 0xAFC9FBC2, 0xAFCAFBC2, 0xAFCBFBC2, 0xAFCCFBC2, 0xAFCDFBC2, 0xAFCEFBC2, + 0xAFCFFBC2, 0xAFD0FBC2, 0xAFD1FBC2, 0xAFD2FBC2, 0xAFD3FBC2, 0xAFD4FBC2, 0xAFD5FBC2, 0xAFD6FBC2, 0xAFD7FBC2, 0xAFD8FBC2, 0xAFD9FBC2, 0xAFDAFBC2, 0xAFDBFBC2, 0xAFDCFBC2, 0xAFDDFBC2, + 0xAFDEFBC2, 0xAFDFFBC2, 0xAFE0FBC2, 0xAFE1FBC2, 0xAFE2FBC2, 0xAFE3FBC2, 0xAFE4FBC2, 0xAFE5FBC2, 0xAFE6FBC2, 0xAFE7FBC2, 0xAFE8FBC2, 0xAFE9FBC2, 0xAFEAFBC2, 0xAFEBFBC2, 0xAFECFBC2, + 0xAFEDFBC2, 0xAFEEFBC2, 0xAFEFFBC2, 0xAFF0FBC2, 0xAFF1FBC2, 0xAFF2FBC2, 0xAFF3FBC2, 0xAFF4FBC2, 0xAFF5FBC2, 0xAFF6FBC2, 0xAFF7FBC2, 0xAFF8FBC2, 0xAFF9FBC2, 0xAFFAFBC2, 0xAFFBFBC2, + 0xAFFCFBC2, 0xAFFDFBC2, 0xAFFEFBC2, 0xAFFFFBC2, 0x4E13, 0x4E14, 0x4E15, 0x4E16, 0x4E17, 0x4E18, 0x4E19, 0x4E1A, 0x4E1B, 0x4E1C, 0x4E1D, + 0x4E1E, 0x4E1F, 0x4E20, 0x4E21, 0x4E22, 0x4E23, 0x4E24, 0x4E25, 0x4E26, 0x4E27, 0x4E28, 0x4E29, 0x4E2A, 0x4E2B, 0x4E2C, + 0x4E2D, 0x4E2E, 0x4E2F, 0x4E30, 0x4E31, 0x4E32, 0x4E33, 0x4E34, 0x4E35, 0x4E36, 0x4E37, 0x4E38, 0x4E39, 0x4E3A, 0x4E3B, + 0x4E3C, 0x4E3D, 0x4E3E, 0x4E3F, 0x4E40, 0x4E41, 0x4E42, 0x4E43, 0x4E44, 0x4E45, 0x4E46, 0x4E47, 0x4E48, 0x4E49, 0x4E4A, + 0x4E4B, 0x4E4C, 0x4E4D, 0x4E4E, 0x4E4F, 0x4E50, 0x4E51, 0x4E52, 0x4E53, 0x4E54, 0x4E55, 0x4E56, 0x4E57, 0x4E58, 0x4E59, + 0x4E5A, 0x4E5B, 0x4E5C, 0x4E5D, 0x4E5E, 0x4E5F, 0x4E60, 0x4E61, 0x4E62, 0x4E63, 0x4E64, 0x4E65, 0x4E66, 0x4E67, 0x4E68, + 0x4E69, 0x4E6A, 0x4E6B, 0x4E6C, 0x4E6D, 0x4E6E, 0x4E6F, 0x4E70, 0x4E71, 0x4E72, 0x4E73, 0x4E74, 0x4E75, 0x4E76, 0x4E77, + 0x4E78, 0x4E79, 0x4E7A, 0x4E7B, 
0x4E7C, 0x4E7D, 0x4E7E, 0x4E7F, 0x4E80, 0x4E81, 0x4E82, 0x4E83, 0x4E84, 0x4E85, 0x4E86, + 0x4E87, 0x4E88, 0x4E89, 0x4E8A, 0x4E8B, 0x4E8C, 0x4E8D, 0x4E8E, 0x4E8F, 0x4E90, 0x4E91, 0x4E92, 0x4E93, 0x4E94, 0x4E95, + 0x4E96, 0x4E97, 0x4E98, 0x4E99, 0x4E9A, 0x4E9B, 0x4E9C, 0x4E9D, 0x4E9E, 0x4E9F, 0x4EA0, 0x4EA1, 0x4EA2, 0x4EA3, 0x4EA4, + 0x4EA5, 0x4EA6, 0x4EA7, 0x4EA8, 0x4EA9, 0x4EAA, 0x4EAB, 0x4EAC, 0x4EAD, 0x4EAE, 0x4EAF, 0x4EB0, 0x4EB1, 0x4EB2, 0x4EB3, + 0x4EB4, 0x4EB5, 0x4EB6, 0x4EB7, 0x4EB8, 0x4EB9, 0x4EBA, 0x4EBB, 0x4EBC, 0x4EBD, 0x4EBE, 0x4EBF, 0x4EC0, 0x4EC1, 0x4EC2, + 0x4EC3, 0x4EC4, 0x4EC5, 0x4EC6, 0x4EC7, 0x4EC8, 0x4EC9, 0x4ECA, 0x4ECB, 0x4ECC, 0x4ECD, 0x4ECE, 0x4ECF, 0x4ED0, 0x4ED1, + 0x4ED2, 0x4ED3, 0x4ED4, 0x4ED5, 0x4ED6, 0x4ED7, 0x4ED8, 0x4ED9, 0x4EDA, 0x4EDB, 0x4EDC, 0x4EDD, 0x4EDE, 0x4EDF, 0x4EE0, + 0x4EE1, 0x4EE2, 0x4EE3, 0x4EE4, 0x4EE5, 0x4EE6, 0x4EE7, 0x4EE8, 0x4EE9, 0x4EEA, 0x4EEB, 0x4EEC, 0x4EED, 0x4EEE, 0x4EEF, + 0x4EF0, 0x4EF1, 0x4EF2, 0x4EF3, 0x4EF4, 0x4EF5, 0x4EF6, 0x4EF7, 0x4EF8, 0x4EF9, 0x4EFA, 0x4EFB, 0x4EFC, 0x4EFD, 0x4EFE, + 0x4EFF, 0x4F00, 0x4F01, 0x4F02, 0x4F03, 0x4F04, 0x4F05, 0x4F06, 0x4F07, 0x4F08, 0x4F09, 0x4F0A, 0x4F0B, 0x4F0C, 0x4F0D, + 0x4F0E, 0x4F0F, 0x4F10, 0x4F11, 0x4F12, 0x4F13, 0x4F14, 0x4F15, 0x4F16, 0x4F17, 0x4F18, 0x4F19, 0x4F1A, 0x4F1B, 0x4F1C, + 0x4F1D, 0x4F1E, 0x4F1F, 0x4F20, 0x4F21, 0x4F22, 0x4F23, 0x4F24, 0x4F25, 0x4F26, 0x4F27, 0x4F28, 0x4F29, 0x4F2A, 0x4F2B, + 0x4F2C, 0x4F2D, 0x4F2E, 0x4F2F, 0x4F30, 0x4F31, 0x4F32, 0x4F33, 0x4F34, 0x4F35, 0x4F36, 0x4F37, 0x4F38, 0x4F39, 0x4F3A, + 0x4F3B, 0x4F3C, 0x4F3D, 0x4F3E, 0x4F3F, 0x4F40, 0x4F41, 0x4F42, 0x4F43, 0x4F44, 0x4F45, 0x4F46, 0x4F47, 0x4F48, 0x4F49, + 0x4F4A, 0x4F4B, 0x4F4C, 0x4F4D, 0x4F4E, 0x4F4F, 0x4F50, 0x4F51, 0x4F52, 0x4F53, 0x4F54, 0x4F55, 0x4F56, 0x4F57, 0x4F58, + 0x4F59, 0x4F5A, 0x4F5B, 0x4F5C, 0x4F5D, 0x4F5E, 0x4F5F, 0x4F60, 0x4F61, 0x4F62, 0x4F63, 0x4F64, 0x4F65, 0x4F66, 0x4F67, + 0x4F68, 0x4F69, 0x4F6A, 0x4F6B, 0x4F6C, 0x4F6D, 0x4F6E, 0x4F6F, 0x4F70, 0x4F71, 
0x4F72, 0x4F73, 0x4F74, 0x4F75, 0x4F76, + 0x4F77, 0x4F78, 0x4F79, 0x4F7A, 0x4F7B, 0x4F7C, 0x4F7D, 0x4F7E, 0x4F7F, 0x4F80, 0x4F81, 0x4F82, 0x4F83, 0x4F84, 0x4F85, + 0x4F86, 0x4F87, 0x4F88, 0x4F89, 0x4F8A, 0x4F8B, 0x4F8C, 0x4F8D, 0x4F8E, 0x4F8F, 0x4F90, 0x4F91, 0x4F92, 0x4F93, 0x4F94, + 0x4F95, 0x4F96, 0x4F97, 0x4F98, 0x4F99, 0x4F9A, 0x4F9B, 0x4F9C, 0x4F9D, 0x4F9E, 0x4F9F, 0x4FA0, 0x4FA1, 0x4FA2, 0x4FA3, + 0x4FA4, 0x4FA5, 0x4FA6, 0x4FA7, 0x4FA8, 0x4FA9, 0x4FAA, 0x4FAB, 0x4FAC, 0x4FAD, 0x4FAE, 0x4FAF, 0x4FB0, 0x4FB1, 0x4FB2, + 0x4FB3, 0x4FB4, 0x4FB5, 0x4FB6, 0x4FB7, 0x4FB8, 0x4FB9, 0x4FBA, 0x4FBB, 0x4FBC, 0x4FBD, 0x4FBE, 0x4FBF, 0x4FC0, 0x4FC1, + 0x4FC2, 0x4FC3, 0x4FC4, 0x4FC5, 0x4FC6, 0x4FC7, 0x4FC8, 0x4FC9, 0x4FCA, 0x4FCB, 0x4FCC, 0x4FCD, 0x4FCE, 0x4FCF, 0x4FD0, + 0x4FD1, 0x4FD2, 0x4FD3, 0x4FD4, 0x4FD5, 0x4FD6, 0x4FD7, 0x4FD8, 0x4FD9, 0x4FDA, 0x4FDB, 0x4FDC, 0x4FDD, 0x4FDE, 0x4FDF, + 0x4FE0, 0x4FE1, 0x4FE2, 0x4FE3, 0x4FE4, 0x4FE5, 0x4FE6, 0x4FE7, 0x4FE8, 0x4FE9, 0x4FEA, 0x4FEB, 0x4FEC, 0x4FED, 0x4FEE, + 0x4FEF, 0x4FF0, 0x4FF1, 0x4FF2, 0x4FF3, 0x4FF4, 0x4FF5, 0x4FF6, 0x4FF7, 0x4FF8, 0x4FF9, 0x4FFA, 0x4FFB, 0x4FFC, 0x4FFD, + 0x4FFE, 0x4FFF, 0x5000, 0x5001, 0x5002, 0x5003, 0x5004, 0x5005, 0x5006, 0x5007, 0x5008, 0x5009, 0x500A, 0x500B, 0x500C, + 0x500D, 0x500E, 0x500F, 0x5010, 0x5011, 0x5012, 0x5013, 0x5014, 0x5015, 0x5016, 0x5017, 0x5018, 0x5019, 0x501A, 0x501B, + 0x501C, 0x501D, 0x501E, 0x501F, 0x5020, 0x5021, 0x5022, 0x5023, 0x5024, 0x5025, 0x5026, 0x5027, 0x5028, 0x5029, 0x502A, + 0x502B, 0x502C, 0x502D, 0x502E, 0x502F, 0x5030, 0x5031, 0x5032, 0x5033, 0x5034, 0x5035, 0x5036, 0x5037, 0x5038, 0x5039, + 0x503A, 0x503B, 0x503C, 0x503D, 0x503E, 0x503F, 0x5040, 0x5041, 0x5042, 0x5043, 0x5044, 0x5045, 0x5046, 0x5047, 0x5048, + 0x5049, 0x504A, 0x504B, 0x504C, 0x504D, 0x504E, 0x504F, 0x5050, 0x5051, 0x5052, 0x5053, 0x5054, 0x5055, 0x5056, 0x5057, + 0x5058, 0x5059, 0x505A, 0x505B, 0x505C, 0x505D, 0x505E, 0x505F, 0x5060, 0x5061, 0x5062, 0x5063, 0x5064, 0x5065, 0x5066, + 
0x5067, 0x5068, 0x5069, 0x506A, 0x506B, 0x506C, 0x506D, 0x506E, 0x506F, 0x5070, 0x5071, 0x5072, 0x5073, 0x5074, 0x5075, + 0x5076, 0x5077, 0x5078, 0x5079, 0x507A, 0x507B, 0x507C, 0x507D, 0x507E, 0x507F, 0x5080, 0x5081, 0x5082, 0x5083, 0x5084, + 0x5085, 0x5086, 0x5087, 0x5088, 0x5089, 0x508A, 0x508B, 0x508C, 0x508D, 0x508E, 0x508F, 0x5090, 0x5091, 0x5092, 0x5093, + 0x5094, 0x5095, 0x5096, 0x5097, 0x5098, 0x5099, 0x509A, 0x509B, 0x509C, 0x509D, 0x509E, 0x509F, 0x50A0, 0x50A1, 0x50A2, + 0x50A3, 0x50A4, 0x50A5, 0x50A6, 0x50A7, 0x50A8, 0x50A9, 0x50AA, 0x50AB, 0x50AC, 0x50AD, 0x50AE, 0x50AF, 0x50B0, 0x50B1, + 0x50B2, 0x50B3, 0x50B4, 0x50B5, 0x50B6, 0x50B7, 0x50B8, 0x50B9, 0x50BA, 0x50BB, 0x50BC, 0x50BD, 0x50BE, 0x50BF, 0x50C0, + 0x50C1, 0x50C2, 0x50C3, 0x50C4, 0x50C5, 0x50C6, 0x50C7, 0x50C8, 0x50C9, 0x50CA, 0x50CB, 0x50CC, 0x50CD, 0x50CE, 0x50CF, + 0x50D0, 0x50D1, 0x50D2, 0x50D3, 0x50D4, 0x50D5, 0x50D6, 0x50D7, 0x50D8, 0x50D9, 0x50DA, 0x50DB, 0x50DC, 0x50DD, 0x50DE, + 0x50DF, 0x50E0, 0x50E1, 0x50E2, 0x50E3, 0x50E4, 0x50E5, 0x50E6, 0x50E7, 0x50E8, 0x50E9, 0x50EA, 0x50EB, 0x50EC, 0x50ED, + 0x50EE, 0x50EF, 0x50F0, 0x50F1, 0x50F2, 0x50F3, 0x50F4, 0x50F5, 0x50F6, 0x50F7, 0x50F8, 0x50F9, 0x50FA, 0x50FB, 0x50FC, + 0x50FD, 0x50FE, 0x50FF, 0x5100, 0x5101, 0x5102, 0x5103, 0x5104, 0x5105, 0x5106, 0x5107, 0x5108, 0x5109, 0x510A, 0x510B, + 0x510C, 0x510D, 0x510E, 0x510F, 0x5110, 0x5111, 0x5112, 0x5113, 0x5114, 0x5115, 0x5116, 0x5117, 0x5118, 0x5119, 0x511A, + 0x511B, 0x511C, 0x511D, 0x511E, 0x511F, 0x5120, 0x5121, 0x5122, 0x5123, 0x5124, 0x5125, 0x5126, 0x5127, 0x5128, 0x5129, + 0x512A, 0x512B, 0x512C, 0x512D, 0x512E, 0x512F, 0x5130, 0x5131, 0x5132, 0x5133, 0x5134, 0x5135, 0x5136, 0x5137, 0x5138, + 0x5139, 0x513A, 0x513B, 0x513C, 0x513D, 0x513E, 0x513F, 0x5140, 0x5141, 0x5142, 0x5143, 0x5144, 0x5145, 0x5146, 0x5147, + 0x5148, 0x5149, 0x514A, 0x514B, 0x514C, 0x514D, 0x514E, 0x514F, 0x5150, 0x5151, 0x5152, 0x5153, 0x5154, 0x5155, 0x5156, + 0x5157, 0x5158, 0x5159, 0x515A, 0x515B, 0x515C, 
0x515D, 0x515E, 0x515F, 0x5160, 0x5161, 0x5162, 0x5163, 0x5164, 0x5165, + 0x5166, 0x5167, 0x5168, 0x5169, 0x516A, 0x516B, 0x516C, 0x516D, 0x516E, 0x516F, 0x5170, 0x5171, 0x5172, 0x5173, 0x5174, + 0x5175, 0x5176, 0x5177, 0x5178, 0x5179, 0x517A, 0x517B, 0x517C, 0x517D, 0x517E, 0x517F, 0x5180, 0x5181, 0x5182, 0x5183, + 0x5184, 0x5185, 0x5186, 0x5187, 0x5188, 0x5189, 0x518A, 0x518B, 0x518C, 0x518D, 0x518E, 0x518F, 0x5190, 0x5191, 0x5192, + 0x5193, 0x5194, 0x5195, 0x5196, 0x5197, 0x5198, 0x5199, 0x519A, 0x519B, 0x519C, 0x519D, 0x519E, 0x519F, 0x51A0, 0x51A1, + 0x51A2, 0x51A3, 0x51A4, 0x51A5, 0x51A6, 0x51A7, 0x51A8, 0x51A9, 0x51AA, 0x51AB, 0x51AC, 0x51AD, 0x51AE, 0x51AF, 0x51B0, + 0x51B1, 0x51B2, 0x51B3, 0x51B4, 0x51B5, 0x51B6, 0x51B7, 0x51B8, 0x51B9, 0x51BA, 0x51BB, 0x51BC, 0x51BD, 0x51BE, 0x51BF, + 0x51C0, 0x51C1, 0x51C2, 0x51C3, 0x51C4, 0x51C5, 0x51C6, 0x51C7, 0x51C8, 0x51C9, 0x51CA, 0x51CB, 0x51CC, 0x51CD, 0x51CE, + 0x51CF, 0x51D0, 0x51D1, 0x51D2, 0x51D3, 0x51D4, 0x51D5, 0x51D6, 0x51D7, 0x51D8, 0x51D9, 0x51DA, 0x51DB, 0x51DC, 0x51DD, + 0x51DE, 0x51DF, 0x51E0, 0x51E1, 0x51E2, 0x51E3, 0x51E4, 0x51E5, 0x51E6, 0x51E7, 0x51E8, 0x51E9, 0x51EA, 0x51EB, 0x51EC, + 0x51ED, 0x51EE, 0x51EF, 0x51F0, 0x51F1, 0x51F2, 0x51F3, 0x51F4, 0x51F5, 0x51F6, 0x51F7, 0x51F8, 0x51F9, 0x51FA, 0x51FB, + 0x51FC, 0x51FD, 0x51FE, 0x51FF, 0x5200, 0x5201, 0x5202, 0x5203, 0x5204, 0x5205, 0x5206, 0x5207, 0x5208, 0x5209, 0x520A, + 0x520B, 0x520C, 0x520D, 0x520E, 0x520F, 0x5210, 0x5211, 0x5212, 0x5213, 0x5214, 0x5215, 0x5216, 0x5217, 0x5218, 0x5219, + 0x521A, 0x521B, 0x521C, 0x521D, 0x521E, 0x521F, 0x5220, 0x5221, 0x5222, 0x5223, 0x5224, 0x5225, 0x5226, 0x5227, 0x5228, + 0x5229, 0x522A, 0x522B, 0x522C, 0x522D, 0x522E, 0x522F, 0x5230, 0x5231, 0x5232, 0x5233, 0x5234, 0x5235, 0x5236, 0x5237, + 0x5238, 0x5239, 0x523A, 0x523B, 0x523C, 0x523D, 0x523E, 0x523F, 0x5240, 0x5241, 0xB42FFBC2, 0xB430FBC2, 0xB431FBC2, 0xB432FBC2, 0xB433FBC2, + 0xB434FBC2, 0xB435FBC2, 0xB436FBC2, 0xB437FBC2, 0xB438FBC2, 0xB439FBC2, 
0xB43AFBC2, 0xB43BFBC2, 0xB43CFBC2, 0xB43DFBC2, 0xB43EFBC2, 0xB43FFBC2, 0xB440FBC2, 0xB441FBC2, 0xB442FBC2, + 0xB443FBC2, 0xB444FBC2, 0xB445FBC2, 0xB446FBC2, 0xB447FBC2, 0xB448FBC2, 0xB449FBC2, 0xB44AFBC2, 0xB44BFBC2, 0xB44CFBC2, 0xB44DFBC2, 0xB44EFBC2, 0xB44FFBC2, 0xB450FBC2, 0xB451FBC2, + 0xB452FBC2, 0xB453FBC2, 0xB454FBC2, 0xB455FBC2, 0xB456FBC2, 0xB457FBC2, 0xB458FBC2, 0xB459FBC2, 0xB45AFBC2, 0xB45BFBC2, 0xB45CFBC2, 0xB45DFBC2, 0xB45EFBC2, 0xB45FFBC2, 0xB460FBC2, + 0xB461FBC2, 0xB462FBC2, 0xB463FBC2, 0xB464FBC2, 0xB465FBC2, 0xB466FBC2, 0xB467FBC2, 0xB468FBC2, 0xB469FBC2, 0xB46AFBC2, 0xB46BFBC2, 0xB46CFBC2, 0xB46DFBC2, 0xB46EFBC2, 0xB46FFBC2, + 0xB470FBC2, 0xB471FBC2, 0xB472FBC2, 0xB473FBC2, 0xB474FBC2, 0xB475FBC2, 0xB476FBC2, 0xB477FBC2, 0xB478FBC2, 0xB479FBC2, 0xB47AFBC2, 0xB47BFBC2, 0xB47CFBC2, 0xB47DFBC2, 0xB47EFBC2, + 0xB47FFBC2, 0xB480FBC2, 0xB481FBC2, 0xB482FBC2, 0xB483FBC2, 0xB484FBC2, 0xB485FBC2, 0xB486FBC2, 0xB487FBC2, 0xB488FBC2, 0xB489FBC2, 0xB48AFBC2, 0xB48BFBC2, 0xB48CFBC2, 0xB48DFBC2, + 0xB48EFBC2, 0xB48FFBC2, 0xB490FBC2, 0xB491FBC2, 0xB492FBC2, 0xB493FBC2, 0xB494FBC2, 0xB495FBC2, 0xB496FBC2, 0xB497FBC2, 0xB498FBC2, 0xB499FBC2, 0xB49AFBC2, 0xB49BFBC2, 0xB49CFBC2, + 0xB49DFBC2, 0xB49EFBC2, 0xB49FFBC2, 0xB4A0FBC2, 0xB4A1FBC2, 0xB4A2FBC2, 0xB4A3FBC2, 0xB4A4FBC2, 0xB4A5FBC2, 0xB4A6FBC2, 0xB4A7FBC2, 0xB4A8FBC2, 0xB4A9FBC2, 0xB4AAFBC2, 0xB4ABFBC2, + 0xB4ACFBC2, 0xB4ADFBC2, 0xB4AEFBC2, 0xB4AFFBC2, 0xB4B0FBC2, 0xB4B1FBC2, 0xB4B2FBC2, 0xB4B3FBC2, 0xB4B4FBC2, 0xB4B5FBC2, 0xB4B6FBC2, 0xB4B7FBC2, 0xB4B8FBC2, 0xB4B9FBC2, 0xB4BAFBC2, + 0xB4BBFBC2, 0xB4BCFBC2, 0xB4BDFBC2, 0xB4BEFBC2, 0xB4BFFBC2, 0xB4C0FBC2, 0xB4C1FBC2, 0xB4C2FBC2, 0xB4C3FBC2, 0xB4C4FBC2, 0xB4C5FBC2, 0xB4C6FBC2, 0xB4C7FBC2, 0xB4C8FBC2, 0xB4C9FBC2, + 0xB4CAFBC2, 0xB4CBFBC2, 0xB4CCFBC2, 0xB4CDFBC2, 0xB4CEFBC2, 0xB4CFFBC2, 0xB4D0FBC2, 0xB4D1FBC2, 0xB4D2FBC2, 0xB4D3FBC2, 0xB4D4FBC2, 0xB4D5FBC2, 0xB4D6FBC2, 0xB4D7FBC2, 0xB4D8FBC2, + 0xB4D9FBC2, 0xB4DAFBC2, 0xB4DBFBC2, 0xB4DCFBC2, 0xB4DDFBC2, 
0xB4DEFBC2, 0xB4DFFBC2, 0xB4E0FBC2, 0xB4E1FBC2, 0xB4E2FBC2, 0xB4E3FBC2, 0xB4E4FBC2, 0xB4E5FBC2, 0xB4E6FBC2, 0xB4E7FBC2, + 0xB4E8FBC2, 0xB4E9FBC2, 0xB4EAFBC2, 0xB4EBFBC2, 0xB4ECFBC2, 0xB4EDFBC2, 0xB4EEFBC2, 0xB4EFFBC2, 0xB4F0FBC2, 0xB4F1FBC2, 0xB4F2FBC2, 0xB4F3FBC2, 0xB4F4FBC2, 0xB4F5FBC2, 0xB4F6FBC2, + 0xB4F7FBC2, 0xB4F8FBC2, 0xB4F9FBC2, 0xB4FAFBC2, 0xB4FBFBC2, 0xB4FCFBC2, 0xB4FDFBC2, 0xB4FEFBC2, 0xB4FFFBC2, 0xB500FBC2, 0xB501FBC2, 0xB502FBC2, 0xB503FBC2, 0xB504FBC2, 0xB505FBC2, + 0xB506FBC2, 0xB507FBC2, 0xB508FBC2, 0xB509FBC2, 0xB50AFBC2, 0xB50BFBC2, 0xB50CFBC2, 0xB50DFBC2, 0xB50EFBC2, 0xB50FFBC2, 0xB510FBC2, 0xB511FBC2, 0xB512FBC2, 0xB513FBC2, 0xB514FBC2, + 0xB515FBC2, 0xB516FBC2, 0xB517FBC2, 0xB518FBC2, 0xB519FBC2, 0xB51AFBC2, 0xB51BFBC2, 0xB51CFBC2, 0xB51DFBC2, 0xB51EFBC2, 0xB51FFBC2, 0xB520FBC2, 0xB521FBC2, 0xB522FBC2, 0xB523FBC2, + 0xB524FBC2, 0xB525FBC2, 0xB526FBC2, 0xB527FBC2, 0xB528FBC2, 0xB529FBC2, 0xB52AFBC2, 0xB52BFBC2, 0xB52CFBC2, 0xB52DFBC2, 0xB52EFBC2, 0xB52FFBC2, 0xB530FBC2, 0xB531FBC2, 0xB532FBC2, + 0xB533FBC2, 0xB534FBC2, 0xB535FBC2, 0xB536FBC2, 0xB537FBC2, 0xB538FBC2, 0xB539FBC2, 0xB53AFBC2, 0xB53BFBC2, 0xB53CFBC2, 0xB53DFBC2, 0xB53EFBC2, 0xB53FFBC2, 0xB540FBC2, 0xB541FBC2, + 0xB542FBC2, 0xB543FBC2, 0xB544FBC2, 0xB545FBC2, 0xB546FBC2, 0xB547FBC2, 0xB548FBC2, 0xB549FBC2, 0xB54AFBC2, 0xB54BFBC2, 0xB54CFBC2, 0xB54DFBC2, 0xB54EFBC2, 0xB54FFBC2, 0xB550FBC2, + 0xB551FBC2, 0xB552FBC2, 0xB553FBC2, 0xB554FBC2, 0xB555FBC2, 0xB556FBC2, 0xB557FBC2, 0xB558FBC2, 0xB559FBC2, 0xB55AFBC2, 0xB55BFBC2, 0xB55CFBC2, 0xB55DFBC2, 0xB55EFBC2, 0xB55FFBC2, + 0xB560FBC2, 0xB561FBC2, 0xB562FBC2, 0xB563FBC2, 0xB564FBC2, 0xB565FBC2, 0xB566FBC2, 0xB567FBC2, 0xB568FBC2, 0xB569FBC2, 0xB56AFBC2, 0xB56BFBC2, 0xB56CFBC2, 0xB56DFBC2, 0xB56EFBC2, + 0xB56FFBC2, 0xB570FBC2, 0xB571FBC2, 0xB572FBC2, 0xB573FBC2, 0xB574FBC2, 0xB575FBC2, 0xB576FBC2, 0xB577FBC2, 0xB578FBC2, 0xB579FBC2, 0xB57AFBC2, 0xB57BFBC2, 0xB57CFBC2, 0xB57DFBC2, + 0xB57EFBC2, 0xB57FFBC2, 0xB580FBC2, 0xB581FBC2, 
0xB582FBC2, 0xB583FBC2, 0xB584FBC2, 0xB585FBC2, 0xB586FBC2, 0xB587FBC2, 0xB588FBC2, 0xB589FBC2, 0xB58AFBC2, 0xB58BFBC2, 0xB58CFBC2, + 0xB58DFBC2, 0xB58EFBC2, 0xB58FFBC2, 0xB590FBC2, 0xB591FBC2, 0xB592FBC2, 0xB593FBC2, 0xB594FBC2, 0xB595FBC2, 0xB596FBC2, 0xB597FBC2, 0xB598FBC2, 0xB599FBC2, 0xB59AFBC2, 0xB59BFBC2, + 0xB59CFBC2, 0xB59DFBC2, 0xB59EFBC2, 0xB59FFBC2, 0xB5A0FBC2, 0xB5A1FBC2, 0xB5A2FBC2, 0xB5A3FBC2, 0xB5A4FBC2, 0xB5A5FBC2, 0xB5A6FBC2, 0xB5A7FBC2, 0xB5A8FBC2, 0xB5A9FBC2, 0xB5AAFBC2, + 0xB5ABFBC2, 0xB5ACFBC2, 0xB5ADFBC2, 0xB5AEFBC2, 0xB5AFFBC2, 0xB5B0FBC2, 0xB5B1FBC2, 0xB5B2FBC2, 0xB5B3FBC2, 0xB5B4FBC2, 0xB5B5FBC2, 0xB5B6FBC2, 0xB5B7FBC2, 0xB5B8FBC2, 0xB5B9FBC2, + 0xB5BAFBC2, 0xB5BBFBC2, 0xB5BCFBC2, 0xB5BDFBC2, 0xB5BEFBC2, 0xB5BFFBC2, 0xB5C0FBC2, 0xB5C1FBC2, 0xB5C2FBC2, 0xB5C3FBC2, 0xB5C4FBC2, 0xB5C5FBC2, 0xB5C6FBC2, 0xB5C7FBC2, 0xB5C8FBC2, + 0xB5C9FBC2, 0xB5CAFBC2, 0xB5CBFBC2, 0xB5CCFBC2, 0xB5CDFBC2, 0xB5CEFBC2, 0xB5CFFBC2, 0xB5D0FBC2, 0xB5D1FBC2, 0xB5D2FBC2, 0xB5D3FBC2, 0xB5D4FBC2, 0xB5D5FBC2, 0xB5D6FBC2, 0xB5D7FBC2, + 0xB5D8FBC2, 0xB5D9FBC2, 0xB5DAFBC2, 0xB5DBFBC2, 0xB5DCFBC2, 0xB5DDFBC2, 0xB5DEFBC2, 0xB5DFFBC2, 0xB5E0FBC2, 0xB5E1FBC2, 0xB5E2FBC2, 0xB5E3FBC2, 0xB5E4FBC2, 0xB5E5FBC2, 0xB5E6FBC2, + 0xB5E7FBC2, 0xB5E8FBC2, 0xB5E9FBC2, 0xB5EAFBC2, 0xB5EBFBC2, 0xB5ECFBC2, 0xB5EDFBC2, 0xB5EEFBC2, 0xB5EFFBC2, 0xB5F0FBC2, 0xB5F1FBC2, 0xB5F2FBC2, 0xB5F3FBC2, 0xB5F4FBC2, 0xB5F5FBC2, + 0xB5F6FBC2, 0xB5F7FBC2, 0xB5F8FBC2, 0xB5F9FBC2, 0xB5FAFBC2, 0xB5FBFBC2, 0xB5FCFBC2, 0xB5FDFBC2, 0xB5FEFBC2, 0xB5FFFBC2, 0xB600FBC2, 0xB601FBC2, 0xB602FBC2, 0xB603FBC2, 0xB604FBC2, + 0xB605FBC2, 0xB606FBC2, 0xB607FBC2, 0xB608FBC2, 0xB609FBC2, 0xB60AFBC2, 0xB60BFBC2, 0xB60CFBC2, 0xB60DFBC2, 0xB60EFBC2, 0xB60FFBC2, 0xB610FBC2, 0xB611FBC2, 0xB612FBC2, 0xB613FBC2, + 0xB614FBC2, 0xB615FBC2, 0xB616FBC2, 0xB617FBC2, 0xB618FBC2, 0xB619FBC2, 0xB61AFBC2, 0xB61BFBC2, 0xB61CFBC2, 0xB61DFBC2, 0xB61EFBC2, 0xB61FFBC2, 0xB620FBC2, 0xB621FBC2, 0xB622FBC2, + 0xB623FBC2, 0xB624FBC2, 0xB625FBC2, 
0xB626FBC2, 0xB627FBC2, 0xB628FBC2, 0xB629FBC2, 0xB62AFBC2, 0xB62BFBC2, 0xB62CFBC2, 0xB62DFBC2, 0xB62EFBC2, 0xB62FFBC2, 0xB630FBC2, 0xB631FBC2, + 0xB632FBC2, 0xB633FBC2, 0xB634FBC2, 0xB635FBC2, 0xB636FBC2, 0xB637FBC2, 0xB638FBC2, 0xB639FBC2, 0xB63AFBC2, 0xB63BFBC2, 0xB63CFBC2, 0xB63DFBC2, 0xB63EFBC2, 0xB63FFBC2, 0xB640FBC2, + 0xB641FBC2, 0xB642FBC2, 0xB643FBC2, 0xB644FBC2, 0xB645FBC2, 0xB646FBC2, 0xB647FBC2, 0xB648FBC2, 0xB649FBC2, 0xB64AFBC2, 0xB64BFBC2, 0xB64CFBC2, 0xB64DFBC2, 0xB64EFBC2, 0xB64FFBC2, + 0xB650FBC2, 0xB651FBC2, 0xB652FBC2, 0xB653FBC2, 0xB654FBC2, 0xB655FBC2, 0xB656FBC2, 0xB657FBC2, 0xB658FBC2, 0xB659FBC2, 0xB65AFBC2, 0xB65BFBC2, 0xB65CFBC2, 0xB65DFBC2, 0xB65EFBC2, + 0xB65FFBC2, 0xB660FBC2, 0xB661FBC2, 0xB662FBC2, 0xB663FBC2, 0xB664FBC2, 0xB665FBC2, 0xB666FBC2, 0xB667FBC2, 0xB668FBC2, 0xB669FBC2, 0xB66AFBC2, 0xB66BFBC2, 0xB66CFBC2, 0xB66DFBC2, + 0xB66EFBC2, 0xB66FFBC2, 0xB670FBC2, 0xB671FBC2, 0xB672FBC2, 0xB673FBC2, 0xB674FBC2, 0xB675FBC2, 0xB676FBC2, 0xB677FBC2, 0xB678FBC2, 0xB679FBC2, 0xB67AFBC2, 0xB67BFBC2, 0xB67CFBC2, + 0xB67DFBC2, 0xB67EFBC2, 0xB67FFBC2, 0xB680FBC2, 0xB681FBC2, 0xB682FBC2, 0xB683FBC2, 0xB684FBC2, 0xB685FBC2, 0xB686FBC2, 0xB687FBC2, 0xB688FBC2, 0xB689FBC2, 0xB68AFBC2, 0xB68BFBC2, + 0xB68CFBC2, 0xB68DFBC2, 0xB68EFBC2, 0xB68FFBC2, 0xB690FBC2, 0xB691FBC2, 0xB692FBC2, 0xB693FBC2, 0xB694FBC2, 0xB695FBC2, 0xB696FBC2, 0xB697FBC2, 0xB698FBC2, 0xB699FBC2, 0xB69AFBC2, + 0xB69BFBC2, 0xB69CFBC2, 0xB69DFBC2, 0xB69EFBC2, 0xB69FFBC2, 0xB6A0FBC2, 0xB6A1FBC2, 0xB6A2FBC2, 0xB6A3FBC2, 0xB6A4FBC2, 0xB6A5FBC2, 0xB6A6FBC2, 0xB6A7FBC2, 0xB6A8FBC2, 0xB6A9FBC2, + 0xB6AAFBC2, 0xB6ABFBC2, 0xB6ACFBC2, 0xB6ADFBC2, 0xB6AEFBC2, 0xB6AFFBC2, 0xB6B0FBC2, 0xB6B1FBC2, 0xB6B2FBC2, 0xB6B3FBC2, 0xB6B4FBC2, 0xB6B5FBC2, 0xB6B6FBC2, 0xB6B7FBC2, 0xB6B8FBC2, + 0xB6B9FBC2, 0xB6BAFBC2, 0xB6BBFBC2, 0xB6BCFBC2, 0xB6BDFBC2, 0xB6BEFBC2, 0xB6BFFBC2, 0xB6C0FBC2, 0xB6C1FBC2, 0xB6C2FBC2, 0xB6C3FBC2, 0xB6C4FBC2, 0xB6C5FBC2, 0xB6C6FBC2, 0xB6C7FBC2, + 0xB6C8FBC2, 0xB6C9FBC2, 
0xB6CAFBC2, 0xB6CBFBC2, 0xB6CCFBC2, 0xB6CDFBC2, 0xB6CEFBC2, 0xB6CFFBC2, 0xB6D0FBC2, 0xB6D1FBC2, 0xB6D2FBC2, 0xB6D3FBC2, 0xB6D4FBC2, 0xB6D5FBC2, 0xB6D6FBC2, + 0xB6D7FBC2, 0xB6D8FBC2, 0xB6D9FBC2, 0xB6DAFBC2, 0xB6DBFBC2, 0xB6DCFBC2, 0xB6DDFBC2, 0xB6DEFBC2, 0xB6DFFBC2, 0xB6E0FBC2, 0xB6E1FBC2, 0xB6E2FBC2, 0xB6E3FBC2, 0xB6E4FBC2, 0xB6E5FBC2, + 0xB6E6FBC2, 0xB6E7FBC2, 0xB6E8FBC2, 0xB6E9FBC2, 0xB6EAFBC2, 0xB6EBFBC2, 0xB6ECFBC2, 0xB6EDFBC2, 0xB6EEFBC2, 0xB6EFFBC2, 0xB6F0FBC2, 0xB6F1FBC2, 0xB6F2FBC2, 0xB6F3FBC2, 0xB6F4FBC2, + 0xB6F5FBC2, 0xB6F6FBC2, 0xB6F7FBC2, 0xB6F8FBC2, 0xB6F9FBC2, 0xB6FAFBC2, 0xB6FBFBC2, 0xB6FCFBC2, 0xB6FDFBC2, 0xB6FEFBC2, 0xB6FFFBC2, 0xB700FBC2, 0xB701FBC2, 0xB702FBC2, 0xB703FBC2, + 0xB704FBC2, 0xB705FBC2, 0xB706FBC2, 0xB707FBC2, 0xB708FBC2, 0xB709FBC2, 0xB70AFBC2, 0xB70BFBC2, 0xB70CFBC2, 0xB70DFBC2, 0xB70EFBC2, 0xB70FFBC2, 0xB710FBC2, 0xB711FBC2, 0xB712FBC2, + 0xB713FBC2, 0xB714FBC2, 0xB715FBC2, 0xB716FBC2, 0xB717FBC2, 0xB718FBC2, 0xB719FBC2, 0xB71AFBC2, 0xB71BFBC2, 0xB71CFBC2, 0xB71DFBC2, 0xB71EFBC2, 0xB71FFBC2, 0xB720FBC2, 0xB721FBC2, + 0xB722FBC2, 0xB723FBC2, 0xB724FBC2, 0xB725FBC2, 0xB726FBC2, 0xB727FBC2, 0xB728FBC2, 0xB729FBC2, 0xB72AFBC2, 0xB72BFBC2, 0xB72CFBC2, 0xB72DFBC2, 0xB72EFBC2, 0xB72FFBC2, 0xB730FBC2, + 0xB731FBC2, 0xB732FBC2, 0xB733FBC2, 0xB734FBC2, 0xB735FBC2, 0xB736FBC2, 0xB737FBC2, 0xB738FBC2, 0xB739FBC2, 0xB73AFBC2, 0xB73BFBC2, 0xB73CFBC2, 0xB73DFBC2, 0xB73EFBC2, 0xB73FFBC2, + 0xB740FBC2, 0xB741FBC2, 0xB742FBC2, 0xB743FBC2, 0xB744FBC2, 0xB745FBC2, 0xB746FBC2, 0xB747FBC2, 0xB748FBC2, 0xB749FBC2, 0xB74AFBC2, 0xB74BFBC2, 0xB74CFBC2, 0xB74DFBC2, 0xB74EFBC2, + 0xB74FFBC2, 0xB750FBC2, 0xB751FBC2, 0xB752FBC2, 0xB753FBC2, 0xB754FBC2, 0xB755FBC2, 0xB756FBC2, 0xB757FBC2, 0xB758FBC2, 0xB759FBC2, 0xB75AFBC2, 0xB75BFBC2, 0xB75CFBC2, 0xB75DFBC2, + 0xB75EFBC2, 0xB75FFBC2, 0xB760FBC2, 0xB761FBC2, 0xB762FBC2, 0xB763FBC2, 0xB764FBC2, 0xB765FBC2, 0xB766FBC2, 0xB767FBC2, 0xB768FBC2, 0xB769FBC2, 0xB76AFBC2, 0xB76BFBC2, 0xB76CFBC2, + 0xB76DFBC2, 
0xB76EFBC2, 0xB76FFBC2, 0xB770FBC2, 0xB771FBC2, 0xB772FBC2, 0xB773FBC2, 0xB774FBC2, 0xB775FBC2, 0xB776FBC2, 0xB777FBC2, 0xB778FBC2, 0xB779FBC2, 0xB77AFBC2, 0xB77BFBC2, + 0xB77CFBC2, 0xB77DFBC2, 0xB77EFBC2, 0xB77FFBC2, 0xB780FBC2, 0xB781FBC2, 0xB782FBC2, 0xB783FBC2, 0xB784FBC2, 0xB785FBC2, 0xB786FBC2, 0xB787FBC2, 0xB788FBC2, 0xB789FBC2, 0xB78AFBC2, + 0xB78BFBC2, 0xB78CFBC2, 0xB78DFBC2, 0xB78EFBC2, 0xB78FFBC2, 0xB790FBC2, 0xB791FBC2, 0xB792FBC2, 0xB793FBC2, 0xB794FBC2, 0xB795FBC2, 0xB796FBC2, 0xB797FBC2, 0xB798FBC2, 0xB799FBC2, + 0xB79AFBC2, 0xB79BFBC2, 0xB79CFBC2, 0xB79DFBC2, 0xB79EFBC2, 0xB79FFBC2, 0xB7A0FBC2, 0xB7A1FBC2, 0xB7A2FBC2, 0xB7A3FBC2, 0xB7A4FBC2, 0xB7A5FBC2, 0xB7A6FBC2, 0xB7A7FBC2, 0xB7A8FBC2, + 0xB7A9FBC2, 0xB7AAFBC2, 0xB7ABFBC2, 0xB7ACFBC2, 0xB7ADFBC2, 0xB7AEFBC2, 0xB7AFFBC2, 0xB7B0FBC2, 0xB7B1FBC2, 0xB7B2FBC2, 0xB7B3FBC2, 0xB7B4FBC2, 0xB7B5FBC2, 0xB7B6FBC2, 0xB7B7FBC2, + 0xB7B8FBC2, 0xB7B9FBC2, 0xB7BAFBC2, 0xB7BBFBC2, 0xB7BCFBC2, 0xB7BDFBC2, 0xB7BEFBC2, 0xB7BFFBC2, 0xB7C0FBC2, 0xB7C1FBC2, 0xB7C2FBC2, 0xB7C3FBC2, 0xB7C4FBC2, 0xB7C5FBC2, 0xB7C6FBC2, + 0xB7C7FBC2, 0xB7C8FBC2, 0xB7C9FBC2, 0xB7CAFBC2, 0xB7CBFBC2, 0xB7CCFBC2, 0xB7CDFBC2, 0xB7CEFBC2, 0xB7CFFBC2, 0xB7D0FBC2, 0xB7D1FBC2, 0xB7D2FBC2, 0xB7D3FBC2, 0xB7D4FBC2, 0xB7D5FBC2, + 0xB7D6FBC2, 0xB7D7FBC2, 0xB7D8FBC2, 0xB7D9FBC2, 0xB7DAFBC2, 0xB7DBFBC2, 0xB7DCFBC2, 0xB7DDFBC2, 0xB7DEFBC2, 0xB7DFFBC2, 0xB7E0FBC2, 0xB7E1FBC2, 0xB7E2FBC2, 0xB7E3FBC2, 0xB7E4FBC2, + 0xB7E5FBC2, 0xB7E6FBC2, 0xB7E7FBC2, 0xB7E8FBC2, 0xB7E9FBC2, 0xB7EAFBC2, 0xB7EBFBC2, 0xB7ECFBC2, 0xB7EDFBC2, 0xB7EEFBC2, 0xB7EFFBC2, 0xB7F0FBC2, 0xB7F1FBC2, 0xB7F2FBC2, 0xB7F3FBC2, + 0xB7F4FBC2, 0xB7F5FBC2, 0xB7F6FBC2, 0xB7F7FBC2, 0xB7F8FBC2, 0xB7F9FBC2, 0xB7FAFBC2, 0xB7FBFBC2, 0xB7FCFBC2, 0xB7FDFBC2, 0xB7FEFBC2, 0xB7FFFBC2, 0xB800FBC2, 0xB801FBC2, 0xB802FBC2, + 0xB803FBC2, 0xB804FBC2, 0xB805FBC2, 0xB806FBC2, 0xB807FBC2, 0xB808FBC2, 0xB809FBC2, 0xB80AFBC2, 0xB80BFBC2, 0xB80CFBC2, 0xB80DFBC2, 0xB80EFBC2, 0xB80FFBC2, 0xB810FBC2, 0xB811FBC2, + 
0xB812FBC2, 0xB813FBC2, 0xB814FBC2, 0xB815FBC2, 0xB816FBC2, 0xB817FBC2, 0xB818FBC2, 0xB819FBC2, 0xB81AFBC2, 0xB81BFBC2, 0xB81CFBC2, 0xB81DFBC2, 0xB81EFBC2, 0xB81FFBC2, 0xB820FBC2, + 0xB821FBC2, 0xB822FBC2, 0xB823FBC2, 0xB824FBC2, 0xB825FBC2, 0xB826FBC2, 0xB827FBC2, 0xB828FBC2, 0xB829FBC2, 0xB82AFBC2, 0xB82BFBC2, 0xB82CFBC2, 0xB82DFBC2, 0xB82EFBC2, 0xB82FFBC2, + 0xB830FBC2, 0xB831FBC2, 0xB832FBC2, 0xB833FBC2, 0xB834FBC2, 0xB835FBC2, 0xB836FBC2, 0xB837FBC2, 0xB838FBC2, 0xB839FBC2, 0xB83AFBC2, 0xB83BFBC2, 0xB83CFBC2, 0xB83DFBC2, 0xB83EFBC2, + 0xB83FFBC2, 0xB840FBC2, 0xB841FBC2, 0xB842FBC2, 0xB843FBC2, 0xB844FBC2, 0xB845FBC2, 0xB846FBC2, 0xB847FBC2, 0xB848FBC2, 0xB849FBC2, 0xB84AFBC2, 0xB84BFBC2, 0xB84CFBC2, 0xB84DFBC2, + 0xB84EFBC2, 0xB84FFBC2, 0xB850FBC2, 0xB851FBC2, 0xB852FBC2, 0xB853FBC2, 0xB854FBC2, 0xB855FBC2, 0xB856FBC2, 0xB857FBC2, 0xB858FBC2, 0xB859FBC2, 0xB85AFBC2, 0xB85BFBC2, 0xB85CFBC2, + 0xB85DFBC2, 0xB85EFBC2, 0xB85FFBC2, 0xB860FBC2, 0xB861FBC2, 0xB862FBC2, 0xB863FBC2, 0xB864FBC2, 0xB865FBC2, 0xB866FBC2, 0xB867FBC2, 0xB868FBC2, 0xB869FBC2, 0xB86AFBC2, 0xB86BFBC2, + 0xB86CFBC2, 0xB86DFBC2, 0xB86EFBC2, 0xB86FFBC2, 0xB870FBC2, 0xB871FBC2, 0xB872FBC2, 0xB873FBC2, 0xB874FBC2, 0xB875FBC2, 0xB876FBC2, 0xB877FBC2, 0xB878FBC2, 0xB879FBC2, 0xB87AFBC2, + 0xB87BFBC2, 0xB87CFBC2, 0xB87DFBC2, 0xB87EFBC2, 0xB87FFBC2, 0xB880FBC2, 0xB881FBC2, 0xB882FBC2, 0xB883FBC2, 0xB884FBC2, 0xB885FBC2, 0xB886FBC2, 0xB887FBC2, 0xB888FBC2, 0xB889FBC2, + 0xB88AFBC2, 0xB88BFBC2, 0xB88CFBC2, 0xB88DFBC2, 0xB88EFBC2, 0xB88FFBC2, 0xB890FBC2, 0xB891FBC2, 0xB892FBC2, 0xB893FBC2, 0xB894FBC2, 0xB895FBC2, 0xB896FBC2, 0xB897FBC2, 0xB898FBC2, + 0xB899FBC2, 0xB89AFBC2, 0xB89BFBC2, 0xB89CFBC2, 0xB89DFBC2, 0xB89EFBC2, 0xB89FFBC2, 0xB8A0FBC2, 0xB8A1FBC2, 0xB8A2FBC2, 0xB8A3FBC2, 0xB8A4FBC2, 0xB8A5FBC2, 0xB8A6FBC2, 0xB8A7FBC2, + 0xB8A8FBC2, 0xB8A9FBC2, 0xB8AAFBC2, 0xB8ABFBC2, 0xB8ACFBC2, 0xB8ADFBC2, 0xB8AEFBC2, 0xB8AFFBC2, 0xB8B0FBC2, 0xB8B1FBC2, 0xB8B2FBC2, 0xB8B3FBC2, 0xB8B4FBC2, 0xB8B5FBC2, 0xB8B6FBC2, 
+ 0xB8B7FBC2, 0xB8B8FBC2, 0xB8B9FBC2, 0xB8BAFBC2, 0xB8BBFBC2, 0xB8BCFBC2, 0xB8BDFBC2, 0xB8BEFBC2, 0xB8BFFBC2, 0xB8C0FBC2, 0xB8C1FBC2, 0xB8C2FBC2, 0xB8C3FBC2, 0xB8C4FBC2, 0xB8C5FBC2, + 0xB8C6FBC2, 0xB8C7FBC2, 0xB8C8FBC2, 0xB8C9FBC2, 0xB8CAFBC2, 0xB8CBFBC2, 0xB8CCFBC2, 0xB8CDFBC2, 0xB8CEFBC2, 0xB8CFFBC2, 0xB8D0FBC2, 0xB8D1FBC2, 0xB8D2FBC2, 0xB8D3FBC2, 0xB8D4FBC2, + 0xB8D5FBC2, 0xB8D6FBC2, 0xB8D7FBC2, 0xB8D8FBC2, 0xB8D9FBC2, 0xB8DAFBC2, 0xB8DBFBC2, 0xB8DCFBC2, 0xB8DDFBC2, 0xB8DEFBC2, 0xB8DFFBC2, 0xB8E0FBC2, 0xB8E1FBC2, 0xB8E2FBC2, 0xB8E3FBC2, + 0xB8E4FBC2, 0xB8E5FBC2, 0xB8E6FBC2, 0xB8E7FBC2, 0xB8E8FBC2, 0xB8E9FBC2, 0xB8EAFBC2, 0xB8EBFBC2, 0xB8ECFBC2, 0xB8EDFBC2, 0xB8EEFBC2, 0xB8EFFBC2, 0xB8F0FBC2, 0xB8F1FBC2, 0xB8F2FBC2, + 0xB8F3FBC2, 0xB8F4FBC2, 0xB8F5FBC2, 0xB8F6FBC2, 0xB8F7FBC2, 0xB8F8FBC2, 0xB8F9FBC2, 0xB8FAFBC2, 0xB8FBFBC2, 0xB8FCFBC2, 0xB8FDFBC2, 0xB8FEFBC2, 0xB8FFFBC2, 0xB900FBC2, 0xB901FBC2, + 0xB902FBC2, 0xB903FBC2, 0xB904FBC2, 0xB905FBC2, 0xB906FBC2, 0xB907FBC2, 0xB908FBC2, 0xB909FBC2, 0xB90AFBC2, 0xB90BFBC2, 0xB90CFBC2, 0xB90DFBC2, 0xB90EFBC2, 0xB90FFBC2, 0xB910FBC2, + 0xB911FBC2, 0xB912FBC2, 0xB913FBC2, 0xB914FBC2, 0xB915FBC2, 0xB916FBC2, 0xB917FBC2, 0xB918FBC2, 0xB919FBC2, 0xB91AFBC2, 0xB91BFBC2, 0xB91CFBC2, 0xB91DFBC2, 0xB91EFBC2, 0xB91FFBC2, + 0xB920FBC2, 0xB921FBC2, 0xB922FBC2, 0xB923FBC2, 0xB924FBC2, 0xB925FBC2, 0xB926FBC2, 0xB927FBC2, 0xB928FBC2, 0xB929FBC2, 0xB92AFBC2, 0xB92BFBC2, 0xB92CFBC2, 0xB92DFBC2, 0xB92EFBC2, + 0xB92FFBC2, 0xB930FBC2, 0xB931FBC2, 0xB932FBC2, 0xB933FBC2, 0xB934FBC2, 0xB935FBC2, 0xB936FBC2, 0xB937FBC2, 0xB938FBC2, 0xB939FBC2, 0xB93AFBC2, 0xB93BFBC2, 0xB93CFBC2, 0xB93DFBC2, + 0xB93EFBC2, 0xB93FFBC2, 0xB940FBC2, 0xB941FBC2, 0xB942FBC2, 0xB943FBC2, 0xB944FBC2, 0xB945FBC2, 0xB946FBC2, 0xB947FBC2, 0xB948FBC2, 0xB949FBC2, 0xB94AFBC2, 0xB94BFBC2, 0xB94CFBC2, + 0xB94DFBC2, 0xB94EFBC2, 0xB94FFBC2, 0xB950FBC2, 0xB951FBC2, 0xB952FBC2, 0xB953FBC2, 0xB954FBC2, 0xB955FBC2, 0xB956FBC2, 0xB957FBC2, 0xB958FBC2, 0xB959FBC2, 0xB95AFBC2, 
0xB95BFBC2, + 0xB95CFBC2, 0xB95DFBC2, 0xB95EFBC2, 0xB95FFBC2, 0xB960FBC2, 0xB961FBC2, 0xB962FBC2, 0xB963FBC2, 0xB964FBC2, 0xB965FBC2, 0xB966FBC2, 0xB967FBC2, 0xB968FBC2, 0xB969FBC2, 0xB96AFBC2, + 0xB96BFBC2, 0xB96CFBC2, 0xB96DFBC2, 0xB96EFBC2, 0xB96FFBC2, 0xB970FBC2, 0xB971FBC2, 0xB972FBC2, 0xB973FBC2, 0xB974FBC2, 0xB975FBC2, 0xB976FBC2, 0xB977FBC2, 0xB978FBC2, 0xB979FBC2, + 0xB97AFBC2, 0xB97BFBC2, 0xB97CFBC2, 0xB97DFBC2, 0xB97EFBC2, 0xB97FFBC2, 0xB980FBC2, 0xB981FBC2, 0xB982FBC2, 0xB983FBC2, 0xB984FBC2, 0xB985FBC2, 0xB986FBC2, 0xB987FBC2, 0xB988FBC2, + 0xB989FBC2, 0xB98AFBC2, 0xB98BFBC2, 0xB98CFBC2, 0xB98DFBC2, 0xB98EFBC2, 0xB98FFBC2, 0xB990FBC2, 0xB991FBC2, 0xB992FBC2, 0xB993FBC2, 0xB994FBC2, 0xB995FBC2, 0xB996FBC2, 0xB997FBC2, + 0xB998FBC2, 0xB999FBC2, 0xB99AFBC2, 0xB99BFBC2, 0xB99CFBC2, 0xB99DFBC2, 0xB99EFBC2, 0xB99FFBC2, 0xB9A0FBC2, 0xB9A1FBC2, 0xB9A2FBC2, 0xB9A3FBC2, 0xB9A4FBC2, 0xB9A5FBC2, 0xB9A6FBC2, + 0xB9A7FBC2, 0xB9A8FBC2, 0xB9A9FBC2, 0xB9AAFBC2, 0xB9ABFBC2, 0xB9ACFBC2, 0xB9ADFBC2, 0xB9AEFBC2, 0xB9AFFBC2, 0xB9B0FBC2, 0xB9B1FBC2, 0xB9B2FBC2, 0xB9B3FBC2, 0xB9B4FBC2, 0xB9B5FBC2, + 0xB9B6FBC2, 0xB9B7FBC2, 0xB9B8FBC2, 0xB9B9FBC2, 0xB9BAFBC2, 0xB9BBFBC2, 0xB9BCFBC2, 0xB9BDFBC2, 0xB9BEFBC2, 0xB9BFFBC2, 0xB9C0FBC2, 0xB9C1FBC2, 0xB9C2FBC2, 0xB9C3FBC2, 0xB9C4FBC2, + 0xB9C5FBC2, 0xB9C6FBC2, 0xB9C7FBC2, 0xB9C8FBC2, 0xB9C9FBC2, 0xB9CAFBC2, 0xB9CBFBC2, 0xB9CCFBC2, 0xB9CDFBC2, 0xB9CEFBC2, 0xB9CFFBC2, 0xB9D0FBC2, 0xB9D1FBC2, 0xB9D2FBC2, 0xB9D3FBC2, + 0xB9D4FBC2, 0xB9D5FBC2, 0xB9D6FBC2, 0xB9D7FBC2, 0xB9D8FBC2, 0xB9D9FBC2, 0xB9DAFBC2, 0xB9DBFBC2, 0xB9DCFBC2, 0xB9DDFBC2, 0xB9DEFBC2, 0xB9DFFBC2, 0xB9E0FBC2, 0xB9E1FBC2, 0xB9E2FBC2, + 0xB9E3FBC2, 0xB9E4FBC2, 0xB9E5FBC2, 0xB9E6FBC2, 0xB9E7FBC2, 0xB9E8FBC2, 0xB9E9FBC2, 0xB9EAFBC2, 0xB9EBFBC2, 0xB9ECFBC2, 0xB9EDFBC2, 0xB9EEFBC2, 0xB9EFFBC2, 0xB9F0FBC2, 0xB9F1FBC2, + 0xB9F2FBC2, 0xB9F3FBC2, 0xB9F4FBC2, 0xB9F5FBC2, 0xB9F6FBC2, 0xB9F7FBC2, 0xB9F8FBC2, 0xB9F9FBC2, 0xB9FAFBC2, 0xB9FBFBC2, 0xB9FCFBC2, 0xB9FDFBC2, 0xB9FEFBC2, 
0xB9FFFBC2, 0xBA00FBC2, + 0xBA01FBC2, 0xBA02FBC2, 0xBA03FBC2, 0xBA04FBC2, 0xBA05FBC2, 0xBA06FBC2, 0xBA07FBC2, 0xBA08FBC2, 0xBA09FBC2, 0xBA0AFBC2, 0xBA0BFBC2, 0xBA0CFBC2, 0xBA0DFBC2, 0xBA0EFBC2, 0xBA0FFBC2, + 0xBA10FBC2, 0xBA11FBC2, 0xBA12FBC2, 0xBA13FBC2, 0xBA14FBC2, 0xBA15FBC2, 0xBA16FBC2, 0xBA17FBC2, 0xBA18FBC2, 0xBA19FBC2, 0xBA1AFBC2, 0xBA1BFBC2, 0xBA1CFBC2, 0xBA1DFBC2, 0xBA1EFBC2, + 0xBA1FFBC2, 0xBA20FBC2, 0xBA21FBC2, 0xBA22FBC2, 0xBA23FBC2, 0xBA24FBC2, 0xBA25FBC2, 0xBA26FBC2, 0xBA27FBC2, 0xBA28FBC2, 0xBA29FBC2, 0xBA2AFBC2, 0xBA2BFBC2, 0xBA2CFBC2, 0xBA2DFBC2, + 0xBA2EFBC2, 0xBA2FFBC2, 0xBA30FBC2, 0xBA31FBC2, 0xBA32FBC2, 0xBA33FBC2, 0xBA34FBC2, 0xBA35FBC2, 0xBA36FBC2, 0xBA37FBC2, 0xBA38FBC2, 0xBA39FBC2, 0xBA3AFBC2, 0xBA3BFBC2, 0xBA3CFBC2, + 0xBA3DFBC2, 0xBA3EFBC2, 0xBA3FFBC2, 0xBA40FBC2, 0xBA41FBC2, 0xBA42FBC2, 0xBA43FBC2, 0xBA44FBC2, 0xBA45FBC2, 0xBA46FBC2, 0xBA47FBC2, 0xBA48FBC2, 0xBA49FBC2, 0xBA4AFBC2, 0xBA4BFBC2, + 0xBA4CFBC2, 0xBA4DFBC2, 0xBA4EFBC2, 0xBA4FFBC2, 0xBA50FBC2, 0xBA51FBC2, 0xBA52FBC2, 0xBA53FBC2, 0xBA54FBC2, 0xBA55FBC2, 0xBA56FBC2, 0xBA57FBC2, 0xBA58FBC2, 0xBA59FBC2, 0xBA5AFBC2, + 0xBA5BFBC2, 0xBA5CFBC2, 0xBA5DFBC2, 0xBA5EFBC2, 0xBA5FFBC2, 0xBA60FBC2, 0xBA61FBC2, 0xBA62FBC2, 0xBA63FBC2, 0xBA64FBC2, 0xBA65FBC2, 0xBA66FBC2, 0xBA67FBC2, 0xBA68FBC2, 0xBA69FBC2, + 0xBA6AFBC2, 0xBA6BFBC2, 0xBA6CFBC2, 0xBA6DFBC2, 0xBA6EFBC2, 0xBA6FFBC2, 0xBA70FBC2, 0xBA71FBC2, 0xBA72FBC2, 0xBA73FBC2, 0xBA74FBC2, 0xBA75FBC2, 0xBA76FBC2, 0xBA77FBC2, 0xBA78FBC2, + 0xBA79FBC2, 0xBA7AFBC2, 0xBA7BFBC2, 0xBA7CFBC2, 0xBA7DFBC2, 0xBA7EFBC2, 0xBA7FFBC2, 0xBA80FBC2, 0xBA81FBC2, 0xBA82FBC2, 0xBA83FBC2, 0xBA84FBC2, 0xBA85FBC2, 0xBA86FBC2, 0xBA87FBC2, + 0xBA88FBC2, 0xBA89FBC2, 0xBA8AFBC2, 0xBA8BFBC2, 0xBA8CFBC2, 0xBA8DFBC2, 0xBA8EFBC2, 0xBA8FFBC2, 0xBA90FBC2, 0xBA91FBC2, 0xBA92FBC2, 0xBA93FBC2, 0xBA94FBC2, 0xBA95FBC2, 0xBA96FBC2, + 0xBA97FBC2, 0xBA98FBC2, 0xBA99FBC2, 0xBA9AFBC2, 0xBA9BFBC2, 0xBA9CFBC2, 0xBA9DFBC2, 0xBA9EFBC2, 0xBA9FFBC2, 0xBAA0FBC2, 0xBAA1FBC2, 0xBAA2FBC2, 
0xBAA3FBC2, 0xBAA4FBC2, 0xBAA5FBC2, + 0xBAA6FBC2, 0xBAA7FBC2, 0xBAA8FBC2, 0xBAA9FBC2, 0xBAAAFBC2, 0xBAABFBC2, 0xBAACFBC2, 0xBAADFBC2, 0xBAAEFBC2, 0xBAAFFBC2, 0xBAB0FBC2, 0xBAB1FBC2, 0xBAB2FBC2, 0xBAB3FBC2, 0xBAB4FBC2, + 0xBAB5FBC2, 0xBAB6FBC2, 0xBAB7FBC2, 0xBAB8FBC2, 0xBAB9FBC2, 0xBABAFBC2, 0xBABBFBC2, 0xBABCFBC2, 0xBABDFBC2, 0xBABEFBC2, 0xBABFFBC2, 0xBAC0FBC2, 0xBAC1FBC2, 0xBAC2FBC2, 0xBAC3FBC2, + 0xBAC4FBC2, 0xBAC5FBC2, 0xBAC6FBC2, 0xBAC7FBC2, 0xBAC8FBC2, 0xBAC9FBC2, 0xBACAFBC2, 0xBACBFBC2, 0xBACCFBC2, 0xBACDFBC2, 0xBACEFBC2, 0xBACFFBC2, 0xBAD0FBC2, 0xBAD1FBC2, 0xBAD2FBC2, + 0xBAD3FBC2, 0xBAD4FBC2, 0xBAD5FBC2, 0xBAD6FBC2, 0xBAD7FBC2, 0xBAD8FBC2, 0xBAD9FBC2, 0xBADAFBC2, 0xBADBFBC2, 0xBADCFBC2, 0xBADDFBC2, 0xBADEFBC2, 0xBADFFBC2, 0xBAE0FBC2, 0xBAE1FBC2, + 0xBAE2FBC2, 0xBAE3FBC2, 0xBAE4FBC2, 0xBAE5FBC2, 0xBAE6FBC2, 0xBAE7FBC2, 0xBAE8FBC2, 0xBAE9FBC2, 0xBAEAFBC2, 0xBAEBFBC2, 0xBAECFBC2, 0xBAEDFBC2, 0xBAEEFBC2, 0xBAEFFBC2, 0xBAF0FBC2, + 0xBAF1FBC2, 0xBAF2FBC2, 0xBAF3FBC2, 0xBAF4FBC2, 0xBAF5FBC2, 0xBAF6FBC2, 0xBAF7FBC2, 0xBAF8FBC2, 0xBAF9FBC2, 0xBAFAFBC2, 0xBAFBFBC2, 0xBAFCFBC2, 0xBAFDFBC2, 0xBAFEFBC2, 0xBAFFFBC2, + 0xBB00FBC2, 0xBB01FBC2, 0xBB02FBC2, 0xBB03FBC2, 0xBB04FBC2, 0xBB05FBC2, 0xBB06FBC2, 0xBB07FBC2, 0xBB08FBC2, 0xBB09FBC2, 0xBB0AFBC2, 0xBB0BFBC2, 0xBB0CFBC2, 0xBB0DFBC2, 0xBB0EFBC2, + 0xBB0FFBC2, 0xBB10FBC2, 0xBB11FBC2, 0xBB12FBC2, 0xBB13FBC2, 0xBB14FBC2, 0xBB15FBC2, 0xBB16FBC2, 0xBB17FBC2, 0xBB18FBC2, 0xBB19FBC2, 0xBB1AFBC2, 0xBB1BFBC2, 0xBB1CFBC2, 0xBB1DFBC2, + 0xBB1EFBC2, 0xBB1FFBC2, 0xBB20FBC2, 0xBB21FBC2, 0xBB22FBC2, 0xBB23FBC2, 0xBB24FBC2, 0xBB25FBC2, 0xBB26FBC2, 0xBB27FBC2, 0xBB28FBC2, 0xBB29FBC2, 0xBB2AFBC2, 0xBB2BFBC2, 0xBB2CFBC2, + 0xBB2DFBC2, 0xBB2EFBC2, 0xBB2FFBC2, 0xBB30FBC2, 0xBB31FBC2, 0xBB32FBC2, 0xBB33FBC2, 0xBB34FBC2, 0xBB35FBC2, 0xBB36FBC2, 0xBB37FBC2, 0xBB38FBC2, 0xBB39FBC2, 0xBB3AFBC2, 0xBB3BFBC2, + 0xBB3CFBC2, 0xBB3DFBC2, 0xBB3EFBC2, 0xBB3FFBC2, 0xBB40FBC2, 0xBB41FBC2, 0xBB42FBC2, 0xBB43FBC2, 0xBB44FBC2, 0xBB45FBC2, 0xBB46FBC2, 
0xBB47FBC2, 0xBB48FBC2, 0xBB49FBC2, 0xBB4AFBC2, + 0xBB4BFBC2, 0xBB4CFBC2, 0xBB4DFBC2, 0xBB4EFBC2, 0xBB4FFBC2, 0xBB50FBC2, 0xBB51FBC2, 0xBB52FBC2, 0xBB53FBC2, 0xBB54FBC2, 0xBB55FBC2, 0xBB56FBC2, 0xBB57FBC2, 0xBB58FBC2, 0xBB59FBC2, + 0xBB5AFBC2, 0xBB5BFBC2, 0xBB5CFBC2, 0xBB5DFBC2, 0xBB5EFBC2, 0xBB5FFBC2, 0xBB60FBC2, 0xBB61FBC2, 0xBB62FBC2, 0xBB63FBC2, 0xBB64FBC2, 0xBB65FBC2, 0xBB66FBC2, 0xBB67FBC2, 0xBB68FBC2, + 0xBB69FBC2, 0xBB6AFBC2, 0xBB6BFBC2, 0xBB6CFBC2, 0xBB6DFBC2, 0xBB6EFBC2, 0xBB6FFBC2, 0xBB70FBC2, 0xBB71FBC2, 0xBB72FBC2, 0xBB73FBC2, 0xBB74FBC2, 0xBB75FBC2, 0xBB76FBC2, 0xBB77FBC2, + 0xBB78FBC2, 0xBB79FBC2, 0xBB7AFBC2, 0xBB7BFBC2, 0xBB7CFBC2, 0xBB7DFBC2, 0xBB7EFBC2, 0xBB7FFBC2, 0xBB80FBC2, 0xBB81FBC2, 0xBB82FBC2, 0xBB83FBC2, 0xBB84FBC2, 0xBB85FBC2, 0xBB86FBC2, + 0xBB87FBC2, 0xBB88FBC2, 0xBB89FBC2, 0xBB8AFBC2, 0xBB8BFBC2, 0xBB8CFBC2, 0xBB8DFBC2, 0xBB8EFBC2, 0xBB8FFBC2, 0xBB90FBC2, 0xBB91FBC2, 0xBB92FBC2, 0xBB93FBC2, 0xBB94FBC2, 0xBB95FBC2, + 0xBB96FBC2, 0xBB97FBC2, 0xBB98FBC2, 0xBB99FBC2, 0xBB9AFBC2, 0xBB9BFBC2, 0xBB9CFBC2, 0xBB9DFBC2, 0xBB9EFBC2, 0xBB9FFBC2, 0xBBA0FBC2, 0xBBA1FBC2, 0xBBA2FBC2, 0xBBA3FBC2, 0xBBA4FBC2, + 0xBBA5FBC2, 0xBBA6FBC2, 0xBBA7FBC2, 0xBBA8FBC2, 0xBBA9FBC2, 0xBBAAFBC2, 0xBBABFBC2, 0xBBACFBC2, 0xBBADFBC2, 0xBBAEFBC2, 0xBBAFFBC2, 0xBBB0FBC2, 0xBBB1FBC2, 0xBBB2FBC2, 0xBBB3FBC2, + 0xBBB4FBC2, 0xBBB5FBC2, 0xBBB6FBC2, 0xBBB7FBC2, 0xBBB8FBC2, 0xBBB9FBC2, 0xBBBAFBC2, 0xBBBBFBC2, 0xBBBCFBC2, 0xBBBDFBC2, 0xBBBEFBC2, 0xBBBFFBC2, 0xBBC0FBC2, 0xBBC1FBC2, 0xBBC2FBC2, + 0xBBC3FBC2, 0xBBC4FBC2, 0xBBC5FBC2, 0xBBC6FBC2, 0xBBC7FBC2, 0xBBC8FBC2, 0xBBC9FBC2, 0xBBCAFBC2, 0xBBCBFBC2, 0xBBCCFBC2, 0xBBCDFBC2, 0xBBCEFBC2, 0xBBCFFBC2, 0xBBD0FBC2, 0xBBD1FBC2, + 0xBBD2FBC2, 0xBBD3FBC2, 0xBBD4FBC2, 0xBBD5FBC2, 0xBBD6FBC2, 0xBBD7FBC2, 0xBBD8FBC2, 0xBBD9FBC2, 0xBBDAFBC2, 0xBBDBFBC2, 0xBBDCFBC2, 0xBBDDFBC2, 0xBBDEFBC2, 0xBBDFFBC2, 0xBBE0FBC2, + 0xBBE1FBC2, 0xBBE2FBC2, 0xBBE3FBC2, 0xBBE4FBC2, 0xBBE5FBC2, 0xBBE6FBC2, 0xBBE7FBC2, 0xBBE8FBC2, 0xBBE9FBC2, 0xBBEAFBC2, 
0xBBEBFBC2, 0xBBECFBC2, 0xBBEDFBC2, 0xBBEEFBC2, 0xBBEFFBC2, + 0xBBF0FBC2, 0xBBF1FBC2, 0xBBF2FBC2, 0xBBF3FBC2, 0xBBF4FBC2, 0xBBF5FBC2, 0xBBF6FBC2, 0xBBF7FBC2, 0xBBF8FBC2, 0xBBF9FBC2, 0xBBFAFBC2, 0xBBFBFBC2, 0xBBFCFBC2, 0xBBFDFBC2, 0xBBFEFBC2, + 0xBBFFFBC2, 0xBC00FBC2, 0xBC01FBC2, 0xBC02FBC2, 0xBC03FBC2, 0xBC04FBC2, 0xBC05FBC2, 0xBC06FBC2, 0xBC07FBC2, 0xBC08FBC2, 0xBC09FBC2, 0xBC0AFBC2, 0xBC0BFBC2, 0xBC0CFBC2, 0xBC0DFBC2, + 0xBC0EFBC2, 0xBC0FFBC2, 0xBC10FBC2, 0xBC11FBC2, 0xBC12FBC2, 0xBC13FBC2, 0xBC14FBC2, 0xBC15FBC2, 0xBC16FBC2, 0xBC17FBC2, 0xBC18FBC2, 0xBC19FBC2, 0xBC1AFBC2, 0xBC1BFBC2, 0xBC1CFBC2, + 0xBC1DFBC2, 0xBC1EFBC2, 0xBC1FFBC2, 0xBC20FBC2, 0xBC21FBC2, 0xBC22FBC2, 0xBC23FBC2, 0xBC24FBC2, 0xBC25FBC2, 0xBC26FBC2, 0xBC27FBC2, 0xBC28FBC2, 0xBC29FBC2, 0xBC2AFBC2, 0xBC2BFBC2, + 0xBC2CFBC2, 0xBC2DFBC2, 0xBC2EFBC2, 0xBC2FFBC2, 0xBC30FBC2, 0xBC31FBC2, 0xBC32FBC2, 0xBC33FBC2, 0xBC34FBC2, 0xBC35FBC2, 0xBC36FBC2, 0xBC37FBC2, 0xBC38FBC2, 0xBC39FBC2, 0xBC3AFBC2, + 0xBC3BFBC2, 0xBC3CFBC2, 0xBC3DFBC2, 0xBC3EFBC2, 0xBC3FFBC2, 0xBC40FBC2, 0xBC41FBC2, 0xBC42FBC2, 0xBC43FBC2, 0xBC44FBC2, 0xBC45FBC2, 0xBC46FBC2, 0xBC47FBC2, 0xBC48FBC2, 0xBC49FBC2, + 0xBC4AFBC2, 0xBC4BFBC2, 0xBC4CFBC2, 0xBC4DFBC2, 0xBC4EFBC2, 0xBC4FFBC2, 0xBC50FBC2, 0xBC51FBC2, 0xBC52FBC2, 0xBC53FBC2, 0xBC54FBC2, 0xBC55FBC2, 0xBC56FBC2, 0xBC57FBC2, 0xBC58FBC2, + 0xBC59FBC2, 0xBC5AFBC2, 0xBC5BFBC2, 0xBC5CFBC2, 0xBC5DFBC2, 0xBC5EFBC2, 0xBC5FFBC2, 0xBC60FBC2, 0xBC61FBC2, 0xBC62FBC2, 0xBC63FBC2, 0xBC64FBC2, 0xBC65FBC2, 0xBC66FBC2, 0xBC67FBC2, + 0xBC68FBC2, 0xBC69FBC2, 0xBC6AFBC2, 0xBC6BFBC2, 0xBC6CFBC2, 0xBC6DFBC2, 0xBC6EFBC2, 0xBC6FFBC2, 0xBC70FBC2, 0xBC71FBC2, 0xBC72FBC2, 0xBC73FBC2, 0xBC74FBC2, 0xBC75FBC2, 0xBC76FBC2, + 0xBC77FBC2, 0xBC78FBC2, 0xBC79FBC2, 0xBC7AFBC2, 0xBC7BFBC2, 0xBC7CFBC2, 0xBC7DFBC2, 0xBC7EFBC2, 0xBC7FFBC2, 0xBC80FBC2, 0xBC81FBC2, 0xBC82FBC2, 0xBC83FBC2, 0xBC84FBC2, 0xBC85FBC2, + 0xBC86FBC2, 0xBC87FBC2, 0xBC88FBC2, 0xBC89FBC2, 0xBC8AFBC2, 0xBC8BFBC2, 0xBC8CFBC2, 0xBC8DFBC2, 0xBC8EFBC2, 
0xBC8FFBC2, 0xBC90FBC2, 0xBC91FBC2, 0xBC92FBC2, 0xBC93FBC2, 0xBC94FBC2, + 0xBC95FBC2, 0xBC96FBC2, 0xBC97FBC2, 0xBC98FBC2, 0xBC99FBC2, 0xBC9AFBC2, 0xBC9BFBC2, 0xBC9CFBC2, 0xBC9DFBC2, 0xBC9EFBC2, 0xBC9FFBC2, 0xBCA0FBC2, 0xBCA1FBC2, 0xBCA2FBC2, 0xBCA3FBC2, + 0xBCA4FBC2, 0xBCA5FBC2, 0xBCA6FBC2, 0xBCA7FBC2, 0xBCA8FBC2, 0xBCA9FBC2, 0xBCAAFBC2, 0xBCABFBC2, 0xBCACFBC2, 0xBCADFBC2, 0xBCAEFBC2, 0xBCAFFBC2, 0xBCB0FBC2, 0xBCB1FBC2, 0xBCB2FBC2, + 0xBCB3FBC2, 0xBCB4FBC2, 0xBCB5FBC2, 0xBCB6FBC2, 0xBCB7FBC2, 0xBCB8FBC2, 0xBCB9FBC2, 0xBCBAFBC2, 0xBCBBFBC2, 0xBCBCFBC2, 0xBCBDFBC2, 0xBCBEFBC2, 0xBCBFFBC2, 0xBCC0FBC2, 0xBCC1FBC2, + 0xBCC2FBC2, 0xBCC3FBC2, 0xBCC4FBC2, 0xBCC5FBC2, 0xBCC6FBC2, 0xBCC7FBC2, 0xBCC8FBC2, 0xBCC9FBC2, 0xBCCAFBC2, 0xBCCBFBC2, 0xBCCCFBC2, 0xBCCDFBC2, 0xBCCEFBC2, 0xBCCFFBC2, 0xBCD0FBC2, + 0xBCD1FBC2, 0xBCD2FBC2, 0xBCD3FBC2, 0xBCD4FBC2, 0xBCD5FBC2, 0xBCD6FBC2, 0xBCD7FBC2, 0xBCD8FBC2, 0xBCD9FBC2, 0xBCDAFBC2, 0xBCDBFBC2, 0xBCDCFBC2, 0xBCDDFBC2, 0xBCDEFBC2, 0xBCDFFBC2, + 0xBCE0FBC2, 0xBCE1FBC2, 0xBCE2FBC2, 0xBCE3FBC2, 0xBCE4FBC2, 0xBCE5FBC2, 0xBCE6FBC2, 0xBCE7FBC2, 0xBCE8FBC2, 0xBCE9FBC2, 0xBCEAFBC2, 0xBCEBFBC2, 0xBCECFBC2, 0xBCEDFBC2, 0xBCEEFBC2, + 0xBCEFFBC2, 0xBCF0FBC2, 0xBCF1FBC2, 0xBCF2FBC2, 0xBCF3FBC2, 0xBCF4FBC2, 0xBCF5FBC2, 0xBCF6FBC2, 0xBCF7FBC2, 0xBCF8FBC2, 0xBCF9FBC2, 0xBCFAFBC2, 0xBCFBFBC2, 0xBCFCFBC2, 0xBCFDFBC2, + 0xBCFEFBC2, 0xBCFFFBC2, 0xBD00FBC2, 0xBD01FBC2, 0xBD02FBC2, 0xBD03FBC2, 0xBD04FBC2, 0xBD05FBC2, 0xBD06FBC2, 0xBD07FBC2, 0xBD08FBC2, 0xBD09FBC2, 0xBD0AFBC2, 0xBD0BFBC2, 0xBD0CFBC2, + 0xBD0DFBC2, 0xBD0EFBC2, 0xBD0FFBC2, 0xBD10FBC2, 0xBD11FBC2, 0xBD12FBC2, 0xBD13FBC2, 0xBD14FBC2, 0xBD15FBC2, 0xBD16FBC2, 0xBD17FBC2, 0xBD18FBC2, 0xBD19FBC2, 0xBD1AFBC2, 0xBD1BFBC2, + 0xBD1CFBC2, 0xBD1DFBC2, 0xBD1EFBC2, 0xBD1FFBC2, 0xBD20FBC2, 0xBD21FBC2, 0xBD22FBC2, 0xBD23FBC2, 0xBD24FBC2, 0xBD25FBC2, 0xBD26FBC2, 0xBD27FBC2, 0xBD28FBC2, 0xBD29FBC2, 0xBD2AFBC2, + 0xBD2BFBC2, 0xBD2CFBC2, 0xBD2DFBC2, 0xBD2EFBC2, 0xBD2FFBC2, 0xBD30FBC2, 0xBD31FBC2, 0xBD32FBC2, 
0xBD33FBC2, 0xBD34FBC2, 0xBD35FBC2, 0xBD36FBC2, 0xBD37FBC2, 0xBD38FBC2, 0xBD39FBC2, + 0xBD3AFBC2, 0xBD3BFBC2, 0xBD3CFBC2, 0xBD3DFBC2, 0xBD3EFBC2, 0xBD3FFBC2, 0xBD40FBC2, 0xBD41FBC2, 0xBD42FBC2, 0xBD43FBC2, 0xBD44FBC2, 0xBD45FBC2, 0xBD46FBC2, 0xBD47FBC2, 0xBD48FBC2, + 0xBD49FBC2, 0xBD4AFBC2, 0xBD4BFBC2, 0xBD4CFBC2, 0xBD4DFBC2, 0xBD4EFBC2, 0xBD4FFBC2, 0xBD50FBC2, 0xBD51FBC2, 0xBD52FBC2, 0xBD53FBC2, 0xBD54FBC2, 0xBD55FBC2, 0xBD56FBC2, 0xBD57FBC2, + 0xBD58FBC2, 0xBD59FBC2, 0xBD5AFBC2, 0xBD5BFBC2, 0xBD5CFBC2, 0xBD5DFBC2, 0xBD5EFBC2, 0xBD5FFBC2, 0xBD60FBC2, 0xBD61FBC2, 0xBD62FBC2, 0xBD63FBC2, 0xBD64FBC2, 0xBD65FBC2, 0xBD66FBC2, + 0xBD67FBC2, 0xBD68FBC2, 0xBD69FBC2, 0xBD6AFBC2, 0xBD6BFBC2, 0xBD6CFBC2, 0xBD6DFBC2, 0xBD6EFBC2, 0xBD6FFBC2, 0xBD70FBC2, 0xBD71FBC2, 0xBD72FBC2, 0xBD73FBC2, 0xBD74FBC2, 0xBD75FBC2, + 0xBD76FBC2, 0xBD77FBC2, 0xBD78FBC2, 0xBD79FBC2, 0xBD7AFBC2, 0xBD7BFBC2, 0xBD7CFBC2, 0xBD7DFBC2, 0xBD7EFBC2, 0xBD7FFBC2, 0xBD80FBC2, 0xBD81FBC2, 0xBD82FBC2, 0xBD83FBC2, 0xBD84FBC2, + 0xBD85FBC2, 0xBD86FBC2, 0xBD87FBC2, 0xBD88FBC2, 0xBD89FBC2, 0xBD8AFBC2, 0xBD8BFBC2, 0xBD8CFBC2, 0xBD8DFBC2, 0xBD8EFBC2, 0xBD8FFBC2, 0xBD90FBC2, 0xBD91FBC2, 0xBD92FBC2, 0xBD93FBC2, + 0xBD94FBC2, 0xBD95FBC2, 0xBD96FBC2, 0xBD97FBC2, 0xBD98FBC2, 0xBD99FBC2, 0xBD9AFBC2, 0xBD9BFBC2, 0xBD9CFBC2, 0xBD9DFBC2, 0xBD9EFBC2, 0xBD9FFBC2, 0xBDA0FBC2, 0xBDA1FBC2, 0xBDA2FBC2, + 0xBDA3FBC2, 0xBDA4FBC2, 0xBDA5FBC2, 0xBDA6FBC2, 0xBDA7FBC2, 0xBDA8FBC2, 0xBDA9FBC2, 0xBDAAFBC2, 0xBDABFBC2, 0xBDACFBC2, 0xBDADFBC2, 0xBDAEFBC2, 0xBDAFFBC2, 0xBDB0FBC2, 0xBDB1FBC2, + 0xBDB2FBC2, 0xBDB3FBC2, 0xBDB4FBC2, 0xBDB5FBC2, 0xBDB6FBC2, 0xBDB7FBC2, 0xBDB8FBC2, 0xBDB9FBC2, 0xBDBAFBC2, 0xBDBBFBC2, 0xBDBCFBC2, 0xBDBDFBC2, 0xBDBEFBC2, 0xBDBFFBC2, 0xBDC0FBC2, + 0xBDC1FBC2, 0xBDC2FBC2, 0xBDC3FBC2, 0xBDC4FBC2, 0xBDC5FBC2, 0xBDC6FBC2, 0xBDC7FBC2, 0xBDC8FBC2, 0xBDC9FBC2, 0xBDCAFBC2, 0xBDCBFBC2, 0xBDCCFBC2, 0xBDCDFBC2, 0xBDCEFBC2, 0xBDCFFBC2, + 0xBDD0FBC2, 0xBDD1FBC2, 0xBDD2FBC2, 0xBDD3FBC2, 0xBDD4FBC2, 0xBDD5FBC2, 0xBDD6FBC2, 
0xBDD7FBC2, 0xBDD8FBC2, 0xBDD9FBC2, 0xBDDAFBC2, 0xBDDBFBC2, 0xBDDCFBC2, 0xBDDDFBC2, 0xBDDEFBC2, + 0xBDDFFBC2, 0xBDE0FBC2, 0xBDE1FBC2, 0xBDE2FBC2, 0xBDE3FBC2, 0xBDE4FBC2, 0xBDE5FBC2, 0xBDE6FBC2, 0xBDE7FBC2, 0xBDE8FBC2, 0xBDE9FBC2, 0xBDEAFBC2, 0xBDEBFBC2, 0xBDECFBC2, 0xBDEDFBC2, + 0xBDEEFBC2, 0xBDEFFBC2, 0xBDF0FBC2, 0xBDF1FBC2, 0xBDF2FBC2, 0xBDF3FBC2, 0xBDF4FBC2, 0xBDF5FBC2, 0xBDF6FBC2, 0xBDF7FBC2, 0xBDF8FBC2, 0xBDF9FBC2, 0xBDFAFBC2, 0xBDFBFBC2, 0xBDFCFBC2, + 0xBDFDFBC2, 0xBDFEFBC2, 0xBDFFFBC2, 0xBE00FBC2, 0xBE01FBC2, 0xBE02FBC2, 0xBE03FBC2, 0xBE04FBC2, 0xBE05FBC2, 0xBE06FBC2, 0xBE07FBC2, 0xBE08FBC2, 0xBE09FBC2, 0xBE0AFBC2, 0xBE0BFBC2, + 0xBE0CFBC2, 0xBE0DFBC2, 0xBE0EFBC2, 0xBE0FFBC2, 0xBE10FBC2, 0xBE11FBC2, 0xBE12FBC2, 0xBE13FBC2, 0xBE14FBC2, 0xBE15FBC2, 0xBE16FBC2, 0xBE17FBC2, 0xBE18FBC2, 0xBE19FBC2, 0xBE1AFBC2, + 0xBE1BFBC2, 0xBE1CFBC2, 0xBE1DFBC2, 0xBE1EFBC2, 0xBE1FFBC2, 0xBE20FBC2, 0xBE21FBC2, 0xBE22FBC2, 0xBE23FBC2, 0xBE24FBC2, 0xBE25FBC2, 0xBE26FBC2, 0xBE27FBC2, 0xBE28FBC2, 0xBE29FBC2, + 0xBE2AFBC2, 0xBE2BFBC2, 0xBE2CFBC2, 0xBE2DFBC2, 0xBE2EFBC2, 0xBE2FFBC2, 0xBE30FBC2, 0xBE31FBC2, 0xBE32FBC2, 0xBE33FBC2, 0xBE34FBC2, 0xBE35FBC2, 0xBE36FBC2, 0xBE37FBC2, 0xBE38FBC2, + 0xBE39FBC2, 0xBE3AFBC2, 0xBE3BFBC2, 0xBE3CFBC2, 0xBE3DFBC2, 0xBE3EFBC2, 0xBE3FFBC2, 0xBE40FBC2, 0xBE41FBC2, 0xBE42FBC2, 0xBE43FBC2, 0xBE44FBC2, 0xBE45FBC2, 0xBE46FBC2, 0xBE47FBC2, + 0xBE48FBC2, 0xBE49FBC2, 0xBE4AFBC2, 0xBE4BFBC2, 0xBE4CFBC2, 0xBE4DFBC2, 0xBE4EFBC2, 0xBE4FFBC2, 0xBE50FBC2, 0xBE51FBC2, 0xBE52FBC2, 0xBE53FBC2, 0xBE54FBC2, 0xBE55FBC2, 0xBE56FBC2, + 0xBE57FBC2, 0xBE58FBC2, 0xBE59FBC2, 0xBE5AFBC2, 0xBE5BFBC2, 0xBE5CFBC2, 0xBE5DFBC2, 0xBE5EFBC2, 0xBE5FFBC2, 0xBE60FBC2, 0xBE61FBC2, 0xBE62FBC2, 0xBE63FBC2, 0xBE64FBC2, 0xBE65FBC2, + 0xBE66FBC2, 0xBE67FBC2, 0xBE68FBC2, 0xBE69FBC2, 0xBE6AFBC2, 0xBE6BFBC2, 0xBE6CFBC2, 0xBE6DFBC2, 0xBE6EFBC2, 0xBE6FFBC2, 0xBE70FBC2, 0xBE71FBC2, 0xBE72FBC2, 0xBE73FBC2, 0xBE74FBC2, + 0xBE75FBC2, 0xBE76FBC2, 0xBE77FBC2, 0xBE78FBC2, 0xBE79FBC2, 0xBE7AFBC2, 
0xBE7BFBC2, 0xBE7CFBC2, 0xBE7DFBC2, 0xBE7EFBC2, 0xBE7FFBC2, 0xBE80FBC2, 0xBE81FBC2, 0xBE82FBC2, 0xBE83FBC2, + 0xBE84FBC2, 0xBE85FBC2, 0xBE86FBC2, 0xBE87FBC2, 0xBE88FBC2, 0xBE89FBC2, 0xBE8AFBC2, 0xBE8BFBC2, 0xBE8CFBC2, 0xBE8DFBC2, 0xBE8EFBC2, 0xBE8FFBC2, 0xBE90FBC2, 0xBE91FBC2, 0xBE92FBC2, + 0xBE93FBC2, 0xBE94FBC2, 0xBE95FBC2, 0xBE96FBC2, 0xBE97FBC2, 0xBE98FBC2, 0xBE99FBC2, 0xBE9AFBC2, 0xBE9BFBC2, 0xBE9CFBC2, 0xBE9DFBC2, 0xBE9EFBC2, 0xBE9FFBC2, 0xBEA0FBC2, 0xBEA1FBC2, + 0xBEA2FBC2, 0xBEA3FBC2, 0xBEA4FBC2, 0xBEA5FBC2, 0xBEA6FBC2, 0xBEA7FBC2, 0xBEA8FBC2, 0xBEA9FBC2, 0xBEAAFBC2, 0xBEABFBC2, 0xBEACFBC2, 0xBEADFBC2, 0xBEAEFBC2, 0xBEAFFBC2, 0xBEB0FBC2, + 0xBEB1FBC2, 0xBEB2FBC2, 0xBEB3FBC2, 0xBEB4FBC2, 0xBEB5FBC2, 0xBEB6FBC2, 0xBEB7FBC2, 0xBEB8FBC2, 0xBEB9FBC2, 0xBEBAFBC2, 0xBEBBFBC2, 0xBEBCFBC2, 0xBEBDFBC2, 0xBEBEFBC2, 0xBEBFFBC2, + 0xBEC0FBC2, 0xBEC1FBC2, 0xBEC2FBC2, 0xBEC3FBC2, 0xBEC4FBC2, 0xBEC5FBC2, 0xBEC6FBC2, 0xBEC7FBC2, 0xBEC8FBC2, 0xBEC9FBC2, 0xBECAFBC2, 0xBECBFBC2, 0xBECCFBC2, 0xBECDFBC2, 0xBECEFBC2, + 0xBECFFBC2, 0xBED0FBC2, 0xBED1FBC2, 0xBED2FBC2, 0xBED3FBC2, 0xBED4FBC2, 0xBED5FBC2, 0xBED6FBC2, 0xBED7FBC2, 0xBED8FBC2, 0xBED9FBC2, 0xBEDAFBC2, 0xBEDBFBC2, 0xBEDCFBC2, 0xBEDDFBC2, + 0xBEDEFBC2, 0xBEDFFBC2, 0xBEE0FBC2, 0xBEE1FBC2, 0xBEE2FBC2, 0xBEE3FBC2, 0xBEE4FBC2, 0xBEE5FBC2, 0xBEE6FBC2, 0xBEE7FBC2, 0xBEE8FBC2, 0xBEE9FBC2, 0xBEEAFBC2, 0xBEEBFBC2, 0xBEECFBC2, + 0xBEEDFBC2, 0xBEEEFBC2, 0xBEEFFBC2, 0xBEF0FBC2, 0xBEF1FBC2, 0xBEF2FBC2, 0xBEF3FBC2, 0xBEF4FBC2, 0xBEF5FBC2, 0xBEF6FBC2, 0xBEF7FBC2, 0xBEF8FBC2, 0xBEF9FBC2, 0xBEFAFBC2, 0xBEFBFBC2, + 0xBEFCFBC2, 0xBEFDFBC2, 0xBEFEFBC2, 0xBEFFFBC2, 0xBF00FBC2, 0xBF01FBC2, 0xBF02FBC2, 0xBF03FBC2, 0xBF04FBC2, 0xBF05FBC2, 0xBF06FBC2, 0xBF07FBC2, 0xBF08FBC2, 0xBF09FBC2, 0xBF0AFBC2, + 0xBF0BFBC2, 0xBF0CFBC2, 0xBF0DFBC2, 0xBF0EFBC2, 0xBF0FFBC2, 0xBF10FBC2, 0xBF11FBC2, 0xBF12FBC2, 0xBF13FBC2, 0xBF14FBC2, 0xBF15FBC2, 0xBF16FBC2, 0xBF17FBC2, 0xBF18FBC2, 0xBF19FBC2, + 0xBF1AFBC2, 0xBF1BFBC2, 0xBF1CFBC2, 0xBF1DFBC2, 0xBF1EFBC2, 
0xBF1FFBC2, 0xBF20FBC2, 0xBF21FBC2, 0xBF22FBC2, 0xBF23FBC2, 0xBF24FBC2, 0xBF25FBC2, 0xBF26FBC2, 0xBF27FBC2, 0xBF28FBC2, + 0xBF29FBC2, 0xBF2AFBC2, 0xBF2BFBC2, 0xBF2CFBC2, 0xBF2DFBC2, 0xBF2EFBC2, 0xBF2FFBC2, 0xBF30FBC2, 0xBF31FBC2, 0xBF32FBC2, 0xBF33FBC2, 0xBF34FBC2, 0xBF35FBC2, 0xBF36FBC2, 0xBF37FBC2, + 0xBF38FBC2, 0xBF39FBC2, 0xBF3AFBC2, 0xBF3BFBC2, 0xBF3CFBC2, 0xBF3DFBC2, 0xBF3EFBC2, 0xBF3FFBC2, 0xBF40FBC2, 0xBF41FBC2, 0xBF42FBC2, 0xBF43FBC2, 0xBF44FBC2, 0xBF45FBC2, 0xBF46FBC2, + 0xBF47FBC2, 0xBF48FBC2, 0xBF49FBC2, 0xBF4AFBC2, 0xBF4BFBC2, 0xBF4CFBC2, 0xBF4DFBC2, 0xBF4EFBC2, 0xBF4FFBC2, 0xBF50FBC2, 0xBF51FBC2, 0xBF52FBC2, 0xBF53FBC2, 0xBF54FBC2, 0xBF55FBC2, + 0xBF56FBC2, 0xBF57FBC2, 0xBF58FBC2, 0xBF59FBC2, 0xBF5AFBC2, 0xBF5BFBC2, 0xBF5CFBC2, 0xBF5DFBC2, 0xBF5EFBC2, 0xBF5FFBC2, 0xBF60FBC2, 0xBF61FBC2, 0xBF62FBC2, 0xBF63FBC2, 0xBF64FBC2, + 0xBF65FBC2, 0xBF66FBC2, 0xBF67FBC2, 0xBF68FBC2, 0xBF69FBC2, 0xBF6AFBC2, 0xBF6BFBC2, 0xBF6CFBC2, 0xBF6DFBC2, 0xBF6EFBC2, 0xBF6FFBC2, 0xBF70FBC2, 0xBF71FBC2, 0xBF72FBC2, 0xBF73FBC2, + 0xBF74FBC2, 0xBF75FBC2, 0xBF76FBC2, 0xBF77FBC2, 0xBF78FBC2, 0xBF79FBC2, 0xBF7AFBC2, 0xBF7BFBC2, 0xBF7CFBC2, 0xBF7DFBC2, 0xBF7EFBC2, 0xBF7FFBC2, 0xBF80FBC2, 0xBF81FBC2, 0xBF82FBC2, + 0xBF83FBC2, 0xBF84FBC2, 0xBF85FBC2, 0xBF86FBC2, 0xBF87FBC2, 0xBF88FBC2, 0xBF89FBC2, 0xBF8AFBC2, 0xBF8BFBC2, 0xBF8CFBC2, 0xBF8DFBC2, 0xBF8EFBC2, 0xBF8FFBC2, 0xBF90FBC2, 0xBF91FBC2, + 0xBF92FBC2, 0xBF93FBC2, 0xBF94FBC2, 0xBF95FBC2, 0xBF96FBC2, 0xBF97FBC2, 0xBF98FBC2, 0xBF99FBC2, 0xBF9AFBC2, 0xBF9BFBC2, 0xBF9CFBC2, 0xBF9DFBC2, 0xBF9EFBC2, 0xBF9FFBC2, 0xBFA0FBC2, + 0xBFA1FBC2, 0xBFA2FBC2, 0xBFA3FBC2, 0xBFA4FBC2, 0xBFA5FBC2, 0xBFA6FBC2, 0xBFA7FBC2, 0xBFA8FBC2, 0xBFA9FBC2, 0xBFAAFBC2, 0xBFABFBC2, 0xBFACFBC2, 0xBFADFBC2, 0xBFAEFBC2, 0xBFAFFBC2, + 0xBFB0FBC2, 0xBFB1FBC2, 0xBFB2FBC2, 0xBFB3FBC2, 0xBFB4FBC2, 0xBFB5FBC2, 0xBFB6FBC2, 0xBFB7FBC2, 0xBFB8FBC2, 0xBFB9FBC2, 0xBFBAFBC2, 0xBFBBFBC2, 0xBFBCFBC2, 0xBFBDFBC2, 0xBFBEFBC2, + 0xBFBFFBC2, 0xBFC0FBC2, 0xBFC1FBC2, 0xBFC2FBC2, 
0xBFC3FBC2, 0xBFC4FBC2, 0xBFC5FBC2, 0xBFC6FBC2, 0xBFC7FBC2, 0xBFC8FBC2, 0xBFC9FBC2, 0xBFCAFBC2, 0xBFCBFBC2, 0xBFCCFBC2, 0xBFCDFBC2, + 0xBFCEFBC2, 0xBFCFFBC2, 0xBFD0FBC2, 0xBFD1FBC2, 0xBFD2FBC2, 0xBFD3FBC2, 0xBFD4FBC2, 0xBFD5FBC2, 0xBFD6FBC2, 0xBFD7FBC2, 0xBFD8FBC2, 0xBFD9FBC2, 0xBFDAFBC2, 0xBFDBFBC2, 0xBFDCFBC2, + 0xBFDDFBC2, 0xBFDEFBC2, 0xBFDFFBC2, 0xBFE0FBC2, 0xBFE1FBC2, 0xBFE2FBC2, 0xBFE3FBC2, 0xBFE4FBC2, 0xBFE5FBC2, 0xBFE6FBC2, 0xBFE7FBC2, 0xBFE8FBC2, 0xBFE9FBC2, 0xBFEAFBC2, 0xBFEBFBC2, + 0xBFECFBC2, 0xBFEDFBC2, 0xBFEEFBC2, 0xBFEFFBC2, 0xBFF0FBC2, 0xBFF1FBC2, 0xBFF2FBC2, 0xBFF3FBC2, 0xBFF4FBC2, 0xBFF5FBC2, 0xBFF6FBC2, 0xBFF7FBC2, 0xBFF8FBC2, 0xBFF9FBC2, 0xBFFAFBC2, + 0xBFFBFBC2, 0xBFFCFBC2, 0xBFFDFBC2, 0xBFFEFBC2, 0xBFFFFBC2, 0xC000FBC2, 0xC001FBC2, 0xC002FBC2, 0xC003FBC2, 0xC004FBC2, 0xC005FBC2, 0xC006FBC2, 0xC007FBC2, 0xC008FBC2, 0xC009FBC2, + 0xC00AFBC2, 0xC00BFBC2, 0xC00CFBC2, 0xC00DFBC2, 0xC00EFBC2, 0xC00FFBC2, 0xC010FBC2, 0xC011FBC2, 0xC012FBC2, 0xC013FBC2, 0xC014FBC2, 0xC015FBC2, 0xC016FBC2, 0xC017FBC2, 0xC018FBC2, + 0xC019FBC2, 0xC01AFBC2, 0xC01BFBC2, 0xC01CFBC2, 0xC01DFBC2, 0xC01EFBC2, 0xC01FFBC2, 0xC020FBC2, 0xC021FBC2, 0xC022FBC2, 0xC023FBC2, 0xC024FBC2, 0xC025FBC2, 0xC026FBC2, 0xC027FBC2, + 0xC028FBC2, 0xC029FBC2, 0xC02AFBC2, 0xC02BFBC2, 0xC02CFBC2, 0xC02DFBC2, 0xC02EFBC2, 0xC02FFBC2, 0xC030FBC2, 0xC031FBC2, 0xC032FBC2, 0xC033FBC2, 0xC034FBC2, 0xC035FBC2, 0xC036FBC2, + 0xC037FBC2, 0xC038FBC2, 0xC039FBC2, 0xC03AFBC2, 0xC03BFBC2, 0xC03CFBC2, 0xC03DFBC2, 0xC03EFBC2, 0xC03FFBC2, 0xC040FBC2, 0xC041FBC2, 0xC042FBC2, 0xC043FBC2, 0xC044FBC2, 0xC045FBC2, + 0xC046FBC2, 0xC047FBC2, 0xC048FBC2, 0xC049FBC2, 0xC04AFBC2, 0xC04BFBC2, 0xC04CFBC2, 0xC04DFBC2, 0xC04EFBC2, 0xC04FFBC2, 0xC050FBC2, 0xC051FBC2, 0xC052FBC2, 0xC053FBC2, 0xC054FBC2, + 0xC055FBC2, 0xC056FBC2, 0xC057FBC2, 0xC058FBC2, 0xC059FBC2, 0xC05AFBC2, 0xC05BFBC2, 0xC05CFBC2, 0xC05DFBC2, 0xC05EFBC2, 0xC05FFBC2, 0xC060FBC2, 0xC061FBC2, 0xC062FBC2, 0xC063FBC2, + 0xC064FBC2, 0xC065FBC2, 0xC066FBC2, 
0xC067FBC2, 0xC068FBC2, 0xC069FBC2, 0xC06AFBC2, 0xC06BFBC2, 0xC06CFBC2, 0xC06DFBC2, 0xC06EFBC2, 0xC06FFBC2, 0xC070FBC2, 0xC071FBC2, 0xC072FBC2, + 0xC073FBC2, 0xC074FBC2, 0xC075FBC2, 0xC076FBC2, 0xC077FBC2, 0xC078FBC2, 0xC079FBC2, 0xC07AFBC2, 0xC07BFBC2, 0xC07CFBC2, 0xC07DFBC2, 0xC07EFBC2, 0xC07FFBC2, 0xC080FBC2, 0xC081FBC2, + 0xC082FBC2, 0xC083FBC2, 0xC084FBC2, 0xC085FBC2, 0xC086FBC2, 0xC087FBC2, 0xC088FBC2, 0xC089FBC2, 0xC08AFBC2, 0xC08BFBC2, 0xC08CFBC2, 0xC08DFBC2, 0xC08EFBC2, 0xC08FFBC2, 0xC090FBC2, + 0xC091FBC2, 0xC092FBC2, 0xC093FBC2, 0xC094FBC2, 0xC095FBC2, 0xC096FBC2, 0xC097FBC2, 0xC098FBC2, 0xC099FBC2, 0xC09AFBC2, 0xC09BFBC2, 0xC09CFBC2, 0xC09DFBC2, 0xC09EFBC2, 0xC09FFBC2, + 0xC0A0FBC2, 0xC0A1FBC2, 0xC0A2FBC2, 0xC0A3FBC2, 0xC0A4FBC2, 0xC0A5FBC2, 0xC0A6FBC2, 0xC0A7FBC2, 0xC0A8FBC2, 0xC0A9FBC2, 0xC0AAFBC2, 0xC0ABFBC2, 0xC0ACFBC2, 0xC0ADFBC2, 0xC0AEFBC2, + 0xC0AFFBC2, 0xC0B0FBC2, 0xC0B1FBC2, 0xC0B2FBC2, 0xC0B3FBC2, 0xC0B4FBC2, 0xC0B5FBC2, 0xC0B6FBC2, 0xC0B7FBC2, 0xC0B8FBC2, 0xC0B9FBC2, 0xC0BAFBC2, 0xC0BBFBC2, 0xC0BCFBC2, 0xC0BDFBC2, + 0xC0BEFBC2, 0xC0BFFBC2, 0xC0C0FBC2, 0xC0C1FBC2, 0xC0C2FBC2, 0xC0C3FBC2, 0xC0C4FBC2, 0xC0C5FBC2, 0xC0C6FBC2, 0xC0C7FBC2, 0xC0C8FBC2, 0xC0C9FBC2, 0xC0CAFBC2, 0xC0CBFBC2, 0xC0CCFBC2, + 0xC0CDFBC2, 0xC0CEFBC2, 0xC0CFFBC2, 0xC0D0FBC2, 0xC0D1FBC2, 0xC0D2FBC2, 0xC0D3FBC2, 0xC0D4FBC2, 0xC0D5FBC2, 0xC0D6FBC2, 0xC0D7FBC2, 0xC0D8FBC2, 0xC0D9FBC2, 0xC0DAFBC2, 0xC0DBFBC2, + 0xC0DCFBC2, 0xC0DDFBC2, 0xC0DEFBC2, 0xC0DFFBC2, 0xC0E0FBC2, 0xC0E1FBC2, 0xC0E2FBC2, 0xC0E3FBC2, 0xC0E4FBC2, 0xC0E5FBC2, 0xC0E6FBC2, 0xC0E7FBC2, 0xC0E8FBC2, 0xC0E9FBC2, 0xC0EAFBC2, + 0xC0EBFBC2, 0xC0ECFBC2, 0xC0EDFBC2, 0xC0EEFBC2, 0xC0EFFBC2, 0xC0F0FBC2, 0xC0F1FBC2, 0xC0F2FBC2, 0xC0F3FBC2, 0xC0F4FBC2, 0xC0F5FBC2, 0xC0F6FBC2, 0xC0F7FBC2, 0xC0F8FBC2, 0xC0F9FBC2, + 0xC0FAFBC2, 0xC0FBFBC2, 0xC0FCFBC2, 0xC0FDFBC2, 0xC0FEFBC2, 0xC0FFFBC2, 0xC100FBC2, 0xC101FBC2, 0xC102FBC2, 0xC103FBC2, 0xC104FBC2, 0xC105FBC2, 0xC106FBC2, 0xC107FBC2, 0xC108FBC2, + 0xC109FBC2, 0xC10AFBC2, 
0xC10BFBC2, 0xC10CFBC2, 0xC10DFBC2, 0xC10EFBC2, 0xC10FFBC2, 0xC110FBC2, 0xC111FBC2, 0xC112FBC2, 0xC113FBC2, 0xC114FBC2, 0xC115FBC2, 0xC116FBC2, 0xC117FBC2, + 0xC118FBC2, 0xC119FBC2, 0xC11AFBC2, 0xC11BFBC2, 0xC11CFBC2, 0xC11DFBC2, 0xC11EFBC2, 0xC11FFBC2, 0xC120FBC2, 0xC121FBC2, 0xC122FBC2, 0xC123FBC2, 0xC124FBC2, 0xC125FBC2, 0xC126FBC2, + 0xC127FBC2, 0xC128FBC2, 0xC129FBC2, 0xC12AFBC2, 0xC12BFBC2, 0xC12CFBC2, 0xC12DFBC2, 0xC12EFBC2, 0xC12FFBC2, 0xC130FBC2, 0xC131FBC2, 0xC132FBC2, 0xC133FBC2, 0xC134FBC2, 0xC135FBC2, + 0xC136FBC2, 0xC137FBC2, 0xC138FBC2, 0xC139FBC2, 0xC13AFBC2, 0xC13BFBC2, 0xC13CFBC2, 0xC13DFBC2, 0xC13EFBC2, 0xC13FFBC2, 0xC140FBC2, 0xC141FBC2, 0xC142FBC2, 0xC143FBC2, 0xC144FBC2, + 0xC145FBC2, 0xC146FBC2, 0xC147FBC2, 0xC148FBC2, 0xC149FBC2, 0xC14AFBC2, 0xC14BFBC2, 0xC14CFBC2, 0xC14DFBC2, 0xC14EFBC2, 0xC14FFBC2, 0xC150FBC2, 0xC151FBC2, 0xC152FBC2, 0xC153FBC2, + 0xC154FBC2, 0xC155FBC2, 0xC156FBC2, 0xC157FBC2, 0xC158FBC2, 0xC159FBC2, 0xC15AFBC2, 0xC15BFBC2, 0xC15CFBC2, 0xC15DFBC2, 0xC15EFBC2, 0xC15FFBC2, 0xC160FBC2, 0xC161FBC2, 0xC162FBC2, + 0xC163FBC2, 0xC164FBC2, 0xC165FBC2, 0xC166FBC2, 0xC167FBC2, 0xC168FBC2, 0xC169FBC2, 0xC16AFBC2, 0xC16BFBC2, 0xC16CFBC2, 0xC16DFBC2, 0xC16EFBC2, 0xC16FFBC2, 0xC170FBC2, 0xC171FBC2, + 0xC172FBC2, 0xC173FBC2, 0xC174FBC2, 0xC175FBC2, 0xC176FBC2, 0xC177FBC2, 0xC178FBC2, 0xC179FBC2, 0xC17AFBC2, 0xC17BFBC2, 0xC17CFBC2, 0xC17DFBC2, 0xC17EFBC2, 0xC17FFBC2, 0xC180FBC2, + 0xC181FBC2, 0xC182FBC2, 0xC183FBC2, 0xC184FBC2, 0xC185FBC2, 0xC186FBC2, 0xC187FBC2, 0xC188FBC2, 0xC189FBC2, 0xC18AFBC2, 0xC18BFBC2, 0xC18CFBC2, 0xC18DFBC2, 0xC18EFBC2, 0xC18FFBC2, + 0xC190FBC2, 0xC191FBC2, 0xC192FBC2, 0xC193FBC2, 0xC194FBC2, 0xC195FBC2, 0xC196FBC2, 0xC197FBC2, 0xC198FBC2, 0xC199FBC2, 0xC19AFBC2, 0xC19BFBC2, 0xC19CFBC2, 0xC19DFBC2, 0xC19EFBC2, + 0xC19FFBC2, 0xC1A0FBC2, 0xC1A1FBC2, 0xC1A2FBC2, 0xC1A3FBC2, 0xC1A4FBC2, 0xC1A5FBC2, 0xC1A6FBC2, 0xC1A7FBC2, 0xC1A8FBC2, 0xC1A9FBC2, 0xC1AAFBC2, 0xC1ABFBC2, 0xC1ACFBC2, 0xC1ADFBC2, + 0xC1AEFBC2, 
0xC1AFFBC2, 0xC1B0FBC2, 0xC1B1FBC2, 0xC1B2FBC2, 0xC1B3FBC2, 0xC1B4FBC2, 0xC1B5FBC2, 0xC1B6FBC2, 0xC1B7FBC2, 0xC1B8FBC2, 0xC1B9FBC2, 0xC1BAFBC2, 0xC1BBFBC2, 0xC1BCFBC2, + 0xC1BDFBC2, 0xC1BEFBC2, 0xC1BFFBC2, 0xC1C0FBC2, 0xC1C1FBC2, 0xC1C2FBC2, 0xC1C3FBC2, 0xC1C4FBC2, 0xC1C5FBC2, 0xC1C6FBC2, 0xC1C7FBC2, 0xC1C8FBC2, 0xC1C9FBC2, 0xC1CAFBC2, 0xC1CBFBC2, + 0xC1CCFBC2, 0xC1CDFBC2, 0xC1CEFBC2, 0xC1CFFBC2, 0xC1D0FBC2, 0xC1D1FBC2, 0xC1D2FBC2, 0xC1D3FBC2, 0xC1D4FBC2, 0xC1D5FBC2, 0xC1D6FBC2, 0xC1D7FBC2, 0xC1D8FBC2, 0xC1D9FBC2, 0xC1DAFBC2, + 0xC1DBFBC2, 0xC1DCFBC2, 0xC1DDFBC2, 0xC1DEFBC2, 0xC1DFFBC2, 0xC1E0FBC2, 0xC1E1FBC2, 0xC1E2FBC2, 0xC1E3FBC2, 0xC1E4FBC2, 0xC1E5FBC2, 0xC1E6FBC2, 0xC1E7FBC2, 0xC1E8FBC2, 0xC1E9FBC2, + 0xC1EAFBC2, 0xC1EBFBC2, 0xC1ECFBC2, 0xC1EDFBC2, 0xC1EEFBC2, 0xC1EFFBC2, 0xC1F0FBC2, 0xC1F1FBC2, 0xC1F2FBC2, 0xC1F3FBC2, 0xC1F4FBC2, 0xC1F5FBC2, 0xC1F6FBC2, 0xC1F7FBC2, 0xC1F8FBC2, + 0xC1F9FBC2, 0xC1FAFBC2, 0xC1FBFBC2, 0xC1FCFBC2, 0xC1FDFBC2, 0xC1FEFBC2, 0xC1FFFBC2, 0xC200FBC2, 0xC201FBC2, 0xC202FBC2, 0xC203FBC2, 0xC204FBC2, 0xC205FBC2, 0xC206FBC2, 0xC207FBC2, + 0xC208FBC2, 0xC209FBC2, 0xC20AFBC2, 0xC20BFBC2, 0xC20CFBC2, 0xC20DFBC2, 0xC20EFBC2, 0xC20FFBC2, 0xC210FBC2, 0xC211FBC2, 0xC212FBC2, 0xC213FBC2, 0xC214FBC2, 0xC215FBC2, 0xC216FBC2, + 0xC217FBC2, 0xC218FBC2, 0xC219FBC2, 0xC21AFBC2, 0xC21BFBC2, 0xC21CFBC2, 0xC21DFBC2, 0xC21EFBC2, 0xC21FFBC2, 0xC220FBC2, 0xC221FBC2, 0xC222FBC2, 0xC223FBC2, 0xC224FBC2, 0xC225FBC2, + 0xC226FBC2, 0xC227FBC2, 0xC228FBC2, 0xC229FBC2, 0xC22AFBC2, 0xC22BFBC2, 0xC22CFBC2, 0xC22DFBC2, 0xC22EFBC2, 0xC22FFBC2, 0xC230FBC2, 0xC231FBC2, 0xC232FBC2, 0xC233FBC2, 0xC234FBC2, + 0xC235FBC2, 0xC236FBC2, 0xC237FBC2, 0xC238FBC2, 0xC239FBC2, 0xC23AFBC2, 0xC23BFBC2, 0xC23CFBC2, 0xC23DFBC2, 0xC23EFBC2, 0xC23FFBC2, 0xC240FBC2, 0xC241FBC2, 0xC242FBC2, 0xC243FBC2, + 0xC244FBC2, 0xC245FBC2, 0xC246FBC2, 0xC247FBC2, 0xC248FBC2, 0xC249FBC2, 0xC24AFBC2, 0xC24BFBC2, 0xC24CFBC2, 0xC24DFBC2, 0xC24EFBC2, 0xC24FFBC2, 0xC250FBC2, 0xC251FBC2, 0xC252FBC2, + 
0xC253FBC2, 0xC254FBC2, 0xC255FBC2, 0xC256FBC2, 0xC257FBC2, 0xC258FBC2, 0xC259FBC2, 0xC25AFBC2, 0xC25BFBC2, 0xC25CFBC2, 0xC25DFBC2, 0xC25EFBC2, 0xC25FFBC2, 0xC260FBC2, 0xC261FBC2, + 0xC262FBC2, 0xC263FBC2, 0xC264FBC2, 0xC265FBC2, 0xC266FBC2, 0xC267FBC2, 0xC268FBC2, 0xC269FBC2, 0xC26AFBC2, 0xC26BFBC2, 0xC26CFBC2, 0xC26DFBC2, 0xC26EFBC2, 0xC26FFBC2, 0xC270FBC2, + 0xC271FBC2, 0xC272FBC2, 0xC273FBC2, 0xC274FBC2, 0xC275FBC2, 0xC276FBC2, 0xC277FBC2, 0xC278FBC2, 0xC279FBC2, 0xC27AFBC2, 0xC27BFBC2, 0xC27CFBC2, 0xC27DFBC2, 0xC27EFBC2, 0xC27FFBC2, + 0xC280FBC2, 0xC281FBC2, 0xC282FBC2, 0xC283FBC2, 0xC284FBC2, 0xC285FBC2, 0xC286FBC2, 0xC287FBC2, 0xC288FBC2, 0xC289FBC2, 0xC28AFBC2, 0xC28BFBC2, 0xC28CFBC2, 0xC28DFBC2, 0xC28EFBC2, + 0xC28FFBC2, 0xC290FBC2, 0xC291FBC2, 0xC292FBC2, 0xC293FBC2, 0xC294FBC2, 0xC295FBC2, 0xC296FBC2, 0xC297FBC2, 0xC298FBC2, 0xC299FBC2, 0xC29AFBC2, 0xC29BFBC2, 0xC29CFBC2, 0xC29DFBC2, + 0xC29EFBC2, 0xC29FFBC2, 0xC2A0FBC2, 0xC2A1FBC2, 0xC2A2FBC2, 0xC2A3FBC2, 0xC2A4FBC2, 0xC2A5FBC2, 0xC2A6FBC2, 0xC2A7FBC2, 0xC2A8FBC2, 0xC2A9FBC2, 0xC2AAFBC2, 0xC2ABFBC2, 0xC2ACFBC2, + 0xC2ADFBC2, 0xC2AEFBC2, 0xC2AFFBC2, 0xC2B0FBC2, 0xC2B1FBC2, 0xC2B2FBC2, 0xC2B3FBC2, 0xC2B4FBC2, 0xC2B5FBC2, 0xC2B6FBC2, 0xC2B7FBC2, 0xC2B8FBC2, 0xC2B9FBC2, 0xC2BAFBC2, 0xC2BBFBC2, + 0xC2BCFBC2, 0xC2BDFBC2, 0xC2BEFBC2, 0xC2BFFBC2, 0xC2C0FBC2, 0xC2C1FBC2, 0xC2C2FBC2, 0xC2C3FBC2, 0xC2C4FBC2, 0xC2C5FBC2, 0xC2C6FBC2, 0xC2C7FBC2, 0xC2C8FBC2, 0xC2C9FBC2, 0xC2CAFBC2, + 0xC2CBFBC2, 0xC2CCFBC2, 0xC2CDFBC2, 0xC2CEFBC2, 0xC2CFFBC2, 0xC2D0FBC2, 0xC2D1FBC2, 0xC2D2FBC2, 0xC2D3FBC2, 0xC2D4FBC2, 0xC2D5FBC2, 0xC2D6FBC2, 0xC2D7FBC2, 0xC2D8FBC2, 0xC2D9FBC2, + 0xC2DAFBC2, 0xC2DBFBC2, 0xC2DCFBC2, 0xC2DDFBC2, 0xC2DEFBC2, 0xC2DFFBC2, 0xC2E0FBC2, 0xC2E1FBC2, 0xC2E2FBC2, 0xC2E3FBC2, 0xC2E4FBC2, 0xC2E5FBC2, 0xC2E6FBC2, 0xC2E7FBC2, 0xC2E8FBC2, + 0xC2E9FBC2, 0xC2EAFBC2, 0xC2EBFBC2, 0xC2ECFBC2, 0xC2EDFBC2, 0xC2EEFBC2, 0xC2EFFBC2, 0xC2F0FBC2, 0xC2F1FBC2, 0xC2F2FBC2, 0xC2F3FBC2, 0xC2F4FBC2, 0xC2F5FBC2, 0xC2F6FBC2, 0xC2F7FBC2, 
+ 0xC2F8FBC2, 0xC2F9FBC2, 0xC2FAFBC2, 0xC2FBFBC2, 0xC2FCFBC2, 0xC2FDFBC2, 0xC2FEFBC2, 0xC2FFFBC2, 0xC300FBC2, 0xC301FBC2, 0xC302FBC2, 0xC303FBC2, 0xC304FBC2, 0xC305FBC2, 0xC306FBC2, + 0xC307FBC2, 0xC308FBC2, 0xC309FBC2, 0xC30AFBC2, 0xC30BFBC2, 0xC30CFBC2, 0xC30DFBC2, 0xC30EFBC2, 0xC30FFBC2, 0xC310FBC2, 0xC311FBC2, 0xC312FBC2, 0xC313FBC2, 0xC314FBC2, 0xC315FBC2, + 0xC316FBC2, 0xC317FBC2, 0xC318FBC2, 0xC319FBC2, 0xC31AFBC2, 0xC31BFBC2, 0xC31CFBC2, 0xC31DFBC2, 0xC31EFBC2, 0xC31FFBC2, 0xC320FBC2, 0xC321FBC2, 0xC322FBC2, 0xC323FBC2, 0xC324FBC2, + 0xC325FBC2, 0xC326FBC2, 0xC327FBC2, 0xC328FBC2, 0xC329FBC2, 0xC32AFBC2, 0xC32BFBC2, 0xC32CFBC2, 0xC32DFBC2, 0xC32EFBC2, 0xC32FFBC2, 0xC330FBC2, 0xC331FBC2, 0xC332FBC2, 0xC333FBC2, + 0xC334FBC2, 0xC335FBC2, 0xC336FBC2, 0xC337FBC2, 0xC338FBC2, 0xC339FBC2, 0xC33AFBC2, 0xC33BFBC2, 0xC33CFBC2, 0xC33DFBC2, 0xC33EFBC2, 0xC33FFBC2, 0xC340FBC2, 0xC341FBC2, 0xC342FBC2, + 0xC343FBC2, 0xC344FBC2, 0xC345FBC2, 0xC346FBC2, 0xC347FBC2, 0xC348FBC2, 0xC349FBC2, 0xC34AFBC2, 0xC34BFBC2, 0xC34CFBC2, 0xC34DFBC2, 0xC34EFBC2, 0xC34FFBC2, 0xC350FBC2, 0xC351FBC2, + 0xC352FBC2, 0xC353FBC2, 0xC354FBC2, 0xC355FBC2, 0xC356FBC2, 0xC357FBC2, 0xC358FBC2, 0xC359FBC2, 0xC35AFBC2, 0xC35BFBC2, 0xC35CFBC2, 0xC35DFBC2, 0xC35EFBC2, 0xC35FFBC2, 0xC360FBC2, + 0xC361FBC2, 0xC362FBC2, 0xC363FBC2, 0xC364FBC2, 0xC365FBC2, 0xC366FBC2, 0xC367FBC2, 0xC368FBC2, 0xC369FBC2, 0xC36AFBC2, 0xC36BFBC2, 0xC36CFBC2, 0xC36DFBC2, 0xC36EFBC2, 0xC36FFBC2, + 0xC370FBC2, 0xC371FBC2, 0xC372FBC2, 0xC373FBC2, 0xC374FBC2, 0xC375FBC2, 0xC376FBC2, 0xC377FBC2, 0xC378FBC2, 0xC379FBC2, 0xC37AFBC2, 0xC37BFBC2, 0xC37CFBC2, 0xC37DFBC2, 0xC37EFBC2, + 0xC37FFBC2, 0xC380FBC2, 0xC381FBC2, 0xC382FBC2, 0xC383FBC2, 0xC384FBC2, 0xC385FBC2, 0xC386FBC2, 0xC387FBC2, 0xC388FBC2, 0xC389FBC2, 0xC38AFBC2, 0xC38BFBC2, 0xC38CFBC2, 0xC38DFBC2, + 0xC38EFBC2, 0xC38FFBC2, 0xC390FBC2, 0xC391FBC2, 0xC392FBC2, 0xC393FBC2, 0xC394FBC2, 0xC395FBC2, 0xC396FBC2, 0xC397FBC2, 0xC398FBC2, 0xC399FBC2, 0xC39AFBC2, 0xC39BFBC2, 
0xC39CFBC2, + 0xC39DFBC2, 0xC39EFBC2, 0xC39FFBC2, 0xC3A0FBC2, 0xC3A1FBC2, 0xC3A2FBC2, 0xC3A3FBC2, 0xC3A4FBC2, 0xC3A5FBC2, 0xC3A6FBC2, 0xC3A7FBC2, 0xC3A8FBC2, 0xC3A9FBC2, 0xC3AAFBC2, 0xC3ABFBC2, + 0xC3ACFBC2, 0xC3ADFBC2, 0xC3AEFBC2, 0xC3AFFBC2, 0xC3B0FBC2, 0xC3B1FBC2, 0xC3B2FBC2, 0xC3B3FBC2, 0xC3B4FBC2, 0xC3B5FBC2, 0xC3B6FBC2, 0xC3B7FBC2, 0xC3B8FBC2, 0xC3B9FBC2, 0xC3BAFBC2, + 0xC3BBFBC2, 0xC3BCFBC2, 0xC3BDFBC2, 0xC3BEFBC2, 0xC3BFFBC2, 0xC3C0FBC2, 0xC3C1FBC2, 0xC3C2FBC2, 0xC3C3FBC2, 0xC3C4FBC2, 0xC3C5FBC2, 0xC3C6FBC2, 0xC3C7FBC2, 0xC3C8FBC2, 0xC3C9FBC2, + 0xC3CAFBC2, 0xC3CBFBC2, 0xC3CCFBC2, 0xC3CDFBC2, 0xC3CEFBC2, 0xC3CFFBC2, 0xC3D0FBC2, 0xC3D1FBC2, 0xC3D2FBC2, 0xC3D3FBC2, 0xC3D4FBC2, 0xC3D5FBC2, 0xC3D6FBC2, 0xC3D7FBC2, 0xC3D8FBC2, + 0xC3D9FBC2, 0xC3DAFBC2, 0xC3DBFBC2, 0xC3DCFBC2, 0xC3DDFBC2, 0xC3DEFBC2, 0xC3DFFBC2, 0xC3E0FBC2, 0xC3E1FBC2, 0xC3E2FBC2, 0xC3E3FBC2, 0xC3E4FBC2, 0xC3E5FBC2, 0xC3E6FBC2, 0xC3E7FBC2, + 0xC3E8FBC2, 0xC3E9FBC2, 0xC3EAFBC2, 0xC3EBFBC2, 0xC3ECFBC2, 0xC3EDFBC2, 0xC3EEFBC2, 0xC3EFFBC2, 0xC3F0FBC2, 0xC3F1FBC2, 0xC3F2FBC2, 0xC3F3FBC2, 0xC3F4FBC2, 0xC3F5FBC2, 0xC3F6FBC2, + 0xC3F7FBC2, 0xC3F8FBC2, 0xC3F9FBC2, 0xC3FAFBC2, 0xC3FBFBC2, 0xC3FCFBC2, 0xC3FDFBC2, 0xC3FEFBC2, 0xC3FFFBC2, 0x525D, 0x525E, 0x525F, 0x5260, 0x5261, 0x5262, + 0x5263, 0x5264, 0x5265, 0x5266, 0x5267, 0x5268, 0x5269, 0x526A, 0x526B, 0x526C, 0x526D, 0x526E, 0x526F, 0x5270, 0x5271, + 0x5272, 0x5273, 0x5274, 0x5275, 0x5276, 0x5277, 0x5278, 0x5279, 0x527A, 0x527B, 0x527C, 0x527D, 0x527E, 0x527F, 0x5280, + 0x5281, 0x5282, 0x5283, 0x5284, 0x5285, 0x5286, 0x5287, 0x5288, 0x5289, 0x528A, 0x528B, 0x528C, 0x528D, 0x528E, 0x528F, + 0x5290, 0x5291, 0x5292, 0x5293, 0x5294, 0x5295, 0x5296, 0x5297, 0x5298, 0x5299, 0x529A, 0x529B, 0x529C, 0x529D, 0x529E, + 0x529F, 0x52A0, 0x52A1, 0x52A2, 0x52A3, 0x52A4, 0x52A5, 0x52A6, 0x52A7, 0x52A8, 0x52A9, 0x52AA, 0x52AB, 0x52AC, 0x52AD, + 0x52AE, 0x52AF, 0x52B0, 0x52B1, 0x52B2, 0x52B3, 0x52B4, 0x52B5, 0x52B6, 0x52B7, 0x52B8, 0x52B9, 0x52BA, 0x52BB, 0x52BC, + 
0x52BD, 0x52BE, 0x52BF, 0x52C0, 0x52C1, 0x52C2, 0x52C3, 0x52C4, 0x52C5, 0x52C6, 0x52C7, 0x52C8, 0x52C9, 0x52CA, 0x52CB, + 0x52CC, 0x52CD, 0x52CE, 0x52CF, 0x52D0, 0x52D1, 0x52D2, 0x52D3, 0x52D4, 0x52D5, 0x52D6, 0x52D7, 0x52D8, 0x52D9, 0x52DA, + 0x52DB, 0x52DC, 0x52DD, 0x52DE, 0x52DF, 0x52E0, 0x52E1, 0x52E2, 0x52E3, 0x52E4, 0x52E5, 0x52E6, 0x52E7, 0x52E8, 0x52E9, + 0x52EA, 0x52EB, 0x52EC, 0x52ED, 0x52EE, 0x52EF, 0x52F0, 0x52F1, 0x52F2, 0x52F3, 0x52F4, 0x52F5, 0x52F6, 0x52F7, 0x52F8, + 0x52F9, 0x52FA, 0x52FB, 0x52FC, 0x52FD, 0x52FE, 0x52FF, 0x5300, 0x5301, 0x5302, 0x5303, 0x5304, 0x5305, 0x5306, 0x5307, + 0x5308, 0x5309, 0x530A, 0x530B, 0x530C, 0x530D, 0x530E, 0x530F, 0x5310, 0x5311, 0x5312, 0x5313, 0x5314, 0x5315, 0x5316, + 0x5317, 0x5318, 0x5319, 0x531A, 0x531B, 0x531C, 0x531D, 0x531E, 0x531F, 0x5320, 0x5321, 0x5322, 0x5323, 0x5324, 0x5325, + 0x5326, 0x5327, 0x5328, 0x5329, 0x532A, 0x532B, 0x532C, 0x532D, 0x532E, 0x532F, 0x5330, 0x5331, 0x5332, 0x5333, 0x5334, + 0x5335, 0x5336, 0x5337, 0x5338, 0x5339, 0x533A, 0x533B, 0x533C, 0x533D, 0x533E, 0x533F, 0x5340, 0x5341, 0x5342, 0x5343, + 0x5344, 0x5345, 0x5346, 0x5347, 0x5348, 0x5349, 0x534A, 0x534B, 0x534C, 0x534D, 0x534E, 0x534F, 0x5350, 0x5351, 0x5352, + 0x5353, 0x5354, 0x5355, 0x5356, 0x5357, 0x5358, 0x5359, 0x535A, 0x535B, 0x535C, 0x535D, 0x535E, 0x535F, 0x5360, 0x5361, + 0x5362, 0x5363, 0x5364, 0x5365, 0x5366, 0x5367, 0x5368, 0x5369, 0x536A, 0x536B, 0x536C, 0x536D, 0x536E, 0x536F, 0x5370, + 0x5371, 0x5372, 0x5373, 0x5374, 0x5375, 0x5376, 0x5377, 0x5378, 0x5379, 0x537A, 0x537B, 0x537C, 0x537D, 0x537E, 0x537F, + 0x5380, 0x5381, 0x5382, 0x5383, 0x5384, 0x5385, 0x5386, 0x5387, 0x5388, 0x5389, 0x538A, 0x538B, 0x538C, 0x538D, 0x538E, + 0x538F, 0x5390, 0x5391, 0x5392, 0x5393, 0x5394, 0x5395, 0x5396, 0x5397, 0x5398, 0x5399, 0x539A, 0x539B, 0x539C, 0x539D, + 0x539E, 0x539F, 0x53A0, 0x53A1, 0x53A2, 0x53A3, 0x53A4, 0x53A5, 0x53A6, 0x53A7, 0x53A8, 0x53A9, 0x53AA, 0x53AB, 0x53AC, + 0x53AD, 0x53AE, 0x53AF, 0x53B0, 0x53B1, 0x53B2, 
0x53B3, 0x53B4, 0x53B5, 0x53B6, 0x53B7, 0x53B8, 0x53B9, 0x53BA, 0x53BB, + 0x53BC, 0x53BD, 0x53BE, 0x53BF, 0x53C0, 0x53C1, 0x53C2, 0x53C3, 0x53C4, 0x53C5, 0x53C6, 0x53C7, 0x53C8, 0x53C9, 0x53CA, + 0x53CB, 0x53CC, 0x53CD, 0x53CE, 0x53CF, 0x53D0, 0x53D1, 0x53D2, 0x53D3, 0x53D4, 0x53D5, 0x53D6, 0x53D7, 0x53D8, 0x53D9, + 0x53DA, 0x53DB, 0x53DC, 0x53DD, 0x53DE, 0x53DF, 0x53E0, 0x53E1, 0x53E2, 0x53E3, 0x53E4, 0x53E5, 0x53E6, 0x53E7, 0x53E8, + 0x53E9, 0x53EA, 0x53EB, 0x53EC, 0x53ED, 0x53EE, 0x53EF, 0x53F0, 0x53F1, 0x53F2, 0x53F3, 0x53F4, 0x53F5, 0x53F6, 0x53F7, + 0x53F8, 0x53F9, 0x53FA, 0x53FB, 0x53FC, 0x53FD, 0x53FE, 0x53FF, 0x5400, 0x5401, 0x5402, 0x5403, 0x5404, 0x5405, 0x5406, + 0x5407, 0x5408, 0x5409, 0x540A, 0x540B, 0x540C, 0x540D, 0x540E, 0x540F, 0x5410, 0x5411, 0x5412, 0x5413, 0x5414, 0x5415, + 0x5416, 0x5417, 0x5418, 0x5419, 0x541A, 0x541B, 0x541C, 0x541D, 0x541E, 0x541F, 0x5420, 0x5421, 0x5422, 0x5423, 0x5424, + 0x5425, 0x5426, 0x5427, 0x5428, 0x5429, 0x542A, 0x542B, 0x542C, 0x542D, 0x542E, 0x542F, 0x5430, 0x5431, 0x5432, 0x5433, + 0x5434, 0x5435, 0x5436, 0x5437, 0x5438, 0x5439, 0x543A, 0x543B, 0x543C, 0x543D, 0x543E, 0x543F, 0x5440, 0x5441, 0x5442, + 0x5443, 0x5444, 0x5445, 0x5446, 0x5447, 0x5448, 0x5449, 0x544A, 0x544B, 0x544C, 0x544D, 0x544E, 0x544F, 0x5450, 0x5451, + 0x5452, 0x5453, 0x5454, 0x5455, 0x5456, 0x5457, 0x5458, 0x5459, 0x545A, 0x545B, 0x545C, 0x545D, 0x545E, 0x545F, 0x5460, + 0x5461, 0x5462, 0x5463, 0x5464, 0x5465, 0x5466, 0x5467, 0x5468, 0x5469, 0x546A, 0x546B, 0x546C, 0x546D, 0x546E, 0x546F, + 0x5470, 0x5471, 0x5472, 0x5473, 0x5474, 0x5475, 0x5476, 0x5477, 0x5478, 0x5479, 0x547A, 0x547B, 0x547C, 0x547D, 0x547E, + 0x547F, 0x5480, 0x5481, 0x5482, 0x5483, 0x5484, 0x5485, 0x5486, 0x5487, 0x5488, 0x5489, 0x548A, 0x548B, 0x548C, 0x548D, + 0x548E, 0x548F, 0x5490, 0x5491, 0x5492, 0x5493, 0x5494, 0x5495, 0x5496, 0x5497, 0x5498, 0x5499, 0x549A, 0x549B, 0x549C, + 0x549D, 0x549E, 0x549F, 0x54A0, 0x54A1, 0x54A2, 0x54A3, 0xC647FBC2, 0xC648FBC2, 0xC649FBC2, 
0xC64AFBC2, 0xC64BFBC2, 0xC64CFBC2, 0xC64DFBC2, 0xC64EFBC2, + 0xC64FFBC2, 0xC650FBC2, 0xC651FBC2, 0xC652FBC2, 0xC653FBC2, 0xC654FBC2, 0xC655FBC2, 0xC656FBC2, 0xC657FBC2, 0xC658FBC2, 0xC659FBC2, 0xC65AFBC2, 0xC65BFBC2, 0xC65CFBC2, 0xC65DFBC2, + 0xC65EFBC2, 0xC65FFBC2, 0xC660FBC2, 0xC661FBC2, 0xC662FBC2, 0xC663FBC2, 0xC664FBC2, 0xC665FBC2, 0xC666FBC2, 0xC667FBC2, 0xC668FBC2, 0xC669FBC2, 0xC66AFBC2, 0xC66BFBC2, 0xC66CFBC2, + 0xC66DFBC2, 0xC66EFBC2, 0xC66FFBC2, 0xC670FBC2, 0xC671FBC2, 0xC672FBC2, 0xC673FBC2, 0xC674FBC2, 0xC675FBC2, 0xC676FBC2, 0xC677FBC2, 0xC678FBC2, 0xC679FBC2, 0xC67AFBC2, 0xC67BFBC2, + 0xC67CFBC2, 0xC67DFBC2, 0xC67EFBC2, 0xC67FFBC2, 0xC680FBC2, 0xC681FBC2, 0xC682FBC2, 0xC683FBC2, 0xC684FBC2, 0xC685FBC2, 0xC686FBC2, 0xC687FBC2, 0xC688FBC2, 0xC689FBC2, 0xC68AFBC2, + 0xC68BFBC2, 0xC68CFBC2, 0xC68DFBC2, 0xC68EFBC2, 0xC68FFBC2, 0xC690FBC2, 0xC691FBC2, 0xC692FBC2, 0xC693FBC2, 0xC694FBC2, 0xC695FBC2, 0xC696FBC2, 0xC697FBC2, 0xC698FBC2, 0xC699FBC2, + 0xC69AFBC2, 0xC69BFBC2, 0xC69CFBC2, 0xC69DFBC2, 0xC69EFBC2, 0xC69FFBC2, 0xC6A0FBC2, 0xC6A1FBC2, 0xC6A2FBC2, 0xC6A3FBC2, 0xC6A4FBC2, 0xC6A5FBC2, 0xC6A6FBC2, 0xC6A7FBC2, 0xC6A8FBC2, + 0xC6A9FBC2, 0xC6AAFBC2, 0xC6ABFBC2, 0xC6ACFBC2, 0xC6ADFBC2, 0xC6AEFBC2, 0xC6AFFBC2, 0xC6B0FBC2, 0xC6B1FBC2, 0xC6B2FBC2, 0xC6B3FBC2, 0xC6B4FBC2, 0xC6B5FBC2, 0xC6B6FBC2, 0xC6B7FBC2, + 0xC6B8FBC2, 0xC6B9FBC2, 0xC6BAFBC2, 0xC6BBFBC2, 0xC6BCFBC2, 0xC6BDFBC2, 0xC6BEFBC2, 0xC6BFFBC2, 0xC6C0FBC2, 0xC6C1FBC2, 0xC6C2FBC2, 0xC6C3FBC2, 0xC6C4FBC2, 0xC6C5FBC2, 0xC6C6FBC2, + 0xC6C7FBC2, 0xC6C8FBC2, 0xC6C9FBC2, 0xC6CAFBC2, 0xC6CBFBC2, 0xC6CCFBC2, 0xC6CDFBC2, 0xC6CEFBC2, 0xC6CFFBC2, 0xC6D0FBC2, 0xC6D1FBC2, 0xC6D2FBC2, 0xC6D3FBC2, 0xC6D4FBC2, 0xC6D5FBC2, + 0xC6D6FBC2, 0xC6D7FBC2, 0xC6D8FBC2, 0xC6D9FBC2, 0xC6DAFBC2, 0xC6DBFBC2, 0xC6DCFBC2, 0xC6DDFBC2, 0xC6DEFBC2, 0xC6DFFBC2, 0xC6E0FBC2, 0xC6E1FBC2, 0xC6E2FBC2, 0xC6E3FBC2, 0xC6E4FBC2, + 0xC6E5FBC2, 0xC6E6FBC2, 0xC6E7FBC2, 0xC6E8FBC2, 0xC6E9FBC2, 0xC6EAFBC2, 0xC6EBFBC2, 0xC6ECFBC2, 0xC6EDFBC2, 
0xC6EEFBC2, 0xC6EFFBC2, 0xC6F0FBC2, 0xC6F1FBC2, 0xC6F2FBC2, 0xC6F3FBC2, + 0xC6F4FBC2, 0xC6F5FBC2, 0xC6F6FBC2, 0xC6F7FBC2, 0xC6F8FBC2, 0xC6F9FBC2, 0xC6FAFBC2, 0xC6FBFBC2, 0xC6FCFBC2, 0xC6FDFBC2, 0xC6FEFBC2, 0xC6FFFBC2, 0xC700FBC2, 0xC701FBC2, 0xC702FBC2, + 0xC703FBC2, 0xC704FBC2, 0xC705FBC2, 0xC706FBC2, 0xC707FBC2, 0xC708FBC2, 0xC709FBC2, 0xC70AFBC2, 0xC70BFBC2, 0xC70CFBC2, 0xC70DFBC2, 0xC70EFBC2, 0xC70FFBC2, 0xC710FBC2, 0xC711FBC2, + 0xC712FBC2, 0xC713FBC2, 0xC714FBC2, 0xC715FBC2, 0xC716FBC2, 0xC717FBC2, 0xC718FBC2, 0xC719FBC2, 0xC71AFBC2, 0xC71BFBC2, 0xC71CFBC2, 0xC71DFBC2, 0xC71EFBC2, 0xC71FFBC2, 0xC720FBC2, + 0xC721FBC2, 0xC722FBC2, 0xC723FBC2, 0xC724FBC2, 0xC725FBC2, 0xC726FBC2, 0xC727FBC2, 0xC728FBC2, 0xC729FBC2, 0xC72AFBC2, 0xC72BFBC2, 0xC72CFBC2, 0xC72DFBC2, 0xC72EFBC2, 0xC72FFBC2, + 0xC730FBC2, 0xC731FBC2, 0xC732FBC2, 0xC733FBC2, 0xC734FBC2, 0xC735FBC2, 0xC736FBC2, 0xC737FBC2, 0xC738FBC2, 0xC739FBC2, 0xC73AFBC2, 0xC73BFBC2, 0xC73CFBC2, 0xC73DFBC2, 0xC73EFBC2, + 0xC73FFBC2, 0xC740FBC2, 0xC741FBC2, 0xC742FBC2, 0xC743FBC2, 0xC744FBC2, 0xC745FBC2, 0xC746FBC2, 0xC747FBC2, 0xC748FBC2, 0xC749FBC2, 0xC74AFBC2, 0xC74BFBC2, 0xC74CFBC2, 0xC74DFBC2, + 0xC74EFBC2, 0xC74FFBC2, 0xC750FBC2, 0xC751FBC2, 0xC752FBC2, 0xC753FBC2, 0xC754FBC2, 0xC755FBC2, 0xC756FBC2, 0xC757FBC2, 0xC758FBC2, 0xC759FBC2, 0xC75AFBC2, 0xC75BFBC2, 0xC75CFBC2, + 0xC75DFBC2, 0xC75EFBC2, 0xC75FFBC2, 0xC760FBC2, 0xC761FBC2, 0xC762FBC2, 0xC763FBC2, 0xC764FBC2, 0xC765FBC2, 0xC766FBC2, 0xC767FBC2, 0xC768FBC2, 0xC769FBC2, 0xC76AFBC2, 0xC76BFBC2, + 0xC76CFBC2, 0xC76DFBC2, 0xC76EFBC2, 0xC76FFBC2, 0xC770FBC2, 0xC771FBC2, 0xC772FBC2, 0xC773FBC2, 0xC774FBC2, 0xC775FBC2, 0xC776FBC2, 0xC777FBC2, 0xC778FBC2, 0xC779FBC2, 0xC77AFBC2, + 0xC77BFBC2, 0xC77CFBC2, 0xC77DFBC2, 0xC77EFBC2, 0xC77FFBC2, 0xC780FBC2, 0xC781FBC2, 0xC782FBC2, 0xC783FBC2, 0xC784FBC2, 0xC785FBC2, 0xC786FBC2, 0xC787FBC2, 0xC788FBC2, 0xC789FBC2, + 0xC78AFBC2, 0xC78BFBC2, 0xC78CFBC2, 0xC78DFBC2, 0xC78EFBC2, 0xC78FFBC2, 0xC790FBC2, 0xC791FBC2, 
0xC792FBC2, 0xC793FBC2, 0xC794FBC2, 0xC795FBC2, 0xC796FBC2, 0xC797FBC2, 0xC798FBC2, + 0xC799FBC2, 0xC79AFBC2, 0xC79BFBC2, 0xC79CFBC2, 0xC79DFBC2, 0xC79EFBC2, 0xC79FFBC2, 0xC7A0FBC2, 0xC7A1FBC2, 0xC7A2FBC2, 0xC7A3FBC2, 0xC7A4FBC2, 0xC7A5FBC2, 0xC7A6FBC2, 0xC7A7FBC2, + 0xC7A8FBC2, 0xC7A9FBC2, 0xC7AAFBC2, 0xC7ABFBC2, 0xC7ACFBC2, 0xC7ADFBC2, 0xC7AEFBC2, 0xC7AFFBC2, 0xC7B0FBC2, 0xC7B1FBC2, 0xC7B2FBC2, 0xC7B3FBC2, 0xC7B4FBC2, 0xC7B5FBC2, 0xC7B6FBC2, + 0xC7B7FBC2, 0xC7B8FBC2, 0xC7B9FBC2, 0xC7BAFBC2, 0xC7BBFBC2, 0xC7BCFBC2, 0xC7BDFBC2, 0xC7BEFBC2, 0xC7BFFBC2, 0xC7C0FBC2, 0xC7C1FBC2, 0xC7C2FBC2, 0xC7C3FBC2, 0xC7C4FBC2, 0xC7C5FBC2, + 0xC7C6FBC2, 0xC7C7FBC2, 0xC7C8FBC2, 0xC7C9FBC2, 0xC7CAFBC2, 0xC7CBFBC2, 0xC7CCFBC2, 0xC7CDFBC2, 0xC7CEFBC2, 0xC7CFFBC2, 0xC7D0FBC2, 0xC7D1FBC2, 0xC7D2FBC2, 0xC7D3FBC2, 0xC7D4FBC2, + 0xC7D5FBC2, 0xC7D6FBC2, 0xC7D7FBC2, 0xC7D8FBC2, 0xC7D9FBC2, 0xC7DAFBC2, 0xC7DBFBC2, 0xC7DCFBC2, 0xC7DDFBC2, 0xC7DEFBC2, 0xC7DFFBC2, 0xC7E0FBC2, 0xC7E1FBC2, 0xC7E2FBC2, 0xC7E3FBC2, + 0xC7E4FBC2, 0xC7E5FBC2, 0xC7E6FBC2, 0xC7E7FBC2, 0xC7E8FBC2, 0xC7E9FBC2, 0xC7EAFBC2, 0xC7EBFBC2, 0xC7ECFBC2, 0xC7EDFBC2, 0xC7EEFBC2, 0xC7EFFBC2, 0xC7F0FBC2, 0xC7F1FBC2, 0xC7F2FBC2, + 0xC7F3FBC2, 0xC7F4FBC2, 0xC7F5FBC2, 0xC7F6FBC2, 0xC7F7FBC2, 0xC7F8FBC2, 0xC7F9FBC2, 0xC7FAFBC2, 0xC7FBFBC2, 0xC7FCFBC2, 0xC7FDFBC2, 0xC7FEFBC2, 0xC7FFFBC2, 0xC800FBC2, 0xC801FBC2, + 0xC802FBC2, 0xC803FBC2, 0xC804FBC2, 0xC805FBC2, 0xC806FBC2, 0xC807FBC2, 0xC808FBC2, 0xC809FBC2, 0xC80AFBC2, 0xC80BFBC2, 0xC80CFBC2, 0xC80DFBC2, 0xC80EFBC2, 0xC80FFBC2, 0xC810FBC2, + 0xC811FBC2, 0xC812FBC2, 0xC813FBC2, 0xC814FBC2, 0xC815FBC2, 0xC816FBC2, 0xC817FBC2, 0xC818FBC2, 0xC819FBC2, 0xC81AFBC2, 0xC81BFBC2, 0xC81CFBC2, 0xC81DFBC2, 0xC81EFBC2, 0xC81FFBC2, + 0xC820FBC2, 0xC821FBC2, 0xC822FBC2, 0xC823FBC2, 0xC824FBC2, 0xC825FBC2, 0xC826FBC2, 0xC827FBC2, 0xC828FBC2, 0xC829FBC2, 0xC82AFBC2, 0xC82BFBC2, 0xC82CFBC2, 0xC82DFBC2, 0xC82EFBC2, + 0xC82FFBC2, 0xC830FBC2, 0xC831FBC2, 0xC832FBC2, 0xC833FBC2, 0xC834FBC2, 0xC835FBC2, 
0xC836FBC2, 0xC837FBC2, 0xC838FBC2, 0xC839FBC2, 0xC83AFBC2, 0xC83BFBC2, 0xC83CFBC2, 0xC83DFBC2, + 0xC83EFBC2, 0xC83FFBC2, 0xC840FBC2, 0xC841FBC2, 0xC842FBC2, 0xC843FBC2, 0xC844FBC2, 0xC845FBC2, 0xC846FBC2, 0xC847FBC2, 0xC848FBC2, 0xC849FBC2, 0xC84AFBC2, 0xC84BFBC2, 0xC84CFBC2, + 0xC84DFBC2, 0xC84EFBC2, 0xC84FFBC2, 0xC850FBC2, 0xC851FBC2, 0xC852FBC2, 0xC853FBC2, 0xC854FBC2, 0xC855FBC2, 0xC856FBC2, 0xC857FBC2, 0xC858FBC2, 0xC859FBC2, 0xC85AFBC2, 0xC85BFBC2, + 0xC85CFBC2, 0xC85DFBC2, 0xC85EFBC2, 0xC85FFBC2, 0xC860FBC2, 0xC861FBC2, 0xC862FBC2, 0xC863FBC2, 0xC864FBC2, 0xC865FBC2, 0xC866FBC2, 0xC867FBC2, 0xC868FBC2, 0xC869FBC2, 0xC86AFBC2, + 0xC86BFBC2, 0xC86CFBC2, 0xC86DFBC2, 0xC86EFBC2, 0xC86FFBC2, 0xC870FBC2, 0xC871FBC2, 0xC872FBC2, 0xC873FBC2, 0xC874FBC2, 0xC875FBC2, 0xC876FBC2, 0xC877FBC2, 0xC878FBC2, 0xC879FBC2, + 0xC87AFBC2, 0xC87BFBC2, 0xC87CFBC2, 0xC87DFBC2, 0xC87EFBC2, 0xC87FFBC2, 0xC880FBC2, 0xC881FBC2, 0xC882FBC2, 0xC883FBC2, 0xC884FBC2, 0xC885FBC2, 0xC886FBC2, 0xC887FBC2, 0xC888FBC2, + 0xC889FBC2, 0xC88AFBC2, 0xC88BFBC2, 0xC88CFBC2, 0xC88DFBC2, 0xC88EFBC2, 0xC88FFBC2, 0xC890FBC2, 0xC891FBC2, 0xC892FBC2, 0xC893FBC2, 0xC894FBC2, 0xC895FBC2, 0xC896FBC2, 0xC897FBC2, + 0xC898FBC2, 0xC899FBC2, 0xC89AFBC2, 0xC89BFBC2, 0xC89CFBC2, 0xC89DFBC2, 0xC89EFBC2, 0xC89FFBC2, 0xC8A0FBC2, 0xC8A1FBC2, 0xC8A2FBC2, 0xC8A3FBC2, 0xC8A4FBC2, 0xC8A5FBC2, 0xC8A6FBC2, + 0xC8A7FBC2, 0xC8A8FBC2, 0xC8A9FBC2, 0xC8AAFBC2, 0xC8ABFBC2, 0xC8ACFBC2, 0xC8ADFBC2, 0xC8AEFBC2, 0xC8AFFBC2, 0xC8B0FBC2, 0xC8B1FBC2, 0xC8B2FBC2, 0xC8B3FBC2, 0xC8B4FBC2, 0xC8B5FBC2, + 0xC8B6FBC2, 0xC8B7FBC2, 0xC8B8FBC2, 0xC8B9FBC2, 0xC8BAFBC2, 0xC8BBFBC2, 0xC8BCFBC2, 0xC8BDFBC2, 0xC8BEFBC2, 0xC8BFFBC2, 0xC8C0FBC2, 0xC8C1FBC2, 0xC8C2FBC2, 0xC8C3FBC2, 0xC8C4FBC2, + 0xC8C5FBC2, 0xC8C6FBC2, 0xC8C7FBC2, 0xC8C8FBC2, 0xC8C9FBC2, 0xC8CAFBC2, 0xC8CBFBC2, 0xC8CCFBC2, 0xC8CDFBC2, 0xC8CEFBC2, 0xC8CFFBC2, 0xC8D0FBC2, 0xC8D1FBC2, 0xC8D2FBC2, 0xC8D3FBC2, + 0xC8D4FBC2, 0xC8D5FBC2, 0xC8D6FBC2, 0xC8D7FBC2, 0xC8D8FBC2, 0xC8D9FBC2, 
0xC8DAFBC2, 0xC8DBFBC2, 0xC8DCFBC2, 0xC8DDFBC2, 0xC8DEFBC2, 0xC8DFFBC2, 0xC8E0FBC2, 0xC8E1FBC2, 0xC8E2FBC2, + 0xC8E3FBC2, 0xC8E4FBC2, 0xC8E5FBC2, 0xC8E6FBC2, 0xC8E7FBC2, 0xC8E8FBC2, 0xC8E9FBC2, 0xC8EAFBC2, 0xC8EBFBC2, 0xC8ECFBC2, 0xC8EDFBC2, 0xC8EEFBC2, 0xC8EFFBC2, 0xC8F0FBC2, 0xC8F1FBC2, + 0xC8F2FBC2, 0xC8F3FBC2, 0xC8F4FBC2, 0xC8F5FBC2, 0xC8F6FBC2, 0xC8F7FBC2, 0xC8F8FBC2, 0xC8F9FBC2, 0xC8FAFBC2, 0xC8FBFBC2, 0xC8FCFBC2, 0xC8FDFBC2, 0xC8FEFBC2, 0xC8FFFBC2, 0xC900FBC2, + 0xC901FBC2, 0xC902FBC2, 0xC903FBC2, 0xC904FBC2, 0xC905FBC2, 0xC906FBC2, 0xC907FBC2, 0xC908FBC2, 0xC909FBC2, 0xC90AFBC2, 0xC90BFBC2, 0xC90CFBC2, 0xC90DFBC2, 0xC90EFBC2, 0xC90FFBC2, + 0xC910FBC2, 0xC911FBC2, 0xC912FBC2, 0xC913FBC2, 0xC914FBC2, 0xC915FBC2, 0xC916FBC2, 0xC917FBC2, 0xC918FBC2, 0xC919FBC2, 0xC91AFBC2, 0xC91BFBC2, 0xC91CFBC2, 0xC91DFBC2, 0xC91EFBC2, + 0xC91FFBC2, 0xC920FBC2, 0xC921FBC2, 0xC922FBC2, 0xC923FBC2, 0xC924FBC2, 0xC925FBC2, 0xC926FBC2, 0xC927FBC2, 0xC928FBC2, 0xC929FBC2, 0xC92AFBC2, 0xC92BFBC2, 0xC92CFBC2, 0xC92DFBC2, + 0xC92EFBC2, 0xC92FFBC2, 0xC930FBC2, 0xC931FBC2, 0xC932FBC2, 0xC933FBC2, 0xC934FBC2, 0xC935FBC2, 0xC936FBC2, 0xC937FBC2, 0xC938FBC2, 0xC939FBC2, 0xC93AFBC2, 0xC93BFBC2, 0xC93CFBC2, + 0xC93DFBC2, 0xC93EFBC2, 0xC93FFBC2, 0xC940FBC2, 0xC941FBC2, 0xC942FBC2, 0xC943FBC2, 0xC944FBC2, 0xC945FBC2, 0xC946FBC2, 0xC947FBC2, 0xC948FBC2, 0xC949FBC2, 0xC94AFBC2, 0xC94BFBC2, + 0xC94CFBC2, 0xC94DFBC2, 0xC94EFBC2, 0xC94FFBC2, 0xC950FBC2, 0xC951FBC2, 0xC952FBC2, 0xC953FBC2, 0xC954FBC2, 0xC955FBC2, 0xC956FBC2, 0xC957FBC2, 0xC958FBC2, 0xC959FBC2, 0xC95AFBC2, + 0xC95BFBC2, 0xC95CFBC2, 0xC95DFBC2, 0xC95EFBC2, 0xC95FFBC2, 0xC960FBC2, 0xC961FBC2, 0xC962FBC2, 0xC963FBC2, 0xC964FBC2, 0xC965FBC2, 0xC966FBC2, 0xC967FBC2, 0xC968FBC2, 0xC969FBC2, + 0xC96AFBC2, 0xC96BFBC2, 0xC96CFBC2, 0xC96DFBC2, 0xC96EFBC2, 0xC96FFBC2, 0xC970FBC2, 0xC971FBC2, 0xC972FBC2, 0xC973FBC2, 0xC974FBC2, 0xC975FBC2, 0xC976FBC2, 0xC977FBC2, 0xC978FBC2, + 0xC979FBC2, 0xC97AFBC2, 0xC97BFBC2, 0xC97CFBC2, 0xC97DFBC2, 
0xC97EFBC2, 0xC97FFBC2, 0xC980FBC2, 0xC981FBC2, 0xC982FBC2, 0xC983FBC2, 0xC984FBC2, 0xC985FBC2, 0xC986FBC2, 0xC987FBC2, + 0xC988FBC2, 0xC989FBC2, 0xC98AFBC2, 0xC98BFBC2, 0xC98CFBC2, 0xC98DFBC2, 0xC98EFBC2, 0xC98FFBC2, 0xC990FBC2, 0xC991FBC2, 0xC992FBC2, 0xC993FBC2, 0xC994FBC2, 0xC995FBC2, 0xC996FBC2, + 0xC997FBC2, 0xC998FBC2, 0xC999FBC2, 0xC99AFBC2, 0xC99BFBC2, 0xC99CFBC2, 0xC99DFBC2, 0xC99EFBC2, 0xC99FFBC2, 0xC9A0FBC2, 0xC9A1FBC2, 0xC9A2FBC2, 0xC9A3FBC2, 0xC9A4FBC2, 0xC9A5FBC2, + 0xC9A6FBC2, 0xC9A7FBC2, 0xC9A8FBC2, 0xC9A9FBC2, 0xC9AAFBC2, 0xC9ABFBC2, 0xC9ACFBC2, 0xC9ADFBC2, 0xC9AEFBC2, 0xC9AFFBC2, 0xC9B0FBC2, 0xC9B1FBC2, 0xC9B2FBC2, 0xC9B3FBC2, 0xC9B4FBC2, + 0xC9B5FBC2, 0xC9B6FBC2, 0xC9B7FBC2, 0xC9B8FBC2, 0xC9B9FBC2, 0xC9BAFBC2, 0xC9BBFBC2, 0xC9BCFBC2, 0xC9BDFBC2, 0xC9BEFBC2, 0xC9BFFBC2, 0xC9C0FBC2, 0xC9C1FBC2, 0xC9C2FBC2, 0xC9C3FBC2, + 0xC9C4FBC2, 0xC9C5FBC2, 0xC9C6FBC2, 0xC9C7FBC2, 0xC9C8FBC2, 0xC9C9FBC2, 0xC9CAFBC2, 0xC9CBFBC2, 0xC9CCFBC2, 0xC9CDFBC2, 0xC9CEFBC2, 0xC9CFFBC2, 0xC9D0FBC2, 0xC9D1FBC2, 0xC9D2FBC2, + 0xC9D3FBC2, 0xC9D4FBC2, 0xC9D5FBC2, 0xC9D6FBC2, 0xC9D7FBC2, 0xC9D8FBC2, 0xC9D9FBC2, 0xC9DAFBC2, 0xC9DBFBC2, 0xC9DCFBC2, 0xC9DDFBC2, 0xC9DEFBC2, 0xC9DFFBC2, 0xC9E0FBC2, 0xC9E1FBC2, + 0xC9E2FBC2, 0xC9E3FBC2, 0xC9E4FBC2, 0xC9E5FBC2, 0xC9E6FBC2, 0xC9E7FBC2, 0xC9E8FBC2, 0xC9E9FBC2, 0xC9EAFBC2, 0xC9EBFBC2, 0xC9ECFBC2, 0xC9EDFBC2, 0xC9EEFBC2, 0xC9EFFBC2, 0xC9F0FBC2, + 0xC9F1FBC2, 0xC9F2FBC2, 0xC9F3FBC2, 0xC9F4FBC2, 0xC9F5FBC2, 0xC9F6FBC2, 0xC9F7FBC2, 0xC9F8FBC2, 0xC9F9FBC2, 0xC9FAFBC2, 0xC9FBFBC2, 0xC9FCFBC2, 0xC9FDFBC2, 0xC9FEFBC2, 0xC9FFFBC2, + 0xCA00FBC2, 0xCA01FBC2, 0xCA02FBC2, 0xCA03FBC2, 0xCA04FBC2, 0xCA05FBC2, 0xCA06FBC2, 0xCA07FBC2, 0xCA08FBC2, 0xCA09FBC2, 0xCA0AFBC2, 0xCA0BFBC2, 0xCA0CFBC2, 0xCA0DFBC2, 0xCA0EFBC2, + 0xCA0FFBC2, 0xCA10FBC2, 0xCA11FBC2, 0xCA12FBC2, 0xCA13FBC2, 0xCA14FBC2, 0xCA15FBC2, 0xCA16FBC2, 0xCA17FBC2, 0xCA18FBC2, 0xCA19FBC2, 0xCA1AFBC2, 0xCA1BFBC2, 0xCA1CFBC2, 0xCA1DFBC2, + 0xCA1EFBC2, 0xCA1FFBC2, 0xCA20FBC2, 0xCA21FBC2, 
0xCA22FBC2, 0xCA23FBC2, 0xCA24FBC2, 0xCA25FBC2, 0xCA26FBC2, 0xCA27FBC2, 0xCA28FBC2, 0xCA29FBC2, 0xCA2AFBC2, 0xCA2BFBC2, 0xCA2CFBC2, + 0xCA2DFBC2, 0xCA2EFBC2, 0xCA2FFBC2, 0xCA30FBC2, 0xCA31FBC2, 0xCA32FBC2, 0xCA33FBC2, 0xCA34FBC2, 0xCA35FBC2, 0xCA36FBC2, 0xCA37FBC2, 0xCA38FBC2, 0xCA39FBC2, 0xCA3AFBC2, 0xCA3BFBC2, + 0xCA3CFBC2, 0xCA3DFBC2, 0xCA3EFBC2, 0xCA3FFBC2, 0xCA40FBC2, 0xCA41FBC2, 0xCA42FBC2, 0xCA43FBC2, 0xCA44FBC2, 0xCA45FBC2, 0xCA46FBC2, 0xCA47FBC2, 0xCA48FBC2, 0xCA49FBC2, 0xCA4AFBC2, + 0xCA4BFBC2, 0xCA4CFBC2, 0xCA4DFBC2, 0xCA4EFBC2, 0xCA4FFBC2, 0xCA50FBC2, 0xCA51FBC2, 0xCA52FBC2, 0xCA53FBC2, 0xCA54FBC2, 0xCA55FBC2, 0xCA56FBC2, 0xCA57FBC2, 0xCA58FBC2, 0xCA59FBC2, + 0xCA5AFBC2, 0xCA5BFBC2, 0xCA5CFBC2, 0xCA5DFBC2, 0xCA5EFBC2, 0xCA5FFBC2, 0xCA60FBC2, 0xCA61FBC2, 0xCA62FBC2, 0xCA63FBC2, 0xCA64FBC2, 0xCA65FBC2, 0xCA66FBC2, 0xCA67FBC2, 0xCA68FBC2, + 0xCA69FBC2, 0xCA6AFBC2, 0xCA6BFBC2, 0xCA6CFBC2, 0xCA6DFBC2, 0xCA6EFBC2, 0xCA6FFBC2, 0xCA70FBC2, 0xCA71FBC2, 0xCA72FBC2, 0xCA73FBC2, 0xCA74FBC2, 0xCA75FBC2, 0xCA76FBC2, 0xCA77FBC2, + 0xCA78FBC2, 0xCA79FBC2, 0xCA7AFBC2, 0xCA7BFBC2, 0xCA7CFBC2, 0xCA7DFBC2, 0xCA7EFBC2, 0xCA7FFBC2, 0xCA80FBC2, 0xCA81FBC2, 0xCA82FBC2, 0xCA83FBC2, 0xCA84FBC2, 0xCA85FBC2, 0xCA86FBC2, + 0xCA87FBC2, 0xCA88FBC2, 0xCA89FBC2, 0xCA8AFBC2, 0xCA8BFBC2, 0xCA8CFBC2, 0xCA8DFBC2, 0xCA8EFBC2, 0xCA8FFBC2, 0xCA90FBC2, 0xCA91FBC2, 0xCA92FBC2, 0xCA93FBC2, 0xCA94FBC2, 0xCA95FBC2, + 0xCA96FBC2, 0xCA97FBC2, 0xCA98FBC2, 0xCA99FBC2, 0xCA9AFBC2, 0xCA9BFBC2, 0xCA9CFBC2, 0xCA9DFBC2, 0xCA9EFBC2, 0xCA9FFBC2, 0xCAA0FBC2, 0xCAA1FBC2, 0xCAA2FBC2, 0xCAA3FBC2, 0xCAA4FBC2, + 0xCAA5FBC2, 0xCAA6FBC2, 0xCAA7FBC2, 0xCAA8FBC2, 0xCAA9FBC2, 0xCAAAFBC2, 0xCAABFBC2, 0xCAACFBC2, 0xCAADFBC2, 0xCAAEFBC2, 0xCAAFFBC2, 0xCAB0FBC2, 0xCAB1FBC2, 0xCAB2FBC2, 0xCAB3FBC2, + 0xCAB4FBC2, 0xCAB5FBC2, 0xCAB6FBC2, 0xCAB7FBC2, 0xCAB8FBC2, 0xCAB9FBC2, 0xCABAFBC2, 0xCABBFBC2, 0xCABCFBC2, 0xCABDFBC2, 0xCABEFBC2, 0xCABFFBC2, 0xCAC0FBC2, 0xCAC1FBC2, 0xCAC2FBC2, + 0xCAC3FBC2, 0xCAC4FBC2, 0xCAC5FBC2, 
0xCAC6FBC2, 0xCAC7FBC2, 0xCAC8FBC2, 0xCAC9FBC2, 0xCACAFBC2, 0xCACBFBC2, 0xCACCFBC2, 0xCACDFBC2, 0xCACEFBC2, 0xCACFFBC2, 0xCAD0FBC2, 0xCAD1FBC2, + 0xCAD2FBC2, 0xCAD3FBC2, 0xCAD4FBC2, 0xCAD5FBC2, 0xCAD6FBC2, 0xCAD7FBC2, 0xCAD8FBC2, 0xCAD9FBC2, 0xCADAFBC2, 0xCADBFBC2, 0xCADCFBC2, 0xCADDFBC2, 0xCADEFBC2, 0xCADFFBC2, 0xCAE0FBC2, + 0xCAE1FBC2, 0xCAE2FBC2, 0xCAE3FBC2, 0xCAE4FBC2, 0xCAE5FBC2, 0xCAE6FBC2, 0xCAE7FBC2, 0xCAE8FBC2, 0xCAE9FBC2, 0xCAEAFBC2, 0xCAEBFBC2, 0xCAECFBC2, 0xCAEDFBC2, 0xCAEEFBC2, 0xCAEFFBC2, + 0xCAF0FBC2, 0xCAF1FBC2, 0xCAF2FBC2, 0xCAF3FBC2, 0xCAF4FBC2, 0xCAF5FBC2, 0xCAF6FBC2, 0xCAF7FBC2, 0xCAF8FBC2, 0xCAF9FBC2, 0xCAFAFBC2, 0xCAFBFBC2, 0xCAFCFBC2, 0xCAFDFBC2, 0xCAFEFBC2, + 0xCAFFFBC2, 0xCB00FBC2, 0xCB01FBC2, 0xCB02FBC2, 0xCB03FBC2, 0xCB04FBC2, 0xCB05FBC2, 0xCB06FBC2, 0xCB07FBC2, 0xCB08FBC2, 0xCB09FBC2, 0xCB0AFBC2, 0xCB0BFBC2, 0xCB0CFBC2, 0xCB0DFBC2, + 0xCB0EFBC2, 0xCB0FFBC2, 0xCB10FBC2, 0xCB11FBC2, 0xCB12FBC2, 0xCB13FBC2, 0xCB14FBC2, 0xCB15FBC2, 0xCB16FBC2, 0xCB17FBC2, 0xCB18FBC2, 0xCB19FBC2, 0xCB1AFBC2, 0xCB1BFBC2, 0xCB1CFBC2, + 0xCB1DFBC2, 0xCB1EFBC2, 0xCB1FFBC2, 0xCB20FBC2, 0xCB21FBC2, 0xCB22FBC2, 0xCB23FBC2, 0xCB24FBC2, 0xCB25FBC2, 0xCB26FBC2, 0xCB27FBC2, 0xCB28FBC2, 0xCB29FBC2, 0xCB2AFBC2, 0xCB2BFBC2, + 0xCB2CFBC2, 0xCB2DFBC2, 0xCB2EFBC2, 0xCB2FFBC2, 0xCB30FBC2, 0xCB31FBC2, 0xCB32FBC2, 0xCB33FBC2, 0xCB34FBC2, 0xCB35FBC2, 0xCB36FBC2, 0xCB37FBC2, 0xCB38FBC2, 0xCB39FBC2, 0xCB3AFBC2, + 0xCB3BFBC2, 0xCB3CFBC2, 0xCB3DFBC2, 0xCB3EFBC2, 0xCB3FFBC2, 0xCB40FBC2, 0xCB41FBC2, 0xCB42FBC2, 0xCB43FBC2, 0xCB44FBC2, 0xCB45FBC2, 0xCB46FBC2, 0xCB47FBC2, 0xCB48FBC2, 0xCB49FBC2, + 0xCB4AFBC2, 0xCB4BFBC2, 0xCB4CFBC2, 0xCB4DFBC2, 0xCB4EFBC2, 0xCB4FFBC2, 0xCB50FBC2, 0xCB51FBC2, 0xCB52FBC2, 0xCB53FBC2, 0xCB54FBC2, 0xCB55FBC2, 0xCB56FBC2, 0xCB57FBC2, 0xCB58FBC2, + 0xCB59FBC2, 0xCB5AFBC2, 0xCB5BFBC2, 0xCB5CFBC2, 0xCB5DFBC2, 0xCB5EFBC2, 0xCB5FFBC2, 0xCB60FBC2, 0xCB61FBC2, 0xCB62FBC2, 0xCB63FBC2, 0xCB64FBC2, 0xCB65FBC2, 0xCB66FBC2, 0xCB67FBC2, + 0xCB68FBC2, 0xCB69FBC2, 
0xCB6AFBC2, 0xCB6BFBC2, 0xCB6CFBC2, 0xCB6DFBC2, 0xCB6EFBC2, 0xCB6FFBC2, 0xCB70FBC2, 0xCB71FBC2, 0xCB72FBC2, 0xCB73FBC2, 0xCB74FBC2, 0xCB75FBC2, 0xCB76FBC2, + 0xCB77FBC2, 0xCB78FBC2, 0xCB79FBC2, 0xCB7AFBC2, 0xCB7BFBC2, 0xCB7CFBC2, 0xCB7DFBC2, 0xCB7EFBC2, 0xCB7FFBC2, 0xCB80FBC2, 0xCB81FBC2, 0xCB82FBC2, 0xCB83FBC2, 0xCB84FBC2, 0xCB85FBC2, + 0xCB86FBC2, 0xCB87FBC2, 0xCB88FBC2, 0xCB89FBC2, 0xCB8AFBC2, 0xCB8BFBC2, 0xCB8CFBC2, 0xCB8DFBC2, 0xCB8EFBC2, 0xCB8FFBC2, 0xCB90FBC2, 0xCB91FBC2, 0xCB92FBC2, 0xCB93FBC2, 0xCB94FBC2, + 0xCB95FBC2, 0xCB96FBC2, 0xCB97FBC2, 0xCB98FBC2, 0xCB99FBC2, 0xCB9AFBC2, 0xCB9BFBC2, 0xCB9CFBC2, 0xCB9DFBC2, 0xCB9EFBC2, 0xCB9FFBC2, 0xCBA0FBC2, 0xCBA1FBC2, 0xCBA2FBC2, 0xCBA3FBC2, + 0xCBA4FBC2, 0xCBA5FBC2, 0xCBA6FBC2, 0xCBA7FBC2, 0xCBA8FBC2, 0xCBA9FBC2, 0xCBAAFBC2, 0xCBABFBC2, 0xCBACFBC2, 0xCBADFBC2, 0xCBAEFBC2, 0xCBAFFBC2, 0xCBB0FBC2, 0xCBB1FBC2, 0xCBB2FBC2, + 0xCBB3FBC2, 0xCBB4FBC2, 0xCBB5FBC2, 0xCBB6FBC2, 0xCBB7FBC2, 0xCBB8FBC2, 0xCBB9FBC2, 0xCBBAFBC2, 0xCBBBFBC2, 0xCBBCFBC2, 0xCBBDFBC2, 0xCBBEFBC2, 0xCBBFFBC2, 0xCBC0FBC2, 0xCBC1FBC2, + 0xCBC2FBC2, 0xCBC3FBC2, 0xCBC4FBC2, 0xCBC5FBC2, 0xCBC6FBC2, 0xCBC7FBC2, 0xCBC8FBC2, 0xCBC9FBC2, 0xCBCAFBC2, 0xCBCBFBC2, 0xCBCCFBC2, 0xCBCDFBC2, 0xCBCEFBC2, 0xCBCFFBC2, 0xCBD0FBC2, + 0xCBD1FBC2, 0xCBD2FBC2, 0xCBD3FBC2, 0xCBD4FBC2, 0xCBD5FBC2, 0xCBD6FBC2, 0xCBD7FBC2, 0xCBD8FBC2, 0xCBD9FBC2, 0xCBDAFBC2, 0xCBDBFBC2, 0xCBDCFBC2, 0xCBDDFBC2, 0xCBDEFBC2, 0xCBDFFBC2, + 0xCBE0FBC2, 0xCBE1FBC2, 0xCBE2FBC2, 0xCBE3FBC2, 0xCBE4FBC2, 0xCBE5FBC2, 0xCBE6FBC2, 0xCBE7FBC2, 0xCBE8FBC2, 0xCBE9FBC2, 0xCBEAFBC2, 0xCBEBFBC2, 0xCBECFBC2, 0xCBEDFBC2, 0xCBEEFBC2, + 0xCBEFFBC2, 0xCBF0FBC2, 0xCBF1FBC2, 0xCBF2FBC2, 0xCBF3FBC2, 0xCBF4FBC2, 0xCBF5FBC2, 0xCBF6FBC2, 0xCBF7FBC2, 0xCBF8FBC2, 0xCBF9FBC2, 0xCBFAFBC2, 0xCBFBFBC2, 0xCBFCFBC2, 0xCBFDFBC2, + 0xCBFEFBC2, 0xCBFFFBC2, 0xCC00FBC2, 0xCC01FBC2, 0xCC02FBC2, 0xCC03FBC2, 0xCC04FBC2, 0xCC05FBC2, 0xCC06FBC2, 0xCC07FBC2, 0xCC08FBC2, 0xCC09FBC2, 0xCC0AFBC2, 0xCC0BFBC2, 0xCC0CFBC2, + 0xCC0DFBC2, 
0xCC0EFBC2, 0xCC0FFBC2, 0xCC10FBC2, 0xCC11FBC2, 0xCC12FBC2, 0xCC13FBC2, 0xCC14FBC2, 0xCC15FBC2, 0xCC16FBC2, 0xCC17FBC2, 0xCC18FBC2, 0xCC19FBC2, 0xCC1AFBC2, 0xCC1BFBC2, + 0xCC1CFBC2, 0xCC1DFBC2, 0xCC1EFBC2, 0xCC1FFBC2, 0xCC20FBC2, 0xCC21FBC2, 0xCC22FBC2, 0xCC23FBC2, 0xCC24FBC2, 0xCC25FBC2, 0xCC26FBC2, 0xCC27FBC2, 0xCC28FBC2, 0xCC29FBC2, 0xCC2AFBC2, + 0xCC2BFBC2, 0xCC2CFBC2, 0xCC2DFBC2, 0xCC2EFBC2, 0xCC2FFBC2, 0xCC30FBC2, 0xCC31FBC2, 0xCC32FBC2, 0xCC33FBC2, 0xCC34FBC2, 0xCC35FBC2, 0xCC36FBC2, 0xCC37FBC2, 0xCC38FBC2, 0xCC39FBC2, + 0xCC3AFBC2, 0xCC3BFBC2, 0xCC3CFBC2, 0xCC3DFBC2, 0xCC3EFBC2, 0xCC3FFBC2, 0xCC40FBC2, 0xCC41FBC2, 0xCC42FBC2, 0xCC43FBC2, 0xCC44FBC2, 0xCC45FBC2, 0xCC46FBC2, 0xCC47FBC2, 0xCC48FBC2, + 0xCC49FBC2, 0xCC4AFBC2, 0xCC4BFBC2, 0xCC4CFBC2, 0xCC4DFBC2, 0xCC4EFBC2, 0xCC4FFBC2, 0xCC50FBC2, 0xCC51FBC2, 0xCC52FBC2, 0xCC53FBC2, 0xCC54FBC2, 0xCC55FBC2, 0xCC56FBC2, 0xCC57FBC2, + 0xCC58FBC2, 0xCC59FBC2, 0xCC5AFBC2, 0xCC5BFBC2, 0xCC5CFBC2, 0xCC5DFBC2, 0xCC5EFBC2, 0xCC5FFBC2, 0xCC60FBC2, 0xCC61FBC2, 0xCC62FBC2, 0xCC63FBC2, 0xCC64FBC2, 0xCC65FBC2, 0xCC66FBC2, + 0xCC67FBC2, 0xCC68FBC2, 0xCC69FBC2, 0xCC6AFBC2, 0xCC6BFBC2, 0xCC6CFBC2, 0xCC6DFBC2, 0xCC6EFBC2, 0xCC6FFBC2, 0xCC70FBC2, 0xCC71FBC2, 0xCC72FBC2, 0xCC73FBC2, 0xCC74FBC2, 0xCC75FBC2, + 0xCC76FBC2, 0xCC77FBC2, 0xCC78FBC2, 0xCC79FBC2, 0xCC7AFBC2, 0xCC7BFBC2, 0xCC7CFBC2, 0xCC7DFBC2, 0xCC7EFBC2, 0xCC7FFBC2, 0xCC80FBC2, 0xCC81FBC2, 0xCC82FBC2, 0xCC83FBC2, 0xCC84FBC2, + 0xCC85FBC2, 0xCC86FBC2, 0xCC87FBC2, 0xCC88FBC2, 0xCC89FBC2, 0xCC8AFBC2, 0xCC8BFBC2, 0xCC8CFBC2, 0xCC8DFBC2, 0xCC8EFBC2, 0xCC8FFBC2, 0xCC90FBC2, 0xCC91FBC2, 0xCC92FBC2, 0xCC93FBC2, + 0xCC94FBC2, 0xCC95FBC2, 0xCC96FBC2, 0xCC97FBC2, 0xCC98FBC2, 0xCC99FBC2, 0xCC9AFBC2, 0xCC9BFBC2, 0xCC9CFBC2, 0xCC9DFBC2, 0xCC9EFBC2, 0xCC9FFBC2, 0xCCA0FBC2, 0xCCA1FBC2, 0xCCA2FBC2, + 0xCCA3FBC2, 0xCCA4FBC2, 0xCCA5FBC2, 0xCCA6FBC2, 0xCCA7FBC2, 0xCCA8FBC2, 0xCCA9FBC2, 0xCCAAFBC2, 0xCCABFBC2, 0xCCACFBC2, 0xCCADFBC2, 0xCCAEFBC2, 0xCCAFFBC2, 0xCCB0FBC2, 0xCCB1FBC2, + 
0xCCB2FBC2, 0xCCB3FBC2, 0xCCB4FBC2, 0xCCB5FBC2, 0xCCB6FBC2, 0xCCB7FBC2, 0xCCB8FBC2, 0xCCB9FBC2, 0xCCBAFBC2, 0xCCBBFBC2, 0xCCBCFBC2, 0xCCBDFBC2, 0xCCBEFBC2, 0xCCBFFBC2, 0xCCC0FBC2, + 0xCCC1FBC2, 0xCCC2FBC2, 0xCCC3FBC2, 0xCCC4FBC2, 0xCCC5FBC2, 0xCCC6FBC2, 0xCCC7FBC2, 0xCCC8FBC2, 0xCCC9FBC2, 0xCCCAFBC2, 0xCCCBFBC2, 0xCCCCFBC2, 0xCCCDFBC2, 0xCCCEFBC2, 0xCCCFFBC2, + 0xCCD0FBC2, 0xCCD1FBC2, 0xCCD2FBC2, 0xCCD3FBC2, 0xCCD4FBC2, 0xCCD5FBC2, 0xCCD6FBC2, 0xCCD7FBC2, 0xCCD8FBC2, 0xCCD9FBC2, 0xCCDAFBC2, 0xCCDBFBC2, 0xCCDCFBC2, 0xCCDDFBC2, 0xCCDEFBC2, + 0xCCDFFBC2, 0xCCE0FBC2, 0xCCE1FBC2, 0xCCE2FBC2, 0xCCE3FBC2, 0xCCE4FBC2, 0xCCE5FBC2, 0xCCE6FBC2, 0xCCE7FBC2, 0xCCE8FBC2, 0xCCE9FBC2, 0xCCEAFBC2, 0xCCEBFBC2, 0xCCECFBC2, 0xCCEDFBC2, + 0xCCEEFBC2, 0xCCEFFBC2, 0xCCF0FBC2, 0xCCF1FBC2, 0xCCF2FBC2, 0xCCF3FBC2, 0xCCF4FBC2, 0xCCF5FBC2, 0xCCF6FBC2, 0xCCF7FBC2, 0xCCF8FBC2, 0xCCF9FBC2, 0xCCFAFBC2, 0xCCFBFBC2, 0xCCFCFBC2, + 0xCCFDFBC2, 0xCCFEFBC2, 0xCCFFFBC2, 0xCD00FBC2, 0xCD01FBC2, 0xCD02FBC2, 0xCD03FBC2, 0xCD04FBC2, 0xCD05FBC2, 0xCD06FBC2, 0xCD07FBC2, 0xCD08FBC2, 0xCD09FBC2, 0xCD0AFBC2, 0xCD0BFBC2, + 0xCD0CFBC2, 0xCD0DFBC2, 0xCD0EFBC2, 0xCD0FFBC2, 0xCD10FBC2, 0xCD11FBC2, 0xCD12FBC2, 0xCD13FBC2, 0xCD14FBC2, 0xCD15FBC2, 0xCD16FBC2, 0xCD17FBC2, 0xCD18FBC2, 0xCD19FBC2, 0xCD1AFBC2, + 0xCD1BFBC2, 0xCD1CFBC2, 0xCD1DFBC2, 0xCD1EFBC2, 0xCD1FFBC2, 0xCD20FBC2, 0xCD21FBC2, 0xCD22FBC2, 0xCD23FBC2, 0xCD24FBC2, 0xCD25FBC2, 0xCD26FBC2, 0xCD27FBC2, 0xCD28FBC2, 0xCD29FBC2, + 0xCD2AFBC2, 0xCD2BFBC2, 0xCD2CFBC2, 0xCD2DFBC2, 0xCD2EFBC2, 0xCD2FFBC2, 0xCD30FBC2, 0xCD31FBC2, 0xCD32FBC2, 0xCD33FBC2, 0xCD34FBC2, 0xCD35FBC2, 0xCD36FBC2, 0xCD37FBC2, 0xCD38FBC2, + 0xCD39FBC2, 0xCD3AFBC2, 0xCD3BFBC2, 0xCD3CFBC2, 0xCD3DFBC2, 0xCD3EFBC2, 0xCD3FFBC2, 0xCD40FBC2, 0xCD41FBC2, 0xCD42FBC2, 0xCD43FBC2, 0xCD44FBC2, 0xCD45FBC2, 0xCD46FBC2, 0xCD47FBC2, + 0xCD48FBC2, 0xCD49FBC2, 0xCD4AFBC2, 0xCD4BFBC2, 0xCD4CFBC2, 0xCD4DFBC2, 0xCD4EFBC2, 0xCD4FFBC2, 0xCD50FBC2, 0xCD51FBC2, 0xCD52FBC2, 0xCD53FBC2, 0xCD54FBC2, 0xCD55FBC2, 0xCD56FBC2, 
+ 0xCD57FBC2, 0xCD58FBC2, 0xCD59FBC2, 0xCD5AFBC2, 0xCD5BFBC2, 0xCD5CFBC2, 0xCD5DFBC2, 0xCD5EFBC2, 0xCD5FFBC2, 0xCD60FBC2, 0xCD61FBC2, 0xCD62FBC2, 0xCD63FBC2, 0xCD64FBC2, 0xCD65FBC2, + 0xCD66FBC2, 0xCD67FBC2, 0xCD68FBC2, 0xCD69FBC2, 0xCD6AFBC2, 0xCD6BFBC2, 0xCD6CFBC2, 0xCD6DFBC2, 0xCD6EFBC2, 0xCD6FFBC2, 0xCD70FBC2, 0xCD71FBC2, 0xCD72FBC2, 0xCD73FBC2, 0xCD74FBC2, + 0xCD75FBC2, 0xCD76FBC2, 0xCD77FBC2, 0xCD78FBC2, 0xCD79FBC2, 0xCD7AFBC2, 0xCD7BFBC2, 0xCD7CFBC2, 0xCD7DFBC2, 0xCD7EFBC2, 0xCD7FFBC2, 0xCD80FBC2, 0xCD81FBC2, 0xCD82FBC2, 0xCD83FBC2, + 0xCD84FBC2, 0xCD85FBC2, 0xCD86FBC2, 0xCD87FBC2, 0xCD88FBC2, 0xCD89FBC2, 0xCD8AFBC2, 0xCD8BFBC2, 0xCD8CFBC2, 0xCD8DFBC2, 0xCD8EFBC2, 0xCD8FFBC2, 0xCD90FBC2, 0xCD91FBC2, 0xCD92FBC2, + 0xCD93FBC2, 0xCD94FBC2, 0xCD95FBC2, 0xCD96FBC2, 0xCD97FBC2, 0xCD98FBC2, 0xCD99FBC2, 0xCD9AFBC2, 0xCD9BFBC2, 0xCD9CFBC2, 0xCD9DFBC2, 0xCD9EFBC2, 0xCD9FFBC2, 0xCDA0FBC2, 0xCDA1FBC2, + 0xCDA2FBC2, 0xCDA3FBC2, 0xCDA4FBC2, 0xCDA5FBC2, 0xCDA6FBC2, 0xCDA7FBC2, 0xCDA8FBC2, 0xCDA9FBC2, 0xCDAAFBC2, 0xCDABFBC2, 0xCDACFBC2, 0xCDADFBC2, 0xCDAEFBC2, 0xCDAFFBC2, 0xCDB0FBC2, + 0xCDB1FBC2, 0xCDB2FBC2, 0xCDB3FBC2, 0xCDB4FBC2, 0xCDB5FBC2, 0xCDB6FBC2, 0xCDB7FBC2, 0xCDB8FBC2, 0xCDB9FBC2, 0xCDBAFBC2, 0xCDBBFBC2, 0xCDBCFBC2, 0xCDBDFBC2, 0xCDBEFBC2, 0xCDBFFBC2, + 0xCDC0FBC2, 0xCDC1FBC2, 0xCDC2FBC2, 0xCDC3FBC2, 0xCDC4FBC2, 0xCDC5FBC2, 0xCDC6FBC2, 0xCDC7FBC2, 0xCDC8FBC2, 0xCDC9FBC2, 0xCDCAFBC2, 0xCDCBFBC2, 0xCDCCFBC2, 0xCDCDFBC2, 0xCDCEFBC2, + 0xCDCFFBC2, 0xCDD0FBC2, 0xCDD1FBC2, 0xCDD2FBC2, 0xCDD3FBC2, 0xCDD4FBC2, 0xCDD5FBC2, 0xCDD6FBC2, 0xCDD7FBC2, 0xCDD8FBC2, 0xCDD9FBC2, 0xCDDAFBC2, 0xCDDBFBC2, 0xCDDCFBC2, 0xCDDDFBC2, + 0xCDDEFBC2, 0xCDDFFBC2, 0xCDE0FBC2, 0xCDE1FBC2, 0xCDE2FBC2, 0xCDE3FBC2, 0xCDE4FBC2, 0xCDE5FBC2, 0xCDE6FBC2, 0xCDE7FBC2, 0xCDE8FBC2, 0xCDE9FBC2, 0xCDEAFBC2, 0xCDEBFBC2, 0xCDECFBC2, + 0xCDEDFBC2, 0xCDEEFBC2, 0xCDEFFBC2, 0xCDF0FBC2, 0xCDF1FBC2, 0xCDF2FBC2, 0xCDF3FBC2, 0xCDF4FBC2, 0xCDF5FBC2, 0xCDF6FBC2, 0xCDF7FBC2, 0xCDF8FBC2, 0xCDF9FBC2, 0xCDFAFBC2, 
0xCDFBFBC2, + 0xCDFCFBC2, 0xCDFDFBC2, 0xCDFEFBC2, 0xCDFFFBC2, 0xCE00FBC2, 0xCE01FBC2, 0xCE02FBC2, 0xCE03FBC2, 0xCE04FBC2, 0xCE05FBC2, 0xCE06FBC2, 0xCE07FBC2, 0xCE08FBC2, 0xCE09FBC2, 0xCE0AFBC2, + 0xCE0BFBC2, 0xCE0CFBC2, 0xCE0DFBC2, 0xCE0EFBC2, 0xCE0FFBC2, 0xCE10FBC2, 0xCE11FBC2, 0xCE12FBC2, 0xCE13FBC2, 0xCE14FBC2, 0xCE15FBC2, 0xCE16FBC2, 0xCE17FBC2, 0xCE18FBC2, 0xCE19FBC2, + 0xCE1AFBC2, 0xCE1BFBC2, 0xCE1CFBC2, 0xCE1DFBC2, 0xCE1EFBC2, 0xCE1FFBC2, 0xCE20FBC2, 0xCE21FBC2, 0xCE22FBC2, 0xCE23FBC2, 0xCE24FBC2, 0xCE25FBC2, 0xCE26FBC2, 0xCE27FBC2, 0xCE28FBC2, + 0xCE29FBC2, 0xCE2AFBC2, 0xCE2BFBC2, 0xCE2CFBC2, 0xCE2DFBC2, 0xCE2EFBC2, 0xCE2FFBC2, 0xCE30FBC2, 0xCE31FBC2, 0xCE32FBC2, 0xCE33FBC2, 0xCE34FBC2, 0xCE35FBC2, 0xCE36FBC2, 0xCE37FBC2, + 0xCE38FBC2, 0xCE39FBC2, 0xCE3AFBC2, 0xCE3BFBC2, 0xCE3CFBC2, 0xCE3DFBC2, 0xCE3EFBC2, 0xCE3FFBC2, 0xCE40FBC2, 0xCE41FBC2, 0xCE42FBC2, 0xCE43FBC2, 0xCE44FBC2, 0xCE45FBC2, 0xCE46FBC2, + 0xCE47FBC2, 0xCE48FBC2, 0xCE49FBC2, 0xCE4AFBC2, 0xCE4BFBC2, 0xCE4CFBC2, 0xCE4DFBC2, 0xCE4EFBC2, 0xCE4FFBC2, 0xCE50FBC2, 0xCE51FBC2, 0xCE52FBC2, 0xCE53FBC2, 0xCE54FBC2, 0xCE55FBC2, + 0xCE56FBC2, 0xCE57FBC2, 0xCE58FBC2, 0xCE59FBC2, 0xCE5AFBC2, 0xCE5BFBC2, 0xCE5CFBC2, 0xCE5DFBC2, 0xCE5EFBC2, 0xCE5FFBC2, 0xCE60FBC2, 0xCE61FBC2, 0xCE62FBC2, 0xCE63FBC2, 0xCE64FBC2, + 0xCE65FBC2, 0xCE66FBC2, 0xCE67FBC2, 0xCE68FBC2, 0xCE69FBC2, 0xCE6AFBC2, 0xCE6BFBC2, 0xCE6CFBC2, 0xCE6DFBC2, 0xCE6EFBC2, 0xCE6FFBC2, 0xCE70FBC2, 0xCE71FBC2, 0xCE72FBC2, 0xCE73FBC2, + 0xCE74FBC2, 0xCE75FBC2, 0xCE76FBC2, 0xCE77FBC2, 0xCE78FBC2, 0xCE79FBC2, 0xCE7AFBC2, 0xCE7BFBC2, 0xCE7CFBC2, 0xCE7DFBC2, 0xCE7EFBC2, 0xCE7FFBC2, 0xCE80FBC2, 0xCE81FBC2, 0xCE82FBC2, + 0xCE83FBC2, 0xCE84FBC2, 0xCE85FBC2, 0xCE86FBC2, 0xCE87FBC2, 0xCE88FBC2, 0xCE89FBC2, 0xCE8AFBC2, 0xCE8BFBC2, 0xCE8CFBC2, 0xCE8DFBC2, 0xCE8EFBC2, 0xCE8FFBC2, 0xCE90FBC2, 0xCE91FBC2, + 0xCE92FBC2, 0xCE93FBC2, 0xCE94FBC2, 0xCE95FBC2, 0xCE96FBC2, 0xCE97FBC2, 0xCE98FBC2, 0xCE99FBC2, 0xCE9AFBC2, 0xCE9BFBC2, 0xCE9CFBC2, 0xCE9DFBC2, 0xCE9EFBC2, 
0xCE9FFBC2, 0xCEA0FBC2, + 0xCEA1FBC2, 0xCEA2FBC2, 0xCEA3FBC2, 0xCEA4FBC2, 0xCEA5FBC2, 0xCEA6FBC2, 0xCEA7FBC2, 0xCEA8FBC2, 0xCEA9FBC2, 0xCEAAFBC2, 0xCEABFBC2, 0xCEACFBC2, 0xCEADFBC2, 0xCEAEFBC2, 0xCEAFFBC2, + 0xCEB0FBC2, 0xCEB1FBC2, 0xCEB2FBC2, 0xCEB3FBC2, 0xCEB4FBC2, 0xCEB5FBC2, 0xCEB6FBC2, 0xCEB7FBC2, 0xCEB8FBC2, 0xCEB9FBC2, 0xCEBAFBC2, 0xCEBBFBC2, 0xCEBCFBC2, 0xCEBDFBC2, 0xCEBEFBC2, + 0xCEBFFBC2, 0xCEC0FBC2, 0xCEC1FBC2, 0xCEC2FBC2, 0xCEC3FBC2, 0xCEC4FBC2, 0xCEC5FBC2, 0xCEC6FBC2, 0xCEC7FBC2, 0xCEC8FBC2, 0xCEC9FBC2, 0xCECAFBC2, 0xCECBFBC2, 0xCECCFBC2, 0xCECDFBC2, + 0xCECEFBC2, 0xCECFFBC2, 0xCED0FBC2, 0xCED1FBC2, 0xCED2FBC2, 0xCED3FBC2, 0xCED4FBC2, 0xCED5FBC2, 0xCED6FBC2, 0xCED7FBC2, 0xCED8FBC2, 0xCED9FBC2, 0xCEDAFBC2, 0xCEDBFBC2, 0xCEDCFBC2, + 0xCEDDFBC2, 0xCEDEFBC2, 0xCEDFFBC2, 0xCEE0FBC2, 0xCEE1FBC2, 0xCEE2FBC2, 0xCEE3FBC2, 0xCEE4FBC2, 0xCEE5FBC2, 0xCEE6FBC2, 0xCEE7FBC2, 0xCEE8FBC2, 0xCEE9FBC2, 0xCEEAFBC2, 0xCEEBFBC2, + 0xCEECFBC2, 0xCEEDFBC2, 0xCEEEFBC2, 0xCEEFFBC2, 0xCEF0FBC2, 0xCEF1FBC2, 0xCEF2FBC2, 0xCEF3FBC2, 0xCEF4FBC2, 0xCEF5FBC2, 0xCEF6FBC2, 0xCEF7FBC2, 0xCEF8FBC2, 0xCEF9FBC2, 0xCEFAFBC2, + 0xCEFBFBC2, 0xCEFCFBC2, 0xCEFDFBC2, 0xCEFEFBC2, 0xCEFFFBC2, 0xCF00FBC2, 0xCF01FBC2, 0xCF02FBC2, 0xCF03FBC2, 0xCF04FBC2, 0xCF05FBC2, 0xCF06FBC2, 0xCF07FBC2, 0xCF08FBC2, 0xCF09FBC2, + 0xCF0AFBC2, 0xCF0BFBC2, 0xCF0CFBC2, 0xCF0DFBC2, 0xCF0EFBC2, 0xCF0FFBC2, 0xCF10FBC2, 0xCF11FBC2, 0xCF12FBC2, 0xCF13FBC2, 0xCF14FBC2, 0xCF15FBC2, 0xCF16FBC2, 0xCF17FBC2, 0xCF18FBC2, + 0xCF19FBC2, 0xCF1AFBC2, 0xCF1BFBC2, 0xCF1CFBC2, 0xCF1DFBC2, 0xCF1EFBC2, 0xCF1FFBC2, 0xCF20FBC2, 0xCF21FBC2, 0xCF22FBC2, 0xCF23FBC2, 0xCF24FBC2, 0xCF25FBC2, 0xCF26FBC2, 0xCF27FBC2, + 0xCF28FBC2, 0xCF29FBC2, 0xCF2AFBC2, 0xCF2BFBC2, 0xCF2CFBC2, 0xCF2DFBC2, 0xCF2EFBC2, 0xCF2FFBC2, 0xCF30FBC2, 0xCF31FBC2, 0xCF32FBC2, 0xCF33FBC2, 0xCF34FBC2, 0xCF35FBC2, 0xCF36FBC2, + 0xCF37FBC2, 0xCF38FBC2, 0xCF39FBC2, 0xCF3AFBC2, 0xCF3BFBC2, 0xCF3CFBC2, 0xCF3DFBC2, 0xCF3EFBC2, 0xCF3FFBC2, 0xCF40FBC2, 0xCF41FBC2, 0xCF42FBC2, 
0xCF43FBC2, 0xCF44FBC2, 0xCF45FBC2, + 0xCF46FBC2, 0xCF47FBC2, 0xCF48FBC2, 0xCF49FBC2, 0xCF4AFBC2, 0xCF4BFBC2, 0xCF4CFBC2, 0xCF4DFBC2, 0xCF4EFBC2, 0xCF4FFBC2, 0xCF50FBC2, 0xCF51FBC2, 0xCF52FBC2, 0xCF53FBC2, 0xCF54FBC2, + 0xCF55FBC2, 0xCF56FBC2, 0xCF57FBC2, 0xCF58FBC2, 0xCF59FBC2, 0xCF5AFBC2, 0xCF5BFBC2, 0xCF5CFBC2, 0xCF5DFBC2, 0xCF5EFBC2, 0xCF5FFBC2, 0xCF60FBC2, 0xCF61FBC2, 0xCF62FBC2, 0xCF63FBC2, + 0xCF64FBC2, 0xCF65FBC2, 0xCF66FBC2, 0xCF67FBC2, 0xCF68FBC2, 0xCF69FBC2, 0xCF6AFBC2, 0xCF6BFBC2, 0xCF6CFBC2, 0xCF6DFBC2, 0xCF6EFBC2, 0xCF6FFBC2, 0xCF70FBC2, 0xCF71FBC2, 0xCF72FBC2, + 0xCF73FBC2, 0xCF74FBC2, 0xCF75FBC2, 0xCF76FBC2, 0xCF77FBC2, 0xCF78FBC2, 0xCF79FBC2, 0xCF7AFBC2, 0xCF7BFBC2, 0xCF7CFBC2, 0xCF7DFBC2, 0xCF7EFBC2, 0xCF7FFBC2, 0xCF80FBC2, 0xCF81FBC2, + 0xCF82FBC2, 0xCF83FBC2, 0xCF84FBC2, 0xCF85FBC2, 0xCF86FBC2, 0xCF87FBC2, 0xCF88FBC2, 0xCF89FBC2, 0xCF8AFBC2, 0xCF8BFBC2, 0xCF8CFBC2, 0xCF8DFBC2, 0xCF8EFBC2, 0xCF8FFBC2, 0xCF90FBC2, + 0xCF91FBC2, 0xCF92FBC2, 0xCF93FBC2, 0xCF94FBC2, 0xCF95FBC2, 0xCF96FBC2, 0xCF97FBC2, 0xCF98FBC2, 0xCF99FBC2, 0xCF9AFBC2, 0xCF9BFBC2, 0xCF9CFBC2, 0xCF9DFBC2, 0xCF9EFBC2, 0xCF9FFBC2, + 0xCFA0FBC2, 0xCFA1FBC2, 0xCFA2FBC2, 0xCFA3FBC2, 0xCFA4FBC2, 0xCFA5FBC2, 0xCFA6FBC2, 0xCFA7FBC2, 0xCFA8FBC2, 0xCFA9FBC2, 0xCFAAFBC2, 0xCFABFBC2, 0xCFACFBC2, 0xCFADFBC2, 0xCFAEFBC2, + 0xCFAFFBC2, 0xCFB0FBC2, 0xCFB1FBC2, 0xCFB2FBC2, 0xCFB3FBC2, 0xCFB4FBC2, 0xCFB5FBC2, 0xCFB6FBC2, 0xCFB7FBC2, 0xCFB8FBC2, 0xCFB9FBC2, 0xCFBAFBC2, 0xCFBBFBC2, 0xCFBCFBC2, 0xCFBDFBC2, + 0xCFBEFBC2, 0xCFBFFBC2, 0xCFC0FBC2, 0xCFC1FBC2, 0xCFC2FBC2, 0xCFC3FBC2, 0xCFC4FBC2, 0xCFC5FBC2, 0xCFC6FBC2, 0xCFC7FBC2, 0xCFC8FBC2, 0xCFC9FBC2, 0xCFCAFBC2, 0xCFCBFBC2, 0xCFCCFBC2, + 0xCFCDFBC2, 0xCFCEFBC2, 0xCFCFFBC2, 0xCFD0FBC2, 0xCFD1FBC2, 0xCFD2FBC2, 0xCFD3FBC2, 0xCFD4FBC2, 0xCFD5FBC2, 0xCFD6FBC2, 0xCFD7FBC2, 0xCFD8FBC2, 0xCFD9FBC2, 0xCFDAFBC2, 0xCFDBFBC2, + 0xCFDCFBC2, 0xCFDDFBC2, 0xCFDEFBC2, 0xCFDFFBC2, 0xCFE0FBC2, 0xCFE1FBC2, 0xCFE2FBC2, 0xCFE3FBC2, 0xCFE4FBC2, 0xCFE5FBC2, 0xCFE6FBC2, 
0xCFE7FBC2, 0xCFE8FBC2, 0xCFE9FBC2, 0xCFEAFBC2, + 0xCFEBFBC2, 0xCFECFBC2, 0xCFEDFBC2, 0xCFEEFBC2, 0xCFEFFBC2, 0xCFF0FBC2, 0xCFF1FBC2, 0xCFF2FBC2, 0xCFF3FBC2, 0xCFF4FBC2, 0xCFF5FBC2, 0xCFF6FBC2, 0xCFF7FBC2, 0xCFF8FBC2, 0xCFF9FBC2, + 0xCFFAFBC2, 0xCFFBFBC2, 0xCFFCFBC2, 0xCFFDFBC2, 0xCFFEFBC2, 0xCFFFFBC2, 0xD000FBC2, 0xD001FBC2, 0xD002FBC2, 0xD003FBC2, 0xD004FBC2, 0xD005FBC2, 0xD006FBC2, 0xD007FBC2, 0xD008FBC2, + 0xD009FBC2, 0xD00AFBC2, 0xD00BFBC2, 0xD00CFBC2, 0xD00DFBC2, 0xD00EFBC2, 0xD00FFBC2, 0xD010FBC2, 0xD011FBC2, 0xD012FBC2, 0xD013FBC2, 0xD014FBC2, 0xD015FBC2, 0xD016FBC2, 0xD017FBC2, + 0xD018FBC2, 0xD019FBC2, 0xD01AFBC2, 0xD01BFBC2, 0xD01CFBC2, 0xD01DFBC2, 0xD01EFBC2, 0xD01FFBC2, 0xD020FBC2, 0xD021FBC2, 0xD022FBC2, 0xD023FBC2, 0xD024FBC2, 0xD025FBC2, 0xD026FBC2, + 0xD027FBC2, 0xD028FBC2, 0xD029FBC2, 0xD02AFBC2, 0xD02BFBC2, 0xD02CFBC2, 0xD02DFBC2, 0xD02EFBC2, 0xD02FFBC2, 0xD030FBC2, 0xD031FBC2, 0xD032FBC2, 0xD033FBC2, 0xD034FBC2, 0xD035FBC2, + 0xD036FBC2, 0xD037FBC2, 0xD038FBC2, 0xD039FBC2, 0xD03AFBC2, 0xD03BFBC2, 0xD03CFBC2, 0xD03DFBC2, 0xD03EFBC2, 0xD03FFBC2, 0xD040FBC2, 0xD041FBC2, 0xD042FBC2, 0xD043FBC2, 0xD044FBC2, + 0xD045FBC2, 0xD046FBC2, 0xD047FBC2, 0xD048FBC2, 0xD049FBC2, 0xD04AFBC2, 0xD04BFBC2, 0xD04CFBC2, 0xD04DFBC2, 0xD04EFBC2, 0xD04FFBC2, 0xD050FBC2, 0xD051FBC2, 0xD052FBC2, 0xD053FBC2, + 0xD054FBC2, 0xD055FBC2, 0xD056FBC2, 0xD057FBC2, 0xD058FBC2, 0xD059FBC2, 0xD05AFBC2, 0xD05BFBC2, 0xD05CFBC2, 0xD05DFBC2, 0xD05EFBC2, 0xD05FFBC2, 0xD060FBC2, 0xD061FBC2, 0xD062FBC2, + 0xD063FBC2, 0xD064FBC2, 0xD065FBC2, 0xD066FBC2, 0xD067FBC2, 0xD068FBC2, 0xD069FBC2, 0xD06AFBC2, 0xD06BFBC2, 0xD06CFBC2, 0xD06DFBC2, 0xD06EFBC2, 0xD06FFBC2, 0xD070FBC2, 0xD071FBC2, + 0xD072FBC2, 0xD073FBC2, 0xD074FBC2, 0xD075FBC2, 0xD076FBC2, 0xD077FBC2, 0xD078FBC2, 0xD079FBC2, 0xD07AFBC2, 0xD07BFBC2, 0xD07CFBC2, 0xD07DFBC2, 0xD07EFBC2, 0xD07FFBC2, 0xD080FBC2, + 0xD081FBC2, 0xD082FBC2, 0xD083FBC2, 0xD084FBC2, 0xD085FBC2, 0xD086FBC2, 0xD087FBC2, 0xD088FBC2, 0xD089FBC2, 0xD08AFBC2, 
0xD08BFBC2, 0xD08CFBC2, 0xD08DFBC2, 0xD08EFBC2, 0xD08FFBC2, + 0xD090FBC2, 0xD091FBC2, 0xD092FBC2, 0xD093FBC2, 0xD094FBC2, 0xD095FBC2, 0xD096FBC2, 0xD097FBC2, 0xD098FBC2, 0xD099FBC2, 0xD09AFBC2, 0xD09BFBC2, 0xD09CFBC2, 0xD09DFBC2, 0xD09EFBC2, + 0xD09FFBC2, 0xD0A0FBC2, 0xD0A1FBC2, 0xD0A2FBC2, 0xD0A3FBC2, 0xD0A4FBC2, 0xD0A5FBC2, 0xD0A6FBC2, 0xD0A7FBC2, 0xD0A8FBC2, 0xD0A9FBC2, 0xD0AAFBC2, 0xD0ABFBC2, 0xD0ACFBC2, 0xD0ADFBC2, + 0xD0AEFBC2, 0xD0AFFBC2, 0xD0B0FBC2, 0xD0B1FBC2, 0xD0B2FBC2, 0xD0B3FBC2, 0xD0B4FBC2, 0xD0B5FBC2, 0xD0B6FBC2, 0xD0B7FBC2, 0xD0B8FBC2, 0xD0B9FBC2, 0xD0BAFBC2, 0xD0BBFBC2, 0xD0BCFBC2, + 0xD0BDFBC2, 0xD0BEFBC2, 0xD0BFFBC2, 0xD0C0FBC2, 0xD0C1FBC2, 0xD0C2FBC2, 0xD0C3FBC2, 0xD0C4FBC2, 0xD0C5FBC2, 0xD0C6FBC2, 0xD0C7FBC2, 0xD0C8FBC2, 0xD0C9FBC2, 0xD0CAFBC2, 0xD0CBFBC2, + 0xD0CCFBC2, 0xD0CDFBC2, 0xD0CEFBC2, 0xD0CFFBC2, 0xD0D0FBC2, 0xD0D1FBC2, 0xD0D2FBC2, 0xD0D3FBC2, 0xD0D4FBC2, 0xD0D5FBC2, 0xD0D6FBC2, 0xD0D7FBC2, 0xD0D8FBC2, 0xD0D9FBC2, 0xD0DAFBC2, + 0xD0DBFBC2, 0xD0DCFBC2, 0xD0DDFBC2, 0xD0DEFBC2, 0xD0DFFBC2, 0xD0E0FBC2, 0xD0E1FBC2, 0xD0E2FBC2, 0xD0E3FBC2, 0xD0E4FBC2, 0xD0E5FBC2, 0xD0E6FBC2, 0xD0E7FBC2, 0xD0E8FBC2, 0xD0E9FBC2, + 0xD0EAFBC2, 0xD0EBFBC2, 0xD0ECFBC2, 0xD0EDFBC2, 0xD0EEFBC2, 0xD0EFFBC2, 0xD0F0FBC2, 0xD0F1FBC2, 0xD0F2FBC2, 0xD0F3FBC2, 0xD0F4FBC2, 0xD0F5FBC2, 0xD0F6FBC2, 0xD0F7FBC2, 0xD0F8FBC2, + 0xD0F9FBC2, 0xD0FAFBC2, 0xD0FBFBC2, 0xD0FCFBC2, 0xD0FDFBC2, 0xD0FEFBC2, 0xD0FFFBC2, 0xD100FBC2, 0xD101FBC2, 0xD102FBC2, 0xD103FBC2, 0xD104FBC2, 0xD105FBC2, 0xD106FBC2, 0xD107FBC2, + 0xD108FBC2, 0xD109FBC2, 0xD10AFBC2, 0xD10BFBC2, 0xD10CFBC2, 0xD10DFBC2, 0xD10EFBC2, 0xD10FFBC2, 0xD110FBC2, 0xD111FBC2, 0xD112FBC2, 0xD113FBC2, 0xD114FBC2, 0xD115FBC2, 0xD116FBC2, + 0xD117FBC2, 0xD118FBC2, 0xD119FBC2, 0xD11AFBC2, 0xD11BFBC2, 0xD11CFBC2, 0xD11DFBC2, 0xD11EFBC2, 0xD11FFBC2, 0xD120FBC2, 0xD121FBC2, 0xD122FBC2, 0xD123FBC2, 0xD124FBC2, 0xD125FBC2, + 0xD126FBC2, 0xD127FBC2, 0xD128FBC2, 0xD129FBC2, 0xD12AFBC2, 0xD12BFBC2, 0xD12CFBC2, 0xD12DFBC2, 0xD12EFBC2, 
0xD12FFBC2, 0xD130FBC2, 0xD131FBC2, 0xD132FBC2, 0xD133FBC2, 0xD134FBC2, + 0xD135FBC2, 0xD136FBC2, 0xD137FBC2, 0xD138FBC2, 0xD139FBC2, 0xD13AFBC2, 0xD13BFBC2, 0xD13CFBC2, 0xD13DFBC2, 0xD13EFBC2, 0xD13FFBC2, 0xD140FBC2, 0xD141FBC2, 0xD142FBC2, 0xD143FBC2, + 0xD144FBC2, 0xD145FBC2, 0xD146FBC2, 0xD147FBC2, 0xD148FBC2, 0xD149FBC2, 0xD14AFBC2, 0xD14BFBC2, 0xD14CFBC2, 0xD14DFBC2, 0xD14EFBC2, 0xD14FFBC2, 0xD150FBC2, 0xD151FBC2, 0xD152FBC2, + 0xD153FBC2, 0xD154FBC2, 0xD155FBC2, 0xD156FBC2, 0xD157FBC2, 0xD158FBC2, 0xD159FBC2, 0xD15AFBC2, 0xD15BFBC2, 0xD15CFBC2, 0xD15DFBC2, 0xD15EFBC2, 0xD15FFBC2, 0xD160FBC2, 0xD161FBC2, + 0xD162FBC2, 0xD163FBC2, 0xD164FBC2, 0xD165FBC2, 0xD166FBC2, 0xD167FBC2, 0xD168FBC2, 0xD169FBC2, 0xD16AFBC2, 0xD16BFBC2, 0xD16CFBC2, 0xD16DFBC2, 0xD16EFBC2, 0xD16FFBC2, 0xD170FBC2, + 0xD171FBC2, 0xD172FBC2, 0xD173FBC2, 0xD174FBC2, 0xD175FBC2, 0xD176FBC2, 0xD177FBC2, 0xD178FBC2, 0xD179FBC2, 0xD17AFBC2, 0xD17BFBC2, 0xD17CFBC2, 0xD17DFBC2, 0xD17EFBC2, 0xD17FFBC2, + 0xD180FBC2, 0xD181FBC2, 0xD182FBC2, 0xD183FBC2, 0xD184FBC2, 0xD185FBC2, 0xD186FBC2, 0xD187FBC2, 0xD188FBC2, 0xD189FBC2, 0xD18AFBC2, 0xD18BFBC2, 0xD18CFBC2, 0xD18DFBC2, 0xD18EFBC2, + 0xD18FFBC2, 0xD190FBC2, 0xD191FBC2, 0xD192FBC2, 0xD193FBC2, 0xD194FBC2, 0xD195FBC2, 0xD196FBC2, 0xD197FBC2, 0xD198FBC2, 0xD199FBC2, 0xD19AFBC2, 0xD19BFBC2, 0xD19CFBC2, 0xD19DFBC2, + 0xD19EFBC2, 0xD19FFBC2, 0xD1A0FBC2, 0xD1A1FBC2, 0xD1A2FBC2, 0xD1A3FBC2, 0xD1A4FBC2, 0xD1A5FBC2, 0xD1A6FBC2, 0xD1A7FBC2, 0xD1A8FBC2, 0xD1A9FBC2, 0xD1AAFBC2, 0xD1ABFBC2, 0xD1ACFBC2, + 0xD1ADFBC2, 0xD1AEFBC2, 0xD1AFFBC2, 0xD1B0FBC2, 0xD1B1FBC2, 0xD1B2FBC2, 0xD1B3FBC2, 0xD1B4FBC2, 0xD1B5FBC2, 0xD1B6FBC2, 0xD1B7FBC2, 0xD1B8FBC2, 0xD1B9FBC2, 0xD1BAFBC2, 0xD1BBFBC2, + 0xD1BCFBC2, 0xD1BDFBC2, 0xD1BEFBC2, 0xD1BFFBC2, 0xD1C0FBC2, 0xD1C1FBC2, 0xD1C2FBC2, 0xD1C3FBC2, 0xD1C4FBC2, 0xD1C5FBC2, 0xD1C6FBC2, 0xD1C7FBC2, 0xD1C8FBC2, 0xD1C9FBC2, 0xD1CAFBC2, + 0xD1CBFBC2, 0xD1CCFBC2, 0xD1CDFBC2, 0xD1CEFBC2, 0xD1CFFBC2, 0xD1D0FBC2, 0xD1D1FBC2, 0xD1D2FBC2, 
0xD1D3FBC2, 0xD1D4FBC2, 0xD1D5FBC2, 0xD1D6FBC2, 0xD1D7FBC2, 0xD1D8FBC2, 0xD1D9FBC2, + 0xD1DAFBC2, 0xD1DBFBC2, 0xD1DCFBC2, 0xD1DDFBC2, 0xD1DEFBC2, 0xD1DFFBC2, 0xD1E0FBC2, 0xD1E1FBC2, 0xD1E2FBC2, 0xD1E3FBC2, 0xD1E4FBC2, 0xD1E5FBC2, 0xD1E6FBC2, 0xD1E7FBC2, 0xD1E8FBC2, + 0xD1E9FBC2, 0xD1EAFBC2, 0xD1EBFBC2, 0xD1ECFBC2, 0xD1EDFBC2, 0xD1EEFBC2, 0xD1EFFBC2, 0xD1F0FBC2, 0xD1F1FBC2, 0xD1F2FBC2, 0xD1F3FBC2, 0xD1F4FBC2, 0xD1F5FBC2, 0xD1F6FBC2, 0xD1F7FBC2, + 0xD1F8FBC2, 0xD1F9FBC2, 0xD1FAFBC2, 0xD1FBFBC2, 0xD1FCFBC2, 0xD1FDFBC2, 0xD1FEFBC2, 0xD1FFFBC2, 0xD200FBC2, 0xD201FBC2, 0xD202FBC2, 0xD203FBC2, 0xD204FBC2, 0xD205FBC2, 0xD206FBC2, + 0xD207FBC2, 0xD208FBC2, 0xD209FBC2, 0xD20AFBC2, 0xD20BFBC2, 0xD20CFBC2, 0xD20DFBC2, 0xD20EFBC2, 0xD20FFBC2, 0xD210FBC2, 0xD211FBC2, 0xD212FBC2, 0xD213FBC2, 0xD214FBC2, 0xD215FBC2, + 0xD216FBC2, 0xD217FBC2, 0xD218FBC2, 0xD219FBC2, 0xD21AFBC2, 0xD21BFBC2, 0xD21CFBC2, 0xD21DFBC2, 0xD21EFBC2, 0xD21FFBC2, 0xD220FBC2, 0xD221FBC2, 0xD222FBC2, 0xD223FBC2, 0xD224FBC2, + 0xD225FBC2, 0xD226FBC2, 0xD227FBC2, 0xD228FBC2, 0xD229FBC2, 0xD22AFBC2, 0xD22BFBC2, 0xD22CFBC2, 0xD22DFBC2, 0xD22EFBC2, 0xD22FFBC2, 0xD230FBC2, 0xD231FBC2, 0xD232FBC2, 0xD233FBC2, + 0xD234FBC2, 0xD235FBC2, 0xD236FBC2, 0xD237FBC2, 0xD238FBC2, 0xD239FBC2, 0xD23AFBC2, 0xD23BFBC2, 0xD23CFBC2, 0xD23DFBC2, 0xD23EFBC2, 0xD23FFBC2, 0xD240FBC2, 0xD241FBC2, 0xD242FBC2, + 0xD243FBC2, 0xD244FBC2, 0xD245FBC2, 0xD246FBC2, 0xD247FBC2, 0xD248FBC2, 0xD249FBC2, 0xD24AFBC2, 0xD24BFBC2, 0xD24CFBC2, 0xD24DFBC2, 0xD24EFBC2, 0xD24FFBC2, 0xD250FBC2, 0xD251FBC2, + 0xD252FBC2, 0xD253FBC2, 0xD254FBC2, 0xD255FBC2, 0xD256FBC2, 0xD257FBC2, 0xD258FBC2, 0xD259FBC2, 0xD25AFBC2, 0xD25BFBC2, 0xD25CFBC2, 0xD25DFBC2, 0xD25EFBC2, 0xD25FFBC2, 0xD260FBC2, + 0xD261FBC2, 0xD262FBC2, 0xD263FBC2, 0xD264FBC2, 0xD265FBC2, 0xD266FBC2, 0xD267FBC2, 0xD268FBC2, 0xD269FBC2, 0xD26AFBC2, 0xD26BFBC2, 0xD26CFBC2, 0xD26DFBC2, 0xD26EFBC2, 0xD26FFBC2, + 0xD270FBC2, 0xD271FBC2, 0xD272FBC2, 0xD273FBC2, 0xD274FBC2, 0xD275FBC2, 0xD276FBC2, 
0xD277FBC2, 0xD278FBC2, 0xD279FBC2, 0xD27AFBC2, 0xD27BFBC2, 0xD27CFBC2, 0xD27DFBC2, 0xD27EFBC2, + 0xD27FFBC2, 0xD280FBC2, 0xD281FBC2, 0xD282FBC2, 0xD283FBC2, 0xD284FBC2, 0xD285FBC2, 0xD286FBC2, 0xD287FBC2, 0xD288FBC2, 0xD289FBC2, 0xD28AFBC2, 0xD28BFBC2, 0xD28CFBC2, 0xD28DFBC2, + 0xD28EFBC2, 0xD28FFBC2, 0xD290FBC2, 0xD291FBC2, 0xD292FBC2, 0xD293FBC2, 0xD294FBC2, 0xD295FBC2, 0xD296FBC2, 0xD297FBC2, 0xD298FBC2, 0xD299FBC2, 0xD29AFBC2, 0xD29BFBC2, 0xD29CFBC2, + 0xD29DFBC2, 0xD29EFBC2, 0xD29FFBC2, 0xD2A0FBC2, 0xD2A1FBC2, 0xD2A2FBC2, 0xD2A3FBC2, 0xD2A4FBC2, 0xD2A5FBC2, 0xD2A6FBC2, 0xD2A7FBC2, 0xD2A8FBC2, 0xD2A9FBC2, 0xD2AAFBC2, 0xD2ABFBC2, + 0xD2ACFBC2, 0xD2ADFBC2, 0xD2AEFBC2, 0xD2AFFBC2, 0xD2B0FBC2, 0xD2B1FBC2, 0xD2B2FBC2, 0xD2B3FBC2, 0xD2B4FBC2, 0xD2B5FBC2, 0xD2B6FBC2, 0xD2B7FBC2, 0xD2B8FBC2, 0xD2B9FBC2, 0xD2BAFBC2, + 0xD2BBFBC2, 0xD2BCFBC2, 0xD2BDFBC2, 0xD2BEFBC2, 0xD2BFFBC2, 0xD2C0FBC2, 0xD2C1FBC2, 0xD2C2FBC2, 0xD2C3FBC2, 0xD2C4FBC2, 0xD2C5FBC2, 0xD2C6FBC2, 0xD2C7FBC2, 0xD2C8FBC2, 0xD2C9FBC2, + 0xD2CAFBC2, 0xD2CBFBC2, 0xD2CCFBC2, 0xD2CDFBC2, 0xD2CEFBC2, 0xD2CFFBC2, 0xD2D0FBC2, 0xD2D1FBC2, 0xD2D2FBC2, 0xD2D3FBC2, 0xD2D4FBC2, 0xD2D5FBC2, 0xD2D6FBC2, 0xD2D7FBC2, 0xD2D8FBC2, + 0xD2D9FBC2, 0xD2DAFBC2, 0xD2DBFBC2, 0xD2DCFBC2, 0xD2DDFBC2, 0xD2DEFBC2, 0xD2DFFBC2, 0xD2E0FBC2, 0xD2E1FBC2, 0xD2E2FBC2, 0xD2E3FBC2, 0xD2E4FBC2, 0xD2E5FBC2, 0xD2E6FBC2, 0xD2E7FBC2, + 0xD2E8FBC2, 0xD2E9FBC2, 0xD2EAFBC2, 0xD2EBFBC2, 0xD2ECFBC2, 0xD2EDFBC2, 0xD2EEFBC2, 0xD2EFFBC2, 0xD2F0FBC2, 0xD2F1FBC2, 0xD2F2FBC2, 0xD2F3FBC2, 0xD2F4FBC2, 0xD2F5FBC2, 0xD2F6FBC2, + 0xD2F7FBC2, 0xD2F8FBC2, 0xD2F9FBC2, 0xD2FAFBC2, 0xD2FBFBC2, 0xD2FCFBC2, 0xD2FDFBC2, 0xD2FEFBC2, 0xD2FFFBC2, 0xD300FBC2, 0xD301FBC2, 0xD302FBC2, 0xD303FBC2, 0xD304FBC2, 0xD305FBC2, + 0xD306FBC2, 0xD307FBC2, 0xD308FBC2, 0xD309FBC2, 0xD30AFBC2, 0xD30BFBC2, 0xD30CFBC2, 0xD30DFBC2, 0xD30EFBC2, 0xD30FFBC2, 0xD310FBC2, 0xD311FBC2, 0xD312FBC2, 0xD313FBC2, 0xD314FBC2, + 0xD315FBC2, 0xD316FBC2, 0xD317FBC2, 0xD318FBC2, 0xD319FBC2, 0xD31AFBC2, 
0xD31BFBC2, 0xD31CFBC2, 0xD31DFBC2, 0xD31EFBC2, 0xD31FFBC2, 0xD320FBC2, 0xD321FBC2, 0xD322FBC2, 0xD323FBC2, + 0xD324FBC2, 0xD325FBC2, 0xD326FBC2, 0xD327FBC2, 0xD328FBC2, 0xD329FBC2, 0xD32AFBC2, 0xD32BFBC2, 0xD32CFBC2, 0xD32DFBC2, 0xD32EFBC2, 0xD32FFBC2, 0xD330FBC2, 0xD331FBC2, 0xD332FBC2, + 0xD333FBC2, 0xD334FBC2, 0xD335FBC2, 0xD336FBC2, 0xD337FBC2, 0xD338FBC2, 0xD339FBC2, 0xD33AFBC2, 0xD33BFBC2, 0xD33CFBC2, 0xD33DFBC2, 0xD33EFBC2, 0xD33FFBC2, 0xD340FBC2, 0xD341FBC2, + 0xD342FBC2, 0xD343FBC2, 0xD344FBC2, 0xD345FBC2, 0xD346FBC2, 0xD347FBC2, 0xD348FBC2, 0xD349FBC2, 0xD34AFBC2, 0xD34BFBC2, 0xD34CFBC2, 0xD34DFBC2, 0xD34EFBC2, 0xD34FFBC2, 0xD350FBC2, + 0xD351FBC2, 0xD352FBC2, 0xD353FBC2, 0xD354FBC2, 0xD355FBC2, 0xD356FBC2, 0xD357FBC2, 0xD358FBC2, 0xD359FBC2, 0xD35AFBC2, 0xD35BFBC2, 0xD35CFBC2, 0xD35DFBC2, 0xD35EFBC2, 0xD35FFBC2, + 0xD360FBC2, 0xD361FBC2, 0xD362FBC2, 0xD363FBC2, 0xD364FBC2, 0xD365FBC2, 0xD366FBC2, 0xD367FBC2, 0xD368FBC2, 0xD369FBC2, 0xD36AFBC2, 0xD36BFBC2, 0xD36CFBC2, 0xD36DFBC2, 0xD36EFBC2, + 0xD36FFBC2, 0xD370FBC2, 0xD371FBC2, 0xD372FBC2, 0xD373FBC2, 0xD374FBC2, 0xD375FBC2, 0xD376FBC2, 0xD377FBC2, 0xD378FBC2, 0xD379FBC2, 0xD37AFBC2, 0xD37BFBC2, 0xD37CFBC2, 0xD37DFBC2, + 0xD37EFBC2, 0xD37FFBC2, 0xD380FBC2, 0xD381FBC2, 0xD382FBC2, 0xD383FBC2, 0xD384FBC2, 0xD385FBC2, 0xD386FBC2, 0xD387FBC2, 0xD388FBC2, 0xD389FBC2, 0xD38AFBC2, 0xD38BFBC2, 0xD38CFBC2, + 0xD38DFBC2, 0xD38EFBC2, 0xD38FFBC2, 0xD390FBC2, 0xD391FBC2, 0xD392FBC2, 0xD393FBC2, 0xD394FBC2, 0xD395FBC2, 0xD396FBC2, 0xD397FBC2, 0xD398FBC2, 0xD399FBC2, 0xD39AFBC2, 0xD39BFBC2, + 0xD39CFBC2, 0xD39DFBC2, 0xD39EFBC2, 0xD39FFBC2, 0xD3A0FBC2, 0xD3A1FBC2, 0xD3A2FBC2, 0xD3A3FBC2, 0xD3A4FBC2, 0xD3A5FBC2, 0xD3A6FBC2, 0xD3A7FBC2, 0xD3A8FBC2, 0xD3A9FBC2, 0xD3AAFBC2, + 0xD3ABFBC2, 0xD3ACFBC2, 0xD3ADFBC2, 0xD3AEFBC2, 0xD3AFFBC2, 0xD3B0FBC2, 0xD3B1FBC2, 0xD3B2FBC2, 0xD3B3FBC2, 0xD3B4FBC2, 0xD3B5FBC2, 0xD3B6FBC2, 0xD3B7FBC2, 0xD3B8FBC2, 0xD3B9FBC2, + 0xD3BAFBC2, 0xD3BBFBC2, 0xD3BCFBC2, 0xD3BDFBC2, 0xD3BEFBC2, 
0xD3BFFBC2, 0xD3C0FBC2, 0xD3C1FBC2, 0xD3C2FBC2, 0xD3C3FBC2, 0xD3C4FBC2, 0xD3C5FBC2, 0xD3C6FBC2, 0xD3C7FBC2, 0xD3C8FBC2, + 0xD3C9FBC2, 0xD3CAFBC2, 0xD3CBFBC2, 0xD3CCFBC2, 0xD3CDFBC2, 0xD3CEFBC2, 0xD3CFFBC2, 0xD3D0FBC2, 0xD3D1FBC2, 0xD3D2FBC2, 0xD3D3FBC2, 0xD3D4FBC2, 0xD3D5FBC2, 0xD3D6FBC2, 0xD3D7FBC2, + 0xD3D8FBC2, 0xD3D9FBC2, 0xD3DAFBC2, 0xD3DBFBC2, 0xD3DCFBC2, 0xD3DDFBC2, 0xD3DEFBC2, 0xD3DFFBC2, 0xD3E0FBC2, 0xD3E1FBC2, 0xD3E2FBC2, 0xD3E3FBC2, 0xD3E4FBC2, 0xD3E5FBC2, 0xD3E6FBC2, + 0xD3E7FBC2, 0xD3E8FBC2, 0xD3E9FBC2, 0xD3EAFBC2, 0xD3EBFBC2, 0xD3ECFBC2, 0xD3EDFBC2, 0xD3EEFBC2, 0xD3EFFBC2, 0xD3F0FBC2, 0xD3F1FBC2, 0xD3F2FBC2, 0xD3F3FBC2, 0xD3F4FBC2, 0xD3F5FBC2, + 0xD3F6FBC2, 0xD3F7FBC2, 0xD3F8FBC2, 0xD3F9FBC2, 0xD3FAFBC2, 0xD3FBFBC2, 0xD3FCFBC2, 0xD3FDFBC2, 0xD3FEFBC2, 0xD3FFFBC2, 0xD400FBC2, 0xD401FBC2, 0xD402FBC2, 0xD403FBC2, 0xD404FBC2, + 0xD405FBC2, 0xD406FBC2, 0xD407FBC2, 0xD408FBC2, 0xD409FBC2, 0xD40AFBC2, 0xD40BFBC2, 0xD40CFBC2, 0xD40DFBC2, 0xD40EFBC2, 0xD40FFBC2, 0xD410FBC2, 0xD411FBC2, 0xD412FBC2, 0xD413FBC2, + 0xD414FBC2, 0xD415FBC2, 0xD416FBC2, 0xD417FBC2, 0xD418FBC2, 0xD419FBC2, 0xD41AFBC2, 0xD41BFBC2, 0xD41CFBC2, 0xD41DFBC2, 0xD41EFBC2, 0xD41FFBC2, 0xD420FBC2, 0xD421FBC2, 0xD422FBC2, + 0xD423FBC2, 0xD424FBC2, 0xD425FBC2, 0xD426FBC2, 0xD427FBC2, 0xD428FBC2, 0xD429FBC2, 0xD42AFBC2, 0xD42BFBC2, 0xD42CFBC2, 0xD42DFBC2, 0xD42EFBC2, 0xD42FFBC2, 0xD430FBC2, 0xD431FBC2, + 0xD432FBC2, 0xD433FBC2, 0xD434FBC2, 0xD435FBC2, 0xD436FBC2, 0xD437FBC2, 0xD438FBC2, 0xD439FBC2, 0xD43AFBC2, 0xD43BFBC2, 0xD43CFBC2, 0xD43DFBC2, 0xD43EFBC2, 0xD43FFBC2, 0xD440FBC2, + 0xD441FBC2, 0xD442FBC2, 0xD443FBC2, 0xD444FBC2, 0xD445FBC2, 0xD446FBC2, 0xD447FBC2, 0xD448FBC2, 0xD449FBC2, 0xD44AFBC2, 0xD44BFBC2, 0xD44CFBC2, 0xD44DFBC2, 0xD44EFBC2, 0xD44FFBC2, + 0xD450FBC2, 0xD451FBC2, 0xD452FBC2, 0xD453FBC2, 0xD454FBC2, 0xD455FBC2, 0xD456FBC2, 0xD457FBC2, 0xD458FBC2, 0xD459FBC2, 0xD45AFBC2, 0xD45BFBC2, 0xD45CFBC2, 0xD45DFBC2, 0xD45EFBC2, + 0xD45FFBC2, 0xD460FBC2, 0xD461FBC2, 0xD462FBC2, 
0xD463FBC2, 0xD464FBC2, 0xD465FBC2, 0xD466FBC2, 0xD467FBC2, 0xD468FBC2, 0xD469FBC2, 0xD46AFBC2, 0xD46BFBC2, 0xD46CFBC2, 0xD46DFBC2, + 0xD46EFBC2, 0xD46FFBC2, 0xD470FBC2, 0xD471FBC2, 0xD472FBC2, 0xD473FBC2, 0xD474FBC2, 0xD475FBC2, 0xD476FBC2, 0xD477FBC2, 0xD478FBC2, 0xD479FBC2, 0xD47AFBC2, 0xD47BFBC2, 0xD47CFBC2, + 0xD47DFBC2, 0xD47EFBC2, 0xD47FFBC2, 0xD480FBC2, 0xD481FBC2, 0xD482FBC2, 0xD483FBC2, 0xD484FBC2, 0xD485FBC2, 0xD486FBC2, 0xD487FBC2, 0xD488FBC2, 0xD489FBC2, 0xD48AFBC2, 0xD48BFBC2, + 0xD48CFBC2, 0xD48DFBC2, 0xD48EFBC2, 0xD48FFBC2, 0xD490FBC2, 0xD491FBC2, 0xD492FBC2, 0xD493FBC2, 0xD494FBC2, 0xD495FBC2, 0xD496FBC2, 0xD497FBC2, 0xD498FBC2, 0xD499FBC2, 0xD49AFBC2, + 0xD49BFBC2, 0xD49CFBC2, 0xD49DFBC2, 0xD49EFBC2, 0xD49FFBC2, 0xD4A0FBC2, 0xD4A1FBC2, 0xD4A2FBC2, 0xD4A3FBC2, 0xD4A4FBC2, 0xD4A5FBC2, 0xD4A6FBC2, 0xD4A7FBC2, 0xD4A8FBC2, 0xD4A9FBC2, + 0xD4AAFBC2, 0xD4ABFBC2, 0xD4ACFBC2, 0xD4ADFBC2, 0xD4AEFBC2, 0xD4AFFBC2, 0xD4B0FBC2, 0xD4B1FBC2, 0xD4B2FBC2, 0xD4B3FBC2, 0xD4B4FBC2, 0xD4B5FBC2, 0xD4B6FBC2, 0xD4B7FBC2, 0xD4B8FBC2, + 0xD4B9FBC2, 0xD4BAFBC2, 0xD4BBFBC2, 0xD4BCFBC2, 0xD4BDFBC2, 0xD4BEFBC2, 0xD4BFFBC2, 0xD4C0FBC2, 0xD4C1FBC2, 0xD4C2FBC2, 0xD4C3FBC2, 0xD4C4FBC2, 0xD4C5FBC2, 0xD4C6FBC2, 0xD4C7FBC2, + 0xD4C8FBC2, 0xD4C9FBC2, 0xD4CAFBC2, 0xD4CBFBC2, 0xD4CCFBC2, 0xD4CDFBC2, 0xD4CEFBC2, 0xD4CFFBC2, 0xD4D0FBC2, 0xD4D1FBC2, 0xD4D2FBC2, 0xD4D3FBC2, 0xD4D4FBC2, 0xD4D5FBC2, 0xD4D6FBC2, + 0xD4D7FBC2, 0xD4D8FBC2, 0xD4D9FBC2, 0xD4DAFBC2, 0xD4DBFBC2, 0xD4DCFBC2, 0xD4DDFBC2, 0xD4DEFBC2, 0xD4DFFBC2, 0xD4E0FBC2, 0xD4E1FBC2, 0xD4E2FBC2, 0xD4E3FBC2, 0xD4E4FBC2, 0xD4E5FBC2, + 0xD4E6FBC2, 0xD4E7FBC2, 0xD4E8FBC2, 0xD4E9FBC2, 0xD4EAFBC2, 0xD4EBFBC2, 0xD4ECFBC2, 0xD4EDFBC2, 0xD4EEFBC2, 0xD4EFFBC2, 0xD4F0FBC2, 0xD4F1FBC2, 0xD4F2FBC2, 0xD4F3FBC2, 0xD4F4FBC2, + 0xD4F5FBC2, 0xD4F6FBC2, 0xD4F7FBC2, 0xD4F8FBC2, 0xD4F9FBC2, 0xD4FAFBC2, 0xD4FBFBC2, 0xD4FCFBC2, 0xD4FDFBC2, 0xD4FEFBC2, 0xD4FFFBC2, 0xD500FBC2, 0xD501FBC2, 0xD502FBC2, 0xD503FBC2, + 0xD504FBC2, 0xD505FBC2, 0xD506FBC2, 
0xD507FBC2, 0xD508FBC2, 0xD509FBC2, 0xD50AFBC2, 0xD50BFBC2, 0xD50CFBC2, 0xD50DFBC2, 0xD50EFBC2, 0xD50FFBC2, 0xD510FBC2, 0xD511FBC2, 0xD512FBC2, + 0xD513FBC2, 0xD514FBC2, 0xD515FBC2, 0xD516FBC2, 0xD517FBC2, 0xD518FBC2, 0xD519FBC2, 0xD51AFBC2, 0xD51BFBC2, 0xD51CFBC2, 0xD51DFBC2, 0xD51EFBC2, 0xD51FFBC2, 0xD520FBC2, 0xD521FBC2, + 0xD522FBC2, 0xD523FBC2, 0xD524FBC2, 0xD525FBC2, 0xD526FBC2, 0xD527FBC2, 0xD528FBC2, 0xD529FBC2, 0xD52AFBC2, 0xD52BFBC2, 0xD52CFBC2, 0xD52DFBC2, 0xD52EFBC2, 0xD52FFBC2, 0xD530FBC2, + 0xD531FBC2, 0xD532FBC2, 0xD533FBC2, 0xD534FBC2, 0xD535FBC2, 0xD536FBC2, 0xD537FBC2, 0xD538FBC2, 0xD539FBC2, 0xD53AFBC2, 0xD53BFBC2, 0xD53CFBC2, 0xD53DFBC2, 0xD53EFBC2, 0xD53FFBC2, + 0xD540FBC2, 0xD541FBC2, 0xD542FBC2, 0xD543FBC2, 0xD544FBC2, 0xD545FBC2, 0xD546FBC2, 0xD547FBC2, 0xD548FBC2, 0xD549FBC2, 0xD54AFBC2, 0xD54BFBC2, 0xD54CFBC2, 0xD54DFBC2, 0xD54EFBC2, + 0xD54FFBC2, 0xD550FBC2, 0xD551FBC2, 0xD552FBC2, 0xD553FBC2, 0xD554FBC2, 0xD555FBC2, 0xD556FBC2, 0xD557FBC2, 0xD558FBC2, 0xD559FBC2, 0xD55AFBC2, 0xD55BFBC2, 0xD55CFBC2, 0xD55DFBC2, + 0xD55EFBC2, 0xD55FFBC2, 0xD560FBC2, 0xD561FBC2, 0xD562FBC2, 0xD563FBC2, 0xD564FBC2, 0xD565FBC2, 0xD566FBC2, 0xD567FBC2, 0xD568FBC2, 0xD569FBC2, 0xD56AFBC2, 0xD56BFBC2, 0xD56CFBC2, + 0xD56DFBC2, 0xD56EFBC2, 0xD56FFBC2, 0xD570FBC2, 0xD571FBC2, 0xD572FBC2, 0xD573FBC2, 0xD574FBC2, 0xD575FBC2, 0xD576FBC2, 0xD577FBC2, 0xD578FBC2, 0xD579FBC2, 0xD57AFBC2, 0xD57BFBC2, + 0xD57CFBC2, 0xD57DFBC2, 0xD57EFBC2, 0xD57FFBC2, 0xD580FBC2, 0xD581FBC2, 0xD582FBC2, 0xD583FBC2, 0xD584FBC2, 0xD585FBC2, 0xD586FBC2, 0xD587FBC2, 0xD588FBC2, 0xD589FBC2, 0xD58AFBC2, + 0xD58BFBC2, 0xD58CFBC2, 0xD58DFBC2, 0xD58EFBC2, 0xD58FFBC2, 0xD590FBC2, 0xD591FBC2, 0xD592FBC2, 0xD593FBC2, 0xD594FBC2, 0xD595FBC2, 0xD596FBC2, 0xD597FBC2, 0xD598FBC2, 0xD599FBC2, + 0xD59AFBC2, 0xD59BFBC2, 0xD59CFBC2, 0xD59DFBC2, 0xD59EFBC2, 0xD59FFBC2, 0xD5A0FBC2, 0xD5A1FBC2, 0xD5A2FBC2, 0xD5A3FBC2, 0xD5A4FBC2, 0xD5A5FBC2, 0xD5A6FBC2, 0xD5A7FBC2, 0xD5A8FBC2, + 0xD5A9FBC2, 0xD5AAFBC2, 
0xD5ABFBC2, 0xD5ACFBC2, 0xD5ADFBC2, 0xD5AEFBC2, 0xD5AFFBC2, 0xD5B0FBC2, 0xD5B1FBC2, 0xD5B2FBC2, 0xD5B3FBC2, 0xD5B4FBC2, 0xD5B5FBC2, 0xD5B6FBC2, 0xD5B7FBC2, + 0xD5B8FBC2, 0xD5B9FBC2, 0xD5BAFBC2, 0xD5BBFBC2, 0xD5BCFBC2, 0xD5BDFBC2, 0xD5BEFBC2, 0xD5BFFBC2, 0xD5C0FBC2, 0xD5C1FBC2, 0xD5C2FBC2, 0xD5C3FBC2, 0xD5C4FBC2, 0xD5C5FBC2, 0xD5C6FBC2, + 0xD5C7FBC2, 0xD5C8FBC2, 0xD5C9FBC2, 0xD5CAFBC2, 0xD5CBFBC2, 0xD5CCFBC2, 0xD5CDFBC2, 0xD5CEFBC2, 0xD5CFFBC2, 0xD5D0FBC2, 0xD5D1FBC2, 0xD5D2FBC2, 0xD5D3FBC2, 0xD5D4FBC2, 0xD5D5FBC2, + 0xD5D6FBC2, 0xD5D7FBC2, 0xD5D8FBC2, 0xD5D9FBC2, 0xD5DAFBC2, 0xD5DBFBC2, 0xD5DCFBC2, 0xD5DDFBC2, 0xD5DEFBC2, 0xD5DFFBC2, 0xD5E0FBC2, 0xD5E1FBC2, 0xD5E2FBC2, 0xD5E3FBC2, 0xD5E4FBC2, + 0xD5E5FBC2, 0xD5E6FBC2, 0xD5E7FBC2, 0xD5E8FBC2, 0xD5E9FBC2, 0xD5EAFBC2, 0xD5EBFBC2, 0xD5ECFBC2, 0xD5EDFBC2, 0xD5EEFBC2, 0xD5EFFBC2, 0xD5F0FBC2, 0xD5F1FBC2, 0xD5F2FBC2, 0xD5F3FBC2, + 0xD5F4FBC2, 0xD5F5FBC2, 0xD5F6FBC2, 0xD5F7FBC2, 0xD5F8FBC2, 0xD5F9FBC2, 0xD5FAFBC2, 0xD5FBFBC2, 0xD5FCFBC2, 0xD5FDFBC2, 0xD5FEFBC2, 0xD5FFFBC2, 0xD600FBC2, 0xD601FBC2, 0xD602FBC2, + 0xD603FBC2, 0xD604FBC2, 0xD605FBC2, 0xD606FBC2, 0xD607FBC2, 0xD608FBC2, 0xD609FBC2, 0xD60AFBC2, 0xD60BFBC2, 0xD60CFBC2, 0xD60DFBC2, 0xD60EFBC2, 0xD60FFBC2, 0xD610FBC2, 0xD611FBC2, + 0xD612FBC2, 0xD613FBC2, 0xD614FBC2, 0xD615FBC2, 0xD616FBC2, 0xD617FBC2, 0xD618FBC2, 0xD619FBC2, 0xD61AFBC2, 0xD61BFBC2, 0xD61CFBC2, 0xD61DFBC2, 0xD61EFBC2, 0xD61FFBC2, 0xD620FBC2, + 0xD621FBC2, 0xD622FBC2, 0xD623FBC2, 0xD624FBC2, 0xD625FBC2, 0xD626FBC2, 0xD627FBC2, 0xD628FBC2, 0xD629FBC2, 0xD62AFBC2, 0xD62BFBC2, 0xD62CFBC2, 0xD62DFBC2, 0xD62EFBC2, 0xD62FFBC2, + 0xD630FBC2, 0xD631FBC2, 0xD632FBC2, 0xD633FBC2, 0xD634FBC2, 0xD635FBC2, 0xD636FBC2, 0xD637FBC2, 0xD638FBC2, 0xD639FBC2, 0xD63AFBC2, 0xD63BFBC2, 0xD63CFBC2, 0xD63DFBC2, 0xD63EFBC2, + 0xD63FFBC2, 0xD640FBC2, 0xD641FBC2, 0xD642FBC2, 0xD643FBC2, 0xD644FBC2, 0xD645FBC2, 0xD646FBC2, 0xD647FBC2, 0xD648FBC2, 0xD649FBC2, 0xD64AFBC2, 0xD64BFBC2, 0xD64CFBC2, 0xD64DFBC2, + 0xD64EFBC2, 
0xD64FFBC2, 0xD650FBC2, 0xD651FBC2, 0xD652FBC2, 0xD653FBC2, 0xD654FBC2, 0xD655FBC2, 0xD656FBC2, 0xD657FBC2, 0xD658FBC2, 0xD659FBC2, 0xD65AFBC2, 0xD65BFBC2, 0xD65CFBC2, + 0xD65DFBC2, 0xD65EFBC2, 0xD65FFBC2, 0xD660FBC2, 0xD661FBC2, 0xD662FBC2, 0xD663FBC2, 0xD664FBC2, 0xD665FBC2, 0xD666FBC2, 0xD667FBC2, 0xD668FBC2, 0xD669FBC2, 0xD66AFBC2, 0xD66BFBC2, + 0xD66CFBC2, 0xD66DFBC2, 0xD66EFBC2, 0xD66FFBC2, 0xD670FBC2, 0xD671FBC2, 0xD672FBC2, 0xD673FBC2, 0xD674FBC2, 0xD675FBC2, 0xD676FBC2, 0xD677FBC2, 0xD678FBC2, 0xD679FBC2, 0xD67AFBC2, + 0xD67BFBC2, 0xD67CFBC2, 0xD67DFBC2, 0xD67EFBC2, 0xD67FFBC2, 0xD680FBC2, 0xD681FBC2, 0xD682FBC2, 0xD683FBC2, 0xD684FBC2, 0xD685FBC2, 0xD686FBC2, 0xD687FBC2, 0xD688FBC2, 0xD689FBC2, + 0xD68AFBC2, 0xD68BFBC2, 0xD68CFBC2, 0xD68DFBC2, 0xD68EFBC2, 0xD68FFBC2, 0xD690FBC2, 0xD691FBC2, 0xD692FBC2, 0xD693FBC2, 0xD694FBC2, 0xD695FBC2, 0xD696FBC2, 0xD697FBC2, 0xD698FBC2, + 0xD699FBC2, 0xD69AFBC2, 0xD69BFBC2, 0xD69CFBC2, 0xD69DFBC2, 0xD69EFBC2, 0xD69FFBC2, 0xD6A0FBC2, 0xD6A1FBC2, 0xD6A2FBC2, 0xD6A3FBC2, 0xD6A4FBC2, 0xD6A5FBC2, 0xD6A6FBC2, 0xD6A7FBC2, + 0xD6A8FBC2, 0xD6A9FBC2, 0xD6AAFBC2, 0xD6ABFBC2, 0xD6ACFBC2, 0xD6ADFBC2, 0xD6AEFBC2, 0xD6AFFBC2, 0xD6B0FBC2, 0xD6B1FBC2, 0xD6B2FBC2, 0xD6B3FBC2, 0xD6B4FBC2, 0xD6B5FBC2, 0xD6B6FBC2, + 0xD6B7FBC2, 0xD6B8FBC2, 0xD6B9FBC2, 0xD6BAFBC2, 0xD6BBFBC2, 0xD6BCFBC2, 0xD6BDFBC2, 0xD6BEFBC2, 0xD6BFFBC2, 0xD6C0FBC2, 0xD6C1FBC2, 0xD6C2FBC2, 0xD6C3FBC2, 0xD6C4FBC2, 0xD6C5FBC2, + 0xD6C6FBC2, 0xD6C7FBC2, 0xD6C8FBC2, 0xD6C9FBC2, 0xD6CAFBC2, 0xD6CBFBC2, 0xD6CCFBC2, 0xD6CDFBC2, 0xD6CEFBC2, 0xD6CFFBC2, 0xD6D0FBC2, 0xD6D1FBC2, 0xD6D2FBC2, 0xD6D3FBC2, 0xD6D4FBC2, + 0xD6D5FBC2, 0xD6D6FBC2, 0xD6D7FBC2, 0xD6D8FBC2, 0xD6D9FBC2, 0xD6DAFBC2, 0xD6DBFBC2, 0xD6DCFBC2, 0xD6DDFBC2, 0xD6DEFBC2, 0xD6DFFBC2, 0xD6E0FBC2, 0xD6E1FBC2, 0xD6E2FBC2, 0xD6E3FBC2, + 0xD6E4FBC2, 0xD6E5FBC2, 0xD6E6FBC2, 0xD6E7FBC2, 0xD6E8FBC2, 0xD6E9FBC2, 0xD6EAFBC2, 0xD6EBFBC2, 0xD6ECFBC2, 0xD6EDFBC2, 0xD6EEFBC2, 0xD6EFFBC2, 0xD6F0FBC2, 0xD6F1FBC2, 0xD6F2FBC2, + 
0xD6F3FBC2, 0xD6F4FBC2, 0xD6F5FBC2, 0xD6F6FBC2, 0xD6F7FBC2, 0xD6F8FBC2, 0xD6F9FBC2, 0xD6FAFBC2, 0xD6FBFBC2, 0xD6FCFBC2, 0xD6FDFBC2, 0xD6FEFBC2, 0xD6FFFBC2, 0xD700FBC2, 0xD701FBC2, + 0xD702FBC2, 0xD703FBC2, 0xD704FBC2, 0xD705FBC2, 0xD706FBC2, 0xD707FBC2, 0xD708FBC2, 0xD709FBC2, 0xD70AFBC2, 0xD70BFBC2, 0xD70CFBC2, 0xD70DFBC2, 0xD70EFBC2, 0xD70FFBC2, 0xD710FBC2, + 0xD711FBC2, 0xD712FBC2, 0xD713FBC2, 0xD714FBC2, 0xD715FBC2, 0xD716FBC2, 0xD717FBC2, 0xD718FBC2, 0xD719FBC2, 0xD71AFBC2, 0xD71BFBC2, 0xD71CFBC2, 0xD71DFBC2, 0xD71EFBC2, 0xD71FFBC2, + 0xD720FBC2, 0xD721FBC2, 0xD722FBC2, 0xD723FBC2, 0xD724FBC2, 0xD725FBC2, 0xD726FBC2, 0xD727FBC2, 0xD728FBC2, 0xD729FBC2, 0xD72AFBC2, 0xD72BFBC2, 0xD72CFBC2, 0xD72DFBC2, 0xD72EFBC2, + 0xD72FFBC2, 0xD730FBC2, 0xD731FBC2, 0xD732FBC2, 0xD733FBC2, 0xD734FBC2, 0xD735FBC2, 0xD736FBC2, 0xD737FBC2, 0xD738FBC2, 0xD739FBC2, 0xD73AFBC2, 0xD73BFBC2, 0xD73CFBC2, 0xD73DFBC2, + 0xD73EFBC2, 0xD73FFBC2, 0xD740FBC2, 0xD741FBC2, 0xD742FBC2, 0xD743FBC2, 0xD744FBC2, 0xD745FBC2, 0xD746FBC2, 0xD747FBC2, 0xD748FBC2, 0xD749FBC2, 0xD74AFBC2, 0xD74BFBC2, 0xD74CFBC2, + 0xD74DFBC2, 0xD74EFBC2, 0xD74FFBC2, 0xD750FBC2, 0xD751FBC2, 0xD752FBC2, 0xD753FBC2, 0xD754FBC2, 0xD755FBC2, 0xD756FBC2, 0xD757FBC2, 0xD758FBC2, 0xD759FBC2, 0xD75AFBC2, 0xD75BFBC2, + 0xD75CFBC2, 0xD75DFBC2, 0xD75EFBC2, 0xD75FFBC2, 0xD760FBC2, 0xD761FBC2, 0xD762FBC2, 0xD763FBC2, 0xD764FBC2, 0xD765FBC2, 0xD766FBC2, 0xD767FBC2, 0xD768FBC2, 0xD769FBC2, 0xD76AFBC2, + 0xD76BFBC2, 0xD76CFBC2, 0xD76DFBC2, 0xD76EFBC2, 0xD76FFBC2, 0xD770FBC2, 0xD771FBC2, 0xD772FBC2, 0xD773FBC2, 0xD774FBC2, 0xD775FBC2, 0xD776FBC2, 0xD777FBC2, 0xD778FBC2, 0xD779FBC2, + 0xD77AFBC2, 0xD77BFBC2, 0xD77CFBC2, 0xD77DFBC2, 0xD77EFBC2, 0xD77FFBC2, 0xD780FBC2, 0xD781FBC2, 0xD782FBC2, 0xD783FBC2, 0xD784FBC2, 0xD785FBC2, 0xD786FBC2, 0xD787FBC2, 0xD788FBC2, + 0xD789FBC2, 0xD78AFBC2, 0xD78BFBC2, 0xD78CFBC2, 0xD78DFBC2, 0xD78EFBC2, 0xD78FFBC2, 0xD790FBC2, 0xD791FBC2, 0xD792FBC2, 0xD793FBC2, 0xD794FBC2, 0xD795FBC2, 0xD796FBC2, 0xD797FBC2, 
+ 0xD798FBC2, 0xD799FBC2, 0xD79AFBC2, 0xD79BFBC2, 0xD79CFBC2, 0xD79DFBC2, 0xD79EFBC2, 0xD79FFBC2, 0xD7A0FBC2, 0xD7A1FBC2, 0xD7A2FBC2, 0xD7A3FBC2, 0xD7A4FBC2, 0xD7A5FBC2, 0xD7A6FBC2, + 0xD7A7FBC2, 0xD7A8FBC2, 0xD7A9FBC2, 0xD7AAFBC2, 0xD7ABFBC2, 0xD7ACFBC2, 0xD7ADFBC2, 0xD7AEFBC2, 0xD7AFFBC2, 0xD7B0FBC2, 0xD7B1FBC2, 0xD7B2FBC2, 0xD7B3FBC2, 0xD7B4FBC2, 0xD7B5FBC2, + 0xD7B6FBC2, 0xD7B7FBC2, 0xD7B8FBC2, 0xD7B9FBC2, 0xD7BAFBC2, 0xD7BBFBC2, 0xD7BCFBC2, 0xD7BDFBC2, 0xD7BEFBC2, 0xD7BFFBC2, 0xD7C0FBC2, 0xD7C1FBC2, 0xD7C2FBC2, 0xD7C3FBC2, 0xD7C4FBC2, + 0xD7C5FBC2, 0xD7C6FBC2, 0xD7C7FBC2, 0xD7C8FBC2, 0xD7C9FBC2, 0xD7CAFBC2, 0xD7CBFBC2, 0xD7CCFBC2, 0xD7CDFBC2, 0xD7CEFBC2, 0xD7CFFBC2, 0xD7D0FBC2, 0xD7D1FBC2, 0xD7D2FBC2, 0xD7D3FBC2, + 0xD7D4FBC2, 0xD7D5FBC2, 0xD7D6FBC2, 0xD7D7FBC2, 0xD7D8FBC2, 0xD7D9FBC2, 0xD7DAFBC2, 0xD7DBFBC2, 0xD7DCFBC2, 0xD7DDFBC2, 0xD7DEFBC2, 0xD7DFFBC2, 0xD7E0FBC2, 0xD7E1FBC2, 0xD7E2FBC2, + 0xD7E3FBC2, 0xD7E4FBC2, 0xD7E5FBC2, 0xD7E6FBC2, 0xD7E7FBC2, 0xD7E8FBC2, 0xD7E9FBC2, 0xD7EAFBC2, 0xD7EBFBC2, 0xD7ECFBC2, 0xD7EDFBC2, 0xD7EEFBC2, 0xD7EFFBC2, 0xD7F0FBC2, 0xD7F1FBC2, + 0xD7F2FBC2, 0xD7F3FBC2, 0xD7F4FBC2, 0xD7F5FBC2, 0xD7F6FBC2, 0xD7F7FBC2, 0xD7F8FBC2, 0xD7F9FBC2, 0xD7FAFBC2, 0xD7FBFBC2, 0xD7FCFBC2, 0xD7FDFBC2, 0xD7FEFBC2, 0xD7FFFBC2, 0xD800FBC2, + 0xD801FBC2, 0xD802FBC2, 0xD803FBC2, 0xD804FBC2, 0xD805FBC2, 0xD806FBC2, 0xD807FBC2, 0xD808FBC2, 0xD809FBC2, 0xD80AFBC2, 0xD80BFBC2, 0xD80CFBC2, 0xD80DFBC2, 0xD80EFBC2, 0xD80FFBC2, + 0xD810FBC2, 0xD811FBC2, 0xD812FBC2, 0xD813FBC2, 0xD814FBC2, 0xD815FBC2, 0xD816FBC2, 0xD817FBC2, 0xD818FBC2, 0xD819FBC2, 0xD81AFBC2, 0xD81BFBC2, 0xD81CFBC2, 0xD81DFBC2, 0xD81EFBC2, + 0xD81FFBC2, 0xD820FBC2, 0xD821FBC2, 0xD822FBC2, 0xD823FBC2, 0xD824FBC2, 0xD825FBC2, 0xD826FBC2, 0xD827FBC2, 0xD828FBC2, 0xD829FBC2, 0xD82AFBC2, 0xD82BFBC2, 0xD82CFBC2, 0xD82DFBC2, + 0xD82EFBC2, 0xD82FFBC2, 0xD830FBC2, 0xD831FBC2, 0xD832FBC2, 0xD833FBC2, 0xD834FBC2, 0xD835FBC2, 0xD836FBC2, 0xD837FBC2, 0xD838FBC2, 0xD839FBC2, 0xD83AFBC2, 0xD83BFBC2, 
0xD83CFBC2, + 0xD83DFBC2, 0xD83EFBC2, 0xD83FFBC2, 0xD840FBC2, 0xD841FBC2, 0xD842FBC2, 0xD843FBC2, 0xD844FBC2, 0xD845FBC2, 0xD846FBC2, 0xD847FBC2, 0xD848FBC2, 0xD849FBC2, 0xD84AFBC2, 0xD84BFBC2, + 0xD84CFBC2, 0xD84DFBC2, 0xD84EFBC2, 0xD84FFBC2, 0xD850FBC2, 0xD851FBC2, 0xD852FBC2, 0xD853FBC2, 0xD854FBC2, 0xD855FBC2, 0xD856FBC2, 0xD857FBC2, 0xD858FBC2, 0xD859FBC2, 0xD85AFBC2, + 0xD85BFBC2, 0xD85CFBC2, 0xD85DFBC2, 0xD85EFBC2, 0xD85FFBC2, 0xD860FBC2, 0xD861FBC2, 0xD862FBC2, 0xD863FBC2, 0xD864FBC2, 0xD865FBC2, 0xD866FBC2, 0xD867FBC2, 0xD868FBC2, 0xD869FBC2, + 0xD86AFBC2, 0xD86BFBC2, 0xD86CFBC2, 0xD86DFBC2, 0xD86EFBC2, 0xD86FFBC2, 0xD870FBC2, 0xD871FBC2, 0xD872FBC2, 0xD873FBC2, 0xD874FBC2, 0xD875FBC2, 0xD876FBC2, 0xD877FBC2, 0xD878FBC2, + 0xD879FBC2, 0xD87AFBC2, 0xD87BFBC2, 0xD87CFBC2, 0xD87DFBC2, 0xD87EFBC2, 0xD87FFBC2, 0xD880FBC2, 0xD881FBC2, 0xD882FBC2, 0xD883FBC2, 0xD884FBC2, 0xD885FBC2, 0xD886FBC2, 0xD887FBC2, + 0xD888FBC2, 0xD889FBC2, 0xD88AFBC2, 0xD88BFBC2, 0xD88CFBC2, 0xD88DFBC2, 0xD88EFBC2, 0xD88FFBC2, 0xD890FBC2, 0xD891FBC2, 0xD892FBC2, 0xD893FBC2, 0xD894FBC2, 0xD895FBC2, 0xD896FBC2, + 0xD897FBC2, 0xD898FBC2, 0xD899FBC2, 0xD89AFBC2, 0xD89BFBC2, 0xD89CFBC2, 0xD89DFBC2, 0xD89EFBC2, 0xD89FFBC2, 0xD8A0FBC2, 0xD8A1FBC2, 0xD8A2FBC2, 0xD8A3FBC2, 0xD8A4FBC2, 0xD8A5FBC2, + 0xD8A6FBC2, 0xD8A7FBC2, 0xD8A8FBC2, 0xD8A9FBC2, 0xD8AAFBC2, 0xD8ABFBC2, 0xD8ACFBC2, 0xD8ADFBC2, 0xD8AEFBC2, 0xD8AFFBC2, 0xD8B0FBC2, 0xD8B1FBC2, 0xD8B2FBC2, 0xD8B3FBC2, 0xD8B4FBC2, + 0xD8B5FBC2, 0xD8B6FBC2, 0xD8B7FBC2, 0xD8B8FBC2, 0xD8B9FBC2, 0xD8BAFBC2, 0xD8BBFBC2, 0xD8BCFBC2, 0xD8BDFBC2, 0xD8BEFBC2, 0xD8BFFBC2, 0xD8C0FBC2, 0xD8C1FBC2, 0xD8C2FBC2, 0xD8C3FBC2, + 0xD8C4FBC2, 0xD8C5FBC2, 0xD8C6FBC2, 0xD8C7FBC2, 0xD8C8FBC2, 0xD8C9FBC2, 0xD8CAFBC2, 0xD8CBFBC2, 0xD8CCFBC2, 0xD8CDFBC2, 0xD8CEFBC2, 0xD8CFFBC2, 0xD8D0FBC2, 0xD8D1FBC2, 0xD8D2FBC2, + 0xD8D3FBC2, 0xD8D4FBC2, 0xD8D5FBC2, 0xD8D6FBC2, 0xD8D7FBC2, 0xD8D8FBC2, 0xD8D9FBC2, 0xD8DAFBC2, 0xD8DBFBC2, 0xD8DCFBC2, 0xD8DDFBC2, 0xD8DEFBC2, 0xD8DFFBC2, 
0xD8E0FBC2, 0xD8E1FBC2, + 0xD8E2FBC2, 0xD8E3FBC2, 0xD8E4FBC2, 0xD8E5FBC2, 0xD8E6FBC2, 0xD8E7FBC2, 0xD8E8FBC2, 0xD8E9FBC2, 0xD8EAFBC2, 0xD8EBFBC2, 0xD8ECFBC2, 0xD8EDFBC2, 0xD8EEFBC2, 0xD8EFFBC2, 0xD8F0FBC2, + 0xD8F1FBC2, 0xD8F2FBC2, 0xD8F3FBC2, 0xD8F4FBC2, 0xD8F5FBC2, 0xD8F6FBC2, 0xD8F7FBC2, 0xD8F8FBC2, 0xD8F9FBC2, 0xD8FAFBC2, 0xD8FBFBC2, 0xD8FCFBC2, 0xD8FDFBC2, 0xD8FEFBC2, 0xD8FFFBC2, + 0xD900FBC2, 0xD901FBC2, 0xD902FBC2, 0xD903FBC2, 0xD904FBC2, 0xD905FBC2, 0xD906FBC2, 0xD907FBC2, 0xD908FBC2, 0xD909FBC2, 0xD90AFBC2, 0xD90BFBC2, 0xD90CFBC2, 0xD90DFBC2, 0xD90EFBC2, + 0xD90FFBC2, 0xD910FBC2, 0xD911FBC2, 0xD912FBC2, 0xD913FBC2, 0xD914FBC2, 0xD915FBC2, 0xD916FBC2, 0xD917FBC2, 0xD918FBC2, 0xD919FBC2, 0xD91AFBC2, 0xD91BFBC2, 0xD91CFBC2, 0xD91DFBC2, + 0xD91EFBC2, 0xD91FFBC2, 0xD920FBC2, 0xD921FBC2, 0xD922FBC2, 0xD923FBC2, 0xD924FBC2, 0xD925FBC2, 0xD926FBC2, 0xD927FBC2, 0xD928FBC2, 0xD929FBC2, 0xD92AFBC2, 0xD92BFBC2, 0xD92CFBC2, + 0xD92DFBC2, 0xD92EFBC2, 0xD92FFBC2, 0xD930FBC2, 0xD931FBC2, 0xD932FBC2, 0xD933FBC2, 0xD934FBC2, 0xD935FBC2, 0xD936FBC2, 0xD937FBC2, 0xD938FBC2, 0xD939FBC2, 0xD93AFBC2, 0xD93BFBC2, + 0xD93CFBC2, 0xD93DFBC2, 0xD93EFBC2, 0xD93FFBC2, 0xD940FBC2, 0xD941FBC2, 0xD942FBC2, 0xD943FBC2, 0xD944FBC2, 0xD945FBC2, 0xD946FBC2, 0xD947FBC2, 0xD948FBC2, 0xD949FBC2, 0xD94AFBC2, + 0xD94BFBC2, 0xD94CFBC2, 0xD94DFBC2, 0xD94EFBC2, 0xD94FFBC2, 0xD950FBC2, 0xD951FBC2, 0xD952FBC2, 0xD953FBC2, 0xD954FBC2, 0xD955FBC2, 0xD956FBC2, 0xD957FBC2, 0xD958FBC2, 0xD959FBC2, + 0xD95AFBC2, 0xD95BFBC2, 0xD95CFBC2, 0xD95DFBC2, 0xD95EFBC2, 0xD95FFBC2, 0xD960FBC2, 0xD961FBC2, 0xD962FBC2, 0xD963FBC2, 0xD964FBC2, 0xD965FBC2, 0xD966FBC2, 0xD967FBC2, 0xD968FBC2, + 0xD969FBC2, 0xD96AFBC2, 0xD96BFBC2, 0xD96CFBC2, 0xD96DFBC2, 0xD96EFBC2, 0xD96FFBC2, 0xD970FBC2, 0xD971FBC2, 0xD972FBC2, 0xD973FBC2, 0xD974FBC2, 0xD975FBC2, 0xD976FBC2, 0xD977FBC2, + 0xD978FBC2, 0xD979FBC2, 0xD97AFBC2, 0xD97BFBC2, 0xD97CFBC2, 0xD97DFBC2, 0xD97EFBC2, 0xD97FFBC2, 0xD980FBC2, 0xD981FBC2, 0xD982FBC2, 0xD983FBC2, 
0xD984FBC2, 0xD985FBC2, 0xD986FBC2, + 0xD987FBC2, 0xD988FBC2, 0xD989FBC2, 0xD98AFBC2, 0xD98BFBC2, 0xD98CFBC2, 0xD98DFBC2, 0xD98EFBC2, 0xD98FFBC2, 0xD990FBC2, 0xD991FBC2, 0xD992FBC2, 0xD993FBC2, 0xD994FBC2, 0xD995FBC2, + 0xD996FBC2, 0xD997FBC2, 0xD998FBC2, 0xD999FBC2, 0xD99AFBC2, 0xD99BFBC2, 0xD99CFBC2, 0xD99DFBC2, 0xD99EFBC2, 0xD99FFBC2, 0xD9A0FBC2, 0xD9A1FBC2, 0xD9A2FBC2, 0xD9A3FBC2, 0xD9A4FBC2, + 0xD9A5FBC2, 0xD9A6FBC2, 0xD9A7FBC2, 0xD9A8FBC2, 0xD9A9FBC2, 0xD9AAFBC2, 0xD9ABFBC2, 0xD9ACFBC2, 0xD9ADFBC2, 0xD9AEFBC2, 0xD9AFFBC2, 0xD9B0FBC2, 0xD9B1FBC2, 0xD9B2FBC2, 0xD9B3FBC2, + 0xD9B4FBC2, 0xD9B5FBC2, 0xD9B6FBC2, 0xD9B7FBC2, 0xD9B8FBC2, 0xD9B9FBC2, 0xD9BAFBC2, 0xD9BBFBC2, 0xD9BCFBC2, 0xD9BDFBC2, 0xD9BEFBC2, 0xD9BFFBC2, 0xD9C0FBC2, 0xD9C1FBC2, 0xD9C2FBC2, + 0xD9C3FBC2, 0xD9C4FBC2, 0xD9C5FBC2, 0xD9C6FBC2, 0xD9C7FBC2, 0xD9C8FBC2, 0xD9C9FBC2, 0xD9CAFBC2, 0xD9CBFBC2, 0xD9CCFBC2, 0xD9CDFBC2, 0xD9CEFBC2, 0xD9CFFBC2, 0xD9D0FBC2, 0xD9D1FBC2, + 0xD9D2FBC2, 0xD9D3FBC2, 0xD9D4FBC2, 0xD9D5FBC2, 0xD9D6FBC2, 0xD9D7FBC2, 0xD9D8FBC2, 0xD9D9FBC2, 0xD9DAFBC2, 0xD9DBFBC2, 0xD9DCFBC2, 0xD9DDFBC2, 0xD9DEFBC2, 0xD9DFFBC2, 0xD9E0FBC2, + 0xD9E1FBC2, 0xD9E2FBC2, 0xD9E3FBC2, 0xD9E4FBC2, 0xD9E5FBC2, 0xD9E6FBC2, 0xD9E7FBC2, 0xD9E8FBC2, 0xD9E9FBC2, 0xD9EAFBC2, 0xD9EBFBC2, 0xD9ECFBC2, 0xD9EDFBC2, 0xD9EEFBC2, 0xD9EFFBC2, + 0xD9F0FBC2, 0xD9F1FBC2, 0xD9F2FBC2, 0xD9F3FBC2, 0xD9F4FBC2, 0xD9F5FBC2, 0xD9F6FBC2, 0xD9F7FBC2, 0xD9F8FBC2, 0xD9F9FBC2, 0xD9FAFBC2, 0xD9FBFBC2, 0xD9FCFBC2, 0xD9FDFBC2, 0xD9FEFBC2, + 0xD9FFFBC2, 0xDA00FBC2, 0xDA01FBC2, 0xDA02FBC2, 0xDA03FBC2, 0xDA04FBC2, 0xDA05FBC2, 0xDA06FBC2, 0xDA07FBC2, 0xDA08FBC2, 0xDA09FBC2, 0xDA0AFBC2, 0xDA0BFBC2, 0xDA0CFBC2, 0xDA0DFBC2, + 0xDA0EFBC2, 0xDA0FFBC2, 0xDA10FBC2, 0xDA11FBC2, 0xDA12FBC2, 0xDA13FBC2, 0xDA14FBC2, 0xDA15FBC2, 0xDA16FBC2, 0xDA17FBC2, 0xDA18FBC2, 0xDA19FBC2, 0xDA1AFBC2, 0xDA1BFBC2, 0xDA1CFBC2, + 0xDA1DFBC2, 0xDA1EFBC2, 0xDA1FFBC2, 0xDA20FBC2, 0xDA21FBC2, 0xDA22FBC2, 0xDA23FBC2, 0xDA24FBC2, 0xDA25FBC2, 0xDA26FBC2, 0xDA27FBC2, 
0xDA28FBC2, 0xDA29FBC2, 0xDA2AFBC2, 0xDA2BFBC2, + 0xDA2CFBC2, 0xDA2DFBC2, 0xDA2EFBC2, 0xDA2FFBC2, 0xDA30FBC2, 0xDA31FBC2, 0xDA32FBC2, 0xDA33FBC2, 0xDA34FBC2, 0xDA35FBC2, 0xDA36FBC2, 0xDA37FBC2, 0xDA38FBC2, 0xDA39FBC2, 0xDA3AFBC2, + 0xDA3BFBC2, 0xDA3CFBC2, 0xDA3DFBC2, 0xDA3EFBC2, 0xDA3FFBC2, 0xDA40FBC2, 0xDA41FBC2, 0xDA42FBC2, 0xDA43FBC2, 0xDA44FBC2, 0xDA45FBC2, 0xDA46FBC2, 0xDA47FBC2, 0xDA48FBC2, 0xDA49FBC2, + 0xDA4AFBC2, 0xDA4BFBC2, 0xDA4CFBC2, 0xDA4DFBC2, 0xDA4EFBC2, 0xDA4FFBC2, 0xDA50FBC2, 0xDA51FBC2, 0xDA52FBC2, 0xDA53FBC2, 0xDA54FBC2, 0xDA55FBC2, 0xDA56FBC2, 0xDA57FBC2, 0xDA58FBC2, + 0xDA59FBC2, 0xDA5AFBC2, 0xDA5BFBC2, 0xDA5CFBC2, 0xDA5DFBC2, 0xDA5EFBC2, 0xDA5FFBC2, 0xDA60FBC2, 0xDA61FBC2, 0xDA62FBC2, 0xDA63FBC2, 0xDA64FBC2, 0xDA65FBC2, 0xDA66FBC2, 0xDA67FBC2, + 0xDA68FBC2, 0xDA69FBC2, 0xDA6AFBC2, 0xDA6BFBC2, 0xDA6CFBC2, 0xDA6DFBC2, 0xDA6EFBC2, 0xDA6FFBC2, 0xDA70FBC2, 0xDA71FBC2, 0xDA72FBC2, 0xDA73FBC2, 0xDA74FBC2, 0xDA75FBC2, 0xDA76FBC2, + 0xDA77FBC2, 0xDA78FBC2, 0xDA79FBC2, 0xDA7AFBC2, 0xDA7BFBC2, 0xDA7CFBC2, 0xDA7DFBC2, 0xDA7EFBC2, 0xDA7FFBC2, 0xDA80FBC2, 0xDA81FBC2, 0xDA82FBC2, 0xDA83FBC2, 0xDA84FBC2, 0xDA85FBC2, + 0xDA86FBC2, 0xDA87FBC2, 0xDA88FBC2, 0xDA89FBC2, 0xDA8AFBC2, 0xDA8BFBC2, 0xDA8CFBC2, 0xDA8DFBC2, 0xDA8EFBC2, 0xDA8FFBC2, 0xDA90FBC2, 0xDA91FBC2, 0xDA92FBC2, 0xDA93FBC2, 0xDA94FBC2, + 0xDA95FBC2, 0xDA96FBC2, 0xDA97FBC2, 0xDA98FBC2, 0xDA99FBC2, 0xDA9AFBC2, 0xDA9BFBC2, 0xDA9CFBC2, 0xDA9DFBC2, 0xDA9EFBC2, 0xDA9FFBC2, 0xDAA0FBC2, 0xDAA1FBC2, 0xDAA2FBC2, 0xDAA3FBC2, + 0xDAA4FBC2, 0xDAA5FBC2, 0xDAA6FBC2, 0xDAA7FBC2, 0xDAA8FBC2, 0xDAA9FBC2, 0xDAAAFBC2, 0xDAABFBC2, 0xDAACFBC2, 0xDAADFBC2, 0xDAAEFBC2, 0xDAAFFBC2, 0xDAB0FBC2, 0xDAB1FBC2, 0xDAB2FBC2, + 0xDAB3FBC2, 0xDAB4FBC2, 0xDAB5FBC2, 0xDAB6FBC2, 0xDAB7FBC2, 0xDAB8FBC2, 0xDAB9FBC2, 0xDABAFBC2, 0xDABBFBC2, 0xDABCFBC2, 0xDABDFBC2, 0xDABEFBC2, 0xDABFFBC2, 0xDAC0FBC2, 0xDAC1FBC2, + 0xDAC2FBC2, 0xDAC3FBC2, 0xDAC4FBC2, 0xDAC5FBC2, 0xDAC6FBC2, 0xDAC7FBC2, 0xDAC8FBC2, 0xDAC9FBC2, 0xDACAFBC2, 0xDACBFBC2, 
0xDACCFBC2, 0xDACDFBC2, 0xDACEFBC2, 0xDACFFBC2, 0xDAD0FBC2, + 0xDAD1FBC2, 0xDAD2FBC2, 0xDAD3FBC2, 0xDAD4FBC2, 0xDAD5FBC2, 0xDAD6FBC2, 0xDAD7FBC2, 0xDAD8FBC2, 0xDAD9FBC2, 0xDADAFBC2, 0xDADBFBC2, 0xDADCFBC2, 0xDADDFBC2, 0xDADEFBC2, 0xDADFFBC2, + 0xDAE0FBC2, 0xDAE1FBC2, 0xDAE2FBC2, 0xDAE3FBC2, 0xDAE4FBC2, 0xDAE5FBC2, 0xDAE6FBC2, 0xDAE7FBC2, 0xDAE8FBC2, 0xDAE9FBC2, 0xDAEAFBC2, 0xDAEBFBC2, 0xDAECFBC2, 0xDAEDFBC2, 0xDAEEFBC2, + 0xDAEFFBC2, 0xDAF0FBC2, 0xDAF1FBC2, 0xDAF2FBC2, 0xDAF3FBC2, 0xDAF4FBC2, 0xDAF5FBC2, 0xDAF6FBC2, 0xDAF7FBC2, 0xDAF8FBC2, 0xDAF9FBC2, 0xDAFAFBC2, 0xDAFBFBC2, 0xDAFCFBC2, 0xDAFDFBC2, + 0xDAFEFBC2, 0xDAFFFBC2, 0xDB00FBC2, 0xDB01FBC2, 0xDB02FBC2, 0xDB03FBC2, 0xDB04FBC2, 0xDB05FBC2, 0xDB06FBC2, 0xDB07FBC2, 0xDB08FBC2, 0xDB09FBC2, 0xDB0AFBC2, 0xDB0BFBC2, 0xDB0CFBC2, + 0xDB0DFBC2, 0xDB0EFBC2, 0xDB0FFBC2, 0xDB10FBC2, 0xDB11FBC2, 0xDB12FBC2, 0xDB13FBC2, 0xDB14FBC2, 0xDB15FBC2, 0xDB16FBC2, 0xDB17FBC2, 0xDB18FBC2, 0xDB19FBC2, 0xDB1AFBC2, 0xDB1BFBC2, + 0xDB1CFBC2, 0xDB1DFBC2, 0xDB1EFBC2, 0xDB1FFBC2, 0xDB20FBC2, 0xDB21FBC2, 0xDB22FBC2, 0xDB23FBC2, 0xDB24FBC2, 0xDB25FBC2, 0xDB26FBC2, 0xDB27FBC2, 0xDB28FBC2, 0xDB29FBC2, 0xDB2AFBC2, + 0xDB2BFBC2, 0xDB2CFBC2, 0xDB2DFBC2, 0xDB2EFBC2, 0xDB2FFBC2, 0xDB30FBC2, 0xDB31FBC2, 0xDB32FBC2, 0xDB33FBC2, 0xDB34FBC2, 0xDB35FBC2, 0xDB36FBC2, 0xDB37FBC2, 0xDB38FBC2, 0xDB39FBC2, + 0xDB3AFBC2, 0xDB3BFBC2, 0xDB3CFBC2, 0xDB3DFBC2, 0xDB3EFBC2, 0xDB3FFBC2, 0xDB40FBC2, 0xDB41FBC2, 0xDB42FBC2, 0xDB43FBC2, 0xDB44FBC2, 0xDB45FBC2, 0xDB46FBC2, 0xDB47FBC2, 0xDB48FBC2, + 0xDB49FBC2, 0xDB4AFBC2, 0xDB4BFBC2, 0xDB4CFBC2, 0xDB4DFBC2, 0xDB4EFBC2, 0xDB4FFBC2, 0xDB50FBC2, 0xDB51FBC2, 0xDB52FBC2, 0xDB53FBC2, 0xDB54FBC2, 0xDB55FBC2, 0xDB56FBC2, 0xDB57FBC2, + 0xDB58FBC2, 0xDB59FBC2, 0xDB5AFBC2, 0xDB5BFBC2, 0xDB5CFBC2, 0xDB5DFBC2, 0xDB5EFBC2, 0xDB5FFBC2, 0xDB60FBC2, 0xDB61FBC2, 0xDB62FBC2, 0xDB63FBC2, 0xDB64FBC2, 0xDB65FBC2, 0xDB66FBC2, + 0xDB67FBC2, 0xDB68FBC2, 0xDB69FBC2, 0xDB6AFBC2, 0xDB6BFBC2, 0xDB6CFBC2, 0xDB6DFBC2, 0xDB6EFBC2, 0xDB6FFBC2, 
0xDB70FBC2, 0xDB71FBC2, 0xDB72FBC2, 0xDB73FBC2, 0xDB74FBC2, 0xDB75FBC2, + 0xDB76FBC2, 0xDB77FBC2, 0xDB78FBC2, 0xDB79FBC2, 0xDB7AFBC2, 0xDB7BFBC2, 0xDB7CFBC2, 0xDB7DFBC2, 0xDB7EFBC2, 0xDB7FFBC2, 0xDB80FBC2, 0xDB81FBC2, 0xDB82FBC2, 0xDB83FBC2, 0xDB84FBC2, + 0xDB85FBC2, 0xDB86FBC2, 0xDB87FBC2, 0xDB88FBC2, 0xDB89FBC2, 0xDB8AFBC2, 0xDB8BFBC2, 0xDB8CFBC2, 0xDB8DFBC2, 0xDB8EFBC2, 0xDB8FFBC2, 0xDB90FBC2, 0xDB91FBC2, 0xDB92FBC2, 0xDB93FBC2, + 0xDB94FBC2, 0xDB95FBC2, 0xDB96FBC2, 0xDB97FBC2, 0xDB98FBC2, 0xDB99FBC2, 0xDB9AFBC2, 0xDB9BFBC2, 0xDB9CFBC2, 0xDB9DFBC2, 0xDB9EFBC2, 0xDB9FFBC2, 0xDBA0FBC2, 0xDBA1FBC2, 0xDBA2FBC2, + 0xDBA3FBC2, 0xDBA4FBC2, 0xDBA5FBC2, 0xDBA6FBC2, 0xDBA7FBC2, 0xDBA8FBC2, 0xDBA9FBC2, 0xDBAAFBC2, 0xDBABFBC2, 0xDBACFBC2, 0xDBADFBC2, 0xDBAEFBC2, 0xDBAFFBC2, 0xDBB0FBC2, 0xDBB1FBC2, + 0xDBB2FBC2, 0xDBB3FBC2, 0xDBB4FBC2, 0xDBB5FBC2, 0xDBB6FBC2, 0xDBB7FBC2, 0xDBB8FBC2, 0xDBB9FBC2, 0xDBBAFBC2, 0xDBBBFBC2, 0xDBBCFBC2, 0xDBBDFBC2, 0xDBBEFBC2, 0xDBBFFBC2, 0xDBC0FBC2, + 0xDBC1FBC2, 0xDBC2FBC2, 0xDBC3FBC2, 0xDBC4FBC2, 0xDBC5FBC2, 0xDBC6FBC2, 0xDBC7FBC2, 0xDBC8FBC2, 0xDBC9FBC2, 0xDBCAFBC2, 0xDBCBFBC2, 0xDBCCFBC2, 0xDBCDFBC2, 0xDBCEFBC2, 0xDBCFFBC2, + 0xDBD0FBC2, 0xDBD1FBC2, 0xDBD2FBC2, 0xDBD3FBC2, 0xDBD4FBC2, 0xDBD5FBC2, 0xDBD6FBC2, 0xDBD7FBC2, 0xDBD8FBC2, 0xDBD9FBC2, 0xDBDAFBC2, 0xDBDBFBC2, 0xDBDCFBC2, 0xDBDDFBC2, 0xDBDEFBC2, + 0xDBDFFBC2, 0xDBE0FBC2, 0xDBE1FBC2, 0xDBE2FBC2, 0xDBE3FBC2, 0xDBE4FBC2, 0xDBE5FBC2, 0xDBE6FBC2, 0xDBE7FBC2, 0xDBE8FBC2, 0xDBE9FBC2, 0xDBEAFBC2, 0xDBEBFBC2, 0xDBECFBC2, 0xDBEDFBC2, + 0xDBEEFBC2, 0xDBEFFBC2, 0xDBF0FBC2, 0xDBF1FBC2, 0xDBF2FBC2, 0xDBF3FBC2, 0xDBF4FBC2, 0xDBF5FBC2, 0xDBF6FBC2, 0xDBF7FBC2, 0xDBF8FBC2, 0xDBF9FBC2, 0xDBFAFBC2, 0xDBFBFBC2, 0xDBFCFBC2, + 0xDBFDFBC2, 0xDBFEFBC2, 0xDBFFFBC2, 0xDC00FBC2, 0xDC01FBC2, 0xDC02FBC2, 0xDC03FBC2, 0xDC04FBC2, 0xDC05FBC2, 0xDC06FBC2, 0xDC07FBC2, 0xDC08FBC2, 0xDC09FBC2, 0xDC0AFBC2, 0xDC0BFBC2, + 0xDC0CFBC2, 0xDC0DFBC2, 0xDC0EFBC2, 0xDC0FFBC2, 0xDC10FBC2, 0xDC11FBC2, 0xDC12FBC2, 0xDC13FBC2, 
0xDC14FBC2, 0xDC15FBC2, 0xDC16FBC2, 0xDC17FBC2, 0xDC18FBC2, 0xDC19FBC2, 0xDC1AFBC2, + 0xDC1BFBC2, 0xDC1CFBC2, 0xDC1DFBC2, 0xDC1EFBC2, 0xDC1FFBC2, 0xDC20FBC2, 0xDC21FBC2, 0xDC22FBC2, 0xDC23FBC2, 0xDC24FBC2, 0xDC25FBC2, 0xDC26FBC2, 0xDC27FBC2, 0xDC28FBC2, 0xDC29FBC2, + 0xDC2AFBC2, 0xDC2BFBC2, 0xDC2CFBC2, 0xDC2DFBC2, 0xDC2EFBC2, 0xDC2FFBC2, 0xDC30FBC2, 0xDC31FBC2, 0xDC32FBC2, 0xDC33FBC2, 0xDC34FBC2, 0xDC35FBC2, 0xDC36FBC2, 0xDC37FBC2, 0xDC38FBC2, + 0xDC39FBC2, 0xDC3AFBC2, 0xDC3BFBC2, 0xDC3CFBC2, 0xDC3DFBC2, 0xDC3EFBC2, 0xDC3FFBC2, 0xDC40FBC2, 0xDC41FBC2, 0xDC42FBC2, 0xDC43FBC2, 0xDC44FBC2, 0xDC45FBC2, 0xDC46FBC2, 0xDC47FBC2, + 0xDC48FBC2, 0xDC49FBC2, 0xDC4AFBC2, 0xDC4BFBC2, 0xDC4CFBC2, 0xDC4DFBC2, 0xDC4EFBC2, 0xDC4FFBC2, 0xDC50FBC2, 0xDC51FBC2, 0xDC52FBC2, 0xDC53FBC2, 0xDC54FBC2, 0xDC55FBC2, 0xDC56FBC2, + 0xDC57FBC2, 0xDC58FBC2, 0xDC59FBC2, 0xDC5AFBC2, 0xDC5BFBC2, 0xDC5CFBC2, 0xDC5DFBC2, 0xDC5EFBC2, 0xDC5FFBC2, 0xDC60FBC2, 0xDC61FBC2, 0xDC62FBC2, 0xDC63FBC2, 0xDC64FBC2, 0xDC65FBC2, + 0xDC66FBC2, 0xDC67FBC2, 0xDC68FBC2, 0xDC69FBC2, 0xDC6AFBC2, 0xDC6BFBC2, 0xDC6CFBC2, 0xDC6DFBC2, 0xDC6EFBC2, 0xDC6FFBC2, 0xDC70FBC2, 0xDC71FBC2, 0xDC72FBC2, 0xDC73FBC2, 0xDC74FBC2, + 0xDC75FBC2, 0xDC76FBC2, 0xDC77FBC2, 0xDC78FBC2, 0xDC79FBC2, 0xDC7AFBC2, 0xDC7BFBC2, 0xDC7CFBC2, 0xDC7DFBC2, 0xDC7EFBC2, 0xDC7FFBC2, 0xDC80FBC2, 0xDC81FBC2, 0xDC82FBC2, 0xDC83FBC2, + 0xDC84FBC2, 0xDC85FBC2, 0xDC86FBC2, 0xDC87FBC2, 0xDC88FBC2, 0xDC89FBC2, 0xDC8AFBC2, 0xDC8BFBC2, 0xDC8CFBC2, 0xDC8DFBC2, 0xDC8EFBC2, 0xDC8FFBC2, 0xDC90FBC2, 0xDC91FBC2, 0xDC92FBC2, + 0xDC93FBC2, 0xDC94FBC2, 0xDC95FBC2, 0xDC96FBC2, 0xDC97FBC2, 0xDC98FBC2, 0xDC99FBC2, 0xDC9AFBC2, 0xDC9BFBC2, 0xDC9CFBC2, 0xDC9DFBC2, 0xDC9EFBC2, 0xDC9FFBC2, 0xDCA0FBC2, 0xDCA1FBC2, + 0xDCA2FBC2, 0xDCA3FBC2, 0xDCA4FBC2, 0xDCA5FBC2, 0xDCA6FBC2, 0xDCA7FBC2, 0xDCA8FBC2, 0xDCA9FBC2, 0xDCAAFBC2, 0xDCABFBC2, 0xDCACFBC2, 0xDCADFBC2, 0xDCAEFBC2, 0xDCAFFBC2, 0xDCB0FBC2, + 0xDCB1FBC2, 0xDCB2FBC2, 0xDCB3FBC2, 0xDCB4FBC2, 0xDCB5FBC2, 0xDCB6FBC2, 0xDCB7FBC2, 
0xDCB8FBC2, 0xDCB9FBC2, 0xDCBAFBC2, 0xDCBBFBC2, 0xDCBCFBC2, 0xDCBDFBC2, 0xDCBEFBC2, 0xDCBFFBC2, + 0xDCC0FBC2, 0xDCC1FBC2, 0xDCC2FBC2, 0xDCC3FBC2, 0xDCC4FBC2, 0xDCC5FBC2, 0xDCC6FBC2, 0xDCC7FBC2, 0xDCC8FBC2, 0xDCC9FBC2, 0xDCCAFBC2, 0xDCCBFBC2, 0xDCCCFBC2, 0xDCCDFBC2, 0xDCCEFBC2, + 0xDCCFFBC2, 0xDCD0FBC2, 0xDCD1FBC2, 0xDCD2FBC2, 0xDCD3FBC2, 0xDCD4FBC2, 0xDCD5FBC2, 0xDCD6FBC2, 0xDCD7FBC2, 0xDCD8FBC2, 0xDCD9FBC2, 0xDCDAFBC2, 0xDCDBFBC2, 0xDCDCFBC2, 0xDCDDFBC2, + 0xDCDEFBC2, 0xDCDFFBC2, 0xDCE0FBC2, 0xDCE1FBC2, 0xDCE2FBC2, 0xDCE3FBC2, 0xDCE4FBC2, 0xDCE5FBC2, 0xDCE6FBC2, 0xDCE7FBC2, 0xDCE8FBC2, 0xDCE9FBC2, 0xDCEAFBC2, 0xDCEBFBC2, 0xDCECFBC2, + 0xDCEDFBC2, 0xDCEEFBC2, 0xDCEFFBC2, 0xDCF0FBC2, 0xDCF1FBC2, 0xDCF2FBC2, 0xDCF3FBC2, 0xDCF4FBC2, 0xDCF5FBC2, 0xDCF6FBC2, 0xDCF7FBC2, 0xDCF8FBC2, 0xDCF9FBC2, 0xDCFAFBC2, 0xDCFBFBC2, + 0xDCFCFBC2, 0xDCFDFBC2, 0xDCFEFBC2, 0xDCFFFBC2, 0xDD00FBC2, 0xDD01FBC2, 0xDD02FBC2, 0xDD03FBC2, 0xDD04FBC2, 0xDD05FBC2, 0xDD06FBC2, 0xDD07FBC2, 0xDD08FBC2, 0xDD09FBC2, 0xDD0AFBC2, + 0xDD0BFBC2, 0xDD0CFBC2, 0xDD0DFBC2, 0xDD0EFBC2, 0xDD0FFBC2, 0xDD10FBC2, 0xDD11FBC2, 0xDD12FBC2, 0xDD13FBC2, 0xDD14FBC2, 0xDD15FBC2, 0xDD16FBC2, 0xDD17FBC2, 0xDD18FBC2, 0xDD19FBC2, + 0xDD1AFBC2, 0xDD1BFBC2, 0xDD1CFBC2, 0xDD1DFBC2, 0xDD1EFBC2, 0xDD1FFBC2, 0xDD20FBC2, 0xDD21FBC2, 0xDD22FBC2, 0xDD23FBC2, 0xDD24FBC2, 0xDD25FBC2, 0xDD26FBC2, 0xDD27FBC2, 0xDD28FBC2, + 0xDD29FBC2, 0xDD2AFBC2, 0xDD2BFBC2, 0xDD2CFBC2, 0xDD2DFBC2, 0xDD2EFBC2, 0xDD2FFBC2, 0xDD30FBC2, 0xDD31FBC2, 0xDD32FBC2, 0xDD33FBC2, 0xDD34FBC2, 0xDD35FBC2, 0xDD36FBC2, 0xDD37FBC2, + 0xDD38FBC2, 0xDD39FBC2, 0xDD3AFBC2, 0xDD3BFBC2, 0xDD3CFBC2, 0xDD3DFBC2, 0xDD3EFBC2, 0xDD3FFBC2, 0xDD40FBC2, 0xDD41FBC2, 0xDD42FBC2, 0xDD43FBC2, 0xDD44FBC2, 0xDD45FBC2, 0xDD46FBC2, + 0xDD47FBC2, 0xDD48FBC2, 0xDD49FBC2, 0xDD4AFBC2, 0xDD4BFBC2, 0xDD4CFBC2, 0xDD4DFBC2, 0xDD4EFBC2, 0xDD4FFBC2, 0xDD50FBC2, 0xDD51FBC2, 0xDD52FBC2, 0xDD53FBC2, 0xDD54FBC2, 0xDD55FBC2, + 0xDD56FBC2, 0xDD57FBC2, 0xDD58FBC2, 0xDD59FBC2, 0xDD5AFBC2, 0xDD5BFBC2, 
0xDD5CFBC2, 0xDD5DFBC2, 0xDD5EFBC2, 0xDD5FFBC2, 0xDD60FBC2, 0xDD61FBC2, 0xDD62FBC2, 0xDD63FBC2, 0xDD64FBC2, + 0xDD65FBC2, 0xDD66FBC2, 0xDD67FBC2, 0xDD68FBC2, 0xDD69FBC2, 0xDD6AFBC2, 0xDD6BFBC2, 0xDD6CFBC2, 0xDD6DFBC2, 0xDD6EFBC2, 0xDD6FFBC2, 0xDD70FBC2, 0xDD71FBC2, 0xDD72FBC2, 0xDD73FBC2, + 0xDD74FBC2, 0xDD75FBC2, 0xDD76FBC2, 0xDD77FBC2, 0xDD78FBC2, 0xDD79FBC2, 0xDD7AFBC2, 0xDD7BFBC2, 0xDD7CFBC2, 0xDD7DFBC2, 0xDD7EFBC2, 0xDD7FFBC2, 0xDD80FBC2, 0xDD81FBC2, 0xDD82FBC2, + 0xDD83FBC2, 0xDD84FBC2, 0xDD85FBC2, 0xDD86FBC2, 0xDD87FBC2, 0xDD88FBC2, 0xDD89FBC2, 0xDD8AFBC2, 0xDD8BFBC2, 0xDD8CFBC2, 0xDD8DFBC2, 0xDD8EFBC2, 0xDD8FFBC2, 0xDD90FBC2, 0xDD91FBC2, + 0xDD92FBC2, 0xDD93FBC2, 0xDD94FBC2, 0xDD95FBC2, 0xDD96FBC2, 0xDD97FBC2, 0xDD98FBC2, 0xDD99FBC2, 0xDD9AFBC2, 0xDD9BFBC2, 0xDD9CFBC2, 0xDD9DFBC2, 0xDD9EFBC2, 0xDD9FFBC2, 0xDDA0FBC2, + 0xDDA1FBC2, 0xDDA2FBC2, 0xDDA3FBC2, 0xDDA4FBC2, 0xDDA5FBC2, 0xDDA6FBC2, 0xDDA7FBC2, 0xDDA8FBC2, 0xDDA9FBC2, 0xDDAAFBC2, 0xDDABFBC2, 0xDDACFBC2, 0xDDADFBC2, 0xDDAEFBC2, 0xDDAFFBC2, + 0xDDB0FBC2, 0xDDB1FBC2, 0xDDB2FBC2, 0xDDB3FBC2, 0xDDB4FBC2, 0xDDB5FBC2, 0xDDB6FBC2, 0xDDB7FBC2, 0xDDB8FBC2, 0xDDB9FBC2, 0xDDBAFBC2, 0xDDBBFBC2, 0xDDBCFBC2, 0xDDBDFBC2, 0xDDBEFBC2, + 0xDDBFFBC2, 0xDDC0FBC2, 0xDDC1FBC2, 0xDDC2FBC2, 0xDDC3FBC2, 0xDDC4FBC2, 0xDDC5FBC2, 0xDDC6FBC2, 0xDDC7FBC2, 0xDDC8FBC2, 0xDDC9FBC2, 0xDDCAFBC2, 0xDDCBFBC2, 0xDDCCFBC2, 0xDDCDFBC2, + 0xDDCEFBC2, 0xDDCFFBC2, 0xDDD0FBC2, 0xDDD1FBC2, 0xDDD2FBC2, 0xDDD3FBC2, 0xDDD4FBC2, 0xDDD5FBC2, 0xDDD6FBC2, 0xDDD7FBC2, 0xDDD8FBC2, 0xDDD9FBC2, 0xDDDAFBC2, 0xDDDBFBC2, 0xDDDCFBC2, + 0xDDDDFBC2, 0xDDDEFBC2, 0xDDDFFBC2, 0xDDE0FBC2, 0xDDE1FBC2, 0xDDE2FBC2, 0xDDE3FBC2, 0xDDE4FBC2, 0xDDE5FBC2, 0xDDE6FBC2, 0xDDE7FBC2, 0xDDE8FBC2, 0xDDE9FBC2, 0xDDEAFBC2, 0xDDEBFBC2, + 0xDDECFBC2, 0xDDEDFBC2, 0xDDEEFBC2, 0xDDEFFBC2, 0xDDF0FBC2, 0xDDF1FBC2, 0xDDF2FBC2, 0xDDF3FBC2, 0xDDF4FBC2, 0xDDF5FBC2, 0xDDF6FBC2, 0xDDF7FBC2, 0xDDF8FBC2, 0xDDF9FBC2, 0xDDFAFBC2, + 0xDDFBFBC2, 0xDDFCFBC2, 0xDDFDFBC2, 0xDDFEFBC2, 0xDDFFFBC2, 
0xDE00FBC2, 0xDE01FBC2, 0xDE02FBC2, 0xDE03FBC2, 0xDE04FBC2, 0xDE05FBC2, 0xDE06FBC2, 0xDE07FBC2, 0xDE08FBC2, 0xDE09FBC2, + 0xDE0AFBC2, 0xDE0BFBC2, 0xDE0CFBC2, 0xDE0DFBC2, 0xDE0EFBC2, 0xDE0FFBC2, 0xDE10FBC2, 0xDE11FBC2, 0xDE12FBC2, 0xDE13FBC2, 0xDE14FBC2, 0xDE15FBC2, 0xDE16FBC2, 0xDE17FBC2, 0xDE18FBC2, + 0xDE19FBC2, 0xDE1AFBC2, 0xDE1BFBC2, 0xDE1CFBC2, 0xDE1DFBC2, 0xDE1EFBC2, 0xDE1FFBC2, 0xDE20FBC2, 0xDE21FBC2, 0xDE22FBC2, 0xDE23FBC2, 0xDE24FBC2, 0xDE25FBC2, 0xDE26FBC2, 0xDE27FBC2, + 0xDE28FBC2, 0xDE29FBC2, 0xDE2AFBC2, 0xDE2BFBC2, 0xDE2CFBC2, 0xDE2DFBC2, 0xDE2EFBC2, 0xDE2FFBC2, 0xDE30FBC2, 0xDE31FBC2, 0xDE32FBC2, 0xDE33FBC2, 0xDE34FBC2, 0xDE35FBC2, 0xDE36FBC2, + 0xDE37FBC2, 0xDE38FBC2, 0xDE39FBC2, 0xDE3AFBC2, 0xDE3BFBC2, 0xDE3CFBC2, 0xDE3DFBC2, 0xDE3EFBC2, 0xDE3FFBC2, 0xDE40FBC2, 0xDE41FBC2, 0xDE42FBC2, 0xDE43FBC2, 0xDE44FBC2, 0xDE45FBC2, + 0xDE46FBC2, 0xDE47FBC2, 0xDE48FBC2, 0xDE49FBC2, 0xDE4AFBC2, 0xDE4BFBC2, 0xDE4CFBC2, 0xDE4DFBC2, 0xDE4EFBC2, 0xDE4FFBC2, 0xDE50FBC2, 0xDE51FBC2, 0xDE52FBC2, 0xDE53FBC2, 0xDE54FBC2, + 0xDE55FBC2, 0xDE56FBC2, 0xDE57FBC2, 0xDE58FBC2, 0xDE59FBC2, 0xDE5AFBC2, 0xDE5BFBC2, 0xDE5CFBC2, 0xDE5DFBC2, 0xDE5EFBC2, 0xDE5FFBC2, 0xDE60FBC2, 0xDE61FBC2, 0xDE62FBC2, 0xDE63FBC2, + 0xDE64FBC2, 0xDE65FBC2, 0xDE66FBC2, 0xDE67FBC2, 0xDE68FBC2, 0xDE69FBC2, 0xDE6AFBC2, 0xDE6BFBC2, 0xDE6CFBC2, 0xDE6DFBC2, 0xDE6EFBC2, 0xDE6FFBC2, 0xDE70FBC2, 0xDE71FBC2, 0xDE72FBC2, + 0xDE73FBC2, 0xDE74FBC2, 0xDE75FBC2, 0xDE76FBC2, 0xDE77FBC2, 0xDE78FBC2, 0xDE79FBC2, 0xDE7AFBC2, 0xDE7BFBC2, 0xDE7CFBC2, 0xDE7DFBC2, 0xDE7EFBC2, 0xDE7FFBC2, 0xDE80FBC2, 0xDE81FBC2, + 0xDE82FBC2, 0xDE83FBC2, 0xDE84FBC2, 0xDE85FBC2, 0xDE86FBC2, 0xDE87FBC2, 0xDE88FBC2, 0xDE89FBC2, 0xDE8AFBC2, 0xDE8BFBC2, 0xDE8CFBC2, 0xDE8DFBC2, 0xDE8EFBC2, 0xDE8FFBC2, 0xDE90FBC2, + 0xDE91FBC2, 0xDE92FBC2, 0xDE93FBC2, 0xDE94FBC2, 0xDE95FBC2, 0xDE96FBC2, 0xDE97FBC2, 0xDE98FBC2, 0xDE99FBC2, 0xDE9AFBC2, 0xDE9BFBC2, 0xDE9CFBC2, 0xDE9DFBC2, 0xDE9EFBC2, 0xDE9FFBC2, + 0xDEA0FBC2, 0xDEA1FBC2, 0xDEA2FBC2, 0xDEA3FBC2, 
0xDEA4FBC2, 0xDEA5FBC2, 0xDEA6FBC2, 0xDEA7FBC2, 0xDEA8FBC2, 0xDEA9FBC2, 0xDEAAFBC2, 0xDEABFBC2, 0xDEACFBC2, 0xDEADFBC2, 0xDEAEFBC2, + 0xDEAFFBC2, 0xDEB0FBC2, 0xDEB1FBC2, 0xDEB2FBC2, 0xDEB3FBC2, 0xDEB4FBC2, 0xDEB5FBC2, 0xDEB6FBC2, 0xDEB7FBC2, 0xDEB8FBC2, 0xDEB9FBC2, 0xDEBAFBC2, 0xDEBBFBC2, 0xDEBCFBC2, 0xDEBDFBC2, + 0xDEBEFBC2, 0xDEBFFBC2, 0xDEC0FBC2, 0xDEC1FBC2, 0xDEC2FBC2, 0xDEC3FBC2, 0xDEC4FBC2, 0xDEC5FBC2, 0xDEC6FBC2, 0xDEC7FBC2, 0xDEC8FBC2, 0xDEC9FBC2, 0xDECAFBC2, 0xDECBFBC2, 0xDECCFBC2, + 0xDECDFBC2, 0xDECEFBC2, 0xDECFFBC2, 0xDED0FBC2, 0xDED1FBC2, 0xDED2FBC2, 0xDED3FBC2, 0xDED4FBC2, 0xDED5FBC2, 0xDED6FBC2, 0xDED7FBC2, 0xDED8FBC2, 0xDED9FBC2, 0xDEDAFBC2, 0xDEDBFBC2, + 0xDEDCFBC2, 0xDEDDFBC2, 0xDEDEFBC2, 0xDEDFFBC2, 0xDEE0FBC2, 0xDEE1FBC2, 0xDEE2FBC2, 0xDEE3FBC2, 0xDEE4FBC2, 0xDEE5FBC2, 0xDEE6FBC2, 0xDEE7FBC2, 0xDEE8FBC2, 0xDEE9FBC2, 0xDEEAFBC2, + 0xDEEBFBC2, 0xDEECFBC2, 0xDEEDFBC2, 0xDEEEFBC2, 0xDEEFFBC2, 0xDEF0FBC2, 0xDEF1FBC2, 0xDEF2FBC2, 0xDEF3FBC2, 0xDEF4FBC2, 0xDEF5FBC2, 0xDEF6FBC2, 0xDEF7FBC2, 0xDEF8FBC2, 0xDEF9FBC2, + 0xDEFAFBC2, 0xDEFBFBC2, 0xDEFCFBC2, 0xDEFDFBC2, 0xDEFEFBC2, 0xDEFFFBC2, 0xDF00FBC2, 0xDF01FBC2, 0xDF02FBC2, 0xDF03FBC2, 0xDF04FBC2, 0xDF05FBC2, 0xDF06FBC2, 0xDF07FBC2, 0xDF08FBC2, + 0xDF09FBC2, 0xDF0AFBC2, 0xDF0BFBC2, 0xDF0CFBC2, 0xDF0DFBC2, 0xDF0EFBC2, 0xDF0FFBC2, 0xDF10FBC2, 0xDF11FBC2, 0xDF12FBC2, 0xDF13FBC2, 0xDF14FBC2, 0xDF15FBC2, 0xDF16FBC2, 0xDF17FBC2, + 0xDF18FBC2, 0xDF19FBC2, 0xDF1AFBC2, 0xDF1BFBC2, 0xDF1CFBC2, 0xDF1DFBC2, 0xDF1EFBC2, 0xDF1FFBC2, 0xDF20FBC2, 0xDF21FBC2, 0xDF22FBC2, 0xDF23FBC2, 0xDF24FBC2, 0xDF25FBC2, 0xDF26FBC2, + 0xDF27FBC2, 0xDF28FBC2, 0xDF29FBC2, 0xDF2AFBC2, 0xDF2BFBC2, 0xDF2CFBC2, 0xDF2DFBC2, 0xDF2EFBC2, 0xDF2FFBC2, 0xDF30FBC2, 0xDF31FBC2, 0xDF32FBC2, 0xDF33FBC2, 0xDF34FBC2, 0xDF35FBC2, + 0xDF36FBC2, 0xDF37FBC2, 0xDF38FBC2, 0xDF39FBC2, 0xDF3AFBC2, 0xDF3BFBC2, 0xDF3CFBC2, 0xDF3DFBC2, 0xDF3EFBC2, 0xDF3FFBC2, 0xDF40FBC2, 0xDF41FBC2, 0xDF42FBC2, 0xDF43FBC2, 0xDF44FBC2, + 0xDF45FBC2, 0xDF46FBC2, 0xDF47FBC2, 
0xDF48FBC2, 0xDF49FBC2, 0xDF4AFBC2, 0xDF4BFBC2, 0xDF4CFBC2, 0xDF4DFBC2, 0xDF4EFBC2, 0xDF4FFBC2, 0xDF50FBC2, 0xDF51FBC2, 0xDF52FBC2, 0xDF53FBC2, + 0xDF54FBC2, 0xDF55FBC2, 0xDF56FBC2, 0xDF57FBC2, 0xDF58FBC2, 0xDF59FBC2, 0xDF5AFBC2, 0xDF5BFBC2, 0xDF5CFBC2, 0xDF5DFBC2, 0xDF5EFBC2, 0xDF5FFBC2, 0xDF60FBC2, 0xDF61FBC2, 0xDF62FBC2, + 0xDF63FBC2, 0xDF64FBC2, 0xDF65FBC2, 0xDF66FBC2, 0xDF67FBC2, 0xDF68FBC2, 0xDF69FBC2, 0xDF6AFBC2, 0xDF6BFBC2, 0xDF6CFBC2, 0xDF6DFBC2, 0xDF6EFBC2, 0xDF6FFBC2, 0xDF70FBC2, 0xDF71FBC2, + 0xDF72FBC2, 0xDF73FBC2, 0xDF74FBC2, 0xDF75FBC2, 0xDF76FBC2, 0xDF77FBC2, 0xDF78FBC2, 0xDF79FBC2, 0xDF7AFBC2, 0xDF7BFBC2, 0xDF7CFBC2, 0xDF7DFBC2, 0xDF7EFBC2, 0xDF7FFBC2, 0xDF80FBC2, + 0xDF81FBC2, 0xDF82FBC2, 0xDF83FBC2, 0xDF84FBC2, 0xDF85FBC2, 0xDF86FBC2, 0xDF87FBC2, 0xDF88FBC2, 0xDF89FBC2, 0xDF8AFBC2, 0xDF8BFBC2, 0xDF8CFBC2, 0xDF8DFBC2, 0xDF8EFBC2, 0xDF8FFBC2, + 0xDF90FBC2, 0xDF91FBC2, 0xDF92FBC2, 0xDF93FBC2, 0xDF94FBC2, 0xDF95FBC2, 0xDF96FBC2, 0xDF97FBC2, 0xDF98FBC2, 0xDF99FBC2, 0xDF9AFBC2, 0xDF9BFBC2, 0xDF9CFBC2, 0xDF9DFBC2, 0xDF9EFBC2, + 0xDF9FFBC2, 0xDFA0FBC2, 0xDFA1FBC2, 0xDFA2FBC2, 0xDFA3FBC2, 0xDFA4FBC2, 0xDFA5FBC2, 0xDFA6FBC2, 0xDFA7FBC2, 0xDFA8FBC2, 0xDFA9FBC2, 0xDFAAFBC2, 0xDFABFBC2, 0xDFACFBC2, 0xDFADFBC2, + 0xDFAEFBC2, 0xDFAFFBC2, 0xDFB0FBC2, 0xDFB1FBC2, 0xDFB2FBC2, 0xDFB3FBC2, 0xDFB4FBC2, 0xDFB5FBC2, 0xDFB6FBC2, 0xDFB7FBC2, 0xDFB8FBC2, 0xDFB9FBC2, 0xDFBAFBC2, 0xDFBBFBC2, 0xDFBCFBC2, + 0xDFBDFBC2, 0xDFBEFBC2, 0xDFBFFBC2, 0xDFC0FBC2, 0xDFC1FBC2, 0xDFC2FBC2, 0xDFC3FBC2, 0xDFC4FBC2, 0xDFC5FBC2, 0xDFC6FBC2, 0xDFC7FBC2, 0xDFC8FBC2, 0xDFC9FBC2, 0xDFCAFBC2, 0xDFCBFBC2, + 0xDFCCFBC2, 0xDFCDFBC2, 0xDFCEFBC2, 0xDFCFFBC2, 0xDFD0FBC2, 0xDFD1FBC2, 0xDFD2FBC2, 0xDFD3FBC2, 0xDFD4FBC2, 0xDFD5FBC2, 0xDFD6FBC2, 0xDFD7FBC2, 0xDFD8FBC2, 0xDFD9FBC2, 0xDFDAFBC2, + 0xDFDBFBC2, 0xDFDCFBC2, 0xDFDDFBC2, 0xDFDEFBC2, 0xDFDFFBC2, 0xDFE0FBC2, 0xDFE1FBC2, 0xDFE2FBC2, 0xDFE3FBC2, 0xDFE4FBC2, 0xDFE5FBC2, 0xDFE6FBC2, 0xDFE7FBC2, 0xDFE8FBC2, 0xDFE9FBC2, + 0xDFEAFBC2, 0xDFEBFBC2, 
0xDFECFBC2, 0xDFEDFBC2, 0xDFEEFBC2, 0xDFEFFBC2, 0xDFF0FBC2, 0xDFF1FBC2, 0xDFF2FBC2, 0xDFF3FBC2, 0xDFF4FBC2, 0xDFF5FBC2, 0xDFF6FBC2, 0xDFF7FBC2, 0xDFF8FBC2, + 0xDFF9FBC2, 0xDFFAFBC2, 0xDFFBFBC2, 0xDFFCFBC2, 0xDFFDFBC2, 0xDFFEFBC2, 0xDFFFFBC2, 0xE000FBC2, 0xE001FBC2, 0xE002FBC2, 0xE003FBC2, 0xE004FBC2, 0xE005FBC2, 0xE006FBC2, 0xE007FBC2, + 0xE008FBC2, 0xE009FBC2, 0xE00AFBC2, 0xE00BFBC2, 0xE00CFBC2, 0xE00DFBC2, 0xE00EFBC2, 0xE00FFBC2, 0xE010FBC2, 0xE011FBC2, 0xE012FBC2, 0xE013FBC2, 0xE014FBC2, 0xE015FBC2, 0xE016FBC2, + 0xE017FBC2, 0xE018FBC2, 0xE019FBC2, 0xE01AFBC2, 0xE01BFBC2, 0xE01CFBC2, 0xE01DFBC2, 0xE01EFBC2, 0xE01FFBC2, 0xE020FBC2, 0xE021FBC2, 0xE022FBC2, 0xE023FBC2, 0xE024FBC2, 0xE025FBC2, + 0xE026FBC2, 0xE027FBC2, 0xE028FBC2, 0xE029FBC2, 0xE02AFBC2, 0xE02BFBC2, 0xE02CFBC2, 0xE02DFBC2, 0xE02EFBC2, 0xE02FFBC2, 0xE030FBC2, 0xE031FBC2, 0xE032FBC2, 0xE033FBC2, 0xE034FBC2, + 0xE035FBC2, 0xE036FBC2, 0xE037FBC2, 0xE038FBC2, 0xE039FBC2, 0xE03AFBC2, 0xE03BFBC2, 0xE03CFBC2, 0xE03DFBC2, 0xE03EFBC2, 0xE03FFBC2, 0xE040FBC2, 0xE041FBC2, 0xE042FBC2, 0xE043FBC2, + 0xE044FBC2, 0xE045FBC2, 0xE046FBC2, 0xE047FBC2, 0xE048FBC2, 0xE049FBC2, 0xE04AFBC2, 0xE04BFBC2, 0xE04CFBC2, 0xE04DFBC2, 0xE04EFBC2, 0xE04FFBC2, 0xE050FBC2, 0xE051FBC2, 0xE052FBC2, + 0xE053FBC2, 0xE054FBC2, 0xE055FBC2, 0xE056FBC2, 0xE057FBC2, 0xE058FBC2, 0xE059FBC2, 0xE05AFBC2, 0xE05BFBC2, 0xE05CFBC2, 0xE05DFBC2, 0xE05EFBC2, 0xE05FFBC2, 0xE060FBC2, 0xE061FBC2, + 0xE062FBC2, 0xE063FBC2, 0xE064FBC2, 0xE065FBC2, 0xE066FBC2, 0xE067FBC2, 0xE068FBC2, 0xE069FBC2, 0xE06AFBC2, 0xE06BFBC2, 0xE06CFBC2, 0xE06DFBC2, 0xE06EFBC2, 0xE06FFBC2, 0xE070FBC2, + 0xE071FBC2, 0xE072FBC2, 0xE073FBC2, 0xE074FBC2, 0xE075FBC2, 0xE076FBC2, 0xE077FBC2, 0xE078FBC2, 0xE079FBC2, 0xE07AFBC2, 0xE07BFBC2, 0xE07CFBC2, 0xE07DFBC2, 0xE07EFBC2, 0xE07FFBC2, + 0xE080FBC2, 0xE081FBC2, 0xE082FBC2, 0xE083FBC2, 0xE084FBC2, 0xE085FBC2, 0xE086FBC2, 0xE087FBC2, 0xE088FBC2, 0xE089FBC2, 0xE08AFBC2, 0xE08BFBC2, 0xE08CFBC2, 0xE08DFBC2, 0xE08EFBC2, + 0xE08FFBC2, 
0xE090FBC2, 0xE091FBC2, 0xE092FBC2, 0xE093FBC2, 0xE094FBC2, 0xE095FBC2, 0xE096FBC2, 0xE097FBC2, 0xE098FBC2, 0xE099FBC2, 0xE09AFBC2, 0xE09BFBC2, 0xE09CFBC2, 0xE09DFBC2, + 0xE09EFBC2, 0xE09FFBC2, 0xE0A0FBC2, 0xE0A1FBC2, 0xE0A2FBC2, 0xE0A3FBC2, 0xE0A4FBC2, 0xE0A5FBC2, 0xE0A6FBC2, 0xE0A7FBC2, 0xE0A8FBC2, 0xE0A9FBC2, 0xE0AAFBC2, 0xE0ABFBC2, 0xE0ACFBC2, + 0xE0ADFBC2, 0xE0AEFBC2, 0xE0AFFBC2, 0xE0B0FBC2, 0xE0B1FBC2, 0xE0B2FBC2, 0xE0B3FBC2, 0xE0B4FBC2, 0xE0B5FBC2, 0xE0B6FBC2, 0xE0B7FBC2, 0xE0B8FBC2, 0xE0B9FBC2, 0xE0BAFBC2, 0xE0BBFBC2, + 0xE0BCFBC2, 0xE0BDFBC2, 0xE0BEFBC2, 0xE0BFFBC2, 0xE0C0FBC2, 0xE0C1FBC2, 0xE0C2FBC2, 0xE0C3FBC2, 0xE0C4FBC2, 0xE0C5FBC2, 0xE0C6FBC2, 0xE0C7FBC2, 0xE0C8FBC2, 0xE0C9FBC2, 0xE0CAFBC2, + 0xE0CBFBC2, 0xE0CCFBC2, 0xE0CDFBC2, 0xE0CEFBC2, 0xE0CFFBC2, 0xE0D0FBC2, 0xE0D1FBC2, 0xE0D2FBC2, 0xE0D3FBC2, 0xE0D4FBC2, 0xE0D5FBC2, 0xE0D6FBC2, 0xE0D7FBC2, 0xE0D8FBC2, 0xE0D9FBC2, + 0xE0DAFBC2, 0xE0DBFBC2, 0xE0DCFBC2, 0xE0DDFBC2, 0xE0DEFBC2, 0xE0DFFBC2, 0xE0E0FBC2, 0xE0E1FBC2, 0xE0E2FBC2, 0xE0E3FBC2, 0xE0E4FBC2, 0xE0E5FBC2, 0xE0E6FBC2, 0xE0E7FBC2, 0xE0E8FBC2, + 0xE0E9FBC2, 0xE0EAFBC2, 0xE0EBFBC2, 0xE0ECFBC2, 0xE0EDFBC2, 0xE0EEFBC2, 0xE0EFFBC2, 0xE0F0FBC2, 0xE0F1FBC2, 0xE0F2FBC2, 0xE0F3FBC2, 0xE0F4FBC2, 0xE0F5FBC2, 0xE0F6FBC2, 0xE0F7FBC2, + 0xE0F8FBC2, 0xE0F9FBC2, 0xE0FAFBC2, 0xE0FBFBC2, 0xE0FCFBC2, 0xE0FDFBC2, 0xE0FEFBC2, 0xE0FFFBC2, 0xE100FBC2, 0xE101FBC2, 0xE102FBC2, 0xE103FBC2, 0xE104FBC2, 0xE105FBC2, 0xE106FBC2, + 0xE107FBC2, 0xE108FBC2, 0xE109FBC2, 0xE10AFBC2, 0xE10BFBC2, 0xE10CFBC2, 0xE10DFBC2, 0xE10EFBC2, 0xE10FFBC2, 0xE110FBC2, 0xE111FBC2, 0xE112FBC2, 0xE113FBC2, 0xE114FBC2, 0xE115FBC2, + 0xE116FBC2, 0xE117FBC2, 0xE118FBC2, 0xE119FBC2, 0xE11AFBC2, 0xE11BFBC2, 0xE11CFBC2, 0xE11DFBC2, 0xE11EFBC2, 0xE11FFBC2, 0xE120FBC2, 0xE121FBC2, 0xE122FBC2, 0xE123FBC2, 0xE124FBC2, + 0xE125FBC2, 0xE126FBC2, 0xE127FBC2, 0xE128FBC2, 0xE129FBC2, 0xE12AFBC2, 0xE12BFBC2, 0xE12CFBC2, 0xE12DFBC2, 0xE12EFBC2, 0xE12FFBC2, 0xE130FBC2, 0xE131FBC2, 0xE132FBC2, 0xE133FBC2, + 
0xE134FBC2, 0xE135FBC2, 0xE136FBC2, 0xE137FBC2, 0xE138FBC2, 0xE139FBC2, 0xE13AFBC2, 0xE13BFBC2, 0xE13CFBC2, 0xE13DFBC2, 0xE13EFBC2, 0xE13FFBC2, 0xE140FBC2, 0xE141FBC2, 0xE142FBC2, + 0xE143FBC2, 0xE144FBC2, 0xE145FBC2, 0xE146FBC2, 0xE147FBC2, 0xE148FBC2, 0xE149FBC2, 0xE14AFBC2, 0xE14BFBC2, 0xE14CFBC2, 0xE14DFBC2, 0xE14EFBC2, 0xE14FFBC2, 0xE150FBC2, 0xE151FBC2, + 0xE152FBC2, 0xE153FBC2, 0xE154FBC2, 0xE155FBC2, 0xE156FBC2, 0xE157FBC2, 0xE158FBC2, 0xE159FBC2, 0xE15AFBC2, 0xE15BFBC2, 0xE15CFBC2, 0xE15DFBC2, 0xE15EFBC2, 0xE15FFBC2, 0xE160FBC2, + 0xE161FBC2, 0xE162FBC2, 0xE163FBC2, 0xE164FBC2, 0xE165FBC2, 0xE166FBC2, 0xE167FBC2, 0xE168FBC2, 0xE169FBC2, 0xE16AFBC2, 0xE16BFBC2, 0xE16CFBC2, 0xE16DFBC2, 0xE16EFBC2, 0xE16FFBC2, + 0xE170FBC2, 0xE171FBC2, 0xE172FBC2, 0xE173FBC2, 0xE174FBC2, 0xE175FBC2, 0xE176FBC2, 0xE177FBC2, 0xE178FBC2, 0xE179FBC2, 0xE17AFBC2, 0xE17BFBC2, 0xE17CFBC2, 0xE17DFBC2, 0xE17EFBC2, + 0xE17FFBC2, 0xE180FBC2, 0xE181FBC2, 0xE182FBC2, 0xE183FBC2, 0xE184FBC2, 0xE185FBC2, 0xE186FBC2, 0xE187FBC2, 0xE188FBC2, 0xE189FBC2, 0xE18AFBC2, 0xE18BFBC2, 0xE18CFBC2, 0xE18DFBC2, + 0xE18EFBC2, 0xE18FFBC2, 0xE190FBC2, 0xE191FBC2, 0xE192FBC2, 0xE193FBC2, 0xE194FBC2, 0xE195FBC2, 0xE196FBC2, 0xE197FBC2, 0xE198FBC2, 0xE199FBC2, 0xE19AFBC2, 0xE19BFBC2, 0xE19CFBC2, + 0xE19DFBC2, 0xE19EFBC2, 0xE19FFBC2, 0xE1A0FBC2, 0xE1A1FBC2, 0xE1A2FBC2, 0xE1A3FBC2, 0xE1A4FBC2, 0xE1A5FBC2, 0xE1A6FBC2, 0xE1A7FBC2, 0xE1A8FBC2, 0xE1A9FBC2, 0xE1AAFBC2, 0xE1ABFBC2, + 0xE1ACFBC2, 0xE1ADFBC2, 0xE1AEFBC2, 0xE1AFFBC2, 0xE1B0FBC2, 0xE1B1FBC2, 0xE1B2FBC2, 0xE1B3FBC2, 0xE1B4FBC2, 0xE1B5FBC2, 0xE1B6FBC2, 0xE1B7FBC2, 0xE1B8FBC2, 0xE1B9FBC2, 0xE1BAFBC2, + 0xE1BBFBC2, 0xE1BCFBC2, 0xE1BDFBC2, 0xE1BEFBC2, 0xE1BFFBC2, 0xE1C0FBC2, 0xE1C1FBC2, 0xE1C2FBC2, 0xE1C3FBC2, 0xE1C4FBC2, 0xE1C5FBC2, 0xE1C6FBC2, 0xE1C7FBC2, 0xE1C8FBC2, 0xE1C9FBC2, + 0xE1CAFBC2, 0xE1CBFBC2, 0xE1CCFBC2, 0xE1CDFBC2, 0xE1CEFBC2, 0xE1CFFBC2, 0xE1D0FBC2, 0xE1D1FBC2, 0xE1D2FBC2, 0xE1D3FBC2, 0xE1D4FBC2, 0xE1D5FBC2, 0xE1D6FBC2, 0xE1D7FBC2, 0xE1D8FBC2, 
+ 0xE1D9FBC2, 0xE1DAFBC2, 0xE1DBFBC2, 0xE1DCFBC2, 0xE1DDFBC2, 0xE1DEFBC2, 0xE1DFFBC2, 0xE1E0FBC2, 0xE1E1FBC2, 0xE1E2FBC2, 0xE1E3FBC2, 0xE1E4FBC2, 0xE1E5FBC2, 0xE1E6FBC2, 0xE1E7FBC2, + 0xE1E8FBC2, 0xE1E9FBC2, 0xE1EAFBC2, 0xE1EBFBC2, 0xE1ECFBC2, 0xE1EDFBC2, 0xE1EEFBC2, 0xE1EFFBC2, 0xE1F0FBC2, 0xE1F1FBC2, 0xE1F2FBC2, 0xE1F3FBC2, 0xE1F4FBC2, 0xE1F5FBC2, 0xE1F6FBC2, + 0xE1F7FBC2, 0xE1F8FBC2, 0xE1F9FBC2, 0xE1FAFBC2, 0xE1FBFBC2, 0xE1FCFBC2, 0xE1FDFBC2, 0xE1FEFBC2, 0xE1FFFBC2, 0xE200FBC2, 0xE201FBC2, 0xE202FBC2, 0xE203FBC2, 0xE204FBC2, 0xE205FBC2, + 0xE206FBC2, 0xE207FBC2, 0xE208FBC2, 0xE209FBC2, 0xE20AFBC2, 0xE20BFBC2, 0xE20CFBC2, 0xE20DFBC2, 0xE20EFBC2, 0xE20FFBC2, 0xE210FBC2, 0xE211FBC2, 0xE212FBC2, 0xE213FBC2, 0xE214FBC2, + 0xE215FBC2, 0xE216FBC2, 0xE217FBC2, 0xE218FBC2, 0xE219FBC2, 0xE21AFBC2, 0xE21BFBC2, 0xE21CFBC2, 0xE21DFBC2, 0xE21EFBC2, 0xE21FFBC2, 0xE220FBC2, 0xE221FBC2, 0xE222FBC2, 0xE223FBC2, + 0xE224FBC2, 0xE225FBC2, 0xE226FBC2, 0xE227FBC2, 0xE228FBC2, 0xE229FBC2, 0xE22AFBC2, 0xE22BFBC2, 0xE22CFBC2, 0xE22DFBC2, 0xE22EFBC2, 0xE22FFBC2, 0xE230FBC2, 0xE231FBC2, 0xE232FBC2, + 0xE233FBC2, 0xE234FBC2, 0xE235FBC2, 0xE236FBC2, 0xE237FBC2, 0xE238FBC2, 0xE239FBC2, 0xE23AFBC2, 0xE23BFBC2, 0xE23CFBC2, 0xE23DFBC2, 0xE23EFBC2, 0xE23FFBC2, 0xE240FBC2, 0xE241FBC2, + 0xE242FBC2, 0xE243FBC2, 0xE244FBC2, 0xE245FBC2, 0xE246FBC2, 0xE247FBC2, 0xE248FBC2, 0xE249FBC2, 0xE24AFBC2, 0xE24BFBC2, 0xE24CFBC2, 0xE24DFBC2, 0xE24EFBC2, 0xE24FFBC2, 0xE250FBC2, + 0xE251FBC2, 0xE252FBC2, 0xE253FBC2, 0xE254FBC2, 0xE255FBC2, 0xE256FBC2, 0xE257FBC2, 0xE258FBC2, 0xE259FBC2, 0xE25AFBC2, 0xE25BFBC2, 0xE25CFBC2, 0xE25DFBC2, 0xE25EFBC2, 0xE25FFBC2, + 0xE260FBC2, 0xE261FBC2, 0xE262FBC2, 0xE263FBC2, 0xE264FBC2, 0xE265FBC2, 0xE266FBC2, 0xE267FBC2, 0xE268FBC2, 0xE269FBC2, 0xE26AFBC2, 0xE26BFBC2, 0xE26CFBC2, 0xE26DFBC2, 0xE26EFBC2, + 0xE26FFBC2, 0xE270FBC2, 0xE271FBC2, 0xE272FBC2, 0xE273FBC2, 0xE274FBC2, 0xE275FBC2, 0xE276FBC2, 0xE277FBC2, 0xE278FBC2, 0xE279FBC2, 0xE27AFBC2, 0xE27BFBC2, 0xE27CFBC2, 
0xE27DFBC2, + 0xE27EFBC2, 0xE27FFBC2, 0xE280FBC2, 0xE281FBC2, 0xE282FBC2, 0xE283FBC2, 0xE284FBC2, 0xE285FBC2, 0xE286FBC2, 0xE287FBC2, 0xE288FBC2, 0xE289FBC2, 0xE28AFBC2, 0xE28BFBC2, 0xE28CFBC2, + 0xE28DFBC2, 0xE28EFBC2, 0xE28FFBC2, 0xE290FBC2, 0xE291FBC2, 0xE292FBC2, 0xE293FBC2, 0xE294FBC2, 0xE295FBC2, 0xE296FBC2, 0xE297FBC2, 0xE298FBC2, 0xE299FBC2, 0xE29AFBC2, 0xE29BFBC2, + 0xE29CFBC2, 0xE29DFBC2, 0xE29EFBC2, 0xE29FFBC2, 0xE2A0FBC2, 0xE2A1FBC2, 0xE2A2FBC2, 0xE2A3FBC2, 0xE2A4FBC2, 0xE2A5FBC2, 0xE2A6FBC2, 0xE2A7FBC2, 0xE2A8FBC2, 0xE2A9FBC2, 0xE2AAFBC2, + 0xE2ABFBC2, 0xE2ACFBC2, 0xE2ADFBC2, 0xE2AEFBC2, 0xE2AFFBC2, 0xE2B0FBC2, 0xE2B1FBC2, 0xE2B2FBC2, 0xE2B3FBC2, 0xE2B4FBC2, 0xE2B5FBC2, 0xE2B6FBC2, 0xE2B7FBC2, 0xE2B8FBC2, 0xE2B9FBC2, + 0xE2BAFBC2, 0xE2BBFBC2, 0xE2BCFBC2, 0xE2BDFBC2, 0xE2BEFBC2, 0xE2BFFBC2, 0xE2C0FBC2, 0xE2C1FBC2, 0xE2C2FBC2, 0xE2C3FBC2, 0xE2C4FBC2, 0xE2C5FBC2, 0xE2C6FBC2, 0xE2C7FBC2, 0xE2C8FBC2, + 0xE2C9FBC2, 0xE2CAFBC2, 0xE2CBFBC2, 0xE2CCFBC2, 0xE2CDFBC2, 0xE2CEFBC2, 0xE2CFFBC2, 0xE2D0FBC2, 0xE2D1FBC2, 0xE2D2FBC2, 0xE2D3FBC2, 0xE2D4FBC2, 0xE2D5FBC2, 0xE2D6FBC2, 0xE2D7FBC2, + 0xE2D8FBC2, 0xE2D9FBC2, 0xE2DAFBC2, 0xE2DBFBC2, 0xE2DCFBC2, 0xE2DDFBC2, 0xE2DEFBC2, 0xE2DFFBC2, 0xE2E0FBC2, 0xE2E1FBC2, 0xE2E2FBC2, 0xE2E3FBC2, 0xE2E4FBC2, 0xE2E5FBC2, 0xE2E6FBC2, + 0xE2E7FBC2, 0xE2E8FBC2, 0xE2E9FBC2, 0xE2EAFBC2, 0xE2EBFBC2, 0xE2ECFBC2, 0xE2EDFBC2, 0xE2EEFBC2, 0xE2EFFBC2, 0xE2F0FBC2, 0xE2F1FBC2, 0xE2F2FBC2, 0xE2F3FBC2, 0xE2F4FBC2, 0xE2F5FBC2, + 0xE2F6FBC2, 0xE2F7FBC2, 0xE2F8FBC2, 0xE2F9FBC2, 0xE2FAFBC2, 0xE2FBFBC2, 0xE2FCFBC2, 0xE2FDFBC2, 0xE2FEFBC2, 0xE2FFFBC2, 0xE300FBC2, 0xE301FBC2, 0xE302FBC2, 0xE303FBC2, 0xE304FBC2, + 0xE305FBC2, 0xE306FBC2, 0xE307FBC2, 0xE308FBC2, 0xE309FBC2, 0xE30AFBC2, 0xE30BFBC2, 0xE30CFBC2, 0xE30DFBC2, 0xE30EFBC2, 0xE30FFBC2, 0xE310FBC2, 0xE311FBC2, 0xE312FBC2, 0xE313FBC2, + 0xE314FBC2, 0xE315FBC2, 0xE316FBC2, 0xE317FBC2, 0xE318FBC2, 0xE319FBC2, 0xE31AFBC2, 0xE31BFBC2, 0xE31CFBC2, 0xE31DFBC2, 0xE31EFBC2, 0xE31FFBC2, 0xE320FBC2, 
0xE321FBC2, 0xE322FBC2, + 0xE323FBC2, 0xE324FBC2, 0xE325FBC2, 0xE326FBC2, 0xE327FBC2, 0xE328FBC2, 0xE329FBC2, 0xE32AFBC2, 0xE32BFBC2, 0xE32CFBC2, 0xE32DFBC2, 0xE32EFBC2, 0xE32FFBC2, 0xE330FBC2, 0xE331FBC2, + 0xE332FBC2, 0xE333FBC2, 0xE334FBC2, 0xE335FBC2, 0xE336FBC2, 0xE337FBC2, 0xE338FBC2, 0xE339FBC2, 0xE33AFBC2, 0xE33BFBC2, 0xE33CFBC2, 0xE33DFBC2, 0xE33EFBC2, 0xE33FFBC2, 0xE340FBC2, + 0xE341FBC2, 0xE342FBC2, 0xE343FBC2, 0xE344FBC2, 0xE345FBC2, 0xE346FBC2, 0xE347FBC2, 0xE348FBC2, 0xE349FBC2, 0xE34AFBC2, 0xE34BFBC2, 0xE34CFBC2, 0xE34DFBC2, 0xE34EFBC2, 0xE34FFBC2, + 0xE350FBC2, 0xE351FBC2, 0xE352FBC2, 0xE353FBC2, 0xE354FBC2, 0xE355FBC2, 0xE356FBC2, 0xE357FBC2, 0xE358FBC2, 0xE359FBC2, 0xE35AFBC2, 0xE35BFBC2, 0xE35CFBC2, 0xE35DFBC2, 0xE35EFBC2, + 0xE35FFBC2, 0xE360FBC2, 0xE361FBC2, 0xE362FBC2, 0xE363FBC2, 0xE364FBC2, 0xE365FBC2, 0xE366FBC2, 0xE367FBC2, 0xE368FBC2, 0xE369FBC2, 0xE36AFBC2, 0xE36BFBC2, 0xE36CFBC2, 0xE36DFBC2, + 0xE36EFBC2, 0xE36FFBC2, 0xE370FBC2, 0xE371FBC2, 0xE372FBC2, 0xE373FBC2, 0xE374FBC2, 0xE375FBC2, 0xE376FBC2, 0xE377FBC2, 0xE378FBC2, 0xE379FBC2, 0xE37AFBC2, 0xE37BFBC2, 0xE37CFBC2, + 0xE37DFBC2, 0xE37EFBC2, 0xE37FFBC2, 0xE380FBC2, 0xE381FBC2, 0xE382FBC2, 0xE383FBC2, 0xE384FBC2, 0xE385FBC2, 0xE386FBC2, 0xE387FBC2, 0xE388FBC2, 0xE389FBC2, 0xE38AFBC2, 0xE38BFBC2, + 0xE38CFBC2, 0xE38DFBC2, 0xE38EFBC2, 0xE38FFBC2, 0xE390FBC2, 0xE391FBC2, 0xE392FBC2, 0xE393FBC2, 0xE394FBC2, 0xE395FBC2, 0xE396FBC2, 0xE397FBC2, 0xE398FBC2, 0xE399FBC2, 0xE39AFBC2, + 0xE39BFBC2, 0xE39CFBC2, 0xE39DFBC2, 0xE39EFBC2, 0xE39FFBC2, 0xE3A0FBC2, 0xE3A1FBC2, 0xE3A2FBC2, 0xE3A3FBC2, 0xE3A4FBC2, 0xE3A5FBC2, 0xE3A6FBC2, 0xE3A7FBC2, 0xE3A8FBC2, 0xE3A9FBC2, + 0xE3AAFBC2, 0xE3ABFBC2, 0xE3ACFBC2, 0xE3ADFBC2, 0xE3AEFBC2, 0xE3AFFBC2, 0xE3B0FBC2, 0xE3B1FBC2, 0xE3B2FBC2, 0xE3B3FBC2, 0xE3B4FBC2, 0xE3B5FBC2, 0xE3B6FBC2, 0xE3B7FBC2, 0xE3B8FBC2, + 0xE3B9FBC2, 0xE3BAFBC2, 0xE3BBFBC2, 0xE3BCFBC2, 0xE3BDFBC2, 0xE3BEFBC2, 0xE3BFFBC2, 0xE3C0FBC2, 0xE3C1FBC2, 0xE3C2FBC2, 0xE3C3FBC2, 0xE3C4FBC2, 
0xE3C5FBC2, 0xE3C6FBC2, 0xE3C7FBC2, + 0xE3C8FBC2, 0xE3C9FBC2, 0xE3CAFBC2, 0xE3CBFBC2, 0xE3CCFBC2, 0xE3CDFBC2, 0xE3CEFBC2, 0xE3CFFBC2, 0xE3D0FBC2, 0xE3D1FBC2, 0xE3D2FBC2, 0xE3D3FBC2, 0xE3D4FBC2, 0xE3D5FBC2, 0xE3D6FBC2, + 0xE3D7FBC2, 0xE3D8FBC2, 0xE3D9FBC2, 0xE3DAFBC2, 0xE3DBFBC2, 0xE3DCFBC2, 0xE3DDFBC2, 0xE3DEFBC2, 0xE3DFFBC2, 0xE3E0FBC2, 0xE3E1FBC2, 0xE3E2FBC2, 0xE3E3FBC2, 0xE3E4FBC2, 0xE3E5FBC2, + 0xE3E6FBC2, 0xE3E7FBC2, 0xE3E8FBC2, 0xE3E9FBC2, 0xE3EAFBC2, 0xE3EBFBC2, 0xE3ECFBC2, 0xE3EDFBC2, 0xE3EEFBC2, 0xE3EFFBC2, 0xE3F0FBC2, 0xE3F1FBC2, 0xE3F2FBC2, 0xE3F3FBC2, 0xE3F4FBC2, + 0xE3F5FBC2, 0xE3F6FBC2, 0xE3F7FBC2, 0xE3F8FBC2, 0xE3F9FBC2, 0xE3FAFBC2, 0xE3FBFBC2, 0xE3FCFBC2, 0xE3FDFBC2, 0xE3FEFBC2, 0xE3FFFBC2, 0xE400FBC2, 0xE401FBC2, 0xE402FBC2, 0xE403FBC2, + 0xE404FBC2, 0xE405FBC2, 0xE406FBC2, 0xE407FBC2, 0xE408FBC2, 0xE409FBC2, 0xE40AFBC2, 0xE40BFBC2, 0xE40CFBC2, 0xE40DFBC2, 0xE40EFBC2, 0xE40FFBC2, 0xE410FBC2, 0xE411FBC2, 0xE412FBC2, + 0xE413FBC2, 0xE414FBC2, 0xE415FBC2, 0xE416FBC2, 0xE417FBC2, 0xE418FBC2, 0xE419FBC2, 0xE41AFBC2, 0xE41BFBC2, 0xE41CFBC2, 0xE41DFBC2, 0xE41EFBC2, 0xE41FFBC2, 0xE420FBC2, 0xE421FBC2, + 0xE422FBC2, 0xE423FBC2, 0xE424FBC2, 0xE425FBC2, 0xE426FBC2, 0xE427FBC2, 0xE428FBC2, 0xE429FBC2, 0xE42AFBC2, 0xE42BFBC2, 0xE42CFBC2, 0xE42DFBC2, 0xE42EFBC2, 0xE42FFBC2, 0xE430FBC2, + 0xE431FBC2, 0xE432FBC2, 0xE433FBC2, 0xE434FBC2, 0xE435FBC2, 0xE436FBC2, 0xE437FBC2, 0xE438FBC2, 0xE439FBC2, 0xE43AFBC2, 0xE43BFBC2, 0xE43CFBC2, 0xE43DFBC2, 0xE43EFBC2, 0xE43FFBC2, + 0xE440FBC2, 0xE441FBC2, 0xE442FBC2, 0xE443FBC2, 0xE444FBC2, 0xE445FBC2, 0xE446FBC2, 0xE447FBC2, 0xE448FBC2, 0xE449FBC2, 0xE44AFBC2, 0xE44BFBC2, 0xE44CFBC2, 0xE44DFBC2, 0xE44EFBC2, + 0xE44FFBC2, 0xE450FBC2, 0xE451FBC2, 0xE452FBC2, 0xE453FBC2, 0xE454FBC2, 0xE455FBC2, 0xE456FBC2, 0xE457FBC2, 0xE458FBC2, 0xE459FBC2, 0xE45AFBC2, 0xE45BFBC2, 0xE45CFBC2, 0xE45DFBC2, + 0xE45EFBC2, 0xE45FFBC2, 0xE460FBC2, 0xE461FBC2, 0xE462FBC2, 0xE463FBC2, 0xE464FBC2, 0xE465FBC2, 0xE466FBC2, 0xE467FBC2, 0xE468FBC2, 
0xE469FBC2, 0xE46AFBC2, 0xE46BFBC2, 0xE46CFBC2, + 0xE46DFBC2, 0xE46EFBC2, 0xE46FFBC2, 0xE470FBC2, 0xE471FBC2, 0xE472FBC2, 0xE473FBC2, 0xE474FBC2, 0xE475FBC2, 0xE476FBC2, 0xE477FBC2, 0xE478FBC2, 0xE479FBC2, 0xE47AFBC2, 0xE47BFBC2, + 0xE47CFBC2, 0xE47DFBC2, 0xE47EFBC2, 0xE47FFBC2, 0xE480FBC2, 0xE481FBC2, 0xE482FBC2, 0xE483FBC2, 0xE484FBC2, 0xE485FBC2, 0xE486FBC2, 0xE487FBC2, 0xE488FBC2, 0xE489FBC2, 0xE48AFBC2, + 0xE48BFBC2, 0xE48CFBC2, 0xE48DFBC2, 0xE48EFBC2, 0xE48FFBC2, 0xE490FBC2, 0xE491FBC2, 0xE492FBC2, 0xE493FBC2, 0xE494FBC2, 0xE495FBC2, 0xE496FBC2, 0xE497FBC2, 0xE498FBC2, 0xE499FBC2, + 0xE49AFBC2, 0xE49BFBC2, 0xE49CFBC2, 0xE49DFBC2, 0xE49EFBC2, 0xE49FFBC2, 0xE4A0FBC2, 0xE4A1FBC2, 0xE4A2FBC2, 0xE4A3FBC2, 0xE4A4FBC2, 0xE4A5FBC2, 0xE4A6FBC2, 0xE4A7FBC2, 0xE4A8FBC2, + 0xE4A9FBC2, 0xE4AAFBC2, 0xE4ABFBC2, 0xE4ACFBC2, 0xE4ADFBC2, 0xE4AEFBC2, 0xE4AFFBC2, 0xE4B0FBC2, 0xE4B1FBC2, 0xE4B2FBC2, 0xE4B3FBC2, 0xE4B4FBC2, 0xE4B5FBC2, 0xE4B6FBC2, 0xE4B7FBC2, + 0xE4B8FBC2, 0xE4B9FBC2, 0xE4BAFBC2, 0xE4BBFBC2, 0xE4BCFBC2, 0xE4BDFBC2, 0xE4BEFBC2, 0xE4BFFBC2, 0xE4C0FBC2, 0xE4C1FBC2, 0xE4C2FBC2, 0xE4C3FBC2, 0xE4C4FBC2, 0xE4C5FBC2, 0xE4C6FBC2, + 0xE4C7FBC2, 0xE4C8FBC2, 0xE4C9FBC2, 0xE4CAFBC2, 0xE4CBFBC2, 0xE4CCFBC2, 0xE4CDFBC2, 0xE4CEFBC2, 0xE4CFFBC2, 0xE4D0FBC2, 0xE4D1FBC2, 0xE4D2FBC2, 0xE4D3FBC2, 0xE4D4FBC2, 0xE4D5FBC2, + 0xE4D6FBC2, 0xE4D7FBC2, 0xE4D8FBC2, 0xE4D9FBC2, 0xE4DAFBC2, 0xE4DBFBC2, 0xE4DCFBC2, 0xE4DDFBC2, 0xE4DEFBC2, 0xE4DFFBC2, 0xE4E0FBC2, 0xE4E1FBC2, 0xE4E2FBC2, 0xE4E3FBC2, 0xE4E4FBC2, + 0xE4E5FBC2, 0xE4E6FBC2, 0xE4E7FBC2, 0xE4E8FBC2, 0xE4E9FBC2, 0xE4EAFBC2, 0xE4EBFBC2, 0xE4ECFBC2, 0xE4EDFBC2, 0xE4EEFBC2, 0xE4EFFBC2, 0xE4F0FBC2, 0xE4F1FBC2, 0xE4F2FBC2, 0xE4F3FBC2, + 0xE4F4FBC2, 0xE4F5FBC2, 0xE4F6FBC2, 0xE4F7FBC2, 0xE4F8FBC2, 0xE4F9FBC2, 0xE4FAFBC2, 0xE4FBFBC2, 0xE4FCFBC2, 0xE4FDFBC2, 0xE4FEFBC2, 0xE4FFFBC2, 0xE500FBC2, 0xE501FBC2, 0xE502FBC2, + 0xE503FBC2, 0xE504FBC2, 0xE505FBC2, 0xE506FBC2, 0xE507FBC2, 0xE508FBC2, 0xE509FBC2, 0xE50AFBC2, 0xE50BFBC2, 0xE50CFBC2, 
0xE50DFBC2, 0xE50EFBC2, 0xE50FFBC2, 0xE510FBC2, 0xE511FBC2, + 0xE512FBC2, 0xE513FBC2, 0xE514FBC2, 0xE515FBC2, 0xE516FBC2, 0xE517FBC2, 0xE518FBC2, 0xE519FBC2, 0xE51AFBC2, 0xE51BFBC2, 0xE51CFBC2, 0xE51DFBC2, 0xE51EFBC2, 0xE51FFBC2, 0xE520FBC2, + 0xE521FBC2, 0xE522FBC2, 0xE523FBC2, 0xE524FBC2, 0xE525FBC2, 0xE526FBC2, 0xE527FBC2, 0xE528FBC2, 0xE529FBC2, 0xE52AFBC2, 0xE52BFBC2, 0xE52CFBC2, 0xE52DFBC2, 0xE52EFBC2, 0xE52FFBC2, + 0xE530FBC2, 0xE531FBC2, 0xE532FBC2, 0xE533FBC2, 0xE534FBC2, 0xE535FBC2, 0xE536FBC2, 0xE537FBC2, 0xE538FBC2, 0xE539FBC2, 0xE53AFBC2, 0xE53BFBC2, 0xE53CFBC2, 0xE53DFBC2, 0xE53EFBC2, + 0xE53FFBC2, 0xE540FBC2, 0xE541FBC2, 0xE542FBC2, 0xE543FBC2, 0xE544FBC2, 0xE545FBC2, 0xE546FBC2, 0xE547FBC2, 0xE548FBC2, 0xE549FBC2, 0xE54AFBC2, 0xE54BFBC2, 0xE54CFBC2, 0xE54DFBC2, + 0xE54EFBC2, 0xE54FFBC2, 0xE550FBC2, 0xE551FBC2, 0xE552FBC2, 0xE553FBC2, 0xE554FBC2, 0xE555FBC2, 0xE556FBC2, 0xE557FBC2, 0xE558FBC2, 0xE559FBC2, 0xE55AFBC2, 0xE55BFBC2, 0xE55CFBC2, + 0xE55DFBC2, 0xE55EFBC2, 0xE55FFBC2, 0xE560FBC2, 0xE561FBC2, 0xE562FBC2, 0xE563FBC2, 0xE564FBC2, 0xE565FBC2, 0xE566FBC2, 0xE567FBC2, 0xE568FBC2, 0xE569FBC2, 0xE56AFBC2, 0xE56BFBC2, + 0xE56CFBC2, 0xE56DFBC2, 0xE56EFBC2, 0xE56FFBC2, 0xE570FBC2, 0xE571FBC2, 0xE572FBC2, 0xE573FBC2, 0xE574FBC2, 0xE575FBC2, 0xE576FBC2, 0xE577FBC2, 0xE578FBC2, 0xE579FBC2, 0xE57AFBC2, + 0xE57BFBC2, 0xE57CFBC2, 0xE57DFBC2, 0xE57EFBC2, 0xE57FFBC2, 0xE580FBC2, 0xE581FBC2, 0xE582FBC2, 0xE583FBC2, 0xE584FBC2, 0xE585FBC2, 0xE586FBC2, 0xE587FBC2, 0xE588FBC2, 0xE589FBC2, + 0xE58AFBC2, 0xE58BFBC2, 0xE58CFBC2, 0xE58DFBC2, 0xE58EFBC2, 0xE58FFBC2, 0xE590FBC2, 0xE591FBC2, 0xE592FBC2, 0xE593FBC2, 0xE594FBC2, 0xE595FBC2, 0xE596FBC2, 0xE597FBC2, 0xE598FBC2, + 0xE599FBC2, 0xE59AFBC2, 0xE59BFBC2, 0xE59CFBC2, 0xE59DFBC2, 0xE59EFBC2, 0xE59FFBC2, 0xE5A0FBC2, 0xE5A1FBC2, 0xE5A2FBC2, 0xE5A3FBC2, 0xE5A4FBC2, 0xE5A5FBC2, 0xE5A6FBC2, 0xE5A7FBC2, + 0xE5A8FBC2, 0xE5A9FBC2, 0xE5AAFBC2, 0xE5ABFBC2, 0xE5ACFBC2, 0xE5ADFBC2, 0xE5AEFBC2, 0xE5AFFBC2, 0xE5B0FBC2, 
0xE5B1FBC2, 0xE5B2FBC2, 0xE5B3FBC2, 0xE5B4FBC2, 0xE5B5FBC2, 0xE5B6FBC2, + 0xE5B7FBC2, 0xE5B8FBC2, 0xE5B9FBC2, 0xE5BAFBC2, 0xE5BBFBC2, 0xE5BCFBC2, 0xE5BDFBC2, 0xE5BEFBC2, 0xE5BFFBC2, 0xE5C0FBC2, 0xE5C1FBC2, 0xE5C2FBC2, 0xE5C3FBC2, 0xE5C4FBC2, 0xE5C5FBC2, + 0xE5C6FBC2, 0xE5C7FBC2, 0xE5C8FBC2, 0xE5C9FBC2, 0xE5CAFBC2, 0xE5CBFBC2, 0xE5CCFBC2, 0xE5CDFBC2, 0xE5CEFBC2, 0xE5CFFBC2, 0xE5D0FBC2, 0xE5D1FBC2, 0xE5D2FBC2, 0xE5D3FBC2, 0xE5D4FBC2, + 0xE5D5FBC2, 0xE5D6FBC2, 0xE5D7FBC2, 0xE5D8FBC2, 0xE5D9FBC2, 0xE5DAFBC2, 0xE5DBFBC2, 0xE5DCFBC2, 0xE5DDFBC2, 0xE5DEFBC2, 0xE5DFFBC2, 0xE5E0FBC2, 0xE5E1FBC2, 0xE5E2FBC2, 0xE5E3FBC2, + 0xE5E4FBC2, 0xE5E5FBC2, 0xE5E6FBC2, 0xE5E7FBC2, 0xE5E8FBC2, 0xE5E9FBC2, 0xE5EAFBC2, 0xE5EBFBC2, 0xE5ECFBC2, 0xE5EDFBC2, 0xE5EEFBC2, 0xE5EFFBC2, 0xE5F0FBC2, 0xE5F1FBC2, 0xE5F2FBC2, + 0xE5F3FBC2, 0xE5F4FBC2, 0xE5F5FBC2, 0xE5F6FBC2, 0xE5F7FBC2, 0xE5F8FBC2, 0xE5F9FBC2, 0xE5FAFBC2, 0xE5FBFBC2, 0xE5FCFBC2, 0xE5FDFBC2, 0xE5FEFBC2, 0xE5FFFBC2, 0xE600FBC2, 0xE601FBC2, + 0xE602FBC2, 0xE603FBC2, 0xE604FBC2, 0xE605FBC2, 0xE606FBC2, 0xE607FBC2, 0xE608FBC2, 0xE609FBC2, 0xE60AFBC2, 0xE60BFBC2, 0xE60CFBC2, 0xE60DFBC2, 0xE60EFBC2, 0xE60FFBC2, 0xE610FBC2, + 0xE611FBC2, 0xE612FBC2, 0xE613FBC2, 0xE614FBC2, 0xE615FBC2, 0xE616FBC2, 0xE617FBC2, 0xE618FBC2, 0xE619FBC2, 0xE61AFBC2, 0xE61BFBC2, 0xE61CFBC2, 0xE61DFBC2, 0xE61EFBC2, 0xE61FFBC2, + 0xE620FBC2, 0xE621FBC2, 0xE622FBC2, 0xE623FBC2, 0xE624FBC2, 0xE625FBC2, 0xE626FBC2, 0xE627FBC2, 0xE628FBC2, 0xE629FBC2, 0xE62AFBC2, 0xE62BFBC2, 0xE62CFBC2, 0xE62DFBC2, 0xE62EFBC2, + 0xE62FFBC2, 0xE630FBC2, 0xE631FBC2, 0xE632FBC2, 0xE633FBC2, 0xE634FBC2, 0xE635FBC2, 0xE636FBC2, 0xE637FBC2, 0xE638FBC2, 0xE639FBC2, 0xE63AFBC2, 0xE63BFBC2, 0xE63CFBC2, 0xE63DFBC2, + 0xE63EFBC2, 0xE63FFBC2, 0xE640FBC2, 0xE641FBC2, 0xE642FBC2, 0xE643FBC2, 0xE644FBC2, 0xE645FBC2, 0xE646FBC2, 0xE647FBC2, 0xE648FBC2, 0xE649FBC2, 0xE64AFBC2, 0xE64BFBC2, 0xE64CFBC2, + 0xE64DFBC2, 0xE64EFBC2, 0xE64FFBC2, 0xE650FBC2, 0xE651FBC2, 0xE652FBC2, 0xE653FBC2, 0xE654FBC2, 
0xE655FBC2, 0xE656FBC2, 0xE657FBC2, 0xE658FBC2, 0xE659FBC2, 0xE65AFBC2, 0xE65BFBC2, + 0xE65CFBC2, 0xE65DFBC2, 0xE65EFBC2, 0xE65FFBC2, 0xE660FBC2, 0xE661FBC2, 0xE662FBC2, 0xE663FBC2, 0xE664FBC2, 0xE665FBC2, 0xE666FBC2, 0xE667FBC2, 0xE668FBC2, 0xE669FBC2, 0xE66AFBC2, + 0xE66BFBC2, 0xE66CFBC2, 0xE66DFBC2, 0xE66EFBC2, 0xE66FFBC2, 0xE670FBC2, 0xE671FBC2, 0xE672FBC2, 0xE673FBC2, 0xE674FBC2, 0xE675FBC2, 0xE676FBC2, 0xE677FBC2, 0xE678FBC2, 0xE679FBC2, + 0xE67AFBC2, 0xE67BFBC2, 0xE67CFBC2, 0xE67DFBC2, 0xE67EFBC2, 0xE67FFBC2, 0xE680FBC2, 0xE681FBC2, 0xE682FBC2, 0xE683FBC2, 0xE684FBC2, 0xE685FBC2, 0xE686FBC2, 0xE687FBC2, 0xE688FBC2, + 0xE689FBC2, 0xE68AFBC2, 0xE68BFBC2, 0xE68CFBC2, 0xE68DFBC2, 0xE68EFBC2, 0xE68FFBC2, 0xE690FBC2, 0xE691FBC2, 0xE692FBC2, 0xE693FBC2, 0xE694FBC2, 0xE695FBC2, 0xE696FBC2, 0xE697FBC2, + 0xE698FBC2, 0xE699FBC2, 0xE69AFBC2, 0xE69BFBC2, 0xE69CFBC2, 0xE69DFBC2, 0xE69EFBC2, 0xE69FFBC2, 0xE6A0FBC2, 0xE6A1FBC2, 0xE6A2FBC2, 0xE6A3FBC2, 0xE6A4FBC2, 0xE6A5FBC2, 0xE6A6FBC2, + 0xE6A7FBC2, 0xE6A8FBC2, 0xE6A9FBC2, 0xE6AAFBC2, 0xE6ABFBC2, 0xE6ACFBC2, 0xE6ADFBC2, 0xE6AEFBC2, 0xE6AFFBC2, 0xE6B0FBC2, 0xE6B1FBC2, 0xE6B2FBC2, 0xE6B3FBC2, 0xE6B4FBC2, 0xE6B5FBC2, + 0xE6B6FBC2, 0xE6B7FBC2, 0xE6B8FBC2, 0xE6B9FBC2, 0xE6BAFBC2, 0xE6BBFBC2, 0xE6BCFBC2, 0xE6BDFBC2, 0xE6BEFBC2, 0xE6BFFBC2, 0xE6C0FBC2, 0xE6C1FBC2, 0xE6C2FBC2, 0xE6C3FBC2, 0xE6C4FBC2, + 0xE6C5FBC2, 0xE6C6FBC2, 0xE6C7FBC2, 0xE6C8FBC2, 0xE6C9FBC2, 0xE6CAFBC2, 0xE6CBFBC2, 0xE6CCFBC2, 0xE6CDFBC2, 0xE6CEFBC2, 0xE6CFFBC2, 0xE6D0FBC2, 0xE6D1FBC2, 0xE6D2FBC2, 0xE6D3FBC2, + 0xE6D4FBC2, 0xE6D5FBC2, 0xE6D6FBC2, 0xE6D7FBC2, 0xE6D8FBC2, 0xE6D9FBC2, 0xE6DAFBC2, 0xE6DBFBC2, 0xE6DCFBC2, 0xE6DDFBC2, 0xE6DEFBC2, 0xE6DFFBC2, 0xE6E0FBC2, 0xE6E1FBC2, 0xE6E2FBC2, + 0xE6E3FBC2, 0xE6E4FBC2, 0xE6E5FBC2, 0xE6E6FBC2, 0xE6E7FBC2, 0xE6E8FBC2, 0xE6E9FBC2, 0xE6EAFBC2, 0xE6EBFBC2, 0xE6ECFBC2, 0xE6EDFBC2, 0xE6EEFBC2, 0xE6EFFBC2, 0xE6F0FBC2, 0xE6F1FBC2, + 0xE6F2FBC2, 0xE6F3FBC2, 0xE6F4FBC2, 0xE6F5FBC2, 0xE6F6FBC2, 0xE6F7FBC2, 0xE6F8FBC2, 
0xE6F9FBC2, 0xE6FAFBC2, 0xE6FBFBC2, 0xE6FCFBC2, 0xE6FDFBC2, 0xE6FEFBC2, 0xE6FFFBC2, 0xE700FBC2, + 0xE701FBC2, 0xE702FBC2, 0xE703FBC2, 0xE704FBC2, 0xE705FBC2, 0xE706FBC2, 0xE707FBC2, 0xE708FBC2, 0xE709FBC2, 0xE70AFBC2, 0xE70BFBC2, 0xE70CFBC2, 0xE70DFBC2, 0xE70EFBC2, 0xE70FFBC2, + 0xE710FBC2, 0xE711FBC2, 0xE712FBC2, 0xE713FBC2, 0xE714FBC2, 0xE715FBC2, 0xE716FBC2, 0xE717FBC2, 0xE718FBC2, 0xE719FBC2, 0xE71AFBC2, 0xE71BFBC2, 0xE71CFBC2, 0xE71DFBC2, 0xE71EFBC2, + 0xE71FFBC2, 0xE720FBC2, 0xE721FBC2, 0xE722FBC2, 0xE723FBC2, 0xE724FBC2, 0xE725FBC2, 0xE726FBC2, 0xE727FBC2, 0xE728FBC2, 0xE729FBC2, 0xE72AFBC2, 0xE72BFBC2, 0xE72CFBC2, 0xE72DFBC2, + 0xE72EFBC2, 0xE72FFBC2, 0xE730FBC2, 0xE731FBC2, 0xE732FBC2, 0xE733FBC2, 0xE734FBC2, 0xE735FBC2, 0xE736FBC2, 0xE737FBC2, 0xE738FBC2, 0xE739FBC2, 0xE73AFBC2, 0xE73BFBC2, 0xE73CFBC2, + 0xE73DFBC2, 0xE73EFBC2, 0xE73FFBC2, 0xE740FBC2, 0xE741FBC2, 0xE742FBC2, 0xE743FBC2, 0xE744FBC2, 0xE745FBC2, 0xE746FBC2, 0xE747FBC2, 0xE748FBC2, 0xE749FBC2, 0xE74AFBC2, 0xE74BFBC2, + 0xE74CFBC2, 0xE74DFBC2, 0xE74EFBC2, 0xE74FFBC2, 0xE750FBC2, 0xE751FBC2, 0xE752FBC2, 0xE753FBC2, 0xE754FBC2, 0xE755FBC2, 0xE756FBC2, 0xE757FBC2, 0xE758FBC2, 0xE759FBC2, 0xE75AFBC2, + 0xE75BFBC2, 0xE75CFBC2, 0xE75DFBC2, 0xE75EFBC2, 0xE75FFBC2, 0xE760FBC2, 0xE761FBC2, 0xE762FBC2, 0xE763FBC2, 0xE764FBC2, 0xE765FBC2, 0xE766FBC2, 0xE767FBC2, 0xE768FBC2, 0xE769FBC2, + 0xE76AFBC2, 0xE76BFBC2, 0xE76CFBC2, 0xE76DFBC2, 0xE76EFBC2, 0xE76FFBC2, 0xE770FBC2, 0xE771FBC2, 0xE772FBC2, 0xE773FBC2, 0xE774FBC2, 0xE775FBC2, 0xE776FBC2, 0xE777FBC2, 0xE778FBC2, + 0xE779FBC2, 0xE77AFBC2, 0xE77BFBC2, 0xE77CFBC2, 0xE77DFBC2, 0xE77EFBC2, 0xE77FFBC2, 0xE780FBC2, 0xE781FBC2, 0xE782FBC2, 0xE783FBC2, 0xE784FBC2, 0xE785FBC2, 0xE786FBC2, 0xE787FBC2, + 0xE788FBC2, 0xE789FBC2, 0xE78AFBC2, 0xE78BFBC2, 0xE78CFBC2, 0xE78DFBC2, 0xE78EFBC2, 0xE78FFBC2, 0xE790FBC2, 0xE791FBC2, 0xE792FBC2, 0xE793FBC2, 0xE794FBC2, 0xE795FBC2, 0xE796FBC2, + 0xE797FBC2, 0xE798FBC2, 0xE799FBC2, 0xE79AFBC2, 0xE79BFBC2, 0xE79CFBC2, 
0xE79DFBC2, 0xE79EFBC2, 0xE79FFBC2, 0xE7A0FBC2, 0xE7A1FBC2, 0xE7A2FBC2, 0xE7A3FBC2, 0xE7A4FBC2, 0xE7A5FBC2, + 0xE7A6FBC2, 0xE7A7FBC2, 0xE7A8FBC2, 0xE7A9FBC2, 0xE7AAFBC2, 0xE7ABFBC2, 0xE7ACFBC2, 0xE7ADFBC2, 0xE7AEFBC2, 0xE7AFFBC2, 0xE7B0FBC2, 0xE7B1FBC2, 0xE7B2FBC2, 0xE7B3FBC2, 0xE7B4FBC2, + 0xE7B5FBC2, 0xE7B6FBC2, 0xE7B7FBC2, 0xE7B8FBC2, 0xE7B9FBC2, 0xE7BAFBC2, 0xE7BBFBC2, 0xE7BCFBC2, 0xE7BDFBC2, 0xE7BEFBC2, 0xE7BFFBC2, 0xE7C0FBC2, 0xE7C1FBC2, 0xE7C2FBC2, 0xE7C3FBC2, + 0xE7C4FBC2, 0xE7C5FBC2, 0xE7C6FBC2, 0xE7C7FBC2, 0xE7C8FBC2, 0xE7C9FBC2, 0xE7CAFBC2, 0xE7CBFBC2, 0xE7CCFBC2, 0xE7CDFBC2, 0xE7CEFBC2, 0xE7CFFBC2, 0xE7D0FBC2, 0xE7D1FBC2, 0xE7D2FBC2, + 0xE7D3FBC2, 0xE7D4FBC2, 0xE7D5FBC2, 0xE7D6FBC2, 0xE7D7FBC2, 0xE7D8FBC2, 0xE7D9FBC2, 0xE7DAFBC2, 0xE7DBFBC2, 0xE7DCFBC2, 0xE7DDFBC2, 0xE7DEFBC2, 0xE7DFFBC2, 0xE7E0FBC2, 0xE7E1FBC2, + 0xE7E2FBC2, 0xE7E3FBC2, 0xE7E4FBC2, 0xE7E5FBC2, 0xE7E6FBC2, 0xE7E7FBC2, 0xE7E8FBC2, 0xE7E9FBC2, 0xE7EAFBC2, 0xE7EBFBC2, 0xE7ECFBC2, 0xE7EDFBC2, 0xE7EEFBC2, 0xE7EFFBC2, 0xE7F0FBC2, + 0xE7F1FBC2, 0xE7F2FBC2, 0xE7F3FBC2, 0xE7F4FBC2, 0xE7F5FBC2, 0xE7F6FBC2, 0xE7F7FBC2, 0xE7F8FBC2, 0xE7F9FBC2, 0xE7FAFBC2, 0xE7FBFBC2, 0xE7FCFBC2, 0xE7FDFBC2, 0xE7FEFBC2, 0xE7FFFBC2, + 0x38B7, 0x38B8, 0x38B9, 0x38BA, 0x38BB, 0x38BC, 0x38BD, 0x38BE, 0x38BF, 0x38C0, 0x38C1, 0x38C2, 0x38C3, 0x38C4, 0x38C5, + 0x38C6, 0x38C7, 0x38C8, 0x38C9, 0x38CA, 0x38CB, 0x38CC, 0x38CD, 0x38CE, 0x38CF, 0x38D0, 0x38D1, 0x38D2, 0x38D3, 0x38D4, + 0x38D5, 0x38D6, 0x38D7, 0x38D8, 0x38D9, 0x38DA, 0x38DB, 0x38DC, 0x38DD, 0x38DE, 0x38DF, 0x38E0, 0x38E1, 0x38E2, 0x38E3, + 0x38E4, 0x38E5, 0x38E6, 0x38E7, 0x38E8, 0x38E9, 0x38EA, 0x38EB, 0x38EC, 0x38ED, 0x38EE, 0x38EF, 0x38F0, 0x38F1, 0x38F2, + 0x38F3, 0x38F4, 0x38F5, 0x38F6, 0x38F7, 0x38F8, 0x38F9, 0x38FA, 0x38FB, 0x38FC, 0x38FD, 0x38FE, 0x38FF, 0x3900, 0x3901, + 0x3902, 0x3903, 0x3904, 0x3905, 0x3906, 0x3907, 0x3908, 0x3909, 0x390A, 0x390B, 0x390C, 0x390D, 0x390E, 0x390F, 0x3910, + 0x3911, 0x3912, 0x3913, 0x3914, 0x3915, 0x3916, 0x3917, 0x3918, 
0x3919, 0x391A, 0x391B, 0x391C, 0x391D, 0x391E, 0x391F, + 0x3920, 0x3921, 0x3922, 0x3923, 0x3924, 0x3925, 0x3926, 0x3927, 0x3928, 0x3929, 0x392A, 0x392B, 0x392C, 0x392D, 0x392E, + 0x392F, 0x3930, 0x3931, 0x3932, 0x3933, 0x3934, 0x3935, 0x3936, 0x3937, 0x3938, 0x3939, 0x393A, 0x393B, 0x393C, 0x393D, + 0x393E, 0x393F, 0x3940, 0x3941, 0x3942, 0x3943, 0x3944, 0x3945, 0x3946, 0x3947, 0x3948, 0x3949, 0x394A, 0x394B, 0x394C, + 0x394D, 0x394E, 0x394F, 0x3950, 0x3951, 0x3952, 0x3953, 0x3954, 0x3955, 0x3956, 0x3957, 0x3958, 0x3959, 0x395A, 0x395B, + 0x395C, 0x395D, 0x395E, 0x395F, 0x3960, 0x3961, 0x3962, 0x3963, 0x3964, 0x3965, 0x3966, 0x3967, 0x3968, 0x3969, 0x396A, + 0x396B, 0x396C, 0x396D, 0x396E, 0x396F, 0x3970, 0x3971, 0x3972, 0x3973, 0x3974, 0x3975, 0x3976, 0x3977, 0x3978, 0x3979, + 0x397A, 0x397B, 0x397C, 0x397D, 0x397E, 0x397F, 0x3980, 0x3981, 0x3982, 0x3983, 0x3984, 0x3985, 0x3986, 0x3987, 0x3988, + 0x3989, 0x398A, 0x398B, 0x398C, 0x398D, 0x398E, 0x398F, 0x3990, 0x3991, 0x3992, 0x3993, 0x3994, 0x3995, 0x3996, 0x3997, + 0x3998, 0x3999, 0x399A, 0x399B, 0x399C, 0x399D, 0x399E, 0x399F, 0x39A0, 0x39A1, 0x39A2, 0x39A3, 0x39A4, 0x39A5, 0x39A6, + 0x39A7, 0x39A8, 0x39A9, 0x39AA, 0x39AB, 0x39AC, 0x39AD, 0x39AE, 0x39AF, 0x39B0, 0x39B1, 0x39B2, 0x39B3, 0x39B4, 0x39B5, + 0x39B6, 0x39B7, 0x39B8, 0x39B9, 0x39BA, 0x39BB, 0x39BC, 0x39BD, 0x39BE, 0x39BF, 0x39C0, 0x39C1, 0x39C2, 0x39C3, 0x39C4, + 0x39C5, 0x39C6, 0x39C7, 0x39C8, 0x39C9, 0x39CA, 0x39CB, 0x39CC, 0x39CD, 0x39CE, 0x39CF, 0x39D0, 0x39D1, 0x39D2, 0x39D3, + 0x39D4, 0x39D5, 0x39D6, 0x39D7, 0x39D8, 0x39D9, 0x39DA, 0x39DB, 0x39DC, 0x39DD, 0x39DE, 0x39DF, 0x39E0, 0x39E1, 0x39E2, + 0x39E3, 0x39E4, 0x39E5, 0x39E6, 0x39E7, 0x39E8, 0x39E9, 0x39EA, 0x39EB, 0x39EC, 0x39ED, 0x39EE, 0x39EF, 0x39F0, 0x39F1, + 0x39F2, 0x39F3, 0x39F4, 0x39F5, 0x39F6, 0x39F7, 0x39F8, 0x39F9, 0x39FA, 0x39FB, 0x39FC, 0x39FD, 0x39FE, 0x39FF, 0x3A00, + 0x3A01, 0x3A02, 0x3A03, 0x3A04, 0x3A05, 0x3A06, 0x3A07, 0x3A08, 0x3A09, 0x3A0A, 0x3A0B, 0x3A0C, 0x3A0D, 0x3A0E, 
0x3A0F, + 0x3A10, 0x3A11, 0x3A12, 0x3A13, 0x3A14, 0x3A15, 0x3A16, 0x3A17, 0x3A18, 0x3A19, 0x3A1A, 0x3A1B, 0x3A1C, 0x3A1D, 0x3A1E, + 0x3A1F, 0x3A20, 0x3A21, 0x3A22, 0x3A23, 0x3A24, 0x3A25, 0x3A26, 0x3A27, 0x3A28, 0x3A29, 0x3A2A, 0x3A2B, 0x3A2C, 0x3A2D, + 0x3A2E, 0x3A2F, 0x3A30, 0x3A31, 0x3A32, 0x3A33, 0x3A34, 0x3A35, 0x3A36, 0x3A37, 0x3A38, 0x3A39, 0x3A3A, 0x3A3B, 0x3A3C, + 0x3A3D, 0x3A3E, 0x3A3F, 0x3A40, 0x3A41, 0x3A42, 0x3A43, 0x3A44, 0x3A45, 0x3A46, 0x3A47, 0x3A48, 0x3A49, 0x3A4A, 0x3A4B, + 0x3A4C, 0x3A4D, 0x3A4E, 0x3A4F, 0x3A50, 0x3A51, 0x3A52, 0x3A53, 0x3A54, 0x3A55, 0x3A56, 0x3A57, 0x3A58, 0x3A59, 0x3A5A, + 0x3A5B, 0x3A5C, 0x3A5D, 0x3A5E, 0x3A5F, 0x3A60, 0x3A61, 0x3A62, 0x3A63, 0x3A64, 0x3A65, 0x3A66, 0x3A67, 0x3A68, 0x3A69, + 0x3A6A, 0x3A6B, 0x3A6C, 0x3A6D, 0x3A6E, 0x3A6F, 0x3A70, 0x3A71, 0x3A72, 0x3A73, 0x3A74, 0x3A75, 0x3A76, 0x3A77, 0x3A78, + 0x3A79, 0x3A7A, 0x3A7B, 0x3A7C, 0x3A7D, 0x3A7E, 0x3A7F, 0x3A80, 0x3A81, 0x3A82, 0x3A83, 0x3A84, 0x3A85, 0x3A86, 0x3A87, + 0x3A88, 0x3A89, 0x3A8A, 0x3A8B, 0x3A8C, 0x3A8D, 0x3A8E, 0x3A8F, 0x3A90, 0x3A91, 0x3A92, 0x3A93, 0x3A94, 0x3A95, 0x3A96, + 0x3A97, 0x3A98, 0x3A99, 0x3A9A, 0x3A9B, 0x3A9C, 0x3A9D, 0x3A9E, 0x3A9F, 0x3AA0, 0x3AA1, 0x3AA2, 0x3AA3, 0x3AA4, 0x3AA5, + 0x3AA6, 0x3AA7, 0x3AA8, 0x3AA9, 0x3AAA, 0x3AAB, 0x3AAC, 0x3AAD, 0x3AAE, 0x3AAF, 0x3AB0, 0x3AB1, 0x3AB2, 0x3AB3, 0x3AB4, + 0x3AB5, 0x3AB6, 0x3AB7, 0x3AB8, 0x3AB9, 0x3ABA, 0x3ABB, 0x3ABC, 0x3ABD, 0x3ABE, 0x3ABF, 0x3AC0, 0x3AC1, 0x3AC2, 0x3AC3, + 0x3AC4, 0x3AC5, 0x3AC6, 0x3AC7, 0x3AC8, 0x3AC9, 0x3ACA, 0x3ACB, 0x3ACC, 0x3ACD, 0x3ACE, 0x3ACF, 0x3AD0, 0x3AD1, 0x3AD2, + 0x3AD3, 0x3AD4, 0x3AD5, 0x3AD6, 0x3AD7, 0x3AD8, 0x3AD9, 0x3ADA, 0x3ADB, 0x3ADC, 0x3ADD, 0x3ADE, 0x3ADF, 0x3AE0, 0x3AE1, + 0x3AE2, 0x3AE3, 0x3AE4, 0x3AE5, 0x3AE6, 0x3AE7, 0x3AE8, 0x3AE9, 0x3AEA, 0x3AEB, 0x3AEC, 0x3AED, 0x3AEE, 0x3AEF, 0xEA39FBC2, + 0xEA3AFBC2, 0xEA3BFBC2, 0xEA3CFBC2, 0xEA3DFBC2, 0xEA3EFBC2, 0xEA3FFBC2, 0x45C8, 0x45C9, 0x45CA, 0x45CB, 0x45CC, 0x45CD, 0x45CE, 0x45CF, 0x45D0, + 0x45D1, 
0x45D2, 0x45D3, 0x45D4, 0x45D5, 0x45D6, 0x45D7, 0x45D8, 0x45D9, 0x45DA, 0x45DB, 0x45DC, 0x45DD, 0x45DE, 0x45DF, + 0x45E0, 0x45E1, 0x45E2, 0x45E3, 0x45E4, 0x45E5, 0x45E6, 0xEA5FFBC2, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, + 0x1C44, 0x1C45, 0x1C46, 0xEA6AFBC2, 0xEA6BFBC2, 0xEA6CFBC2, 0xEA6DFBC2, 0x2C0, 0x2C1, 0xEA70FBC2, 0xEA71FBC2, 0xEA72FBC2, 0xEA73FBC2, 0xEA74FBC2, 0xEA75FBC2, + 0xEA76FBC2, 0xEA77FBC2, 0xEA78FBC2, 0xEA79FBC2, 0xEA7AFBC2, 0xEA7BFBC2, 0xEA7CFBC2, 0xEA7DFBC2, 0xEA7EFBC2, 0xEA7FFBC2, 0xEA80FBC2, 0xEA81FBC2, 0xEA82FBC2, 0xEA83FBC2, 0xEA84FBC2, + 0xEA85FBC2, 0xEA86FBC2, 0xEA87FBC2, 0xEA88FBC2, 0xEA89FBC2, 0xEA8AFBC2, 0xEA8BFBC2, 0xEA8CFBC2, 0xEA8DFBC2, 0xEA8EFBC2, 0xEA8FFBC2, 0xEA90FBC2, 0xEA91FBC2, 0xEA92FBC2, 0xEA93FBC2, + 0xEA94FBC2, 0xEA95FBC2, 0xEA96FBC2, 0xEA97FBC2, 0xEA98FBC2, 0xEA99FBC2, 0xEA9AFBC2, 0xEA9BFBC2, 0xEA9CFBC2, 0xEA9DFBC2, 0xEA9EFBC2, 0xEA9FFBC2, 0xEAA0FBC2, 0xEAA1FBC2, 0xEAA2FBC2, + 0xEAA3FBC2, 0xEAA4FBC2, 0xEAA5FBC2, 0xEAA6FBC2, 0xEAA7FBC2, 0xEAA8FBC2, 0xEAA9FBC2, 0xEAAAFBC2, 0xEAABFBC2, 0xEAACFBC2, 0xEAADFBC2, 0xEAAEFBC2, 0xEAAFFBC2, 0xEAB0FBC2, 0xEAB1FBC2, + 0xEAB2FBC2, 0xEAB3FBC2, 0xEAB4FBC2, 0xEAB5FBC2, 0xEAB6FBC2, 0xEAB7FBC2, 0xEAB8FBC2, 0xEAB9FBC2, 0xEABAFBC2, 0xEABBFBC2, 0xEABCFBC2, 0xEABDFBC2, 0xEABEFBC2, 0xEABFFBC2, 0xEAC0FBC2, + 0xEAC1FBC2, 0xEAC2FBC2, 0xEAC3FBC2, 0xEAC4FBC2, 0xEAC5FBC2, 0xEAC6FBC2, 0xEAC7FBC2, 0xEAC8FBC2, 0xEAC9FBC2, 0xEACAFBC2, 0xEACBFBC2, 0xEACCFBC2, 0xEACDFBC2, 0xEACEFBC2, 0xEACFFBC2, + 0x3AF0, 0x3AF1, 0x3AF2, 0x3AF3, 0x3AF4, 0x3AF5, 0x3AF6, 0x3AF7, 0x3AF8, 0x3AF9, 0x3AFA, 0x3AFB, 0x3AFC, 0x3AFD, 0x3AFE, + 0x3AFF, 0x3B00, 0x3B01, 0x3B02, 0x3B03, 0x3B04, 0x3B05, 0x3B06, 0x3B07, 0x3B08, 0x3B09, 0x3B0A, 0x3B0B, 0x3B0C, 0x3B0D, + 0xEAEEFBC2, 0xEAEFFBC2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x288, 0xEAF6FBC2, 0xEAF7FBC2, 0xEAF8FBC2, 0xEAF9FBC2, 0xEAFAFBC2, 0xEAFBFBC2, 0xEAFCFBC2, + 0xEAFDFBC2, 0xEAFEFBC2, 0xEAFFFBC2, 0x4355, 0x4356, 0x4357, 0x4358, 0x4359, 0x435A, 0x435B, 0x435C, 0x435D, 0x435E, 
0x435F, 0x4360, + 0x4361, 0x4362, 0x4363, 0x4364, 0x4365, 0x4366, 0x4367, 0x4368, 0x4369, 0x436A, 0x436B, 0x436C, 0x436D, 0x436E, 0x436F, + 0x4370, 0x4371, 0x4372, 0x4373, 0x4374, 0x4375, 0x4376, 0x4377, 0x4378, 0x4379, 0x437A, 0x437B, 0x437C, 0x437D, 0x437E, + 0x437F, 0x4380, 0x4381, 0x4382, 0x4383, 0x4384, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x477, 0x478, + 0x479, 0x47A, 0x47B, 0xFD1, 0xFD2, 0xFD3, 0xFD4, 0x4385, 0x4386, 0x1C05, 0x1C06, 0x47C, 0xFD5, 0xEB46FBC2, 0xEB47FBC2, + 0xEB48FBC2, 0xEB49FBC2, 0xEB4AFBC2, 0xEB4BFBC2, 0xEB4CFBC2, 0xEB4DFBC2, 0xEB4EFBC2, 0xEB4FFBC2, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, + 0x1C44, 0x1C45, 0x1C46, 0xEB5AFBC2, 0x1BA0, 0x1BA1, 0x1BA2, 0x1BA3, 0x1BA4, 0x1BA5, 0x1BA6, 0xEB62FBC2, 0x4387, 0x4388, 0x4389, + 0x438A, 0x438B, 0x438C, 0x438D, 0x438E, 0x438F, 0x4390, 0x4391, 0x4392, 0x4393, 0x4394, 0x4395, 0x4396, 0x4397, 0x4398, + 0x4399, 0x439A, 0x439B, 0xEB78FBC2, 0xEB79FBC2, 0xEB7AFBC2, 0xEB7BFBC2, 0xEB7CFBC2, 0x439C, 0x439D, 0x439E, 0x439F, 0x43A0, 0x43A1, 0x43A2, + 0x43A3, 0x43A4, 0x43A5, 0x43A6, 0x43A7, 0x43A8, 0x43A9, 0x43AA, 0x43AB, 0x43AC, 0x43AD, 0x43AE, 0xEB90FBC2, 0xEB91FBC2, 0xEB92FBC2, + 0xEB93FBC2, 0xEB94FBC2, 0xEB95FBC2, 0xEB96FBC2, 0xEB97FBC2, 0xEB98FBC2, 0xEB99FBC2, 0xEB9AFBC2, 0xEB9BFBC2, 0xEB9CFBC2, 0xEB9DFBC2, 0xEB9EFBC2, 0xEB9FFBC2, 0xEBA0FBC2, 0xEBA1FBC2, + 0xEBA2FBC2, 0xEBA3FBC2, 0xEBA4FBC2, 0xEBA5FBC2, 0xEBA6FBC2, 0xEBA7FBC2, 0xEBA8FBC2, 0xEBA9FBC2, 0xEBAAFBC2, 0xEBABFBC2, 0xEBACFBC2, 0xEBADFBC2, 0xEBAEFBC2, 0xEBAFFBC2, 0xEBB0FBC2, + 0xEBB1FBC2, 0xEBB2FBC2, 0xEBB3FBC2, 0xEBB4FBC2, 0xEBB5FBC2, 0xEBB6FBC2, 0xEBB7FBC2, 0xEBB8FBC2, 0xEBB9FBC2, 0xEBBAFBC2, 0xEBBBFBC2, 0xEBBCFBC2, 0xEBBDFBC2, 0xEBBEFBC2, 0xEBBFFBC2, + 0xEBC0FBC2, 0xEBC1FBC2, 0xEBC2FBC2, 0xEBC3FBC2, 0xEBC4FBC2, 0xEBC5FBC2, 0xEBC6FBC2, 0xEBC7FBC2, 0xEBC8FBC2, 0xEBC9FBC2, 0xEBCAFBC2, 0xEBCBFBC2, 0xEBCCFBC2, 0xEBCDFBC2, 0xEBCEFBC2, + 0xEBCFFBC2, 0xEBD0FBC2, 0xEBD1FBC2, 0xEBD2FBC2, 0xEBD3FBC2, 0xEBD4FBC2, 0xEBD5FBC2, 0xEBD6FBC2, 0xEBD7FBC2, 
0xEBD8FBC2, 0xEBD9FBC2, 0xEBDAFBC2, 0xEBDBFBC2, 0xEBDCFBC2, 0xEBDDFBC2, + 0xEBDEFBC2, 0xEBDFFBC2, 0xEBE0FBC2, 0xEBE1FBC2, 0xEBE2FBC2, 0xEBE3FBC2, 0xEBE4FBC2, 0xEBE5FBC2, 0xEBE6FBC2, 0xEBE7FBC2, 0xEBE8FBC2, 0xEBE9FBC2, 0xEBEAFBC2, 0xEBEBFBC2, 0xEBECFBC2, + 0xEBEDFBC2, 0xEBEEFBC2, 0xEBEFFBC2, 0xEBF0FBC2, 0xEBF1FBC2, 0xEBF2FBC2, 0xEBF3FBC2, 0xEBF4FBC2, 0xEBF5FBC2, 0xEBF6FBC2, 0xEBF7FBC2, 0xEBF8FBC2, 0xEBF9FBC2, 0xEBFAFBC2, 0xEBFBFBC2, + 0xEBFCFBC2, 0xEBFDFBC2, 0xEBFEFBC2, 0xEBFFFBC2, 0xEC00FBC2, 0xEC01FBC2, 0xEC02FBC2, 0xEC03FBC2, 0xEC04FBC2, 0xEC05FBC2, 0xEC06FBC2, 0xEC07FBC2, 0xEC08FBC2, 0xEC09FBC2, 0xEC0AFBC2, + 0xEC0BFBC2, 0xEC0CFBC2, 0xEC0DFBC2, 0xEC0EFBC2, 0xEC0FFBC2, 0xEC10FBC2, 0xEC11FBC2, 0xEC12FBC2, 0xEC13FBC2, 0xEC14FBC2, 0xEC15FBC2, 0xEC16FBC2, 0xEC17FBC2, 0xEC18FBC2, 0xEC19FBC2, + 0xEC1AFBC2, 0xEC1BFBC2, 0xEC1CFBC2, 0xEC1DFBC2, 0xEC1EFBC2, 0xEC1FFBC2, 0xEC20FBC2, 0xEC21FBC2, 0xEC22FBC2, 0xEC23FBC2, 0xEC24FBC2, 0xEC25FBC2, 0xEC26FBC2, 0xEC27FBC2, 0xEC28FBC2, + 0xEC29FBC2, 0xEC2AFBC2, 0xEC2BFBC2, 0xEC2CFBC2, 0xEC2DFBC2, 0xEC2EFBC2, 0xEC2FFBC2, 0xEC30FBC2, 0xEC31FBC2, 0xEC32FBC2, 0xEC33FBC2, 0xEC34FBC2, 0xEC35FBC2, 0xEC36FBC2, 0xEC37FBC2, + 0xEC38FBC2, 0xEC39FBC2, 0xEC3AFBC2, 0xEC3BFBC2, 0xEC3CFBC2, 0xEC3DFBC2, 0xEC3EFBC2, 0xEC3FFBC2, 0xEC40FBC2, 0xEC41FBC2, 0xEC42FBC2, 0xEC43FBC2, 0xEC44FBC2, 0xEC45FBC2, 0xEC46FBC2, + 0xEC47FBC2, 0xEC48FBC2, 0xEC49FBC2, 0xEC4AFBC2, 0xEC4BFBC2, 0xEC4CFBC2, 0xEC4DFBC2, 0xEC4EFBC2, 0xEC4FFBC2, 0xEC50FBC2, 0xEC51FBC2, 0xEC52FBC2, 0xEC53FBC2, 0xEC54FBC2, 0xEC55FBC2, + 0xEC56FBC2, 0xEC57FBC2, 0xEC58FBC2, 0xEC59FBC2, 0xEC5AFBC2, 0xEC5BFBC2, 0xEC5CFBC2, 0xEC5DFBC2, 0xEC5EFBC2, 0xEC5FFBC2, 0xEC60FBC2, 0xEC61FBC2, 0xEC62FBC2, 0xEC63FBC2, 0xEC64FBC2, + 0xEC65FBC2, 0xEC66FBC2, 0xEC67FBC2, 0xEC68FBC2, 0xEC69FBC2, 0xEC6AFBC2, 0xEC6BFBC2, 0xEC6CFBC2, 0xEC6DFBC2, 0xEC6EFBC2, 0xEC6FFBC2, 0xEC70FBC2, 0xEC71FBC2, 0xEC72FBC2, 0xEC73FBC2, + 0xEC74FBC2, 0xEC75FBC2, 0xEC76FBC2, 0xEC77FBC2, 0xEC78FBC2, 0xEC79FBC2, 0xEC7AFBC2, 0xEC7BFBC2, 
0xEC7CFBC2, 0xEC7DFBC2, 0xEC7EFBC2, 0xEC7FFBC2, 0xEC80FBC2, 0xEC81FBC2, 0xEC82FBC2, + 0xEC83FBC2, 0xEC84FBC2, 0xEC85FBC2, 0xEC86FBC2, 0xEC87FBC2, 0xEC88FBC2, 0xEC89FBC2, 0xEC8AFBC2, 0xEC8BFBC2, 0xEC8CFBC2, 0xEC8DFBC2, 0xEC8EFBC2, 0xEC8FFBC2, 0xEC90FBC2, 0xEC91FBC2, + 0xEC92FBC2, 0xEC93FBC2, 0xEC94FBC2, 0xEC95FBC2, 0xEC96FBC2, 0xEC97FBC2, 0xEC98FBC2, 0xEC99FBC2, 0xEC9AFBC2, 0xEC9BFBC2, 0xEC9CFBC2, 0xEC9DFBC2, 0xEC9EFBC2, 0xEC9FFBC2, 0xECA0FBC2, + 0xECA1FBC2, 0xECA2FBC2, 0xECA3FBC2, 0xECA4FBC2, 0xECA5FBC2, 0xECA6FBC2, 0xECA7FBC2, 0xECA8FBC2, 0xECA9FBC2, 0xECAAFBC2, 0xECABFBC2, 0xECACFBC2, 0xECADFBC2, 0xECAEFBC2, 0xECAFFBC2, + 0xECB0FBC2, 0xECB1FBC2, 0xECB2FBC2, 0xECB3FBC2, 0xECB4FBC2, 0xECB5FBC2, 0xECB6FBC2, 0xECB7FBC2, 0xECB8FBC2, 0xECB9FBC2, 0xECBAFBC2, 0xECBBFBC2, 0xECBCFBC2, 0xECBDFBC2, 0xECBEFBC2, + 0xECBFFBC2, 0xECC0FBC2, 0xECC1FBC2, 0xECC2FBC2, 0xECC3FBC2, 0xECC4FBC2, 0xECC5FBC2, 0xECC6FBC2, 0xECC7FBC2, 0xECC8FBC2, 0xECC9FBC2, 0xECCAFBC2, 0xECCBFBC2, 0xECCCFBC2, 0xECCDFBC2, + 0xECCEFBC2, 0xECCFFBC2, 0xECD0FBC2, 0xECD1FBC2, 0xECD2FBC2, 0xECD3FBC2, 0xECD4FBC2, 0xECD5FBC2, 0xECD6FBC2, 0xECD7FBC2, 0xECD8FBC2, 0xECD9FBC2, 0xECDAFBC2, 0xECDBFBC2, 0xECDCFBC2, + 0xECDDFBC2, 0xECDEFBC2, 0xECDFFBC2, 0xECE0FBC2, 0xECE1FBC2, 0xECE2FBC2, 0xECE3FBC2, 0xECE4FBC2, 0xECE5FBC2, 0xECE6FBC2, 0xECE7FBC2, 0xECE8FBC2, 0xECE9FBC2, 0xECEAFBC2, 0xECEBFBC2, + 0xECECFBC2, 0xECEDFBC2, 0xECEEFBC2, 0xECEFFBC2, 0xECF0FBC2, 0xECF1FBC2, 0xECF2FBC2, 0xECF3FBC2, 0xECF4FBC2, 0xECF5FBC2, 0xECF6FBC2, 0xECF7FBC2, 0xECF8FBC2, 0xECF9FBC2, 0xECFAFBC2, + 0xECFBFBC2, 0xECFCFBC2, 0xECFDFBC2, 0xECFEFBC2, 0xECFFFBC2, 0xED00FBC2, 0xED01FBC2, 0xED02FBC2, 0xED03FBC2, 0xED04FBC2, 0xED05FBC2, 0xED06FBC2, 0xED07FBC2, 0xED08FBC2, 0xED09FBC2, + 0xED0AFBC2, 0xED0BFBC2, 0xED0CFBC2, 0xED0DFBC2, 0xED0EFBC2, 0xED0FFBC2, 0xED10FBC2, 0xED11FBC2, 0xED12FBC2, 0xED13FBC2, 0xED14FBC2, 0xED15FBC2, 0xED16FBC2, 0xED17FBC2, 0xED18FBC2, + 0xED19FBC2, 0xED1AFBC2, 0xED1BFBC2, 0xED1CFBC2, 0xED1DFBC2, 0xED1EFBC2, 0xED1FFBC2, 
0xED20FBC2, 0xED21FBC2, 0xED22FBC2, 0xED23FBC2, 0xED24FBC2, 0xED25FBC2, 0xED26FBC2, 0xED27FBC2, + 0xED28FBC2, 0xED29FBC2, 0xED2AFBC2, 0xED2BFBC2, 0xED2CFBC2, 0xED2DFBC2, 0xED2EFBC2, 0xED2FFBC2, 0xED30FBC2, 0xED31FBC2, 0xED32FBC2, 0xED33FBC2, 0xED34FBC2, 0xED35FBC2, 0xED36FBC2, + 0xED37FBC2, 0xED38FBC2, 0xED39FBC2, 0xED3AFBC2, 0xED3BFBC2, 0xED3CFBC2, 0xED3DFBC2, 0xED3EFBC2, 0xED3FFBC2, 0xED40FBC2, 0xED41FBC2, 0xED42FBC2, 0xED43FBC2, 0xED44FBC2, 0xED45FBC2, + 0xED46FBC2, 0xED47FBC2, 0xED48FBC2, 0xED49FBC2, 0xED4AFBC2, 0xED4BFBC2, 0xED4CFBC2, 0xED4DFBC2, 0xED4EFBC2, 0xED4FFBC2, 0xED50FBC2, 0xED51FBC2, 0xED52FBC2, 0xED53FBC2, 0xED54FBC2, + 0xED55FBC2, 0xED56FBC2, 0xED57FBC2, 0xED58FBC2, 0xED59FBC2, 0xED5AFBC2, 0xED5BFBC2, 0xED5CFBC2, 0xED5DFBC2, 0xED5EFBC2, 0xED5FFBC2, 0xED60FBC2, 0xED61FBC2, 0xED62FBC2, 0xED63FBC2, + 0xED64FBC2, 0xED65FBC2, 0xED66FBC2, 0xED67FBC2, 0xED68FBC2, 0xED69FBC2, 0xED6AFBC2, 0xED6BFBC2, 0xED6CFBC2, 0xED6DFBC2, 0xED6EFBC2, 0xED6FFBC2, 0xED70FBC2, 0xED71FBC2, 0xED72FBC2, + 0xED73FBC2, 0xED74FBC2, 0xED75FBC2, 0xED76FBC2, 0xED77FBC2, 0xED78FBC2, 0xED79FBC2, 0xED7AFBC2, 0xED7BFBC2, 0xED7CFBC2, 0xED7DFBC2, 0xED7EFBC2, 0xED7FFBC2, 0xED80FBC2, 0xED81FBC2, + 0xED82FBC2, 0xED83FBC2, 0xED84FBC2, 0xED85FBC2, 0xED86FBC2, 0xED87FBC2, 0xED88FBC2, 0xED89FBC2, 0xED8AFBC2, 0xED8BFBC2, 0xED8CFBC2, 0xED8DFBC2, 0xED8EFBC2, 0xED8FFBC2, 0xED90FBC2, + 0xED91FBC2, 0xED92FBC2, 0xED93FBC2, 0xED94FBC2, 0xED95FBC2, 0xED96FBC2, 0xED97FBC2, 0xED98FBC2, 0xED99FBC2, 0xED9AFBC2, 0xED9BFBC2, 0xED9CFBC2, 0xED9DFBC2, 0xED9EFBC2, 0xED9FFBC2, + 0xEDA0FBC2, 0xEDA1FBC2, 0xEDA2FBC2, 0xEDA3FBC2, 0xEDA4FBC2, 0xEDA5FBC2, 0xEDA6FBC2, 0xEDA7FBC2, 0xEDA8FBC2, 0xEDA9FBC2, 0xEDAAFBC2, 0xEDABFBC2, 0xEDACFBC2, 0xEDADFBC2, 0xEDAEFBC2, + 0xEDAFFBC2, 0xEDB0FBC2, 0xEDB1FBC2, 0xEDB2FBC2, 0xEDB3FBC2, 0xEDB4FBC2, 0xEDB5FBC2, 0xEDB6FBC2, 0xEDB7FBC2, 0xEDB8FBC2, 0xEDB9FBC2, 0xEDBAFBC2, 0xEDBBFBC2, 0xEDBCFBC2, 0xEDBDFBC2, + 0xEDBEFBC2, 0xEDBFFBC2, 0xEDC0FBC2, 0xEDC1FBC2, 0xEDC2FBC2, 0xEDC3FBC2, 
0xEDC4FBC2, 0xEDC5FBC2, 0xEDC6FBC2, 0xEDC7FBC2, 0xEDC8FBC2, 0xEDC9FBC2, 0xEDCAFBC2, 0xEDCBFBC2, 0xEDCCFBC2, + 0xEDCDFBC2, 0xEDCEFBC2, 0xEDCFFBC2, 0xEDD0FBC2, 0xEDD1FBC2, 0xEDD2FBC2, 0xEDD3FBC2, 0xEDD4FBC2, 0xEDD5FBC2, 0xEDD6FBC2, 0xEDD7FBC2, 0xEDD8FBC2, 0xEDD9FBC2, 0xEDDAFBC2, 0xEDDBFBC2, + 0xEDDCFBC2, 0xEDDDFBC2, 0xEDDEFBC2, 0xEDDFFBC2, 0xEDE0FBC2, 0xEDE1FBC2, 0xEDE2FBC2, 0xEDE3FBC2, 0xEDE4FBC2, 0xEDE5FBC2, 0xEDE6FBC2, 0xEDE7FBC2, 0xEDE8FBC2, 0xEDE9FBC2, 0xEDEAFBC2, + 0xEDEBFBC2, 0xEDECFBC2, 0xEDEDFBC2, 0xEDEEFBC2, 0xEDEFFBC2, 0xEDF0FBC2, 0xEDF1FBC2, 0xEDF2FBC2, 0xEDF3FBC2, 0xEDF4FBC2, 0xEDF5FBC2, 0xEDF6FBC2, 0xEDF7FBC2, 0xEDF8FBC2, 0xEDF9FBC2, + 0xEDFAFBC2, 0xEDFBFBC2, 0xEDFCFBC2, 0xEDFDFBC2, 0xEDFEFBC2, 0xEDFFFBC2, 0xEE00FBC2, 0xEE01FBC2, 0xEE02FBC2, 0xEE03FBC2, 0xEE04FBC2, 0xEE05FBC2, 0xEE06FBC2, 0xEE07FBC2, 0xEE08FBC2, + 0xEE09FBC2, 0xEE0AFBC2, 0xEE0BFBC2, 0xEE0CFBC2, 0xEE0DFBC2, 0xEE0EFBC2, 0xEE0FFBC2, 0xEE10FBC2, 0xEE11FBC2, 0xEE12FBC2, 0xEE13FBC2, 0xEE14FBC2, 0xEE15FBC2, 0xEE16FBC2, 0xEE17FBC2, + 0xEE18FBC2, 0xEE19FBC2, 0xEE1AFBC2, 0xEE1BFBC2, 0xEE1CFBC2, 0xEE1DFBC2, 0xEE1EFBC2, 0xEE1FFBC2, 0xEE20FBC2, 0xEE21FBC2, 0xEE22FBC2, 0xEE23FBC2, 0xEE24FBC2, 0xEE25FBC2, 0xEE26FBC2, + 0xEE27FBC2, 0xEE28FBC2, 0xEE29FBC2, 0xEE2AFBC2, 0xEE2BFBC2, 0xEE2CFBC2, 0xEE2DFBC2, 0xEE2EFBC2, 0xEE2FFBC2, 0xEE30FBC2, 0xEE31FBC2, 0xEE32FBC2, 0xEE33FBC2, 0xEE34FBC2, 0xEE35FBC2, + 0xEE36FBC2, 0xEE37FBC2, 0xEE38FBC2, 0xEE39FBC2, 0xEE3AFBC2, 0xEE3BFBC2, 0xEE3CFBC2, 0xEE3DFBC2, 0xEE3EFBC2, 0xEE3FFBC2, 0xEE40FBC2, 0xEE41FBC2, 0xEE42FBC2, 0xEE43FBC2, 0xEE44FBC2, + 0xEE45FBC2, 0xEE46FBC2, 0xEE47FBC2, 0xEE48FBC2, 0xEE49FBC2, 0xEE4AFBC2, 0xEE4BFBC2, 0xEE4CFBC2, 0xEE4DFBC2, 0xEE4EFBC2, 0xEE4FFBC2, 0xEE50FBC2, 0xEE51FBC2, 0xEE52FBC2, 0xEE53FBC2, + 0xEE54FBC2, 0xEE55FBC2, 0xEE56FBC2, 0xEE57FBC2, 0xEE58FBC2, 0xEE59FBC2, 0xEE5AFBC2, 0xEE5BFBC2, 0xEE5CFBC2, 0xEE5DFBC2, 0xEE5EFBC2, 0xEE5FFBC2, 0xEE60FBC2, 0xEE61FBC2, 0xEE62FBC2, + 0xEE63FBC2, 0xEE64FBC2, 0xEE65FBC2, 0xEE66FBC2, 0xEE67FBC2, 
0xEE68FBC2, 0xEE69FBC2, 0xEE6AFBC2, 0xEE6BFBC2, 0xEE6CFBC2, 0xEE6DFBC2, 0xEE6EFBC2, 0xEE6FFBC2, 0xEE70FBC2, 0xEE71FBC2, + 0xEE72FBC2, 0xEE73FBC2, 0xEE74FBC2, 0xEE75FBC2, 0xEE76FBC2, 0xEE77FBC2, 0xEE78FBC2, 0xEE79FBC2, 0xEE7AFBC2, 0xEE7BFBC2, 0xEE7CFBC2, 0xEE7DFBC2, 0xEE7EFBC2, 0xEE7FFBC2, 0xEE80FBC2, + 0xEE81FBC2, 0xEE82FBC2, 0xEE83FBC2, 0xEE84FBC2, 0xEE85FBC2, 0xEE86FBC2, 0xEE87FBC2, 0xEE88FBC2, 0xEE89FBC2, 0xEE8AFBC2, 0xEE8BFBC2, 0xEE8CFBC2, 0xEE8DFBC2, 0xEE8EFBC2, 0xEE8FFBC2, + 0xEE90FBC2, 0xEE91FBC2, 0xEE92FBC2, 0xEE93FBC2, 0xEE94FBC2, 0xEE95FBC2, 0xEE96FBC2, 0xEE97FBC2, 0xEE98FBC2, 0xEE99FBC2, 0xEE9AFBC2, 0xEE9BFBC2, 0xEE9CFBC2, 0xEE9DFBC2, 0xEE9EFBC2, + 0xEE9FFBC2, 0xEEA0FBC2, 0xEEA1FBC2, 0xEEA2FBC2, 0xEEA3FBC2, 0xEEA4FBC2, 0xEEA5FBC2, 0xEEA6FBC2, 0xEEA7FBC2, 0xEEA8FBC2, 0xEEA9FBC2, 0xEEAAFBC2, 0xEEABFBC2, 0xEEACFBC2, 0xEEADFBC2, + 0xEEAEFBC2, 0xEEAFFBC2, 0xEEB0FBC2, 0xEEB1FBC2, 0xEEB2FBC2, 0xEEB3FBC2, 0xEEB4FBC2, 0xEEB5FBC2, 0xEEB6FBC2, 0xEEB7FBC2, 0xEEB8FBC2, 0xEEB9FBC2, 0xEEBAFBC2, 0xEEBBFBC2, 0xEEBCFBC2, + 0xEEBDFBC2, 0xEEBEFBC2, 0xEEBFFBC2, 0xEEC0FBC2, 0xEEC1FBC2, 0xEEC2FBC2, 0xEEC3FBC2, 0xEEC4FBC2, 0xEEC5FBC2, 0xEEC6FBC2, 0xEEC7FBC2, 0xEEC8FBC2, 0xEEC9FBC2, 0xEECAFBC2, 0xEECBFBC2, + 0xEECCFBC2, 0xEECDFBC2, 0xEECEFBC2, 0xEECFFBC2, 0xEED0FBC2, 0xEED1FBC2, 0xEED2FBC2, 0xEED3FBC2, 0xEED4FBC2, 0xEED5FBC2, 0xEED6FBC2, 0xEED7FBC2, 0xEED8FBC2, 0xEED9FBC2, 0xEEDAFBC2, + 0xEEDBFBC2, 0xEEDCFBC2, 0xEEDDFBC2, 0xEEDEFBC2, 0xEEDFFBC2, 0xEEE0FBC2, 0xEEE1FBC2, 0xEEE2FBC2, 0xEEE3FBC2, 0xEEE4FBC2, 0xEEE5FBC2, 0xEEE6FBC2, 0xEEE7FBC2, 0xEEE8FBC2, 0xEEE9FBC2, + 0xEEEAFBC2, 0xEEEBFBC2, 0xEEECFBC2, 0xEEEDFBC2, 0xEEEEFBC2, 0xEEEFFBC2, 0xEEF0FBC2, 0xEEF1FBC2, 0xEEF2FBC2, 0xEEF3FBC2, 0xEEF4FBC2, 0xEEF5FBC2, 0xEEF6FBC2, 0xEEF7FBC2, 0xEEF8FBC2, + 0xEEF9FBC2, 0xEEFAFBC2, 0xEEFBFBC2, 0xEEFCFBC2, 0xEEFDFBC2, 0xEEFEFBC2, 0xEEFFFBC2, 0x427A, 0x427B, 0x427C, 0x427D, 0x427E, 0x427F, 0x427E, 0x4280, + 0x4281, 0x4282, 0x4283, 0x4284, 0x4285, 0x4286, 0x4287, 0x4288, 0x4289, 0x428A, 0x428B, 
0x4289, 0x428C, 0x428D, 0x428E, + 0x428F, 0x4290, 0x4291, 0x4292, 0x4293, 0x4294, 0x4295, 0x4296, 0x4297, 0x4298, 0x4299, 0x429A, 0x429B, 0x429C, 0x429B, + 0x429D, 0x429E, 0x429F, 0x42A0, 0x42A1, 0x42A2, 0x42A3, 0x42A4, 0x42A5, 0x42A6, 0x42A7, 0x42A8, 0x42A9, 0x42AA, 0x42AB, + 0x42AC, 0x42AD, 0x42AE, 0x42AF, 0x42B0, 0x42B1, 0x42B2, 0x42B3, 0x42B4, 0x42B5, 0x42B4, 0x42B6, 0x42B7, 0x42B8, 0x42B9, + 0x42BA, 0xEF45FBC2, 0xEF46FBC2, 0xEF47FBC2, 0xEF48FBC2, 0xEF49FBC2, 0xEF4AFBC2, 0xEF4BFBC2, 0xEF4CFBC2, 0xEF4DFBC2, 0xEF4EFBC2, 0xEF4FFBC2, 0x42BB, 0x42BC, 0x42BD, + 0x42BE, 0x42BF, 0x42C0, 0x42C1, 0x42C2, 0x42C3, 0x42C4, 0x42C5, 0x42C6, 0x42C7, 0x42C8, 0x42C9, 0x42CA, 0x42CB, 0x42CC, + 0x42CD, 0x42CE, 0x42CF, 0x42D0, 0x42D1, 0x42D2, 0x42D3, 0x42D4, 0x42D5, 0x42D6, 0x42D7, 0x42D8, 0x42D9, 0x42DA, 0x42DB, + 0x42DC, 0x42DD, 0x42DE, 0x42DF, 0x42E0, 0x42E1, 0x42E2, 0x42E3, 0x42E4, 0x42E5, 0x42E6, 0x42E7, 0x42E8, 0x42E9, 0xEF7FFBC2, + 0xEF80FBC2, 0xEF81FBC2, 0xEF82FBC2, 0xEF83FBC2, 0xEF84FBC2, 0xEF85FBC2, 0xEF86FBC2, 0xEF87FBC2, 0xEF88FBC2, 0xEF89FBC2, 0xEF8AFBC2, 0xEF8BFBC2, 0xEF8CFBC2, 0xEF8DFBC2, 0xEF8EFBC2, + 0x42EA, 0x42EB, 0x42EC, 0x42ED, 0x42EE, 0x42EF, 0x42F0, 0x42F1, 0x42F2, 0x42F3, 0x42F4, 0x42F5, 0x42F6, 0x42F7, 0x42F8, + 0x42F9, 0x42FA, 0xEFA0FBC2, 0xEFA1FBC2, 0xEFA2FBC2, 0xEFA3FBC2, 0xEFA4FBC2, 0xEFA5FBC2, 0xEFA6FBC2, 0xEFA7FBC2, 0xEFA8FBC2, 0xEFA9FBC2, 0xEFAAFBC2, 0xEFABFBC2, 0xEFACFBC2, + 0xEFADFBC2, 0xEFAEFBC2, 0xEFAFFBC2, 0xEFB0FBC2, 0xEFB1FBC2, 0xEFB2FBC2, 0xEFB3FBC2, 0xEFB4FBC2, 0xEFB5FBC2, 0xEFB6FBC2, 0xEFB7FBC2, 0xEFB8FBC2, 0xEFB9FBC2, 0xEFBAFBC2, 0xEFBBFBC2, + 0xEFBCFBC2, 0xEFBDFBC2, 0xEFBEFBC2, 0xEFBFFBC2, 0xEFC0FBC2, 0xEFC1FBC2, 0xEFC2FBC2, 0xEFC3FBC2, 0xEFC4FBC2, 0xEFC5FBC2, 0xEFC6FBC2, 0xEFC7FBC2, 0xEFC8FBC2, 0xEFC9FBC2, 0xEFCAFBC2, + 0xEFCBFBC2, 0xEFCCFBC2, 0xEFCDFBC2, 0xEFCEFBC2, 0xEFCFFBC2, 0xEFD0FBC2, 0xEFD1FBC2, 0xEFD2FBC2, 0xEFD3FBC2, 0xEFD4FBC2, 0xEFD5FBC2, 0xEFD6FBC2, 0xEFD7FBC2, 0xEFD8FBC2, 0xEFD9FBC2, + 0xEFDAFBC2, 0xEFDBFBC2, 0xEFDCFBC2, 
0xEFDDFBC2, 0xEFDEFBC2, 0xEFDFFBC2, 0x1C09, 0xEFE1FBC2, 0xEFE2FBC2, 0xEFE3FBC2, 0xEFE4FBC2, 0xEFE5FBC2, 0xEFE6FBC2, 0xEFE7FBC2, 0xEFE8FBC2, + 0xEFE9FBC2, 0xEFEAFBC2, 0xEFEBFBC2, 0xEFECFBC2, 0xEFEDFBC2, 0xEFEEFBC2, 0xEFEFFBC2, 0xEFF0FBC2, 0xEFF1FBC2, 0xEFF2FBC2, 0xEFF3FBC2, 0xEFF4FBC2, 0xEFF5FBC2, 0xEFF6FBC2, 0xEFF7FBC2, + 0xEFF8FBC2, 0xEFF9FBC2, 0xEFFAFBC2, 0xEFFBFBC2, 0xEFFCFBC2, 0xEFFDFBC2, 0xEFFEFBC2, 0xEFFFFBC2, 0x8000FB00, 0x8001FB00, 0x8002FB00, 0x8003FB00, 0x8004FB00, 0x8005FB00, 0x8006FB00, + 0x8007FB00, 0x8008FB00, 0x8009FB00, 0x800AFB00, 0x800BFB00, 0x800CFB00, 0x800DFB00, 0x800EFB00, 0x800FFB00, 0x8010FB00, 0x8011FB00, 0x8012FB00, 0x8013FB00, 0x8014FB00, 0x8015FB00, + 0x8016FB00, 0x8017FB00, 0x8018FB00, 0x8019FB00, 0x801AFB00, 0x801BFB00, 0x801CFB00, 0x801DFB00, 0x801EFB00, 0x801FFB00, 0x8020FB00, 0x8021FB00, 0x8022FB00, 0x8023FB00, 0x8024FB00, + 0x8025FB00, 0x8026FB00, 0x8027FB00, 0x8028FB00, 0x8029FB00, 0x802AFB00, 0x802BFB00, 0x802CFB00, 0x802DFB00, 0x802EFB00, 0x802FFB00, 0x8030FB00, 0x8031FB00, 0x8032FB00, 0x8033FB00, + 0x8034FB00, 0x8035FB00, 0x8036FB00, 0x8037FB00, 0x8038FB00, 0x8039FB00, 0x803AFB00, 0x803BFB00, 0x803CFB00, 0x803DFB00, 0x803EFB00, 0x803FFB00, 0x8040FB00, 0x8041FB00, 0x8042FB00, + 0x8043FB00, 0x8044FB00, 0x8045FB00, 0x8046FB00, 0x8047FB00, 0x8048FB00, 0x8049FB00, 0x804AFB00, 0x804BFB00, 0x804CFB00, 0x804DFB00, 0x804EFB00, 0x804FFB00, 0x8050FB00, 0x8051FB00, + 0x8052FB00, 0x8053FB00, 0x8054FB00, 0x8055FB00, 0x8056FB00, 0x8057FB00, 0x8058FB00, 0x8059FB00, 0x805AFB00, 0x805BFB00, 0x805CFB00, 0x805DFB00, 0x805EFB00, 0x805FFB00, 0x8060FB00, + 0x8061FB00, 0x8062FB00, 0x8063FB00, 0x8064FB00, 0x8065FB00, 0x8066FB00, 0x8067FB00, 0x8068FB00, 0x8069FB00, 0x806AFB00, 0x806BFB00, 0x806CFB00, 0x806DFB00, 0x806EFB00, 0x806FFB00, + 0x8070FB00, 0x8071FB00, 0x8072FB00, 0x8073FB00, 0x8074FB00, 0x8075FB00, 0x8076FB00, 0x8077FB00, 0x8078FB00, 0x8079FB00, 0x807AFB00, 0x807BFB00, 0x807CFB00, 0x807DFB00, 0x807EFB00, + 0x807FFB00, 0x8080FB00, 0x8081FB00, 
0x8082FB00, 0x8083FB00, 0x8084FB00, 0x8085FB00, 0x8086FB00, 0x8087FB00, 0x8088FB00, 0x8089FB00, 0x808AFB00, 0x808BFB00, 0x808CFB00, 0x808DFB00, + 0x808EFB00, 0x808FFB00, 0x8090FB00, 0x8091FB00, 0x8092FB00, 0x8093FB00, 0x8094FB00, 0x8095FB00, 0x8096FB00, 0x8097FB00, 0x8098FB00, 0x8099FB00, 0x809AFB00, 0x809BFB00, 0x809CFB00, + 0x809DFB00, 0x809EFB00, 0x809FFB00, 0x80A0FB00, 0x80A1FB00, 0x80A2FB00, 0x80A3FB00, 0x80A4FB00, 0x80A5FB00, 0x80A6FB00, 0x80A7FB00, 0x80A8FB00, 0x80A9FB00, 0x80AAFB00, 0x80ABFB00, + 0x80ACFB00, 0x80ADFB00, 0x80AEFB00, 0x80AFFB00, 0x80B0FB00, 0x80B1FB00, 0x80B2FB00, 0x80B3FB00, 0x80B4FB00, 0x80B5FB00, 0x80B6FB00, 0x80B7FB00, 0x80B8FB00, 0x80B9FB00, 0x80BAFB00, + 0x80BBFB00, 0x80BCFB00, 0x80BDFB00, 0x80BEFB00, 0x80BFFB00, 0x80C0FB00, 0x80C1FB00, 0x80C2FB00, 0x80C3FB00, 0x80C4FB00, 0x80C5FB00, 0x80C6FB00, 0x80C7FB00, 0x80C8FB00, 0x80C9FB00, + 0x80CAFB00, 0x80CBFB00, 0x80CCFB00, 0x80CDFB00, 0x80CEFB00, 0x80CFFB00, 0x80D0FB00, 0x80D1FB00, 0x80D2FB00, 0x80D3FB00, 0x80D4FB00, 0x80D5FB00, 0x80D6FB00, 0x80D7FB00, 0x80D8FB00, + 0x80D9FB00, 0x80DAFB00, 0x80DBFB00, 0x80DCFB00, 0x80DDFB00, 0x80DEFB00, 0x80DFFB00, 0x80E0FB00, 0x80E1FB00, 0x80E2FB00, 0x80E3FB00, 0x80E4FB00, 0x80E5FB00, 0x80E6FB00, 0x80E7FB00, + 0x80E8FB00, 0x80E9FB00, 0x80EAFB00, 0x80EBFB00, 0x80ECFB00, 0x80EDFB00, 0x80EEFB00, 0x80EFFB00, 0x80F0FB00, 0x80F1FB00, 0x80F2FB00, 0x80F3FB00, 0x80F4FB00, 0x80F5FB00, 0x80F6FB00, + 0x80F7FB00, 0x80F8FB00, 0x80F9FB00, 0x80FAFB00, 0x80FBFB00, 0x80FCFB00, 0x80FDFB00, 0x80FEFB00, 0x80FFFB00, 0x8100FB00, 0x8101FB00, 0x8102FB00, 0x8103FB00, 0x8104FB00, 0x8105FB00, + 0x8106FB00, 0x8107FB00, 0x8108FB00, 0x8109FB00, 0x810AFB00, 0x810BFB00, 0x810CFB00, 0x810DFB00, 0x810EFB00, 0x810FFB00, 0x8110FB00, 0x8111FB00, 0x8112FB00, 0x8113FB00, 0x8114FB00, + 0x8115FB00, 0x8116FB00, 0x8117FB00, 0x8118FB00, 0x8119FB00, 0x811AFB00, 0x811BFB00, 0x811CFB00, 0x811DFB00, 0x811EFB00, 0x811FFB00, 0x8120FB00, 0x8121FB00, 0x8122FB00, 0x8123FB00, + 0x8124FB00, 0x8125FB00, 
0x8126FB00, 0x8127FB00, 0x8128FB00, 0x8129FB00, 0x812AFB00, 0x812BFB00, 0x812CFB00, 0x812DFB00, 0x812EFB00, 0x812FFB00, 0x8130FB00, 0x8131FB00, 0x8132FB00, + 0x8133FB00, 0x8134FB00, 0x8135FB00, 0x8136FB00, 0x8137FB00, 0x8138FB00, 0x8139FB00, 0x813AFB00, 0x813BFB00, 0x813CFB00, 0x813DFB00, 0x813EFB00, 0x813FFB00, 0x8140FB00, 0x8141FB00, + 0x8142FB00, 0x8143FB00, 0x8144FB00, 0x8145FB00, 0x8146FB00, 0x8147FB00, 0x8148FB00, 0x8149FB00, 0x814AFB00, 0x814BFB00, 0x814CFB00, 0x814DFB00, 0x814EFB00, 0x814FFB00, 0x8150FB00, + 0x8151FB00, 0x8152FB00, 0x8153FB00, 0x8154FB00, 0x8155FB00, 0x8156FB00, 0x8157FB00, 0x8158FB00, 0x8159FB00, 0x815AFB00, 0x815BFB00, 0x815CFB00, 0x815DFB00, 0x815EFB00, 0x815FFB00, + 0x8160FB00, 0x8161FB00, 0x8162FB00, 0x8163FB00, 0x8164FB00, 0x8165FB00, 0x8166FB00, 0x8167FB00, 0x8168FB00, 0x8169FB00, 0x816AFB00, 0x816BFB00, 0x816CFB00, 0x816DFB00, 0x816EFB00, + 0x816FFB00, 0x8170FB00, 0x8171FB00, 0x8172FB00, 0x8173FB00, 0x8174FB00, 0x8175FB00, 0x8176FB00, 0x8177FB00, 0x8178FB00, 0x8179FB00, 0x817AFB00, 0x817BFB00, 0x817CFB00, 0x817DFB00, + 0x817EFB00, 0x817FFB00, 0x8180FB00, 0x8181FB00, 0x8182FB00, 0x8183FB00, 0x8184FB00, 0x8185FB00, 0x8186FB00, 0x8187FB00, 0x8188FB00, 0x8189FB00, 0x818AFB00, 0x818BFB00, 0x818CFB00, + 0x818DFB00, 0x818EFB00, 0x818FFB00, 0x8190FB00, 0x8191FB00, 0x8192FB00, 0x8193FB00, 0x8194FB00, 0x8195FB00, 0x8196FB00, 0x8197FB00, 0x8198FB00, 0x8199FB00, 0x819AFB00, 0x819BFB00, + 0x819CFB00, 0x819DFB00, 0x819EFB00, 0x819FFB00, 0x81A0FB00, 0x81A1FB00, 0x81A2FB00, 0x81A3FB00, 0x81A4FB00, 0x81A5FB00, 0x81A6FB00, 0x81A7FB00, 0x81A8FB00, 0x81A9FB00, 0x81AAFB00, + 0x81ABFB00, 0x81ACFB00, 0x81ADFB00, 0x81AEFB00, 0x81AFFB00, 0x81B0FB00, 0x81B1FB00, 0x81B2FB00, 0x81B3FB00, 0x81B4FB00, 0x81B5FB00, 0x81B6FB00, 0x81B7FB00, 0x81B8FB00, 0x81B9FB00, + 0x81BAFB00, 0x81BBFB00, 0x81BCFB00, 0x81BDFB00, 0x81BEFB00, 0x81BFFB00, 0x81C0FB00, 0x81C1FB00, 0x81C2FB00, 0x81C3FB00, 0x81C4FB00, 0x81C5FB00, 0x81C6FB00, 0x81C7FB00, 0x81C8FB00, + 0x81C9FB00, 
0x81CAFB00, 0x81CBFB00, 0x81CCFB00, 0x81CDFB00, 0x81CEFB00, 0x81CFFB00, 0x81D0FB00, 0x81D1FB00, 0x81D2FB00, 0x81D3FB00, 0x81D4FB00, 0x81D5FB00, 0x81D6FB00, 0x81D7FB00, + 0x81D8FB00, 0x81D9FB00, 0x81DAFB00, 0x81DBFB00, 0x81DCFB00, 0x81DDFB00, 0x81DEFB00, 0x81DFFB00, 0x81E0FB00, 0x81E1FB00, 0x81E2FB00, 0x81E3FB00, 0x81E4FB00, 0x81E5FB00, 0x81E6FB00, + 0x81E7FB00, 0x81E8FB00, 0x81E9FB00, 0x81EAFB00, 0x81EBFB00, 0x81ECFB00, 0x81EDFB00, 0x81EEFB00, 0x81EFFB00, 0x81F0FB00, 0x81F1FB00, 0x81F2FB00, 0x81F3FB00, 0x81F4FB00, 0x81F5FB00, + 0x81F6FB00, 0x81F7FB00, 0x81F8FB00, 0x81F9FB00, 0x81FAFB00, 0x81FBFB00, 0x81FCFB00, 0x81FDFB00, 0x81FEFB00, 0x81FFFB00, 0x8200FB00, 0x8201FB00, 0x8202FB00, 0x8203FB00, 0x8204FB00, + 0x8205FB00, 0x8206FB00, 0x8207FB00, 0x8208FB00, 0x8209FB00, 0x820AFB00, 0x820BFB00, 0x820CFB00, 0x820DFB00, 0x820EFB00, 0x820FFB00, 0x8210FB00, 0x8211FB00, 0x8212FB00, 0x8213FB00, + 0x8214FB00, 0x8215FB00, 0x8216FB00, 0x8217FB00, 0x8218FB00, 0x8219FB00, 0x821AFB00, 0x821BFB00, 0x821CFB00, 0x821DFB00, 0x821EFB00, 0x821FFB00, 0x8220FB00, 0x8221FB00, 0x8222FB00, + 0x8223FB00, 0x8224FB00, 0x8225FB00, 0x8226FB00, 0x8227FB00, 0x8228FB00, 0x8229FB00, 0x822AFB00, 0x822BFB00, 0x822CFB00, 0x822DFB00, 0x822EFB00, 0x822FFB00, 0x8230FB00, 0x8231FB00, + 0x8232FB00, 0x8233FB00, 0x8234FB00, 0x8235FB00, 0x8236FB00, 0x8237FB00, 0x8238FB00, 0x8239FB00, 0x823AFB00, 0x823BFB00, 0x823CFB00, 0x823DFB00, 0x823EFB00, 0x823FFB00, 0x8240FB00, + 0x8241FB00, 0x8242FB00, 0x8243FB00, 0x8244FB00, 0x8245FB00, 0x8246FB00, 0x8247FB00, 0x8248FB00, 0x8249FB00, 0x824AFB00, 0x824BFB00, 0x824CFB00, 0x824DFB00, 0x824EFB00, 0x824FFB00, + 0x8250FB00, 0x8251FB00, 0x8252FB00, 0x8253FB00, 0x8254FB00, 0x8255FB00, 0x8256FB00, 0x8257FB00, 0x8258FB00, 0x8259FB00, 0x825AFB00, 0x825BFB00, 0x825CFB00, 0x825DFB00, 0x825EFB00, + 0x825FFB00, 0x8260FB00, 0x8261FB00, 0x8262FB00, 0x8263FB00, 0x8264FB00, 0x8265FB00, 0x8266FB00, 0x8267FB00, 0x8268FB00, 0x8269FB00, 0x826AFB00, 0x826BFB00, 0x826CFB00, 0x826DFB00, + 
0x826EFB00, 0x826FFB00, 0x8270FB00, 0x8271FB00, 0x8272FB00, 0x8273FB00, 0x8274FB00, 0x8275FB00, 0x8276FB00, 0x8277FB00, 0x8278FB00, 0x8279FB00, 0x827AFB00, 0x827BFB00, 0x827CFB00, + 0x827DFB00, 0x827EFB00, 0x827FFB00, 0x8280FB00, 0x8281FB00, 0x8282FB00, 0x8283FB00, 0x8284FB00, 0x8285FB00, 0x8286FB00, 0x8287FB00, 0x8288FB00, 0x8289FB00, 0x828AFB00, 0x828BFB00, + 0x828CFB00, 0x828DFB00, 0x828EFB00, 0x828FFB00, 0x8290FB00, 0x8291FB00, 0x8292FB00, 0x8293FB00, 0x8294FB00, 0x8295FB00, 0x8296FB00, 0x8297FB00, 0x8298FB00, 0x8299FB00, 0x829AFB00, + 0x829BFB00, 0x829CFB00, 0x829DFB00, 0x829EFB00, 0x829FFB00, 0x82A0FB00, 0x82A1FB00, 0x82A2FB00, 0x82A3FB00, 0x82A4FB00, 0x82A5FB00, 0x82A6FB00, 0x82A7FB00, 0x82A8FB00, 0x82A9FB00, + 0x82AAFB00, 0x82ABFB00, 0x82ACFB00, 0x82ADFB00, 0x82AEFB00, 0x82AFFB00, 0x82B0FB00, 0x82B1FB00, 0x82B2FB00, 0x82B3FB00, 0x82B4FB00, 0x82B5FB00, 0x82B6FB00, 0x82B7FB00, 0x82B8FB00, + 0x82B9FB00, 0x82BAFB00, 0x82BBFB00, 0x82BCFB00, 0x82BDFB00, 0x82BEFB00, 0x82BFFB00, 0x82C0FB00, 0x82C1FB00, 0x82C2FB00, 0x82C3FB00, 0x82C4FB00, 0x82C5FB00, 0x82C6FB00, 0x82C7FB00, + 0x82C8FB00, 0x82C9FB00, 0x82CAFB00, 0x82CBFB00, 0x82CCFB00, 0x82CDFB00, 0x82CEFB00, 0x82CFFB00, 0x82D0FB00, 0x82D1FB00, 0x82D2FB00, 0x82D3FB00, 0x82D4FB00, 0x82D5FB00, 0x82D6FB00, + 0x82D7FB00, 0x82D8FB00, 0x82D9FB00, 0x82DAFB00, 0x82DBFB00, 0x82DCFB00, 0x82DDFB00, 0x82DEFB00, 0x82DFFB00, 0x82E0FB00, 0x82E1FB00, 0x82E2FB00, 0x82E3FB00, 0x82E4FB00, 0x82E5FB00, + 0x82E6FB00, 0x82E7FB00, 0x82E8FB00, 0x82E9FB00, 0x82EAFB00, 0x82EBFB00, 0x82ECFB00, 0x82EDFB00, 0x82EEFB00, 0x82EFFB00, 0x82F0FB00, 0x82F1FB00, 0x82F2FB00, 0x82F3FB00, 0x82F4FB00, + 0x82F5FB00, 0x82F6FB00, 0x82F7FB00, 0x82F8FB00, 0x82F9FB00, 0x82FAFB00, 0x82FBFB00, 0x82FCFB00, 0x82FDFB00, 0x82FEFB00, 0x82FFFB00, 0x8300FB00, 0x8301FB00, 0x8302FB00, 0x8303FB00, + 0x8304FB00, 0x8305FB00, 0x8306FB00, 0x8307FB00, 0x8308FB00, 0x8309FB00, 0x830AFB00, 0x830BFB00, 0x830CFB00, 0x830DFB00, 0x830EFB00, 0x830FFB00, 0x8310FB00, 0x8311FB00, 0x8312FB00, 
+ 0x8313FB00, 0x8314FB00, 0x8315FB00, 0x8316FB00, 0x8317FB00, 0x8318FB00, 0x8319FB00, 0x831AFB00, 0x831BFB00, 0x831CFB00, 0x831DFB00, 0x831EFB00, 0x831FFB00, 0x8320FB00, 0x8321FB00, + 0x8322FB00, 0x8323FB00, 0x8324FB00, 0x8325FB00, 0x8326FB00, 0x8327FB00, 0x8328FB00, 0x8329FB00, 0x832AFB00, 0x832BFB00, 0x832CFB00, 0x832DFB00, 0x832EFB00, 0x832FFB00, 0x8330FB00, + 0x8331FB00, 0x8332FB00, 0x8333FB00, 0x8334FB00, 0x8335FB00, 0x8336FB00, 0x8337FB00, 0x8338FB00, 0x8339FB00, 0x833AFB00, 0x833BFB00, 0x833CFB00, 0x833DFB00, 0x833EFB00, 0x833FFB00, + 0x8340FB00, 0x8341FB00, 0x8342FB00, 0x8343FB00, 0x8344FB00, 0x8345FB00, 0x8346FB00, 0x8347FB00, 0x8348FB00, 0x8349FB00, 0x834AFB00, 0x834BFB00, 0x834CFB00, 0x834DFB00, 0x834EFB00, + 0x834FFB00, 0x8350FB00, 0x8351FB00, 0x8352FB00, 0x8353FB00, 0x8354FB00, 0x8355FB00, 0x8356FB00, 0x8357FB00, 0x8358FB00, 0x8359FB00, 0x835AFB00, 0x835BFB00, 0x835CFB00, 0x835DFB00, + 0x835EFB00, 0x835FFB00, 0x8360FB00, 0x8361FB00, 0x8362FB00, 0x8363FB00, 0x8364FB00, 0x8365FB00, 0x8366FB00, 0x8367FB00, 0x8368FB00, 0x8369FB00, 0x836AFB00, 0x836BFB00, 0x836CFB00, + 0x836DFB00, 0x836EFB00, 0x836FFB00, 0x8370FB00, 0x8371FB00, 0x8372FB00, 0x8373FB00, 0x8374FB00, 0x8375FB00, 0x8376FB00, 0x8377FB00, 0x8378FB00, 0x8379FB00, 0x837AFB00, 0x837BFB00, + 0x837CFB00, 0x837DFB00, 0x837EFB00, 0x837FFB00, 0x8380FB00, 0x8381FB00, 0x8382FB00, 0x8383FB00, 0x8384FB00, 0x8385FB00, 0x8386FB00, 0x8387FB00, 0x8388FB00, 0x8389FB00, 0x838AFB00, + 0x838BFB00, 0x838CFB00, 0x838DFB00, 0x838EFB00, 0x838FFB00, 0x8390FB00, 0x8391FB00, 0x8392FB00, 0x8393FB00, 0x8394FB00, 0x8395FB00, 0x8396FB00, 0x8397FB00, 0x8398FB00, 0x8399FB00, + 0x839AFB00, 0x839BFB00, 0x839CFB00, 0x839DFB00, 0x839EFB00, 0x839FFB00, 0x83A0FB00, 0x83A1FB00, 0x83A2FB00, 0x83A3FB00, 0x83A4FB00, 0x83A5FB00, 0x83A6FB00, 0x83A7FB00, 0x83A8FB00, + 0x83A9FB00, 0x83AAFB00, 0x83ABFB00, 0x83ACFB00, 0x83ADFB00, 0x83AEFB00, 0x83AFFB00, 0x83B0FB00, 0x83B1FB00, 0x83B2FB00, 0x83B3FB00, 0x83B4FB00, 0x83B5FB00, 0x83B6FB00, 
0x83B7FB00, + 0x83B8FB00, 0x83B9FB00, 0x83BAFB00, 0x83BBFB00, 0x83BCFB00, 0x83BDFB00, 0x83BEFB00, 0x83BFFB00, 0x83C0FB00, 0x83C1FB00, 0x83C2FB00, 0x83C3FB00, 0x83C4FB00, 0x83C5FB00, 0x83C6FB00, + 0x83C7FB00, 0x83C8FB00, 0x83C9FB00, 0x83CAFB00, 0x83CBFB00, 0x83CCFB00, 0x83CDFB00, 0x83CEFB00, 0x83CFFB00, 0x83D0FB00, 0x83D1FB00, 0x83D2FB00, 0x83D3FB00, 0x83D4FB00, 0x83D5FB00, + 0x83D6FB00, 0x83D7FB00, 0x83D8FB00, 0x83D9FB00, 0x83DAFB00, 0x83DBFB00, 0x83DCFB00, 0x83DDFB00, 0x83DEFB00, 0x83DFFB00, 0x83E0FB00, 0x83E1FB00, 0x83E2FB00, 0x83E3FB00, 0x83E4FB00, + 0x83E5FB00, 0x83E6FB00, 0x83E7FB00, 0x83E8FB00, 0x83E9FB00, 0x83EAFB00, 0x83EBFB00, 0x83ECFB00, 0x83EDFB00, 0x83EEFB00, 0x83EFFB00, 0x83F0FB00, 0x83F1FB00, 0x83F2FB00, 0x83F3FB00, + 0x83F4FB00, 0x83F5FB00, 0x83F6FB00, 0x83F7FB00, 0x83F8FB00, 0x83F9FB00, 0x83FAFB00, 0x83FBFB00, 0x83FCFB00, 0x83FDFB00, 0x83FEFB00, 0x83FFFB00, 0x8400FB00, 0x8401FB00, 0x8402FB00, + 0x8403FB00, 0x8404FB00, 0x8405FB00, 0x8406FB00, 0x8407FB00, 0x8408FB00, 0x8409FB00, 0x840AFB00, 0x840BFB00, 0x840CFB00, 0x840DFB00, 0x840EFB00, 0x840FFB00, 0x8410FB00, 0x8411FB00, + 0x8412FB00, 0x8413FB00, 0x8414FB00, 0x8415FB00, 0x8416FB00, 0x8417FB00, 0x8418FB00, 0x8419FB00, 0x841AFB00, 0x841BFB00, 0x841CFB00, 0x841DFB00, 0x841EFB00, 0x841FFB00, 0x8420FB00, + 0x8421FB00, 0x8422FB00, 0x8423FB00, 0x8424FB00, 0x8425FB00, 0x8426FB00, 0x8427FB00, 0x8428FB00, 0x8429FB00, 0x842AFB00, 0x842BFB00, 0x842CFB00, 0x842DFB00, 0x842EFB00, 0x842FFB00, + 0x8430FB00, 0x8431FB00, 0x8432FB00, 0x8433FB00, 0x8434FB00, 0x8435FB00, 0x8436FB00, 0x8437FB00, 0x8438FB00, 0x8439FB00, 0x843AFB00, 0x843BFB00, 0x843CFB00, 0x843DFB00, 0x843EFB00, + 0x843FFB00, 0x8440FB00, 0x8441FB00, 0x8442FB00, 0x8443FB00, 0x8444FB00, 0x8445FB00, 0x8446FB00, 0x8447FB00, 0x8448FB00, 0x8449FB00, 0x844AFB00, 0x844BFB00, 0x844CFB00, 0x844DFB00, + 0x844EFB00, 0x844FFB00, 0x8450FB00, 0x8451FB00, 0x8452FB00, 0x8453FB00, 0x8454FB00, 0x8455FB00, 0x8456FB00, 0x8457FB00, 0x8458FB00, 0x8459FB00, 0x845AFB00, 
0x845BFB00, 0x845CFB00, + 0x845DFB00, 0x845EFB00, 0x845FFB00, 0x8460FB00, 0x8461FB00, 0x8462FB00, 0x8463FB00, 0x8464FB00, 0x8465FB00, 0x8466FB00, 0x8467FB00, 0x8468FB00, 0x8469FB00, 0x846AFB00, 0x846BFB00, + 0x846CFB00, 0x846DFB00, 0x846EFB00, 0x846FFB00, 0x8470FB00, 0x8471FB00, 0x8472FB00, 0x8473FB00, 0x8474FB00, 0x8475FB00, 0x8476FB00, 0x8477FB00, 0x8478FB00, 0x8479FB00, 0x847AFB00, + 0x847BFB00, 0x847CFB00, 0x847DFB00, 0x847EFB00, 0x847FFB00, 0x8480FB00, 0x8481FB00, 0x8482FB00, 0x8483FB00, 0x8484FB00, 0x8485FB00, 0x8486FB00, 0x8487FB00, 0x8488FB00, 0x8489FB00, + 0x848AFB00, 0x848BFB00, 0x848CFB00, 0x848DFB00, 0x848EFB00, 0x848FFB00, 0x8490FB00, 0x8491FB00, 0x8492FB00, 0x8493FB00, 0x8494FB00, 0x8495FB00, 0x8496FB00, 0x8497FB00, 0x8498FB00, + 0x8499FB00, 0x849AFB00, 0x849BFB00, 0x849CFB00, 0x849DFB00, 0x849EFB00, 0x849FFB00, 0x84A0FB00, 0x84A1FB00, 0x84A2FB00, 0x84A3FB00, 0x84A4FB00, 0x84A5FB00, 0x84A6FB00, 0x84A7FB00, + 0x84A8FB00, 0x84A9FB00, 0x84AAFB00, 0x84ABFB00, 0x84ACFB00, 0x84ADFB00, 0x84AEFB00, 0x84AFFB00, 0x84B0FB00, 0x84B1FB00, 0x84B2FB00, 0x84B3FB00, 0x84B4FB00, 0x84B5FB00, 0x84B6FB00, + 0x84B7FB00, 0x84B8FB00, 0x84B9FB00, 0x84BAFB00, 0x84BBFB00, 0x84BCFB00, 0x84BDFB00, 0x84BEFB00, 0x84BFFB00, 0x84C0FB00, 0x84C1FB00, 0x84C2FB00, 0x84C3FB00, 0x84C4FB00, 0x84C5FB00, + 0x84C6FB00, 0x84C7FB00, 0x84C8FB00, 0x84C9FB00, 0x84CAFB00, 0x84CBFB00, 0x84CCFB00, 0x84CDFB00, 0x84CEFB00, 0x84CFFB00, 0x84D0FB00, 0x84D1FB00, 0x84D2FB00, 0x84D3FB00, 0x84D4FB00, + 0x84D5FB00, 0x84D6FB00, 0x84D7FB00, 0x84D8FB00, 0x84D9FB00, 0x84DAFB00, 0x84DBFB00, 0x84DCFB00, 0x84DDFB00, 0x84DEFB00, 0x84DFFB00, 0x84E0FB00, 0x84E1FB00, 0x84E2FB00, 0x84E3FB00, + 0x84E4FB00, 0x84E5FB00, 0x84E6FB00, 0x84E7FB00, 0x84E8FB00, 0x84E9FB00, 0x84EAFB00, 0x84EBFB00, 0x84ECFB00, 0x84EDFB00, 0x84EEFB00, 0x84EFFB00, 0x84F0FB00, 0x84F1FB00, 0x84F2FB00, + 0x84F3FB00, 0x84F4FB00, 0x84F5FB00, 0x84F6FB00, 0x84F7FB00, 0x84F8FB00, 0x84F9FB00, 0x84FAFB00, 0x84FBFB00, 0x84FCFB00, 0x84FDFB00, 0x84FEFB00, 
0x84FFFB00, 0x8500FB00, 0x8501FB00, + 0x8502FB00, 0x8503FB00, 0x8504FB00, 0x8505FB00, 0x8506FB00, 0x8507FB00, 0x8508FB00, 0x8509FB00, 0x850AFB00, 0x850BFB00, 0x850CFB00, 0x850DFB00, 0x850EFB00, 0x850FFB00, 0x8510FB00, + 0x8511FB00, 0x8512FB00, 0x8513FB00, 0x8514FB00, 0x8515FB00, 0x8516FB00, 0x8517FB00, 0x8518FB00, 0x8519FB00, 0x851AFB00, 0x851BFB00, 0x851CFB00, 0x851DFB00, 0x851EFB00, 0x851FFB00, + 0x8520FB00, 0x8521FB00, 0x8522FB00, 0x8523FB00, 0x8524FB00, 0x8525FB00, 0x8526FB00, 0x8527FB00, 0x8528FB00, 0x8529FB00, 0x852AFB00, 0x852BFB00, 0x852CFB00, 0x852DFB00, 0x852EFB00, + 0x852FFB00, 0x8530FB00, 0x8531FB00, 0x8532FB00, 0x8533FB00, 0x8534FB00, 0x8535FB00, 0x8536FB00, 0x8537FB00, 0x8538FB00, 0x8539FB00, 0x853AFB00, 0x853BFB00, 0x853CFB00, 0x853DFB00, + 0x853EFB00, 0x853FFB00, 0x8540FB00, 0x8541FB00, 0x8542FB00, 0x8543FB00, 0x8544FB00, 0x8545FB00, 0x8546FB00, 0x8547FB00, 0x8548FB00, 0x8549FB00, 0x854AFB00, 0x854BFB00, 0x854CFB00, + 0x854DFB00, 0x854EFB00, 0x854FFB00, 0x8550FB00, 0x8551FB00, 0x8552FB00, 0x8553FB00, 0x8554FB00, 0x8555FB00, 0x8556FB00, 0x8557FB00, 0x8558FB00, 0x8559FB00, 0x855AFB00, 0x855BFB00, + 0x855CFB00, 0x855DFB00, 0x855EFB00, 0x855FFB00, 0x8560FB00, 0x8561FB00, 0x8562FB00, 0x8563FB00, 0x8564FB00, 0x8565FB00, 0x8566FB00, 0x8567FB00, 0x8568FB00, 0x8569FB00, 0x856AFB00, + 0x856BFB00, 0x856CFB00, 0x856DFB00, 0x856EFB00, 0x856FFB00, 0x8570FB00, 0x8571FB00, 0x8572FB00, 0x8573FB00, 0x8574FB00, 0x8575FB00, 0x8576FB00, 0x8577FB00, 0x8578FB00, 0x8579FB00, + 0x857AFB00, 0x857BFB00, 0x857CFB00, 0x857DFB00, 0x857EFB00, 0x857FFB00, 0x8580FB00, 0x8581FB00, 0x8582FB00, 0x8583FB00, 0x8584FB00, 0x8585FB00, 0x8586FB00, 0x8587FB00, 0x8588FB00, + 0x8589FB00, 0x858AFB00, 0x858BFB00, 0x858CFB00, 0x858DFB00, 0x858EFB00, 0x858FFB00, 0x8590FB00, 0x8591FB00, 0x8592FB00, 0x8593FB00, 0x8594FB00, 0x8595FB00, 0x8596FB00, 0x8597FB00, + 0x8598FB00, 0x8599FB00, 0x859AFB00, 0x859BFB00, 0x859CFB00, 0x859DFB00, 0x859EFB00, 0x859FFB00, 0x85A0FB00, 0x85A1FB00, 0x85A2FB00, 
0x85A3FB00, 0x85A4FB00, 0x85A5FB00, 0x85A6FB00, + 0x85A7FB00, 0x85A8FB00, 0x85A9FB00, 0x85AAFB00, 0x85ABFB00, 0x85ACFB00, 0x85ADFB00, 0x85AEFB00, 0x85AFFB00, 0x85B0FB00, 0x85B1FB00, 0x85B2FB00, 0x85B3FB00, 0x85B4FB00, 0x85B5FB00, + 0x85B6FB00, 0x85B7FB00, 0x85B8FB00, 0x85B9FB00, 0x85BAFB00, 0x85BBFB00, 0x85BCFB00, 0x85BDFB00, 0x85BEFB00, 0x85BFFB00, 0x85C0FB00, 0x85C1FB00, 0x85C2FB00, 0x85C3FB00, 0x85C4FB00, + 0x85C5FB00, 0x85C6FB00, 0x85C7FB00, 0x85C8FB00, 0x85C9FB00, 0x85CAFB00, 0x85CBFB00, 0x85CCFB00, 0x85CDFB00, 0x85CEFB00, 0x85CFFB00, 0x85D0FB00, 0x85D1FB00, 0x85D2FB00, 0x85D3FB00, + 0x85D4FB00, 0x85D5FB00, 0x85D6FB00, 0x85D7FB00, 0x85D8FB00, 0x85D9FB00, 0x85DAFB00, 0x85DBFB00, 0x85DCFB00, 0x85DDFB00, 0x85DEFB00, 0x85DFFB00, 0x85E0FB00, 0x85E1FB00, 0x85E2FB00, + 0x85E3FB00, 0x85E4FB00, 0x85E5FB00, 0x85E6FB00, 0x85E7FB00, 0x85E8FB00, 0x85E9FB00, 0x85EAFB00, 0x85EBFB00, 0x85ECFB00, 0x85EDFB00, 0x85EEFB00, 0x85EFFB00, 0x85F0FB00, 0x85F1FB00, + 0x85F2FB00, 0x85F3FB00, 0x85F4FB00, 0x85F5FB00, 0x85F6FB00, 0x85F7FB00, 0x85F8FB00, 0x85F9FB00, 0x85FAFB00, 0x85FBFB00, 0x85FCFB00, 0x85FDFB00, 0x85FEFB00, 0x85FFFB00, 0x8600FB00, + 0x8601FB00, 0x8602FB00, 0x8603FB00, 0x8604FB00, 0x8605FB00, 0x8606FB00, 0x8607FB00, 0x8608FB00, 0x8609FB00, 0x860AFB00, 0x860BFB00, 0x860CFB00, 0x860DFB00, 0x860EFB00, 0x860FFB00, + 0x8610FB00, 0x8611FB00, 0x8612FB00, 0x8613FB00, 0x8614FB00, 0x8615FB00, 0x8616FB00, 0x8617FB00, 0x8618FB00, 0x8619FB00, 0x861AFB00, 0x861BFB00, 0x861CFB00, 0x861DFB00, 0x861EFB00, + 0x861FFB00, 0x8620FB00, 0x8621FB00, 0x8622FB00, 0x8623FB00, 0x8624FB00, 0x8625FB00, 0x8626FB00, 0x8627FB00, 0x8628FB00, 0x8629FB00, 0x862AFB00, 0x862BFB00, 0x862CFB00, 0x862DFB00, + 0x862EFB00, 0x862FFB00, 0x8630FB00, 0x8631FB00, 0x8632FB00, 0x8633FB00, 0x8634FB00, 0x8635FB00, 0x8636FB00, 0x8637FB00, 0x8638FB00, 0x8639FB00, 0x863AFB00, 0x863BFB00, 0x863CFB00, + 0x863DFB00, 0x863EFB00, 0x863FFB00, 0x8640FB00, 0x8641FB00, 0x8642FB00, 0x8643FB00, 0x8644FB00, 0x8645FB00, 0x8646FB00, 
0x8647FB00, 0x8648FB00, 0x8649FB00, 0x864AFB00, 0x864BFB00, + 0x864CFB00, 0x864DFB00, 0x864EFB00, 0x864FFB00, 0x8650FB00, 0x8651FB00, 0x8652FB00, 0x8653FB00, 0x8654FB00, 0x8655FB00, 0x8656FB00, 0x8657FB00, 0x8658FB00, 0x8659FB00, 0x865AFB00, + 0x865BFB00, 0x865CFB00, 0x865DFB00, 0x865EFB00, 0x865FFB00, 0x8660FB00, 0x8661FB00, 0x8662FB00, 0x8663FB00, 0x8664FB00, 0x8665FB00, 0x8666FB00, 0x8667FB00, 0x8668FB00, 0x8669FB00, + 0x866AFB00, 0x866BFB00, 0x866CFB00, 0x866DFB00, 0x866EFB00, 0x866FFB00, 0x8670FB00, 0x8671FB00, 0x8672FB00, 0x8673FB00, 0x8674FB00, 0x8675FB00, 0x8676FB00, 0x8677FB00, 0x8678FB00, + 0x8679FB00, 0x867AFB00, 0x867BFB00, 0x867CFB00, 0x867DFB00, 0x867EFB00, 0x867FFB00, 0x8680FB00, 0x8681FB00, 0x8682FB00, 0x8683FB00, 0x8684FB00, 0x8685FB00, 0x8686FB00, 0x8687FB00, + 0x8688FB00, 0x8689FB00, 0x868AFB00, 0x868BFB00, 0x868CFB00, 0x868DFB00, 0x868EFB00, 0x868FFB00, 0x8690FB00, 0x8691FB00, 0x8692FB00, 0x8693FB00, 0x8694FB00, 0x8695FB00, 0x8696FB00, + 0x8697FB00, 0x8698FB00, 0x8699FB00, 0x869AFB00, 0x869BFB00, 0x869CFB00, 0x869DFB00, 0x869EFB00, 0x869FFB00, 0x86A0FB00, 0x86A1FB00, 0x86A2FB00, 0x86A3FB00, 0x86A4FB00, 0x86A5FB00, + 0x86A6FB00, 0x86A7FB00, 0x86A8FB00, 0x86A9FB00, 0x86AAFB00, 0x86ABFB00, 0x86ACFB00, 0x86ADFB00, 0x86AEFB00, 0x86AFFB00, 0x86B0FB00, 0x86B1FB00, 0x86B2FB00, 0x86B3FB00, 0x86B4FB00, + 0x86B5FB00, 0x86B6FB00, 0x86B7FB00, 0x86B8FB00, 0x86B9FB00, 0x86BAFB00, 0x86BBFB00, 0x86BCFB00, 0x86BDFB00, 0x86BEFB00, 0x86BFFB00, 0x86C0FB00, 0x86C1FB00, 0x86C2FB00, 0x86C3FB00, + 0x86C4FB00, 0x86C5FB00, 0x86C6FB00, 0x86C7FB00, 0x86C8FB00, 0x86C9FB00, 0x86CAFB00, 0x86CBFB00, 0x86CCFB00, 0x86CDFB00, 0x86CEFB00, 0x86CFFB00, 0x86D0FB00, 0x86D1FB00, 0x86D2FB00, + 0x86D3FB00, 0x86D4FB00, 0x86D5FB00, 0x86D6FB00, 0x86D7FB00, 0x86D8FB00, 0x86D9FB00, 0x86DAFB00, 0x86DBFB00, 0x86DCFB00, 0x86DDFB00, 0x86DEFB00, 0x86DFFB00, 0x86E0FB00, 0x86E1FB00, + 0x86E2FB00, 0x86E3FB00, 0x86E4FB00, 0x86E5FB00, 0x86E6FB00, 0x86E7FB00, 0x86E8FB00, 0x86E9FB00, 0x86EAFB00, 
0x86EBFB00, 0x86ECFB00, 0x86EDFB00, 0x86EEFB00, 0x86EFFB00, 0x86F0FB00, + 0x86F1FB00, 0x86F2FB00, 0x86F3FB00, 0x86F4FB00, 0x86F5FB00, 0x86F6FB00, 0x86F7FB00, 0x86F8FB00, 0x86F9FB00, 0x86FAFB00, 0x86FBFB00, 0x86FCFB00, 0x86FDFB00, 0x86FEFB00, 0x86FFFB00, + 0x8700FB00, 0x8701FB00, 0x8702FB00, 0x8703FB00, 0x8704FB00, 0x8705FB00, 0x8706FB00, 0x8707FB00, 0x8708FB00, 0x8709FB00, 0x870AFB00, 0x870BFB00, 0x870CFB00, 0x870DFB00, 0x870EFB00, + 0x870FFB00, 0x8710FB00, 0x8711FB00, 0x8712FB00, 0x8713FB00, 0x8714FB00, 0x8715FB00, 0x8716FB00, 0x8717FB00, 0x8718FB00, 0x8719FB00, 0x871AFB00, 0x871BFB00, 0x871CFB00, 0x871DFB00, + 0x871EFB00, 0x871FFB00, 0x8720FB00, 0x8721FB00, 0x8722FB00, 0x8723FB00, 0x8724FB00, 0x8725FB00, 0x8726FB00, 0x8727FB00, 0x8728FB00, 0x8729FB00, 0x872AFB00, 0x872BFB00, 0x872CFB00, + 0x872DFB00, 0x872EFB00, 0x872FFB00, 0x8730FB00, 0x8731FB00, 0x8732FB00, 0x8733FB00, 0x8734FB00, 0x8735FB00, 0x8736FB00, 0x8737FB00, 0x8738FB00, 0x8739FB00, 0x873AFB00, 0x873BFB00, + 0x873CFB00, 0x873DFB00, 0x873EFB00, 0x873FFB00, 0x8740FB00, 0x8741FB00, 0x8742FB00, 0x8743FB00, 0x8744FB00, 0x8745FB00, 0x8746FB00, 0x8747FB00, 0x8748FB00, 0x8749FB00, 0x874AFB00, + 0x874BFB00, 0x874CFB00, 0x874DFB00, 0x874EFB00, 0x874FFB00, 0x8750FB00, 0x8751FB00, 0x8752FB00, 0x8753FB00, 0x8754FB00, 0x8755FB00, 0x8756FB00, 0x8757FB00, 0x8758FB00, 0x8759FB00, + 0x875AFB00, 0x875BFB00, 0x875CFB00, 0x875DFB00, 0x875EFB00, 0x875FFB00, 0x8760FB00, 0x8761FB00, 0x8762FB00, 0x8763FB00, 0x8764FB00, 0x8765FB00, 0x8766FB00, 0x8767FB00, 0x8768FB00, + 0x8769FB00, 0x876AFB00, 0x876BFB00, 0x876CFB00, 0x876DFB00, 0x876EFB00, 0x876FFB00, 0x8770FB00, 0x8771FB00, 0x8772FB00, 0x8773FB00, 0x8774FB00, 0x8775FB00, 0x8776FB00, 0x8777FB00, + 0x8778FB00, 0x8779FB00, 0x877AFB00, 0x877BFB00, 0x877CFB00, 0x877DFB00, 0x877EFB00, 0x877FFB00, 0x8780FB00, 0x8781FB00, 0x8782FB00, 0x8783FB00, 0x8784FB00, 0x8785FB00, 0x8786FB00, + 0x8787FB00, 0x8788FB00, 0x8789FB00, 0x878AFB00, 0x878BFB00, 0x878CFB00, 0x878DFB00, 0x878EFB00, 
0x878FFB00, 0x8790FB00, 0x8791FB00, 0x8792FB00, 0x8793FB00, 0x8794FB00, 0x8795FB00, + 0x8796FB00, 0x8797FB00, 0x8798FB00, 0x8799FB00, 0x879AFB00, 0x879BFB00, 0x879CFB00, 0x879DFB00, 0x879EFB00, 0x879FFB00, 0x87A0FB00, 0x87A1FB00, 0x87A2FB00, 0x87A3FB00, 0x87A4FB00, + 0x87A5FB00, 0x87A6FB00, 0x87A7FB00, 0x87A8FB00, 0x87A9FB00, 0x87AAFB00, 0x87ABFB00, 0x87ACFB00, 0x87ADFB00, 0x87AEFB00, 0x87AFFB00, 0x87B0FB00, 0x87B1FB00, 0x87B2FB00, 0x87B3FB00, + 0x87B4FB00, 0x87B5FB00, 0x87B6FB00, 0x87B7FB00, 0x87B8FB00, 0x87B9FB00, 0x87BAFB00, 0x87BBFB00, 0x87BCFB00, 0x87BDFB00, 0x87BEFB00, 0x87BFFB00, 0x87C0FB00, 0x87C1FB00, 0x87C2FB00, + 0x87C3FB00, 0x87C4FB00, 0x87C5FB00, 0x87C6FB00, 0x87C7FB00, 0x87C8FB00, 0x87C9FB00, 0x87CAFB00, 0x87CBFB00, 0x87CCFB00, 0x87CDFB00, 0x87CEFB00, 0x87CFFB00, 0x87D0FB00, 0x87D1FB00, + 0x87D2FB00, 0x87D3FB00, 0x87D4FB00, 0x87D5FB00, 0x87D6FB00, 0x87D7FB00, 0x87D8FB00, 0x87D9FB00, 0x87DAFB00, 0x87DBFB00, 0x87DCFB00, 0x87DDFB00, 0x87DEFB00, 0x87DFFB00, 0x87E0FB00, + 0x87E1FB00, 0x87E2FB00, 0x87E3FB00, 0x87E4FB00, 0x87E5FB00, 0x87E6FB00, 0x87E7FB00, 0x87E8FB00, 0x87E9FB00, 0x87EAFB00, 0x87EBFB00, 0x87ECFB00, 0x87EDFB00, 0x87EEFB00, 0x87EFFB00, + 0x87F0FB00, 0x87F1FB00, 0x87F2FB00, 0x87F3FB00, 0x87F4FB00, 0x87F5FB00, 0x87F6FB00, 0x87F7FB00, 0x87F8FB00, 0x87F9FB00, 0x87FAFB00, 0x87FBFB00, 0x87FCFB00, 0x87FDFB00, 0x87FEFB00, + 0x87FFFB00, 0x8800FB00, 0x8801FB00, 0x8802FB00, 0x8803FB00, 0x8804FB00, 0x8805FB00, 0x8806FB00, 0x8807FB00, 0x8808FB00, 0x8809FB00, 0x880AFB00, 0x880BFB00, 0x880CFB00, 0x880DFB00, + 0x880EFB00, 0x880FFB00, 0x8810FB00, 0x8811FB00, 0x8812FB00, 0x8813FB00, 0x8814FB00, 0x8815FB00, 0x8816FB00, 0x8817FB00, 0x8818FB00, 0x8819FB00, 0x881AFB00, 0x881BFB00, 0x881CFB00, + 0x881DFB00, 0x881EFB00, 0x881FFB00, 0x8820FB00, 0x8821FB00, 0x8822FB00, 0x8823FB00, 0x8824FB00, 0x8825FB00, 0x8826FB00, 0x8827FB00, 0x8828FB00, 0x8829FB00, 0x882AFB00, 0x882BFB00, + 0x882CFB00, 0x882DFB00, 0x882EFB00, 0x882FFB00, 0x8830FB00, 0x8831FB00, 0x8832FB00, 
0x8833FB00, 0x8834FB00, 0x8835FB00, 0x8836FB00, 0x8837FB00, 0x8838FB00, 0x8839FB00, 0x883AFB00, + 0x883BFB00, 0x883CFB00, 0x883DFB00, 0x883EFB00, 0x883FFB00, 0x8840FB00, 0x8841FB00, 0x8842FB00, 0x8843FB00, 0x8844FB00, 0x8845FB00, 0x8846FB00, 0x8847FB00, 0x8848FB00, 0x8849FB00, + 0x884AFB00, 0x884BFB00, 0x884CFB00, 0x884DFB00, 0x884EFB00, 0x884FFB00, 0x8850FB00, 0x8851FB00, 0x8852FB00, 0x8853FB00, 0x8854FB00, 0x8855FB00, 0x8856FB00, 0x8857FB00, 0x8858FB00, + 0x8859FB00, 0x885AFB00, 0x885BFB00, 0x885CFB00, 0x885DFB00, 0x885EFB00, 0x885FFB00, 0x8860FB00, 0x8861FB00, 0x8862FB00, 0x8863FB00, 0x8864FB00, 0x8865FB00, 0x8866FB00, 0x8867FB00, + 0x8868FB00, 0x8869FB00, 0x886AFB00, 0x886BFB00, 0x886CFB00, 0x886DFB00, 0x886EFB00, 0x886FFB00, 0x8870FB00, 0x8871FB00, 0x8872FB00, 0x8873FB00, 0x8874FB00, 0x8875FB00, 0x8876FB00, + 0x8877FB00, 0x8878FB00, 0x8879FB00, 0x887AFB00, 0x887BFB00, 0x887CFB00, 0x887DFB00, 0x887EFB00, 0x887FFB00, 0x8880FB00, 0x8881FB00, 0x8882FB00, 0x8883FB00, 0x8884FB00, 0x8885FB00, + 0x8886FB00, 0x8887FB00, 0x8888FB00, 0x8889FB00, 0x888AFB00, 0x888BFB00, 0x888CFB00, 0x888DFB00, 0x888EFB00, 0x888FFB00, 0x8890FB00, 0x8891FB00, 0x8892FB00, 0x8893FB00, 0x8894FB00, + 0x8895FB00, 0x8896FB00, 0x8897FB00, 0x8898FB00, 0x8899FB00, 0x889AFB00, 0x889BFB00, 0x889CFB00, 0x889DFB00, 0x889EFB00, 0x889FFB00, 0x88A0FB00, 0x88A1FB00, 0x88A2FB00, 0x88A3FB00, + 0x88A4FB00, 0x88A5FB00, 0x88A6FB00, 0x88A7FB00, 0x88A8FB00, 0x88A9FB00, 0x88AAFB00, 0x88ABFB00, 0x88ACFB00, 0x88ADFB00, 0x88AEFB00, 0x88AFFB00, 0x88B0FB00, 0x88B1FB00, 0x88B2FB00, + 0x88B3FB00, 0x88B4FB00, 0x88B5FB00, 0x88B6FB00, 0x88B7FB00, 0x88B8FB00, 0x88B9FB00, 0x88BAFB00, 0x88BBFB00, 0x88BCFB00, 0x88BDFB00, 0x88BEFB00, 0x88BFFB00, 0x88C0FB00, 0x88C1FB00, + 0x88C2FB00, 0x88C3FB00, 0x88C4FB00, 0x88C5FB00, 0x88C6FB00, 0x88C7FB00, 0x88C8FB00, 0x88C9FB00, 0x88CAFB00, 0x88CBFB00, 0x88CCFB00, 0x88CDFB00, 0x88CEFB00, 0x88CFFB00, 0x88D0FB00, + 0x88D1FB00, 0x88D2FB00, 0x88D3FB00, 0x88D4FB00, 0x88D5FB00, 0x88D6FB00, 
0x88D7FB00, 0x88D8FB00, 0x88D9FB00, 0x88DAFB00, 0x88DBFB00, 0x88DCFB00, 0x88DDFB00, 0x88DEFB00, 0x88DFFB00, + 0x88E0FB00, 0x88E1FB00, 0x88E2FB00, 0x88E3FB00, 0x88E4FB00, 0x88E5FB00, 0x88E6FB00, 0x88E7FB00, 0x88E8FB00, 0x88E9FB00, 0x88EAFB00, 0x88EBFB00, 0x88ECFB00, 0x88EDFB00, 0x88EEFB00, + 0x88EFFB00, 0x88F0FB00, 0x88F1FB00, 0x88F2FB00, 0x88F3FB00, 0x88F4FB00, 0x88F5FB00, 0x88F6FB00, 0x88F7FB00, 0x88F8FB00, 0x88F9FB00, 0x88FAFB00, 0x88FBFB00, 0x88FCFB00, 0x88FDFB00, + 0x88FEFB00, 0x88FFFB00, 0x8900FB00, 0x8901FB00, 0x8902FB00, 0x8903FB00, 0x8904FB00, 0x8905FB00, 0x8906FB00, 0x8907FB00, 0x8908FB00, 0x8909FB00, 0x890AFB00, 0x890BFB00, 0x890CFB00, + 0x890DFB00, 0x890EFB00, 0x890FFB00, 0x8910FB00, 0x8911FB00, 0x8912FB00, 0x8913FB00, 0x8914FB00, 0x8915FB00, 0x8916FB00, 0x8917FB00, 0x8918FB00, 0x8919FB00, 0x891AFB00, 0x891BFB00, + 0x891CFB00, 0x891DFB00, 0x891EFB00, 0x891FFB00, 0x8920FB00, 0x8921FB00, 0x8922FB00, 0x8923FB00, 0x8924FB00, 0x8925FB00, 0x8926FB00, 0x8927FB00, 0x8928FB00, 0x8929FB00, 0x892AFB00, + 0x892BFB00, 0x892CFB00, 0x892DFB00, 0x892EFB00, 0x892FFB00, 0x8930FB00, 0x8931FB00, 0x8932FB00, 0x8933FB00, 0x8934FB00, 0x8935FB00, 0x8936FB00, 0x8937FB00, 0x8938FB00, 0x8939FB00, + 0x893AFB00, 0x893BFB00, 0x893CFB00, 0x893DFB00, 0x893EFB00, 0x893FFB00, 0x8940FB00, 0x8941FB00, 0x8942FB00, 0x8943FB00, 0x8944FB00, 0x8945FB00, 0x8946FB00, 0x8947FB00, 0x8948FB00, + 0x8949FB00, 0x894AFB00, 0x894BFB00, 0x894CFB00, 0x894DFB00, 0x894EFB00, 0x894FFB00, 0x8950FB00, 0x8951FB00, 0x8952FB00, 0x8953FB00, 0x8954FB00, 0x8955FB00, 0x8956FB00, 0x8957FB00, + 0x8958FB00, 0x8959FB00, 0x895AFB00, 0x895BFB00, 0x895CFB00, 0x895DFB00, 0x895EFB00, 0x895FFB00, 0x8960FB00, 0x8961FB00, 0x8962FB00, 0x8963FB00, 0x8964FB00, 0x8965FB00, 0x8966FB00, + 0x8967FB00, 0x8968FB00, 0x8969FB00, 0x896AFB00, 0x896BFB00, 0x896CFB00, 0x896DFB00, 0x896EFB00, 0x896FFB00, 0x8970FB00, 0x8971FB00, 0x8972FB00, 0x8973FB00, 0x8974FB00, 0x8975FB00, + 0x8976FB00, 0x8977FB00, 0x8978FB00, 0x8979FB00, 0x897AFB00, 
0x897BFB00, 0x897CFB00, 0x897DFB00, 0x897EFB00, 0x897FFB00, 0x8980FB00, 0x8981FB00, 0x8982FB00, 0x8983FB00, 0x8984FB00, + 0x8985FB00, 0x8986FB00, 0x8987FB00, 0x8988FB00, 0x8989FB00, 0x898AFB00, 0x898BFB00, 0x898CFB00, 0x898DFB00, 0x898EFB00, 0x898FFB00, 0x8990FB00, 0x8991FB00, 0x8992FB00, 0x8993FB00, + 0x8994FB00, 0x8995FB00, 0x8996FB00, 0x8997FB00, 0x8998FB00, 0x8999FB00, 0x899AFB00, 0x899BFB00, 0x899CFB00, 0x899DFB00, 0x899EFB00, 0x899FFB00, 0x89A0FB00, 0x89A1FB00, 0x89A2FB00, + 0x89A3FB00, 0x89A4FB00, 0x89A5FB00, 0x89A6FB00, 0x89A7FB00, 0x89A8FB00, 0x89A9FB00, 0x89AAFB00, 0x89ABFB00, 0x89ACFB00, 0x89ADFB00, 0x89AEFB00, 0x89AFFB00, 0x89B0FB00, 0x89B1FB00, + 0x89B2FB00, 0x89B3FB00, 0x89B4FB00, 0x89B5FB00, 0x89B6FB00, 0x89B7FB00, 0x89B8FB00, 0x89B9FB00, 0x89BAFB00, 0x89BBFB00, 0x89BCFB00, 0x89BDFB00, 0x89BEFB00, 0x89BFFB00, 0x89C0FB00, + 0x89C1FB00, 0x89C2FB00, 0x89C3FB00, 0x89C4FB00, 0x89C5FB00, 0x89C6FB00, 0x89C7FB00, 0x89C8FB00, 0x89C9FB00, 0x89CAFB00, 0x89CBFB00, 0x89CCFB00, 0x89CDFB00, 0x89CEFB00, 0x89CFFB00, + 0x89D0FB00, 0x89D1FB00, 0x89D2FB00, 0x89D3FB00, 0x89D4FB00, 0x89D5FB00, 0x89D6FB00, 0x89D7FB00, 0x89D8FB00, 0x89D9FB00, 0x89DAFB00, 0x89DBFB00, 0x89DCFB00, 0x89DDFB00, 0x89DEFB00, + 0x89DFFB00, 0x89E0FB00, 0x89E1FB00, 0x89E2FB00, 0x89E3FB00, 0x89E4FB00, 0x89E5FB00, 0x89E6FB00, 0x89E7FB00, 0x89E8FB00, 0x89E9FB00, 0x89EAFB00, 0x89EBFB00, 0x89ECFB00, 0x89EDFB00, + 0x89EEFB00, 0x89EFFB00, 0x89F0FB00, 0x89F1FB00, 0x89F2FB00, 0x89F3FB00, 0x89F4FB00, 0x89F5FB00, 0x89F6FB00, 0x89F7FB00, 0x89F8FB00, 0x89F9FB00, 0x89FAFB00, 0x89FBFB00, 0x89FCFB00, + 0x89FDFB00, 0x89FEFB00, 0x89FFFB00, 0x8A00FB00, 0x8A01FB00, 0x8A02FB00, 0x8A03FB00, 0x8A04FB00, 0x8A05FB00, 0x8A06FB00, 0x8A07FB00, 0x8A08FB00, 0x8A09FB00, 0x8A0AFB00, 0x8A0BFB00, + 0x8A0CFB00, 0x8A0DFB00, 0x8A0EFB00, 0x8A0FFB00, 0x8A10FB00, 0x8A11FB00, 0x8A12FB00, 0x8A13FB00, 0x8A14FB00, 0x8A15FB00, 0x8A16FB00, 0x8A17FB00, 0x8A18FB00, 0x8A19FB00, 0x8A1AFB00, + 0x8A1BFB00, 0x8A1CFB00, 0x8A1DFB00, 0x8A1EFB00, 
0x8A1FFB00, 0x8A20FB00, 0x8A21FB00, 0x8A22FB00, 0x8A23FB00, 0x8A24FB00, 0x8A25FB00, 0x8A26FB00, 0x8A27FB00, 0x8A28FB00, 0x8A29FB00, + 0x8A2AFB00, 0x8A2BFB00, 0x8A2CFB00, 0x8A2DFB00, 0x8A2EFB00, 0x8A2FFB00, 0x8A30FB00, 0x8A31FB00, 0x8A32FB00, 0x8A33FB00, 0x8A34FB00, 0x8A35FB00, 0x8A36FB00, 0x8A37FB00, 0x8A38FB00, + 0x8A39FB00, 0x8A3AFB00, 0x8A3BFB00, 0x8A3CFB00, 0x8A3DFB00, 0x8A3EFB00, 0x8A3FFB00, 0x8A40FB00, 0x8A41FB00, 0x8A42FB00, 0x8A43FB00, 0x8A44FB00, 0x8A45FB00, 0x8A46FB00, 0x8A47FB00, + 0x8A48FB00, 0x8A49FB00, 0x8A4AFB00, 0x8A4BFB00, 0x8A4CFB00, 0x8A4DFB00, 0x8A4EFB00, 0x8A4FFB00, 0x8A50FB00, 0x8A51FB00, 0x8A52FB00, 0x8A53FB00, 0x8A54FB00, 0x8A55FB00, 0x8A56FB00, + 0x8A57FB00, 0x8A58FB00, 0x8A59FB00, 0x8A5AFB00, 0x8A5BFB00, 0x8A5CFB00, 0x8A5DFB00, 0x8A5EFB00, 0x8A5FFB00, 0x8A60FB00, 0x8A61FB00, 0x8A62FB00, 0x8A63FB00, 0x8A64FB00, 0x8A65FB00, + 0x8A66FB00, 0x8A67FB00, 0x8A68FB00, 0x8A69FB00, 0x8A6AFB00, 0x8A6BFB00, 0x8A6CFB00, 0x8A6DFB00, 0x8A6EFB00, 0x8A6FFB00, 0x8A70FB00, 0x8A71FB00, 0x8A72FB00, 0x8A73FB00, 0x8A74FB00, + 0x8A75FB00, 0x8A76FB00, 0x8A77FB00, 0x8A78FB00, 0x8A79FB00, 0x8A7AFB00, 0x8A7BFB00, 0x8A7CFB00, 0x8A7DFB00, 0x8A7EFB00, 0x8A7FFB00, 0x8A80FB00, 0x8A81FB00, 0x8A82FB00, 0x8A83FB00, + 0x8A84FB00, 0x8A85FB00, 0x8A86FB00, 0x8A87FB00, 0x8A88FB00, 0x8A89FB00, 0x8A8AFB00, 0x8A8BFB00, 0x8A8CFB00, 0x8A8DFB00, 0x8A8EFB00, 0x8A8FFB00, 0x8A90FB00, 0x8A91FB00, 0x8A92FB00, + 0x8A93FB00, 0x8A94FB00, 0x8A95FB00, 0x8A96FB00, 0x8A97FB00, 0x8A98FB00, 0x8A99FB00, 0x8A9AFB00, 0x8A9BFB00, 0x8A9CFB00, 0x8A9DFB00, 0x8A9EFB00, 0x8A9FFB00, 0x8AA0FB00, 0x8AA1FB00, + 0x8AA2FB00, 0x8AA3FB00, 0x8AA4FB00, 0x8AA5FB00, 0x8AA6FB00, 0x8AA7FB00, 0x8AA8FB00, 0x8AA9FB00, 0x8AAAFB00, 0x8AABFB00, 0x8AACFB00, 0x8AADFB00, 0x8AAEFB00, 0x8AAFFB00, 0x8AB0FB00, + 0x8AB1FB00, 0x8AB2FB00, 0x8AB3FB00, 0x8AB4FB00, 0x8AB5FB00, 0x8AB6FB00, 0x8AB7FB00, 0x8AB8FB00, 0x8AB9FB00, 0x8ABAFB00, 0x8ABBFB00, 0x8ABCFB00, 0x8ABDFB00, 0x8ABEFB00, 0x8ABFFB00, + 0x8AC0FB00, 0x8AC1FB00, 0x8AC2FB00, 
0x8AC3FB00, 0x8AC4FB00, 0x8AC5FB00, 0x8AC6FB00, 0x8AC7FB00, 0x8AC8FB00, 0x8AC9FB00, 0x8ACAFB00, 0x8ACBFB00, 0x8ACCFB00, 0x8ACDFB00, 0x8ACEFB00, + 0x8ACFFB00, 0x8AD0FB00, 0x8AD1FB00, 0x8AD2FB00, 0x8AD3FB00, 0x8AD4FB00, 0x8AD5FB00, 0x8AD6FB00, 0x8AD7FB00, 0x8AD8FB00, 0x8AD9FB00, 0x8ADAFB00, 0x8ADBFB00, 0x8ADCFB00, 0x8ADDFB00, + 0x8ADEFB00, 0x8ADFFB00, 0x8AE0FB00, 0x8AE1FB00, 0x8AE2FB00, 0x8AE3FB00, 0x8AE4FB00, 0x8AE5FB00, 0x8AE6FB00, 0x8AE7FB00, 0x8AE8FB00, 0x8AE9FB00, 0x8AEAFB00, 0x8AEBFB00, 0x8AECFB00, + 0x8AEDFB00, 0x8AEEFB00, 0x8AEFFB00, 0x8AF0FB00, 0x8AF1FB00, 0x8AF2FB00, 0x8AF3FB00, 0x8AF4FB00, 0x8AF5FB00, 0x8AF6FB00, 0x8AF7FB00, 0x8AF8FB00, 0x8AF9FB00, 0x8AFAFB00, 0x8AFBFB00, + 0x8AFCFB00, 0x8AFDFB00, 0x8AFEFB00, 0x8AFFFB00, 0x8B00FB00, 0x8B01FB00, 0x8B02FB00, 0x8B03FB00, 0x8B04FB00, 0x8B05FB00, 0x8B06FB00, 0x8B07FB00, 0x8B08FB00, 0x8B09FB00, 0x8B0AFB00, + 0x8B0BFB00, 0x8B0CFB00, 0x8B0DFB00, 0x8B0EFB00, 0x8B0FFB00, 0x8B10FB00, 0x8B11FB00, 0x8B12FB00, 0x8B13FB00, 0x8B14FB00, 0x8B15FB00, 0x8B16FB00, 0x8B17FB00, 0x8B18FB00, 0x8B19FB00, + 0x8B1AFB00, 0x8B1BFB00, 0x8B1CFB00, 0x8B1DFB00, 0x8B1EFB00, 0x8B1FFB00, 0x8B20FB00, 0x8B21FB00, 0x8B22FB00, 0x8B23FB00, 0x8B24FB00, 0x8B25FB00, 0x8B26FB00, 0x8B27FB00, 0x8B28FB00, + 0x8B29FB00, 0x8B2AFB00, 0x8B2BFB00, 0x8B2CFB00, 0x8B2DFB00, 0x8B2EFB00, 0x8B2FFB00, 0x8B30FB00, 0x8B31FB00, 0x8B32FB00, 0x8B33FB00, 0x8B34FB00, 0x8B35FB00, 0x8B36FB00, 0x8B37FB00, + 0x8B38FB00, 0x8B39FB00, 0x8B3AFB00, 0x8B3BFB00, 0x8B3CFB00, 0x8B3DFB00, 0x8B3EFB00, 0x8B3FFB00, 0x8B40FB00, 0x8B41FB00, 0x8B42FB00, 0x8B43FB00, 0x8B44FB00, 0x8B45FB00, 0x8B46FB00, + 0x8B47FB00, 0x8B48FB00, 0x8B49FB00, 0x8B4AFB00, 0x8B4BFB00, 0x8B4CFB00, 0x8B4DFB00, 0x8B4EFB00, 0x8B4FFB00, 0x8B50FB00, 0x8B51FB00, 0x8B52FB00, 0x8B53FB00, 0x8B54FB00, 0x8B55FB00, + 0x8B56FB00, 0x8B57FB00, 0x8B58FB00, 0x8B59FB00, 0x8B5AFB00, 0x8B5BFB00, 0x8B5CFB00, 0x8B5DFB00, 0x8B5EFB00, 0x8B5FFB00, 0x8B60FB00, 0x8B61FB00, 0x8B62FB00, 0x8B63FB00, 0x8B64FB00, + 0x8B65FB00, 0x8B66FB00, 
0x8B67FB00, 0x8B68FB00, 0x8B69FB00, 0x8B6AFB00, 0x8B6BFB00, 0x8B6CFB00, 0x8B6DFB00, 0x8B6EFB00, 0x8B6FFB00, 0x8B70FB00, 0x8B71FB00, 0x8B72FB00, 0x8B73FB00, + 0x8B74FB00, 0x8B75FB00, 0x8B76FB00, 0x8B77FB00, 0x8B78FB00, 0x8B79FB00, 0x8B7AFB00, 0x8B7BFB00, 0x8B7CFB00, 0x8B7DFB00, 0x8B7EFB00, 0x8B7FFB00, 0x8B80FB00, 0x8B81FB00, 0x8B82FB00, + 0x8B83FB00, 0x8B84FB00, 0x8B85FB00, 0x8B86FB00, 0x8B87FB00, 0x8B88FB00, 0x8B89FB00, 0x8B8AFB00, 0x8B8BFB00, 0x8B8CFB00, 0x8B8DFB00, 0x8B8EFB00, 0x8B8FFB00, 0x8B90FB00, 0x8B91FB00, + 0x8B92FB00, 0x8B93FB00, 0x8B94FB00, 0x8B95FB00, 0x8B96FB00, 0x8B97FB00, 0x8B98FB00, 0x8B99FB00, 0x8B9AFB00, 0x8B9BFB00, 0x8B9CFB00, 0x8B9DFB00, 0x8B9EFB00, 0x8B9FFB00, 0x8BA0FB00, + 0x8BA1FB00, 0x8BA2FB00, 0x8BA3FB00, 0x8BA4FB00, 0x8BA5FB00, 0x8BA6FB00, 0x8BA7FB00, 0x8BA8FB00, 0x8BA9FB00, 0x8BAAFB00, 0x8BABFB00, 0x8BACFB00, 0x8BADFB00, 0x8BAEFB00, 0x8BAFFB00, + 0x8BB0FB00, 0x8BB1FB00, 0x8BB2FB00, 0x8BB3FB00, 0x8BB4FB00, 0x8BB5FB00, 0x8BB6FB00, 0x8BB7FB00, 0x8BB8FB00, 0x8BB9FB00, 0x8BBAFB00, 0x8BBBFB00, 0x8BBCFB00, 0x8BBDFB00, 0x8BBEFB00, + 0x8BBFFB00, 0x8BC0FB00, 0x8BC1FB00, 0x8BC2FB00, 0x8BC3FB00, 0x8BC4FB00, 0x8BC5FB00, 0x8BC6FB00, 0x8BC7FB00, 0x8BC8FB00, 0x8BC9FB00, 0x8BCAFB00, 0x8BCBFB00, 0x8BCCFB00, 0x8BCDFB00, + 0x8BCEFB00, 0x8BCFFB00, 0x8BD0FB00, 0x8BD1FB00, 0x8BD2FB00, 0x8BD3FB00, 0x8BD4FB00, 0x8BD5FB00, 0x8BD6FB00, 0x8BD7FB00, 0x8BD8FB00, 0x8BD9FB00, 0x8BDAFB00, 0x8BDBFB00, 0x8BDCFB00, + 0x8BDDFB00, 0x8BDEFB00, 0x8BDFFB00, 0x8BE0FB00, 0x8BE1FB00, 0x8BE2FB00, 0x8BE3FB00, 0x8BE4FB00, 0x8BE5FB00, 0x8BE6FB00, 0x8BE7FB00, 0x8BE8FB00, 0x8BE9FB00, 0x8BEAFB00, 0x8BEBFB00, + 0x8BECFB00, 0x8BEDFB00, 0x8BEEFB00, 0x8BEFFB00, 0x8BF0FB00, 0x8BF1FB00, 0x8BF2FB00, 0x8BF3FB00, 0x8BF4FB00, 0x8BF5FB00, 0x8BF6FB00, 0x8BF7FB00, 0x8BF8FB00, 0x8BF9FB00, 0x8BFAFB00, + 0x8BFBFB00, 0x8BFCFB00, 0x8BFDFB00, 0x8BFEFB00, 0x8BFFFB00, 0x8C00FB00, 0x8C01FB00, 0x8C02FB00, 0x8C03FB00, 0x8C04FB00, 0x8C05FB00, 0x8C06FB00, 0x8C07FB00, 0x8C08FB00, 0x8C09FB00, + 0x8C0AFB00, 
0x8C0BFB00, 0x8C0CFB00, 0x8C0DFB00, 0x8C0EFB00, 0x8C0FFB00, 0x8C10FB00, 0x8C11FB00, 0x8C12FB00, 0x8C13FB00, 0x8C14FB00, 0x8C15FB00, 0x8C16FB00, 0x8C17FB00, 0x8C18FB00, + 0x8C19FB00, 0x8C1AFB00, 0x8C1BFB00, 0x8C1CFB00, 0x8C1DFB00, 0x8C1EFB00, 0x8C1FFB00, 0x8C20FB00, 0x8C21FB00, 0x8C22FB00, 0x8C23FB00, 0x8C24FB00, 0x8C25FB00, 0x8C26FB00, 0x8C27FB00, + 0x8C28FB00, 0x8C29FB00, 0x8C2AFB00, 0x8C2BFB00, 0x8C2CFB00, 0x8C2DFB00, 0x8C2EFB00, 0x8C2FFB00, 0x8C30FB00, 0x8C31FB00, 0x8C32FB00, 0x8C33FB00, 0x8C34FB00, 0x8C35FB00, 0x8C36FB00, + 0x8C37FB00, 0x8C38FB00, 0x8C39FB00, 0x8C3AFB00, 0x8C3BFB00, 0x8C3CFB00, 0x8C3DFB00, 0x8C3EFB00, 0x8C3FFB00, 0x8C40FB00, 0x8C41FB00, 0x8C42FB00, 0x8C43FB00, 0x8C44FB00, 0x8C45FB00, + 0x8C46FB00, 0x8C47FB00, 0x8C48FB00, 0x8C49FB00, 0x8C4AFB00, 0x8C4BFB00, 0x8C4CFB00, 0x8C4DFB00, 0x8C4EFB00, 0x8C4FFB00, 0x8C50FB00, 0x8C51FB00, 0x8C52FB00, 0x8C53FB00, 0x8C54FB00, + 0x8C55FB00, 0x8C56FB00, 0x8C57FB00, 0x8C58FB00, 0x8C59FB00, 0x8C5AFB00, 0x8C5BFB00, 0x8C5CFB00, 0x8C5DFB00, 0x8C5EFB00, 0x8C5FFB00, 0x8C60FB00, 0x8C61FB00, 0x8C62FB00, 0x8C63FB00, + 0x8C64FB00, 0x8C65FB00, 0x8C66FB00, 0x8C67FB00, 0x8C68FB00, 0x8C69FB00, 0x8C6AFB00, 0x8C6BFB00, 0x8C6CFB00, 0x8C6DFB00, 0x8C6EFB00, 0x8C6FFB00, 0x8C70FB00, 0x8C71FB00, 0x8C72FB00, + 0x8C73FB00, 0x8C74FB00, 0x8C75FB00, 0x8C76FB00, 0x8C77FB00, 0x8C78FB00, 0x8C79FB00, 0x8C7AFB00, 0x8C7BFB00, 0x8C7CFB00, 0x8C7DFB00, 0x8C7EFB00, 0x8C7FFB00, 0x8C80FB00, 0x8C81FB00, + 0x8C82FB00, 0x8C83FB00, 0x8C84FB00, 0x8C85FB00, 0x8C86FB00, 0x8C87FB00, 0x8C88FB00, 0x8C89FB00, 0x8C8AFB00, 0x8C8BFB00, 0x8C8CFB00, 0x8C8DFB00, 0x8C8EFB00, 0x8C8FFB00, 0x8C90FB00, + 0x8C91FB00, 0x8C92FB00, 0x8C93FB00, 0x8C94FB00, 0x8C95FB00, 0x8C96FB00, 0x8C97FB00, 0x8C98FB00, 0x8C99FB00, 0x8C9AFB00, 0x8C9BFB00, 0x8C9CFB00, 0x8C9DFB00, 0x8C9EFB00, 0x8C9FFB00, + 0x8CA0FB00, 0x8CA1FB00, 0x8CA2FB00, 0x8CA3FB00, 0x8CA4FB00, 0x8CA5FB00, 0x8CA6FB00, 0x8CA7FB00, 0x8CA8FB00, 0x8CA9FB00, 0x8CAAFB00, 0x8CABFB00, 0x8CACFB00, 0x8CADFB00, 0x8CAEFB00, + 
0x8CAFFB00, 0x8CB0FB00, 0x8CB1FB00, 0x8CB2FB00, 0x8CB3FB00, 0x8CB4FB00, 0x8CB5FB00, 0x8CB6FB00, 0x8CB7FB00, 0x8CB8FB00, 0x8CB9FB00, 0x8CBAFB00, 0x8CBBFB00, 0x8CBCFB00, 0x8CBDFB00, + 0x8CBEFB00, 0x8CBFFB00, 0x8CC0FB00, 0x8CC1FB00, 0x8CC2FB00, 0x8CC3FB00, 0x8CC4FB00, 0x8CC5FB00, 0x8CC6FB00, 0x8CC7FB00, 0x8CC8FB00, 0x8CC9FB00, 0x8CCAFB00, 0x8CCBFB00, 0x8CCCFB00, + 0x8CCDFB00, 0x8CCEFB00, 0x8CCFFB00, 0x8CD0FB00, 0x8CD1FB00, 0x8CD2FB00, 0x8CD3FB00, 0x8CD4FB00, 0x8CD5FB00, 0x8CD6FB00, 0x8CD7FB00, 0x8CD8FB00, 0x8CD9FB00, 0x8CDAFB00, 0x8CDBFB00, + 0x8CDCFB00, 0x8CDDFB00, 0x8CDEFB00, 0x8CDFFB00, 0x8CE0FB00, 0x8CE1FB00, 0x8CE2FB00, 0x8CE3FB00, 0x8CE4FB00, 0x8CE5FB00, 0x8CE6FB00, 0x8CE7FB00, 0x8CE8FB00, 0x8CE9FB00, 0x8CEAFB00, + 0x8CEBFB00, 0x8CECFB00, 0x8CEDFB00, 0x8CEEFB00, 0x8CEFFB00, 0x8CF0FB00, 0x8CF1FB00, 0x8CF2FB00, 0x8CF3FB00, 0x8CF4FB00, 0x8CF5FB00, 0x8CF6FB00, 0x8CF7FB00, 0x8CF8FB00, 0x8CF9FB00, + 0x8CFAFB00, 0x8CFBFB00, 0x8CFCFB00, 0x8CFDFB00, 0x8CFEFB00, 0x8CFFFB00, 0x8D00FB00, 0x8D01FB00, 0x8D02FB00, 0x8D03FB00, 0x8D04FB00, 0x8D05FB00, 0x8D06FB00, 0x8D07FB00, 0x8D08FB00, + 0x8D09FB00, 0x8D0AFB00, 0x8D0BFB00, 0x8D0CFB00, 0x8D0DFB00, 0x8D0EFB00, 0x8D0FFB00, 0x8D10FB00, 0x8D11FB00, 0x8D12FB00, 0x8D13FB00, 0x8D14FB00, 0x8D15FB00, 0x8D16FB00, 0x8D17FB00, + 0x8D18FB00, 0x8D19FB00, 0x8D1AFB00, 0x8D1BFB00, 0x8D1CFB00, 0x8D1DFB00, 0x8D1EFB00, 0x8D1FFB00, 0x8D20FB00, 0x8D21FB00, 0x8D22FB00, 0x8D23FB00, 0x8D24FB00, 0x8D25FB00, 0x8D26FB00, + 0x8D27FB00, 0x8D28FB00, 0x8D29FB00, 0x8D2AFB00, 0x8D2BFB00, 0x8D2CFB00, 0x8D2DFB00, 0x8D2EFB00, 0x8D2FFB00, 0x8D30FB00, 0x8D31FB00, 0x8D32FB00, 0x8D33FB00, 0x8D34FB00, 0x8D35FB00, + 0x8D36FB00, 0x8D37FB00, 0x8D38FB00, 0x8D39FB00, 0x8D3AFB00, 0x8D3BFB00, 0x8D3CFB00, 0x8D3DFB00, 0x8D3EFB00, 0x8D3FFB00, 0x8D40FB00, 0x8D41FB00, 0x8D42FB00, 0x8D43FB00, 0x8D44FB00, + 0x8D45FB00, 0x8D46FB00, 0x8D47FB00, 0x8D48FB00, 0x8D49FB00, 0x8D4AFB00, 0x8D4BFB00, 0x8D4CFB00, 0x8D4DFB00, 0x8D4EFB00, 0x8D4FFB00, 0x8D50FB00, 0x8D51FB00, 0x8D52FB00, 0x8D53FB00, 
+ 0x8D54FB00, 0x8D55FB00, 0x8D56FB00, 0x8D57FB00, 0x8D58FB00, 0x8D59FB00, 0x8D5AFB00, 0x8D5BFB00, 0x8D5CFB00, 0x8D5DFB00, 0x8D5EFB00, 0x8D5FFB00, 0x8D60FB00, 0x8D61FB00, 0x8D62FB00, + 0x8D63FB00, 0x8D64FB00, 0x8D65FB00, 0x8D66FB00, 0x8D67FB00, 0x8D68FB00, 0x8D69FB00, 0x8D6AFB00, 0x8D6BFB00, 0x8D6CFB00, 0x8D6DFB00, 0x8D6EFB00, 0x8D6FFB00, 0x8D70FB00, 0x8D71FB00, + 0x8D72FB00, 0x8D73FB00, 0x8D74FB00, 0x8D75FB00, 0x8D76FB00, 0x8D77FB00, 0x8D78FB00, 0x8D79FB00, 0x8D7AFB00, 0x8D7BFB00, 0x8D7CFB00, 0x8D7DFB00, 0x8D7EFB00, 0x8D7FFB00, 0x8D80FB00, + 0x8D81FB00, 0x8D82FB00, 0x8D83FB00, 0x8D84FB00, 0x8D85FB00, 0x8D86FB00, 0x8D87FB00, 0x8D88FB00, 0x8D89FB00, 0x8D8AFB00, 0x8D8BFB00, 0x8D8CFB00, 0x8D8DFB00, 0x8D8EFB00, 0x8D8FFB00, + 0x8D90FB00, 0x8D91FB00, 0x8D92FB00, 0x8D93FB00, 0x8D94FB00, 0x8D95FB00, 0x8D96FB00, 0x8D97FB00, 0x8D98FB00, 0x8D99FB00, 0x8D9AFB00, 0x8D9BFB00, 0x8D9CFB00, 0x8D9DFB00, 0x8D9EFB00, + 0x8D9FFB00, 0x8DA0FB00, 0x8DA1FB00, 0x8DA2FB00, 0x8DA3FB00, 0x8DA4FB00, 0x8DA5FB00, 0x8DA6FB00, 0x8DA7FB00, 0x8DA8FB00, 0x8DA9FB00, 0x8DAAFB00, 0x8DABFB00, 0x8DACFB00, 0x8DADFB00, + 0x8DAEFB00, 0x8DAFFB00, 0x8DB0FB00, 0x8DB1FB00, 0x8DB2FB00, 0x8DB3FB00, 0x8DB4FB00, 0x8DB5FB00, 0x8DB6FB00, 0x8DB7FB00, 0x8DB8FB00, 0x8DB9FB00, 0x8DBAFB00, 0x8DBBFB00, 0x8DBCFB00, + 0x8DBDFB00, 0x8DBEFB00, 0x8DBFFB00, 0x8DC0FB00, 0x8DC1FB00, 0x8DC2FB00, 0x8DC3FB00, 0x8DC4FB00, 0x8DC5FB00, 0x8DC6FB00, 0x8DC7FB00, 0x8DC8FB00, 0x8DC9FB00, 0x8DCAFB00, 0x8DCBFB00, + 0x8DCCFB00, 0x8DCDFB00, 0x8DCEFB00, 0x8DCFFB00, 0x8DD0FB00, 0x8DD1FB00, 0x8DD2FB00, 0x8DD3FB00, 0x8DD4FB00, 0x8DD5FB00, 0x8DD6FB00, 0x8DD7FB00, 0x8DD8FB00, 0x8DD9FB00, 0x8DDAFB00, + 0x8DDBFB00, 0x8DDCFB00, 0x8DDDFB00, 0x8DDEFB00, 0x8DDFFB00, 0x8DE0FB00, 0x8DE1FB00, 0x8DE2FB00, 0x8DE3FB00, 0x8DE4FB00, 0x8DE5FB00, 0x8DE6FB00, 0x8DE7FB00, 0x8DE8FB00, 0x8DE9FB00, + 0x8DEAFB00, 0x8DEBFB00, 0x8DECFB00, 0x8DEDFB00, 0x8DEEFB00, 0x8DEFFB00, 0x8DF0FB00, 0x8DF1FB00, 0x8DF2FB00, 0x8DF3FB00, 0x8DF4FB00, 0x8DF5FB00, 0x8DF6FB00, 0x8DF7FB00, 
0x8DF8FB00, + 0x8DF9FB00, 0x8DFAFB00, 0x8DFBFB00, 0x8DFCFB00, 0x8DFDFB00, 0x8DFEFB00, 0x8DFFFB00, 0x8E00FB00, 0x8E01FB00, 0x8E02FB00, 0x8E03FB00, 0x8E04FB00, 0x8E05FB00, 0x8E06FB00, 0x8E07FB00, + 0x8E08FB00, 0x8E09FB00, 0x8E0AFB00, 0x8E0BFB00, 0x8E0CFB00, 0x8E0DFB00, 0x8E0EFB00, 0x8E0FFB00, 0x8E10FB00, 0x8E11FB00, 0x8E12FB00, 0x8E13FB00, 0x8E14FB00, 0x8E15FB00, 0x8E16FB00, + 0x8E17FB00, 0x8E18FB00, 0x8E19FB00, 0x8E1AFB00, 0x8E1BFB00, 0x8E1CFB00, 0x8E1DFB00, 0x8E1EFB00, 0x8E1FFB00, 0x8E20FB00, 0x8E21FB00, 0x8E22FB00, 0x8E23FB00, 0x8E24FB00, 0x8E25FB00, + 0x8E26FB00, 0x8E27FB00, 0x8E28FB00, 0x8E29FB00, 0x8E2AFB00, 0x8E2BFB00, 0x8E2CFB00, 0x8E2DFB00, 0x8E2EFB00, 0x8E2FFB00, 0x8E30FB00, 0x8E31FB00, 0x8E32FB00, 0x8E33FB00, 0x8E34FB00, + 0x8E35FB00, 0x8E36FB00, 0x8E37FB00, 0x8E38FB00, 0x8E39FB00, 0x8E3AFB00, 0x8E3BFB00, 0x8E3CFB00, 0x8E3DFB00, 0x8E3EFB00, 0x8E3FFB00, 0x8E40FB00, 0x8E41FB00, 0x8E42FB00, 0x8E43FB00, + 0x8E44FB00, 0x8E45FB00, 0x8E46FB00, 0x8E47FB00, 0x8E48FB00, 0x8E49FB00, 0x8E4AFB00, 0x8E4BFB00, 0x8E4CFB00, 0x8E4DFB00, 0x8E4EFB00, 0x8E4FFB00, 0x8E50FB00, 0x8E51FB00, 0x8E52FB00, + 0x8E53FB00, 0x8E54FB00, 0x8E55FB00, 0x8E56FB00, 0x8E57FB00, 0x8E58FB00, 0x8E59FB00, 0x8E5AFB00, 0x8E5BFB00, 0x8E5CFB00, 0x8E5DFB00, 0x8E5EFB00, 0x8E5FFB00, 0x8E60FB00, 0x8E61FB00, + 0x8E62FB00, 0x8E63FB00, 0x8E64FB00, 0x8E65FB00, 0x8E66FB00, 0x8E67FB00, 0x8E68FB00, 0x8E69FB00, 0x8E6AFB00, 0x8E6BFB00, 0x8E6CFB00, 0x8E6DFB00, 0x8E6EFB00, 0x8E6FFB00, 0x8E70FB00, + 0x8E71FB00, 0x8E72FB00, 0x8E73FB00, 0x8E74FB00, 0x8E75FB00, 0x8E76FB00, 0x8E77FB00, 0x8E78FB00, 0x8E79FB00, 0x8E7AFB00, 0x8E7BFB00, 0x8E7CFB00, 0x8E7DFB00, 0x8E7EFB00, 0x8E7FFB00, + 0x8E80FB00, 0x8E81FB00, 0x8E82FB00, 0x8E83FB00, 0x8E84FB00, 0x8E85FB00, 0x8E86FB00, 0x8E87FB00, 0x8E88FB00, 0x8E89FB00, 0x8E8AFB00, 0x8E8BFB00, 0x8E8CFB00, 0x8E8DFB00, 0x8E8EFB00, + 0x8E8FFB00, 0x8E90FB00, 0x8E91FB00, 0x8E92FB00, 0x8E93FB00, 0x8E94FB00, 0x8E95FB00, 0x8E96FB00, 0x8E97FB00, 0x8E98FB00, 0x8E99FB00, 0x8E9AFB00, 0x8E9BFB00, 
0x8E9CFB00, 0x8E9DFB00, + 0x8E9EFB00, 0x8E9FFB00, 0x8EA0FB00, 0x8EA1FB00, 0x8EA2FB00, 0x8EA3FB00, 0x8EA4FB00, 0x8EA5FB00, 0x8EA6FB00, 0x8EA7FB00, 0x8EA8FB00, 0x8EA9FB00, 0x8EAAFB00, 0x8EABFB00, 0x8EACFB00, + 0x8EADFB00, 0x8EAEFB00, 0x8EAFFB00, 0x8EB0FB00, 0x8EB1FB00, 0x8EB2FB00, 0x8EB3FB00, 0x8EB4FB00, 0x8EB5FB00, 0x8EB6FB00, 0x8EB7FB00, 0x8EB8FB00, 0x8EB9FB00, 0x8EBAFB00, 0x8EBBFB00, + 0x8EBCFB00, 0x8EBDFB00, 0x8EBEFB00, 0x8EBFFB00, 0x8EC0FB00, 0x8EC1FB00, 0x8EC2FB00, 0x8EC3FB00, 0x8EC4FB00, 0x8EC5FB00, 0x8EC6FB00, 0x8EC7FB00, 0x8EC8FB00, 0x8EC9FB00, 0x8ECAFB00, + 0x8ECBFB00, 0x8ECCFB00, 0x8ECDFB00, 0x8ECEFB00, 0x8ECFFB00, 0x8ED0FB00, 0x8ED1FB00, 0x8ED2FB00, 0x8ED3FB00, 0x8ED4FB00, 0x8ED5FB00, 0x8ED6FB00, 0x8ED7FB00, 0x8ED8FB00, 0x8ED9FB00, + 0x8EDAFB00, 0x8EDBFB00, 0x8EDCFB00, 0x8EDDFB00, 0x8EDEFB00, 0x8EDFFB00, 0x8EE0FB00, 0x8EE1FB00, 0x8EE2FB00, 0x8EE3FB00, 0x8EE4FB00, 0x8EE5FB00, 0x8EE6FB00, 0x8EE7FB00, 0x8EE8FB00, + 0x8EE9FB00, 0x8EEAFB00, 0x8EEBFB00, 0x8EECFB00, 0x8EEDFB00, 0x8EEEFB00, 0x8EEFFB00, 0x8EF0FB00, 0x8EF1FB00, 0x8EF2FB00, 0x8EF3FB00, 0x8EF4FB00, 0x8EF5FB00, 0x8EF6FB00, 0x8EF7FB00, + 0x8EF8FB00, 0x8EF9FB00, 0x8EFAFB00, 0x8EFBFB00, 0x8EFCFB00, 0x8EFDFB00, 0x8EFEFB00, 0x8EFFFB00, 0x8F00FB00, 0x8F01FB00, 0x8F02FB00, 0x8F03FB00, 0x8F04FB00, 0x8F05FB00, 0x8F06FB00, + 0x8F07FB00, 0x8F08FB00, 0x8F09FB00, 0x8F0AFB00, 0x8F0BFB00, 0x8F0CFB00, 0x8F0DFB00, 0x8F0EFB00, 0x8F0FFB00, 0x8F10FB00, 0x8F11FB00, 0x8F12FB00, 0x8F13FB00, 0x8F14FB00, 0x8F15FB00, + 0x8F16FB00, 0x8F17FB00, 0x8F18FB00, 0x8F19FB00, 0x8F1AFB00, 0x8F1BFB00, 0x8F1CFB00, 0x8F1DFB00, 0x8F1EFB00, 0x8F1FFB00, 0x8F20FB00, 0x8F21FB00, 0x8F22FB00, 0x8F23FB00, 0x8F24FB00, + 0x8F25FB00, 0x8F26FB00, 0x8F27FB00, 0x8F28FB00, 0x8F29FB00, 0x8F2AFB00, 0x8F2BFB00, 0x8F2CFB00, 0x8F2DFB00, 0x8F2EFB00, 0x8F2FFB00, 0x8F30FB00, 0x8F31FB00, 0x8F32FB00, 0x8F33FB00, + 0x8F34FB00, 0x8F35FB00, 0x8F36FB00, 0x8F37FB00, 0x8F38FB00, 0x8F39FB00, 0x8F3AFB00, 0x8F3BFB00, 0x8F3CFB00, 0x8F3DFB00, 0x8F3EFB00, 0x8F3FFB00, 
0x8F40FB00, 0x8F41FB00, 0x8F42FB00, + 0x8F43FB00, 0x8F44FB00, 0x8F45FB00, 0x8F46FB00, 0x8F47FB00, 0x8F48FB00, 0x8F49FB00, 0x8F4AFB00, 0x8F4BFB00, 0x8F4CFB00, 0x8F4DFB00, 0x8F4EFB00, 0x8F4FFB00, 0x8F50FB00, 0x8F51FB00, + 0x8F52FB00, 0x8F53FB00, 0x8F54FB00, 0x8F55FB00, 0x8F56FB00, 0x8F57FB00, 0x8F58FB00, 0x8F59FB00, 0x8F5AFB00, 0x8F5BFB00, 0x8F5CFB00, 0x8F5DFB00, 0x8F5EFB00, 0x8F5FFB00, 0x8F60FB00, + 0x8F61FB00, 0x8F62FB00, 0x8F63FB00, 0x8F64FB00, 0x8F65FB00, 0x8F66FB00, 0x8F67FB00, 0x8F68FB00, 0x8F69FB00, 0x8F6AFB00, 0x8F6BFB00, 0x8F6CFB00, 0x8F6DFB00, 0x8F6EFB00, 0x8F6FFB00, + 0x8F70FB00, 0x8F71FB00, 0x8F72FB00, 0x8F73FB00, 0x8F74FB00, 0x8F75FB00, 0x8F76FB00, 0x8F77FB00, 0x8F78FB00, 0x8F79FB00, 0x8F7AFB00, 0x8F7BFB00, 0x8F7CFB00, 0x8F7DFB00, 0x8F7EFB00, + 0x8F7FFB00, 0x8F80FB00, 0x8F81FB00, 0x8F82FB00, 0x8F83FB00, 0x8F84FB00, 0x8F85FB00, 0x8F86FB00, 0x8F87FB00, 0x8F88FB00, 0x8F89FB00, 0x8F8AFB00, 0x8F8BFB00, 0x8F8CFB00, 0x8F8DFB00, + 0x8F8EFB00, 0x8F8FFB00, 0x8F90FB00, 0x8F91FB00, 0x8F92FB00, 0x8F93FB00, 0x8F94FB00, 0x8F95FB00, 0x8F96FB00, 0x8F97FB00, 0x8F98FB00, 0x8F99FB00, 0x8F9AFB00, 0x8F9BFB00, 0x8F9CFB00, + 0x8F9DFB00, 0x8F9EFB00, 0x8F9FFB00, 0x8FA0FB00, 0x8FA1FB00, 0x8FA2FB00, 0x8FA3FB00, 0x8FA4FB00, 0x8FA5FB00, 0x8FA6FB00, 0x8FA7FB00, 0x8FA8FB00, 0x8FA9FB00, 0x8FAAFB00, 0x8FABFB00, + 0x8FACFB00, 0x8FADFB00, 0x8FAEFB00, 0x8FAFFB00, 0x8FB0FB00, 0x8FB1FB00, 0x8FB2FB00, 0x8FB3FB00, 0x8FB4FB00, 0x8FB5FB00, 0x8FB6FB00, 0x8FB7FB00, 0x8FB8FB00, 0x8FB9FB00, 0x8FBAFB00, + 0x8FBBFB00, 0x8FBCFB00, 0x8FBDFB00, 0x8FBEFB00, 0x8FBFFB00, 0x8FC0FB00, 0x8FC1FB00, 0x8FC2FB00, 0x8FC3FB00, 0x8FC4FB00, 0x8FC5FB00, 0x8FC6FB00, 0x8FC7FB00, 0x8FC8FB00, 0x8FC9FB00, + 0x8FCAFB00, 0x8FCBFB00, 0x8FCCFB00, 0x8FCDFB00, 0x8FCEFB00, 0x8FCFFB00, 0x8FD0FB00, 0x8FD1FB00, 0x8FD2FB00, 0x8FD3FB00, 0x8FD4FB00, 0x8FD5FB00, 0x8FD6FB00, 0x8FD7FB00, 0x8FD8FB00, + 0x8FD9FB00, 0x8FDAFB00, 0x8FDBFB00, 0x8FDCFB00, 0x8FDDFB00, 0x8FDEFB00, 0x8FDFFB00, 0x8FE0FB00, 0x8FE1FB00, 0x8FE2FB00, 0x8FE3FB00, 
0x8FE4FB00, 0x8FE5FB00, 0x8FE6FB00, 0x8FE7FB00, + 0x8FE8FB00, 0x8FE9FB00, 0x8FEAFB00, 0x8FEBFB00, 0x8FECFB00, 0x8FEDFB00, 0x8FEEFB00, 0x8FEFFB00, 0x8FF0FB00, 0x8FF1FB00, 0x8FF2FB00, 0x8FF3FB00, 0x8FF4FB00, 0x8FF5FB00, 0x8FF6FB00, + 0x8FF7FB00, 0x8FF8FB00, 0x8FF9FB00, 0x8FFAFB00, 0x8FFBFB00, 0x8FFCFB00, 0x8FFDFB00, 0x8FFEFB00, 0x8FFFFB00, 0x9000FB00, 0x9001FB00, 0x9002FB00, 0x9003FB00, 0x9004FB00, 0x9005FB00, + 0x9006FB00, 0x9007FB00, 0x9008FB00, 0x9009FB00, 0x900AFB00, 0x900BFB00, 0x900CFB00, 0x900DFB00, 0x900EFB00, 0x900FFB00, 0x9010FB00, 0x9011FB00, 0x9012FB00, 0x9013FB00, 0x9014FB00, + 0x9015FB00, 0x9016FB00, 0x9017FB00, 0x9018FB00, 0x9019FB00, 0x901AFB00, 0x901BFB00, 0x901CFB00, 0x901DFB00, 0x901EFB00, 0x901FFB00, 0x9020FB00, 0x9021FB00, 0x9022FB00, 0x9023FB00, + 0x9024FB00, 0x9025FB00, 0x9026FB00, 0x9027FB00, 0x9028FB00, 0x9029FB00, 0x902AFB00, 0x902BFB00, 0x902CFB00, 0x902DFB00, 0x902EFB00, 0x902FFB00, 0x9030FB00, 0x9031FB00, 0x9032FB00, + 0x9033FB00, 0x9034FB00, 0x9035FB00, 0x9036FB00, 0x9037FB00, 0x9038FB00, 0x9039FB00, 0x903AFB00, 0x903BFB00, 0x903CFB00, 0x903DFB00, 0x903EFB00, 0x903FFB00, 0x9040FB00, 0x9041FB00, + 0x9042FB00, 0x9043FB00, 0x9044FB00, 0x9045FB00, 0x9046FB00, 0x9047FB00, 0x9048FB00, 0x9049FB00, 0x904AFB00, 0x904BFB00, 0x904CFB00, 0x904DFB00, 0x904EFB00, 0x904FFB00, 0x9050FB00, + 0x9051FB00, 0x9052FB00, 0x9053FB00, 0x9054FB00, 0x9055FB00, 0x9056FB00, 0x9057FB00, 0x9058FB00, 0x9059FB00, 0x905AFB00, 0x905BFB00, 0x905CFB00, 0x905DFB00, 0x905EFB00, 0x905FFB00, + 0x9060FB00, 0x9061FB00, 0x9062FB00, 0x9063FB00, 0x9064FB00, 0x9065FB00, 0x9066FB00, 0x9067FB00, 0x9068FB00, 0x9069FB00, 0x906AFB00, 0x906BFB00, 0x906CFB00, 0x906DFB00, 0x906EFB00, + 0x906FFB00, 0x9070FB00, 0x9071FB00, 0x9072FB00, 0x9073FB00, 0x9074FB00, 0x9075FB00, 0x9076FB00, 0x9077FB00, 0x9078FB00, 0x9079FB00, 0x907AFB00, 0x907BFB00, 0x907CFB00, 0x907DFB00, + 0x907EFB00, 0x907FFB00, 0x9080FB00, 0x9081FB00, 0x9082FB00, 0x9083FB00, 0x9084FB00, 0x9085FB00, 0x9086FB00, 0x9087FB00, 
0x9088FB00, 0x9089FB00, 0x908AFB00, 0x908BFB00, 0x908CFB00, + 0x908DFB00, 0x908EFB00, 0x908FFB00, 0x9090FB00, 0x9091FB00, 0x9092FB00, 0x9093FB00, 0x9094FB00, 0x9095FB00, 0x9096FB00, 0x9097FB00, 0x9098FB00, 0x9099FB00, 0x909AFB00, 0x909BFB00, + 0x909CFB00, 0x909DFB00, 0x909EFB00, 0x909FFB00, 0x90A0FB00, 0x90A1FB00, 0x90A2FB00, 0x90A3FB00, 0x90A4FB00, 0x90A5FB00, 0x90A6FB00, 0x90A7FB00, 0x90A8FB00, 0x90A9FB00, 0x90AAFB00, + 0x90ABFB00, 0x90ACFB00, 0x90ADFB00, 0x90AEFB00, 0x90AFFB00, 0x90B0FB00, 0x90B1FB00, 0x90B2FB00, 0x90B3FB00, 0x90B4FB00, 0x90B5FB00, 0x90B6FB00, 0x90B7FB00, 0x90B8FB00, 0x90B9FB00, + 0x90BAFB00, 0x90BBFB00, 0x90BCFB00, 0x90BDFB00, 0x90BEFB00, 0x90BFFB00, 0x90C0FB00, 0x90C1FB00, 0x90C2FB00, 0x90C3FB00, 0x90C4FB00, 0x90C5FB00, 0x90C6FB00, 0x90C7FB00, 0x90C8FB00, + 0x90C9FB00, 0x90CAFB00, 0x90CBFB00, 0x90CCFB00, 0x90CDFB00, 0x90CEFB00, 0x90CFFB00, 0x90D0FB00, 0x90D1FB00, 0x90D2FB00, 0x90D3FB00, 0x90D4FB00, 0x90D5FB00, 0x90D6FB00, 0x90D7FB00, + 0x90D8FB00, 0x90D9FB00, 0x90DAFB00, 0x90DBFB00, 0x90DCFB00, 0x90DDFB00, 0x90DEFB00, 0x90DFFB00, 0x90E0FB00, 0x90E1FB00, 0x90E2FB00, 0x90E3FB00, 0x90E4FB00, 0x90E5FB00, 0x90E6FB00, + 0x90E7FB00, 0x90E8FB00, 0x90E9FB00, 0x90EAFB00, 0x90EBFB00, 0x90ECFB00, 0x90EDFB00, 0x90EEFB00, 0x90EFFB00, 0x90F0FB00, 0x90F1FB00, 0x90F2FB00, 0x90F3FB00, 0x90F4FB00, 0x90F5FB00, + 0x90F6FB00, 0x90F7FB00, 0x90F8FB00, 0x90F9FB00, 0x90FAFB00, 0x90FBFB00, 0x90FCFB00, 0x90FDFB00, 0x90FEFB00, 0x90FFFB00, 0x9100FB00, 0x9101FB00, 0x9102FB00, 0x9103FB00, 0x9104FB00, + 0x9105FB00, 0x9106FB00, 0x9107FB00, 0x9108FB00, 0x9109FB00, 0x910AFB00, 0x910BFB00, 0x910CFB00, 0x910DFB00, 0x910EFB00, 0x910FFB00, 0x9110FB00, 0x9111FB00, 0x9112FB00, 0x9113FB00, + 0x9114FB00, 0x9115FB00, 0x9116FB00, 0x9117FB00, 0x9118FB00, 0x9119FB00, 0x911AFB00, 0x911BFB00, 0x911CFB00, 0x911DFB00, 0x911EFB00, 0x911FFB00, 0x9120FB00, 0x9121FB00, 0x9122FB00, + 0x9123FB00, 0x9124FB00, 0x9125FB00, 0x9126FB00, 0x9127FB00, 0x9128FB00, 0x9129FB00, 0x912AFB00, 0x912BFB00, 
0x912CFB00, 0x912DFB00, 0x912EFB00, 0x912FFB00, 0x9130FB00, 0x9131FB00, + 0x9132FB00, 0x9133FB00, 0x9134FB00, 0x9135FB00, 0x9136FB00, 0x9137FB00, 0x9138FB00, 0x9139FB00, 0x913AFB00, 0x913BFB00, 0x913CFB00, 0x913DFB00, 0x913EFB00, 0x913FFB00, 0x9140FB00, + 0x9141FB00, 0x9142FB00, 0x9143FB00, 0x9144FB00, 0x9145FB00, 0x9146FB00, 0x9147FB00, 0x9148FB00, 0x9149FB00, 0x914AFB00, 0x914BFB00, 0x914CFB00, 0x914DFB00, 0x914EFB00, 0x914FFB00, + 0x9150FB00, 0x9151FB00, 0x9152FB00, 0x9153FB00, 0x9154FB00, 0x9155FB00, 0x9156FB00, 0x9157FB00, 0x9158FB00, 0x9159FB00, 0x915AFB00, 0x915BFB00, 0x915CFB00, 0x915DFB00, 0x915EFB00, + 0x915FFB00, 0x9160FB00, 0x9161FB00, 0x9162FB00, 0x9163FB00, 0x9164FB00, 0x9165FB00, 0x9166FB00, 0x9167FB00, 0x9168FB00, 0x9169FB00, 0x916AFB00, 0x916BFB00, 0x916CFB00, 0x916DFB00, + 0x916EFB00, 0x916FFB00, 0x9170FB00, 0x9171FB00, 0x9172FB00, 0x9173FB00, 0x9174FB00, 0x9175FB00, 0x9176FB00, 0x9177FB00, 0x9178FB00, 0x9179FB00, 0x917AFB00, 0x917BFB00, 0x917CFB00, + 0x917DFB00, 0x917EFB00, 0x917FFB00, 0x9180FB00, 0x9181FB00, 0x9182FB00, 0x9183FB00, 0x9184FB00, 0x9185FB00, 0x9186FB00, 0x9187FB00, 0x9188FB00, 0x9189FB00, 0x918AFB00, 0x918BFB00, + 0x918CFB00, 0x918DFB00, 0x918EFB00, 0x918FFB00, 0x9190FB00, 0x9191FB00, 0x9192FB00, 0x9193FB00, 0x9194FB00, 0x9195FB00, 0x9196FB00, 0x9197FB00, 0x9198FB00, 0x9199FB00, 0x919AFB00, + 0x919BFB00, 0x919CFB00, 0x919DFB00, 0x919EFB00, 0x919FFB00, 0x91A0FB00, 0x91A1FB00, 0x91A2FB00, 0x91A3FB00, 0x91A4FB00, 0x91A5FB00, 0x91A6FB00, 0x91A7FB00, 0x91A8FB00, 0x91A9FB00, + 0x91AAFB00, 0x91ABFB00, 0x91ACFB00, 0x91ADFB00, 0x91AEFB00, 0x91AFFB00, 0x91B0FB00, 0x91B1FB00, 0x91B2FB00, 0x91B3FB00, 0x91B4FB00, 0x91B5FB00, 0x91B6FB00, 0x91B7FB00, 0x91B8FB00, + 0x91B9FB00, 0x91BAFB00, 0x91BBFB00, 0x91BCFB00, 0x91BDFB00, 0x91BEFB00, 0x91BFFB00, 0x91C0FB00, 0x91C1FB00, 0x91C2FB00, 0x91C3FB00, 0x91C4FB00, 0x91C5FB00, 0x91C6FB00, 0x91C7FB00, + 0x91C8FB00, 0x91C9FB00, 0x91CAFB00, 0x91CBFB00, 0x91CCFB00, 0x91CDFB00, 0x91CEFB00, 0x91CFFB00, 
0x91D0FB00, 0x91D1FB00, 0x91D2FB00, 0x91D3FB00, 0x91D4FB00, 0x91D5FB00, 0x91D6FB00, + 0x91D7FB00, 0x91D8FB00, 0x91D9FB00, 0x91DAFB00, 0x91DBFB00, 0x91DCFB00, 0x91DDFB00, 0x91DEFB00, 0x91DFFB00, 0x91E0FB00, 0x91E1FB00, 0x91E2FB00, 0x91E3FB00, 0x91E4FB00, 0x91E5FB00, + 0x91E6FB00, 0x91E7FB00, 0x91E8FB00, 0x91E9FB00, 0x91EAFB00, 0x91EBFB00, 0x91ECFB00, 0x91EDFB00, 0x91EEFB00, 0x91EFFB00, 0x91F0FB00, 0x91F1FB00, 0x91F2FB00, 0x91F3FB00, 0x91F4FB00, + 0x91F5FB00, 0x91F6FB00, 0x91F7FB00, 0x91F8FB00, 0x91F9FB00, 0x91FAFB00, 0x91FBFB00, 0x91FCFB00, 0x91FDFB00, 0x91FEFB00, 0x91FFFB00, 0x9200FB00, 0x9201FB00, 0x9202FB00, 0x9203FB00, + 0x9204FB00, 0x9205FB00, 0x9206FB00, 0x9207FB00, 0x9208FB00, 0x9209FB00, 0x920AFB00, 0x920BFB00, 0x920CFB00, 0x920DFB00, 0x920EFB00, 0x920FFB00, 0x9210FB00, 0x9211FB00, 0x9212FB00, + 0x9213FB00, 0x9214FB00, 0x9215FB00, 0x9216FB00, 0x9217FB00, 0x9218FB00, 0x9219FB00, 0x921AFB00, 0x921BFB00, 0x921CFB00, 0x921DFB00, 0x921EFB00, 0x921FFB00, 0x9220FB00, 0x9221FB00, + 0x9222FB00, 0x9223FB00, 0x9224FB00, 0x9225FB00, 0x9226FB00, 0x9227FB00, 0x9228FB00, 0x9229FB00, 0x922AFB00, 0x922BFB00, 0x922CFB00, 0x922DFB00, 0x922EFB00, 0x922FFB00, 0x9230FB00, + 0x9231FB00, 0x9232FB00, 0x9233FB00, 0x9234FB00, 0x9235FB00, 0x9236FB00, 0x9237FB00, 0x9238FB00, 0x9239FB00, 0x923AFB00, 0x923BFB00, 0x923CFB00, 0x923DFB00, 0x923EFB00, 0x923FFB00, + 0x9240FB00, 0x9241FB00, 0x9242FB00, 0x9243FB00, 0x9244FB00, 0x9245FB00, 0x9246FB00, 0x9247FB00, 0x9248FB00, 0x9249FB00, 0x924AFB00, 0x924BFB00, 0x924CFB00, 0x924DFB00, 0x924EFB00, + 0x924FFB00, 0x9250FB00, 0x9251FB00, 0x9252FB00, 0x9253FB00, 0x9254FB00, 0x9255FB00, 0x9256FB00, 0x9257FB00, 0x9258FB00, 0x9259FB00, 0x925AFB00, 0x925BFB00, 0x925CFB00, 0x925DFB00, + 0x925EFB00, 0x925FFB00, 0x9260FB00, 0x9261FB00, 0x9262FB00, 0x9263FB00, 0x9264FB00, 0x9265FB00, 0x9266FB00, 0x9267FB00, 0x9268FB00, 0x9269FB00, 0x926AFB00, 0x926BFB00, 0x926CFB00, + 0x926DFB00, 0x926EFB00, 0x926FFB00, 0x9270FB00, 0x9271FB00, 0x9272FB00, 0x9273FB00, 
0x9274FB00, 0x9275FB00, 0x9276FB00, 0x9277FB00, 0x9278FB00, 0x9279FB00, 0x927AFB00, 0x927BFB00, + 0x927CFB00, 0x927DFB00, 0x927EFB00, 0x927FFB00, 0x9280FB00, 0x9281FB00, 0x9282FB00, 0x9283FB00, 0x9284FB00, 0x9285FB00, 0x9286FB00, 0x9287FB00, 0x9288FB00, 0x9289FB00, 0x928AFB00, + 0x928BFB00, 0x928CFB00, 0x928DFB00, 0x928EFB00, 0x928FFB00, 0x9290FB00, 0x9291FB00, 0x9292FB00, 0x9293FB00, 0x9294FB00, 0x9295FB00, 0x9296FB00, 0x9297FB00, 0x9298FB00, 0x9299FB00, + 0x929AFB00, 0x929BFB00, 0x929CFB00, 0x929DFB00, 0x929EFB00, 0x929FFB00, 0x92A0FB00, 0x92A1FB00, 0x92A2FB00, 0x92A3FB00, 0x92A4FB00, 0x92A5FB00, 0x92A6FB00, 0x92A7FB00, 0x92A8FB00, + 0x92A9FB00, 0x92AAFB00, 0x92ABFB00, 0x92ACFB00, 0x92ADFB00, 0x92AEFB00, 0x92AFFB00, 0x92B0FB00, 0x92B1FB00, 0x92B2FB00, 0x92B3FB00, 0x92B4FB00, 0x92B5FB00, 0x92B6FB00, 0x92B7FB00, + 0x92B8FB00, 0x92B9FB00, 0x92BAFB00, 0x92BBFB00, 0x92BCFB00, 0x92BDFB00, 0x92BEFB00, 0x92BFFB00, 0x92C0FB00, 0x92C1FB00, 0x92C2FB00, 0x92C3FB00, 0x92C4FB00, 0x92C5FB00, 0x92C6FB00, + 0x92C7FB00, 0x92C8FB00, 0x92C9FB00, 0x92CAFB00, 0x92CBFB00, 0x92CCFB00, 0x92CDFB00, 0x92CEFB00, 0x92CFFB00, 0x92D0FB00, 0x92D1FB00, 0x92D2FB00, 0x92D3FB00, 0x92D4FB00, 0x92D5FB00, + 0x92D6FB00, 0x92D7FB00, 0x92D8FB00, 0x92D9FB00, 0x92DAFB00, 0x92DBFB00, 0x92DCFB00, 0x92DDFB00, 0x92DEFB00, 0x92DFFB00, 0x92E0FB00, 0x92E1FB00, 0x92E2FB00, 0x92E3FB00, 0x92E4FB00, + 0x92E5FB00, 0x92E6FB00, 0x92E7FB00, 0x92E8FB00, 0x92E9FB00, 0x92EAFB00, 0x92EBFB00, 0x92ECFB00, 0x92EDFB00, 0x92EEFB00, 0x92EFFB00, 0x92F0FB00, 0x92F1FB00, 0x92F2FB00, 0x92F3FB00, + 0x92F4FB00, 0x92F5FB00, 0x92F6FB00, 0x92F7FB00, 0x92F8FB00, 0x92F9FB00, 0x92FAFB00, 0x92FBFB00, 0x92FCFB00, 0x92FDFB00, 0x92FEFB00, 0x92FFFB00, 0x9300FB00, 0x9301FB00, 0x9302FB00, + 0x9303FB00, 0x9304FB00, 0x9305FB00, 0x9306FB00, 0x9307FB00, 0x9308FB00, 0x9309FB00, 0x930AFB00, 0x930BFB00, 0x930CFB00, 0x930DFB00, 0x930EFB00, 0x930FFB00, 0x9310FB00, 0x9311FB00, + 0x9312FB00, 0x9313FB00, 0x9314FB00, 0x9315FB00, 0x9316FB00, 0x9317FB00, 
0x9318FB00, 0x9319FB00, 0x931AFB00, 0x931BFB00, 0x931CFB00, 0x931DFB00, 0x931EFB00, 0x931FFB00, 0x9320FB00, + 0x9321FB00, 0x9322FB00, 0x9323FB00, 0x9324FB00, 0x9325FB00, 0x9326FB00, 0x9327FB00, 0x9328FB00, 0x9329FB00, 0x932AFB00, 0x932BFB00, 0x932CFB00, 0x932DFB00, 0x932EFB00, 0x932FFB00, + 0x9330FB00, 0x9331FB00, 0x9332FB00, 0x9333FB00, 0x9334FB00, 0x9335FB00, 0x9336FB00, 0x9337FB00, 0x9338FB00, 0x9339FB00, 0x933AFB00, 0x933BFB00, 0x933CFB00, 0x933DFB00, 0x933EFB00, + 0x933FFB00, 0x9340FB00, 0x9341FB00, 0x9342FB00, 0x9343FB00, 0x9344FB00, 0x9345FB00, 0x9346FB00, 0x9347FB00, 0x9348FB00, 0x9349FB00, 0x934AFB00, 0x934BFB00, 0x934CFB00, 0x934DFB00, + 0x934EFB00, 0x934FFB00, 0x9350FB00, 0x9351FB00, 0x9352FB00, 0x9353FB00, 0x9354FB00, 0x9355FB00, 0x9356FB00, 0x9357FB00, 0x9358FB00, 0x9359FB00, 0x935AFB00, 0x935BFB00, 0x935CFB00, + 0x935DFB00, 0x935EFB00, 0x935FFB00, 0x9360FB00, 0x9361FB00, 0x9362FB00, 0x9363FB00, 0x9364FB00, 0x9365FB00, 0x9366FB00, 0x9367FB00, 0x9368FB00, 0x9369FB00, 0x936AFB00, 0x936BFB00, + 0x936CFB00, 0x936DFB00, 0x936EFB00, 0x936FFB00, 0x9370FB00, 0x9371FB00, 0x9372FB00, 0x9373FB00, 0x9374FB00, 0x9375FB00, 0x9376FB00, 0x9377FB00, 0x9378FB00, 0x9379FB00, 0x937AFB00, + 0x937BFB00, 0x937CFB00, 0x937DFB00, 0x937EFB00, 0x937FFB00, 0x9380FB00, 0x9381FB00, 0x9382FB00, 0x9383FB00, 0x9384FB00, 0x9385FB00, 0x9386FB00, 0x9387FB00, 0x9388FB00, 0x9389FB00, + 0x938AFB00, 0x938BFB00, 0x938CFB00, 0x938DFB00, 0x938EFB00, 0x938FFB00, 0x9390FB00, 0x9391FB00, 0x9392FB00, 0x9393FB00, 0x9394FB00, 0x9395FB00, 0x9396FB00, 0x9397FB00, 0x9398FB00, + 0x9399FB00, 0x939AFB00, 0x939BFB00, 0x939CFB00, 0x939DFB00, 0x939EFB00, 0x939FFB00, 0x93A0FB00, 0x93A1FB00, 0x93A2FB00, 0x93A3FB00, 0x93A4FB00, 0x93A5FB00, 0x93A6FB00, 0x93A7FB00, + 0x93A8FB00, 0x93A9FB00, 0x93AAFB00, 0x93ABFB00, 0x93ACFB00, 0x93ADFB00, 0x93AEFB00, 0x93AFFB00, 0x93B0FB00, 0x93B1FB00, 0x93B2FB00, 0x93B3FB00, 0x93B4FB00, 0x93B5FB00, 0x93B6FB00, + 0x93B7FB00, 0x93B8FB00, 0x93B9FB00, 0x93BAFB00, 0x93BBFB00, 
0x93BCFB00, 0x93BDFB00, 0x93BEFB00, 0x93BFFB00, 0x93C0FB00, 0x93C1FB00, 0x93C2FB00, 0x93C3FB00, 0x93C4FB00, 0x93C5FB00, + 0x93C6FB00, 0x93C7FB00, 0x93C8FB00, 0x93C9FB00, 0x93CAFB00, 0x93CBFB00, 0x93CCFB00, 0x93CDFB00, 0x93CEFB00, 0x93CFFB00, 0x93D0FB00, 0x93D1FB00, 0x93D2FB00, 0x93D3FB00, 0x93D4FB00, + 0x93D5FB00, 0x93D6FB00, 0x93D7FB00, 0x93D8FB00, 0x93D9FB00, 0x93DAFB00, 0x93DBFB00, 0x93DCFB00, 0x93DDFB00, 0x93DEFB00, 0x93DFFB00, 0x93E0FB00, 0x93E1FB00, 0x93E2FB00, 0x93E3FB00, + 0x93E4FB00, 0x93E5FB00, 0x93E6FB00, 0x93E7FB00, 0x93E8FB00, 0x93E9FB00, 0x93EAFB00, 0x93EBFB00, 0x93ECFB00, 0x93EDFB00, 0x93EEFB00, 0x93EFFB00, 0x93F0FB00, 0x93F1FB00, 0x93F2FB00, + 0x93F3FB00, 0x93F4FB00, 0x93F5FB00, 0x93F6FB00, 0x93F7FB00, 0x93F8FB00, 0x93F9FB00, 0x93FAFB00, 0x93FBFB00, 0x93FCFB00, 0x93FDFB00, 0x93FEFB00, 0x93FFFB00, 0x9400FB00, 0x9401FB00, + 0x9402FB00, 0x9403FB00, 0x9404FB00, 0x9405FB00, 0x9406FB00, 0x9407FB00, 0x9408FB00, 0x9409FB00, 0x940AFB00, 0x940BFB00, 0x940CFB00, 0x940DFB00, 0x940EFB00, 0x940FFB00, 0x9410FB00, + 0x9411FB00, 0x9412FB00, 0x9413FB00, 0x9414FB00, 0x9415FB00, 0x9416FB00, 0x9417FB00, 0x9418FB00, 0x9419FB00, 0x941AFB00, 0x941BFB00, 0x941CFB00, 0x941DFB00, 0x941EFB00, 0x941FFB00, + 0x9420FB00, 0x9421FB00, 0x9422FB00, 0x9423FB00, 0x9424FB00, 0x9425FB00, 0x9426FB00, 0x9427FB00, 0x9428FB00, 0x9429FB00, 0x942AFB00, 0x942BFB00, 0x942CFB00, 0x942DFB00, 0x942EFB00, + 0x942FFB00, 0x9430FB00, 0x9431FB00, 0x9432FB00, 0x9433FB00, 0x9434FB00, 0x9435FB00, 0x9436FB00, 0x9437FB00, 0x9438FB00, 0x9439FB00, 0x943AFB00, 0x943BFB00, 0x943CFB00, 0x943DFB00, + 0x943EFB00, 0x943FFB00, 0x9440FB00, 0x9441FB00, 0x9442FB00, 0x9443FB00, 0x9444FB00, 0x9445FB00, 0x9446FB00, 0x9447FB00, 0x9448FB00, 0x9449FB00, 0x944AFB00, 0x944BFB00, 0x944CFB00, + 0x944DFB00, 0x944EFB00, 0x944FFB00, 0x9450FB00, 0x9451FB00, 0x9452FB00, 0x9453FB00, 0x9454FB00, 0x9455FB00, 0x9456FB00, 0x9457FB00, 0x9458FB00, 0x9459FB00, 0x945AFB00, 0x945BFB00, + 0x945CFB00, 0x945DFB00, 0x945EFB00, 0x945FFB00, 
0x9460FB00, 0x9461FB00, 0x9462FB00, 0x9463FB00, 0x9464FB00, 0x9465FB00, 0x9466FB00, 0x9467FB00, 0x9468FB00, 0x9469FB00, 0x946AFB00, + 0x946BFB00, 0x946CFB00, 0x946DFB00, 0x946EFB00, 0x946FFB00, 0x9470FB00, 0x9471FB00, 0x9472FB00, 0x9473FB00, 0x9474FB00, 0x9475FB00, 0x9476FB00, 0x9477FB00, 0x9478FB00, 0x9479FB00, + 0x947AFB00, 0x947BFB00, 0x947CFB00, 0x947DFB00, 0x947EFB00, 0x947FFB00, 0x9480FB00, 0x9481FB00, 0x9482FB00, 0x9483FB00, 0x9484FB00, 0x9485FB00, 0x9486FB00, 0x9487FB00, 0x9488FB00, + 0x9489FB00, 0x948AFB00, 0x948BFB00, 0x948CFB00, 0x948DFB00, 0x948EFB00, 0x948FFB00, 0x9490FB00, 0x9491FB00, 0x9492FB00, 0x9493FB00, 0x9494FB00, 0x9495FB00, 0x9496FB00, 0x9497FB00, + 0x9498FB00, 0x9499FB00, 0x949AFB00, 0x949BFB00, 0x949CFB00, 0x949DFB00, 0x949EFB00, 0x949FFB00, 0x94A0FB00, 0x94A1FB00, 0x94A2FB00, 0x94A3FB00, 0x94A4FB00, 0x94A5FB00, 0x94A6FB00, + 0x94A7FB00, 0x94A8FB00, 0x94A9FB00, 0x94AAFB00, 0x94ABFB00, 0x94ACFB00, 0x94ADFB00, 0x94AEFB00, 0x94AFFB00, 0x94B0FB00, 0x94B1FB00, 0x94B2FB00, 0x94B3FB00, 0x94B4FB00, 0x94B5FB00, + 0x94B6FB00, 0x94B7FB00, 0x94B8FB00, 0x94B9FB00, 0x94BAFB00, 0x94BBFB00, 0x94BCFB00, 0x94BDFB00, 0x94BEFB00, 0x94BFFB00, 0x94C0FB00, 0x94C1FB00, 0x94C2FB00, 0x94C3FB00, 0x94C4FB00, + 0x94C5FB00, 0x94C6FB00, 0x94C7FB00, 0x94C8FB00, 0x94C9FB00, 0x94CAFB00, 0x94CBFB00, 0x94CCFB00, 0x94CDFB00, 0x94CEFB00, 0x94CFFB00, 0x94D0FB00, 0x94D1FB00, 0x94D2FB00, 0x94D3FB00, + 0x94D4FB00, 0x94D5FB00, 0x94D6FB00, 0x94D7FB00, 0x94D8FB00, 0x94D9FB00, 0x94DAFB00, 0x94DBFB00, 0x94DCFB00, 0x94DDFB00, 0x94DEFB00, 0x94DFFB00, 0x94E0FB00, 0x94E1FB00, 0x94E2FB00, + 0x94E3FB00, 0x94E4FB00, 0x94E5FB00, 0x94E6FB00, 0x94E7FB00, 0x94E8FB00, 0x94E9FB00, 0x94EAFB00, 0x94EBFB00, 0x94ECFB00, 0x94EDFB00, 0x94EEFB00, 0x94EFFB00, 0x94F0FB00, 0x94F1FB00, + 0x94F2FB00, 0x94F3FB00, 0x94F4FB00, 0x94F5FB00, 0x94F6FB00, 0x94F7FB00, 0x94F8FB00, 0x94F9FB00, 0x94FAFB00, 0x94FBFB00, 0x94FCFB00, 0x94FDFB00, 0x94FEFB00, 0x94FFFB00, 0x9500FB00, + 0x9501FB00, 0x9502FB00, 0x9503FB00, 
0x9504FB00, 0x9505FB00, 0x9506FB00, 0x9507FB00, 0x9508FB00, 0x9509FB00, 0x950AFB00, 0x950BFB00, 0x950CFB00, 0x950DFB00, 0x950EFB00, 0x950FFB00, + 0x9510FB00, 0x9511FB00, 0x9512FB00, 0x9513FB00, 0x9514FB00, 0x9515FB00, 0x9516FB00, 0x9517FB00, 0x9518FB00, 0x9519FB00, 0x951AFB00, 0x951BFB00, 0x951CFB00, 0x951DFB00, 0x951EFB00, + 0x951FFB00, 0x9520FB00, 0x9521FB00, 0x9522FB00, 0x9523FB00, 0x9524FB00, 0x9525FB00, 0x9526FB00, 0x9527FB00, 0x9528FB00, 0x9529FB00, 0x952AFB00, 0x952BFB00, 0x952CFB00, 0x952DFB00, + 0x952EFB00, 0x952FFB00, 0x9530FB00, 0x9531FB00, 0x9532FB00, 0x9533FB00, 0x9534FB00, 0x9535FB00, 0x9536FB00, 0x9537FB00, 0x9538FB00, 0x9539FB00, 0x953AFB00, 0x953BFB00, 0x953CFB00, + 0x953DFB00, 0x953EFB00, 0x953FFB00, 0x9540FB00, 0x9541FB00, 0x9542FB00, 0x9543FB00, 0x9544FB00, 0x9545FB00, 0x9546FB00, 0x9547FB00, 0x9548FB00, 0x9549FB00, 0x954AFB00, 0x954BFB00, + 0x954CFB00, 0x954DFB00, 0x954EFB00, 0x954FFB00, 0x9550FB00, 0x9551FB00, 0x9552FB00, 0x9553FB00, 0x9554FB00, 0x9555FB00, 0x9556FB00, 0x9557FB00, 0x9558FB00, 0x9559FB00, 0x955AFB00, + 0x955BFB00, 0x955CFB00, 0x955DFB00, 0x955EFB00, 0x955FFB00, 0x9560FB00, 0x9561FB00, 0x9562FB00, 0x9563FB00, 0x9564FB00, 0x9565FB00, 0x9566FB00, 0x9567FB00, 0x9568FB00, 0x9569FB00, + 0x956AFB00, 0x956BFB00, 0x956CFB00, 0x956DFB00, 0x956EFB00, 0x956FFB00, 0x9570FB00, 0x9571FB00, 0x9572FB00, 0x9573FB00, 0x9574FB00, 0x9575FB00, 0x9576FB00, 0x9577FB00, 0x9578FB00, + 0x9579FB00, 0x957AFB00, 0x957BFB00, 0x957CFB00, 0x957DFB00, 0x957EFB00, 0x957FFB00, 0x9580FB00, 0x9581FB00, 0x9582FB00, 0x9583FB00, 0x9584FB00, 0x9585FB00, 0x9586FB00, 0x9587FB00, + 0x9588FB00, 0x9589FB00, 0x958AFB00, 0x958BFB00, 0x958CFB00, 0x958DFB00, 0x958EFB00, 0x958FFB00, 0x9590FB00, 0x9591FB00, 0x9592FB00, 0x9593FB00, 0x9594FB00, 0x9595FB00, 0x9596FB00, + 0x9597FB00, 0x9598FB00, 0x9599FB00, 0x959AFB00, 0x959BFB00, 0x959CFB00, 0x959DFB00, 0x959EFB00, 0x959FFB00, 0x95A0FB00, 0x95A1FB00, 0x95A2FB00, 0x95A3FB00, 0x95A4FB00, 0x95A5FB00, + 0x95A6FB00, 0x95A7FB00, 
0x95A8FB00, 0x95A9FB00, 0x95AAFB00, 0x95ABFB00, 0x95ACFB00, 0x95ADFB00, 0x95AEFB00, 0x95AFFB00, 0x95B0FB00, 0x95B1FB00, 0x95B2FB00, 0x95B3FB00, 0x95B4FB00, + 0x95B5FB00, 0x95B6FB00, 0x95B7FB00, 0x95B8FB00, 0x95B9FB00, 0x95BAFB00, 0x95BBFB00, 0x95BCFB00, 0x95BDFB00, 0x95BEFB00, 0x95BFFB00, 0x95C0FB00, 0x95C1FB00, 0x95C2FB00, 0x95C3FB00, + 0x95C4FB00, 0x95C5FB00, 0x95C6FB00, 0x95C7FB00, 0x95C8FB00, 0x95C9FB00, 0x95CAFB00, 0x95CBFB00, 0x95CCFB00, 0x95CDFB00, 0x95CEFB00, 0x95CFFB00, 0x95D0FB00, 0x95D1FB00, 0x95D2FB00, + 0x95D3FB00, 0x95D4FB00, 0x95D5FB00, 0x95D6FB00, 0x95D7FB00, 0x95D8FB00, 0x95D9FB00, 0x95DAFB00, 0x95DBFB00, 0x95DCFB00, 0x95DDFB00, 0x95DEFB00, 0x95DFFB00, 0x95E0FB00, 0x95E1FB00, + 0x95E2FB00, 0x95E3FB00, 0x95E4FB00, 0x95E5FB00, 0x95E6FB00, 0x95E7FB00, 0x95E8FB00, 0x95E9FB00, 0x95EAFB00, 0x95EBFB00, 0x95ECFB00, 0x95EDFB00, 0x95EEFB00, 0x95EFFB00, 0x95F0FB00, + 0x95F1FB00, 0x95F2FB00, 0x95F3FB00, 0x95F4FB00, 0x95F5FB00, 0x95F6FB00, 0x95F7FB00, 0x95F8FB00, 0x95F9FB00, 0x95FAFB00, 0x95FBFB00, 0x95FCFB00, 0x95FDFB00, 0x95FEFB00, 0x95FFFB00, + 0x9600FB00, 0x9601FB00, 0x9602FB00, 0x9603FB00, 0x9604FB00, 0x9605FB00, 0x9606FB00, 0x9607FB00, 0x9608FB00, 0x9609FB00, 0x960AFB00, 0x960BFB00, 0x960CFB00, 0x960DFB00, 0x960EFB00, + 0x960FFB00, 0x9610FB00, 0x9611FB00, 0x9612FB00, 0x9613FB00, 0x9614FB00, 0x9615FB00, 0x9616FB00, 0x9617FB00, 0x9618FB00, 0x9619FB00, 0x961AFB00, 0x961BFB00, 0x961CFB00, 0x961DFB00, + 0x961EFB00, 0x961FFB00, 0x9620FB00, 0x9621FB00, 0x9622FB00, 0x9623FB00, 0x9624FB00, 0x9625FB00, 0x9626FB00, 0x9627FB00, 0x9628FB00, 0x9629FB00, 0x962AFB00, 0x962BFB00, 0x962CFB00, + 0x962DFB00, 0x962EFB00, 0x962FFB00, 0x9630FB00, 0x9631FB00, 0x9632FB00, 0x9633FB00, 0x9634FB00, 0x9635FB00, 0x9636FB00, 0x9637FB00, 0x9638FB00, 0x9639FB00, 0x963AFB00, 0x963BFB00, + 0x963CFB00, 0x963DFB00, 0x963EFB00, 0x963FFB00, 0x9640FB00, 0x9641FB00, 0x9642FB00, 0x9643FB00, 0x9644FB00, 0x9645FB00, 0x9646FB00, 0x9647FB00, 0x9648FB00, 0x9649FB00, 0x964AFB00, + 0x964BFB00, 
0x964CFB00, 0x964DFB00, 0x964EFB00, 0x964FFB00, 0x9650FB00, 0x9651FB00, 0x9652FB00, 0x9653FB00, 0x9654FB00, 0x9655FB00, 0x9656FB00, 0x9657FB00, 0x9658FB00, 0x9659FB00, + 0x965AFB00, 0x965BFB00, 0x965CFB00, 0x965DFB00, 0x965EFB00, 0x965FFB00, 0x9660FB00, 0x9661FB00, 0x9662FB00, 0x9663FB00, 0x9664FB00, 0x9665FB00, 0x9666FB00, 0x9667FB00, 0x9668FB00, + 0x9669FB00, 0x966AFB00, 0x966BFB00, 0x966CFB00, 0x966DFB00, 0x966EFB00, 0x966FFB00, 0x9670FB00, 0x9671FB00, 0x9672FB00, 0x9673FB00, 0x9674FB00, 0x9675FB00, 0x9676FB00, 0x9677FB00, + 0x9678FB00, 0x9679FB00, 0x967AFB00, 0x967BFB00, 0x967CFB00, 0x967DFB00, 0x967EFB00, 0x967FFB00, 0x9680FB00, 0x9681FB00, 0x9682FB00, 0x9683FB00, 0x9684FB00, 0x9685FB00, 0x9686FB00, + 0x9687FB00, 0x9688FB00, 0x9689FB00, 0x968AFB00, 0x968BFB00, 0x968CFB00, 0x968DFB00, 0x968EFB00, 0x968FFB00, 0x9690FB00, 0x9691FB00, 0x9692FB00, 0x9693FB00, 0x9694FB00, 0x9695FB00, + 0x9696FB00, 0x9697FB00, 0x9698FB00, 0x9699FB00, 0x969AFB00, 0x969BFB00, 0x969CFB00, 0x969DFB00, 0x969EFB00, 0x969FFB00, 0x96A0FB00, 0x96A1FB00, 0x96A2FB00, 0x96A3FB00, 0x96A4FB00, + 0x96A5FB00, 0x96A6FB00, 0x96A7FB00, 0x96A8FB00, 0x96A9FB00, 0x96AAFB00, 0x96ABFB00, 0x96ACFB00, 0x96ADFB00, 0x96AEFB00, 0x96AFFB00, 0x96B0FB00, 0x96B1FB00, 0x96B2FB00, 0x96B3FB00, + 0x96B4FB00, 0x96B5FB00, 0x96B6FB00, 0x96B7FB00, 0x96B8FB00, 0x96B9FB00, 0x96BAFB00, 0x96BBFB00, 0x96BCFB00, 0x96BDFB00, 0x96BEFB00, 0x96BFFB00, 0x96C0FB00, 0x96C1FB00, 0x96C2FB00, + 0x96C3FB00, 0x96C4FB00, 0x96C5FB00, 0x96C6FB00, 0x96C7FB00, 0x96C8FB00, 0x96C9FB00, 0x96CAFB00, 0x96CBFB00, 0x96CCFB00, 0x96CDFB00, 0x96CEFB00, 0x96CFFB00, 0x96D0FB00, 0x96D1FB00, + 0x96D2FB00, 0x96D3FB00, 0x96D4FB00, 0x96D5FB00, 0x96D6FB00, 0x96D7FB00, 0x96D8FB00, 0x96D9FB00, 0x96DAFB00, 0x96DBFB00, 0x96DCFB00, 0x96DDFB00, 0x96DEFB00, 0x96DFFB00, 0x96E0FB00, + 0x96E1FB00, 0x96E2FB00, 0x96E3FB00, 0x96E4FB00, 0x96E5FB00, 0x96E6FB00, 0x96E7FB00, 0x96E8FB00, 0x96E9FB00, 0x96EAFB00, 0x96EBFB00, 0x96ECFB00, 0x96EDFB00, 0x96EEFB00, 0x96EFFB00, + 
0x96F0FB00, 0x96F1FB00, 0x96F2FB00, 0x96F3FB00, 0x96F4FB00, 0x96F5FB00, 0x96F6FB00, 0x96F7FB00, 0x96F8FB00, 0x96F9FB00, 0x96FAFB00, 0x96FBFB00, 0x96FCFB00, 0x96FDFB00, 0x96FEFB00, + 0x96FFFB00, 0x9700FB00, 0x9701FB00, 0x9702FB00, 0x9703FB00, 0x9704FB00, 0x9705FB00, 0x9706FB00, 0x9707FB00, 0x9708FB00, 0x9709FB00, 0x970AFB00, 0x970BFB00, 0x970CFB00, 0x970DFB00, + 0x970EFB00, 0x970FFB00, 0x9710FB00, 0x9711FB00, 0x9712FB00, 0x9713FB00, 0x9714FB00, 0x9715FB00, 0x9716FB00, 0x9717FB00, 0x9718FB00, 0x9719FB00, 0x971AFB00, 0x971BFB00, 0x971CFB00, + 0x971DFB00, 0x971EFB00, 0x971FFB00, 0x9720FB00, 0x9721FB00, 0x9722FB00, 0x9723FB00, 0x9724FB00, 0x9725FB00, 0x9726FB00, 0x9727FB00, 0x9728FB00, 0x9729FB00, 0x972AFB00, 0x972BFB00, + 0x972CFB00, 0x972DFB00, 0x972EFB00, 0x972FFB00, 0x9730FB00, 0x9731FB00, 0x9732FB00, 0x9733FB00, 0x9734FB00, 0x9735FB00, 0x9736FB00, 0x9737FB00, 0x9738FB00, 0x9739FB00, 0x973AFB00, + 0x973BFB00, 0x973CFB00, 0x973DFB00, 0x973EFB00, 0x973FFB00, 0x9740FB00, 0x9741FB00, 0x9742FB00, 0x9743FB00, 0x9744FB00, 0x9745FB00, 0x9746FB00, 0x9747FB00, 0x9748FB00, 0x9749FB00, + 0x974AFB00, 0x974BFB00, 0x974CFB00, 0x974DFB00, 0x974EFB00, 0x974FFB00, 0x9750FB00, 0x9751FB00, 0x9752FB00, 0x9753FB00, 0x9754FB00, 0x9755FB00, 0x9756FB00, 0x9757FB00, 0x9758FB00, + 0x9759FB00, 0x975AFB00, 0x975BFB00, 0x975CFB00, 0x975DFB00, 0x975EFB00, 0x975FFB00, 0x9760FB00, 0x9761FB00, 0x9762FB00, 0x9763FB00, 0x9764FB00, 0x9765FB00, 0x9766FB00, 0x9767FB00, + 0x9768FB00, 0x9769FB00, 0x976AFB00, 0x976BFB00, 0x976CFB00, 0x976DFB00, 0x976EFB00, 0x976FFB00, 0x9770FB00, 0x9771FB00, 0x9772FB00, 0x9773FB00, 0x9774FB00, 0x9775FB00, 0x9776FB00, + 0x9777FB00, 0x9778FB00, 0x9779FB00, 0x977AFB00, 0x977BFB00, 0x977CFB00, 0x977DFB00, 0x977EFB00, 0x977FFB00, 0x9780FB00, 0x9781FB00, 0x9782FB00, 0x9783FB00, 0x9784FB00, 0x9785FB00, + 0x9786FB00, 0x9787FB00, 0x9788FB00, 0x9789FB00, 0x978AFB00, 0x978BFB00, 0x978CFB00, 0x978DFB00, 0x978EFB00, 0x978FFB00, 0x9790FB00, 0x9791FB00, 0x9792FB00, 0x9793FB00, 0x9794FB00, 
+ 0x9795FB00, 0x9796FB00, 0x9797FB00, 0x9798FB00, 0x9799FB00, 0x979AFB00, 0x979BFB00, 0x979CFB00, 0x979DFB00, 0x979EFB00, 0x979FFB00, 0x97A0FB00, 0x97A1FB00, 0x97A2FB00, 0x97A3FB00, + 0x97A4FB00, 0x97A5FB00, 0x97A6FB00, 0x97A7FB00, 0x97A8FB00, 0x97A9FB00, 0x97AAFB00, 0x97ABFB00, 0x97ACFB00, 0x97ADFB00, 0x97AEFB00, 0x97AFFB00, 0x97B0FB00, 0x97B1FB00, 0x97B2FB00, + 0x97B3FB00, 0x97B4FB00, 0x97B5FB00, 0x97B6FB00, 0x97B7FB00, 0x97B8FB00, 0x97B9FB00, 0x97BAFB00, 0x97BBFB00, 0x97BCFB00, 0x97BDFB00, 0x97BEFB00, 0x97BFFB00, 0x97C0FB00, 0x97C1FB00, + 0x97C2FB00, 0x97C3FB00, 0x97C4FB00, 0x97C5FB00, 0x97C6FB00, 0x97C7FB00, 0x97C8FB00, 0x97C9FB00, 0x97CAFB00, 0x97CBFB00, 0x97CCFB00, 0x97CDFB00, 0x97CEFB00, 0x97CFFB00, 0x97D0FB00, + 0x97D1FB00, 0x97D2FB00, 0x97D3FB00, 0x97D4FB00, 0x97D5FB00, 0x97D6FB00, 0x97D7FB00, 0x97D8FB00, 0x97D9FB00, 0x97DAFB00, 0x97DBFB00, 0x97DCFB00, 0x97DDFB00, 0x97DEFB00, 0x97DFFB00, + 0x97E0FB00, 0x97E1FB00, 0x97E2FB00, 0x97E3FB00, 0x97E4FB00, 0x97E5FB00, 0x97E6FB00, 0x97E7FB00, 0x97E8FB00, 0x97E9FB00, 0x97EAFB00, 0x97EBFB00, 0x97ECFB00, 0x97EDFB00, 0x97EEFB00, + 0x97EFFB00, 0x97F0FB00, 0x97F1FB00, 0x97F2FB00, 0x97F3FB00, 0x97F4FB00, 0x97F5FB00, 0x97F6FB00, 0x97F7FB00, 0x97F8FB00, 0x97F9FB00, 0x97FAFB00, 0x97FBFB00, 0x97FCFB00, 0x97FDFB00, + 0x97FEFB00, 0x97FFFB00, 0x9800FB00, 0x9801FB00, 0x9802FB00, 0x9803FB00, 0x9804FB00, 0x9805FB00, 0x9806FB00, 0x9807FB00, 0x9808FB00, 0x9809FB00, 0x980AFB00, 0x980BFB00, 0x980CFB00, + 0x980DFB00, 0x980EFB00, 0x980FFB00, 0x9810FB00, 0x9811FB00, 0x9812FB00, 0x9813FB00, 0x9814FB00, 0x9815FB00, 0x9816FB00, 0x9817FB00, 0x9818FB00, 0x9819FB00, 0x981AFB00, 0x981BFB00, + 0x981CFB00, 0x981DFB00, 0x981EFB00, 0x981FFB00, 0x9820FB00, 0x9821FB00, 0x9822FB00, 0x9823FB00, 0x9824FB00, 0x9825FB00, 0x9826FB00, 0x9827FB00, 0x9828FB00, 0x9829FB00, 0x982AFB00, + 0x982BFB00, 0x982CFB00, 0x982DFB00, 0x982EFB00, 0x982FFB00, 0x9830FB00, 0x9831FB00, 0x9832FB00, 0x9833FB00, 0x9834FB00, 0x9835FB00, 0x9836FB00, 0x9837FB00, 0x9838FB00, 
0x9839FB00, + 0x983AFB00, 0x983BFB00, 0x983CFB00, 0x983DFB00, 0x983EFB00, 0x983FFB00, 0x9840FB00, 0x9841FB00, 0x9842FB00, 0x9843FB00, 0x9844FB00, 0x9845FB00, 0x9846FB00, 0x9847FB00, 0x9848FB00, + 0x9849FB00, 0x984AFB00, 0x984BFB00, 0x984CFB00, 0x984DFB00, 0x984EFB00, 0x984FFB00, 0x9850FB00, 0x9851FB00, 0x9852FB00, 0x9853FB00, 0x9854FB00, 0x9855FB00, 0x9856FB00, 0x9857FB00, + 0x9858FB00, 0x9859FB00, 0x985AFB00, 0x985BFB00, 0x985CFB00, 0x985DFB00, 0x985EFB00, 0x985FFB00, 0x9860FB00, 0x9861FB00, 0x9862FB00, 0x9863FB00, 0x9864FB00, 0x9865FB00, 0x9866FB00, + 0x9867FB00, 0x9868FB00, 0x9869FB00, 0x986AFB00, 0x986BFB00, 0x986CFB00, 0x986DFB00, 0x986EFB00, 0x986FFB00, 0x9870FB00, 0x9871FB00, 0x9872FB00, 0x9873FB00, 0x9874FB00, 0x9875FB00, + 0x9876FB00, 0x9877FB00, 0x9878FB00, 0x9879FB00, 0x987AFB00, 0x987BFB00, 0x987CFB00, 0x987DFB00, 0x987EFB00, 0x987FFB00, 0x9880FB00, 0x9881FB00, 0x9882FB00, 0x9883FB00, 0x9884FB00, + 0x9885FB00, 0x9886FB00, 0x9887FB00, 0x9888FB00, 0x9889FB00, 0x988AFB00, 0x988BFB00, 0x988CFB00, 0x988DFB00, 0x988EFB00, 0x988FFB00, 0x9890FB00, 0x9891FB00, 0x9892FB00, 0x9893FB00, + 0x9894FB00, 0x9895FB00, 0x9896FB00, 0x9897FB00, 0x9898FB00, 0x9899FB00, 0x989AFB00, 0x989BFB00, 0x989CFB00, 0x989DFB00, 0x989EFB00, 0x989FFB00, 0x98A0FB00, 0x98A1FB00, 0x98A2FB00, + 0x98A3FB00, 0x98A4FB00, 0x98A5FB00, 0x98A6FB00, 0x98A7FB00, 0x98A8FB00, 0x98A9FB00, 0x98AAFB00, 0x98ABFB00, 0x98ACFB00, 0x98ADFB00, 0x98AEFB00, 0x98AFFB00, 0x98B0FB00, 0x98B1FB00, + 0x98B2FB00, 0x98B3FB00, 0x98B4FB00, 0x98B5FB00, 0x98B6FB00, 0x98B7FB00, 0x98B8FB00, 0x98B9FB00, 0x98BAFB00, 0x98BBFB00, 0x98BCFB00, 0x98BDFB00, 0x98BEFB00, 0x98BFFB00, 0x98C0FB00, + 0x98C1FB00, 0x98C2FB00, 0x98C3FB00, 0x98C4FB00, 0x98C5FB00, 0x98C6FB00, 0x98C7FB00, 0x98C8FB00, 0x98C9FB00, 0x98CAFB00, 0x98CBFB00, 0x98CCFB00, 0x98CDFB00, 0x98CEFB00, 0x98CFFB00, + 0x98D0FB00, 0x98D1FB00, 0x98D2FB00, 0x98D3FB00, 0x98D4FB00, 0x98D5FB00, 0x98D6FB00, 0x98D7FB00, 0x98D8FB00, 0x98D9FB00, 0x98DAFB00, 0x98DBFB00, 0x98DCFB00, 
0x98DDFB00, 0x98DEFB00, + 0x98DFFB00, 0x98E0FB00, 0x98E1FB00, 0x98E2FB00, 0x98E3FB00, 0x98E4FB00, 0x98E5FB00, 0x98E6FB00, 0x98E7FB00, 0x98E8FB00, 0x98E9FB00, 0x98EAFB00, 0x98EBFB00, 0x98ECFB00, 0x98EDFB00, + 0x98EEFB00, 0x98EFFB00, 0x98F0FB00, 0x98F1FB00, 0x98F2FB00, 0x98F3FB00, 0x98F4FB00, 0x98F5FB00, 0x98F6FB00, 0x98F7FB00, 0x98F8FB00, 0x98F9FB00, 0x98FAFB00, 0x98FBFB00, 0x98FCFB00, + 0x98FDFB00, 0x98FEFB00, 0x98FFFB00, 0x9900FB00, 0x9901FB00, 0x9902FB00, 0x9903FB00, 0x9904FB00, 0x9905FB00, 0x9906FB00, 0x9907FB00, 0x9908FB00, 0x9909FB00, 0x990AFB00, 0x990BFB00, + 0x990CFB00, 0x990DFB00, 0x990EFB00, 0x990FFB00, 0x9910FB00, 0x9911FB00, 0x9912FB00, 0x9913FB00, 0x9914FB00, 0x9915FB00, 0x9916FB00, 0x9917FB00, 0x9918FB00, 0x9919FB00, 0x991AFB00, + 0x991BFB00, 0x991CFB00, 0x991DFB00, 0x991EFB00, 0x991FFB00, 0x9920FB00, 0x9921FB00, 0x9922FB00, 0x9923FB00, 0x9924FB00, 0x9925FB00, 0x9926FB00, 0x9927FB00, 0x9928FB00, 0x9929FB00, + 0x992AFB00, 0x992BFB00, 0x992CFB00, 0x992DFB00, 0x992EFB00, 0x992FFB00, 0x9930FB00, 0x9931FB00, 0x9932FB00, 0x9933FB00, 0x9934FB00, 0x9935FB00, 0x9936FB00, 0x9937FB00, 0x9938FB00, + 0x9939FB00, 0x993AFB00, 0x993BFB00, 0x993CFB00, 0x993DFB00, 0x993EFB00, 0x993FFB00, 0x9940FB00, 0x9941FB00, 0x9942FB00, 0x9943FB00, 0x9944FB00, 0x9945FB00, 0x9946FB00, 0x9947FB00, + 0x9948FB00, 0x9949FB00, 0x994AFB00, 0x994BFB00, 0x994CFB00, 0x994DFB00, 0x994EFB00, 0x994FFB00, 0x9950FB00, 0x9951FB00, 0x9952FB00, 0x9953FB00, 0x9954FB00, 0x9955FB00, 0x9956FB00, + 0x9957FB00, 0x9958FB00, 0x9959FB00, 0x995AFB00, 0x995BFB00, 0x995CFB00, 0x995DFB00, 0x995EFB00, 0x995FFB00, 0x9960FB00, 0x9961FB00, 0x9962FB00, 0x9963FB00, 0x9964FB00, 0x9965FB00, + 0x9966FB00, 0x9967FB00, 0x9968FB00, 0x9969FB00, 0x996AFB00, 0x996BFB00, 0x996CFB00, 0x996DFB00, 0x996EFB00, 0x996FFB00, 0x9970FB00, 0x9971FB00, 0x9972FB00, 0x9973FB00, 0x9974FB00, + 0x9975FB00, 0x9976FB00, 0x9977FB00, 0x9978FB00, 0x9979FB00, 0x997AFB00, 0x997BFB00, 0x997CFB00, 0x997DFB00, 0x997EFB00, 0x997FFB00, 0x9980FB00, 
0x9981FB00, 0x9982FB00, 0x9983FB00, + 0x9984FB00, 0x9985FB00, 0x9986FB00, 0x9987FB00, 0x9988FB00, 0x9989FB00, 0x998AFB00, 0x998BFB00, 0x998CFB00, 0x998DFB00, 0x998EFB00, 0x998FFB00, 0x9990FB00, 0x9991FB00, 0x9992FB00, + 0x9993FB00, 0x9994FB00, 0x9995FB00, 0x9996FB00, 0x9997FB00, 0x9998FB00, 0x9999FB00, 0x999AFB00, 0x999BFB00, 0x999CFB00, 0x999DFB00, 0x999EFB00, 0x999FFB00, 0x99A0FB00, 0x99A1FB00, + 0x99A2FB00, 0x99A3FB00, 0x99A4FB00, 0x99A5FB00, 0x99A6FB00, 0x99A7FB00, 0x99A8FB00, 0x99A9FB00, 0x99AAFB00, 0x99ABFB00, 0x99ACFB00, 0x99ADFB00, 0x99AEFB00, 0x99AFFB00, 0x99B0FB00, + 0x99B1FB00, 0x99B2FB00, 0x99B3FB00, 0x99B4FB00, 0x99B5FB00, 0x99B6FB00, 0x99B7FB00, 0x99B8FB00, 0x99B9FB00, 0x99BAFB00, 0x99BBFB00, 0x99BCFB00, 0x99BDFB00, 0x99BEFB00, 0x99BFFB00, + 0x99C0FB00, 0x99C1FB00, 0x99C2FB00, 0x99C3FB00, 0x99C4FB00, 0x99C5FB00, 0x99C6FB00, 0x99C7FB00, 0x99C8FB00, 0x99C9FB00, 0x99CAFB00, 0x99CBFB00, 0x99CCFB00, 0x99CDFB00, 0x99CEFB00, + 0x99CFFB00, 0x99D0FB00, 0x99D1FB00, 0x99D2FB00, 0x99D3FB00, 0x99D4FB00, 0x99D5FB00, 0x99D6FB00, 0x99D7FB00, 0x99D8FB00, 0x99D9FB00, 0x99DAFB00, 0x99DBFB00, 0x99DCFB00, 0x99DDFB00, + 0x99DEFB00, 0x99DFFB00, 0x99E0FB00, 0x99E1FB00, 0x99E2FB00, 0x99E3FB00, 0x99E4FB00, 0x99E5FB00, 0x99E6FB00, 0x99E7FB00, 0x99E8FB00, 0x99E9FB00, 0x99EAFB00, 0x99EBFB00, 0x99ECFB00, + 0x99EDFB00, 0x99EEFB00, 0x99EFFB00, 0x99F0FB00, 0x99F1FB00, 0x99F2FB00, 0x99F3FB00, 0x99F4FB00, 0x99F5FB00, 0x99F6FB00, 0x99F7FB00, 0x99F8FB00, 0x99F9FB00, 0x99FAFB00, 0x99FBFB00, + 0x99FCFB00, 0x99FDFB00, 0x99FEFB00, 0x99FFFB00, 0x9A00FB00, 0x9A01FB00, 0x9A02FB00, 0x9A03FB00, 0x9A04FB00, 0x9A05FB00, 0x9A06FB00, 0x9A07FB00, 0x9A08FB00, 0x9A09FB00, 0x9A0AFB00, + 0x9A0BFB00, 0x9A0CFB00, 0x9A0DFB00, 0x9A0EFB00, 0x9A0FFB00, 0x9A10FB00, 0x9A11FB00, 0x9A12FB00, 0x9A13FB00, 0x9A14FB00, 0x9A15FB00, 0x9A16FB00, 0x9A17FB00, 0x9A18FB00, 0x9A19FB00, + 0x9A1AFB00, 0x9A1BFB00, 0x9A1CFB00, 0x9A1DFB00, 0x9A1EFB00, 0x9A1FFB00, 0x9A20FB00, 0x9A21FB00, 0x9A22FB00, 0x9A23FB00, 0x9A24FB00, 
0x9A25FB00, 0x9A26FB00, 0x9A27FB00, 0x9A28FB00, + 0x9A29FB00, 0x9A2AFB00, 0x9A2BFB00, 0x9A2CFB00, 0x9A2DFB00, 0x9A2EFB00, 0x9A2FFB00, 0x9A30FB00, 0x9A31FB00, 0x9A32FB00, 0x9A33FB00, 0x9A34FB00, 0x9A35FB00, 0x9A36FB00, 0x9A37FB00, + 0x9A38FB00, 0x9A39FB00, 0x9A3AFB00, 0x9A3BFB00, 0x9A3CFB00, 0x9A3DFB00, 0x9A3EFB00, 0x9A3FFB00, 0x9A40FB00, 0x9A41FB00, 0x9A42FB00, 0x9A43FB00, 0x9A44FB00, 0x9A45FB00, 0x9A46FB00, + 0x9A47FB00, 0x9A48FB00, 0x9A49FB00, 0x9A4AFB00, 0x9A4BFB00, 0x9A4CFB00, 0x9A4DFB00, 0x9A4EFB00, 0x9A4FFB00, 0x9A50FB00, 0x9A51FB00, 0x9A52FB00, 0x9A53FB00, 0x9A54FB00, 0x9A55FB00, + 0x9A56FB00, 0x9A57FB00, 0x9A58FB00, 0x9A59FB00, 0x9A5AFB00, 0x9A5BFB00, 0x9A5CFB00, 0x9A5DFB00, 0x9A5EFB00, 0x9A5FFB00, 0x9A60FB00, 0x9A61FB00, 0x9A62FB00, 0x9A63FB00, 0x9A64FB00, + 0x9A65FB00, 0x9A66FB00, 0x9A67FB00, 0x9A68FB00, 0x9A69FB00, 0x9A6AFB00, 0x9A6BFB00, 0x9A6CFB00, 0x9A6DFB00, 0x9A6EFB00, 0x9A6FFB00, 0x9A70FB00, 0x9A71FB00, 0x9A72FB00, 0x9A73FB00, + 0x9A74FB00, 0x9A75FB00, 0x9A76FB00, 0x9A77FB00, 0x9A78FB00, 0x9A79FB00, 0x9A7AFB00, 0x9A7BFB00, 0x9A7CFB00, 0x9A7DFB00, 0x9A7EFB00, 0x9A7FFB00, 0x9A80FB00, 0x9A81FB00, 0x9A82FB00, + 0x9A83FB00, 0x9A84FB00, 0x9A85FB00, 0x9A86FB00, 0x9A87FB00, 0x9A88FB00, 0x9A89FB00, 0x9A8AFB00, 0x9A8BFB00, 0x9A8CFB00, 0x9A8DFB00, 0x9A8EFB00, 0x9A8FFB00, 0x9A90FB00, 0x9A91FB00, + 0x9A92FB00, 0x9A93FB00, 0x9A94FB00, 0x9A95FB00, 0x9A96FB00, 0x9A97FB00, 0x9A98FB00, 0x9A99FB00, 0x9A9AFB00, 0x9A9BFB00, 0x9A9CFB00, 0x9A9DFB00, 0x9A9EFB00, 0x9A9FFB00, 0x9AA0FB00, + 0x9AA1FB00, 0x9AA2FB00, 0x9AA3FB00, 0x9AA4FB00, 0x9AA5FB00, 0x9AA6FB00, 0x9AA7FB00, 0x9AA8FB00, 0x9AA9FB00, 0x9AAAFB00, 0x9AABFB00, 0x9AACFB00, 0x9AADFB00, 0x9AAEFB00, 0x9AAFFB00, + 0x9AB0FB00, 0x9AB1FB00, 0x9AB2FB00, 0x9AB3FB00, 0x9AB4FB00, 0x9AB5FB00, 0x9AB6FB00, 0x9AB7FB00, 0x9AB8FB00, 0x9AB9FB00, 0x9ABAFB00, 0x9ABBFB00, 0x9ABCFB00, 0x9ABDFB00, 0x9ABEFB00, + 0x9ABFFB00, 0x9AC0FB00, 0x9AC1FB00, 0x9AC2FB00, 0x9AC3FB00, 0x9AC4FB00, 0x9AC5FB00, 0x9AC6FB00, 0x9AC7FB00, 0x9AC8FB00, 
0x9AC9FB00, 0x9ACAFB00, 0x9ACBFB00, 0x9ACCFB00, 0x9ACDFB00, + 0x9ACEFB00, 0x9ACFFB00, 0x9AD0FB00, 0x9AD1FB00, 0x9AD2FB00, 0x9AD3FB00, 0x9AD4FB00, 0x9AD5FB00, 0x9AD6FB00, 0x9AD7FB00, 0x9AD8FB00, 0x9AD9FB00, 0x9ADAFB00, 0x9ADBFB00, 0x9ADCFB00, + 0x9ADDFB00, 0x9ADEFB00, 0x9ADFFB00, 0x9AE0FB00, 0x9AE1FB00, 0x9AE2FB00, 0x9AE3FB00, 0x9AE4FB00, 0x9AE5FB00, 0x9AE6FB00, 0x9AE7FB00, 0x9AE8FB00, 0x9AE9FB00, 0x9AEAFB00, 0x9AEBFB00, + 0x9AECFB00, 0x9AEDFB00, 0x9AEEFB00, 0x9AEFFB00, 0x9AF0FB00, 0x9AF1FB00, 0x9AF2FB00, 0x9AF3FB00, 0x9AF4FB00, 0x9AF5FB00, 0x9AF6FB00, 0x9AF7FB00, 0x9AF8FB00, 0x9AF9FB00, 0x9AFAFB00, + 0x9AFBFB00, 0x9AFCFB00, 0x9AFDFB00, 0x9AFEFB00, 0x9AFFFB00, 0x8B00FBC3, 0x8B01FBC3, 0x8B02FBC3, 0x8B03FBC3, 0x8B04FBC3, 0x8B05FBC3, 0x8B06FBC3, 0x8B07FBC3, 0x8B08FBC3, 0x8B09FBC3, + 0x8B0AFBC3, 0x8B0BFBC3, 0x8B0CFBC3, 0x8B0DFBC3, 0x8B0EFBC3, 0x8B0FFBC3, 0x8B10FBC3, 0x8B11FBC3, 0x8B12FBC3, 0x8B13FBC3, 0x8B14FBC3, 0x8B15FBC3, 0x8B16FBC3, 0x8B17FBC3, 0x8B18FBC3, + 0x8B19FBC3, 0x8B1AFBC3, 0x8B1BFBC3, 0x8B1CFBC3, 0x8B1DFBC3, 0x8B1EFBC3, 0x8B1FFBC3, 0x8B20FBC3, 0x8B21FBC3, 0x8B22FBC3, 0x8B23FBC3, 0x8B24FBC3, 0x8B25FBC3, 0x8B26FBC3, 0x8B27FBC3, + 0x8B28FBC3, 0x8B29FBC3, 0x8B2AFBC3, 0x8B2BFBC3, 0x8B2CFBC3, 0x8B2DFBC3, 0x8B2EFBC3, 0x8B2FFBC3, 0x8B30FBC3, 0x8B31FBC3, 0x8B32FBC3, 0x8B33FBC3, 0x8B34FBC3, 0x8B35FBC3, 0x8B36FBC3, + 0x8B37FBC3, 0x8B38FBC3, 0x8B39FBC3, 0x8B3AFBC3, 0x8B3BFBC3, 0x8B3CFBC3, 0x8B3DFBC3, 0x8B3EFBC3, 0x8B3FFBC3, 0x8B40FBC3, 0x8B41FBC3, 0x8B42FBC3, 0x8B43FBC3, 0x8B44FBC3, 0x8B45FBC3, + 0x8B46FBC3, 0x8B47FBC3, 0x8B48FBC3, 0x8B49FBC3, 0x8B4AFBC3, 0x8B4BFBC3, 0x8B4CFBC3, 0x8B4DFBC3, 0x8B4EFBC3, 0x8B4FFBC3, 0x8B50FBC3, 0x8B51FBC3, 0x8B52FBC3, 0x8B53FBC3, 0x8B54FBC3, + 0x8B55FBC3, 0x8B56FBC3, 0x8B57FBC3, 0x8B58FBC3, 0x8B59FBC3, 0x8B5AFBC3, 0x8B5BFBC3, 0x8B5CFBC3, 0x8B5DFBC3, 0x8B5EFBC3, 0x8B5FFBC3, 0x8B60FBC3, 0x8B61FBC3, 0x8B62FBC3, 0x8B63FBC3, + 0x8B64FBC3, 0x8B65FBC3, 0x8B66FBC3, 0x8B67FBC3, 0x8B68FBC3, 0x8B69FBC3, 0x8B6AFBC3, 0x8B6BFBC3, 0x8B6CFBC3, 
0x8B6DFBC3, 0x8B6EFBC3, 0x8B6FFBC3, 0x8B70FBC3, 0x8B71FBC3, 0x8B72FBC3, + 0x8B73FBC3, 0x8B74FBC3, 0x8B75FBC3, 0x8B76FBC3, 0x8B77FBC3, 0x8B78FBC3, 0x8B79FBC3, 0x8B7AFBC3, 0x8B7BFBC3, 0x8B7CFBC3, 0x8B7DFBC3, 0x8B7EFBC3, 0x8B7FFBC3, 0x8B80FBC3, 0x8B81FBC3, + 0x8B82FBC3, 0x8B83FBC3, 0x8B84FBC3, 0x8B85FBC3, 0x8B86FBC3, 0x8B87FBC3, 0x8B88FBC3, 0x8B89FBC3, 0x8B8AFBC3, 0x8B8BFBC3, 0x8B8CFBC3, 0x8B8DFBC3, 0x8B8EFBC3, 0x8B8FFBC3, 0x8B90FBC3, + 0x8B91FBC3, 0x8B92FBC3, 0x8B93FBC3, 0x8B94FBC3, 0x8B95FBC3, 0x8B96FBC3, 0x8B97FBC3, 0x8B98FBC3, 0x8B99FBC3, 0x8B9AFBC3, 0x8B9BFBC3, 0x8B9CFBC3, 0x8B9DFBC3, 0x8B9EFBC3, 0x8B9FFBC3, + 0x8BA0FBC3, 0x8BA1FBC3, 0x8BA2FBC3, 0x8BA3FBC3, 0x8BA4FBC3, 0x8BA5FBC3, 0x8BA6FBC3, 0x8BA7FBC3, 0x8BA8FBC3, 0x8BA9FBC3, 0x8BAAFBC3, 0x8BABFBC3, 0x8BACFBC3, 0x8BADFBC3, 0x8BAEFBC3, + 0x8BAFFBC3, 0x8BB0FBC3, 0x8BB1FBC3, 0x8BB2FBC3, 0x8BB3FBC3, 0x8BB4FBC3, 0x8BB5FBC3, 0x8BB6FBC3, 0x8BB7FBC3, 0x8BB8FBC3, 0x8BB9FBC3, 0x8BBAFBC3, 0x8BBBFBC3, 0x8BBCFBC3, 0x8BBDFBC3, + 0x8BBEFBC3, 0x8BBFFBC3, 0x8BC0FBC3, 0x8BC1FBC3, 0x8BC2FBC3, 0x8BC3FBC3, 0x8BC4FBC3, 0x8BC5FBC3, 0x8BC6FBC3, 0x8BC7FBC3, 0x8BC8FBC3, 0x8BC9FBC3, 0x8BCAFBC3, 0x8BCBFBC3, 0x8BCCFBC3, + 0x8BCDFBC3, 0x8BCEFBC3, 0x8BCFFBC3, 0x8BD0FBC3, 0x8BD1FBC3, 0x8BD2FBC3, 0x8BD3FBC3, 0x8BD4FBC3, 0x8BD5FBC3, 0x8BD6FBC3, 0x8BD7FBC3, 0x8BD8FBC3, 0x8BD9FBC3, 0x8BDAFBC3, 0x8BDBFBC3, + 0x8BDCFBC3, 0x8BDDFBC3, 0x8BDEFBC3, 0x8BDFFBC3, 0x8BE0FBC3, 0x8BE1FBC3, 0x8BE2FBC3, 0x8BE3FBC3, 0x8BE4FBC3, 0x8BE5FBC3, 0x8BE6FBC3, 0x8BE7FBC3, 0x8BE8FBC3, 0x8BE9FBC3, 0x8BEAFBC3, + 0x8BEBFBC3, 0x8BECFBC3, 0x8BEDFBC3, 0x8BEEFBC3, 0x8BEFFBC3, 0x8BF0FBC3, 0x8BF1FBC3, 0x8BF2FBC3, 0x8BF3FBC3, 0x8BF4FBC3, 0x8BF5FBC3, 0x8BF6FBC3, 0x8BF7FBC3, 0x8BF8FBC3, 0x8BF9FBC3, + 0x8BFAFBC3, 0x8BFBFBC3, 0x8BFCFBC3, 0x8BFDFBC3, 0x8BFEFBC3, 0x8BFFFBC3, 0x8C00FBC3, 0x8C01FBC3, 0x8C02FBC3, 0x8C03FBC3, 0x8C04FBC3, 0x8C05FBC3, 0x8C06FBC3, 0x8C07FBC3, 0x8C08FBC3, + 0x8C09FBC3, 0x8C0AFBC3, 0x8C0BFBC3, 0x8C0CFBC3, 0x8C0DFBC3, 0x8C0EFBC3, 0x8C0FFBC3, 0x8C10FBC3, 
0x8C11FBC3, 0x8C12FBC3, 0x8C13FBC3, 0x8C14FBC3, 0x8C15FBC3, 0x8C16FBC3, 0x8C17FBC3, + 0x8C18FBC3, 0x8C19FBC3, 0x8C1AFBC3, 0x8C1BFBC3, 0x8C1CFBC3, 0x8C1DFBC3, 0x8C1EFBC3, 0x8C1FFBC3, 0x8C20FBC3, 0x8C21FBC3, 0x8C22FBC3, 0x8C23FBC3, 0x8C24FBC3, 0x8C25FBC3, 0x8C26FBC3, + 0x8C27FBC3, 0x8C28FBC3, 0x8C29FBC3, 0x8C2AFBC3, 0x8C2BFBC3, 0x8C2CFBC3, 0x8C2DFBC3, 0x8C2EFBC3, 0x8C2FFBC3, 0x8C30FBC3, 0x8C31FBC3, 0x8C32FBC3, 0x8C33FBC3, 0x8C34FBC3, 0x8C35FBC3, + 0x8C36FBC3, 0x8C37FBC3, 0x8C38FBC3, 0x8C39FBC3, 0x8C3AFBC3, 0x8C3BFBC3, 0x8C3CFBC3, 0x8C3DFBC3, 0x8C3EFBC3, 0x8C3FFBC3, 0x8C40FBC3, 0x8C41FBC3, 0x8C42FBC3, 0x8C43FBC3, 0x8C44FBC3, + 0x8C45FBC3, 0x8C46FBC3, 0x8C47FBC3, 0x8C48FBC3, 0x8C49FBC3, 0x8C4AFBC3, 0x8C4BFBC3, 0x8C4CFBC3, 0x8C4DFBC3, 0x8C4EFBC3, 0x8C4FFBC3, 0x8C50FBC3, 0x8C51FBC3, 0x8C52FBC3, 0x8C53FBC3, + 0x8C54FBC3, 0x8C55FBC3, 0x8C56FBC3, 0x8C57FBC3, 0x8C58FBC3, 0x8C59FBC3, 0x8C5AFBC3, 0x8C5BFBC3, 0x8C5CFBC3, 0x8C5DFBC3, 0x8C5EFBC3, 0x8C5FFBC3, 0x8C60FBC3, 0x8C61FBC3, 0x8C62FBC3, + 0x8C63FBC3, 0x8C64FBC3, 0x8C65FBC3, 0x8C66FBC3, 0x8C67FBC3, 0x8C68FBC3, 0x8C69FBC3, 0x8C6AFBC3, 0x8C6BFBC3, 0x8C6CFBC3, 0x8C6DFBC3, 0x8C6EFBC3, 0x8C6FFBC3, 0x8C70FBC3, 0x8C71FBC3, + 0x8C72FBC3, 0x8C73FBC3, 0x8C74FBC3, 0x8C75FBC3, 0x8C76FBC3, 0x8C77FBC3, 0x8C78FBC3, 0x8C79FBC3, 0x8C7AFBC3, 0x8C7BFBC3, 0x8C7CFBC3, 0x8C7DFBC3, 0x8C7EFBC3, 0x8C7FFBC3, 0x8C80FBC3, + 0x8C81FBC3, 0x8C82FBC3, 0x8C83FBC3, 0x8C84FBC3, 0x8C85FBC3, 0x8C86FBC3, 0x8C87FBC3, 0x8C88FBC3, 0x8C89FBC3, 0x8C8AFBC3, 0x8C8BFBC3, 0x8C8CFBC3, 0x8C8DFBC3, 0x8C8EFBC3, 0x8C8FFBC3, + 0x8C90FBC3, 0x8C91FBC3, 0x8C92FBC3, 0x8C93FBC3, 0x8C94FBC3, 0x8C95FBC3, 0x8C96FBC3, 0x8C97FBC3, 0x8C98FBC3, 0x8C99FBC3, 0x8C9AFBC3, 0x8C9BFBC3, 0x8C9CFBC3, 0x8C9DFBC3, 0x8C9EFBC3, + 0x8C9FFBC3, 0x8CA0FBC3, 0x8CA1FBC3, 0x8CA2FBC3, 0x8CA3FBC3, 0x8CA4FBC3, 0x8CA5FBC3, 0x8CA6FBC3, 0x8CA7FBC3, 0x8CA8FBC3, 0x8CA9FBC3, 0x8CAAFBC3, 0x8CABFBC3, 0x8CACFBC3, 0x8CADFBC3, + 0x8CAEFBC3, 0x8CAFFBC3, 0x8CB0FBC3, 0x8CB1FBC3, 0x8CB2FBC3, 0x8CB3FBC3, 0x8CB4FBC3, 
0x8CB5FBC3, 0x8CB6FBC3, 0x8CB7FBC3, 0x8CB8FBC3, 0x8CB9FBC3, 0x8CBAFBC3, 0x8CBBFBC3, 0x8CBCFBC3, + 0x8CBDFBC3, 0x8CBEFBC3, 0x8CBFFBC3, 0x8CC0FBC3, 0x8CC1FBC3, 0x8CC2FBC3, 0x8CC3FBC3, 0x8CC4FBC3, 0x8CC5FBC3, 0x8CC6FBC3, 0x8CC7FBC3, 0x8CC8FBC3, 0x8CC9FBC3, 0x8CCAFBC3, 0x8CCBFBC3, + 0x8CCCFBC3, 0x8CCDFBC3, 0x8CCEFBC3, 0x8CCFFBC3, 0x8CD0FBC3, 0x8CD1FBC3, 0x8CD2FBC3, 0x8CD3FBC3, 0x8CD4FBC3, 0x8CD5FBC3, 0x8CD6FBC3, 0x8CD7FBC3, 0x8CD8FBC3, 0x8CD9FBC3, 0x8CDAFBC3, + 0x8CDBFBC3, 0x8CDCFBC3, 0x8CDDFBC3, 0x8CDEFBC3, 0x8CDFFBC3, 0x8CE0FBC3, 0x8CE1FBC3, 0x8CE2FBC3, 0x8CE3FBC3, 0x8CE4FBC3, 0x8CE5FBC3, 0x8CE6FBC3, 0x8CE7FBC3, 0x8CE8FBC3, 0x8CE9FBC3, + 0x8CEAFBC3, 0x8CEBFBC3, 0x8CECFBC3, 0x8CEDFBC3, 0x8CEEFBC3, 0x8CEFFBC3, 0x8CF0FBC3, 0x8CF1FBC3, 0x8CF2FBC3, 0x8CF3FBC3, 0x8CF4FBC3, 0x8CF5FBC3, 0x8CF6FBC3, 0x8CF7FBC3, 0x8CF8FBC3, + 0x8CF9FBC3, 0x8CFAFBC3, 0x8CFBFBC3, 0x8CFCFBC3, 0x8CFDFBC3, 0x8CFEFBC3, 0x8CFFFBC3, 0x8D00FBC3, 0x8D01FBC3, 0x8D02FBC3, 0x8D03FBC3, 0x8D04FBC3, 0x8D05FBC3, 0x8D06FBC3, 0x8D07FBC3, + 0x8D08FBC3, 0x8D09FBC3, 0x8D0AFBC3, 0x8D0BFBC3, 0x8D0CFBC3, 0x8D0DFBC3, 0x8D0EFBC3, 0x8D0FFBC3, 0x8D10FBC3, 0x8D11FBC3, 0x8D12FBC3, 0x8D13FBC3, 0x8D14FBC3, 0x8D15FBC3, 0x8D16FBC3, + 0x8D17FBC3, 0x8D18FBC3, 0x8D19FBC3, 0x8D1AFBC3, 0x8D1BFBC3, 0x8D1CFBC3, 0x8D1DFBC3, 0x8D1EFBC3, 0x8D1FFBC3, 0x8D20FBC3, 0x8D21FBC3, 0x8D22FBC3, 0x8D23FBC3, 0x8D24FBC3, 0x8D25FBC3, + 0x8D26FBC3, 0x8D27FBC3, 0x8D28FBC3, 0x8D29FBC3, 0x8D2AFBC3, 0x8D2BFBC3, 0x8D2CFBC3, 0x8D2DFBC3, 0x8D2EFBC3, 0x8D2FFBC3, 0x8D30FBC3, 0x8D31FBC3, 0x8D32FBC3, 0x8D33FBC3, 0x8D34FBC3, + 0x8D35FBC3, 0x8D36FBC3, 0x8D37FBC3, 0x8D38FBC3, 0x8D39FBC3, 0x8D3AFBC3, 0x8D3BFBC3, 0x8D3CFBC3, 0x8D3DFBC3, 0x8D3EFBC3, 0x8D3FFBC3, 0x8D40FBC3, 0x8D41FBC3, 0x8D42FBC3, 0x8D43FBC3, + 0x8D44FBC3, 0x8D45FBC3, 0x8D46FBC3, 0x8D47FBC3, 0x8D48FBC3, 0x8D49FBC3, 0x8D4AFBC3, 0x8D4BFBC3, 0x8D4CFBC3, 0x8D4DFBC3, 0x8D4EFBC3, 0x8D4FFBC3, 0x8D50FBC3, 0x8D51FBC3, 0x8D52FBC3, + 0x8D53FBC3, 0x8D54FBC3, 0x8D55FBC3, 0x8D56FBC3, 0x8D57FBC3, 0x8D58FBC3, 
0x8D59FBC3, 0x8D5AFBC3, 0x8D5BFBC3, 0x8D5CFBC3, 0x8D5DFBC3, 0x8D5EFBC3, 0x8D5FFBC3, 0x8D60FBC3, 0x8D61FBC3, + 0x8D62FBC3, 0x8D63FBC3, 0x8D64FBC3, 0x8D65FBC3, 0x8D66FBC3, 0x8D67FBC3, 0x8D68FBC3, 0x8D69FBC3, 0x8D6AFBC3, 0x8D6BFBC3, 0x8D6CFBC3, 0x8D6DFBC3, 0x8D6EFBC3, 0x8D6FFBC3, 0x8D70FBC3, + 0x8D71FBC3, 0x8D72FBC3, 0x8D73FBC3, 0x8D74FBC3, 0x8D75FBC3, 0x8D76FBC3, 0x8D77FBC3, 0x8D78FBC3, 0x8D79FBC3, 0x8D7AFBC3, 0x8D7BFBC3, 0x8D7CFBC3, 0x8D7DFBC3, 0x8D7EFBC3, 0x8D7FFBC3, + 0x8D80FBC3, 0x8D81FBC3, 0x8D82FBC3, 0x8D83FBC3, 0x8D84FBC3, 0x8D85FBC3, 0x8D86FBC3, 0x8D87FBC3, 0x8D88FBC3, 0x8D89FBC3, 0x8D8AFBC3, 0x8D8BFBC3, 0x8D8CFBC3, 0x8D8DFBC3, 0x8D8EFBC3, + 0x8D8FFBC3, 0x8D90FBC3, 0x8D91FBC3, 0x8D92FBC3, 0x8D93FBC3, 0x8D94FBC3, 0x8D95FBC3, 0x8D96FBC3, 0x8D97FBC3, 0x8D98FBC3, 0x8D99FBC3, 0x8D9AFBC3, 0x8D9BFBC3, 0x8D9CFBC3, 0x8D9DFBC3, + 0x8D9EFBC3, 0x8D9FFBC3, 0x8DA0FBC3, 0x8DA1FBC3, 0x8DA2FBC3, 0x8DA3FBC3, 0x8DA4FBC3, 0x8DA5FBC3, 0x8DA6FBC3, 0x8DA7FBC3, 0x8DA8FBC3, 0x8DA9FBC3, 0x8DAAFBC3, 0x8DABFBC3, 0x8DACFBC3, + 0x8DADFBC3, 0x8DAEFBC3, 0x8DAFFBC3, 0x8DB0FBC3, 0x8DB1FBC3, 0x8DB2FBC3, 0x8DB3FBC3, 0x8DB4FBC3, 0x8DB5FBC3, 0x8DB6FBC3, 0x8DB7FBC3, 0x8DB8FBC3, 0x8DB9FBC3, 0x8DBAFBC3, 0x8DBBFBC3, + 0x8DBCFBC3, 0x8DBDFBC3, 0x8DBEFBC3, 0x8DBFFBC3, 0x8DC0FBC3, 0x8DC1FBC3, 0x8DC2FBC3, 0x8DC3FBC3, 0x8DC4FBC3, 0x8DC5FBC3, 0x8DC6FBC3, 0x8DC7FBC3, 0x8DC8FBC3, 0x8DC9FBC3, 0x8DCAFBC3, + 0x8DCBFBC3, 0x8DCCFBC3, 0x8DCDFBC3, 0x8DCEFBC3, 0x8DCFFBC3, 0x8DD0FBC3, 0x8DD1FBC3, 0x8DD2FBC3, 0x8DD3FBC3, 0x8DD4FBC3, 0x8DD5FBC3, 0x8DD6FBC3, 0x8DD7FBC3, 0x8DD8FBC3, 0x8DD9FBC3, + 0x8DDAFBC3, 0x8DDBFBC3, 0x8DDCFBC3, 0x8DDDFBC3, 0x8DDEFBC3, 0x8DDFFBC3, 0x8DE0FBC3, 0x8DE1FBC3, 0x8DE2FBC3, 0x8DE3FBC3, 0x8DE4FBC3, 0x8DE5FBC3, 0x8DE6FBC3, 0x8DE7FBC3, 0x8DE8FBC3, + 0x8DE9FBC3, 0x8DEAFBC3, 0x8DEBFBC3, 0x8DECFBC3, 0x8DEDFBC3, 0x8DEEFBC3, 0x8DEFFBC3, 0x8DF0FBC3, 0x8DF1FBC3, 0x8DF2FBC3, 0x8DF3FBC3, 0x8DF4FBC3, 0x8DF5FBC3, 0x8DF6FBC3, 0x8DF7FBC3, + 0x8DF8FBC3, 0x8DF9FBC3, 0x8DFAFBC3, 0x8DFBFBC3, 0x8DFCFBC3, 
0x8DFDFBC3, 0x8DFEFBC3, 0x8DFFFBC3, 0x8E00FBC3, 0x8E01FBC3, 0x8E02FBC3, 0x8E03FBC3, 0x8E04FBC3, 0x8E05FBC3, 0x8E06FBC3, + 0x8E07FBC3, 0x8E08FBC3, 0x8E09FBC3, 0x8E0AFBC3, 0x8E0BFBC3, 0x8E0CFBC3, 0x8E0DFBC3, 0x8E0EFBC3, 0x8E0FFBC3, 0x8E10FBC3, 0x8E11FBC3, 0x8E12FBC3, 0x8E13FBC3, 0x8E14FBC3, 0x8E15FBC3, + 0x8E16FBC3, 0x8E17FBC3, 0x8E18FBC3, 0x8E19FBC3, 0x8E1AFBC3, 0x8E1BFBC3, 0x8E1CFBC3, 0x8E1DFBC3, 0x8E1EFBC3, 0x8E1FFBC3, 0x8E20FBC3, 0x8E21FBC3, 0x8E22FBC3, 0x8E23FBC3, 0x8E24FBC3, + 0x8E25FBC3, 0x8E26FBC3, 0x8E27FBC3, 0x8E28FBC3, 0x8E29FBC3, 0x8E2AFBC3, 0x8E2BFBC3, 0x8E2CFBC3, 0x8E2DFBC3, 0x8E2EFBC3, 0x8E2FFBC3, 0x8E30FBC3, 0x8E31FBC3, 0x8E32FBC3, 0x8E33FBC3, + 0x8E34FBC3, 0x8E35FBC3, 0x8E36FBC3, 0x8E37FBC3, 0x8E38FBC3, 0x8E39FBC3, 0x8E3AFBC3, 0x8E3BFBC3, 0x8E3CFBC3, 0x8E3DFBC3, 0x8E3EFBC3, 0x8E3FFBC3, 0x8E40FBC3, 0x8E41FBC3, 0x8E42FBC3, + 0x8E43FBC3, 0x8E44FBC3, 0x8E45FBC3, 0x8E46FBC3, 0x8E47FBC3, 0x8E48FBC3, 0x8E49FBC3, 0x8E4AFBC3, 0x8E4BFBC3, 0x8E4CFBC3, 0x8E4DFBC3, 0x8E4EFBC3, 0x8E4FFBC3, 0x8E50FBC3, 0x8E51FBC3, + 0x8E52FBC3, 0x8E53FBC3, 0x8E54FBC3, 0x8E55FBC3, 0x8E56FBC3, 0x8E57FBC3, 0x8E58FBC3, 0x8E59FBC3, 0x8E5AFBC3, 0x8E5BFBC3, 0x8E5CFBC3, 0x8E5DFBC3, 0x8E5EFBC3, 0x8E5FFBC3, 0x8E60FBC3, + 0x8E61FBC3, 0x8E62FBC3, 0x8E63FBC3, 0x8E64FBC3, 0x8E65FBC3, 0x8E66FBC3, 0x8E67FBC3, 0x8E68FBC3, 0x8E69FBC3, 0x8E6AFBC3, 0x8E6BFBC3, 0x8E6CFBC3, 0x8E6DFBC3, 0x8E6EFBC3, 0x8E6FFBC3, + 0x8E70FBC3, 0x8E71FBC3, 0x8E72FBC3, 0x8E73FBC3, 0x8E74FBC3, 0x8E75FBC3, 0x8E76FBC3, 0x8E77FBC3, 0x8E78FBC3, 0x8E79FBC3, 0x8E7AFBC3, 0x8E7BFBC3, 0x8E7CFBC3, 0x8E7DFBC3, 0x8E7EFBC3, + 0x8E7FFBC3, 0x8E80FBC3, 0x8E81FBC3, 0x8E82FBC3, 0x8E83FBC3, 0x8E84FBC3, 0x8E85FBC3, 0x8E86FBC3, 0x8E87FBC3, 0x8E88FBC3, 0x8E89FBC3, 0x8E8AFBC3, 0x8E8BFBC3, 0x8E8CFBC3, 0x8E8DFBC3, + 0x8E8EFBC3, 0x8E8FFBC3, 0x8E90FBC3, 0x8E91FBC3, 0x8E92FBC3, 0x8E93FBC3, 0x8E94FBC3, 0x8E95FBC3, 0x8E96FBC3, 0x8E97FBC3, 0x8E98FBC3, 0x8E99FBC3, 0x8E9AFBC3, 0x8E9BFBC3, 0x8E9CFBC3, + 0x8E9DFBC3, 0x8E9EFBC3, 0x8E9FFBC3, 0x8EA0FBC3, 
0x8EA1FBC3, 0x8EA2FBC3, 0x8EA3FBC3, 0x8EA4FBC3, 0x8EA5FBC3, 0x8EA6FBC3, 0x8EA7FBC3, 0x8EA8FBC3, 0x8EA9FBC3, 0x8EAAFBC3, 0x8EABFBC3, + 0x8EACFBC3, 0x8EADFBC3, 0x8EAEFBC3, 0x8EAFFBC3, 0x8EB0FBC3, 0x8EB1FBC3, 0x8EB2FBC3, 0x8EB3FBC3, 0x8EB4FBC3, 0x8EB5FBC3, 0x8EB6FBC3, 0x8EB7FBC3, 0x8EB8FBC3, 0x8EB9FBC3, 0x8EBAFBC3, + 0x8EBBFBC3, 0x8EBCFBC3, 0x8EBDFBC3, 0x8EBEFBC3, 0x8EBFFBC3, 0x8EC0FBC3, 0x8EC1FBC3, 0x8EC2FBC3, 0x8EC3FBC3, 0x8EC4FBC3, 0x8EC5FBC3, 0x8EC6FBC3, 0x8EC7FBC3, 0x8EC8FBC3, 0x8EC9FBC3, + 0x8ECAFBC3, 0x8ECBFBC3, 0x8ECCFBC3, 0x8ECDFBC3, 0x8ECEFBC3, 0x8ECFFBC3, 0x8ED0FBC3, 0x8ED1FBC3, 0x8ED2FBC3, 0x8ED3FBC3, 0x8ED4FBC3, 0x8ED5FBC3, 0x8ED6FBC3, 0x8ED7FBC3, 0x8ED8FBC3, + 0x8ED9FBC3, 0x8EDAFBC3, 0x8EDBFBC3, 0x8EDCFBC3, 0x8EDDFBC3, 0x8EDEFBC3, 0x8EDFFBC3, 0x8EE0FBC3, 0x8EE1FBC3, 0x8EE2FBC3, 0x8EE3FBC3, 0x8EE4FBC3, 0x8EE5FBC3, 0x8EE6FBC3, 0x8EE7FBC3, + 0x8EE8FBC3, 0x8EE9FBC3, 0x8EEAFBC3, 0x8EEBFBC3, 0x8EECFBC3, 0x8EEDFBC3, 0x8EEEFBC3, 0x8EEFFBC3, 0x8EF0FBC3, 0x8EF1FBC3, 0x8EF2FBC3, 0x8EF3FBC3, 0x8EF4FBC3, 0x8EF5FBC3, 0x8EF6FBC3, + 0x8EF7FBC3, 0x8EF8FBC3, 0x8EF9FBC3, 0x8EFAFBC3, 0x8EFBFBC3, 0x8EFCFBC3, 0x8EFDFBC3, 0x8EFEFBC3, 0x8EFFFBC3, 0x8F00FBC3, 0x8F01FBC3, 0x8F02FBC3, 0x8F03FBC3, 0x8F04FBC3, 0x8F05FBC3, + 0x8F06FBC3, 0x8F07FBC3, 0x8F08FBC3, 0x8F09FBC3, 0x8F0AFBC3, 0x8F0BFBC3, 0x8F0CFBC3, 0x8F0DFBC3, 0x8F0EFBC3, 0x8F0FFBC3, 0x8F10FBC3, 0x8F11FBC3, 0x8F12FBC3, 0x8F13FBC3, 0x8F14FBC3, + 0x8F15FBC3, 0x8F16FBC3, 0x8F17FBC3, 0x8F18FBC3, 0x8F19FBC3, 0x8F1AFBC3, 0x8F1BFBC3, 0x8F1CFBC3, 0x8F1DFBC3, 0x8F1EFBC3, 0x8F1FFBC3, 0x8F20FBC3, 0x8F21FBC3, 0x8F22FBC3, 0x8F23FBC3, + 0x8F24FBC3, 0x8F25FBC3, 0x8F26FBC3, 0x8F27FBC3, 0x8F28FBC3, 0x8F29FBC3, 0x8F2AFBC3, 0x8F2BFBC3, 0x8F2CFBC3, 0x8F2DFBC3, 0x8F2EFBC3, 0x8F2FFBC3, 0x8F30FBC3, 0x8F31FBC3, 0x8F32FBC3, + 0x8F33FBC3, 0x8F34FBC3, 0x8F35FBC3, 0x8F36FBC3, 0x8F37FBC3, 0x8F38FBC3, 0x8F39FBC3, 0x8F3AFBC3, 0x8F3BFBC3, 0x8F3CFBC3, 0x8F3DFBC3, 0x8F3EFBC3, 0x8F3FFBC3, 0x8F40FBC3, 0x8F41FBC3, + 0x8F42FBC3, 0x8F43FBC3, 0x8F44FBC3, 
0x8F45FBC3, 0x8F46FBC3, 0x8F47FBC3, 0x8F48FBC3, 0x8F49FBC3, 0x8F4AFBC3, 0x8F4BFBC3, 0x8F4CFBC3, 0x8F4DFBC3, 0x8F4EFBC3, 0x8F4FFBC3, 0x8F50FBC3, + 0x8F51FBC3, 0x8F52FBC3, 0x8F53FBC3, 0x8F54FBC3, 0x8F55FBC3, 0x8F56FBC3, 0x8F57FBC3, 0x8F58FBC3, 0x8F59FBC3, 0x8F5AFBC3, 0x8F5BFBC3, 0x8F5CFBC3, 0x8F5DFBC3, 0x8F5EFBC3, 0x8F5FFBC3, + 0x8F60FBC3, 0x8F61FBC3, 0x8F62FBC3, 0x8F63FBC3, 0x8F64FBC3, 0x8F65FBC3, 0x8F66FBC3, 0x8F67FBC3, 0x8F68FBC3, 0x8F69FBC3, 0x8F6AFBC3, 0x8F6BFBC3, 0x8F6CFBC3, 0x8F6DFBC3, 0x8F6EFBC3, + 0x8F6FFBC3, 0x8F70FBC3, 0x8F71FBC3, 0x8F72FBC3, 0x8F73FBC3, 0x8F74FBC3, 0x8F75FBC3, 0x8F76FBC3, 0x8F77FBC3, 0x8F78FBC3, 0x8F79FBC3, 0x8F7AFBC3, 0x8F7BFBC3, 0x8F7CFBC3, 0x8F7DFBC3, + 0x8F7EFBC3, 0x8F7FFBC3, 0x8F80FBC3, 0x8F81FBC3, 0x8F82FBC3, 0x8F83FBC3, 0x8F84FBC3, 0x8F85FBC3, 0x8F86FBC3, 0x8F87FBC3, 0x8F88FBC3, 0x8F89FBC3, 0x8F8AFBC3, 0x8F8BFBC3, 0x8F8CFBC3, + 0x8F8DFBC3, 0x8F8EFBC3, 0x8F8FFBC3, 0x8F90FBC3, 0x8F91FBC3, 0x8F92FBC3, 0x8F93FBC3, 0x8F94FBC3, 0x8F95FBC3, 0x8F96FBC3, 0x8F97FBC3, 0x8F98FBC3, 0x8F99FBC3, 0x8F9AFBC3, 0x8F9BFBC3, + 0x8F9CFBC3, 0x8F9DFBC3, 0x8F9EFBC3, 0x8F9FFBC3, 0x8FA0FBC3, 0x8FA1FBC3, 0x8FA2FBC3, 0x8FA3FBC3, 0x8FA4FBC3, 0x8FA5FBC3, 0x8FA6FBC3, 0x8FA7FBC3, 0x8FA8FBC3, 0x8FA9FBC3, 0x8FAAFBC3, + 0x8FABFBC3, 0x8FACFBC3, 0x8FADFBC3, 0x8FAEFBC3, 0x8FAFFBC3, 0x8FB0FBC3, 0x8FB1FBC3, 0x8FB2FBC3, 0x8FB3FBC3, 0x8FB4FBC3, 0x8FB5FBC3, 0x8FB6FBC3, 0x8FB7FBC3, 0x8FB8FBC3, 0x8FB9FBC3, + 0x8FBAFBC3, 0x8FBBFBC3, 0x8FBCFBC3, 0x8FBDFBC3, 0x8FBEFBC3, 0x8FBFFBC3, 0x8FC0FBC3, 0x8FC1FBC3, 0x8FC2FBC3, 0x8FC3FBC3, 0x8FC4FBC3, 0x8FC5FBC3, 0x8FC6FBC3, 0x8FC7FBC3, 0x8FC8FBC3, + 0x8FC9FBC3, 0x8FCAFBC3, 0x8FCBFBC3, 0x8FCCFBC3, 0x8FCDFBC3, 0x8FCEFBC3, 0x8FCFFBC3, 0x8FD0FBC3, 0x8FD1FBC3, 0x8FD2FBC3, 0x8FD3FBC3, 0x8FD4FBC3, 0x8FD5FBC3, 0x8FD6FBC3, 0x8FD7FBC3, + 0x8FD8FBC3, 0x8FD9FBC3, 0x8FDAFBC3, 0x8FDBFBC3, 0x8FDCFBC3, 0x8FDDFBC3, 0x8FDEFBC3, 0x8FDFFBC3, 0x8FE0FBC3, 0x8FE1FBC3, 0x8FE2FBC3, 0x8FE3FBC3, 0x8FE4FBC3, 0x8FE5FBC3, 0x8FE6FBC3, + 0x8FE7FBC3, 0x8FE8FBC3, 
0x8FE9FBC3, 0x8FEAFBC3, 0x8FEBFBC3, 0x8FECFBC3, 0x8FEDFBC3, 0x8FEEFBC3, 0x8FEFFBC3, 0x8FF0FBC3, 0x8FF1FBC3, 0x8FF2FBC3, 0x8FF3FBC3, 0x8FF4FBC3, 0x8FF5FBC3, + 0x8FF6FBC3, 0x8FF7FBC3, 0x8FF8FBC3, 0x8FF9FBC3, 0x8FFAFBC3, 0x8FFBFBC3, 0x8FFCFBC3, 0x8FFDFBC3, 0x8FFEFBC3, 0x8FFFFBC3, 0x9000FBC3, 0x9001FBC3, 0x9002FBC3, 0x9003FBC3, 0x9004FBC3, + 0x9005FBC3, 0x9006FBC3, 0x9007FBC3, 0x9008FBC3, 0x9009FBC3, 0x900AFBC3, 0x900BFBC3, 0x900CFBC3, 0x900DFBC3, 0x900EFBC3, 0x900FFBC3, 0x9010FBC3, 0x9011FBC3, 0x9012FBC3, 0x9013FBC3, + 0x9014FBC3, 0x9015FBC3, 0x9016FBC3, 0x9017FBC3, 0x9018FBC3, 0x9019FBC3, 0x901AFBC3, 0x901BFBC3, 0x901CFBC3, 0x901DFBC3, 0x901EFBC3, 0x901FFBC3, 0x9020FBC3, 0x9021FBC3, 0x9022FBC3, + 0x9023FBC3, 0x9024FBC3, 0x9025FBC3, 0x9026FBC3, 0x9027FBC3, 0x9028FBC3, 0x9029FBC3, 0x902AFBC3, 0x902BFBC3, 0x902CFBC3, 0x902DFBC3, 0x902EFBC3, 0x902FFBC3, 0x9030FBC3, 0x9031FBC3, + 0x9032FBC3, 0x9033FBC3, 0x9034FBC3, 0x9035FBC3, 0x9036FBC3, 0x9037FBC3, 0x9038FBC3, 0x9039FBC3, 0x903AFBC3, 0x903BFBC3, 0x903CFBC3, 0x903DFBC3, 0x903EFBC3, 0x903FFBC3, 0x9040FBC3, + 0x9041FBC3, 0x9042FBC3, 0x9043FBC3, 0x9044FBC3, 0x9045FBC3, 0x9046FBC3, 0x9047FBC3, 0x9048FBC3, 0x9049FBC3, 0x904AFBC3, 0x904BFBC3, 0x904CFBC3, 0x904DFBC3, 0x904EFBC3, 0x904FFBC3, + 0x9050FBC3, 0x9051FBC3, 0x9052FBC3, 0x9053FBC3, 0x9054FBC3, 0x9055FBC3, 0x9056FBC3, 0x9057FBC3, 0x9058FBC3, 0x9059FBC3, 0x905AFBC3, 0x905BFBC3, 0x905CFBC3, 0x905DFBC3, 0x905EFBC3, + 0x905FFBC3, 0x9060FBC3, 0x9061FBC3, 0x9062FBC3, 0x9063FBC3, 0x9064FBC3, 0x9065FBC3, 0x9066FBC3, 0x9067FBC3, 0x9068FBC3, 0x9069FBC3, 0x906AFBC3, 0x906BFBC3, 0x906CFBC3, 0x906DFBC3, + 0x906EFBC3, 0x906FFBC3, 0x9070FBC3, 0x9071FBC3, 0x9072FBC3, 0x9073FBC3, 0x9074FBC3, 0x9075FBC3, 0x9076FBC3, 0x9077FBC3, 0x9078FBC3, 0x9079FBC3, 0x907AFBC3, 0x907BFBC3, 0x907CFBC3, + 0x907DFBC3, 0x907EFBC3, 0x907FFBC3, 0x9080FBC3, 0x9081FBC3, 0x9082FBC3, 0x9083FBC3, 0x9084FBC3, 0x9085FBC3, 0x9086FBC3, 0x9087FBC3, 0x9088FBC3, 0x9089FBC3, 0x908AFBC3, 0x908BFBC3, + 0x908CFBC3, 
0x908DFBC3, 0x908EFBC3, 0x908FFBC3, 0x9090FBC3, 0x9091FBC3, 0x9092FBC3, 0x9093FBC3, 0x9094FBC3, 0x9095FBC3, 0x9096FBC3, 0x9097FBC3, 0x9098FBC3, 0x9099FBC3, 0x909AFBC3, + 0x909BFBC3, 0x909CFBC3, 0x909DFBC3, 0x909EFBC3, 0x909FFBC3, 0x90A0FBC3, 0x90A1FBC3, 0x90A2FBC3, 0x90A3FBC3, 0x90A4FBC3, 0x90A5FBC3, 0x90A6FBC3, 0x90A7FBC3, 0x90A8FBC3, 0x90A9FBC3, + 0x90AAFBC3, 0x90ABFBC3, 0x90ACFBC3, 0x90ADFBC3, 0x90AEFBC3, 0x90AFFBC3, 0x90B0FBC3, 0x90B1FBC3, 0x90B2FBC3, 0x90B3FBC3, 0x90B4FBC3, 0x90B5FBC3, 0x90B6FBC3, 0x90B7FBC3, 0x90B8FBC3, + 0x90B9FBC3, 0x90BAFBC3, 0x90BBFBC3, 0x90BCFBC3, 0x90BDFBC3, 0x90BEFBC3, 0x90BFFBC3, 0x90C0FBC3, 0x90C1FBC3, 0x90C2FBC3, 0x90C3FBC3, 0x90C4FBC3, 0x90C5FBC3, 0x90C6FBC3, 0x90C7FBC3, + 0x90C8FBC3, 0x90C9FBC3, 0x90CAFBC3, 0x90CBFBC3, 0x90CCFBC3, 0x90CDFBC3, 0x90CEFBC3, 0x90CFFBC3, 0x90D0FBC3, 0x90D1FBC3, 0x90D2FBC3, 0x90D3FBC3, 0x90D4FBC3, 0x90D5FBC3, 0x90D6FBC3, + 0x90D7FBC3, 0x90D8FBC3, 0x90D9FBC3, 0x90DAFBC3, 0x90DBFBC3, 0x90DCFBC3, 0x90DDFBC3, 0x90DEFBC3, 0x90DFFBC3, 0x90E0FBC3, 0x90E1FBC3, 0x90E2FBC3, 0x90E3FBC3, 0x90E4FBC3, 0x90E5FBC3, + 0x90E6FBC3, 0x90E7FBC3, 0x90E8FBC3, 0x90E9FBC3, 0x90EAFBC3, 0x90EBFBC3, 0x90ECFBC3, 0x90EDFBC3, 0x90EEFBC3, 0x90EFFBC3, 0x90F0FBC3, 0x90F1FBC3, 0x90F2FBC3, 0x90F3FBC3, 0x90F4FBC3, + 0x90F5FBC3, 0x90F6FBC3, 0x90F7FBC3, 0x90F8FBC3, 0x90F9FBC3, 0x90FAFBC3, 0x90FBFBC3, 0x90FCFBC3, 0x90FDFBC3, 0x90FEFBC3, 0x90FFFBC3, 0x9100FBC3, 0x9101FBC3, 0x9102FBC3, 0x9103FBC3, + 0x9104FBC3, 0x9105FBC3, 0x9106FBC3, 0x9107FBC3, 0x9108FBC3, 0x9109FBC3, 0x910AFBC3, 0x910BFBC3, 0x910CFBC3, 0x910DFBC3, 0x910EFBC3, 0x910FFBC3, 0x9110FBC3, 0x9111FBC3, 0x9112FBC3, + 0x9113FBC3, 0x9114FBC3, 0x9115FBC3, 0x9116FBC3, 0x9117FBC3, 0x9118FBC3, 0x9119FBC3, 0x911AFBC3, 0x911BFBC3, 0x911CFBC3, 0x911DFBC3, 0x911EFBC3, 0x911FFBC3, 0x9120FBC3, 0x9121FBC3, + 0x9122FBC3, 0x9123FBC3, 0x9124FBC3, 0x9125FBC3, 0x9126FBC3, 0x9127FBC3, 0x9128FBC3, 0x9129FBC3, 0x912AFBC3, 0x912BFBC3, 0x912CFBC3, 0x912DFBC3, 0x912EFBC3, 0x912FFBC3, 0x9130FBC3, + 
0x9131FBC3, 0x9132FBC3, 0x9133FBC3, 0x9134FBC3, 0x9135FBC3, 0x9136FBC3, 0x9137FBC3, 0x9138FBC3, 0x9139FBC3, 0x913AFBC3, 0x913BFBC3, 0x913CFBC3, 0x913DFBC3, 0x913EFBC3, 0x913FFBC3, + 0x9140FBC3, 0x9141FBC3, 0x9142FBC3, 0x9143FBC3, 0x9144FBC3, 0x9145FBC3, 0x9146FBC3, 0x9147FBC3, 0x9148FBC3, 0x9149FBC3, 0x914AFBC3, 0x914BFBC3, 0x914CFBC3, 0x914DFBC3, 0x914EFBC3, + 0x914FFBC3, 0x9150FBC3, 0x9151FBC3, 0x9152FBC3, 0x9153FBC3, 0x9154FBC3, 0x9155FBC3, 0x9156FBC3, 0x9157FBC3, 0x9158FBC3, 0x9159FBC3, 0x915AFBC3, 0x915BFBC3, 0x915CFBC3, 0x915DFBC3, + 0x915EFBC3, 0x915FFBC3, 0x9160FBC3, 0x9161FBC3, 0x9162FBC3, 0x9163FBC3, 0x9164FBC3, 0x9165FBC3, 0x9166FBC3, 0x9167FBC3, 0x9168FBC3, 0x9169FBC3, 0x916AFBC3, 0x916BFBC3, 0x916CFBC3, + 0x916DFBC3, 0x916EFBC3, 0x916FFBC3, 0x9170FBC3, 0x9171FBC3, 0x9172FBC3, 0x9173FBC3, 0x9174FBC3, 0x9175FBC3, 0x9176FBC3, 0x9177FBC3, 0x9178FBC3, 0x9179FBC3, 0x917AFBC3, 0x917BFBC3, + 0x917CFBC3, 0x917DFBC3, 0x917EFBC3, 0x917FFBC3, 0x9180FBC3, 0x9181FBC3, 0x9182FBC3, 0x9183FBC3, 0x9184FBC3, 0x9185FBC3, 0x9186FBC3, 0x9187FBC3, 0x9188FBC3, 0x9189FBC3, 0x918AFBC3, + 0x918BFBC3, 0x918CFBC3, 0x918DFBC3, 0x918EFBC3, 0x918FFBC3, 0x9190FBC3, 0x9191FBC3, 0x9192FBC3, 0x9193FBC3, 0x9194FBC3, 0x9195FBC3, 0x9196FBC3, 0x9197FBC3, 0x9198FBC3, 0x9199FBC3, + 0x919AFBC3, 0x919BFBC3, 0x919CFBC3, 0x919DFBC3, 0x919EFBC3, 0x919FFBC3, 0x91A0FBC3, 0x91A1FBC3, 0x91A2FBC3, 0x91A3FBC3, 0x91A4FBC3, 0x91A5FBC3, 0x91A6FBC3, 0x91A7FBC3, 0x91A8FBC3, + 0x91A9FBC3, 0x91AAFBC3, 0x91ABFBC3, 0x91ACFBC3, 0x91ADFBC3, 0x91AEFBC3, 0x91AFFBC3, 0x91B0FBC3, 0x91B1FBC3, 0x91B2FBC3, 0x91B3FBC3, 0x91B4FBC3, 0x91B5FBC3, 0x91B6FBC3, 0x91B7FBC3, + 0x91B8FBC3, 0x91B9FBC3, 0x91BAFBC3, 0x91BBFBC3, 0x91BCFBC3, 0x91BDFBC3, 0x91BEFBC3, 0x91BFFBC3, 0x91C0FBC3, 0x91C1FBC3, 0x91C2FBC3, 0x91C3FBC3, 0x91C4FBC3, 0x91C5FBC3, 0x91C6FBC3, + 0x91C7FBC3, 0x91C8FBC3, 0x91C9FBC3, 0x91CAFBC3, 0x91CBFBC3, 0x91CCFBC3, 0x91CDFBC3, 0x91CEFBC3, 0x91CFFBC3, 0x91D0FBC3, 0x91D1FBC3, 0x91D2FBC3, 0x91D3FBC3, 0x91D4FBC3, 0x91D5FBC3, 
+ 0x91D6FBC3, 0x91D7FBC3, 0x91D8FBC3, 0x91D9FBC3, 0x91DAFBC3, 0x91DBFBC3, 0x91DCFBC3, 0x91DDFBC3, 0x91DEFBC3, 0x91DFFBC3, 0x91E0FBC3, 0x91E1FBC3, 0x91E2FBC3, 0x91E3FBC3, 0x91E4FBC3, + 0x91E5FBC3, 0x91E6FBC3, 0x91E7FBC3, 0x91E8FBC3, 0x91E9FBC3, 0x91EAFBC3, 0x91EBFBC3, 0x91ECFBC3, 0x91EDFBC3, 0x91EEFBC3, 0x91EFFBC3, 0x91F0FBC3, 0x91F1FBC3, 0x91F2FBC3, 0x91F3FBC3, + 0x91F4FBC3, 0x91F5FBC3, 0x91F6FBC3, 0x91F7FBC3, 0x91F8FBC3, 0x91F9FBC3, 0x91FAFBC3, 0x91FBFBC3, 0x91FCFBC3, 0x91FDFBC3, 0x91FEFBC3, 0x91FFFBC3, 0x9200FBC3, 0x9201FBC3, 0x9202FBC3, + 0x9203FBC3, 0x9204FBC3, 0x9205FBC3, 0x9206FBC3, 0x9207FBC3, 0x9208FBC3, 0x9209FBC3, 0x920AFBC3, 0x920BFBC3, 0x920CFBC3, 0x920DFBC3, 0x920EFBC3, 0x920FFBC3, 0x9210FBC3, 0x9211FBC3, + 0x9212FBC3, 0x9213FBC3, 0x9214FBC3, 0x9215FBC3, 0x9216FBC3, 0x9217FBC3, 0x9218FBC3, 0x9219FBC3, 0x921AFBC3, 0x921BFBC3, 0x921CFBC3, 0x921DFBC3, 0x921EFBC3, 0x921FFBC3, 0x9220FBC3, + 0x9221FBC3, 0x9222FBC3, 0x9223FBC3, 0x9224FBC3, 0x9225FBC3, 0x9226FBC3, 0x9227FBC3, 0x9228FBC3, 0x9229FBC3, 0x922AFBC3, 0x922BFBC3, 0x922CFBC3, 0x922DFBC3, 0x922EFBC3, 0x922FFBC3, + 0x9230FBC3, 0x9231FBC3, 0x9232FBC3, 0x9233FBC3, 0x9234FBC3, 0x9235FBC3, 0x9236FBC3, 0x9237FBC3, 0x9238FBC3, 0x9239FBC3, 0x923AFBC3, 0x923BFBC3, 0x923CFBC3, 0x923DFBC3, 0x923EFBC3, + 0x923FFBC3, 0x9240FBC3, 0x9241FBC3, 0x9242FBC3, 0x9243FBC3, 0x9244FBC3, 0x9245FBC3, 0x9246FBC3, 0x9247FBC3, 0x9248FBC3, 0x9249FBC3, 0x924AFBC3, 0x924BFBC3, 0x924CFBC3, 0x924DFBC3, + 0x924EFBC3, 0x924FFBC3, 0x9250FBC3, 0x9251FBC3, 0x9252FBC3, 0x9253FBC3, 0x9254FBC3, 0x9255FBC3, 0x9256FBC3, 0x9257FBC3, 0x9258FBC3, 0x9259FBC3, 0x925AFBC3, 0x925BFBC3, 0x925CFBC3, + 0x925DFBC3, 0x925EFBC3, 0x925FFBC3, 0x9260FBC3, 0x9261FBC3, 0x9262FBC3, 0x9263FBC3, 0x9264FBC3, 0x9265FBC3, 0x9266FBC3, 0x9267FBC3, 0x9268FBC3, 0x9269FBC3, 0x926AFBC3, 0x926BFBC3, + 0x926CFBC3, 0x926DFBC3, 0x926EFBC3, 0x926FFBC3, 0x9270FBC3, 0x9271FBC3, 0x9272FBC3, 0x9273FBC3, 0x9274FBC3, 0x9275FBC3, 0x9276FBC3, 0x9277FBC3, 0x9278FBC3, 0x9279FBC3, 
0x927AFBC3, + 0x927BFBC3, 0x927CFBC3, 0x927DFBC3, 0x927EFBC3, 0x927FFBC3, 0x9280FBC3, 0x9281FBC3, 0x9282FBC3, 0x9283FBC3, 0x9284FBC3, 0x9285FBC3, 0x9286FBC3, 0x9287FBC3, 0x9288FBC3, 0x9289FBC3, + 0x928AFBC3, 0x928BFBC3, 0x928CFBC3, 0x928DFBC3, 0x928EFBC3, 0x928FFBC3, 0x9290FBC3, 0x9291FBC3, 0x9292FBC3, 0x9293FBC3, 0x9294FBC3, 0x9295FBC3, 0x9296FBC3, 0x9297FBC3, 0x9298FBC3, + 0x9299FBC3, 0x929AFBC3, 0x929BFBC3, 0x929CFBC3, 0x929DFBC3, 0x929EFBC3, 0x929FFBC3, 0x92A0FBC3, 0x92A1FBC3, 0x92A2FBC3, 0x92A3FBC3, 0x92A4FBC3, 0x92A5FBC3, 0x92A6FBC3, 0x92A7FBC3, + 0x92A8FBC3, 0x92A9FBC3, 0x92AAFBC3, 0x92ABFBC3, 0x92ACFBC3, 0x92ADFBC3, 0x92AEFBC3, 0x92AFFBC3, 0x92B0FBC3, 0x92B1FBC3, 0x92B2FBC3, 0x92B3FBC3, 0x92B4FBC3, 0x92B5FBC3, 0x92B6FBC3, + 0x92B7FBC3, 0x92B8FBC3, 0x92B9FBC3, 0x92BAFBC3, 0x92BBFBC3, 0x92BCFBC3, 0x92BDFBC3, 0x92BEFBC3, 0x92BFFBC3, 0x92C0FBC3, 0x92C1FBC3, 0x92C2FBC3, 0x92C3FBC3, 0x92C4FBC3, 0x92C5FBC3, + 0x92C6FBC3, 0x92C7FBC3, 0x92C8FBC3, 0x92C9FBC3, 0x92CAFBC3, 0x92CBFBC3, 0x92CCFBC3, 0x92CDFBC3, 0x92CEFBC3, 0x92CFFBC3, 0x92D0FBC3, 0x92D1FBC3, 0x92D2FBC3, 0x92D3FBC3, 0x92D4FBC3, + 0x92D5FBC3, 0x92D6FBC3, 0x92D7FBC3, 0x92D8FBC3, 0x92D9FBC3, 0x92DAFBC3, 0x92DBFBC3, 0x92DCFBC3, 0x92DDFBC3, 0x92DEFBC3, 0x92DFFBC3, 0x92E0FBC3, 0x92E1FBC3, 0x92E2FBC3, 0x92E3FBC3, + 0x92E4FBC3, 0x92E5FBC3, 0x92E6FBC3, 0x92E7FBC3, 0x92E8FBC3, 0x92E9FBC3, 0x92EAFBC3, 0x92EBFBC3, 0x92ECFBC3, 0x92EDFBC3, 0x92EEFBC3, 0x92EFFBC3, 0x92F0FBC3, 0x92F1FBC3, 0x92F2FBC3, + 0x92F3FBC3, 0x92F4FBC3, 0x92F5FBC3, 0x92F6FBC3, 0x92F7FBC3, 0x92F8FBC3, 0x92F9FBC3, 0x92FAFBC3, 0x92FBFBC3, 0x92FCFBC3, 0x92FDFBC3, 0x92FEFBC3, 0x92FFFBC3, 0x9300FBC3, 0x9301FBC3, + 0x9302FBC3, 0x9303FBC3, 0x9304FBC3, 0x9305FBC3, 0x9306FBC3, 0x9307FBC3, 0x9308FBC3, 0x9309FBC3, 0x930AFBC3, 0x930BFBC3, 0x930CFBC3, 0x930DFBC3, 0x930EFBC3, 0x930FFBC3, 0x9310FBC3, + 0x9311FBC3, 0x9312FBC3, 0x9313FBC3, 0x9314FBC3, 0x9315FBC3, 0x9316FBC3, 0x9317FBC3, 0x9318FBC3, 0x9319FBC3, 0x931AFBC3, 0x931BFBC3, 0x931CFBC3, 0x931DFBC3, 
0x931EFBC3, 0x931FFBC3, + 0x9320FBC3, 0x9321FBC3, 0x9322FBC3, 0x9323FBC3, 0x9324FBC3, 0x9325FBC3, 0x9326FBC3, 0x9327FBC3, 0x9328FBC3, 0x9329FBC3, 0x932AFBC3, 0x932BFBC3, 0x932CFBC3, 0x932DFBC3, 0x932EFBC3, + 0x932FFBC3, 0x9330FBC3, 0x9331FBC3, 0x9332FBC3, 0x9333FBC3, 0x9334FBC3, 0x9335FBC3, 0x9336FBC3, 0x9337FBC3, 0x9338FBC3, 0x9339FBC3, 0x933AFBC3, 0x933BFBC3, 0x933CFBC3, 0x933DFBC3, + 0x933EFBC3, 0x933FFBC3, 0x9340FBC3, 0x9341FBC3, 0x9342FBC3, 0x9343FBC3, 0x9344FBC3, 0x9345FBC3, 0x9346FBC3, 0x9347FBC3, 0x9348FBC3, 0x9349FBC3, 0x934AFBC3, 0x934BFBC3, 0x934CFBC3, + 0x934DFBC3, 0x934EFBC3, 0x934FFBC3, 0x9350FBC3, 0x9351FBC3, 0x9352FBC3, 0x9353FBC3, 0x9354FBC3, 0x9355FBC3, 0x9356FBC3, 0x9357FBC3, 0x9358FBC3, 0x9359FBC3, 0x935AFBC3, 0x935BFBC3, + 0x935CFBC3, 0x935DFBC3, 0x935EFBC3, 0x935FFBC3, 0x9360FBC3, 0x9361FBC3, 0x9362FBC3, 0x9363FBC3, 0x9364FBC3, 0x9365FBC3, 0x9366FBC3, 0x9367FBC3, 0x9368FBC3, 0x9369FBC3, 0x936AFBC3, + 0x936BFBC3, 0x936CFBC3, 0x936DFBC3, 0x936EFBC3, 0x936FFBC3, 0x9370FBC3, 0x9371FBC3, 0x9372FBC3, 0x9373FBC3, 0x9374FBC3, 0x9375FBC3, 0x9376FBC3, 0x9377FBC3, 0x9378FBC3, 0x9379FBC3, + 0x937AFBC3, 0x937BFBC3, 0x937CFBC3, 0x937DFBC3, 0x937EFBC3, 0x937FFBC3, 0x9380FBC3, 0x9381FBC3, 0x9382FBC3, 0x9383FBC3, 0x9384FBC3, 0x9385FBC3, 0x9386FBC3, 0x9387FBC3, 0x9388FBC3, + 0x9389FBC3, 0x938AFBC3, 0x938BFBC3, 0x938CFBC3, 0x938DFBC3, 0x938EFBC3, 0x938FFBC3, 0x9390FBC3, 0x9391FBC3, 0x9392FBC3, 0x9393FBC3, 0x9394FBC3, 0x9395FBC3, 0x9396FBC3, 0x9397FBC3, + 0x9398FBC3, 0x9399FBC3, 0x939AFBC3, 0x939BFBC3, 0x939CFBC3, 0x939DFBC3, 0x939EFBC3, 0x939FFBC3, 0x93A0FBC3, 0x93A1FBC3, 0x93A2FBC3, 0x93A3FBC3, 0x93A4FBC3, 0x93A5FBC3, 0x93A6FBC3, + 0x93A7FBC3, 0x93A8FBC3, 0x93A9FBC3, 0x93AAFBC3, 0x93ABFBC3, 0x93ACFBC3, 0x93ADFBC3, 0x93AEFBC3, 0x93AFFBC3, 0x93B0FBC3, 0x93B1FBC3, 0x93B2FBC3, 0x93B3FBC3, 0x93B4FBC3, 0x93B5FBC3, + 0x93B6FBC3, 0x93B7FBC3, 0x93B8FBC3, 0x93B9FBC3, 0x93BAFBC3, 0x93BBFBC3, 0x93BCFBC3, 0x93BDFBC3, 0x93BEFBC3, 0x93BFFBC3, 0x93C0FBC3, 0x93C1FBC3, 
0x93C2FBC3, 0x93C3FBC3, 0x93C4FBC3, + 0x93C5FBC3, 0x93C6FBC3, 0x93C7FBC3, 0x93C8FBC3, 0x93C9FBC3, 0x93CAFBC3, 0x93CBFBC3, 0x93CCFBC3, 0x93CDFBC3, 0x93CEFBC3, 0x93CFFBC3, 0x93D0FBC3, 0x93D1FBC3, 0x93D2FBC3, 0x93D3FBC3, + 0x93D4FBC3, 0x93D5FBC3, 0x93D6FBC3, 0x93D7FBC3, 0x93D8FBC3, 0x93D9FBC3, 0x93DAFBC3, 0x93DBFBC3, 0x93DCFBC3, 0x93DDFBC3, 0x93DEFBC3, 0x93DFFBC3, 0x93E0FBC3, 0x93E1FBC3, 0x93E2FBC3, + 0x93E3FBC3, 0x93E4FBC3, 0x93E5FBC3, 0x93E6FBC3, 0x93E7FBC3, 0x93E8FBC3, 0x93E9FBC3, 0x93EAFBC3, 0x93EBFBC3, 0x93ECFBC3, 0x93EDFBC3, 0x93EEFBC3, 0x93EFFBC3, 0x93F0FBC3, 0x93F1FBC3, + 0x93F2FBC3, 0x93F3FBC3, 0x93F4FBC3, 0x93F5FBC3, 0x93F6FBC3, 0x93F7FBC3, 0x93F8FBC3, 0x93F9FBC3, 0x93FAFBC3, 0x93FBFBC3, 0x93FCFBC3, 0x93FDFBC3, 0x93FEFBC3, 0x93FFFBC3, 0x9400FBC3, + 0x9401FBC3, 0x9402FBC3, 0x9403FBC3, 0x9404FBC3, 0x9405FBC3, 0x9406FBC3, 0x9407FBC3, 0x9408FBC3, 0x9409FBC3, 0x940AFBC3, 0x940BFBC3, 0x940CFBC3, 0x940DFBC3, 0x940EFBC3, 0x940FFBC3, + 0x9410FBC3, 0x9411FBC3, 0x9412FBC3, 0x9413FBC3, 0x9414FBC3, 0x9415FBC3, 0x9416FBC3, 0x9417FBC3, 0x9418FBC3, 0x9419FBC3, 0x941AFBC3, 0x941BFBC3, 0x941CFBC3, 0x941DFBC3, 0x941EFBC3, + 0x941FFBC3, 0x9420FBC3, 0x9421FBC3, 0x9422FBC3, 0x9423FBC3, 0x9424FBC3, 0x9425FBC3, 0x9426FBC3, 0x9427FBC3, 0x9428FBC3, 0x9429FBC3, 0x942AFBC3, 0x942BFBC3, 0x942CFBC3, 0x942DFBC3, + 0x942EFBC3, 0x942FFBC3, 0x9430FBC3, 0x9431FBC3, 0x9432FBC3, 0x9433FBC3, 0x9434FBC3, 0x9435FBC3, 0x9436FBC3, 0x9437FBC3, 0x9438FBC3, 0x9439FBC3, 0x943AFBC3, 0x943BFBC3, 0x943CFBC3, + 0x943DFBC3, 0x943EFBC3, 0x943FFBC3, 0x9440FBC3, 0x9441FBC3, 0x9442FBC3, 0x9443FBC3, 0x9444FBC3, 0x9445FBC3, 0x9446FBC3, 0x9447FBC3, 0x9448FBC3, 0x9449FBC3, 0x944AFBC3, 0x944BFBC3, + 0x944CFBC3, 0x944DFBC3, 0x944EFBC3, 0x944FFBC3, 0x9450FBC3, 0x9451FBC3, 0x9452FBC3, 0x9453FBC3, 0x9454FBC3, 0x9455FBC3, 0x9456FBC3, 0x9457FBC3, 0x9458FBC3, 0x9459FBC3, 0x945AFBC3, + 0x945BFBC3, 0x945CFBC3, 0x945DFBC3, 0x945EFBC3, 0x945FFBC3, 0x9460FBC3, 0x9461FBC3, 0x9462FBC3, 0x9463FBC3, 0x9464FBC3, 0x9465FBC3, 
0x9466FBC3, 0x9467FBC3, 0x9468FBC3, 0x9469FBC3, + 0x946AFBC3, 0x946BFBC3, 0x946CFBC3, 0x946DFBC3, 0x946EFBC3, 0x946FFBC3, 0x9470FBC3, 0x9471FBC3, 0x9472FBC3, 0x9473FBC3, 0x9474FBC3, 0x9475FBC3, 0x9476FBC3, 0x9477FBC3, 0x9478FBC3, + 0x9479FBC3, 0x947AFBC3, 0x947BFBC3, 0x947CFBC3, 0x947DFBC3, 0x947EFBC3, 0x947FFBC3, 0x9480FBC3, 0x9481FBC3, 0x9482FBC3, 0x9483FBC3, 0x9484FBC3, 0x9485FBC3, 0x9486FBC3, 0x9487FBC3, + 0x9488FBC3, 0x9489FBC3, 0x948AFBC3, 0x948BFBC3, 0x948CFBC3, 0x948DFBC3, 0x948EFBC3, 0x948FFBC3, 0x9490FBC3, 0x9491FBC3, 0x9492FBC3, 0x9493FBC3, 0x9494FBC3, 0x9495FBC3, 0x9496FBC3, + 0x9497FBC3, 0x9498FBC3, 0x9499FBC3, 0x949AFBC3, 0x949BFBC3, 0x949CFBC3, 0x949DFBC3, 0x949EFBC3, 0x949FFBC3, 0x94A0FBC3, 0x94A1FBC3, 0x94A2FBC3, 0x94A3FBC3, 0x94A4FBC3, 0x94A5FBC3, + 0x94A6FBC3, 0x94A7FBC3, 0x94A8FBC3, 0x94A9FBC3, 0x94AAFBC3, 0x94ABFBC3, 0x94ACFBC3, 0x94ADFBC3, 0x94AEFBC3, 0x94AFFBC3, 0x94B0FBC3, 0x94B1FBC3, 0x94B2FBC3, 0x94B3FBC3, 0x94B4FBC3, + 0x94B5FBC3, 0x94B6FBC3, 0x94B7FBC3, 0x94B8FBC3, 0x94B9FBC3, 0x94BAFBC3, 0x94BBFBC3, 0x94BCFBC3, 0x94BDFBC3, 0x94BEFBC3, 0x94BFFBC3, 0x94C0FBC3, 0x94C1FBC3, 0x94C2FBC3, 0x94C3FBC3, + 0x94C4FBC3, 0x94C5FBC3, 0x94C6FBC3, 0x94C7FBC3, 0x94C8FBC3, 0x94C9FBC3, 0x94CAFBC3, 0x94CBFBC3, 0x94CCFBC3, 0x94CDFBC3, 0x94CEFBC3, 0x94CFFBC3, 0x94D0FBC3, 0x94D1FBC3, 0x94D2FBC3, + 0x94D3FBC3, 0x94D4FBC3, 0x94D5FBC3, 0x94D6FBC3, 0x94D7FBC3, 0x94D8FBC3, 0x94D9FBC3, 0x94DAFBC3, 0x94DBFBC3, 0x94DCFBC3, 0x94DDFBC3, 0x94DEFBC3, 0x94DFFBC3, 0x94E0FBC3, 0x94E1FBC3, + 0x94E2FBC3, 0x94E3FBC3, 0x94E4FBC3, 0x94E5FBC3, 0x94E6FBC3, 0x94E7FBC3, 0x94E8FBC3, 0x94E9FBC3, 0x94EAFBC3, 0x94EBFBC3, 0x94ECFBC3, 0x94EDFBC3, 0x94EEFBC3, 0x94EFFBC3, 0x94F0FBC3, + 0x94F1FBC3, 0x94F2FBC3, 0x94F3FBC3, 0x94F4FBC3, 0x94F5FBC3, 0x94F6FBC3, 0x94F7FBC3, 0x94F8FBC3, 0x94F9FBC3, 0x94FAFBC3, 0x94FBFBC3, 0x94FCFBC3, 0x94FDFBC3, 0x94FEFBC3, 0x94FFFBC3, + 0x9500FBC3, 0x9501FBC3, 0x9502FBC3, 0x9503FBC3, 0x9504FBC3, 0x9505FBC3, 0x9506FBC3, 0x9507FBC3, 0x9508FBC3, 0x9509FBC3, 
0x950AFBC3, 0x950BFBC3, 0x950CFBC3, 0x950DFBC3, 0x950EFBC3, + 0x950FFBC3, 0x9510FBC3, 0x9511FBC3, 0x9512FBC3, 0x9513FBC3, 0x9514FBC3, 0x9515FBC3, 0x9516FBC3, 0x9517FBC3, 0x9518FBC3, 0x9519FBC3, 0x951AFBC3, 0x951BFBC3, 0x951CFBC3, 0x951DFBC3, + 0x951EFBC3, 0x951FFBC3, 0x9520FBC3, 0x9521FBC3, 0x9522FBC3, 0x9523FBC3, 0x9524FBC3, 0x9525FBC3, 0x9526FBC3, 0x9527FBC3, 0x9528FBC3, 0x9529FBC3, 0x952AFBC3, 0x952BFBC3, 0x952CFBC3, + 0x952DFBC3, 0x952EFBC3, 0x952FFBC3, 0x9530FBC3, 0x9531FBC3, 0x9532FBC3, 0x9533FBC3, 0x9534FBC3, 0x9535FBC3, 0x9536FBC3, 0x9537FBC3, 0x9538FBC3, 0x9539FBC3, 0x953AFBC3, 0x953BFBC3, + 0x953CFBC3, 0x953DFBC3, 0x953EFBC3, 0x953FFBC3, 0x9540FBC3, 0x9541FBC3, 0x9542FBC3, 0x9543FBC3, 0x9544FBC3, 0x9545FBC3, 0x9546FBC3, 0x9547FBC3, 0x9548FBC3, 0x9549FBC3, 0x954AFBC3, + 0x954BFBC3, 0x954CFBC3, 0x954DFBC3, 0x954EFBC3, 0x954FFBC3, 0x9550FBC3, 0x9551FBC3, 0x9552FBC3, 0x9553FBC3, 0x9554FBC3, 0x9555FBC3, 0x9556FBC3, 0x9557FBC3, 0x9558FBC3, 0x9559FBC3, + 0x955AFBC3, 0x955BFBC3, 0x955CFBC3, 0x955DFBC3, 0x955EFBC3, 0x955FFBC3, 0x9560FBC3, 0x9561FBC3, 0x9562FBC3, 0x9563FBC3, 0x9564FBC3, 0x9565FBC3, 0x9566FBC3, 0x9567FBC3, 0x9568FBC3, + 0x9569FBC3, 0x956AFBC3, 0x956BFBC3, 0x956CFBC3, 0x956DFBC3, 0x956EFBC3, 0x956FFBC3, 0x9570FBC3, 0x9571FBC3, 0x9572FBC3, 0x9573FBC3, 0x9574FBC3, 0x9575FBC3, 0x9576FBC3, 0x9577FBC3, + 0x9578FBC3, 0x9579FBC3, 0x957AFBC3, 0x957BFBC3, 0x957CFBC3, 0x957DFBC3, 0x957EFBC3, 0x957FFBC3, 0x9580FBC3, 0x9581FBC3, 0x9582FBC3, 0x9583FBC3, 0x9584FBC3, 0x9585FBC3, 0x9586FBC3, + 0x9587FBC3, 0x9588FBC3, 0x9589FBC3, 0x958AFBC3, 0x958BFBC3, 0x958CFBC3, 0x958DFBC3, 0x958EFBC3, 0x958FFBC3, 0x9590FBC3, 0x9591FBC3, 0x9592FBC3, 0x9593FBC3, 0x9594FBC3, 0x9595FBC3, + 0x9596FBC3, 0x9597FBC3, 0x9598FBC3, 0x9599FBC3, 0x959AFBC3, 0x959BFBC3, 0x959CFBC3, 0x959DFBC3, 0x959EFBC3, 0x959FFBC3, 0x95A0FBC3, 0x95A1FBC3, 0x95A2FBC3, 0x95A3FBC3, 0x95A4FBC3, + 0x95A5FBC3, 0x95A6FBC3, 0x95A7FBC3, 0x95A8FBC3, 0x95A9FBC3, 0x95AAFBC3, 0x95ABFBC3, 0x95ACFBC3, 0x95ADFBC3, 
0x95AEFBC3, 0x95AFFBC3, 0x95B0FBC3, 0x95B1FBC3, 0x95B2FBC3, 0x95B3FBC3, + 0x95B4FBC3, 0x95B5FBC3, 0x95B6FBC3, 0x95B7FBC3, 0x95B8FBC3, 0x95B9FBC3, 0x95BAFBC3, 0x95BBFBC3, 0x95BCFBC3, 0x95BDFBC3, 0x95BEFBC3, 0x95BFFBC3, 0x95C0FBC3, 0x95C1FBC3, 0x95C2FBC3, + 0x95C3FBC3, 0x95C4FBC3, 0x95C5FBC3, 0x95C6FBC3, 0x95C7FBC3, 0x95C8FBC3, 0x95C9FBC3, 0x95CAFBC3, 0x95CBFBC3, 0x95CCFBC3, 0x95CDFBC3, 0x95CEFBC3, 0x95CFFBC3, 0x95D0FBC3, 0x95D1FBC3, + 0x95D2FBC3, 0x95D3FBC3, 0x95D4FBC3, 0x95D5FBC3, 0x95D6FBC3, 0x95D7FBC3, 0x95D8FBC3, 0x95D9FBC3, 0x95DAFBC3, 0x95DBFBC3, 0x95DCFBC3, 0x95DDFBC3, 0x95DEFBC3, 0x95DFFBC3, 0x95E0FBC3, + 0x95E1FBC3, 0x95E2FBC3, 0x95E3FBC3, 0x95E4FBC3, 0x95E5FBC3, 0x95E6FBC3, 0x95E7FBC3, 0x95E8FBC3, 0x95E9FBC3, 0x95EAFBC3, 0x95EBFBC3, 0x95ECFBC3, 0x95EDFBC3, 0x95EEFBC3, 0x95EFFBC3, + 0x95F0FBC3, 0x95F1FBC3, 0x95F2FBC3, 0x95F3FBC3, 0x95F4FBC3, 0x95F5FBC3, 0x95F6FBC3, 0x95F7FBC3, 0x95F8FBC3, 0x95F9FBC3, 0x95FAFBC3, 0x95FBFBC3, 0x95FCFBC3, 0x95FDFBC3, 0x95FEFBC3, + 0x95FFFBC3, 0x9600FBC3, 0x9601FBC3, 0x9602FBC3, 0x9603FBC3, 0x9604FBC3, 0x9605FBC3, 0x9606FBC3, 0x9607FBC3, 0x9608FBC3, 0x9609FBC3, 0x960AFBC3, 0x960BFBC3, 0x960CFBC3, 0x960DFBC3, + 0x960EFBC3, 0x960FFBC3, 0x9610FBC3, 0x9611FBC3, 0x9612FBC3, 0x9613FBC3, 0x9614FBC3, 0x9615FBC3, 0x9616FBC3, 0x9617FBC3, 0x9618FBC3, 0x9619FBC3, 0x961AFBC3, 0x961BFBC3, 0x961CFBC3, + 0x961DFBC3, 0x961EFBC3, 0x961FFBC3, 0x9620FBC3, 0x9621FBC3, 0x9622FBC3, 0x9623FBC3, 0x9624FBC3, 0x9625FBC3, 0x9626FBC3, 0x9627FBC3, 0x9628FBC3, 0x9629FBC3, 0x962AFBC3, 0x962BFBC3, + 0x962CFBC3, 0x962DFBC3, 0x962EFBC3, 0x962FFBC3, 0x9630FBC3, 0x9631FBC3, 0x9632FBC3, 0x9633FBC3, 0x9634FBC3, 0x9635FBC3, 0x9636FBC3, 0x9637FBC3, 0x9638FBC3, 0x9639FBC3, 0x963AFBC3, + 0x963BFBC3, 0x963CFBC3, 0x963DFBC3, 0x963EFBC3, 0x963FFBC3, 0x9640FBC3, 0x9641FBC3, 0x9642FBC3, 0x9643FBC3, 0x9644FBC3, 0x9645FBC3, 0x9646FBC3, 0x9647FBC3, 0x9648FBC3, 0x9649FBC3, + 0x964AFBC3, 0x964BFBC3, 0x964CFBC3, 0x964DFBC3, 0x964EFBC3, 0x964FFBC3, 0x9650FBC3, 0x9651FBC3, 
0x9652FBC3, 0x9653FBC3, 0x9654FBC3, 0x9655FBC3, 0x9656FBC3, 0x9657FBC3, 0x9658FBC3, + 0x9659FBC3, 0x965AFBC3, 0x965BFBC3, 0x965CFBC3, 0x965DFBC3, 0x965EFBC3, 0x965FFBC3, 0x9660FBC3, 0x9661FBC3, 0x9662FBC3, 0x9663FBC3, 0x9664FBC3, 0x9665FBC3, 0x9666FBC3, 0x9667FBC3, + 0x9668FBC3, 0x9669FBC3, 0x966AFBC3, 0x966BFBC3, 0x966CFBC3, 0x966DFBC3, 0x966EFBC3, 0x966FFBC3, 0x9670FBC3, 0x9671FBC3, 0x9672FBC3, 0x9673FBC3, 0x9674FBC3, 0x9675FBC3, 0x9676FBC3, + 0x9677FBC3, 0x9678FBC3, 0x9679FBC3, 0x967AFBC3, 0x967BFBC3, 0x967CFBC3, 0x967DFBC3, 0x967EFBC3, 0x967FFBC3, 0x9680FBC3, 0x9681FBC3, 0x9682FBC3, 0x9683FBC3, 0x9684FBC3, 0x9685FBC3, + 0x9686FBC3, 0x9687FBC3, 0x9688FBC3, 0x9689FBC3, 0x968AFBC3, 0x968BFBC3, 0x968CFBC3, 0x968DFBC3, 0x968EFBC3, 0x968FFBC3, 0x9690FBC3, 0x9691FBC3, 0x9692FBC3, 0x9693FBC3, 0x9694FBC3, + 0x9695FBC3, 0x9696FBC3, 0x9697FBC3, 0x9698FBC3, 0x9699FBC3, 0x969AFBC3, 0x969BFBC3, 0x969CFBC3, 0x969DFBC3, 0x969EFBC3, 0x969FFBC3, 0x96A0FBC3, 0x96A1FBC3, 0x96A2FBC3, 0x96A3FBC3, + 0x96A4FBC3, 0x96A5FBC3, 0x96A6FBC3, 0x96A7FBC3, 0x96A8FBC3, 0x96A9FBC3, 0x96AAFBC3, 0x96ABFBC3, 0x96ACFBC3, 0x96ADFBC3, 0x96AEFBC3, 0x96AFFBC3, 0x96B0FBC3, 0x96B1FBC3, 0x96B2FBC3, + 0x96B3FBC3, 0x96B4FBC3, 0x96B5FBC3, 0x96B6FBC3, 0x96B7FBC3, 0x96B8FBC3, 0x96B9FBC3, 0x96BAFBC3, 0x96BBFBC3, 0x96BCFBC3, 0x96BDFBC3, 0x96BEFBC3, 0x96BFFBC3, 0x96C0FBC3, 0x96C1FBC3, + 0x96C2FBC3, 0x96C3FBC3, 0x96C4FBC3, 0x96C5FBC3, 0x96C6FBC3, 0x96C7FBC3, 0x96C8FBC3, 0x96C9FBC3, 0x96CAFBC3, 0x96CBFBC3, 0x96CCFBC3, 0x96CDFBC3, 0x96CEFBC3, 0x96CFFBC3, 0x96D0FBC3, + 0x96D1FBC3, 0x96D2FBC3, 0x96D3FBC3, 0x96D4FBC3, 0x96D5FBC3, 0x96D6FBC3, 0x96D7FBC3, 0x96D8FBC3, 0x96D9FBC3, 0x96DAFBC3, 0x96DBFBC3, 0x96DCFBC3, 0x96DDFBC3, 0x96DEFBC3, 0x96DFFBC3, + 0x96E0FBC3, 0x96E1FBC3, 0x96E2FBC3, 0x96E3FBC3, 0x96E4FBC3, 0x96E5FBC3, 0x96E6FBC3, 0x96E7FBC3, 0x96E8FBC3, 0x96E9FBC3, 0x96EAFBC3, 0x96EBFBC3, 0x96ECFBC3, 0x96EDFBC3, 0x96EEFBC3, + 0x96EFFBC3, 0x96F0FBC3, 0x96F1FBC3, 0x96F2FBC3, 0x96F3FBC3, 0x96F4FBC3, 0x96F5FBC3, 
0x96F6FBC3, 0x96F7FBC3, 0x96F8FBC3, 0x96F9FBC3, 0x96FAFBC3, 0x96FBFBC3, 0x96FCFBC3, 0x96FDFBC3, + 0x96FEFBC3, 0x96FFFBC3, 0x9700FBC3, 0x9701FBC3, 0x9702FBC3, 0x9703FBC3, 0x9704FBC3, 0x9705FBC3, 0x9706FBC3, 0x9707FBC3, 0x9708FBC3, 0x9709FBC3, 0x970AFBC3, 0x970BFBC3, 0x970CFBC3, + 0x970DFBC3, 0x970EFBC3, 0x970FFBC3, 0x9710FBC3, 0x9711FBC3, 0x9712FBC3, 0x9713FBC3, 0x9714FBC3, 0x9715FBC3, 0x9716FBC3, 0x9717FBC3, 0x9718FBC3, 0x9719FBC3, 0x971AFBC3, 0x971BFBC3, + 0x971CFBC3, 0x971DFBC3, 0x971EFBC3, 0x971FFBC3, 0x9720FBC3, 0x9721FBC3, 0x9722FBC3, 0x9723FBC3, 0x9724FBC3, 0x9725FBC3, 0x9726FBC3, 0x9727FBC3, 0x9728FBC3, 0x9729FBC3, 0x972AFBC3, + 0x972BFBC3, 0x972CFBC3, 0x972DFBC3, 0x972EFBC3, 0x972FFBC3, 0x9730FBC3, 0x9731FBC3, 0x9732FBC3, 0x9733FBC3, 0x9734FBC3, 0x9735FBC3, 0x9736FBC3, 0x9737FBC3, 0x9738FBC3, 0x9739FBC3, + 0x973AFBC3, 0x973BFBC3, 0x973CFBC3, 0x973DFBC3, 0x973EFBC3, 0x973FFBC3, 0x9740FBC3, 0x9741FBC3, 0x9742FBC3, 0x9743FBC3, 0x9744FBC3, 0x9745FBC3, 0x9746FBC3, 0x9747FBC3, 0x9748FBC3, + 0x9749FBC3, 0x974AFBC3, 0x974BFBC3, 0x974CFBC3, 0x974DFBC3, 0x974EFBC3, 0x974FFBC3, 0x9750FBC3, 0x9751FBC3, 0x9752FBC3, 0x9753FBC3, 0x9754FBC3, 0x9755FBC3, 0x9756FBC3, 0x9757FBC3, + 0x9758FBC3, 0x9759FBC3, 0x975AFBC3, 0x975BFBC3, 0x975CFBC3, 0x975DFBC3, 0x975EFBC3, 0x975FFBC3, 0x9760FBC3, 0x9761FBC3, 0x9762FBC3, 0x9763FBC3, 0x9764FBC3, 0x9765FBC3, 0x9766FBC3, + 0x9767FBC3, 0x9768FBC3, 0x9769FBC3, 0x976AFBC3, 0x976BFBC3, 0x976CFBC3, 0x976DFBC3, 0x976EFBC3, 0x976FFBC3, 0x9770FBC3, 0x9771FBC3, 0x9772FBC3, 0x9773FBC3, 0x9774FBC3, 0x9775FBC3, + 0x9776FBC3, 0x9777FBC3, 0x9778FBC3, 0x9779FBC3, 0x977AFBC3, 0x977BFBC3, 0x977CFBC3, 0x977DFBC3, 0x977EFBC3, 0x977FFBC3, 0x9780FBC3, 0x9781FBC3, 0x9782FBC3, 0x9783FBC3, 0x9784FBC3, + 0x9785FBC3, 0x9786FBC3, 0x9787FBC3, 0x9788FBC3, 0x9789FBC3, 0x978AFBC3, 0x978BFBC3, 0x978CFBC3, 0x978DFBC3, 0x978EFBC3, 0x978FFBC3, 0x9790FBC3, 0x9791FBC3, 0x9792FBC3, 0x9793FBC3, + 0x9794FBC3, 0x9795FBC3, 0x9796FBC3, 0x9797FBC3, 0x9798FBC3, 0x9799FBC3, 
0x979AFBC3, 0x979BFBC3, 0x979CFBC3, 0x979DFBC3, 0x979EFBC3, 0x979FFBC3, 0x97A0FBC3, 0x97A1FBC3, 0x97A2FBC3, + 0x97A3FBC3, 0x97A4FBC3, 0x97A5FBC3, 0x97A6FBC3, 0x97A7FBC3, 0x97A8FBC3, 0x97A9FBC3, 0x97AAFBC3, 0x97ABFBC3, 0x97ACFBC3, 0x97ADFBC3, 0x97AEFBC3, 0x97AFFBC3, 0x97B0FBC3, 0x97B1FBC3, + 0x97B2FBC3, 0x97B3FBC3, 0x97B4FBC3, 0x97B5FBC3, 0x97B6FBC3, 0x97B7FBC3, 0x97B8FBC3, 0x97B9FBC3, 0x97BAFBC3, 0x97BBFBC3, 0x97BCFBC3, 0x97BDFBC3, 0x97BEFBC3, 0x97BFFBC3, 0x97C0FBC3, + 0x97C1FBC3, 0x97C2FBC3, 0x97C3FBC3, 0x97C4FBC3, 0x97C5FBC3, 0x97C6FBC3, 0x97C7FBC3, 0x97C8FBC3, 0x97C9FBC3, 0x97CAFBC3, 0x97CBFBC3, 0x97CCFBC3, 0x97CDFBC3, 0x97CEFBC3, 0x97CFFBC3, + 0x97D0FBC3, 0x97D1FBC3, 0x97D2FBC3, 0x97D3FBC3, 0x97D4FBC3, 0x97D5FBC3, 0x97D6FBC3, 0x97D7FBC3, 0x97D8FBC3, 0x97D9FBC3, 0x97DAFBC3, 0x97DBFBC3, 0x97DCFBC3, 0x97DDFBC3, 0x97DEFBC3, + 0x97DFFBC3, 0x97E0FBC3, 0x97E1FBC3, 0x97E2FBC3, 0x97E3FBC3, 0x97E4FBC3, 0x97E5FBC3, 0x97E6FBC3, 0x97E7FBC3, 0x97E8FBC3, 0x97E9FBC3, 0x97EAFBC3, 0x97EBFBC3, 0x97ECFBC3, 0x97EDFBC3, + 0x97EEFBC3, 0x97EFFBC3, 0x97F0FBC3, 0x97F1FBC3, 0x97F2FBC3, 0x97F3FBC3, 0x97F4FBC3, 0x97F5FBC3, 0x97F6FBC3, 0x97F7FBC3, 0x97F8FBC3, 0x97F9FBC3, 0x97FAFBC3, 0x97FBFBC3, 0x97FCFBC3, + 0x97FDFBC3, 0x97FEFBC3, 0x97FFFBC3, 0x9800FBC3, 0x9801FBC3, 0x9802FBC3, 0x9803FBC3, 0x9804FBC3, 0x9805FBC3, 0x9806FBC3, 0x9807FBC3, 0x9808FBC3, 0x9809FBC3, 0x980AFBC3, 0x980BFBC3, + 0x980CFBC3, 0x980DFBC3, 0x980EFBC3, 0x980FFBC3, 0x9810FBC3, 0x9811FBC3, 0x9812FBC3, 0x9813FBC3, 0x9814FBC3, 0x9815FBC3, 0x9816FBC3, 0x9817FBC3, 0x9818FBC3, 0x9819FBC3, 0x981AFBC3, + 0x981BFBC3, 0x981CFBC3, 0x981DFBC3, 0x981EFBC3, 0x981FFBC3, 0x9820FBC3, 0x9821FBC3, 0x9822FBC3, 0x9823FBC3, 0x9824FBC3, 0x9825FBC3, 0x9826FBC3, 0x9827FBC3, 0x9828FBC3, 0x9829FBC3, + 0x982AFBC3, 0x982BFBC3, 0x982CFBC3, 0x982DFBC3, 0x982EFBC3, 0x982FFBC3, 0x9830FBC3, 0x9831FBC3, 0x9832FBC3, 0x9833FBC3, 0x9834FBC3, 0x9835FBC3, 0x9836FBC3, 0x9837FBC3, 0x9838FBC3, + 0x9839FBC3, 0x983AFBC3, 0x983BFBC3, 0x983CFBC3, 0x983DFBC3, 
0x983EFBC3, 0x983FFBC3, 0x9840FBC3, 0x9841FBC3, 0x9842FBC3, 0x9843FBC3, 0x9844FBC3, 0x9845FBC3, 0x9846FBC3, 0x9847FBC3, + 0x9848FBC3, 0x9849FBC3, 0x984AFBC3, 0x984BFBC3, 0x984CFBC3, 0x984DFBC3, 0x984EFBC3, 0x984FFBC3, 0x9850FBC3, 0x9851FBC3, 0x9852FBC3, 0x9853FBC3, 0x9854FBC3, 0x9855FBC3, 0x9856FBC3, + 0x9857FBC3, 0x9858FBC3, 0x9859FBC3, 0x985AFBC3, 0x985BFBC3, 0x985CFBC3, 0x985DFBC3, 0x985EFBC3, 0x985FFBC3, 0x9860FBC3, 0x9861FBC3, 0x9862FBC3, 0x9863FBC3, 0x9864FBC3, 0x9865FBC3, + 0x9866FBC3, 0x9867FBC3, 0x9868FBC3, 0x9869FBC3, 0x986AFBC3, 0x986BFBC3, 0x986CFBC3, 0x986DFBC3, 0x986EFBC3, 0x986FFBC3, 0x9870FBC3, 0x9871FBC3, 0x9872FBC3, 0x9873FBC3, 0x9874FBC3, + 0x9875FBC3, 0x9876FBC3, 0x9877FBC3, 0x9878FBC3, 0x9879FBC3, 0x987AFBC3, 0x987BFBC3, 0x987CFBC3, 0x987DFBC3, 0x987EFBC3, 0x987FFBC3, 0x9880FBC3, 0x9881FBC3, 0x9882FBC3, 0x9883FBC3, + 0x9884FBC3, 0x9885FBC3, 0x9886FBC3, 0x9887FBC3, 0x9888FBC3, 0x9889FBC3, 0x988AFBC3, 0x988BFBC3, 0x988CFBC3, 0x988DFBC3, 0x988EFBC3, 0x988FFBC3, 0x9890FBC3, 0x9891FBC3, 0x9892FBC3, + 0x9893FBC3, 0x9894FBC3, 0x9895FBC3, 0x9896FBC3, 0x9897FBC3, 0x9898FBC3, 0x9899FBC3, 0x989AFBC3, 0x989BFBC3, 0x989CFBC3, 0x989DFBC3, 0x989EFBC3, 0x989FFBC3, 0x98A0FBC3, 0x98A1FBC3, + 0x98A2FBC3, 0x98A3FBC3, 0x98A4FBC3, 0x98A5FBC3, 0x98A6FBC3, 0x98A7FBC3, 0x98A8FBC3, 0x98A9FBC3, 0x98AAFBC3, 0x98ABFBC3, 0x98ACFBC3, 0x98ADFBC3, 0x98AEFBC3, 0x98AFFBC3, 0x98B0FBC3, + 0x98B1FBC3, 0x98B2FBC3, 0x98B3FBC3, 0x98B4FBC3, 0x98B5FBC3, 0x98B6FBC3, 0x98B7FBC3, 0x98B8FBC3, 0x98B9FBC3, 0x98BAFBC3, 0x98BBFBC3, 0x98BCFBC3, 0x98BDFBC3, 0x98BEFBC3, 0x98BFFBC3, + 0x98C0FBC3, 0x98C1FBC3, 0x98C2FBC3, 0x98C3FBC3, 0x98C4FBC3, 0x98C5FBC3, 0x98C6FBC3, 0x98C7FBC3, 0x98C8FBC3, 0x98C9FBC3, 0x98CAFBC3, 0x98CBFBC3, 0x98CCFBC3, 0x98CDFBC3, 0x98CEFBC3, + 0x98CFFBC3, 0x98D0FBC3, 0x98D1FBC3, 0x98D2FBC3, 0x98D3FBC3, 0x98D4FBC3, 0x98D5FBC3, 0x98D6FBC3, 0x98D7FBC3, 0x98D8FBC3, 0x98D9FBC3, 0x98DAFBC3, 0x98DBFBC3, 0x98DCFBC3, 0x98DDFBC3, + 0x98DEFBC3, 0x98DFFBC3, 0x98E0FBC3, 0x98E1FBC3, 
0x98E2FBC3, 0x98E3FBC3, 0x98E4FBC3, 0x98E5FBC3, 0x98E6FBC3, 0x98E7FBC3, 0x98E8FBC3, 0x98E9FBC3, 0x98EAFBC3, 0x98EBFBC3, 0x98ECFBC3, + 0x98EDFBC3, 0x98EEFBC3, 0x98EFFBC3, 0x98F0FBC3, 0x98F1FBC3, 0x98F2FBC3, 0x98F3FBC3, 0x98F4FBC3, 0x98F5FBC3, 0x98F6FBC3, 0x98F7FBC3, 0x98F8FBC3, 0x98F9FBC3, 0x98FAFBC3, 0x98FBFBC3, + 0x98FCFBC3, 0x98FDFBC3, 0x98FEFBC3, 0x98FFFBC3, 0x9900FBC3, 0x9901FBC3, 0x9902FBC3, 0x9903FBC3, 0x9904FBC3, 0x9905FBC3, 0x9906FBC3, 0x9907FBC3, 0x9908FBC3, 0x9909FBC3, 0x990AFBC3, + 0x990BFBC3, 0x990CFBC3, 0x990DFBC3, 0x990EFBC3, 0x990FFBC3, 0x9910FBC3, 0x9911FBC3, 0x9912FBC3, 0x9913FBC3, 0x9914FBC3, 0x9915FBC3, 0x9916FBC3, 0x9917FBC3, 0x9918FBC3, 0x9919FBC3, + 0x991AFBC3, 0x991BFBC3, 0x991CFBC3, 0x991DFBC3, 0x991EFBC3, 0x991FFBC3, 0x9920FBC3, 0x9921FBC3, 0x9922FBC3, 0x9923FBC3, 0x9924FBC3, 0x9925FBC3, 0x9926FBC3, 0x9927FBC3, 0x9928FBC3, + 0x9929FBC3, 0x992AFBC3, 0x992BFBC3, 0x992CFBC3, 0x992DFBC3, 0x992EFBC3, 0x992FFBC3, 0x9930FBC3, 0x9931FBC3, 0x9932FBC3, 0x9933FBC3, 0x9934FBC3, 0x9935FBC3, 0x9936FBC3, 0x9937FBC3, + 0x9938FBC3, 0x9939FBC3, 0x993AFBC3, 0x993BFBC3, 0x993CFBC3, 0x993DFBC3, 0x993EFBC3, 0x993FFBC3, 0x9940FBC3, 0x9941FBC3, 0x9942FBC3, 0x9943FBC3, 0x9944FBC3, 0x9945FBC3, 0x9946FBC3, + 0x9947FBC3, 0x9948FBC3, 0x9949FBC3, 0x994AFBC3, 0x994BFBC3, 0x994CFBC3, 0x994DFBC3, 0x994EFBC3, 0x994FFBC3, 0x9950FBC3, 0x9951FBC3, 0x9952FBC3, 0x9953FBC3, 0x9954FBC3, 0x9955FBC3, + 0x9956FBC3, 0x9957FBC3, 0x9958FBC3, 0x9959FBC3, 0x995AFBC3, 0x995BFBC3, 0x995CFBC3, 0x995DFBC3, 0x995EFBC3, 0x995FFBC3, 0x9960FBC3, 0x9961FBC3, 0x9962FBC3, 0x9963FBC3, 0x9964FBC3, + 0x9965FBC3, 0x9966FBC3, 0x9967FBC3, 0x9968FBC3, 0x9969FBC3, 0x996AFBC3, 0x996BFBC3, 0x996CFBC3, 0x996DFBC3, 0x996EFBC3, 0x996FFBC3, 0x9970FBC3, 0x9971FBC3, 0x9972FBC3, 0x9973FBC3, + 0x9974FBC3, 0x9975FBC3, 0x9976FBC3, 0x9977FBC3, 0x9978FBC3, 0x9979FBC3, 0x997AFBC3, 0x997BFBC3, 0x997CFBC3, 0x997DFBC3, 0x997EFBC3, 0x997FFBC3, 0x9980FBC3, 0x9981FBC3, 0x9982FBC3, + 0x9983FBC3, 0x9984FBC3, 0x9985FBC3, 
0x9986FBC3, 0x9987FBC3, 0x9988FBC3, 0x9989FBC3, 0x998AFBC3, 0x998BFBC3, 0x998CFBC3, 0x998DFBC3, 0x998EFBC3, 0x998FFBC3, 0x9990FBC3, 0x9991FBC3, + 0x9992FBC3, 0x9993FBC3, 0x9994FBC3, 0x9995FBC3, 0x9996FBC3, 0x9997FBC3, 0x9998FBC3, 0x9999FBC3, 0x999AFBC3, 0x999BFBC3, 0x999CFBC3, 0x999DFBC3, 0x999EFBC3, 0x999FFBC3, 0x99A0FBC3, + 0x99A1FBC3, 0x99A2FBC3, 0x99A3FBC3, 0x99A4FBC3, 0x99A5FBC3, 0x99A6FBC3, 0x99A7FBC3, 0x99A8FBC3, 0x99A9FBC3, 0x99AAFBC3, 0x99ABFBC3, 0x99ACFBC3, 0x99ADFBC3, 0x99AEFBC3, 0x99AFFBC3, + 0x99B0FBC3, 0x99B1FBC3, 0x99B2FBC3, 0x99B3FBC3, 0x99B4FBC3, 0x99B5FBC3, 0x99B6FBC3, 0x99B7FBC3, 0x99B8FBC3, 0x99B9FBC3, 0x99BAFBC3, 0x99BBFBC3, 0x99BCFBC3, 0x99BDFBC3, 0x99BEFBC3, + 0x99BFFBC3, 0x99C0FBC3, 0x99C1FBC3, 0x99C2FBC3, 0x99C3FBC3, 0x99C4FBC3, 0x99C5FBC3, 0x99C6FBC3, 0x99C7FBC3, 0x99C8FBC3, 0x99C9FBC3, 0x99CAFBC3, 0x99CBFBC3, 0x99CCFBC3, 0x99CDFBC3, + 0x99CEFBC3, 0x99CFFBC3, 0x99D0FBC3, 0x99D1FBC3, 0x99D2FBC3, 0x99D3FBC3, 0x99D4FBC3, 0x99D5FBC3, 0x99D6FBC3, 0x99D7FBC3, 0x99D8FBC3, 0x99D9FBC3, 0x99DAFBC3, 0x99DBFBC3, 0x99DCFBC3, + 0x99DDFBC3, 0x99DEFBC3, 0x99DFFBC3, 0x99E0FBC3, 0x99E1FBC3, 0x99E2FBC3, 0x99E3FBC3, 0x99E4FBC3, 0x99E5FBC3, 0x99E6FBC3, 0x99E7FBC3, 0x99E8FBC3, 0x99E9FBC3, 0x99EAFBC3, 0x99EBFBC3, + 0x99ECFBC3, 0x99EDFBC3, 0x99EEFBC3, 0x99EFFBC3, 0x99F0FBC3, 0x99F1FBC3, 0x99F2FBC3, 0x99F3FBC3, 0x99F4FBC3, 0x99F5FBC3, 0x99F6FBC3, 0x99F7FBC3, 0x99F8FBC3, 0x99F9FBC3, 0x99FAFBC3, + 0x99FBFBC3, 0x99FCFBC3, 0x99FDFBC3, 0x99FEFBC3, 0x99FFFBC3, 0x9A00FBC3, 0x9A01FBC3, 0x9A02FBC3, 0x9A03FBC3, 0x9A04FBC3, 0x9A05FBC3, 0x9A06FBC3, 0x9A07FBC3, 0x9A08FBC3, 0x9A09FBC3, + 0x9A0AFBC3, 0x9A0BFBC3, 0x9A0CFBC3, 0x9A0DFBC3, 0x9A0EFBC3, 0x9A0FFBC3, 0x9A10FBC3, 0x9A11FBC3, 0x9A12FBC3, 0x9A13FBC3, 0x9A14FBC3, 0x9A15FBC3, 0x9A16FBC3, 0x9A17FBC3, 0x9A18FBC3, + 0x9A19FBC3, 0x9A1AFBC3, 0x9A1BFBC3, 0x9A1CFBC3, 0x9A1DFBC3, 0x9A1EFBC3, 0x9A1FFBC3, 0x9A20FBC3, 0x9A21FBC3, 0x9A22FBC3, 0x9A23FBC3, 0x9A24FBC3, 0x9A25FBC3, 0x9A26FBC3, 0x9A27FBC3, + 0x9A28FBC3, 0x9A29FBC3, 
0x9A2AFBC3, 0x9A2BFBC3, 0x9A2CFBC3, 0x9A2DFBC3, 0x9A2EFBC3, 0x9A2FFBC3, 0x9A30FBC3, 0x9A31FBC3, 0x9A32FBC3, 0x9A33FBC3, 0x9A34FBC3, 0x9A35FBC3, 0x9A36FBC3, + 0x9A37FBC3, 0x9A38FBC3, 0x9A39FBC3, 0x9A3AFBC3, 0x9A3BFBC3, 0x9A3CFBC3, 0x9A3DFBC3, 0x9A3EFBC3, 0x9A3FFBC3, 0x9A40FBC3, 0x9A41FBC3, 0x9A42FBC3, 0x9A43FBC3, 0x9A44FBC3, 0x9A45FBC3, + 0x9A46FBC3, 0x9A47FBC3, 0x9A48FBC3, 0x9A49FBC3, 0x9A4AFBC3, 0x9A4BFBC3, 0x9A4CFBC3, 0x9A4DFBC3, 0x9A4EFBC3, 0x9A4FFBC3, 0x9A50FBC3, 0x9A51FBC3, 0x9A52FBC3, 0x9A53FBC3, 0x9A54FBC3, + 0x9A55FBC3, 0x9A56FBC3, 0x9A57FBC3, 0x9A58FBC3, 0x9A59FBC3, 0x9A5AFBC3, 0x9A5BFBC3, 0x9A5CFBC3, 0x9A5DFBC3, 0x9A5EFBC3, 0x9A5FFBC3, 0x9A60FBC3, 0x9A61FBC3, 0x9A62FBC3, 0x9A63FBC3, + 0x9A64FBC3, 0x9A65FBC3, 0x9A66FBC3, 0x9A67FBC3, 0x9A68FBC3, 0x9A69FBC3, 0x9A6AFBC3, 0x9A6BFBC3, 0x9A6CFBC3, 0x9A6DFBC3, 0x9A6EFBC3, 0x9A6FFBC3, 0x9A70FBC3, 0x9A71FBC3, 0x9A72FBC3, + 0x9A73FBC3, 0x9A74FBC3, 0x9A75FBC3, 0x9A76FBC3, 0x9A77FBC3, 0x9A78FBC3, 0x9A79FBC3, 0x9A7AFBC3, 0x9A7BFBC3, 0x9A7CFBC3, 0x9A7DFBC3, 0x9A7EFBC3, 0x9A7FFBC3, 0x9A80FBC3, 0x9A81FBC3, + 0x9A82FBC3, 0x9A83FBC3, 0x9A84FBC3, 0x9A85FBC3, 0x9A86FBC3, 0x9A87FBC3, 0x9A88FBC3, 0x9A89FBC3, 0x9A8AFBC3, 0x9A8BFBC3, 0x9A8CFBC3, 0x9A8DFBC3, 0x9A8EFBC3, 0x9A8FFBC3, 0x9A90FBC3, + 0x9A91FBC3, 0x9A92FBC3, 0x9A93FBC3, 0x9A94FBC3, 0x9A95FBC3, 0x9A96FBC3, 0x9A97FBC3, 0x9A98FBC3, 0x9A99FBC3, 0x9A9AFBC3, 0x9A9BFBC3, 0x9A9CFBC3, 0x9A9DFBC3, 0x9A9EFBC3, 0x9A9FFBC3, + 0x9AA0FBC3, 0x9AA1FBC3, 0x9AA2FBC3, 0x9AA3FBC3, 0x9AA4FBC3, 0x9AA5FBC3, 0x9AA6FBC3, 0x9AA7FBC3, 0x9AA8FBC3, 0x9AA9FBC3, 0x9AAAFBC3, 0x9AABFBC3, 0x9AACFBC3, 0x9AADFBC3, 0x9AAEFBC3, + 0x9AAFFBC3, 0x9AB0FBC3, 0x9AB1FBC3, 0x9AB2FBC3, 0x9AB3FBC3, 0x9AB4FBC3, 0x9AB5FBC3, 0x9AB6FBC3, 0x9AB7FBC3, 0x9AB8FBC3, 0x9AB9FBC3, 0x9ABAFBC3, 0x9ABBFBC3, 0x9ABCFBC3, 0x9ABDFBC3, + 0x9ABEFBC3, 0x9ABFFBC3, 0x9AC0FBC3, 0x9AC1FBC3, 0x9AC2FBC3, 0x9AC3FBC3, 0x9AC4FBC3, 0x9AC5FBC3, 0x9AC6FBC3, 0x9AC7FBC3, 0x9AC8FBC3, 0x9AC9FBC3, 0x9ACAFBC3, 0x9ACBFBC3, 0x9ACCFBC3, + 0x9ACDFBC3, 
0x9ACEFBC3, 0x9ACFFBC3, 0x9AD0FBC3, 0x9AD1FBC3, 0x9AD2FBC3, 0x9AD3FBC3, 0x9AD4FBC3, 0x9AD5FBC3, 0x9AD6FBC3, 0x9AD7FBC3, 0x9AD8FBC3, 0x9AD9FBC3, 0x9ADAFBC3, 0x9ADBFBC3, + 0x9ADCFBC3, 0x9ADDFBC3, 0x9ADEFBC3, 0x9ADFFBC3, 0x9AE0FBC3, 0x9AE1FBC3, 0x9AE2FBC3, 0x9AE3FBC3, 0x9AE4FBC3, 0x9AE5FBC3, 0x9AE6FBC3, 0x9AE7FBC3, 0x9AE8FBC3, 0x9AE9FBC3, 0x9AEAFBC3, + 0x9AEBFBC3, 0x9AECFBC3, 0x9AEDFBC3, 0x9AEEFBC3, 0x9AEFFBC3, 0x9AF0FBC3, 0x9AF1FBC3, 0x9AF2FBC3, 0x9AF3FBC3, 0x9AF4FBC3, 0x9AF5FBC3, 0x9AF6FBC3, 0x9AF7FBC3, 0x9AF8FBC3, 0x9AF9FBC3, + 0x9AFAFBC3, 0x9AFBFBC3, 0x9AFCFBC3, 0x9AFDFBC3, 0x9AFEFBC3, 0x9AFFFBC3, 0x9B00FBC3, 0x9B01FBC3, 0x9B02FBC3, 0x9B03FBC3, 0x9B04FBC3, 0x9B05FBC3, 0x9B06FBC3, 0x9B07FBC3, 0x9B08FBC3, + 0x9B09FBC3, 0x9B0AFBC3, 0x9B0BFBC3, 0x9B0CFBC3, 0x9B0DFBC3, 0x9B0EFBC3, 0x9B0FFBC3, 0x9B10FBC3, 0x9B11FBC3, 0x9B12FBC3, 0x9B13FBC3, 0x9B14FBC3, 0x9B15FBC3, 0x9B16FBC3, 0x9B17FBC3, + 0x9B18FBC3, 0x9B19FBC3, 0x9B1AFBC3, 0x9B1BFBC3, 0x9B1CFBC3, 0x9B1DFBC3, 0x9B1EFBC3, 0x9B1FFBC3, 0x9B20FBC3, 0x9B21FBC3, 0x9B22FBC3, 0x9B23FBC3, 0x9B24FBC3, 0x9B25FBC3, 0x9B26FBC3, + 0x9B27FBC3, 0x9B28FBC3, 0x9B29FBC3, 0x9B2AFBC3, 0x9B2BFBC3, 0x9B2CFBC3, 0x9B2DFBC3, 0x9B2EFBC3, 0x9B2FFBC3, 0x9B30FBC3, 0x9B31FBC3, 0x9B32FBC3, 0x9B33FBC3, 0x9B34FBC3, 0x9B35FBC3, + 0x9B36FBC3, 0x9B37FBC3, 0x9B38FBC3, 0x9B39FBC3, 0x9B3AFBC3, 0x9B3BFBC3, 0x9B3CFBC3, 0x9B3DFBC3, 0x9B3EFBC3, 0x9B3FFBC3, 0x9B40FBC3, 0x9B41FBC3, 0x9B42FBC3, 0x9B43FBC3, 0x9B44FBC3, + 0x9B45FBC3, 0x9B46FBC3, 0x9B47FBC3, 0x9B48FBC3, 0x9B49FBC3, 0x9B4AFBC3, 0x9B4BFBC3, 0x9B4CFBC3, 0x9B4DFBC3, 0x9B4EFBC3, 0x9B4FFBC3, 0x9B50FBC3, 0x9B51FBC3, 0x9B52FBC3, 0x9B53FBC3, + 0x9B54FBC3, 0x9B55FBC3, 0x9B56FBC3, 0x9B57FBC3, 0x9B58FBC3, 0x9B59FBC3, 0x9B5AFBC3, 0x9B5BFBC3, 0x9B5CFBC3, 0x9B5DFBC3, 0x9B5EFBC3, 0x9B5FFBC3, 0x9B60FBC3, 0x9B61FBC3, 0x9B62FBC3, + 0x9B63FBC3, 0x9B64FBC3, 0x9B65FBC3, 0x9B66FBC3, 0x9B67FBC3, 0x9B68FBC3, 0x9B69FBC3, 0x9B6AFBC3, 0x9B6BFBC3, 0x9B6CFBC3, 0x9B6DFBC3, 0x9B6EFBC3, 0x9B6FFBC3, 0x9B70FBC3, 0x9B71FBC3, + 
0x9B72FBC3, 0x9B73FBC3, 0x9B74FBC3, 0x9B75FBC3, 0x9B76FBC3, 0x9B77FBC3, 0x9B78FBC3, 0x9B79FBC3, 0x9B7AFBC3, 0x9B7BFBC3, 0x9B7CFBC3, 0x9B7DFBC3, 0x9B7EFBC3, 0x9B7FFBC3, 0x9B80FBC3, + 0x9B81FBC3, 0x9B82FBC3, 0x9B83FBC3, 0x9B84FBC3, 0x9B85FBC3, 0x9B86FBC3, 0x9B87FBC3, 0x9B88FBC3, 0x9B89FBC3, 0x9B8AFBC3, 0x9B8BFBC3, 0x9B8CFBC3, 0x9B8DFBC3, 0x9B8EFBC3, 0x9B8FFBC3, + 0x9B90FBC3, 0x9B91FBC3, 0x9B92FBC3, 0x9B93FBC3, 0x9B94FBC3, 0x9B95FBC3, 0x9B96FBC3, 0x9B97FBC3, 0x9B98FBC3, 0x9B99FBC3, 0x9B9AFBC3, 0x9B9BFBC3, 0x9B9CFBC3, 0x9B9DFBC3, 0x9B9EFBC3, + 0x9B9FFBC3, 0x9BA0FBC3, 0x9BA1FBC3, 0x9BA2FBC3, 0x9BA3FBC3, 0x9BA4FBC3, 0x9BA5FBC3, 0x9BA6FBC3, 0x9BA7FBC3, 0x9BA8FBC3, 0x9BA9FBC3, 0x9BAAFBC3, 0x9BABFBC3, 0x9BACFBC3, 0x9BADFBC3, + 0x9BAEFBC3, 0x9BAFFBC3, 0x9BB0FBC3, 0x9BB1FBC3, 0x9BB2FBC3, 0x9BB3FBC3, 0x9BB4FBC3, 0x9BB5FBC3, 0x9BB6FBC3, 0x9BB7FBC3, 0x9BB8FBC3, 0x9BB9FBC3, 0x9BBAFBC3, 0x9BBBFBC3, 0x9BBCFBC3, + 0x9BBDFBC3, 0x9BBEFBC3, 0x9BBFFBC3, 0x9BC0FBC3, 0x9BC1FBC3, 0x9BC2FBC3, 0x9BC3FBC3, 0x9BC4FBC3, 0x9BC5FBC3, 0x9BC6FBC3, 0x9BC7FBC3, 0x9BC8FBC3, 0x9BC9FBC3, 0x9BCAFBC3, 0x9BCBFBC3, + 0x9BCCFBC3, 0x9BCDFBC3, 0x9BCEFBC3, 0x9BCFFBC3, 0x9BD0FBC3, 0x9BD1FBC3, 0x9BD2FBC3, 0x9BD3FBC3, 0x9BD4FBC3, 0x9BD5FBC3, 0x9BD6FBC3, 0x9BD7FBC3, 0x9BD8FBC3, 0x9BD9FBC3, 0x9BDAFBC3, + 0x9BDBFBC3, 0x9BDCFBC3, 0x9BDDFBC3, 0x9BDEFBC3, 0x9BDFFBC3, 0x9BE0FBC3, 0x9BE1FBC3, 0x9BE2FBC3, 0x9BE3FBC3, 0x9BE4FBC3, 0x9BE5FBC3, 0x9BE6FBC3, 0x9BE7FBC3, 0x9BE8FBC3, 0x9BE9FBC3, + 0x9BEAFBC3, 0x9BEBFBC3, 0x9BECFBC3, 0x9BEDFBC3, 0x9BEEFBC3, 0x9BEFFBC3, 0x9BF0FBC3, 0x9BF1FBC3, 0x9BF2FBC3, 0x9BF3FBC3, 0x9BF4FBC3, 0x9BF5FBC3, 0x9BF6FBC3, 0x9BF7FBC3, 0x9BF8FBC3, + 0x9BF9FBC3, 0x9BFAFBC3, 0x9BFBFBC3, 0x9BFCFBC3, 0x9BFDFBC3, 0x9BFEFBC3, 0x9BFFFBC3, 0x9C00FBC3, 0x9C01FBC3, 0x9C02FBC3, 0x9C03FBC3, 0x9C04FBC3, 0x9C05FBC3, 0x9C06FBC3, 0x9C07FBC3, + 0x9C08FBC3, 0x9C09FBC3, 0x9C0AFBC3, 0x9C0BFBC3, 0x9C0CFBC3, 0x9C0DFBC3, 0x9C0EFBC3, 0x9C0FFBC3, 0x9C10FBC3, 0x9C11FBC3, 0x9C12FBC3, 0x9C13FBC3, 0x9C14FBC3, 0x9C15FBC3, 0x9C16FBC3, 
+ 0x9C17FBC3, 0x9C18FBC3, 0x9C19FBC3, 0x9C1AFBC3, 0x9C1BFBC3, 0x9C1CFBC3, 0x9C1DFBC3, 0x9C1EFBC3, 0x9C1FFBC3, 0x9C20FBC3, 0x9C21FBC3, 0x9C22FBC3, 0x9C23FBC3, 0x9C24FBC3, 0x9C25FBC3, + 0x9C26FBC3, 0x9C27FBC3, 0x9C28FBC3, 0x9C29FBC3, 0x9C2AFBC3, 0x9C2BFBC3, 0x9C2CFBC3, 0x9C2DFBC3, 0x9C2EFBC3, 0x9C2FFBC3, 0x9C30FBC3, 0x9C31FBC3, 0x9C32FBC3, 0x9C33FBC3, 0x9C34FBC3, + 0x9C35FBC3, 0x9C36FBC3, 0x9C37FBC3, 0x9C38FBC3, 0x9C39FBC3, 0x9C3AFBC3, 0x9C3BFBC3, 0x9C3CFBC3, 0x9C3DFBC3, 0x9C3EFBC3, 0x9C3FFBC3, 0x9C40FBC3, 0x9C41FBC3, 0x9C42FBC3, 0x9C43FBC3, + 0x9C44FBC3, 0x9C45FBC3, 0x9C46FBC3, 0x9C47FBC3, 0x9C48FBC3, 0x9C49FBC3, 0x9C4AFBC3, 0x9C4BFBC3, 0x9C4CFBC3, 0x9C4DFBC3, 0x9C4EFBC3, 0x9C4FFBC3, 0x9C50FBC3, 0x9C51FBC3, 0x9C52FBC3, + 0x9C53FBC3, 0x9C54FBC3, 0x9C55FBC3, 0x9C56FBC3, 0x9C57FBC3, 0x9C58FBC3, 0x9C59FBC3, 0x9C5AFBC3, 0x9C5BFBC3, 0x9C5CFBC3, 0x9C5DFBC3, 0x9C5EFBC3, 0x9C5FFBC3, 0x9C60FBC3, 0x9C61FBC3, + 0x9C62FBC3, 0x9C63FBC3, 0x9C64FBC3, 0x9C65FBC3, 0x9C66FBC3, 0x9C67FBC3, 0x9C68FBC3, 0x9C69FBC3, 0x9C6AFBC3, 0x9C6BFBC3, 0x9C6CFBC3, 0x9C6DFBC3, 0x9C6EFBC3, 0x9C6FFBC3, 0x9C70FBC3, + 0x9C71FBC3, 0x9C72FBC3, 0x9C73FBC3, 0x9C74FBC3, 0x9C75FBC3, 0x9C76FBC3, 0x9C77FBC3, 0x9C78FBC3, 0x9C79FBC3, 0x9C7AFBC3, 0x9C7BFBC3, 0x9C7CFBC3, 0x9C7DFBC3, 0x9C7EFBC3, 0x9C7FFBC3, + 0x9C80FBC3, 0x9C81FBC3, 0x9C82FBC3, 0x9C83FBC3, 0x9C84FBC3, 0x9C85FBC3, 0x9C86FBC3, 0x9C87FBC3, 0x9C88FBC3, 0x9C89FBC3, 0x9C8AFBC3, 0x9C8BFBC3, 0x9C8CFBC3, 0x9C8DFBC3, 0x9C8EFBC3, + 0x9C8FFBC3, 0x9C90FBC3, 0x9C91FBC3, 0x9C92FBC3, 0x9C93FBC3, 0x9C94FBC3, 0x9C95FBC3, 0x9C96FBC3, 0x9C97FBC3, 0x9C98FBC3, 0x9C99FBC3, 0x9C9AFBC3, 0x9C9BFBC3, 0x9C9CFBC3, 0x9C9DFBC3, + 0x9C9EFBC3, 0x9C9FFBC3, 0x9CA0FBC3, 0x9CA1FBC3, 0x9CA2FBC3, 0x9CA3FBC3, 0x9CA4FBC3, 0x9CA5FBC3, 0x9CA6FBC3, 0x9CA7FBC3, 0x9CA8FBC3, 0x9CA9FBC3, 0x9CAAFBC3, 0x9CABFBC3, 0x9CACFBC3, + 0x9CADFBC3, 0x9CAEFBC3, 0x9CAFFBC3, 0x9CB0FBC3, 0x9CB1FBC3, 0x9CB2FBC3, 0x9CB3FBC3, 0x9CB4FBC3, 0x9CB5FBC3, 0x9CB6FBC3, 0x9CB7FBC3, 0x9CB8FBC3, 0x9CB9FBC3, 0x9CBAFBC3, 
0x9CBBFBC3, + 0x9CBCFBC3, 0x9CBDFBC3, 0x9CBEFBC3, 0x9CBFFBC3, 0x9CC0FBC3, 0x9CC1FBC3, 0x9CC2FBC3, 0x9CC3FBC3, 0x9CC4FBC3, 0x9CC5FBC3, 0x9CC6FBC3, 0x9CC7FBC3, 0x9CC8FBC3, 0x9CC9FBC3, 0x9CCAFBC3, + 0x9CCBFBC3, 0x9CCCFBC3, 0x9CCDFBC3, 0x9CCEFBC3, 0x9CCFFBC3, 0x9CD0FBC3, 0x9CD1FBC3, 0x9CD2FBC3, 0x9CD3FBC3, 0x9CD4FBC3, 0x9CD5FBC3, 0x9CD6FBC3, 0x9CD7FBC3, 0x9CD8FBC3, 0x9CD9FBC3, + 0x9CDAFBC3, 0x9CDBFBC3, 0x9CDCFBC3, 0x9CDDFBC3, 0x9CDEFBC3, 0x9CDFFBC3, 0x9CE0FBC3, 0x9CE1FBC3, 0x9CE2FBC3, 0x9CE3FBC3, 0x9CE4FBC3, 0x9CE5FBC3, 0x9CE6FBC3, 0x9CE7FBC3, 0x9CE8FBC3, + 0x9CE9FBC3, 0x9CEAFBC3, 0x9CEBFBC3, 0x9CECFBC3, 0x9CEDFBC3, 0x9CEEFBC3, 0x9CEFFBC3, 0x9CF0FBC3, 0x9CF1FBC3, 0x9CF2FBC3, 0x9CF3FBC3, 0x9CF4FBC3, 0x9CF5FBC3, 0x9CF6FBC3, 0x9CF7FBC3, + 0x9CF8FBC3, 0x9CF9FBC3, 0x9CFAFBC3, 0x9CFBFBC3, 0x9CFCFBC3, 0x9CFDFBC3, 0x9CFEFBC3, 0x9CFFFBC3, 0x9D00FBC3, 0x9D01FBC3, 0x9D02FBC3, 0x9D03FBC3, 0x9D04FBC3, 0x9D05FBC3, 0x9D06FBC3, + 0x9D07FBC3, 0x9D08FBC3, 0x9D09FBC3, 0x9D0AFBC3, 0x9D0BFBC3, 0x9D0CFBC3, 0x9D0DFBC3, 0x9D0EFBC3, 0x9D0FFBC3, 0x9D10FBC3, 0x9D11FBC3, 0x9D12FBC3, 0x9D13FBC3, 0x9D14FBC3, 0x9D15FBC3, + 0x9D16FBC3, 0x9D17FBC3, 0x9D18FBC3, 0x9D19FBC3, 0x9D1AFBC3, 0x9D1BFBC3, 0x9D1CFBC3, 0x9D1DFBC3, 0x9D1EFBC3, 0x9D1FFBC3, 0x9D20FBC3, 0x9D21FBC3, 0x9D22FBC3, 0x9D23FBC3, 0x9D24FBC3, + 0x9D25FBC3, 0x9D26FBC3, 0x9D27FBC3, 0x9D28FBC3, 0x9D29FBC3, 0x9D2AFBC3, 0x9D2BFBC3, 0x9D2CFBC3, 0x9D2DFBC3, 0x9D2EFBC3, 0x9D2FFBC3, 0x9D30FBC3, 0x9D31FBC3, 0x9D32FBC3, 0x9D33FBC3, + 0x9D34FBC3, 0x9D35FBC3, 0x9D36FBC3, 0x9D37FBC3, 0x9D38FBC3, 0x9D39FBC3, 0x9D3AFBC3, 0x9D3BFBC3, 0x9D3CFBC3, 0x9D3DFBC3, 0x9D3EFBC3, 0x9D3FFBC3, 0x9D40FBC3, 0x9D41FBC3, 0x9D42FBC3, + 0x9D43FBC3, 0x9D44FBC3, 0x9D45FBC3, 0x9D46FBC3, 0x9D47FBC3, 0x9D48FBC3, 0x9D49FBC3, 0x9D4AFBC3, 0x9D4BFBC3, 0x9D4CFBC3, 0x9D4DFBC3, 0x9D4EFBC3, 0x9D4FFBC3, 0x9D50FBC3, 0x9D51FBC3, + 0x9D52FBC3, 0x9D53FBC3, 0x9D54FBC3, 0x9D55FBC3, 0x9D56FBC3, 0x9D57FBC3, 0x9D58FBC3, 0x9D59FBC3, 0x9D5AFBC3, 0x9D5BFBC3, 0x9D5CFBC3, 0x9D5DFBC3, 0x9D5EFBC3, 
0x9D5FFBC3, 0x9D60FBC3, + 0x9D61FBC3, 0x9D62FBC3, 0x9D63FBC3, 0x9D64FBC3, 0x9D65FBC3, 0x9D66FBC3, 0x9D67FBC3, 0x9D68FBC3, 0x9D69FBC3, 0x9D6AFBC3, 0x9D6BFBC3, 0x9D6CFBC3, 0x9D6DFBC3, 0x9D6EFBC3, 0x9D6FFBC3, + 0x9D70FBC3, 0x9D71FBC3, 0x9D72FBC3, 0x9D73FBC3, 0x9D74FBC3, 0x9D75FBC3, 0x9D76FBC3, 0x9D77FBC3, 0x9D78FBC3, 0x9D79FBC3, 0x9D7AFBC3, 0x9D7BFBC3, 0x9D7CFBC3, 0x9D7DFBC3, 0x9D7EFBC3, + 0x9D7FFBC3, 0x9D80FBC3, 0x9D81FBC3, 0x9D82FBC3, 0x9D83FBC3, 0x9D84FBC3, 0x9D85FBC3, 0x9D86FBC3, 0x9D87FBC3, 0x9D88FBC3, 0x9D89FBC3, 0x9D8AFBC3, 0x9D8BFBC3, 0x9D8CFBC3, 0x9D8DFBC3, + 0x9D8EFBC3, 0x9D8FFBC3, 0x9D90FBC3, 0x9D91FBC3, 0x9D92FBC3, 0x9D93FBC3, 0x9D94FBC3, 0x9D95FBC3, 0x9D96FBC3, 0x9D97FBC3, 0x9D98FBC3, 0x9D99FBC3, 0x9D9AFBC3, 0x9D9BFBC3, 0x9D9CFBC3, + 0x9D9DFBC3, 0x9D9EFBC3, 0x9D9FFBC3, 0x9DA0FBC3, 0x9DA1FBC3, 0x9DA2FBC3, 0x9DA3FBC3, 0x9DA4FBC3, 0x9DA5FBC3, 0x9DA6FBC3, 0x9DA7FBC3, 0x9DA8FBC3, 0x9DA9FBC3, 0x9DAAFBC3, 0x9DABFBC3, + 0x9DACFBC3, 0x9DADFBC3, 0x9DAEFBC3, 0x9DAFFBC3, 0x9DB0FBC3, 0x9DB1FBC3, 0x9DB2FBC3, 0x9DB3FBC3, 0x9DB4FBC3, 0x9DB5FBC3, 0x9DB6FBC3, 0x9DB7FBC3, 0x9DB8FBC3, 0x9DB9FBC3, 0x9DBAFBC3, + 0x9DBBFBC3, 0x9DBCFBC3, 0x9DBDFBC3, 0x9DBEFBC3, 0x9DBFFBC3, 0x9DC0FBC3, 0x9DC1FBC3, 0x9DC2FBC3, 0x9DC3FBC3, 0x9DC4FBC3, 0x9DC5FBC3, 0x9DC6FBC3, 0x9DC7FBC3, 0x9DC8FBC3, 0x9DC9FBC3, + 0x9DCAFBC3, 0x9DCBFBC3, 0x9DCCFBC3, 0x9DCDFBC3, 0x9DCEFBC3, 0x9DCFFBC3, 0x9DD0FBC3, 0x9DD1FBC3, 0x9DD2FBC3, 0x9DD3FBC3, 0x9DD4FBC3, 0x9DD5FBC3, 0x9DD6FBC3, 0x9DD7FBC3, 0x9DD8FBC3, + 0x9DD9FBC3, 0x9DDAFBC3, 0x9DDBFBC3, 0x9DDCFBC3, 0x9DDDFBC3, 0x9DDEFBC3, 0x9DDFFBC3, 0x9DE0FBC3, 0x9DE1FBC3, 0x9DE2FBC3, 0x9DE3FBC3, 0x9DE4FBC3, 0x9DE5FBC3, 0x9DE6FBC3, 0x9DE7FBC3, + 0x9DE8FBC3, 0x9DE9FBC3, 0x9DEAFBC3, 0x9DEBFBC3, 0x9DECFBC3, 0x9DEDFBC3, 0x9DEEFBC3, 0x9DEFFBC3, 0x9DF0FBC3, 0x9DF1FBC3, 0x9DF2FBC3, 0x9DF3FBC3, 0x9DF4FBC3, 0x9DF5FBC3, 0x9DF6FBC3, + 0x9DF7FBC3, 0x9DF8FBC3, 0x9DF9FBC3, 0x9DFAFBC3, 0x9DFBFBC3, 0x9DFCFBC3, 0x9DFDFBC3, 0x9DFEFBC3, 0x9DFFFBC3, 0x9E00FBC3, 0x9E01FBC3, 0x9E02FBC3, 
0x9E03FBC3, 0x9E04FBC3, 0x9E05FBC3, + 0x9E06FBC3, 0x9E07FBC3, 0x9E08FBC3, 0x9E09FBC3, 0x9E0AFBC3, 0x9E0BFBC3, 0x9E0CFBC3, 0x9E0DFBC3, 0x9E0EFBC3, 0x9E0FFBC3, 0x9E10FBC3, 0x9E11FBC3, 0x9E12FBC3, 0x9E13FBC3, 0x9E14FBC3, + 0x9E15FBC3, 0x9E16FBC3, 0x9E17FBC3, 0x9E18FBC3, 0x9E19FBC3, 0x9E1AFBC3, 0x9E1BFBC3, 0x9E1CFBC3, 0x9E1DFBC3, 0x9E1EFBC3, 0x9E1FFBC3, 0x9E20FBC3, 0x9E21FBC3, 0x9E22FBC3, 0x9E23FBC3, + 0x9E24FBC3, 0x9E25FBC3, 0x9E26FBC3, 0x9E27FBC3, 0x9E28FBC3, 0x9E29FBC3, 0x9E2AFBC3, 0x9E2BFBC3, 0x9E2CFBC3, 0x9E2DFBC3, 0x9E2EFBC3, 0x9E2FFBC3, 0x9E30FBC3, 0x9E31FBC3, 0x9E32FBC3, + 0x9E33FBC3, 0x9E34FBC3, 0x9E35FBC3, 0x9E36FBC3, 0x9E37FBC3, 0x9E38FBC3, 0x9E39FBC3, 0x9E3AFBC3, 0x9E3BFBC3, 0x9E3CFBC3, 0x9E3DFBC3, 0x9E3EFBC3, 0x9E3FFBC3, 0x9E40FBC3, 0x9E41FBC3, + 0x9E42FBC3, 0x9E43FBC3, 0x9E44FBC3, 0x9E45FBC3, 0x9E46FBC3, 0x9E47FBC3, 0x9E48FBC3, 0x9E49FBC3, 0x9E4AFBC3, 0x9E4BFBC3, 0x9E4CFBC3, 0x9E4DFBC3, 0x9E4EFBC3, 0x9E4FFBC3, 0x9E50FBC3, + 0x9E51FBC3, 0x9E52FBC3, 0x9E53FBC3, 0x9E54FBC3, 0x9E55FBC3, 0x9E56FBC3, 0x9E57FBC3, 0x9E58FBC3, 0x9E59FBC3, 0x9E5AFBC3, 0x9E5BFBC3, 0x9E5CFBC3, 0x9E5DFBC3, 0x9E5EFBC3, 0x9E5FFBC3, + 0x9E60FBC3, 0x9E61FBC3, 0x9E62FBC3, 0x9E63FBC3, 0x9E64FBC3, 0x9E65FBC3, 0x9E66FBC3, 0x9E67FBC3, 0x9E68FBC3, 0x9E69FBC3, 0x9E6AFBC3, 0x9E6BFBC3, 0x9E6CFBC3, 0x9E6DFBC3, 0x9E6EFBC3, + 0x9E6FFBC3, 0x9E70FBC3, 0x9E71FBC3, 0x9E72FBC3, 0x9E73FBC3, 0x9E74FBC3, 0x9E75FBC3, 0x9E76FBC3, 0x9E77FBC3, 0x9E78FBC3, 0x9E79FBC3, 0x9E7AFBC3, 0x9E7BFBC3, 0x9E7CFBC3, 0x9E7DFBC3, + 0x9E7EFBC3, 0x9E7FFBC3, 0x9E80FBC3, 0x9E81FBC3, 0x9E82FBC3, 0x9E83FBC3, 0x9E84FBC3, 0x9E85FBC3, 0x9E86FBC3, 0x9E87FBC3, 0x9E88FBC3, 0x9E89FBC3, 0x9E8AFBC3, 0x9E8BFBC3, 0x9E8CFBC3, + 0x9E8DFBC3, 0x9E8EFBC3, 0x9E8FFBC3, 0x9E90FBC3, 0x9E91FBC3, 0x9E92FBC3, 0x9E93FBC3, 0x9E94FBC3, 0x9E95FBC3, 0x9E96FBC3, 0x9E97FBC3, 0x9E98FBC3, 0x9E99FBC3, 0x9E9AFBC3, 0x9E9BFBC3, + 0x9E9CFBC3, 0x9E9DFBC3, 0x9E9EFBC3, 0x9E9FFBC3, 0x9EA0FBC3, 0x9EA1FBC3, 0x9EA2FBC3, 0x9EA3FBC3, 0x9EA4FBC3, 0x9EA5FBC3, 0x9EA6FBC3, 
0x9EA7FBC3, 0x9EA8FBC3, 0x9EA9FBC3, 0x9EAAFBC3, + 0x9EABFBC3, 0x9EACFBC3, 0x9EADFBC3, 0x9EAEFBC3, 0x9EAFFBC3, 0x9EB0FBC3, 0x9EB1FBC3, 0x9EB2FBC3, 0x9EB3FBC3, 0x9EB4FBC3, 0x9EB5FBC3, 0x9EB6FBC3, 0x9EB7FBC3, 0x9EB8FBC3, 0x9EB9FBC3, + 0x9EBAFBC3, 0x9EBBFBC3, 0x9EBCFBC3, 0x9EBDFBC3, 0x9EBEFBC3, 0x9EBFFBC3, 0x9EC0FBC3, 0x9EC1FBC3, 0x9EC2FBC3, 0x9EC3FBC3, 0x9EC4FBC3, 0x9EC5FBC3, 0x9EC6FBC3, 0x9EC7FBC3, 0x9EC8FBC3, + 0x9EC9FBC3, 0x9ECAFBC3, 0x9ECBFBC3, 0x9ECCFBC3, 0x9ECDFBC3, 0x9ECEFBC3, 0x9ECFFBC3, 0x9ED0FBC3, 0x9ED1FBC3, 0x9ED2FBC3, 0x9ED3FBC3, 0x9ED4FBC3, 0x9ED5FBC3, 0x9ED6FBC3, 0x9ED7FBC3, + 0x9ED8FBC3, 0x9ED9FBC3, 0x9EDAFBC3, 0x9EDBFBC3, 0x9EDCFBC3, 0x9EDDFBC3, 0x9EDEFBC3, 0x9EDFFBC3, 0x9EE0FBC3, 0x9EE1FBC3, 0x9EE2FBC3, 0x9EE3FBC3, 0x9EE4FBC3, 0x9EE5FBC3, 0x9EE6FBC3, + 0x9EE7FBC3, 0x9EE8FBC3, 0x9EE9FBC3, 0x9EEAFBC3, 0x9EEBFBC3, 0x9EECFBC3, 0x9EEDFBC3, 0x9EEEFBC3, 0x9EEFFBC3, 0x9EF0FBC3, 0x9EF1FBC3, 0x9EF2FBC3, 0x9EF3FBC3, 0x9EF4FBC3, 0x9EF5FBC3, + 0x9EF6FBC3, 0x9EF7FBC3, 0x9EF8FBC3, 0x9EF9FBC3, 0x9EFAFBC3, 0x9EFBFBC3, 0x9EFCFBC3, 0x9EFDFBC3, 0x9EFEFBC3, 0x9EFFFBC3, 0x9F00FBC3, 0x9F01FBC3, 0x9F02FBC3, 0x9F03FBC3, 0x9F04FBC3, + 0x9F05FBC3, 0x9F06FBC3, 0x9F07FBC3, 0x9F08FBC3, 0x9F09FBC3, 0x9F0AFBC3, 0x9F0BFBC3, 0x9F0CFBC3, 0x9F0DFBC3, 0x9F0EFBC3, 0x9F0FFBC3, 0x9F10FBC3, 0x9F11FBC3, 0x9F12FBC3, 0x9F13FBC3, + 0x9F14FBC3, 0x9F15FBC3, 0x9F16FBC3, 0x9F17FBC3, 0x9F18FBC3, 0x9F19FBC3, 0x9F1AFBC3, 0x9F1BFBC3, 0x9F1CFBC3, 0x9F1DFBC3, 0x9F1EFBC3, 0x9F1FFBC3, 0x9F20FBC3, 0x9F21FBC3, 0x9F22FBC3, + 0x9F23FBC3, 0x9F24FBC3, 0x9F25FBC3, 0x9F26FBC3, 0x9F27FBC3, 0x9F28FBC3, 0x9F29FBC3, 0x9F2AFBC3, 0x9F2BFBC3, 0x9F2CFBC3, 0x9F2DFBC3, 0x9F2EFBC3, 0x9F2FFBC3, 0x9F30FBC3, 0x9F31FBC3, + 0x9F32FBC3, 0x9F33FBC3, 0x9F34FBC3, 0x9F35FBC3, 0x9F36FBC3, 0x9F37FBC3, 0x9F38FBC3, 0x9F39FBC3, 0x9F3AFBC3, 0x9F3BFBC3, 0x9F3CFBC3, 0x9F3DFBC3, 0x9F3EFBC3, 0x9F3FFBC3, 0x9F40FBC3, + 0x9F41FBC3, 0x9F42FBC3, 0x9F43FBC3, 0x9F44FBC3, 0x9F45FBC3, 0x9F46FBC3, 0x9F47FBC3, 0x9F48FBC3, 0x9F49FBC3, 0x9F4AFBC3, 
0x9F4BFBC3, 0x9F4CFBC3, 0x9F4DFBC3, 0x9F4EFBC3, 0x9F4FFBC3, + 0x9F50FBC3, 0x9F51FBC3, 0x9F52FBC3, 0x9F53FBC3, 0x9F54FBC3, 0x9F55FBC3, 0x9F56FBC3, 0x9F57FBC3, 0x9F58FBC3, 0x9F59FBC3, 0x9F5AFBC3, 0x9F5BFBC3, 0x9F5CFBC3, 0x9F5DFBC3, 0x9F5EFBC3, + 0x9F5FFBC3, 0x9F60FBC3, 0x9F61FBC3, 0x9F62FBC3, 0x9F63FBC3, 0x9F64FBC3, 0x9F65FBC3, 0x9F66FBC3, 0x9F67FBC3, 0x9F68FBC3, 0x9F69FBC3, 0x9F6AFBC3, 0x9F6BFBC3, 0x9F6CFBC3, 0x9F6DFBC3, + 0x9F6EFBC3, 0x9F6FFBC3, 0x9F70FBC3, 0x9F71FBC3, 0x9F72FBC3, 0x9F73FBC3, 0x9F74FBC3, 0x9F75FBC3, 0x9F76FBC3, 0x9F77FBC3, 0x9F78FBC3, 0x9F79FBC3, 0x9F7AFBC3, 0x9F7BFBC3, 0x9F7CFBC3, + 0x9F7DFBC3, 0x9F7EFBC3, 0x9F7FFBC3, 0x9F80FBC3, 0x9F81FBC3, 0x9F82FBC3, 0x9F83FBC3, 0x9F84FBC3, 0x9F85FBC3, 0x9F86FBC3, 0x9F87FBC3, 0x9F88FBC3, 0x9F89FBC3, 0x9F8AFBC3, 0x9F8BFBC3, + 0x9F8CFBC3, 0x9F8DFBC3, 0x9F8EFBC3, 0x9F8FFBC3, 0x9F90FBC3, 0x9F91FBC3, 0x9F92FBC3, 0x9F93FBC3, 0x9F94FBC3, 0x9F95FBC3, 0x9F96FBC3, 0x9F97FBC3, 0x9F98FBC3, 0x9F99FBC3, 0x9F9AFBC3, + 0x9F9BFBC3, 0x9F9CFBC3, 0x9F9DFBC3, 0x9F9EFBC3, 0x9F9FFBC3, 0x9FA0FBC3, 0x9FA1FBC3, 0x9FA2FBC3, 0x9FA3FBC3, 0x9FA4FBC3, 0x9FA5FBC3, 0x9FA6FBC3, 0x9FA7FBC3, 0x9FA8FBC3, 0x9FA9FBC3, + 0x9FAAFBC3, 0x9FABFBC3, 0x9FACFBC3, 0x9FADFBC3, 0x9FAEFBC3, 0x9FAFFBC3, 0x9FB0FBC3, 0x9FB1FBC3, 0x9FB2FBC3, 0x9FB3FBC3, 0x9FB4FBC3, 0x9FB5FBC3, 0x9FB6FBC3, 0x9FB7FBC3, 0x9FB8FBC3, + 0x9FB9FBC3, 0x9FBAFBC3, 0x9FBBFBC3, 0x9FBCFBC3, 0x9FBDFBC3, 0x9FBEFBC3, 0x9FBFFBC3, 0x9FC0FBC3, 0x9FC1FBC3, 0x9FC2FBC3, 0x9FC3FBC3, 0x9FC4FBC3, 0x9FC5FBC3, 0x9FC6FBC3, 0x9FC7FBC3, + 0x9FC8FBC3, 0x9FC9FBC3, 0x9FCAFBC3, 0x9FCBFBC3, 0x9FCCFBC3, 0x9FCDFBC3, 0x9FCEFBC3, 0x9FCFFBC3, 0x9FD0FBC3, 0x9FD1FBC3, 0x9FD2FBC3, 0x9FD3FBC3, 0x9FD4FBC3, 0x9FD5FBC3, 0x9FD6FBC3, + 0x9FD7FBC3, 0x9FD8FBC3, 0x9FD9FBC3, 0x9FDAFBC3, 0x9FDBFBC3, 0x9FDCFBC3, 0x9FDDFBC3, 0x9FDEFBC3, 0x9FDFFBC3, 0x9FE0FBC3, 0x9FE1FBC3, 0x9FE2FBC3, 0x9FE3FBC3, 0x9FE4FBC3, 0x9FE5FBC3, + 0x9FE6FBC3, 0x9FE7FBC3, 0x9FE8FBC3, 0x9FE9FBC3, 0x9FEAFBC3, 0x9FEBFBC3, 0x9FECFBC3, 0x9FEDFBC3, 0x9FEEFBC3, 
0x9FEFFBC3, 0x9FF0FBC3, 0x9FF1FBC3, 0x9FF2FBC3, 0x9FF3FBC3, 0x9FF4FBC3, + 0x9FF5FBC3, 0x9FF6FBC3, 0x9FF7FBC3, 0x9FF8FBC3, 0x9FF9FBC3, 0x9FFAFBC3, 0x9FFBFBC3, 0x9FFCFBC3, 0x9FFDFBC3, 0x9FFEFBC3, 0x9FFFFBC3, 0xA000FBC3, 0xA001FBC3, 0xA002FBC3, 0xA003FBC3, + 0xA004FBC3, 0xA005FBC3, 0xA006FBC3, 0xA007FBC3, 0xA008FBC3, 0xA009FBC3, 0xA00AFBC3, 0xA00BFBC3, 0xA00CFBC3, 0xA00DFBC3, 0xA00EFBC3, 0xA00FFBC3, 0xA010FBC3, 0xA011FBC3, 0xA012FBC3, + 0xA013FBC3, 0xA014FBC3, 0xA015FBC3, 0xA016FBC3, 0xA017FBC3, 0xA018FBC3, 0xA019FBC3, 0xA01AFBC3, 0xA01BFBC3, 0xA01CFBC3, 0xA01DFBC3, 0xA01EFBC3, 0xA01FFBC3, 0xA020FBC3, 0xA021FBC3, + 0xA022FBC3, 0xA023FBC3, 0xA024FBC3, 0xA025FBC3, 0xA026FBC3, 0xA027FBC3, 0xA028FBC3, 0xA029FBC3, 0xA02AFBC3, 0xA02BFBC3, 0xA02CFBC3, 0xA02DFBC3, 0xA02EFBC3, 0xA02FFBC3, 0xA030FBC3, + 0xA031FBC3, 0xA032FBC3, 0xA033FBC3, 0xA034FBC3, 0xA035FBC3, 0xA036FBC3, 0xA037FBC3, 0xA038FBC3, 0xA039FBC3, 0xA03AFBC3, 0xA03BFBC3, 0xA03CFBC3, 0xA03DFBC3, 0xA03EFBC3, 0xA03FFBC3, + 0xA040FBC3, 0xA041FBC3, 0xA042FBC3, 0xA043FBC3, 0xA044FBC3, 0xA045FBC3, 0xA046FBC3, 0xA047FBC3, 0xA048FBC3, 0xA049FBC3, 0xA04AFBC3, 0xA04BFBC3, 0xA04CFBC3, 0xA04DFBC3, 0xA04EFBC3, + 0xA04FFBC3, 0xA050FBC3, 0xA051FBC3, 0xA052FBC3, 0xA053FBC3, 0xA054FBC3, 0xA055FBC3, 0xA056FBC3, 0xA057FBC3, 0xA058FBC3, 0xA059FBC3, 0xA05AFBC3, 0xA05BFBC3, 0xA05CFBC3, 0xA05DFBC3, + 0xA05EFBC3, 0xA05FFBC3, 0xA060FBC3, 0xA061FBC3, 0xA062FBC3, 0xA063FBC3, 0xA064FBC3, 0xA065FBC3, 0xA066FBC3, 0xA067FBC3, 0xA068FBC3, 0xA069FBC3, 0xA06AFBC3, 0xA06BFBC3, 0xA06CFBC3, + 0xA06DFBC3, 0xA06EFBC3, 0xA06FFBC3, 0xA070FBC3, 0xA071FBC3, 0xA072FBC3, 0xA073FBC3, 0xA074FBC3, 0xA075FBC3, 0xA076FBC3, 0xA077FBC3, 0xA078FBC3, 0xA079FBC3, 0xA07AFBC3, 0xA07BFBC3, + 0xA07CFBC3, 0xA07DFBC3, 0xA07EFBC3, 0xA07FFBC3, 0xA080FBC3, 0xA081FBC3, 0xA082FBC3, 0xA083FBC3, 0xA084FBC3, 0xA085FBC3, 0xA086FBC3, 0xA087FBC3, 0xA088FBC3, 0xA089FBC3, 0xA08AFBC3, + 0xA08BFBC3, 0xA08CFBC3, 0xA08DFBC3, 0xA08EFBC3, 0xA08FFBC3, 0xA090FBC3, 0xA091FBC3, 0xA092FBC3, 
0xA093FBC3, 0xA094FBC3, 0xA095FBC3, 0xA096FBC3, 0xA097FBC3, 0xA098FBC3, 0xA099FBC3, + 0xA09AFBC3, 0xA09BFBC3, 0xA09CFBC3, 0xA09DFBC3, 0xA09EFBC3, 0xA09FFBC3, 0xA0A0FBC3, 0xA0A1FBC3, 0xA0A2FBC3, 0xA0A3FBC3, 0xA0A4FBC3, 0xA0A5FBC3, 0xA0A6FBC3, 0xA0A7FBC3, 0xA0A8FBC3, + 0xA0A9FBC3, 0xA0AAFBC3, 0xA0ABFBC3, 0xA0ACFBC3, 0xA0ADFBC3, 0xA0AEFBC3, 0xA0AFFBC3, 0xA0B0FBC3, 0xA0B1FBC3, 0xA0B2FBC3, 0xA0B3FBC3, 0xA0B4FBC3, 0xA0B5FBC3, 0xA0B6FBC3, 0xA0B7FBC3, + 0xA0B8FBC3, 0xA0B9FBC3, 0xA0BAFBC3, 0xA0BBFBC3, 0xA0BCFBC3, 0xA0BDFBC3, 0xA0BEFBC3, 0xA0BFFBC3, 0xA0C0FBC3, 0xA0C1FBC3, 0xA0C2FBC3, 0xA0C3FBC3, 0xA0C4FBC3, 0xA0C5FBC3, 0xA0C6FBC3, + 0xA0C7FBC3, 0xA0C8FBC3, 0xA0C9FBC3, 0xA0CAFBC3, 0xA0CBFBC3, 0xA0CCFBC3, 0xA0CDFBC3, 0xA0CEFBC3, 0xA0CFFBC3, 0xA0D0FBC3, 0xA0D1FBC3, 0xA0D2FBC3, 0xA0D3FBC3, 0xA0D4FBC3, 0xA0D5FBC3, + 0xA0D6FBC3, 0xA0D7FBC3, 0xA0D8FBC3, 0xA0D9FBC3, 0xA0DAFBC3, 0xA0DBFBC3, 0xA0DCFBC3, 0xA0DDFBC3, 0xA0DEFBC3, 0xA0DFFBC3, 0xA0E0FBC3, 0xA0E1FBC3, 0xA0E2FBC3, 0xA0E3FBC3, 0xA0E4FBC3, + 0xA0E5FBC3, 0xA0E6FBC3, 0xA0E7FBC3, 0xA0E8FBC3, 0xA0E9FBC3, 0xA0EAFBC3, 0xA0EBFBC3, 0xA0ECFBC3, 0xA0EDFBC3, 0xA0EEFBC3, 0xA0EFFBC3, 0xA0F0FBC3, 0xA0F1FBC3, 0xA0F2FBC3, 0xA0F3FBC3, + 0xA0F4FBC3, 0xA0F5FBC3, 0xA0F6FBC3, 0xA0F7FBC3, 0xA0F8FBC3, 0xA0F9FBC3, 0xA0FAFBC3, 0xA0FBFBC3, 0xA0FCFBC3, 0xA0FDFBC3, 0xA0FEFBC3, 0xA0FFFBC3, 0xA100FBC3, 0xA101FBC3, 0xA102FBC3, + 0xA103FBC3, 0xA104FBC3, 0xA105FBC3, 0xA106FBC3, 0xA107FBC3, 0xA108FBC3, 0xA109FBC3, 0xA10AFBC3, 0xA10BFBC3, 0xA10CFBC3, 0xA10DFBC3, 0xA10EFBC3, 0xA10FFBC3, 0xA110FBC3, 0xA111FBC3, + 0xA112FBC3, 0xA113FBC3, 0xA114FBC3, 0xA115FBC3, 0xA116FBC3, 0xA117FBC3, 0xA118FBC3, 0xA119FBC3, 0xA11AFBC3, 0xA11BFBC3, 0xA11CFBC3, 0xA11DFBC3, 0xA11EFBC3, 0xA11FFBC3, 0xA120FBC3, + 0xA121FBC3, 0xA122FBC3, 0xA123FBC3, 0xA124FBC3, 0xA125FBC3, 0xA126FBC3, 0xA127FBC3, 0xA128FBC3, 0xA129FBC3, 0xA12AFBC3, 0xA12BFBC3, 0xA12CFBC3, 0xA12DFBC3, 0xA12EFBC3, 0xA12FFBC3, + 0xA130FBC3, 0xA131FBC3, 0xA132FBC3, 0xA133FBC3, 0xA134FBC3, 0xA135FBC3, 0xA136FBC3, 
0xA137FBC3, 0xA138FBC3, 0xA139FBC3, 0xA13AFBC3, 0xA13BFBC3, 0xA13CFBC3, 0xA13DFBC3, 0xA13EFBC3, + 0xA13FFBC3, 0xA140FBC3, 0xA141FBC3, 0xA142FBC3, 0xA143FBC3, 0xA144FBC3, 0xA145FBC3, 0xA146FBC3, 0xA147FBC3, 0xA148FBC3, 0xA149FBC3, 0xA14AFBC3, 0xA14BFBC3, 0xA14CFBC3, 0xA14DFBC3, + 0xA14EFBC3, 0xA14FFBC3, 0xA150FBC3, 0xA151FBC3, 0xA152FBC3, 0xA153FBC3, 0xA154FBC3, 0xA155FBC3, 0xA156FBC3, 0xA157FBC3, 0xA158FBC3, 0xA159FBC3, 0xA15AFBC3, 0xA15BFBC3, 0xA15CFBC3, + 0xA15DFBC3, 0xA15EFBC3, 0xA15FFBC3, 0xA160FBC3, 0xA161FBC3, 0xA162FBC3, 0xA163FBC3, 0xA164FBC3, 0xA165FBC3, 0xA166FBC3, 0xA167FBC3, 0xA168FBC3, 0xA169FBC3, 0xA16AFBC3, 0xA16BFBC3, + 0xA16CFBC3, 0xA16DFBC3, 0xA16EFBC3, 0xA16FFBC3, 0xA170FBC3, 0xA171FBC3, 0xA172FBC3, 0xA173FBC3, 0xA174FBC3, 0xA175FBC3, 0xA176FBC3, 0xA177FBC3, 0xA178FBC3, 0xA179FBC3, 0xA17AFBC3, + 0xA17BFBC3, 0xA17CFBC3, 0xA17DFBC3, 0xA17EFBC3, 0xA17FFBC3, 0xA180FBC3, 0xA181FBC3, 0xA182FBC3, 0xA183FBC3, 0xA184FBC3, 0xA185FBC3, 0xA186FBC3, 0xA187FBC3, 0xA188FBC3, 0xA189FBC3, + 0xA18AFBC3, 0xA18BFBC3, 0xA18CFBC3, 0xA18DFBC3, 0xA18EFBC3, 0xA18FFBC3, 0xA190FBC3, 0xA191FBC3, 0xA192FBC3, 0xA193FBC3, 0xA194FBC3, 0xA195FBC3, 0xA196FBC3, 0xA197FBC3, 0xA198FBC3, + 0xA199FBC3, 0xA19AFBC3, 0xA19BFBC3, 0xA19CFBC3, 0xA19DFBC3, 0xA19EFBC3, 0xA19FFBC3, 0xA1A0FBC3, 0xA1A1FBC3, 0xA1A2FBC3, 0xA1A3FBC3, 0xA1A4FBC3, 0xA1A5FBC3, 0xA1A6FBC3, 0xA1A7FBC3, + 0xA1A8FBC3, 0xA1A9FBC3, 0xA1AAFBC3, 0xA1ABFBC3, 0xA1ACFBC3, 0xA1ADFBC3, 0xA1AEFBC3, 0xA1AFFBC3, 0xA1B0FBC3, 0xA1B1FBC3, 0xA1B2FBC3, 0xA1B3FBC3, 0xA1B4FBC3, 0xA1B5FBC3, 0xA1B6FBC3, + 0xA1B7FBC3, 0xA1B8FBC3, 0xA1B9FBC3, 0xA1BAFBC3, 0xA1BBFBC3, 0xA1BCFBC3, 0xA1BDFBC3, 0xA1BEFBC3, 0xA1BFFBC3, 0xA1C0FBC3, 0xA1C1FBC3, 0xA1C2FBC3, 0xA1C3FBC3, 0xA1C4FBC3, 0xA1C5FBC3, + 0xA1C6FBC3, 0xA1C7FBC3, 0xA1C8FBC3, 0xA1C9FBC3, 0xA1CAFBC3, 0xA1CBFBC3, 0xA1CCFBC3, 0xA1CDFBC3, 0xA1CEFBC3, 0xA1CFFBC3, 0xA1D0FBC3, 0xA1D1FBC3, 0xA1D2FBC3, 0xA1D3FBC3, 0xA1D4FBC3, + 0xA1D5FBC3, 0xA1D6FBC3, 0xA1D7FBC3, 0xA1D8FBC3, 0xA1D9FBC3, 0xA1DAFBC3, 
0xA1DBFBC3, 0xA1DCFBC3, 0xA1DDFBC3, 0xA1DEFBC3, 0xA1DFFBC3, 0xA1E0FBC3, 0xA1E1FBC3, 0xA1E2FBC3, 0xA1E3FBC3, + 0xA1E4FBC3, 0xA1E5FBC3, 0xA1E6FBC3, 0xA1E7FBC3, 0xA1E8FBC3, 0xA1E9FBC3, 0xA1EAFBC3, 0xA1EBFBC3, 0xA1ECFBC3, 0xA1EDFBC3, 0xA1EEFBC3, 0xA1EFFBC3, 0xA1F0FBC3, 0xA1F1FBC3, 0xA1F2FBC3, + 0xA1F3FBC3, 0xA1F4FBC3, 0xA1F5FBC3, 0xA1F6FBC3, 0xA1F7FBC3, 0xA1F8FBC3, 0xA1F9FBC3, 0xA1FAFBC3, 0xA1FBFBC3, 0xA1FCFBC3, 0xA1FDFBC3, 0xA1FEFBC3, 0xA1FFFBC3, 0xA200FBC3, 0xA201FBC3, + 0xA202FBC3, 0xA203FBC3, 0xA204FBC3, 0xA205FBC3, 0xA206FBC3, 0xA207FBC3, 0xA208FBC3, 0xA209FBC3, 0xA20AFBC3, 0xA20BFBC3, 0xA20CFBC3, 0xA20DFBC3, 0xA20EFBC3, 0xA20FFBC3, 0xA210FBC3, + 0xA211FBC3, 0xA212FBC3, 0xA213FBC3, 0xA214FBC3, 0xA215FBC3, 0xA216FBC3, 0xA217FBC3, 0xA218FBC3, 0xA219FBC3, 0xA21AFBC3, 0xA21BFBC3, 0xA21CFBC3, 0xA21DFBC3, 0xA21EFBC3, 0xA21FFBC3, + 0xA220FBC3, 0xA221FBC3, 0xA222FBC3, 0xA223FBC3, 0xA224FBC3, 0xA225FBC3, 0xA226FBC3, 0xA227FBC3, 0xA228FBC3, 0xA229FBC3, 0xA22AFBC3, 0xA22BFBC3, 0xA22CFBC3, 0xA22DFBC3, 0xA22EFBC3, + 0xA22FFBC3, 0xA230FBC3, 0xA231FBC3, 0xA232FBC3, 0xA233FBC3, 0xA234FBC3, 0xA235FBC3, 0xA236FBC3, 0xA237FBC3, 0xA238FBC3, 0xA239FBC3, 0xA23AFBC3, 0xA23BFBC3, 0xA23CFBC3, 0xA23DFBC3, + 0xA23EFBC3, 0xA23FFBC3, 0xA240FBC3, 0xA241FBC3, 0xA242FBC3, 0xA243FBC3, 0xA244FBC3, 0xA245FBC3, 0xA246FBC3, 0xA247FBC3, 0xA248FBC3, 0xA249FBC3, 0xA24AFBC3, 0xA24BFBC3, 0xA24CFBC3, + 0xA24DFBC3, 0xA24EFBC3, 0xA24FFBC3, 0xA250FBC3, 0xA251FBC3, 0xA252FBC3, 0xA253FBC3, 0xA254FBC3, 0xA255FBC3, 0xA256FBC3, 0xA257FBC3, 0xA258FBC3, 0xA259FBC3, 0xA25AFBC3, 0xA25BFBC3, + 0xA25CFBC3, 0xA25DFBC3, 0xA25EFBC3, 0xA25FFBC3, 0xA260FBC3, 0xA261FBC3, 0xA262FBC3, 0xA263FBC3, 0xA264FBC3, 0xA265FBC3, 0xA266FBC3, 0xA267FBC3, 0xA268FBC3, 0xA269FBC3, 0xA26AFBC3, + 0xA26BFBC3, 0xA26CFBC3, 0xA26DFBC3, 0xA26EFBC3, 0xA26FFBC3, 0xA270FBC3, 0xA271FBC3, 0xA272FBC3, 0xA273FBC3, 0xA274FBC3, 0xA275FBC3, 0xA276FBC3, 0xA277FBC3, 0xA278FBC3, 0xA279FBC3, + 0xA27AFBC3, 0xA27BFBC3, 0xA27CFBC3, 0xA27DFBC3, 0xA27EFBC3, 
0xA27FFBC3, 0xA280FBC3, 0xA281FBC3, 0xA282FBC3, 0xA283FBC3, 0xA284FBC3, 0xA285FBC3, 0xA286FBC3, 0xA287FBC3, 0xA288FBC3, + 0xA289FBC3, 0xA28AFBC3, 0xA28BFBC3, 0xA28CFBC3, 0xA28DFBC3, 0xA28EFBC3, 0xA28FFBC3, 0xA290FBC3, 0xA291FBC3, 0xA292FBC3, 0xA293FBC3, 0xA294FBC3, 0xA295FBC3, 0xA296FBC3, 0xA297FBC3, + 0xA298FBC3, 0xA299FBC3, 0xA29AFBC3, 0xA29BFBC3, 0xA29CFBC3, 0xA29DFBC3, 0xA29EFBC3, 0xA29FFBC3, 0xA2A0FBC3, 0xA2A1FBC3, 0xA2A2FBC3, 0xA2A3FBC3, 0xA2A4FBC3, 0xA2A5FBC3, 0xA2A6FBC3, + 0xA2A7FBC3, 0xA2A8FBC3, 0xA2A9FBC3, 0xA2AAFBC3, 0xA2ABFBC3, 0xA2ACFBC3, 0xA2ADFBC3, 0xA2AEFBC3, 0xA2AFFBC3, 0xA2B0FBC3, 0xA2B1FBC3, 0xA2B2FBC3, 0xA2B3FBC3, 0xA2B4FBC3, 0xA2B5FBC3, + 0xA2B6FBC3, 0xA2B7FBC3, 0xA2B8FBC3, 0xA2B9FBC3, 0xA2BAFBC3, 0xA2BBFBC3, 0xA2BCFBC3, 0xA2BDFBC3, 0xA2BEFBC3, 0xA2BFFBC3, 0xA2C0FBC3, 0xA2C1FBC3, 0xA2C2FBC3, 0xA2C3FBC3, 0xA2C4FBC3, + 0xA2C5FBC3, 0xA2C6FBC3, 0xA2C7FBC3, 0xA2C8FBC3, 0xA2C9FBC3, 0xA2CAFBC3, 0xA2CBFBC3, 0xA2CCFBC3, 0xA2CDFBC3, 0xA2CEFBC3, 0xA2CFFBC3, 0xA2D0FBC3, 0xA2D1FBC3, 0xA2D2FBC3, 0xA2D3FBC3, + 0xA2D4FBC3, 0xA2D5FBC3, 0xA2D6FBC3, 0xA2D7FBC3, 0xA2D8FBC3, 0xA2D9FBC3, 0xA2DAFBC3, 0xA2DBFBC3, 0xA2DCFBC3, 0xA2DDFBC3, 0xA2DEFBC3, 0xA2DFFBC3, 0xA2E0FBC3, 0xA2E1FBC3, 0xA2E2FBC3, + 0xA2E3FBC3, 0xA2E4FBC3, 0xA2E5FBC3, 0xA2E6FBC3, 0xA2E7FBC3, 0xA2E8FBC3, 0xA2E9FBC3, 0xA2EAFBC3, 0xA2EBFBC3, 0xA2ECFBC3, 0xA2EDFBC3, 0xA2EEFBC3, 0xA2EFFBC3, 0xA2F0FBC3, 0xA2F1FBC3, + 0xA2F2FBC3, 0xA2F3FBC3, 0xA2F4FBC3, 0xA2F5FBC3, 0xA2F6FBC3, 0xA2F7FBC3, 0xA2F8FBC3, 0xA2F9FBC3, 0xA2FAFBC3, 0xA2FBFBC3, 0xA2FCFBC3, 0xA2FDFBC3, 0xA2FEFBC3, 0xA2FFFBC3, 0xA300FBC3, + 0xA301FBC3, 0xA302FBC3, 0xA303FBC3, 0xA304FBC3, 0xA305FBC3, 0xA306FBC3, 0xA307FBC3, 0xA308FBC3, 0xA309FBC3, 0xA30AFBC3, 0xA30BFBC3, 0xA30CFBC3, 0xA30DFBC3, 0xA30EFBC3, 0xA30FFBC3, + 0xA310FBC3, 0xA311FBC3, 0xA312FBC3, 0xA313FBC3, 0xA314FBC3, 0xA315FBC3, 0xA316FBC3, 0xA317FBC3, 0xA318FBC3, 0xA319FBC3, 0xA31AFBC3, 0xA31BFBC3, 0xA31CFBC3, 0xA31DFBC3, 0xA31EFBC3, + 0xA31FFBC3, 0xA320FBC3, 0xA321FBC3, 0xA322FBC3, 
0xA323FBC3, 0xA324FBC3, 0xA325FBC3, 0xA326FBC3, 0xA327FBC3, 0xA328FBC3, 0xA329FBC3, 0xA32AFBC3, 0xA32BFBC3, 0xA32CFBC3, 0xA32DFBC3, + 0xA32EFBC3, 0xA32FFBC3, 0xA330FBC3, 0xA331FBC3, 0xA332FBC3, 0xA333FBC3, 0xA334FBC3, 0xA335FBC3, 0xA336FBC3, 0xA337FBC3, 0xA338FBC3, 0xA339FBC3, 0xA33AFBC3, 0xA33BFBC3, 0xA33CFBC3, + 0xA33DFBC3, 0xA33EFBC3, 0xA33FFBC3, 0xA340FBC3, 0xA341FBC3, 0xA342FBC3, 0xA343FBC3, 0xA344FBC3, 0xA345FBC3, 0xA346FBC3, 0xA347FBC3, 0xA348FBC3, 0xA349FBC3, 0xA34AFBC3, 0xA34BFBC3, + 0xA34CFBC3, 0xA34DFBC3, 0xA34EFBC3, 0xA34FFBC3, 0xA350FBC3, 0xA351FBC3, 0xA352FBC3, 0xA353FBC3, 0xA354FBC3, 0xA355FBC3, 0xA356FBC3, 0xA357FBC3, 0xA358FBC3, 0xA359FBC3, 0xA35AFBC3, + 0xA35BFBC3, 0xA35CFBC3, 0xA35DFBC3, 0xA35EFBC3, 0xA35FFBC3, 0xA360FBC3, 0xA361FBC3, 0xA362FBC3, 0xA363FBC3, 0xA364FBC3, 0xA365FBC3, 0xA366FBC3, 0xA367FBC3, 0xA368FBC3, 0xA369FBC3, + 0xA36AFBC3, 0xA36BFBC3, 0xA36CFBC3, 0xA36DFBC3, 0xA36EFBC3, 0xA36FFBC3, 0xA370FBC3, 0xA371FBC3, 0xA372FBC3, 0xA373FBC3, 0xA374FBC3, 0xA375FBC3, 0xA376FBC3, 0xA377FBC3, 0xA378FBC3, + 0xA379FBC3, 0xA37AFBC3, 0xA37BFBC3, 0xA37CFBC3, 0xA37DFBC3, 0xA37EFBC3, 0xA37FFBC3, 0xA380FBC3, 0xA381FBC3, 0xA382FBC3, 0xA383FBC3, 0xA384FBC3, 0xA385FBC3, 0xA386FBC3, 0xA387FBC3, + 0xA388FBC3, 0xA389FBC3, 0xA38AFBC3, 0xA38BFBC3, 0xA38CFBC3, 0xA38DFBC3, 0xA38EFBC3, 0xA38FFBC3, 0xA390FBC3, 0xA391FBC3, 0xA392FBC3, 0xA393FBC3, 0xA394FBC3, 0xA395FBC3, 0xA396FBC3, + 0xA397FBC3, 0xA398FBC3, 0xA399FBC3, 0xA39AFBC3, 0xA39BFBC3, 0xA39CFBC3, 0xA39DFBC3, 0xA39EFBC3, 0xA39FFBC3, 0xA3A0FBC3, 0xA3A1FBC3, 0xA3A2FBC3, 0xA3A3FBC3, 0xA3A4FBC3, 0xA3A5FBC3, + 0xA3A6FBC3, 0xA3A7FBC3, 0xA3A8FBC3, 0xA3A9FBC3, 0xA3AAFBC3, 0xA3ABFBC3, 0xA3ACFBC3, 0xA3ADFBC3, 0xA3AEFBC3, 0xA3AFFBC3, 0xA3B0FBC3, 0xA3B1FBC3, 0xA3B2FBC3, 0xA3B3FBC3, 0xA3B4FBC3, + 0xA3B5FBC3, 0xA3B6FBC3, 0xA3B7FBC3, 0xA3B8FBC3, 0xA3B9FBC3, 0xA3BAFBC3, 0xA3BBFBC3, 0xA3BCFBC3, 0xA3BDFBC3, 0xA3BEFBC3, 0xA3BFFBC3, 0xA3C0FBC3, 0xA3C1FBC3, 0xA3C2FBC3, 0xA3C3FBC3, + 0xA3C4FBC3, 0xA3C5FBC3, 0xA3C6FBC3, 
0xA3C7FBC3, 0xA3C8FBC3, 0xA3C9FBC3, 0xA3CAFBC3, 0xA3CBFBC3, 0xA3CCFBC3, 0xA3CDFBC3, 0xA3CEFBC3, 0xA3CFFBC3, 0xA3D0FBC3, 0xA3D1FBC3, 0xA3D2FBC3, + 0xA3D3FBC3, 0xA3D4FBC3, 0xA3D5FBC3, 0xA3D6FBC3, 0xA3D7FBC3, 0xA3D8FBC3, 0xA3D9FBC3, 0xA3DAFBC3, 0xA3DBFBC3, 0xA3DCFBC3, 0xA3DDFBC3, 0xA3DEFBC3, 0xA3DFFBC3, 0xA3E0FBC3, 0xA3E1FBC3, + 0xA3E2FBC3, 0xA3E3FBC3, 0xA3E4FBC3, 0xA3E5FBC3, 0xA3E6FBC3, 0xA3E7FBC3, 0xA3E8FBC3, 0xA3E9FBC3, 0xA3EAFBC3, 0xA3EBFBC3, 0xA3ECFBC3, 0xA3EDFBC3, 0xA3EEFBC3, 0xA3EFFBC3, 0xA3F0FBC3, + 0xA3F1FBC3, 0xA3F2FBC3, 0xA3F3FBC3, 0xA3F4FBC3, 0xA3F5FBC3, 0xA3F6FBC3, 0xA3F7FBC3, 0xA3F8FBC3, 0xA3F9FBC3, 0xA3FAFBC3, 0xA3FBFBC3, 0xA3FCFBC3, 0xA3FDFBC3, 0xA3FEFBC3, 0xA3FFFBC3, + 0xA400FBC3, 0xA401FBC3, 0xA402FBC3, 0xA403FBC3, 0xA404FBC3, 0xA405FBC3, 0xA406FBC3, 0xA407FBC3, 0xA408FBC3, 0xA409FBC3, 0xA40AFBC3, 0xA40BFBC3, 0xA40CFBC3, 0xA40DFBC3, 0xA40EFBC3, + 0xA40FFBC3, 0xA410FBC3, 0xA411FBC3, 0xA412FBC3, 0xA413FBC3, 0xA414FBC3, 0xA415FBC3, 0xA416FBC3, 0xA417FBC3, 0xA418FBC3, 0xA419FBC3, 0xA41AFBC3, 0xA41BFBC3, 0xA41CFBC3, 0xA41DFBC3, + 0xA41EFBC3, 0xA41FFBC3, 0xA420FBC3, 0xA421FBC3, 0xA422FBC3, 0xA423FBC3, 0xA424FBC3, 0xA425FBC3, 0xA426FBC3, 0xA427FBC3, 0xA428FBC3, 0xA429FBC3, 0xA42AFBC3, 0xA42BFBC3, 0xA42CFBC3, + 0xA42DFBC3, 0xA42EFBC3, 0xA42FFBC3, 0xA430FBC3, 0xA431FBC3, 0xA432FBC3, 0xA433FBC3, 0xA434FBC3, 0xA435FBC3, 0xA436FBC3, 0xA437FBC3, 0xA438FBC3, 0xA439FBC3, 0xA43AFBC3, 0xA43BFBC3, + 0xA43CFBC3, 0xA43DFBC3, 0xA43EFBC3, 0xA43FFBC3, 0xA440FBC3, 0xA441FBC3, 0xA442FBC3, 0xA443FBC3, 0xA444FBC3, 0xA445FBC3, 0xA446FBC3, 0xA447FBC3, 0xA448FBC3, 0xA449FBC3, 0xA44AFBC3, + 0xA44BFBC3, 0xA44CFBC3, 0xA44DFBC3, 0xA44EFBC3, 0xA44FFBC3, 0xA450FBC3, 0xA451FBC3, 0xA452FBC3, 0xA453FBC3, 0xA454FBC3, 0xA455FBC3, 0xA456FBC3, 0xA457FBC3, 0xA458FBC3, 0xA459FBC3, + 0xA45AFBC3, 0xA45BFBC3, 0xA45CFBC3, 0xA45DFBC3, 0xA45EFBC3, 0xA45FFBC3, 0xA460FBC3, 0xA461FBC3, 0xA462FBC3, 0xA463FBC3, 0xA464FBC3, 0xA465FBC3, 0xA466FBC3, 0xA467FBC3, 0xA468FBC3, + 0xA469FBC3, 0xA46AFBC3, 
0xA46BFBC3, 0xA46CFBC3, 0xA46DFBC3, 0xA46EFBC3, 0xA46FFBC3, 0xA470FBC3, 0xA471FBC3, 0xA472FBC3, 0xA473FBC3, 0xA474FBC3, 0xA475FBC3, 0xA476FBC3, 0xA477FBC3, + 0xA478FBC3, 0xA479FBC3, 0xA47AFBC3, 0xA47BFBC3, 0xA47CFBC3, 0xA47DFBC3, 0xA47EFBC3, 0xA47FFBC3, 0xA480FBC3, 0xA481FBC3, 0xA482FBC3, 0xA483FBC3, 0xA484FBC3, 0xA485FBC3, 0xA486FBC3, + 0xA487FBC3, 0xA488FBC3, 0xA489FBC3, 0xA48AFBC3, 0xA48BFBC3, 0xA48CFBC3, 0xA48DFBC3, 0xA48EFBC3, 0xA48FFBC3, 0xA490FBC3, 0xA491FBC3, 0xA492FBC3, 0xA493FBC3, 0xA494FBC3, 0xA495FBC3, + 0xA496FBC3, 0xA497FBC3, 0xA498FBC3, 0xA499FBC3, 0xA49AFBC3, 0xA49BFBC3, 0xA49CFBC3, 0xA49DFBC3, 0xA49EFBC3, 0xA49FFBC3, 0xA4A0FBC3, 0xA4A1FBC3, 0xA4A2FBC3, 0xA4A3FBC3, 0xA4A4FBC3, + 0xA4A5FBC3, 0xA4A6FBC3, 0xA4A7FBC3, 0xA4A8FBC3, 0xA4A9FBC3, 0xA4AAFBC3, 0xA4ABFBC3, 0xA4ACFBC3, 0xA4ADFBC3, 0xA4AEFBC3, 0xA4AFFBC3, 0xA4B0FBC3, 0xA4B1FBC3, 0xA4B2FBC3, 0xA4B3FBC3, + 0xA4B4FBC3, 0xA4B5FBC3, 0xA4B6FBC3, 0xA4B7FBC3, 0xA4B8FBC3, 0xA4B9FBC3, 0xA4BAFBC3, 0xA4BBFBC3, 0xA4BCFBC3, 0xA4BDFBC3, 0xA4BEFBC3, 0xA4BFFBC3, 0xA4C0FBC3, 0xA4C1FBC3, 0xA4C2FBC3, + 0xA4C3FBC3, 0xA4C4FBC3, 0xA4C5FBC3, 0xA4C6FBC3, 0xA4C7FBC3, 0xA4C8FBC3, 0xA4C9FBC3, 0xA4CAFBC3, 0xA4CBFBC3, 0xA4CCFBC3, 0xA4CDFBC3, 0xA4CEFBC3, 0xA4CFFBC3, 0xA4D0FBC3, 0xA4D1FBC3, + 0xA4D2FBC3, 0xA4D3FBC3, 0xA4D4FBC3, 0xA4D5FBC3, 0xA4D6FBC3, 0xA4D7FBC3, 0xA4D8FBC3, 0xA4D9FBC3, 0xA4DAFBC3, 0xA4DBFBC3, 0xA4DCFBC3, 0xA4DDFBC3, 0xA4DEFBC3, 0xA4DFFBC3, 0xA4E0FBC3, + 0xA4E1FBC3, 0xA4E2FBC3, 0xA4E3FBC3, 0xA4E4FBC3, 0xA4E5FBC3, 0xA4E6FBC3, 0xA4E7FBC3, 0xA4E8FBC3, 0xA4E9FBC3, 0xA4EAFBC3, 0xA4EBFBC3, 0xA4ECFBC3, 0xA4EDFBC3, 0xA4EEFBC3, 0xA4EFFBC3, + 0xA4F0FBC3, 0xA4F1FBC3, 0xA4F2FBC3, 0xA4F3FBC3, 0xA4F4FBC3, 0xA4F5FBC3, 0xA4F6FBC3, 0xA4F7FBC3, 0xA4F8FBC3, 0xA4F9FBC3, 0xA4FAFBC3, 0xA4FBFBC3, 0xA4FCFBC3, 0xA4FDFBC3, 0xA4FEFBC3, + 0xA4FFFBC3, 0xA500FBC3, 0xA501FBC3, 0xA502FBC3, 0xA503FBC3, 0xA504FBC3, 0xA505FBC3, 0xA506FBC3, 0xA507FBC3, 0xA508FBC3, 0xA509FBC3, 0xA50AFBC3, 0xA50BFBC3, 0xA50CFBC3, 0xA50DFBC3, + 0xA50EFBC3, 
0xA50FFBC3, 0xA510FBC3, 0xA511FBC3, 0xA512FBC3, 0xA513FBC3, 0xA514FBC3, 0xA515FBC3, 0xA516FBC3, 0xA517FBC3, 0xA518FBC3, 0xA519FBC3, 0xA51AFBC3, 0xA51BFBC3, 0xA51CFBC3, + 0xA51DFBC3, 0xA51EFBC3, 0xA51FFBC3, 0xA520FBC3, 0xA521FBC3, 0xA522FBC3, 0xA523FBC3, 0xA524FBC3, 0xA525FBC3, 0xA526FBC3, 0xA527FBC3, 0xA528FBC3, 0xA529FBC3, 0xA52AFBC3, 0xA52BFBC3, + 0xA52CFBC3, 0xA52DFBC3, 0xA52EFBC3, 0xA52FFBC3, 0xA530FBC3, 0xA531FBC3, 0xA532FBC3, 0xA533FBC3, 0xA534FBC3, 0xA535FBC3, 0xA536FBC3, 0xA537FBC3, 0xA538FBC3, 0xA539FBC3, 0xA53AFBC3, + 0xA53BFBC3, 0xA53CFBC3, 0xA53DFBC3, 0xA53EFBC3, 0xA53FFBC3, 0xA540FBC3, 0xA541FBC3, 0xA542FBC3, 0xA543FBC3, 0xA544FBC3, 0xA545FBC3, 0xA546FBC3, 0xA547FBC3, 0xA548FBC3, 0xA549FBC3, + 0xA54AFBC3, 0xA54BFBC3, 0xA54CFBC3, 0xA54DFBC3, 0xA54EFBC3, 0xA54FFBC3, 0xA550FBC3, 0xA551FBC3, 0xA552FBC3, 0xA553FBC3, 0xA554FBC3, 0xA555FBC3, 0xA556FBC3, 0xA557FBC3, 0xA558FBC3, + 0xA559FBC3, 0xA55AFBC3, 0xA55BFBC3, 0xA55CFBC3, 0xA55DFBC3, 0xA55EFBC3, 0xA55FFBC3, 0xA560FBC3, 0xA561FBC3, 0xA562FBC3, 0xA563FBC3, 0xA564FBC3, 0xA565FBC3, 0xA566FBC3, 0xA567FBC3, + 0xA568FBC3, 0xA569FBC3, 0xA56AFBC3, 0xA56BFBC3, 0xA56CFBC3, 0xA56DFBC3, 0xA56EFBC3, 0xA56FFBC3, 0xA570FBC3, 0xA571FBC3, 0xA572FBC3, 0xA573FBC3, 0xA574FBC3, 0xA575FBC3, 0xA576FBC3, + 0xA577FBC3, 0xA578FBC3, 0xA579FBC3, 0xA57AFBC3, 0xA57BFBC3, 0xA57CFBC3, 0xA57DFBC3, 0xA57EFBC3, 0xA57FFBC3, 0xA580FBC3, 0xA581FBC3, 0xA582FBC3, 0xA583FBC3, 0xA584FBC3, 0xA585FBC3, + 0xA586FBC3, 0xA587FBC3, 0xA588FBC3, 0xA589FBC3, 0xA58AFBC3, 0xA58BFBC3, 0xA58CFBC3, 0xA58DFBC3, 0xA58EFBC3, 0xA58FFBC3, 0xA590FBC3, 0xA591FBC3, 0xA592FBC3, 0xA593FBC3, 0xA594FBC3, + 0xA595FBC3, 0xA596FBC3, 0xA597FBC3, 0xA598FBC3, 0xA599FBC3, 0xA59AFBC3, 0xA59BFBC3, 0xA59CFBC3, 0xA59DFBC3, 0xA59EFBC3, 0xA59FFBC3, 0xA5A0FBC3, 0xA5A1FBC3, 0xA5A2FBC3, 0xA5A3FBC3, + 0xA5A4FBC3, 0xA5A5FBC3, 0xA5A6FBC3, 0xA5A7FBC3, 0xA5A8FBC3, 0xA5A9FBC3, 0xA5AAFBC3, 0xA5ABFBC3, 0xA5ACFBC3, 0xA5ADFBC3, 0xA5AEFBC3, 0xA5AFFBC3, 0xA5B0FBC3, 0xA5B1FBC3, 0xA5B2FBC3, + 
0xA5B3FBC3, 0xA5B4FBC3, 0xA5B5FBC3, 0xA5B6FBC3, 0xA5B7FBC3, 0xA5B8FBC3, 0xA5B9FBC3, 0xA5BAFBC3, 0xA5BBFBC3, 0xA5BCFBC3, 0xA5BDFBC3, 0xA5BEFBC3, 0xA5BFFBC3, 0xA5C0FBC3, 0xA5C1FBC3, + 0xA5C2FBC3, 0xA5C3FBC3, 0xA5C4FBC3, 0xA5C5FBC3, 0xA5C6FBC3, 0xA5C7FBC3, 0xA5C8FBC3, 0xA5C9FBC3, 0xA5CAFBC3, 0xA5CBFBC3, 0xA5CCFBC3, 0xA5CDFBC3, 0xA5CEFBC3, 0xA5CFFBC3, 0xA5D0FBC3, + 0xA5D1FBC3, 0xA5D2FBC3, 0xA5D3FBC3, 0xA5D4FBC3, 0xA5D5FBC3, 0xA5D6FBC3, 0xA5D7FBC3, 0xA5D8FBC3, 0xA5D9FBC3, 0xA5DAFBC3, 0xA5DBFBC3, 0xA5DCFBC3, 0xA5DDFBC3, 0xA5DEFBC3, 0xA5DFFBC3, + 0xA5E0FBC3, 0xA5E1FBC3, 0xA5E2FBC3, 0xA5E3FBC3, 0xA5E4FBC3, 0xA5E5FBC3, 0xA5E6FBC3, 0xA5E7FBC3, 0xA5E8FBC3, 0xA5E9FBC3, 0xA5EAFBC3, 0xA5EBFBC3, 0xA5ECFBC3, 0xA5EDFBC3, 0xA5EEFBC3, + 0xA5EFFBC3, 0xA5F0FBC3, 0xA5F1FBC3, 0xA5F2FBC3, 0xA5F3FBC3, 0xA5F4FBC3, 0xA5F5FBC3, 0xA5F6FBC3, 0xA5F7FBC3, 0xA5F8FBC3, 0xA5F9FBC3, 0xA5FAFBC3, 0xA5FBFBC3, 0xA5FCFBC3, 0xA5FDFBC3, + 0xA5FEFBC3, 0xA5FFFBC3, 0xA600FBC3, 0xA601FBC3, 0xA602FBC3, 0xA603FBC3, 0xA604FBC3, 0xA605FBC3, 0xA606FBC3, 0xA607FBC3, 0xA608FBC3, 0xA609FBC3, 0xA60AFBC3, 0xA60BFBC3, 0xA60CFBC3, + 0xA60DFBC3, 0xA60EFBC3, 0xA60FFBC3, 0xA610FBC3, 0xA611FBC3, 0xA612FBC3, 0xA613FBC3, 0xA614FBC3, 0xA615FBC3, 0xA616FBC3, 0xA617FBC3, 0xA618FBC3, 0xA619FBC3, 0xA61AFBC3, 0xA61BFBC3, + 0xA61CFBC3, 0xA61DFBC3, 0xA61EFBC3, 0xA61FFBC3, 0xA620FBC3, 0xA621FBC3, 0xA622FBC3, 0xA623FBC3, 0xA624FBC3, 0xA625FBC3, 0xA626FBC3, 0xA627FBC3, 0xA628FBC3, 0xA629FBC3, 0xA62AFBC3, + 0xA62BFBC3, 0xA62CFBC3, 0xA62DFBC3, 0xA62EFBC3, 0xA62FFBC3, 0xA630FBC3, 0xA631FBC3, 0xA632FBC3, 0xA633FBC3, 0xA634FBC3, 0xA635FBC3, 0xA636FBC3, 0xA637FBC3, 0xA638FBC3, 0xA639FBC3, + 0xA63AFBC3, 0xA63BFBC3, 0xA63CFBC3, 0xA63DFBC3, 0xA63EFBC3, 0xA63FFBC3, 0xA640FBC3, 0xA641FBC3, 0xA642FBC3, 0xA643FBC3, 0xA644FBC3, 0xA645FBC3, 0xA646FBC3, 0xA647FBC3, 0xA648FBC3, + 0xA649FBC3, 0xA64AFBC3, 0xA64BFBC3, 0xA64CFBC3, 0xA64DFBC3, 0xA64EFBC3, 0xA64FFBC3, 0xA650FBC3, 0xA651FBC3, 0xA652FBC3, 0xA653FBC3, 0xA654FBC3, 0xA655FBC3, 0xA656FBC3, 0xA657FBC3, 
+ 0xA658FBC3, 0xA659FBC3, 0xA65AFBC3, 0xA65BFBC3, 0xA65CFBC3, 0xA65DFBC3, 0xA65EFBC3, 0xA65FFBC3, 0xA660FBC3, 0xA661FBC3, 0xA662FBC3, 0xA663FBC3, 0xA664FBC3, 0xA665FBC3, 0xA666FBC3, + 0xA667FBC3, 0xA668FBC3, 0xA669FBC3, 0xA66AFBC3, 0xA66BFBC3, 0xA66CFBC3, 0xA66DFBC3, 0xA66EFBC3, 0xA66FFBC3, 0xA670FBC3, 0xA671FBC3, 0xA672FBC3, 0xA673FBC3, 0xA674FBC3, 0xA675FBC3, + 0xA676FBC3, 0xA677FBC3, 0xA678FBC3, 0xA679FBC3, 0xA67AFBC3, 0xA67BFBC3, 0xA67CFBC3, 0xA67DFBC3, 0xA67EFBC3, 0xA67FFBC3, 0xA680FBC3, 0xA681FBC3, 0xA682FBC3, 0xA683FBC3, 0xA684FBC3, + 0xA685FBC3, 0xA686FBC3, 0xA687FBC3, 0xA688FBC3, 0xA689FBC3, 0xA68AFBC3, 0xA68BFBC3, 0xA68CFBC3, 0xA68DFBC3, 0xA68EFBC3, 0xA68FFBC3, 0xA690FBC3, 0xA691FBC3, 0xA692FBC3, 0xA693FBC3, + 0xA694FBC3, 0xA695FBC3, 0xA696FBC3, 0xA697FBC3, 0xA698FBC3, 0xA699FBC3, 0xA69AFBC3, 0xA69BFBC3, 0xA69CFBC3, 0xA69DFBC3, 0xA69EFBC3, 0xA69FFBC3, 0xA6A0FBC3, 0xA6A1FBC3, 0xA6A2FBC3, + 0xA6A3FBC3, 0xA6A4FBC3, 0xA6A5FBC3, 0xA6A6FBC3, 0xA6A7FBC3, 0xA6A8FBC3, 0xA6A9FBC3, 0xA6AAFBC3, 0xA6ABFBC3, 0xA6ACFBC3, 0xA6ADFBC3, 0xA6AEFBC3, 0xA6AFFBC3, 0xA6B0FBC3, 0xA6B1FBC3, + 0xA6B2FBC3, 0xA6B3FBC3, 0xA6B4FBC3, 0xA6B5FBC3, 0xA6B6FBC3, 0xA6B7FBC3, 0xA6B8FBC3, 0xA6B9FBC3, 0xA6BAFBC3, 0xA6BBFBC3, 0xA6BCFBC3, 0xA6BDFBC3, 0xA6BEFBC3, 0xA6BFFBC3, 0xA6C0FBC3, + 0xA6C1FBC3, 0xA6C2FBC3, 0xA6C3FBC3, 0xA6C4FBC3, 0xA6C5FBC3, 0xA6C6FBC3, 0xA6C7FBC3, 0xA6C8FBC3, 0xA6C9FBC3, 0xA6CAFBC3, 0xA6CBFBC3, 0xA6CCFBC3, 0xA6CDFBC3, 0xA6CEFBC3, 0xA6CFFBC3, + 0xA6D0FBC3, 0xA6D1FBC3, 0xA6D2FBC3, 0xA6D3FBC3, 0xA6D4FBC3, 0xA6D5FBC3, 0xA6D6FBC3, 0xA6D7FBC3, 0xA6D8FBC3, 0xA6D9FBC3, 0xA6DAFBC3, 0xA6DBFBC3, 0xA6DCFBC3, 0xA6DDFBC3, 0xA6DEFBC3, + 0xA6DFFBC3, 0xA6E0FBC3, 0xA6E1FBC3, 0xA6E2FBC3, 0xA6E3FBC3, 0xA6E4FBC3, 0xA6E5FBC3, 0xA6E6FBC3, 0xA6E7FBC3, 0xA6E8FBC3, 0xA6E9FBC3, 0xA6EAFBC3, 0xA6EBFBC3, 0xA6ECFBC3, 0xA6EDFBC3, + 0xA6EEFBC3, 0xA6EFFBC3, 0xA6F0FBC3, 0xA6F1FBC3, 0xA6F2FBC3, 0xA6F3FBC3, 0xA6F4FBC3, 0xA6F5FBC3, 0xA6F6FBC3, 0xA6F7FBC3, 0xA6F8FBC3, 0xA6F9FBC3, 0xA6FAFBC3, 0xA6FBFBC3, 
0xA6FCFBC3, + 0xA6FDFBC3, 0xA6FEFBC3, 0xA6FFFBC3, 0xA700FBC3, 0xA701FBC3, 0xA702FBC3, 0xA703FBC3, 0xA704FBC3, 0xA705FBC3, 0xA706FBC3, 0xA707FBC3, 0xA708FBC3, 0xA709FBC3, 0xA70AFBC3, 0xA70BFBC3, + 0xA70CFBC3, 0xA70DFBC3, 0xA70EFBC3, 0xA70FFBC3, 0xA710FBC3, 0xA711FBC3, 0xA712FBC3, 0xA713FBC3, 0xA714FBC3, 0xA715FBC3, 0xA716FBC3, 0xA717FBC3, 0xA718FBC3, 0xA719FBC3, 0xA71AFBC3, + 0xA71BFBC3, 0xA71CFBC3, 0xA71DFBC3, 0xA71EFBC3, 0xA71FFBC3, 0xA720FBC3, 0xA721FBC3, 0xA722FBC3, 0xA723FBC3, 0xA724FBC3, 0xA725FBC3, 0xA726FBC3, 0xA727FBC3, 0xA728FBC3, 0xA729FBC3, + 0xA72AFBC3, 0xA72BFBC3, 0xA72CFBC3, 0xA72DFBC3, 0xA72EFBC3, 0xA72FFBC3, 0xA730FBC3, 0xA731FBC3, 0xA732FBC3, 0xA733FBC3, 0xA734FBC3, 0xA735FBC3, 0xA736FBC3, 0xA737FBC3, 0xA738FBC3, + 0xA739FBC3, 0xA73AFBC3, 0xA73BFBC3, 0xA73CFBC3, 0xA73DFBC3, 0xA73EFBC3, 0xA73FFBC3, 0xA740FBC3, 0xA741FBC3, 0xA742FBC3, 0xA743FBC3, 0xA744FBC3, 0xA745FBC3, 0xA746FBC3, 0xA747FBC3, + 0xA748FBC3, 0xA749FBC3, 0xA74AFBC3, 0xA74BFBC3, 0xA74CFBC3, 0xA74DFBC3, 0xA74EFBC3, 0xA74FFBC3, 0xA750FBC3, 0xA751FBC3, 0xA752FBC3, 0xA753FBC3, 0xA754FBC3, 0xA755FBC3, 0xA756FBC3, + 0xA757FBC3, 0xA758FBC3, 0xA759FBC3, 0xA75AFBC3, 0xA75BFBC3, 0xA75CFBC3, 0xA75DFBC3, 0xA75EFBC3, 0xA75FFBC3, 0xA760FBC3, 0xA761FBC3, 0xA762FBC3, 0xA763FBC3, 0xA764FBC3, 0xA765FBC3, + 0xA766FBC3, 0xA767FBC3, 0xA768FBC3, 0xA769FBC3, 0xA76AFBC3, 0xA76BFBC3, 0xA76CFBC3, 0xA76DFBC3, 0xA76EFBC3, 0xA76FFBC3, 0xA770FBC3, 0xA771FBC3, 0xA772FBC3, 0xA773FBC3, 0xA774FBC3, + 0xA775FBC3, 0xA776FBC3, 0xA777FBC3, 0xA778FBC3, 0xA779FBC3, 0xA77AFBC3, 0xA77BFBC3, 0xA77CFBC3, 0xA77DFBC3, 0xA77EFBC3, 0xA77FFBC3, 0xA780FBC3, 0xA781FBC3, 0xA782FBC3, 0xA783FBC3, + 0xA784FBC3, 0xA785FBC3, 0xA786FBC3, 0xA787FBC3, 0xA788FBC3, 0xA789FBC3, 0xA78AFBC3, 0xA78BFBC3, 0xA78CFBC3, 0xA78DFBC3, 0xA78EFBC3, 0xA78FFBC3, 0xA790FBC3, 0xA791FBC3, 0xA792FBC3, + 0xA793FBC3, 0xA794FBC3, 0xA795FBC3, 0xA796FBC3, 0xA797FBC3, 0xA798FBC3, 0xA799FBC3, 0xA79AFBC3, 0xA79BFBC3, 0xA79CFBC3, 0xA79DFBC3, 0xA79EFBC3, 0xA79FFBC3, 
0xA7A0FBC3, 0xA7A1FBC3, + 0xA7A2FBC3, 0xA7A3FBC3, 0xA7A4FBC3, 0xA7A5FBC3, 0xA7A6FBC3, 0xA7A7FBC3, 0xA7A8FBC3, 0xA7A9FBC3, 0xA7AAFBC3, 0xA7ABFBC3, 0xA7ACFBC3, 0xA7ADFBC3, 0xA7AEFBC3, 0xA7AFFBC3, 0xA7B0FBC3, + 0xA7B1FBC3, 0xA7B2FBC3, 0xA7B3FBC3, 0xA7B4FBC3, 0xA7B5FBC3, 0xA7B6FBC3, 0xA7B7FBC3, 0xA7B8FBC3, 0xA7B9FBC3, 0xA7BAFBC3, 0xA7BBFBC3, 0xA7BCFBC3, 0xA7BDFBC3, 0xA7BEFBC3, 0xA7BFFBC3, + 0xA7C0FBC3, 0xA7C1FBC3, 0xA7C2FBC3, 0xA7C3FBC3, 0xA7C4FBC3, 0xA7C5FBC3, 0xA7C6FBC3, 0xA7C7FBC3, 0xA7C8FBC3, 0xA7C9FBC3, 0xA7CAFBC3, 0xA7CBFBC3, 0xA7CCFBC3, 0xA7CDFBC3, 0xA7CEFBC3, + 0xA7CFFBC3, 0xA7D0FBC3, 0xA7D1FBC3, 0xA7D2FBC3, 0xA7D3FBC3, 0xA7D4FBC3, 0xA7D5FBC3, 0xA7D6FBC3, 0xA7D7FBC3, 0xA7D8FBC3, 0xA7D9FBC3, 0xA7DAFBC3, 0xA7DBFBC3, 0xA7DCFBC3, 0xA7DDFBC3, + 0xA7DEFBC3, 0xA7DFFBC3, 0xA7E0FBC3, 0xA7E1FBC3, 0xA7E2FBC3, 0xA7E3FBC3, 0xA7E4FBC3, 0xA7E5FBC3, 0xA7E6FBC3, 0xA7E7FBC3, 0xA7E8FBC3, 0xA7E9FBC3, 0xA7EAFBC3, 0xA7EBFBC3, 0xA7ECFBC3, + 0xA7EDFBC3, 0xA7EEFBC3, 0xA7EFFBC3, 0xA7F0FBC3, 0xA7F1FBC3, 0xA7F2FBC3, 0xA7F3FBC3, 0xA7F4FBC3, 0xA7F5FBC3, 0xA7F6FBC3, 0xA7F7FBC3, 0xA7F8FBC3, 0xA7F9FBC3, 0xA7FAFBC3, 0xA7FBFBC3, + 0xA7FCFBC3, 0xA7FDFBC3, 0xA7FEFBC3, 0xA7FFFBC3, 0xA800FBC3, 0xA801FBC3, 0xA802FBC3, 0xA803FBC3, 0xA804FBC3, 0xA805FBC3, 0xA806FBC3, 0xA807FBC3, 0xA808FBC3, 0xA809FBC3, 0xA80AFBC3, + 0xA80BFBC3, 0xA80CFBC3, 0xA80DFBC3, 0xA80EFBC3, 0xA80FFBC3, 0xA810FBC3, 0xA811FBC3, 0xA812FBC3, 0xA813FBC3, 0xA814FBC3, 0xA815FBC3, 0xA816FBC3, 0xA817FBC3, 0xA818FBC3, 0xA819FBC3, + 0xA81AFBC3, 0xA81BFBC3, 0xA81CFBC3, 0xA81DFBC3, 0xA81EFBC3, 0xA81FFBC3, 0xA820FBC3, 0xA821FBC3, 0xA822FBC3, 0xA823FBC3, 0xA824FBC3, 0xA825FBC3, 0xA826FBC3, 0xA827FBC3, 0xA828FBC3, + 0xA829FBC3, 0xA82AFBC3, 0xA82BFBC3, 0xA82CFBC3, 0xA82DFBC3, 0xA82EFBC3, 0xA82FFBC3, 0xA830FBC3, 0xA831FBC3, 0xA832FBC3, 0xA833FBC3, 0xA834FBC3, 0xA835FBC3, 0xA836FBC3, 0xA837FBC3, + 0xA838FBC3, 0xA839FBC3, 0xA83AFBC3, 0xA83BFBC3, 0xA83CFBC3, 0xA83DFBC3, 0xA83EFBC3, 0xA83FFBC3, 0xA840FBC3, 0xA841FBC3, 0xA842FBC3, 0xA843FBC3, 
0xA844FBC3, 0xA845FBC3, 0xA846FBC3, + 0xA847FBC3, 0xA848FBC3, 0xA849FBC3, 0xA84AFBC3, 0xA84BFBC3, 0xA84CFBC3, 0xA84DFBC3, 0xA84EFBC3, 0xA84FFBC3, 0xA850FBC3, 0xA851FBC3, 0xA852FBC3, 0xA853FBC3, 0xA854FBC3, 0xA855FBC3, + 0xA856FBC3, 0xA857FBC3, 0xA858FBC3, 0xA859FBC3, 0xA85AFBC3, 0xA85BFBC3, 0xA85CFBC3, 0xA85DFBC3, 0xA85EFBC3, 0xA85FFBC3, 0xA860FBC3, 0xA861FBC3, 0xA862FBC3, 0xA863FBC3, 0xA864FBC3, + 0xA865FBC3, 0xA866FBC3, 0xA867FBC3, 0xA868FBC3, 0xA869FBC3, 0xA86AFBC3, 0xA86BFBC3, 0xA86CFBC3, 0xA86DFBC3, 0xA86EFBC3, 0xA86FFBC3, 0xA870FBC3, 0xA871FBC3, 0xA872FBC3, 0xA873FBC3, + 0xA874FBC3, 0xA875FBC3, 0xA876FBC3, 0xA877FBC3, 0xA878FBC3, 0xA879FBC3, 0xA87AFBC3, 0xA87BFBC3, 0xA87CFBC3, 0xA87DFBC3, 0xA87EFBC3, 0xA87FFBC3, 0xA880FBC3, 0xA881FBC3, 0xA882FBC3, + 0xA883FBC3, 0xA884FBC3, 0xA885FBC3, 0xA886FBC3, 0xA887FBC3, 0xA888FBC3, 0xA889FBC3, 0xA88AFBC3, 0xA88BFBC3, 0xA88CFBC3, 0xA88DFBC3, 0xA88EFBC3, 0xA88FFBC3, 0xA890FBC3, 0xA891FBC3, + 0xA892FBC3, 0xA893FBC3, 0xA894FBC3, 0xA895FBC3, 0xA896FBC3, 0xA897FBC3, 0xA898FBC3, 0xA899FBC3, 0xA89AFBC3, 0xA89BFBC3, 0xA89CFBC3, 0xA89DFBC3, 0xA89EFBC3, 0xA89FFBC3, 0xA8A0FBC3, + 0xA8A1FBC3, 0xA8A2FBC3, 0xA8A3FBC3, 0xA8A4FBC3, 0xA8A5FBC3, 0xA8A6FBC3, 0xA8A7FBC3, 0xA8A8FBC3, 0xA8A9FBC3, 0xA8AAFBC3, 0xA8ABFBC3, 0xA8ACFBC3, 0xA8ADFBC3, 0xA8AEFBC3, 0xA8AFFBC3, + 0xA8B0FBC3, 0xA8B1FBC3, 0xA8B2FBC3, 0xA8B3FBC3, 0xA8B4FBC3, 0xA8B5FBC3, 0xA8B6FBC3, 0xA8B7FBC3, 0xA8B8FBC3, 0xA8B9FBC3, 0xA8BAFBC3, 0xA8BBFBC3, 0xA8BCFBC3, 0xA8BDFBC3, 0xA8BEFBC3, + 0xA8BFFBC3, 0xA8C0FBC3, 0xA8C1FBC3, 0xA8C2FBC3, 0xA8C3FBC3, 0xA8C4FBC3, 0xA8C5FBC3, 0xA8C6FBC3, 0xA8C7FBC3, 0xA8C8FBC3, 0xA8C9FBC3, 0xA8CAFBC3, 0xA8CBFBC3, 0xA8CCFBC3, 0xA8CDFBC3, + 0xA8CEFBC3, 0xA8CFFBC3, 0xA8D0FBC3, 0xA8D1FBC3, 0xA8D2FBC3, 0xA8D3FBC3, 0xA8D4FBC3, 0xA8D5FBC3, 0xA8D6FBC3, 0xA8D7FBC3, 0xA8D8FBC3, 0xA8D9FBC3, 0xA8DAFBC3, 0xA8DBFBC3, 0xA8DCFBC3, + 0xA8DDFBC3, 0xA8DEFBC3, 0xA8DFFBC3, 0xA8E0FBC3, 0xA8E1FBC3, 0xA8E2FBC3, 0xA8E3FBC3, 0xA8E4FBC3, 0xA8E5FBC3, 0xA8E6FBC3, 0xA8E7FBC3, 
0xA8E8FBC3, 0xA8E9FBC3, 0xA8EAFBC3, 0xA8EBFBC3, + 0xA8ECFBC3, 0xA8EDFBC3, 0xA8EEFBC3, 0xA8EFFBC3, 0xA8F0FBC3, 0xA8F1FBC3, 0xA8F2FBC3, 0xA8F3FBC3, 0xA8F4FBC3, 0xA8F5FBC3, 0xA8F6FBC3, 0xA8F7FBC3, 0xA8F8FBC3, 0xA8F9FBC3, 0xA8FAFBC3, + 0xA8FBFBC3, 0xA8FCFBC3, 0xA8FDFBC3, 0xA8FEFBC3, 0xA8FFFBC3, 0xA900FBC3, 0xA901FBC3, 0xA902FBC3, 0xA903FBC3, 0xA904FBC3, 0xA905FBC3, 0xA906FBC3, 0xA907FBC3, 0xA908FBC3, 0xA909FBC3, + 0xA90AFBC3, 0xA90BFBC3, 0xA90CFBC3, 0xA90DFBC3, 0xA90EFBC3, 0xA90FFBC3, 0xA910FBC3, 0xA911FBC3, 0xA912FBC3, 0xA913FBC3, 0xA914FBC3, 0xA915FBC3, 0xA916FBC3, 0xA917FBC3, 0xA918FBC3, + 0xA919FBC3, 0xA91AFBC3, 0xA91BFBC3, 0xA91CFBC3, 0xA91DFBC3, 0xA91EFBC3, 0xA91FFBC3, 0xA920FBC3, 0xA921FBC3, 0xA922FBC3, 0xA923FBC3, 0xA924FBC3, 0xA925FBC3, 0xA926FBC3, 0xA927FBC3, + 0xA928FBC3, 0xA929FBC3, 0xA92AFBC3, 0xA92BFBC3, 0xA92CFBC3, 0xA92DFBC3, 0xA92EFBC3, 0xA92FFBC3, 0xA930FBC3, 0xA931FBC3, 0xA932FBC3, 0xA933FBC3, 0xA934FBC3, 0xA935FBC3, 0xA936FBC3, + 0xA937FBC3, 0xA938FBC3, 0xA939FBC3, 0xA93AFBC3, 0xA93BFBC3, 0xA93CFBC3, 0xA93DFBC3, 0xA93EFBC3, 0xA93FFBC3, 0xA940FBC3, 0xA941FBC3, 0xA942FBC3, 0xA943FBC3, 0xA944FBC3, 0xA945FBC3, + 0xA946FBC3, 0xA947FBC3, 0xA948FBC3, 0xA949FBC3, 0xA94AFBC3, 0xA94BFBC3, 0xA94CFBC3, 0xA94DFBC3, 0xA94EFBC3, 0xA94FFBC3, 0xA950FBC3, 0xA951FBC3, 0xA952FBC3, 0xA953FBC3, 0xA954FBC3, + 0xA955FBC3, 0xA956FBC3, 0xA957FBC3, 0xA958FBC3, 0xA959FBC3, 0xA95AFBC3, 0xA95BFBC3, 0xA95CFBC3, 0xA95DFBC3, 0xA95EFBC3, 0xA95FFBC3, 0xA960FBC3, 0xA961FBC3, 0xA962FBC3, 0xA963FBC3, + 0xA964FBC3, 0xA965FBC3, 0xA966FBC3, 0xA967FBC3, 0xA968FBC3, 0xA969FBC3, 0xA96AFBC3, 0xA96BFBC3, 0xA96CFBC3, 0xA96DFBC3, 0xA96EFBC3, 0xA96FFBC3, 0xA970FBC3, 0xA971FBC3, 0xA972FBC3, + 0xA973FBC3, 0xA974FBC3, 0xA975FBC3, 0xA976FBC3, 0xA977FBC3, 0xA978FBC3, 0xA979FBC3, 0xA97AFBC3, 0xA97BFBC3, 0xA97CFBC3, 0xA97DFBC3, 0xA97EFBC3, 0xA97FFBC3, 0xA980FBC3, 0xA981FBC3, + 0xA982FBC3, 0xA983FBC3, 0xA984FBC3, 0xA985FBC3, 0xA986FBC3, 0xA987FBC3, 0xA988FBC3, 0xA989FBC3, 0xA98AFBC3, 0xA98BFBC3, 
0xA98CFBC3, 0xA98DFBC3, 0xA98EFBC3, 0xA98FFBC3, 0xA990FBC3, + 0xA991FBC3, 0xA992FBC3, 0xA993FBC3, 0xA994FBC3, 0xA995FBC3, 0xA996FBC3, 0xA997FBC3, 0xA998FBC3, 0xA999FBC3, 0xA99AFBC3, 0xA99BFBC3, 0xA99CFBC3, 0xA99DFBC3, 0xA99EFBC3, 0xA99FFBC3, + 0xA9A0FBC3, 0xA9A1FBC3, 0xA9A2FBC3, 0xA9A3FBC3, 0xA9A4FBC3, 0xA9A5FBC3, 0xA9A6FBC3, 0xA9A7FBC3, 0xA9A8FBC3, 0xA9A9FBC3, 0xA9AAFBC3, 0xA9ABFBC3, 0xA9ACFBC3, 0xA9ADFBC3, 0xA9AEFBC3, + 0xA9AFFBC3, 0xA9B0FBC3, 0xA9B1FBC3, 0xA9B2FBC3, 0xA9B3FBC3, 0xA9B4FBC3, 0xA9B5FBC3, 0xA9B6FBC3, 0xA9B7FBC3, 0xA9B8FBC3, 0xA9B9FBC3, 0xA9BAFBC3, 0xA9BBFBC3, 0xA9BCFBC3, 0xA9BDFBC3, + 0xA9BEFBC3, 0xA9BFFBC3, 0xA9C0FBC3, 0xA9C1FBC3, 0xA9C2FBC3, 0xA9C3FBC3, 0xA9C4FBC3, 0xA9C5FBC3, 0xA9C6FBC3, 0xA9C7FBC3, 0xA9C8FBC3, 0xA9C9FBC3, 0xA9CAFBC3, 0xA9CBFBC3, 0xA9CCFBC3, + 0xA9CDFBC3, 0xA9CEFBC3, 0xA9CFFBC3, 0xA9D0FBC3, 0xA9D1FBC3, 0xA9D2FBC3, 0xA9D3FBC3, 0xA9D4FBC3, 0xA9D5FBC3, 0xA9D6FBC3, 0xA9D7FBC3, 0xA9D8FBC3, 0xA9D9FBC3, 0xA9DAFBC3, 0xA9DBFBC3, + 0xA9DCFBC3, 0xA9DDFBC3, 0xA9DEFBC3, 0xA9DFFBC3, 0xA9E0FBC3, 0xA9E1FBC3, 0xA9E2FBC3, 0xA9E3FBC3, 0xA9E4FBC3, 0xA9E5FBC3, 0xA9E6FBC3, 0xA9E7FBC3, 0xA9E8FBC3, 0xA9E9FBC3, 0xA9EAFBC3, + 0xA9EBFBC3, 0xA9ECFBC3, 0xA9EDFBC3, 0xA9EEFBC3, 0xA9EFFBC3, 0xA9F0FBC3, 0xA9F1FBC3, 0xA9F2FBC3, 0xA9F3FBC3, 0xA9F4FBC3, 0xA9F5FBC3, 0xA9F6FBC3, 0xA9F7FBC3, 0xA9F8FBC3, 0xA9F9FBC3, + 0xA9FAFBC3, 0xA9FBFBC3, 0xA9FCFBC3, 0xA9FDFBC3, 0xA9FEFBC3, 0xA9FFFBC3, 0xAA00FBC3, 0xAA01FBC3, 0xAA02FBC3, 0xAA03FBC3, 0xAA04FBC3, 0xAA05FBC3, 0xAA06FBC3, 0xAA07FBC3, 0xAA08FBC3, + 0xAA09FBC3, 0xAA0AFBC3, 0xAA0BFBC3, 0xAA0CFBC3, 0xAA0DFBC3, 0xAA0EFBC3, 0xAA0FFBC3, 0xAA10FBC3, 0xAA11FBC3, 0xAA12FBC3, 0xAA13FBC3, 0xAA14FBC3, 0xAA15FBC3, 0xAA16FBC3, 0xAA17FBC3, + 0xAA18FBC3, 0xAA19FBC3, 0xAA1AFBC3, 0xAA1BFBC3, 0xAA1CFBC3, 0xAA1DFBC3, 0xAA1EFBC3, 0xAA1FFBC3, 0xAA20FBC3, 0xAA21FBC3, 0xAA22FBC3, 0xAA23FBC3, 0xAA24FBC3, 0xAA25FBC3, 0xAA26FBC3, + 0xAA27FBC3, 0xAA28FBC3, 0xAA29FBC3, 0xAA2AFBC3, 0xAA2BFBC3, 0xAA2CFBC3, 0xAA2DFBC3, 0xAA2EFBC3, 0xAA2FFBC3, 
0xAA30FBC3, 0xAA31FBC3, 0xAA32FBC3, 0xAA33FBC3, 0xAA34FBC3, 0xAA35FBC3, + 0xAA36FBC3, 0xAA37FBC3, 0xAA38FBC3, 0xAA39FBC3, 0xAA3AFBC3, 0xAA3BFBC3, 0xAA3CFBC3, 0xAA3DFBC3, 0xAA3EFBC3, 0xAA3FFBC3, 0xAA40FBC3, 0xAA41FBC3, 0xAA42FBC3, 0xAA43FBC3, 0xAA44FBC3, + 0xAA45FBC3, 0xAA46FBC3, 0xAA47FBC3, 0xAA48FBC3, 0xAA49FBC3, 0xAA4AFBC3, 0xAA4BFBC3, 0xAA4CFBC3, 0xAA4DFBC3, 0xAA4EFBC3, 0xAA4FFBC3, 0xAA50FBC3, 0xAA51FBC3, 0xAA52FBC3, 0xAA53FBC3, + 0xAA54FBC3, 0xAA55FBC3, 0xAA56FBC3, 0xAA57FBC3, 0xAA58FBC3, 0xAA59FBC3, 0xAA5AFBC3, 0xAA5BFBC3, 0xAA5CFBC3, 0xAA5DFBC3, 0xAA5EFBC3, 0xAA5FFBC3, 0xAA60FBC3, 0xAA61FBC3, 0xAA62FBC3, + 0xAA63FBC3, 0xAA64FBC3, 0xAA65FBC3, 0xAA66FBC3, 0xAA67FBC3, 0xAA68FBC3, 0xAA69FBC3, 0xAA6AFBC3, 0xAA6BFBC3, 0xAA6CFBC3, 0xAA6DFBC3, 0xAA6EFBC3, 0xAA6FFBC3, 0xAA70FBC3, 0xAA71FBC3, + 0xAA72FBC3, 0xAA73FBC3, 0xAA74FBC3, 0xAA75FBC3, 0xAA76FBC3, 0xAA77FBC3, 0xAA78FBC3, 0xAA79FBC3, 0xAA7AFBC3, 0xAA7BFBC3, 0xAA7CFBC3, 0xAA7DFBC3, 0xAA7EFBC3, 0xAA7FFBC3, 0xAA80FBC3, + 0xAA81FBC3, 0xAA82FBC3, 0xAA83FBC3, 0xAA84FBC3, 0xAA85FBC3, 0xAA86FBC3, 0xAA87FBC3, 0xAA88FBC3, 0xAA89FBC3, 0xAA8AFBC3, 0xAA8BFBC3, 0xAA8CFBC3, 0xAA8DFBC3, 0xAA8EFBC3, 0xAA8FFBC3, + 0xAA90FBC3, 0xAA91FBC3, 0xAA92FBC3, 0xAA93FBC3, 0xAA94FBC3, 0xAA95FBC3, 0xAA96FBC3, 0xAA97FBC3, 0xAA98FBC3, 0xAA99FBC3, 0xAA9AFBC3, 0xAA9BFBC3, 0xAA9CFBC3, 0xAA9DFBC3, 0xAA9EFBC3, + 0xAA9FFBC3, 0xAAA0FBC3, 0xAAA1FBC3, 0xAAA2FBC3, 0xAAA3FBC3, 0xAAA4FBC3, 0xAAA5FBC3, 0xAAA6FBC3, 0xAAA7FBC3, 0xAAA8FBC3, 0xAAA9FBC3, 0xAAAAFBC3, 0xAAABFBC3, 0xAAACFBC3, 0xAAADFBC3, + 0xAAAEFBC3, 0xAAAFFBC3, 0xAAB0FBC3, 0xAAB1FBC3, 0xAAB2FBC3, 0xAAB3FBC3, 0xAAB4FBC3, 0xAAB5FBC3, 0xAAB6FBC3, 0xAAB7FBC3, 0xAAB8FBC3, 0xAAB9FBC3, 0xAABAFBC3, 0xAABBFBC3, 0xAABCFBC3, + 0xAABDFBC3, 0xAABEFBC3, 0xAABFFBC3, 0xAAC0FBC3, 0xAAC1FBC3, 0xAAC2FBC3, 0xAAC3FBC3, 0xAAC4FBC3, 0xAAC5FBC3, 0xAAC6FBC3, 0xAAC7FBC3, 0xAAC8FBC3, 0xAAC9FBC3, 0xAACAFBC3, 0xAACBFBC3, + 0xAACCFBC3, 0xAACDFBC3, 0xAACEFBC3, 0xAACFFBC3, 0xAAD0FBC3, 0xAAD1FBC3, 0xAAD2FBC3, 0xAAD3FBC3, 
0xAAD4FBC3, 0xAAD5FBC3, 0xAAD6FBC3, 0xAAD7FBC3, 0xAAD8FBC3, 0xAAD9FBC3, 0xAADAFBC3, + 0xAADBFBC3, 0xAADCFBC3, 0xAADDFBC3, 0xAADEFBC3, 0xAADFFBC3, 0xAAE0FBC3, 0xAAE1FBC3, 0xAAE2FBC3, 0xAAE3FBC3, 0xAAE4FBC3, 0xAAE5FBC3, 0xAAE6FBC3, 0xAAE7FBC3, 0xAAE8FBC3, 0xAAE9FBC3, + 0xAAEAFBC3, 0xAAEBFBC3, 0xAAECFBC3, 0xAAEDFBC3, 0xAAEEFBC3, 0xAAEFFBC3, 0xAAF0FBC3, 0xAAF1FBC3, 0xAAF2FBC3, 0xAAF3FBC3, 0xAAF4FBC3, 0xAAF5FBC3, 0xAAF6FBC3, 0xAAF7FBC3, 0xAAF8FBC3, + 0xAAF9FBC3, 0xAAFAFBC3, 0xAAFBFBC3, 0xAAFCFBC3, 0xAAFDFBC3, 0xAAFEFBC3, 0xAAFFFBC3, 0xAB00FBC3, 0xAB01FBC3, 0xAB02FBC3, 0xAB03FBC3, 0xAB04FBC3, 0xAB05FBC3, 0xAB06FBC3, 0xAB07FBC3, + 0xAB08FBC3, 0xAB09FBC3, 0xAB0AFBC3, 0xAB0BFBC3, 0xAB0CFBC3, 0xAB0DFBC3, 0xAB0EFBC3, 0xAB0FFBC3, 0xAB10FBC3, 0xAB11FBC3, 0xAB12FBC3, 0xAB13FBC3, 0xAB14FBC3, 0xAB15FBC3, 0xAB16FBC3, + 0xAB17FBC3, 0xAB18FBC3, 0xAB19FBC3, 0xAB1AFBC3, 0xAB1BFBC3, 0xAB1CFBC3, 0xAB1DFBC3, 0xAB1EFBC3, 0xAB1FFBC3, 0xAB20FBC3, 0xAB21FBC3, 0xAB22FBC3, 0xAB23FBC3, 0xAB24FBC3, 0xAB25FBC3, + 0xAB26FBC3, 0xAB27FBC3, 0xAB28FBC3, 0xAB29FBC3, 0xAB2AFBC3, 0xAB2BFBC3, 0xAB2CFBC3, 0xAB2DFBC3, 0xAB2EFBC3, 0xAB2FFBC3, 0xAB30FBC3, 0xAB31FBC3, 0xAB32FBC3, 0xAB33FBC3, 0xAB34FBC3, + 0xAB35FBC3, 0xAB36FBC3, 0xAB37FBC3, 0xAB38FBC3, 0xAB39FBC3, 0xAB3AFBC3, 0xAB3BFBC3, 0xAB3CFBC3, 0xAB3DFBC3, 0xAB3EFBC3, 0xAB3FFBC3, 0xAB40FBC3, 0xAB41FBC3, 0xAB42FBC3, 0xAB43FBC3, + 0xAB44FBC3, 0xAB45FBC3, 0xAB46FBC3, 0xAB47FBC3, 0xAB48FBC3, 0xAB49FBC3, 0xAB4AFBC3, 0xAB4BFBC3, 0xAB4CFBC3, 0xAB4DFBC3, 0xAB4EFBC3, 0xAB4FFBC3, 0xAB50FBC3, 0xAB51FBC3, 0xAB52FBC3, + 0xAB53FBC3, 0xAB54FBC3, 0xAB55FBC3, 0xAB56FBC3, 0xAB57FBC3, 0xAB58FBC3, 0xAB59FBC3, 0xAB5AFBC3, 0xAB5BFBC3, 0xAB5CFBC3, 0xAB5DFBC3, 0xAB5EFBC3, 0xAB5FFBC3, 0xAB60FBC3, 0xAB61FBC3, + 0xAB62FBC3, 0xAB63FBC3, 0xAB64FBC3, 0xAB65FBC3, 0xAB66FBC3, 0xAB67FBC3, 0xAB68FBC3, 0xAB69FBC3, 0xAB6AFBC3, 0xAB6BFBC3, 0xAB6CFBC3, 0xAB6DFBC3, 0xAB6EFBC3, 0xAB6FFBC3, 0xAB70FBC3, + 0xAB71FBC3, 0xAB72FBC3, 0xAB73FBC3, 0xAB74FBC3, 0xAB75FBC3, 0xAB76FBC3, 0xAB77FBC3, 
0xAB78FBC3, 0xAB79FBC3, 0xAB7AFBC3, 0xAB7BFBC3, 0xAB7CFBC3, 0xAB7DFBC3, 0xAB7EFBC3, 0xAB7FFBC3, + 0xAB80FBC3, 0xAB81FBC3, 0xAB82FBC3, 0xAB83FBC3, 0xAB84FBC3, 0xAB85FBC3, 0xAB86FBC3, 0xAB87FBC3, 0xAB88FBC3, 0xAB89FBC3, 0xAB8AFBC3, 0xAB8BFBC3, 0xAB8CFBC3, 0xAB8DFBC3, 0xAB8EFBC3, + 0xAB8FFBC3, 0xAB90FBC3, 0xAB91FBC3, 0xAB92FBC3, 0xAB93FBC3, 0xAB94FBC3, 0xAB95FBC3, 0xAB96FBC3, 0xAB97FBC3, 0xAB98FBC3, 0xAB99FBC3, 0xAB9AFBC3, 0xAB9BFBC3, 0xAB9CFBC3, 0xAB9DFBC3, + 0xAB9EFBC3, 0xAB9FFBC3, 0xABA0FBC3, 0xABA1FBC3, 0xABA2FBC3, 0xABA3FBC3, 0xABA4FBC3, 0xABA5FBC3, 0xABA6FBC3, 0xABA7FBC3, 0xABA8FBC3, 0xABA9FBC3, 0xABAAFBC3, 0xABABFBC3, 0xABACFBC3, + 0xABADFBC3, 0xABAEFBC3, 0xABAFFBC3, 0xABB0FBC3, 0xABB1FBC3, 0xABB2FBC3, 0xABB3FBC3, 0xABB4FBC3, 0xABB5FBC3, 0xABB6FBC3, 0xABB7FBC3, 0xABB8FBC3, 0xABB9FBC3, 0xABBAFBC3, 0xABBBFBC3, + 0xABBCFBC3, 0xABBDFBC3, 0xABBEFBC3, 0xABBFFBC3, 0xABC0FBC3, 0xABC1FBC3, 0xABC2FBC3, 0xABC3FBC3, 0xABC4FBC3, 0xABC5FBC3, 0xABC6FBC3, 0xABC7FBC3, 0xABC8FBC3, 0xABC9FBC3, 0xABCAFBC3, + 0xABCBFBC3, 0xABCCFBC3, 0xABCDFBC3, 0xABCEFBC3, 0xABCFFBC3, 0xABD0FBC3, 0xABD1FBC3, 0xABD2FBC3, 0xABD3FBC3, 0xABD4FBC3, 0xABD5FBC3, 0xABD6FBC3, 0xABD7FBC3, 0xABD8FBC3, 0xABD9FBC3, + 0xABDAFBC3, 0xABDBFBC3, 0xABDCFBC3, 0xABDDFBC3, 0xABDEFBC3, 0xABDFFBC3, 0xABE0FBC3, 0xABE1FBC3, 0xABE2FBC3, 0xABE3FBC3, 0xABE4FBC3, 0xABE5FBC3, 0xABE6FBC3, 0xABE7FBC3, 0xABE8FBC3, + 0xABE9FBC3, 0xABEAFBC3, 0xABEBFBC3, 0xABECFBC3, 0xABEDFBC3, 0xABEEFBC3, 0xABEFFBC3, 0xABF0FBC3, 0xABF1FBC3, 0xABF2FBC3, 0xABF3FBC3, 0xABF4FBC3, 0xABF5FBC3, 0xABF6FBC3, 0xABF7FBC3, + 0xABF8FBC3, 0xABF9FBC3, 0xABFAFBC3, 0xABFBFBC3, 0xABFCFBC3, 0xABFDFBC3, 0xABFEFBC3, 0xABFFFBC3, 0xAC00FBC3, 0xAC01FBC3, 0xAC02FBC3, 0xAC03FBC3, 0xAC04FBC3, 0xAC05FBC3, 0xAC06FBC3, + 0xAC07FBC3, 0xAC08FBC3, 0xAC09FBC3, 0xAC0AFBC3, 0xAC0BFBC3, 0xAC0CFBC3, 0xAC0DFBC3, 0xAC0EFBC3, 0xAC0FFBC3, 0xAC10FBC3, 0xAC11FBC3, 0xAC12FBC3, 0xAC13FBC3, 0xAC14FBC3, 0xAC15FBC3, + 0xAC16FBC3, 0xAC17FBC3, 0xAC18FBC3, 0xAC19FBC3, 0xAC1AFBC3, 0xAC1BFBC3, 
0xAC1CFBC3, 0xAC1DFBC3, 0xAC1EFBC3, 0xAC1FFBC3, 0xAC20FBC3, 0xAC21FBC3, 0xAC22FBC3, 0xAC23FBC3, 0xAC24FBC3, + 0xAC25FBC3, 0xAC26FBC3, 0xAC27FBC3, 0xAC28FBC3, 0xAC29FBC3, 0xAC2AFBC3, 0xAC2BFBC3, 0xAC2CFBC3, 0xAC2DFBC3, 0xAC2EFBC3, 0xAC2FFBC3, 0xAC30FBC3, 0xAC31FBC3, 0xAC32FBC3, 0xAC33FBC3, + 0xAC34FBC3, 0xAC35FBC3, 0xAC36FBC3, 0xAC37FBC3, 0xAC38FBC3, 0xAC39FBC3, 0xAC3AFBC3, 0xAC3BFBC3, 0xAC3CFBC3, 0xAC3DFBC3, 0xAC3EFBC3, 0xAC3FFBC3, 0xAC40FBC3, 0xAC41FBC3, 0xAC42FBC3, + 0xAC43FBC3, 0xAC44FBC3, 0xAC45FBC3, 0xAC46FBC3, 0xAC47FBC3, 0xAC48FBC3, 0xAC49FBC3, 0xAC4AFBC3, 0xAC4BFBC3, 0xAC4CFBC3, 0xAC4DFBC3, 0xAC4EFBC3, 0xAC4FFBC3, 0xAC50FBC3, 0xAC51FBC3, + 0xAC52FBC3, 0xAC53FBC3, 0xAC54FBC3, 0xAC55FBC3, 0xAC56FBC3, 0xAC57FBC3, 0xAC58FBC3, 0xAC59FBC3, 0xAC5AFBC3, 0xAC5BFBC3, 0xAC5CFBC3, 0xAC5DFBC3, 0xAC5EFBC3, 0xAC5FFBC3, 0xAC60FBC3, + 0xAC61FBC3, 0xAC62FBC3, 0xAC63FBC3, 0xAC64FBC3, 0xAC65FBC3, 0xAC66FBC3, 0xAC67FBC3, 0xAC68FBC3, 0xAC69FBC3, 0xAC6AFBC3, 0xAC6BFBC3, 0xAC6CFBC3, 0xAC6DFBC3, 0xAC6EFBC3, 0xAC6FFBC3, + 0xAC70FBC3, 0xAC71FBC3, 0xAC72FBC3, 0xAC73FBC3, 0xAC74FBC3, 0xAC75FBC3, 0xAC76FBC3, 0xAC77FBC3, 0xAC78FBC3, 0xAC79FBC3, 0xAC7AFBC3, 0xAC7BFBC3, 0xAC7CFBC3, 0xAC7DFBC3, 0xAC7EFBC3, + 0xAC7FFBC3, 0xAC80FBC3, 0xAC81FBC3, 0xAC82FBC3, 0xAC83FBC3, 0xAC84FBC3, 0xAC85FBC3, 0xAC86FBC3, 0xAC87FBC3, 0xAC88FBC3, 0xAC89FBC3, 0xAC8AFBC3, 0xAC8BFBC3, 0xAC8CFBC3, 0xAC8DFBC3, + 0xAC8EFBC3, 0xAC8FFBC3, 0xAC90FBC3, 0xAC91FBC3, 0xAC92FBC3, 0xAC93FBC3, 0xAC94FBC3, 0xAC95FBC3, 0xAC96FBC3, 0xAC97FBC3, 0xAC98FBC3, 0xAC99FBC3, 0xAC9AFBC3, 0xAC9BFBC3, 0xAC9CFBC3, + 0xAC9DFBC3, 0xAC9EFBC3, 0xAC9FFBC3, 0xACA0FBC3, 0xACA1FBC3, 0xACA2FBC3, 0xACA3FBC3, 0xACA4FBC3, 0xACA5FBC3, 0xACA6FBC3, 0xACA7FBC3, 0xACA8FBC3, 0xACA9FBC3, 0xACAAFBC3, 0xACABFBC3, + 0xACACFBC3, 0xACADFBC3, 0xACAEFBC3, 0xACAFFBC3, 0xACB0FBC3, 0xACB1FBC3, 0xACB2FBC3, 0xACB3FBC3, 0xACB4FBC3, 0xACB5FBC3, 0xACB6FBC3, 0xACB7FBC3, 0xACB8FBC3, 0xACB9FBC3, 0xACBAFBC3, + 0xACBBFBC3, 0xACBCFBC3, 0xACBDFBC3, 0xACBEFBC3, 0xACBFFBC3, 
0xACC0FBC3, 0xACC1FBC3, 0xACC2FBC3, 0xACC3FBC3, 0xACC4FBC3, 0xACC5FBC3, 0xACC6FBC3, 0xACC7FBC3, 0xACC8FBC3, 0xACC9FBC3, + 0xACCAFBC3, 0xACCBFBC3, 0xACCCFBC3, 0xACCDFBC3, 0xACCEFBC3, 0xACCFFBC3, 0xACD0FBC3, 0xACD1FBC3, 0xACD2FBC3, 0xACD3FBC3, 0xACD4FBC3, 0xACD5FBC3, 0xACD6FBC3, 0xACD7FBC3, 0xACD8FBC3, + 0xACD9FBC3, 0xACDAFBC3, 0xACDBFBC3, 0xACDCFBC3, 0xACDDFBC3, 0xACDEFBC3, 0xACDFFBC3, 0xACE0FBC3, 0xACE1FBC3, 0xACE2FBC3, 0xACE3FBC3, 0xACE4FBC3, 0xACE5FBC3, 0xACE6FBC3, 0xACE7FBC3, + 0xACE8FBC3, 0xACE9FBC3, 0xACEAFBC3, 0xACEBFBC3, 0xACECFBC3, 0xACEDFBC3, 0xACEEFBC3, 0xACEFFBC3, 0xACF0FBC3, 0xACF1FBC3, 0xACF2FBC3, 0xACF3FBC3, 0xACF4FBC3, 0xACF5FBC3, 0xACF6FBC3, + 0xACF7FBC3, 0xACF8FBC3, 0xACF9FBC3, 0xACFAFBC3, 0xACFBFBC3, 0xACFCFBC3, 0xACFDFBC3, 0xACFEFBC3, 0xACFFFBC3, 0xAD00FBC3, 0xAD01FBC3, 0xAD02FBC3, 0xAD03FBC3, 0xAD04FBC3, 0xAD05FBC3, + 0xAD06FBC3, 0xAD07FBC3, 0xAD08FBC3, 0xAD09FBC3, 0xAD0AFBC3, 0xAD0BFBC3, 0xAD0CFBC3, 0xAD0DFBC3, 0xAD0EFBC3, 0xAD0FFBC3, 0xAD10FBC3, 0xAD11FBC3, 0xAD12FBC3, 0xAD13FBC3, 0xAD14FBC3, + 0xAD15FBC3, 0xAD16FBC3, 0xAD17FBC3, 0xAD18FBC3, 0xAD19FBC3, 0xAD1AFBC3, 0xAD1BFBC3, 0xAD1CFBC3, 0xAD1DFBC3, 0xAD1EFBC3, 0xAD1FFBC3, 0xAD20FBC3, 0xAD21FBC3, 0xAD22FBC3, 0xAD23FBC3, + 0xAD24FBC3, 0xAD25FBC3, 0xAD26FBC3, 0xAD27FBC3, 0xAD28FBC3, 0xAD29FBC3, 0xAD2AFBC3, 0xAD2BFBC3, 0xAD2CFBC3, 0xAD2DFBC3, 0xAD2EFBC3, 0xAD2FFBC3, 0xAD30FBC3, 0xAD31FBC3, 0xAD32FBC3, + 0xAD33FBC3, 0xAD34FBC3, 0xAD35FBC3, 0xAD36FBC3, 0xAD37FBC3, 0xAD38FBC3, 0xAD39FBC3, 0xAD3AFBC3, 0xAD3BFBC3, 0xAD3CFBC3, 0xAD3DFBC3, 0xAD3EFBC3, 0xAD3FFBC3, 0xAD40FBC3, 0xAD41FBC3, + 0xAD42FBC3, 0xAD43FBC3, 0xAD44FBC3, 0xAD45FBC3, 0xAD46FBC3, 0xAD47FBC3, 0xAD48FBC3, 0xAD49FBC3, 0xAD4AFBC3, 0xAD4BFBC3, 0xAD4CFBC3, 0xAD4DFBC3, 0xAD4EFBC3, 0xAD4FFBC3, 0xAD50FBC3, + 0xAD51FBC3, 0xAD52FBC3, 0xAD53FBC3, 0xAD54FBC3, 0xAD55FBC3, 0xAD56FBC3, 0xAD57FBC3, 0xAD58FBC3, 0xAD59FBC3, 0xAD5AFBC3, 0xAD5BFBC3, 0xAD5CFBC3, 0xAD5DFBC3, 0xAD5EFBC3, 0xAD5FFBC3, + 0xAD60FBC3, 0xAD61FBC3, 0xAD62FBC3, 0xAD63FBC3, 
0xAD64FBC3, 0xAD65FBC3, 0xAD66FBC3, 0xAD67FBC3, 0xAD68FBC3, 0xAD69FBC3, 0xAD6AFBC3, 0xAD6BFBC3, 0xAD6CFBC3, 0xAD6DFBC3, 0xAD6EFBC3, + 0xAD6FFBC3, 0xAD70FBC3, 0xAD71FBC3, 0xAD72FBC3, 0xAD73FBC3, 0xAD74FBC3, 0xAD75FBC3, 0xAD76FBC3, 0xAD77FBC3, 0xAD78FBC3, 0xAD79FBC3, 0xAD7AFBC3, 0xAD7BFBC3, 0xAD7CFBC3, 0xAD7DFBC3, + 0xAD7EFBC3, 0xAD7FFBC3, 0xAD80FBC3, 0xAD81FBC3, 0xAD82FBC3, 0xAD83FBC3, 0xAD84FBC3, 0xAD85FBC3, 0xAD86FBC3, 0xAD87FBC3, 0xAD88FBC3, 0xAD89FBC3, 0xAD8AFBC3, 0xAD8BFBC3, 0xAD8CFBC3, + 0xAD8DFBC3, 0xAD8EFBC3, 0xAD8FFBC3, 0xAD90FBC3, 0xAD91FBC3, 0xAD92FBC3, 0xAD93FBC3, 0xAD94FBC3, 0xAD95FBC3, 0xAD96FBC3, 0xAD97FBC3, 0xAD98FBC3, 0xAD99FBC3, 0xAD9AFBC3, 0xAD9BFBC3, + 0xAD9CFBC3, 0xAD9DFBC3, 0xAD9EFBC3, 0xAD9FFBC3, 0xADA0FBC3, 0xADA1FBC3, 0xADA2FBC3, 0xADA3FBC3, 0xADA4FBC3, 0xADA5FBC3, 0xADA6FBC3, 0xADA7FBC3, 0xADA8FBC3, 0xADA9FBC3, 0xADAAFBC3, + 0xADABFBC3, 0xADACFBC3, 0xADADFBC3, 0xADAEFBC3, 0xADAFFBC3, 0xADB0FBC3, 0xADB1FBC3, 0xADB2FBC3, 0xADB3FBC3, 0xADB4FBC3, 0xADB5FBC3, 0xADB6FBC3, 0xADB7FBC3, 0xADB8FBC3, 0xADB9FBC3, + 0xADBAFBC3, 0xADBBFBC3, 0xADBCFBC3, 0xADBDFBC3, 0xADBEFBC3, 0xADBFFBC3, 0xADC0FBC3, 0xADC1FBC3, 0xADC2FBC3, 0xADC3FBC3, 0xADC4FBC3, 0xADC5FBC3, 0xADC6FBC3, 0xADC7FBC3, 0xADC8FBC3, + 0xADC9FBC3, 0xADCAFBC3, 0xADCBFBC3, 0xADCCFBC3, 0xADCDFBC3, 0xADCEFBC3, 0xADCFFBC3, 0xADD0FBC3, 0xADD1FBC3, 0xADD2FBC3, 0xADD3FBC3, 0xADD4FBC3, 0xADD5FBC3, 0xADD6FBC3, 0xADD7FBC3, + 0xADD8FBC3, 0xADD9FBC3, 0xADDAFBC3, 0xADDBFBC3, 0xADDCFBC3, 0xADDDFBC3, 0xADDEFBC3, 0xADDFFBC3, 0xADE0FBC3, 0xADE1FBC3, 0xADE2FBC3, 0xADE3FBC3, 0xADE4FBC3, 0xADE5FBC3, 0xADE6FBC3, + 0xADE7FBC3, 0xADE8FBC3, 0xADE9FBC3, 0xADEAFBC3, 0xADEBFBC3, 0xADECFBC3, 0xADEDFBC3, 0xADEEFBC3, 0xADEFFBC3, 0xADF0FBC3, 0xADF1FBC3, 0xADF2FBC3, 0xADF3FBC3, 0xADF4FBC3, 0xADF5FBC3, + 0xADF6FBC3, 0xADF7FBC3, 0xADF8FBC3, 0xADF9FBC3, 0xADFAFBC3, 0xADFBFBC3, 0xADFCFBC3, 0xADFDFBC3, 0xADFEFBC3, 0xADFFFBC3, 0xAE00FBC3, 0xAE01FBC3, 0xAE02FBC3, 0xAE03FBC3, 0xAE04FBC3, + 0xAE05FBC3, 0xAE06FBC3, 0xAE07FBC3, 
0xAE08FBC3, 0xAE09FBC3, 0xAE0AFBC3, 0xAE0BFBC3, 0xAE0CFBC3, 0xAE0DFBC3, 0xAE0EFBC3, 0xAE0FFBC3, 0xAE10FBC3, 0xAE11FBC3, 0xAE12FBC3, 0xAE13FBC3, + 0xAE14FBC3, 0xAE15FBC3, 0xAE16FBC3, 0xAE17FBC3, 0xAE18FBC3, 0xAE19FBC3, 0xAE1AFBC3, 0xAE1BFBC3, 0xAE1CFBC3, 0xAE1DFBC3, 0xAE1EFBC3, 0xAE1FFBC3, 0xAE20FBC3, 0xAE21FBC3, 0xAE22FBC3, + 0xAE23FBC3, 0xAE24FBC3, 0xAE25FBC3, 0xAE26FBC3, 0xAE27FBC3, 0xAE28FBC3, 0xAE29FBC3, 0xAE2AFBC3, 0xAE2BFBC3, 0xAE2CFBC3, 0xAE2DFBC3, 0xAE2EFBC3, 0xAE2FFBC3, 0xAE30FBC3, 0xAE31FBC3, + 0xAE32FBC3, 0xAE33FBC3, 0xAE34FBC3, 0xAE35FBC3, 0xAE36FBC3, 0xAE37FBC3, 0xAE38FBC3, 0xAE39FBC3, 0xAE3AFBC3, 0xAE3BFBC3, 0xAE3CFBC3, 0xAE3DFBC3, 0xAE3EFBC3, 0xAE3FFBC3, 0xAE40FBC3, + 0xAE41FBC3, 0xAE42FBC3, 0xAE43FBC3, 0xAE44FBC3, 0xAE45FBC3, 0xAE46FBC3, 0xAE47FBC3, 0xAE48FBC3, 0xAE49FBC3, 0xAE4AFBC3, 0xAE4BFBC3, 0xAE4CFBC3, 0xAE4DFBC3, 0xAE4EFBC3, 0xAE4FFBC3, + 0xAE50FBC3, 0xAE51FBC3, 0xAE52FBC3, 0xAE53FBC3, 0xAE54FBC3, 0xAE55FBC3, 0xAE56FBC3, 0xAE57FBC3, 0xAE58FBC3, 0xAE59FBC3, 0xAE5AFBC3, 0xAE5BFBC3, 0xAE5CFBC3, 0xAE5DFBC3, 0xAE5EFBC3, + 0xAE5FFBC3, 0xAE60FBC3, 0xAE61FBC3, 0xAE62FBC3, 0xAE63FBC3, 0xAE64FBC3, 0xAE65FBC3, 0xAE66FBC3, 0xAE67FBC3, 0xAE68FBC3, 0xAE69FBC3, 0xAE6AFBC3, 0xAE6BFBC3, 0xAE6CFBC3, 0xAE6DFBC3, + 0xAE6EFBC3, 0xAE6FFBC3, 0xAE70FBC3, 0xAE71FBC3, 0xAE72FBC3, 0xAE73FBC3, 0xAE74FBC3, 0xAE75FBC3, 0xAE76FBC3, 0xAE77FBC3, 0xAE78FBC3, 0xAE79FBC3, 0xAE7AFBC3, 0xAE7BFBC3, 0xAE7CFBC3, + 0xAE7DFBC3, 0xAE7EFBC3, 0xAE7FFBC3, 0xAE80FBC3, 0xAE81FBC3, 0xAE82FBC3, 0xAE83FBC3, 0xAE84FBC3, 0xAE85FBC3, 0xAE86FBC3, 0xAE87FBC3, 0xAE88FBC3, 0xAE89FBC3, 0xAE8AFBC3, 0xAE8BFBC3, + 0xAE8CFBC3, 0xAE8DFBC3, 0xAE8EFBC3, 0xAE8FFBC3, 0xAE90FBC3, 0xAE91FBC3, 0xAE92FBC3, 0xAE93FBC3, 0xAE94FBC3, 0xAE95FBC3, 0xAE96FBC3, 0xAE97FBC3, 0xAE98FBC3, 0xAE99FBC3, 0xAE9AFBC3, + 0xAE9BFBC3, 0xAE9CFBC3, 0xAE9DFBC3, 0xAE9EFBC3, 0xAE9FFBC3, 0xAEA0FBC3, 0xAEA1FBC3, 0xAEA2FBC3, 0xAEA3FBC3, 0xAEA4FBC3, 0xAEA5FBC3, 0xAEA6FBC3, 0xAEA7FBC3, 0xAEA8FBC3, 0xAEA9FBC3, + 0xAEAAFBC3, 0xAEABFBC3, 
0xAEACFBC3, 0xAEADFBC3, 0xAEAEFBC3, 0xAEAFFBC3, 0xAEB0FBC3, 0xAEB1FBC3, 0xAEB2FBC3, 0xAEB3FBC3, 0xAEB4FBC3, 0xAEB5FBC3, 0xAEB6FBC3, 0xAEB7FBC3, 0xAEB8FBC3, + 0xAEB9FBC3, 0xAEBAFBC3, 0xAEBBFBC3, 0xAEBCFBC3, 0xAEBDFBC3, 0xAEBEFBC3, 0xAEBFFBC3, 0xAEC0FBC3, 0xAEC1FBC3, 0xAEC2FBC3, 0xAEC3FBC3, 0xAEC4FBC3, 0xAEC5FBC3, 0xAEC6FBC3, 0xAEC7FBC3, + 0xAEC8FBC3, 0xAEC9FBC3, 0xAECAFBC3, 0xAECBFBC3, 0xAECCFBC3, 0xAECDFBC3, 0xAECEFBC3, 0xAECFFBC3, 0xAED0FBC3, 0xAED1FBC3, 0xAED2FBC3, 0xAED3FBC3, 0xAED4FBC3, 0xAED5FBC3, 0xAED6FBC3, + 0xAED7FBC3, 0xAED8FBC3, 0xAED9FBC3, 0xAEDAFBC3, 0xAEDBFBC3, 0xAEDCFBC3, 0xAEDDFBC3, 0xAEDEFBC3, 0xAEDFFBC3, 0xAEE0FBC3, 0xAEE1FBC3, 0xAEE2FBC3, 0xAEE3FBC3, 0xAEE4FBC3, 0xAEE5FBC3, + 0xAEE6FBC3, 0xAEE7FBC3, 0xAEE8FBC3, 0xAEE9FBC3, 0xAEEAFBC3, 0xAEEBFBC3, 0xAEECFBC3, 0xAEEDFBC3, 0xAEEEFBC3, 0xAEEFFBC3, 0xAEF0FBC3, 0xAEF1FBC3, 0xAEF2FBC3, 0xAEF3FBC3, 0xAEF4FBC3, + 0xAEF5FBC3, 0xAEF6FBC3, 0xAEF7FBC3, 0xAEF8FBC3, 0xAEF9FBC3, 0xAEFAFBC3, 0xAEFBFBC3, 0xAEFCFBC3, 0xAEFDFBC3, 0xAEFEFBC3, 0xAEFFFBC3, 0xAF00FBC3, 0xAF01FBC3, 0xAF02FBC3, 0xAF03FBC3, + 0xAF04FBC3, 0xAF05FBC3, 0xAF06FBC3, 0xAF07FBC3, 0xAF08FBC3, 0xAF09FBC3, 0xAF0AFBC3, 0xAF0BFBC3, 0xAF0CFBC3, 0xAF0DFBC3, 0xAF0EFBC3, 0xAF0FFBC3, 0xAF10FBC3, 0xAF11FBC3, 0xAF12FBC3, + 0xAF13FBC3, 0xAF14FBC3, 0xAF15FBC3, 0xAF16FBC3, 0xAF17FBC3, 0xAF18FBC3, 0xAF19FBC3, 0xAF1AFBC3, 0xAF1BFBC3, 0xAF1CFBC3, 0xAF1DFBC3, 0xAF1EFBC3, 0xAF1FFBC3, 0xAF20FBC3, 0xAF21FBC3, + 0xAF22FBC3, 0xAF23FBC3, 0xAF24FBC3, 0xAF25FBC3, 0xAF26FBC3, 0xAF27FBC3, 0xAF28FBC3, 0xAF29FBC3, 0xAF2AFBC3, 0xAF2BFBC3, 0xAF2CFBC3, 0xAF2DFBC3, 0xAF2EFBC3, 0xAF2FFBC3, 0xAF30FBC3, + 0xAF31FBC3, 0xAF32FBC3, 0xAF33FBC3, 0xAF34FBC3, 0xAF35FBC3, 0xAF36FBC3, 0xAF37FBC3, 0xAF38FBC3, 0xAF39FBC3, 0xAF3AFBC3, 0xAF3BFBC3, 0xAF3CFBC3, 0xAF3DFBC3, 0xAF3EFBC3, 0xAF3FFBC3, + 0xAF40FBC3, 0xAF41FBC3, 0xAF42FBC3, 0xAF43FBC3, 0xAF44FBC3, 0xAF45FBC3, 0xAF46FBC3, 0xAF47FBC3, 0xAF48FBC3, 0xAF49FBC3, 0xAF4AFBC3, 0xAF4BFBC3, 0xAF4CFBC3, 0xAF4DFBC3, 0xAF4EFBC3, + 0xAF4FFBC3, 
0xAF50FBC3, 0xAF51FBC3, 0xAF52FBC3, 0xAF53FBC3, 0xAF54FBC3, 0xAF55FBC3, 0xAF56FBC3, 0xAF57FBC3, 0xAF58FBC3, 0xAF59FBC3, 0xAF5AFBC3, 0xAF5BFBC3, 0xAF5CFBC3, 0xAF5DFBC3, + 0xAF5EFBC3, 0xAF5FFBC3, 0xAF60FBC3, 0xAF61FBC3, 0xAF62FBC3, 0xAF63FBC3, 0xAF64FBC3, 0xAF65FBC3, 0xAF66FBC3, 0xAF67FBC3, 0xAF68FBC3, 0xAF69FBC3, 0xAF6AFBC3, 0xAF6BFBC3, 0xAF6CFBC3, + 0xAF6DFBC3, 0xAF6EFBC3, 0xAF6FFBC3, 0xAF70FBC3, 0xAF71FBC3, 0xAF72FBC3, 0xAF73FBC3, 0xAF74FBC3, 0xAF75FBC3, 0xAF76FBC3, 0xAF77FBC3, 0xAF78FBC3, 0xAF79FBC3, 0xAF7AFBC3, 0xAF7BFBC3, + 0xAF7CFBC3, 0xAF7DFBC3, 0xAF7EFBC3, 0xAF7FFBC3, 0xAF80FBC3, 0xAF81FBC3, 0xAF82FBC3, 0xAF83FBC3, 0xAF84FBC3, 0xAF85FBC3, 0xAF86FBC3, 0xAF87FBC3, 0xAF88FBC3, 0xAF89FBC3, 0xAF8AFBC3, + 0xAF8BFBC3, 0xAF8CFBC3, 0xAF8DFBC3, 0xAF8EFBC3, 0xAF8FFBC3, 0xAF90FBC3, 0xAF91FBC3, 0xAF92FBC3, 0xAF93FBC3, 0xAF94FBC3, 0xAF95FBC3, 0xAF96FBC3, 0xAF97FBC3, 0xAF98FBC3, 0xAF99FBC3, + 0xAF9AFBC3, 0xAF9BFBC3, 0xAF9CFBC3, 0xAF9DFBC3, 0xAF9EFBC3, 0xAF9FFBC3, 0xAFA0FBC3, 0xAFA1FBC3, 0xAFA2FBC3, 0xAFA3FBC3, 0xAFA4FBC3, 0xAFA5FBC3, 0xAFA6FBC3, 0xAFA7FBC3, 0xAFA8FBC3, + 0xAFA9FBC3, 0xAFAAFBC3, 0xAFABFBC3, 0xAFACFBC3, 0xAFADFBC3, 0xAFAEFBC3, 0xAFAFFBC3, 0xAFB0FBC3, 0xAFB1FBC3, 0xAFB2FBC3, 0xAFB3FBC3, 0xAFB4FBC3, 0xAFB5FBC3, 0xAFB6FBC3, 0xAFB7FBC3, + 0xAFB8FBC3, 0xAFB9FBC3, 0xAFBAFBC3, 0xAFBBFBC3, 0xAFBCFBC3, 0xAFBDFBC3, 0xAFBEFBC3, 0xAFBFFBC3, 0xAFC0FBC3, 0xAFC1FBC3, 0xAFC2FBC3, 0xAFC3FBC3, 0xAFC4FBC3, 0xAFC5FBC3, 0xAFC6FBC3, + 0xAFC7FBC3, 0xAFC8FBC3, 0xAFC9FBC3, 0xAFCAFBC3, 0xAFCBFBC3, 0xAFCCFBC3, 0xAFCDFBC3, 0xAFCEFBC3, 0xAFCFFBC3, 0xAFD0FBC3, 0xAFD1FBC3, 0xAFD2FBC3, 0xAFD3FBC3, 0xAFD4FBC3, 0xAFD5FBC3, + 0xAFD6FBC3, 0xAFD7FBC3, 0xAFD8FBC3, 0xAFD9FBC3, 0xAFDAFBC3, 0xAFDBFBC3, 0xAFDCFBC3, 0xAFDDFBC3, 0xAFDEFBC3, 0xAFDFFBC3, 0xAFE0FBC3, 0xAFE1FBC3, 0xAFE2FBC3, 0xAFE3FBC3, 0xAFE4FBC3, + 0xAFE5FBC3, 0xAFE6FBC3, 0xAFE7FBC3, 0xAFE8FBC3, 0xAFE9FBC3, 0xAFEAFBC3, 0xAFEBFBC3, 0xAFECFBC3, 0xAFEDFBC3, 0xAFEEFBC3, 0xAFEFFBC3, 0xAFF0FBC3, 0xAFF1FBC3, 0xAFF2FBC3, 0xAFF3FBC3, + 
0xAFF4FBC3, 0xAFF5FBC3, 0xAFF6FBC3, 0xAFF7FBC3, 0xAFF8FBC3, 0xAFF9FBC3, 0xAFFAFBC3, 0xAFFBFBC3, 0xAFFCFBC3, 0xAFFDFBC3, 0xAFFEFBC3, 0xAFFFFBC3, 0x3D5D, 0x3D80, 0xB002FBC3, + 0xB003FBC3, 0xB004FBC3, 0xB005FBC3, 0xB006FBC3, 0xB007FBC3, 0xB008FBC3, 0xB009FBC3, 0xB00AFBC3, 0xB00BFBC3, 0xB00CFBC3, 0xB00DFBC3, 0xB00EFBC3, 0xB00FFBC3, 0xB010FBC3, 0xB011FBC3, + 0xB012FBC3, 0xB013FBC3, 0xB014FBC3, 0xB015FBC3, 0xB016FBC3, 0xB017FBC3, 0xB018FBC3, 0xB019FBC3, 0xB01AFBC3, 0xB01BFBC3, 0xB01CFBC3, 0xB01DFBC3, 0xB01EFBC3, 0xB01FFBC3, 0xB020FBC3, + 0xB021FBC3, 0xB022FBC3, 0xB023FBC3, 0xB024FBC3, 0xB025FBC3, 0xB026FBC3, 0xB027FBC3, 0xB028FBC3, 0xB029FBC3, 0xB02AFBC3, 0xB02BFBC3, 0xB02CFBC3, 0xB02DFBC3, 0xB02EFBC3, 0xB02FFBC3, + 0xB030FBC3, 0xB031FBC3, 0xB032FBC3, 0xB033FBC3, 0xB034FBC3, 0xB035FBC3, 0xB036FBC3, 0xB037FBC3, 0xB038FBC3, 0xB039FBC3, 0xB03AFBC3, 0xB03BFBC3, 0xB03CFBC3, 0xB03DFBC3, 0xB03EFBC3, + 0xB03FFBC3, 0xB040FBC3, 0xB041FBC3, 0xB042FBC3, 0xB043FBC3, 0xB044FBC3, 0xB045FBC3, 0xB046FBC3, 0xB047FBC3, 0xB048FBC3, 0xB049FBC3, 0xB04AFBC3, 0xB04BFBC3, 0xB04CFBC3, 0xB04DFBC3, + 0xB04EFBC3, 0xB04FFBC3, 0xB050FBC3, 0xB051FBC3, 0xB052FBC3, 0xB053FBC3, 0xB054FBC3, 0xB055FBC3, 0xB056FBC3, 0xB057FBC3, 0xB058FBC3, 0xB059FBC3, 0xB05AFBC3, 0xB05BFBC3, 0xB05CFBC3, + 0xB05DFBC3, 0xB05EFBC3, 0xB05FFBC3, 0xB060FBC3, 0xB061FBC3, 0xB062FBC3, 0xB063FBC3, 0xB064FBC3, 0xB065FBC3, 0xB066FBC3, 0xB067FBC3, 0xB068FBC3, 0xB069FBC3, 0xB06AFBC3, 0xB06BFBC3, + 0xB06CFBC3, 0xB06DFBC3, 0xB06EFBC3, 0xB06FFBC3, 0xB070FBC3, 0xB071FBC3, 0xB072FBC3, 0xB073FBC3, 0xB074FBC3, 0xB075FBC3, 0xB076FBC3, 0xB077FBC3, 0xB078FBC3, 0xB079FBC3, 0xB07AFBC3, + 0xB07BFBC3, 0xB07CFBC3, 0xB07DFBC3, 0xB07EFBC3, 0xB07FFBC3, 0xB080FBC3, 0xB081FBC3, 0xB082FBC3, 0xB083FBC3, 0xB084FBC3, 0xB085FBC3, 0xB086FBC3, 0xB087FBC3, 0xB088FBC3, 0xB089FBC3, + 0xB08AFBC3, 0xB08BFBC3, 0xB08CFBC3, 0xB08DFBC3, 0xB08EFBC3, 0xB08FFBC3, 0xB090FBC3, 0xB091FBC3, 0xB092FBC3, 0xB093FBC3, 0xB094FBC3, 0xB095FBC3, 0xB096FBC3, 0xB097FBC3, 0xB098FBC3, + 
0xB099FBC3, 0xB09AFBC3, 0xB09BFBC3, 0xB09CFBC3, 0xB09DFBC3, 0xB09EFBC3, 0xB09FFBC3, 0xB0A0FBC3, 0xB0A1FBC3, 0xB0A2FBC3, 0xB0A3FBC3, 0xB0A4FBC3, 0xB0A5FBC3, 0xB0A6FBC3, 0xB0A7FBC3, + 0xB0A8FBC3, 0xB0A9FBC3, 0xB0AAFBC3, 0xB0ABFBC3, 0xB0ACFBC3, 0xB0ADFBC3, 0xB0AEFBC3, 0xB0AFFBC3, 0xB0B0FBC3, 0xB0B1FBC3, 0xB0B2FBC3, 0xB0B3FBC3, 0xB0B4FBC3, 0xB0B5FBC3, 0xB0B6FBC3, + 0xB0B7FBC3, 0xB0B8FBC3, 0xB0B9FBC3, 0xB0BAFBC3, 0xB0BBFBC3, 0xB0BCFBC3, 0xB0BDFBC3, 0xB0BEFBC3, 0xB0BFFBC3, 0xB0C0FBC3, 0xB0C1FBC3, 0xB0C2FBC3, 0xB0C3FBC3, 0xB0C4FBC3, 0xB0C5FBC3, + 0xB0C6FBC3, 0xB0C7FBC3, 0xB0C8FBC3, 0xB0C9FBC3, 0xB0CAFBC3, 0xB0CBFBC3, 0xB0CCFBC3, 0xB0CDFBC3, 0xB0CEFBC3, 0xB0CFFBC3, 0xB0D0FBC3, 0xB0D1FBC3, 0xB0D2FBC3, 0xB0D3FBC3, 0xB0D4FBC3, + 0xB0D5FBC3, 0xB0D6FBC3, 0xB0D7FBC3, 0xB0D8FBC3, 0xB0D9FBC3, 0xB0DAFBC3, 0xB0DBFBC3, 0xB0DCFBC3, 0xB0DDFBC3, 0xB0DEFBC3, 0xB0DFFBC3, 0xB0E0FBC3, 0xB0E1FBC3, 0xB0E2FBC3, 0xB0E3FBC3, + 0xB0E4FBC3, 0xB0E5FBC3, 0xB0E6FBC3, 0xB0E7FBC3, 0xB0E8FBC3, 0xB0E9FBC3, 0xB0EAFBC3, 0xB0EBFBC3, 0xB0ECFBC3, 0xB0EDFBC3, 0xB0EEFBC3, 0xB0EFFBC3, 0xB0F0FBC3, 0xB0F1FBC3, 0xB0F2FBC3, + 0xB0F3FBC3, 0xB0F4FBC3, 0xB0F5FBC3, 0xB0F6FBC3, 0xB0F7FBC3, 0xB0F8FBC3, 0xB0F9FBC3, 0xB0FAFBC3, 0xB0FBFBC3, 0xB0FCFBC3, 0xB0FDFBC3, 0xB0FEFBC3, 0xB0FFFBC3, 0xB100FBC3, 0xB101FBC3, + 0xB102FBC3, 0xB103FBC3, 0xB104FBC3, 0xB105FBC3, 0xB106FBC3, 0xB107FBC3, 0xB108FBC3, 0xB109FBC3, 0xB10AFBC3, 0xB10BFBC3, 0xB10CFBC3, 0xB10DFBC3, 0xB10EFBC3, 0xB10FFBC3, 0xB110FBC3, + 0xB111FBC3, 0xB112FBC3, 0xB113FBC3, 0xB114FBC3, 0xB115FBC3, 0xB116FBC3, 0xB117FBC3, 0xB118FBC3, 0xB119FBC3, 0xB11AFBC3, 0xB11BFBC3, 0xB11CFBC3, 0xB11DFBC3, 0xB11EFBC3, 0xB11FFBC3, + 0xB120FBC3, 0xB121FBC3, 0xB122FBC3, 0xB123FBC3, 0xB124FBC3, 0xB125FBC3, 0xB126FBC3, 0xB127FBC3, 0xB128FBC3, 0xB129FBC3, 0xB12AFBC3, 0xB12BFBC3, 0xB12CFBC3, 0xB12DFBC3, 0xB12EFBC3, + 0xB12FFBC3, 0xB130FBC3, 0xB131FBC3, 0xB132FBC3, 0xB133FBC3, 0xB134FBC3, 0xB135FBC3, 0xB136FBC3, 0xB137FBC3, 0xB138FBC3, 0xB139FBC3, 0xB13AFBC3, 0xB13BFBC3, 0xB13CFBC3, 0xB13DFBC3, 
+ 0xB13EFBC3, 0xB13FFBC3, 0xB140FBC3, 0xB141FBC3, 0xB142FBC3, 0xB143FBC3, 0xB144FBC3, 0xB145FBC3, 0xB146FBC3, 0xB147FBC3, 0xB148FBC3, 0xB149FBC3, 0xB14AFBC3, 0xB14BFBC3, 0xB14CFBC3, + 0xB14DFBC3, 0xB14EFBC3, 0xB14FFBC3, 0xB150FBC3, 0xB151FBC3, 0xB152FBC3, 0xB153FBC3, 0xB154FBC3, 0xB155FBC3, 0xB156FBC3, 0xB157FBC3, 0xB158FBC3, 0xB159FBC3, 0xB15AFBC3, 0xB15BFBC3, + 0xB15CFBC3, 0xB15DFBC3, 0xB15EFBC3, 0xB15FFBC3, 0xB160FBC3, 0xB161FBC3, 0xB162FBC3, 0xB163FBC3, 0xB164FBC3, 0xB165FBC3, 0xB166FBC3, 0xB167FBC3, 0xB168FBC3, 0xB169FBC3, 0xB16AFBC3, + 0xB16BFBC3, 0xB16CFBC3, 0xB16DFBC3, 0xB16EFBC3, 0xB16FFBC3, 0xB170FBC3, 0xB171FBC3, 0xB172FBC3, 0xB173FBC3, 0xB174FBC3, 0xB175FBC3, 0xB176FBC3, 0xB177FBC3, 0xB178FBC3, 0xB179FBC3, + 0xB17AFBC3, 0xB17BFBC3, 0xB17CFBC3, 0xB17DFBC3, 0xB17EFBC3, 0xB17FFBC3, 0xB180FBC3, 0xB181FBC3, 0xB182FBC3, 0xB183FBC3, 0xB184FBC3, 0xB185FBC3, 0xB186FBC3, 0xB187FBC3, 0xB188FBC3, + 0xB189FBC3, 0xB18AFBC3, 0xB18BFBC3, 0xB18CFBC3, 0xB18DFBC3, 0xB18EFBC3, 0xB18FFBC3, 0xB190FBC3, 0xB191FBC3, 0xB192FBC3, 0xB193FBC3, 0xB194FBC3, 0xB195FBC3, 0xB196FBC3, 0xB197FBC3, + 0xB198FBC3, 0xB199FBC3, 0xB19AFBC3, 0xB19BFBC3, 0xB19CFBC3, 0xB19DFBC3, 0xB19EFBC3, 0xB19FFBC3, 0xB1A0FBC3, 0xB1A1FBC3, 0xB1A2FBC3, 0xB1A3FBC3, 0xB1A4FBC3, 0xB1A5FBC3, 0xB1A6FBC3, + 0xB1A7FBC3, 0xB1A8FBC3, 0xB1A9FBC3, 0xB1AAFBC3, 0xB1ABFBC3, 0xB1ACFBC3, 0xB1ADFBC3, 0xB1AEFBC3, 0xB1AFFBC3, 0xB1B0FBC3, 0xB1B1FBC3, 0xB1B2FBC3, 0xB1B3FBC3, 0xB1B4FBC3, 0xB1B5FBC3, + 0xB1B6FBC3, 0xB1B7FBC3, 0xB1B8FBC3, 0xB1B9FBC3, 0xB1BAFBC3, 0xB1BBFBC3, 0xB1BCFBC3, 0xB1BDFBC3, 0xB1BEFBC3, 0xB1BFFBC3, 0xB1C0FBC3, 0xB1C1FBC3, 0xB1C2FBC3, 0xB1C3FBC3, 0xB1C4FBC3, + 0xB1C5FBC3, 0xB1C6FBC3, 0xB1C7FBC3, 0xB1C8FBC3, 0xB1C9FBC3, 0xB1CAFBC3, 0xB1CBFBC3, 0xB1CCFBC3, 0xB1CDFBC3, 0xB1CEFBC3, 0xB1CFFBC3, 0xB1D0FBC3, 0xB1D1FBC3, 0xB1D2FBC3, 0xB1D3FBC3, + 0xB1D4FBC3, 0xB1D5FBC3, 0xB1D6FBC3, 0xB1D7FBC3, 0xB1D8FBC3, 0xB1D9FBC3, 0xB1DAFBC3, 0xB1DBFBC3, 0xB1DCFBC3, 0xB1DDFBC3, 0xB1DEFBC3, 0xB1DFFBC3, 0xB1E0FBC3, 0xB1E1FBC3, 
0xB1E2FBC3, + 0xB1E3FBC3, 0xB1E4FBC3, 0xB1E5FBC3, 0xB1E6FBC3, 0xB1E7FBC3, 0xB1E8FBC3, 0xB1E9FBC3, 0xB1EAFBC3, 0xB1EBFBC3, 0xB1ECFBC3, 0xB1EDFBC3, 0xB1EEFBC3, 0xB1EFFBC3, 0xB1F0FBC3, 0xB1F1FBC3, + 0xB1F2FBC3, 0xB1F3FBC3, 0xB1F4FBC3, 0xB1F5FBC3, 0xB1F6FBC3, 0xB1F7FBC3, 0xB1F8FBC3, 0xB1F9FBC3, 0xB1FAFBC3, 0xB1FBFBC3, 0xB1FCFBC3, 0xB1FDFBC3, 0xB1FEFBC3, 0xB1FFFBC3, 0xB200FBC3, + 0xB201FBC3, 0xB202FBC3, 0xB203FBC3, 0xB204FBC3, 0xB205FBC3, 0xB206FBC3, 0xB207FBC3, 0xB208FBC3, 0xB209FBC3, 0xB20AFBC3, 0xB20BFBC3, 0xB20CFBC3, 0xB20DFBC3, 0xB20EFBC3, 0xB20FFBC3, + 0xB210FBC3, 0xB211FBC3, 0xB212FBC3, 0xB213FBC3, 0xB214FBC3, 0xB215FBC3, 0xB216FBC3, 0xB217FBC3, 0xB218FBC3, 0xB219FBC3, 0xB21AFBC3, 0xB21BFBC3, 0xB21CFBC3, 0xB21DFBC3, 0xB21EFBC3, + 0xB21FFBC3, 0xB220FBC3, 0xB221FBC3, 0xB222FBC3, 0xB223FBC3, 0xB224FBC3, 0xB225FBC3, 0xB226FBC3, 0xB227FBC3, 0xB228FBC3, 0xB229FBC3, 0xB22AFBC3, 0xB22BFBC3, 0xB22CFBC3, 0xB22DFBC3, + 0xB22EFBC3, 0xB22FFBC3, 0xB230FBC3, 0xB231FBC3, 0xB232FBC3, 0xB233FBC3, 0xB234FBC3, 0xB235FBC3, 0xB236FBC3, 0xB237FBC3, 0xB238FBC3, 0xB239FBC3, 0xB23AFBC3, 0xB23BFBC3, 0xB23CFBC3, + 0xB23DFBC3, 0xB23EFBC3, 0xB23FFBC3, 0xB240FBC3, 0xB241FBC3, 0xB242FBC3, 0xB243FBC3, 0xB244FBC3, 0xB245FBC3, 0xB246FBC3, 0xB247FBC3, 0xB248FBC3, 0xB249FBC3, 0xB24AFBC3, 0xB24BFBC3, + 0xB24CFBC3, 0xB24DFBC3, 0xB24EFBC3, 0xB24FFBC3, 0xB250FBC3, 0xB251FBC3, 0xB252FBC3, 0xB253FBC3, 0xB254FBC3, 0xB255FBC3, 0xB256FBC3, 0xB257FBC3, 0xB258FBC3, 0xB259FBC3, 0xB25AFBC3, + 0xB25BFBC3, 0xB25CFBC3, 0xB25DFBC3, 0xB25EFBC3, 0xB25FFBC3, 0xB260FBC3, 0xB261FBC3, 0xB262FBC3, 0xB263FBC3, 0xB264FBC3, 0xB265FBC3, 0xB266FBC3, 0xB267FBC3, 0xB268FBC3, 0xB269FBC3, + 0xB26AFBC3, 0xB26BFBC3, 0xB26CFBC3, 0xB26DFBC3, 0xB26EFBC3, 0xB26FFBC3, 0xB270FBC3, 0xB271FBC3, 0xB272FBC3, 0xB273FBC3, 0xB274FBC3, 0xB275FBC3, 0xB276FBC3, 0xB277FBC3, 0xB278FBC3, + 0xB279FBC3, 0xB27AFBC3, 0xB27BFBC3, 0xB27CFBC3, 0xB27DFBC3, 0xB27EFBC3, 0xB27FFBC3, 0xB280FBC3, 0xB281FBC3, 0xB282FBC3, 0xB283FBC3, 0xB284FBC3, 0xB285FBC3, 
0xB286FBC3, 0xB287FBC3, + 0xB288FBC3, 0xB289FBC3, 0xB28AFBC3, 0xB28BFBC3, 0xB28CFBC3, 0xB28DFBC3, 0xB28EFBC3, 0xB28FFBC3, 0xB290FBC3, 0xB291FBC3, 0xB292FBC3, 0xB293FBC3, 0xB294FBC3, 0xB295FBC3, 0xB296FBC3, + 0xB297FBC3, 0xB298FBC3, 0xB299FBC3, 0xB29AFBC3, 0xB29BFBC3, 0xB29CFBC3, 0xB29DFBC3, 0xB29EFBC3, 0xB29FFBC3, 0xB2A0FBC3, 0xB2A1FBC3, 0xB2A2FBC3, 0xB2A3FBC3, 0xB2A4FBC3, 0xB2A5FBC3, + 0xB2A6FBC3, 0xB2A7FBC3, 0xB2A8FBC3, 0xB2A9FBC3, 0xB2AAFBC3, 0xB2ABFBC3, 0xB2ACFBC3, 0xB2ADFBC3, 0xB2AEFBC3, 0xB2AFFBC3, 0xB2B0FBC3, 0xB2B1FBC3, 0xB2B2FBC3, 0xB2B3FBC3, 0xB2B4FBC3, + 0xB2B5FBC3, 0xB2B6FBC3, 0xB2B7FBC3, 0xB2B8FBC3, 0xB2B9FBC3, 0xB2BAFBC3, 0xB2BBFBC3, 0xB2BCFBC3, 0xB2BDFBC3, 0xB2BEFBC3, 0xB2BFFBC3, 0xB2C0FBC3, 0xB2C1FBC3, 0xB2C2FBC3, 0xB2C3FBC3, + 0xB2C4FBC3, 0xB2C5FBC3, 0xB2C6FBC3, 0xB2C7FBC3, 0xB2C8FBC3, 0xB2C9FBC3, 0xB2CAFBC3, 0xB2CBFBC3, 0xB2CCFBC3, 0xB2CDFBC3, 0xB2CEFBC3, 0xB2CFFBC3, 0xB2D0FBC3, 0xB2D1FBC3, 0xB2D2FBC3, + 0xB2D3FBC3, 0xB2D4FBC3, 0xB2D5FBC3, 0xB2D6FBC3, 0xB2D7FBC3, 0xB2D8FBC3, 0xB2D9FBC3, 0xB2DAFBC3, 0xB2DBFBC3, 0xB2DCFBC3, 0xB2DDFBC3, 0xB2DEFBC3, 0xB2DFFBC3, 0xB2E0FBC3, 0xB2E1FBC3, + 0xB2E2FBC3, 0xB2E3FBC3, 0xB2E4FBC3, 0xB2E5FBC3, 0xB2E6FBC3, 0xB2E7FBC3, 0xB2E8FBC3, 0xB2E9FBC3, 0xB2EAFBC3, 0xB2EBFBC3, 0xB2ECFBC3, 0xB2EDFBC3, 0xB2EEFBC3, 0xB2EFFBC3, 0xB2F0FBC3, + 0xB2F1FBC3, 0xB2F2FBC3, 0xB2F3FBC3, 0xB2F4FBC3, 0xB2F5FBC3, 0xB2F6FBC3, 0xB2F7FBC3, 0xB2F8FBC3, 0xB2F9FBC3, 0xB2FAFBC3, 0xB2FBFBC3, 0xB2FCFBC3, 0xB2FDFBC3, 0xB2FEFBC3, 0xB2FFFBC3, + 0xB300FBC3, 0xB301FBC3, 0xB302FBC3, 0xB303FBC3, 0xB304FBC3, 0xB305FBC3, 0xB306FBC3, 0xB307FBC3, 0xB308FBC3, 0xB309FBC3, 0xB30AFBC3, 0xB30BFBC3, 0xB30CFBC3, 0xB30DFBC3, 0xB30EFBC3, + 0xB30FFBC3, 0xB310FBC3, 0xB311FBC3, 0xB312FBC3, 0xB313FBC3, 0xB314FBC3, 0xB315FBC3, 0xB316FBC3, 0xB317FBC3, 0xB318FBC3, 0xB319FBC3, 0xB31AFBC3, 0xB31BFBC3, 0xB31CFBC3, 0xB31DFBC3, + 0xB31EFBC3, 0xB31FFBC3, 0xB320FBC3, 0xB321FBC3, 0xB322FBC3, 0xB323FBC3, 0xB324FBC3, 0xB325FBC3, 0xB326FBC3, 0xB327FBC3, 0xB328FBC3, 0xB329FBC3, 
0xB32AFBC3, 0xB32BFBC3, 0xB32CFBC3, + 0xB32DFBC3, 0xB32EFBC3, 0xB32FFBC3, 0xB330FBC3, 0xB331FBC3, 0xB332FBC3, 0xB333FBC3, 0xB334FBC3, 0xB335FBC3, 0xB336FBC3, 0xB337FBC3, 0xB338FBC3, 0xB339FBC3, 0xB33AFBC3, 0xB33BFBC3, + 0xB33CFBC3, 0xB33DFBC3, 0xB33EFBC3, 0xB33FFBC3, 0xB340FBC3, 0xB341FBC3, 0xB342FBC3, 0xB343FBC3, 0xB344FBC3, 0xB345FBC3, 0xB346FBC3, 0xB347FBC3, 0xB348FBC3, 0xB349FBC3, 0xB34AFBC3, + 0xB34BFBC3, 0xB34CFBC3, 0xB34DFBC3, 0xB34EFBC3, 0xB34FFBC3, 0xB350FBC3, 0xB351FBC3, 0xB352FBC3, 0xB353FBC3, 0xB354FBC3, 0xB355FBC3, 0xB356FBC3, 0xB357FBC3, 0xB358FBC3, 0xB359FBC3, + 0xB35AFBC3, 0xB35BFBC3, 0xB35CFBC3, 0xB35DFBC3, 0xB35EFBC3, 0xB35FFBC3, 0xB360FBC3, 0xB361FBC3, 0xB362FBC3, 0xB363FBC3, 0xB364FBC3, 0xB365FBC3, 0xB366FBC3, 0xB367FBC3, 0xB368FBC3, + 0xB369FBC3, 0xB36AFBC3, 0xB36BFBC3, 0xB36CFBC3, 0xB36DFBC3, 0xB36EFBC3, 0xB36FFBC3, 0xB370FBC3, 0xB371FBC3, 0xB372FBC3, 0xB373FBC3, 0xB374FBC3, 0xB375FBC3, 0xB376FBC3, 0xB377FBC3, + 0xB378FBC3, 0xB379FBC3, 0xB37AFBC3, 0xB37BFBC3, 0xB37CFBC3, 0xB37DFBC3, 0xB37EFBC3, 0xB37FFBC3, 0xB380FBC3, 0xB381FBC3, 0xB382FBC3, 0xB383FBC3, 0xB384FBC3, 0xB385FBC3, 0xB386FBC3, + 0xB387FBC3, 0xB388FBC3, 0xB389FBC3, 0xB38AFBC3, 0xB38BFBC3, 0xB38CFBC3, 0xB38DFBC3, 0xB38EFBC3, 0xB38FFBC3, 0xB390FBC3, 0xB391FBC3, 0xB392FBC3, 0xB393FBC3, 0xB394FBC3, 0xB395FBC3, + 0xB396FBC3, 0xB397FBC3, 0xB398FBC3, 0xB399FBC3, 0xB39AFBC3, 0xB39BFBC3, 0xB39CFBC3, 0xB39DFBC3, 0xB39EFBC3, 0xB39FFBC3, 0xB3A0FBC3, 0xB3A1FBC3, 0xB3A2FBC3, 0xB3A3FBC3, 0xB3A4FBC3, + 0xB3A5FBC3, 0xB3A6FBC3, 0xB3A7FBC3, 0xB3A8FBC3, 0xB3A9FBC3, 0xB3AAFBC3, 0xB3ABFBC3, 0xB3ACFBC3, 0xB3ADFBC3, 0xB3AEFBC3, 0xB3AFFBC3, 0xB3B0FBC3, 0xB3B1FBC3, 0xB3B2FBC3, 0xB3B3FBC3, + 0xB3B4FBC3, 0xB3B5FBC3, 0xB3B6FBC3, 0xB3B7FBC3, 0xB3B8FBC3, 0xB3B9FBC3, 0xB3BAFBC3, 0xB3BBFBC3, 0xB3BCFBC3, 0xB3BDFBC3, 0xB3BEFBC3, 0xB3BFFBC3, 0xB3C0FBC3, 0xB3C1FBC3, 0xB3C2FBC3, + 0xB3C3FBC3, 0xB3C4FBC3, 0xB3C5FBC3, 0xB3C6FBC3, 0xB3C7FBC3, 0xB3C8FBC3, 0xB3C9FBC3, 0xB3CAFBC3, 0xB3CBFBC3, 0xB3CCFBC3, 0xB3CDFBC3, 
0xB3CEFBC3, 0xB3CFFBC3, 0xB3D0FBC3, 0xB3D1FBC3, + 0xB3D2FBC3, 0xB3D3FBC3, 0xB3D4FBC3, 0xB3D5FBC3, 0xB3D6FBC3, 0xB3D7FBC3, 0xB3D8FBC3, 0xB3D9FBC3, 0xB3DAFBC3, 0xB3DBFBC3, 0xB3DCFBC3, 0xB3DDFBC3, 0xB3DEFBC3, 0xB3DFFBC3, 0xB3E0FBC3, + 0xB3E1FBC3, 0xB3E2FBC3, 0xB3E3FBC3, 0xB3E4FBC3, 0xB3E5FBC3, 0xB3E6FBC3, 0xB3E7FBC3, 0xB3E8FBC3, 0xB3E9FBC3, 0xB3EAFBC3, 0xB3EBFBC3, 0xB3ECFBC3, 0xB3EDFBC3, 0xB3EEFBC3, 0xB3EFFBC3, + 0xB3F0FBC3, 0xB3F1FBC3, 0xB3F2FBC3, 0xB3F3FBC3, 0xB3F4FBC3, 0xB3F5FBC3, 0xB3F6FBC3, 0xB3F7FBC3, 0xB3F8FBC3, 0xB3F9FBC3, 0xB3FAFBC3, 0xB3FBFBC3, 0xB3FCFBC3, 0xB3FDFBC3, 0xB3FEFBC3, + 0xB3FFFBC3, 0xB400FBC3, 0xB401FBC3, 0xB402FBC3, 0xB403FBC3, 0xB404FBC3, 0xB405FBC3, 0xB406FBC3, 0xB407FBC3, 0xB408FBC3, 0xB409FBC3, 0xB40AFBC3, 0xB40BFBC3, 0xB40CFBC3, 0xB40DFBC3, + 0xB40EFBC3, 0xB40FFBC3, 0xB410FBC3, 0xB411FBC3, 0xB412FBC3, 0xB413FBC3, 0xB414FBC3, 0xB415FBC3, 0xB416FBC3, 0xB417FBC3, 0xB418FBC3, 0xB419FBC3, 0xB41AFBC3, 0xB41BFBC3, 0xB41CFBC3, + 0xB41DFBC3, 0xB41EFBC3, 0xB41FFBC3, 0xB420FBC3, 0xB421FBC3, 0xB422FBC3, 0xB423FBC3, 0xB424FBC3, 0xB425FBC3, 0xB426FBC3, 0xB427FBC3, 0xB428FBC3, 0xB429FBC3, 0xB42AFBC3, 0xB42BFBC3, + 0xB42CFBC3, 0xB42DFBC3, 0xB42EFBC3, 0xB42FFBC3, 0xB430FBC3, 0xB431FBC3, 0xB432FBC3, 0xB433FBC3, 0xB434FBC3, 0xB435FBC3, 0xB436FBC3, 0xB437FBC3, 0xB438FBC3, 0xB439FBC3, 0xB43AFBC3, + 0xB43BFBC3, 0xB43CFBC3, 0xB43DFBC3, 0xB43EFBC3, 0xB43FFBC3, 0xB440FBC3, 0xB441FBC3, 0xB442FBC3, 0xB443FBC3, 0xB444FBC3, 0xB445FBC3, 0xB446FBC3, 0xB447FBC3, 0xB448FBC3, 0xB449FBC3, + 0xB44AFBC3, 0xB44BFBC3, 0xB44CFBC3, 0xB44DFBC3, 0xB44EFBC3, 0xB44FFBC3, 0xB450FBC3, 0xB451FBC3, 0xB452FBC3, 0xB453FBC3, 0xB454FBC3, 0xB455FBC3, 0xB456FBC3, 0xB457FBC3, 0xB458FBC3, + 0xB459FBC3, 0xB45AFBC3, 0xB45BFBC3, 0xB45CFBC3, 0xB45DFBC3, 0xB45EFBC3, 0xB45FFBC3, 0xB460FBC3, 0xB461FBC3, 0xB462FBC3, 0xB463FBC3, 0xB464FBC3, 0xB465FBC3, 0xB466FBC3, 0xB467FBC3, + 0xB468FBC3, 0xB469FBC3, 0xB46AFBC3, 0xB46BFBC3, 0xB46CFBC3, 0xB46DFBC3, 0xB46EFBC3, 0xB46FFBC3, 0xB470FBC3, 0xB471FBC3, 
0xB472FBC3, 0xB473FBC3, 0xB474FBC3, 0xB475FBC3, 0xB476FBC3, + 0xB477FBC3, 0xB478FBC3, 0xB479FBC3, 0xB47AFBC3, 0xB47BFBC3, 0xB47CFBC3, 0xB47DFBC3, 0xB47EFBC3, 0xB47FFBC3, 0xB480FBC3, 0xB481FBC3, 0xB482FBC3, 0xB483FBC3, 0xB484FBC3, 0xB485FBC3, + 0xB486FBC3, 0xB487FBC3, 0xB488FBC3, 0xB489FBC3, 0xB48AFBC3, 0xB48BFBC3, 0xB48CFBC3, 0xB48DFBC3, 0xB48EFBC3, 0xB48FFBC3, 0xB490FBC3, 0xB491FBC3, 0xB492FBC3, 0xB493FBC3, 0xB494FBC3, + 0xB495FBC3, 0xB496FBC3, 0xB497FBC3, 0xB498FBC3, 0xB499FBC3, 0xB49AFBC3, 0xB49BFBC3, 0xB49CFBC3, 0xB49DFBC3, 0xB49EFBC3, 0xB49FFBC3, 0xB4A0FBC3, 0xB4A1FBC3, 0xB4A2FBC3, 0xB4A3FBC3, + 0xB4A4FBC3, 0xB4A5FBC3, 0xB4A6FBC3, 0xB4A7FBC3, 0xB4A8FBC3, 0xB4A9FBC3, 0xB4AAFBC3, 0xB4ABFBC3, 0xB4ACFBC3, 0xB4ADFBC3, 0xB4AEFBC3, 0xB4AFFBC3, 0xB4B0FBC3, 0xB4B1FBC3, 0xB4B2FBC3, + 0xB4B3FBC3, 0xB4B4FBC3, 0xB4B5FBC3, 0xB4B6FBC3, 0xB4B7FBC3, 0xB4B8FBC3, 0xB4B9FBC3, 0xB4BAFBC3, 0xB4BBFBC3, 0xB4BCFBC3, 0xB4BDFBC3, 0xB4BEFBC3, 0xB4BFFBC3, 0xB4C0FBC3, 0xB4C1FBC3, + 0xB4C2FBC3, 0xB4C3FBC3, 0xB4C4FBC3, 0xB4C5FBC3, 0xB4C6FBC3, 0xB4C7FBC3, 0xB4C8FBC3, 0xB4C9FBC3, 0xB4CAFBC3, 0xB4CBFBC3, 0xB4CCFBC3, 0xB4CDFBC3, 0xB4CEFBC3, 0xB4CFFBC3, 0xB4D0FBC3, + 0xB4D1FBC3, 0xB4D2FBC3, 0xB4D3FBC3, 0xB4D4FBC3, 0xB4D5FBC3, 0xB4D6FBC3, 0xB4D7FBC3, 0xB4D8FBC3, 0xB4D9FBC3, 0xB4DAFBC3, 0xB4DBFBC3, 0xB4DCFBC3, 0xB4DDFBC3, 0xB4DEFBC3, 0xB4DFFBC3, + 0xB4E0FBC3, 0xB4E1FBC3, 0xB4E2FBC3, 0xB4E3FBC3, 0xB4E4FBC3, 0xB4E5FBC3, 0xB4E6FBC3, 0xB4E7FBC3, 0xB4E8FBC3, 0xB4E9FBC3, 0xB4EAFBC3, 0xB4EBFBC3, 0xB4ECFBC3, 0xB4EDFBC3, 0xB4EEFBC3, + 0xB4EFFBC3, 0xB4F0FBC3, 0xB4F1FBC3, 0xB4F2FBC3, 0xB4F3FBC3, 0xB4F4FBC3, 0xB4F5FBC3, 0xB4F6FBC3, 0xB4F7FBC3, 0xB4F8FBC3, 0xB4F9FBC3, 0xB4FAFBC3, 0xB4FBFBC3, 0xB4FCFBC3, 0xB4FDFBC3, + 0xB4FEFBC3, 0xB4FFFBC3, 0xB500FBC3, 0xB501FBC3, 0xB502FBC3, 0xB503FBC3, 0xB504FBC3, 0xB505FBC3, 0xB506FBC3, 0xB507FBC3, 0xB508FBC3, 0xB509FBC3, 0xB50AFBC3, 0xB50BFBC3, 0xB50CFBC3, + 0xB50DFBC3, 0xB50EFBC3, 0xB50FFBC3, 0xB510FBC3, 0xB511FBC3, 0xB512FBC3, 0xB513FBC3, 0xB514FBC3, 0xB515FBC3, 
0xB516FBC3, 0xB517FBC3, 0xB518FBC3, 0xB519FBC3, 0xB51AFBC3, 0xB51BFBC3, + 0xB51CFBC3, 0xB51DFBC3, 0xB51EFBC3, 0xB51FFBC3, 0xB520FBC3, 0xB521FBC3, 0xB522FBC3, 0xB523FBC3, 0xB524FBC3, 0xB525FBC3, 0xB526FBC3, 0xB527FBC3, 0xB528FBC3, 0xB529FBC3, 0xB52AFBC3, + 0xB52BFBC3, 0xB52CFBC3, 0xB52DFBC3, 0xB52EFBC3, 0xB52FFBC3, 0xB530FBC3, 0xB531FBC3, 0xB532FBC3, 0xB533FBC3, 0xB534FBC3, 0xB535FBC3, 0xB536FBC3, 0xB537FBC3, 0xB538FBC3, 0xB539FBC3, + 0xB53AFBC3, 0xB53BFBC3, 0xB53CFBC3, 0xB53DFBC3, 0xB53EFBC3, 0xB53FFBC3, 0xB540FBC3, 0xB541FBC3, 0xB542FBC3, 0xB543FBC3, 0xB544FBC3, 0xB545FBC3, 0xB546FBC3, 0xB547FBC3, 0xB548FBC3, + 0xB549FBC3, 0xB54AFBC3, 0xB54BFBC3, 0xB54CFBC3, 0xB54DFBC3, 0xB54EFBC3, 0xB54FFBC3, 0xB550FBC3, 0xB551FBC3, 0xB552FBC3, 0xB553FBC3, 0xB554FBC3, 0xB555FBC3, 0xB556FBC3, 0xB557FBC3, + 0xB558FBC3, 0xB559FBC3, 0xB55AFBC3, 0xB55BFBC3, 0xB55CFBC3, 0xB55DFBC3, 0xB55EFBC3, 0xB55FFBC3, 0xB560FBC3, 0xB561FBC3, 0xB562FBC3, 0xB563FBC3, 0xB564FBC3, 0xB565FBC3, 0xB566FBC3, + 0xB567FBC3, 0xB568FBC3, 0xB569FBC3, 0xB56AFBC3, 0xB56BFBC3, 0xB56CFBC3, 0xB56DFBC3, 0xB56EFBC3, 0xB56FFBC3, 0xB570FBC3, 0xB571FBC3, 0xB572FBC3, 0xB573FBC3, 0xB574FBC3, 0xB575FBC3, + 0xB576FBC3, 0xB577FBC3, 0xB578FBC3, 0xB579FBC3, 0xB57AFBC3, 0xB57BFBC3, 0xB57CFBC3, 0xB57DFBC3, 0xB57EFBC3, 0xB57FFBC3, 0xB580FBC3, 0xB581FBC3, 0xB582FBC3, 0xB583FBC3, 0xB584FBC3, + 0xB585FBC3, 0xB586FBC3, 0xB587FBC3, 0xB588FBC3, 0xB589FBC3, 0xB58AFBC3, 0xB58BFBC3, 0xB58CFBC3, 0xB58DFBC3, 0xB58EFBC3, 0xB58FFBC3, 0xB590FBC3, 0xB591FBC3, 0xB592FBC3, 0xB593FBC3, + 0xB594FBC3, 0xB595FBC3, 0xB596FBC3, 0xB597FBC3, 0xB598FBC3, 0xB599FBC3, 0xB59AFBC3, 0xB59BFBC3, 0xB59CFBC3, 0xB59DFBC3, 0xB59EFBC3, 0xB59FFBC3, 0xB5A0FBC3, 0xB5A1FBC3, 0xB5A2FBC3, + 0xB5A3FBC3, 0xB5A4FBC3, 0xB5A5FBC3, 0xB5A6FBC3, 0xB5A7FBC3, 0xB5A8FBC3, 0xB5A9FBC3, 0xB5AAFBC3, 0xB5ABFBC3, 0xB5ACFBC3, 0xB5ADFBC3, 0xB5AEFBC3, 0xB5AFFBC3, 0xB5B0FBC3, 0xB5B1FBC3, + 0xB5B2FBC3, 0xB5B3FBC3, 0xB5B4FBC3, 0xB5B5FBC3, 0xB5B6FBC3, 0xB5B7FBC3, 0xB5B8FBC3, 0xB5B9FBC3, 
0xB5BAFBC3, 0xB5BBFBC3, 0xB5BCFBC3, 0xB5BDFBC3, 0xB5BEFBC3, 0xB5BFFBC3, 0xB5C0FBC3, + 0xB5C1FBC3, 0xB5C2FBC3, 0xB5C3FBC3, 0xB5C4FBC3, 0xB5C5FBC3, 0xB5C6FBC3, 0xB5C7FBC3, 0xB5C8FBC3, 0xB5C9FBC3, 0xB5CAFBC3, 0xB5CBFBC3, 0xB5CCFBC3, 0xB5CDFBC3, 0xB5CEFBC3, 0xB5CFFBC3, + 0xB5D0FBC3, 0xB5D1FBC3, 0xB5D2FBC3, 0xB5D3FBC3, 0xB5D4FBC3, 0xB5D5FBC3, 0xB5D6FBC3, 0xB5D7FBC3, 0xB5D8FBC3, 0xB5D9FBC3, 0xB5DAFBC3, 0xB5DBFBC3, 0xB5DCFBC3, 0xB5DDFBC3, 0xB5DEFBC3, + 0xB5DFFBC3, 0xB5E0FBC3, 0xB5E1FBC3, 0xB5E2FBC3, 0xB5E3FBC3, 0xB5E4FBC3, 0xB5E5FBC3, 0xB5E6FBC3, 0xB5E7FBC3, 0xB5E8FBC3, 0xB5E9FBC3, 0xB5EAFBC3, 0xB5EBFBC3, 0xB5ECFBC3, 0xB5EDFBC3, + 0xB5EEFBC3, 0xB5EFFBC3, 0xB5F0FBC3, 0xB5F1FBC3, 0xB5F2FBC3, 0xB5F3FBC3, 0xB5F4FBC3, 0xB5F5FBC3, 0xB5F6FBC3, 0xB5F7FBC3, 0xB5F8FBC3, 0xB5F9FBC3, 0xB5FAFBC3, 0xB5FBFBC3, 0xB5FCFBC3, + 0xB5FDFBC3, 0xB5FEFBC3, 0xB5FFFBC3, 0xB600FBC3, 0xB601FBC3, 0xB602FBC3, 0xB603FBC3, 0xB604FBC3, 0xB605FBC3, 0xB606FBC3, 0xB607FBC3, 0xB608FBC3, 0xB609FBC3, 0xB60AFBC3, 0xB60BFBC3, + 0xB60CFBC3, 0xB60DFBC3, 0xB60EFBC3, 0xB60FFBC3, 0xB610FBC3, 0xB611FBC3, 0xB612FBC3, 0xB613FBC3, 0xB614FBC3, 0xB615FBC3, 0xB616FBC3, 0xB617FBC3, 0xB618FBC3, 0xB619FBC3, 0xB61AFBC3, + 0xB61BFBC3, 0xB61CFBC3, 0xB61DFBC3, 0xB61EFBC3, 0xB61FFBC3, 0xB620FBC3, 0xB621FBC3, 0xB622FBC3, 0xB623FBC3, 0xB624FBC3, 0xB625FBC3, 0xB626FBC3, 0xB627FBC3, 0xB628FBC3, 0xB629FBC3, + 0xB62AFBC3, 0xB62BFBC3, 0xB62CFBC3, 0xB62DFBC3, 0xB62EFBC3, 0xB62FFBC3, 0xB630FBC3, 0xB631FBC3, 0xB632FBC3, 0xB633FBC3, 0xB634FBC3, 0xB635FBC3, 0xB636FBC3, 0xB637FBC3, 0xB638FBC3, + 0xB639FBC3, 0xB63AFBC3, 0xB63BFBC3, 0xB63CFBC3, 0xB63DFBC3, 0xB63EFBC3, 0xB63FFBC3, 0xB640FBC3, 0xB641FBC3, 0xB642FBC3, 0xB643FBC3, 0xB644FBC3, 0xB645FBC3, 0xB646FBC3, 0xB647FBC3, + 0xB648FBC3, 0xB649FBC3, 0xB64AFBC3, 0xB64BFBC3, 0xB64CFBC3, 0xB64DFBC3, 0xB64EFBC3, 0xB64FFBC3, 0xB650FBC3, 0xB651FBC3, 0xB652FBC3, 0xB653FBC3, 0xB654FBC3, 0xB655FBC3, 0xB656FBC3, + 0xB657FBC3, 0xB658FBC3, 0xB659FBC3, 0xB65AFBC3, 0xB65BFBC3, 0xB65CFBC3, 0xB65DFBC3, 
0xB65EFBC3, 0xB65FFBC3, 0xB660FBC3, 0xB661FBC3, 0xB662FBC3, 0xB663FBC3, 0xB664FBC3, 0xB665FBC3, + 0xB666FBC3, 0xB667FBC3, 0xB668FBC3, 0xB669FBC3, 0xB66AFBC3, 0xB66BFBC3, 0xB66CFBC3, 0xB66DFBC3, 0xB66EFBC3, 0xB66FFBC3, 0xB670FBC3, 0xB671FBC3, 0xB672FBC3, 0xB673FBC3, 0xB674FBC3, + 0xB675FBC3, 0xB676FBC3, 0xB677FBC3, 0xB678FBC3, 0xB679FBC3, 0xB67AFBC3, 0xB67BFBC3, 0xB67CFBC3, 0xB67DFBC3, 0xB67EFBC3, 0xB67FFBC3, 0xB680FBC3, 0xB681FBC3, 0xB682FBC3, 0xB683FBC3, + 0xB684FBC3, 0xB685FBC3, 0xB686FBC3, 0xB687FBC3, 0xB688FBC3, 0xB689FBC3, 0xB68AFBC3, 0xB68BFBC3, 0xB68CFBC3, 0xB68DFBC3, 0xB68EFBC3, 0xB68FFBC3, 0xB690FBC3, 0xB691FBC3, 0xB692FBC3, + 0xB693FBC3, 0xB694FBC3, 0xB695FBC3, 0xB696FBC3, 0xB697FBC3, 0xB698FBC3, 0xB699FBC3, 0xB69AFBC3, 0xB69BFBC3, 0xB69CFBC3, 0xB69DFBC3, 0xB69EFBC3, 0xB69FFBC3, 0xB6A0FBC3, 0xB6A1FBC3, + 0xB6A2FBC3, 0xB6A3FBC3, 0xB6A4FBC3, 0xB6A5FBC3, 0xB6A6FBC3, 0xB6A7FBC3, 0xB6A8FBC3, 0xB6A9FBC3, 0xB6AAFBC3, 0xB6ABFBC3, 0xB6ACFBC3, 0xB6ADFBC3, 0xB6AEFBC3, 0xB6AFFBC3, 0xB6B0FBC3, + 0xB6B1FBC3, 0xB6B2FBC3, 0xB6B3FBC3, 0xB6B4FBC3, 0xB6B5FBC3, 0xB6B6FBC3, 0xB6B7FBC3, 0xB6B8FBC3, 0xB6B9FBC3, 0xB6BAFBC3, 0xB6BBFBC3, 0xB6BCFBC3, 0xB6BDFBC3, 0xB6BEFBC3, 0xB6BFFBC3, + 0xB6C0FBC3, 0xB6C1FBC3, 0xB6C2FBC3, 0xB6C3FBC3, 0xB6C4FBC3, 0xB6C5FBC3, 0xB6C6FBC3, 0xB6C7FBC3, 0xB6C8FBC3, 0xB6C9FBC3, 0xB6CAFBC3, 0xB6CBFBC3, 0xB6CCFBC3, 0xB6CDFBC3, 0xB6CEFBC3, + 0xB6CFFBC3, 0xB6D0FBC3, 0xB6D1FBC3, 0xB6D2FBC3, 0xB6D3FBC3, 0xB6D4FBC3, 0xB6D5FBC3, 0xB6D6FBC3, 0xB6D7FBC3, 0xB6D8FBC3, 0xB6D9FBC3, 0xB6DAFBC3, 0xB6DBFBC3, 0xB6DCFBC3, 0xB6DDFBC3, + 0xB6DEFBC3, 0xB6DFFBC3, 0xB6E0FBC3, 0xB6E1FBC3, 0xB6E2FBC3, 0xB6E3FBC3, 0xB6E4FBC3, 0xB6E5FBC3, 0xB6E6FBC3, 0xB6E7FBC3, 0xB6E8FBC3, 0xB6E9FBC3, 0xB6EAFBC3, 0xB6EBFBC3, 0xB6ECFBC3, + 0xB6EDFBC3, 0xB6EEFBC3, 0xB6EFFBC3, 0xB6F0FBC3, 0xB6F1FBC3, 0xB6F2FBC3, 0xB6F3FBC3, 0xB6F4FBC3, 0xB6F5FBC3, 0xB6F6FBC3, 0xB6F7FBC3, 0xB6F8FBC3, 0xB6F9FBC3, 0xB6FAFBC3, 0xB6FBFBC3, + 0xB6FCFBC3, 0xB6FDFBC3, 0xB6FEFBC3, 0xB6FFFBC3, 0xB700FBC3, 0xB701FBC3, 
0xB702FBC3, 0xB703FBC3, 0xB704FBC3, 0xB705FBC3, 0xB706FBC3, 0xB707FBC3, 0xB708FBC3, 0xB709FBC3, 0xB70AFBC3, + 0xB70BFBC3, 0xB70CFBC3, 0xB70DFBC3, 0xB70EFBC3, 0xB70FFBC3, 0xB710FBC3, 0xB711FBC3, 0xB712FBC3, 0xB713FBC3, 0xB714FBC3, 0xB715FBC3, 0xB716FBC3, 0xB717FBC3, 0xB718FBC3, 0xB719FBC3, + 0xB71AFBC3, 0xB71BFBC3, 0xB71CFBC3, 0xB71DFBC3, 0xB71EFBC3, 0xB71FFBC3, 0xB720FBC3, 0xB721FBC3, 0xB722FBC3, 0xB723FBC3, 0xB724FBC3, 0xB725FBC3, 0xB726FBC3, 0xB727FBC3, 0xB728FBC3, + 0xB729FBC3, 0xB72AFBC3, 0xB72BFBC3, 0xB72CFBC3, 0xB72DFBC3, 0xB72EFBC3, 0xB72FFBC3, 0xB730FBC3, 0xB731FBC3, 0xB732FBC3, 0xB733FBC3, 0xB734FBC3, 0xB735FBC3, 0xB736FBC3, 0xB737FBC3, + 0xB738FBC3, 0xB739FBC3, 0xB73AFBC3, 0xB73BFBC3, 0xB73CFBC3, 0xB73DFBC3, 0xB73EFBC3, 0xB73FFBC3, 0xB740FBC3, 0xB741FBC3, 0xB742FBC3, 0xB743FBC3, 0xB744FBC3, 0xB745FBC3, 0xB746FBC3, + 0xB747FBC3, 0xB748FBC3, 0xB749FBC3, 0xB74AFBC3, 0xB74BFBC3, 0xB74CFBC3, 0xB74DFBC3, 0xB74EFBC3, 0xB74FFBC3, 0xB750FBC3, 0xB751FBC3, 0xB752FBC3, 0xB753FBC3, 0xB754FBC3, 0xB755FBC3, + 0xB756FBC3, 0xB757FBC3, 0xB758FBC3, 0xB759FBC3, 0xB75AFBC3, 0xB75BFBC3, 0xB75CFBC3, 0xB75DFBC3, 0xB75EFBC3, 0xB75FFBC3, 0xB760FBC3, 0xB761FBC3, 0xB762FBC3, 0xB763FBC3, 0xB764FBC3, + 0xB765FBC3, 0xB766FBC3, 0xB767FBC3, 0xB768FBC3, 0xB769FBC3, 0xB76AFBC3, 0xB76BFBC3, 0xB76CFBC3, 0xB76DFBC3, 0xB76EFBC3, 0xB76FFBC3, 0xB770FBC3, 0xB771FBC3, 0xB772FBC3, 0xB773FBC3, + 0xB774FBC3, 0xB775FBC3, 0xB776FBC3, 0xB777FBC3, 0xB778FBC3, 0xB779FBC3, 0xB77AFBC3, 0xB77BFBC3, 0xB77CFBC3, 0xB77DFBC3, 0xB77EFBC3, 0xB77FFBC3, 0xB780FBC3, 0xB781FBC3, 0xB782FBC3, + 0xB783FBC3, 0xB784FBC3, 0xB785FBC3, 0xB786FBC3, 0xB787FBC3, 0xB788FBC3, 0xB789FBC3, 0xB78AFBC3, 0xB78BFBC3, 0xB78CFBC3, 0xB78DFBC3, 0xB78EFBC3, 0xB78FFBC3, 0xB790FBC3, 0xB791FBC3, + 0xB792FBC3, 0xB793FBC3, 0xB794FBC3, 0xB795FBC3, 0xB796FBC3, 0xB797FBC3, 0xB798FBC3, 0xB799FBC3, 0xB79AFBC3, 0xB79BFBC3, 0xB79CFBC3, 0xB79DFBC3, 0xB79EFBC3, 0xB79FFBC3, 0xB7A0FBC3, + 0xB7A1FBC3, 0xB7A2FBC3, 0xB7A3FBC3, 0xB7A4FBC3, 0xB7A5FBC3, 
0xB7A6FBC3, 0xB7A7FBC3, 0xB7A8FBC3, 0xB7A9FBC3, 0xB7AAFBC3, 0xB7ABFBC3, 0xB7ACFBC3, 0xB7ADFBC3, 0xB7AEFBC3, 0xB7AFFBC3, + 0xB7B0FBC3, 0xB7B1FBC3, 0xB7B2FBC3, 0xB7B3FBC3, 0xB7B4FBC3, 0xB7B5FBC3, 0xB7B6FBC3, 0xB7B7FBC3, 0xB7B8FBC3, 0xB7B9FBC3, 0xB7BAFBC3, 0xB7BBFBC3, 0xB7BCFBC3, 0xB7BDFBC3, 0xB7BEFBC3, + 0xB7BFFBC3, 0xB7C0FBC3, 0xB7C1FBC3, 0xB7C2FBC3, 0xB7C3FBC3, 0xB7C4FBC3, 0xB7C5FBC3, 0xB7C6FBC3, 0xB7C7FBC3, 0xB7C8FBC3, 0xB7C9FBC3, 0xB7CAFBC3, 0xB7CBFBC3, 0xB7CCFBC3, 0xB7CDFBC3, + 0xB7CEFBC3, 0xB7CFFBC3, 0xB7D0FBC3, 0xB7D1FBC3, 0xB7D2FBC3, 0xB7D3FBC3, 0xB7D4FBC3, 0xB7D5FBC3, 0xB7D6FBC3, 0xB7D7FBC3, 0xB7D8FBC3, 0xB7D9FBC3, 0xB7DAFBC3, 0xB7DBFBC3, 0xB7DCFBC3, + 0xB7DDFBC3, 0xB7DEFBC3, 0xB7DFFBC3, 0xB7E0FBC3, 0xB7E1FBC3, 0xB7E2FBC3, 0xB7E3FBC3, 0xB7E4FBC3, 0xB7E5FBC3, 0xB7E6FBC3, 0xB7E7FBC3, 0xB7E8FBC3, 0xB7E9FBC3, 0xB7EAFBC3, 0xB7EBFBC3, + 0xB7ECFBC3, 0xB7EDFBC3, 0xB7EEFBC3, 0xB7EFFBC3, 0xB7F0FBC3, 0xB7F1FBC3, 0xB7F2FBC3, 0xB7F3FBC3, 0xB7F4FBC3, 0xB7F5FBC3, 0xB7F6FBC3, 0xB7F7FBC3, 0xB7F8FBC3, 0xB7F9FBC3, 0xB7FAFBC3, + 0xB7FBFBC3, 0xB7FCFBC3, 0xB7FDFBC3, 0xB7FEFBC3, 0xB7FFFBC3, 0xB800FBC3, 0xB801FBC3, 0xB802FBC3, 0xB803FBC3, 0xB804FBC3, 0xB805FBC3, 0xB806FBC3, 0xB807FBC3, 0xB808FBC3, 0xB809FBC3, + 0xB80AFBC3, 0xB80BFBC3, 0xB80CFBC3, 0xB80DFBC3, 0xB80EFBC3, 0xB80FFBC3, 0xB810FBC3, 0xB811FBC3, 0xB812FBC3, 0xB813FBC3, 0xB814FBC3, 0xB815FBC3, 0xB816FBC3, 0xB817FBC3, 0xB818FBC3, + 0xB819FBC3, 0xB81AFBC3, 0xB81BFBC3, 0xB81CFBC3, 0xB81DFBC3, 0xB81EFBC3, 0xB81FFBC3, 0xB820FBC3, 0xB821FBC3, 0xB822FBC3, 0xB823FBC3, 0xB824FBC3, 0xB825FBC3, 0xB826FBC3, 0xB827FBC3, + 0xB828FBC3, 0xB829FBC3, 0xB82AFBC3, 0xB82BFBC3, 0xB82CFBC3, 0xB82DFBC3, 0xB82EFBC3, 0xB82FFBC3, 0xB830FBC3, 0xB831FBC3, 0xB832FBC3, 0xB833FBC3, 0xB834FBC3, 0xB835FBC3, 0xB836FBC3, + 0xB837FBC3, 0xB838FBC3, 0xB839FBC3, 0xB83AFBC3, 0xB83BFBC3, 0xB83CFBC3, 0xB83DFBC3, 0xB83EFBC3, 0xB83FFBC3, 0xB840FBC3, 0xB841FBC3, 0xB842FBC3, 0xB843FBC3, 0xB844FBC3, 0xB845FBC3, + 0xB846FBC3, 0xB847FBC3, 0xB848FBC3, 0xB849FBC3, 
0xB84AFBC3, 0xB84BFBC3, 0xB84CFBC3, 0xB84DFBC3, 0xB84EFBC3, 0xB84FFBC3, 0xB850FBC3, 0xB851FBC3, 0xB852FBC3, 0xB853FBC3, 0xB854FBC3, + 0xB855FBC3, 0xB856FBC3, 0xB857FBC3, 0xB858FBC3, 0xB859FBC3, 0xB85AFBC3, 0xB85BFBC3, 0xB85CFBC3, 0xB85DFBC3, 0xB85EFBC3, 0xB85FFBC3, 0xB860FBC3, 0xB861FBC3, 0xB862FBC3, 0xB863FBC3, + 0xB864FBC3, 0xB865FBC3, 0xB866FBC3, 0xB867FBC3, 0xB868FBC3, 0xB869FBC3, 0xB86AFBC3, 0xB86BFBC3, 0xB86CFBC3, 0xB86DFBC3, 0xB86EFBC3, 0xB86FFBC3, 0xB870FBC3, 0xB871FBC3, 0xB872FBC3, + 0xB873FBC3, 0xB874FBC3, 0xB875FBC3, 0xB876FBC3, 0xB877FBC3, 0xB878FBC3, 0xB879FBC3, 0xB87AFBC3, 0xB87BFBC3, 0xB87CFBC3, 0xB87DFBC3, 0xB87EFBC3, 0xB87FFBC3, 0xB880FBC3, 0xB881FBC3, + 0xB882FBC3, 0xB883FBC3, 0xB884FBC3, 0xB885FBC3, 0xB886FBC3, 0xB887FBC3, 0xB888FBC3, 0xB889FBC3, 0xB88AFBC3, 0xB88BFBC3, 0xB88CFBC3, 0xB88DFBC3, 0xB88EFBC3, 0xB88FFBC3, 0xB890FBC3, + 0xB891FBC3, 0xB892FBC3, 0xB893FBC3, 0xB894FBC3, 0xB895FBC3, 0xB896FBC3, 0xB897FBC3, 0xB898FBC3, 0xB899FBC3, 0xB89AFBC3, 0xB89BFBC3, 0xB89CFBC3, 0xB89DFBC3, 0xB89EFBC3, 0xB89FFBC3, + 0xB8A0FBC3, 0xB8A1FBC3, 0xB8A2FBC3, 0xB8A3FBC3, 0xB8A4FBC3, 0xB8A5FBC3, 0xB8A6FBC3, 0xB8A7FBC3, 0xB8A8FBC3, 0xB8A9FBC3, 0xB8AAFBC3, 0xB8ABFBC3, 0xB8ACFBC3, 0xB8ADFBC3, 0xB8AEFBC3, + 0xB8AFFBC3, 0xB8B0FBC3, 0xB8B1FBC3, 0xB8B2FBC3, 0xB8B3FBC3, 0xB8B4FBC3, 0xB8B5FBC3, 0xB8B6FBC3, 0xB8B7FBC3, 0xB8B8FBC3, 0xB8B9FBC3, 0xB8BAFBC3, 0xB8BBFBC3, 0xB8BCFBC3, 0xB8BDFBC3, + 0xB8BEFBC3, 0xB8BFFBC3, 0xB8C0FBC3, 0xB8C1FBC3, 0xB8C2FBC3, 0xB8C3FBC3, 0xB8C4FBC3, 0xB8C5FBC3, 0xB8C6FBC3, 0xB8C7FBC3, 0xB8C8FBC3, 0xB8C9FBC3, 0xB8CAFBC3, 0xB8CBFBC3, 0xB8CCFBC3, + 0xB8CDFBC3, 0xB8CEFBC3, 0xB8CFFBC3, 0xB8D0FBC3, 0xB8D1FBC3, 0xB8D2FBC3, 0xB8D3FBC3, 0xB8D4FBC3, 0xB8D5FBC3, 0xB8D6FBC3, 0xB8D7FBC3, 0xB8D8FBC3, 0xB8D9FBC3, 0xB8DAFBC3, 0xB8DBFBC3, + 0xB8DCFBC3, 0xB8DDFBC3, 0xB8DEFBC3, 0xB8DFFBC3, 0xB8E0FBC3, 0xB8E1FBC3, 0xB8E2FBC3, 0xB8E3FBC3, 0xB8E4FBC3, 0xB8E5FBC3, 0xB8E6FBC3, 0xB8E7FBC3, 0xB8E8FBC3, 0xB8E9FBC3, 0xB8EAFBC3, + 0xB8EBFBC3, 0xB8ECFBC3, 0xB8EDFBC3, 
0xB8EEFBC3, 0xB8EFFBC3, 0xB8F0FBC3, 0xB8F1FBC3, 0xB8F2FBC3, 0xB8F3FBC3, 0xB8F4FBC3, 0xB8F5FBC3, 0xB8F6FBC3, 0xB8F7FBC3, 0xB8F8FBC3, 0xB8F9FBC3, + 0xB8FAFBC3, 0xB8FBFBC3, 0xB8FCFBC3, 0xB8FDFBC3, 0xB8FEFBC3, 0xB8FFFBC3, 0xB900FBC3, 0xB901FBC3, 0xB902FBC3, 0xB903FBC3, 0xB904FBC3, 0xB905FBC3, 0xB906FBC3, 0xB907FBC3, 0xB908FBC3, + 0xB909FBC3, 0xB90AFBC3, 0xB90BFBC3, 0xB90CFBC3, 0xB90DFBC3, 0xB90EFBC3, 0xB90FFBC3, 0xB910FBC3, 0xB911FBC3, 0xB912FBC3, 0xB913FBC3, 0xB914FBC3, 0xB915FBC3, 0xB916FBC3, 0xB917FBC3, + 0xB918FBC3, 0xB919FBC3, 0xB91AFBC3, 0xB91BFBC3, 0xB91CFBC3, 0xB91DFBC3, 0xB91EFBC3, 0xB91FFBC3, 0xB920FBC3, 0xB921FBC3, 0xB922FBC3, 0xB923FBC3, 0xB924FBC3, 0xB925FBC3, 0xB926FBC3, + 0xB927FBC3, 0xB928FBC3, 0xB929FBC3, 0xB92AFBC3, 0xB92BFBC3, 0xB92CFBC3, 0xB92DFBC3, 0xB92EFBC3, 0xB92FFBC3, 0xB930FBC3, 0xB931FBC3, 0xB932FBC3, 0xB933FBC3, 0xB934FBC3, 0xB935FBC3, + 0xB936FBC3, 0xB937FBC3, 0xB938FBC3, 0xB939FBC3, 0xB93AFBC3, 0xB93BFBC3, 0xB93CFBC3, 0xB93DFBC3, 0xB93EFBC3, 0xB93FFBC3, 0xB940FBC3, 0xB941FBC3, 0xB942FBC3, 0xB943FBC3, 0xB944FBC3, + 0xB945FBC3, 0xB946FBC3, 0xB947FBC3, 0xB948FBC3, 0xB949FBC3, 0xB94AFBC3, 0xB94BFBC3, 0xB94CFBC3, 0xB94DFBC3, 0xB94EFBC3, 0xB94FFBC3, 0xB950FBC3, 0xB951FBC3, 0xB952FBC3, 0xB953FBC3, + 0xB954FBC3, 0xB955FBC3, 0xB956FBC3, 0xB957FBC3, 0xB958FBC3, 0xB959FBC3, 0xB95AFBC3, 0xB95BFBC3, 0xB95CFBC3, 0xB95DFBC3, 0xB95EFBC3, 0xB95FFBC3, 0xB960FBC3, 0xB961FBC3, 0xB962FBC3, + 0xB963FBC3, 0xB964FBC3, 0xB965FBC3, 0xB966FBC3, 0xB967FBC3, 0xB968FBC3, 0xB969FBC3, 0xB96AFBC3, 0xB96BFBC3, 0xB96CFBC3, 0xB96DFBC3, 0xB96EFBC3, 0xB96FFBC3, 0xB970FBC3, 0xB971FBC3, + 0xB972FBC3, 0xB973FBC3, 0xB974FBC3, 0xB975FBC3, 0xB976FBC3, 0xB977FBC3, 0xB978FBC3, 0xB979FBC3, 0xB97AFBC3, 0xB97BFBC3, 0xB97CFBC3, 0xB97DFBC3, 0xB97EFBC3, 0xB97FFBC3, 0xB980FBC3, + 0xB981FBC3, 0xB982FBC3, 0xB983FBC3, 0xB984FBC3, 0xB985FBC3, 0xB986FBC3, 0xB987FBC3, 0xB988FBC3, 0xB989FBC3, 0xB98AFBC3, 0xB98BFBC3, 0xB98CFBC3, 0xB98DFBC3, 0xB98EFBC3, 0xB98FFBC3, + 0xB990FBC3, 0xB991FBC3, 
0xB992FBC3, 0xB993FBC3, 0xB994FBC3, 0xB995FBC3, 0xB996FBC3, 0xB997FBC3, 0xB998FBC3, 0xB999FBC3, 0xB99AFBC3, 0xB99BFBC3, 0xB99CFBC3, 0xB99DFBC3, 0xB99EFBC3, + 0xB99FFBC3, 0xB9A0FBC3, 0xB9A1FBC3, 0xB9A2FBC3, 0xB9A3FBC3, 0xB9A4FBC3, 0xB9A5FBC3, 0xB9A6FBC3, 0xB9A7FBC3, 0xB9A8FBC3, 0xB9A9FBC3, 0xB9AAFBC3, 0xB9ABFBC3, 0xB9ACFBC3, 0xB9ADFBC3, + 0xB9AEFBC3, 0xB9AFFBC3, 0xB9B0FBC3, 0xB9B1FBC3, 0xB9B2FBC3, 0xB9B3FBC3, 0xB9B4FBC3, 0xB9B5FBC3, 0xB9B6FBC3, 0xB9B7FBC3, 0xB9B8FBC3, 0xB9B9FBC3, 0xB9BAFBC3, 0xB9BBFBC3, 0xB9BCFBC3, + 0xB9BDFBC3, 0xB9BEFBC3, 0xB9BFFBC3, 0xB9C0FBC3, 0xB9C1FBC3, 0xB9C2FBC3, 0xB9C3FBC3, 0xB9C4FBC3, 0xB9C5FBC3, 0xB9C6FBC3, 0xB9C7FBC3, 0xB9C8FBC3, 0xB9C9FBC3, 0xB9CAFBC3, 0xB9CBFBC3, + 0xB9CCFBC3, 0xB9CDFBC3, 0xB9CEFBC3, 0xB9CFFBC3, 0xB9D0FBC3, 0xB9D1FBC3, 0xB9D2FBC3, 0xB9D3FBC3, 0xB9D4FBC3, 0xB9D5FBC3, 0xB9D6FBC3, 0xB9D7FBC3, 0xB9D8FBC3, 0xB9D9FBC3, 0xB9DAFBC3, + 0xB9DBFBC3, 0xB9DCFBC3, 0xB9DDFBC3, 0xB9DEFBC3, 0xB9DFFBC3, 0xB9E0FBC3, 0xB9E1FBC3, 0xB9E2FBC3, 0xB9E3FBC3, 0xB9E4FBC3, 0xB9E5FBC3, 0xB9E6FBC3, 0xB9E7FBC3, 0xB9E8FBC3, 0xB9E9FBC3, + 0xB9EAFBC3, 0xB9EBFBC3, 0xB9ECFBC3, 0xB9EDFBC3, 0xB9EEFBC3, 0xB9EFFBC3, 0xB9F0FBC3, 0xB9F1FBC3, 0xB9F2FBC3, 0xB9F3FBC3, 0xB9F4FBC3, 0xB9F5FBC3, 0xB9F6FBC3, 0xB9F7FBC3, 0xB9F8FBC3, + 0xB9F9FBC3, 0xB9FAFBC3, 0xB9FBFBC3, 0xB9FCFBC3, 0xB9FDFBC3, 0xB9FEFBC3, 0xB9FFFBC3, 0xBA00FBC3, 0xBA01FBC3, 0xBA02FBC3, 0xBA03FBC3, 0xBA04FBC3, 0xBA05FBC3, 0xBA06FBC3, 0xBA07FBC3, + 0xBA08FBC3, 0xBA09FBC3, 0xBA0AFBC3, 0xBA0BFBC3, 0xBA0CFBC3, 0xBA0DFBC3, 0xBA0EFBC3, 0xBA0FFBC3, 0xBA10FBC3, 0xBA11FBC3, 0xBA12FBC3, 0xBA13FBC3, 0xBA14FBC3, 0xBA15FBC3, 0xBA16FBC3, + 0xBA17FBC3, 0xBA18FBC3, 0xBA19FBC3, 0xBA1AFBC3, 0xBA1BFBC3, 0xBA1CFBC3, 0xBA1DFBC3, 0xBA1EFBC3, 0xBA1FFBC3, 0xBA20FBC3, 0xBA21FBC3, 0xBA22FBC3, 0xBA23FBC3, 0xBA24FBC3, 0xBA25FBC3, + 0xBA26FBC3, 0xBA27FBC3, 0xBA28FBC3, 0xBA29FBC3, 0xBA2AFBC3, 0xBA2BFBC3, 0xBA2CFBC3, 0xBA2DFBC3, 0xBA2EFBC3, 0xBA2FFBC3, 0xBA30FBC3, 0xBA31FBC3, 0xBA32FBC3, 0xBA33FBC3, 0xBA34FBC3, + 0xBA35FBC3, 
0xBA36FBC3, 0xBA37FBC3, 0xBA38FBC3, 0xBA39FBC3, 0xBA3AFBC3, 0xBA3BFBC3, 0xBA3CFBC3, 0xBA3DFBC3, 0xBA3EFBC3, 0xBA3FFBC3, 0xBA40FBC3, 0xBA41FBC3, 0xBA42FBC3, 0xBA43FBC3, + 0xBA44FBC3, 0xBA45FBC3, 0xBA46FBC3, 0xBA47FBC3, 0xBA48FBC3, 0xBA49FBC3, 0xBA4AFBC3, 0xBA4BFBC3, 0xBA4CFBC3, 0xBA4DFBC3, 0xBA4EFBC3, 0xBA4FFBC3, 0xBA50FBC3, 0xBA51FBC3, 0xBA52FBC3, + 0xBA53FBC3, 0xBA54FBC3, 0xBA55FBC3, 0xBA56FBC3, 0xBA57FBC3, 0xBA58FBC3, 0xBA59FBC3, 0xBA5AFBC3, 0xBA5BFBC3, 0xBA5CFBC3, 0xBA5DFBC3, 0xBA5EFBC3, 0xBA5FFBC3, 0xBA60FBC3, 0xBA61FBC3, + 0xBA62FBC3, 0xBA63FBC3, 0xBA64FBC3, 0xBA65FBC3, 0xBA66FBC3, 0xBA67FBC3, 0xBA68FBC3, 0xBA69FBC3, 0xBA6AFBC3, 0xBA6BFBC3, 0xBA6CFBC3, 0xBA6DFBC3, 0xBA6EFBC3, 0xBA6FFBC3, 0xBA70FBC3, + 0xBA71FBC3, 0xBA72FBC3, 0xBA73FBC3, 0xBA74FBC3, 0xBA75FBC3, 0xBA76FBC3, 0xBA77FBC3, 0xBA78FBC3, 0xBA79FBC3, 0xBA7AFBC3, 0xBA7BFBC3, 0xBA7CFBC3, 0xBA7DFBC3, 0xBA7EFBC3, 0xBA7FFBC3, + 0xBA80FBC3, 0xBA81FBC3, 0xBA82FBC3, 0xBA83FBC3, 0xBA84FBC3, 0xBA85FBC3, 0xBA86FBC3, 0xBA87FBC3, 0xBA88FBC3, 0xBA89FBC3, 0xBA8AFBC3, 0xBA8BFBC3, 0xBA8CFBC3, 0xBA8DFBC3, 0xBA8EFBC3, + 0xBA8FFBC3, 0xBA90FBC3, 0xBA91FBC3, 0xBA92FBC3, 0xBA93FBC3, 0xBA94FBC3, 0xBA95FBC3, 0xBA96FBC3, 0xBA97FBC3, 0xBA98FBC3, 0xBA99FBC3, 0xBA9AFBC3, 0xBA9BFBC3, 0xBA9CFBC3, 0xBA9DFBC3, + 0xBA9EFBC3, 0xBA9FFBC3, 0xBAA0FBC3, 0xBAA1FBC3, 0xBAA2FBC3, 0xBAA3FBC3, 0xBAA4FBC3, 0xBAA5FBC3, 0xBAA6FBC3, 0xBAA7FBC3, 0xBAA8FBC3, 0xBAA9FBC3, 0xBAAAFBC3, 0xBAABFBC3, 0xBAACFBC3, + 0xBAADFBC3, 0xBAAEFBC3, 0xBAAFFBC3, 0xBAB0FBC3, 0xBAB1FBC3, 0xBAB2FBC3, 0xBAB3FBC3, 0xBAB4FBC3, 0xBAB5FBC3, 0xBAB6FBC3, 0xBAB7FBC3, 0xBAB8FBC3, 0xBAB9FBC3, 0xBABAFBC3, 0xBABBFBC3, + 0xBABCFBC3, 0xBABDFBC3, 0xBABEFBC3, 0xBABFFBC3, 0xBAC0FBC3, 0xBAC1FBC3, 0xBAC2FBC3, 0xBAC3FBC3, 0xBAC4FBC3, 0xBAC5FBC3, 0xBAC6FBC3, 0xBAC7FBC3, 0xBAC8FBC3, 0xBAC9FBC3, 0xBACAFBC3, + 0xBACBFBC3, 0xBACCFBC3, 0xBACDFBC3, 0xBACEFBC3, 0xBACFFBC3, 0xBAD0FBC3, 0xBAD1FBC3, 0xBAD2FBC3, 0xBAD3FBC3, 0xBAD4FBC3, 0xBAD5FBC3, 0xBAD6FBC3, 0xBAD7FBC3, 0xBAD8FBC3, 0xBAD9FBC3, + 
0xBADAFBC3, 0xBADBFBC3, 0xBADCFBC3, 0xBADDFBC3, 0xBADEFBC3, 0xBADFFBC3, 0xBAE0FBC3, 0xBAE1FBC3, 0xBAE2FBC3, 0xBAE3FBC3, 0xBAE4FBC3, 0xBAE5FBC3, 0xBAE6FBC3, 0xBAE7FBC3, 0xBAE8FBC3, + 0xBAE9FBC3, 0xBAEAFBC3, 0xBAEBFBC3, 0xBAECFBC3, 0xBAEDFBC3, 0xBAEEFBC3, 0xBAEFFBC3, 0xBAF0FBC3, 0xBAF1FBC3, 0xBAF2FBC3, 0xBAF3FBC3, 0xBAF4FBC3, 0xBAF5FBC3, 0xBAF6FBC3, 0xBAF7FBC3, + 0xBAF8FBC3, 0xBAF9FBC3, 0xBAFAFBC3, 0xBAFBFBC3, 0xBAFCFBC3, 0xBAFDFBC3, 0xBAFEFBC3, 0xBAFFFBC3, 0xBB00FBC3, 0xBB01FBC3, 0xBB02FBC3, 0xBB03FBC3, 0xBB04FBC3, 0xBB05FBC3, 0xBB06FBC3, + 0xBB07FBC3, 0xBB08FBC3, 0xBB09FBC3, 0xBB0AFBC3, 0xBB0BFBC3, 0xBB0CFBC3, 0xBB0DFBC3, 0xBB0EFBC3, 0xBB0FFBC3, 0xBB10FBC3, 0xBB11FBC3, 0xBB12FBC3, 0xBB13FBC3, 0xBB14FBC3, 0xBB15FBC3, + 0xBB16FBC3, 0xBB17FBC3, 0xBB18FBC3, 0xBB19FBC3, 0xBB1AFBC3, 0xBB1BFBC3, 0xBB1CFBC3, 0xBB1DFBC3, 0xBB1EFBC3, 0xBB1FFBC3, 0xBB20FBC3, 0xBB21FBC3, 0xBB22FBC3, 0xBB23FBC3, 0xBB24FBC3, + 0xBB25FBC3, 0xBB26FBC3, 0xBB27FBC3, 0xBB28FBC3, 0xBB29FBC3, 0xBB2AFBC3, 0xBB2BFBC3, 0xBB2CFBC3, 0xBB2DFBC3, 0xBB2EFBC3, 0xBB2FFBC3, 0xBB30FBC3, 0xBB31FBC3, 0xBB32FBC3, 0xBB33FBC3, + 0xBB34FBC3, 0xBB35FBC3, 0xBB36FBC3, 0xBB37FBC3, 0xBB38FBC3, 0xBB39FBC3, 0xBB3AFBC3, 0xBB3BFBC3, 0xBB3CFBC3, 0xBB3DFBC3, 0xBB3EFBC3, 0xBB3FFBC3, 0xBB40FBC3, 0xBB41FBC3, 0xBB42FBC3, + 0xBB43FBC3, 0xBB44FBC3, 0xBB45FBC3, 0xBB46FBC3, 0xBB47FBC3, 0xBB48FBC3, 0xBB49FBC3, 0xBB4AFBC3, 0xBB4BFBC3, 0xBB4CFBC3, 0xBB4DFBC3, 0xBB4EFBC3, 0xBB4FFBC3, 0xBB50FBC3, 0xBB51FBC3, + 0xBB52FBC3, 0xBB53FBC3, 0xBB54FBC3, 0xBB55FBC3, 0xBB56FBC3, 0xBB57FBC3, 0xBB58FBC3, 0xBB59FBC3, 0xBB5AFBC3, 0xBB5BFBC3, 0xBB5CFBC3, 0xBB5DFBC3, 0xBB5EFBC3, 0xBB5FFBC3, 0xBB60FBC3, + 0xBB61FBC3, 0xBB62FBC3, 0xBB63FBC3, 0xBB64FBC3, 0xBB65FBC3, 0xBB66FBC3, 0xBB67FBC3, 0xBB68FBC3, 0xBB69FBC3, 0xBB6AFBC3, 0xBB6BFBC3, 0xBB6CFBC3, 0xBB6DFBC3, 0xBB6EFBC3, 0xBB6FFBC3, + 0xBB70FBC3, 0xBB71FBC3, 0xBB72FBC3, 0xBB73FBC3, 0xBB74FBC3, 0xBB75FBC3, 0xBB76FBC3, 0xBB77FBC3, 0xBB78FBC3, 0xBB79FBC3, 0xBB7AFBC3, 0xBB7BFBC3, 0xBB7CFBC3, 0xBB7DFBC3, 0xBB7EFBC3, 
+ 0xBB7FFBC3, 0xBB80FBC3, 0xBB81FBC3, 0xBB82FBC3, 0xBB83FBC3, 0xBB84FBC3, 0xBB85FBC3, 0xBB86FBC3, 0xBB87FBC3, 0xBB88FBC3, 0xBB89FBC3, 0xBB8AFBC3, 0xBB8BFBC3, 0xBB8CFBC3, 0xBB8DFBC3, + 0xBB8EFBC3, 0xBB8FFBC3, 0xBB90FBC3, 0xBB91FBC3, 0xBB92FBC3, 0xBB93FBC3, 0xBB94FBC3, 0xBB95FBC3, 0xBB96FBC3, 0xBB97FBC3, 0xBB98FBC3, 0xBB99FBC3, 0xBB9AFBC3, 0xBB9BFBC3, 0xBB9CFBC3, + 0xBB9DFBC3, 0xBB9EFBC3, 0xBB9FFBC3, 0xBBA0FBC3, 0xBBA1FBC3, 0xBBA2FBC3, 0xBBA3FBC3, 0xBBA4FBC3, 0xBBA5FBC3, 0xBBA6FBC3, 0xBBA7FBC3, 0xBBA8FBC3, 0xBBA9FBC3, 0xBBAAFBC3, 0xBBABFBC3, + 0xBBACFBC3, 0xBBADFBC3, 0xBBAEFBC3, 0xBBAFFBC3, 0xBBB0FBC3, 0xBBB1FBC3, 0xBBB2FBC3, 0xBBB3FBC3, 0xBBB4FBC3, 0xBBB5FBC3, 0xBBB6FBC3, 0xBBB7FBC3, 0xBBB8FBC3, 0xBBB9FBC3, 0xBBBAFBC3, + 0xBBBBFBC3, 0xBBBCFBC3, 0xBBBDFBC3, 0xBBBEFBC3, 0xBBBFFBC3, 0xBBC0FBC3, 0xBBC1FBC3, 0xBBC2FBC3, 0xBBC3FBC3, 0xBBC4FBC3, 0xBBC5FBC3, 0xBBC6FBC3, 0xBBC7FBC3, 0xBBC8FBC3, 0xBBC9FBC3, + 0xBBCAFBC3, 0xBBCBFBC3, 0xBBCCFBC3, 0xBBCDFBC3, 0xBBCEFBC3, 0xBBCFFBC3, 0xBBD0FBC3, 0xBBD1FBC3, 0xBBD2FBC3, 0xBBD3FBC3, 0xBBD4FBC3, 0xBBD5FBC3, 0xBBD6FBC3, 0xBBD7FBC3, 0xBBD8FBC3, + 0xBBD9FBC3, 0xBBDAFBC3, 0xBBDBFBC3, 0xBBDCFBC3, 0xBBDDFBC3, 0xBBDEFBC3, 0xBBDFFBC3, 0xBBE0FBC3, 0xBBE1FBC3, 0xBBE2FBC3, 0xBBE3FBC3, 0xBBE4FBC3, 0xBBE5FBC3, 0xBBE6FBC3, 0xBBE7FBC3, + 0xBBE8FBC3, 0xBBE9FBC3, 0xBBEAFBC3, 0xBBEBFBC3, 0xBBECFBC3, 0xBBEDFBC3, 0xBBEEFBC3, 0xBBEFFBC3, 0xBBF0FBC3, 0xBBF1FBC3, 0xBBF2FBC3, 0xBBF3FBC3, 0xBBF4FBC3, 0xBBF5FBC3, 0xBBF6FBC3, + 0xBBF7FBC3, 0xBBF8FBC3, 0xBBF9FBC3, 0xBBFAFBC3, 0xBBFBFBC3, 0xBBFCFBC3, 0xBBFDFBC3, 0xBBFEFBC3, 0xBBFFFBC3, 0x44AA, 0x44AB, 0x44AC, 0x44AD, 0x44AE, 0x44AF, + 0x44B0, 0x44B1, 0x44B2, 0x44B3, 0x44B4, 0x44B5, 0x44B6, 0x44B7, 0x44B8, 0x44B9, 0x44BA, 0x44BB, 0x44BC, 0x44BD, 0x44BE, + 0x44BF, 0x44C0, 0x44C1, 0x44C2, 0x44C3, 0x44C4, 0x44C5, 0x44C6, 0x44C7, 0x44C8, 0x44C9, 0x44CA, 0x44CB, 0x44CC, 0x44CD, + 0x44CE, 0x44CF, 0x44D0, 0x44D1, 0x44D2, 0x44D3, 0x44D4, 0x44D5, 0x44D6, 0x44D7, 0x44D8, 0x44D9, 0x44DA, 0x44DB, 0x44DC, + 0x44DD, 0x44DE, 
0x44DF, 0x44E0, 0x44E1, 0x44E2, 0x44E3, 0x44E4, 0x44E5, 0x44E6, 0x44E7, 0x44E8, 0x44E9, 0x44EA, 0x44EB, + 0x44EC, 0x44ED, 0x44EE, 0x44EF, 0x44F0, 0x44F1, 0x44F2, 0x44F3, 0x44F4, 0x44F5, 0x44F6, 0x44F7, 0x44F8, 0x44F9, 0x44FA, + 0x44FB, 0x44FC, 0x44FD, 0x44FE, 0x44FF, 0x4500, 0x4501, 0x4502, 0x4503, 0x4504, 0x4505, 0x4506, 0x4507, 0x4508, 0x4509, + 0x450A, 0x450B, 0x450C, 0x450D, 0x450E, 0x450F, 0x4510, 0x4511, 0x4512, 0x4513, 0x4514, 0xBC6BFBC3, 0xBC6CFBC3, 0xBC6DFBC3, 0xBC6EFBC3, + 0xBC6FFBC3, 0x4515, 0x4516, 0x4517, 0x4518, 0x4519, 0x451A, 0x451B, 0x451C, 0x451D, 0x451E, 0x451F, 0x4520, 0x4521, 0xBC7DFBC3, + 0xBC7EFBC3, 0xBC7FFBC3, 0x4522, 0x4523, 0x4524, 0x4525, 0x4526, 0x4527, 0x4528, 0x4529, 0x452A, 0xBC89FBC3, 0xBC8AFBC3, 0xBC8BFBC3, 0xBC8CFBC3, + 0xBC8DFBC3, 0xBC8EFBC3, 0xBC8FFBC3, 0x452B, 0x452C, 0x452D, 0x452E, 0x452F, 0x4530, 0x4531, 0x4532, 0x4533, 0x4534, 0xBC9AFBC3, 0xBC9BFBC3, + 0x11C6, 0x0, 0x0, 0x289, 0x0, 0x0, 0x0, 0x0, 0xBCA4FBC3, 0xBCA5FBC3, 0xBCA6FBC3, 0xBCA7FBC3, 0xBCA8FBC3, 0xBCA9FBC3, 0xBCAAFBC3, + 0xBCABFBC3, 0xBCACFBC3, 0xBCADFBC3, 0xBCAEFBC3, 0xBCAFFBC3, 0xBCB0FBC3, 0xBCB1FBC3, 0xBCB2FBC3, 0xBCB3FBC3, 0xBCB4FBC3, 0xBCB5FBC3, 0xBCB6FBC3, 0xBCB7FBC3, 0xBCB8FBC3, 0xBCB9FBC3, + 0xBCBAFBC3, 0xBCBBFBC3, 0xBCBCFBC3, 0xBCBDFBC3, 0xBCBEFBC3, 0xBCBFFBC3, 0xBCC0FBC3, 0xBCC1FBC3, 0xBCC2FBC3, 0xBCC3FBC3, 0xBCC4FBC3, 0xBCC5FBC3, 0xBCC6FBC3, 0xBCC7FBC3, 0xBCC8FBC3, + 0xBCC9FBC3, 0xBCCAFBC3, 0xBCCBFBC3, 0xBCCCFBC3, 0xBCCDFBC3, 0xBCCEFBC3, 0xBCCFFBC3, 0xBCD0FBC3, 0xBCD1FBC3, 0xBCD2FBC3, 0xBCD3FBC3, 0xBCD4FBC3, 0xBCD5FBC3, 0xBCD6FBC3, 0xBCD7FBC3, + 0xBCD8FBC3, 0xBCD9FBC3, 0xBCDAFBC3, 0xBCDBFBC3, 0xBCDCFBC3, 0xBCDDFBC3, 0xBCDEFBC3, 0xBCDFFBC3, 0xBCE0FBC3, 0xBCE1FBC3, 0xBCE2FBC3, 0xBCE3FBC3, 0xBCE4FBC3, 0xBCE5FBC3, 0xBCE6FBC3, + 0xBCE7FBC3, 0xBCE8FBC3, 0xBCE9FBC3, 0xBCEAFBC3, 0xBCEBFBC3, 0xBCECFBC3, 0xBCEDFBC3, 0xBCEEFBC3, 0xBCEFFBC3, 0xBCF0FBC3, 0xBCF1FBC3, 0xBCF2FBC3, 0xBCF3FBC3, 0xBCF4FBC3, 0xBCF5FBC3, + 0xBCF6FBC3, 0xBCF7FBC3, 0xBCF8FBC3, 0xBCF9FBC3, 
0xBCFAFBC3, 0xBCFBFBC3, 0xBCFCFBC3, 0xBCFDFBC3, 0xBCFEFBC3, 0xBCFFFBC3, 0xBD00FBC3, 0xBD01FBC3, 0xBD02FBC3, 0xBD03FBC3, 0xBD04FBC3, + 0xBD05FBC3, 0xBD06FBC3, 0xBD07FBC3, 0xBD08FBC3, 0xBD09FBC3, 0xBD0AFBC3, 0xBD0BFBC3, 0xBD0CFBC3, 0xBD0DFBC3, 0xBD0EFBC3, 0xBD0FFBC3, 0xBD10FBC3, 0xBD11FBC3, 0xBD12FBC3, 0xBD13FBC3, + 0xBD14FBC3, 0xBD15FBC3, 0xBD16FBC3, 0xBD17FBC3, 0xBD18FBC3, 0xBD19FBC3, 0xBD1AFBC3, 0xBD1BFBC3, 0xBD1CFBC3, 0xBD1DFBC3, 0xBD1EFBC3, 0xBD1FFBC3, 0xBD20FBC3, 0xBD21FBC3, 0xBD22FBC3, + 0xBD23FBC3, 0xBD24FBC3, 0xBD25FBC3, 0xBD26FBC3, 0xBD27FBC3, 0xBD28FBC3, 0xBD29FBC3, 0xBD2AFBC3, 0xBD2BFBC3, 0xBD2CFBC3, 0xBD2DFBC3, 0xBD2EFBC3, 0xBD2FFBC3, 0xBD30FBC3, 0xBD31FBC3, + 0xBD32FBC3, 0xBD33FBC3, 0xBD34FBC3, 0xBD35FBC3, 0xBD36FBC3, 0xBD37FBC3, 0xBD38FBC3, 0xBD39FBC3, 0xBD3AFBC3, 0xBD3BFBC3, 0xBD3CFBC3, 0xBD3DFBC3, 0xBD3EFBC3, 0xBD3FFBC3, 0xBD40FBC3, + 0xBD41FBC3, 0xBD42FBC3, 0xBD43FBC3, 0xBD44FBC3, 0xBD45FBC3, 0xBD46FBC3, 0xBD47FBC3, 0xBD48FBC3, 0xBD49FBC3, 0xBD4AFBC3, 0xBD4BFBC3, 0xBD4CFBC3, 0xBD4DFBC3, 0xBD4EFBC3, 0xBD4FFBC3, + 0xBD50FBC3, 0xBD51FBC3, 0xBD52FBC3, 0xBD53FBC3, 0xBD54FBC3, 0xBD55FBC3, 0xBD56FBC3, 0xBD57FBC3, 0xBD58FBC3, 0xBD59FBC3, 0xBD5AFBC3, 0xBD5BFBC3, 0xBD5CFBC3, 0xBD5DFBC3, 0xBD5EFBC3, + 0xBD5FFBC3, 0xBD60FBC3, 0xBD61FBC3, 0xBD62FBC3, 0xBD63FBC3, 0xBD64FBC3, 0xBD65FBC3, 0xBD66FBC3, 0xBD67FBC3, 0xBD68FBC3, 0xBD69FBC3, 0xBD6AFBC3, 0xBD6BFBC3, 0xBD6CFBC3, 0xBD6DFBC3, + 0xBD6EFBC3, 0xBD6FFBC3, 0xBD70FBC3, 0xBD71FBC3, 0xBD72FBC3, 0xBD73FBC3, 0xBD74FBC3, 0xBD75FBC3, 0xBD76FBC3, 0xBD77FBC3, 0xBD78FBC3, 0xBD79FBC3, 0xBD7AFBC3, 0xBD7BFBC3, 0xBD7CFBC3, + 0xBD7DFBC3, 0xBD7EFBC3, 0xBD7FFBC3, 0xBD80FBC3, 0xBD81FBC3, 0xBD82FBC3, 0xBD83FBC3, 0xBD84FBC3, 0xBD85FBC3, 0xBD86FBC3, 0xBD87FBC3, 0xBD88FBC3, 0xBD89FBC3, 0xBD8AFBC3, 0xBD8BFBC3, + 0xBD8CFBC3, 0xBD8DFBC3, 0xBD8EFBC3, 0xBD8FFBC3, 0xBD90FBC3, 0xBD91FBC3, 0xBD92FBC3, 0xBD93FBC3, 0xBD94FBC3, 0xBD95FBC3, 0xBD96FBC3, 0xBD97FBC3, 0xBD98FBC3, 0xBD99FBC3, 0xBD9AFBC3, + 0xBD9BFBC3, 0xBD9CFBC3, 0xBD9DFBC3, 
0xBD9EFBC3, 0xBD9FFBC3, 0xBDA0FBC3, 0xBDA1FBC3, 0xBDA2FBC3, 0xBDA3FBC3, 0xBDA4FBC3, 0xBDA5FBC3, 0xBDA6FBC3, 0xBDA7FBC3, 0xBDA8FBC3, 0xBDA9FBC3, + 0xBDAAFBC3, 0xBDABFBC3, 0xBDACFBC3, 0xBDADFBC3, 0xBDAEFBC3, 0xBDAFFBC3, 0xBDB0FBC3, 0xBDB1FBC3, 0xBDB2FBC3, 0xBDB3FBC3, 0xBDB4FBC3, 0xBDB5FBC3, 0xBDB6FBC3, 0xBDB7FBC3, 0xBDB8FBC3, + 0xBDB9FBC3, 0xBDBAFBC3, 0xBDBBFBC3, 0xBDBCFBC3, 0xBDBDFBC3, 0xBDBEFBC3, 0xBDBFFBC3, 0xBDC0FBC3, 0xBDC1FBC3, 0xBDC2FBC3, 0xBDC3FBC3, 0xBDC4FBC3, 0xBDC5FBC3, 0xBDC6FBC3, 0xBDC7FBC3, + 0xBDC8FBC3, 0xBDC9FBC3, 0xBDCAFBC3, 0xBDCBFBC3, 0xBDCCFBC3, 0xBDCDFBC3, 0xBDCEFBC3, 0xBDCFFBC3, 0xBDD0FBC3, 0xBDD1FBC3, 0xBDD2FBC3, 0xBDD3FBC3, 0xBDD4FBC3, 0xBDD5FBC3, 0xBDD6FBC3, + 0xBDD7FBC3, 0xBDD8FBC3, 0xBDD9FBC3, 0xBDDAFBC3, 0xBDDBFBC3, 0xBDDCFBC3, 0xBDDDFBC3, 0xBDDEFBC3, 0xBDDFFBC3, 0xBDE0FBC3, 0xBDE1FBC3, 0xBDE2FBC3, 0xBDE3FBC3, 0xBDE4FBC3, 0xBDE5FBC3, + 0xBDE6FBC3, 0xBDE7FBC3, 0xBDE8FBC3, 0xBDE9FBC3, 0xBDEAFBC3, 0xBDEBFBC3, 0xBDECFBC3, 0xBDEDFBC3, 0xBDEEFBC3, 0xBDEFFBC3, 0xBDF0FBC3, 0xBDF1FBC3, 0xBDF2FBC3, 0xBDF3FBC3, 0xBDF4FBC3, + 0xBDF5FBC3, 0xBDF6FBC3, 0xBDF7FBC3, 0xBDF8FBC3, 0xBDF9FBC3, 0xBDFAFBC3, 0xBDFBFBC3, 0xBDFCFBC3, 0xBDFDFBC3, 0xBDFEFBC3, 0xBDFFFBC3, 0xBE00FBC3, 0xBE01FBC3, 0xBE02FBC3, 0xBE03FBC3, + 0xBE04FBC3, 0xBE05FBC3, 0xBE06FBC3, 0xBE07FBC3, 0xBE08FBC3, 0xBE09FBC3, 0xBE0AFBC3, 0xBE0BFBC3, 0xBE0CFBC3, 0xBE0DFBC3, 0xBE0EFBC3, 0xBE0FFBC3, 0xBE10FBC3, 0xBE11FBC3, 0xBE12FBC3, + 0xBE13FBC3, 0xBE14FBC3, 0xBE15FBC3, 0xBE16FBC3, 0xBE17FBC3, 0xBE18FBC3, 0xBE19FBC3, 0xBE1AFBC3, 0xBE1BFBC3, 0xBE1CFBC3, 0xBE1DFBC3, 0xBE1EFBC3, 0xBE1FFBC3, 0xBE20FBC3, 0xBE21FBC3, + 0xBE22FBC3, 0xBE23FBC3, 0xBE24FBC3, 0xBE25FBC3, 0xBE26FBC3, 0xBE27FBC3, 0xBE28FBC3, 0xBE29FBC3, 0xBE2AFBC3, 0xBE2BFBC3, 0xBE2CFBC3, 0xBE2DFBC3, 0xBE2EFBC3, 0xBE2FFBC3, 0xBE30FBC3, + 0xBE31FBC3, 0xBE32FBC3, 0xBE33FBC3, 0xBE34FBC3, 0xBE35FBC3, 0xBE36FBC3, 0xBE37FBC3, 0xBE38FBC3, 0xBE39FBC3, 0xBE3AFBC3, 0xBE3BFBC3, 0xBE3CFBC3, 0xBE3DFBC3, 0xBE3EFBC3, 0xBE3FFBC3, + 0xBE40FBC3, 0xBE41FBC3, 
0xBE42FBC3, 0xBE43FBC3, 0xBE44FBC3, 0xBE45FBC3, 0xBE46FBC3, 0xBE47FBC3, 0xBE48FBC3, 0xBE49FBC3, 0xBE4AFBC3, 0xBE4BFBC3, 0xBE4CFBC3, 0xBE4DFBC3, 0xBE4EFBC3, + 0xBE4FFBC3, 0xBE50FBC3, 0xBE51FBC3, 0xBE52FBC3, 0xBE53FBC3, 0xBE54FBC3, 0xBE55FBC3, 0xBE56FBC3, 0xBE57FBC3, 0xBE58FBC3, 0xBE59FBC3, 0xBE5AFBC3, 0xBE5BFBC3, 0xBE5CFBC3, 0xBE5DFBC3, + 0xBE5EFBC3, 0xBE5FFBC3, 0xBE60FBC3, 0xBE61FBC3, 0xBE62FBC3, 0xBE63FBC3, 0xBE64FBC3, 0xBE65FBC3, 0xBE66FBC3, 0xBE67FBC3, 0xBE68FBC3, 0xBE69FBC3, 0xBE6AFBC3, 0xBE6BFBC3, 0xBE6CFBC3, + 0xBE6DFBC3, 0xBE6EFBC3, 0xBE6FFBC3, 0xBE70FBC3, 0xBE71FBC3, 0xBE72FBC3, 0xBE73FBC3, 0xBE74FBC3, 0xBE75FBC3, 0xBE76FBC3, 0xBE77FBC3, 0xBE78FBC3, 0xBE79FBC3, 0xBE7AFBC3, 0xBE7BFBC3, + 0xBE7CFBC3, 0xBE7DFBC3, 0xBE7EFBC3, 0xBE7FFBC3, 0xBE80FBC3, 0xBE81FBC3, 0xBE82FBC3, 0xBE83FBC3, 0xBE84FBC3, 0xBE85FBC3, 0xBE86FBC3, 0xBE87FBC3, 0xBE88FBC3, 0xBE89FBC3, 0xBE8AFBC3, + 0xBE8BFBC3, 0xBE8CFBC3, 0xBE8DFBC3, 0xBE8EFBC3, 0xBE8FFBC3, 0xBE90FBC3, 0xBE91FBC3, 0xBE92FBC3, 0xBE93FBC3, 0xBE94FBC3, 0xBE95FBC3, 0xBE96FBC3, 0xBE97FBC3, 0xBE98FBC3, 0xBE99FBC3, + 0xBE9AFBC3, 0xBE9BFBC3, 0xBE9CFBC3, 0xBE9DFBC3, 0xBE9EFBC3, 0xBE9FFBC3, 0xBEA0FBC3, 0xBEA1FBC3, 0xBEA2FBC3, 0xBEA3FBC3, 0xBEA4FBC3, 0xBEA5FBC3, 0xBEA6FBC3, 0xBEA7FBC3, 0xBEA8FBC3, + 0xBEA9FBC3, 0xBEAAFBC3, 0xBEABFBC3, 0xBEACFBC3, 0xBEADFBC3, 0xBEAEFBC3, 0xBEAFFBC3, 0xBEB0FBC3, 0xBEB1FBC3, 0xBEB2FBC3, 0xBEB3FBC3, 0xBEB4FBC3, 0xBEB5FBC3, 0xBEB6FBC3, 0xBEB7FBC3, + 0xBEB8FBC3, 0xBEB9FBC3, 0xBEBAFBC3, 0xBEBBFBC3, 0xBEBCFBC3, 0xBEBDFBC3, 0xBEBEFBC3, 0xBEBFFBC3, 0xBEC0FBC3, 0xBEC1FBC3, 0xBEC2FBC3, 0xBEC3FBC3, 0xBEC4FBC3, 0xBEC5FBC3, 0xBEC6FBC3, + 0xBEC7FBC3, 0xBEC8FBC3, 0xBEC9FBC3, 0xBECAFBC3, 0xBECBFBC3, 0xBECCFBC3, 0xBECDFBC3, 0xBECEFBC3, 0xBECFFBC3, 0xBED0FBC3, 0xBED1FBC3, 0xBED2FBC3, 0xBED3FBC3, 0xBED4FBC3, 0xBED5FBC3, + 0xBED6FBC3, 0xBED7FBC3, 0xBED8FBC3, 0xBED9FBC3, 0xBEDAFBC3, 0xBEDBFBC3, 0xBEDCFBC3, 0xBEDDFBC3, 0xBEDEFBC3, 0xBEDFFBC3, 0xBEE0FBC3, 0xBEE1FBC3, 0xBEE2FBC3, 0xBEE3FBC3, 0xBEE4FBC3, + 0xBEE5FBC3, 
0xBEE6FBC3, 0xBEE7FBC3, 0xBEE8FBC3, 0xBEE9FBC3, 0xBEEAFBC3, 0xBEEBFBC3, 0xBEECFBC3, 0xBEEDFBC3, 0xBEEEFBC3, 0xBEEFFBC3, 0xBEF0FBC3, 0xBEF1FBC3, 0xBEF2FBC3, 0xBEF3FBC3, + 0xBEF4FBC3, 0xBEF5FBC3, 0xBEF6FBC3, 0xBEF7FBC3, 0xBEF8FBC3, 0xBEF9FBC3, 0xBEFAFBC3, 0xBEFBFBC3, 0xBEFCFBC3, 0xBEFDFBC3, 0xBEFEFBC3, 0xBEFFFBC3, 0xBF00FBC3, 0xBF01FBC3, 0xBF02FBC3, + 0xBF03FBC3, 0xBF04FBC3, 0xBF05FBC3, 0xBF06FBC3, 0xBF07FBC3, 0xBF08FBC3, 0xBF09FBC3, 0xBF0AFBC3, 0xBF0BFBC3, 0xBF0CFBC3, 0xBF0DFBC3, 0xBF0EFBC3, 0xBF0FFBC3, 0xBF10FBC3, 0xBF11FBC3, + 0xBF12FBC3, 0xBF13FBC3, 0xBF14FBC3, 0xBF15FBC3, 0xBF16FBC3, 0xBF17FBC3, 0xBF18FBC3, 0xBF19FBC3, 0xBF1AFBC3, 0xBF1BFBC3, 0xBF1CFBC3, 0xBF1DFBC3, 0xBF1EFBC3, 0xBF1FFBC3, 0xBF20FBC3, + 0xBF21FBC3, 0xBF22FBC3, 0xBF23FBC3, 0xBF24FBC3, 0xBF25FBC3, 0xBF26FBC3, 0xBF27FBC3, 0xBF28FBC3, 0xBF29FBC3, 0xBF2AFBC3, 0xBF2BFBC3, 0xBF2CFBC3, 0xBF2DFBC3, 0xBF2EFBC3, 0xBF2FFBC3, + 0xBF30FBC3, 0xBF31FBC3, 0xBF32FBC3, 0xBF33FBC3, 0xBF34FBC3, 0xBF35FBC3, 0xBF36FBC3, 0xBF37FBC3, 0xBF38FBC3, 0xBF39FBC3, 0xBF3AFBC3, 0xBF3BFBC3, 0xBF3CFBC3, 0xBF3DFBC3, 0xBF3EFBC3, + 0xBF3FFBC3, 0xBF40FBC3, 0xBF41FBC3, 0xBF42FBC3, 0xBF43FBC3, 0xBF44FBC3, 0xBF45FBC3, 0xBF46FBC3, 0xBF47FBC3, 0xBF48FBC3, 0xBF49FBC3, 0xBF4AFBC3, 0xBF4BFBC3, 0xBF4CFBC3, 0xBF4DFBC3, + 0xBF4EFBC3, 0xBF4FFBC3, 0xBF50FBC3, 0xBF51FBC3, 0xBF52FBC3, 0xBF53FBC3, 0xBF54FBC3, 0xBF55FBC3, 0xBF56FBC3, 0xBF57FBC3, 0xBF58FBC3, 0xBF59FBC3, 0xBF5AFBC3, 0xBF5BFBC3, 0xBF5CFBC3, + 0xBF5DFBC3, 0xBF5EFBC3, 0xBF5FFBC3, 0xBF60FBC3, 0xBF61FBC3, 0xBF62FBC3, 0xBF63FBC3, 0xBF64FBC3, 0xBF65FBC3, 0xBF66FBC3, 0xBF67FBC3, 0xBF68FBC3, 0xBF69FBC3, 0xBF6AFBC3, 0xBF6BFBC3, + 0xBF6CFBC3, 0xBF6DFBC3, 0xBF6EFBC3, 0xBF6FFBC3, 0xBF70FBC3, 0xBF71FBC3, 0xBF72FBC3, 0xBF73FBC3, 0xBF74FBC3, 0xBF75FBC3, 0xBF76FBC3, 0xBF77FBC3, 0xBF78FBC3, 0xBF79FBC3, 0xBF7AFBC3, + 0xBF7BFBC3, 0xBF7CFBC3, 0xBF7DFBC3, 0xBF7EFBC3, 0xBF7FFBC3, 0xBF80FBC3, 0xBF81FBC3, 0xBF82FBC3, 0xBF83FBC3, 0xBF84FBC3, 0xBF85FBC3, 0xBF86FBC3, 0xBF87FBC3, 0xBF88FBC3, 0xBF89FBC3, + 
0xBF8AFBC3, 0xBF8BFBC3, 0xBF8CFBC3, 0xBF8DFBC3, 0xBF8EFBC3, 0xBF8FFBC3, 0xBF90FBC3, 0xBF91FBC3, 0xBF92FBC3, 0xBF93FBC3, 0xBF94FBC3, 0xBF95FBC3, 0xBF96FBC3, 0xBF97FBC3, 0xBF98FBC3, + 0xBF99FBC3, 0xBF9AFBC3, 0xBF9BFBC3, 0xBF9CFBC3, 0xBF9DFBC3, 0xBF9EFBC3, 0xBF9FFBC3, 0xBFA0FBC3, 0xBFA1FBC3, 0xBFA2FBC3, 0xBFA3FBC3, 0xBFA4FBC3, 0xBFA5FBC3, 0xBFA6FBC3, 0xBFA7FBC3, + 0xBFA8FBC3, 0xBFA9FBC3, 0xBFAAFBC3, 0xBFABFBC3, 0xBFACFBC3, 0xBFADFBC3, 0xBFAEFBC3, 0xBFAFFBC3, 0xBFB0FBC3, 0xBFB1FBC3, 0xBFB2FBC3, 0xBFB3FBC3, 0xBFB4FBC3, 0xBFB5FBC3, 0xBFB6FBC3, + 0xBFB7FBC3, 0xBFB8FBC3, 0xBFB9FBC3, 0xBFBAFBC3, 0xBFBBFBC3, 0xBFBCFBC3, 0xBFBDFBC3, 0xBFBEFBC3, 0xBFBFFBC3, 0xBFC0FBC3, 0xBFC1FBC3, 0xBFC2FBC3, 0xBFC3FBC3, 0xBFC4FBC3, 0xBFC5FBC3, + 0xBFC6FBC3, 0xBFC7FBC3, 0xBFC8FBC3, 0xBFC9FBC3, 0xBFCAFBC3, 0xBFCBFBC3, 0xBFCCFBC3, 0xBFCDFBC3, 0xBFCEFBC3, 0xBFCFFBC3, 0xBFD0FBC3, 0xBFD1FBC3, 0xBFD2FBC3, 0xBFD3FBC3, 0xBFD4FBC3, + 0xBFD5FBC3, 0xBFD6FBC3, 0xBFD7FBC3, 0xBFD8FBC3, 0xBFD9FBC3, 0xBFDAFBC3, 0xBFDBFBC3, 0xBFDCFBC3, 0xBFDDFBC3, 0xBFDEFBC3, 0xBFDFFBC3, 0xBFE0FBC3, 0xBFE1FBC3, 0xBFE2FBC3, 0xBFE3FBC3, + 0xBFE4FBC3, 0xBFE5FBC3, 0xBFE6FBC3, 0xBFE7FBC3, 0xBFE8FBC3, 0xBFE9FBC3, 0xBFEAFBC3, 0xBFEBFBC3, 0xBFECFBC3, 0xBFEDFBC3, 0xBFEEFBC3, 0xBFEFFBC3, 0xBFF0FBC3, 0xBFF1FBC3, 0xBFF2FBC3, + 0xBFF3FBC3, 0xBFF4FBC3, 0xBFF5FBC3, 0xBFF6FBC3, 0xBFF7FBC3, 0xBFF8FBC3, 0xBFF9FBC3, 0xBFFAFBC3, 0xBFFBFBC3, 0xBFFCFBC3, 0xBFFDFBC3, 0xBFFEFBC3, 0xBFFFFBC3, 0xC000FBC3, 0xC001FBC3, + 0xC002FBC3, 0xC003FBC3, 0xC004FBC3, 0xC005FBC3, 0xC006FBC3, 0xC007FBC3, 0xC008FBC3, 0xC009FBC3, 0xC00AFBC3, 0xC00BFBC3, 0xC00CFBC3, 0xC00DFBC3, 0xC00EFBC3, 0xC00FFBC3, 0xC010FBC3, + 0xC011FBC3, 0xC012FBC3, 0xC013FBC3, 0xC014FBC3, 0xC015FBC3, 0xC016FBC3, 0xC017FBC3, 0xC018FBC3, 0xC019FBC3, 0xC01AFBC3, 0xC01BFBC3, 0xC01CFBC3, 0xC01DFBC3, 0xC01EFBC3, 0xC01FFBC3, + 0xC020FBC3, 0xC021FBC3, 0xC022FBC3, 0xC023FBC3, 0xC024FBC3, 0xC025FBC3, 0xC026FBC3, 0xC027FBC3, 0xC028FBC3, 0xC029FBC3, 0xC02AFBC3, 0xC02BFBC3, 0xC02CFBC3, 0xC02DFBC3, 0xC02EFBC3, 
+ 0xC02FFBC3, 0xC030FBC3, 0xC031FBC3, 0xC032FBC3, 0xC033FBC3, 0xC034FBC3, 0xC035FBC3, 0xC036FBC3, 0xC037FBC3, 0xC038FBC3, 0xC039FBC3, 0xC03AFBC3, 0xC03BFBC3, 0xC03CFBC3, 0xC03DFBC3, + 0xC03EFBC3, 0xC03FFBC3, 0xC040FBC3, 0xC041FBC3, 0xC042FBC3, 0xC043FBC3, 0xC044FBC3, 0xC045FBC3, 0xC046FBC3, 0xC047FBC3, 0xC048FBC3, 0xC049FBC3, 0xC04AFBC3, 0xC04BFBC3, 0xC04CFBC3, + 0xC04DFBC3, 0xC04EFBC3, 0xC04FFBC3, 0xC050FBC3, 0xC051FBC3, 0xC052FBC3, 0xC053FBC3, 0xC054FBC3, 0xC055FBC3, 0xC056FBC3, 0xC057FBC3, 0xC058FBC3, 0xC059FBC3, 0xC05AFBC3, 0xC05BFBC3, + 0xC05CFBC3, 0xC05DFBC3, 0xC05EFBC3, 0xC05FFBC3, 0xC060FBC3, 0xC061FBC3, 0xC062FBC3, 0xC063FBC3, 0xC064FBC3, 0xC065FBC3, 0xC066FBC3, 0xC067FBC3, 0xC068FBC3, 0xC069FBC3, 0xC06AFBC3, + 0xC06BFBC3, 0xC06CFBC3, 0xC06DFBC3, 0xC06EFBC3, 0xC06FFBC3, 0xC070FBC3, 0xC071FBC3, 0xC072FBC3, 0xC073FBC3, 0xC074FBC3, 0xC075FBC3, 0xC076FBC3, 0xC077FBC3, 0xC078FBC3, 0xC079FBC3, + 0xC07AFBC3, 0xC07BFBC3, 0xC07CFBC3, 0xC07DFBC3, 0xC07EFBC3, 0xC07FFBC3, 0xC080FBC3, 0xC081FBC3, 0xC082FBC3, 0xC083FBC3, 0xC084FBC3, 0xC085FBC3, 0xC086FBC3, 0xC087FBC3, 0xC088FBC3, + 0xC089FBC3, 0xC08AFBC3, 0xC08BFBC3, 0xC08CFBC3, 0xC08DFBC3, 0xC08EFBC3, 0xC08FFBC3, 0xC090FBC3, 0xC091FBC3, 0xC092FBC3, 0xC093FBC3, 0xC094FBC3, 0xC095FBC3, 0xC096FBC3, 0xC097FBC3, + 0xC098FBC3, 0xC099FBC3, 0xC09AFBC3, 0xC09BFBC3, 0xC09CFBC3, 0xC09DFBC3, 0xC09EFBC3, 0xC09FFBC3, 0xC0A0FBC3, 0xC0A1FBC3, 0xC0A2FBC3, 0xC0A3FBC3, 0xC0A4FBC3, 0xC0A5FBC3, 0xC0A6FBC3, + 0xC0A7FBC3, 0xC0A8FBC3, 0xC0A9FBC3, 0xC0AAFBC3, 0xC0ABFBC3, 0xC0ACFBC3, 0xC0ADFBC3, 0xC0AEFBC3, 0xC0AFFBC3, 0xC0B0FBC3, 0xC0B1FBC3, 0xC0B2FBC3, 0xC0B3FBC3, 0xC0B4FBC3, 0xC0B5FBC3, + 0xC0B6FBC3, 0xC0B7FBC3, 0xC0B8FBC3, 0xC0B9FBC3, 0xC0BAFBC3, 0xC0BBFBC3, 0xC0BCFBC3, 0xC0BDFBC3, 0xC0BEFBC3, 0xC0BFFBC3, 0xC0C0FBC3, 0xC0C1FBC3, 0xC0C2FBC3, 0xC0C3FBC3, 0xC0C4FBC3, + 0xC0C5FBC3, 0xC0C6FBC3, 0xC0C7FBC3, 0xC0C8FBC3, 0xC0C9FBC3, 0xC0CAFBC3, 0xC0CBFBC3, 0xC0CCFBC3, 0xC0CDFBC3, 0xC0CEFBC3, 0xC0CFFBC3, 0xC0D0FBC3, 0xC0D1FBC3, 0xC0D2FBC3, 
0xC0D3FBC3, + 0xC0D4FBC3, 0xC0D5FBC3, 0xC0D6FBC3, 0xC0D7FBC3, 0xC0D8FBC3, 0xC0D9FBC3, 0xC0DAFBC3, 0xC0DBFBC3, 0xC0DCFBC3, 0xC0DDFBC3, 0xC0DEFBC3, 0xC0DFFBC3, 0xC0E0FBC3, 0xC0E1FBC3, 0xC0E2FBC3, + 0xC0E3FBC3, 0xC0E4FBC3, 0xC0E5FBC3, 0xC0E6FBC3, 0xC0E7FBC3, 0xC0E8FBC3, 0xC0E9FBC3, 0xC0EAFBC3, 0xC0EBFBC3, 0xC0ECFBC3, 0xC0EDFBC3, 0xC0EEFBC3, 0xC0EFFBC3, 0xC0F0FBC3, 0xC0F1FBC3, + 0xC0F2FBC3, 0xC0F3FBC3, 0xC0F4FBC3, 0xC0F5FBC3, 0xC0F6FBC3, 0xC0F7FBC3, 0xC0F8FBC3, 0xC0F9FBC3, 0xC0FAFBC3, 0xC0FBFBC3, 0xC0FCFBC3, 0xC0FDFBC3, 0xC0FEFBC3, 0xC0FFFBC3, 0xC100FBC3, + 0xC101FBC3, 0xC102FBC3, 0xC103FBC3, 0xC104FBC3, 0xC105FBC3, 0xC106FBC3, 0xC107FBC3, 0xC108FBC3, 0xC109FBC3, 0xC10AFBC3, 0xC10BFBC3, 0xC10CFBC3, 0xC10DFBC3, 0xC10EFBC3, 0xC10FFBC3, + 0xC110FBC3, 0xC111FBC3, 0xC112FBC3, 0xC113FBC3, 0xC114FBC3, 0xC115FBC3, 0xC116FBC3, 0xC117FBC3, 0xC118FBC3, 0xC119FBC3, 0xC11AFBC3, 0xC11BFBC3, 0xC11CFBC3, 0xC11DFBC3, 0xC11EFBC3, + 0xC11FFBC3, 0xC120FBC3, 0xC121FBC3, 0xC122FBC3, 0xC123FBC3, 0xC124FBC3, 0xC125FBC3, 0xC126FBC3, 0xC127FBC3, 0xC128FBC3, 0xC129FBC3, 0xC12AFBC3, 0xC12BFBC3, 0xC12CFBC3, 0xC12DFBC3, + 0xC12EFBC3, 0xC12FFBC3, 0xC130FBC3, 0xC131FBC3, 0xC132FBC3, 0xC133FBC3, 0xC134FBC3, 0xC135FBC3, 0xC136FBC3, 0xC137FBC3, 0xC138FBC3, 0xC139FBC3, 0xC13AFBC3, 0xC13BFBC3, 0xC13CFBC3, + 0xC13DFBC3, 0xC13EFBC3, 0xC13FFBC3, 0xC140FBC3, 0xC141FBC3, 0xC142FBC3, 0xC143FBC3, 0xC144FBC3, 0xC145FBC3, 0xC146FBC3, 0xC147FBC3, 0xC148FBC3, 0xC149FBC3, 0xC14AFBC3, 0xC14BFBC3, + 0xC14CFBC3, 0xC14DFBC3, 0xC14EFBC3, 0xC14FFBC3, 0xC150FBC3, 0xC151FBC3, 0xC152FBC3, 0xC153FBC3, 0xC154FBC3, 0xC155FBC3, 0xC156FBC3, 0xC157FBC3, 0xC158FBC3, 0xC159FBC3, 0xC15AFBC3, + 0xC15BFBC3, 0xC15CFBC3, 0xC15DFBC3, 0xC15EFBC3, 0xC15FFBC3, 0xC160FBC3, 0xC161FBC3, 0xC162FBC3, 0xC163FBC3, 0xC164FBC3, 0xC165FBC3, 0xC166FBC3, 0xC167FBC3, 0xC168FBC3, 0xC169FBC3, + 0xC16AFBC3, 0xC16BFBC3, 0xC16CFBC3, 0xC16DFBC3, 0xC16EFBC3, 0xC16FFBC3, 0xC170FBC3, 0xC171FBC3, 0xC172FBC3, 0xC173FBC3, 0xC174FBC3, 0xC175FBC3, 0xC176FBC3, 
0xC177FBC3, 0xC178FBC3, + 0xC179FBC3, 0xC17AFBC3, 0xC17BFBC3, 0xC17CFBC3, 0xC17DFBC3, 0xC17EFBC3, 0xC17FFBC3, 0xC180FBC3, 0xC181FBC3, 0xC182FBC3, 0xC183FBC3, 0xC184FBC3, 0xC185FBC3, 0xC186FBC3, 0xC187FBC3, + 0xC188FBC3, 0xC189FBC3, 0xC18AFBC3, 0xC18BFBC3, 0xC18CFBC3, 0xC18DFBC3, 0xC18EFBC3, 0xC18FFBC3, 0xC190FBC3, 0xC191FBC3, 0xC192FBC3, 0xC193FBC3, 0xC194FBC3, 0xC195FBC3, 0xC196FBC3, + 0xC197FBC3, 0xC198FBC3, 0xC199FBC3, 0xC19AFBC3, 0xC19BFBC3, 0xC19CFBC3, 0xC19DFBC3, 0xC19EFBC3, 0xC19FFBC3, 0xC1A0FBC3, 0xC1A1FBC3, 0xC1A2FBC3, 0xC1A3FBC3, 0xC1A4FBC3, 0xC1A5FBC3, + 0xC1A6FBC3, 0xC1A7FBC3, 0xC1A8FBC3, 0xC1A9FBC3, 0xC1AAFBC3, 0xC1ABFBC3, 0xC1ACFBC3, 0xC1ADFBC3, 0xC1AEFBC3, 0xC1AFFBC3, 0xC1B0FBC3, 0xC1B1FBC3, 0xC1B2FBC3, 0xC1B3FBC3, 0xC1B4FBC3, + 0xC1B5FBC3, 0xC1B6FBC3, 0xC1B7FBC3, 0xC1B8FBC3, 0xC1B9FBC3, 0xC1BAFBC3, 0xC1BBFBC3, 0xC1BCFBC3, 0xC1BDFBC3, 0xC1BEFBC3, 0xC1BFFBC3, 0xC1C0FBC3, 0xC1C1FBC3, 0xC1C2FBC3, 0xC1C3FBC3, + 0xC1C4FBC3, 0xC1C5FBC3, 0xC1C6FBC3, 0xC1C7FBC3, 0xC1C8FBC3, 0xC1C9FBC3, 0xC1CAFBC3, 0xC1CBFBC3, 0xC1CCFBC3, 0xC1CDFBC3, 0xC1CEFBC3, 0xC1CFFBC3, 0xC1D0FBC3, 0xC1D1FBC3, 0xC1D2FBC3, + 0xC1D3FBC3, 0xC1D4FBC3, 0xC1D5FBC3, 0xC1D6FBC3, 0xC1D7FBC3, 0xC1D8FBC3, 0xC1D9FBC3, 0xC1DAFBC3, 0xC1DBFBC3, 0xC1DCFBC3, 0xC1DDFBC3, 0xC1DEFBC3, 0xC1DFFBC3, 0xC1E0FBC3, 0xC1E1FBC3, + 0xC1E2FBC3, 0xC1E3FBC3, 0xC1E4FBC3, 0xC1E5FBC3, 0xC1E6FBC3, 0xC1E7FBC3, 0xC1E8FBC3, 0xC1E9FBC3, 0xC1EAFBC3, 0xC1EBFBC3, 0xC1ECFBC3, 0xC1EDFBC3, 0xC1EEFBC3, 0xC1EFFBC3, 0xC1F0FBC3, + 0xC1F1FBC3, 0xC1F2FBC3, 0xC1F3FBC3, 0xC1F4FBC3, 0xC1F5FBC3, 0xC1F6FBC3, 0xC1F7FBC3, 0xC1F8FBC3, 0xC1F9FBC3, 0xC1FAFBC3, 0xC1FBFBC3, 0xC1FCFBC3, 0xC1FDFBC3, 0xC1FEFBC3, 0xC1FFFBC3, + 0xC200FBC3, 0xC201FBC3, 0xC202FBC3, 0xC203FBC3, 0xC204FBC3, 0xC205FBC3, 0xC206FBC3, 0xC207FBC3, 0xC208FBC3, 0xC209FBC3, 0xC20AFBC3, 0xC20BFBC3, 0xC20CFBC3, 0xC20DFBC3, 0xC20EFBC3, + 0xC20FFBC3, 0xC210FBC3, 0xC211FBC3, 0xC212FBC3, 0xC213FBC3, 0xC214FBC3, 0xC215FBC3, 0xC216FBC3, 0xC217FBC3, 0xC218FBC3, 0xC219FBC3, 0xC21AFBC3, 
0xC21BFBC3, 0xC21CFBC3, 0xC21DFBC3, + 0xC21EFBC3, 0xC21FFBC3, 0xC220FBC3, 0xC221FBC3, 0xC222FBC3, 0xC223FBC3, 0xC224FBC3, 0xC225FBC3, 0xC226FBC3, 0xC227FBC3, 0xC228FBC3, 0xC229FBC3, 0xC22AFBC3, 0xC22BFBC3, 0xC22CFBC3, + 0xC22DFBC3, 0xC22EFBC3, 0xC22FFBC3, 0xC230FBC3, 0xC231FBC3, 0xC232FBC3, 0xC233FBC3, 0xC234FBC3, 0xC235FBC3, 0xC236FBC3, 0xC237FBC3, 0xC238FBC3, 0xC239FBC3, 0xC23AFBC3, 0xC23BFBC3, + 0xC23CFBC3, 0xC23DFBC3, 0xC23EFBC3, 0xC23FFBC3, 0xC240FBC3, 0xC241FBC3, 0xC242FBC3, 0xC243FBC3, 0xC244FBC3, 0xC245FBC3, 0xC246FBC3, 0xC247FBC3, 0xC248FBC3, 0xC249FBC3, 0xC24AFBC3, + 0xC24BFBC3, 0xC24CFBC3, 0xC24DFBC3, 0xC24EFBC3, 0xC24FFBC3, 0xC250FBC3, 0xC251FBC3, 0xC252FBC3, 0xC253FBC3, 0xC254FBC3, 0xC255FBC3, 0xC256FBC3, 0xC257FBC3, 0xC258FBC3, 0xC259FBC3, + 0xC25AFBC3, 0xC25BFBC3, 0xC25CFBC3, 0xC25DFBC3, 0xC25EFBC3, 0xC25FFBC3, 0xC260FBC3, 0xC261FBC3, 0xC262FBC3, 0xC263FBC3, 0xC264FBC3, 0xC265FBC3, 0xC266FBC3, 0xC267FBC3, 0xC268FBC3, + 0xC269FBC3, 0xC26AFBC3, 0xC26BFBC3, 0xC26CFBC3, 0xC26DFBC3, 0xC26EFBC3, 0xC26FFBC3, 0xC270FBC3, 0xC271FBC3, 0xC272FBC3, 0xC273FBC3, 0xC274FBC3, 0xC275FBC3, 0xC276FBC3, 0xC277FBC3, + 0xC278FBC3, 0xC279FBC3, 0xC27AFBC3, 0xC27BFBC3, 0xC27CFBC3, 0xC27DFBC3, 0xC27EFBC3, 0xC27FFBC3, 0xC280FBC3, 0xC281FBC3, 0xC282FBC3, 0xC283FBC3, 0xC284FBC3, 0xC285FBC3, 0xC286FBC3, + 0xC287FBC3, 0xC288FBC3, 0xC289FBC3, 0xC28AFBC3, 0xC28BFBC3, 0xC28CFBC3, 0xC28DFBC3, 0xC28EFBC3, 0xC28FFBC3, 0xC290FBC3, 0xC291FBC3, 0xC292FBC3, 0xC293FBC3, 0xC294FBC3, 0xC295FBC3, + 0xC296FBC3, 0xC297FBC3, 0xC298FBC3, 0xC299FBC3, 0xC29AFBC3, 0xC29BFBC3, 0xC29CFBC3, 0xC29DFBC3, 0xC29EFBC3, 0xC29FFBC3, 0xC2A0FBC3, 0xC2A1FBC3, 0xC2A2FBC3, 0xC2A3FBC3, 0xC2A4FBC3, + 0xC2A5FBC3, 0xC2A6FBC3, 0xC2A7FBC3, 0xC2A8FBC3, 0xC2A9FBC3, 0xC2AAFBC3, 0xC2ABFBC3, 0xC2ACFBC3, 0xC2ADFBC3, 0xC2AEFBC3, 0xC2AFFBC3, 0xC2B0FBC3, 0xC2B1FBC3, 0xC2B2FBC3, 0xC2B3FBC3, + 0xC2B4FBC3, 0xC2B5FBC3, 0xC2B6FBC3, 0xC2B7FBC3, 0xC2B8FBC3, 0xC2B9FBC3, 0xC2BAFBC3, 0xC2BBFBC3, 0xC2BCFBC3, 0xC2BDFBC3, 0xC2BEFBC3, 
0xC2BFFBC3, 0xC2C0FBC3, 0xC2C1FBC3, 0xC2C2FBC3, + 0xC2C3FBC3, 0xC2C4FBC3, 0xC2C5FBC3, 0xC2C6FBC3, 0xC2C7FBC3, 0xC2C8FBC3, 0xC2C9FBC3, 0xC2CAFBC3, 0xC2CBFBC3, 0xC2CCFBC3, 0xC2CDFBC3, 0xC2CEFBC3, 0xC2CFFBC3, 0xC2D0FBC3, 0xC2D1FBC3, + 0xC2D2FBC3, 0xC2D3FBC3, 0xC2D4FBC3, 0xC2D5FBC3, 0xC2D6FBC3, 0xC2D7FBC3, 0xC2D8FBC3, 0xC2D9FBC3, 0xC2DAFBC3, 0xC2DBFBC3, 0xC2DCFBC3, 0xC2DDFBC3, 0xC2DEFBC3, 0xC2DFFBC3, 0xC2E0FBC3, + 0xC2E1FBC3, 0xC2E2FBC3, 0xC2E3FBC3, 0xC2E4FBC3, 0xC2E5FBC3, 0xC2E6FBC3, 0xC2E7FBC3, 0xC2E8FBC3, 0xC2E9FBC3, 0xC2EAFBC3, 0xC2EBFBC3, 0xC2ECFBC3, 0xC2EDFBC3, 0xC2EEFBC3, 0xC2EFFBC3, + 0xC2F0FBC3, 0xC2F1FBC3, 0xC2F2FBC3, 0xC2F3FBC3, 0xC2F4FBC3, 0xC2F5FBC3, 0xC2F6FBC3, 0xC2F7FBC3, 0xC2F8FBC3, 0xC2F9FBC3, 0xC2FAFBC3, 0xC2FBFBC3, 0xC2FCFBC3, 0xC2FDFBC3, 0xC2FEFBC3, + 0xC2FFFBC3, 0xC300FBC3, 0xC301FBC3, 0xC302FBC3, 0xC303FBC3, 0xC304FBC3, 0xC305FBC3, 0xC306FBC3, 0xC307FBC3, 0xC308FBC3, 0xC309FBC3, 0xC30AFBC3, 0xC30BFBC3, 0xC30CFBC3, 0xC30DFBC3, + 0xC30EFBC3, 0xC30FFBC3, 0xC310FBC3, 0xC311FBC3, 0xC312FBC3, 0xC313FBC3, 0xC314FBC3, 0xC315FBC3, 0xC316FBC3, 0xC317FBC3, 0xC318FBC3, 0xC319FBC3, 0xC31AFBC3, 0xC31BFBC3, 0xC31CFBC3, + 0xC31DFBC3, 0xC31EFBC3, 0xC31FFBC3, 0xC320FBC3, 0xC321FBC3, 0xC322FBC3, 0xC323FBC3, 0xC324FBC3, 0xC325FBC3, 0xC326FBC3, 0xC327FBC3, 0xC328FBC3, 0xC329FBC3, 0xC32AFBC3, 0xC32BFBC3, + 0xC32CFBC3, 0xC32DFBC3, 0xC32EFBC3, 0xC32FFBC3, 0xC330FBC3, 0xC331FBC3, 0xC332FBC3, 0xC333FBC3, 0xC334FBC3, 0xC335FBC3, 0xC336FBC3, 0xC337FBC3, 0xC338FBC3, 0xC339FBC3, 0xC33AFBC3, + 0xC33BFBC3, 0xC33CFBC3, 0xC33DFBC3, 0xC33EFBC3, 0xC33FFBC3, 0xC340FBC3, 0xC341FBC3, 0xC342FBC3, 0xC343FBC3, 0xC344FBC3, 0xC345FBC3, 0xC346FBC3, 0xC347FBC3, 0xC348FBC3, 0xC349FBC3, + 0xC34AFBC3, 0xC34BFBC3, 0xC34CFBC3, 0xC34DFBC3, 0xC34EFBC3, 0xC34FFBC3, 0xC350FBC3, 0xC351FBC3, 0xC352FBC3, 0xC353FBC3, 0xC354FBC3, 0xC355FBC3, 0xC356FBC3, 0xC357FBC3, 0xC358FBC3, + 0xC359FBC3, 0xC35AFBC3, 0xC35BFBC3, 0xC35CFBC3, 0xC35DFBC3, 0xC35EFBC3, 0xC35FFBC3, 0xC360FBC3, 0xC361FBC3, 0xC362FBC3, 
0xC363FBC3, 0xC364FBC3, 0xC365FBC3, 0xC366FBC3, 0xC367FBC3, + 0xC368FBC3, 0xC369FBC3, 0xC36AFBC3, 0xC36BFBC3, 0xC36CFBC3, 0xC36DFBC3, 0xC36EFBC3, 0xC36FFBC3, 0xC370FBC3, 0xC371FBC3, 0xC372FBC3, 0xC373FBC3, 0xC374FBC3, 0xC375FBC3, 0xC376FBC3, + 0xC377FBC3, 0xC378FBC3, 0xC379FBC3, 0xC37AFBC3, 0xC37BFBC3, 0xC37CFBC3, 0xC37DFBC3, 0xC37EFBC3, 0xC37FFBC3, 0xC380FBC3, 0xC381FBC3, 0xC382FBC3, 0xC383FBC3, 0xC384FBC3, 0xC385FBC3, + 0xC386FBC3, 0xC387FBC3, 0xC388FBC3, 0xC389FBC3, 0xC38AFBC3, 0xC38BFBC3, 0xC38CFBC3, 0xC38DFBC3, 0xC38EFBC3, 0xC38FFBC3, 0xC390FBC3, 0xC391FBC3, 0xC392FBC3, 0xC393FBC3, 0xC394FBC3, + 0xC395FBC3, 0xC396FBC3, 0xC397FBC3, 0xC398FBC3, 0xC399FBC3, 0xC39AFBC3, 0xC39BFBC3, 0xC39CFBC3, 0xC39DFBC3, 0xC39EFBC3, 0xC39FFBC3, 0xC3A0FBC3, 0xC3A1FBC3, 0xC3A2FBC3, 0xC3A3FBC3, + 0xC3A4FBC3, 0xC3A5FBC3, 0xC3A6FBC3, 0xC3A7FBC3, 0xC3A8FBC3, 0xC3A9FBC3, 0xC3AAFBC3, 0xC3ABFBC3, 0xC3ACFBC3, 0xC3ADFBC3, 0xC3AEFBC3, 0xC3AFFBC3, 0xC3B0FBC3, 0xC3B1FBC3, 0xC3B2FBC3, + 0xC3B3FBC3, 0xC3B4FBC3, 0xC3B5FBC3, 0xC3B6FBC3, 0xC3B7FBC3, 0xC3B8FBC3, 0xC3B9FBC3, 0xC3BAFBC3, 0xC3BBFBC3, 0xC3BCFBC3, 0xC3BDFBC3, 0xC3BEFBC3, 0xC3BFFBC3, 0xC3C0FBC3, 0xC3C1FBC3, + 0xC3C2FBC3, 0xC3C3FBC3, 0xC3C4FBC3, 0xC3C5FBC3, 0xC3C6FBC3, 0xC3C7FBC3, 0xC3C8FBC3, 0xC3C9FBC3, 0xC3CAFBC3, 0xC3CBFBC3, 0xC3CCFBC3, 0xC3CDFBC3, 0xC3CEFBC3, 0xC3CFFBC3, 0xC3D0FBC3, + 0xC3D1FBC3, 0xC3D2FBC3, 0xC3D3FBC3, 0xC3D4FBC3, 0xC3D5FBC3, 0xC3D6FBC3, 0xC3D7FBC3, 0xC3D8FBC3, 0xC3D9FBC3, 0xC3DAFBC3, 0xC3DBFBC3, 0xC3DCFBC3, 0xC3DDFBC3, 0xC3DEFBC3, 0xC3DFFBC3, + 0xC3E0FBC3, 0xC3E1FBC3, 0xC3E2FBC3, 0xC3E3FBC3, 0xC3E4FBC3, 0xC3E5FBC3, 0xC3E6FBC3, 0xC3E7FBC3, 0xC3E8FBC3, 0xC3E9FBC3, 0xC3EAFBC3, 0xC3EBFBC3, 0xC3ECFBC3, 0xC3EDFBC3, 0xC3EEFBC3, + 0xC3EFFBC3, 0xC3F0FBC3, 0xC3F1FBC3, 0xC3F2FBC3, 0xC3F3FBC3, 0xC3F4FBC3, 0xC3F5FBC3, 0xC3F6FBC3, 0xC3F7FBC3, 0xC3F8FBC3, 0xC3F9FBC3, 0xC3FAFBC3, 0xC3FBFBC3, 0xC3FCFBC3, 0xC3FDFBC3, + 0xC3FEFBC3, 0xC3FFFBC3, 0xC400FBC3, 0xC401FBC3, 0xC402FBC3, 0xC403FBC3, 0xC404FBC3, 0xC405FBC3, 0xC406FBC3, 
0xC407FBC3, 0xC408FBC3, 0xC409FBC3, 0xC40AFBC3, 0xC40BFBC3, 0xC40CFBC3, + 0xC40DFBC3, 0xC40EFBC3, 0xC40FFBC3, 0xC410FBC3, 0xC411FBC3, 0xC412FBC3, 0xC413FBC3, 0xC414FBC3, 0xC415FBC3, 0xC416FBC3, 0xC417FBC3, 0xC418FBC3, 0xC419FBC3, 0xC41AFBC3, 0xC41BFBC3, + 0xC41CFBC3, 0xC41DFBC3, 0xC41EFBC3, 0xC41FFBC3, 0xC420FBC3, 0xC421FBC3, 0xC422FBC3, 0xC423FBC3, 0xC424FBC3, 0xC425FBC3, 0xC426FBC3, 0xC427FBC3, 0xC428FBC3, 0xC429FBC3, 0xC42AFBC3, + 0xC42BFBC3, 0xC42CFBC3, 0xC42DFBC3, 0xC42EFBC3, 0xC42FFBC3, 0xC430FBC3, 0xC431FBC3, 0xC432FBC3, 0xC433FBC3, 0xC434FBC3, 0xC435FBC3, 0xC436FBC3, 0xC437FBC3, 0xC438FBC3, 0xC439FBC3, + 0xC43AFBC3, 0xC43BFBC3, 0xC43CFBC3, 0xC43DFBC3, 0xC43EFBC3, 0xC43FFBC3, 0xC440FBC3, 0xC441FBC3, 0xC442FBC3, 0xC443FBC3, 0xC444FBC3, 0xC445FBC3, 0xC446FBC3, 0xC447FBC3, 0xC448FBC3, + 0xC449FBC3, 0xC44AFBC3, 0xC44BFBC3, 0xC44CFBC3, 0xC44DFBC3, 0xC44EFBC3, 0xC44FFBC3, 0xC450FBC3, 0xC451FBC3, 0xC452FBC3, 0xC453FBC3, 0xC454FBC3, 0xC455FBC3, 0xC456FBC3, 0xC457FBC3, + 0xC458FBC3, 0xC459FBC3, 0xC45AFBC3, 0xC45BFBC3, 0xC45CFBC3, 0xC45DFBC3, 0xC45EFBC3, 0xC45FFBC3, 0xC460FBC3, 0xC461FBC3, 0xC462FBC3, 0xC463FBC3, 0xC464FBC3, 0xC465FBC3, 0xC466FBC3, + 0xC467FBC3, 0xC468FBC3, 0xC469FBC3, 0xC46AFBC3, 0xC46BFBC3, 0xC46CFBC3, 0xC46DFBC3, 0xC46EFBC3, 0xC46FFBC3, 0xC470FBC3, 0xC471FBC3, 0xC472FBC3, 0xC473FBC3, 0xC474FBC3, 0xC475FBC3, + 0xC476FBC3, 0xC477FBC3, 0xC478FBC3, 0xC479FBC3, 0xC47AFBC3, 0xC47BFBC3, 0xC47CFBC3, 0xC47DFBC3, 0xC47EFBC3, 0xC47FFBC3, 0xC480FBC3, 0xC481FBC3, 0xC482FBC3, 0xC483FBC3, 0xC484FBC3, + 0xC485FBC3, 0xC486FBC3, 0xC487FBC3, 0xC488FBC3, 0xC489FBC3, 0xC48AFBC3, 0xC48BFBC3, 0xC48CFBC3, 0xC48DFBC3, 0xC48EFBC3, 0xC48FFBC3, 0xC490FBC3, 0xC491FBC3, 0xC492FBC3, 0xC493FBC3, + 0xC494FBC3, 0xC495FBC3, 0xC496FBC3, 0xC497FBC3, 0xC498FBC3, 0xC499FBC3, 0xC49AFBC3, 0xC49BFBC3, 0xC49CFBC3, 0xC49DFBC3, 0xC49EFBC3, 0xC49FFBC3, 0xC4A0FBC3, 0xC4A1FBC3, 0xC4A2FBC3, + 0xC4A3FBC3, 0xC4A4FBC3, 0xC4A5FBC3, 0xC4A6FBC3, 0xC4A7FBC3, 0xC4A8FBC3, 0xC4A9FBC3, 0xC4AAFBC3, 
0xC4ABFBC3, 0xC4ACFBC3, 0xC4ADFBC3, 0xC4AEFBC3, 0xC4AFFBC3, 0xC4B0FBC3, 0xC4B1FBC3, + 0xC4B2FBC3, 0xC4B3FBC3, 0xC4B4FBC3, 0xC4B5FBC3, 0xC4B6FBC3, 0xC4B7FBC3, 0xC4B8FBC3, 0xC4B9FBC3, 0xC4BAFBC3, 0xC4BBFBC3, 0xC4BCFBC3, 0xC4BDFBC3, 0xC4BEFBC3, 0xC4BFFBC3, 0xC4C0FBC3, + 0xC4C1FBC3, 0xC4C2FBC3, 0xC4C3FBC3, 0xC4C4FBC3, 0xC4C5FBC3, 0xC4C6FBC3, 0xC4C7FBC3, 0xC4C8FBC3, 0xC4C9FBC3, 0xC4CAFBC3, 0xC4CBFBC3, 0xC4CCFBC3, 0xC4CDFBC3, 0xC4CEFBC3, 0xC4CFFBC3, + 0xC4D0FBC3, 0xC4D1FBC3, 0xC4D2FBC3, 0xC4D3FBC3, 0xC4D4FBC3, 0xC4D5FBC3, 0xC4D6FBC3, 0xC4D7FBC3, 0xC4D8FBC3, 0xC4D9FBC3, 0xC4DAFBC3, 0xC4DBFBC3, 0xC4DCFBC3, 0xC4DDFBC3, 0xC4DEFBC3, + 0xC4DFFBC3, 0xC4E0FBC3, 0xC4E1FBC3, 0xC4E2FBC3, 0xC4E3FBC3, 0xC4E4FBC3, 0xC4E5FBC3, 0xC4E6FBC3, 0xC4E7FBC3, 0xC4E8FBC3, 0xC4E9FBC3, 0xC4EAFBC3, 0xC4EBFBC3, 0xC4ECFBC3, 0xC4EDFBC3, + 0xC4EEFBC3, 0xC4EFFBC3, 0xC4F0FBC3, 0xC4F1FBC3, 0xC4F2FBC3, 0xC4F3FBC3, 0xC4F4FBC3, 0xC4F5FBC3, 0xC4F6FBC3, 0xC4F7FBC3, 0xC4F8FBC3, 0xC4F9FBC3, 0xC4FAFBC3, 0xC4FBFBC3, 0xC4FCFBC3, + 0xC4FDFBC3, 0xC4FEFBC3, 0xC4FFFBC3, 0xC500FBC3, 0xC501FBC3, 0xC502FBC3, 0xC503FBC3, 0xC504FBC3, 0xC505FBC3, 0xC506FBC3, 0xC507FBC3, 0xC508FBC3, 0xC509FBC3, 0xC50AFBC3, 0xC50BFBC3, + 0xC50CFBC3, 0xC50DFBC3, 0xC50EFBC3, 0xC50FFBC3, 0xC510FBC3, 0xC511FBC3, 0xC512FBC3, 0xC513FBC3, 0xC514FBC3, 0xC515FBC3, 0xC516FBC3, 0xC517FBC3, 0xC518FBC3, 0xC519FBC3, 0xC51AFBC3, + 0xC51BFBC3, 0xC51CFBC3, 0xC51DFBC3, 0xC51EFBC3, 0xC51FFBC3, 0xC520FBC3, 0xC521FBC3, 0xC522FBC3, 0xC523FBC3, 0xC524FBC3, 0xC525FBC3, 0xC526FBC3, 0xC527FBC3, 0xC528FBC3, 0xC529FBC3, + 0xC52AFBC3, 0xC52BFBC3, 0xC52CFBC3, 0xC52DFBC3, 0xC52EFBC3, 0xC52FFBC3, 0xC530FBC3, 0xC531FBC3, 0xC532FBC3, 0xC533FBC3, 0xC534FBC3, 0xC535FBC3, 0xC536FBC3, 0xC537FBC3, 0xC538FBC3, + 0xC539FBC3, 0xC53AFBC3, 0xC53BFBC3, 0xC53CFBC3, 0xC53DFBC3, 0xC53EFBC3, 0xC53FFBC3, 0xC540FBC3, 0xC541FBC3, 0xC542FBC3, 0xC543FBC3, 0xC544FBC3, 0xC545FBC3, 0xC546FBC3, 0xC547FBC3, + 0xC548FBC3, 0xC549FBC3, 0xC54AFBC3, 0xC54BFBC3, 0xC54CFBC3, 0xC54DFBC3, 0xC54EFBC3, 
0xC54FFBC3, 0xC550FBC3, 0xC551FBC3, 0xC552FBC3, 0xC553FBC3, 0xC554FBC3, 0xC555FBC3, 0xC556FBC3, + 0xC557FBC3, 0xC558FBC3, 0xC559FBC3, 0xC55AFBC3, 0xC55BFBC3, 0xC55CFBC3, 0xC55DFBC3, 0xC55EFBC3, 0xC55FFBC3, 0xC560FBC3, 0xC561FBC3, 0xC562FBC3, 0xC563FBC3, 0xC564FBC3, 0xC565FBC3, + 0xC566FBC3, 0xC567FBC3, 0xC568FBC3, 0xC569FBC3, 0xC56AFBC3, 0xC56BFBC3, 0xC56CFBC3, 0xC56DFBC3, 0xC56EFBC3, 0xC56FFBC3, 0xC570FBC3, 0xC571FBC3, 0xC572FBC3, 0xC573FBC3, 0xC574FBC3, + 0xC575FBC3, 0xC576FBC3, 0xC577FBC3, 0xC578FBC3, 0xC579FBC3, 0xC57AFBC3, 0xC57BFBC3, 0xC57CFBC3, 0xC57DFBC3, 0xC57EFBC3, 0xC57FFBC3, 0xC580FBC3, 0xC581FBC3, 0xC582FBC3, 0xC583FBC3, + 0xC584FBC3, 0xC585FBC3, 0xC586FBC3, 0xC587FBC3, 0xC588FBC3, 0xC589FBC3, 0xC58AFBC3, 0xC58BFBC3, 0xC58CFBC3, 0xC58DFBC3, 0xC58EFBC3, 0xC58FFBC3, 0xC590FBC3, 0xC591FBC3, 0xC592FBC3, + 0xC593FBC3, 0xC594FBC3, 0xC595FBC3, 0xC596FBC3, 0xC597FBC3, 0xC598FBC3, 0xC599FBC3, 0xC59AFBC3, 0xC59BFBC3, 0xC59CFBC3, 0xC59DFBC3, 0xC59EFBC3, 0xC59FFBC3, 0xC5A0FBC3, 0xC5A1FBC3, + 0xC5A2FBC3, 0xC5A3FBC3, 0xC5A4FBC3, 0xC5A5FBC3, 0xC5A6FBC3, 0xC5A7FBC3, 0xC5A8FBC3, 0xC5A9FBC3, 0xC5AAFBC3, 0xC5ABFBC3, 0xC5ACFBC3, 0xC5ADFBC3, 0xC5AEFBC3, 0xC5AFFBC3, 0xC5B0FBC3, + 0xC5B1FBC3, 0xC5B2FBC3, 0xC5B3FBC3, 0xC5B4FBC3, 0xC5B5FBC3, 0xC5B6FBC3, 0xC5B7FBC3, 0xC5B8FBC3, 0xC5B9FBC3, 0xC5BAFBC3, 0xC5BBFBC3, 0xC5BCFBC3, 0xC5BDFBC3, 0xC5BEFBC3, 0xC5BFFBC3, + 0xC5C0FBC3, 0xC5C1FBC3, 0xC5C2FBC3, 0xC5C3FBC3, 0xC5C4FBC3, 0xC5C5FBC3, 0xC5C6FBC3, 0xC5C7FBC3, 0xC5C8FBC3, 0xC5C9FBC3, 0xC5CAFBC3, 0xC5CBFBC3, 0xC5CCFBC3, 0xC5CDFBC3, 0xC5CEFBC3, + 0xC5CFFBC3, 0xC5D0FBC3, 0xC5D1FBC3, 0xC5D2FBC3, 0xC5D3FBC3, 0xC5D4FBC3, 0xC5D5FBC3, 0xC5D6FBC3, 0xC5D7FBC3, 0xC5D8FBC3, 0xC5D9FBC3, 0xC5DAFBC3, 0xC5DBFBC3, 0xC5DCFBC3, 0xC5DDFBC3, + 0xC5DEFBC3, 0xC5DFFBC3, 0xC5E0FBC3, 0xC5E1FBC3, 0xC5E2FBC3, 0xC5E3FBC3, 0xC5E4FBC3, 0xC5E5FBC3, 0xC5E6FBC3, 0xC5E7FBC3, 0xC5E8FBC3, 0xC5E9FBC3, 0xC5EAFBC3, 0xC5EBFBC3, 0xC5ECFBC3, + 0xC5EDFBC3, 0xC5EEFBC3, 0xC5EFFBC3, 0xC5F0FBC3, 0xC5F1FBC3, 0xC5F2FBC3, 
0xC5F3FBC3, 0xC5F4FBC3, 0xC5F5FBC3, 0xC5F6FBC3, 0xC5F7FBC3, 0xC5F8FBC3, 0xC5F9FBC3, 0xC5FAFBC3, 0xC5FBFBC3, + 0xC5FCFBC3, 0xC5FDFBC3, 0xC5FEFBC3, 0xC5FFFBC3, 0xC600FBC3, 0xC601FBC3, 0xC602FBC3, 0xC603FBC3, 0xC604FBC3, 0xC605FBC3, 0xC606FBC3, 0xC607FBC3, 0xC608FBC3, 0xC609FBC3, 0xC60AFBC3, + 0xC60BFBC3, 0xC60CFBC3, 0xC60DFBC3, 0xC60EFBC3, 0xC60FFBC3, 0xC610FBC3, 0xC611FBC3, 0xC612FBC3, 0xC613FBC3, 0xC614FBC3, 0xC615FBC3, 0xC616FBC3, 0xC617FBC3, 0xC618FBC3, 0xC619FBC3, + 0xC61AFBC3, 0xC61BFBC3, 0xC61CFBC3, 0xC61DFBC3, 0xC61EFBC3, 0xC61FFBC3, 0xC620FBC3, 0xC621FBC3, 0xC622FBC3, 0xC623FBC3, 0xC624FBC3, 0xC625FBC3, 0xC626FBC3, 0xC627FBC3, 0xC628FBC3, + 0xC629FBC3, 0xC62AFBC3, 0xC62BFBC3, 0xC62CFBC3, 0xC62DFBC3, 0xC62EFBC3, 0xC62FFBC3, 0xC630FBC3, 0xC631FBC3, 0xC632FBC3, 0xC633FBC3, 0xC634FBC3, 0xC635FBC3, 0xC636FBC3, 0xC637FBC3, + 0xC638FBC3, 0xC639FBC3, 0xC63AFBC3, 0xC63BFBC3, 0xC63CFBC3, 0xC63DFBC3, 0xC63EFBC3, 0xC63FFBC3, 0xC640FBC3, 0xC641FBC3, 0xC642FBC3, 0xC643FBC3, 0xC644FBC3, 0xC645FBC3, 0xC646FBC3, + 0xC647FBC3, 0xC648FBC3, 0xC649FBC3, 0xC64AFBC3, 0xC64BFBC3, 0xC64CFBC3, 0xC64DFBC3, 0xC64EFBC3, 0xC64FFBC3, 0xC650FBC3, 0xC651FBC3, 0xC652FBC3, 0xC653FBC3, 0xC654FBC3, 0xC655FBC3, + 0xC656FBC3, 0xC657FBC3, 0xC658FBC3, 0xC659FBC3, 0xC65AFBC3, 0xC65BFBC3, 0xC65CFBC3, 0xC65DFBC3, 0xC65EFBC3, 0xC65FFBC3, 0xC660FBC3, 0xC661FBC3, 0xC662FBC3, 0xC663FBC3, 0xC664FBC3, + 0xC665FBC3, 0xC666FBC3, 0xC667FBC3, 0xC668FBC3, 0xC669FBC3, 0xC66AFBC3, 0xC66BFBC3, 0xC66CFBC3, 0xC66DFBC3, 0xC66EFBC3, 0xC66FFBC3, 0xC670FBC3, 0xC671FBC3, 0xC672FBC3, 0xC673FBC3, + 0xC674FBC3, 0xC675FBC3, 0xC676FBC3, 0xC677FBC3, 0xC678FBC3, 0xC679FBC3, 0xC67AFBC3, 0xC67BFBC3, 0xC67CFBC3, 0xC67DFBC3, 0xC67EFBC3, 0xC67FFBC3, 0xC680FBC3, 0xC681FBC3, 0xC682FBC3, + 0xC683FBC3, 0xC684FBC3, 0xC685FBC3, 0xC686FBC3, 0xC687FBC3, 0xC688FBC3, 0xC689FBC3, 0xC68AFBC3, 0xC68BFBC3, 0xC68CFBC3, 0xC68DFBC3, 0xC68EFBC3, 0xC68FFBC3, 0xC690FBC3, 0xC691FBC3, + 0xC692FBC3, 0xC693FBC3, 0xC694FBC3, 0xC695FBC3, 0xC696FBC3, 
0xC697FBC3, 0xC698FBC3, 0xC699FBC3, 0xC69AFBC3, 0xC69BFBC3, 0xC69CFBC3, 0xC69DFBC3, 0xC69EFBC3, 0xC69FFBC3, 0xC6A0FBC3, + 0xC6A1FBC3, 0xC6A2FBC3, 0xC6A3FBC3, 0xC6A4FBC3, 0xC6A5FBC3, 0xC6A6FBC3, 0xC6A7FBC3, 0xC6A8FBC3, 0xC6A9FBC3, 0xC6AAFBC3, 0xC6ABFBC3, 0xC6ACFBC3, 0xC6ADFBC3, 0xC6AEFBC3, 0xC6AFFBC3, + 0xC6B0FBC3, 0xC6B1FBC3, 0xC6B2FBC3, 0xC6B3FBC3, 0xC6B4FBC3, 0xC6B5FBC3, 0xC6B6FBC3, 0xC6B7FBC3, 0xC6B8FBC3, 0xC6B9FBC3, 0xC6BAFBC3, 0xC6BBFBC3, 0xC6BCFBC3, 0xC6BDFBC3, 0xC6BEFBC3, + 0xC6BFFBC3, 0xC6C0FBC3, 0xC6C1FBC3, 0xC6C2FBC3, 0xC6C3FBC3, 0xC6C4FBC3, 0xC6C5FBC3, 0xC6C6FBC3, 0xC6C7FBC3, 0xC6C8FBC3, 0xC6C9FBC3, 0xC6CAFBC3, 0xC6CBFBC3, 0xC6CCFBC3, 0xC6CDFBC3, + 0xC6CEFBC3, 0xC6CFFBC3, 0xC6D0FBC3, 0xC6D1FBC3, 0xC6D2FBC3, 0xC6D3FBC3, 0xC6D4FBC3, 0xC6D5FBC3, 0xC6D6FBC3, 0xC6D7FBC3, 0xC6D8FBC3, 0xC6D9FBC3, 0xC6DAFBC3, 0xC6DBFBC3, 0xC6DCFBC3, + 0xC6DDFBC3, 0xC6DEFBC3, 0xC6DFFBC3, 0xC6E0FBC3, 0xC6E1FBC3, 0xC6E2FBC3, 0xC6E3FBC3, 0xC6E4FBC3, 0xC6E5FBC3, 0xC6E6FBC3, 0xC6E7FBC3, 0xC6E8FBC3, 0xC6E9FBC3, 0xC6EAFBC3, 0xC6EBFBC3, + 0xC6ECFBC3, 0xC6EDFBC3, 0xC6EEFBC3, 0xC6EFFBC3, 0xC6F0FBC3, 0xC6F1FBC3, 0xC6F2FBC3, 0xC6F3FBC3, 0xC6F4FBC3, 0xC6F5FBC3, 0xC6F6FBC3, 0xC6F7FBC3, 0xC6F8FBC3, 0xC6F9FBC3, 0xC6FAFBC3, + 0xC6FBFBC3, 0xC6FCFBC3, 0xC6FDFBC3, 0xC6FEFBC3, 0xC6FFFBC3, 0xC700FBC3, 0xC701FBC3, 0xC702FBC3, 0xC703FBC3, 0xC704FBC3, 0xC705FBC3, 0xC706FBC3, 0xC707FBC3, 0xC708FBC3, 0xC709FBC3, + 0xC70AFBC3, 0xC70BFBC3, 0xC70CFBC3, 0xC70DFBC3, 0xC70EFBC3, 0xC70FFBC3, 0xC710FBC3, 0xC711FBC3, 0xC712FBC3, 0xC713FBC3, 0xC714FBC3, 0xC715FBC3, 0xC716FBC3, 0xC717FBC3, 0xC718FBC3, + 0xC719FBC3, 0xC71AFBC3, 0xC71BFBC3, 0xC71CFBC3, 0xC71DFBC3, 0xC71EFBC3, 0xC71FFBC3, 0xC720FBC3, 0xC721FBC3, 0xC722FBC3, 0xC723FBC3, 0xC724FBC3, 0xC725FBC3, 0xC726FBC3, 0xC727FBC3, + 0xC728FBC3, 0xC729FBC3, 0xC72AFBC3, 0xC72BFBC3, 0xC72CFBC3, 0xC72DFBC3, 0xC72EFBC3, 0xC72FFBC3, 0xC730FBC3, 0xC731FBC3, 0xC732FBC3, 0xC733FBC3, 0xC734FBC3, 0xC735FBC3, 0xC736FBC3, + 0xC737FBC3, 0xC738FBC3, 0xC739FBC3, 0xC73AFBC3, 
0xC73BFBC3, 0xC73CFBC3, 0xC73DFBC3, 0xC73EFBC3, 0xC73FFBC3, 0xC740FBC3, 0xC741FBC3, 0xC742FBC3, 0xC743FBC3, 0xC744FBC3, 0xC745FBC3, + 0xC746FBC3, 0xC747FBC3, 0xC748FBC3, 0xC749FBC3, 0xC74AFBC3, 0xC74BFBC3, 0xC74CFBC3, 0xC74DFBC3, 0xC74EFBC3, 0xC74FFBC3, 0xC750FBC3, 0xC751FBC3, 0xC752FBC3, 0xC753FBC3, 0xC754FBC3, + 0xC755FBC3, 0xC756FBC3, 0xC757FBC3, 0xC758FBC3, 0xC759FBC3, 0xC75AFBC3, 0xC75BFBC3, 0xC75CFBC3, 0xC75DFBC3, 0xC75EFBC3, 0xC75FFBC3, 0xC760FBC3, 0xC761FBC3, 0xC762FBC3, 0xC763FBC3, + 0xC764FBC3, 0xC765FBC3, 0xC766FBC3, 0xC767FBC3, 0xC768FBC3, 0xC769FBC3, 0xC76AFBC3, 0xC76BFBC3, 0xC76CFBC3, 0xC76DFBC3, 0xC76EFBC3, 0xC76FFBC3, 0xC770FBC3, 0xC771FBC3, 0xC772FBC3, + 0xC773FBC3, 0xC774FBC3, 0xC775FBC3, 0xC776FBC3, 0xC777FBC3, 0xC778FBC3, 0xC779FBC3, 0xC77AFBC3, 0xC77BFBC3, 0xC77CFBC3, 0xC77DFBC3, 0xC77EFBC3, 0xC77FFBC3, 0xC780FBC3, 0xC781FBC3, + 0xC782FBC3, 0xC783FBC3, 0xC784FBC3, 0xC785FBC3, 0xC786FBC3, 0xC787FBC3, 0xC788FBC3, 0xC789FBC3, 0xC78AFBC3, 0xC78BFBC3, 0xC78CFBC3, 0xC78DFBC3, 0xC78EFBC3, 0xC78FFBC3, 0xC790FBC3, + 0xC791FBC3, 0xC792FBC3, 0xC793FBC3, 0xC794FBC3, 0xC795FBC3, 0xC796FBC3, 0xC797FBC3, 0xC798FBC3, 0xC799FBC3, 0xC79AFBC3, 0xC79BFBC3, 0xC79CFBC3, 0xC79DFBC3, 0xC79EFBC3, 0xC79FFBC3, + 0xC7A0FBC3, 0xC7A1FBC3, 0xC7A2FBC3, 0xC7A3FBC3, 0xC7A4FBC3, 0xC7A5FBC3, 0xC7A6FBC3, 0xC7A7FBC3, 0xC7A8FBC3, 0xC7A9FBC3, 0xC7AAFBC3, 0xC7ABFBC3, 0xC7ACFBC3, 0xC7ADFBC3, 0xC7AEFBC3, + 0xC7AFFBC3, 0xC7B0FBC3, 0xC7B1FBC3, 0xC7B2FBC3, 0xC7B3FBC3, 0xC7B4FBC3, 0xC7B5FBC3, 0xC7B6FBC3, 0xC7B7FBC3, 0xC7B8FBC3, 0xC7B9FBC3, 0xC7BAFBC3, 0xC7BBFBC3, 0xC7BCFBC3, 0xC7BDFBC3, + 0xC7BEFBC3, 0xC7BFFBC3, 0xC7C0FBC3, 0xC7C1FBC3, 0xC7C2FBC3, 0xC7C3FBC3, 0xC7C4FBC3, 0xC7C5FBC3, 0xC7C6FBC3, 0xC7C7FBC3, 0xC7C8FBC3, 0xC7C9FBC3, 0xC7CAFBC3, 0xC7CBFBC3, 0xC7CCFBC3, + 0xC7CDFBC3, 0xC7CEFBC3, 0xC7CFFBC3, 0xC7D0FBC3, 0xC7D1FBC3, 0xC7D2FBC3, 0xC7D3FBC3, 0xC7D4FBC3, 0xC7D5FBC3, 0xC7D6FBC3, 0xC7D7FBC3, 0xC7D8FBC3, 0xC7D9FBC3, 0xC7DAFBC3, 0xC7DBFBC3, + 0xC7DCFBC3, 0xC7DDFBC3, 0xC7DEFBC3, 
0xC7DFFBC3, 0xC7E0FBC3, 0xC7E1FBC3, 0xC7E2FBC3, 0xC7E3FBC3, 0xC7E4FBC3, 0xC7E5FBC3, 0xC7E6FBC3, 0xC7E7FBC3, 0xC7E8FBC3, 0xC7E9FBC3, 0xC7EAFBC3, + 0xC7EBFBC3, 0xC7ECFBC3, 0xC7EDFBC3, 0xC7EEFBC3, 0xC7EFFBC3, 0xC7F0FBC3, 0xC7F1FBC3, 0xC7F2FBC3, 0xC7F3FBC3, 0xC7F4FBC3, 0xC7F5FBC3, 0xC7F6FBC3, 0xC7F7FBC3, 0xC7F8FBC3, 0xC7F9FBC3, + 0xC7FAFBC3, 0xC7FBFBC3, 0xC7FCFBC3, 0xC7FDFBC3, 0xC7FEFBC3, 0xC7FFFBC3, 0xC800FBC3, 0xC801FBC3, 0xC802FBC3, 0xC803FBC3, 0xC804FBC3, 0xC805FBC3, 0xC806FBC3, 0xC807FBC3, 0xC808FBC3, + 0xC809FBC3, 0xC80AFBC3, 0xC80BFBC3, 0xC80CFBC3, 0xC80DFBC3, 0xC80EFBC3, 0xC80FFBC3, 0xC810FBC3, 0xC811FBC3, 0xC812FBC3, 0xC813FBC3, 0xC814FBC3, 0xC815FBC3, 0xC816FBC3, 0xC817FBC3, + 0xC818FBC3, 0xC819FBC3, 0xC81AFBC3, 0xC81BFBC3, 0xC81CFBC3, 0xC81DFBC3, 0xC81EFBC3, 0xC81FFBC3, 0xC820FBC3, 0xC821FBC3, 0xC822FBC3, 0xC823FBC3, 0xC824FBC3, 0xC825FBC3, 0xC826FBC3, + 0xC827FBC3, 0xC828FBC3, 0xC829FBC3, 0xC82AFBC3, 0xC82BFBC3, 0xC82CFBC3, 0xC82DFBC3, 0xC82EFBC3, 0xC82FFBC3, 0xC830FBC3, 0xC831FBC3, 0xC832FBC3, 0xC833FBC3, 0xC834FBC3, 0xC835FBC3, + 0xC836FBC3, 0xC837FBC3, 0xC838FBC3, 0xC839FBC3, 0xC83AFBC3, 0xC83BFBC3, 0xC83CFBC3, 0xC83DFBC3, 0xC83EFBC3, 0xC83FFBC3, 0xC840FBC3, 0xC841FBC3, 0xC842FBC3, 0xC843FBC3, 0xC844FBC3, + 0xC845FBC3, 0xC846FBC3, 0xC847FBC3, 0xC848FBC3, 0xC849FBC3, 0xC84AFBC3, 0xC84BFBC3, 0xC84CFBC3, 0xC84DFBC3, 0xC84EFBC3, 0xC84FFBC3, 0xC850FBC3, 0xC851FBC3, 0xC852FBC3, 0xC853FBC3, + 0xC854FBC3, 0xC855FBC3, 0xC856FBC3, 0xC857FBC3, 0xC858FBC3, 0xC859FBC3, 0xC85AFBC3, 0xC85BFBC3, 0xC85CFBC3, 0xC85DFBC3, 0xC85EFBC3, 0xC85FFBC3, 0xC860FBC3, 0xC861FBC3, 0xC862FBC3, + 0xC863FBC3, 0xC864FBC3, 0xC865FBC3, 0xC866FBC3, 0xC867FBC3, 0xC868FBC3, 0xC869FBC3, 0xC86AFBC3, 0xC86BFBC3, 0xC86CFBC3, 0xC86DFBC3, 0xC86EFBC3, 0xC86FFBC3, 0xC870FBC3, 0xC871FBC3, + 0xC872FBC3, 0xC873FBC3, 0xC874FBC3, 0xC875FBC3, 0xC876FBC3, 0xC877FBC3, 0xC878FBC3, 0xC879FBC3, 0xC87AFBC3, 0xC87BFBC3, 0xC87CFBC3, 0xC87DFBC3, 0xC87EFBC3, 0xC87FFBC3, 0xC880FBC3, + 0xC881FBC3, 0xC882FBC3, 
0xC883FBC3, 0xC884FBC3, 0xC885FBC3, 0xC886FBC3, 0xC887FBC3, 0xC888FBC3, 0xC889FBC3, 0xC88AFBC3, 0xC88BFBC3, 0xC88CFBC3, 0xC88DFBC3, 0xC88EFBC3, 0xC88FFBC3, + 0xC890FBC3, 0xC891FBC3, 0xC892FBC3, 0xC893FBC3, 0xC894FBC3, 0xC895FBC3, 0xC896FBC3, 0xC897FBC3, 0xC898FBC3, 0xC899FBC3, 0xC89AFBC3, 0xC89BFBC3, 0xC89CFBC3, 0xC89DFBC3, 0xC89EFBC3, + 0xC89FFBC3, 0xC8A0FBC3, 0xC8A1FBC3, 0xC8A2FBC3, 0xC8A3FBC3, 0xC8A4FBC3, 0xC8A5FBC3, 0xC8A6FBC3, 0xC8A7FBC3, 0xC8A8FBC3, 0xC8A9FBC3, 0xC8AAFBC3, 0xC8ABFBC3, 0xC8ACFBC3, 0xC8ADFBC3, + 0xC8AEFBC3, 0xC8AFFBC3, 0xC8B0FBC3, 0xC8B1FBC3, 0xC8B2FBC3, 0xC8B3FBC3, 0xC8B4FBC3, 0xC8B5FBC3, 0xC8B6FBC3, 0xC8B7FBC3, 0xC8B8FBC3, 0xC8B9FBC3, 0xC8BAFBC3, 0xC8BBFBC3, 0xC8BCFBC3, + 0xC8BDFBC3, 0xC8BEFBC3, 0xC8BFFBC3, 0xC8C0FBC3, 0xC8C1FBC3, 0xC8C2FBC3, 0xC8C3FBC3, 0xC8C4FBC3, 0xC8C5FBC3, 0xC8C6FBC3, 0xC8C7FBC3, 0xC8C8FBC3, 0xC8C9FBC3, 0xC8CAFBC3, 0xC8CBFBC3, + 0xC8CCFBC3, 0xC8CDFBC3, 0xC8CEFBC3, 0xC8CFFBC3, 0xC8D0FBC3, 0xC8D1FBC3, 0xC8D2FBC3, 0xC8D3FBC3, 0xC8D4FBC3, 0xC8D5FBC3, 0xC8D6FBC3, 0xC8D7FBC3, 0xC8D8FBC3, 0xC8D9FBC3, 0xC8DAFBC3, + 0xC8DBFBC3, 0xC8DCFBC3, 0xC8DDFBC3, 0xC8DEFBC3, 0xC8DFFBC3, 0xC8E0FBC3, 0xC8E1FBC3, 0xC8E2FBC3, 0xC8E3FBC3, 0xC8E4FBC3, 0xC8E5FBC3, 0xC8E6FBC3, 0xC8E7FBC3, 0xC8E8FBC3, 0xC8E9FBC3, + 0xC8EAFBC3, 0xC8EBFBC3, 0xC8ECFBC3, 0xC8EDFBC3, 0xC8EEFBC3, 0xC8EFFBC3, 0xC8F0FBC3, 0xC8F1FBC3, 0xC8F2FBC3, 0xC8F3FBC3, 0xC8F4FBC3, 0xC8F5FBC3, 0xC8F6FBC3, 0xC8F7FBC3, 0xC8F8FBC3, + 0xC8F9FBC3, 0xC8FAFBC3, 0xC8FBFBC3, 0xC8FCFBC3, 0xC8FDFBC3, 0xC8FEFBC3, 0xC8FFFBC3, 0xC900FBC3, 0xC901FBC3, 0xC902FBC3, 0xC903FBC3, 0xC904FBC3, 0xC905FBC3, 0xC906FBC3, 0xC907FBC3, + 0xC908FBC3, 0xC909FBC3, 0xC90AFBC3, 0xC90BFBC3, 0xC90CFBC3, 0xC90DFBC3, 0xC90EFBC3, 0xC90FFBC3, 0xC910FBC3, 0xC911FBC3, 0xC912FBC3, 0xC913FBC3, 0xC914FBC3, 0xC915FBC3, 0xC916FBC3, + 0xC917FBC3, 0xC918FBC3, 0xC919FBC3, 0xC91AFBC3, 0xC91BFBC3, 0xC91CFBC3, 0xC91DFBC3, 0xC91EFBC3, 0xC91FFBC3, 0xC920FBC3, 0xC921FBC3, 0xC922FBC3, 0xC923FBC3, 0xC924FBC3, 0xC925FBC3, + 0xC926FBC3, 
0xC927FBC3, 0xC928FBC3, 0xC929FBC3, 0xC92AFBC3, 0xC92BFBC3, 0xC92CFBC3, 0xC92DFBC3, 0xC92EFBC3, 0xC92FFBC3, 0xC930FBC3, 0xC931FBC3, 0xC932FBC3, 0xC933FBC3, 0xC934FBC3, + 0xC935FBC3, 0xC936FBC3, 0xC937FBC3, 0xC938FBC3, 0xC939FBC3, 0xC93AFBC3, 0xC93BFBC3, 0xC93CFBC3, 0xC93DFBC3, 0xC93EFBC3, 0xC93FFBC3, 0xC940FBC3, 0xC941FBC3, 0xC942FBC3, 0xC943FBC3, + 0xC944FBC3, 0xC945FBC3, 0xC946FBC3, 0xC947FBC3, 0xC948FBC3, 0xC949FBC3, 0xC94AFBC3, 0xC94BFBC3, 0xC94CFBC3, 0xC94DFBC3, 0xC94EFBC3, 0xC94FFBC3, 0xC950FBC3, 0xC951FBC3, 0xC952FBC3, + 0xC953FBC3, 0xC954FBC3, 0xC955FBC3, 0xC956FBC3, 0xC957FBC3, 0xC958FBC3, 0xC959FBC3, 0xC95AFBC3, 0xC95BFBC3, 0xC95CFBC3, 0xC95DFBC3, 0xC95EFBC3, 0xC95FFBC3, 0xC960FBC3, 0xC961FBC3, + 0xC962FBC3, 0xC963FBC3, 0xC964FBC3, 0xC965FBC3, 0xC966FBC3, 0xC967FBC3, 0xC968FBC3, 0xC969FBC3, 0xC96AFBC3, 0xC96BFBC3, 0xC96CFBC3, 0xC96DFBC3, 0xC96EFBC3, 0xC96FFBC3, 0xC970FBC3, + 0xC971FBC3, 0xC972FBC3, 0xC973FBC3, 0xC974FBC3, 0xC975FBC3, 0xC976FBC3, 0xC977FBC3, 0xC978FBC3, 0xC979FBC3, 0xC97AFBC3, 0xC97BFBC3, 0xC97CFBC3, 0xC97DFBC3, 0xC97EFBC3, 0xC97FFBC3, + 0xC980FBC3, 0xC981FBC3, 0xC982FBC3, 0xC983FBC3, 0xC984FBC3, 0xC985FBC3, 0xC986FBC3, 0xC987FBC3, 0xC988FBC3, 0xC989FBC3, 0xC98AFBC3, 0xC98BFBC3, 0xC98CFBC3, 0xC98DFBC3, 0xC98EFBC3, + 0xC98FFBC3, 0xC990FBC3, 0xC991FBC3, 0xC992FBC3, 0xC993FBC3, 0xC994FBC3, 0xC995FBC3, 0xC996FBC3, 0xC997FBC3, 0xC998FBC3, 0xC999FBC3, 0xC99AFBC3, 0xC99BFBC3, 0xC99CFBC3, 0xC99DFBC3, + 0xC99EFBC3, 0xC99FFBC3, 0xC9A0FBC3, 0xC9A1FBC3, 0xC9A2FBC3, 0xC9A3FBC3, 0xC9A4FBC3, 0xC9A5FBC3, 0xC9A6FBC3, 0xC9A7FBC3, 0xC9A8FBC3, 0xC9A9FBC3, 0xC9AAFBC3, 0xC9ABFBC3, 0xC9ACFBC3, + 0xC9ADFBC3, 0xC9AEFBC3, 0xC9AFFBC3, 0xC9B0FBC3, 0xC9B1FBC3, 0xC9B2FBC3, 0xC9B3FBC3, 0xC9B4FBC3, 0xC9B5FBC3, 0xC9B6FBC3, 0xC9B7FBC3, 0xC9B8FBC3, 0xC9B9FBC3, 0xC9BAFBC3, 0xC9BBFBC3, + 0xC9BCFBC3, 0xC9BDFBC3, 0xC9BEFBC3, 0xC9BFFBC3, 0xC9C0FBC3, 0xC9C1FBC3, 0xC9C2FBC3, 0xC9C3FBC3, 0xC9C4FBC3, 0xC9C5FBC3, 0xC9C6FBC3, 0xC9C7FBC3, 0xC9C8FBC3, 0xC9C9FBC3, 0xC9CAFBC3, + 
0xC9CBFBC3, 0xC9CCFBC3, 0xC9CDFBC3, 0xC9CEFBC3, 0xC9CFFBC3, 0xC9D0FBC3, 0xC9D1FBC3, 0xC9D2FBC3, 0xC9D3FBC3, 0xC9D4FBC3, 0xC9D5FBC3, 0xC9D6FBC3, 0xC9D7FBC3, 0xC9D8FBC3, 0xC9D9FBC3, + 0xC9DAFBC3, 0xC9DBFBC3, 0xC9DCFBC3, 0xC9DDFBC3, 0xC9DEFBC3, 0xC9DFFBC3, 0xC9E0FBC3, 0xC9E1FBC3, 0xC9E2FBC3, 0xC9E3FBC3, 0xC9E4FBC3, 0xC9E5FBC3, 0xC9E6FBC3, 0xC9E7FBC3, 0xC9E8FBC3, + 0xC9E9FBC3, 0xC9EAFBC3, 0xC9EBFBC3, 0xC9ECFBC3, 0xC9EDFBC3, 0xC9EEFBC3, 0xC9EFFBC3, 0xC9F0FBC3, 0xC9F1FBC3, 0xC9F2FBC3, 0xC9F3FBC3, 0xC9F4FBC3, 0xC9F5FBC3, 0xC9F6FBC3, 0xC9F7FBC3, + 0xC9F8FBC3, 0xC9F9FBC3, 0xC9FAFBC3, 0xC9FBFBC3, 0xC9FCFBC3, 0xC9FDFBC3, 0xC9FEFBC3, 0xC9FFFBC3, 0xCA00FBC3, 0xCA01FBC3, 0xCA02FBC3, 0xCA03FBC3, 0xCA04FBC3, 0xCA05FBC3, 0xCA06FBC3, + 0xCA07FBC3, 0xCA08FBC3, 0xCA09FBC3, 0xCA0AFBC3, 0xCA0BFBC3, 0xCA0CFBC3, 0xCA0DFBC3, 0xCA0EFBC3, 0xCA0FFBC3, 0xCA10FBC3, 0xCA11FBC3, 0xCA12FBC3, 0xCA13FBC3, 0xCA14FBC3, 0xCA15FBC3, + 0xCA16FBC3, 0xCA17FBC3, 0xCA18FBC3, 0xCA19FBC3, 0xCA1AFBC3, 0xCA1BFBC3, 0xCA1CFBC3, 0xCA1DFBC3, 0xCA1EFBC3, 0xCA1FFBC3, 0xCA20FBC3, 0xCA21FBC3, 0xCA22FBC3, 0xCA23FBC3, 0xCA24FBC3, + 0xCA25FBC3, 0xCA26FBC3, 0xCA27FBC3, 0xCA28FBC3, 0xCA29FBC3, 0xCA2AFBC3, 0xCA2BFBC3, 0xCA2CFBC3, 0xCA2DFBC3, 0xCA2EFBC3, 0xCA2FFBC3, 0xCA30FBC3, 0xCA31FBC3, 0xCA32FBC3, 0xCA33FBC3, + 0xCA34FBC3, 0xCA35FBC3, 0xCA36FBC3, 0xCA37FBC3, 0xCA38FBC3, 0xCA39FBC3, 0xCA3AFBC3, 0xCA3BFBC3, 0xCA3CFBC3, 0xCA3DFBC3, 0xCA3EFBC3, 0xCA3FFBC3, 0xCA40FBC3, 0xCA41FBC3, 0xCA42FBC3, + 0xCA43FBC3, 0xCA44FBC3, 0xCA45FBC3, 0xCA46FBC3, 0xCA47FBC3, 0xCA48FBC3, 0xCA49FBC3, 0xCA4AFBC3, 0xCA4BFBC3, 0xCA4CFBC3, 0xCA4DFBC3, 0xCA4EFBC3, 0xCA4FFBC3, 0xCA50FBC3, 0xCA51FBC3, + 0xCA52FBC3, 0xCA53FBC3, 0xCA54FBC3, 0xCA55FBC3, 0xCA56FBC3, 0xCA57FBC3, 0xCA58FBC3, 0xCA59FBC3, 0xCA5AFBC3, 0xCA5BFBC3, 0xCA5CFBC3, 0xCA5DFBC3, 0xCA5EFBC3, 0xCA5FFBC3, 0xCA60FBC3, + 0xCA61FBC3, 0xCA62FBC3, 0xCA63FBC3, 0xCA64FBC3, 0xCA65FBC3, 0xCA66FBC3, 0xCA67FBC3, 0xCA68FBC3, 0xCA69FBC3, 0xCA6AFBC3, 0xCA6BFBC3, 0xCA6CFBC3, 0xCA6DFBC3, 0xCA6EFBC3, 0xCA6FFBC3, 
+ 0xCA70FBC3, 0xCA71FBC3, 0xCA72FBC3, 0xCA73FBC3, 0xCA74FBC3, 0xCA75FBC3, 0xCA76FBC3, 0xCA77FBC3, 0xCA78FBC3, 0xCA79FBC3, 0xCA7AFBC3, 0xCA7BFBC3, 0xCA7CFBC3, 0xCA7DFBC3, 0xCA7EFBC3, + 0xCA7FFBC3, 0xCA80FBC3, 0xCA81FBC3, 0xCA82FBC3, 0xCA83FBC3, 0xCA84FBC3, 0xCA85FBC3, 0xCA86FBC3, 0xCA87FBC3, 0xCA88FBC3, 0xCA89FBC3, 0xCA8AFBC3, 0xCA8BFBC3, 0xCA8CFBC3, 0xCA8DFBC3, + 0xCA8EFBC3, 0xCA8FFBC3, 0xCA90FBC3, 0xCA91FBC3, 0xCA92FBC3, 0xCA93FBC3, 0xCA94FBC3, 0xCA95FBC3, 0xCA96FBC3, 0xCA97FBC3, 0xCA98FBC3, 0xCA99FBC3, 0xCA9AFBC3, 0xCA9BFBC3, 0xCA9CFBC3, + 0xCA9DFBC3, 0xCA9EFBC3, 0xCA9FFBC3, 0xCAA0FBC3, 0xCAA1FBC3, 0xCAA2FBC3, 0xCAA3FBC3, 0xCAA4FBC3, 0xCAA5FBC3, 0xCAA6FBC3, 0xCAA7FBC3, 0xCAA8FBC3, 0xCAA9FBC3, 0xCAAAFBC3, 0xCAABFBC3, + 0xCAACFBC3, 0xCAADFBC3, 0xCAAEFBC3, 0xCAAFFBC3, 0xCAB0FBC3, 0xCAB1FBC3, 0xCAB2FBC3, 0xCAB3FBC3, 0xCAB4FBC3, 0xCAB5FBC3, 0xCAB6FBC3, 0xCAB7FBC3, 0xCAB8FBC3, 0xCAB9FBC3, 0xCABAFBC3, + 0xCABBFBC3, 0xCABCFBC3, 0xCABDFBC3, 0xCABEFBC3, 0xCABFFBC3, 0xCAC0FBC3, 0xCAC1FBC3, 0xCAC2FBC3, 0xCAC3FBC3, 0xCAC4FBC3, 0xCAC5FBC3, 0xCAC6FBC3, 0xCAC7FBC3, 0xCAC8FBC3, 0xCAC9FBC3, + 0xCACAFBC3, 0xCACBFBC3, 0xCACCFBC3, 0xCACDFBC3, 0xCACEFBC3, 0xCACFFBC3, 0xCAD0FBC3, 0xCAD1FBC3, 0xCAD2FBC3, 0xCAD3FBC3, 0xCAD4FBC3, 0xCAD5FBC3, 0xCAD6FBC3, 0xCAD7FBC3, 0xCAD8FBC3, + 0xCAD9FBC3, 0xCADAFBC3, 0xCADBFBC3, 0xCADCFBC3, 0xCADDFBC3, 0xCADEFBC3, 0xCADFFBC3, 0xCAE0FBC3, 0xCAE1FBC3, 0xCAE2FBC3, 0xCAE3FBC3, 0xCAE4FBC3, 0xCAE5FBC3, 0xCAE6FBC3, 0xCAE7FBC3, + 0xCAE8FBC3, 0xCAE9FBC3, 0xCAEAFBC3, 0xCAEBFBC3, 0xCAECFBC3, 0xCAEDFBC3, 0xCAEEFBC3, 0xCAEFFBC3, 0xCAF0FBC3, 0xCAF1FBC3, 0xCAF2FBC3, 0xCAF3FBC3, 0xCAF4FBC3, 0xCAF5FBC3, 0xCAF6FBC3, + 0xCAF7FBC3, 0xCAF8FBC3, 0xCAF9FBC3, 0xCAFAFBC3, 0xCAFBFBC3, 0xCAFCFBC3, 0xCAFDFBC3, 0xCAFEFBC3, 0xCAFFFBC3, 0xCB00FBC3, 0xCB01FBC3, 0xCB02FBC3, 0xCB03FBC3, 0xCB04FBC3, 0xCB05FBC3, + 0xCB06FBC3, 0xCB07FBC3, 0xCB08FBC3, 0xCB09FBC3, 0xCB0AFBC3, 0xCB0BFBC3, 0xCB0CFBC3, 0xCB0DFBC3, 0xCB0EFBC3, 0xCB0FFBC3, 0xCB10FBC3, 0xCB11FBC3, 0xCB12FBC3, 0xCB13FBC3, 
0xCB14FBC3, + 0xCB15FBC3, 0xCB16FBC3, 0xCB17FBC3, 0xCB18FBC3, 0xCB19FBC3, 0xCB1AFBC3, 0xCB1BFBC3, 0xCB1CFBC3, 0xCB1DFBC3, 0xCB1EFBC3, 0xCB1FFBC3, 0xCB20FBC3, 0xCB21FBC3, 0xCB22FBC3, 0xCB23FBC3, + 0xCB24FBC3, 0xCB25FBC3, 0xCB26FBC3, 0xCB27FBC3, 0xCB28FBC3, 0xCB29FBC3, 0xCB2AFBC3, 0xCB2BFBC3, 0xCB2CFBC3, 0xCB2DFBC3, 0xCB2EFBC3, 0xCB2FFBC3, 0xCB30FBC3, 0xCB31FBC3, 0xCB32FBC3, + 0xCB33FBC3, 0xCB34FBC3, 0xCB35FBC3, 0xCB36FBC3, 0xCB37FBC3, 0xCB38FBC3, 0xCB39FBC3, 0xCB3AFBC3, 0xCB3BFBC3, 0xCB3CFBC3, 0xCB3DFBC3, 0xCB3EFBC3, 0xCB3FFBC3, 0xCB40FBC3, 0xCB41FBC3, + 0xCB42FBC3, 0xCB43FBC3, 0xCB44FBC3, 0xCB45FBC3, 0xCB46FBC3, 0xCB47FBC3, 0xCB48FBC3, 0xCB49FBC3, 0xCB4AFBC3, 0xCB4BFBC3, 0xCB4CFBC3, 0xCB4DFBC3, 0xCB4EFBC3, 0xCB4FFBC3, 0xCB50FBC3, + 0xCB51FBC3, 0xCB52FBC3, 0xCB53FBC3, 0xCB54FBC3, 0xCB55FBC3, 0xCB56FBC3, 0xCB57FBC3, 0xCB58FBC3, 0xCB59FBC3, 0xCB5AFBC3, 0xCB5BFBC3, 0xCB5CFBC3, 0xCB5DFBC3, 0xCB5EFBC3, 0xCB5FFBC3, + 0xCB60FBC3, 0xCB61FBC3, 0xCB62FBC3, 0xCB63FBC3, 0xCB64FBC3, 0xCB65FBC3, 0xCB66FBC3, 0xCB67FBC3, 0xCB68FBC3, 0xCB69FBC3, 0xCB6AFBC3, 0xCB6BFBC3, 0xCB6CFBC3, 0xCB6DFBC3, 0xCB6EFBC3, + 0xCB6FFBC3, 0xCB70FBC3, 0xCB71FBC3, 0xCB72FBC3, 0xCB73FBC3, 0xCB74FBC3, 0xCB75FBC3, 0xCB76FBC3, 0xCB77FBC3, 0xCB78FBC3, 0xCB79FBC3, 0xCB7AFBC3, 0xCB7BFBC3, 0xCB7CFBC3, 0xCB7DFBC3, + 0xCB7EFBC3, 0xCB7FFBC3, 0xCB80FBC3, 0xCB81FBC3, 0xCB82FBC3, 0xCB83FBC3, 0xCB84FBC3, 0xCB85FBC3, 0xCB86FBC3, 0xCB87FBC3, 0xCB88FBC3, 0xCB89FBC3, 0xCB8AFBC3, 0xCB8BFBC3, 0xCB8CFBC3, + 0xCB8DFBC3, 0xCB8EFBC3, 0xCB8FFBC3, 0xCB90FBC3, 0xCB91FBC3, 0xCB92FBC3, 0xCB93FBC3, 0xCB94FBC3, 0xCB95FBC3, 0xCB96FBC3, 0xCB97FBC3, 0xCB98FBC3, 0xCB99FBC3, 0xCB9AFBC3, 0xCB9BFBC3, + 0xCB9CFBC3, 0xCB9DFBC3, 0xCB9EFBC3, 0xCB9FFBC3, 0xCBA0FBC3, 0xCBA1FBC3, 0xCBA2FBC3, 0xCBA3FBC3, 0xCBA4FBC3, 0xCBA5FBC3, 0xCBA6FBC3, 0xCBA7FBC3, 0xCBA8FBC3, 0xCBA9FBC3, 0xCBAAFBC3, + 0xCBABFBC3, 0xCBACFBC3, 0xCBADFBC3, 0xCBAEFBC3, 0xCBAFFBC3, 0xCBB0FBC3, 0xCBB1FBC3, 0xCBB2FBC3, 0xCBB3FBC3, 0xCBB4FBC3, 0xCBB5FBC3, 0xCBB6FBC3, 0xCBB7FBC3, 
0xCBB8FBC3, 0xCBB9FBC3, + 0xCBBAFBC3, 0xCBBBFBC3, 0xCBBCFBC3, 0xCBBDFBC3, 0xCBBEFBC3, 0xCBBFFBC3, 0xCBC0FBC3, 0xCBC1FBC3, 0xCBC2FBC3, 0xCBC3FBC3, 0xCBC4FBC3, 0xCBC5FBC3, 0xCBC6FBC3, 0xCBC7FBC3, 0xCBC8FBC3, + 0xCBC9FBC3, 0xCBCAFBC3, 0xCBCBFBC3, 0xCBCCFBC3, 0xCBCDFBC3, 0xCBCEFBC3, 0xCBCFFBC3, 0xCBD0FBC3, 0xCBD1FBC3, 0xCBD2FBC3, 0xCBD3FBC3, 0xCBD4FBC3, 0xCBD5FBC3, 0xCBD6FBC3, 0xCBD7FBC3, + 0xCBD8FBC3, 0xCBD9FBC3, 0xCBDAFBC3, 0xCBDBFBC3, 0xCBDCFBC3, 0xCBDDFBC3, 0xCBDEFBC3, 0xCBDFFBC3, 0xCBE0FBC3, 0xCBE1FBC3, 0xCBE2FBC3, 0xCBE3FBC3, 0xCBE4FBC3, 0xCBE5FBC3, 0xCBE6FBC3, + 0xCBE7FBC3, 0xCBE8FBC3, 0xCBE9FBC3, 0xCBEAFBC3, 0xCBEBFBC3, 0xCBECFBC3, 0xCBEDFBC3, 0xCBEEFBC3, 0xCBEFFBC3, 0xCBF0FBC3, 0xCBF1FBC3, 0xCBF2FBC3, 0xCBF3FBC3, 0xCBF4FBC3, 0xCBF5FBC3, + 0xCBF6FBC3, 0xCBF7FBC3, 0xCBF8FBC3, 0xCBF9FBC3, 0xCBFAFBC3, 0xCBFBFBC3, 0xCBFCFBC3, 0xCBFDFBC3, 0xCBFEFBC3, 0xCBFFFBC3, 0xCC00FBC3, 0xCC01FBC3, 0xCC02FBC3, 0xCC03FBC3, 0xCC04FBC3, + 0xCC05FBC3, 0xCC06FBC3, 0xCC07FBC3, 0xCC08FBC3, 0xCC09FBC3, 0xCC0AFBC3, 0xCC0BFBC3, 0xCC0CFBC3, 0xCC0DFBC3, 0xCC0EFBC3, 0xCC0FFBC3, 0xCC10FBC3, 0xCC11FBC3, 0xCC12FBC3, 0xCC13FBC3, + 0xCC14FBC3, 0xCC15FBC3, 0xCC16FBC3, 0xCC17FBC3, 0xCC18FBC3, 0xCC19FBC3, 0xCC1AFBC3, 0xCC1BFBC3, 0xCC1CFBC3, 0xCC1DFBC3, 0xCC1EFBC3, 0xCC1FFBC3, 0xCC20FBC3, 0xCC21FBC3, 0xCC22FBC3, + 0xCC23FBC3, 0xCC24FBC3, 0xCC25FBC3, 0xCC26FBC3, 0xCC27FBC3, 0xCC28FBC3, 0xCC29FBC3, 0xCC2AFBC3, 0xCC2BFBC3, 0xCC2CFBC3, 0xCC2DFBC3, 0xCC2EFBC3, 0xCC2FFBC3, 0xCC30FBC3, 0xCC31FBC3, + 0xCC32FBC3, 0xCC33FBC3, 0xCC34FBC3, 0xCC35FBC3, 0xCC36FBC3, 0xCC37FBC3, 0xCC38FBC3, 0xCC39FBC3, 0xCC3AFBC3, 0xCC3BFBC3, 0xCC3CFBC3, 0xCC3DFBC3, 0xCC3EFBC3, 0xCC3FFBC3, 0xCC40FBC3, + 0xCC41FBC3, 0xCC42FBC3, 0xCC43FBC3, 0xCC44FBC3, 0xCC45FBC3, 0xCC46FBC3, 0xCC47FBC3, 0xCC48FBC3, 0xCC49FBC3, 0xCC4AFBC3, 0xCC4BFBC3, 0xCC4CFBC3, 0xCC4DFBC3, 0xCC4EFBC3, 0xCC4FFBC3, + 0xCC50FBC3, 0xCC51FBC3, 0xCC52FBC3, 0xCC53FBC3, 0xCC54FBC3, 0xCC55FBC3, 0xCC56FBC3, 0xCC57FBC3, 0xCC58FBC3, 0xCC59FBC3, 0xCC5AFBC3, 0xCC5BFBC3, 
0xCC5CFBC3, 0xCC5DFBC3, 0xCC5EFBC3, + 0xCC5FFBC3, 0xCC60FBC3, 0xCC61FBC3, 0xCC62FBC3, 0xCC63FBC3, 0xCC64FBC3, 0xCC65FBC3, 0xCC66FBC3, 0xCC67FBC3, 0xCC68FBC3, 0xCC69FBC3, 0xCC6AFBC3, 0xCC6BFBC3, 0xCC6CFBC3, 0xCC6DFBC3, + 0xCC6EFBC3, 0xCC6FFBC3, 0xCC70FBC3, 0xCC71FBC3, 0xCC72FBC3, 0xCC73FBC3, 0xCC74FBC3, 0xCC75FBC3, 0xCC76FBC3, 0xCC77FBC3, 0xCC78FBC3, 0xCC79FBC3, 0xCC7AFBC3, 0xCC7BFBC3, 0xCC7CFBC3, + 0xCC7DFBC3, 0xCC7EFBC3, 0xCC7FFBC3, 0xCC80FBC3, 0xCC81FBC3, 0xCC82FBC3, 0xCC83FBC3, 0xCC84FBC3, 0xCC85FBC3, 0xCC86FBC3, 0xCC87FBC3, 0xCC88FBC3, 0xCC89FBC3, 0xCC8AFBC3, 0xCC8BFBC3, + 0xCC8CFBC3, 0xCC8DFBC3, 0xCC8EFBC3, 0xCC8FFBC3, 0xCC90FBC3, 0xCC91FBC3, 0xCC92FBC3, 0xCC93FBC3, 0xCC94FBC3, 0xCC95FBC3, 0xCC96FBC3, 0xCC97FBC3, 0xCC98FBC3, 0xCC99FBC3, 0xCC9AFBC3, + 0xCC9BFBC3, 0xCC9CFBC3, 0xCC9DFBC3, 0xCC9EFBC3, 0xCC9FFBC3, 0xCCA0FBC3, 0xCCA1FBC3, 0xCCA2FBC3, 0xCCA3FBC3, 0xCCA4FBC3, 0xCCA5FBC3, 0xCCA6FBC3, 0xCCA7FBC3, 0xCCA8FBC3, 0xCCA9FBC3, + 0xCCAAFBC3, 0xCCABFBC3, 0xCCACFBC3, 0xCCADFBC3, 0xCCAEFBC3, 0xCCAFFBC3, 0xCCB0FBC3, 0xCCB1FBC3, 0xCCB2FBC3, 0xCCB3FBC3, 0xCCB4FBC3, 0xCCB5FBC3, 0xCCB6FBC3, 0xCCB7FBC3, 0xCCB8FBC3, + 0xCCB9FBC3, 0xCCBAFBC3, 0xCCBBFBC3, 0xCCBCFBC3, 0xCCBDFBC3, 0xCCBEFBC3, 0xCCBFFBC3, 0xCCC0FBC3, 0xCCC1FBC3, 0xCCC2FBC3, 0xCCC3FBC3, 0xCCC4FBC3, 0xCCC5FBC3, 0xCCC6FBC3, 0xCCC7FBC3, + 0xCCC8FBC3, 0xCCC9FBC3, 0xCCCAFBC3, 0xCCCBFBC3, 0xCCCCFBC3, 0xCCCDFBC3, 0xCCCEFBC3, 0xCCCFFBC3, 0xCCD0FBC3, 0xCCD1FBC3, 0xCCD2FBC3, 0xCCD3FBC3, 0xCCD4FBC3, 0xCCD5FBC3, 0xCCD6FBC3, + 0xCCD7FBC3, 0xCCD8FBC3, 0xCCD9FBC3, 0xCCDAFBC3, 0xCCDBFBC3, 0xCCDCFBC3, 0xCCDDFBC3, 0xCCDEFBC3, 0xCCDFFBC3, 0xCCE0FBC3, 0xCCE1FBC3, 0xCCE2FBC3, 0xCCE3FBC3, 0xCCE4FBC3, 0xCCE5FBC3, + 0xCCE6FBC3, 0xCCE7FBC3, 0xCCE8FBC3, 0xCCE9FBC3, 0xCCEAFBC3, 0xCCEBFBC3, 0xCCECFBC3, 0xCCEDFBC3, 0xCCEEFBC3, 0xCCEFFBC3, 0xCCF0FBC3, 0xCCF1FBC3, 0xCCF2FBC3, 0xCCF3FBC3, 0xCCF4FBC3, + 0xCCF5FBC3, 0xCCF6FBC3, 0xCCF7FBC3, 0xCCF8FBC3, 0xCCF9FBC3, 0xCCFAFBC3, 0xCCFBFBC3, 0xCCFCFBC3, 0xCCFDFBC3, 0xCCFEFBC3, 0xCCFFFBC3, 
0xCD00FBC3, 0xCD01FBC3, 0xCD02FBC3, 0xCD03FBC3, + 0xCD04FBC3, 0xCD05FBC3, 0xCD06FBC3, 0xCD07FBC3, 0xCD08FBC3, 0xCD09FBC3, 0xCD0AFBC3, 0xCD0BFBC3, 0xCD0CFBC3, 0xCD0DFBC3, 0xCD0EFBC3, 0xCD0FFBC3, 0xCD10FBC3, 0xCD11FBC3, 0xCD12FBC3, + 0xCD13FBC3, 0xCD14FBC3, 0xCD15FBC3, 0xCD16FBC3, 0xCD17FBC3, 0xCD18FBC3, 0xCD19FBC3, 0xCD1AFBC3, 0xCD1BFBC3, 0xCD1CFBC3, 0xCD1DFBC3, 0xCD1EFBC3, 0xCD1FFBC3, 0xCD20FBC3, 0xCD21FBC3, + 0xCD22FBC3, 0xCD23FBC3, 0xCD24FBC3, 0xCD25FBC3, 0xCD26FBC3, 0xCD27FBC3, 0xCD28FBC3, 0xCD29FBC3, 0xCD2AFBC3, 0xCD2BFBC3, 0xCD2CFBC3, 0xCD2DFBC3, 0xCD2EFBC3, 0xCD2FFBC3, 0xCD30FBC3, + 0xCD31FBC3, 0xCD32FBC3, 0xCD33FBC3, 0xCD34FBC3, 0xCD35FBC3, 0xCD36FBC3, 0xCD37FBC3, 0xCD38FBC3, 0xCD39FBC3, 0xCD3AFBC3, 0xCD3BFBC3, 0xCD3CFBC3, 0xCD3DFBC3, 0xCD3EFBC3, 0xCD3FFBC3, + 0xCD40FBC3, 0xCD41FBC3, 0xCD42FBC3, 0xCD43FBC3, 0xCD44FBC3, 0xCD45FBC3, 0xCD46FBC3, 0xCD47FBC3, 0xCD48FBC3, 0xCD49FBC3, 0xCD4AFBC3, 0xCD4BFBC3, 0xCD4CFBC3, 0xCD4DFBC3, 0xCD4EFBC3, + 0xCD4FFBC3, 0xCD50FBC3, 0xCD51FBC3, 0xCD52FBC3, 0xCD53FBC3, 0xCD54FBC3, 0xCD55FBC3, 0xCD56FBC3, 0xCD57FBC3, 0xCD58FBC3, 0xCD59FBC3, 0xCD5AFBC3, 0xCD5BFBC3, 0xCD5CFBC3, 0xCD5DFBC3, + 0xCD5EFBC3, 0xCD5FFBC3, 0xCD60FBC3, 0xCD61FBC3, 0xCD62FBC3, 0xCD63FBC3, 0xCD64FBC3, 0xCD65FBC3, 0xCD66FBC3, 0xCD67FBC3, 0xCD68FBC3, 0xCD69FBC3, 0xCD6AFBC3, 0xCD6BFBC3, 0xCD6CFBC3, + 0xCD6DFBC3, 0xCD6EFBC3, 0xCD6FFBC3, 0xCD70FBC3, 0xCD71FBC3, 0xCD72FBC3, 0xCD73FBC3, 0xCD74FBC3, 0xCD75FBC3, 0xCD76FBC3, 0xCD77FBC3, 0xCD78FBC3, 0xCD79FBC3, 0xCD7AFBC3, 0xCD7BFBC3, + 0xCD7CFBC3, 0xCD7DFBC3, 0xCD7EFBC3, 0xCD7FFBC3, 0xCD80FBC3, 0xCD81FBC3, 0xCD82FBC3, 0xCD83FBC3, 0xCD84FBC3, 0xCD85FBC3, 0xCD86FBC3, 0xCD87FBC3, 0xCD88FBC3, 0xCD89FBC3, 0xCD8AFBC3, + 0xCD8BFBC3, 0xCD8CFBC3, 0xCD8DFBC3, 0xCD8EFBC3, 0xCD8FFBC3, 0xCD90FBC3, 0xCD91FBC3, 0xCD92FBC3, 0xCD93FBC3, 0xCD94FBC3, 0xCD95FBC3, 0xCD96FBC3, 0xCD97FBC3, 0xCD98FBC3, 0xCD99FBC3, + 0xCD9AFBC3, 0xCD9BFBC3, 0xCD9CFBC3, 0xCD9DFBC3, 0xCD9EFBC3, 0xCD9FFBC3, 0xCDA0FBC3, 0xCDA1FBC3, 0xCDA2FBC3, 0xCDA3FBC3, 
0xCDA4FBC3, 0xCDA5FBC3, 0xCDA6FBC3, 0xCDA7FBC3, 0xCDA8FBC3, + 0xCDA9FBC3, 0xCDAAFBC3, 0xCDABFBC3, 0xCDACFBC3, 0xCDADFBC3, 0xCDAEFBC3, 0xCDAFFBC3, 0xCDB0FBC3, 0xCDB1FBC3, 0xCDB2FBC3, 0xCDB3FBC3, 0xCDB4FBC3, 0xCDB5FBC3, 0xCDB6FBC3, 0xCDB7FBC3, + 0xCDB8FBC3, 0xCDB9FBC3, 0xCDBAFBC3, 0xCDBBFBC3, 0xCDBCFBC3, 0xCDBDFBC3, 0xCDBEFBC3, 0xCDBFFBC3, 0xCDC0FBC3, 0xCDC1FBC3, 0xCDC2FBC3, 0xCDC3FBC3, 0xCDC4FBC3, 0xCDC5FBC3, 0xCDC6FBC3, + 0xCDC7FBC3, 0xCDC8FBC3, 0xCDC9FBC3, 0xCDCAFBC3, 0xCDCBFBC3, 0xCDCCFBC3, 0xCDCDFBC3, 0xCDCEFBC3, 0xCDCFFBC3, 0xCDD0FBC3, 0xCDD1FBC3, 0xCDD2FBC3, 0xCDD3FBC3, 0xCDD4FBC3, 0xCDD5FBC3, + 0xCDD6FBC3, 0xCDD7FBC3, 0xCDD8FBC3, 0xCDD9FBC3, 0xCDDAFBC3, 0xCDDBFBC3, 0xCDDCFBC3, 0xCDDDFBC3, 0xCDDEFBC3, 0xCDDFFBC3, 0xCDE0FBC3, 0xCDE1FBC3, 0xCDE2FBC3, 0xCDE3FBC3, 0xCDE4FBC3, + 0xCDE5FBC3, 0xCDE6FBC3, 0xCDE7FBC3, 0xCDE8FBC3, 0xCDE9FBC3, 0xCDEAFBC3, 0xCDEBFBC3, 0xCDECFBC3, 0xCDEDFBC3, 0xCDEEFBC3, 0xCDEFFBC3, 0xCDF0FBC3, 0xCDF1FBC3, 0xCDF2FBC3, 0xCDF3FBC3, + 0xCDF4FBC3, 0xCDF5FBC3, 0xCDF6FBC3, 0xCDF7FBC3, 0xCDF8FBC3, 0xCDF9FBC3, 0xCDFAFBC3, 0xCDFBFBC3, 0xCDFCFBC3, 0xCDFDFBC3, 0xCDFEFBC3, 0xCDFFFBC3, 0xCE00FBC3, 0xCE01FBC3, 0xCE02FBC3, + 0xCE03FBC3, 0xCE04FBC3, 0xCE05FBC3, 0xCE06FBC3, 0xCE07FBC3, 0xCE08FBC3, 0xCE09FBC3, 0xCE0AFBC3, 0xCE0BFBC3, 0xCE0CFBC3, 0xCE0DFBC3, 0xCE0EFBC3, 0xCE0FFBC3, 0xCE10FBC3, 0xCE11FBC3, + 0xCE12FBC3, 0xCE13FBC3, 0xCE14FBC3, 0xCE15FBC3, 0xCE16FBC3, 0xCE17FBC3, 0xCE18FBC3, 0xCE19FBC3, 0xCE1AFBC3, 0xCE1BFBC3, 0xCE1CFBC3, 0xCE1DFBC3, 0xCE1EFBC3, 0xCE1FFBC3, 0xCE20FBC3, + 0xCE21FBC3, 0xCE22FBC3, 0xCE23FBC3, 0xCE24FBC3, 0xCE25FBC3, 0xCE26FBC3, 0xCE27FBC3, 0xCE28FBC3, 0xCE29FBC3, 0xCE2AFBC3, 0xCE2BFBC3, 0xCE2CFBC3, 0xCE2DFBC3, 0xCE2EFBC3, 0xCE2FFBC3, + 0xCE30FBC3, 0xCE31FBC3, 0xCE32FBC3, 0xCE33FBC3, 0xCE34FBC3, 0xCE35FBC3, 0xCE36FBC3, 0xCE37FBC3, 0xCE38FBC3, 0xCE39FBC3, 0xCE3AFBC3, 0xCE3BFBC3, 0xCE3CFBC3, 0xCE3DFBC3, 0xCE3EFBC3, + 0xCE3FFBC3, 0xCE40FBC3, 0xCE41FBC3, 0xCE42FBC3, 0xCE43FBC3, 0xCE44FBC3, 0xCE45FBC3, 0xCE46FBC3, 0xCE47FBC3, 
0xCE48FBC3, 0xCE49FBC3, 0xCE4AFBC3, 0xCE4BFBC3, 0xCE4CFBC3, 0xCE4DFBC3, + 0xCE4EFBC3, 0xCE4FFBC3, 0xCE50FBC3, 0xCE51FBC3, 0xCE52FBC3, 0xCE53FBC3, 0xCE54FBC3, 0xCE55FBC3, 0xCE56FBC3, 0xCE57FBC3, 0xCE58FBC3, 0xCE59FBC3, 0xCE5AFBC3, 0xCE5BFBC3, 0xCE5CFBC3, + 0xCE5DFBC3, 0xCE5EFBC3, 0xCE5FFBC3, 0xCE60FBC3, 0xCE61FBC3, 0xCE62FBC3, 0xCE63FBC3, 0xCE64FBC3, 0xCE65FBC3, 0xCE66FBC3, 0xCE67FBC3, 0xCE68FBC3, 0xCE69FBC3, 0xCE6AFBC3, 0xCE6BFBC3, + 0xCE6CFBC3, 0xCE6DFBC3, 0xCE6EFBC3, 0xCE6FFBC3, 0xCE70FBC3, 0xCE71FBC3, 0xCE72FBC3, 0xCE73FBC3, 0xCE74FBC3, 0xCE75FBC3, 0xCE76FBC3, 0xCE77FBC3, 0xCE78FBC3, 0xCE79FBC3, 0xCE7AFBC3, + 0xCE7BFBC3, 0xCE7CFBC3, 0xCE7DFBC3, 0xCE7EFBC3, 0xCE7FFBC3, 0xCE80FBC3, 0xCE81FBC3, 0xCE82FBC3, 0xCE83FBC3, 0xCE84FBC3, 0xCE85FBC3, 0xCE86FBC3, 0xCE87FBC3, 0xCE88FBC3, 0xCE89FBC3, + 0xCE8AFBC3, 0xCE8BFBC3, 0xCE8CFBC3, 0xCE8DFBC3, 0xCE8EFBC3, 0xCE8FFBC3, 0xCE90FBC3, 0xCE91FBC3, 0xCE92FBC3, 0xCE93FBC3, 0xCE94FBC3, 0xCE95FBC3, 0xCE96FBC3, 0xCE97FBC3, 0xCE98FBC3, + 0xCE99FBC3, 0xCE9AFBC3, 0xCE9BFBC3, 0xCE9CFBC3, 0xCE9DFBC3, 0xCE9EFBC3, 0xCE9FFBC3, 0xCEA0FBC3, 0xCEA1FBC3, 0xCEA2FBC3, 0xCEA3FBC3, 0xCEA4FBC3, 0xCEA5FBC3, 0xCEA6FBC3, 0xCEA7FBC3, + 0xCEA8FBC3, 0xCEA9FBC3, 0xCEAAFBC3, 0xCEABFBC3, 0xCEACFBC3, 0xCEADFBC3, 0xCEAEFBC3, 0xCEAFFBC3, 0xCEB0FBC3, 0xCEB1FBC3, 0xCEB2FBC3, 0xCEB3FBC3, 0xCEB4FBC3, 0xCEB5FBC3, 0xCEB6FBC3, + 0xCEB7FBC3, 0xCEB8FBC3, 0xCEB9FBC3, 0xCEBAFBC3, 0xCEBBFBC3, 0xCEBCFBC3, 0xCEBDFBC3, 0xCEBEFBC3, 0xCEBFFBC3, 0xCEC0FBC3, 0xCEC1FBC3, 0xCEC2FBC3, 0xCEC3FBC3, 0xCEC4FBC3, 0xCEC5FBC3, + 0xCEC6FBC3, 0xCEC7FBC3, 0xCEC8FBC3, 0xCEC9FBC3, 0xCECAFBC3, 0xCECBFBC3, 0xCECCFBC3, 0xCECDFBC3, 0xCECEFBC3, 0xCECFFBC3, 0xCED0FBC3, 0xCED1FBC3, 0xCED2FBC3, 0xCED3FBC3, 0xCED4FBC3, + 0xCED5FBC3, 0xCED6FBC3, 0xCED7FBC3, 0xCED8FBC3, 0xCED9FBC3, 0xCEDAFBC3, 0xCEDBFBC3, 0xCEDCFBC3, 0xCEDDFBC3, 0xCEDEFBC3, 0xCEDFFBC3, 0xCEE0FBC3, 0xCEE1FBC3, 0xCEE2FBC3, 0xCEE3FBC3, + 0xCEE4FBC3, 0xCEE5FBC3, 0xCEE6FBC3, 0xCEE7FBC3, 0xCEE8FBC3, 0xCEE9FBC3, 0xCEEAFBC3, 0xCEEBFBC3, 
0xCEECFBC3, 0xCEEDFBC3, 0xCEEEFBC3, 0xCEEFFBC3, 0xCEF0FBC3, 0xCEF1FBC3, 0xCEF2FBC3, + 0xCEF3FBC3, 0xCEF4FBC3, 0xCEF5FBC3, 0xCEF6FBC3, 0xCEF7FBC3, 0xCEF8FBC3, 0xCEF9FBC3, 0xCEFAFBC3, 0xCEFBFBC3, 0xCEFCFBC3, 0xCEFDFBC3, 0xCEFEFBC3, 0xCEFFFBC3, 0xCF00FBC3, 0xCF01FBC3, + 0xCF02FBC3, 0xCF03FBC3, 0xCF04FBC3, 0xCF05FBC3, 0xCF06FBC3, 0xCF07FBC3, 0xCF08FBC3, 0xCF09FBC3, 0xCF0AFBC3, 0xCF0BFBC3, 0xCF0CFBC3, 0xCF0DFBC3, 0xCF0EFBC3, 0xCF0FFBC3, 0xCF10FBC3, + 0xCF11FBC3, 0xCF12FBC3, 0xCF13FBC3, 0xCF14FBC3, 0xCF15FBC3, 0xCF16FBC3, 0xCF17FBC3, 0xCF18FBC3, 0xCF19FBC3, 0xCF1AFBC3, 0xCF1BFBC3, 0xCF1CFBC3, 0xCF1DFBC3, 0xCF1EFBC3, 0xCF1FFBC3, + 0xCF20FBC3, 0xCF21FBC3, 0xCF22FBC3, 0xCF23FBC3, 0xCF24FBC3, 0xCF25FBC3, 0xCF26FBC3, 0xCF27FBC3, 0xCF28FBC3, 0xCF29FBC3, 0xCF2AFBC3, 0xCF2BFBC3, 0xCF2CFBC3, 0xCF2DFBC3, 0xCF2EFBC3, + 0xCF2FFBC3, 0xCF30FBC3, 0xCF31FBC3, 0xCF32FBC3, 0xCF33FBC3, 0xCF34FBC3, 0xCF35FBC3, 0xCF36FBC3, 0xCF37FBC3, 0xCF38FBC3, 0xCF39FBC3, 0xCF3AFBC3, 0xCF3BFBC3, 0xCF3CFBC3, 0xCF3DFBC3, + 0xCF3EFBC3, 0xCF3FFBC3, 0xCF40FBC3, 0xCF41FBC3, 0xCF42FBC3, 0xCF43FBC3, 0xCF44FBC3, 0xCF45FBC3, 0xCF46FBC3, 0xCF47FBC3, 0xCF48FBC3, 0xCF49FBC3, 0xCF4AFBC3, 0xCF4BFBC3, 0xCF4CFBC3, + 0xCF4DFBC3, 0xCF4EFBC3, 0xCF4FFBC3, 0xCF50FBC3, 0xCF51FBC3, 0xCF52FBC3, 0xCF53FBC3, 0xCF54FBC3, 0xCF55FBC3, 0xCF56FBC3, 0xCF57FBC3, 0xCF58FBC3, 0xCF59FBC3, 0xCF5AFBC3, 0xCF5BFBC3, + 0xCF5CFBC3, 0xCF5DFBC3, 0xCF5EFBC3, 0xCF5FFBC3, 0xCF60FBC3, 0xCF61FBC3, 0xCF62FBC3, 0xCF63FBC3, 0xCF64FBC3, 0xCF65FBC3, 0xCF66FBC3, 0xCF67FBC3, 0xCF68FBC3, 0xCF69FBC3, 0xCF6AFBC3, + 0xCF6BFBC3, 0xCF6CFBC3, 0xCF6DFBC3, 0xCF6EFBC3, 0xCF6FFBC3, 0xCF70FBC3, 0xCF71FBC3, 0xCF72FBC3, 0xCF73FBC3, 0xCF74FBC3, 0xCF75FBC3, 0xCF76FBC3, 0xCF77FBC3, 0xCF78FBC3, 0xCF79FBC3, + 0xCF7AFBC3, 0xCF7BFBC3, 0xCF7CFBC3, 0xCF7DFBC3, 0xCF7EFBC3, 0xCF7FFBC3, 0xCF80FBC3, 0xCF81FBC3, 0xCF82FBC3, 0xCF83FBC3, 0xCF84FBC3, 0xCF85FBC3, 0xCF86FBC3, 0xCF87FBC3, 0xCF88FBC3, + 0xCF89FBC3, 0xCF8AFBC3, 0xCF8BFBC3, 0xCF8CFBC3, 0xCF8DFBC3, 0xCF8EFBC3, 0xCF8FFBC3, 
0xCF90FBC3, 0xCF91FBC3, 0xCF92FBC3, 0xCF93FBC3, 0xCF94FBC3, 0xCF95FBC3, 0xCF96FBC3, 0xCF97FBC3, + 0xCF98FBC3, 0xCF99FBC3, 0xCF9AFBC3, 0xCF9BFBC3, 0xCF9CFBC3, 0xCF9DFBC3, 0xCF9EFBC3, 0xCF9FFBC3, 0xCFA0FBC3, 0xCFA1FBC3, 0xCFA2FBC3, 0xCFA3FBC3, 0xCFA4FBC3, 0xCFA5FBC3, 0xCFA6FBC3, + 0xCFA7FBC3, 0xCFA8FBC3, 0xCFA9FBC3, 0xCFAAFBC3, 0xCFABFBC3, 0xCFACFBC3, 0xCFADFBC3, 0xCFAEFBC3, 0xCFAFFBC3, 0xCFB0FBC3, 0xCFB1FBC3, 0xCFB2FBC3, 0xCFB3FBC3, 0xCFB4FBC3, 0xCFB5FBC3, + 0xCFB6FBC3, 0xCFB7FBC3, 0xCFB8FBC3, 0xCFB9FBC3, 0xCFBAFBC3, 0xCFBBFBC3, 0xCFBCFBC3, 0xCFBDFBC3, 0xCFBEFBC3, 0xCFBFFBC3, 0xCFC0FBC3, 0xCFC1FBC3, 0xCFC2FBC3, 0xCFC3FBC3, 0xCFC4FBC3, + 0xCFC5FBC3, 0xCFC6FBC3, 0xCFC7FBC3, 0xCFC8FBC3, 0xCFC9FBC3, 0xCFCAFBC3, 0xCFCBFBC3, 0xCFCCFBC3, 0xCFCDFBC3, 0xCFCEFBC3, 0xCFCFFBC3, 0xCFD0FBC3, 0xCFD1FBC3, 0xCFD2FBC3, 0xCFD3FBC3, + 0xCFD4FBC3, 0xCFD5FBC3, 0xCFD6FBC3, 0xCFD7FBC3, 0xCFD8FBC3, 0xCFD9FBC3, 0xCFDAFBC3, 0xCFDBFBC3, 0xCFDCFBC3, 0xCFDDFBC3, 0xCFDEFBC3, 0xCFDFFBC3, 0xCFE0FBC3, 0xCFE1FBC3, 0xCFE2FBC3, + 0xCFE3FBC3, 0xCFE4FBC3, 0xCFE5FBC3, 0xCFE6FBC3, 0xCFE7FBC3, 0xCFE8FBC3, 0xCFE9FBC3, 0xCFEAFBC3, 0xCFEBFBC3, 0xCFECFBC3, 0xCFEDFBC3, 0xCFEEFBC3, 0xCFEFFBC3, 0xCFF0FBC3, 0xCFF1FBC3, + 0xCFF2FBC3, 0xCFF3FBC3, 0xCFF4FBC3, 0xCFF5FBC3, 0xCFF6FBC3, 0xCFF7FBC3, 0xCFF8FBC3, 0xCFF9FBC3, 0xCFFAFBC3, 0xCFFBFBC3, 0xCFFCFBC3, 0xCFFDFBC3, 0xCFFEFBC3, 0xCFFFFBC3, 0xFD6, + 0xFD7, 0xFD8, 0xFD9, 0xFDA, 0xFDB, 0xFDC, 0xFDD, 0xFDE, 0xFDF, 0xFE0, 0xFE1, 0xFE2, 0xFE3, 0xFE4, 0xFE5, + 0xFE6, 0xFE7, 0xFE8, 0xFE9, 0xFEA, 0xFEB, 0xFEC, 0xFED, 0xFEE, 0xFEF, 0xFF0, 0xFF1, 0xFF2, 0xFF3, 0xFF4, + 0xFF5, 0xFF6, 0xFF7, 0xFF8, 0xFF9, 0xFFA, 0xFFB, 0xFFC, 0xFFD, 0xFFE, 0xFFF, 0x1000, 0x1001, 0x1002, 0x1003, + 0x1004, 0x1005, 0x1006, 0x1007, 0x1008, 0x1009, 0x100A, 0x100B, 0x100C, 0x100D, 0x100E, 0x100F, 0x1010, 0x1011, 0x1012, + 0x1013, 0x1014, 0x1015, 0x1016, 0x1017, 0x1018, 0x1019, 0x101A, 0x101B, 0x101C, 0x101D, 0x101E, 0x101F, 0x1020, 0x1021, + 0x1022, 0x1023, 0x1024, 0x1025, 0x1026, 0x1027, 0x1028, 0x1029, 
0x102A, 0x102B, 0x102C, 0x102D, 0x102E, 0x102F, 0x1030, + 0x1031, 0x1032, 0x1033, 0x1034, 0x1035, 0x1036, 0x1037, 0x1038, 0x1039, 0x103A, 0x103B, 0x103C, 0x103D, 0x103E, 0x103F, + 0x1040, 0x1041, 0x1042, 0x1043, 0x1044, 0x1045, 0x1046, 0x1047, 0x1048, 0x1049, 0x104A, 0x104B, 0x104C, 0x104D, 0x104E, + 0x104F, 0x1050, 0x1051, 0x1052, 0x1053, 0x1054, 0x1055, 0x1056, 0x1057, 0x1058, 0x1059, 0x105A, 0x105B, 0x105C, 0x105D, + 0x105E, 0x105F, 0x1060, 0x1061, 0x1062, 0x1063, 0x1064, 0x1065, 0x1066, 0x1067, 0x1068, 0x1069, 0x106A, 0x106B, 0x106C, + 0x106D, 0x106E, 0x106F, 0x1070, 0x1071, 0x1072, 0x1073, 0x1074, 0x1075, 0x1076, 0x1077, 0x1078, 0x1079, 0x107A, 0x107B, + 0x107C, 0x107D, 0x107E, 0x107F, 0x1080, 0x1081, 0x1082, 0x1083, 0x1084, 0x1085, 0x1086, 0x1087, 0x1088, 0x1089, 0x108A, + 0x108B, 0x108C, 0x108D, 0x108E, 0x108F, 0x1090, 0x1091, 0x1092, 0x1093, 0x1094, 0x1095, 0x1096, 0x1097, 0x1098, 0x1099, + 0x109A, 0x109B, 0x109C, 0x109D, 0x109E, 0x109F, 0x10A0, 0x10A1, 0x10A2, 0x10A3, 0x10A4, 0x10A5, 0x10A6, 0x10A7, 0x10A8, + 0x10A9, 0x10AA, 0x10AB, 0x10AC, 0x10AD, 0x10AE, 0x10AF, 0x10B0, 0x10B1, 0x10B2, 0x10B3, 0x10B4, 0x10B5, 0x10B6, 0x10B7, + 0x10B8, 0x10B9, 0x10BA, 0x10BB, 0x10BC, 0x10BD, 0x10BE, 0x10BF, 0x10C0, 0x10C1, 0x10C2, 0x10C3, 0x10C4, 0x10C5, 0x10C6, + 0x10C7, 0x10C8, 0x10C9, 0x10CA, 0x10CB, 0xD0F6FBC3, 0xD0F7FBC3, 0xD0F8FBC3, 0xD0F9FBC3, 0xD0FAFBC3, 0xD0FBFBC3, 0xD0FCFBC3, 0xD0FDFBC3, 0xD0FEFBC3, 0xD0FFFBC3, + 0x10CC, 0x10CD, 0x10CE, 0x10CF, 0x10D0, 0x10D1, 0x10D2, 0x10D3, 0x10D4, 0x10D5, 0x10D6, 0x10D7, 0x10D8, 0x10D9, 0x10DA, + 0x10DB, 0x10DC, 0x10DD, 0x10DE, 0x10DF, 0x10E0, 0x10E1, 0x10E2, 0x10E3, 0x10E4, 0x10E5, 0x10E6, 0x10E7, 0x10E8, 0x10E9, + 0x10EA, 0x10EB, 0x10EC, 0x10ED, 0x10EE, 0x10EF, 0x10F0, 0x10F1, 0x10F2, 0xD127FBC3, 0xD128FBC3, 0x1106, 0x10F6, 0x10F7, 0x10F8, + 0x10F9, 0x10FA, 0x10FB, 0x10FC, 0x10FD, 0x10FE, 0x10FF, 0x1100, 0x1101, 0x1102, 0x1103, 0x1104, 0x1105, 0x1107, 0x1108, + 0x1109, 0x110A, 0x110B, 0x110C, 0x110D, 0x110E, 0x110F, 0x1110, 
0x1111, 0x1112, 0x1113, 0x1114, 0x1115, 0x1116, 0x1117, + 0x1118, 0x1119, 0x111A, 0x111B, 0x111C, 0x111D, 0x111E, 0x111F, 0x1120, 0x1121, 0x1122, 0x1123, 0x1124, 0x1125, 0x1126, + 0x1127, 0x1128, 0x1129, 0x112A, 0x1124, 0x1125, 0x1125, 0x1125, 0x1125, 0x1125, 0x1125, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x112B, 0x112C, 0x112D, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x112E, 0x112F, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x1130, 0x1131, 0x1132, 0x1133, 0x1134, 0x1135, 0x1136, 0x1137, 0x1138, 0x1139, + 0x113A, 0x113B, 0x113C, 0x113D, 0x113E, 0x113F, 0x1140, 0x1141, 0x1142, 0x1143, 0x1144, 0x1145, 0x1146, 0x1147, 0x1148, + 0x1149, 0x114A, 0x114B, 0x114C, 0x114D, 0x0, 0x0, 0x0, 0x0, 0x114E, 0x114F, 0x1150, 0x1151, 0x1152, 0x1153, + 0x1154, 0x1155, 0x1156, 0x1157, 0x1158, 0x1159, 0x115A, 0x1159, 0x115A, 0x1159, 0x115A, 0x1159, 0x115A, 0x115B, 0x115C, + 0x115D, 0x115E, 0x115F, 0x1160, 0x1161, 0x1162, 0x1163, 0x1164, 0x1165, 0x1166, 0x1167, 0x1168, 0x1169, 0x116A, 0x116B, + 0x116C, 0x116D, 0x116E, 0x116F, 0x1170, 0x1171, 0x1172, 0x1173, 0x1174, 0x1175, 0x1176, 0x1177, 0x1178, 0x1179, 0x117A, + 0x117B, 0x117C, 0x117D, 0x117E, 0x117F, 0x1180, 0x1181, 0x1182, 0xD1E9FBC3, 0xD1EAFBC3, 0xD1EBFBC3, 0xD1ECFBC3, 0xD1EDFBC3, 0xD1EEFBC3, 0xD1EFFBC3, + 0xD1F0FBC3, 0xD1F1FBC3, 0xD1F2FBC3, 0xD1F3FBC3, 0xD1F4FBC3, 0xD1F5FBC3, 0xD1F6FBC3, 0xD1F7FBC3, 0xD1F8FBC3, 0xD1F9FBC3, 0xD1FAFBC3, 0xD1FBFBC3, 0xD1FCFBC3, 0xD1FDFBC3, 0xD1FEFBC3, + 0xD1FFFBC3, 0x1183, 0x1184, 0x1185, 0x1186, 0x1187, 0x1188, 0x1189, 0x118A, 0x118B, 0x118C, 0x118D, 0x118E, 0x118F, 0x1190, + 0x1191, 0x1192, 0x1193, 0x1194, 0x1195, 0x1196, 0x1197, 0x1198, 0x1199, 0x119A, 0x119B, 0x119C, 0x119D, 0x119E, 0x119F, + 0x11A0, 0x11A1, 0x11A2, 0x11A3, 0x11A4, 0x11A5, 0x11A6, 0x11A7, 0x11A8, 0x11A9, 0x11AA, 0x11AB, 0x11AC, 0x11AD, 0x11AE, + 0x11AF, 0x11B0, 0x11B1, 0x11B2, 0x11B3, 0x11B4, 0x11B5, 0x11B6, 0x11B7, 0x11B8, 0x11B9, 0x11BA, 0x11BB, 0x11BC, 0x11BD, + 0x11BE, 
0x11BF, 0x11C0, 0x11C1, 0x11C2, 0x11C3, 0x11C4, 0x0, 0x0, 0x0, 0x11C5, 0xD246FBC3, 0xD247FBC3, 0xD248FBC3, 0xD249FBC3, + 0xD24AFBC3, 0xD24BFBC3, 0xD24CFBC3, 0xD24DFBC3, 0xD24EFBC3, 0xD24FFBC3, 0xD250FBC3, 0xD251FBC3, 0xD252FBC3, 0xD253FBC3, 0xD254FBC3, 0xD255FBC3, 0xD256FBC3, 0xD257FBC3, 0xD258FBC3, + 0xD259FBC3, 0xD25AFBC3, 0xD25BFBC3, 0xD25CFBC3, 0xD25DFBC3, 0xD25EFBC3, 0xD25FFBC3, 0xD260FBC3, 0xD261FBC3, 0xD262FBC3, 0xD263FBC3, 0xD264FBC3, 0xD265FBC3, 0xD266FBC3, 0xD267FBC3, + 0xD268FBC3, 0xD269FBC3, 0xD26AFBC3, 0xD26BFBC3, 0xD26CFBC3, 0xD26DFBC3, 0xD26EFBC3, 0xD26FFBC3, 0xD270FBC3, 0xD271FBC3, 0xD272FBC3, 0xD273FBC3, 0xD274FBC3, 0xD275FBC3, 0xD276FBC3, + 0xD277FBC3, 0xD278FBC3, 0xD279FBC3, 0xD27AFBC3, 0xD27BFBC3, 0xD27CFBC3, 0xD27DFBC3, 0xD27EFBC3, 0xD27FFBC3, 0xD280FBC3, 0xD281FBC3, 0xD282FBC3, 0xD283FBC3, 0xD284FBC3, 0xD285FBC3, + 0xD286FBC3, 0xD287FBC3, 0xD288FBC3, 0xD289FBC3, 0xD28AFBC3, 0xD28BFBC3, 0xD28CFBC3, 0xD28DFBC3, 0xD28EFBC3, 0xD28FFBC3, 0xD290FBC3, 0xD291FBC3, 0xD292FBC3, 0xD293FBC3, 0xD294FBC3, + 0xD295FBC3, 0xD296FBC3, 0xD297FBC3, 0xD298FBC3, 0xD299FBC3, 0xD29AFBC3, 0xD29BFBC3, 0xD29CFBC3, 0xD29DFBC3, 0xD29EFBC3, 0xD29FFBC3, 0xD2A0FBC3, 0xD2A1FBC3, 0xD2A2FBC3, 0xD2A3FBC3, + 0xD2A4FBC3, 0xD2A5FBC3, 0xD2A6FBC3, 0xD2A7FBC3, 0xD2A8FBC3, 0xD2A9FBC3, 0xD2AAFBC3, 0xD2ABFBC3, 0xD2ACFBC3, 0xD2ADFBC3, 0xD2AEFBC3, 0xD2AFFBC3, 0xD2B0FBC3, 0xD2B1FBC3, 0xD2B2FBC3, + 0xD2B3FBC3, 0xD2B4FBC3, 0xD2B5FBC3, 0xD2B6FBC3, 0xD2B7FBC3, 0xD2B8FBC3, 0xD2B9FBC3, 0xD2BAFBC3, 0xD2BBFBC3, 0xD2BCFBC3, 0xD2BDFBC3, 0xD2BEFBC3, 0xD2BFFBC3, 0xD2C0FBC3, 0xD2C1FBC3, + 0xD2C2FBC3, 0xD2C3FBC3, 0xD2C4FBC3, 0xD2C5FBC3, 0xD2C6FBC3, 0xD2C7FBC3, 0xD2C8FBC3, 0xD2C9FBC3, 0xD2CAFBC3, 0xD2CBFBC3, 0xD2CCFBC3, 0xD2CDFBC3, 0xD2CEFBC3, 0xD2CFFBC3, 0xD2D0FBC3, + 0xD2D1FBC3, 0xD2D2FBC3, 0xD2D3FBC3, 0xD2D4FBC3, 0xD2D5FBC3, 0xD2D6FBC3, 0xD2D7FBC3, 0xD2D8FBC3, 0xD2D9FBC3, 0xD2DAFBC3, 0xD2DBFBC3, 0xD2DCFBC3, 0xD2DDFBC3, 0xD2DEFBC3, 0xD2DFFBC3, + 0xD2E0FBC3, 0xD2E1FBC3, 0xD2E2FBC3, 0xD2E3FBC3, 
0xD2E4FBC3, 0xD2E5FBC3, 0xD2E6FBC3, 0xD2E7FBC3, 0xD2E8FBC3, 0xD2E9FBC3, 0xD2EAFBC3, 0xD2EBFBC3, 0xD2ECFBC3, 0xD2EDFBC3, 0xD2EEFBC3, + 0xD2EFFBC3, 0xD2F0FBC3, 0xD2F1FBC3, 0xD2F2FBC3, 0xD2F3FBC3, 0xD2F4FBC3, 0xD2F5FBC3, 0xD2F6FBC3, 0xD2F7FBC3, 0xD2F8FBC3, 0xD2F9FBC3, 0xD2FAFBC3, 0xD2FBFBC3, 0xD2FCFBC3, 0xD2FDFBC3, + 0xD2FEFBC3, 0xD2FFFBC3, 0xEEA, 0xEEB, 0xEEC, 0xEED, 0xEEE, 0xEEF, 0xEF0, 0xEF1, 0xEF2, 0xEF3, 0xEF4, 0xEF5, 0xEF6, + 0xEF7, 0xEF8, 0xEF9, 0xEFA, 0xEFB, 0xEFC, 0xEFD, 0xEFE, 0xEFF, 0xF00, 0xF01, 0xF02, 0xF03, 0xF04, 0xF05, + 0xF06, 0xF07, 0xF08, 0xF09, 0xF0A, 0xF0B, 0xF0C, 0xF0D, 0xF0E, 0xF0F, 0xF10, 0xF11, 0xF12, 0xF13, 0xF14, + 0xF15, 0xF16, 0xF17, 0xF18, 0xF19, 0xF1A, 0xF1B, 0xF1C, 0xF1D, 0xF1E, 0xF1F, 0xF20, 0xF21, 0xF22, 0xF23, + 0xF24, 0xF25, 0xF26, 0xF27, 0xF28, 0xF29, 0xF2A, 0xF2B, 0xF2C, 0xF2D, 0xF2E, 0xF2F, 0xF30, 0xF31, 0xF32, + 0xF33, 0xF34, 0xF35, 0xF36, 0xF37, 0xF38, 0xF39, 0xF3A, 0xF3B, 0xF3C, 0xF3D, 0xF3E, 0xF3F, 0xF40, 0xD357FBC3, + 0xD358FBC3, 0xD359FBC3, 0xD35AFBC3, 0xD35BFBC3, 0xD35CFBC3, 0xD35DFBC3, 0xD35EFBC3, 0xD35FFBC3, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, + 0x1C45, 0x1C46, 0x1BEF, 0x1BF0, 0x1BF1, 0x1BF2, 0x1BF3, 0x1BF4, 0x1BF5, 0x1BF6, 0x1BF7, 0xD372FBC3, 0xD373FBC3, 0xD374FBC3, 0xD375FBC3, + 0xD376FBC3, 0xD377FBC3, 0xD378FBC3, 0xD379FBC3, 0xD37AFBC3, 0xD37BFBC3, 0xD37CFBC3, 0xD37DFBC3, 0xD37EFBC3, 0xD37FFBC3, 0xD380FBC3, 0xD381FBC3, 0xD382FBC3, 0xD383FBC3, 0xD384FBC3, + 0xD385FBC3, 0xD386FBC3, 0xD387FBC3, 0xD388FBC3, 0xD389FBC3, 0xD38AFBC3, 0xD38BFBC3, 0xD38CFBC3, 0xD38DFBC3, 0xD38EFBC3, 0xD38FFBC3, 0xD390FBC3, 0xD391FBC3, 0xD392FBC3, 0xD393FBC3, + 0xD394FBC3, 0xD395FBC3, 0xD396FBC3, 0xD397FBC3, 0xD398FBC3, 0xD399FBC3, 0xD39AFBC3, 0xD39BFBC3, 0xD39CFBC3, 0xD39DFBC3, 0xD39EFBC3, 0xD39FFBC3, 0xD3A0FBC3, 0xD3A1FBC3, 0xD3A2FBC3, + 0xD3A3FBC3, 0xD3A4FBC3, 0xD3A5FBC3, 0xD3A6FBC3, 0xD3A7FBC3, 0xD3A8FBC3, 0xD3A9FBC3, 0xD3AAFBC3, 0xD3ABFBC3, 0xD3ACFBC3, 0xD3ADFBC3, 0xD3AEFBC3, 0xD3AFFBC3, 0xD3B0FBC3, 0xD3B1FBC3, + 
0xD3B2FBC3, 0xD3B3FBC3, 0xD3B4FBC3, 0xD3B5FBC3, 0xD3B6FBC3, 0xD3B7FBC3, 0xD3B8FBC3, 0xD3B9FBC3, 0xD3BAFBC3, 0xD3BBFBC3, 0xD3BCFBC3, 0xD3BDFBC3, 0xD3BEFBC3, 0xD3BFFBC3, 0xD3C0FBC3, + 0xD3C1FBC3, 0xD3C2FBC3, 0xD3C3FBC3, 0xD3C4FBC3, 0xD3C5FBC3, 0xD3C6FBC3, 0xD3C7FBC3, 0xD3C8FBC3, 0xD3C9FBC3, 0xD3CAFBC3, 0xD3CBFBC3, 0xD3CCFBC3, 0xD3CDFBC3, 0xD3CEFBC3, 0xD3CFFBC3, + 0xD3D0FBC3, 0xD3D1FBC3, 0xD3D2FBC3, 0xD3D3FBC3, 0xD3D4FBC3, 0xD3D5FBC3, 0xD3D6FBC3, 0xD3D7FBC3, 0xD3D8FBC3, 0xD3D9FBC3, 0xD3DAFBC3, 0xD3DBFBC3, 0xD3DCFBC3, 0xD3DDFBC3, 0xD3DEFBC3, + 0xD3DFFBC3, 0xD3E0FBC3, 0xD3E1FBC3, 0xD3E2FBC3, 0xD3E3FBC3, 0xD3E4FBC3, 0xD3E5FBC3, 0xD3E6FBC3, 0xD3E7FBC3, 0xD3E8FBC3, 0xD3E9FBC3, 0xD3EAFBC3, 0xD3EBFBC3, 0xD3ECFBC3, 0xD3EDFBC3, + 0xD3EEFBC3, 0xD3EFFBC3, 0xD3F0FBC3, 0xD3F1FBC3, 0xD3F2FBC3, 0xD3F3FBC3, 0xD3F4FBC3, 0xD3F5FBC3, 0xD3F6FBC3, 0xD3F7FBC3, 0xD3F8FBC3, 0xD3F9FBC3, 0xD3FAFBC3, 0xD3FBFBC3, 0xD3FCFBC3, + 0xD3FDFBC3, 0xD3FEFBC3, 0xD3FFFBC3, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, + 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, + 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, + 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, + 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, + 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0xD455FBC3, 0x1D32, + 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, + 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, + 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 
0x1F21, 0x1C47, 0x1C60, + 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, + 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0xD49DFBC3, 0x1C7A, 0x1C8F, 0xD4A0FBC3, 0xD4A1FBC3, + 0x1CF4, 0xD4A3FBC3, 0xD4A4FBC3, 0x1D4C, 0x1D65, 0xD4A7FBC3, 0xD4A8FBC3, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0xD4ADFBC3, 0x1E71, 0x1E95, 0x1EB5, + 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0xD4BAFBC3, 0x1CE5, 0xD4BCFBC3, 0x1D18, 0x1D32, 0x1D4C, + 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0xD4C4FBC3, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, + 0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, + 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0x1C7A, + 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, + 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0xD506FBC3, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, + 0xD50BFBC3, 0xD50CFBC3, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0xD515FBC3, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, + 0x1EF5, 0x1EFF, 0x1F0B, 0xD51DFBC3, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, + 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, + 0x1C47, 0x1C60, 0xD53AFBC3, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0xD53FFBC3, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0xD545FBC3, 0x1DDD, + 0xD547FBC3, 0xD548FBC3, 0xD549FBC3, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0xD551FBC3, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, + 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, + 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 
0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, + 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, + 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, + 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, + 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, + 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, + 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, + 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, + 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, + 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, + 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0x1C60, + 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, + 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, + 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, + 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, + 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, + 0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 
0x1D77, 0x1DAA, 0x1DB9, + 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0x1C7A, + 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, + 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, + 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, + 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1D36, 0x1D50, 0xD6A6FBC3, 0xD6A7FBC3, 0x1FB9, 0x1FBA, 0x1FBB, 0x1FBD, 0x1FBE, 0x1FC2, 0x1FC4, + 0x1FC5, 0x1FC6, 0x1FC8, 0x1FC9, 0x1FCB, 0x1FCC, 0x1FCD, 0x1FCE, 0x1FCF, 0x1FD4, 0x1FC5, 0x1FD7, 0x1FDB, 0x1FDC, 0x1FDD, + 0x1FDE, 0x1FDF, 0x1FE1, 0x60C, 0x1FB9, 0x1FBA, 0x1FBB, 0x1FBD, 0x1FBE, 0x1FC2, 0x1FC4, 0x1FC5, 0x1FC6, 0x1FC8, 0x1FC9, + 0x1FCB, 0x1FCC, 0x1FCD, 0x1FCE, 0x1FCF, 0x1FD4, 0x1FD7, 0x1FD7, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF, 0x1FE1, 0x608, + 0x1FBE, 0x1FC5, 0x1FC8, 0x1FDD, 0x1FD4, 0x1FCF, 0x1FB9, 0x1FBA, 0x1FBB, 0x1FBD, 0x1FBE, 0x1FC2, 0x1FC4, 0x1FC5, 0x1FC6, + 0x1FC8, 0x1FC9, 0x1FCB, 0x1FCC, 0x1FCD, 0x1FCE, 0x1FCF, 0x1FD4, 0x1FC5, 0x1FD7, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF, + 0x1FE1, 0x60C, 0x1FB9, 0x1FBA, 0x1FBB, 0x1FBD, 0x1FBE, 0x1FC2, 0x1FC4, 0x1FC5, 0x1FC6, 0x1FC8, 0x1FC9, 0x1FCB, 0x1FCC, + 0x1FCD, 0x1FCE, 0x1FCF, 0x1FD4, 0x1FD7, 0x1FD7, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF, 0x1FE1, 0x608, 0x1FBE, 0x1FC5, + 0x1FC8, 0x1FDD, 0x1FD4, 0x1FCF, 0x1FB9, 0x1FBA, 0x1FBB, 0x1FBD, 0x1FBE, 0x1FC2, 0x1FC4, 0x1FC5, 0x1FC6, 0x1FC8, 0x1FC9, + 0x1FCB, 0x1FCC, 0x1FCD, 0x1FCE, 0x1FCF, 0x1FD4, 0x1FC5, 0x1FD7, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF, 0x1FE1, 0x60C, + 0x1FB9, 0x1FBA, 0x1FBB, 0x1FBD, 0x1FBE, 0x1FC2, 0x1FC4, 0x1FC5, 0x1FC6, 0x1FC8, 0x1FC9, 0x1FCB, 0x1FCC, 0x1FCD, 0x1FCE, + 0x1FCF, 0x1FD4, 0x1FD7, 0x1FD7, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF, 0x1FE1, 0x608, 0x1FBE, 0x1FC5, 0x1FC8, 0x1FDD, + 0x1FD4, 0x1FCF, 
0x1FB9, 0x1FBA, 0x1FBB, 0x1FBD, 0x1FBE, 0x1FC2, 0x1FC4, 0x1FC5, 0x1FC6, 0x1FC8, 0x1FC9, 0x1FCB, 0x1FCC, + 0x1FCD, 0x1FCE, 0x1FCF, 0x1FD4, 0x1FC5, 0x1FD7, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF, 0x1FE1, 0x60C, 0x1FB9, 0x1FBA, + 0x1FBB, 0x1FBD, 0x1FBE, 0x1FC2, 0x1FC4, 0x1FC5, 0x1FC6, 0x1FC8, 0x1FC9, 0x1FCB, 0x1FCC, 0x1FCD, 0x1FCE, 0x1FCF, 0x1FD4, + 0x1FD7, 0x1FD7, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF, 0x1FE1, 0x608, 0x1FBE, 0x1FC5, 0x1FC8, 0x1FDD, 0x1FD4, 0x1FCF, + 0x1FB9, 0x1FBA, 0x1FBB, 0x1FBD, 0x1FBE, 0x1FC2, 0x1FC4, 0x1FC5, 0x1FC6, 0x1FC8, 0x1FC9, 0x1FCB, 0x1FCC, 0x1FCD, 0x1FCE, + 0x1FCF, 0x1FD4, 0x1FC5, 0x1FD7, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF, 0x1FE1, 0x60C, 0x1FB9, 0x1FBA, 0x1FBB, 0x1FBD, + 0x1FBE, 0x1FC2, 0x1FC4, 0x1FC5, 0x1FC6, 0x1FC8, 0x1FC9, 0x1FCB, 0x1FCC, 0x1FCD, 0x1FCE, 0x1FCF, 0x1FD4, 0x1FD7, 0x1FD7, + 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF, 0x1FE1, 0x608, 0x1FBE, 0x1FC5, 0x1FC8, 0x1FDD, 0x1FD4, 0x1FCF, 0x1FBF, 0x1FBF, + 0xD7CCFBC3, 0xD7CDFBC3, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C3D, 0x1C3E, 0x1C3F, + 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, + 0x1C45, 0x1C46, 0x1C3D, 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x1C3D, 0x1C3E, 0x1C3F, + 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x183F, 0x1840, 0x1841, 0x1842, 0x1843, 0x1844, 0x1845, 0x1846, + 0x1847, 0x1848, 0x1849, 0x184A, 0x184B, 0x184C, 0x184D, 0x184E, 0x184F, 0x1850, 0x1851, 0x1852, 0x1853, 0x1854, 0x1855, + 0x1856, 0x1857, 0x1858, 0x1859, 0x185A, 0x185B, 0x185C, 0x185D, 0x185E, 0x185F, 0x1860, 0x1861, 0x1862, 0x1863, 0x1864, + 0x1865, 0x1866, 0x1867, 0x1868, 0x1869, 0x186A, 0x186B, 0x186C, 0x186D, 0x186E, 0x186F, 0x1870, 0x1871, 0x1872, 0x1873, + 0x1874, 0x1875, 0x1876, 0x1877, 0x1878, 0x1879, 0x187A, 0x187B, 0x187C, 0x187D, 0x187E, 0x187F, 0x1880, 0x1881, 0x1882, + 0x1883, 0x1884, 0x1885, 0x1886, 0x1887, 0x1888, 0x1889, 
0x188A, 0x188B, 0x188C, 0x188D, 0x188E, 0x188F, 0x1890, 0x1891, + 0x1892, 0x1893, 0x1894, 0x1895, 0x1896, 0x1897, 0x1898, 0x1899, 0x189A, 0x189B, 0x189C, 0x189D, 0x189E, 0x189F, 0x18A0, + 0x18A1, 0x18A2, 0x18A3, 0x18A4, 0x18A5, 0x18A6, 0x18A7, 0x18A8, 0x18A9, 0x18AA, 0x18AB, 0x18AC, 0x18AD, 0x18AE, 0x18AF, + 0x18B0, 0x18B1, 0x18B2, 0x18B3, 0x18B4, 0x18B5, 0x18B6, 0x18B7, 0x18B8, 0x18B9, 0x18BA, 0x18BB, 0x18BC, 0x18BD, 0x18BE, + 0x18BF, 0x18C0, 0x18C1, 0x18C2, 0x18C3, 0x18C4, 0x18C5, 0x18C6, 0x18C7, 0x18C8, 0x18C9, 0x18CA, 0x18CB, 0x18CC, 0x18CD, + 0x18CE, 0x18CF, 0x18D0, 0x18D1, 0x18D2, 0x18D3, 0x18D4, 0x18D5, 0x18D6, 0x18D7, 0x18D8, 0x18D9, 0x18DA, 0x18DB, 0x18DC, + 0x18DD, 0x18DE, 0x18DF, 0x18E0, 0x18E1, 0x18E2, 0x18E3, 0x18E4, 0x18E5, 0x18E6, 0x18E7, 0x18E8, 0x18E9, 0x18EA, 0x18EB, + 0x18EC, 0x18ED, 0x18EE, 0x18EF, 0x18F0, 0x18F1, 0x18F2, 0x18F3, 0x18F4, 0x18F5, 0x18F6, 0x18F7, 0x18F8, 0x18F9, 0x18FA, + 0x18FB, 0x18FC, 0x18FD, 0x18FE, 0x18FF, 0x1900, 0x1901, 0x1902, 0x1903, 0x1904, 0x1905, 0x1906, 0x1907, 0x1908, 0x1909, + 0x190A, 0x190B, 0x190C, 0x190D, 0x190E, 0x190F, 0x1910, 0x1911, 0x1912, 0x1913, 0x1914, 0x1915, 0x1916, 0x1917, 0x1918, + 0x1919, 0x191A, 0x191B, 0x191C, 0x191D, 0x191E, 0x191F, 0x1920, 0x1921, 0x1922, 0x1923, 0x1924, 0x1925, 0x1926, 0x1927, + 0x1928, 0x1929, 0x192A, 0x192B, 0x192C, 0x192D, 0x192E, 0x192F, 0x1930, 0x1931, 0x1932, 0x1933, 0x1934, 0x1935, 0x1936, + 0x1937, 0x1938, 0x1939, 0x193A, 0x193B, 0x193C, 0x193D, 0x193E, 0x193F, 0x1940, 0x1941, 0x1942, 0x1943, 0x1944, 0x1945, + 0x1946, 0x1947, 0x1948, 0x1949, 0x194A, 0x194B, 0x194C, 0x194D, 0x194E, 0x194F, 0x1950, 0x1951, 0x1952, 0x1953, 0x1954, + 0x1955, 0x1956, 0x1957, 0x1958, 0x1959, 0x195A, 0x195B, 0x195C, 0x195D, 0x195E, 0x195F, 0x1960, 0x1961, 0x1962, 0x1963, + 0x1964, 0x1965, 0x1966, 0x1967, 0x1968, 0x1969, 0x196A, 0x196B, 0x196C, 0x196D, 0x196E, 0x196F, 0x1970, 0x1971, 0x1972, + 0x1973, 0x1974, 0x1975, 0x1976, 0x1977, 0x1978, 0x1979, 0x197A, 0x197B, 0x197C, 0x197D, 0x197E, 0x197F, 
0x1980, 0x1981, + 0x1982, 0x1983, 0x1984, 0x1985, 0x1986, 0x1987, 0x1988, 0x1989, 0x198A, 0x198B, 0x198C, 0x198D, 0x198E, 0x198F, 0x1990, + 0x1991, 0x1992, 0x1993, 0x1994, 0x1995, 0x1996, 0x1997, 0x1998, 0x1999, 0x199A, 0x199B, 0x199C, 0x199D, 0x199E, 0x199F, + 0x19A0, 0x19A1, 0x19A2, 0x19A3, 0x19A4, 0x19A5, 0x19A6, 0x19A7, 0x19A8, 0x19A9, 0x19AA, 0x19AB, 0x19AC, 0x19AD, 0x19AE, + 0x19AF, 0x19B0, 0x19B1, 0x19B2, 0x19B3, 0x19B4, 0x19B5, 0x19B6, 0x19B7, 0x19B8, 0x19B9, 0x19BA, 0x19BB, 0x19BC, 0x19BD, + 0x19BE, 0x19BF, 0x19C0, 0x19C1, 0x19C2, 0x19C3, 0x19C4, 0x19C5, 0x19C6, 0x19C7, 0x19C8, 0x19C9, 0x19CA, 0x19CB, 0x19CC, + 0x19CD, 0x19CE, 0x19CF, 0x19D0, 0x19D1, 0x19D2, 0x19D3, 0x19D4, 0x19D5, 0x19D6, 0x19D7, 0x19D8, 0x19D9, 0x19DA, 0x19DB, + 0x19DC, 0x19DD, 0x19DE, 0x19DF, 0x19E0, 0x19E1, 0x19E2, 0x19E3, 0x19E4, 0x19E5, 0x19E6, 0x19E7, 0x19E8, 0x19E9, 0x19EA, + 0x19EB, 0x19EC, 0x19ED, 0x19EE, 0x19EF, 0x19F0, 0x19F1, 0x19F2, 0x19F3, 0x19F4, 0x19F5, 0x19F6, 0x19F7, 0x19F8, 0x19F9, + 0x19FA, 0x19FB, 0x19FC, 0x19FD, 0x19FE, 0x19FF, 0x1A00, 0x1A01, 0x1A02, 0x1A03, 0x1A04, 0x1A05, 0x1A06, 0x1A07, 0x1A08, + 0x1A09, 0x1A0A, 0x1A0B, 0x1A0C, 0x1A0D, 0x1A0E, 0x1A0F, 0x1A10, 0x1A11, 0x1A12, 0x1A13, 0x1A14, 0x1A15, 0x1A16, 0x1A17, + 0x1A18, 0x1A19, 0x1A1A, 0x1A1B, 0x1A1C, 0x1A1D, 0x1A1E, 0x1A1F, 0x1A20, 0x1A21, 0x1A22, 0x1A23, 0x1A24, 0x1A25, 0x1A26, + 0x1A27, 0x1A28, 0x1A29, 0x1A2A, 0x1A2B, 0x1A2C, 0x1A2D, 0x1A2E, 0x1A2F, 0x1A30, 0x1A31, 0x1A32, 0x1A33, 0x1A34, 0x1A35, + 0x1A36, 0x1A37, 0x1A38, 0x1A39, 0x1A3A, 0x1A3B, 0x1A3C, 0x1A3D, 0x1A3E, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x1A3F, 0x1A40, 0x1A41, 0x1A42, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1A43, 0x1A44, + 0x1A45, 0x1A46, 0x1A47, 0x1A48, 0x1A49, 0x1A4A, 0x0, 0x1A4B, 0x1A4C, 0x1A4D, 0x1A4E, 0x1A4F, 0x1A50, 0x1A51, 0x1A52, + 0x1A53, 0x1A54, 0x1A55, 0x1A56, 0x1A57, 0x1A58, 0x0, 0x1A59, 0x1A5A, 0x47D, 0x47E, 0x47F, 0x480, 0x481, 0xDA8CFBC3, + 0xDA8DFBC3, 0xDA8EFBC3, 0xDA8FFBC3, 0xDA90FBC3, 0xDA91FBC3, 0xDA92FBC3, 0xDA93FBC3, 0xDA94FBC3, 0xDA95FBC3, 0xDA96FBC3, 0xDA97FBC3, 0xDA98FBC3, 0xDA99FBC3, 0xDA9AFBC3, 0x0, + 0x0, 0x0, 0x0, 0x0, 0xDAA0FBC3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0xDAB0FBC3, 0xDAB1FBC3, 0xDAB2FBC3, 0xDAB3FBC3, 0xDAB4FBC3, 0xDAB5FBC3, 0xDAB6FBC3, 0xDAB7FBC3, 0xDAB8FBC3, 0xDAB9FBC3, + 0xDABAFBC3, 0xDABBFBC3, 0xDABCFBC3, 0xDABDFBC3, 0xDABEFBC3, 0xDABFFBC3, 0xDAC0FBC3, 0xDAC1FBC3, 0xDAC2FBC3, 0xDAC3FBC3, 0xDAC4FBC3, 0xDAC5FBC3, 0xDAC6FBC3, 0xDAC7FBC3, 0xDAC8FBC3, + 0xDAC9FBC3, 0xDACAFBC3, 0xDACBFBC3, 0xDACCFBC3, 0xDACDFBC3, 0xDACEFBC3, 0xDACFFBC3, 0xDAD0FBC3, 0xDAD1FBC3, 0xDAD2FBC3, 0xDAD3FBC3, 0xDAD4FBC3, 0xDAD5FBC3, 0xDAD6FBC3, 0xDAD7FBC3, + 0xDAD8FBC3, 0xDAD9FBC3, 0xDADAFBC3, 0xDADBFBC3, 0xDADCFBC3, 0xDADDFBC3, 0xDADEFBC3, 0xDADFFBC3, 0xDAE0FBC3, 0xDAE1FBC3, 0xDAE2FBC3, 0xDAE3FBC3, 0xDAE4FBC3, 0xDAE5FBC3, 0xDAE6FBC3, + 0xDAE7FBC3, 0xDAE8FBC3, 0xDAE9FBC3, 0xDAEAFBC3, 0xDAEBFBC3, 0xDAECFBC3, 0xDAEDFBC3, 0xDAEEFBC3, 0xDAEFFBC3, 0xDAF0FBC3, 0xDAF1FBC3, 0xDAF2FBC3, 0xDAF3FBC3, 0xDAF4FBC3, 0xDAF5FBC3, + 0xDAF6FBC3, 0xDAF7FBC3, 0xDAF8FBC3, 0xDAF9FBC3, 0xDAFAFBC3, 0xDAFBFBC3, 0xDAFCFBC3, 0xDAFDFBC3, 0xDAFEFBC3, 0xDAFFFBC3, 0xDB00FBC3, 0xDB01FBC3, 0xDB02FBC3, 0xDB03FBC3, 0xDB04FBC3, + 0xDB05FBC3, 0xDB06FBC3, 0xDB07FBC3, 0xDB08FBC3, 0xDB09FBC3, 0xDB0AFBC3, 0xDB0BFBC3, 0xDB0CFBC3, 0xDB0DFBC3, 0xDB0EFBC3, 0xDB0FFBC3, 0xDB10FBC3, 0xDB11FBC3, 0xDB12FBC3, 0xDB13FBC3, + 0xDB14FBC3, 0xDB15FBC3, 0xDB16FBC3, 0xDB17FBC3, 0xDB18FBC3, 0xDB19FBC3, 0xDB1AFBC3, 0xDB1BFBC3, 0xDB1CFBC3, 0xDB1DFBC3, 
0xDB1EFBC3, 0xDB1FFBC3, 0xDB20FBC3, 0xDB21FBC3, 0xDB22FBC3, + 0xDB23FBC3, 0xDB24FBC3, 0xDB25FBC3, 0xDB26FBC3, 0xDB27FBC3, 0xDB28FBC3, 0xDB29FBC3, 0xDB2AFBC3, 0xDB2BFBC3, 0xDB2CFBC3, 0xDB2DFBC3, 0xDB2EFBC3, 0xDB2FFBC3, 0xDB30FBC3, 0xDB31FBC3, + 0xDB32FBC3, 0xDB33FBC3, 0xDB34FBC3, 0xDB35FBC3, 0xDB36FBC3, 0xDB37FBC3, 0xDB38FBC3, 0xDB39FBC3, 0xDB3AFBC3, 0xDB3BFBC3, 0xDB3CFBC3, 0xDB3DFBC3, 0xDB3EFBC3, 0xDB3FFBC3, 0xDB40FBC3, + 0xDB41FBC3, 0xDB42FBC3, 0xDB43FBC3, 0xDB44FBC3, 0xDB45FBC3, 0xDB46FBC3, 0xDB47FBC3, 0xDB48FBC3, 0xDB49FBC3, 0xDB4AFBC3, 0xDB4BFBC3, 0xDB4CFBC3, 0xDB4DFBC3, 0xDB4EFBC3, 0xDB4FFBC3, + 0xDB50FBC3, 0xDB51FBC3, 0xDB52FBC3, 0xDB53FBC3, 0xDB54FBC3, 0xDB55FBC3, 0xDB56FBC3, 0xDB57FBC3, 0xDB58FBC3, 0xDB59FBC3, 0xDB5AFBC3, 0xDB5BFBC3, 0xDB5CFBC3, 0xDB5DFBC3, 0xDB5EFBC3, + 0xDB5FFBC3, 0xDB60FBC3, 0xDB61FBC3, 0xDB62FBC3, 0xDB63FBC3, 0xDB64FBC3, 0xDB65FBC3, 0xDB66FBC3, 0xDB67FBC3, 0xDB68FBC3, 0xDB69FBC3, 0xDB6AFBC3, 0xDB6BFBC3, 0xDB6CFBC3, 0xDB6DFBC3, + 0xDB6EFBC3, 0xDB6FFBC3, 0xDB70FBC3, 0xDB71FBC3, 0xDB72FBC3, 0xDB73FBC3, 0xDB74FBC3, 0xDB75FBC3, 0xDB76FBC3, 0xDB77FBC3, 0xDB78FBC3, 0xDB79FBC3, 0xDB7AFBC3, 0xDB7BFBC3, 0xDB7CFBC3, + 0xDB7DFBC3, 0xDB7EFBC3, 0xDB7FFBC3, 0xDB80FBC3, 0xDB81FBC3, 0xDB82FBC3, 0xDB83FBC3, 0xDB84FBC3, 0xDB85FBC3, 0xDB86FBC3, 0xDB87FBC3, 0xDB88FBC3, 0xDB89FBC3, 0xDB8AFBC3, 0xDB8BFBC3, + 0xDB8CFBC3, 0xDB8DFBC3, 0xDB8EFBC3, 0xDB8FFBC3, 0xDB90FBC3, 0xDB91FBC3, 0xDB92FBC3, 0xDB93FBC3, 0xDB94FBC3, 0xDB95FBC3, 0xDB96FBC3, 0xDB97FBC3, 0xDB98FBC3, 0xDB99FBC3, 0xDB9AFBC3, + 0xDB9BFBC3, 0xDB9CFBC3, 0xDB9DFBC3, 0xDB9EFBC3, 0xDB9FFBC3, 0xDBA0FBC3, 0xDBA1FBC3, 0xDBA2FBC3, 0xDBA3FBC3, 0xDBA4FBC3, 0xDBA5FBC3, 0xDBA6FBC3, 0xDBA7FBC3, 0xDBA8FBC3, 0xDBA9FBC3, + 0xDBAAFBC3, 0xDBABFBC3, 0xDBACFBC3, 0xDBADFBC3, 0xDBAEFBC3, 0xDBAFFBC3, 0xDBB0FBC3, 0xDBB1FBC3, 0xDBB2FBC3, 0xDBB3FBC3, 0xDBB4FBC3, 0xDBB5FBC3, 0xDBB6FBC3, 0xDBB7FBC3, 0xDBB8FBC3, + 0xDBB9FBC3, 0xDBBAFBC3, 0xDBBBFBC3, 0xDBBCFBC3, 0xDBBDFBC3, 0xDBBEFBC3, 0xDBBFFBC3, 0xDBC0FBC3, 0xDBC1FBC3, 
0xDBC2FBC3, 0xDBC3FBC3, 0xDBC4FBC3, 0xDBC5FBC3, 0xDBC6FBC3, 0xDBC7FBC3, + 0xDBC8FBC3, 0xDBC9FBC3, 0xDBCAFBC3, 0xDBCBFBC3, 0xDBCCFBC3, 0xDBCDFBC3, 0xDBCEFBC3, 0xDBCFFBC3, 0xDBD0FBC3, 0xDBD1FBC3, 0xDBD2FBC3, 0xDBD3FBC3, 0xDBD4FBC3, 0xDBD5FBC3, 0xDBD6FBC3, + 0xDBD7FBC3, 0xDBD8FBC3, 0xDBD9FBC3, 0xDBDAFBC3, 0xDBDBFBC3, 0xDBDCFBC3, 0xDBDDFBC3, 0xDBDEFBC3, 0xDBDFFBC3, 0xDBE0FBC3, 0xDBE1FBC3, 0xDBE2FBC3, 0xDBE3FBC3, 0xDBE4FBC3, 0xDBE5FBC3, + 0xDBE6FBC3, 0xDBE7FBC3, 0xDBE8FBC3, 0xDBE9FBC3, 0xDBEAFBC3, 0xDBEBFBC3, 0xDBECFBC3, 0xDBEDFBC3, 0xDBEEFBC3, 0xDBEFFBC3, 0xDBF0FBC3, 0xDBF1FBC3, 0xDBF2FBC3, 0xDBF3FBC3, 0xDBF4FBC3, + 0xDBF5FBC3, 0xDBF6FBC3, 0xDBF7FBC3, 0xDBF8FBC3, 0xDBF9FBC3, 0xDBFAFBC3, 0xDBFBFBC3, 0xDBFCFBC3, 0xDBFDFBC3, 0xDBFEFBC3, 0xDBFFFBC3, 0xDC00FBC3, 0xDC01FBC3, 0xDC02FBC3, 0xDC03FBC3, + 0xDC04FBC3, 0xDC05FBC3, 0xDC06FBC3, 0xDC07FBC3, 0xDC08FBC3, 0xDC09FBC3, 0xDC0AFBC3, 0xDC0BFBC3, 0xDC0CFBC3, 0xDC0DFBC3, 0xDC0EFBC3, 0xDC0FFBC3, 0xDC10FBC3, 0xDC11FBC3, 0xDC12FBC3, + 0xDC13FBC3, 0xDC14FBC3, 0xDC15FBC3, 0xDC16FBC3, 0xDC17FBC3, 0xDC18FBC3, 0xDC19FBC3, 0xDC1AFBC3, 0xDC1BFBC3, 0xDC1CFBC3, 0xDC1DFBC3, 0xDC1EFBC3, 0xDC1FFBC3, 0xDC20FBC3, 0xDC21FBC3, + 0xDC22FBC3, 0xDC23FBC3, 0xDC24FBC3, 0xDC25FBC3, 0xDC26FBC3, 0xDC27FBC3, 0xDC28FBC3, 0xDC29FBC3, 0xDC2AFBC3, 0xDC2BFBC3, 0xDC2CFBC3, 0xDC2DFBC3, 0xDC2EFBC3, 0xDC2FFBC3, 0xDC30FBC3, + 0xDC31FBC3, 0xDC32FBC3, 0xDC33FBC3, 0xDC34FBC3, 0xDC35FBC3, 0xDC36FBC3, 0xDC37FBC3, 0xDC38FBC3, 0xDC39FBC3, 0xDC3AFBC3, 0xDC3BFBC3, 0xDC3CFBC3, 0xDC3DFBC3, 0xDC3EFBC3, 0xDC3FFBC3, + 0xDC40FBC3, 0xDC41FBC3, 0xDC42FBC3, 0xDC43FBC3, 0xDC44FBC3, 0xDC45FBC3, 0xDC46FBC3, 0xDC47FBC3, 0xDC48FBC3, 0xDC49FBC3, 0xDC4AFBC3, 0xDC4BFBC3, 0xDC4CFBC3, 0xDC4DFBC3, 0xDC4EFBC3, + 0xDC4FFBC3, 0xDC50FBC3, 0xDC51FBC3, 0xDC52FBC3, 0xDC53FBC3, 0xDC54FBC3, 0xDC55FBC3, 0xDC56FBC3, 0xDC57FBC3, 0xDC58FBC3, 0xDC59FBC3, 0xDC5AFBC3, 0xDC5BFBC3, 0xDC5CFBC3, 0xDC5DFBC3, + 0xDC5EFBC3, 0xDC5FFBC3, 0xDC60FBC3, 0xDC61FBC3, 0xDC62FBC3, 0xDC63FBC3, 0xDC64FBC3, 0xDC65FBC3, 
0xDC66FBC3, 0xDC67FBC3, 0xDC68FBC3, 0xDC69FBC3, 0xDC6AFBC3, 0xDC6BFBC3, 0xDC6CFBC3, + 0xDC6DFBC3, 0xDC6EFBC3, 0xDC6FFBC3, 0xDC70FBC3, 0xDC71FBC3, 0xDC72FBC3, 0xDC73FBC3, 0xDC74FBC3, 0xDC75FBC3, 0xDC76FBC3, 0xDC77FBC3, 0xDC78FBC3, 0xDC79FBC3, 0xDC7AFBC3, 0xDC7BFBC3, + 0xDC7CFBC3, 0xDC7DFBC3, 0xDC7EFBC3, 0xDC7FFBC3, 0xDC80FBC3, 0xDC81FBC3, 0xDC82FBC3, 0xDC83FBC3, 0xDC84FBC3, 0xDC85FBC3, 0xDC86FBC3, 0xDC87FBC3, 0xDC88FBC3, 0xDC89FBC3, 0xDC8AFBC3, + 0xDC8BFBC3, 0xDC8CFBC3, 0xDC8DFBC3, 0xDC8EFBC3, 0xDC8FFBC3, 0xDC90FBC3, 0xDC91FBC3, 0xDC92FBC3, 0xDC93FBC3, 0xDC94FBC3, 0xDC95FBC3, 0xDC96FBC3, 0xDC97FBC3, 0xDC98FBC3, 0xDC99FBC3, + 0xDC9AFBC3, 0xDC9BFBC3, 0xDC9CFBC3, 0xDC9DFBC3, 0xDC9EFBC3, 0xDC9FFBC3, 0xDCA0FBC3, 0xDCA1FBC3, 0xDCA2FBC3, 0xDCA3FBC3, 0xDCA4FBC3, 0xDCA5FBC3, 0xDCA6FBC3, 0xDCA7FBC3, 0xDCA8FBC3, + 0xDCA9FBC3, 0xDCAAFBC3, 0xDCABFBC3, 0xDCACFBC3, 0xDCADFBC3, 0xDCAEFBC3, 0xDCAFFBC3, 0xDCB0FBC3, 0xDCB1FBC3, 0xDCB2FBC3, 0xDCB3FBC3, 0xDCB4FBC3, 0xDCB5FBC3, 0xDCB6FBC3, 0xDCB7FBC3, + 0xDCB8FBC3, 0xDCB9FBC3, 0xDCBAFBC3, 0xDCBBFBC3, 0xDCBCFBC3, 0xDCBDFBC3, 0xDCBEFBC3, 0xDCBFFBC3, 0xDCC0FBC3, 0xDCC1FBC3, 0xDCC2FBC3, 0xDCC3FBC3, 0xDCC4FBC3, 0xDCC5FBC3, 0xDCC6FBC3, + 0xDCC7FBC3, 0xDCC8FBC3, 0xDCC9FBC3, 0xDCCAFBC3, 0xDCCBFBC3, 0xDCCCFBC3, 0xDCCDFBC3, 0xDCCEFBC3, 0xDCCFFBC3, 0xDCD0FBC3, 0xDCD1FBC3, 0xDCD2FBC3, 0xDCD3FBC3, 0xDCD4FBC3, 0xDCD5FBC3, + 0xDCD6FBC3, 0xDCD7FBC3, 0xDCD8FBC3, 0xDCD9FBC3, 0xDCDAFBC3, 0xDCDBFBC3, 0xDCDCFBC3, 0xDCDDFBC3, 0xDCDEFBC3, 0xDCDFFBC3, 0xDCE0FBC3, 0xDCE1FBC3, 0xDCE2FBC3, 0xDCE3FBC3, 0xDCE4FBC3, + 0xDCE5FBC3, 0xDCE6FBC3, 0xDCE7FBC3, 0xDCE8FBC3, 0xDCE9FBC3, 0xDCEAFBC3, 0xDCEBFBC3, 0xDCECFBC3, 0xDCEDFBC3, 0xDCEEFBC3, 0xDCEFFBC3, 0xDCF0FBC3, 0xDCF1FBC3, 0xDCF2FBC3, 0xDCF3FBC3, + 0xDCF4FBC3, 0xDCF5FBC3, 0xDCF6FBC3, 0xDCF7FBC3, 0xDCF8FBC3, 0xDCF9FBC3, 0xDCFAFBC3, 0xDCFBFBC3, 0xDCFCFBC3, 0xDCFDFBC3, 0xDCFEFBC3, 0xDCFFFBC3, 0xDD00FBC3, 0xDD01FBC3, 0xDD02FBC3, + 0xDD03FBC3, 0xDD04FBC3, 0xDD05FBC3, 0xDD06FBC3, 0xDD07FBC3, 0xDD08FBC3, 0xDD09FBC3, 
0xDD0AFBC3, 0xDD0BFBC3, 0xDD0CFBC3, 0xDD0DFBC3, 0xDD0EFBC3, 0xDD0FFBC3, 0xDD10FBC3, 0xDD11FBC3, + 0xDD12FBC3, 0xDD13FBC3, 0xDD14FBC3, 0xDD15FBC3, 0xDD16FBC3, 0xDD17FBC3, 0xDD18FBC3, 0xDD19FBC3, 0xDD1AFBC3, 0xDD1BFBC3, 0xDD1CFBC3, 0xDD1DFBC3, 0xDD1EFBC3, 0xDD1FFBC3, 0xDD20FBC3, + 0xDD21FBC3, 0xDD22FBC3, 0xDD23FBC3, 0xDD24FBC3, 0xDD25FBC3, 0xDD26FBC3, 0xDD27FBC3, 0xDD28FBC3, 0xDD29FBC3, 0xDD2AFBC3, 0xDD2BFBC3, 0xDD2CFBC3, 0xDD2DFBC3, 0xDD2EFBC3, 0xDD2FFBC3, + 0xDD30FBC3, 0xDD31FBC3, 0xDD32FBC3, 0xDD33FBC3, 0xDD34FBC3, 0xDD35FBC3, 0xDD36FBC3, 0xDD37FBC3, 0xDD38FBC3, 0xDD39FBC3, 0xDD3AFBC3, 0xDD3BFBC3, 0xDD3CFBC3, 0xDD3DFBC3, 0xDD3EFBC3, + 0xDD3FFBC3, 0xDD40FBC3, 0xDD41FBC3, 0xDD42FBC3, 0xDD43FBC3, 0xDD44FBC3, 0xDD45FBC3, 0xDD46FBC3, 0xDD47FBC3, 0xDD48FBC3, 0xDD49FBC3, 0xDD4AFBC3, 0xDD4BFBC3, 0xDD4CFBC3, 0xDD4DFBC3, + 0xDD4EFBC3, 0xDD4FFBC3, 0xDD50FBC3, 0xDD51FBC3, 0xDD52FBC3, 0xDD53FBC3, 0xDD54FBC3, 0xDD55FBC3, 0xDD56FBC3, 0xDD57FBC3, 0xDD58FBC3, 0xDD59FBC3, 0xDD5AFBC3, 0xDD5BFBC3, 0xDD5CFBC3, + 0xDD5DFBC3, 0xDD5EFBC3, 0xDD5FFBC3, 0xDD60FBC3, 0xDD61FBC3, 0xDD62FBC3, 0xDD63FBC3, 0xDD64FBC3, 0xDD65FBC3, 0xDD66FBC3, 0xDD67FBC3, 0xDD68FBC3, 0xDD69FBC3, 0xDD6AFBC3, 0xDD6BFBC3, + 0xDD6CFBC3, 0xDD6DFBC3, 0xDD6EFBC3, 0xDD6FFBC3, 0xDD70FBC3, 0xDD71FBC3, 0xDD72FBC3, 0xDD73FBC3, 0xDD74FBC3, 0xDD75FBC3, 0xDD76FBC3, 0xDD77FBC3, 0xDD78FBC3, 0xDD79FBC3, 0xDD7AFBC3, + 0xDD7BFBC3, 0xDD7CFBC3, 0xDD7DFBC3, 0xDD7EFBC3, 0xDD7FFBC3, 0xDD80FBC3, 0xDD81FBC3, 0xDD82FBC3, 0xDD83FBC3, 0xDD84FBC3, 0xDD85FBC3, 0xDD86FBC3, 0xDD87FBC3, 0xDD88FBC3, 0xDD89FBC3, + 0xDD8AFBC3, 0xDD8BFBC3, 0xDD8CFBC3, 0xDD8DFBC3, 0xDD8EFBC3, 0xDD8FFBC3, 0xDD90FBC3, 0xDD91FBC3, 0xDD92FBC3, 0xDD93FBC3, 0xDD94FBC3, 0xDD95FBC3, 0xDD96FBC3, 0xDD97FBC3, 0xDD98FBC3, + 0xDD99FBC3, 0xDD9AFBC3, 0xDD9BFBC3, 0xDD9CFBC3, 0xDD9DFBC3, 0xDD9EFBC3, 0xDD9FFBC3, 0xDDA0FBC3, 0xDDA1FBC3, 0xDDA2FBC3, 0xDDA3FBC3, 0xDDA4FBC3, 0xDDA5FBC3, 0xDDA6FBC3, 0xDDA7FBC3, + 0xDDA8FBC3, 0xDDA9FBC3, 0xDDAAFBC3, 0xDDABFBC3, 0xDDACFBC3, 0xDDADFBC3, 
0xDDAEFBC3, 0xDDAFFBC3, 0xDDB0FBC3, 0xDDB1FBC3, 0xDDB2FBC3, 0xDDB3FBC3, 0xDDB4FBC3, 0xDDB5FBC3, 0xDDB6FBC3, + 0xDDB7FBC3, 0xDDB8FBC3, 0xDDB9FBC3, 0xDDBAFBC3, 0xDDBBFBC3, 0xDDBCFBC3, 0xDDBDFBC3, 0xDDBEFBC3, 0xDDBFFBC3, 0xDDC0FBC3, 0xDDC1FBC3, 0xDDC2FBC3, 0xDDC3FBC3, 0xDDC4FBC3, 0xDDC5FBC3, + 0xDDC6FBC3, 0xDDC7FBC3, 0xDDC8FBC3, 0xDDC9FBC3, 0xDDCAFBC3, 0xDDCBFBC3, 0xDDCCFBC3, 0xDDCDFBC3, 0xDDCEFBC3, 0xDDCFFBC3, 0xDDD0FBC3, 0xDDD1FBC3, 0xDDD2FBC3, 0xDDD3FBC3, 0xDDD4FBC3, + 0xDDD5FBC3, 0xDDD6FBC3, 0xDDD7FBC3, 0xDDD8FBC3, 0xDDD9FBC3, 0xDDDAFBC3, 0xDDDBFBC3, 0xDDDCFBC3, 0xDDDDFBC3, 0xDDDEFBC3, 0xDDDFFBC3, 0xDDE0FBC3, 0xDDE1FBC3, 0xDDE2FBC3, 0xDDE3FBC3, + 0xDDE4FBC3, 0xDDE5FBC3, 0xDDE6FBC3, 0xDDE7FBC3, 0xDDE8FBC3, 0xDDE9FBC3, 0xDDEAFBC3, 0xDDEBFBC3, 0xDDECFBC3, 0xDDEDFBC3, 0xDDEEFBC3, 0xDDEFFBC3, 0xDDF0FBC3, 0xDDF1FBC3, 0xDDF2FBC3, + 0xDDF3FBC3, 0xDDF4FBC3, 0xDDF5FBC3, 0xDDF6FBC3, 0xDDF7FBC3, 0xDDF8FBC3, 0xDDF9FBC3, 0xDDFAFBC3, 0xDDFBFBC3, 0xDDFCFBC3, 0xDDFDFBC3, 0xDDFEFBC3, 0xDDFFFBC3, 0xDE00FBC3, 0xDE01FBC3, + 0xDE02FBC3, 0xDE03FBC3, 0xDE04FBC3, 0xDE05FBC3, 0xDE06FBC3, 0xDE07FBC3, 0xDE08FBC3, 0xDE09FBC3, 0xDE0AFBC3, 0xDE0BFBC3, 0xDE0CFBC3, 0xDE0DFBC3, 0xDE0EFBC3, 0xDE0FFBC3, 0xDE10FBC3, + 0xDE11FBC3, 0xDE12FBC3, 0xDE13FBC3, 0xDE14FBC3, 0xDE15FBC3, 0xDE16FBC3, 0xDE17FBC3, 0xDE18FBC3, 0xDE19FBC3, 0xDE1AFBC3, 0xDE1BFBC3, 0xDE1CFBC3, 0xDE1DFBC3, 0xDE1EFBC3, 0xDE1FFBC3, + 0xDE20FBC3, 0xDE21FBC3, 0xDE22FBC3, 0xDE23FBC3, 0xDE24FBC3, 0xDE25FBC3, 0xDE26FBC3, 0xDE27FBC3, 0xDE28FBC3, 0xDE29FBC3, 0xDE2AFBC3, 0xDE2BFBC3, 0xDE2CFBC3, 0xDE2DFBC3, 0xDE2EFBC3, + 0xDE2FFBC3, 0xDE30FBC3, 0xDE31FBC3, 0xDE32FBC3, 0xDE33FBC3, 0xDE34FBC3, 0xDE35FBC3, 0xDE36FBC3, 0xDE37FBC3, 0xDE38FBC3, 0xDE39FBC3, 0xDE3AFBC3, 0xDE3BFBC3, 0xDE3CFBC3, 0xDE3DFBC3, + 0xDE3EFBC3, 0xDE3FFBC3, 0xDE40FBC3, 0xDE41FBC3, 0xDE42FBC3, 0xDE43FBC3, 0xDE44FBC3, 0xDE45FBC3, 0xDE46FBC3, 0xDE47FBC3, 0xDE48FBC3, 0xDE49FBC3, 0xDE4AFBC3, 0xDE4BFBC3, 0xDE4CFBC3, + 0xDE4DFBC3, 0xDE4EFBC3, 0xDE4FFBC3, 0xDE50FBC3, 0xDE51FBC3, 
0xDE52FBC3, 0xDE53FBC3, 0xDE54FBC3, 0xDE55FBC3, 0xDE56FBC3, 0xDE57FBC3, 0xDE58FBC3, 0xDE59FBC3, 0xDE5AFBC3, 0xDE5BFBC3, + 0xDE5CFBC3, 0xDE5DFBC3, 0xDE5EFBC3, 0xDE5FFBC3, 0xDE60FBC3, 0xDE61FBC3, 0xDE62FBC3, 0xDE63FBC3, 0xDE64FBC3, 0xDE65FBC3, 0xDE66FBC3, 0xDE67FBC3, 0xDE68FBC3, 0xDE69FBC3, 0xDE6AFBC3, + 0xDE6BFBC3, 0xDE6CFBC3, 0xDE6DFBC3, 0xDE6EFBC3, 0xDE6FFBC3, 0xDE70FBC3, 0xDE71FBC3, 0xDE72FBC3, 0xDE73FBC3, 0xDE74FBC3, 0xDE75FBC3, 0xDE76FBC3, 0xDE77FBC3, 0xDE78FBC3, 0xDE79FBC3, + 0xDE7AFBC3, 0xDE7BFBC3, 0xDE7CFBC3, 0xDE7DFBC3, 0xDE7EFBC3, 0xDE7FFBC3, 0xDE80FBC3, 0xDE81FBC3, 0xDE82FBC3, 0xDE83FBC3, 0xDE84FBC3, 0xDE85FBC3, 0xDE86FBC3, 0xDE87FBC3, 0xDE88FBC3, + 0xDE89FBC3, 0xDE8AFBC3, 0xDE8BFBC3, 0xDE8CFBC3, 0xDE8DFBC3, 0xDE8EFBC3, 0xDE8FFBC3, 0xDE90FBC3, 0xDE91FBC3, 0xDE92FBC3, 0xDE93FBC3, 0xDE94FBC3, 0xDE95FBC3, 0xDE96FBC3, 0xDE97FBC3, + 0xDE98FBC3, 0xDE99FBC3, 0xDE9AFBC3, 0xDE9BFBC3, 0xDE9CFBC3, 0xDE9DFBC3, 0xDE9EFBC3, 0xDE9FFBC3, 0xDEA0FBC3, 0xDEA1FBC3, 0xDEA2FBC3, 0xDEA3FBC3, 0xDEA4FBC3, 0xDEA5FBC3, 0xDEA6FBC3, + 0xDEA7FBC3, 0xDEA8FBC3, 0xDEA9FBC3, 0xDEAAFBC3, 0xDEABFBC3, 0xDEACFBC3, 0xDEADFBC3, 0xDEAEFBC3, 0xDEAFFBC3, 0xDEB0FBC3, 0xDEB1FBC3, 0xDEB2FBC3, 0xDEB3FBC3, 0xDEB4FBC3, 0xDEB5FBC3, + 0xDEB6FBC3, 0xDEB7FBC3, 0xDEB8FBC3, 0xDEB9FBC3, 0xDEBAFBC3, 0xDEBBFBC3, 0xDEBCFBC3, 0xDEBDFBC3, 0xDEBEFBC3, 0xDEBFFBC3, 0xDEC0FBC3, 0xDEC1FBC3, 0xDEC2FBC3, 0xDEC3FBC3, 0xDEC4FBC3, + 0xDEC5FBC3, 0xDEC6FBC3, 0xDEC7FBC3, 0xDEC8FBC3, 0xDEC9FBC3, 0xDECAFBC3, 0xDECBFBC3, 0xDECCFBC3, 0xDECDFBC3, 0xDECEFBC3, 0xDECFFBC3, 0xDED0FBC3, 0xDED1FBC3, 0xDED2FBC3, 0xDED3FBC3, + 0xDED4FBC3, 0xDED5FBC3, 0xDED6FBC3, 0xDED7FBC3, 0xDED8FBC3, 0xDED9FBC3, 0xDEDAFBC3, 0xDEDBFBC3, 0xDEDCFBC3, 0xDEDDFBC3, 0xDEDEFBC3, 0xDEDFFBC3, 0xDEE0FBC3, 0xDEE1FBC3, 0xDEE2FBC3, + 0xDEE3FBC3, 0xDEE4FBC3, 0xDEE5FBC3, 0xDEE6FBC3, 0xDEE7FBC3, 0xDEE8FBC3, 0xDEE9FBC3, 0xDEEAFBC3, 0xDEEBFBC3, 0xDEECFBC3, 0xDEEDFBC3, 0xDEEEFBC3, 0xDEEFFBC3, 0xDEF0FBC3, 0xDEF1FBC3, + 0xDEF2FBC3, 0xDEF3FBC3, 0xDEF4FBC3, 0xDEF5FBC3, 
0xDEF6FBC3, 0xDEF7FBC3, 0xDEF8FBC3, 0xDEF9FBC3, 0xDEFAFBC3, 0xDEFBFBC3, 0xDEFCFBC3, 0xDEFDFBC3, 0xDEFEFBC3, 0xDEFFFBC3, 0xDF00FBC3, + 0xDF01FBC3, 0xDF02FBC3, 0xDF03FBC3, 0xDF04FBC3, 0xDF05FBC3, 0xDF06FBC3, 0xDF07FBC3, 0xDF08FBC3, 0xDF09FBC3, 0xDF0AFBC3, 0xDF0BFBC3, 0xDF0CFBC3, 0xDF0DFBC3, 0xDF0EFBC3, 0xDF0FFBC3, + 0xDF10FBC3, 0xDF11FBC3, 0xDF12FBC3, 0xDF13FBC3, 0xDF14FBC3, 0xDF15FBC3, 0xDF16FBC3, 0xDF17FBC3, 0xDF18FBC3, 0xDF19FBC3, 0xDF1AFBC3, 0xDF1BFBC3, 0xDF1CFBC3, 0xDF1DFBC3, 0xDF1EFBC3, + 0xDF1FFBC3, 0xDF20FBC3, 0xDF21FBC3, 0xDF22FBC3, 0xDF23FBC3, 0xDF24FBC3, 0xDF25FBC3, 0xDF26FBC3, 0xDF27FBC3, 0xDF28FBC3, 0xDF29FBC3, 0xDF2AFBC3, 0xDF2BFBC3, 0xDF2CFBC3, 0xDF2DFBC3, + 0xDF2EFBC3, 0xDF2FFBC3, 0xDF30FBC3, 0xDF31FBC3, 0xDF32FBC3, 0xDF33FBC3, 0xDF34FBC3, 0xDF35FBC3, 0xDF36FBC3, 0xDF37FBC3, 0xDF38FBC3, 0xDF39FBC3, 0xDF3AFBC3, 0xDF3BFBC3, 0xDF3CFBC3, + 0xDF3DFBC3, 0xDF3EFBC3, 0xDF3FFBC3, 0xDF40FBC3, 0xDF41FBC3, 0xDF42FBC3, 0xDF43FBC3, 0xDF44FBC3, 0xDF45FBC3, 0xDF46FBC3, 0xDF47FBC3, 0xDF48FBC3, 0xDF49FBC3, 0xDF4AFBC3, 0xDF4BFBC3, + 0xDF4CFBC3, 0xDF4DFBC3, 0xDF4EFBC3, 0xDF4FFBC3, 0xDF50FBC3, 0xDF51FBC3, 0xDF52FBC3, 0xDF53FBC3, 0xDF54FBC3, 0xDF55FBC3, 0xDF56FBC3, 0xDF57FBC3, 0xDF58FBC3, 0xDF59FBC3, 0xDF5AFBC3, + 0xDF5BFBC3, 0xDF5CFBC3, 0xDF5DFBC3, 0xDF5EFBC3, 0xDF5FFBC3, 0xDF60FBC3, 0xDF61FBC3, 0xDF62FBC3, 0xDF63FBC3, 0xDF64FBC3, 0xDF65FBC3, 0xDF66FBC3, 0xDF67FBC3, 0xDF68FBC3, 0xDF69FBC3, + 0xDF6AFBC3, 0xDF6BFBC3, 0xDF6CFBC3, 0xDF6DFBC3, 0xDF6EFBC3, 0xDF6FFBC3, 0xDF70FBC3, 0xDF71FBC3, 0xDF72FBC3, 0xDF73FBC3, 0xDF74FBC3, 0xDF75FBC3, 0xDF76FBC3, 0xDF77FBC3, 0xDF78FBC3, + 0xDF79FBC3, 0xDF7AFBC3, 0xDF7BFBC3, 0xDF7CFBC3, 0xDF7DFBC3, 0xDF7EFBC3, 0xDF7FFBC3, 0xDF80FBC3, 0xDF81FBC3, 0xDF82FBC3, 0xDF83FBC3, 0xDF84FBC3, 0xDF85FBC3, 0xDF86FBC3, 0xDF87FBC3, + 0xDF88FBC3, 0xDF89FBC3, 0xDF8AFBC3, 0xDF8BFBC3, 0xDF8CFBC3, 0xDF8DFBC3, 0xDF8EFBC3, 0xDF8FFBC3, 0xDF90FBC3, 0xDF91FBC3, 0xDF92FBC3, 0xDF93FBC3, 0xDF94FBC3, 0xDF95FBC3, 0xDF96FBC3, + 0xDF97FBC3, 0xDF98FBC3, 0xDF99FBC3, 
0xDF9AFBC3, 0xDF9BFBC3, 0xDF9CFBC3, 0xDF9DFBC3, 0xDF9EFBC3, 0xDF9FFBC3, 0xDFA0FBC3, 0xDFA1FBC3, 0xDFA2FBC3, 0xDFA3FBC3, 0xDFA4FBC3, 0xDFA5FBC3, + 0xDFA6FBC3, 0xDFA7FBC3, 0xDFA8FBC3, 0xDFA9FBC3, 0xDFAAFBC3, 0xDFABFBC3, 0xDFACFBC3, 0xDFADFBC3, 0xDFAEFBC3, 0xDFAFFBC3, 0xDFB0FBC3, 0xDFB1FBC3, 0xDFB2FBC3, 0xDFB3FBC3, 0xDFB4FBC3, + 0xDFB5FBC3, 0xDFB6FBC3, 0xDFB7FBC3, 0xDFB8FBC3, 0xDFB9FBC3, 0xDFBAFBC3, 0xDFBBFBC3, 0xDFBCFBC3, 0xDFBDFBC3, 0xDFBEFBC3, 0xDFBFFBC3, 0xDFC0FBC3, 0xDFC1FBC3, 0xDFC2FBC3, 0xDFC3FBC3, + 0xDFC4FBC3, 0xDFC5FBC3, 0xDFC6FBC3, 0xDFC7FBC3, 0xDFC8FBC3, 0xDFC9FBC3, 0xDFCAFBC3, 0xDFCBFBC3, 0xDFCCFBC3, 0xDFCDFBC3, 0xDFCEFBC3, 0xDFCFFBC3, 0xDFD0FBC3, 0xDFD1FBC3, 0xDFD2FBC3, + 0xDFD3FBC3, 0xDFD4FBC3, 0xDFD5FBC3, 0xDFD6FBC3, 0xDFD7FBC3, 0xDFD8FBC3, 0xDFD9FBC3, 0xDFDAFBC3, 0xDFDBFBC3, 0xDFDCFBC3, 0xDFDDFBC3, 0xDFDEFBC3, 0xDFDFFBC3, 0xDFE0FBC3, 0xDFE1FBC3, + 0xDFE2FBC3, 0xDFE3FBC3, 0xDFE4FBC3, 0xDFE5FBC3, 0xDFE6FBC3, 0xDFE7FBC3, 0xDFE8FBC3, 0xDFE9FBC3, 0xDFEAFBC3, 0xDFEBFBC3, 0xDFECFBC3, 0xDFEDFBC3, 0xDFEEFBC3, 0xDFEFFBC3, 0xDFF0FBC3, + 0xDFF1FBC3, 0xDFF2FBC3, 0xDFF3FBC3, 0xDFF4FBC3, 0xDFF5FBC3, 0xDFF6FBC3, 0xDFF7FBC3, 0xDFF8FBC3, 0xDFF9FBC3, 0xDFFAFBC3, 0xDFFBFBC3, 0xDFFCFBC3, 0xDFFDFBC3, 0xDFFEFBC3, 0xDFFFFBC3, + 0x21E5, 0x21E6, 0x21E7, 0x21E8, 0x21E9, 0x21EA, 0x21EB, 0xE007FBC3, 0x21ED, 0x21EE, 0x21EF, 0x21F0, 0x21F1, 0x21F2, 0x21F3, + 0x21F4, 0x21F5, 0x21F6, 0x21F7, 0x21F8, 0x21F9, 0x21FA, 0x21FB, 0x21FC, 0x21FD, 0xE019FBC3, 0xE01AFBC3, 0x2200, 0x2201, 0x2202, + 0x2203, 0x2204, 0x2205, 0x2206, 0xE022FBC3, 0x2208, 0x2209, 0xE025FBC3, 0x220B, 0x220C, 0x220D, 0x220E, 0x220F, 0xE02BFBC3, 0xE02CFBC3, + 0xE02DFBC3, 0xE02EFBC3, 0xE02FFBC3, 0xE030FBC3, 0xE031FBC3, 0xE032FBC3, 0xE033FBC3, 0xE034FBC3, 0xE035FBC3, 0xE036FBC3, 0xE037FBC3, 0xE038FBC3, 0xE039FBC3, 0xE03AFBC3, 0xE03BFBC3, + 0xE03CFBC3, 0xE03DFBC3, 0xE03EFBC3, 0xE03FFBC3, 0xE040FBC3, 0xE041FBC3, 0xE042FBC3, 0xE043FBC3, 0xE044FBC3, 0xE045FBC3, 0xE046FBC3, 0xE047FBC3, 0xE048FBC3, 0xE049FBC3, 0xE04AFBC3, + 
0xE04BFBC3, 0xE04CFBC3, 0xE04DFBC3, 0xE04EFBC3, 0xE04FFBC3, 0xE050FBC3, 0xE051FBC3, 0xE052FBC3, 0xE053FBC3, 0xE054FBC3, 0xE055FBC3, 0xE056FBC3, 0xE057FBC3, 0xE058FBC3, 0xE059FBC3, + 0xE05AFBC3, 0xE05BFBC3, 0xE05CFBC3, 0xE05DFBC3, 0xE05EFBC3, 0xE05FFBC3, 0xE060FBC3, 0xE061FBC3, 0xE062FBC3, 0xE063FBC3, 0xE064FBC3, 0xE065FBC3, 0xE066FBC3, 0xE067FBC3, 0xE068FBC3, + 0xE069FBC3, 0xE06AFBC3, 0xE06BFBC3, 0xE06CFBC3, 0xE06DFBC3, 0xE06EFBC3, 0xE06FFBC3, 0xE070FBC3, 0xE071FBC3, 0xE072FBC3, 0xE073FBC3, 0xE074FBC3, 0xE075FBC3, 0xE076FBC3, 0xE077FBC3, + 0xE078FBC3, 0xE079FBC3, 0xE07AFBC3, 0xE07BFBC3, 0xE07CFBC3, 0xE07DFBC3, 0xE07EFBC3, 0xE07FFBC3, 0xE080FBC3, 0xE081FBC3, 0xE082FBC3, 0xE083FBC3, 0xE084FBC3, 0xE085FBC3, 0xE086FBC3, + 0xE087FBC3, 0xE088FBC3, 0xE089FBC3, 0xE08AFBC3, 0xE08BFBC3, 0xE08CFBC3, 0xE08DFBC3, 0xE08EFBC3, 0xE08FFBC3, 0xE090FBC3, 0xE091FBC3, 0xE092FBC3, 0xE093FBC3, 0xE094FBC3, 0xE095FBC3, + 0xE096FBC3, 0xE097FBC3, 0xE098FBC3, 0xE099FBC3, 0xE09AFBC3, 0xE09BFBC3, 0xE09CFBC3, 0xE09DFBC3, 0xE09EFBC3, 0xE09FFBC3, 0xE0A0FBC3, 0xE0A1FBC3, 0xE0A2FBC3, 0xE0A3FBC3, 0xE0A4FBC3, + 0xE0A5FBC3, 0xE0A6FBC3, 0xE0A7FBC3, 0xE0A8FBC3, 0xE0A9FBC3, 0xE0AAFBC3, 0xE0ABFBC3, 0xE0ACFBC3, 0xE0ADFBC3, 0xE0AEFBC3, 0xE0AFFBC3, 0xE0B0FBC3, 0xE0B1FBC3, 0xE0B2FBC3, 0xE0B3FBC3, + 0xE0B4FBC3, 0xE0B5FBC3, 0xE0B6FBC3, 0xE0B7FBC3, 0xE0B8FBC3, 0xE0B9FBC3, 0xE0BAFBC3, 0xE0BBFBC3, 0xE0BCFBC3, 0xE0BDFBC3, 0xE0BEFBC3, 0xE0BFFBC3, 0xE0C0FBC3, 0xE0C1FBC3, 0xE0C2FBC3, + 0xE0C3FBC3, 0xE0C4FBC3, 0xE0C5FBC3, 0xE0C6FBC3, 0xE0C7FBC3, 0xE0C8FBC3, 0xE0C9FBC3, 0xE0CAFBC3, 0xE0CBFBC3, 0xE0CCFBC3, 0xE0CDFBC3, 0xE0CEFBC3, 0xE0CFFBC3, 0xE0D0FBC3, 0xE0D1FBC3, + 0xE0D2FBC3, 0xE0D3FBC3, 0xE0D4FBC3, 0xE0D5FBC3, 0xE0D6FBC3, 0xE0D7FBC3, 0xE0D8FBC3, 0xE0D9FBC3, 0xE0DAFBC3, 0xE0DBFBC3, 0xE0DCFBC3, 0xE0DDFBC3, 0xE0DEFBC3, 0xE0DFFBC3, 0xE0E0FBC3, + 0xE0E1FBC3, 0xE0E2FBC3, 0xE0E3FBC3, 0xE0E4FBC3, 0xE0E5FBC3, 0xE0E6FBC3, 0xE0E7FBC3, 0xE0E8FBC3, 0xE0E9FBC3, 0xE0EAFBC3, 0xE0EBFBC3, 0xE0ECFBC3, 0xE0EDFBC3, 0xE0EEFBC3, 0xE0EFFBC3, 
+ 0xE0F0FBC3, 0xE0F1FBC3, 0xE0F2FBC3, 0xE0F3FBC3, 0xE0F4FBC3, 0xE0F5FBC3, 0xE0F6FBC3, 0xE0F7FBC3, 0xE0F8FBC3, 0xE0F9FBC3, 0xE0FAFBC3, 0xE0FBFBC3, 0xE0FCFBC3, 0xE0FDFBC3, 0xE0FEFBC3, + 0xE0FFFBC3, 0xE100FBC3, 0xE101FBC3, 0xE102FBC3, 0xE103FBC3, 0xE104FBC3, 0xE105FBC3, 0xE106FBC3, 0xE107FBC3, 0xE108FBC3, 0xE109FBC3, 0xE10AFBC3, 0xE10BFBC3, 0xE10CFBC3, 0xE10DFBC3, + 0xE10EFBC3, 0xE10FFBC3, 0xE110FBC3, 0xE111FBC3, 0xE112FBC3, 0xE113FBC3, 0xE114FBC3, 0xE115FBC3, 0xE116FBC3, 0xE117FBC3, 0xE118FBC3, 0xE119FBC3, 0xE11AFBC3, 0xE11BFBC3, 0xE11CFBC3, + 0xE11DFBC3, 0xE11EFBC3, 0xE11FFBC3, 0xE120FBC3, 0xE121FBC3, 0xE122FBC3, 0xE123FBC3, 0xE124FBC3, 0xE125FBC3, 0xE126FBC3, 0xE127FBC3, 0xE128FBC3, 0xE129FBC3, 0xE12AFBC3, 0xE12BFBC3, + 0xE12CFBC3, 0xE12DFBC3, 0xE12EFBC3, 0xE12FFBC3, 0xE130FBC3, 0xE131FBC3, 0xE132FBC3, 0xE133FBC3, 0xE134FBC3, 0xE135FBC3, 0xE136FBC3, 0xE137FBC3, 0xE138FBC3, 0xE139FBC3, 0xE13AFBC3, + 0xE13BFBC3, 0xE13CFBC3, 0xE13DFBC3, 0xE13EFBC3, 0xE13FFBC3, 0xE140FBC3, 0xE141FBC3, 0xE142FBC3, 0xE143FBC3, 0xE144FBC3, 0xE145FBC3, 0xE146FBC3, 0xE147FBC3, 0xE148FBC3, 0xE149FBC3, + 0xE14AFBC3, 0xE14BFBC3, 0xE14CFBC3, 0xE14DFBC3, 0xE14EFBC3, 0xE14FFBC3, 0xE150FBC3, 0xE151FBC3, 0xE152FBC3, 0xE153FBC3, 0xE154FBC3, 0xE155FBC3, 0xE156FBC3, 0xE157FBC3, 0xE158FBC3, + 0xE159FBC3, 0xE15AFBC3, 0xE15BFBC3, 0xE15CFBC3, 0xE15DFBC3, 0xE15EFBC3, 0xE15FFBC3, 0xE160FBC3, 0xE161FBC3, 0xE162FBC3, 0xE163FBC3, 0xE164FBC3, 0xE165FBC3, 0xE166FBC3, 0xE167FBC3, + 0xE168FBC3, 0xE169FBC3, 0xE16AFBC3, 0xE16BFBC3, 0xE16CFBC3, 0xE16DFBC3, 0xE16EFBC3, 0xE16FFBC3, 0xE170FBC3, 0xE171FBC3, 0xE172FBC3, 0xE173FBC3, 0xE174FBC3, 0xE175FBC3, 0xE176FBC3, + 0xE177FBC3, 0xE178FBC3, 0xE179FBC3, 0xE17AFBC3, 0xE17BFBC3, 0xE17CFBC3, 0xE17DFBC3, 0xE17EFBC3, 0xE17FFBC3, 0xE180FBC3, 0xE181FBC3, 0xE182FBC3, 0xE183FBC3, 0xE184FBC3, 0xE185FBC3, + 0xE186FBC3, 0xE187FBC3, 0xE188FBC3, 0xE189FBC3, 0xE18AFBC3, 0xE18BFBC3, 0xE18CFBC3, 0xE18DFBC3, 0xE18EFBC3, 0xE18FFBC3, 0xE190FBC3, 0xE191FBC3, 0xE192FBC3, 0xE193FBC3, 
0xE194FBC3, + 0xE195FBC3, 0xE196FBC3, 0xE197FBC3, 0xE198FBC3, 0xE199FBC3, 0xE19AFBC3, 0xE19BFBC3, 0xE19CFBC3, 0xE19DFBC3, 0xE19EFBC3, 0xE19FFBC3, 0xE1A0FBC3, 0xE1A1FBC3, 0xE1A2FBC3, 0xE1A3FBC3, + 0xE1A4FBC3, 0xE1A5FBC3, 0xE1A6FBC3, 0xE1A7FBC3, 0xE1A8FBC3, 0xE1A9FBC3, 0xE1AAFBC3, 0xE1ABFBC3, 0xE1ACFBC3, 0xE1ADFBC3, 0xE1AEFBC3, 0xE1AFFBC3, 0xE1B0FBC3, 0xE1B1FBC3, 0xE1B2FBC3, + 0xE1B3FBC3, 0xE1B4FBC3, 0xE1B5FBC3, 0xE1B6FBC3, 0xE1B7FBC3, 0xE1B8FBC3, 0xE1B9FBC3, 0xE1BAFBC3, 0xE1BBFBC3, 0xE1BCFBC3, 0xE1BDFBC3, 0xE1BEFBC3, 0xE1BFFBC3, 0xE1C0FBC3, 0xE1C1FBC3, + 0xE1C2FBC3, 0xE1C3FBC3, 0xE1C4FBC3, 0xE1C5FBC3, 0xE1C6FBC3, 0xE1C7FBC3, 0xE1C8FBC3, 0xE1C9FBC3, 0xE1CAFBC3, 0xE1CBFBC3, 0xE1CCFBC3, 0xE1CDFBC3, 0xE1CEFBC3, 0xE1CFFBC3, 0xE1D0FBC3, + 0xE1D1FBC3, 0xE1D2FBC3, 0xE1D3FBC3, 0xE1D4FBC3, 0xE1D5FBC3, 0xE1D6FBC3, 0xE1D7FBC3, 0xE1D8FBC3, 0xE1D9FBC3, 0xE1DAFBC3, 0xE1DBFBC3, 0xE1DCFBC3, 0xE1DDFBC3, 0xE1DEFBC3, 0xE1DFFBC3, + 0xE1E0FBC3, 0xE1E1FBC3, 0xE1E2FBC3, 0xE1E3FBC3, 0xE1E4FBC3, 0xE1E5FBC3, 0xE1E6FBC3, 0xE1E7FBC3, 0xE1E8FBC3, 0xE1E9FBC3, 0xE1EAFBC3, 0xE1EBFBC3, 0xE1ECFBC3, 0xE1EDFBC3, 0xE1EEFBC3, + 0xE1EFFBC3, 0xE1F0FBC3, 0xE1F1FBC3, 0xE1F2FBC3, 0xE1F3FBC3, 0xE1F4FBC3, 0xE1F5FBC3, 0xE1F6FBC3, 0xE1F7FBC3, 0xE1F8FBC3, 0xE1F9FBC3, 0xE1FAFBC3, 0xE1FBFBC3, 0xE1FCFBC3, 0xE1FDFBC3, + 0xE1FEFBC3, 0xE1FFFBC3, 0xE200FBC3, 0xE201FBC3, 0xE202FBC3, 0xE203FBC3, 0xE204FBC3, 0xE205FBC3, 0xE206FBC3, 0xE207FBC3, 0xE208FBC3, 0xE209FBC3, 0xE20AFBC3, 0xE20BFBC3, 0xE20CFBC3, + 0xE20DFBC3, 0xE20EFBC3, 0xE20FFBC3, 0xE210FBC3, 0xE211FBC3, 0xE212FBC3, 0xE213FBC3, 0xE214FBC3, 0xE215FBC3, 0xE216FBC3, 0xE217FBC3, 0xE218FBC3, 0xE219FBC3, 0xE21AFBC3, 0xE21BFBC3, + 0xE21CFBC3, 0xE21DFBC3, 0xE21EFBC3, 0xE21FFBC3, 0xE220FBC3, 0xE221FBC3, 0xE222FBC3, 0xE223FBC3, 0xE224FBC3, 0xE225FBC3, 0xE226FBC3, 0xE227FBC3, 0xE228FBC3, 0xE229FBC3, 0xE22AFBC3, + 0xE22BFBC3, 0xE22CFBC3, 0xE22DFBC3, 0xE22EFBC3, 0xE22FFBC3, 0xE230FBC3, 0xE231FBC3, 0xE232FBC3, 0xE233FBC3, 0xE234FBC3, 0xE235FBC3, 0xE236FBC3, 0xE237FBC3, 
0xE238FBC3, 0xE239FBC3, + 0xE23AFBC3, 0xE23BFBC3, 0xE23CFBC3, 0xE23DFBC3, 0xE23EFBC3, 0xE23FFBC3, 0xE240FBC3, 0xE241FBC3, 0xE242FBC3, 0xE243FBC3, 0xE244FBC3, 0xE245FBC3, 0xE246FBC3, 0xE247FBC3, 0xE248FBC3, + 0xE249FBC3, 0xE24AFBC3, 0xE24BFBC3, 0xE24CFBC3, 0xE24DFBC3, 0xE24EFBC3, 0xE24FFBC3, 0xE250FBC3, 0xE251FBC3, 0xE252FBC3, 0xE253FBC3, 0xE254FBC3, 0xE255FBC3, 0xE256FBC3, 0xE257FBC3, + 0xE258FBC3, 0xE259FBC3, 0xE25AFBC3, 0xE25BFBC3, 0xE25CFBC3, 0xE25DFBC3, 0xE25EFBC3, 0xE25FFBC3, 0xE260FBC3, 0xE261FBC3, 0xE262FBC3, 0xE263FBC3, 0xE264FBC3, 0xE265FBC3, 0xE266FBC3, + 0xE267FBC3, 0xE268FBC3, 0xE269FBC3, 0xE26AFBC3, 0xE26BFBC3, 0xE26CFBC3, 0xE26DFBC3, 0xE26EFBC3, 0xE26FFBC3, 0xE270FBC3, 0xE271FBC3, 0xE272FBC3, 0xE273FBC3, 0xE274FBC3, 0xE275FBC3, + 0xE276FBC3, 0xE277FBC3, 0xE278FBC3, 0xE279FBC3, 0xE27AFBC3, 0xE27BFBC3, 0xE27CFBC3, 0xE27DFBC3, 0xE27EFBC3, 0xE27FFBC3, 0xE280FBC3, 0xE281FBC3, 0xE282FBC3, 0xE283FBC3, 0xE284FBC3, + 0xE285FBC3, 0xE286FBC3, 0xE287FBC3, 0xE288FBC3, 0xE289FBC3, 0xE28AFBC3, 0xE28BFBC3, 0xE28CFBC3, 0xE28DFBC3, 0xE28EFBC3, 0xE28FFBC3, 0xE290FBC3, 0xE291FBC3, 0xE292FBC3, 0xE293FBC3, + 0xE294FBC3, 0xE295FBC3, 0xE296FBC3, 0xE297FBC3, 0xE298FBC3, 0xE299FBC3, 0xE29AFBC3, 0xE29BFBC3, 0xE29CFBC3, 0xE29DFBC3, 0xE29EFBC3, 0xE29FFBC3, 0xE2A0FBC3, 0xE2A1FBC3, 0xE2A2FBC3, + 0xE2A3FBC3, 0xE2A4FBC3, 0xE2A5FBC3, 0xE2A6FBC3, 0xE2A7FBC3, 0xE2A8FBC3, 0xE2A9FBC3, 0xE2AAFBC3, 0xE2ABFBC3, 0xE2ACFBC3, 0xE2ADFBC3, 0xE2AEFBC3, 0xE2AFFBC3, 0xE2B0FBC3, 0xE2B1FBC3, + 0xE2B2FBC3, 0xE2B3FBC3, 0xE2B4FBC3, 0xE2B5FBC3, 0xE2B6FBC3, 0xE2B7FBC3, 0xE2B8FBC3, 0xE2B9FBC3, 0xE2BAFBC3, 0xE2BBFBC3, 0xE2BCFBC3, 0xE2BDFBC3, 0xE2BEFBC3, 0xE2BFFBC3, 0xE2C0FBC3, + 0xE2C1FBC3, 0xE2C2FBC3, 0xE2C3FBC3, 0xE2C4FBC3, 0xE2C5FBC3, 0xE2C6FBC3, 0xE2C7FBC3, 0xE2C8FBC3, 0xE2C9FBC3, 0xE2CAFBC3, 0xE2CBFBC3, 0xE2CCFBC3, 0xE2CDFBC3, 0xE2CEFBC3, 0xE2CFFBC3, + 0xE2D0FBC3, 0xE2D1FBC3, 0xE2D2FBC3, 0xE2D3FBC3, 0xE2D4FBC3, 0xE2D5FBC3, 0xE2D6FBC3, 0xE2D7FBC3, 0xE2D8FBC3, 0xE2D9FBC3, 0xE2DAFBC3, 0xE2DBFBC3, 
0xE2DCFBC3, 0xE2DDFBC3, 0xE2DEFBC3, + 0xE2DFFBC3, 0xE2E0FBC3, 0xE2E1FBC3, 0xE2E2FBC3, 0xE2E3FBC3, 0xE2E4FBC3, 0xE2E5FBC3, 0xE2E6FBC3, 0xE2E7FBC3, 0xE2E8FBC3, 0xE2E9FBC3, 0xE2EAFBC3, 0xE2EBFBC3, 0xE2ECFBC3, 0xE2EDFBC3, + 0xE2EEFBC3, 0xE2EFFBC3, 0xE2F0FBC3, 0xE2F1FBC3, 0xE2F2FBC3, 0xE2F3FBC3, 0xE2F4FBC3, 0xE2F5FBC3, 0xE2F6FBC3, 0xE2F7FBC3, 0xE2F8FBC3, 0xE2F9FBC3, 0xE2FAFBC3, 0xE2FBFBC3, 0xE2FCFBC3, + 0xE2FDFBC3, 0xE2FEFBC3, 0xE2FFFBC3, 0xE300FBC3, 0xE301FBC3, 0xE302FBC3, 0xE303FBC3, 0xE304FBC3, 0xE305FBC3, 0xE306FBC3, 0xE307FBC3, 0xE308FBC3, 0xE309FBC3, 0xE30AFBC3, 0xE30BFBC3, + 0xE30CFBC3, 0xE30DFBC3, 0xE30EFBC3, 0xE30FFBC3, 0xE310FBC3, 0xE311FBC3, 0xE312FBC3, 0xE313FBC3, 0xE314FBC3, 0xE315FBC3, 0xE316FBC3, 0xE317FBC3, 0xE318FBC3, 0xE319FBC3, 0xE31AFBC3, + 0xE31BFBC3, 0xE31CFBC3, 0xE31DFBC3, 0xE31EFBC3, 0xE31FFBC3, 0xE320FBC3, 0xE321FBC3, 0xE322FBC3, 0xE323FBC3, 0xE324FBC3, 0xE325FBC3, 0xE326FBC3, 0xE327FBC3, 0xE328FBC3, 0xE329FBC3, + 0xE32AFBC3, 0xE32BFBC3, 0xE32CFBC3, 0xE32DFBC3, 0xE32EFBC3, 0xE32FFBC3, 0xE330FBC3, 0xE331FBC3, 0xE332FBC3, 0xE333FBC3, 0xE334FBC3, 0xE335FBC3, 0xE336FBC3, 0xE337FBC3, 0xE338FBC3, + 0xE339FBC3, 0xE33AFBC3, 0xE33BFBC3, 0xE33CFBC3, 0xE33DFBC3, 0xE33EFBC3, 0xE33FFBC3, 0xE340FBC3, 0xE341FBC3, 0xE342FBC3, 0xE343FBC3, 0xE344FBC3, 0xE345FBC3, 0xE346FBC3, 0xE347FBC3, + 0xE348FBC3, 0xE349FBC3, 0xE34AFBC3, 0xE34BFBC3, 0xE34CFBC3, 0xE34DFBC3, 0xE34EFBC3, 0xE34FFBC3, 0xE350FBC3, 0xE351FBC3, 0xE352FBC3, 0xE353FBC3, 0xE354FBC3, 0xE355FBC3, 0xE356FBC3, + 0xE357FBC3, 0xE358FBC3, 0xE359FBC3, 0xE35AFBC3, 0xE35BFBC3, 0xE35CFBC3, 0xE35DFBC3, 0xE35EFBC3, 0xE35FFBC3, 0xE360FBC3, 0xE361FBC3, 0xE362FBC3, 0xE363FBC3, 0xE364FBC3, 0xE365FBC3, + 0xE366FBC3, 0xE367FBC3, 0xE368FBC3, 0xE369FBC3, 0xE36AFBC3, 0xE36BFBC3, 0xE36CFBC3, 0xE36DFBC3, 0xE36EFBC3, 0xE36FFBC3, 0xE370FBC3, 0xE371FBC3, 0xE372FBC3, 0xE373FBC3, 0xE374FBC3, + 0xE375FBC3, 0xE376FBC3, 0xE377FBC3, 0xE378FBC3, 0xE379FBC3, 0xE37AFBC3, 0xE37BFBC3, 0xE37CFBC3, 0xE37DFBC3, 0xE37EFBC3, 0xE37FFBC3, 
0xE380FBC3, 0xE381FBC3, 0xE382FBC3, 0xE383FBC3, + 0xE384FBC3, 0xE385FBC3, 0xE386FBC3, 0xE387FBC3, 0xE388FBC3, 0xE389FBC3, 0xE38AFBC3, 0xE38BFBC3, 0xE38CFBC3, 0xE38DFBC3, 0xE38EFBC3, 0xE38FFBC3, 0xE390FBC3, 0xE391FBC3, 0xE392FBC3, + 0xE393FBC3, 0xE394FBC3, 0xE395FBC3, 0xE396FBC3, 0xE397FBC3, 0xE398FBC3, 0xE399FBC3, 0xE39AFBC3, 0xE39BFBC3, 0xE39CFBC3, 0xE39DFBC3, 0xE39EFBC3, 0xE39FFBC3, 0xE3A0FBC3, 0xE3A1FBC3, + 0xE3A2FBC3, 0xE3A3FBC3, 0xE3A4FBC3, 0xE3A5FBC3, 0xE3A6FBC3, 0xE3A7FBC3, 0xE3A8FBC3, 0xE3A9FBC3, 0xE3AAFBC3, 0xE3ABFBC3, 0xE3ACFBC3, 0xE3ADFBC3, 0xE3AEFBC3, 0xE3AFFBC3, 0xE3B0FBC3, + 0xE3B1FBC3, 0xE3B2FBC3, 0xE3B3FBC3, 0xE3B4FBC3, 0xE3B5FBC3, 0xE3B6FBC3, 0xE3B7FBC3, 0xE3B8FBC3, 0xE3B9FBC3, 0xE3BAFBC3, 0xE3BBFBC3, 0xE3BCFBC3, 0xE3BDFBC3, 0xE3BEFBC3, 0xE3BFFBC3, + 0xE3C0FBC3, 0xE3C1FBC3, 0xE3C2FBC3, 0xE3C3FBC3, 0xE3C4FBC3, 0xE3C5FBC3, 0xE3C6FBC3, 0xE3C7FBC3, 0xE3C8FBC3, 0xE3C9FBC3, 0xE3CAFBC3, 0xE3CBFBC3, 0xE3CCFBC3, 0xE3CDFBC3, 0xE3CEFBC3, + 0xE3CFFBC3, 0xE3D0FBC3, 0xE3D1FBC3, 0xE3D2FBC3, 0xE3D3FBC3, 0xE3D4FBC3, 0xE3D5FBC3, 0xE3D6FBC3, 0xE3D7FBC3, 0xE3D8FBC3, 0xE3D9FBC3, 0xE3DAFBC3, 0xE3DBFBC3, 0xE3DCFBC3, 0xE3DDFBC3, + 0xE3DEFBC3, 0xE3DFFBC3, 0xE3E0FBC3, 0xE3E1FBC3, 0xE3E2FBC3, 0xE3E3FBC3, 0xE3E4FBC3, 0xE3E5FBC3, 0xE3E6FBC3, 0xE3E7FBC3, 0xE3E8FBC3, 0xE3E9FBC3, 0xE3EAFBC3, 0xE3EBFBC3, 0xE3ECFBC3, + 0xE3EDFBC3, 0xE3EEFBC3, 0xE3EFFBC3, 0xE3F0FBC3, 0xE3F1FBC3, 0xE3F2FBC3, 0xE3F3FBC3, 0xE3F4FBC3, 0xE3F5FBC3, 0xE3F6FBC3, 0xE3F7FBC3, 0xE3F8FBC3, 0xE3F9FBC3, 0xE3FAFBC3, 0xE3FBFBC3, + 0xE3FCFBC3, 0xE3FDFBC3, 0xE3FEFBC3, 0xE3FFFBC3, 0xE400FBC3, 0xE401FBC3, 0xE402FBC3, 0xE403FBC3, 0xE404FBC3, 0xE405FBC3, 0xE406FBC3, 0xE407FBC3, 0xE408FBC3, 0xE409FBC3, 0xE40AFBC3, + 0xE40BFBC3, 0xE40CFBC3, 0xE40DFBC3, 0xE40EFBC3, 0xE40FFBC3, 0xE410FBC3, 0xE411FBC3, 0xE412FBC3, 0xE413FBC3, 0xE414FBC3, 0xE415FBC3, 0xE416FBC3, 0xE417FBC3, 0xE418FBC3, 0xE419FBC3, + 0xE41AFBC3, 0xE41BFBC3, 0xE41CFBC3, 0xE41DFBC3, 0xE41EFBC3, 0xE41FFBC3, 0xE420FBC3, 0xE421FBC3, 0xE422FBC3, 0xE423FBC3, 
0xE424FBC3, 0xE425FBC3, 0xE426FBC3, 0xE427FBC3, 0xE428FBC3, + 0xE429FBC3, 0xE42AFBC3, 0xE42BFBC3, 0xE42CFBC3, 0xE42DFBC3, 0xE42EFBC3, 0xE42FFBC3, 0xE430FBC3, 0xE431FBC3, 0xE432FBC3, 0xE433FBC3, 0xE434FBC3, 0xE435FBC3, 0xE436FBC3, 0xE437FBC3, + 0xE438FBC3, 0xE439FBC3, 0xE43AFBC3, 0xE43BFBC3, 0xE43CFBC3, 0xE43DFBC3, 0xE43EFBC3, 0xE43FFBC3, 0xE440FBC3, 0xE441FBC3, 0xE442FBC3, 0xE443FBC3, 0xE444FBC3, 0xE445FBC3, 0xE446FBC3, + 0xE447FBC3, 0xE448FBC3, 0xE449FBC3, 0xE44AFBC3, 0xE44BFBC3, 0xE44CFBC3, 0xE44DFBC3, 0xE44EFBC3, 0xE44FFBC3, 0xE450FBC3, 0xE451FBC3, 0xE452FBC3, 0xE453FBC3, 0xE454FBC3, 0xE455FBC3, + 0xE456FBC3, 0xE457FBC3, 0xE458FBC3, 0xE459FBC3, 0xE45AFBC3, 0xE45BFBC3, 0xE45CFBC3, 0xE45DFBC3, 0xE45EFBC3, 0xE45FFBC3, 0xE460FBC3, 0xE461FBC3, 0xE462FBC3, 0xE463FBC3, 0xE464FBC3, + 0xE465FBC3, 0xE466FBC3, 0xE467FBC3, 0xE468FBC3, 0xE469FBC3, 0xE46AFBC3, 0xE46BFBC3, 0xE46CFBC3, 0xE46DFBC3, 0xE46EFBC3, 0xE46FFBC3, 0xE470FBC3, 0xE471FBC3, 0xE472FBC3, 0xE473FBC3, + 0xE474FBC3, 0xE475FBC3, 0xE476FBC3, 0xE477FBC3, 0xE478FBC3, 0xE479FBC3, 0xE47AFBC3, 0xE47BFBC3, 0xE47CFBC3, 0xE47DFBC3, 0xE47EFBC3, 0xE47FFBC3, 0xE480FBC3, 0xE481FBC3, 0xE482FBC3, + 0xE483FBC3, 0xE484FBC3, 0xE485FBC3, 0xE486FBC3, 0xE487FBC3, 0xE488FBC3, 0xE489FBC3, 0xE48AFBC3, 0xE48BFBC3, 0xE48CFBC3, 0xE48DFBC3, 0xE48EFBC3, 0xE48FFBC3, 0xE490FBC3, 0xE491FBC3, + 0xE492FBC3, 0xE493FBC3, 0xE494FBC3, 0xE495FBC3, 0xE496FBC3, 0xE497FBC3, 0xE498FBC3, 0xE499FBC3, 0xE49AFBC3, 0xE49BFBC3, 0xE49CFBC3, 0xE49DFBC3, 0xE49EFBC3, 0xE49FFBC3, 0xE4A0FBC3, + 0xE4A1FBC3, 0xE4A2FBC3, 0xE4A3FBC3, 0xE4A4FBC3, 0xE4A5FBC3, 0xE4A6FBC3, 0xE4A7FBC3, 0xE4A8FBC3, 0xE4A9FBC3, 0xE4AAFBC3, 0xE4ABFBC3, 0xE4ACFBC3, 0xE4ADFBC3, 0xE4AEFBC3, 0xE4AFFBC3, + 0xE4B0FBC3, 0xE4B1FBC3, 0xE4B2FBC3, 0xE4B3FBC3, 0xE4B4FBC3, 0xE4B5FBC3, 0xE4B6FBC3, 0xE4B7FBC3, 0xE4B8FBC3, 0xE4B9FBC3, 0xE4BAFBC3, 0xE4BBFBC3, 0xE4BCFBC3, 0xE4BDFBC3, 0xE4BEFBC3, + 0xE4BFFBC3, 0xE4C0FBC3, 0xE4C1FBC3, 0xE4C2FBC3, 0xE4C3FBC3, 0xE4C4FBC3, 0xE4C5FBC3, 0xE4C6FBC3, 0xE4C7FBC3, 
0xE4C8FBC3, 0xE4C9FBC3, 0xE4CAFBC3, 0xE4CBFBC3, 0xE4CCFBC3, 0xE4CDFBC3, + 0xE4CEFBC3, 0xE4CFFBC3, 0xE4D0FBC3, 0xE4D1FBC3, 0xE4D2FBC3, 0xE4D3FBC3, 0xE4D4FBC3, 0xE4D5FBC3, 0xE4D6FBC3, 0xE4D7FBC3, 0xE4D8FBC3, 0xE4D9FBC3, 0xE4DAFBC3, 0xE4DBFBC3, 0xE4DCFBC3, + 0xE4DDFBC3, 0xE4DEFBC3, 0xE4DFFBC3, 0xE4E0FBC3, 0xE4E1FBC3, 0xE4E2FBC3, 0xE4E3FBC3, 0xE4E4FBC3, 0xE4E5FBC3, 0xE4E6FBC3, 0xE4E7FBC3, 0xE4E8FBC3, 0xE4E9FBC3, 0xE4EAFBC3, 0xE4EBFBC3, + 0xE4ECFBC3, 0xE4EDFBC3, 0xE4EEFBC3, 0xE4EFFBC3, 0xE4F0FBC3, 0xE4F1FBC3, 0xE4F2FBC3, 0xE4F3FBC3, 0xE4F4FBC3, 0xE4F5FBC3, 0xE4F6FBC3, 0xE4F7FBC3, 0xE4F8FBC3, 0xE4F9FBC3, 0xE4FAFBC3, + 0xE4FBFBC3, 0xE4FCFBC3, 0xE4FDFBC3, 0xE4FEFBC3, 0xE4FFFBC3, 0xE500FBC3, 0xE501FBC3, 0xE502FBC3, 0xE503FBC3, 0xE504FBC3, 0xE505FBC3, 0xE506FBC3, 0xE507FBC3, 0xE508FBC3, 0xE509FBC3, + 0xE50AFBC3, 0xE50BFBC3, 0xE50CFBC3, 0xE50DFBC3, 0xE50EFBC3, 0xE50FFBC3, 0xE510FBC3, 0xE511FBC3, 0xE512FBC3, 0xE513FBC3, 0xE514FBC3, 0xE515FBC3, 0xE516FBC3, 0xE517FBC3, 0xE518FBC3, + 0xE519FBC3, 0xE51AFBC3, 0xE51BFBC3, 0xE51CFBC3, 0xE51DFBC3, 0xE51EFBC3, 0xE51FFBC3, 0xE520FBC3, 0xE521FBC3, 0xE522FBC3, 0xE523FBC3, 0xE524FBC3, 0xE525FBC3, 0xE526FBC3, 0xE527FBC3, + 0xE528FBC3, 0xE529FBC3, 0xE52AFBC3, 0xE52BFBC3, 0xE52CFBC3, 0xE52DFBC3, 0xE52EFBC3, 0xE52FFBC3, 0xE530FBC3, 0xE531FBC3, 0xE532FBC3, 0xE533FBC3, 0xE534FBC3, 0xE535FBC3, 0xE536FBC3, + 0xE537FBC3, 0xE538FBC3, 0xE539FBC3, 0xE53AFBC3, 0xE53BFBC3, 0xE53CFBC3, 0xE53DFBC3, 0xE53EFBC3, 0xE53FFBC3, 0xE540FBC3, 0xE541FBC3, 0xE542FBC3, 0xE543FBC3, 0xE544FBC3, 0xE545FBC3, + 0xE546FBC3, 0xE547FBC3, 0xE548FBC3, 0xE549FBC3, 0xE54AFBC3, 0xE54BFBC3, 0xE54CFBC3, 0xE54DFBC3, 0xE54EFBC3, 0xE54FFBC3, 0xE550FBC3, 0xE551FBC3, 0xE552FBC3, 0xE553FBC3, 0xE554FBC3, + 0xE555FBC3, 0xE556FBC3, 0xE557FBC3, 0xE558FBC3, 0xE559FBC3, 0xE55AFBC3, 0xE55BFBC3, 0xE55CFBC3, 0xE55DFBC3, 0xE55EFBC3, 0xE55FFBC3, 0xE560FBC3, 0xE561FBC3, 0xE562FBC3, 0xE563FBC3, + 0xE564FBC3, 0xE565FBC3, 0xE566FBC3, 0xE567FBC3, 0xE568FBC3, 0xE569FBC3, 0xE56AFBC3, 0xE56BFBC3, 
0xE56CFBC3, 0xE56DFBC3, 0xE56EFBC3, 0xE56FFBC3, 0xE570FBC3, 0xE571FBC3, 0xE572FBC3, + 0xE573FBC3, 0xE574FBC3, 0xE575FBC3, 0xE576FBC3, 0xE577FBC3, 0xE578FBC3, 0xE579FBC3, 0xE57AFBC3, 0xE57BFBC3, 0xE57CFBC3, 0xE57DFBC3, 0xE57EFBC3, 0xE57FFBC3, 0xE580FBC3, 0xE581FBC3, + 0xE582FBC3, 0xE583FBC3, 0xE584FBC3, 0xE585FBC3, 0xE586FBC3, 0xE587FBC3, 0xE588FBC3, 0xE589FBC3, 0xE58AFBC3, 0xE58BFBC3, 0xE58CFBC3, 0xE58DFBC3, 0xE58EFBC3, 0xE58FFBC3, 0xE590FBC3, + 0xE591FBC3, 0xE592FBC3, 0xE593FBC3, 0xE594FBC3, 0xE595FBC3, 0xE596FBC3, 0xE597FBC3, 0xE598FBC3, 0xE599FBC3, 0xE59AFBC3, 0xE59BFBC3, 0xE59CFBC3, 0xE59DFBC3, 0xE59EFBC3, 0xE59FFBC3, + 0xE5A0FBC3, 0xE5A1FBC3, 0xE5A2FBC3, 0xE5A3FBC3, 0xE5A4FBC3, 0xE5A5FBC3, 0xE5A6FBC3, 0xE5A7FBC3, 0xE5A8FBC3, 0xE5A9FBC3, 0xE5AAFBC3, 0xE5ABFBC3, 0xE5ACFBC3, 0xE5ADFBC3, 0xE5AEFBC3, + 0xE5AFFBC3, 0xE5B0FBC3, 0xE5B1FBC3, 0xE5B2FBC3, 0xE5B3FBC3, 0xE5B4FBC3, 0xE5B5FBC3, 0xE5B6FBC3, 0xE5B7FBC3, 0xE5B8FBC3, 0xE5B9FBC3, 0xE5BAFBC3, 0xE5BBFBC3, 0xE5BCFBC3, 0xE5BDFBC3, + 0xE5BEFBC3, 0xE5BFFBC3, 0xE5C0FBC3, 0xE5C1FBC3, 0xE5C2FBC3, 0xE5C3FBC3, 0xE5C4FBC3, 0xE5C5FBC3, 0xE5C6FBC3, 0xE5C7FBC3, 0xE5C8FBC3, 0xE5C9FBC3, 0xE5CAFBC3, 0xE5CBFBC3, 0xE5CCFBC3, + 0xE5CDFBC3, 0xE5CEFBC3, 0xE5CFFBC3, 0xE5D0FBC3, 0xE5D1FBC3, 0xE5D2FBC3, 0xE5D3FBC3, 0xE5D4FBC3, 0xE5D5FBC3, 0xE5D6FBC3, 0xE5D7FBC3, 0xE5D8FBC3, 0xE5D9FBC3, 0xE5DAFBC3, 0xE5DBFBC3, + 0xE5DCFBC3, 0xE5DDFBC3, 0xE5DEFBC3, 0xE5DFFBC3, 0xE5E0FBC3, 0xE5E1FBC3, 0xE5E2FBC3, 0xE5E3FBC3, 0xE5E4FBC3, 0xE5E5FBC3, 0xE5E6FBC3, 0xE5E7FBC3, 0xE5E8FBC3, 0xE5E9FBC3, 0xE5EAFBC3, + 0xE5EBFBC3, 0xE5ECFBC3, 0xE5EDFBC3, 0xE5EEFBC3, 0xE5EFFBC3, 0xE5F0FBC3, 0xE5F1FBC3, 0xE5F2FBC3, 0xE5F3FBC3, 0xE5F4FBC3, 0xE5F5FBC3, 0xE5F6FBC3, 0xE5F7FBC3, 0xE5F8FBC3, 0xE5F9FBC3, + 0xE5FAFBC3, 0xE5FBFBC3, 0xE5FCFBC3, 0xE5FDFBC3, 0xE5FEFBC3, 0xE5FFFBC3, 0xE600FBC3, 0xE601FBC3, 0xE602FBC3, 0xE603FBC3, 0xE604FBC3, 0xE605FBC3, 0xE606FBC3, 0xE607FBC3, 0xE608FBC3, + 0xE609FBC3, 0xE60AFBC3, 0xE60BFBC3, 0xE60CFBC3, 0xE60DFBC3, 0xE60EFBC3, 0xE60FFBC3, 
0xE610FBC3, 0xE611FBC3, 0xE612FBC3, 0xE613FBC3, 0xE614FBC3, 0xE615FBC3, 0xE616FBC3, 0xE617FBC3, + 0xE618FBC3, 0xE619FBC3, 0xE61AFBC3, 0xE61BFBC3, 0xE61CFBC3, 0xE61DFBC3, 0xE61EFBC3, 0xE61FFBC3, 0xE620FBC3, 0xE621FBC3, 0xE622FBC3, 0xE623FBC3, 0xE624FBC3, 0xE625FBC3, 0xE626FBC3, + 0xE627FBC3, 0xE628FBC3, 0xE629FBC3, 0xE62AFBC3, 0xE62BFBC3, 0xE62CFBC3, 0xE62DFBC3, 0xE62EFBC3, 0xE62FFBC3, 0xE630FBC3, 0xE631FBC3, 0xE632FBC3, 0xE633FBC3, 0xE634FBC3, 0xE635FBC3, + 0xE636FBC3, 0xE637FBC3, 0xE638FBC3, 0xE639FBC3, 0xE63AFBC3, 0xE63BFBC3, 0xE63CFBC3, 0xE63DFBC3, 0xE63EFBC3, 0xE63FFBC3, 0xE640FBC3, 0xE641FBC3, 0xE642FBC3, 0xE643FBC3, 0xE644FBC3, + 0xE645FBC3, 0xE646FBC3, 0xE647FBC3, 0xE648FBC3, 0xE649FBC3, 0xE64AFBC3, 0xE64BFBC3, 0xE64CFBC3, 0xE64DFBC3, 0xE64EFBC3, 0xE64FFBC3, 0xE650FBC3, 0xE651FBC3, 0xE652FBC3, 0xE653FBC3, + 0xE654FBC3, 0xE655FBC3, 0xE656FBC3, 0xE657FBC3, 0xE658FBC3, 0xE659FBC3, 0xE65AFBC3, 0xE65BFBC3, 0xE65CFBC3, 0xE65DFBC3, 0xE65EFBC3, 0xE65FFBC3, 0xE660FBC3, 0xE661FBC3, 0xE662FBC3, + 0xE663FBC3, 0xE664FBC3, 0xE665FBC3, 0xE666FBC3, 0xE667FBC3, 0xE668FBC3, 0xE669FBC3, 0xE66AFBC3, 0xE66BFBC3, 0xE66CFBC3, 0xE66DFBC3, 0xE66EFBC3, 0xE66FFBC3, 0xE670FBC3, 0xE671FBC3, + 0xE672FBC3, 0xE673FBC3, 0xE674FBC3, 0xE675FBC3, 0xE676FBC3, 0xE677FBC3, 0xE678FBC3, 0xE679FBC3, 0xE67AFBC3, 0xE67BFBC3, 0xE67CFBC3, 0xE67DFBC3, 0xE67EFBC3, 0xE67FFBC3, 0xE680FBC3, + 0xE681FBC3, 0xE682FBC3, 0xE683FBC3, 0xE684FBC3, 0xE685FBC3, 0xE686FBC3, 0xE687FBC3, 0xE688FBC3, 0xE689FBC3, 0xE68AFBC3, 0xE68BFBC3, 0xE68CFBC3, 0xE68DFBC3, 0xE68EFBC3, 0xE68FFBC3, + 0xE690FBC3, 0xE691FBC3, 0xE692FBC3, 0xE693FBC3, 0xE694FBC3, 0xE695FBC3, 0xE696FBC3, 0xE697FBC3, 0xE698FBC3, 0xE699FBC3, 0xE69AFBC3, 0xE69BFBC3, 0xE69CFBC3, 0xE69DFBC3, 0xE69EFBC3, + 0xE69FFBC3, 0xE6A0FBC3, 0xE6A1FBC3, 0xE6A2FBC3, 0xE6A3FBC3, 0xE6A4FBC3, 0xE6A5FBC3, 0xE6A6FBC3, 0xE6A7FBC3, 0xE6A8FBC3, 0xE6A9FBC3, 0xE6AAFBC3, 0xE6ABFBC3, 0xE6ACFBC3, 0xE6ADFBC3, + 0xE6AEFBC3, 0xE6AFFBC3, 0xE6B0FBC3, 0xE6B1FBC3, 0xE6B2FBC3, 0xE6B3FBC3, 
0xE6B4FBC3, 0xE6B5FBC3, 0xE6B6FBC3, 0xE6B7FBC3, 0xE6B8FBC3, 0xE6B9FBC3, 0xE6BAFBC3, 0xE6BBFBC3, 0xE6BCFBC3, + 0xE6BDFBC3, 0xE6BEFBC3, 0xE6BFFBC3, 0xE6C0FBC3, 0xE6C1FBC3, 0xE6C2FBC3, 0xE6C3FBC3, 0xE6C4FBC3, 0xE6C5FBC3, 0xE6C6FBC3, 0xE6C7FBC3, 0xE6C8FBC3, 0xE6C9FBC3, 0xE6CAFBC3, 0xE6CBFBC3, + 0xE6CCFBC3, 0xE6CDFBC3, 0xE6CEFBC3, 0xE6CFFBC3, 0xE6D0FBC3, 0xE6D1FBC3, 0xE6D2FBC3, 0xE6D3FBC3, 0xE6D4FBC3, 0xE6D5FBC3, 0xE6D6FBC3, 0xE6D7FBC3, 0xE6D8FBC3, 0xE6D9FBC3, 0xE6DAFBC3, + 0xE6DBFBC3, 0xE6DCFBC3, 0xE6DDFBC3, 0xE6DEFBC3, 0xE6DFFBC3, 0xE6E0FBC3, 0xE6E1FBC3, 0xE6E2FBC3, 0xE6E3FBC3, 0xE6E4FBC3, 0xE6E5FBC3, 0xE6E6FBC3, 0xE6E7FBC3, 0xE6E8FBC3, 0xE6E9FBC3, + 0xE6EAFBC3, 0xE6EBFBC3, 0xE6ECFBC3, 0xE6EDFBC3, 0xE6EEFBC3, 0xE6EFFBC3, 0xE6F0FBC3, 0xE6F1FBC3, 0xE6F2FBC3, 0xE6F3FBC3, 0xE6F4FBC3, 0xE6F5FBC3, 0xE6F6FBC3, 0xE6F7FBC3, 0xE6F8FBC3, + 0xE6F9FBC3, 0xE6FAFBC3, 0xE6FBFBC3, 0xE6FCFBC3, 0xE6FDFBC3, 0xE6FEFBC3, 0xE6FFFBC3, 0xE700FBC3, 0xE701FBC3, 0xE702FBC3, 0xE703FBC3, 0xE704FBC3, 0xE705FBC3, 0xE706FBC3, 0xE707FBC3, + 0xE708FBC3, 0xE709FBC3, 0xE70AFBC3, 0xE70BFBC3, 0xE70CFBC3, 0xE70DFBC3, 0xE70EFBC3, 0xE70FFBC3, 0xE710FBC3, 0xE711FBC3, 0xE712FBC3, 0xE713FBC3, 0xE714FBC3, 0xE715FBC3, 0xE716FBC3, + 0xE717FBC3, 0xE718FBC3, 0xE719FBC3, 0xE71AFBC3, 0xE71BFBC3, 0xE71CFBC3, 0xE71DFBC3, 0xE71EFBC3, 0xE71FFBC3, 0xE720FBC3, 0xE721FBC3, 0xE722FBC3, 0xE723FBC3, 0xE724FBC3, 0xE725FBC3, + 0xE726FBC3, 0xE727FBC3, 0xE728FBC3, 0xE729FBC3, 0xE72AFBC3, 0xE72BFBC3, 0xE72CFBC3, 0xE72DFBC3, 0xE72EFBC3, 0xE72FFBC3, 0xE730FBC3, 0xE731FBC3, 0xE732FBC3, 0xE733FBC3, 0xE734FBC3, + 0xE735FBC3, 0xE736FBC3, 0xE737FBC3, 0xE738FBC3, 0xE739FBC3, 0xE73AFBC3, 0xE73BFBC3, 0xE73CFBC3, 0xE73DFBC3, 0xE73EFBC3, 0xE73FFBC3, 0xE740FBC3, 0xE741FBC3, 0xE742FBC3, 0xE743FBC3, + 0xE744FBC3, 0xE745FBC3, 0xE746FBC3, 0xE747FBC3, 0xE748FBC3, 0xE749FBC3, 0xE74AFBC3, 0xE74BFBC3, 0xE74CFBC3, 0xE74DFBC3, 0xE74EFBC3, 0xE74FFBC3, 0xE750FBC3, 0xE751FBC3, 0xE752FBC3, + 0xE753FBC3, 0xE754FBC3, 0xE755FBC3, 0xE756FBC3, 0xE757FBC3, 
0xE758FBC3, 0xE759FBC3, 0xE75AFBC3, 0xE75BFBC3, 0xE75CFBC3, 0xE75DFBC3, 0xE75EFBC3, 0xE75FFBC3, 0xE760FBC3, 0xE761FBC3, + 0xE762FBC3, 0xE763FBC3, 0xE764FBC3, 0xE765FBC3, 0xE766FBC3, 0xE767FBC3, 0xE768FBC3, 0xE769FBC3, 0xE76AFBC3, 0xE76BFBC3, 0xE76CFBC3, 0xE76DFBC3, 0xE76EFBC3, 0xE76FFBC3, 0xE770FBC3, + 0xE771FBC3, 0xE772FBC3, 0xE773FBC3, 0xE774FBC3, 0xE775FBC3, 0xE776FBC3, 0xE777FBC3, 0xE778FBC3, 0xE779FBC3, 0xE77AFBC3, 0xE77BFBC3, 0xE77CFBC3, 0xE77DFBC3, 0xE77EFBC3, 0xE77FFBC3, + 0xE780FBC3, 0xE781FBC3, 0xE782FBC3, 0xE783FBC3, 0xE784FBC3, 0xE785FBC3, 0xE786FBC3, 0xE787FBC3, 0xE788FBC3, 0xE789FBC3, 0xE78AFBC3, 0xE78BFBC3, 0xE78CFBC3, 0xE78DFBC3, 0xE78EFBC3, + 0xE78FFBC3, 0xE790FBC3, 0xE791FBC3, 0xE792FBC3, 0xE793FBC3, 0xE794FBC3, 0xE795FBC3, 0xE796FBC3, 0xE797FBC3, 0xE798FBC3, 0xE799FBC3, 0xE79AFBC3, 0xE79BFBC3, 0xE79CFBC3, 0xE79DFBC3, + 0xE79EFBC3, 0xE79FFBC3, 0xE7A0FBC3, 0xE7A1FBC3, 0xE7A2FBC3, 0xE7A3FBC3, 0xE7A4FBC3, 0xE7A5FBC3, 0xE7A6FBC3, 0xE7A7FBC3, 0xE7A8FBC3, 0xE7A9FBC3, 0xE7AAFBC3, 0xE7ABFBC3, 0xE7ACFBC3, + 0xE7ADFBC3, 0xE7AEFBC3, 0xE7AFFBC3, 0xE7B0FBC3, 0xE7B1FBC3, 0xE7B2FBC3, 0xE7B3FBC3, 0xE7B4FBC3, 0xE7B5FBC3, 0xE7B6FBC3, 0xE7B7FBC3, 0xE7B8FBC3, 0xE7B9FBC3, 0xE7BAFBC3, 0xE7BBFBC3, + 0xE7BCFBC3, 0xE7BDFBC3, 0xE7BEFBC3, 0xE7BFFBC3, 0xE7C0FBC3, 0xE7C1FBC3, 0xE7C2FBC3, 0xE7C3FBC3, 0xE7C4FBC3, 0xE7C5FBC3, 0xE7C6FBC3, 0xE7C7FBC3, 0xE7C8FBC3, 0xE7C9FBC3, 0xE7CAFBC3, + 0xE7CBFBC3, 0xE7CCFBC3, 0xE7CDFBC3, 0xE7CEFBC3, 0xE7CFFBC3, 0xE7D0FBC3, 0xE7D1FBC3, 0xE7D2FBC3, 0xE7D3FBC3, 0xE7D4FBC3, 0xE7D5FBC3, 0xE7D6FBC3, 0xE7D7FBC3, 0xE7D8FBC3, 0xE7D9FBC3, + 0xE7DAFBC3, 0xE7DBFBC3, 0xE7DCFBC3, 0xE7DDFBC3, 0xE7DEFBC3, 0xE7DFFBC3, 0xE7E0FBC3, 0xE7E1FBC3, 0xE7E2FBC3, 0xE7E3FBC3, 0xE7E4FBC3, 0xE7E5FBC3, 0xE7E6FBC3, 0xE7E7FBC3, 0xE7E8FBC3, + 0xE7E9FBC3, 0xE7EAFBC3, 0xE7EBFBC3, 0xE7ECFBC3, 0xE7EDFBC3, 0xE7EEFBC3, 0xE7EFFBC3, 0xE7F0FBC3, 0xE7F1FBC3, 0xE7F2FBC3, 0xE7F3FBC3, 0xE7F4FBC3, 0xE7F5FBC3, 0xE7F6FBC3, 0xE7F7FBC3, + 0xE7F8FBC3, 0xE7F9FBC3, 0xE7FAFBC3, 0xE7FBFBC3, 
0xE7FCFBC3, 0xE7FDFBC3, 0xE7FEFBC3, 0xE7FFFBC3, 0x3B0E, 0x3B0F, 0x3B10, 0x3B11, 0x3B12, 0x3B13, 0x3B14, + 0x3B15, 0x3B16, 0x3B17, 0x3B18, 0x3B19, 0x3B1A, 0x3B1B, 0x3B1C, 0x3B1D, 0x3B1E, 0x3B1F, 0x3B20, 0x3B21, 0x3B22, 0x3B23, + 0x3B24, 0x3B25, 0x3B26, 0x3B27, 0x3B28, 0x3B29, 0x3B2A, 0x3B2B, 0x3B2C, 0x3B2D, 0x3B2E, 0x3B2F, 0x3B30, 0x3B31, 0x3B32, + 0x3B33, 0x3B34, 0x3B35, 0x3B36, 0x3B37, 0x3B38, 0x3B39, 0x3B3A, 0x3B3B, 0x3B3C, 0x3B3D, 0x3B3E, 0x3B3F, 0x3B40, 0x3B41, + 0x3B42, 0x3B43, 0x3B44, 0x3B45, 0x3B46, 0x3B47, 0x3B48, 0x3B49, 0x3B4A, 0x3B4B, 0x3B4C, 0x3B4D, 0x3B4E, 0x3B4F, 0x3B50, + 0x3B51, 0x3B52, 0x3B53, 0x3B54, 0x3B55, 0x3B56, 0x3B57, 0x3B58, 0x3B59, 0x3B5A, 0x3B5B, 0x3B5C, 0x3B5D, 0x3B5E, 0x3B5F, + 0x3B60, 0x3B61, 0x3B62, 0x3B63, 0x3B64, 0x3B65, 0x3B66, 0x3B67, 0x3B68, 0x3B69, 0x3B6A, 0x3B6B, 0x3B6C, 0x3B6D, 0x3B6E, + 0x3B6F, 0x3B70, 0x3B71, 0x3B72, 0x3B73, 0x3B74, 0x3B75, 0x3B76, 0x3B77, 0x3B78, 0x3B79, 0x3B7A, 0x3B7B, 0x3B7C, 0x3B7D, + 0x3B7E, 0x3B7F, 0x3B80, 0x3B81, 0x3B82, 0x3B83, 0x3B84, 0x3B85, 0x3B86, 0x3B87, 0x3B88, 0x3B89, 0x3B8A, 0x3B8B, 0x3B8C, + 0x3B8D, 0x3B8E, 0x3B8F, 0x3B90, 0x3B91, 0x3B92, 0x3B93, 0x3B94, 0x3B95, 0x3B96, 0x3B97, 0x3B98, 0x3B99, 0x3B9A, 0x3B9B, + 0x3B9C, 0x3B9D, 0x3B9E, 0x3B9F, 0x3BA0, 0x3BA1, 0x3BA2, 0x3BA3, 0x3BA4, 0x3BA5, 0x3BA6, 0x3BA7, 0x3BA8, 0x3BA9, 0x3BAA, + 0x3BAB, 0x3BAC, 0x3BAD, 0x3BAE, 0x3BAF, 0x3BB0, 0x3BB1, 0x3BB2, 0x3BB3, 0x3BB4, 0x3BB5, 0x3BB6, 0x3BB7, 0x3BB8, 0x3BB9, + 0x3BBA, 0x3BBB, 0x3BBC, 0x3BBD, 0x3BBE, 0x3BBF, 0x3BC0, 0x3BC1, 0x3BC2, 0x3BC3, 0x3BC4, 0x3BC5, 0x3BC6, 0x3BC7, 0x3BC8, + 0x3BC9, 0x3BCA, 0x3BCB, 0x3BCC, 0x3BCD, 0x3BCE, 0x3BCF, 0x3BD0, 0x3BD1, 0x3BD2, 0xE8C5FBC3, 0xE8C6FBC3, 0x1C3E, 0x1C3F, 0x1C40, + 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xE8D7FBC3, 0xE8D8FBC3, + 0xE8D9FBC3, 0xE8DAFBC3, 0xE8DBFBC3, 0xE8DCFBC3, 0xE8DDFBC3, 0xE8DEFBC3, 0xE8DFFBC3, 0xE8E0FBC3, 0xE8E1FBC3, 0xE8E2FBC3, 0xE8E3FBC3, 0xE8E4FBC3, 0xE8E5FBC3, 0xE8E6FBC3, 0xE8E7FBC3, + 
0xE8E8FBC3, 0xE8E9FBC3, 0xE8EAFBC3, 0xE8EBFBC3, 0xE8ECFBC3, 0xE8EDFBC3, 0xE8EEFBC3, 0xE8EFFBC3, 0xE8F0FBC3, 0xE8F1FBC3, 0xE8F2FBC3, 0xE8F3FBC3, 0xE8F4FBC3, 0xE8F5FBC3, 0xE8F6FBC3, + 0xE8F7FBC3, 0xE8F8FBC3, 0xE8F9FBC3, 0xE8FAFBC3, 0xE8FBFBC3, 0xE8FCFBC3, 0xE8FDFBC3, 0xE8FEFBC3, 0xE8FFFBC3, 0x3BD3, 0x3BD4, 0x3BD5, 0x3BD6, 0x3BD7, 0x3BD8, + 0x3BD9, 0x3BDA, 0x3BDB, 0x3BDC, 0x3BDD, 0x3BDE, 0x3BDF, 0x3BE0, 0x3BE1, 0x3BE2, 0x3BE3, 0x3BE4, 0x3BE5, 0x3BE6, 0x3BE7, + 0x3BE8, 0x3BE9, 0x3BEA, 0x3BEB, 0x3BEC, 0x3BED, 0x3BEE, 0x3BEF, 0x3BF0, 0x3BF1, 0x3BF2, 0x3BF3, 0x3BF4, 0x3BD3, 0x3BD4, + 0x3BD5, 0x3BD6, 0x3BD7, 0x3BD8, 0x3BD9, 0x3BDA, 0x3BDB, 0x3BDC, 0x3BDD, 0x3BDE, 0x3BDF, 0x3BE0, 0x3BE1, 0x3BE2, 0x3BE3, + 0x3BE4, 0x3BE5, 0x3BE6, 0x3BE7, 0x3BE8, 0x3BE9, 0x3BEA, 0x3BEB, 0x3BEC, 0x3BED, 0x3BEE, 0x3BEF, 0x3BF0, 0x3BF1, 0x3BF2, + 0x3BF3, 0x3BF4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xE94BFBC3, 0xE94CFBC3, 0xE94DFBC3, 0xE94EFBC3, 0xE94FFBC3, 0x1C3D, + 0x1C3E, 0x1C3F, 0x1C40, 0x1C41, 0x1C42, 0x1C43, 0x1C44, 0x1C45, 0x1C46, 0xE95AFBC3, 0xE95BFBC3, 0xE95CFBC3, 0xE95DFBC3, 0x265, 0x274, + 0xE960FBC3, 0xE961FBC3, 0xE962FBC3, 0xE963FBC3, 0xE964FBC3, 0xE965FBC3, 0xE966FBC3, 0xE967FBC3, 0xE968FBC3, 0xE969FBC3, 0xE96AFBC3, 0xE96BFBC3, 0xE96CFBC3, 0xE96DFBC3, 0xE96EFBC3, + 0xE96FFBC3, 0xE970FBC3, 0xE971FBC3, 0xE972FBC3, 0xE973FBC3, 0xE974FBC3, 0xE975FBC3, 0xE976FBC3, 0xE977FBC3, 0xE978FBC3, 0xE979FBC3, 0xE97AFBC3, 0xE97BFBC3, 0xE97CFBC3, 0xE97DFBC3, + 0xE97EFBC3, 0xE97FFBC3, 0xE980FBC3, 0xE981FBC3, 0xE982FBC3, 0xE983FBC3, 0xE984FBC3, 0xE985FBC3, 0xE986FBC3, 0xE987FBC3, 0xE988FBC3, 0xE989FBC3, 0xE98AFBC3, 0xE98BFBC3, 0xE98CFBC3, + 0xE98DFBC3, 0xE98EFBC3, 0xE98FFBC3, 0xE990FBC3, 0xE991FBC3, 0xE992FBC3, 0xE993FBC3, 0xE994FBC3, 0xE995FBC3, 0xE996FBC3, 0xE997FBC3, 0xE998FBC3, 0xE999FBC3, 0xE99AFBC3, 0xE99BFBC3, + 0xE99CFBC3, 0xE99DFBC3, 0xE99EFBC3, 0xE99FFBC3, 0xE9A0FBC3, 0xE9A1FBC3, 0xE9A2FBC3, 0xE9A3FBC3, 0xE9A4FBC3, 0xE9A5FBC3, 0xE9A6FBC3, 0xE9A7FBC3, 0xE9A8FBC3, 0xE9A9FBC3, 0xE9AAFBC3, + 
0xE9ABFBC3, 0xE9ACFBC3, 0xE9ADFBC3, 0xE9AEFBC3, 0xE9AFFBC3, 0xE9B0FBC3, 0xE9B1FBC3, 0xE9B2FBC3, 0xE9B3FBC3, 0xE9B4FBC3, 0xE9B5FBC3, 0xE9B6FBC3, 0xE9B7FBC3, 0xE9B8FBC3, 0xE9B9FBC3, + 0xE9BAFBC3, 0xE9BBFBC3, 0xE9BCFBC3, 0xE9BDFBC3, 0xE9BEFBC3, 0xE9BFFBC3, 0xE9C0FBC3, 0xE9C1FBC3, 0xE9C2FBC3, 0xE9C3FBC3, 0xE9C4FBC3, 0xE9C5FBC3, 0xE9C6FBC3, 0xE9C7FBC3, 0xE9C8FBC3, + 0xE9C9FBC3, 0xE9CAFBC3, 0xE9CBFBC3, 0xE9CCFBC3, 0xE9CDFBC3, 0xE9CEFBC3, 0xE9CFFBC3, 0xE9D0FBC3, 0xE9D1FBC3, 0xE9D2FBC3, 0xE9D3FBC3, 0xE9D4FBC3, 0xE9D5FBC3, 0xE9D6FBC3, 0xE9D7FBC3, + 0xE9D8FBC3, 0xE9D9FBC3, 0xE9DAFBC3, 0xE9DBFBC3, 0xE9DCFBC3, 0xE9DDFBC3, 0xE9DEFBC3, 0xE9DFFBC3, 0xE9E0FBC3, 0xE9E1FBC3, 0xE9E2FBC3, 0xE9E3FBC3, 0xE9E4FBC3, 0xE9E5FBC3, 0xE9E6FBC3, + 0xE9E7FBC3, 0xE9E8FBC3, 0xE9E9FBC3, 0xE9EAFBC3, 0xE9EBFBC3, 0xE9ECFBC3, 0xE9EDFBC3, 0xE9EEFBC3, 0xE9EFFBC3, 0xE9F0FBC3, 0xE9F1FBC3, 0xE9F2FBC3, 0xE9F3FBC3, 0xE9F4FBC3, 0xE9F5FBC3, + 0xE9F6FBC3, 0xE9F7FBC3, 0xE9F8FBC3, 0xE9F9FBC3, 0xE9FAFBC3, 0xE9FBFBC3, 0xE9FCFBC3, 0xE9FDFBC3, 0xE9FEFBC3, 0xE9FFFBC3, 0xEA00FBC3, 0xEA01FBC3, 0xEA02FBC3, 0xEA03FBC3, 0xEA04FBC3, + 0xEA05FBC3, 0xEA06FBC3, 0xEA07FBC3, 0xEA08FBC3, 0xEA09FBC3, 0xEA0AFBC3, 0xEA0BFBC3, 0xEA0CFBC3, 0xEA0DFBC3, 0xEA0EFBC3, 0xEA0FFBC3, 0xEA10FBC3, 0xEA11FBC3, 0xEA12FBC3, 0xEA13FBC3, + 0xEA14FBC3, 0xEA15FBC3, 0xEA16FBC3, 0xEA17FBC3, 0xEA18FBC3, 0xEA19FBC3, 0xEA1AFBC3, 0xEA1BFBC3, 0xEA1CFBC3, 0xEA1DFBC3, 0xEA1EFBC3, 0xEA1FFBC3, 0xEA20FBC3, 0xEA21FBC3, 0xEA22FBC3, + 0xEA23FBC3, 0xEA24FBC3, 0xEA25FBC3, 0xEA26FBC3, 0xEA27FBC3, 0xEA28FBC3, 0xEA29FBC3, 0xEA2AFBC3, 0xEA2BFBC3, 0xEA2CFBC3, 0xEA2DFBC3, 0xEA2EFBC3, 0xEA2FFBC3, 0xEA30FBC3, 0xEA31FBC3, + 0xEA32FBC3, 0xEA33FBC3, 0xEA34FBC3, 0xEA35FBC3, 0xEA36FBC3, 0xEA37FBC3, 0xEA38FBC3, 0xEA39FBC3, 0xEA3AFBC3, 0xEA3BFBC3, 0xEA3CFBC3, 0xEA3DFBC3, 0xEA3EFBC3, 0xEA3FFBC3, 0xEA40FBC3, + 0xEA41FBC3, 0xEA42FBC3, 0xEA43FBC3, 0xEA44FBC3, 0xEA45FBC3, 0xEA46FBC3, 0xEA47FBC3, 0xEA48FBC3, 0xEA49FBC3, 0xEA4AFBC3, 0xEA4BFBC3, 0xEA4CFBC3, 0xEA4DFBC3, 0xEA4EFBC3, 0xEA4FFBC3, 
+ 0xEA50FBC3, 0xEA51FBC3, 0xEA52FBC3, 0xEA53FBC3, 0xEA54FBC3, 0xEA55FBC3, 0xEA56FBC3, 0xEA57FBC3, 0xEA58FBC3, 0xEA59FBC3, 0xEA5AFBC3, 0xEA5BFBC3, 0xEA5CFBC3, 0xEA5DFBC3, 0xEA5EFBC3, + 0xEA5FFBC3, 0xEA60FBC3, 0xEA61FBC3, 0xEA62FBC3, 0xEA63FBC3, 0xEA64FBC3, 0xEA65FBC3, 0xEA66FBC3, 0xEA67FBC3, 0xEA68FBC3, 0xEA69FBC3, 0xEA6AFBC3, 0xEA6BFBC3, 0xEA6CFBC3, 0xEA6DFBC3, + 0xEA6EFBC3, 0xEA6FFBC3, 0xEA70FBC3, 0xEA71FBC3, 0xEA72FBC3, 0xEA73FBC3, 0xEA74FBC3, 0xEA75FBC3, 0xEA76FBC3, 0xEA77FBC3, 0xEA78FBC3, 0xEA79FBC3, 0xEA7AFBC3, 0xEA7BFBC3, 0xEA7CFBC3, + 0xEA7DFBC3, 0xEA7EFBC3, 0xEA7FFBC3, 0xEA80FBC3, 0xEA81FBC3, 0xEA82FBC3, 0xEA83FBC3, 0xEA84FBC3, 0xEA85FBC3, 0xEA86FBC3, 0xEA87FBC3, 0xEA88FBC3, 0xEA89FBC3, 0xEA8AFBC3, 0xEA8BFBC3, + 0xEA8CFBC3, 0xEA8DFBC3, 0xEA8EFBC3, 0xEA8FFBC3, 0xEA90FBC3, 0xEA91FBC3, 0xEA92FBC3, 0xEA93FBC3, 0xEA94FBC3, 0xEA95FBC3, 0xEA96FBC3, 0xEA97FBC3, 0xEA98FBC3, 0xEA99FBC3, 0xEA9AFBC3, + 0xEA9BFBC3, 0xEA9CFBC3, 0xEA9DFBC3, 0xEA9EFBC3, 0xEA9FFBC3, 0xEAA0FBC3, 0xEAA1FBC3, 0xEAA2FBC3, 0xEAA3FBC3, 0xEAA4FBC3, 0xEAA5FBC3, 0xEAA6FBC3, 0xEAA7FBC3, 0xEAA8FBC3, 0xEAA9FBC3, + 0xEAAAFBC3, 0xEAABFBC3, 0xEAACFBC3, 0xEAADFBC3, 0xEAAEFBC3, 0xEAAFFBC3, 0xEAB0FBC3, 0xEAB1FBC3, 0xEAB2FBC3, 0xEAB3FBC3, 0xEAB4FBC3, 0xEAB5FBC3, 0xEAB6FBC3, 0xEAB7FBC3, 0xEAB8FBC3, + 0xEAB9FBC3, 0xEABAFBC3, 0xEABBFBC3, 0xEABCFBC3, 0xEABDFBC3, 0xEABEFBC3, 0xEABFFBC3, 0xEAC0FBC3, 0xEAC1FBC3, 0xEAC2FBC3, 0xEAC3FBC3, 0xEAC4FBC3, 0xEAC5FBC3, 0xEAC6FBC3, 0xEAC7FBC3, + 0xEAC8FBC3, 0xEAC9FBC3, 0xEACAFBC3, 0xEACBFBC3, 0xEACCFBC3, 0xEACDFBC3, 0xEACEFBC3, 0xEACFFBC3, 0xEAD0FBC3, 0xEAD1FBC3, 0xEAD2FBC3, 0xEAD3FBC3, 0xEAD4FBC3, 0xEAD5FBC3, 0xEAD6FBC3, + 0xEAD7FBC3, 0xEAD8FBC3, 0xEAD9FBC3, 0xEADAFBC3, 0xEADBFBC3, 0xEADCFBC3, 0xEADDFBC3, 0xEADEFBC3, 0xEADFFBC3, 0xEAE0FBC3, 0xEAE1FBC3, 0xEAE2FBC3, 0xEAE3FBC3, 0xEAE4FBC3, 0xEAE5FBC3, + 0xEAE6FBC3, 0xEAE7FBC3, 0xEAE8FBC3, 0xEAE9FBC3, 0xEAEAFBC3, 0xEAEBFBC3, 0xEAECFBC3, 0xEAEDFBC3, 0xEAEEFBC3, 0xEAEFFBC3, 0xEAF0FBC3, 0xEAF1FBC3, 0xEAF2FBC3, 0xEAF3FBC3, 
0xEAF4FBC3, + 0xEAF5FBC3, 0xEAF6FBC3, 0xEAF7FBC3, 0xEAF8FBC3, 0xEAF9FBC3, 0xEAFAFBC3, 0xEAFBFBC3, 0xEAFCFBC3, 0xEAFDFBC3, 0xEAFEFBC3, 0xEAFFFBC3, 0xEB00FBC3, 0xEB01FBC3, 0xEB02FBC3, 0xEB03FBC3, + 0xEB04FBC3, 0xEB05FBC3, 0xEB06FBC3, 0xEB07FBC3, 0xEB08FBC3, 0xEB09FBC3, 0xEB0AFBC3, 0xEB0BFBC3, 0xEB0CFBC3, 0xEB0DFBC3, 0xEB0EFBC3, 0xEB0FFBC3, 0xEB10FBC3, 0xEB11FBC3, 0xEB12FBC3, + 0xEB13FBC3, 0xEB14FBC3, 0xEB15FBC3, 0xEB16FBC3, 0xEB17FBC3, 0xEB18FBC3, 0xEB19FBC3, 0xEB1AFBC3, 0xEB1BFBC3, 0xEB1CFBC3, 0xEB1DFBC3, 0xEB1EFBC3, 0xEB1FFBC3, 0xEB20FBC3, 0xEB21FBC3, + 0xEB22FBC3, 0xEB23FBC3, 0xEB24FBC3, 0xEB25FBC3, 0xEB26FBC3, 0xEB27FBC3, 0xEB28FBC3, 0xEB29FBC3, 0xEB2AFBC3, 0xEB2BFBC3, 0xEB2CFBC3, 0xEB2DFBC3, 0xEB2EFBC3, 0xEB2FFBC3, 0xEB30FBC3, + 0xEB31FBC3, 0xEB32FBC3, 0xEB33FBC3, 0xEB34FBC3, 0xEB35FBC3, 0xEB36FBC3, 0xEB37FBC3, 0xEB38FBC3, 0xEB39FBC3, 0xEB3AFBC3, 0xEB3BFBC3, 0xEB3CFBC3, 0xEB3DFBC3, 0xEB3EFBC3, 0xEB3FFBC3, + 0xEB40FBC3, 0xEB41FBC3, 0xEB42FBC3, 0xEB43FBC3, 0xEB44FBC3, 0xEB45FBC3, 0xEB46FBC3, 0xEB47FBC3, 0xEB48FBC3, 0xEB49FBC3, 0xEB4AFBC3, 0xEB4BFBC3, 0xEB4CFBC3, 0xEB4DFBC3, 0xEB4EFBC3, + 0xEB4FFBC3, 0xEB50FBC3, 0xEB51FBC3, 0xEB52FBC3, 0xEB53FBC3, 0xEB54FBC3, 0xEB55FBC3, 0xEB56FBC3, 0xEB57FBC3, 0xEB58FBC3, 0xEB59FBC3, 0xEB5AFBC3, 0xEB5BFBC3, 0xEB5CFBC3, 0xEB5DFBC3, + 0xEB5EFBC3, 0xEB5FFBC3, 0xEB60FBC3, 0xEB61FBC3, 0xEB62FBC3, 0xEB63FBC3, 0xEB64FBC3, 0xEB65FBC3, 0xEB66FBC3, 0xEB67FBC3, 0xEB68FBC3, 0xEB69FBC3, 0xEB6AFBC3, 0xEB6BFBC3, 0xEB6CFBC3, + 0xEB6DFBC3, 0xEB6EFBC3, 0xEB6FFBC3, 0xEB70FBC3, 0xEB71FBC3, 0xEB72FBC3, 0xEB73FBC3, 0xEB74FBC3, 0xEB75FBC3, 0xEB76FBC3, 0xEB77FBC3, 0xEB78FBC3, 0xEB79FBC3, 0xEB7AFBC3, 0xEB7BFBC3, + 0xEB7CFBC3, 0xEB7DFBC3, 0xEB7EFBC3, 0xEB7FFBC3, 0xEB80FBC3, 0xEB81FBC3, 0xEB82FBC3, 0xEB83FBC3, 0xEB84FBC3, 0xEB85FBC3, 0xEB86FBC3, 0xEB87FBC3, 0xEB88FBC3, 0xEB89FBC3, 0xEB8AFBC3, + 0xEB8BFBC3, 0xEB8CFBC3, 0xEB8DFBC3, 0xEB8EFBC3, 0xEB8FFBC3, 0xEB90FBC3, 0xEB91FBC3, 0xEB92FBC3, 0xEB93FBC3, 0xEB94FBC3, 0xEB95FBC3, 0xEB96FBC3, 0xEB97FBC3, 
0xEB98FBC3, 0xEB99FBC3, + 0xEB9AFBC3, 0xEB9BFBC3, 0xEB9CFBC3, 0xEB9DFBC3, 0xEB9EFBC3, 0xEB9FFBC3, 0xEBA0FBC3, 0xEBA1FBC3, 0xEBA2FBC3, 0xEBA3FBC3, 0xEBA4FBC3, 0xEBA5FBC3, 0xEBA6FBC3, 0xEBA7FBC3, 0xEBA8FBC3, + 0xEBA9FBC3, 0xEBAAFBC3, 0xEBABFBC3, 0xEBACFBC3, 0xEBADFBC3, 0xEBAEFBC3, 0xEBAFFBC3, 0xEBB0FBC3, 0xEBB1FBC3, 0xEBB2FBC3, 0xEBB3FBC3, 0xEBB4FBC3, 0xEBB5FBC3, 0xEBB6FBC3, 0xEBB7FBC3, + 0xEBB8FBC3, 0xEBB9FBC3, 0xEBBAFBC3, 0xEBBBFBC3, 0xEBBCFBC3, 0xEBBDFBC3, 0xEBBEFBC3, 0xEBBFFBC3, 0xEBC0FBC3, 0xEBC1FBC3, 0xEBC2FBC3, 0xEBC3FBC3, 0xEBC4FBC3, 0xEBC5FBC3, 0xEBC6FBC3, + 0xEBC7FBC3, 0xEBC8FBC3, 0xEBC9FBC3, 0xEBCAFBC3, 0xEBCBFBC3, 0xEBCCFBC3, 0xEBCDFBC3, 0xEBCEFBC3, 0xEBCFFBC3, 0xEBD0FBC3, 0xEBD1FBC3, 0xEBD2FBC3, 0xEBD3FBC3, 0xEBD4FBC3, 0xEBD5FBC3, + 0xEBD6FBC3, 0xEBD7FBC3, 0xEBD8FBC3, 0xEBD9FBC3, 0xEBDAFBC3, 0xEBDBFBC3, 0xEBDCFBC3, 0xEBDDFBC3, 0xEBDEFBC3, 0xEBDFFBC3, 0xEBE0FBC3, 0xEBE1FBC3, 0xEBE2FBC3, 0xEBE3FBC3, 0xEBE4FBC3, + 0xEBE5FBC3, 0xEBE6FBC3, 0xEBE7FBC3, 0xEBE8FBC3, 0xEBE9FBC3, 0xEBEAFBC3, 0xEBEBFBC3, 0xEBECFBC3, 0xEBEDFBC3, 0xEBEEFBC3, 0xEBEFFBC3, 0xEBF0FBC3, 0xEBF1FBC3, 0xEBF2FBC3, 0xEBF3FBC3, + 0xEBF4FBC3, 0xEBF5FBC3, 0xEBF6FBC3, 0xEBF7FBC3, 0xEBF8FBC3, 0xEBF9FBC3, 0xEBFAFBC3, 0xEBFBFBC3, 0xEBFCFBC3, 0xEBFDFBC3, 0xEBFEFBC3, 0xEBFFFBC3, 0xEC00FBC3, 0xEC01FBC3, 0xEC02FBC3, + 0xEC03FBC3, 0xEC04FBC3, 0xEC05FBC3, 0xEC06FBC3, 0xEC07FBC3, 0xEC08FBC3, 0xEC09FBC3, 0xEC0AFBC3, 0xEC0BFBC3, 0xEC0CFBC3, 0xEC0DFBC3, 0xEC0EFBC3, 0xEC0FFBC3, 0xEC10FBC3, 0xEC11FBC3, + 0xEC12FBC3, 0xEC13FBC3, 0xEC14FBC3, 0xEC15FBC3, 0xEC16FBC3, 0xEC17FBC3, 0xEC18FBC3, 0xEC19FBC3, 0xEC1AFBC3, 0xEC1BFBC3, 0xEC1CFBC3, 0xEC1DFBC3, 0xEC1EFBC3, 0xEC1FFBC3, 0xEC20FBC3, + 0xEC21FBC3, 0xEC22FBC3, 0xEC23FBC3, 0xEC24FBC3, 0xEC25FBC3, 0xEC26FBC3, 0xEC27FBC3, 0xEC28FBC3, 0xEC29FBC3, 0xEC2AFBC3, 0xEC2BFBC3, 0xEC2CFBC3, 0xEC2DFBC3, 0xEC2EFBC3, 0xEC2FFBC3, + 0xEC30FBC3, 0xEC31FBC3, 0xEC32FBC3, 0xEC33FBC3, 0xEC34FBC3, 0xEC35FBC3, 0xEC36FBC3, 0xEC37FBC3, 0xEC38FBC3, 0xEC39FBC3, 0xEC3AFBC3, 0xEC3BFBC3, 
0xEC3CFBC3, 0xEC3DFBC3, 0xEC3EFBC3, + 0xEC3FFBC3, 0xEC40FBC3, 0xEC41FBC3, 0xEC42FBC3, 0xEC43FBC3, 0xEC44FBC3, 0xEC45FBC3, 0xEC46FBC3, 0xEC47FBC3, 0xEC48FBC3, 0xEC49FBC3, 0xEC4AFBC3, 0xEC4BFBC3, 0xEC4CFBC3, 0xEC4DFBC3, + 0xEC4EFBC3, 0xEC4FFBC3, 0xEC50FBC3, 0xEC51FBC3, 0xEC52FBC3, 0xEC53FBC3, 0xEC54FBC3, 0xEC55FBC3, 0xEC56FBC3, 0xEC57FBC3, 0xEC58FBC3, 0xEC59FBC3, 0xEC5AFBC3, 0xEC5BFBC3, 0xEC5CFBC3, + 0xEC5DFBC3, 0xEC5EFBC3, 0xEC5FFBC3, 0xEC60FBC3, 0xEC61FBC3, 0xEC62FBC3, 0xEC63FBC3, 0xEC64FBC3, 0xEC65FBC3, 0xEC66FBC3, 0xEC67FBC3, 0xEC68FBC3, 0xEC69FBC3, 0xEC6AFBC3, 0xEC6BFBC3, + 0xEC6CFBC3, 0xEC6DFBC3, 0xEC6EFBC3, 0xEC6FFBC3, 0xEC70FBC3, 0xEC71FBC3, 0xEC72FBC3, 0xEC73FBC3, 0xEC74FBC3, 0xEC75FBC3, 0xEC76FBC3, 0xEC77FBC3, 0xEC78FBC3, 0xEC79FBC3, 0xEC7AFBC3, + 0xEC7BFBC3, 0xEC7CFBC3, 0xEC7DFBC3, 0xEC7EFBC3, 0xEC7FFBC3, 0xEC80FBC3, 0xEC81FBC3, 0xEC82FBC3, 0xEC83FBC3, 0xEC84FBC3, 0xEC85FBC3, 0xEC86FBC3, 0xEC87FBC3, 0xEC88FBC3, 0xEC89FBC3, + 0xEC8AFBC3, 0xEC8BFBC3, 0xEC8CFBC3, 0xEC8DFBC3, 0xEC8EFBC3, 0xEC8FFBC3, 0xEC90FBC3, 0xEC91FBC3, 0xEC92FBC3, 0xEC93FBC3, 0xEC94FBC3, 0xEC95FBC3, 0xEC96FBC3, 0xEC97FBC3, 0xEC98FBC3, + 0xEC99FBC3, 0xEC9AFBC3, 0xEC9BFBC3, 0xEC9CFBC3, 0xEC9DFBC3, 0xEC9EFBC3, 0xEC9FFBC3, 0xECA0FBC3, 0xECA1FBC3, 0xECA2FBC3, 0xECA3FBC3, 0xECA4FBC3, 0xECA5FBC3, 0xECA6FBC3, 0xECA7FBC3, + 0xECA8FBC3, 0xECA9FBC3, 0xECAAFBC3, 0xECABFBC3, 0xECACFBC3, 0xECADFBC3, 0xECAEFBC3, 0xECAFFBC3, 0xECB0FBC3, 0xECB1FBC3, 0xECB2FBC3, 0xECB3FBC3, 0xECB4FBC3, 0xECB5FBC3, 0xECB6FBC3, + 0xECB7FBC3, 0xECB8FBC3, 0xECB9FBC3, 0xECBAFBC3, 0xECBBFBC3, 0xECBCFBC3, 0xECBDFBC3, 0xECBEFBC3, 0xECBFFBC3, 0xECC0FBC3, 0xECC1FBC3, 0xECC2FBC3, 0xECC3FBC3, 0xECC4FBC3, 0xECC5FBC3, + 0xECC6FBC3, 0xECC7FBC3, 0xECC8FBC3, 0xECC9FBC3, 0xECCAFBC3, 0xECCBFBC3, 0xECCCFBC3, 0xECCDFBC3, 0xECCEFBC3, 0xECCFFBC3, 0xECD0FBC3, 0xECD1FBC3, 0xECD2FBC3, 0xECD3FBC3, 0xECD4FBC3, + 0xECD5FBC3, 0xECD6FBC3, 0xECD7FBC3, 0xECD8FBC3, 0xECD9FBC3, 0xECDAFBC3, 0xECDBFBC3, 0xECDCFBC3, 0xECDDFBC3, 0xECDEFBC3, 0xECDFFBC3, 
0xECE0FBC3, 0xECE1FBC3, 0xECE2FBC3, 0xECE3FBC3, + 0xECE4FBC3, 0xECE5FBC3, 0xECE6FBC3, 0xECE7FBC3, 0xECE8FBC3, 0xECE9FBC3, 0xECEAFBC3, 0xECEBFBC3, 0xECECFBC3, 0xECEDFBC3, 0xECEEFBC3, 0xECEFFBC3, 0xECF0FBC3, 0xECF1FBC3, 0xECF2FBC3, + 0xECF3FBC3, 0xECF4FBC3, 0xECF5FBC3, 0xECF6FBC3, 0xECF7FBC3, 0xECF8FBC3, 0xECF9FBC3, 0xECFAFBC3, 0xECFBFBC3, 0xECFCFBC3, 0xECFDFBC3, 0xECFEFBC3, 0xECFFFBC3, 0xED00FBC3, 0xED01FBC3, + 0xED02FBC3, 0xED03FBC3, 0xED04FBC3, 0xED05FBC3, 0xED06FBC3, 0xED07FBC3, 0xED08FBC3, 0xED09FBC3, 0xED0AFBC3, 0xED0BFBC3, 0xED0CFBC3, 0xED0DFBC3, 0xED0EFBC3, 0xED0FFBC3, 0xED10FBC3, + 0xED11FBC3, 0xED12FBC3, 0xED13FBC3, 0xED14FBC3, 0xED15FBC3, 0xED16FBC3, 0xED17FBC3, 0xED18FBC3, 0xED19FBC3, 0xED1AFBC3, 0xED1BFBC3, 0xED1CFBC3, 0xED1DFBC3, 0xED1EFBC3, 0xED1FFBC3, + 0xED20FBC3, 0xED21FBC3, 0xED22FBC3, 0xED23FBC3, 0xED24FBC3, 0xED25FBC3, 0xED26FBC3, 0xED27FBC3, 0xED28FBC3, 0xED29FBC3, 0xED2AFBC3, 0xED2BFBC3, 0xED2CFBC3, 0xED2DFBC3, 0xED2EFBC3, + 0xED2FFBC3, 0xED30FBC3, 0xED31FBC3, 0xED32FBC3, 0xED33FBC3, 0xED34FBC3, 0xED35FBC3, 0xED36FBC3, 0xED37FBC3, 0xED38FBC3, 0xED39FBC3, 0xED3AFBC3, 0xED3BFBC3, 0xED3CFBC3, 0xED3DFBC3, + 0xED3EFBC3, 0xED3FFBC3, 0xED40FBC3, 0xED41FBC3, 0xED42FBC3, 0xED43FBC3, 0xED44FBC3, 0xED45FBC3, 0xED46FBC3, 0xED47FBC3, 0xED48FBC3, 0xED49FBC3, 0xED4AFBC3, 0xED4BFBC3, 0xED4CFBC3, + 0xED4DFBC3, 0xED4EFBC3, 0xED4FFBC3, 0xED50FBC3, 0xED51FBC3, 0xED52FBC3, 0xED53FBC3, 0xED54FBC3, 0xED55FBC3, 0xED56FBC3, 0xED57FBC3, 0xED58FBC3, 0xED59FBC3, 0xED5AFBC3, 0xED5BFBC3, + 0xED5CFBC3, 0xED5DFBC3, 0xED5EFBC3, 0xED5FFBC3, 0xED60FBC3, 0xED61FBC3, 0xED62FBC3, 0xED63FBC3, 0xED64FBC3, 0xED65FBC3, 0xED66FBC3, 0xED67FBC3, 0xED68FBC3, 0xED69FBC3, 0xED6AFBC3, + 0xED6BFBC3, 0xED6CFBC3, 0xED6DFBC3, 0xED6EFBC3, 0xED6FFBC3, 0xED70FBC3, 0xED71FBC3, 0xED72FBC3, 0xED73FBC3, 0xED74FBC3, 0xED75FBC3, 0xED76FBC3, 0xED77FBC3, 0xED78FBC3, 0xED79FBC3, + 0xED7AFBC3, 0xED7BFBC3, 0xED7CFBC3, 0xED7DFBC3, 0xED7EFBC3, 0xED7FFBC3, 0xED80FBC3, 0xED81FBC3, 0xED82FBC3, 0xED83FBC3, 
0xED84FBC3, 0xED85FBC3, 0xED86FBC3, 0xED87FBC3, 0xED88FBC3, + 0xED89FBC3, 0xED8AFBC3, 0xED8BFBC3, 0xED8CFBC3, 0xED8DFBC3, 0xED8EFBC3, 0xED8FFBC3, 0xED90FBC3, 0xED91FBC3, 0xED92FBC3, 0xED93FBC3, 0xED94FBC3, 0xED95FBC3, 0xED96FBC3, 0xED97FBC3, + 0xED98FBC3, 0xED99FBC3, 0xED9AFBC3, 0xED9BFBC3, 0xED9CFBC3, 0xED9DFBC3, 0xED9EFBC3, 0xED9FFBC3, 0xEDA0FBC3, 0xEDA1FBC3, 0xEDA2FBC3, 0xEDA3FBC3, 0xEDA4FBC3, 0xEDA5FBC3, 0xEDA6FBC3, + 0xEDA7FBC3, 0xEDA8FBC3, 0xEDA9FBC3, 0xEDAAFBC3, 0xEDABFBC3, 0xEDACFBC3, 0xEDADFBC3, 0xEDAEFBC3, 0xEDAFFBC3, 0xEDB0FBC3, 0xEDB1FBC3, 0xEDB2FBC3, 0xEDB3FBC3, 0xEDB4FBC3, 0xEDB5FBC3, + 0xEDB6FBC3, 0xEDB7FBC3, 0xEDB8FBC3, 0xEDB9FBC3, 0xEDBAFBC3, 0xEDBBFBC3, 0xEDBCFBC3, 0xEDBDFBC3, 0xEDBEFBC3, 0xEDBFFBC3, 0xEDC0FBC3, 0xEDC1FBC3, 0xEDC2FBC3, 0xEDC3FBC3, 0xEDC4FBC3, + 0xEDC5FBC3, 0xEDC6FBC3, 0xEDC7FBC3, 0xEDC8FBC3, 0xEDC9FBC3, 0xEDCAFBC3, 0xEDCBFBC3, 0xEDCCFBC3, 0xEDCDFBC3, 0xEDCEFBC3, 0xEDCFFBC3, 0xEDD0FBC3, 0xEDD1FBC3, 0xEDD2FBC3, 0xEDD3FBC3, + 0xEDD4FBC3, 0xEDD5FBC3, 0xEDD6FBC3, 0xEDD7FBC3, 0xEDD8FBC3, 0xEDD9FBC3, 0xEDDAFBC3, 0xEDDBFBC3, 0xEDDCFBC3, 0xEDDDFBC3, 0xEDDEFBC3, 0xEDDFFBC3, 0xEDE0FBC3, 0xEDE1FBC3, 0xEDE2FBC3, + 0xEDE3FBC3, 0xEDE4FBC3, 0xEDE5FBC3, 0xEDE6FBC3, 0xEDE7FBC3, 0xEDE8FBC3, 0xEDE9FBC3, 0xEDEAFBC3, 0xEDEBFBC3, 0xEDECFBC3, 0xEDEDFBC3, 0xEDEEFBC3, 0xEDEFFBC3, 0xEDF0FBC3, 0xEDF1FBC3, + 0xEDF2FBC3, 0xEDF3FBC3, 0xEDF4FBC3, 0xEDF5FBC3, 0xEDF6FBC3, 0xEDF7FBC3, 0xEDF8FBC3, 0xEDF9FBC3, 0xEDFAFBC3, 0xEDFBFBC3, 0xEDFCFBC3, 0xEDFDFBC3, 0xEDFEFBC3, 0xEDFFFBC3, 0x230B, + 0x230D, 0x2325, 0x2337, 0xEE04FBC3, 0x23B7, 0x2347, 0x232C, 0x236A, 0x23C6, 0x2387, 0x239C, 0x23A3, 0x23A7, 0x2359, 0x236E, + 0x2376, 0x2364, 0x2382, 0x2346, 0x235A, 0x231D, 0x231E, 0x232D, 0x2338, 0x2365, 0x236B, 0x236F, 0x230C, 0x23A8, 0x2377, + 0x2381, 0xEE20FBC3, 0x230D, 0x2325, 0xEE23FBC3, 0x23B1, 0xEE25FBC3, 0xEE26FBC3, 0x232C, 0xEE28FBC3, 0x23C6, 0x2387, 0x239C, 0x23A3, 0x23A7, + 0x2359, 0x236E, 0x2376, 0x2364, 0x2382, 0xEE33FBC3, 0x235A, 0x231D, 0x231E, 0x232D, 0xEE38FBC3, 
0x2365, 0xEE3AFBC3, 0x236F, 0xEE3CFBC3, + 0xEE3DFBC3, 0xEE3EFBC3, 0xEE3FFBC3, 0xEE40FBC3, 0xEE41FBC3, 0x2325, 0xEE43FBC3, 0xEE44FBC3, 0xEE45FBC3, 0xEE46FBC3, 0x232C, 0xEE48FBC3, 0x23C6, 0xEE4AFBC3, 0x239C, + 0xEE4CFBC3, 0x23A7, 0x2359, 0x236E, 0xEE50FBC3, 0x2364, 0x2382, 0xEE53FBC3, 0x235A, 0xEE55FBC3, 0xEE56FBC3, 0x232D, 0xEE58FBC3, 0x2365, 0xEE5AFBC3, + 0x236F, 0xEE5CFBC3, 0x23A8, 0xEE5EFBC3, 0x2381, 0xEE60FBC3, 0x230D, 0x2325, 0xEE63FBC3, 0x23B1, 0xEE65FBC3, 0xEE66FBC3, 0x232C, 0x236A, 0x23C6, + 0x2387, 0xEE6BFBC3, 0x23A3, 0x23A7, 0x2359, 0x236E, 0x2376, 0x2364, 0x2382, 0xEE73FBC3, 0x235A, 0x231D, 0x231E, 0x232D, 0xEE78FBC3, + 0x2365, 0x236B, 0x236F, 0x230C, 0xEE7DFBC3, 0x2377, 0xEE7FFBC3, 0x230B, 0x230D, 0x2325, 0x2337, 0x23B1, 0x23B7, 0x2347, 0x232C, + 0x236A, 0x23C6, 0xEE8AFBC3, 0x239C, 0x23A3, 0x23A7, 0x2359, 0x236E, 0x2376, 0x2364, 0x2382, 0x2346, 0x235A, 0x231D, 0x231E, + 0x232D, 0x2338, 0x2365, 0x236B, 0x236F, 0xEE9CFBC3, 0xEE9DFBC3, 0xEE9EFBC3, 0xEE9FFBC3, 0xEEA0FBC3, 0x230D, 0x2325, 0x2337, 0xEEA4FBC3, 0x23B7, + 0x2347, 0x232C, 0x236A, 0x23C6, 0xEEAAFBC3, 0x239C, 0x23A3, 0x23A7, 0x2359, 0x236E, 0x2376, 0x2364, 0x2382, 0x2346, 0x235A, + 0x231D, 0x231E, 0x232D, 0x2338, 0x2365, 0x236B, 0x236F, 0xEEBCFBC3, 0xEEBDFBC3, 0xEEBEFBC3, 0xEEBFFBC3, 0xEEC0FBC3, 0xEEC1FBC3, 0xEEC2FBC3, 0xEEC3FBC3, + 0xEEC4FBC3, 0xEEC5FBC3, 0xEEC6FBC3, 0xEEC7FBC3, 0xEEC8FBC3, 0xEEC9FBC3, 0xEECAFBC3, 0xEECBFBC3, 0xEECCFBC3, 0xEECDFBC3, 0xEECEFBC3, 0xEECFFBC3, 0xEED0FBC3, 0xEED1FBC3, 0xEED2FBC3, + 0xEED3FBC3, 0xEED4FBC3, 0xEED5FBC3, 0xEED6FBC3, 0xEED7FBC3, 0xEED8FBC3, 0xEED9FBC3, 0xEEDAFBC3, 0xEEDBFBC3, 0xEEDCFBC3, 0xEEDDFBC3, 0xEEDEFBC3, 0xEEDFFBC3, 0xEEE0FBC3, 0xEEE1FBC3, + 0xEEE2FBC3, 0xEEE3FBC3, 0xEEE4FBC3, 0xEEE5FBC3, 0xEEE6FBC3, 0xEEE7FBC3, 0xEEE8FBC3, 0xEEE9FBC3, 0xEEEAFBC3, 0xEEEBFBC3, 0xEEECFBC3, 0xEEEDFBC3, 0xEEEEFBC3, 0xEEEFFBC3, 0x4FB, + 0x4FC, 0xEEF2FBC3, 0xEEF3FBC3, 0xEEF4FBC3, 0xEEF5FBC3, 0xEEF6FBC3, 0xEEF7FBC3, 0xEEF8FBC3, 0xEEF9FBC3, 0xEEFAFBC3, 0xEEFBFBC3, 0xEEFCFBC3, 
0xEEFDFBC3, 0xEEFEFBC3, 0xEEFFFBC3, + 0xEF00FBC3, 0xEF01FBC3, 0xEF02FBC3, 0xEF03FBC3, 0xEF04FBC3, 0xEF05FBC3, 0xEF06FBC3, 0xEF07FBC3, 0xEF08FBC3, 0xEF09FBC3, 0xEF0AFBC3, 0xEF0BFBC3, 0xEF0CFBC3, 0xEF0DFBC3, 0xEF0EFBC3, + 0xEF0FFBC3, 0xEF10FBC3, 0xEF11FBC3, 0xEF12FBC3, 0xEF13FBC3, 0xEF14FBC3, 0xEF15FBC3, 0xEF16FBC3, 0xEF17FBC3, 0xEF18FBC3, 0xEF19FBC3, 0xEF1AFBC3, 0xEF1BFBC3, 0xEF1CFBC3, 0xEF1DFBC3, + 0xEF1EFBC3, 0xEF1FFBC3, 0xEF20FBC3, 0xEF21FBC3, 0xEF22FBC3, 0xEF23FBC3, 0xEF24FBC3, 0xEF25FBC3, 0xEF26FBC3, 0xEF27FBC3, 0xEF28FBC3, 0xEF29FBC3, 0xEF2AFBC3, 0xEF2BFBC3, 0xEF2CFBC3, + 0xEF2DFBC3, 0xEF2EFBC3, 0xEF2FFBC3, 0xEF30FBC3, 0xEF31FBC3, 0xEF32FBC3, 0xEF33FBC3, 0xEF34FBC3, 0xEF35FBC3, 0xEF36FBC3, 0xEF37FBC3, 0xEF38FBC3, 0xEF39FBC3, 0xEF3AFBC3, 0xEF3BFBC3, + 0xEF3CFBC3, 0xEF3DFBC3, 0xEF3EFBC3, 0xEF3FFBC3, 0xEF40FBC3, 0xEF41FBC3, 0xEF42FBC3, 0xEF43FBC3, 0xEF44FBC3, 0xEF45FBC3, 0xEF46FBC3, 0xEF47FBC3, 0xEF48FBC3, 0xEF49FBC3, 0xEF4AFBC3, + 0xEF4BFBC3, 0xEF4CFBC3, 0xEF4DFBC3, 0xEF4EFBC3, 0xEF4FFBC3, 0xEF50FBC3, 0xEF51FBC3, 0xEF52FBC3, 0xEF53FBC3, 0xEF54FBC3, 0xEF55FBC3, 0xEF56FBC3, 0xEF57FBC3, 0xEF58FBC3, 0xEF59FBC3, + 0xEF5AFBC3, 0xEF5BFBC3, 0xEF5CFBC3, 0xEF5DFBC3, 0xEF5EFBC3, 0xEF5FFBC3, 0xEF60FBC3, 0xEF61FBC3, 0xEF62FBC3, 0xEF63FBC3, 0xEF64FBC3, 0xEF65FBC3, 0xEF66FBC3, 0xEF67FBC3, 0xEF68FBC3, + 0xEF69FBC3, 0xEF6AFBC3, 0xEF6BFBC3, 0xEF6CFBC3, 0xEF6DFBC3, 0xEF6EFBC3, 0xEF6FFBC3, 0xEF70FBC3, 0xEF71FBC3, 0xEF72FBC3, 0xEF73FBC3, 0xEF74FBC3, 0xEF75FBC3, 0xEF76FBC3, 0xEF77FBC3, + 0xEF78FBC3, 0xEF79FBC3, 0xEF7AFBC3, 0xEF7BFBC3, 0xEF7CFBC3, 0xEF7DFBC3, 0xEF7EFBC3, 0xEF7FFBC3, 0xEF80FBC3, 0xEF81FBC3, 0xEF82FBC3, 0xEF83FBC3, 0xEF84FBC3, 0xEF85FBC3, 0xEF86FBC3, + 0xEF87FBC3, 0xEF88FBC3, 0xEF89FBC3, 0xEF8AFBC3, 0xEF8BFBC3, 0xEF8CFBC3, 0xEF8DFBC3, 0xEF8EFBC3, 0xEF8FFBC3, 0xEF90FBC3, 0xEF91FBC3, 0xEF92FBC3, 0xEF93FBC3, 0xEF94FBC3, 0xEF95FBC3, + 0xEF96FBC3, 0xEF97FBC3, 0xEF98FBC3, 0xEF99FBC3, 0xEF9AFBC3, 0xEF9BFBC3, 0xEF9CFBC3, 0xEF9DFBC3, 0xEF9EFBC3, 0xEF9FFBC3, 0xEFA0FBC3, 
0xEFA1FBC3, 0xEFA2FBC3, 0xEFA3FBC3, 0xEFA4FBC3, + 0xEFA5FBC3, 0xEFA6FBC3, 0xEFA7FBC3, 0xEFA8FBC3, 0xEFA9FBC3, 0xEFAAFBC3, 0xEFABFBC3, 0xEFACFBC3, 0xEFADFBC3, 0xEFAEFBC3, 0xEFAFFBC3, 0xEFB0FBC3, 0xEFB1FBC3, 0xEFB2FBC3, 0xEFB3FBC3, + 0xEFB4FBC3, 0xEFB5FBC3, 0xEFB6FBC3, 0xEFB7FBC3, 0xEFB8FBC3, 0xEFB9FBC3, 0xEFBAFBC3, 0xEFBBFBC3, 0xEFBCFBC3, 0xEFBDFBC3, 0xEFBEFBC3, 0xEFBFFBC3, 0xEFC0FBC3, 0xEFC1FBC3, 0xEFC2FBC3, + 0xEFC3FBC3, 0xEFC4FBC3, 0xEFC5FBC3, 0xEFC6FBC3, 0xEFC7FBC3, 0xEFC8FBC3, 0xEFC9FBC3, 0xEFCAFBC3, 0xEFCBFBC3, 0xEFCCFBC3, 0xEFCDFBC3, 0xEFCEFBC3, 0xEFCFFBC3, 0xEFD0FBC3, 0xEFD1FBC3, + 0xEFD2FBC3, 0xEFD3FBC3, 0xEFD4FBC3, 0xEFD5FBC3, 0xEFD6FBC3, 0xEFD7FBC3, 0xEFD8FBC3, 0xEFD9FBC3, 0xEFDAFBC3, 0xEFDBFBC3, 0xEFDCFBC3, 0xEFDDFBC3, 0xEFDEFBC3, 0xEFDFFBC3, 0xEFE0FBC3, + 0xEFE1FBC3, 0xEFE2FBC3, 0xEFE3FBC3, 0xEFE4FBC3, 0xEFE5FBC3, 0xEFE6FBC3, 0xEFE7FBC3, 0xEFE8FBC3, 0xEFE9FBC3, 0xEFEAFBC3, 0xEFEBFBC3, 0xEFECFBC3, 0xEFEDFBC3, 0xEFEEFBC3, 0xEFEFFBC3, + 0xEFF0FBC3, 0xEFF1FBC3, 0xEFF2FBC3, 0xEFF3FBC3, 0xEFF4FBC3, 0xEFF5FBC3, 0xEFF6FBC3, 0xEFF7FBC3, 0xEFF8FBC3, 0xEFF9FBC3, 0xEFFAFBC3, 0xEFFBFBC3, 0xEFFCFBC3, 0xEFFDFBC3, 0xEFFEFBC3, + 0xEFFFFBC3, 0x11C7, 0x11C8, 0x11C9, 0x11CA, 0x11CB, 0x11CC, 0x11CD, 0x11CE, 0x11CF, 0x11D0, 0x11D1, 0x11D2, 0x11D3, 0x11D4, + 0x11D5, 0x11D6, 0x11D7, 0x11D8, 0x11D9, 0x11DA, 0x11DB, 0x11DC, 0x11DD, 0x11DE, 0x11DF, 0x11E0, 0x11E1, 0x11E2, 0x11E3, + 0x11E4, 0x11E5, 0x11E6, 0x11E7, 0x11E8, 0x11E9, 0x11EA, 0x11EB, 0x11EC, 0x11ED, 0x11EE, 0x11EF, 0x11F0, 0x11F1, 0x11F2, + 0xF02CFBC3, 0xF02DFBC3, 0xF02EFBC3, 0xF02FFBC3, 0x11F3, 0x11F4, 0x11F5, 0x11F6, 0x11F7, 0x11F8, 0x11F9, 0x11FA, 0x11FB, 0x11FC, 0x11FD, + 0x11FE, 0x11FF, 0x1200, 0x1201, 0x1202, 0x1203, 0x1204, 0x1205, 0x1206, 0x1207, 0x1208, 0x1209, 0x120A, 0x120B, 0x120C, + 0x120D, 0x120E, 0x120F, 0x1210, 0x1211, 0x1212, 0x1213, 0x1214, 0x1215, 0x1216, 0x1217, 0x1218, 0x1219, 0x121A, 0x121B, + 0x121C, 0x121D, 0x121E, 0x121F, 0x1220, 0x1221, 0x1222, 0x1223, 0x1224, 0x1225, 0x1226, 0x1227, 0x1228, 
0x1229, 0x122A, + 0x122B, 0x122C, 0x122D, 0x122E, 0x122F, 0x1230, 0x1231, 0x1232, 0x1233, 0x1234, 0x1235, 0x1236, 0x1237, 0x1238, 0x1239, + 0x123A, 0x123B, 0x123C, 0x123D, 0x123E, 0x123F, 0x1240, 0x1241, 0x1242, 0x1243, 0x1244, 0x1245, 0x1246, 0x1247, 0x1248, + 0x1249, 0x124A, 0x124B, 0x124C, 0x124D, 0x124E, 0x124F, 0x1250, 0x1251, 0x1252, 0x1253, 0x1254, 0x1255, 0x1256, 0xF094FBC3, + 0xF095FBC3, 0xF096FBC3, 0xF097FBC3, 0xF098FBC3, 0xF099FBC3, 0xF09AFBC3, 0xF09BFBC3, 0xF09CFBC3, 0xF09DFBC3, 0xF09EFBC3, 0xF09FFBC3, 0x1257, 0x1258, 0x1259, 0x125A, + 0x125B, 0x125C, 0x125D, 0x125E, 0x125F, 0x1260, 0x1261, 0x1262, 0x1263, 0x1264, 0x1265, 0xF0AFFBC3, 0xF0B0FBC3, 0x1266, 0x1267, + 0x1268, 0x1269, 0x126A, 0x126B, 0x126C, 0x126D, 0x126E, 0x126F, 0x1270, 0x1271, 0x1272, 0x1273, 0x1274, 0xF0C0FBC3, 0x1275, + 0x1276, 0x1277, 0x1278, 0x1279, 0x127A, 0x127B, 0x127C, 0x127D, 0x127E, 0x127F, 0x1280, 0x1281, 0x1282, 0x1283, 0xF0D0FBC3, + 0x1284, 0x1285, 0x1286, 0x1287, 0x1288, 0x1289, 0x128A, 0x128B, 0x128C, 0x128D, 0x128E, 0x128F, 0x1290, 0x1291, 0x1292, + 0x1293, 0x1294, 0x1295, 0x1296, 0x1297, 0x1298, 0x1299, 0x129A, 0x129B, 0x129C, 0x129D, 0x129E, 0x129F, 0x12A0, 0x12A1, + 0x12A2, 0x12A3, 0x12A4, 0x12A5, 0x12A6, 0x12A7, 0x12A8, 0xF0F6FBC3, 0xF0F7FBC3, 0xF0F8FBC3, 0xF0F9FBC3, 0xF0FAFBC3, 0xF0FBFBC3, 0xF0FCFBC3, 0xF0FDFBC3, + 0xF0FEFBC3, 0xF0FFFBC3, 0x2771C3D, 0x2221C3D, 0x2221C3E, 0x2221C3F, 0x2221C40, 0x2221C41, 0x2221C42, 0x2221C43, 0x2221C44, 0x2221C45, 0x2221C46, 0x1C3D, 0x1C3D, + 0xF10DFBC3, 0xF10EFBC3, 0xF10FFBC3, 0x3181C470317, 0x3181C600317, 0x3181C7A0317, 0x3181C8F0317, 0x3181CAA0317, 0x3181CE50317, 0x3181CF40317, 0x3181D180317, 0x3181D320317, 0x3181D4C0317, 0x3181D650317, 0x3181D770317, + 0x3181DAA0317, 0x3181DB90317, 0x3181DDD0317, 0x3181E0C0317, 0x3181E210317, 0x3181E330317, 0x3181E710317, 0x3181E950317, 0x3181EB50317, 0x3181EE30317, 0x3181EF50317, 0x3181EFF0317, 0x3181F0B0317, 0x3181F210317, 0x37A1E710379, + 0x1C7A, 0x1E33, 0x1C8F1C7A, 0x1F211EF5, 0xF12FFBC3, 
0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, + 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, + 0x1F21, 0x1EE31D18, 0x1EE31DAA, 0x1C8F1E71, 0x1E711E71, 0x1EE31E0C1E0C, 0x1C7A1EF5, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, 0x1CF4, 0x1D18, + 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, 0x1EE3, 0x1EF5, + 0x1EFF, 0x1F0B, 0x1F21, 0x1C7A1DAA, 0x1C8F1DAA, 0xF16CFBC3, 0xF16DFBC3, 0xF16EFBC3, 0xF16FFBC3, 0x1C47, 0x1C60, 0x1C7A, 0x1C8F, 0x1CAA, 0x1CE5, + 0x1CF4, 0x1D18, 0x1D32, 0x1D4C, 0x1D65, 0x1D77, 0x1DAA, 0x1DB9, 0x1DDD, 0x1E0C, 0x1E21, 0x1E33, 0x1E71, 0x1E95, 0x1EB5, + 0x1EE3, 0x1EF5, 0x1EFF, 0x1F0B, 0x1F21, 0x1E0C, 0x1C7A1D32, 0x1C471E0C, 0x1C471E71, 0x1C601C47, 0x1C7A1EF5, 0x1D4C1C8F, 0x1D771C7A, 0x1D771DDD1DDD1C7A, 0x1CAA1CAA1E331CE5, + 0x1C8F1D32, 0x1EF51CAA1DB9, 0x1CF41DB9, 0x1D651DDD, 0x1E711DDD1E71, 0x2601E0C1EB5, 0x1E711EE3, 0x1C8F1C40, 0xFFFD, 0x1D651C3F, 0x1D651C41, 0x1D651C45, 0x1C3E02771C42, 0x1C3E02771C44, 0x1C3F02771C3F1C3F, + 0x1E0C1C3D1C43, 0x1E0C1C3D1C3F1C3E, 0x1C8F, 0x1C7A1D18, 0x1E331C8F1D18, 0xFFFD, 0xFFFD, 0x1EE31D181E71, 0x1C8F1D181EB5, 0x1C8F1DDD1EE3, 0xF1ADFBC3, 0xF1AEFBC3, 0xF1AFFBC3, 0xF1B0FBC3, 0xF1B1FBC3, + 0xF1B2FBC3, 0xF1B3FBC3, 0xF1B4FBC3, 0xF1B5FBC3, 0xF1B6FBC3, 0xF1B7FBC3, 0xF1B8FBC3, 0xF1B9FBC3, 0xF1BAFBC3, 0xF1BBFBC3, 0xF1BCFBC3, 0xF1BDFBC3, 0xF1BEFBC3, 0xF1BFFBC3, 0xF1C0FBC3, + 0xF1C1FBC3, 0xF1C2FBC3, 0xF1C3FBC3, 0xF1C4FBC3, 0xF1C5FBC3, 0xF1C6FBC3, 0xF1C7FBC3, 0xF1C8FBC3, 0xF1C9FBC3, 0xF1CAFBC3, 0xF1CBFBC3, 0xF1CCFBC3, 0xF1CDFBC3, 0xF1CEFBC3, 0xF1CFFBC3, + 0xF1D0FBC3, 0xF1D1FBC3, 0xF1D2FBC3, 0xF1D3FBC3, 0xF1D4FBC3, 0xF1D5FBC3, 0xF1D6FBC3, 0xF1D7FBC3, 0xF1D8FBC3, 0xF1D9FBC3, 0xF1DAFBC3, 0xF1DBFBC3, 0xF1DCFBC3, 0xF1DDFBC3, 0xF1DEFBC3, + 0xF1DFFBC3, 0xF1E0FBC3, 0xF1E1FBC3, 0xF1E2FBC3, 0xF1E3FBC3, 0xF1E4FBC3, 0xF1E5FBC3, 0xA07, 0xA08, 0xA09, 0xA0A, 0xA0B, 0xA0C, 0xA0D, 
0xA0E, + 0xA0F, 0xA10, 0xA11, 0xA12, 0xA13, 0xA14, 0xA15, 0xA16, 0xA17, 0xA18, 0xA19, 0xA1A, 0xA1B, 0xA1C, 0xA1D, + 0xA1E, 0xA1F, 0xA20, 0x3D603D78, 0x3D643D64, 0x3D65, 0xF203FBC3, 0xF204FBC3, 0xF205FBC3, 0xF206FBC3, 0xF207FBC3, 0xF208FBC3, 0xF209FBC3, 0xF20AFBC3, 0xF20BFBC3, + 0xF20CFBC3, 0xF20DFBC3, 0xF20EFBC3, 0xF20FFBC3, 0xE24BFB40, 0xDB57FB40, 0xD3CCFB40, 0x3D6D, 0xCE8CFB40, 0xD91AFB40, 0x89E3FB41, 0xD929FB40, 0xCEA4FB40, 0xE620FB40, 0xF121FB40, + 0xE599FB40, 0xD24DFB40, 0xDF8CFB40, 0xD18DFB40, 0xE5B0FB40, 0xD21DFB40, 0xFD42FB40, 0xF51FFB40, 0x8CA9FB41, 0xD8F0FB40, 0xD439FB40, 0xEF14FB40, 0xE295FB40, 0xE355FB40, 0xCE00FB40, + 0xCE09FB40, 0x904AFB41, 0xDDE6FB40, 0xCE2DFB40, 0xD3F3FB40, 0xE307FB40, 0x8D70FB41, 0xE253FB40, 0xF981FB40, 0xFA7AFB40, 0xD408FB40, 0xEE80FB40, 0xE709FB40, 0xE708FB40, 0xF533FB40, + 0xD272FB40, 0xD5B6FB40, 0x914DFB41, 0xF23CFBC3, 0xF23DFBC3, 0xF23EFBC3, 0xF23FFBC3, 0x37AE72CFB400379, 0x37ACE09FB400379, 0x37ACE8CFB400379, 0x37ADB89FB400379, 0x37AF0B9FB400379, 0x37AE253FB400379, 0x37AF6D7FB400379, 0x37AD2DDFB400379, + 0x37AE557FB400379, 0xF249FBC3, 0xF24AFBC3, 0xF24BFBC3, 0xF24CFBC3, 0xF24DFBC3, 0xF24EFBC3, 0xF24FFBC3, 0xDF97FB40, 0xD3EFFB40, 0xF252FBC3, 0xF253FBC3, 0xF254FBC3, 0xF255FBC3, 0xF256FBC3, + 0xF257FBC3, 0xF258FBC3, 0xF259FBC3, 0xF25AFBC3, 0xF25BFBC3, 0xF25CFBC3, 0xF25DFBC3, 0xF25EFBC3, 0xF25FFBC3, 0xF260FBC3, 0xF261FBC3, 0xF262FBC3, 0xF263FBC3, 0xF264FBC3, 0xF265FBC3, + 0xF266FBC3, 0xF267FBC3, 0xF268FBC3, 0xF269FBC3, 0xF26AFBC3, 0xF26BFBC3, 0xF26CFBC3, 0xF26DFBC3, 0xF26EFBC3, 0xF26FFBC3, 0xF270FBC3, 0xF271FBC3, 0xF272FBC3, 0xF273FBC3, 0xF274FBC3, + 0xF275FBC3, 0xF276FBC3, 0xF277FBC3, 0xF278FBC3, 0xF279FBC3, 0xF27AFBC3, 0xF27BFBC3, 0xF27CFBC3, 0xF27DFBC3, 0xF27EFBC3, 0xF27FFBC3, 0xF280FBC3, 0xF281FBC3, 0xF282FBC3, 0xF283FBC3, + 0xF284FBC3, 0xF285FBC3, 0xF286FBC3, 0xF287FBC3, 0xF288FBC3, 0xF289FBC3, 0xF28AFBC3, 0xF28BFBC3, 0xF28CFBC3, 0xF28DFBC3, 0xF28EFBC3, 0xF28FFBC3, 0xF290FBC3, 0xF291FBC3, 0xF292FBC3, + 0xF293FBC3, 0xF294FBC3, 
0xF295FBC3, 0xF296FBC3, 0xF297FBC3, 0xF298FBC3, 0xF299FBC3, 0xF29AFBC3, 0xF29BFBC3, 0xF29CFBC3, 0xF29DFBC3, 0xF29EFBC3, 0xF29FFBC3, 0xF2A0FBC3, 0xF2A1FBC3, + 0xF2A2FBC3, 0xF2A3FBC3, 0xF2A4FBC3, 0xF2A5FBC3, 0xF2A6FBC3, 0xF2A7FBC3, 0xF2A8FBC3, 0xF2A9FBC3, 0xF2AAFBC3, 0xF2ABFBC3, 0xF2ACFBC3, 0xF2ADFBC3, 0xF2AEFBC3, 0xF2AFFBC3, 0xF2B0FBC3, + 0xF2B1FBC3, 0xF2B2FBC3, 0xF2B3FBC3, 0xF2B4FBC3, 0xF2B5FBC3, 0xF2B6FBC3, 0xF2B7FBC3, 0xF2B8FBC3, 0xF2B9FBC3, 0xF2BAFBC3, 0xF2BBFBC3, 0xF2BCFBC3, 0xF2BDFBC3, 0xF2BEFBC3, 0xF2BFFBC3, + 0xF2C0FBC3, 0xF2C1FBC3, 0xF2C2FBC3, 0xF2C3FBC3, 0xF2C4FBC3, 0xF2C5FBC3, 0xF2C6FBC3, 0xF2C7FBC3, 0xF2C8FBC3, 0xF2C9FBC3, 0xF2CAFBC3, 0xF2CBFBC3, 0xF2CCFBC3, 0xF2CDFBC3, 0xF2CEFBC3, + 0xF2CFFBC3, 0xF2D0FBC3, 0xF2D1FBC3, 0xF2D2FBC3, 0xF2D3FBC3, 0xF2D4FBC3, 0xF2D5FBC3, 0xF2D6FBC3, 0xF2D7FBC3, 0xF2D8FBC3, 0xF2D9FBC3, 0xF2DAFBC3, 0xF2DBFBC3, 0xF2DCFBC3, 0xF2DDFBC3, + 0xF2DEFBC3, 0xF2DFFBC3, 0xF2E0FBC3, 0xF2E1FBC3, 0xF2E2FBC3, 0xF2E3FBC3, 0xF2E4FBC3, 0xF2E5FBC3, 0xF2E6FBC3, 0xF2E7FBC3, 0xF2E8FBC3, 0xF2E9FBC3, 0xF2EAFBC3, 0xF2EBFBC3, 0xF2ECFBC3, + 0xF2EDFBC3, 0xF2EEFBC3, 0xF2EFFBC3, 0xF2F0FBC3, 0xF2F1FBC3, 0xF2F2FBC3, 0xF2F3FBC3, 0xF2F4FBC3, 0xF2F5FBC3, 0xF2F6FBC3, 0xF2F7FBC3, 0xF2F8FBC3, 0xF2F9FBC3, 0xF2FAFBC3, 0xF2FBFBC3, + 0xF2FCFBC3, 0xF2FDFBC3, 0xF2FEFBC3, 0xF2FFFBC3, 0x12A9, 0x12AA, 0x12AB, 0x12AC, 0x12AD, 0x12AE, 0x12AF, 0x12B0, 0x12B1, 0x12B2, 0x12B3, + 0x12B4, 0x12B5, 0x12B6, 0x12B7, 0x12B8, 0x12B9, 0x12BA, 0x12BB, 0x12BC, 0x12BD, 0x12BE, 0x12BF, 0x12C0, 0x12C1, 0x12C2, + 0x12C3, 0x12C4, 0x12C5, 0x12C6, 0x12C7, 0x12C8, 0x12C9, 0x12CA, 0x12CB, 0x12CC, 0x12CD, 0x12CE, 0x12CF, 0x12D0, 0x12D1, + 0x12D2, 0x12D3, 0x12D4, 0x12D5, 0x12D6, 0x12D7, 0x12D8, 0x12D9, 0x12DA, 0x12DB, 0x12DC, 0x12DD, 0x12DE, 0x12DF, 0x12E0, + 0x12E1, 0x12E2, 0x12E3, 0x12E4, 0x12E5, 0x12E6, 0x12E7, 0x12E8, 0x12E9, 0x12EA, 0x12EB, 0x12EC, 0x12ED, 0x12EE, 0x12EF, + 0x12F0, 0x12F1, 0x12F2, 0x12F3, 0x12F4, 0x12F5, 0x12F6, 0x12F7, 0x12F8, 0x12F9, 0x12FA, 0x12FB, 0x12FC, 0x12FD, 0x12FE, + 
0x12FF, 0x1300, 0x1301, 0x1302, 0x1303, 0x1304, 0x1305, 0x1306, 0x1307, 0x1308, 0x1309, 0x130A, 0x130B, 0x130C, 0x130D, + 0x130E, 0x130F, 0x1310, 0x1311, 0x1312, 0x1313, 0x1314, 0x1315, 0x1316, 0x1317, 0x1318, 0x1319, 0x131A, 0x131B, 0x131C, + 0x131D, 0x131E, 0x131F, 0x1320, 0x1321, 0x1322, 0x1323, 0x1324, 0x1325, 0x1326, 0x1327, 0x1328, 0x1329, 0x132A, 0x132B, + 0x132C, 0x132D, 0x132E, 0x132F, 0x1330, 0x1331, 0x1332, 0x1333, 0x1334, 0x1335, 0x1336, 0x1337, 0x1338, 0x1339, 0x133A, + 0x133B, 0x133C, 0x133D, 0x133E, 0x133F, 0x1340, 0x1341, 0x1342, 0x1343, 0x1344, 0x1345, 0x1346, 0x1347, 0x1348, 0x1349, + 0x134A, 0x134B, 0x134C, 0x134D, 0x134E, 0x134F, 0x1350, 0x1351, 0x1352, 0x1353, 0x1354, 0x1355, 0x1356, 0x1357, 0x1358, + 0x1359, 0x135A, 0x135B, 0x135C, 0x135D, 0x135E, 0x135F, 0x1360, 0x1361, 0x1362, 0x1363, 0x1364, 0x1365, 0x1366, 0x1367, + 0x1368, 0x1369, 0x136A, 0x136B, 0x136C, 0x136D, 0x136E, 0x136F, 0x1370, 0x1371, 0x1372, 0x1373, 0x1374, 0x1375, 0x1376, + 0x1377, 0x1378, 0x1379, 0x137A, 0x137B, 0x137C, 0x137D, 0x137E, 0x137F, 0x1380, 0x1381, 0x1382, 0x1383, 0x1384, 0x1385, + 0x1386, 0x1387, 0x1388, 0x1389, 0x138A, 0x138B, 0x138C, 0x138D, 0x138E, 0x138F, 0x1390, 0x1391, 0x1392, 0x1393, 0x1394, + 0x1395, 0x1396, 0x1397, 0x1398, 0x1399, 0x139A, 0x139B, 0x139C, 0x139D, 0x139E, 0x139F, 0x13A0, 0x13A1, 0x13A2, 0x13A3, + 0x13A4, 0x13A5, 0x13A6, 0x13A7, 0x13A8, 0x13A9, 0x13AA, 0x13AB, 0x13AC, 0x13AD, 0x13AE, 0x13AF, 0x13B0, 0x13B1, 0x13B2, + 0x13B3, 0x13B4, 0x13B5, 0x13B6, 0x13B7, 0x13B8, 0x13B9, 0x13BA, 0x13BB, 0x13BC, 0x13BD, 0x13BE, 0x13BF, 0x13C0, 0x13C1, + 0x13C2, 0x13C3, 0x13C4, 0x13C5, 0x13C6, 0x13C7, 0x13C8, 0x13C9, 0x13CA, 0x13CB, 0x13CC, 0x13CD, 0x13CE, 0x13CF, 0x13D0, + 0x13D1, 0x13D2, 0x13D3, 0x13D4, 0x13D5, 0x13D6, 0x13D7, 0x13D8, 0x13D9, 0x13DA, 0x13DB, 0x13DC, 0x13DD, 0x13DE, 0x13DF, + 0x13E0, 0x13E1, 0x13E2, 0x13E3, 0x13E4, 0x13E5, 0x13E6, 0x13E7, 0x13E8, 0x13E9, 0x13EA, 0x13EB, 0x13EC, 0x13ED, 0x13EE, + 0x13EF, 0x13F0, 0x13F1, 0x13F2, 0x13F3, 0x13F4, 
0x13F5, 0x13F6, 0x13F7, 0x13F8, 0x13F9, 0x13FA, 0x13FB, 0x13FC, 0x13FD, + 0x13FE, 0x13FF, 0x1400, 0x1401, 0x1402, 0x1403, 0x1404, 0x1405, 0x1406, 0x1407, 0x1408, 0x1409, 0x140A, 0x140B, 0x140C, + 0x140D, 0x140E, 0x140F, 0x1410, 0x1411, 0x1412, 0x1413, 0x1414, 0x1415, 0x1416, 0x1417, 0x1418, 0x1419, 0x141A, 0x141B, + 0x141C, 0x141D, 0x141E, 0x141F, 0x1420, 0x1421, 0x1422, 0x1423, 0x1424, 0x1425, 0x1426, 0x1427, 0x1428, 0x1429, 0x142A, + 0x142B, 0x142C, 0x142D, 0x142E, 0x142F, 0x1430, 0x1431, 0x1432, 0x1433, 0x1434, 0x1435, 0x1436, 0x1437, 0x1438, 0x1439, + 0x143A, 0x143B, 0x143C, 0x143D, 0x143E, 0x143F, 0x1440, 0x1441, 0x1442, 0x1443, 0x1444, 0x1445, 0x1446, 0x1447, 0x1448, + 0x1449, 0x144A, 0x144B, 0x144C, 0x144D, 0x144E, 0x144F, 0x1450, 0x1451, 0x1452, 0x1453, 0x1454, 0x1455, 0x1456, 0x1457, + 0x1458, 0x1459, 0x145A, 0x145B, 0x145C, 0x145D, 0x145E, 0x145F, 0x1460, 0x1461, 0x1462, 0x1463, 0x1464, 0x1465, 0x1466, + 0x1467, 0x1468, 0x1469, 0x146A, 0x146B, 0x146C, 0x146D, 0x146E, 0x146F, 0x1470, 0x1471, 0x1472, 0x1473, 0x1474, 0x1475, + 0x1476, 0x1477, 0x1478, 0x1479, 0x147A, 0x147B, 0x147C, 0x147D, 0x147E, 0x147F, 0x1480, 0x1481, 0x1482, 0x1483, 0x1484, + 0x1485, 0x1486, 0x1487, 0x1488, 0x1489, 0x148A, 0x148B, 0x148C, 0x148D, 0x148E, 0x148F, 0x1490, 0x1491, 0x1492, 0x1493, + 0x1494, 0x1495, 0x1496, 0x1497, 0x1498, 0x1499, 0x149A, 0x149B, 0x149C, 0x149D, 0x149E, 0x149F, 0x14A0, 0x14A1, 0x14A2, + 0x14A3, 0x14A4, 0x14A5, 0x14A6, 0x14A7, 0x14A8, 0x14A9, 0x14AA, 0x14AB, 0x14AC, 0x14AD, 0x14AE, 0x14AF, 0x14B0, 0x14B1, + 0x14B2, 0x14B3, 0x14B4, 0x14B5, 0x14B6, 0x14B7, 0x14B8, 0x14B9, 0x14BA, 0x14BB, 0x14BC, 0x14BD, 0x14BE, 0x14BF, 0x14C0, + 0x14C1, 0x14C2, 0x14C3, 0x14C4, 0x14C5, 0x14C6, 0x14C7, 0x14C8, 0x14C9, 0x14CA, 0x14CB, 0x14CC, 0x14CD, 0x14CE, 0x14CF, + 0x14D0, 0x14D1, 0x14D2, 0x14D3, 0x14D4, 0x14D5, 0x14D6, 0x14D7, 0x14D8, 0x14D9, 0x14DA, 0x14DB, 0x14DC, 0x14DD, 0x14DE, + 0x14DF, 0x14E0, 0x14E1, 0x14E2, 0x14E3, 0x14E4, 0x14E5, 0x14E6, 0x14E7, 0x14E8, 0x14E9, 0x14EA, 
0x14EB, 0x14EC, 0x14ED, + 0x14EE, 0x14EF, 0x14F0, 0x14F1, 0x14F2, 0x14F3, 0x14F4, 0x14F5, 0x14F6, 0x14F7, 0x14F8, 0x14F9, 0x14FA, 0x14FB, 0x14FC, + 0x14FD, 0x14FE, 0x14FF, 0x1500, 0x1501, 0x1502, 0x1503, 0x1504, 0x1505, 0x1506, 0x1507, 0x1508, 0x1509, 0x150A, 0x150B, + 0x150C, 0x150D, 0x150E, 0x150F, 0x1510, 0x1511, 0x1512, 0x1513, 0x1514, 0x1515, 0x1516, 0x1517, 0x1518, 0x1519, 0x151A, + 0x151B, 0x151C, 0x151D, 0x151E, 0x151F, 0x1520, 0x1521, 0x1522, 0x1523, 0x1524, 0x1525, 0x1526, 0x1527, 0x1528, 0x1529, + 0x152A, 0x152B, 0x152C, 0x152D, 0x152E, 0x152F, 0x1530, 0x1531, 0x1532, 0x1533, 0x1534, 0x1535, 0x1536, 0x1537, 0x1538, + 0x1539, 0x153A, 0x153B, 0x153C, 0x153D, 0x153E, 0x153F, 0x1540, 0x1541, 0x1542, 0x1543, 0x1544, 0x1545, 0x1546, 0x1547, + 0x1548, 0x1549, 0x154A, 0x154B, 0x154C, 0x154D, 0x154E, 0x154F, 0x1550, 0x1551, 0x1552, 0x1553, 0x1554, 0x1555, 0x1556, + 0x1557, 0x1558, 0x1559, 0x155A, 0x155B, 0x155C, 0x155D, 0x155E, 0x155F, 0x1560, 0x1561, 0x1562, 0x1563, 0x1564, 0x1565, + 0x1566, 0x1567, 0x1568, 0x1569, 0x156A, 0x156B, 0x156C, 0x156D, 0x156E, 0x156F, 0x1570, 0x1571, 0x1572, 0x1573, 0x1574, + 0x1575, 0x1576, 0x1577, 0x1578, 0x1579, 0x157A, 0x157B, 0x157C, 0x157D, 0x157E, 0x157F, 0x1580, 0x1581, 0x1582, 0x1583, + 0x1584, 0x1585, 0x1586, 0x1587, 0x1588, 0x1589, 0x158A, 0x158B, 0x158C, 0x158D, 0x158E, 0x158F, 0x1590, 0x1591, 0x1592, + 0x1593, 0x1594, 0x1595, 0x1596, 0x1597, 0x1598, 0x1599, 0x159A, 0x159B, 0x159C, 0x159D, 0x159E, 0x159F, 0x15A0, 0x15A1, + 0x15A2, 0x15A3, 0x15A4, 0x15A5, 0x15A6, 0x15A7, 0x15A8, 0x15FB, 0x15FC, 0x15FD, 0x15FE, 0x15FF, 0x1600, 0x1601, 0x1602, + 0x1603, 0x1604, 0x1605, 0x1606, 0x1607, 0x1608, 0x1609, 0x160A, 0x160B, 0x160C, 0x160D, 0x160E, 0x160F, 0x1610, 0x1611, + 0x1612, 0x1613, 0x1614, 0x1615, 0x1616, 0x1617, 0x1618, 0x1619, 0x161A, 0x161B, 0x161C, 0x161D, 0x161E, 0x161F, 0x1620, + 0x1621, 0x1622, 0x1623, 0x1624, 0x1625, 0x1626, 0x1627, 0x1628, 0x1629, 0x162A, 0x162B, 0x162C, 0x162D, 0x162E, 0x162F, + 0x1630, 0x1631, 
0x1632, 0x1633, 0x1634, 0x1635, 0x1636, 0x1637, 0x1638, 0x1639, 0x163A, 0x163B, 0x163C, 0x163D, 0x163E, + 0x163F, 0x1640, 0x1641, 0x1642, 0x1643, 0x1644, 0x1645, 0x1646, 0x1647, 0x1648, 0x1649, 0x164A, 0x164B, 0x164C, 0x164D, + 0x164E, 0x164F, 0x1650, 0x1651, 0x1652, 0x1653, 0x1654, 0x1655, 0x1656, 0x1657, 0x1658, 0x1659, 0x165A, 0x165B, 0x165C, + 0x165D, 0x165E, 0x165F, 0x1660, 0x1661, 0x1662, 0x1663, 0x1664, 0x1665, 0x1666, 0x1667, 0x1668, 0x1669, 0x166A, 0x166B, + 0x166C, 0x166D, 0x166E, 0x166F, 0x1670, 0x1671, 0x1672, 0x1673, 0x1674, 0x1675, 0x1676, 0x1677, 0x1678, 0x1679, 0x167A, + 0x167B, 0x167C, 0x167D, 0x167E, 0x167F, 0x1680, 0x1681, 0x1682, 0x1683, 0x1684, 0x1685, 0x1686, 0x1687, 0x1688, 0x1689, + 0x168A, 0x168B, 0x168C, 0x168D, 0x168E, 0x168F, 0x1690, 0x1691, 0x1692, 0x1693, 0x1694, 0x1695, 0x1696, 0x1697, 0x1698, + 0x1699, 0x169A, 0x169B, 0x169C, 0x169D, 0x169E, 0x169F, 0x16A0, 0x16A1, 0x16A2, 0x16A3, 0x16A4, 0x16A5, 0x16A6, 0x16A7, + 0x16A8, 0x16A9, 0x16AA, 0x16AB, 0x16AC, 0x16AD, 0x16AE, 0x16AF, 0x16B0, 0x16B1, 0x16B2, 0x16B3, 0x16B4, 0x16B5, 0x16B6, + 0x16B7, 0x16B8, 0x16B9, 0x16BA, 0x16BB, 0x16BC, 0x16BD, 0x16BE, 0x16BF, 0x16C0, 0x16C1, 0x16C2, 0x16C3, 0x16C4, 0x16C5, + 0x16C6, 0x16C7, 0x16C8, 0x16C9, 0x16CA, 0x16CB, 0x16CC, 0x16CD, 0xF6D3FBC3, 0xF6D4FBC3, 0xF6D5FBC3, 0xF6D6FBC3, 0xF6D7FBC3, 0xF6D8FBC3, 0xF6D9FBC3, + 0xF6DAFBC3, 0xF6DBFBC3, 0xF6DCFBC3, 0xF6DDFBC3, 0xF6DEFBC3, 0xF6DFFBC3, 0x16CE, 0x16CF, 0x16D0, 0x16D1, 0x16D2, 0x16D3, 0x16D4, 0x16D5, 0x16D6, + 0x16D7, 0x16D8, 0x16D9, 0x16DA, 0xF6EDFBC3, 0xF6EEFBC3, 0xF6EFFBC3, 0x16DB, 0x16DC, 0x16DD, 0x16DE, 0x16DF, 0x16E0, 0x16E1, 0xF6F7FBC3, + 0xF6F8FBC3, 0xF6F9FBC3, 0xF6FAFBC3, 0xF6FBFBC3, 0xF6FCFBC3, 0xF6FDFBC3, 0xF6FEFBC3, 0xF6FFFBC3, 0x16E2, 0x16E3, 0x16E4, 0x16E5, 0x16E6, 0x16E7, 0x16E8, + 0x16E9, 0x16EA, 0x16EB, 0x16EC, 0x16ED, 0x16EE, 0x16EF, 0x16F0, 0x16F1, 0x16F2, 0x16F3, 0x16F4, 0x16F5, 0x16F6, 0x16F7, + 0x16F8, 0x16F9, 0x16FA, 0x16FB, 0x16FC, 0x16FD, 0x16FE, 0x16FF, 0x1700, 0x1701, 
0x1702, 0x1703, 0x1704, 0x1705, 0x1706, + 0x1707, 0x1708, 0x1709, 0x170A, 0x170B, 0x170C, 0x170D, 0x170E, 0x170F, 0x1710, 0x1711, 0x1712, 0x1713, 0x1714, 0x1715, + 0x1716, 0x1717, 0x1718, 0x1719, 0x171A, 0x171B, 0x171C, 0x171D, 0x171E, 0x171F, 0x1720, 0x1721, 0x1722, 0x1723, 0x1724, + 0x1725, 0x1726, 0x1727, 0x1728, 0x1729, 0x172A, 0x172B, 0x172C, 0x172D, 0x172E, 0x172F, 0x1730, 0x1731, 0x1732, 0x1733, + 0x1734, 0x1735, 0x1736, 0x1737, 0x1738, 0x1739, 0x173A, 0x173B, 0x173C, 0x173D, 0x173E, 0x173F, 0x1740, 0x1741, 0x1742, + 0x1743, 0x1744, 0x1745, 0x1746, 0x1747, 0x1748, 0x1749, 0x174A, 0x174B, 0x174C, 0x174D, 0x174E, 0x174F, 0x1750, 0x1751, + 0x1752, 0x1753, 0x1754, 0x1755, 0xF774FBC3, 0xF775FBC3, 0xF776FBC3, 0xF777FBC3, 0xF778FBC3, 0xF779FBC3, 0xF77AFBC3, 0xF77BFBC3, 0xF77CFBC3, 0xF77DFBC3, 0xF77EFBC3, + 0xF77FFBC3, 0x1756, 0x1757, 0x1758, 0x1759, 0x175A, 0x175B, 0x175C, 0x175D, 0x175E, 0x175F, 0x1760, 0x1761, 0x1762, 0x1763, + 0x1764, 0x1765, 0x1766, 0x1767, 0x1768, 0x1769, 0x176A, 0x176B, 0x176C, 0x176D, 0x176E, 0x176F, 0x1770, 0x1771, 0x1772, + 0x1773, 0x1774, 0x1775, 0x1776, 0x1777, 0x1778, 0x1779, 0x177A, 0x177B, 0x177C, 0x177D, 0x177E, 0x177F, 0x1780, 0x1781, + 0x1782, 0x1783, 0x1784, 0x1785, 0x1786, 0x1787, 0x1788, 0x1789, 0x178A, 0x178B, 0x178C, 0x178D, 0x178E, 0x178F, 0x1790, + 0x1791, 0x1792, 0x1793, 0x1794, 0x1795, 0x1796, 0x1797, 0x1798, 0x1799, 0x179A, 0x179B, 0x179C, 0x179D, 0x179E, 0x179F, + 0x17A0, 0x17A1, 0x17A2, 0x17A3, 0x17A4, 0x17A5, 0x17A6, 0x17A7, 0x17A8, 0x17A9, 0x17AA, 0xF7D5FBC3, 0xF7D6FBC3, 0xF7D7FBC3, 0xF7D8FBC3, + 0xF7D9FBC3, 0xF7DAFBC3, 0xF7DBFBC3, 0xF7DCFBC3, 0xF7DDFBC3, 0xF7DEFBC3, 0xF7DFFBC3, 0xF7E0FBC3, 0xF7E1FBC3, 0xF7E2FBC3, 0xF7E3FBC3, 0xF7E4FBC3, 0xF7E5FBC3, 0xF7E6FBC3, 0xF7E7FBC3, + 0xF7E8FBC3, 0xF7E9FBC3, 0xF7EAFBC3, 0xF7EBFBC3, 0xF7ECFBC3, 0xF7EDFBC3, 0xF7EEFBC3, 0xF7EFFBC3, 0xF7F0FBC3, 0xF7F1FBC3, 0xF7F2FBC3, 0xF7F3FBC3, 0xF7F4FBC3, 0xF7F5FBC3, 0xF7F6FBC3, + 0xF7F7FBC3, 0xF7F8FBC3, 0xF7F9FBC3, 0xF7FAFBC3, 0xF7FBFBC3, 
0xF7FCFBC3, 0xF7FDFBC3, 0xF7FEFBC3, 0xF7FFFBC3, 0x17AB, 0x17AC, 0x17AD, 0x17AE, 0x17AF, 0x17B0, + 0x17B1, 0x17B2, 0x17B3, 0x17B4, 0x17B5, 0x17B6, 0xF80CFBC3, 0xF80DFBC3, 0xF80EFBC3, 0xF80FFBC3, 0x17B7, 0x17B8, 0x17B9, 0x17BA, 0x17BB, + 0x17BC, 0x17BD, 0x17BE, 0x17BF, 0x17C0, 0x17C1, 0x17C2, 0x17C3, 0x17C4, 0x17C5, 0x17C6, 0x17C7, 0x17C8, 0x17C9, 0x17CA, + 0x17CB, 0x17CC, 0x17CD, 0x17CE, 0x17CF, 0x17D0, 0x17D1, 0x17D2, 0x17D3, 0x17D4, 0x17D5, 0x17D6, 0x17D7, 0x17D8, 0x17D9, + 0x17DA, 0x17DB, 0x17DC, 0x17DD, 0x17DE, 0x17DF, 0x17E0, 0x17E1, 0x17E2, 0x17E3, 0x17E4, 0x17E5, 0x17E6, 0x17E7, 0x17E8, + 0x17E9, 0x17EA, 0x17EB, 0x17EC, 0x17ED, 0x17EE, 0xF848FBC3, 0xF849FBC3, 0xF84AFBC3, 0xF84BFBC3, 0xF84CFBC3, 0xF84DFBC3, 0xF84EFBC3, 0xF84FFBC3, 0x17EF, + 0x17F0, 0x17F1, 0x17F2, 0x17F3, 0x17F4, 0x17F5, 0x17F6, 0x17F7, 0x17F8, 0xF85AFBC3, 0xF85BFBC3, 0xF85CFBC3, 0xF85DFBC3, 0xF85EFBC3, 0xF85FFBC3, + 0x17F9, 0x17FA, 0x17FB, 0x17FC, 0x17FD, 0x17FE, 0x17FF, 0x1800, 0x1801, 0x1802, 0x1803, 0x1804, 0x1805, 0x1806, 0x1807, + 0x1808, 0x1809, 0x180A, 0x180B, 0x180C, 0x180D, 0x180E, 0x180F, 0x1810, 0x1811, 0x1812, 0x1813, 0x1814, 0x1815, 0x1816, + 0x1817, 0x1818, 0x1819, 0x181A, 0x181B, 0x181C, 0x181D, 0x181E, 0x181F, 0x1820, 0xF888FBC3, 0xF889FBC3, 0xF88AFBC3, 0xF88BFBC3, 0xF88CFBC3, + 0xF88DFBC3, 0xF88EFBC3, 0xF88FFBC3, 0x1821, 0x1822, 0x1823, 0x1824, 0x1825, 0x1826, 0x1827, 0x1828, 0x1829, 0x182A, 0x182B, 0x182C, + 0x182D, 0x182E, 0x182F, 0x1830, 0x1831, 0x1832, 0x1833, 0x1834, 0x1835, 0x1836, 0x1837, 0x1838, 0x1839, 0x183A, 0x183B, + 0x183C, 0x183D, 0x183E, 0xF8AEFBC3, 0xF8AFFBC3, 0xF8B0FBC3, 0xF8B1FBC3, 0xF8B2FBC3, 0xF8B3FBC3, 0xF8B4FBC3, 0xF8B5FBC3, 0xF8B6FBC3, 0xF8B7FBC3, 0xF8B8FBC3, 0xF8B9FBC3, + 0xF8BAFBC3, 0xF8BBFBC3, 0xF8BCFBC3, 0xF8BDFBC3, 0xF8BEFBC3, 0xF8BFFBC3, 0xF8C0FBC3, 0xF8C1FBC3, 0xF8C2FBC3, 0xF8C3FBC3, 0xF8C4FBC3, 0xF8C5FBC3, 0xF8C6FBC3, 0xF8C7FBC3, 0xF8C8FBC3, + 0xF8C9FBC3, 0xF8CAFBC3, 0xF8CBFBC3, 0xF8CCFBC3, 0xF8CDFBC3, 0xF8CEFBC3, 0xF8CFFBC3, 0xF8D0FBC3, 
0xF8D1FBC3, 0xF8D2FBC3, 0xF8D3FBC3, 0xF8D4FBC3, 0xF8D5FBC3, 0xF8D6FBC3, 0xF8D7FBC3, + 0xF8D8FBC3, 0xF8D9FBC3, 0xF8DAFBC3, 0xF8DBFBC3, 0xF8DCFBC3, 0xF8DDFBC3, 0xF8DEFBC3, 0xF8DFFBC3, 0xF8E0FBC3, 0xF8E1FBC3, 0xF8E2FBC3, 0xF8E3FBC3, 0xF8E4FBC3, 0xF8E5FBC3, 0xF8E6FBC3, + 0xF8E7FBC3, 0xF8E8FBC3, 0xF8E9FBC3, 0xF8EAFBC3, 0xF8EBFBC3, 0xF8ECFBC3, 0xF8EDFBC3, 0xF8EEFBC3, 0xF8EFFBC3, 0xF8F0FBC3, 0xF8F1FBC3, 0xF8F2FBC3, 0xF8F3FBC3, 0xF8F4FBC3, 0xF8F5FBC3, + 0xF8F6FBC3, 0xF8F7FBC3, 0xF8F8FBC3, 0xF8F9FBC3, 0xF8FAFBC3, 0xF8FBFBC3, 0xF8FCFBC3, 0xF8FDFBC3, 0xF8FEFBC3, 0xF8FFFBC3, 0xF900FBC3, 0xF901FBC3, 0xF902FBC3, 0xF903FBC3, 0xF904FBC3, + 0xF905FBC3, 0xF906FBC3, 0xF907FBC3, 0xF908FBC3, 0xF909FBC3, 0xF90AFBC3, 0xF90BFBC3, 0xF90CFBC3, 0xF90DFBC3, 0xF90EFBC3, 0xF90FFBC3, 0x15A9, 0x15AA, 0x15AB, 0x15AC, + 0x15AD, 0x15AE, 0x15AF, 0x15B0, 0x15B1, 0x15B2, 0x15B3, 0x15B4, 0x15B5, 0x15B6, 0x15B7, 0xF91FFBC3, 0x15B8, 0x15B9, 0x15BA, + 0x15BB, 0x15BC, 0x15BD, 0x15BE, 0x15BF, 0xF928FBC3, 0xF929FBC3, 0xF92AFBC3, 0xF92BFBC3, 0xF92CFBC3, 0xF92DFBC3, 0xF92EFBC3, 0xF92FFBC3, 0x15C0, 0xF931FBC3, + 0xF932FBC3, 0x15C1, 0x15C2, 0x15C3, 0x15C4, 0x15C5, 0x15C6, 0x15C7, 0x15C8, 0x15C9, 0x15CA, 0x15CB, 0x15CC, 0xF93FFBC3, 0x15CD, + 0x15CE, 0x15CF, 0x15D0, 0x15D1, 0x15D2, 0x15D3, 0x15D4, 0x15D5, 0x15D6, 0x15D7, 0x15D8, 0xF94CFBC3, 0xF94DFBC3, 0xF94EFBC3, 0xF94FFBC3, + 0x15D9, 0x15DA, 0x15DB, 0x15DC, 0x15DD, 0x15DE, 0x15DF, 0x15E0, 0x15E1, 0x15E2, 0x15E3, 0x15E4, 0x15E5, 0x15E6, 0x15E7, + 0xF95FFBC3, 0xF960FBC3, 0xF961FBC3, 0xF962FBC3, 0xF963FBC3, 0xF964FBC3, 0xF965FBC3, 0xF966FBC3, 0xF967FBC3, 0xF968FBC3, 0xF969FBC3, 0xF96AFBC3, 0xF96BFBC3, 0xF96CFBC3, 0xF96DFBC3, + 0xF96EFBC3, 0xF96FFBC3, 0xF970FBC3, 0xF971FBC3, 0xF972FBC3, 0xF973FBC3, 0xF974FBC3, 0xF975FBC3, 0xF976FBC3, 0xF977FBC3, 0xF978FBC3, 0xF979FBC3, 0xF97AFBC3, 0xF97BFBC3, 0xF97CFBC3, + 0xF97DFBC3, 0xF97EFBC3, 0xF97FFBC3, 0x15E8, 0x15E9, 0x15EA, 0x15EB, 0x15EC, 0x15ED, 0x15EE, 0x15EF, 0x15F0, 0x15F1, 0x15F2, 0x15F3, + 0x15F4, 0x15F5, 0x15F6, 
0x15F7, 0x15F8, 0x15F9, 0xF992FBC3, 0xF993FBC3, 0xF994FBC3, 0xF995FBC3, 0xF996FBC3, 0xF997FBC3, 0xF998FBC3, 0xF999FBC3, 0xF99AFBC3, + 0xF99BFBC3, 0xF99CFBC3, 0xF99DFBC3, 0xF99EFBC3, 0xF99FFBC3, 0xF9A0FBC3, 0xF9A1FBC3, 0xF9A2FBC3, 0xF9A3FBC3, 0xF9A4FBC3, 0xF9A5FBC3, 0xF9A6FBC3, 0xF9A7FBC3, 0xF9A8FBC3, 0xF9A9FBC3, + 0xF9AAFBC3, 0xF9ABFBC3, 0xF9ACFBC3, 0xF9ADFBC3, 0xF9AEFBC3, 0xF9AFFBC3, 0xF9B0FBC3, 0xF9B1FBC3, 0xF9B2FBC3, 0xF9B3FBC3, 0xF9B4FBC3, 0xF9B5FBC3, 0xF9B6FBC3, 0xF9B7FBC3, 0xF9B8FBC3, + 0xF9B9FBC3, 0xF9BAFBC3, 0xF9BBFBC3, 0xF9BCFBC3, 0xF9BDFBC3, 0xF9BEFBC3, 0xF9BFFBC3, 0x15FA, 0xF9C1FBC3, 0xF9C2FBC3, 0xF9C3FBC3, 0xF9C4FBC3, 0xF9C5FBC3, 0xF9C6FBC3, 0xF9C7FBC3, + 0xF9C8FBC3, 0xF9C9FBC3, 0xF9CAFBC3, 0xF9CBFBC3, 0xF9CCFBC3, 0xF9CDFBC3, 0xF9CEFBC3, 0xF9CFFBC3, 0xF9D0FBC3, 0xF9D1FBC3, 0xF9D2FBC3, 0xF9D3FBC3, 0xF9D4FBC3, 0xF9D5FBC3, 0xF9D6FBC3, + 0xF9D7FBC3, 0xF9D8FBC3, 0xF9D9FBC3, 0xF9DAFBC3, 0xF9DBFBC3, 0xF9DCFBC3, 0xF9DDFBC3, 0xF9DEFBC3, 0xF9DFFBC3, 0xF9E0FBC3, 0xF9E1FBC3, 0xF9E2FBC3, 0xF9E3FBC3, 0xF9E4FBC3, 0xF9E5FBC3, + 0xF9E6FBC3, 0xF9E7FBC3, 0xF9E8FBC3, 0xF9E9FBC3, 0xF9EAFBC3, 0xF9EBFBC3, 0xF9ECFBC3, 0xF9EDFBC3, 0xF9EEFBC3, 0xF9EFFBC3, 0xF9F0FBC3, 0xF9F1FBC3, 0xF9F2FBC3, 0xF9F3FBC3, 0xF9F4FBC3, + 0xF9F5FBC3, 0xF9F6FBC3, 0xF9F7FBC3, 0xF9F8FBC3, 0xF9F9FBC3, 0xF9FAFBC3, 0xF9FBFBC3, 0xF9FCFBC3, 0xF9FDFBC3, 0xF9FEFBC3, 0xF9FFFBC3, 0xFA00FBC3, 0xFA01FBC3, 0xFA02FBC3, 0xFA03FBC3, + 0xFA04FBC3, 0xFA05FBC3, 0xFA06FBC3, 0xFA07FBC3, 0xFA08FBC3, 0xFA09FBC3, 0xFA0AFBC3, 0xFA0BFBC3, 0xFA0CFBC3, 0xFA0DFBC3, 0xFA0EFBC3, 0xFA0FFBC3, 0xFA10FBC3, 0xFA11FBC3, 0xFA12FBC3, + 0xFA13FBC3, 0xFA14FBC3, 0xFA15FBC3, 0xFA16FBC3, 0xFA17FBC3, 0xFA18FBC3, 0xFA19FBC3, 0xFA1AFBC3, 0xFA1BFBC3, 0xFA1CFBC3, 0xFA1DFBC3, 0xFA1EFBC3, 0xFA1FFBC3, 0xFA20FBC3, 0xFA21FBC3, + 0xFA22FBC3, 0xFA23FBC3, 0xFA24FBC3, 0xFA25FBC3, 0xFA26FBC3, 0xFA27FBC3, 0xFA28FBC3, 0xFA29FBC3, 0xFA2AFBC3, 0xFA2BFBC3, 0xFA2CFBC3, 0xFA2DFBC3, 0xFA2EFBC3, 0xFA2FFBC3, 0xFA30FBC3, + 0xFA31FBC3, 0xFA32FBC3, 0xFA33FBC3, 0xFA34FBC3, 
0xFA35FBC3, 0xFA36FBC3, 0xFA37FBC3, 0xFA38FBC3, 0xFA39FBC3, 0xFA3AFBC3, 0xFA3BFBC3, 0xFA3CFBC3, 0xFA3DFBC3, 0xFA3EFBC3, 0xFA3FFBC3, + 0xFA40FBC3, 0xFA41FBC3, 0xFA42FBC3, 0xFA43FBC3, 0xFA44FBC3, 0xFA45FBC3, 0xFA46FBC3, 0xFA47FBC3, 0xFA48FBC3, 0xFA49FBC3, 0xFA4AFBC3, 0xFA4BFBC3, 0xFA4CFBC3, 0xFA4DFBC3, 0xFA4EFBC3, + 0xFA4FFBC3, 0xFA50FBC3, 0xFA51FBC3, 0xFA52FBC3, 0xFA53FBC3, 0xFA54FBC3, 0xFA55FBC3, 0xFA56FBC3, 0xFA57FBC3, 0xFA58FBC3, 0xFA59FBC3, 0xFA5AFBC3, 0xFA5BFBC3, 0xFA5CFBC3, 0xFA5DFBC3, + 0xFA5EFBC3, 0xFA5FFBC3, 0xFA60FBC3, 0xFA61FBC3, 0xFA62FBC3, 0xFA63FBC3, 0xFA64FBC3, 0xFA65FBC3, 0xFA66FBC3, 0xFA67FBC3, 0xFA68FBC3, 0xFA69FBC3, 0xFA6AFBC3, 0xFA6BFBC3, 0xFA6CFBC3, + 0xFA6DFBC3, 0xFA6EFBC3, 0xFA6FFBC3, 0xFA70FBC3, 0xFA71FBC3, 0xFA72FBC3, 0xFA73FBC3, 0xFA74FBC3, 0xFA75FBC3, 0xFA76FBC3, 0xFA77FBC3, 0xFA78FBC3, 0xFA79FBC3, 0xFA7AFBC3, 0xFA7BFBC3, + 0xFA7CFBC3, 0xFA7DFBC3, 0xFA7EFBC3, 0xFA7FFBC3, 0xFA80FBC3, 0xFA81FBC3, 0xFA82FBC3, 0xFA83FBC3, 0xFA84FBC3, 0xFA85FBC3, 0xFA86FBC3, 0xFA87FBC3, 0xFA88FBC3, 0xFA89FBC3, 0xFA8AFBC3, + 0xFA8BFBC3, 0xFA8CFBC3, 0xFA8DFBC3, 0xFA8EFBC3, 0xFA8FFBC3, 0xFA90FBC3, 0xFA91FBC3, 0xFA92FBC3, 0xFA93FBC3, 0xFA94FBC3, 0xFA95FBC3, 0xFA96FBC3, 0xFA97FBC3, 0xFA98FBC3, 0xFA99FBC3, + 0xFA9AFBC3, 0xFA9BFBC3, 0xFA9CFBC3, 0xFA9DFBC3, 0xFA9EFBC3, 0xFA9FFBC3, 0xFAA0FBC3, 0xFAA1FBC3, 0xFAA2FBC3, 0xFAA3FBC3, 0xFAA4FBC3, 0xFAA5FBC3, 0xFAA6FBC3, 0xFAA7FBC3, 0xFAA8FBC3, + 0xFAA9FBC3, 0xFAAAFBC3, 0xFAABFBC3, 0xFAACFBC3, 0xFAADFBC3, 0xFAAEFBC3, 0xFAAFFBC3, 0xFAB0FBC3, 0xFAB1FBC3, 0xFAB2FBC3, 0xFAB3FBC3, 0xFAB4FBC3, 0xFAB5FBC3, 0xFAB6FBC3, 0xFAB7FBC3, + 0xFAB8FBC3, 0xFAB9FBC3, 0xFABAFBC3, 0xFABBFBC3, 0xFABCFBC3, 0xFABDFBC3, 0xFABEFBC3, 0xFABFFBC3, 0xFAC0FBC3, 0xFAC1FBC3, 0xFAC2FBC3, 0xFAC3FBC3, 0xFAC4FBC3, 0xFAC5FBC3, 0xFAC6FBC3, + 0xFAC7FBC3, 0xFAC8FBC3, 0xFAC9FBC3, 0xFACAFBC3, 0xFACBFBC3, 0xFACCFBC3, 0xFACDFBC3, 0xFACEFBC3, 0xFACFFBC3, 0xFAD0FBC3, 0xFAD1FBC3, 0xFAD2FBC3, 0xFAD3FBC3, 0xFAD4FBC3, 0xFAD5FBC3, + 0xFAD6FBC3, 0xFAD7FBC3, 0xFAD8FBC3, 
0xFAD9FBC3, 0xFADAFBC3, 0xFADBFBC3, 0xFADCFBC3, 0xFADDFBC3, 0xFADEFBC3, 0xFADFFBC3, 0xFAE0FBC3, 0xFAE1FBC3, 0xFAE2FBC3, 0xFAE3FBC3, 0xFAE4FBC3, + 0xFAE5FBC3, 0xFAE6FBC3, 0xFAE7FBC3, 0xFAE8FBC3, 0xFAE9FBC3, 0xFAEAFBC3, 0xFAEBFBC3, 0xFAECFBC3, 0xFAEDFBC3, 0xFAEEFBC3, 0xFAEFFBC3, 0xFAF0FBC3, 0xFAF1FBC3, 0xFAF2FBC3, 0xFAF3FBC3, + 0xFAF4FBC3, 0xFAF5FBC3, 0xFAF6FBC3, 0xFAF7FBC3, 0xFAF8FBC3, 0xFAF9FBC3, 0xFAFAFBC3, 0xFAFBFBC3, 0xFAFCFBC3, 0xFAFDFBC3, 0xFAFEFBC3, 0xFAFFFBC3, 0xFB00FBC3, 0xFB01FBC3, 0xFB02FBC3, + 0xFB03FBC3, 0xFB04FBC3, 0xFB05FBC3, 0xFB06FBC3, 0xFB07FBC3, 0xFB08FBC3, 0xFB09FBC3, 0xFB0AFBC3, 0xFB0BFBC3, 0xFB0CFBC3, 0xFB0DFBC3, 0xFB0EFBC3, 0xFB0FFBC3, 0xFB10FBC3, 0xFB11FBC3, + 0xFB12FBC3, 0xFB13FBC3, 0xFB14FBC3, 0xFB15FBC3, 0xFB16FBC3, 0xFB17FBC3, 0xFB18FBC3, 0xFB19FBC3, 0xFB1AFBC3, 0xFB1BFBC3, 0xFB1CFBC3, 0xFB1DFBC3, 0xFB1EFBC3, 0xFB1FFBC3, 0xFB20FBC3, + 0xFB21FBC3, 0xFB22FBC3, 0xFB23FBC3, 0xFB24FBC3, 0xFB25FBC3, 0xFB26FBC3, 0xFB27FBC3, 0xFB28FBC3, 0xFB29FBC3, 0xFB2AFBC3, 0xFB2BFBC3, 0xFB2CFBC3, 0xFB2DFBC3, 0xFB2EFBC3, 0xFB2FFBC3, + 0xFB30FBC3, 0xFB31FBC3, 0xFB32FBC3, 0xFB33FBC3, 0xFB34FBC3, 0xFB35FBC3, 0xFB36FBC3, 0xFB37FBC3, 0xFB38FBC3, 0xFB39FBC3, 0xFB3AFBC3, 0xFB3BFBC3, 0xFB3CFBC3, 0xFB3DFBC3, 0xFB3EFBC3, + 0xFB3FFBC3, 0xFB40FBC3, 0xFB41FBC3, 0xFB42FBC3, 0xFB43FBC3, 0xFB44FBC3, 0xFB45FBC3, 0xFB46FBC3, 0xFB47FBC3, 0xFB48FBC3, 0xFB49FBC3, 0xFB4AFBC3, 0xFB4BFBC3, 0xFB4CFBC3, 0xFB4DFBC3, + 0xFB4EFBC3, 0xFB4FFBC3, 0xFB50FBC3, 0xFB51FBC3, 0xFB52FBC3, 0xFB53FBC3, 0xFB54FBC3, 0xFB55FBC3, 0xFB56FBC3, 0xFB57FBC3, 0xFB58FBC3, 0xFB59FBC3, 0xFB5AFBC3, 0xFB5BFBC3, 0xFB5CFBC3, + 0xFB5DFBC3, 0xFB5EFBC3, 0xFB5FFBC3, 0xFB60FBC3, 0xFB61FBC3, 0xFB62FBC3, 0xFB63FBC3, 0xFB64FBC3, 0xFB65FBC3, 0xFB66FBC3, 0xFB67FBC3, 0xFB68FBC3, 0xFB69FBC3, 0xFB6AFBC3, 0xFB6BFBC3, + 0xFB6CFBC3, 0xFB6DFBC3, 0xFB6EFBC3, 0xFB6FFBC3, 0xFB70FBC3, 0xFB71FBC3, 0xFB72FBC3, 0xFB73FBC3, 0xFB74FBC3, 0xFB75FBC3, 0xFB76FBC3, 0xFB77FBC3, 0xFB78FBC3, 0xFB79FBC3, 0xFB7AFBC3, + 0xFB7BFBC3, 0xFB7CFBC3, 
0xFB7DFBC3, 0xFB7EFBC3, 0xFB7FFBC3, 0xFB80FBC3, 0xFB81FBC3, 0xFB82FBC3, 0xFB83FBC3, 0xFB84FBC3, 0xFB85FBC3, 0xFB86FBC3, 0xFB87FBC3, 0xFB88FBC3, 0xFB89FBC3, + 0xFB8AFBC3, 0xFB8BFBC3, 0xFB8CFBC3, 0xFB8DFBC3, 0xFB8EFBC3, 0xFB8FFBC3, 0xFB90FBC3, 0xFB91FBC3, 0xFB92FBC3, 0xFB93FBC3, 0xFB94FBC3, 0xFB95FBC3, 0xFB96FBC3, 0xFB97FBC3, 0xFB98FBC3, + 0xFB99FBC3, 0xFB9AFBC3, 0xFB9BFBC3, 0xFB9CFBC3, 0xFB9DFBC3, 0xFB9EFBC3, 0xFB9FFBC3, 0xFBA0FBC3, 0xFBA1FBC3, 0xFBA2FBC3, 0xFBA3FBC3, 0xFBA4FBC3, 0xFBA5FBC3, 0xFBA6FBC3, 0xFBA7FBC3, + 0xFBA8FBC3, 0xFBA9FBC3, 0xFBAAFBC3, 0xFBABFBC3, 0xFBACFBC3, 0xFBADFBC3, 0xFBAEFBC3, 0xFBAFFBC3, 0xFBB0FBC3, 0xFBB1FBC3, 0xFBB2FBC3, 0xFBB3FBC3, 0xFBB4FBC3, 0xFBB5FBC3, 0xFBB6FBC3, + 0xFBB7FBC3, 0xFBB8FBC3, 0xFBB9FBC3, 0xFBBAFBC3, 0xFBBBFBC3, 0xFBBCFBC3, 0xFBBDFBC3, 0xFBBEFBC3, 0xFBBFFBC3, 0xFBC0FBC3, 0xFBC1FBC3, 0xFBC2FBC3, 0xFBC3FBC3, 0xFBC4FBC3, 0xFBC5FBC3, + 0xFBC6FBC3, 0xFBC7FBC3, 0xFBC8FBC3, 0xFBC9FBC3, 0xFBCAFBC3, 0xFBCBFBC3, 0xFBCCFBC3, 0xFBCDFBC3, 0xFBCEFBC3, 0xFBCFFBC3, 0xFBD0FBC3, 0xFBD1FBC3, 0xFBD2FBC3, 0xFBD3FBC3, 0xFBD4FBC3, + 0xFBD5FBC3, 0xFBD6FBC3, 0xFBD7FBC3, 0xFBD8FBC3, 0xFBD9FBC3, 0xFBDAFBC3, 0xFBDBFBC3, 0xFBDCFBC3, 0xFBDDFBC3, 0xFBDEFBC3, 0xFBDFFBC3, 0xFBE0FBC3, 0xFBE1FBC3, 0xFBE2FBC3, 0xFBE3FBC3, + 0xFBE4FBC3, 0xFBE5FBC3, 0xFBE6FBC3, 0xFBE7FBC3, 0xFBE8FBC3, 0xFBE9FBC3, 0xFBEAFBC3, 0xFBEBFBC3, 0xFBECFBC3, 0xFBEDFBC3, 0xFBEEFBC3, 0xFBEFFBC3, 0xFBF0FBC3, 0xFBF1FBC3, 0xFBF2FBC3, + 0xFBF3FBC3, 0xFBF4FBC3, 0xFBF5FBC3, 0xFBF6FBC3, 0xFBF7FBC3, 0xFBF8FBC3, 0xFBF9FBC3, 0xFBFAFBC3, 0xFBFBFBC3, 0xFBFCFBC3, 0xFBFDFBC3, 0xFBFEFBC3, 0xFBFFFBC3, 0xFC00FBC3, 0xFC01FBC3, + 0xFC02FBC3, 0xFC03FBC3, 0xFC04FBC3, 0xFC05FBC3, 0xFC06FBC3, 0xFC07FBC3, 0xFC08FBC3, 0xFC09FBC3, 0xFC0AFBC3, 0xFC0BFBC3, 0xFC0CFBC3, 0xFC0DFBC3, 0xFC0EFBC3, 0xFC0FFBC3, 0xFC10FBC3, + 0xFC11FBC3, 0xFC12FBC3, 0xFC13FBC3, 0xFC14FBC3, 0xFC15FBC3, 0xFC16FBC3, 0xFC17FBC3, 0xFC18FBC3, 0xFC19FBC3, 0xFC1AFBC3, 0xFC1BFBC3, 0xFC1CFBC3, 0xFC1DFBC3, 0xFC1EFBC3, 0xFC1FFBC3, + 0xFC20FBC3, 
0xFC21FBC3, 0xFC22FBC3, 0xFC23FBC3, 0xFC24FBC3, 0xFC25FBC3, 0xFC26FBC3, 0xFC27FBC3, 0xFC28FBC3, 0xFC29FBC3, 0xFC2AFBC3, 0xFC2BFBC3, 0xFC2CFBC3, 0xFC2DFBC3, 0xFC2EFBC3, + 0xFC2FFBC3, 0xFC30FBC3, 0xFC31FBC3, 0xFC32FBC3, 0xFC33FBC3, 0xFC34FBC3, 0xFC35FBC3, 0xFC36FBC3, 0xFC37FBC3, 0xFC38FBC3, 0xFC39FBC3, 0xFC3AFBC3, 0xFC3BFBC3, 0xFC3CFBC3, 0xFC3DFBC3, + 0xFC3EFBC3, 0xFC3FFBC3, 0xFC40FBC3, 0xFC41FBC3, 0xFC42FBC3, 0xFC43FBC3, 0xFC44FBC3, 0xFC45FBC3, 0xFC46FBC3, 0xFC47FBC3, 0xFC48FBC3, 0xFC49FBC3, 0xFC4AFBC3, 0xFC4BFBC3, 0xFC4CFBC3, + 0xFC4DFBC3, 0xFC4EFBC3, 0xFC4FFBC3, 0xFC50FBC3, 0xFC51FBC3, 0xFC52FBC3, 0xFC53FBC3, 0xFC54FBC3, 0xFC55FBC3, 0xFC56FBC3, 0xFC57FBC3, 0xFC58FBC3, 0xFC59FBC3, 0xFC5AFBC3, 0xFC5BFBC3, + 0xFC5CFBC3, 0xFC5DFBC3, 0xFC5EFBC3, 0xFC5FFBC3, 0xFC60FBC3, 0xFC61FBC3, 0xFC62FBC3, 0xFC63FBC3, 0xFC64FBC3, 0xFC65FBC3, 0xFC66FBC3, 0xFC67FBC3, 0xFC68FBC3, 0xFC69FBC3, 0xFC6AFBC3, + 0xFC6BFBC3, 0xFC6CFBC3, 0xFC6DFBC3, 0xFC6EFBC3, 0xFC6FFBC3, 0xFC70FBC3, 0xFC71FBC3, 0xFC72FBC3, 0xFC73FBC3, 0xFC74FBC3, 0xFC75FBC3, 0xFC76FBC3, 0xFC77FBC3, 0xFC78FBC3, 0xFC79FBC3, + 0xFC7AFBC3, 0xFC7BFBC3, 0xFC7CFBC3, 0xFC7DFBC3, 0xFC7EFBC3, 0xFC7FFBC3, 0xFC80FBC3, 0xFC81FBC3, 0xFC82FBC3, 0xFC83FBC3, 0xFC84FBC3, 0xFC85FBC3, 0xFC86FBC3, 0xFC87FBC3, 0xFC88FBC3, + 0xFC89FBC3, 0xFC8AFBC3, 0xFC8BFBC3, 0xFC8CFBC3, 0xFC8DFBC3, 0xFC8EFBC3, 0xFC8FFBC3, 0xFC90FBC3, 0xFC91FBC3, 0xFC92FBC3, 0xFC93FBC3, 0xFC94FBC3, 0xFC95FBC3, 0xFC96FBC3, 0xFC97FBC3, + 0xFC98FBC3, 0xFC99FBC3, 0xFC9AFBC3, 0xFC9BFBC3, 0xFC9CFBC3, 0xFC9DFBC3, 0xFC9EFBC3, 0xFC9FFBC3, 0xFCA0FBC3, 0xFCA1FBC3, 0xFCA2FBC3, 0xFCA3FBC3, 0xFCA4FBC3, 0xFCA5FBC3, 0xFCA6FBC3, + 0xFCA7FBC3, 0xFCA8FBC3, 0xFCA9FBC3, 0xFCAAFBC3, 0xFCABFBC3, 0xFCACFBC3, 0xFCADFBC3, 0xFCAEFBC3, 0xFCAFFBC3, 0xFCB0FBC3, 0xFCB1FBC3, 0xFCB2FBC3, 0xFCB3FBC3, 0xFCB4FBC3, 0xFCB5FBC3, + 0xFCB6FBC3, 0xFCB7FBC3, 0xFCB8FBC3, 0xFCB9FBC3, 0xFCBAFBC3, 0xFCBBFBC3, 0xFCBCFBC3, 0xFCBDFBC3, 0xFCBEFBC3, 0xFCBFFBC3, 0xFCC0FBC3, 0xFCC1FBC3, 0xFCC2FBC3, 0xFCC3FBC3, 0xFCC4FBC3, + 
0xFCC5FBC3, 0xFCC6FBC3, 0xFCC7FBC3, 0xFCC8FBC3, 0xFCC9FBC3, 0xFCCAFBC3, 0xFCCBFBC3, 0xFCCCFBC3, 0xFCCDFBC3, 0xFCCEFBC3, 0xFCCFFBC3, 0xFCD0FBC3, 0xFCD1FBC3, 0xFCD2FBC3, 0xFCD3FBC3, + 0xFCD4FBC3, 0xFCD5FBC3, 0xFCD6FBC3, 0xFCD7FBC3, 0xFCD8FBC3, 0xFCD9FBC3, 0xFCDAFBC3, 0xFCDBFBC3, 0xFCDCFBC3, 0xFCDDFBC3, 0xFCDEFBC3, 0xFCDFFBC3, 0xFCE0FBC3, 0xFCE1FBC3, 0xFCE2FBC3, + 0xFCE3FBC3, 0xFCE4FBC3, 0xFCE5FBC3, 0xFCE6FBC3, 0xFCE7FBC3, 0xFCE8FBC3, 0xFCE9FBC3, 0xFCEAFBC3, 0xFCEBFBC3, 0xFCECFBC3, 0xFCEDFBC3, 0xFCEEFBC3, 0xFCEFFBC3, 0xFCF0FBC3, 0xFCF1FBC3, + 0xFCF2FBC3, 0xFCF3FBC3, 0xFCF4FBC3, 0xFCF5FBC3, 0xFCF6FBC3, 0xFCF7FBC3, 0xFCF8FBC3, 0xFCF9FBC3, 0xFCFAFBC3, 0xFCFBFBC3, 0xFCFCFBC3, 0xFCFDFBC3, 0xFCFEFBC3, 0xFCFFFBC3, 0xFD00FBC3, + 0xFD01FBC3, 0xFD02FBC3, 0xFD03FBC3, 0xFD04FBC3, 0xFD05FBC3, 0xFD06FBC3, 0xFD07FBC3, 0xFD08FBC3, 0xFD09FBC3, 0xFD0AFBC3, 0xFD0BFBC3, 0xFD0CFBC3, 0xFD0DFBC3, 0xFD0EFBC3, 0xFD0FFBC3, + 0xFD10FBC3, 0xFD11FBC3, 0xFD12FBC3, 0xFD13FBC3, 0xFD14FBC3, 0xFD15FBC3, 0xFD16FBC3, 0xFD17FBC3, 0xFD18FBC3, 0xFD19FBC3, 0xFD1AFBC3, 0xFD1BFBC3, 0xFD1CFBC3, 0xFD1DFBC3, 0xFD1EFBC3, + 0xFD1FFBC3, 0xFD20FBC3, 0xFD21FBC3, 0xFD22FBC3, 0xFD23FBC3, 0xFD24FBC3, 0xFD25FBC3, 0xFD26FBC3, 0xFD27FBC3, 0xFD28FBC3, 0xFD29FBC3, 0xFD2AFBC3, 0xFD2BFBC3, 0xFD2CFBC3, 0xFD2DFBC3, + 0xFD2EFBC3, 0xFD2FFBC3, 0xFD30FBC3, 0xFD31FBC3, 0xFD32FBC3, 0xFD33FBC3, 0xFD34FBC3, 0xFD35FBC3, 0xFD36FBC3, 0xFD37FBC3, 0xFD38FBC3, 0xFD39FBC3, 0xFD3AFBC3, 0xFD3BFBC3, 0xFD3CFBC3, + 0xFD3DFBC3, 0xFD3EFBC3, 0xFD3FFBC3, 0xFD40FBC3, 0xFD41FBC3, 0xFD42FBC3, 0xFD43FBC3, 0xFD44FBC3, 0xFD45FBC3, 0xFD46FBC3, 0xFD47FBC3, 0xFD48FBC3, 0xFD49FBC3, 0xFD4AFBC3, 0xFD4BFBC3, + 0xFD4CFBC3, 0xFD4DFBC3, 0xFD4EFBC3, 0xFD4FFBC3, 0xFD50FBC3, 0xFD51FBC3, 0xFD52FBC3, 0xFD53FBC3, 0xFD54FBC3, 0xFD55FBC3, 0xFD56FBC3, 0xFD57FBC3, 0xFD58FBC3, 0xFD59FBC3, 0xFD5AFBC3, + 0xFD5BFBC3, 0xFD5CFBC3, 0xFD5DFBC3, 0xFD5EFBC3, 0xFD5FFBC3, 0xFD60FBC3, 0xFD61FBC3, 0xFD62FBC3, 0xFD63FBC3, 0xFD64FBC3, 0xFD65FBC3, 0xFD66FBC3, 0xFD67FBC3, 0xFD68FBC3, 0xFD69FBC3, 
+ 0xFD6AFBC3, 0xFD6BFBC3, 0xFD6CFBC3, 0xFD6DFBC3, 0xFD6EFBC3, 0xFD6FFBC3, 0xFD70FBC3, 0xFD71FBC3, 0xFD72FBC3, 0xFD73FBC3, 0xFD74FBC3, 0xFD75FBC3, 0xFD76FBC3, 0xFD77FBC3, 0xFD78FBC3, + 0xFD79FBC3, 0xFD7AFBC3, 0xFD7BFBC3, 0xFD7CFBC3, 0xFD7DFBC3, 0xFD7EFBC3, 0xFD7FFBC3, 0xFD80FBC3, 0xFD81FBC3, 0xFD82FBC3, 0xFD83FBC3, 0xFD84FBC3, 0xFD85FBC3, 0xFD86FBC3, 0xFD87FBC3, + 0xFD88FBC3, 0xFD89FBC3, 0xFD8AFBC3, 0xFD8BFBC3, 0xFD8CFBC3, 0xFD8DFBC3, 0xFD8EFBC3, 0xFD8FFBC3, 0xFD90FBC3, 0xFD91FBC3, 0xFD92FBC3, 0xFD93FBC3, 0xFD94FBC3, 0xFD95FBC3, 0xFD96FBC3, + 0xFD97FBC3, 0xFD98FBC3, 0xFD99FBC3, 0xFD9AFBC3, 0xFD9BFBC3, 0xFD9CFBC3, 0xFD9DFBC3, 0xFD9EFBC3, 0xFD9FFBC3, 0xFDA0FBC3, 0xFDA1FBC3, 0xFDA2FBC3, 0xFDA3FBC3, 0xFDA4FBC3, 0xFDA5FBC3, + 0xFDA6FBC3, 0xFDA7FBC3, 0xFDA8FBC3, 0xFDA9FBC3, 0xFDAAFBC3, 0xFDABFBC3, 0xFDACFBC3, 0xFDADFBC3, 0xFDAEFBC3, 0xFDAFFBC3, 0xFDB0FBC3, 0xFDB1FBC3, 0xFDB2FBC3, 0xFDB3FBC3, 0xFDB4FBC3, + 0xFDB5FBC3, 0xFDB6FBC3, 0xFDB7FBC3, 0xFDB8FBC3, 0xFDB9FBC3, 0xFDBAFBC3, 0xFDBBFBC3, 0xFDBCFBC3, 0xFDBDFBC3, 0xFDBEFBC3, 0xFDBFFBC3, 0xFDC0FBC3, 0xFDC1FBC3, 0xFDC2FBC3, 0xFDC3FBC3, + 0xFDC4FBC3, 0xFDC5FBC3, 0xFDC6FBC3, 0xFDC7FBC3, 0xFDC8FBC3, 0xFDC9FBC3, 0xFDCAFBC3, 0xFDCBFBC3, 0xFDCCFBC3, 0xFDCDFBC3, 0xFDCEFBC3, 0xFDCFFBC3, 0xFDD0FBC3, 0xFDD1FBC3, 0xFDD2FBC3, + 0xFDD3FBC3, 0xFDD4FBC3, 0xFDD5FBC3, 0xFDD6FBC3, 0xFDD7FBC3, 0xFDD8FBC3, 0xFDD9FBC3, 0xFDDAFBC3, 0xFDDBFBC3, 0xFDDCFBC3, 0xFDDDFBC3, 0xFDDEFBC3, 0xFDDFFBC3, 0xFDE0FBC3, 0xFDE1FBC3, + 0xFDE2FBC3, 0xFDE3FBC3, 0xFDE4FBC3, 0xFDE5FBC3, 0xFDE6FBC3, 0xFDE7FBC3, 0xFDE8FBC3, 0xFDE9FBC3, 0xFDEAFBC3, 0xFDEBFBC3, 0xFDECFBC3, 0xFDEDFBC3, 0xFDEEFBC3, 0xFDEFFBC3, 0xFDF0FBC3, + 0xFDF1FBC3, 0xFDF2FBC3, 0xFDF3FBC3, 0xFDF4FBC3, 0xFDF5FBC3, 0xFDF6FBC3, 0xFDF7FBC3, 0xFDF8FBC3, 0xFDF9FBC3, 0xFDFAFBC3, 0xFDFBFBC3, 0xFDFCFBC3, 0xFDFDFBC3, 0xFDFEFBC3, 0xFDFFFBC3, + 0xFE00FBC3, 0xFE01FBC3, 0xFE02FBC3, 0xFE03FBC3, 0xFE04FBC3, 0xFE05FBC3, 0xFE06FBC3, 0xFE07FBC3, 0xFE08FBC3, 0xFE09FBC3, 0xFE0AFBC3, 0xFE0BFBC3, 0xFE0CFBC3, 0xFE0DFBC3, 
0xFE0EFBC3, + 0xFE0FFBC3, 0xFE10FBC3, 0xFE11FBC3, 0xFE12FBC3, 0xFE13FBC3, 0xFE14FBC3, 0xFE15FBC3, 0xFE16FBC3, 0xFE17FBC3, 0xFE18FBC3, 0xFE19FBC3, 0xFE1AFBC3, 0xFE1BFBC3, 0xFE1CFBC3, 0xFE1DFBC3, + 0xFE1EFBC3, 0xFE1FFBC3, 0xFE20FBC3, 0xFE21FBC3, 0xFE22FBC3, 0xFE23FBC3, 0xFE24FBC3, 0xFE25FBC3, 0xFE26FBC3, 0xFE27FBC3, 0xFE28FBC3, 0xFE29FBC3, 0xFE2AFBC3, 0xFE2BFBC3, 0xFE2CFBC3, + 0xFE2DFBC3, 0xFE2EFBC3, 0xFE2FFBC3, 0xFE30FBC3, 0xFE31FBC3, 0xFE32FBC3, 0xFE33FBC3, 0xFE34FBC3, 0xFE35FBC3, 0xFE36FBC3, 0xFE37FBC3, 0xFE38FBC3, 0xFE39FBC3, 0xFE3AFBC3, 0xFE3BFBC3, + 0xFE3CFBC3, 0xFE3DFBC3, 0xFE3EFBC3, 0xFE3FFBC3, 0xFE40FBC3, 0xFE41FBC3, 0xFE42FBC3, 0xFE43FBC3, 0xFE44FBC3, 0xFE45FBC3, 0xFE46FBC3, 0xFE47FBC3, 0xFE48FBC3, 0xFE49FBC3, 0xFE4AFBC3, + 0xFE4BFBC3, 0xFE4CFBC3, 0xFE4DFBC3, 0xFE4EFBC3, 0xFE4FFBC3, 0xFE50FBC3, 0xFE51FBC3, 0xFE52FBC3, 0xFE53FBC3, 0xFE54FBC3, 0xFE55FBC3, 0xFE56FBC3, 0xFE57FBC3, 0xFE58FBC3, 0xFE59FBC3, + 0xFE5AFBC3, 0xFE5BFBC3, 0xFE5CFBC3, 0xFE5DFBC3, 0xFE5EFBC3, 0xFE5FFBC3, 0xFE60FBC3, 0xFE61FBC3, 0xFE62FBC3, 0xFE63FBC3, 0xFE64FBC3, 0xFE65FBC3, 0xFE66FBC3, 0xFE67FBC3, 0xFE68FBC3, + 0xFE69FBC3, 0xFE6AFBC3, 0xFE6BFBC3, 0xFE6CFBC3, 0xFE6DFBC3, 0xFE6EFBC3, 0xFE6FFBC3, 0xFE70FBC3, 0xFE71FBC3, 0xFE72FBC3, 0xFE73FBC3, 0xFE74FBC3, 0xFE75FBC3, 0xFE76FBC3, 0xFE77FBC3, + 0xFE78FBC3, 0xFE79FBC3, 0xFE7AFBC3, 0xFE7BFBC3, 0xFE7CFBC3, 0xFE7DFBC3, 0xFE7EFBC3, 0xFE7FFBC3, 0xFE80FBC3, 0xFE81FBC3, 0xFE82FBC3, 0xFE83FBC3, 0xFE84FBC3, 0xFE85FBC3, 0xFE86FBC3, + 0xFE87FBC3, 0xFE88FBC3, 0xFE89FBC3, 0xFE8AFBC3, 0xFE8BFBC3, 0xFE8CFBC3, 0xFE8DFBC3, 0xFE8EFBC3, 0xFE8FFBC3, 0xFE90FBC3, 0xFE91FBC3, 0xFE92FBC3, 0xFE93FBC3, 0xFE94FBC3, 0xFE95FBC3, + 0xFE96FBC3, 0xFE97FBC3, 0xFE98FBC3, 0xFE99FBC3, 0xFE9AFBC3, 0xFE9BFBC3, 0xFE9CFBC3, 0xFE9DFBC3, 0xFE9EFBC3, 0xFE9FFBC3, 0xFEA0FBC3, 0xFEA1FBC3, 0xFEA2FBC3, 0xFEA3FBC3, 0xFEA4FBC3, + 0xFEA5FBC3, 0xFEA6FBC3, 0xFEA7FBC3, 0xFEA8FBC3, 0xFEA9FBC3, 0xFEAAFBC3, 0xFEABFBC3, 0xFEACFBC3, 0xFEADFBC3, 0xFEAEFBC3, 0xFEAFFBC3, 0xFEB0FBC3, 0xFEB1FBC3, 
0xFEB2FBC3, 0xFEB3FBC3, + 0xFEB4FBC3, 0xFEB5FBC3, 0xFEB6FBC3, 0xFEB7FBC3, 0xFEB8FBC3, 0xFEB9FBC3, 0xFEBAFBC3, 0xFEBBFBC3, 0xFEBCFBC3, 0xFEBDFBC3, 0xFEBEFBC3, 0xFEBFFBC3, 0xFEC0FBC3, 0xFEC1FBC3, 0xFEC2FBC3, + 0xFEC3FBC3, 0xFEC4FBC3, 0xFEC5FBC3, 0xFEC6FBC3, 0xFEC7FBC3, 0xFEC8FBC3, 0xFEC9FBC3, 0xFECAFBC3, 0xFECBFBC3, 0xFECCFBC3, 0xFECDFBC3, 0xFECEFBC3, 0xFECFFBC3, 0xFED0FBC3, 0xFED1FBC3, + 0xFED2FBC3, 0xFED3FBC3, 0xFED4FBC3, 0xFED5FBC3, 0xFED6FBC3, 0xFED7FBC3, 0xFED8FBC3, 0xFED9FBC3, 0xFEDAFBC3, 0xFEDBFBC3, 0xFEDCFBC3, 0xFEDDFBC3, 0xFEDEFBC3, 0xFEDFFBC3, 0xFEE0FBC3, + 0xFEE1FBC3, 0xFEE2FBC3, 0xFEE3FBC3, 0xFEE4FBC3, 0xFEE5FBC3, 0xFEE6FBC3, 0xFEE7FBC3, 0xFEE8FBC3, 0xFEE9FBC3, 0xFEEAFBC3, 0xFEEBFBC3, 0xFEECFBC3, 0xFEEDFBC3, 0xFEEEFBC3, 0xFEEFFBC3, + 0xFEF0FBC3, 0xFEF1FBC3, 0xFEF2FBC3, 0xFEF3FBC3, 0xFEF4FBC3, 0xFEF5FBC3, 0xFEF6FBC3, 0xFEF7FBC3, 0xFEF8FBC3, 0xFEF9FBC3, 0xFEFAFBC3, 0xFEFBFBC3, 0xFEFCFBC3, 0xFEFDFBC3, 0xFEFEFBC3, + 0xFEFFFBC3, 0xFF00FBC3, 0xFF01FBC3, 0xFF02FBC3, 0xFF03FBC3, 0xFF04FBC3, 0xFF05FBC3, 0xFF06FBC3, 0xFF07FBC3, 0xFF08FBC3, 0xFF09FBC3, 0xFF0AFBC3, 0xFF0BFBC3, 0xFF0CFBC3, 0xFF0DFBC3, + 0xFF0EFBC3, 0xFF0FFBC3, 0xFF10FBC3, 0xFF11FBC3, 0xFF12FBC3, 0xFF13FBC3, 0xFF14FBC3, 0xFF15FBC3, 0xFF16FBC3, 0xFF17FBC3, 0xFF18FBC3, 0xFF19FBC3, 0xFF1AFBC3, 0xFF1BFBC3, 0xFF1CFBC3, + 0xFF1DFBC3, 0xFF1EFBC3, 0xFF1FFBC3, 0xFF20FBC3, 0xFF21FBC3, 0xFF22FBC3, 0xFF23FBC3, 0xFF24FBC3, 0xFF25FBC3, 0xFF26FBC3, 0xFF27FBC3, 0xFF28FBC3, 0xFF29FBC3, 0xFF2AFBC3, 0xFF2BFBC3, + 0xFF2CFBC3, 0xFF2DFBC3, 0xFF2EFBC3, 0xFF2FFBC3, 0xFF30FBC3, 0xFF31FBC3, 0xFF32FBC3, 0xFF33FBC3, 0xFF34FBC3, 0xFF35FBC3, 0xFF36FBC3, 0xFF37FBC3, 0xFF38FBC3, 0xFF39FBC3, 0xFF3AFBC3, + 0xFF3BFBC3, 0xFF3CFBC3, 0xFF3DFBC3, 0xFF3EFBC3, 0xFF3FFBC3, 0xFF40FBC3, 0xFF41FBC3, 0xFF42FBC3, 0xFF43FBC3, 0xFF44FBC3, 0xFF45FBC3, 0xFF46FBC3, 0xFF47FBC3, 0xFF48FBC3, 0xFF49FBC3, + 0xFF4AFBC3, 0xFF4BFBC3, 0xFF4CFBC3, 0xFF4DFBC3, 0xFF4EFBC3, 0xFF4FFBC3, 0xFF50FBC3, 0xFF51FBC3, 0xFF52FBC3, 0xFF53FBC3, 0xFF54FBC3, 0xFF55FBC3, 
0xFF56FBC3, 0xFF57FBC3, 0xFF58FBC3, + 0xFF59FBC3, 0xFF5AFBC3, 0xFF5BFBC3, 0xFF5CFBC3, 0xFF5DFBC3, 0xFF5EFBC3, 0xFF5FFBC3, 0xFF60FBC3, 0xFF61FBC3, 0xFF62FBC3, 0xFF63FBC3, 0xFF64FBC3, 0xFF65FBC3, 0xFF66FBC3, 0xFF67FBC3, + 0xFF68FBC3, 0xFF69FBC3, 0xFF6AFBC3, 0xFF6BFBC3, 0xFF6CFBC3, 0xFF6DFBC3, 0xFF6EFBC3, 0xFF6FFBC3, 0xFF70FBC3, 0xFF71FBC3, 0xFF72FBC3, 0xFF73FBC3, 0xFF74FBC3, 0xFF75FBC3, 0xFF76FBC3, + 0xFF77FBC3, 0xFF78FBC3, 0xFF79FBC3, 0xFF7AFBC3, 0xFF7BFBC3, 0xFF7CFBC3, 0xFF7DFBC3, 0xFF7EFBC3, 0xFF7FFBC3, 0xFF80FBC3, 0xFF81FBC3, 0xFF82FBC3, 0xFF83FBC3, 0xFF84FBC3, 0xFF85FBC3, + 0xFF86FBC3, 0xFF87FBC3, 0xFF88FBC3, 0xFF89FBC3, 0xFF8AFBC3, 0xFF8BFBC3, 0xFF8CFBC3, 0xFF8DFBC3, 0xFF8EFBC3, 0xFF8FFBC3, 0xFF90FBC3, 0xFF91FBC3, 0xFF92FBC3, 0xFF93FBC3, 0xFF94FBC3, + 0xFF95FBC3, 0xFF96FBC3, 0xFF97FBC3, 0xFF98FBC3, 0xFF99FBC3, 0xFF9AFBC3, 0xFF9BFBC3, 0xFF9CFBC3, 0xFF9DFBC3, 0xFF9EFBC3, 0xFF9FFBC3, 0xFFA0FBC3, 0xFFA1FBC3, 0xFFA2FBC3, 0xFFA3FBC3, + 0xFFA4FBC3, 0xFFA5FBC3, 0xFFA6FBC3, 0xFFA7FBC3, 0xFFA8FBC3, 0xFFA9FBC3, 0xFFAAFBC3, 0xFFABFBC3, 0xFFACFBC3, 0xFFADFBC3, 0xFFAEFBC3, 0xFFAFFBC3, 0xFFB0FBC3, 0xFFB1FBC3, 0xFFB2FBC3, + 0xFFB3FBC3, 0xFFB4FBC3, 0xFFB5FBC3, 0xFFB6FBC3, 0xFFB7FBC3, 0xFFB8FBC3, 0xFFB9FBC3, 0xFFBAFBC3, 0xFFBBFBC3, 0xFFBCFBC3, 0xFFBDFBC3, 0xFFBEFBC3, 0xFFBFFBC3, 0xFFC0FBC3, 0xFFC1FBC3, + 0xFFC2FBC3, 0xFFC3FBC3, 0xFFC4FBC3, 0xFFC5FBC3, 0xFFC6FBC3, 0xFFC7FBC3, 0xFFC8FBC3, 0xFFC9FBC3, 0xFFCAFBC3, 0xFFCBFBC3, 0xFFCCFBC3, 0xFFCDFBC3, 0xFFCEFBC3, 0xFFCFFBC3, 0xFFD0FBC3, + 0xFFD1FBC3, 0xFFD2FBC3, 0xFFD3FBC3, 0xFFD4FBC3, 0xFFD5FBC3, 0xFFD6FBC3, 0xFFD7FBC3, 0xFFD8FBC3, 0xFFD9FBC3, 0xFFDAFBC3, 0xFFDBFBC3, 0xFFDCFBC3, 0xFFDDFBC3, 0xFFDEFBC3, 0xFFDFFBC3, + 0xFFE0FBC3, 0xFFE1FBC3, 0xFFE2FBC3, 0xFFE3FBC3, 0xFFE4FBC3, 0xFFE5FBC3, 0xFFE6FBC3, 0xFFE7FBC3, 0xFFE8FBC3, 0xFFE9FBC3, 0xFFEAFBC3, 0xFFEBFBC3, 0xFFECFBC3, 0xFFEDFBC3, 0xFFEEFBC3, + 0xFFEFFBC3, 0xFFF0FBC3, 0xFFF1FBC3, 0xFFF2FBC3, 0xFFF3FBC3, 0xFFF4FBC3, 0xFFF5FBC3, 0xFFF6FBC3, 0xFFF7FBC3, 0xFFF8FBC3, 0xFFF9FBC3, 
0xFFFAFBC3, 0xFFFBFBC3, 0xFFFCFBC3, 0xFFFDFBC3, + 0xFFFEFBC3, 0xFFFFFBC3, 0x8000FB84, 0x8001FB84, 0x8002FB84, 0x8003FB84, 0x8004FB84, 0x8005FB84, 0x8006FB84, 0x8007FB84, 0x8008FB84, 0x8009FB84, 0x800AFB84, 0x800BFB84, 0x800CFB84, + 0x800DFB84, 0x800EFB84, 0x800FFB84, 0x8010FB84, 0x8011FB84, 0x8012FB84, 0x8013FB84, 0x8014FB84, 0x8015FB84, 0x8016FB84, 0x8017FB84, 0x8018FB84, 0x8019FB84, 0x801AFB84, 0x801BFB84, + 0x801CFB84, 0x801DFB84, 0x801EFB84, 0x801FFB84, 0x8020FB84, 0x8021FB84, 0x8022FB84, 0x8023FB84, 0x8024FB84, 0x8025FB84, 0x8026FB84, 0x8027FB84, 0x8028FB84, 0x8029FB84, 0x802AFB84, + 0x802BFB84, 0x802CFB84, 0x802DFB84, 0x802EFB84, 0x802FFB84, 0x8030FB84, 0x8031FB84, 0x8032FB84, 0x8033FB84, 0x8034FB84, 0x8035FB84, 0x8036FB84, 0x8037FB84, 0x8038FB84, 0x8039FB84, + 0x803AFB84, 0x803BFB84, 0x803CFB84, 0x803DFB84, 0x803EFB84, 0x803FFB84, 0x8040FB84, 0x8041FB84, 0x8042FB84, 0x8043FB84, 0x8044FB84, 0x8045FB84, 0x8046FB84, 0x8047FB84, 0x8048FB84, + 0x8049FB84, 0x804AFB84, 0x804BFB84, 0x804CFB84, 0x804DFB84, 0x804EFB84, 0x804FFB84, 0x8050FB84, 0x8051FB84, 0x8052FB84, 0x8053FB84, 0x8054FB84, 0x8055FB84, 0x8056FB84, 0x8057FB84, + 0x8058FB84, 0x8059FB84, 0x805AFB84, 0x805BFB84, 0x805CFB84, 0x805DFB84, 0x805EFB84, 0x805FFB84, 0x8060FB84, 0x8061FB84, 0x8062FB84, 0x8063FB84, 0x8064FB84, 0x8065FB84, 0x8066FB84, + 0x8067FB84, 0x8068FB84, 0x8069FB84, 0x806AFB84, 0x806BFB84, 0x806CFB84, 0x806DFB84, 0x806EFB84, 0x806FFB84, 0x8070FB84, 0x8071FB84, 0x8072FB84, 0x8073FB84, 0x8074FB84, 0x8075FB84, + 0x8076FB84, 0x8077FB84, 0x8078FB84, 0x8079FB84, 0x807AFB84, 0x807BFB84, 0x807CFB84, 0x807DFB84, 0x807EFB84, 0x807FFB84, 0x8080FB84, 0x8081FB84, 0x8082FB84, 0x8083FB84, 0x8084FB84, + 0x8085FB84, 0x8086FB84, 0x8087FB84, 0x8088FB84, 0x8089FB84, 0x808AFB84, 0x808BFB84, 0x808CFB84, 0x808DFB84, 0x808EFB84, 0x808FFB84, 0x8090FB84, 0x8091FB84, 0x8092FB84, 0x8093FB84, + 0x8094FB84, 0x8095FB84, 0x8096FB84, 0x8097FB84, 0x8098FB84, 0x8099FB84, 0x809AFB84, 0x809BFB84, 0x809CFB84, 0x809DFB84, 
0x809EFB84, 0x809FFB84, 0x80A0FB84, 0x80A1FB84, 0x80A2FB84, + 0x80A3FB84, 0x80A4FB84, 0x80A5FB84, 0x80A6FB84, 0x80A7FB84, 0x80A8FB84, 0x80A9FB84, 0x80AAFB84, 0x80ABFB84, 0x80ACFB84, 0x80ADFB84, 0x80AEFB84, 0x80AFFB84, 0x80B0FB84, 0x80B1FB84, + 0x80B2FB84, 0x80B3FB84, 0x80B4FB84, 0x80B5FB84, 0x80B6FB84, 0x80B7FB84, 0x80B8FB84, 0x80B9FB84, 0x80BAFB84, 0x80BBFB84, 0x80BCFB84, 0x80BDFB84, 0x80BEFB84, 0x80BFFB84, 0x80C0FB84, + 0x80C1FB84, 0x80C2FB84, 0x80C3FB84, 0x80C4FB84, 0x80C5FB84, 0x80C6FB84, 0x80C7FB84, 0x80C8FB84, 0x80C9FB84, 0x80CAFB84, 0x80CBFB84, 0x80CCFB84, 0x80CDFB84, 0x80CEFB84, 0x80CFFB84, + 0x80D0FB84, 0x80D1FB84, 0x80D2FB84, 0x80D3FB84, 0x80D4FB84, 0x80D5FB84, 0x80D6FB84, 0x80D7FB84, 0x80D8FB84, 0x80D9FB84, 0x80DAFB84, 0x80DBFB84, 0x80DCFB84, 0x80DDFB84, 0x80DEFB84, + 0x80DFFB84, 0x80E0FB84, 0x80E1FB84, 0x80E2FB84, 0x80E3FB84, 0x80E4FB84, 0x80E5FB84, 0x80E6FB84, 0x80E7FB84, 0x80E8FB84, 0x80E9FB84, 0x80EAFB84, 0x80EBFB84, 0x80ECFB84, 0x80EDFB84, + 0x80EEFB84, 0x80EFFB84, 0x80F0FB84, 0x80F1FB84, 0x80F2FB84, 0x80F3FB84, 0x80F4FB84, 0x80F5FB84, 0x80F6FB84, 0x80F7FB84, 0x80F8FB84, 0x80F9FB84, 0x80FAFB84, 0x80FBFB84, 0x80FCFB84, + 0x80FDFB84, 0x80FEFB84, 0x80FFFB84, 0x8100FB84, 0x8101FB84, 0x8102FB84, 0x8103FB84, 0x8104FB84, 0x8105FB84, 0x8106FB84, 0x8107FB84, 0x8108FB84, 0x8109FB84, 0x810AFB84, 0x810BFB84, + 0x810CFB84, 0x810DFB84, 0x810EFB84, 0x810FFB84, 0x8110FB84, 0x8111FB84, 0x8112FB84, 0x8113FB84, 0x8114FB84, 0x8115FB84, 0x8116FB84, 0x8117FB84, 0x8118FB84, 0x8119FB84, 0x811AFB84, + 0x811BFB84, 0x811CFB84, 0x811DFB84, 0x811EFB84, 0x811FFB84, 0x8120FB84, 0x8121FB84, 0x8122FB84, 0x8123FB84, 0x8124FB84, 0x8125FB84, 0x8126FB84, 0x8127FB84, 0x8128FB84, 0x8129FB84, + 0x812AFB84, 0x812BFB84, 0x812CFB84, 0x812DFB84, 0x812EFB84, 0x812FFB84, 0x8130FB84, 0x8131FB84, 0x8132FB84, 0x8133FB84, 0x8134FB84, 0x8135FB84, 0x8136FB84, 0x8137FB84, 0x8138FB84, + 0x8139FB84, 0x813AFB84, 0x813BFB84, 0x813CFB84, 0x813DFB84, 0x813EFB84, 0x813FFB84, 0x8140FB84, 0x8141FB84, 
0x8142FB84, 0x8143FB84, 0x8144FB84, 0x8145FB84, 0x8146FB84, 0x8147FB84, + 0x8148FB84, 0x8149FB84, 0x814AFB84, 0x814BFB84, 0x814CFB84, 0x814DFB84, 0x814EFB84, 0x814FFB84, 0x8150FB84, 0x8151FB84, 0x8152FB84, 0x8153FB84, 0x8154FB84, 0x8155FB84, 0x8156FB84, + 0x8157FB84, 0x8158FB84, 0x8159FB84, 0x815AFB84, 0x815BFB84, 0x815CFB84, 0x815DFB84, 0x815EFB84, 0x815FFB84, 0x8160FB84, 0x8161FB84, 0x8162FB84, 0x8163FB84, 0x8164FB84, 0x8165FB84, + 0x8166FB84, 0x8167FB84, 0x8168FB84, 0x8169FB84, 0x816AFB84, 0x816BFB84, 0x816CFB84, 0x816DFB84, 0x816EFB84, 0x816FFB84, 0x8170FB84, 0x8171FB84, 0x8172FB84, 0x8173FB84, 0x8174FB84, + 0x8175FB84, 0x8176FB84, 0x8177FB84, 0x8178FB84, 0x8179FB84, 0x817AFB84, 0x817BFB84, 0x817CFB84, 0x817DFB84, 0x817EFB84, 0x817FFB84, 0x8180FB84, 0x8181FB84, 0x8182FB84, 0x8183FB84, + 0x8184FB84, 0x8185FB84, 0x8186FB84, 0x8187FB84, 0x8188FB84, 0x8189FB84, 0x818AFB84, 0x818BFB84, 0x818CFB84, 0x818DFB84, 0x818EFB84, 0x818FFB84, 0x8190FB84, 0x8191FB84, 0x8192FB84, + 0x8193FB84, 0x8194FB84, 0x8195FB84, 0x8196FB84, 0x8197FB84, 0x8198FB84, 0x8199FB84, 0x819AFB84, 0x819BFB84, 0x819CFB84, 0x819DFB84, 0x819EFB84, 0x819FFB84, 0x81A0FB84, 0x81A1FB84, + 0x81A2FB84, 0x81A3FB84, 0x81A4FB84, 0x81A5FB84, 0x81A6FB84, 0x81A7FB84, 0x81A8FB84, 0x81A9FB84, 0x81AAFB84, 0x81ABFB84, 0x81ACFB84, 0x81ADFB84, 0x81AEFB84, 0x81AFFB84, 0x81B0FB84, + 0x81B1FB84, 0x81B2FB84, 0x81B3FB84, 0x81B4FB84, 0x81B5FB84, 0x81B6FB84, 0x81B7FB84, 0x81B8FB84, 0x81B9FB84, 0x81BAFB84, 0x81BBFB84, 0x81BCFB84, 0x81BDFB84, 0x81BEFB84, 0x81BFFB84, + 0x81C0FB84, 0x81C1FB84, 0x81C2FB84, 0x81C3FB84, 0x81C4FB84, 0x81C5FB84, 0x81C6FB84, 0x81C7FB84, 0x81C8FB84, 0x81C9FB84, 0x81CAFB84, 0x81CBFB84, 0x81CCFB84, 0x81CDFB84, 0x81CEFB84, + 0x81CFFB84, 0x81D0FB84, 0x81D1FB84, 0x81D2FB84, 0x81D3FB84, 0x81D4FB84, 0x81D5FB84, 0x81D6FB84, 0x81D7FB84, 0x81D8FB84, 0x81D9FB84, 0x81DAFB84, 0x81DBFB84, 0x81DCFB84, 0x81DDFB84, + 0x81DEFB84, 0x81DFFB84, 0x81E0FB84, 0x81E1FB84, 0x81E2FB84, 0x81E3FB84, 0x81E4FB84, 0x81E5FB84, 
0x81E6FB84, 0x81E7FB84, 0x81E8FB84, 0x81E9FB84, 0x81EAFB84, 0x81EBFB84, 0x81ECFB84, + 0x81EDFB84, 0x81EEFB84, 0x81EFFB84, 0x81F0FB84, 0x81F1FB84, 0x81F2FB84, 0x81F3FB84, 0x81F4FB84, 0x81F5FB84, 0x81F6FB84, 0x81F7FB84, 0x81F8FB84, 0x81F9FB84, 0x81FAFB84, 0x81FBFB84, + 0x81FCFB84, 0x81FDFB84, 0x81FEFB84, 0x81FFFB84, 0x8200FB84, 0x8201FB84, 0x8202FB84, 0x8203FB84, 0x8204FB84, 0x8205FB84, 0x8206FB84, 0x8207FB84, 0x8208FB84, 0x8209FB84, 0x820AFB84, + 0x820BFB84, 0x820CFB84, 0x820DFB84, 0x820EFB84, 0x820FFB84, 0x8210FB84, 0x8211FB84, 0x8212FB84, 0x8213FB84, 0x8214FB84, 0x8215FB84, 0x8216FB84, 0x8217FB84, 0x8218FB84, 0x8219FB84, + 0x821AFB84, 0x821BFB84, 0x821CFB84, 0x821DFB84, 0x821EFB84, 0x821FFB84, 0x8220FB84, 0x8221FB84, 0x8222FB84, 0x8223FB84, 0x8224FB84, 0x8225FB84, 0x8226FB84, 0x8227FB84, 0x8228FB84, + 0x8229FB84, 0x822AFB84, 0x822BFB84, 0x822CFB84, 0x822DFB84, 0x822EFB84, 0x822FFB84, 0x8230FB84, 0x8231FB84, 0x8232FB84, 0x8233FB84, 0x8234FB84, 0x8235FB84, 0x8236FB84, 0x8237FB84, + 0x8238FB84, 0x8239FB84, 0x823AFB84, 0x823BFB84, 0x823CFB84, 0x823DFB84, 0x823EFB84, 0x823FFB84, 0x8240FB84, 0x8241FB84, 0x8242FB84, 0x8243FB84, 0x8244FB84, 0x8245FB84, 0x8246FB84, + 0x8247FB84, 0x8248FB84, 0x8249FB84, 0x824AFB84, 0x824BFB84, 0x824CFB84, 0x824DFB84, 0x824EFB84, 0x824FFB84, 0x8250FB84, 0x8251FB84, 0x8252FB84, 0x8253FB84, 0x8254FB84, 0x8255FB84, + 0x8256FB84, 0x8257FB84, 0x8258FB84, 0x8259FB84, 0x825AFB84, 0x825BFB84, 0x825CFB84, 0x825DFB84, 0x825EFB84, 0x825FFB84, 0x8260FB84, 0x8261FB84, 0x8262FB84, 0x8263FB84, 0x8264FB84, + 0x8265FB84, 0x8266FB84, 0x8267FB84, 0x8268FB84, 0x8269FB84, 0x826AFB84, 0x826BFB84, 0x826CFB84, 0x826DFB84, 0x826EFB84, 0x826FFB84, 0x8270FB84, 0x8271FB84, 0x8272FB84, 0x8273FB84, + 0x8274FB84, 0x8275FB84, 0x8276FB84, 0x8277FB84, 0x8278FB84, 0x8279FB84, 0x827AFB84, 0x827BFB84, 0x827CFB84, 0x827DFB84, 0x827EFB84, 0x827FFB84, 0x8280FB84, 0x8281FB84, 0x8282FB84, + 0x8283FB84, 0x8284FB84, 0x8285FB84, 0x8286FB84, 0x8287FB84, 0x8288FB84, 0x8289FB84, 
0x828AFB84, 0x828BFB84, 0x828CFB84, 0x828DFB84, 0x828EFB84, 0x828FFB84, 0x8290FB84, 0x8291FB84, + 0x8292FB84, 0x8293FB84, 0x8294FB84, 0x8295FB84, 0x8296FB84, 0x8297FB84, 0x8298FB84, 0x8299FB84, 0x829AFB84, 0x829BFB84, 0x829CFB84, 0x829DFB84, 0x829EFB84, 0x829FFB84, 0x82A0FB84, + 0x82A1FB84, 0x82A2FB84, 0x82A3FB84, 0x82A4FB84, 0x82A5FB84, 0x82A6FB84, 0x82A7FB84, 0x82A8FB84, 0x82A9FB84, 0x82AAFB84, 0x82ABFB84, 0x82ACFB84, 0x82ADFB84, 0x82AEFB84, 0x82AFFB84, + 0x82B0FB84, 0x82B1FB84, 0x82B2FB84, 0x82B3FB84, 0x82B4FB84, 0x82B5FB84, 0x82B6FB84, 0x82B7FB84, 0x82B8FB84, 0x82B9FB84, 0x82BAFB84, 0x82BBFB84, 0x82BCFB84, 0x82BDFB84, 0x82BEFB84, + 0x82BFFB84, 0x82C0FB84, 0x82C1FB84, 0x82C2FB84, 0x82C3FB84, 0x82C4FB84, 0x82C5FB84, 0x82C6FB84, 0x82C7FB84, 0x82C8FB84, 0x82C9FB84, 0x82CAFB84, 0x82CBFB84, 0x82CCFB84, 0x82CDFB84, + 0x82CEFB84, 0x82CFFB84, 0x82D0FB84, 0x82D1FB84, 0x82D2FB84, 0x82D3FB84, 0x82D4FB84, 0x82D5FB84, 0x82D6FB84, 0x82D7FB84, 0x82D8FB84, 0x82D9FB84, 0x82DAFB84, 0x82DBFB84, 0x82DCFB84, + 0x82DDFB84, 0x82DEFB84, 0x82DFFB84, 0x82E0FB84, 0x82E1FB84, 0x82E2FB84, 0x82E3FB84, 0x82E4FB84, 0x82E5FB84, 0x82E6FB84, 0x82E7FB84, 0x82E8FB84, 0x82E9FB84, 0x82EAFB84, 0x82EBFB84, + 0x82ECFB84, 0x82EDFB84, 0x82EEFB84, 0x82EFFB84, 0x82F0FB84, 0x82F1FB84, 0x82F2FB84, 0x82F3FB84, 0x82F4FB84, 0x82F5FB84, 0x82F6FB84, 0x82F7FB84, 0x82F8FB84, 0x82F9FB84, 0x82FAFB84, + 0x82FBFB84, 0x82FCFB84, 0x82FDFB84, 0x82FEFB84, 0x82FFFB84, 0x8300FB84, 0x8301FB84, 0x8302FB84, 0x8303FB84, 0x8304FB84, 0x8305FB84, 0x8306FB84, 0x8307FB84, 0x8308FB84, 0x8309FB84, + 0x830AFB84, 0x830BFB84, 0x830CFB84, 0x830DFB84, 0x830EFB84, 0x830FFB84, 0x8310FB84, 0x8311FB84, 0x8312FB84, 0x8313FB84, 0x8314FB84, 0x8315FB84, 0x8316FB84, 0x8317FB84, 0x8318FB84, + 0x8319FB84, 0x831AFB84, 0x831BFB84, 0x831CFB84, 0x831DFB84, 0x831EFB84, 0x831FFB84, 0x8320FB84, 0x8321FB84, 0x8322FB84, 0x8323FB84, 0x8324FB84, 0x8325FB84, 0x8326FB84, 0x8327FB84, + 0x8328FB84, 0x8329FB84, 0x832AFB84, 0x832BFB84, 0x832CFB84, 0x832DFB84, 
0x832EFB84, 0x832FFB84, 0x8330FB84, 0x8331FB84, 0x8332FB84, 0x8333FB84, 0x8334FB84, 0x8335FB84, 0x8336FB84, + 0x8337FB84, 0x8338FB84, 0x8339FB84, 0x833AFB84, 0x833BFB84, 0x833CFB84, 0x833DFB84, 0x833EFB84, 0x833FFB84, 0x8340FB84, 0x8341FB84, 0x8342FB84, 0x8343FB84, 0x8344FB84, 0x8345FB84, + 0x8346FB84, 0x8347FB84, 0x8348FB84, 0x8349FB84, 0x834AFB84, 0x834BFB84, 0x834CFB84, 0x834DFB84, 0x834EFB84, 0x834FFB84, 0x8350FB84, 0x8351FB84, 0x8352FB84, 0x8353FB84, 0x8354FB84, + 0x8355FB84, 0x8356FB84, 0x8357FB84, 0x8358FB84, 0x8359FB84, 0x835AFB84, 0x835BFB84, 0x835CFB84, 0x835DFB84, 0x835EFB84, 0x835FFB84, 0x8360FB84, 0x8361FB84, 0x8362FB84, 0x8363FB84, + 0x8364FB84, 0x8365FB84, 0x8366FB84, 0x8367FB84, 0x8368FB84, 0x8369FB84, 0x836AFB84, 0x836BFB84, 0x836CFB84, 0x836DFB84, 0x836EFB84, 0x836FFB84, 0x8370FB84, 0x8371FB84, 0x8372FB84, + 0x8373FB84, 0x8374FB84, 0x8375FB84, 0x8376FB84, 0x8377FB84, 0x8378FB84, 0x8379FB84, 0x837AFB84, 0x837BFB84, 0x837CFB84, 0x837DFB84, 0x837EFB84, 0x837FFB84, 0x8380FB84, 0x8381FB84, + 0x8382FB84, 0x8383FB84, 0x8384FB84, 0x8385FB84, 0x8386FB84, 0x8387FB84, 0x8388FB84, 0x8389FB84, 0x838AFB84, 0x838BFB84, 0x838CFB84, 0x838DFB84, 0x838EFB84, 0x838FFB84, 0x8390FB84, + 0x8391FB84, 0x8392FB84, 0x8393FB84, 0x8394FB84, 0x8395FB84, 0x8396FB84, 0x8397FB84, 0x8398FB84, 0x8399FB84, 0x839AFB84, 0x839BFB84, 0x839CFB84, 0x839DFB84, 0x839EFB84, 0x839FFB84, + 0x83A0FB84, 0x83A1FB84, 0x83A2FB84, 0x83A3FB84, 0x83A4FB84, 0x83A5FB84, 0x83A6FB84, 0x83A7FB84, 0x83A8FB84, 0x83A9FB84, 0x83AAFB84, 0x83ABFB84, 0x83ACFB84, 0x83ADFB84, 0x83AEFB84, + 0x83AFFB84, 0x83B0FB84, 0x83B1FB84, 0x83B2FB84, 0x83B3FB84, 0x83B4FB84, 0x83B5FB84, 0x83B6FB84, 0x83B7FB84, 0x83B8FB84, 0x83B9FB84, 0x83BAFB84, 0x83BBFB84, 0x83BCFB84, 0x83BDFB84, + 0x83BEFB84, 0x83BFFB84, 0x83C0FB84, 0x83C1FB84, 0x83C2FB84, 0x83C3FB84, 0x83C4FB84, 0x83C5FB84, 0x83C6FB84, 0x83C7FB84, 0x83C8FB84, 0x83C9FB84, 0x83CAFB84, 0x83CBFB84, 0x83CCFB84, + 0x83CDFB84, 0x83CEFB84, 0x83CFFB84, 0x83D0FB84, 0x83D1FB84, 
0x83D2FB84, 0x83D3FB84, 0x83D4FB84, 0x83D5FB84, 0x83D6FB84, 0x83D7FB84, 0x83D8FB84, 0x83D9FB84, 0x83DAFB84, 0x83DBFB84, + 0x83DCFB84, 0x83DDFB84, 0x83DEFB84, 0x83DFFB84, 0x83E0FB84, 0x83E1FB84, 0x83E2FB84, 0x83E3FB84, 0x83E4FB84, 0x83E5FB84, 0x83E6FB84, 0x83E7FB84, 0x83E8FB84, 0x83E9FB84, 0x83EAFB84, + 0x83EBFB84, 0x83ECFB84, 0x83EDFB84, 0x83EEFB84, 0x83EFFB84, 0x83F0FB84, 0x83F1FB84, 0x83F2FB84, 0x83F3FB84, 0x83F4FB84, 0x83F5FB84, 0x83F6FB84, 0x83F7FB84, 0x83F8FB84, 0x83F9FB84, + 0x83FAFB84, 0x83FBFB84, 0x83FCFB84, 0x83FDFB84, 0x83FEFB84, 0x83FFFB84, 0x8400FB84, 0x8401FB84, 0x8402FB84, 0x8403FB84, 0x8404FB84, 0x8405FB84, 0x8406FB84, 0x8407FB84, 0x8408FB84, + 0x8409FB84, 0x840AFB84, 0x840BFB84, 0x840CFB84, 0x840DFB84, 0x840EFB84, 0x840FFB84, 0x8410FB84, 0x8411FB84, 0x8412FB84, 0x8413FB84, 0x8414FB84, 0x8415FB84, 0x8416FB84, 0x8417FB84, + 0x8418FB84, 0x8419FB84, 0x841AFB84, 0x841BFB84, 0x841CFB84, 0x841DFB84, 0x841EFB84, 0x841FFB84, 0x8420FB84, 0x8421FB84, 0x8422FB84, 0x8423FB84, 0x8424FB84, 0x8425FB84, 0x8426FB84, + 0x8427FB84, 0x8428FB84, 0x8429FB84, 0x842AFB84, 0x842BFB84, 0x842CFB84, 0x842DFB84, 0x842EFB84, 0x842FFB84, 0x8430FB84, 0x8431FB84, 0x8432FB84, 0x8433FB84, 0x8434FB84, 0x8435FB84, + 0x8436FB84, 0x8437FB84, 0x8438FB84, 0x8439FB84, 0x843AFB84, 0x843BFB84, 0x843CFB84, 0x843DFB84, 0x843EFB84, 0x843FFB84, 0x8440FB84, 0x8441FB84, 0x8442FB84, 0x8443FB84, 0x8444FB84, + 0x8445FB84, 0x8446FB84, 0x8447FB84, 0x8448FB84, 0x8449FB84, 0x844AFB84, 0x844BFB84, 0x844CFB84, 0x844DFB84, 0x844EFB84, 0x844FFB84, 0x8450FB84, 0x8451FB84, 0x8452FB84, 0x8453FB84, + 0x8454FB84, 0x8455FB84, 0x8456FB84, 0x8457FB84, 0x8458FB84, 0x8459FB84, 0x845AFB84, 0x845BFB84, 0x845CFB84, 0x845DFB84, 0x845EFB84, 0x845FFB84, 0x8460FB84, 0x8461FB84, 0x8462FB84, + 0x8463FB84, 0x8464FB84, 0x8465FB84, 0x8466FB84, 0x8467FB84, 0x8468FB84, 0x8469FB84, 0x846AFB84, 0x846BFB84, 0x846CFB84, 0x846DFB84, 0x846EFB84, 0x846FFB84, 0x8470FB84, 0x8471FB84, + 0x8472FB84, 0x8473FB84, 0x8474FB84, 0x8475FB84, 
0x8476FB84, 0x8477FB84, 0x8478FB84, 0x8479FB84, 0x847AFB84, 0x847BFB84, 0x847CFB84, 0x847DFB84, 0x847EFB84, 0x847FFB84, 0x8480FB84, + 0x8481FB84, 0x8482FB84, 0x8483FB84, 0x8484FB84, 0x8485FB84, 0x8486FB84, 0x8487FB84, 0x8488FB84, 0x8489FB84, 0x848AFB84, 0x848BFB84, 0x848CFB84, 0x848DFB84, 0x848EFB84, 0x848FFB84, + 0x8490FB84, 0x8491FB84, 0x8492FB84, 0x8493FB84, 0x8494FB84, 0x8495FB84, 0x8496FB84, 0x8497FB84, 0x8498FB84, 0x8499FB84, 0x849AFB84, 0x849BFB84, 0x849CFB84, 0x849DFB84, 0x849EFB84, + 0x849FFB84, 0x84A0FB84, 0x84A1FB84, 0x84A2FB84, 0x84A3FB84, 0x84A4FB84, 0x84A5FB84, 0x84A6FB84, 0x84A7FB84, 0x84A8FB84, 0x84A9FB84, 0x84AAFB84, 0x84ABFB84, 0x84ACFB84, 0x84ADFB84, + 0x84AEFB84, 0x84AFFB84, 0x84B0FB84, 0x84B1FB84, 0x84B2FB84, 0x84B3FB84, 0x84B4FB84, 0x84B5FB84, 0x84B6FB84, 0x84B7FB84, 0x84B8FB84, 0x84B9FB84, 0x84BAFB84, 0x84BBFB84, 0x84BCFB84, + 0x84BDFB84, 0x84BEFB84, 0x84BFFB84, 0x84C0FB84, 0x84C1FB84, 0x84C2FB84, 0x84C3FB84, 0x84C4FB84, 0x84C5FB84, 0x84C6FB84, 0x84C7FB84, 0x84C8FB84, 0x84C9FB84, 0x84CAFB84, 0x84CBFB84, + 0x84CCFB84, 0x84CDFB84, 0x84CEFB84, 0x84CFFB84, 0x84D0FB84, 0x84D1FB84, 0x84D2FB84, 0x84D3FB84, 0x84D4FB84, 0x84D5FB84, 0x84D6FB84, 0x84D7FB84, 0x84D8FB84, 0x84D9FB84, 0x84DAFB84, + 0x84DBFB84, 0x84DCFB84, 0x84DDFB84, 0x84DEFB84, 0x84DFFB84, 0x84E0FB84, 0x84E1FB84, 0x84E2FB84, 0x84E3FB84, 0x84E4FB84, 0x84E5FB84, 0x84E6FB84, 0x84E7FB84, 0x84E8FB84, 0x84E9FB84, + 0x84EAFB84, 0x84EBFB84, 0x84ECFB84, 0x84EDFB84, 0x84EEFB84, 0x84EFFB84, 0x84F0FB84, 0x84F1FB84, 0x84F2FB84, 0x84F3FB84, 0x84F4FB84, 0x84F5FB84, 0x84F6FB84, 0x84F7FB84, 0x84F8FB84, + 0x84F9FB84, 0x84FAFB84, 0x84FBFB84, 0x84FCFB84, 0x84FDFB84, 0x84FEFB84, 0x84FFFB84, 0x8500FB84, 0x8501FB84, 0x8502FB84, 0x8503FB84, 0x8504FB84, 0x8505FB84, 0x8506FB84, 0x8507FB84, + 0x8508FB84, 0x8509FB84, 0x850AFB84, 0x850BFB84, 0x850CFB84, 0x850DFB84, 0x850EFB84, 0x850FFB84, 0x8510FB84, 0x8511FB84, 0x8512FB84, 0x8513FB84, 0x8514FB84, 0x8515FB84, 0x8516FB84, + 0x8517FB84, 0x8518FB84, 0x8519FB84, 
0x851AFB84, 0x851BFB84, 0x851CFB84, 0x851DFB84, 0x851EFB84, 0x851FFB84, 0x8520FB84, 0x8521FB84, 0x8522FB84, 0x8523FB84, 0x8524FB84, 0x8525FB84, + 0x8526FB84, 0x8527FB84, 0x8528FB84, 0x8529FB84, 0x852AFB84, 0x852BFB84, 0x852CFB84, 0x852DFB84, 0x852EFB84, 0x852FFB84, 0x8530FB84, 0x8531FB84, 0x8532FB84, 0x8533FB84, 0x8534FB84, + 0x8535FB84, 0x8536FB84, 0x8537FB84, 0x8538FB84, 0x8539FB84, 0x853AFB84, 0x853BFB84, 0x853CFB84, 0x853DFB84, 0x853EFB84, 0x853FFB84, 0x8540FB84, 0x8541FB84, 0x8542FB84, 0x8543FB84, + 0x8544FB84, 0x8545FB84, 0x8546FB84, 0x8547FB84, 0x8548FB84, 0x8549FB84, 0x854AFB84, 0x854BFB84, 0x854CFB84, 0x854DFB84, 0x854EFB84, 0x854FFB84, 0x8550FB84, 0x8551FB84, 0x8552FB84, + 0x8553FB84, 0x8554FB84, 0x8555FB84, 0x8556FB84, 0x8557FB84, 0x8558FB84, 0x8559FB84, 0x855AFB84, 0x855BFB84, 0x855CFB84, 0x855DFB84, 0x855EFB84, 0x855FFB84, 0x8560FB84, 0x8561FB84, + 0x8562FB84, 0x8563FB84, 0x8564FB84, 0x8565FB84, 0x8566FB84, 0x8567FB84, 0x8568FB84, 0x8569FB84, 0x856AFB84, 0x856BFB84, 0x856CFB84, 0x856DFB84, 0x856EFB84, 0x856FFB84, 0x8570FB84, + 0x8571FB84, 0x8572FB84, 0x8573FB84, 0x8574FB84, 0x8575FB84, 0x8576FB84, 0x8577FB84, 0x8578FB84, 0x8579FB84, 0x857AFB84, 0x857BFB84, 0x857CFB84, 0x857DFB84, 0x857EFB84, 0x857FFB84, + 0x8580FB84, 0x8581FB84, 0x8582FB84, 0x8583FB84, 0x8584FB84, 0x8585FB84, 0x8586FB84, 0x8587FB84, 0x8588FB84, 0x8589FB84, 0x858AFB84, 0x858BFB84, 0x858CFB84, 0x858DFB84, 0x858EFB84, + 0x858FFB84, 0x8590FB84, 0x8591FB84, 0x8592FB84, 0x8593FB84, 0x8594FB84, 0x8595FB84, 0x8596FB84, 0x8597FB84, 0x8598FB84, 0x8599FB84, 0x859AFB84, 0x859BFB84, 0x859CFB84, 0x859DFB84, + 0x859EFB84, 0x859FFB84, 0x85A0FB84, 0x85A1FB84, 0x85A2FB84, 0x85A3FB84, 0x85A4FB84, 0x85A5FB84, 0x85A6FB84, 0x85A7FB84, 0x85A8FB84, 0x85A9FB84, 0x85AAFB84, 0x85ABFB84, 0x85ACFB84, + 0x85ADFB84, 0x85AEFB84, 0x85AFFB84, 0x85B0FB84, 0x85B1FB84, 0x85B2FB84, 0x85B3FB84, 0x85B4FB84, 0x85B5FB84, 0x85B6FB84, 0x85B7FB84, 0x85B8FB84, 0x85B9FB84, 0x85BAFB84, 0x85BBFB84, + 0x85BCFB84, 0x85BDFB84, 
0x85BEFB84, 0x85BFFB84, 0x85C0FB84, 0x85C1FB84, 0x85C2FB84, 0x85C3FB84, 0x85C4FB84, 0x85C5FB84, 0x85C6FB84, 0x85C7FB84, 0x85C8FB84, 0x85C9FB84, 0x85CAFB84, + 0x85CBFB84, 0x85CCFB84, 0x85CDFB84, 0x85CEFB84, 0x85CFFB84, 0x85D0FB84, 0x85D1FB84, 0x85D2FB84, 0x85D3FB84, 0x85D4FB84, 0x85D5FB84, 0x85D6FB84, 0x85D7FB84, 0x85D8FB84, 0x85D9FB84, + 0x85DAFB84, 0x85DBFB84, 0x85DCFB84, 0x85DDFB84, 0x85DEFB84, 0x85DFFB84, 0x85E0FB84, 0x85E1FB84, 0x85E2FB84, 0x85E3FB84, 0x85E4FB84, 0x85E5FB84, 0x85E6FB84, 0x85E7FB84, 0x85E8FB84, + 0x85E9FB84, 0x85EAFB84, 0x85EBFB84, 0x85ECFB84, 0x85EDFB84, 0x85EEFB84, 0x85EFFB84, 0x85F0FB84, 0x85F1FB84, 0x85F2FB84, 0x85F3FB84, 0x85F4FB84, 0x85F5FB84, 0x85F6FB84, 0x85F7FB84, + 0x85F8FB84, 0x85F9FB84, 0x85FAFB84, 0x85FBFB84, 0x85FCFB84, 0x85FDFB84, 0x85FEFB84, 0x85FFFB84, 0x8600FB84, 0x8601FB84, 0x8602FB84, 0x8603FB84, 0x8604FB84, 0x8605FB84, 0x8606FB84, + 0x8607FB84, 0x8608FB84, 0x8609FB84, 0x860AFB84, 0x860BFB84, 0x860CFB84, 0x860DFB84, 0x860EFB84, 0x860FFB84, 0x8610FB84, 0x8611FB84, 0x8612FB84, 0x8613FB84, 0x8614FB84, 0x8615FB84, + 0x8616FB84, 0x8617FB84, 0x8618FB84, 0x8619FB84, 0x861AFB84, 0x861BFB84, 0x861CFB84, 0x861DFB84, 0x861EFB84, 0x861FFB84, 0x8620FB84, 0x8621FB84, 0x8622FB84, 0x8623FB84, 0x8624FB84, + 0x8625FB84, 0x8626FB84, 0x8627FB84, 0x8628FB84, 0x8629FB84, 0x862AFB84, 0x862BFB84, 0x862CFB84, 0x862DFB84, 0x862EFB84, 0x862FFB84, 0x8630FB84, 0x8631FB84, 0x8632FB84, 0x8633FB84, + 0x8634FB84, 0x8635FB84, 0x8636FB84, 0x8637FB84, 0x8638FB84, 0x8639FB84, 0x863AFB84, 0x863BFB84, 0x863CFB84, 0x863DFB84, 0x863EFB84, 0x863FFB84, 0x8640FB84, 0x8641FB84, 0x8642FB84, + 0x8643FB84, 0x8644FB84, 0x8645FB84, 0x8646FB84, 0x8647FB84, 0x8648FB84, 0x8649FB84, 0x864AFB84, 0x864BFB84, 0x864CFB84, 0x864DFB84, 0x864EFB84, 0x864FFB84, 0x8650FB84, 0x8651FB84, + 0x8652FB84, 0x8653FB84, 0x8654FB84, 0x8655FB84, 0x8656FB84, 0x8657FB84, 0x8658FB84, 0x8659FB84, 0x865AFB84, 0x865BFB84, 0x865CFB84, 0x865DFB84, 0x865EFB84, 0x865FFB84, 0x8660FB84, + 0x8661FB84, 
0x8662FB84, 0x8663FB84, 0x8664FB84, 0x8665FB84, 0x8666FB84, 0x8667FB84, 0x8668FB84, 0x8669FB84, 0x866AFB84, 0x866BFB84, 0x866CFB84, 0x866DFB84, 0x866EFB84, 0x866FFB84, + 0x8670FB84, 0x8671FB84, 0x8672FB84, 0x8673FB84, 0x8674FB84, 0x8675FB84, 0x8676FB84, 0x8677FB84, 0x8678FB84, 0x8679FB84, 0x867AFB84, 0x867BFB84, 0x867CFB84, 0x867DFB84, 0x867EFB84, + 0x867FFB84, 0x8680FB84, 0x8681FB84, 0x8682FB84, 0x8683FB84, 0x8684FB84, 0x8685FB84, 0x8686FB84, 0x8687FB84, 0x8688FB84, 0x8689FB84, 0x868AFB84, 0x868BFB84, 0x868CFB84, 0x868DFB84, + 0x868EFB84, 0x868FFB84, 0x8690FB84, 0x8691FB84, 0x8692FB84, 0x8693FB84, 0x8694FB84, 0x8695FB84, 0x8696FB84, 0x8697FB84, 0x8698FB84, 0x8699FB84, 0x869AFB84, 0x869BFB84, 0x869CFB84, + 0x869DFB84, 0x869EFB84, 0x869FFB84, 0x86A0FB84, 0x86A1FB84, 0x86A2FB84, 0x86A3FB84, 0x86A4FB84, 0x86A5FB84, 0x86A6FB84, 0x86A7FB84, 0x86A8FB84, 0x86A9FB84, 0x86AAFB84, 0x86ABFB84, + 0x86ACFB84, 0x86ADFB84, 0x86AEFB84, 0x86AFFB84, 0x86B0FB84, 0x86B1FB84, 0x86B2FB84, 0x86B3FB84, 0x86B4FB84, 0x86B5FB84, 0x86B6FB84, 0x86B7FB84, 0x86B8FB84, 0x86B9FB84, 0x86BAFB84, + 0x86BBFB84, 0x86BCFB84, 0x86BDFB84, 0x86BEFB84, 0x86BFFB84, 0x86C0FB84, 0x86C1FB84, 0x86C2FB84, 0x86C3FB84, 0x86C4FB84, 0x86C5FB84, 0x86C6FB84, 0x86C7FB84, 0x86C8FB84, 0x86C9FB84, + 0x86CAFB84, 0x86CBFB84, 0x86CCFB84, 0x86CDFB84, 0x86CEFB84, 0x86CFFB84, 0x86D0FB84, 0x86D1FB84, 0x86D2FB84, 0x86D3FB84, 0x86D4FB84, 0x86D5FB84, 0x86D6FB84, 0x86D7FB84, 0x86D8FB84, + 0x86D9FB84, 0x86DAFB84, 0x86DBFB84, 0x86DCFB84, 0x86DDFB84, 0x86DEFB84, 0x86DFFB84, 0x86E0FB84, 0x86E1FB84, 0x86E2FB84, 0x86E3FB84, 0x86E4FB84, 0x86E5FB84, 0x86E6FB84, 0x86E7FB84, + 0x86E8FB84, 0x86E9FB84, 0x86EAFB84, 0x86EBFB84, 0x86ECFB84, 0x86EDFB84, 0x86EEFB84, 0x86EFFB84, 0x86F0FB84, 0x86F1FB84, 0x86F2FB84, 0x86F3FB84, 0x86F4FB84, 0x86F5FB84, 0x86F6FB84, + 0x86F7FB84, 0x86F8FB84, 0x86F9FB84, 0x86FAFB84, 0x86FBFB84, 0x86FCFB84, 0x86FDFB84, 0x86FEFB84, 0x86FFFB84, 0x8700FB84, 0x8701FB84, 0x8702FB84, 0x8703FB84, 0x8704FB84, 0x8705FB84, + 
0x8706FB84, 0x8707FB84, 0x8708FB84, 0x8709FB84, 0x870AFB84, 0x870BFB84, 0x870CFB84, 0x870DFB84, 0x870EFB84, 0x870FFB84, 0x8710FB84, 0x8711FB84, 0x8712FB84, 0x8713FB84, 0x8714FB84, + 0x8715FB84, 0x8716FB84, 0x8717FB84, 0x8718FB84, 0x8719FB84, 0x871AFB84, 0x871BFB84, 0x871CFB84, 0x871DFB84, 0x871EFB84, 0x871FFB84, 0x8720FB84, 0x8721FB84, 0x8722FB84, 0x8723FB84, + 0x8724FB84, 0x8725FB84, 0x8726FB84, 0x8727FB84, 0x8728FB84, 0x8729FB84, 0x872AFB84, 0x872BFB84, 0x872CFB84, 0x872DFB84, 0x872EFB84, 0x872FFB84, 0x8730FB84, 0x8731FB84, 0x8732FB84, + 0x8733FB84, 0x8734FB84, 0x8735FB84, 0x8736FB84, 0x8737FB84, 0x8738FB84, 0x8739FB84, 0x873AFB84, 0x873BFB84, 0x873CFB84, 0x873DFB84, 0x873EFB84, 0x873FFB84, 0x8740FB84, 0x8741FB84, + 0x8742FB84, 0x8743FB84, 0x8744FB84, 0x8745FB84, 0x8746FB84, 0x8747FB84, 0x8748FB84, 0x8749FB84, 0x874AFB84, 0x874BFB84, 0x874CFB84, 0x874DFB84, 0x874EFB84, 0x874FFB84, 0x8750FB84, + 0x8751FB84, 0x8752FB84, 0x8753FB84, 0x8754FB84, 0x8755FB84, 0x8756FB84, 0x8757FB84, 0x8758FB84, 0x8759FB84, 0x875AFB84, 0x875BFB84, 0x875CFB84, 0x875DFB84, 0x875EFB84, 0x875FFB84, + 0x8760FB84, 0x8761FB84, 0x8762FB84, 0x8763FB84, 0x8764FB84, 0x8765FB84, 0x8766FB84, 0x8767FB84, 0x8768FB84, 0x8769FB84, 0x876AFB84, 0x876BFB84, 0x876CFB84, 0x876DFB84, 0x876EFB84, + 0x876FFB84, 0x8770FB84, 0x8771FB84, 0x8772FB84, 0x8773FB84, 0x8774FB84, 0x8775FB84, 0x8776FB84, 0x8777FB84, 0x8778FB84, 0x8779FB84, 0x877AFB84, 0x877BFB84, 0x877CFB84, 0x877DFB84, + 0x877EFB84, 0x877FFB84, 0x8780FB84, 0x8781FB84, 0x8782FB84, 0x8783FB84, 0x8784FB84, 0x8785FB84, 0x8786FB84, 0x8787FB84, 0x8788FB84, 0x8789FB84, 0x878AFB84, 0x878BFB84, 0x878CFB84, + 0x878DFB84, 0x878EFB84, 0x878FFB84, 0x8790FB84, 0x8791FB84, 0x8792FB84, 0x8793FB84, 0x8794FB84, 0x8795FB84, 0x8796FB84, 0x8797FB84, 0x8798FB84, 0x8799FB84, 0x879AFB84, 0x879BFB84, + 0x879CFB84, 0x879DFB84, 0x879EFB84, 0x879FFB84, 0x87A0FB84, 0x87A1FB84, 0x87A2FB84, 0x87A3FB84, 0x87A4FB84, 0x87A5FB84, 0x87A6FB84, 0x87A7FB84, 0x87A8FB84, 0x87A9FB84, 0x87AAFB84, 
+ 0x87ABFB84, 0x87ACFB84, 0x87ADFB84, 0x87AEFB84, 0x87AFFB84, 0x87B0FB84, 0x87B1FB84, 0x87B2FB84, 0x87B3FB84, 0x87B4FB84, 0x87B5FB84, 0x87B6FB84, 0x87B7FB84, 0x87B8FB84, 0x87B9FB84, + 0x87BAFB84, 0x87BBFB84, 0x87BCFB84, 0x87BDFB84, 0x87BEFB84, 0x87BFFB84, 0x87C0FB84, 0x87C1FB84, 0x87C2FB84, 0x87C3FB84, 0x87C4FB84, 0x87C5FB84, 0x87C6FB84, 0x87C7FB84, 0x87C8FB84, + 0x87C9FB84, 0x87CAFB84, 0x87CBFB84, 0x87CCFB84, 0x87CDFB84, 0x87CEFB84, 0x87CFFB84, 0x87D0FB84, 0x87D1FB84, 0x87D2FB84, 0x87D3FB84, 0x87D4FB84, 0x87D5FB84, 0x87D6FB84, 0x87D7FB84, + 0x87D8FB84, 0x87D9FB84, 0x87DAFB84, 0x87DBFB84, 0x87DCFB84, 0x87DDFB84, 0x87DEFB84, 0x87DFFB84, 0x87E0FB84, 0x87E1FB84, 0x87E2FB84, 0x87E3FB84, 0x87E4FB84, 0x87E5FB84, 0x87E6FB84, + 0x87E7FB84, 0x87E8FB84, 0x87E9FB84, 0x87EAFB84, 0x87EBFB84, 0x87ECFB84, 0x87EDFB84, 0x87EEFB84, 0x87EFFB84, 0x87F0FB84, 0x87F1FB84, 0x87F2FB84, 0x87F3FB84, 0x87F4FB84, 0x87F5FB84, + 0x87F6FB84, 0x87F7FB84, 0x87F8FB84, 0x87F9FB84, 0x87FAFB84, 0x87FBFB84, 0x87FCFB84, 0x87FDFB84, 0x87FEFB84, 0x87FFFB84, 0x8800FB84, 0x8801FB84, 0x8802FB84, 0x8803FB84, 0x8804FB84, + 0x8805FB84, 0x8806FB84, 0x8807FB84, 0x8808FB84, 0x8809FB84, 0x880AFB84, 0x880BFB84, 0x880CFB84, 0x880DFB84, 0x880EFB84, 0x880FFB84, 0x8810FB84, 0x8811FB84, 0x8812FB84, 0x8813FB84, + 0x8814FB84, 0x8815FB84, 0x8816FB84, 0x8817FB84, 0x8818FB84, 0x8819FB84, 0x881AFB84, 0x881BFB84, 0x881CFB84, 0x881DFB84, 0x881EFB84, 0x881FFB84, 0x8820FB84, 0x8821FB84, 0x8822FB84, + 0x8823FB84, 0x8824FB84, 0x8825FB84, 0x8826FB84, 0x8827FB84, 0x8828FB84, 0x8829FB84, 0x882AFB84, 0x882BFB84, 0x882CFB84, 0x882DFB84, 0x882EFB84, 0x882FFB84, 0x8830FB84, 0x8831FB84, + 0x8832FB84, 0x8833FB84, 0x8834FB84, 0x8835FB84, 0x8836FB84, 0x8837FB84, 0x8838FB84, 0x8839FB84, 0x883AFB84, 0x883BFB84, 0x883CFB84, 0x883DFB84, 0x883EFB84, 0x883FFB84, 0x8840FB84, + 0x8841FB84, 0x8842FB84, 0x8843FB84, 0x8844FB84, 0x8845FB84, 0x8846FB84, 0x8847FB84, 0x8848FB84, 0x8849FB84, 0x884AFB84, 0x884BFB84, 0x884CFB84, 0x884DFB84, 0x884EFB84, 
0x884FFB84, + 0x8850FB84, 0x8851FB84, 0x8852FB84, 0x8853FB84, 0x8854FB84, 0x8855FB84, 0x8856FB84, 0x8857FB84, 0x8858FB84, 0x8859FB84, 0x885AFB84, 0x885BFB84, 0x885CFB84, 0x885DFB84, 0x885EFB84, + 0x885FFB84, 0x8860FB84, 0x8861FB84, 0x8862FB84, 0x8863FB84, 0x8864FB84, 0x8865FB84, 0x8866FB84, 0x8867FB84, 0x8868FB84, 0x8869FB84, 0x886AFB84, 0x886BFB84, 0x886CFB84, 0x886DFB84, + 0x886EFB84, 0x886FFB84, 0x8870FB84, 0x8871FB84, 0x8872FB84, 0x8873FB84, 0x8874FB84, 0x8875FB84, 0x8876FB84, 0x8877FB84, 0x8878FB84, 0x8879FB84, 0x887AFB84, 0x887BFB84, 0x887CFB84, + 0x887DFB84, 0x887EFB84, 0x887FFB84, 0x8880FB84, 0x8881FB84, 0x8882FB84, 0x8883FB84, 0x8884FB84, 0x8885FB84, 0x8886FB84, 0x8887FB84, 0x8888FB84, 0x8889FB84, 0x888AFB84, 0x888BFB84, + 0x888CFB84, 0x888DFB84, 0x888EFB84, 0x888FFB84, 0x8890FB84, 0x8891FB84, 0x8892FB84, 0x8893FB84, 0x8894FB84, 0x8895FB84, 0x8896FB84, 0x8897FB84, 0x8898FB84, 0x8899FB84, 0x889AFB84, + 0x889BFB84, 0x889CFB84, 0x889DFB84, 0x889EFB84, 0x889FFB84, 0x88A0FB84, 0x88A1FB84, 0x88A2FB84, 0x88A3FB84, 0x88A4FB84, 0x88A5FB84, 0x88A6FB84, 0x88A7FB84, 0x88A8FB84, 0x88A9FB84, + 0x88AAFB84, 0x88ABFB84, 0x88ACFB84, 0x88ADFB84, 0x88AEFB84, 0x88AFFB84, 0x88B0FB84, 0x88B1FB84, 0x88B2FB84, 0x88B3FB84, 0x88B4FB84, 0x88B5FB84, 0x88B6FB84, 0x88B7FB84, 0x88B8FB84, + 0x88B9FB84, 0x88BAFB84, 0x88BBFB84, 0x88BCFB84, 0x88BDFB84, 0x88BEFB84, 0x88BFFB84, 0x88C0FB84, 0x88C1FB84, 0x88C2FB84, 0x88C3FB84, 0x88C4FB84, 0x88C5FB84, 0x88C6FB84, 0x88C7FB84, + 0x88C8FB84, 0x88C9FB84, 0x88CAFB84, 0x88CBFB84, 0x88CCFB84, 0x88CDFB84, 0x88CEFB84, 0x88CFFB84, 0x88D0FB84, 0x88D1FB84, 0x88D2FB84, 0x88D3FB84, 0x88D4FB84, 0x88D5FB84, 0x88D6FB84, + 0x88D7FB84, 0x88D8FB84, 0x88D9FB84, 0x88DAFB84, 0x88DBFB84, 0x88DCFB84, 0x88DDFB84, 0x88DEFB84, 0x88DFFB84, 0x88E0FB84, 0x88E1FB84, 0x88E2FB84, 0x88E3FB84, 0x88E4FB84, 0x88E5FB84, + 0x88E6FB84, 0x88E7FB84, 0x88E8FB84, 0x88E9FB84, 0x88EAFB84, 0x88EBFB84, 0x88ECFB84, 0x88EDFB84, 0x88EEFB84, 0x88EFFB84, 0x88F0FB84, 0x88F1FB84, 0x88F2FB84, 
0x88F3FB84, 0x88F4FB84, + 0x88F5FB84, 0x88F6FB84, 0x88F7FB84, 0x88F8FB84, 0x88F9FB84, 0x88FAFB84, 0x88FBFB84, 0x88FCFB84, 0x88FDFB84, 0x88FEFB84, 0x88FFFB84, 0x8900FB84, 0x8901FB84, 0x8902FB84, 0x8903FB84, + 0x8904FB84, 0x8905FB84, 0x8906FB84, 0x8907FB84, 0x8908FB84, 0x8909FB84, 0x890AFB84, 0x890BFB84, 0x890CFB84, 0x890DFB84, 0x890EFB84, 0x890FFB84, 0x8910FB84, 0x8911FB84, 0x8912FB84, + 0x8913FB84, 0x8914FB84, 0x8915FB84, 0x8916FB84, 0x8917FB84, 0x8918FB84, 0x8919FB84, 0x891AFB84, 0x891BFB84, 0x891CFB84, 0x891DFB84, 0x891EFB84, 0x891FFB84, 0x8920FB84, 0x8921FB84, + 0x8922FB84, 0x8923FB84, 0x8924FB84, 0x8925FB84, 0x8926FB84, 0x8927FB84, 0x8928FB84, 0x8929FB84, 0x892AFB84, 0x892BFB84, 0x892CFB84, 0x892DFB84, 0x892EFB84, 0x892FFB84, 0x8930FB84, + 0x8931FB84, 0x8932FB84, 0x8933FB84, 0x8934FB84, 0x8935FB84, 0x8936FB84, 0x8937FB84, 0x8938FB84, 0x8939FB84, 0x893AFB84, 0x893BFB84, 0x893CFB84, 0x893DFB84, 0x893EFB84, 0x893FFB84, + 0x8940FB84, 0x8941FB84, 0x8942FB84, 0x8943FB84, 0x8944FB84, 0x8945FB84, 0x8946FB84, 0x8947FB84, 0x8948FB84, 0x8949FB84, 0x894AFB84, 0x894BFB84, 0x894CFB84, 0x894DFB84, 0x894EFB84, + 0x894FFB84, 0x8950FB84, 0x8951FB84, 0x8952FB84, 0x8953FB84, 0x8954FB84, 0x8955FB84, 0x8956FB84, 0x8957FB84, 0x8958FB84, 0x8959FB84, 0x895AFB84, 0x895BFB84, 0x895CFB84, 0x895DFB84, + 0x895EFB84, 0x895FFB84, 0x8960FB84, 0x8961FB84, 0x8962FB84, 0x8963FB84, 0x8964FB84, 0x8965FB84, 0x8966FB84, 0x8967FB84, 0x8968FB84, 0x8969FB84, 0x896AFB84, 0x896BFB84, 0x896CFB84, + 0x896DFB84, 0x896EFB84, 0x896FFB84, 0x8970FB84, 0x8971FB84, 0x8972FB84, 0x8973FB84, 0x8974FB84, 0x8975FB84, 0x8976FB84, 0x8977FB84, 0x8978FB84, 0x8979FB84, 0x897AFB84, 0x897BFB84, + 0x897CFB84, 0x897DFB84, 0x897EFB84, 0x897FFB84, 0x8980FB84, 0x8981FB84, 0x8982FB84, 0x8983FB84, 0x8984FB84, 0x8985FB84, 0x8986FB84, 0x8987FB84, 0x8988FB84, 0x8989FB84, 0x898AFB84, + 0x898BFB84, 0x898CFB84, 0x898DFB84, 0x898EFB84, 0x898FFB84, 0x8990FB84, 0x8991FB84, 0x8992FB84, 0x8993FB84, 0x8994FB84, 0x8995FB84, 0x8996FB84, 
0x8997FB84, 0x8998FB84, 0x8999FB84, + 0x899AFB84, 0x899BFB84, 0x899CFB84, 0x899DFB84, 0x899EFB84, 0x899FFB84, 0x89A0FB84, 0x89A1FB84, 0x89A2FB84, 0x89A3FB84, 0x89A4FB84, 0x89A5FB84, 0x89A6FB84, 0x89A7FB84, 0x89A8FB84, + 0x89A9FB84, 0x89AAFB84, 0x89ABFB84, 0x89ACFB84, 0x89ADFB84, 0x89AEFB84, 0x89AFFB84, 0x89B0FB84, 0x89B1FB84, 0x89B2FB84, 0x89B3FB84, 0x89B4FB84, 0x89B5FB84, 0x89B6FB84, 0x89B7FB84, + 0x89B8FB84, 0x89B9FB84, 0x89BAFB84, 0x89BBFB84, 0x89BCFB84, 0x89BDFB84, 0x89BEFB84, 0x89BFFB84, 0x89C0FB84, 0x89C1FB84, 0x89C2FB84, 0x89C3FB84, 0x89C4FB84, 0x89C5FB84, 0x89C6FB84, + 0x89C7FB84, 0x89C8FB84, 0x89C9FB84, 0x89CAFB84, 0x89CBFB84, 0x89CCFB84, 0x89CDFB84, 0x89CEFB84, 0x89CFFB84, 0x89D0FB84, 0x89D1FB84, 0x89D2FB84, 0x89D3FB84, 0x89D4FB84, 0x89D5FB84, + 0x89D6FB84, 0x89D7FB84, 0x89D8FB84, 0x89D9FB84, 0x89DAFB84, 0x89DBFB84, 0x89DCFB84, 0x89DDFB84, 0x89DEFB84, 0x89DFFB84, 0x89E0FB84, 0x89E1FB84, 0x89E2FB84, 0x89E3FB84, 0x89E4FB84, + 0x89E5FB84, 0x89E6FB84, 0x89E7FB84, 0x89E8FB84, 0x89E9FB84, 0x89EAFB84, 0x89EBFB84, 0x89ECFB84, 0x89EDFB84, 0x89EEFB84, 0x89EFFB84, 0x89F0FB84, 0x89F1FB84, 0x89F2FB84, 0x89F3FB84, + 0x89F4FB84, 0x89F5FB84, 0x89F6FB84, 0x89F7FB84, 0x89F8FB84, 0x89F9FB84, 0x89FAFB84, 0x89FBFB84, 0x89FCFB84, 0x89FDFB84, 0x89FEFB84, 0x89FFFB84, 0x8A00FB84, 0x8A01FB84, 0x8A02FB84, + 0x8A03FB84, 0x8A04FB84, 0x8A05FB84, 0x8A06FB84, 0x8A07FB84, 0x8A08FB84, 0x8A09FB84, 0x8A0AFB84, 0x8A0BFB84, 0x8A0CFB84, 0x8A0DFB84, 0x8A0EFB84, 0x8A0FFB84, 0x8A10FB84, 0x8A11FB84, + 0x8A12FB84, 0x8A13FB84, 0x8A14FB84, 0x8A15FB84, 0x8A16FB84, 0x8A17FB84, 0x8A18FB84, 0x8A19FB84, 0x8A1AFB84, 0x8A1BFB84, 0x8A1CFB84, 0x8A1DFB84, 0x8A1EFB84, 0x8A1FFB84, 0x8A20FB84, + 0x8A21FB84, 0x8A22FB84, 0x8A23FB84, 0x8A24FB84, 0x8A25FB84, 0x8A26FB84, 0x8A27FB84, 0x8A28FB84, 0x8A29FB84, 0x8A2AFB84, 0x8A2BFB84, 0x8A2CFB84, 0x8A2DFB84, 0x8A2EFB84, 0x8A2FFB84, + 0x8A30FB84, 0x8A31FB84, 0x8A32FB84, 0x8A33FB84, 0x8A34FB84, 0x8A35FB84, 0x8A36FB84, 0x8A37FB84, 0x8A38FB84, 0x8A39FB84, 0x8A3AFB84, 
0x8A3BFB84, 0x8A3CFB84, 0x8A3DFB84, 0x8A3EFB84, + 0x8A3FFB84, 0x8A40FB84, 0x8A41FB84, 0x8A42FB84, 0x8A43FB84, 0x8A44FB84, 0x8A45FB84, 0x8A46FB84, 0x8A47FB84, 0x8A48FB84, 0x8A49FB84, 0x8A4AFB84, 0x8A4BFB84, 0x8A4CFB84, 0x8A4DFB84, + 0x8A4EFB84, 0x8A4FFB84, 0x8A50FB84, 0x8A51FB84, 0x8A52FB84, 0x8A53FB84, 0x8A54FB84, 0x8A55FB84, 0x8A56FB84, 0x8A57FB84, 0x8A58FB84, 0x8A59FB84, 0x8A5AFB84, 0x8A5BFB84, 0x8A5CFB84, + 0x8A5DFB84, 0x8A5EFB84, 0x8A5FFB84, 0x8A60FB84, 0x8A61FB84, 0x8A62FB84, 0x8A63FB84, 0x8A64FB84, 0x8A65FB84, 0x8A66FB84, 0x8A67FB84, 0x8A68FB84, 0x8A69FB84, 0x8A6AFB84, 0x8A6BFB84, + 0x8A6CFB84, 0x8A6DFB84, 0x8A6EFB84, 0x8A6FFB84, 0x8A70FB84, 0x8A71FB84, 0x8A72FB84, 0x8A73FB84, 0x8A74FB84, 0x8A75FB84, 0x8A76FB84, 0x8A77FB84, 0x8A78FB84, 0x8A79FB84, 0x8A7AFB84, + 0x8A7BFB84, 0x8A7CFB84, 0x8A7DFB84, 0x8A7EFB84, 0x8A7FFB84, 0x8A80FB84, 0x8A81FB84, 0x8A82FB84, 0x8A83FB84, 0x8A84FB84, 0x8A85FB84, 0x8A86FB84, 0x8A87FB84, 0x8A88FB84, 0x8A89FB84, + 0x8A8AFB84, 0x8A8BFB84, 0x8A8CFB84, 0x8A8DFB84, 0x8A8EFB84, 0x8A8FFB84, 0x8A90FB84, 0x8A91FB84, 0x8A92FB84, 0x8A93FB84, 0x8A94FB84, 0x8A95FB84, 0x8A96FB84, 0x8A97FB84, 0x8A98FB84, + 0x8A99FB84, 0x8A9AFB84, 0x8A9BFB84, 0x8A9CFB84, 0x8A9DFB84, 0x8A9EFB84, 0x8A9FFB84, 0x8AA0FB84, 0x8AA1FB84, 0x8AA2FB84, 0x8AA3FB84, 0x8AA4FB84, 0x8AA5FB84, 0x8AA6FB84, 0x8AA7FB84, + 0x8AA8FB84, 0x8AA9FB84, 0x8AAAFB84, 0x8AABFB84, 0x8AACFB84, 0x8AADFB84, 0x8AAEFB84, 0x8AAFFB84, 0x8AB0FB84, 0x8AB1FB84, 0x8AB2FB84, 0x8AB3FB84, 0x8AB4FB84, 0x8AB5FB84, 0x8AB6FB84, + 0x8AB7FB84, 0x8AB8FB84, 0x8AB9FB84, 0x8ABAFB84, 0x8ABBFB84, 0x8ABCFB84, 0x8ABDFB84, 0x8ABEFB84, 0x8ABFFB84, 0x8AC0FB84, 0x8AC1FB84, 0x8AC2FB84, 0x8AC3FB84, 0x8AC4FB84, 0x8AC5FB84, + 0x8AC6FB84, 0x8AC7FB84, 0x8AC8FB84, 0x8AC9FB84, 0x8ACAFB84, 0x8ACBFB84, 0x8ACCFB84, 0x8ACDFB84, 0x8ACEFB84, 0x8ACFFB84, 0x8AD0FB84, 0x8AD1FB84, 0x8AD2FB84, 0x8AD3FB84, 0x8AD4FB84, + 0x8AD5FB84, 0x8AD6FB84, 0x8AD7FB84, 0x8AD8FB84, 0x8AD9FB84, 0x8ADAFB84, 0x8ADBFB84, 0x8ADCFB84, 0x8ADDFB84, 0x8ADEFB84, 
0x8ADFFB84, 0x8AE0FB84, 0x8AE1FB84, 0x8AE2FB84, 0x8AE3FB84, + 0x8AE4FB84, 0x8AE5FB84, 0x8AE6FB84, 0x8AE7FB84, 0x8AE8FB84, 0x8AE9FB84, 0x8AEAFB84, 0x8AEBFB84, 0x8AECFB84, 0x8AEDFB84, 0x8AEEFB84, 0x8AEFFB84, 0x8AF0FB84, 0x8AF1FB84, 0x8AF2FB84, + 0x8AF3FB84, 0x8AF4FB84, 0x8AF5FB84, 0x8AF6FB84, 0x8AF7FB84, 0x8AF8FB84, 0x8AF9FB84, 0x8AFAFB84, 0x8AFBFB84, 0x8AFCFB84, 0x8AFDFB84, 0x8AFEFB84, 0x8AFFFB84, 0x8B00FB84, 0x8B01FB84, + 0x8B02FB84, 0x8B03FB84, 0x8B04FB84, 0x8B05FB84, 0x8B06FB84, 0x8B07FB84, 0x8B08FB84, 0x8B09FB84, 0x8B0AFB84, 0x8B0BFB84, 0x8B0CFB84, 0x8B0DFB84, 0x8B0EFB84, 0x8B0FFB84, 0x8B10FB84, + 0x8B11FB84, 0x8B12FB84, 0x8B13FB84, 0x8B14FB84, 0x8B15FB84, 0x8B16FB84, 0x8B17FB84, 0x8B18FB84, 0x8B19FB84, 0x8B1AFB84, 0x8B1BFB84, 0x8B1CFB84, 0x8B1DFB84, 0x8B1EFB84, 0x8B1FFB84, + 0x8B20FB84, 0x8B21FB84, 0x8B22FB84, 0x8B23FB84, 0x8B24FB84, 0x8B25FB84, 0x8B26FB84, 0x8B27FB84, 0x8B28FB84, 0x8B29FB84, 0x8B2AFB84, 0x8B2BFB84, 0x8B2CFB84, 0x8B2DFB84, 0x8B2EFB84, + 0x8B2FFB84, 0x8B30FB84, 0x8B31FB84, 0x8B32FB84, 0x8B33FB84, 0x8B34FB84, 0x8B35FB84, 0x8B36FB84, 0x8B37FB84, 0x8B38FB84, 0x8B39FB84, 0x8B3AFB84, 0x8B3BFB84, 0x8B3CFB84, 0x8B3DFB84, + 0x8B3EFB84, 0x8B3FFB84, 0x8B40FB84, 0x8B41FB84, 0x8B42FB84, 0x8B43FB84, 0x8B44FB84, 0x8B45FB84, 0x8B46FB84, 0x8B47FB84, 0x8B48FB84, 0x8B49FB84, 0x8B4AFB84, 0x8B4BFB84, 0x8B4CFB84, + 0x8B4DFB84, 0x8B4EFB84, 0x8B4FFB84, 0x8B50FB84, 0x8B51FB84, 0x8B52FB84, 0x8B53FB84, 0x8B54FB84, 0x8B55FB84, 0x8B56FB84, 0x8B57FB84, 0x8B58FB84, 0x8B59FB84, 0x8B5AFB84, 0x8B5BFB84, + 0x8B5CFB84, 0x8B5DFB84, 0x8B5EFB84, 0x8B5FFB84, 0x8B60FB84, 0x8B61FB84, 0x8B62FB84, 0x8B63FB84, 0x8B64FB84, 0x8B65FB84, 0x8B66FB84, 0x8B67FB84, 0x8B68FB84, 0x8B69FB84, 0x8B6AFB84, + 0x8B6BFB84, 0x8B6CFB84, 0x8B6DFB84, 0x8B6EFB84, 0x8B6FFB84, 0x8B70FB84, 0x8B71FB84, 0x8B72FB84, 0x8B73FB84, 0x8B74FB84, 0x8B75FB84, 0x8B76FB84, 0x8B77FB84, 0x8B78FB84, 0x8B79FB84, + 0x8B7AFB84, 0x8B7BFB84, 0x8B7CFB84, 0x8B7DFB84, 0x8B7EFB84, 0x8B7FFB84, 0x8B80FB84, 0x8B81FB84, 0x8B82FB84, 
0x8B83FB84, 0x8B84FB84, 0x8B85FB84, 0x8B86FB84, 0x8B87FB84, 0x8B88FB84, + 0x8B89FB84, 0x8B8AFB84, 0x8B8BFB84, 0x8B8CFB84, 0x8B8DFB84, 0x8B8EFB84, 0x8B8FFB84, 0x8B90FB84, 0x8B91FB84, 0x8B92FB84, 0x8B93FB84, 0x8B94FB84, 0x8B95FB84, 0x8B96FB84, 0x8B97FB84, + 0x8B98FB84, 0x8B99FB84, 0x8B9AFB84, 0x8B9BFB84, 0x8B9CFB84, 0x8B9DFB84, 0x8B9EFB84, 0x8B9FFB84, 0x8BA0FB84, 0x8BA1FB84, 0x8BA2FB84, 0x8BA3FB84, 0x8BA4FB84, 0x8BA5FB84, 0x8BA6FB84, + 0x8BA7FB84, 0x8BA8FB84, 0x8BA9FB84, 0x8BAAFB84, 0x8BABFB84, 0x8BACFB84, 0x8BADFB84, 0x8BAEFB84, 0x8BAFFB84, 0x8BB0FB84, 0x8BB1FB84, 0x8BB2FB84, 0x8BB3FB84, 0x8BB4FB84, 0x8BB5FB84, + 0x8BB6FB84, 0x8BB7FB84, 0x8BB8FB84, 0x8BB9FB84, 0x8BBAFB84, 0x8BBBFB84, 0x8BBCFB84, 0x8BBDFB84, 0x8BBEFB84, 0x8BBFFB84, 0x8BC0FB84, 0x8BC1FB84, 0x8BC2FB84, 0x8BC3FB84, 0x8BC4FB84, + 0x8BC5FB84, 0x8BC6FB84, 0x8BC7FB84, 0x8BC8FB84, 0x8BC9FB84, 0x8BCAFB84, 0x8BCBFB84, 0x8BCCFB84, 0x8BCDFB84, 0x8BCEFB84, 0x8BCFFB84, 0x8BD0FB84, 0x8BD1FB84, 0x8BD2FB84, 0x8BD3FB84, + 0x8BD4FB84, 0x8BD5FB84, 0x8BD6FB84, 0x8BD7FB84, 0x8BD8FB84, 0x8BD9FB84, 0x8BDAFB84, 0x8BDBFB84, 0x8BDCFB84, 0x8BDDFB84, 0x8BDEFB84, 0x8BDFFB84, 0x8BE0FB84, 0x8BE1FB84, 0x8BE2FB84, + 0x8BE3FB84, 0x8BE4FB84, 0x8BE5FB84, 0x8BE6FB84, 0x8BE7FB84, 0x8BE8FB84, 0x8BE9FB84, 0x8BEAFB84, 0x8BEBFB84, 0x8BECFB84, 0x8BEDFB84, 0x8BEEFB84, 0x8BEFFB84, 0x8BF0FB84, 0x8BF1FB84, + 0x8BF2FB84, 0x8BF3FB84, 0x8BF4FB84, 0x8BF5FB84, 0x8BF6FB84, 0x8BF7FB84, 0x8BF8FB84, 0x8BF9FB84, 0x8BFAFB84, 0x8BFBFB84, 0x8BFCFB84, 0x8BFDFB84, 0x8BFEFB84, 0x8BFFFB84, 0x8C00FB84, + 0x8C01FB84, 0x8C02FB84, 0x8C03FB84, 0x8C04FB84, 0x8C05FB84, 0x8C06FB84, 0x8C07FB84, 0x8C08FB84, 0x8C09FB84, 0x8C0AFB84, 0x8C0BFB84, 0x8C0CFB84, 0x8C0DFB84, 0x8C0EFB84, 0x8C0FFB84, + 0x8C10FB84, 0x8C11FB84, 0x8C12FB84, 0x8C13FB84, 0x8C14FB84, 0x8C15FB84, 0x8C16FB84, 0x8C17FB84, 0x8C18FB84, 0x8C19FB84, 0x8C1AFB84, 0x8C1BFB84, 0x8C1CFB84, 0x8C1DFB84, 0x8C1EFB84, + 0x8C1FFB84, 0x8C20FB84, 0x8C21FB84, 0x8C22FB84, 0x8C23FB84, 0x8C24FB84, 0x8C25FB84, 0x8C26FB84, 
0x8C27FB84, 0x8C28FB84, 0x8C29FB84, 0x8C2AFB84, 0x8C2BFB84, 0x8C2CFB84, 0x8C2DFB84, + 0x8C2EFB84, 0x8C2FFB84, 0x8C30FB84, 0x8C31FB84, 0x8C32FB84, 0x8C33FB84, 0x8C34FB84, 0x8C35FB84, 0x8C36FB84, 0x8C37FB84, 0x8C38FB84, 0x8C39FB84, 0x8C3AFB84, 0x8C3BFB84, 0x8C3CFB84, + 0x8C3DFB84, 0x8C3EFB84, 0x8C3FFB84, 0x8C40FB84, 0x8C41FB84, 0x8C42FB84, 0x8C43FB84, 0x8C44FB84, 0x8C45FB84, 0x8C46FB84, 0x8C47FB84, 0x8C48FB84, 0x8C49FB84, 0x8C4AFB84, 0x8C4BFB84, + 0x8C4CFB84, 0x8C4DFB84, 0x8C4EFB84, 0x8C4FFB84, 0x8C50FB84, 0x8C51FB84, 0x8C52FB84, 0x8C53FB84, 0x8C54FB84, 0x8C55FB84, 0x8C56FB84, 0x8C57FB84, 0x8C58FB84, 0x8C59FB84, 0x8C5AFB84, + 0x8C5BFB84, 0x8C5CFB84, 0x8C5DFB84, 0x8C5EFB84, 0x8C5FFB84, 0x8C60FB84, 0x8C61FB84, 0x8C62FB84, 0x8C63FB84, 0x8C64FB84, 0x8C65FB84, 0x8C66FB84, 0x8C67FB84, 0x8C68FB84, 0x8C69FB84, + 0x8C6AFB84, 0x8C6BFB84, 0x8C6CFB84, 0x8C6DFB84, 0x8C6EFB84, 0x8C6FFB84, 0x8C70FB84, 0x8C71FB84, 0x8C72FB84, 0x8C73FB84, 0x8C74FB84, 0x8C75FB84, 0x8C76FB84, 0x8C77FB84, 0x8C78FB84, + 0x8C79FB84, 0x8C7AFB84, 0x8C7BFB84, 0x8C7CFB84, 0x8C7DFB84, 0x8C7EFB84, 0x8C7FFB84, 0x8C80FB84, 0x8C81FB84, 0x8C82FB84, 0x8C83FB84, 0x8C84FB84, 0x8C85FB84, 0x8C86FB84, 0x8C87FB84, + 0x8C88FB84, 0x8C89FB84, 0x8C8AFB84, 0x8C8BFB84, 0x8C8CFB84, 0x8C8DFB84, 0x8C8EFB84, 0x8C8FFB84, 0x8C90FB84, 0x8C91FB84, 0x8C92FB84, 0x8C93FB84, 0x8C94FB84, 0x8C95FB84, 0x8C96FB84, + 0x8C97FB84, 0x8C98FB84, 0x8C99FB84, 0x8C9AFB84, 0x8C9BFB84, 0x8C9CFB84, 0x8C9DFB84, 0x8C9EFB84, 0x8C9FFB84, 0x8CA0FB84, 0x8CA1FB84, 0x8CA2FB84, 0x8CA3FB84, 0x8CA4FB84, 0x8CA5FB84, + 0x8CA6FB84, 0x8CA7FB84, 0x8CA8FB84, 0x8CA9FB84, 0x8CAAFB84, 0x8CABFB84, 0x8CACFB84, 0x8CADFB84, 0x8CAEFB84, 0x8CAFFB84, 0x8CB0FB84, 0x8CB1FB84, 0x8CB2FB84, 0x8CB3FB84, 0x8CB4FB84, + 0x8CB5FB84, 0x8CB6FB84, 0x8CB7FB84, 0x8CB8FB84, 0x8CB9FB84, 0x8CBAFB84, 0x8CBBFB84, 0x8CBCFB84, 0x8CBDFB84, 0x8CBEFB84, 0x8CBFFB84, 0x8CC0FB84, 0x8CC1FB84, 0x8CC2FB84, 0x8CC3FB84, + 0x8CC4FB84, 0x8CC5FB84, 0x8CC6FB84, 0x8CC7FB84, 0x8CC8FB84, 0x8CC9FB84, 0x8CCAFB84, 
0x8CCBFB84, 0x8CCCFB84, 0x8CCDFB84, 0x8CCEFB84, 0x8CCFFB84, 0x8CD0FB84, 0x8CD1FB84, 0x8CD2FB84, + 0x8CD3FB84, 0x8CD4FB84, 0x8CD5FB84, 0x8CD6FB84, 0x8CD7FB84, 0x8CD8FB84, 0x8CD9FB84, 0x8CDAFB84, 0x8CDBFB84, 0x8CDCFB84, 0x8CDDFB84, 0x8CDEFB84, 0x8CDFFB84, 0x8CE0FB84, 0x8CE1FB84, + 0x8CE2FB84, 0x8CE3FB84, 0x8CE4FB84, 0x8CE5FB84, 0x8CE6FB84, 0x8CE7FB84, 0x8CE8FB84, 0x8CE9FB84, 0x8CEAFB84, 0x8CEBFB84, 0x8CECFB84, 0x8CEDFB84, 0x8CEEFB84, 0x8CEFFB84, 0x8CF0FB84, + 0x8CF1FB84, 0x8CF2FB84, 0x8CF3FB84, 0x8CF4FB84, 0x8CF5FB84, 0x8CF6FB84, 0x8CF7FB84, 0x8CF8FB84, 0x8CF9FB84, 0x8CFAFB84, 0x8CFBFB84, 0x8CFCFB84, 0x8CFDFB84, 0x8CFEFB84, 0x8CFFFB84, + 0x8D00FB84, 0x8D01FB84, 0x8D02FB84, 0x8D03FB84, 0x8D04FB84, 0x8D05FB84, 0x8D06FB84, 0x8D07FB84, 0x8D08FB84, 0x8D09FB84, 0x8D0AFB84, 0x8D0BFB84, 0x8D0CFB84, 0x8D0DFB84, 0x8D0EFB84, + 0x8D0FFB84, 0x8D10FB84, 0x8D11FB84, 0x8D12FB84, 0x8D13FB84, 0x8D14FB84, 0x8D15FB84, 0x8D16FB84, 0x8D17FB84, 0x8D18FB84, 0x8D19FB84, 0x8D1AFB84, 0x8D1BFB84, 0x8D1CFB84, 0x8D1DFB84, + 0x8D1EFB84, 0x8D1FFB84, 0x8D20FB84, 0x8D21FB84, 0x8D22FB84, 0x8D23FB84, 0x8D24FB84, 0x8D25FB84, 0x8D26FB84, 0x8D27FB84, 0x8D28FB84, 0x8D29FB84, 0x8D2AFB84, 0x8D2BFB84, 0x8D2CFB84, + 0x8D2DFB84, 0x8D2EFB84, 0x8D2FFB84, 0x8D30FB84, 0x8D31FB84, 0x8D32FB84, 0x8D33FB84, 0x8D34FB84, 0x8D35FB84, 0x8D36FB84, 0x8D37FB84, 0x8D38FB84, 0x8D39FB84, 0x8D3AFB84, 0x8D3BFB84, + 0x8D3CFB84, 0x8D3DFB84, 0x8D3EFB84, 0x8D3FFB84, 0x8D40FB84, 0x8D41FB84, 0x8D42FB84, 0x8D43FB84, 0x8D44FB84, 0x8D45FB84, 0x8D46FB84, 0x8D47FB84, 0x8D48FB84, 0x8D49FB84, 0x8D4AFB84, + 0x8D4BFB84, 0x8D4CFB84, 0x8D4DFB84, 0x8D4EFB84, 0x8D4FFB84, 0x8D50FB84, 0x8D51FB84, 0x8D52FB84, 0x8D53FB84, 0x8D54FB84, 0x8D55FB84, 0x8D56FB84, 0x8D57FB84, 0x8D58FB84, 0x8D59FB84, + 0x8D5AFB84, 0x8D5BFB84, 0x8D5CFB84, 0x8D5DFB84, 0x8D5EFB84, 0x8D5FFB84, 0x8D60FB84, 0x8D61FB84, 0x8D62FB84, 0x8D63FB84, 0x8D64FB84, 0x8D65FB84, 0x8D66FB84, 0x8D67FB84, 0x8D68FB84, + 0x8D69FB84, 0x8D6AFB84, 0x8D6BFB84, 0x8D6CFB84, 0x8D6DFB84, 0x8D6EFB84, 
0x8D6FFB84, 0x8D70FB84, 0x8D71FB84, 0x8D72FB84, 0x8D73FB84, 0x8D74FB84, 0x8D75FB84, 0x8D76FB84, 0x8D77FB84, + 0x8D78FB84, 0x8D79FB84, 0x8D7AFB84, 0x8D7BFB84, 0x8D7CFB84, 0x8D7DFB84, 0x8D7EFB84, 0x8D7FFB84, 0x8D80FB84, 0x8D81FB84, 0x8D82FB84, 0x8D83FB84, 0x8D84FB84, 0x8D85FB84, 0x8D86FB84, + 0x8D87FB84, 0x8D88FB84, 0x8D89FB84, 0x8D8AFB84, 0x8D8BFB84, 0x8D8CFB84, 0x8D8DFB84, 0x8D8EFB84, 0x8D8FFB84, 0x8D90FB84, 0x8D91FB84, 0x8D92FB84, 0x8D93FB84, 0x8D94FB84, 0x8D95FB84, + 0x8D96FB84, 0x8D97FB84, 0x8D98FB84, 0x8D99FB84, 0x8D9AFB84, 0x8D9BFB84, 0x8D9CFB84, 0x8D9DFB84, 0x8D9EFB84, 0x8D9FFB84, 0x8DA0FB84, 0x8DA1FB84, 0x8DA2FB84, 0x8DA3FB84, 0x8DA4FB84, + 0x8DA5FB84, 0x8DA6FB84, 0x8DA7FB84, 0x8DA8FB84, 0x8DA9FB84, 0x8DAAFB84, 0x8DABFB84, 0x8DACFB84, 0x8DADFB84, 0x8DAEFB84, 0x8DAFFB84, 0x8DB0FB84, 0x8DB1FB84, 0x8DB2FB84, 0x8DB3FB84, + 0x8DB4FB84, 0x8DB5FB84, 0x8DB6FB84, 0x8DB7FB84, 0x8DB8FB84, 0x8DB9FB84, 0x8DBAFB84, 0x8DBBFB84, 0x8DBCFB84, 0x8DBDFB84, 0x8DBEFB84, 0x8DBFFB84, 0x8DC0FB84, 0x8DC1FB84, 0x8DC2FB84, + 0x8DC3FB84, 0x8DC4FB84, 0x8DC5FB84, 0x8DC6FB84, 0x8DC7FB84, 0x8DC8FB84, 0x8DC9FB84, 0x8DCAFB84, 0x8DCBFB84, 0x8DCCFB84, 0x8DCDFB84, 0x8DCEFB84, 0x8DCFFB84, 0x8DD0FB84, 0x8DD1FB84, + 0x8DD2FB84, 0x8DD3FB84, 0x8DD4FB84, 0x8DD5FB84, 0x8DD6FB84, 0x8DD7FB84, 0x8DD8FB84, 0x8DD9FB84, 0x8DDAFB84, 0x8DDBFB84, 0x8DDCFB84, 0x8DDDFB84, 0x8DDEFB84, 0x8DDFFB84, 0x8DE0FB84, + 0x8DE1FB84, 0x8DE2FB84, 0x8DE3FB84, 0x8DE4FB84, 0x8DE5FB84, 0x8DE6FB84, 0x8DE7FB84, 0x8DE8FB84, 0x8DE9FB84, 0x8DEAFB84, 0x8DEBFB84, 0x8DECFB84, 0x8DEDFB84, 0x8DEEFB84, 0x8DEFFB84, + 0x8DF0FB84, 0x8DF1FB84, 0x8DF2FB84, 0x8DF3FB84, 0x8DF4FB84, 0x8DF5FB84, 0x8DF6FB84, 0x8DF7FB84, 0x8DF8FB84, 0x8DF9FB84, 0x8DFAFB84, 0x8DFBFB84, 0x8DFCFB84, 0x8DFDFB84, 0x8DFEFB84, + 0x8DFFFB84, 0x8E00FB84, 0x8E01FB84, 0x8E02FB84, 0x8E03FB84, 0x8E04FB84, 0x8E05FB84, 0x8E06FB84, 0x8E07FB84, 0x8E08FB84, 0x8E09FB84, 0x8E0AFB84, 0x8E0BFB84, 0x8E0CFB84, 0x8E0DFB84, + 0x8E0EFB84, 0x8E0FFB84, 0x8E10FB84, 0x8E11FB84, 0x8E12FB84, 
0x8E13FB84, 0x8E14FB84, 0x8E15FB84, 0x8E16FB84, 0x8E17FB84, 0x8E18FB84, 0x8E19FB84, 0x8E1AFB84, 0x8E1BFB84, 0x8E1CFB84, + 0x8E1DFB84, 0x8E1EFB84, 0x8E1FFB84, 0x8E20FB84, 0x8E21FB84, 0x8E22FB84, 0x8E23FB84, 0x8E24FB84, 0x8E25FB84, 0x8E26FB84, 0x8E27FB84, 0x8E28FB84, 0x8E29FB84, 0x8E2AFB84, 0x8E2BFB84, + 0x8E2CFB84, 0x8E2DFB84, 0x8E2EFB84, 0x8E2FFB84, 0x8E30FB84, 0x8E31FB84, 0x8E32FB84, 0x8E33FB84, 0x8E34FB84, 0x8E35FB84, 0x8E36FB84, 0x8E37FB84, 0x8E38FB84, 0x8E39FB84, 0x8E3AFB84, + 0x8E3BFB84, 0x8E3CFB84, 0x8E3DFB84, 0x8E3EFB84, 0x8E3FFB84, 0x8E40FB84, 0x8E41FB84, 0x8E42FB84, 0x8E43FB84, 0x8E44FB84, 0x8E45FB84, 0x8E46FB84, 0x8E47FB84, 0x8E48FB84, 0x8E49FB84, + 0x8E4AFB84, 0x8E4BFB84, 0x8E4CFB84, 0x8E4DFB84, 0x8E4EFB84, 0x8E4FFB84, 0x8E50FB84, 0x8E51FB84, 0x8E52FB84, 0x8E53FB84, 0x8E54FB84, 0x8E55FB84, 0x8E56FB84, 0x8E57FB84, 0x8E58FB84, + 0x8E59FB84, 0x8E5AFB84, 0x8E5BFB84, 0x8E5CFB84, 0x8E5DFB84, 0x8E5EFB84, 0x8E5FFB84, 0x8E60FB84, 0x8E61FB84, 0x8E62FB84, 0x8E63FB84, 0x8E64FB84, 0x8E65FB84, 0x8E66FB84, 0x8E67FB84, + 0x8E68FB84, 0x8E69FB84, 0x8E6AFB84, 0x8E6BFB84, 0x8E6CFB84, 0x8E6DFB84, 0x8E6EFB84, 0x8E6FFB84, 0x8E70FB84, 0x8E71FB84, 0x8E72FB84, 0x8E73FB84, 0x8E74FB84, 0x8E75FB84, 0x8E76FB84, + 0x8E77FB84, 0x8E78FB84, 0x8E79FB84, 0x8E7AFB84, 0x8E7BFB84, 0x8E7CFB84, 0x8E7DFB84, 0x8E7EFB84, 0x8E7FFB84, 0x8E80FB84, 0x8E81FB84, 0x8E82FB84, 0x8E83FB84, 0x8E84FB84, 0x8E85FB84, + 0x8E86FB84, 0x8E87FB84, 0x8E88FB84, 0x8E89FB84, 0x8E8AFB84, 0x8E8BFB84, 0x8E8CFB84, 0x8E8DFB84, 0x8E8EFB84, 0x8E8FFB84, 0x8E90FB84, 0x8E91FB84, 0x8E92FB84, 0x8E93FB84, 0x8E94FB84, + 0x8E95FB84, 0x8E96FB84, 0x8E97FB84, 0x8E98FB84, 0x8E99FB84, 0x8E9AFB84, 0x8E9BFB84, 0x8E9CFB84, 0x8E9DFB84, 0x8E9EFB84, 0x8E9FFB84, 0x8EA0FB84, 0x8EA1FB84, 0x8EA2FB84, 0x8EA3FB84, + 0x8EA4FB84, 0x8EA5FB84, 0x8EA6FB84, 0x8EA7FB84, 0x8EA8FB84, 0x8EA9FB84, 0x8EAAFB84, 0x8EABFB84, 0x8EACFB84, 0x8EADFB84, 0x8EAEFB84, 0x8EAFFB84, 0x8EB0FB84, 0x8EB1FB84, 0x8EB2FB84, + 0x8EB3FB84, 0x8EB4FB84, 0x8EB5FB84, 0x8EB6FB84, 
0x8EB7FB84, 0x8EB8FB84, 0x8EB9FB84, 0x8EBAFB84, 0x8EBBFB84, 0x8EBCFB84, 0x8EBDFB84, 0x8EBEFB84, 0x8EBFFB84, 0x8EC0FB84, 0x8EC1FB84, + 0x8EC2FB84, 0x8EC3FB84, 0x8EC4FB84, 0x8EC5FB84, 0x8EC6FB84, 0x8EC7FB84, 0x8EC8FB84, 0x8EC9FB84, 0x8ECAFB84, 0x8ECBFB84, 0x8ECCFB84, 0x8ECDFB84, 0x8ECEFB84, 0x8ECFFB84, 0x8ED0FB84, + 0x8ED1FB84, 0x8ED2FB84, 0x8ED3FB84, 0x8ED4FB84, 0x8ED5FB84, 0x8ED6FB84, 0x8ED7FB84, 0x8ED8FB84, 0x8ED9FB84, 0x8EDAFB84, 0x8EDBFB84, 0x8EDCFB84, 0x8EDDFB84, 0x8EDEFB84, 0x8EDFFB84, + 0x8EE0FB84, 0x8EE1FB84, 0x8EE2FB84, 0x8EE3FB84, 0x8EE4FB84, 0x8EE5FB84, 0x8EE6FB84, 0x8EE7FB84, 0x8EE8FB84, 0x8EE9FB84, 0x8EEAFB84, 0x8EEBFB84, 0x8EECFB84, 0x8EEDFB84, 0x8EEEFB84, + 0x8EEFFB84, 0x8EF0FB84, 0x8EF1FB84, 0x8EF2FB84, 0x8EF3FB84, 0x8EF4FB84, 0x8EF5FB84, 0x8EF6FB84, 0x8EF7FB84, 0x8EF8FB84, 0x8EF9FB84, 0x8EFAFB84, 0x8EFBFB84, 0x8EFCFB84, 0x8EFDFB84, + 0x8EFEFB84, 0x8EFFFB84, 0x8F00FB84, 0x8F01FB84, 0x8F02FB84, 0x8F03FB84, 0x8F04FB84, 0x8F05FB84, 0x8F06FB84, 0x8F07FB84, 0x8F08FB84, 0x8F09FB84, 0x8F0AFB84, 0x8F0BFB84, 0x8F0CFB84, + 0x8F0DFB84, 0x8F0EFB84, 0x8F0FFB84, 0x8F10FB84, 0x8F11FB84, 0x8F12FB84, 0x8F13FB84, 0x8F14FB84, 0x8F15FB84, 0x8F16FB84, 0x8F17FB84, 0x8F18FB84, 0x8F19FB84, 0x8F1AFB84, 0x8F1BFB84, + 0x8F1CFB84, 0x8F1DFB84, 0x8F1EFB84, 0x8F1FFB84, 0x8F20FB84, 0x8F21FB84, 0x8F22FB84, 0x8F23FB84, 0x8F24FB84, 0x8F25FB84, 0x8F26FB84, 0x8F27FB84, 0x8F28FB84, 0x8F29FB84, 0x8F2AFB84, + 0x8F2BFB84, 0x8F2CFB84, 0x8F2DFB84, 0x8F2EFB84, 0x8F2FFB84, 0x8F30FB84, 0x8F31FB84, 0x8F32FB84, 0x8F33FB84, 0x8F34FB84, 0x8F35FB84, 0x8F36FB84, 0x8F37FB84, 0x8F38FB84, 0x8F39FB84, + 0x8F3AFB84, 0x8F3BFB84, 0x8F3CFB84, 0x8F3DFB84, 0x8F3EFB84, 0x8F3FFB84, 0x8F40FB84, 0x8F41FB84, 0x8F42FB84, 0x8F43FB84, 0x8F44FB84, 0x8F45FB84, 0x8F46FB84, 0x8F47FB84, 0x8F48FB84, + 0x8F49FB84, 0x8F4AFB84, 0x8F4BFB84, 0x8F4CFB84, 0x8F4DFB84, 0x8F4EFB84, 0x8F4FFB84, 0x8F50FB84, 0x8F51FB84, 0x8F52FB84, 0x8F53FB84, 0x8F54FB84, 0x8F55FB84, 0x8F56FB84, 0x8F57FB84, + 0x8F58FB84, 0x8F59FB84, 0x8F5AFB84, 
0x8F5BFB84, 0x8F5CFB84, 0x8F5DFB84, 0x8F5EFB84, 0x8F5FFB84, 0x8F60FB84, 0x8F61FB84, 0x8F62FB84, 0x8F63FB84, 0x8F64FB84, 0x8F65FB84, 0x8F66FB84, + 0x8F67FB84, 0x8F68FB84, 0x8F69FB84, 0x8F6AFB84, 0x8F6BFB84, 0x8F6CFB84, 0x8F6DFB84, 0x8F6EFB84, 0x8F6FFB84, 0x8F70FB84, 0x8F71FB84, 0x8F72FB84, 0x8F73FB84, 0x8F74FB84, 0x8F75FB84, + 0x8F76FB84, 0x8F77FB84, 0x8F78FB84, 0x8F79FB84, 0x8F7AFB84, 0x8F7BFB84, 0x8F7CFB84, 0x8F7DFB84, 0x8F7EFB84, 0x8F7FFB84, 0x8F80FB84, 0x8F81FB84, 0x8F82FB84, 0x8F83FB84, 0x8F84FB84, + 0x8F85FB84, 0x8F86FB84, 0x8F87FB84, 0x8F88FB84, 0x8F89FB84, 0x8F8AFB84, 0x8F8BFB84, 0x8F8CFB84, 0x8F8DFB84, 0x8F8EFB84, 0x8F8FFB84, 0x8F90FB84, 0x8F91FB84, 0x8F92FB84, 0x8F93FB84, + 0x8F94FB84, 0x8F95FB84, 0x8F96FB84, 0x8F97FB84, 0x8F98FB84, 0x8F99FB84, 0x8F9AFB84, 0x8F9BFB84, 0x8F9CFB84, 0x8F9DFB84, 0x8F9EFB84, 0x8F9FFB84, 0x8FA0FB84, 0x8FA1FB84, 0x8FA2FB84, + 0x8FA3FB84, 0x8FA4FB84, 0x8FA5FB84, 0x8FA6FB84, 0x8FA7FB84, 0x8FA8FB84, 0x8FA9FB84, 0x8FAAFB84, 0x8FABFB84, 0x8FACFB84, 0x8FADFB84, 0x8FAEFB84, 0x8FAFFB84, 0x8FB0FB84, 0x8FB1FB84, + 0x8FB2FB84, 0x8FB3FB84, 0x8FB4FB84, 0x8FB5FB84, 0x8FB6FB84, 0x8FB7FB84, 0x8FB8FB84, 0x8FB9FB84, 0x8FBAFB84, 0x8FBBFB84, 0x8FBCFB84, 0x8FBDFB84, 0x8FBEFB84, 0x8FBFFB84, 0x8FC0FB84, + 0x8FC1FB84, 0x8FC2FB84, 0x8FC3FB84, 0x8FC4FB84, 0x8FC5FB84, 0x8FC6FB84, 0x8FC7FB84, 0x8FC8FB84, 0x8FC9FB84, 0x8FCAFB84, 0x8FCBFB84, 0x8FCCFB84, 0x8FCDFB84, 0x8FCEFB84, 0x8FCFFB84, + 0x8FD0FB84, 0x8FD1FB84, 0x8FD2FB84, 0x8FD3FB84, 0x8FD4FB84, 0x8FD5FB84, 0x8FD6FB84, 0x8FD7FB84, 0x8FD8FB84, 0x8FD9FB84, 0x8FDAFB84, 0x8FDBFB84, 0x8FDCFB84, 0x8FDDFB84, 0x8FDEFB84, + 0x8FDFFB84, 0x8FE0FB84, 0x8FE1FB84, 0x8FE2FB84, 0x8FE3FB84, 0x8FE4FB84, 0x8FE5FB84, 0x8FE6FB84, 0x8FE7FB84, 0x8FE8FB84, 0x8FE9FB84, 0x8FEAFB84, 0x8FEBFB84, 0x8FECFB84, 0x8FEDFB84, + 0x8FEEFB84, 0x8FEFFB84, 0x8FF0FB84, 0x8FF1FB84, 0x8FF2FB84, 0x8FF3FB84, 0x8FF4FB84, 0x8FF5FB84, 0x8FF6FB84, 0x8FF7FB84, 0x8FF8FB84, 0x8FF9FB84, 0x8FFAFB84, 0x8FFBFB84, 0x8FFCFB84, + 0x8FFDFB84, 0x8FFEFB84, 
0x8FFFFB84, 0x9000FB84, 0x9001FB84, 0x9002FB84, 0x9003FB84, 0x9004FB84, 0x9005FB84, 0x9006FB84, 0x9007FB84, 0x9008FB84, 0x9009FB84, 0x900AFB84, 0x900BFB84, + 0x900CFB84, 0x900DFB84, 0x900EFB84, 0x900FFB84, 0x9010FB84, 0x9011FB84, 0x9012FB84, 0x9013FB84, 0x9014FB84, 0x9015FB84, 0x9016FB84, 0x9017FB84, 0x9018FB84, 0x9019FB84, 0x901AFB84, + 0x901BFB84, 0x901CFB84, 0x901DFB84, 0x901EFB84, 0x901FFB84, 0x9020FB84, 0x9021FB84, 0x9022FB84, 0x9023FB84, 0x9024FB84, 0x9025FB84, 0x9026FB84, 0x9027FB84, 0x9028FB84, 0x9029FB84, + 0x902AFB84, 0x902BFB84, 0x902CFB84, 0x902DFB84, 0x902EFB84, 0x902FFB84, 0x9030FB84, 0x9031FB84, 0x9032FB84, 0x9033FB84, 0x9034FB84, 0x9035FB84, 0x9036FB84, 0x9037FB84, 0x9038FB84, + 0x9039FB84, 0x903AFB84, 0x903BFB84, 0x903CFB84, 0x903DFB84, 0x903EFB84, 0x903FFB84, 0x9040FB84, 0x9041FB84, 0x9042FB84, 0x9043FB84, 0x9044FB84, 0x9045FB84, 0x9046FB84, 0x9047FB84, + 0x9048FB84, 0x9049FB84, 0x904AFB84, 0x904BFB84, 0x904CFB84, 0x904DFB84, 0x904EFB84, 0x904FFB84, 0x9050FB84, 0x9051FB84, 0x9052FB84, 0x9053FB84, 0x9054FB84, 0x9055FB84, 0x9056FB84, + 0x9057FB84, 0x9058FB84, 0x9059FB84, 0x905AFB84, 0x905BFB84, 0x905CFB84, 0x905DFB84, 0x905EFB84, 0x905FFB84, 0x9060FB84, 0x9061FB84, 0x9062FB84, 0x9063FB84, 0x9064FB84, 0x9065FB84, + 0x9066FB84, 0x9067FB84, 0x9068FB84, 0x9069FB84, 0x906AFB84, 0x906BFB84, 0x906CFB84, 0x906DFB84, 0x906EFB84, 0x906FFB84, 0x9070FB84, 0x9071FB84, 0x9072FB84, 0x9073FB84, 0x9074FB84, + 0x9075FB84, 0x9076FB84, 0x9077FB84, 0x9078FB84, 0x9079FB84, 0x907AFB84, 0x907BFB84, 0x907CFB84, 0x907DFB84, 0x907EFB84, 0x907FFB84, 0x9080FB84, 0x9081FB84, 0x9082FB84, 0x9083FB84, + 0x9084FB84, 0x9085FB84, 0x9086FB84, 0x9087FB84, 0x9088FB84, 0x9089FB84, 0x908AFB84, 0x908BFB84, 0x908CFB84, 0x908DFB84, 0x908EFB84, 0x908FFB84, 0x9090FB84, 0x9091FB84, 0x9092FB84, + 0x9093FB84, 0x9094FB84, 0x9095FB84, 0x9096FB84, 0x9097FB84, 0x9098FB84, 0x9099FB84, 0x909AFB84, 0x909BFB84, 0x909CFB84, 0x909DFB84, 0x909EFB84, 0x909FFB84, 0x90A0FB84, 0x90A1FB84, + 0x90A2FB84, 
0x90A3FB84, 0x90A4FB84, 0x90A5FB84, 0x90A6FB84, 0x90A7FB84, 0x90A8FB84, 0x90A9FB84, 0x90AAFB84, 0x90ABFB84, 0x90ACFB84, 0x90ADFB84, 0x90AEFB84, 0x90AFFB84, 0x90B0FB84, + 0x90B1FB84, 0x90B2FB84, 0x90B3FB84, 0x90B4FB84, 0x90B5FB84, 0x90B6FB84, 0x90B7FB84, 0x90B8FB84, 0x90B9FB84, 0x90BAFB84, 0x90BBFB84, 0x90BCFB84, 0x90BDFB84, 0x90BEFB84, 0x90BFFB84, + 0x90C0FB84, 0x90C1FB84, 0x90C2FB84, 0x90C3FB84, 0x90C4FB84, 0x90C5FB84, 0x90C6FB84, 0x90C7FB84, 0x90C8FB84, 0x90C9FB84, 0x90CAFB84, 0x90CBFB84, 0x90CCFB84, 0x90CDFB84, 0x90CEFB84, + 0x90CFFB84, 0x90D0FB84, 0x90D1FB84, 0x90D2FB84, 0x90D3FB84, 0x90D4FB84, 0x90D5FB84, 0x90D6FB84, 0x90D7FB84, 0x90D8FB84, 0x90D9FB84, 0x90DAFB84, 0x90DBFB84, 0x90DCFB84, 0x90DDFB84, + 0x90DEFB84, 0x90DFFB84, 0x90E0FB84, 0x90E1FB84, 0x90E2FB84, 0x90E3FB84, 0x90E4FB84, 0x90E5FB84, 0x90E6FB84, 0x90E7FB84, 0x90E8FB84, 0x90E9FB84, 0x90EAFB84, 0x90EBFB84, 0x90ECFB84, + 0x90EDFB84, 0x90EEFB84, 0x90EFFB84, 0x90F0FB84, 0x90F1FB84, 0x90F2FB84, 0x90F3FB84, 0x90F4FB84, 0x90F5FB84, 0x90F6FB84, 0x90F7FB84, 0x90F8FB84, 0x90F9FB84, 0x90FAFB84, 0x90FBFB84, + 0x90FCFB84, 0x90FDFB84, 0x90FEFB84, 0x90FFFB84, 0x9100FB84, 0x9101FB84, 0x9102FB84, 0x9103FB84, 0x9104FB84, 0x9105FB84, 0x9106FB84, 0x9107FB84, 0x9108FB84, 0x9109FB84, 0x910AFB84, + 0x910BFB84, 0x910CFB84, 0x910DFB84, 0x910EFB84, 0x910FFB84, 0x9110FB84, 0x9111FB84, 0x9112FB84, 0x9113FB84, 0x9114FB84, 0x9115FB84, 0x9116FB84, 0x9117FB84, 0x9118FB84, 0x9119FB84, + 0x911AFB84, 0x911BFB84, 0x911CFB84, 0x911DFB84, 0x911EFB84, 0x911FFB84, 0x9120FB84, 0x9121FB84, 0x9122FB84, 0x9123FB84, 0x9124FB84, 0x9125FB84, 0x9126FB84, 0x9127FB84, 0x9128FB84, + 0x9129FB84, 0x912AFB84, 0x912BFB84, 0x912CFB84, 0x912DFB84, 0x912EFB84, 0x912FFB84, 0x9130FB84, 0x9131FB84, 0x9132FB84, 0x9133FB84, 0x9134FB84, 0x9135FB84, 0x9136FB84, 0x9137FB84, + 0x9138FB84, 0x9139FB84, 0x913AFB84, 0x913BFB84, 0x913CFB84, 0x913DFB84, 0x913EFB84, 0x913FFB84, 0x9140FB84, 0x9141FB84, 0x9142FB84, 0x9143FB84, 0x9144FB84, 0x9145FB84, 0x9146FB84, + 
0x9147FB84, 0x9148FB84, 0x9149FB84, 0x914AFB84, 0x914BFB84, 0x914CFB84, 0x914DFB84, 0x914EFB84, 0x914FFB84, 0x9150FB84, 0x9151FB84, 0x9152FB84, 0x9153FB84, 0x9154FB84, 0x9155FB84, + 0x9156FB84, 0x9157FB84, 0x9158FB84, 0x9159FB84, 0x915AFB84, 0x915BFB84, 0x915CFB84, 0x915DFB84, 0x915EFB84, 0x915FFB84, 0x9160FB84, 0x9161FB84, 0x9162FB84, 0x9163FB84, 0x9164FB84, + 0x9165FB84, 0x9166FB84, 0x9167FB84, 0x9168FB84, 0x9169FB84, 0x916AFB84, 0x916BFB84, 0x916CFB84, 0x916DFB84, 0x916EFB84, 0x916FFB84, 0x9170FB84, 0x9171FB84, 0x9172FB84, 0x9173FB84, + 0x9174FB84, 0x9175FB84, 0x9176FB84, 0x9177FB84, 0x9178FB84, 0x9179FB84, 0x917AFB84, 0x917BFB84, 0x917CFB84, 0x917DFB84, 0x917EFB84, 0x917FFB84, 0x9180FB84, 0x9181FB84, 0x9182FB84, + 0x9183FB84, 0x9184FB84, 0x9185FB84, 0x9186FB84, 0x9187FB84, 0x9188FB84, 0x9189FB84, 0x918AFB84, 0x918BFB84, 0x918CFB84, 0x918DFB84, 0x918EFB84, 0x918FFB84, 0x9190FB84, 0x9191FB84, + 0x9192FB84, 0x9193FB84, 0x9194FB84, 0x9195FB84, 0x9196FB84, 0x9197FB84, 0x9198FB84, 0x9199FB84, 0x919AFB84, 0x919BFB84, 0x919CFB84, 0x919DFB84, 0x919EFB84, 0x919FFB84, 0x91A0FB84, + 0x91A1FB84, 0x91A2FB84, 0x91A3FB84, 0x91A4FB84, 0x91A5FB84, 0x91A6FB84, 0x91A7FB84, 0x91A8FB84, 0x91A9FB84, 0x91AAFB84, 0x91ABFB84, 0x91ACFB84, 0x91ADFB84, 0x91AEFB84, 0x91AFFB84, + 0x91B0FB84, 0x91B1FB84, 0x91B2FB84, 0x91B3FB84, 0x91B4FB84, 0x91B5FB84, 0x91B6FB84, 0x91B7FB84, 0x91B8FB84, 0x91B9FB84, 0x91BAFB84, 0x91BBFB84, 0x91BCFB84, 0x91BDFB84, 0x91BEFB84, + 0x91BFFB84, 0x91C0FB84, 0x91C1FB84, 0x91C2FB84, 0x91C3FB84, 0x91C4FB84, 0x91C5FB84, 0x91C6FB84, 0x91C7FB84, 0x91C8FB84, 0x91C9FB84, 0x91CAFB84, 0x91CBFB84, 0x91CCFB84, 0x91CDFB84, + 0x91CEFB84, 0x91CFFB84, 0x91D0FB84, 0x91D1FB84, 0x91D2FB84, 0x91D3FB84, 0x91D4FB84, 0x91D5FB84, 0x91D6FB84, 0x91D7FB84, 0x91D8FB84, 0x91D9FB84, 0x91DAFB84, 0x91DBFB84, 0x91DCFB84, + 0x91DDFB84, 0x91DEFB84, 0x91DFFB84, 0x91E0FB84, 0x91E1FB84, 0x91E2FB84, 0x91E3FB84, 0x91E4FB84, 0x91E5FB84, 0x91E6FB84, 0x91E7FB84, 0x91E8FB84, 0x91E9FB84, 0x91EAFB84, 0x91EBFB84, 
+ 0x91ECFB84, 0x91EDFB84, 0x91EEFB84, 0x91EFFB84, 0x91F0FB84, 0x91F1FB84, 0x91F2FB84, 0x91F3FB84, 0x91F4FB84, 0x91F5FB84, 0x91F6FB84, 0x91F7FB84, 0x91F8FB84, 0x91F9FB84, 0x91FAFB84, + 0x91FBFB84, 0x91FCFB84, 0x91FDFB84, 0x91FEFB84, 0x91FFFB84, 0x9200FB84, 0x9201FB84, 0x9202FB84, 0x9203FB84, 0x9204FB84, 0x9205FB84, 0x9206FB84, 0x9207FB84, 0x9208FB84, 0x9209FB84, + 0x920AFB84, 0x920BFB84, 0x920CFB84, 0x920DFB84, 0x920EFB84, 0x920FFB84, 0x9210FB84, 0x9211FB84, 0x9212FB84, 0x9213FB84, 0x9214FB84, 0x9215FB84, 0x9216FB84, 0x9217FB84, 0x9218FB84, + 0x9219FB84, 0x921AFB84, 0x921BFB84, 0x921CFB84, 0x921DFB84, 0x921EFB84, 0x921FFB84, 0x9220FB84, 0x9221FB84, 0x9222FB84, 0x9223FB84, 0x9224FB84, 0x9225FB84, 0x9226FB84, 0x9227FB84, + 0x9228FB84, 0x9229FB84, 0x922AFB84, 0x922BFB84, 0x922CFB84, 0x922DFB84, 0x922EFB84, 0x922FFB84, 0x9230FB84, 0x9231FB84, 0x9232FB84, 0x9233FB84, 0x9234FB84, 0x9235FB84, 0x9236FB84, + 0x9237FB84, 0x9238FB84, 0x9239FB84, 0x923AFB84, 0x923BFB84, 0x923CFB84, 0x923DFB84, 0x923EFB84, 0x923FFB84, 0x9240FB84, 0x9241FB84, 0x9242FB84, 0x9243FB84, 0x9244FB84, 0x9245FB84, + 0x9246FB84, 0x9247FB84, 0x9248FB84, 0x9249FB84, 0x924AFB84, 0x924BFB84, 0x924CFB84, 0x924DFB84, 0x924EFB84, 0x924FFB84, 0x9250FB84, 0x9251FB84, 0x9252FB84, 0x9253FB84, 0x9254FB84, + 0x9255FB84, 0x9256FB84, 0x9257FB84, 0x9258FB84, 0x9259FB84, 0x925AFB84, 0x925BFB84, 0x925CFB84, 0x925DFB84, 0x925EFB84, 0x925FFB84, 0x9260FB84, 0x9261FB84, 0x9262FB84, 0x9263FB84, + 0x9264FB84, 0x9265FB84, 0x9266FB84, 0x9267FB84, 0x9268FB84, 0x9269FB84, 0x926AFB84, 0x926BFB84, 0x926CFB84, 0x926DFB84, 0x926EFB84, 0x926FFB84, 0x9270FB84, 0x9271FB84, 0x9272FB84, + 0x9273FB84, 0x9274FB84, 0x9275FB84, 0x9276FB84, 0x9277FB84, 0x9278FB84, 0x9279FB84, 0x927AFB84, 0x927BFB84, 0x927CFB84, 0x927DFB84, 0x927EFB84, 0x927FFB84, 0x9280FB84, 0x9281FB84, + 0x9282FB84, 0x9283FB84, 0x9284FB84, 0x9285FB84, 0x9286FB84, 0x9287FB84, 0x9288FB84, 0x9289FB84, 0x928AFB84, 0x928BFB84, 0x928CFB84, 0x928DFB84, 0x928EFB84, 0x928FFB84, 
0x9290FB84, + 0x9291FB84, 0x9292FB84, 0x9293FB84, 0x9294FB84, 0x9295FB84, 0x9296FB84, 0x9297FB84, 0x9298FB84, 0x9299FB84, 0x929AFB84, 0x929BFB84, 0x929CFB84, 0x929DFB84, 0x929EFB84, 0x929FFB84, + 0x92A0FB84, 0x92A1FB84, 0x92A2FB84, 0x92A3FB84, 0x92A4FB84, 0x92A5FB84, 0x92A6FB84, 0x92A7FB84, 0x92A8FB84, 0x92A9FB84, 0x92AAFB84, 0x92ABFB84, 0x92ACFB84, 0x92ADFB84, 0x92AEFB84, + 0x92AFFB84, 0x92B0FB84, 0x92B1FB84, 0x92B2FB84, 0x92B3FB84, 0x92B4FB84, 0x92B5FB84, 0x92B6FB84, 0x92B7FB84, 0x92B8FB84, 0x92B9FB84, 0x92BAFB84, 0x92BBFB84, 0x92BCFB84, 0x92BDFB84, + 0x92BEFB84, 0x92BFFB84, 0x92C0FB84, 0x92C1FB84, 0x92C2FB84, 0x92C3FB84, 0x92C4FB84, 0x92C5FB84, 0x92C6FB84, 0x92C7FB84, 0x92C8FB84, 0x92C9FB84, 0x92CAFB84, 0x92CBFB84, 0x92CCFB84, + 0x92CDFB84, 0x92CEFB84, 0x92CFFB84, 0x92D0FB84, 0x92D1FB84, 0x92D2FB84, 0x92D3FB84, 0x92D4FB84, 0x92D5FB84, 0x92D6FB84, 0x92D7FB84, 0x92D8FB84, 0x92D9FB84, 0x92DAFB84, 0x92DBFB84, + 0x92DCFB84, 0x92DDFB84, 0x92DEFB84, 0x92DFFB84, 0x92E0FB84, 0x92E1FB84, 0x92E2FB84, 0x92E3FB84, 0x92E4FB84, 0x92E5FB84, 0x92E6FB84, 0x92E7FB84, 0x92E8FB84, 0x92E9FB84, 0x92EAFB84, + 0x92EBFB84, 0x92ECFB84, 0x92EDFB84, 0x92EEFB84, 0x92EFFB84, 0x92F0FB84, 0x92F1FB84, 0x92F2FB84, 0x92F3FB84, 0x92F4FB84, 0x92F5FB84, 0x92F6FB84, 0x92F7FB84, 0x92F8FB84, 0x92F9FB84, + 0x92FAFB84, 0x92FBFB84, 0x92FCFB84, 0x92FDFB84, 0x92FEFB84, 0x92FFFB84, 0x9300FB84, 0x9301FB84, 0x9302FB84, 0x9303FB84, 0x9304FB84, 0x9305FB84, 0x9306FB84, 0x9307FB84, 0x9308FB84, + 0x9309FB84, 0x930AFB84, 0x930BFB84, 0x930CFB84, 0x930DFB84, 0x930EFB84, 0x930FFB84, 0x9310FB84, 0x9311FB84, 0x9312FB84, 0x9313FB84, 0x9314FB84, 0x9315FB84, 0x9316FB84, 0x9317FB84, + 0x9318FB84, 0x9319FB84, 0x931AFB84, 0x931BFB84, 0x931CFB84, 0x931DFB84, 0x931EFB84, 0x931FFB84, 0x9320FB84, 0x9321FB84, 0x9322FB84, 0x9323FB84, 0x9324FB84, 0x9325FB84, 0x9326FB84, + 0x9327FB84, 0x9328FB84, 0x9329FB84, 0x932AFB84, 0x932BFB84, 0x932CFB84, 0x932DFB84, 0x932EFB84, 0x932FFB84, 0x9330FB84, 0x9331FB84, 0x9332FB84, 0x9333FB84, 
0x9334FB84, 0x9335FB84, + 0x9336FB84, 0x9337FB84, 0x9338FB84, 0x9339FB84, 0x933AFB84, 0x933BFB84, 0x933CFB84, 0x933DFB84, 0x933EFB84, 0x933FFB84, 0x9340FB84, 0x9341FB84, 0x9342FB84, 0x9343FB84, 0x9344FB84, + 0x9345FB84, 0x9346FB84, 0x9347FB84, 0x9348FB84, 0x9349FB84, 0x934AFB84, 0x934BFB84, 0x934CFB84, 0x934DFB84, 0x934EFB84, 0x934FFB84, 0x9350FB84, 0x9351FB84, 0x9352FB84, 0x9353FB84, + 0x9354FB84, 0x9355FB84, 0x9356FB84, 0x9357FB84, 0x9358FB84, 0x9359FB84, 0x935AFB84, 0x935BFB84, 0x935CFB84, 0x935DFB84, 0x935EFB84, 0x935FFB84, 0x9360FB84, 0x9361FB84, 0x9362FB84, + 0x9363FB84, 0x9364FB84, 0x9365FB84, 0x9366FB84, 0x9367FB84, 0x9368FB84, 0x9369FB84, 0x936AFB84, 0x936BFB84, 0x936CFB84, 0x936DFB84, 0x936EFB84, 0x936FFB84, 0x9370FB84, 0x9371FB84, + 0x9372FB84, 0x9373FB84, 0x9374FB84, 0x9375FB84, 0x9376FB84, 0x9377FB84, 0x9378FB84, 0x9379FB84, 0x937AFB84, 0x937BFB84, 0x937CFB84, 0x937DFB84, 0x937EFB84, 0x937FFB84, 0x9380FB84, + 0x9381FB84, 0x9382FB84, 0x9383FB84, 0x9384FB84, 0x9385FB84, 0x9386FB84, 0x9387FB84, 0x9388FB84, 0x9389FB84, 0x938AFB84, 0x938BFB84, 0x938CFB84, 0x938DFB84, 0x938EFB84, 0x938FFB84, + 0x9390FB84, 0x9391FB84, 0x9392FB84, 0x9393FB84, 0x9394FB84, 0x9395FB84, 0x9396FB84, 0x9397FB84, 0x9398FB84, 0x9399FB84, 0x939AFB84, 0x939BFB84, 0x939CFB84, 0x939DFB84, 0x939EFB84, + 0x939FFB84, 0x93A0FB84, 0x93A1FB84, 0x93A2FB84, 0x93A3FB84, 0x93A4FB84, 0x93A5FB84, 0x93A6FB84, 0x93A7FB84, 0x93A8FB84, 0x93A9FB84, 0x93AAFB84, 0x93ABFB84, 0x93ACFB84, 0x93ADFB84, + 0x93AEFB84, 0x93AFFB84, 0x93B0FB84, 0x93B1FB84, 0x93B2FB84, 0x93B3FB84, 0x93B4FB84, 0x93B5FB84, 0x93B6FB84, 0x93B7FB84, 0x93B8FB84, 0x93B9FB84, 0x93BAFB84, 0x93BBFB84, 0x93BCFB84, + 0x93BDFB84, 0x93BEFB84, 0x93BFFB84, 0x93C0FB84, 0x93C1FB84, 0x93C2FB84, 0x93C3FB84, 0x93C4FB84, 0x93C5FB84, 0x93C6FB84, 0x93C7FB84, 0x93C8FB84, 0x93C9FB84, 0x93CAFB84, 0x93CBFB84, + 0x93CCFB84, 0x93CDFB84, 0x93CEFB84, 0x93CFFB84, 0x93D0FB84, 0x93D1FB84, 0x93D2FB84, 0x93D3FB84, 0x93D4FB84, 0x93D5FB84, 0x93D6FB84, 0x93D7FB84, 
0x93D8FB84, 0x93D9FB84, 0x93DAFB84, + 0x93DBFB84, 0x93DCFB84, 0x93DDFB84, 0x93DEFB84, 0x93DFFB84, 0x93E0FB84, 0x93E1FB84, 0x93E2FB84, 0x93E3FB84, 0x93E4FB84, 0x93E5FB84, 0x93E6FB84, 0x93E7FB84, 0x93E8FB84, 0x93E9FB84, + 0x93EAFB84, 0x93EBFB84, 0x93ECFB84, 0x93EDFB84, 0x93EEFB84, 0x93EFFB84, 0x93F0FB84, 0x93F1FB84, 0x93F2FB84, 0x93F3FB84, 0x93F4FB84, 0x93F5FB84, 0x93F6FB84, 0x93F7FB84, 0x93F8FB84, + 0x93F9FB84, 0x93FAFB84, 0x93FBFB84, 0x93FCFB84, 0x93FDFB84, 0x93FEFB84, 0x93FFFB84, 0x9400FB84, 0x9401FB84, 0x9402FB84, 0x9403FB84, 0x9404FB84, 0x9405FB84, 0x9406FB84, 0x9407FB84, + 0x9408FB84, 0x9409FB84, 0x940AFB84, 0x940BFB84, 0x940CFB84, 0x940DFB84, 0x940EFB84, 0x940FFB84, 0x9410FB84, 0x9411FB84, 0x9412FB84, 0x9413FB84, 0x9414FB84, 0x9415FB84, 0x9416FB84, + 0x9417FB84, 0x9418FB84, 0x9419FB84, 0x941AFB84, 0x941BFB84, 0x941CFB84, 0x941DFB84, 0x941EFB84, 0x941FFB84, 0x9420FB84, 0x9421FB84, 0x9422FB84, 0x9423FB84, 0x9424FB84, 0x9425FB84, + 0x9426FB84, 0x9427FB84, 0x9428FB84, 0x9429FB84, 0x942AFB84, 0x942BFB84, 0x942CFB84, 0x942DFB84, 0x942EFB84, 0x942FFB84, 0x9430FB84, 0x9431FB84, 0x9432FB84, 0x9433FB84, 0x9434FB84, + 0x9435FB84, 0x9436FB84, 0x9437FB84, 0x9438FB84, 0x9439FB84, 0x943AFB84, 0x943BFB84, 0x943CFB84, 0x943DFB84, 0x943EFB84, 0x943FFB84, 0x9440FB84, 0x9441FB84, 0x9442FB84, 0x9443FB84, + 0x9444FB84, 0x9445FB84, 0x9446FB84, 0x9447FB84, 0x9448FB84, 0x9449FB84, 0x944AFB84, 0x944BFB84, 0x944CFB84, 0x944DFB84, 0x944EFB84, 0x944FFB84, 0x9450FB84, 0x9451FB84, 0x9452FB84, + 0x9453FB84, 0x9454FB84, 0x9455FB84, 0x9456FB84, 0x9457FB84, 0x9458FB84, 0x9459FB84, 0x945AFB84, 0x945BFB84, 0x945CFB84, 0x945DFB84, 0x945EFB84, 0x945FFB84, 0x9460FB84, 0x9461FB84, + 0x9462FB84, 0x9463FB84, 0x9464FB84, 0x9465FB84, 0x9466FB84, 0x9467FB84, 0x9468FB84, 0x9469FB84, 0x946AFB84, 0x946BFB84, 0x946CFB84, 0x946DFB84, 0x946EFB84, 0x946FFB84, 0x9470FB84, + 0x9471FB84, 0x9472FB84, 0x9473FB84, 0x9474FB84, 0x9475FB84, 0x9476FB84, 0x9477FB84, 0x9478FB84, 0x9479FB84, 0x947AFB84, 0x947BFB84, 
0x947CFB84, 0x947DFB84, 0x947EFB84, 0x947FFB84, + 0x9480FB84, 0x9481FB84, 0x9482FB84, 0x9483FB84, 0x9484FB84, 0x9485FB84, 0x9486FB84, 0x9487FB84, 0x9488FB84, 0x9489FB84, 0x948AFB84, 0x948BFB84, 0x948CFB84, 0x948DFB84, 0x948EFB84, + 0x948FFB84, 0x9490FB84, 0x9491FB84, 0x9492FB84, 0x9493FB84, 0x9494FB84, 0x9495FB84, 0x9496FB84, 0x9497FB84, 0x9498FB84, 0x9499FB84, 0x949AFB84, 0x949BFB84, 0x949CFB84, 0x949DFB84, + 0x949EFB84, 0x949FFB84, 0x94A0FB84, 0x94A1FB84, 0x94A2FB84, 0x94A3FB84, 0x94A4FB84, 0x94A5FB84, 0x94A6FB84, 0x94A7FB84, 0x94A8FB84, 0x94A9FB84, 0x94AAFB84, 0x94ABFB84, 0x94ACFB84, + 0x94ADFB84, 0x94AEFB84, 0x94AFFB84, 0x94B0FB84, 0x94B1FB84, 0x94B2FB84, 0x94B3FB84, 0x94B4FB84, 0x94B5FB84, 0x94B6FB84, 0x94B7FB84, 0x94B8FB84, 0x94B9FB84, 0x94BAFB84, 0x94BBFB84, + 0x94BCFB84, 0x94BDFB84, 0x94BEFB84, 0x94BFFB84, 0x94C0FB84, 0x94C1FB84, 0x94C2FB84, 0x94C3FB84, 0x94C4FB84, 0x94C5FB84, 0x94C6FB84, 0x94C7FB84, 0x94C8FB84, 0x94C9FB84, 0x94CAFB84, + 0x94CBFB84, 0x94CCFB84, 0x94CDFB84, 0x94CEFB84, 0x94CFFB84, 0x94D0FB84, 0x94D1FB84, 0x94D2FB84, 0x94D3FB84, 0x94D4FB84, 0x94D5FB84, 0x94D6FB84, 0x94D7FB84, 0x94D8FB84, 0x94D9FB84, + 0x94DAFB84, 0x94DBFB84, 0x94DCFB84, 0x94DDFB84, 0x94DEFB84, 0x94DFFB84, 0x94E0FB84, 0x94E1FB84, 0x94E2FB84, 0x94E3FB84, 0x94E4FB84, 0x94E5FB84, 0x94E6FB84, 0x94E7FB84, 0x94E8FB84, + 0x94E9FB84, 0x94EAFB84, 0x94EBFB84, 0x94ECFB84, 0x94EDFB84, 0x94EEFB84, 0x94EFFB84, 0x94F0FB84, 0x94F1FB84, 0x94F2FB84, 0x94F3FB84, 0x94F4FB84, 0x94F5FB84, 0x94F6FB84, 0x94F7FB84, + 0x94F8FB84, 0x94F9FB84, 0x94FAFB84, 0x94FBFB84, 0x94FCFB84, 0x94FDFB84, 0x94FEFB84, 0x94FFFB84, 0x9500FB84, 0x9501FB84, 0x9502FB84, 0x9503FB84, 0x9504FB84, 0x9505FB84, 0x9506FB84, + 0x9507FB84, 0x9508FB84, 0x9509FB84, 0x950AFB84, 0x950BFB84, 0x950CFB84, 0x950DFB84, 0x950EFB84, 0x950FFB84, 0x9510FB84, 0x9511FB84, 0x9512FB84, 0x9513FB84, 0x9514FB84, 0x9515FB84, + 0x9516FB84, 0x9517FB84, 0x9518FB84, 0x9519FB84, 0x951AFB84, 0x951BFB84, 0x951CFB84, 0x951DFB84, 0x951EFB84, 0x951FFB84, 
0x9520FB84, 0x9521FB84, 0x9522FB84, 0x9523FB84, 0x9524FB84, + 0x9525FB84, 0x9526FB84, 0x9527FB84, 0x9528FB84, 0x9529FB84, 0x952AFB84, 0x952BFB84, 0x952CFB84, 0x952DFB84, 0x952EFB84, 0x952FFB84, 0x9530FB84, 0x9531FB84, 0x9532FB84, 0x9533FB84, + 0x9534FB84, 0x9535FB84, 0x9536FB84, 0x9537FB84, 0x9538FB84, 0x9539FB84, 0x953AFB84, 0x953BFB84, 0x953CFB84, 0x953DFB84, 0x953EFB84, 0x953FFB84, 0x9540FB84, 0x9541FB84, 0x9542FB84, + 0x9543FB84, 0x9544FB84, 0x9545FB84, 0x9546FB84, 0x9547FB84, 0x9548FB84, 0x9549FB84, 0x954AFB84, 0x954BFB84, 0x954CFB84, 0x954DFB84, 0x954EFB84, 0x954FFB84, 0x9550FB84, 0x9551FB84, + 0x9552FB84, 0x9553FB84, 0x9554FB84, 0x9555FB84, 0x9556FB84, 0x9557FB84, 0x9558FB84, 0x9559FB84, 0x955AFB84, 0x955BFB84, 0x955CFB84, 0x955DFB84, 0x955EFB84, 0x955FFB84, 0x9560FB84, + 0x9561FB84, 0x9562FB84, 0x9563FB84, 0x9564FB84, 0x9565FB84, 0x9566FB84, 0x9567FB84, 0x9568FB84, 0x9569FB84, 0x956AFB84, 0x956BFB84, 0x956CFB84, 0x956DFB84, 0x956EFB84, 0x956FFB84, + 0x9570FB84, 0x9571FB84, 0x9572FB84, 0x9573FB84, 0x9574FB84, 0x9575FB84, 0x9576FB84, 0x9577FB84, 0x9578FB84, 0x9579FB84, 0x957AFB84, 0x957BFB84, 0x957CFB84, 0x957DFB84, 0x957EFB84, + 0x957FFB84, 0x9580FB84, 0x9581FB84, 0x9582FB84, 0x9583FB84, 0x9584FB84, 0x9585FB84, 0x9586FB84, 0x9587FB84, 0x9588FB84, 0x9589FB84, 0x958AFB84, 0x958BFB84, 0x958CFB84, 0x958DFB84, + 0x958EFB84, 0x958FFB84, 0x9590FB84, 0x9591FB84, 0x9592FB84, 0x9593FB84, 0x9594FB84, 0x9595FB84, 0x9596FB84, 0x9597FB84, 0x9598FB84, 0x9599FB84, 0x959AFB84, 0x959BFB84, 0x959CFB84, + 0x959DFB84, 0x959EFB84, 0x959FFB84, 0x95A0FB84, 0x95A1FB84, 0x95A2FB84, 0x95A3FB84, 0x95A4FB84, 0x95A5FB84, 0x95A6FB84, 0x95A7FB84, 0x95A8FB84, 0x95A9FB84, 0x95AAFB84, 0x95ABFB84, + 0x95ACFB84, 0x95ADFB84, 0x95AEFB84, 0x95AFFB84, 0x95B0FB84, 0x95B1FB84, 0x95B2FB84, 0x95B3FB84, 0x95B4FB84, 0x95B5FB84, 0x95B6FB84, 0x95B7FB84, 0x95B8FB84, 0x95B9FB84, 0x95BAFB84, + 0x95BBFB84, 0x95BCFB84, 0x95BDFB84, 0x95BEFB84, 0x95BFFB84, 0x95C0FB84, 0x95C1FB84, 0x95C2FB84, 0x95C3FB84, 
0x95C4FB84, 0x95C5FB84, 0x95C6FB84, 0x95C7FB84, 0x95C8FB84, 0x95C9FB84, + 0x95CAFB84, 0x95CBFB84, 0x95CCFB84, 0x95CDFB84, 0x95CEFB84, 0x95CFFB84, 0x95D0FB84, 0x95D1FB84, 0x95D2FB84, 0x95D3FB84, 0x95D4FB84, 0x95D5FB84, 0x95D6FB84, 0x95D7FB84, 0x95D8FB84, + 0x95D9FB84, 0x95DAFB84, 0x95DBFB84, 0x95DCFB84, 0x95DDFB84, 0x95DEFB84, 0x95DFFB84, 0x95E0FB84, 0x95E1FB84, 0x95E2FB84, 0x95E3FB84, 0x95E4FB84, 0x95E5FB84, 0x95E6FB84, 0x95E7FB84, + 0x95E8FB84, 0x95E9FB84, 0x95EAFB84, 0x95EBFB84, 0x95ECFB84, 0x95EDFB84, 0x95EEFB84, 0x95EFFB84, 0x95F0FB84, 0x95F1FB84, 0x95F2FB84, 0x95F3FB84, 0x95F4FB84, 0x95F5FB84, 0x95F6FB84, + 0x95F7FB84, 0x95F8FB84, 0x95F9FB84, 0x95FAFB84, 0x95FBFB84, 0x95FCFB84, 0x95FDFB84, 0x95FEFB84, 0x95FFFB84, 0x9600FB84, 0x9601FB84, 0x9602FB84, 0x9603FB84, 0x9604FB84, 0x9605FB84, + 0x9606FB84, 0x9607FB84, 0x9608FB84, 0x9609FB84, 0x960AFB84, 0x960BFB84, 0x960CFB84, 0x960DFB84, 0x960EFB84, 0x960FFB84, 0x9610FB84, 0x9611FB84, 0x9612FB84, 0x9613FB84, 0x9614FB84, + 0x9615FB84, 0x9616FB84, 0x9617FB84, 0x9618FB84, 0x9619FB84, 0x961AFB84, 0x961BFB84, 0x961CFB84, 0x961DFB84, 0x961EFB84, 0x961FFB84, 0x9620FB84, 0x9621FB84, 0x9622FB84, 0x9623FB84, + 0x9624FB84, 0x9625FB84, 0x9626FB84, 0x9627FB84, 0x9628FB84, 0x9629FB84, 0x962AFB84, 0x962BFB84, 0x962CFB84, 0x962DFB84, 0x962EFB84, 0x962FFB84, 0x9630FB84, 0x9631FB84, 0x9632FB84, + 0x9633FB84, 0x9634FB84, 0x9635FB84, 0x9636FB84, 0x9637FB84, 0x9638FB84, 0x9639FB84, 0x963AFB84, 0x963BFB84, 0x963CFB84, 0x963DFB84, 0x963EFB84, 0x963FFB84, 0x9640FB84, 0x9641FB84, + 0x9642FB84, 0x9643FB84, 0x9644FB84, 0x9645FB84, 0x9646FB84, 0x9647FB84, 0x9648FB84, 0x9649FB84, 0x964AFB84, 0x964BFB84, 0x964CFB84, 0x964DFB84, 0x964EFB84, 0x964FFB84, 0x9650FB84, + 0x9651FB84, 0x9652FB84, 0x9653FB84, 0x9654FB84, 0x9655FB84, 0x9656FB84, 0x9657FB84, 0x9658FB84, 0x9659FB84, 0x965AFB84, 0x965BFB84, 0x965CFB84, 0x965DFB84, 0x965EFB84, 0x965FFB84, + 0x9660FB84, 0x9661FB84, 0x9662FB84, 0x9663FB84, 0x9664FB84, 0x9665FB84, 0x9666FB84, 0x9667FB84, 
0x9668FB84, 0x9669FB84, 0x966AFB84, 0x966BFB84, 0x966CFB84, 0x966DFB84, 0x966EFB84, + 0x966FFB84, 0x9670FB84, 0x9671FB84, 0x9672FB84, 0x9673FB84, 0x9674FB84, 0x9675FB84, 0x9676FB84, 0x9677FB84, 0x9678FB84, 0x9679FB84, 0x967AFB84, 0x967BFB84, 0x967CFB84, 0x967DFB84, + 0x967EFB84, 0x967FFB84, 0x9680FB84, 0x9681FB84, 0x9682FB84, 0x9683FB84, 0x9684FB84, 0x9685FB84, 0x9686FB84, 0x9687FB84, 0x9688FB84, 0x9689FB84, 0x968AFB84, 0x968BFB84, 0x968CFB84, + 0x968DFB84, 0x968EFB84, 0x968FFB84, 0x9690FB84, 0x9691FB84, 0x9692FB84, 0x9693FB84, 0x9694FB84, 0x9695FB84, 0x9696FB84, 0x9697FB84, 0x9698FB84, 0x9699FB84, 0x969AFB84, 0x969BFB84, + 0x969CFB84, 0x969DFB84, 0x969EFB84, 0x969FFB84, 0x96A0FB84, 0x96A1FB84, 0x96A2FB84, 0x96A3FB84, 0x96A4FB84, 0x96A5FB84, 0x96A6FB84, 0x96A7FB84, 0x96A8FB84, 0x96A9FB84, 0x96AAFB84, + 0x96ABFB84, 0x96ACFB84, 0x96ADFB84, 0x96AEFB84, 0x96AFFB84, 0x96B0FB84, 0x96B1FB84, 0x96B2FB84, 0x96B3FB84, 0x96B4FB84, 0x96B5FB84, 0x96B6FB84, 0x96B7FB84, 0x96B8FB84, 0x96B9FB84, + 0x96BAFB84, 0x96BBFB84, 0x96BCFB84, 0x96BDFB84, 0x96BEFB84, 0x96BFFB84, 0x96C0FB84, 0x96C1FB84, 0x96C2FB84, 0x96C3FB84, 0x96C4FB84, 0x96C5FB84, 0x96C6FB84, 0x96C7FB84, 0x96C8FB84, + 0x96C9FB84, 0x96CAFB84, 0x96CBFB84, 0x96CCFB84, 0x96CDFB84, 0x96CEFB84, 0x96CFFB84, 0x96D0FB84, 0x96D1FB84, 0x96D2FB84, 0x96D3FB84, 0x96D4FB84, 0x96D5FB84, 0x96D6FB84, 0x96D7FB84, + 0x96D8FB84, 0x96D9FB84, 0x96DAFB84, 0x96DBFB84, 0x96DCFB84, 0x96DDFB84, 0x96DEFB84, 0x96DFFB84, 0x96E0FB84, 0x96E1FB84, 0x96E2FB84, 0x96E3FB84, 0x96E4FB84, 0x96E5FB84, 0x96E6FB84, + 0x96E7FB84, 0x96E8FB84, 0x96E9FB84, 0x96EAFB84, 0x96EBFB84, 0x96ECFB84, 0x96EDFB84, 0x96EEFB84, 0x96EFFB84, 0x96F0FB84, 0x96F1FB84, 0x96F2FB84, 0x96F3FB84, 0x96F4FB84, 0x96F5FB84, + 0x96F6FB84, 0x96F7FB84, 0x96F8FB84, 0x96F9FB84, 0x96FAFB84, 0x96FBFB84, 0x96FCFB84, 0x96FDFB84, 0x96FEFB84, 0x96FFFB84, 0x9700FB84, 0x9701FB84, 0x9702FB84, 0x9703FB84, 0x9704FB84, + 0x9705FB84, 0x9706FB84, 0x9707FB84, 0x9708FB84, 0x9709FB84, 0x970AFB84, 0x970BFB84, 
0x970CFB84, 0x970DFB84, 0x970EFB84, 0x970FFB84, 0x9710FB84, 0x9711FB84, 0x9712FB84, 0x9713FB84, + 0x9714FB84, 0x9715FB84, 0x9716FB84, 0x9717FB84, 0x9718FB84, 0x9719FB84, 0x971AFB84, 0x971BFB84, 0x971CFB84, 0x971DFB84, 0x971EFB84, 0x971FFB84, 0x9720FB84, 0x9721FB84, 0x9722FB84, + 0x9723FB84, 0x9724FB84, 0x9725FB84, 0x9726FB84, 0x9727FB84, 0x9728FB84, 0x9729FB84, 0x972AFB84, 0x972BFB84, 0x972CFB84, 0x972DFB84, 0x972EFB84, 0x972FFB84, 0x9730FB84, 0x9731FB84, + 0x9732FB84, 0x9733FB84, 0x9734FB84, 0x9735FB84, 0x9736FB84, 0x9737FB84, 0x9738FB84, 0x9739FB84, 0x973AFB84, 0x973BFB84, 0x973CFB84, 0x973DFB84, 0x973EFB84, 0x973FFB84, 0x9740FB84, + 0x9741FB84, 0x9742FB84, 0x9743FB84, 0x9744FB84, 0x9745FB84, 0x9746FB84, 0x9747FB84, 0x9748FB84, 0x9749FB84, 0x974AFB84, 0x974BFB84, 0x974CFB84, 0x974DFB84, 0x974EFB84, 0x974FFB84, + 0x9750FB84, 0x9751FB84, 0x9752FB84, 0x9753FB84, 0x9754FB84, 0x9755FB84, 0x9756FB84, 0x9757FB84, 0x9758FB84, 0x9759FB84, 0x975AFB84, 0x975BFB84, 0x975CFB84, 0x975DFB84, 0x975EFB84, + 0x975FFB84, 0x9760FB84, 0x9761FB84, 0x9762FB84, 0x9763FB84, 0x9764FB84, 0x9765FB84, 0x9766FB84, 0x9767FB84, 0x9768FB84, 0x9769FB84, 0x976AFB84, 0x976BFB84, 0x976CFB84, 0x976DFB84, + 0x976EFB84, 0x976FFB84, 0x9770FB84, 0x9771FB84, 0x9772FB84, 0x9773FB84, 0x9774FB84, 0x9775FB84, 0x9776FB84, 0x9777FB84, 0x9778FB84, 0x9779FB84, 0x977AFB84, 0x977BFB84, 0x977CFB84, + 0x977DFB84, 0x977EFB84, 0x977FFB84, 0x9780FB84, 0x9781FB84, 0x9782FB84, 0x9783FB84, 0x9784FB84, 0x9785FB84, 0x9786FB84, 0x9787FB84, 0x9788FB84, 0x9789FB84, 0x978AFB84, 0x978BFB84, + 0x978CFB84, 0x978DFB84, 0x978EFB84, 0x978FFB84, 0x9790FB84, 0x9791FB84, 0x9792FB84, 0x9793FB84, 0x9794FB84, 0x9795FB84, 0x9796FB84, 0x9797FB84, 0x9798FB84, 0x9799FB84, 0x979AFB84, + 0x979BFB84, 0x979CFB84, 0x979DFB84, 0x979EFB84, 0x979FFB84, 0x97A0FB84, 0x97A1FB84, 0x97A2FB84, 0x97A3FB84, 0x97A4FB84, 0x97A5FB84, 0x97A6FB84, 0x97A7FB84, 0x97A8FB84, 0x97A9FB84, + 0x97AAFB84, 0x97ABFB84, 0x97ACFB84, 0x97ADFB84, 0x97AEFB84, 0x97AFFB84, 
0x97B0FB84, 0x97B1FB84, 0x97B2FB84, 0x97B3FB84, 0x97B4FB84, 0x97B5FB84, 0x97B6FB84, 0x97B7FB84, 0x97B8FB84, + 0x97B9FB84, 0x97BAFB84, 0x97BBFB84, 0x97BCFB84, 0x97BDFB84, 0x97BEFB84, 0x97BFFB84, 0x97C0FB84, 0x97C1FB84, 0x97C2FB84, 0x97C3FB84, 0x97C4FB84, 0x97C5FB84, 0x97C6FB84, 0x97C7FB84, + 0x97C8FB84, 0x97C9FB84, 0x97CAFB84, 0x97CBFB84, 0x97CCFB84, 0x97CDFB84, 0x97CEFB84, 0x97CFFB84, 0x97D0FB84, 0x97D1FB84, 0x97D2FB84, 0x97D3FB84, 0x97D4FB84, 0x97D5FB84, 0x97D6FB84, + 0x97D7FB84, 0x97D8FB84, 0x97D9FB84, 0x97DAFB84, 0x97DBFB84, 0x97DCFB84, 0x97DDFB84, 0x97DEFB84, 0x97DFFB84, 0x97E0FB84, 0x97E1FB84, 0x97E2FB84, 0x97E3FB84, 0x97E4FB84, 0x97E5FB84, + 0x97E6FB84, 0x97E7FB84, 0x97E8FB84, 0x97E9FB84, 0x97EAFB84, 0x97EBFB84, 0x97ECFB84, 0x97EDFB84, 0x97EEFB84, 0x97EFFB84, 0x97F0FB84, 0x97F1FB84, 0x97F2FB84, 0x97F3FB84, 0x97F4FB84, + 0x97F5FB84, 0x97F6FB84, 0x97F7FB84, 0x97F8FB84, 0x97F9FB84, 0x97FAFB84, 0x97FBFB84, 0x97FCFB84, 0x97FDFB84, 0x97FEFB84, 0x97FFFB84, 0x9800FB84, 0x9801FB84, 0x9802FB84, 0x9803FB84, + 0x9804FB84, 0x9805FB84, 0x9806FB84, 0x9807FB84, 0x9808FB84, 0x9809FB84, 0x980AFB84, 0x980BFB84, 0x980CFB84, 0x980DFB84, 0x980EFB84, 0x980FFB84, 0x9810FB84, 0x9811FB84, 0x9812FB84, + 0x9813FB84, 0x9814FB84, 0x9815FB84, 0x9816FB84, 0x9817FB84, 0x9818FB84, 0x9819FB84, 0x981AFB84, 0x981BFB84, 0x981CFB84, 0x981DFB84, 0x981EFB84, 0x981FFB84, 0x9820FB84, 0x9821FB84, + 0x9822FB84, 0x9823FB84, 0x9824FB84, 0x9825FB84, 0x9826FB84, 0x9827FB84, 0x9828FB84, 0x9829FB84, 0x982AFB84, 0x982BFB84, 0x982CFB84, 0x982DFB84, 0x982EFB84, 0x982FFB84, 0x9830FB84, + 0x9831FB84, 0x9832FB84, 0x9833FB84, 0x9834FB84, 0x9835FB84, 0x9836FB84, 0x9837FB84, 0x9838FB84, 0x9839FB84, 0x983AFB84, 0x983BFB84, 0x983CFB84, 0x983DFB84, 0x983EFB84, 0x983FFB84, + 0x9840FB84, 0x9841FB84, 0x9842FB84, 0x9843FB84, 0x9844FB84, 0x9845FB84, 0x9846FB84, 0x9847FB84, 0x9848FB84, 0x9849FB84, 0x984AFB84, 0x984BFB84, 0x984CFB84, 0x984DFB84, 0x984EFB84, + 0x984FFB84, 0x9850FB84, 0x9851FB84, 0x9852FB84, 0x9853FB84, 
0x9854FB84, 0x9855FB84, 0x9856FB84, 0x9857FB84, 0x9858FB84, 0x9859FB84, 0x985AFB84, 0x985BFB84, 0x985CFB84, 0x985DFB84, + 0x985EFB84, 0x985FFB84, 0x9860FB84, 0x9861FB84, 0x9862FB84, 0x9863FB84, 0x9864FB84, 0x9865FB84, 0x9866FB84, 0x9867FB84, 0x9868FB84, 0x9869FB84, 0x986AFB84, 0x986BFB84, 0x986CFB84, + 0x986DFB84, 0x986EFB84, 0x986FFB84, 0x9870FB84, 0x9871FB84, 0x9872FB84, 0x9873FB84, 0x9874FB84, 0x9875FB84, 0x9876FB84, 0x9877FB84, 0x9878FB84, 0x9879FB84, 0x987AFB84, 0x987BFB84, + 0x987CFB84, 0x987DFB84, 0x987EFB84, 0x987FFB84, 0x9880FB84, 0x9881FB84, 0x9882FB84, 0x9883FB84, 0x9884FB84, 0x9885FB84, 0x9886FB84, 0x9887FB84, 0x9888FB84, 0x9889FB84, 0x988AFB84, + 0x988BFB84, 0x988CFB84, 0x988DFB84, 0x988EFB84, 0x988FFB84, 0x9890FB84, 0x9891FB84, 0x9892FB84, 0x9893FB84, 0x9894FB84, 0x9895FB84, 0x9896FB84, 0x9897FB84, 0x9898FB84, 0x9899FB84, + 0x989AFB84, 0x989BFB84, 0x989CFB84, 0x989DFB84, 0x989EFB84, 0x989FFB84, 0x98A0FB84, 0x98A1FB84, 0x98A2FB84, 0x98A3FB84, 0x98A4FB84, 0x98A5FB84, 0x98A6FB84, 0x98A7FB84, 0x98A8FB84, + 0x98A9FB84, 0x98AAFB84, 0x98ABFB84, 0x98ACFB84, 0x98ADFB84, 0x98AEFB84, 0x98AFFB84, 0x98B0FB84, 0x98B1FB84, 0x98B2FB84, 0x98B3FB84, 0x98B4FB84, 0x98B5FB84, 0x98B6FB84, 0x98B7FB84, + 0x98B8FB84, 0x98B9FB84, 0x98BAFB84, 0x98BBFB84, 0x98BCFB84, 0x98BDFB84, 0x98BEFB84, 0x98BFFB84, 0x98C0FB84, 0x98C1FB84, 0x98C2FB84, 0x98C3FB84, 0x98C4FB84, 0x98C5FB84, 0x98C6FB84, + 0x98C7FB84, 0x98C8FB84, 0x98C9FB84, 0x98CAFB84, 0x98CBFB84, 0x98CCFB84, 0x98CDFB84, 0x98CEFB84, 0x98CFFB84, 0x98D0FB84, 0x98D1FB84, 0x98D2FB84, 0x98D3FB84, 0x98D4FB84, 0x98D5FB84, + 0x98D6FB84, 0x98D7FB84, 0x98D8FB84, 0x98D9FB84, 0x98DAFB84, 0x98DBFB84, 0x98DCFB84, 0x98DDFB84, 0x98DEFB84, 0x98DFFB84, 0x98E0FB84, 0x98E1FB84, 0x98E2FB84, 0x98E3FB84, 0x98E4FB84, + 0x98E5FB84, 0x98E6FB84, 0x98E7FB84, 0x98E8FB84, 0x98E9FB84, 0x98EAFB84, 0x98EBFB84, 0x98ECFB84, 0x98EDFB84, 0x98EEFB84, 0x98EFFB84, 0x98F0FB84, 0x98F1FB84, 0x98F2FB84, 0x98F3FB84, + 0x98F4FB84, 0x98F5FB84, 0x98F6FB84, 0x98F7FB84, 
0x98F8FB84, 0x98F9FB84, 0x98FAFB84, 0x98FBFB84, 0x98FCFB84, 0x98FDFB84, 0x98FEFB84, 0x98FFFB84, 0x9900FB84, 0x9901FB84, 0x9902FB84, + 0x9903FB84, 0x9904FB84, 0x9905FB84, 0x9906FB84, 0x9907FB84, 0x9908FB84, 0x9909FB84, 0x990AFB84, 0x990BFB84, 0x990CFB84, 0x990DFB84, 0x990EFB84, 0x990FFB84, 0x9910FB84, 0x9911FB84, + 0x9912FB84, 0x9913FB84, 0x9914FB84, 0x9915FB84, 0x9916FB84, 0x9917FB84, 0x9918FB84, 0x9919FB84, 0x991AFB84, 0x991BFB84, 0x991CFB84, 0x991DFB84, 0x991EFB84, 0x991FFB84, 0x9920FB84, + 0x9921FB84, 0x9922FB84, 0x9923FB84, 0x9924FB84, 0x9925FB84, 0x9926FB84, 0x9927FB84, 0x9928FB84, 0x9929FB84, 0x992AFB84, 0x992BFB84, 0x992CFB84, 0x992DFB84, 0x992EFB84, 0x992FFB84, + 0x9930FB84, 0x9931FB84, 0x9932FB84, 0x9933FB84, 0x9934FB84, 0x9935FB84, 0x9936FB84, 0x9937FB84, 0x9938FB84, 0x9939FB84, 0x993AFB84, 0x993BFB84, 0x993CFB84, 0x993DFB84, 0x993EFB84, + 0x993FFB84, 0x9940FB84, 0x9941FB84, 0x9942FB84, 0x9943FB84, 0x9944FB84, 0x9945FB84, 0x9946FB84, 0x9947FB84, 0x9948FB84, 0x9949FB84, 0x994AFB84, 0x994BFB84, 0x994CFB84, 0x994DFB84, + 0x994EFB84, 0x994FFB84, 0x9950FB84, 0x9951FB84, 0x9952FB84, 0x9953FB84, 0x9954FB84, 0x9955FB84, 0x9956FB84, 0x9957FB84, 0x9958FB84, 0x9959FB84, 0x995AFB84, 0x995BFB84, 0x995CFB84, + 0x995DFB84, 0x995EFB84, 0x995FFB84, 0x9960FB84, 0x9961FB84, 0x9962FB84, 0x9963FB84, 0x9964FB84, 0x9965FB84, 0x9966FB84, 0x9967FB84, 0x9968FB84, 0x9969FB84, 0x996AFB84, 0x996BFB84, + 0x996CFB84, 0x996DFB84, 0x996EFB84, 0x996FFB84, 0x9970FB84, 0x9971FB84, 0x9972FB84, 0x9973FB84, 0x9974FB84, 0x9975FB84, 0x9976FB84, 0x9977FB84, 0x9978FB84, 0x9979FB84, 0x997AFB84, + 0x997BFB84, 0x997CFB84, 0x997DFB84, 0x997EFB84, 0x997FFB84, 0x9980FB84, 0x9981FB84, 0x9982FB84, 0x9983FB84, 0x9984FB84, 0x9985FB84, 0x9986FB84, 0x9987FB84, 0x9988FB84, 0x9989FB84, + 0x998AFB84, 0x998BFB84, 0x998CFB84, 0x998DFB84, 0x998EFB84, 0x998FFB84, 0x9990FB84, 0x9991FB84, 0x9992FB84, 0x9993FB84, 0x9994FB84, 0x9995FB84, 0x9996FB84, 0x9997FB84, 0x9998FB84, + 0x9999FB84, 0x999AFB84, 0x999BFB84, 
0x999CFB84, 0x999DFB84, 0x999EFB84, 0x999FFB84, 0x99A0FB84, 0x99A1FB84, 0x99A2FB84, 0x99A3FB84, 0x99A4FB84, 0x99A5FB84, 0x99A6FB84, 0x99A7FB84, + 0x99A8FB84, 0x99A9FB84, 0x99AAFB84, 0x99ABFB84, 0x99ACFB84, 0x99ADFB84, 0x99AEFB84, 0x99AFFB84, 0x99B0FB84, 0x99B1FB84, 0x99B2FB84, 0x99B3FB84, 0x99B4FB84, 0x99B5FB84, 0x99B6FB84, + 0x99B7FB84, 0x99B8FB84, 0x99B9FB84, 0x99BAFB84, 0x99BBFB84, 0x99BCFB84, 0x99BDFB84, 0x99BEFB84, 0x99BFFB84, 0x99C0FB84, 0x99C1FB84, 0x99C2FB84, 0x99C3FB84, 0x99C4FB84, 0x99C5FB84, + 0x99C6FB84, 0x99C7FB84, 0x99C8FB84, 0x99C9FB84, 0x99CAFB84, 0x99CBFB84, 0x99CCFB84, 0x99CDFB84, 0x99CEFB84, 0x99CFFB84, 0x99D0FB84, 0x99D1FB84, 0x99D2FB84, 0x99D3FB84, 0x99D4FB84, + 0x99D5FB84, 0x99D6FB84, 0x99D7FB84, 0x99D8FB84, 0x99D9FB84, 0x99DAFB84, 0x99DBFB84, 0x99DCFB84, 0x99DDFB84, 0x99DEFB84, 0x99DFFB84, 0x99E0FB84, 0x99E1FB84, 0x99E2FB84, 0x99E3FB84, + 0x99E4FB84, 0x99E5FB84, 0x99E6FB84, 0x99E7FB84, 0x99E8FB84, 0x99E9FB84, 0x99EAFB84, 0x99EBFB84, 0x99ECFB84, 0x99EDFB84, 0x99EEFB84, 0x99EFFB84, 0x99F0FB84, 0x99F1FB84, 0x99F2FB84, + 0x99F3FB84, 0x99F4FB84, 0x99F5FB84, 0x99F6FB84, 0x99F7FB84, 0x99F8FB84, 0x99F9FB84, 0x99FAFB84, 0x99FBFB84, 0x99FCFB84, 0x99FDFB84, 0x99FEFB84, 0x99FFFB84, 0x9A00FB84, 0x9A01FB84, + 0x9A02FB84, 0x9A03FB84, 0x9A04FB84, 0x9A05FB84, 0x9A06FB84, 0x9A07FB84, 0x9A08FB84, 0x9A09FB84, 0x9A0AFB84, 0x9A0BFB84, 0x9A0CFB84, 0x9A0DFB84, 0x9A0EFB84, 0x9A0FFB84, 0x9A10FB84, + 0x9A11FB84, 0x9A12FB84, 0x9A13FB84, 0x9A14FB84, 0x9A15FB84, 0x9A16FB84, 0x9A17FB84, 0x9A18FB84, 0x9A19FB84, 0x9A1AFB84, 0x9A1BFB84, 0x9A1CFB84, 0x9A1DFB84, 0x9A1EFB84, 0x9A1FFB84, + 0x9A20FB84, 0x9A21FB84, 0x9A22FB84, 0x9A23FB84, 0x9A24FB84, 0x9A25FB84, 0x9A26FB84, 0x9A27FB84, 0x9A28FB84, 0x9A29FB84, 0x9A2AFB84, 0x9A2BFB84, 0x9A2CFB84, 0x9A2DFB84, 0x9A2EFB84, + 0x9A2FFB84, 0x9A30FB84, 0x9A31FB84, 0x9A32FB84, 0x9A33FB84, 0x9A34FB84, 0x9A35FB84, 0x9A36FB84, 0x9A37FB84, 0x9A38FB84, 0x9A39FB84, 0x9A3AFB84, 0x9A3BFB84, 0x9A3CFB84, 0x9A3DFB84, + 0x9A3EFB84, 0x9A3FFB84, 
0x9A40FB84, 0x9A41FB84, 0x9A42FB84, 0x9A43FB84, 0x9A44FB84, 0x9A45FB84, 0x9A46FB84, 0x9A47FB84, 0x9A48FB84, 0x9A49FB84, 0x9A4AFB84, 0x9A4BFB84, 0x9A4CFB84, + 0x9A4DFB84, 0x9A4EFB84, 0x9A4FFB84, 0x9A50FB84, 0x9A51FB84, 0x9A52FB84, 0x9A53FB84, 0x9A54FB84, 0x9A55FB84, 0x9A56FB84, 0x9A57FB84, 0x9A58FB84, 0x9A59FB84, 0x9A5AFB84, 0x9A5BFB84, + 0x9A5CFB84, 0x9A5DFB84, 0x9A5EFB84, 0x9A5FFB84, 0x9A60FB84, 0x9A61FB84, 0x9A62FB84, 0x9A63FB84, 0x9A64FB84, 0x9A65FB84, 0x9A66FB84, 0x9A67FB84, 0x9A68FB84, 0x9A69FB84, 0x9A6AFB84, + 0x9A6BFB84, 0x9A6CFB84, 0x9A6DFB84, 0x9A6EFB84, 0x9A6FFB84, 0x9A70FB84, 0x9A71FB84, 0x9A72FB84, 0x9A73FB84, 0x9A74FB84, 0x9A75FB84, 0x9A76FB84, 0x9A77FB84, 0x9A78FB84, 0x9A79FB84, + 0x9A7AFB84, 0x9A7BFB84, 0x9A7CFB84, 0x9A7DFB84, 0x9A7EFB84, 0x9A7FFB84, 0x9A80FB84, 0x9A81FB84, 0x9A82FB84, 0x9A83FB84, 0x9A84FB84, 0x9A85FB84, 0x9A86FB84, 0x9A87FB84, 0x9A88FB84, + 0x9A89FB84, 0x9A8AFB84, 0x9A8BFB84, 0x9A8CFB84, 0x9A8DFB84, 0x9A8EFB84, 0x9A8FFB84, 0x9A90FB84, 0x9A91FB84, 0x9A92FB84, 0x9A93FB84, 0x9A94FB84, 0x9A95FB84, 0x9A96FB84, 0x9A97FB84, + 0x9A98FB84, 0x9A99FB84, 0x9A9AFB84, 0x9A9BFB84, 0x9A9CFB84, 0x9A9DFB84, 0x9A9EFB84, 0x9A9FFB84, 0x9AA0FB84, 0x9AA1FB84, 0x9AA2FB84, 0x9AA3FB84, 0x9AA4FB84, 0x9AA5FB84, 0x9AA6FB84, + 0x9AA7FB84, 0x9AA8FB84, 0x9AA9FB84, 0x9AAAFB84, 0x9AABFB84, 0x9AACFB84, 0x9AADFB84, 0x9AAEFB84, 0x9AAFFB84, 0x9AB0FB84, 0x9AB1FB84, 0x9AB2FB84, 0x9AB3FB84, 0x9AB4FB84, 0x9AB5FB84, + 0x9AB6FB84, 0x9AB7FB84, 0x9AB8FB84, 0x9AB9FB84, 0x9ABAFB84, 0x9ABBFB84, 0x9ABCFB84, 0x9ABDFB84, 0x9ABEFB84, 0x9ABFFB84, 0x9AC0FB84, 0x9AC1FB84, 0x9AC2FB84, 0x9AC3FB84, 0x9AC4FB84, + 0x9AC5FB84, 0x9AC6FB84, 0x9AC7FB84, 0x9AC8FB84, 0x9AC9FB84, 0x9ACAFB84, 0x9ACBFB84, 0x9ACCFB84, 0x9ACDFB84, 0x9ACEFB84, 0x9ACFFB84, 0x9AD0FB84, 0x9AD1FB84, 0x9AD2FB84, 0x9AD3FB84, + 0x9AD4FB84, 0x9AD5FB84, 0x9AD6FB84, 0x9AD7FB84, 0x9AD8FB84, 0x9AD9FB84, 0x9ADAFB84, 0x9ADBFB84, 0x9ADCFB84, 0x9ADDFB84, 0x9ADEFB84, 0x9ADFFB84, 0x9AE0FB84, 0x9AE1FB84, 0x9AE2FB84, + 0x9AE3FB84, 
0x9AE4FB84, 0x9AE5FB84, 0x9AE6FB84, 0x9AE7FB84, 0x9AE8FB84, 0x9AE9FB84, 0x9AEAFB84, 0x9AEBFB84, 0x9AECFB84, 0x9AEDFB84, 0x9AEEFB84, 0x9AEFFB84, 0x9AF0FB84, 0x9AF1FB84, + 0x9AF2FB84, 0x9AF3FB84, 0x9AF4FB84, 0x9AF5FB84, 0x9AF6FB84, 0x9AF7FB84, 0x9AF8FB84, 0x9AF9FB84, 0x9AFAFB84, 0x9AFBFB84, 0x9AFCFB84, 0x9AFDFB84, 0x9AFEFB84, 0x9AFFFB84, 0x9B00FB84, + 0x9B01FB84, 0x9B02FB84, 0x9B03FB84, 0x9B04FB84, 0x9B05FB84, 0x9B06FB84, 0x9B07FB84, 0x9B08FB84, 0x9B09FB84, 0x9B0AFB84, 0x9B0BFB84, 0x9B0CFB84, 0x9B0DFB84, 0x9B0EFB84, 0x9B0FFB84, + 0x9B10FB84, 0x9B11FB84, 0x9B12FB84, 0x9B13FB84, 0x9B14FB84, 0x9B15FB84, 0x9B16FB84, 0x9B17FB84, 0x9B18FB84, 0x9B19FB84, 0x9B1AFB84, 0x9B1BFB84, 0x9B1CFB84, 0x9B1DFB84, 0x9B1EFB84, + 0x9B1FFB84, 0x9B20FB84, 0x9B21FB84, 0x9B22FB84, 0x9B23FB84, 0x9B24FB84, 0x9B25FB84, 0x9B26FB84, 0x9B27FB84, 0x9B28FB84, 0x9B29FB84, 0x9B2AFB84, 0x9B2BFB84, 0x9B2CFB84, 0x9B2DFB84, + 0x9B2EFB84, 0x9B2FFB84, 0x9B30FB84, 0x9B31FB84, 0x9B32FB84, 0x9B33FB84, 0x9B34FB84, 0x9B35FB84, 0x9B36FB84, 0x9B37FB84, 0x9B38FB84, 0x9B39FB84, 0x9B3AFB84, 0x9B3BFB84, 0x9B3CFB84, + 0x9B3DFB84, 0x9B3EFB84, 0x9B3FFB84, 0x9B40FB84, 0x9B41FB84, 0x9B42FB84, 0x9B43FB84, 0x9B44FB84, 0x9B45FB84, 0x9B46FB84, 0x9B47FB84, 0x9B48FB84, 0x9B49FB84, 0x9B4AFB84, 0x9B4BFB84, + 0x9B4CFB84, 0x9B4DFB84, 0x9B4EFB84, 0x9B4FFB84, 0x9B50FB84, 0x9B51FB84, 0x9B52FB84, 0x9B53FB84, 0x9B54FB84, 0x9B55FB84, 0x9B56FB84, 0x9B57FB84, 0x9B58FB84, 0x9B59FB84, 0x9B5AFB84, + 0x9B5BFB84, 0x9B5CFB84, 0x9B5DFB84, 0x9B5EFB84, 0x9B5FFB84, 0x9B60FB84, 0x9B61FB84, 0x9B62FB84, 0x9B63FB84, 0x9B64FB84, 0x9B65FB84, 0x9B66FB84, 0x9B67FB84, 0x9B68FB84, 0x9B69FB84, + 0x9B6AFB84, 0x9B6BFB84, 0x9B6CFB84, 0x9B6DFB84, 0x9B6EFB84, 0x9B6FFB84, 0x9B70FB84, 0x9B71FB84, 0x9B72FB84, 0x9B73FB84, 0x9B74FB84, 0x9B75FB84, 0x9B76FB84, 0x9B77FB84, 0x9B78FB84, + 0x9B79FB84, 0x9B7AFB84, 0x9B7BFB84, 0x9B7CFB84, 0x9B7DFB84, 0x9B7EFB84, 0x9B7FFB84, 0x9B80FB84, 0x9B81FB84, 0x9B82FB84, 0x9B83FB84, 0x9B84FB84, 0x9B85FB84, 0x9B86FB84, 0x9B87FB84, + 
0x9B88FB84, 0x9B89FB84, 0x9B8AFB84, 0x9B8BFB84, 0x9B8CFB84, 0x9B8DFB84, 0x9B8EFB84, 0x9B8FFB84, 0x9B90FB84, 0x9B91FB84, 0x9B92FB84, 0x9B93FB84, 0x9B94FB84, 0x9B95FB84, 0x9B96FB84, + 0x9B97FB84, 0x9B98FB84, 0x9B99FB84, 0x9B9AFB84, 0x9B9BFB84, 0x9B9CFB84, 0x9B9DFB84, 0x9B9EFB84, 0x9B9FFB84, 0x9BA0FB84, 0x9BA1FB84, 0x9BA2FB84, 0x9BA3FB84, 0x9BA4FB84, 0x9BA5FB84, + 0x9BA6FB84, 0x9BA7FB84, 0x9BA8FB84, 0x9BA9FB84, 0x9BAAFB84, 0x9BABFB84, 0x9BACFB84, 0x9BADFB84, 0x9BAEFB84, 0x9BAFFB84, 0x9BB0FB84, 0x9BB1FB84, 0x9BB2FB84, 0x9BB3FB84, 0x9BB4FB84, + 0x9BB5FB84, 0x9BB6FB84, 0x9BB7FB84, 0x9BB8FB84, 0x9BB9FB84, 0x9BBAFB84, 0x9BBBFB84, 0x9BBCFB84, 0x9BBDFB84, 0x9BBEFB84, 0x9BBFFB84, 0x9BC0FB84, 0x9BC1FB84, 0x9BC2FB84, 0x9BC3FB84, + 0x9BC4FB84, 0x9BC5FB84, 0x9BC6FB84, 0x9BC7FB84, 0x9BC8FB84, 0x9BC9FB84, 0x9BCAFB84, 0x9BCBFB84, 0x9BCCFB84, 0x9BCDFB84, 0x9BCEFB84, 0x9BCFFB84, 0x9BD0FB84, 0x9BD1FB84, 0x9BD2FB84, + 0x9BD3FB84, 0x9BD4FB84, 0x9BD5FB84, 0x9BD6FB84, 0x9BD7FB84, 0x9BD8FB84, 0x9BD9FB84, 0x9BDAFB84, 0x9BDBFB84, 0x9BDCFB84, 0x9BDDFB84, 0x9BDEFB84, 0x9BDFFB84, 0x9BE0FB84, 0x9BE1FB84, + 0x9BE2FB84, 0x9BE3FB84, 0x9BE4FB84, 0x9BE5FB84, 0x9BE6FB84, 0x9BE7FB84, 0x9BE8FB84, 0x9BE9FB84, 0x9BEAFB84, 0x9BEBFB84, 0x9BECFB84, 0x9BEDFB84, 0x9BEEFB84, 0x9BEFFB84, 0x9BF0FB84, + 0x9BF1FB84, 0x9BF2FB84, 0x9BF3FB84, 0x9BF4FB84, 0x9BF5FB84, 0x9BF6FB84, 0x9BF7FB84, 0x9BF8FB84, 0x9BF9FB84, 0x9BFAFB84, 0x9BFBFB84, 0x9BFCFB84, 0x9BFDFB84, 0x9BFEFB84, 0x9BFFFB84, + 0x9C00FB84, 0x9C01FB84, 0x9C02FB84, 0x9C03FB84, 0x9C04FB84, 0x9C05FB84, 0x9C06FB84, 0x9C07FB84, 0x9C08FB84, 0x9C09FB84, 0x9C0AFB84, 0x9C0BFB84, 0x9C0CFB84, 0x9C0DFB84, 0x9C0EFB84, + 0x9C0FFB84, 0x9C10FB84, 0x9C11FB84, 0x9C12FB84, 0x9C13FB84, 0x9C14FB84, 0x9C15FB84, 0x9C16FB84, 0x9C17FB84, 0x9C18FB84, 0x9C19FB84, 0x9C1AFB84, 0x9C1BFB84, 0x9C1CFB84, 0x9C1DFB84, + 0x9C1EFB84, 0x9C1FFB84, 0x9C20FB84, 0x9C21FB84, 0x9C22FB84, 0x9C23FB84, 0x9C24FB84, 0x9C25FB84, 0x9C26FB84, 0x9C27FB84, 0x9C28FB84, 0x9C29FB84, 0x9C2AFB84, 0x9C2BFB84, 0x9C2CFB84, 
+ 0x9C2DFB84, 0x9C2EFB84, 0x9C2FFB84, 0x9C30FB84, 0x9C31FB84, 0x9C32FB84, 0x9C33FB84, 0x9C34FB84, 0x9C35FB84, 0x9C36FB84, 0x9C37FB84, 0x9C38FB84, 0x9C39FB84, 0x9C3AFB84, 0x9C3BFB84, + 0x9C3CFB84, 0x9C3DFB84, 0x9C3EFB84, 0x9C3FFB84, 0x9C40FB84, 0x9C41FB84, 0x9C42FB84, 0x9C43FB84, 0x9C44FB84, 0x9C45FB84, 0x9C46FB84, 0x9C47FB84, 0x9C48FB84, 0x9C49FB84, 0x9C4AFB84, + 0x9C4BFB84, 0x9C4CFB84, 0x9C4DFB84, 0x9C4EFB84, 0x9C4FFB84, 0x9C50FB84, 0x9C51FB84, 0x9C52FB84, 0x9C53FB84, 0x9C54FB84, 0x9C55FB84, 0x9C56FB84, 0x9C57FB84, 0x9C58FB84, 0x9C59FB84, + 0x9C5AFB84, 0x9C5BFB84, 0x9C5CFB84, 0x9C5DFB84, 0x9C5EFB84, 0x9C5FFB84, 0x9C60FB84, 0x9C61FB84, 0x9C62FB84, 0x9C63FB84, 0x9C64FB84, 0x9C65FB84, 0x9C66FB84, 0x9C67FB84, 0x9C68FB84, + 0x9C69FB84, 0x9C6AFB84, 0x9C6BFB84, 0x9C6CFB84, 0x9C6DFB84, 0x9C6EFB84, 0x9C6FFB84, 0x9C70FB84, 0x9C71FB84, 0x9C72FB84, 0x9C73FB84, 0x9C74FB84, 0x9C75FB84, 0x9C76FB84, 0x9C77FB84, + 0x9C78FB84, 0x9C79FB84, 0x9C7AFB84, 0x9C7BFB84, 0x9C7CFB84, 0x9C7DFB84, 0x9C7EFB84, 0x9C7FFB84, 0x9C80FB84, 0x9C81FB84, 0x9C82FB84, 0x9C83FB84, 0x9C84FB84, 0x9C85FB84, 0x9C86FB84, + 0x9C87FB84, 0x9C88FB84, 0x9C89FB84, 0x9C8AFB84, 0x9C8BFB84, 0x9C8CFB84, 0x9C8DFB84, 0x9C8EFB84, 0x9C8FFB84, 0x9C90FB84, 0x9C91FB84, 0x9C92FB84, 0x9C93FB84, 0x9C94FB84, 0x9C95FB84, + 0x9C96FB84, 0x9C97FB84, 0x9C98FB84, 0x9C99FB84, 0x9C9AFB84, 0x9C9BFB84, 0x9C9CFB84, 0x9C9DFB84, 0x9C9EFB84, 0x9C9FFB84, 0x9CA0FB84, 0x9CA1FB84, 0x9CA2FB84, 0x9CA3FB84, 0x9CA4FB84, + 0x9CA5FB84, 0x9CA6FB84, 0x9CA7FB84, 0x9CA8FB84, 0x9CA9FB84, 0x9CAAFB84, 0x9CABFB84, 0x9CACFB84, 0x9CADFB84, 0x9CAEFB84, 0x9CAFFB84, 0x9CB0FB84, 0x9CB1FB84, 0x9CB2FB84, 0x9CB3FB84, + 0x9CB4FB84, 0x9CB5FB84, 0x9CB6FB84, 0x9CB7FB84, 0x9CB8FB84, 0x9CB9FB84, 0x9CBAFB84, 0x9CBBFB84, 0x9CBCFB84, 0x9CBDFB84, 0x9CBEFB84, 0x9CBFFB84, 0x9CC0FB84, 0x9CC1FB84, 0x9CC2FB84, + 0x9CC3FB84, 0x9CC4FB84, 0x9CC5FB84, 0x9CC6FB84, 0x9CC7FB84, 0x9CC8FB84, 0x9CC9FB84, 0x9CCAFB84, 0x9CCBFB84, 0x9CCCFB84, 0x9CCDFB84, 0x9CCEFB84, 0x9CCFFB84, 0x9CD0FB84, 
0x9CD1FB84, + 0x9CD2FB84, 0x9CD3FB84, 0x9CD4FB84, 0x9CD5FB84, 0x9CD6FB84, 0x9CD7FB84, 0x9CD8FB84, 0x9CD9FB84, 0x9CDAFB84, 0x9CDBFB84, 0x9CDCFB84, 0x9CDDFB84, 0x9CDEFB84, 0x9CDFFB84, 0x9CE0FB84, + 0x9CE1FB84, 0x9CE2FB84, 0x9CE3FB84, 0x9CE4FB84, 0x9CE5FB84, 0x9CE6FB84, 0x9CE7FB84, 0x9CE8FB84, 0x9CE9FB84, 0x9CEAFB84, 0x9CEBFB84, 0x9CECFB84, 0x9CEDFB84, 0x9CEEFB84, 0x9CEFFB84, + 0x9CF0FB84, 0x9CF1FB84, 0x9CF2FB84, 0x9CF3FB84, 0x9CF4FB84, 0x9CF5FB84, 0x9CF6FB84, 0x9CF7FB84, 0x9CF8FB84, 0x9CF9FB84, 0x9CFAFB84, 0x9CFBFB84, 0x9CFCFB84, 0x9CFDFB84, 0x9CFEFB84, + 0x9CFFFB84, 0x9D00FB84, 0x9D01FB84, 0x9D02FB84, 0x9D03FB84, 0x9D04FB84, 0x9D05FB84, 0x9D06FB84, 0x9D07FB84, 0x9D08FB84, 0x9D09FB84, 0x9D0AFB84, 0x9D0BFB84, 0x9D0CFB84, 0x9D0DFB84, + 0x9D0EFB84, 0x9D0FFB84, 0x9D10FB84, 0x9D11FB84, 0x9D12FB84, 0x9D13FB84, 0x9D14FB84, 0x9D15FB84, 0x9D16FB84, 0x9D17FB84, 0x9D18FB84, 0x9D19FB84, 0x9D1AFB84, 0x9D1BFB84, 0x9D1CFB84, + 0x9D1DFB84, 0x9D1EFB84, 0x9D1FFB84, 0x9D20FB84, 0x9D21FB84, 0x9D22FB84, 0x9D23FB84, 0x9D24FB84, 0x9D25FB84, 0x9D26FB84, 0x9D27FB84, 0x9D28FB84, 0x9D29FB84, 0x9D2AFB84, 0x9D2BFB84, + 0x9D2CFB84, 0x9D2DFB84, 0x9D2EFB84, 0x9D2FFB84, 0x9D30FB84, 0x9D31FB84, 0x9D32FB84, 0x9D33FB84, 0x9D34FB84, 0x9D35FB84, 0x9D36FB84, 0x9D37FB84, 0x9D38FB84, 0x9D39FB84, 0x9D3AFB84, + 0x9D3BFB84, 0x9D3CFB84, 0x9D3DFB84, 0x9D3EFB84, 0x9D3FFB84, 0x9D40FB84, 0x9D41FB84, 0x9D42FB84, 0x9D43FB84, 0x9D44FB84, 0x9D45FB84, 0x9D46FB84, 0x9D47FB84, 0x9D48FB84, 0x9D49FB84, + 0x9D4AFB84, 0x9D4BFB84, 0x9D4CFB84, 0x9D4DFB84, 0x9D4EFB84, 0x9D4FFB84, 0x9D50FB84, 0x9D51FB84, 0x9D52FB84, 0x9D53FB84, 0x9D54FB84, 0x9D55FB84, 0x9D56FB84, 0x9D57FB84, 0x9D58FB84, + 0x9D59FB84, 0x9D5AFB84, 0x9D5BFB84, 0x9D5CFB84, 0x9D5DFB84, 0x9D5EFB84, 0x9D5FFB84, 0x9D60FB84, 0x9D61FB84, 0x9D62FB84, 0x9D63FB84, 0x9D64FB84, 0x9D65FB84, 0x9D66FB84, 0x9D67FB84, + 0x9D68FB84, 0x9D69FB84, 0x9D6AFB84, 0x9D6BFB84, 0x9D6CFB84, 0x9D6DFB84, 0x9D6EFB84, 0x9D6FFB84, 0x9D70FB84, 0x9D71FB84, 0x9D72FB84, 0x9D73FB84, 0x9D74FB84, 
0x9D75FB84, 0x9D76FB84, + 0x9D77FB84, 0x9D78FB84, 0x9D79FB84, 0x9D7AFB84, 0x9D7BFB84, 0x9D7CFB84, 0x9D7DFB84, 0x9D7EFB84, 0x9D7FFB84, 0x9D80FB84, 0x9D81FB84, 0x9D82FB84, 0x9D83FB84, 0x9D84FB84, 0x9D85FB84, + 0x9D86FB84, 0x9D87FB84, 0x9D88FB84, 0x9D89FB84, 0x9D8AFB84, 0x9D8BFB84, 0x9D8CFB84, 0x9D8DFB84, 0x9D8EFB84, 0x9D8FFB84, 0x9D90FB84, 0x9D91FB84, 0x9D92FB84, 0x9D93FB84, 0x9D94FB84, + 0x9D95FB84, 0x9D96FB84, 0x9D97FB84, 0x9D98FB84, 0x9D99FB84, 0x9D9AFB84, 0x9D9BFB84, 0x9D9CFB84, 0x9D9DFB84, 0x9D9EFB84, 0x9D9FFB84, 0x9DA0FB84, 0x9DA1FB84, 0x9DA2FB84, 0x9DA3FB84, + 0x9DA4FB84, 0x9DA5FB84, 0x9DA6FB84, 0x9DA7FB84, 0x9DA8FB84, 0x9DA9FB84, 0x9DAAFB84, 0x9DABFB84, 0x9DACFB84, 0x9DADFB84, 0x9DAEFB84, 0x9DAFFB84, 0x9DB0FB84, 0x9DB1FB84, 0x9DB2FB84, + 0x9DB3FB84, 0x9DB4FB84, 0x9DB5FB84, 0x9DB6FB84, 0x9DB7FB84, 0x9DB8FB84, 0x9DB9FB84, 0x9DBAFB84, 0x9DBBFB84, 0x9DBCFB84, 0x9DBDFB84, 0x9DBEFB84, 0x9DBFFB84, 0x9DC0FB84, 0x9DC1FB84, + 0x9DC2FB84, 0x9DC3FB84, 0x9DC4FB84, 0x9DC5FB84, 0x9DC6FB84, 0x9DC7FB84, 0x9DC8FB84, 0x9DC9FB84, 0x9DCAFB84, 0x9DCBFB84, 0x9DCCFB84, 0x9DCDFB84, 0x9DCEFB84, 0x9DCFFB84, 0x9DD0FB84, + 0x9DD1FB84, 0x9DD2FB84, 0x9DD3FB84, 0x9DD4FB84, 0x9DD5FB84, 0x9DD6FB84, 0x9DD7FB84, 0x9DD8FB84, 0x9DD9FB84, 0x9DDAFB84, 0x9DDBFB84, 0x9DDCFB84, 0x9DDDFB84, 0x9DDEFB84, 0x9DDFFB84, + 0x9DE0FB84, 0x9DE1FB84, 0x9DE2FB84, 0x9DE3FB84, 0x9DE4FB84, 0x9DE5FB84, 0x9DE6FB84, 0x9DE7FB84, 0x9DE8FB84, 0x9DE9FB84, 0x9DEAFB84, 0x9DEBFB84, 0x9DECFB84, 0x9DEDFB84, 0x9DEEFB84, + 0x9DEFFB84, 0x9DF0FB84, 0x9DF1FB84, 0x9DF2FB84, 0x9DF3FB84, 0x9DF4FB84, 0x9DF5FB84, 0x9DF6FB84, 0x9DF7FB84, 0x9DF8FB84, 0x9DF9FB84, 0x9DFAFB84, 0x9DFBFB84, 0x9DFCFB84, 0x9DFDFB84, + 0x9DFEFB84, 0x9DFFFB84, 0x9E00FB84, 0x9E01FB84, 0x9E02FB84, 0x9E03FB84, 0x9E04FB84, 0x9E05FB84, 0x9E06FB84, 0x9E07FB84, 0x9E08FB84, 0x9E09FB84, 0x9E0AFB84, 0x9E0BFB84, 0x9E0CFB84, + 0x9E0DFB84, 0x9E0EFB84, 0x9E0FFB84, 0x9E10FB84, 0x9E11FB84, 0x9E12FB84, 0x9E13FB84, 0x9E14FB84, 0x9E15FB84, 0x9E16FB84, 0x9E17FB84, 0x9E18FB84, 
0x9E19FB84, 0x9E1AFB84, 0x9E1BFB84, + 0x9E1CFB84, 0x9E1DFB84, 0x9E1EFB84, 0x9E1FFB84, 0x9E20FB84, 0x9E21FB84, 0x9E22FB84, 0x9E23FB84, 0x9E24FB84, 0x9E25FB84, 0x9E26FB84, 0x9E27FB84, 0x9E28FB84, 0x9E29FB84, 0x9E2AFB84, + 0x9E2BFB84, 0x9E2CFB84, 0x9E2DFB84, 0x9E2EFB84, 0x9E2FFB84, 0x9E30FB84, 0x9E31FB84, 0x9E32FB84, 0x9E33FB84, 0x9E34FB84, 0x9E35FB84, 0x9E36FB84, 0x9E37FB84, 0x9E38FB84, 0x9E39FB84, + 0x9E3AFB84, 0x9E3BFB84, 0x9E3CFB84, 0x9E3DFB84, 0x9E3EFB84, 0x9E3FFB84, 0x9E40FB84, 0x9E41FB84, 0x9E42FB84, 0x9E43FB84, 0x9E44FB84, 0x9E45FB84, 0x9E46FB84, 0x9E47FB84, 0x9E48FB84, + 0x9E49FB84, 0x9E4AFB84, 0x9E4BFB84, 0x9E4CFB84, 0x9E4DFB84, 0x9E4EFB84, 0x9E4FFB84, 0x9E50FB84, 0x9E51FB84, 0x9E52FB84, 0x9E53FB84, 0x9E54FB84, 0x9E55FB84, 0x9E56FB84, 0x9E57FB84, + 0x9E58FB84, 0x9E59FB84, 0x9E5AFB84, 0x9E5BFB84, 0x9E5CFB84, 0x9E5DFB84, 0x9E5EFB84, 0x9E5FFB84, 0x9E60FB84, 0x9E61FB84, 0x9E62FB84, 0x9E63FB84, 0x9E64FB84, 0x9E65FB84, 0x9E66FB84, + 0x9E67FB84, 0x9E68FB84, 0x9E69FB84, 0x9E6AFB84, 0x9E6BFB84, 0x9E6CFB84, 0x9E6DFB84, 0x9E6EFB84, 0x9E6FFB84, 0x9E70FB84, 0x9E71FB84, 0x9E72FB84, 0x9E73FB84, 0x9E74FB84, 0x9E75FB84, + 0x9E76FB84, 0x9E77FB84, 0x9E78FB84, 0x9E79FB84, 0x9E7AFB84, 0x9E7BFB84, 0x9E7CFB84, 0x9E7DFB84, 0x9E7EFB84, 0x9E7FFB84, 0x9E80FB84, 0x9E81FB84, 0x9E82FB84, 0x9E83FB84, 0x9E84FB84, + 0x9E85FB84, 0x9E86FB84, 0x9E87FB84, 0x9E88FB84, 0x9E89FB84, 0x9E8AFB84, 0x9E8BFB84, 0x9E8CFB84, 0x9E8DFB84, 0x9E8EFB84, 0x9E8FFB84, 0x9E90FB84, 0x9E91FB84, 0x9E92FB84, 0x9E93FB84, + 0x9E94FB84, 0x9E95FB84, 0x9E96FB84, 0x9E97FB84, 0x9E98FB84, 0x9E99FB84, 0x9E9AFB84, 0x9E9BFB84, 0x9E9CFB84, 0x9E9DFB84, 0x9E9EFB84, 0x9E9FFB84, 0x9EA0FB84, 0x9EA1FB84, 0x9EA2FB84, + 0x9EA3FB84, 0x9EA4FB84, 0x9EA5FB84, 0x9EA6FB84, 0x9EA7FB84, 0x9EA8FB84, 0x9EA9FB84, 0x9EAAFB84, 0x9EABFB84, 0x9EACFB84, 0x9EADFB84, 0x9EAEFB84, 0x9EAFFB84, 0x9EB0FB84, 0x9EB1FB84, + 0x9EB2FB84, 0x9EB3FB84, 0x9EB4FB84, 0x9EB5FB84, 0x9EB6FB84, 0x9EB7FB84, 0x9EB8FB84, 0x9EB9FB84, 0x9EBAFB84, 0x9EBBFB84, 0x9EBCFB84, 
0x9EBDFB84, 0x9EBEFB84, 0x9EBFFB84, 0x9EC0FB84, + 0x9EC1FB84, 0x9EC2FB84, 0x9EC3FB84, 0x9EC4FB84, 0x9EC5FB84, 0x9EC6FB84, 0x9EC7FB84, 0x9EC8FB84, 0x9EC9FB84, 0x9ECAFB84, 0x9ECBFB84, 0x9ECCFB84, 0x9ECDFB84, 0x9ECEFB84, 0x9ECFFB84, + 0x9ED0FB84, 0x9ED1FB84, 0x9ED2FB84, 0x9ED3FB84, 0x9ED4FB84, 0x9ED5FB84, 0x9ED6FB84, 0x9ED7FB84, 0x9ED8FB84, 0x9ED9FB84, 0x9EDAFB84, 0x9EDBFB84, 0x9EDCFB84, 0x9EDDFB84, 0x9EDEFB84, + 0x9EDFFB84, 0x9EE0FB84, 0x9EE1FB84, 0x9EE2FB84, 0x9EE3FB84, 0x9EE4FB84, 0x9EE5FB84, 0x9EE6FB84, 0x9EE7FB84, 0x9EE8FB84, 0x9EE9FB84, 0x9EEAFB84, 0x9EEBFB84, 0x9EECFB84, 0x9EEDFB84, + 0x9EEEFB84, 0x9EEFFB84, 0x9EF0FB84, 0x9EF1FB84, 0x9EF2FB84, 0x9EF3FB84, 0x9EF4FB84, 0x9EF5FB84, 0x9EF6FB84, 0x9EF7FB84, 0x9EF8FB84, 0x9EF9FB84, 0x9EFAFB84, 0x9EFBFB84, 0x9EFCFB84, + 0x9EFDFB84, 0x9EFEFB84, 0x9EFFFB84, 0x9F00FB84, 0x9F01FB84, 0x9F02FB84, 0x9F03FB84, 0x9F04FB84, 0x9F05FB84, 0x9F06FB84, 0x9F07FB84, 0x9F08FB84, 0x9F09FB84, 0x9F0AFB84, 0x9F0BFB84, + 0x9F0CFB84, 0x9F0DFB84, 0x9F0EFB84, 0x9F0FFB84, 0x9F10FB84, 0x9F11FB84, 0x9F12FB84, 0x9F13FB84, 0x9F14FB84, 0x9F15FB84, 0x9F16FB84, 0x9F17FB84, 0x9F18FB84, 0x9F19FB84, 0x9F1AFB84, + 0x9F1BFB84, 0x9F1CFB84, 0x9F1DFB84, 0x9F1EFB84, 0x9F1FFB84, 0x9F20FB84, 0x9F21FB84, 0x9F22FB84, 0x9F23FB84, 0x9F24FB84, 0x9F25FB84, 0x9F26FB84, 0x9F27FB84, 0x9F28FB84, 0x9F29FB84, + 0x9F2AFB84, 0x9F2BFB84, 0x9F2CFB84, 0x9F2DFB84, 0x9F2EFB84, 0x9F2FFB84, 0x9F30FB84, 0x9F31FB84, 0x9F32FB84, 0x9F33FB84, 0x9F34FB84, 0x9F35FB84, 0x9F36FB84, 0x9F37FB84, 0x9F38FB84, + 0x9F39FB84, 0x9F3AFB84, 0x9F3BFB84, 0x9F3CFB84, 0x9F3DFB84, 0x9F3EFB84, 0x9F3FFB84, 0x9F40FB84, 0x9F41FB84, 0x9F42FB84, 0x9F43FB84, 0x9F44FB84, 0x9F45FB84, 0x9F46FB84, 0x9F47FB84, + 0x9F48FB84, 0x9F49FB84, 0x9F4AFB84, 0x9F4BFB84, 0x9F4CFB84, 0x9F4DFB84, 0x9F4EFB84, 0x9F4FFB84, 0x9F50FB84, 0x9F51FB84, 0x9F52FB84, 0x9F53FB84, 0x9F54FB84, 0x9F55FB84, 0x9F56FB84, + 0x9F57FB84, 0x9F58FB84, 0x9F59FB84, 0x9F5AFB84, 0x9F5BFB84, 0x9F5CFB84, 0x9F5DFB84, 0x9F5EFB84, 0x9F5FFB84, 0x9F60FB84, 
0x9F61FB84, 0x9F62FB84, 0x9F63FB84, 0x9F64FB84, 0x9F65FB84, + 0x9F66FB84, 0x9F67FB84, 0x9F68FB84, 0x9F69FB84, 0x9F6AFB84, 0x9F6BFB84, 0x9F6CFB84, 0x9F6DFB84, 0x9F6EFB84, 0x9F6FFB84, 0x9F70FB84, 0x9F71FB84, 0x9F72FB84, 0x9F73FB84, 0x9F74FB84, + 0x9F75FB84, 0x9F76FB84, 0x9F77FB84, 0x9F78FB84, 0x9F79FB84, 0x9F7AFB84, 0x9F7BFB84, 0x9F7CFB84, 0x9F7DFB84, 0x9F7EFB84, 0x9F7FFB84, 0x9F80FB84, 0x9F81FB84, 0x9F82FB84, 0x9F83FB84, + 0x9F84FB84, 0x9F85FB84, 0x9F86FB84, 0x9F87FB84, 0x9F88FB84, 0x9F89FB84, 0x9F8AFB84, 0x9F8BFB84, 0x9F8CFB84, 0x9F8DFB84, 0x9F8EFB84, 0x9F8FFB84, 0x9F90FB84, 0x9F91FB84, 0x9F92FB84, + 0x9F93FB84, 0x9F94FB84, 0x9F95FB84, 0x9F96FB84, 0x9F97FB84, 0x9F98FB84, 0x9F99FB84, 0x9F9AFB84, 0x9F9BFB84, 0x9F9CFB84, 0x9F9DFB84, 0x9F9EFB84, 0x9F9FFB84, 0x9FA0FB84, 0x9FA1FB84, + 0x9FA2FB84, 0x9FA3FB84, 0x9FA4FB84, 0x9FA5FB84, 0x9FA6FB84, 0x9FA7FB84, 0x9FA8FB84, 0x9FA9FB84, 0x9FAAFB84, 0x9FABFB84, 0x9FACFB84, 0x9FADFB84, 0x9FAEFB84, 0x9FAFFB84, 0x9FB0FB84, + 0x9FB1FB84, 0x9FB2FB84, 0x9FB3FB84, 0x9FB4FB84, 0x9FB5FB84, 0x9FB6FB84, 0x9FB7FB84, 0x9FB8FB84, 0x9FB9FB84, 0x9FBAFB84, 0x9FBBFB84, 0x9FBCFB84, 0x9FBDFB84, 0x9FBEFB84, 0x9FBFFB84, + 0x9FC0FB84, 0x9FC1FB84, 0x9FC2FB84, 0x9FC3FB84, 0x9FC4FB84, 0x9FC5FB84, 0x9FC6FB84, 0x9FC7FB84, 0x9FC8FB84, 0x9FC9FB84, 0x9FCAFB84, 0x9FCBFB84, 0x9FCCFB84, 0x9FCDFB84, 0x9FCEFB84, + 0x9FCFFB84, 0x9FD0FB84, 0x9FD1FB84, 0x9FD2FB84, 0x9FD3FB84, 0x9FD4FB84, 0x9FD5FB84, 0x9FD6FB84, 0x9FD7FB84, 0x9FD8FB84, 0x9FD9FB84, 0x9FDAFB84, 0x9FDBFB84, 0x9FDCFB84, 0x9FDDFB84, + 0x9FDEFB84, 0x9FDFFB84, 0x9FE0FB84, 0x9FE1FB84, 0x9FE2FB84, 0x9FE3FB84, 0x9FE4FB84, 0x9FE5FB84, 0x9FE6FB84, 0x9FE7FB84, 0x9FE8FB84, 0x9FE9FB84, 0x9FEAFB84, 0x9FEBFB84, 0x9FECFB84, + 0x9FEDFB84, 0x9FEEFB84, 0x9FEFFB84, 0x9FF0FB84, 0x9FF1FB84, 0x9FF2FB84, 0x9FF3FB84, 0x9FF4FB84, 0x9FF5FB84, 0x9FF6FB84, 0x9FF7FB84, 0x9FF8FB84, 0x9FF9FB84, 0x9FFAFB84, 0x9FFBFB84, + 0x9FFCFB84, 0x9FFDFB84, 0x9FFEFB84, 0x9FFFFB84, 0xA000FB84, 0xA001FB84, 0xA002FB84, 0xA003FB84, 0xA004FB84, 
0xA005FB84, 0xA006FB84, 0xA007FB84, 0xA008FB84, 0xA009FB84, 0xA00AFB84, + 0xA00BFB84, 0xA00CFB84, 0xA00DFB84, 0xA00EFB84, 0xA00FFB84, 0xA010FB84, 0xA011FB84, 0xA012FB84, 0xA013FB84, 0xA014FB84, 0xA015FB84, 0xA016FB84, 0xA017FB84, 0xA018FB84, 0xA019FB84, + 0xA01AFB84, 0xA01BFB84, 0xA01CFB84, 0xA01DFB84, 0xA01EFB84, 0xA01FFB84, 0xA020FB84, 0xA021FB84, 0xA022FB84, 0xA023FB84, 0xA024FB84, 0xA025FB84, 0xA026FB84, 0xA027FB84, 0xA028FB84, + 0xA029FB84, 0xA02AFB84, 0xA02BFB84, 0xA02CFB84, 0xA02DFB84, 0xA02EFB84, 0xA02FFB84, 0xA030FB84, 0xA031FB84, 0xA032FB84, 0xA033FB84, 0xA034FB84, 0xA035FB84, 0xA036FB84, 0xA037FB84, + 0xA038FB84, 0xA039FB84, 0xA03AFB84, 0xA03BFB84, 0xA03CFB84, 0xA03DFB84, 0xA03EFB84, 0xA03FFB84, 0xA040FB84, 0xA041FB84, 0xA042FB84, 0xA043FB84, 0xA044FB84, 0xA045FB84, 0xA046FB84, + 0xA047FB84, 0xA048FB84, 0xA049FB84, 0xA04AFB84, 0xA04BFB84, 0xA04CFB84, 0xA04DFB84, 0xA04EFB84, 0xA04FFB84, 0xA050FB84, 0xA051FB84, 0xA052FB84, 0xA053FB84, 0xA054FB84, 0xA055FB84, + 0xA056FB84, 0xA057FB84, 0xA058FB84, 0xA059FB84, 0xA05AFB84, 0xA05BFB84, 0xA05CFB84, 0xA05DFB84, 0xA05EFB84, 0xA05FFB84, 0xA060FB84, 0xA061FB84, 0xA062FB84, 0xA063FB84, 0xA064FB84, + 0xA065FB84, 0xA066FB84, 0xA067FB84, 0xA068FB84, 0xA069FB84, 0xA06AFB84, 0xA06BFB84, 0xA06CFB84, 0xA06DFB84, 0xA06EFB84, 0xA06FFB84, 0xA070FB84, 0xA071FB84, 0xA072FB84, 0xA073FB84, + 0xA074FB84, 0xA075FB84, 0xA076FB84, 0xA077FB84, 0xA078FB84, 0xA079FB84, 0xA07AFB84, 0xA07BFB84, 0xA07CFB84, 0xA07DFB84, 0xA07EFB84, 0xA07FFB84, 0xA080FB84, 0xA081FB84, 0xA082FB84, + 0xA083FB84, 0xA084FB84, 0xA085FB84, 0xA086FB84, 0xA087FB84, 0xA088FB84, 0xA089FB84, 0xA08AFB84, 0xA08BFB84, 0xA08CFB84, 0xA08DFB84, 0xA08EFB84, 0xA08FFB84, 0xA090FB84, 0xA091FB84, + 0xA092FB84, 0xA093FB84, 0xA094FB84, 0xA095FB84, 0xA096FB84, 0xA097FB84, 0xA098FB84, 0xA099FB84, 0xA09AFB84, 0xA09BFB84, 0xA09CFB84, 0xA09DFB84, 0xA09EFB84, 0xA09FFB84, 0xA0A0FB84, + 0xA0A1FB84, 0xA0A2FB84, 0xA0A3FB84, 0xA0A4FB84, 0xA0A5FB84, 0xA0A6FB84, 0xA0A7FB84, 0xA0A8FB84, 
0xA0A9FB84, 0xA0AAFB84, 0xA0ABFB84, 0xA0ACFB84, 0xA0ADFB84, 0xA0AEFB84, 0xA0AFFB84, + 0xA0B0FB84, 0xA0B1FB84, 0xA0B2FB84, 0xA0B3FB84, 0xA0B4FB84, 0xA0B5FB84, 0xA0B6FB84, 0xA0B7FB84, 0xA0B8FB84, 0xA0B9FB84, 0xA0BAFB84, 0xA0BBFB84, 0xA0BCFB84, 0xA0BDFB84, 0xA0BEFB84, + 0xA0BFFB84, 0xA0C0FB84, 0xA0C1FB84, 0xA0C2FB84, 0xA0C3FB84, 0xA0C4FB84, 0xA0C5FB84, 0xA0C6FB84, 0xA0C7FB84, 0xA0C8FB84, 0xA0C9FB84, 0xA0CAFB84, 0xA0CBFB84, 0xA0CCFB84, 0xA0CDFB84, + 0xA0CEFB84, 0xA0CFFB84, 0xA0D0FB84, 0xA0D1FB84, 0xA0D2FB84, 0xA0D3FB84, 0xA0D4FB84, 0xA0D5FB84, 0xA0D6FB84, 0xA0D7FB84, 0xA0D8FB84, 0xA0D9FB84, 0xA0DAFB84, 0xA0DBFB84, 0xA0DCFB84, + 0xA0DDFB84, 0xA0DEFB84, 0xA0DFFB84, 0xA0E0FB84, 0xA0E1FB84, 0xA0E2FB84, 0xA0E3FB84, 0xA0E4FB84, 0xA0E5FB84, 0xA0E6FB84, 0xA0E7FB84, 0xA0E8FB84, 0xA0E9FB84, 0xA0EAFB84, 0xA0EBFB84, + 0xA0ECFB84, 0xA0EDFB84, 0xA0EEFB84, 0xA0EFFB84, 0xA0F0FB84, 0xA0F1FB84, 0xA0F2FB84, 0xA0F3FB84, 0xA0F4FB84, 0xA0F5FB84, 0xA0F6FB84, 0xA0F7FB84, 0xA0F8FB84, 0xA0F9FB84, 0xA0FAFB84, + 0xA0FBFB84, 0xA0FCFB84, 0xA0FDFB84, 0xA0FEFB84, 0xA0FFFB84, 0xA100FB84, 0xA101FB84, 0xA102FB84, 0xA103FB84, 0xA104FB84, 0xA105FB84, 0xA106FB84, 0xA107FB84, 0xA108FB84, 0xA109FB84, + 0xA10AFB84, 0xA10BFB84, 0xA10CFB84, 0xA10DFB84, 0xA10EFB84, 0xA10FFB84, 0xA110FB84, 0xA111FB84, 0xA112FB84, 0xA113FB84, 0xA114FB84, 0xA115FB84, 0xA116FB84, 0xA117FB84, 0xA118FB84, + 0xA119FB84, 0xA11AFB84, 0xA11BFB84, 0xA11CFB84, 0xA11DFB84, 0xA11EFB84, 0xA11FFB84, 0xA120FB84, 0xA121FB84, 0xA122FB84, 0xA123FB84, 0xA124FB84, 0xA125FB84, 0xA126FB84, 0xA127FB84, + 0xA128FB84, 0xA129FB84, 0xA12AFB84, 0xA12BFB84, 0xA12CFB84, 0xA12DFB84, 0xA12EFB84, 0xA12FFB84, 0xA130FB84, 0xA131FB84, 0xA132FB84, 0xA133FB84, 0xA134FB84, 0xA135FB84, 0xA136FB84, + 0xA137FB84, 0xA138FB84, 0xA139FB84, 0xA13AFB84, 0xA13BFB84, 0xA13CFB84, 0xA13DFB84, 0xA13EFB84, 0xA13FFB84, 0xA140FB84, 0xA141FB84, 0xA142FB84, 0xA143FB84, 0xA144FB84, 0xA145FB84, + 0xA146FB84, 0xA147FB84, 0xA148FB84, 0xA149FB84, 0xA14AFB84, 0xA14BFB84, 0xA14CFB84, 
0xA14DFB84, 0xA14EFB84, 0xA14FFB84, 0xA150FB84, 0xA151FB84, 0xA152FB84, 0xA153FB84, 0xA154FB84, + 0xA155FB84, 0xA156FB84, 0xA157FB84, 0xA158FB84, 0xA159FB84, 0xA15AFB84, 0xA15BFB84, 0xA15CFB84, 0xA15DFB84, 0xA15EFB84, 0xA15FFB84, 0xA160FB84, 0xA161FB84, 0xA162FB84, 0xA163FB84, + 0xA164FB84, 0xA165FB84, 0xA166FB84, 0xA167FB84, 0xA168FB84, 0xA169FB84, 0xA16AFB84, 0xA16BFB84, 0xA16CFB84, 0xA16DFB84, 0xA16EFB84, 0xA16FFB84, 0xA170FB84, 0xA171FB84, 0xA172FB84, + 0xA173FB84, 0xA174FB84, 0xA175FB84, 0xA176FB84, 0xA177FB84, 0xA178FB84, 0xA179FB84, 0xA17AFB84, 0xA17BFB84, 0xA17CFB84, 0xA17DFB84, 0xA17EFB84, 0xA17FFB84, 0xA180FB84, 0xA181FB84, + 0xA182FB84, 0xA183FB84, 0xA184FB84, 0xA185FB84, 0xA186FB84, 0xA187FB84, 0xA188FB84, 0xA189FB84, 0xA18AFB84, 0xA18BFB84, 0xA18CFB84, 0xA18DFB84, 0xA18EFB84, 0xA18FFB84, 0xA190FB84, + 0xA191FB84, 0xA192FB84, 0xA193FB84, 0xA194FB84, 0xA195FB84, 0xA196FB84, 0xA197FB84, 0xA198FB84, 0xA199FB84, 0xA19AFB84, 0xA19BFB84, 0xA19CFB84, 0xA19DFB84, 0xA19EFB84, 0xA19FFB84, + 0xA1A0FB84, 0xA1A1FB84, 0xA1A2FB84, 0xA1A3FB84, 0xA1A4FB84, 0xA1A5FB84, 0xA1A6FB84, 0xA1A7FB84, 0xA1A8FB84, 0xA1A9FB84, 0xA1AAFB84, 0xA1ABFB84, 0xA1ACFB84, 0xA1ADFB84, 0xA1AEFB84, + 0xA1AFFB84, 0xA1B0FB84, 0xA1B1FB84, 0xA1B2FB84, 0xA1B3FB84, 0xA1B4FB84, 0xA1B5FB84, 0xA1B6FB84, 0xA1B7FB84, 0xA1B8FB84, 0xA1B9FB84, 0xA1BAFB84, 0xA1BBFB84, 0xA1BCFB84, 0xA1BDFB84, + 0xA1BEFB84, 0xA1BFFB84, 0xA1C0FB84, 0xA1C1FB84, 0xA1C2FB84, 0xA1C3FB84, 0xA1C4FB84, 0xA1C5FB84, 0xA1C6FB84, 0xA1C7FB84, 0xA1C8FB84, 0xA1C9FB84, 0xA1CAFB84, 0xA1CBFB84, 0xA1CCFB84, + 0xA1CDFB84, 0xA1CEFB84, 0xA1CFFB84, 0xA1D0FB84, 0xA1D1FB84, 0xA1D2FB84, 0xA1D3FB84, 0xA1D4FB84, 0xA1D5FB84, 0xA1D6FB84, 0xA1D7FB84, 0xA1D8FB84, 0xA1D9FB84, 0xA1DAFB84, 0xA1DBFB84, + 0xA1DCFB84, 0xA1DDFB84, 0xA1DEFB84, 0xA1DFFB84, 0xA1E0FB84, 0xA1E1FB84, 0xA1E2FB84, 0xA1E3FB84, 0xA1E4FB84, 0xA1E5FB84, 0xA1E6FB84, 0xA1E7FB84, 0xA1E8FB84, 0xA1E9FB84, 0xA1EAFB84, + 0xA1EBFB84, 0xA1ECFB84, 0xA1EDFB84, 0xA1EEFB84, 0xA1EFFB84, 0xA1F0FB84, 
0xA1F1FB84, 0xA1F2FB84, 0xA1F3FB84, 0xA1F4FB84, 0xA1F5FB84, 0xA1F6FB84, 0xA1F7FB84, 0xA1F8FB84, 0xA1F9FB84, + 0xA1FAFB84, 0xA1FBFB84, 0xA1FCFB84, 0xA1FDFB84, 0xA1FEFB84, 0xA1FFFB84, 0xA200FB84, 0xA201FB84, 0xA202FB84, 0xA203FB84, 0xA204FB84, 0xA205FB84, 0xA206FB84, 0xA207FB84, 0xA208FB84, + 0xA209FB84, 0xA20AFB84, 0xA20BFB84, 0xA20CFB84, 0xA20DFB84, 0xA20EFB84, 0xA20FFB84, 0xA210FB84, 0xA211FB84, 0xA212FB84, 0xA213FB84, 0xA214FB84, 0xA215FB84, 0xA216FB84, 0xA217FB84, + 0xA218FB84, 0xA219FB84, 0xA21AFB84, 0xA21BFB84, 0xA21CFB84, 0xA21DFB84, 0xA21EFB84, 0xA21FFB84, 0xA220FB84, 0xA221FB84, 0xA222FB84, 0xA223FB84, 0xA224FB84, 0xA225FB84, 0xA226FB84, + 0xA227FB84, 0xA228FB84, 0xA229FB84, 0xA22AFB84, 0xA22BFB84, 0xA22CFB84, 0xA22DFB84, 0xA22EFB84, 0xA22FFB84, 0xA230FB84, 0xA231FB84, 0xA232FB84, 0xA233FB84, 0xA234FB84, 0xA235FB84, + 0xA236FB84, 0xA237FB84, 0xA238FB84, 0xA239FB84, 0xA23AFB84, 0xA23BFB84, 0xA23CFB84, 0xA23DFB84, 0xA23EFB84, 0xA23FFB84, 0xA240FB84, 0xA241FB84, 0xA242FB84, 0xA243FB84, 0xA244FB84, + 0xA245FB84, 0xA246FB84, 0xA247FB84, 0xA248FB84, 0xA249FB84, 0xA24AFB84, 0xA24BFB84, 0xA24CFB84, 0xA24DFB84, 0xA24EFB84, 0xA24FFB84, 0xA250FB84, 0xA251FB84, 0xA252FB84, 0xA253FB84, + 0xA254FB84, 0xA255FB84, 0xA256FB84, 0xA257FB84, 0xA258FB84, 0xA259FB84, 0xA25AFB84, 0xA25BFB84, 0xA25CFB84, 0xA25DFB84, 0xA25EFB84, 0xA25FFB84, 0xA260FB84, 0xA261FB84, 0xA262FB84, + 0xA263FB84, 0xA264FB84, 0xA265FB84, 0xA266FB84, 0xA267FB84, 0xA268FB84, 0xA269FB84, 0xA26AFB84, 0xA26BFB84, 0xA26CFB84, 0xA26DFB84, 0xA26EFB84, 0xA26FFB84, 0xA270FB84, 0xA271FB84, + 0xA272FB84, 0xA273FB84, 0xA274FB84, 0xA275FB84, 0xA276FB84, 0xA277FB84, 0xA278FB84, 0xA279FB84, 0xA27AFB84, 0xA27BFB84, 0xA27CFB84, 0xA27DFB84, 0xA27EFB84, 0xA27FFB84, 0xA280FB84, + 0xA281FB84, 0xA282FB84, 0xA283FB84, 0xA284FB84, 0xA285FB84, 0xA286FB84, 0xA287FB84, 0xA288FB84, 0xA289FB84, 0xA28AFB84, 0xA28BFB84, 0xA28CFB84, 0xA28DFB84, 0xA28EFB84, 0xA28FFB84, + 0xA290FB84, 0xA291FB84, 0xA292FB84, 0xA293FB84, 0xA294FB84, 
0xA295FB84, 0xA296FB84, 0xA297FB84, 0xA298FB84, 0xA299FB84, 0xA29AFB84, 0xA29BFB84, 0xA29CFB84, 0xA29DFB84, 0xA29EFB84, + 0xA29FFB84, 0xA2A0FB84, 0xA2A1FB84, 0xA2A2FB84, 0xA2A3FB84, 0xA2A4FB84, 0xA2A5FB84, 0xA2A6FB84, 0xA2A7FB84, 0xA2A8FB84, 0xA2A9FB84, 0xA2AAFB84, 0xA2ABFB84, 0xA2ACFB84, 0xA2ADFB84, + 0xA2AEFB84, 0xA2AFFB84, 0xA2B0FB84, 0xA2B1FB84, 0xA2B2FB84, 0xA2B3FB84, 0xA2B4FB84, 0xA2B5FB84, 0xA2B6FB84, 0xA2B7FB84, 0xA2B8FB84, 0xA2B9FB84, 0xA2BAFB84, 0xA2BBFB84, 0xA2BCFB84, + 0xA2BDFB84, 0xA2BEFB84, 0xA2BFFB84, 0xA2C0FB84, 0xA2C1FB84, 0xA2C2FB84, 0xA2C3FB84, 0xA2C4FB84, 0xA2C5FB84, 0xA2C6FB84, 0xA2C7FB84, 0xA2C8FB84, 0xA2C9FB84, 0xA2CAFB84, 0xA2CBFB84, + 0xA2CCFB84, 0xA2CDFB84, 0xA2CEFB84, 0xA2CFFB84, 0xA2D0FB84, 0xA2D1FB84, 0xA2D2FB84, 0xA2D3FB84, 0xA2D4FB84, 0xA2D5FB84, 0xA2D6FB84, 0xA2D7FB84, 0xA2D8FB84, 0xA2D9FB84, 0xA2DAFB84, + 0xA2DBFB84, 0xA2DCFB84, 0xA2DDFB84, 0xA2DEFB84, 0xA2DFFB84, 0xA2E0FB84, 0xA2E1FB84, 0xA2E2FB84, 0xA2E3FB84, 0xA2E4FB84, 0xA2E5FB84, 0xA2E6FB84, 0xA2E7FB84, 0xA2E8FB84, 0xA2E9FB84, + 0xA2EAFB84, 0xA2EBFB84, 0xA2ECFB84, 0xA2EDFB84, 0xA2EEFB84, 0xA2EFFB84, 0xA2F0FB84, 0xA2F1FB84, 0xA2F2FB84, 0xA2F3FB84, 0xA2F4FB84, 0xA2F5FB84, 0xA2F6FB84, 0xA2F7FB84, 0xA2F8FB84, + 0xA2F9FB84, 0xA2FAFB84, 0xA2FBFB84, 0xA2FCFB84, 0xA2FDFB84, 0xA2FEFB84, 0xA2FFFB84, 0xA300FB84, 0xA301FB84, 0xA302FB84, 0xA303FB84, 0xA304FB84, 0xA305FB84, 0xA306FB84, 0xA307FB84, + 0xA308FB84, 0xA309FB84, 0xA30AFB84, 0xA30BFB84, 0xA30CFB84, 0xA30DFB84, 0xA30EFB84, 0xA30FFB84, 0xA310FB84, 0xA311FB84, 0xA312FB84, 0xA313FB84, 0xA314FB84, 0xA315FB84, 0xA316FB84, + 0xA317FB84, 0xA318FB84, 0xA319FB84, 0xA31AFB84, 0xA31BFB84, 0xA31CFB84, 0xA31DFB84, 0xA31EFB84, 0xA31FFB84, 0xA320FB84, 0xA321FB84, 0xA322FB84, 0xA323FB84, 0xA324FB84, 0xA325FB84, + 0xA326FB84, 0xA327FB84, 0xA328FB84, 0xA329FB84, 0xA32AFB84, 0xA32BFB84, 0xA32CFB84, 0xA32DFB84, 0xA32EFB84, 0xA32FFB84, 0xA330FB84, 0xA331FB84, 0xA332FB84, 0xA333FB84, 0xA334FB84, + 0xA335FB84, 0xA336FB84, 0xA337FB84, 0xA338FB84, 
0xA339FB84, 0xA33AFB84, 0xA33BFB84, 0xA33CFB84, 0xA33DFB84, 0xA33EFB84, 0xA33FFB84, 0xA340FB84, 0xA341FB84, 0xA342FB84, 0xA343FB84, + 0xA344FB84, 0xA345FB84, 0xA346FB84, 0xA347FB84, 0xA348FB84, 0xA349FB84, 0xA34AFB84, 0xA34BFB84, 0xA34CFB84, 0xA34DFB84, 0xA34EFB84, 0xA34FFB84, 0xA350FB84, 0xA351FB84, 0xA352FB84, + 0xA353FB84, 0xA354FB84, 0xA355FB84, 0xA356FB84, 0xA357FB84, 0xA358FB84, 0xA359FB84, 0xA35AFB84, 0xA35BFB84, 0xA35CFB84, 0xA35DFB84, 0xA35EFB84, 0xA35FFB84, 0xA360FB84, 0xA361FB84, + 0xA362FB84, 0xA363FB84, 0xA364FB84, 0xA365FB84, 0xA366FB84, 0xA367FB84, 0xA368FB84, 0xA369FB84, 0xA36AFB84, 0xA36BFB84, 0xA36CFB84, 0xA36DFB84, 0xA36EFB84, 0xA36FFB84, 0xA370FB84, + 0xA371FB84, 0xA372FB84, 0xA373FB84, 0xA374FB84, 0xA375FB84, 0xA376FB84, 0xA377FB84, 0xA378FB84, 0xA379FB84, 0xA37AFB84, 0xA37BFB84, 0xA37CFB84, 0xA37DFB84, 0xA37EFB84, 0xA37FFB84, + 0xA380FB84, 0xA381FB84, 0xA382FB84, 0xA383FB84, 0xA384FB84, 0xA385FB84, 0xA386FB84, 0xA387FB84, 0xA388FB84, 0xA389FB84, 0xA38AFB84, 0xA38BFB84, 0xA38CFB84, 0xA38DFB84, 0xA38EFB84, + 0xA38FFB84, 0xA390FB84, 0xA391FB84, 0xA392FB84, 0xA393FB84, 0xA394FB84, 0xA395FB84, 0xA396FB84, 0xA397FB84, 0xA398FB84, 0xA399FB84, 0xA39AFB84, 0xA39BFB84, 0xA39CFB84, 0xA39DFB84, + 0xA39EFB84, 0xA39FFB84, 0xA3A0FB84, 0xA3A1FB84, 0xA3A2FB84, 0xA3A3FB84, 0xA3A4FB84, 0xA3A5FB84, 0xA3A6FB84, 0xA3A7FB84, 0xA3A8FB84, 0xA3A9FB84, 0xA3AAFB84, 0xA3ABFB84, 0xA3ACFB84, + 0xA3ADFB84, 0xA3AEFB84, 0xA3AFFB84, 0xA3B0FB84, 0xA3B1FB84, 0xA3B2FB84, 0xA3B3FB84, 0xA3B4FB84, 0xA3B5FB84, 0xA3B6FB84, 0xA3B7FB84, 0xA3B8FB84, 0xA3B9FB84, 0xA3BAFB84, 0xA3BBFB84, + 0xA3BCFB84, 0xA3BDFB84, 0xA3BEFB84, 0xA3BFFB84, 0xA3C0FB84, 0xA3C1FB84, 0xA3C2FB84, 0xA3C3FB84, 0xA3C4FB84, 0xA3C5FB84, 0xA3C6FB84, 0xA3C7FB84, 0xA3C8FB84, 0xA3C9FB84, 0xA3CAFB84, + 0xA3CBFB84, 0xA3CCFB84, 0xA3CDFB84, 0xA3CEFB84, 0xA3CFFB84, 0xA3D0FB84, 0xA3D1FB84, 0xA3D2FB84, 0xA3D3FB84, 0xA3D4FB84, 0xA3D5FB84, 0xA3D6FB84, 0xA3D7FB84, 0xA3D8FB84, 0xA3D9FB84, + 0xA3DAFB84, 0xA3DBFB84, 0xA3DCFB84, 
0xA3DDFB84, 0xA3DEFB84, 0xA3DFFB84, 0xA3E0FB84, 0xA3E1FB84, 0xA3E2FB84, 0xA3E3FB84, 0xA3E4FB84, 0xA3E5FB84, 0xA3E6FB84, 0xA3E7FB84, 0xA3E8FB84, + 0xA3E9FB84, 0xA3EAFB84, 0xA3EBFB84, 0xA3ECFB84, 0xA3EDFB84, 0xA3EEFB84, 0xA3EFFB84, 0xA3F0FB84, 0xA3F1FB84, 0xA3F2FB84, 0xA3F3FB84, 0xA3F4FB84, 0xA3F5FB84, 0xA3F6FB84, 0xA3F7FB84, + 0xA3F8FB84, 0xA3F9FB84, 0xA3FAFB84, 0xA3FBFB84, 0xA3FCFB84, 0xA3FDFB84, 0xA3FEFB84, 0xA3FFFB84, 0xA400FB84, 0xA401FB84, 0xA402FB84, 0xA403FB84, 0xA404FB84, 0xA405FB84, 0xA406FB84, + 0xA407FB84, 0xA408FB84, 0xA409FB84, 0xA40AFB84, 0xA40BFB84, 0xA40CFB84, 0xA40DFB84, 0xA40EFB84, 0xA40FFB84, 0xA410FB84, 0xA411FB84, 0xA412FB84, 0xA413FB84, 0xA414FB84, 0xA415FB84, + 0xA416FB84, 0xA417FB84, 0xA418FB84, 0xA419FB84, 0xA41AFB84, 0xA41BFB84, 0xA41CFB84, 0xA41DFB84, 0xA41EFB84, 0xA41FFB84, 0xA420FB84, 0xA421FB84, 0xA422FB84, 0xA423FB84, 0xA424FB84, + 0xA425FB84, 0xA426FB84, 0xA427FB84, 0xA428FB84, 0xA429FB84, 0xA42AFB84, 0xA42BFB84, 0xA42CFB84, 0xA42DFB84, 0xA42EFB84, 0xA42FFB84, 0xA430FB84, 0xA431FB84, 0xA432FB84, 0xA433FB84, + 0xA434FB84, 0xA435FB84, 0xA436FB84, 0xA437FB84, 0xA438FB84, 0xA439FB84, 0xA43AFB84, 0xA43BFB84, 0xA43CFB84, 0xA43DFB84, 0xA43EFB84, 0xA43FFB84, 0xA440FB84, 0xA441FB84, 0xA442FB84, + 0xA443FB84, 0xA444FB84, 0xA445FB84, 0xA446FB84, 0xA447FB84, 0xA448FB84, 0xA449FB84, 0xA44AFB84, 0xA44BFB84, 0xA44CFB84, 0xA44DFB84, 0xA44EFB84, 0xA44FFB84, 0xA450FB84, 0xA451FB84, + 0xA452FB84, 0xA453FB84, 0xA454FB84, 0xA455FB84, 0xA456FB84, 0xA457FB84, 0xA458FB84, 0xA459FB84, 0xA45AFB84, 0xA45BFB84, 0xA45CFB84, 0xA45DFB84, 0xA45EFB84, 0xA45FFB84, 0xA460FB84, + 0xA461FB84, 0xA462FB84, 0xA463FB84, 0xA464FB84, 0xA465FB84, 0xA466FB84, 0xA467FB84, 0xA468FB84, 0xA469FB84, 0xA46AFB84, 0xA46BFB84, 0xA46CFB84, 0xA46DFB84, 0xA46EFB84, 0xA46FFB84, + 0xA470FB84, 0xA471FB84, 0xA472FB84, 0xA473FB84, 0xA474FB84, 0xA475FB84, 0xA476FB84, 0xA477FB84, 0xA478FB84, 0xA479FB84, 0xA47AFB84, 0xA47BFB84, 0xA47CFB84, 0xA47DFB84, 0xA47EFB84, + 0xA47FFB84, 0xA480FB84, 
0xA481FB84, 0xA482FB84, 0xA483FB84, 0xA484FB84, 0xA485FB84, 0xA486FB84, 0xA487FB84, 0xA488FB84, 0xA489FB84, 0xA48AFB84, 0xA48BFB84, 0xA48CFB84, 0xA48DFB84, + 0xA48EFB84, 0xA48FFB84, 0xA490FB84, 0xA491FB84, 0xA492FB84, 0xA493FB84, 0xA494FB84, 0xA495FB84, 0xA496FB84, 0xA497FB84, 0xA498FB84, 0xA499FB84, 0xA49AFB84, 0xA49BFB84, 0xA49CFB84, + 0xA49DFB84, 0xA49EFB84, 0xA49FFB84, 0xA4A0FB84, 0xA4A1FB84, 0xA4A2FB84, 0xA4A3FB84, 0xA4A4FB84, 0xA4A5FB84, 0xA4A6FB84, 0xA4A7FB84, 0xA4A8FB84, 0xA4A9FB84, 0xA4AAFB84, 0xA4ABFB84, + 0xA4ACFB84, 0xA4ADFB84, 0xA4AEFB84, 0xA4AFFB84, 0xA4B0FB84, 0xA4B1FB84, 0xA4B2FB84, 0xA4B3FB84, 0xA4B4FB84, 0xA4B5FB84, 0xA4B6FB84, 0xA4B7FB84, 0xA4B8FB84, 0xA4B9FB84, 0xA4BAFB84, + 0xA4BBFB84, 0xA4BCFB84, 0xA4BDFB84, 0xA4BEFB84, 0xA4BFFB84, 0xA4C0FB84, 0xA4C1FB84, 0xA4C2FB84, 0xA4C3FB84, 0xA4C4FB84, 0xA4C5FB84, 0xA4C6FB84, 0xA4C7FB84, 0xA4C8FB84, 0xA4C9FB84, + 0xA4CAFB84, 0xA4CBFB84, 0xA4CCFB84, 0xA4CDFB84, 0xA4CEFB84, 0xA4CFFB84, 0xA4D0FB84, 0xA4D1FB84, 0xA4D2FB84, 0xA4D3FB84, 0xA4D4FB84, 0xA4D5FB84, 0xA4D6FB84, 0xA4D7FB84, 0xA4D8FB84, + 0xA4D9FB84, 0xA4DAFB84, 0xA4DBFB84, 0xA4DCFB84, 0xA4DDFB84, 0xA4DEFB84, 0xA4DFFB84, 0xA4E0FB84, 0xA4E1FB84, 0xA4E2FB84, 0xA4E3FB84, 0xA4E4FB84, 0xA4E5FB84, 0xA4E6FB84, 0xA4E7FB84, + 0xA4E8FB84, 0xA4E9FB84, 0xA4EAFB84, 0xA4EBFB84, 0xA4ECFB84, 0xA4EDFB84, 0xA4EEFB84, 0xA4EFFB84, 0xA4F0FB84, 0xA4F1FB84, 0xA4F2FB84, 0xA4F3FB84, 0xA4F4FB84, 0xA4F5FB84, 0xA4F6FB84, + 0xA4F7FB84, 0xA4F8FB84, 0xA4F9FB84, 0xA4FAFB84, 0xA4FBFB84, 0xA4FCFB84, 0xA4FDFB84, 0xA4FEFB84, 0xA4FFFB84, 0xA500FB84, 0xA501FB84, 0xA502FB84, 0xA503FB84, 0xA504FB84, 0xA505FB84, + 0xA506FB84, 0xA507FB84, 0xA508FB84, 0xA509FB84, 0xA50AFB84, 0xA50BFB84, 0xA50CFB84, 0xA50DFB84, 0xA50EFB84, 0xA50FFB84, 0xA510FB84, 0xA511FB84, 0xA512FB84, 0xA513FB84, 0xA514FB84, + 0xA515FB84, 0xA516FB84, 0xA517FB84, 0xA518FB84, 0xA519FB84, 0xA51AFB84, 0xA51BFB84, 0xA51CFB84, 0xA51DFB84, 0xA51EFB84, 0xA51FFB84, 0xA520FB84, 0xA521FB84, 0xA522FB84, 0xA523FB84, + 0xA524FB84, 
0xA525FB84, 0xA526FB84, 0xA527FB84, 0xA528FB84, 0xA529FB84, 0xA52AFB84, 0xA52BFB84, 0xA52CFB84, 0xA52DFB84, 0xA52EFB84, 0xA52FFB84, 0xA530FB84, 0xA531FB84, 0xA532FB84, + 0xA533FB84, 0xA534FB84, 0xA535FB84, 0xA536FB84, 0xA537FB84, 0xA538FB84, 0xA539FB84, 0xA53AFB84, 0xA53BFB84, 0xA53CFB84, 0xA53DFB84, 0xA53EFB84, 0xA53FFB84, 0xA540FB84, 0xA541FB84, + 0xA542FB84, 0xA543FB84, 0xA544FB84, 0xA545FB84, 0xA546FB84, 0xA547FB84, 0xA548FB84, 0xA549FB84, 0xA54AFB84, 0xA54BFB84, 0xA54CFB84, 0xA54DFB84, 0xA54EFB84, 0xA54FFB84, 0xA550FB84, + 0xA551FB84, 0xA552FB84, 0xA553FB84, 0xA554FB84, 0xA555FB84, 0xA556FB84, 0xA557FB84, 0xA558FB84, 0xA559FB84, 0xA55AFB84, 0xA55BFB84, 0xA55CFB84, 0xA55DFB84, 0xA55EFB84, 0xA55FFB84, + 0xA560FB84, 0xA561FB84, 0xA562FB84, 0xA563FB84, 0xA564FB84, 0xA565FB84, 0xA566FB84, 0xA567FB84, 0xA568FB84, 0xA569FB84, 0xA56AFB84, 0xA56BFB84, 0xA56CFB84, 0xA56DFB84, 0xA56EFB84, + 0xA56FFB84, 0xA570FB84, 0xA571FB84, 0xA572FB84, 0xA573FB84, 0xA574FB84, 0xA575FB84, 0xA576FB84, 0xA577FB84, 0xA578FB84, 0xA579FB84, 0xA57AFB84, 0xA57BFB84, 0xA57CFB84, 0xA57DFB84, + 0xA57EFB84, 0xA57FFB84, 0xA580FB84, 0xA581FB84, 0xA582FB84, 0xA583FB84, 0xA584FB84, 0xA585FB84, 0xA586FB84, 0xA587FB84, 0xA588FB84, 0xA589FB84, 0xA58AFB84, 0xA58BFB84, 0xA58CFB84, + 0xA58DFB84, 0xA58EFB84, 0xA58FFB84, 0xA590FB84, 0xA591FB84, 0xA592FB84, 0xA593FB84, 0xA594FB84, 0xA595FB84, 0xA596FB84, 0xA597FB84, 0xA598FB84, 0xA599FB84, 0xA59AFB84, 0xA59BFB84, + 0xA59CFB84, 0xA59DFB84, 0xA59EFB84, 0xA59FFB84, 0xA5A0FB84, 0xA5A1FB84, 0xA5A2FB84, 0xA5A3FB84, 0xA5A4FB84, 0xA5A5FB84, 0xA5A6FB84, 0xA5A7FB84, 0xA5A8FB84, 0xA5A9FB84, 0xA5AAFB84, + 0xA5ABFB84, 0xA5ACFB84, 0xA5ADFB84, 0xA5AEFB84, 0xA5AFFB84, 0xA5B0FB84, 0xA5B1FB84, 0xA5B2FB84, 0xA5B3FB84, 0xA5B4FB84, 0xA5B5FB84, 0xA5B6FB84, 0xA5B7FB84, 0xA5B8FB84, 0xA5B9FB84, + 0xA5BAFB84, 0xA5BBFB84, 0xA5BCFB84, 0xA5BDFB84, 0xA5BEFB84, 0xA5BFFB84, 0xA5C0FB84, 0xA5C1FB84, 0xA5C2FB84, 0xA5C3FB84, 0xA5C4FB84, 0xA5C5FB84, 0xA5C6FB84, 0xA5C7FB84, 0xA5C8FB84, + 
0xA5C9FB84, 0xA5CAFB84, 0xA5CBFB84, 0xA5CCFB84, 0xA5CDFB84, 0xA5CEFB84, 0xA5CFFB84, 0xA5D0FB84, 0xA5D1FB84, 0xA5D2FB84, 0xA5D3FB84, 0xA5D4FB84, 0xA5D5FB84, 0xA5D6FB84, 0xA5D7FB84, + 0xA5D8FB84, 0xA5D9FB84, 0xA5DAFB84, 0xA5DBFB84, 0xA5DCFB84, 0xA5DDFB84, 0xA5DEFB84, 0xA5DFFB84, 0xA5E0FB84, 0xA5E1FB84, 0xA5E2FB84, 0xA5E3FB84, 0xA5E4FB84, 0xA5E5FB84, 0xA5E6FB84, + 0xA5E7FB84, 0xA5E8FB84, 0xA5E9FB84, 0xA5EAFB84, 0xA5EBFB84, 0xA5ECFB84, 0xA5EDFB84, 0xA5EEFB84, 0xA5EFFB84, 0xA5F0FB84, 0xA5F1FB84, 0xA5F2FB84, 0xA5F3FB84, 0xA5F4FB84, 0xA5F5FB84, + 0xA5F6FB84, 0xA5F7FB84, 0xA5F8FB84, 0xA5F9FB84, 0xA5FAFB84, 0xA5FBFB84, 0xA5FCFB84, 0xA5FDFB84, 0xA5FEFB84, 0xA5FFFB84, 0xA600FB84, 0xA601FB84, 0xA602FB84, 0xA603FB84, 0xA604FB84, + 0xA605FB84, 0xA606FB84, 0xA607FB84, 0xA608FB84, 0xA609FB84, 0xA60AFB84, 0xA60BFB84, 0xA60CFB84, 0xA60DFB84, 0xA60EFB84, 0xA60FFB84, 0xA610FB84, 0xA611FB84, 0xA612FB84, 0xA613FB84, + 0xA614FB84, 0xA615FB84, 0xA616FB84, 0xA617FB84, 0xA618FB84, 0xA619FB84, 0xA61AFB84, 0xA61BFB84, 0xA61CFB84, 0xA61DFB84, 0xA61EFB84, 0xA61FFB84, 0xA620FB84, 0xA621FB84, 0xA622FB84, + 0xA623FB84, 0xA624FB84, 0xA625FB84, 0xA626FB84, 0xA627FB84, 0xA628FB84, 0xA629FB84, 0xA62AFB84, 0xA62BFB84, 0xA62CFB84, 0xA62DFB84, 0xA62EFB84, 0xA62FFB84, 0xA630FB84, 0xA631FB84, + 0xA632FB84, 0xA633FB84, 0xA634FB84, 0xA635FB84, 0xA636FB84, 0xA637FB84, 0xA638FB84, 0xA639FB84, 0xA63AFB84, 0xA63BFB84, 0xA63CFB84, 0xA63DFB84, 0xA63EFB84, 0xA63FFB84, 0xA640FB84, + 0xA641FB84, 0xA642FB84, 0xA643FB84, 0xA644FB84, 0xA645FB84, 0xA646FB84, 0xA647FB84, 0xA648FB84, 0xA649FB84, 0xA64AFB84, 0xA64BFB84, 0xA64CFB84, 0xA64DFB84, 0xA64EFB84, 0xA64FFB84, + 0xA650FB84, 0xA651FB84, 0xA652FB84, 0xA653FB84, 0xA654FB84, 0xA655FB84, 0xA656FB84, 0xA657FB84, 0xA658FB84, 0xA659FB84, 0xA65AFB84, 0xA65BFB84, 0xA65CFB84, 0xA65DFB84, 0xA65EFB84, + 0xA65FFB84, 0xA660FB84, 0xA661FB84, 0xA662FB84, 0xA663FB84, 0xA664FB84, 0xA665FB84, 0xA666FB84, 0xA667FB84, 0xA668FB84, 0xA669FB84, 0xA66AFB84, 0xA66BFB84, 0xA66CFB84, 0xA66DFB84, 
+ 0xA66EFB84, 0xA66FFB84, 0xA670FB84, 0xA671FB84, 0xA672FB84, 0xA673FB84, 0xA674FB84, 0xA675FB84, 0xA676FB84, 0xA677FB84, 0xA678FB84, 0xA679FB84, 0xA67AFB84, 0xA67BFB84, 0xA67CFB84, + 0xA67DFB84, 0xA67EFB84, 0xA67FFB84, 0xA680FB84, 0xA681FB84, 0xA682FB84, 0xA683FB84, 0xA684FB84, 0xA685FB84, 0xA686FB84, 0xA687FB84, 0xA688FB84, 0xA689FB84, 0xA68AFB84, 0xA68BFB84, + 0xA68CFB84, 0xA68DFB84, 0xA68EFB84, 0xA68FFB84, 0xA690FB84, 0xA691FB84, 0xA692FB84, 0xA693FB84, 0xA694FB84, 0xA695FB84, 0xA696FB84, 0xA697FB84, 0xA698FB84, 0xA699FB84, 0xA69AFB84, + 0xA69BFB84, 0xA69CFB84, 0xA69DFB84, 0xA69EFB84, 0xA69FFB84, 0xA6A0FB84, 0xA6A1FB84, 0xA6A2FB84, 0xA6A3FB84, 0xA6A4FB84, 0xA6A5FB84, 0xA6A6FB84, 0xA6A7FB84, 0xA6A8FB84, 0xA6A9FB84, + 0xA6AAFB84, 0xA6ABFB84, 0xA6ACFB84, 0xA6ADFB84, 0xA6AEFB84, 0xA6AFFB84, 0xA6B0FB84, 0xA6B1FB84, 0xA6B2FB84, 0xA6B3FB84, 0xA6B4FB84, 0xA6B5FB84, 0xA6B6FB84, 0xA6B7FB84, 0xA6B8FB84, + 0xA6B9FB84, 0xA6BAFB84, 0xA6BBFB84, 0xA6BCFB84, 0xA6BDFB84, 0xA6BEFB84, 0xA6BFFB84, 0xA6C0FB84, 0xA6C1FB84, 0xA6C2FB84, 0xA6C3FB84, 0xA6C4FB84, 0xA6C5FB84, 0xA6C6FB84, 0xA6C7FB84, + 0xA6C8FB84, 0xA6C9FB84, 0xA6CAFB84, 0xA6CBFB84, 0xA6CCFB84, 0xA6CDFB84, 0xA6CEFB84, 0xA6CFFB84, 0xA6D0FB84, 0xA6D1FB84, 0xA6D2FB84, 0xA6D3FB84, 0xA6D4FB84, 0xA6D5FB84, 0xA6D6FB84, + 0xA6D7FB84, 0xA6D8FB84, 0xA6D9FB84, 0xA6DAFB84, 0xA6DBFB84, 0xA6DCFB84, 0xA6DDFB84, 0xA6DEFB84, 0xA6DFFB84, 0xA6E0FB84, 0xA6E1FB84, 0xA6E2FB84, 0xA6E3FB84, 0xA6E4FB84, 0xA6E5FB84, + 0xA6E6FB84, 0xA6E7FB84, 0xA6E8FB84, 0xA6E9FB84, 0xA6EAFB84, 0xA6EBFB84, 0xA6ECFB84, 0xA6EDFB84, 0xA6EEFB84, 0xA6EFFB84, 0xA6F0FB84, 0xA6F1FB84, 0xA6F2FB84, 0xA6F3FB84, 0xA6F4FB84, + 0xA6F5FB84, 0xA6F6FB84, 0xA6F7FB84, 0xA6F8FB84, 0xA6F9FB84, 0xA6FAFB84, 0xA6FBFB84, 0xA6FCFB84, 0xA6FDFB84, 0xA6FEFB84, 0xA6FFFB84, 0xA700FB84, 0xA701FB84, 0xA702FB84, 0xA703FB84, + 0xA704FB84, 0xA705FB84, 0xA706FB84, 0xA707FB84, 0xA708FB84, 0xA709FB84, 0xA70AFB84, 0xA70BFB84, 0xA70CFB84, 0xA70DFB84, 0xA70EFB84, 0xA70FFB84, 0xA710FB84, 0xA711FB84, 
0xA712FB84, + 0xA713FB84, 0xA714FB84, 0xA715FB84, 0xA716FB84, 0xA717FB84, 0xA718FB84, 0xA719FB84, 0xA71AFB84, 0xA71BFB84, 0xA71CFB84, 0xA71DFB84, 0xA71EFB84, 0xA71FFB84, 0xA720FB84, 0xA721FB84, + 0xA722FB84, 0xA723FB84, 0xA724FB84, 0xA725FB84, 0xA726FB84, 0xA727FB84, 0xA728FB84, 0xA729FB84, 0xA72AFB84, 0xA72BFB84, 0xA72CFB84, 0xA72DFB84, 0xA72EFB84, 0xA72FFB84, 0xA730FB84, + 0xA731FB84, 0xA732FB84, 0xA733FB84, 0xA734FB84, 0xA735FB84, 0xA736FB84, 0xA737FB84, 0xA738FB84, 0xA739FB84, 0xA73AFB84, 0xA73BFB84, 0xA73CFB84, 0xA73DFB84, 0xA73EFB84, 0xA73FFB84, + 0xA740FB84, 0xA741FB84, 0xA742FB84, 0xA743FB84, 0xA744FB84, 0xA745FB84, 0xA746FB84, 0xA747FB84, 0xA748FB84, 0xA749FB84, 0xA74AFB84, 0xA74BFB84, 0xA74CFB84, 0xA74DFB84, 0xA74EFB84, + 0xA74FFB84, 0xA750FB84, 0xA751FB84, 0xA752FB84, 0xA753FB84, 0xA754FB84, 0xA755FB84, 0xA756FB84, 0xA757FB84, 0xA758FB84, 0xA759FB84, 0xA75AFB84, 0xA75BFB84, 0xA75CFB84, 0xA75DFB84, + 0xA75EFB84, 0xA75FFB84, 0xA760FB84, 0xA761FB84, 0xA762FB84, 0xA763FB84, 0xA764FB84, 0xA765FB84, 0xA766FB84, 0xA767FB84, 0xA768FB84, 0xA769FB84, 0xA76AFB84, 0xA76BFB84, 0xA76CFB84, + 0xA76DFB84, 0xA76EFB84, 0xA76FFB84, 0xA770FB84, 0xA771FB84, 0xA772FB84, 0xA773FB84, 0xA774FB84, 0xA775FB84, 0xA776FB84, 0xA777FB84, 0xA778FB84, 0xA779FB84, 0xA77AFB84, 0xA77BFB84, + 0xA77CFB84, 0xA77DFB84, 0xA77EFB84, 0xA77FFB84, 0xA780FB84, 0xA781FB84, 0xA782FB84, 0xA783FB84, 0xA784FB84, 0xA785FB84, 0xA786FB84, 0xA787FB84, 0xA788FB84, 0xA789FB84, 0xA78AFB84, + 0xA78BFB84, 0xA78CFB84, 0xA78DFB84, 0xA78EFB84, 0xA78FFB84, 0xA790FB84, 0xA791FB84, 0xA792FB84, 0xA793FB84, 0xA794FB84, 0xA795FB84, 0xA796FB84, 0xA797FB84, 0xA798FB84, 0xA799FB84, + 0xA79AFB84, 0xA79BFB84, 0xA79CFB84, 0xA79DFB84, 0xA79EFB84, 0xA79FFB84, 0xA7A0FB84, 0xA7A1FB84, 0xA7A2FB84, 0xA7A3FB84, 0xA7A4FB84, 0xA7A5FB84, 0xA7A6FB84, 0xA7A7FB84, 0xA7A8FB84, + 0xA7A9FB84, 0xA7AAFB84, 0xA7ABFB84, 0xA7ACFB84, 0xA7ADFB84, 0xA7AEFB84, 0xA7AFFB84, 0xA7B0FB84, 0xA7B1FB84, 0xA7B2FB84, 0xA7B3FB84, 0xA7B4FB84, 0xA7B5FB84, 
0xA7B6FB84, 0xA7B7FB84, + 0xA7B8FB84, 0xA7B9FB84, 0xA7BAFB84, 0xA7BBFB84, 0xA7BCFB84, 0xA7BDFB84, 0xA7BEFB84, 0xA7BFFB84, 0xA7C0FB84, 0xA7C1FB84, 0xA7C2FB84, 0xA7C3FB84, 0xA7C4FB84, 0xA7C5FB84, 0xA7C6FB84, + 0xA7C7FB84, 0xA7C8FB84, 0xA7C9FB84, 0xA7CAFB84, 0xA7CBFB84, 0xA7CCFB84, 0xA7CDFB84, 0xA7CEFB84, 0xA7CFFB84, 0xA7D0FB84, 0xA7D1FB84, 0xA7D2FB84, 0xA7D3FB84, 0xA7D4FB84, 0xA7D5FB84, + 0xA7D6FB84, 0xA7D7FB84, 0xA7D8FB84, 0xA7D9FB84, 0xA7DAFB84, 0xA7DBFB84, 0xA7DCFB84, 0xA7DDFB84, 0xA7DEFB84, 0xA7DFFB84, 0xA7E0FB84, 0xA7E1FB84, 0xA7E2FB84, 0xA7E3FB84, 0xA7E4FB84, + 0xA7E5FB84, 0xA7E6FB84, 0xA7E7FB84, 0xA7E8FB84, 0xA7E9FB84, 0xA7EAFB84, 0xA7EBFB84, 0xA7ECFB84, 0xA7EDFB84, 0xA7EEFB84, 0xA7EFFB84, 0xA7F0FB84, 0xA7F1FB84, 0xA7F2FB84, 0xA7F3FB84, + 0xA7F4FB84, 0xA7F5FB84, 0xA7F6FB84, 0xA7F7FB84, 0xA7F8FB84, 0xA7F9FB84, 0xA7FAFB84, 0xA7FBFB84, 0xA7FCFB84, 0xA7FDFB84, 0xA7FEFB84, 0xA7FFFB84, 0xA800FB84, 0xA801FB84, 0xA802FB84, + 0xA803FB84, 0xA804FB84, 0xA805FB84, 0xA806FB84, 0xA807FB84, 0xA808FB84, 0xA809FB84, 0xA80AFB84, 0xA80BFB84, 0xA80CFB84, 0xA80DFB84, 0xA80EFB84, 0xA80FFB84, 0xA810FB84, 0xA811FB84, + 0xA812FB84, 0xA813FB84, 0xA814FB84, 0xA815FB84, 0xA816FB84, 0xA817FB84, 0xA818FB84, 0xA819FB84, 0xA81AFB84, 0xA81BFB84, 0xA81CFB84, 0xA81DFB84, 0xA81EFB84, 0xA81FFB84, 0xA820FB84, + 0xA821FB84, 0xA822FB84, 0xA823FB84, 0xA824FB84, 0xA825FB84, 0xA826FB84, 0xA827FB84, 0xA828FB84, 0xA829FB84, 0xA82AFB84, 0xA82BFB84, 0xA82CFB84, 0xA82DFB84, 0xA82EFB84, 0xA82FFB84, + 0xA830FB84, 0xA831FB84, 0xA832FB84, 0xA833FB84, 0xA834FB84, 0xA835FB84, 0xA836FB84, 0xA837FB84, 0xA838FB84, 0xA839FB84, 0xA83AFB84, 0xA83BFB84, 0xA83CFB84, 0xA83DFB84, 0xA83EFB84, + 0xA83FFB84, 0xA840FB84, 0xA841FB84, 0xA842FB84, 0xA843FB84, 0xA844FB84, 0xA845FB84, 0xA846FB84, 0xA847FB84, 0xA848FB84, 0xA849FB84, 0xA84AFB84, 0xA84BFB84, 0xA84CFB84, 0xA84DFB84, + 0xA84EFB84, 0xA84FFB84, 0xA850FB84, 0xA851FB84, 0xA852FB84, 0xA853FB84, 0xA854FB84, 0xA855FB84, 0xA856FB84, 0xA857FB84, 0xA858FB84, 0xA859FB84, 
0xA85AFB84, 0xA85BFB84, 0xA85CFB84, + 0xA85DFB84, 0xA85EFB84, 0xA85FFB84, 0xA860FB84, 0xA861FB84, 0xA862FB84, 0xA863FB84, 0xA864FB84, 0xA865FB84, 0xA866FB84, 0xA867FB84, 0xA868FB84, 0xA869FB84, 0xA86AFB84, 0xA86BFB84, + 0xA86CFB84, 0xA86DFB84, 0xA86EFB84, 0xA86FFB84, 0xA870FB84, 0xA871FB84, 0xA872FB84, 0xA873FB84, 0xA874FB84, 0xA875FB84, 0xA876FB84, 0xA877FB84, 0xA878FB84, 0xA879FB84, 0xA87AFB84, + 0xA87BFB84, 0xA87CFB84, 0xA87DFB84, 0xA87EFB84, 0xA87FFB84, 0xA880FB84, 0xA881FB84, 0xA882FB84, 0xA883FB84, 0xA884FB84, 0xA885FB84, 0xA886FB84, 0xA887FB84, 0xA888FB84, 0xA889FB84, + 0xA88AFB84, 0xA88BFB84, 0xA88CFB84, 0xA88DFB84, 0xA88EFB84, 0xA88FFB84, 0xA890FB84, 0xA891FB84, 0xA892FB84, 0xA893FB84, 0xA894FB84, 0xA895FB84, 0xA896FB84, 0xA897FB84, 0xA898FB84, + 0xA899FB84, 0xA89AFB84, 0xA89BFB84, 0xA89CFB84, 0xA89DFB84, 0xA89EFB84, 0xA89FFB84, 0xA8A0FB84, 0xA8A1FB84, 0xA8A2FB84, 0xA8A3FB84, 0xA8A4FB84, 0xA8A5FB84, 0xA8A6FB84, 0xA8A7FB84, + 0xA8A8FB84, 0xA8A9FB84, 0xA8AAFB84, 0xA8ABFB84, 0xA8ACFB84, 0xA8ADFB84, 0xA8AEFB84, 0xA8AFFB84, 0xA8B0FB84, 0xA8B1FB84, 0xA8B2FB84, 0xA8B3FB84, 0xA8B4FB84, 0xA8B5FB84, 0xA8B6FB84, + 0xA8B7FB84, 0xA8B8FB84, 0xA8B9FB84, 0xA8BAFB84, 0xA8BBFB84, 0xA8BCFB84, 0xA8BDFB84, 0xA8BEFB84, 0xA8BFFB84, 0xA8C0FB84, 0xA8C1FB84, 0xA8C2FB84, 0xA8C3FB84, 0xA8C4FB84, 0xA8C5FB84, + 0xA8C6FB84, 0xA8C7FB84, 0xA8C8FB84, 0xA8C9FB84, 0xA8CAFB84, 0xA8CBFB84, 0xA8CCFB84, 0xA8CDFB84, 0xA8CEFB84, 0xA8CFFB84, 0xA8D0FB84, 0xA8D1FB84, 0xA8D2FB84, 0xA8D3FB84, 0xA8D4FB84, + 0xA8D5FB84, 0xA8D6FB84, 0xA8D7FB84, 0xA8D8FB84, 0xA8D9FB84, 0xA8DAFB84, 0xA8DBFB84, 0xA8DCFB84, 0xA8DDFB84, 0xA8DEFB84, 0xA8DFFB84, 0xA8E0FB84, 0xA8E1FB84, 0xA8E2FB84, 0xA8E3FB84, + 0xA8E4FB84, 0xA8E5FB84, 0xA8E6FB84, 0xA8E7FB84, 0xA8E8FB84, 0xA8E9FB84, 0xA8EAFB84, 0xA8EBFB84, 0xA8ECFB84, 0xA8EDFB84, 0xA8EEFB84, 0xA8EFFB84, 0xA8F0FB84, 0xA8F1FB84, 0xA8F2FB84, + 0xA8F3FB84, 0xA8F4FB84, 0xA8F5FB84, 0xA8F6FB84, 0xA8F7FB84, 0xA8F8FB84, 0xA8F9FB84, 0xA8FAFB84, 0xA8FBFB84, 0xA8FCFB84, 0xA8FDFB84, 
0xA8FEFB84, 0xA8FFFB84, 0xA900FB84, 0xA901FB84, + 0xA902FB84, 0xA903FB84, 0xA904FB84, 0xA905FB84, 0xA906FB84, 0xA907FB84, 0xA908FB84, 0xA909FB84, 0xA90AFB84, 0xA90BFB84, 0xA90CFB84, 0xA90DFB84, 0xA90EFB84, 0xA90FFB84, 0xA910FB84, + 0xA911FB84, 0xA912FB84, 0xA913FB84, 0xA914FB84, 0xA915FB84, 0xA916FB84, 0xA917FB84, 0xA918FB84, 0xA919FB84, 0xA91AFB84, 0xA91BFB84, 0xA91CFB84, 0xA91DFB84, 0xA91EFB84, 0xA91FFB84, + 0xA920FB84, 0xA921FB84, 0xA922FB84, 0xA923FB84, 0xA924FB84, 0xA925FB84, 0xA926FB84, 0xA927FB84, 0xA928FB84, 0xA929FB84, 0xA92AFB84, 0xA92BFB84, 0xA92CFB84, 0xA92DFB84, 0xA92EFB84, + 0xA92FFB84, 0xA930FB84, 0xA931FB84, 0xA932FB84, 0xA933FB84, 0xA934FB84, 0xA935FB84, 0xA936FB84, 0xA937FB84, 0xA938FB84, 0xA939FB84, 0xA93AFB84, 0xA93BFB84, 0xA93CFB84, 0xA93DFB84, + 0xA93EFB84, 0xA93FFB84, 0xA940FB84, 0xA941FB84, 0xA942FB84, 0xA943FB84, 0xA944FB84, 0xA945FB84, 0xA946FB84, 0xA947FB84, 0xA948FB84, 0xA949FB84, 0xA94AFB84, 0xA94BFB84, 0xA94CFB84, + 0xA94DFB84, 0xA94EFB84, 0xA94FFB84, 0xA950FB84, 0xA951FB84, 0xA952FB84, 0xA953FB84, 0xA954FB84, 0xA955FB84, 0xA956FB84, 0xA957FB84, 0xA958FB84, 0xA959FB84, 0xA95AFB84, 0xA95BFB84, + 0xA95CFB84, 0xA95DFB84, 0xA95EFB84, 0xA95FFB84, 0xA960FB84, 0xA961FB84, 0xA962FB84, 0xA963FB84, 0xA964FB84, 0xA965FB84, 0xA966FB84, 0xA967FB84, 0xA968FB84, 0xA969FB84, 0xA96AFB84, + 0xA96BFB84, 0xA96CFB84, 0xA96DFB84, 0xA96EFB84, 0xA96FFB84, 0xA970FB84, 0xA971FB84, 0xA972FB84, 0xA973FB84, 0xA974FB84, 0xA975FB84, 0xA976FB84, 0xA977FB84, 0xA978FB84, 0xA979FB84, + 0xA97AFB84, 0xA97BFB84, 0xA97CFB84, 0xA97DFB84, 0xA97EFB84, 0xA97FFB84, 0xA980FB84, 0xA981FB84, 0xA982FB84, 0xA983FB84, 0xA984FB84, 0xA985FB84, 0xA986FB84, 0xA987FB84, 0xA988FB84, + 0xA989FB84, 0xA98AFB84, 0xA98BFB84, 0xA98CFB84, 0xA98DFB84, 0xA98EFB84, 0xA98FFB84, 0xA990FB84, 0xA991FB84, 0xA992FB84, 0xA993FB84, 0xA994FB84, 0xA995FB84, 0xA996FB84, 0xA997FB84, + 0xA998FB84, 0xA999FB84, 0xA99AFB84, 0xA99BFB84, 0xA99CFB84, 0xA99DFB84, 0xA99EFB84, 0xA99FFB84, 0xA9A0FB84, 0xA9A1FB84, 
0xA9A2FB84, 0xA9A3FB84, 0xA9A4FB84, 0xA9A5FB84, 0xA9A6FB84, + 0xA9A7FB84, 0xA9A8FB84, 0xA9A9FB84, 0xA9AAFB84, 0xA9ABFB84, 0xA9ACFB84, 0xA9ADFB84, 0xA9AEFB84, 0xA9AFFB84, 0xA9B0FB84, 0xA9B1FB84, 0xA9B2FB84, 0xA9B3FB84, 0xA9B4FB84, 0xA9B5FB84, + 0xA9B6FB84, 0xA9B7FB84, 0xA9B8FB84, 0xA9B9FB84, 0xA9BAFB84, 0xA9BBFB84, 0xA9BCFB84, 0xA9BDFB84, 0xA9BEFB84, 0xA9BFFB84, 0xA9C0FB84, 0xA9C1FB84, 0xA9C2FB84, 0xA9C3FB84, 0xA9C4FB84, + 0xA9C5FB84, 0xA9C6FB84, 0xA9C7FB84, 0xA9C8FB84, 0xA9C9FB84, 0xA9CAFB84, 0xA9CBFB84, 0xA9CCFB84, 0xA9CDFB84, 0xA9CEFB84, 0xA9CFFB84, 0xA9D0FB84, 0xA9D1FB84, 0xA9D2FB84, 0xA9D3FB84, + 0xA9D4FB84, 0xA9D5FB84, 0xA9D6FB84, 0xA9D7FB84, 0xA9D8FB84, 0xA9D9FB84, 0xA9DAFB84, 0xA9DBFB84, 0xA9DCFB84, 0xA9DDFB84, 0xA9DEFB84, 0xA9DFFB84, 0xA9E0FB84, 0xA9E1FB84, 0xA9E2FB84, + 0xA9E3FB84, 0xA9E4FB84, 0xA9E5FB84, 0xA9E6FB84, 0xA9E7FB84, 0xA9E8FB84, 0xA9E9FB84, 0xA9EAFB84, 0xA9EBFB84, 0xA9ECFB84, 0xA9EDFB84, 0xA9EEFB84, 0xA9EFFB84, 0xA9F0FB84, 0xA9F1FB84, + 0xA9F2FB84, 0xA9F3FB84, 0xA9F4FB84, 0xA9F5FB84, 0xA9F6FB84, 0xA9F7FB84, 0xA9F8FB84, 0xA9F9FB84, 0xA9FAFB84, 0xA9FBFB84, 0xA9FCFB84, 0xA9FDFB84, 0xA9FEFB84, 0xA9FFFB84, 0xAA00FB84, + 0xAA01FB84, 0xAA02FB84, 0xAA03FB84, 0xAA04FB84, 0xAA05FB84, 0xAA06FB84, 0xAA07FB84, 0xAA08FB84, 0xAA09FB84, 0xAA0AFB84, 0xAA0BFB84, 0xAA0CFB84, 0xAA0DFB84, 0xAA0EFB84, 0xAA0FFB84, + 0xAA10FB84, 0xAA11FB84, 0xAA12FB84, 0xAA13FB84, 0xAA14FB84, 0xAA15FB84, 0xAA16FB84, 0xAA17FB84, 0xAA18FB84, 0xAA19FB84, 0xAA1AFB84, 0xAA1BFB84, 0xAA1CFB84, 0xAA1DFB84, 0xAA1EFB84, + 0xAA1FFB84, 0xAA20FB84, 0xAA21FB84, 0xAA22FB84, 0xAA23FB84, 0xAA24FB84, 0xAA25FB84, 0xAA26FB84, 0xAA27FB84, 0xAA28FB84, 0xAA29FB84, 0xAA2AFB84, 0xAA2BFB84, 0xAA2CFB84, 0xAA2DFB84, + 0xAA2EFB84, 0xAA2FFB84, 0xAA30FB84, 0xAA31FB84, 0xAA32FB84, 0xAA33FB84, 0xAA34FB84, 0xAA35FB84, 0xAA36FB84, 0xAA37FB84, 0xAA38FB84, 0xAA39FB84, 0xAA3AFB84, 0xAA3BFB84, 0xAA3CFB84, + 0xAA3DFB84, 0xAA3EFB84, 0xAA3FFB84, 0xAA40FB84, 0xAA41FB84, 0xAA42FB84, 0xAA43FB84, 0xAA44FB84, 0xAA45FB84, 
0xAA46FB84, 0xAA47FB84, 0xAA48FB84, 0xAA49FB84, 0xAA4AFB84, 0xAA4BFB84, + 0xAA4CFB84, 0xAA4DFB84, 0xAA4EFB84, 0xAA4FFB84, 0xAA50FB84, 0xAA51FB84, 0xAA52FB84, 0xAA53FB84, 0xAA54FB84, 0xAA55FB84, 0xAA56FB84, 0xAA57FB84, 0xAA58FB84, 0xAA59FB84, 0xAA5AFB84, + 0xAA5BFB84, 0xAA5CFB84, 0xAA5DFB84, 0xAA5EFB84, 0xAA5FFB84, 0xAA60FB84, 0xAA61FB84, 0xAA62FB84, 0xAA63FB84, 0xAA64FB84, 0xAA65FB84, 0xAA66FB84, 0xAA67FB84, 0xAA68FB84, 0xAA69FB84, + 0xAA6AFB84, 0xAA6BFB84, 0xAA6CFB84, 0xAA6DFB84, 0xAA6EFB84, 0xAA6FFB84, 0xAA70FB84, 0xAA71FB84, 0xAA72FB84, 0xAA73FB84, 0xAA74FB84, 0xAA75FB84, 0xAA76FB84, 0xAA77FB84, 0xAA78FB84, + 0xAA79FB84, 0xAA7AFB84, 0xAA7BFB84, 0xAA7CFB84, 0xAA7DFB84, 0xAA7EFB84, 0xAA7FFB84, 0xAA80FB84, 0xAA81FB84, 0xAA82FB84, 0xAA83FB84, 0xAA84FB84, 0xAA85FB84, 0xAA86FB84, 0xAA87FB84, + 0xAA88FB84, 0xAA89FB84, 0xAA8AFB84, 0xAA8BFB84, 0xAA8CFB84, 0xAA8DFB84, 0xAA8EFB84, 0xAA8FFB84, 0xAA90FB84, 0xAA91FB84, 0xAA92FB84, 0xAA93FB84, 0xAA94FB84, 0xAA95FB84, 0xAA96FB84, + 0xAA97FB84, 0xAA98FB84, 0xAA99FB84, 0xAA9AFB84, 0xAA9BFB84, 0xAA9CFB84, 0xAA9DFB84, 0xAA9EFB84, 0xAA9FFB84, 0xAAA0FB84, 0xAAA1FB84, 0xAAA2FB84, 0xAAA3FB84, 0xAAA4FB84, 0xAAA5FB84, + 0xAAA6FB84, 0xAAA7FB84, 0xAAA8FB84, 0xAAA9FB84, 0xAAAAFB84, 0xAAABFB84, 0xAAACFB84, 0xAAADFB84, 0xAAAEFB84, 0xAAAFFB84, 0xAAB0FB84, 0xAAB1FB84, 0xAAB2FB84, 0xAAB3FB84, 0xAAB4FB84, + 0xAAB5FB84, 0xAAB6FB84, 0xAAB7FB84, 0xAAB8FB84, 0xAAB9FB84, 0xAABAFB84, 0xAABBFB84, 0xAABCFB84, 0xAABDFB84, 0xAABEFB84, 0xAABFFB84, 0xAAC0FB84, 0xAAC1FB84, 0xAAC2FB84, 0xAAC3FB84, + 0xAAC4FB84, 0xAAC5FB84, 0xAAC6FB84, 0xAAC7FB84, 0xAAC8FB84, 0xAAC9FB84, 0xAACAFB84, 0xAACBFB84, 0xAACCFB84, 0xAACDFB84, 0xAACEFB84, 0xAACFFB84, 0xAAD0FB84, 0xAAD1FB84, 0xAAD2FB84, + 0xAAD3FB84, 0xAAD4FB84, 0xAAD5FB84, 0xAAD6FB84, 0xAAD7FB84, 0xAAD8FB84, 0xAAD9FB84, 0xAADAFB84, 0xAADBFB84, 0xAADCFB84, 0xAADDFB84, 0xAADEFB84, 0xAADFFB84, 0xAAE0FB84, 0xAAE1FB84, + 0xAAE2FB84, 0xAAE3FB84, 0xAAE4FB84, 0xAAE5FB84, 0xAAE6FB84, 0xAAE7FB84, 0xAAE8FB84, 0xAAE9FB84, 
0xAAEAFB84, 0xAAEBFB84, 0xAAECFB84, 0xAAEDFB84, 0xAAEEFB84, 0xAAEFFB84, 0xAAF0FB84, + 0xAAF1FB84, 0xAAF2FB84, 0xAAF3FB84, 0xAAF4FB84, 0xAAF5FB84, 0xAAF6FB84, 0xAAF7FB84, 0xAAF8FB84, 0xAAF9FB84, 0xAAFAFB84, 0xAAFBFB84, 0xAAFCFB84, 0xAAFDFB84, 0xAAFEFB84, 0xAAFFFB84, + 0xAB00FB84, 0xAB01FB84, 0xAB02FB84, 0xAB03FB84, 0xAB04FB84, 0xAB05FB84, 0xAB06FB84, 0xAB07FB84, 0xAB08FB84, 0xAB09FB84, 0xAB0AFB84, 0xAB0BFB84, 0xAB0CFB84, 0xAB0DFB84, 0xAB0EFB84, + 0xAB0FFB84, 0xAB10FB84, 0xAB11FB84, 0xAB12FB84, 0xAB13FB84, 0xAB14FB84, 0xAB15FB84, 0xAB16FB84, 0xAB17FB84, 0xAB18FB84, 0xAB19FB84, 0xAB1AFB84, 0xAB1BFB84, 0xAB1CFB84, 0xAB1DFB84, + 0xAB1EFB84, 0xAB1FFB84, 0xAB20FB84, 0xAB21FB84, 0xAB22FB84, 0xAB23FB84, 0xAB24FB84, 0xAB25FB84, 0xAB26FB84, 0xAB27FB84, 0xAB28FB84, 0xAB29FB84, 0xAB2AFB84, 0xAB2BFB84, 0xAB2CFB84, + 0xAB2DFB84, 0xAB2EFB84, 0xAB2FFB84, 0xAB30FB84, 0xAB31FB84, 0xAB32FB84, 0xAB33FB84, 0xAB34FB84, 0xAB35FB84, 0xAB36FB84, 0xAB37FB84, 0xAB38FB84, 0xAB39FB84, 0xAB3AFB84, 0xAB3BFB84, + 0xAB3CFB84, 0xAB3DFB84, 0xAB3EFB84, 0xAB3FFB84, 0xAB40FB84, 0xAB41FB84, 0xAB42FB84, 0xAB43FB84, 0xAB44FB84, 0xAB45FB84, 0xAB46FB84, 0xAB47FB84, 0xAB48FB84, 0xAB49FB84, 0xAB4AFB84, + 0xAB4BFB84, 0xAB4CFB84, 0xAB4DFB84, 0xAB4EFB84, 0xAB4FFB84, 0xAB50FB84, 0xAB51FB84, 0xAB52FB84, 0xAB53FB84, 0xAB54FB84, 0xAB55FB84, 0xAB56FB84, 0xAB57FB84, 0xAB58FB84, 0xAB59FB84, + 0xAB5AFB84, 0xAB5BFB84, 0xAB5CFB84, 0xAB5DFB84, 0xAB5EFB84, 0xAB5FFB84, 0xAB60FB84, 0xAB61FB84, 0xAB62FB84, 0xAB63FB84, 0xAB64FB84, 0xAB65FB84, 0xAB66FB84, 0xAB67FB84, 0xAB68FB84, + 0xAB69FB84, 0xAB6AFB84, 0xAB6BFB84, 0xAB6CFB84, 0xAB6DFB84, 0xAB6EFB84, 0xAB6FFB84, 0xAB70FB84, 0xAB71FB84, 0xAB72FB84, 0xAB73FB84, 0xAB74FB84, 0xAB75FB84, 0xAB76FB84, 0xAB77FB84, + 0xAB78FB84, 0xAB79FB84, 0xAB7AFB84, 0xAB7BFB84, 0xAB7CFB84, 0xAB7DFB84, 0xAB7EFB84, 0xAB7FFB84, 0xAB80FB84, 0xAB81FB84, 0xAB82FB84, 0xAB83FB84, 0xAB84FB84, 0xAB85FB84, 0xAB86FB84, + 0xAB87FB84, 0xAB88FB84, 0xAB89FB84, 0xAB8AFB84, 0xAB8BFB84, 0xAB8CFB84, 0xAB8DFB84, 
0xAB8EFB84, 0xAB8FFB84, 0xAB90FB84, 0xAB91FB84, 0xAB92FB84, 0xAB93FB84, 0xAB94FB84, 0xAB95FB84, + 0xAB96FB84, 0xAB97FB84, 0xAB98FB84, 0xAB99FB84, 0xAB9AFB84, 0xAB9BFB84, 0xAB9CFB84, 0xAB9DFB84, 0xAB9EFB84, 0xAB9FFB84, 0xABA0FB84, 0xABA1FB84, 0xABA2FB84, 0xABA3FB84, 0xABA4FB84, + 0xABA5FB84, 0xABA6FB84, 0xABA7FB84, 0xABA8FB84, 0xABA9FB84, 0xABAAFB84, 0xABABFB84, 0xABACFB84, 0xABADFB84, 0xABAEFB84, 0xABAFFB84, 0xABB0FB84, 0xABB1FB84, 0xABB2FB84, 0xABB3FB84, + 0xABB4FB84, 0xABB5FB84, 0xABB6FB84, 0xABB7FB84, 0xABB8FB84, 0xABB9FB84, 0xABBAFB84, 0xABBBFB84, 0xABBCFB84, 0xABBDFB84, 0xABBEFB84, 0xABBFFB84, 0xABC0FB84, 0xABC1FB84, 0xABC2FB84, + 0xABC3FB84, 0xABC4FB84, 0xABC5FB84, 0xABC6FB84, 0xABC7FB84, 0xABC8FB84, 0xABC9FB84, 0xABCAFB84, 0xABCBFB84, 0xABCCFB84, 0xABCDFB84, 0xABCEFB84, 0xABCFFB84, 0xABD0FB84, 0xABD1FB84, + 0xABD2FB84, 0xABD3FB84, 0xABD4FB84, 0xABD5FB84, 0xABD6FB84, 0xABD7FB84, 0xABD8FB84, 0xABD9FB84, 0xABDAFB84, 0xABDBFB84, 0xABDCFB84, 0xABDDFB84, 0xABDEFB84, 0xABDFFB84, 0xABE0FB84, + 0xABE1FB84, 0xABE2FB84, 0xABE3FB84, 0xABE4FB84, 0xABE5FB84, 0xABE6FB84, 0xABE7FB84, 0xABE8FB84, 0xABE9FB84, 0xABEAFB84, 0xABEBFB84, 0xABECFB84, 0xABEDFB84, 0xABEEFB84, 0xABEFFB84, + 0xABF0FB84, 0xABF1FB84, 0xABF2FB84, 0xABF3FB84, 0xABF4FB84, 0xABF5FB84, 0xABF6FB84, 0xABF7FB84, 0xABF8FB84, 0xABF9FB84, 0xABFAFB84, 0xABFBFB84, 0xABFCFB84, 0xABFDFB84, 0xABFEFB84, + 0xABFFFB84, 0xAC00FB84, 0xAC01FB84, 0xAC02FB84, 0xAC03FB84, 0xAC04FB84, 0xAC05FB84, 0xAC06FB84, 0xAC07FB84, 0xAC08FB84, 0xAC09FB84, 0xAC0AFB84, 0xAC0BFB84, 0xAC0CFB84, 0xAC0DFB84, + 0xAC0EFB84, 0xAC0FFB84, 0xAC10FB84, 0xAC11FB84, 0xAC12FB84, 0xAC13FB84, 0xAC14FB84, 0xAC15FB84, 0xAC16FB84, 0xAC17FB84, 0xAC18FB84, 0xAC19FB84, 0xAC1AFB84, 0xAC1BFB84, 0xAC1CFB84, + 0xAC1DFB84, 0xAC1EFB84, 0xAC1FFB84, 0xAC20FB84, 0xAC21FB84, 0xAC22FB84, 0xAC23FB84, 0xAC24FB84, 0xAC25FB84, 0xAC26FB84, 0xAC27FB84, 0xAC28FB84, 0xAC29FB84, 0xAC2AFB84, 0xAC2BFB84, + 0xAC2CFB84, 0xAC2DFB84, 0xAC2EFB84, 0xAC2FFB84, 0xAC30FB84, 0xAC31FB84, 
0xAC32FB84, 0xAC33FB84, 0xAC34FB84, 0xAC35FB84, 0xAC36FB84, 0xAC37FB84, 0xAC38FB84, 0xAC39FB84, 0xAC3AFB84, + 0xAC3BFB84, 0xAC3CFB84, 0xAC3DFB84, 0xAC3EFB84, 0xAC3FFB84, 0xAC40FB84, 0xAC41FB84, 0xAC42FB84, 0xAC43FB84, 0xAC44FB84, 0xAC45FB84, 0xAC46FB84, 0xAC47FB84, 0xAC48FB84, 0xAC49FB84, + 0xAC4AFB84, 0xAC4BFB84, 0xAC4CFB84, 0xAC4DFB84, 0xAC4EFB84, 0xAC4FFB84, 0xAC50FB84, 0xAC51FB84, 0xAC52FB84, 0xAC53FB84, 0xAC54FB84, 0xAC55FB84, 0xAC56FB84, 0xAC57FB84, 0xAC58FB84, + 0xAC59FB84, 0xAC5AFB84, 0xAC5BFB84, 0xAC5CFB84, 0xAC5DFB84, 0xAC5EFB84, 0xAC5FFB84, 0xAC60FB84, 0xAC61FB84, 0xAC62FB84, 0xAC63FB84, 0xAC64FB84, 0xAC65FB84, 0xAC66FB84, 0xAC67FB84, + 0xAC68FB84, 0xAC69FB84, 0xAC6AFB84, 0xAC6BFB84, 0xAC6CFB84, 0xAC6DFB84, 0xAC6EFB84, 0xAC6FFB84, 0xAC70FB84, 0xAC71FB84, 0xAC72FB84, 0xAC73FB84, 0xAC74FB84, 0xAC75FB84, 0xAC76FB84, + 0xAC77FB84, 0xAC78FB84, 0xAC79FB84, 0xAC7AFB84, 0xAC7BFB84, 0xAC7CFB84, 0xAC7DFB84, 0xAC7EFB84, 0xAC7FFB84, 0xAC80FB84, 0xAC81FB84, 0xAC82FB84, 0xAC83FB84, 0xAC84FB84, 0xAC85FB84, + 0xAC86FB84, 0xAC87FB84, 0xAC88FB84, 0xAC89FB84, 0xAC8AFB84, 0xAC8BFB84, 0xAC8CFB84, 0xAC8DFB84, 0xAC8EFB84, 0xAC8FFB84, 0xAC90FB84, 0xAC91FB84, 0xAC92FB84, 0xAC93FB84, 0xAC94FB84, + 0xAC95FB84, 0xAC96FB84, 0xAC97FB84, 0xAC98FB84, 0xAC99FB84, 0xAC9AFB84, 0xAC9BFB84, 0xAC9CFB84, 0xAC9DFB84, 0xAC9EFB84, 0xAC9FFB84, 0xACA0FB84, 0xACA1FB84, 0xACA2FB84, 0xACA3FB84, + 0xACA4FB84, 0xACA5FB84, 0xACA6FB84, 0xACA7FB84, 0xACA8FB84, 0xACA9FB84, 0xACAAFB84, 0xACABFB84, 0xACACFB84, 0xACADFB84, 0xACAEFB84, 0xACAFFB84, 0xACB0FB84, 0xACB1FB84, 0xACB2FB84, + 0xACB3FB84, 0xACB4FB84, 0xACB5FB84, 0xACB6FB84, 0xACB7FB84, 0xACB8FB84, 0xACB9FB84, 0xACBAFB84, 0xACBBFB84, 0xACBCFB84, 0xACBDFB84, 0xACBEFB84, 0xACBFFB84, 0xACC0FB84, 0xACC1FB84, + 0xACC2FB84, 0xACC3FB84, 0xACC4FB84, 0xACC5FB84, 0xACC6FB84, 0xACC7FB84, 0xACC8FB84, 0xACC9FB84, 0xACCAFB84, 0xACCBFB84, 0xACCCFB84, 0xACCDFB84, 0xACCEFB84, 0xACCFFB84, 0xACD0FB84, + 0xACD1FB84, 0xACD2FB84, 0xACD3FB84, 0xACD4FB84, 0xACD5FB84, 
0xACD6FB84, 0xACD7FB84, 0xACD8FB84, 0xACD9FB84, 0xACDAFB84, 0xACDBFB84, 0xACDCFB84, 0xACDDFB84, 0xACDEFB84, 0xACDFFB84, + 0xACE0FB84, 0xACE1FB84, 0xACE2FB84, 0xACE3FB84, 0xACE4FB84, 0xACE5FB84, 0xACE6FB84, 0xACE7FB84, 0xACE8FB84, 0xACE9FB84, 0xACEAFB84, 0xACEBFB84, 0xACECFB84, 0xACEDFB84, 0xACEEFB84, + 0xACEFFB84, 0xACF0FB84, 0xACF1FB84, 0xACF2FB84, 0xACF3FB84, 0xACF4FB84, 0xACF5FB84, 0xACF6FB84, 0xACF7FB84, 0xACF8FB84, 0xACF9FB84, 0xACFAFB84, 0xACFBFB84, 0xACFCFB84, 0xACFDFB84, + 0xACFEFB84, 0xACFFFB84, 0xAD00FB84, 0xAD01FB84, 0xAD02FB84, 0xAD03FB84, 0xAD04FB84, 0xAD05FB84, 0xAD06FB84, 0xAD07FB84, 0xAD08FB84, 0xAD09FB84, 0xAD0AFB84, 0xAD0BFB84, 0xAD0CFB84, + 0xAD0DFB84, 0xAD0EFB84, 0xAD0FFB84, 0xAD10FB84, 0xAD11FB84, 0xAD12FB84, 0xAD13FB84, 0xAD14FB84, 0xAD15FB84, 0xAD16FB84, 0xAD17FB84, 0xAD18FB84, 0xAD19FB84, 0xAD1AFB84, 0xAD1BFB84, + 0xAD1CFB84, 0xAD1DFB84, 0xAD1EFB84, 0xAD1FFB84, 0xAD20FB84, 0xAD21FB84, 0xAD22FB84, 0xAD23FB84, 0xAD24FB84, 0xAD25FB84, 0xAD26FB84, 0xAD27FB84, 0xAD28FB84, 0xAD29FB84, 0xAD2AFB84, + 0xAD2BFB84, 0xAD2CFB84, 0xAD2DFB84, 0xAD2EFB84, 0xAD2FFB84, 0xAD30FB84, 0xAD31FB84, 0xAD32FB84, 0xAD33FB84, 0xAD34FB84, 0xAD35FB84, 0xAD36FB84, 0xAD37FB84, 0xAD38FB84, 0xAD39FB84, + 0xAD3AFB84, 0xAD3BFB84, 0xAD3CFB84, 0xAD3DFB84, 0xAD3EFB84, 0xAD3FFB84, 0xAD40FB84, 0xAD41FB84, 0xAD42FB84, 0xAD43FB84, 0xAD44FB84, 0xAD45FB84, 0xAD46FB84, 0xAD47FB84, 0xAD48FB84, + 0xAD49FB84, 0xAD4AFB84, 0xAD4BFB84, 0xAD4CFB84, 0xAD4DFB84, 0xAD4EFB84, 0xAD4FFB84, 0xAD50FB84, 0xAD51FB84, 0xAD52FB84, 0xAD53FB84, 0xAD54FB84, 0xAD55FB84, 0xAD56FB84, 0xAD57FB84, + 0xAD58FB84, 0xAD59FB84, 0xAD5AFB84, 0xAD5BFB84, 0xAD5CFB84, 0xAD5DFB84, 0xAD5EFB84, 0xAD5FFB84, 0xAD60FB84, 0xAD61FB84, 0xAD62FB84, 0xAD63FB84, 0xAD64FB84, 0xAD65FB84, 0xAD66FB84, + 0xAD67FB84, 0xAD68FB84, 0xAD69FB84, 0xAD6AFB84, 0xAD6BFB84, 0xAD6CFB84, 0xAD6DFB84, 0xAD6EFB84, 0xAD6FFB84, 0xAD70FB84, 0xAD71FB84, 0xAD72FB84, 0xAD73FB84, 0xAD74FB84, 0xAD75FB84, + 0xAD76FB84, 0xAD77FB84, 0xAD78FB84, 0xAD79FB84, 
0xAD7AFB84, 0xAD7BFB84, 0xAD7CFB84, 0xAD7DFB84, 0xAD7EFB84, 0xAD7FFB84, 0xAD80FB84, 0xAD81FB84, 0xAD82FB84, 0xAD83FB84, 0xAD84FB84, + 0xAD85FB84, 0xAD86FB84, 0xAD87FB84, 0xAD88FB84, 0xAD89FB84, 0xAD8AFB84, 0xAD8BFB84, 0xAD8CFB84, 0xAD8DFB84, 0xAD8EFB84, 0xAD8FFB84, 0xAD90FB84, 0xAD91FB84, 0xAD92FB84, 0xAD93FB84, + 0xAD94FB84, 0xAD95FB84, 0xAD96FB84, 0xAD97FB84, 0xAD98FB84, 0xAD99FB84, 0xAD9AFB84, 0xAD9BFB84, 0xAD9CFB84, 0xAD9DFB84, 0xAD9EFB84, 0xAD9FFB84, 0xADA0FB84, 0xADA1FB84, 0xADA2FB84, + 0xADA3FB84, 0xADA4FB84, 0xADA5FB84, 0xADA6FB84, 0xADA7FB84, 0xADA8FB84, 0xADA9FB84, 0xADAAFB84, 0xADABFB84, 0xADACFB84, 0xADADFB84, 0xADAEFB84, 0xADAFFB84, 0xADB0FB84, 0xADB1FB84, + 0xADB2FB84, 0xADB3FB84, 0xADB4FB84, 0xADB5FB84, 0xADB6FB84, 0xADB7FB84, 0xADB8FB84, 0xADB9FB84, 0xADBAFB84, 0xADBBFB84, 0xADBCFB84, 0xADBDFB84, 0xADBEFB84, 0xADBFFB84, 0xADC0FB84, + 0xADC1FB84, 0xADC2FB84, 0xADC3FB84, 0xADC4FB84, 0xADC5FB84, 0xADC6FB84, 0xADC7FB84, 0xADC8FB84, 0xADC9FB84, 0xADCAFB84, 0xADCBFB84, 0xADCCFB84, 0xADCDFB84, 0xADCEFB84, 0xADCFFB84, + 0xADD0FB84, 0xADD1FB84, 0xADD2FB84, 0xADD3FB84, 0xADD4FB84, 0xADD5FB84, 0xADD6FB84, 0xADD7FB84, 0xADD8FB84, 0xADD9FB84, 0xADDAFB84, 0xADDBFB84, 0xADDCFB84, 0xADDDFB84, 0xADDEFB84, + 0xADDFFB84, 0xADE0FB84, 0xADE1FB84, 0xADE2FB84, 0xADE3FB84, 0xADE4FB84, 0xADE5FB84, 0xADE6FB84, 0xADE7FB84, 0xADE8FB84, 0xADE9FB84, 0xADEAFB84, 0xADEBFB84, 0xADECFB84, 0xADEDFB84, + 0xADEEFB84, 0xADEFFB84, 0xADF0FB84, 0xADF1FB84, 0xADF2FB84, 0xADF3FB84, 0xADF4FB84, 0xADF5FB84, 0xADF6FB84, 0xADF7FB84, 0xADF8FB84, 0xADF9FB84, 0xADFAFB84, 0xADFBFB84, 0xADFCFB84, + 0xADFDFB84, 0xADFEFB84, 0xADFFFB84, 0xAE00FB84, 0xAE01FB84, 0xAE02FB84, 0xAE03FB84, 0xAE04FB84, 0xAE05FB84, 0xAE06FB84, 0xAE07FB84, 0xAE08FB84, 0xAE09FB84, 0xAE0AFB84, 0xAE0BFB84, + 0xAE0CFB84, 0xAE0DFB84, 0xAE0EFB84, 0xAE0FFB84, 0xAE10FB84, 0xAE11FB84, 0xAE12FB84, 0xAE13FB84, 0xAE14FB84, 0xAE15FB84, 0xAE16FB84, 0xAE17FB84, 0xAE18FB84, 0xAE19FB84, 0xAE1AFB84, + 0xAE1BFB84, 0xAE1CFB84, 0xAE1DFB84, 
0xAE1EFB84, 0xAE1FFB84, 0xAE20FB84, 0xAE21FB84, 0xAE22FB84, 0xAE23FB84, 0xAE24FB84, 0xAE25FB84, 0xAE26FB84, 0xAE27FB84, 0xAE28FB84, 0xAE29FB84, + 0xAE2AFB84, 0xAE2BFB84, 0xAE2CFB84, 0xAE2DFB84, 0xAE2EFB84, 0xAE2FFB84, 0xAE30FB84, 0xAE31FB84, 0xAE32FB84, 0xAE33FB84, 0xAE34FB84, 0xAE35FB84, 0xAE36FB84, 0xAE37FB84, 0xAE38FB84, + 0xAE39FB84, 0xAE3AFB84, 0xAE3BFB84, 0xAE3CFB84, 0xAE3DFB84, 0xAE3EFB84, 0xAE3FFB84, 0xAE40FB84, 0xAE41FB84, 0xAE42FB84, 0xAE43FB84, 0xAE44FB84, 0xAE45FB84, 0xAE46FB84, 0xAE47FB84, + 0xAE48FB84, 0xAE49FB84, 0xAE4AFB84, 0xAE4BFB84, 0xAE4CFB84, 0xAE4DFB84, 0xAE4EFB84, 0xAE4FFB84, 0xAE50FB84, 0xAE51FB84, 0xAE52FB84, 0xAE53FB84, 0xAE54FB84, 0xAE55FB84, 0xAE56FB84, + 0xAE57FB84, 0xAE58FB84, 0xAE59FB84, 0xAE5AFB84, 0xAE5BFB84, 0xAE5CFB84, 0xAE5DFB84, 0xAE5EFB84, 0xAE5FFB84, 0xAE60FB84, 0xAE61FB84, 0xAE62FB84, 0xAE63FB84, 0xAE64FB84, 0xAE65FB84, + 0xAE66FB84, 0xAE67FB84, 0xAE68FB84, 0xAE69FB84, 0xAE6AFB84, 0xAE6BFB84, 0xAE6CFB84, 0xAE6DFB84, 0xAE6EFB84, 0xAE6FFB84, 0xAE70FB84, 0xAE71FB84, 0xAE72FB84, 0xAE73FB84, 0xAE74FB84, + 0xAE75FB84, 0xAE76FB84, 0xAE77FB84, 0xAE78FB84, 0xAE79FB84, 0xAE7AFB84, 0xAE7BFB84, 0xAE7CFB84, 0xAE7DFB84, 0xAE7EFB84, 0xAE7FFB84, 0xAE80FB84, 0xAE81FB84, 0xAE82FB84, 0xAE83FB84, + 0xAE84FB84, 0xAE85FB84, 0xAE86FB84, 0xAE87FB84, 0xAE88FB84, 0xAE89FB84, 0xAE8AFB84, 0xAE8BFB84, 0xAE8CFB84, 0xAE8DFB84, 0xAE8EFB84, 0xAE8FFB84, 0xAE90FB84, 0xAE91FB84, 0xAE92FB84, + 0xAE93FB84, 0xAE94FB84, 0xAE95FB84, 0xAE96FB84, 0xAE97FB84, 0xAE98FB84, 0xAE99FB84, 0xAE9AFB84, 0xAE9BFB84, 0xAE9CFB84, 0xAE9DFB84, 0xAE9EFB84, 0xAE9FFB84, 0xAEA0FB84, 0xAEA1FB84, + 0xAEA2FB84, 0xAEA3FB84, 0xAEA4FB84, 0xAEA5FB84, 0xAEA6FB84, 0xAEA7FB84, 0xAEA8FB84, 0xAEA9FB84, 0xAEAAFB84, 0xAEABFB84, 0xAEACFB84, 0xAEADFB84, 0xAEAEFB84, 0xAEAFFB84, 0xAEB0FB84, + 0xAEB1FB84, 0xAEB2FB84, 0xAEB3FB84, 0xAEB4FB84, 0xAEB5FB84, 0xAEB6FB84, 0xAEB7FB84, 0xAEB8FB84, 0xAEB9FB84, 0xAEBAFB84, 0xAEBBFB84, 0xAEBCFB84, 0xAEBDFB84, 0xAEBEFB84, 0xAEBFFB84, + 0xAEC0FB84, 0xAEC1FB84, 
0xAEC2FB84, 0xAEC3FB84, 0xAEC4FB84, 0xAEC5FB84, 0xAEC6FB84, 0xAEC7FB84, 0xAEC8FB84, 0xAEC9FB84, 0xAECAFB84, 0xAECBFB84, 0xAECCFB84, 0xAECDFB84, 0xAECEFB84, + 0xAECFFB84, 0xAED0FB84, 0xAED1FB84, 0xAED2FB84, 0xAED3FB84, 0xAED4FB84, 0xAED5FB84, 0xAED6FB84, 0xAED7FB84, 0xAED8FB84, 0xAED9FB84, 0xAEDAFB84, 0xAEDBFB84, 0xAEDCFB84, 0xAEDDFB84, + 0xAEDEFB84, 0xAEDFFB84, 0xAEE0FB84, 0xAEE1FB84, 0xAEE2FB84, 0xAEE3FB84, 0xAEE4FB84, 0xAEE5FB84, 0xAEE6FB84, 0xAEE7FB84, 0xAEE8FB84, 0xAEE9FB84, 0xAEEAFB84, 0xAEEBFB84, 0xAEECFB84, + 0xAEEDFB84, 0xAEEEFB84, 0xAEEFFB84, 0xAEF0FB84, 0xAEF1FB84, 0xAEF2FB84, 0xAEF3FB84, 0xAEF4FB84, 0xAEF5FB84, 0xAEF6FB84, 0xAEF7FB84, 0xAEF8FB84, 0xAEF9FB84, 0xAEFAFB84, 0xAEFBFB84, + 0xAEFCFB84, 0xAEFDFB84, 0xAEFEFB84, 0xAEFFFB84, 0xAF00FB84, 0xAF01FB84, 0xAF02FB84, 0xAF03FB84, 0xAF04FB84, 0xAF05FB84, 0xAF06FB84, 0xAF07FB84, 0xAF08FB84, 0xAF09FB84, 0xAF0AFB84, + 0xAF0BFB84, 0xAF0CFB84, 0xAF0DFB84, 0xAF0EFB84, 0xAF0FFB84, 0xAF10FB84, 0xAF11FB84, 0xAF12FB84, 0xAF13FB84, 0xAF14FB84, 0xAF15FB84, 0xAF16FB84, 0xAF17FB84, 0xAF18FB84, 0xAF19FB84, + 0xAF1AFB84, 0xAF1BFB84, 0xAF1CFB84, 0xAF1DFB84, 0xAF1EFB84, 0xAF1FFB84, 0xAF20FB84, 0xAF21FB84, 0xAF22FB84, 0xAF23FB84, 0xAF24FB84, 0xAF25FB84, 0xAF26FB84, 0xAF27FB84, 0xAF28FB84, + 0xAF29FB84, 0xAF2AFB84, 0xAF2BFB84, 0xAF2CFB84, 0xAF2DFB84, 0xAF2EFB84, 0xAF2FFB84, 0xAF30FB84, 0xAF31FB84, 0xAF32FB84, 0xAF33FB84, 0xAF34FB84, 0xAF35FB84, 0xAF36FB84, 0xAF37FB84, + 0xAF38FB84, 0xAF39FB84, 0xAF3AFB84, 0xAF3BFB84, 0xAF3CFB84, 0xAF3DFB84, 0xAF3EFB84, 0xAF3FFB84, 0xAF40FB84, 0xAF41FB84, 0xAF42FB84, 0xAF43FB84, 0xAF44FB84, 0xAF45FB84, 0xAF46FB84, + 0xAF47FB84, 0xAF48FB84, 0xAF49FB84, 0xAF4AFB84, 0xAF4BFB84, 0xAF4CFB84, 0xAF4DFB84, 0xAF4EFB84, 0xAF4FFB84, 0xAF50FB84, 0xAF51FB84, 0xAF52FB84, 0xAF53FB84, 0xAF54FB84, 0xAF55FB84, + 0xAF56FB84, 0xAF57FB84, 0xAF58FB84, 0xAF59FB84, 0xAF5AFB84, 0xAF5BFB84, 0xAF5CFB84, 0xAF5DFB84, 0xAF5EFB84, 0xAF5FFB84, 0xAF60FB84, 0xAF61FB84, 0xAF62FB84, 0xAF63FB84, 0xAF64FB84, + 0xAF65FB84, 
0xAF66FB84, 0xAF67FB84, 0xAF68FB84, 0xAF69FB84, 0xAF6AFB84, 0xAF6BFB84, 0xAF6CFB84, 0xAF6DFB84, 0xAF6EFB84, 0xAF6FFB84, 0xAF70FB84, 0xAF71FB84, 0xAF72FB84, 0xAF73FB84, + 0xAF74FB84, 0xAF75FB84, 0xAF76FB84, 0xAF77FB84, 0xAF78FB84, 0xAF79FB84, 0xAF7AFB84, 0xAF7BFB84, 0xAF7CFB84, 0xAF7DFB84, 0xAF7EFB84, 0xAF7FFB84, 0xAF80FB84, 0xAF81FB84, 0xAF82FB84, + 0xAF83FB84, 0xAF84FB84, 0xAF85FB84, 0xAF86FB84, 0xAF87FB84, 0xAF88FB84, 0xAF89FB84, 0xAF8AFB84, 0xAF8BFB84, 0xAF8CFB84, 0xAF8DFB84, 0xAF8EFB84, 0xAF8FFB84, 0xAF90FB84, 0xAF91FB84, + 0xAF92FB84, 0xAF93FB84, 0xAF94FB84, 0xAF95FB84, 0xAF96FB84, 0xAF97FB84, 0xAF98FB84, 0xAF99FB84, 0xAF9AFB84, 0xAF9BFB84, 0xAF9CFB84, 0xAF9DFB84, 0xAF9EFB84, 0xAF9FFB84, 0xAFA0FB84, + 0xAFA1FB84, 0xAFA2FB84, 0xAFA3FB84, 0xAFA4FB84, 0xAFA5FB84, 0xAFA6FB84, 0xAFA7FB84, 0xAFA8FB84, 0xAFA9FB84, 0xAFAAFB84, 0xAFABFB84, 0xAFACFB84, 0xAFADFB84, 0xAFAEFB84, 0xAFAFFB84, + 0xAFB0FB84, 0xAFB1FB84, 0xAFB2FB84, 0xAFB3FB84, 0xAFB4FB84, 0xAFB5FB84, 0xAFB6FB84, 0xAFB7FB84, 0xAFB8FB84, 0xAFB9FB84, 0xAFBAFB84, 0xAFBBFB84, 0xAFBCFB84, 0xAFBDFB84, 0xAFBEFB84, + 0xAFBFFB84, 0xAFC0FB84, 0xAFC1FB84, 0xAFC2FB84, 0xAFC3FB84, 0xAFC4FB84, 0xAFC5FB84, 0xAFC6FB84, 0xAFC7FB84, 0xAFC8FB84, 0xAFC9FB84, 0xAFCAFB84, 0xAFCBFB84, 0xAFCCFB84, 0xAFCDFB84, + 0xAFCEFB84, 0xAFCFFB84, 0xAFD0FB84, 0xAFD1FB84, 0xAFD2FB84, 0xAFD3FB84, 0xAFD4FB84, 0xAFD5FB84, 0xAFD6FB84, 0xAFD7FB84, 0xAFD8FB84, 0xAFD9FB84, 0xAFDAFB84, 0xAFDBFB84, 0xAFDCFB84, + 0xAFDDFB84, 0xAFDEFB84, 0xAFDFFB84, 0xAFE0FB84, 0xAFE1FB84, 0xAFE2FB84, 0xAFE3FB84, 0xAFE4FB84, 0xAFE5FB84, 0xAFE6FB84, 0xAFE7FB84, 0xAFE8FB84, 0xAFE9FB84, 0xAFEAFB84, 0xAFEBFB84, + 0xAFECFB84, 0xAFEDFB84, 0xAFEEFB84, 0xAFEFFB84, 0xAFF0FB84, 0xAFF1FB84, 0xAFF2FB84, 0xAFF3FB84, 0xAFF4FB84, 0xAFF5FB84, 0xAFF6FB84, 0xAFF7FB84, 0xAFF8FB84, 0xAFF9FB84, 0xAFFAFB84, + 0xAFFBFB84, 0xAFFCFB84, 0xAFFDFB84, 0xAFFEFB84, 0xAFFFFB84, 0xB000FB84, 0xB001FB84, 0xB002FB84, 0xB003FB84, 0xB004FB84, 0xB005FB84, 0xB006FB84, 0xB007FB84, 0xB008FB84, 0xB009FB84, + 
0xB00AFB84, 0xB00BFB84, 0xB00CFB84, 0xB00DFB84, 0xB00EFB84, 0xB00FFB84, 0xB010FB84, 0xB011FB84, 0xB012FB84, 0xB013FB84, 0xB014FB84, 0xB015FB84, 0xB016FB84, 0xB017FB84, 0xB018FB84, + 0xB019FB84, 0xB01AFB84, 0xB01BFB84, 0xB01CFB84, 0xB01DFB84, 0xB01EFB84, 0xB01FFB84, 0xB020FB84, 0xB021FB84, 0xB022FB84, 0xB023FB84, 0xB024FB84, 0xB025FB84, 0xB026FB84, 0xB027FB84, + 0xB028FB84, 0xB029FB84, 0xB02AFB84, 0xB02BFB84, 0xB02CFB84, 0xB02DFB84, 0xB02EFB84, 0xB02FFB84, 0xB030FB84, 0xB031FB84, 0xB032FB84, 0xB033FB84, 0xB034FB84, 0xB035FB84, 0xB036FB84, + 0xB037FB84, 0xB038FB84, 0xB039FB84, 0xB03AFB84, 0xB03BFB84, 0xB03CFB84, 0xB03DFB84, 0xB03EFB84, 0xB03FFB84, 0xB040FB84, 0xB041FB84, 0xB042FB84, 0xB043FB84, 0xB044FB84, 0xB045FB84, + 0xB046FB84, 0xB047FB84, 0xB048FB84, 0xB049FB84, 0xB04AFB84, 0xB04BFB84, 0xB04CFB84, 0xB04DFB84, 0xB04EFB84, 0xB04FFB84, 0xB050FB84, 0xB051FB84, 0xB052FB84, 0xB053FB84, 0xB054FB84, + 0xB055FB84, 0xB056FB84, 0xB057FB84, 0xB058FB84, 0xB059FB84, 0xB05AFB84, 0xB05BFB84, 0xB05CFB84, 0xB05DFB84, 0xB05EFB84, 0xB05FFB84, 0xB060FB84, 0xB061FB84, 0xB062FB84, 0xB063FB84, + 0xB064FB84, 0xB065FB84, 0xB066FB84, 0xB067FB84, 0xB068FB84, 0xB069FB84, 0xB06AFB84, 0xB06BFB84, 0xB06CFB84, 0xB06DFB84, 0xB06EFB84, 0xB06FFB84, 0xB070FB84, 0xB071FB84, 0xB072FB84, + 0xB073FB84, 0xB074FB84, 0xB075FB84, 0xB076FB84, 0xB077FB84, 0xB078FB84, 0xB079FB84, 0xB07AFB84, 0xB07BFB84, 0xB07CFB84, 0xB07DFB84, 0xB07EFB84, 0xB07FFB84, 0xB080FB84, 0xB081FB84, + 0xB082FB84, 0xB083FB84, 0xB084FB84, 0xB085FB84, 0xB086FB84, 0xB087FB84, 0xB088FB84, 0xB089FB84, 0xB08AFB84, 0xB08BFB84, 0xB08CFB84, 0xB08DFB84, 0xB08EFB84, 0xB08FFB84, 0xB090FB84, + 0xB091FB84, 0xB092FB84, 0xB093FB84, 0xB094FB84, 0xB095FB84, 0xB096FB84, 0xB097FB84, 0xB098FB84, 0xB099FB84, 0xB09AFB84, 0xB09BFB84, 0xB09CFB84, 0xB09DFB84, 0xB09EFB84, 0xB09FFB84, + 0xB0A0FB84, 0xB0A1FB84, 0xB0A2FB84, 0xB0A3FB84, 0xB0A4FB84, 0xB0A5FB84, 0xB0A6FB84, 0xB0A7FB84, 0xB0A8FB84, 0xB0A9FB84, 0xB0AAFB84, 0xB0ABFB84, 0xB0ACFB84, 0xB0ADFB84, 0xB0AEFB84, 
+ 0xB0AFFB84, 0xB0B0FB84, 0xB0B1FB84, 0xB0B2FB84, 0xB0B3FB84, 0xB0B4FB84, 0xB0B5FB84, 0xB0B6FB84, 0xB0B7FB84, 0xB0B8FB84, 0xB0B9FB84, 0xB0BAFB84, 0xB0BBFB84, 0xB0BCFB84, 0xB0BDFB84, + 0xB0BEFB84, 0xB0BFFB84, 0xB0C0FB84, 0xB0C1FB84, 0xB0C2FB84, 0xB0C3FB84, 0xB0C4FB84, 0xB0C5FB84, 0xB0C6FB84, 0xB0C7FB84, 0xB0C8FB84, 0xB0C9FB84, 0xB0CAFB84, 0xB0CBFB84, 0xB0CCFB84, + 0xB0CDFB84, 0xB0CEFB84, 0xB0CFFB84, 0xB0D0FB84, 0xB0D1FB84, 0xB0D2FB84, 0xB0D3FB84, 0xB0D4FB84, 0xB0D5FB84, 0xB0D6FB84, 0xB0D7FB84, 0xB0D8FB84, 0xB0D9FB84, 0xB0DAFB84, 0xB0DBFB84, + 0xB0DCFB84, 0xB0DDFB84, 0xB0DEFB84, 0xB0DFFB84, 0xB0E0FB84, 0xB0E1FB84, 0xB0E2FB84, 0xB0E3FB84, 0xB0E4FB84, 0xB0E5FB84, 0xB0E6FB84, 0xB0E7FB84, 0xB0E8FB84, 0xB0E9FB84, 0xB0EAFB84, + 0xB0EBFB84, 0xB0ECFB84, 0xB0EDFB84, 0xB0EEFB84, 0xB0EFFB84, 0xB0F0FB84, 0xB0F1FB84, 0xB0F2FB84, 0xB0F3FB84, 0xB0F4FB84, 0xB0F5FB84, 0xB0F6FB84, 0xB0F7FB84, 0xB0F8FB84, 0xB0F9FB84, + 0xB0FAFB84, 0xB0FBFB84, 0xB0FCFB84, 0xB0FDFB84, 0xB0FEFB84, 0xB0FFFB84, 0xB100FB84, 0xB101FB84, 0xB102FB84, 0xB103FB84, 0xB104FB84, 0xB105FB84, 0xB106FB84, 0xB107FB84, 0xB108FB84, + 0xB109FB84, 0xB10AFB84, 0xB10BFB84, 0xB10CFB84, 0xB10DFB84, 0xB10EFB84, 0xB10FFB84, 0xB110FB84, 0xB111FB84, 0xB112FB84, 0xB113FB84, 0xB114FB84, 0xB115FB84, 0xB116FB84, 0xB117FB84, + 0xB118FB84, 0xB119FB84, 0xB11AFB84, 0xB11BFB84, 0xB11CFB84, 0xB11DFB84, 0xB11EFB84, 0xB11FFB84, 0xB120FB84, 0xB121FB84, 0xB122FB84, 0xB123FB84, 0xB124FB84, 0xB125FB84, 0xB126FB84, + 0xB127FB84, 0xB128FB84, 0xB129FB84, 0xB12AFB84, 0xB12BFB84, 0xB12CFB84, 0xB12DFB84, 0xB12EFB84, 0xB12FFB84, 0xB130FB84, 0xB131FB84, 0xB132FB84, 0xB133FB84, 0xB134FB84, 0xB135FB84, + 0xB136FB84, 0xB137FB84, 0xB138FB84, 0xB139FB84, 0xB13AFB84, 0xB13BFB84, 0xB13CFB84, 0xB13DFB84, 0xB13EFB84, 0xB13FFB84, 0xB140FB84, 0xB141FB84, 0xB142FB84, 0xB143FB84, 0xB144FB84, + 0xB145FB84, 0xB146FB84, 0xB147FB84, 0xB148FB84, 0xB149FB84, 0xB14AFB84, 0xB14BFB84, 0xB14CFB84, 0xB14DFB84, 0xB14EFB84, 0xB14FFB84, 0xB150FB84, 0xB151FB84, 0xB152FB84, 
0xB153FB84, + 0xB154FB84, 0xB155FB84, 0xB156FB84, 0xB157FB84, 0xB158FB84, 0xB159FB84, 0xB15AFB84, 0xB15BFB84, 0xB15CFB84, 0xB15DFB84, 0xB15EFB84, 0xB15FFB84, 0xB160FB84, 0xB161FB84, 0xB162FB84, + 0xB163FB84, 0xB164FB84, 0xB165FB84, 0xB166FB84, 0xB167FB84, 0xB168FB84, 0xB169FB84, 0xB16AFB84, 0xB16BFB84, 0xB16CFB84, 0xB16DFB84, 0xB16EFB84, 0xB16FFB84, 0xB170FB84, 0xB171FB84, + 0xB172FB84, 0xB173FB84, 0xB174FB84, 0xB175FB84, 0xB176FB84, 0xB177FB84, 0xB178FB84, 0xB179FB84, 0xB17AFB84, 0xB17BFB84, 0xB17CFB84, 0xB17DFB84, 0xB17EFB84, 0xB17FFB84, 0xB180FB84, + 0xB181FB84, 0xB182FB84, 0xB183FB84, 0xB184FB84, 0xB185FB84, 0xB186FB84, 0xB187FB84, 0xB188FB84, 0xB189FB84, 0xB18AFB84, 0xB18BFB84, 0xB18CFB84, 0xB18DFB84, 0xB18EFB84, 0xB18FFB84, + 0xB190FB84, 0xB191FB84, 0xB192FB84, 0xB193FB84, 0xB194FB84, 0xB195FB84, 0xB196FB84, 0xB197FB84, 0xB198FB84, 0xB199FB84, 0xB19AFB84, 0xB19BFB84, 0xB19CFB84, 0xB19DFB84, 0xB19EFB84, + 0xB19FFB84, 0xB1A0FB84, 0xB1A1FB84, 0xB1A2FB84, 0xB1A3FB84, 0xB1A4FB84, 0xB1A5FB84, 0xB1A6FB84, 0xB1A7FB84, 0xB1A8FB84, 0xB1A9FB84, 0xB1AAFB84, 0xB1ABFB84, 0xB1ACFB84, 0xB1ADFB84, + 0xB1AEFB84, 0xB1AFFB84, 0xB1B0FB84, 0xB1B1FB84, 0xB1B2FB84, 0xB1B3FB84, 0xB1B4FB84, 0xB1B5FB84, 0xB1B6FB84, 0xB1B7FB84, 0xB1B8FB84, 0xB1B9FB84, 0xB1BAFB84, 0xB1BBFB84, 0xB1BCFB84, + 0xB1BDFB84, 0xB1BEFB84, 0xB1BFFB84, 0xB1C0FB84, 0xB1C1FB84, 0xB1C2FB84, 0xB1C3FB84, 0xB1C4FB84, 0xB1C5FB84, 0xB1C6FB84, 0xB1C7FB84, 0xB1C8FB84, 0xB1C9FB84, 0xB1CAFB84, 0xB1CBFB84, + 0xB1CCFB84, 0xB1CDFB84, 0xB1CEFB84, 0xB1CFFB84, 0xB1D0FB84, 0xB1D1FB84, 0xB1D2FB84, 0xB1D3FB84, 0xB1D4FB84, 0xB1D5FB84, 0xB1D6FB84, 0xB1D7FB84, 0xB1D8FB84, 0xB1D9FB84, 0xB1DAFB84, + 0xB1DBFB84, 0xB1DCFB84, 0xB1DDFB84, 0xB1DEFB84, 0xB1DFFB84, 0xB1E0FB84, 0xB1E1FB84, 0xB1E2FB84, 0xB1E3FB84, 0xB1E4FB84, 0xB1E5FB84, 0xB1E6FB84, 0xB1E7FB84, 0xB1E8FB84, 0xB1E9FB84, + 0xB1EAFB84, 0xB1EBFB84, 0xB1ECFB84, 0xB1EDFB84, 0xB1EEFB84, 0xB1EFFB84, 0xB1F0FB84, 0xB1F1FB84, 0xB1F2FB84, 0xB1F3FB84, 0xB1F4FB84, 0xB1F5FB84, 0xB1F6FB84, 
0xB1F7FB84, 0xB1F8FB84, + 0xB1F9FB84, 0xB1FAFB84, 0xB1FBFB84, 0xB1FCFB84, 0xB1FDFB84, 0xB1FEFB84, 0xB1FFFB84, 0xB200FB84, 0xB201FB84, 0xB202FB84, 0xB203FB84, 0xB204FB84, 0xB205FB84, 0xB206FB84, 0xB207FB84, + 0xB208FB84, 0xB209FB84, 0xB20AFB84, 0xB20BFB84, 0xB20CFB84, 0xB20DFB84, 0xB20EFB84, 0xB20FFB84, 0xB210FB84, 0xB211FB84, 0xB212FB84, 0xB213FB84, 0xB214FB84, 0xB215FB84, 0xB216FB84, + 0xB217FB84, 0xB218FB84, 0xB219FB84, 0xB21AFB84, 0xB21BFB84, 0xB21CFB84, 0xB21DFB84, 0xB21EFB84, 0xB21FFB84, 0xB220FB84, 0xB221FB84, 0xB222FB84, 0xB223FB84, 0xB224FB84, 0xB225FB84, + 0xB226FB84, 0xB227FB84, 0xB228FB84, 0xB229FB84, 0xB22AFB84, 0xB22BFB84, 0xB22CFB84, 0xB22DFB84, 0xB22EFB84, 0xB22FFB84, 0xB230FB84, 0xB231FB84, 0xB232FB84, 0xB233FB84, 0xB234FB84, + 0xB235FB84, 0xB236FB84, 0xB237FB84, 0xB238FB84, 0xB239FB84, 0xB23AFB84, 0xB23BFB84, 0xB23CFB84, 0xB23DFB84, 0xB23EFB84, 0xB23FFB84, 0xB240FB84, 0xB241FB84, 0xB242FB84, 0xB243FB84, + 0xB244FB84, 0xB245FB84, 0xB246FB84, 0xB247FB84, 0xB248FB84, 0xB249FB84, 0xB24AFB84, 0xB24BFB84, 0xB24CFB84, 0xB24DFB84, 0xB24EFB84, 0xB24FFB84, 0xB250FB84, 0xB251FB84, 0xB252FB84, + 0xB253FB84, 0xB254FB84, 0xB255FB84, 0xB256FB84, 0xB257FB84, 0xB258FB84, 0xB259FB84, 0xB25AFB84, 0xB25BFB84, 0xB25CFB84, 0xB25DFB84, 0xB25EFB84, 0xB25FFB84, 0xB260FB84, 0xB261FB84, + 0xB262FB84, 0xB263FB84, 0xB264FB84, 0xB265FB84, 0xB266FB84, 0xB267FB84, 0xB268FB84, 0xB269FB84, 0xB26AFB84, 0xB26BFB84, 0xB26CFB84, 0xB26DFB84, 0xB26EFB84, 0xB26FFB84, 0xB270FB84, + 0xB271FB84, 0xB272FB84, 0xB273FB84, 0xB274FB84, 0xB275FB84, 0xB276FB84, 0xB277FB84, 0xB278FB84, 0xB279FB84, 0xB27AFB84, 0xB27BFB84, 0xB27CFB84, 0xB27DFB84, 0xB27EFB84, 0xB27FFB84, + 0xB280FB84, 0xB281FB84, 0xB282FB84, 0xB283FB84, 0xB284FB84, 0xB285FB84, 0xB286FB84, 0xB287FB84, 0xB288FB84, 0xB289FB84, 0xB28AFB84, 0xB28BFB84, 0xB28CFB84, 0xB28DFB84, 0xB28EFB84, + 0xB28FFB84, 0xB290FB84, 0xB291FB84, 0xB292FB84, 0xB293FB84, 0xB294FB84, 0xB295FB84, 0xB296FB84, 0xB297FB84, 0xB298FB84, 0xB299FB84, 0xB29AFB84, 
0xB29BFB84, 0xB29CFB84, 0xB29DFB84, + 0xB29EFB84, 0xB29FFB84, 0xB2A0FB84, 0xB2A1FB84, 0xB2A2FB84, 0xB2A3FB84, 0xB2A4FB84, 0xB2A5FB84, 0xB2A6FB84, 0xB2A7FB84, 0xB2A8FB84, 0xB2A9FB84, 0xB2AAFB84, 0xB2ABFB84, 0xB2ACFB84, + 0xB2ADFB84, 0xB2AEFB84, 0xB2AFFB84, 0xB2B0FB84, 0xB2B1FB84, 0xB2B2FB84, 0xB2B3FB84, 0xB2B4FB84, 0xB2B5FB84, 0xB2B6FB84, 0xB2B7FB84, 0xB2B8FB84, 0xB2B9FB84, 0xB2BAFB84, 0xB2BBFB84, + 0xB2BCFB84, 0xB2BDFB84, 0xB2BEFB84, 0xB2BFFB84, 0xB2C0FB84, 0xB2C1FB84, 0xB2C2FB84, 0xB2C3FB84, 0xB2C4FB84, 0xB2C5FB84, 0xB2C6FB84, 0xB2C7FB84, 0xB2C8FB84, 0xB2C9FB84, 0xB2CAFB84, + 0xB2CBFB84, 0xB2CCFB84, 0xB2CDFB84, 0xB2CEFB84, 0xB2CFFB84, 0xB2D0FB84, 0xB2D1FB84, 0xB2D2FB84, 0xB2D3FB84, 0xB2D4FB84, 0xB2D5FB84, 0xB2D6FB84, 0xB2D7FB84, 0xB2D8FB84, 0xB2D9FB84, + 0xB2DAFB84, 0xB2DBFB84, 0xB2DCFB84, 0xB2DDFB84, 0xB2DEFB84, 0xB2DFFB84, 0xB2E0FB84, 0xB2E1FB84, 0xB2E2FB84, 0xB2E3FB84, 0xB2E4FB84, 0xB2E5FB84, 0xB2E6FB84, 0xB2E7FB84, 0xB2E8FB84, + 0xB2E9FB84, 0xB2EAFB84, 0xB2EBFB84, 0xB2ECFB84, 0xB2EDFB84, 0xB2EEFB84, 0xB2EFFB84, 0xB2F0FB84, 0xB2F1FB84, 0xB2F2FB84, 0xB2F3FB84, 0xB2F4FB84, 0xB2F5FB84, 0xB2F6FB84, 0xB2F7FB84, + 0xB2F8FB84, 0xB2F9FB84, 0xB2FAFB84, 0xB2FBFB84, 0xB2FCFB84, 0xB2FDFB84, 0xB2FEFB84, 0xB2FFFB84, 0xB300FB84, 0xB301FB84, 0xB302FB84, 0xB303FB84, 0xB304FB84, 0xB305FB84, 0xB306FB84, + 0xB307FB84, 0xB308FB84, 0xB309FB84, 0xB30AFB84, 0xB30BFB84, 0xB30CFB84, 0xB30DFB84, 0xB30EFB84, 0xB30FFB84, 0xB310FB84, 0xB311FB84, 0xB312FB84, 0xB313FB84, 0xB314FB84, 0xB315FB84, + 0xB316FB84, 0xB317FB84, 0xB318FB84, 0xB319FB84, 0xB31AFB84, 0xB31BFB84, 0xB31CFB84, 0xB31DFB84, 0xB31EFB84, 0xB31FFB84, 0xB320FB84, 0xB321FB84, 0xB322FB84, 0xB323FB84, 0xB324FB84, + 0xB325FB84, 0xB326FB84, 0xB327FB84, 0xB328FB84, 0xB329FB84, 0xB32AFB84, 0xB32BFB84, 0xB32CFB84, 0xB32DFB84, 0xB32EFB84, 0xB32FFB84, 0xB330FB84, 0xB331FB84, 0xB332FB84, 0xB333FB84, + 0xB334FB84, 0xB335FB84, 0xB336FB84, 0xB337FB84, 0xB338FB84, 0xB339FB84, 0xB33AFB84, 0xB33BFB84, 0xB33CFB84, 0xB33DFB84, 0xB33EFB84, 
0xB33FFB84, 0xB340FB84, 0xB341FB84, 0xB342FB84, + 0xB343FB84, 0xB344FB84, 0xB345FB84, 0xB346FB84, 0xB347FB84, 0xB348FB84, 0xB349FB84, 0xB34AFB84, 0xB34BFB84, 0xB34CFB84, 0xB34DFB84, 0xB34EFB84, 0xB34FFB84, 0xB350FB84, 0xB351FB84, + 0xB352FB84, 0xB353FB84, 0xB354FB84, 0xB355FB84, 0xB356FB84, 0xB357FB84, 0xB358FB84, 0xB359FB84, 0xB35AFB84, 0xB35BFB84, 0xB35CFB84, 0xB35DFB84, 0xB35EFB84, 0xB35FFB84, 0xB360FB84, + 0xB361FB84, 0xB362FB84, 0xB363FB84, 0xB364FB84, 0xB365FB84, 0xB366FB84, 0xB367FB84, 0xB368FB84, 0xB369FB84, 0xB36AFB84, 0xB36BFB84, 0xB36CFB84, 0xB36DFB84, 0xB36EFB84, 0xB36FFB84, + 0xB370FB84, 0xB371FB84, 0xB372FB84, 0xB373FB84, 0xB374FB84, 0xB375FB84, 0xB376FB84, 0xB377FB84, 0xB378FB84, 0xB379FB84, 0xB37AFB84, 0xB37BFB84, 0xB37CFB84, 0xB37DFB84, 0xB37EFB84, + 0xB37FFB84, 0xB380FB84, 0xB381FB84, 0xB382FB84, 0xB383FB84, 0xB384FB84, 0xB385FB84, 0xB386FB84, 0xB387FB84, 0xB388FB84, 0xB389FB84, 0xB38AFB84, 0xB38BFB84, 0xB38CFB84, 0xB38DFB84, + 0xB38EFB84, 0xB38FFB84, 0xB390FB84, 0xB391FB84, 0xB392FB84, 0xB393FB84, 0xB394FB84, 0xB395FB84, 0xB396FB84, 0xB397FB84, 0xB398FB84, 0xB399FB84, 0xB39AFB84, 0xB39BFB84, 0xB39CFB84, + 0xB39DFB84, 0xB39EFB84, 0xB39FFB84, 0xB3A0FB84, 0xB3A1FB84, 0xB3A2FB84, 0xB3A3FB84, 0xB3A4FB84, 0xB3A5FB84, 0xB3A6FB84, 0xB3A7FB84, 0xB3A8FB84, 0xB3A9FB84, 0xB3AAFB84, 0xB3ABFB84, + 0xB3ACFB84, 0xB3ADFB84, 0xB3AEFB84, 0xB3AFFB84, 0xB3B0FB84, 0xB3B1FB84, 0xB3B2FB84, 0xB3B3FB84, 0xB3B4FB84, 0xB3B5FB84, 0xB3B6FB84, 0xB3B7FB84, 0xB3B8FB84, 0xB3B9FB84, 0xB3BAFB84, + 0xB3BBFB84, 0xB3BCFB84, 0xB3BDFB84, 0xB3BEFB84, 0xB3BFFB84, 0xB3C0FB84, 0xB3C1FB84, 0xB3C2FB84, 0xB3C3FB84, 0xB3C4FB84, 0xB3C5FB84, 0xB3C6FB84, 0xB3C7FB84, 0xB3C8FB84, 0xB3C9FB84, + 0xB3CAFB84, 0xB3CBFB84, 0xB3CCFB84, 0xB3CDFB84, 0xB3CEFB84, 0xB3CFFB84, 0xB3D0FB84, 0xB3D1FB84, 0xB3D2FB84, 0xB3D3FB84, 0xB3D4FB84, 0xB3D5FB84, 0xB3D6FB84, 0xB3D7FB84, 0xB3D8FB84, + 0xB3D9FB84, 0xB3DAFB84, 0xB3DBFB84, 0xB3DCFB84, 0xB3DDFB84, 0xB3DEFB84, 0xB3DFFB84, 0xB3E0FB84, 0xB3E1FB84, 0xB3E2FB84, 
0xB3E3FB84, 0xB3E4FB84, 0xB3E5FB84, 0xB3E6FB84, 0xB3E7FB84, + 0xB3E8FB84, 0xB3E9FB84, 0xB3EAFB84, 0xB3EBFB84, 0xB3ECFB84, 0xB3EDFB84, 0xB3EEFB84, 0xB3EFFB84, 0xB3F0FB84, 0xB3F1FB84, 0xB3F2FB84, 0xB3F3FB84, 0xB3F4FB84, 0xB3F5FB84, 0xB3F6FB84, + 0xB3F7FB84, 0xB3F8FB84, 0xB3F9FB84, 0xB3FAFB84, 0xB3FBFB84, 0xB3FCFB84, 0xB3FDFB84, 0xB3FEFB84, 0xB3FFFB84, 0xB400FB84, 0xB401FB84, 0xB402FB84, 0xB403FB84, 0xB404FB84, 0xB405FB84, + 0xB406FB84, 0xB407FB84, 0xB408FB84, 0xB409FB84, 0xB40AFB84, 0xB40BFB84, 0xB40CFB84, 0xB40DFB84, 0xB40EFB84, 0xB40FFB84, 0xB410FB84, 0xB411FB84, 0xB412FB84, 0xB413FB84, 0xB414FB84, + 0xB415FB84, 0xB416FB84, 0xB417FB84, 0xB418FB84, 0xB419FB84, 0xB41AFB84, 0xB41BFB84, 0xB41CFB84, 0xB41DFB84, 0xB41EFB84, 0xB41FFB84, 0xB420FB84, 0xB421FB84, 0xB422FB84, 0xB423FB84, + 0xB424FB84, 0xB425FB84, 0xB426FB84, 0xB427FB84, 0xB428FB84, 0xB429FB84, 0xB42AFB84, 0xB42BFB84, 0xB42CFB84, 0xB42DFB84, 0xB42EFB84, 0xB42FFB84, 0xB430FB84, 0xB431FB84, 0xB432FB84, + 0xB433FB84, 0xB434FB84, 0xB435FB84, 0xB436FB84, 0xB437FB84, 0xB438FB84, 0xB439FB84, 0xB43AFB84, 0xB43BFB84, 0xB43CFB84, 0xB43DFB84, 0xB43EFB84, 0xB43FFB84, 0xB440FB84, 0xB441FB84, + 0xB442FB84, 0xB443FB84, 0xB444FB84, 0xB445FB84, 0xB446FB84, 0xB447FB84, 0xB448FB84, 0xB449FB84, 0xB44AFB84, 0xB44BFB84, 0xB44CFB84, 0xB44DFB84, 0xB44EFB84, 0xB44FFB84, 0xB450FB84, + 0xB451FB84, 0xB452FB84, 0xB453FB84, 0xB454FB84, 0xB455FB84, 0xB456FB84, 0xB457FB84, 0xB458FB84, 0xB459FB84, 0xB45AFB84, 0xB45BFB84, 0xB45CFB84, 0xB45DFB84, 0xB45EFB84, 0xB45FFB84, + 0xB460FB84, 0xB461FB84, 0xB462FB84, 0xB463FB84, 0xB464FB84, 0xB465FB84, 0xB466FB84, 0xB467FB84, 0xB468FB84, 0xB469FB84, 0xB46AFB84, 0xB46BFB84, 0xB46CFB84, 0xB46DFB84, 0xB46EFB84, + 0xB46FFB84, 0xB470FB84, 0xB471FB84, 0xB472FB84, 0xB473FB84, 0xB474FB84, 0xB475FB84, 0xB476FB84, 0xB477FB84, 0xB478FB84, 0xB479FB84, 0xB47AFB84, 0xB47BFB84, 0xB47CFB84, 0xB47DFB84, + 0xB47EFB84, 0xB47FFB84, 0xB480FB84, 0xB481FB84, 0xB482FB84, 0xB483FB84, 0xB484FB84, 0xB485FB84, 0xB486FB84, 
0xB487FB84, 0xB488FB84, 0xB489FB84, 0xB48AFB84, 0xB48BFB84, 0xB48CFB84, + 0xB48DFB84, 0xB48EFB84, 0xB48FFB84, 0xB490FB84, 0xB491FB84, 0xB492FB84, 0xB493FB84, 0xB494FB84, 0xB495FB84, 0xB496FB84, 0xB497FB84, 0xB498FB84, 0xB499FB84, 0xB49AFB84, 0xB49BFB84, + 0xB49CFB84, 0xB49DFB84, 0xB49EFB84, 0xB49FFB84, 0xB4A0FB84, 0xB4A1FB84, 0xB4A2FB84, 0xB4A3FB84, 0xB4A4FB84, 0xB4A5FB84, 0xB4A6FB84, 0xB4A7FB84, 0xB4A8FB84, 0xB4A9FB84, 0xB4AAFB84, + 0xB4ABFB84, 0xB4ACFB84, 0xB4ADFB84, 0xB4AEFB84, 0xB4AFFB84, 0xB4B0FB84, 0xB4B1FB84, 0xB4B2FB84, 0xB4B3FB84, 0xB4B4FB84, 0xB4B5FB84, 0xB4B6FB84, 0xB4B7FB84, 0xB4B8FB84, 0xB4B9FB84, + 0xB4BAFB84, 0xB4BBFB84, 0xB4BCFB84, 0xB4BDFB84, 0xB4BEFB84, 0xB4BFFB84, 0xB4C0FB84, 0xB4C1FB84, 0xB4C2FB84, 0xB4C3FB84, 0xB4C4FB84, 0xB4C5FB84, 0xB4C6FB84, 0xB4C7FB84, 0xB4C8FB84, + 0xB4C9FB84, 0xB4CAFB84, 0xB4CBFB84, 0xB4CCFB84, 0xB4CDFB84, 0xB4CEFB84, 0xB4CFFB84, 0xB4D0FB84, 0xB4D1FB84, 0xB4D2FB84, 0xB4D3FB84, 0xB4D4FB84, 0xB4D5FB84, 0xB4D6FB84, 0xB4D7FB84, + 0xB4D8FB84, 0xB4D9FB84, 0xB4DAFB84, 0xB4DBFB84, 0xB4DCFB84, 0xB4DDFB84, 0xB4DEFB84, 0xB4DFFB84, 0xB4E0FB84, 0xB4E1FB84, 0xB4E2FB84, 0xB4E3FB84, 0xB4E4FB84, 0xB4E5FB84, 0xB4E6FB84, + 0xB4E7FB84, 0xB4E8FB84, 0xB4E9FB84, 0xB4EAFB84, 0xB4EBFB84, 0xB4ECFB84, 0xB4EDFB84, 0xB4EEFB84, 0xB4EFFB84, 0xB4F0FB84, 0xB4F1FB84, 0xB4F2FB84, 0xB4F3FB84, 0xB4F4FB84, 0xB4F5FB84, + 0xB4F6FB84, 0xB4F7FB84, 0xB4F8FB84, 0xB4F9FB84, 0xB4FAFB84, 0xB4FBFB84, 0xB4FCFB84, 0xB4FDFB84, 0xB4FEFB84, 0xB4FFFB84, 0xB500FB84, 0xB501FB84, 0xB502FB84, 0xB503FB84, 0xB504FB84, + 0xB505FB84, 0xB506FB84, 0xB507FB84, 0xB508FB84, 0xB509FB84, 0xB50AFB84, 0xB50BFB84, 0xB50CFB84, 0xB50DFB84, 0xB50EFB84, 0xB50FFB84, 0xB510FB84, 0xB511FB84, 0xB512FB84, 0xB513FB84, + 0xB514FB84, 0xB515FB84, 0xB516FB84, 0xB517FB84, 0xB518FB84, 0xB519FB84, 0xB51AFB84, 0xB51BFB84, 0xB51CFB84, 0xB51DFB84, 0xB51EFB84, 0xB51FFB84, 0xB520FB84, 0xB521FB84, 0xB522FB84, + 0xB523FB84, 0xB524FB84, 0xB525FB84, 0xB526FB84, 0xB527FB84, 0xB528FB84, 0xB529FB84, 0xB52AFB84, 
0xB52BFB84, 0xB52CFB84, 0xB52DFB84, 0xB52EFB84, 0xB52FFB84, 0xB530FB84, 0xB531FB84, + 0xB532FB84, 0xB533FB84, 0xB534FB84, 0xB535FB84, 0xB536FB84, 0xB537FB84, 0xB538FB84, 0xB539FB84, 0xB53AFB84, 0xB53BFB84, 0xB53CFB84, 0xB53DFB84, 0xB53EFB84, 0xB53FFB84, 0xB540FB84, + 0xB541FB84, 0xB542FB84, 0xB543FB84, 0xB544FB84, 0xB545FB84, 0xB546FB84, 0xB547FB84, 0xB548FB84, 0xB549FB84, 0xB54AFB84, 0xB54BFB84, 0xB54CFB84, 0xB54DFB84, 0xB54EFB84, 0xB54FFB84, + 0xB550FB84, 0xB551FB84, 0xB552FB84, 0xB553FB84, 0xB554FB84, 0xB555FB84, 0xB556FB84, 0xB557FB84, 0xB558FB84, 0xB559FB84, 0xB55AFB84, 0xB55BFB84, 0xB55CFB84, 0xB55DFB84, 0xB55EFB84, + 0xB55FFB84, 0xB560FB84, 0xB561FB84, 0xB562FB84, 0xB563FB84, 0xB564FB84, 0xB565FB84, 0xB566FB84, 0xB567FB84, 0xB568FB84, 0xB569FB84, 0xB56AFB84, 0xB56BFB84, 0xB56CFB84, 0xB56DFB84, + 0xB56EFB84, 0xB56FFB84, 0xB570FB84, 0xB571FB84, 0xB572FB84, 0xB573FB84, 0xB574FB84, 0xB575FB84, 0xB576FB84, 0xB577FB84, 0xB578FB84, 0xB579FB84, 0xB57AFB84, 0xB57BFB84, 0xB57CFB84, + 0xB57DFB84, 0xB57EFB84, 0xB57FFB84, 0xB580FB84, 0xB581FB84, 0xB582FB84, 0xB583FB84, 0xB584FB84, 0xB585FB84, 0xB586FB84, 0xB587FB84, 0xB588FB84, 0xB589FB84, 0xB58AFB84, 0xB58BFB84, + 0xB58CFB84, 0xB58DFB84, 0xB58EFB84, 0xB58FFB84, 0xB590FB84, 0xB591FB84, 0xB592FB84, 0xB593FB84, 0xB594FB84, 0xB595FB84, 0xB596FB84, 0xB597FB84, 0xB598FB84, 0xB599FB84, 0xB59AFB84, + 0xB59BFB84, 0xB59CFB84, 0xB59DFB84, 0xB59EFB84, 0xB59FFB84, 0xB5A0FB84, 0xB5A1FB84, 0xB5A2FB84, 0xB5A3FB84, 0xB5A4FB84, 0xB5A5FB84, 0xB5A6FB84, 0xB5A7FB84, 0xB5A8FB84, 0xB5A9FB84, + 0xB5AAFB84, 0xB5ABFB84, 0xB5ACFB84, 0xB5ADFB84, 0xB5AEFB84, 0xB5AFFB84, 0xB5B0FB84, 0xB5B1FB84, 0xB5B2FB84, 0xB5B3FB84, 0xB5B4FB84, 0xB5B5FB84, 0xB5B6FB84, 0xB5B7FB84, 0xB5B8FB84, + 0xB5B9FB84, 0xB5BAFB84, 0xB5BBFB84, 0xB5BCFB84, 0xB5BDFB84, 0xB5BEFB84, 0xB5BFFB84, 0xB5C0FB84, 0xB5C1FB84, 0xB5C2FB84, 0xB5C3FB84, 0xB5C4FB84, 0xB5C5FB84, 0xB5C6FB84, 0xB5C7FB84, + 0xB5C8FB84, 0xB5C9FB84, 0xB5CAFB84, 0xB5CBFB84, 0xB5CCFB84, 0xB5CDFB84, 0xB5CEFB84, 
0xB5CFFB84, 0xB5D0FB84, 0xB5D1FB84, 0xB5D2FB84, 0xB5D3FB84, 0xB5D4FB84, 0xB5D5FB84, 0xB5D6FB84, + 0xB5D7FB84, 0xB5D8FB84, 0xB5D9FB84, 0xB5DAFB84, 0xB5DBFB84, 0xB5DCFB84, 0xB5DDFB84, 0xB5DEFB84, 0xB5DFFB84, 0xB5E0FB84, 0xB5E1FB84, 0xB5E2FB84, 0xB5E3FB84, 0xB5E4FB84, 0xB5E5FB84, + 0xB5E6FB84, 0xB5E7FB84, 0xB5E8FB84, 0xB5E9FB84, 0xB5EAFB84, 0xB5EBFB84, 0xB5ECFB84, 0xB5EDFB84, 0xB5EEFB84, 0xB5EFFB84, 0xB5F0FB84, 0xB5F1FB84, 0xB5F2FB84, 0xB5F3FB84, 0xB5F4FB84, + 0xB5F5FB84, 0xB5F6FB84, 0xB5F7FB84, 0xB5F8FB84, 0xB5F9FB84, 0xB5FAFB84, 0xB5FBFB84, 0xB5FCFB84, 0xB5FDFB84, 0xB5FEFB84, 0xB5FFFB84, 0xB600FB84, 0xB601FB84, 0xB602FB84, 0xB603FB84, + 0xB604FB84, 0xB605FB84, 0xB606FB84, 0xB607FB84, 0xB608FB84, 0xB609FB84, 0xB60AFB84, 0xB60BFB84, 0xB60CFB84, 0xB60DFB84, 0xB60EFB84, 0xB60FFB84, 0xB610FB84, 0xB611FB84, 0xB612FB84, + 0xB613FB84, 0xB614FB84, 0xB615FB84, 0xB616FB84, 0xB617FB84, 0xB618FB84, 0xB619FB84, 0xB61AFB84, 0xB61BFB84, 0xB61CFB84, 0xB61DFB84, 0xB61EFB84, 0xB61FFB84, 0xB620FB84, 0xB621FB84, + 0xB622FB84, 0xB623FB84, 0xB624FB84, 0xB625FB84, 0xB626FB84, 0xB627FB84, 0xB628FB84, 0xB629FB84, 0xB62AFB84, 0xB62BFB84, 0xB62CFB84, 0xB62DFB84, 0xB62EFB84, 0xB62FFB84, 0xB630FB84, + 0xB631FB84, 0xB632FB84, 0xB633FB84, 0xB634FB84, 0xB635FB84, 0xB636FB84, 0xB637FB84, 0xB638FB84, 0xB639FB84, 0xB63AFB84, 0xB63BFB84, 0xB63CFB84, 0xB63DFB84, 0xB63EFB84, 0xB63FFB84, + 0xB640FB84, 0xB641FB84, 0xB642FB84, 0xB643FB84, 0xB644FB84, 0xB645FB84, 0xB646FB84, 0xB647FB84, 0xB648FB84, 0xB649FB84, 0xB64AFB84, 0xB64BFB84, 0xB64CFB84, 0xB64DFB84, 0xB64EFB84, + 0xB64FFB84, 0xB650FB84, 0xB651FB84, 0xB652FB84, 0xB653FB84, 0xB654FB84, 0xB655FB84, 0xB656FB84, 0xB657FB84, 0xB658FB84, 0xB659FB84, 0xB65AFB84, 0xB65BFB84, 0xB65CFB84, 0xB65DFB84, + 0xB65EFB84, 0xB65FFB84, 0xB660FB84, 0xB661FB84, 0xB662FB84, 0xB663FB84, 0xB664FB84, 0xB665FB84, 0xB666FB84, 0xB667FB84, 0xB668FB84, 0xB669FB84, 0xB66AFB84, 0xB66BFB84, 0xB66CFB84, + 0xB66DFB84, 0xB66EFB84, 0xB66FFB84, 0xB670FB84, 0xB671FB84, 0xB672FB84, 
0xB673FB84, 0xB674FB84, 0xB675FB84, 0xB676FB84, 0xB677FB84, 0xB678FB84, 0xB679FB84, 0xB67AFB84, 0xB67BFB84, + 0xB67CFB84, 0xB67DFB84, 0xB67EFB84, 0xB67FFB84, 0xB680FB84, 0xB681FB84, 0xB682FB84, 0xB683FB84, 0xB684FB84, 0xB685FB84, 0xB686FB84, 0xB687FB84, 0xB688FB84, 0xB689FB84, 0xB68AFB84, + 0xB68BFB84, 0xB68CFB84, 0xB68DFB84, 0xB68EFB84, 0xB68FFB84, 0xB690FB84, 0xB691FB84, 0xB692FB84, 0xB693FB84, 0xB694FB84, 0xB695FB84, 0xB696FB84, 0xB697FB84, 0xB698FB84, 0xB699FB84, + 0xB69AFB84, 0xB69BFB84, 0xB69CFB84, 0xB69DFB84, 0xB69EFB84, 0xB69FFB84, 0xB6A0FB84, 0xB6A1FB84, 0xB6A2FB84, 0xB6A3FB84, 0xB6A4FB84, 0xB6A5FB84, 0xB6A6FB84, 0xB6A7FB84, 0xB6A8FB84, + 0xB6A9FB84, 0xB6AAFB84, 0xB6ABFB84, 0xB6ACFB84, 0xB6ADFB84, 0xB6AEFB84, 0xB6AFFB84, 0xB6B0FB84, 0xB6B1FB84, 0xB6B2FB84, 0xB6B3FB84, 0xB6B4FB84, 0xB6B5FB84, 0xB6B6FB84, 0xB6B7FB84, + 0xB6B8FB84, 0xB6B9FB84, 0xB6BAFB84, 0xB6BBFB84, 0xB6BCFB84, 0xB6BDFB84, 0xB6BEFB84, 0xB6BFFB84, 0xB6C0FB84, 0xB6C1FB84, 0xB6C2FB84, 0xB6C3FB84, 0xB6C4FB84, 0xB6C5FB84, 0xB6C6FB84, + 0xB6C7FB84, 0xB6C8FB84, 0xB6C9FB84, 0xB6CAFB84, 0xB6CBFB84, 0xB6CCFB84, 0xB6CDFB84, 0xB6CEFB84, 0xB6CFFB84, 0xB6D0FB84, 0xB6D1FB84, 0xB6D2FB84, 0xB6D3FB84, 0xB6D4FB84, 0xB6D5FB84, + 0xB6D6FB84, 0xB6D7FB84, 0xB6D8FB84, 0xB6D9FB84, 0xB6DAFB84, 0xB6DBFB84, 0xB6DCFB84, 0xB6DDFB84, 0xB6DEFB84, 0xB6DFFB84, 0xB6E0FB84, 0xB6E1FB84, 0xB6E2FB84, 0xB6E3FB84, 0xB6E4FB84, + 0xB6E5FB84, 0xB6E6FB84, 0xB6E7FB84, 0xB6E8FB84, 0xB6E9FB84, 0xB6EAFB84, 0xB6EBFB84, 0xB6ECFB84, 0xB6EDFB84, 0xB6EEFB84, 0xB6EFFB84, 0xB6F0FB84, 0xB6F1FB84, 0xB6F2FB84, 0xB6F3FB84, + 0xB6F4FB84, 0xB6F5FB84, 0xB6F6FB84, 0xB6F7FB84, 0xB6F8FB84, 0xB6F9FB84, 0xB6FAFB84, 0xB6FBFB84, 0xB6FCFB84, 0xB6FDFB84, 0xB6FEFB84, 0xB6FFFB84, 0xB700FB84, 0xB701FB84, 0xB702FB84, + 0xB703FB84, 0xB704FB84, 0xB705FB84, 0xB706FB84, 0xB707FB84, 0xB708FB84, 0xB709FB84, 0xB70AFB84, 0xB70BFB84, 0xB70CFB84, 0xB70DFB84, 0xB70EFB84, 0xB70FFB84, 0xB710FB84, 0xB711FB84, + 0xB712FB84, 0xB713FB84, 0xB714FB84, 0xB715FB84, 0xB716FB84, 
0xB717FB84, 0xB718FB84, 0xB719FB84, 0xB71AFB84, 0xB71BFB84, 0xB71CFB84, 0xB71DFB84, 0xB71EFB84, 0xB71FFB84, 0xB720FB84, + 0xB721FB84, 0xB722FB84, 0xB723FB84, 0xB724FB84, 0xB725FB84, 0xB726FB84, 0xB727FB84, 0xB728FB84, 0xB729FB84, 0xB72AFB84, 0xB72BFB84, 0xB72CFB84, 0xB72DFB84, 0xB72EFB84, 0xB72FFB84, + 0xB730FB84, 0xB731FB84, 0xB732FB84, 0xB733FB84, 0xB734FB84, 0xB735FB84, 0xB736FB84, 0xB737FB84, 0xB738FB84, 0xB739FB84, 0xB73AFB84, 0xB73BFB84, 0xB73CFB84, 0xB73DFB84, 0xB73EFB84, + 0xB73FFB84, 0xB740FB84, 0xB741FB84, 0xB742FB84, 0xB743FB84, 0xB744FB84, 0xB745FB84, 0xB746FB84, 0xB747FB84, 0xB748FB84, 0xB749FB84, 0xB74AFB84, 0xB74BFB84, 0xB74CFB84, 0xB74DFB84, + 0xB74EFB84, 0xB74FFB84, 0xB750FB84, 0xB751FB84, 0xB752FB84, 0xB753FB84, 0xB754FB84, 0xB755FB84, 0xB756FB84, 0xB757FB84, 0xB758FB84, 0xB759FB84, 0xB75AFB84, 0xB75BFB84, 0xB75CFB84, + 0xB75DFB84, 0xB75EFB84, 0xB75FFB84, 0xB760FB84, 0xB761FB84, 0xB762FB84, 0xB763FB84, 0xB764FB84, 0xB765FB84, 0xB766FB84, 0xB767FB84, 0xB768FB84, 0xB769FB84, 0xB76AFB84, 0xB76BFB84, + 0xB76CFB84, 0xB76DFB84, 0xB76EFB84, 0xB76FFB84, 0xB770FB84, 0xB771FB84, 0xB772FB84, 0xB773FB84, 0xB774FB84, 0xB775FB84, 0xB776FB84, 0xB777FB84, 0xB778FB84, 0xB779FB84, 0xB77AFB84, + 0xB77BFB84, 0xB77CFB84, 0xB77DFB84, 0xB77EFB84, 0xB77FFB84, 0xB780FB84, 0xB781FB84, 0xB782FB84, 0xB783FB84, 0xB784FB84, 0xB785FB84, 0xB786FB84, 0xB787FB84, 0xB788FB84, 0xB789FB84, + 0xB78AFB84, 0xB78BFB84, 0xB78CFB84, 0xB78DFB84, 0xB78EFB84, 0xB78FFB84, 0xB790FB84, 0xB791FB84, 0xB792FB84, 0xB793FB84, 0xB794FB84, 0xB795FB84, 0xB796FB84, 0xB797FB84, 0xB798FB84, + 0xB799FB84, 0xB79AFB84, 0xB79BFB84, 0xB79CFB84, 0xB79DFB84, 0xB79EFB84, 0xB79FFB84, 0xB7A0FB84, 0xB7A1FB84, 0xB7A2FB84, 0xB7A3FB84, 0xB7A4FB84, 0xB7A5FB84, 0xB7A6FB84, 0xB7A7FB84, + 0xB7A8FB84, 0xB7A9FB84, 0xB7AAFB84, 0xB7ABFB84, 0xB7ACFB84, 0xB7ADFB84, 0xB7AEFB84, 0xB7AFFB84, 0xB7B0FB84, 0xB7B1FB84, 0xB7B2FB84, 0xB7B3FB84, 0xB7B4FB84, 0xB7B5FB84, 0xB7B6FB84, + 0xB7B7FB84, 0xB7B8FB84, 0xB7B9FB84, 0xB7BAFB84, 
0xB7BBFB84, 0xB7BCFB84, 0xB7BDFB84, 0xB7BEFB84, 0xB7BFFB84, 0xB7C0FB84, 0xB7C1FB84, 0xB7C2FB84, 0xB7C3FB84, 0xB7C4FB84, 0xB7C5FB84, + 0xB7C6FB84, 0xB7C7FB84, 0xB7C8FB84, 0xB7C9FB84, 0xB7CAFB84, 0xB7CBFB84, 0xB7CCFB84, 0xB7CDFB84, 0xB7CEFB84, 0xB7CFFB84, 0xB7D0FB84, 0xB7D1FB84, 0xB7D2FB84, 0xB7D3FB84, 0xB7D4FB84, + 0xB7D5FB84, 0xB7D6FB84, 0xB7D7FB84, 0xB7D8FB84, 0xB7D9FB84, 0xB7DAFB84, 0xB7DBFB84, 0xB7DCFB84, 0xB7DDFB84, 0xB7DEFB84, 0xB7DFFB84, 0xB7E0FB84, 0xB7E1FB84, 0xB7E2FB84, 0xB7E3FB84, + 0xB7E4FB84, 0xB7E5FB84, 0xB7E6FB84, 0xB7E7FB84, 0xB7E8FB84, 0xB7E9FB84, 0xB7EAFB84, 0xB7EBFB84, 0xB7ECFB84, 0xB7EDFB84, 0xB7EEFB84, 0xB7EFFB84, 0xB7F0FB84, 0xB7F1FB84, 0xB7F2FB84, + 0xB7F3FB84, 0xB7F4FB84, 0xB7F5FB84, 0xB7F6FB84, 0xB7F7FB84, 0xB7F8FB84, 0xB7F9FB84, 0xB7FAFB84, 0xB7FBFB84, 0xB7FCFB84, 0xB7FDFB84, 0xB7FEFB84, 0xB7FFFB84, 0xB800FB84, 0xB801FB84, + 0xB802FB84, 0xB803FB84, 0xB804FB84, 0xB805FB84, 0xB806FB84, 0xB807FB84, 0xB808FB84, 0xB809FB84, 0xB80AFB84, 0xB80BFB84, 0xB80CFB84, 0xB80DFB84, 0xB80EFB84, 0xB80FFB84, 0xB810FB84, + 0xB811FB84, 0xB812FB84, 0xB813FB84, 0xB814FB84, 0xB815FB84, 0xB816FB84, 0xB817FB84, 0xB818FB84, 0xB819FB84, 0xB81AFB84, 0xB81BFB84, 0xB81CFB84, 0xB81DFB84, 0xB81EFB84, 0xB81FFB84, + 0xB820FB84, 0xB821FB84, 0xB822FB84, 0xB823FB84, 0xB824FB84, 0xB825FB84, 0xB826FB84, 0xB827FB84, 0xB828FB84, 0xB829FB84, 0xB82AFB84, 0xB82BFB84, 0xB82CFB84, 0xB82DFB84, 0xB82EFB84, + 0xB82FFB84, 0xB830FB84, 0xB831FB84, 0xB832FB84, 0xB833FB84, 0xB834FB84, 0xB835FB84, 0xB836FB84, 0xB837FB84, 0xB838FB84, 0xB839FB84, 0xB83AFB84, 0xB83BFB84, 0xB83CFB84, 0xB83DFB84, + 0xB83EFB84, 0xB83FFB84, 0xB840FB84, 0xB841FB84, 0xB842FB84, 0xB843FB84, 0xB844FB84, 0xB845FB84, 0xB846FB84, 0xB847FB84, 0xB848FB84, 0xB849FB84, 0xB84AFB84, 0xB84BFB84, 0xB84CFB84, + 0xB84DFB84, 0xB84EFB84, 0xB84FFB84, 0xB850FB84, 0xB851FB84, 0xB852FB84, 0xB853FB84, 0xB854FB84, 0xB855FB84, 0xB856FB84, 0xB857FB84, 0xB858FB84, 0xB859FB84, 0xB85AFB84, 0xB85BFB84, + 0xB85CFB84, 0xB85DFB84, 0xB85EFB84, 
0xB85FFB84, 0xB860FB84, 0xB861FB84, 0xB862FB84, 0xB863FB84, 0xB864FB84, 0xB865FB84, 0xB866FB84, 0xB867FB84, 0xB868FB84, 0xB869FB84, 0xB86AFB84, + 0xB86BFB84, 0xB86CFB84, 0xB86DFB84, 0xB86EFB84, 0xB86FFB84, 0xB870FB84, 0xB871FB84, 0xB872FB84, 0xB873FB84, 0xB874FB84, 0xB875FB84, 0xB876FB84, 0xB877FB84, 0xB878FB84, 0xB879FB84, + 0xB87AFB84, 0xB87BFB84, 0xB87CFB84, 0xB87DFB84, 0xB87EFB84, 0xB87FFB84, 0xB880FB84, 0xB881FB84, 0xB882FB84, 0xB883FB84, 0xB884FB84, 0xB885FB84, 0xB886FB84, 0xB887FB84, 0xB888FB84, + 0xB889FB84, 0xB88AFB84, 0xB88BFB84, 0xB88CFB84, 0xB88DFB84, 0xB88EFB84, 0xB88FFB84, 0xB890FB84, 0xB891FB84, 0xB892FB84, 0xB893FB84, 0xB894FB84, 0xB895FB84, 0xB896FB84, 0xB897FB84, + 0xB898FB84, 0xB899FB84, 0xB89AFB84, 0xB89BFB84, 0xB89CFB84, 0xB89DFB84, 0xB89EFB84, 0xB89FFB84, 0xB8A0FB84, 0xB8A1FB84, 0xB8A2FB84, 0xB8A3FB84, 0xB8A4FB84, 0xB8A5FB84, 0xB8A6FB84, + 0xB8A7FB84, 0xB8A8FB84, 0xB8A9FB84, 0xB8AAFB84, 0xB8ABFB84, 0xB8ACFB84, 0xB8ADFB84, 0xB8AEFB84, 0xB8AFFB84, 0xB8B0FB84, 0xB8B1FB84, 0xB8B2FB84, 0xB8B3FB84, 0xB8B4FB84, 0xB8B5FB84, + 0xB8B6FB84, 0xB8B7FB84, 0xB8B8FB84, 0xB8B9FB84, 0xB8BAFB84, 0xB8BBFB84, 0xB8BCFB84, 0xB8BDFB84, 0xB8BEFB84, 0xB8BFFB84, 0xB8C0FB84, 0xB8C1FB84, 0xB8C2FB84, 0xB8C3FB84, 0xB8C4FB84, + 0xB8C5FB84, 0xB8C6FB84, 0xB8C7FB84, 0xB8C8FB84, 0xB8C9FB84, 0xB8CAFB84, 0xB8CBFB84, 0xB8CCFB84, 0xB8CDFB84, 0xB8CEFB84, 0xB8CFFB84, 0xB8D0FB84, 0xB8D1FB84, 0xB8D2FB84, 0xB8D3FB84, + 0xB8D4FB84, 0xB8D5FB84, 0xB8D6FB84, 0xB8D7FB84, 0xB8D8FB84, 0xB8D9FB84, 0xB8DAFB84, 0xB8DBFB84, 0xB8DCFB84, 0xB8DDFB84, 0xB8DEFB84, 0xB8DFFB84, 0xB8E0FB84, 0xB8E1FB84, 0xB8E2FB84, + 0xB8E3FB84, 0xB8E4FB84, 0xB8E5FB84, 0xB8E6FB84, 0xB8E7FB84, 0xB8E8FB84, 0xB8E9FB84, 0xB8EAFB84, 0xB8EBFB84, 0xB8ECFB84, 0xB8EDFB84, 0xB8EEFB84, 0xB8EFFB84, 0xB8F0FB84, 0xB8F1FB84, + 0xB8F2FB84, 0xB8F3FB84, 0xB8F4FB84, 0xB8F5FB84, 0xB8F6FB84, 0xB8F7FB84, 0xB8F8FB84, 0xB8F9FB84, 0xB8FAFB84, 0xB8FBFB84, 0xB8FCFB84, 0xB8FDFB84, 0xB8FEFB84, 0xB8FFFB84, 0xB900FB84, + 0xB901FB84, 0xB902FB84, 
0xB903FB84, 0xB904FB84, 0xB905FB84, 0xB906FB84, 0xB907FB84, 0xB908FB84, 0xB909FB84, 0xB90AFB84, 0xB90BFB84, 0xB90CFB84, 0xB90DFB84, 0xB90EFB84, 0xB90FFB84, + 0xB910FB84, 0xB911FB84, 0xB912FB84, 0xB913FB84, 0xB914FB84, 0xB915FB84, 0xB916FB84, 0xB917FB84, 0xB918FB84, 0xB919FB84, 0xB91AFB84, 0xB91BFB84, 0xB91CFB84, 0xB91DFB84, 0xB91EFB84, + 0xB91FFB84, 0xB920FB84, 0xB921FB84, 0xB922FB84, 0xB923FB84, 0xB924FB84, 0xB925FB84, 0xB926FB84, 0xB927FB84, 0xB928FB84, 0xB929FB84, 0xB92AFB84, 0xB92BFB84, 0xB92CFB84, 0xB92DFB84, + 0xB92EFB84, 0xB92FFB84, 0xB930FB84, 0xB931FB84, 0xB932FB84, 0xB933FB84, 0xB934FB84, 0xB935FB84, 0xB936FB84, 0xB937FB84, 0xB938FB84, 0xB939FB84, 0xB93AFB84, 0xB93BFB84, 0xB93CFB84, + 0xB93DFB84, 0xB93EFB84, 0xB93FFB84, 0xB940FB84, 0xB941FB84, 0xB942FB84, 0xB943FB84, 0xB944FB84, 0xB945FB84, 0xB946FB84, 0xB947FB84, 0xB948FB84, 0xB949FB84, 0xB94AFB84, 0xB94BFB84, + 0xB94CFB84, 0xB94DFB84, 0xB94EFB84, 0xB94FFB84, 0xB950FB84, 0xB951FB84, 0xB952FB84, 0xB953FB84, 0xB954FB84, 0xB955FB84, 0xB956FB84, 0xB957FB84, 0xB958FB84, 0xB959FB84, 0xB95AFB84, + 0xB95BFB84, 0xB95CFB84, 0xB95DFB84, 0xB95EFB84, 0xB95FFB84, 0xB960FB84, 0xB961FB84, 0xB962FB84, 0xB963FB84, 0xB964FB84, 0xB965FB84, 0xB966FB84, 0xB967FB84, 0xB968FB84, 0xB969FB84, + 0xB96AFB84, 0xB96BFB84, 0xB96CFB84, 0xB96DFB84, 0xB96EFB84, 0xB96FFB84, 0xB970FB84, 0xB971FB84, 0xB972FB84, 0xB973FB84, 0xB974FB84, 0xB975FB84, 0xB976FB84, 0xB977FB84, 0xB978FB84, + 0xB979FB84, 0xB97AFB84, 0xB97BFB84, 0xB97CFB84, 0xB97DFB84, 0xB97EFB84, 0xB97FFB84, 0xB980FB84, 0xB981FB84, 0xB982FB84, 0xB983FB84, 0xB984FB84, 0xB985FB84, 0xB986FB84, 0xB987FB84, + 0xB988FB84, 0xB989FB84, 0xB98AFB84, 0xB98BFB84, 0xB98CFB84, 0xB98DFB84, 0xB98EFB84, 0xB98FFB84, 0xB990FB84, 0xB991FB84, 0xB992FB84, 0xB993FB84, 0xB994FB84, 0xB995FB84, 0xB996FB84, + 0xB997FB84, 0xB998FB84, 0xB999FB84, 0xB99AFB84, 0xB99BFB84, 0xB99CFB84, 0xB99DFB84, 0xB99EFB84, 0xB99FFB84, 0xB9A0FB84, 0xB9A1FB84, 0xB9A2FB84, 0xB9A3FB84, 0xB9A4FB84, 0xB9A5FB84, + 0xB9A6FB84, 
0xB9A7FB84, 0xB9A8FB84, 0xB9A9FB84, 0xB9AAFB84, 0xB9ABFB84, 0xB9ACFB84, 0xB9ADFB84, 0xB9AEFB84, 0xB9AFFB84, 0xB9B0FB84, 0xB9B1FB84, 0xB9B2FB84, 0xB9B3FB84, 0xB9B4FB84, + 0xB9B5FB84, 0xB9B6FB84, 0xB9B7FB84, 0xB9B8FB84, 0xB9B9FB84, 0xB9BAFB84, 0xB9BBFB84, 0xB9BCFB84, 0xB9BDFB84, 0xB9BEFB84, 0xB9BFFB84, 0xB9C0FB84, 0xB9C1FB84, 0xB9C2FB84, 0xB9C3FB84, + 0xB9C4FB84, 0xB9C5FB84, 0xB9C6FB84, 0xB9C7FB84, 0xB9C8FB84, 0xB9C9FB84, 0xB9CAFB84, 0xB9CBFB84, 0xB9CCFB84, 0xB9CDFB84, 0xB9CEFB84, 0xB9CFFB84, 0xB9D0FB84, 0xB9D1FB84, 0xB9D2FB84, + 0xB9D3FB84, 0xB9D4FB84, 0xB9D5FB84, 0xB9D6FB84, 0xB9D7FB84, 0xB9D8FB84, 0xB9D9FB84, 0xB9DAFB84, 0xB9DBFB84, 0xB9DCFB84, 0xB9DDFB84, 0xB9DEFB84, 0xB9DFFB84, 0xB9E0FB84, 0xB9E1FB84, + 0xB9E2FB84, 0xB9E3FB84, 0xB9E4FB84, 0xB9E5FB84, 0xB9E6FB84, 0xB9E7FB84, 0xB9E8FB84, 0xB9E9FB84, 0xB9EAFB84, 0xB9EBFB84, 0xB9ECFB84, 0xB9EDFB84, 0xB9EEFB84, 0xB9EFFB84, 0xB9F0FB84, + 0xB9F1FB84, 0xB9F2FB84, 0xB9F3FB84, 0xB9F4FB84, 0xB9F5FB84, 0xB9F6FB84, 0xB9F7FB84, 0xB9F8FB84, 0xB9F9FB84, 0xB9FAFB84, 0xB9FBFB84, 0xB9FCFB84, 0xB9FDFB84, 0xB9FEFB84, 0xB9FFFB84, + 0xBA00FB84, 0xBA01FB84, 0xBA02FB84, 0xBA03FB84, 0xBA04FB84, 0xBA05FB84, 0xBA06FB84, 0xBA07FB84, 0xBA08FB84, 0xBA09FB84, 0xBA0AFB84, 0xBA0BFB84, 0xBA0CFB84, 0xBA0DFB84, 0xBA0EFB84, + 0xBA0FFB84, 0xBA10FB84, 0xBA11FB84, 0xBA12FB84, 0xBA13FB84, 0xBA14FB84, 0xBA15FB84, 0xBA16FB84, 0xBA17FB84, 0xBA18FB84, 0xBA19FB84, 0xBA1AFB84, 0xBA1BFB84, 0xBA1CFB84, 0xBA1DFB84, + 0xBA1EFB84, 0xBA1FFB84, 0xBA20FB84, 0xBA21FB84, 0xBA22FB84, 0xBA23FB84, 0xBA24FB84, 0xBA25FB84, 0xBA26FB84, 0xBA27FB84, 0xBA28FB84, 0xBA29FB84, 0xBA2AFB84, 0xBA2BFB84, 0xBA2CFB84, + 0xBA2DFB84, 0xBA2EFB84, 0xBA2FFB84, 0xBA30FB84, 0xBA31FB84, 0xBA32FB84, 0xBA33FB84, 0xBA34FB84, 0xBA35FB84, 0xBA36FB84, 0xBA37FB84, 0xBA38FB84, 0xBA39FB84, 0xBA3AFB84, 0xBA3BFB84, + 0xBA3CFB84, 0xBA3DFB84, 0xBA3EFB84, 0xBA3FFB84, 0xBA40FB84, 0xBA41FB84, 0xBA42FB84, 0xBA43FB84, 0xBA44FB84, 0xBA45FB84, 0xBA46FB84, 0xBA47FB84, 0xBA48FB84, 0xBA49FB84, 0xBA4AFB84, + 
0xBA4BFB84, 0xBA4CFB84, 0xBA4DFB84, 0xBA4EFB84, 0xBA4FFB84, 0xBA50FB84, 0xBA51FB84, 0xBA52FB84, 0xBA53FB84, 0xBA54FB84, 0xBA55FB84, 0xBA56FB84, 0xBA57FB84, 0xBA58FB84, 0xBA59FB84, + 0xBA5AFB84, 0xBA5BFB84, 0xBA5CFB84, 0xBA5DFB84, 0xBA5EFB84, 0xBA5FFB84, 0xBA60FB84, 0xBA61FB84, 0xBA62FB84, 0xBA63FB84, 0xBA64FB84, 0xBA65FB84, 0xBA66FB84, 0xBA67FB84, 0xBA68FB84, + 0xBA69FB84, 0xBA6AFB84, 0xBA6BFB84, 0xBA6CFB84, 0xBA6DFB84, 0xBA6EFB84, 0xBA6FFB84, 0xBA70FB84, 0xBA71FB84, 0xBA72FB84, 0xBA73FB84, 0xBA74FB84, 0xBA75FB84, 0xBA76FB84, 0xBA77FB84, + 0xBA78FB84, 0xBA79FB84, 0xBA7AFB84, 0xBA7BFB84, 0xBA7CFB84, 0xBA7DFB84, 0xBA7EFB84, 0xBA7FFB84, 0xBA80FB84, 0xBA81FB84, 0xBA82FB84, 0xBA83FB84, 0xBA84FB84, 0xBA85FB84, 0xBA86FB84, + 0xBA87FB84, 0xBA88FB84, 0xBA89FB84, 0xBA8AFB84, 0xBA8BFB84, 0xBA8CFB84, 0xBA8DFB84, 0xBA8EFB84, 0xBA8FFB84, 0xBA90FB84, 0xBA91FB84, 0xBA92FB84, 0xBA93FB84, 0xBA94FB84, 0xBA95FB84, + 0xBA96FB84, 0xBA97FB84, 0xBA98FB84, 0xBA99FB84, 0xBA9AFB84, 0xBA9BFB84, 0xBA9CFB84, 0xBA9DFB84, 0xBA9EFB84, 0xBA9FFB84, 0xBAA0FB84, 0xBAA1FB84, 0xBAA2FB84, 0xBAA3FB84, 0xBAA4FB84, + 0xBAA5FB84, 0xBAA6FB84, 0xBAA7FB84, 0xBAA8FB84, 0xBAA9FB84, 0xBAAAFB84, 0xBAABFB84, 0xBAACFB84, 0xBAADFB84, 0xBAAEFB84, 0xBAAFFB84, 0xBAB0FB84, 0xBAB1FB84, 0xBAB2FB84, 0xBAB3FB84, + 0xBAB4FB84, 0xBAB5FB84, 0xBAB6FB84, 0xBAB7FB84, 0xBAB8FB84, 0xBAB9FB84, 0xBABAFB84, 0xBABBFB84, 0xBABCFB84, 0xBABDFB84, 0xBABEFB84, 0xBABFFB84, 0xBAC0FB84, 0xBAC1FB84, 0xBAC2FB84, + 0xBAC3FB84, 0xBAC4FB84, 0xBAC5FB84, 0xBAC6FB84, 0xBAC7FB84, 0xBAC8FB84, 0xBAC9FB84, 0xBACAFB84, 0xBACBFB84, 0xBACCFB84, 0xBACDFB84, 0xBACEFB84, 0xBACFFB84, 0xBAD0FB84, 0xBAD1FB84, + 0xBAD2FB84, 0xBAD3FB84, 0xBAD4FB84, 0xBAD5FB84, 0xBAD6FB84, 0xBAD7FB84, 0xBAD8FB84, 0xBAD9FB84, 0xBADAFB84, 0xBADBFB84, 0xBADCFB84, 0xBADDFB84, 0xBADEFB84, 0xBADFFB84, 0xBAE0FB84, + 0xBAE1FB84, 0xBAE2FB84, 0xBAE3FB84, 0xBAE4FB84, 0xBAE5FB84, 0xBAE6FB84, 0xBAE7FB84, 0xBAE8FB84, 0xBAE9FB84, 0xBAEAFB84, 0xBAEBFB84, 0xBAECFB84, 0xBAEDFB84, 0xBAEEFB84, 0xBAEFFB84, 
+ 0xBAF0FB84, 0xBAF1FB84, 0xBAF2FB84, 0xBAF3FB84, 0xBAF4FB84, 0xBAF5FB84, 0xBAF6FB84, 0xBAF7FB84, 0xBAF8FB84, 0xBAF9FB84, 0xBAFAFB84, 0xBAFBFB84, 0xBAFCFB84, 0xBAFDFB84, 0xBAFEFB84, + 0xBAFFFB84, 0xBB00FB84, 0xBB01FB84, 0xBB02FB84, 0xBB03FB84, 0xBB04FB84, 0xBB05FB84, 0xBB06FB84, 0xBB07FB84, 0xBB08FB84, 0xBB09FB84, 0xBB0AFB84, 0xBB0BFB84, 0xBB0CFB84, 0xBB0DFB84, + 0xBB0EFB84, 0xBB0FFB84, 0xBB10FB84, 0xBB11FB84, 0xBB12FB84, 0xBB13FB84, 0xBB14FB84, 0xBB15FB84, 0xBB16FB84, 0xBB17FB84, 0xBB18FB84, 0xBB19FB84, 0xBB1AFB84, 0xBB1BFB84, 0xBB1CFB84, + 0xBB1DFB84, 0xBB1EFB84, 0xBB1FFB84, 0xBB20FB84, 0xBB21FB84, 0xBB22FB84, 0xBB23FB84, 0xBB24FB84, 0xBB25FB84, 0xBB26FB84, 0xBB27FB84, 0xBB28FB84, 0xBB29FB84, 0xBB2AFB84, 0xBB2BFB84, + 0xBB2CFB84, 0xBB2DFB84, 0xBB2EFB84, 0xBB2FFB84, 0xBB30FB84, 0xBB31FB84, 0xBB32FB84, 0xBB33FB84, 0xBB34FB84, 0xBB35FB84, 0xBB36FB84, 0xBB37FB84, 0xBB38FB84, 0xBB39FB84, 0xBB3AFB84, + 0xBB3BFB84, 0xBB3CFB84, 0xBB3DFB84, 0xBB3EFB84, 0xBB3FFB84, 0xBB40FB84, 0xBB41FB84, 0xBB42FB84, 0xBB43FB84, 0xBB44FB84, 0xBB45FB84, 0xBB46FB84, 0xBB47FB84, 0xBB48FB84, 0xBB49FB84, + 0xBB4AFB84, 0xBB4BFB84, 0xBB4CFB84, 0xBB4DFB84, 0xBB4EFB84, 0xBB4FFB84, 0xBB50FB84, 0xBB51FB84, 0xBB52FB84, 0xBB53FB84, 0xBB54FB84, 0xBB55FB84, 0xBB56FB84, 0xBB57FB84, 0xBB58FB84, + 0xBB59FB84, 0xBB5AFB84, 0xBB5BFB84, 0xBB5CFB84, 0xBB5DFB84, 0xBB5EFB84, 0xBB5FFB84, 0xBB60FB84, 0xBB61FB84, 0xBB62FB84, 0xBB63FB84, 0xBB64FB84, 0xBB65FB84, 0xBB66FB84, 0xBB67FB84, + 0xBB68FB84, 0xBB69FB84, 0xBB6AFB84, 0xBB6BFB84, 0xBB6CFB84, 0xBB6DFB84, 0xBB6EFB84, 0xBB6FFB84, 0xBB70FB84, 0xBB71FB84, 0xBB72FB84, 0xBB73FB84, 0xBB74FB84, 0xBB75FB84, 0xBB76FB84, + 0xBB77FB84, 0xBB78FB84, 0xBB79FB84, 0xBB7AFB84, 0xBB7BFB84, 0xBB7CFB84, 0xBB7DFB84, 0xBB7EFB84, 0xBB7FFB84, 0xBB80FB84, 0xBB81FB84, 0xBB82FB84, 0xBB83FB84, 0xBB84FB84, 0xBB85FB84, + 0xBB86FB84, 0xBB87FB84, 0xBB88FB84, 0xBB89FB84, 0xBB8AFB84, 0xBB8BFB84, 0xBB8CFB84, 0xBB8DFB84, 0xBB8EFB84, 0xBB8FFB84, 0xBB90FB84, 0xBB91FB84, 0xBB92FB84, 0xBB93FB84, 
0xBB94FB84, + 0xBB95FB84, 0xBB96FB84, 0xBB97FB84, 0xBB98FB84, 0xBB99FB84, 0xBB9AFB84, 0xBB9BFB84, 0xBB9CFB84, 0xBB9DFB84, 0xBB9EFB84, 0xBB9FFB84, 0xBBA0FB84, 0xBBA1FB84, 0xBBA2FB84, 0xBBA3FB84, + 0xBBA4FB84, 0xBBA5FB84, 0xBBA6FB84, 0xBBA7FB84, 0xBBA8FB84, 0xBBA9FB84, 0xBBAAFB84, 0xBBABFB84, 0xBBACFB84, 0xBBADFB84, 0xBBAEFB84, 0xBBAFFB84, 0xBBB0FB84, 0xBBB1FB84, 0xBBB2FB84, + 0xBBB3FB84, 0xBBB4FB84, 0xBBB5FB84, 0xBBB6FB84, 0xBBB7FB84, 0xBBB8FB84, 0xBBB9FB84, 0xBBBAFB84, 0xBBBBFB84, 0xBBBCFB84, 0xBBBDFB84, 0xBBBEFB84, 0xBBBFFB84, 0xBBC0FB84, 0xBBC1FB84, + 0xBBC2FB84, 0xBBC3FB84, 0xBBC4FB84, 0xBBC5FB84, 0xBBC6FB84, 0xBBC7FB84, 0xBBC8FB84, 0xBBC9FB84, 0xBBCAFB84, 0xBBCBFB84, 0xBBCCFB84, 0xBBCDFB84, 0xBBCEFB84, 0xBBCFFB84, 0xBBD0FB84, + 0xBBD1FB84, 0xBBD2FB84, 0xBBD3FB84, 0xBBD4FB84, 0xBBD5FB84, 0xBBD6FB84, 0xBBD7FB84, 0xBBD8FB84, 0xBBD9FB84, 0xBBDAFB84, 0xBBDBFB84, 0xBBDCFB84, 0xBBDDFB84, 0xBBDEFB84, 0xBBDFFB84, + 0xBBE0FB84, 0xBBE1FB84, 0xBBE2FB84, 0xBBE3FB84, 0xBBE4FB84, 0xBBE5FB84, 0xBBE6FB84, 0xBBE7FB84, 0xBBE8FB84, 0xBBE9FB84, 0xBBEAFB84, 0xBBEBFB84, 0xBBECFB84, 0xBBEDFB84, 0xBBEEFB84, + 0xBBEFFB84, 0xBBF0FB84, 0xBBF1FB84, 0xBBF2FB84, 0xBBF3FB84, 0xBBF4FB84, 0xBBF5FB84, 0xBBF6FB84, 0xBBF7FB84, 0xBBF8FB84, 0xBBF9FB84, 0xBBFAFB84, 0xBBFBFB84, 0xBBFCFB84, 0xBBFDFB84, + 0xBBFEFB84, 0xBBFFFB84, 0xBC00FB84, 0xBC01FB84, 0xBC02FB84, 0xBC03FB84, 0xBC04FB84, 0xBC05FB84, 0xBC06FB84, 0xBC07FB84, 0xBC08FB84, 0xBC09FB84, 0xBC0AFB84, 0xBC0BFB84, 0xBC0CFB84, + 0xBC0DFB84, 0xBC0EFB84, 0xBC0FFB84, 0xBC10FB84, 0xBC11FB84, 0xBC12FB84, 0xBC13FB84, 0xBC14FB84, 0xBC15FB84, 0xBC16FB84, 0xBC17FB84, 0xBC18FB84, 0xBC19FB84, 0xBC1AFB84, 0xBC1BFB84, + 0xBC1CFB84, 0xBC1DFB84, 0xBC1EFB84, 0xBC1FFB84, 0xBC20FB84, 0xBC21FB84, 0xBC22FB84, 0xBC23FB84, 0xBC24FB84, 0xBC25FB84, 0xBC26FB84, 0xBC27FB84, 0xBC28FB84, 0xBC29FB84, 0xBC2AFB84, + 0xBC2BFB84, 0xBC2CFB84, 0xBC2DFB84, 0xBC2EFB84, 0xBC2FFB84, 0xBC30FB84, 0xBC31FB84, 0xBC32FB84, 0xBC33FB84, 0xBC34FB84, 0xBC35FB84, 0xBC36FB84, 0xBC37FB84, 
0xBC38FB84, 0xBC39FB84, + 0xBC3AFB84, 0xBC3BFB84, 0xBC3CFB84, 0xBC3DFB84, 0xBC3EFB84, 0xBC3FFB84, 0xBC40FB84, 0xBC41FB84, 0xBC42FB84, 0xBC43FB84, 0xBC44FB84, 0xBC45FB84, 0xBC46FB84, 0xBC47FB84, 0xBC48FB84, + 0xBC49FB84, 0xBC4AFB84, 0xBC4BFB84, 0xBC4CFB84, 0xBC4DFB84, 0xBC4EFB84, 0xBC4FFB84, 0xBC50FB84, 0xBC51FB84, 0xBC52FB84, 0xBC53FB84, 0xBC54FB84, 0xBC55FB84, 0xBC56FB84, 0xBC57FB84, + 0xBC58FB84, 0xBC59FB84, 0xBC5AFB84, 0xBC5BFB84, 0xBC5CFB84, 0xBC5DFB84, 0xBC5EFB84, 0xBC5FFB84, 0xBC60FB84, 0xBC61FB84, 0xBC62FB84, 0xBC63FB84, 0xBC64FB84, 0xBC65FB84, 0xBC66FB84, + 0xBC67FB84, 0xBC68FB84, 0xBC69FB84, 0xBC6AFB84, 0xBC6BFB84, 0xBC6CFB84, 0xBC6DFB84, 0xBC6EFB84, 0xBC6FFB84, 0xBC70FB84, 0xBC71FB84, 0xBC72FB84, 0xBC73FB84, 0xBC74FB84, 0xBC75FB84, + 0xBC76FB84, 0xBC77FB84, 0xBC78FB84, 0xBC79FB84, 0xBC7AFB84, 0xBC7BFB84, 0xBC7CFB84, 0xBC7DFB84, 0xBC7EFB84, 0xBC7FFB84, 0xBC80FB84, 0xBC81FB84, 0xBC82FB84, 0xBC83FB84, 0xBC84FB84, + 0xBC85FB84, 0xBC86FB84, 0xBC87FB84, 0xBC88FB84, 0xBC89FB84, 0xBC8AFB84, 0xBC8BFB84, 0xBC8CFB84, 0xBC8DFB84, 0xBC8EFB84, 0xBC8FFB84, 0xBC90FB84, 0xBC91FB84, 0xBC92FB84, 0xBC93FB84, + 0xBC94FB84, 0xBC95FB84, 0xBC96FB84, 0xBC97FB84, 0xBC98FB84, 0xBC99FB84, 0xBC9AFB84, 0xBC9BFB84, 0xBC9CFB84, 0xBC9DFB84, 0xBC9EFB84, 0xBC9FFB84, 0xBCA0FB84, 0xBCA1FB84, 0xBCA2FB84, + 0xBCA3FB84, 0xBCA4FB84, 0xBCA5FB84, 0xBCA6FB84, 0xBCA7FB84, 0xBCA8FB84, 0xBCA9FB84, 0xBCAAFB84, 0xBCABFB84, 0xBCACFB84, 0xBCADFB84, 0xBCAEFB84, 0xBCAFFB84, 0xBCB0FB84, 0xBCB1FB84, + 0xBCB2FB84, 0xBCB3FB84, 0xBCB4FB84, 0xBCB5FB84, 0xBCB6FB84, 0xBCB7FB84, 0xBCB8FB84, 0xBCB9FB84, 0xBCBAFB84, 0xBCBBFB84, 0xBCBCFB84, 0xBCBDFB84, 0xBCBEFB84, 0xBCBFFB84, 0xBCC0FB84, + 0xBCC1FB84, 0xBCC2FB84, 0xBCC3FB84, 0xBCC4FB84, 0xBCC5FB84, 0xBCC6FB84, 0xBCC7FB84, 0xBCC8FB84, 0xBCC9FB84, 0xBCCAFB84, 0xBCCBFB84, 0xBCCCFB84, 0xBCCDFB84, 0xBCCEFB84, 0xBCCFFB84, + 0xBCD0FB84, 0xBCD1FB84, 0xBCD2FB84, 0xBCD3FB84, 0xBCD4FB84, 0xBCD5FB84, 0xBCD6FB84, 0xBCD7FB84, 0xBCD8FB84, 0xBCD9FB84, 0xBCDAFB84, 0xBCDBFB84, 
0xBCDCFB84, 0xBCDDFB84, 0xBCDEFB84, + 0xBCDFFB84, 0xBCE0FB84, 0xBCE1FB84, 0xBCE2FB84, 0xBCE3FB84, 0xBCE4FB84, 0xBCE5FB84, 0xBCE6FB84, 0xBCE7FB84, 0xBCE8FB84, 0xBCE9FB84, 0xBCEAFB84, 0xBCEBFB84, 0xBCECFB84, 0xBCEDFB84, + 0xBCEEFB84, 0xBCEFFB84, 0xBCF0FB84, 0xBCF1FB84, 0xBCF2FB84, 0xBCF3FB84, 0xBCF4FB84, 0xBCF5FB84, 0xBCF6FB84, 0xBCF7FB84, 0xBCF8FB84, 0xBCF9FB84, 0xBCFAFB84, 0xBCFBFB84, 0xBCFCFB84, + 0xBCFDFB84, 0xBCFEFB84, 0xBCFFFB84, 0xBD00FB84, 0xBD01FB84, 0xBD02FB84, 0xBD03FB84, 0xBD04FB84, 0xBD05FB84, 0xBD06FB84, 0xBD07FB84, 0xBD08FB84, 0xBD09FB84, 0xBD0AFB84, 0xBD0BFB84, + 0xBD0CFB84, 0xBD0DFB84, 0xBD0EFB84, 0xBD0FFB84, 0xBD10FB84, 0xBD11FB84, 0xBD12FB84, 0xBD13FB84, 0xBD14FB84, 0xBD15FB84, 0xBD16FB84, 0xBD17FB84, 0xBD18FB84, 0xBD19FB84, 0xBD1AFB84, + 0xBD1BFB84, 0xBD1CFB84, 0xBD1DFB84, 0xBD1EFB84, 0xBD1FFB84, 0xBD20FB84, 0xBD21FB84, 0xBD22FB84, 0xBD23FB84, 0xBD24FB84, 0xBD25FB84, 0xBD26FB84, 0xBD27FB84, 0xBD28FB84, 0xBD29FB84, + 0xBD2AFB84, 0xBD2BFB84, 0xBD2CFB84, 0xBD2DFB84, 0xBD2EFB84, 0xBD2FFB84, 0xBD30FB84, 0xBD31FB84, 0xBD32FB84, 0xBD33FB84, 0xBD34FB84, 0xBD35FB84, 0xBD36FB84, 0xBD37FB84, 0xBD38FB84, + 0xBD39FB84, 0xBD3AFB84, 0xBD3BFB84, 0xBD3CFB84, 0xBD3DFB84, 0xBD3EFB84, 0xBD3FFB84, 0xBD40FB84, 0xBD41FB84, 0xBD42FB84, 0xBD43FB84, 0xBD44FB84, 0xBD45FB84, 0xBD46FB84, 0xBD47FB84, + 0xBD48FB84, 0xBD49FB84, 0xBD4AFB84, 0xBD4BFB84, 0xBD4CFB84, 0xBD4DFB84, 0xBD4EFB84, 0xBD4FFB84, 0xBD50FB84, 0xBD51FB84, 0xBD52FB84, 0xBD53FB84, 0xBD54FB84, 0xBD55FB84, 0xBD56FB84, + 0xBD57FB84, 0xBD58FB84, 0xBD59FB84, 0xBD5AFB84, 0xBD5BFB84, 0xBD5CFB84, 0xBD5DFB84, 0xBD5EFB84, 0xBD5FFB84, 0xBD60FB84, 0xBD61FB84, 0xBD62FB84, 0xBD63FB84, 0xBD64FB84, 0xBD65FB84, + 0xBD66FB84, 0xBD67FB84, 0xBD68FB84, 0xBD69FB84, 0xBD6AFB84, 0xBD6BFB84, 0xBD6CFB84, 0xBD6DFB84, 0xBD6EFB84, 0xBD6FFB84, 0xBD70FB84, 0xBD71FB84, 0xBD72FB84, 0xBD73FB84, 0xBD74FB84, + 0xBD75FB84, 0xBD76FB84, 0xBD77FB84, 0xBD78FB84, 0xBD79FB84, 0xBD7AFB84, 0xBD7BFB84, 0xBD7CFB84, 0xBD7DFB84, 0xBD7EFB84, 0xBD7FFB84, 
0xBD80FB84, 0xBD81FB84, 0xBD82FB84, 0xBD83FB84, + 0xBD84FB84, 0xBD85FB84, 0xBD86FB84, 0xBD87FB84, 0xBD88FB84, 0xBD89FB84, 0xBD8AFB84, 0xBD8BFB84, 0xBD8CFB84, 0xBD8DFB84, 0xBD8EFB84, 0xBD8FFB84, 0xBD90FB84, 0xBD91FB84, 0xBD92FB84, + 0xBD93FB84, 0xBD94FB84, 0xBD95FB84, 0xBD96FB84, 0xBD97FB84, 0xBD98FB84, 0xBD99FB84, 0xBD9AFB84, 0xBD9BFB84, 0xBD9CFB84, 0xBD9DFB84, 0xBD9EFB84, 0xBD9FFB84, 0xBDA0FB84, 0xBDA1FB84, + 0xBDA2FB84, 0xBDA3FB84, 0xBDA4FB84, 0xBDA5FB84, 0xBDA6FB84, 0xBDA7FB84, 0xBDA8FB84, 0xBDA9FB84, 0xBDAAFB84, 0xBDABFB84, 0xBDACFB84, 0xBDADFB84, 0xBDAEFB84, 0xBDAFFB84, 0xBDB0FB84, + 0xBDB1FB84, 0xBDB2FB84, 0xBDB3FB84, 0xBDB4FB84, 0xBDB5FB84, 0xBDB6FB84, 0xBDB7FB84, 0xBDB8FB84, 0xBDB9FB84, 0xBDBAFB84, 0xBDBBFB84, 0xBDBCFB84, 0xBDBDFB84, 0xBDBEFB84, 0xBDBFFB84, + 0xBDC0FB84, 0xBDC1FB84, 0xBDC2FB84, 0xBDC3FB84, 0xBDC4FB84, 0xBDC5FB84, 0xBDC6FB84, 0xBDC7FB84, 0xBDC8FB84, 0xBDC9FB84, 0xBDCAFB84, 0xBDCBFB84, 0xBDCCFB84, 0xBDCDFB84, 0xBDCEFB84, + 0xBDCFFB84, 0xBDD0FB84, 0xBDD1FB84, 0xBDD2FB84, 0xBDD3FB84, 0xBDD4FB84, 0xBDD5FB84, 0xBDD6FB84, 0xBDD7FB84, 0xBDD8FB84, 0xBDD9FB84, 0xBDDAFB84, 0xBDDBFB84, 0xBDDCFB84, 0xBDDDFB84, + 0xBDDEFB84, 0xBDDFFB84, 0xBDE0FB84, 0xBDE1FB84, 0xBDE2FB84, 0xBDE3FB84, 0xBDE4FB84, 0xBDE5FB84, 0xBDE6FB84, 0xBDE7FB84, 0xBDE8FB84, 0xBDE9FB84, 0xBDEAFB84, 0xBDEBFB84, 0xBDECFB84, + 0xBDEDFB84, 0xBDEEFB84, 0xBDEFFB84, 0xBDF0FB84, 0xBDF1FB84, 0xBDF2FB84, 0xBDF3FB84, 0xBDF4FB84, 0xBDF5FB84, 0xBDF6FB84, 0xBDF7FB84, 0xBDF8FB84, 0xBDF9FB84, 0xBDFAFB84, 0xBDFBFB84, + 0xBDFCFB84, 0xBDFDFB84, 0xBDFEFB84, 0xBDFFFB84, 0xBE00FB84, 0xBE01FB84, 0xBE02FB84, 0xBE03FB84, 0xBE04FB84, 0xBE05FB84, 0xBE06FB84, 0xBE07FB84, 0xBE08FB84, 0xBE09FB84, 0xBE0AFB84, + 0xBE0BFB84, 0xBE0CFB84, 0xBE0DFB84, 0xBE0EFB84, 0xBE0FFB84, 0xBE10FB84, 0xBE11FB84, 0xBE12FB84, 0xBE13FB84, 0xBE14FB84, 0xBE15FB84, 0xBE16FB84, 0xBE17FB84, 0xBE18FB84, 0xBE19FB84, + 0xBE1AFB84, 0xBE1BFB84, 0xBE1CFB84, 0xBE1DFB84, 0xBE1EFB84, 0xBE1FFB84, 0xBE20FB84, 0xBE21FB84, 0xBE22FB84, 0xBE23FB84, 
0xBE24FB84, 0xBE25FB84, 0xBE26FB84, 0xBE27FB84, 0xBE28FB84, + 0xBE29FB84, 0xBE2AFB84, 0xBE2BFB84, 0xBE2CFB84, 0xBE2DFB84, 0xBE2EFB84, 0xBE2FFB84, 0xBE30FB84, 0xBE31FB84, 0xBE32FB84, 0xBE33FB84, 0xBE34FB84, 0xBE35FB84, 0xBE36FB84, 0xBE37FB84, + 0xBE38FB84, 0xBE39FB84, 0xBE3AFB84, 0xBE3BFB84, 0xBE3CFB84, 0xBE3DFB84, 0xBE3EFB84, 0xBE3FFB84, 0xBE40FB84, 0xBE41FB84, 0xBE42FB84, 0xBE43FB84, 0xBE44FB84, 0xBE45FB84, 0xBE46FB84, + 0xBE47FB84, 0xBE48FB84, 0xBE49FB84, 0xBE4AFB84, 0xBE4BFB84, 0xBE4CFB84, 0xBE4DFB84, 0xBE4EFB84, 0xBE4FFB84, 0xBE50FB84, 0xBE51FB84, 0xBE52FB84, 0xBE53FB84, 0xBE54FB84, 0xBE55FB84, + 0xBE56FB84, 0xBE57FB84, 0xBE58FB84, 0xBE59FB84, 0xBE5AFB84, 0xBE5BFB84, 0xBE5CFB84, 0xBE5DFB84, 0xBE5EFB84, 0xBE5FFB84, 0xBE60FB84, 0xBE61FB84, 0xBE62FB84, 0xBE63FB84, 0xBE64FB84, + 0xBE65FB84, 0xBE66FB84, 0xBE67FB84, 0xBE68FB84, 0xBE69FB84, 0xBE6AFB84, 0xBE6BFB84, 0xBE6CFB84, 0xBE6DFB84, 0xBE6EFB84, 0xBE6FFB84, 0xBE70FB84, 0xBE71FB84, 0xBE72FB84, 0xBE73FB84, + 0xBE74FB84, 0xBE75FB84, 0xBE76FB84, 0xBE77FB84, 0xBE78FB84, 0xBE79FB84, 0xBE7AFB84, 0xBE7BFB84, 0xBE7CFB84, 0xBE7DFB84, 0xBE7EFB84, 0xBE7FFB84, 0xBE80FB84, 0xBE81FB84, 0xBE82FB84, + 0xBE83FB84, 0xBE84FB84, 0xBE85FB84, 0xBE86FB84, 0xBE87FB84, 0xBE88FB84, 0xBE89FB84, 0xBE8AFB84, 0xBE8BFB84, 0xBE8CFB84, 0xBE8DFB84, 0xBE8EFB84, 0xBE8FFB84, 0xBE90FB84, 0xBE91FB84, + 0xBE92FB84, 0xBE93FB84, 0xBE94FB84, 0xBE95FB84, 0xBE96FB84, 0xBE97FB84, 0xBE98FB84, 0xBE99FB84, 0xBE9AFB84, 0xBE9BFB84, 0xBE9CFB84, 0xBE9DFB84, 0xBE9EFB84, 0xBE9FFB84, 0xBEA0FB84, + 0xBEA1FB84, 0xBEA2FB84, 0xBEA3FB84, 0xBEA4FB84, 0xBEA5FB84, 0xBEA6FB84, 0xBEA7FB84, 0xBEA8FB84, 0xBEA9FB84, 0xBEAAFB84, 0xBEABFB84, 0xBEACFB84, 0xBEADFB84, 0xBEAEFB84, 0xBEAFFB84, + 0xBEB0FB84, 0xBEB1FB84, 0xBEB2FB84, 0xBEB3FB84, 0xBEB4FB84, 0xBEB5FB84, 0xBEB6FB84, 0xBEB7FB84, 0xBEB8FB84, 0xBEB9FB84, 0xBEBAFB84, 0xBEBBFB84, 0xBEBCFB84, 0xBEBDFB84, 0xBEBEFB84, + 0xBEBFFB84, 0xBEC0FB84, 0xBEC1FB84, 0xBEC2FB84, 0xBEC3FB84, 0xBEC4FB84, 0xBEC5FB84, 0xBEC6FB84, 0xBEC7FB84, 
0xBEC8FB84, 0xBEC9FB84, 0xBECAFB84, 0xBECBFB84, 0xBECCFB84, 0xBECDFB84, + 0xBECEFB84, 0xBECFFB84, 0xBED0FB84, 0xBED1FB84, 0xBED2FB84, 0xBED3FB84, 0xBED4FB84, 0xBED5FB84, 0xBED6FB84, 0xBED7FB84, 0xBED8FB84, 0xBED9FB84, 0xBEDAFB84, 0xBEDBFB84, 0xBEDCFB84, + 0xBEDDFB84, 0xBEDEFB84, 0xBEDFFB84, 0xBEE0FB84, 0xBEE1FB84, 0xBEE2FB84, 0xBEE3FB84, 0xBEE4FB84, 0xBEE5FB84, 0xBEE6FB84, 0xBEE7FB84, 0xBEE8FB84, 0xBEE9FB84, 0xBEEAFB84, 0xBEEBFB84, + 0xBEECFB84, 0xBEEDFB84, 0xBEEEFB84, 0xBEEFFB84, 0xBEF0FB84, 0xBEF1FB84, 0xBEF2FB84, 0xBEF3FB84, 0xBEF4FB84, 0xBEF5FB84, 0xBEF6FB84, 0xBEF7FB84, 0xBEF8FB84, 0xBEF9FB84, 0xBEFAFB84, + 0xBEFBFB84, 0xBEFCFB84, 0xBEFDFB84, 0xBEFEFB84, 0xBEFFFB84, 0xBF00FB84, 0xBF01FB84, 0xBF02FB84, 0xBF03FB84, 0xBF04FB84, 0xBF05FB84, 0xBF06FB84, 0xBF07FB84, 0xBF08FB84, 0xBF09FB84, + 0xBF0AFB84, 0xBF0BFB84, 0xBF0CFB84, 0xBF0DFB84, 0xBF0EFB84, 0xBF0FFB84, 0xBF10FB84, 0xBF11FB84, 0xBF12FB84, 0xBF13FB84, 0xBF14FB84, 0xBF15FB84, 0xBF16FB84, 0xBF17FB84, 0xBF18FB84, + 0xBF19FB84, 0xBF1AFB84, 0xBF1BFB84, 0xBF1CFB84, 0xBF1DFB84, 0xBF1EFB84, 0xBF1FFB84, 0xBF20FB84, 0xBF21FB84, 0xBF22FB84, 0xBF23FB84, 0xBF24FB84, 0xBF25FB84, 0xBF26FB84, 0xBF27FB84, + 0xBF28FB84, 0xBF29FB84, 0xBF2AFB84, 0xBF2BFB84, 0xBF2CFB84, 0xBF2DFB84, 0xBF2EFB84, 0xBF2FFB84, 0xBF30FB84, 0xBF31FB84, 0xBF32FB84, 0xBF33FB84, 0xBF34FB84, 0xBF35FB84, 0xBF36FB84, + 0xBF37FB84, 0xBF38FB84, 0xBF39FB84, 0xBF3AFB84, 0xBF3BFB84, 0xBF3CFB84, 0xBF3DFB84, 0xBF3EFB84, 0xBF3FFB84, 0xBF40FB84, 0xBF41FB84, 0xBF42FB84, 0xBF43FB84, 0xBF44FB84, 0xBF45FB84, + 0xBF46FB84, 0xBF47FB84, 0xBF48FB84, 0xBF49FB84, 0xBF4AFB84, 0xBF4BFB84, 0xBF4CFB84, 0xBF4DFB84, 0xBF4EFB84, 0xBF4FFB84, 0xBF50FB84, 0xBF51FB84, 0xBF52FB84, 0xBF53FB84, 0xBF54FB84, + 0xBF55FB84, 0xBF56FB84, 0xBF57FB84, 0xBF58FB84, 0xBF59FB84, 0xBF5AFB84, 0xBF5BFB84, 0xBF5CFB84, 0xBF5DFB84, 0xBF5EFB84, 0xBF5FFB84, 0xBF60FB84, 0xBF61FB84, 0xBF62FB84, 0xBF63FB84, + 0xBF64FB84, 0xBF65FB84, 0xBF66FB84, 0xBF67FB84, 0xBF68FB84, 0xBF69FB84, 0xBF6AFB84, 0xBF6BFB84, 
0xBF6CFB84, 0xBF6DFB84, 0xBF6EFB84, 0xBF6FFB84, 0xBF70FB84, 0xBF71FB84, 0xBF72FB84, + 0xBF73FB84, 0xBF74FB84, 0xBF75FB84, 0xBF76FB84, 0xBF77FB84, 0xBF78FB84, 0xBF79FB84, 0xBF7AFB84, 0xBF7BFB84, 0xBF7CFB84, 0xBF7DFB84, 0xBF7EFB84, 0xBF7FFB84, 0xBF80FB84, 0xBF81FB84, + 0xBF82FB84, 0xBF83FB84, 0xBF84FB84, 0xBF85FB84, 0xBF86FB84, 0xBF87FB84, 0xBF88FB84, 0xBF89FB84, 0xBF8AFB84, 0xBF8BFB84, 0xBF8CFB84, 0xBF8DFB84, 0xBF8EFB84, 0xBF8FFB84, 0xBF90FB84, + 0xBF91FB84, 0xBF92FB84, 0xBF93FB84, 0xBF94FB84, 0xBF95FB84, 0xBF96FB84, 0xBF97FB84, 0xBF98FB84, 0xBF99FB84, 0xBF9AFB84, 0xBF9BFB84, 0xBF9CFB84, 0xBF9DFB84, 0xBF9EFB84, 0xBF9FFB84, + 0xBFA0FB84, 0xBFA1FB84, 0xBFA2FB84, 0xBFA3FB84, 0xBFA4FB84, 0xBFA5FB84, 0xBFA6FB84, 0xBFA7FB84, 0xBFA8FB84, 0xBFA9FB84, 0xBFAAFB84, 0xBFABFB84, 0xBFACFB84, 0xBFADFB84, 0xBFAEFB84, + 0xBFAFFB84, 0xBFB0FB84, 0xBFB1FB84, 0xBFB2FB84, 0xBFB3FB84, 0xBFB4FB84, 0xBFB5FB84, 0xBFB6FB84, 0xBFB7FB84, 0xBFB8FB84, 0xBFB9FB84, 0xBFBAFB84, 0xBFBBFB84, 0xBFBCFB84, 0xBFBDFB84, + 0xBFBEFB84, 0xBFBFFB84, 0xBFC0FB84, 0xBFC1FB84, 0xBFC2FB84, 0xBFC3FB84, 0xBFC4FB84, 0xBFC5FB84, 0xBFC6FB84, 0xBFC7FB84, 0xBFC8FB84, 0xBFC9FB84, 0xBFCAFB84, 0xBFCBFB84, 0xBFCCFB84, + 0xBFCDFB84, 0xBFCEFB84, 0xBFCFFB84, 0xBFD0FB84, 0xBFD1FB84, 0xBFD2FB84, 0xBFD3FB84, 0xBFD4FB84, 0xBFD5FB84, 0xBFD6FB84, 0xBFD7FB84, 0xBFD8FB84, 0xBFD9FB84, 0xBFDAFB84, 0xBFDBFB84, + 0xBFDCFB84, 0xBFDDFB84, 0xBFDEFB84, 0xBFDFFB84, 0xBFE0FB84, 0xBFE1FB84, 0xBFE2FB84, 0xBFE3FB84, 0xBFE4FB84, 0xBFE5FB84, 0xBFE6FB84, 0xBFE7FB84, 0xBFE8FB84, 0xBFE9FB84, 0xBFEAFB84, + 0xBFEBFB84, 0xBFECFB84, 0xBFEDFB84, 0xBFEEFB84, 0xBFEFFB84, 0xBFF0FB84, 0xBFF1FB84, 0xBFF2FB84, 0xBFF3FB84, 0xBFF4FB84, 0xBFF5FB84, 0xBFF6FB84, 0xBFF7FB84, 0xBFF8FB84, 0xBFF9FB84, + 0xBFFAFB84, 0xBFFBFB84, 0xBFFCFB84, 0xBFFDFB84, 0xBFFEFB84, 0xBFFFFB84, 0xC000FB84, 0xC001FB84, 0xC002FB84, 0xC003FB84, 0xC004FB84, 0xC005FB84, 0xC006FB84, 0xC007FB84, 0xC008FB84, + 0xC009FB84, 0xC00AFB84, 0xC00BFB84, 0xC00CFB84, 0xC00DFB84, 0xC00EFB84, 0xC00FFB84, 
0xC010FB84, 0xC011FB84, 0xC012FB84, 0xC013FB84, 0xC014FB84, 0xC015FB84, 0xC016FB84, 0xC017FB84, + 0xC018FB84, 0xC019FB84, 0xC01AFB84, 0xC01BFB84, 0xC01CFB84, 0xC01DFB84, 0xC01EFB84, 0xC01FFB84, 0xC020FB84, 0xC021FB84, 0xC022FB84, 0xC023FB84, 0xC024FB84, 0xC025FB84, 0xC026FB84, + 0xC027FB84, 0xC028FB84, 0xC029FB84, 0xC02AFB84, 0xC02BFB84, 0xC02CFB84, 0xC02DFB84, 0xC02EFB84, 0xC02FFB84, 0xC030FB84, 0xC031FB84, 0xC032FB84, 0xC033FB84, 0xC034FB84, 0xC035FB84, + 0xC036FB84, 0xC037FB84, 0xC038FB84, 0xC039FB84, 0xC03AFB84, 0xC03BFB84, 0xC03CFB84, 0xC03DFB84, 0xC03EFB84, 0xC03FFB84, 0xC040FB84, 0xC041FB84, 0xC042FB84, 0xC043FB84, 0xC044FB84, + 0xC045FB84, 0xC046FB84, 0xC047FB84, 0xC048FB84, 0xC049FB84, 0xC04AFB84, 0xC04BFB84, 0xC04CFB84, 0xC04DFB84, 0xC04EFB84, 0xC04FFB84, 0xC050FB84, 0xC051FB84, 0xC052FB84, 0xC053FB84, + 0xC054FB84, 0xC055FB84, 0xC056FB84, 0xC057FB84, 0xC058FB84, 0xC059FB84, 0xC05AFB84, 0xC05BFB84, 0xC05CFB84, 0xC05DFB84, 0xC05EFB84, 0xC05FFB84, 0xC060FB84, 0xC061FB84, 0xC062FB84, + 0xC063FB84, 0xC064FB84, 0xC065FB84, 0xC066FB84, 0xC067FB84, 0xC068FB84, 0xC069FB84, 0xC06AFB84, 0xC06BFB84, 0xC06CFB84, 0xC06DFB84, 0xC06EFB84, 0xC06FFB84, 0xC070FB84, 0xC071FB84, + 0xC072FB84, 0xC073FB84, 0xC074FB84, 0xC075FB84, 0xC076FB84, 0xC077FB84, 0xC078FB84, 0xC079FB84, 0xC07AFB84, 0xC07BFB84, 0xC07CFB84, 0xC07DFB84, 0xC07EFB84, 0xC07FFB84, 0xC080FB84, + 0xC081FB84, 0xC082FB84, 0xC083FB84, 0xC084FB84, 0xC085FB84, 0xC086FB84, 0xC087FB84, 0xC088FB84, 0xC089FB84, 0xC08AFB84, 0xC08BFB84, 0xC08CFB84, 0xC08DFB84, 0xC08EFB84, 0xC08FFB84, + 0xC090FB84, 0xC091FB84, 0xC092FB84, 0xC093FB84, 0xC094FB84, 0xC095FB84, 0xC096FB84, 0xC097FB84, 0xC098FB84, 0xC099FB84, 0xC09AFB84, 0xC09BFB84, 0xC09CFB84, 0xC09DFB84, 0xC09EFB84, + 0xC09FFB84, 0xC0A0FB84, 0xC0A1FB84, 0xC0A2FB84, 0xC0A3FB84, 0xC0A4FB84, 0xC0A5FB84, 0xC0A6FB84, 0xC0A7FB84, 0xC0A8FB84, 0xC0A9FB84, 0xC0AAFB84, 0xC0ABFB84, 0xC0ACFB84, 0xC0ADFB84, + 0xC0AEFB84, 0xC0AFFB84, 0xC0B0FB84, 0xC0B1FB84, 0xC0B2FB84, 0xC0B3FB84, 
0xC0B4FB84, 0xC0B5FB84, 0xC0B6FB84, 0xC0B7FB84, 0xC0B8FB84, 0xC0B9FB84, 0xC0BAFB84, 0xC0BBFB84, 0xC0BCFB84, + 0xC0BDFB84, 0xC0BEFB84, 0xC0BFFB84, 0xC0C0FB84, 0xC0C1FB84, 0xC0C2FB84, 0xC0C3FB84, 0xC0C4FB84, 0xC0C5FB84, 0xC0C6FB84, 0xC0C7FB84, 0xC0C8FB84, 0xC0C9FB84, 0xC0CAFB84, 0xC0CBFB84, + 0xC0CCFB84, 0xC0CDFB84, 0xC0CEFB84, 0xC0CFFB84, 0xC0D0FB84, 0xC0D1FB84, 0xC0D2FB84, 0xC0D3FB84, 0xC0D4FB84, 0xC0D5FB84, 0xC0D6FB84, 0xC0D7FB84, 0xC0D8FB84, 0xC0D9FB84, 0xC0DAFB84, + 0xC0DBFB84, 0xC0DCFB84, 0xC0DDFB84, 0xC0DEFB84, 0xC0DFFB84, 0xC0E0FB84, 0xC0E1FB84, 0xC0E2FB84, 0xC0E3FB84, 0xC0E4FB84, 0xC0E5FB84, 0xC0E6FB84, 0xC0E7FB84, 0xC0E8FB84, 0xC0E9FB84, + 0xC0EAFB84, 0xC0EBFB84, 0xC0ECFB84, 0xC0EDFB84, 0xC0EEFB84, 0xC0EFFB84, 0xC0F0FB84, 0xC0F1FB84, 0xC0F2FB84, 0xC0F3FB84, 0xC0F4FB84, 0xC0F5FB84, 0xC0F6FB84, 0xC0F7FB84, 0xC0F8FB84, + 0xC0F9FB84, 0xC0FAFB84, 0xC0FBFB84, 0xC0FCFB84, 0xC0FDFB84, 0xC0FEFB84, 0xC0FFFB84, 0xC100FB84, 0xC101FB84, 0xC102FB84, 0xC103FB84, 0xC104FB84, 0xC105FB84, 0xC106FB84, 0xC107FB84, + 0xC108FB84, 0xC109FB84, 0xC10AFB84, 0xC10BFB84, 0xC10CFB84, 0xC10DFB84, 0xC10EFB84, 0xC10FFB84, 0xC110FB84, 0xC111FB84, 0xC112FB84, 0xC113FB84, 0xC114FB84, 0xC115FB84, 0xC116FB84, + 0xC117FB84, 0xC118FB84, 0xC119FB84, 0xC11AFB84, 0xC11BFB84, 0xC11CFB84, 0xC11DFB84, 0xC11EFB84, 0xC11FFB84, 0xC120FB84, 0xC121FB84, 0xC122FB84, 0xC123FB84, 0xC124FB84, 0xC125FB84, + 0xC126FB84, 0xC127FB84, 0xC128FB84, 0xC129FB84, 0xC12AFB84, 0xC12BFB84, 0xC12CFB84, 0xC12DFB84, 0xC12EFB84, 0xC12FFB84, 0xC130FB84, 0xC131FB84, 0xC132FB84, 0xC133FB84, 0xC134FB84, + 0xC135FB84, 0xC136FB84, 0xC137FB84, 0xC138FB84, 0xC139FB84, 0xC13AFB84, 0xC13BFB84, 0xC13CFB84, 0xC13DFB84, 0xC13EFB84, 0xC13FFB84, 0xC140FB84, 0xC141FB84, 0xC142FB84, 0xC143FB84, + 0xC144FB84, 0xC145FB84, 0xC146FB84, 0xC147FB84, 0xC148FB84, 0xC149FB84, 0xC14AFB84, 0xC14BFB84, 0xC14CFB84, 0xC14DFB84, 0xC14EFB84, 0xC14FFB84, 0xC150FB84, 0xC151FB84, 0xC152FB84, + 0xC153FB84, 0xC154FB84, 0xC155FB84, 0xC156FB84, 0xC157FB84, 
0xC158FB84, 0xC159FB84, 0xC15AFB84, 0xC15BFB84, 0xC15CFB84, 0xC15DFB84, 0xC15EFB84, 0xC15FFB84, 0xC160FB84, 0xC161FB84, + 0xC162FB84, 0xC163FB84, 0xC164FB84, 0xC165FB84, 0xC166FB84, 0xC167FB84, 0xC168FB84, 0xC169FB84, 0xC16AFB84, 0xC16BFB84, 0xC16CFB84, 0xC16DFB84, 0xC16EFB84, 0xC16FFB84, 0xC170FB84, + 0xC171FB84, 0xC172FB84, 0xC173FB84, 0xC174FB84, 0xC175FB84, 0xC176FB84, 0xC177FB84, 0xC178FB84, 0xC179FB84, 0xC17AFB84, 0xC17BFB84, 0xC17CFB84, 0xC17DFB84, 0xC17EFB84, 0xC17FFB84, + 0xC180FB84, 0xC181FB84, 0xC182FB84, 0xC183FB84, 0xC184FB84, 0xC185FB84, 0xC186FB84, 0xC187FB84, 0xC188FB84, 0xC189FB84, 0xC18AFB84, 0xC18BFB84, 0xC18CFB84, 0xC18DFB84, 0xC18EFB84, + 0xC18FFB84, 0xC190FB84, 0xC191FB84, 0xC192FB84, 0xC193FB84, 0xC194FB84, 0xC195FB84, 0xC196FB84, 0xC197FB84, 0xC198FB84, 0xC199FB84, 0xC19AFB84, 0xC19BFB84, 0xC19CFB84, 0xC19DFB84, + 0xC19EFB84, 0xC19FFB84, 0xC1A0FB84, 0xC1A1FB84, 0xC1A2FB84, 0xC1A3FB84, 0xC1A4FB84, 0xC1A5FB84, 0xC1A6FB84, 0xC1A7FB84, 0xC1A8FB84, 0xC1A9FB84, 0xC1AAFB84, 0xC1ABFB84, 0xC1ACFB84, + 0xC1ADFB84, 0xC1AEFB84, 0xC1AFFB84, 0xC1B0FB84, 0xC1B1FB84, 0xC1B2FB84, 0xC1B3FB84, 0xC1B4FB84, 0xC1B5FB84, 0xC1B6FB84, 0xC1B7FB84, 0xC1B8FB84, 0xC1B9FB84, 0xC1BAFB84, 0xC1BBFB84, + 0xC1BCFB84, 0xC1BDFB84, 0xC1BEFB84, 0xC1BFFB84, 0xC1C0FB84, 0xC1C1FB84, 0xC1C2FB84, 0xC1C3FB84, 0xC1C4FB84, 0xC1C5FB84, 0xC1C6FB84, 0xC1C7FB84, 0xC1C8FB84, 0xC1C9FB84, 0xC1CAFB84, + 0xC1CBFB84, 0xC1CCFB84, 0xC1CDFB84, 0xC1CEFB84, 0xC1CFFB84, 0xC1D0FB84, 0xC1D1FB84, 0xC1D2FB84, 0xC1D3FB84, 0xC1D4FB84, 0xC1D5FB84, 0xC1D6FB84, 0xC1D7FB84, 0xC1D8FB84, 0xC1D9FB84, + 0xC1DAFB84, 0xC1DBFB84, 0xC1DCFB84, 0xC1DDFB84, 0xC1DEFB84, 0xC1DFFB84, 0xC1E0FB84, 0xC1E1FB84, 0xC1E2FB84, 0xC1E3FB84, 0xC1E4FB84, 0xC1E5FB84, 0xC1E6FB84, 0xC1E7FB84, 0xC1E8FB84, + 0xC1E9FB84, 0xC1EAFB84, 0xC1EBFB84, 0xC1ECFB84, 0xC1EDFB84, 0xC1EEFB84, 0xC1EFFB84, 0xC1F0FB84, 0xC1F1FB84, 0xC1F2FB84, 0xC1F3FB84, 0xC1F4FB84, 0xC1F5FB84, 0xC1F6FB84, 0xC1F7FB84, + 0xC1F8FB84, 0xC1F9FB84, 0xC1FAFB84, 0xC1FBFB84, 
0xC1FCFB84, 0xC1FDFB84, 0xC1FEFB84, 0xC1FFFB84, 0xC200FB84, 0xC201FB84, 0xC202FB84, 0xC203FB84, 0xC204FB84, 0xC205FB84, 0xC206FB84, + 0xC207FB84, 0xC208FB84, 0xC209FB84, 0xC20AFB84, 0xC20BFB84, 0xC20CFB84, 0xC20DFB84, 0xC20EFB84, 0xC20FFB84, 0xC210FB84, 0xC211FB84, 0xC212FB84, 0xC213FB84, 0xC214FB84, 0xC215FB84, + 0xC216FB84, 0xC217FB84, 0xC218FB84, 0xC219FB84, 0xC21AFB84, 0xC21BFB84, 0xC21CFB84, 0xC21DFB84, 0xC21EFB84, 0xC21FFB84, 0xC220FB84, 0xC221FB84, 0xC222FB84, 0xC223FB84, 0xC224FB84, + 0xC225FB84, 0xC226FB84, 0xC227FB84, 0xC228FB84, 0xC229FB84, 0xC22AFB84, 0xC22BFB84, 0xC22CFB84, 0xC22DFB84, 0xC22EFB84, 0xC22FFB84, 0xC230FB84, 0xC231FB84, 0xC232FB84, 0xC233FB84, + 0xC234FB84, 0xC235FB84, 0xC236FB84, 0xC237FB84, 0xC238FB84, 0xC239FB84, 0xC23AFB84, 0xC23BFB84, 0xC23CFB84, 0xC23DFB84, 0xC23EFB84, 0xC23FFB84, 0xC240FB84, 0xC241FB84, 0xC242FB84, + 0xC243FB84, 0xC244FB84, 0xC245FB84, 0xC246FB84, 0xC247FB84, 0xC248FB84, 0xC249FB84, 0xC24AFB84, 0xC24BFB84, 0xC24CFB84, 0xC24DFB84, 0xC24EFB84, 0xC24FFB84, 0xC250FB84, 0xC251FB84, + 0xC252FB84, 0xC253FB84, 0xC254FB84, 0xC255FB84, 0xC256FB84, 0xC257FB84, 0xC258FB84, 0xC259FB84, 0xC25AFB84, 0xC25BFB84, 0xC25CFB84, 0xC25DFB84, 0xC25EFB84, 0xC25FFB84, 0xC260FB84, + 0xC261FB84, 0xC262FB84, 0xC263FB84, 0xC264FB84, 0xC265FB84, 0xC266FB84, 0xC267FB84, 0xC268FB84, 0xC269FB84, 0xC26AFB84, 0xC26BFB84, 0xC26CFB84, 0xC26DFB84, 0xC26EFB84, 0xC26FFB84, + 0xC270FB84, 0xC271FB84, 0xC272FB84, 0xC273FB84, 0xC274FB84, 0xC275FB84, 0xC276FB84, 0xC277FB84, 0xC278FB84, 0xC279FB84, 0xC27AFB84, 0xC27BFB84, 0xC27CFB84, 0xC27DFB84, 0xC27EFB84, + 0xC27FFB84, 0xC280FB84, 0xC281FB84, 0xC282FB84, 0xC283FB84, 0xC284FB84, 0xC285FB84, 0xC286FB84, 0xC287FB84, 0xC288FB84, 0xC289FB84, 0xC28AFB84, 0xC28BFB84, 0xC28CFB84, 0xC28DFB84, + 0xC28EFB84, 0xC28FFB84, 0xC290FB84, 0xC291FB84, 0xC292FB84, 0xC293FB84, 0xC294FB84, 0xC295FB84, 0xC296FB84, 0xC297FB84, 0xC298FB84, 0xC299FB84, 0xC29AFB84, 0xC29BFB84, 0xC29CFB84, + 0xC29DFB84, 0xC29EFB84, 0xC29FFB84, 
0xC2A0FB84, 0xC2A1FB84, 0xC2A2FB84, 0xC2A3FB84, 0xC2A4FB84, 0xC2A5FB84, 0xC2A6FB84, 0xC2A7FB84, 0xC2A8FB84, 0xC2A9FB84, 0xC2AAFB84, 0xC2ABFB84, + 0xC2ACFB84, 0xC2ADFB84, 0xC2AEFB84, 0xC2AFFB84, 0xC2B0FB84, 0xC2B1FB84, 0xC2B2FB84, 0xC2B3FB84, 0xC2B4FB84, 0xC2B5FB84, 0xC2B6FB84, 0xC2B7FB84, 0xC2B8FB84, 0xC2B9FB84, 0xC2BAFB84, + 0xC2BBFB84, 0xC2BCFB84, 0xC2BDFB84, 0xC2BEFB84, 0xC2BFFB84, 0xC2C0FB84, 0xC2C1FB84, 0xC2C2FB84, 0xC2C3FB84, 0xC2C4FB84, 0xC2C5FB84, 0xC2C6FB84, 0xC2C7FB84, 0xC2C8FB84, 0xC2C9FB84, + 0xC2CAFB84, 0xC2CBFB84, 0xC2CCFB84, 0xC2CDFB84, 0xC2CEFB84, 0xC2CFFB84, 0xC2D0FB84, 0xC2D1FB84, 0xC2D2FB84, 0xC2D3FB84, 0xC2D4FB84, 0xC2D5FB84, 0xC2D6FB84, 0xC2D7FB84, 0xC2D8FB84, + 0xC2D9FB84, 0xC2DAFB84, 0xC2DBFB84, 0xC2DCFB84, 0xC2DDFB84, 0xC2DEFB84, 0xC2DFFB84, 0xC2E0FB84, 0xC2E1FB84, 0xC2E2FB84, 0xC2E3FB84, 0xC2E4FB84, 0xC2E5FB84, 0xC2E6FB84, 0xC2E7FB84, + 0xC2E8FB84, 0xC2E9FB84, 0xC2EAFB84, 0xC2EBFB84, 0xC2ECFB84, 0xC2EDFB84, 0xC2EEFB84, 0xC2EFFB84, 0xC2F0FB84, 0xC2F1FB84, 0xC2F2FB84, 0xC2F3FB84, 0xC2F4FB84, 0xC2F5FB84, 0xC2F6FB84, + 0xC2F7FB84, 0xC2F8FB84, 0xC2F9FB84, 0xC2FAFB84, 0xC2FBFB84, 0xC2FCFB84, 0xC2FDFB84, 0xC2FEFB84, 0xC2FFFB84, 0xC300FB84, 0xC301FB84, 0xC302FB84, 0xC303FB84, 0xC304FB84, 0xC305FB84, + 0xC306FB84, 0xC307FB84, 0xC308FB84, 0xC309FB84, 0xC30AFB84, 0xC30BFB84, 0xC30CFB84, 0xC30DFB84, 0xC30EFB84, 0xC30FFB84, 0xC310FB84, 0xC311FB84, 0xC312FB84, 0xC313FB84, 0xC314FB84, + 0xC315FB84, 0xC316FB84, 0xC317FB84, 0xC318FB84, 0xC319FB84, 0xC31AFB84, 0xC31BFB84, 0xC31CFB84, 0xC31DFB84, 0xC31EFB84, 0xC31FFB84, 0xC320FB84, 0xC321FB84, 0xC322FB84, 0xC323FB84, + 0xC324FB84, 0xC325FB84, 0xC326FB84, 0xC327FB84, 0xC328FB84, 0xC329FB84, 0xC32AFB84, 0xC32BFB84, 0xC32CFB84, 0xC32DFB84, 0xC32EFB84, 0xC32FFB84, 0xC330FB84, 0xC331FB84, 0xC332FB84, + 0xC333FB84, 0xC334FB84, 0xC335FB84, 0xC336FB84, 0xC337FB84, 0xC338FB84, 0xC339FB84, 0xC33AFB84, 0xC33BFB84, 0xC33CFB84, 0xC33DFB84, 0xC33EFB84, 0xC33FFB84, 0xC340FB84, 0xC341FB84, + 0xC342FB84, 0xC343FB84, 
0xC344FB84, 0xC345FB84, 0xC346FB84, 0xC347FB84, 0xC348FB84, 0xC349FB84, 0xC34AFB84, 0xC34BFB84, 0xC34CFB84, 0xC34DFB84, 0xC34EFB84, 0xC34FFB84, 0xC350FB84, + 0xC351FB84, 0xC352FB84, 0xC353FB84, 0xC354FB84, 0xC355FB84, 0xC356FB84, 0xC357FB84, 0xC358FB84, 0xC359FB84, 0xC35AFB84, 0xC35BFB84, 0xC35CFB84, 0xC35DFB84, 0xC35EFB84, 0xC35FFB84, + 0xC360FB84, 0xC361FB84, 0xC362FB84, 0xC363FB84, 0xC364FB84, 0xC365FB84, 0xC366FB84, 0xC367FB84, 0xC368FB84, 0xC369FB84, 0xC36AFB84, 0xC36BFB84, 0xC36CFB84, 0xC36DFB84, 0xC36EFB84, + 0xC36FFB84, 0xC370FB84, 0xC371FB84, 0xC372FB84, 0xC373FB84, 0xC374FB84, 0xC375FB84, 0xC376FB84, 0xC377FB84, 0xC378FB84, 0xC379FB84, 0xC37AFB84, 0xC37BFB84, 0xC37CFB84, 0xC37DFB84, + 0xC37EFB84, 0xC37FFB84, 0xC380FB84, 0xC381FB84, 0xC382FB84, 0xC383FB84, 0xC384FB84, 0xC385FB84, 0xC386FB84, 0xC387FB84, 0xC388FB84, 0xC389FB84, 0xC38AFB84, 0xC38BFB84, 0xC38CFB84, + 0xC38DFB84, 0xC38EFB84, 0xC38FFB84, 0xC390FB84, 0xC391FB84, 0xC392FB84, 0xC393FB84, 0xC394FB84, 0xC395FB84, 0xC396FB84, 0xC397FB84, 0xC398FB84, 0xC399FB84, 0xC39AFB84, 0xC39BFB84, + 0xC39CFB84, 0xC39DFB84, 0xC39EFB84, 0xC39FFB84, 0xC3A0FB84, 0xC3A1FB84, 0xC3A2FB84, 0xC3A3FB84, 0xC3A4FB84, 0xC3A5FB84, 0xC3A6FB84, 0xC3A7FB84, 0xC3A8FB84, 0xC3A9FB84, 0xC3AAFB84, + 0xC3ABFB84, 0xC3ACFB84, 0xC3ADFB84, 0xC3AEFB84, 0xC3AFFB84, 0xC3B0FB84, 0xC3B1FB84, 0xC3B2FB84, 0xC3B3FB84, 0xC3B4FB84, 0xC3B5FB84, 0xC3B6FB84, 0xC3B7FB84, 0xC3B8FB84, 0xC3B9FB84, + 0xC3BAFB84, 0xC3BBFB84, 0xC3BCFB84, 0xC3BDFB84, 0xC3BEFB84, 0xC3BFFB84, 0xC3C0FB84, 0xC3C1FB84, 0xC3C2FB84, 0xC3C3FB84, 0xC3C4FB84, 0xC3C5FB84, 0xC3C6FB84, 0xC3C7FB84, 0xC3C8FB84, + 0xC3C9FB84, 0xC3CAFB84, 0xC3CBFB84, 0xC3CCFB84, 0xC3CDFB84, 0xC3CEFB84, 0xC3CFFB84, 0xC3D0FB84, 0xC3D1FB84, 0xC3D2FB84, 0xC3D3FB84, 0xC3D4FB84, 0xC3D5FB84, 0xC3D6FB84, 0xC3D7FB84, + 0xC3D8FB84, 0xC3D9FB84, 0xC3DAFB84, 0xC3DBFB84, 0xC3DCFB84, 0xC3DDFB84, 0xC3DEFB84, 0xC3DFFB84, 0xC3E0FB84, 0xC3E1FB84, 0xC3E2FB84, 0xC3E3FB84, 0xC3E4FB84, 0xC3E5FB84, 0xC3E6FB84, + 0xC3E7FB84, 
0xC3E8FB84, 0xC3E9FB84, 0xC3EAFB84, 0xC3EBFB84, 0xC3ECFB84, 0xC3EDFB84, 0xC3EEFB84, 0xC3EFFB84, 0xC3F0FB84, 0xC3F1FB84, 0xC3F2FB84, 0xC3F3FB84, 0xC3F4FB84, 0xC3F5FB84, + 0xC3F6FB84, 0xC3F7FB84, 0xC3F8FB84, 0xC3F9FB84, 0xC3FAFB84, 0xC3FBFB84, 0xC3FCFB84, 0xC3FDFB84, 0xC3FEFB84, 0xC3FFFB84, 0xC400FB84, 0xC401FB84, 0xC402FB84, 0xC403FB84, 0xC404FB84, + 0xC405FB84, 0xC406FB84, 0xC407FB84, 0xC408FB84, 0xC409FB84, 0xC40AFB84, 0xC40BFB84, 0xC40CFB84, 0xC40DFB84, 0xC40EFB84, 0xC40FFB84, 0xC410FB84, 0xC411FB84, 0xC412FB84, 0xC413FB84, + 0xC414FB84, 0xC415FB84, 0xC416FB84, 0xC417FB84, 0xC418FB84, 0xC419FB84, 0xC41AFB84, 0xC41BFB84, 0xC41CFB84, 0xC41DFB84, 0xC41EFB84, 0xC41FFB84, 0xC420FB84, 0xC421FB84, 0xC422FB84, + 0xC423FB84, 0xC424FB84, 0xC425FB84, 0xC426FB84, 0xC427FB84, 0xC428FB84, 0xC429FB84, 0xC42AFB84, 0xC42BFB84, 0xC42CFB84, 0xC42DFB84, 0xC42EFB84, 0xC42FFB84, 0xC430FB84, 0xC431FB84, + 0xC432FB84, 0xC433FB84, 0xC434FB84, 0xC435FB84, 0xC436FB84, 0xC437FB84, 0xC438FB84, 0xC439FB84, 0xC43AFB84, 0xC43BFB84, 0xC43CFB84, 0xC43DFB84, 0xC43EFB84, 0xC43FFB84, 0xC440FB84, + 0xC441FB84, 0xC442FB84, 0xC443FB84, 0xC444FB84, 0xC445FB84, 0xC446FB84, 0xC447FB84, 0xC448FB84, 0xC449FB84, 0xC44AFB84, 0xC44BFB84, 0xC44CFB84, 0xC44DFB84, 0xC44EFB84, 0xC44FFB84, + 0xC450FB84, 0xC451FB84, 0xC452FB84, 0xC453FB84, 0xC454FB84, 0xC455FB84, 0xC456FB84, 0xC457FB84, 0xC458FB84, 0xC459FB84, 0xC45AFB84, 0xC45BFB84, 0xC45CFB84, 0xC45DFB84, 0xC45EFB84, + 0xC45FFB84, 0xC460FB84, 0xC461FB84, 0xC462FB84, 0xC463FB84, 0xC464FB84, 0xC465FB84, 0xC466FB84, 0xC467FB84, 0xC468FB84, 0xC469FB84, 0xC46AFB84, 0xC46BFB84, 0xC46CFB84, 0xC46DFB84, + 0xC46EFB84, 0xC46FFB84, 0xC470FB84, 0xC471FB84, 0xC472FB84, 0xC473FB84, 0xC474FB84, 0xC475FB84, 0xC476FB84, 0xC477FB84, 0xC478FB84, 0xC479FB84, 0xC47AFB84, 0xC47BFB84, 0xC47CFB84, + 0xC47DFB84, 0xC47EFB84, 0xC47FFB84, 0xC480FB84, 0xC481FB84, 0xC482FB84, 0xC483FB84, 0xC484FB84, 0xC485FB84, 0xC486FB84, 0xC487FB84, 0xC488FB84, 0xC489FB84, 0xC48AFB84, 0xC48BFB84, + 
0xC48CFB84, 0xC48DFB84, 0xC48EFB84, 0xC48FFB84, 0xC490FB84, 0xC491FB84, 0xC492FB84, 0xC493FB84, 0xC494FB84, 0xC495FB84, 0xC496FB84, 0xC497FB84, 0xC498FB84, 0xC499FB84, 0xC49AFB84, + 0xC49BFB84, 0xC49CFB84, 0xC49DFB84, 0xC49EFB84, 0xC49FFB84, 0xC4A0FB84, 0xC4A1FB84, 0xC4A2FB84, 0xC4A3FB84, 0xC4A4FB84, 0xC4A5FB84, 0xC4A6FB84, 0xC4A7FB84, 0xC4A8FB84, 0xC4A9FB84, + 0xC4AAFB84, 0xC4ABFB84, 0xC4ACFB84, 0xC4ADFB84, 0xC4AEFB84, 0xC4AFFB84, 0xC4B0FB84, 0xC4B1FB84, 0xC4B2FB84, 0xC4B3FB84, 0xC4B4FB84, 0xC4B5FB84, 0xC4B6FB84, 0xC4B7FB84, 0xC4B8FB84, + 0xC4B9FB84, 0xC4BAFB84, 0xC4BBFB84, 0xC4BCFB84, 0xC4BDFB84, 0xC4BEFB84, 0xC4BFFB84, 0xC4C0FB84, 0xC4C1FB84, 0xC4C2FB84, 0xC4C3FB84, 0xC4C4FB84, 0xC4C5FB84, 0xC4C6FB84, 0xC4C7FB84, + 0xC4C8FB84, 0xC4C9FB84, 0xC4CAFB84, 0xC4CBFB84, 0xC4CCFB84, 0xC4CDFB84, 0xC4CEFB84, 0xC4CFFB84, 0xC4D0FB84, 0xC4D1FB84, 0xC4D2FB84, 0xC4D3FB84, 0xC4D4FB84, 0xC4D5FB84, 0xC4D6FB84, + 0xC4D7FB84, 0xC4D8FB84, 0xC4D9FB84, 0xC4DAFB84, 0xC4DBFB84, 0xC4DCFB84, 0xC4DDFB84, 0xC4DEFB84, 0xC4DFFB84, 0xC4E0FB84, 0xC4E1FB84, 0xC4E2FB84, 0xC4E3FB84, 0xC4E4FB84, 0xC4E5FB84, + 0xC4E6FB84, 0xC4E7FB84, 0xC4E8FB84, 0xC4E9FB84, 0xC4EAFB84, 0xC4EBFB84, 0xC4ECFB84, 0xC4EDFB84, 0xC4EEFB84, 0xC4EFFB84, 0xC4F0FB84, 0xC4F1FB84, 0xC4F2FB84, 0xC4F3FB84, 0xC4F4FB84, + 0xC4F5FB84, 0xC4F6FB84, 0xC4F7FB84, 0xC4F8FB84, 0xC4F9FB84, 0xC4FAFB84, 0xC4FBFB84, 0xC4FCFB84, 0xC4FDFB84, 0xC4FEFB84, 0xC4FFFB84, 0xC500FB84, 0xC501FB84, 0xC502FB84, 0xC503FB84, + 0xC504FB84, 0xC505FB84, 0xC506FB84, 0xC507FB84, 0xC508FB84, 0xC509FB84, 0xC50AFB84, 0xC50BFB84, 0xC50CFB84, 0xC50DFB84, 0xC50EFB84, 0xC50FFB84, 0xC510FB84, 0xC511FB84, 0xC512FB84, + 0xC513FB84, 0xC514FB84, 0xC515FB84, 0xC516FB84, 0xC517FB84, 0xC518FB84, 0xC519FB84, 0xC51AFB84, 0xC51BFB84, 0xC51CFB84, 0xC51DFB84, 0xC51EFB84, 0xC51FFB84, 0xC520FB84, 0xC521FB84, + 0xC522FB84, 0xC523FB84, 0xC524FB84, 0xC525FB84, 0xC526FB84, 0xC527FB84, 0xC528FB84, 0xC529FB84, 0xC52AFB84, 0xC52BFB84, 0xC52CFB84, 0xC52DFB84, 0xC52EFB84, 0xC52FFB84, 0xC530FB84, 
+ 0xC531FB84, 0xC532FB84, 0xC533FB84, 0xC534FB84, 0xC535FB84, 0xC536FB84, 0xC537FB84, 0xC538FB84, 0xC539FB84, 0xC53AFB84, 0xC53BFB84, 0xC53CFB84, 0xC53DFB84, 0xC53EFB84, 0xC53FFB84, + 0xC540FB84, 0xC541FB84, 0xC542FB84, 0xC543FB84, 0xC544FB84, 0xC545FB84, 0xC546FB84, 0xC547FB84, 0xC548FB84, 0xC549FB84, 0xC54AFB84, 0xC54BFB84, 0xC54CFB84, 0xC54DFB84, 0xC54EFB84, + 0xC54FFB84, 0xC550FB84, 0xC551FB84, 0xC552FB84, 0xC553FB84, 0xC554FB84, 0xC555FB84, 0xC556FB84, 0xC557FB84, 0xC558FB84, 0xC559FB84, 0xC55AFB84, 0xC55BFB84, 0xC55CFB84, 0xC55DFB84, + 0xC55EFB84, 0xC55FFB84, 0xC560FB84, 0xC561FB84, 0xC562FB84, 0xC563FB84, 0xC564FB84, 0xC565FB84, 0xC566FB84, 0xC567FB84, 0xC568FB84, 0xC569FB84, 0xC56AFB84, 0xC56BFB84, 0xC56CFB84, + 0xC56DFB84, 0xC56EFB84, 0xC56FFB84, 0xC570FB84, 0xC571FB84, 0xC572FB84, 0xC573FB84, 0xC574FB84, 0xC575FB84, 0xC576FB84, 0xC577FB84, 0xC578FB84, 0xC579FB84, 0xC57AFB84, 0xC57BFB84, + 0xC57CFB84, 0xC57DFB84, 0xC57EFB84, 0xC57FFB84, 0xC580FB84, 0xC581FB84, 0xC582FB84, 0xC583FB84, 0xC584FB84, 0xC585FB84, 0xC586FB84, 0xC587FB84, 0xC588FB84, 0xC589FB84, 0xC58AFB84, + 0xC58BFB84, 0xC58CFB84, 0xC58DFB84, 0xC58EFB84, 0xC58FFB84, 0xC590FB84, 0xC591FB84, 0xC592FB84, 0xC593FB84, 0xC594FB84, 0xC595FB84, 0xC596FB84, 0xC597FB84, 0xC598FB84, 0xC599FB84, + 0xC59AFB84, 0xC59BFB84, 0xC59CFB84, 0xC59DFB84, 0xC59EFB84, 0xC59FFB84, 0xC5A0FB84, 0xC5A1FB84, 0xC5A2FB84, 0xC5A3FB84, 0xC5A4FB84, 0xC5A5FB84, 0xC5A6FB84, 0xC5A7FB84, 0xC5A8FB84, + 0xC5A9FB84, 0xC5AAFB84, 0xC5ABFB84, 0xC5ACFB84, 0xC5ADFB84, 0xC5AEFB84, 0xC5AFFB84, 0xC5B0FB84, 0xC5B1FB84, 0xC5B2FB84, 0xC5B3FB84, 0xC5B4FB84, 0xC5B5FB84, 0xC5B6FB84, 0xC5B7FB84, + 0xC5B8FB84, 0xC5B9FB84, 0xC5BAFB84, 0xC5BBFB84, 0xC5BCFB84, 0xC5BDFB84, 0xC5BEFB84, 0xC5BFFB84, 0xC5C0FB84, 0xC5C1FB84, 0xC5C2FB84, 0xC5C3FB84, 0xC5C4FB84, 0xC5C5FB84, 0xC5C6FB84, + 0xC5C7FB84, 0xC5C8FB84, 0xC5C9FB84, 0xC5CAFB84, 0xC5CBFB84, 0xC5CCFB84, 0xC5CDFB84, 0xC5CEFB84, 0xC5CFFB84, 0xC5D0FB84, 0xC5D1FB84, 0xC5D2FB84, 0xC5D3FB84, 0xC5D4FB84, 
0xC5D5FB84, + 0xC5D6FB84, 0xC5D7FB84, 0xC5D8FB84, 0xC5D9FB84, 0xC5DAFB84, 0xC5DBFB84, 0xC5DCFB84, 0xC5DDFB84, 0xC5DEFB84, 0xC5DFFB84, 0xC5E0FB84, 0xC5E1FB84, 0xC5E2FB84, 0xC5E3FB84, 0xC5E4FB84, + 0xC5E5FB84, 0xC5E6FB84, 0xC5E7FB84, 0xC5E8FB84, 0xC5E9FB84, 0xC5EAFB84, 0xC5EBFB84, 0xC5ECFB84, 0xC5EDFB84, 0xC5EEFB84, 0xC5EFFB84, 0xC5F0FB84, 0xC5F1FB84, 0xC5F2FB84, 0xC5F3FB84, + 0xC5F4FB84, 0xC5F5FB84, 0xC5F6FB84, 0xC5F7FB84, 0xC5F8FB84, 0xC5F9FB84, 0xC5FAFB84, 0xC5FBFB84, 0xC5FCFB84, 0xC5FDFB84, 0xC5FEFB84, 0xC5FFFB84, 0xC600FB84, 0xC601FB84, 0xC602FB84, + 0xC603FB84, 0xC604FB84, 0xC605FB84, 0xC606FB84, 0xC607FB84, 0xC608FB84, 0xC609FB84, 0xC60AFB84, 0xC60BFB84, 0xC60CFB84, 0xC60DFB84, 0xC60EFB84, 0xC60FFB84, 0xC610FB84, 0xC611FB84, + 0xC612FB84, 0xC613FB84, 0xC614FB84, 0xC615FB84, 0xC616FB84, 0xC617FB84, 0xC618FB84, 0xC619FB84, 0xC61AFB84, 0xC61BFB84, 0xC61CFB84, 0xC61DFB84, 0xC61EFB84, 0xC61FFB84, 0xC620FB84, + 0xC621FB84, 0xC622FB84, 0xC623FB84, 0xC624FB84, 0xC625FB84, 0xC626FB84, 0xC627FB84, 0xC628FB84, 0xC629FB84, 0xC62AFB84, 0xC62BFB84, 0xC62CFB84, 0xC62DFB84, 0xC62EFB84, 0xC62FFB84, + 0xC630FB84, 0xC631FB84, 0xC632FB84, 0xC633FB84, 0xC634FB84, 0xC635FB84, 0xC636FB84, 0xC637FB84, 0xC638FB84, 0xC639FB84, 0xC63AFB84, 0xC63BFB84, 0xC63CFB84, 0xC63DFB84, 0xC63EFB84, + 0xC63FFB84, 0xC640FB84, 0xC641FB84, 0xC642FB84, 0xC643FB84, 0xC644FB84, 0xC645FB84, 0xC646FB84, 0xC647FB84, 0xC648FB84, 0xC649FB84, 0xC64AFB84, 0xC64BFB84, 0xC64CFB84, 0xC64DFB84, + 0xC64EFB84, 0xC64FFB84, 0xC650FB84, 0xC651FB84, 0xC652FB84, 0xC653FB84, 0xC654FB84, 0xC655FB84, 0xC656FB84, 0xC657FB84, 0xC658FB84, 0xC659FB84, 0xC65AFB84, 0xC65BFB84, 0xC65CFB84, + 0xC65DFB84, 0xC65EFB84, 0xC65FFB84, 0xC660FB84, 0xC661FB84, 0xC662FB84, 0xC663FB84, 0xC664FB84, 0xC665FB84, 0xC666FB84, 0xC667FB84, 0xC668FB84, 0xC669FB84, 0xC66AFB84, 0xC66BFB84, + 0xC66CFB84, 0xC66DFB84, 0xC66EFB84, 0xC66FFB84, 0xC670FB84, 0xC671FB84, 0xC672FB84, 0xC673FB84, 0xC674FB84, 0xC675FB84, 0xC676FB84, 0xC677FB84, 0xC678FB84, 
0xC679FB84, 0xC67AFB84, + 0xC67BFB84, 0xC67CFB84, 0xC67DFB84, 0xC67EFB84, 0xC67FFB84, 0xC680FB84, 0xC681FB84, 0xC682FB84, 0xC683FB84, 0xC684FB84, 0xC685FB84, 0xC686FB84, 0xC687FB84, 0xC688FB84, 0xC689FB84, + 0xC68AFB84, 0xC68BFB84, 0xC68CFB84, 0xC68DFB84, 0xC68EFB84, 0xC68FFB84, 0xC690FB84, 0xC691FB84, 0xC692FB84, 0xC693FB84, 0xC694FB84, 0xC695FB84, 0xC696FB84, 0xC697FB84, 0xC698FB84, + 0xC699FB84, 0xC69AFB84, 0xC69BFB84, 0xC69CFB84, 0xC69DFB84, 0xC69EFB84, 0xC69FFB84, 0xC6A0FB84, 0xC6A1FB84, 0xC6A2FB84, 0xC6A3FB84, 0xC6A4FB84, 0xC6A5FB84, 0xC6A6FB84, 0xC6A7FB84, + 0xC6A8FB84, 0xC6A9FB84, 0xC6AAFB84, 0xC6ABFB84, 0xC6ACFB84, 0xC6ADFB84, 0xC6AEFB84, 0xC6AFFB84, 0xC6B0FB84, 0xC6B1FB84, 0xC6B2FB84, 0xC6B3FB84, 0xC6B4FB84, 0xC6B5FB84, 0xC6B6FB84, + 0xC6B7FB84, 0xC6B8FB84, 0xC6B9FB84, 0xC6BAFB84, 0xC6BBFB84, 0xC6BCFB84, 0xC6BDFB84, 0xC6BEFB84, 0xC6BFFB84, 0xC6C0FB84, 0xC6C1FB84, 0xC6C2FB84, 0xC6C3FB84, 0xC6C4FB84, 0xC6C5FB84, + 0xC6C6FB84, 0xC6C7FB84, 0xC6C8FB84, 0xC6C9FB84, 0xC6CAFB84, 0xC6CBFB84, 0xC6CCFB84, 0xC6CDFB84, 0xC6CEFB84, 0xC6CFFB84, 0xC6D0FB84, 0xC6D1FB84, 0xC6D2FB84, 0xC6D3FB84, 0xC6D4FB84, + 0xC6D5FB84, 0xC6D6FB84, 0xC6D7FB84, 0xC6D8FB84, 0xC6D9FB84, 0xC6DAFB84, 0xC6DBFB84, 0xC6DCFB84, 0xC6DDFB84, 0xC6DEFB84, 0xC6DFFB84, 0xC6E0FB84, 0xC6E1FB84, 0xC6E2FB84, 0xC6E3FB84, + 0xC6E4FB84, 0xC6E5FB84, 0xC6E6FB84, 0xC6E7FB84, 0xC6E8FB84, 0xC6E9FB84, 0xC6EAFB84, 0xC6EBFB84, 0xC6ECFB84, 0xC6EDFB84, 0xC6EEFB84, 0xC6EFFB84, 0xC6F0FB84, 0xC6F1FB84, 0xC6F2FB84, + 0xC6F3FB84, 0xC6F4FB84, 0xC6F5FB84, 0xC6F6FB84, 0xC6F7FB84, 0xC6F8FB84, 0xC6F9FB84, 0xC6FAFB84, 0xC6FBFB84, 0xC6FCFB84, 0xC6FDFB84, 0xC6FEFB84, 0xC6FFFB84, 0xC700FB84, 0xC701FB84, + 0xC702FB84, 0xC703FB84, 0xC704FB84, 0xC705FB84, 0xC706FB84, 0xC707FB84, 0xC708FB84, 0xC709FB84, 0xC70AFB84, 0xC70BFB84, 0xC70CFB84, 0xC70DFB84, 0xC70EFB84, 0xC70FFB84, 0xC710FB84, + 0xC711FB84, 0xC712FB84, 0xC713FB84, 0xC714FB84, 0xC715FB84, 0xC716FB84, 0xC717FB84, 0xC718FB84, 0xC719FB84, 0xC71AFB84, 0xC71BFB84, 0xC71CFB84, 
0xC71DFB84, 0xC71EFB84, 0xC71FFB84, + 0xC720FB84, 0xC721FB84, 0xC722FB84, 0xC723FB84, 0xC724FB84, 0xC725FB84, 0xC726FB84, 0xC727FB84, 0xC728FB84, 0xC729FB84, 0xC72AFB84, 0xC72BFB84, 0xC72CFB84, 0xC72DFB84, 0xC72EFB84, + 0xC72FFB84, 0xC730FB84, 0xC731FB84, 0xC732FB84, 0xC733FB84, 0xC734FB84, 0xC735FB84, 0xC736FB84, 0xC737FB84, 0xC738FB84, 0xC739FB84, 0xC73AFB84, 0xC73BFB84, 0xC73CFB84, 0xC73DFB84, + 0xC73EFB84, 0xC73FFB84, 0xC740FB84, 0xC741FB84, 0xC742FB84, 0xC743FB84, 0xC744FB84, 0xC745FB84, 0xC746FB84, 0xC747FB84, 0xC748FB84, 0xC749FB84, 0xC74AFB84, 0xC74BFB84, 0xC74CFB84, + 0xC74DFB84, 0xC74EFB84, 0xC74FFB84, 0xC750FB84, 0xC751FB84, 0xC752FB84, 0xC753FB84, 0xC754FB84, 0xC755FB84, 0xC756FB84, 0xC757FB84, 0xC758FB84, 0xC759FB84, 0xC75AFB84, 0xC75BFB84, + 0xC75CFB84, 0xC75DFB84, 0xC75EFB84, 0xC75FFB84, 0xC760FB84, 0xC761FB84, 0xC762FB84, 0xC763FB84, 0xC764FB84, 0xC765FB84, 0xC766FB84, 0xC767FB84, 0xC768FB84, 0xC769FB84, 0xC76AFB84, + 0xC76BFB84, 0xC76CFB84, 0xC76DFB84, 0xC76EFB84, 0xC76FFB84, 0xC770FB84, 0xC771FB84, 0xC772FB84, 0xC773FB84, 0xC774FB84, 0xC775FB84, 0xC776FB84, 0xC777FB84, 0xC778FB84, 0xC779FB84, + 0xC77AFB84, 0xC77BFB84, 0xC77CFB84, 0xC77DFB84, 0xC77EFB84, 0xC77FFB84, 0xC780FB84, 0xC781FB84, 0xC782FB84, 0xC783FB84, 0xC784FB84, 0xC785FB84, 0xC786FB84, 0xC787FB84, 0xC788FB84, + 0xC789FB84, 0xC78AFB84, 0xC78BFB84, 0xC78CFB84, 0xC78DFB84, 0xC78EFB84, 0xC78FFB84, 0xC790FB84, 0xC791FB84, 0xC792FB84, 0xC793FB84, 0xC794FB84, 0xC795FB84, 0xC796FB84, 0xC797FB84, + 0xC798FB84, 0xC799FB84, 0xC79AFB84, 0xC79BFB84, 0xC79CFB84, 0xC79DFB84, 0xC79EFB84, 0xC79FFB84, 0xC7A0FB84, 0xC7A1FB84, 0xC7A2FB84, 0xC7A3FB84, 0xC7A4FB84, 0xC7A5FB84, 0xC7A6FB84, + 0xC7A7FB84, 0xC7A8FB84, 0xC7A9FB84, 0xC7AAFB84, 0xC7ABFB84, 0xC7ACFB84, 0xC7ADFB84, 0xC7AEFB84, 0xC7AFFB84, 0xC7B0FB84, 0xC7B1FB84, 0xC7B2FB84, 0xC7B3FB84, 0xC7B4FB84, 0xC7B5FB84, + 0xC7B6FB84, 0xC7B7FB84, 0xC7B8FB84, 0xC7B9FB84, 0xC7BAFB84, 0xC7BBFB84, 0xC7BCFB84, 0xC7BDFB84, 0xC7BEFB84, 0xC7BFFB84, 0xC7C0FB84, 
0xC7C1FB84, 0xC7C2FB84, 0xC7C3FB84, 0xC7C4FB84, + 0xC7C5FB84, 0xC7C6FB84, 0xC7C7FB84, 0xC7C8FB84, 0xC7C9FB84, 0xC7CAFB84, 0xC7CBFB84, 0xC7CCFB84, 0xC7CDFB84, 0xC7CEFB84, 0xC7CFFB84, 0xC7D0FB84, 0xC7D1FB84, 0xC7D2FB84, 0xC7D3FB84, + 0xC7D4FB84, 0xC7D5FB84, 0xC7D6FB84, 0xC7D7FB84, 0xC7D8FB84, 0xC7D9FB84, 0xC7DAFB84, 0xC7DBFB84, 0xC7DCFB84, 0xC7DDFB84, 0xC7DEFB84, 0xC7DFFB84, 0xC7E0FB84, 0xC7E1FB84, 0xC7E2FB84, + 0xC7E3FB84, 0xC7E4FB84, 0xC7E5FB84, 0xC7E6FB84, 0xC7E7FB84, 0xC7E8FB84, 0xC7E9FB84, 0xC7EAFB84, 0xC7EBFB84, 0xC7ECFB84, 0xC7EDFB84, 0xC7EEFB84, 0xC7EFFB84, 0xC7F0FB84, 0xC7F1FB84, + 0xC7F2FB84, 0xC7F3FB84, 0xC7F4FB84, 0xC7F5FB84, 0xC7F6FB84, 0xC7F7FB84, 0xC7F8FB84, 0xC7F9FB84, 0xC7FAFB84, 0xC7FBFB84, 0xC7FCFB84, 0xC7FDFB84, 0xC7FEFB84, 0xC7FFFB84, 0xC800FB84, + 0xC801FB84, 0xC802FB84, 0xC803FB84, 0xC804FB84, 0xC805FB84, 0xC806FB84, 0xC807FB84, 0xC808FB84, 0xC809FB84, 0xC80AFB84, 0xC80BFB84, 0xC80CFB84, 0xC80DFB84, 0xC80EFB84, 0xC80FFB84, + 0xC810FB84, 0xC811FB84, 0xC812FB84, 0xC813FB84, 0xC814FB84, 0xC815FB84, 0xC816FB84, 0xC817FB84, 0xC818FB84, 0xC819FB84, 0xC81AFB84, 0xC81BFB84, 0xC81CFB84, 0xC81DFB84, 0xC81EFB84, + 0xC81FFB84, 0xC820FB84, 0xC821FB84, 0xC822FB84, 0xC823FB84, 0xC824FB84, 0xC825FB84, 0xC826FB84, 0xC827FB84, 0xC828FB84, 0xC829FB84, 0xC82AFB84, 0xC82BFB84, 0xC82CFB84, 0xC82DFB84, + 0xC82EFB84, 0xC82FFB84, 0xC830FB84, 0xC831FB84, 0xC832FB84, 0xC833FB84, 0xC834FB84, 0xC835FB84, 0xC836FB84, 0xC837FB84, 0xC838FB84, 0xC839FB84, 0xC83AFB84, 0xC83BFB84, 0xC83CFB84, + 0xC83DFB84, 0xC83EFB84, 0xC83FFB84, 0xC840FB84, 0xC841FB84, 0xC842FB84, 0xC843FB84, 0xC844FB84, 0xC845FB84, 0xC846FB84, 0xC847FB84, 0xC848FB84, 0xC849FB84, 0xC84AFB84, 0xC84BFB84, + 0xC84CFB84, 0xC84DFB84, 0xC84EFB84, 0xC84FFB84, 0xC850FB84, 0xC851FB84, 0xC852FB84, 0xC853FB84, 0xC854FB84, 0xC855FB84, 0xC856FB84, 0xC857FB84, 0xC858FB84, 0xC859FB84, 0xC85AFB84, + 0xC85BFB84, 0xC85CFB84, 0xC85DFB84, 0xC85EFB84, 0xC85FFB84, 0xC860FB84, 0xC861FB84, 0xC862FB84, 0xC863FB84, 0xC864FB84, 
0xC865FB84, 0xC866FB84, 0xC867FB84, 0xC868FB84, 0xC869FB84, + 0xC86AFB84, 0xC86BFB84, 0xC86CFB84, 0xC86DFB84, 0xC86EFB84, 0xC86FFB84, 0xC870FB84, 0xC871FB84, 0xC872FB84, 0xC873FB84, 0xC874FB84, 0xC875FB84, 0xC876FB84, 0xC877FB84, 0xC878FB84, + 0xC879FB84, 0xC87AFB84, 0xC87BFB84, 0xC87CFB84, 0xC87DFB84, 0xC87EFB84, 0xC87FFB84, 0xC880FB84, 0xC881FB84, 0xC882FB84, 0xC883FB84, 0xC884FB84, 0xC885FB84, 0xC886FB84, 0xC887FB84, + 0xC888FB84, 0xC889FB84, 0xC88AFB84, 0xC88BFB84, 0xC88CFB84, 0xC88DFB84, 0xC88EFB84, 0xC88FFB84, 0xC890FB84, 0xC891FB84, 0xC892FB84, 0xC893FB84, 0xC894FB84, 0xC895FB84, 0xC896FB84, + 0xC897FB84, 0xC898FB84, 0xC899FB84, 0xC89AFB84, 0xC89BFB84, 0xC89CFB84, 0xC89DFB84, 0xC89EFB84, 0xC89FFB84, 0xC8A0FB84, 0xC8A1FB84, 0xC8A2FB84, 0xC8A3FB84, 0xC8A4FB84, 0xC8A5FB84, + 0xC8A6FB84, 0xC8A7FB84, 0xC8A8FB84, 0xC8A9FB84, 0xC8AAFB84, 0xC8ABFB84, 0xC8ACFB84, 0xC8ADFB84, 0xC8AEFB84, 0xC8AFFB84, 0xC8B0FB84, 0xC8B1FB84, 0xC8B2FB84, 0xC8B3FB84, 0xC8B4FB84, + 0xC8B5FB84, 0xC8B6FB84, 0xC8B7FB84, 0xC8B8FB84, 0xC8B9FB84, 0xC8BAFB84, 0xC8BBFB84, 0xC8BCFB84, 0xC8BDFB84, 0xC8BEFB84, 0xC8BFFB84, 0xC8C0FB84, 0xC8C1FB84, 0xC8C2FB84, 0xC8C3FB84, + 0xC8C4FB84, 0xC8C5FB84, 0xC8C6FB84, 0xC8C7FB84, 0xC8C8FB84, 0xC8C9FB84, 0xC8CAFB84, 0xC8CBFB84, 0xC8CCFB84, 0xC8CDFB84, 0xC8CEFB84, 0xC8CFFB84, 0xC8D0FB84, 0xC8D1FB84, 0xC8D2FB84, + 0xC8D3FB84, 0xC8D4FB84, 0xC8D5FB84, 0xC8D6FB84, 0xC8D7FB84, 0xC8D8FB84, 0xC8D9FB84, 0xC8DAFB84, 0xC8DBFB84, 0xC8DCFB84, 0xC8DDFB84, 0xC8DEFB84, 0xC8DFFB84, 0xC8E0FB84, 0xC8E1FB84, + 0xC8E2FB84, 0xC8E3FB84, 0xC8E4FB84, 0xC8E5FB84, 0xC8E6FB84, 0xC8E7FB84, 0xC8E8FB84, 0xC8E9FB84, 0xC8EAFB84, 0xC8EBFB84, 0xC8ECFB84, 0xC8EDFB84, 0xC8EEFB84, 0xC8EFFB84, 0xC8F0FB84, + 0xC8F1FB84, 0xC8F2FB84, 0xC8F3FB84, 0xC8F4FB84, 0xC8F5FB84, 0xC8F6FB84, 0xC8F7FB84, 0xC8F8FB84, 0xC8F9FB84, 0xC8FAFB84, 0xC8FBFB84, 0xC8FCFB84, 0xC8FDFB84, 0xC8FEFB84, 0xC8FFFB84, + 0xC900FB84, 0xC901FB84, 0xC902FB84, 0xC903FB84, 0xC904FB84, 0xC905FB84, 0xC906FB84, 0xC907FB84, 0xC908FB84, 
0xC909FB84, 0xC90AFB84, 0xC90BFB84, 0xC90CFB84, 0xC90DFB84, 0xC90EFB84, + 0xC90FFB84, 0xC910FB84, 0xC911FB84, 0xC912FB84, 0xC913FB84, 0xC914FB84, 0xC915FB84, 0xC916FB84, 0xC917FB84, 0xC918FB84, 0xC919FB84, 0xC91AFB84, 0xC91BFB84, 0xC91CFB84, 0xC91DFB84, + 0xC91EFB84, 0xC91FFB84, 0xC920FB84, 0xC921FB84, 0xC922FB84, 0xC923FB84, 0xC924FB84, 0xC925FB84, 0xC926FB84, 0xC927FB84, 0xC928FB84, 0xC929FB84, 0xC92AFB84, 0xC92BFB84, 0xC92CFB84, + 0xC92DFB84, 0xC92EFB84, 0xC92FFB84, 0xC930FB84, 0xC931FB84, 0xC932FB84, 0xC933FB84, 0xC934FB84, 0xC935FB84, 0xC936FB84, 0xC937FB84, 0xC938FB84, 0xC939FB84, 0xC93AFB84, 0xC93BFB84, + 0xC93CFB84, 0xC93DFB84, 0xC93EFB84, 0xC93FFB84, 0xC940FB84, 0xC941FB84, 0xC942FB84, 0xC943FB84, 0xC944FB84, 0xC945FB84, 0xC946FB84, 0xC947FB84, 0xC948FB84, 0xC949FB84, 0xC94AFB84, + 0xC94BFB84, 0xC94CFB84, 0xC94DFB84, 0xC94EFB84, 0xC94FFB84, 0xC950FB84, 0xC951FB84, 0xC952FB84, 0xC953FB84, 0xC954FB84, 0xC955FB84, 0xC956FB84, 0xC957FB84, 0xC958FB84, 0xC959FB84, + 0xC95AFB84, 0xC95BFB84, 0xC95CFB84, 0xC95DFB84, 0xC95EFB84, 0xC95FFB84, 0xC960FB84, 0xC961FB84, 0xC962FB84, 0xC963FB84, 0xC964FB84, 0xC965FB84, 0xC966FB84, 0xC967FB84, 0xC968FB84, + 0xC969FB84, 0xC96AFB84, 0xC96BFB84, 0xC96CFB84, 0xC96DFB84, 0xC96EFB84, 0xC96FFB84, 0xC970FB84, 0xC971FB84, 0xC972FB84, 0xC973FB84, 0xC974FB84, 0xC975FB84, 0xC976FB84, 0xC977FB84, + 0xC978FB84, 0xC979FB84, 0xC97AFB84, 0xC97BFB84, 0xC97CFB84, 0xC97DFB84, 0xC97EFB84, 0xC97FFB84, 0xC980FB84, 0xC981FB84, 0xC982FB84, 0xC983FB84, 0xC984FB84, 0xC985FB84, 0xC986FB84, + 0xC987FB84, 0xC988FB84, 0xC989FB84, 0xC98AFB84, 0xC98BFB84, 0xC98CFB84, 0xC98DFB84, 0xC98EFB84, 0xC98FFB84, 0xC990FB84, 0xC991FB84, 0xC992FB84, 0xC993FB84, 0xC994FB84, 0xC995FB84, + 0xC996FB84, 0xC997FB84, 0xC998FB84, 0xC999FB84, 0xC99AFB84, 0xC99BFB84, 0xC99CFB84, 0xC99DFB84, 0xC99EFB84, 0xC99FFB84, 0xC9A0FB84, 0xC9A1FB84, 0xC9A2FB84, 0xC9A3FB84, 0xC9A4FB84, + 0xC9A5FB84, 0xC9A6FB84, 0xC9A7FB84, 0xC9A8FB84, 0xC9A9FB84, 0xC9AAFB84, 0xC9ABFB84, 0xC9ACFB84, 
0xC9ADFB84, 0xC9AEFB84, 0xC9AFFB84, 0xC9B0FB84, 0xC9B1FB84, 0xC9B2FB84, 0xC9B3FB84, + 0xC9B4FB84, 0xC9B5FB84, 0xC9B6FB84, 0xC9B7FB84, 0xC9B8FB84, 0xC9B9FB84, 0xC9BAFB84, 0xC9BBFB84, 0xC9BCFB84, 0xC9BDFB84, 0xC9BEFB84, 0xC9BFFB84, 0xC9C0FB84, 0xC9C1FB84, 0xC9C2FB84, + 0xC9C3FB84, 0xC9C4FB84, 0xC9C5FB84, 0xC9C6FB84, 0xC9C7FB84, 0xC9C8FB84, 0xC9C9FB84, 0xC9CAFB84, 0xC9CBFB84, 0xC9CCFB84, 0xC9CDFB84, 0xC9CEFB84, 0xC9CFFB84, 0xC9D0FB84, 0xC9D1FB84, + 0xC9D2FB84, 0xC9D3FB84, 0xC9D4FB84, 0xC9D5FB84, 0xC9D6FB84, 0xC9D7FB84, 0xC9D8FB84, 0xC9D9FB84, 0xC9DAFB84, 0xC9DBFB84, 0xC9DCFB84, 0xC9DDFB84, 0xC9DEFB84, 0xC9DFFB84, 0xC9E0FB84, + 0xC9E1FB84, 0xC9E2FB84, 0xC9E3FB84, 0xC9E4FB84, 0xC9E5FB84, 0xC9E6FB84, 0xC9E7FB84, 0xC9E8FB84, 0xC9E9FB84, 0xC9EAFB84, 0xC9EBFB84, 0xC9ECFB84, 0xC9EDFB84, 0xC9EEFB84, 0xC9EFFB84, + 0xC9F0FB84, 0xC9F1FB84, 0xC9F2FB84, 0xC9F3FB84, 0xC9F4FB84, 0xC9F5FB84, 0xC9F6FB84, 0xC9F7FB84, 0xC9F8FB84, 0xC9F9FB84, 0xC9FAFB84, 0xC9FBFB84, 0xC9FCFB84, 0xC9FDFB84, 0xC9FEFB84, + 0xC9FFFB84, 0xCA00FB84, 0xCA01FB84, 0xCA02FB84, 0xCA03FB84, 0xCA04FB84, 0xCA05FB84, 0xCA06FB84, 0xCA07FB84, 0xCA08FB84, 0xCA09FB84, 0xCA0AFB84, 0xCA0BFB84, 0xCA0CFB84, 0xCA0DFB84, + 0xCA0EFB84, 0xCA0FFB84, 0xCA10FB84, 0xCA11FB84, 0xCA12FB84, 0xCA13FB84, 0xCA14FB84, 0xCA15FB84, 0xCA16FB84, 0xCA17FB84, 0xCA18FB84, 0xCA19FB84, 0xCA1AFB84, 0xCA1BFB84, 0xCA1CFB84, + 0xCA1DFB84, 0xCA1EFB84, 0xCA1FFB84, 0xCA20FB84, 0xCA21FB84, 0xCA22FB84, 0xCA23FB84, 0xCA24FB84, 0xCA25FB84, 0xCA26FB84, 0xCA27FB84, 0xCA28FB84, 0xCA29FB84, 0xCA2AFB84, 0xCA2BFB84, + 0xCA2CFB84, 0xCA2DFB84, 0xCA2EFB84, 0xCA2FFB84, 0xCA30FB84, 0xCA31FB84, 0xCA32FB84, 0xCA33FB84, 0xCA34FB84, 0xCA35FB84, 0xCA36FB84, 0xCA37FB84, 0xCA38FB84, 0xCA39FB84, 0xCA3AFB84, + 0xCA3BFB84, 0xCA3CFB84, 0xCA3DFB84, 0xCA3EFB84, 0xCA3FFB84, 0xCA40FB84, 0xCA41FB84, 0xCA42FB84, 0xCA43FB84, 0xCA44FB84, 0xCA45FB84, 0xCA46FB84, 0xCA47FB84, 0xCA48FB84, 0xCA49FB84, + 0xCA4AFB84, 0xCA4BFB84, 0xCA4CFB84, 0xCA4DFB84, 0xCA4EFB84, 0xCA4FFB84, 0xCA50FB84, 
0xCA51FB84, 0xCA52FB84, 0xCA53FB84, 0xCA54FB84, 0xCA55FB84, 0xCA56FB84, 0xCA57FB84, 0xCA58FB84, + 0xCA59FB84, 0xCA5AFB84, 0xCA5BFB84, 0xCA5CFB84, 0xCA5DFB84, 0xCA5EFB84, 0xCA5FFB84, 0xCA60FB84, 0xCA61FB84, 0xCA62FB84, 0xCA63FB84, 0xCA64FB84, 0xCA65FB84, 0xCA66FB84, 0xCA67FB84, + 0xCA68FB84, 0xCA69FB84, 0xCA6AFB84, 0xCA6BFB84, 0xCA6CFB84, 0xCA6DFB84, 0xCA6EFB84, 0xCA6FFB84, 0xCA70FB84, 0xCA71FB84, 0xCA72FB84, 0xCA73FB84, 0xCA74FB84, 0xCA75FB84, 0xCA76FB84, + 0xCA77FB84, 0xCA78FB84, 0xCA79FB84, 0xCA7AFB84, 0xCA7BFB84, 0xCA7CFB84, 0xCA7DFB84, 0xCA7EFB84, 0xCA7FFB84, 0xCA80FB84, 0xCA81FB84, 0xCA82FB84, 0xCA83FB84, 0xCA84FB84, 0xCA85FB84, + 0xCA86FB84, 0xCA87FB84, 0xCA88FB84, 0xCA89FB84, 0xCA8AFB84, 0xCA8BFB84, 0xCA8CFB84, 0xCA8DFB84, 0xCA8EFB84, 0xCA8FFB84, 0xCA90FB84, 0xCA91FB84, 0xCA92FB84, 0xCA93FB84, 0xCA94FB84, + 0xCA95FB84, 0xCA96FB84, 0xCA97FB84, 0xCA98FB84, 0xCA99FB84, 0xCA9AFB84, 0xCA9BFB84, 0xCA9CFB84, 0xCA9DFB84, 0xCA9EFB84, 0xCA9FFB84, 0xCAA0FB84, 0xCAA1FB84, 0xCAA2FB84, 0xCAA3FB84, + 0xCAA4FB84, 0xCAA5FB84, 0xCAA6FB84, 0xCAA7FB84, 0xCAA8FB84, 0xCAA9FB84, 0xCAAAFB84, 0xCAABFB84, 0xCAACFB84, 0xCAADFB84, 0xCAAEFB84, 0xCAAFFB84, 0xCAB0FB84, 0xCAB1FB84, 0xCAB2FB84, + 0xCAB3FB84, 0xCAB4FB84, 0xCAB5FB84, 0xCAB6FB84, 0xCAB7FB84, 0xCAB8FB84, 0xCAB9FB84, 0xCABAFB84, 0xCABBFB84, 0xCABCFB84, 0xCABDFB84, 0xCABEFB84, 0xCABFFB84, 0xCAC0FB84, 0xCAC1FB84, + 0xCAC2FB84, 0xCAC3FB84, 0xCAC4FB84, 0xCAC5FB84, 0xCAC6FB84, 0xCAC7FB84, 0xCAC8FB84, 0xCAC9FB84, 0xCACAFB84, 0xCACBFB84, 0xCACCFB84, 0xCACDFB84, 0xCACEFB84, 0xCACFFB84, 0xCAD0FB84, + 0xCAD1FB84, 0xCAD2FB84, 0xCAD3FB84, 0xCAD4FB84, 0xCAD5FB84, 0xCAD6FB84, 0xCAD7FB84, 0xCAD8FB84, 0xCAD9FB84, 0xCADAFB84, 0xCADBFB84, 0xCADCFB84, 0xCADDFB84, 0xCADEFB84, 0xCADFFB84, + 0xCAE0FB84, 0xCAE1FB84, 0xCAE2FB84, 0xCAE3FB84, 0xCAE4FB84, 0xCAE5FB84, 0xCAE6FB84, 0xCAE7FB84, 0xCAE8FB84, 0xCAE9FB84, 0xCAEAFB84, 0xCAEBFB84, 0xCAECFB84, 0xCAEDFB84, 0xCAEEFB84, + 0xCAEFFB84, 0xCAF0FB84, 0xCAF1FB84, 0xCAF2FB84, 0xCAF3FB84, 0xCAF4FB84, 
0xCAF5FB84, 0xCAF6FB84, 0xCAF7FB84, 0xCAF8FB84, 0xCAF9FB84, 0xCAFAFB84, 0xCAFBFB84, 0xCAFCFB84, 0xCAFDFB84, + 0xCAFEFB84, 0xCAFFFB84, 0xCB00FB84, 0xCB01FB84, 0xCB02FB84, 0xCB03FB84, 0xCB04FB84, 0xCB05FB84, 0xCB06FB84, 0xCB07FB84, 0xCB08FB84, 0xCB09FB84, 0xCB0AFB84, 0xCB0BFB84, 0xCB0CFB84, + 0xCB0DFB84, 0xCB0EFB84, 0xCB0FFB84, 0xCB10FB84, 0xCB11FB84, 0xCB12FB84, 0xCB13FB84, 0xCB14FB84, 0xCB15FB84, 0xCB16FB84, 0xCB17FB84, 0xCB18FB84, 0xCB19FB84, 0xCB1AFB84, 0xCB1BFB84, + 0xCB1CFB84, 0xCB1DFB84, 0xCB1EFB84, 0xCB1FFB84, 0xCB20FB84, 0xCB21FB84, 0xCB22FB84, 0xCB23FB84, 0xCB24FB84, 0xCB25FB84, 0xCB26FB84, 0xCB27FB84, 0xCB28FB84, 0xCB29FB84, 0xCB2AFB84, + 0xCB2BFB84, 0xCB2CFB84, 0xCB2DFB84, 0xCB2EFB84, 0xCB2FFB84, 0xCB30FB84, 0xCB31FB84, 0xCB32FB84, 0xCB33FB84, 0xCB34FB84, 0xCB35FB84, 0xCB36FB84, 0xCB37FB84, 0xCB38FB84, 0xCB39FB84, + 0xCB3AFB84, 0xCB3BFB84, 0xCB3CFB84, 0xCB3DFB84, 0xCB3EFB84, 0xCB3FFB84, 0xCB40FB84, 0xCB41FB84, 0xCB42FB84, 0xCB43FB84, 0xCB44FB84, 0xCB45FB84, 0xCB46FB84, 0xCB47FB84, 0xCB48FB84, + 0xCB49FB84, 0xCB4AFB84, 0xCB4BFB84, 0xCB4CFB84, 0xCB4DFB84, 0xCB4EFB84, 0xCB4FFB84, 0xCB50FB84, 0xCB51FB84, 0xCB52FB84, 0xCB53FB84, 0xCB54FB84, 0xCB55FB84, 0xCB56FB84, 0xCB57FB84, + 0xCB58FB84, 0xCB59FB84, 0xCB5AFB84, 0xCB5BFB84, 0xCB5CFB84, 0xCB5DFB84, 0xCB5EFB84, 0xCB5FFB84, 0xCB60FB84, 0xCB61FB84, 0xCB62FB84, 0xCB63FB84, 0xCB64FB84, 0xCB65FB84, 0xCB66FB84, + 0xCB67FB84, 0xCB68FB84, 0xCB69FB84, 0xCB6AFB84, 0xCB6BFB84, 0xCB6CFB84, 0xCB6DFB84, 0xCB6EFB84, 0xCB6FFB84, 0xCB70FB84, 0xCB71FB84, 0xCB72FB84, 0xCB73FB84, 0xCB74FB84, 0xCB75FB84, + 0xCB76FB84, 0xCB77FB84, 0xCB78FB84, 0xCB79FB84, 0xCB7AFB84, 0xCB7BFB84, 0xCB7CFB84, 0xCB7DFB84, 0xCB7EFB84, 0xCB7FFB84, 0xCB80FB84, 0xCB81FB84, 0xCB82FB84, 0xCB83FB84, 0xCB84FB84, + 0xCB85FB84, 0xCB86FB84, 0xCB87FB84, 0xCB88FB84, 0xCB89FB84, 0xCB8AFB84, 0xCB8BFB84, 0xCB8CFB84, 0xCB8DFB84, 0xCB8EFB84, 0xCB8FFB84, 0xCB90FB84, 0xCB91FB84, 0xCB92FB84, 0xCB93FB84, + 0xCB94FB84, 0xCB95FB84, 0xCB96FB84, 0xCB97FB84, 0xCB98FB84, 
0xCB99FB84, 0xCB9AFB84, 0xCB9BFB84, 0xCB9CFB84, 0xCB9DFB84, 0xCB9EFB84, 0xCB9FFB84, 0xCBA0FB84, 0xCBA1FB84, 0xCBA2FB84, + 0xCBA3FB84, 0xCBA4FB84, 0xCBA5FB84, 0xCBA6FB84, 0xCBA7FB84, 0xCBA8FB84, 0xCBA9FB84, 0xCBAAFB84, 0xCBABFB84, 0xCBACFB84, 0xCBADFB84, 0xCBAEFB84, 0xCBAFFB84, 0xCBB0FB84, 0xCBB1FB84, + 0xCBB2FB84, 0xCBB3FB84, 0xCBB4FB84, 0xCBB5FB84, 0xCBB6FB84, 0xCBB7FB84, 0xCBB8FB84, 0xCBB9FB84, 0xCBBAFB84, 0xCBBBFB84, 0xCBBCFB84, 0xCBBDFB84, 0xCBBEFB84, 0xCBBFFB84, 0xCBC0FB84, + 0xCBC1FB84, 0xCBC2FB84, 0xCBC3FB84, 0xCBC4FB84, 0xCBC5FB84, 0xCBC6FB84, 0xCBC7FB84, 0xCBC8FB84, 0xCBC9FB84, 0xCBCAFB84, 0xCBCBFB84, 0xCBCCFB84, 0xCBCDFB84, 0xCBCEFB84, 0xCBCFFB84, + 0xCBD0FB84, 0xCBD1FB84, 0xCBD2FB84, 0xCBD3FB84, 0xCBD4FB84, 0xCBD5FB84, 0xCBD6FB84, 0xCBD7FB84, 0xCBD8FB84, 0xCBD9FB84, 0xCBDAFB84, 0xCBDBFB84, 0xCBDCFB84, 0xCBDDFB84, 0xCBDEFB84, + 0xCBDFFB84, 0xCBE0FB84, 0xCBE1FB84, 0xCBE2FB84, 0xCBE3FB84, 0xCBE4FB84, 0xCBE5FB84, 0xCBE6FB84, 0xCBE7FB84, 0xCBE8FB84, 0xCBE9FB84, 0xCBEAFB84, 0xCBEBFB84, 0xCBECFB84, 0xCBEDFB84, + 0xCBEEFB84, 0xCBEFFB84, 0xCBF0FB84, 0xCBF1FB84, 0xCBF2FB84, 0xCBF3FB84, 0xCBF4FB84, 0xCBF5FB84, 0xCBF6FB84, 0xCBF7FB84, 0xCBF8FB84, 0xCBF9FB84, 0xCBFAFB84, 0xCBFBFB84, 0xCBFCFB84, + 0xCBFDFB84, 0xCBFEFB84, 0xCBFFFB84, 0xCC00FB84, 0xCC01FB84, 0xCC02FB84, 0xCC03FB84, 0xCC04FB84, 0xCC05FB84, 0xCC06FB84, 0xCC07FB84, 0xCC08FB84, 0xCC09FB84, 0xCC0AFB84, 0xCC0BFB84, + 0xCC0CFB84, 0xCC0DFB84, 0xCC0EFB84, 0xCC0FFB84, 0xCC10FB84, 0xCC11FB84, 0xCC12FB84, 0xCC13FB84, 0xCC14FB84, 0xCC15FB84, 0xCC16FB84, 0xCC17FB84, 0xCC18FB84, 0xCC19FB84, 0xCC1AFB84, + 0xCC1BFB84, 0xCC1CFB84, 0xCC1DFB84, 0xCC1EFB84, 0xCC1FFB84, 0xCC20FB84, 0xCC21FB84, 0xCC22FB84, 0xCC23FB84, 0xCC24FB84, 0xCC25FB84, 0xCC26FB84, 0xCC27FB84, 0xCC28FB84, 0xCC29FB84, + 0xCC2AFB84, 0xCC2BFB84, 0xCC2CFB84, 0xCC2DFB84, 0xCC2EFB84, 0xCC2FFB84, 0xCC30FB84, 0xCC31FB84, 0xCC32FB84, 0xCC33FB84, 0xCC34FB84, 0xCC35FB84, 0xCC36FB84, 0xCC37FB84, 0xCC38FB84, + 0xCC39FB84, 0xCC3AFB84, 0xCC3BFB84, 0xCC3CFB84, 
0xCC3DFB84, 0xCC3EFB84, 0xCC3FFB84, 0xCC40FB84, 0xCC41FB84, 0xCC42FB84, 0xCC43FB84, 0xCC44FB84, 0xCC45FB84, 0xCC46FB84, 0xCC47FB84, + 0xCC48FB84, 0xCC49FB84, 0xCC4AFB84, 0xCC4BFB84, 0xCC4CFB84, 0xCC4DFB84, 0xCC4EFB84, 0xCC4FFB84, 0xCC50FB84, 0xCC51FB84, 0xCC52FB84, 0xCC53FB84, 0xCC54FB84, 0xCC55FB84, 0xCC56FB84, + 0xCC57FB84, 0xCC58FB84, 0xCC59FB84, 0xCC5AFB84, 0xCC5BFB84, 0xCC5CFB84, 0xCC5DFB84, 0xCC5EFB84, 0xCC5FFB84, 0xCC60FB84, 0xCC61FB84, 0xCC62FB84, 0xCC63FB84, 0xCC64FB84, 0xCC65FB84, + 0xCC66FB84, 0xCC67FB84, 0xCC68FB84, 0xCC69FB84, 0xCC6AFB84, 0xCC6BFB84, 0xCC6CFB84, 0xCC6DFB84, 0xCC6EFB84, 0xCC6FFB84, 0xCC70FB84, 0xCC71FB84, 0xCC72FB84, 0xCC73FB84, 0xCC74FB84, + 0xCC75FB84, 0xCC76FB84, 0xCC77FB84, 0xCC78FB84, 0xCC79FB84, 0xCC7AFB84, 0xCC7BFB84, 0xCC7CFB84, 0xCC7DFB84, 0xCC7EFB84, 0xCC7FFB84, 0xCC80FB84, 0xCC81FB84, 0xCC82FB84, 0xCC83FB84, + 0xCC84FB84, 0xCC85FB84, 0xCC86FB84, 0xCC87FB84, 0xCC88FB84, 0xCC89FB84, 0xCC8AFB84, 0xCC8BFB84, 0xCC8CFB84, 0xCC8DFB84, 0xCC8EFB84, 0xCC8FFB84, 0xCC90FB84, 0xCC91FB84, 0xCC92FB84, + 0xCC93FB84, 0xCC94FB84, 0xCC95FB84, 0xCC96FB84, 0xCC97FB84, 0xCC98FB84, 0xCC99FB84, 0xCC9AFB84, 0xCC9BFB84, 0xCC9CFB84, 0xCC9DFB84, 0xCC9EFB84, 0xCC9FFB84, 0xCCA0FB84, 0xCCA1FB84, + 0xCCA2FB84, 0xCCA3FB84, 0xCCA4FB84, 0xCCA5FB84, 0xCCA6FB84, 0xCCA7FB84, 0xCCA8FB84, 0xCCA9FB84, 0xCCAAFB84, 0xCCABFB84, 0xCCACFB84, 0xCCADFB84, 0xCCAEFB84, 0xCCAFFB84, 0xCCB0FB84, + 0xCCB1FB84, 0xCCB2FB84, 0xCCB3FB84, 0xCCB4FB84, 0xCCB5FB84, 0xCCB6FB84, 0xCCB7FB84, 0xCCB8FB84, 0xCCB9FB84, 0xCCBAFB84, 0xCCBBFB84, 0xCCBCFB84, 0xCCBDFB84, 0xCCBEFB84, 0xCCBFFB84, + 0xCCC0FB84, 0xCCC1FB84, 0xCCC2FB84, 0xCCC3FB84, 0xCCC4FB84, 0xCCC5FB84, 0xCCC6FB84, 0xCCC7FB84, 0xCCC8FB84, 0xCCC9FB84, 0xCCCAFB84, 0xCCCBFB84, 0xCCCCFB84, 0xCCCDFB84, 0xCCCEFB84, + 0xCCCFFB84, 0xCCD0FB84, 0xCCD1FB84, 0xCCD2FB84, 0xCCD3FB84, 0xCCD4FB84, 0xCCD5FB84, 0xCCD6FB84, 0xCCD7FB84, 0xCCD8FB84, 0xCCD9FB84, 0xCCDAFB84, 0xCCDBFB84, 0xCCDCFB84, 0xCCDDFB84, + 0xCCDEFB84, 0xCCDFFB84, 0xCCE0FB84, 
0xCCE1FB84, 0xCCE2FB84, 0xCCE3FB84, 0xCCE4FB84, 0xCCE5FB84, 0xCCE6FB84, 0xCCE7FB84, 0xCCE8FB84, 0xCCE9FB84, 0xCCEAFB84, 0xCCEBFB84, 0xCCECFB84, + 0xCCEDFB84, 0xCCEEFB84, 0xCCEFFB84, 0xCCF0FB84, 0xCCF1FB84, 0xCCF2FB84, 0xCCF3FB84, 0xCCF4FB84, 0xCCF5FB84, 0xCCF6FB84, 0xCCF7FB84, 0xCCF8FB84, 0xCCF9FB84, 0xCCFAFB84, 0xCCFBFB84, + 0xCCFCFB84, 0xCCFDFB84, 0xCCFEFB84, 0xCCFFFB84, 0xCD00FB84, 0xCD01FB84, 0xCD02FB84, 0xCD03FB84, 0xCD04FB84, 0xCD05FB84, 0xCD06FB84, 0xCD07FB84, 0xCD08FB84, 0xCD09FB84, 0xCD0AFB84, + 0xCD0BFB84, 0xCD0CFB84, 0xCD0DFB84, 0xCD0EFB84, 0xCD0FFB84, 0xCD10FB84, 0xCD11FB84, 0xCD12FB84, 0xCD13FB84, 0xCD14FB84, 0xCD15FB84, 0xCD16FB84, 0xCD17FB84, 0xCD18FB84, 0xCD19FB84, + 0xCD1AFB84, 0xCD1BFB84, 0xCD1CFB84, 0xCD1DFB84, 0xCD1EFB84, 0xCD1FFB84, 0xCD20FB84, 0xCD21FB84, 0xCD22FB84, 0xCD23FB84, 0xCD24FB84, 0xCD25FB84, 0xCD26FB84, 0xCD27FB84, 0xCD28FB84, + 0xCD29FB84, 0xCD2AFB84, 0xCD2BFB84, 0xCD2CFB84, 0xCD2DFB84, 0xCD2EFB84, 0xCD2FFB84, 0xCD30FB84, 0xCD31FB84, 0xCD32FB84, 0xCD33FB84, 0xCD34FB84, 0xCD35FB84, 0xCD36FB84, 0xCD37FB84, + 0xCD38FB84, 0xCD39FB84, 0xCD3AFB84, 0xCD3BFB84, 0xCD3CFB84, 0xCD3DFB84, 0xCD3EFB84, 0xCD3FFB84, 0xCD40FB84, 0xCD41FB84, 0xCD42FB84, 0xCD43FB84, 0xCD44FB84, 0xCD45FB84, 0xCD46FB84, + 0xCD47FB84, 0xCD48FB84, 0xCD49FB84, 0xCD4AFB84, 0xCD4BFB84, 0xCD4CFB84, 0xCD4DFB84, 0xCD4EFB84, 0xCD4FFB84, 0xCD50FB84, 0xCD51FB84, 0xCD52FB84, 0xCD53FB84, 0xCD54FB84, 0xCD55FB84, + 0xCD56FB84, 0xCD57FB84, 0xCD58FB84, 0xCD59FB84, 0xCD5AFB84, 0xCD5BFB84, 0xCD5CFB84, 0xCD5DFB84, 0xCD5EFB84, 0xCD5FFB84, 0xCD60FB84, 0xCD61FB84, 0xCD62FB84, 0xCD63FB84, 0xCD64FB84, + 0xCD65FB84, 0xCD66FB84, 0xCD67FB84, 0xCD68FB84, 0xCD69FB84, 0xCD6AFB84, 0xCD6BFB84, 0xCD6CFB84, 0xCD6DFB84, 0xCD6EFB84, 0xCD6FFB84, 0xCD70FB84, 0xCD71FB84, 0xCD72FB84, 0xCD73FB84, + 0xCD74FB84, 0xCD75FB84, 0xCD76FB84, 0xCD77FB84, 0xCD78FB84, 0xCD79FB84, 0xCD7AFB84, 0xCD7BFB84, 0xCD7CFB84, 0xCD7DFB84, 0xCD7EFB84, 0xCD7FFB84, 0xCD80FB84, 0xCD81FB84, 0xCD82FB84, + 0xCD83FB84, 0xCD84FB84, 
0xCD85FB84, 0xCD86FB84, 0xCD87FB84, 0xCD88FB84, 0xCD89FB84, 0xCD8AFB84, 0xCD8BFB84, 0xCD8CFB84, 0xCD8DFB84, 0xCD8EFB84, 0xCD8FFB84, 0xCD90FB84, 0xCD91FB84, + 0xCD92FB84, 0xCD93FB84, 0xCD94FB84, 0xCD95FB84, 0xCD96FB84, 0xCD97FB84, 0xCD98FB84, 0xCD99FB84, 0xCD9AFB84, 0xCD9BFB84, 0xCD9CFB84, 0xCD9DFB84, 0xCD9EFB84, 0xCD9FFB84, 0xCDA0FB84, + 0xCDA1FB84, 0xCDA2FB84, 0xCDA3FB84, 0xCDA4FB84, 0xCDA5FB84, 0xCDA6FB84, 0xCDA7FB84, 0xCDA8FB84, 0xCDA9FB84, 0xCDAAFB84, 0xCDABFB84, 0xCDACFB84, 0xCDADFB84, 0xCDAEFB84, 0xCDAFFB84, + 0xCDB0FB84, 0xCDB1FB84, 0xCDB2FB84, 0xCDB3FB84, 0xCDB4FB84, 0xCDB5FB84, 0xCDB6FB84, 0xCDB7FB84, 0xCDB8FB84, 0xCDB9FB84, 0xCDBAFB84, 0xCDBBFB84, 0xCDBCFB84, 0xCDBDFB84, 0xCDBEFB84, + 0xCDBFFB84, 0xCDC0FB84, 0xCDC1FB84, 0xCDC2FB84, 0xCDC3FB84, 0xCDC4FB84, 0xCDC5FB84, 0xCDC6FB84, 0xCDC7FB84, 0xCDC8FB84, 0xCDC9FB84, 0xCDCAFB84, 0xCDCBFB84, 0xCDCCFB84, 0xCDCDFB84, + 0xCDCEFB84, 0xCDCFFB84, 0xCDD0FB84, 0xCDD1FB84, 0xCDD2FB84, 0xCDD3FB84, 0xCDD4FB84, 0xCDD5FB84, 0xCDD6FB84, 0xCDD7FB84, 0xCDD8FB84, 0xCDD9FB84, 0xCDDAFB84, 0xCDDBFB84, 0xCDDCFB84, + 0xCDDDFB84, 0xCDDEFB84, 0xCDDFFB84, 0xCDE0FB84, 0xCDE1FB84, 0xCDE2FB84, 0xCDE3FB84, 0xCDE4FB84, 0xCDE5FB84, 0xCDE6FB84, 0xCDE7FB84, 0xCDE8FB84, 0xCDE9FB84, 0xCDEAFB84, 0xCDEBFB84, + 0xCDECFB84, 0xCDEDFB84, 0xCDEEFB84, 0xCDEFFB84, 0xCDF0FB84, 0xCDF1FB84, 0xCDF2FB84, 0xCDF3FB84, 0xCDF4FB84, 0xCDF5FB84, 0xCDF6FB84, 0xCDF7FB84, 0xCDF8FB84, 0xCDF9FB84, 0xCDFAFB84, + 0xCDFBFB84, 0xCDFCFB84, 0xCDFDFB84, 0xCDFEFB84, 0xCDFFFB84, 0xCE00FB84, 0xCE01FB84, 0xCE02FB84, 0xCE03FB84, 0xCE04FB84, 0xCE05FB84, 0xCE06FB84, 0xCE07FB84, 0xCE08FB84, 0xCE09FB84, + 0xCE0AFB84, 0xCE0BFB84, 0xCE0CFB84, 0xCE0DFB84, 0xCE0EFB84, 0xCE0FFB84, 0xCE10FB84, 0xCE11FB84, 0xCE12FB84, 0xCE13FB84, 0xCE14FB84, 0xCE15FB84, 0xCE16FB84, 0xCE17FB84, 0xCE18FB84, + 0xCE19FB84, 0xCE1AFB84, 0xCE1BFB84, 0xCE1CFB84, 0xCE1DFB84, 0xCE1EFB84, 0xCE1FFB84, 0xCE20FB84, 0xCE21FB84, 0xCE22FB84, 0xCE23FB84, 0xCE24FB84, 0xCE25FB84, 0xCE26FB84, 0xCE27FB84, + 0xCE28FB84, 
0xCE29FB84, 0xCE2AFB84, 0xCE2BFB84, 0xCE2CFB84, 0xCE2DFB84, 0xCE2EFB84, 0xCE2FFB84, 0xCE30FB84, 0xCE31FB84, 0xCE32FB84, 0xCE33FB84, 0xCE34FB84, 0xCE35FB84, 0xCE36FB84, + 0xCE37FB84, 0xCE38FB84, 0xCE39FB84, 0xCE3AFB84, 0xCE3BFB84, 0xCE3CFB84, 0xCE3DFB84, 0xCE3EFB84, 0xCE3FFB84, 0xCE40FB84, 0xCE41FB84, 0xCE42FB84, 0xCE43FB84, 0xCE44FB84, 0xCE45FB84, + 0xCE46FB84, 0xCE47FB84, 0xCE48FB84, 0xCE49FB84, 0xCE4AFB84, 0xCE4BFB84, 0xCE4CFB84, 0xCE4DFB84, 0xCE4EFB84, 0xCE4FFB84, 0xCE50FB84, 0xCE51FB84, 0xCE52FB84, 0xCE53FB84, 0xCE54FB84, + 0xCE55FB84, 0xCE56FB84, 0xCE57FB84, 0xCE58FB84, 0xCE59FB84, 0xCE5AFB84, 0xCE5BFB84, 0xCE5CFB84, 0xCE5DFB84, 0xCE5EFB84, 0xCE5FFB84, 0xCE60FB84, 0xCE61FB84, 0xCE62FB84, 0xCE63FB84, + 0xCE64FB84, 0xCE65FB84, 0xCE66FB84, 0xCE67FB84, 0xCE68FB84, 0xCE69FB84, 0xCE6AFB84, 0xCE6BFB84, 0xCE6CFB84, 0xCE6DFB84, 0xCE6EFB84, 0xCE6FFB84, 0xCE70FB84, 0xCE71FB84, 0xCE72FB84, + 0xCE73FB84, 0xCE74FB84, 0xCE75FB84, 0xCE76FB84, 0xCE77FB84, 0xCE78FB84, 0xCE79FB84, 0xCE7AFB84, 0xCE7BFB84, 0xCE7CFB84, 0xCE7DFB84, 0xCE7EFB84, 0xCE7FFB84, 0xCE80FB84, 0xCE81FB84, + 0xCE82FB84, 0xCE83FB84, 0xCE84FB84, 0xCE85FB84, 0xCE86FB84, 0xCE87FB84, 0xCE88FB84, 0xCE89FB84, 0xCE8AFB84, 0xCE8BFB84, 0xCE8CFB84, 0xCE8DFB84, 0xCE8EFB84, 0xCE8FFB84, 0xCE90FB84, + 0xCE91FB84, 0xCE92FB84, 0xCE93FB84, 0xCE94FB84, 0xCE95FB84, 0xCE96FB84, 0xCE97FB84, 0xCE98FB84, 0xCE99FB84, 0xCE9AFB84, 0xCE9BFB84, 0xCE9CFB84, 0xCE9DFB84, 0xCE9EFB84, 0xCE9FFB84, + 0xCEA0FB84, 0xCEA1FB84, 0xCEA2FB84, 0xCEA3FB84, 0xCEA4FB84, 0xCEA5FB84, 0xCEA6FB84, 0xCEA7FB84, 0xCEA8FB84, 0xCEA9FB84, 0xCEAAFB84, 0xCEABFB84, 0xCEACFB84, 0xCEADFB84, 0xCEAEFB84, + 0xCEAFFB84, 0xCEB0FB84, 0xCEB1FB84, 0xCEB2FB84, 0xCEB3FB84, 0xCEB4FB84, 0xCEB5FB84, 0xCEB6FB84, 0xCEB7FB84, 0xCEB8FB84, 0xCEB9FB84, 0xCEBAFB84, 0xCEBBFB84, 0xCEBCFB84, 0xCEBDFB84, + 0xCEBEFB84, 0xCEBFFB84, 0xCEC0FB84, 0xCEC1FB84, 0xCEC2FB84, 0xCEC3FB84, 0xCEC4FB84, 0xCEC5FB84, 0xCEC6FB84, 0xCEC7FB84, 0xCEC8FB84, 0xCEC9FB84, 0xCECAFB84, 0xCECBFB84, 0xCECCFB84, + 
0xCECDFB84, 0xCECEFB84, 0xCECFFB84, 0xCED0FB84, 0xCED1FB84, 0xCED2FB84, 0xCED3FB84, 0xCED4FB84, 0xCED5FB84, 0xCED6FB84, 0xCED7FB84, 0xCED8FB84, 0xCED9FB84, 0xCEDAFB84, 0xCEDBFB84, + 0xCEDCFB84, 0xCEDDFB84, 0xCEDEFB84, 0xCEDFFB84, 0xCEE0FB84, 0xCEE1FB84, 0xCEE2FB84, 0xCEE3FB84, 0xCEE4FB84, 0xCEE5FB84, 0xCEE6FB84, 0xCEE7FB84, 0xCEE8FB84, 0xCEE9FB84, 0xCEEAFB84, + 0xCEEBFB84, 0xCEECFB84, 0xCEEDFB84, 0xCEEEFB84, 0xCEEFFB84, 0xCEF0FB84, 0xCEF1FB84, 0xCEF2FB84, 0xCEF3FB84, 0xCEF4FB84, 0xCEF5FB84, 0xCEF6FB84, 0xCEF7FB84, 0xCEF8FB84, 0xCEF9FB84, + 0xCEFAFB84, 0xCEFBFB84, 0xCEFCFB84, 0xCEFDFB84, 0xCEFEFB84, 0xCEFFFB84, 0xCF00FB84, 0xCF01FB84, 0xCF02FB84, 0xCF03FB84, 0xCF04FB84, 0xCF05FB84, 0xCF06FB84, 0xCF07FB84, 0xCF08FB84, + 0xCF09FB84, 0xCF0AFB84, 0xCF0BFB84, 0xCF0CFB84, 0xCF0DFB84, 0xCF0EFB84, 0xCF0FFB84, 0xCF10FB84, 0xCF11FB84, 0xCF12FB84, 0xCF13FB84, 0xCF14FB84, 0xCF15FB84, 0xCF16FB84, 0xCF17FB84, + 0xCF18FB84, 0xCF19FB84, 0xCF1AFB84, 0xCF1BFB84, 0xCF1CFB84, 0xCF1DFB84, 0xCF1EFB84, 0xCF1FFB84, 0xCF20FB84, 0xCF21FB84, 0xCF22FB84, 0xCF23FB84, 0xCF24FB84, 0xCF25FB84, 0xCF26FB84, + 0xCF27FB84, 0xCF28FB84, 0xCF29FB84, 0xCF2AFB84, 0xCF2BFB84, 0xCF2CFB84, 0xCF2DFB84, 0xCF2EFB84, 0xCF2FFB84, 0xCF30FB84, 0xCF31FB84, 0xCF32FB84, 0xCF33FB84, 0xCF34FB84, 0xCF35FB84, + 0xCF36FB84, 0xCF37FB84, 0xCF38FB84, 0xCF39FB84, 0xCF3AFB84, 0xCF3BFB84, 0xCF3CFB84, 0xCF3DFB84, 0xCF3EFB84, 0xCF3FFB84, 0xCF40FB84, 0xCF41FB84, 0xCF42FB84, 0xCF43FB84, 0xCF44FB84, + 0xCF45FB84, 0xCF46FB84, 0xCF47FB84, 0xCF48FB84, 0xCF49FB84, 0xCF4AFB84, 0xCF4BFB84, 0xCF4CFB84, 0xCF4DFB84, 0xCF4EFB84, 0xCF4FFB84, 0xCF50FB84, 0xCF51FB84, 0xCF52FB84, 0xCF53FB84, + 0xCF54FB84, 0xCF55FB84, 0xCF56FB84, 0xCF57FB84, 0xCF58FB84, 0xCF59FB84, 0xCF5AFB84, 0xCF5BFB84, 0xCF5CFB84, 0xCF5DFB84, 0xCF5EFB84, 0xCF5FFB84, 0xCF60FB84, 0xCF61FB84, 0xCF62FB84, + 0xCF63FB84, 0xCF64FB84, 0xCF65FB84, 0xCF66FB84, 0xCF67FB84, 0xCF68FB84, 0xCF69FB84, 0xCF6AFB84, 0xCF6BFB84, 0xCF6CFB84, 0xCF6DFB84, 0xCF6EFB84, 0xCF6FFB84, 0xCF70FB84, 0xCF71FB84, 
+ 0xCF72FB84, 0xCF73FB84, 0xCF74FB84, 0xCF75FB84, 0xCF76FB84, 0xCF77FB84, 0xCF78FB84, 0xCF79FB84, 0xCF7AFB84, 0xCF7BFB84, 0xCF7CFB84, 0xCF7DFB84, 0xCF7EFB84, 0xCF7FFB84, 0xCF80FB84, + 0xCF81FB84, 0xCF82FB84, 0xCF83FB84, 0xCF84FB84, 0xCF85FB84, 0xCF86FB84, 0xCF87FB84, 0xCF88FB84, 0xCF89FB84, 0xCF8AFB84, 0xCF8BFB84, 0xCF8CFB84, 0xCF8DFB84, 0xCF8EFB84, 0xCF8FFB84, + 0xCF90FB84, 0xCF91FB84, 0xCF92FB84, 0xCF93FB84, 0xCF94FB84, 0xCF95FB84, 0xCF96FB84, 0xCF97FB84, 0xCF98FB84, 0xCF99FB84, 0xCF9AFB84, 0xCF9BFB84, 0xCF9CFB84, 0xCF9DFB84, 0xCF9EFB84, + 0xCF9FFB84, 0xCFA0FB84, 0xCFA1FB84, 0xCFA2FB84, 0xCFA3FB84, 0xCFA4FB84, 0xCFA5FB84, 0xCFA6FB84, 0xCFA7FB84, 0xCFA8FB84, 0xCFA9FB84, 0xCFAAFB84, 0xCFABFB84, 0xCFACFB84, 0xCFADFB84, + 0xCFAEFB84, 0xCFAFFB84, 0xCFB0FB84, 0xCFB1FB84, 0xCFB2FB84, 0xCFB3FB84, 0xCFB4FB84, 0xCFB5FB84, 0xCFB6FB84, 0xCFB7FB84, 0xCFB8FB84, 0xCFB9FB84, 0xCFBAFB84, 0xCFBBFB84, 0xCFBCFB84, + 0xCFBDFB84, 0xCFBEFB84, 0xCFBFFB84, 0xCFC0FB84, 0xCFC1FB84, 0xCFC2FB84, 0xCFC3FB84, 0xCFC4FB84, 0xCFC5FB84, 0xCFC6FB84, 0xCFC7FB84, 0xCFC8FB84, 0xCFC9FB84, 0xCFCAFB84, 0xCFCBFB84, + 0xCFCCFB84, 0xCFCDFB84, 0xCFCEFB84, 0xCFCFFB84, 0xCFD0FB84, 0xCFD1FB84, 0xCFD2FB84, 0xCFD3FB84, 0xCFD4FB84, 0xCFD5FB84, 0xCFD6FB84, 0xCFD7FB84, 0xCFD8FB84, 0xCFD9FB84, 0xCFDAFB84, + 0xCFDBFB84, 0xCFDCFB84, 0xCFDDFB84, 0xCFDEFB84, 0xCFDFFB84, 0xCFE0FB84, 0xCFE1FB84, 0xCFE2FB84, 0xCFE3FB84, 0xCFE4FB84, 0xCFE5FB84, 0xCFE6FB84, 0xCFE7FB84, 0xCFE8FB84, 0xCFE9FB84, + 0xCFEAFB84, 0xCFEBFB84, 0xCFECFB84, 0xCFEDFB84, 0xCFEEFB84, 0xCFEFFB84, 0xCFF0FB84, 0xCFF1FB84, 0xCFF2FB84, 0xCFF3FB84, 0xCFF4FB84, 0xCFF5FB84, 0xCFF6FB84, 0xCFF7FB84, 0xCFF8FB84, + 0xCFF9FB84, 0xCFFAFB84, 0xCFFBFB84, 0xCFFCFB84, 0xCFFDFB84, 0xCFFEFB84, 0xCFFFFB84, 0xD000FB84, 0xD001FB84, 0xD002FB84, 0xD003FB84, 0xD004FB84, 0xD005FB84, 0xD006FB84, 0xD007FB84, + 0xD008FB84, 0xD009FB84, 0xD00AFB84, 0xD00BFB84, 0xD00CFB84, 0xD00DFB84, 0xD00EFB84, 0xD00FFB84, 0xD010FB84, 0xD011FB84, 0xD012FB84, 0xD013FB84, 0xD014FB84, 0xD015FB84, 
0xD016FB84, + 0xD017FB84, 0xD018FB84, 0xD019FB84, 0xD01AFB84, 0xD01BFB84, 0xD01CFB84, 0xD01DFB84, 0xD01EFB84, 0xD01FFB84, 0xD020FB84, 0xD021FB84, 0xD022FB84, 0xD023FB84, 0xD024FB84, 0xD025FB84, + 0xD026FB84, 0xD027FB84, 0xD028FB84, 0xD029FB84, 0xD02AFB84, 0xD02BFB84, 0xD02CFB84, 0xD02DFB84, 0xD02EFB84, 0xD02FFB84, 0xD030FB84, 0xD031FB84, 0xD032FB84, 0xD033FB84, 0xD034FB84, + 0xD035FB84, 0xD036FB84, 0xD037FB84, 0xD038FB84, 0xD039FB84, 0xD03AFB84, 0xD03BFB84, 0xD03CFB84, 0xD03DFB84, 0xD03EFB84, 0xD03FFB84, 0xD040FB84, 0xD041FB84, 0xD042FB84, 0xD043FB84, + 0xD044FB84, 0xD045FB84, 0xD046FB84, 0xD047FB84, 0xD048FB84, 0xD049FB84, 0xD04AFB84, 0xD04BFB84, 0xD04CFB84, 0xD04DFB84, 0xD04EFB84, 0xD04FFB84, 0xD050FB84, 0xD051FB84, 0xD052FB84, + 0xD053FB84, 0xD054FB84, 0xD055FB84, 0xD056FB84, 0xD057FB84, 0xD058FB84, 0xD059FB84, 0xD05AFB84, 0xD05BFB84, 0xD05CFB84, 0xD05DFB84, 0xD05EFB84, 0xD05FFB84, 0xD060FB84, 0xD061FB84, + 0xD062FB84, 0xD063FB84, 0xD064FB84, 0xD065FB84, 0xD066FB84, 0xD067FB84, 0xD068FB84, 0xD069FB84, 0xD06AFB84, 0xD06BFB84, 0xD06CFB84, 0xD06DFB84, 0xD06EFB84, 0xD06FFB84, 0xD070FB84, + 0xD071FB84, 0xD072FB84, 0xD073FB84, 0xD074FB84, 0xD075FB84, 0xD076FB84, 0xD077FB84, 0xD078FB84, 0xD079FB84, 0xD07AFB84, 0xD07BFB84, 0xD07CFB84, 0xD07DFB84, 0xD07EFB84, 0xD07FFB84, + 0xD080FB84, 0xD081FB84, 0xD082FB84, 0xD083FB84, 0xD084FB84, 0xD085FB84, 0xD086FB84, 0xD087FB84, 0xD088FB84, 0xD089FB84, 0xD08AFB84, 0xD08BFB84, 0xD08CFB84, 0xD08DFB84, 0xD08EFB84, + 0xD08FFB84, 0xD090FB84, 0xD091FB84, 0xD092FB84, 0xD093FB84, 0xD094FB84, 0xD095FB84, 0xD096FB84, 0xD097FB84, 0xD098FB84, 0xD099FB84, 0xD09AFB84, 0xD09BFB84, 0xD09CFB84, 0xD09DFB84, + 0xD09EFB84, 0xD09FFB84, 0xD0A0FB84, 0xD0A1FB84, 0xD0A2FB84, 0xD0A3FB84, 0xD0A4FB84, 0xD0A5FB84, 0xD0A6FB84, 0xD0A7FB84, 0xD0A8FB84, 0xD0A9FB84, 0xD0AAFB84, 0xD0ABFB84, 0xD0ACFB84, + 0xD0ADFB84, 0xD0AEFB84, 0xD0AFFB84, 0xD0B0FB84, 0xD0B1FB84, 0xD0B2FB84, 0xD0B3FB84, 0xD0B4FB84, 0xD0B5FB84, 0xD0B6FB84, 0xD0B7FB84, 0xD0B8FB84, 0xD0B9FB84, 
0xD0BAFB84, 0xD0BBFB84, + 0xD0BCFB84, 0xD0BDFB84, 0xD0BEFB84, 0xD0BFFB84, 0xD0C0FB84, 0xD0C1FB84, 0xD0C2FB84, 0xD0C3FB84, 0xD0C4FB84, 0xD0C5FB84, 0xD0C6FB84, 0xD0C7FB84, 0xD0C8FB84, 0xD0C9FB84, 0xD0CAFB84, + 0xD0CBFB84, 0xD0CCFB84, 0xD0CDFB84, 0xD0CEFB84, 0xD0CFFB84, 0xD0D0FB84, 0xD0D1FB84, 0xD0D2FB84, 0xD0D3FB84, 0xD0D4FB84, 0xD0D5FB84, 0xD0D6FB84, 0xD0D7FB84, 0xD0D8FB84, 0xD0D9FB84, + 0xD0DAFB84, 0xD0DBFB84, 0xD0DCFB84, 0xD0DDFB84, 0xD0DEFB84, 0xD0DFFB84, 0xD0E0FB84, 0xD0E1FB84, 0xD0E2FB84, 0xD0E3FB84, 0xD0E4FB84, 0xD0E5FB84, 0xD0E6FB84, 0xD0E7FB84, 0xD0E8FB84, + 0xD0E9FB84, 0xD0EAFB84, 0xD0EBFB84, 0xD0ECFB84, 0xD0EDFB84, 0xD0EEFB84, 0xD0EFFB84, 0xD0F0FB84, 0xD0F1FB84, 0xD0F2FB84, 0xD0F3FB84, 0xD0F4FB84, 0xD0F5FB84, 0xD0F6FB84, 0xD0F7FB84, + 0xD0F8FB84, 0xD0F9FB84, 0xD0FAFB84, 0xD0FBFB84, 0xD0FCFB84, 0xD0FDFB84, 0xD0FEFB84, 0xD0FFFB84, 0xD100FB84, 0xD101FB84, 0xD102FB84, 0xD103FB84, 0xD104FB84, 0xD105FB84, 0xD106FB84, + 0xD107FB84, 0xD108FB84, 0xD109FB84, 0xD10AFB84, 0xD10BFB84, 0xD10CFB84, 0xD10DFB84, 0xD10EFB84, 0xD10FFB84, 0xD110FB84, 0xD111FB84, 0xD112FB84, 0xD113FB84, 0xD114FB84, 0xD115FB84, + 0xD116FB84, 0xD117FB84, 0xD118FB84, 0xD119FB84, 0xD11AFB84, 0xD11BFB84, 0xD11CFB84, 0xD11DFB84, 0xD11EFB84, 0xD11FFB84, 0xD120FB84, 0xD121FB84, 0xD122FB84, 0xD123FB84, 0xD124FB84, + 0xD125FB84, 0xD126FB84, 0xD127FB84, 0xD128FB84, 0xD129FB84, 0xD12AFB84, 0xD12BFB84, 0xD12CFB84, 0xD12DFB84, 0xD12EFB84, 0xD12FFB84, 0xD130FB84, 0xD131FB84, 0xD132FB84, 0xD133FB84, + 0xD134FB84, 0xD135FB84, 0xD136FB84, 0xD137FB84, 0xD138FB84, 0xD139FB84, 0xD13AFB84, 0xD13BFB84, 0xD13CFB84, 0xD13DFB84, 0xD13EFB84, 0xD13FFB84, 0xD140FB84, 0xD141FB84, 0xD142FB84, + 0xD143FB84, 0xD144FB84, 0xD145FB84, 0xD146FB84, 0xD147FB84, 0xD148FB84, 0xD149FB84, 0xD14AFB84, 0xD14BFB84, 0xD14CFB84, 0xD14DFB84, 0xD14EFB84, 0xD14FFB84, 0xD150FB84, 0xD151FB84, + 0xD152FB84, 0xD153FB84, 0xD154FB84, 0xD155FB84, 0xD156FB84, 0xD157FB84, 0xD158FB84, 0xD159FB84, 0xD15AFB84, 0xD15BFB84, 0xD15CFB84, 0xD15DFB84, 
0xD15EFB84, 0xD15FFB84, 0xD160FB84, + 0xD161FB84, 0xD162FB84, 0xD163FB84, 0xD164FB84, 0xD165FB84, 0xD166FB84, 0xD167FB84, 0xD168FB84, 0xD169FB84, 0xD16AFB84, 0xD16BFB84, 0xD16CFB84, 0xD16DFB84, 0xD16EFB84, 0xD16FFB84, + 0xD170FB84, 0xD171FB84, 0xD172FB84, 0xD173FB84, 0xD174FB84, 0xD175FB84, 0xD176FB84, 0xD177FB84, 0xD178FB84, 0xD179FB84, 0xD17AFB84, 0xD17BFB84, 0xD17CFB84, 0xD17DFB84, 0xD17EFB84, + 0xD17FFB84, 0xD180FB84, 0xD181FB84, 0xD182FB84, 0xD183FB84, 0xD184FB84, 0xD185FB84, 0xD186FB84, 0xD187FB84, 0xD188FB84, 0xD189FB84, 0xD18AFB84, 0xD18BFB84, 0xD18CFB84, 0xD18DFB84, + 0xD18EFB84, 0xD18FFB84, 0xD190FB84, 0xD191FB84, 0xD192FB84, 0xD193FB84, 0xD194FB84, 0xD195FB84, 0xD196FB84, 0xD197FB84, 0xD198FB84, 0xD199FB84, 0xD19AFB84, 0xD19BFB84, 0xD19CFB84, + 0xD19DFB84, 0xD19EFB84, 0xD19FFB84, 0xD1A0FB84, 0xD1A1FB84, 0xD1A2FB84, 0xD1A3FB84, 0xD1A4FB84, 0xD1A5FB84, 0xD1A6FB84, 0xD1A7FB84, 0xD1A8FB84, 0xD1A9FB84, 0xD1AAFB84, 0xD1ABFB84, + 0xD1ACFB84, 0xD1ADFB84, 0xD1AEFB84, 0xD1AFFB84, 0xD1B0FB84, 0xD1B1FB84, 0xD1B2FB84, 0xD1B3FB84, 0xD1B4FB84, 0xD1B5FB84, 0xD1B6FB84, 0xD1B7FB84, 0xD1B8FB84, 0xD1B9FB84, 0xD1BAFB84, + 0xD1BBFB84, 0xD1BCFB84, 0xD1BDFB84, 0xD1BEFB84, 0xD1BFFB84, 0xD1C0FB84, 0xD1C1FB84, 0xD1C2FB84, 0xD1C3FB84, 0xD1C4FB84, 0xD1C5FB84, 0xD1C6FB84, 0xD1C7FB84, 0xD1C8FB84, 0xD1C9FB84, + 0xD1CAFB84, 0xD1CBFB84, 0xD1CCFB84, 0xD1CDFB84, 0xD1CEFB84, 0xD1CFFB84, 0xD1D0FB84, 0xD1D1FB84, 0xD1D2FB84, 0xD1D3FB84, 0xD1D4FB84, 0xD1D5FB84, 0xD1D6FB84, 0xD1D7FB84, 0xD1D8FB84, + 0xD1D9FB84, 0xD1DAFB84, 0xD1DBFB84, 0xD1DCFB84, 0xD1DDFB84, 0xD1DEFB84, 0xD1DFFB84, 0xD1E0FB84, 0xD1E1FB84, 0xD1E2FB84, 0xD1E3FB84, 0xD1E4FB84, 0xD1E5FB84, 0xD1E6FB84, 0xD1E7FB84, + 0xD1E8FB84, 0xD1E9FB84, 0xD1EAFB84, 0xD1EBFB84, 0xD1ECFB84, 0xD1EDFB84, 0xD1EEFB84, 0xD1EFFB84, 0xD1F0FB84, 0xD1F1FB84, 0xD1F2FB84, 0xD1F3FB84, 0xD1F4FB84, 0xD1F5FB84, 0xD1F6FB84, + 0xD1F7FB84, 0xD1F8FB84, 0xD1F9FB84, 0xD1FAFB84, 0xD1FBFB84, 0xD1FCFB84, 0xD1FDFB84, 0xD1FEFB84, 0xD1FFFB84, 0xD200FB84, 0xD201FB84, 
0xD202FB84, 0xD203FB84, 0xD204FB84, 0xD205FB84, + 0xD206FB84, 0xD207FB84, 0xD208FB84, 0xD209FB84, 0xD20AFB84, 0xD20BFB84, 0xD20CFB84, 0xD20DFB84, 0xD20EFB84, 0xD20FFB84, 0xD210FB84, 0xD211FB84, 0xD212FB84, 0xD213FB84, 0xD214FB84, + 0xD215FB84, 0xD216FB84, 0xD217FB84, 0xD218FB84, 0xD219FB84, 0xD21AFB84, 0xD21BFB84, 0xD21CFB84, 0xD21DFB84, 0xD21EFB84, 0xD21FFB84, 0xD220FB84, 0xD221FB84, 0xD222FB84, 0xD223FB84, + 0xD224FB84, 0xD225FB84, 0xD226FB84, 0xD227FB84, 0xD228FB84, 0xD229FB84, 0xD22AFB84, 0xD22BFB84, 0xD22CFB84, 0xD22DFB84, 0xD22EFB84, 0xD22FFB84, 0xD230FB84, 0xD231FB84, 0xD232FB84, + 0xD233FB84, 0xD234FB84, 0xD235FB84, 0xD236FB84, 0xD237FB84, 0xD238FB84, 0xD239FB84, 0xD23AFB84, 0xD23BFB84, 0xD23CFB84, 0xD23DFB84, 0xD23EFB84, 0xD23FFB84, 0xD240FB84, 0xD241FB84, + 0xD242FB84, 0xD243FB84, 0xD244FB84, 0xD245FB84, 0xD246FB84, 0xD247FB84, 0xD248FB84, 0xD249FB84, 0xD24AFB84, 0xD24BFB84, 0xD24CFB84, 0xD24DFB84, 0xD24EFB84, 0xD24FFB84, 0xD250FB84, + 0xD251FB84, 0xD252FB84, 0xD253FB84, 0xD254FB84, 0xD255FB84, 0xD256FB84, 0xD257FB84, 0xD258FB84, 0xD259FB84, 0xD25AFB84, 0xD25BFB84, 0xD25CFB84, 0xD25DFB84, 0xD25EFB84, 0xD25FFB84, + 0xD260FB84, 0xD261FB84, 0xD262FB84, 0xD263FB84, 0xD264FB84, 0xD265FB84, 0xD266FB84, 0xD267FB84, 0xD268FB84, 0xD269FB84, 0xD26AFB84, 0xD26BFB84, 0xD26CFB84, 0xD26DFB84, 0xD26EFB84, + 0xD26FFB84, 0xD270FB84, 0xD271FB84, 0xD272FB84, 0xD273FB84, 0xD274FB84, 0xD275FB84, 0xD276FB84, 0xD277FB84, 0xD278FB84, 0xD279FB84, 0xD27AFB84, 0xD27BFB84, 0xD27CFB84, 0xD27DFB84, + 0xD27EFB84, 0xD27FFB84, 0xD280FB84, 0xD281FB84, 0xD282FB84, 0xD283FB84, 0xD284FB84, 0xD285FB84, 0xD286FB84, 0xD287FB84, 0xD288FB84, 0xD289FB84, 0xD28AFB84, 0xD28BFB84, 0xD28CFB84, + 0xD28DFB84, 0xD28EFB84, 0xD28FFB84, 0xD290FB84, 0xD291FB84, 0xD292FB84, 0xD293FB84, 0xD294FB84, 0xD295FB84, 0xD296FB84, 0xD297FB84, 0xD298FB84, 0xD299FB84, 0xD29AFB84, 0xD29BFB84, + 0xD29CFB84, 0xD29DFB84, 0xD29EFB84, 0xD29FFB84, 0xD2A0FB84, 0xD2A1FB84, 0xD2A2FB84, 0xD2A3FB84, 0xD2A4FB84, 0xD2A5FB84, 
0xD2A6FB84, 0xD2A7FB84, 0xD2A8FB84, 0xD2A9FB84, 0xD2AAFB84, + 0xD2ABFB84, 0xD2ACFB84, 0xD2ADFB84, 0xD2AEFB84, 0xD2AFFB84, 0xD2B0FB84, 0xD2B1FB84, 0xD2B2FB84, 0xD2B3FB84, 0xD2B4FB84, 0xD2B5FB84, 0xD2B6FB84, 0xD2B7FB84, 0xD2B8FB84, 0xD2B9FB84, + 0xD2BAFB84, 0xD2BBFB84, 0xD2BCFB84, 0xD2BDFB84, 0xD2BEFB84, 0xD2BFFB84, 0xD2C0FB84, 0xD2C1FB84, 0xD2C2FB84, 0xD2C3FB84, 0xD2C4FB84, 0xD2C5FB84, 0xD2C6FB84, 0xD2C7FB84, 0xD2C8FB84, + 0xD2C9FB84, 0xD2CAFB84, 0xD2CBFB84, 0xD2CCFB84, 0xD2CDFB84, 0xD2CEFB84, 0xD2CFFB84, 0xD2D0FB84, 0xD2D1FB84, 0xD2D2FB84, 0xD2D3FB84, 0xD2D4FB84, 0xD2D5FB84, 0xD2D6FB84, 0xD2D7FB84, + 0xD2D8FB84, 0xD2D9FB84, 0xD2DAFB84, 0xD2DBFB84, 0xD2DCFB84, 0xD2DDFB84, 0xD2DEFB84, 0xD2DFFB84, 0xD2E0FB84, 0xD2E1FB84, 0xD2E2FB84, 0xD2E3FB84, 0xD2E4FB84, 0xD2E5FB84, 0xD2E6FB84, + 0xD2E7FB84, 0xD2E8FB84, 0xD2E9FB84, 0xD2EAFB84, 0xD2EBFB84, 0xD2ECFB84, 0xD2EDFB84, 0xD2EEFB84, 0xD2EFFB84, 0xD2F0FB84, 0xD2F1FB84, 0xD2F2FB84, 0xD2F3FB84, 0xD2F4FB84, 0xD2F5FB84, + 0xD2F6FB84, 0xD2F7FB84, 0xD2F8FB84, 0xD2F9FB84, 0xD2FAFB84, 0xD2FBFB84, 0xD2FCFB84, 0xD2FDFB84, 0xD2FEFB84, 0xD2FFFB84, 0xD300FB84, 0xD301FB84, 0xD302FB84, 0xD303FB84, 0xD304FB84, + 0xD305FB84, 0xD306FB84, 0xD307FB84, 0xD308FB84, 0xD309FB84, 0xD30AFB84, 0xD30BFB84, 0xD30CFB84, 0xD30DFB84, 0xD30EFB84, 0xD30FFB84, 0xD310FB84, 0xD311FB84, 0xD312FB84, 0xD313FB84, + 0xD314FB84, 0xD315FB84, 0xD316FB84, 0xD317FB84, 0xD318FB84, 0xD319FB84, 0xD31AFB84, 0xD31BFB84, 0xD31CFB84, 0xD31DFB84, 0xD31EFB84, 0xD31FFB84, 0xD320FB84, 0xD321FB84, 0xD322FB84, + 0xD323FB84, 0xD324FB84, 0xD325FB84, 0xD326FB84, 0xD327FB84, 0xD328FB84, 0xD329FB84, 0xD32AFB84, 0xD32BFB84, 0xD32CFB84, 0xD32DFB84, 0xD32EFB84, 0xD32FFB84, 0xD330FB84, 0xD331FB84, + 0xD332FB84, 0xD333FB84, 0xD334FB84, 0xD335FB84, 0xD336FB84, 0xD337FB84, 0xD338FB84, 0xD339FB84, 0xD33AFB84, 0xD33BFB84, 0xD33CFB84, 0xD33DFB84, 0xD33EFB84, 0xD33FFB84, 0xD340FB84, + 0xD341FB84, 0xD342FB84, 0xD343FB84, 0xD344FB84, 0xD345FB84, 0xD346FB84, 0xD347FB84, 0xD348FB84, 0xD349FB84, 
0xD34AFB84, 0xD34BFB84, 0xD34CFB84, 0xD34DFB84, 0xD34EFB84, 0xD34FFB84, + 0xD350FB84, 0xD351FB84, 0xD352FB84, 0xD353FB84, 0xD354FB84, 0xD355FB84, 0xD356FB84, 0xD357FB84, 0xD358FB84, 0xD359FB84, 0xD35AFB84, 0xD35BFB84, 0xD35CFB84, 0xD35DFB84, 0xD35EFB84, + 0xD35FFB84, 0xD360FB84, 0xD361FB84, 0xD362FB84, 0xD363FB84, 0xD364FB84, 0xD365FB84, 0xD366FB84, 0xD367FB84, 0xD368FB84, 0xD369FB84, 0xD36AFB84, 0xD36BFB84, 0xD36CFB84, 0xD36DFB84, + 0xD36EFB84, 0xD36FFB84, 0xD370FB84, 0xD371FB84, 0xD372FB84, 0xD373FB84, 0xD374FB84, 0xD375FB84, 0xD376FB84, 0xD377FB84, 0xD378FB84, 0xD379FB84, 0xD37AFB84, 0xD37BFB84, 0xD37CFB84, + 0xD37DFB84, 0xD37EFB84, 0xD37FFB84, 0xD380FB84, 0xD381FB84, 0xD382FB84, 0xD383FB84, 0xD384FB84, 0xD385FB84, 0xD386FB84, 0xD387FB84, 0xD388FB84, 0xD389FB84, 0xD38AFB84, 0xD38BFB84, + 0xD38CFB84, 0xD38DFB84, 0xD38EFB84, 0xD38FFB84, 0xD390FB84, 0xD391FB84, 0xD392FB84, 0xD393FB84, 0xD394FB84, 0xD395FB84, 0xD396FB84, 0xD397FB84, 0xD398FB84, 0xD399FB84, 0xD39AFB84, + 0xD39BFB84, 0xD39CFB84, 0xD39DFB84, 0xD39EFB84, 0xD39FFB84, 0xD3A0FB84, 0xD3A1FB84, 0xD3A2FB84, 0xD3A3FB84, 0xD3A4FB84, 0xD3A5FB84, 0xD3A6FB84, 0xD3A7FB84, 0xD3A8FB84, 0xD3A9FB84, + 0xD3AAFB84, 0xD3ABFB84, 0xD3ACFB84, 0xD3ADFB84, 0xD3AEFB84, 0xD3AFFB84, 0xD3B0FB84, 0xD3B1FB84, 0xD3B2FB84, 0xD3B3FB84, 0xD3B4FB84, 0xD3B5FB84, 0xD3B6FB84, 0xD3B7FB84, 0xD3B8FB84, + 0xD3B9FB84, 0xD3BAFB84, 0xD3BBFB84, 0xD3BCFB84, 0xD3BDFB84, 0xD3BEFB84, 0xD3BFFB84, 0xD3C0FB84, 0xD3C1FB84, 0xD3C2FB84, 0xD3C3FB84, 0xD3C4FB84, 0xD3C5FB84, 0xD3C6FB84, 0xD3C7FB84, + 0xD3C8FB84, 0xD3C9FB84, 0xD3CAFB84, 0xD3CBFB84, 0xD3CCFB84, 0xD3CDFB84, 0xD3CEFB84, 0xD3CFFB84, 0xD3D0FB84, 0xD3D1FB84, 0xD3D2FB84, 0xD3D3FB84, 0xD3D4FB84, 0xD3D5FB84, 0xD3D6FB84, + 0xD3D7FB84, 0xD3D8FB84, 0xD3D9FB84, 0xD3DAFB84, 0xD3DBFB84, 0xD3DCFB84, 0xD3DDFB84, 0xD3DEFB84, 0xD3DFFB84, 0xD3E0FB84, 0xD3E1FB84, 0xD3E2FB84, 0xD3E3FB84, 0xD3E4FB84, 0xD3E5FB84, + 0xD3E6FB84, 0xD3E7FB84, 0xD3E8FB84, 0xD3E9FB84, 0xD3EAFB84, 0xD3EBFB84, 0xD3ECFB84, 0xD3EDFB84, 
0xD3EEFB84, 0xD3EFFB84, 0xD3F0FB84, 0xD3F1FB84, 0xD3F2FB84, 0xD3F3FB84, 0xD3F4FB84, + 0xD3F5FB84, 0xD3F6FB84, 0xD3F7FB84, 0xD3F8FB84, 0xD3F9FB84, 0xD3FAFB84, 0xD3FBFB84, 0xD3FCFB84, 0xD3FDFB84, 0xD3FEFB84, 0xD3FFFB84, 0xD400FB84, 0xD401FB84, 0xD402FB84, 0xD403FB84, + 0xD404FB84, 0xD405FB84, 0xD406FB84, 0xD407FB84, 0xD408FB84, 0xD409FB84, 0xD40AFB84, 0xD40BFB84, 0xD40CFB84, 0xD40DFB84, 0xD40EFB84, 0xD40FFB84, 0xD410FB84, 0xD411FB84, 0xD412FB84, + 0xD413FB84, 0xD414FB84, 0xD415FB84, 0xD416FB84, 0xD417FB84, 0xD418FB84, 0xD419FB84, 0xD41AFB84, 0xD41BFB84, 0xD41CFB84, 0xD41DFB84, 0xD41EFB84, 0xD41FFB84, 0xD420FB84, 0xD421FB84, + 0xD422FB84, 0xD423FB84, 0xD424FB84, 0xD425FB84, 0xD426FB84, 0xD427FB84, 0xD428FB84, 0xD429FB84, 0xD42AFB84, 0xD42BFB84, 0xD42CFB84, 0xD42DFB84, 0xD42EFB84, 0xD42FFB84, 0xD430FB84, + 0xD431FB84, 0xD432FB84, 0xD433FB84, 0xD434FB84, 0xD435FB84, 0xD436FB84, 0xD437FB84, 0xD438FB84, 0xD439FB84, 0xD43AFB84, 0xD43BFB84, 0xD43CFB84, 0xD43DFB84, 0xD43EFB84, 0xD43FFB84, + 0xD440FB84, 0xD441FB84, 0xD442FB84, 0xD443FB84, 0xD444FB84, 0xD445FB84, 0xD446FB84, 0xD447FB84, 0xD448FB84, 0xD449FB84, 0xD44AFB84, 0xD44BFB84, 0xD44CFB84, 0xD44DFB84, 0xD44EFB84, + 0xD44FFB84, 0xD450FB84, 0xD451FB84, 0xD452FB84, 0xD453FB84, 0xD454FB84, 0xD455FB84, 0xD456FB84, 0xD457FB84, 0xD458FB84, 0xD459FB84, 0xD45AFB84, 0xD45BFB84, 0xD45CFB84, 0xD45DFB84, + 0xD45EFB84, 0xD45FFB84, 0xD460FB84, 0xD461FB84, 0xD462FB84, 0xD463FB84, 0xD464FB84, 0xD465FB84, 0xD466FB84, 0xD467FB84, 0xD468FB84, 0xD469FB84, 0xD46AFB84, 0xD46BFB84, 0xD46CFB84, + 0xD46DFB84, 0xD46EFB84, 0xD46FFB84, 0xD470FB84, 0xD471FB84, 0xD472FB84, 0xD473FB84, 0xD474FB84, 0xD475FB84, 0xD476FB84, 0xD477FB84, 0xD478FB84, 0xD479FB84, 0xD47AFB84, 0xD47BFB84, + 0xD47CFB84, 0xD47DFB84, 0xD47EFB84, 0xD47FFB84, 0xD480FB84, 0xD481FB84, 0xD482FB84, 0xD483FB84, 0xD484FB84, 0xD485FB84, 0xD486FB84, 0xD487FB84, 0xD488FB84, 0xD489FB84, 0xD48AFB84, + 0xD48BFB84, 0xD48CFB84, 0xD48DFB84, 0xD48EFB84, 0xD48FFB84, 0xD490FB84, 0xD491FB84, 
0xD492FB84, 0xD493FB84, 0xD494FB84, 0xD495FB84, 0xD496FB84, 0xD497FB84, 0xD498FB84, 0xD499FB84, + 0xD49AFB84, 0xD49BFB84, 0xD49CFB84, 0xD49DFB84, 0xD49EFB84, 0xD49FFB84, 0xD4A0FB84, 0xD4A1FB84, 0xD4A2FB84, 0xD4A3FB84, 0xD4A4FB84, 0xD4A5FB84, 0xD4A6FB84, 0xD4A7FB84, 0xD4A8FB84, + 0xD4A9FB84, 0xD4AAFB84, 0xD4ABFB84, 0xD4ACFB84, 0xD4ADFB84, 0xD4AEFB84, 0xD4AFFB84, 0xD4B0FB84, 0xD4B1FB84, 0xD4B2FB84, 0xD4B3FB84, 0xD4B4FB84, 0xD4B5FB84, 0xD4B6FB84, 0xD4B7FB84, + 0xD4B8FB84, 0xD4B9FB84, 0xD4BAFB84, 0xD4BBFB84, 0xD4BCFB84, 0xD4BDFB84, 0xD4BEFB84, 0xD4BFFB84, 0xD4C0FB84, 0xD4C1FB84, 0xD4C2FB84, 0xD4C3FB84, 0xD4C4FB84, 0xD4C5FB84, 0xD4C6FB84, + 0xD4C7FB84, 0xD4C8FB84, 0xD4C9FB84, 0xD4CAFB84, 0xD4CBFB84, 0xD4CCFB84, 0xD4CDFB84, 0xD4CEFB84, 0xD4CFFB84, 0xD4D0FB84, 0xD4D1FB84, 0xD4D2FB84, 0xD4D3FB84, 0xD4D4FB84, 0xD4D5FB84, + 0xD4D6FB84, 0xD4D7FB84, 0xD4D8FB84, 0xD4D9FB84, 0xD4DAFB84, 0xD4DBFB84, 0xD4DCFB84, 0xD4DDFB84, 0xD4DEFB84, 0xD4DFFB84, 0xD4E0FB84, 0xD4E1FB84, 0xD4E2FB84, 0xD4E3FB84, 0xD4E4FB84, + 0xD4E5FB84, 0xD4E6FB84, 0xD4E7FB84, 0xD4E8FB84, 0xD4E9FB84, 0xD4EAFB84, 0xD4EBFB84, 0xD4ECFB84, 0xD4EDFB84, 0xD4EEFB84, 0xD4EFFB84, 0xD4F0FB84, 0xD4F1FB84, 0xD4F2FB84, 0xD4F3FB84, + 0xD4F4FB84, 0xD4F5FB84, 0xD4F6FB84, 0xD4F7FB84, 0xD4F8FB84, 0xD4F9FB84, 0xD4FAFB84, 0xD4FBFB84, 0xD4FCFB84, 0xD4FDFB84, 0xD4FEFB84, 0xD4FFFB84, 0xD500FB84, 0xD501FB84, 0xD502FB84, + 0xD503FB84, 0xD504FB84, 0xD505FB84, 0xD506FB84, 0xD507FB84, 0xD508FB84, 0xD509FB84, 0xD50AFB84, 0xD50BFB84, 0xD50CFB84, 0xD50DFB84, 0xD50EFB84, 0xD50FFB84, 0xD510FB84, 0xD511FB84, + 0xD512FB84, 0xD513FB84, 0xD514FB84, 0xD515FB84, 0xD516FB84, 0xD517FB84, 0xD518FB84, 0xD519FB84, 0xD51AFB84, 0xD51BFB84, 0xD51CFB84, 0xD51DFB84, 0xD51EFB84, 0xD51FFB84, 0xD520FB84, + 0xD521FB84, 0xD522FB84, 0xD523FB84, 0xD524FB84, 0xD525FB84, 0xD526FB84, 0xD527FB84, 0xD528FB84, 0xD529FB84, 0xD52AFB84, 0xD52BFB84, 0xD52CFB84, 0xD52DFB84, 0xD52EFB84, 0xD52FFB84, + 0xD530FB84, 0xD531FB84, 0xD532FB84, 0xD533FB84, 0xD534FB84, 0xD535FB84, 
0xD536FB84, 0xD537FB84, 0xD538FB84, 0xD539FB84, 0xD53AFB84, 0xD53BFB84, 0xD53CFB84, 0xD53DFB84, 0xD53EFB84, + 0xD53FFB84, 0xD540FB84, 0xD541FB84, 0xD542FB84, 0xD543FB84, 0xD544FB84, 0xD545FB84, 0xD546FB84, 0xD547FB84, 0xD548FB84, 0xD549FB84, 0xD54AFB84, 0xD54BFB84, 0xD54CFB84, 0xD54DFB84, + 0xD54EFB84, 0xD54FFB84, 0xD550FB84, 0xD551FB84, 0xD552FB84, 0xD553FB84, 0xD554FB84, 0xD555FB84, 0xD556FB84, 0xD557FB84, 0xD558FB84, 0xD559FB84, 0xD55AFB84, 0xD55BFB84, 0xD55CFB84, + 0xD55DFB84, 0xD55EFB84, 0xD55FFB84, 0xD560FB84, 0xD561FB84, 0xD562FB84, 0xD563FB84, 0xD564FB84, 0xD565FB84, 0xD566FB84, 0xD567FB84, 0xD568FB84, 0xD569FB84, 0xD56AFB84, 0xD56BFB84, + 0xD56CFB84, 0xD56DFB84, 0xD56EFB84, 0xD56FFB84, 0xD570FB84, 0xD571FB84, 0xD572FB84, 0xD573FB84, 0xD574FB84, 0xD575FB84, 0xD576FB84, 0xD577FB84, 0xD578FB84, 0xD579FB84, 0xD57AFB84, + 0xD57BFB84, 0xD57CFB84, 0xD57DFB84, 0xD57EFB84, 0xD57FFB84, 0xD580FB84, 0xD581FB84, 0xD582FB84, 0xD583FB84, 0xD584FB84, 0xD585FB84, 0xD586FB84, 0xD587FB84, 0xD588FB84, 0xD589FB84, + 0xD58AFB84, 0xD58BFB84, 0xD58CFB84, 0xD58DFB84, 0xD58EFB84, 0xD58FFB84, 0xD590FB84, 0xD591FB84, 0xD592FB84, 0xD593FB84, 0xD594FB84, 0xD595FB84, 0xD596FB84, 0xD597FB84, 0xD598FB84, + 0xD599FB84, 0xD59AFB84, 0xD59BFB84, 0xD59CFB84, 0xD59DFB84, 0xD59EFB84, 0xD59FFB84, 0xD5A0FB84, 0xD5A1FB84, 0xD5A2FB84, 0xD5A3FB84, 0xD5A4FB84, 0xD5A5FB84, 0xD5A6FB84, 0xD5A7FB84, + 0xD5A8FB84, 0xD5A9FB84, 0xD5AAFB84, 0xD5ABFB84, 0xD5ACFB84, 0xD5ADFB84, 0xD5AEFB84, 0xD5AFFB84, 0xD5B0FB84, 0xD5B1FB84, 0xD5B2FB84, 0xD5B3FB84, 0xD5B4FB84, 0xD5B5FB84, 0xD5B6FB84, + 0xD5B7FB84, 0xD5B8FB84, 0xD5B9FB84, 0xD5BAFB84, 0xD5BBFB84, 0xD5BCFB84, 0xD5BDFB84, 0xD5BEFB84, 0xD5BFFB84, 0xD5C0FB84, 0xD5C1FB84, 0xD5C2FB84, 0xD5C3FB84, 0xD5C4FB84, 0xD5C5FB84, + 0xD5C6FB84, 0xD5C7FB84, 0xD5C8FB84, 0xD5C9FB84, 0xD5CAFB84, 0xD5CBFB84, 0xD5CCFB84, 0xD5CDFB84, 0xD5CEFB84, 0xD5CFFB84, 0xD5D0FB84, 0xD5D1FB84, 0xD5D2FB84, 0xD5D3FB84, 0xD5D4FB84, + 0xD5D5FB84, 0xD5D6FB84, 0xD5D7FB84, 0xD5D8FB84, 0xD5D9FB84, 
0xD5DAFB84, 0xD5DBFB84, 0xD5DCFB84, 0xD5DDFB84, 0xD5DEFB84, 0xD5DFFB84, 0xD5E0FB84, 0xD5E1FB84, 0xD5E2FB84, 0xD5E3FB84, + 0xD5E4FB84, 0xD5E5FB84, 0xD5E6FB84, 0xD5E7FB84, 0xD5E8FB84, 0xD5E9FB84, 0xD5EAFB84, 0xD5EBFB84, 0xD5ECFB84, 0xD5EDFB84, 0xD5EEFB84, 0xD5EFFB84, 0xD5F0FB84, 0xD5F1FB84, 0xD5F2FB84, + 0xD5F3FB84, 0xD5F4FB84, 0xD5F5FB84, 0xD5F6FB84, 0xD5F7FB84, 0xD5F8FB84, 0xD5F9FB84, 0xD5FAFB84, 0xD5FBFB84, 0xD5FCFB84, 0xD5FDFB84, 0xD5FEFB84, 0xD5FFFB84, 0xD600FB84, 0xD601FB84, + 0xD602FB84, 0xD603FB84, 0xD604FB84, 0xD605FB84, 0xD606FB84, 0xD607FB84, 0xD608FB84, 0xD609FB84, 0xD60AFB84, 0xD60BFB84, 0xD60CFB84, 0xD60DFB84, 0xD60EFB84, 0xD60FFB84, 0xD610FB84, + 0xD611FB84, 0xD612FB84, 0xD613FB84, 0xD614FB84, 0xD615FB84, 0xD616FB84, 0xD617FB84, 0xD618FB84, 0xD619FB84, 0xD61AFB84, 0xD61BFB84, 0xD61CFB84, 0xD61DFB84, 0xD61EFB84, 0xD61FFB84, + 0xD620FB84, 0xD621FB84, 0xD622FB84, 0xD623FB84, 0xD624FB84, 0xD625FB84, 0xD626FB84, 0xD627FB84, 0xD628FB84, 0xD629FB84, 0xD62AFB84, 0xD62BFB84, 0xD62CFB84, 0xD62DFB84, 0xD62EFB84, + 0xD62FFB84, 0xD630FB84, 0xD631FB84, 0xD632FB84, 0xD633FB84, 0xD634FB84, 0xD635FB84, 0xD636FB84, 0xD637FB84, 0xD638FB84, 0xD639FB84, 0xD63AFB84, 0xD63BFB84, 0xD63CFB84, 0xD63DFB84, + 0xD63EFB84, 0xD63FFB84, 0xD640FB84, 0xD641FB84, 0xD642FB84, 0xD643FB84, 0xD644FB84, 0xD645FB84, 0xD646FB84, 0xD647FB84, 0xD648FB84, 0xD649FB84, 0xD64AFB84, 0xD64BFB84, 0xD64CFB84, + 0xD64DFB84, 0xD64EFB84, 0xD64FFB84, 0xD650FB84, 0xD651FB84, 0xD652FB84, 0xD653FB84, 0xD654FB84, 0xD655FB84, 0xD656FB84, 0xD657FB84, 0xD658FB84, 0xD659FB84, 0xD65AFB84, 0xD65BFB84, + 0xD65CFB84, 0xD65DFB84, 0xD65EFB84, 0xD65FFB84, 0xD660FB84, 0xD661FB84, 0xD662FB84, 0xD663FB84, 0xD664FB84, 0xD665FB84, 0xD666FB84, 0xD667FB84, 0xD668FB84, 0xD669FB84, 0xD66AFB84, + 0xD66BFB84, 0xD66CFB84, 0xD66DFB84, 0xD66EFB84, 0xD66FFB84, 0xD670FB84, 0xD671FB84, 0xD672FB84, 0xD673FB84, 0xD674FB84, 0xD675FB84, 0xD676FB84, 0xD677FB84, 0xD678FB84, 0xD679FB84, + 0xD67AFB84, 0xD67BFB84, 0xD67CFB84, 0xD67DFB84, 
0xD67EFB84, 0xD67FFB84, 0xD680FB84, 0xD681FB84, 0xD682FB84, 0xD683FB84, 0xD684FB84, 0xD685FB84, 0xD686FB84, 0xD687FB84, 0xD688FB84, + 0xD689FB84, 0xD68AFB84, 0xD68BFB84, 0xD68CFB84, 0xD68DFB84, 0xD68EFB84, 0xD68FFB84, 0xD690FB84, 0xD691FB84, 0xD692FB84, 0xD693FB84, 0xD694FB84, 0xD695FB84, 0xD696FB84, 0xD697FB84, + 0xD698FB84, 0xD699FB84, 0xD69AFB84, 0xD69BFB84, 0xD69CFB84, 0xD69DFB84, 0xD69EFB84, 0xD69FFB84, 0xD6A0FB84, 0xD6A1FB84, 0xD6A2FB84, 0xD6A3FB84, 0xD6A4FB84, 0xD6A5FB84, 0xD6A6FB84, + 0xD6A7FB84, 0xD6A8FB84, 0xD6A9FB84, 0xD6AAFB84, 0xD6ABFB84, 0xD6ACFB84, 0xD6ADFB84, 0xD6AEFB84, 0xD6AFFB84, 0xD6B0FB84, 0xD6B1FB84, 0xD6B2FB84, 0xD6B3FB84, 0xD6B4FB84, 0xD6B5FB84, + 0xD6B6FB84, 0xD6B7FB84, 0xD6B8FB84, 0xD6B9FB84, 0xD6BAFB84, 0xD6BBFB84, 0xD6BCFB84, 0xD6BDFB84, 0xD6BEFB84, 0xD6BFFB84, 0xD6C0FB84, 0xD6C1FB84, 0xD6C2FB84, 0xD6C3FB84, 0xD6C4FB84, + 0xD6C5FB84, 0xD6C6FB84, 0xD6C7FB84, 0xD6C8FB84, 0xD6C9FB84, 0xD6CAFB84, 0xD6CBFB84, 0xD6CCFB84, 0xD6CDFB84, 0xD6CEFB84, 0xD6CFFB84, 0xD6D0FB84, 0xD6D1FB84, 0xD6D2FB84, 0xD6D3FB84, + 0xD6D4FB84, 0xD6D5FB84, 0xD6D6FB84, 0xD6D7FB84, 0xD6D8FB84, 0xD6D9FB84, 0xD6DAFB84, 0xD6DBFB84, 0xD6DCFB84, 0xD6DDFB84, 0xD6DEFB84, 0xD6DFFB84, 0xD6E0FB84, 0xD6E1FB84, 0xD6E2FB84, + 0xD6E3FB84, 0xD6E4FB84, 0xD6E5FB84, 0xD6E6FB84, 0xD6E7FB84, 0xD6E8FB84, 0xD6E9FB84, 0xD6EAFB84, 0xD6EBFB84, 0xD6ECFB84, 0xD6EDFB84, 0xD6EEFB84, 0xD6EFFB84, 0xD6F0FB84, 0xD6F1FB84, + 0xD6F2FB84, 0xD6F3FB84, 0xD6F4FB84, 0xD6F5FB84, 0xD6F6FB84, 0xD6F7FB84, 0xD6F8FB84, 0xD6F9FB84, 0xD6FAFB84, 0xD6FBFB84, 0xD6FCFB84, 0xD6FDFB84, 0xD6FEFB84, 0xD6FFFB84, 0xD700FB84, + 0xD701FB84, 0xD702FB84, 0xD703FB84, 0xD704FB84, 0xD705FB84, 0xD706FB84, 0xD707FB84, 0xD708FB84, 0xD709FB84, 0xD70AFB84, 0xD70BFB84, 0xD70CFB84, 0xD70DFB84, 0xD70EFB84, 0xD70FFB84, + 0xD710FB84, 0xD711FB84, 0xD712FB84, 0xD713FB84, 0xD714FB84, 0xD715FB84, 0xD716FB84, 0xD717FB84, 0xD718FB84, 0xD719FB84, 0xD71AFB84, 0xD71BFB84, 0xD71CFB84, 0xD71DFB84, 0xD71EFB84, + 0xD71FFB84, 0xD720FB84, 0xD721FB84, 
0xD722FB84, 0xD723FB84, 0xD724FB84, 0xD725FB84, 0xD726FB84, 0xD727FB84, 0xD728FB84, 0xD729FB84, 0xD72AFB84, 0xD72BFB84, 0xD72CFB84, 0xD72DFB84, + 0xD72EFB84, 0xD72FFB84, 0xD730FB84, 0xD731FB84, 0xD732FB84, 0xD733FB84, 0xD734FB84, 0xD735FB84, 0xD736FB84, 0xD737FB84, 0xD738FB84, 0xD739FB84, 0xD73AFB84, 0xD73BFB84, 0xD73CFB84, + 0xD73DFB84, 0xD73EFB84, 0xD73FFB84, 0xD740FB84, 0xD741FB84, 0xD742FB84, 0xD743FB84, 0xD744FB84, 0xD745FB84, 0xD746FB84, 0xD747FB84, 0xD748FB84, 0xD749FB84, 0xD74AFB84, 0xD74BFB84, + 0xD74CFB84, 0xD74DFB84, 0xD74EFB84, 0xD74FFB84, 0xD750FB84, 0xD751FB84, 0xD752FB84, 0xD753FB84, 0xD754FB84, 0xD755FB84, 0xD756FB84, 0xD757FB84, 0xD758FB84, 0xD759FB84, 0xD75AFB84, + 0xD75BFB84, 0xD75CFB84, 0xD75DFB84, 0xD75EFB84, 0xD75FFB84, 0xD760FB84, 0xD761FB84, 0xD762FB84, 0xD763FB84, 0xD764FB84, 0xD765FB84, 0xD766FB84, 0xD767FB84, 0xD768FB84, 0xD769FB84, + 0xD76AFB84, 0xD76BFB84, 0xD76CFB84, 0xD76DFB84, 0xD76EFB84, 0xD76FFB84, 0xD770FB84, 0xD771FB84, 0xD772FB84, 0xD773FB84, 0xD774FB84, 0xD775FB84, 0xD776FB84, 0xD777FB84, 0xD778FB84, + 0xD779FB84, 0xD77AFB84, 0xD77BFB84, 0xD77CFB84, 0xD77DFB84, 0xD77EFB84, 0xD77FFB84, 0xD780FB84, 0xD781FB84, 0xD782FB84, 0xD783FB84, 0xD784FB84, 0xD785FB84, 0xD786FB84, 0xD787FB84, + 0xD788FB84, 0xD789FB84, 0xD78AFB84, 0xD78BFB84, 0xD78CFB84, 0xD78DFB84, 0xD78EFB84, 0xD78FFB84, 0xD790FB84, 0xD791FB84, 0xD792FB84, 0xD793FB84, 0xD794FB84, 0xD795FB84, 0xD796FB84, + 0xD797FB84, 0xD798FB84, 0xD799FB84, 0xD79AFB84, 0xD79BFB84, 0xD79CFB84, 0xD79DFB84, 0xD79EFB84, 0xD79FFB84, 0xD7A0FB84, 0xD7A1FB84, 0xD7A2FB84, 0xD7A3FB84, 0xD7A4FB84, 0xD7A5FB84, + 0xD7A6FB84, 0xD7A7FB84, 0xD7A8FB84, 0xD7A9FB84, 0xD7AAFB84, 0xD7ABFB84, 0xD7ACFB84, 0xD7ADFB84, 0xD7AEFB84, 0xD7AFFB84, 0xD7B0FB84, 0xD7B1FB84, 0xD7B2FB84, 0xD7B3FB84, 0xD7B4FB84, + 0xD7B5FB84, 0xD7B6FB84, 0xD7B7FB84, 0xD7B8FB84, 0xD7B9FB84, 0xD7BAFB84, 0xD7BBFB84, 0xD7BCFB84, 0xD7BDFB84, 0xD7BEFB84, 0xD7BFFB84, 0xD7C0FB84, 0xD7C1FB84, 0xD7C2FB84, 0xD7C3FB84, + 0xD7C4FB84, 0xD7C5FB84, 
0xD7C6FB84, 0xD7C7FB84, 0xD7C8FB84, 0xD7C9FB84, 0xD7CAFB84, 0xD7CBFB84, 0xD7CCFB84, 0xD7CDFB84, 0xD7CEFB84, 0xD7CFFB84, 0xD7D0FB84, 0xD7D1FB84, 0xD7D2FB84, + 0xD7D3FB84, 0xD7D4FB84, 0xD7D5FB84, 0xD7D6FB84, 0xD7D7FB84, 0xD7D8FB84, 0xD7D9FB84, 0xD7DAFB84, 0xD7DBFB84, 0xD7DCFB84, 0xD7DDFB84, 0xD7DEFB84, 0xD7DFFB84, 0xD7E0FB84, 0xD7E1FB84, + 0xD7E2FB84, 0xD7E3FB84, 0xD7E4FB84, 0xD7E5FB84, 0xD7E6FB84, 0xD7E7FB84, 0xD7E8FB84, 0xD7E9FB84, 0xD7EAFB84, 0xD7EBFB84, 0xD7ECFB84, 0xD7EDFB84, 0xD7EEFB84, 0xD7EFFB84, 0xD7F0FB84, + 0xD7F1FB84, 0xD7F2FB84, 0xD7F3FB84, 0xD7F4FB84, 0xD7F5FB84, 0xD7F6FB84, 0xD7F7FB84, 0xD7F8FB84, 0xD7F9FB84, 0xD7FAFB84, 0xD7FBFB84, 0xD7FCFB84, 0xD7FDFB84, 0xD7FEFB84, 0xD7FFFB84, + 0xD800FB84, 0xD801FB84, 0xD802FB84, 0xD803FB84, 0xD804FB84, 0xD805FB84, 0xD806FB84, 0xD807FB84, 0xD808FB84, 0xD809FB84, 0xD80AFB84, 0xD80BFB84, 0xD80CFB84, 0xD80DFB84, 0xD80EFB84, + 0xD80FFB84, 0xD810FB84, 0xD811FB84, 0xD812FB84, 0xD813FB84, 0xD814FB84, 0xD815FB84, 0xD816FB84, 0xD817FB84, 0xD818FB84, 0xD819FB84, 0xD81AFB84, 0xD81BFB84, 0xD81CFB84, 0xD81DFB84, + 0xD81EFB84, 0xD81FFB84, 0xD820FB84, 0xD821FB84, 0xD822FB84, 0xD823FB84, 0xD824FB84, 0xD825FB84, 0xD826FB84, 0xD827FB84, 0xD828FB84, 0xD829FB84, 0xD82AFB84, 0xD82BFB84, 0xD82CFB84, + 0xD82DFB84, 0xD82EFB84, 0xD82FFB84, 0xD830FB84, 0xD831FB84, 0xD832FB84, 0xD833FB84, 0xD834FB84, 0xD835FB84, 0xD836FB84, 0xD837FB84, 0xD838FB84, 0xD839FB84, 0xD83AFB84, 0xD83BFB84, + 0xD83CFB84, 0xD83DFB84, 0xD83EFB84, 0xD83FFB84, 0xD840FB84, 0xD841FB84, 0xD842FB84, 0xD843FB84, 0xD844FB84, 0xD845FB84, 0xD846FB84, 0xD847FB84, 0xD848FB84, 0xD849FB84, 0xD84AFB84, + 0xD84BFB84, 0xD84CFB84, 0xD84DFB84, 0xD84EFB84, 0xD84FFB84, 0xD850FB84, 0xD851FB84, 0xD852FB84, 0xD853FB84, 0xD854FB84, 0xD855FB84, 0xD856FB84, 0xD857FB84, 0xD858FB84, 0xD859FB84, + 0xD85AFB84, 0xD85BFB84, 0xD85CFB84, 0xD85DFB84, 0xD85EFB84, 0xD85FFB84, 0xD860FB84, 0xD861FB84, 0xD862FB84, 0xD863FB84, 0xD864FB84, 0xD865FB84, 0xD866FB84, 0xD867FB84, 0xD868FB84, + 0xD869FB84, 
0xD86AFB84, 0xD86BFB84, 0xD86CFB84, 0xD86DFB84, 0xD86EFB84, 0xD86FFB84, 0xD870FB84, 0xD871FB84, 0xD872FB84, 0xD873FB84, 0xD874FB84, 0xD875FB84, 0xD876FB84, 0xD877FB84, + 0xD878FB84, 0xD879FB84, 0xD87AFB84, 0xD87BFB84, 0xD87CFB84, 0xD87DFB84, 0xD87EFB84, 0xD87FFB84, 0xD880FB84, 0xD881FB84, 0xD882FB84, 0xD883FB84, 0xD884FB84, 0xD885FB84, 0xD886FB84, + 0xD887FB84, 0xD888FB84, 0xD889FB84, 0xD88AFB84, 0xD88BFB84, 0xD88CFB84, 0xD88DFB84, 0xD88EFB84, 0xD88FFB84, 0xD890FB84, 0xD891FB84, 0xD892FB84, 0xD893FB84, 0xD894FB84, 0xD895FB84, + 0xD896FB84, 0xD897FB84, 0xD898FB84, 0xD899FB84, 0xD89AFB84, 0xD89BFB84, 0xD89CFB84, 0xD89DFB84, 0xD89EFB84, 0xD89FFB84, 0xD8A0FB84, 0xD8A1FB84, 0xD8A2FB84, 0xD8A3FB84, 0xD8A4FB84, + 0xD8A5FB84, 0xD8A6FB84, 0xD8A7FB84, 0xD8A8FB84, 0xD8A9FB84, 0xD8AAFB84, 0xD8ABFB84, 0xD8ACFB84, 0xD8ADFB84, 0xD8AEFB84, 0xD8AFFB84, 0xD8B0FB84, 0xD8B1FB84, 0xD8B2FB84, 0xD8B3FB84, + 0xD8B4FB84, 0xD8B5FB84, 0xD8B6FB84, 0xD8B7FB84, 0xD8B8FB84, 0xD8B9FB84, 0xD8BAFB84, 0xD8BBFB84, 0xD8BCFB84, 0xD8BDFB84, 0xD8BEFB84, 0xD8BFFB84, 0xD8C0FB84, 0xD8C1FB84, 0xD8C2FB84, + 0xD8C3FB84, 0xD8C4FB84, 0xD8C5FB84, 0xD8C6FB84, 0xD8C7FB84, 0xD8C8FB84, 0xD8C9FB84, 0xD8CAFB84, 0xD8CBFB84, 0xD8CCFB84, 0xD8CDFB84, 0xD8CEFB84, 0xD8CFFB84, 0xD8D0FB84, 0xD8D1FB84, + 0xD8D2FB84, 0xD8D3FB84, 0xD8D4FB84, 0xD8D5FB84, 0xD8D6FB84, 0xD8D7FB84, 0xD8D8FB84, 0xD8D9FB84, 0xD8DAFB84, 0xD8DBFB84, 0xD8DCFB84, 0xD8DDFB84, 0xD8DEFB84, 0xD8DFFB84, 0xD8E0FB84, + 0xD8E1FB84, 0xD8E2FB84, 0xD8E3FB84, 0xD8E4FB84, 0xD8E5FB84, 0xD8E6FB84, 0xD8E7FB84, 0xD8E8FB84, 0xD8E9FB84, 0xD8EAFB84, 0xD8EBFB84, 0xD8ECFB84, 0xD8EDFB84, 0xD8EEFB84, 0xD8EFFB84, + 0xD8F0FB84, 0xD8F1FB84, 0xD8F2FB84, 0xD8F3FB84, 0xD8F4FB84, 0xD8F5FB84, 0xD8F6FB84, 0xD8F7FB84, 0xD8F8FB84, 0xD8F9FB84, 0xD8FAFB84, 0xD8FBFB84, 0xD8FCFB84, 0xD8FDFB84, 0xD8FEFB84, + 0xD8FFFB84, 0xD900FB84, 0xD901FB84, 0xD902FB84, 0xD903FB84, 0xD904FB84, 0xD905FB84, 0xD906FB84, 0xD907FB84, 0xD908FB84, 0xD909FB84, 0xD90AFB84, 0xD90BFB84, 0xD90CFB84, 0xD90DFB84, + 
0xD90EFB84, 0xD90FFB84, 0xD910FB84, 0xD911FB84, 0xD912FB84, 0xD913FB84, 0xD914FB84, 0xD915FB84, 0xD916FB84, 0xD917FB84, 0xD918FB84, 0xD919FB84, 0xD91AFB84, 0xD91BFB84, 0xD91CFB84, + 0xD91DFB84, 0xD91EFB84, 0xD91FFB84, 0xD920FB84, 0xD921FB84, 0xD922FB84, 0xD923FB84, 0xD924FB84, 0xD925FB84, 0xD926FB84, 0xD927FB84, 0xD928FB84, 0xD929FB84, 0xD92AFB84, 0xD92BFB84, + 0xD92CFB84, 0xD92DFB84, 0xD92EFB84, 0xD92FFB84, 0xD930FB84, 0xD931FB84, 0xD932FB84, 0xD933FB84, 0xD934FB84, 0xD935FB84, 0xD936FB84, 0xD937FB84, 0xD938FB84, 0xD939FB84, 0xD93AFB84, + 0xD93BFB84, 0xD93CFB84, 0xD93DFB84, 0xD93EFB84, 0xD93FFB84, 0xD940FB84, 0xD941FB84, 0xD942FB84, 0xD943FB84, 0xD944FB84, 0xD945FB84, 0xD946FB84, 0xD947FB84, 0xD948FB84, 0xD949FB84, + 0xD94AFB84, 0xD94BFB84, 0xD94CFB84, 0xD94DFB84, 0xD94EFB84, 0xD94FFB84, 0xD950FB84, 0xD951FB84, 0xD952FB84, 0xD953FB84, 0xD954FB84, 0xD955FB84, 0xD956FB84, 0xD957FB84, 0xD958FB84, + 0xD959FB84, 0xD95AFB84, 0xD95BFB84, 0xD95CFB84, 0xD95DFB84, 0xD95EFB84, 0xD95FFB84, 0xD960FB84, 0xD961FB84, 0xD962FB84, 0xD963FB84, 0xD964FB84, 0xD965FB84, 0xD966FB84, 0xD967FB84, + 0xD968FB84, 0xD969FB84, 0xD96AFB84, 0xD96BFB84, 0xD96CFB84, 0xD96DFB84, 0xD96EFB84, 0xD96FFB84, 0xD970FB84, 0xD971FB84, 0xD972FB84, 0xD973FB84, 0xD974FB84, 0xD975FB84, 0xD976FB84, + 0xD977FB84, 0xD978FB84, 0xD979FB84, 0xD97AFB84, 0xD97BFB84, 0xD97CFB84, 0xD97DFB84, 0xD97EFB84, 0xD97FFB84, 0xD980FB84, 0xD981FB84, 0xD982FB84, 0xD983FB84, 0xD984FB84, 0xD985FB84, + 0xD986FB84, 0xD987FB84, 0xD988FB84, 0xD989FB84, 0xD98AFB84, 0xD98BFB84, 0xD98CFB84, 0xD98DFB84, 0xD98EFB84, 0xD98FFB84, 0xD990FB84, 0xD991FB84, 0xD992FB84, 0xD993FB84, 0xD994FB84, + 0xD995FB84, 0xD996FB84, 0xD997FB84, 0xD998FB84, 0xD999FB84, 0xD99AFB84, 0xD99BFB84, 0xD99CFB84, 0xD99DFB84, 0xD99EFB84, 0xD99FFB84, 0xD9A0FB84, 0xD9A1FB84, 0xD9A2FB84, 0xD9A3FB84, + 0xD9A4FB84, 0xD9A5FB84, 0xD9A6FB84, 0xD9A7FB84, 0xD9A8FB84, 0xD9A9FB84, 0xD9AAFB84, 0xD9ABFB84, 0xD9ACFB84, 0xD9ADFB84, 0xD9AEFB84, 0xD9AFFB84, 0xD9B0FB84, 0xD9B1FB84, 0xD9B2FB84, 
+ 0xD9B3FB84, 0xD9B4FB84, 0xD9B5FB84, 0xD9B6FB84, 0xD9B7FB84, 0xD9B8FB84, 0xD9B9FB84, 0xD9BAFB84, 0xD9BBFB84, 0xD9BCFB84, 0xD9BDFB84, 0xD9BEFB84, 0xD9BFFB84, 0xD9C0FB84, 0xD9C1FB84, + 0xD9C2FB84, 0xD9C3FB84, 0xD9C4FB84, 0xD9C5FB84, 0xD9C6FB84, 0xD9C7FB84, 0xD9C8FB84, 0xD9C9FB84, 0xD9CAFB84, 0xD9CBFB84, 0xD9CCFB84, 0xD9CDFB84, 0xD9CEFB84, 0xD9CFFB84, 0xD9D0FB84, + 0xD9D1FB84, 0xD9D2FB84, 0xD9D3FB84, 0xD9D4FB84, 0xD9D5FB84, 0xD9D6FB84, 0xD9D7FB84, 0xD9D8FB84, 0xD9D9FB84, 0xD9DAFB84, 0xD9DBFB84, 0xD9DCFB84, 0xD9DDFB84, 0xD9DEFB84, 0xD9DFFB84, + 0xD9E0FB84, 0xD9E1FB84, 0xD9E2FB84, 0xD9E3FB84, 0xD9E4FB84, 0xD9E5FB84, 0xD9E6FB84, 0xD9E7FB84, 0xD9E8FB84, 0xD9E9FB84, 0xD9EAFB84, 0xD9EBFB84, 0xD9ECFB84, 0xD9EDFB84, 0xD9EEFB84, + 0xD9EFFB84, 0xD9F0FB84, 0xD9F1FB84, 0xD9F2FB84, 0xD9F3FB84, 0xD9F4FB84, 0xD9F5FB84, 0xD9F6FB84, 0xD9F7FB84, 0xD9F8FB84, 0xD9F9FB84, 0xD9FAFB84, 0xD9FBFB84, 0xD9FCFB84, 0xD9FDFB84, + 0xD9FEFB84, 0xD9FFFB84, 0xDA00FB84, 0xDA01FB84, 0xDA02FB84, 0xDA03FB84, 0xDA04FB84, 0xDA05FB84, 0xDA06FB84, 0xDA07FB84, 0xDA08FB84, 0xDA09FB84, 0xDA0AFB84, 0xDA0BFB84, 0xDA0CFB84, + 0xDA0DFB84, 0xDA0EFB84, 0xDA0FFB84, 0xDA10FB84, 0xDA11FB84, 0xDA12FB84, 0xDA13FB84, 0xDA14FB84, 0xDA15FB84, 0xDA16FB84, 0xDA17FB84, 0xDA18FB84, 0xDA19FB84, 0xDA1AFB84, 0xDA1BFB84, + 0xDA1CFB84, 0xDA1DFB84, 0xDA1EFB84, 0xDA1FFB84, 0xDA20FB84, 0xDA21FB84, 0xDA22FB84, 0xDA23FB84, 0xDA24FB84, 0xDA25FB84, 0xDA26FB84, 0xDA27FB84, 0xDA28FB84, 0xDA29FB84, 0xDA2AFB84, + 0xDA2BFB84, 0xDA2CFB84, 0xDA2DFB84, 0xDA2EFB84, 0xDA2FFB84, 0xDA30FB84, 0xDA31FB84, 0xDA32FB84, 0xDA33FB84, 0xDA34FB84, 0xDA35FB84, 0xDA36FB84, 0xDA37FB84, 0xDA38FB84, 0xDA39FB84, + 0xDA3AFB84, 0xDA3BFB84, 0xDA3CFB84, 0xDA3DFB84, 0xDA3EFB84, 0xDA3FFB84, 0xDA40FB84, 0xDA41FB84, 0xDA42FB84, 0xDA43FB84, 0xDA44FB84, 0xDA45FB84, 0xDA46FB84, 0xDA47FB84, 0xDA48FB84, + 0xDA49FB84, 0xDA4AFB84, 0xDA4BFB84, 0xDA4CFB84, 0xDA4DFB84, 0xDA4EFB84, 0xDA4FFB84, 0xDA50FB84, 0xDA51FB84, 0xDA52FB84, 0xDA53FB84, 0xDA54FB84, 0xDA55FB84, 0xDA56FB84, 
0xDA57FB84, + 0xDA58FB84, 0xDA59FB84, 0xDA5AFB84, 0xDA5BFB84, 0xDA5CFB84, 0xDA5DFB84, 0xDA5EFB84, 0xDA5FFB84, 0xDA60FB84, 0xDA61FB84, 0xDA62FB84, 0xDA63FB84, 0xDA64FB84, 0xDA65FB84, 0xDA66FB84, + 0xDA67FB84, 0xDA68FB84, 0xDA69FB84, 0xDA6AFB84, 0xDA6BFB84, 0xDA6CFB84, 0xDA6DFB84, 0xDA6EFB84, 0xDA6FFB84, 0xDA70FB84, 0xDA71FB84, 0xDA72FB84, 0xDA73FB84, 0xDA74FB84, 0xDA75FB84, + 0xDA76FB84, 0xDA77FB84, 0xDA78FB84, 0xDA79FB84, 0xDA7AFB84, 0xDA7BFB84, 0xDA7CFB84, 0xDA7DFB84, 0xDA7EFB84, 0xDA7FFB84, 0xDA80FB84, 0xDA81FB84, 0xDA82FB84, 0xDA83FB84, 0xDA84FB84, + 0xDA85FB84, 0xDA86FB84, 0xDA87FB84, 0xDA88FB84, 0xDA89FB84, 0xDA8AFB84, 0xDA8BFB84, 0xDA8CFB84, 0xDA8DFB84, 0xDA8EFB84, 0xDA8FFB84, 0xDA90FB84, 0xDA91FB84, 0xDA92FB84, 0xDA93FB84, + 0xDA94FB84, 0xDA95FB84, 0xDA96FB84, 0xDA97FB84, 0xDA98FB84, 0xDA99FB84, 0xDA9AFB84, 0xDA9BFB84, 0xDA9CFB84, 0xDA9DFB84, 0xDA9EFB84, 0xDA9FFB84, 0xDAA0FB84, 0xDAA1FB84, 0xDAA2FB84, + 0xDAA3FB84, 0xDAA4FB84, 0xDAA5FB84, 0xDAA6FB84, 0xDAA7FB84, 0xDAA8FB84, 0xDAA9FB84, 0xDAAAFB84, 0xDAABFB84, 0xDAACFB84, 0xDAADFB84, 0xDAAEFB84, 0xDAAFFB84, 0xDAB0FB84, 0xDAB1FB84, + 0xDAB2FB84, 0xDAB3FB84, 0xDAB4FB84, 0xDAB5FB84, 0xDAB6FB84, 0xDAB7FB84, 0xDAB8FB84, 0xDAB9FB84, 0xDABAFB84, 0xDABBFB84, 0xDABCFB84, 0xDABDFB84, 0xDABEFB84, 0xDABFFB84, 0xDAC0FB84, + 0xDAC1FB84, 0xDAC2FB84, 0xDAC3FB84, 0xDAC4FB84, 0xDAC5FB84, 0xDAC6FB84, 0xDAC7FB84, 0xDAC8FB84, 0xDAC9FB84, 0xDACAFB84, 0xDACBFB84, 0xDACCFB84, 0xDACDFB84, 0xDACEFB84, 0xDACFFB84, + 0xDAD0FB84, 0xDAD1FB84, 0xDAD2FB84, 0xDAD3FB84, 0xDAD4FB84, 0xDAD5FB84, 0xDAD6FB84, 0xDAD7FB84, 0xDAD8FB84, 0xDAD9FB84, 0xDADAFB84, 0xDADBFB84, 0xDADCFB84, 0xDADDFB84, 0xDADEFB84, + 0xDADFFB84, 0xDAE0FB84, 0xDAE1FB84, 0xDAE2FB84, 0xDAE3FB84, 0xDAE4FB84, 0xDAE5FB84, 0xDAE6FB84, 0xDAE7FB84, 0xDAE8FB84, 0xDAE9FB84, 0xDAEAFB84, 0xDAEBFB84, 0xDAECFB84, 0xDAEDFB84, + 0xDAEEFB84, 0xDAEFFB84, 0xDAF0FB84, 0xDAF1FB84, 0xDAF2FB84, 0xDAF3FB84, 0xDAF4FB84, 0xDAF5FB84, 0xDAF6FB84, 0xDAF7FB84, 0xDAF8FB84, 0xDAF9FB84, 0xDAFAFB84, 
0xDAFBFB84, 0xDAFCFB84, + 0xDAFDFB84, 0xDAFEFB84, 0xDAFFFB84, 0xDB00FB84, 0xDB01FB84, 0xDB02FB84, 0xDB03FB84, 0xDB04FB84, 0xDB05FB84, 0xDB06FB84, 0xDB07FB84, 0xDB08FB84, 0xDB09FB84, 0xDB0AFB84, 0xDB0BFB84, + 0xDB0CFB84, 0xDB0DFB84, 0xDB0EFB84, 0xDB0FFB84, 0xDB10FB84, 0xDB11FB84, 0xDB12FB84, 0xDB13FB84, 0xDB14FB84, 0xDB15FB84, 0xDB16FB84, 0xDB17FB84, 0xDB18FB84, 0xDB19FB84, 0xDB1AFB84, + 0xDB1BFB84, 0xDB1CFB84, 0xDB1DFB84, 0xDB1EFB84, 0xDB1FFB84, 0xDB20FB84, 0xDB21FB84, 0xDB22FB84, 0xDB23FB84, 0xDB24FB84, 0xDB25FB84, 0xDB26FB84, 0xDB27FB84, 0xDB28FB84, 0xDB29FB84, + 0xDB2AFB84, 0xDB2BFB84, 0xDB2CFB84, 0xDB2DFB84, 0xDB2EFB84, 0xDB2FFB84, 0xDB30FB84, 0xDB31FB84, 0xDB32FB84, 0xDB33FB84, 0xDB34FB84, 0xDB35FB84, 0xDB36FB84, 0xDB37FB84, 0xDB38FB84, + 0xDB39FB84, 0xDB3AFB84, 0xDB3BFB84, 0xDB3CFB84, 0xDB3DFB84, 0xDB3EFB84, 0xDB3FFB84, 0xDB40FB84, 0xDB41FB84, 0xDB42FB84, 0xDB43FB84, 0xDB44FB84, 0xDB45FB84, 0xDB46FB84, 0xDB47FB84, + 0xDB48FB84, 0xDB49FB84, 0xDB4AFB84, 0xDB4BFB84, 0xDB4CFB84, 0xDB4DFB84, 0xDB4EFB84, 0xDB4FFB84, 0xDB50FB84, 0xDB51FB84, 0xDB52FB84, 0xDB53FB84, 0xDB54FB84, 0xDB55FB84, 0xDB56FB84, + 0xDB57FB84, 0xDB58FB84, 0xDB59FB84, 0xDB5AFB84, 0xDB5BFB84, 0xDB5CFB84, 0xDB5DFB84, 0xDB5EFB84, 0xDB5FFB84, 0xDB60FB84, 0xDB61FB84, 0xDB62FB84, 0xDB63FB84, 0xDB64FB84, 0xDB65FB84, + 0xDB66FB84, 0xDB67FB84, 0xDB68FB84, 0xDB69FB84, 0xDB6AFB84, 0xDB6BFB84, 0xDB6CFB84, 0xDB6DFB84, 0xDB6EFB84, 0xDB6FFB84, 0xDB70FB84, 0xDB71FB84, 0xDB72FB84, 0xDB73FB84, 0xDB74FB84, + 0xDB75FB84, 0xDB76FB84, 0xDB77FB84, 0xDB78FB84, 0xDB79FB84, 0xDB7AFB84, 0xDB7BFB84, 0xDB7CFB84, 0xDB7DFB84, 0xDB7EFB84, 0xDB7FFB84, 0xDB80FB84, 0xDB81FB84, 0xDB82FB84, 0xDB83FB84, + 0xDB84FB84, 0xDB85FB84, 0xDB86FB84, 0xDB87FB84, 0xDB88FB84, 0xDB89FB84, 0xDB8AFB84, 0xDB8BFB84, 0xDB8CFB84, 0xDB8DFB84, 0xDB8EFB84, 0xDB8FFB84, 0xDB90FB84, 0xDB91FB84, 0xDB92FB84, + 0xDB93FB84, 0xDB94FB84, 0xDB95FB84, 0xDB96FB84, 0xDB97FB84, 0xDB98FB84, 0xDB99FB84, 0xDB9AFB84, 0xDB9BFB84, 0xDB9CFB84, 0xDB9DFB84, 0xDB9EFB84, 
0xDB9FFB84, 0xDBA0FB84, 0xDBA1FB84, + 0xDBA2FB84, 0xDBA3FB84, 0xDBA4FB84, 0xDBA5FB84, 0xDBA6FB84, 0xDBA7FB84, 0xDBA8FB84, 0xDBA9FB84, 0xDBAAFB84, 0xDBABFB84, 0xDBACFB84, 0xDBADFB84, 0xDBAEFB84, 0xDBAFFB84, 0xDBB0FB84, + 0xDBB1FB84, 0xDBB2FB84, 0xDBB3FB84, 0xDBB4FB84, 0xDBB5FB84, 0xDBB6FB84, 0xDBB7FB84, 0xDBB8FB84, 0xDBB9FB84, 0xDBBAFB84, 0xDBBBFB84, 0xDBBCFB84, 0xDBBDFB84, 0xDBBEFB84, 0xDBBFFB84, + 0xDBC0FB84, 0xDBC1FB84, 0xDBC2FB84, 0xDBC3FB84, 0xDBC4FB84, 0xDBC5FB84, 0xDBC6FB84, 0xDBC7FB84, 0xDBC8FB84, 0xDBC9FB84, 0xDBCAFB84, 0xDBCBFB84, 0xDBCCFB84, 0xDBCDFB84, 0xDBCEFB84, + 0xDBCFFB84, 0xDBD0FB84, 0xDBD1FB84, 0xDBD2FB84, 0xDBD3FB84, 0xDBD4FB84, 0xDBD5FB84, 0xDBD6FB84, 0xDBD7FB84, 0xDBD8FB84, 0xDBD9FB84, 0xDBDAFB84, 0xDBDBFB84, 0xDBDCFB84, 0xDBDDFB84, + 0xDBDEFB84, 0xDBDFFB84, 0xDBE0FB84, 0xDBE1FB84, 0xDBE2FB84, 0xDBE3FB84, 0xDBE4FB84, 0xDBE5FB84, 0xDBE6FB84, 0xDBE7FB84, 0xDBE8FB84, 0xDBE9FB84, 0xDBEAFB84, 0xDBEBFB84, 0xDBECFB84, + 0xDBEDFB84, 0xDBEEFB84, 0xDBEFFB84, 0xDBF0FB84, 0xDBF1FB84, 0xDBF2FB84, 0xDBF3FB84, 0xDBF4FB84, 0xDBF5FB84, 0xDBF6FB84, 0xDBF7FB84, 0xDBF8FB84, 0xDBF9FB84, 0xDBFAFB84, 0xDBFBFB84, + 0xDBFCFB84, 0xDBFDFB84, 0xDBFEFB84, 0xDBFFFB84, 0xDC00FB84, 0xDC01FB84, 0xDC02FB84, 0xDC03FB84, 0xDC04FB84, 0xDC05FB84, 0xDC06FB84, 0xDC07FB84, 0xDC08FB84, 0xDC09FB84, 0xDC0AFB84, + 0xDC0BFB84, 0xDC0CFB84, 0xDC0DFB84, 0xDC0EFB84, 0xDC0FFB84, 0xDC10FB84, 0xDC11FB84, 0xDC12FB84, 0xDC13FB84, 0xDC14FB84, 0xDC15FB84, 0xDC16FB84, 0xDC17FB84, 0xDC18FB84, 0xDC19FB84, + 0xDC1AFB84, 0xDC1BFB84, 0xDC1CFB84, 0xDC1DFB84, 0xDC1EFB84, 0xDC1FFB84, 0xDC20FB84, 0xDC21FB84, 0xDC22FB84, 0xDC23FB84, 0xDC24FB84, 0xDC25FB84, 0xDC26FB84, 0xDC27FB84, 0xDC28FB84, + 0xDC29FB84, 0xDC2AFB84, 0xDC2BFB84, 0xDC2CFB84, 0xDC2DFB84, 0xDC2EFB84, 0xDC2FFB84, 0xDC30FB84, 0xDC31FB84, 0xDC32FB84, 0xDC33FB84, 0xDC34FB84, 0xDC35FB84, 0xDC36FB84, 0xDC37FB84, + 0xDC38FB84, 0xDC39FB84, 0xDC3AFB84, 0xDC3BFB84, 0xDC3CFB84, 0xDC3DFB84, 0xDC3EFB84, 0xDC3FFB84, 0xDC40FB84, 0xDC41FB84, 0xDC42FB84, 
0xDC43FB84, 0xDC44FB84, 0xDC45FB84, 0xDC46FB84, + 0xDC47FB84, 0xDC48FB84, 0xDC49FB84, 0xDC4AFB84, 0xDC4BFB84, 0xDC4CFB84, 0xDC4DFB84, 0xDC4EFB84, 0xDC4FFB84, 0xDC50FB84, 0xDC51FB84, 0xDC52FB84, 0xDC53FB84, 0xDC54FB84, 0xDC55FB84, + 0xDC56FB84, 0xDC57FB84, 0xDC58FB84, 0xDC59FB84, 0xDC5AFB84, 0xDC5BFB84, 0xDC5CFB84, 0xDC5DFB84, 0xDC5EFB84, 0xDC5FFB84, 0xDC60FB84, 0xDC61FB84, 0xDC62FB84, 0xDC63FB84, 0xDC64FB84, + 0xDC65FB84, 0xDC66FB84, 0xDC67FB84, 0xDC68FB84, 0xDC69FB84, 0xDC6AFB84, 0xDC6BFB84, 0xDC6CFB84, 0xDC6DFB84, 0xDC6EFB84, 0xDC6FFB84, 0xDC70FB84, 0xDC71FB84, 0xDC72FB84, 0xDC73FB84, + 0xDC74FB84, 0xDC75FB84, 0xDC76FB84, 0xDC77FB84, 0xDC78FB84, 0xDC79FB84, 0xDC7AFB84, 0xDC7BFB84, 0xDC7CFB84, 0xDC7DFB84, 0xDC7EFB84, 0xDC7FFB84, 0xDC80FB84, 0xDC81FB84, 0xDC82FB84, + 0xDC83FB84, 0xDC84FB84, 0xDC85FB84, 0xDC86FB84, 0xDC87FB84, 0xDC88FB84, 0xDC89FB84, 0xDC8AFB84, 0xDC8BFB84, 0xDC8CFB84, 0xDC8DFB84, 0xDC8EFB84, 0xDC8FFB84, 0xDC90FB84, 0xDC91FB84, + 0xDC92FB84, 0xDC93FB84, 0xDC94FB84, 0xDC95FB84, 0xDC96FB84, 0xDC97FB84, 0xDC98FB84, 0xDC99FB84, 0xDC9AFB84, 0xDC9BFB84, 0xDC9CFB84, 0xDC9DFB84, 0xDC9EFB84, 0xDC9FFB84, 0xDCA0FB84, + 0xDCA1FB84, 0xDCA2FB84, 0xDCA3FB84, 0xDCA4FB84, 0xDCA5FB84, 0xDCA6FB84, 0xDCA7FB84, 0xDCA8FB84, 0xDCA9FB84, 0xDCAAFB84, 0xDCABFB84, 0xDCACFB84, 0xDCADFB84, 0xDCAEFB84, 0xDCAFFB84, + 0xDCB0FB84, 0xDCB1FB84, 0xDCB2FB84, 0xDCB3FB84, 0xDCB4FB84, 0xDCB5FB84, 0xDCB6FB84, 0xDCB7FB84, 0xDCB8FB84, 0xDCB9FB84, 0xDCBAFB84, 0xDCBBFB84, 0xDCBCFB84, 0xDCBDFB84, 0xDCBEFB84, + 0xDCBFFB84, 0xDCC0FB84, 0xDCC1FB84, 0xDCC2FB84, 0xDCC3FB84, 0xDCC4FB84, 0xDCC5FB84, 0xDCC6FB84, 0xDCC7FB84, 0xDCC8FB84, 0xDCC9FB84, 0xDCCAFB84, 0xDCCBFB84, 0xDCCCFB84, 0xDCCDFB84, + 0xDCCEFB84, 0xDCCFFB84, 0xDCD0FB84, 0xDCD1FB84, 0xDCD2FB84, 0xDCD3FB84, 0xDCD4FB84, 0xDCD5FB84, 0xDCD6FB84, 0xDCD7FB84, 0xDCD8FB84, 0xDCD9FB84, 0xDCDAFB84, 0xDCDBFB84, 0xDCDCFB84, + 0xDCDDFB84, 0xDCDEFB84, 0xDCDFFB84, 0xDCE0FB84, 0xDCE1FB84, 0xDCE2FB84, 0xDCE3FB84, 0xDCE4FB84, 0xDCE5FB84, 0xDCE6FB84, 
0xDCE7FB84, 0xDCE8FB84, 0xDCE9FB84, 0xDCEAFB84, 0xDCEBFB84, + 0xDCECFB84, 0xDCEDFB84, 0xDCEEFB84, 0xDCEFFB84, 0xDCF0FB84, 0xDCF1FB84, 0xDCF2FB84, 0xDCF3FB84, 0xDCF4FB84, 0xDCF5FB84, 0xDCF6FB84, 0xDCF7FB84, 0xDCF8FB84, 0xDCF9FB84, 0xDCFAFB84, + 0xDCFBFB84, 0xDCFCFB84, 0xDCFDFB84, 0xDCFEFB84, 0xDCFFFB84, 0xDD00FB84, 0xDD01FB84, 0xDD02FB84, 0xDD03FB84, 0xDD04FB84, 0xDD05FB84, 0xDD06FB84, 0xDD07FB84, 0xDD08FB84, 0xDD09FB84, + 0xDD0AFB84, 0xDD0BFB84, 0xDD0CFB84, 0xDD0DFB84, 0xDD0EFB84, 0xDD0FFB84, 0xDD10FB84, 0xDD11FB84, 0xDD12FB84, 0xDD13FB84, 0xDD14FB84, 0xDD15FB84, 0xDD16FB84, 0xDD17FB84, 0xDD18FB84, + 0xDD19FB84, 0xDD1AFB84, 0xDD1BFB84, 0xDD1CFB84, 0xDD1DFB84, 0xDD1EFB84, 0xDD1FFB84, 0xDD20FB84, 0xDD21FB84, 0xDD22FB84, 0xDD23FB84, 0xDD24FB84, 0xDD25FB84, 0xDD26FB84, 0xDD27FB84, + 0xDD28FB84, 0xDD29FB84, 0xDD2AFB84, 0xDD2BFB84, 0xDD2CFB84, 0xDD2DFB84, 0xDD2EFB84, 0xDD2FFB84, 0xDD30FB84, 0xDD31FB84, 0xDD32FB84, 0xDD33FB84, 0xDD34FB84, 0xDD35FB84, 0xDD36FB84, + 0xDD37FB84, 0xDD38FB84, 0xDD39FB84, 0xDD3AFB84, 0xDD3BFB84, 0xDD3CFB84, 0xDD3DFB84, 0xDD3EFB84, 0xDD3FFB84, 0xDD40FB84, 0xDD41FB84, 0xDD42FB84, 0xDD43FB84, 0xDD44FB84, 0xDD45FB84, + 0xDD46FB84, 0xDD47FB84, 0xDD48FB84, 0xDD49FB84, 0xDD4AFB84, 0xDD4BFB84, 0xDD4CFB84, 0xDD4DFB84, 0xDD4EFB84, 0xDD4FFB84, 0xDD50FB84, 0xDD51FB84, 0xDD52FB84, 0xDD53FB84, 0xDD54FB84, + 0xDD55FB84, 0xDD56FB84, 0xDD57FB84, 0xDD58FB84, 0xDD59FB84, 0xDD5AFB84, 0xDD5BFB84, 0xDD5CFB84, 0xDD5DFB84, 0xDD5EFB84, 0xDD5FFB84, 0xDD60FB84, 0xDD61FB84, 0xDD62FB84, 0xDD63FB84, + 0xDD64FB84, 0xDD65FB84, 0xDD66FB84, 0xDD67FB84, 0xDD68FB84, 0xDD69FB84, 0xDD6AFB84, 0xDD6BFB84, 0xDD6CFB84, 0xDD6DFB84, 0xDD6EFB84, 0xDD6FFB84, 0xDD70FB84, 0xDD71FB84, 0xDD72FB84, + 0xDD73FB84, 0xDD74FB84, 0xDD75FB84, 0xDD76FB84, 0xDD77FB84, 0xDD78FB84, 0xDD79FB84, 0xDD7AFB84, 0xDD7BFB84, 0xDD7CFB84, 0xDD7DFB84, 0xDD7EFB84, 0xDD7FFB84, 0xDD80FB84, 0xDD81FB84, + 0xDD82FB84, 0xDD83FB84, 0xDD84FB84, 0xDD85FB84, 0xDD86FB84, 0xDD87FB84, 0xDD88FB84, 0xDD89FB84, 0xDD8AFB84, 
0xDD8BFB84, 0xDD8CFB84, 0xDD8DFB84, 0xDD8EFB84, 0xDD8FFB84, 0xDD90FB84, + 0xDD91FB84, 0xDD92FB84, 0xDD93FB84, 0xDD94FB84, 0xDD95FB84, 0xDD96FB84, 0xDD97FB84, 0xDD98FB84, 0xDD99FB84, 0xDD9AFB84, 0xDD9BFB84, 0xDD9CFB84, 0xDD9DFB84, 0xDD9EFB84, 0xDD9FFB84, + 0xDDA0FB84, 0xDDA1FB84, 0xDDA2FB84, 0xDDA3FB84, 0xDDA4FB84, 0xDDA5FB84, 0xDDA6FB84, 0xDDA7FB84, 0xDDA8FB84, 0xDDA9FB84, 0xDDAAFB84, 0xDDABFB84, 0xDDACFB84, 0xDDADFB84, 0xDDAEFB84, + 0xDDAFFB84, 0xDDB0FB84, 0xDDB1FB84, 0xDDB2FB84, 0xDDB3FB84, 0xDDB4FB84, 0xDDB5FB84, 0xDDB6FB84, 0xDDB7FB84, 0xDDB8FB84, 0xDDB9FB84, 0xDDBAFB84, 0xDDBBFB84, 0xDDBCFB84, 0xDDBDFB84, + 0xDDBEFB84, 0xDDBFFB84, 0xDDC0FB84, 0xDDC1FB84, 0xDDC2FB84, 0xDDC3FB84, 0xDDC4FB84, 0xDDC5FB84, 0xDDC6FB84, 0xDDC7FB84, 0xDDC8FB84, 0xDDC9FB84, 0xDDCAFB84, 0xDDCBFB84, 0xDDCCFB84, + 0xDDCDFB84, 0xDDCEFB84, 0xDDCFFB84, 0xDDD0FB84, 0xDDD1FB84, 0xDDD2FB84, 0xDDD3FB84, 0xDDD4FB84, 0xDDD5FB84, 0xDDD6FB84, 0xDDD7FB84, 0xDDD8FB84, 0xDDD9FB84, 0xDDDAFB84, 0xDDDBFB84, + 0xDDDCFB84, 0xDDDDFB84, 0xDDDEFB84, 0xDDDFFB84, 0xDDE0FB84, 0xDDE1FB84, 0xDDE2FB84, 0xDDE3FB84, 0xDDE4FB84, 0xDDE5FB84, 0xDDE6FB84, 0xDDE7FB84, 0xDDE8FB84, 0xDDE9FB84, 0xDDEAFB84, + 0xDDEBFB84, 0xDDECFB84, 0xDDEDFB84, 0xDDEEFB84, 0xDDEFFB84, 0xDDF0FB84, 0xDDF1FB84, 0xDDF2FB84, 0xDDF3FB84, 0xDDF4FB84, 0xDDF5FB84, 0xDDF6FB84, 0xDDF7FB84, 0xDDF8FB84, 0xDDF9FB84, + 0xDDFAFB84, 0xDDFBFB84, 0xDDFCFB84, 0xDDFDFB84, 0xDDFEFB84, 0xDDFFFB84, 0xDE00FB84, 0xDE01FB84, 0xDE02FB84, 0xDE03FB84, 0xDE04FB84, 0xDE05FB84, 0xDE06FB84, 0xDE07FB84, 0xDE08FB84, + 0xDE09FB84, 0xDE0AFB84, 0xDE0BFB84, 0xDE0CFB84, 0xDE0DFB84, 0xDE0EFB84, 0xDE0FFB84, 0xDE10FB84, 0xDE11FB84, 0xDE12FB84, 0xDE13FB84, 0xDE14FB84, 0xDE15FB84, 0xDE16FB84, 0xDE17FB84, + 0xDE18FB84, 0xDE19FB84, 0xDE1AFB84, 0xDE1BFB84, 0xDE1CFB84, 0xDE1DFB84, 0xDE1EFB84, 0xDE1FFB84, 0xDE20FB84, 0xDE21FB84, 0xDE22FB84, 0xDE23FB84, 0xDE24FB84, 0xDE25FB84, 0xDE26FB84, + 0xDE27FB84, 0xDE28FB84, 0xDE29FB84, 0xDE2AFB84, 0xDE2BFB84, 0xDE2CFB84, 0xDE2DFB84, 0xDE2EFB84, 
0xDE2FFB84, 0xDE30FB84, 0xDE31FB84, 0xDE32FB84, 0xDE33FB84, 0xDE34FB84, 0xDE35FB84, + 0xDE36FB84, 0xDE37FB84, 0xDE38FB84, 0xDE39FB84, 0xDE3AFB84, 0xDE3BFB84, 0xDE3CFB84, 0xDE3DFB84, 0xDE3EFB84, 0xDE3FFB84, 0xDE40FB84, 0xDE41FB84, 0xDE42FB84, 0xDE43FB84, 0xDE44FB84, + 0xDE45FB84, 0xDE46FB84, 0xDE47FB84, 0xDE48FB84, 0xDE49FB84, 0xDE4AFB84, 0xDE4BFB84, 0xDE4CFB84, 0xDE4DFB84, 0xDE4EFB84, 0xDE4FFB84, 0xDE50FB84, 0xDE51FB84, 0xDE52FB84, 0xDE53FB84, + 0xDE54FB84, 0xDE55FB84, 0xDE56FB84, 0xDE57FB84, 0xDE58FB84, 0xDE59FB84, 0xDE5AFB84, 0xDE5BFB84, 0xDE5CFB84, 0xDE5DFB84, 0xDE5EFB84, 0xDE5FFB84, 0xDE60FB84, 0xDE61FB84, 0xDE62FB84, + 0xDE63FB84, 0xDE64FB84, 0xDE65FB84, 0xDE66FB84, 0xDE67FB84, 0xDE68FB84, 0xDE69FB84, 0xDE6AFB84, 0xDE6BFB84, 0xDE6CFB84, 0xDE6DFB84, 0xDE6EFB84, 0xDE6FFB84, 0xDE70FB84, 0xDE71FB84, + 0xDE72FB84, 0xDE73FB84, 0xDE74FB84, 0xDE75FB84, 0xDE76FB84, 0xDE77FB84, 0xDE78FB84, 0xDE79FB84, 0xDE7AFB84, 0xDE7BFB84, 0xDE7CFB84, 0xDE7DFB84, 0xDE7EFB84, 0xDE7FFB84, 0xDE80FB84, + 0xDE81FB84, 0xDE82FB84, 0xDE83FB84, 0xDE84FB84, 0xDE85FB84, 0xDE86FB84, 0xDE87FB84, 0xDE88FB84, 0xDE89FB84, 0xDE8AFB84, 0xDE8BFB84, 0xDE8CFB84, 0xDE8DFB84, 0xDE8EFB84, 0xDE8FFB84, + 0xDE90FB84, 0xDE91FB84, 0xDE92FB84, 0xDE93FB84, 0xDE94FB84, 0xDE95FB84, 0xDE96FB84, 0xDE97FB84, 0xDE98FB84, 0xDE99FB84, 0xDE9AFB84, 0xDE9BFB84, 0xDE9CFB84, 0xDE9DFB84, 0xDE9EFB84, + 0xDE9FFB84, 0xDEA0FB84, 0xDEA1FB84, 0xDEA2FB84, 0xDEA3FB84, 0xDEA4FB84, 0xDEA5FB84, 0xDEA6FB84, 0xDEA7FB84, 0xDEA8FB84, 0xDEA9FB84, 0xDEAAFB84, 0xDEABFB84, 0xDEACFB84, 0xDEADFB84, + 0xDEAEFB84, 0xDEAFFB84, 0xDEB0FB84, 0xDEB1FB84, 0xDEB2FB84, 0xDEB3FB84, 0xDEB4FB84, 0xDEB5FB84, 0xDEB6FB84, 0xDEB7FB84, 0xDEB8FB84, 0xDEB9FB84, 0xDEBAFB84, 0xDEBBFB84, 0xDEBCFB84, + 0xDEBDFB84, 0xDEBEFB84, 0xDEBFFB84, 0xDEC0FB84, 0xDEC1FB84, 0xDEC2FB84, 0xDEC3FB84, 0xDEC4FB84, 0xDEC5FB84, 0xDEC6FB84, 0xDEC7FB84, 0xDEC8FB84, 0xDEC9FB84, 0xDECAFB84, 0xDECBFB84, + 0xDECCFB84, 0xDECDFB84, 0xDECEFB84, 0xDECFFB84, 0xDED0FB84, 0xDED1FB84, 0xDED2FB84, 
0xDED3FB84, 0xDED4FB84, 0xDED5FB84, 0xDED6FB84, 0xDED7FB84, 0xDED8FB84, 0xDED9FB84, 0xDEDAFB84, + 0xDEDBFB84, 0xDEDCFB84, 0xDEDDFB84, 0xDEDEFB84, 0xDEDFFB84, 0xDEE0FB84, 0xDEE1FB84, 0xDEE2FB84, 0xDEE3FB84, 0xDEE4FB84, 0xDEE5FB84, 0xDEE6FB84, 0xDEE7FB84, 0xDEE8FB84, 0xDEE9FB84, + 0xDEEAFB84, 0xDEEBFB84, 0xDEECFB84, 0xDEEDFB84, 0xDEEEFB84, 0xDEEFFB84, 0xDEF0FB84, 0xDEF1FB84, 0xDEF2FB84, 0xDEF3FB84, 0xDEF4FB84, 0xDEF5FB84, 0xDEF6FB84, 0xDEF7FB84, 0xDEF8FB84, + 0xDEF9FB84, 0xDEFAFB84, 0xDEFBFB84, 0xDEFCFB84, 0xDEFDFB84, 0xDEFEFB84, 0xDEFFFB84, 0xDF00FB84, 0xDF01FB84, 0xDF02FB84, 0xDF03FB84, 0xDF04FB84, 0xDF05FB84, 0xDF06FB84, 0xDF07FB84, + 0xDF08FB84, 0xDF09FB84, 0xDF0AFB84, 0xDF0BFB84, 0xDF0CFB84, 0xDF0DFB84, 0xDF0EFB84, 0xDF0FFB84, 0xDF10FB84, 0xDF11FB84, 0xDF12FB84, 0xDF13FB84, 0xDF14FB84, 0xDF15FB84, 0xDF16FB84, + 0xDF17FB84, 0xDF18FB84, 0xDF19FB84, 0xDF1AFB84, 0xDF1BFB84, 0xDF1CFB84, 0xDF1DFB84, 0xDF1EFB84, 0xDF1FFB84, 0xDF20FB84, 0xDF21FB84, 0xDF22FB84, 0xDF23FB84, 0xDF24FB84, 0xDF25FB84, + 0xDF26FB84, 0xDF27FB84, 0xDF28FB84, 0xDF29FB84, 0xDF2AFB84, 0xDF2BFB84, 0xDF2CFB84, 0xDF2DFB84, 0xDF2EFB84, 0xDF2FFB84, 0xDF30FB84, 0xDF31FB84, 0xDF32FB84, 0xDF33FB84, 0xDF34FB84, + 0xDF35FB84, 0xDF36FB84, 0xDF37FB84, 0xDF38FB84, 0xDF39FB84, 0xDF3AFB84, 0xDF3BFB84, 0xDF3CFB84, 0xDF3DFB84, 0xDF3EFB84, 0xDF3FFB84, 0xDF40FB84, 0xDF41FB84, 0xDF42FB84, 0xDF43FB84, + 0xDF44FB84, 0xDF45FB84, 0xDF46FB84, 0xDF47FB84, 0xDF48FB84, 0xDF49FB84, 0xDF4AFB84, 0xDF4BFB84, 0xDF4CFB84, 0xDF4DFB84, 0xDF4EFB84, 0xDF4FFB84, 0xDF50FB84, 0xDF51FB84, 0xDF52FB84, + 0xDF53FB84, 0xDF54FB84, 0xDF55FB84, 0xDF56FB84, 0xDF57FB84, 0xDF58FB84, 0xDF59FB84, 0xDF5AFB84, 0xDF5BFB84, 0xDF5CFB84, 0xDF5DFB84, 0xDF5EFB84, 0xDF5FFB84, 0xDF60FB84, 0xDF61FB84, + 0xDF62FB84, 0xDF63FB84, 0xDF64FB84, 0xDF65FB84, 0xDF66FB84, 0xDF67FB84, 0xDF68FB84, 0xDF69FB84, 0xDF6AFB84, 0xDF6BFB84, 0xDF6CFB84, 0xDF6DFB84, 0xDF6EFB84, 0xDF6FFB84, 0xDF70FB84, + 0xDF71FB84, 0xDF72FB84, 0xDF73FB84, 0xDF74FB84, 0xDF75FB84, 0xDF76FB84, 
0xDF77FB84, 0xDF78FB84, 0xDF79FB84, 0xDF7AFB84, 0xDF7BFB84, 0xDF7CFB84, 0xDF7DFB84, 0xDF7EFB84, 0xDF7FFB84, + 0xDF80FB84, 0xDF81FB84, 0xDF82FB84, 0xDF83FB84, 0xDF84FB84, 0xDF85FB84, 0xDF86FB84, 0xDF87FB84, 0xDF88FB84, 0xDF89FB84, 0xDF8AFB84, 0xDF8BFB84, 0xDF8CFB84, 0xDF8DFB84, 0xDF8EFB84, + 0xDF8FFB84, 0xDF90FB84, 0xDF91FB84, 0xDF92FB84, 0xDF93FB84, 0xDF94FB84, 0xDF95FB84, 0xDF96FB84, 0xDF97FB84, 0xDF98FB84, 0xDF99FB84, 0xDF9AFB84, 0xDF9BFB84, 0xDF9CFB84, 0xDF9DFB84, + 0xDF9EFB84, 0xDF9FFB84, 0xDFA0FB84, 0xDFA1FB84, 0xDFA2FB84, 0xDFA3FB84, 0xDFA4FB84, 0xDFA5FB84, 0xDFA6FB84, 0xDFA7FB84, 0xDFA8FB84, 0xDFA9FB84, 0xDFAAFB84, 0xDFABFB84, 0xDFACFB84, + 0xDFADFB84, 0xDFAEFB84, 0xDFAFFB84, 0xDFB0FB84, 0xDFB1FB84, 0xDFB2FB84, 0xDFB3FB84, 0xDFB4FB84, 0xDFB5FB84, 0xDFB6FB84, 0xDFB7FB84, 0xDFB8FB84, 0xDFB9FB84, 0xDFBAFB84, 0xDFBBFB84, + 0xDFBCFB84, 0xDFBDFB84, 0xDFBEFB84, 0xDFBFFB84, 0xDFC0FB84, 0xDFC1FB84, 0xDFC2FB84, 0xDFC3FB84, 0xDFC4FB84, 0xDFC5FB84, 0xDFC6FB84, 0xDFC7FB84, 0xDFC8FB84, 0xDFC9FB84, 0xDFCAFB84, + 0xDFCBFB84, 0xDFCCFB84, 0xDFCDFB84, 0xDFCEFB84, 0xDFCFFB84, 0xDFD0FB84, 0xDFD1FB84, 0xDFD2FB84, 0xDFD3FB84, 0xDFD4FB84, 0xDFD5FB84, 0xDFD6FB84, 0xDFD7FB84, 0xDFD8FB84, 0xDFD9FB84, + 0xDFDAFB84, 0xDFDBFB84, 0xDFDCFB84, 0xDFDDFB84, 0xDFDEFB84, 0xDFDFFB84, 0xDFE0FB84, 0xDFE1FB84, 0xDFE2FB84, 0xDFE3FB84, 0xDFE4FB84, 0xDFE5FB84, 0xDFE6FB84, 0xDFE7FB84, 0xDFE8FB84, + 0xDFE9FB84, 0xDFEAFB84, 0xDFEBFB84, 0xDFECFB84, 0xDFEDFB84, 0xDFEEFB84, 0xDFEFFB84, 0xDFF0FB84, 0xDFF1FB84, 0xDFF2FB84, 0xDFF3FB84, 0xDFF4FB84, 0xDFF5FB84, 0xDFF6FB84, 0xDFF7FB84, + 0xDFF8FB84, 0xDFF9FB84, 0xDFFAFB84, 0xDFFBFB84, 0xDFFCFB84, 0xDFFDFB84, 0xDFFEFB84, 0xDFFFFB84, 0xE000FB84, 0xE001FB84, 0xE002FB84, 0xE003FB84, 0xE004FB84, 0xE005FB84, 0xE006FB84, + 0xE007FB84, 0xE008FB84, 0xE009FB84, 0xE00AFB84, 0xE00BFB84, 0xE00CFB84, 0xE00DFB84, 0xE00EFB84, 0xE00FFB84, 0xE010FB84, 0xE011FB84, 0xE012FB84, 0xE013FB84, 0xE014FB84, 0xE015FB84, + 0xE016FB84, 0xE017FB84, 0xE018FB84, 0xE019FB84, 0xE01AFB84, 
0xE01BFB84, 0xE01CFB84, 0xE01DFB84, 0xE01EFB84, 0xE01FFB84, 0xE020FB84, 0xE021FB84, 0xE022FB84, 0xE023FB84, 0xE024FB84, + 0xE025FB84, 0xE026FB84, 0xE027FB84, 0xE028FB84, 0xE029FB84, 0xE02AFB84, 0xE02BFB84, 0xE02CFB84, 0xE02DFB84, 0xE02EFB84, 0xE02FFB84, 0xE030FB84, 0xE031FB84, 0xE032FB84, 0xE033FB84, + 0xE034FB84, 0xE035FB84, 0xE036FB84, 0xE037FB84, 0xE038FB84, 0xE039FB84, 0xE03AFB84, 0xE03BFB84, 0xE03CFB84, 0xE03DFB84, 0xE03EFB84, 0xE03FFB84, 0xE040FB84, 0xE041FB84, 0xE042FB84, + 0xE043FB84, 0xE044FB84, 0xE045FB84, 0xE046FB84, 0xE047FB84, 0xE048FB84, 0xE049FB84, 0xE04AFB84, 0xE04BFB84, 0xE04CFB84, 0xE04DFB84, 0xE04EFB84, 0xE04FFB84, 0xE050FB84, 0xE051FB84, + 0xE052FB84, 0xE053FB84, 0xE054FB84, 0xE055FB84, 0xE056FB84, 0xE057FB84, 0xE058FB84, 0xE059FB84, 0xE05AFB84, 0xE05BFB84, 0xE05CFB84, 0xE05DFB84, 0xE05EFB84, 0xE05FFB84, 0xE060FB84, + 0xE061FB84, 0xE062FB84, 0xE063FB84, 0xE064FB84, 0xE065FB84, 0xE066FB84, 0xE067FB84, 0xE068FB84, 0xE069FB84, 0xE06AFB84, 0xE06BFB84, 0xE06CFB84, 0xE06DFB84, 0xE06EFB84, 0xE06FFB84, + 0xE070FB84, 0xE071FB84, 0xE072FB84, 0xE073FB84, 0xE074FB84, 0xE075FB84, 0xE076FB84, 0xE077FB84, 0xE078FB84, 0xE079FB84, 0xE07AFB84, 0xE07BFB84, 0xE07CFB84, 0xE07DFB84, 0xE07EFB84, + 0xE07FFB84, 0xE080FB84, 0xE081FB84, 0xE082FB84, 0xE083FB84, 0xE084FB84, 0xE085FB84, 0xE086FB84, 0xE087FB84, 0xE088FB84, 0xE089FB84, 0xE08AFB84, 0xE08BFB84, 0xE08CFB84, 0xE08DFB84, + 0xE08EFB84, 0xE08FFB84, 0xE090FB84, 0xE091FB84, 0xE092FB84, 0xE093FB84, 0xE094FB84, 0xE095FB84, 0xE096FB84, 0xE097FB84, 0xE098FB84, 0xE099FB84, 0xE09AFB84, 0xE09BFB84, 0xE09CFB84, + 0xE09DFB84, 0xE09EFB84, 0xE09FFB84, 0xE0A0FB84, 0xE0A1FB84, 0xE0A2FB84, 0xE0A3FB84, 0xE0A4FB84, 0xE0A5FB84, 0xE0A6FB84, 0xE0A7FB84, 0xE0A8FB84, 0xE0A9FB84, 0xE0AAFB84, 0xE0ABFB84, + 0xE0ACFB84, 0xE0ADFB84, 0xE0AEFB84, 0xE0AFFB84, 0xE0B0FB84, 0xE0B1FB84, 0xE0B2FB84, 0xE0B3FB84, 0xE0B4FB84, 0xE0B5FB84, 0xE0B6FB84, 0xE0B7FB84, 0xE0B8FB84, 0xE0B9FB84, 0xE0BAFB84, + 0xE0BBFB84, 0xE0BCFB84, 0xE0BDFB84, 0xE0BEFB84, 
0xE0BFFB84, 0xE0C0FB84, 0xE0C1FB84, 0xE0C2FB84, 0xE0C3FB84, 0xE0C4FB84, 0xE0C5FB84, 0xE0C6FB84, 0xE0C7FB84, 0xE0C8FB84, 0xE0C9FB84, + 0xE0CAFB84, 0xE0CBFB84, 0xE0CCFB84, 0xE0CDFB84, 0xE0CEFB84, 0xE0CFFB84, 0xE0D0FB84, 0xE0D1FB84, 0xE0D2FB84, 0xE0D3FB84, 0xE0D4FB84, 0xE0D5FB84, 0xE0D6FB84, 0xE0D7FB84, 0xE0D8FB84, + 0xE0D9FB84, 0xE0DAFB84, 0xE0DBFB84, 0xE0DCFB84, 0xE0DDFB84, 0xE0DEFB84, 0xE0DFFB84, 0xE0E0FB84, 0xE0E1FB84, 0xE0E2FB84, 0xE0E3FB84, 0xE0E4FB84, 0xE0E5FB84, 0xE0E6FB84, 0xE0E7FB84, + 0xE0E8FB84, 0xE0E9FB84, 0xE0EAFB84, 0xE0EBFB84, 0xE0ECFB84, 0xE0EDFB84, 0xE0EEFB84, 0xE0EFFB84, 0xE0F0FB84, 0xE0F1FB84, 0xE0F2FB84, 0xE0F3FB84, 0xE0F4FB84, 0xE0F5FB84, 0xE0F6FB84, + 0xE0F7FB84, 0xE0F8FB84, 0xE0F9FB84, 0xE0FAFB84, 0xE0FBFB84, 0xE0FCFB84, 0xE0FDFB84, 0xE0FEFB84, 0xE0FFFB84, 0xE100FB84, 0xE101FB84, 0xE102FB84, 0xE103FB84, 0xE104FB84, 0xE105FB84, + 0xE106FB84, 0xE107FB84, 0xE108FB84, 0xE109FB84, 0xE10AFB84, 0xE10BFB84, 0xE10CFB84, 0xE10DFB84, 0xE10EFB84, 0xE10FFB84, 0xE110FB84, 0xE111FB84, 0xE112FB84, 0xE113FB84, 0xE114FB84, + 0xE115FB84, 0xE116FB84, 0xE117FB84, 0xE118FB84, 0xE119FB84, 0xE11AFB84, 0xE11BFB84, 0xE11CFB84, 0xE11DFB84, 0xE11EFB84, 0xE11FFB84, 0xE120FB84, 0xE121FB84, 0xE122FB84, 0xE123FB84, + 0xE124FB84, 0xE125FB84, 0xE126FB84, 0xE127FB84, 0xE128FB84, 0xE129FB84, 0xE12AFB84, 0xE12BFB84, 0xE12CFB84, 0xE12DFB84, 0xE12EFB84, 0xE12FFB84, 0xE130FB84, 0xE131FB84, 0xE132FB84, + 0xE133FB84, 0xE134FB84, 0xE135FB84, 0xE136FB84, 0xE137FB84, 0xE138FB84, 0xE139FB84, 0xE13AFB84, 0xE13BFB84, 0xE13CFB84, 0xE13DFB84, 0xE13EFB84, 0xE13FFB84, 0xE140FB84, 0xE141FB84, + 0xE142FB84, 0xE143FB84, 0xE144FB84, 0xE145FB84, 0xE146FB84, 0xE147FB84, 0xE148FB84, 0xE149FB84, 0xE14AFB84, 0xE14BFB84, 0xE14CFB84, 0xE14DFB84, 0xE14EFB84, 0xE14FFB84, 0xE150FB84, + 0xE151FB84, 0xE152FB84, 0xE153FB84, 0xE154FB84, 0xE155FB84, 0xE156FB84, 0xE157FB84, 0xE158FB84, 0xE159FB84, 0xE15AFB84, 0xE15BFB84, 0xE15CFB84, 0xE15DFB84, 0xE15EFB84, 0xE15FFB84, + 0xE160FB84, 0xE161FB84, 0xE162FB84, 
0xE163FB84, 0xE164FB84, 0xE165FB84, 0xE166FB84, 0xE167FB84, 0xE168FB84, 0xE169FB84, 0xE16AFB84, 0xE16BFB84, 0xE16CFB84, 0xE16DFB84, 0xE16EFB84, + 0xE16FFB84, 0xE170FB84, 0xE171FB84, 0xE172FB84, 0xE173FB84, 0xE174FB84, 0xE175FB84, 0xE176FB84, 0xE177FB84, 0xE178FB84, 0xE179FB84, 0xE17AFB84, 0xE17BFB84, 0xE17CFB84, 0xE17DFB84, + 0xE17EFB84, 0xE17FFB84, 0xE180FB84, 0xE181FB84, 0xE182FB84, 0xE183FB84, 0xE184FB84, 0xE185FB84, 0xE186FB84, 0xE187FB84, 0xE188FB84, 0xE189FB84, 0xE18AFB84, 0xE18BFB84, 0xE18CFB84, + 0xE18DFB84, 0xE18EFB84, 0xE18FFB84, 0xE190FB84, 0xE191FB84, 0xE192FB84, 0xE193FB84, 0xE194FB84, 0xE195FB84, 0xE196FB84, 0xE197FB84, 0xE198FB84, 0xE199FB84, 0xE19AFB84, 0xE19BFB84, + 0xE19CFB84, 0xE19DFB84, 0xE19EFB84, 0xE19FFB84, 0xE1A0FB84, 0xE1A1FB84, 0xE1A2FB84, 0xE1A3FB84, 0xE1A4FB84, 0xE1A5FB84, 0xE1A6FB84, 0xE1A7FB84, 0xE1A8FB84, 0xE1A9FB84, 0xE1AAFB84, + 0xE1ABFB84, 0xE1ACFB84, 0xE1ADFB84, 0xE1AEFB84, 0xE1AFFB84, 0xE1B0FB84, 0xE1B1FB84, 0xE1B2FB84, 0xE1B3FB84, 0xE1B4FB84, 0xE1B5FB84, 0xE1B6FB84, 0xE1B7FB84, 0xE1B8FB84, 0xE1B9FB84, + 0xE1BAFB84, 0xE1BBFB84, 0xE1BCFB84, 0xE1BDFB84, 0xE1BEFB84, 0xE1BFFB84, 0xE1C0FB84, 0xE1C1FB84, 0xE1C2FB84, 0xE1C3FB84, 0xE1C4FB84, 0xE1C5FB84, 0xE1C6FB84, 0xE1C7FB84, 0xE1C8FB84, + 0xE1C9FB84, 0xE1CAFB84, 0xE1CBFB84, 0xE1CCFB84, 0xE1CDFB84, 0xE1CEFB84, 0xE1CFFB84, 0xE1D0FB84, 0xE1D1FB84, 0xE1D2FB84, 0xE1D3FB84, 0xE1D4FB84, 0xE1D5FB84, 0xE1D6FB84, 0xE1D7FB84, + 0xE1D8FB84, 0xE1D9FB84, 0xE1DAFB84, 0xE1DBFB84, 0xE1DCFB84, 0xE1DDFB84, 0xE1DEFB84, 0xE1DFFB84, 0xE1E0FB84, 0xE1E1FB84, 0xE1E2FB84, 0xE1E3FB84, 0xE1E4FB84, 0xE1E5FB84, 0xE1E6FB84, + 0xE1E7FB84, 0xE1E8FB84, 0xE1E9FB84, 0xE1EAFB84, 0xE1EBFB84, 0xE1ECFB84, 0xE1EDFB84, 0xE1EEFB84, 0xE1EFFB84, 0xE1F0FB84, 0xE1F1FB84, 0xE1F2FB84, 0xE1F3FB84, 0xE1F4FB84, 0xE1F5FB84, + 0xE1F6FB84, 0xE1F7FB84, 0xE1F8FB84, 0xE1F9FB84, 0xE1FAFB84, 0xE1FBFB84, 0xE1FCFB84, 0xE1FDFB84, 0xE1FEFB84, 0xE1FFFB84, 0xE200FB84, 0xE201FB84, 0xE202FB84, 0xE203FB84, 0xE204FB84, + 0xE205FB84, 0xE206FB84, 
0xE207FB84, 0xE208FB84, 0xE209FB84, 0xE20AFB84, 0xE20BFB84, 0xE20CFB84, 0xE20DFB84, 0xE20EFB84, 0xE20FFB84, 0xE210FB84, 0xE211FB84, 0xE212FB84, 0xE213FB84, + 0xE214FB84, 0xE215FB84, 0xE216FB84, 0xE217FB84, 0xE218FB84, 0xE219FB84, 0xE21AFB84, 0xE21BFB84, 0xE21CFB84, 0xE21DFB84, 0xE21EFB84, 0xE21FFB84, 0xE220FB84, 0xE221FB84, 0xE222FB84, + 0xE223FB84, 0xE224FB84, 0xE225FB84, 0xE226FB84, 0xE227FB84, 0xE228FB84, 0xE229FB84, 0xE22AFB84, 0xE22BFB84, 0xE22CFB84, 0xE22DFB84, 0xE22EFB84, 0xE22FFB84, 0xE230FB84, 0xE231FB84, + 0xE232FB84, 0xE233FB84, 0xE234FB84, 0xE235FB84, 0xE236FB84, 0xE237FB84, 0xE238FB84, 0xE239FB84, 0xE23AFB84, 0xE23BFB84, 0xE23CFB84, 0xE23DFB84, 0xE23EFB84, 0xE23FFB84, 0xE240FB84, + 0xE241FB84, 0xE242FB84, 0xE243FB84, 0xE244FB84, 0xE245FB84, 0xE246FB84, 0xE247FB84, 0xE248FB84, 0xE249FB84, 0xE24AFB84, 0xE24BFB84, 0xE24CFB84, 0xE24DFB84, 0xE24EFB84, 0xE24FFB84, + 0xE250FB84, 0xE251FB84, 0xE252FB84, 0xE253FB84, 0xE254FB84, 0xE255FB84, 0xE256FB84, 0xE257FB84, 0xE258FB84, 0xE259FB84, 0xE25AFB84, 0xE25BFB84, 0xE25CFB84, 0xE25DFB84, 0xE25EFB84, + 0xE25FFB84, 0xE260FB84, 0xE261FB84, 0xE262FB84, 0xE263FB84, 0xE264FB84, 0xE265FB84, 0xE266FB84, 0xE267FB84, 0xE268FB84, 0xE269FB84, 0xE26AFB84, 0xE26BFB84, 0xE26CFB84, 0xE26DFB84, + 0xE26EFB84, 0xE26FFB84, 0xE270FB84, 0xE271FB84, 0xE272FB84, 0xE273FB84, 0xE274FB84, 0xE275FB84, 0xE276FB84, 0xE277FB84, 0xE278FB84, 0xE279FB84, 0xE27AFB84, 0xE27BFB84, 0xE27CFB84, + 0xE27DFB84, 0xE27EFB84, 0xE27FFB84, 0xE280FB84, 0xE281FB84, 0xE282FB84, 0xE283FB84, 0xE284FB84, 0xE285FB84, 0xE286FB84, 0xE287FB84, 0xE288FB84, 0xE289FB84, 0xE28AFB84, 0xE28BFB84, + 0xE28CFB84, 0xE28DFB84, 0xE28EFB84, 0xE28FFB84, 0xE290FB84, 0xE291FB84, 0xE292FB84, 0xE293FB84, 0xE294FB84, 0xE295FB84, 0xE296FB84, 0xE297FB84, 0xE298FB84, 0xE299FB84, 0xE29AFB84, + 0xE29BFB84, 0xE29CFB84, 0xE29DFB84, 0xE29EFB84, 0xE29FFB84, 0xE2A0FB84, 0xE2A1FB84, 0xE2A2FB84, 0xE2A3FB84, 0xE2A4FB84, 0xE2A5FB84, 0xE2A6FB84, 0xE2A7FB84, 0xE2A8FB84, 0xE2A9FB84, + 0xE2AAFB84, 
0xE2ABFB84, 0xE2ACFB84, 0xE2ADFB84, 0xE2AEFB84, 0xE2AFFB84, 0xE2B0FB84, 0xE2B1FB84, 0xE2B2FB84, 0xE2B3FB84, 0xE2B4FB84, 0xE2B5FB84, 0xE2B6FB84, 0xE2B7FB84, 0xE2B8FB84, + 0xE2B9FB84, 0xE2BAFB84, 0xE2BBFB84, 0xE2BCFB84, 0xE2BDFB84, 0xE2BEFB84, 0xE2BFFB84, 0xE2C0FB84, 0xE2C1FB84, 0xE2C2FB84, 0xE2C3FB84, 0xE2C4FB84, 0xE2C5FB84, 0xE2C6FB84, 0xE2C7FB84, + 0xE2C8FB84, 0xE2C9FB84, 0xE2CAFB84, 0xE2CBFB84, 0xE2CCFB84, 0xE2CDFB84, 0xE2CEFB84, 0xE2CFFB84, 0xE2D0FB84, 0xE2D1FB84, 0xE2D2FB84, 0xE2D3FB84, 0xE2D4FB84, 0xE2D5FB84, 0xE2D6FB84, + 0xE2D7FB84, 0xE2D8FB84, 0xE2D9FB84, 0xE2DAFB84, 0xE2DBFB84, 0xE2DCFB84, 0xE2DDFB84, 0xE2DEFB84, 0xE2DFFB84, 0xE2E0FB84, 0xE2E1FB84, 0xE2E2FB84, 0xE2E3FB84, 0xE2E4FB84, 0xE2E5FB84, + 0xE2E6FB84, 0xE2E7FB84, 0xE2E8FB84, 0xE2E9FB84, 0xE2EAFB84, 0xE2EBFB84, 0xE2ECFB84, 0xE2EDFB84, 0xE2EEFB84, 0xE2EFFB84, 0xE2F0FB84, 0xE2F1FB84, 0xE2F2FB84, 0xE2F3FB84, 0xE2F4FB84, + 0xE2F5FB84, 0xE2F6FB84, 0xE2F7FB84, 0xE2F8FB84, 0xE2F9FB84, 0xE2FAFB84, 0xE2FBFB84, 0xE2FCFB84, 0xE2FDFB84, 0xE2FEFB84, 0xE2FFFB84, 0xE300FB84, 0xE301FB84, 0xE302FB84, 0xE303FB84, + 0xE304FB84, 0xE305FB84, 0xE306FB84, 0xE307FB84, 0xE308FB84, 0xE309FB84, 0xE30AFB84, 0xE30BFB84, 0xE30CFB84, 0xE30DFB84, 0xE30EFB84, 0xE30FFB84, 0xE310FB84, 0xE311FB84, 0xE312FB84, + 0xE313FB84, 0xE314FB84, 0xE315FB84, 0xE316FB84, 0xE317FB84, 0xE318FB84, 0xE319FB84, 0xE31AFB84, 0xE31BFB84, 0xE31CFB84, 0xE31DFB84, 0xE31EFB84, 0xE31FFB84, 0xE320FB84, 0xE321FB84, + 0xE322FB84, 0xE323FB84, 0xE324FB84, 0xE325FB84, 0xE326FB84, 0xE327FB84, 0xE328FB84, 0xE329FB84, 0xE32AFB84, 0xE32BFB84, 0xE32CFB84, 0xE32DFB84, 0xE32EFB84, 0xE32FFB84, 0xE330FB84, + 0xE331FB84, 0xE332FB84, 0xE333FB84, 0xE334FB84, 0xE335FB84, 0xE336FB84, 0xE337FB84, 0xE338FB84, 0xE339FB84, 0xE33AFB84, 0xE33BFB84, 0xE33CFB84, 0xE33DFB84, 0xE33EFB84, 0xE33FFB84, + 0xE340FB84, 0xE341FB84, 0xE342FB84, 0xE343FB84, 0xE344FB84, 0xE345FB84, 0xE346FB84, 0xE347FB84, 0xE348FB84, 0xE349FB84, 0xE34AFB84, 0xE34BFB84, 0xE34CFB84, 0xE34DFB84, 0xE34EFB84, + 
0xE34FFB84, 0xE350FB84, 0xE351FB84, 0xE352FB84, 0xE353FB84, 0xE354FB84, 0xE355FB84, 0xE356FB84, 0xE357FB84, 0xE358FB84, 0xE359FB84, 0xE35AFB84, 0xE35BFB84, 0xE35CFB84, 0xE35DFB84, + 0xE35EFB84, 0xE35FFB84, 0xE360FB84, 0xE361FB84, 0xE362FB84, 0xE363FB84, 0xE364FB84, 0xE365FB84, 0xE366FB84, 0xE367FB84, 0xE368FB84, 0xE369FB84, 0xE36AFB84, 0xE36BFB84, 0xE36CFB84, + 0xE36DFB84, 0xE36EFB84, 0xE36FFB84, 0xE370FB84, 0xE371FB84, 0xE372FB84, 0xE373FB84, 0xE374FB84, 0xE375FB84, 0xE376FB84, 0xE377FB84, 0xE378FB84, 0xE379FB84, 0xE37AFB84, 0xE37BFB84, + 0xE37CFB84, 0xE37DFB84, 0xE37EFB84, 0xE37FFB84, 0xE380FB84, 0xE381FB84, 0xE382FB84, 0xE383FB84, 0xE384FB84, 0xE385FB84, 0xE386FB84, 0xE387FB84, 0xE388FB84, 0xE389FB84, 0xE38AFB84, + 0xE38BFB84, 0xE38CFB84, 0xE38DFB84, 0xE38EFB84, 0xE38FFB84, 0xE390FB84, 0xE391FB84, 0xE392FB84, 0xE393FB84, 0xE394FB84, 0xE395FB84, 0xE396FB84, 0xE397FB84, 0xE398FB84, 0xE399FB84, + 0xE39AFB84, 0xE39BFB84, 0xE39CFB84, 0xE39DFB84, 0xE39EFB84, 0xE39FFB84, 0xE3A0FB84, 0xE3A1FB84, 0xE3A2FB84, 0xE3A3FB84, 0xE3A4FB84, 0xE3A5FB84, 0xE3A6FB84, 0xE3A7FB84, 0xE3A8FB84, + 0xE3A9FB84, 0xE3AAFB84, 0xE3ABFB84, 0xE3ACFB84, 0xE3ADFB84, 0xE3AEFB84, 0xE3AFFB84, 0xE3B0FB84, 0xE3B1FB84, 0xE3B2FB84, 0xE3B3FB84, 0xE3B4FB84, 0xE3B5FB84, 0xE3B6FB84, 0xE3B7FB84, + 0xE3B8FB84, 0xE3B9FB84, 0xE3BAFB84, 0xE3BBFB84, 0xE3BCFB84, 0xE3BDFB84, 0xE3BEFB84, 0xE3BFFB84, 0xE3C0FB84, 0xE3C1FB84, 0xE3C2FB84, 0xE3C3FB84, 0xE3C4FB84, 0xE3C5FB84, 0xE3C6FB84, + 0xE3C7FB84, 0xE3C8FB84, 0xE3C9FB84, 0xE3CAFB84, 0xE3CBFB84, 0xE3CCFB84, 0xE3CDFB84, 0xE3CEFB84, 0xE3CFFB84, 0xE3D0FB84, 0xE3D1FB84, 0xE3D2FB84, 0xE3D3FB84, 0xE3D4FB84, 0xE3D5FB84, + 0xE3D6FB84, 0xE3D7FB84, 0xE3D8FB84, 0xE3D9FB84, 0xE3DAFB84, 0xE3DBFB84, 0xE3DCFB84, 0xE3DDFB84, 0xE3DEFB84, 0xE3DFFB84, 0xE3E0FB84, 0xE3E1FB84, 0xE3E2FB84, 0xE3E3FB84, 0xE3E4FB84, + 0xE3E5FB84, 0xE3E6FB84, 0xE3E7FB84, 0xE3E8FB84, 0xE3E9FB84, 0xE3EAFB84, 0xE3EBFB84, 0xE3ECFB84, 0xE3EDFB84, 0xE3EEFB84, 0xE3EFFB84, 0xE3F0FB84, 0xE3F1FB84, 0xE3F2FB84, 0xE3F3FB84, 
+ 0xE3F4FB84, 0xE3F5FB84, 0xE3F6FB84, 0xE3F7FB84, 0xE3F8FB84, 0xE3F9FB84, 0xE3FAFB84, 0xE3FBFB84, 0xE3FCFB84, 0xE3FDFB84, 0xE3FEFB84, 0xE3FFFB84, 0xE400FB84, 0xE401FB84, 0xE402FB84, + 0xE403FB84, 0xE404FB84, 0xE405FB84, 0xE406FB84, 0xE407FB84, 0xE408FB84, 0xE409FB84, 0xE40AFB84, 0xE40BFB84, 0xE40CFB84, 0xE40DFB84, 0xE40EFB84, 0xE40FFB84, 0xE410FB84, 0xE411FB84, + 0xE412FB84, 0xE413FB84, 0xE414FB84, 0xE415FB84, 0xE416FB84, 0xE417FB84, 0xE418FB84, 0xE419FB84, 0xE41AFB84, 0xE41BFB84, 0xE41CFB84, 0xE41DFB84, 0xE41EFB84, 0xE41FFB84, 0xE420FB84, + 0xE421FB84, 0xE422FB84, 0xE423FB84, 0xE424FB84, 0xE425FB84, 0xE426FB84, 0xE427FB84, 0xE428FB84, 0xE429FB84, 0xE42AFB84, 0xE42BFB84, 0xE42CFB84, 0xE42DFB84, 0xE42EFB84, 0xE42FFB84, + 0xE430FB84, 0xE431FB84, 0xE432FB84, 0xE433FB84, 0xE434FB84, 0xE435FB84, 0xE436FB84, 0xE437FB84, 0xE438FB84, 0xE439FB84, 0xE43AFB84, 0xE43BFB84, 0xE43CFB84, 0xE43DFB84, 0xE43EFB84, + 0xE43FFB84, 0xE440FB84, 0xE441FB84, 0xE442FB84, 0xE443FB84, 0xE444FB84, 0xE445FB84, 0xE446FB84, 0xE447FB84, 0xE448FB84, 0xE449FB84, 0xE44AFB84, 0xE44BFB84, 0xE44CFB84, 0xE44DFB84, + 0xE44EFB84, 0xE44FFB84, 0xE450FB84, 0xE451FB84, 0xE452FB84, 0xE453FB84, 0xE454FB84, 0xE455FB84, 0xE456FB84, 0xE457FB84, 0xE458FB84, 0xE459FB84, 0xE45AFB84, 0xE45BFB84, 0xE45CFB84, + 0xE45DFB84, 0xE45EFB84, 0xE45FFB84, 0xE460FB84, 0xE461FB84, 0xE462FB84, 0xE463FB84, 0xE464FB84, 0xE465FB84, 0xE466FB84, 0xE467FB84, 0xE468FB84, 0xE469FB84, 0xE46AFB84, 0xE46BFB84, + 0xE46CFB84, 0xE46DFB84, 0xE46EFB84, 0xE46FFB84, 0xE470FB84, 0xE471FB84, 0xE472FB84, 0xE473FB84, 0xE474FB84, 0xE475FB84, 0xE476FB84, 0xE477FB84, 0xE478FB84, 0xE479FB84, 0xE47AFB84, + 0xE47BFB84, 0xE47CFB84, 0xE47DFB84, 0xE47EFB84, 0xE47FFB84, 0xE480FB84, 0xE481FB84, 0xE482FB84, 0xE483FB84, 0xE484FB84, 0xE485FB84, 0xE486FB84, 0xE487FB84, 0xE488FB84, 0xE489FB84, + 0xE48AFB84, 0xE48BFB84, 0xE48CFB84, 0xE48DFB84, 0xE48EFB84, 0xE48FFB84, 0xE490FB84, 0xE491FB84, 0xE492FB84, 0xE493FB84, 0xE494FB84, 0xE495FB84, 0xE496FB84, 0xE497FB84, 
0xE498FB84, + 0xE499FB84, 0xE49AFB84, 0xE49BFB84, 0xE49CFB84, 0xE49DFB84, 0xE49EFB84, 0xE49FFB84, 0xE4A0FB84, 0xE4A1FB84, 0xE4A2FB84, 0xE4A3FB84, 0xE4A4FB84, 0xE4A5FB84, 0xE4A6FB84, 0xE4A7FB84, + 0xE4A8FB84, 0xE4A9FB84, 0xE4AAFB84, 0xE4ABFB84, 0xE4ACFB84, 0xE4ADFB84, 0xE4AEFB84, 0xE4AFFB84, 0xE4B0FB84, 0xE4B1FB84, 0xE4B2FB84, 0xE4B3FB84, 0xE4B4FB84, 0xE4B5FB84, 0xE4B6FB84, + 0xE4B7FB84, 0xE4B8FB84, 0xE4B9FB84, 0xE4BAFB84, 0xE4BBFB84, 0xE4BCFB84, 0xE4BDFB84, 0xE4BEFB84, 0xE4BFFB84, 0xE4C0FB84, 0xE4C1FB84, 0xE4C2FB84, 0xE4C3FB84, 0xE4C4FB84, 0xE4C5FB84, + 0xE4C6FB84, 0xE4C7FB84, 0xE4C8FB84, 0xE4C9FB84, 0xE4CAFB84, 0xE4CBFB84, 0xE4CCFB84, 0xE4CDFB84, 0xE4CEFB84, 0xE4CFFB84, 0xE4D0FB84, 0xE4D1FB84, 0xE4D2FB84, 0xE4D3FB84, 0xE4D4FB84, + 0xE4D5FB84, 0xE4D6FB84, 0xE4D7FB84, 0xE4D8FB84, 0xE4D9FB84, 0xE4DAFB84, 0xE4DBFB84, 0xE4DCFB84, 0xE4DDFB84, 0xE4DEFB84, 0xE4DFFB84, 0xE4E0FB84, 0xE4E1FB84, 0xE4E2FB84, 0xE4E3FB84, + 0xE4E4FB84, 0xE4E5FB84, 0xE4E6FB84, 0xE4E7FB84, 0xE4E8FB84, 0xE4E9FB84, 0xE4EAFB84, 0xE4EBFB84, 0xE4ECFB84, 0xE4EDFB84, 0xE4EEFB84, 0xE4EFFB84, 0xE4F0FB84, 0xE4F1FB84, 0xE4F2FB84, + 0xE4F3FB84, 0xE4F4FB84, 0xE4F5FB84, 0xE4F6FB84, 0xE4F7FB84, 0xE4F8FB84, 0xE4F9FB84, 0xE4FAFB84, 0xE4FBFB84, 0xE4FCFB84, 0xE4FDFB84, 0xE4FEFB84, 0xE4FFFB84, 0xE500FB84, 0xE501FB84, + 0xE502FB84, 0xE503FB84, 0xE504FB84, 0xE505FB84, 0xE506FB84, 0xE507FB84, 0xE508FB84, 0xE509FB84, 0xE50AFB84, 0xE50BFB84, 0xE50CFB84, 0xE50DFB84, 0xE50EFB84, 0xE50FFB84, 0xE510FB84, + 0xE511FB84, 0xE512FB84, 0xE513FB84, 0xE514FB84, 0xE515FB84, 0xE516FB84, 0xE517FB84, 0xE518FB84, 0xE519FB84, 0xE51AFB84, 0xE51BFB84, 0xE51CFB84, 0xE51DFB84, 0xE51EFB84, 0xE51FFB84, + 0xE520FB84, 0xE521FB84, 0xE522FB84, 0xE523FB84, 0xE524FB84, 0xE525FB84, 0xE526FB84, 0xE527FB84, 0xE528FB84, 0xE529FB84, 0xE52AFB84, 0xE52BFB84, 0xE52CFB84, 0xE52DFB84, 0xE52EFB84, + 0xE52FFB84, 0xE530FB84, 0xE531FB84, 0xE532FB84, 0xE533FB84, 0xE534FB84, 0xE535FB84, 0xE536FB84, 0xE537FB84, 0xE538FB84, 0xE539FB84, 0xE53AFB84, 0xE53BFB84, 
0xE53CFB84, 0xE53DFB84, + 0xE53EFB84, 0xE53FFB84, 0xE540FB84, 0xE541FB84, 0xE542FB84, 0xE543FB84, 0xE544FB84, 0xE545FB84, 0xE546FB84, 0xE547FB84, 0xE548FB84, 0xE549FB84, 0xE54AFB84, 0xE54BFB84, 0xE54CFB84, + 0xE54DFB84, 0xE54EFB84, 0xE54FFB84, 0xE550FB84, 0xE551FB84, 0xE552FB84, 0xE553FB84, 0xE554FB84, 0xE555FB84, 0xE556FB84, 0xE557FB84, 0xE558FB84, 0xE559FB84, 0xE55AFB84, 0xE55BFB84, + 0xE55CFB84, 0xE55DFB84, 0xE55EFB84, 0xE55FFB84, 0xE560FB84, 0xE561FB84, 0xE562FB84, 0xE563FB84, 0xE564FB84, 0xE565FB84, 0xE566FB84, 0xE567FB84, 0xE568FB84, 0xE569FB84, 0xE56AFB84, + 0xE56BFB84, 0xE56CFB84, 0xE56DFB84, 0xE56EFB84, 0xE56FFB84, 0xE570FB84, 0xE571FB84, 0xE572FB84, 0xE573FB84, 0xE574FB84, 0xE575FB84, 0xE576FB84, 0xE577FB84, 0xE578FB84, 0xE579FB84, + 0xE57AFB84, 0xE57BFB84, 0xE57CFB84, 0xE57DFB84, 0xE57EFB84, 0xE57FFB84, 0xE580FB84, 0xE581FB84, 0xE582FB84, 0xE583FB84, 0xE584FB84, 0xE585FB84, 0xE586FB84, 0xE587FB84, 0xE588FB84, + 0xE589FB84, 0xE58AFB84, 0xE58BFB84, 0xE58CFB84, 0xE58DFB84, 0xE58EFB84, 0xE58FFB84, 0xE590FB84, 0xE591FB84, 0xE592FB84, 0xE593FB84, 0xE594FB84, 0xE595FB84, 0xE596FB84, 0xE597FB84, + 0xE598FB84, 0xE599FB84, 0xE59AFB84, 0xE59BFB84, 0xE59CFB84, 0xE59DFB84, 0xE59EFB84, 0xE59FFB84, 0xE5A0FB84, 0xE5A1FB84, 0xE5A2FB84, 0xE5A3FB84, 0xE5A4FB84, 0xE5A5FB84, 0xE5A6FB84, + 0xE5A7FB84, 0xE5A8FB84, 0xE5A9FB84, 0xE5AAFB84, 0xE5ABFB84, 0xE5ACFB84, 0xE5ADFB84, 0xE5AEFB84, 0xE5AFFB84, 0xE5B0FB84, 0xE5B1FB84, 0xE5B2FB84, 0xE5B3FB84, 0xE5B4FB84, 0xE5B5FB84, + 0xE5B6FB84, 0xE5B7FB84, 0xE5B8FB84, 0xE5B9FB84, 0xE5BAFB84, 0xE5BBFB84, 0xE5BCFB84, 0xE5BDFB84, 0xE5BEFB84, 0xE5BFFB84, 0xE5C0FB84, 0xE5C1FB84, 0xE5C2FB84, 0xE5C3FB84, 0xE5C4FB84, + 0xE5C5FB84, 0xE5C6FB84, 0xE5C7FB84, 0xE5C8FB84, 0xE5C9FB84, 0xE5CAFB84, 0xE5CBFB84, 0xE5CCFB84, 0xE5CDFB84, 0xE5CEFB84, 0xE5CFFB84, 0xE5D0FB84, 0xE5D1FB84, 0xE5D2FB84, 0xE5D3FB84, + 0xE5D4FB84, 0xE5D5FB84, 0xE5D6FB84, 0xE5D7FB84, 0xE5D8FB84, 0xE5D9FB84, 0xE5DAFB84, 0xE5DBFB84, 0xE5DCFB84, 0xE5DDFB84, 0xE5DEFB84, 0xE5DFFB84, 
0xE5E0FB84, 0xE5E1FB84, 0xE5E2FB84, + 0xE5E3FB84, 0xE5E4FB84, 0xE5E5FB84, 0xE5E6FB84, 0xE5E7FB84, 0xE5E8FB84, 0xE5E9FB84, 0xE5EAFB84, 0xE5EBFB84, 0xE5ECFB84, 0xE5EDFB84, 0xE5EEFB84, 0xE5EFFB84, 0xE5F0FB84, 0xE5F1FB84, + 0xE5F2FB84, 0xE5F3FB84, 0xE5F4FB84, 0xE5F5FB84, 0xE5F6FB84, 0xE5F7FB84, 0xE5F8FB84, 0xE5F9FB84, 0xE5FAFB84, 0xE5FBFB84, 0xE5FCFB84, 0xE5FDFB84, 0xE5FEFB84, 0xE5FFFB84, 0xE600FB84, + 0xE601FB84, 0xE602FB84, 0xE603FB84, 0xE604FB84, 0xE605FB84, 0xE606FB84, 0xE607FB84, 0xE608FB84, 0xE609FB84, 0xE60AFB84, 0xE60BFB84, 0xE60CFB84, 0xE60DFB84, 0xE60EFB84, 0xE60FFB84, + 0xE610FB84, 0xE611FB84, 0xE612FB84, 0xE613FB84, 0xE614FB84, 0xE615FB84, 0xE616FB84, 0xE617FB84, 0xE618FB84, 0xE619FB84, 0xE61AFB84, 0xE61BFB84, 0xE61CFB84, 0xE61DFB84, 0xE61EFB84, + 0xE61FFB84, 0xE620FB84, 0xE621FB84, 0xE622FB84, 0xE623FB84, 0xE624FB84, 0xE625FB84, 0xE626FB84, 0xE627FB84, 0xE628FB84, 0xE629FB84, 0xE62AFB84, 0xE62BFB84, 0xE62CFB84, 0xE62DFB84, + 0xE62EFB84, 0xE62FFB84, 0xE630FB84, 0xE631FB84, 0xE632FB84, 0xE633FB84, 0xE634FB84, 0xE635FB84, 0xE636FB84, 0xE637FB84, 0xE638FB84, 0xE639FB84, 0xE63AFB84, 0xE63BFB84, 0xE63CFB84, + 0xE63DFB84, 0xE63EFB84, 0xE63FFB84, 0xE640FB84, 0xE641FB84, 0xE642FB84, 0xE643FB84, 0xE644FB84, 0xE645FB84, 0xE646FB84, 0xE647FB84, 0xE648FB84, 0xE649FB84, 0xE64AFB84, 0xE64BFB84, + 0xE64CFB84, 0xE64DFB84, 0xE64EFB84, 0xE64FFB84, 0xE650FB84, 0xE651FB84, 0xE652FB84, 0xE653FB84, 0xE654FB84, 0xE655FB84, 0xE656FB84, 0xE657FB84, 0xE658FB84, 0xE659FB84, 0xE65AFB84, + 0xE65BFB84, 0xE65CFB84, 0xE65DFB84, 0xE65EFB84, 0xE65FFB84, 0xE660FB84, 0xE661FB84, 0xE662FB84, 0xE663FB84, 0xE664FB84, 0xE665FB84, 0xE666FB84, 0xE667FB84, 0xE668FB84, 0xE669FB84, + 0xE66AFB84, 0xE66BFB84, 0xE66CFB84, 0xE66DFB84, 0xE66EFB84, 0xE66FFB84, 0xE670FB84, 0xE671FB84, 0xE672FB84, 0xE673FB84, 0xE674FB84, 0xE675FB84, 0xE676FB84, 0xE677FB84, 0xE678FB84, + 0xE679FB84, 0xE67AFB84, 0xE67BFB84, 0xE67CFB84, 0xE67DFB84, 0xE67EFB84, 0xE67FFB84, 0xE680FB84, 0xE681FB84, 0xE682FB84, 0xE683FB84, 
0xE684FB84, 0xE685FB84, 0xE686FB84, 0xE687FB84, + 0xE688FB84, 0xE689FB84, 0xE68AFB84, 0xE68BFB84, 0xE68CFB84, 0xE68DFB84, 0xE68EFB84, 0xE68FFB84, 0xE690FB84, 0xE691FB84, 0xE692FB84, 0xE693FB84, 0xE694FB84, 0xE695FB84, 0xE696FB84, + 0xE697FB84, 0xE698FB84, 0xE699FB84, 0xE69AFB84, 0xE69BFB84, 0xE69CFB84, 0xE69DFB84, 0xE69EFB84, 0xE69FFB84, 0xE6A0FB84, 0xE6A1FB84, 0xE6A2FB84, 0xE6A3FB84, 0xE6A4FB84, 0xE6A5FB84, + 0xE6A6FB84, 0xE6A7FB84, 0xE6A8FB84, 0xE6A9FB84, 0xE6AAFB84, 0xE6ABFB84, 0xE6ACFB84, 0xE6ADFB84, 0xE6AEFB84, 0xE6AFFB84, 0xE6B0FB84, 0xE6B1FB84, 0xE6B2FB84, 0xE6B3FB84, 0xE6B4FB84, + 0xE6B5FB84, 0xE6B6FB84, 0xE6B7FB84, 0xE6B8FB84, 0xE6B9FB84, 0xE6BAFB84, 0xE6BBFB84, 0xE6BCFB84, 0xE6BDFB84, 0xE6BEFB84, 0xE6BFFB84, 0xE6C0FB84, 0xE6C1FB84, 0xE6C2FB84, 0xE6C3FB84, + 0xE6C4FB84, 0xE6C5FB84, 0xE6C6FB84, 0xE6C7FB84, 0xE6C8FB84, 0xE6C9FB84, 0xE6CAFB84, 0xE6CBFB84, 0xE6CCFB84, 0xE6CDFB84, 0xE6CEFB84, 0xE6CFFB84, 0xE6D0FB84, 0xE6D1FB84, 0xE6D2FB84, + 0xE6D3FB84, 0xE6D4FB84, 0xE6D5FB84, 0xE6D6FB84, 0xE6D7FB84, 0xE6D8FB84, 0xE6D9FB84, 0xE6DAFB84, 0xE6DBFB84, 0xE6DCFB84, 0xE6DDFB84, 0xE6DEFB84, 0xE6DFFB84, 0xE6E0FB84, 0xE6E1FB84, + 0xE6E2FB84, 0xE6E3FB84, 0xE6E4FB84, 0xE6E5FB84, 0xE6E6FB84, 0xE6E7FB84, 0xE6E8FB84, 0xE6E9FB84, 0xE6EAFB84, 0xE6EBFB84, 0xE6ECFB84, 0xE6EDFB84, 0xE6EEFB84, 0xE6EFFB84, 0xE6F0FB84, + 0xE6F1FB84, 0xE6F2FB84, 0xE6F3FB84, 0xE6F4FB84, 0xE6F5FB84, 0xE6F6FB84, 0xE6F7FB84, 0xE6F8FB84, 0xE6F9FB84, 0xE6FAFB84, 0xE6FBFB84, 0xE6FCFB84, 0xE6FDFB84, 0xE6FEFB84, 0xE6FFFB84, + 0xE700FB84, 0xE701FB84, 0xE702FB84, 0xE703FB84, 0xE704FB84, 0xE705FB84, 0xE706FB84, 0xE707FB84, 0xE708FB84, 0xE709FB84, 0xE70AFB84, 0xE70BFB84, 0xE70CFB84, 0xE70DFB84, 0xE70EFB84, + 0xE70FFB84, 0xE710FB84, 0xE711FB84, 0xE712FB84, 0xE713FB84, 0xE714FB84, 0xE715FB84, 0xE716FB84, 0xE717FB84, 0xE718FB84, 0xE719FB84, 0xE71AFB84, 0xE71BFB84, 0xE71CFB84, 0xE71DFB84, + 0xE71EFB84, 0xE71FFB84, 0xE720FB84, 0xE721FB84, 0xE722FB84, 0xE723FB84, 0xE724FB84, 0xE725FB84, 0xE726FB84, 0xE727FB84, 
0xE728FB84, 0xE729FB84, 0xE72AFB84, 0xE72BFB84, 0xE72CFB84, + 0xE72DFB84, 0xE72EFB84, 0xE72FFB84, 0xE730FB84, 0xE731FB84, 0xE732FB84, 0xE733FB84, 0xE734FB84, 0xE735FB84, 0xE736FB84, 0xE737FB84, 0xE738FB84, 0xE739FB84, 0xE73AFB84, 0xE73BFB84, + 0xE73CFB84, 0xE73DFB84, 0xE73EFB84, 0xE73FFB84, 0xE740FB84, 0xE741FB84, 0xE742FB84, 0xE743FB84, 0xE744FB84, 0xE745FB84, 0xE746FB84, 0xE747FB84, 0xE748FB84, 0xE749FB84, 0xE74AFB84, + 0xE74BFB84, 0xE74CFB84, 0xE74DFB84, 0xE74EFB84, 0xE74FFB84, 0xE750FB84, 0xE751FB84, 0xE752FB84, 0xE753FB84, 0xE754FB84, 0xE755FB84, 0xE756FB84, 0xE757FB84, 0xE758FB84, 0xE759FB84, + 0xE75AFB84, 0xE75BFB84, 0xE75CFB84, 0xE75DFB84, 0xE75EFB84, 0xE75FFB84, 0xE760FB84, 0xE761FB84, 0xE762FB84, 0xE763FB84, 0xE764FB84, 0xE765FB84, 0xE766FB84, 0xE767FB84, 0xE768FB84, + 0xE769FB84, 0xE76AFB84, 0xE76BFB84, 0xE76CFB84, 0xE76DFB84, 0xE76EFB84, 0xE76FFB84, 0xE770FB84, 0xE771FB84, 0xE772FB84, 0xE773FB84, 0xE774FB84, 0xE775FB84, 0xE776FB84, 0xE777FB84, + 0xE778FB84, 0xE779FB84, 0xE77AFB84, 0xE77BFB84, 0xE77CFB84, 0xE77DFB84, 0xE77EFB84, 0xE77FFB84, 0xE780FB84, 0xE781FB84, 0xE782FB84, 0xE783FB84, 0xE784FB84, 0xE785FB84, 0xE786FB84, + 0xE787FB84, 0xE788FB84, 0xE789FB84, 0xE78AFB84, 0xE78BFB84, 0xE78CFB84, 0xE78DFB84, 0xE78EFB84, 0xE78FFB84, 0xE790FB84, 0xE791FB84, 0xE792FB84, 0xE793FB84, 0xE794FB84, 0xE795FB84, + 0xE796FB84, 0xE797FB84, 0xE798FB84, 0xE799FB84, 0xE79AFB84, 0xE79BFB84, 0xE79CFB84, 0xE79DFB84, 0xE79EFB84, 0xE79FFB84, 0xE7A0FB84, 0xE7A1FB84, 0xE7A2FB84, 0xE7A3FB84, 0xE7A4FB84, + 0xE7A5FB84, 0xE7A6FB84, 0xE7A7FB84, 0xE7A8FB84, 0xE7A9FB84, 0xE7AAFB84, 0xE7ABFB84, 0xE7ACFB84, 0xE7ADFB84, 0xE7AEFB84, 0xE7AFFB84, 0xE7B0FB84, 0xE7B1FB84, 0xE7B2FB84, 0xE7B3FB84, + 0xE7B4FB84, 0xE7B5FB84, 0xE7B6FB84, 0xE7B7FB84, 0xE7B8FB84, 0xE7B9FB84, 0xE7BAFB84, 0xE7BBFB84, 0xE7BCFB84, 0xE7BDFB84, 0xE7BEFB84, 0xE7BFFB84, 0xE7C0FB84, 0xE7C1FB84, 0xE7C2FB84, + 0xE7C3FB84, 0xE7C4FB84, 0xE7C5FB84, 0xE7C6FB84, 0xE7C7FB84, 0xE7C8FB84, 0xE7C9FB84, 0xE7CAFB84, 0xE7CBFB84, 
0xE7CCFB84, 0xE7CDFB84, 0xE7CEFB84, 0xE7CFFB84, 0xE7D0FB84, 0xE7D1FB84, + 0xE7D2FB84, 0xE7D3FB84, 0xE7D4FB84, 0xE7D5FB84, 0xE7D6FB84, 0xE7D7FB84, 0xE7D8FB84, 0xE7D9FB84, 0xE7DAFB84, 0xE7DBFB84, 0xE7DCFB84, 0xE7DDFB84, 0xE7DEFB84, 0xE7DFFB84, 0xE7E0FB84, + 0xE7E1FB84, 0xE7E2FB84, 0xE7E3FB84, 0xE7E4FB84, 0xE7E5FB84, 0xE7E6FB84, 0xE7E7FB84, 0xE7E8FB84, 0xE7E9FB84, 0xE7EAFB84, 0xE7EBFB84, 0xE7ECFB84, 0xE7EDFB84, 0xE7EEFB84, 0xE7EFFB84, + 0xE7F0FB84, 0xE7F1FB84, 0xE7F2FB84, 0xE7F3FB84, 0xE7F4FB84, 0xE7F5FB84, 0xE7F6FB84, 0xE7F7FB84, 0xE7F8FB84, 0xE7F9FB84, 0xE7FAFB84, 0xE7FBFB84, 0xE7FCFB84, 0xE7FDFB84, 0xE7FEFB84, + 0xE7FFFB84, 0xE800FB84, 0xE801FB84, 0xE802FB84, 0xE803FB84, 0xE804FB84, 0xE805FB84, 0xE806FB84, 0xE807FB84, 0xE808FB84, 0xE809FB84, 0xE80AFB84, 0xE80BFB84, 0xE80CFB84, 0xE80DFB84, + 0xE80EFB84, 0xE80FFB84, 0xE810FB84, 0xE811FB84, 0xE812FB84, 0xE813FB84, 0xE814FB84, 0xE815FB84, 0xE816FB84, 0xE817FB84, 0xE818FB84, 0xE819FB84, 0xE81AFB84, 0xE81BFB84, 0xE81CFB84, + 0xE81DFB84, 0xE81EFB84, 0xE81FFB84, 0xE820FB84, 0xE821FB84, 0xE822FB84, 0xE823FB84, 0xE824FB84, 0xE825FB84, 0xE826FB84, 0xE827FB84, 0xE828FB84, 0xE829FB84, 0xE82AFB84, 0xE82BFB84, + 0xE82CFB84, 0xE82DFB84, 0xE82EFB84, 0xE82FFB84, 0xE830FB84, 0xE831FB84, 0xE832FB84, 0xE833FB84, 0xE834FB84, 0xE835FB84, 0xE836FB84, 0xE837FB84, 0xE838FB84, 0xE839FB84, 0xE83AFB84, + 0xE83BFB84, 0xE83CFB84, 0xE83DFB84, 0xE83EFB84, 0xE83FFB84, 0xE840FB84, 0xE841FB84, 0xE842FB84, 0xE843FB84, 0xE844FB84, 0xE845FB84, 0xE846FB84, 0xE847FB84, 0xE848FB84, 0xE849FB84, + 0xE84AFB84, 0xE84BFB84, 0xE84CFB84, 0xE84DFB84, 0xE84EFB84, 0xE84FFB84, 0xE850FB84, 0xE851FB84, 0xE852FB84, 0xE853FB84, 0xE854FB84, 0xE855FB84, 0xE856FB84, 0xE857FB84, 0xE858FB84, + 0xE859FB84, 0xE85AFB84, 0xE85BFB84, 0xE85CFB84, 0xE85DFB84, 0xE85EFB84, 0xE85FFB84, 0xE860FB84, 0xE861FB84, 0xE862FB84, 0xE863FB84, 0xE864FB84, 0xE865FB84, 0xE866FB84, 0xE867FB84, + 0xE868FB84, 0xE869FB84, 0xE86AFB84, 0xE86BFB84, 0xE86CFB84, 0xE86DFB84, 0xE86EFB84, 0xE86FFB84, 
0xE870FB84, 0xE871FB84, 0xE872FB84, 0xE873FB84, 0xE874FB84, 0xE875FB84, 0xE876FB84, + 0xE877FB84, 0xE878FB84, 0xE879FB84, 0xE87AFB84, 0xE87BFB84, 0xE87CFB84, 0xE87DFB84, 0xE87EFB84, 0xE87FFB84, 0xE880FB84, 0xE881FB84, 0xE882FB84, 0xE883FB84, 0xE884FB84, 0xE885FB84, + 0xE886FB84, 0xE887FB84, 0xE888FB84, 0xE889FB84, 0xE88AFB84, 0xE88BFB84, 0xE88CFB84, 0xE88DFB84, 0xE88EFB84, 0xE88FFB84, 0xE890FB84, 0xE891FB84, 0xE892FB84, 0xE893FB84, 0xE894FB84, + 0xE895FB84, 0xE896FB84, 0xE897FB84, 0xE898FB84, 0xE899FB84, 0xE89AFB84, 0xE89BFB84, 0xE89CFB84, 0xE89DFB84, 0xE89EFB84, 0xE89FFB84, 0xE8A0FB84, 0xE8A1FB84, 0xE8A2FB84, 0xE8A3FB84, + 0xE8A4FB84, 0xE8A5FB84, 0xE8A6FB84, 0xE8A7FB84, 0xE8A8FB84, 0xE8A9FB84, 0xE8AAFB84, 0xE8ABFB84, 0xE8ACFB84, 0xE8ADFB84, 0xE8AEFB84, 0xE8AFFB84, 0xE8B0FB84, 0xE8B1FB84, 0xE8B2FB84, + 0xE8B3FB84, 0xE8B4FB84, 0xE8B5FB84, 0xE8B6FB84, 0xE8B7FB84, 0xE8B8FB84, 0xE8B9FB84, 0xE8BAFB84, 0xE8BBFB84, 0xE8BCFB84, 0xE8BDFB84, 0xE8BEFB84, 0xE8BFFB84, 0xE8C0FB84, 0xE8C1FB84, + 0xE8C2FB84, 0xE8C3FB84, 0xE8C4FB84, 0xE8C5FB84, 0xE8C6FB84, 0xE8C7FB84, 0xE8C8FB84, 0xE8C9FB84, 0xE8CAFB84, 0xE8CBFB84, 0xE8CCFB84, 0xE8CDFB84, 0xE8CEFB84, 0xE8CFFB84, 0xE8D0FB84, + 0xE8D1FB84, 0xE8D2FB84, 0xE8D3FB84, 0xE8D4FB84, 0xE8D5FB84, 0xE8D6FB84, 0xE8D7FB84, 0xE8D8FB84, 0xE8D9FB84, 0xE8DAFB84, 0xE8DBFB84, 0xE8DCFB84, 0xE8DDFB84, 0xE8DEFB84, 0xE8DFFB84, + 0xE8E0FB84, 0xE8E1FB84, 0xE8E2FB84, 0xE8E3FB84, 0xE8E4FB84, 0xE8E5FB84, 0xE8E6FB84, 0xE8E7FB84, 0xE8E8FB84, 0xE8E9FB84, 0xE8EAFB84, 0xE8EBFB84, 0xE8ECFB84, 0xE8EDFB84, 0xE8EEFB84, + 0xE8EFFB84, 0xE8F0FB84, 0xE8F1FB84, 0xE8F2FB84, 0xE8F3FB84, 0xE8F4FB84, 0xE8F5FB84, 0xE8F6FB84, 0xE8F7FB84, 0xE8F8FB84, 0xE8F9FB84, 0xE8FAFB84, 0xE8FBFB84, 0xE8FCFB84, 0xE8FDFB84, + 0xE8FEFB84, 0xE8FFFB84, 0xE900FB84, 0xE901FB84, 0xE902FB84, 0xE903FB84, 0xE904FB84, 0xE905FB84, 0xE906FB84, 0xE907FB84, 0xE908FB84, 0xE909FB84, 0xE90AFB84, 0xE90BFB84, 0xE90CFB84, + 0xE90DFB84, 0xE90EFB84, 0xE90FFB84, 0xE910FB84, 0xE911FB84, 0xE912FB84, 0xE913FB84, 
0xE914FB84, 0xE915FB84, 0xE916FB84, 0xE917FB84, 0xE918FB84, 0xE919FB84, 0xE91AFB84, 0xE91BFB84, + 0xE91CFB84, 0xE91DFB84, 0xE91EFB84, 0xE91FFB84, 0xE920FB84, 0xE921FB84, 0xE922FB84, 0xE923FB84, 0xE924FB84, 0xE925FB84, 0xE926FB84, 0xE927FB84, 0xE928FB84, 0xE929FB84, 0xE92AFB84, + 0xE92BFB84, 0xE92CFB84, 0xE92DFB84, 0xE92EFB84, 0xE92FFB84, 0xE930FB84, 0xE931FB84, 0xE932FB84, 0xE933FB84, 0xE934FB84, 0xE935FB84, 0xE936FB84, 0xE937FB84, 0xE938FB84, 0xE939FB84, + 0xE93AFB84, 0xE93BFB84, 0xE93CFB84, 0xE93DFB84, 0xE93EFB84, 0xE93FFB84, 0xE940FB84, 0xE941FB84, 0xE942FB84, 0xE943FB84, 0xE944FB84, 0xE945FB84, 0xE946FB84, 0xE947FB84, 0xE948FB84, + 0xE949FB84, 0xE94AFB84, 0xE94BFB84, 0xE94CFB84, 0xE94DFB84, 0xE94EFB84, 0xE94FFB84, 0xE950FB84, 0xE951FB84, 0xE952FB84, 0xE953FB84, 0xE954FB84, 0xE955FB84, 0xE956FB84, 0xE957FB84, + 0xE958FB84, 0xE959FB84, 0xE95AFB84, 0xE95BFB84, 0xE95CFB84, 0xE95DFB84, 0xE95EFB84, 0xE95FFB84, 0xE960FB84, 0xE961FB84, 0xE962FB84, 0xE963FB84, 0xE964FB84, 0xE965FB84, 0xE966FB84, + 0xE967FB84, 0xE968FB84, 0xE969FB84, 0xE96AFB84, 0xE96BFB84, 0xE96CFB84, 0xE96DFB84, 0xE96EFB84, 0xE96FFB84, 0xE970FB84, 0xE971FB84, 0xE972FB84, 0xE973FB84, 0xE974FB84, 0xE975FB84, + 0xE976FB84, 0xE977FB84, 0xE978FB84, 0xE979FB84, 0xE97AFB84, 0xE97BFB84, 0xE97CFB84, 0xE97DFB84, 0xE97EFB84, 0xE97FFB84, 0xE980FB84, 0xE981FB84, 0xE982FB84, 0xE983FB84, 0xE984FB84, + 0xE985FB84, 0xE986FB84, 0xE987FB84, 0xE988FB84, 0xE989FB84, 0xE98AFB84, 0xE98BFB84, 0xE98CFB84, 0xE98DFB84, 0xE98EFB84, 0xE98FFB84, 0xE990FB84, 0xE991FB84, 0xE992FB84, 0xE993FB84, + 0xE994FB84, 0xE995FB84, 0xE996FB84, 0xE997FB84, 0xE998FB84, 0xE999FB84, 0xE99AFB84, 0xE99BFB84, 0xE99CFB84, 0xE99DFB84, 0xE99EFB84, 0xE99FFB84, 0xE9A0FB84, 0xE9A1FB84, 0xE9A2FB84, + 0xE9A3FB84, 0xE9A4FB84, 0xE9A5FB84, 0xE9A6FB84, 0xE9A7FB84, 0xE9A8FB84, 0xE9A9FB84, 0xE9AAFB84, 0xE9ABFB84, 0xE9ACFB84, 0xE9ADFB84, 0xE9AEFB84, 0xE9AFFB84, 0xE9B0FB84, 0xE9B1FB84, + 0xE9B2FB84, 0xE9B3FB84, 0xE9B4FB84, 0xE9B5FB84, 0xE9B6FB84, 0xE9B7FB84, 
0xE9B8FB84, 0xE9B9FB84, 0xE9BAFB84, 0xE9BBFB84, 0xE9BCFB84, 0xE9BDFB84, 0xE9BEFB84, 0xE9BFFB84, 0xE9C0FB84, + 0xE9C1FB84, 0xE9C2FB84, 0xE9C3FB84, 0xE9C4FB84, 0xE9C5FB84, 0xE9C6FB84, 0xE9C7FB84, 0xE9C8FB84, 0xE9C9FB84, 0xE9CAFB84, 0xE9CBFB84, 0xE9CCFB84, 0xE9CDFB84, 0xE9CEFB84, 0xE9CFFB84, + 0xE9D0FB84, 0xE9D1FB84, 0xE9D2FB84, 0xE9D3FB84, 0xE9D4FB84, 0xE9D5FB84, 0xE9D6FB84, 0xE9D7FB84, 0xE9D8FB84, 0xE9D9FB84, 0xE9DAFB84, 0xE9DBFB84, 0xE9DCFB84, 0xE9DDFB84, 0xE9DEFB84, + 0xE9DFFB84, 0xE9E0FB84, 0xE9E1FB84, 0xE9E2FB84, 0xE9E3FB84, 0xE9E4FB84, 0xE9E5FB84, 0xE9E6FB84, 0xE9E7FB84, 0xE9E8FB84, 0xE9E9FB84, 0xE9EAFB84, 0xE9EBFB84, 0xE9ECFB84, 0xE9EDFB84, + 0xE9EEFB84, 0xE9EFFB84, 0xE9F0FB84, 0xE9F1FB84, 0xE9F2FB84, 0xE9F3FB84, 0xE9F4FB84, 0xE9F5FB84, 0xE9F6FB84, 0xE9F7FB84, 0xE9F8FB84, 0xE9F9FB84, 0xE9FAFB84, 0xE9FBFB84, 0xE9FCFB84, + 0xE9FDFB84, 0xE9FEFB84, 0xE9FFFB84, 0xEA00FB84, 0xEA01FB84, 0xEA02FB84, 0xEA03FB84, 0xEA04FB84, 0xEA05FB84, 0xEA06FB84, 0xEA07FB84, 0xEA08FB84, 0xEA09FB84, 0xEA0AFB84, 0xEA0BFB84, + 0xEA0CFB84, 0xEA0DFB84, 0xEA0EFB84, 0xEA0FFB84, 0xEA10FB84, 0xEA11FB84, 0xEA12FB84, 0xEA13FB84, 0xEA14FB84, 0xEA15FB84, 0xEA16FB84, 0xEA17FB84, 0xEA18FB84, 0xEA19FB84, 0xEA1AFB84, + 0xEA1BFB84, 0xEA1CFB84, 0xEA1DFB84, 0xEA1EFB84, 0xEA1FFB84, 0xEA20FB84, 0xEA21FB84, 0xEA22FB84, 0xEA23FB84, 0xEA24FB84, 0xEA25FB84, 0xEA26FB84, 0xEA27FB84, 0xEA28FB84, 0xEA29FB84, + 0xEA2AFB84, 0xEA2BFB84, 0xEA2CFB84, 0xEA2DFB84, 0xEA2EFB84, 0xEA2FFB84, 0xEA30FB84, 0xEA31FB84, 0xEA32FB84, 0xEA33FB84, 0xEA34FB84, 0xEA35FB84, 0xEA36FB84, 0xEA37FB84, 0xEA38FB84, + 0xEA39FB84, 0xEA3AFB84, 0xEA3BFB84, 0xEA3CFB84, 0xEA3DFB84, 0xEA3EFB84, 0xEA3FFB84, 0xEA40FB84, 0xEA41FB84, 0xEA42FB84, 0xEA43FB84, 0xEA44FB84, 0xEA45FB84, 0xEA46FB84, 0xEA47FB84, + 0xEA48FB84, 0xEA49FB84, 0xEA4AFB84, 0xEA4BFB84, 0xEA4CFB84, 0xEA4DFB84, 0xEA4EFB84, 0xEA4FFB84, 0xEA50FB84, 0xEA51FB84, 0xEA52FB84, 0xEA53FB84, 0xEA54FB84, 0xEA55FB84, 0xEA56FB84, + 0xEA57FB84, 0xEA58FB84, 0xEA59FB84, 0xEA5AFB84, 0xEA5BFB84, 
0xEA5CFB84, 0xEA5DFB84, 0xEA5EFB84, 0xEA5FFB84, 0xEA60FB84, 0xEA61FB84, 0xEA62FB84, 0xEA63FB84, 0xEA64FB84, 0xEA65FB84, + 0xEA66FB84, 0xEA67FB84, 0xEA68FB84, 0xEA69FB84, 0xEA6AFB84, 0xEA6BFB84, 0xEA6CFB84, 0xEA6DFB84, 0xEA6EFB84, 0xEA6FFB84, 0xEA70FB84, 0xEA71FB84, 0xEA72FB84, 0xEA73FB84, 0xEA74FB84, + 0xEA75FB84, 0xEA76FB84, 0xEA77FB84, 0xEA78FB84, 0xEA79FB84, 0xEA7AFB84, 0xEA7BFB84, 0xEA7CFB84, 0xEA7DFB84, 0xEA7EFB84, 0xEA7FFB84, 0xEA80FB84, 0xEA81FB84, 0xEA82FB84, 0xEA83FB84, + 0xEA84FB84, 0xEA85FB84, 0xEA86FB84, 0xEA87FB84, 0xEA88FB84, 0xEA89FB84, 0xEA8AFB84, 0xEA8BFB84, 0xEA8CFB84, 0xEA8DFB84, 0xEA8EFB84, 0xEA8FFB84, 0xEA90FB84, 0xEA91FB84, 0xEA92FB84, + 0xEA93FB84, 0xEA94FB84, 0xEA95FB84, 0xEA96FB84, 0xEA97FB84, 0xEA98FB84, 0xEA99FB84, 0xEA9AFB84, 0xEA9BFB84, 0xEA9CFB84, 0xEA9DFB84, 0xEA9EFB84, 0xEA9FFB84, 0xEAA0FB84, 0xEAA1FB84, + 0xEAA2FB84, 0xEAA3FB84, 0xEAA4FB84, 0xEAA5FB84, 0xEAA6FB84, 0xEAA7FB84, 0xEAA8FB84, 0xEAA9FB84, 0xEAAAFB84, 0xEAABFB84, 0xEAACFB84, 0xEAADFB84, 0xEAAEFB84, 0xEAAFFB84, 0xEAB0FB84, + 0xEAB1FB84, 0xEAB2FB84, 0xEAB3FB84, 0xEAB4FB84, 0xEAB5FB84, 0xEAB6FB84, 0xEAB7FB84, 0xEAB8FB84, 0xEAB9FB84, 0xEABAFB84, 0xEABBFB84, 0xEABCFB84, 0xEABDFB84, 0xEABEFB84, 0xEABFFB84, + 0xEAC0FB84, 0xEAC1FB84, 0xEAC2FB84, 0xEAC3FB84, 0xEAC4FB84, 0xEAC5FB84, 0xEAC6FB84, 0xEAC7FB84, 0xEAC8FB84, 0xEAC9FB84, 0xEACAFB84, 0xEACBFB84, 0xEACCFB84, 0xEACDFB84, 0xEACEFB84, + 0xEACFFB84, 0xEAD0FB84, 0xEAD1FB84, 0xEAD2FB84, 0xEAD3FB84, 0xEAD4FB84, 0xEAD5FB84, 0xEAD6FB84, 0xEAD7FB84, 0xEAD8FB84, 0xEAD9FB84, 0xEADAFB84, 0xEADBFB84, 0xEADCFB84, 0xEADDFB84, + 0xEADEFB84, 0xEADFFB84, 0xEAE0FB84, 0xEAE1FB84, 0xEAE2FB84, 0xEAE3FB84, 0xEAE4FB84, 0xEAE5FB84, 0xEAE6FB84, 0xEAE7FB84, 0xEAE8FB84, 0xEAE9FB84, 0xEAEAFB84, 0xEAEBFB84, 0xEAECFB84, + 0xEAEDFB84, 0xEAEEFB84, 0xEAEFFB84, 0xEAF0FB84, 0xEAF1FB84, 0xEAF2FB84, 0xEAF3FB84, 0xEAF4FB84, 0xEAF5FB84, 0xEAF6FB84, 0xEAF7FB84, 0xEAF8FB84, 0xEAF9FB84, 0xEAFAFB84, 0xEAFBFB84, + 0xEAFCFB84, 0xEAFDFB84, 0xEAFEFB84, 0xEAFFFB84, 
0xEB00FB84, 0xEB01FB84, 0xEB02FB84, 0xEB03FB84, 0xEB04FB84, 0xEB05FB84, 0xEB06FB84, 0xEB07FB84, 0xEB08FB84, 0xEB09FB84, 0xEB0AFB84, + 0xEB0BFB84, 0xEB0CFB84, 0xEB0DFB84, 0xEB0EFB84, 0xEB0FFB84, 0xEB10FB84, 0xEB11FB84, 0xEB12FB84, 0xEB13FB84, 0xEB14FB84, 0xEB15FB84, 0xEB16FB84, 0xEB17FB84, 0xEB18FB84, 0xEB19FB84, + 0xEB1AFB84, 0xEB1BFB84, 0xEB1CFB84, 0xEB1DFB84, 0xEB1EFB84, 0xEB1FFB84, 0xEB20FB84, 0xEB21FB84, 0xEB22FB84, 0xEB23FB84, 0xEB24FB84, 0xEB25FB84, 0xEB26FB84, 0xEB27FB84, 0xEB28FB84, + 0xEB29FB84, 0xEB2AFB84, 0xEB2BFB84, 0xEB2CFB84, 0xEB2DFB84, 0xEB2EFB84, 0xEB2FFB84, 0xEB30FB84, 0xEB31FB84, 0xEB32FB84, 0xEB33FB84, 0xEB34FB84, 0xEB35FB84, 0xEB36FB84, 0xEB37FB84, + 0xEB38FB84, 0xEB39FB84, 0xEB3AFB84, 0xEB3BFB84, 0xEB3CFB84, 0xEB3DFB84, 0xEB3EFB84, 0xEB3FFB84, 0xEB40FB84, 0xEB41FB84, 0xEB42FB84, 0xEB43FB84, 0xEB44FB84, 0xEB45FB84, 0xEB46FB84, + 0xEB47FB84, 0xEB48FB84, 0xEB49FB84, 0xEB4AFB84, 0xEB4BFB84, 0xEB4CFB84, 0xEB4DFB84, 0xEB4EFB84, 0xEB4FFB84, 0xEB50FB84, 0xEB51FB84, 0xEB52FB84, 0xEB53FB84, 0xEB54FB84, 0xEB55FB84, + 0xEB56FB84, 0xEB57FB84, 0xEB58FB84, 0xEB59FB84, 0xEB5AFB84, 0xEB5BFB84, 0xEB5CFB84, 0xEB5DFB84, 0xEB5EFB84, 0xEB5FFB84, 0xEB60FB84, 0xEB61FB84, 0xEB62FB84, 0xEB63FB84, 0xEB64FB84, + 0xEB65FB84, 0xEB66FB84, 0xEB67FB84, 0xEB68FB84, 0xEB69FB84, 0xEB6AFB84, 0xEB6BFB84, 0xEB6CFB84, 0xEB6DFB84, 0xEB6EFB84, 0xEB6FFB84, 0xEB70FB84, 0xEB71FB84, 0xEB72FB84, 0xEB73FB84, + 0xEB74FB84, 0xEB75FB84, 0xEB76FB84, 0xEB77FB84, 0xEB78FB84, 0xEB79FB84, 0xEB7AFB84, 0xEB7BFB84, 0xEB7CFB84, 0xEB7DFB84, 0xEB7EFB84, 0xEB7FFB84, 0xEB80FB84, 0xEB81FB84, 0xEB82FB84, + 0xEB83FB84, 0xEB84FB84, 0xEB85FB84, 0xEB86FB84, 0xEB87FB84, 0xEB88FB84, 0xEB89FB84, 0xEB8AFB84, 0xEB8BFB84, 0xEB8CFB84, 0xEB8DFB84, 0xEB8EFB84, 0xEB8FFB84, 0xEB90FB84, 0xEB91FB84, + 0xEB92FB84, 0xEB93FB84, 0xEB94FB84, 0xEB95FB84, 0xEB96FB84, 0xEB97FB84, 0xEB98FB84, 0xEB99FB84, 0xEB9AFB84, 0xEB9BFB84, 0xEB9CFB84, 0xEB9DFB84, 0xEB9EFB84, 0xEB9FFB84, 0xEBA0FB84, + 0xEBA1FB84, 0xEBA2FB84, 0xEBA3FB84, 
0xEBA4FB84, 0xEBA5FB84, 0xEBA6FB84, 0xEBA7FB84, 0xEBA8FB84, 0xEBA9FB84, 0xEBAAFB84, 0xEBABFB84, 0xEBACFB84, 0xEBADFB84, 0xEBAEFB84, 0xEBAFFB84, + 0xEBB0FB84, 0xEBB1FB84, 0xEBB2FB84, 0xEBB3FB84, 0xEBB4FB84, 0xEBB5FB84, 0xEBB6FB84, 0xEBB7FB84, 0xEBB8FB84, 0xEBB9FB84, 0xEBBAFB84, 0xEBBBFB84, 0xEBBCFB84, 0xEBBDFB84, 0xEBBEFB84, + 0xEBBFFB84, 0xEBC0FB84, 0xEBC1FB84, 0xEBC2FB84, 0xEBC3FB84, 0xEBC4FB84, 0xEBC5FB84, 0xEBC6FB84, 0xEBC7FB84, 0xEBC8FB84, 0xEBC9FB84, 0xEBCAFB84, 0xEBCBFB84, 0xEBCCFB84, 0xEBCDFB84, + 0xEBCEFB84, 0xEBCFFB84, 0xEBD0FB84, 0xEBD1FB84, 0xEBD2FB84, 0xEBD3FB84, 0xEBD4FB84, 0xEBD5FB84, 0xEBD6FB84, 0xEBD7FB84, 0xEBD8FB84, 0xEBD9FB84, 0xEBDAFB84, 0xEBDBFB84, 0xEBDCFB84, + 0xEBDDFB84, 0xEBDEFB84, 0xEBDFFB84, 0xEBE0FB84, 0xEBE1FB84, 0xEBE2FB84, 0xEBE3FB84, 0xEBE4FB84, 0xEBE5FB84, 0xEBE6FB84, 0xEBE7FB84, 0xEBE8FB84, 0xEBE9FB84, 0xEBEAFB84, 0xEBEBFB84, + 0xEBECFB84, 0xEBEDFB84, 0xEBEEFB84, 0xEBEFFB84, 0xEBF0FB84, 0xEBF1FB84, 0xEBF2FB84, 0xEBF3FB84, 0xEBF4FB84, 0xEBF5FB84, 0xEBF6FB84, 0xEBF7FB84, 0xEBF8FB84, 0xEBF9FB84, 0xEBFAFB84, + 0xEBFBFB84, 0xEBFCFB84, 0xEBFDFB84, 0xEBFEFB84, 0xEBFFFB84, 0xEC00FB84, 0xEC01FB84, 0xEC02FB84, 0xEC03FB84, 0xEC04FB84, 0xEC05FB84, 0xEC06FB84, 0xEC07FB84, 0xEC08FB84, 0xEC09FB84, + 0xEC0AFB84, 0xEC0BFB84, 0xEC0CFB84, 0xEC0DFB84, 0xEC0EFB84, 0xEC0FFB84, 0xEC10FB84, 0xEC11FB84, 0xEC12FB84, 0xEC13FB84, 0xEC14FB84, 0xEC15FB84, 0xEC16FB84, 0xEC17FB84, 0xEC18FB84, + 0xEC19FB84, 0xEC1AFB84, 0xEC1BFB84, 0xEC1CFB84, 0xEC1DFB84, 0xEC1EFB84, 0xEC1FFB84, 0xEC20FB84, 0xEC21FB84, 0xEC22FB84, 0xEC23FB84, 0xEC24FB84, 0xEC25FB84, 0xEC26FB84, 0xEC27FB84, + 0xEC28FB84, 0xEC29FB84, 0xEC2AFB84, 0xEC2BFB84, 0xEC2CFB84, 0xEC2DFB84, 0xEC2EFB84, 0xEC2FFB84, 0xEC30FB84, 0xEC31FB84, 0xEC32FB84, 0xEC33FB84, 0xEC34FB84, 0xEC35FB84, 0xEC36FB84, + 0xEC37FB84, 0xEC38FB84, 0xEC39FB84, 0xEC3AFB84, 0xEC3BFB84, 0xEC3CFB84, 0xEC3DFB84, 0xEC3EFB84, 0xEC3FFB84, 0xEC40FB84, 0xEC41FB84, 0xEC42FB84, 0xEC43FB84, 0xEC44FB84, 0xEC45FB84, + 0xEC46FB84, 0xEC47FB84, 
0xEC48FB84, 0xEC49FB84, 0xEC4AFB84, 0xEC4BFB84, 0xEC4CFB84, 0xEC4DFB84, 0xEC4EFB84, 0xEC4FFB84, 0xEC50FB84, 0xEC51FB84, 0xEC52FB84, 0xEC53FB84, 0xEC54FB84, + 0xEC55FB84, 0xEC56FB84, 0xEC57FB84, 0xEC58FB84, 0xEC59FB84, 0xEC5AFB84, 0xEC5BFB84, 0xEC5CFB84, 0xEC5DFB84, 0xEC5EFB84, 0xEC5FFB84, 0xEC60FB84, 0xEC61FB84, 0xEC62FB84, 0xEC63FB84, + 0xEC64FB84, 0xEC65FB84, 0xEC66FB84, 0xEC67FB84, 0xEC68FB84, 0xEC69FB84, 0xEC6AFB84, 0xEC6BFB84, 0xEC6CFB84, 0xEC6DFB84, 0xEC6EFB84, 0xEC6FFB84, 0xEC70FB84, 0xEC71FB84, 0xEC72FB84, + 0xEC73FB84, 0xEC74FB84, 0xEC75FB84, 0xEC76FB84, 0xEC77FB84, 0xEC78FB84, 0xEC79FB84, 0xEC7AFB84, 0xEC7BFB84, 0xEC7CFB84, 0xEC7DFB84, 0xEC7EFB84, 0xEC7FFB84, 0xEC80FB84, 0xEC81FB84, + 0xEC82FB84, 0xEC83FB84, 0xEC84FB84, 0xEC85FB84, 0xEC86FB84, 0xEC87FB84, 0xEC88FB84, 0xEC89FB84, 0xEC8AFB84, 0xEC8BFB84, 0xEC8CFB84, 0xEC8DFB84, 0xEC8EFB84, 0xEC8FFB84, 0xEC90FB84, + 0xEC91FB84, 0xEC92FB84, 0xEC93FB84, 0xEC94FB84, 0xEC95FB84, 0xEC96FB84, 0xEC97FB84, 0xEC98FB84, 0xEC99FB84, 0xEC9AFB84, 0xEC9BFB84, 0xEC9CFB84, 0xEC9DFB84, 0xEC9EFB84, 0xEC9FFB84, + 0xECA0FB84, 0xECA1FB84, 0xECA2FB84, 0xECA3FB84, 0xECA4FB84, 0xECA5FB84, 0xECA6FB84, 0xECA7FB84, 0xECA8FB84, 0xECA9FB84, 0xECAAFB84, 0xECABFB84, 0xECACFB84, 0xECADFB84, 0xECAEFB84, + 0xECAFFB84, 0xECB0FB84, 0xECB1FB84, 0xECB2FB84, 0xECB3FB84, 0xECB4FB84, 0xECB5FB84, 0xECB6FB84, 0xECB7FB84, 0xECB8FB84, 0xECB9FB84, 0xECBAFB84, 0xECBBFB84, 0xECBCFB84, 0xECBDFB84, + 0xECBEFB84, 0xECBFFB84, 0xECC0FB84, 0xECC1FB84, 0xECC2FB84, 0xECC3FB84, 0xECC4FB84, 0xECC5FB84, 0xECC6FB84, 0xECC7FB84, 0xECC8FB84, 0xECC9FB84, 0xECCAFB84, 0xECCBFB84, 0xECCCFB84, + 0xECCDFB84, 0xECCEFB84, 0xECCFFB84, 0xECD0FB84, 0xECD1FB84, 0xECD2FB84, 0xECD3FB84, 0xECD4FB84, 0xECD5FB84, 0xECD6FB84, 0xECD7FB84, 0xECD8FB84, 0xECD9FB84, 0xECDAFB84, 0xECDBFB84, + 0xECDCFB84, 0xECDDFB84, 0xECDEFB84, 0xECDFFB84, 0xECE0FB84, 0xECE1FB84, 0xECE2FB84, 0xECE3FB84, 0xECE4FB84, 0xECE5FB84, 0xECE6FB84, 0xECE7FB84, 0xECE8FB84, 0xECE9FB84, 0xECEAFB84, + 0xECEBFB84, 
0xECECFB84, 0xECEDFB84, 0xECEEFB84, 0xECEFFB84, 0xECF0FB84, 0xECF1FB84, 0xECF2FB84, 0xECF3FB84, 0xECF4FB84, 0xECF5FB84, 0xECF6FB84, 0xECF7FB84, 0xECF8FB84, 0xECF9FB84, + 0xECFAFB84, 0xECFBFB84, 0xECFCFB84, 0xECFDFB84, 0xECFEFB84, 0xECFFFB84, 0xED00FB84, 0xED01FB84, 0xED02FB84, 0xED03FB84, 0xED04FB84, 0xED05FB84, 0xED06FB84, 0xED07FB84, 0xED08FB84, + 0xED09FB84, 0xED0AFB84, 0xED0BFB84, 0xED0CFB84, 0xED0DFB84, 0xED0EFB84, 0xED0FFB84, 0xED10FB84, 0xED11FB84, 0xED12FB84, 0xED13FB84, 0xED14FB84, 0xED15FB84, 0xED16FB84, 0xED17FB84, + 0xED18FB84, 0xED19FB84, 0xED1AFB84, 0xED1BFB84, 0xED1CFB84, 0xED1DFB84, 0xED1EFB84, 0xED1FFB84, 0xED20FB84, 0xED21FB84, 0xED22FB84, 0xED23FB84, 0xED24FB84, 0xED25FB84, 0xED26FB84, + 0xED27FB84, 0xED28FB84, 0xED29FB84, 0xED2AFB84, 0xED2BFB84, 0xED2CFB84, 0xED2DFB84, 0xED2EFB84, 0xED2FFB84, 0xED30FB84, 0xED31FB84, 0xED32FB84, 0xED33FB84, 0xED34FB84, 0xED35FB84, + 0xED36FB84, 0xED37FB84, 0xED38FB84, 0xED39FB84, 0xED3AFB84, 0xED3BFB84, 0xED3CFB84, 0xED3DFB84, 0xED3EFB84, 0xED3FFB84, 0xED40FB84, 0xED41FB84, 0xED42FB84, 0xED43FB84, 0xED44FB84, + 0xED45FB84, 0xED46FB84, 0xED47FB84, 0xED48FB84, 0xED49FB84, 0xED4AFB84, 0xED4BFB84, 0xED4CFB84, 0xED4DFB84, 0xED4EFB84, 0xED4FFB84, 0xED50FB84, 0xED51FB84, 0xED52FB84, 0xED53FB84, + 0xED54FB84, 0xED55FB84, 0xED56FB84, 0xED57FB84, 0xED58FB84, 0xED59FB84, 0xED5AFB84, 0xED5BFB84, 0xED5CFB84, 0xED5DFB84, 0xED5EFB84, 0xED5FFB84, 0xED60FB84, 0xED61FB84, 0xED62FB84, + 0xED63FB84, 0xED64FB84, 0xED65FB84, 0xED66FB84, 0xED67FB84, 0xED68FB84, 0xED69FB84, 0xED6AFB84, 0xED6BFB84, 0xED6CFB84, 0xED6DFB84, 0xED6EFB84, 0xED6FFB84, 0xED70FB84, 0xED71FB84, + 0xED72FB84, 0xED73FB84, 0xED74FB84, 0xED75FB84, 0xED76FB84, 0xED77FB84, 0xED78FB84, 0xED79FB84, 0xED7AFB84, 0xED7BFB84, 0xED7CFB84, 0xED7DFB84, 0xED7EFB84, 0xED7FFB84, 0xED80FB84, + 0xED81FB84, 0xED82FB84, 0xED83FB84, 0xED84FB84, 0xED85FB84, 0xED86FB84, 0xED87FB84, 0xED88FB84, 0xED89FB84, 0xED8AFB84, 0xED8BFB84, 0xED8CFB84, 0xED8DFB84, 0xED8EFB84, 0xED8FFB84, + 
0xED90FB84, 0xED91FB84, 0xED92FB84, 0xED93FB84, 0xED94FB84, 0xED95FB84, 0xED96FB84, 0xED97FB84, 0xED98FB84, 0xED99FB84, 0xED9AFB84, 0xED9BFB84, 0xED9CFB84, 0xED9DFB84, 0xED9EFB84, + 0xED9FFB84, 0xEDA0FB84, 0xEDA1FB84, 0xEDA2FB84, 0xEDA3FB84, 0xEDA4FB84, 0xEDA5FB84, 0xEDA6FB84, 0xEDA7FB84, 0xEDA8FB84, 0xEDA9FB84, 0xEDAAFB84, 0xEDABFB84, 0xEDACFB84, 0xEDADFB84, + 0xEDAEFB84, 0xEDAFFB84, 0xEDB0FB84, 0xEDB1FB84, 0xEDB2FB84, 0xEDB3FB84, 0xEDB4FB84, 0xEDB5FB84, 0xEDB6FB84, 0xEDB7FB84, 0xEDB8FB84, 0xEDB9FB84, 0xEDBAFB84, 0xEDBBFB84, 0xEDBCFB84, + 0xEDBDFB84, 0xEDBEFB84, 0xEDBFFB84, 0xEDC0FB84, 0xEDC1FB84, 0xEDC2FB84, 0xEDC3FB84, 0xEDC4FB84, 0xEDC5FB84, 0xEDC6FB84, 0xEDC7FB84, 0xEDC8FB84, 0xEDC9FB84, 0xEDCAFB84, 0xEDCBFB84, + 0xEDCCFB84, 0xEDCDFB84, 0xEDCEFB84, 0xEDCFFB84, 0xEDD0FB84, 0xEDD1FB84, 0xEDD2FB84, 0xEDD3FB84, 0xEDD4FB84, 0xEDD5FB84, 0xEDD6FB84, 0xEDD7FB84, 0xEDD8FB84, 0xEDD9FB84, 0xEDDAFB84, + 0xEDDBFB84, 0xEDDCFB84, 0xEDDDFB84, 0xEDDEFB84, 0xEDDFFB84, 0xEDE0FB84, 0xEDE1FB84, 0xEDE2FB84, 0xEDE3FB84, 0xEDE4FB84, 0xEDE5FB84, 0xEDE6FB84, 0xEDE7FB84, 0xEDE8FB84, 0xEDE9FB84, + 0xEDEAFB84, 0xEDEBFB84, 0xEDECFB84, 0xEDEDFB84, 0xEDEEFB84, 0xEDEFFB84, 0xEDF0FB84, 0xEDF1FB84, 0xEDF2FB84, 0xEDF3FB84, 0xEDF4FB84, 0xEDF5FB84, 0xEDF6FB84, 0xEDF7FB84, 0xEDF8FB84, + 0xEDF9FB84, 0xEDFAFB84, 0xEDFBFB84, 0xEDFCFB84, 0xEDFDFB84, 0xEDFEFB84, 0xEDFFFB84, 0xEE00FB84, 0xEE01FB84, 0xEE02FB84, 0xEE03FB84, 0xEE04FB84, 0xEE05FB84, 0xEE06FB84, 0xEE07FB84, + 0xEE08FB84, 0xEE09FB84, 0xEE0AFB84, 0xEE0BFB84, 0xEE0CFB84, 0xEE0DFB84, 0xEE0EFB84, 0xEE0FFB84, 0xEE10FB84, 0xEE11FB84, 0xEE12FB84, 0xEE13FB84, 0xEE14FB84, 0xEE15FB84, 0xEE16FB84, + 0xEE17FB84, 0xEE18FB84, 0xEE19FB84, 0xEE1AFB84, 0xEE1BFB84, 0xEE1CFB84, 0xEE1DFB84, 0xEE1EFB84, 0xEE1FFB84, 0xEE20FB84, 0xEE21FB84, 0xEE22FB84, 0xEE23FB84, 0xEE24FB84, 0xEE25FB84, + 0xEE26FB84, 0xEE27FB84, 0xEE28FB84, 0xEE29FB84, 0xEE2AFB84, 0xEE2BFB84, 0xEE2CFB84, 0xEE2DFB84, 0xEE2EFB84, 0xEE2FFB84, 0xEE30FB84, 0xEE31FB84, 0xEE32FB84, 0xEE33FB84, 0xEE34FB84, 
+ 0xEE35FB84, 0xEE36FB84, 0xEE37FB84, 0xEE38FB84, 0xEE39FB84, 0xEE3AFB84, 0xEE3BFB84, 0xEE3CFB84, 0xEE3DFB84, 0xEE3EFB84, 0xEE3FFB84, 0xEE40FB84, 0xEE41FB84, 0xEE42FB84, 0xEE43FB84, + 0xEE44FB84, 0xEE45FB84, 0xEE46FB84, 0xEE47FB84, 0xEE48FB84, 0xEE49FB84, 0xEE4AFB84, 0xEE4BFB84, 0xEE4CFB84, 0xEE4DFB84, 0xEE4EFB84, 0xEE4FFB84, 0xEE50FB84, 0xEE51FB84, 0xEE52FB84, + 0xEE53FB84, 0xEE54FB84, 0xEE55FB84, 0xEE56FB84, 0xEE57FB84, 0xEE58FB84, 0xEE59FB84, 0xEE5AFB84, 0xEE5BFB84, 0xEE5CFB84, 0xEE5DFB84, 0xEE5EFB84, 0xEE5FFB84, 0xEE60FB84, 0xEE61FB84, + 0xEE62FB84, 0xEE63FB84, 0xEE64FB84, 0xEE65FB84, 0xEE66FB84, 0xEE67FB84, 0xEE68FB84, 0xEE69FB84, 0xEE6AFB84, 0xEE6BFB84, 0xEE6CFB84, 0xEE6DFB84, 0xEE6EFB84, 0xEE6FFB84, 0xEE70FB84, + 0xEE71FB84, 0xEE72FB84, 0xEE73FB84, 0xEE74FB84, 0xEE75FB84, 0xEE76FB84, 0xEE77FB84, 0xEE78FB84, 0xEE79FB84, 0xEE7AFB84, 0xEE7BFB84, 0xEE7CFB84, 0xEE7DFB84, 0xEE7EFB84, 0xEE7FFB84, + 0xEE80FB84, 0xEE81FB84, 0xEE82FB84, 0xEE83FB84, 0xEE84FB84, 0xEE85FB84, 0xEE86FB84, 0xEE87FB84, 0xEE88FB84, 0xEE89FB84, 0xEE8AFB84, 0xEE8BFB84, 0xEE8CFB84, 0xEE8DFB84, 0xEE8EFB84, + 0xEE8FFB84, 0xEE90FB84, 0xEE91FB84, 0xEE92FB84, 0xEE93FB84, 0xEE94FB84, 0xEE95FB84, 0xEE96FB84, 0xEE97FB84, 0xEE98FB84, 0xEE99FB84, 0xEE9AFB84, 0xEE9BFB84, 0xEE9CFB84, 0xEE9DFB84, + 0xEE9EFB84, 0xEE9FFB84, 0xEEA0FB84, 0xEEA1FB84, 0xEEA2FB84, 0xEEA3FB84, 0xEEA4FB84, 0xEEA5FB84, 0xEEA6FB84, 0xEEA7FB84, 0xEEA8FB84, 0xEEA9FB84, 0xEEAAFB84, 0xEEABFB84, 0xEEACFB84, + 0xEEADFB84, 0xEEAEFB84, 0xEEAFFB84, 0xEEB0FB84, 0xEEB1FB84, 0xEEB2FB84, 0xEEB3FB84, 0xEEB4FB84, 0xEEB5FB84, 0xEEB6FB84, 0xEEB7FB84, 0xEEB8FB84, 0xEEB9FB84, 0xEEBAFB84, 0xEEBBFB84, + 0xEEBCFB84, 0xEEBDFB84, 0xEEBEFB84, 0xEEBFFB84, 0xEEC0FB84, 0xEEC1FB84, 0xEEC2FB84, 0xEEC3FB84, 0xEEC4FB84, 0xEEC5FB84, 0xEEC6FB84, 0xEEC7FB84, 0xEEC8FB84, 0xEEC9FB84, 0xEECAFB84, + 0xEECBFB84, 0xEECCFB84, 0xEECDFB84, 0xEECEFB84, 0xEECFFB84, 0xEED0FB84, 0xEED1FB84, 0xEED2FB84, 0xEED3FB84, 0xEED4FB84, 0xEED5FB84, 0xEED6FB84, 0xEED7FB84, 0xEED8FB84, 
0xEED9FB84, + 0xEEDAFB84, 0xEEDBFB84, 0xEEDCFB84, 0xEEDDFB84, 0xEEDEFB84, 0xEEDFFB84, 0xEEE0FB84, 0xEEE1FB84, 0xEEE2FB84, 0xEEE3FB84, 0xEEE4FB84, 0xEEE5FB84, 0xEEE6FB84, 0xEEE7FB84, 0xEEE8FB84, + 0xEEE9FB84, 0xEEEAFB84, 0xEEEBFB84, 0xEEECFB84, 0xEEEDFB84, 0xEEEEFB84, 0xEEEFFB84, 0xEEF0FB84, 0xEEF1FB84, 0xEEF2FB84, 0xEEF3FB84, 0xEEF4FB84, 0xEEF5FB84, 0xEEF6FB84, 0xEEF7FB84, + 0xEEF8FB84, 0xEEF9FB84, 0xEEFAFB84, 0xEEFBFB84, 0xEEFCFB84, 0xEEFDFB84, 0xEEFEFB84, 0xEEFFFB84, 0xEF00FB84, 0xEF01FB84, 0xEF02FB84, 0xEF03FB84, 0xEF04FB84, 0xEF05FB84, 0xEF06FB84, + 0xEF07FB84, 0xEF08FB84, 0xEF09FB84, 0xEF0AFB84, 0xEF0BFB84, 0xEF0CFB84, 0xEF0DFB84, 0xEF0EFB84, 0xEF0FFB84, 0xEF10FB84, 0xEF11FB84, 0xEF12FB84, 0xEF13FB84, 0xEF14FB84, 0xEF15FB84, + 0xEF16FB84, 0xEF17FB84, 0xEF18FB84, 0xEF19FB84, 0xEF1AFB84, 0xEF1BFB84, 0xEF1CFB84, 0xEF1DFB84, 0xEF1EFB84, 0xEF1FFB84, 0xEF20FB84, 0xEF21FB84, 0xEF22FB84, 0xEF23FB84, 0xEF24FB84, + 0xEF25FB84, 0xEF26FB84, 0xEF27FB84, 0xEF28FB84, 0xEF29FB84, 0xEF2AFB84, 0xEF2BFB84, 0xEF2CFB84, 0xEF2DFB84, 0xEF2EFB84, 0xEF2FFB84, 0xEF30FB84, 0xEF31FB84, 0xEF32FB84, 0xEF33FB84, + 0xEF34FB84, 0xEF35FB84, 0xEF36FB84, 0xEF37FB84, 0xEF38FB84, 0xEF39FB84, 0xEF3AFB84, 0xEF3BFB84, 0xEF3CFB84, 0xEF3DFB84, 0xEF3EFB84, 0xEF3FFB84, 0xEF40FB84, 0xEF41FB84, 0xEF42FB84, + 0xEF43FB84, 0xEF44FB84, 0xEF45FB84, 0xEF46FB84, 0xEF47FB84, 0xEF48FB84, 0xEF49FB84, 0xEF4AFB84, 0xEF4BFB84, 0xEF4CFB84, 0xEF4DFB84, 0xEF4EFB84, 0xEF4FFB84, 0xEF50FB84, 0xEF51FB84, + 0xEF52FB84, 0xEF53FB84, 0xEF54FB84, 0xEF55FB84, 0xEF56FB84, 0xEF57FB84, 0xEF58FB84, 0xEF59FB84, 0xEF5AFB84, 0xEF5BFB84, 0xEF5CFB84, 0xEF5DFB84, 0xEF5EFB84, 0xEF5FFB84, 0xEF60FB84, + 0xEF61FB84, 0xEF62FB84, 0xEF63FB84, 0xEF64FB84, 0xEF65FB84, 0xEF66FB84, 0xEF67FB84, 0xEF68FB84, 0xEF69FB84, 0xEF6AFB84, 0xEF6BFB84, 0xEF6CFB84, 0xEF6DFB84, 0xEF6EFB84, 0xEF6FFB84, + 0xEF70FB84, 0xEF71FB84, 0xEF72FB84, 0xEF73FB84, 0xEF74FB84, 0xEF75FB84, 0xEF76FB84, 0xEF77FB84, 0xEF78FB84, 0xEF79FB84, 0xEF7AFB84, 0xEF7BFB84, 0xEF7CFB84, 
0xEF7DFB84, 0xEF7EFB84, + 0xEF7FFB84, 0xEF80FB84, 0xEF81FB84, 0xEF82FB84, 0xEF83FB84, 0xEF84FB84, 0xEF85FB84, 0xEF86FB84, 0xEF87FB84, 0xEF88FB84, 0xEF89FB84, 0xEF8AFB84, 0xEF8BFB84, 0xEF8CFB84, 0xEF8DFB84, + 0xEF8EFB84, 0xEF8FFB84, 0xEF90FB84, 0xEF91FB84, 0xEF92FB84, 0xEF93FB84, 0xEF94FB84, 0xEF95FB84, 0xEF96FB84, 0xEF97FB84, 0xEF98FB84, 0xEF99FB84, 0xEF9AFB84, 0xEF9BFB84, 0xEF9CFB84, + 0xEF9DFB84, 0xEF9EFB84, 0xEF9FFB84, 0xEFA0FB84, 0xEFA1FB84, 0xEFA2FB84, 0xEFA3FB84, 0xEFA4FB84, 0xEFA5FB84, 0xEFA6FB84, 0xEFA7FB84, 0xEFA8FB84, 0xEFA9FB84, 0xEFAAFB84, 0xEFABFB84, + 0xEFACFB84, 0xEFADFB84, 0xEFAEFB84, 0xEFAFFB84, 0xEFB0FB84, 0xEFB1FB84, 0xEFB2FB84, 0xEFB3FB84, 0xEFB4FB84, 0xEFB5FB84, 0xEFB6FB84, 0xEFB7FB84, 0xEFB8FB84, 0xEFB9FB84, 0xEFBAFB84, + 0xEFBBFB84, 0xEFBCFB84, 0xEFBDFB84, 0xEFBEFB84, 0xEFBFFB84, 0xEFC0FB84, 0xEFC1FB84, 0xEFC2FB84, 0xEFC3FB84, 0xEFC4FB84, 0xEFC5FB84, 0xEFC6FB84, 0xEFC7FB84, 0xEFC8FB84, 0xEFC9FB84, + 0xEFCAFB84, 0xEFCBFB84, 0xEFCCFB84, 0xEFCDFB84, 0xEFCEFB84, 0xEFCFFB84, 0xEFD0FB84, 0xEFD1FB84, 0xEFD2FB84, 0xEFD3FB84, 0xEFD4FB84, 0xEFD5FB84, 0xEFD6FB84, 0xEFD7FB84, 0xEFD8FB84, + 0xEFD9FB84, 0xEFDAFB84, 0xEFDBFB84, 0xEFDCFB84, 0xEFDDFB84, 0xEFDEFB84, 0xEFDFFB84, 0xEFE0FB84, 0xEFE1FB84, 0xEFE2FB84, 0xEFE3FB84, 0xEFE4FB84, 0xEFE5FB84, 0xEFE6FB84, 0xEFE7FB84, + 0xEFE8FB84, 0xEFE9FB84, 0xEFEAFB84, 0xEFEBFB84, 0xEFECFB84, 0xEFEDFB84, 0xEFEEFB84, 0xEFEFFB84, 0xEFF0FB84, 0xEFF1FB84, 0xEFF2FB84, 0xEFF3FB84, 0xEFF4FB84, 0xEFF5FB84, 0xEFF6FB84, + 0xEFF7FB84, 0xEFF8FB84, 0xEFF9FB84, 0xEFFAFB84, 0xEFFBFB84, 0xEFFCFB84, 0xEFFDFB84, 0xEFFEFB84, 0xEFFFFB84, 0xF000FB84, 0xF001FB84, 0xF002FB84, 0xF003FB84, 0xF004FB84, 0xF005FB84, + 0xF006FB84, 0xF007FB84, 0xF008FB84, 0xF009FB84, 0xF00AFB84, 0xF00BFB84, 0xF00CFB84, 0xF00DFB84, 0xF00EFB84, 0xF00FFB84, 0xF010FB84, 0xF011FB84, 0xF012FB84, 0xF013FB84, 0xF014FB84, + 0xF015FB84, 0xF016FB84, 0xF017FB84, 0xF018FB84, 0xF019FB84, 0xF01AFB84, 0xF01BFB84, 0xF01CFB84, 0xF01DFB84, 0xF01EFB84, 0xF01FFB84, 0xF020FB84, 
0xF021FB84, 0xF022FB84, 0xF023FB84, + 0xF024FB84, 0xF025FB84, 0xF026FB84, 0xF027FB84, 0xF028FB84, 0xF029FB84, 0xF02AFB84, 0xF02BFB84, 0xF02CFB84, 0xF02DFB84, 0xF02EFB84, 0xF02FFB84, 0xF030FB84, 0xF031FB84, 0xF032FB84, + 0xF033FB84, 0xF034FB84, 0xF035FB84, 0xF036FB84, 0xF037FB84, 0xF038FB84, 0xF039FB84, 0xF03AFB84, 0xF03BFB84, 0xF03CFB84, 0xF03DFB84, 0xF03EFB84, 0xF03FFB84, 0xF040FB84, 0xF041FB84, + 0xF042FB84, 0xF043FB84, 0xF044FB84, 0xF045FB84, 0xF046FB84, 0xF047FB84, 0xF048FB84, 0xF049FB84, 0xF04AFB84, 0xF04BFB84, 0xF04CFB84, 0xF04DFB84, 0xF04EFB84, 0xF04FFB84, 0xF050FB84, + 0xF051FB84, 0xF052FB84, 0xF053FB84, 0xF054FB84, 0xF055FB84, 0xF056FB84, 0xF057FB84, 0xF058FB84, 0xF059FB84, 0xF05AFB84, 0xF05BFB84, 0xF05CFB84, 0xF05DFB84, 0xF05EFB84, 0xF05FFB84, + 0xF060FB84, 0xF061FB84, 0xF062FB84, 0xF063FB84, 0xF064FB84, 0xF065FB84, 0xF066FB84, 0xF067FB84, 0xF068FB84, 0xF069FB84, 0xF06AFB84, 0xF06BFB84, 0xF06CFB84, 0xF06DFB84, 0xF06EFB84, + 0xF06FFB84, 0xF070FB84, 0xF071FB84, 0xF072FB84, 0xF073FB84, 0xF074FB84, 0xF075FB84, 0xF076FB84, 0xF077FB84, 0xF078FB84, 0xF079FB84, 0xF07AFB84, 0xF07BFB84, 0xF07CFB84, 0xF07DFB84, + 0xF07EFB84, 0xF07FFB84, 0xF080FB84, 0xF081FB84, 0xF082FB84, 0xF083FB84, 0xF084FB84, 0xF085FB84, 0xF086FB84, 0xF087FB84, 0xF088FB84, 0xF089FB84, 0xF08AFB84, 0xF08BFB84, 0xF08CFB84, + 0xF08DFB84, 0xF08EFB84, 0xF08FFB84, 0xF090FB84, 0xF091FB84, 0xF092FB84, 0xF093FB84, 0xF094FB84, 0xF095FB84, 0xF096FB84, 0xF097FB84, 0xF098FB84, 0xF099FB84, 0xF09AFB84, 0xF09BFB84, + 0xF09CFB84, 0xF09DFB84, 0xF09EFB84, 0xF09FFB84, 0xF0A0FB84, 0xF0A1FB84, 0xF0A2FB84, 0xF0A3FB84, 0xF0A4FB84, 0xF0A5FB84, 0xF0A6FB84, 0xF0A7FB84, 0xF0A8FB84, 0xF0A9FB84, 0xF0AAFB84, + 0xF0ABFB84, 0xF0ACFB84, 0xF0ADFB84, 0xF0AEFB84, 0xF0AFFB84, 0xF0B0FB84, 0xF0B1FB84, 0xF0B2FB84, 0xF0B3FB84, 0xF0B4FB84, 0xF0B5FB84, 0xF0B6FB84, 0xF0B7FB84, 0xF0B8FB84, 0xF0B9FB84, + 0xF0BAFB84, 0xF0BBFB84, 0xF0BCFB84, 0xF0BDFB84, 0xF0BEFB84, 0xF0BFFB84, 0xF0C0FB84, 0xF0C1FB84, 0xF0C2FB84, 0xF0C3FB84, 0xF0C4FB84, 
0xF0C5FB84, 0xF0C6FB84, 0xF0C7FB84, 0xF0C8FB84, + 0xF0C9FB84, 0xF0CAFB84, 0xF0CBFB84, 0xF0CCFB84, 0xF0CDFB84, 0xF0CEFB84, 0xF0CFFB84, 0xF0D0FB84, 0xF0D1FB84, 0xF0D2FB84, 0xF0D3FB84, 0xF0D4FB84, 0xF0D5FB84, 0xF0D6FB84, 0xF0D7FB84, + 0xF0D8FB84, 0xF0D9FB84, 0xF0DAFB84, 0xF0DBFB84, 0xF0DCFB84, 0xF0DDFB84, 0xF0DEFB84, 0xF0DFFB84, 0xF0E0FB84, 0xF0E1FB84, 0xF0E2FB84, 0xF0E3FB84, 0xF0E4FB84, 0xF0E5FB84, 0xF0E6FB84, + 0xF0E7FB84, 0xF0E8FB84, 0xF0E9FB84, 0xF0EAFB84, 0xF0EBFB84, 0xF0ECFB84, 0xF0EDFB84, 0xF0EEFB84, 0xF0EFFB84, 0xF0F0FB84, 0xF0F1FB84, 0xF0F2FB84, 0xF0F3FB84, 0xF0F4FB84, 0xF0F5FB84, + 0xF0F6FB84, 0xF0F7FB84, 0xF0F8FB84, 0xF0F9FB84, 0xF0FAFB84, 0xF0FBFB84, 0xF0FCFB84, 0xF0FDFB84, 0xF0FEFB84, 0xF0FFFB84, 0xF100FB84, 0xF101FB84, 0xF102FB84, 0xF103FB84, 0xF104FB84, + 0xF105FB84, 0xF106FB84, 0xF107FB84, 0xF108FB84, 0xF109FB84, 0xF10AFB84, 0xF10BFB84, 0xF10CFB84, 0xF10DFB84, 0xF10EFB84, 0xF10FFB84, 0xF110FB84, 0xF111FB84, 0xF112FB84, 0xF113FB84, + 0xF114FB84, 0xF115FB84, 0xF116FB84, 0xF117FB84, 0xF118FB84, 0xF119FB84, 0xF11AFB84, 0xF11BFB84, 0xF11CFB84, 0xF11DFB84, 0xF11EFB84, 0xF11FFB84, 0xF120FB84, 0xF121FB84, 0xF122FB84, + 0xF123FB84, 0xF124FB84, 0xF125FB84, 0xF126FB84, 0xF127FB84, 0xF128FB84, 0xF129FB84, 0xF12AFB84, 0xF12BFB84, 0xF12CFB84, 0xF12DFB84, 0xF12EFB84, 0xF12FFB84, 0xF130FB84, 0xF131FB84, + 0xF132FB84, 0xF133FB84, 0xF134FB84, 0xF135FB84, 0xF136FB84, 0xF137FB84, 0xF138FB84, 0xF139FB84, 0xF13AFB84, 0xF13BFB84, 0xF13CFB84, 0xF13DFB84, 0xF13EFB84, 0xF13FFB84, 0xF140FB84, + 0xF141FB84, 0xF142FB84, 0xF143FB84, 0xF144FB84, 0xF145FB84, 0xF146FB84, 0xF147FB84, 0xF148FB84, 0xF149FB84, 0xF14AFB84, 0xF14BFB84, 0xF14CFB84, 0xF14DFB84, 0xF14EFB84, 0xF14FFB84, + 0xF150FB84, 0xF151FB84, 0xF152FB84, 0xF153FB84, 0xF154FB84, 0xF155FB84, 0xF156FB84, 0xF157FB84, 0xF158FB84, 0xF159FB84, 0xF15AFB84, 0xF15BFB84, 0xF15CFB84, 0xF15DFB84, 0xF15EFB84, + 0xF15FFB84, 0xF160FB84, 0xF161FB84, 0xF162FB84, 0xF163FB84, 0xF164FB84, 0xF165FB84, 0xF166FB84, 0xF167FB84, 0xF168FB84, 
0xF169FB84, 0xF16AFB84, 0xF16BFB84, 0xF16CFB84, 0xF16DFB84, + 0xF16EFB84, 0xF16FFB84, 0xF170FB84, 0xF171FB84, 0xF172FB84, 0xF173FB84, 0xF174FB84, 0xF175FB84, 0xF176FB84, 0xF177FB84, 0xF178FB84, 0xF179FB84, 0xF17AFB84, 0xF17BFB84, 0xF17CFB84, + 0xF17DFB84, 0xF17EFB84, 0xF17FFB84, 0xF180FB84, 0xF181FB84, 0xF182FB84, 0xF183FB84, 0xF184FB84, 0xF185FB84, 0xF186FB84, 0xF187FB84, 0xF188FB84, 0xF189FB84, 0xF18AFB84, 0xF18BFB84, + 0xF18CFB84, 0xF18DFB84, 0xF18EFB84, 0xF18FFB84, 0xF190FB84, 0xF191FB84, 0xF192FB84, 0xF193FB84, 0xF194FB84, 0xF195FB84, 0xF196FB84, 0xF197FB84, 0xF198FB84, 0xF199FB84, 0xF19AFB84, + 0xF19BFB84, 0xF19CFB84, 0xF19DFB84, 0xF19EFB84, 0xF19FFB84, 0xF1A0FB84, 0xF1A1FB84, 0xF1A2FB84, 0xF1A3FB84, 0xF1A4FB84, 0xF1A5FB84, 0xF1A6FB84, 0xF1A7FB84, 0xF1A8FB84, 0xF1A9FB84, + 0xF1AAFB84, 0xF1ABFB84, 0xF1ACFB84, 0xF1ADFB84, 0xF1AEFB84, 0xF1AFFB84, 0xF1B0FB84, 0xF1B1FB84, 0xF1B2FB84, 0xF1B3FB84, 0xF1B4FB84, 0xF1B5FB84, 0xF1B6FB84, 0xF1B7FB84, 0xF1B8FB84, + 0xF1B9FB84, 0xF1BAFB84, 0xF1BBFB84, 0xF1BCFB84, 0xF1BDFB84, 0xF1BEFB84, 0xF1BFFB84, 0xF1C0FB84, 0xF1C1FB84, 0xF1C2FB84, 0xF1C3FB84, 0xF1C4FB84, 0xF1C5FB84, 0xF1C6FB84, 0xF1C7FB84, + 0xF1C8FB84, 0xF1C9FB84, 0xF1CAFB84, 0xF1CBFB84, 0xF1CCFB84, 0xF1CDFB84, 0xF1CEFB84, 0xF1CFFB84, 0xF1D0FB84, 0xF1D1FB84, 0xF1D2FB84, 0xF1D3FB84, 0xF1D4FB84, 0xF1D5FB84, 0xF1D6FB84, + 0xF1D7FB84, 0xF1D8FB84, 0xF1D9FB84, 0xF1DAFB84, 0xF1DBFB84, 0xF1DCFB84, 0xF1DDFB84, 0xF1DEFB84, 0xF1DFFB84, 0xF1E0FB84, 0xF1E1FB84, 0xF1E2FB84, 0xF1E3FB84, 0xF1E4FB84, 0xF1E5FB84, + 0xF1E6FB84, 0xF1E7FB84, 0xF1E8FB84, 0xF1E9FB84, 0xF1EAFB84, 0xF1EBFB84, 0xF1ECFB84, 0xF1EDFB84, 0xF1EEFB84, 0xF1EFFB84, 0xF1F0FB84, 0xF1F1FB84, 0xF1F2FB84, 0xF1F3FB84, 0xF1F4FB84, + 0xF1F5FB84, 0xF1F6FB84, 0xF1F7FB84, 0xF1F8FB84, 0xF1F9FB84, 0xF1FAFB84, 0xF1FBFB84, 0xF1FCFB84, 0xF1FDFB84, 0xF1FEFB84, 0xF1FFFB84, 0xF200FB84, 0xF201FB84, 0xF202FB84, 0xF203FB84, + 0xF204FB84, 0xF205FB84, 0xF206FB84, 0xF207FB84, 0xF208FB84, 0xF209FB84, 0xF20AFB84, 0xF20BFB84, 0xF20CFB84, 
0xF20DFB84, 0xF20EFB84, 0xF20FFB84, 0xF210FB84, 0xF211FB84, 0xF212FB84, + 0xF213FB84, 0xF214FB84, 0xF215FB84, 0xF216FB84, 0xF217FB84, 0xF218FB84, 0xF219FB84, 0xF21AFB84, 0xF21BFB84, 0xF21CFB84, 0xF21DFB84, 0xF21EFB84, 0xF21FFB84, 0xF220FB84, 0xF221FB84, + 0xF222FB84, 0xF223FB84, 0xF224FB84, 0xF225FB84, 0xF226FB84, 0xF227FB84, 0xF228FB84, 0xF229FB84, 0xF22AFB84, 0xF22BFB84, 0xF22CFB84, 0xF22DFB84, 0xF22EFB84, 0xF22FFB84, 0xF230FB84, + 0xF231FB84, 0xF232FB84, 0xF233FB84, 0xF234FB84, 0xF235FB84, 0xF236FB84, 0xF237FB84, 0xF238FB84, 0xF239FB84, 0xF23AFB84, 0xF23BFB84, 0xF23CFB84, 0xF23DFB84, 0xF23EFB84, 0xF23FFB84, + 0xF240FB84, 0xF241FB84, 0xF242FB84, 0xF243FB84, 0xF244FB84, 0xF245FB84, 0xF246FB84, 0xF247FB84, 0xF248FB84, 0xF249FB84, 0xF24AFB84, 0xF24BFB84, 0xF24CFB84, 0xF24DFB84, 0xF24EFB84, + 0xF24FFB84, 0xF250FB84, 0xF251FB84, 0xF252FB84, 0xF253FB84, 0xF254FB84, 0xF255FB84, 0xF256FB84, 0xF257FB84, 0xF258FB84, 0xF259FB84, 0xF25AFB84, 0xF25BFB84, 0xF25CFB84, 0xF25DFB84, + 0xF25EFB84, 0xF25FFB84, 0xF260FB84, 0xF261FB84, 0xF262FB84, 0xF263FB84, 0xF264FB84, 0xF265FB84, 0xF266FB84, 0xF267FB84, 0xF268FB84, 0xF269FB84, 0xF26AFB84, 0xF26BFB84, 0xF26CFB84, + 0xF26DFB84, 0xF26EFB84, 0xF26FFB84, 0xF270FB84, 0xF271FB84, 0xF272FB84, 0xF273FB84, 0xF274FB84, 0xF275FB84, 0xF276FB84, 0xF277FB84, 0xF278FB84, 0xF279FB84, 0xF27AFB84, 0xF27BFB84, + 0xF27CFB84, 0xF27DFB84, 0xF27EFB84, 0xF27FFB84, 0xF280FB84, 0xF281FB84, 0xF282FB84, 0xF283FB84, 0xF284FB84, 0xF285FB84, 0xF286FB84, 0xF287FB84, 0xF288FB84, 0xF289FB84, 0xF28AFB84, + 0xF28BFB84, 0xF28CFB84, 0xF28DFB84, 0xF28EFB84, 0xF28FFB84, 0xF290FB84, 0xF291FB84, 0xF292FB84, 0xF293FB84, 0xF294FB84, 0xF295FB84, 0xF296FB84, 0xF297FB84, 0xF298FB84, 0xF299FB84, + 0xF29AFB84, 0xF29BFB84, 0xF29CFB84, 0xF29DFB84, 0xF29EFB84, 0xF29FFB84, 0xF2A0FB84, 0xF2A1FB84, 0xF2A2FB84, 0xF2A3FB84, 0xF2A4FB84, 0xF2A5FB84, 0xF2A6FB84, 0xF2A7FB84, 0xF2A8FB84, + 0xF2A9FB84, 0xF2AAFB84, 0xF2ABFB84, 0xF2ACFB84, 0xF2ADFB84, 0xF2AEFB84, 0xF2AFFB84, 0xF2B0FB84, 
0xF2B1FB84, 0xF2B2FB84, 0xF2B3FB84, 0xF2B4FB84, 0xF2B5FB84, 0xF2B6FB84, 0xF2B7FB84, + 0xF2B8FB84, 0xF2B9FB84, 0xF2BAFB84, 0xF2BBFB84, 0xF2BCFB84, 0xF2BDFB84, 0xF2BEFB84, 0xF2BFFB84, 0xF2C0FB84, 0xF2C1FB84, 0xF2C2FB84, 0xF2C3FB84, 0xF2C4FB84, 0xF2C5FB84, 0xF2C6FB84, + 0xF2C7FB84, 0xF2C8FB84, 0xF2C9FB84, 0xF2CAFB84, 0xF2CBFB84, 0xF2CCFB84, 0xF2CDFB84, 0xF2CEFB84, 0xF2CFFB84, 0xF2D0FB84, 0xF2D1FB84, 0xF2D2FB84, 0xF2D3FB84, 0xF2D4FB84, 0xF2D5FB84, + 0xF2D6FB84, 0xF2D7FB84, 0xF2D8FB84, 0xF2D9FB84, 0xF2DAFB84, 0xF2DBFB84, 0xF2DCFB84, 0xF2DDFB84, 0xF2DEFB84, 0xF2DFFB84, 0xF2E0FB84, 0xF2E1FB84, 0xF2E2FB84, 0xF2E3FB84, 0xF2E4FB84, + 0xF2E5FB84, 0xF2E6FB84, 0xF2E7FB84, 0xF2E8FB84, 0xF2E9FB84, 0xF2EAFB84, 0xF2EBFB84, 0xF2ECFB84, 0xF2EDFB84, 0xF2EEFB84, 0xF2EFFB84, 0xF2F0FB84, 0xF2F1FB84, 0xF2F2FB84, 0xF2F3FB84, + 0xF2F4FB84, 0xF2F5FB84, 0xF2F6FB84, 0xF2F7FB84, 0xF2F8FB84, 0xF2F9FB84, 0xF2FAFB84, 0xF2FBFB84, 0xF2FCFB84, 0xF2FDFB84, 0xF2FEFB84, 0xF2FFFB84, 0xF300FB84, 0xF301FB84, 0xF302FB84, + 0xF303FB84, 0xF304FB84, 0xF305FB84, 0xF306FB84, 0xF307FB84, 0xF308FB84, 0xF309FB84, 0xF30AFB84, 0xF30BFB84, 0xF30CFB84, 0xF30DFB84, 0xF30EFB84, 0xF30FFB84, 0xF310FB84, 0xF311FB84, + 0xF312FB84, 0xF313FB84, 0xF314FB84, 0xF315FB84, 0xF316FB84, 0xF317FB84, 0xF318FB84, 0xF319FB84, 0xF31AFB84, 0xF31BFB84, 0xF31CFB84, 0xF31DFB84, 0xF31EFB84, 0xF31FFB84, 0xF320FB84, + 0xF321FB84, 0xF322FB84, 0xF323FB84, 0xF324FB84, 0xF325FB84, 0xF326FB84, 0xF327FB84, 0xF328FB84, 0xF329FB84, 0xF32AFB84, 0xF32BFB84, 0xF32CFB84, 0xF32DFB84, 0xF32EFB84, 0xF32FFB84, + 0xF330FB84, 0xF331FB84, 0xF332FB84, 0xF333FB84, 0xF334FB84, 0xF335FB84, 0xF336FB84, 0xF337FB84, 0xF338FB84, 0xF339FB84, 0xF33AFB84, 0xF33BFB84, 0xF33CFB84, 0xF33DFB84, 0xF33EFB84, + 0xF33FFB84, 0xF340FB84, 0xF341FB84, 0xF342FB84, 0xF343FB84, 0xF344FB84, 0xF345FB84, 0xF346FB84, 0xF347FB84, 0xF348FB84, 0xF349FB84, 0xF34AFB84, 0xF34BFB84, 0xF34CFB84, 0xF34DFB84, + 0xF34EFB84, 0xF34FFB84, 0xF350FB84, 0xF351FB84, 0xF352FB84, 0xF353FB84, 0xF354FB84, 
0xF355FB84, 0xF356FB84, 0xF357FB84, 0xF358FB84, 0xF359FB84, 0xF35AFB84, 0xF35BFB84, 0xF35CFB84, + 0xF35DFB84, 0xF35EFB84, 0xF35FFB84, 0xF360FB84, 0xF361FB84, 0xF362FB84, 0xF363FB84, 0xF364FB84, 0xF365FB84, 0xF366FB84, 0xF367FB84, 0xF368FB84, 0xF369FB84, 0xF36AFB84, 0xF36BFB84, + 0xF36CFB84, 0xF36DFB84, 0xF36EFB84, 0xF36FFB84, 0xF370FB84, 0xF371FB84, 0xF372FB84, 0xF373FB84, 0xF374FB84, 0xF375FB84, 0xF376FB84, 0xF377FB84, 0xF378FB84, 0xF379FB84, 0xF37AFB84, + 0xF37BFB84, 0xF37CFB84, 0xF37DFB84, 0xF37EFB84, 0xF37FFB84, 0xF380FB84, 0xF381FB84, 0xF382FB84, 0xF383FB84, 0xF384FB84, 0xF385FB84, 0xF386FB84, 0xF387FB84, 0xF388FB84, 0xF389FB84, + 0xF38AFB84, 0xF38BFB84, 0xF38CFB84, 0xF38DFB84, 0xF38EFB84, 0xF38FFB84, 0xF390FB84, 0xF391FB84, 0xF392FB84, 0xF393FB84, 0xF394FB84, 0xF395FB84, 0xF396FB84, 0xF397FB84, 0xF398FB84, + 0xF399FB84, 0xF39AFB84, 0xF39BFB84, 0xF39CFB84, 0xF39DFB84, 0xF39EFB84, 0xF39FFB84, 0xF3A0FB84, 0xF3A1FB84, 0xF3A2FB84, 0xF3A3FB84, 0xF3A4FB84, 0xF3A5FB84, 0xF3A6FB84, 0xF3A7FB84, + 0xF3A8FB84, 0xF3A9FB84, 0xF3AAFB84, 0xF3ABFB84, 0xF3ACFB84, 0xF3ADFB84, 0xF3AEFB84, 0xF3AFFB84, 0xF3B0FB84, 0xF3B1FB84, 0xF3B2FB84, 0xF3B3FB84, 0xF3B4FB84, 0xF3B5FB84, 0xF3B6FB84, + 0xF3B7FB84, 0xF3B8FB84, 0xF3B9FB84, 0xF3BAFB84, 0xF3BBFB84, 0xF3BCFB84, 0xF3BDFB84, 0xF3BEFB84, 0xF3BFFB84, 0xF3C0FB84, 0xF3C1FB84, 0xF3C2FB84, 0xF3C3FB84, 0xF3C4FB84, 0xF3C5FB84, + 0xF3C6FB84, 0xF3C7FB84, 0xF3C8FB84, 0xF3C9FB84, 0xF3CAFB84, 0xF3CBFB84, 0xF3CCFB84, 0xF3CDFB84, 0xF3CEFB84, 0xF3CFFB84, 0xF3D0FB84, 0xF3D1FB84, 0xF3D2FB84, 0xF3D3FB84, 0xF3D4FB84, + 0xF3D5FB84, 0xF3D6FB84, 0xF3D7FB84, 0xF3D8FB84, 0xF3D9FB84, 0xF3DAFB84, 0xF3DBFB84, 0xF3DCFB84, 0xF3DDFB84, 0xF3DEFB84, 0xF3DFFB84, 0xF3E0FB84, 0xF3E1FB84, 0xF3E2FB84, 0xF3E3FB84, + 0xF3E4FB84, 0xF3E5FB84, 0xF3E6FB84, 0xF3E7FB84, 0xF3E8FB84, 0xF3E9FB84, 0xF3EAFB84, 0xF3EBFB84, 0xF3ECFB84, 0xF3EDFB84, 0xF3EEFB84, 0xF3EFFB84, 0xF3F0FB84, 0xF3F1FB84, 0xF3F2FB84, + 0xF3F3FB84, 0xF3F4FB84, 0xF3F5FB84, 0xF3F6FB84, 0xF3F7FB84, 0xF3F8FB84, 
0xF3F9FB84, 0xF3FAFB84, 0xF3FBFB84, 0xF3FCFB84, 0xF3FDFB84, 0xF3FEFB84, 0xF3FFFB84, 0xF400FB84, 0xF401FB84, + 0xF402FB84, 0xF403FB84, 0xF404FB84, 0xF405FB84, 0xF406FB84, 0xF407FB84, 0xF408FB84, 0xF409FB84, 0xF40AFB84, 0xF40BFB84, 0xF40CFB84, 0xF40DFB84, 0xF40EFB84, 0xF40FFB84, 0xF410FB84, + 0xF411FB84, 0xF412FB84, 0xF413FB84, 0xF414FB84, 0xF415FB84, 0xF416FB84, 0xF417FB84, 0xF418FB84, 0xF419FB84, 0xF41AFB84, 0xF41BFB84, 0xF41CFB84, 0xF41DFB84, 0xF41EFB84, 0xF41FFB84, + 0xF420FB84, 0xF421FB84, 0xF422FB84, 0xF423FB84, 0xF424FB84, 0xF425FB84, 0xF426FB84, 0xF427FB84, 0xF428FB84, 0xF429FB84, 0xF42AFB84, 0xF42BFB84, 0xF42CFB84, 0xF42DFB84, 0xF42EFB84, + 0xF42FFB84, 0xF430FB84, 0xF431FB84, 0xF432FB84, 0xF433FB84, 0xF434FB84, 0xF435FB84, 0xF436FB84, 0xF437FB84, 0xF438FB84, 0xF439FB84, 0xF43AFB84, 0xF43BFB84, 0xF43CFB84, 0xF43DFB84, + 0xF43EFB84, 0xF43FFB84, 0xF440FB84, 0xF441FB84, 0xF442FB84, 0xF443FB84, 0xF444FB84, 0xF445FB84, 0xF446FB84, 0xF447FB84, 0xF448FB84, 0xF449FB84, 0xF44AFB84, 0xF44BFB84, 0xF44CFB84, + 0xF44DFB84, 0xF44EFB84, 0xF44FFB84, 0xF450FB84, 0xF451FB84, 0xF452FB84, 0xF453FB84, 0xF454FB84, 0xF455FB84, 0xF456FB84, 0xF457FB84, 0xF458FB84, 0xF459FB84, 0xF45AFB84, 0xF45BFB84, + 0xF45CFB84, 0xF45DFB84, 0xF45EFB84, 0xF45FFB84, 0xF460FB84, 0xF461FB84, 0xF462FB84, 0xF463FB84, 0xF464FB84, 0xF465FB84, 0xF466FB84, 0xF467FB84, 0xF468FB84, 0xF469FB84, 0xF46AFB84, + 0xF46BFB84, 0xF46CFB84, 0xF46DFB84, 0xF46EFB84, 0xF46FFB84, 0xF470FB84, 0xF471FB84, 0xF472FB84, 0xF473FB84, 0xF474FB84, 0xF475FB84, 0xF476FB84, 0xF477FB84, 0xF478FB84, 0xF479FB84, + 0xF47AFB84, 0xF47BFB84, 0xF47CFB84, 0xF47DFB84, 0xF47EFB84, 0xF47FFB84, 0xF480FB84, 0xF481FB84, 0xF482FB84, 0xF483FB84, 0xF484FB84, 0xF485FB84, 0xF486FB84, 0xF487FB84, 0xF488FB84, + 0xF489FB84, 0xF48AFB84, 0xF48BFB84, 0xF48CFB84, 0xF48DFB84, 0xF48EFB84, 0xF48FFB84, 0xF490FB84, 0xF491FB84, 0xF492FB84, 0xF493FB84, 0xF494FB84, 0xF495FB84, 0xF496FB84, 0xF497FB84, + 0xF498FB84, 0xF499FB84, 0xF49AFB84, 0xF49BFB84, 0xF49CFB84, 
0xF49DFB84, 0xF49EFB84, 0xF49FFB84, 0xF4A0FB84, 0xF4A1FB84, 0xF4A2FB84, 0xF4A3FB84, 0xF4A4FB84, 0xF4A5FB84, 0xF4A6FB84, + 0xF4A7FB84, 0xF4A8FB84, 0xF4A9FB84, 0xF4AAFB84, 0xF4ABFB84, 0xF4ACFB84, 0xF4ADFB84, 0xF4AEFB84, 0xF4AFFB84, 0xF4B0FB84, 0xF4B1FB84, 0xF4B2FB84, 0xF4B3FB84, 0xF4B4FB84, 0xF4B5FB84, + 0xF4B6FB84, 0xF4B7FB84, 0xF4B8FB84, 0xF4B9FB84, 0xF4BAFB84, 0xF4BBFB84, 0xF4BCFB84, 0xF4BDFB84, 0xF4BEFB84, 0xF4BFFB84, 0xF4C0FB84, 0xF4C1FB84, 0xF4C2FB84, 0xF4C3FB84, 0xF4C4FB84, + 0xF4C5FB84, 0xF4C6FB84, 0xF4C7FB84, 0xF4C8FB84, 0xF4C9FB84, 0xF4CAFB84, 0xF4CBFB84, 0xF4CCFB84, 0xF4CDFB84, 0xF4CEFB84, 0xF4CFFB84, 0xF4D0FB84, 0xF4D1FB84, 0xF4D2FB84, 0xF4D3FB84, + 0xF4D4FB84, 0xF4D5FB84, 0xF4D6FB84, 0xF4D7FB84, 0xF4D8FB84, 0xF4D9FB84, 0xF4DAFB84, 0xF4DBFB84, 0xF4DCFB84, 0xF4DDFB84, 0xF4DEFB84, 0xF4DFFB84, 0xF4E0FB84, 0xF4E1FB84, 0xF4E2FB84, + 0xF4E3FB84, 0xF4E4FB84, 0xF4E5FB84, 0xF4E6FB84, 0xF4E7FB84, 0xF4E8FB84, 0xF4E9FB84, 0xF4EAFB84, 0xF4EBFB84, 0xF4ECFB84, 0xF4EDFB84, 0xF4EEFB84, 0xF4EFFB84, 0xF4F0FB84, 0xF4F1FB84, + 0xF4F2FB84, 0xF4F3FB84, 0xF4F4FB84, 0xF4F5FB84, 0xF4F6FB84, 0xF4F7FB84, 0xF4F8FB84, 0xF4F9FB84, 0xF4FAFB84, 0xF4FBFB84, 0xF4FCFB84, 0xF4FDFB84, 0xF4FEFB84, 0xF4FFFB84, 0xF500FB84, + 0xF501FB84, 0xF502FB84, 0xF503FB84, 0xF504FB84, 0xF505FB84, 0xF506FB84, 0xF507FB84, 0xF508FB84, 0xF509FB84, 0xF50AFB84, 0xF50BFB84, 0xF50CFB84, 0xF50DFB84, 0xF50EFB84, 0xF50FFB84, + 0xF510FB84, 0xF511FB84, 0xF512FB84, 0xF513FB84, 0xF514FB84, 0xF515FB84, 0xF516FB84, 0xF517FB84, 0xF518FB84, 0xF519FB84, 0xF51AFB84, 0xF51BFB84, 0xF51CFB84, 0xF51DFB84, 0xF51EFB84, + 0xF51FFB84, 0xF520FB84, 0xF521FB84, 0xF522FB84, 0xF523FB84, 0xF524FB84, 0xF525FB84, 0xF526FB84, 0xF527FB84, 0xF528FB84, 0xF529FB84, 0xF52AFB84, 0xF52BFB84, 0xF52CFB84, 0xF52DFB84, + 0xF52EFB84, 0xF52FFB84, 0xF530FB84, 0xF531FB84, 0xF532FB84, 0xF533FB84, 0xF534FB84, 0xF535FB84, 0xF536FB84, 0xF537FB84, 0xF538FB84, 0xF539FB84, 0xF53AFB84, 0xF53BFB84, 0xF53CFB84, + 0xF53DFB84, 0xF53EFB84, 0xF53FFB84, 0xF540FB84, 
0xF541FB84, 0xF542FB84, 0xF543FB84, 0xF544FB84, 0xF545FB84, 0xF546FB84, 0xF547FB84, 0xF548FB84, 0xF549FB84, 0xF54AFB84, 0xF54BFB84, + 0xF54CFB84, 0xF54DFB84, 0xF54EFB84, 0xF54FFB84, 0xF550FB84, 0xF551FB84, 0xF552FB84, 0xF553FB84, 0xF554FB84, 0xF555FB84, 0xF556FB84, 0xF557FB84, 0xF558FB84, 0xF559FB84, 0xF55AFB84, + 0xF55BFB84, 0xF55CFB84, 0xF55DFB84, 0xF55EFB84, 0xF55FFB84, 0xF560FB84, 0xF561FB84, 0xF562FB84, 0xF563FB84, 0xF564FB84, 0xF565FB84, 0xF566FB84, 0xF567FB84, 0xF568FB84, 0xF569FB84, + 0xF56AFB84, 0xF56BFB84, 0xF56CFB84, 0xF56DFB84, 0xF56EFB84, 0xF56FFB84, 0xF570FB84, 0xF571FB84, 0xF572FB84, 0xF573FB84, 0xF574FB84, 0xF575FB84, 0xF576FB84, 0xF577FB84, 0xF578FB84, + 0xF579FB84, 0xF57AFB84, 0xF57BFB84, 0xF57CFB84, 0xF57DFB84, 0xF57EFB84, 0xF57FFB84, 0xF580FB84, 0xF581FB84, 0xF582FB84, 0xF583FB84, 0xF584FB84, 0xF585FB84, 0xF586FB84, 0xF587FB84, + 0xF588FB84, 0xF589FB84, 0xF58AFB84, 0xF58BFB84, 0xF58CFB84, 0xF58DFB84, 0xF58EFB84, 0xF58FFB84, 0xF590FB84, 0xF591FB84, 0xF592FB84, 0xF593FB84, 0xF594FB84, 0xF595FB84, 0xF596FB84, + 0xF597FB84, 0xF598FB84, 0xF599FB84, 0xF59AFB84, 0xF59BFB84, 0xF59CFB84, 0xF59DFB84, 0xF59EFB84, 0xF59FFB84, 0xF5A0FB84, 0xF5A1FB84, 0xF5A2FB84, 0xF5A3FB84, 0xF5A4FB84, 0xF5A5FB84, + 0xF5A6FB84, 0xF5A7FB84, 0xF5A8FB84, 0xF5A9FB84, 0xF5AAFB84, 0xF5ABFB84, 0xF5ACFB84, 0xF5ADFB84, 0xF5AEFB84, 0xF5AFFB84, 0xF5B0FB84, 0xF5B1FB84, 0xF5B2FB84, 0xF5B3FB84, 0xF5B4FB84, + 0xF5B5FB84, 0xF5B6FB84, 0xF5B7FB84, 0xF5B8FB84, 0xF5B9FB84, 0xF5BAFB84, 0xF5BBFB84, 0xF5BCFB84, 0xF5BDFB84, 0xF5BEFB84, 0xF5BFFB84, 0xF5C0FB84, 0xF5C1FB84, 0xF5C2FB84, 0xF5C3FB84, + 0xF5C4FB84, 0xF5C5FB84, 0xF5C6FB84, 0xF5C7FB84, 0xF5C8FB84, 0xF5C9FB84, 0xF5CAFB84, 0xF5CBFB84, 0xF5CCFB84, 0xF5CDFB84, 0xF5CEFB84, 0xF5CFFB84, 0xF5D0FB84, 0xF5D1FB84, 0xF5D2FB84, + 0xF5D3FB84, 0xF5D4FB84, 0xF5D5FB84, 0xF5D6FB84, 0xF5D7FB84, 0xF5D8FB84, 0xF5D9FB84, 0xF5DAFB84, 0xF5DBFB84, 0xF5DCFB84, 0xF5DDFB84, 0xF5DEFB84, 0xF5DFFB84, 0xF5E0FB84, 0xF5E1FB84, + 0xF5E2FB84, 0xF5E3FB84, 0xF5E4FB84, 
0xF5E5FB84, 0xF5E6FB84, 0xF5E7FB84, 0xF5E8FB84, 0xF5E9FB84, 0xF5EAFB84, 0xF5EBFB84, 0xF5ECFB84, 0xF5EDFB84, 0xF5EEFB84, 0xF5EFFB84, 0xF5F0FB84, + 0xF5F1FB84, 0xF5F2FB84, 0xF5F3FB84, 0xF5F4FB84, 0xF5F5FB84, 0xF5F6FB84, 0xF5F7FB84, 0xF5F8FB84, 0xF5F9FB84, 0xF5FAFB84, 0xF5FBFB84, 0xF5FCFB84, 0xF5FDFB84, 0xF5FEFB84, 0xF5FFFB84, + 0xF600FB84, 0xF601FB84, 0xF602FB84, 0xF603FB84, 0xF604FB84, 0xF605FB84, 0xF606FB84, 0xF607FB84, 0xF608FB84, 0xF609FB84, 0xF60AFB84, 0xF60BFB84, 0xF60CFB84, 0xF60DFB84, 0xF60EFB84, + 0xF60FFB84, 0xF610FB84, 0xF611FB84, 0xF612FB84, 0xF613FB84, 0xF614FB84, 0xF615FB84, 0xF616FB84, 0xF617FB84, 0xF618FB84, 0xF619FB84, 0xF61AFB84, 0xF61BFB84, 0xF61CFB84, 0xF61DFB84, + 0xF61EFB84, 0xF61FFB84, 0xF620FB84, 0xF621FB84, 0xF622FB84, 0xF623FB84, 0xF624FB84, 0xF625FB84, 0xF626FB84, 0xF627FB84, 0xF628FB84, 0xF629FB84, 0xF62AFB84, 0xF62BFB84, 0xF62CFB84, + 0xF62DFB84, 0xF62EFB84, 0xF62FFB84, 0xF630FB84, 0xF631FB84, 0xF632FB84, 0xF633FB84, 0xF634FB84, 0xF635FB84, 0xF636FB84, 0xF637FB84, 0xF638FB84, 0xF639FB84, 0xF63AFB84, 0xF63BFB84, + 0xF63CFB84, 0xF63DFB84, 0xF63EFB84, 0xF63FFB84, 0xF640FB84, 0xF641FB84, 0xF642FB84, 0xF643FB84, 0xF644FB84, 0xF645FB84, 0xF646FB84, 0xF647FB84, 0xF648FB84, 0xF649FB84, 0xF64AFB84, + 0xF64BFB84, 0xF64CFB84, 0xF64DFB84, 0xF64EFB84, 0xF64FFB84, 0xF650FB84, 0xF651FB84, 0xF652FB84, 0xF653FB84, 0xF654FB84, 0xF655FB84, 0xF656FB84, 0xF657FB84, 0xF658FB84, 0xF659FB84, + 0xF65AFB84, 0xF65BFB84, 0xF65CFB84, 0xF65DFB84, 0xF65EFB84, 0xF65FFB84, 0xF660FB84, 0xF661FB84, 0xF662FB84, 0xF663FB84, 0xF664FB84, 0xF665FB84, 0xF666FB84, 0xF667FB84, 0xF668FB84, + 0xF669FB84, 0xF66AFB84, 0xF66BFB84, 0xF66CFB84, 0xF66DFB84, 0xF66EFB84, 0xF66FFB84, 0xF670FB84, 0xF671FB84, 0xF672FB84, 0xF673FB84, 0xF674FB84, 0xF675FB84, 0xF676FB84, 0xF677FB84, + 0xF678FB84, 0xF679FB84, 0xF67AFB84, 0xF67BFB84, 0xF67CFB84, 0xF67DFB84, 0xF67EFB84, 0xF67FFB84, 0xF680FB84, 0xF681FB84, 0xF682FB84, 0xF683FB84, 0xF684FB84, 0xF685FB84, 0xF686FB84, + 0xF687FB84, 0xF688FB84, 
0xF689FB84, 0xF68AFB84, 0xF68BFB84, 0xF68CFB84, 0xF68DFB84, 0xF68EFB84, 0xF68FFB84, 0xF690FB84, 0xF691FB84, 0xF692FB84, 0xF693FB84, 0xF694FB84, 0xF695FB84, + 0xF696FB84, 0xF697FB84, 0xF698FB84, 0xF699FB84, 0xF69AFB84, 0xF69BFB84, 0xF69CFB84, 0xF69DFB84, 0xF69EFB84, 0xF69FFB84, 0xF6A0FB84, 0xF6A1FB84, 0xF6A2FB84, 0xF6A3FB84, 0xF6A4FB84, + 0xF6A5FB84, 0xF6A6FB84, 0xF6A7FB84, 0xF6A8FB84, 0xF6A9FB84, 0xF6AAFB84, 0xF6ABFB84, 0xF6ACFB84, 0xF6ADFB84, 0xF6AEFB84, 0xF6AFFB84, 0xF6B0FB84, 0xF6B1FB84, 0xF6B2FB84, 0xF6B3FB84, + 0xF6B4FB84, 0xF6B5FB84, 0xF6B6FB84, 0xF6B7FB84, 0xF6B8FB84, 0xF6B9FB84, 0xF6BAFB84, 0xF6BBFB84, 0xF6BCFB84, 0xF6BDFB84, 0xF6BEFB84, 0xF6BFFB84, 0xF6C0FB84, 0xF6C1FB84, 0xF6C2FB84, + 0xF6C3FB84, 0xF6C4FB84, 0xF6C5FB84, 0xF6C6FB84, 0xF6C7FB84, 0xF6C8FB84, 0xF6C9FB84, 0xF6CAFB84, 0xF6CBFB84, 0xF6CCFB84, 0xF6CDFB84, 0xF6CEFB84, 0xF6CFFB84, 0xF6D0FB84, 0xF6D1FB84, + 0xF6D2FB84, 0xF6D3FB84, 0xF6D4FB84, 0xF6D5FB84, 0xF6D6FB84, 0xF6D7FB84, 0xF6D8FB84, 0xF6D9FB84, 0xF6DAFB84, 0xF6DBFB84, 0xF6DCFB84, 0xF6DDFB84, 0xF6DEFB84, 0xF6DFFB84, 0xF6E0FB84, + 0xF6E1FB84, 0xF6E2FB84, 0xF6E3FB84, 0xF6E4FB84, 0xF6E5FB84, 0xF6E6FB84, 0xF6E7FB84, 0xF6E8FB84, 0xF6E9FB84, 0xF6EAFB84, 0xF6EBFB84, 0xF6ECFB84, 0xF6EDFB84, 0xF6EEFB84, 0xF6EFFB84, + 0xF6F0FB84, 0xF6F1FB84, 0xF6F2FB84, 0xF6F3FB84, 0xF6F4FB84, 0xF6F5FB84, 0xF6F6FB84, 0xF6F7FB84, 0xF6F8FB84, 0xF6F9FB84, 0xF6FAFB84, 0xF6FBFB84, 0xF6FCFB84, 0xF6FDFB84, 0xF6FEFB84, + 0xF6FFFB84, 0xF700FB84, 0xF701FB84, 0xF702FB84, 0xF703FB84, 0xF704FB84, 0xF705FB84, 0xF706FB84, 0xF707FB84, 0xF708FB84, 0xF709FB84, 0xF70AFB84, 0xF70BFB84, 0xF70CFB84, 0xF70DFB84, + 0xF70EFB84, 0xF70FFB84, 0xF710FB84, 0xF711FB84, 0xF712FB84, 0xF713FB84, 0xF714FB84, 0xF715FB84, 0xF716FB84, 0xF717FB84, 0xF718FB84, 0xF719FB84, 0xF71AFB84, 0xF71BFB84, 0xF71CFB84, + 0xF71DFB84, 0xF71EFB84, 0xF71FFB84, 0xF720FB84, 0xF721FB84, 0xF722FB84, 0xF723FB84, 0xF724FB84, 0xF725FB84, 0xF726FB84, 0xF727FB84, 0xF728FB84, 0xF729FB84, 0xF72AFB84, 0xF72BFB84, + 0xF72CFB84, 
0xF72DFB84, 0xF72EFB84, 0xF72FFB84, 0xF730FB84, 0xF731FB84, 0xF732FB84, 0xF733FB84, 0xF734FB84, 0xF735FB84, 0xF736FB84, 0xF737FB84, 0xF738FB84, 0xF739FB84, 0xF73AFB84, + 0xF73BFB84, 0xF73CFB84, 0xF73DFB84, 0xF73EFB84, 0xF73FFB84, 0xF740FB84, 0xF741FB84, 0xF742FB84, 0xF743FB84, 0xF744FB84, 0xF745FB84, 0xF746FB84, 0xF747FB84, 0xF748FB84, 0xF749FB84, + 0xF74AFB84, 0xF74BFB84, 0xF74CFB84, 0xF74DFB84, 0xF74EFB84, 0xF74FFB84, 0xF750FB84, 0xF751FB84, 0xF752FB84, 0xF753FB84, 0xF754FB84, 0xF755FB84, 0xF756FB84, 0xF757FB84, 0xF758FB84, + 0xF759FB84, 0xF75AFB84, 0xF75BFB84, 0xF75CFB84, 0xF75DFB84, 0xF75EFB84, 0xF75FFB84, 0xF760FB84, 0xF761FB84, 0xF762FB84, 0xF763FB84, 0xF764FB84, 0xF765FB84, 0xF766FB84, 0xF767FB84, + 0xF768FB84, 0xF769FB84, 0xF76AFB84, 0xF76BFB84, 0xF76CFB84, 0xF76DFB84, 0xF76EFB84, 0xF76FFB84, 0xF770FB84, 0xF771FB84, 0xF772FB84, 0xF773FB84, 0xF774FB84, 0xF775FB84, 0xF776FB84, + 0xF777FB84, 0xF778FB84, 0xF779FB84, 0xF77AFB84, 0xF77BFB84, 0xF77CFB84, 0xF77DFB84, 0xF77EFB84, 0xF77FFB84, 0xF780FB84, 0xF781FB84, 0xF782FB84, 0xF783FB84, 0xF784FB84, 0xF785FB84, + 0xF786FB84, 0xF787FB84, 0xF788FB84, 0xF789FB84, 0xF78AFB84, 0xF78BFB84, 0xF78CFB84, 0xF78DFB84, 0xF78EFB84, 0xF78FFB84, 0xF790FB84, 0xF791FB84, 0xF792FB84, 0xF793FB84, 0xF794FB84, + 0xF795FB84, 0xF796FB84, 0xF797FB84, 0xF798FB84, 0xF799FB84, 0xF79AFB84, 0xF79BFB84, 0xF79CFB84, 0xF79DFB84, 0xF79EFB84, 0xF79FFB84, 0xF7A0FB84, 0xF7A1FB84, 0xF7A2FB84, 0xF7A3FB84, + 0xF7A4FB84, 0xF7A5FB84, 0xF7A6FB84, 0xF7A7FB84, 0xF7A8FB84, 0xF7A9FB84, 0xF7AAFB84, 0xF7ABFB84, 0xF7ACFB84, 0xF7ADFB84, 0xF7AEFB84, 0xF7AFFB84, 0xF7B0FB84, 0xF7B1FB84, 0xF7B2FB84, + 0xF7B3FB84, 0xF7B4FB84, 0xF7B5FB84, 0xF7B6FB84, 0xF7B7FB84, 0xF7B8FB84, 0xF7B9FB84, 0xF7BAFB84, 0xF7BBFB84, 0xF7BCFB84, 0xF7BDFB84, 0xF7BEFB84, 0xF7BFFB84, 0xF7C0FB84, 0xF7C1FB84, + 0xF7C2FB84, 0xF7C3FB84, 0xF7C4FB84, 0xF7C5FB84, 0xF7C6FB84, 0xF7C7FB84, 0xF7C8FB84, 0xF7C9FB84, 0xF7CAFB84, 0xF7CBFB84, 0xF7CCFB84, 0xF7CDFB84, 0xF7CEFB84, 0xF7CFFB84, 0xF7D0FB84, + 
0xF7D1FB84, 0xF7D2FB84, 0xF7D3FB84, 0xF7D4FB84, 0xF7D5FB84, 0xF7D6FB84, 0xF7D7FB84, 0xF7D8FB84, 0xF7D9FB84, 0xF7DAFB84, 0xF7DBFB84, 0xF7DCFB84, 0xF7DDFB84, 0xF7DEFB84, 0xF7DFFB84, + 0xF7E0FB84, 0xF7E1FB84, 0xF7E2FB84, 0xF7E3FB84, 0xF7E4FB84, 0xF7E5FB84, 0xF7E6FB84, 0xF7E7FB84, 0xF7E8FB84, 0xF7E9FB84, 0xF7EAFB84, 0xF7EBFB84, 0xF7ECFB84, 0xF7EDFB84, 0xF7EEFB84, + 0xF7EFFB84, 0xF7F0FB84, 0xF7F1FB84, 0xF7F2FB84, 0xF7F3FB84, 0xF7F4FB84, 0xF7F5FB84, 0xF7F6FB84, 0xF7F7FB84, 0xF7F8FB84, 0xF7F9FB84, 0xF7FAFB84, 0xF7FBFB84, 0xF7FCFB84, 0xF7FDFB84, + 0xF7FEFB84, 0xF7FFFB84, 0xF800FB84, 0xF801FB84, 0xF802FB84, 0xF803FB84, 0xF804FB84, 0xF805FB84, 0xF806FB84, 0xF807FB84, 0xF808FB84, 0xF809FB84, 0xF80AFB84, 0xF80BFB84, 0xF80CFB84, + 0xF80DFB84, 0xF80EFB84, 0xF80FFB84, 0xF810FB84, 0xF811FB84, 0xF812FB84, 0xF813FB84, 0xF814FB84, 0xF815FB84, 0xF816FB84, 0xF817FB84, 0xF818FB84, 0xF819FB84, 0xF81AFB84, 0xF81BFB84, + 0xF81CFB84, 0xF81DFB84, 0xF81EFB84, 0xF81FFB84, 0xF820FB84, 0xF821FB84, 0xF822FB84, 0xF823FB84, 0xF824FB84, 0xF825FB84, 0xF826FB84, 0xF827FB84, 0xF828FB84, 0xF829FB84, 0xF82AFB84, + 0xF82BFB84, 0xF82CFB84, 0xF82DFB84, 0xF82EFB84, 0xF82FFB84, 0xF830FB84, 0xF831FB84, 0xF832FB84, 0xF833FB84, 0xF834FB84, 0xF835FB84, 0xF836FB84, 0xF837FB84, 0xF838FB84, 0xF839FB84, + 0xF83AFB84, 0xF83BFB84, 0xF83CFB84, 0xF83DFB84, 0xF83EFB84, 0xF83FFB84, 0xF840FB84, 0xF841FB84, 0xF842FB84, 0xF843FB84, 0xF844FB84, 0xF845FB84, 0xF846FB84, 0xF847FB84, 0xF848FB84, + 0xF849FB84, 0xF84AFB84, 0xF84BFB84, 0xF84CFB84, 0xF84DFB84, 0xF84EFB84, 0xF84FFB84, 0xF850FB84, 0xF851FB84, 0xF852FB84, 0xF853FB84, 0xF854FB84, 0xF855FB84, 0xF856FB84, 0xF857FB84, + 0xF858FB84, 0xF859FB84, 0xF85AFB84, 0xF85BFB84, 0xF85CFB84, 0xF85DFB84, 0xF85EFB84, 0xF85FFB84, 0xF860FB84, 0xF861FB84, 0xF862FB84, 0xF863FB84, 0xF864FB84, 0xF865FB84, 0xF866FB84, + 0xF867FB84, 0xF868FB84, 0xF869FB84, 0xF86AFB84, 0xF86BFB84, 0xF86CFB84, 0xF86DFB84, 0xF86EFB84, 0xF86FFB84, 0xF870FB84, 0xF871FB84, 0xF872FB84, 0xF873FB84, 0xF874FB84, 0xF875FB84, 
+ 0xF876FB84, 0xF877FB84, 0xF878FB84, 0xF879FB84, 0xF87AFB84, 0xF87BFB84, 0xF87CFB84, 0xF87DFB84, 0xF87EFB84, 0xF87FFB84, 0xF880FB84, 0xF881FB84, 0xF882FB84, 0xF883FB84, 0xF884FB84, + 0xF885FB84, 0xF886FB84, 0xF887FB84, 0xF888FB84, 0xF889FB84, 0xF88AFB84, 0xF88BFB84, 0xF88CFB84, 0xF88DFB84, 0xF88EFB84, 0xF88FFB84, 0xF890FB84, 0xF891FB84, 0xF892FB84, 0xF893FB84, + 0xF894FB84, 0xF895FB84, 0xF896FB84, 0xF897FB84, 0xF898FB84, 0xF899FB84, 0xF89AFB84, 0xF89BFB84, 0xF89CFB84, 0xF89DFB84, 0xF89EFB84, 0xF89FFB84, 0xF8A0FB84, 0xF8A1FB84, 0xF8A2FB84, + 0xF8A3FB84, 0xF8A4FB84, 0xF8A5FB84, 0xF8A6FB84, 0xF8A7FB84, 0xF8A8FB84, 0xF8A9FB84, 0xF8AAFB84, 0xF8ABFB84, 0xF8ACFB84, 0xF8ADFB84, 0xF8AEFB84, 0xF8AFFB84, 0xF8B0FB84, 0xF8B1FB84, + 0xF8B2FB84, 0xF8B3FB84, 0xF8B4FB84, 0xF8B5FB84, 0xF8B6FB84, 0xF8B7FB84, 0xF8B8FB84, 0xF8B9FB84, 0xF8BAFB84, 0xF8BBFB84, 0xF8BCFB84, 0xF8BDFB84, 0xF8BEFB84, 0xF8BFFB84, 0xF8C0FB84, + 0xF8C1FB84, 0xF8C2FB84, 0xF8C3FB84, 0xF8C4FB84, 0xF8C5FB84, 0xF8C6FB84, 0xF8C7FB84, 0xF8C8FB84, 0xF8C9FB84, 0xF8CAFB84, 0xF8CBFB84, 0xF8CCFB84, 0xF8CDFB84, 0xF8CEFB84, 0xF8CFFB84, + 0xF8D0FB84, 0xF8D1FB84, 0xF8D2FB84, 0xF8D3FB84, 0xF8D4FB84, 0xF8D5FB84, 0xF8D6FB84, 0xF8D7FB84, 0xF8D8FB84, 0xF8D9FB84, 0xF8DAFB84, 0xF8DBFB84, 0xF8DCFB84, 0xF8DDFB84, 0xF8DEFB84, + 0xF8DFFB84, 0xF8E0FB84, 0xF8E1FB84, 0xF8E2FB84, 0xF8E3FB84, 0xF8E4FB84, 0xF8E5FB84, 0xF8E6FB84, 0xF8E7FB84, 0xF8E8FB84, 0xF8E9FB84, 0xF8EAFB84, 0xF8EBFB84, 0xF8ECFB84, 0xF8EDFB84, + 0xF8EEFB84, 0xF8EFFB84, 0xF8F0FB84, 0xF8F1FB84, 0xF8F2FB84, 0xF8F3FB84, 0xF8F4FB84, 0xF8F5FB84, 0xF8F6FB84, 0xF8F7FB84, 0xF8F8FB84, 0xF8F9FB84, 0xF8FAFB84, 0xF8FBFB84, 0xF8FCFB84, + 0xF8FDFB84, 0xF8FEFB84, 0xF8FFFB84, 0xF900FB84, 0xF901FB84, 0xF902FB84, 0xF903FB84, 0xF904FB84, 0xF905FB84, 0xF906FB84, 0xF907FB84, 0xF908FB84, 0xF909FB84, 0xF90AFB84, 0xF90BFB84, + 0xF90CFB84, 0xF90DFB84, 0xF90EFB84, 0xF90FFB84, 0xF910FB84, 0xF911FB84, 0xF912FB84, 0xF913FB84, 0xF914FB84, 0xF915FB84, 0xF916FB84, 0xF917FB84, 0xF918FB84, 0xF919FB84, 
0xF91AFB84, + 0xF91BFB84, 0xF91CFB84, 0xF91DFB84, 0xF91EFB84, 0xF91FFB84, 0xF920FB84, 0xF921FB84, 0xF922FB84, 0xF923FB84, 0xF924FB84, 0xF925FB84, 0xF926FB84, 0xF927FB84, 0xF928FB84, 0xF929FB84, + 0xF92AFB84, 0xF92BFB84, 0xF92CFB84, 0xF92DFB84, 0xF92EFB84, 0xF92FFB84, 0xF930FB84, 0xF931FB84, 0xF932FB84, 0xF933FB84, 0xF934FB84, 0xF935FB84, 0xF936FB84, 0xF937FB84, 0xF938FB84, + 0xF939FB84, 0xF93AFB84, 0xF93BFB84, 0xF93CFB84, 0xF93DFB84, 0xF93EFB84, 0xF93FFB84, 0xF940FB84, 0xF941FB84, 0xF942FB84, 0xF943FB84, 0xF944FB84, 0xF945FB84, 0xF946FB84, 0xF947FB84, + 0xF948FB84, 0xF949FB84, 0xF94AFB84, 0xF94BFB84, 0xF94CFB84, 0xF94DFB84, 0xF94EFB84, 0xF94FFB84, 0xF950FB84, 0xF951FB84, 0xF952FB84, 0xF953FB84, 0xF954FB84, 0xF955FB84, 0xF956FB84, + 0xF957FB84, 0xF958FB84, 0xF959FB84, 0xF95AFB84, 0xF95BFB84, 0xF95CFB84, 0xF95DFB84, 0xF95EFB84, 0xF95FFB84, 0xF960FB84, 0xF961FB84, 0xF962FB84, 0xF963FB84, 0xF964FB84, 0xF965FB84, + 0xF966FB84, 0xF967FB84, 0xF968FB84, 0xF969FB84, 0xF96AFB84, 0xF96BFB84, 0xF96CFB84, 0xF96DFB84, 0xF96EFB84, 0xF96FFB84, 0xF970FB84, 0xF971FB84, 0xF972FB84, 0xF973FB84, 0xF974FB84, + 0xF975FB84, 0xF976FB84, 0xF977FB84, 0xF978FB84, 0xF979FB84, 0xF97AFB84, 0xF97BFB84, 0xF97CFB84, 0xF97DFB84, 0xF97EFB84, 0xF97FFB84, 0xF980FB84, 0xF981FB84, 0xF982FB84, 0xF983FB84, + 0xF984FB84, 0xF985FB84, 0xF986FB84, 0xF987FB84, 0xF988FB84, 0xF989FB84, 0xF98AFB84, 0xF98BFB84, 0xF98CFB84, 0xF98DFB84, 0xF98EFB84, 0xF98FFB84, 0xF990FB84, 0xF991FB84, 0xF992FB84, + 0xF993FB84, 0xF994FB84, 0xF995FB84, 0xF996FB84, 0xF997FB84, 0xF998FB84, 0xF999FB84, 0xF99AFB84, 0xF99BFB84, 0xF99CFB84, 0xF99DFB84, 0xF99EFB84, 0xF99FFB84, 0xF9A0FB84, 0xF9A1FB84, + 0xF9A2FB84, 0xF9A3FB84, 0xF9A4FB84, 0xF9A5FB84, 0xF9A6FB84, 0xF9A7FB84, 0xF9A8FB84, 0xF9A9FB84, 0xF9AAFB84, 0xF9ABFB84, 0xF9ACFB84, 0xF9ADFB84, 0xF9AEFB84, 0xF9AFFB84, 0xF9B0FB84, + 0xF9B1FB84, 0xF9B2FB84, 0xF9B3FB84, 0xF9B4FB84, 0xF9B5FB84, 0xF9B6FB84, 0xF9B7FB84, 0xF9B8FB84, 0xF9B9FB84, 0xF9BAFB84, 0xF9BBFB84, 0xF9BCFB84, 0xF9BDFB84, 
0xF9BEFB84, 0xF9BFFB84, + 0xF9C0FB84, 0xF9C1FB84, 0xF9C2FB84, 0xF9C3FB84, 0xF9C4FB84, 0xF9C5FB84, 0xF9C6FB84, 0xF9C7FB84, 0xF9C8FB84, 0xF9C9FB84, 0xF9CAFB84, 0xF9CBFB84, 0xF9CCFB84, 0xF9CDFB84, 0xF9CEFB84, + 0xF9CFFB84, 0xF9D0FB84, 0xF9D1FB84, 0xF9D2FB84, 0xF9D3FB84, 0xF9D4FB84, 0xF9D5FB84, 0xF9D6FB84, 0xF9D7FB84, 0xF9D8FB84, 0xF9D9FB84, 0xF9DAFB84, 0xF9DBFB84, 0xF9DCFB84, 0xF9DDFB84, + 0xF9DEFB84, 0xF9DFFB84, 0xF9E0FB84, 0xF9E1FB84, 0xF9E2FB84, 0xF9E3FB84, 0xF9E4FB84, 0xF9E5FB84, 0xF9E6FB84, 0xF9E7FB84, 0xF9E8FB84, 0xF9E9FB84, 0xF9EAFB84, 0xF9EBFB84, 0xF9ECFB84, + 0xF9EDFB84, 0xF9EEFB84, 0xF9EFFB84, 0xF9F0FB84, 0xF9F1FB84, 0xF9F2FB84, 0xF9F3FB84, 0xF9F4FB84, 0xF9F5FB84, 0xF9F6FB84, 0xF9F7FB84, 0xF9F8FB84, 0xF9F9FB84, 0xF9FAFB84, 0xF9FBFB84, + 0xF9FCFB84, 0xF9FDFB84, 0xF9FEFB84, 0xF9FFFB84, 0xFA00FB84, 0xFA01FB84, 0xFA02FB84, 0xFA03FB84, 0xFA04FB84, 0xFA05FB84, 0xFA06FB84, 0xFA07FB84, 0xFA08FB84, 0xFA09FB84, 0xFA0AFB84, + 0xFA0BFB84, 0xFA0CFB84, 0xFA0DFB84, 0xFA0EFB84, 0xFA0FFB84, 0xFA10FB84, 0xFA11FB84, 0xFA12FB84, 0xFA13FB84, 0xFA14FB84, 0xFA15FB84, 0xFA16FB84, 0xFA17FB84, 0xFA18FB84, 0xFA19FB84, + 0xFA1AFB84, 0xFA1BFB84, 0xFA1CFB84, 0xFA1DFB84, 0xFA1EFB84, 0xFA1FFB84, 0xFA20FB84, 0xFA21FB84, 0xFA22FB84, 0xFA23FB84, 0xFA24FB84, 0xFA25FB84, 0xFA26FB84, 0xFA27FB84, 0xFA28FB84, + 0xFA29FB84, 0xFA2AFB84, 0xFA2BFB84, 0xFA2CFB84, 0xFA2DFB84, 0xFA2EFB84, 0xFA2FFB84, 0xFA30FB84, 0xFA31FB84, 0xFA32FB84, 0xFA33FB84, 0xFA34FB84, 0xFA35FB84, 0xFA36FB84, 0xFA37FB84, + 0xFA38FB84, 0xFA39FB84, 0xFA3AFB84, 0xFA3BFB84, 0xFA3CFB84, 0xFA3DFB84, 0xFA3EFB84, 0xFA3FFB84, 0xFA40FB84, 0xFA41FB84, 0xFA42FB84, 0xFA43FB84, 0xFA44FB84, 0xFA45FB84, 0xFA46FB84, + 0xFA47FB84, 0xFA48FB84, 0xFA49FB84, 0xFA4AFB84, 0xFA4BFB84, 0xFA4CFB84, 0xFA4DFB84, 0xFA4EFB84, 0xFA4FFB84, 0xFA50FB84, 0xFA51FB84, 0xFA52FB84, 0xFA53FB84, 0xFA54FB84, 0xFA55FB84, + 0xFA56FB84, 0xFA57FB84, 0xFA58FB84, 0xFA59FB84, 0xFA5AFB84, 0xFA5BFB84, 0xFA5CFB84, 0xFA5DFB84, 0xFA5EFB84, 0xFA5FFB84, 0xFA60FB84, 0xFA61FB84, 
0xFA62FB84, 0xFA63FB84, 0xFA64FB84, + 0xFA65FB84, 0xFA66FB84, 0xFA67FB84, 0xFA68FB84, 0xFA69FB84, 0xFA6AFB84, 0xFA6BFB84, 0xFA6CFB84, 0xFA6DFB84, 0xFA6EFB84, 0xFA6FFB84, 0xFA70FB84, 0xFA71FB84, 0xFA72FB84, 0xFA73FB84, + 0xFA74FB84, 0xFA75FB84, 0xFA76FB84, 0xFA77FB84, 0xFA78FB84, 0xFA79FB84, 0xFA7AFB84, 0xFA7BFB84, 0xFA7CFB84, 0xFA7DFB84, 0xFA7EFB84, 0xFA7FFB84, 0xFA80FB84, 0xFA81FB84, 0xFA82FB84, + 0xFA83FB84, 0xFA84FB84, 0xFA85FB84, 0xFA86FB84, 0xFA87FB84, 0xFA88FB84, 0xFA89FB84, 0xFA8AFB84, 0xFA8BFB84, 0xFA8CFB84, 0xFA8DFB84, 0xFA8EFB84, 0xFA8FFB84, 0xFA90FB84, 0xFA91FB84, + 0xFA92FB84, 0xFA93FB84, 0xFA94FB84, 0xFA95FB84, 0xFA96FB84, 0xFA97FB84, 0xFA98FB84, 0xFA99FB84, 0xFA9AFB84, 0xFA9BFB84, 0xFA9CFB84, 0xFA9DFB84, 0xFA9EFB84, 0xFA9FFB84, 0xFAA0FB84, + 0xFAA1FB84, 0xFAA2FB84, 0xFAA3FB84, 0xFAA4FB84, 0xFAA5FB84, 0xFAA6FB84, 0xFAA7FB84, 0xFAA8FB84, 0xFAA9FB84, 0xFAAAFB84, 0xFAABFB84, 0xFAACFB84, 0xFAADFB84, 0xFAAEFB84, 0xFAAFFB84, + 0xFAB0FB84, 0xFAB1FB84, 0xFAB2FB84, 0xFAB3FB84, 0xFAB4FB84, 0xFAB5FB84, 0xFAB6FB84, 0xFAB7FB84, 0xFAB8FB84, 0xFAB9FB84, 0xFABAFB84, 0xFABBFB84, 0xFABCFB84, 0xFABDFB84, 0xFABEFB84, + 0xFABFFB84, 0xFAC0FB84, 0xFAC1FB84, 0xFAC2FB84, 0xFAC3FB84, 0xFAC4FB84, 0xFAC5FB84, 0xFAC6FB84, 0xFAC7FB84, 0xFAC8FB84, 0xFAC9FB84, 0xFACAFB84, 0xFACBFB84, 0xFACCFB84, 0xFACDFB84, + 0xFACEFB84, 0xFACFFB84, 0xFAD0FB84, 0xFAD1FB84, 0xFAD2FB84, 0xFAD3FB84, 0xFAD4FB84, 0xFAD5FB84, 0xFAD6FB84, 0xFAD7FB84, 0xFAD8FB84, 0xFAD9FB84, 0xFADAFB84, 0xFADBFB84, 0xFADCFB84, + 0xFADDFB84, 0xFADEFB84, 0xFADFFB84, 0xFAE0FB84, 0xFAE1FB84, 0xFAE2FB84, 0xFAE3FB84, 0xFAE4FB84, 0xFAE5FB84, 0xFAE6FB84, 0xFAE7FB84, 0xFAE8FB84, 0xFAE9FB84, 0xFAEAFB84, 0xFAEBFB84, + 0xFAECFB84, 0xFAEDFB84, 0xFAEEFB84, 0xFAEFFB84, 0xFAF0FB84, 0xFAF1FB84, 0xFAF2FB84, 0xFAF3FB84, 0xFAF4FB84, 0xFAF5FB84, 0xFAF6FB84, 0xFAF7FB84, 0xFAF8FB84, 0xFAF9FB84, 0xFAFAFB84, + 0xFAFBFB84, 0xFAFCFB84, 0xFAFDFB84, 0xFAFEFB84, 0xFAFFFB84, 0xFB00FB84, 0xFB01FB84, 0xFB02FB84, 0xFB03FB84, 0xFB04FB84, 0xFB05FB84, 
0xFB06FB84, 0xFB07FB84, 0xFB08FB84, 0xFB09FB84, + 0xFB0AFB84, 0xFB0BFB84, 0xFB0CFB84, 0xFB0DFB84, 0xFB0EFB84, 0xFB0FFB84, 0xFB10FB84, 0xFB11FB84, 0xFB12FB84, 0xFB13FB84, 0xFB14FB84, 0xFB15FB84, 0xFB16FB84, 0xFB17FB84, 0xFB18FB84, + 0xFB19FB84, 0xFB1AFB84, 0xFB1BFB84, 0xFB1CFB84, 0xFB1DFB84, 0xFB1EFB84, 0xFB1FFB84, 0xFB20FB84, 0xFB21FB84, 0xFB22FB84, 0xFB23FB84, 0xFB24FB84, 0xFB25FB84, 0xFB26FB84, 0xFB27FB84, + 0xFB28FB84, 0xFB29FB84, 0xFB2AFB84, 0xFB2BFB84, 0xFB2CFB84, 0xFB2DFB84, 0xFB2EFB84, 0xFB2FFB84, 0xFB30FB84, 0xFB31FB84, 0xFB32FB84, 0xFB33FB84, 0xFB34FB84, 0xFB35FB84, 0xFB36FB84, + 0xFB37FB84, 0xFB38FB84, 0xFB39FB84, 0xFB3AFB84, 0xFB3BFB84, 0xFB3CFB84, 0xFB3DFB84, 0xFB3EFB84, 0xFB3FFB84, 0xFB40FB84, 0xFB41FB84, 0xFB42FB84, 0xFB43FB84, 0xFB44FB84, 0xFB45FB84, + 0xFB46FB84, 0xFB47FB84, 0xFB48FB84, 0xFB49FB84, 0xFB4AFB84, 0xFB4BFB84, 0xFB4CFB84, 0xFB4DFB84, 0xFB4EFB84, 0xFB4FFB84, 0xFB50FB84, 0xFB51FB84, 0xFB52FB84, 0xFB53FB84, 0xFB54FB84, + 0xFB55FB84, 0xFB56FB84, 0xFB57FB84, 0xFB58FB84, 0xFB59FB84, 0xFB5AFB84, 0xFB5BFB84, 0xFB5CFB84, 0xFB5DFB84, 0xFB5EFB84, 0xFB5FFB84, 0xFB60FB84, 0xFB61FB84, 0xFB62FB84, 0xFB63FB84, + 0xFB64FB84, 0xFB65FB84, 0xFB66FB84, 0xFB67FB84, 0xFB68FB84, 0xFB69FB84, 0xFB6AFB84, 0xFB6BFB84, 0xFB6CFB84, 0xFB6DFB84, 0xFB6EFB84, 0xFB6FFB84, 0xFB70FB84, 0xFB71FB84, 0xFB72FB84, + 0xFB73FB84, 0xFB74FB84, 0xFB75FB84, 0xFB76FB84, 0xFB77FB84, 0xFB78FB84, 0xFB79FB84, 0xFB7AFB84, 0xFB7BFB84, 0xFB7CFB84, 0xFB7DFB84, 0xFB7EFB84, 0xFB7FFB84, 0xFB80FB84, 0xFB81FB84, + 0xFB82FB84, 0xFB83FB84, 0xFB84FB84, 0xFB85FB84, 0xFB86FB84, 0xFB87FB84, 0xFB88FB84, 0xFB89FB84, 0xFB8AFB84, 0xFB8BFB84, 0xFB8CFB84, 0xFB8DFB84, 0xFB8EFB84, 0xFB8FFB84, 0xFB90FB84, + 0xFB91FB84, 0xFB92FB84, 0xFB93FB84, 0xFB94FB84, 0xFB95FB84, 0xFB96FB84, 0xFB97FB84, 0xFB98FB84, 0xFB99FB84, 0xFB9AFB84, 0xFB9BFB84, 0xFB9CFB84, 0xFB9DFB84, 0xFB9EFB84, 0xFB9FFB84, + 0xFBA0FB84, 0xFBA1FB84, 0xFBA2FB84, 0xFBA3FB84, 0xFBA4FB84, 0xFBA5FB84, 0xFBA6FB84, 0xFBA7FB84, 0xFBA8FB84, 0xFBA9FB84, 
0xFBAAFB84, 0xFBABFB84, 0xFBACFB84, 0xFBADFB84, 0xFBAEFB84, + 0xFBAFFB84, 0xFBB0FB84, 0xFBB1FB84, 0xFBB2FB84, 0xFBB3FB84, 0xFBB4FB84, 0xFBB5FB84, 0xFBB6FB84, 0xFBB7FB84, 0xFBB8FB84, 0xFBB9FB84, 0xFBBAFB84, 0xFBBBFB84, 0xFBBCFB84, 0xFBBDFB84, + 0xFBBEFB84, 0xFBBFFB84, 0xFBC0FB84, 0xFBC1FB84, 0xFBC2FB84, 0xFBC3FB84, 0xFBC4FB84, 0xFBC5FB84, 0xFBC6FB84, 0xFBC7FB84, 0xFBC8FB84, 0xFBC9FB84, 0xFBCAFB84, 0xFBCBFB84, 0xFBCCFB84, + 0xFBCDFB84, 0xFBCEFB84, 0xFBCFFB84, 0xFBD0FB84, 0xFBD1FB84, 0xFBD2FB84, 0xFBD3FB84, 0xFBD4FB84, 0xFBD5FB84, 0xFBD6FB84, 0xFBD7FB84, 0xFBD8FB84, 0xFBD9FB84, 0xFBDAFB84, 0xFBDBFB84, + 0xFBDCFB84, 0xFBDDFB84, 0xFBDEFB84, 0xFBDFFB84, 0xFBE0FB84, 0xFBE1FB84, 0xFBE2FB84, 0xFBE3FB84, 0xFBE4FB84, 0xFBE5FB84, 0xFBE6FB84, 0xFBE7FB84, 0xFBE8FB84, 0xFBE9FB84, 0xFBEAFB84, + 0xFBEBFB84, 0xFBECFB84, 0xFBEDFB84, 0xFBEEFB84, 0xFBEFFB84, 0xFBF0FB84, 0xFBF1FB84, 0xFBF2FB84, 0xFBF3FB84, 0xFBF4FB84, 0xFBF5FB84, 0xFBF6FB84, 0xFBF7FB84, 0xFBF8FB84, 0xFBF9FB84, + 0xFBFAFB84, 0xFBFBFB84, 0xFBFCFB84, 0xFBFDFB84, 0xFBFEFB84, 0xFBFFFB84, 0xFC00FB84, 0xFC01FB84, 0xFC02FB84, 0xFC03FB84, 0xFC04FB84, 0xFC05FB84, 0xFC06FB84, 0xFC07FB84, 0xFC08FB84, + 0xFC09FB84, 0xFC0AFB84, 0xFC0BFB84, 0xFC0CFB84, 0xFC0DFB84, 0xFC0EFB84, 0xFC0FFB84, 0xFC10FB84, 0xFC11FB84, 0xFC12FB84, 0xFC13FB84, 0xFC14FB84, 0xFC15FB84, 0xFC16FB84, 0xFC17FB84, + 0xFC18FB84, 0xFC19FB84, 0xFC1AFB84, 0xFC1BFB84, 0xFC1CFB84, 0xFC1DFB84, 0xFC1EFB84, 0xFC1FFB84, 0xFC20FB84, 0xFC21FB84, 0xFC22FB84, 0xFC23FB84, 0xFC24FB84, 0xFC25FB84, 0xFC26FB84, + 0xFC27FB84, 0xFC28FB84, 0xFC29FB84, 0xFC2AFB84, 0xFC2BFB84, 0xFC2CFB84, 0xFC2DFB84, 0xFC2EFB84, 0xFC2FFB84, 0xFC30FB84, 0xFC31FB84, 0xFC32FB84, 0xFC33FB84, 0xFC34FB84, 0xFC35FB84, + 0xFC36FB84, 0xFC37FB84, 0xFC38FB84, 0xFC39FB84, 0xFC3AFB84, 0xFC3BFB84, 0xFC3CFB84, 0xFC3DFB84, 0xFC3EFB84, 0xFC3FFB84, 0xFC40FB84, 0xFC41FB84, 0xFC42FB84, 0xFC43FB84, 0xFC44FB84, + 0xFC45FB84, 0xFC46FB84, 0xFC47FB84, 0xFC48FB84, 0xFC49FB84, 0xFC4AFB84, 0xFC4BFB84, 0xFC4CFB84, 0xFC4DFB84, 
0xFC4EFB84, 0xFC4FFB84, 0xFC50FB84, 0xFC51FB84, 0xFC52FB84, 0xFC53FB84, + 0xFC54FB84, 0xFC55FB84, 0xFC56FB84, 0xFC57FB84, 0xFC58FB84, 0xFC59FB84, 0xFC5AFB84, 0xFC5BFB84, 0xFC5CFB84, 0xFC5DFB84, 0xFC5EFB84, 0xFC5FFB84, 0xFC60FB84, 0xFC61FB84, 0xFC62FB84, + 0xFC63FB84, 0xFC64FB84, 0xFC65FB84, 0xFC66FB84, 0xFC67FB84, 0xFC68FB84, 0xFC69FB84, 0xFC6AFB84, 0xFC6BFB84, 0xFC6CFB84, 0xFC6DFB84, 0xFC6EFB84, 0xFC6FFB84, 0xFC70FB84, 0xFC71FB84, + 0xFC72FB84, 0xFC73FB84, 0xFC74FB84, 0xFC75FB84, 0xFC76FB84, 0xFC77FB84, 0xFC78FB84, 0xFC79FB84, 0xFC7AFB84, 0xFC7BFB84, 0xFC7CFB84, 0xFC7DFB84, 0xFC7EFB84, 0xFC7FFB84, 0xFC80FB84, + 0xFC81FB84, 0xFC82FB84, 0xFC83FB84, 0xFC84FB84, 0xFC85FB84, 0xFC86FB84, 0xFC87FB84, 0xFC88FB84, 0xFC89FB84, 0xFC8AFB84, 0xFC8BFB84, 0xFC8CFB84, 0xFC8DFB84, 0xFC8EFB84, 0xFC8FFB84, + 0xFC90FB84, 0xFC91FB84, 0xFC92FB84, 0xFC93FB84, 0xFC94FB84, 0xFC95FB84, 0xFC96FB84, 0xFC97FB84, 0xFC98FB84, 0xFC99FB84, 0xFC9AFB84, 0xFC9BFB84, 0xFC9CFB84, 0xFC9DFB84, 0xFC9EFB84, + 0xFC9FFB84, 0xFCA0FB84, 0xFCA1FB84, 0xFCA2FB84, 0xFCA3FB84, 0xFCA4FB84, 0xFCA5FB84, 0xFCA6FB84, 0xFCA7FB84, 0xFCA8FB84, 0xFCA9FB84, 0xFCAAFB84, 0xFCABFB84, 0xFCACFB84, 0xFCADFB84, + 0xFCAEFB84, 0xFCAFFB84, 0xFCB0FB84, 0xFCB1FB84, 0xFCB2FB84, 0xFCB3FB84, 0xFCB4FB84, 0xFCB5FB84, 0xFCB6FB84, 0xFCB7FB84, 0xFCB8FB84, 0xFCB9FB84, 0xFCBAFB84, 0xFCBBFB84, 0xFCBCFB84, + 0xFCBDFB84, 0xFCBEFB84, 0xFCBFFB84, 0xFCC0FB84, 0xFCC1FB84, 0xFCC2FB84, 0xFCC3FB84, 0xFCC4FB84, 0xFCC5FB84, 0xFCC6FB84, 0xFCC7FB84, 0xFCC8FB84, 0xFCC9FB84, 0xFCCAFB84, 0xFCCBFB84, + 0xFCCCFB84, 0xFCCDFB84, 0xFCCEFB84, 0xFCCFFB84, 0xFCD0FB84, 0xFCD1FB84, 0xFCD2FB84, 0xFCD3FB84, 0xFCD4FB84, 0xFCD5FB84, 0xFCD6FB84, 0xFCD7FB84, 0xFCD8FB84, 0xFCD9FB84, 0xFCDAFB84, + 0xFCDBFB84, 0xFCDCFB84, 0xFCDDFB84, 0xFCDEFB84, 0xFCDFFB84, 0xFCE0FB84, 0xFCE1FB84, 0xFCE2FB84, 0xFCE3FB84, 0xFCE4FB84, 0xFCE5FB84, 0xFCE6FB84, 0xFCE7FB84, 0xFCE8FB84, 0xFCE9FB84, + 0xFCEAFB84, 0xFCEBFB84, 0xFCECFB84, 0xFCEDFB84, 0xFCEEFB84, 0xFCEFFB84, 0xFCF0FB84, 0xFCF1FB84, 
0xFCF2FB84, 0xFCF3FB84, 0xFCF4FB84, 0xFCF5FB84, 0xFCF6FB84, 0xFCF7FB84, 0xFCF8FB84, + 0xFCF9FB84, 0xFCFAFB84, 0xFCFBFB84, 0xFCFCFB84, 0xFCFDFB84, 0xFCFEFB84, 0xFCFFFB84, 0xFD00FB84, 0xFD01FB84, 0xFD02FB84, 0xFD03FB84, 0xFD04FB84, 0xFD05FB84, 0xFD06FB84, 0xFD07FB84, + 0xFD08FB84, 0xFD09FB84, 0xFD0AFB84, 0xFD0BFB84, 0xFD0CFB84, 0xFD0DFB84, 0xFD0EFB84, 0xFD0FFB84, 0xFD10FB84, 0xFD11FB84, 0xFD12FB84, 0xFD13FB84, 0xFD14FB84, 0xFD15FB84, 0xFD16FB84, + 0xFD17FB84, 0xFD18FB84, 0xFD19FB84, 0xFD1AFB84, 0xFD1BFB84, 0xFD1CFB84, 0xFD1DFB84, 0xFD1EFB84, 0xFD1FFB84, 0xFD20FB84, 0xFD21FB84, 0xFD22FB84, 0xFD23FB84, 0xFD24FB84, 0xFD25FB84, + 0xFD26FB84, 0xFD27FB84, 0xFD28FB84, 0xFD29FB84, 0xFD2AFB84, 0xFD2BFB84, 0xFD2CFB84, 0xFD2DFB84, 0xFD2EFB84, 0xFD2FFB84, 0xFD30FB84, 0xFD31FB84, 0xFD32FB84, 0xFD33FB84, 0xFD34FB84, + 0xFD35FB84, 0xFD36FB84, 0xFD37FB84, 0xFD38FB84, 0xFD39FB84, 0xFD3AFB84, 0xFD3BFB84, 0xFD3CFB84, 0xFD3DFB84, 0xFD3EFB84, 0xFD3FFB84, 0xFD40FB84, 0xFD41FB84, 0xFD42FB84, 0xFD43FB84, + 0xFD44FB84, 0xFD45FB84, 0xFD46FB84, 0xFD47FB84, 0xFD48FB84, 0xFD49FB84, 0xFD4AFB84, 0xFD4BFB84, 0xFD4CFB84, 0xFD4DFB84, 0xFD4EFB84, 0xFD4FFB84, 0xFD50FB84, 0xFD51FB84, 0xFD52FB84, + 0xFD53FB84, 0xFD54FB84, 0xFD55FB84, 0xFD56FB84, 0xFD57FB84, 0xFD58FB84, 0xFD59FB84, 0xFD5AFB84, 0xFD5BFB84, 0xFD5CFB84, 0xFD5DFB84, 0xFD5EFB84, 0xFD5FFB84, 0xFD60FB84, 0xFD61FB84, + 0xFD62FB84, 0xFD63FB84, 0xFD64FB84, 0xFD65FB84, 0xFD66FB84, 0xFD67FB84, 0xFD68FB84, 0xFD69FB84, 0xFD6AFB84, 0xFD6BFB84, 0xFD6CFB84, 0xFD6DFB84, 0xFD6EFB84, 0xFD6FFB84, 0xFD70FB84, + 0xFD71FB84, 0xFD72FB84, 0xFD73FB84, 0xFD74FB84, 0xFD75FB84, 0xFD76FB84, 0xFD77FB84, 0xFD78FB84, 0xFD79FB84, 0xFD7AFB84, 0xFD7BFB84, 0xFD7CFB84, 0xFD7DFB84, 0xFD7EFB84, 0xFD7FFB84, + 0xFD80FB84, 0xFD81FB84, 0xFD82FB84, 0xFD83FB84, 0xFD84FB84, 0xFD85FB84, 0xFD86FB84, 0xFD87FB84, 0xFD88FB84, 0xFD89FB84, 0xFD8AFB84, 0xFD8BFB84, 0xFD8CFB84, 0xFD8DFB84, 0xFD8EFB84, + 0xFD8FFB84, 0xFD90FB84, 0xFD91FB84, 0xFD92FB84, 0xFD93FB84, 0xFD94FB84, 0xFD95FB84, 
0xFD96FB84, 0xFD97FB84, 0xFD98FB84, 0xFD99FB84, 0xFD9AFB84, 0xFD9BFB84, 0xFD9CFB84, 0xFD9DFB84, + 0xFD9EFB84, 0xFD9FFB84, 0xFDA0FB84, 0xFDA1FB84, 0xFDA2FB84, 0xFDA3FB84, 0xFDA4FB84, 0xFDA5FB84, 0xFDA6FB84, 0xFDA7FB84, 0xFDA8FB84, 0xFDA9FB84, 0xFDAAFB84, 0xFDABFB84, 0xFDACFB84, + 0xFDADFB84, 0xFDAEFB84, 0xFDAFFB84, 0xFDB0FB84, 0xFDB1FB84, 0xFDB2FB84, 0xFDB3FB84, 0xFDB4FB84, 0xFDB5FB84, 0xFDB6FB84, 0xFDB7FB84, 0xFDB8FB84, 0xFDB9FB84, 0xFDBAFB84, 0xFDBBFB84, + 0xFDBCFB84, 0xFDBDFB84, 0xFDBEFB84, 0xFDBFFB84, 0xFDC0FB84, 0xFDC1FB84, 0xFDC2FB84, 0xFDC3FB84, 0xFDC4FB84, 0xFDC5FB84, 0xFDC6FB84, 0xFDC7FB84, 0xFDC8FB84, 0xFDC9FB84, 0xFDCAFB84, + 0xFDCBFB84, 0xFDCCFB84, 0xFDCDFB84, 0xFDCEFB84, 0xFDCFFB84, 0xFDD0FB84, 0xFDD1FB84, 0xFDD2FB84, 0xFDD3FB84, 0xFDD4FB84, 0xFDD5FB84, 0xFDD6FB84, 0xFDD7FB84, 0xFDD8FB84, 0xFDD9FB84, + 0xFDDAFB84, 0xFDDBFB84, 0xFDDCFB84, 0xFDDDFB84, 0xFDDEFB84, 0xFDDFFB84, 0xFDE0FB84, 0xFDE1FB84, 0xFDE2FB84, 0xFDE3FB84, 0xFDE4FB84, 0xFDE5FB84, 0xFDE6FB84, 0xFDE7FB84, 0xFDE8FB84, + 0xFDE9FB84, 0xFDEAFB84, 0xFDEBFB84, 0xFDECFB84, 0xFDEDFB84, 0xFDEEFB84, 0xFDEFFB84, 0xFDF0FB84, 0xFDF1FB84, 0xFDF2FB84, 0xFDF3FB84, 0xFDF4FB84, 0xFDF5FB84, 0xFDF6FB84, 0xFDF7FB84, + 0xFDF8FB84, 0xFDF9FB84, 0xFDFAFB84, 0xFDFBFB84, 0xFDFCFB84, 0xFDFDFB84, 0xFDFEFB84, 0xFDFFFB84, 0xFE00FB84, 0xFE01FB84, 0xFE02FB84, 0xFE03FB84, 0xFE04FB84, 0xFE05FB84, 0xFE06FB84, + 0xFE07FB84, 0xFE08FB84, 0xFE09FB84, 0xFE0AFB84, 0xFE0BFB84, 0xFE0CFB84, 0xFE0DFB84, 0xFE0EFB84, 0xFE0FFB84, 0xFE10FB84, 0xFE11FB84, 0xFE12FB84, 0xFE13FB84, 0xFE14FB84, 0xFE15FB84, + 0xFE16FB84, 0xFE17FB84, 0xFE18FB84, 0xFE19FB84, 0xFE1AFB84, 0xFE1BFB84, 0xFE1CFB84, 0xFE1DFB84, 0xFE1EFB84, 0xFE1FFB84, 0xFE20FB84, 0xFE21FB84, 0xFE22FB84, 0xFE23FB84, 0xFE24FB84, + 0xFE25FB84, 0xFE26FB84, 0xFE27FB84, 0xFE28FB84, 0xFE29FB84, 0xFE2AFB84, 0xFE2BFB84, 0xFE2CFB84, 0xFE2DFB84, 0xFE2EFB84, 0xFE2FFB84, 0xFE30FB84, 0xFE31FB84, 0xFE32FB84, 0xFE33FB84, + 0xFE34FB84, 0xFE35FB84, 0xFE36FB84, 0xFE37FB84, 0xFE38FB84, 0xFE39FB84, 
0xFE3AFB84, 0xFE3BFB84, 0xFE3CFB84, 0xFE3DFB84, 0xFE3EFB84, 0xFE3FFB84, 0xFE40FB84, 0xFE41FB84, 0xFE42FB84, + 0xFE43FB84, 0xFE44FB84, 0xFE45FB84, 0xFE46FB84, 0xFE47FB84, 0xFE48FB84, 0xFE49FB84, 0xFE4AFB84, 0xFE4BFB84, 0xFE4CFB84, 0xFE4DFB84, 0xFE4EFB84, 0xFE4FFB84, 0xFE50FB84, 0xFE51FB84, + 0xFE52FB84, 0xFE53FB84, 0xFE54FB84, 0xFE55FB84, 0xFE56FB84, 0xFE57FB84, 0xFE58FB84, 0xFE59FB84, 0xFE5AFB84, 0xFE5BFB84, 0xFE5CFB84, 0xFE5DFB84, 0xFE5EFB84, 0xFE5FFB84, 0xFE60FB84, + 0xFE61FB84, 0xFE62FB84, 0xFE63FB84, 0xFE64FB84, 0xFE65FB84, 0xFE66FB84, 0xFE67FB84, 0xFE68FB84, 0xFE69FB84, 0xFE6AFB84, 0xFE6BFB84, 0xFE6CFB84, 0xFE6DFB84, 0xFE6EFB84, 0xFE6FFB84, + 0xFE70FB84, 0xFE71FB84, 0xFE72FB84, 0xFE73FB84, 0xFE74FB84, 0xFE75FB84, 0xFE76FB84, 0xFE77FB84, 0xFE78FB84, 0xFE79FB84, 0xFE7AFB84, 0xFE7BFB84, 0xFE7CFB84, 0xFE7DFB84, 0xFE7EFB84, + 0xFE7FFB84, 0xFE80FB84, 0xFE81FB84, 0xFE82FB84, 0xFE83FB84, 0xFE84FB84, 0xFE85FB84, 0xFE86FB84, 0xFE87FB84, 0xFE88FB84, 0xFE89FB84, 0xFE8AFB84, 0xFE8BFB84, 0xFE8CFB84, 0xFE8DFB84, + 0xFE8EFB84, 0xFE8FFB84, 0xFE90FB84, 0xFE91FB84, 0xFE92FB84, 0xFE93FB84, 0xFE94FB84, 0xFE95FB84, 0xFE96FB84, 0xFE97FB84, 0xFE98FB84, 0xFE99FB84, 0xFE9AFB84, 0xFE9BFB84, 0xFE9CFB84, + 0xFE9DFB84, 0xFE9EFB84, 0xFE9FFB84, 0xFEA0FB84, 0xFEA1FB84, 0xFEA2FB84, 0xFEA3FB84, 0xFEA4FB84, 0xFEA5FB84, 0xFEA6FB84, 0xFEA7FB84, 0xFEA8FB84, 0xFEA9FB84, 0xFEAAFB84, 0xFEABFB84, + 0xFEACFB84, 0xFEADFB84, 0xFEAEFB84, 0xFEAFFB84, 0xFEB0FB84, 0xFEB1FB84, 0xFEB2FB84, 0xFEB3FB84, 0xFEB4FB84, 0xFEB5FB84, 0xFEB6FB84, 0xFEB7FB84, 0xFEB8FB84, 0xFEB9FB84, 0xFEBAFB84, + 0xFEBBFB84, 0xFEBCFB84, 0xFEBDFB84, 0xFEBEFB84, 0xFEBFFB84, 0xFEC0FB84, 0xFEC1FB84, 0xFEC2FB84, 0xFEC3FB84, 0xFEC4FB84, 0xFEC5FB84, 0xFEC6FB84, 0xFEC7FB84, 0xFEC8FB84, 0xFEC9FB84, + 0xFECAFB84, 0xFECBFB84, 0xFECCFB84, 0xFECDFB84, 0xFECEFB84, 0xFECFFB84, 0xFED0FB84, 0xFED1FB84, 0xFED2FB84, 0xFED3FB84, 0xFED4FB84, 0xFED5FB84, 0xFED6FB84, 0xFED7FB84, 0xFED8FB84, + 0xFED9FB84, 0xFEDAFB84, 0xFEDBFB84, 0xFEDCFB84, 0xFEDDFB84, 
0xFEDEFB84, 0xFEDFFB84, 0xFEE0FB84, 0xFEE1FB84, 0xFEE2FB84, 0xFEE3FB84, 0xFEE4FB84, 0xFEE5FB84, 0xFEE6FB84, 0xFEE7FB84, + 0xFEE8FB84, 0xFEE9FB84, 0xFEEAFB84, 0xFEEBFB84, 0xFEECFB84, 0xFEEDFB84, 0xFEEEFB84, 0xFEEFFB84, 0xFEF0FB84, 0xFEF1FB84, 0xFEF2FB84, 0xFEF3FB84, 0xFEF4FB84, 0xFEF5FB84, 0xFEF6FB84, + 0xFEF7FB84, 0xFEF8FB84, 0xFEF9FB84, 0xFEFAFB84, 0xFEFBFB84, 0xFEFCFB84, 0xFEFDFB84, 0xFEFEFB84, 0xFEFFFB84, 0xFF00FB84, 0xFF01FB84, 0xFF02FB84, 0xFF03FB84, 0xFF04FB84, 0xFF05FB84, + 0xFF06FB84, 0xFF07FB84, 0xFF08FB84, 0xFF09FB84, 0xFF0AFB84, 0xFF0BFB84, 0xFF0CFB84, 0xFF0DFB84, 0xFF0EFB84, 0xFF0FFB84, 0xFF10FB84, 0xFF11FB84, 0xFF12FB84, 0xFF13FB84, 0xFF14FB84, + 0xFF15FB84, 0xFF16FB84, 0xFF17FB84, 0xFF18FB84, 0xFF19FB84, 0xFF1AFB84, 0xFF1BFB84, 0xFF1CFB84, 0xFF1DFB84, 0xFF1EFB84, 0xFF1FFB84, 0xFF20FB84, 0xFF21FB84, 0xFF22FB84, 0xFF23FB84, + 0xFF24FB84, 0xFF25FB84, 0xFF26FB84, 0xFF27FB84, 0xFF28FB84, 0xFF29FB84, 0xFF2AFB84, 0xFF2BFB84, 0xFF2CFB84, 0xFF2DFB84, 0xFF2EFB84, 0xFF2FFB84, 0xFF30FB84, 0xFF31FB84, 0xFF32FB84, + 0xFF33FB84, 0xFF34FB84, 0xFF35FB84, 0xFF36FB84, 0xFF37FB84, 0xFF38FB84, 0xFF39FB84, 0xFF3AFB84, 0xFF3BFB84, 0xFF3CFB84, 0xFF3DFB84, 0xFF3EFB84, 0xFF3FFB84, 0xFF40FB84, 0xFF41FB84, + 0xFF42FB84, 0xFF43FB84, 0xFF44FB84, 0xFF45FB84, 0xFF46FB84, 0xFF47FB84, 0xFF48FB84, 0xFF49FB84, 0xFF4AFB84, 0xFF4BFB84, 0xFF4CFB84, 0xFF4DFB84, 0xFF4EFB84, 0xFF4FFB84, 0xFF50FB84, + 0xFF51FB84, 0xFF52FB84, 0xFF53FB84, 0xFF54FB84, 0xFF55FB84, 0xFF56FB84, 0xFF57FB84, 0xFF58FB84, 0xFF59FB84, 0xFF5AFB84, 0xFF5BFB84, 0xFF5CFB84, 0xFF5DFB84, 0xFF5EFB84, 0xFF5FFB84, + 0xFF60FB84, 0xFF61FB84, 0xFF62FB84, 0xFF63FB84, 0xFF64FB84, 0xFF65FB84, 0xFF66FB84, 0xFF67FB84, 0xFF68FB84, 0xFF69FB84, 0xFF6AFB84, 0xFF6BFB84, 0xFF6CFB84, 0xFF6DFB84, 0xFF6EFB84, + 0xFF6FFB84, 0xFF70FB84, 0xFF71FB84, 0xFF72FB84, 0xFF73FB84, 0xFF74FB84, 0xFF75FB84, 0xFF76FB84, 0xFF77FB84, 0xFF78FB84, 0xFF79FB84, 0xFF7AFB84, 0xFF7BFB84, 0xFF7CFB84, 0xFF7DFB84, + 0xFF7EFB84, 0xFF7FFB84, 0xFF80FB84, 0xFF81FB84, 
0xFF82FB84, 0xFF83FB84, 0xFF84FB84, 0xFF85FB84, 0xFF86FB84, 0xFF87FB84, 0xFF88FB84, 0xFF89FB84, 0xFF8AFB84, 0xFF8BFB84, 0xFF8CFB84, + 0xFF8DFB84, 0xFF8EFB84, 0xFF8FFB84, 0xFF90FB84, 0xFF91FB84, 0xFF92FB84, 0xFF93FB84, 0xFF94FB84, 0xFF95FB84, 0xFF96FB84, 0xFF97FB84, 0xFF98FB84, 0xFF99FB84, 0xFF9AFB84, 0xFF9BFB84, + 0xFF9CFB84, 0xFF9DFB84, 0xFF9EFB84, 0xFF9FFB84, 0xFFA0FB84, 0xFFA1FB84, 0xFFA2FB84, 0xFFA3FB84, 0xFFA4FB84, 0xFFA5FB84, 0xFFA6FB84, 0xFFA7FB84, 0xFFA8FB84, 0xFFA9FB84, 0xFFAAFB84, + 0xFFABFB84, 0xFFACFB84, 0xFFADFB84, 0xFFAEFB84, 0xFFAFFB84, 0xFFB0FB84, 0xFFB1FB84, 0xFFB2FB84, 0xFFB3FB84, 0xFFB4FB84, 0xFFB5FB84, 0xFFB6FB84, 0xFFB7FB84, 0xFFB8FB84, 0xFFB9FB84, + 0xFFBAFB84, 0xFFBBFB84, 0xFFBCFB84, 0xFFBDFB84, 0xFFBEFB84, 0xFFBFFB84, 0xFFC0FB84, 0xFFC1FB84, 0xFFC2FB84, 0xFFC3FB84, 0xFFC4FB84, 0xFFC5FB84, 0xFFC6FB84, 0xFFC7FB84, 0xFFC8FB84, + 0xFFC9FB84, 0xFFCAFB84, 0xFFCBFB84, 0xFFCCFB84, 0xFFCDFB84, 0xFFCEFB84, 0xFFCFFB84, 0xFFD0FB84, 0xFFD1FB84, 0xFFD2FB84, 0xFFD3FB84, 0xFFD4FB84, 0xFFD5FB84, 0xFFD6FB84, 0xFFD7FB84, + 0xFFD8FB84, 0xFFD9FB84, 0xFFDAFB84, 0xFFDBFB84, 0xFFDCFB84, 0xFFDDFB84, 0xFFDEFB84, 0xFFDFFB84, 0xFFE0FB84, 0xFFE1FB84, 0xFFE2FB84, 0xFFE3FB84, 0xFFE4FB84, 0xFFE5FB84, 0xFFE6FB84, + 0xFFE7FB84, 0xFFE8FB84, 0xFFE9FB84, 0xFFEAFB84, 0xFFEBFB84, 0xFFECFB84, 0xFFEDFB84, 0xFFEEFB84, 0xFFEFFB84, 0xFFF0FB84, 0xFFF1FB84, 0xFFF2FB84, 0xFFF3FB84, 0xFFF4FB84, 0xFFF5FB84, + 0xFFF6FB84, 0xFFF7FB84, 0xFFF8FB84, 0xFFF9FB84, 0xFFFAFB84, 0xFFFBFB84, 0xFFFCFB84, 0xFFFDFB84, 0xFFFEFB84, 0xFFFFFB84, 0x8000FB85, 0x8001FB85, 0x8002FB85, 0x8003FB85, 0x8004FB85, + 0x8005FB85, 0x8006FB85, 0x8007FB85, 0x8008FB85, 0x8009FB85, 0x800AFB85, 0x800BFB85, 0x800CFB85, 0x800DFB85, 0x800EFB85, 0x800FFB85, 0x8010FB85, 0x8011FB85, 0x8012FB85, 0x8013FB85, + 0x8014FB85, 0x8015FB85, 0x8016FB85, 0x8017FB85, 0x8018FB85, 0x8019FB85, 0x801AFB85, 0x801BFB85, 0x801CFB85, 0x801DFB85, 0x801EFB85, 0x801FFB85, 0x8020FB85, 0x8021FB85, 0x8022FB85, + 0x8023FB85, 0x8024FB85, 0x8025FB85, 
0x8026FB85, 0x8027FB85, 0x8028FB85, 0x8029FB85, 0x802AFB85, 0x802BFB85, 0x802CFB85, 0x802DFB85, 0x802EFB85, 0x802FFB85, 0x8030FB85, 0x8031FB85, + 0x8032FB85, 0x8033FB85, 0x8034FB85, 0x8035FB85, 0x8036FB85, 0x8037FB85, 0x8038FB85, 0x8039FB85, 0x803AFB85, 0x803BFB85, 0x803CFB85, 0x803DFB85, 0x803EFB85, 0x803FFB85, 0x8040FB85, + 0x8041FB85, 0x8042FB85, 0x8043FB85, 0x8044FB85, 0x8045FB85, 0x8046FB85, 0x8047FB85, 0x8048FB85, 0x8049FB85, 0x804AFB85, 0x804BFB85, 0x804CFB85, 0x804DFB85, 0x804EFB85, 0x804FFB85, + 0x8050FB85, 0x8051FB85, 0x8052FB85, 0x8053FB85, 0x8054FB85, 0x8055FB85, 0x8056FB85, 0x8057FB85, 0x8058FB85, 0x8059FB85, 0x805AFB85, 0x805BFB85, 0x805CFB85, 0x805DFB85, 0x805EFB85, + 0x805FFB85, 0x8060FB85, 0x8061FB85, 0x8062FB85, 0x8063FB85, 0x8064FB85, 0x8065FB85, 0x8066FB85, 0x8067FB85, 0x8068FB85, 0x8069FB85, 0x806AFB85, 0x806BFB85, 0x806CFB85, 0x806DFB85, + 0x806EFB85, 0x806FFB85, 0x8070FB85, 0x8071FB85, 0x8072FB85, 0x8073FB85, 0x8074FB85, 0x8075FB85, 0x8076FB85, 0x8077FB85, 0x8078FB85, 0x8079FB85, 0x807AFB85, 0x807BFB85, 0x807CFB85, + 0x807DFB85, 0x807EFB85, 0x807FFB85, 0x8080FB85, 0x8081FB85, 0x8082FB85, 0x8083FB85, 0x8084FB85, 0x8085FB85, 0x8086FB85, 0x8087FB85, 0x8088FB85, 0x8089FB85, 0x808AFB85, 0x808BFB85, + 0x808CFB85, 0x808DFB85, 0x808EFB85, 0x808FFB85, 0x8090FB85, 0x8091FB85, 0x8092FB85, 0x8093FB85, 0x8094FB85, 0x8095FB85, 0x8096FB85, 0x8097FB85, 0x8098FB85, 0x8099FB85, 0x809AFB85, + 0x809BFB85, 0x809CFB85, 0x809DFB85, 0x809EFB85, 0x809FFB85, 0x80A0FB85, 0x80A1FB85, 0x80A2FB85, 0x80A3FB85, 0x80A4FB85, 0x80A5FB85, 0x80A6FB85, 0x80A7FB85, 0x80A8FB85, 0x80A9FB85, + 0x80AAFB85, 0x80ABFB85, 0x80ACFB85, 0x80ADFB85, 0x80AEFB85, 0x80AFFB85, 0x80B0FB85, 0x80B1FB85, 0x80B2FB85, 0x80B3FB85, 0x80B4FB85, 0x80B5FB85, 0x80B6FB85, 0x80B7FB85, 0x80B8FB85, + 0x80B9FB85, 0x80BAFB85, 0x80BBFB85, 0x80BCFB85, 0x80BDFB85, 0x80BEFB85, 0x80BFFB85, 0x80C0FB85, 0x80C1FB85, 0x80C2FB85, 0x80C3FB85, 0x80C4FB85, 0x80C5FB85, 0x80C6FB85, 0x80C7FB85, + 0x80C8FB85, 0x80C9FB85, 
0x80CAFB85, 0x80CBFB85, 0x80CCFB85, 0x80CDFB85, 0x80CEFB85, 0x80CFFB85, 0x80D0FB85, 0x80D1FB85, 0x80D2FB85, 0x80D3FB85, 0x80D4FB85, 0x80D5FB85, 0x80D6FB85, + 0x80D7FB85, 0x80D8FB85, 0x80D9FB85, 0x80DAFB85, 0x80DBFB85, 0x80DCFB85, 0x80DDFB85, 0x80DEFB85, 0x80DFFB85, 0x80E0FB85, 0x80E1FB85, 0x80E2FB85, 0x80E3FB85, 0x80E4FB85, 0x80E5FB85, + 0x80E6FB85, 0x80E7FB85, 0x80E8FB85, 0x80E9FB85, 0x80EAFB85, 0x80EBFB85, 0x80ECFB85, 0x80EDFB85, 0x80EEFB85, 0x80EFFB85, 0x80F0FB85, 0x80F1FB85, 0x80F2FB85, 0x80F3FB85, 0x80F4FB85, + 0x80F5FB85, 0x80F6FB85, 0x80F7FB85, 0x80F8FB85, 0x80F9FB85, 0x80FAFB85, 0x80FBFB85, 0x80FCFB85, 0x80FDFB85, 0x80FEFB85, 0x80FFFB85, 0x8100FB85, 0x8101FB85, 0x8102FB85, 0x8103FB85, + 0x8104FB85, 0x8105FB85, 0x8106FB85, 0x8107FB85, 0x8108FB85, 0x8109FB85, 0x810AFB85, 0x810BFB85, 0x810CFB85, 0x810DFB85, 0x810EFB85, 0x810FFB85, 0x8110FB85, 0x8111FB85, 0x8112FB85, + 0x8113FB85, 0x8114FB85, 0x8115FB85, 0x8116FB85, 0x8117FB85, 0x8118FB85, 0x8119FB85, 0x811AFB85, 0x811BFB85, 0x811CFB85, 0x811DFB85, 0x811EFB85, 0x811FFB85, 0x8120FB85, 0x8121FB85, + 0x8122FB85, 0x8123FB85, 0x8124FB85, 0x8125FB85, 0x8126FB85, 0x8127FB85, 0x8128FB85, 0x8129FB85, 0x812AFB85, 0x812BFB85, 0x812CFB85, 0x812DFB85, 0x812EFB85, 0x812FFB85, 0x8130FB85, + 0x8131FB85, 0x8132FB85, 0x8133FB85, 0x8134FB85, 0x8135FB85, 0x8136FB85, 0x8137FB85, 0x8138FB85, 0x8139FB85, 0x813AFB85, 0x813BFB85, 0x813CFB85, 0x813DFB85, 0x813EFB85, 0x813FFB85, + 0x8140FB85, 0x8141FB85, 0x8142FB85, 0x8143FB85, 0x8144FB85, 0x8145FB85, 0x8146FB85, 0x8147FB85, 0x8148FB85, 0x8149FB85, 0x814AFB85, 0x814BFB85, 0x814CFB85, 0x814DFB85, 0x814EFB85, + 0x814FFB85, 0x8150FB85, 0x8151FB85, 0x8152FB85, 0x8153FB85, 0x8154FB85, 0x8155FB85, 0x8156FB85, 0x8157FB85, 0x8158FB85, 0x8159FB85, 0x815AFB85, 0x815BFB85, 0x815CFB85, 0x815DFB85, + 0x815EFB85, 0x815FFB85, 0x8160FB85, 0x8161FB85, 0x8162FB85, 0x8163FB85, 0x8164FB85, 0x8165FB85, 0x8166FB85, 0x8167FB85, 0x8168FB85, 0x8169FB85, 0x816AFB85, 0x816BFB85, 0x816CFB85, + 0x816DFB85, 
0x816EFB85, 0x816FFB85, 0x8170FB85, 0x8171FB85, 0x8172FB85, 0x8173FB85, 0x8174FB85, 0x8175FB85, 0x8176FB85, 0x8177FB85, 0x8178FB85, 0x8179FB85, 0x817AFB85, 0x817BFB85, + 0x817CFB85, 0x817DFB85, 0x817EFB85, 0x817FFB85, 0x8180FB85, 0x8181FB85, 0x8182FB85, 0x8183FB85, 0x8184FB85, 0x8185FB85, 0x8186FB85, 0x8187FB85, 0x8188FB85, 0x8189FB85, 0x818AFB85, + 0x818BFB85, 0x818CFB85, 0x818DFB85, 0x818EFB85, 0x818FFB85, 0x8190FB85, 0x8191FB85, 0x8192FB85, 0x8193FB85, 0x8194FB85, 0x8195FB85, 0x8196FB85, 0x8197FB85, 0x8198FB85, 0x8199FB85, + 0x819AFB85, 0x819BFB85, 0x819CFB85, 0x819DFB85, 0x819EFB85, 0x819FFB85, 0x81A0FB85, 0x81A1FB85, 0x81A2FB85, 0x81A3FB85, 0x81A4FB85, 0x81A5FB85, 0x81A6FB85, 0x81A7FB85, 0x81A8FB85, + 0x81A9FB85, 0x81AAFB85, 0x81ABFB85, 0x81ACFB85, 0x81ADFB85, 0x81AEFB85, 0x81AFFB85, 0x81B0FB85, 0x81B1FB85, 0x81B2FB85, 0x81B3FB85, 0x81B4FB85, 0x81B5FB85, 0x81B6FB85, 0x81B7FB85, + 0x81B8FB85, 0x81B9FB85, 0x81BAFB85, 0x81BBFB85, 0x81BCFB85, 0x81BDFB85, 0x81BEFB85, 0x81BFFB85, 0x81C0FB85, 0x81C1FB85, 0x81C2FB85, 0x81C3FB85, 0x81C4FB85, 0x81C5FB85, 0x81C6FB85, + 0x81C7FB85, 0x81C8FB85, 0x81C9FB85, 0x81CAFB85, 0x81CBFB85, 0x81CCFB85, 0x81CDFB85, 0x81CEFB85, 0x81CFFB85, 0x81D0FB85, 0x81D1FB85, 0x81D2FB85, 0x81D3FB85, 0x81D4FB85, 0x81D5FB85, + 0x81D6FB85, 0x81D7FB85, 0x81D8FB85, 0x81D9FB85, 0x81DAFB85, 0x81DBFB85, 0x81DCFB85, 0x81DDFB85, 0x81DEFB85, 0x81DFFB85, 0x81E0FB85, 0x81E1FB85, 0x81E2FB85, 0x81E3FB85, 0x81E4FB85, + 0x81E5FB85, 0x81E6FB85, 0x81E7FB85, 0x81E8FB85, 0x81E9FB85, 0x81EAFB85, 0x81EBFB85, 0x81ECFB85, 0x81EDFB85, 0x81EEFB85, 0x81EFFB85, 0x81F0FB85, 0x81F1FB85, 0x81F2FB85, 0x81F3FB85, + 0x81F4FB85, 0x81F5FB85, 0x81F6FB85, 0x81F7FB85, 0x81F8FB85, 0x81F9FB85, 0x81FAFB85, 0x81FBFB85, 0x81FCFB85, 0x81FDFB85, 0x81FEFB85, 0x81FFFB85, 0x8200FB85, 0x8201FB85, 0x8202FB85, + 0x8203FB85, 0x8204FB85, 0x8205FB85, 0x8206FB85, 0x8207FB85, 0x8208FB85, 0x8209FB85, 0x820AFB85, 0x820BFB85, 0x820CFB85, 0x820DFB85, 0x820EFB85, 0x820FFB85, 0x8210FB85, 0x8211FB85, + 
0x8212FB85, 0x8213FB85, 0x8214FB85, 0x8215FB85, 0x8216FB85, 0x8217FB85, 0x8218FB85, 0x8219FB85, 0x821AFB85, 0x821BFB85, 0x821CFB85, 0x821DFB85, 0x821EFB85, 0x821FFB85, 0x8220FB85, + 0x8221FB85, 0x8222FB85, 0x8223FB85, 0x8224FB85, 0x8225FB85, 0x8226FB85, 0x8227FB85, 0x8228FB85, 0x8229FB85, 0x822AFB85, 0x822BFB85, 0x822CFB85, 0x822DFB85, 0x822EFB85, 0x822FFB85, + 0x8230FB85, 0x8231FB85, 0x8232FB85, 0x8233FB85, 0x8234FB85, 0x8235FB85, 0x8236FB85, 0x8237FB85, 0x8238FB85, 0x8239FB85, 0x823AFB85, 0x823BFB85, 0x823CFB85, 0x823DFB85, 0x823EFB85, + 0x823FFB85, 0x8240FB85, 0x8241FB85, 0x8242FB85, 0x8243FB85, 0x8244FB85, 0x8245FB85, 0x8246FB85, 0x8247FB85, 0x8248FB85, 0x8249FB85, 0x824AFB85, 0x824BFB85, 0x824CFB85, 0x824DFB85, + 0x824EFB85, 0x824FFB85, 0x8250FB85, 0x8251FB85, 0x8252FB85, 0x8253FB85, 0x8254FB85, 0x8255FB85, 0x8256FB85, 0x8257FB85, 0x8258FB85, 0x8259FB85, 0x825AFB85, 0x825BFB85, 0x825CFB85, + 0x825DFB85, 0x825EFB85, 0x825FFB85, 0x8260FB85, 0x8261FB85, 0x8262FB85, 0x8263FB85, 0x8264FB85, 0x8265FB85, 0x8266FB85, 0x8267FB85, 0x8268FB85, 0x8269FB85, 0x826AFB85, 0x826BFB85, + 0x826CFB85, 0x826DFB85, 0x826EFB85, 0x826FFB85, 0x8270FB85, 0x8271FB85, 0x8272FB85, 0x8273FB85, 0x8274FB85, 0x8275FB85, 0x8276FB85, 0x8277FB85, 0x8278FB85, 0x8279FB85, 0x827AFB85, + 0x827BFB85, 0x827CFB85, 0x827DFB85, 0x827EFB85, 0x827FFB85, 0x8280FB85, 0x8281FB85, 0x8282FB85, 0x8283FB85, 0x8284FB85, 0x8285FB85, 0x8286FB85, 0x8287FB85, 0x8288FB85, 0x8289FB85, + 0x828AFB85, 0x828BFB85, 0x828CFB85, 0x828DFB85, 0x828EFB85, 0x828FFB85, 0x8290FB85, 0x8291FB85, 0x8292FB85, 0x8293FB85, 0x8294FB85, 0x8295FB85, 0x8296FB85, 0x8297FB85, 0x8298FB85, + 0x8299FB85, 0x829AFB85, 0x829BFB85, 0x829CFB85, 0x829DFB85, 0x829EFB85, 0x829FFB85, 0x82A0FB85, 0x82A1FB85, 0x82A2FB85, 0x82A3FB85, 0x82A4FB85, 0x82A5FB85, 0x82A6FB85, 0x82A7FB85, + 0x82A8FB85, 0x82A9FB85, 0x82AAFB85, 0x82ABFB85, 0x82ACFB85, 0x82ADFB85, 0x82AEFB85, 0x82AFFB85, 0x82B0FB85, 0x82B1FB85, 0x82B2FB85, 0x82B3FB85, 0x82B4FB85, 0x82B5FB85, 0x82B6FB85, 
+ 0x82B7FB85, 0x82B8FB85, 0x82B9FB85, 0x82BAFB85, 0x82BBFB85, 0x82BCFB85, 0x82BDFB85, 0x82BEFB85, 0x82BFFB85, 0x82C0FB85, 0x82C1FB85, 0x82C2FB85, 0x82C3FB85, 0x82C4FB85, 0x82C5FB85, + 0x82C6FB85, 0x82C7FB85, 0x82C8FB85, 0x82C9FB85, 0x82CAFB85, 0x82CBFB85, 0x82CCFB85, 0x82CDFB85, 0x82CEFB85, 0x82CFFB85, 0x82D0FB85, 0x82D1FB85, 0x82D2FB85, 0x82D3FB85, 0x82D4FB85, + 0x82D5FB85, 0x82D6FB85, 0x82D7FB85, 0x82D8FB85, 0x82D9FB85, 0x82DAFB85, 0x82DBFB85, 0x82DCFB85, 0x82DDFB85, 0x82DEFB85, 0x82DFFB85, 0x82E0FB85, 0x82E1FB85, 0x82E2FB85, 0x82E3FB85, + 0x82E4FB85, 0x82E5FB85, 0x82E6FB85, 0x82E7FB85, 0x82E8FB85, 0x82E9FB85, 0x82EAFB85, 0x82EBFB85, 0x82ECFB85, 0x82EDFB85, 0x82EEFB85, 0x82EFFB85, 0x82F0FB85, 0x82F1FB85, 0x82F2FB85, + 0x82F3FB85, 0x82F4FB85, 0x82F5FB85, 0x82F6FB85, 0x82F7FB85, 0x82F8FB85, 0x82F9FB85, 0x82FAFB85, 0x82FBFB85, 0x82FCFB85, 0x82FDFB85, 0x82FEFB85, 0x82FFFB85, 0x8300FB85, 0x8301FB85, + 0x8302FB85, 0x8303FB85, 0x8304FB85, 0x8305FB85, 0x8306FB85, 0x8307FB85, 0x8308FB85, 0x8309FB85, 0x830AFB85, 0x830BFB85, 0x830CFB85, 0x830DFB85, 0x830EFB85, 0x830FFB85, 0x8310FB85, + 0x8311FB85, 0x8312FB85, 0x8313FB85, 0x8314FB85, 0x8315FB85, 0x8316FB85, 0x8317FB85, 0x8318FB85, 0x8319FB85, 0x831AFB85, 0x831BFB85, 0x831CFB85, 0x831DFB85, 0x831EFB85, 0x831FFB85, + 0x8320FB85, 0x8321FB85, 0x8322FB85, 0x8323FB85, 0x8324FB85, 0x8325FB85, 0x8326FB85, 0x8327FB85, 0x8328FB85, 0x8329FB85, 0x832AFB85, 0x832BFB85, 0x832CFB85, 0x832DFB85, 0x832EFB85, + 0x832FFB85, 0x8330FB85, 0x8331FB85, 0x8332FB85, 0x8333FB85, 0x8334FB85, 0x8335FB85, 0x8336FB85, 0x8337FB85, 0x8338FB85, 0x8339FB85, 0x833AFB85, 0x833BFB85, 0x833CFB85, 0x833DFB85, + 0x833EFB85, 0x833FFB85, 0x8340FB85, 0x8341FB85, 0x8342FB85, 0x8343FB85, 0x8344FB85, 0x8345FB85, 0x8346FB85, 0x8347FB85, 0x8348FB85, 0x8349FB85, 0x834AFB85, 0x834BFB85, 0x834CFB85, + 0x834DFB85, 0x834EFB85, 0x834FFB85, 0x8350FB85, 0x8351FB85, 0x8352FB85, 0x8353FB85, 0x8354FB85, 0x8355FB85, 0x8356FB85, 0x8357FB85, 0x8358FB85, 0x8359FB85, 0x835AFB85, 
0x835BFB85, + 0x835CFB85, 0x835DFB85, 0x835EFB85, 0x835FFB85, 0x8360FB85, 0x8361FB85, 0x8362FB85, 0x8363FB85, 0x8364FB85, 0x8365FB85, 0x8366FB85, 0x8367FB85, 0x8368FB85, 0x8369FB85, 0x836AFB85, + 0x836BFB85, 0x836CFB85, 0x836DFB85, 0x836EFB85, 0x836FFB85, 0x8370FB85, 0x8371FB85, 0x8372FB85, 0x8373FB85, 0x8374FB85, 0x8375FB85, 0x8376FB85, 0x8377FB85, 0x8378FB85, 0x8379FB85, + 0x837AFB85, 0x837BFB85, 0x837CFB85, 0x837DFB85, 0x837EFB85, 0x837FFB85, 0x8380FB85, 0x8381FB85, 0x8382FB85, 0x8383FB85, 0x8384FB85, 0x8385FB85, 0x8386FB85, 0x8387FB85, 0x8388FB85, + 0x8389FB85, 0x838AFB85, 0x838BFB85, 0x838CFB85, 0x838DFB85, 0x838EFB85, 0x838FFB85, 0x8390FB85, 0x8391FB85, 0x8392FB85, 0x8393FB85, 0x8394FB85, 0x8395FB85, 0x8396FB85, 0x8397FB85, + 0x8398FB85, 0x8399FB85, 0x839AFB85, 0x839BFB85, 0x839CFB85, 0x839DFB85, 0x839EFB85, 0x839FFB85, 0x83A0FB85, 0x83A1FB85, 0x83A2FB85, 0x83A3FB85, 0x83A4FB85, 0x83A5FB85, 0x83A6FB85, + 0x83A7FB85, 0x83A8FB85, 0x83A9FB85, 0x83AAFB85, 0x83ABFB85, 0x83ACFB85, 0x83ADFB85, 0x83AEFB85, 0x83AFFB85, 0x83B0FB85, 0x83B1FB85, 0x83B2FB85, 0x83B3FB85, 0x83B4FB85, 0x83B5FB85, + 0x83B6FB85, 0x83B7FB85, 0x83B8FB85, 0x83B9FB85, 0x83BAFB85, 0x83BBFB85, 0x83BCFB85, 0x83BDFB85, 0x83BEFB85, 0x83BFFB85, 0x83C0FB85, 0x83C1FB85, 0x83C2FB85, 0x83C3FB85, 0x83C4FB85, + 0x83C5FB85, 0x83C6FB85, 0x83C7FB85, 0x83C8FB85, 0x83C9FB85, 0x83CAFB85, 0x83CBFB85, 0x83CCFB85, 0x83CDFB85, 0x83CEFB85, 0x83CFFB85, 0x83D0FB85, 0x83D1FB85, 0x83D2FB85, 0x83D3FB85, + 0x83D4FB85, 0x83D5FB85, 0x83D6FB85, 0x83D7FB85, 0x83D8FB85, 0x83D9FB85, 0x83DAFB85, 0x83DBFB85, 0x83DCFB85, 0x83DDFB85, 0x83DEFB85, 0x83DFFB85, 0x83E0FB85, 0x83E1FB85, 0x83E2FB85, + 0x83E3FB85, 0x83E4FB85, 0x83E5FB85, 0x83E6FB85, 0x83E7FB85, 0x83E8FB85, 0x83E9FB85, 0x83EAFB85, 0x83EBFB85, 0x83ECFB85, 0x83EDFB85, 0x83EEFB85, 0x83EFFB85, 0x83F0FB85, 0x83F1FB85, + 0x83F2FB85, 0x83F3FB85, 0x83F4FB85, 0x83F5FB85, 0x83F6FB85, 0x83F7FB85, 0x83F8FB85, 0x83F9FB85, 0x83FAFB85, 0x83FBFB85, 0x83FCFB85, 0x83FDFB85, 0x83FEFB85, 
0x83FFFB85, 0x8400FB85, + 0x8401FB85, 0x8402FB85, 0x8403FB85, 0x8404FB85, 0x8405FB85, 0x8406FB85, 0x8407FB85, 0x8408FB85, 0x8409FB85, 0x840AFB85, 0x840BFB85, 0x840CFB85, 0x840DFB85, 0x840EFB85, 0x840FFB85, + 0x8410FB85, 0x8411FB85, 0x8412FB85, 0x8413FB85, 0x8414FB85, 0x8415FB85, 0x8416FB85, 0x8417FB85, 0x8418FB85, 0x8419FB85, 0x841AFB85, 0x841BFB85, 0x841CFB85, 0x841DFB85, 0x841EFB85, + 0x841FFB85, 0x8420FB85, 0x8421FB85, 0x8422FB85, 0x8423FB85, 0x8424FB85, 0x8425FB85, 0x8426FB85, 0x8427FB85, 0x8428FB85, 0x8429FB85, 0x842AFB85, 0x842BFB85, 0x842CFB85, 0x842DFB85, + 0x842EFB85, 0x842FFB85, 0x8430FB85, 0x8431FB85, 0x8432FB85, 0x8433FB85, 0x8434FB85, 0x8435FB85, 0x8436FB85, 0x8437FB85, 0x8438FB85, 0x8439FB85, 0x843AFB85, 0x843BFB85, 0x843CFB85, + 0x843DFB85, 0x843EFB85, 0x843FFB85, 0x8440FB85, 0x8441FB85, 0x8442FB85, 0x8443FB85, 0x8444FB85, 0x8445FB85, 0x8446FB85, 0x8447FB85, 0x8448FB85, 0x8449FB85, 0x844AFB85, 0x844BFB85, + 0x844CFB85, 0x844DFB85, 0x844EFB85, 0x844FFB85, 0x8450FB85, 0x8451FB85, 0x8452FB85, 0x8453FB85, 0x8454FB85, 0x8455FB85, 0x8456FB85, 0x8457FB85, 0x8458FB85, 0x8459FB85, 0x845AFB85, + 0x845BFB85, 0x845CFB85, 0x845DFB85, 0x845EFB85, 0x845FFB85, 0x8460FB85, 0x8461FB85, 0x8462FB85, 0x8463FB85, 0x8464FB85, 0x8465FB85, 0x8466FB85, 0x8467FB85, 0x8468FB85, 0x8469FB85, + 0x846AFB85, 0x846BFB85, 0x846CFB85, 0x846DFB85, 0x846EFB85, 0x846FFB85, 0x8470FB85, 0x8471FB85, 0x8472FB85, 0x8473FB85, 0x8474FB85, 0x8475FB85, 0x8476FB85, 0x8477FB85, 0x8478FB85, + 0x8479FB85, 0x847AFB85, 0x847BFB85, 0x847CFB85, 0x847DFB85, 0x847EFB85, 0x847FFB85, 0x8480FB85, 0x8481FB85, 0x8482FB85, 0x8483FB85, 0x8484FB85, 0x8485FB85, 0x8486FB85, 0x8487FB85, + 0x8488FB85, 0x8489FB85, 0x848AFB85, 0x848BFB85, 0x848CFB85, 0x848DFB85, 0x848EFB85, 0x848FFB85, 0x8490FB85, 0x8491FB85, 0x8492FB85, 0x8493FB85, 0x8494FB85, 0x8495FB85, 0x8496FB85, + 0x8497FB85, 0x8498FB85, 0x8499FB85, 0x849AFB85, 0x849BFB85, 0x849CFB85, 0x849DFB85, 0x849EFB85, 0x849FFB85, 0x84A0FB85, 0x84A1FB85, 0x84A2FB85, 
0x84A3FB85, 0x84A4FB85, 0x84A5FB85, + 0x84A6FB85, 0x84A7FB85, 0x84A8FB85, 0x84A9FB85, 0x84AAFB85, 0x84ABFB85, 0x84ACFB85, 0x84ADFB85, 0x84AEFB85, 0x84AFFB85, 0x84B0FB85, 0x84B1FB85, 0x84B2FB85, 0x84B3FB85, 0x84B4FB85, + 0x84B5FB85, 0x84B6FB85, 0x84B7FB85, 0x84B8FB85, 0x84B9FB85, 0x84BAFB85, 0x84BBFB85, 0x84BCFB85, 0x84BDFB85, 0x84BEFB85, 0x84BFFB85, 0x84C0FB85, 0x84C1FB85, 0x84C2FB85, 0x84C3FB85, + 0x84C4FB85, 0x84C5FB85, 0x84C6FB85, 0x84C7FB85, 0x84C8FB85, 0x84C9FB85, 0x84CAFB85, 0x84CBFB85, 0x84CCFB85, 0x84CDFB85, 0x84CEFB85, 0x84CFFB85, 0x84D0FB85, 0x84D1FB85, 0x84D2FB85, + 0x84D3FB85, 0x84D4FB85, 0x84D5FB85, 0x84D6FB85, 0x84D7FB85, 0x84D8FB85, 0x84D9FB85, 0x84DAFB85, 0x84DBFB85, 0x84DCFB85, 0x84DDFB85, 0x84DEFB85, 0x84DFFB85, 0x84E0FB85, 0x84E1FB85, + 0x84E2FB85, 0x84E3FB85, 0x84E4FB85, 0x84E5FB85, 0x84E6FB85, 0x84E7FB85, 0x84E8FB85, 0x84E9FB85, 0x84EAFB85, 0x84EBFB85, 0x84ECFB85, 0x84EDFB85, 0x84EEFB85, 0x84EFFB85, 0x84F0FB85, + 0x84F1FB85, 0x84F2FB85, 0x84F3FB85, 0x84F4FB85, 0x84F5FB85, 0x84F6FB85, 0x84F7FB85, 0x84F8FB85, 0x84F9FB85, 0x84FAFB85, 0x84FBFB85, 0x84FCFB85, 0x84FDFB85, 0x84FEFB85, 0x84FFFB85, + 0x8500FB85, 0x8501FB85, 0x8502FB85, 0x8503FB85, 0x8504FB85, 0x8505FB85, 0x8506FB85, 0x8507FB85, 0x8508FB85, 0x8509FB85, 0x850AFB85, 0x850BFB85, 0x850CFB85, 0x850DFB85, 0x850EFB85, + 0x850FFB85, 0x8510FB85, 0x8511FB85, 0x8512FB85, 0x8513FB85, 0x8514FB85, 0x8515FB85, 0x8516FB85, 0x8517FB85, 0x8518FB85, 0x8519FB85, 0x851AFB85, 0x851BFB85, 0x851CFB85, 0x851DFB85, + 0x851EFB85, 0x851FFB85, 0x8520FB85, 0x8521FB85, 0x8522FB85, 0x8523FB85, 0x8524FB85, 0x8525FB85, 0x8526FB85, 0x8527FB85, 0x8528FB85, 0x8529FB85, 0x852AFB85, 0x852BFB85, 0x852CFB85, + 0x852DFB85, 0x852EFB85, 0x852FFB85, 0x8530FB85, 0x8531FB85, 0x8532FB85, 0x8533FB85, 0x8534FB85, 0x8535FB85, 0x8536FB85, 0x8537FB85, 0x8538FB85, 0x8539FB85, 0x853AFB85, 0x853BFB85, + 0x853CFB85, 0x853DFB85, 0x853EFB85, 0x853FFB85, 0x8540FB85, 0x8541FB85, 0x8542FB85, 0x8543FB85, 0x8544FB85, 0x8545FB85, 0x8546FB85, 
0x8547FB85, 0x8548FB85, 0x8549FB85, 0x854AFB85, + 0x854BFB85, 0x854CFB85, 0x854DFB85, 0x854EFB85, 0x854FFB85, 0x8550FB85, 0x8551FB85, 0x8552FB85, 0x8553FB85, 0x8554FB85, 0x8555FB85, 0x8556FB85, 0x8557FB85, 0x8558FB85, 0x8559FB85, + 0x855AFB85, 0x855BFB85, 0x855CFB85, 0x855DFB85, 0x855EFB85, 0x855FFB85, 0x8560FB85, 0x8561FB85, 0x8562FB85, 0x8563FB85, 0x8564FB85, 0x8565FB85, 0x8566FB85, 0x8567FB85, 0x8568FB85, + 0x8569FB85, 0x856AFB85, 0x856BFB85, 0x856CFB85, 0x856DFB85, 0x856EFB85, 0x856FFB85, 0x8570FB85, 0x8571FB85, 0x8572FB85, 0x8573FB85, 0x8574FB85, 0x8575FB85, 0x8576FB85, 0x8577FB85, + 0x8578FB85, 0x8579FB85, 0x857AFB85, 0x857BFB85, 0x857CFB85, 0x857DFB85, 0x857EFB85, 0x857FFB85, 0x8580FB85, 0x8581FB85, 0x8582FB85, 0x8583FB85, 0x8584FB85, 0x8585FB85, 0x8586FB85, + 0x8587FB85, 0x8588FB85, 0x8589FB85, 0x858AFB85, 0x858BFB85, 0x858CFB85, 0x858DFB85, 0x858EFB85, 0x858FFB85, 0x8590FB85, 0x8591FB85, 0x8592FB85, 0x8593FB85, 0x8594FB85, 0x8595FB85, + 0x8596FB85, 0x8597FB85, 0x8598FB85, 0x8599FB85, 0x859AFB85, 0x859BFB85, 0x859CFB85, 0x859DFB85, 0x859EFB85, 0x859FFB85, 0x85A0FB85, 0x85A1FB85, 0x85A2FB85, 0x85A3FB85, 0x85A4FB85, + 0x85A5FB85, 0x85A6FB85, 0x85A7FB85, 0x85A8FB85, 0x85A9FB85, 0x85AAFB85, 0x85ABFB85, 0x85ACFB85, 0x85ADFB85, 0x85AEFB85, 0x85AFFB85, 0x85B0FB85, 0x85B1FB85, 0x85B2FB85, 0x85B3FB85, + 0x85B4FB85, 0x85B5FB85, 0x85B6FB85, 0x85B7FB85, 0x85B8FB85, 0x85B9FB85, 0x85BAFB85, 0x85BBFB85, 0x85BCFB85, 0x85BDFB85, 0x85BEFB85, 0x85BFFB85, 0x85C0FB85, 0x85C1FB85, 0x85C2FB85, + 0x85C3FB85, 0x85C4FB85, 0x85C5FB85, 0x85C6FB85, 0x85C7FB85, 0x85C8FB85, 0x85C9FB85, 0x85CAFB85, 0x85CBFB85, 0x85CCFB85, 0x85CDFB85, 0x85CEFB85, 0x85CFFB85, 0x85D0FB85, 0x85D1FB85, + 0x85D2FB85, 0x85D3FB85, 0x85D4FB85, 0x85D5FB85, 0x85D6FB85, 0x85D7FB85, 0x85D8FB85, 0x85D9FB85, 0x85DAFB85, 0x85DBFB85, 0x85DCFB85, 0x85DDFB85, 0x85DEFB85, 0x85DFFB85, 0x85E0FB85, + 0x85E1FB85, 0x85E2FB85, 0x85E3FB85, 0x85E4FB85, 0x85E5FB85, 0x85E6FB85, 0x85E7FB85, 0x85E8FB85, 0x85E9FB85, 0x85EAFB85, 
0x85EBFB85, 0x85ECFB85, 0x85EDFB85, 0x85EEFB85, 0x85EFFB85, + 0x85F0FB85, 0x85F1FB85, 0x85F2FB85, 0x85F3FB85, 0x85F4FB85, 0x85F5FB85, 0x85F6FB85, 0x85F7FB85, 0x85F8FB85, 0x85F9FB85, 0x85FAFB85, 0x85FBFB85, 0x85FCFB85, 0x85FDFB85, 0x85FEFB85, + 0x85FFFB85, 0x8600FB85, 0x8601FB85, 0x8602FB85, 0x8603FB85, 0x8604FB85, 0x8605FB85, 0x8606FB85, 0x8607FB85, 0x8608FB85, 0x8609FB85, 0x860AFB85, 0x860BFB85, 0x860CFB85, 0x860DFB85, + 0x860EFB85, 0x860FFB85, 0x8610FB85, 0x8611FB85, 0x8612FB85, 0x8613FB85, 0x8614FB85, 0x8615FB85, 0x8616FB85, 0x8617FB85, 0x8618FB85, 0x8619FB85, 0x861AFB85, 0x861BFB85, 0x861CFB85, + 0x861DFB85, 0x861EFB85, 0x861FFB85, 0x8620FB85, 0x8621FB85, 0x8622FB85, 0x8623FB85, 0x8624FB85, 0x8625FB85, 0x8626FB85, 0x8627FB85, 0x8628FB85, 0x8629FB85, 0x862AFB85, 0x862BFB85, + 0x862CFB85, 0x862DFB85, 0x862EFB85, 0x862FFB85, 0x8630FB85, 0x8631FB85, 0x8632FB85, 0x8633FB85, 0x8634FB85, 0x8635FB85, 0x8636FB85, 0x8637FB85, 0x8638FB85, 0x8639FB85, 0x863AFB85, + 0x863BFB85, 0x863CFB85, 0x863DFB85, 0x863EFB85, 0x863FFB85, 0x8640FB85, 0x8641FB85, 0x8642FB85, 0x8643FB85, 0x8644FB85, 0x8645FB85, 0x8646FB85, 0x8647FB85, 0x8648FB85, 0x8649FB85, + 0x864AFB85, 0x864BFB85, 0x864CFB85, 0x864DFB85, 0x864EFB85, 0x864FFB85, 0x8650FB85, 0x8651FB85, 0x8652FB85, 0x8653FB85, 0x8654FB85, 0x8655FB85, 0x8656FB85, 0x8657FB85, 0x8658FB85, + 0x8659FB85, 0x865AFB85, 0x865BFB85, 0x865CFB85, 0x865DFB85, 0x865EFB85, 0x865FFB85, 0x8660FB85, 0x8661FB85, 0x8662FB85, 0x8663FB85, 0x8664FB85, 0x8665FB85, 0x8666FB85, 0x8667FB85, + 0x8668FB85, 0x8669FB85, 0x866AFB85, 0x866BFB85, 0x866CFB85, 0x866DFB85, 0x866EFB85, 0x866FFB85, 0x8670FB85, 0x8671FB85, 0x8672FB85, 0x8673FB85, 0x8674FB85, 0x8675FB85, 0x8676FB85, + 0x8677FB85, 0x8678FB85, 0x8679FB85, 0x867AFB85, 0x867BFB85, 0x867CFB85, 0x867DFB85, 0x867EFB85, 0x867FFB85, 0x8680FB85, 0x8681FB85, 0x8682FB85, 0x8683FB85, 0x8684FB85, 0x8685FB85, + 0x8686FB85, 0x8687FB85, 0x8688FB85, 0x8689FB85, 0x868AFB85, 0x868BFB85, 0x868CFB85, 0x868DFB85, 0x868EFB85, 
0x868FFB85, 0x8690FB85, 0x8691FB85, 0x8692FB85, 0x8693FB85, 0x8694FB85, + 0x8695FB85, 0x8696FB85, 0x8697FB85, 0x8698FB85, 0x8699FB85, 0x869AFB85, 0x869BFB85, 0x869CFB85, 0x869DFB85, 0x869EFB85, 0x869FFB85, 0x86A0FB85, 0x86A1FB85, 0x86A2FB85, 0x86A3FB85, + 0x86A4FB85, 0x86A5FB85, 0x86A6FB85, 0x86A7FB85, 0x86A8FB85, 0x86A9FB85, 0x86AAFB85, 0x86ABFB85, 0x86ACFB85, 0x86ADFB85, 0x86AEFB85, 0x86AFFB85, 0x86B0FB85, 0x86B1FB85, 0x86B2FB85, + 0x86B3FB85, 0x86B4FB85, 0x86B5FB85, 0x86B6FB85, 0x86B7FB85, 0x86B8FB85, 0x86B9FB85, 0x86BAFB85, 0x86BBFB85, 0x86BCFB85, 0x86BDFB85, 0x86BEFB85, 0x86BFFB85, 0x86C0FB85, 0x86C1FB85, + 0x86C2FB85, 0x86C3FB85, 0x86C4FB85, 0x86C5FB85, 0x86C6FB85, 0x86C7FB85, 0x86C8FB85, 0x86C9FB85, 0x86CAFB85, 0x86CBFB85, 0x86CCFB85, 0x86CDFB85, 0x86CEFB85, 0x86CFFB85, 0x86D0FB85, + 0x86D1FB85, 0x86D2FB85, 0x86D3FB85, 0x86D4FB85, 0x86D5FB85, 0x86D6FB85, 0x86D7FB85, 0x86D8FB85, 0x86D9FB85, 0x86DAFB85, 0x86DBFB85, 0x86DCFB85, 0x86DDFB85, 0x86DEFB85, 0x86DFFB85, + 0x86E0FB85, 0x86E1FB85, 0x86E2FB85, 0x86E3FB85, 0x86E4FB85, 0x86E5FB85, 0x86E6FB85, 0x86E7FB85, 0x86E8FB85, 0x86E9FB85, 0x86EAFB85, 0x86EBFB85, 0x86ECFB85, 0x86EDFB85, 0x86EEFB85, + 0x86EFFB85, 0x86F0FB85, 0x86F1FB85, 0x86F2FB85, 0x86F3FB85, 0x86F4FB85, 0x86F5FB85, 0x86F6FB85, 0x86F7FB85, 0x86F8FB85, 0x86F9FB85, 0x86FAFB85, 0x86FBFB85, 0x86FCFB85, 0x86FDFB85, + 0x86FEFB85, 0x86FFFB85, 0x8700FB85, 0x8701FB85, 0x8702FB85, 0x8703FB85, 0x8704FB85, 0x8705FB85, 0x8706FB85, 0x8707FB85, 0x8708FB85, 0x8709FB85, 0x870AFB85, 0x870BFB85, 0x870CFB85, + 0x870DFB85, 0x870EFB85, 0x870FFB85, 0x8710FB85, 0x8711FB85, 0x8712FB85, 0x8713FB85, 0x8714FB85, 0x8715FB85, 0x8716FB85, 0x8717FB85, 0x8718FB85, 0x8719FB85, 0x871AFB85, 0x871BFB85, + 0x871CFB85, 0x871DFB85, 0x871EFB85, 0x871FFB85, 0x8720FB85, 0x8721FB85, 0x8722FB85, 0x8723FB85, 0x8724FB85, 0x8725FB85, 0x8726FB85, 0x8727FB85, 0x8728FB85, 0x8729FB85, 0x872AFB85, + 0x872BFB85, 0x872CFB85, 0x872DFB85, 0x872EFB85, 0x872FFB85, 0x8730FB85, 0x8731FB85, 0x8732FB85, 
0x8733FB85, 0x8734FB85, 0x8735FB85, 0x8736FB85, 0x8737FB85, 0x8738FB85, 0x8739FB85, + 0x873AFB85, 0x873BFB85, 0x873CFB85, 0x873DFB85, 0x873EFB85, 0x873FFB85, 0x8740FB85, 0x8741FB85, 0x8742FB85, 0x8743FB85, 0x8744FB85, 0x8745FB85, 0x8746FB85, 0x8747FB85, 0x8748FB85, + 0x8749FB85, 0x874AFB85, 0x874BFB85, 0x874CFB85, 0x874DFB85, 0x874EFB85, 0x874FFB85, 0x8750FB85, 0x8751FB85, 0x8752FB85, 0x8753FB85, 0x8754FB85, 0x8755FB85, 0x8756FB85, 0x8757FB85, + 0x8758FB85, 0x8759FB85, 0x875AFB85, 0x875BFB85, 0x875CFB85, 0x875DFB85, 0x875EFB85, 0x875FFB85, 0x8760FB85, 0x8761FB85, 0x8762FB85, 0x8763FB85, 0x8764FB85, 0x8765FB85, 0x8766FB85, + 0x8767FB85, 0x8768FB85, 0x8769FB85, 0x876AFB85, 0x876BFB85, 0x876CFB85, 0x876DFB85, 0x876EFB85, 0x876FFB85, 0x8770FB85, 0x8771FB85, 0x8772FB85, 0x8773FB85, 0x8774FB85, 0x8775FB85, + 0x8776FB85, 0x8777FB85, 0x8778FB85, 0x8779FB85, 0x877AFB85, 0x877BFB85, 0x877CFB85, 0x877DFB85, 0x877EFB85, 0x877FFB85, 0x8780FB85, 0x8781FB85, 0x8782FB85, 0x8783FB85, 0x8784FB85, + 0x8785FB85, 0x8786FB85, 0x8787FB85, 0x8788FB85, 0x8789FB85, 0x878AFB85, 0x878BFB85, 0x878CFB85, 0x878DFB85, 0x878EFB85, 0x878FFB85, 0x8790FB85, 0x8791FB85, 0x8792FB85, 0x8793FB85, + 0x8794FB85, 0x8795FB85, 0x8796FB85, 0x8797FB85, 0x8798FB85, 0x8799FB85, 0x879AFB85, 0x879BFB85, 0x879CFB85, 0x879DFB85, 0x879EFB85, 0x879FFB85, 0x87A0FB85, 0x87A1FB85, 0x87A2FB85, + 0x87A3FB85, 0x87A4FB85, 0x87A5FB85, 0x87A6FB85, 0x87A7FB85, 0x87A8FB85, 0x87A9FB85, 0x87AAFB85, 0x87ABFB85, 0x87ACFB85, 0x87ADFB85, 0x87AEFB85, 0x87AFFB85, 0x87B0FB85, 0x87B1FB85, + 0x87B2FB85, 0x87B3FB85, 0x87B4FB85, 0x87B5FB85, 0x87B6FB85, 0x87B7FB85, 0x87B8FB85, 0x87B9FB85, 0x87BAFB85, 0x87BBFB85, 0x87BCFB85, 0x87BDFB85, 0x87BEFB85, 0x87BFFB85, 0x87C0FB85, + 0x87C1FB85, 0x87C2FB85, 0x87C3FB85, 0x87C4FB85, 0x87C5FB85, 0x87C6FB85, 0x87C7FB85, 0x87C8FB85, 0x87C9FB85, 0x87CAFB85, 0x87CBFB85, 0x87CCFB85, 0x87CDFB85, 0x87CEFB85, 0x87CFFB85, + 0x87D0FB85, 0x87D1FB85, 0x87D2FB85, 0x87D3FB85, 0x87D4FB85, 0x87D5FB85, 0x87D6FB85, 
0x87D7FB85, 0x87D8FB85, 0x87D9FB85, 0x87DAFB85, 0x87DBFB85, 0x87DCFB85, 0x87DDFB85, 0x87DEFB85, + 0x87DFFB85, 0x87E0FB85, 0x87E1FB85, 0x87E2FB85, 0x87E3FB85, 0x87E4FB85, 0x87E5FB85, 0x87E6FB85, 0x87E7FB85, 0x87E8FB85, 0x87E9FB85, 0x87EAFB85, 0x87EBFB85, 0x87ECFB85, 0x87EDFB85, + 0x87EEFB85, 0x87EFFB85, 0x87F0FB85, 0x87F1FB85, 0x87F2FB85, 0x87F3FB85, 0x87F4FB85, 0x87F5FB85, 0x87F6FB85, 0x87F7FB85, 0x87F8FB85, 0x87F9FB85, 0x87FAFB85, 0x87FBFB85, 0x87FCFB85, + 0x87FDFB85, 0x87FEFB85, 0x87FFFB85, 0x8800FB85, 0x8801FB85, 0x8802FB85, 0x8803FB85, 0x8804FB85, 0x8805FB85, 0x8806FB85, 0x8807FB85, 0x8808FB85, 0x8809FB85, 0x880AFB85, 0x880BFB85, + 0x880CFB85, 0x880DFB85, 0x880EFB85, 0x880FFB85, 0x8810FB85, 0x8811FB85, 0x8812FB85, 0x8813FB85, 0x8814FB85, 0x8815FB85, 0x8816FB85, 0x8817FB85, 0x8818FB85, 0x8819FB85, 0x881AFB85, + 0x881BFB85, 0x881CFB85, 0x881DFB85, 0x881EFB85, 0x881FFB85, 0x8820FB85, 0x8821FB85, 0x8822FB85, 0x8823FB85, 0x8824FB85, 0x8825FB85, 0x8826FB85, 0x8827FB85, 0x8828FB85, 0x8829FB85, + 0x882AFB85, 0x882BFB85, 0x882CFB85, 0x882DFB85, 0x882EFB85, 0x882FFB85, 0x8830FB85, 0x8831FB85, 0x8832FB85, 0x8833FB85, 0x8834FB85, 0x8835FB85, 0x8836FB85, 0x8837FB85, 0x8838FB85, + 0x8839FB85, 0x883AFB85, 0x883BFB85, 0x883CFB85, 0x883DFB85, 0x883EFB85, 0x883FFB85, 0x8840FB85, 0x8841FB85, 0x8842FB85, 0x8843FB85, 0x8844FB85, 0x8845FB85, 0x8846FB85, 0x8847FB85, + 0x8848FB85, 0x8849FB85, 0x884AFB85, 0x884BFB85, 0x884CFB85, 0x884DFB85, 0x884EFB85, 0x884FFB85, 0x8850FB85, 0x8851FB85, 0x8852FB85, 0x8853FB85, 0x8854FB85, 0x8855FB85, 0x8856FB85, + 0x8857FB85, 0x8858FB85, 0x8859FB85, 0x885AFB85, 0x885BFB85, 0x885CFB85, 0x885DFB85, 0x885EFB85, 0x885FFB85, 0x8860FB85, 0x8861FB85, 0x8862FB85, 0x8863FB85, 0x8864FB85, 0x8865FB85, + 0x8866FB85, 0x8867FB85, 0x8868FB85, 0x8869FB85, 0x886AFB85, 0x886BFB85, 0x886CFB85, 0x886DFB85, 0x886EFB85, 0x886FFB85, 0x8870FB85, 0x8871FB85, 0x8872FB85, 0x8873FB85, 0x8874FB85, + 0x8875FB85, 0x8876FB85, 0x8877FB85, 0x8878FB85, 0x8879FB85, 0x887AFB85, 
0x887BFB85, 0x887CFB85, 0x887DFB85, 0x887EFB85, 0x887FFB85, 0x8880FB85, 0x8881FB85, 0x8882FB85, 0x8883FB85, + 0x8884FB85, 0x8885FB85, 0x8886FB85, 0x8887FB85, 0x8888FB85, 0x8889FB85, 0x888AFB85, 0x888BFB85, 0x888CFB85, 0x888DFB85, 0x888EFB85, 0x888FFB85, 0x8890FB85, 0x8891FB85, 0x8892FB85, + 0x8893FB85, 0x8894FB85, 0x8895FB85, 0x8896FB85, 0x8897FB85, 0x8898FB85, 0x8899FB85, 0x889AFB85, 0x889BFB85, 0x889CFB85, 0x889DFB85, 0x889EFB85, 0x889FFB85, 0x88A0FB85, 0x88A1FB85, + 0x88A2FB85, 0x88A3FB85, 0x88A4FB85, 0x88A5FB85, 0x88A6FB85, 0x88A7FB85, 0x88A8FB85, 0x88A9FB85, 0x88AAFB85, 0x88ABFB85, 0x88ACFB85, 0x88ADFB85, 0x88AEFB85, 0x88AFFB85, 0x88B0FB85, + 0x88B1FB85, 0x88B2FB85, 0x88B3FB85, 0x88B4FB85, 0x88B5FB85, 0x88B6FB85, 0x88B7FB85, 0x88B8FB85, 0x88B9FB85, 0x88BAFB85, 0x88BBFB85, 0x88BCFB85, 0x88BDFB85, 0x88BEFB85, 0x88BFFB85, + 0x88C0FB85, 0x88C1FB85, 0x88C2FB85, 0x88C3FB85, 0x88C4FB85, 0x88C5FB85, 0x88C6FB85, 0x88C7FB85, 0x88C8FB85, 0x88C9FB85, 0x88CAFB85, 0x88CBFB85, 0x88CCFB85, 0x88CDFB85, 0x88CEFB85, + 0x88CFFB85, 0x88D0FB85, 0x88D1FB85, 0x88D2FB85, 0x88D3FB85, 0x88D4FB85, 0x88D5FB85, 0x88D6FB85, 0x88D7FB85, 0x88D8FB85, 0x88D9FB85, 0x88DAFB85, 0x88DBFB85, 0x88DCFB85, 0x88DDFB85, + 0x88DEFB85, 0x88DFFB85, 0x88E0FB85, 0x88E1FB85, 0x88E2FB85, 0x88E3FB85, 0x88E4FB85, 0x88E5FB85, 0x88E6FB85, 0x88E7FB85, 0x88E8FB85, 0x88E9FB85, 0x88EAFB85, 0x88EBFB85, 0x88ECFB85, + 0x88EDFB85, 0x88EEFB85, 0x88EFFB85, 0x88F0FB85, 0x88F1FB85, 0x88F2FB85, 0x88F3FB85, 0x88F4FB85, 0x88F5FB85, 0x88F6FB85, 0x88F7FB85, 0x88F8FB85, 0x88F9FB85, 0x88FAFB85, 0x88FBFB85, + 0x88FCFB85, 0x88FDFB85, 0x88FEFB85, 0x88FFFB85, 0x8900FB85, 0x8901FB85, 0x8902FB85, 0x8903FB85, 0x8904FB85, 0x8905FB85, 0x8906FB85, 0x8907FB85, 0x8908FB85, 0x8909FB85, 0x890AFB85, + 0x890BFB85, 0x890CFB85, 0x890DFB85, 0x890EFB85, 0x890FFB85, 0x8910FB85, 0x8911FB85, 0x8912FB85, 0x8913FB85, 0x8914FB85, 0x8915FB85, 0x8916FB85, 0x8917FB85, 0x8918FB85, 0x8919FB85, + 0x891AFB85, 0x891BFB85, 0x891CFB85, 0x891DFB85, 0x891EFB85, 
0x891FFB85, 0x8920FB85, 0x8921FB85, 0x8922FB85, 0x8923FB85, 0x8924FB85, 0x8925FB85, 0x8926FB85, 0x8927FB85, 0x8928FB85, + 0x8929FB85, 0x892AFB85, 0x892BFB85, 0x892CFB85, 0x892DFB85, 0x892EFB85, 0x892FFB85, 0x8930FB85, 0x8931FB85, 0x8932FB85, 0x8933FB85, 0x8934FB85, 0x8935FB85, 0x8936FB85, 0x8937FB85, + 0x8938FB85, 0x8939FB85, 0x893AFB85, 0x893BFB85, 0x893CFB85, 0x893DFB85, 0x893EFB85, 0x893FFB85, 0x8940FB85, 0x8941FB85, 0x8942FB85, 0x8943FB85, 0x8944FB85, 0x8945FB85, 0x8946FB85, + 0x8947FB85, 0x8948FB85, 0x8949FB85, 0x894AFB85, 0x894BFB85, 0x894CFB85, 0x894DFB85, 0x894EFB85, 0x894FFB85, 0x8950FB85, 0x8951FB85, 0x8952FB85, 0x8953FB85, 0x8954FB85, 0x8955FB85, + 0x8956FB85, 0x8957FB85, 0x8958FB85, 0x8959FB85, 0x895AFB85, 0x895BFB85, 0x895CFB85, 0x895DFB85, 0x895EFB85, 0x895FFB85, 0x8960FB85, 0x8961FB85, 0x8962FB85, 0x8963FB85, 0x8964FB85, + 0x8965FB85, 0x8966FB85, 0x8967FB85, 0x8968FB85, 0x8969FB85, 0x896AFB85, 0x896BFB85, 0x896CFB85, 0x896DFB85, 0x896EFB85, 0x896FFB85, 0x8970FB85, 0x8971FB85, 0x8972FB85, 0x8973FB85, + 0x8974FB85, 0x8975FB85, 0x8976FB85, 0x8977FB85, 0x8978FB85, 0x8979FB85, 0x897AFB85, 0x897BFB85, 0x897CFB85, 0x897DFB85, 0x897EFB85, 0x897FFB85, 0x8980FB85, 0x8981FB85, 0x8982FB85, + 0x8983FB85, 0x8984FB85, 0x8985FB85, 0x8986FB85, 0x8987FB85, 0x8988FB85, 0x8989FB85, 0x898AFB85, 0x898BFB85, 0x898CFB85, 0x898DFB85, 0x898EFB85, 0x898FFB85, 0x8990FB85, 0x8991FB85, + 0x8992FB85, 0x8993FB85, 0x8994FB85, 0x8995FB85, 0x8996FB85, 0x8997FB85, 0x8998FB85, 0x8999FB85, 0x899AFB85, 0x899BFB85, 0x899CFB85, 0x899DFB85, 0x899EFB85, 0x899FFB85, 0x89A0FB85, + 0x89A1FB85, 0x89A2FB85, 0x89A3FB85, 0x89A4FB85, 0x89A5FB85, 0x89A6FB85, 0x89A7FB85, 0x89A8FB85, 0x89A9FB85, 0x89AAFB85, 0x89ABFB85, 0x89ACFB85, 0x89ADFB85, 0x89AEFB85, 0x89AFFB85, + 0x89B0FB85, 0x89B1FB85, 0x89B2FB85, 0x89B3FB85, 0x89B4FB85, 0x89B5FB85, 0x89B6FB85, 0x89B7FB85, 0x89B8FB85, 0x89B9FB85, 0x89BAFB85, 0x89BBFB85, 0x89BCFB85, 0x89BDFB85, 0x89BEFB85, + 0x89BFFB85, 0x89C0FB85, 0x89C1FB85, 0x89C2FB85, 
0x89C3FB85, 0x89C4FB85, 0x89C5FB85, 0x89C6FB85, 0x89C7FB85, 0x89C8FB85, 0x89C9FB85, 0x89CAFB85, 0x89CBFB85, 0x89CCFB85, 0x89CDFB85, + 0x89CEFB85, 0x89CFFB85, 0x89D0FB85, 0x89D1FB85, 0x89D2FB85, 0x89D3FB85, 0x89D4FB85, 0x89D5FB85, 0x89D6FB85, 0x89D7FB85, 0x89D8FB85, 0x89D9FB85, 0x89DAFB85, 0x89DBFB85, 0x89DCFB85, + 0x89DDFB85, 0x89DEFB85, 0x89DFFB85, 0x89E0FB85, 0x89E1FB85, 0x89E2FB85, 0x89E3FB85, 0x89E4FB85, 0x89E5FB85, 0x89E6FB85, 0x89E7FB85, 0x89E8FB85, 0x89E9FB85, 0x89EAFB85, 0x89EBFB85, + 0x89ECFB85, 0x89EDFB85, 0x89EEFB85, 0x89EFFB85, 0x89F0FB85, 0x89F1FB85, 0x89F2FB85, 0x89F3FB85, 0x89F4FB85, 0x89F5FB85, 0x89F6FB85, 0x89F7FB85, 0x89F8FB85, 0x89F9FB85, 0x89FAFB85, + 0x89FBFB85, 0x89FCFB85, 0x89FDFB85, 0x89FEFB85, 0x89FFFB85, 0x8A00FB85, 0x8A01FB85, 0x8A02FB85, 0x8A03FB85, 0x8A04FB85, 0x8A05FB85, 0x8A06FB85, 0x8A07FB85, 0x8A08FB85, 0x8A09FB85, + 0x8A0AFB85, 0x8A0BFB85, 0x8A0CFB85, 0x8A0DFB85, 0x8A0EFB85, 0x8A0FFB85, 0x8A10FB85, 0x8A11FB85, 0x8A12FB85, 0x8A13FB85, 0x8A14FB85, 0x8A15FB85, 0x8A16FB85, 0x8A17FB85, 0x8A18FB85, + 0x8A19FB85, 0x8A1AFB85, 0x8A1BFB85, 0x8A1CFB85, 0x8A1DFB85, 0x8A1EFB85, 0x8A1FFB85, 0x8A20FB85, 0x8A21FB85, 0x8A22FB85, 0x8A23FB85, 0x8A24FB85, 0x8A25FB85, 0x8A26FB85, 0x8A27FB85, + 0x8A28FB85, 0x8A29FB85, 0x8A2AFB85, 0x8A2BFB85, 0x8A2CFB85, 0x8A2DFB85, 0x8A2EFB85, 0x8A2FFB85, 0x8A30FB85, 0x8A31FB85, 0x8A32FB85, 0x8A33FB85, 0x8A34FB85, 0x8A35FB85, 0x8A36FB85, + 0x8A37FB85, 0x8A38FB85, 0x8A39FB85, 0x8A3AFB85, 0x8A3BFB85, 0x8A3CFB85, 0x8A3DFB85, 0x8A3EFB85, 0x8A3FFB85, 0x8A40FB85, 0x8A41FB85, 0x8A42FB85, 0x8A43FB85, 0x8A44FB85, 0x8A45FB85, + 0x8A46FB85, 0x8A47FB85, 0x8A48FB85, 0x8A49FB85, 0x8A4AFB85, 0x8A4BFB85, 0x8A4CFB85, 0x8A4DFB85, 0x8A4EFB85, 0x8A4FFB85, 0x8A50FB85, 0x8A51FB85, 0x8A52FB85, 0x8A53FB85, 0x8A54FB85, + 0x8A55FB85, 0x8A56FB85, 0x8A57FB85, 0x8A58FB85, 0x8A59FB85, 0x8A5AFB85, 0x8A5BFB85, 0x8A5CFB85, 0x8A5DFB85, 0x8A5EFB85, 0x8A5FFB85, 0x8A60FB85, 0x8A61FB85, 0x8A62FB85, 0x8A63FB85, + 0x8A64FB85, 0x8A65FB85, 0x8A66FB85, 
0x8A67FB85, 0x8A68FB85, 0x8A69FB85, 0x8A6AFB85, 0x8A6BFB85, 0x8A6CFB85, 0x8A6DFB85, 0x8A6EFB85, 0x8A6FFB85, 0x8A70FB85, 0x8A71FB85, 0x8A72FB85, + 0x8A73FB85, 0x8A74FB85, 0x8A75FB85, 0x8A76FB85, 0x8A77FB85, 0x8A78FB85, 0x8A79FB85, 0x8A7AFB85, 0x8A7BFB85, 0x8A7CFB85, 0x8A7DFB85, 0x8A7EFB85, 0x8A7FFB85, 0x8A80FB85, 0x8A81FB85, + 0x8A82FB85, 0x8A83FB85, 0x8A84FB85, 0x8A85FB85, 0x8A86FB85, 0x8A87FB85, 0x8A88FB85, 0x8A89FB85, 0x8A8AFB85, 0x8A8BFB85, 0x8A8CFB85, 0x8A8DFB85, 0x8A8EFB85, 0x8A8FFB85, 0x8A90FB85, + 0x8A91FB85, 0x8A92FB85, 0x8A93FB85, 0x8A94FB85, 0x8A95FB85, 0x8A96FB85, 0x8A97FB85, 0x8A98FB85, 0x8A99FB85, 0x8A9AFB85, 0x8A9BFB85, 0x8A9CFB85, 0x8A9DFB85, 0x8A9EFB85, 0x8A9FFB85, + 0x8AA0FB85, 0x8AA1FB85, 0x8AA2FB85, 0x8AA3FB85, 0x8AA4FB85, 0x8AA5FB85, 0x8AA6FB85, 0x8AA7FB85, 0x8AA8FB85, 0x8AA9FB85, 0x8AAAFB85, 0x8AABFB85, 0x8AACFB85, 0x8AADFB85, 0x8AAEFB85, + 0x8AAFFB85, 0x8AB0FB85, 0x8AB1FB85, 0x8AB2FB85, 0x8AB3FB85, 0x8AB4FB85, 0x8AB5FB85, 0x8AB6FB85, 0x8AB7FB85, 0x8AB8FB85, 0x8AB9FB85, 0x8ABAFB85, 0x8ABBFB85, 0x8ABCFB85, 0x8ABDFB85, + 0x8ABEFB85, 0x8ABFFB85, 0x8AC0FB85, 0x8AC1FB85, 0x8AC2FB85, 0x8AC3FB85, 0x8AC4FB85, 0x8AC5FB85, 0x8AC6FB85, 0x8AC7FB85, 0x8AC8FB85, 0x8AC9FB85, 0x8ACAFB85, 0x8ACBFB85, 0x8ACCFB85, + 0x8ACDFB85, 0x8ACEFB85, 0x8ACFFB85, 0x8AD0FB85, 0x8AD1FB85, 0x8AD2FB85, 0x8AD3FB85, 0x8AD4FB85, 0x8AD5FB85, 0x8AD6FB85, 0x8AD7FB85, 0x8AD8FB85, 0x8AD9FB85, 0x8ADAFB85, 0x8ADBFB85, + 0x8ADCFB85, 0x8ADDFB85, 0x8ADEFB85, 0x8ADFFB85, 0x8AE0FB85, 0x8AE1FB85, 0x8AE2FB85, 0x8AE3FB85, 0x8AE4FB85, 0x8AE5FB85, 0x8AE6FB85, 0x8AE7FB85, 0x8AE8FB85, 0x8AE9FB85, 0x8AEAFB85, + 0x8AEBFB85, 0x8AECFB85, 0x8AEDFB85, 0x8AEEFB85, 0x8AEFFB85, 0x8AF0FB85, 0x8AF1FB85, 0x8AF2FB85, 0x8AF3FB85, 0x8AF4FB85, 0x8AF5FB85, 0x8AF6FB85, 0x8AF7FB85, 0x8AF8FB85, 0x8AF9FB85, + 0x8AFAFB85, 0x8AFBFB85, 0x8AFCFB85, 0x8AFDFB85, 0x8AFEFB85, 0x8AFFFB85, 0x8B00FB85, 0x8B01FB85, 0x8B02FB85, 0x8B03FB85, 0x8B04FB85, 0x8B05FB85, 0x8B06FB85, 0x8B07FB85, 0x8B08FB85, + 0x8B09FB85, 0x8B0AFB85, 
0x8B0BFB85, 0x8B0CFB85, 0x8B0DFB85, 0x8B0EFB85, 0x8B0FFB85, 0x8B10FB85, 0x8B11FB85, 0x8B12FB85, 0x8B13FB85, 0x8B14FB85, 0x8B15FB85, 0x8B16FB85, 0x8B17FB85, + 0x8B18FB85, 0x8B19FB85, 0x8B1AFB85, 0x8B1BFB85, 0x8B1CFB85, 0x8B1DFB85, 0x8B1EFB85, 0x8B1FFB85, 0x8B20FB85, 0x8B21FB85, 0x8B22FB85, 0x8B23FB85, 0x8B24FB85, 0x8B25FB85, 0x8B26FB85, + 0x8B27FB85, 0x8B28FB85, 0x8B29FB85, 0x8B2AFB85, 0x8B2BFB85, 0x8B2CFB85, 0x8B2DFB85, 0x8B2EFB85, 0x8B2FFB85, 0x8B30FB85, 0x8B31FB85, 0x8B32FB85, 0x8B33FB85, 0x8B34FB85, 0x8B35FB85, + 0x8B36FB85, 0x8B37FB85, 0x8B38FB85, 0x8B39FB85, 0x8B3AFB85, 0x8B3BFB85, 0x8B3CFB85, 0x8B3DFB85, 0x8B3EFB85, 0x8B3FFB85, 0x8B40FB85, 0x8B41FB85, 0x8B42FB85, 0x8B43FB85, 0x8B44FB85, + 0x8B45FB85, 0x8B46FB85, 0x8B47FB85, 0x8B48FB85, 0x8B49FB85, 0x8B4AFB85, 0x8B4BFB85, 0x8B4CFB85, 0x8B4DFB85, 0x8B4EFB85, 0x8B4FFB85, 0x8B50FB85, 0x8B51FB85, 0x8B52FB85, 0x8B53FB85, + 0x8B54FB85, 0x8B55FB85, 0x8B56FB85, 0x8B57FB85, 0x8B58FB85, 0x8B59FB85, 0x8B5AFB85, 0x8B5BFB85, 0x8B5CFB85, 0x8B5DFB85, 0x8B5EFB85, 0x8B5FFB85, 0x8B60FB85, 0x8B61FB85, 0x8B62FB85, + 0x8B63FB85, 0x8B64FB85, 0x8B65FB85, 0x8B66FB85, 0x8B67FB85, 0x8B68FB85, 0x8B69FB85, 0x8B6AFB85, 0x8B6BFB85, 0x8B6CFB85, 0x8B6DFB85, 0x8B6EFB85, 0x8B6FFB85, 0x8B70FB85, 0x8B71FB85, + 0x8B72FB85, 0x8B73FB85, 0x8B74FB85, 0x8B75FB85, 0x8B76FB85, 0x8B77FB85, 0x8B78FB85, 0x8B79FB85, 0x8B7AFB85, 0x8B7BFB85, 0x8B7CFB85, 0x8B7DFB85, 0x8B7EFB85, 0x8B7FFB85, 0x8B80FB85, + 0x8B81FB85, 0x8B82FB85, 0x8B83FB85, 0x8B84FB85, 0x8B85FB85, 0x8B86FB85, 0x8B87FB85, 0x8B88FB85, 0x8B89FB85, 0x8B8AFB85, 0x8B8BFB85, 0x8B8CFB85, 0x8B8DFB85, 0x8B8EFB85, 0x8B8FFB85, + 0x8B90FB85, 0x8B91FB85, 0x8B92FB85, 0x8B93FB85, 0x8B94FB85, 0x8B95FB85, 0x8B96FB85, 0x8B97FB85, 0x8B98FB85, 0x8B99FB85, 0x8B9AFB85, 0x8B9BFB85, 0x8B9CFB85, 0x8B9DFB85, 0x8B9EFB85, + 0x8B9FFB85, 0x8BA0FB85, 0x8BA1FB85, 0x8BA2FB85, 0x8BA3FB85, 0x8BA4FB85, 0x8BA5FB85, 0x8BA6FB85, 0x8BA7FB85, 0x8BA8FB85, 0x8BA9FB85, 0x8BAAFB85, 0x8BABFB85, 0x8BACFB85, 0x8BADFB85, + 0x8BAEFB85, 
0x8BAFFB85, 0x8BB0FB85, 0x8BB1FB85, 0x8BB2FB85, 0x8BB3FB85, 0x8BB4FB85, 0x8BB5FB85, 0x8BB6FB85, 0x8BB7FB85, 0x8BB8FB85, 0x8BB9FB85, 0x8BBAFB85, 0x8BBBFB85, 0x8BBCFB85, + 0x8BBDFB85, 0x8BBEFB85, 0x8BBFFB85, 0x8BC0FB85, 0x8BC1FB85, 0x8BC2FB85, 0x8BC3FB85, 0x8BC4FB85, 0x8BC5FB85, 0x8BC6FB85, 0x8BC7FB85, 0x8BC8FB85, 0x8BC9FB85, 0x8BCAFB85, 0x8BCBFB85, + 0x8BCCFB85, 0x8BCDFB85, 0x8BCEFB85, 0x8BCFFB85, 0x8BD0FB85, 0x8BD1FB85, 0x8BD2FB85, 0x8BD3FB85, 0x8BD4FB85, 0x8BD5FB85, 0x8BD6FB85, 0x8BD7FB85, 0x8BD8FB85, 0x8BD9FB85, 0x8BDAFB85, + 0x8BDBFB85, 0x8BDCFB85, 0x8BDDFB85, 0x8BDEFB85, 0x8BDFFB85, 0x8BE0FB85, 0x8BE1FB85, 0x8BE2FB85, 0x8BE3FB85, 0x8BE4FB85, 0x8BE5FB85, 0x8BE6FB85, 0x8BE7FB85, 0x8BE8FB85, 0x8BE9FB85, + 0x8BEAFB85, 0x8BEBFB85, 0x8BECFB85, 0x8BEDFB85, 0x8BEEFB85, 0x8BEFFB85, 0x8BF0FB85, 0x8BF1FB85, 0x8BF2FB85, 0x8BF3FB85, 0x8BF4FB85, 0x8BF5FB85, 0x8BF6FB85, 0x8BF7FB85, 0x8BF8FB85, + 0x8BF9FB85, 0x8BFAFB85, 0x8BFBFB85, 0x8BFCFB85, 0x8BFDFB85, 0x8BFEFB85, 0x8BFFFB85, 0x8C00FB85, 0x8C01FB85, 0x8C02FB85, 0x8C03FB85, 0x8C04FB85, 0x8C05FB85, 0x8C06FB85, 0x8C07FB85, + 0x8C08FB85, 0x8C09FB85, 0x8C0AFB85, 0x8C0BFB85, 0x8C0CFB85, 0x8C0DFB85, 0x8C0EFB85, 0x8C0FFB85, 0x8C10FB85, 0x8C11FB85, 0x8C12FB85, 0x8C13FB85, 0x8C14FB85, 0x8C15FB85, 0x8C16FB85, + 0x8C17FB85, 0x8C18FB85, 0x8C19FB85, 0x8C1AFB85, 0x8C1BFB85, 0x8C1CFB85, 0x8C1DFB85, 0x8C1EFB85, 0x8C1FFB85, 0x8C20FB85, 0x8C21FB85, 0x8C22FB85, 0x8C23FB85, 0x8C24FB85, 0x8C25FB85, + 0x8C26FB85, 0x8C27FB85, 0x8C28FB85, 0x8C29FB85, 0x8C2AFB85, 0x8C2BFB85, 0x8C2CFB85, 0x8C2DFB85, 0x8C2EFB85, 0x8C2FFB85, 0x8C30FB85, 0x8C31FB85, 0x8C32FB85, 0x8C33FB85, 0x8C34FB85, + 0x8C35FB85, 0x8C36FB85, 0x8C37FB85, 0x8C38FB85, 0x8C39FB85, 0x8C3AFB85, 0x8C3BFB85, 0x8C3CFB85, 0x8C3DFB85, 0x8C3EFB85, 0x8C3FFB85, 0x8C40FB85, 0x8C41FB85, 0x8C42FB85, 0x8C43FB85, + 0x8C44FB85, 0x8C45FB85, 0x8C46FB85, 0x8C47FB85, 0x8C48FB85, 0x8C49FB85, 0x8C4AFB85, 0x8C4BFB85, 0x8C4CFB85, 0x8C4DFB85, 0x8C4EFB85, 0x8C4FFB85, 0x8C50FB85, 0x8C51FB85, 0x8C52FB85, + 
0x8C53FB85, 0x8C54FB85, 0x8C55FB85, 0x8C56FB85, 0x8C57FB85, 0x8C58FB85, 0x8C59FB85, 0x8C5AFB85, 0x8C5BFB85, 0x8C5CFB85, 0x8C5DFB85, 0x8C5EFB85, 0x8C5FFB85, 0x8C60FB85, 0x8C61FB85, + 0x8C62FB85, 0x8C63FB85, 0x8C64FB85, 0x8C65FB85, 0x8C66FB85, 0x8C67FB85, 0x8C68FB85, 0x8C69FB85, 0x8C6AFB85, 0x8C6BFB85, 0x8C6CFB85, 0x8C6DFB85, 0x8C6EFB85, 0x8C6FFB85, 0x8C70FB85, + 0x8C71FB85, 0x8C72FB85, 0x8C73FB85, 0x8C74FB85, 0x8C75FB85, 0x8C76FB85, 0x8C77FB85, 0x8C78FB85, 0x8C79FB85, 0x8C7AFB85, 0x8C7BFB85, 0x8C7CFB85, 0x8C7DFB85, 0x8C7EFB85, 0x8C7FFB85, + 0x8C80FB85, 0x8C81FB85, 0x8C82FB85, 0x8C83FB85, 0x8C84FB85, 0x8C85FB85, 0x8C86FB85, 0x8C87FB85, 0x8C88FB85, 0x8C89FB85, 0x8C8AFB85, 0x8C8BFB85, 0x8C8CFB85, 0x8C8DFB85, 0x8C8EFB85, + 0x8C8FFB85, 0x8C90FB85, 0x8C91FB85, 0x8C92FB85, 0x8C93FB85, 0x8C94FB85, 0x8C95FB85, 0x8C96FB85, 0x8C97FB85, 0x8C98FB85, 0x8C99FB85, 0x8C9AFB85, 0x8C9BFB85, 0x8C9CFB85, 0x8C9DFB85, + 0x8C9EFB85, 0x8C9FFB85, 0x8CA0FB85, 0x8CA1FB85, 0x8CA2FB85, 0x8CA3FB85, 0x8CA4FB85, 0x8CA5FB85, 0x8CA6FB85, 0x8CA7FB85, 0x8CA8FB85, 0x8CA9FB85, 0x8CAAFB85, 0x8CABFB85, 0x8CACFB85, + 0x8CADFB85, 0x8CAEFB85, 0x8CAFFB85, 0x8CB0FB85, 0x8CB1FB85, 0x8CB2FB85, 0x8CB3FB85, 0x8CB4FB85, 0x8CB5FB85, 0x8CB6FB85, 0x8CB7FB85, 0x8CB8FB85, 0x8CB9FB85, 0x8CBAFB85, 0x8CBBFB85, + 0x8CBCFB85, 0x8CBDFB85, 0x8CBEFB85, 0x8CBFFB85, 0x8CC0FB85, 0x8CC1FB85, 0x8CC2FB85, 0x8CC3FB85, 0x8CC4FB85, 0x8CC5FB85, 0x8CC6FB85, 0x8CC7FB85, 0x8CC8FB85, 0x8CC9FB85, 0x8CCAFB85, + 0x8CCBFB85, 0x8CCCFB85, 0x8CCDFB85, 0x8CCEFB85, 0x8CCFFB85, 0x8CD0FB85, 0x8CD1FB85, 0x8CD2FB85, 0x8CD3FB85, 0x8CD4FB85, 0x8CD5FB85, 0x8CD6FB85, 0x8CD7FB85, 0x8CD8FB85, 0x8CD9FB85, + 0x8CDAFB85, 0x8CDBFB85, 0x8CDCFB85, 0x8CDDFB85, 0x8CDEFB85, 0x8CDFFB85, 0x8CE0FB85, 0x8CE1FB85, 0x8CE2FB85, 0x8CE3FB85, 0x8CE4FB85, 0x8CE5FB85, 0x8CE6FB85, 0x8CE7FB85, 0x8CE8FB85, + 0x8CE9FB85, 0x8CEAFB85, 0x8CEBFB85, 0x8CECFB85, 0x8CEDFB85, 0x8CEEFB85, 0x8CEFFB85, 0x8CF0FB85, 0x8CF1FB85, 0x8CF2FB85, 0x8CF3FB85, 0x8CF4FB85, 0x8CF5FB85, 0x8CF6FB85, 0x8CF7FB85, 
+ 0x8CF8FB85, 0x8CF9FB85, 0x8CFAFB85, 0x8CFBFB85, 0x8CFCFB85, 0x8CFDFB85, 0x8CFEFB85, 0x8CFFFB85, 0x8D00FB85, 0x8D01FB85, 0x8D02FB85, 0x8D03FB85, 0x8D04FB85, 0x8D05FB85, 0x8D06FB85, + 0x8D07FB85, 0x8D08FB85, 0x8D09FB85, 0x8D0AFB85, 0x8D0BFB85, 0x8D0CFB85, 0x8D0DFB85, 0x8D0EFB85, 0x8D0FFB85, 0x8D10FB85, 0x8D11FB85, 0x8D12FB85, 0x8D13FB85, 0x8D14FB85, 0x8D15FB85, + 0x8D16FB85, 0x8D17FB85, 0x8D18FB85, 0x8D19FB85, 0x8D1AFB85, 0x8D1BFB85, 0x8D1CFB85, 0x8D1DFB85, 0x8D1EFB85, 0x8D1FFB85, 0x8D20FB85, 0x8D21FB85, 0x8D22FB85, 0x8D23FB85, 0x8D24FB85, + 0x8D25FB85, 0x8D26FB85, 0x8D27FB85, 0x8D28FB85, 0x8D29FB85, 0x8D2AFB85, 0x8D2BFB85, 0x8D2CFB85, 0x8D2DFB85, 0x8D2EFB85, 0x8D2FFB85, 0x8D30FB85, 0x8D31FB85, 0x8D32FB85, 0x8D33FB85, + 0x8D34FB85, 0x8D35FB85, 0x8D36FB85, 0x8D37FB85, 0x8D38FB85, 0x8D39FB85, 0x8D3AFB85, 0x8D3BFB85, 0x8D3CFB85, 0x8D3DFB85, 0x8D3EFB85, 0x8D3FFB85, 0x8D40FB85, 0x8D41FB85, 0x8D42FB85, + 0x8D43FB85, 0x8D44FB85, 0x8D45FB85, 0x8D46FB85, 0x8D47FB85, 0x8D48FB85, 0x8D49FB85, 0x8D4AFB85, 0x8D4BFB85, 0x8D4CFB85, 0x8D4DFB85, 0x8D4EFB85, 0x8D4FFB85, 0x8D50FB85, 0x8D51FB85, + 0x8D52FB85, 0x8D53FB85, 0x8D54FB85, 0x8D55FB85, 0x8D56FB85, 0x8D57FB85, 0x8D58FB85, 0x8D59FB85, 0x8D5AFB85, 0x8D5BFB85, 0x8D5CFB85, 0x8D5DFB85, 0x8D5EFB85, 0x8D5FFB85, 0x8D60FB85, + 0x8D61FB85, 0x8D62FB85, 0x8D63FB85, 0x8D64FB85, 0x8D65FB85, 0x8D66FB85, 0x8D67FB85, 0x8D68FB85, 0x8D69FB85, 0x8D6AFB85, 0x8D6BFB85, 0x8D6CFB85, 0x8D6DFB85, 0x8D6EFB85, 0x8D6FFB85, + 0x8D70FB85, 0x8D71FB85, 0x8D72FB85, 0x8D73FB85, 0x8D74FB85, 0x8D75FB85, 0x8D76FB85, 0x8D77FB85, 0x8D78FB85, 0x8D79FB85, 0x8D7AFB85, 0x8D7BFB85, 0x8D7CFB85, 0x8D7DFB85, 0x8D7EFB85, + 0x8D7FFB85, 0x8D80FB85, 0x8D81FB85, 0x8D82FB85, 0x8D83FB85, 0x8D84FB85, 0x8D85FB85, 0x8D86FB85, 0x8D87FB85, 0x8D88FB85, 0x8D89FB85, 0x8D8AFB85, 0x8D8BFB85, 0x8D8CFB85, 0x8D8DFB85, + 0x8D8EFB85, 0x8D8FFB85, 0x8D90FB85, 0x8D91FB85, 0x8D92FB85, 0x8D93FB85, 0x8D94FB85, 0x8D95FB85, 0x8D96FB85, 0x8D97FB85, 0x8D98FB85, 0x8D99FB85, 0x8D9AFB85, 0x8D9BFB85, 
0x8D9CFB85, + 0x8D9DFB85, 0x8D9EFB85, 0x8D9FFB85, 0x8DA0FB85, 0x8DA1FB85, 0x8DA2FB85, 0x8DA3FB85, 0x8DA4FB85, 0x8DA5FB85, 0x8DA6FB85, 0x8DA7FB85, 0x8DA8FB85, 0x8DA9FB85, 0x8DAAFB85, 0x8DABFB85, + 0x8DACFB85, 0x8DADFB85, 0x8DAEFB85, 0x8DAFFB85, 0x8DB0FB85, 0x8DB1FB85, 0x8DB2FB85, 0x8DB3FB85, 0x8DB4FB85, 0x8DB5FB85, 0x8DB6FB85, 0x8DB7FB85, 0x8DB8FB85, 0x8DB9FB85, 0x8DBAFB85, + 0x8DBBFB85, 0x8DBCFB85, 0x8DBDFB85, 0x8DBEFB85, 0x8DBFFB85, 0x8DC0FB85, 0x8DC1FB85, 0x8DC2FB85, 0x8DC3FB85, 0x8DC4FB85, 0x8DC5FB85, 0x8DC6FB85, 0x8DC7FB85, 0x8DC8FB85, 0x8DC9FB85, + 0x8DCAFB85, 0x8DCBFB85, 0x8DCCFB85, 0x8DCDFB85, 0x8DCEFB85, 0x8DCFFB85, 0x8DD0FB85, 0x8DD1FB85, 0x8DD2FB85, 0x8DD3FB85, 0x8DD4FB85, 0x8DD5FB85, 0x8DD6FB85, 0x8DD7FB85, 0x8DD8FB85, + 0x8DD9FB85, 0x8DDAFB85, 0x8DDBFB85, 0x8DDCFB85, 0x8DDDFB85, 0x8DDEFB85, 0x8DDFFB85, 0x8DE0FB85, 0x8DE1FB85, 0x8DE2FB85, 0x8DE3FB85, 0x8DE4FB85, 0x8DE5FB85, 0x8DE6FB85, 0x8DE7FB85, + 0x8DE8FB85, 0x8DE9FB85, 0x8DEAFB85, 0x8DEBFB85, 0x8DECFB85, 0x8DEDFB85, 0x8DEEFB85, 0x8DEFFB85, 0x8DF0FB85, 0x8DF1FB85, 0x8DF2FB85, 0x8DF3FB85, 0x8DF4FB85, 0x8DF5FB85, 0x8DF6FB85, + 0x8DF7FB85, 0x8DF8FB85, 0x8DF9FB85, 0x8DFAFB85, 0x8DFBFB85, 0x8DFCFB85, 0x8DFDFB85, 0x8DFEFB85, 0x8DFFFB85, 0x8E00FB85, 0x8E01FB85, 0x8E02FB85, 0x8E03FB85, 0x8E04FB85, 0x8E05FB85, + 0x8E06FB85, 0x8E07FB85, 0x8E08FB85, 0x8E09FB85, 0x8E0AFB85, 0x8E0BFB85, 0x8E0CFB85, 0x8E0DFB85, 0x8E0EFB85, 0x8E0FFB85, 0x8E10FB85, 0x8E11FB85, 0x8E12FB85, 0x8E13FB85, 0x8E14FB85, + 0x8E15FB85, 0x8E16FB85, 0x8E17FB85, 0x8E18FB85, 0x8E19FB85, 0x8E1AFB85, 0x8E1BFB85, 0x8E1CFB85, 0x8E1DFB85, 0x8E1EFB85, 0x8E1FFB85, 0x8E20FB85, 0x8E21FB85, 0x8E22FB85, 0x8E23FB85, + 0x8E24FB85, 0x8E25FB85, 0x8E26FB85, 0x8E27FB85, 0x8E28FB85, 0x8E29FB85, 0x8E2AFB85, 0x8E2BFB85, 0x8E2CFB85, 0x8E2DFB85, 0x8E2EFB85, 0x8E2FFB85, 0x8E30FB85, 0x8E31FB85, 0x8E32FB85, + 0x8E33FB85, 0x8E34FB85, 0x8E35FB85, 0x8E36FB85, 0x8E37FB85, 0x8E38FB85, 0x8E39FB85, 0x8E3AFB85, 0x8E3BFB85, 0x8E3CFB85, 0x8E3DFB85, 0x8E3EFB85, 0x8E3FFB85, 
0x8E40FB85, 0x8E41FB85, + 0x8E42FB85, 0x8E43FB85, 0x8E44FB85, 0x8E45FB85, 0x8E46FB85, 0x8E47FB85, 0x8E48FB85, 0x8E49FB85, 0x8E4AFB85, 0x8E4BFB85, 0x8E4CFB85, 0x8E4DFB85, 0x8E4EFB85, 0x8E4FFB85, 0x8E50FB85, + 0x8E51FB85, 0x8E52FB85, 0x8E53FB85, 0x8E54FB85, 0x8E55FB85, 0x8E56FB85, 0x8E57FB85, 0x8E58FB85, 0x8E59FB85, 0x8E5AFB85, 0x8E5BFB85, 0x8E5CFB85, 0x8E5DFB85, 0x8E5EFB85, 0x8E5FFB85, + 0x8E60FB85, 0x8E61FB85, 0x8E62FB85, 0x8E63FB85, 0x8E64FB85, 0x8E65FB85, 0x8E66FB85, 0x8E67FB85, 0x8E68FB85, 0x8E69FB85, 0x8E6AFB85, 0x8E6BFB85, 0x8E6CFB85, 0x8E6DFB85, 0x8E6EFB85, + 0x8E6FFB85, 0x8E70FB85, 0x8E71FB85, 0x8E72FB85, 0x8E73FB85, 0x8E74FB85, 0x8E75FB85, 0x8E76FB85, 0x8E77FB85, 0x8E78FB85, 0x8E79FB85, 0x8E7AFB85, 0x8E7BFB85, 0x8E7CFB85, 0x8E7DFB85, + 0x8E7EFB85, 0x8E7FFB85, 0x8E80FB85, 0x8E81FB85, 0x8E82FB85, 0x8E83FB85, 0x8E84FB85, 0x8E85FB85, 0x8E86FB85, 0x8E87FB85, 0x8E88FB85, 0x8E89FB85, 0x8E8AFB85, 0x8E8BFB85, 0x8E8CFB85, + 0x8E8DFB85, 0x8E8EFB85, 0x8E8FFB85, 0x8E90FB85, 0x8E91FB85, 0x8E92FB85, 0x8E93FB85, 0x8E94FB85, 0x8E95FB85, 0x8E96FB85, 0x8E97FB85, 0x8E98FB85, 0x8E99FB85, 0x8E9AFB85, 0x8E9BFB85, + 0x8E9CFB85, 0x8E9DFB85, 0x8E9EFB85, 0x8E9FFB85, 0x8EA0FB85, 0x8EA1FB85, 0x8EA2FB85, 0x8EA3FB85, 0x8EA4FB85, 0x8EA5FB85, 0x8EA6FB85, 0x8EA7FB85, 0x8EA8FB85, 0x8EA9FB85, 0x8EAAFB85, + 0x8EABFB85, 0x8EACFB85, 0x8EADFB85, 0x8EAEFB85, 0x8EAFFB85, 0x8EB0FB85, 0x8EB1FB85, 0x8EB2FB85, 0x8EB3FB85, 0x8EB4FB85, 0x8EB5FB85, 0x8EB6FB85, 0x8EB7FB85, 0x8EB8FB85, 0x8EB9FB85, + 0x8EBAFB85, 0x8EBBFB85, 0x8EBCFB85, 0x8EBDFB85, 0x8EBEFB85, 0x8EBFFB85, 0x8EC0FB85, 0x8EC1FB85, 0x8EC2FB85, 0x8EC3FB85, 0x8EC4FB85, 0x8EC5FB85, 0x8EC6FB85, 0x8EC7FB85, 0x8EC8FB85, + 0x8EC9FB85, 0x8ECAFB85, 0x8ECBFB85, 0x8ECCFB85, 0x8ECDFB85, 0x8ECEFB85, 0x8ECFFB85, 0x8ED0FB85, 0x8ED1FB85, 0x8ED2FB85, 0x8ED3FB85, 0x8ED4FB85, 0x8ED5FB85, 0x8ED6FB85, 0x8ED7FB85, + 0x8ED8FB85, 0x8ED9FB85, 0x8EDAFB85, 0x8EDBFB85, 0x8EDCFB85, 0x8EDDFB85, 0x8EDEFB85, 0x8EDFFB85, 0x8EE0FB85, 0x8EE1FB85, 0x8EE2FB85, 0x8EE3FB85, 
0x8EE4FB85, 0x8EE5FB85, 0x8EE6FB85, + 0x8EE7FB85, 0x8EE8FB85, 0x8EE9FB85, 0x8EEAFB85, 0x8EEBFB85, 0x8EECFB85, 0x8EEDFB85, 0x8EEEFB85, 0x8EEFFB85, 0x8EF0FB85, 0x8EF1FB85, 0x8EF2FB85, 0x8EF3FB85, 0x8EF4FB85, 0x8EF5FB85, + 0x8EF6FB85, 0x8EF7FB85, 0x8EF8FB85, 0x8EF9FB85, 0x8EFAFB85, 0x8EFBFB85, 0x8EFCFB85, 0x8EFDFB85, 0x8EFEFB85, 0x8EFFFB85, 0x8F00FB85, 0x8F01FB85, 0x8F02FB85, 0x8F03FB85, 0x8F04FB85, + 0x8F05FB85, 0x8F06FB85, 0x8F07FB85, 0x8F08FB85, 0x8F09FB85, 0x8F0AFB85, 0x8F0BFB85, 0x8F0CFB85, 0x8F0DFB85, 0x8F0EFB85, 0x8F0FFB85, 0x8F10FB85, 0x8F11FB85, 0x8F12FB85, 0x8F13FB85, + 0x8F14FB85, 0x8F15FB85, 0x8F16FB85, 0x8F17FB85, 0x8F18FB85, 0x8F19FB85, 0x8F1AFB85, 0x8F1BFB85, 0x8F1CFB85, 0x8F1DFB85, 0x8F1EFB85, 0x8F1FFB85, 0x8F20FB85, 0x8F21FB85, 0x8F22FB85, + 0x8F23FB85, 0x8F24FB85, 0x8F25FB85, 0x8F26FB85, 0x8F27FB85, 0x8F28FB85, 0x8F29FB85, 0x8F2AFB85, 0x8F2BFB85, 0x8F2CFB85, 0x8F2DFB85, 0x8F2EFB85, 0x8F2FFB85, 0x8F30FB85, 0x8F31FB85, + 0x8F32FB85, 0x8F33FB85, 0x8F34FB85, 0x8F35FB85, 0x8F36FB85, 0x8F37FB85, 0x8F38FB85, 0x8F39FB85, 0x8F3AFB85, 0x8F3BFB85, 0x8F3CFB85, 0x8F3DFB85, 0x8F3EFB85, 0x8F3FFB85, 0x8F40FB85, + 0x8F41FB85, 0x8F42FB85, 0x8F43FB85, 0x8F44FB85, 0x8F45FB85, 0x8F46FB85, 0x8F47FB85, 0x8F48FB85, 0x8F49FB85, 0x8F4AFB85, 0x8F4BFB85, 0x8F4CFB85, 0x8F4DFB85, 0x8F4EFB85, 0x8F4FFB85, + 0x8F50FB85, 0x8F51FB85, 0x8F52FB85, 0x8F53FB85, 0x8F54FB85, 0x8F55FB85, 0x8F56FB85, 0x8F57FB85, 0x8F58FB85, 0x8F59FB85, 0x8F5AFB85, 0x8F5BFB85, 0x8F5CFB85, 0x8F5DFB85, 0x8F5EFB85, + 0x8F5FFB85, 0x8F60FB85, 0x8F61FB85, 0x8F62FB85, 0x8F63FB85, 0x8F64FB85, 0x8F65FB85, 0x8F66FB85, 0x8F67FB85, 0x8F68FB85, 0x8F69FB85, 0x8F6AFB85, 0x8F6BFB85, 0x8F6CFB85, 0x8F6DFB85, + 0x8F6EFB85, 0x8F6FFB85, 0x8F70FB85, 0x8F71FB85, 0x8F72FB85, 0x8F73FB85, 0x8F74FB85, 0x8F75FB85, 0x8F76FB85, 0x8F77FB85, 0x8F78FB85, 0x8F79FB85, 0x8F7AFB85, 0x8F7BFB85, 0x8F7CFB85, + 0x8F7DFB85, 0x8F7EFB85, 0x8F7FFB85, 0x8F80FB85, 0x8F81FB85, 0x8F82FB85, 0x8F83FB85, 0x8F84FB85, 0x8F85FB85, 0x8F86FB85, 0x8F87FB85, 
0x8F88FB85, 0x8F89FB85, 0x8F8AFB85, 0x8F8BFB85, + 0x8F8CFB85, 0x8F8DFB85, 0x8F8EFB85, 0x8F8FFB85, 0x8F90FB85, 0x8F91FB85, 0x8F92FB85, 0x8F93FB85, 0x8F94FB85, 0x8F95FB85, 0x8F96FB85, 0x8F97FB85, 0x8F98FB85, 0x8F99FB85, 0x8F9AFB85, + 0x8F9BFB85, 0x8F9CFB85, 0x8F9DFB85, 0x8F9EFB85, 0x8F9FFB85, 0x8FA0FB85, 0x8FA1FB85, 0x8FA2FB85, 0x8FA3FB85, 0x8FA4FB85, 0x8FA5FB85, 0x8FA6FB85, 0x8FA7FB85, 0x8FA8FB85, 0x8FA9FB85, + 0x8FAAFB85, 0x8FABFB85, 0x8FACFB85, 0x8FADFB85, 0x8FAEFB85, 0x8FAFFB85, 0x8FB0FB85, 0x8FB1FB85, 0x8FB2FB85, 0x8FB3FB85, 0x8FB4FB85, 0x8FB5FB85, 0x8FB6FB85, 0x8FB7FB85, 0x8FB8FB85, + 0x8FB9FB85, 0x8FBAFB85, 0x8FBBFB85, 0x8FBCFB85, 0x8FBDFB85, 0x8FBEFB85, 0x8FBFFB85, 0x8FC0FB85, 0x8FC1FB85, 0x8FC2FB85, 0x8FC3FB85, 0x8FC4FB85, 0x8FC5FB85, 0x8FC6FB85, 0x8FC7FB85, + 0x8FC8FB85, 0x8FC9FB85, 0x8FCAFB85, 0x8FCBFB85, 0x8FCCFB85, 0x8FCDFB85, 0x8FCEFB85, 0x8FCFFB85, 0x8FD0FB85, 0x8FD1FB85, 0x8FD2FB85, 0x8FD3FB85, 0x8FD4FB85, 0x8FD5FB85, 0x8FD6FB85, + 0x8FD7FB85, 0x8FD8FB85, 0x8FD9FB85, 0x8FDAFB85, 0x8FDBFB85, 0x8FDCFB85, 0x8FDDFB85, 0x8FDEFB85, 0x8FDFFB85, 0x8FE0FB85, 0x8FE1FB85, 0x8FE2FB85, 0x8FE3FB85, 0x8FE4FB85, 0x8FE5FB85, + 0x8FE6FB85, 0x8FE7FB85, 0x8FE8FB85, 0x8FE9FB85, 0x8FEAFB85, 0x8FEBFB85, 0x8FECFB85, 0x8FEDFB85, 0x8FEEFB85, 0x8FEFFB85, 0x8FF0FB85, 0x8FF1FB85, 0x8FF2FB85, 0x8FF3FB85, 0x8FF4FB85, + 0x8FF5FB85, 0x8FF6FB85, 0x8FF7FB85, 0x8FF8FB85, 0x8FF9FB85, 0x8FFAFB85, 0x8FFBFB85, 0x8FFCFB85, 0x8FFDFB85, 0x8FFEFB85, 0x8FFFFB85, 0x9000FB85, 0x9001FB85, 0x9002FB85, 0x9003FB85, + 0x9004FB85, 0x9005FB85, 0x9006FB85, 0x9007FB85, 0x9008FB85, 0x9009FB85, 0x900AFB85, 0x900BFB85, 0x900CFB85, 0x900DFB85, 0x900EFB85, 0x900FFB85, 0x9010FB85, 0x9011FB85, 0x9012FB85, + 0x9013FB85, 0x9014FB85, 0x9015FB85, 0x9016FB85, 0x9017FB85, 0x9018FB85, 0x9019FB85, 0x901AFB85, 0x901BFB85, 0x901CFB85, 0x901DFB85, 0x901EFB85, 0x901FFB85, 0x9020FB85, 0x9021FB85, + 0x9022FB85, 0x9023FB85, 0x9024FB85, 0x9025FB85, 0x9026FB85, 0x9027FB85, 0x9028FB85, 0x9029FB85, 0x902AFB85, 0x902BFB85, 
0x902CFB85, 0x902DFB85, 0x902EFB85, 0x902FFB85, 0x9030FB85, + 0x9031FB85, 0x9032FB85, 0x9033FB85, 0x9034FB85, 0x9035FB85, 0x9036FB85, 0x9037FB85, 0x9038FB85, 0x9039FB85, 0x903AFB85, 0x903BFB85, 0x903CFB85, 0x903DFB85, 0x903EFB85, 0x903FFB85, + 0x9040FB85, 0x9041FB85, 0x9042FB85, 0x9043FB85, 0x9044FB85, 0x9045FB85, 0x9046FB85, 0x9047FB85, 0x9048FB85, 0x9049FB85, 0x904AFB85, 0x904BFB85, 0x904CFB85, 0x904DFB85, 0x904EFB85, + 0x904FFB85, 0x9050FB85, 0x9051FB85, 0x9052FB85, 0x9053FB85, 0x9054FB85, 0x9055FB85, 0x9056FB85, 0x9057FB85, 0x9058FB85, 0x9059FB85, 0x905AFB85, 0x905BFB85, 0x905CFB85, 0x905DFB85, + 0x905EFB85, 0x905FFB85, 0x9060FB85, 0x9061FB85, 0x9062FB85, 0x9063FB85, 0x9064FB85, 0x9065FB85, 0x9066FB85, 0x9067FB85, 0x9068FB85, 0x9069FB85, 0x906AFB85, 0x906BFB85, 0x906CFB85, + 0x906DFB85, 0x906EFB85, 0x906FFB85, 0x9070FB85, 0x9071FB85, 0x9072FB85, 0x9073FB85, 0x9074FB85, 0x9075FB85, 0x9076FB85, 0x9077FB85, 0x9078FB85, 0x9079FB85, 0x907AFB85, 0x907BFB85, + 0x907CFB85, 0x907DFB85, 0x907EFB85, 0x907FFB85, 0x9080FB85, 0x9081FB85, 0x9082FB85, 0x9083FB85, 0x9084FB85, 0x9085FB85, 0x9086FB85, 0x9087FB85, 0x9088FB85, 0x9089FB85, 0x908AFB85, + 0x908BFB85, 0x908CFB85, 0x908DFB85, 0x908EFB85, 0x908FFB85, 0x9090FB85, 0x9091FB85, 0x9092FB85, 0x9093FB85, 0x9094FB85, 0x9095FB85, 0x9096FB85, 0x9097FB85, 0x9098FB85, 0x9099FB85, + 0x909AFB85, 0x909BFB85, 0x909CFB85, 0x909DFB85, 0x909EFB85, 0x909FFB85, 0x90A0FB85, 0x90A1FB85, 0x90A2FB85, 0x90A3FB85, 0x90A4FB85, 0x90A5FB85, 0x90A6FB85, 0x90A7FB85, 0x90A8FB85, + 0x90A9FB85, 0x90AAFB85, 0x90ABFB85, 0x90ACFB85, 0x90ADFB85, 0x90AEFB85, 0x90AFFB85, 0x90B0FB85, 0x90B1FB85, 0x90B2FB85, 0x90B3FB85, 0x90B4FB85, 0x90B5FB85, 0x90B6FB85, 0x90B7FB85, + 0x90B8FB85, 0x90B9FB85, 0x90BAFB85, 0x90BBFB85, 0x90BCFB85, 0x90BDFB85, 0x90BEFB85, 0x90BFFB85, 0x90C0FB85, 0x90C1FB85, 0x90C2FB85, 0x90C3FB85, 0x90C4FB85, 0x90C5FB85, 0x90C6FB85, + 0x90C7FB85, 0x90C8FB85, 0x90C9FB85, 0x90CAFB85, 0x90CBFB85, 0x90CCFB85, 0x90CDFB85, 0x90CEFB85, 0x90CFFB85, 
0x90D0FB85, 0x90D1FB85, 0x90D2FB85, 0x90D3FB85, 0x90D4FB85, 0x90D5FB85, + 0x90D6FB85, 0x90D7FB85, 0x90D8FB85, 0x90D9FB85, 0x90DAFB85, 0x90DBFB85, 0x90DCFB85, 0x90DDFB85, 0x90DEFB85, 0x90DFFB85, 0x90E0FB85, 0x90E1FB85, 0x90E2FB85, 0x90E3FB85, 0x90E4FB85, + 0x90E5FB85, 0x90E6FB85, 0x90E7FB85, 0x90E8FB85, 0x90E9FB85, 0x90EAFB85, 0x90EBFB85, 0x90ECFB85, 0x90EDFB85, 0x90EEFB85, 0x90EFFB85, 0x90F0FB85, 0x90F1FB85, 0x90F2FB85, 0x90F3FB85, + 0x90F4FB85, 0x90F5FB85, 0x90F6FB85, 0x90F7FB85, 0x90F8FB85, 0x90F9FB85, 0x90FAFB85, 0x90FBFB85, 0x90FCFB85, 0x90FDFB85, 0x90FEFB85, 0x90FFFB85, 0x9100FB85, 0x9101FB85, 0x9102FB85, + 0x9103FB85, 0x9104FB85, 0x9105FB85, 0x9106FB85, 0x9107FB85, 0x9108FB85, 0x9109FB85, 0x910AFB85, 0x910BFB85, 0x910CFB85, 0x910DFB85, 0x910EFB85, 0x910FFB85, 0x9110FB85, 0x9111FB85, + 0x9112FB85, 0x9113FB85, 0x9114FB85, 0x9115FB85, 0x9116FB85, 0x9117FB85, 0x9118FB85, 0x9119FB85, 0x911AFB85, 0x911BFB85, 0x911CFB85, 0x911DFB85, 0x911EFB85, 0x911FFB85, 0x9120FB85, + 0x9121FB85, 0x9122FB85, 0x9123FB85, 0x9124FB85, 0x9125FB85, 0x9126FB85, 0x9127FB85, 0x9128FB85, 0x9129FB85, 0x912AFB85, 0x912BFB85, 0x912CFB85, 0x912DFB85, 0x912EFB85, 0x912FFB85, + 0x9130FB85, 0x9131FB85, 0x9132FB85, 0x9133FB85, 0x9134FB85, 0x9135FB85, 0x9136FB85, 0x9137FB85, 0x9138FB85, 0x9139FB85, 0x913AFB85, 0x913BFB85, 0x913CFB85, 0x913DFB85, 0x913EFB85, + 0x913FFB85, 0x9140FB85, 0x9141FB85, 0x9142FB85, 0x9143FB85, 0x9144FB85, 0x9145FB85, 0x9146FB85, 0x9147FB85, 0x9148FB85, 0x9149FB85, 0x914AFB85, 0x914BFB85, 0x914CFB85, 0x914DFB85, + 0x914EFB85, 0x914FFB85, 0x9150FB85, 0x9151FB85, 0x9152FB85, 0x9153FB85, 0x9154FB85, 0x9155FB85, 0x9156FB85, 0x9157FB85, 0x9158FB85, 0x9159FB85, 0x915AFB85, 0x915BFB85, 0x915CFB85, + 0x915DFB85, 0x915EFB85, 0x915FFB85, 0x9160FB85, 0x9161FB85, 0x9162FB85, 0x9163FB85, 0x9164FB85, 0x9165FB85, 0x9166FB85, 0x9167FB85, 0x9168FB85, 0x9169FB85, 0x916AFB85, 0x916BFB85, + 0x916CFB85, 0x916DFB85, 0x916EFB85, 0x916FFB85, 0x9170FB85, 0x9171FB85, 0x9172FB85, 0x9173FB85, 
0x9174FB85, 0x9175FB85, 0x9176FB85, 0x9177FB85, 0x9178FB85, 0x9179FB85, 0x917AFB85, + 0x917BFB85, 0x917CFB85, 0x917DFB85, 0x917EFB85, 0x917FFB85, 0x9180FB85, 0x9181FB85, 0x9182FB85, 0x9183FB85, 0x9184FB85, 0x9185FB85, 0x9186FB85, 0x9187FB85, 0x9188FB85, 0x9189FB85, + 0x918AFB85, 0x918BFB85, 0x918CFB85, 0x918DFB85, 0x918EFB85, 0x918FFB85, 0x9190FB85, 0x9191FB85, 0x9192FB85, 0x9193FB85, 0x9194FB85, 0x9195FB85, 0x9196FB85, 0x9197FB85, 0x9198FB85, + 0x9199FB85, 0x919AFB85, 0x919BFB85, 0x919CFB85, 0x919DFB85, 0x919EFB85, 0x919FFB85, 0x91A0FB85, 0x91A1FB85, 0x91A2FB85, 0x91A3FB85, 0x91A4FB85, 0x91A5FB85, 0x91A6FB85, 0x91A7FB85, + 0x91A8FB85, 0x91A9FB85, 0x91AAFB85, 0x91ABFB85, 0x91ACFB85, 0x91ADFB85, 0x91AEFB85, 0x91AFFB85, 0x91B0FB85, 0x91B1FB85, 0x91B2FB85, 0x91B3FB85, 0x91B4FB85, 0x91B5FB85, 0x91B6FB85, + 0x91B7FB85, 0x91B8FB85, 0x91B9FB85, 0x91BAFB85, 0x91BBFB85, 0x91BCFB85, 0x91BDFB85, 0x91BEFB85, 0x91BFFB85, 0x91C0FB85, 0x91C1FB85, 0x91C2FB85, 0x91C3FB85, 0x91C4FB85, 0x91C5FB85, + 0x91C6FB85, 0x91C7FB85, 0x91C8FB85, 0x91C9FB85, 0x91CAFB85, 0x91CBFB85, 0x91CCFB85, 0x91CDFB85, 0x91CEFB85, 0x91CFFB85, 0x91D0FB85, 0x91D1FB85, 0x91D2FB85, 0x91D3FB85, 0x91D4FB85, + 0x91D5FB85, 0x91D6FB85, 0x91D7FB85, 0x91D8FB85, 0x91D9FB85, 0x91DAFB85, 0x91DBFB85, 0x91DCFB85, 0x91DDFB85, 0x91DEFB85, 0x91DFFB85, 0x91E0FB85, 0x91E1FB85, 0x91E2FB85, 0x91E3FB85, + 0x91E4FB85, 0x91E5FB85, 0x91E6FB85, 0x91E7FB85, 0x91E8FB85, 0x91E9FB85, 0x91EAFB85, 0x91EBFB85, 0x91ECFB85, 0x91EDFB85, 0x91EEFB85, 0x91EFFB85, 0x91F0FB85, 0x91F1FB85, 0x91F2FB85, + 0x91F3FB85, 0x91F4FB85, 0x91F5FB85, 0x91F6FB85, 0x91F7FB85, 0x91F8FB85, 0x91F9FB85, 0x91FAFB85, 0x91FBFB85, 0x91FCFB85, 0x91FDFB85, 0x91FEFB85, 0x91FFFB85, 0x9200FB85, 0x9201FB85, + 0x9202FB85, 0x9203FB85, 0x9204FB85, 0x9205FB85, 0x9206FB85, 0x9207FB85, 0x9208FB85, 0x9209FB85, 0x920AFB85, 0x920BFB85, 0x920CFB85, 0x920DFB85, 0x920EFB85, 0x920FFB85, 0x9210FB85, + 0x9211FB85, 0x9212FB85, 0x9213FB85, 0x9214FB85, 0x9215FB85, 0x9216FB85, 0x9217FB85, 
0x9218FB85, 0x9219FB85, 0x921AFB85, 0x921BFB85, 0x921CFB85, 0x921DFB85, 0x921EFB85, 0x921FFB85, + 0x9220FB85, 0x9221FB85, 0x9222FB85, 0x9223FB85, 0x9224FB85, 0x9225FB85, 0x9226FB85, 0x9227FB85, 0x9228FB85, 0x9229FB85, 0x922AFB85, 0x922BFB85, 0x922CFB85, 0x922DFB85, 0x922EFB85, + 0x922FFB85, 0x9230FB85, 0x9231FB85, 0x9232FB85, 0x9233FB85, 0x9234FB85, 0x9235FB85, 0x9236FB85, 0x9237FB85, 0x9238FB85, 0x9239FB85, 0x923AFB85, 0x923BFB85, 0x923CFB85, 0x923DFB85, + 0x923EFB85, 0x923FFB85, 0x9240FB85, 0x9241FB85, 0x9242FB85, 0x9243FB85, 0x9244FB85, 0x9245FB85, 0x9246FB85, 0x9247FB85, 0x9248FB85, 0x9249FB85, 0x924AFB85, 0x924BFB85, 0x924CFB85, + 0x924DFB85, 0x924EFB85, 0x924FFB85, 0x9250FB85, 0x9251FB85, 0x9252FB85, 0x9253FB85, 0x9254FB85, 0x9255FB85, 0x9256FB85, 0x9257FB85, 0x9258FB85, 0x9259FB85, 0x925AFB85, 0x925BFB85, + 0x925CFB85, 0x925DFB85, 0x925EFB85, 0x925FFB85, 0x9260FB85, 0x9261FB85, 0x9262FB85, 0x9263FB85, 0x9264FB85, 0x9265FB85, 0x9266FB85, 0x9267FB85, 0x9268FB85, 0x9269FB85, 0x926AFB85, + 0x926BFB85, 0x926CFB85, 0x926DFB85, 0x926EFB85, 0x926FFB85, 0x9270FB85, 0x9271FB85, 0x9272FB85, 0x9273FB85, 0x9274FB85, 0x9275FB85, 0x9276FB85, 0x9277FB85, 0x9278FB85, 0x9279FB85, + 0x927AFB85, 0x927BFB85, 0x927CFB85, 0x927DFB85, 0x927EFB85, 0x927FFB85, 0x9280FB85, 0x9281FB85, 0x9282FB85, 0x9283FB85, 0x9284FB85, 0x9285FB85, 0x9286FB85, 0x9287FB85, 0x9288FB85, + 0x9289FB85, 0x928AFB85, 0x928BFB85, 0x928CFB85, 0x928DFB85, 0x928EFB85, 0x928FFB85, 0x9290FB85, 0x9291FB85, 0x9292FB85, 0x9293FB85, 0x9294FB85, 0x9295FB85, 0x9296FB85, 0x9297FB85, + 0x9298FB85, 0x9299FB85, 0x929AFB85, 0x929BFB85, 0x929CFB85, 0x929DFB85, 0x929EFB85, 0x929FFB85, 0x92A0FB85, 0x92A1FB85, 0x92A2FB85, 0x92A3FB85, 0x92A4FB85, 0x92A5FB85, 0x92A6FB85, + 0x92A7FB85, 0x92A8FB85, 0x92A9FB85, 0x92AAFB85, 0x92ABFB85, 0x92ACFB85, 0x92ADFB85, 0x92AEFB85, 0x92AFFB85, 0x92B0FB85, 0x92B1FB85, 0x92B2FB85, 0x92B3FB85, 0x92B4FB85, 0x92B5FB85, + 0x92B6FB85, 0x92B7FB85, 0x92B8FB85, 0x92B9FB85, 0x92BAFB85, 0x92BBFB85, 
0x92BCFB85, 0x92BDFB85, 0x92BEFB85, 0x92BFFB85, 0x92C0FB85, 0x92C1FB85, 0x92C2FB85, 0x92C3FB85, 0x92C4FB85, + 0x92C5FB85, 0x92C6FB85, 0x92C7FB85, 0x92C8FB85, 0x92C9FB85, 0x92CAFB85, 0x92CBFB85, 0x92CCFB85, 0x92CDFB85, 0x92CEFB85, 0x92CFFB85, 0x92D0FB85, 0x92D1FB85, 0x92D2FB85, 0x92D3FB85, + 0x92D4FB85, 0x92D5FB85, 0x92D6FB85, 0x92D7FB85, 0x92D8FB85, 0x92D9FB85, 0x92DAFB85, 0x92DBFB85, 0x92DCFB85, 0x92DDFB85, 0x92DEFB85, 0x92DFFB85, 0x92E0FB85, 0x92E1FB85, 0x92E2FB85, + 0x92E3FB85, 0x92E4FB85, 0x92E5FB85, 0x92E6FB85, 0x92E7FB85, 0x92E8FB85, 0x92E9FB85, 0x92EAFB85, 0x92EBFB85, 0x92ECFB85, 0x92EDFB85, 0x92EEFB85, 0x92EFFB85, 0x92F0FB85, 0x92F1FB85, + 0x92F2FB85, 0x92F3FB85, 0x92F4FB85, 0x92F5FB85, 0x92F6FB85, 0x92F7FB85, 0x92F8FB85, 0x92F9FB85, 0x92FAFB85, 0x92FBFB85, 0x92FCFB85, 0x92FDFB85, 0x92FEFB85, 0x92FFFB85, 0x9300FB85, + 0x9301FB85, 0x9302FB85, 0x9303FB85, 0x9304FB85, 0x9305FB85, 0x9306FB85, 0x9307FB85, 0x9308FB85, 0x9309FB85, 0x930AFB85, 0x930BFB85, 0x930CFB85, 0x930DFB85, 0x930EFB85, 0x930FFB85, + 0x9310FB85, 0x9311FB85, 0x9312FB85, 0x9313FB85, 0x9314FB85, 0x9315FB85, 0x9316FB85, 0x9317FB85, 0x9318FB85, 0x9319FB85, 0x931AFB85, 0x931BFB85, 0x931CFB85, 0x931DFB85, 0x931EFB85, + 0x931FFB85, 0x9320FB85, 0x9321FB85, 0x9322FB85, 0x9323FB85, 0x9324FB85, 0x9325FB85, 0x9326FB85, 0x9327FB85, 0x9328FB85, 0x9329FB85, 0x932AFB85, 0x932BFB85, 0x932CFB85, 0x932DFB85, + 0x932EFB85, 0x932FFB85, 0x9330FB85, 0x9331FB85, 0x9332FB85, 0x9333FB85, 0x9334FB85, 0x9335FB85, 0x9336FB85, 0x9337FB85, 0x9338FB85, 0x9339FB85, 0x933AFB85, 0x933BFB85, 0x933CFB85, + 0x933DFB85, 0x933EFB85, 0x933FFB85, 0x9340FB85, 0x9341FB85, 0x9342FB85, 0x9343FB85, 0x9344FB85, 0x9345FB85, 0x9346FB85, 0x9347FB85, 0x9348FB85, 0x9349FB85, 0x934AFB85, 0x934BFB85, + 0x934CFB85, 0x934DFB85, 0x934EFB85, 0x934FFB85, 0x9350FB85, 0x9351FB85, 0x9352FB85, 0x9353FB85, 0x9354FB85, 0x9355FB85, 0x9356FB85, 0x9357FB85, 0x9358FB85, 0x9359FB85, 0x935AFB85, + 0x935BFB85, 0x935CFB85, 0x935DFB85, 0x935EFB85, 0x935FFB85, 
0x9360FB85, 0x9361FB85, 0x9362FB85, 0x9363FB85, 0x9364FB85, 0x9365FB85, 0x9366FB85, 0x9367FB85, 0x9368FB85, 0x9369FB85, + 0x936AFB85, 0x936BFB85, 0x936CFB85, 0x936DFB85, 0x936EFB85, 0x936FFB85, 0x9370FB85, 0x9371FB85, 0x9372FB85, 0x9373FB85, 0x9374FB85, 0x9375FB85, 0x9376FB85, 0x9377FB85, 0x9378FB85, + 0x9379FB85, 0x937AFB85, 0x937BFB85, 0x937CFB85, 0x937DFB85, 0x937EFB85, 0x937FFB85, 0x9380FB85, 0x9381FB85, 0x9382FB85, 0x9383FB85, 0x9384FB85, 0x9385FB85, 0x9386FB85, 0x9387FB85, + 0x9388FB85, 0x9389FB85, 0x938AFB85, 0x938BFB85, 0x938CFB85, 0x938DFB85, 0x938EFB85, 0x938FFB85, 0x9390FB85, 0x9391FB85, 0x9392FB85, 0x9393FB85, 0x9394FB85, 0x9395FB85, 0x9396FB85, + 0x9397FB85, 0x9398FB85, 0x9399FB85, 0x939AFB85, 0x939BFB85, 0x939CFB85, 0x939DFB85, 0x939EFB85, 0x939FFB85, 0x93A0FB85, 0x93A1FB85, 0x93A2FB85, 0x93A3FB85, 0x93A4FB85, 0x93A5FB85, + 0x93A6FB85, 0x93A7FB85, 0x93A8FB85, 0x93A9FB85, 0x93AAFB85, 0x93ABFB85, 0x93ACFB85, 0x93ADFB85, 0x93AEFB85, 0x93AFFB85, 0x93B0FB85, 0x93B1FB85, 0x93B2FB85, 0x93B3FB85, 0x93B4FB85, + 0x93B5FB85, 0x93B6FB85, 0x93B7FB85, 0x93B8FB85, 0x93B9FB85, 0x93BAFB85, 0x93BBFB85, 0x93BCFB85, 0x93BDFB85, 0x93BEFB85, 0x93BFFB85, 0x93C0FB85, 0x93C1FB85, 0x93C2FB85, 0x93C3FB85, + 0x93C4FB85, 0x93C5FB85, 0x93C6FB85, 0x93C7FB85, 0x93C8FB85, 0x93C9FB85, 0x93CAFB85, 0x93CBFB85, 0x93CCFB85, 0x93CDFB85, 0x93CEFB85, 0x93CFFB85, 0x93D0FB85, 0x93D1FB85, 0x93D2FB85, + 0x93D3FB85, 0x93D4FB85, 0x93D5FB85, 0x93D6FB85, 0x93D7FB85, 0x93D8FB85, 0x93D9FB85, 0x93DAFB85, 0x93DBFB85, 0x93DCFB85, 0x93DDFB85, 0x93DEFB85, 0x93DFFB85, 0x93E0FB85, 0x93E1FB85, + 0x93E2FB85, 0x93E3FB85, 0x93E4FB85, 0x93E5FB85, 0x93E6FB85, 0x93E7FB85, 0x93E8FB85, 0x93E9FB85, 0x93EAFB85, 0x93EBFB85, 0x93ECFB85, 0x93EDFB85, 0x93EEFB85, 0x93EFFB85, 0x93F0FB85, + 0x93F1FB85, 0x93F2FB85, 0x93F3FB85, 0x93F4FB85, 0x93F5FB85, 0x93F6FB85, 0x93F7FB85, 0x93F8FB85, 0x93F9FB85, 0x93FAFB85, 0x93FBFB85, 0x93FCFB85, 0x93FDFB85, 0x93FEFB85, 0x93FFFB85, + 0x9400FB85, 0x9401FB85, 0x9402FB85, 0x9403FB85, 
0x9404FB85, 0x9405FB85, 0x9406FB85, 0x9407FB85, 0x9408FB85, 0x9409FB85, 0x940AFB85, 0x940BFB85, 0x940CFB85, 0x940DFB85, 0x940EFB85, + 0x940FFB85, 0x9410FB85, 0x9411FB85, 0x9412FB85, 0x9413FB85, 0x9414FB85, 0x9415FB85, 0x9416FB85, 0x9417FB85, 0x9418FB85, 0x9419FB85, 0x941AFB85, 0x941BFB85, 0x941CFB85, 0x941DFB85, + 0x941EFB85, 0x941FFB85, 0x9420FB85, 0x9421FB85, 0x9422FB85, 0x9423FB85, 0x9424FB85, 0x9425FB85, 0x9426FB85, 0x9427FB85, 0x9428FB85, 0x9429FB85, 0x942AFB85, 0x942BFB85, 0x942CFB85, + 0x942DFB85, 0x942EFB85, 0x942FFB85, 0x9430FB85, 0x9431FB85, 0x9432FB85, 0x9433FB85, 0x9434FB85, 0x9435FB85, 0x9436FB85, 0x9437FB85, 0x9438FB85, 0x9439FB85, 0x943AFB85, 0x943BFB85, + 0x943CFB85, 0x943DFB85, 0x943EFB85, 0x943FFB85, 0x9440FB85, 0x9441FB85, 0x9442FB85, 0x9443FB85, 0x9444FB85, 0x9445FB85, 0x9446FB85, 0x9447FB85, 0x9448FB85, 0x9449FB85, 0x944AFB85, + 0x944BFB85, 0x944CFB85, 0x944DFB85, 0x944EFB85, 0x944FFB85, 0x9450FB85, 0x9451FB85, 0x9452FB85, 0x9453FB85, 0x9454FB85, 0x9455FB85, 0x9456FB85, 0x9457FB85, 0x9458FB85, 0x9459FB85, + 0x945AFB85, 0x945BFB85, 0x945CFB85, 0x945DFB85, 0x945EFB85, 0x945FFB85, 0x9460FB85, 0x9461FB85, 0x9462FB85, 0x9463FB85, 0x9464FB85, 0x9465FB85, 0x9466FB85, 0x9467FB85, 0x9468FB85, + 0x9469FB85, 0x946AFB85, 0x946BFB85, 0x946CFB85, 0x946DFB85, 0x946EFB85, 0x946FFB85, 0x9470FB85, 0x9471FB85, 0x9472FB85, 0x9473FB85, 0x9474FB85, 0x9475FB85, 0x9476FB85, 0x9477FB85, + 0x9478FB85, 0x9479FB85, 0x947AFB85, 0x947BFB85, 0x947CFB85, 0x947DFB85, 0x947EFB85, 0x947FFB85, 0x9480FB85, 0x9481FB85, 0x9482FB85, 0x9483FB85, 0x9484FB85, 0x9485FB85, 0x9486FB85, + 0x9487FB85, 0x9488FB85, 0x9489FB85, 0x948AFB85, 0x948BFB85, 0x948CFB85, 0x948DFB85, 0x948EFB85, 0x948FFB85, 0x9490FB85, 0x9491FB85, 0x9492FB85, 0x9493FB85, 0x9494FB85, 0x9495FB85, + 0x9496FB85, 0x9497FB85, 0x9498FB85, 0x9499FB85, 0x949AFB85, 0x949BFB85, 0x949CFB85, 0x949DFB85, 0x949EFB85, 0x949FFB85, 0x94A0FB85, 0x94A1FB85, 0x94A2FB85, 0x94A3FB85, 0x94A4FB85, + 0x94A5FB85, 0x94A6FB85, 0x94A7FB85, 
0x94A8FB85, 0x94A9FB85, 0x94AAFB85, 0x94ABFB85, 0x94ACFB85, 0x94ADFB85, 0x94AEFB85, 0x94AFFB85, 0x94B0FB85, 0x94B1FB85, 0x94B2FB85, 0x94B3FB85, + 0x94B4FB85, 0x94B5FB85, 0x94B6FB85, 0x94B7FB85, 0x94B8FB85, 0x94B9FB85, 0x94BAFB85, 0x94BBFB85, 0x94BCFB85, 0x94BDFB85, 0x94BEFB85, 0x94BFFB85, 0x94C0FB85, 0x94C1FB85, 0x94C2FB85, + 0x94C3FB85, 0x94C4FB85, 0x94C5FB85, 0x94C6FB85, 0x94C7FB85, 0x94C8FB85, 0x94C9FB85, 0x94CAFB85, 0x94CBFB85, 0x94CCFB85, 0x94CDFB85, 0x94CEFB85, 0x94CFFB85, 0x94D0FB85, 0x94D1FB85, + 0x94D2FB85, 0x94D3FB85, 0x94D4FB85, 0x94D5FB85, 0x94D6FB85, 0x94D7FB85, 0x94D8FB85, 0x94D9FB85, 0x94DAFB85, 0x94DBFB85, 0x94DCFB85, 0x94DDFB85, 0x94DEFB85, 0x94DFFB85, 0x94E0FB85, + 0x94E1FB85, 0x94E2FB85, 0x94E3FB85, 0x94E4FB85, 0x94E5FB85, 0x94E6FB85, 0x94E7FB85, 0x94E8FB85, 0x94E9FB85, 0x94EAFB85, 0x94EBFB85, 0x94ECFB85, 0x94EDFB85, 0x94EEFB85, 0x94EFFB85, + 0x94F0FB85, 0x94F1FB85, 0x94F2FB85, 0x94F3FB85, 0x94F4FB85, 0x94F5FB85, 0x94F6FB85, 0x94F7FB85, 0x94F8FB85, 0x94F9FB85, 0x94FAFB85, 0x94FBFB85, 0x94FCFB85, 0x94FDFB85, 0x94FEFB85, + 0x94FFFB85, 0x9500FB85, 0x9501FB85, 0x9502FB85, 0x9503FB85, 0x9504FB85, 0x9505FB85, 0x9506FB85, 0x9507FB85, 0x9508FB85, 0x9509FB85, 0x950AFB85, 0x950BFB85, 0x950CFB85, 0x950DFB85, + 0x950EFB85, 0x950FFB85, 0x9510FB85, 0x9511FB85, 0x9512FB85, 0x9513FB85, 0x9514FB85, 0x9515FB85, 0x9516FB85, 0x9517FB85, 0x9518FB85, 0x9519FB85, 0x951AFB85, 0x951BFB85, 0x951CFB85, + 0x951DFB85, 0x951EFB85, 0x951FFB85, 0x9520FB85, 0x9521FB85, 0x9522FB85, 0x9523FB85, 0x9524FB85, 0x9525FB85, 0x9526FB85, 0x9527FB85, 0x9528FB85, 0x9529FB85, 0x952AFB85, 0x952BFB85, + 0x952CFB85, 0x952DFB85, 0x952EFB85, 0x952FFB85, 0x9530FB85, 0x9531FB85, 0x9532FB85, 0x9533FB85, 0x9534FB85, 0x9535FB85, 0x9536FB85, 0x9537FB85, 0x9538FB85, 0x9539FB85, 0x953AFB85, + 0x953BFB85, 0x953CFB85, 0x953DFB85, 0x953EFB85, 0x953FFB85, 0x9540FB85, 0x9541FB85, 0x9542FB85, 0x9543FB85, 0x9544FB85, 0x9545FB85, 0x9546FB85, 0x9547FB85, 0x9548FB85, 0x9549FB85, + 0x954AFB85, 0x954BFB85, 
0x954CFB85, 0x954DFB85, 0x954EFB85, 0x954FFB85, 0x9550FB85, 0x9551FB85, 0x9552FB85, 0x9553FB85, 0x9554FB85, 0x9555FB85, 0x9556FB85, 0x9557FB85, 0x9558FB85, + 0x9559FB85, 0x955AFB85, 0x955BFB85, 0x955CFB85, 0x955DFB85, 0x955EFB85, 0x955FFB85, 0x9560FB85, 0x9561FB85, 0x9562FB85, 0x9563FB85, 0x9564FB85, 0x9565FB85, 0x9566FB85, 0x9567FB85, + 0x9568FB85, 0x9569FB85, 0x956AFB85, 0x956BFB85, 0x956CFB85, 0x956DFB85, 0x956EFB85, 0x956FFB85, 0x9570FB85, 0x9571FB85, 0x9572FB85, 0x9573FB85, 0x9574FB85, 0x9575FB85, 0x9576FB85, + 0x9577FB85, 0x9578FB85, 0x9579FB85, 0x957AFB85, 0x957BFB85, 0x957CFB85, 0x957DFB85, 0x957EFB85, 0x957FFB85, 0x9580FB85, 0x9581FB85, 0x9582FB85, 0x9583FB85, 0x9584FB85, 0x9585FB85, + 0x9586FB85, 0x9587FB85, 0x9588FB85, 0x9589FB85, 0x958AFB85, 0x958BFB85, 0x958CFB85, 0x958DFB85, 0x958EFB85, 0x958FFB85, 0x9590FB85, 0x9591FB85, 0x9592FB85, 0x9593FB85, 0x9594FB85, + 0x9595FB85, 0x9596FB85, 0x9597FB85, 0x9598FB85, 0x9599FB85, 0x959AFB85, 0x959BFB85, 0x959CFB85, 0x959DFB85, 0x959EFB85, 0x959FFB85, 0x95A0FB85, 0x95A1FB85, 0x95A2FB85, 0x95A3FB85, + 0x95A4FB85, 0x95A5FB85, 0x95A6FB85, 0x95A7FB85, 0x95A8FB85, 0x95A9FB85, 0x95AAFB85, 0x95ABFB85, 0x95ACFB85, 0x95ADFB85, 0x95AEFB85, 0x95AFFB85, 0x95B0FB85, 0x95B1FB85, 0x95B2FB85, + 0x95B3FB85, 0x95B4FB85, 0x95B5FB85, 0x95B6FB85, 0x95B7FB85, 0x95B8FB85, 0x95B9FB85, 0x95BAFB85, 0x95BBFB85, 0x95BCFB85, 0x95BDFB85, 0x95BEFB85, 0x95BFFB85, 0x95C0FB85, 0x95C1FB85, + 0x95C2FB85, 0x95C3FB85, 0x95C4FB85, 0x95C5FB85, 0x95C6FB85, 0x95C7FB85, 0x95C8FB85, 0x95C9FB85, 0x95CAFB85, 0x95CBFB85, 0x95CCFB85, 0x95CDFB85, 0x95CEFB85, 0x95CFFB85, 0x95D0FB85, + 0x95D1FB85, 0x95D2FB85, 0x95D3FB85, 0x95D4FB85, 0x95D5FB85, 0x95D6FB85, 0x95D7FB85, 0x95D8FB85, 0x95D9FB85, 0x95DAFB85, 0x95DBFB85, 0x95DCFB85, 0x95DDFB85, 0x95DEFB85, 0x95DFFB85, + 0x95E0FB85, 0x95E1FB85, 0x95E2FB85, 0x95E3FB85, 0x95E4FB85, 0x95E5FB85, 0x95E6FB85, 0x95E7FB85, 0x95E8FB85, 0x95E9FB85, 0x95EAFB85, 0x95EBFB85, 0x95ECFB85, 0x95EDFB85, 0x95EEFB85, + 0x95EFFB85, 
0x95F0FB85, 0x95F1FB85, 0x95F2FB85, 0x95F3FB85, 0x95F4FB85, 0x95F5FB85, 0x95F6FB85, 0x95F7FB85, 0x95F8FB85, 0x95F9FB85, 0x95FAFB85, 0x95FBFB85, 0x95FCFB85, 0x95FDFB85, + 0x95FEFB85, 0x95FFFB85, 0x9600FB85, 0x9601FB85, 0x9602FB85, 0x9603FB85, 0x9604FB85, 0x9605FB85, 0x9606FB85, 0x9607FB85, 0x9608FB85, 0x9609FB85, 0x960AFB85, 0x960BFB85, 0x960CFB85, + 0x960DFB85, 0x960EFB85, 0x960FFB85, 0x9610FB85, 0x9611FB85, 0x9612FB85, 0x9613FB85, 0x9614FB85, 0x9615FB85, 0x9616FB85, 0x9617FB85, 0x9618FB85, 0x9619FB85, 0x961AFB85, 0x961BFB85, + 0x961CFB85, 0x961DFB85, 0x961EFB85, 0x961FFB85, 0x9620FB85, 0x9621FB85, 0x9622FB85, 0x9623FB85, 0x9624FB85, 0x9625FB85, 0x9626FB85, 0x9627FB85, 0x9628FB85, 0x9629FB85, 0x962AFB85, + 0x962BFB85, 0x962CFB85, 0x962DFB85, 0x962EFB85, 0x962FFB85, 0x9630FB85, 0x9631FB85, 0x9632FB85, 0x9633FB85, 0x9634FB85, 0x9635FB85, 0x9636FB85, 0x9637FB85, 0x9638FB85, 0x9639FB85, + 0x963AFB85, 0x963BFB85, 0x963CFB85, 0x963DFB85, 0x963EFB85, 0x963FFB85, 0x9640FB85, 0x9641FB85, 0x9642FB85, 0x9643FB85, 0x9644FB85, 0x9645FB85, 0x9646FB85, 0x9647FB85, 0x9648FB85, + 0x9649FB85, 0x964AFB85, 0x964BFB85, 0x964CFB85, 0x964DFB85, 0x964EFB85, 0x964FFB85, 0x9650FB85, 0x9651FB85, 0x9652FB85, 0x9653FB85, 0x9654FB85, 0x9655FB85, 0x9656FB85, 0x9657FB85, + 0x9658FB85, 0x9659FB85, 0x965AFB85, 0x965BFB85, 0x965CFB85, 0x965DFB85, 0x965EFB85, 0x965FFB85, 0x9660FB85, 0x9661FB85, 0x9662FB85, 0x9663FB85, 0x9664FB85, 0x9665FB85, 0x9666FB85, + 0x9667FB85, 0x9668FB85, 0x9669FB85, 0x966AFB85, 0x966BFB85, 0x966CFB85, 0x966DFB85, 0x966EFB85, 0x966FFB85, 0x9670FB85, 0x9671FB85, 0x9672FB85, 0x9673FB85, 0x9674FB85, 0x9675FB85, + 0x9676FB85, 0x9677FB85, 0x9678FB85, 0x9679FB85, 0x967AFB85, 0x967BFB85, 0x967CFB85, 0x967DFB85, 0x967EFB85, 0x967FFB85, 0x9680FB85, 0x9681FB85, 0x9682FB85, 0x9683FB85, 0x9684FB85, + 0x9685FB85, 0x9686FB85, 0x9687FB85, 0x9688FB85, 0x9689FB85, 0x968AFB85, 0x968BFB85, 0x968CFB85, 0x968DFB85, 0x968EFB85, 0x968FFB85, 0x9690FB85, 0x9691FB85, 0x9692FB85, 0x9693FB85, + 
0x9694FB85, 0x9695FB85, 0x9696FB85, 0x9697FB85, 0x9698FB85, 0x9699FB85, 0x969AFB85, 0x969BFB85, 0x969CFB85, 0x969DFB85, 0x969EFB85, 0x969FFB85, 0x96A0FB85, 0x96A1FB85, 0x96A2FB85, + 0x96A3FB85, 0x96A4FB85, 0x96A5FB85, 0x96A6FB85, 0x96A7FB85, 0x96A8FB85, 0x96A9FB85, 0x96AAFB85, 0x96ABFB85, 0x96ACFB85, 0x96ADFB85, 0x96AEFB85, 0x96AFFB85, 0x96B0FB85, 0x96B1FB85, + 0x96B2FB85, 0x96B3FB85, 0x96B4FB85, 0x96B5FB85, 0x96B6FB85, 0x96B7FB85, 0x96B8FB85, 0x96B9FB85, 0x96BAFB85, 0x96BBFB85, 0x96BCFB85, 0x96BDFB85, 0x96BEFB85, 0x96BFFB85, 0x96C0FB85, + 0x96C1FB85, 0x96C2FB85, 0x96C3FB85, 0x96C4FB85, 0x96C5FB85, 0x96C6FB85, 0x96C7FB85, 0x96C8FB85, 0x96C9FB85, 0x96CAFB85, 0x96CBFB85, 0x96CCFB85, 0x96CDFB85, 0x96CEFB85, 0x96CFFB85, + 0x96D0FB85, 0x96D1FB85, 0x96D2FB85, 0x96D3FB85, 0x96D4FB85, 0x96D5FB85, 0x96D6FB85, 0x96D7FB85, 0x96D8FB85, 0x96D9FB85, 0x96DAFB85, 0x96DBFB85, 0x96DCFB85, 0x96DDFB85, 0x96DEFB85, + 0x96DFFB85, 0x96E0FB85, 0x96E1FB85, 0x96E2FB85, 0x96E3FB85, 0x96E4FB85, 0x96E5FB85, 0x96E6FB85, 0x96E7FB85, 0x96E8FB85, 0x96E9FB85, 0x96EAFB85, 0x96EBFB85, 0x96ECFB85, 0x96EDFB85, + 0x96EEFB85, 0x96EFFB85, 0x96F0FB85, 0x96F1FB85, 0x96F2FB85, 0x96F3FB85, 0x96F4FB85, 0x96F5FB85, 0x96F6FB85, 0x96F7FB85, 0x96F8FB85, 0x96F9FB85, 0x96FAFB85, 0x96FBFB85, 0x96FCFB85, + 0x96FDFB85, 0x96FEFB85, 0x96FFFB85, 0x9700FB85, 0x9701FB85, 0x9702FB85, 0x9703FB85, 0x9704FB85, 0x9705FB85, 0x9706FB85, 0x9707FB85, 0x9708FB85, 0x9709FB85, 0x970AFB85, 0x970BFB85, + 0x970CFB85, 0x970DFB85, 0x970EFB85, 0x970FFB85, 0x9710FB85, 0x9711FB85, 0x9712FB85, 0x9713FB85, 0x9714FB85, 0x9715FB85, 0x9716FB85, 0x9717FB85, 0x9718FB85, 0x9719FB85, 0x971AFB85, + 0x971BFB85, 0x971CFB85, 0x971DFB85, 0x971EFB85, 0x971FFB85, 0x9720FB85, 0x9721FB85, 0x9722FB85, 0x9723FB85, 0x9724FB85, 0x9725FB85, 0x9726FB85, 0x9727FB85, 0x9728FB85, 0x9729FB85, + 0x972AFB85, 0x972BFB85, 0x972CFB85, 0x972DFB85, 0x972EFB85, 0x972FFB85, 0x9730FB85, 0x9731FB85, 0x9732FB85, 0x9733FB85, 0x9734FB85, 0x9735FB85, 0x9736FB85, 0x9737FB85, 0x9738FB85, 
+ 0x9739FB85, 0x973AFB85, 0x973BFB85, 0x973CFB85, 0x973DFB85, 0x973EFB85, 0x973FFB85, 0x9740FB85, 0x9741FB85, 0x9742FB85, 0x9743FB85, 0x9744FB85, 0x9745FB85, 0x9746FB85, 0x9747FB85, + 0x9748FB85, 0x9749FB85, 0x974AFB85, 0x974BFB85, 0x974CFB85, 0x974DFB85, 0x974EFB85, 0x974FFB85, 0x9750FB85, 0x9751FB85, 0x9752FB85, 0x9753FB85, 0x9754FB85, 0x9755FB85, 0x9756FB85, + 0x9757FB85, 0x9758FB85, 0x9759FB85, 0x975AFB85, 0x975BFB85, 0x975CFB85, 0x975DFB85, 0x975EFB85, 0x975FFB85, 0x9760FB85, 0x9761FB85, 0x9762FB85, 0x9763FB85, 0x9764FB85, 0x9765FB85, + 0x9766FB85, 0x9767FB85, 0x9768FB85, 0x9769FB85, 0x976AFB85, 0x976BFB85, 0x976CFB85, 0x976DFB85, 0x976EFB85, 0x976FFB85, 0x9770FB85, 0x9771FB85, 0x9772FB85, 0x9773FB85, 0x9774FB85, + 0x9775FB85, 0x9776FB85, 0x9777FB85, 0x9778FB85, 0x9779FB85, 0x977AFB85, 0x977BFB85, 0x977CFB85, 0x977DFB85, 0x977EFB85, 0x977FFB85, 0x9780FB85, 0x9781FB85, 0x9782FB85, 0x9783FB85, + 0x9784FB85, 0x9785FB85, 0x9786FB85, 0x9787FB85, 0x9788FB85, 0x9789FB85, 0x978AFB85, 0x978BFB85, 0x978CFB85, 0x978DFB85, 0x978EFB85, 0x978FFB85, 0x9790FB85, 0x9791FB85, 0x9792FB85, + 0x9793FB85, 0x9794FB85, 0x9795FB85, 0x9796FB85, 0x9797FB85, 0x9798FB85, 0x9799FB85, 0x979AFB85, 0x979BFB85, 0x979CFB85, 0x979DFB85, 0x979EFB85, 0x979FFB85, 0x97A0FB85, 0x97A1FB85, + 0x97A2FB85, 0x97A3FB85, 0x97A4FB85, 0x97A5FB85, 0x97A6FB85, 0x97A7FB85, 0x97A8FB85, 0x97A9FB85, 0x97AAFB85, 0x97ABFB85, 0x97ACFB85, 0x97ADFB85, 0x97AEFB85, 0x97AFFB85, 0x97B0FB85, + 0x97B1FB85, 0x97B2FB85, 0x97B3FB85, 0x97B4FB85, 0x97B5FB85, 0x97B6FB85, 0x97B7FB85, 0x97B8FB85, 0x97B9FB85, 0x97BAFB85, 0x97BBFB85, 0x97BCFB85, 0x97BDFB85, 0x97BEFB85, 0x97BFFB85, + 0x97C0FB85, 0x97C1FB85, 0x97C2FB85, 0x97C3FB85, 0x97C4FB85, 0x97C5FB85, 0x97C6FB85, 0x97C7FB85, 0x97C8FB85, 0x97C9FB85, 0x97CAFB85, 0x97CBFB85, 0x97CCFB85, 0x97CDFB85, 0x97CEFB85, + 0x97CFFB85, 0x97D0FB85, 0x97D1FB85, 0x97D2FB85, 0x97D3FB85, 0x97D4FB85, 0x97D5FB85, 0x97D6FB85, 0x97D7FB85, 0x97D8FB85, 0x97D9FB85, 0x97DAFB85, 0x97DBFB85, 0x97DCFB85, 
0x97DDFB85, + 0x97DEFB85, 0x97DFFB85, 0x97E0FB85, 0x97E1FB85, 0x97E2FB85, 0x97E3FB85, 0x97E4FB85, 0x97E5FB85, 0x97E6FB85, 0x97E7FB85, 0x97E8FB85, 0x97E9FB85, 0x97EAFB85, 0x97EBFB85, 0x97ECFB85, + 0x97EDFB85, 0x97EEFB85, 0x97EFFB85, 0x97F0FB85, 0x97F1FB85, 0x97F2FB85, 0x97F3FB85, 0x97F4FB85, 0x97F5FB85, 0x97F6FB85, 0x97F7FB85, 0x97F8FB85, 0x97F9FB85, 0x97FAFB85, 0x97FBFB85, + 0x97FCFB85, 0x97FDFB85, 0x97FEFB85, 0x97FFFB85, 0x9800FB85, 0x9801FB85, 0x9802FB85, 0x9803FB85, 0x9804FB85, 0x9805FB85, 0x9806FB85, 0x9807FB85, 0x9808FB85, 0x9809FB85, 0x980AFB85, + 0x980BFB85, 0x980CFB85, 0x980DFB85, 0x980EFB85, 0x980FFB85, 0x9810FB85, 0x9811FB85, 0x9812FB85, 0x9813FB85, 0x9814FB85, 0x9815FB85, 0x9816FB85, 0x9817FB85, 0x9818FB85, 0x9819FB85, + 0x981AFB85, 0x981BFB85, 0x981CFB85, 0x981DFB85, 0x981EFB85, 0x981FFB85, 0x9820FB85, 0x9821FB85, 0x9822FB85, 0x9823FB85, 0x9824FB85, 0x9825FB85, 0x9826FB85, 0x9827FB85, 0x9828FB85, + 0x9829FB85, 0x982AFB85, 0x982BFB85, 0x982CFB85, 0x982DFB85, 0x982EFB85, 0x982FFB85, 0x9830FB85, 0x9831FB85, 0x9832FB85, 0x9833FB85, 0x9834FB85, 0x9835FB85, 0x9836FB85, 0x9837FB85, + 0x9838FB85, 0x9839FB85, 0x983AFB85, 0x983BFB85, 0x983CFB85, 0x983DFB85, 0x983EFB85, 0x983FFB85, 0x9840FB85, 0x9841FB85, 0x9842FB85, 0x9843FB85, 0x9844FB85, 0x9845FB85, 0x9846FB85, + 0x9847FB85, 0x9848FB85, 0x9849FB85, 0x984AFB85, 0x984BFB85, 0x984CFB85, 0x984DFB85, 0x984EFB85, 0x984FFB85, 0x9850FB85, 0x9851FB85, 0x9852FB85, 0x9853FB85, 0x9854FB85, 0x9855FB85, + 0x9856FB85, 0x9857FB85, 0x9858FB85, 0x9859FB85, 0x985AFB85, 0x985BFB85, 0x985CFB85, 0x985DFB85, 0x985EFB85, 0x985FFB85, 0x9860FB85, 0x9861FB85, 0x9862FB85, 0x9863FB85, 0x9864FB85, + 0x9865FB85, 0x9866FB85, 0x9867FB85, 0x9868FB85, 0x9869FB85, 0x986AFB85, 0x986BFB85, 0x986CFB85, 0x986DFB85, 0x986EFB85, 0x986FFB85, 0x9870FB85, 0x9871FB85, 0x9872FB85, 0x9873FB85, + 0x9874FB85, 0x9875FB85, 0x9876FB85, 0x9877FB85, 0x9878FB85, 0x9879FB85, 0x987AFB85, 0x987BFB85, 0x987CFB85, 0x987DFB85, 0x987EFB85, 0x987FFB85, 0x9880FB85, 
0x9881FB85, 0x9882FB85, + 0x9883FB85, 0x9884FB85, 0x9885FB85, 0x9886FB85, 0x9887FB85, 0x9888FB85, 0x9889FB85, 0x988AFB85, 0x988BFB85, 0x988CFB85, 0x988DFB85, 0x988EFB85, 0x988FFB85, 0x9890FB85, 0x9891FB85, + 0x9892FB85, 0x9893FB85, 0x9894FB85, 0x9895FB85, 0x9896FB85, 0x9897FB85, 0x9898FB85, 0x9899FB85, 0x989AFB85, 0x989BFB85, 0x989CFB85, 0x989DFB85, 0x989EFB85, 0x989FFB85, 0x98A0FB85, + 0x98A1FB85, 0x98A2FB85, 0x98A3FB85, 0x98A4FB85, 0x98A5FB85, 0x98A6FB85, 0x98A7FB85, 0x98A8FB85, 0x98A9FB85, 0x98AAFB85, 0x98ABFB85, 0x98ACFB85, 0x98ADFB85, 0x98AEFB85, 0x98AFFB85, + 0x98B0FB85, 0x98B1FB85, 0x98B2FB85, 0x98B3FB85, 0x98B4FB85, 0x98B5FB85, 0x98B6FB85, 0x98B7FB85, 0x98B8FB85, 0x98B9FB85, 0x98BAFB85, 0x98BBFB85, 0x98BCFB85, 0x98BDFB85, 0x98BEFB85, + 0x98BFFB85, 0x98C0FB85, 0x98C1FB85, 0x98C2FB85, 0x98C3FB85, 0x98C4FB85, 0x98C5FB85, 0x98C6FB85, 0x98C7FB85, 0x98C8FB85, 0x98C9FB85, 0x98CAFB85, 0x98CBFB85, 0x98CCFB85, 0x98CDFB85, + 0x98CEFB85, 0x98CFFB85, 0x98D0FB85, 0x98D1FB85, 0x98D2FB85, 0x98D3FB85, 0x98D4FB85, 0x98D5FB85, 0x98D6FB85, 0x98D7FB85, 0x98D8FB85, 0x98D9FB85, 0x98DAFB85, 0x98DBFB85, 0x98DCFB85, + 0x98DDFB85, 0x98DEFB85, 0x98DFFB85, 0x98E0FB85, 0x98E1FB85, 0x98E2FB85, 0x98E3FB85, 0x98E4FB85, 0x98E5FB85, 0x98E6FB85, 0x98E7FB85, 0x98E8FB85, 0x98E9FB85, 0x98EAFB85, 0x98EBFB85, + 0x98ECFB85, 0x98EDFB85, 0x98EEFB85, 0x98EFFB85, 0x98F0FB85, 0x98F1FB85, 0x98F2FB85, 0x98F3FB85, 0x98F4FB85, 0x98F5FB85, 0x98F6FB85, 0x98F7FB85, 0x98F8FB85, 0x98F9FB85, 0x98FAFB85, + 0x98FBFB85, 0x98FCFB85, 0x98FDFB85, 0x98FEFB85, 0x98FFFB85, 0x9900FB85, 0x9901FB85, 0x9902FB85, 0x9903FB85, 0x9904FB85, 0x9905FB85, 0x9906FB85, 0x9907FB85, 0x9908FB85, 0x9909FB85, + 0x990AFB85, 0x990BFB85, 0x990CFB85, 0x990DFB85, 0x990EFB85, 0x990FFB85, 0x9910FB85, 0x9911FB85, 0x9912FB85, 0x9913FB85, 0x9914FB85, 0x9915FB85, 0x9916FB85, 0x9917FB85, 0x9918FB85, + 0x9919FB85, 0x991AFB85, 0x991BFB85, 0x991CFB85, 0x991DFB85, 0x991EFB85, 0x991FFB85, 0x9920FB85, 0x9921FB85, 0x9922FB85, 0x9923FB85, 0x9924FB85, 
0x9925FB85, 0x9926FB85, 0x9927FB85, + 0x9928FB85, 0x9929FB85, 0x992AFB85, 0x992BFB85, 0x992CFB85, 0x992DFB85, 0x992EFB85, 0x992FFB85, 0x9930FB85, 0x9931FB85, 0x9932FB85, 0x9933FB85, 0x9934FB85, 0x9935FB85, 0x9936FB85, + 0x9937FB85, 0x9938FB85, 0x9939FB85, 0x993AFB85, 0x993BFB85, 0x993CFB85, 0x993DFB85, 0x993EFB85, 0x993FFB85, 0x9940FB85, 0x9941FB85, 0x9942FB85, 0x9943FB85, 0x9944FB85, 0x9945FB85, + 0x9946FB85, 0x9947FB85, 0x9948FB85, 0x9949FB85, 0x994AFB85, 0x994BFB85, 0x994CFB85, 0x994DFB85, 0x994EFB85, 0x994FFB85, 0x9950FB85, 0x9951FB85, 0x9952FB85, 0x9953FB85, 0x9954FB85, + 0x9955FB85, 0x9956FB85, 0x9957FB85, 0x9958FB85, 0x9959FB85, 0x995AFB85, 0x995BFB85, 0x995CFB85, 0x995DFB85, 0x995EFB85, 0x995FFB85, 0x9960FB85, 0x9961FB85, 0x9962FB85, 0x9963FB85, + 0x9964FB85, 0x9965FB85, 0x9966FB85, 0x9967FB85, 0x9968FB85, 0x9969FB85, 0x996AFB85, 0x996BFB85, 0x996CFB85, 0x996DFB85, 0x996EFB85, 0x996FFB85, 0x9970FB85, 0x9971FB85, 0x9972FB85, + 0x9973FB85, 0x9974FB85, 0x9975FB85, 0x9976FB85, 0x9977FB85, 0x9978FB85, 0x9979FB85, 0x997AFB85, 0x997BFB85, 0x997CFB85, 0x997DFB85, 0x997EFB85, 0x997FFB85, 0x9980FB85, 0x9981FB85, + 0x9982FB85, 0x9983FB85, 0x9984FB85, 0x9985FB85, 0x9986FB85, 0x9987FB85, 0x9988FB85, 0x9989FB85, 0x998AFB85, 0x998BFB85, 0x998CFB85, 0x998DFB85, 0x998EFB85, 0x998FFB85, 0x9990FB85, + 0x9991FB85, 0x9992FB85, 0x9993FB85, 0x9994FB85, 0x9995FB85, 0x9996FB85, 0x9997FB85, 0x9998FB85, 0x9999FB85, 0x999AFB85, 0x999BFB85, 0x999CFB85, 0x999DFB85, 0x999EFB85, 0x999FFB85, + 0x99A0FB85, 0x99A1FB85, 0x99A2FB85, 0x99A3FB85, 0x99A4FB85, 0x99A5FB85, 0x99A6FB85, 0x99A7FB85, 0x99A8FB85, 0x99A9FB85, 0x99AAFB85, 0x99ABFB85, 0x99ACFB85, 0x99ADFB85, 0x99AEFB85, + 0x99AFFB85, 0x99B0FB85, 0x99B1FB85, 0x99B2FB85, 0x99B3FB85, 0x99B4FB85, 0x99B5FB85, 0x99B6FB85, 0x99B7FB85, 0x99B8FB85, 0x99B9FB85, 0x99BAFB85, 0x99BBFB85, 0x99BCFB85, 0x99BDFB85, + 0x99BEFB85, 0x99BFFB85, 0x99C0FB85, 0x99C1FB85, 0x99C2FB85, 0x99C3FB85, 0x99C4FB85, 0x99C5FB85, 0x99C6FB85, 0x99C7FB85, 0x99C8FB85, 
0x99C9FB85, 0x99CAFB85, 0x99CBFB85, 0x99CCFB85, + 0x99CDFB85, 0x99CEFB85, 0x99CFFB85, 0x99D0FB85, 0x99D1FB85, 0x99D2FB85, 0x99D3FB85, 0x99D4FB85, 0x99D5FB85, 0x99D6FB85, 0x99D7FB85, 0x99D8FB85, 0x99D9FB85, 0x99DAFB85, 0x99DBFB85, + 0x99DCFB85, 0x99DDFB85, 0x99DEFB85, 0x99DFFB85, 0x99E0FB85, 0x99E1FB85, 0x99E2FB85, 0x99E3FB85, 0x99E4FB85, 0x99E5FB85, 0x99E6FB85, 0x99E7FB85, 0x99E8FB85, 0x99E9FB85, 0x99EAFB85, + 0x99EBFB85, 0x99ECFB85, 0x99EDFB85, 0x99EEFB85, 0x99EFFB85, 0x99F0FB85, 0x99F1FB85, 0x99F2FB85, 0x99F3FB85, 0x99F4FB85, 0x99F5FB85, 0x99F6FB85, 0x99F7FB85, 0x99F8FB85, 0x99F9FB85, + 0x99FAFB85, 0x99FBFB85, 0x99FCFB85, 0x99FDFB85, 0x99FEFB85, 0x99FFFB85, 0x9A00FB85, 0x9A01FB85, 0x9A02FB85, 0x9A03FB85, 0x9A04FB85, 0x9A05FB85, 0x9A06FB85, 0x9A07FB85, 0x9A08FB85, + 0x9A09FB85, 0x9A0AFB85, 0x9A0BFB85, 0x9A0CFB85, 0x9A0DFB85, 0x9A0EFB85, 0x9A0FFB85, 0x9A10FB85, 0x9A11FB85, 0x9A12FB85, 0x9A13FB85, 0x9A14FB85, 0x9A15FB85, 0x9A16FB85, 0x9A17FB85, + 0x9A18FB85, 0x9A19FB85, 0x9A1AFB85, 0x9A1BFB85, 0x9A1CFB85, 0x9A1DFB85, 0x9A1EFB85, 0x9A1FFB85, 0x9A20FB85, 0x9A21FB85, 0x9A22FB85, 0x9A23FB85, 0x9A24FB85, 0x9A25FB85, 0x9A26FB85, + 0x9A27FB85, 0x9A28FB85, 0x9A29FB85, 0x9A2AFB85, 0x9A2BFB85, 0x9A2CFB85, 0x9A2DFB85, 0x9A2EFB85, 0x9A2FFB85, 0x9A30FB85, 0x9A31FB85, 0x9A32FB85, 0x9A33FB85, 0x9A34FB85, 0x9A35FB85, + 0x9A36FB85, 0x9A37FB85, 0x9A38FB85, 0x9A39FB85, 0x9A3AFB85, 0x9A3BFB85, 0x9A3CFB85, 0x9A3DFB85, 0x9A3EFB85, 0x9A3FFB85, 0x9A40FB85, 0x9A41FB85, 0x9A42FB85, 0x9A43FB85, 0x9A44FB85, + 0x9A45FB85, 0x9A46FB85, 0x9A47FB85, 0x9A48FB85, 0x9A49FB85, 0x9A4AFB85, 0x9A4BFB85, 0x9A4CFB85, 0x9A4DFB85, 0x9A4EFB85, 0x9A4FFB85, 0x9A50FB85, 0x9A51FB85, 0x9A52FB85, 0x9A53FB85, + 0x9A54FB85, 0x9A55FB85, 0x9A56FB85, 0x9A57FB85, 0x9A58FB85, 0x9A59FB85, 0x9A5AFB85, 0x9A5BFB85, 0x9A5CFB85, 0x9A5DFB85, 0x9A5EFB85, 0x9A5FFB85, 0x9A60FB85, 0x9A61FB85, 0x9A62FB85, + 0x9A63FB85, 0x9A64FB85, 0x9A65FB85, 0x9A66FB85, 0x9A67FB85, 0x9A68FB85, 0x9A69FB85, 0x9A6AFB85, 0x9A6BFB85, 0x9A6CFB85, 
0x9A6DFB85, 0x9A6EFB85, 0x9A6FFB85, 0x9A70FB85, 0x9A71FB85, + 0x9A72FB85, 0x9A73FB85, 0x9A74FB85, 0x9A75FB85, 0x9A76FB85, 0x9A77FB85, 0x9A78FB85, 0x9A79FB85, 0x9A7AFB85, 0x9A7BFB85, 0x9A7CFB85, 0x9A7DFB85, 0x9A7EFB85, 0x9A7FFB85, 0x9A80FB85, + 0x9A81FB85, 0x9A82FB85, 0x9A83FB85, 0x9A84FB85, 0x9A85FB85, 0x9A86FB85, 0x9A87FB85, 0x9A88FB85, 0x9A89FB85, 0x9A8AFB85, 0x9A8BFB85, 0x9A8CFB85, 0x9A8DFB85, 0x9A8EFB85, 0x9A8FFB85, + 0x9A90FB85, 0x9A91FB85, 0x9A92FB85, 0x9A93FB85, 0x9A94FB85, 0x9A95FB85, 0x9A96FB85, 0x9A97FB85, 0x9A98FB85, 0x9A99FB85, 0x9A9AFB85, 0x9A9BFB85, 0x9A9CFB85, 0x9A9DFB85, 0x9A9EFB85, + 0x9A9FFB85, 0x9AA0FB85, 0x9AA1FB85, 0x9AA2FB85, 0x9AA3FB85, 0x9AA4FB85, 0x9AA5FB85, 0x9AA6FB85, 0x9AA7FB85, 0x9AA8FB85, 0x9AA9FB85, 0x9AAAFB85, 0x9AABFB85, 0x9AACFB85, 0x9AADFB85, + 0x9AAEFB85, 0x9AAFFB85, 0x9AB0FB85, 0x9AB1FB85, 0x9AB2FB85, 0x9AB3FB85, 0x9AB4FB85, 0x9AB5FB85, 0x9AB6FB85, 0x9AB7FB85, 0x9AB8FB85, 0x9AB9FB85, 0x9ABAFB85, 0x9ABBFB85, 0x9ABCFB85, + 0x9ABDFB85, 0x9ABEFB85, 0x9ABFFB85, 0x9AC0FB85, 0x9AC1FB85, 0x9AC2FB85, 0x9AC3FB85, 0x9AC4FB85, 0x9AC5FB85, 0x9AC6FB85, 0x9AC7FB85, 0x9AC8FB85, 0x9AC9FB85, 0x9ACAFB85, 0x9ACBFB85, + 0x9ACCFB85, 0x9ACDFB85, 0x9ACEFB85, 0x9ACFFB85, 0x9AD0FB85, 0x9AD1FB85, 0x9AD2FB85, 0x9AD3FB85, 0x9AD4FB85, 0x9AD5FB85, 0x9AD6FB85, 0x9AD7FB85, 0x9AD8FB85, 0x9AD9FB85, 0x9ADAFB85, + 0x9ADBFB85, 0x9ADCFB85, 0x9ADDFB85, 0x9ADEFB85, 0x9ADFFB85, 0x9AE0FB85, 0x9AE1FB85, 0x9AE2FB85, 0x9AE3FB85, 0x9AE4FB85, 0x9AE5FB85, 0x9AE6FB85, 0x9AE7FB85, 0x9AE8FB85, 0x9AE9FB85, + 0x9AEAFB85, 0x9AEBFB85, 0x9AECFB85, 0x9AEDFB85, 0x9AEEFB85, 0x9AEFFB85, 0x9AF0FB85, 0x9AF1FB85, 0x9AF2FB85, 0x9AF3FB85, 0x9AF4FB85, 0x9AF5FB85, 0x9AF6FB85, 0x9AF7FB85, 0x9AF8FB85, + 0x9AF9FB85, 0x9AFAFB85, 0x9AFBFB85, 0x9AFCFB85, 0x9AFDFB85, 0x9AFEFB85, 0x9AFFFB85, 0x9B00FB85, 0x9B01FB85, 0x9B02FB85, 0x9B03FB85, 0x9B04FB85, 0x9B05FB85, 0x9B06FB85, 0x9B07FB85, + 0x9B08FB85, 0x9B09FB85, 0x9B0AFB85, 0x9B0BFB85, 0x9B0CFB85, 0x9B0DFB85, 0x9B0EFB85, 0x9B0FFB85, 0x9B10FB85, 
0x9B11FB85, 0x9B12FB85, 0x9B13FB85, 0x9B14FB85, 0x9B15FB85, 0x9B16FB85, + 0x9B17FB85, 0x9B18FB85, 0x9B19FB85, 0x9B1AFB85, 0x9B1BFB85, 0x9B1CFB85, 0x9B1DFB85, 0x9B1EFB85, 0x9B1FFB85, 0x9B20FB85, 0x9B21FB85, 0x9B22FB85, 0x9B23FB85, 0x9B24FB85, 0x9B25FB85, + 0x9B26FB85, 0x9B27FB85, 0x9B28FB85, 0x9B29FB85, 0x9B2AFB85, 0x9B2BFB85, 0x9B2CFB85, 0x9B2DFB85, 0x9B2EFB85, 0x9B2FFB85, 0x9B30FB85, 0x9B31FB85, 0x9B32FB85, 0x9B33FB85, 0x9B34FB85, + 0x9B35FB85, 0x9B36FB85, 0x9B37FB85, 0x9B38FB85, 0x9B39FB85, 0x9B3AFB85, 0x9B3BFB85, 0x9B3CFB85, 0x9B3DFB85, 0x9B3EFB85, 0x9B3FFB85, 0x9B40FB85, 0x9B41FB85, 0x9B42FB85, 0x9B43FB85, + 0x9B44FB85, 0x9B45FB85, 0x9B46FB85, 0x9B47FB85, 0x9B48FB85, 0x9B49FB85, 0x9B4AFB85, 0x9B4BFB85, 0x9B4CFB85, 0x9B4DFB85, 0x9B4EFB85, 0x9B4FFB85, 0x9B50FB85, 0x9B51FB85, 0x9B52FB85, + 0x9B53FB85, 0x9B54FB85, 0x9B55FB85, 0x9B56FB85, 0x9B57FB85, 0x9B58FB85, 0x9B59FB85, 0x9B5AFB85, 0x9B5BFB85, 0x9B5CFB85, 0x9B5DFB85, 0x9B5EFB85, 0x9B5FFB85, 0x9B60FB85, 0x9B61FB85, + 0x9B62FB85, 0x9B63FB85, 0x9B64FB85, 0x9B65FB85, 0x9B66FB85, 0x9B67FB85, 0x9B68FB85, 0x9B69FB85, 0x9B6AFB85, 0x9B6BFB85, 0x9B6CFB85, 0x9B6DFB85, 0x9B6EFB85, 0x9B6FFB85, 0x9B70FB85, + 0x9B71FB85, 0x9B72FB85, 0x9B73FB85, 0x9B74FB85, 0x9B75FB85, 0x9B76FB85, 0x9B77FB85, 0x9B78FB85, 0x9B79FB85, 0x9B7AFB85, 0x9B7BFB85, 0x9B7CFB85, 0x9B7DFB85, 0x9B7EFB85, 0x9B7FFB85, + 0x9B80FB85, 0x9B81FB85, 0x9B82FB85, 0x9B83FB85, 0x9B84FB85, 0x9B85FB85, 0x9B86FB85, 0x9B87FB85, 0x9B88FB85, 0x9B89FB85, 0x9B8AFB85, 0x9B8BFB85, 0x9B8CFB85, 0x9B8DFB85, 0x9B8EFB85, + 0x9B8FFB85, 0x9B90FB85, 0x9B91FB85, 0x9B92FB85, 0x9B93FB85, 0x9B94FB85, 0x9B95FB85, 0x9B96FB85, 0x9B97FB85, 0x9B98FB85, 0x9B99FB85, 0x9B9AFB85, 0x9B9BFB85, 0x9B9CFB85, 0x9B9DFB85, + 0x9B9EFB85, 0x9B9FFB85, 0x9BA0FB85, 0x9BA1FB85, 0x9BA2FB85, 0x9BA3FB85, 0x9BA4FB85, 0x9BA5FB85, 0x9BA6FB85, 0x9BA7FB85, 0x9BA8FB85, 0x9BA9FB85, 0x9BAAFB85, 0x9BABFB85, 0x9BACFB85, + 0x9BADFB85, 0x9BAEFB85, 0x9BAFFB85, 0x9BB0FB85, 0x9BB1FB85, 0x9BB2FB85, 0x9BB3FB85, 0x9BB4FB85, 
0x9BB5FB85, 0x9BB6FB85, 0x9BB7FB85, 0x9BB8FB85, 0x9BB9FB85, 0x9BBAFB85, 0x9BBBFB85, + 0x9BBCFB85, 0x9BBDFB85, 0x9BBEFB85, 0x9BBFFB85, 0x9BC0FB85, 0x9BC1FB85, 0x9BC2FB85, 0x9BC3FB85, 0x9BC4FB85, 0x9BC5FB85, 0x9BC6FB85, 0x9BC7FB85, 0x9BC8FB85, 0x9BC9FB85, 0x9BCAFB85, + 0x9BCBFB85, 0x9BCCFB85, 0x9BCDFB85, 0x9BCEFB85, 0x9BCFFB85, 0x9BD0FB85, 0x9BD1FB85, 0x9BD2FB85, 0x9BD3FB85, 0x9BD4FB85, 0x9BD5FB85, 0x9BD6FB85, 0x9BD7FB85, 0x9BD8FB85, 0x9BD9FB85, + 0x9BDAFB85, 0x9BDBFB85, 0x9BDCFB85, 0x9BDDFB85, 0x9BDEFB85, 0x9BDFFB85, 0x9BE0FB85, 0x9BE1FB85, 0x9BE2FB85, 0x9BE3FB85, 0x9BE4FB85, 0x9BE5FB85, 0x9BE6FB85, 0x9BE7FB85, 0x9BE8FB85, + 0x9BE9FB85, 0x9BEAFB85, 0x9BEBFB85, 0x9BECFB85, 0x9BEDFB85, 0x9BEEFB85, 0x9BEFFB85, 0x9BF0FB85, 0x9BF1FB85, 0x9BF2FB85, 0x9BF3FB85, 0x9BF4FB85, 0x9BF5FB85, 0x9BF6FB85, 0x9BF7FB85, + 0x9BF8FB85, 0x9BF9FB85, 0x9BFAFB85, 0x9BFBFB85, 0x9BFCFB85, 0x9BFDFB85, 0x9BFEFB85, 0x9BFFFB85, 0x9C00FB85, 0x9C01FB85, 0x9C02FB85, 0x9C03FB85, 0x9C04FB85, 0x9C05FB85, 0x9C06FB85, + 0x9C07FB85, 0x9C08FB85, 0x9C09FB85, 0x9C0AFB85, 0x9C0BFB85, 0x9C0CFB85, 0x9C0DFB85, 0x9C0EFB85, 0x9C0FFB85, 0x9C10FB85, 0x9C11FB85, 0x9C12FB85, 0x9C13FB85, 0x9C14FB85, 0x9C15FB85, + 0x9C16FB85, 0x9C17FB85, 0x9C18FB85, 0x9C19FB85, 0x9C1AFB85, 0x9C1BFB85, 0x9C1CFB85, 0x9C1DFB85, 0x9C1EFB85, 0x9C1FFB85, 0x9C20FB85, 0x9C21FB85, 0x9C22FB85, 0x9C23FB85, 0x9C24FB85, + 0x9C25FB85, 0x9C26FB85, 0x9C27FB85, 0x9C28FB85, 0x9C29FB85, 0x9C2AFB85, 0x9C2BFB85, 0x9C2CFB85, 0x9C2DFB85, 0x9C2EFB85, 0x9C2FFB85, 0x9C30FB85, 0x9C31FB85, 0x9C32FB85, 0x9C33FB85, + 0x9C34FB85, 0x9C35FB85, 0x9C36FB85, 0x9C37FB85, 0x9C38FB85, 0x9C39FB85, 0x9C3AFB85, 0x9C3BFB85, 0x9C3CFB85, 0x9C3DFB85, 0x9C3EFB85, 0x9C3FFB85, 0x9C40FB85, 0x9C41FB85, 0x9C42FB85, + 0x9C43FB85, 0x9C44FB85, 0x9C45FB85, 0x9C46FB85, 0x9C47FB85, 0x9C48FB85, 0x9C49FB85, 0x9C4AFB85, 0x9C4BFB85, 0x9C4CFB85, 0x9C4DFB85, 0x9C4EFB85, 0x9C4FFB85, 0x9C50FB85, 0x9C51FB85, + 0x9C52FB85, 0x9C53FB85, 0x9C54FB85, 0x9C55FB85, 0x9C56FB85, 0x9C57FB85, 0x9C58FB85, 
0x9C59FB85, 0x9C5AFB85, 0x9C5BFB85, 0x9C5CFB85, 0x9C5DFB85, 0x9C5EFB85, 0x9C5FFB85, 0x9C60FB85, + 0x9C61FB85, 0x9C62FB85, 0x9C63FB85, 0x9C64FB85, 0x9C65FB85, 0x9C66FB85, 0x9C67FB85, 0x9C68FB85, 0x9C69FB85, 0x9C6AFB85, 0x9C6BFB85, 0x9C6CFB85, 0x9C6DFB85, 0x9C6EFB85, 0x9C6FFB85, + 0x9C70FB85, 0x9C71FB85, 0x9C72FB85, 0x9C73FB85, 0x9C74FB85, 0x9C75FB85, 0x9C76FB85, 0x9C77FB85, 0x9C78FB85, 0x9C79FB85, 0x9C7AFB85, 0x9C7BFB85, 0x9C7CFB85, 0x9C7DFB85, 0x9C7EFB85, + 0x9C7FFB85, 0x9C80FB85, 0x9C81FB85, 0x9C82FB85, 0x9C83FB85, 0x9C84FB85, 0x9C85FB85, 0x9C86FB85, 0x9C87FB85, 0x9C88FB85, 0x9C89FB85, 0x9C8AFB85, 0x9C8BFB85, 0x9C8CFB85, 0x9C8DFB85, + 0x9C8EFB85, 0x9C8FFB85, 0x9C90FB85, 0x9C91FB85, 0x9C92FB85, 0x9C93FB85, 0x9C94FB85, 0x9C95FB85, 0x9C96FB85, 0x9C97FB85, 0x9C98FB85, 0x9C99FB85, 0x9C9AFB85, 0x9C9BFB85, 0x9C9CFB85, + 0x9C9DFB85, 0x9C9EFB85, 0x9C9FFB85, 0x9CA0FB85, 0x9CA1FB85, 0x9CA2FB85, 0x9CA3FB85, 0x9CA4FB85, 0x9CA5FB85, 0x9CA6FB85, 0x9CA7FB85, 0x9CA8FB85, 0x9CA9FB85, 0x9CAAFB85, 0x9CABFB85, + 0x9CACFB85, 0x9CADFB85, 0x9CAEFB85, 0x9CAFFB85, 0x9CB0FB85, 0x9CB1FB85, 0x9CB2FB85, 0x9CB3FB85, 0x9CB4FB85, 0x9CB5FB85, 0x9CB6FB85, 0x9CB7FB85, 0x9CB8FB85, 0x9CB9FB85, 0x9CBAFB85, + 0x9CBBFB85, 0x9CBCFB85, 0x9CBDFB85, 0x9CBEFB85, 0x9CBFFB85, 0x9CC0FB85, 0x9CC1FB85, 0x9CC2FB85, 0x9CC3FB85, 0x9CC4FB85, 0x9CC5FB85, 0x9CC6FB85, 0x9CC7FB85, 0x9CC8FB85, 0x9CC9FB85, + 0x9CCAFB85, 0x9CCBFB85, 0x9CCCFB85, 0x9CCDFB85, 0x9CCEFB85, 0x9CCFFB85, 0x9CD0FB85, 0x9CD1FB85, 0x9CD2FB85, 0x9CD3FB85, 0x9CD4FB85, 0x9CD5FB85, 0x9CD6FB85, 0x9CD7FB85, 0x9CD8FB85, + 0x9CD9FB85, 0x9CDAFB85, 0x9CDBFB85, 0x9CDCFB85, 0x9CDDFB85, 0x9CDEFB85, 0x9CDFFB85, 0x9CE0FB85, 0x9CE1FB85, 0x9CE2FB85, 0x9CE3FB85, 0x9CE4FB85, 0x9CE5FB85, 0x9CE6FB85, 0x9CE7FB85, + 0x9CE8FB85, 0x9CE9FB85, 0x9CEAFB85, 0x9CEBFB85, 0x9CECFB85, 0x9CEDFB85, 0x9CEEFB85, 0x9CEFFB85, 0x9CF0FB85, 0x9CF1FB85, 0x9CF2FB85, 0x9CF3FB85, 0x9CF4FB85, 0x9CF5FB85, 0x9CF6FB85, + 0x9CF7FB85, 0x9CF8FB85, 0x9CF9FB85, 0x9CFAFB85, 0x9CFBFB85, 0x9CFCFB85, 
0x9CFDFB85, 0x9CFEFB85, 0x9CFFFB85, 0x9D00FB85, 0x9D01FB85, 0x9D02FB85, 0x9D03FB85, 0x9D04FB85, 0x9D05FB85, + 0x9D06FB85, 0x9D07FB85, 0x9D08FB85, 0x9D09FB85, 0x9D0AFB85, 0x9D0BFB85, 0x9D0CFB85, 0x9D0DFB85, 0x9D0EFB85, 0x9D0FFB85, 0x9D10FB85, 0x9D11FB85, 0x9D12FB85, 0x9D13FB85, 0x9D14FB85, + 0x9D15FB85, 0x9D16FB85, 0x9D17FB85, 0x9D18FB85, 0x9D19FB85, 0x9D1AFB85, 0x9D1BFB85, 0x9D1CFB85, 0x9D1DFB85, 0x9D1EFB85, 0x9D1FFB85, 0x9D20FB85, 0x9D21FB85, 0x9D22FB85, 0x9D23FB85, + 0x9D24FB85, 0x9D25FB85, 0x9D26FB85, 0x9D27FB85, 0x9D28FB85, 0x9D29FB85, 0x9D2AFB85, 0x9D2BFB85, 0x9D2CFB85, 0x9D2DFB85, 0x9D2EFB85, 0x9D2FFB85, 0x9D30FB85, 0x9D31FB85, 0x9D32FB85, + 0x9D33FB85, 0x9D34FB85, 0x9D35FB85, 0x9D36FB85, 0x9D37FB85, 0x9D38FB85, 0x9D39FB85, 0x9D3AFB85, 0x9D3BFB85, 0x9D3CFB85, 0x9D3DFB85, 0x9D3EFB85, 0x9D3FFB85, 0x9D40FB85, 0x9D41FB85, + 0x9D42FB85, 0x9D43FB85, 0x9D44FB85, 0x9D45FB85, 0x9D46FB85, 0x9D47FB85, 0x9D48FB85, 0x9D49FB85, 0x9D4AFB85, 0x9D4BFB85, 0x9D4CFB85, 0x9D4DFB85, 0x9D4EFB85, 0x9D4FFB85, 0x9D50FB85, + 0x9D51FB85, 0x9D52FB85, 0x9D53FB85, 0x9D54FB85, 0x9D55FB85, 0x9D56FB85, 0x9D57FB85, 0x9D58FB85, 0x9D59FB85, 0x9D5AFB85, 0x9D5BFB85, 0x9D5CFB85, 0x9D5DFB85, 0x9D5EFB85, 0x9D5FFB85, + 0x9D60FB85, 0x9D61FB85, 0x9D62FB85, 0x9D63FB85, 0x9D64FB85, 0x9D65FB85, 0x9D66FB85, 0x9D67FB85, 0x9D68FB85, 0x9D69FB85, 0x9D6AFB85, 0x9D6BFB85, 0x9D6CFB85, 0x9D6DFB85, 0x9D6EFB85, + 0x9D6FFB85, 0x9D70FB85, 0x9D71FB85, 0x9D72FB85, 0x9D73FB85, 0x9D74FB85, 0x9D75FB85, 0x9D76FB85, 0x9D77FB85, 0x9D78FB85, 0x9D79FB85, 0x9D7AFB85, 0x9D7BFB85, 0x9D7CFB85, 0x9D7DFB85, + 0x9D7EFB85, 0x9D7FFB85, 0x9D80FB85, 0x9D81FB85, 0x9D82FB85, 0x9D83FB85, 0x9D84FB85, 0x9D85FB85, 0x9D86FB85, 0x9D87FB85, 0x9D88FB85, 0x9D89FB85, 0x9D8AFB85, 0x9D8BFB85, 0x9D8CFB85, + 0x9D8DFB85, 0x9D8EFB85, 0x9D8FFB85, 0x9D90FB85, 0x9D91FB85, 0x9D92FB85, 0x9D93FB85, 0x9D94FB85, 0x9D95FB85, 0x9D96FB85, 0x9D97FB85, 0x9D98FB85, 0x9D99FB85, 0x9D9AFB85, 0x9D9BFB85, + 0x9D9CFB85, 0x9D9DFB85, 0x9D9EFB85, 0x9D9FFB85, 0x9DA0FB85, 
0x9DA1FB85, 0x9DA2FB85, 0x9DA3FB85, 0x9DA4FB85, 0x9DA5FB85, 0x9DA6FB85, 0x9DA7FB85, 0x9DA8FB85, 0x9DA9FB85, 0x9DAAFB85, + 0x9DABFB85, 0x9DACFB85, 0x9DADFB85, 0x9DAEFB85, 0x9DAFFB85, 0x9DB0FB85, 0x9DB1FB85, 0x9DB2FB85, 0x9DB3FB85, 0x9DB4FB85, 0x9DB5FB85, 0x9DB6FB85, 0x9DB7FB85, 0x9DB8FB85, 0x9DB9FB85, + 0x9DBAFB85, 0x9DBBFB85, 0x9DBCFB85, 0x9DBDFB85, 0x9DBEFB85, 0x9DBFFB85, 0x9DC0FB85, 0x9DC1FB85, 0x9DC2FB85, 0x9DC3FB85, 0x9DC4FB85, 0x9DC5FB85, 0x9DC6FB85, 0x9DC7FB85, 0x9DC8FB85, + 0x9DC9FB85, 0x9DCAFB85, 0x9DCBFB85, 0x9DCCFB85, 0x9DCDFB85, 0x9DCEFB85, 0x9DCFFB85, 0x9DD0FB85, 0x9DD1FB85, 0x9DD2FB85, 0x9DD3FB85, 0x9DD4FB85, 0x9DD5FB85, 0x9DD6FB85, 0x9DD7FB85, + 0x9DD8FB85, 0x9DD9FB85, 0x9DDAFB85, 0x9DDBFB85, 0x9DDCFB85, 0x9DDDFB85, 0x9DDEFB85, 0x9DDFFB85, 0x9DE0FB85, 0x9DE1FB85, 0x9DE2FB85, 0x9DE3FB85, 0x9DE4FB85, 0x9DE5FB85, 0x9DE6FB85, + 0x9DE7FB85, 0x9DE8FB85, 0x9DE9FB85, 0x9DEAFB85, 0x9DEBFB85, 0x9DECFB85, 0x9DEDFB85, 0x9DEEFB85, 0x9DEFFB85, 0x9DF0FB85, 0x9DF1FB85, 0x9DF2FB85, 0x9DF3FB85, 0x9DF4FB85, 0x9DF5FB85, + 0x9DF6FB85, 0x9DF7FB85, 0x9DF8FB85, 0x9DF9FB85, 0x9DFAFB85, 0x9DFBFB85, 0x9DFCFB85, 0x9DFDFB85, 0x9DFEFB85, 0x9DFFFB85, 0x9E00FB85, 0x9E01FB85, 0x9E02FB85, 0x9E03FB85, 0x9E04FB85, + 0x9E05FB85, 0x9E06FB85, 0x9E07FB85, 0x9E08FB85, 0x9E09FB85, 0x9E0AFB85, 0x9E0BFB85, 0x9E0CFB85, 0x9E0DFB85, 0x9E0EFB85, 0x9E0FFB85, 0x9E10FB85, 0x9E11FB85, 0x9E12FB85, 0x9E13FB85, + 0x9E14FB85, 0x9E15FB85, 0x9E16FB85, 0x9E17FB85, 0x9E18FB85, 0x9E19FB85, 0x9E1AFB85, 0x9E1BFB85, 0x9E1CFB85, 0x9E1DFB85, 0x9E1EFB85, 0x9E1FFB85, 0x9E20FB85, 0x9E21FB85, 0x9E22FB85, + 0x9E23FB85, 0x9E24FB85, 0x9E25FB85, 0x9E26FB85, 0x9E27FB85, 0x9E28FB85, 0x9E29FB85, 0x9E2AFB85, 0x9E2BFB85, 0x9E2CFB85, 0x9E2DFB85, 0x9E2EFB85, 0x9E2FFB85, 0x9E30FB85, 0x9E31FB85, + 0x9E32FB85, 0x9E33FB85, 0x9E34FB85, 0x9E35FB85, 0x9E36FB85, 0x9E37FB85, 0x9E38FB85, 0x9E39FB85, 0x9E3AFB85, 0x9E3BFB85, 0x9E3CFB85, 0x9E3DFB85, 0x9E3EFB85, 0x9E3FFB85, 0x9E40FB85, + 0x9E41FB85, 0x9E42FB85, 0x9E43FB85, 0x9E44FB85, 
0x9E45FB85, 0x9E46FB85, 0x9E47FB85, 0x9E48FB85, 0x9E49FB85, 0x9E4AFB85, 0x9E4BFB85, 0x9E4CFB85, 0x9E4DFB85, 0x9E4EFB85, 0x9E4FFB85, + 0x9E50FB85, 0x9E51FB85, 0x9E52FB85, 0x9E53FB85, 0x9E54FB85, 0x9E55FB85, 0x9E56FB85, 0x9E57FB85, 0x9E58FB85, 0x9E59FB85, 0x9E5AFB85, 0x9E5BFB85, 0x9E5CFB85, 0x9E5DFB85, 0x9E5EFB85, + 0x9E5FFB85, 0x9E60FB85, 0x9E61FB85, 0x9E62FB85, 0x9E63FB85, 0x9E64FB85, 0x9E65FB85, 0x9E66FB85, 0x9E67FB85, 0x9E68FB85, 0x9E69FB85, 0x9E6AFB85, 0x9E6BFB85, 0x9E6CFB85, 0x9E6DFB85, + 0x9E6EFB85, 0x9E6FFB85, 0x9E70FB85, 0x9E71FB85, 0x9E72FB85, 0x9E73FB85, 0x9E74FB85, 0x9E75FB85, 0x9E76FB85, 0x9E77FB85, 0x9E78FB85, 0x9E79FB85, 0x9E7AFB85, 0x9E7BFB85, 0x9E7CFB85, + 0x9E7DFB85, 0x9E7EFB85, 0x9E7FFB85, 0x9E80FB85, 0x9E81FB85, 0x9E82FB85, 0x9E83FB85, 0x9E84FB85, 0x9E85FB85, 0x9E86FB85, 0x9E87FB85, 0x9E88FB85, 0x9E89FB85, 0x9E8AFB85, 0x9E8BFB85, + 0x9E8CFB85, 0x9E8DFB85, 0x9E8EFB85, 0x9E8FFB85, 0x9E90FB85, 0x9E91FB85, 0x9E92FB85, 0x9E93FB85, 0x9E94FB85, 0x9E95FB85, 0x9E96FB85, 0x9E97FB85, 0x9E98FB85, 0x9E99FB85, 0x9E9AFB85, + 0x9E9BFB85, 0x9E9CFB85, 0x9E9DFB85, 0x9E9EFB85, 0x9E9FFB85, 0x9EA0FB85, 0x9EA1FB85, 0x9EA2FB85, 0x9EA3FB85, 0x9EA4FB85, 0x9EA5FB85, 0x9EA6FB85, 0x9EA7FB85, 0x9EA8FB85, 0x9EA9FB85, + 0x9EAAFB85, 0x9EABFB85, 0x9EACFB85, 0x9EADFB85, 0x9EAEFB85, 0x9EAFFB85, 0x9EB0FB85, 0x9EB1FB85, 0x9EB2FB85, 0x9EB3FB85, 0x9EB4FB85, 0x9EB5FB85, 0x9EB6FB85, 0x9EB7FB85, 0x9EB8FB85, + 0x9EB9FB85, 0x9EBAFB85, 0x9EBBFB85, 0x9EBCFB85, 0x9EBDFB85, 0x9EBEFB85, 0x9EBFFB85, 0x9EC0FB85, 0x9EC1FB85, 0x9EC2FB85, 0x9EC3FB85, 0x9EC4FB85, 0x9EC5FB85, 0x9EC6FB85, 0x9EC7FB85, + 0x9EC8FB85, 0x9EC9FB85, 0x9ECAFB85, 0x9ECBFB85, 0x9ECCFB85, 0x9ECDFB85, 0x9ECEFB85, 0x9ECFFB85, 0x9ED0FB85, 0x9ED1FB85, 0x9ED2FB85, 0x9ED3FB85, 0x9ED4FB85, 0x9ED5FB85, 0x9ED6FB85, + 0x9ED7FB85, 0x9ED8FB85, 0x9ED9FB85, 0x9EDAFB85, 0x9EDBFB85, 0x9EDCFB85, 0x9EDDFB85, 0x9EDEFB85, 0x9EDFFB85, 0x9EE0FB85, 0x9EE1FB85, 0x9EE2FB85, 0x9EE3FB85, 0x9EE4FB85, 0x9EE5FB85, + 0x9EE6FB85, 0x9EE7FB85, 0x9EE8FB85, 
0x9EE9FB85, 0x9EEAFB85, 0x9EEBFB85, 0x9EECFB85, 0x9EEDFB85, 0x9EEEFB85, 0x9EEFFB85, 0x9EF0FB85, 0x9EF1FB85, 0x9EF2FB85, 0x9EF3FB85, 0x9EF4FB85, + 0x9EF5FB85, 0x9EF6FB85, 0x9EF7FB85, 0x9EF8FB85, 0x9EF9FB85, 0x9EFAFB85, 0x9EFBFB85, 0x9EFCFB85, 0x9EFDFB85, 0x9EFEFB85, 0x9EFFFB85, 0x9F00FB85, 0x9F01FB85, 0x9F02FB85, 0x9F03FB85, + 0x9F04FB85, 0x9F05FB85, 0x9F06FB85, 0x9F07FB85, 0x9F08FB85, 0x9F09FB85, 0x9F0AFB85, 0x9F0BFB85, 0x9F0CFB85, 0x9F0DFB85, 0x9F0EFB85, 0x9F0FFB85, 0x9F10FB85, 0x9F11FB85, 0x9F12FB85, + 0x9F13FB85, 0x9F14FB85, 0x9F15FB85, 0x9F16FB85, 0x9F17FB85, 0x9F18FB85, 0x9F19FB85, 0x9F1AFB85, 0x9F1BFB85, 0x9F1CFB85, 0x9F1DFB85, 0x9F1EFB85, 0x9F1FFB85, 0x9F20FB85, 0x9F21FB85, + 0x9F22FB85, 0x9F23FB85, 0x9F24FB85, 0x9F25FB85, 0x9F26FB85, 0x9F27FB85, 0x9F28FB85, 0x9F29FB85, 0x9F2AFB85, 0x9F2BFB85, 0x9F2CFB85, 0x9F2DFB85, 0x9F2EFB85, 0x9F2FFB85, 0x9F30FB85, + 0x9F31FB85, 0x9F32FB85, 0x9F33FB85, 0x9F34FB85, 0x9F35FB85, 0x9F36FB85, 0x9F37FB85, 0x9F38FB85, 0x9F39FB85, 0x9F3AFB85, 0x9F3BFB85, 0x9F3CFB85, 0x9F3DFB85, 0x9F3EFB85, 0x9F3FFB85, + 0x9F40FB85, 0x9F41FB85, 0x9F42FB85, 0x9F43FB85, 0x9F44FB85, 0x9F45FB85, 0x9F46FB85, 0x9F47FB85, 0x9F48FB85, 0x9F49FB85, 0x9F4AFB85, 0x9F4BFB85, 0x9F4CFB85, 0x9F4DFB85, 0x9F4EFB85, + 0x9F4FFB85, 0x9F50FB85, 0x9F51FB85, 0x9F52FB85, 0x9F53FB85, 0x9F54FB85, 0x9F55FB85, 0x9F56FB85, 0x9F57FB85, 0x9F58FB85, 0x9F59FB85, 0x9F5AFB85, 0x9F5BFB85, 0x9F5CFB85, 0x9F5DFB85, + 0x9F5EFB85, 0x9F5FFB85, 0x9F60FB85, 0x9F61FB85, 0x9F62FB85, 0x9F63FB85, 0x9F64FB85, 0x9F65FB85, 0x9F66FB85, 0x9F67FB85, 0x9F68FB85, 0x9F69FB85, 0x9F6AFB85, 0x9F6BFB85, 0x9F6CFB85, + 0x9F6DFB85, 0x9F6EFB85, 0x9F6FFB85, 0x9F70FB85, 0x9F71FB85, 0x9F72FB85, 0x9F73FB85, 0x9F74FB85, 0x9F75FB85, 0x9F76FB85, 0x9F77FB85, 0x9F78FB85, 0x9F79FB85, 0x9F7AFB85, 0x9F7BFB85, + 0x9F7CFB85, 0x9F7DFB85, 0x9F7EFB85, 0x9F7FFB85, 0x9F80FB85, 0x9F81FB85, 0x9F82FB85, 0x9F83FB85, 0x9F84FB85, 0x9F85FB85, 0x9F86FB85, 0x9F87FB85, 0x9F88FB85, 0x9F89FB85, 0x9F8AFB85, + 0x9F8BFB85, 0x9F8CFB85, 
0x9F8DFB85, 0x9F8EFB85, 0x9F8FFB85, 0x9F90FB85, 0x9F91FB85, 0x9F92FB85, 0x9F93FB85, 0x9F94FB85, 0x9F95FB85, 0x9F96FB85, 0x9F97FB85, 0x9F98FB85, 0x9F99FB85, + 0x9F9AFB85, 0x9F9BFB85, 0x9F9CFB85, 0x9F9DFB85, 0x9F9EFB85, 0x9F9FFB85, 0x9FA0FB85, 0x9FA1FB85, 0x9FA2FB85, 0x9FA3FB85, 0x9FA4FB85, 0x9FA5FB85, 0x9FA6FB85, 0x9FA7FB85, 0x9FA8FB85, + 0x9FA9FB85, 0x9FAAFB85, 0x9FABFB85, 0x9FACFB85, 0x9FADFB85, 0x9FAEFB85, 0x9FAFFB85, 0x9FB0FB85, 0x9FB1FB85, 0x9FB2FB85, 0x9FB3FB85, 0x9FB4FB85, 0x9FB5FB85, 0x9FB6FB85, 0x9FB7FB85, + 0x9FB8FB85, 0x9FB9FB85, 0x9FBAFB85, 0x9FBBFB85, 0x9FBCFB85, 0x9FBDFB85, 0x9FBEFB85, 0x9FBFFB85, 0x9FC0FB85, 0x9FC1FB85, 0x9FC2FB85, 0x9FC3FB85, 0x9FC4FB85, 0x9FC5FB85, 0x9FC6FB85, + 0x9FC7FB85, 0x9FC8FB85, 0x9FC9FB85, 0x9FCAFB85, 0x9FCBFB85, 0x9FCCFB85, 0x9FCDFB85, 0x9FCEFB85, 0x9FCFFB85, 0x9FD0FB85, 0x9FD1FB85, 0x9FD2FB85, 0x9FD3FB85, 0x9FD4FB85, 0x9FD5FB85, + 0x9FD6FB85, 0x9FD7FB85, 0x9FD8FB85, 0x9FD9FB85, 0x9FDAFB85, 0x9FDBFB85, 0x9FDCFB85, 0x9FDDFB85, 0x9FDEFB85, 0x9FDFFB85, 0x9FE0FB85, 0x9FE1FB85, 0x9FE2FB85, 0x9FE3FB85, 0x9FE4FB85, + 0x9FE5FB85, 0x9FE6FB85, 0x9FE7FB85, 0x9FE8FB85, 0x9FE9FB85, 0x9FEAFB85, 0x9FEBFB85, 0x9FECFB85, 0x9FEDFB85, 0x9FEEFB85, 0x9FEFFB85, 0x9FF0FB85, 0x9FF1FB85, 0x9FF2FB85, 0x9FF3FB85, + 0x9FF4FB85, 0x9FF5FB85, 0x9FF6FB85, 0x9FF7FB85, 0x9FF8FB85, 0x9FF9FB85, 0x9FFAFB85, 0x9FFBFB85, 0x9FFCFB85, 0x9FFDFB85, 0x9FFEFB85, 0x9FFFFB85, 0xA000FB85, 0xA001FB85, 0xA002FB85, + 0xA003FB85, 0xA004FB85, 0xA005FB85, 0xA006FB85, 0xA007FB85, 0xA008FB85, 0xA009FB85, 0xA00AFB85, 0xA00BFB85, 0xA00CFB85, 0xA00DFB85, 0xA00EFB85, 0xA00FFB85, 0xA010FB85, 0xA011FB85, + 0xA012FB85, 0xA013FB85, 0xA014FB85, 0xA015FB85, 0xA016FB85, 0xA017FB85, 0xA018FB85, 0xA019FB85, 0xA01AFB85, 0xA01BFB85, 0xA01CFB85, 0xA01DFB85, 0xA01EFB85, 0xA01FFB85, 0xA020FB85, + 0xA021FB85, 0xA022FB85, 0xA023FB85, 0xA024FB85, 0xA025FB85, 0xA026FB85, 0xA027FB85, 0xA028FB85, 0xA029FB85, 0xA02AFB85, 0xA02BFB85, 0xA02CFB85, 0xA02DFB85, 0xA02EFB85, 0xA02FFB85, + 0xA030FB85, 
0xA031FB85, 0xA032FB85, 0xA033FB85, 0xA034FB85, 0xA035FB85, 0xA036FB85, 0xA037FB85, 0xA038FB85, 0xA039FB85, 0xA03AFB85, 0xA03BFB85, 0xA03CFB85, 0xA03DFB85, 0xA03EFB85, + 0xA03FFB85, 0xA040FB85, 0xA041FB85, 0xA042FB85, 0xA043FB85, 0xA044FB85, 0xA045FB85, 0xA046FB85, 0xA047FB85, 0xA048FB85, 0xA049FB85, 0xA04AFB85, 0xA04BFB85, 0xA04CFB85, 0xA04DFB85, + 0xA04EFB85, 0xA04FFB85, 0xA050FB85, 0xA051FB85, 0xA052FB85, 0xA053FB85, 0xA054FB85, 0xA055FB85, 0xA056FB85, 0xA057FB85, 0xA058FB85, 0xA059FB85, 0xA05AFB85, 0xA05BFB85, 0xA05CFB85, + 0xA05DFB85, 0xA05EFB85, 0xA05FFB85, 0xA060FB85, 0xA061FB85, 0xA062FB85, 0xA063FB85, 0xA064FB85, 0xA065FB85, 0xA066FB85, 0xA067FB85, 0xA068FB85, 0xA069FB85, 0xA06AFB85, 0xA06BFB85, + 0xA06CFB85, 0xA06DFB85, 0xA06EFB85, 0xA06FFB85, 0xA070FB85, 0xA071FB85, 0xA072FB85, 0xA073FB85, 0xA074FB85, 0xA075FB85, 0xA076FB85, 0xA077FB85, 0xA078FB85, 0xA079FB85, 0xA07AFB85, + 0xA07BFB85, 0xA07CFB85, 0xA07DFB85, 0xA07EFB85, 0xA07FFB85, 0xA080FB85, 0xA081FB85, 0xA082FB85, 0xA083FB85, 0xA084FB85, 0xA085FB85, 0xA086FB85, 0xA087FB85, 0xA088FB85, 0xA089FB85, + 0xA08AFB85, 0xA08BFB85, 0xA08CFB85, 0xA08DFB85, 0xA08EFB85, 0xA08FFB85, 0xA090FB85, 0xA091FB85, 0xA092FB85, 0xA093FB85, 0xA094FB85, 0xA095FB85, 0xA096FB85, 0xA097FB85, 0xA098FB85, + 0xA099FB85, 0xA09AFB85, 0xA09BFB85, 0xA09CFB85, 0xA09DFB85, 0xA09EFB85, 0xA09FFB85, 0xA0A0FB85, 0xA0A1FB85, 0xA0A2FB85, 0xA0A3FB85, 0xA0A4FB85, 0xA0A5FB85, 0xA0A6FB85, 0xA0A7FB85, + 0xA0A8FB85, 0xA0A9FB85, 0xA0AAFB85, 0xA0ABFB85, 0xA0ACFB85, 0xA0ADFB85, 0xA0AEFB85, 0xA0AFFB85, 0xA0B0FB85, 0xA0B1FB85, 0xA0B2FB85, 0xA0B3FB85, 0xA0B4FB85, 0xA0B5FB85, 0xA0B6FB85, + 0xA0B7FB85, 0xA0B8FB85, 0xA0B9FB85, 0xA0BAFB85, 0xA0BBFB85, 0xA0BCFB85, 0xA0BDFB85, 0xA0BEFB85, 0xA0BFFB85, 0xA0C0FB85, 0xA0C1FB85, 0xA0C2FB85, 0xA0C3FB85, 0xA0C4FB85, 0xA0C5FB85, + 0xA0C6FB85, 0xA0C7FB85, 0xA0C8FB85, 0xA0C9FB85, 0xA0CAFB85, 0xA0CBFB85, 0xA0CCFB85, 0xA0CDFB85, 0xA0CEFB85, 0xA0CFFB85, 0xA0D0FB85, 0xA0D1FB85, 0xA0D2FB85, 0xA0D3FB85, 0xA0D4FB85, + 
0xA0D5FB85, 0xA0D6FB85, 0xA0D7FB85, 0xA0D8FB85, 0xA0D9FB85, 0xA0DAFB85, 0xA0DBFB85, 0xA0DCFB85, 0xA0DDFB85, 0xA0DEFB85, 0xA0DFFB85, 0xA0E0FB85, 0xA0E1FB85, 0xA0E2FB85, 0xA0E3FB85, + 0xA0E4FB85, 0xA0E5FB85, 0xA0E6FB85, 0xA0E7FB85, 0xA0E8FB85, 0xA0E9FB85, 0xA0EAFB85, 0xA0EBFB85, 0xA0ECFB85, 0xA0EDFB85, 0xA0EEFB85, 0xA0EFFB85, 0xA0F0FB85, 0xA0F1FB85, 0xA0F2FB85, + 0xA0F3FB85, 0xA0F4FB85, 0xA0F5FB85, 0xA0F6FB85, 0xA0F7FB85, 0xA0F8FB85, 0xA0F9FB85, 0xA0FAFB85, 0xA0FBFB85, 0xA0FCFB85, 0xA0FDFB85, 0xA0FEFB85, 0xA0FFFB85, 0xA100FB85, 0xA101FB85, + 0xA102FB85, 0xA103FB85, 0xA104FB85, 0xA105FB85, 0xA106FB85, 0xA107FB85, 0xA108FB85, 0xA109FB85, 0xA10AFB85, 0xA10BFB85, 0xA10CFB85, 0xA10DFB85, 0xA10EFB85, 0xA10FFB85, 0xA110FB85, + 0xA111FB85, 0xA112FB85, 0xA113FB85, 0xA114FB85, 0xA115FB85, 0xA116FB85, 0xA117FB85, 0xA118FB85, 0xA119FB85, 0xA11AFB85, 0xA11BFB85, 0xA11CFB85, 0xA11DFB85, 0xA11EFB85, 0xA11FFB85, + 0xA120FB85, 0xA121FB85, 0xA122FB85, 0xA123FB85, 0xA124FB85, 0xA125FB85, 0xA126FB85, 0xA127FB85, 0xA128FB85, 0xA129FB85, 0xA12AFB85, 0xA12BFB85, 0xA12CFB85, 0xA12DFB85, 0xA12EFB85, + 0xA12FFB85, 0xA130FB85, 0xA131FB85, 0xA132FB85, 0xA133FB85, 0xA134FB85, 0xA135FB85, 0xA136FB85, 0xA137FB85, 0xA138FB85, 0xA139FB85, 0xA13AFB85, 0xA13BFB85, 0xA13CFB85, 0xA13DFB85, + 0xA13EFB85, 0xA13FFB85, 0xA140FB85, 0xA141FB85, 0xA142FB85, 0xA143FB85, 0xA144FB85, 0xA145FB85, 0xA146FB85, 0xA147FB85, 0xA148FB85, 0xA149FB85, 0xA14AFB85, 0xA14BFB85, 0xA14CFB85, + 0xA14DFB85, 0xA14EFB85, 0xA14FFB85, 0xA150FB85, 0xA151FB85, 0xA152FB85, 0xA153FB85, 0xA154FB85, 0xA155FB85, 0xA156FB85, 0xA157FB85, 0xA158FB85, 0xA159FB85, 0xA15AFB85, 0xA15BFB85, + 0xA15CFB85, 0xA15DFB85, 0xA15EFB85, 0xA15FFB85, 0xA160FB85, 0xA161FB85, 0xA162FB85, 0xA163FB85, 0xA164FB85, 0xA165FB85, 0xA166FB85, 0xA167FB85, 0xA168FB85, 0xA169FB85, 0xA16AFB85, + 0xA16BFB85, 0xA16CFB85, 0xA16DFB85, 0xA16EFB85, 0xA16FFB85, 0xA170FB85, 0xA171FB85, 0xA172FB85, 0xA173FB85, 0xA174FB85, 0xA175FB85, 0xA176FB85, 0xA177FB85, 0xA178FB85, 0xA179FB85, 
+ 0xA17AFB85, 0xA17BFB85, 0xA17CFB85, 0xA17DFB85, 0xA17EFB85, 0xA17FFB85, 0xA180FB85, 0xA181FB85, 0xA182FB85, 0xA183FB85, 0xA184FB85, 0xA185FB85, 0xA186FB85, 0xA187FB85, 0xA188FB85, + 0xA189FB85, 0xA18AFB85, 0xA18BFB85, 0xA18CFB85, 0xA18DFB85, 0xA18EFB85, 0xA18FFB85, 0xA190FB85, 0xA191FB85, 0xA192FB85, 0xA193FB85, 0xA194FB85, 0xA195FB85, 0xA196FB85, 0xA197FB85, + 0xA198FB85, 0xA199FB85, 0xA19AFB85, 0xA19BFB85, 0xA19CFB85, 0xA19DFB85, 0xA19EFB85, 0xA19FFB85, 0xA1A0FB85, 0xA1A1FB85, 0xA1A2FB85, 0xA1A3FB85, 0xA1A4FB85, 0xA1A5FB85, 0xA1A6FB85, + 0xA1A7FB85, 0xA1A8FB85, 0xA1A9FB85, 0xA1AAFB85, 0xA1ABFB85, 0xA1ACFB85, 0xA1ADFB85, 0xA1AEFB85, 0xA1AFFB85, 0xA1B0FB85, 0xA1B1FB85, 0xA1B2FB85, 0xA1B3FB85, 0xA1B4FB85, 0xA1B5FB85, + 0xA1B6FB85, 0xA1B7FB85, 0xA1B8FB85, 0xA1B9FB85, 0xA1BAFB85, 0xA1BBFB85, 0xA1BCFB85, 0xA1BDFB85, 0xA1BEFB85, 0xA1BFFB85, 0xA1C0FB85, 0xA1C1FB85, 0xA1C2FB85, 0xA1C3FB85, 0xA1C4FB85, + 0xA1C5FB85, 0xA1C6FB85, 0xA1C7FB85, 0xA1C8FB85, 0xA1C9FB85, 0xA1CAFB85, 0xA1CBFB85, 0xA1CCFB85, 0xA1CDFB85, 0xA1CEFB85, 0xA1CFFB85, 0xA1D0FB85, 0xA1D1FB85, 0xA1D2FB85, 0xA1D3FB85, + 0xA1D4FB85, 0xA1D5FB85, 0xA1D6FB85, 0xA1D7FB85, 0xA1D8FB85, 0xA1D9FB85, 0xA1DAFB85, 0xA1DBFB85, 0xA1DCFB85, 0xA1DDFB85, 0xA1DEFB85, 0xA1DFFB85, 0xA1E0FB85, 0xA1E1FB85, 0xA1E2FB85, + 0xA1E3FB85, 0xA1E4FB85, 0xA1E5FB85, 0xA1E6FB85, 0xA1E7FB85, 0xA1E8FB85, 0xA1E9FB85, 0xA1EAFB85, 0xA1EBFB85, 0xA1ECFB85, 0xA1EDFB85, 0xA1EEFB85, 0xA1EFFB85, 0xA1F0FB85, 0xA1F1FB85, + 0xA1F2FB85, 0xA1F3FB85, 0xA1F4FB85, 0xA1F5FB85, 0xA1F6FB85, 0xA1F7FB85, 0xA1F8FB85, 0xA1F9FB85, 0xA1FAFB85, 0xA1FBFB85, 0xA1FCFB85, 0xA1FDFB85, 0xA1FEFB85, 0xA1FFFB85, 0xA200FB85, + 0xA201FB85, 0xA202FB85, 0xA203FB85, 0xA204FB85, 0xA205FB85, 0xA206FB85, 0xA207FB85, 0xA208FB85, 0xA209FB85, 0xA20AFB85, 0xA20BFB85, 0xA20CFB85, 0xA20DFB85, 0xA20EFB85, 0xA20FFB85, + 0xA210FB85, 0xA211FB85, 0xA212FB85, 0xA213FB85, 0xA214FB85, 0xA215FB85, 0xA216FB85, 0xA217FB85, 0xA218FB85, 0xA219FB85, 0xA21AFB85, 0xA21BFB85, 0xA21CFB85, 0xA21DFB85, 
0xA21EFB85, + 0xA21FFB85, 0xA220FB85, 0xA221FB85, 0xA222FB85, 0xA223FB85, 0xA224FB85, 0xA225FB85, 0xA226FB85, 0xA227FB85, 0xA228FB85, 0xA229FB85, 0xA22AFB85, 0xA22BFB85, 0xA22CFB85, 0xA22DFB85, + 0xA22EFB85, 0xA22FFB85, 0xA230FB85, 0xA231FB85, 0xA232FB85, 0xA233FB85, 0xA234FB85, 0xA235FB85, 0xA236FB85, 0xA237FB85, 0xA238FB85, 0xA239FB85, 0xA23AFB85, 0xA23BFB85, 0xA23CFB85, + 0xA23DFB85, 0xA23EFB85, 0xA23FFB85, 0xA240FB85, 0xA241FB85, 0xA242FB85, 0xA243FB85, 0xA244FB85, 0xA245FB85, 0xA246FB85, 0xA247FB85, 0xA248FB85, 0xA249FB85, 0xA24AFB85, 0xA24BFB85, + 0xA24CFB85, 0xA24DFB85, 0xA24EFB85, 0xA24FFB85, 0xA250FB85, 0xA251FB85, 0xA252FB85, 0xA253FB85, 0xA254FB85, 0xA255FB85, 0xA256FB85, 0xA257FB85, 0xA258FB85, 0xA259FB85, 0xA25AFB85, + 0xA25BFB85, 0xA25CFB85, 0xA25DFB85, 0xA25EFB85, 0xA25FFB85, 0xA260FB85, 0xA261FB85, 0xA262FB85, 0xA263FB85, 0xA264FB85, 0xA265FB85, 0xA266FB85, 0xA267FB85, 0xA268FB85, 0xA269FB85, + 0xA26AFB85, 0xA26BFB85, 0xA26CFB85, 0xA26DFB85, 0xA26EFB85, 0xA26FFB85, 0xA270FB85, 0xA271FB85, 0xA272FB85, 0xA273FB85, 0xA274FB85, 0xA275FB85, 0xA276FB85, 0xA277FB85, 0xA278FB85, + 0xA279FB85, 0xA27AFB85, 0xA27BFB85, 0xA27CFB85, 0xA27DFB85, 0xA27EFB85, 0xA27FFB85, 0xA280FB85, 0xA281FB85, 0xA282FB85, 0xA283FB85, 0xA284FB85, 0xA285FB85, 0xA286FB85, 0xA287FB85, + 0xA288FB85, 0xA289FB85, 0xA28AFB85, 0xA28BFB85, 0xA28CFB85, 0xA28DFB85, 0xA28EFB85, 0xA28FFB85, 0xA290FB85, 0xA291FB85, 0xA292FB85, 0xA293FB85, 0xA294FB85, 0xA295FB85, 0xA296FB85, + 0xA297FB85, 0xA298FB85, 0xA299FB85, 0xA29AFB85, 0xA29BFB85, 0xA29CFB85, 0xA29DFB85, 0xA29EFB85, 0xA29FFB85, 0xA2A0FB85, 0xA2A1FB85, 0xA2A2FB85, 0xA2A3FB85, 0xA2A4FB85, 0xA2A5FB85, + 0xA2A6FB85, 0xA2A7FB85, 0xA2A8FB85, 0xA2A9FB85, 0xA2AAFB85, 0xA2ABFB85, 0xA2ACFB85, 0xA2ADFB85, 0xA2AEFB85, 0xA2AFFB85, 0xA2B0FB85, 0xA2B1FB85, 0xA2B2FB85, 0xA2B3FB85, 0xA2B4FB85, + 0xA2B5FB85, 0xA2B6FB85, 0xA2B7FB85, 0xA2B8FB85, 0xA2B9FB85, 0xA2BAFB85, 0xA2BBFB85, 0xA2BCFB85, 0xA2BDFB85, 0xA2BEFB85, 0xA2BFFB85, 0xA2C0FB85, 0xA2C1FB85, 
0xA2C2FB85, 0xA2C3FB85, + 0xA2C4FB85, 0xA2C5FB85, 0xA2C6FB85, 0xA2C7FB85, 0xA2C8FB85, 0xA2C9FB85, 0xA2CAFB85, 0xA2CBFB85, 0xA2CCFB85, 0xA2CDFB85, 0xA2CEFB85, 0xA2CFFB85, 0xA2D0FB85, 0xA2D1FB85, 0xA2D2FB85, + 0xA2D3FB85, 0xA2D4FB85, 0xA2D5FB85, 0xA2D6FB85, 0xA2D7FB85, 0xA2D8FB85, 0xA2D9FB85, 0xA2DAFB85, 0xA2DBFB85, 0xA2DCFB85, 0xA2DDFB85, 0xA2DEFB85, 0xA2DFFB85, 0xA2E0FB85, 0xA2E1FB85, + 0xA2E2FB85, 0xA2E3FB85, 0xA2E4FB85, 0xA2E5FB85, 0xA2E6FB85, 0xA2E7FB85, 0xA2E8FB85, 0xA2E9FB85, 0xA2EAFB85, 0xA2EBFB85, 0xA2ECFB85, 0xA2EDFB85, 0xA2EEFB85, 0xA2EFFB85, 0xA2F0FB85, + 0xA2F1FB85, 0xA2F2FB85, 0xA2F3FB85, 0xA2F4FB85, 0xA2F5FB85, 0xA2F6FB85, 0xA2F7FB85, 0xA2F8FB85, 0xA2F9FB85, 0xA2FAFB85, 0xA2FBFB85, 0xA2FCFB85, 0xA2FDFB85, 0xA2FEFB85, 0xA2FFFB85, + 0xA300FB85, 0xA301FB85, 0xA302FB85, 0xA303FB85, 0xA304FB85, 0xA305FB85, 0xA306FB85, 0xA307FB85, 0xA308FB85, 0xA309FB85, 0xA30AFB85, 0xA30BFB85, 0xA30CFB85, 0xA30DFB85, 0xA30EFB85, + 0xA30FFB85, 0xA310FB85, 0xA311FB85, 0xA312FB85, 0xA313FB85, 0xA314FB85, 0xA315FB85, 0xA316FB85, 0xA317FB85, 0xA318FB85, 0xA319FB85, 0xA31AFB85, 0xA31BFB85, 0xA31CFB85, 0xA31DFB85, + 0xA31EFB85, 0xA31FFB85, 0xA320FB85, 0xA321FB85, 0xA322FB85, 0xA323FB85, 0xA324FB85, 0xA325FB85, 0xA326FB85, 0xA327FB85, 0xA328FB85, 0xA329FB85, 0xA32AFB85, 0xA32BFB85, 0xA32CFB85, + 0xA32DFB85, 0xA32EFB85, 0xA32FFB85, 0xA330FB85, 0xA331FB85, 0xA332FB85, 0xA333FB85, 0xA334FB85, 0xA335FB85, 0xA336FB85, 0xA337FB85, 0xA338FB85, 0xA339FB85, 0xA33AFB85, 0xA33BFB85, + 0xA33CFB85, 0xA33DFB85, 0xA33EFB85, 0xA33FFB85, 0xA340FB85, 0xA341FB85, 0xA342FB85, 0xA343FB85, 0xA344FB85, 0xA345FB85, 0xA346FB85, 0xA347FB85, 0xA348FB85, 0xA349FB85, 0xA34AFB85, + 0xA34BFB85, 0xA34CFB85, 0xA34DFB85, 0xA34EFB85, 0xA34FFB85, 0xA350FB85, 0xA351FB85, 0xA352FB85, 0xA353FB85, 0xA354FB85, 0xA355FB85, 0xA356FB85, 0xA357FB85, 0xA358FB85, 0xA359FB85, + 0xA35AFB85, 0xA35BFB85, 0xA35CFB85, 0xA35DFB85, 0xA35EFB85, 0xA35FFB85, 0xA360FB85, 0xA361FB85, 0xA362FB85, 0xA363FB85, 0xA364FB85, 0xA365FB85, 
0xA366FB85, 0xA367FB85, 0xA368FB85, + 0xA369FB85, 0xA36AFB85, 0xA36BFB85, 0xA36CFB85, 0xA36DFB85, 0xA36EFB85, 0xA36FFB85, 0xA370FB85, 0xA371FB85, 0xA372FB85, 0xA373FB85, 0xA374FB85, 0xA375FB85, 0xA376FB85, 0xA377FB85, + 0xA378FB85, 0xA379FB85, 0xA37AFB85, 0xA37BFB85, 0xA37CFB85, 0xA37DFB85, 0xA37EFB85, 0xA37FFB85, 0xA380FB85, 0xA381FB85, 0xA382FB85, 0xA383FB85, 0xA384FB85, 0xA385FB85, 0xA386FB85, + 0xA387FB85, 0xA388FB85, 0xA389FB85, 0xA38AFB85, 0xA38BFB85, 0xA38CFB85, 0xA38DFB85, 0xA38EFB85, 0xA38FFB85, 0xA390FB85, 0xA391FB85, 0xA392FB85, 0xA393FB85, 0xA394FB85, 0xA395FB85, + 0xA396FB85, 0xA397FB85, 0xA398FB85, 0xA399FB85, 0xA39AFB85, 0xA39BFB85, 0xA39CFB85, 0xA39DFB85, 0xA39EFB85, 0xA39FFB85, 0xA3A0FB85, 0xA3A1FB85, 0xA3A2FB85, 0xA3A3FB85, 0xA3A4FB85, + 0xA3A5FB85, 0xA3A6FB85, 0xA3A7FB85, 0xA3A8FB85, 0xA3A9FB85, 0xA3AAFB85, 0xA3ABFB85, 0xA3ACFB85, 0xA3ADFB85, 0xA3AEFB85, 0xA3AFFB85, 0xA3B0FB85, 0xA3B1FB85, 0xA3B2FB85, 0xA3B3FB85, + 0xA3B4FB85, 0xA3B5FB85, 0xA3B6FB85, 0xA3B7FB85, 0xA3B8FB85, 0xA3B9FB85, 0xA3BAFB85, 0xA3BBFB85, 0xA3BCFB85, 0xA3BDFB85, 0xA3BEFB85, 0xA3BFFB85, 0xA3C0FB85, 0xA3C1FB85, 0xA3C2FB85, + 0xA3C3FB85, 0xA3C4FB85, 0xA3C5FB85, 0xA3C6FB85, 0xA3C7FB85, 0xA3C8FB85, 0xA3C9FB85, 0xA3CAFB85, 0xA3CBFB85, 0xA3CCFB85, 0xA3CDFB85, 0xA3CEFB85, 0xA3CFFB85, 0xA3D0FB85, 0xA3D1FB85, + 0xA3D2FB85, 0xA3D3FB85, 0xA3D4FB85, 0xA3D5FB85, 0xA3D6FB85, 0xA3D7FB85, 0xA3D8FB85, 0xA3D9FB85, 0xA3DAFB85, 0xA3DBFB85, 0xA3DCFB85, 0xA3DDFB85, 0xA3DEFB85, 0xA3DFFB85, 0xA3E0FB85, + 0xA3E1FB85, 0xA3E2FB85, 0xA3E3FB85, 0xA3E4FB85, 0xA3E5FB85, 0xA3E6FB85, 0xA3E7FB85, 0xA3E8FB85, 0xA3E9FB85, 0xA3EAFB85, 0xA3EBFB85, 0xA3ECFB85, 0xA3EDFB85, 0xA3EEFB85, 0xA3EFFB85, + 0xA3F0FB85, 0xA3F1FB85, 0xA3F2FB85, 0xA3F3FB85, 0xA3F4FB85, 0xA3F5FB85, 0xA3F6FB85, 0xA3F7FB85, 0xA3F8FB85, 0xA3F9FB85, 0xA3FAFB85, 0xA3FBFB85, 0xA3FCFB85, 0xA3FDFB85, 0xA3FEFB85, + 0xA3FFFB85, 0xA400FB85, 0xA401FB85, 0xA402FB85, 0xA403FB85, 0xA404FB85, 0xA405FB85, 0xA406FB85, 0xA407FB85, 0xA408FB85, 0xA409FB85, 
0xA40AFB85, 0xA40BFB85, 0xA40CFB85, 0xA40DFB85, + 0xA40EFB85, 0xA40FFB85, 0xA410FB85, 0xA411FB85, 0xA412FB85, 0xA413FB85, 0xA414FB85, 0xA415FB85, 0xA416FB85, 0xA417FB85, 0xA418FB85, 0xA419FB85, 0xA41AFB85, 0xA41BFB85, 0xA41CFB85, + 0xA41DFB85, 0xA41EFB85, 0xA41FFB85, 0xA420FB85, 0xA421FB85, 0xA422FB85, 0xA423FB85, 0xA424FB85, 0xA425FB85, 0xA426FB85, 0xA427FB85, 0xA428FB85, 0xA429FB85, 0xA42AFB85, 0xA42BFB85, + 0xA42CFB85, 0xA42DFB85, 0xA42EFB85, 0xA42FFB85, 0xA430FB85, 0xA431FB85, 0xA432FB85, 0xA433FB85, 0xA434FB85, 0xA435FB85, 0xA436FB85, 0xA437FB85, 0xA438FB85, 0xA439FB85, 0xA43AFB85, + 0xA43BFB85, 0xA43CFB85, 0xA43DFB85, 0xA43EFB85, 0xA43FFB85, 0xA440FB85, 0xA441FB85, 0xA442FB85, 0xA443FB85, 0xA444FB85, 0xA445FB85, 0xA446FB85, 0xA447FB85, 0xA448FB85, 0xA449FB85, + 0xA44AFB85, 0xA44BFB85, 0xA44CFB85, 0xA44DFB85, 0xA44EFB85, 0xA44FFB85, 0xA450FB85, 0xA451FB85, 0xA452FB85, 0xA453FB85, 0xA454FB85, 0xA455FB85, 0xA456FB85, 0xA457FB85, 0xA458FB85, + 0xA459FB85, 0xA45AFB85, 0xA45BFB85, 0xA45CFB85, 0xA45DFB85, 0xA45EFB85, 0xA45FFB85, 0xA460FB85, 0xA461FB85, 0xA462FB85, 0xA463FB85, 0xA464FB85, 0xA465FB85, 0xA466FB85, 0xA467FB85, + 0xA468FB85, 0xA469FB85, 0xA46AFB85, 0xA46BFB85, 0xA46CFB85, 0xA46DFB85, 0xA46EFB85, 0xA46FFB85, 0xA470FB85, 0xA471FB85, 0xA472FB85, 0xA473FB85, 0xA474FB85, 0xA475FB85, 0xA476FB85, + 0xA477FB85, 0xA478FB85, 0xA479FB85, 0xA47AFB85, 0xA47BFB85, 0xA47CFB85, 0xA47DFB85, 0xA47EFB85, 0xA47FFB85, 0xA480FB85, 0xA481FB85, 0xA482FB85, 0xA483FB85, 0xA484FB85, 0xA485FB85, + 0xA486FB85, 0xA487FB85, 0xA488FB85, 0xA489FB85, 0xA48AFB85, 0xA48BFB85, 0xA48CFB85, 0xA48DFB85, 0xA48EFB85, 0xA48FFB85, 0xA490FB85, 0xA491FB85, 0xA492FB85, 0xA493FB85, 0xA494FB85, + 0xA495FB85, 0xA496FB85, 0xA497FB85, 0xA498FB85, 0xA499FB85, 0xA49AFB85, 0xA49BFB85, 0xA49CFB85, 0xA49DFB85, 0xA49EFB85, 0xA49FFB85, 0xA4A0FB85, 0xA4A1FB85, 0xA4A2FB85, 0xA4A3FB85, + 0xA4A4FB85, 0xA4A5FB85, 0xA4A6FB85, 0xA4A7FB85, 0xA4A8FB85, 0xA4A9FB85, 0xA4AAFB85, 0xA4ABFB85, 0xA4ACFB85, 0xA4ADFB85, 
0xA4AEFB85, 0xA4AFFB85, 0xA4B0FB85, 0xA4B1FB85, 0xA4B2FB85, + 0xA4B3FB85, 0xA4B4FB85, 0xA4B5FB85, 0xA4B6FB85, 0xA4B7FB85, 0xA4B8FB85, 0xA4B9FB85, 0xA4BAFB85, 0xA4BBFB85, 0xA4BCFB85, 0xA4BDFB85, 0xA4BEFB85, 0xA4BFFB85, 0xA4C0FB85, 0xA4C1FB85, + 0xA4C2FB85, 0xA4C3FB85, 0xA4C4FB85, 0xA4C5FB85, 0xA4C6FB85, 0xA4C7FB85, 0xA4C8FB85, 0xA4C9FB85, 0xA4CAFB85, 0xA4CBFB85, 0xA4CCFB85, 0xA4CDFB85, 0xA4CEFB85, 0xA4CFFB85, 0xA4D0FB85, + 0xA4D1FB85, 0xA4D2FB85, 0xA4D3FB85, 0xA4D4FB85, 0xA4D5FB85, 0xA4D6FB85, 0xA4D7FB85, 0xA4D8FB85, 0xA4D9FB85, 0xA4DAFB85, 0xA4DBFB85, 0xA4DCFB85, 0xA4DDFB85, 0xA4DEFB85, 0xA4DFFB85, + 0xA4E0FB85, 0xA4E1FB85, 0xA4E2FB85, 0xA4E3FB85, 0xA4E4FB85, 0xA4E5FB85, 0xA4E6FB85, 0xA4E7FB85, 0xA4E8FB85, 0xA4E9FB85, 0xA4EAFB85, 0xA4EBFB85, 0xA4ECFB85, 0xA4EDFB85, 0xA4EEFB85, + 0xA4EFFB85, 0xA4F0FB85, 0xA4F1FB85, 0xA4F2FB85, 0xA4F3FB85, 0xA4F4FB85, 0xA4F5FB85, 0xA4F6FB85, 0xA4F7FB85, 0xA4F8FB85, 0xA4F9FB85, 0xA4FAFB85, 0xA4FBFB85, 0xA4FCFB85, 0xA4FDFB85, + 0xA4FEFB85, 0xA4FFFB85, 0xA500FB85, 0xA501FB85, 0xA502FB85, 0xA503FB85, 0xA504FB85, 0xA505FB85, 0xA506FB85, 0xA507FB85, 0xA508FB85, 0xA509FB85, 0xA50AFB85, 0xA50BFB85, 0xA50CFB85, + 0xA50DFB85, 0xA50EFB85, 0xA50FFB85, 0xA510FB85, 0xA511FB85, 0xA512FB85, 0xA513FB85, 0xA514FB85, 0xA515FB85, 0xA516FB85, 0xA517FB85, 0xA518FB85, 0xA519FB85, 0xA51AFB85, 0xA51BFB85, + 0xA51CFB85, 0xA51DFB85, 0xA51EFB85, 0xA51FFB85, 0xA520FB85, 0xA521FB85, 0xA522FB85, 0xA523FB85, 0xA524FB85, 0xA525FB85, 0xA526FB85, 0xA527FB85, 0xA528FB85, 0xA529FB85, 0xA52AFB85, + 0xA52BFB85, 0xA52CFB85, 0xA52DFB85, 0xA52EFB85, 0xA52FFB85, 0xA530FB85, 0xA531FB85, 0xA532FB85, 0xA533FB85, 0xA534FB85, 0xA535FB85, 0xA536FB85, 0xA537FB85, 0xA538FB85, 0xA539FB85, + 0xA53AFB85, 0xA53BFB85, 0xA53CFB85, 0xA53DFB85, 0xA53EFB85, 0xA53FFB85, 0xA540FB85, 0xA541FB85, 0xA542FB85, 0xA543FB85, 0xA544FB85, 0xA545FB85, 0xA546FB85, 0xA547FB85, 0xA548FB85, + 0xA549FB85, 0xA54AFB85, 0xA54BFB85, 0xA54CFB85, 0xA54DFB85, 0xA54EFB85, 0xA54FFB85, 0xA550FB85, 0xA551FB85, 
0xA552FB85, 0xA553FB85, 0xA554FB85, 0xA555FB85, 0xA556FB85, 0xA557FB85, + 0xA558FB85, 0xA559FB85, 0xA55AFB85, 0xA55BFB85, 0xA55CFB85, 0xA55DFB85, 0xA55EFB85, 0xA55FFB85, 0xA560FB85, 0xA561FB85, 0xA562FB85, 0xA563FB85, 0xA564FB85, 0xA565FB85, 0xA566FB85, + 0xA567FB85, 0xA568FB85, 0xA569FB85, 0xA56AFB85, 0xA56BFB85, 0xA56CFB85, 0xA56DFB85, 0xA56EFB85, 0xA56FFB85, 0xA570FB85, 0xA571FB85, 0xA572FB85, 0xA573FB85, 0xA574FB85, 0xA575FB85, + 0xA576FB85, 0xA577FB85, 0xA578FB85, 0xA579FB85, 0xA57AFB85, 0xA57BFB85, 0xA57CFB85, 0xA57DFB85, 0xA57EFB85, 0xA57FFB85, 0xA580FB85, 0xA581FB85, 0xA582FB85, 0xA583FB85, 0xA584FB85, + 0xA585FB85, 0xA586FB85, 0xA587FB85, 0xA588FB85, 0xA589FB85, 0xA58AFB85, 0xA58BFB85, 0xA58CFB85, 0xA58DFB85, 0xA58EFB85, 0xA58FFB85, 0xA590FB85, 0xA591FB85, 0xA592FB85, 0xA593FB85, + 0xA594FB85, 0xA595FB85, 0xA596FB85, 0xA597FB85, 0xA598FB85, 0xA599FB85, 0xA59AFB85, 0xA59BFB85, 0xA59CFB85, 0xA59DFB85, 0xA59EFB85, 0xA59FFB85, 0xA5A0FB85, 0xA5A1FB85, 0xA5A2FB85, + 0xA5A3FB85, 0xA5A4FB85, 0xA5A5FB85, 0xA5A6FB85, 0xA5A7FB85, 0xA5A8FB85, 0xA5A9FB85, 0xA5AAFB85, 0xA5ABFB85, 0xA5ACFB85, 0xA5ADFB85, 0xA5AEFB85, 0xA5AFFB85, 0xA5B0FB85, 0xA5B1FB85, + 0xA5B2FB85, 0xA5B3FB85, 0xA5B4FB85, 0xA5B5FB85, 0xA5B6FB85, 0xA5B7FB85, 0xA5B8FB85, 0xA5B9FB85, 0xA5BAFB85, 0xA5BBFB85, 0xA5BCFB85, 0xA5BDFB85, 0xA5BEFB85, 0xA5BFFB85, 0xA5C0FB85, + 0xA5C1FB85, 0xA5C2FB85, 0xA5C3FB85, 0xA5C4FB85, 0xA5C5FB85, 0xA5C6FB85, 0xA5C7FB85, 0xA5C8FB85, 0xA5C9FB85, 0xA5CAFB85, 0xA5CBFB85, 0xA5CCFB85, 0xA5CDFB85, 0xA5CEFB85, 0xA5CFFB85, + 0xA5D0FB85, 0xA5D1FB85, 0xA5D2FB85, 0xA5D3FB85, 0xA5D4FB85, 0xA5D5FB85, 0xA5D6FB85, 0xA5D7FB85, 0xA5D8FB85, 0xA5D9FB85, 0xA5DAFB85, 0xA5DBFB85, 0xA5DCFB85, 0xA5DDFB85, 0xA5DEFB85, + 0xA5DFFB85, 0xA5E0FB85, 0xA5E1FB85, 0xA5E2FB85, 0xA5E3FB85, 0xA5E4FB85, 0xA5E5FB85, 0xA5E6FB85, 0xA5E7FB85, 0xA5E8FB85, 0xA5E9FB85, 0xA5EAFB85, 0xA5EBFB85, 0xA5ECFB85, 0xA5EDFB85, + 0xA5EEFB85, 0xA5EFFB85, 0xA5F0FB85, 0xA5F1FB85, 0xA5F2FB85, 0xA5F3FB85, 0xA5F4FB85, 0xA5F5FB85, 
0xA5F6FB85, 0xA5F7FB85, 0xA5F8FB85, 0xA5F9FB85, 0xA5FAFB85, 0xA5FBFB85, 0xA5FCFB85, + 0xA5FDFB85, 0xA5FEFB85, 0xA5FFFB85, 0xA600FB85, 0xA601FB85, 0xA602FB85, 0xA603FB85, 0xA604FB85, 0xA605FB85, 0xA606FB85, 0xA607FB85, 0xA608FB85, 0xA609FB85, 0xA60AFB85, 0xA60BFB85, + 0xA60CFB85, 0xA60DFB85, 0xA60EFB85, 0xA60FFB85, 0xA610FB85, 0xA611FB85, 0xA612FB85, 0xA613FB85, 0xA614FB85, 0xA615FB85, 0xA616FB85, 0xA617FB85, 0xA618FB85, 0xA619FB85, 0xA61AFB85, + 0xA61BFB85, 0xA61CFB85, 0xA61DFB85, 0xA61EFB85, 0xA61FFB85, 0xA620FB85, 0xA621FB85, 0xA622FB85, 0xA623FB85, 0xA624FB85, 0xA625FB85, 0xA626FB85, 0xA627FB85, 0xA628FB85, 0xA629FB85, + 0xA62AFB85, 0xA62BFB85, 0xA62CFB85, 0xA62DFB85, 0xA62EFB85, 0xA62FFB85, 0xA630FB85, 0xA631FB85, 0xA632FB85, 0xA633FB85, 0xA634FB85, 0xA635FB85, 0xA636FB85, 0xA637FB85, 0xA638FB85, + 0xA639FB85, 0xA63AFB85, 0xA63BFB85, 0xA63CFB85, 0xA63DFB85, 0xA63EFB85, 0xA63FFB85, 0xA640FB85, 0xA641FB85, 0xA642FB85, 0xA643FB85, 0xA644FB85, 0xA645FB85, 0xA646FB85, 0xA647FB85, + 0xA648FB85, 0xA649FB85, 0xA64AFB85, 0xA64BFB85, 0xA64CFB85, 0xA64DFB85, 0xA64EFB85, 0xA64FFB85, 0xA650FB85, 0xA651FB85, 0xA652FB85, 0xA653FB85, 0xA654FB85, 0xA655FB85, 0xA656FB85, + 0xA657FB85, 0xA658FB85, 0xA659FB85, 0xA65AFB85, 0xA65BFB85, 0xA65CFB85, 0xA65DFB85, 0xA65EFB85, 0xA65FFB85, 0xA660FB85, 0xA661FB85, 0xA662FB85, 0xA663FB85, 0xA664FB85, 0xA665FB85, + 0xA666FB85, 0xA667FB85, 0xA668FB85, 0xA669FB85, 0xA66AFB85, 0xA66BFB85, 0xA66CFB85, 0xA66DFB85, 0xA66EFB85, 0xA66FFB85, 0xA670FB85, 0xA671FB85, 0xA672FB85, 0xA673FB85, 0xA674FB85, + 0xA675FB85, 0xA676FB85, 0xA677FB85, 0xA678FB85, 0xA679FB85, 0xA67AFB85, 0xA67BFB85, 0xA67CFB85, 0xA67DFB85, 0xA67EFB85, 0xA67FFB85, 0xA680FB85, 0xA681FB85, 0xA682FB85, 0xA683FB85, + 0xA684FB85, 0xA685FB85, 0xA686FB85, 0xA687FB85, 0xA688FB85, 0xA689FB85, 0xA68AFB85, 0xA68BFB85, 0xA68CFB85, 0xA68DFB85, 0xA68EFB85, 0xA68FFB85, 0xA690FB85, 0xA691FB85, 0xA692FB85, + 0xA693FB85, 0xA694FB85, 0xA695FB85, 0xA696FB85, 0xA697FB85, 0xA698FB85, 0xA699FB85, 
0xA69AFB85, 0xA69BFB85, 0xA69CFB85, 0xA69DFB85, 0xA69EFB85, 0xA69FFB85, 0xA6A0FB85, 0xA6A1FB85, + 0xA6A2FB85, 0xA6A3FB85, 0xA6A4FB85, 0xA6A5FB85, 0xA6A6FB85, 0xA6A7FB85, 0xA6A8FB85, 0xA6A9FB85, 0xA6AAFB85, 0xA6ABFB85, 0xA6ACFB85, 0xA6ADFB85, 0xA6AEFB85, 0xA6AFFB85, 0xA6B0FB85, + 0xA6B1FB85, 0xA6B2FB85, 0xA6B3FB85, 0xA6B4FB85, 0xA6B5FB85, 0xA6B6FB85, 0xA6B7FB85, 0xA6B8FB85, 0xA6B9FB85, 0xA6BAFB85, 0xA6BBFB85, 0xA6BCFB85, 0xA6BDFB85, 0xA6BEFB85, 0xA6BFFB85, + 0xA6C0FB85, 0xA6C1FB85, 0xA6C2FB85, 0xA6C3FB85, 0xA6C4FB85, 0xA6C5FB85, 0xA6C6FB85, 0xA6C7FB85, 0xA6C8FB85, 0xA6C9FB85, 0xA6CAFB85, 0xA6CBFB85, 0xA6CCFB85, 0xA6CDFB85, 0xA6CEFB85, + 0xA6CFFB85, 0xA6D0FB85, 0xA6D1FB85, 0xA6D2FB85, 0xA6D3FB85, 0xA6D4FB85, 0xA6D5FB85, 0xA6D6FB85, 0xA6D7FBC5, 0xA6D8FBC5, 0xA6D9FBC5, 0xA6DAFBC5, 0xA6DBFBC5, 0xA6DCFBC5, 0xA6DDFBC5, + 0xA6DEFBC5, 0xA6DFFBC5, 0xA6E0FBC5, 0xA6E1FBC5, 0xA6E2FBC5, 0xA6E3FBC5, 0xA6E4FBC5, 0xA6E5FBC5, 0xA6E6FBC5, 0xA6E7FBC5, 0xA6E8FBC5, 0xA6E9FBC5, 0xA6EAFBC5, 0xA6EBFBC5, 0xA6ECFBC5, + 0xA6EDFBC5, 0xA6EEFBC5, 0xA6EFFBC5, 0xA6F0FBC5, 0xA6F1FBC5, 0xA6F2FBC5, 0xA6F3FBC5, 0xA6F4FBC5, 0xA6F5FBC5, 0xA6F6FBC5, 0xA6F7FBC5, 0xA6F8FBC5, 0xA6F9FBC5, 0xA6FAFBC5, 0xA6FBFBC5, + 0xA6FCFBC5, 0xA6FDFBC5, 0xA6FEFBC5, 0xA6FFFBC5, 0xA700FB85, 0xA701FB85, 0xA702FB85, 0xA703FB85, 0xA704FB85, 0xA705FB85, 0xA706FB85, 0xA707FB85, 0xA708FB85, 0xA709FB85, 0xA70AFB85, + 0xA70BFB85, 0xA70CFB85, 0xA70DFB85, 0xA70EFB85, 0xA70FFB85, 0xA710FB85, 0xA711FB85, 0xA712FB85, 0xA713FB85, 0xA714FB85, 0xA715FB85, 0xA716FB85, 0xA717FB85, 0xA718FB85, 0xA719FB85, + 0xA71AFB85, 0xA71BFB85, 0xA71CFB85, 0xA71DFB85, 0xA71EFB85, 0xA71FFB85, 0xA720FB85, 0xA721FB85, 0xA722FB85, 0xA723FB85, 0xA724FB85, 0xA725FB85, 0xA726FB85, 0xA727FB85, 0xA728FB85, + 0xA729FB85, 0xA72AFB85, 0xA72BFB85, 0xA72CFB85, 0xA72DFB85, 0xA72EFB85, 0xA72FFB85, 0xA730FB85, 0xA731FB85, 0xA732FB85, 0xA733FB85, 0xA734FB85, 0xA735FB85, 0xA736FB85, 0xA737FB85, + 0xA738FB85, 0xA739FB85, 0xA73AFB85, 0xA73BFB85, 0xA73CFB85, 0xA73DFB85, 
0xA73EFB85, 0xA73FFB85, 0xA740FB85, 0xA741FB85, 0xA742FB85, 0xA743FB85, 0xA744FB85, 0xA745FB85, 0xA746FB85, + 0xA747FB85, 0xA748FB85, 0xA749FB85, 0xA74AFB85, 0xA74BFB85, 0xA74CFB85, 0xA74DFB85, 0xA74EFB85, 0xA74FFB85, 0xA750FB85, 0xA751FB85, 0xA752FB85, 0xA753FB85, 0xA754FB85, 0xA755FB85, + 0xA756FB85, 0xA757FB85, 0xA758FB85, 0xA759FB85, 0xA75AFB85, 0xA75BFB85, 0xA75CFB85, 0xA75DFB85, 0xA75EFB85, 0xA75FFB85, 0xA760FB85, 0xA761FB85, 0xA762FB85, 0xA763FB85, 0xA764FB85, + 0xA765FB85, 0xA766FB85, 0xA767FB85, 0xA768FB85, 0xA769FB85, 0xA76AFB85, 0xA76BFB85, 0xA76CFB85, 0xA76DFB85, 0xA76EFB85, 0xA76FFB85, 0xA770FB85, 0xA771FB85, 0xA772FB85, 0xA773FB85, + 0xA774FB85, 0xA775FB85, 0xA776FB85, 0xA777FB85, 0xA778FB85, 0xA779FB85, 0xA77AFB85, 0xA77BFB85, 0xA77CFB85, 0xA77DFB85, 0xA77EFB85, 0xA77FFB85, 0xA780FB85, 0xA781FB85, 0xA782FB85, + 0xA783FB85, 0xA784FB85, 0xA785FB85, 0xA786FB85, 0xA787FB85, 0xA788FB85, 0xA789FB85, 0xA78AFB85, 0xA78BFB85, 0xA78CFB85, 0xA78DFB85, 0xA78EFB85, 0xA78FFB85, 0xA790FB85, 0xA791FB85, + 0xA792FB85, 0xA793FB85, 0xA794FB85, 0xA795FB85, 0xA796FB85, 0xA797FB85, 0xA798FB85, 0xA799FB85, 0xA79AFB85, 0xA79BFB85, 0xA79CFB85, 0xA79DFB85, 0xA79EFB85, 0xA79FFB85, 0xA7A0FB85, + 0xA7A1FB85, 0xA7A2FB85, 0xA7A3FB85, 0xA7A4FB85, 0xA7A5FB85, 0xA7A6FB85, 0xA7A7FB85, 0xA7A8FB85, 0xA7A9FB85, 0xA7AAFB85, 0xA7ABFB85, 0xA7ACFB85, 0xA7ADFB85, 0xA7AEFB85, 0xA7AFFB85, + 0xA7B0FB85, 0xA7B1FB85, 0xA7B2FB85, 0xA7B3FB85, 0xA7B4FB85, 0xA7B5FB85, 0xA7B6FB85, 0xA7B7FB85, 0xA7B8FB85, 0xA7B9FB85, 0xA7BAFB85, 0xA7BBFB85, 0xA7BCFB85, 0xA7BDFB85, 0xA7BEFB85, + 0xA7BFFB85, 0xA7C0FB85, 0xA7C1FB85, 0xA7C2FB85, 0xA7C3FB85, 0xA7C4FB85, 0xA7C5FB85, 0xA7C6FB85, 0xA7C7FB85, 0xA7C8FB85, 0xA7C9FB85, 0xA7CAFB85, 0xA7CBFB85, 0xA7CCFB85, 0xA7CDFB85, + 0xA7CEFB85, 0xA7CFFB85, 0xA7D0FB85, 0xA7D1FB85, 0xA7D2FB85, 0xA7D3FB85, 0xA7D4FB85, 0xA7D5FB85, 0xA7D6FB85, 0xA7D7FB85, 0xA7D8FB85, 0xA7D9FB85, 0xA7DAFB85, 0xA7DBFB85, 0xA7DCFB85, + 0xA7DDFB85, 0xA7DEFB85, 0xA7DFFB85, 0xA7E0FB85, 0xA7E1FB85, 
0xA7E2FB85, 0xA7E3FB85, 0xA7E4FB85, 0xA7E5FB85, 0xA7E6FB85, 0xA7E7FB85, 0xA7E8FB85, 0xA7E9FB85, 0xA7EAFB85, 0xA7EBFB85, + 0xA7ECFB85, 0xA7EDFB85, 0xA7EEFB85, 0xA7EFFB85, 0xA7F0FB85, 0xA7F1FB85, 0xA7F2FB85, 0xA7F3FB85, 0xA7F4FB85, 0xA7F5FB85, 0xA7F6FB85, 0xA7F7FB85, 0xA7F8FB85, 0xA7F9FB85, 0xA7FAFB85, + 0xA7FBFB85, 0xA7FCFB85, 0xA7FDFB85, 0xA7FEFB85, 0xA7FFFB85, 0xA800FB85, 0xA801FB85, 0xA802FB85, 0xA803FB85, 0xA804FB85, 0xA805FB85, 0xA806FB85, 0xA807FB85, 0xA808FB85, 0xA809FB85, + 0xA80AFB85, 0xA80BFB85, 0xA80CFB85, 0xA80DFB85, 0xA80EFB85, 0xA80FFB85, 0xA810FB85, 0xA811FB85, 0xA812FB85, 0xA813FB85, 0xA814FB85, 0xA815FB85, 0xA816FB85, 0xA817FB85, 0xA818FB85, + 0xA819FB85, 0xA81AFB85, 0xA81BFB85, 0xA81CFB85, 0xA81DFB85, 0xA81EFB85, 0xA81FFB85, 0xA820FB85, 0xA821FB85, 0xA822FB85, 0xA823FB85, 0xA824FB85, 0xA825FB85, 0xA826FB85, 0xA827FB85, + 0xA828FB85, 0xA829FB85, 0xA82AFB85, 0xA82BFB85, 0xA82CFB85, 0xA82DFB85, 0xA82EFB85, 0xA82FFB85, 0xA830FB85, 0xA831FB85, 0xA832FB85, 0xA833FB85, 0xA834FB85, 0xA835FB85, 0xA836FB85, + 0xA837FB85, 0xA838FB85, 0xA839FB85, 0xA83AFB85, 0xA83BFB85, 0xA83CFB85, 0xA83DFB85, 0xA83EFB85, 0xA83FFB85, 0xA840FB85, 0xA841FB85, 0xA842FB85, 0xA843FB85, 0xA844FB85, 0xA845FB85, + 0xA846FB85, 0xA847FB85, 0xA848FB85, 0xA849FB85, 0xA84AFB85, 0xA84BFB85, 0xA84CFB85, 0xA84DFB85, 0xA84EFB85, 0xA84FFB85, 0xA850FB85, 0xA851FB85, 0xA852FB85, 0xA853FB85, 0xA854FB85, + 0xA855FB85, 0xA856FB85, 0xA857FB85, 0xA858FB85, 0xA859FB85, 0xA85AFB85, 0xA85BFB85, 0xA85CFB85, 0xA85DFB85, 0xA85EFB85, 0xA85FFB85, 0xA860FB85, 0xA861FB85, 0xA862FB85, 0xA863FB85, + 0xA864FB85, 0xA865FB85, 0xA866FB85, 0xA867FB85, 0xA868FB85, 0xA869FB85, 0xA86AFB85, 0xA86BFB85, 0xA86CFB85, 0xA86DFB85, 0xA86EFB85, 0xA86FFB85, 0xA870FB85, 0xA871FB85, 0xA872FB85, + 0xA873FB85, 0xA874FB85, 0xA875FB85, 0xA876FB85, 0xA877FB85, 0xA878FB85, 0xA879FB85, 0xA87AFB85, 0xA87BFB85, 0xA87CFB85, 0xA87DFB85, 0xA87EFB85, 0xA87FFB85, 0xA880FB85, 0xA881FB85, + 0xA882FB85, 0xA883FB85, 0xA884FB85, 0xA885FB85, 
0xA886FB85, 0xA887FB85, 0xA888FB85, 0xA889FB85, 0xA88AFB85, 0xA88BFB85, 0xA88CFB85, 0xA88DFB85, 0xA88EFB85, 0xA88FFB85, 0xA890FB85, + 0xA891FB85, 0xA892FB85, 0xA893FB85, 0xA894FB85, 0xA895FB85, 0xA896FB85, 0xA897FB85, 0xA898FB85, 0xA899FB85, 0xA89AFB85, 0xA89BFB85, 0xA89CFB85, 0xA89DFB85, 0xA89EFB85, 0xA89FFB85, + 0xA8A0FB85, 0xA8A1FB85, 0xA8A2FB85, 0xA8A3FB85, 0xA8A4FB85, 0xA8A5FB85, 0xA8A6FB85, 0xA8A7FB85, 0xA8A8FB85, 0xA8A9FB85, 0xA8AAFB85, 0xA8ABFB85, 0xA8ACFB85, 0xA8ADFB85, 0xA8AEFB85, + 0xA8AFFB85, 0xA8B0FB85, 0xA8B1FB85, 0xA8B2FB85, 0xA8B3FB85, 0xA8B4FB85, 0xA8B5FB85, 0xA8B6FB85, 0xA8B7FB85, 0xA8B8FB85, 0xA8B9FB85, 0xA8BAFB85, 0xA8BBFB85, 0xA8BCFB85, 0xA8BDFB85, + 0xA8BEFB85, 0xA8BFFB85, 0xA8C0FB85, 0xA8C1FB85, 0xA8C2FB85, 0xA8C3FB85, 0xA8C4FB85, 0xA8C5FB85, 0xA8C6FB85, 0xA8C7FB85, 0xA8C8FB85, 0xA8C9FB85, 0xA8CAFB85, 0xA8CBFB85, 0xA8CCFB85, + 0xA8CDFB85, 0xA8CEFB85, 0xA8CFFB85, 0xA8D0FB85, 0xA8D1FB85, 0xA8D2FB85, 0xA8D3FB85, 0xA8D4FB85, 0xA8D5FB85, 0xA8D6FB85, 0xA8D7FB85, 0xA8D8FB85, 0xA8D9FB85, 0xA8DAFB85, 0xA8DBFB85, + 0xA8DCFB85, 0xA8DDFB85, 0xA8DEFB85, 0xA8DFFB85, 0xA8E0FB85, 0xA8E1FB85, 0xA8E2FB85, 0xA8E3FB85, 0xA8E4FB85, 0xA8E5FB85, 0xA8E6FB85, 0xA8E7FB85, 0xA8E8FB85, 0xA8E9FB85, 0xA8EAFB85, + 0xA8EBFB85, 0xA8ECFB85, 0xA8EDFB85, 0xA8EEFB85, 0xA8EFFB85, 0xA8F0FB85, 0xA8F1FB85, 0xA8F2FB85, 0xA8F3FB85, 0xA8F4FB85, 0xA8F5FB85, 0xA8F6FB85, 0xA8F7FB85, 0xA8F8FB85, 0xA8F9FB85, + 0xA8FAFB85, 0xA8FBFB85, 0xA8FCFB85, 0xA8FDFB85, 0xA8FEFB85, 0xA8FFFB85, 0xA900FB85, 0xA901FB85, 0xA902FB85, 0xA903FB85, 0xA904FB85, 0xA905FB85, 0xA906FB85, 0xA907FB85, 0xA908FB85, + 0xA909FB85, 0xA90AFB85, 0xA90BFB85, 0xA90CFB85, 0xA90DFB85, 0xA90EFB85, 0xA90FFB85, 0xA910FB85, 0xA911FB85, 0xA912FB85, 0xA913FB85, 0xA914FB85, 0xA915FB85, 0xA916FB85, 0xA917FB85, + 0xA918FB85, 0xA919FB85, 0xA91AFB85, 0xA91BFB85, 0xA91CFB85, 0xA91DFB85, 0xA91EFB85, 0xA91FFB85, 0xA920FB85, 0xA921FB85, 0xA922FB85, 0xA923FB85, 0xA924FB85, 0xA925FB85, 0xA926FB85, + 0xA927FB85, 0xA928FB85, 0xA929FB85, 
0xA92AFB85, 0xA92BFB85, 0xA92CFB85, 0xA92DFB85, 0xA92EFB85, 0xA92FFB85, 0xA930FB85, 0xA931FB85, 0xA932FB85, 0xA933FB85, 0xA934FB85, 0xA935FB85, + 0xA936FB85, 0xA937FB85, 0xA938FB85, 0xA939FB85, 0xA93AFB85, 0xA93BFB85, 0xA93CFB85, 0xA93DFB85, 0xA93EFB85, 0xA93FFB85, 0xA940FB85, 0xA941FB85, 0xA942FB85, 0xA943FB85, 0xA944FB85, + 0xA945FB85, 0xA946FB85, 0xA947FB85, 0xA948FB85, 0xA949FB85, 0xA94AFB85, 0xA94BFB85, 0xA94CFB85, 0xA94DFB85, 0xA94EFB85, 0xA94FFB85, 0xA950FB85, 0xA951FB85, 0xA952FB85, 0xA953FB85, + 0xA954FB85, 0xA955FB85, 0xA956FB85, 0xA957FB85, 0xA958FB85, 0xA959FB85, 0xA95AFB85, 0xA95BFB85, 0xA95CFB85, 0xA95DFB85, 0xA95EFB85, 0xA95FFB85, 0xA960FB85, 0xA961FB85, 0xA962FB85, + 0xA963FB85, 0xA964FB85, 0xA965FB85, 0xA966FB85, 0xA967FB85, 0xA968FB85, 0xA969FB85, 0xA96AFB85, 0xA96BFB85, 0xA96CFB85, 0xA96DFB85, 0xA96EFB85, 0xA96FFB85, 0xA970FB85, 0xA971FB85, + 0xA972FB85, 0xA973FB85, 0xA974FB85, 0xA975FB85, 0xA976FB85, 0xA977FB85, 0xA978FB85, 0xA979FB85, 0xA97AFB85, 0xA97BFB85, 0xA97CFB85, 0xA97DFB85, 0xA97EFB85, 0xA97FFB85, 0xA980FB85, + 0xA981FB85, 0xA982FB85, 0xA983FB85, 0xA984FB85, 0xA985FB85, 0xA986FB85, 0xA987FB85, 0xA988FB85, 0xA989FB85, 0xA98AFB85, 0xA98BFB85, 0xA98CFB85, 0xA98DFB85, 0xA98EFB85, 0xA98FFB85, + 0xA990FB85, 0xA991FB85, 0xA992FB85, 0xA993FB85, 0xA994FB85, 0xA995FB85, 0xA996FB85, 0xA997FB85, 0xA998FB85, 0xA999FB85, 0xA99AFB85, 0xA99BFB85, 0xA99CFB85, 0xA99DFB85, 0xA99EFB85, + 0xA99FFB85, 0xA9A0FB85, 0xA9A1FB85, 0xA9A2FB85, 0xA9A3FB85, 0xA9A4FB85, 0xA9A5FB85, 0xA9A6FB85, 0xA9A7FB85, 0xA9A8FB85, 0xA9A9FB85, 0xA9AAFB85, 0xA9ABFB85, 0xA9ACFB85, 0xA9ADFB85, + 0xA9AEFB85, 0xA9AFFB85, 0xA9B0FB85, 0xA9B1FB85, 0xA9B2FB85, 0xA9B3FB85, 0xA9B4FB85, 0xA9B5FB85, 0xA9B6FB85, 0xA9B7FB85, 0xA9B8FB85, 0xA9B9FB85, 0xA9BAFB85, 0xA9BBFB85, 0xA9BCFB85, + 0xA9BDFB85, 0xA9BEFB85, 0xA9BFFB85, 0xA9C0FB85, 0xA9C1FB85, 0xA9C2FB85, 0xA9C3FB85, 0xA9C4FB85, 0xA9C5FB85, 0xA9C6FB85, 0xA9C7FB85, 0xA9C8FB85, 0xA9C9FB85, 0xA9CAFB85, 0xA9CBFB85, + 0xA9CCFB85, 0xA9CDFB85, 
0xA9CEFB85, 0xA9CFFB85, 0xA9D0FB85, 0xA9D1FB85, 0xA9D2FB85, 0xA9D3FB85, 0xA9D4FB85, 0xA9D5FB85, 0xA9D6FB85, 0xA9D7FB85, 0xA9D8FB85, 0xA9D9FB85, 0xA9DAFB85, + 0xA9DBFB85, 0xA9DCFB85, 0xA9DDFB85, 0xA9DEFB85, 0xA9DFFB85, 0xA9E0FB85, 0xA9E1FB85, 0xA9E2FB85, 0xA9E3FB85, 0xA9E4FB85, 0xA9E5FB85, 0xA9E6FB85, 0xA9E7FB85, 0xA9E8FB85, 0xA9E9FB85, + 0xA9EAFB85, 0xA9EBFB85, 0xA9ECFB85, 0xA9EDFB85, 0xA9EEFB85, 0xA9EFFB85, 0xA9F0FB85, 0xA9F1FB85, 0xA9F2FB85, 0xA9F3FB85, 0xA9F4FB85, 0xA9F5FB85, 0xA9F6FB85, 0xA9F7FB85, 0xA9F8FB85, + 0xA9F9FB85, 0xA9FAFB85, 0xA9FBFB85, 0xA9FCFB85, 0xA9FDFB85, 0xA9FEFB85, 0xA9FFFB85, 0xAA00FB85, 0xAA01FB85, 0xAA02FB85, 0xAA03FB85, 0xAA04FB85, 0xAA05FB85, 0xAA06FB85, 0xAA07FB85, + 0xAA08FB85, 0xAA09FB85, 0xAA0AFB85, 0xAA0BFB85, 0xAA0CFB85, 0xAA0DFB85, 0xAA0EFB85, 0xAA0FFB85, 0xAA10FB85, 0xAA11FB85, 0xAA12FB85, 0xAA13FB85, 0xAA14FB85, 0xAA15FB85, 0xAA16FB85, + 0xAA17FB85, 0xAA18FB85, 0xAA19FB85, 0xAA1AFB85, 0xAA1BFB85, 0xAA1CFB85, 0xAA1DFB85, 0xAA1EFB85, 0xAA1FFB85, 0xAA20FB85, 0xAA21FB85, 0xAA22FB85, 0xAA23FB85, 0xAA24FB85, 0xAA25FB85, + 0xAA26FB85, 0xAA27FB85, 0xAA28FB85, 0xAA29FB85, 0xAA2AFB85, 0xAA2BFB85, 0xAA2CFB85, 0xAA2DFB85, 0xAA2EFB85, 0xAA2FFB85, 0xAA30FB85, 0xAA31FB85, 0xAA32FB85, 0xAA33FB85, 0xAA34FB85, + 0xAA35FB85, 0xAA36FB85, 0xAA37FB85, 0xAA38FB85, 0xAA39FB85, 0xAA3AFB85, 0xAA3BFB85, 0xAA3CFB85, 0xAA3DFB85, 0xAA3EFB85, 0xAA3FFB85, 0xAA40FB85, 0xAA41FB85, 0xAA42FB85, 0xAA43FB85, + 0xAA44FB85, 0xAA45FB85, 0xAA46FB85, 0xAA47FB85, 0xAA48FB85, 0xAA49FB85, 0xAA4AFB85, 0xAA4BFB85, 0xAA4CFB85, 0xAA4DFB85, 0xAA4EFB85, 0xAA4FFB85, 0xAA50FB85, 0xAA51FB85, 0xAA52FB85, + 0xAA53FB85, 0xAA54FB85, 0xAA55FB85, 0xAA56FB85, 0xAA57FB85, 0xAA58FB85, 0xAA59FB85, 0xAA5AFB85, 0xAA5BFB85, 0xAA5CFB85, 0xAA5DFB85, 0xAA5EFB85, 0xAA5FFB85, 0xAA60FB85, 0xAA61FB85, + 0xAA62FB85, 0xAA63FB85, 0xAA64FB85, 0xAA65FB85, 0xAA66FB85, 0xAA67FB85, 0xAA68FB85, 0xAA69FB85, 0xAA6AFB85, 0xAA6BFB85, 0xAA6CFB85, 0xAA6DFB85, 0xAA6EFB85, 0xAA6FFB85, 0xAA70FB85, + 0xAA71FB85, 
0xAA72FB85, 0xAA73FB85, 0xAA74FB85, 0xAA75FB85, 0xAA76FB85, 0xAA77FB85, 0xAA78FB85, 0xAA79FB85, 0xAA7AFB85, 0xAA7BFB85, 0xAA7CFB85, 0xAA7DFB85, 0xAA7EFB85, 0xAA7FFB85, + 0xAA80FB85, 0xAA81FB85, 0xAA82FB85, 0xAA83FB85, 0xAA84FB85, 0xAA85FB85, 0xAA86FB85, 0xAA87FB85, 0xAA88FB85, 0xAA89FB85, 0xAA8AFB85, 0xAA8BFB85, 0xAA8CFB85, 0xAA8DFB85, 0xAA8EFB85, + 0xAA8FFB85, 0xAA90FB85, 0xAA91FB85, 0xAA92FB85, 0xAA93FB85, 0xAA94FB85, 0xAA95FB85, 0xAA96FB85, 0xAA97FB85, 0xAA98FB85, 0xAA99FB85, 0xAA9AFB85, 0xAA9BFB85, 0xAA9CFB85, 0xAA9DFB85, + 0xAA9EFB85, 0xAA9FFB85, 0xAAA0FB85, 0xAAA1FB85, 0xAAA2FB85, 0xAAA3FB85, 0xAAA4FB85, 0xAAA5FB85, 0xAAA6FB85, 0xAAA7FB85, 0xAAA8FB85, 0xAAA9FB85, 0xAAAAFB85, 0xAAABFB85, 0xAAACFB85, + 0xAAADFB85, 0xAAAEFB85, 0xAAAFFB85, 0xAAB0FB85, 0xAAB1FB85, 0xAAB2FB85, 0xAAB3FB85, 0xAAB4FB85, 0xAAB5FB85, 0xAAB6FB85, 0xAAB7FB85, 0xAAB8FB85, 0xAAB9FB85, 0xAABAFB85, 0xAABBFB85, + 0xAABCFB85, 0xAABDFB85, 0xAABEFB85, 0xAABFFB85, 0xAAC0FB85, 0xAAC1FB85, 0xAAC2FB85, 0xAAC3FB85, 0xAAC4FB85, 0xAAC5FB85, 0xAAC6FB85, 0xAAC7FB85, 0xAAC8FB85, 0xAAC9FB85, 0xAACAFB85, + 0xAACBFB85, 0xAACCFB85, 0xAACDFB85, 0xAACEFB85, 0xAACFFB85, 0xAAD0FB85, 0xAAD1FB85, 0xAAD2FB85, 0xAAD3FB85, 0xAAD4FB85, 0xAAD5FB85, 0xAAD6FB85, 0xAAD7FB85, 0xAAD8FB85, 0xAAD9FB85, + 0xAADAFB85, 0xAADBFB85, 0xAADCFB85, 0xAADDFB85, 0xAADEFB85, 0xAADFFB85, 0xAAE0FB85, 0xAAE1FB85, 0xAAE2FB85, 0xAAE3FB85, 0xAAE4FB85, 0xAAE5FB85, 0xAAE6FB85, 0xAAE7FB85, 0xAAE8FB85, + 0xAAE9FB85, 0xAAEAFB85, 0xAAEBFB85, 0xAAECFB85, 0xAAEDFB85, 0xAAEEFB85, 0xAAEFFB85, 0xAAF0FB85, 0xAAF1FB85, 0xAAF2FB85, 0xAAF3FB85, 0xAAF4FB85, 0xAAF5FB85, 0xAAF6FB85, 0xAAF7FB85, + 0xAAF8FB85, 0xAAF9FB85, 0xAAFAFB85, 0xAAFBFB85, 0xAAFCFB85, 0xAAFDFB85, 0xAAFEFB85, 0xAAFFFB85, 0xAB00FB85, 0xAB01FB85, 0xAB02FB85, 0xAB03FB85, 0xAB04FB85, 0xAB05FB85, 0xAB06FB85, + 0xAB07FB85, 0xAB08FB85, 0xAB09FB85, 0xAB0AFB85, 0xAB0BFB85, 0xAB0CFB85, 0xAB0DFB85, 0xAB0EFB85, 0xAB0FFB85, 0xAB10FB85, 0xAB11FB85, 0xAB12FB85, 0xAB13FB85, 0xAB14FB85, 0xAB15FB85, + 
0xAB16FB85, 0xAB17FB85, 0xAB18FB85, 0xAB19FB85, 0xAB1AFB85, 0xAB1BFB85, 0xAB1CFB85, 0xAB1DFB85, 0xAB1EFB85, 0xAB1FFB85, 0xAB20FB85, 0xAB21FB85, 0xAB22FB85, 0xAB23FB85, 0xAB24FB85, + 0xAB25FB85, 0xAB26FB85, 0xAB27FB85, 0xAB28FB85, 0xAB29FB85, 0xAB2AFB85, 0xAB2BFB85, 0xAB2CFB85, 0xAB2DFB85, 0xAB2EFB85, 0xAB2FFB85, 0xAB30FB85, 0xAB31FB85, 0xAB32FB85, 0xAB33FB85, + 0xAB34FB85, 0xAB35FB85, 0xAB36FB85, 0xAB37FB85, 0xAB38FB85, 0xAB39FB85, 0xAB3AFB85, 0xAB3BFB85, 0xAB3CFB85, 0xAB3DFB85, 0xAB3EFB85, 0xAB3FFB85, 0xAB40FB85, 0xAB41FB85, 0xAB42FB85, + 0xAB43FB85, 0xAB44FB85, 0xAB45FB85, 0xAB46FB85, 0xAB47FB85, 0xAB48FB85, 0xAB49FB85, 0xAB4AFB85, 0xAB4BFB85, 0xAB4CFB85, 0xAB4DFB85, 0xAB4EFB85, 0xAB4FFB85, 0xAB50FB85, 0xAB51FB85, + 0xAB52FB85, 0xAB53FB85, 0xAB54FB85, 0xAB55FB85, 0xAB56FB85, 0xAB57FB85, 0xAB58FB85, 0xAB59FB85, 0xAB5AFB85, 0xAB5BFB85, 0xAB5CFB85, 0xAB5DFB85, 0xAB5EFB85, 0xAB5FFB85, 0xAB60FB85, + 0xAB61FB85, 0xAB62FB85, 0xAB63FB85, 0xAB64FB85, 0xAB65FB85, 0xAB66FB85, 0xAB67FB85, 0xAB68FB85, 0xAB69FB85, 0xAB6AFB85, 0xAB6BFB85, 0xAB6CFB85, 0xAB6DFB85, 0xAB6EFB85, 0xAB6FFB85, + 0xAB70FB85, 0xAB71FB85, 0xAB72FB85, 0xAB73FB85, 0xAB74FB85, 0xAB75FB85, 0xAB76FB85, 0xAB77FB85, 0xAB78FB85, 0xAB79FB85, 0xAB7AFB85, 0xAB7BFB85, 0xAB7CFB85, 0xAB7DFB85, 0xAB7EFB85, + 0xAB7FFB85, 0xAB80FB85, 0xAB81FB85, 0xAB82FB85, 0xAB83FB85, 0xAB84FB85, 0xAB85FB85, 0xAB86FB85, 0xAB87FB85, 0xAB88FB85, 0xAB89FB85, 0xAB8AFB85, 0xAB8BFB85, 0xAB8CFB85, 0xAB8DFB85, + 0xAB8EFB85, 0xAB8FFB85, 0xAB90FB85, 0xAB91FB85, 0xAB92FB85, 0xAB93FB85, 0xAB94FB85, 0xAB95FB85, 0xAB96FB85, 0xAB97FB85, 0xAB98FB85, 0xAB99FB85, 0xAB9AFB85, 0xAB9BFB85, 0xAB9CFB85, + 0xAB9DFB85, 0xAB9EFB85, 0xAB9FFB85, 0xABA0FB85, 0xABA1FB85, 0xABA2FB85, 0xABA3FB85, 0xABA4FB85, 0xABA5FB85, 0xABA6FB85, 0xABA7FB85, 0xABA8FB85, 0xABA9FB85, 0xABAAFB85, 0xABABFB85, + 0xABACFB85, 0xABADFB85, 0xABAEFB85, 0xABAFFB85, 0xABB0FB85, 0xABB1FB85, 0xABB2FB85, 0xABB3FB85, 0xABB4FB85, 0xABB5FB85, 0xABB6FB85, 0xABB7FB85, 0xABB8FB85, 0xABB9FB85, 0xABBAFB85, 
+ 0xABBBFB85, 0xABBCFB85, 0xABBDFB85, 0xABBEFB85, 0xABBFFB85, 0xABC0FB85, 0xABC1FB85, 0xABC2FB85, 0xABC3FB85, 0xABC4FB85, 0xABC5FB85, 0xABC6FB85, 0xABC7FB85, 0xABC8FB85, 0xABC9FB85, + 0xABCAFB85, 0xABCBFB85, 0xABCCFB85, 0xABCDFB85, 0xABCEFB85, 0xABCFFB85, 0xABD0FB85, 0xABD1FB85, 0xABD2FB85, 0xABD3FB85, 0xABD4FB85, 0xABD5FB85, 0xABD6FB85, 0xABD7FB85, 0xABD8FB85, + 0xABD9FB85, 0xABDAFB85, 0xABDBFB85, 0xABDCFB85, 0xABDDFB85, 0xABDEFB85, 0xABDFFB85, 0xABE0FB85, 0xABE1FB85, 0xABE2FB85, 0xABE3FB85, 0xABE4FB85, 0xABE5FB85, 0xABE6FB85, 0xABE7FB85, + 0xABE8FB85, 0xABE9FB85, 0xABEAFB85, 0xABEBFB85, 0xABECFB85, 0xABEDFB85, 0xABEEFB85, 0xABEFFB85, 0xABF0FB85, 0xABF1FB85, 0xABF2FB85, 0xABF3FB85, 0xABF4FB85, 0xABF5FB85, 0xABF6FB85, + 0xABF7FB85, 0xABF8FB85, 0xABF9FB85, 0xABFAFB85, 0xABFBFB85, 0xABFCFB85, 0xABFDFB85, 0xABFEFB85, 0xABFFFB85, 0xAC00FB85, 0xAC01FB85, 0xAC02FB85, 0xAC03FB85, 0xAC04FB85, 0xAC05FB85, + 0xAC06FB85, 0xAC07FB85, 0xAC08FB85, 0xAC09FB85, 0xAC0AFB85, 0xAC0BFB85, 0xAC0CFB85, 0xAC0DFB85, 0xAC0EFB85, 0xAC0FFB85, 0xAC10FB85, 0xAC11FB85, 0xAC12FB85, 0xAC13FB85, 0xAC14FB85, + 0xAC15FB85, 0xAC16FB85, 0xAC17FB85, 0xAC18FB85, 0xAC19FB85, 0xAC1AFB85, 0xAC1BFB85, 0xAC1CFB85, 0xAC1DFB85, 0xAC1EFB85, 0xAC1FFB85, 0xAC20FB85, 0xAC21FB85, 0xAC22FB85, 0xAC23FB85, + 0xAC24FB85, 0xAC25FB85, 0xAC26FB85, 0xAC27FB85, 0xAC28FB85, 0xAC29FB85, 0xAC2AFB85, 0xAC2BFB85, 0xAC2CFB85, 0xAC2DFB85, 0xAC2EFB85, 0xAC2FFB85, 0xAC30FB85, 0xAC31FB85, 0xAC32FB85, + 0xAC33FB85, 0xAC34FB85, 0xAC35FB85, 0xAC36FB85, 0xAC37FB85, 0xAC38FB85, 0xAC39FB85, 0xAC3AFB85, 0xAC3BFB85, 0xAC3CFB85, 0xAC3DFB85, 0xAC3EFB85, 0xAC3FFB85, 0xAC40FB85, 0xAC41FB85, + 0xAC42FB85, 0xAC43FB85, 0xAC44FB85, 0xAC45FB85, 0xAC46FB85, 0xAC47FB85, 0xAC48FB85, 0xAC49FB85, 0xAC4AFB85, 0xAC4BFB85, 0xAC4CFB85, 0xAC4DFB85, 0xAC4EFB85, 0xAC4FFB85, 0xAC50FB85, + 0xAC51FB85, 0xAC52FB85, 0xAC53FB85, 0xAC54FB85, 0xAC55FB85, 0xAC56FB85, 0xAC57FB85, 0xAC58FB85, 0xAC59FB85, 0xAC5AFB85, 0xAC5BFB85, 0xAC5CFB85, 0xAC5DFB85, 0xAC5EFB85, 
0xAC5FFB85, + 0xAC60FB85, 0xAC61FB85, 0xAC62FB85, 0xAC63FB85, 0xAC64FB85, 0xAC65FB85, 0xAC66FB85, 0xAC67FB85, 0xAC68FB85, 0xAC69FB85, 0xAC6AFB85, 0xAC6BFB85, 0xAC6CFB85, 0xAC6DFB85, 0xAC6EFB85, + 0xAC6FFB85, 0xAC70FB85, 0xAC71FB85, 0xAC72FB85, 0xAC73FB85, 0xAC74FB85, 0xAC75FB85, 0xAC76FB85, 0xAC77FB85, 0xAC78FB85, 0xAC79FB85, 0xAC7AFB85, 0xAC7BFB85, 0xAC7CFB85, 0xAC7DFB85, + 0xAC7EFB85, 0xAC7FFB85, 0xAC80FB85, 0xAC81FB85, 0xAC82FB85, 0xAC83FB85, 0xAC84FB85, 0xAC85FB85, 0xAC86FB85, 0xAC87FB85, 0xAC88FB85, 0xAC89FB85, 0xAC8AFB85, 0xAC8BFB85, 0xAC8CFB85, + 0xAC8DFB85, 0xAC8EFB85, 0xAC8FFB85, 0xAC90FB85, 0xAC91FB85, 0xAC92FB85, 0xAC93FB85, 0xAC94FB85, 0xAC95FB85, 0xAC96FB85, 0xAC97FB85, 0xAC98FB85, 0xAC99FB85, 0xAC9AFB85, 0xAC9BFB85, + 0xAC9CFB85, 0xAC9DFB85, 0xAC9EFB85, 0xAC9FFB85, 0xACA0FB85, 0xACA1FB85, 0xACA2FB85, 0xACA3FB85, 0xACA4FB85, 0xACA5FB85, 0xACA6FB85, 0xACA7FB85, 0xACA8FB85, 0xACA9FB85, 0xACAAFB85, + 0xACABFB85, 0xACACFB85, 0xACADFB85, 0xACAEFB85, 0xACAFFB85, 0xACB0FB85, 0xACB1FB85, 0xACB2FB85, 0xACB3FB85, 0xACB4FB85, 0xACB5FB85, 0xACB6FB85, 0xACB7FB85, 0xACB8FB85, 0xACB9FB85, + 0xACBAFB85, 0xACBBFB85, 0xACBCFB85, 0xACBDFB85, 0xACBEFB85, 0xACBFFB85, 0xACC0FB85, 0xACC1FB85, 0xACC2FB85, 0xACC3FB85, 0xACC4FB85, 0xACC5FB85, 0xACC6FB85, 0xACC7FB85, 0xACC8FB85, + 0xACC9FB85, 0xACCAFB85, 0xACCBFB85, 0xACCCFB85, 0xACCDFB85, 0xACCEFB85, 0xACCFFB85, 0xACD0FB85, 0xACD1FB85, 0xACD2FB85, 0xACD3FB85, 0xACD4FB85, 0xACD5FB85, 0xACD6FB85, 0xACD7FB85, + 0xACD8FB85, 0xACD9FB85, 0xACDAFB85, 0xACDBFB85, 0xACDCFB85, 0xACDDFB85, 0xACDEFB85, 0xACDFFB85, 0xACE0FB85, 0xACE1FB85, 0xACE2FB85, 0xACE3FB85, 0xACE4FB85, 0xACE5FB85, 0xACE6FB85, + 0xACE7FB85, 0xACE8FB85, 0xACE9FB85, 0xACEAFB85, 0xACEBFB85, 0xACECFB85, 0xACEDFB85, 0xACEEFB85, 0xACEFFB85, 0xACF0FB85, 0xACF1FB85, 0xACF2FB85, 0xACF3FB85, 0xACF4FB85, 0xACF5FB85, + 0xACF6FB85, 0xACF7FB85, 0xACF8FB85, 0xACF9FB85, 0xACFAFB85, 0xACFBFB85, 0xACFCFB85, 0xACFDFB85, 0xACFEFB85, 0xACFFFB85, 0xAD00FB85, 0xAD01FB85, 0xAD02FB85, 
0xAD03FB85, 0xAD04FB85, + 0xAD05FB85, 0xAD06FB85, 0xAD07FB85, 0xAD08FB85, 0xAD09FB85, 0xAD0AFB85, 0xAD0BFB85, 0xAD0CFB85, 0xAD0DFB85, 0xAD0EFB85, 0xAD0FFB85, 0xAD10FB85, 0xAD11FB85, 0xAD12FB85, 0xAD13FB85, + 0xAD14FB85, 0xAD15FB85, 0xAD16FB85, 0xAD17FB85, 0xAD18FB85, 0xAD19FB85, 0xAD1AFB85, 0xAD1BFB85, 0xAD1CFB85, 0xAD1DFB85, 0xAD1EFB85, 0xAD1FFB85, 0xAD20FB85, 0xAD21FB85, 0xAD22FB85, + 0xAD23FB85, 0xAD24FB85, 0xAD25FB85, 0xAD26FB85, 0xAD27FB85, 0xAD28FB85, 0xAD29FB85, 0xAD2AFB85, 0xAD2BFB85, 0xAD2CFB85, 0xAD2DFB85, 0xAD2EFB85, 0xAD2FFB85, 0xAD30FB85, 0xAD31FB85, + 0xAD32FB85, 0xAD33FB85, 0xAD34FB85, 0xAD35FB85, 0xAD36FB85, 0xAD37FB85, 0xAD38FB85, 0xAD39FB85, 0xAD3AFB85, 0xAD3BFB85, 0xAD3CFB85, 0xAD3DFB85, 0xAD3EFB85, 0xAD3FFB85, 0xAD40FB85, + 0xAD41FB85, 0xAD42FB85, 0xAD43FB85, 0xAD44FB85, 0xAD45FB85, 0xAD46FB85, 0xAD47FB85, 0xAD48FB85, 0xAD49FB85, 0xAD4AFB85, 0xAD4BFB85, 0xAD4CFB85, 0xAD4DFB85, 0xAD4EFB85, 0xAD4FFB85, + 0xAD50FB85, 0xAD51FB85, 0xAD52FB85, 0xAD53FB85, 0xAD54FB85, 0xAD55FB85, 0xAD56FB85, 0xAD57FB85, 0xAD58FB85, 0xAD59FB85, 0xAD5AFB85, 0xAD5BFB85, 0xAD5CFB85, 0xAD5DFB85, 0xAD5EFB85, + 0xAD5FFB85, 0xAD60FB85, 0xAD61FB85, 0xAD62FB85, 0xAD63FB85, 0xAD64FB85, 0xAD65FB85, 0xAD66FB85, 0xAD67FB85, 0xAD68FB85, 0xAD69FB85, 0xAD6AFB85, 0xAD6BFB85, 0xAD6CFB85, 0xAD6DFB85, + 0xAD6EFB85, 0xAD6FFB85, 0xAD70FB85, 0xAD71FB85, 0xAD72FB85, 0xAD73FB85, 0xAD74FB85, 0xAD75FB85, 0xAD76FB85, 0xAD77FB85, 0xAD78FB85, 0xAD79FB85, 0xAD7AFB85, 0xAD7BFB85, 0xAD7CFB85, + 0xAD7DFB85, 0xAD7EFB85, 0xAD7FFB85, 0xAD80FB85, 0xAD81FB85, 0xAD82FB85, 0xAD83FB85, 0xAD84FB85, 0xAD85FB85, 0xAD86FB85, 0xAD87FB85, 0xAD88FB85, 0xAD89FB85, 0xAD8AFB85, 0xAD8BFB85, + 0xAD8CFB85, 0xAD8DFB85, 0xAD8EFB85, 0xAD8FFB85, 0xAD90FB85, 0xAD91FB85, 0xAD92FB85, 0xAD93FB85, 0xAD94FB85, 0xAD95FB85, 0xAD96FB85, 0xAD97FB85, 0xAD98FB85, 0xAD99FB85, 0xAD9AFB85, + 0xAD9BFB85, 0xAD9CFB85, 0xAD9DFB85, 0xAD9EFB85, 0xAD9FFB85, 0xADA0FB85, 0xADA1FB85, 0xADA2FB85, 0xADA3FB85, 0xADA4FB85, 0xADA5FB85, 0xADA6FB85, 
0xADA7FB85, 0xADA8FB85, 0xADA9FB85, + 0xADAAFB85, 0xADABFB85, 0xADACFB85, 0xADADFB85, 0xADAEFB85, 0xADAFFB85, 0xADB0FB85, 0xADB1FB85, 0xADB2FB85, 0xADB3FB85, 0xADB4FB85, 0xADB5FB85, 0xADB6FB85, 0xADB7FB85, 0xADB8FB85, + 0xADB9FB85, 0xADBAFB85, 0xADBBFB85, 0xADBCFB85, 0xADBDFB85, 0xADBEFB85, 0xADBFFB85, 0xADC0FB85, 0xADC1FB85, 0xADC2FB85, 0xADC3FB85, 0xADC4FB85, 0xADC5FB85, 0xADC6FB85, 0xADC7FB85, + 0xADC8FB85, 0xADC9FB85, 0xADCAFB85, 0xADCBFB85, 0xADCCFB85, 0xADCDFB85, 0xADCEFB85, 0xADCFFB85, 0xADD0FB85, 0xADD1FB85, 0xADD2FB85, 0xADD3FB85, 0xADD4FB85, 0xADD5FB85, 0xADD6FB85, + 0xADD7FB85, 0xADD8FB85, 0xADD9FB85, 0xADDAFB85, 0xADDBFB85, 0xADDCFB85, 0xADDDFB85, 0xADDEFB85, 0xADDFFB85, 0xADE0FB85, 0xADE1FB85, 0xADE2FB85, 0xADE3FB85, 0xADE4FB85, 0xADE5FB85, + 0xADE6FB85, 0xADE7FB85, 0xADE8FB85, 0xADE9FB85, 0xADEAFB85, 0xADEBFB85, 0xADECFB85, 0xADEDFB85, 0xADEEFB85, 0xADEFFB85, 0xADF0FB85, 0xADF1FB85, 0xADF2FB85, 0xADF3FB85, 0xADF4FB85, + 0xADF5FB85, 0xADF6FB85, 0xADF7FB85, 0xADF8FB85, 0xADF9FB85, 0xADFAFB85, 0xADFBFB85, 0xADFCFB85, 0xADFDFB85, 0xADFEFB85, 0xADFFFB85, 0xAE00FB85, 0xAE01FB85, 0xAE02FB85, 0xAE03FB85, + 0xAE04FB85, 0xAE05FB85, 0xAE06FB85, 0xAE07FB85, 0xAE08FB85, 0xAE09FB85, 0xAE0AFB85, 0xAE0BFB85, 0xAE0CFB85, 0xAE0DFB85, 0xAE0EFB85, 0xAE0FFB85, 0xAE10FB85, 0xAE11FB85, 0xAE12FB85, + 0xAE13FB85, 0xAE14FB85, 0xAE15FB85, 0xAE16FB85, 0xAE17FB85, 0xAE18FB85, 0xAE19FB85, 0xAE1AFB85, 0xAE1BFB85, 0xAE1CFB85, 0xAE1DFB85, 0xAE1EFB85, 0xAE1FFB85, 0xAE20FB85, 0xAE21FB85, + 0xAE22FB85, 0xAE23FB85, 0xAE24FB85, 0xAE25FB85, 0xAE26FB85, 0xAE27FB85, 0xAE28FB85, 0xAE29FB85, 0xAE2AFB85, 0xAE2BFB85, 0xAE2CFB85, 0xAE2DFB85, 0xAE2EFB85, 0xAE2FFB85, 0xAE30FB85, + 0xAE31FB85, 0xAE32FB85, 0xAE33FB85, 0xAE34FB85, 0xAE35FB85, 0xAE36FB85, 0xAE37FB85, 0xAE38FB85, 0xAE39FB85, 0xAE3AFB85, 0xAE3BFB85, 0xAE3CFB85, 0xAE3DFB85, 0xAE3EFB85, 0xAE3FFB85, + 0xAE40FB85, 0xAE41FB85, 0xAE42FB85, 0xAE43FB85, 0xAE44FB85, 0xAE45FB85, 0xAE46FB85, 0xAE47FB85, 0xAE48FB85, 0xAE49FB85, 0xAE4AFB85, 
0xAE4BFB85, 0xAE4CFB85, 0xAE4DFB85, 0xAE4EFB85, + 0xAE4FFB85, 0xAE50FB85, 0xAE51FB85, 0xAE52FB85, 0xAE53FB85, 0xAE54FB85, 0xAE55FB85, 0xAE56FB85, 0xAE57FB85, 0xAE58FB85, 0xAE59FB85, 0xAE5AFB85, 0xAE5BFB85, 0xAE5CFB85, 0xAE5DFB85, + 0xAE5EFB85, 0xAE5FFB85, 0xAE60FB85, 0xAE61FB85, 0xAE62FB85, 0xAE63FB85, 0xAE64FB85, 0xAE65FB85, 0xAE66FB85, 0xAE67FB85, 0xAE68FB85, 0xAE69FB85, 0xAE6AFB85, 0xAE6BFB85, 0xAE6CFB85, + 0xAE6DFB85, 0xAE6EFB85, 0xAE6FFB85, 0xAE70FB85, 0xAE71FB85, 0xAE72FB85, 0xAE73FB85, 0xAE74FB85, 0xAE75FB85, 0xAE76FB85, 0xAE77FB85, 0xAE78FB85, 0xAE79FB85, 0xAE7AFB85, 0xAE7BFB85, + 0xAE7CFB85, 0xAE7DFB85, 0xAE7EFB85, 0xAE7FFB85, 0xAE80FB85, 0xAE81FB85, 0xAE82FB85, 0xAE83FB85, 0xAE84FB85, 0xAE85FB85, 0xAE86FB85, 0xAE87FB85, 0xAE88FB85, 0xAE89FB85, 0xAE8AFB85, + 0xAE8BFB85, 0xAE8CFB85, 0xAE8DFB85, 0xAE8EFB85, 0xAE8FFB85, 0xAE90FB85, 0xAE91FB85, 0xAE92FB85, 0xAE93FB85, 0xAE94FB85, 0xAE95FB85, 0xAE96FB85, 0xAE97FB85, 0xAE98FB85, 0xAE99FB85, + 0xAE9AFB85, 0xAE9BFB85, 0xAE9CFB85, 0xAE9DFB85, 0xAE9EFB85, 0xAE9FFB85, 0xAEA0FB85, 0xAEA1FB85, 0xAEA2FB85, 0xAEA3FB85, 0xAEA4FB85, 0xAEA5FB85, 0xAEA6FB85, 0xAEA7FB85, 0xAEA8FB85, + 0xAEA9FB85, 0xAEAAFB85, 0xAEABFB85, 0xAEACFB85, 0xAEADFB85, 0xAEAEFB85, 0xAEAFFB85, 0xAEB0FB85, 0xAEB1FB85, 0xAEB2FB85, 0xAEB3FB85, 0xAEB4FB85, 0xAEB5FB85, 0xAEB6FB85, 0xAEB7FB85, + 0xAEB8FB85, 0xAEB9FB85, 0xAEBAFB85, 0xAEBBFB85, 0xAEBCFB85, 0xAEBDFB85, 0xAEBEFB85, 0xAEBFFB85, 0xAEC0FB85, 0xAEC1FB85, 0xAEC2FB85, 0xAEC3FB85, 0xAEC4FB85, 0xAEC5FB85, 0xAEC6FB85, + 0xAEC7FB85, 0xAEC8FB85, 0xAEC9FB85, 0xAECAFB85, 0xAECBFB85, 0xAECCFB85, 0xAECDFB85, 0xAECEFB85, 0xAECFFB85, 0xAED0FB85, 0xAED1FB85, 0xAED2FB85, 0xAED3FB85, 0xAED4FB85, 0xAED5FB85, + 0xAED6FB85, 0xAED7FB85, 0xAED8FB85, 0xAED9FB85, 0xAEDAFB85, 0xAEDBFB85, 0xAEDCFB85, 0xAEDDFB85, 0xAEDEFB85, 0xAEDFFB85, 0xAEE0FB85, 0xAEE1FB85, 0xAEE2FB85, 0xAEE3FB85, 0xAEE4FB85, + 0xAEE5FB85, 0xAEE6FB85, 0xAEE7FB85, 0xAEE8FB85, 0xAEE9FB85, 0xAEEAFB85, 0xAEEBFB85, 0xAEECFB85, 0xAEEDFB85, 0xAEEEFB85, 
0xAEEFFB85, 0xAEF0FB85, 0xAEF1FB85, 0xAEF2FB85, 0xAEF3FB85, + 0xAEF4FB85, 0xAEF5FB85, 0xAEF6FB85, 0xAEF7FB85, 0xAEF8FB85, 0xAEF9FB85, 0xAEFAFB85, 0xAEFBFB85, 0xAEFCFB85, 0xAEFDFB85, 0xAEFEFB85, 0xAEFFFB85, 0xAF00FB85, 0xAF01FB85, 0xAF02FB85, + 0xAF03FB85, 0xAF04FB85, 0xAF05FB85, 0xAF06FB85, 0xAF07FB85, 0xAF08FB85, 0xAF09FB85, 0xAF0AFB85, 0xAF0BFB85, 0xAF0CFB85, 0xAF0DFB85, 0xAF0EFB85, 0xAF0FFB85, 0xAF10FB85, 0xAF11FB85, + 0xAF12FB85, 0xAF13FB85, 0xAF14FB85, 0xAF15FB85, 0xAF16FB85, 0xAF17FB85, 0xAF18FB85, 0xAF19FB85, 0xAF1AFB85, 0xAF1BFB85, 0xAF1CFB85, 0xAF1DFB85, 0xAF1EFB85, 0xAF1FFB85, 0xAF20FB85, + 0xAF21FB85, 0xAF22FB85, 0xAF23FB85, 0xAF24FB85, 0xAF25FB85, 0xAF26FB85, 0xAF27FB85, 0xAF28FB85, 0xAF29FB85, 0xAF2AFB85, 0xAF2BFB85, 0xAF2CFB85, 0xAF2DFB85, 0xAF2EFB85, 0xAF2FFB85, + 0xAF30FB85, 0xAF31FB85, 0xAF32FB85, 0xAF33FB85, 0xAF34FB85, 0xAF35FB85, 0xAF36FB85, 0xAF37FB85, 0xAF38FB85, 0xAF39FB85, 0xAF3AFB85, 0xAF3BFB85, 0xAF3CFB85, 0xAF3DFB85, 0xAF3EFB85, + 0xAF3FFB85, 0xAF40FB85, 0xAF41FB85, 0xAF42FB85, 0xAF43FB85, 0xAF44FB85, 0xAF45FB85, 0xAF46FB85, 0xAF47FB85, 0xAF48FB85, 0xAF49FB85, 0xAF4AFB85, 0xAF4BFB85, 0xAF4CFB85, 0xAF4DFB85, + 0xAF4EFB85, 0xAF4FFB85, 0xAF50FB85, 0xAF51FB85, 0xAF52FB85, 0xAF53FB85, 0xAF54FB85, 0xAF55FB85, 0xAF56FB85, 0xAF57FB85, 0xAF58FB85, 0xAF59FB85, 0xAF5AFB85, 0xAF5BFB85, 0xAF5CFB85, + 0xAF5DFB85, 0xAF5EFB85, 0xAF5FFB85, 0xAF60FB85, 0xAF61FB85, 0xAF62FB85, 0xAF63FB85, 0xAF64FB85, 0xAF65FB85, 0xAF66FB85, 0xAF67FB85, 0xAF68FB85, 0xAF69FB85, 0xAF6AFB85, 0xAF6BFB85, + 0xAF6CFB85, 0xAF6DFB85, 0xAF6EFB85, 0xAF6FFB85, 0xAF70FB85, 0xAF71FB85, 0xAF72FB85, 0xAF73FB85, 0xAF74FB85, 0xAF75FB85, 0xAF76FB85, 0xAF77FB85, 0xAF78FB85, 0xAF79FB85, 0xAF7AFB85, + 0xAF7BFB85, 0xAF7CFB85, 0xAF7DFB85, 0xAF7EFB85, 0xAF7FFB85, 0xAF80FB85, 0xAF81FB85, 0xAF82FB85, 0xAF83FB85, 0xAF84FB85, 0xAF85FB85, 0xAF86FB85, 0xAF87FB85, 0xAF88FB85, 0xAF89FB85, + 0xAF8AFB85, 0xAF8BFB85, 0xAF8CFB85, 0xAF8DFB85, 0xAF8EFB85, 0xAF8FFB85, 0xAF90FB85, 0xAF91FB85, 0xAF92FB85, 
0xAF93FB85, 0xAF94FB85, 0xAF95FB85, 0xAF96FB85, 0xAF97FB85, 0xAF98FB85, + 0xAF99FB85, 0xAF9AFB85, 0xAF9BFB85, 0xAF9CFB85, 0xAF9DFB85, 0xAF9EFB85, 0xAF9FFB85, 0xAFA0FB85, 0xAFA1FB85, 0xAFA2FB85, 0xAFA3FB85, 0xAFA4FB85, 0xAFA5FB85, 0xAFA6FB85, 0xAFA7FB85, + 0xAFA8FB85, 0xAFA9FB85, 0xAFAAFB85, 0xAFABFB85, 0xAFACFB85, 0xAFADFB85, 0xAFAEFB85, 0xAFAFFB85, 0xAFB0FB85, 0xAFB1FB85, 0xAFB2FB85, 0xAFB3FB85, 0xAFB4FB85, 0xAFB5FB85, 0xAFB6FB85, + 0xAFB7FB85, 0xAFB8FB85, 0xAFB9FB85, 0xAFBAFB85, 0xAFBBFB85, 0xAFBCFB85, 0xAFBDFB85, 0xAFBEFB85, 0xAFBFFB85, 0xAFC0FB85, 0xAFC1FB85, 0xAFC2FB85, 0xAFC3FB85, 0xAFC4FB85, 0xAFC5FB85, + 0xAFC6FB85, 0xAFC7FB85, 0xAFC8FB85, 0xAFC9FB85, 0xAFCAFB85, 0xAFCBFB85, 0xAFCCFB85, 0xAFCDFB85, 0xAFCEFB85, 0xAFCFFB85, 0xAFD0FB85, 0xAFD1FB85, 0xAFD2FB85, 0xAFD3FB85, 0xAFD4FB85, + 0xAFD5FB85, 0xAFD6FB85, 0xAFD7FB85, 0xAFD8FB85, 0xAFD9FB85, 0xAFDAFB85, 0xAFDBFB85, 0xAFDCFB85, 0xAFDDFB85, 0xAFDEFB85, 0xAFDFFB85, 0xAFE0FB85, 0xAFE1FB85, 0xAFE2FB85, 0xAFE3FB85, + 0xAFE4FB85, 0xAFE5FB85, 0xAFE6FB85, 0xAFE7FB85, 0xAFE8FB85, 0xAFE9FB85, 0xAFEAFB85, 0xAFEBFB85, 0xAFECFB85, 0xAFEDFB85, 0xAFEEFB85, 0xAFEFFB85, 0xAFF0FB85, 0xAFF1FB85, 0xAFF2FB85, + 0xAFF3FB85, 0xAFF4FB85, 0xAFF5FB85, 0xAFF6FB85, 0xAFF7FB85, 0xAFF8FB85, 0xAFF9FB85, 0xAFFAFB85, 0xAFFBFB85, 0xAFFCFB85, 0xAFFDFB85, 0xAFFEFB85, 0xAFFFFB85, 0xB000FB85, 0xB001FB85, + 0xB002FB85, 0xB003FB85, 0xB004FB85, 0xB005FB85, 0xB006FB85, 0xB007FB85, 0xB008FB85, 0xB009FB85, 0xB00AFB85, 0xB00BFB85, 0xB00CFB85, 0xB00DFB85, 0xB00EFB85, 0xB00FFB85, 0xB010FB85, + 0xB011FB85, 0xB012FB85, 0xB013FB85, 0xB014FB85, 0xB015FB85, 0xB016FB85, 0xB017FB85, 0xB018FB85, 0xB019FB85, 0xB01AFB85, 0xB01BFB85, 0xB01CFB85, 0xB01DFB85, 0xB01EFB85, 0xB01FFB85, + 0xB020FB85, 0xB021FB85, 0xB022FB85, 0xB023FB85, 0xB024FB85, 0xB025FB85, 0xB026FB85, 0xB027FB85, 0xB028FB85, 0xB029FB85, 0xB02AFB85, 0xB02BFB85, 0xB02CFB85, 0xB02DFB85, 0xB02EFB85, + 0xB02FFB85, 0xB030FB85, 0xB031FB85, 0xB032FB85, 0xB033FB85, 0xB034FB85, 0xB035FB85, 0xB036FB85, 
0xB037FB85, 0xB038FB85, 0xB039FB85, 0xB03AFB85, 0xB03BFB85, 0xB03CFB85, 0xB03DFB85, + 0xB03EFB85, 0xB03FFB85, 0xB040FB85, 0xB041FB85, 0xB042FB85, 0xB043FB85, 0xB044FB85, 0xB045FB85, 0xB046FB85, 0xB047FB85, 0xB048FB85, 0xB049FB85, 0xB04AFB85, 0xB04BFB85, 0xB04CFB85, + 0xB04DFB85, 0xB04EFB85, 0xB04FFB85, 0xB050FB85, 0xB051FB85, 0xB052FB85, 0xB053FB85, 0xB054FB85, 0xB055FB85, 0xB056FB85, 0xB057FB85, 0xB058FB85, 0xB059FB85, 0xB05AFB85, 0xB05BFB85, + 0xB05CFB85, 0xB05DFB85, 0xB05EFB85, 0xB05FFB85, 0xB060FB85, 0xB061FB85, 0xB062FB85, 0xB063FB85, 0xB064FB85, 0xB065FB85, 0xB066FB85, 0xB067FB85, 0xB068FB85, 0xB069FB85, 0xB06AFB85, + 0xB06BFB85, 0xB06CFB85, 0xB06DFB85, 0xB06EFB85, 0xB06FFB85, 0xB070FB85, 0xB071FB85, 0xB072FB85, 0xB073FB85, 0xB074FB85, 0xB075FB85, 0xB076FB85, 0xB077FB85, 0xB078FB85, 0xB079FB85, + 0xB07AFB85, 0xB07BFB85, 0xB07CFB85, 0xB07DFB85, 0xB07EFB85, 0xB07FFB85, 0xB080FB85, 0xB081FB85, 0xB082FB85, 0xB083FB85, 0xB084FB85, 0xB085FB85, 0xB086FB85, 0xB087FB85, 0xB088FB85, + 0xB089FB85, 0xB08AFB85, 0xB08BFB85, 0xB08CFB85, 0xB08DFB85, 0xB08EFB85, 0xB08FFB85, 0xB090FB85, 0xB091FB85, 0xB092FB85, 0xB093FB85, 0xB094FB85, 0xB095FB85, 0xB096FB85, 0xB097FB85, + 0xB098FB85, 0xB099FB85, 0xB09AFB85, 0xB09BFB85, 0xB09CFB85, 0xB09DFB85, 0xB09EFB85, 0xB09FFB85, 0xB0A0FB85, 0xB0A1FB85, 0xB0A2FB85, 0xB0A3FB85, 0xB0A4FB85, 0xB0A5FB85, 0xB0A6FB85, + 0xB0A7FB85, 0xB0A8FB85, 0xB0A9FB85, 0xB0AAFB85, 0xB0ABFB85, 0xB0ACFB85, 0xB0ADFB85, 0xB0AEFB85, 0xB0AFFB85, 0xB0B0FB85, 0xB0B1FB85, 0xB0B2FB85, 0xB0B3FB85, 0xB0B4FB85, 0xB0B5FB85, + 0xB0B6FB85, 0xB0B7FB85, 0xB0B8FB85, 0xB0B9FB85, 0xB0BAFB85, 0xB0BBFB85, 0xB0BCFB85, 0xB0BDFB85, 0xB0BEFB85, 0xB0BFFB85, 0xB0C0FB85, 0xB0C1FB85, 0xB0C2FB85, 0xB0C3FB85, 0xB0C4FB85, + 0xB0C5FB85, 0xB0C6FB85, 0xB0C7FB85, 0xB0C8FB85, 0xB0C9FB85, 0xB0CAFB85, 0xB0CBFB85, 0xB0CCFB85, 0xB0CDFB85, 0xB0CEFB85, 0xB0CFFB85, 0xB0D0FB85, 0xB0D1FB85, 0xB0D2FB85, 0xB0D3FB85, + 0xB0D4FB85, 0xB0D5FB85, 0xB0D6FB85, 0xB0D7FB85, 0xB0D8FB85, 0xB0D9FB85, 0xB0DAFB85, 
0xB0DBFB85, 0xB0DCFB85, 0xB0DDFB85, 0xB0DEFB85, 0xB0DFFB85, 0xB0E0FB85, 0xB0E1FB85, 0xB0E2FB85, + 0xB0E3FB85, 0xB0E4FB85, 0xB0E5FB85, 0xB0E6FB85, 0xB0E7FB85, 0xB0E8FB85, 0xB0E9FB85, 0xB0EAFB85, 0xB0EBFB85, 0xB0ECFB85, 0xB0EDFB85, 0xB0EEFB85, 0xB0EFFB85, 0xB0F0FB85, 0xB0F1FB85, + 0xB0F2FB85, 0xB0F3FB85, 0xB0F4FB85, 0xB0F5FB85, 0xB0F6FB85, 0xB0F7FB85, 0xB0F8FB85, 0xB0F9FB85, 0xB0FAFB85, 0xB0FBFB85, 0xB0FCFB85, 0xB0FDFB85, 0xB0FEFB85, 0xB0FFFB85, 0xB100FB85, + 0xB101FB85, 0xB102FB85, 0xB103FB85, 0xB104FB85, 0xB105FB85, 0xB106FB85, 0xB107FB85, 0xB108FB85, 0xB109FB85, 0xB10AFB85, 0xB10BFB85, 0xB10CFB85, 0xB10DFB85, 0xB10EFB85, 0xB10FFB85, + 0xB110FB85, 0xB111FB85, 0xB112FB85, 0xB113FB85, 0xB114FB85, 0xB115FB85, 0xB116FB85, 0xB117FB85, 0xB118FB85, 0xB119FB85, 0xB11AFB85, 0xB11BFB85, 0xB11CFB85, 0xB11DFB85, 0xB11EFB85, + 0xB11FFB85, 0xB120FB85, 0xB121FB85, 0xB122FB85, 0xB123FB85, 0xB124FB85, 0xB125FB85, 0xB126FB85, 0xB127FB85, 0xB128FB85, 0xB129FB85, 0xB12AFB85, 0xB12BFB85, 0xB12CFB85, 0xB12DFB85, + 0xB12EFB85, 0xB12FFB85, 0xB130FB85, 0xB131FB85, 0xB132FB85, 0xB133FB85, 0xB134FB85, 0xB135FB85, 0xB136FB85, 0xB137FB85, 0xB138FB85, 0xB139FB85, 0xB13AFB85, 0xB13BFB85, 0xB13CFB85, + 0xB13DFB85, 0xB13EFB85, 0xB13FFB85, 0xB140FB85, 0xB141FB85, 0xB142FB85, 0xB143FB85, 0xB144FB85, 0xB145FB85, 0xB146FB85, 0xB147FB85, 0xB148FB85, 0xB149FB85, 0xB14AFB85, 0xB14BFB85, + 0xB14CFB85, 0xB14DFB85, 0xB14EFB85, 0xB14FFB85, 0xB150FB85, 0xB151FB85, 0xB152FB85, 0xB153FB85, 0xB154FB85, 0xB155FB85, 0xB156FB85, 0xB157FB85, 0xB158FB85, 0xB159FB85, 0xB15AFB85, + 0xB15BFB85, 0xB15CFB85, 0xB15DFB85, 0xB15EFB85, 0xB15FFB85, 0xB160FB85, 0xB161FB85, 0xB162FB85, 0xB163FB85, 0xB164FB85, 0xB165FB85, 0xB166FB85, 0xB167FB85, 0xB168FB85, 0xB169FB85, + 0xB16AFB85, 0xB16BFB85, 0xB16CFB85, 0xB16DFB85, 0xB16EFB85, 0xB16FFB85, 0xB170FB85, 0xB171FB85, 0xB172FB85, 0xB173FB85, 0xB174FB85, 0xB175FB85, 0xB176FB85, 0xB177FB85, 0xB178FB85, + 0xB179FB85, 0xB17AFB85, 0xB17BFB85, 0xB17CFB85, 0xB17DFB85, 0xB17EFB85, 
0xB17FFB85, 0xB180FB85, 0xB181FB85, 0xB182FB85, 0xB183FB85, 0xB184FB85, 0xB185FB85, 0xB186FB85, 0xB187FB85, + 0xB188FB85, 0xB189FB85, 0xB18AFB85, 0xB18BFB85, 0xB18CFB85, 0xB18DFB85, 0xB18EFB85, 0xB18FFB85, 0xB190FB85, 0xB191FB85, 0xB192FB85, 0xB193FB85, 0xB194FB85, 0xB195FB85, 0xB196FB85, + 0xB197FB85, 0xB198FB85, 0xB199FB85, 0xB19AFB85, 0xB19BFB85, 0xB19CFB85, 0xB19DFB85, 0xB19EFB85, 0xB19FFB85, 0xB1A0FB85, 0xB1A1FB85, 0xB1A2FB85, 0xB1A3FB85, 0xB1A4FB85, 0xB1A5FB85, + 0xB1A6FB85, 0xB1A7FB85, 0xB1A8FB85, 0xB1A9FB85, 0xB1AAFB85, 0xB1ABFB85, 0xB1ACFB85, 0xB1ADFB85, 0xB1AEFB85, 0xB1AFFB85, 0xB1B0FB85, 0xB1B1FB85, 0xB1B2FB85, 0xB1B3FB85, 0xB1B4FB85, + 0xB1B5FB85, 0xB1B6FB85, 0xB1B7FB85, 0xB1B8FB85, 0xB1B9FB85, 0xB1BAFB85, 0xB1BBFB85, 0xB1BCFB85, 0xB1BDFB85, 0xB1BEFB85, 0xB1BFFB85, 0xB1C0FB85, 0xB1C1FB85, 0xB1C2FB85, 0xB1C3FB85, + 0xB1C4FB85, 0xB1C5FB85, 0xB1C6FB85, 0xB1C7FB85, 0xB1C8FB85, 0xB1C9FB85, 0xB1CAFB85, 0xB1CBFB85, 0xB1CCFB85, 0xB1CDFB85, 0xB1CEFB85, 0xB1CFFB85, 0xB1D0FB85, 0xB1D1FB85, 0xB1D2FB85, + 0xB1D3FB85, 0xB1D4FB85, 0xB1D5FB85, 0xB1D6FB85, 0xB1D7FB85, 0xB1D8FB85, 0xB1D9FB85, 0xB1DAFB85, 0xB1DBFB85, 0xB1DCFB85, 0xB1DDFB85, 0xB1DEFB85, 0xB1DFFB85, 0xB1E0FB85, 0xB1E1FB85, + 0xB1E2FB85, 0xB1E3FB85, 0xB1E4FB85, 0xB1E5FB85, 0xB1E6FB85, 0xB1E7FB85, 0xB1E8FB85, 0xB1E9FB85, 0xB1EAFB85, 0xB1EBFB85, 0xB1ECFB85, 0xB1EDFB85, 0xB1EEFB85, 0xB1EFFB85, 0xB1F0FB85, + 0xB1F1FB85, 0xB1F2FB85, 0xB1F3FB85, 0xB1F4FB85, 0xB1F5FB85, 0xB1F6FB85, 0xB1F7FB85, 0xB1F8FB85, 0xB1F9FB85, 0xB1FAFB85, 0xB1FBFB85, 0xB1FCFB85, 0xB1FDFB85, 0xB1FEFB85, 0xB1FFFB85, + 0xB200FB85, 0xB201FB85, 0xB202FB85, 0xB203FB85, 0xB204FB85, 0xB205FB85, 0xB206FB85, 0xB207FB85, 0xB208FB85, 0xB209FB85, 0xB20AFB85, 0xB20BFB85, 0xB20CFB85, 0xB20DFB85, 0xB20EFB85, + 0xB20FFB85, 0xB210FB85, 0xB211FB85, 0xB212FB85, 0xB213FB85, 0xB214FB85, 0xB215FB85, 0xB216FB85, 0xB217FB85, 0xB218FB85, 0xB219FB85, 0xB21AFB85, 0xB21BFB85, 0xB21CFB85, 0xB21DFB85, + 0xB21EFB85, 0xB21FFB85, 0xB220FB85, 0xB221FB85, 0xB222FB85, 
0xB223FB85, 0xB224FB85, 0xB225FB85, 0xB226FB85, 0xB227FB85, 0xB228FB85, 0xB229FB85, 0xB22AFB85, 0xB22BFB85, 0xB22CFB85, + 0xB22DFB85, 0xB22EFB85, 0xB22FFB85, 0xB230FB85, 0xB231FB85, 0xB232FB85, 0xB233FB85, 0xB234FB85, 0xB235FB85, 0xB236FB85, 0xB237FB85, 0xB238FB85, 0xB239FB85, 0xB23AFB85, 0xB23BFB85, + 0xB23CFB85, 0xB23DFB85, 0xB23EFB85, 0xB23FFB85, 0xB240FB85, 0xB241FB85, 0xB242FB85, 0xB243FB85, 0xB244FB85, 0xB245FB85, 0xB246FB85, 0xB247FB85, 0xB248FB85, 0xB249FB85, 0xB24AFB85, + 0xB24BFB85, 0xB24CFB85, 0xB24DFB85, 0xB24EFB85, 0xB24FFB85, 0xB250FB85, 0xB251FB85, 0xB252FB85, 0xB253FB85, 0xB254FB85, 0xB255FB85, 0xB256FB85, 0xB257FB85, 0xB258FB85, 0xB259FB85, + 0xB25AFB85, 0xB25BFB85, 0xB25CFB85, 0xB25DFB85, 0xB25EFB85, 0xB25FFB85, 0xB260FB85, 0xB261FB85, 0xB262FB85, 0xB263FB85, 0xB264FB85, 0xB265FB85, 0xB266FB85, 0xB267FB85, 0xB268FB85, + 0xB269FB85, 0xB26AFB85, 0xB26BFB85, 0xB26CFB85, 0xB26DFB85, 0xB26EFB85, 0xB26FFB85, 0xB270FB85, 0xB271FB85, 0xB272FB85, 0xB273FB85, 0xB274FB85, 0xB275FB85, 0xB276FB85, 0xB277FB85, + 0xB278FB85, 0xB279FB85, 0xB27AFB85, 0xB27BFB85, 0xB27CFB85, 0xB27DFB85, 0xB27EFB85, 0xB27FFB85, 0xB280FB85, 0xB281FB85, 0xB282FB85, 0xB283FB85, 0xB284FB85, 0xB285FB85, 0xB286FB85, + 0xB287FB85, 0xB288FB85, 0xB289FB85, 0xB28AFB85, 0xB28BFB85, 0xB28CFB85, 0xB28DFB85, 0xB28EFB85, 0xB28FFB85, 0xB290FB85, 0xB291FB85, 0xB292FB85, 0xB293FB85, 0xB294FB85, 0xB295FB85, + 0xB296FB85, 0xB297FB85, 0xB298FB85, 0xB299FB85, 0xB29AFB85, 0xB29BFB85, 0xB29CFB85, 0xB29DFB85, 0xB29EFB85, 0xB29FFB85, 0xB2A0FB85, 0xB2A1FB85, 0xB2A2FB85, 0xB2A3FB85, 0xB2A4FB85, + 0xB2A5FB85, 0xB2A6FB85, 0xB2A7FB85, 0xB2A8FB85, 0xB2A9FB85, 0xB2AAFB85, 0xB2ABFB85, 0xB2ACFB85, 0xB2ADFB85, 0xB2AEFB85, 0xB2AFFB85, 0xB2B0FB85, 0xB2B1FB85, 0xB2B2FB85, 0xB2B3FB85, + 0xB2B4FB85, 0xB2B5FB85, 0xB2B6FB85, 0xB2B7FB85, 0xB2B8FB85, 0xB2B9FB85, 0xB2BAFB85, 0xB2BBFB85, 0xB2BCFB85, 0xB2BDFB85, 0xB2BEFB85, 0xB2BFFB85, 0xB2C0FB85, 0xB2C1FB85, 0xB2C2FB85, + 0xB2C3FB85, 0xB2C4FB85, 0xB2C5FB85, 0xB2C6FB85, 
0xB2C7FB85, 0xB2C8FB85, 0xB2C9FB85, 0xB2CAFB85, 0xB2CBFB85, 0xB2CCFB85, 0xB2CDFB85, 0xB2CEFB85, 0xB2CFFB85, 0xB2D0FB85, 0xB2D1FB85, + 0xB2D2FB85, 0xB2D3FB85, 0xB2D4FB85, 0xB2D5FB85, 0xB2D6FB85, 0xB2D7FB85, 0xB2D8FB85, 0xB2D9FB85, 0xB2DAFB85, 0xB2DBFB85, 0xB2DCFB85, 0xB2DDFB85, 0xB2DEFB85, 0xB2DFFB85, 0xB2E0FB85, + 0xB2E1FB85, 0xB2E2FB85, 0xB2E3FB85, 0xB2E4FB85, 0xB2E5FB85, 0xB2E6FB85, 0xB2E7FB85, 0xB2E8FB85, 0xB2E9FB85, 0xB2EAFB85, 0xB2EBFB85, 0xB2ECFB85, 0xB2EDFB85, 0xB2EEFB85, 0xB2EFFB85, + 0xB2F0FB85, 0xB2F1FB85, 0xB2F2FB85, 0xB2F3FB85, 0xB2F4FB85, 0xB2F5FB85, 0xB2F6FB85, 0xB2F7FB85, 0xB2F8FB85, 0xB2F9FB85, 0xB2FAFB85, 0xB2FBFB85, 0xB2FCFB85, 0xB2FDFB85, 0xB2FEFB85, + 0xB2FFFB85, 0xB300FB85, 0xB301FB85, 0xB302FB85, 0xB303FB85, 0xB304FB85, 0xB305FB85, 0xB306FB85, 0xB307FB85, 0xB308FB85, 0xB309FB85, 0xB30AFB85, 0xB30BFB85, 0xB30CFB85, 0xB30DFB85, + 0xB30EFB85, 0xB30FFB85, 0xB310FB85, 0xB311FB85, 0xB312FB85, 0xB313FB85, 0xB314FB85, 0xB315FB85, 0xB316FB85, 0xB317FB85, 0xB318FB85, 0xB319FB85, 0xB31AFB85, 0xB31BFB85, 0xB31CFB85, + 0xB31DFB85, 0xB31EFB85, 0xB31FFB85, 0xB320FB85, 0xB321FB85, 0xB322FB85, 0xB323FB85, 0xB324FB85, 0xB325FB85, 0xB326FB85, 0xB327FB85, 0xB328FB85, 0xB329FB85, 0xB32AFB85, 0xB32BFB85, + 0xB32CFB85, 0xB32DFB85, 0xB32EFB85, 0xB32FFB85, 0xB330FB85, 0xB331FB85, 0xB332FB85, 0xB333FB85, 0xB334FB85, 0xB335FB85, 0xB336FB85, 0xB337FB85, 0xB338FB85, 0xB339FB85, 0xB33AFB85, + 0xB33BFB85, 0xB33CFB85, 0xB33DFB85, 0xB33EFB85, 0xB33FFB85, 0xB340FB85, 0xB341FB85, 0xB342FB85, 0xB343FB85, 0xB344FB85, 0xB345FB85, 0xB346FB85, 0xB347FB85, 0xB348FB85, 0xB349FB85, + 0xB34AFB85, 0xB34BFB85, 0xB34CFB85, 0xB34DFB85, 0xB34EFB85, 0xB34FFB85, 0xB350FB85, 0xB351FB85, 0xB352FB85, 0xB353FB85, 0xB354FB85, 0xB355FB85, 0xB356FB85, 0xB357FB85, 0xB358FB85, + 0xB359FB85, 0xB35AFB85, 0xB35BFB85, 0xB35CFB85, 0xB35DFB85, 0xB35EFB85, 0xB35FFB85, 0xB360FB85, 0xB361FB85, 0xB362FB85, 0xB363FB85, 0xB364FB85, 0xB365FB85, 0xB366FB85, 0xB367FB85, + 0xB368FB85, 0xB369FB85, 0xB36AFB85, 
0xB36BFB85, 0xB36CFB85, 0xB36DFB85, 0xB36EFB85, 0xB36FFB85, 0xB370FB85, 0xB371FB85, 0xB372FB85, 0xB373FB85, 0xB374FB85, 0xB375FB85, 0xB376FB85, + 0xB377FB85, 0xB378FB85, 0xB379FB85, 0xB37AFB85, 0xB37BFB85, 0xB37CFB85, 0xB37DFB85, 0xB37EFB85, 0xB37FFB85, 0xB380FB85, 0xB381FB85, 0xB382FB85, 0xB383FB85, 0xB384FB85, 0xB385FB85, + 0xB386FB85, 0xB387FB85, 0xB388FB85, 0xB389FB85, 0xB38AFB85, 0xB38BFB85, 0xB38CFB85, 0xB38DFB85, 0xB38EFB85, 0xB38FFB85, 0xB390FB85, 0xB391FB85, 0xB392FB85, 0xB393FB85, 0xB394FB85, + 0xB395FB85, 0xB396FB85, 0xB397FB85, 0xB398FB85, 0xB399FB85, 0xB39AFB85, 0xB39BFB85, 0xB39CFB85, 0xB39DFB85, 0xB39EFB85, 0xB39FFB85, 0xB3A0FB85, 0xB3A1FB85, 0xB3A2FB85, 0xB3A3FB85, + 0xB3A4FB85, 0xB3A5FB85, 0xB3A6FB85, 0xB3A7FB85, 0xB3A8FB85, 0xB3A9FB85, 0xB3AAFB85, 0xB3ABFB85, 0xB3ACFB85, 0xB3ADFB85, 0xB3AEFB85, 0xB3AFFB85, 0xB3B0FB85, 0xB3B1FB85, 0xB3B2FB85, + 0xB3B3FB85, 0xB3B4FB85, 0xB3B5FB85, 0xB3B6FB85, 0xB3B7FB85, 0xB3B8FB85, 0xB3B9FB85, 0xB3BAFB85, 0xB3BBFB85, 0xB3BCFB85, 0xB3BDFB85, 0xB3BEFB85, 0xB3BFFB85, 0xB3C0FB85, 0xB3C1FB85, + 0xB3C2FB85, 0xB3C3FB85, 0xB3C4FB85, 0xB3C5FB85, 0xB3C6FB85, 0xB3C7FB85, 0xB3C8FB85, 0xB3C9FB85, 0xB3CAFB85, 0xB3CBFB85, 0xB3CCFB85, 0xB3CDFB85, 0xB3CEFB85, 0xB3CFFB85, 0xB3D0FB85, + 0xB3D1FB85, 0xB3D2FB85, 0xB3D3FB85, 0xB3D4FB85, 0xB3D5FB85, 0xB3D6FB85, 0xB3D7FB85, 0xB3D8FB85, 0xB3D9FB85, 0xB3DAFB85, 0xB3DBFB85, 0xB3DCFB85, 0xB3DDFB85, 0xB3DEFB85, 0xB3DFFB85, + 0xB3E0FB85, 0xB3E1FB85, 0xB3E2FB85, 0xB3E3FB85, 0xB3E4FB85, 0xB3E5FB85, 0xB3E6FB85, 0xB3E7FB85, 0xB3E8FB85, 0xB3E9FB85, 0xB3EAFB85, 0xB3EBFB85, 0xB3ECFB85, 0xB3EDFB85, 0xB3EEFB85, + 0xB3EFFB85, 0xB3F0FB85, 0xB3F1FB85, 0xB3F2FB85, 0xB3F3FB85, 0xB3F4FB85, 0xB3F5FB85, 0xB3F6FB85, 0xB3F7FB85, 0xB3F8FB85, 0xB3F9FB85, 0xB3FAFB85, 0xB3FBFB85, 0xB3FCFB85, 0xB3FDFB85, + 0xB3FEFB85, 0xB3FFFB85, 0xB400FB85, 0xB401FB85, 0xB402FB85, 0xB403FB85, 0xB404FB85, 0xB405FB85, 0xB406FB85, 0xB407FB85, 0xB408FB85, 0xB409FB85, 0xB40AFB85, 0xB40BFB85, 0xB40CFB85, + 0xB40DFB85, 0xB40EFB85, 
0xB40FFB85, 0xB410FB85, 0xB411FB85, 0xB412FB85, 0xB413FB85, 0xB414FB85, 0xB415FB85, 0xB416FB85, 0xB417FB85, 0xB418FB85, 0xB419FB85, 0xB41AFB85, 0xB41BFB85, + 0xB41CFB85, 0xB41DFB85, 0xB41EFB85, 0xB41FFB85, 0xB420FB85, 0xB421FB85, 0xB422FB85, 0xB423FB85, 0xB424FB85, 0xB425FB85, 0xB426FB85, 0xB427FB85, 0xB428FB85, 0xB429FB85, 0xB42AFB85, + 0xB42BFB85, 0xB42CFB85, 0xB42DFB85, 0xB42EFB85, 0xB42FFB85, 0xB430FB85, 0xB431FB85, 0xB432FB85, 0xB433FB85, 0xB434FB85, 0xB435FB85, 0xB436FB85, 0xB437FB85, 0xB438FB85, 0xB439FB85, + 0xB43AFB85, 0xB43BFB85, 0xB43CFB85, 0xB43DFB85, 0xB43EFB85, 0xB43FFB85, 0xB440FB85, 0xB441FB85, 0xB442FB85, 0xB443FB85, 0xB444FB85, 0xB445FB85, 0xB446FB85, 0xB447FB85, 0xB448FB85, + 0xB449FB85, 0xB44AFB85, 0xB44BFB85, 0xB44CFB85, 0xB44DFB85, 0xB44EFB85, 0xB44FFB85, 0xB450FB85, 0xB451FB85, 0xB452FB85, 0xB453FB85, 0xB454FB85, 0xB455FB85, 0xB456FB85, 0xB457FB85, + 0xB458FB85, 0xB459FB85, 0xB45AFB85, 0xB45BFB85, 0xB45CFB85, 0xB45DFB85, 0xB45EFB85, 0xB45FFB85, 0xB460FB85, 0xB461FB85, 0xB462FB85, 0xB463FB85, 0xB464FB85, 0xB465FB85, 0xB466FB85, + 0xB467FB85, 0xB468FB85, 0xB469FB85, 0xB46AFB85, 0xB46BFB85, 0xB46CFB85, 0xB46DFB85, 0xB46EFB85, 0xB46FFB85, 0xB470FB85, 0xB471FB85, 0xB472FB85, 0xB473FB85, 0xB474FB85, 0xB475FB85, + 0xB476FB85, 0xB477FB85, 0xB478FB85, 0xB479FB85, 0xB47AFB85, 0xB47BFB85, 0xB47CFB85, 0xB47DFB85, 0xB47EFB85, 0xB47FFB85, 0xB480FB85, 0xB481FB85, 0xB482FB85, 0xB483FB85, 0xB484FB85, + 0xB485FB85, 0xB486FB85, 0xB487FB85, 0xB488FB85, 0xB489FB85, 0xB48AFB85, 0xB48BFB85, 0xB48CFB85, 0xB48DFB85, 0xB48EFB85, 0xB48FFB85, 0xB490FB85, 0xB491FB85, 0xB492FB85, 0xB493FB85, + 0xB494FB85, 0xB495FB85, 0xB496FB85, 0xB497FB85, 0xB498FB85, 0xB499FB85, 0xB49AFB85, 0xB49BFB85, 0xB49CFB85, 0xB49DFB85, 0xB49EFB85, 0xB49FFB85, 0xB4A0FB85, 0xB4A1FB85, 0xB4A2FB85, + 0xB4A3FB85, 0xB4A4FB85, 0xB4A5FB85, 0xB4A6FB85, 0xB4A7FB85, 0xB4A8FB85, 0xB4A9FB85, 0xB4AAFB85, 0xB4ABFB85, 0xB4ACFB85, 0xB4ADFB85, 0xB4AEFB85, 0xB4AFFB85, 0xB4B0FB85, 0xB4B1FB85, + 0xB4B2FB85, 
0xB4B3FB85, 0xB4B4FB85, 0xB4B5FB85, 0xB4B6FB85, 0xB4B7FB85, 0xB4B8FB85, 0xB4B9FB85, 0xB4BAFB85, 0xB4BBFB85, 0xB4BCFB85, 0xB4BDFB85, 0xB4BEFB85, 0xB4BFFB85, 0xB4C0FB85, + 0xB4C1FB85, 0xB4C2FB85, 0xB4C3FB85, 0xB4C4FB85, 0xB4C5FB85, 0xB4C6FB85, 0xB4C7FB85, 0xB4C8FB85, 0xB4C9FB85, 0xB4CAFB85, 0xB4CBFB85, 0xB4CCFB85, 0xB4CDFB85, 0xB4CEFB85, 0xB4CFFB85, + 0xB4D0FB85, 0xB4D1FB85, 0xB4D2FB85, 0xB4D3FB85, 0xB4D4FB85, 0xB4D5FB85, 0xB4D6FB85, 0xB4D7FB85, 0xB4D8FB85, 0xB4D9FB85, 0xB4DAFB85, 0xB4DBFB85, 0xB4DCFB85, 0xB4DDFB85, 0xB4DEFB85, + 0xB4DFFB85, 0xB4E0FB85, 0xB4E1FB85, 0xB4E2FB85, 0xB4E3FB85, 0xB4E4FB85, 0xB4E5FB85, 0xB4E6FB85, 0xB4E7FB85, 0xB4E8FB85, 0xB4E9FB85, 0xB4EAFB85, 0xB4EBFB85, 0xB4ECFB85, 0xB4EDFB85, + 0xB4EEFB85, 0xB4EFFB85, 0xB4F0FB85, 0xB4F1FB85, 0xB4F2FB85, 0xB4F3FB85, 0xB4F4FB85, 0xB4F5FB85, 0xB4F6FB85, 0xB4F7FB85, 0xB4F8FB85, 0xB4F9FB85, 0xB4FAFB85, 0xB4FBFB85, 0xB4FCFB85, + 0xB4FDFB85, 0xB4FEFB85, 0xB4FFFB85, 0xB500FB85, 0xB501FB85, 0xB502FB85, 0xB503FB85, 0xB504FB85, 0xB505FB85, 0xB506FB85, 0xB507FB85, 0xB508FB85, 0xB509FB85, 0xB50AFB85, 0xB50BFB85, + 0xB50CFB85, 0xB50DFB85, 0xB50EFB85, 0xB50FFB85, 0xB510FB85, 0xB511FB85, 0xB512FB85, 0xB513FB85, 0xB514FB85, 0xB515FB85, 0xB516FB85, 0xB517FB85, 0xB518FB85, 0xB519FB85, 0xB51AFB85, + 0xB51BFB85, 0xB51CFB85, 0xB51DFB85, 0xB51EFB85, 0xB51FFB85, 0xB520FB85, 0xB521FB85, 0xB522FB85, 0xB523FB85, 0xB524FB85, 0xB525FB85, 0xB526FB85, 0xB527FB85, 0xB528FB85, 0xB529FB85, + 0xB52AFB85, 0xB52BFB85, 0xB52CFB85, 0xB52DFB85, 0xB52EFB85, 0xB52FFB85, 0xB530FB85, 0xB531FB85, 0xB532FB85, 0xB533FB85, 0xB534FB85, 0xB535FB85, 0xB536FB85, 0xB537FB85, 0xB538FB85, + 0xB539FB85, 0xB53AFB85, 0xB53BFB85, 0xB53CFB85, 0xB53DFB85, 0xB53EFB85, 0xB53FFB85, 0xB540FB85, 0xB541FB85, 0xB542FB85, 0xB543FB85, 0xB544FB85, 0xB545FB85, 0xB546FB85, 0xB547FB85, + 0xB548FB85, 0xB549FB85, 0xB54AFB85, 0xB54BFB85, 0xB54CFB85, 0xB54DFB85, 0xB54EFB85, 0xB54FFB85, 0xB550FB85, 0xB551FB85, 0xB552FB85, 0xB553FB85, 0xB554FB85, 0xB555FB85, 0xB556FB85, + 
0xB557FB85, 0xB558FB85, 0xB559FB85, 0xB55AFB85, 0xB55BFB85, 0xB55CFB85, 0xB55DFB85, 0xB55EFB85, 0xB55FFB85, 0xB560FB85, 0xB561FB85, 0xB562FB85, 0xB563FB85, 0xB564FB85, 0xB565FB85, + 0xB566FB85, 0xB567FB85, 0xB568FB85, 0xB569FB85, 0xB56AFB85, 0xB56BFB85, 0xB56CFB85, 0xB56DFB85, 0xB56EFB85, 0xB56FFB85, 0xB570FB85, 0xB571FB85, 0xB572FB85, 0xB573FB85, 0xB574FB85, + 0xB575FB85, 0xB576FB85, 0xB577FB85, 0xB578FB85, 0xB579FB85, 0xB57AFB85, 0xB57BFB85, 0xB57CFB85, 0xB57DFB85, 0xB57EFB85, 0xB57FFB85, 0xB580FB85, 0xB581FB85, 0xB582FB85, 0xB583FB85, + 0xB584FB85, 0xB585FB85, 0xB586FB85, 0xB587FB85, 0xB588FB85, 0xB589FB85, 0xB58AFB85, 0xB58BFB85, 0xB58CFB85, 0xB58DFB85, 0xB58EFB85, 0xB58FFB85, 0xB590FB85, 0xB591FB85, 0xB592FB85, + 0xB593FB85, 0xB594FB85, 0xB595FB85, 0xB596FB85, 0xB597FB85, 0xB598FB85, 0xB599FB85, 0xB59AFB85, 0xB59BFB85, 0xB59CFB85, 0xB59DFB85, 0xB59EFB85, 0xB59FFB85, 0xB5A0FB85, 0xB5A1FB85, + 0xB5A2FB85, 0xB5A3FB85, 0xB5A4FB85, 0xB5A5FB85, 0xB5A6FB85, 0xB5A7FB85, 0xB5A8FB85, 0xB5A9FB85, 0xB5AAFB85, 0xB5ABFB85, 0xB5ACFB85, 0xB5ADFB85, 0xB5AEFB85, 0xB5AFFB85, 0xB5B0FB85, + 0xB5B1FB85, 0xB5B2FB85, 0xB5B3FB85, 0xB5B4FB85, 0xB5B5FB85, 0xB5B6FB85, 0xB5B7FB85, 0xB5B8FB85, 0xB5B9FB85, 0xB5BAFB85, 0xB5BBFB85, 0xB5BCFB85, 0xB5BDFB85, 0xB5BEFB85, 0xB5BFFB85, + 0xB5C0FB85, 0xB5C1FB85, 0xB5C2FB85, 0xB5C3FB85, 0xB5C4FB85, 0xB5C5FB85, 0xB5C6FB85, 0xB5C7FB85, 0xB5C8FB85, 0xB5C9FB85, 0xB5CAFB85, 0xB5CBFB85, 0xB5CCFB85, 0xB5CDFB85, 0xB5CEFB85, + 0xB5CFFB85, 0xB5D0FB85, 0xB5D1FB85, 0xB5D2FB85, 0xB5D3FB85, 0xB5D4FB85, 0xB5D5FB85, 0xB5D6FB85, 0xB5D7FB85, 0xB5D8FB85, 0xB5D9FB85, 0xB5DAFB85, 0xB5DBFB85, 0xB5DCFB85, 0xB5DDFB85, + 0xB5DEFB85, 0xB5DFFB85, 0xB5E0FB85, 0xB5E1FB85, 0xB5E2FB85, 0xB5E3FB85, 0xB5E4FB85, 0xB5E5FB85, 0xB5E6FB85, 0xB5E7FB85, 0xB5E8FB85, 0xB5E9FB85, 0xB5EAFB85, 0xB5EBFB85, 0xB5ECFB85, + 0xB5EDFB85, 0xB5EEFB85, 0xB5EFFB85, 0xB5F0FB85, 0xB5F1FB85, 0xB5F2FB85, 0xB5F3FB85, 0xB5F4FB85, 0xB5F5FB85, 0xB5F6FB85, 0xB5F7FB85, 0xB5F8FB85, 0xB5F9FB85, 0xB5FAFB85, 0xB5FBFB85, 
+ 0xB5FCFB85, 0xB5FDFB85, 0xB5FEFB85, 0xB5FFFB85, 0xB600FB85, 0xB601FB85, 0xB602FB85, 0xB603FB85, 0xB604FB85, 0xB605FB85, 0xB606FB85, 0xB607FB85, 0xB608FB85, 0xB609FB85, 0xB60AFB85, + 0xB60BFB85, 0xB60CFB85, 0xB60DFB85, 0xB60EFB85, 0xB60FFB85, 0xB610FB85, 0xB611FB85, 0xB612FB85, 0xB613FB85, 0xB614FB85, 0xB615FB85, 0xB616FB85, 0xB617FB85, 0xB618FB85, 0xB619FB85, + 0xB61AFB85, 0xB61BFB85, 0xB61CFB85, 0xB61DFB85, 0xB61EFB85, 0xB61FFB85, 0xB620FB85, 0xB621FB85, 0xB622FB85, 0xB623FB85, 0xB624FB85, 0xB625FB85, 0xB626FB85, 0xB627FB85, 0xB628FB85, + 0xB629FB85, 0xB62AFB85, 0xB62BFB85, 0xB62CFB85, 0xB62DFB85, 0xB62EFB85, 0xB62FFB85, 0xB630FB85, 0xB631FB85, 0xB632FB85, 0xB633FB85, 0xB634FB85, 0xB635FB85, 0xB636FB85, 0xB637FB85, + 0xB638FB85, 0xB639FB85, 0xB63AFB85, 0xB63BFB85, 0xB63CFB85, 0xB63DFB85, 0xB63EFB85, 0xB63FFB85, 0xB640FB85, 0xB641FB85, 0xB642FB85, 0xB643FB85, 0xB644FB85, 0xB645FB85, 0xB646FB85, + 0xB647FB85, 0xB648FB85, 0xB649FB85, 0xB64AFB85, 0xB64BFB85, 0xB64CFB85, 0xB64DFB85, 0xB64EFB85, 0xB64FFB85, 0xB650FB85, 0xB651FB85, 0xB652FB85, 0xB653FB85, 0xB654FB85, 0xB655FB85, + 0xB656FB85, 0xB657FB85, 0xB658FB85, 0xB659FB85, 0xB65AFB85, 0xB65BFB85, 0xB65CFB85, 0xB65DFB85, 0xB65EFB85, 0xB65FFB85, 0xB660FB85, 0xB661FB85, 0xB662FB85, 0xB663FB85, 0xB664FB85, + 0xB665FB85, 0xB666FB85, 0xB667FB85, 0xB668FB85, 0xB669FB85, 0xB66AFB85, 0xB66BFB85, 0xB66CFB85, 0xB66DFB85, 0xB66EFB85, 0xB66FFB85, 0xB670FB85, 0xB671FB85, 0xB672FB85, 0xB673FB85, + 0xB674FB85, 0xB675FB85, 0xB676FB85, 0xB677FB85, 0xB678FB85, 0xB679FB85, 0xB67AFB85, 0xB67BFB85, 0xB67CFB85, 0xB67DFB85, 0xB67EFB85, 0xB67FFB85, 0xB680FB85, 0xB681FB85, 0xB682FB85, + 0xB683FB85, 0xB684FB85, 0xB685FB85, 0xB686FB85, 0xB687FB85, 0xB688FB85, 0xB689FB85, 0xB68AFB85, 0xB68BFB85, 0xB68CFB85, 0xB68DFB85, 0xB68EFB85, 0xB68FFB85, 0xB690FB85, 0xB691FB85, + 0xB692FB85, 0xB693FB85, 0xB694FB85, 0xB695FB85, 0xB696FB85, 0xB697FB85, 0xB698FB85, 0xB699FB85, 0xB69AFB85, 0xB69BFB85, 0xB69CFB85, 0xB69DFB85, 0xB69EFB85, 0xB69FFB85, 
0xB6A0FB85, + 0xB6A1FB85, 0xB6A2FB85, 0xB6A3FB85, 0xB6A4FB85, 0xB6A5FB85, 0xB6A6FB85, 0xB6A7FB85, 0xB6A8FB85, 0xB6A9FB85, 0xB6AAFB85, 0xB6ABFB85, 0xB6ACFB85, 0xB6ADFB85, 0xB6AEFB85, 0xB6AFFB85, + 0xB6B0FB85, 0xB6B1FB85, 0xB6B2FB85, 0xB6B3FB85, 0xB6B4FB85, 0xB6B5FB85, 0xB6B6FB85, 0xB6B7FB85, 0xB6B8FB85, 0xB6B9FB85, 0xB6BAFB85, 0xB6BBFB85, 0xB6BCFB85, 0xB6BDFB85, 0xB6BEFB85, + 0xB6BFFB85, 0xB6C0FB85, 0xB6C1FB85, 0xB6C2FB85, 0xB6C3FB85, 0xB6C4FB85, 0xB6C5FB85, 0xB6C6FB85, 0xB6C7FB85, 0xB6C8FB85, 0xB6C9FB85, 0xB6CAFB85, 0xB6CBFB85, 0xB6CCFB85, 0xB6CDFB85, + 0xB6CEFB85, 0xB6CFFB85, 0xB6D0FB85, 0xB6D1FB85, 0xB6D2FB85, 0xB6D3FB85, 0xB6D4FB85, 0xB6D5FB85, 0xB6D6FB85, 0xB6D7FB85, 0xB6D8FB85, 0xB6D9FB85, 0xB6DAFB85, 0xB6DBFB85, 0xB6DCFB85, + 0xB6DDFB85, 0xB6DEFB85, 0xB6DFFB85, 0xB6E0FB85, 0xB6E1FB85, 0xB6E2FB85, 0xB6E3FB85, 0xB6E4FB85, 0xB6E5FB85, 0xB6E6FB85, 0xB6E7FB85, 0xB6E8FB85, 0xB6E9FB85, 0xB6EAFB85, 0xB6EBFB85, + 0xB6ECFB85, 0xB6EDFB85, 0xB6EEFB85, 0xB6EFFB85, 0xB6F0FB85, 0xB6F1FB85, 0xB6F2FB85, 0xB6F3FB85, 0xB6F4FB85, 0xB6F5FB85, 0xB6F6FB85, 0xB6F7FB85, 0xB6F8FB85, 0xB6F9FB85, 0xB6FAFB85, + 0xB6FBFB85, 0xB6FCFB85, 0xB6FDFB85, 0xB6FEFB85, 0xB6FFFB85, 0xB700FB85, 0xB701FB85, 0xB702FB85, 0xB703FB85, 0xB704FB85, 0xB705FB85, 0xB706FB85, 0xB707FB85, 0xB708FB85, 0xB709FB85, + 0xB70AFB85, 0xB70BFB85, 0xB70CFB85, 0xB70DFB85, 0xB70EFB85, 0xB70FFB85, 0xB710FB85, 0xB711FB85, 0xB712FB85, 0xB713FB85, 0xB714FB85, 0xB715FB85, 0xB716FB85, 0xB717FB85, 0xB718FB85, + 0xB719FB85, 0xB71AFB85, 0xB71BFB85, 0xB71CFB85, 0xB71DFB85, 0xB71EFB85, 0xB71FFB85, 0xB720FB85, 0xB721FB85, 0xB722FB85, 0xB723FB85, 0xB724FB85, 0xB725FB85, 0xB726FB85, 0xB727FB85, + 0xB728FB85, 0xB729FB85, 0xB72AFB85, 0xB72BFB85, 0xB72CFB85, 0xB72DFB85, 0xB72EFB85, 0xB72FFB85, 0xB730FB85, 0xB731FB85, 0xB732FB85, 0xB733FB85, 0xB734FB85, 0xB735FBC5, 0xB736FBC5, + 0xB737FBC5, 0xB738FBC5, 0xB739FBC5, 0xB73AFBC5, 0xB73BFBC5, 0xB73CFBC5, 0xB73DFBC5, 0xB73EFBC5, 0xB73FFBC5, 0xB740FB85, 0xB741FB85, 0xB742FB85, 0xB743FB85, 
0xB744FB85, 0xB745FB85, + 0xB746FB85, 0xB747FB85, 0xB748FB85, 0xB749FB85, 0xB74AFB85, 0xB74BFB85, 0xB74CFB85, 0xB74DFB85, 0xB74EFB85, 0xB74FFB85, 0xB750FB85, 0xB751FB85, 0xB752FB85, 0xB753FB85, 0xB754FB85, + 0xB755FB85, 0xB756FB85, 0xB757FB85, 0xB758FB85, 0xB759FB85, 0xB75AFB85, 0xB75BFB85, 0xB75CFB85, 0xB75DFB85, 0xB75EFB85, 0xB75FFB85, 0xB760FB85, 0xB761FB85, 0xB762FB85, 0xB763FB85, + 0xB764FB85, 0xB765FB85, 0xB766FB85, 0xB767FB85, 0xB768FB85, 0xB769FB85, 0xB76AFB85, 0xB76BFB85, 0xB76CFB85, 0xB76DFB85, 0xB76EFB85, 0xB76FFB85, 0xB770FB85, 0xB771FB85, 0xB772FB85, + 0xB773FB85, 0xB774FB85, 0xB775FB85, 0xB776FB85, 0xB777FB85, 0xB778FB85, 0xB779FB85, 0xB77AFB85, 0xB77BFB85, 0xB77CFB85, 0xB77DFB85, 0xB77EFB85, 0xB77FFB85, 0xB780FB85, 0xB781FB85, + 0xB782FB85, 0xB783FB85, 0xB784FB85, 0xB785FB85, 0xB786FB85, 0xB787FB85, 0xB788FB85, 0xB789FB85, 0xB78AFB85, 0xB78BFB85, 0xB78CFB85, 0xB78DFB85, 0xB78EFB85, 0xB78FFB85, 0xB790FB85, + 0xB791FB85, 0xB792FB85, 0xB793FB85, 0xB794FB85, 0xB795FB85, 0xB796FB85, 0xB797FB85, 0xB798FB85, 0xB799FB85, 0xB79AFB85, 0xB79BFB85, 0xB79CFB85, 0xB79DFB85, 0xB79EFB85, 0xB79FFB85, + 0xB7A0FB85, 0xB7A1FB85, 0xB7A2FB85, 0xB7A3FB85, 0xB7A4FB85, 0xB7A5FB85, 0xB7A6FB85, 0xB7A7FB85, 0xB7A8FB85, 0xB7A9FB85, 0xB7AAFB85, 0xB7ABFB85, 0xB7ACFB85, 0xB7ADFB85, 0xB7AEFB85, + 0xB7AFFB85, 0xB7B0FB85, 0xB7B1FB85, 0xB7B2FB85, 0xB7B3FB85, 0xB7B4FB85, 0xB7B5FB85, 0xB7B6FB85, 0xB7B7FB85, 0xB7B8FB85, 0xB7B9FB85, 0xB7BAFB85, 0xB7BBFB85, 0xB7BCFB85, 0xB7BDFB85, + 0xB7BEFB85, 0xB7BFFB85, 0xB7C0FB85, 0xB7C1FB85, 0xB7C2FB85, 0xB7C3FB85, 0xB7C4FB85, 0xB7C5FB85, 0xB7C6FB85, 0xB7C7FB85, 0xB7C8FB85, 0xB7C9FB85, 0xB7CAFB85, 0xB7CBFB85, 0xB7CCFB85, + 0xB7CDFB85, 0xB7CEFB85, 0xB7CFFB85, 0xB7D0FB85, 0xB7D1FB85, 0xB7D2FB85, 0xB7D3FB85, 0xB7D4FB85, 0xB7D5FB85, 0xB7D6FB85, 0xB7D7FB85, 0xB7D8FB85, 0xB7D9FB85, 0xB7DAFB85, 0xB7DBFB85, + 0xB7DCFB85, 0xB7DDFB85, 0xB7DEFB85, 0xB7DFFB85, 0xB7E0FB85, 0xB7E1FB85, 0xB7E2FB85, 0xB7E3FB85, 0xB7E4FB85, 0xB7E5FB85, 0xB7E6FB85, 0xB7E7FB85, 
0xB7E8FB85, 0xB7E9FB85, 0xB7EAFB85, + 0xB7EBFB85, 0xB7ECFB85, 0xB7EDFB85, 0xB7EEFB85, 0xB7EFFB85, 0xB7F0FB85, 0xB7F1FB85, 0xB7F2FB85, 0xB7F3FB85, 0xB7F4FB85, 0xB7F5FB85, 0xB7F6FB85, 0xB7F7FB85, 0xB7F8FB85, 0xB7F9FB85, + 0xB7FAFB85, 0xB7FBFB85, 0xB7FCFB85, 0xB7FDFB85, 0xB7FEFB85, 0xB7FFFB85, 0xB800FB85, 0xB801FB85, 0xB802FB85, 0xB803FB85, 0xB804FB85, 0xB805FB85, 0xB806FB85, 0xB807FB85, 0xB808FB85, + 0xB809FB85, 0xB80AFB85, 0xB80BFB85, 0xB80CFB85, 0xB80DFB85, 0xB80EFB85, 0xB80FFB85, 0xB810FB85, 0xB811FB85, 0xB812FB85, 0xB813FB85, 0xB814FB85, 0xB815FB85, 0xB816FB85, 0xB817FB85, + 0xB818FB85, 0xB819FB85, 0xB81AFB85, 0xB81BFB85, 0xB81CFB85, 0xB81DFB85, 0xB81EFBC5, 0xB81FFBC5, 0xB820FB85, 0xB821FB85, 0xB822FB85, 0xB823FB85, 0xB824FB85, 0xB825FB85, 0xB826FB85, + 0xB827FB85, 0xB828FB85, 0xB829FB85, 0xB82AFB85, 0xB82BFB85, 0xB82CFB85, 0xB82DFB85, 0xB82EFB85, 0xB82FFB85, 0xB830FB85, 0xB831FB85, 0xB832FB85, 0xB833FB85, 0xB834FB85, 0xB835FB85, + 0xB836FB85, 0xB837FB85, 0xB838FB85, 0xB839FB85, 0xB83AFB85, 0xB83BFB85, 0xB83CFB85, 0xB83DFB85, 0xB83EFB85, 0xB83FFB85, 0xB840FB85, 0xB841FB85, 0xB842FB85, 0xB843FB85, 0xB844FB85, + 0xB845FB85, 0xB846FB85, 0xB847FB85, 0xB848FB85, 0xB849FB85, 0xB84AFB85, 0xB84BFB85, 0xB84CFB85, 0xB84DFB85, 0xB84EFB85, 0xB84FFB85, 0xB850FB85, 0xB851FB85, 0xB852FB85, 0xB853FB85, + 0xB854FB85, 0xB855FB85, 0xB856FB85, 0xB857FB85, 0xB858FB85, 0xB859FB85, 0xB85AFB85, 0xB85BFB85, 0xB85CFB85, 0xB85DFB85, 0xB85EFB85, 0xB85FFB85, 0xB860FB85, 0xB861FB85, 0xB862FB85, + 0xB863FB85, 0xB864FB85, 0xB865FB85, 0xB866FB85, 0xB867FB85, 0xB868FB85, 0xB869FB85, 0xB86AFB85, 0xB86BFB85, 0xB86CFB85, 0xB86DFB85, 0xB86EFB85, 0xB86FFB85, 0xB870FB85, 0xB871FB85, + 0xB872FB85, 0xB873FB85, 0xB874FB85, 0xB875FB85, 0xB876FB85, 0xB877FB85, 0xB878FB85, 0xB879FB85, 0xB87AFB85, 0xB87BFB85, 0xB87CFB85, 0xB87DFB85, 0xB87EFB85, 0xB87FFB85, 0xB880FB85, + 0xB881FB85, 0xB882FB85, 0xB883FB85, 0xB884FB85, 0xB885FB85, 0xB886FB85, 0xB887FB85, 0xB888FB85, 0xB889FB85, 0xB88AFB85, 0xB88BFB85, 
0xB88CFB85, 0xB88DFB85, 0xB88EFB85, 0xB88FFB85, + 0xB890FB85, 0xB891FB85, 0xB892FB85, 0xB893FB85, 0xB894FB85, 0xB895FB85, 0xB896FB85, 0xB897FB85, 0xB898FB85, 0xB899FB85, 0xB89AFB85, 0xB89BFB85, 0xB89CFB85, 0xB89DFB85, 0xB89EFB85, + 0xB89FFB85, 0xB8A0FB85, 0xB8A1FB85, 0xB8A2FB85, 0xB8A3FB85, 0xB8A4FB85, 0xB8A5FB85, 0xB8A6FB85, 0xB8A7FB85, 0xB8A8FB85, 0xB8A9FB85, 0xB8AAFB85, 0xB8ABFB85, 0xB8ACFB85, 0xB8ADFB85, + 0xB8AEFB85, 0xB8AFFB85, 0xB8B0FB85, 0xB8B1FB85, 0xB8B2FB85, 0xB8B3FB85, 0xB8B4FB85, 0xB8B5FB85, 0xB8B6FB85, 0xB8B7FB85, 0xB8B8FB85, 0xB8B9FB85, 0xB8BAFB85, 0xB8BBFB85, 0xB8BCFB85, + 0xB8BDFB85, 0xB8BEFB85, 0xB8BFFB85, 0xB8C0FB85, 0xB8C1FB85, 0xB8C2FB85, 0xB8C3FB85, 0xB8C4FB85, 0xB8C5FB85, 0xB8C6FB85, 0xB8C7FB85, 0xB8C8FB85, 0xB8C9FB85, 0xB8CAFB85, 0xB8CBFB85, + 0xB8CCFB85, 0xB8CDFB85, 0xB8CEFB85, 0xB8CFFB85, 0xB8D0FB85, 0xB8D1FB85, 0xB8D2FB85, 0xB8D3FB85, 0xB8D4FB85, 0xB8D5FB85, 0xB8D6FB85, 0xB8D7FB85, 0xB8D8FB85, 0xB8D9FB85, 0xB8DAFB85, + 0xB8DBFB85, 0xB8DCFB85, 0xB8DDFB85, 0xB8DEFB85, 0xB8DFFB85, 0xB8E0FB85, 0xB8E1FB85, 0xB8E2FB85, 0xB8E3FB85, 0xB8E4FB85, 0xB8E5FB85, 0xB8E6FB85, 0xB8E7FB85, 0xB8E8FB85, 0xB8E9FB85, + 0xB8EAFB85, 0xB8EBFB85, 0xB8ECFB85, 0xB8EDFB85, 0xB8EEFB85, 0xB8EFFB85, 0xB8F0FB85, 0xB8F1FB85, 0xB8F2FB85, 0xB8F3FB85, 0xB8F4FB85, 0xB8F5FB85, 0xB8F6FB85, 0xB8F7FB85, 0xB8F8FB85, + 0xB8F9FB85, 0xB8FAFB85, 0xB8FBFB85, 0xB8FCFB85, 0xB8FDFB85, 0xB8FEFB85, 0xB8FFFB85, 0xB900FB85, 0xB901FB85, 0xB902FB85, 0xB903FB85, 0xB904FB85, 0xB905FB85, 0xB906FB85, 0xB907FB85, + 0xB908FB85, 0xB909FB85, 0xB90AFB85, 0xB90BFB85, 0xB90CFB85, 0xB90DFB85, 0xB90EFB85, 0xB90FFB85, 0xB910FB85, 0xB911FB85, 0xB912FB85, 0xB913FB85, 0xB914FB85, 0xB915FB85, 0xB916FB85, + 0xB917FB85, 0xB918FB85, 0xB919FB85, 0xB91AFB85, 0xB91BFB85, 0xB91CFB85, 0xB91DFB85, 0xB91EFB85, 0xB91FFB85, 0xB920FB85, 0xB921FB85, 0xB922FB85, 0xB923FB85, 0xB924FB85, 0xB925FB85, + 0xB926FB85, 0xB927FB85, 0xB928FB85, 0xB929FB85, 0xB92AFB85, 0xB92BFB85, 0xB92CFB85, 0xB92DFB85, 0xB92EFB85, 0xB92FFB85, 
0xB930FB85, 0xB931FB85, 0xB932FB85, 0xB933FB85, 0xB934FB85, + 0xB935FB85, 0xB936FB85, 0xB937FB85, 0xB938FB85, 0xB939FB85, 0xB93AFB85, 0xB93BFB85, 0xB93CFB85, 0xB93DFB85, 0xB93EFB85, 0xB93FFB85, 0xB940FB85, 0xB941FB85, 0xB942FB85, 0xB943FB85, + 0xB944FB85, 0xB945FB85, 0xB946FB85, 0xB947FB85, 0xB948FB85, 0xB949FB85, 0xB94AFB85, 0xB94BFB85, 0xB94CFB85, 0xB94DFB85, 0xB94EFB85, 0xB94FFB85, 0xB950FB85, 0xB951FB85, 0xB952FB85, + 0xB953FB85, 0xB954FB85, 0xB955FB85, 0xB956FB85, 0xB957FB85, 0xB958FB85, 0xB959FB85, 0xB95AFB85, 0xB95BFB85, 0xB95CFB85, 0xB95DFB85, 0xB95EFB85, 0xB95FFB85, 0xB960FB85, 0xB961FB85, + 0xB962FB85, 0xB963FB85, 0xB964FB85, 0xB965FB85, 0xB966FB85, 0xB967FB85, 0xB968FB85, 0xB969FB85, 0xB96AFB85, 0xB96BFB85, 0xB96CFB85, 0xB96DFB85, 0xB96EFB85, 0xB96FFB85, 0xB970FB85, + 0xB971FB85, 0xB972FB85, 0xB973FB85, 0xB974FB85, 0xB975FB85, 0xB976FB85, 0xB977FB85, 0xB978FB85, 0xB979FB85, 0xB97AFB85, 0xB97BFB85, 0xB97CFB85, 0xB97DFB85, 0xB97EFB85, 0xB97FFB85, + 0xB980FB85, 0xB981FB85, 0xB982FB85, 0xB983FB85, 0xB984FB85, 0xB985FB85, 0xB986FB85, 0xB987FB85, 0xB988FB85, 0xB989FB85, 0xB98AFB85, 0xB98BFB85, 0xB98CFB85, 0xB98DFB85, 0xB98EFB85, + 0xB98FFB85, 0xB990FB85, 0xB991FB85, 0xB992FB85, 0xB993FB85, 0xB994FB85, 0xB995FB85, 0xB996FB85, 0xB997FB85, 0xB998FB85, 0xB999FB85, 0xB99AFB85, 0xB99BFB85, 0xB99CFB85, 0xB99DFB85, + 0xB99EFB85, 0xB99FFB85, 0xB9A0FB85, 0xB9A1FB85, 0xB9A2FB85, 0xB9A3FB85, 0xB9A4FB85, 0xB9A5FB85, 0xB9A6FB85, 0xB9A7FB85, 0xB9A8FB85, 0xB9A9FB85, 0xB9AAFB85, 0xB9ABFB85, 0xB9ACFB85, + 0xB9ADFB85, 0xB9AEFB85, 0xB9AFFB85, 0xB9B0FB85, 0xB9B1FB85, 0xB9B2FB85, 0xB9B3FB85, 0xB9B4FB85, 0xB9B5FB85, 0xB9B6FB85, 0xB9B7FB85, 0xB9B8FB85, 0xB9B9FB85, 0xB9BAFB85, 0xB9BBFB85, + 0xB9BCFB85, 0xB9BDFB85, 0xB9BEFB85, 0xB9BFFB85, 0xB9C0FB85, 0xB9C1FB85, 0xB9C2FB85, 0xB9C3FB85, 0xB9C4FB85, 0xB9C5FB85, 0xB9C6FB85, 0xB9C7FB85, 0xB9C8FB85, 0xB9C9FB85, 0xB9CAFB85, + 0xB9CBFB85, 0xB9CCFB85, 0xB9CDFB85, 0xB9CEFB85, 0xB9CFFB85, 0xB9D0FB85, 0xB9D1FB85, 0xB9D2FB85, 0xB9D3FB85, 
0xB9D4FB85, 0xB9D5FB85, 0xB9D6FB85, 0xB9D7FB85, 0xB9D8FB85, 0xB9D9FB85, + 0xB9DAFB85, 0xB9DBFB85, 0xB9DCFB85, 0xB9DDFB85, 0xB9DEFB85, 0xB9DFFB85, 0xB9E0FB85, 0xB9E1FB85, 0xB9E2FB85, 0xB9E3FB85, 0xB9E4FB85, 0xB9E5FB85, 0xB9E6FB85, 0xB9E7FB85, 0xB9E8FB85, + 0xB9E9FB85, 0xB9EAFB85, 0xB9EBFB85, 0xB9ECFB85, 0xB9EDFB85, 0xB9EEFB85, 0xB9EFFB85, 0xB9F0FB85, 0xB9F1FB85, 0xB9F2FB85, 0xB9F3FB85, 0xB9F4FB85, 0xB9F5FB85, 0xB9F6FB85, 0xB9F7FB85, + 0xB9F8FB85, 0xB9F9FB85, 0xB9FAFB85, 0xB9FBFB85, 0xB9FCFB85, 0xB9FDFB85, 0xB9FEFB85, 0xB9FFFB85, 0xBA00FB85, 0xBA01FB85, 0xBA02FB85, 0xBA03FB85, 0xBA04FB85, 0xBA05FB85, 0xBA06FB85, + 0xBA07FB85, 0xBA08FB85, 0xBA09FB85, 0xBA0AFB85, 0xBA0BFB85, 0xBA0CFB85, 0xBA0DFB85, 0xBA0EFB85, 0xBA0FFB85, 0xBA10FB85, 0xBA11FB85, 0xBA12FB85, 0xBA13FB85, 0xBA14FB85, 0xBA15FB85, + 0xBA16FB85, 0xBA17FB85, 0xBA18FB85, 0xBA19FB85, 0xBA1AFB85, 0xBA1BFB85, 0xBA1CFB85, 0xBA1DFB85, 0xBA1EFB85, 0xBA1FFB85, 0xBA20FB85, 0xBA21FB85, 0xBA22FB85, 0xBA23FB85, 0xBA24FB85, + 0xBA25FB85, 0xBA26FB85, 0xBA27FB85, 0xBA28FB85, 0xBA29FB85, 0xBA2AFB85, 0xBA2BFB85, 0xBA2CFB85, 0xBA2DFB85, 0xBA2EFB85, 0xBA2FFB85, 0xBA30FB85, 0xBA31FB85, 0xBA32FB85, 0xBA33FB85, + 0xBA34FB85, 0xBA35FB85, 0xBA36FB85, 0xBA37FB85, 0xBA38FB85, 0xBA39FB85, 0xBA3AFB85, 0xBA3BFB85, 0xBA3CFB85, 0xBA3DFB85, 0xBA3EFB85, 0xBA3FFB85, 0xBA40FB85, 0xBA41FB85, 0xBA42FB85, + 0xBA43FB85, 0xBA44FB85, 0xBA45FB85, 0xBA46FB85, 0xBA47FB85, 0xBA48FB85, 0xBA49FB85, 0xBA4AFB85, 0xBA4BFB85, 0xBA4CFB85, 0xBA4DFB85, 0xBA4EFB85, 0xBA4FFB85, 0xBA50FB85, 0xBA51FB85, + 0xBA52FB85, 0xBA53FB85, 0xBA54FB85, 0xBA55FB85, 0xBA56FB85, 0xBA57FB85, 0xBA58FB85, 0xBA59FB85, 0xBA5AFB85, 0xBA5BFB85, 0xBA5CFB85, 0xBA5DFB85, 0xBA5EFB85, 0xBA5FFB85, 0xBA60FB85, + 0xBA61FB85, 0xBA62FB85, 0xBA63FB85, 0xBA64FB85, 0xBA65FB85, 0xBA66FB85, 0xBA67FB85, 0xBA68FB85, 0xBA69FB85, 0xBA6AFB85, 0xBA6BFB85, 0xBA6CFB85, 0xBA6DFB85, 0xBA6EFB85, 0xBA6FFB85, + 0xBA70FB85, 0xBA71FB85, 0xBA72FB85, 0xBA73FB85, 0xBA74FB85, 0xBA75FB85, 0xBA76FB85, 0xBA77FB85, 
0xBA78FB85, 0xBA79FB85, 0xBA7AFB85, 0xBA7BFB85, 0xBA7CFB85, 0xBA7DFB85, 0xBA7EFB85, + 0xBA7FFB85, 0xBA80FB85, 0xBA81FB85, 0xBA82FB85, 0xBA83FB85, 0xBA84FB85, 0xBA85FB85, 0xBA86FB85, 0xBA87FB85, 0xBA88FB85, 0xBA89FB85, 0xBA8AFB85, 0xBA8BFB85, 0xBA8CFB85, 0xBA8DFB85, + 0xBA8EFB85, 0xBA8FFB85, 0xBA90FB85, 0xBA91FB85, 0xBA92FB85, 0xBA93FB85, 0xBA94FB85, 0xBA95FB85, 0xBA96FB85, 0xBA97FB85, 0xBA98FB85, 0xBA99FB85, 0xBA9AFB85, 0xBA9BFB85, 0xBA9CFB85, + 0xBA9DFB85, 0xBA9EFB85, 0xBA9FFB85, 0xBAA0FB85, 0xBAA1FB85, 0xBAA2FB85, 0xBAA3FB85, 0xBAA4FB85, 0xBAA5FB85, 0xBAA6FB85, 0xBAA7FB85, 0xBAA8FB85, 0xBAA9FB85, 0xBAAAFB85, 0xBAABFB85, + 0xBAACFB85, 0xBAADFB85, 0xBAAEFB85, 0xBAAFFB85, 0xBAB0FB85, 0xBAB1FB85, 0xBAB2FB85, 0xBAB3FB85, 0xBAB4FB85, 0xBAB5FB85, 0xBAB6FB85, 0xBAB7FB85, 0xBAB8FB85, 0xBAB9FB85, 0xBABAFB85, + 0xBABBFB85, 0xBABCFB85, 0xBABDFB85, 0xBABEFB85, 0xBABFFB85, 0xBAC0FB85, 0xBAC1FB85, 0xBAC2FB85, 0xBAC3FB85, 0xBAC4FB85, 0xBAC5FB85, 0xBAC6FB85, 0xBAC7FB85, 0xBAC8FB85, 0xBAC9FB85, + 0xBACAFB85, 0xBACBFB85, 0xBACCFB85, 0xBACDFB85, 0xBACEFB85, 0xBACFFB85, 0xBAD0FB85, 0xBAD1FB85, 0xBAD2FB85, 0xBAD3FB85, 0xBAD4FB85, 0xBAD5FB85, 0xBAD6FB85, 0xBAD7FB85, 0xBAD8FB85, + 0xBAD9FB85, 0xBADAFB85, 0xBADBFB85, 0xBADCFB85, 0xBADDFB85, 0xBADEFB85, 0xBADFFB85, 0xBAE0FB85, 0xBAE1FB85, 0xBAE2FB85, 0xBAE3FB85, 0xBAE4FB85, 0xBAE5FB85, 0xBAE6FB85, 0xBAE7FB85, + 0xBAE8FB85, 0xBAE9FB85, 0xBAEAFB85, 0xBAEBFB85, 0xBAECFB85, 0xBAEDFB85, 0xBAEEFB85, 0xBAEFFB85, 0xBAF0FB85, 0xBAF1FB85, 0xBAF2FB85, 0xBAF3FB85, 0xBAF4FB85, 0xBAF5FB85, 0xBAF6FB85, + 0xBAF7FB85, 0xBAF8FB85, 0xBAF9FB85, 0xBAFAFB85, 0xBAFBFB85, 0xBAFCFB85, 0xBAFDFB85, 0xBAFEFB85, 0xBAFFFB85, 0xBB00FB85, 0xBB01FB85, 0xBB02FB85, 0xBB03FB85, 0xBB04FB85, 0xBB05FB85, + 0xBB06FB85, 0xBB07FB85, 0xBB08FB85, 0xBB09FB85, 0xBB0AFB85, 0xBB0BFB85, 0xBB0CFB85, 0xBB0DFB85, 0xBB0EFB85, 0xBB0FFB85, 0xBB10FB85, 0xBB11FB85, 0xBB12FB85, 0xBB13FB85, 0xBB14FB85, + 0xBB15FB85, 0xBB16FB85, 0xBB17FB85, 0xBB18FB85, 0xBB19FB85, 0xBB1AFB85, 0xBB1BFB85, 
0xBB1CFB85, 0xBB1DFB85, 0xBB1EFB85, 0xBB1FFB85, 0xBB20FB85, 0xBB21FB85, 0xBB22FB85, 0xBB23FB85, + 0xBB24FB85, 0xBB25FB85, 0xBB26FB85, 0xBB27FB85, 0xBB28FB85, 0xBB29FB85, 0xBB2AFB85, 0xBB2BFB85, 0xBB2CFB85, 0xBB2DFB85, 0xBB2EFB85, 0xBB2FFB85, 0xBB30FB85, 0xBB31FB85, 0xBB32FB85, + 0xBB33FB85, 0xBB34FB85, 0xBB35FB85, 0xBB36FB85, 0xBB37FB85, 0xBB38FB85, 0xBB39FB85, 0xBB3AFB85, 0xBB3BFB85, 0xBB3CFB85, 0xBB3DFB85, 0xBB3EFB85, 0xBB3FFB85, 0xBB40FB85, 0xBB41FB85, + 0xBB42FB85, 0xBB43FB85, 0xBB44FB85, 0xBB45FB85, 0xBB46FB85, 0xBB47FB85, 0xBB48FB85, 0xBB49FB85, 0xBB4AFB85, 0xBB4BFB85, 0xBB4CFB85, 0xBB4DFB85, 0xBB4EFB85, 0xBB4FFB85, 0xBB50FB85, + 0xBB51FB85, 0xBB52FB85, 0xBB53FB85, 0xBB54FB85, 0xBB55FB85, 0xBB56FB85, 0xBB57FB85, 0xBB58FB85, 0xBB59FB85, 0xBB5AFB85, 0xBB5BFB85, 0xBB5CFB85, 0xBB5DFB85, 0xBB5EFB85, 0xBB5FFB85, + 0xBB60FB85, 0xBB61FB85, 0xBB62FB85, 0xBB63FB85, 0xBB64FB85, 0xBB65FB85, 0xBB66FB85, 0xBB67FB85, 0xBB68FB85, 0xBB69FB85, 0xBB6AFB85, 0xBB6BFB85, 0xBB6CFB85, 0xBB6DFB85, 0xBB6EFB85, + 0xBB6FFB85, 0xBB70FB85, 0xBB71FB85, 0xBB72FB85, 0xBB73FB85, 0xBB74FB85, 0xBB75FB85, 0xBB76FB85, 0xBB77FB85, 0xBB78FB85, 0xBB79FB85, 0xBB7AFB85, 0xBB7BFB85, 0xBB7CFB85, 0xBB7DFB85, + 0xBB7EFB85, 0xBB7FFB85, 0xBB80FB85, 0xBB81FB85, 0xBB82FB85, 0xBB83FB85, 0xBB84FB85, 0xBB85FB85, 0xBB86FB85, 0xBB87FB85, 0xBB88FB85, 0xBB89FB85, 0xBB8AFB85, 0xBB8BFB85, 0xBB8CFB85, + 0xBB8DFB85, 0xBB8EFB85, 0xBB8FFB85, 0xBB90FB85, 0xBB91FB85, 0xBB92FB85, 0xBB93FB85, 0xBB94FB85, 0xBB95FB85, 0xBB96FB85, 0xBB97FB85, 0xBB98FB85, 0xBB99FB85, 0xBB9AFB85, 0xBB9BFB85, + 0xBB9CFB85, 0xBB9DFB85, 0xBB9EFB85, 0xBB9FFB85, 0xBBA0FB85, 0xBBA1FB85, 0xBBA2FB85, 0xBBA3FB85, 0xBBA4FB85, 0xBBA5FB85, 0xBBA6FB85, 0xBBA7FB85, 0xBBA8FB85, 0xBBA9FB85, 0xBBAAFB85, + 0xBBABFB85, 0xBBACFB85, 0xBBADFB85, 0xBBAEFB85, 0xBBAFFB85, 0xBBB0FB85, 0xBBB1FB85, 0xBBB2FB85, 0xBBB3FB85, 0xBBB4FB85, 0xBBB5FB85, 0xBBB6FB85, 0xBBB7FB85, 0xBBB8FB85, 0xBBB9FB85, + 0xBBBAFB85, 0xBBBBFB85, 0xBBBCFB85, 0xBBBDFB85, 0xBBBEFB85, 0xBBBFFB85, 
0xBBC0FB85, 0xBBC1FB85, 0xBBC2FB85, 0xBBC3FB85, 0xBBC4FB85, 0xBBC5FB85, 0xBBC6FB85, 0xBBC7FB85, 0xBBC8FB85, + 0xBBC9FB85, 0xBBCAFB85, 0xBBCBFB85, 0xBBCCFB85, 0xBBCDFB85, 0xBBCEFB85, 0xBBCFFB85, 0xBBD0FB85, 0xBBD1FB85, 0xBBD2FB85, 0xBBD3FB85, 0xBBD4FB85, 0xBBD5FB85, 0xBBD6FB85, 0xBBD7FB85, + 0xBBD8FB85, 0xBBD9FB85, 0xBBDAFB85, 0xBBDBFB85, 0xBBDCFB85, 0xBBDDFB85, 0xBBDEFB85, 0xBBDFFB85, 0xBBE0FB85, 0xBBE1FB85, 0xBBE2FB85, 0xBBE3FB85, 0xBBE4FB85, 0xBBE5FB85, 0xBBE6FB85, + 0xBBE7FB85, 0xBBE8FB85, 0xBBE9FB85, 0xBBEAFB85, 0xBBEBFB85, 0xBBECFB85, 0xBBEDFB85, 0xBBEEFB85, 0xBBEFFB85, 0xBBF0FB85, 0xBBF1FB85, 0xBBF2FB85, 0xBBF3FB85, 0xBBF4FB85, 0xBBF5FB85, + 0xBBF6FB85, 0xBBF7FB85, 0xBBF8FB85, 0xBBF9FB85, 0xBBFAFB85, 0xBBFBFB85, 0xBBFCFB85, 0xBBFDFB85, 0xBBFEFB85, 0xBBFFFB85, 0xBC00FB85, 0xBC01FB85, 0xBC02FB85, 0xBC03FB85, 0xBC04FB85, + 0xBC05FB85, 0xBC06FB85, 0xBC07FB85, 0xBC08FB85, 0xBC09FB85, 0xBC0AFB85, 0xBC0BFB85, 0xBC0CFB85, 0xBC0DFB85, 0xBC0EFB85, 0xBC0FFB85, 0xBC10FB85, 0xBC11FB85, 0xBC12FB85, 0xBC13FB85, + 0xBC14FB85, 0xBC15FB85, 0xBC16FB85, 0xBC17FB85, 0xBC18FB85, 0xBC19FB85, 0xBC1AFB85, 0xBC1BFB85, 0xBC1CFB85, 0xBC1DFB85, 0xBC1EFB85, 0xBC1FFB85, 0xBC20FB85, 0xBC21FB85, 0xBC22FB85, + 0xBC23FB85, 0xBC24FB85, 0xBC25FB85, 0xBC26FB85, 0xBC27FB85, 0xBC28FB85, 0xBC29FB85, 0xBC2AFB85, 0xBC2BFB85, 0xBC2CFB85, 0xBC2DFB85, 0xBC2EFB85, 0xBC2FFB85, 0xBC30FB85, 0xBC31FB85, + 0xBC32FB85, 0xBC33FB85, 0xBC34FB85, 0xBC35FB85, 0xBC36FB85, 0xBC37FB85, 0xBC38FB85, 0xBC39FB85, 0xBC3AFB85, 0xBC3BFB85, 0xBC3CFB85, 0xBC3DFB85, 0xBC3EFB85, 0xBC3FFB85, 0xBC40FB85, + 0xBC41FB85, 0xBC42FB85, 0xBC43FB85, 0xBC44FB85, 0xBC45FB85, 0xBC46FB85, 0xBC47FB85, 0xBC48FB85, 0xBC49FB85, 0xBC4AFB85, 0xBC4BFB85, 0xBC4CFB85, 0xBC4DFB85, 0xBC4EFB85, 0xBC4FFB85, + 0xBC50FB85, 0xBC51FB85, 0xBC52FB85, 0xBC53FB85, 0xBC54FB85, 0xBC55FB85, 0xBC56FB85, 0xBC57FB85, 0xBC58FB85, 0xBC59FB85, 0xBC5AFB85, 0xBC5BFB85, 0xBC5CFB85, 0xBC5DFB85, 0xBC5EFB85, + 0xBC5FFB85, 0xBC60FB85, 0xBC61FB85, 0xBC62FB85, 0xBC63FB85, 
0xBC64FB85, 0xBC65FB85, 0xBC66FB85, 0xBC67FB85, 0xBC68FB85, 0xBC69FB85, 0xBC6AFB85, 0xBC6BFB85, 0xBC6CFB85, 0xBC6DFB85, + 0xBC6EFB85, 0xBC6FFB85, 0xBC70FB85, 0xBC71FB85, 0xBC72FB85, 0xBC73FB85, 0xBC74FB85, 0xBC75FB85, 0xBC76FB85, 0xBC77FB85, 0xBC78FB85, 0xBC79FB85, 0xBC7AFB85, 0xBC7BFB85, 0xBC7CFB85, + 0xBC7DFB85, 0xBC7EFB85, 0xBC7FFB85, 0xBC80FB85, 0xBC81FB85, 0xBC82FB85, 0xBC83FB85, 0xBC84FB85, 0xBC85FB85, 0xBC86FB85, 0xBC87FB85, 0xBC88FB85, 0xBC89FB85, 0xBC8AFB85, 0xBC8BFB85, + 0xBC8CFB85, 0xBC8DFB85, 0xBC8EFB85, 0xBC8FFB85, 0xBC90FB85, 0xBC91FB85, 0xBC92FB85, 0xBC93FB85, 0xBC94FB85, 0xBC95FB85, 0xBC96FB85, 0xBC97FB85, 0xBC98FB85, 0xBC99FB85, 0xBC9AFB85, + 0xBC9BFB85, 0xBC9CFB85, 0xBC9DFB85, 0xBC9EFB85, 0xBC9FFB85, 0xBCA0FB85, 0xBCA1FB85, 0xBCA2FB85, 0xBCA3FB85, 0xBCA4FB85, 0xBCA5FB85, 0xBCA6FB85, 0xBCA7FB85, 0xBCA8FB85, 0xBCA9FB85, + 0xBCAAFB85, 0xBCABFB85, 0xBCACFB85, 0xBCADFB85, 0xBCAEFB85, 0xBCAFFB85, 0xBCB0FB85, 0xBCB1FB85, 0xBCB2FB85, 0xBCB3FB85, 0xBCB4FB85, 0xBCB5FB85, 0xBCB6FB85, 0xBCB7FB85, 0xBCB8FB85, + 0xBCB9FB85, 0xBCBAFB85, 0xBCBBFB85, 0xBCBCFB85, 0xBCBDFB85, 0xBCBEFB85, 0xBCBFFB85, 0xBCC0FB85, 0xBCC1FB85, 0xBCC2FB85, 0xBCC3FB85, 0xBCC4FB85, 0xBCC5FB85, 0xBCC6FB85, 0xBCC7FB85, + 0xBCC8FB85, 0xBCC9FB85, 0xBCCAFB85, 0xBCCBFB85, 0xBCCCFB85, 0xBCCDFB85, 0xBCCEFB85, 0xBCCFFB85, 0xBCD0FB85, 0xBCD1FB85, 0xBCD2FB85, 0xBCD3FB85, 0xBCD4FB85, 0xBCD5FB85, 0xBCD6FB85, + 0xBCD7FB85, 0xBCD8FB85, 0xBCD9FB85, 0xBCDAFB85, 0xBCDBFB85, 0xBCDCFB85, 0xBCDDFB85, 0xBCDEFB85, 0xBCDFFB85, 0xBCE0FB85, 0xBCE1FB85, 0xBCE2FB85, 0xBCE3FB85, 0xBCE4FB85, 0xBCE5FB85, + 0xBCE6FB85, 0xBCE7FB85, 0xBCE8FB85, 0xBCE9FB85, 0xBCEAFB85, 0xBCEBFB85, 0xBCECFB85, 0xBCEDFB85, 0xBCEEFB85, 0xBCEFFB85, 0xBCF0FB85, 0xBCF1FB85, 0xBCF2FB85, 0xBCF3FB85, 0xBCF4FB85, + 0xBCF5FB85, 0xBCF6FB85, 0xBCF7FB85, 0xBCF8FB85, 0xBCF9FB85, 0xBCFAFB85, 0xBCFBFB85, 0xBCFCFB85, 0xBCFDFB85, 0xBCFEFB85, 0xBCFFFB85, 0xBD00FB85, 0xBD01FB85, 0xBD02FB85, 0xBD03FB85, + 0xBD04FB85, 0xBD05FB85, 0xBD06FB85, 0xBD07FB85, 
0xBD08FB85, 0xBD09FB85, 0xBD0AFB85, 0xBD0BFB85, 0xBD0CFB85, 0xBD0DFB85, 0xBD0EFB85, 0xBD0FFB85, 0xBD10FB85, 0xBD11FB85, 0xBD12FB85, + 0xBD13FB85, 0xBD14FB85, 0xBD15FB85, 0xBD16FB85, 0xBD17FB85, 0xBD18FB85, 0xBD19FB85, 0xBD1AFB85, 0xBD1BFB85, 0xBD1CFB85, 0xBD1DFB85, 0xBD1EFB85, 0xBD1FFB85, 0xBD20FB85, 0xBD21FB85, + 0xBD22FB85, 0xBD23FB85, 0xBD24FB85, 0xBD25FB85, 0xBD26FB85, 0xBD27FB85, 0xBD28FB85, 0xBD29FB85, 0xBD2AFB85, 0xBD2BFB85, 0xBD2CFB85, 0xBD2DFB85, 0xBD2EFB85, 0xBD2FFB85, 0xBD30FB85, + 0xBD31FB85, 0xBD32FB85, 0xBD33FB85, 0xBD34FB85, 0xBD35FB85, 0xBD36FB85, 0xBD37FB85, 0xBD38FB85, 0xBD39FB85, 0xBD3AFB85, 0xBD3BFB85, 0xBD3CFB85, 0xBD3DFB85, 0xBD3EFB85, 0xBD3FFB85, + 0xBD40FB85, 0xBD41FB85, 0xBD42FB85, 0xBD43FB85, 0xBD44FB85, 0xBD45FB85, 0xBD46FB85, 0xBD47FB85, 0xBD48FB85, 0xBD49FB85, 0xBD4AFB85, 0xBD4BFB85, 0xBD4CFB85, 0xBD4DFB85, 0xBD4EFB85, + 0xBD4FFB85, 0xBD50FB85, 0xBD51FB85, 0xBD52FB85, 0xBD53FB85, 0xBD54FB85, 0xBD55FB85, 0xBD56FB85, 0xBD57FB85, 0xBD58FB85, 0xBD59FB85, 0xBD5AFB85, 0xBD5BFB85, 0xBD5CFB85, 0xBD5DFB85, + 0xBD5EFB85, 0xBD5FFB85, 0xBD60FB85, 0xBD61FB85, 0xBD62FB85, 0xBD63FB85, 0xBD64FB85, 0xBD65FB85, 0xBD66FB85, 0xBD67FB85, 0xBD68FB85, 0xBD69FB85, 0xBD6AFB85, 0xBD6BFB85, 0xBD6CFB85, + 0xBD6DFB85, 0xBD6EFB85, 0xBD6FFB85, 0xBD70FB85, 0xBD71FB85, 0xBD72FB85, 0xBD73FB85, 0xBD74FB85, 0xBD75FB85, 0xBD76FB85, 0xBD77FB85, 0xBD78FB85, 0xBD79FB85, 0xBD7AFB85, 0xBD7BFB85, + 0xBD7CFB85, 0xBD7DFB85, 0xBD7EFB85, 0xBD7FFB85, 0xBD80FB85, 0xBD81FB85, 0xBD82FB85, 0xBD83FB85, 0xBD84FB85, 0xBD85FB85, 0xBD86FB85, 0xBD87FB85, 0xBD88FB85, 0xBD89FB85, 0xBD8AFB85, + 0xBD8BFB85, 0xBD8CFB85, 0xBD8DFB85, 0xBD8EFB85, 0xBD8FFB85, 0xBD90FB85, 0xBD91FB85, 0xBD92FB85, 0xBD93FB85, 0xBD94FB85, 0xBD95FB85, 0xBD96FB85, 0xBD97FB85, 0xBD98FB85, 0xBD99FB85, + 0xBD9AFB85, 0xBD9BFB85, 0xBD9CFB85, 0xBD9DFB85, 0xBD9EFB85, 0xBD9FFB85, 0xBDA0FB85, 0xBDA1FB85, 0xBDA2FB85, 0xBDA3FB85, 0xBDA4FB85, 0xBDA5FB85, 0xBDA6FB85, 0xBDA7FB85, 0xBDA8FB85, + 0xBDA9FB85, 0xBDAAFB85, 0xBDABFB85, 
0xBDACFB85, 0xBDADFB85, 0xBDAEFB85, 0xBDAFFB85, 0xBDB0FB85, 0xBDB1FB85, 0xBDB2FB85, 0xBDB3FB85, 0xBDB4FB85, 0xBDB5FB85, 0xBDB6FB85, 0xBDB7FB85, + 0xBDB8FB85, 0xBDB9FB85, 0xBDBAFB85, 0xBDBBFB85, 0xBDBCFB85, 0xBDBDFB85, 0xBDBEFB85, 0xBDBFFB85, 0xBDC0FB85, 0xBDC1FB85, 0xBDC2FB85, 0xBDC3FB85, 0xBDC4FB85, 0xBDC5FB85, 0xBDC6FB85, + 0xBDC7FB85, 0xBDC8FB85, 0xBDC9FB85, 0xBDCAFB85, 0xBDCBFB85, 0xBDCCFB85, 0xBDCDFB85, 0xBDCEFB85, 0xBDCFFB85, 0xBDD0FB85, 0xBDD1FB85, 0xBDD2FB85, 0xBDD3FB85, 0xBDD4FB85, 0xBDD5FB85, + 0xBDD6FB85, 0xBDD7FB85, 0xBDD8FB85, 0xBDD9FB85, 0xBDDAFB85, 0xBDDBFB85, 0xBDDCFB85, 0xBDDDFB85, 0xBDDEFB85, 0xBDDFFB85, 0xBDE0FB85, 0xBDE1FB85, 0xBDE2FB85, 0xBDE3FB85, 0xBDE4FB85, + 0xBDE5FB85, 0xBDE6FB85, 0xBDE7FB85, 0xBDE8FB85, 0xBDE9FB85, 0xBDEAFB85, 0xBDEBFB85, 0xBDECFB85, 0xBDEDFB85, 0xBDEEFB85, 0xBDEFFB85, 0xBDF0FB85, 0xBDF1FB85, 0xBDF2FB85, 0xBDF3FB85, + 0xBDF4FB85, 0xBDF5FB85, 0xBDF6FB85, 0xBDF7FB85, 0xBDF8FB85, 0xBDF9FB85, 0xBDFAFB85, 0xBDFBFB85, 0xBDFCFB85, 0xBDFDFB85, 0xBDFEFB85, 0xBDFFFB85, 0xBE00FB85, 0xBE01FB85, 0xBE02FB85, + 0xBE03FB85, 0xBE04FB85, 0xBE05FB85, 0xBE06FB85, 0xBE07FB85, 0xBE08FB85, 0xBE09FB85, 0xBE0AFB85, 0xBE0BFB85, 0xBE0CFB85, 0xBE0DFB85, 0xBE0EFB85, 0xBE0FFB85, 0xBE10FB85, 0xBE11FB85, + 0xBE12FB85, 0xBE13FB85, 0xBE14FB85, 0xBE15FB85, 0xBE16FB85, 0xBE17FB85, 0xBE18FB85, 0xBE19FB85, 0xBE1AFB85, 0xBE1BFB85, 0xBE1CFB85, 0xBE1DFB85, 0xBE1EFB85, 0xBE1FFB85, 0xBE20FB85, + 0xBE21FB85, 0xBE22FB85, 0xBE23FB85, 0xBE24FB85, 0xBE25FB85, 0xBE26FB85, 0xBE27FB85, 0xBE28FB85, 0xBE29FB85, 0xBE2AFB85, 0xBE2BFB85, 0xBE2CFB85, 0xBE2DFB85, 0xBE2EFB85, 0xBE2FFB85, + 0xBE30FB85, 0xBE31FB85, 0xBE32FB85, 0xBE33FB85, 0xBE34FB85, 0xBE35FB85, 0xBE36FB85, 0xBE37FB85, 0xBE38FB85, 0xBE39FB85, 0xBE3AFB85, 0xBE3BFB85, 0xBE3CFB85, 0xBE3DFB85, 0xBE3EFB85, + 0xBE3FFB85, 0xBE40FB85, 0xBE41FB85, 0xBE42FB85, 0xBE43FB85, 0xBE44FB85, 0xBE45FB85, 0xBE46FB85, 0xBE47FB85, 0xBE48FB85, 0xBE49FB85, 0xBE4AFB85, 0xBE4BFB85, 0xBE4CFB85, 0xBE4DFB85, + 0xBE4EFB85, 0xBE4FFB85, 
0xBE50FB85, 0xBE51FB85, 0xBE52FB85, 0xBE53FB85, 0xBE54FB85, 0xBE55FB85, 0xBE56FB85, 0xBE57FB85, 0xBE58FB85, 0xBE59FB85, 0xBE5AFB85, 0xBE5BFB85, 0xBE5CFB85, + 0xBE5DFB85, 0xBE5EFB85, 0xBE5FFB85, 0xBE60FB85, 0xBE61FB85, 0xBE62FB85, 0xBE63FB85, 0xBE64FB85, 0xBE65FB85, 0xBE66FB85, 0xBE67FB85, 0xBE68FB85, 0xBE69FB85, 0xBE6AFB85, 0xBE6BFB85, + 0xBE6CFB85, 0xBE6DFB85, 0xBE6EFB85, 0xBE6FFB85, 0xBE70FB85, 0xBE71FB85, 0xBE72FB85, 0xBE73FB85, 0xBE74FB85, 0xBE75FB85, 0xBE76FB85, 0xBE77FB85, 0xBE78FB85, 0xBE79FB85, 0xBE7AFB85, + 0xBE7BFB85, 0xBE7CFB85, 0xBE7DFB85, 0xBE7EFB85, 0xBE7FFB85, 0xBE80FB85, 0xBE81FB85, 0xBE82FB85, 0xBE83FB85, 0xBE84FB85, 0xBE85FB85, 0xBE86FB85, 0xBE87FB85, 0xBE88FB85, 0xBE89FB85, + 0xBE8AFB85, 0xBE8BFB85, 0xBE8CFB85, 0xBE8DFB85, 0xBE8EFB85, 0xBE8FFB85, 0xBE90FB85, 0xBE91FB85, 0xBE92FB85, 0xBE93FB85, 0xBE94FB85, 0xBE95FB85, 0xBE96FB85, 0xBE97FB85, 0xBE98FB85, + 0xBE99FB85, 0xBE9AFB85, 0xBE9BFB85, 0xBE9CFB85, 0xBE9DFB85, 0xBE9EFB85, 0xBE9FFB85, 0xBEA0FB85, 0xBEA1FB85, 0xBEA2FB85, 0xBEA3FB85, 0xBEA4FB85, 0xBEA5FB85, 0xBEA6FB85, 0xBEA7FB85, + 0xBEA8FB85, 0xBEA9FB85, 0xBEAAFB85, 0xBEABFB85, 0xBEACFB85, 0xBEADFB85, 0xBEAEFB85, 0xBEAFFB85, 0xBEB0FB85, 0xBEB1FB85, 0xBEB2FB85, 0xBEB3FB85, 0xBEB4FB85, 0xBEB5FB85, 0xBEB6FB85, + 0xBEB7FB85, 0xBEB8FB85, 0xBEB9FB85, 0xBEBAFB85, 0xBEBBFB85, 0xBEBCFB85, 0xBEBDFB85, 0xBEBEFB85, 0xBEBFFB85, 0xBEC0FB85, 0xBEC1FB85, 0xBEC2FB85, 0xBEC3FB85, 0xBEC4FB85, 0xBEC5FB85, + 0xBEC6FB85, 0xBEC7FB85, 0xBEC8FB85, 0xBEC9FB85, 0xBECAFB85, 0xBECBFB85, 0xBECCFB85, 0xBECDFB85, 0xBECEFB85, 0xBECFFB85, 0xBED0FB85, 0xBED1FB85, 0xBED2FB85, 0xBED3FB85, 0xBED4FB85, + 0xBED5FB85, 0xBED6FB85, 0xBED7FB85, 0xBED8FB85, 0xBED9FB85, 0xBEDAFB85, 0xBEDBFB85, 0xBEDCFB85, 0xBEDDFB85, 0xBEDEFB85, 0xBEDFFB85, 0xBEE0FB85, 0xBEE1FB85, 0xBEE2FB85, 0xBEE3FB85, + 0xBEE4FB85, 0xBEE5FB85, 0xBEE6FB85, 0xBEE7FB85, 0xBEE8FB85, 0xBEE9FB85, 0xBEEAFB85, 0xBEEBFB85, 0xBEECFB85, 0xBEEDFB85, 0xBEEEFB85, 0xBEEFFB85, 0xBEF0FB85, 0xBEF1FB85, 0xBEF2FB85, + 0xBEF3FB85, 
0xBEF4FB85, 0xBEF5FB85, 0xBEF6FB85, 0xBEF7FB85, 0xBEF8FB85, 0xBEF9FB85, 0xBEFAFB85, 0xBEFBFB85, 0xBEFCFB85, 0xBEFDFB85, 0xBEFEFB85, 0xBEFFFB85, 0xBF00FB85, 0xBF01FB85, + 0xBF02FB85, 0xBF03FB85, 0xBF04FB85, 0xBF05FB85, 0xBF06FB85, 0xBF07FB85, 0xBF08FB85, 0xBF09FB85, 0xBF0AFB85, 0xBF0BFB85, 0xBF0CFB85, 0xBF0DFB85, 0xBF0EFB85, 0xBF0FFB85, 0xBF10FB85, + 0xBF11FB85, 0xBF12FB85, 0xBF13FB85, 0xBF14FB85, 0xBF15FB85, 0xBF16FB85, 0xBF17FB85, 0xBF18FB85, 0xBF19FB85, 0xBF1AFB85, 0xBF1BFB85, 0xBF1CFB85, 0xBF1DFB85, 0xBF1EFB85, 0xBF1FFB85, + 0xBF20FB85, 0xBF21FB85, 0xBF22FB85, 0xBF23FB85, 0xBF24FB85, 0xBF25FB85, 0xBF26FB85, 0xBF27FB85, 0xBF28FB85, 0xBF29FB85, 0xBF2AFB85, 0xBF2BFB85, 0xBF2CFB85, 0xBF2DFB85, 0xBF2EFB85, + 0xBF2FFB85, 0xBF30FB85, 0xBF31FB85, 0xBF32FB85, 0xBF33FB85, 0xBF34FB85, 0xBF35FB85, 0xBF36FB85, 0xBF37FB85, 0xBF38FB85, 0xBF39FB85, 0xBF3AFB85, 0xBF3BFB85, 0xBF3CFB85, 0xBF3DFB85, + 0xBF3EFB85, 0xBF3FFB85, 0xBF40FB85, 0xBF41FB85, 0xBF42FB85, 0xBF43FB85, 0xBF44FB85, 0xBF45FB85, 0xBF46FB85, 0xBF47FB85, 0xBF48FB85, 0xBF49FB85, 0xBF4AFB85, 0xBF4BFB85, 0xBF4CFB85, + 0xBF4DFB85, 0xBF4EFB85, 0xBF4FFB85, 0xBF50FB85, 0xBF51FB85, 0xBF52FB85, 0xBF53FB85, 0xBF54FB85, 0xBF55FB85, 0xBF56FB85, 0xBF57FB85, 0xBF58FB85, 0xBF59FB85, 0xBF5AFB85, 0xBF5BFB85, + 0xBF5CFB85, 0xBF5DFB85, 0xBF5EFB85, 0xBF5FFB85, 0xBF60FB85, 0xBF61FB85, 0xBF62FB85, 0xBF63FB85, 0xBF64FB85, 0xBF65FB85, 0xBF66FB85, 0xBF67FB85, 0xBF68FB85, 0xBF69FB85, 0xBF6AFB85, + 0xBF6BFB85, 0xBF6CFB85, 0xBF6DFB85, 0xBF6EFB85, 0xBF6FFB85, 0xBF70FB85, 0xBF71FB85, 0xBF72FB85, 0xBF73FB85, 0xBF74FB85, 0xBF75FB85, 0xBF76FB85, 0xBF77FB85, 0xBF78FB85, 0xBF79FB85, + 0xBF7AFB85, 0xBF7BFB85, 0xBF7CFB85, 0xBF7DFB85, 0xBF7EFB85, 0xBF7FFB85, 0xBF80FB85, 0xBF81FB85, 0xBF82FB85, 0xBF83FB85, 0xBF84FB85, 0xBF85FB85, 0xBF86FB85, 0xBF87FB85, 0xBF88FB85, + 0xBF89FB85, 0xBF8AFB85, 0xBF8BFB85, 0xBF8CFB85, 0xBF8DFB85, 0xBF8EFB85, 0xBF8FFB85, 0xBF90FB85, 0xBF91FB85, 0xBF92FB85, 0xBF93FB85, 0xBF94FB85, 0xBF95FB85, 0xBF96FB85, 0xBF97FB85, + 
0xBF98FB85, 0xBF99FB85, 0xBF9AFB85, 0xBF9BFB85, 0xBF9CFB85, 0xBF9DFB85, 0xBF9EFB85, 0xBF9FFB85, 0xBFA0FB85, 0xBFA1FB85, 0xBFA2FB85, 0xBFA3FB85, 0xBFA4FB85, 0xBFA5FB85, 0xBFA6FB85, + 0xBFA7FB85, 0xBFA8FB85, 0xBFA9FB85, 0xBFAAFB85, 0xBFABFB85, 0xBFACFB85, 0xBFADFB85, 0xBFAEFB85, 0xBFAFFB85, 0xBFB0FB85, 0xBFB1FB85, 0xBFB2FB85, 0xBFB3FB85, 0xBFB4FB85, 0xBFB5FB85, + 0xBFB6FB85, 0xBFB7FB85, 0xBFB8FB85, 0xBFB9FB85, 0xBFBAFB85, 0xBFBBFB85, 0xBFBCFB85, 0xBFBDFB85, 0xBFBEFB85, 0xBFBFFB85, 0xBFC0FB85, 0xBFC1FB85, 0xBFC2FB85, 0xBFC3FB85, 0xBFC4FB85, + 0xBFC5FB85, 0xBFC6FB85, 0xBFC7FB85, 0xBFC8FB85, 0xBFC9FB85, 0xBFCAFB85, 0xBFCBFB85, 0xBFCCFB85, 0xBFCDFB85, 0xBFCEFB85, 0xBFCFFB85, 0xBFD0FB85, 0xBFD1FB85, 0xBFD2FB85, 0xBFD3FB85, + 0xBFD4FB85, 0xBFD5FB85, 0xBFD6FB85, 0xBFD7FB85, 0xBFD8FB85, 0xBFD9FB85, 0xBFDAFB85, 0xBFDBFB85, 0xBFDCFB85, 0xBFDDFB85, 0xBFDEFB85, 0xBFDFFB85, 0xBFE0FB85, 0xBFE1FB85, 0xBFE2FB85, + 0xBFE3FB85, 0xBFE4FB85, 0xBFE5FB85, 0xBFE6FB85, 0xBFE7FB85, 0xBFE8FB85, 0xBFE9FB85, 0xBFEAFB85, 0xBFEBFB85, 0xBFECFB85, 0xBFEDFB85, 0xBFEEFB85, 0xBFEFFB85, 0xBFF0FB85, 0xBFF1FB85, + 0xBFF2FB85, 0xBFF3FB85, 0xBFF4FB85, 0xBFF5FB85, 0xBFF6FB85, 0xBFF7FB85, 0xBFF8FB85, 0xBFF9FB85, 0xBFFAFB85, 0xBFFBFB85, 0xBFFCFB85, 0xBFFDFB85, 0xBFFEFB85, 0xBFFFFB85, 0xC000FB85, + 0xC001FB85, 0xC002FB85, 0xC003FB85, 0xC004FB85, 0xC005FB85, 0xC006FB85, 0xC007FB85, 0xC008FB85, 0xC009FB85, 0xC00AFB85, 0xC00BFB85, 0xC00CFB85, 0xC00DFB85, 0xC00EFB85, 0xC00FFB85, + 0xC010FB85, 0xC011FB85, 0xC012FB85, 0xC013FB85, 0xC014FB85, 0xC015FB85, 0xC016FB85, 0xC017FB85, 0xC018FB85, 0xC019FB85, 0xC01AFB85, 0xC01BFB85, 0xC01CFB85, 0xC01DFB85, 0xC01EFB85, + 0xC01FFB85, 0xC020FB85, 0xC021FB85, 0xC022FB85, 0xC023FB85, 0xC024FB85, 0xC025FB85, 0xC026FB85, 0xC027FB85, 0xC028FB85, 0xC029FB85, 0xC02AFB85, 0xC02BFB85, 0xC02CFB85, 0xC02DFB85, + 0xC02EFB85, 0xC02FFB85, 0xC030FB85, 0xC031FB85, 0xC032FB85, 0xC033FB85, 0xC034FB85, 0xC035FB85, 0xC036FB85, 0xC037FB85, 0xC038FB85, 0xC039FB85, 0xC03AFB85, 0xC03BFB85, 0xC03CFB85, 
+ 0xC03DFB85, 0xC03EFB85, 0xC03FFB85, 0xC040FB85, 0xC041FB85, 0xC042FB85, 0xC043FB85, 0xC044FB85, 0xC045FB85, 0xC046FB85, 0xC047FB85, 0xC048FB85, 0xC049FB85, 0xC04AFB85, 0xC04BFB85, + 0xC04CFB85, 0xC04DFB85, 0xC04EFB85, 0xC04FFB85, 0xC050FB85, 0xC051FB85, 0xC052FB85, 0xC053FB85, 0xC054FB85, 0xC055FB85, 0xC056FB85, 0xC057FB85, 0xC058FB85, 0xC059FB85, 0xC05AFB85, + 0xC05BFB85, 0xC05CFB85, 0xC05DFB85, 0xC05EFB85, 0xC05FFB85, 0xC060FB85, 0xC061FB85, 0xC062FB85, 0xC063FB85, 0xC064FB85, 0xC065FB85, 0xC066FB85, 0xC067FB85, 0xC068FB85, 0xC069FB85, + 0xC06AFB85, 0xC06BFB85, 0xC06CFB85, 0xC06DFB85, 0xC06EFB85, 0xC06FFB85, 0xC070FB85, 0xC071FB85, 0xC072FB85, 0xC073FB85, 0xC074FB85, 0xC075FB85, 0xC076FB85, 0xC077FB85, 0xC078FB85, + 0xC079FB85, 0xC07AFB85, 0xC07BFB85, 0xC07CFB85, 0xC07DFB85, 0xC07EFB85, 0xC07FFB85, 0xC080FB85, 0xC081FB85, 0xC082FB85, 0xC083FB85, 0xC084FB85, 0xC085FB85, 0xC086FB85, 0xC087FB85, + 0xC088FB85, 0xC089FB85, 0xC08AFB85, 0xC08BFB85, 0xC08CFB85, 0xC08DFB85, 0xC08EFB85, 0xC08FFB85, 0xC090FB85, 0xC091FB85, 0xC092FB85, 0xC093FB85, 0xC094FB85, 0xC095FB85, 0xC096FB85, + 0xC097FB85, 0xC098FB85, 0xC099FB85, 0xC09AFB85, 0xC09BFB85, 0xC09CFB85, 0xC09DFB85, 0xC09EFB85, 0xC09FFB85, 0xC0A0FB85, 0xC0A1FB85, 0xC0A2FB85, 0xC0A3FB85, 0xC0A4FB85, 0xC0A5FB85, + 0xC0A6FB85, 0xC0A7FB85, 0xC0A8FB85, 0xC0A9FB85, 0xC0AAFB85, 0xC0ABFB85, 0xC0ACFB85, 0xC0ADFB85, 0xC0AEFB85, 0xC0AFFB85, 0xC0B0FB85, 0xC0B1FB85, 0xC0B2FB85, 0xC0B3FB85, 0xC0B4FB85, + 0xC0B5FB85, 0xC0B6FB85, 0xC0B7FB85, 0xC0B8FB85, 0xC0B9FB85, 0xC0BAFB85, 0xC0BBFB85, 0xC0BCFB85, 0xC0BDFB85, 0xC0BEFB85, 0xC0BFFB85, 0xC0C0FB85, 0xC0C1FB85, 0xC0C2FB85, 0xC0C3FB85, + 0xC0C4FB85, 0xC0C5FB85, 0xC0C6FB85, 0xC0C7FB85, 0xC0C8FB85, 0xC0C9FB85, 0xC0CAFB85, 0xC0CBFB85, 0xC0CCFB85, 0xC0CDFB85, 0xC0CEFB85, 0xC0CFFB85, 0xC0D0FB85, 0xC0D1FB85, 0xC0D2FB85, + 0xC0D3FB85, 0xC0D4FB85, 0xC0D5FB85, 0xC0D6FB85, 0xC0D7FB85, 0xC0D8FB85, 0xC0D9FB85, 0xC0DAFB85, 0xC0DBFB85, 0xC0DCFB85, 0xC0DDFB85, 0xC0DEFB85, 0xC0DFFB85, 0xC0E0FB85, 
0xC0E1FB85, + 0xC0E2FB85, 0xC0E3FB85, 0xC0E4FB85, 0xC0E5FB85, 0xC0E6FB85, 0xC0E7FB85, 0xC0E8FB85, 0xC0E9FB85, 0xC0EAFB85, 0xC0EBFB85, 0xC0ECFB85, 0xC0EDFB85, 0xC0EEFB85, 0xC0EFFB85, 0xC0F0FB85, + 0xC0F1FB85, 0xC0F2FB85, 0xC0F3FB85, 0xC0F4FB85, 0xC0F5FB85, 0xC0F6FB85, 0xC0F7FB85, 0xC0F8FB85, 0xC0F9FB85, 0xC0FAFB85, 0xC0FBFB85, 0xC0FCFB85, 0xC0FDFB85, 0xC0FEFB85, 0xC0FFFB85, + 0xC100FB85, 0xC101FB85, 0xC102FB85, 0xC103FB85, 0xC104FB85, 0xC105FB85, 0xC106FB85, 0xC107FB85, 0xC108FB85, 0xC109FB85, 0xC10AFB85, 0xC10BFB85, 0xC10CFB85, 0xC10DFB85, 0xC10EFB85, + 0xC10FFB85, 0xC110FB85, 0xC111FB85, 0xC112FB85, 0xC113FB85, 0xC114FB85, 0xC115FB85, 0xC116FB85, 0xC117FB85, 0xC118FB85, 0xC119FB85, 0xC11AFB85, 0xC11BFB85, 0xC11CFB85, 0xC11DFB85, + 0xC11EFB85, 0xC11FFB85, 0xC120FB85, 0xC121FB85, 0xC122FB85, 0xC123FB85, 0xC124FB85, 0xC125FB85, 0xC126FB85, 0xC127FB85, 0xC128FB85, 0xC129FB85, 0xC12AFB85, 0xC12BFB85, 0xC12CFB85, + 0xC12DFB85, 0xC12EFB85, 0xC12FFB85, 0xC130FB85, 0xC131FB85, 0xC132FB85, 0xC133FB85, 0xC134FB85, 0xC135FB85, 0xC136FB85, 0xC137FB85, 0xC138FB85, 0xC139FB85, 0xC13AFB85, 0xC13BFB85, + 0xC13CFB85, 0xC13DFB85, 0xC13EFB85, 0xC13FFB85, 0xC140FB85, 0xC141FB85, 0xC142FB85, 0xC143FB85, 0xC144FB85, 0xC145FB85, 0xC146FB85, 0xC147FB85, 0xC148FB85, 0xC149FB85, 0xC14AFB85, + 0xC14BFB85, 0xC14CFB85, 0xC14DFB85, 0xC14EFB85, 0xC14FFB85, 0xC150FB85, 0xC151FB85, 0xC152FB85, 0xC153FB85, 0xC154FB85, 0xC155FB85, 0xC156FB85, 0xC157FB85, 0xC158FB85, 0xC159FB85, + 0xC15AFB85, 0xC15BFB85, 0xC15CFB85, 0xC15DFB85, 0xC15EFB85, 0xC15FFB85, 0xC160FB85, 0xC161FB85, 0xC162FB85, 0xC163FB85, 0xC164FB85, 0xC165FB85, 0xC166FB85, 0xC167FB85, 0xC168FB85, + 0xC169FB85, 0xC16AFB85, 0xC16BFB85, 0xC16CFB85, 0xC16DFB85, 0xC16EFB85, 0xC16FFB85, 0xC170FB85, 0xC171FB85, 0xC172FB85, 0xC173FB85, 0xC174FB85, 0xC175FB85, 0xC176FB85, 0xC177FB85, + 0xC178FB85, 0xC179FB85, 0xC17AFB85, 0xC17BFB85, 0xC17CFB85, 0xC17DFB85, 0xC17EFB85, 0xC17FFB85, 0xC180FB85, 0xC181FB85, 0xC182FB85, 0xC183FB85, 0xC184FB85, 
0xC185FB85, 0xC186FB85, + 0xC187FB85, 0xC188FB85, 0xC189FB85, 0xC18AFB85, 0xC18BFB85, 0xC18CFB85, 0xC18DFB85, 0xC18EFB85, 0xC18FFB85, 0xC190FB85, 0xC191FB85, 0xC192FB85, 0xC193FB85, 0xC194FB85, 0xC195FB85, + 0xC196FB85, 0xC197FB85, 0xC198FB85, 0xC199FB85, 0xC19AFB85, 0xC19BFB85, 0xC19CFB85, 0xC19DFB85, 0xC19EFB85, 0xC19FFB85, 0xC1A0FB85, 0xC1A1FB85, 0xC1A2FB85, 0xC1A3FB85, 0xC1A4FB85, + 0xC1A5FB85, 0xC1A6FB85, 0xC1A7FB85, 0xC1A8FB85, 0xC1A9FB85, 0xC1AAFB85, 0xC1ABFB85, 0xC1ACFB85, 0xC1ADFB85, 0xC1AEFB85, 0xC1AFFB85, 0xC1B0FB85, 0xC1B1FB85, 0xC1B2FB85, 0xC1B3FB85, + 0xC1B4FB85, 0xC1B5FB85, 0xC1B6FB85, 0xC1B7FB85, 0xC1B8FB85, 0xC1B9FB85, 0xC1BAFB85, 0xC1BBFB85, 0xC1BCFB85, 0xC1BDFB85, 0xC1BEFB85, 0xC1BFFB85, 0xC1C0FB85, 0xC1C1FB85, 0xC1C2FB85, + 0xC1C3FB85, 0xC1C4FB85, 0xC1C5FB85, 0xC1C6FB85, 0xC1C7FB85, 0xC1C8FB85, 0xC1C9FB85, 0xC1CAFB85, 0xC1CBFB85, 0xC1CCFB85, 0xC1CDFB85, 0xC1CEFB85, 0xC1CFFB85, 0xC1D0FB85, 0xC1D1FB85, + 0xC1D2FB85, 0xC1D3FB85, 0xC1D4FB85, 0xC1D5FB85, 0xC1D6FB85, 0xC1D7FB85, 0xC1D8FB85, 0xC1D9FB85, 0xC1DAFB85, 0xC1DBFB85, 0xC1DCFB85, 0xC1DDFB85, 0xC1DEFB85, 0xC1DFFB85, 0xC1E0FB85, + 0xC1E1FB85, 0xC1E2FB85, 0xC1E3FB85, 0xC1E4FB85, 0xC1E5FB85, 0xC1E6FB85, 0xC1E7FB85, 0xC1E8FB85, 0xC1E9FB85, 0xC1EAFB85, 0xC1EBFB85, 0xC1ECFB85, 0xC1EDFB85, 0xC1EEFB85, 0xC1EFFB85, + 0xC1F0FB85, 0xC1F1FB85, 0xC1F2FB85, 0xC1F3FB85, 0xC1F4FB85, 0xC1F5FB85, 0xC1F6FB85, 0xC1F7FB85, 0xC1F8FB85, 0xC1F9FB85, 0xC1FAFB85, 0xC1FBFB85, 0xC1FCFB85, 0xC1FDFB85, 0xC1FEFB85, + 0xC1FFFB85, 0xC200FB85, 0xC201FB85, 0xC202FB85, 0xC203FB85, 0xC204FB85, 0xC205FB85, 0xC206FB85, 0xC207FB85, 0xC208FB85, 0xC209FB85, 0xC20AFB85, 0xC20BFB85, 0xC20CFB85, 0xC20DFB85, + 0xC20EFB85, 0xC20FFB85, 0xC210FB85, 0xC211FB85, 0xC212FB85, 0xC213FB85, 0xC214FB85, 0xC215FB85, 0xC216FB85, 0xC217FB85, 0xC218FB85, 0xC219FB85, 0xC21AFB85, 0xC21BFB85, 0xC21CFB85, + 0xC21DFB85, 0xC21EFB85, 0xC21FFB85, 0xC220FB85, 0xC221FB85, 0xC222FB85, 0xC223FB85, 0xC224FB85, 0xC225FB85, 0xC226FB85, 0xC227FB85, 0xC228FB85, 
0xC229FB85, 0xC22AFB85, 0xC22BFB85, + 0xC22CFB85, 0xC22DFB85, 0xC22EFB85, 0xC22FFB85, 0xC230FB85, 0xC231FB85, 0xC232FB85, 0xC233FB85, 0xC234FB85, 0xC235FB85, 0xC236FB85, 0xC237FB85, 0xC238FB85, 0xC239FB85, 0xC23AFB85, + 0xC23BFB85, 0xC23CFB85, 0xC23DFB85, 0xC23EFB85, 0xC23FFB85, 0xC240FB85, 0xC241FB85, 0xC242FB85, 0xC243FB85, 0xC244FB85, 0xC245FB85, 0xC246FB85, 0xC247FB85, 0xC248FB85, 0xC249FB85, + 0xC24AFB85, 0xC24BFB85, 0xC24CFB85, 0xC24DFB85, 0xC24EFB85, 0xC24FFB85, 0xC250FB85, 0xC251FB85, 0xC252FB85, 0xC253FB85, 0xC254FB85, 0xC255FB85, 0xC256FB85, 0xC257FB85, 0xC258FB85, + 0xC259FB85, 0xC25AFB85, 0xC25BFB85, 0xC25CFB85, 0xC25DFB85, 0xC25EFB85, 0xC25FFB85, 0xC260FB85, 0xC261FB85, 0xC262FB85, 0xC263FB85, 0xC264FB85, 0xC265FB85, 0xC266FB85, 0xC267FB85, + 0xC268FB85, 0xC269FB85, 0xC26AFB85, 0xC26BFB85, 0xC26CFB85, 0xC26DFB85, 0xC26EFB85, 0xC26FFB85, 0xC270FB85, 0xC271FB85, 0xC272FB85, 0xC273FB85, 0xC274FB85, 0xC275FB85, 0xC276FB85, + 0xC277FB85, 0xC278FB85, 0xC279FB85, 0xC27AFB85, 0xC27BFB85, 0xC27CFB85, 0xC27DFB85, 0xC27EFB85, 0xC27FFB85, 0xC280FB85, 0xC281FB85, 0xC282FB85, 0xC283FB85, 0xC284FB85, 0xC285FB85, + 0xC286FB85, 0xC287FB85, 0xC288FB85, 0xC289FB85, 0xC28AFB85, 0xC28BFB85, 0xC28CFB85, 0xC28DFB85, 0xC28EFB85, 0xC28FFB85, 0xC290FB85, 0xC291FB85, 0xC292FB85, 0xC293FB85, 0xC294FB85, + 0xC295FB85, 0xC296FB85, 0xC297FB85, 0xC298FB85, 0xC299FB85, 0xC29AFB85, 0xC29BFB85, 0xC29CFB85, 0xC29DFB85, 0xC29EFB85, 0xC29FFB85, 0xC2A0FB85, 0xC2A1FB85, 0xC2A2FB85, 0xC2A3FB85, + 0xC2A4FB85, 0xC2A5FB85, 0xC2A6FB85, 0xC2A7FB85, 0xC2A8FB85, 0xC2A9FB85, 0xC2AAFB85, 0xC2ABFB85, 0xC2ACFB85, 0xC2ADFB85, 0xC2AEFB85, 0xC2AFFB85, 0xC2B0FB85, 0xC2B1FB85, 0xC2B2FB85, + 0xC2B3FB85, 0xC2B4FB85, 0xC2B5FB85, 0xC2B6FB85, 0xC2B7FB85, 0xC2B8FB85, 0xC2B9FB85, 0xC2BAFB85, 0xC2BBFB85, 0xC2BCFB85, 0xC2BDFB85, 0xC2BEFB85, 0xC2BFFB85, 0xC2C0FB85, 0xC2C1FB85, + 0xC2C2FB85, 0xC2C3FB85, 0xC2C4FB85, 0xC2C5FB85, 0xC2C6FB85, 0xC2C7FB85, 0xC2C8FB85, 0xC2C9FB85, 0xC2CAFB85, 0xC2CBFB85, 0xC2CCFB85, 
0xC2CDFB85, 0xC2CEFB85, 0xC2CFFB85, 0xC2D0FB85, + 0xC2D1FB85, 0xC2D2FB85, 0xC2D3FB85, 0xC2D4FB85, 0xC2D5FB85, 0xC2D6FB85, 0xC2D7FB85, 0xC2D8FB85, 0xC2D9FB85, 0xC2DAFB85, 0xC2DBFB85, 0xC2DCFB85, 0xC2DDFB85, 0xC2DEFB85, 0xC2DFFB85, + 0xC2E0FB85, 0xC2E1FB85, 0xC2E2FB85, 0xC2E3FB85, 0xC2E4FB85, 0xC2E5FB85, 0xC2E6FB85, 0xC2E7FB85, 0xC2E8FB85, 0xC2E9FB85, 0xC2EAFB85, 0xC2EBFB85, 0xC2ECFB85, 0xC2EDFB85, 0xC2EEFB85, + 0xC2EFFB85, 0xC2F0FB85, 0xC2F1FB85, 0xC2F2FB85, 0xC2F3FB85, 0xC2F4FB85, 0xC2F5FB85, 0xC2F6FB85, 0xC2F7FB85, 0xC2F8FB85, 0xC2F9FB85, 0xC2FAFB85, 0xC2FBFB85, 0xC2FCFB85, 0xC2FDFB85, + 0xC2FEFB85, 0xC2FFFB85, 0xC300FB85, 0xC301FB85, 0xC302FB85, 0xC303FB85, 0xC304FB85, 0xC305FB85, 0xC306FB85, 0xC307FB85, 0xC308FB85, 0xC309FB85, 0xC30AFB85, 0xC30BFB85, 0xC30CFB85, + 0xC30DFB85, 0xC30EFB85, 0xC30FFB85, 0xC310FB85, 0xC311FB85, 0xC312FB85, 0xC313FB85, 0xC314FB85, 0xC315FB85, 0xC316FB85, 0xC317FB85, 0xC318FB85, 0xC319FB85, 0xC31AFB85, 0xC31BFB85, + 0xC31CFB85, 0xC31DFB85, 0xC31EFB85, 0xC31FFB85, 0xC320FB85, 0xC321FB85, 0xC322FB85, 0xC323FB85, 0xC324FB85, 0xC325FB85, 0xC326FB85, 0xC327FB85, 0xC328FB85, 0xC329FB85, 0xC32AFB85, + 0xC32BFB85, 0xC32CFB85, 0xC32DFB85, 0xC32EFB85, 0xC32FFB85, 0xC330FB85, 0xC331FB85, 0xC332FB85, 0xC333FB85, 0xC334FB85, 0xC335FB85, 0xC336FB85, 0xC337FB85, 0xC338FB85, 0xC339FB85, + 0xC33AFB85, 0xC33BFB85, 0xC33CFB85, 0xC33DFB85, 0xC33EFB85, 0xC33FFB85, 0xC340FB85, 0xC341FB85, 0xC342FB85, 0xC343FB85, 0xC344FB85, 0xC345FB85, 0xC346FB85, 0xC347FB85, 0xC348FB85, + 0xC349FB85, 0xC34AFB85, 0xC34BFB85, 0xC34CFB85, 0xC34DFB85, 0xC34EFB85, 0xC34FFB85, 0xC350FB85, 0xC351FB85, 0xC352FB85, 0xC353FB85, 0xC354FB85, 0xC355FB85, 0xC356FB85, 0xC357FB85, + 0xC358FB85, 0xC359FB85, 0xC35AFB85, 0xC35BFB85, 0xC35CFB85, 0xC35DFB85, 0xC35EFB85, 0xC35FFB85, 0xC360FB85, 0xC361FB85, 0xC362FB85, 0xC363FB85, 0xC364FB85, 0xC365FB85, 0xC366FB85, + 0xC367FB85, 0xC368FB85, 0xC369FB85, 0xC36AFB85, 0xC36BFB85, 0xC36CFB85, 0xC36DFB85, 0xC36EFB85, 0xC36FFB85, 0xC370FB85, 
0xC371FB85, 0xC372FB85, 0xC373FB85, 0xC374FB85, 0xC375FB85, + 0xC376FB85, 0xC377FB85, 0xC378FB85, 0xC379FB85, 0xC37AFB85, 0xC37BFB85, 0xC37CFB85, 0xC37DFB85, 0xC37EFB85, 0xC37FFB85, 0xC380FB85, 0xC381FB85, 0xC382FB85, 0xC383FB85, 0xC384FB85, + 0xC385FB85, 0xC386FB85, 0xC387FB85, 0xC388FB85, 0xC389FB85, 0xC38AFB85, 0xC38BFB85, 0xC38CFB85, 0xC38DFB85, 0xC38EFB85, 0xC38FFB85, 0xC390FB85, 0xC391FB85, 0xC392FB85, 0xC393FB85, + 0xC394FB85, 0xC395FB85, 0xC396FB85, 0xC397FB85, 0xC398FB85, 0xC399FB85, 0xC39AFB85, 0xC39BFB85, 0xC39CFB85, 0xC39DFB85, 0xC39EFB85, 0xC39FFB85, 0xC3A0FB85, 0xC3A1FB85, 0xC3A2FB85, + 0xC3A3FB85, 0xC3A4FB85, 0xC3A5FB85, 0xC3A6FB85, 0xC3A7FB85, 0xC3A8FB85, 0xC3A9FB85, 0xC3AAFB85, 0xC3ABFB85, 0xC3ACFB85, 0xC3ADFB85, 0xC3AEFB85, 0xC3AFFB85, 0xC3B0FB85, 0xC3B1FB85, + 0xC3B2FB85, 0xC3B3FB85, 0xC3B4FB85, 0xC3B5FB85, 0xC3B6FB85, 0xC3B7FB85, 0xC3B8FB85, 0xC3B9FB85, 0xC3BAFB85, 0xC3BBFB85, 0xC3BCFB85, 0xC3BDFB85, 0xC3BEFB85, 0xC3BFFB85, 0xC3C0FB85, + 0xC3C1FB85, 0xC3C2FB85, 0xC3C3FB85, 0xC3C4FB85, 0xC3C5FB85, 0xC3C6FB85, 0xC3C7FB85, 0xC3C8FB85, 0xC3C9FB85, 0xC3CAFB85, 0xC3CBFB85, 0xC3CCFB85, 0xC3CDFB85, 0xC3CEFB85, 0xC3CFFB85, + 0xC3D0FB85, 0xC3D1FB85, 0xC3D2FB85, 0xC3D3FB85, 0xC3D4FB85, 0xC3D5FB85, 0xC3D6FB85, 0xC3D7FB85, 0xC3D8FB85, 0xC3D9FB85, 0xC3DAFB85, 0xC3DBFB85, 0xC3DCFB85, 0xC3DDFB85, 0xC3DEFB85, + 0xC3DFFB85, 0xC3E0FB85, 0xC3E1FB85, 0xC3E2FB85, 0xC3E3FB85, 0xC3E4FB85, 0xC3E5FB85, 0xC3E6FB85, 0xC3E7FB85, 0xC3E8FB85, 0xC3E9FB85, 0xC3EAFB85, 0xC3EBFB85, 0xC3ECFB85, 0xC3EDFB85, + 0xC3EEFB85, 0xC3EFFB85, 0xC3F0FB85, 0xC3F1FB85, 0xC3F2FB85, 0xC3F3FB85, 0xC3F4FB85, 0xC3F5FB85, 0xC3F6FB85, 0xC3F7FB85, 0xC3F8FB85, 0xC3F9FB85, 0xC3FAFB85, 0xC3FBFB85, 0xC3FCFB85, + 0xC3FDFB85, 0xC3FEFB85, 0xC3FFFB85, 0xC400FB85, 0xC401FB85, 0xC402FB85, 0xC403FB85, 0xC404FB85, 0xC405FB85, 0xC406FB85, 0xC407FB85, 0xC408FB85, 0xC409FB85, 0xC40AFB85, 0xC40BFB85, + 0xC40CFB85, 0xC40DFB85, 0xC40EFB85, 0xC40FFB85, 0xC410FB85, 0xC411FB85, 0xC412FB85, 0xC413FB85, 0xC414FB85, 
0xC415FB85, 0xC416FB85, 0xC417FB85, 0xC418FB85, 0xC419FB85, 0xC41AFB85, + 0xC41BFB85, 0xC41CFB85, 0xC41DFB85, 0xC41EFB85, 0xC41FFB85, 0xC420FB85, 0xC421FB85, 0xC422FB85, 0xC423FB85, 0xC424FB85, 0xC425FB85, 0xC426FB85, 0xC427FB85, 0xC428FB85, 0xC429FB85, + 0xC42AFB85, 0xC42BFB85, 0xC42CFB85, 0xC42DFB85, 0xC42EFB85, 0xC42FFB85, 0xC430FB85, 0xC431FB85, 0xC432FB85, 0xC433FB85, 0xC434FB85, 0xC435FB85, 0xC436FB85, 0xC437FB85, 0xC438FB85, + 0xC439FB85, 0xC43AFB85, 0xC43BFB85, 0xC43CFB85, 0xC43DFB85, 0xC43EFB85, 0xC43FFB85, 0xC440FB85, 0xC441FB85, 0xC442FB85, 0xC443FB85, 0xC444FB85, 0xC445FB85, 0xC446FB85, 0xC447FB85, + 0xC448FB85, 0xC449FB85, 0xC44AFB85, 0xC44BFB85, 0xC44CFB85, 0xC44DFB85, 0xC44EFB85, 0xC44FFB85, 0xC450FB85, 0xC451FB85, 0xC452FB85, 0xC453FB85, 0xC454FB85, 0xC455FB85, 0xC456FB85, + 0xC457FB85, 0xC458FB85, 0xC459FB85, 0xC45AFB85, 0xC45BFB85, 0xC45CFB85, 0xC45DFB85, 0xC45EFB85, 0xC45FFB85, 0xC460FB85, 0xC461FB85, 0xC462FB85, 0xC463FB85, 0xC464FB85, 0xC465FB85, + 0xC466FB85, 0xC467FB85, 0xC468FB85, 0xC469FB85, 0xC46AFB85, 0xC46BFB85, 0xC46CFB85, 0xC46DFB85, 0xC46EFB85, 0xC46FFB85, 0xC470FB85, 0xC471FB85, 0xC472FB85, 0xC473FB85, 0xC474FB85, + 0xC475FB85, 0xC476FB85, 0xC477FB85, 0xC478FB85, 0xC479FB85, 0xC47AFB85, 0xC47BFB85, 0xC47CFB85, 0xC47DFB85, 0xC47EFB85, 0xC47FFB85, 0xC480FB85, 0xC481FB85, 0xC482FB85, 0xC483FB85, + 0xC484FB85, 0xC485FB85, 0xC486FB85, 0xC487FB85, 0xC488FB85, 0xC489FB85, 0xC48AFB85, 0xC48BFB85, 0xC48CFB85, 0xC48DFB85, 0xC48EFB85, 0xC48FFB85, 0xC490FB85, 0xC491FB85, 0xC492FB85, + 0xC493FB85, 0xC494FB85, 0xC495FB85, 0xC496FB85, 0xC497FB85, 0xC498FB85, 0xC499FB85, 0xC49AFB85, 0xC49BFB85, 0xC49CFB85, 0xC49DFB85, 0xC49EFB85, 0xC49FFB85, 0xC4A0FB85, 0xC4A1FB85, + 0xC4A2FB85, 0xC4A3FB85, 0xC4A4FB85, 0xC4A5FB85, 0xC4A6FB85, 0xC4A7FB85, 0xC4A8FB85, 0xC4A9FB85, 0xC4AAFB85, 0xC4ABFB85, 0xC4ACFB85, 0xC4ADFB85, 0xC4AEFB85, 0xC4AFFB85, 0xC4B0FB85, + 0xC4B1FB85, 0xC4B2FB85, 0xC4B3FB85, 0xC4B4FB85, 0xC4B5FB85, 0xC4B6FB85, 0xC4B7FB85, 0xC4B8FB85, 
0xC4B9FB85, 0xC4BAFB85, 0xC4BBFB85, 0xC4BCFB85, 0xC4BDFB85, 0xC4BEFB85, 0xC4BFFB85, + 0xC4C0FB85, 0xC4C1FB85, 0xC4C2FB85, 0xC4C3FB85, 0xC4C4FB85, 0xC4C5FB85, 0xC4C6FB85, 0xC4C7FB85, 0xC4C8FB85, 0xC4C9FB85, 0xC4CAFB85, 0xC4CBFB85, 0xC4CCFB85, 0xC4CDFB85, 0xC4CEFB85, + 0xC4CFFB85, 0xC4D0FB85, 0xC4D1FB85, 0xC4D2FB85, 0xC4D3FB85, 0xC4D4FB85, 0xC4D5FB85, 0xC4D6FB85, 0xC4D7FB85, 0xC4D8FB85, 0xC4D9FB85, 0xC4DAFB85, 0xC4DBFB85, 0xC4DCFB85, 0xC4DDFB85, + 0xC4DEFB85, 0xC4DFFB85, 0xC4E0FB85, 0xC4E1FB85, 0xC4E2FB85, 0xC4E3FB85, 0xC4E4FB85, 0xC4E5FB85, 0xC4E6FB85, 0xC4E7FB85, 0xC4E8FB85, 0xC4E9FB85, 0xC4EAFB85, 0xC4EBFB85, 0xC4ECFB85, + 0xC4EDFB85, 0xC4EEFB85, 0xC4EFFB85, 0xC4F0FB85, 0xC4F1FB85, 0xC4F2FB85, 0xC4F3FB85, 0xC4F4FB85, 0xC4F5FB85, 0xC4F6FB85, 0xC4F7FB85, 0xC4F8FB85, 0xC4F9FB85, 0xC4FAFB85, 0xC4FBFB85, + 0xC4FCFB85, 0xC4FDFB85, 0xC4FEFB85, 0xC4FFFB85, 0xC500FB85, 0xC501FB85, 0xC502FB85, 0xC503FB85, 0xC504FB85, 0xC505FB85, 0xC506FB85, 0xC507FB85, 0xC508FB85, 0xC509FB85, 0xC50AFB85, + 0xC50BFB85, 0xC50CFB85, 0xC50DFB85, 0xC50EFB85, 0xC50FFB85, 0xC510FB85, 0xC511FB85, 0xC512FB85, 0xC513FB85, 0xC514FB85, 0xC515FB85, 0xC516FB85, 0xC517FB85, 0xC518FB85, 0xC519FB85, + 0xC51AFB85, 0xC51BFB85, 0xC51CFB85, 0xC51DFB85, 0xC51EFB85, 0xC51FFB85, 0xC520FB85, 0xC521FB85, 0xC522FB85, 0xC523FB85, 0xC524FB85, 0xC525FB85, 0xC526FB85, 0xC527FB85, 0xC528FB85, + 0xC529FB85, 0xC52AFB85, 0xC52BFB85, 0xC52CFB85, 0xC52DFB85, 0xC52EFB85, 0xC52FFB85, 0xC530FB85, 0xC531FB85, 0xC532FB85, 0xC533FB85, 0xC534FB85, 0xC535FB85, 0xC536FB85, 0xC537FB85, + 0xC538FB85, 0xC539FB85, 0xC53AFB85, 0xC53BFB85, 0xC53CFB85, 0xC53DFB85, 0xC53EFB85, 0xC53FFB85, 0xC540FB85, 0xC541FB85, 0xC542FB85, 0xC543FB85, 0xC544FB85, 0xC545FB85, 0xC546FB85, + 0xC547FB85, 0xC548FB85, 0xC549FB85, 0xC54AFB85, 0xC54BFB85, 0xC54CFB85, 0xC54DFB85, 0xC54EFB85, 0xC54FFB85, 0xC550FB85, 0xC551FB85, 0xC552FB85, 0xC553FB85, 0xC554FB85, 0xC555FB85, + 0xC556FB85, 0xC557FB85, 0xC558FB85, 0xC559FB85, 0xC55AFB85, 0xC55BFB85, 0xC55CFB85, 
0xC55DFB85, 0xC55EFB85, 0xC55FFB85, 0xC560FB85, 0xC561FB85, 0xC562FB85, 0xC563FB85, 0xC564FB85, + 0xC565FB85, 0xC566FB85, 0xC567FB85, 0xC568FB85, 0xC569FB85, 0xC56AFB85, 0xC56BFB85, 0xC56CFB85, 0xC56DFB85, 0xC56EFB85, 0xC56FFB85, 0xC570FB85, 0xC571FB85, 0xC572FB85, 0xC573FB85, + 0xC574FB85, 0xC575FB85, 0xC576FB85, 0xC577FB85, 0xC578FB85, 0xC579FB85, 0xC57AFB85, 0xC57BFB85, 0xC57CFB85, 0xC57DFB85, 0xC57EFB85, 0xC57FFB85, 0xC580FB85, 0xC581FB85, 0xC582FB85, + 0xC583FB85, 0xC584FB85, 0xC585FB85, 0xC586FB85, 0xC587FB85, 0xC588FB85, 0xC589FB85, 0xC58AFB85, 0xC58BFB85, 0xC58CFB85, 0xC58DFB85, 0xC58EFB85, 0xC58FFB85, 0xC590FB85, 0xC591FB85, + 0xC592FB85, 0xC593FB85, 0xC594FB85, 0xC595FB85, 0xC596FB85, 0xC597FB85, 0xC598FB85, 0xC599FB85, 0xC59AFB85, 0xC59BFB85, 0xC59CFB85, 0xC59DFB85, 0xC59EFB85, 0xC59FFB85, 0xC5A0FB85, + 0xC5A1FB85, 0xC5A2FB85, 0xC5A3FB85, 0xC5A4FB85, 0xC5A5FB85, 0xC5A6FB85, 0xC5A7FB85, 0xC5A8FB85, 0xC5A9FB85, 0xC5AAFB85, 0xC5ABFB85, 0xC5ACFB85, 0xC5ADFB85, 0xC5AEFB85, 0xC5AFFB85, + 0xC5B0FB85, 0xC5B1FB85, 0xC5B2FB85, 0xC5B3FB85, 0xC5B4FB85, 0xC5B5FB85, 0xC5B6FB85, 0xC5B7FB85, 0xC5B8FB85, 0xC5B9FB85, 0xC5BAFB85, 0xC5BBFB85, 0xC5BCFB85, 0xC5BDFB85, 0xC5BEFB85, + 0xC5BFFB85, 0xC5C0FB85, 0xC5C1FB85, 0xC5C2FB85, 0xC5C3FB85, 0xC5C4FB85, 0xC5C5FB85, 0xC5C6FB85, 0xC5C7FB85, 0xC5C8FB85, 0xC5C9FB85, 0xC5CAFB85, 0xC5CBFB85, 0xC5CCFB85, 0xC5CDFB85, + 0xC5CEFB85, 0xC5CFFB85, 0xC5D0FB85, 0xC5D1FB85, 0xC5D2FB85, 0xC5D3FB85, 0xC5D4FB85, 0xC5D5FB85, 0xC5D6FB85, 0xC5D7FB85, 0xC5D8FB85, 0xC5D9FB85, 0xC5DAFB85, 0xC5DBFB85, 0xC5DCFB85, + 0xC5DDFB85, 0xC5DEFB85, 0xC5DFFB85, 0xC5E0FB85, 0xC5E1FB85, 0xC5E2FB85, 0xC5E3FB85, 0xC5E4FB85, 0xC5E5FB85, 0xC5E6FB85, 0xC5E7FB85, 0xC5E8FB85, 0xC5E9FB85, 0xC5EAFB85, 0xC5EBFB85, + 0xC5ECFB85, 0xC5EDFB85, 0xC5EEFB85, 0xC5EFFB85, 0xC5F0FB85, 0xC5F1FB85, 0xC5F2FB85, 0xC5F3FB85, 0xC5F4FB85, 0xC5F5FB85, 0xC5F6FB85, 0xC5F7FB85, 0xC5F8FB85, 0xC5F9FB85, 0xC5FAFB85, + 0xC5FBFB85, 0xC5FCFB85, 0xC5FDFB85, 0xC5FEFB85, 0xC5FFFB85, 0xC600FB85, 
0xC601FB85, 0xC602FB85, 0xC603FB85, 0xC604FB85, 0xC605FB85, 0xC606FB85, 0xC607FB85, 0xC608FB85, 0xC609FB85, + 0xC60AFB85, 0xC60BFB85, 0xC60CFB85, 0xC60DFB85, 0xC60EFB85, 0xC60FFB85, 0xC610FB85, 0xC611FB85, 0xC612FB85, 0xC613FB85, 0xC614FB85, 0xC615FB85, 0xC616FB85, 0xC617FB85, 0xC618FB85, + 0xC619FB85, 0xC61AFB85, 0xC61BFB85, 0xC61CFB85, 0xC61DFB85, 0xC61EFB85, 0xC61FFB85, 0xC620FB85, 0xC621FB85, 0xC622FB85, 0xC623FB85, 0xC624FB85, 0xC625FB85, 0xC626FB85, 0xC627FB85, + 0xC628FB85, 0xC629FB85, 0xC62AFB85, 0xC62BFB85, 0xC62CFB85, 0xC62DFB85, 0xC62EFB85, 0xC62FFB85, 0xC630FB85, 0xC631FB85, 0xC632FB85, 0xC633FB85, 0xC634FB85, 0xC635FB85, 0xC636FB85, + 0xC637FB85, 0xC638FB85, 0xC639FB85, 0xC63AFB85, 0xC63BFB85, 0xC63CFB85, 0xC63DFB85, 0xC63EFB85, 0xC63FFB85, 0xC640FB85, 0xC641FB85, 0xC642FB85, 0xC643FB85, 0xC644FB85, 0xC645FB85, + 0xC646FB85, 0xC647FB85, 0xC648FB85, 0xC649FB85, 0xC64AFB85, 0xC64BFB85, 0xC64CFB85, 0xC64DFB85, 0xC64EFB85, 0xC64FFB85, 0xC650FB85, 0xC651FB85, 0xC652FB85, 0xC653FB85, 0xC654FB85, + 0xC655FB85, 0xC656FB85, 0xC657FB85, 0xC658FB85, 0xC659FB85, 0xC65AFB85, 0xC65BFB85, 0xC65CFB85, 0xC65DFB85, 0xC65EFB85, 0xC65FFB85, 0xC660FB85, 0xC661FB85, 0xC662FB85, 0xC663FB85, + 0xC664FB85, 0xC665FB85, 0xC666FB85, 0xC667FB85, 0xC668FB85, 0xC669FB85, 0xC66AFB85, 0xC66BFB85, 0xC66CFB85, 0xC66DFB85, 0xC66EFB85, 0xC66FFB85, 0xC670FB85, 0xC671FB85, 0xC672FB85, + 0xC673FB85, 0xC674FB85, 0xC675FB85, 0xC676FB85, 0xC677FB85, 0xC678FB85, 0xC679FB85, 0xC67AFB85, 0xC67BFB85, 0xC67CFB85, 0xC67DFB85, 0xC67EFB85, 0xC67FFB85, 0xC680FB85, 0xC681FB85, + 0xC682FB85, 0xC683FB85, 0xC684FB85, 0xC685FB85, 0xC686FB85, 0xC687FB85, 0xC688FB85, 0xC689FB85, 0xC68AFB85, 0xC68BFB85, 0xC68CFB85, 0xC68DFB85, 0xC68EFB85, 0xC68FFB85, 0xC690FB85, + 0xC691FB85, 0xC692FB85, 0xC693FB85, 0xC694FB85, 0xC695FB85, 0xC696FB85, 0xC697FB85, 0xC698FB85, 0xC699FB85, 0xC69AFB85, 0xC69BFB85, 0xC69CFB85, 0xC69DFB85, 0xC69EFB85, 0xC69FFB85, + 0xC6A0FB85, 0xC6A1FB85, 0xC6A2FB85, 0xC6A3FB85, 0xC6A4FB85, 
0xC6A5FB85, 0xC6A6FB85, 0xC6A7FB85, 0xC6A8FB85, 0xC6A9FB85, 0xC6AAFB85, 0xC6ABFB85, 0xC6ACFB85, 0xC6ADFB85, 0xC6AEFB85, + 0xC6AFFB85, 0xC6B0FB85, 0xC6B1FB85, 0xC6B2FB85, 0xC6B3FB85, 0xC6B4FB85, 0xC6B5FB85, 0xC6B6FB85, 0xC6B7FB85, 0xC6B8FB85, 0xC6B9FB85, 0xC6BAFB85, 0xC6BBFB85, 0xC6BCFB85, 0xC6BDFB85, + 0xC6BEFB85, 0xC6BFFB85, 0xC6C0FB85, 0xC6C1FB85, 0xC6C2FB85, 0xC6C3FB85, 0xC6C4FB85, 0xC6C5FB85, 0xC6C6FB85, 0xC6C7FB85, 0xC6C8FB85, 0xC6C9FB85, 0xC6CAFB85, 0xC6CBFB85, 0xC6CCFB85, + 0xC6CDFB85, 0xC6CEFB85, 0xC6CFFB85, 0xC6D0FB85, 0xC6D1FB85, 0xC6D2FB85, 0xC6D3FB85, 0xC6D4FB85, 0xC6D5FB85, 0xC6D6FB85, 0xC6D7FB85, 0xC6D8FB85, 0xC6D9FB85, 0xC6DAFB85, 0xC6DBFB85, + 0xC6DCFB85, 0xC6DDFB85, 0xC6DEFB85, 0xC6DFFB85, 0xC6E0FB85, 0xC6E1FB85, 0xC6E2FB85, 0xC6E3FB85, 0xC6E4FB85, 0xC6E5FB85, 0xC6E6FB85, 0xC6E7FB85, 0xC6E8FB85, 0xC6E9FB85, 0xC6EAFB85, + 0xC6EBFB85, 0xC6ECFB85, 0xC6EDFB85, 0xC6EEFB85, 0xC6EFFB85, 0xC6F0FB85, 0xC6F1FB85, 0xC6F2FB85, 0xC6F3FB85, 0xC6F4FB85, 0xC6F5FB85, 0xC6F6FB85, 0xC6F7FB85, 0xC6F8FB85, 0xC6F9FB85, + 0xC6FAFB85, 0xC6FBFB85, 0xC6FCFB85, 0xC6FDFB85, 0xC6FEFB85, 0xC6FFFB85, 0xC700FB85, 0xC701FB85, 0xC702FB85, 0xC703FB85, 0xC704FB85, 0xC705FB85, 0xC706FB85, 0xC707FB85, 0xC708FB85, + 0xC709FB85, 0xC70AFB85, 0xC70BFB85, 0xC70CFB85, 0xC70DFB85, 0xC70EFB85, 0xC70FFB85, 0xC710FB85, 0xC711FB85, 0xC712FB85, 0xC713FB85, 0xC714FB85, 0xC715FB85, 0xC716FB85, 0xC717FB85, + 0xC718FB85, 0xC719FB85, 0xC71AFB85, 0xC71BFB85, 0xC71CFB85, 0xC71DFB85, 0xC71EFB85, 0xC71FFB85, 0xC720FB85, 0xC721FB85, 0xC722FB85, 0xC723FB85, 0xC724FB85, 0xC725FB85, 0xC726FB85, + 0xC727FB85, 0xC728FB85, 0xC729FB85, 0xC72AFB85, 0xC72BFB85, 0xC72CFB85, 0xC72DFB85, 0xC72EFB85, 0xC72FFB85, 0xC730FB85, 0xC731FB85, 0xC732FB85, 0xC733FB85, 0xC734FB85, 0xC735FB85, + 0xC736FB85, 0xC737FB85, 0xC738FB85, 0xC739FB85, 0xC73AFB85, 0xC73BFB85, 0xC73CFB85, 0xC73DFB85, 0xC73EFB85, 0xC73FFB85, 0xC740FB85, 0xC741FB85, 0xC742FB85, 0xC743FB85, 0xC744FB85, + 0xC745FB85, 0xC746FB85, 0xC747FB85, 0xC748FB85, 
0xC749FB85, 0xC74AFB85, 0xC74BFB85, 0xC74CFB85, 0xC74DFB85, 0xC74EFB85, 0xC74FFB85, 0xC750FB85, 0xC751FB85, 0xC752FB85, 0xC753FB85, + 0xC754FB85, 0xC755FB85, 0xC756FB85, 0xC757FB85, 0xC758FB85, 0xC759FB85, 0xC75AFB85, 0xC75BFB85, 0xC75CFB85, 0xC75DFB85, 0xC75EFB85, 0xC75FFB85, 0xC760FB85, 0xC761FB85, 0xC762FB85, + 0xC763FB85, 0xC764FB85, 0xC765FB85, 0xC766FB85, 0xC767FB85, 0xC768FB85, 0xC769FB85, 0xC76AFB85, 0xC76BFB85, 0xC76CFB85, 0xC76DFB85, 0xC76EFB85, 0xC76FFB85, 0xC770FB85, 0xC771FB85, + 0xC772FB85, 0xC773FB85, 0xC774FB85, 0xC775FB85, 0xC776FB85, 0xC777FB85, 0xC778FB85, 0xC779FB85, 0xC77AFB85, 0xC77BFB85, 0xC77CFB85, 0xC77DFB85, 0xC77EFB85, 0xC77FFB85, 0xC780FB85, + 0xC781FB85, 0xC782FB85, 0xC783FB85, 0xC784FB85, 0xC785FB85, 0xC786FB85, 0xC787FB85, 0xC788FB85, 0xC789FB85, 0xC78AFB85, 0xC78BFB85, 0xC78CFB85, 0xC78DFB85, 0xC78EFB85, 0xC78FFB85, + 0xC790FB85, 0xC791FB85, 0xC792FB85, 0xC793FB85, 0xC794FB85, 0xC795FB85, 0xC796FB85, 0xC797FB85, 0xC798FB85, 0xC799FB85, 0xC79AFB85, 0xC79BFB85, 0xC79CFB85, 0xC79DFB85, 0xC79EFB85, + 0xC79FFB85, 0xC7A0FB85, 0xC7A1FB85, 0xC7A2FB85, 0xC7A3FB85, 0xC7A4FB85, 0xC7A5FB85, 0xC7A6FB85, 0xC7A7FB85, 0xC7A8FB85, 0xC7A9FB85, 0xC7AAFB85, 0xC7ABFB85, 0xC7ACFB85, 0xC7ADFB85, + 0xC7AEFB85, 0xC7AFFB85, 0xC7B0FB85, 0xC7B1FB85, 0xC7B2FB85, 0xC7B3FB85, 0xC7B4FB85, 0xC7B5FB85, 0xC7B6FB85, 0xC7B7FB85, 0xC7B8FB85, 0xC7B9FB85, 0xC7BAFB85, 0xC7BBFB85, 0xC7BCFB85, + 0xC7BDFB85, 0xC7BEFB85, 0xC7BFFB85, 0xC7C0FB85, 0xC7C1FB85, 0xC7C2FB85, 0xC7C3FB85, 0xC7C4FB85, 0xC7C5FB85, 0xC7C6FB85, 0xC7C7FB85, 0xC7C8FB85, 0xC7C9FB85, 0xC7CAFB85, 0xC7CBFB85, + 0xC7CCFB85, 0xC7CDFB85, 0xC7CEFB85, 0xC7CFFB85, 0xC7D0FB85, 0xC7D1FB85, 0xC7D2FB85, 0xC7D3FB85, 0xC7D4FB85, 0xC7D5FB85, 0xC7D6FB85, 0xC7D7FB85, 0xC7D8FB85, 0xC7D9FB85, 0xC7DAFB85, + 0xC7DBFB85, 0xC7DCFB85, 0xC7DDFB85, 0xC7DEFB85, 0xC7DFFB85, 0xC7E0FB85, 0xC7E1FB85, 0xC7E2FB85, 0xC7E3FB85, 0xC7E4FB85, 0xC7E5FB85, 0xC7E6FB85, 0xC7E7FB85, 0xC7E8FB85, 0xC7E9FB85, + 0xC7EAFB85, 0xC7EBFB85, 0xC7ECFB85, 
0xC7EDFB85, 0xC7EEFB85, 0xC7EFFB85, 0xC7F0FB85, 0xC7F1FB85, 0xC7F2FB85, 0xC7F3FB85, 0xC7F4FB85, 0xC7F5FB85, 0xC7F6FB85, 0xC7F7FB85, 0xC7F8FB85, + 0xC7F9FB85, 0xC7FAFB85, 0xC7FBFB85, 0xC7FCFB85, 0xC7FDFB85, 0xC7FEFB85, 0xC7FFFB85, 0xC800FB85, 0xC801FB85, 0xC802FB85, 0xC803FB85, 0xC804FB85, 0xC805FB85, 0xC806FB85, 0xC807FB85, + 0xC808FB85, 0xC809FB85, 0xC80AFB85, 0xC80BFB85, 0xC80CFB85, 0xC80DFB85, 0xC80EFB85, 0xC80FFB85, 0xC810FB85, 0xC811FB85, 0xC812FB85, 0xC813FB85, 0xC814FB85, 0xC815FB85, 0xC816FB85, + 0xC817FB85, 0xC818FB85, 0xC819FB85, 0xC81AFB85, 0xC81BFB85, 0xC81CFB85, 0xC81DFB85, 0xC81EFB85, 0xC81FFB85, 0xC820FB85, 0xC821FB85, 0xC822FB85, 0xC823FB85, 0xC824FB85, 0xC825FB85, + 0xC826FB85, 0xC827FB85, 0xC828FB85, 0xC829FB85, 0xC82AFB85, 0xC82BFB85, 0xC82CFB85, 0xC82DFB85, 0xC82EFB85, 0xC82FFB85, 0xC830FB85, 0xC831FB85, 0xC832FB85, 0xC833FB85, 0xC834FB85, + 0xC835FB85, 0xC836FB85, 0xC837FB85, 0xC838FB85, 0xC839FB85, 0xC83AFB85, 0xC83BFB85, 0xC83CFB85, 0xC83DFB85, 0xC83EFB85, 0xC83FFB85, 0xC840FB85, 0xC841FB85, 0xC842FB85, 0xC843FB85, + 0xC844FB85, 0xC845FB85, 0xC846FB85, 0xC847FB85, 0xC848FB85, 0xC849FB85, 0xC84AFB85, 0xC84BFB85, 0xC84CFB85, 0xC84DFB85, 0xC84EFB85, 0xC84FFB85, 0xC850FB85, 0xC851FB85, 0xC852FB85, + 0xC853FB85, 0xC854FB85, 0xC855FB85, 0xC856FB85, 0xC857FB85, 0xC858FB85, 0xC859FB85, 0xC85AFB85, 0xC85BFB85, 0xC85CFB85, 0xC85DFB85, 0xC85EFB85, 0xC85FFB85, 0xC860FB85, 0xC861FB85, + 0xC862FB85, 0xC863FB85, 0xC864FB85, 0xC865FB85, 0xC866FB85, 0xC867FB85, 0xC868FB85, 0xC869FB85, 0xC86AFB85, 0xC86BFB85, 0xC86CFB85, 0xC86DFB85, 0xC86EFB85, 0xC86FFB85, 0xC870FB85, + 0xC871FB85, 0xC872FB85, 0xC873FB85, 0xC874FB85, 0xC875FB85, 0xC876FB85, 0xC877FB85, 0xC878FB85, 0xC879FB85, 0xC87AFB85, 0xC87BFB85, 0xC87CFB85, 0xC87DFB85, 0xC87EFB85, 0xC87FFB85, + 0xC880FB85, 0xC881FB85, 0xC882FB85, 0xC883FB85, 0xC884FB85, 0xC885FB85, 0xC886FB85, 0xC887FB85, 0xC888FB85, 0xC889FB85, 0xC88AFB85, 0xC88BFB85, 0xC88CFB85, 0xC88DFB85, 0xC88EFB85, + 0xC88FFB85, 0xC890FB85, 
0xC891FB85, 0xC892FB85, 0xC893FB85, 0xC894FB85, 0xC895FB85, 0xC896FB85, 0xC897FB85, 0xC898FB85, 0xC899FB85, 0xC89AFB85, 0xC89BFB85, 0xC89CFB85, 0xC89DFB85, + 0xC89EFB85, 0xC89FFB85, 0xC8A0FB85, 0xC8A1FB85, 0xC8A2FB85, 0xC8A3FB85, 0xC8A4FB85, 0xC8A5FB85, 0xC8A6FB85, 0xC8A7FB85, 0xC8A8FB85, 0xC8A9FB85, 0xC8AAFB85, 0xC8ABFB85, 0xC8ACFB85, + 0xC8ADFB85, 0xC8AEFB85, 0xC8AFFB85, 0xC8B0FB85, 0xC8B1FB85, 0xC8B2FB85, 0xC8B3FB85, 0xC8B4FB85, 0xC8B5FB85, 0xC8B6FB85, 0xC8B7FB85, 0xC8B8FB85, 0xC8B9FB85, 0xC8BAFB85, 0xC8BBFB85, + 0xC8BCFB85, 0xC8BDFB85, 0xC8BEFB85, 0xC8BFFB85, 0xC8C0FB85, 0xC8C1FB85, 0xC8C2FB85, 0xC8C3FB85, 0xC8C4FB85, 0xC8C5FB85, 0xC8C6FB85, 0xC8C7FB85, 0xC8C8FB85, 0xC8C9FB85, 0xC8CAFB85, + 0xC8CBFB85, 0xC8CCFB85, 0xC8CDFB85, 0xC8CEFB85, 0xC8CFFB85, 0xC8D0FB85, 0xC8D1FB85, 0xC8D2FB85, 0xC8D3FB85, 0xC8D4FB85, 0xC8D5FB85, 0xC8D6FB85, 0xC8D7FB85, 0xC8D8FB85, 0xC8D9FB85, + 0xC8DAFB85, 0xC8DBFB85, 0xC8DCFB85, 0xC8DDFB85, 0xC8DEFB85, 0xC8DFFB85, 0xC8E0FB85, 0xC8E1FB85, 0xC8E2FB85, 0xC8E3FB85, 0xC8E4FB85, 0xC8E5FB85, 0xC8E6FB85, 0xC8E7FB85, 0xC8E8FB85, + 0xC8E9FB85, 0xC8EAFB85, 0xC8EBFB85, 0xC8ECFB85, 0xC8EDFB85, 0xC8EEFB85, 0xC8EFFB85, 0xC8F0FB85, 0xC8F1FB85, 0xC8F2FB85, 0xC8F3FB85, 0xC8F4FB85, 0xC8F5FB85, 0xC8F6FB85, 0xC8F7FB85, + 0xC8F8FB85, 0xC8F9FB85, 0xC8FAFB85, 0xC8FBFB85, 0xC8FCFB85, 0xC8FDFB85, 0xC8FEFB85, 0xC8FFFB85, 0xC900FB85, 0xC901FB85, 0xC902FB85, 0xC903FB85, 0xC904FB85, 0xC905FB85, 0xC906FB85, + 0xC907FB85, 0xC908FB85, 0xC909FB85, 0xC90AFB85, 0xC90BFB85, 0xC90CFB85, 0xC90DFB85, 0xC90EFB85, 0xC90FFB85, 0xC910FB85, 0xC911FB85, 0xC912FB85, 0xC913FB85, 0xC914FB85, 0xC915FB85, + 0xC916FB85, 0xC917FB85, 0xC918FB85, 0xC919FB85, 0xC91AFB85, 0xC91BFB85, 0xC91CFB85, 0xC91DFB85, 0xC91EFB85, 0xC91FFB85, 0xC920FB85, 0xC921FB85, 0xC922FB85, 0xC923FB85, 0xC924FB85, + 0xC925FB85, 0xC926FB85, 0xC927FB85, 0xC928FB85, 0xC929FB85, 0xC92AFB85, 0xC92BFB85, 0xC92CFB85, 0xC92DFB85, 0xC92EFB85, 0xC92FFB85, 0xC930FB85, 0xC931FB85, 0xC932FB85, 0xC933FB85, + 0xC934FB85, 
0xC935FB85, 0xC936FB85, 0xC937FB85, 0xC938FB85, 0xC939FB85, 0xC93AFB85, 0xC93BFB85, 0xC93CFB85, 0xC93DFB85, 0xC93EFB85, 0xC93FFB85, 0xC940FB85, 0xC941FB85, 0xC942FB85, + 0xC943FB85, 0xC944FB85, 0xC945FB85, 0xC946FB85, 0xC947FB85, 0xC948FB85, 0xC949FB85, 0xC94AFB85, 0xC94BFB85, 0xC94CFB85, 0xC94DFB85, 0xC94EFB85, 0xC94FFB85, 0xC950FB85, 0xC951FB85, + 0xC952FB85, 0xC953FB85, 0xC954FB85, 0xC955FB85, 0xC956FB85, 0xC957FB85, 0xC958FB85, 0xC959FB85, 0xC95AFB85, 0xC95BFB85, 0xC95CFB85, 0xC95DFB85, 0xC95EFB85, 0xC95FFB85, 0xC960FB85, + 0xC961FB85, 0xC962FB85, 0xC963FB85, 0xC964FB85, 0xC965FB85, 0xC966FB85, 0xC967FB85, 0xC968FB85, 0xC969FB85, 0xC96AFB85, 0xC96BFB85, 0xC96CFB85, 0xC96DFB85, 0xC96EFB85, 0xC96FFB85, + 0xC970FB85, 0xC971FB85, 0xC972FB85, 0xC973FB85, 0xC974FB85, 0xC975FB85, 0xC976FB85, 0xC977FB85, 0xC978FB85, 0xC979FB85, 0xC97AFB85, 0xC97BFB85, 0xC97CFB85, 0xC97DFB85, 0xC97EFB85, + 0xC97FFB85, 0xC980FB85, 0xC981FB85, 0xC982FB85, 0xC983FB85, 0xC984FB85, 0xC985FB85, 0xC986FB85, 0xC987FB85, 0xC988FB85, 0xC989FB85, 0xC98AFB85, 0xC98BFB85, 0xC98CFB85, 0xC98DFB85, + 0xC98EFB85, 0xC98FFB85, 0xC990FB85, 0xC991FB85, 0xC992FB85, 0xC993FB85, 0xC994FB85, 0xC995FB85, 0xC996FB85, 0xC997FB85, 0xC998FB85, 0xC999FB85, 0xC99AFB85, 0xC99BFB85, 0xC99CFB85, + 0xC99DFB85, 0xC99EFB85, 0xC99FFB85, 0xC9A0FB85, 0xC9A1FB85, 0xC9A2FB85, 0xC9A3FB85, 0xC9A4FB85, 0xC9A5FB85, 0xC9A6FB85, 0xC9A7FB85, 0xC9A8FB85, 0xC9A9FB85, 0xC9AAFB85, 0xC9ABFB85, + 0xC9ACFB85, 0xC9ADFB85, 0xC9AEFB85, 0xC9AFFB85, 0xC9B0FB85, 0xC9B1FB85, 0xC9B2FB85, 0xC9B3FB85, 0xC9B4FB85, 0xC9B5FB85, 0xC9B6FB85, 0xC9B7FB85, 0xC9B8FB85, 0xC9B9FB85, 0xC9BAFB85, + 0xC9BBFB85, 0xC9BCFB85, 0xC9BDFB85, 0xC9BEFB85, 0xC9BFFB85, 0xC9C0FB85, 0xC9C1FB85, 0xC9C2FB85, 0xC9C3FB85, 0xC9C4FB85, 0xC9C5FB85, 0xC9C6FB85, 0xC9C7FB85, 0xC9C8FB85, 0xC9C9FB85, + 0xC9CAFB85, 0xC9CBFB85, 0xC9CCFB85, 0xC9CDFB85, 0xC9CEFB85, 0xC9CFFB85, 0xC9D0FB85, 0xC9D1FB85, 0xC9D2FB85, 0xC9D3FB85, 0xC9D4FB85, 0xC9D5FB85, 0xC9D6FB85, 0xC9D7FB85, 0xC9D8FB85, + 
0xC9D9FB85, 0xC9DAFB85, 0xC9DBFB85, 0xC9DCFB85, 0xC9DDFB85, 0xC9DEFB85, 0xC9DFFB85, 0xC9E0FB85, 0xC9E1FB85, 0xC9E2FB85, 0xC9E3FB85, 0xC9E4FB85, 0xC9E5FB85, 0xC9E6FB85, 0xC9E7FB85, + 0xC9E8FB85, 0xC9E9FB85, 0xC9EAFB85, 0xC9EBFB85, 0xC9ECFB85, 0xC9EDFB85, 0xC9EEFB85, 0xC9EFFB85, 0xC9F0FB85, 0xC9F1FB85, 0xC9F2FB85, 0xC9F3FB85, 0xC9F4FB85, 0xC9F5FB85, 0xC9F6FB85, + 0xC9F7FB85, 0xC9F8FB85, 0xC9F9FB85, 0xC9FAFB85, 0xC9FBFB85, 0xC9FCFB85, 0xC9FDFB85, 0xC9FEFB85, 0xC9FFFB85, 0xCA00FB85, 0xCA01FB85, 0xCA02FB85, 0xCA03FB85, 0xCA04FB85, 0xCA05FB85, + 0xCA06FB85, 0xCA07FB85, 0xCA08FB85, 0xCA09FB85, 0xCA0AFB85, 0xCA0BFB85, 0xCA0CFB85, 0xCA0DFB85, 0xCA0EFB85, 0xCA0FFB85, 0xCA10FB85, 0xCA11FB85, 0xCA12FB85, 0xCA13FB85, 0xCA14FB85, + 0xCA15FB85, 0xCA16FB85, 0xCA17FB85, 0xCA18FB85, 0xCA19FB85, 0xCA1AFB85, 0xCA1BFB85, 0xCA1CFB85, 0xCA1DFB85, 0xCA1EFB85, 0xCA1FFB85, 0xCA20FB85, 0xCA21FB85, 0xCA22FB85, 0xCA23FB85, + 0xCA24FB85, 0xCA25FB85, 0xCA26FB85, 0xCA27FB85, 0xCA28FB85, 0xCA29FB85, 0xCA2AFB85, 0xCA2BFB85, 0xCA2CFB85, 0xCA2DFB85, 0xCA2EFB85, 0xCA2FFB85, 0xCA30FB85, 0xCA31FB85, 0xCA32FB85, + 0xCA33FB85, 0xCA34FB85, 0xCA35FB85, 0xCA36FB85, 0xCA37FB85, 0xCA38FB85, 0xCA39FB85, 0xCA3AFB85, 0xCA3BFB85, 0xCA3CFB85, 0xCA3DFB85, 0xCA3EFB85, 0xCA3FFB85, 0xCA40FB85, 0xCA41FB85, + 0xCA42FB85, 0xCA43FB85, 0xCA44FB85, 0xCA45FB85, 0xCA46FB85, 0xCA47FB85, 0xCA48FB85, 0xCA49FB85, 0xCA4AFB85, 0xCA4BFB85, 0xCA4CFB85, 0xCA4DFB85, 0xCA4EFB85, 0xCA4FFB85, 0xCA50FB85, + 0xCA51FB85, 0xCA52FB85, 0xCA53FB85, 0xCA54FB85, 0xCA55FB85, 0xCA56FB85, 0xCA57FB85, 0xCA58FB85, 0xCA59FB85, 0xCA5AFB85, 0xCA5BFB85, 0xCA5CFB85, 0xCA5DFB85, 0xCA5EFB85, 0xCA5FFB85, + 0xCA60FB85, 0xCA61FB85, 0xCA62FB85, 0xCA63FB85, 0xCA64FB85, 0xCA65FB85, 0xCA66FB85, 0xCA67FB85, 0xCA68FB85, 0xCA69FB85, 0xCA6AFB85, 0xCA6BFB85, 0xCA6CFB85, 0xCA6DFB85, 0xCA6EFB85, + 0xCA6FFB85, 0xCA70FB85, 0xCA71FB85, 0xCA72FB85, 0xCA73FB85, 0xCA74FB85, 0xCA75FB85, 0xCA76FB85, 0xCA77FB85, 0xCA78FB85, 0xCA79FB85, 0xCA7AFB85, 0xCA7BFB85, 0xCA7CFB85, 0xCA7DFB85, 
+ 0xCA7EFB85, 0xCA7FFB85, 0xCA80FB85, 0xCA81FB85, 0xCA82FB85, 0xCA83FB85, 0xCA84FB85, 0xCA85FB85, 0xCA86FB85, 0xCA87FB85, 0xCA88FB85, 0xCA89FB85, 0xCA8AFB85, 0xCA8BFB85, 0xCA8CFB85, + 0xCA8DFB85, 0xCA8EFB85, 0xCA8FFB85, 0xCA90FB85, 0xCA91FB85, 0xCA92FB85, 0xCA93FB85, 0xCA94FB85, 0xCA95FB85, 0xCA96FB85, 0xCA97FB85, 0xCA98FB85, 0xCA99FB85, 0xCA9AFB85, 0xCA9BFB85, + 0xCA9CFB85, 0xCA9DFB85, 0xCA9EFB85, 0xCA9FFB85, 0xCAA0FB85, 0xCAA1FB85, 0xCAA2FB85, 0xCAA3FB85, 0xCAA4FB85, 0xCAA5FB85, 0xCAA6FB85, 0xCAA7FB85, 0xCAA8FB85, 0xCAA9FB85, 0xCAAAFB85, + 0xCAABFB85, 0xCAACFB85, 0xCAADFB85, 0xCAAEFB85, 0xCAAFFB85, 0xCAB0FB85, 0xCAB1FB85, 0xCAB2FB85, 0xCAB3FB85, 0xCAB4FB85, 0xCAB5FB85, 0xCAB6FB85, 0xCAB7FB85, 0xCAB8FB85, 0xCAB9FB85, + 0xCABAFB85, 0xCABBFB85, 0xCABCFB85, 0xCABDFB85, 0xCABEFB85, 0xCABFFB85, 0xCAC0FB85, 0xCAC1FB85, 0xCAC2FB85, 0xCAC3FB85, 0xCAC4FB85, 0xCAC5FB85, 0xCAC6FB85, 0xCAC7FB85, 0xCAC8FB85, + 0xCAC9FB85, 0xCACAFB85, 0xCACBFB85, 0xCACCFB85, 0xCACDFB85, 0xCACEFB85, 0xCACFFB85, 0xCAD0FB85, 0xCAD1FB85, 0xCAD2FB85, 0xCAD3FB85, 0xCAD4FB85, 0xCAD5FB85, 0xCAD6FB85, 0xCAD7FB85, + 0xCAD8FB85, 0xCAD9FB85, 0xCADAFB85, 0xCADBFB85, 0xCADCFB85, 0xCADDFB85, 0xCADEFB85, 0xCADFFB85, 0xCAE0FB85, 0xCAE1FB85, 0xCAE2FB85, 0xCAE3FB85, 0xCAE4FB85, 0xCAE5FB85, 0xCAE6FB85, + 0xCAE7FB85, 0xCAE8FB85, 0xCAE9FB85, 0xCAEAFB85, 0xCAEBFB85, 0xCAECFB85, 0xCAEDFB85, 0xCAEEFB85, 0xCAEFFB85, 0xCAF0FB85, 0xCAF1FB85, 0xCAF2FB85, 0xCAF3FB85, 0xCAF4FB85, 0xCAF5FB85, + 0xCAF6FB85, 0xCAF7FB85, 0xCAF8FB85, 0xCAF9FB85, 0xCAFAFB85, 0xCAFBFB85, 0xCAFCFB85, 0xCAFDFB85, 0xCAFEFB85, 0xCAFFFB85, 0xCB00FB85, 0xCB01FB85, 0xCB02FB85, 0xCB03FB85, 0xCB04FB85, + 0xCB05FB85, 0xCB06FB85, 0xCB07FB85, 0xCB08FB85, 0xCB09FB85, 0xCB0AFB85, 0xCB0BFB85, 0xCB0CFB85, 0xCB0DFB85, 0xCB0EFB85, 0xCB0FFB85, 0xCB10FB85, 0xCB11FB85, 0xCB12FB85, 0xCB13FB85, + 0xCB14FB85, 0xCB15FB85, 0xCB16FB85, 0xCB17FB85, 0xCB18FB85, 0xCB19FB85, 0xCB1AFB85, 0xCB1BFB85, 0xCB1CFB85, 0xCB1DFB85, 0xCB1EFB85, 0xCB1FFB85, 0xCB20FB85, 0xCB21FB85, 
0xCB22FB85, + 0xCB23FB85, 0xCB24FB85, 0xCB25FB85, 0xCB26FB85, 0xCB27FB85, 0xCB28FB85, 0xCB29FB85, 0xCB2AFB85, 0xCB2BFB85, 0xCB2CFB85, 0xCB2DFB85, 0xCB2EFB85, 0xCB2FFB85, 0xCB30FB85, 0xCB31FB85, + 0xCB32FB85, 0xCB33FB85, 0xCB34FB85, 0xCB35FB85, 0xCB36FB85, 0xCB37FB85, 0xCB38FB85, 0xCB39FB85, 0xCB3AFB85, 0xCB3BFB85, 0xCB3CFB85, 0xCB3DFB85, 0xCB3EFB85, 0xCB3FFB85, 0xCB40FB85, + 0xCB41FB85, 0xCB42FB85, 0xCB43FB85, 0xCB44FB85, 0xCB45FB85, 0xCB46FB85, 0xCB47FB85, 0xCB48FB85, 0xCB49FB85, 0xCB4AFB85, 0xCB4BFB85, 0xCB4CFB85, 0xCB4DFB85, 0xCB4EFB85, 0xCB4FFB85, + 0xCB50FB85, 0xCB51FB85, 0xCB52FB85, 0xCB53FB85, 0xCB54FB85, 0xCB55FB85, 0xCB56FB85, 0xCB57FB85, 0xCB58FB85, 0xCB59FB85, 0xCB5AFB85, 0xCB5BFB85, 0xCB5CFB85, 0xCB5DFB85, 0xCB5EFB85, + 0xCB5FFB85, 0xCB60FB85, 0xCB61FB85, 0xCB62FB85, 0xCB63FB85, 0xCB64FB85, 0xCB65FB85, 0xCB66FB85, 0xCB67FB85, 0xCB68FB85, 0xCB69FB85, 0xCB6AFB85, 0xCB6BFB85, 0xCB6CFB85, 0xCB6DFB85, + 0xCB6EFB85, 0xCB6FFB85, 0xCB70FB85, 0xCB71FB85, 0xCB72FB85, 0xCB73FB85, 0xCB74FB85, 0xCB75FB85, 0xCB76FB85, 0xCB77FB85, 0xCB78FB85, 0xCB79FB85, 0xCB7AFB85, 0xCB7BFB85, 0xCB7CFB85, + 0xCB7DFB85, 0xCB7EFB85, 0xCB7FFB85, 0xCB80FB85, 0xCB81FB85, 0xCB82FB85, 0xCB83FB85, 0xCB84FB85, 0xCB85FB85, 0xCB86FB85, 0xCB87FB85, 0xCB88FB85, 0xCB89FB85, 0xCB8AFB85, 0xCB8BFB85, + 0xCB8CFB85, 0xCB8DFB85, 0xCB8EFB85, 0xCB8FFB85, 0xCB90FB85, 0xCB91FB85, 0xCB92FB85, 0xCB93FB85, 0xCB94FB85, 0xCB95FB85, 0xCB96FB85, 0xCB97FB85, 0xCB98FB85, 0xCB99FB85, 0xCB9AFB85, + 0xCB9BFB85, 0xCB9CFB85, 0xCB9DFB85, 0xCB9EFB85, 0xCB9FFB85, 0xCBA0FB85, 0xCBA1FB85, 0xCBA2FB85, 0xCBA3FB85, 0xCBA4FB85, 0xCBA5FB85, 0xCBA6FB85, 0xCBA7FB85, 0xCBA8FB85, 0xCBA9FB85, + 0xCBAAFB85, 0xCBABFB85, 0xCBACFB85, 0xCBADFB85, 0xCBAEFB85, 0xCBAFFB85, 0xCBB0FB85, 0xCBB1FB85, 0xCBB2FB85, 0xCBB3FB85, 0xCBB4FB85, 0xCBB5FB85, 0xCBB6FB85, 0xCBB7FB85, 0xCBB8FB85, + 0xCBB9FB85, 0xCBBAFB85, 0xCBBBFB85, 0xCBBCFB85, 0xCBBDFB85, 0xCBBEFB85, 0xCBBFFB85, 0xCBC0FB85, 0xCBC1FB85, 0xCBC2FB85, 0xCBC3FB85, 0xCBC4FB85, 0xCBC5FB85, 
0xCBC6FB85, 0xCBC7FB85, + 0xCBC8FB85, 0xCBC9FB85, 0xCBCAFB85, 0xCBCBFB85, 0xCBCCFB85, 0xCBCDFB85, 0xCBCEFB85, 0xCBCFFB85, 0xCBD0FB85, 0xCBD1FB85, 0xCBD2FB85, 0xCBD3FB85, 0xCBD4FB85, 0xCBD5FB85, 0xCBD6FB85, + 0xCBD7FB85, 0xCBD8FB85, 0xCBD9FB85, 0xCBDAFB85, 0xCBDBFB85, 0xCBDCFB85, 0xCBDDFB85, 0xCBDEFB85, 0xCBDFFB85, 0xCBE0FB85, 0xCBE1FB85, 0xCBE2FB85, 0xCBE3FB85, 0xCBE4FB85, 0xCBE5FB85, + 0xCBE6FB85, 0xCBE7FB85, 0xCBE8FB85, 0xCBE9FB85, 0xCBEAFB85, 0xCBEBFB85, 0xCBECFB85, 0xCBEDFB85, 0xCBEEFB85, 0xCBEFFB85, 0xCBF0FB85, 0xCBF1FB85, 0xCBF2FB85, 0xCBF3FB85, 0xCBF4FB85, + 0xCBF5FB85, 0xCBF6FB85, 0xCBF7FB85, 0xCBF8FB85, 0xCBF9FB85, 0xCBFAFB85, 0xCBFBFB85, 0xCBFCFB85, 0xCBFDFB85, 0xCBFEFB85, 0xCBFFFB85, 0xCC00FB85, 0xCC01FB85, 0xCC02FB85, 0xCC03FB85, + 0xCC04FB85, 0xCC05FB85, 0xCC06FB85, 0xCC07FB85, 0xCC08FB85, 0xCC09FB85, 0xCC0AFB85, 0xCC0BFB85, 0xCC0CFB85, 0xCC0DFB85, 0xCC0EFB85, 0xCC0FFB85, 0xCC10FB85, 0xCC11FB85, 0xCC12FB85, + 0xCC13FB85, 0xCC14FB85, 0xCC15FB85, 0xCC16FB85, 0xCC17FB85, 0xCC18FB85, 0xCC19FB85, 0xCC1AFB85, 0xCC1BFB85, 0xCC1CFB85, 0xCC1DFB85, 0xCC1EFB85, 0xCC1FFB85, 0xCC20FB85, 0xCC21FB85, + 0xCC22FB85, 0xCC23FB85, 0xCC24FB85, 0xCC25FB85, 0xCC26FB85, 0xCC27FB85, 0xCC28FB85, 0xCC29FB85, 0xCC2AFB85, 0xCC2BFB85, 0xCC2CFB85, 0xCC2DFB85, 0xCC2EFB85, 0xCC2FFB85, 0xCC30FB85, + 0xCC31FB85, 0xCC32FB85, 0xCC33FB85, 0xCC34FB85, 0xCC35FB85, 0xCC36FB85, 0xCC37FB85, 0xCC38FB85, 0xCC39FB85, 0xCC3AFB85, 0xCC3BFB85, 0xCC3CFB85, 0xCC3DFB85, 0xCC3EFB85, 0xCC3FFB85, + 0xCC40FB85, 0xCC41FB85, 0xCC42FB85, 0xCC43FB85, 0xCC44FB85, 0xCC45FB85, 0xCC46FB85, 0xCC47FB85, 0xCC48FB85, 0xCC49FB85, 0xCC4AFB85, 0xCC4BFB85, 0xCC4CFB85, 0xCC4DFB85, 0xCC4EFB85, + 0xCC4FFB85, 0xCC50FB85, 0xCC51FB85, 0xCC52FB85, 0xCC53FB85, 0xCC54FB85, 0xCC55FB85, 0xCC56FB85, 0xCC57FB85, 0xCC58FB85, 0xCC59FB85, 0xCC5AFB85, 0xCC5BFB85, 0xCC5CFB85, 0xCC5DFB85, + 0xCC5EFB85, 0xCC5FFB85, 0xCC60FB85, 0xCC61FB85, 0xCC62FB85, 0xCC63FB85, 0xCC64FB85, 0xCC65FB85, 0xCC66FB85, 0xCC67FB85, 0xCC68FB85, 0xCC69FB85, 
0xCC6AFB85, 0xCC6BFB85, 0xCC6CFB85, + 0xCC6DFB85, 0xCC6EFB85, 0xCC6FFB85, 0xCC70FB85, 0xCC71FB85, 0xCC72FB85, 0xCC73FB85, 0xCC74FB85, 0xCC75FB85, 0xCC76FB85, 0xCC77FB85, 0xCC78FB85, 0xCC79FB85, 0xCC7AFB85, 0xCC7BFB85, + 0xCC7CFB85, 0xCC7DFB85, 0xCC7EFB85, 0xCC7FFB85, 0xCC80FB85, 0xCC81FB85, 0xCC82FB85, 0xCC83FB85, 0xCC84FB85, 0xCC85FB85, 0xCC86FB85, 0xCC87FB85, 0xCC88FB85, 0xCC89FB85, 0xCC8AFB85, + 0xCC8BFB85, 0xCC8CFB85, 0xCC8DFB85, 0xCC8EFB85, 0xCC8FFB85, 0xCC90FB85, 0xCC91FB85, 0xCC92FB85, 0xCC93FB85, 0xCC94FB85, 0xCC95FB85, 0xCC96FB85, 0xCC97FB85, 0xCC98FB85, 0xCC99FB85, + 0xCC9AFB85, 0xCC9BFB85, 0xCC9CFB85, 0xCC9DFB85, 0xCC9EFB85, 0xCC9FFB85, 0xCCA0FB85, 0xCCA1FB85, 0xCCA2FB85, 0xCCA3FB85, 0xCCA4FB85, 0xCCA5FB85, 0xCCA6FB85, 0xCCA7FB85, 0xCCA8FB85, + 0xCCA9FB85, 0xCCAAFB85, 0xCCABFB85, 0xCCACFB85, 0xCCADFB85, 0xCCAEFB85, 0xCCAFFB85, 0xCCB0FB85, 0xCCB1FB85, 0xCCB2FB85, 0xCCB3FB85, 0xCCB4FB85, 0xCCB5FB85, 0xCCB6FB85, 0xCCB7FB85, + 0xCCB8FB85, 0xCCB9FB85, 0xCCBAFB85, 0xCCBBFB85, 0xCCBCFB85, 0xCCBDFB85, 0xCCBEFB85, 0xCCBFFB85, 0xCCC0FB85, 0xCCC1FB85, 0xCCC2FB85, 0xCCC3FB85, 0xCCC4FB85, 0xCCC5FB85, 0xCCC6FB85, + 0xCCC7FB85, 0xCCC8FB85, 0xCCC9FB85, 0xCCCAFB85, 0xCCCBFB85, 0xCCCCFB85, 0xCCCDFB85, 0xCCCEFB85, 0xCCCFFB85, 0xCCD0FB85, 0xCCD1FB85, 0xCCD2FB85, 0xCCD3FB85, 0xCCD4FB85, 0xCCD5FB85, + 0xCCD6FB85, 0xCCD7FB85, 0xCCD8FB85, 0xCCD9FB85, 0xCCDAFB85, 0xCCDBFB85, 0xCCDCFB85, 0xCCDDFB85, 0xCCDEFB85, 0xCCDFFB85, 0xCCE0FB85, 0xCCE1FB85, 0xCCE2FB85, 0xCCE3FB85, 0xCCE4FB85, + 0xCCE5FB85, 0xCCE6FB85, 0xCCE7FB85, 0xCCE8FB85, 0xCCE9FB85, 0xCCEAFB85, 0xCCEBFB85, 0xCCECFB85, 0xCCEDFB85, 0xCCEEFB85, 0xCCEFFB85, 0xCCF0FB85, 0xCCF1FB85, 0xCCF2FB85, 0xCCF3FB85, + 0xCCF4FB85, 0xCCF5FB85, 0xCCF6FB85, 0xCCF7FB85, 0xCCF8FB85, 0xCCF9FB85, 0xCCFAFB85, 0xCCFBFB85, 0xCCFCFB85, 0xCCFDFB85, 0xCCFEFB85, 0xCCFFFB85, 0xCD00FB85, 0xCD01FB85, 0xCD02FB85, + 0xCD03FB85, 0xCD04FB85, 0xCD05FB85, 0xCD06FB85, 0xCD07FB85, 0xCD08FB85, 0xCD09FB85, 0xCD0AFB85, 0xCD0BFB85, 0xCD0CFB85, 0xCD0DFB85, 
0xCD0EFB85, 0xCD0FFB85, 0xCD10FB85, 0xCD11FB85, + 0xCD12FB85, 0xCD13FB85, 0xCD14FB85, 0xCD15FB85, 0xCD16FB85, 0xCD17FB85, 0xCD18FB85, 0xCD19FB85, 0xCD1AFB85, 0xCD1BFB85, 0xCD1CFB85, 0xCD1DFB85, 0xCD1EFB85, 0xCD1FFB85, 0xCD20FB85, + 0xCD21FB85, 0xCD22FB85, 0xCD23FB85, 0xCD24FB85, 0xCD25FB85, 0xCD26FB85, 0xCD27FB85, 0xCD28FB85, 0xCD29FB85, 0xCD2AFB85, 0xCD2BFB85, 0xCD2CFB85, 0xCD2DFB85, 0xCD2EFB85, 0xCD2FFB85, + 0xCD30FB85, 0xCD31FB85, 0xCD32FB85, 0xCD33FB85, 0xCD34FB85, 0xCD35FB85, 0xCD36FB85, 0xCD37FB85, 0xCD38FB85, 0xCD39FB85, 0xCD3AFB85, 0xCD3BFB85, 0xCD3CFB85, 0xCD3DFB85, 0xCD3EFB85, + 0xCD3FFB85, 0xCD40FB85, 0xCD41FB85, 0xCD42FB85, 0xCD43FB85, 0xCD44FB85, 0xCD45FB85, 0xCD46FB85, 0xCD47FB85, 0xCD48FB85, 0xCD49FB85, 0xCD4AFB85, 0xCD4BFB85, 0xCD4CFB85, 0xCD4DFB85, + 0xCD4EFB85, 0xCD4FFB85, 0xCD50FB85, 0xCD51FB85, 0xCD52FB85, 0xCD53FB85, 0xCD54FB85, 0xCD55FB85, 0xCD56FB85, 0xCD57FB85, 0xCD58FB85, 0xCD59FB85, 0xCD5AFB85, 0xCD5BFB85, 0xCD5CFB85, + 0xCD5DFB85, 0xCD5EFB85, 0xCD5FFB85, 0xCD60FB85, 0xCD61FB85, 0xCD62FB85, 0xCD63FB85, 0xCD64FB85, 0xCD65FB85, 0xCD66FB85, 0xCD67FB85, 0xCD68FB85, 0xCD69FB85, 0xCD6AFB85, 0xCD6BFB85, + 0xCD6CFB85, 0xCD6DFB85, 0xCD6EFB85, 0xCD6FFB85, 0xCD70FB85, 0xCD71FB85, 0xCD72FB85, 0xCD73FB85, 0xCD74FB85, 0xCD75FB85, 0xCD76FB85, 0xCD77FB85, 0xCD78FB85, 0xCD79FB85, 0xCD7AFB85, + 0xCD7BFB85, 0xCD7CFB85, 0xCD7DFB85, 0xCD7EFB85, 0xCD7FFB85, 0xCD80FB85, 0xCD81FB85, 0xCD82FB85, 0xCD83FB85, 0xCD84FB85, 0xCD85FB85, 0xCD86FB85, 0xCD87FB85, 0xCD88FB85, 0xCD89FB85, + 0xCD8AFB85, 0xCD8BFB85, 0xCD8CFB85, 0xCD8DFB85, 0xCD8EFB85, 0xCD8FFB85, 0xCD90FB85, 0xCD91FB85, 0xCD92FB85, 0xCD93FB85, 0xCD94FB85, 0xCD95FB85, 0xCD96FB85, 0xCD97FB85, 0xCD98FB85, + 0xCD99FB85, 0xCD9AFB85, 0xCD9BFB85, 0xCD9CFB85, 0xCD9DFB85, 0xCD9EFB85, 0xCD9FFB85, 0xCDA0FB85, 0xCDA1FB85, 0xCDA2FB85, 0xCDA3FB85, 0xCDA4FB85, 0xCDA5FB85, 0xCDA6FB85, 0xCDA7FB85, + 0xCDA8FB85, 0xCDA9FB85, 0xCDAAFB85, 0xCDABFB85, 0xCDACFB85, 0xCDADFB85, 0xCDAEFB85, 0xCDAFFB85, 0xCDB0FB85, 0xCDB1FB85, 
0xCDB2FB85, 0xCDB3FB85, 0xCDB4FB85, 0xCDB5FB85, 0xCDB6FB85, + 0xCDB7FB85, 0xCDB8FB85, 0xCDB9FB85, 0xCDBAFB85, 0xCDBBFB85, 0xCDBCFB85, 0xCDBDFB85, 0xCDBEFB85, 0xCDBFFB85, 0xCDC0FB85, 0xCDC1FB85, 0xCDC2FB85, 0xCDC3FB85, 0xCDC4FB85, 0xCDC5FB85, + 0xCDC6FB85, 0xCDC7FB85, 0xCDC8FB85, 0xCDC9FB85, 0xCDCAFB85, 0xCDCBFB85, 0xCDCCFB85, 0xCDCDFB85, 0xCDCEFB85, 0xCDCFFB85, 0xCDD0FB85, 0xCDD1FB85, 0xCDD2FB85, 0xCDD3FB85, 0xCDD4FB85, + 0xCDD5FB85, 0xCDD6FB85, 0xCDD7FB85, 0xCDD8FB85, 0xCDD9FB85, 0xCDDAFB85, 0xCDDBFB85, 0xCDDCFB85, 0xCDDDFB85, 0xCDDEFB85, 0xCDDFFB85, 0xCDE0FB85, 0xCDE1FB85, 0xCDE2FB85, 0xCDE3FB85, + 0xCDE4FB85, 0xCDE5FB85, 0xCDE6FB85, 0xCDE7FB85, 0xCDE8FB85, 0xCDE9FB85, 0xCDEAFB85, 0xCDEBFB85, 0xCDECFB85, 0xCDEDFB85, 0xCDEEFB85, 0xCDEFFB85, 0xCDF0FB85, 0xCDF1FB85, 0xCDF2FB85, + 0xCDF3FB85, 0xCDF4FB85, 0xCDF5FB85, 0xCDF6FB85, 0xCDF7FB85, 0xCDF8FB85, 0xCDF9FB85, 0xCDFAFB85, 0xCDFBFB85, 0xCDFCFB85, 0xCDFDFB85, 0xCDFEFB85, 0xCDFFFB85, 0xCE00FB85, 0xCE01FB85, + 0xCE02FB85, 0xCE03FB85, 0xCE04FB85, 0xCE05FB85, 0xCE06FB85, 0xCE07FB85, 0xCE08FB85, 0xCE09FB85, 0xCE0AFB85, 0xCE0BFB85, 0xCE0CFB85, 0xCE0DFB85, 0xCE0EFB85, 0xCE0FFB85, 0xCE10FB85, + 0xCE11FB85, 0xCE12FB85, 0xCE13FB85, 0xCE14FB85, 0xCE15FB85, 0xCE16FB85, 0xCE17FB85, 0xCE18FB85, 0xCE19FB85, 0xCE1AFB85, 0xCE1BFB85, 0xCE1CFB85, 0xCE1DFB85, 0xCE1EFB85, 0xCE1FFB85, + 0xCE20FB85, 0xCE21FB85, 0xCE22FB85, 0xCE23FB85, 0xCE24FB85, 0xCE25FB85, 0xCE26FB85, 0xCE27FB85, 0xCE28FB85, 0xCE29FB85, 0xCE2AFB85, 0xCE2BFB85, 0xCE2CFB85, 0xCE2DFB85, 0xCE2EFB85, + 0xCE2FFB85, 0xCE30FB85, 0xCE31FB85, 0xCE32FB85, 0xCE33FB85, 0xCE34FB85, 0xCE35FB85, 0xCE36FB85, 0xCE37FB85, 0xCE38FB85, 0xCE39FB85, 0xCE3AFB85, 0xCE3BFB85, 0xCE3CFB85, 0xCE3DFB85, + 0xCE3EFB85, 0xCE3FFB85, 0xCE40FB85, 0xCE41FB85, 0xCE42FB85, 0xCE43FB85, 0xCE44FB85, 0xCE45FB85, 0xCE46FB85, 0xCE47FB85, 0xCE48FB85, 0xCE49FB85, 0xCE4AFB85, 0xCE4BFB85, 0xCE4CFB85, + 0xCE4DFB85, 0xCE4EFB85, 0xCE4FFB85, 0xCE50FB85, 0xCE51FB85, 0xCE52FB85, 0xCE53FB85, 0xCE54FB85, 0xCE55FB85, 
0xCE56FB85, 0xCE57FB85, 0xCE58FB85, 0xCE59FB85, 0xCE5AFB85, 0xCE5BFB85, + 0xCE5CFB85, 0xCE5DFB85, 0xCE5EFB85, 0xCE5FFB85, 0xCE60FB85, 0xCE61FB85, 0xCE62FB85, 0xCE63FB85, 0xCE64FB85, 0xCE65FB85, 0xCE66FB85, 0xCE67FB85, 0xCE68FB85, 0xCE69FB85, 0xCE6AFB85, + 0xCE6BFB85, 0xCE6CFB85, 0xCE6DFB85, 0xCE6EFB85, 0xCE6FFB85, 0xCE70FB85, 0xCE71FB85, 0xCE72FB85, 0xCE73FB85, 0xCE74FB85, 0xCE75FB85, 0xCE76FB85, 0xCE77FB85, 0xCE78FB85, 0xCE79FB85, + 0xCE7AFB85, 0xCE7BFB85, 0xCE7CFB85, 0xCE7DFB85, 0xCE7EFB85, 0xCE7FFB85, 0xCE80FB85, 0xCE81FB85, 0xCE82FB85, 0xCE83FB85, 0xCE84FB85, 0xCE85FB85, 0xCE86FB85, 0xCE87FB85, 0xCE88FB85, + 0xCE89FB85, 0xCE8AFB85, 0xCE8BFB85, 0xCE8CFB85, 0xCE8DFB85, 0xCE8EFB85, 0xCE8FFB85, 0xCE90FB85, 0xCE91FB85, 0xCE92FB85, 0xCE93FB85, 0xCE94FB85, 0xCE95FB85, 0xCE96FB85, 0xCE97FB85, + 0xCE98FB85, 0xCE99FB85, 0xCE9AFB85, 0xCE9BFB85, 0xCE9CFB85, 0xCE9DFB85, 0xCE9EFB85, 0xCE9FFB85, 0xCEA0FB85, +]; diff --git a/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/mod.rs b/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/mod.rs new file mode 100644 index 00000000000..007c8ae4b03 --- /dev/null +++ b/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/mod.rs @@ -0,0 +1,114 @@ +// Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. + +mod data_0400; +mod data_0900; + +use std::{fmt::Debug, marker::PhantomData}; + +use super::*; + +/// Collator for `utf8mb4_unicode_ci` collation with padding behavior (trims +/// right spaces). 
+pub type CollatorUtf8Mb4UnicodeCi = CollatorUca; + +/// Collator for `utf8mb4_0900_ai-ci` collation without padding +pub type CollatorUtf8Mb40900AiCi = CollatorUca; + +pub trait UnicodeVersion: 'static + Send + Sync + Debug { + fn preprocess(s: &str) -> &str; + + fn char_weight(ch: char) -> u128; +} + +#[derive(Debug)] +pub struct CollatorUca { + _impl: PhantomData, +} + +impl Collator for CollatorUca { + type Charset = CharsetUtf8mb4; + type Weight = u128; + + const IS_CASE_INSENSITIVE: bool = true; + + #[inline] + fn char_weight(ch: char) -> Self::Weight { + T::char_weight(ch) + } + + #[inline] + fn write_sort_key(writer: &mut W, bstr: &[u8]) -> Result { + let s = T::preprocess(str::from_utf8(bstr)?); + + let mut n = 0; + for ch in s.chars() { + let mut weight = Self::char_weight(ch); + while weight != 0 { + writer.write_u16_be((weight & 0xFFFF) as u16)?; + n += 1; + weight >>= 16 + } + } + Ok(n * std::mem::size_of::()) + } + + #[inline] + fn sort_compare(a: &[u8], b: &[u8]) -> Result { + let mut ca = T::preprocess(str::from_utf8(a)?).chars(); + let mut cb = T::preprocess(str::from_utf8(b)?).chars(); + let mut an = 0; + let mut bn = 0; + + loop { + if an == 0 { + for ach in &mut ca { + an = Self::char_weight(ach); + if an != 0 { + break; + } + } + } + + if bn == 0 { + for bch in &mut cb { + bn = Self::char_weight(bch); + if bn != 0 { + break; + } + } + } + + if an == 0 || bn == 0 { + return Ok(an.cmp(&bn)); + } + + if an == bn { + an = 0; + bn = 0; + continue; + } + + while an != 0 && bn != 0 { + if (an ^ bn) & 0xFFFF == 0 { + an >>= 16; + bn >>= 16; + } else { + return Ok((an & 0xFFFF).cmp(&(bn & 0xFFFF))); + } + } + } + } + + #[inline] + fn sort_hash(state: &mut H, bstr: &[u8]) -> Result<()> { + let s = T::preprocess(str::from_utf8(bstr)?); + for ch in s.chars() { + let mut weight = Self::char_weight(ch); + while weight != 0 { + (weight & 0xFFFF).hash(state); + weight >>= 16; + } + } + Ok(()) + } +} diff --git 
a/components/tidb_query_datatype/src/codec/collation/mod.rs b/components/tidb_query_datatype/src/codec/collation/mod.rs index 9fbef4f1ee2..f1fea754e87 100644 --- a/components/tidb_query_datatype/src/codec/collation/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/mod.rs @@ -32,6 +32,7 @@ macro_rules! match_template_collator { Utf8Mb4BinNoPadding => CollatorUtf8Mb4BinNoPadding, Utf8Mb4GeneralCi => CollatorUtf8Mb4GeneralCi, Utf8Mb4UnicodeCi => CollatorUtf8Mb4UnicodeCi, + Utf8Mb40900AiCi => CollatorUtf8Mb40900AiCi, Latin1Bin => CollatorLatin1Bin, GbkBin => CollatorGbkBin, GbkChineseCi => CollatorGbkChineseCi, diff --git a/components/tidb_query_datatype/src/def/field_type.rs b/components/tidb_query_datatype/src/def/field_type.rs index e8debe626f7..bbaa1e42737 100644 --- a/components/tidb_query_datatype/src/def/field_type.rs +++ b/components/tidb_query_datatype/src/def/field_type.rs @@ -110,6 +110,7 @@ pub enum Collation { Utf8Mb4BinNoPadding = 46, Utf8Mb4GeneralCi = -45, Utf8Mb4UnicodeCi = -224, + Utf8Mb40900AiCi = -255, Latin1Bin = -47, GbkBin = -87, GbkChineseCi = -28, @@ -130,6 +131,7 @@ impl Collation { -224 | -192 => Ok(Collation::Utf8Mb4UnicodeCi), -87 => Ok(Collation::GbkBin), -28 => Ok(Collation::GbkChineseCi), + -255 => Ok(Collation::Utf8Mb40900AiCi), n if n >= 0 => Ok(Collation::Utf8Mb4BinNoPadding), n => Err(DataTypeError::UnsupportedCollation { code: n }), } @@ -530,7 +532,7 @@ mod tests { (83, Some(Collation::Utf8Mb4BinNoPadding)), (-83, Some(Collation::Utf8Mb4Bin)), (255, Some(Collation::Utf8Mb4BinNoPadding)), - (-255, None), + (-255, Some(Collation::Utf8Mb40900AiCi)), (i32::MAX, Some(Collation::Utf8Mb4BinNoPadding)), (i32::MIN, None), (-192, Some(Collation::Utf8Mb4UnicodeCi)), diff --git a/components/tidb_query_expr/src/impl_compare.rs b/components/tidb_query_expr/src/impl_compare.rs index 3eae996f249..4c2cbb58dfc 100644 --- a/components/tidb_query_expr/src/impl_compare.rs +++ b/components/tidb_query_expr/src/impl_compare.rs @@ -979,6 
+979,7 @@ mod tests { Ordering::Equal, Ordering::Equal, Ordering::Equal, + Ordering::Less, ], ), ( @@ -990,6 +991,7 @@ mod tests { Ordering::Less, Ordering::Less, Ordering::Less, + Ordering::Less, ], ), ( @@ -1001,6 +1003,7 @@ mod tests { Ordering::Greater, Ordering::Equal, Ordering::Equal, + Ordering::Equal, ], ), ( @@ -1012,6 +1015,7 @@ mod tests { Ordering::Greater, Ordering::Equal, Ordering::Equal, + Ordering::Less, ], ), ( @@ -1023,6 +1027,7 @@ mod tests { Ordering::Equal, Ordering::Equal, Ordering::Equal, + Ordering::Less, ], ), ( @@ -1034,6 +1039,7 @@ mod tests { Ordering::Greater, Ordering::Equal, Ordering::Equal, + Ordering::Equal, ], ), ( @@ -1045,6 +1051,7 @@ mod tests { Ordering::Greater, Ordering::Greater, Ordering::Greater, + Ordering::Greater, ], ), ( @@ -1056,6 +1063,7 @@ mod tests { Ordering::Greater, Ordering::Greater, Ordering::Greater, + Ordering::Greater, ], ), ( @@ -1067,6 +1075,7 @@ mod tests { Ordering::Less, Ordering::Less, Ordering::Less, + Ordering::Less, ], ), ( @@ -1078,6 +1087,7 @@ mod tests { Ordering::Less, Ordering::Equal, Ordering::Equal, + Ordering::Equal, ], ), ( @@ -1089,6 +1099,7 @@ mod tests { Ordering::Greater, Ordering::Less, Ordering::Less, + Ordering::Less, ], ), ( @@ -1100,6 +1111,7 @@ mod tests { Ordering::Greater, Ordering::Equal, Ordering::Equal, + Ordering::Greater, ], ), ( @@ -1111,6 +1123,7 @@ mod tests { Ordering::Greater, Ordering::Less, Ordering::Equal, + Ordering::Equal, ], ), ]; @@ -1120,6 +1133,7 @@ mod tests { (Collation::Utf8Mb4Bin, 2), (Collation::Utf8Mb4GeneralCi, 3), (Collation::Utf8Mb4UnicodeCi, 4), + (Collation::Utf8Mb40900AiCi, 5), ]; for (str_a, str_b, ordering_in_collations) in cases { diff --git a/components/tidb_query_expr/src/impl_like.rs b/components/tidb_query_expr/src/impl_like.rs index 2fe99017fe0..bd00c1d888b 100644 --- a/components/tidb_query_expr/src/impl_like.rs +++ b/components/tidb_query_expr/src/impl_like.rs @@ -199,6 +199,7 @@ mod tests { Collation::Utf8Mb4GeneralCi, Some(0), ), + 
(r#"Ⱕ"#, r#"ⱕ"#, '\\', Collation::Utf8Mb40900AiCi, Some(1)), ]; for (target, pattern, escape, collation, expected) in cases { let output = RpnFnScalarEvaluator::new() From d23f762f930c9666ce0ef215d5fa68d6c7cfff0d Mon Sep 17 00:00:00 2001 From: lucasliang Date: Thu, 17 Aug 2023 16:11:01 +0800 Subject: [PATCH 0852/1149] raftstore & raftstore-v2: bugfix when detecting hung jitters on disk-io. (#15291) ref tikv/tikv#15070, close tikv/tikv#15268 Make a bugfix on the detection strategy on slowness when there exists io-hang exceptions. Signed-off-by: lucasliang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/worker/pd/store.rs | 6 +++++- .../raftstore-v2/tests/failpoints/test_pd_heartbeat.rs | 5 ++++- components/raftstore/src/store/worker/pd.rs | 6 +++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/components/raftstore-v2/src/worker/pd/store.rs b/components/raftstore-v2/src/worker/pd/store.rs index f6aaa7f6ab0..a5aad42d85c 100644 --- a/components/raftstore-v2/src/worker/pd/store.rs +++ b/components/raftstore-v2/src/worker/pd/store.rs @@ -414,7 +414,11 @@ where let now = UnixSecs::now(); let interval_second = now.into_inner() - self.store_stat.last_report_ts.into_inner(); let store_heartbeat_interval = std::cmp::max(self.store_heartbeat_interval.as_secs(), 1); - (interval_second >= store_heartbeat_interval) + // Only if the `last_report_ts`, that is, the last timestamp of + // store_heartbeat, exceeds the interval of store heartbaet but less than + // the given limitation, will it trigger a report of fake heartbeat to + // make the statistics of slowness percepted by PD timely. 
+ (interval_second > store_heartbeat_interval) && (interval_second <= STORE_HEARTBEAT_DELAY_LIMIT) && (interval_second % store_heartbeat_interval == 0) } diff --git a/components/raftstore-v2/tests/failpoints/test_pd_heartbeat.rs b/components/raftstore-v2/tests/failpoints/test_pd_heartbeat.rs index b4faa3a8f13..f175e3cd5c9 100644 --- a/components/raftstore-v2/tests/failpoints/test_pd_heartbeat.rs +++ b/components/raftstore-v2/tests/failpoints/test_pd_heartbeat.rs @@ -31,15 +31,18 @@ fn test_fake_store_heartbeat() { } // Inject failpoints to trigger reporting fake store heartbeat to pd. fail::cfg("mock_slowness_last_tick_unfinished", "return(0)").unwrap(); - std::thread::sleep(std::time::Duration::from_millis(50)); + std::thread::sleep(std::time::Duration::from_secs(1)); let after_stats = block_on(cluster.node(0).pd_client().get_store_stats_async(store_id)).unwrap(); assert_ne!(after_stats.get_capacity(), 0); assert_ne!(after_stats.get_used_size(), 0); assert_eq!(after_stats.get_keys_written(), 0); if after_stats.get_start_time() == 0 { + // It means that current store_heartbeat is timeout, and triggers a fake + // heartbeat. assert!(after_stats.get_is_busy()); } else { + // Normal. 
assert!(!after_stats.get_is_busy()); } diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 8ae8d7fc5ed..c73346505f4 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1977,7 +1977,11 @@ where fn is_store_heartbeat_delayed(&self) -> bool { let now = UnixSecs::now(); let interval_second = now.into_inner() - self.store_stat.last_report_ts.into_inner(); - (interval_second >= self.store_heartbeat_interval.as_secs()) + // Only if the `last_report_ts`, that is, the last timestamp of + // store_heartbeat, exceeds the interval of store heartbaet but less than + // the given limitation, will it trigger a report of fake heartbeat to + // make the statistics of slowness percepted by PD timely. + (interval_second > self.store_heartbeat_interval.as_secs()) && (interval_second <= STORE_HEARTBEAT_DELAY_LIMIT) } From f5b30021f1f54d0a0d5a2d30972b2be712afe65d Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 17 Aug 2023 02:23:01 -0700 Subject: [PATCH 0853/1149] raftstore: online unsafe recovery aborts on timeout (#15283) close tikv/tikv#15346 Make online unsafe recovery abort on timeout Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../src/operation/unsafe_recovery/create.rs | 3 +- .../src/operation/unsafe_recovery/demote.rs | 3 +- .../src/operation/unsafe_recovery/destroy.rs | 3 +- .../src/operation/unsafe_recovery/report.rs | 13 +-- components/raftstore/src/store/fsm/peer.rs | 34 +++++--- .../raftstore/src/store/unsafe_recovery.rs | 46 ++++++++++- .../failpoints/cases/test_unsafe_recovery.rs | 82 ++++++++++++++++++- 7 files changed, 159 insertions(+), 25 deletions(-) diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/create.rs b/components/raftstore-v2/src/operation/unsafe_recovery/create.rs index 9f710a90fea..5795d68c1b9 100644 --- 
a/components/raftstore-v2/src/operation/unsafe_recovery/create.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/create.rs @@ -110,9 +110,10 @@ impl Store { impl Peer { pub fn on_unsafe_recovery_wait_initialized(&mut self, syncer: UnsafeRecoveryExecutePlanSyncer) { - if self.unsafe_recovery_state().is_some() { + if let Some(state) = self.unsafe_recovery_state() && !state.is_abort() { warn!(self.logger, "Unsafe recovery, can't wait initialize, another plan is executing in progress"; + "state" => ?state, ); syncer.abort(); return; diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs index 131a5b2109f..37962a45452 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs @@ -17,9 +17,10 @@ impl Peer { syncer: UnsafeRecoveryExecutePlanSyncer, failed_voters: Vec, ) { - if self.unsafe_recovery_state().is_some() { + if let Some(state) = self.unsafe_recovery_state() { warn!(self.logger, "Unsafe recovery, demote failed voters has already been initiated"; + "state" => ?state, ); syncer.abort(); return; diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/destroy.rs b/components/raftstore-v2/src/operation/unsafe_recovery/destroy.rs index 66f048f31d5..70275f93590 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/destroy.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/destroy.rs @@ -8,9 +8,10 @@ use crate::raft::Peer; impl Peer { pub fn on_unsafe_recovery_destroy_peer(&mut self, syncer: UnsafeRecoveryExecutePlanSyncer) { - if self.unsafe_recovery_state().is_some() { + if let Some(state) = self.unsafe_recovery_state() && !state.is_abort() { warn!(self.logger, "Unsafe recovery, can't destroy, another plan is executing in progress"; + "state" => ?state, ); syncer.abort(); return; diff --git 
a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs index 1e1365ddf9f..7173d00363a 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs @@ -27,12 +27,13 @@ impl Store { impl Peer { pub fn on_unsafe_recovery_wait_apply(&mut self, syncer: UnsafeRecoveryWaitApplySyncer) { - if self.unsafe_recovery_state().is_some() { - warn!(self.logger, - "Unsafe recovery, can't wait apply, another plan is executing in progress"; - ); - syncer.abort(); - return; + if let Some(state) = self.unsafe_recovery_state() && !state.is_abort() { + warn!(self.logger, + "Unsafe recovery, can't wait apply, another plan is executing in progress"; + "state" => ?state, + ); + syncer.abort(); + return; } let target_index = if self.has_force_leader() { // For regions that lose quorum (or regions have force leader), whatever has diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index c22cb001369..2f0123b0bab 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -128,6 +128,7 @@ enum DelayReason { /// in most case. 
const MAX_REGIONS_IN_ERROR: usize = 10; const REGION_SPLIT_SKIP_MAX_COUNT: usize = 3; +const UNSAFE_RECOVERY_STATE_TIMEOUT: Duration = Duration::from_secs(60); pub const MAX_PROPOSAL_SIZE_RATIO: f64 = 0.4; @@ -771,11 +772,12 @@ where syncer: UnsafeRecoveryExecutePlanSyncer, failed_voters: Vec, ) { - if self.fsm.peer.unsafe_recovery_state.is_some() { + if let Some(state) = &self.fsm.peer.unsafe_recovery_state && !state.is_abort() { warn!( "Unsafe recovery, demote failed voters has already been initiated"; "region_id" => self.region().get_id(), "peer_id" => self.fsm.peer.peer.get_id(), + "state" => ?state, ); syncer.abort(); return; @@ -872,11 +874,12 @@ where } fn on_unsafe_recovery_destroy(&mut self, syncer: UnsafeRecoveryExecutePlanSyncer) { - if self.fsm.peer.unsafe_recovery_state.is_some() { + if let Some(state) = &self.fsm.peer.unsafe_recovery_state && !state.is_abort() { warn!( "Unsafe recovery, can't destroy, another plan is executing in progress"; "region_id" => self.region_id(), "peer_id" => self.fsm.peer_id(), + "state" => ?state, ); syncer.abort(); return; @@ -890,11 +893,12 @@ where } fn on_unsafe_recovery_wait_apply(&mut self, syncer: UnsafeRecoveryWaitApplySyncer) { - if self.fsm.peer.unsafe_recovery_state.is_some() { + if let Some(state) = &self.fsm.peer.unsafe_recovery_state && !state.is_abort() { warn!( "Unsafe recovery, can't wait apply, another plan is executing in progress"; "region_id" => self.region_id(), "peer_id" => self.fsm.peer_id(), + "state" => ?state, ); syncer.abort(); return; @@ -921,11 +925,12 @@ where // last log index func be invoked secondly wait follower apply to last // index, however the second call is broadcast, it may improve in future fn on_snapshot_recovery_wait_apply(&mut self, syncer: SnapshotRecoveryWaitApplySyncer) { - if self.fsm.peer.snapshot_recovery_state.is_some() { + if let Some(state) = &self.fsm.peer.snapshot_recovery_state { warn!( "can't wait apply, another recovery in progress"; "region_id" => 
self.region_id(), "peer_id" => self.fsm.peer_id(), + "state" => ?state, ); syncer.abort(); return; @@ -6294,15 +6299,24 @@ where if let Some(ForceLeaderState::ForceLeader { time, .. }) = self.fsm.peer.force_leader { // Clean up the force leader state after a timeout, since the PD recovery // process may have been aborted for some reasons. - if time.saturating_elapsed() - > cmp::max( - self.ctx.cfg.peer_stale_state_check_interval.0, - Duration::from_secs(60), - ) - { + if time.saturating_elapsed() > UNSAFE_RECOVERY_STATE_TIMEOUT { self.on_exit_force_leader(); } } + if let Some(state) = &mut self.fsm.peer.unsafe_recovery_state { + let unsafe_recovery_state_timeout_failpoint = || -> bool { + fail_point!("unsafe_recovery_state_timeout", |_| true); + false + }; + // Clean up the unsafe recovery state after a timeout, since the PD recovery + // process may have been aborted for some reasons. + if unsafe_recovery_state_timeout_failpoint() + || state.check_timeout(UNSAFE_RECOVERY_STATE_TIMEOUT) + { + info!("timeout, abort unsafe recovery"; "state" => ?state); + state.abort(); + } + } if self.ctx.cfg.hibernate_regions { let group_state = self.fsm.hibernate_state.group_state(); diff --git a/components/raftstore/src/store/unsafe_recovery.rs b/components/raftstore/src/store/unsafe_recovery.rs index 92baeda00a1..f98fcaea581 100644 --- a/components/raftstore/src/store/unsafe_recovery.rs +++ b/components/raftstore/src/store/unsafe_recovery.rs @@ -3,6 +3,7 @@ use std::{ fmt, mem, sync::{mpsc::SyncSender, Arc, Mutex}, + time::Duration, }; use collections::HashSet; @@ -205,7 +206,7 @@ pub enum ForceLeaderState { // type explosion problem. // 2. Invoke on drop, so that it can be easily and safely used (together with // Arc) as a coordinator between all concerning peers. Each of the peers -// holds a reference to the same strcuture, and whoever finishes the task +// holds a reference to the same structure, and whoever finishes the task // drops its reference. 
Once the last reference is dropped, indicating all // the peers have finished their own tasks, the closure is invoked. pub struct InvokeClosureOnDrop(Option>); @@ -249,8 +250,9 @@ impl UnsafeRecoveryForceLeaderSyncer { #[derive(Clone, Debug)] pub struct UnsafeRecoveryExecutePlanSyncer { + pub(self) time: TiInstant, _closure: Arc, - abort: Arc>, + pub(self) abort: Arc>, } impl UnsafeRecoveryExecutePlanSyncer { @@ -266,6 +268,7 @@ impl UnsafeRecoveryExecutePlanSyncer { start_unsafe_recovery_report(router, report_id, true); }))); UnsafeRecoveryExecutePlanSyncer { + time: TiInstant::now(), _closure: Arc::new(closure), abort, } @@ -312,8 +315,9 @@ impl SnapshotRecoveryWaitApplySyncer { #[derive(Clone, Debug)] pub struct UnsafeRecoveryWaitApplySyncer { + pub(self) time: TiInstant, _closure: Arc, - abort: Arc>, + pub(self) abort: Arc>, } impl UnsafeRecoveryWaitApplySyncer { @@ -325,11 +329,11 @@ impl UnsafeRecoveryWaitApplySyncer { let abort = Arc::new(Mutex::new(false)); let abort_clone = abort.clone(); let closure = InvokeClosureOnDrop(Some(Box::new(move || { - info!("Unsafe recovery, wait apply finished"); if *abort_clone.lock().unwrap() { warn!("Unsafe recovery, wait apply aborted"); return; } + info!("Unsafe recovery, wait apply finished"); if exit_force_leader { router.broadcast_exit_force_leader(); } @@ -337,6 +341,7 @@ impl UnsafeRecoveryWaitApplySyncer { router.broadcast_fill_out_report(fill_out_report); }))); UnsafeRecoveryWaitApplySyncer { + time: TiInstant::now(), _closure: Arc::new(closure), abort, } @@ -381,6 +386,7 @@ impl UnsafeRecoveryFillOutReportSyncer { } } +#[derive(Debug)] pub enum SnapshotRecoveryState { // This state is set by the leader peer fsm. 
Once set, it sync and check leader commit index // and force forward to last index once follower appended and then it also is checked @@ -393,6 +399,7 @@ pub enum SnapshotRecoveryState { }, } +#[derive(Debug)] pub enum UnsafeRecoveryState { // Stores the state that is necessary for the wait apply stage of unsafe recovery process. // This state is set by the peer fsm. Once set, it is checked every time this peer applies a @@ -415,6 +422,37 @@ pub enum UnsafeRecoveryState { WaitInitialize(UnsafeRecoveryExecutePlanSyncer), } +impl UnsafeRecoveryState { + pub fn check_timeout(&self, timeout: Duration) -> bool { + let time = match self { + UnsafeRecoveryState::WaitApply { syncer, .. } => syncer.time, + UnsafeRecoveryState::DemoteFailedVoters { syncer, .. } + | UnsafeRecoveryState::Destroy(syncer) + | UnsafeRecoveryState::WaitInitialize(syncer) => syncer.time, + }; + time.saturating_elapsed() >= timeout + } + + pub fn is_abort(&self) -> bool { + let abort = match &self { + UnsafeRecoveryState::WaitApply { syncer, .. } => &syncer.abort, + UnsafeRecoveryState::DemoteFailedVoters { syncer, .. } + | UnsafeRecoveryState::Destroy(syncer) + | UnsafeRecoveryState::WaitInitialize(syncer) => &syncer.abort, + }; + *abort.lock().unwrap() + } + + pub fn abort(&mut self) { + match self { + UnsafeRecoveryState::WaitApply { syncer, .. } => syncer.abort(), + UnsafeRecoveryState::DemoteFailedVoters { syncer, .. 
} + | UnsafeRecoveryState::Destroy(syncer) + | UnsafeRecoveryState::WaitInitialize(syncer) => syncer.abort(), + } + } +} + pub fn exit_joint_request(region: &metapb::Region, peer: &metapb::Peer) -> RaftCmdRequest { let mut req = new_admin_request(region.get_id(), peer.clone()); req.mut_header() diff --git a/tests/failpoints/cases/test_unsafe_recovery.rs b/tests/failpoints/cases/test_unsafe_recovery.rs index e9c70cef73b..cc33a01ff03 100644 --- a/tests/failpoints/cases/test_unsafe_recovery.rs +++ b/tests/failpoints/cases/test_unsafe_recovery.rs @@ -74,9 +74,87 @@ fn test_unsafe_recovery_send_report() { } #[test_case(test_raftstore::new_node_cluster)] -#[test_case(test_raftstore_v2::new_node_cluster)] -fn test_unsafe_recovery_execution_result_report() { +// #[test_case(test_raftstore_v2::new_node_cluster)] +fn test_unsafe_recovery_timeout_abort() { let mut cluster = new_cluster(0, 3); + cluster.cfg.raft_store.raft_election_timeout_ticks = 5; + cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40); + cluster.cfg.raft_store.max_leader_missing_duration = ReadableDuration::millis(150); + cluster.cfg.raft_store.abnormal_leader_missing_duration = ReadableDuration::millis(100); + cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(100); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + + // Makes the leadership definite. + let store2_peer = find_peer(®ion, nodes[1]).unwrap().to_owned(); + cluster.must_transfer_leader(region.get_id(), store2_peer); + cluster.put(b"random_key1", b"random_val1").unwrap(); + + // Blocks the raft apply process on store 1 entirely. 
+ let (apply_triggered_tx, apply_triggered_rx) = mpsc::bounded::<()>(1); + let (apply_released_tx, apply_released_rx) = mpsc::bounded::<()>(1); + fail::cfg_callback("on_handle_apply_store_1", move || { + let _ = apply_triggered_tx.send(()); + let _ = apply_released_rx.recv(); + }) + .unwrap(); + + // Manually makes an update, and wait for the apply to be triggered, to + // simulate "some entries are committed but not applied" scenario. + cluster.put(b"random_key2", b"random_val2").unwrap(); + apply_triggered_rx + .recv_timeout(Duration::from_secs(1)) + .unwrap(); + + // Makes the group lose its quorum. + cluster.stop_node(nodes[1]); + cluster.stop_node(nodes[2]); + + // Triggers the unsafe recovery store reporting process. + let plan = pdpb::RecoveryPlan::default(); + pd_client.must_set_unsafe_recovery_plan(nodes[0], plan); + cluster.must_send_store_heartbeat(nodes[0]); + + // sleep for a while to trigger timeout + fail::cfg("unsafe_recovery_state_timeout", "return").unwrap(); + sleep_ms(200); + fail::remove("unsafe_recovery_state_timeout"); + + // Unblocks the apply process. + drop(apply_released_tx); + + // No store report is sent, cause the plan is aborted. + for _ in 0..20 { + assert_eq!(pd_client.must_get_store_report(nodes[0]), None); + sleep_ms(100); + } + + // resend the plan + let plan = pdpb::RecoveryPlan::default(); + pd_client.must_set_unsafe_recovery_plan(nodes[0], plan); + cluster.must_send_store_heartbeat(nodes[0]); + + // Store reports are sent once the entries are applied. + let mut store_report = None; + for _ in 0..20 { + store_report = pd_client.must_get_store_report(nodes[0]); + if store_report.is_some() { + break; + } + sleep_ms(100); + } + assert_ne!(store_report, None); + fail::remove("on_handle_apply_store_1"); +} + +#[test] +fn test_unsafe_recovery_execution_result_report() { + let mut cluster = new_server_cluster(0, 3); // Prolong force leader time. 
cluster.run(); let nodes = Vec::from_iter(cluster.get_node_ids()); From 265602eae30734b265e81f4d5fae7fe9449a47fb Mon Sep 17 00:00:00 2001 From: crazycs Date: Fri, 18 Aug 2023 09:17:00 +0800 Subject: [PATCH 0854/1149] storage: add deadline check in get and batch_get request handler (#15307) close tikv/tikv#15306 add deadline check in get and batch_get request handler. Signed-off-by: crazycs520 --- Cargo.lock | 1 + src/storage/mod.rs | 16 +++++++ tests/Cargo.toml | 1 + tests/failpoints/cases/test_storage.rs | 59 ++++++++++++++++++++++++++ 4 files changed, 77 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index c4465f73def..8bf3cf216d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6483,6 +6483,7 @@ dependencies = [ "tipb_helper", "tokio", "toml", + "tracker", "txn_types", "uuid 0.8.2", ] diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 2c5fe7dc750..0d4679fbe18 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -601,6 +601,7 @@ impl Storage { start_ts: TimeStamp, ) -> impl Future, KvGetStatistics)>> { let stage_begin_ts = Instant::now(); + let deadline = Self::get_deadline(&ctx); const CMD: CommandKind = CommandKind::get; let priority = ctx.get_priority(); let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); @@ -640,6 +641,8 @@ impl Storage { .get(priority_tag) .inc(); + deadline.check()?; + Self::check_api_version(api_version, ctx.api_version, CMD, [key.as_encoded()])?; let command_duration = tikv_util::time::Instant::now(); @@ -661,6 +664,7 @@ impl Storage { Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)).await?; { + deadline.check()?; let begin_instant = Instant::now(); let stage_snap_recv_ts = begin_instant; let buckets = snapshot.ext().get_buckets(); @@ -809,6 +813,7 @@ impl Storage { for ((mut req, id), tracker) in requests.into_iter().zip(ids).zip(trackers) { set_tls_tracker_token(tracker); let mut ctx = req.take_context(); + let deadline = Self::get_deadline(&ctx); let source = 
ctx.take_request_source(); let region_id = ctx.get_region_id(); let peer = ctx.get_peer(); @@ -867,6 +872,7 @@ impl Storage { id, source, tracker, + deadline, )); } Self::with_tls_engine(|engine| engine.release_snapshot()); @@ -883,8 +889,14 @@ impl Storage { id, source, tracker, + deadline, ) = req_snap; let snap_res = snap.await; + if let Err(e) = deadline.check() { + consumer.consume(id, Err(Error::from(e)), begin_instant, source); + continue; + } + set_tls_tracker_token(tracker); match snap_res { Ok(snapshot) => Self::with_perf_context(CMD, || { @@ -955,6 +967,7 @@ impl Storage { start_ts: TimeStamp, ) -> impl Future>, KvGetStatistics)>> { let stage_begin_ts = Instant::now(); + let deadline = Self::get_deadline(&ctx); const CMD: CommandKind = CommandKind::batch_get; let priority = ctx.get_priority(); let metadata = TaskMetadata::from_ctx(ctx.get_resource_control_context()); @@ -997,6 +1010,8 @@ impl Storage { .get(priority_tag) .inc(); + deadline.check()?; + Self::check_api_version( api_version, ctx.api_version, @@ -1020,6 +1035,7 @@ impl Storage { let snapshot = Self::with_tls_engine(|engine| Self::snapshot(engine, snap_ctx)).await?; { + deadline.check()?; let begin_instant = Instant::now(); let stage_snap_recv_ts = begin_instant; diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 6c2b96cf642..79367c00631 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -112,6 +112,7 @@ tikv_util = { workspace = true } time = "0.1" tipb = { workspace = true } toml = "0.5" +tracker = { workspace = true } txn_types = { workspace = true } uuid = { version = "0.8.1", features = ["serde", "v4"] } diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index dd57f28ab94..57047bef9d4 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -1532,6 +1532,65 @@ fn test_before_async_write_deadline() { )); } +#[test] +fn test_deadline_exceeded_on_get_and_batch_get() { + use 
tikv_util::time::Instant; + use tracker::INVALID_TRACKER_TOKEN; + + let mut cluster = new_server_cluster(0, 1); + cluster.run(); + + let engine = cluster + .sim + .read() + .unwrap() + .storages + .get(&1) + .unwrap() + .clone(); + let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) + .build() + .unwrap(); + + fail::cfg("after-snapshot", "sleep(100)").unwrap(); + let mut ctx = Context::default(); + ctx.set_region_id(1); + ctx.set_region_epoch(cluster.get_region_epoch(1)); + ctx.set_peer(cluster.leader_of_region(1).unwrap()); + ctx.max_execution_duration_ms = 20; + let f = storage.get(ctx.clone(), Key::from_raw(b"a"), 1.into()); + assert!(matches!( + block_on(f), + Err(StorageError(box StorageErrorInner::DeadlineExceeded)) + )); + let f = storage.batch_get(ctx.clone(), vec![Key::from_raw(b"a")], 1.into()); + assert!(matches!( + block_on(f), + Err(StorageError(box StorageErrorInner::DeadlineExceeded)) + )); + + let consumer = GetConsumer::new(); + let mut get_req = GetRequest::default(); + get_req.set_key(b"a".to_vec()); + get_req.set_version(1_u64); + get_req.set_context(ctx.clone()); + block_on(storage.batch_get_command( + vec![get_req], + vec![1], + vec![INVALID_TRACKER_TOKEN; 1], + consumer.clone(), + Instant::now(), + )) + .unwrap(); + let result = consumer.take_data(); + assert_eq!(1, result.len()); + assert!(matches!( + result[0], + Err(StorageError(box StorageErrorInner::DeadlineExceeded)) + )); + fail::remove("after-snapshot"); +} + #[test] fn test_before_propose_deadline() { let mut cluster = new_server_cluster(0, 1); From b8075bf3e5ecf399f7bd5f54c51c8670636f051d Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 18 Aug 2023 16:59:01 +0800 Subject: [PATCH 0855/1149] *: replace space and hyphen with underscore in log kv (#15345) close tikv/tikv#15344 Using space in log kv can cause slog to emit "", which inflates the log file. 
Also, we prefer underscores instead of hyphens as it aligns with Rust variable naming conventions. By unifying the style of kv names, makes it easier to search information during debugging. Signed-off-by: Neil Shen --- Makefile | 7 ++++--- components/backup-stream/src/endpoint.rs | 10 +++++----- components/backup-stream/src/observer.rs | 2 +- components/backup-stream/src/router.rs | 2 +- components/cdc/src/endpoint.rs | 2 +- components/encryption/src/file_dict_file.rs | 10 +++++----- .../operation/command/admin/conf_change.rs | 6 +++--- .../src/operation/ready/snapshot.rs | 4 ++-- .../raftstore-v2/src/worker/refresh_config.rs | 2 +- components/raftstore/src/store/fsm/apply.rs | 4 ++-- components/raftstore/src/store/fsm/peer.rs | 4 ++-- components/raftstore/src/store/peer.rs | 8 ++++---- components/raftstore/src/store/read_queue.rs | 2 +- components/raftstore/src/store/worker/pd.rs | 2 +- .../src/store/worker/refresh_config.rs | 2 +- components/resolved_ts/src/endpoint.rs | 12 ++++++------ components/resolved_ts/src/resolver.rs | 2 +- components/server/src/common.rs | 2 +- components/server/src/server.rs | 4 ++-- components/server/src/server2.rs | 4 ++-- components/sst_importer/src/sst_importer.rs | 13 +++++++------ components/sst_importer/src/sst_writer.rs | 14 +++++++------- scripts/check-log-style | 19 +++++++++++++++++++ src/server/service/kv.rs | 10 +++++----- src/storage/txn/actions/prewrite.rs | 10 +++++----- 25 files changed, 89 insertions(+), 68 deletions(-) create mode 100755 scripts/check-log-style diff --git a/Makefile b/Makefile index 30209caa3d9..bb1d7316e1b 100644 --- a/Makefile +++ b/Makefile @@ -331,7 +331,7 @@ unset-override: pre-format: unset-override @rustup component add rustfmt - @which cargo-sort &> /dev/null || cargo install -q cargo-sort + @which cargo-sort &> /dev/null || cargo install -q cargo-sort format: pre-format @cargo fmt @@ -347,6 +347,7 @@ pre-clippy: unset-override clippy: pre-clippy @./scripts/check-redact-log + 
@./scripts/check-log-style @./scripts/check-docker-build @./scripts/check-license @./scripts/clippy-all @@ -400,11 +401,11 @@ docker: docker_test: docker build -f Dockerfile.test \ -t ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ - . + . docker run -i -v $(shell pwd):/tikv \ ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ make test - + ## The driver for script/run-cargo.sh ## ---------------------------------- diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 657c3fe5b61..b11259d5be6 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -552,7 +552,7 @@ where let range_init_result = init.initialize_range(start_key.clone(), end_key.clone()); match range_init_result { Ok(()) => { - info!("backup stream success to initialize"; + info!("backup stream success to initialize"; "start_key" => utils::redact(&start_key), "end_key" => utils::redact(&end_key), "take" => ?start.saturating_elapsed(),) @@ -607,7 +607,7 @@ where info!( "register backup stream ranges"; "task" => ?task, - "ranges-count" => ranges.inner.len(), + "ranges_count" => ranges.inner.len(), ); let ranges = ranges .inner @@ -820,7 +820,7 @@ where { warn!("backup stream failed to set global checkpoint."; "task" => ?task, - "global-checkpoint" => global_checkpoint, + "global_checkpoint" => global_checkpoint, "err" => ?err, ); } @@ -828,7 +828,7 @@ where Ok(false) => { debug!("backup stream no need update global checkpoint."; "task" => ?task, - "global-checkpoint" => global_checkpoint, + "global_checkpoint" => global_checkpoint, ); } Err(e) => { @@ -1011,7 +1011,7 @@ where /// Create a standard tokio runtime /// (which allows io and time reactor, involve thread memory accessor), fn create_tokio_runtime(thread_count: usize, thread_name: &str) -> TokioResult { - info!("create tokio runtime for backup stream"; "thread_name" => thread_name, "thread-count" => thread_count); + info!("create tokio runtime for backup 
stream"; "thread_name" => thread_name, "thread_count" => thread_count); tokio::runtime::Builder::new_multi_thread() .thread_name(thread_name) diff --git a/components/backup-stream/src/observer.rs b/components/backup-stream/src/observer.rs index 169c3b72268..8947d2068c3 100644 --- a/components/backup-stream/src/observer.rs +++ b/components/backup-stream/src/observer.rs @@ -102,7 +102,7 @@ impl CmdObserver for BackupStreamObserver { assert!(!cmd_batches.is_empty()); debug!( "observe backup stream kv"; - "cmd_batches len" => cmd_batches.len(), + "cmd_batches_len" => cmd_batches.len(), "level" => ?max_level, ); diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index ff3254fa091..b0d3453c958 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -1071,7 +1071,7 @@ impl StreamTaskInfo { Ok(_) => { debug!( "backup stream flush success"; - "storage file" => ?filepath, + "storage_file" => ?filepath, "est_len" => ?stat_length, ); } diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 9cd7367c7ca..23a3e410467 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -498,7 +498,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint ?self.config, + "current_config" => ?self.config, "change" => ?change ); // Update the config here. The following adjustments will all use the new diff --git a/components/encryption/src/file_dict_file.rs b/components/encryption/src/file_dict_file.rs index 6563de30372..0d1dcbbbd6e 100644 --- a/components/encryption/src/file_dict_file.rs +++ b/components/encryption/src/file_dict_file.rs @@ -349,9 +349,9 @@ impl FileDictionaryFile { if remained.len() < name_len + info_len { warn!( "file corrupted! 
record content size is too small, discarded the tail record"; - "content size" => remained.len(), - "expected name length" => name_len, - "expected content length" =>info_len, + "content_size" => remained.len(), + "expected_name_length" => name_len, + "expected_content_length" =>info_len, ); return Err(Error::TailRecordParseIncomplete); } @@ -366,8 +366,8 @@ impl FileDictionaryFile { // Only when this record is the last one can the panic be skipped. warn!( "file corrupted! crc32 mismatch, discarded the tail record"; - "expected crc32" => crc32, - "checksum crc32" => crc32_checksum, + "expected_crc32" => crc32, + "checksum_crc32" => crc32_checksum, ); return Err(Error::TailRecordParseIncomplete); } else { diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 9d58be13b04..2bd06fca6c2 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -286,7 +286,7 @@ impl Apply { error!(self.logger, "failed to apply conf change"; "changes" => ?changes, "legacy" => legacy, - "original region" => ?region, "err" => ?e); + "original_region" => ?region, "err" => ?e); return Err(e); } } @@ -300,8 +300,8 @@ impl Apply { "conf change successfully"; "changes" => ?changes, "legacy" => legacy, - "original region" => ?region, - "current region" => ?new_region, + "original_region" => ?region, + "current_region" => ?new_region, ); let my_id = self.peer().get_id(); let state = self.region_state_mut(); diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 55f5c0b6379..17deed333c1 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -606,8 +606,8 @@ impl Storage { warn!( self.logger(), "snapshot is staled, skip"; - "snap index" => 
snapshot.get_metadata().get_index(), - "required index" => index.load(Ordering::SeqCst), + "snap_index" => snapshot.get_metadata().get_index(), + "required_index" => index.load(Ordering::SeqCst), "to_peer_id" => to_peer_id, ); return false; diff --git a/components/raftstore-v2/src/worker/refresh_config.rs b/components/raftstore-v2/src/worker/refresh_config.rs index 633a92a0e24..797f5b821ab 100644 --- a/components/raftstore-v2/src/worker/refresh_config.rs +++ b/components/raftstore-v2/src/worker/refresh_config.rs @@ -41,7 +41,7 @@ where error!( self.logger, "failed to decrease thread pool"; - "decrease to" => size, + "decrease_to" => size, "err" => %e, ); return; diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 1566334bb87..873b5facf84 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -2384,8 +2384,8 @@ where "region_id" => self.region_id(), "peer_id" => self.id(), "peer" => ?peer, - "exist peer" => ?exist_peer, - "confchange type" => ?change_type, + "exist_peer" => ?exist_peer, + "confchange_type" => ?change_type, "region" => ?&self.region ); return Err(box_err!( diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 2f0123b0bab..57ebcd91aa4 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -6061,8 +6061,8 @@ where debug!( "finished on_refresh_region_buckets"; "region_id" => self.fsm.region_id(), - "buckets count" => buckets_count, - "buckets size" => ?self.fsm.peer.region_buckets.as_ref().unwrap().meta.sizes, + "buckets_count" => buckets_count, + "buckets_size" => ?self.fsm.peer.region_buckets.as_ref().unwrap().meta.sizes, ); // test purpose #[cfg(any(test, feature = "testexport"))] diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index c1ed078d93f..8c1a7ef61e9 100644 --- 
a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -2416,8 +2416,8 @@ where self.send_extra_message(msg, &mut ctx.trans, &leader); info!( "notify leader the peer is available"; - "region id" => self.region().get_id(), - "peer id" => self.peer.id + "region_id" => self.region().get_id(), + "peer_id" => self.peer.id ); } } @@ -4677,8 +4677,8 @@ where } warn!( "read rejected by safe timestamp"; - "safe ts" => safe_ts, - "read ts" => read_ts, + "safe_ts" => safe_ts, + "read_ts" => read_ts, "tag" => &self.tag, ); let mut response = cmd_resp::new_error(Error::DataIsNotReady { diff --git a/components/raftstore/src/store/read_queue.rs b/components/raftstore/src/store/read_queue.rs index 7ab0ca0cd93..376f168c26d 100644 --- a/components/raftstore/src/store/read_queue.rs +++ b/components/raftstore/src/store/read_queue.rs @@ -282,7 +282,7 @@ impl ReadIndexQueue { } debug!( "cannot find corresponding read from pending reads"; - "uuid" => ?uuid, "read-index" => index, + "uuid" => ?uuid, "read_index" => index, ); } diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index c73346505f4..d812830569a 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1677,7 +1677,7 @@ where let mut switches = resp.take_switch_witnesses(); info!("try to switch witness"; "region_id" => region_id, - "switch witness" => ?switches + "switch_witness" => ?switches ); let req = new_batch_switch_witness(switches.take_switch_witnesses().into()); send_admin_request(&router, region_id, epoch, peer, req, Callback::None, Default::default()); diff --git a/components/raftstore/src/store/worker/refresh_config.rs b/components/raftstore/src/store/worker/refresh_config.rs index dae9fafd4ef..066b463e75e 100644 --- a/components/raftstore/src/store/worker/refresh_config.rs +++ b/components/raftstore/src/store/worker/refresh_config.rs @@ -50,7 +50,7 @@ where if let Err(e) = 
self.state.fsm_sender.send(FsmTypes::Empty, None) { error!( "failed to decrease thread pool"; - "decrease to" => size, + "decrease_to" => size, "err" => %e, ); return; diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 6d4ebf9986b..c931a61f33b 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -402,7 +402,7 @@ where observe_id, cause: format!("met error while handle scan task {:?}", e), }) - .unwrap_or_else(|schedule_err| warn!("schedule re-register task failed"; "err" => ?schedule_err, "re-register cause" => ?e)); + .unwrap_or_else(|schedule_err| warn!("schedule re-register task failed"; "err" => ?schedule_err, "re_register_cause" => ?e)); RTS_SCAN_TASKS.with_label_values(&["abort"]).inc(); })), } @@ -895,11 +895,11 @@ where info!( "the max gap of safe-ts is large"; "gap" => safe_ts_gap, - "oldest safe-ts" => ?oldest_safe_ts, - "region id" => oldest_safe_ts_region, - "advance-ts-interval" => ?self.cfg.advance_ts_interval, - "lock num" => lock_num, - "min start ts" => min_start_ts, + "oldest_safe_ts" => ?oldest_safe_ts, + "region_id" => oldest_safe_ts_region, + "advance_ts_interval" => ?self.cfg.advance_ts_interval, + "lock_num" => lock_num, + "min_start_ts" => min_start_ts, ); } RTS_MIN_SAFE_TS_GAP.set(safe_ts_gap as i64); diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 5e8f58e97e1..28ee468d322 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -217,7 +217,7 @@ impl Resolver { "locks with the minimum start_ts in resolver"; "region_id" => self.region_id, "start_ts" => start_ts, - "sampled keys" => ?keys_for_log, + "sampled_keys" => ?keys_for_log, ); } } diff --git a/components/server/src/common.rs b/components/server/src/common.rs index 094c845016f..165a1c8509e 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -404,7 +404,7 @@ impl 
TikvServerCore { debug!( "cpu_time_limiter tuned for backend request"; "cpu_util" => ?cpu_util, - "new quota" => ?target_quota); + "new_quota" => ?target_quota); INSTANCE_BACKEND_CPU_QUOTA.set(target_quota as i64); } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index c11df6cd502..7ff51474d7d 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1308,7 +1308,7 @@ where Err(e) => { error!( "get disk stat for kv store failed"; - "kv path" => store_path.to_str(), + "kv_path" => store_path.to_str(), "err" => ?e ); return; @@ -1334,7 +1334,7 @@ where Err(e) => { error!( "get disk stat for raft engine failed"; - "raft engine path" => raft_path.clone(), + "raft_engine_path" => raft_path.clone(), "err" => ?e ); return; diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index d4f65b8ce88..fe2b685313e 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1150,7 +1150,7 @@ where Err(e) => { error!( "get disk stat for kv store failed"; - "kv path" => store_path.to_str(), + "kv_path" => store_path.to_str(), "err" => ?e ); return; @@ -1178,7 +1178,7 @@ where Err(e) => { error!( "get disk stat for raft engine failed"; - "raft engine path" => raft_path.clone(), + "raft_engine_path" => raft_path.clone(), "err" => ?e ); return; diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 54b23dfdb78..33f3c691a26 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -552,7 +552,7 @@ impl SstImporter { if self.mem_limit.load(Ordering::SeqCst) != memory_limit { self.mem_limit.store(memory_limit, Ordering::SeqCst); info!("update importer config"; - "memory-use-ratio" => mem_ratio, + "memory_use_ratio" => mem_ratio, "size" => memory_limit, ) } @@ -598,7 +598,7 @@ impl SstImporter { if self.import_support_download() { let shrink_file_count = 
shrink_files.len(); if shrink_file_count > 0 || retain_file_count > 0 { - info!("shrink space by tick"; "shrink files count" => shrink_file_count, "retain files count" => retain_file_count); + info!("shrink space by tick"; "shrink_files_count" => shrink_file_count, "retain_files_count" => retain_file_count); } for f in shrink_files { @@ -609,7 +609,7 @@ impl SstImporter { shrink_file_count } else { if shrink_buff_size > 0 || retain_buff_size > 0 { - info!("shrink cache by tick"; "shrink size" => shrink_buff_size, "retain size" => retain_buff_size); + info!("shrink cache by tick"; "shrink_size" => shrink_buff_size, "retain_size" => retain_buff_size); } shrink_buff_size } @@ -1029,9 +1029,10 @@ impl SstImporter { .map_or_else(|| Some(key.clone()), |v: Vec| Some(v.max(key.clone()))); } if total_key != not_in_range { - info!("build download request file done"; "total keys" => %total_key, - "ts filtered keys" => %ts_not_expected, - "range filtered keys" => %not_in_range); + info!("build download request file done"; + "total_keys" => %total_key, + "ts_filtered_keys" => %ts_not_expected, + "range_filtered_keys" => %not_in_range); } IMPORTER_APPLY_DURATION diff --git a/components/sst_importer/src/sst_writer.rs b/components/sst_importer/src/sst_writer.rs index 0f9ac62f643..f6f896a0923 100644 --- a/components/sst_importer/src/sst_writer.rs +++ b/components/sst_importer/src/sst_writer.rs @@ -130,10 +130,10 @@ impl TxnSstWriter { } info!("finish write to sst"; - "default entries" => default_entries, - "default bytes" => default_bytes, - "write entries" => write_entries, - "write bytes" => write_bytes, + "default_entries" => default_entries, + "default_bytes" => default_bytes, + "write_entries" => write_entries, + "write_bytes" => write_bytes, ); IMPORT_LOCAL_WRITE_KEYS_VEC .with_label_values(&["txn_default_cf"]) @@ -270,9 +270,9 @@ impl RawSstWriter { info!( "finish raw write to sst"; - "default entries" => self.default_entries, - "default bytes" => self.default_deletes, - 
"default bytes" => self.default_bytes + "default_entries" => self.default_entries, + "default_deletes" => self.default_deletes, + "default_bytes" => self.default_bytes ); IMPORT_LOCAL_WRITE_KEYS_VEC .with_label_values(&["raw_default_cf"]) diff --git a/scripts/check-log-style b/scripts/check-log-style new file mode 100755 index 00000000000..b6a1893ac7d --- /dev/null +++ b/scripts/check-log-style @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# This script checks if there is plain (not redacted) log in TiKV. +set -euo pipefail + +function error_msg() { + echo "Prefer snake_case for log kv." >&2 +} + +if grep -r -n --color=always \ + -E '"[a-zA-Z0-9_ -]*( |-)[a-zA-Z0-9_ -]*" ?=>[\?% ]*[a-zA-Z0-9:\._\(\["]+[,|\)|\n]?' \ + --include \*.rs \ + --exclude-dir target . \ + | grep -v -E "config\.rs|tikv_util/src/logger|file_system/src/rate_limiter.rs" ; # ignore files contain kebab-case names. +then + error_msg + exit 1 +fi + +echo "Log style check passed." diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index e2bd23f6bb0..5a4327ba46e 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -2298,11 +2298,11 @@ fn needs_reject_raft_append(reject_messages_on_memory_ratio: f64) -> bool { { // FIXME: this doesn't output to logfile. 
debug!("need reject log append on memory limit"; - "raft messages" => raft_msg_usage, - "cached entries" => cached_entries, - "applying entries" => applying_entries, - "current usage" => usage, - "reject ratio" => reject_messages_on_memory_ratio); + "raft_messages" => raft_msg_usage, + "cached_entries" => cached_entries, + "applying_entries" => applying_entries, + "current_usage" => usage, + "reject_ratio" => reject_messages_on_memory_ratio); return true; } } diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index cdae37dcf94..90f739b8705 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -474,12 +474,12 @@ impl<'a> PrewriteMutation<'a> { continue; } - warn!("conflicting write was found, pessimistic lock must be lost for the corresponding row key"; - "key" => %self.key, - "start_ts" => self.txn_props.start_ts, + warn!("conflicting write was found, pessimistic lock must be lost for the corresponding row key"; + "key" => %self.key, + "start_ts" => self.txn_props.start_ts, "for_update_ts" => for_update_ts, - "conflicting start_ts" => write.start_ts, - "conflicting commit_ts" => commit_ts); + "conflicting_start_ts" => write.start_ts, + "conflicting_commit_ts" => commit_ts); return Err(ErrorInner::PessimisticLockNotFound { start_ts: self.txn_props.start_ts, key: self.key.clone().into_raw()?, From cb6531fa573addc6b852e8ddaaf6c0b182ce4c99 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Fri, 18 Aug 2023 22:02:00 +0800 Subject: [PATCH 0856/1149] compaction: disable periodic-compaction and ttl as default. (#15359) close tikv/tikv#15355 Disable `periodic-compaction` and `ttl` features as default. 
Signed-off-by: lucasliang --- etc/config-template.toml | 12 ++++++------ src/config/mod.rs | 17 +++++++++-------- tests/integrations/config/mod.rs | 16 ++++++++-------- tests/integrations/config/test-custom.toml | 8 ++++++++ 4 files changed, 31 insertions(+), 22 deletions(-) diff --git a/etc/config-template.toml b/etc/config-template.toml index 4d580db0a5b..36d8d25d883 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -888,16 +888,16 @@ ## SST files containing updates older than TTL will go through the compaction ## process. This usually happens in a cascading way so that those entries -## will be compacted to bottommost level/file. +## will be compacted to bottommost level/file. Disabled as default. ## -## Default: 30 days. -# ttl = "30d" +## Default: 0s. +# ttl = "0s" ## SST files older than this value will be picked up for compaction, and -## re-written to the same level as they were before. +## re-written to the same level as they were before. Disabled as default. ## -## Default: 30 days. -# periodic-compaction-seconds = "30d" +## Default: 0s. +# periodic-compaction-seconds = "0s" ## Options for "Default" Column Family for `Titan`. [rocksdb.defaultcf.titan] diff --git a/src/config/mod.rs b/src/config/mod.rs index d81b50a88f7..5c7f1424c38 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -366,11 +366,11 @@ macro_rules! cf_config { pub checksum: ChecksumType, #[online_config(skip)] pub max_compactions: Option, - // `ttl == None` means using default setting in Rocksdb. + // `ttl == None` means disable this feature in Rocksdb. // `ttl` in Rocksdb is 30 days as default. #[online_config(skip)] pub ttl: Option, - // `periodic_compaction_seconds == None` means using default setting in Rocksdb. + // `periodic_compaction_seconds == None` means disabled this feature in Rocksdb. // `periodic_compaction_seconds` in Rocksdb is 30 days as default. #[online_config(skip)] pub periodic_compaction_seconds: Option, @@ -654,12 +654,13 @@ macro_rules! 
build_cf_opt { if let Some(r) = $compaction_limiter { cf_opts.set_compaction_thread_limiter(r); } - if let Some(ttl) = $opt.ttl { - cf_opts.set_ttl(ttl.0.as_secs()); - } - if let Some(secs) = $opt.periodic_compaction_seconds { - cf_opts.set_periodic_compaction_seconds(secs.0.as_secs()); - } + cf_opts.set_ttl($opt.ttl.unwrap_or(ReadableDuration::secs(0)).0.as_secs()); + cf_opts.set_periodic_compaction_seconds( + $opt.periodic_compaction_seconds + .unwrap_or(ReadableDuration::secs(0)) + .0 + .as_secs(), + ); cf_opts }}; } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 8ead30e03ff..a65d4cfb46c 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -386,8 +386,8 @@ fn test_serde_custom_tikv_config() { format_version: Some(0), checksum: ChecksumType::XXH3, max_compactions: Some(3), - ttl: None, - periodic_compaction_seconds: None, + ttl: Some(ReadableDuration::days(10)), + periodic_compaction_seconds: Some(ReadableDuration::days(10)), }, writecf: WriteCfConfig { block_size: ReadableSize::kb(12), @@ -459,8 +459,8 @@ fn test_serde_custom_tikv_config() { format_version: Some(0), checksum: ChecksumType::XXH3, max_compactions: Some(3), - ttl: None, - periodic_compaction_seconds: None, + ttl: Some(ReadableDuration::days(10)), + periodic_compaction_seconds: Some(ReadableDuration::days(10)), }, lockcf: LockCfConfig { block_size: ReadableSize::kb(12), @@ -532,8 +532,8 @@ fn test_serde_custom_tikv_config() { format_version: Some(0), checksum: ChecksumType::XXH3, max_compactions: Some(3), - ttl: None, - periodic_compaction_seconds: None, + ttl: Some(ReadableDuration::days(10)), + periodic_compaction_seconds: Some(ReadableDuration::days(10)), }, raftcf: RaftCfConfig { block_size: ReadableSize::kb(12), @@ -605,8 +605,8 @@ fn test_serde_custom_tikv_config() { format_version: Some(0), checksum: ChecksumType::XXH3, max_compactions: Some(3), - ttl: None, - periodic_compaction_seconds: None, + ttl: 
Some(ReadableDuration::days(10)), + periodic_compaction_seconds: Some(ReadableDuration::days(10)), }, titan: titan_db_config.clone(), }; diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index df777784b86..053e7c45939 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -341,6 +341,8 @@ prepopulate-block-cache = "flush-only" format-version = 0 checksum = "xxh3" max-compactions = 3 +ttl = "10d" +periodic-compaction-seconds = "10d" [rocksdb.defaultcf.titan] min-blob-size = "2018B" @@ -406,6 +408,8 @@ prepopulate-block-cache = "flush-only" format-version = 0 checksum = "xxh3" max-compactions = 3 +ttl = "10d" +periodic-compaction-seconds = "10d" [rocksdb.lockcf] block-size = "12KB" @@ -458,6 +462,8 @@ prepopulate-block-cache = "flush-only" format-version = 0 checksum = "xxh3" max-compactions = 3 +ttl = "10d" +periodic-compaction-seconds = "10d" [rocksdb.raftcf] block-size = "12KB" @@ -510,6 +516,8 @@ prepopulate-block-cache = "flush-only" format-version = 0 checksum = "xxh3" max-compactions = 3 +ttl = "10d" +periodic-compaction-seconds = "10d" [raftdb] wal-recovery-mode = "skip-any-corrupted-records" From f6c75b31bfdad10d71a071b2860a2a9de3d8a843 Mon Sep 17 00:00:00 2001 From: glorv Date: Mon, 21 Aug 2023 14:42:32 +0800 Subject: [PATCH 0857/1149] raft-store-v2: fix report down peers (#15317) close tikv/tikv#15316 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/raft/peer.rs | 2 +- components/test_raftstore-v2/src/cluster.rs | 4 + components/test_raftstore-v2/src/util.rs | 19 +++ components/test_raftstore/src/util.rs | 15 ++ .../raftstore/test_region_heartbeat.rs | 130 +++++++++--------- 5 files changed, 101 insertions(+), 69 deletions(-) diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index d7ac62763e9..2f3a3376fe9 100644 --- 
a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -603,7 +603,7 @@ impl Peer { continue; } if let Some(instant) = self.peer_heartbeats.get(&p.get_id()) { - let elapsed = instant.saturating_duration_since(now); + let elapsed = now.saturating_duration_since(*instant); if elapsed >= max_duration { let mut stats = pdpb::PeerStats::default(); stats.set_peer(p.clone()); diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 84d8d0fcca5..08de4cc3aa1 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -1894,6 +1894,10 @@ impl, EK: KvEngine> Cluster { } }); } + + pub fn get_down_peers(&self) -> HashMap { + self.pd_client.get_down_peers() + } } pub fn bootstrap_store( diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index ff0362fb4a4..805394b1ea0 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -428,3 +428,22 @@ pub fn put_with_timeout, EK: KvEngine>( ); cluster.call_command_on_node(node_id, req, timeout) } + +pub fn wait_down_peers, EK: KvEngine>( + cluster: &Cluster, + count: u64, + peer: Option, +) { + let mut peers = cluster.get_down_peers(); + for _ in 1..1000 { + if peers.len() == count as usize && peer.as_ref().map_or(true, |p| peers.contains_key(p)) { + return; + } + std::thread::sleep(Duration::from_millis(10)); + peers = cluster.get_down_peers(); + } + panic!( + "got {:?}, want {} peers which should include {:?}", + peers, count, peer + ); +} diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 98e39c4fb13..02a74136bb6 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -1561,3 +1561,18 @@ pub fn put_with_timeout( ); cluster.call_command_on_node(node_id, req, timeout) } + +pub fn wait_down_peers(cluster: &Cluster, count: 
u64, peer: Option) { + let mut peers = cluster.get_down_peers(); + for _ in 1..1000 { + if peers.len() == count as usize && peer.as_ref().map_or(true, |p| peers.contains_key(p)) { + return; + } + std::thread::sleep(Duration::from_millis(10)); + peers = cluster.get_down_peers(); + } + panic!( + "got {:?}, want {} peers which should include {:?}", + peers, count, peer + ); +} diff --git a/tests/integrations/raftstore/test_region_heartbeat.rs b/tests/integrations/raftstore/test_region_heartbeat.rs index 117c10a3d19..29f7e8b10dd 100644 --- a/tests/integrations/raftstore/test_region_heartbeat.rs +++ b/tests/integrations/raftstore/test_region_heartbeat.rs @@ -7,94 +7,88 @@ use std::{ }; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::{ config::*, time::{Instant, UnixSecs as PdInstant}, HandyRwLock, }; -fn wait_down_peers(cluster: &Cluster, count: u64, peer: Option) { - let mut peers = cluster.get_down_peers(); - for _ in 1..1000 { - if peers.len() == count as usize && peer.as_ref().map_or(true, |p| peers.contains_key(p)) { - return; - } - sleep(Duration::from_millis(10)); - peers = cluster.get_down_peers(); - } - panic!( - "got {:?}, want {} peers which should include {:?}", - peers, count, peer - ); -} - -fn test_down_peers(cluster: &mut Cluster) { - cluster.cfg.raft_store.max_peer_down_duration = ReadableDuration::secs(1); - cluster.run(); - - // Kill 1, 2 - for len in 1..3 { - let id = len; - cluster.stop_node(id); - wait_down_peers(cluster, len, Some(id)); - } - - // Restart 1, 2 - cluster.run_node(1).unwrap(); - cluster.run_node(2).unwrap(); - wait_down_peers(cluster, 0, None); - - cluster.stop_node(1); +macro_rules! test_down_peers { + ($cluster:expr) => { + // depress false-positive warning. 
+ #[allow(clippy::unnecessary_mut_passed)] + { + $cluster.cfg.raft_store.max_peer_down_duration = ReadableDuration::secs(1); + $cluster.run(); + + // Kill 1, 2 + for len in 1..3 { + let id = len; + $cluster.stop_node(id); + wait_down_peers($cluster, len, Some(id)); + } + + // Restart 1, 2 + $cluster.run_node(1).unwrap(); + $cluster.run_node(2).unwrap(); + wait_down_peers($cluster, 0, None); + + $cluster.stop_node(1); + + $cluster.must_put(b"k1", b"v1"); + // max peer down duration is 500 millis, but we only report down time in + // seconds, so sleep 1 second to make the old down second is always larger + // than new down second by at lease 1 second. + sleep_ms(1000); + + wait_down_peers($cluster, 1, Some(1)); + let down_secs = $cluster.get_down_peers()[&1].get_down_seconds(); + let timer = Instant::now(); + let leader = $cluster.leader_of_region(1).unwrap(); + let new_leader = if leader.get_id() == 2 { + new_peer(3, 3) + } else { + new_peer(2, 2) + }; + + $cluster.must_transfer_leader(1, new_leader); + // new leader should reset all down peer list. + wait_down_peers($cluster, 0, None); + wait_down_peers($cluster, 1, Some(1)); + assert!( + $cluster.get_down_peers()[&1].get_down_seconds() + < down_secs + timer.saturating_elapsed().as_secs() + ); - cluster.must_put(b"k1", b"v1"); - // max peer down duration is 500 millis, but we only report down time in - // seconds, so sleep 1 second to make the old down second is always larger - // than new down second by at lease 1 second. - sleep_ms(1000); - - wait_down_peers(cluster, 1, Some(1)); - let down_secs = cluster.get_down_peers()[&1].get_down_seconds(); - let timer = Instant::now(); - let leader = cluster.leader_of_region(1).unwrap(); - let new_leader = if leader.get_id() == 2 { - new_peer(3, 3) - } else { - new_peer(2, 2) + // Ensure that node will not reuse the previous peer heartbeats. 
+ $cluster.must_transfer_leader(1, leader); + wait_down_peers($cluster, 0, None); + wait_down_peers($cluster, 1, Some(1)); + assert!( + $cluster.get_down_peers()[&1].get_down_seconds() + < timer.saturating_elapsed().as_secs() + 1 + ); + } }; - - cluster.must_transfer_leader(1, new_leader); - // new leader should reset all down peer list. - wait_down_peers(cluster, 0, None); - wait_down_peers(cluster, 1, Some(1)); - assert!( - cluster.get_down_peers()[&1].get_down_seconds() - < down_secs + timer.saturating_elapsed().as_secs() - ); - - // Ensure that node will not reuse the previous peer heartbeats. - cluster.must_transfer_leader(1, leader); - wait_down_peers(cluster, 0, None); - wait_down_peers(cluster, 1, Some(1)); - assert!( - cluster.get_down_peers()[&1].get_down_seconds() < timer.saturating_elapsed().as_secs() + 1 - ); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] fn test_server_down_peers_with_hibernate_regions() { let mut cluster = new_server_cluster(0, 5); // When hibernate_regions is enabled, down peers are not detected in time // by design. So here use a short check interval to trigger region heartbeat // more frequently. 
cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(500); - test_down_peers(&mut cluster); + test_down_peers!(&mut cluster); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_server_down_peers_without_hibernate_regions() { - let mut cluster = new_server_cluster(0, 5); + let mut cluster = new_cluster(0, 5); cluster.cfg.raft_store.hibernate_regions = false; - test_down_peers(&mut cluster); + test_down_peers!(&mut cluster); } fn test_pending_peers(cluster: &mut Cluster) { From 2c5e7ebdb6244e9b5d623db3a1b5da096548740a Mon Sep 17 00:00:00 2001 From: ekexium Date: Mon, 21 Aug 2023 15:41:03 +0800 Subject: [PATCH 0858/1149] ctl: print durations in get_region_read_progress (#15327) ref tikv/tikv#15082 Record and print `duration to last update_safe_ts` and `duration to last consume leader`. Signed-off-by: ekexium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- cmd/tikv-ctl/src/cmd.rs | 1 + cmd/tikv-ctl/src/executor.rs | 25 ++++++----- .../backup-stream/src/subscription_track.rs | 4 +- components/cdc/src/delegate.rs | 2 +- components/cdc/src/initializer.rs | 2 +- components/raftstore/src/store/util.rs | 43 ++++++++++++++++--- components/resolved_ts/src/endpoint.rs | 19 +++++++- components/resolved_ts/src/metrics.rs | 10 +++++ components/resolved_ts/src/resolver.rs | 12 ++++-- src/server/service/debug.rs | 13 ++++-- 10 files changed, 104 insertions(+), 27 deletions(-) diff --git a/cmd/tikv-ctl/src/cmd.rs b/cmd/tikv-ctl/src/cmd.rs index 8a432fc35df..1fafa33f5a7 100644 --- a/cmd/tikv-ctl/src/cmd.rs +++ b/cmd/tikv-ctl/src/cmd.rs @@ -627,6 +627,7 @@ pub enum Cmd { /// hex end key end: String, }, + /// Get the state of a region's RegionReadProgress. 
GetRegionReadProgress { #[structopt(short = "r", long)] /// The target region id diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 19a65e9bd20..a145118acea 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -2,7 +2,7 @@ use std::{ borrow::ToOwned, cmp::Ordering, path::Path, pin::Pin, result, str, string::ToString, sync::Arc, - time::Duration, u64, + time::Duration, }; use api_version::{ApiV1, KvFormat}; @@ -1008,15 +1008,20 @@ impl DebugExecutor for DebugClient { ), ("paused", resp.get_region_read_progress_paused().to_string()), ("discarding", resp.get_discard().to_string()), - // TODO: figure out the performance impact here before implementing it. - // ( - // "duration to last update_safe_ts", - // format!("{} ms", resp.get_duration_to_last_update_safe_ts_ms()), - // ), - // ( - // "duration to last consume_leader_info", - // format!("{} ms", resp.get_duration_to_last_consume_leader_ms()), - // ), + ( + "duration since resolved-ts last called update_safe_ts()", + match resp.get_duration_to_last_update_safe_ts_ms() { + u64::MAX => "none".to_owned(), + x => format!("{} ms", x), + }, + ), + ( + "duration to last consume_leader_info()", + match resp.get_duration_to_last_consume_leader_ms() { + u64::MAX => "none".to_owned(), + x => format!("{} ms", x), + }, + ), ("Resolver:", "".to_owned()), ("exist", resp.get_resolver_exist().to_string()), ("resolved_ts", resp.get_resolved_ts().to_string()), diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 7fee1b1b438..e92759bc2b2 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -511,7 +511,7 @@ impl TwoPhaseResolver { return min_ts.min(stable_ts); } - self.resolver.resolve(min_ts) + self.resolver.resolve(min_ts, None) } pub fn resolved_ts(&self) -> TimeStamp { @@ -541,7 +541,7 @@ impl TwoPhaseResolver { // advance the internal 
resolver. // the start ts of initial scanning would be a safe ts for min ts // -- because is used to be a resolved ts. - self.resolver.resolve(ts); + self.resolver.resolve(ts, None); } None => { warn!("BUG: a two-phase resolver is executing phase_one_done when not in phase one"; "resolver" => ?self) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index 7eb45480163..4c8b2226f49 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -439,7 +439,7 @@ impl Delegate { } debug!("cdc try to advance ts"; "region_id" => self.region_id, "min_ts" => min_ts); let resolver = self.resolver.as_mut().unwrap(); - let resolved_ts = resolver.resolve(min_ts); + let resolved_ts = resolver.resolve(min_ts, None); debug!("cdc resolved ts updated"; "region_id" => self.region_id, "resolved_ts" => resolved_ts); Some(resolved_ts) diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index d41b7ae2702..2c0884bb303 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -458,7 +458,7 @@ impl Initializer { fn finish_building_resolver(&self, mut resolver: Resolver, region: Region) { let observe_id = self.observe_id; - let rts = resolver.resolve(TimeStamp::zero()); + let rts = resolver.resolve(TimeStamp::zero(), None); info!( "cdc resolver initialized and schedule resolver ready"; "region_id" => region.get_id(), diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index d3dc0b3e920..880a394fdae 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -35,7 +35,7 @@ use tikv_util::{ codec::number::{decode_u64, NumberEncoder}, debug, info, store::{find_peer_by_id, region}, - time::monotonic_raw_now, + time::{monotonic_raw_now, Instant}, Either, }; use time::{Duration, Timespec}; @@ -1200,10 +1200,11 @@ impl RegionReadProgressRegistry { ) -> Vec { let mut regions = Vec::with_capacity(leaders.len()); 
let registry = self.registry.lock().unwrap(); + let now = Some(Instant::now_coarse()); for leader_info in &leaders { let region_id = leader_info.get_region_id(); if let Some(rp) = registry.get(®ion_id) { - if rp.consume_leader_info(leader_info, coprocessor) { + if rp.consume_leader_info(leader_info, coprocessor, now) { regions.push(region_id); } } @@ -1309,7 +1310,7 @@ impl RegionReadProgress { } } - pub fn update_safe_ts(&self, apply_index: u64, ts: u64) { + pub fn update_safe_ts_with_time(&self, apply_index: u64, ts: u64, now: Option) { if apply_index == 0 || ts == 0 { return; } @@ -1317,13 +1318,17 @@ impl RegionReadProgress { if core.discard { return; } - if let Some(ts) = core.update_safe_ts(apply_index, ts) { + if let Some(ts) = core.update_safe_ts(apply_index, ts, now) { if !core.pause { self.safe_ts.store(ts, AtomicOrdering::Release); } } } + pub fn update_safe_ts(&self, apply_index: u64, ts: u64) { + self.update_safe_ts_with_time(apply_index, ts, None) + } + pub fn merge_safe_ts( &self, source_safe_ts: u64, @@ -1347,15 +1352,21 @@ impl RegionReadProgress { &self, leader_info: &LeaderInfo, coprocessor: &CoprocessorHost, + now: Option, ) -> bool { let mut core = self.core.lock().unwrap(); + if matches!((core.last_instant_of_consume_leader, now), (None, Some(_))) + || matches!((core.last_instant_of_consume_leader, now), (Some(l), Some(r)) if l < r) + { + core.last_instant_of_consume_leader = now; + } if leader_info.has_read_state() { // It is okay to update `safe_ts` without checking the `LeaderInfo`, the // `read_state` is guaranteed to be valid when it is published by the leader let rs = leader_info.get_read_state(); let (apply_index, ts) = (rs.get_applied_index(), rs.get_safe_ts()); if apply_index != 0 && ts != 0 && !core.discard { - if let Some(ts) = core.update_safe_ts(apply_index, ts) { + if let Some(ts) = core.update_safe_ts(apply_index, ts, now) { if !core.pause { self.safe_ts.store(ts, AtomicOrdering::Release); } @@ -1457,6 +1468,11 @@ pub struct 
RegionReadProgressCore { discard: bool, // A notify to trigger advancing resolved ts immediately. advance_notify: Option>, + // The approximate last instant of calling update_safe_ts(), used for diagnosis. + // Only the update from advance of resolved-ts is counted. Other sources like CDC or + // backup-stream are ignored. + last_instant_of_update_safe_ts: Option, + last_instant_of_consume_leader: Option, } // A helpful wrapper of `(apply_index, safe_ts)` item @@ -1529,6 +1545,8 @@ impl RegionReadProgressCore { pause: is_witness, discard: is_witness, advance_notify: None, + last_instant_of_update_safe_ts: None, + last_instant_of_consume_leader: None, } } @@ -1582,9 +1600,14 @@ impl RegionReadProgressCore { } // Return the `safe_ts` if it is updated - fn update_safe_ts(&mut self, idx: u64, ts: u64) -> Option { + fn update_safe_ts(&mut self, idx: u64, ts: u64, now: Option) -> Option { // Discard stale item with `apply_index` before `last_merge_index` // in order to prevent the stale item makes the `safe_ts` larger again + if matches!((self.last_instant_of_update_safe_ts, now), (None, Some(_))) + || matches!((self.last_instant_of_update_safe_ts, now), (Some(l), Some(r)) if l < r) + { + self.last_instant_of_update_safe_ts = now; + } if idx < self.last_merge_index { return None; } @@ -1674,6 +1697,14 @@ impl RegionReadProgressCore { pub fn discarding(&self) -> bool { self.discard } + + pub fn last_instant_of_update_ts(&self) -> &Option { + &self.last_instant_of_update_safe_ts + } + + pub fn last_instant_of_consume_leader(&self) -> &Option { + &self.last_instant_of_consume_leader + } } /// Represent the duration of all stages of raftstore recorded by one diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index c931a61f33b..5d0dbdcd689 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -511,10 +511,11 @@ where if regions.is_empty() { return; } + let now = 
tikv_util::time::Instant::now_coarse(); for region_id in regions.iter() { if let Some(observe_region) = self.regions.get_mut(region_id) { if let ResolverStatus::Ready = observe_region.resolver_status { - let _ = observe_region.resolver.resolve(ts); + let _ = observe_region.resolver.resolve(ts, Some(now)); } } } @@ -810,6 +811,8 @@ where let (mut oldest_ts, mut oldest_region, mut zero_ts_count) = (u64::MAX, 0, 0); let (mut oldest_leader_ts, mut oldest_leader_region) = (u64::MAX, 0); let (mut oldest_safe_ts, mut oldest_safe_ts_region) = (u64::MAX, 0); + let mut oldest_duration_to_last_update_ms = 0; + let mut oldest_duration_to_last_consume_leader_ms = 0; self.region_read_progress.with(|registry| { for (region_id, read_progress) in registry { let safe_ts = read_progress.safe_ts(); @@ -829,6 +832,17 @@ where if ts < oldest_ts { oldest_ts = ts; oldest_region = *region_id; + // use -1 to denote none. + oldest_duration_to_last_update_ms = read_progress + .get_core() + .last_instant_of_consume_leader() + .map(|t| t.saturating_elapsed().as_millis() as i64) + .unwrap_or(-1); + oldest_duration_to_last_consume_leader_ms = read_progress + .get_core() + .last_instant_of_consume_leader() + .map(|t| t.saturating_elapsed().as_millis() as i64) + .unwrap_or(-1); } if let (Some(store_id), Some(leader_store_id)) = (store_id, leader_store_id) { @@ -903,6 +917,9 @@ where ); } RTS_MIN_SAFE_TS_GAP.set(safe_ts_gap as i64); + RTS_MIN_SAFE_TS_DUATION_TO_UPDATE_SAFE_TS.set(oldest_duration_to_last_update_ms); + RTS_MIN_SAFE_TS_DURATION_TO_LAST_CONSUME_LEADER + .set(oldest_duration_to_last_consume_leader_ms); RTS_MIN_RESOLVED_TS_REGION.set(oldest_region as i64); RTS_MIN_RESOLVED_TS.set(oldest_ts as i64); diff --git a/components/resolved_ts/src/metrics.rs b/components/resolved_ts/src/metrics.rs index 979da747fb1..15b3463f70e 100644 --- a/components/resolved_ts/src/metrics.rs +++ b/components/resolved_ts/src/metrics.rs @@ -84,6 +84,16 @@ lazy_static! 
{ "The minimal (non-zero) safe ts gap for observed regions" ) .unwrap(); + pub static ref RTS_MIN_SAFE_TS_DUATION_TO_UPDATE_SAFE_TS: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_safe_ts_duration_to_update_safe_ts", + "The duration since last update_safe_ts() called by resolved-ts routine. -1 denotes None." + ) + .unwrap(); + pub static ref RTS_MIN_SAFE_TS_DURATION_TO_LAST_CONSUME_LEADER: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_safe_ts_duration_to_last_consume_leader", + "The duration since last check_leader(). -1 denotes None." + ) + .unwrap(); pub static ref RTS_ZERO_RESOLVED_TS: IntGauge = register_int_gauge!( "tikv_resolved_ts_zero_resolved_ts", "The number of zero resolved ts for observed regions" diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 28ee468d322..799c5584723 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -4,6 +4,7 @@ use std::{cmp, collections::BTreeMap, sync::Arc}; use collections::{HashMap, HashSet}; use raftstore::store::RegionReadProgress; +use tikv_util::time::Instant; use txn_types::TimeStamp; use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; @@ -159,7 +160,7 @@ impl Resolver { /// /// `min_ts` advances the resolver even if there is no write. /// Return None means the resolver is not initialized. 
- pub fn resolve(&mut self, min_ts: TimeStamp) -> TimeStamp { + pub fn resolve(&mut self, min_ts: TimeStamp, now: Option) -> TimeStamp { // The `Resolver` is stopped, not need to advance, just return the current // `resolved_ts` if self.stopped { @@ -184,7 +185,7 @@ impl Resolver { // Publish an `(apply index, safe ts)` item into the region read progress if let Some(rrp) = &self.read_progress { - rrp.update_safe_ts(self.tracked_index, self.resolved_ts.into_inner()); + rrp.update_safe_ts_with_time(self.tracked_index, self.resolved_ts.into_inner(), now); } let new_min_ts = if has_lock { @@ -307,7 +308,12 @@ mod tests { } Event::Unlock(key) => resolver.untrack_lock(&key.into_raw().unwrap(), None), Event::Resolve(min_ts, expect) => { - assert_eq!(resolver.resolve(min_ts.into()), expect.into(), "case {}", i) + assert_eq!( + resolver.resolve(min_ts.into(), None), + expect.into(), + "case {}", + i + ) } } } diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index c7d5175f5ed..d0b715542d5 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -658,9 +658,16 @@ where resp.set_read_state_ts(core.read_state().ts); resp.set_read_state_apply_index(core.read_state().idx); resp.set_discard(core.discarding()); - // TODO: set durations - // resp.set_duration_to_last_consume_leader_ms(); - // resp.set_duration_to_last_update_safe_ts_ms(); + resp.set_duration_to_last_consume_leader_ms( + core.last_instant_of_consume_leader() + .map(|t| t.saturating_elapsed().as_millis() as u64) + .unwrap_or(u64::MAX), + ); + resp.set_duration_to_last_update_safe_ts_ms( + core.last_instant_of_update_ts() + .map(|t| t.saturating_elapsed().as_millis() as u64) + .unwrap_or(u64::MAX), + ); } else { resp.set_region_read_progress_exist(false); } From e59ad221e3a317922a50933100807e3fc8a3ee09 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 21 Aug 2023 17:29:04 +0800 Subject: [PATCH 0859/1149] resource_metering: free leaked thread CPU stats (#15373) close 
tikv/tikv#15304 resource_metering: free leaked thread CPU stats Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../resource_metering/src/recorder/sub_recorder/cpu.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/components/resource_metering/src/recorder/sub_recorder/cpu.rs b/components/resource_metering/src/recorder/sub_recorder/cpu.rs index f51f9a593b6..8c4053a80ab 100644 --- a/components/resource_metering/src/recorder/sub_recorder/cpu.rs +++ b/components/resource_metering/src/recorder/sub_recorder/cpu.rs @@ -49,10 +49,13 @@ impl SubRecorder for CpuRecorder { fn cleanup( &mut self, _records: &mut RawRecords, - _thread_stores: &mut HashMap, + thread_stores: &mut HashMap, ) { - const THREAD_STAT_LEN_THRESHOLD: usize = 500; + // Remove thread stats that are no longer in thread_stores. + self.thread_stats + .retain(|tid, _| thread_stores.contains_key(tid)); + const THREAD_STAT_LEN_THRESHOLD: usize = 500; if self.thread_stats.capacity() > THREAD_STAT_LEN_THRESHOLD && self.thread_stats.len() < THREAD_STAT_LEN_THRESHOLD / 2 { From 844851ebc3d2de078080036738590d7240b75b69 Mon Sep 17 00:00:00 2001 From: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Date: Tue, 22 Aug 2023 14:21:34 +0800 Subject: [PATCH 0860/1149] br: show the details of the error occurred when get the azure ad token (#15385) close tikv/tikv#15384 Signed-off-by: Leavrth --- components/cloud/azure/src/azblob.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/cloud/azure/src/azblob.rs b/components/cloud/azure/src/azblob.rs index f7327902724..d88020aa944 100644 --- a/components/cloud/azure/src/azblob.rs +++ b/components/cloud/azure/src/azblob.rs @@ -517,7 +517,7 @@ impl ContainerBuilder for TokenCredContainerBuilder { .token_cred .get_token(&self.token_resource) .await - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, format!("{}", &e)))?; + .map_err(|e| 
io::Error::new(io::ErrorKind::InvalidInput, format!("{:?}", &e)))?; let blob_service = BlobServiceClient::new( self.account_name.clone(), StorageCredentials::BearerToken(token.token.secret().into()), From c099e482cbe28546b6b9c3b7b5d3ce0eb6e9c239 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 22 Aug 2023 14:57:34 +0800 Subject: [PATCH 0861/1149] raftstore: consider duplicated mvcc versions when check compact (#15342) ref tikv/tikv#15282 consider duplicated mvcc versions when check compact --- components/raftstore-v2/src/operation/misc.rs | 7 +- .../src/worker/cleanup/compact.rs | 43 +----- .../raftstore-v2/src/worker/cleanup/mod.rs | 2 +- components/raftstore/src/store/fsm/store.rs | 13 +- components/raftstore/src/store/mod.rs | 14 +- .../raftstore/src/store/worker/compact.rs | 142 ++++++++++++------ components/raftstore/src/store/worker/mod.rs | 2 +- 7 files changed, 118 insertions(+), 105 deletions(-) diff --git a/components/raftstore-v2/src/operation/misc.rs b/components/raftstore-v2/src/operation/misc.rs index b3c5d9eb89e..867b4192dac 100644 --- a/components/raftstore-v2/src/operation/misc.rs +++ b/components/raftstore-v2/src/operation/misc.rs @@ -8,7 +8,10 @@ use std::collections::{ use collections::HashMap; use crossbeam::channel::TrySendError; use engine_traits::{KvEngine, RaftEngine, CF_DEFAULT, CF_WRITE}; -use raftstore::{store::TabletSnapKey, Result}; +use raftstore::{ + store::{CompactThreshold, TabletSnapKey}, + Result, +}; use slog::{debug, error, info}; use crate::{ @@ -16,7 +19,7 @@ use crate::{ fsm::{Store, StoreFsmDelegate}, router::{PeerMsg, StoreTick}, worker::{ - cleanup::{self, CompactThreshold}, + cleanup::{self}, tablet, }, CompactTask::CheckAndCompact, diff --git a/components/raftstore-v2/src/worker/cleanup/compact.rs b/components/raftstore-v2/src/worker/cleanup/compact.rs index c7d7aef897d..7acdb943b91 100644 --- a/components/raftstore-v2/src/worker/cleanup/compact.rs +++ 
b/components/raftstore-v2/src/worker/cleanup/compact.rs @@ -5,9 +5,10 @@ use std::{ fmt::{self, Display, Formatter}, }; -use engine_traits::{KvEngine, RangeStats, TabletRegistry, CF_WRITE}; +use engine_traits::{KvEngine, TabletRegistry, CF_WRITE}; use fail::fail_point; use keys::{DATA_MAX_KEY, DATA_MIN_KEY}; +use raftstore::store::{need_compact, CompactThreshold}; use slog::{debug, error, info, warn, Logger}; use thiserror::Error; use tikv_util::{box_try, worker::Runnable}; @@ -21,29 +22,6 @@ pub enum Task { }, } -pub struct CompactThreshold { - tombstones_num_threshold: u64, - tombstones_percent_threshold: u64, - redundant_rows_threshold: u64, - redundant_rows_percent_threshold: u64, -} - -impl CompactThreshold { - pub fn new( - tombstones_num_threshold: u64, - tombstones_percent_threshold: u64, - redundant_rows_threshold: u64, - redundant_rows_percent_threshold: u64, - ) -> Self { - Self { - tombstones_num_threshold, - tombstones_percent_threshold, - redundant_rows_percent_threshold, - redundant_rows_threshold, - } - } -} - impl Display for Task { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { @@ -151,23 +129,6 @@ where } } -fn need_compact(range_stats: &RangeStats, compact_threshold: &CompactThreshold) -> bool { - if range_stats.num_entries < range_stats.num_versions { - return false; - } - - // We trigger region compaction when their are to many tombstones as well as - // redundant keys, both of which can severly impact scan operation: - let estimate_num_del = range_stats.num_entries - range_stats.num_versions; - let redundant_keys = range_stats.num_entries - range_stats.num_rows; - (redundant_keys >= compact_threshold.redundant_rows_threshold - && redundant_keys * 100 - >= compact_threshold.redundant_rows_percent_threshold * range_stats.num_entries) - || (estimate_num_del >= compact_threshold.tombstones_num_threshold - && estimate_num_del * 100 - >= compact_threshold.tombstones_percent_threshold * range_stats.num_entries) -} - fn 
collect_regions_to_compact( reg: &TabletRegistry, region_ids: Vec, diff --git a/components/raftstore-v2/src/worker/cleanup/mod.rs b/components/raftstore-v2/src/worker/cleanup/mod.rs index 0d04fd1eb70..fa95fbcc480 100644 --- a/components/raftstore-v2/src/worker/cleanup/mod.rs +++ b/components/raftstore-v2/src/worker/cleanup/mod.rs @@ -2,7 +2,7 @@ use std::fmt::{self, Display, Formatter}; -pub use compact::{CompactThreshold, Runner as CompactRunner, Task as CompactTask}; +pub use compact::{Runner as CompactRunner, Task as CompactTask}; use engine_traits::KvEngine; use tikv_util::worker::Runnable; diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index b674a2379e9..c21ea65a589 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -105,8 +105,9 @@ use crate::{ ReadDelegate, RefreshConfigRunner, RefreshConfigTask, RegionRunner, RegionTask, SplitCheckTask, }, - Callback, CasualMessage, GlobalReplicationState, InspectedRaftMessage, MergeResultKind, - PdTask, PeerMsg, PeerTick, RaftCommand, SignificantMsg, SnapManager, StoreMsg, StoreTick, + Callback, CasualMessage, CompactThreshold, GlobalReplicationState, InspectedRaftMessage, + MergeResultKind, PdTask, PeerMsg, PeerTick, RaftCommand, SignificantMsg, SnapManager, + StoreMsg, StoreTick, }, Error, Result, }; @@ -2520,8 +2521,12 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER CompactTask::CheckAndCompact { cf_names, ranges: ranges_need_check, - tombstones_num_threshold: self.ctx.cfg.region_compact_min_tombstones, - tombstones_percent_threshold: self.ctx.cfg.region_compact_tombstones_percent, + compact_threshold: CompactThreshold::new( + self.ctx.cfg.region_compact_min_tombstones, + self.ctx.cfg.region_compact_tombstones_percent, + self.ctx.cfg.region_compact_min_redundant_rows, + self.ctx.cfg.region_compact_redundant_rows_percent, + ), }, )) { error!( diff --git 
a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index c0164c88b04..efd149e7c41 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -84,13 +84,13 @@ pub use self::{ }, util::{RegionReadProgress, RegionReadProgressRegistry}, worker::{ - metrics as worker_metrics, AutoSplitController, BatchComponent, Bucket, BucketRange, - CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, FlowStatistics, FlowStatsReporter, - KeyEntry, LocalReadContext, LocalReader, LocalReaderCore, PdStatsMonitor, PdTask, - ReadDelegate, ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, - RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, - SplitConfigManager, SplitInfo, StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, - WriterContoller, BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, + metrics as worker_metrics, need_compact, AutoSplitController, BatchComponent, Bucket, + BucketRange, CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, CompactThreshold, + FlowStatistics, FlowStatsReporter, KeyEntry, LocalReadContext, LocalReader, + LocalReaderCore, PdStatsMonitor, PdTask, ReadDelegate, ReadExecutor, ReadExecutorProvider, + ReadProgress, ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, + SplitConfig, SplitConfigManager, SplitInfo, StoreMetaDelegate, StoreStatsReporter, + TrackVer, WriteStats, WriterContoller, BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, DEFAULT_BIG_REGION_BYTE_THRESHOLD, DEFAULT_BIG_REGION_QPS_THRESHOLD, DEFAULT_BYTE_THRESHOLD, DEFAULT_QPS_THRESHOLD, NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, REGION_CPU_OVERLOAD_THRESHOLD_RATIO, diff --git a/components/raftstore/src/store/worker/compact.rs b/components/raftstore/src/store/worker/compact.rs index f97f5d6fa34..3b2a2ec0404 100644 --- a/components/raftstore/src/store/worker/compact.rs +++ b/components/raftstore/src/store/worker/compact.rs @@ -6,7 +6,7 @@ use std::{ fmt::{self, Display, 
Formatter}, }; -use engine_traits::{KvEngine, CF_WRITE}; +use engine_traits::{KvEngine, RangeStats, CF_WRITE}; use fail::fail_point; use thiserror::Error; use tikv_util::{box_try, error, info, time::Instant, warn, worker::Runnable}; @@ -27,10 +27,32 @@ pub enum Task { cf_names: Vec, // Ranges need to check ranges: Vec, - // The minimum RocksDB tombstones a range that need compacting has + // The minimum RocksDB tombstones/duplicate versions a range that need compacting has + compact_threshold: CompactThreshold, + }, +} + +pub struct CompactThreshold { + pub tombstones_num_threshold: u64, + pub tombstones_percent_threshold: u64, + pub redundant_rows_threshold: u64, + pub redundant_rows_percent_threshold: u64, +} + +impl CompactThreshold { + pub fn new( tombstones_num_threshold: u64, tombstones_percent_threshold: u64, - }, + redundant_rows_threshold: u64, + redundant_rows_percent_threshold: u64, + ) -> Self { + Self { + tombstones_num_threshold, + tombstones_percent_threshold, + redundant_rows_percent_threshold, + redundant_rows_threshold, + } + } } impl Display for Task { @@ -55,8 +77,7 @@ impl Display for Task { Task::CheckAndCompact { ref cf_names, ref ranges, - tombstones_num_threshold, - tombstones_percent_threshold, + ref compact_threshold, } => f .debug_struct("CheckAndCompact") .field("cf_names", cf_names) @@ -67,10 +88,21 @@ impl Display for Task { ranges.last().as_ref().map(|k| log_wrappers::Value::key(k)), ), ) - .field("tombstones_num_threshold", &tombstones_num_threshold) + .field( + "tombstones_num_threshold", + &compact_threshold.tombstones_num_threshold, + ) .field( "tombstones_percent_threshold", - &tombstones_percent_threshold, + &compact_threshold.tombstones_percent_threshold, + ) + .field( + "redundant_rows_threshold", + &compact_threshold.redundant_rows_threshold, + ) + .field( + "redundant_rows_percent_threshold", + &compact_threshold.redundant_rows_percent_threshold, ) .finish(), } @@ -145,14 +177,8 @@ where Task::CheckAndCompact { cf_names, 
ranges, - tombstones_num_threshold, - tombstones_percent_threshold, - } => match collect_ranges_need_compact( - &self.engine, - ranges, - tombstones_num_threshold, - tombstones_percent_threshold, - ) { + compact_threshold, + } => match collect_ranges_need_compact(&self.engine, ranges, compact_threshold) { Ok(mut ranges) => { for (start, end) in ranges.drain(..) { for cf in &cf_names { @@ -175,28 +201,27 @@ where } } -fn need_compact( - num_entires: u64, - num_versions: u64, - tombstones_num_threshold: u64, - tombstones_percent_threshold: u64, -) -> bool { - if num_entires <= num_versions { +pub fn need_compact(range_stats: &RangeStats, compact_threshold: &CompactThreshold) -> bool { + if range_stats.num_entries < range_stats.num_versions { return false; } - // When the number of tombstones exceed threshold and ratio, this range need - // compacting. - let estimate_num_del = num_entires - num_versions; - estimate_num_del >= tombstones_num_threshold - && estimate_num_del * 100 >= tombstones_percent_threshold * num_entires + // We trigger region compaction when their are to many tombstones as well as + // redundant keys, both of which can severly impact scan operation: + let estimate_num_del = range_stats.num_entries - range_stats.num_versions; + let redundant_keys = range_stats.num_entries - range_stats.num_rows; + (redundant_keys >= compact_threshold.redundant_rows_threshold + && redundant_keys * 100 + >= compact_threshold.redundant_rows_percent_threshold * range_stats.num_entries) + || (estimate_num_del >= compact_threshold.tombstones_num_threshold + && estimate_num_del * 100 + >= compact_threshold.tombstones_percent_threshold * range_stats.num_entries) } fn collect_ranges_need_compact( engine: &impl KvEngine, ranges: Vec, - tombstones_num_threshold: u64, - tombstones_percent_threshold: u64, + compact_threshold: CompactThreshold, ) -> Result, Error> { // Check the SST properties for each range, and TiKV will compact a range if the // range contains too many RocksDB 
tombstones. TiKV will merge multiple @@ -209,12 +234,7 @@ fn collect_ranges_need_compact( // be compacted. if let Some(range_stats) = box_try!(engine.get_range_stats(CF_WRITE, &range[0], &range[1])) { - if need_compact( - range_stats.num_entries, - range_stats.num_versions, - tombstones_num_threshold, - tombstones_percent_threshold, - ) { + if need_compact(&range_stats, &compact_threshold) { if compact_start.is_none() { // The previous range doesn't need compacting. compact_start = Some(range[0].clone()); @@ -346,13 +366,14 @@ mod tests { for i in 0..5 { let (k, v) = (format!("k{}", i), format!("value{}", i)); mvcc_put(&engine, k.as_bytes(), v.as_bytes(), 1.into(), 2.into()); + mvcc_put(&engine, k.as_bytes(), v.as_bytes(), 3.into(), 4.into()); } engine.flush_cf(CF_WRITE, true).unwrap(); // gc 0..5 for i in 0..5 { let k = format!("k{}", i); - delete(&engine, k.as_bytes(), 2.into()); + delete(&engine, k.as_bytes(), 4.into()); } engine.flush_cf(CF_WRITE, true).unwrap(); @@ -361,26 +382,32 @@ mod tests { .get_range_stats(CF_WRITE, &start, &end) .unwrap() .unwrap(); - assert_eq!(range_stats.num_entries, 10); - assert_eq!(range_stats.num_versions, 5); + assert_eq!(range_stats.num_entries, 15); + assert_eq!(range_stats.num_versions, 10); + assert_eq!(range_stats.num_rows, 5); // mvcc_put 5..10 for i in 5..10 { let (k, v) = (format!("k{}", i), format!("value{}", i)); mvcc_put(&engine, k.as_bytes(), v.as_bytes(), 1.into(), 2.into()); } + for i in 5..8 { + let (k, v) = (format!("k{}", i), format!("value{}", i)); + mvcc_put(&engine, k.as_bytes(), v.as_bytes(), 3.into(), 4.into()); + } engine.flush_cf(CF_WRITE, true).unwrap(); let (s, e) = (data_key(b"k5"), data_key(b"k9")); let range_stats = engine.get_range_stats(CF_WRITE, &s, &e).unwrap().unwrap(); - assert_eq!(range_stats.num_entries, 5); - assert_eq!(range_stats.num_versions, 5); + assert_eq!(range_stats.num_entries, 8); + assert_eq!(range_stats.num_versions, 8); + assert_eq!(range_stats.num_rows, 5); + // tombstone 
triggers compaction let ranges_need_to_compact = collect_ranges_need_compact( &engine, vec![data_key(b"k0"), data_key(b"k5"), data_key(b"k9")], - 1, - 50, + CompactThreshold::new(4, 30, 100, 100), ) .unwrap(); let (s, e) = (data_key(b"k0"), data_key(b"k5")); @@ -388,28 +415,45 @@ mod tests { expected_ranges.push_back((s, e)); assert_eq!(ranges_need_to_compact, expected_ranges); - // gc 5..10 - for i in 5..10 { + // duplicated mvcc triggers compaction + let ranges_need_to_compact = collect_ranges_need_compact( + &engine, + vec![data_key(b"k0"), data_key(b"k5"), data_key(b"k9")], + CompactThreshold::new(100, 100, 5, 50), + ) + .unwrap(); + assert_eq!(ranges_need_to_compact, expected_ranges); + + // gc 5..8 + for i in 5..8 { let k = format!("k{}", i); - delete(&engine, k.as_bytes(), 2.into()); + delete(&engine, k.as_bytes(), 4.into()); } engine.flush_cf(CF_WRITE, true).unwrap(); let (s, e) = (data_key(b"k5"), data_key(b"k9")); let range_stats = engine.get_range_stats(CF_WRITE, &s, &e).unwrap().unwrap(); - assert_eq!(range_stats.num_entries, 10); - assert_eq!(range_stats.num_versions, 5); + assert_eq!(range_stats.num_entries, 11); + assert_eq!(range_stats.num_versions, 8); + assert_eq!(range_stats.num_rows, 5); let ranges_need_to_compact = collect_ranges_need_compact( &engine, vec![data_key(b"k0"), data_key(b"k5"), data_key(b"k9")], - 1, - 50, + CompactThreshold::new(3, 25, 100, 100), ) .unwrap(); let (s, e) = (data_key(b"k0"), data_key(b"k9")); let mut expected_ranges = VecDeque::new(); expected_ranges.push_back((s, e)); assert_eq!(ranges_need_to_compact, expected_ranges); + + let ranges_need_to_compact = collect_ranges_need_compact( + &engine, + vec![data_key(b"k0"), data_key(b"k5"), data_key(b"k9")], + CompactThreshold::new(100, 100, 3, 35), + ) + .unwrap(); + assert_eq!(ranges_need_to_compact, expected_ranges); } } diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index 084542c313d..e79f37a4bc4 100644 --- 
a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -23,7 +23,7 @@ pub use self::{ cleanup::{Runner as CleanupRunner, Task as CleanupTask}, cleanup_snapshot::{Runner as GcSnapshotRunner, Task as GcSnapshotTask}, cleanup_sst::{Runner as CleanupSstRunner, Task as CleanupSstTask}, - compact::{Runner as CompactRunner, Task as CompactTask}, + compact::{need_compact, CompactThreshold, Runner as CompactRunner, Task as CompactTask}, consistency_check::{Runner as ConsistencyCheckRunner, Task as ConsistencyCheckTask}, pd::{ new_change_peer_v2_request, FlowStatistics, FlowStatsReporter, HeartbeatTask, From 37845d77d97ad705c841d94bb19a7646bde891a5 Mon Sep 17 00:00:00 2001 From: Shenghui Wu <793703860@qq.com> Date: Tue, 22 Aug 2023 16:00:34 +0800 Subject: [PATCH 0862/1149] expr: support json function member_of (#15391) close tikv/tikv#15388 Signed-off-by: wshwsh12 <793703860@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 2 +- .../src/codec/mysql/json/json_memberof.rs | 78 +++++++++++++++++++ .../src/codec/mysql/json/mod.rs | 1 + components/tidb_query_expr/src/impl_json.rs | 71 +++++++++++++++++ components/tidb_query_expr/src/lib.rs | 1 + 5 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 components/tidb_query_datatype/src/codec/mysql/json/json_memberof.rs diff --git a/Cargo.lock b/Cargo.lock index 8bf3cf216d8..63e9e77efae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7096,7 +7096,7 @@ dependencies = [ [[package]] name = "tipb" version = "0.0.1" -source = "git+https://github.com/pingcap/tipb.git#55b921cfdca1e29bcc29a83c1532bfdf53f88c51" +source = "git+https://github.com/pingcap/tipb.git#711da6fede03533302fbc9fa3a8fca3556683197" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/tidb_query_datatype/src/codec/mysql/json/json_memberof.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_memberof.rs new file mode 100644 index 
00000000000..d85b1e57af8 --- /dev/null +++ b/components/tidb_query_datatype/src/codec/mysql/json/json_memberof.rs @@ -0,0 +1,78 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::cmp::Ordering; + +use super::{super::Result, JsonRef, JsonType}; + +impl<'a> JsonRef<'a> { + /// `member_of` is the implementation for MEMBER OF in mysql + /// + /// See `builtinJSONMemberOfSig` in TiDB `expression/builtin_json.go` + pub fn member_of(&self, json_array: JsonRef<'_>) -> Result { + match json_array.type_code { + JsonType::Array => { + let elem_count = json_array.get_elem_count(); + for i in 0..elem_count { + if json_array.array_get_elem(i)?.partial_cmp(self).unwrap() == Ordering::Equal { + return Ok(true); + }; + } + } + _ => { + // If `json_array` is not a JSON_ARRAY, compare the two JSON directly. + return match json_array.partial_cmp(self).unwrap() { + Ordering::Equal => Ok(true), + _ => Ok(false), + }; + } + }; + Ok(false) + } +} + +#[cfg(test)] +mod tests { + use super::super::Json; + #[test] + fn test_json_member_of() { + let mut test_cases = vec![ + (r#"1"#, r#"[1,2]"#, true), + (r#"1"#, r#"[1]"#, true), + (r#"1"#, r#"[0]"#, false), + (r#"1"#, r#"[[1]]"#, false), + (r#""1""#, r#"[1]"#, false), + (r#""1""#, r#"["1"]"#, true), + (r#""{\"a\":1}""#, r#"{"a":1}"#, false), + (r#""{\"a\":1}""#, r#"[{"a":1}]"#, false), + (r#""{\"a\":1}""#, r#"[{"a":1}, 1]"#, false), + (r#""{\"a\":1}""#, r#"["{\"a\":1}"]"#, true), + (r#""{\"a\":1}""#, r#"["{\"a\":1}",1]"#, true), + (r#"1"#, r#"1"#, true), + (r#"[4,5]"#, r#"[[3,4],[4,5]]"#, true), + (r#""[4,5]""#, r#"[[3,4],"[4,5]"]"#, true), + (r#"{"a":1}"#, r#"{"a":1}"#, true), + (r#"{"a":1}"#, r#"{"a":1, "b":2}"#, false), + (r#"{"a":1}"#, r#"[{"a":1}]"#, true), + (r#"{"a":1}"#, r#"{"b": {"a":1}}"#, false), + (r#"1"#, r#"1"#, true), + (r#"[1,2]"#, r#"[1,2]"#, false), + (r#"[1,2]"#, r#"[[1,2]]"#, true), + (r#"[[1,2]]"#, r#"[[1,2]]"#, false), + (r#"[[1,2]]"#, r#"[[[1,2]]]"#, true), + ]; + for (i, (js, value, 
expected)) in test_cases.drain(..).enumerate() { + let j = js.parse(); + assert!(j.is_ok(), "#{} expect parse ok but got {:?}", i, j); + let j: Json = j.unwrap(); + let value = value.parse(); + assert!(value.is_ok(), "#{} expect parse ok but got {:?}", i, j); + let value: Json = value.unwrap(); + let got = j.as_ref().member_of(value.as_ref()).unwrap(); + assert_eq!( + got, expected, + "#{} expect {:?}, but got {:?}", + i, expected, got + ); + } + } +} diff --git a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs index 0cd382f6d65..e6d2a391fae 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/mod.rs @@ -70,6 +70,7 @@ mod json_depth; mod json_extract; mod json_keys; mod json_length; +mod json_memberof; mod json_merge; mod json_modify; mod json_remove; diff --git a/components/tidb_query_expr/src/impl_json.rs b/components/tidb_query_expr/src/impl_json.rs index 0c905b7458c..68132ae08e2 100644 --- a/components/tidb_query_expr/src/impl_json.rs +++ b/components/tidb_query_expr/src/impl_json.rs @@ -362,6 +362,34 @@ fn json_contains(args: &[ScalarValueRef]) -> Result> { Ok(Some(j.as_ref().json_contains(target)? 
as i64)) } +// Args should be like `(Option , Option)` +fn member_of_validator(expr: &tipb::Expr) -> Result<()> { + assert!(expr.get_children().len() == 2); + let children = expr.get_children(); + super::function::validate_expr_return_type(&children[0], EvalType::Json)?; + super::function::validate_expr_return_type(&children[1], EvalType::Json)?; + Ok(()) +} + +#[rpn_fn(nullable, raw_varg,min_args= 2, max_args = 2, extra_validator = member_of_validator)] +#[inline] +fn member_of(args: &[ScalarValueRef]) -> Result> { + assert!(args.len() == 2); + let value: Option = args[0].as_json(); + let value = match value { + None => return Ok(None), + Some(value) => value.to_owned(), + }; + + let json_array: Option = args[1].as_json(); + let json_array = match json_array { + None => return Ok(None), + Some(json_array) => json_array, + }; + + Ok(Some(value.as_ref().member_of(json_array)? as i64)) +} + #[rpn_fn(nullable, raw_varg, min_args = 2, extra_validator = json_with_paths_validator)] #[inline] fn json_remove(args: &[ScalarValueRef]) -> Result> { @@ -1392,4 +1420,47 @@ mod tests { } } } + + #[test] + fn test_json_member_of() { + let test_cases = vec![ + (Some(r#"1"#), Some(r#"[1,2]"#), Some(1)), + (Some(r#"1"#), Some(r#"[1]"#), Some(1)), + (Some(r#"1"#), Some(r#"[0]"#), Some(0)), + (Some(r#"1"#), Some(r#"[[1]]"#), Some(0)), + (Some(r#""1""#), Some(r#"[1]"#), Some(0)), + (Some(r#""1""#), Some(r#"["1"]"#), Some(1)), + (Some(r#""{\"a\":1}""#), Some(r#"{"a":1}"#), Some(0)), + (Some(r#""{\"a\":1}""#), Some(r#"[{"a":1}]"#), Some(0)), + (Some(r#""{\"a\":1}""#), Some(r#"[{"a":1}, 1]"#), Some(0)), + (Some(r#""{\"a\":1}""#), Some(r#"["{\"a\":1}"]"#), Some(1)), + (Some(r#""{\"a\":1}""#), Some(r#"["{\"a\":1}",1]"#), Some(1)), + (Some(r#"1"#), Some(r#"1"#), Some(1)), + (Some(r#"[4,5]"#), Some(r#"[[3,4],[4,5]]"#), Some(1)), + (Some(r#""[4,5]""#), Some(r#"[[3,4],"[4,5]"]"#), Some(1)), + (Some(r#"{"a":1}"#), Some(r#"{"a":1}"#), Some(1)), + (Some(r#"{"a":1}"#), Some(r#"{"a":1, "b":2}"#), 
Some(0)), + (Some(r#"{"a":1}"#), Some(r#"[{"a":1}]"#), Some(1)), + (Some(r#"{"a":1}"#), Some(r#"{"b": {"a":1}}"#), Some(0)), + (Some(r#"1"#), Some(r#"1"#), Some(1)), + (Some(r#"[1,2]"#), Some(r#"[1,2]"#), Some(0)), + (Some(r#"[1,2]"#), Some(r#"[[1,2]]"#), Some(1)), + (Some(r#"[[1,2]]"#), Some(r#"[[1,2]]"#), Some(0)), + (Some(r#"[[1,2]]"#), Some(r#"[[[1,2]]]"#), Some(1)), + (None, Some(r#"[[[1,2]]]"#), None), + (Some(r#"[[1,2]]"#), None, None), + (None, None, None), + ]; + for (js, value, expected) in test_cases { + let args: Vec = vec![ + js.map(|js| Json::from_str(js).unwrap()).into(), + value.map(|value| Json::from_str(value).unwrap()).into(), + ]; + let output = RpnFnScalarEvaluator::new() + .push_params(args.clone()) + .evaluate(ScalarFuncSig::JsonMemberOfSig) + .unwrap(); + assert_eq!(output, expected, "{:?}", args); + } + } } diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index 649a7cfa1c8..c2ef6722148 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -631,6 +631,7 @@ fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { ScalarFuncSig::JsonValidJsonSig => json_valid_fn_meta(), ScalarFuncSig::JsonValidStringSig => json_valid_fn_meta(), ScalarFuncSig::JsonValidOthersSig => json_valid_fn_meta(), + ScalarFuncSig::JsonMemberOfSig => member_of_fn_meta(), // impl_like ScalarFuncSig::LikeSig => map_like_sig(ft, children)?, // impl_regexp From d7fc4b3b2ead5d2a627162604fb148f8ef1c8feb Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Tue, 22 Aug 2023 17:11:05 +0800 Subject: [PATCH 0863/1149] coprocessor: return `bucket version not match` error if the given bucket's version is stale (#15224) close tikv/tikv#15123 reject cop request if the given buckets verison is stale, and return the new error called `bucket_version_not_match` which will carry with the latest bucket keys, so the client should update it cache depend on this errror. 
And then split this cop request again. Signed-off-by: bufferflies <1045931706@qq.com> --- Cargo.lock | 2 +- .../raftstore-v2/src/operation/bucket.rs | 5 ++- components/raftstore/src/store/fsm/peer.rs | 5 ++- src/coprocessor/endpoint.rs | 16 ++++++++- src/server/metrics.rs | 4 +++ src/storage/errors.rs | 4 +++ tests/failpoints/cases/test_coprocessor.rs | 34 +++++++++++++------ 7 files changed, 56 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 63e9e77efae..0663bfae493 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2978,7 +2978,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#ee4a4ff68ac385bd61ea9da868b4b6a6c0dadcec" +source = "git+https://github.com/pingcap/kvproto.git#7b612d935bf96f9daf7a537db379bcc88b4644e0" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore-v2/src/operation/bucket.rs b/components/raftstore-v2/src/operation/bucket.rs index de4abb76712..432ea72456a 100644 --- a/components/raftstore-v2/src/operation/bucket.rs +++ b/components/raftstore-v2/src/operation/bucket.rs @@ -303,6 +303,7 @@ impl Peer { apply_scheduler.send(ApplyTask::RefreshBucketStat(region_buckets.meta.clone())); } let version = region_buckets.meta.version; + let keys = region_buckets.meta.keys.clone(); // Notify followers to flush their relevant memtables let peers = self.region().get_peers().to_vec(); if !self.is_leader() { @@ -321,6 +322,7 @@ impl Peer { extra_msg.set_type(ExtraMessageType::MsgRefreshBuckets); let mut refresh_buckets = RefreshBuckets::new(); refresh_buckets.set_version(version); + refresh_buckets.set_keys(keys.clone().into()); extra_msg.set_refresh_buckets(refresh_buckets); self.send_raft_message(store_ctx, msg); } @@ -337,13 +339,14 @@ impl Peer { } let extra_msg = msg.get_extra_msg(); let version = extra_msg.get_refresh_buckets().get_version(); + let keys = extra_msg.get_refresh_buckets().get_keys().to_vec(); let region_epoch = 
msg.get_region_epoch(); let meta = BucketMeta { region_id: self.region_id(), version, region_epoch: region_epoch.clone(), - keys: vec![], + keys, sizes: vec![], }; diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 57ebcd91aa4..fc99de93455 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -6029,6 +6029,7 @@ where RegionChangeEvent::UpdateBuckets(buckets_count), self.fsm.peer.get_role(), ); + let keys = region_buckets.meta.keys.clone(); let old_region_buckets: Option = self.fsm.peer.region_buckets.replace(region_buckets); self.fsm.peer.last_region_buckets = old_region_buckets; @@ -6051,6 +6052,7 @@ where extra_msg.set_type(ExtraMessageType::MsgRefreshBuckets); let mut refresh_buckets = RefreshBuckets::new(); refresh_buckets.set_version(version); + refresh_buckets.set_keys(keys.clone().into()); extra_msg.set_refresh_buckets(refresh_buckets); self.fsm .peer @@ -6078,13 +6080,14 @@ where return; } let version = msg.get_extra_msg().get_refresh_buckets().get_version(); + let keys = msg.get_extra_msg().get_refresh_buckets().get_keys(); let region_epoch = msg.get_region_epoch().clone(); let meta = BucketMeta { region_id: self.region_id(), version, region_epoch, - keys: vec![], + keys: keys.to_vec(), sizes: vec![], }; diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index c517e6fb08a..7a12c7493e5 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -418,10 +418,24 @@ impl Endpoint { let snapshot = unsafe { with_tls_engine(|engine| Self::async_snapshot(engine, &tracker.req_ctx)) } .await?; + let latest_buckets = snapshot.ext().get_buckets(); + + // Check if the buckets version is latest. + // skip if request don't carry this bucket version. 
+ if let Some(ref buckets) = latest_buckets&& + buckets.version > tracker.req_ctx.context.buckets_version && + tracker.req_ctx.context.buckets_version!=0 { + let mut bucket_not_match = errorpb::BucketVersionNotMatch::default(); + bucket_not_match.set_version(buckets.version); + bucket_not_match.set_keys(buckets.keys.clone().into()); + let mut err = errorpb::Error::default(); + err.set_bucket_version_not_match(bucket_not_match); + return Err(Error::Region(err)); + } // When snapshot is retrieved, deadline may exceed. tracker.on_snapshot_finished(); tracker.req_ctx.deadline.check()?; - tracker.buckets = snapshot.ext().get_buckets(); + tracker.buckets = latest_buckets; let buckets_version = tracker.buckets.as_ref().map_or(0, |b| b.version); let mut handler = if tracker.req_ctx.cache_match_version.is_some() diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 25ff3237c6f..baf8e200cbb 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -476,6 +476,7 @@ make_auto_flush_static_metric! 
{ err_disk_full, err_recovery_in_progress, err_flashback_in_progress, + err_buckets_version_not_match, err_undetermind, } @@ -512,6 +513,9 @@ impl From for RequestStatusKind { ErrorHeaderKind::DiskFull => RequestStatusKind::err_disk_full, ErrorHeaderKind::RecoveryInProgress => RequestStatusKind::err_recovery_in_progress, ErrorHeaderKind::FlashbackInProgress => RequestStatusKind::err_flashback_in_progress, + ErrorHeaderKind::BucketsVersionNotMatch => { + RequestStatusKind::err_buckets_version_not_match + } ErrorHeaderKind::Other => RequestStatusKind::err_other, } } diff --git a/src/storage/errors.rs b/src/storage/errors.rs index d3c56c48984..0e7db9ffc96 100644 --- a/src/storage/errors.rs +++ b/src/storage/errors.rs @@ -183,6 +183,7 @@ pub enum ErrorHeaderKind { DiskFull, RecoveryInProgress, FlashbackInProgress, + BucketsVersionNotMatch, Other, } @@ -207,6 +208,7 @@ impl ErrorHeaderKind { ErrorHeaderKind::DiskFull => "disk_full", ErrorHeaderKind::RecoveryInProgress => "recovery_in_progress", ErrorHeaderKind::FlashbackInProgress => "flashback_in_progress", + ErrorHeaderKind::BucketsVersionNotMatch => "buckets_version_not_match", ErrorHeaderKind::Other => "other", } } @@ -255,6 +257,8 @@ pub fn get_error_kind_from_header(header: &errorpb::Error) -> ErrorHeaderKind { ErrorHeaderKind::RecoveryInProgress } else if header.has_flashback_in_progress() { ErrorHeaderKind::FlashbackInProgress + } else if header.has_bucket_version_not_match() { + ErrorHeaderKind::BucketsVersionNotMatch } else { ErrorHeaderKind::Other } diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index 2fa3525eba2..0710f778aa7 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -448,24 +448,38 @@ fn test_follower_buckets() { }; cluster.refresh_region_bucket_keys(®ion, vec![bucket], None, None); - thread::sleep(Duration::from_millis(1000)); - let wait_refresh_buckets = |endpoint, req: Request, 
old_buckets_ver| { - let mut resp = Default::default(); + thread::sleep(Duration::from_millis(100)); + let wait_refresh_buckets = |endpoint, req: &mut Request| { for _ in 0..10 { - resp = handle_request(&endpoint, req.clone()); - if resp.get_latest_buckets_version() != old_buckets_ver { - break; + req.mut_context().set_buckets_version(0); + let resp = handle_request(&endpoint, req.clone()); + if resp.get_latest_buckets_version() == 0 { + thread::sleep(Duration::from_millis(100)); + continue; } - thread::sleep(Duration::from_millis(100)); + + req.mut_context().set_buckets_version(1); + let resp = handle_request(&endpoint, req.clone()); + if !resp.has_region_error() { + thread::sleep(Duration::from_millis(100)); + continue; + } + assert_ge!( + resp.get_region_error() + .get_bucket_version_not_match() + .version, + 1 + ); + return; } - assert_ne!(resp.get_latest_buckets_version(), old_buckets_ver); + panic!("test_follower_buckets test case failed, can not get bucket version in time"); }; - wait_refresh_buckets(endpoint, req.clone(), 0); + wait_refresh_buckets(endpoint, &mut req.clone()); for (engine, ctx) in follower_raft_engine!(cluster, "") { req.set_context(ctx.clone()); let (_, endpoint, _) = init_data_with_engine_and_commit(ctx.clone(), engine, &product, &[], true); - wait_refresh_buckets(endpoint, req.clone(), 0); + wait_refresh_buckets(endpoint, &mut req.clone()); } fail::remove("skip_check_stale_read_safe"); } From 03eab8cde970011e55ed46aeff1dabaf77bac428 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 22 Aug 2023 19:51:33 +0800 Subject: [PATCH 0864/1149] raftstore: add some metrics about async snapshot (#15278) ref tikv/tikv#15401 Add some metrics about async snapshot Signed-off-by: SpadeA-Tang --- components/raftstore/src/store/worker/read.rs | 7 + components/tikv_util/src/time.rs | 16 + components/tracker/src/lib.rs | 1 + metrics/grafana/tikv_details.json | 548 +++++++++++++++++- src/server/metrics.rs | 8 + 
src/server/raftkv/mod.rs | 46 +- 6 files changed, 610 insertions(+), 16 deletions(-) diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index da4a9fb8376..0c4641770be 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -29,6 +29,7 @@ use tikv_util::{ time::{monotonic_raw_now, ThreadReadId}, }; use time::Timespec; +use tracker::GLOBAL_TRACKERS; use txn_types::TimeStamp; use super::metrics::*; @@ -1040,6 +1041,12 @@ where _ => unreachable!(), }; + cb.read_tracker().map(|tracker| { + GLOBAL_TRACKERS.with_tracker(tracker, |t| { + t.metrics.local_read = true; + }) + }); + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().local_executed_requests.inc()); if !snap_updated { TLS_LOCAL_READ_METRICS diff --git a/components/tikv_util/src/time.rs b/components/tikv_util/src/time.rs index 7a3bb7cdb55..8594379a9bd 100644 --- a/components/tikv_util/src/time.rs +++ b/components/tikv_util/src/time.rs @@ -545,6 +545,8 @@ mod tests { time::{Duration, SystemTime}, }; + use test::Bencher; + use super::*; #[test] @@ -685,4 +687,18 @@ mod tests { assert!(now_coarse.saturating_elapsed() >= zero); } } + + #[bench] + fn bench_instant_now(b: &mut Bencher) { + b.iter(|| { + let _now = Instant::now(); + }); + } + + #[bench] + fn bench_instant_now_coarse(b: &mut Bencher) { + b.iter(|| { + let _now = Instant::now_coarse(); + }); + } } diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index 0682439bb45..6307c51f907 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -133,6 +133,7 @@ pub struct RequestMetrics { pub read_index_propose_wait_nanos: u64, pub read_index_confirm_wait_nanos: u64, pub read_pool_schedule_wait_nanos: u64, + pub local_read: bool, pub block_cache_hit_count: u64, pub block_read_count: u64, pub block_read_byte: u64, diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 
ce5571f9657..d327041cd8a 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -18424,7 +18424,7 @@ "h": 8, "w": 12, "x": 0, - "y": 22 + "y": 35 }, "heatmap": {}, "hideZeroBuckets": true, @@ -18489,7 +18489,7 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, + "x": 0, "y": 22 }, "heatmap": {}, @@ -18553,7 +18553,7 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, + "x": 12, "y": 35 }, "hiddenSeries": false, @@ -18652,6 +18652,204 @@ "alignLevel": null } }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The storage async snapshot duration without the involving of raftstore", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 48 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 31111, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "expr": "sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m])) by (le)", + "format": "heatmap", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Storage async snapshot duration (pure local read)", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": 
"#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "Read index propose wait duration associated with async snapshot", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 60 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 31112, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "expr": "sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_propose_wait\"}[1m])) by (le)", + "format": "heatmap", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Read index propose wait duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "Read index confirm duration associated with async snapshot", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 72 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 31113, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "expr": 
"sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m])) by (le)", + "format": "heatmap", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Read index confirm duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, @@ -18669,7 +18867,7 @@ "h": 8, "w": 12, "x": 12, - "y": 35 + "y": 22 }, "hiddenSeries": false, "id": 20001, @@ -18765,6 +18963,348 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The storage async snapshot duration without the involving of raftstore", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 48 + }, + "hiddenSeries": false, + "id": 31114, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.10", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, 
sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m])) by (le))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m])) by (le))", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m]))", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Storage async snapshot duration (pure local read)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Read index propose wait duration associated with async snapshot", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": 
{ + "h": 8, + "w": 12, + "x": 12, + "y": 60 + }, + "hiddenSeries": false, + "id": 31115, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.10", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_propose_wait\"}[1m])) by (le))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_propose_wait\"}[1m])) by (le))", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_propose_wait\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_propose_wait\"}[1m]))", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Read index propose wait duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + 
"name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Read index confirm duration associated with async snapshot", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 72 + }, + "hiddenSeries": false, + "id": 31116, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.10", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m])) by (le))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m])) by (le))", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m]))", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Read index confirm duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "repeat": null, diff --git a/src/server/metrics.rs b/src/server/metrics.rs index baf8e200cbb..2745be59a71 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -483,6 +483,14 @@ make_auto_flush_static_metric! { pub label_enum RequestTypeKind { write, snapshot, + // exclude those handled by raftstore + snapshot_local_read, + // If async snapshot is involved with read index request(due to lease + // expire or explicitly specified), the async snapshot duration will + // includes the duration before raft leader propsoe it (snapshot_read_index_propose_wait) + // and the time used for checking quorum (snapshot_read_index_confirm). 
+ snapshot_read_index_propose_wait, + snapshot_read_index_confirm, } pub struct AsyncRequestsCounterVec: LocalIntCounter { diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index f5f4d77aa9f..2074d469310 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -45,7 +45,7 @@ use raftstore::{ router::{LocalReadRouter, RaftStoreRouter}, store::{ self, util::encode_start_ts_into_flag_data, Callback as StoreCallback, RaftCmdExtraOpts, - ReadIndexContext, ReadResponse, RegionSnapshot, StoreMsg, WriteResponse, + ReadCallback, ReadIndexContext, ReadResponse, RegionSnapshot, StoreMsg, WriteResponse, }, }; use thiserror::Error; @@ -55,6 +55,7 @@ use tikv_util::{ future::{paired_future_callback, paired_must_called_future_callback}, time::Instant, }; +use tracker::GLOBAL_TRACKERS; use txn_types::{Key, TimeStamp, TxnExtra, TxnExtraScheduler, WriteBatchFlags}; use super::metrics::*; @@ -614,7 +615,7 @@ where .set_key_ranges(mem::take(&mut ctx.key_ranges).into()); } ASYNC_REQUESTS_COUNTER_VEC.snapshot.all.inc(); - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let (cb, f) = paired_must_called_future_callback(drop_snapshot_callback); let mut header = new_request_header(ctx.pb_ctx); @@ -638,16 +639,15 @@ where let mut cmd = RaftCmdRequest::default(); cmd.set_header(header); cmd.set_requests(vec![req].into()); + let store_cb = StoreCallback::read(Box::new(move |resp| { + cb(on_read_result(resp).map_err(Error::into)); + })); + let tracker = store_cb.read_tracker().unwrap(); + if res.is_ok() { res = self .router - .read( - ctx.read_id, - cmd, - StoreCallback::read(Box::new(move |resp| { - cb(on_read_result(resp).map_err(Error::into)); - })), - ) + .read(ctx.read_id, cmd, store_cb) .map_err(kv::Error::from); } async move { @@ -674,9 +674,31 @@ where Err(e) } Ok(CmdRes::Snap(s)) => { - ASYNC_REQUESTS_DURATIONS_VEC - .snapshot - .observe(begin_instant.saturating_elapsed_secs()); + let elapse = 
begin_instant.saturating_elapsed_secs(); + GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { + if tracker.metrics.read_index_propose_wait_nanos > 0 { + ASYNC_REQUESTS_DURATIONS_VEC + .snapshot_read_index_propose_wait + .observe( + tracker.metrics.read_index_propose_wait_nanos as f64 + / 1_000_000_000.0, + ); + // snapshot may be hanlded by lease read in raftstore + if tracker.metrics.read_index_confirm_wait_nanos > 0 { + ASYNC_REQUESTS_DURATIONS_VEC + .snapshot_read_index_confirm + .observe( + tracker.metrics.read_index_confirm_wait_nanos as f64 + / 1_000_000_000.0, + ); + } + } else if tracker.metrics.local_read { + ASYNC_REQUESTS_DURATIONS_VEC + .snapshot_local_read + .observe(elapse); + } + }); + ASYNC_REQUESTS_DURATIONS_VEC.snapshot.observe(elapse); ASYNC_REQUESTS_COUNTER_VEC.snapshot.success.inc(); Ok(s) } From 46379efd4ee037f47456d03655639b8f4a79da1f Mon Sep 17 00:00:00 2001 From: YangKeao Date: Tue, 22 Aug 2023 20:15:33 +0800 Subject: [PATCH 0865/1149] charset: implement utf8mb4_0900_bin collation (#15398) close tikv/tikv#15397 Signed-off-by: Yang Keao Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../src/codec/collation/collator/mod.rs | 23 +++++++++++++++++++ .../collation/collator/utf8mb4_uca/mod.rs | 2 +- .../src/codec/collation/mod.rs | 1 + .../tidb_query_datatype/src/def/field_type.rs | 3 +++ .../tidb_query_expr/src/impl_compare.rs | 14 +++++++++++ 5 files changed, 42 insertions(+), 1 deletion(-) diff --git a/components/tidb_query_datatype/src/codec/collation/collator/mod.rs b/components/tidb_query_datatype/src/codec/collation/collator/mod.rs index 20a89090535..913d1dced9f 100644 --- a/components/tidb_query_datatype/src/codec/collation/collator/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/collator/mod.rs @@ -44,6 +44,7 @@ mod tests { (Collation::GbkBin, 5), (Collation::GbkChineseCi, 6), (Collation::Utf8Mb40900AiCi, 7), + (Collation::Utf8Mb40900Bin, 8), ]; let cases = vec![ // (sa, sb, 
[Utf8Mb4Bin, Utf8Mb4BinNoPadding, Utf8Mb4GeneralCi, Utf8Mb4UnicodeCi, @@ -60,6 +61,7 @@ mod tests { Ordering::Equal, Ordering::Equal, Ordering::Equal, + Ordering::Equal, ], ), ( @@ -74,6 +76,7 @@ mod tests { Ordering::Equal, Ordering::Equal, Ordering::Less, + Ordering::Less, ], ), ( @@ -88,6 +91,7 @@ mod tests { Ordering::Greater, Ordering::Equal, Ordering::Less, + Ordering::Greater, ], ), ( @@ -102,6 +106,7 @@ mod tests { Ordering::Greater, Ordering::Greater, Ordering::Greater, + Ordering::Greater, ], ), ( @@ -116,6 +121,7 @@ mod tests { Ordering::Less, Ordering::Less, Ordering::Less, + Ordering::Less, ], ), ( @@ -130,6 +136,7 @@ mod tests { Ordering::Less, Ordering::Less, Ordering::Equal, + Ordering::Less, ], ), ( @@ -144,6 +151,7 @@ mod tests { Ordering::Less, Ordering::Less, Ordering::Greater, + Ordering::Less, ], ), ( @@ -158,6 +166,7 @@ mod tests { Ordering::Less, Ordering::Less, Ordering::Equal, + Ordering::Greater, ], ), ( @@ -172,6 +181,7 @@ mod tests { Ordering::Greater, Ordering::Greater, Ordering::Less, + Ordering::Less, ], ), ( @@ -186,6 +196,7 @@ mod tests { Ordering::Less, Ordering::Less, Ordering::Less, + Ordering::Less, ], ), ]; @@ -243,6 +254,7 @@ mod tests { (Collation::GbkBin, 5), (Collation::GbkChineseCi, 6), (Collation::Utf8Mb40900AiCi, 7), + (Collation::Utf8Mb40900Bin, 8), ]; let cases = vec![ // (str, [Utf8Mb4Bin, Utf8Mb4BinNoPadding, Utf8Mb4GeneralCi, Utf8Mb4UnicodeCi, Latin1, @@ -258,6 +270,7 @@ mod tests { vec![0x61], vec![0x41], vec![0x1C, 0x47], + vec![0x61], ], ), ( @@ -271,6 +284,7 @@ mod tests { vec![0x41], vec![0x41], vec![0x1C, 0x47, 0x2, 0x9], + vec![0x41, 0x20], ], ), ( @@ -284,6 +298,7 @@ mod tests { vec![0x41], vec![0x41], vec![0x1C, 0x47], + vec![0x41], ], ), ( @@ -297,6 +312,7 @@ mod tests { vec![0x3F], vec![0x3F], vec![0x15, 0xFE], + vec![0xF0, 0x9F, 0x98, 0x83], ], ), ( @@ -343,6 +359,11 @@ mod tests { 0x1C, 0x47, 0x1F, 0x21, 0x2, 0x9, 0x9, 0x1B, 0x2, 0x9, 0x1E, 0x21, 0x1E, 0xB5, 0x1E, 0xFF, ], + vec![ + 0x46, 0x6F, 0x6F, 
0x20, 0xC2, 0xA9, 0x20, 0x62, 0x61, 0x72, 0x20, 0xF0, + 0x9D, 0x8C, 0x86, 0x20, 0x62, 0x61, 0x7A, 0x20, 0xE2, 0x98, 0x83, 0x20, + 0x71, 0x75, 0x78, + ], ], ), ( @@ -362,6 +383,7 @@ mod tests { 0x23, 0x25, 0x23, 0x9C, 0x2, 0x9, 0x23, 0x25, 0x23, 0x9C, 0x23, 0xB, 0x23, 0x9C, 0x23, 0xB1, ], + vec![0xEF, 0xB7, 0xBB], ], ), ( @@ -375,6 +397,7 @@ mod tests { vec![0xD6, 0xD0, 0xCE, 0xC4], vec![0xD3, 0x21, 0xC1, 0xAD], vec![0xFB, 0x40, 0xCE, 0x2D, 0xFB, 0x40, 0xE5, 0x87], + vec![0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87], ], ), ]; diff --git a/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/mod.rs b/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/mod.rs index 007c8ae4b03..b90d28d0e11 100644 --- a/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/collator/utf8mb4_uca/mod.rs @@ -11,7 +11,7 @@ use super::*; /// right spaces). pub type CollatorUtf8Mb4UnicodeCi = CollatorUca; -/// Collator for `utf8mb4_0900_ai-ci` collation without padding +/// Collator for `utf8mb4_0900_ai_ci` collation without padding pub type CollatorUtf8Mb40900AiCi = CollatorUca; pub trait UnicodeVersion: 'static + Send + Sync + Debug { diff --git a/components/tidb_query_datatype/src/codec/collation/mod.rs b/components/tidb_query_datatype/src/codec/collation/mod.rs index f1fea754e87..22127e62f49 100644 --- a/components/tidb_query_datatype/src/codec/collation/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/mod.rs @@ -33,6 +33,7 @@ macro_rules! 
match_template_collator { Utf8Mb4GeneralCi => CollatorUtf8Mb4GeneralCi, Utf8Mb4UnicodeCi => CollatorUtf8Mb4UnicodeCi, Utf8Mb40900AiCi => CollatorUtf8Mb40900AiCi, + Utf8Mb40900Bin => CollatorUtf8Mb4BinNoPadding, Latin1Bin => CollatorLatin1Bin, GbkBin => CollatorGbkBin, GbkChineseCi => CollatorGbkChineseCi, diff --git a/components/tidb_query_datatype/src/def/field_type.rs b/components/tidb_query_datatype/src/def/field_type.rs index bbaa1e42737..06f4454b36d 100644 --- a/components/tidb_query_datatype/src/def/field_type.rs +++ b/components/tidb_query_datatype/src/def/field_type.rs @@ -111,6 +111,7 @@ pub enum Collation { Utf8Mb4GeneralCi = -45, Utf8Mb4UnicodeCi = -224, Utf8Mb40900AiCi = -255, + Utf8Mb40900Bin = -309, Latin1Bin = -47, GbkBin = -87, GbkChineseCi = -28, @@ -132,6 +133,7 @@ impl Collation { -87 => Ok(Collation::GbkBin), -28 => Ok(Collation::GbkChineseCi), -255 => Ok(Collation::Utf8Mb40900AiCi), + -309 => Ok(Collation::Utf8Mb40900Bin), n if n >= 0 => Ok(Collation::Utf8Mb4BinNoPadding), n => Err(DataTypeError::UnsupportedCollation { code: n }), } @@ -533,6 +535,7 @@ mod tests { (-83, Some(Collation::Utf8Mb4Bin)), (255, Some(Collation::Utf8Mb4BinNoPadding)), (-255, Some(Collation::Utf8Mb40900AiCi)), + (-309, Some(Collation::Utf8Mb40900Bin)), (i32::MAX, Some(Collation::Utf8Mb4BinNoPadding)), (i32::MIN, None), (-192, Some(Collation::Utf8Mb4UnicodeCi)), diff --git a/components/tidb_query_expr/src/impl_compare.rs b/components/tidb_query_expr/src/impl_compare.rs index 4c2cbb58dfc..e0a6acab2f8 100644 --- a/components/tidb_query_expr/src/impl_compare.rs +++ b/components/tidb_query_expr/src/impl_compare.rs @@ -980,6 +980,7 @@ mod tests { Ordering::Equal, Ordering::Equal, Ordering::Less, + Ordering::Less, ], ), ( @@ -992,6 +993,7 @@ mod tests { Ordering::Less, Ordering::Less, Ordering::Less, + Ordering::Less, ], ), ( @@ -1004,6 +1006,7 @@ mod tests { Ordering::Equal, Ordering::Equal, Ordering::Equal, + Ordering::Greater, ], ), ( @@ -1016,6 +1019,7 @@ mod tests { 
Ordering::Equal, Ordering::Equal, Ordering::Less, + Ordering::Greater, ], ), ( @@ -1028,6 +1032,7 @@ mod tests { Ordering::Equal, Ordering::Equal, Ordering::Less, + Ordering::Less, ], ), ( @@ -1040,6 +1045,7 @@ mod tests { Ordering::Equal, Ordering::Equal, Ordering::Equal, + Ordering::Greater, ], ), ( @@ -1052,6 +1058,7 @@ mod tests { Ordering::Greater, Ordering::Greater, Ordering::Greater, + Ordering::Greater, ], ), ( @@ -1064,6 +1071,7 @@ mod tests { Ordering::Greater, Ordering::Greater, Ordering::Greater, + Ordering::Greater, ], ), ( @@ -1076,6 +1084,7 @@ mod tests { Ordering::Less, Ordering::Less, Ordering::Less, + Ordering::Less, ], ), ( @@ -1088,6 +1097,7 @@ mod tests { Ordering::Equal, Ordering::Equal, Ordering::Equal, + Ordering::Less, ], ), ( @@ -1100,6 +1110,7 @@ mod tests { Ordering::Less, Ordering::Less, Ordering::Less, + Ordering::Greater, ], ), ( @@ -1112,6 +1123,7 @@ mod tests { Ordering::Equal, Ordering::Equal, Ordering::Greater, + Ordering::Greater, ], ), ( @@ -1124,6 +1136,7 @@ mod tests { Ordering::Less, Ordering::Equal, Ordering::Equal, + Ordering::Greater, ], ), ]; @@ -1134,6 +1147,7 @@ mod tests { (Collation::Utf8Mb4GeneralCi, 3), (Collation::Utf8Mb4UnicodeCi, 4), (Collation::Utf8Mb40900AiCi, 5), + (Collation::Utf8Mb40900Bin, 6), ]; for (str_a, str_b, ordering_in_collations) in cases { From d83a73370baa686823d4736b7a05bc15c2680373 Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 22 Aug 2023 23:03:34 -0700 Subject: [PATCH 0866/1149] raftstore: Fix incorrect raftstore applys memory trace (#15372) close tikv/tikv#15371 Fix incorrect raftstore applys memory trace. When performing dropping apply fsm, it should reset the memory trace to zero instead of calling `update_memory_trace`, otherwise it would record extra footage of the interior vector capacity. 
Signed-off-by: Connor1996 Co-authored-by: tonyxuqqi --- components/raftstore/src/store/fsm/apply.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 873b5facf84..e2b1cedc88d 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -4470,7 +4470,9 @@ where self.delegate.clear_all_commands_as_stale(); } let mut event = TraceEvent::default(); - self.delegate.update_memory_trace(&mut event); + if let Some(e) = self.delegate.trace.reset(ApplyMemoryTrace::default()) { + event = event + e; + } MEMTRACE_APPLYS.trace(event); } } From fa903baee1df26bd4450cea89f89a65d1a93673c Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Wed, 23 Aug 2023 17:30:34 +0800 Subject: [PATCH 0867/1149] raftstore: more log information for region meta (#15381) ref tikv/tikv#13311 Adding more log information for diagnosing the issue where inconsistencies between `region` and `region range` information in the meta may occur during region split and self-destroy scenarios. 
Signed-off-by: cfzjywxk --- components/raftstore/src/store/fsm/peer.rs | 42 +++++++++++++++++----- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index fc99de93455..da91e26eb09 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -97,7 +97,7 @@ use crate::{ UnsafeRecoveryState, UnsafeRecoveryWaitApplySyncer, }, util, - util::{KeysInfoFormatter, LeaseState}, + util::{is_region_initialized, KeysInfoFormatter, LeaseState}, worker::{ Bucket, BucketRange, CleanupTask, ConsistencyCheckTask, GcSnapshotTask, RaftlogGcTask, ReadDelegate, ReadProgress, RegionTask, SplitCheckTask, @@ -2952,6 +2952,7 @@ where "region_id" => self.fsm.region_id(), "peer_id" => self.fsm.peer_id(), "target_peer" => ?target, + "job.initialized" => job.initialized, ); if self.handle_destroy_peer(job) { // It's not frequent, so use 0 as `heap_size` is ok. @@ -3662,13 +3663,15 @@ where return false; } + let region_id = self.region_id(); + let is_initialized = self.fsm.peer.is_initialized(); info!( "starts destroy"; - "region_id" => self.fsm.region_id(), + "region_id" => region_id, "peer_id" => self.fsm.peer_id(), "merged_by_target" => merged_by_target, + "is_initialized" => is_initialized, ); - let region_id = self.region_id(); // We can't destroy a peer which is handling snapshot. 
assert!(!self.fsm.peer.is_handling_snapshot()); @@ -3686,6 +3689,25 @@ where } let mut meta = self.ctx.store_meta.lock().unwrap(); + let is_region_initialized_in_meta = meta + .regions + .get(®ion_id) + .map_or(false, |region| is_region_initialized(region)); + if !is_initialized && is_region_initialized_in_meta { + let region_in_meta = meta.regions.get(®ion_id).unwrap(); + error!( + "peer is destroyed inconsistently"; + "region_id" => region_id, + "peer_id" => self.fsm.peer_id(), + "peers" => ?self.region().get_peers(), + "merged_by_target" => merged_by_target, + "is_initialized" => is_initialized, + "is_region_initialized_in_meta" => is_region_initialized_in_meta, + "start_key_in_meta" => log_wrappers::Value::key(region_in_meta.get_start_key()), + "end_key_in_meta" => log_wrappers::Value::key(region_in_meta.get_end_key()), + "peers_in_meta" => ?region_in_meta.get_peers(), + ); + } if meta.atomic_snap_regions.contains_key(&self.region_id()) { drop(meta); @@ -3723,7 +3745,6 @@ where "err" => %e, ); } - let is_initialized = self.fsm.peer.is_initialized(); if let Err(e) = self.fsm.peer.destroy( &self.ctx.engines, &mut self.ctx.raft_perf_context, @@ -4117,11 +4138,7 @@ where } // Insert new regions and validation - info!( - "insert new region"; - "region_id" => new_region_id, - "region" => ?new_region, - ); + let mut is_uninitialized_peer_exist = false; if let Some(r) = meta.regions.get(&new_region_id) { // Suppose a new node is added by conf change and the snapshot comes slowly. 
// Then, the region splits and the first vote message comes to the new node @@ -4135,8 +4152,15 @@ where new_region_id, r, new_region ); } + is_uninitialized_peer_exist = true; self.ctx.router.close(new_region_id); } + info!( + "insert new region"; + "region_id" => new_region_id, + "region" => ?new_region, + "is_uninitialized_peer_exist" => is_uninitialized_peer_exist, + ); let (sender, mut new_peer) = match PeerFsm::create( self.ctx.store_id(), From 8d212a58fc01fba342d38f3359b9b3d559ed3a3c Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Wed, 23 Aug 2023 17:45:34 +0800 Subject: [PATCH 0868/1149] Tablet's flow control improvement (#15325) ref tikv/tikv#15269 Make flow control of compaction pending bytes global in Partitioned-raft-kv as well. The major reason is because compaction pending bytes is a trend signal. If overall the TiKV's pending bytes is increasing, then it means the write speed is too fast and it needs to slow down. Essentially it means TiKV's capability is not enough to handle the traffic. And the side benefit is that it could greatly save the memory because we only need one VecDeq object for compaction pending bytes instead of one per region. The last reason to not make it regional is that the threshold is hard to set because the proper threshold depends on the region size (The idea is that if there're fewer regions, then we have higher tolerance in the extreme case if we only have one region, then it should be same as SingleRocksDB.), and that's the indicator that we should make it global. 
Signed-off-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: Neil Shen --- .../singleton_flow_controller.rs | 144 ++++++++------ .../flow_controller/tablet_flow_controller.rs | 178 +++++++++++++++++- 2 files changed, 258 insertions(+), 64 deletions(-) diff --git a/src/storage/txn/flow_controller/singleton_flow_controller.rs b/src/storage/txn/flow_controller/singleton_flow_controller.rs index f15806e7d94..5d52c272db6 100644 --- a/src/storage/txn/flow_controller/singleton_flow_controller.rs +++ b/src/storage/txn/flow_controller/singleton_flow_controller.rs @@ -406,7 +406,7 @@ struct CfFlowChecker { // When the write flow is about 100MB/s, we observed that the compaction ops // is about 2.5, it means there are 750 compaction events in 5 minutes. long_term_pending_bytes: - Smoother, + Option>, pending_bytes_before_unsafe_destroy_range: Option, // On start related markers. Because after restart, the memtable, l0 files @@ -422,6 +422,12 @@ struct CfFlowChecker { impl Default for CfFlowChecker { fn default() -> Self { + CfFlowChecker::new(true) + } +} + +impl CfFlowChecker { + pub fn new(include_pending_bytes: bool) -> Self { Self { last_num_memtables: Smoother::default(), memtable_debt: 0.0, @@ -433,7 +439,11 @@ impl Default for CfFlowChecker { last_l0_bytes: 0, last_l0_bytes_time: Instant::now_coarse(), short_term_l0_consumption_flow: Smoother::default(), - long_term_pending_bytes: Smoother::default(), + long_term_pending_bytes: if include_pending_bytes { + Some(Smoother::default()) + } else { + None + }, pending_bytes_before_unsafe_destroy_range: None, on_start_memtable: true, on_start_l0_files: true, @@ -525,10 +535,11 @@ impl FlowChecker { discard_ratio: Arc, limiter: Arc, ) -> Self { + let include_pending_bytes = region_id == 0; let cf_checkers = engine .cf_names(region_id) .into_iter() - .map(|cf_name| (cf_name, CfFlowChecker::default())) + .map(|cf_name| (cf_name, 
CfFlowChecker::new(include_pending_bytes))) .collect(); Self { @@ -590,9 +601,13 @@ impl FlowChecker { self.wait_for_destroy_range_finish = true; let soft = (self.soft_pending_compaction_bytes_limit as f64).log2(); for cf_checker in self.cf_checkers.values_mut() { - let v = cf_checker.long_term_pending_bytes.get_avg(); - if v <= soft { - cf_checker.pending_bytes_before_unsafe_destroy_range = Some(v); + if let Some(long_term_pending_bytes) = + cf_checker.long_term_pending_bytes.as_ref() + { + let v = long_term_pending_bytes.get_avg(); + if v <= soft { + cf_checker.pending_bytes_before_unsafe_destroy_range = Some(v); + } } } } @@ -719,74 +734,89 @@ impl FlowChecker { (rate, cf_throttle_flags) } - fn on_pending_compaction_bytes_change(&mut self, cf: String) { + pub fn on_pending_compaction_bytes_change(&mut self, cf: String) -> u64 { + let pending_compaction_bytes = self.engine.pending_compaction_bytes(self.region_id, &cf); + self.on_pending_compaction_bytes_change_cf(pending_compaction_bytes, cf); + pending_compaction_bytes + } + + pub fn on_pending_compaction_bytes_change_cf( + &mut self, + pending_compaction_bytes: u64, + cf: String, + ) { let hard = (self.hard_pending_compaction_bytes_limit as f64).log2(); let soft = (self.soft_pending_compaction_bytes_limit as f64).log2(); - // Because pending compaction bytes changes dramatically, take the // logarithm of pending compaction bytes to make the values fall into // a relative small range - let mut num = (self.engine.pending_compaction_bytes(self.region_id, &cf) as f64).log2(); + let mut num = (pending_compaction_bytes as f64).log2(); if !num.is_finite() { // 0.log2() == -inf, which is not expected and may lead to sum always be NaN num = 0.0; } let checker = self.cf_checkers.get_mut(&cf).unwrap(); - checker.long_term_pending_bytes.observe(num); - SCHED_PENDING_COMPACTION_BYTES_GAUGE - .with_label_values(&[&cf]) - .set((checker.long_term_pending_bytes.get_avg() * RATIO_SCALE_FACTOR as f64) as i64); - // do special 
check on start, see the comment of the variable definition for - // detail. - if checker.on_start_pending_bytes { - if num < soft || checker.long_term_pending_bytes.trend() == Trend::Increasing { - // the write is accumulating, still need to throttle - checker.on_start_pending_bytes = false; - } else { - // still on start, should not throttle now - return; + // only be called by v1 + if let Some(long_term_pending_bytes) = checker.long_term_pending_bytes.as_mut() { + long_term_pending_bytes.observe(num); + SCHED_PENDING_COMPACTION_BYTES_GAUGE + .with_label_values(&[&cf]) + .set((long_term_pending_bytes.get_avg() * RATIO_SCALE_FACTOR as f64) as i64); + + // do special check on start, see the comment of the variable definition for + // detail. + if checker.on_start_pending_bytes { + if num < soft || long_term_pending_bytes.trend() == Trend::Increasing { + // the write is accumulating, still need to throttle + checker.on_start_pending_bytes = false; + } else { + // still on start, should not throttle now + return; + } } - } - let pending_compaction_bytes = checker.long_term_pending_bytes.get_avg(); - let ignore = if let Some(before) = checker.pending_bytes_before_unsafe_destroy_range { - if pending_compaction_bytes <= before && !self.wait_for_destroy_range_finish { - checker.pending_bytes_before_unsafe_destroy_range = None; - } - true - } else { - false - }; + let pending_compaction_bytes = long_term_pending_bytes.get_avg(); + let ignore = if let Some(before) = checker.pending_bytes_before_unsafe_destroy_range { + if pending_compaction_bytes <= before && !self.wait_for_destroy_range_finish { + checker.pending_bytes_before_unsafe_destroy_range = None; + } + true + } else { + false + }; - for checker in self.cf_checkers.values() { - if num < checker.long_term_pending_bytes.get_recent() { - return; + for checker in self.cf_checkers.values() { + if let Some(long_term_pending_bytes) = checker.long_term_pending_bytes.as_ref() + && num < long_term_pending_bytes.get_recent() + 
{ + return; + } } - } - let mut ratio = if pending_compaction_bytes < soft || ignore { - 0 - } else { - let new_ratio = (pending_compaction_bytes - soft) / (hard - soft); - let old_ratio = self.discard_ratio.load(Ordering::Relaxed); - - // Because pending compaction bytes changes up and down, so using - // EMA(Exponential Moving Average) to smooth it. - (if old_ratio != 0 { - EMA_FACTOR * (old_ratio as f64 / RATIO_SCALE_FACTOR as f64) - + (1.0 - EMA_FACTOR) * new_ratio - } else if new_ratio > 0.01 { - 0.01 + let mut ratio = if pending_compaction_bytes < soft || ignore { + 0 } else { - new_ratio - } * RATIO_SCALE_FACTOR as f64) as u32 - }; - SCHED_DISCARD_RATIO_GAUGE.set(ratio as i64); - if ratio > RATIO_SCALE_FACTOR { - ratio = RATIO_SCALE_FACTOR; + let new_ratio = (pending_compaction_bytes - soft) / (hard - soft); + let old_ratio = self.discard_ratio.load(Ordering::Relaxed); + + // Because pending compaction bytes changes up and down, so using + // EMA(Exponential Moving Average) to smooth it. 
+ (if old_ratio != 0 { + EMA_FACTOR * (old_ratio as f64 / RATIO_SCALE_FACTOR as f64) + + (1.0 - EMA_FACTOR) * new_ratio + } else if new_ratio > 0.01 { + 0.01 + } else { + new_ratio + } * RATIO_SCALE_FACTOR as f64) as u32 + }; + SCHED_DISCARD_RATIO_GAUGE.set(ratio as i64); + if ratio > RATIO_SCALE_FACTOR { + ratio = RATIO_SCALE_FACTOR; + } + self.discard_ratio.store(ratio, Ordering::Relaxed); } - self.discard_ratio.store(ratio, Ordering::Relaxed); } fn on_memtable_change(&mut self, cf: &str) { @@ -1080,7 +1110,7 @@ pub(super) mod tests { } } - fn send_flow_info(tx: &mpsc::SyncSender, region_id: u64) { + pub fn send_flow_info(tx: &mpsc::SyncSender, region_id: u64) { tx.send(FlowInfo::Flush("default".to_string(), 0, region_id)) .unwrap(); tx.send(FlowInfo::Compaction("default".to_string(), region_id)) diff --git a/src/storage/txn/flow_controller/tablet_flow_controller.rs b/src/storage/txn/flow_controller/tablet_flow_controller.rs index f53512b749c..eee28997332 100644 --- a/src/storage/txn/flow_controller/tablet_flow_controller.rs +++ b/src/storage/txn/flow_controller/tablet_flow_controller.rs @@ -13,7 +13,7 @@ use std::{ use collections::{HashMap, HashMapEntry}; use engine_rocks::FlowInfo; -use engine_traits::{CfNamesExt, FlowControlFactorsExt, TabletRegistry}; +use engine_traits::{CfNamesExt, FlowControlFactorsExt, TabletRegistry, DATA_CFS}; use rand::Rng; use tikv_util::{sys::thread::StdThreadBuildWrapper, time::Limiter}; @@ -65,6 +65,7 @@ pub struct TabletFlowController { tx: Option>, handle: Option>, limiters: Limiters, + global_discard_ratio: Arc, } impl Drop for TabletFlowController { @@ -100,6 +101,7 @@ impl TabletFlowController { .unwrap(); let flow_checkers = Arc::new(RwLock::new(HashMap::default())); let limiters: Limiters = Arc::new(RwLock::new(HashMap::default())); + let global_discard_ratio = Arc::new(AtomicU32::new(0)); Self { enabled: Arc::new(AtomicBool::new(config.enable)), tx: Some(tx), @@ -111,7 +113,9 @@ impl TabletFlowController { flow_checkers, 
limiters, config.clone(), + global_discard_ratio.clone(), )), + global_discard_ratio, } } @@ -131,12 +135,19 @@ impl FlowInfoDispatcher { flow_checkers: Arc>>>>, limiters: Limiters, config: FlowControlConfig, + global_discard_ratio: Arc, ) -> JoinHandle<()> { Builder::new() .name(thd_name!("flow-checker")) .spawn_wrapper(move || { let mut deadline = std::time::Instant::now(); - let mut enabled = true; + let mut enabled = config.enable; + let engine = TabletFlowFactorStore::new(registry.clone()); + let mut pending_compaction_checker = CompactionPendingBytesChecker::new( + config.clone(), + global_discard_ratio, + engine, + ); loop { match rx.try_recv() { Ok(Msg::Close) => break, @@ -157,13 +168,28 @@ impl FlowInfoDispatcher { match msg.clone() { Ok(FlowInfo::L0(_cf, _, region_id)) | Ok(FlowInfo::L0Intra(_cf, _, region_id)) - | Ok(FlowInfo::Flush(_cf, _, region_id)) - | Ok(FlowInfo::Compaction(_cf, region_id)) => { + | Ok(FlowInfo::Flush(_cf, _, region_id)) => { let mut checkers = flow_checkers.as_ref().write().unwrap(); if let Some(checker) = checkers.get_mut(®ion_id) { checker.on_flow_info_msg(enabled, msg); } } + Ok(FlowInfo::Compaction(cf, region_id)) => { + if !enabled { + continue; + } + let mut checkers = flow_checkers.as_ref().write().unwrap(); + if let Some(checker) = checkers.get_mut(®ion_id) { + let current_pending_bytes = + checker.on_pending_compaction_bytes_change(cf.clone()); + pending_compaction_checker.report_pending_compaction_bytes( + region_id, + cf.clone(), + current_pending_bytes, + ); + pending_compaction_checker.on_pending_compaction_bytes_change(cf); + } + } Ok(FlowInfo::BeforeUnsafeDestroyRange(region_id)) | Ok(FlowInfo::AfterUnsafeDestroyRange(region_id)) => { let mut checkers = flow_checkers.as_ref().write().unwrap(); @@ -173,6 +199,7 @@ impl FlowInfoDispatcher { } Ok(FlowInfo::Created(region_id)) => { let mut checkers = flow_checkers.as_ref().write().unwrap(); + let current_count = checkers.len(); match checkers.entry(region_id) { 
HashMapEntry::Occupied(e) => { let val = e.into_mut(); @@ -191,6 +218,11 @@ impl FlowInfoDispatcher { ), discard_ratio, )); + info!( + "add FlowChecker"; + "region_id" => region_id, + "current_count" => current_count, + ); e.insert(FlowChecker::new_with_region_id( region_id, &config, @@ -203,8 +235,10 @@ impl FlowInfoDispatcher { } Ok(FlowInfo::Destroyed(region_id)) => { let mut remove_limiter = false; + let current_count: usize; { let mut checkers = flow_checkers.as_ref().write().unwrap(); + current_count = checkers.len(); if let Some(checker) = checkers.get(®ion_id) { // if the previous value is 1, then the updated reference count // will be 0 @@ -216,6 +250,12 @@ impl FlowInfoDispatcher { } if remove_limiter { limiters.as_ref().write().unwrap().remove(®ion_id); + pending_compaction_checker.on_region_destroy(®ion_id); + info!( + "remove FlowChecker"; + "region_id" => region_id, + "current_count" => current_count, + ); } } Err(RecvTimeoutError::Timeout) => { @@ -253,7 +293,10 @@ impl TabletFlowController { pub fn should_drop(&self, region_id: u64) -> bool { let limiters = self.limiters.as_ref().read().unwrap(); if let Some(limiter) = limiters.get(®ion_id) { - let ratio = limiter.1.load(Ordering::Relaxed); + let ratio = std::cmp::max( + limiter.1.load(Ordering::Relaxed), + self.global_discard_ratio.load(Ordering::Relaxed), + ); let mut rng = rand::thread_rng(); return rng.gen_ratio(ratio, RATIO_SCALE_FACTOR); } @@ -264,7 +307,10 @@ impl TabletFlowController { pub fn discard_ratio(&self, region_id: u64) -> f64 { let limiters = self.limiters.as_ref().read().unwrap(); if let Some(limiter) = limiters.get(®ion_id) { - let ratio = limiter.1.load(Ordering::Relaxed); + let ratio = std::cmp::max( + limiter.1.load(Ordering::Relaxed), + self.global_discard_ratio.load(Ordering::Relaxed), + ); return ratio as f64 / RATIO_SCALE_FACTOR as f64; } 0.0 @@ -326,6 +372,74 @@ impl TabletFlowController { } } +struct CompactionPendingBytesChecker { + pending_compaction_bytes: HashMap>, + 
checker: FlowChecker, +} + +impl CompactionPendingBytesChecker { + pub fn new(config: FlowControlConfig, discard_ratio: Arc, engine: E) -> Self { + CompactionPendingBytesChecker { + pending_compaction_bytes: HashMap::default(), + checker: FlowChecker::new_with_region_id( + 0, // global checker + &config, + engine, + discard_ratio, + Arc::new( + ::builder(f64::INFINITY) + .refill(Duration::from_millis(1)) + .build(), // not used + ), + ), + } + } + + fn total_pending_compaction_bytes(&self, cf: &String) -> u64 { + let mut total = 0; + for pending_compaction_bytes_cf in self.pending_compaction_bytes.values() { + if let Some(v) = pending_compaction_bytes_cf.get(cf) { + total += v; + } + } + total + } + + /// Update region's pending compaction bytes on cf + pub fn report_pending_compaction_bytes( + &mut self, + region_id: u64, + cf: String, + pending_bytes: u64, + ) { + match self.pending_compaction_bytes.entry(region_id) { + HashMapEntry::Occupied(e) => { + let val = e.into_mut(); + val.insert(cf, pending_bytes); + } + HashMapEntry::Vacant(e) => { + let mut pending_bytes_cf = HashMap::default(); + pending_bytes_cf.insert(cf, pending_bytes); + e.insert(pending_bytes_cf); + } + }; + } + + /// called when region is destroy + pub fn on_region_destroy(&mut self, region_id: &u64) { + self.pending_compaction_bytes.remove(region_id); + for cf in DATA_CFS { + self.on_pending_compaction_bytes_change(cf.to_string()); + } + } + + /// called when a specific cf's pending compaction bytes is changed + pub fn on_pending_compaction_bytes_change(&mut self, cf: String) { + self.checker + .on_pending_compaction_bytes_change_cf(self.total_pending_compaction_bytes(&cf), cf); + } +} + #[cfg(test)] mod tests { use engine_rocks::FlowInfo; @@ -456,6 +570,51 @@ mod tests { test_flow_controller_l0_impl(&flow_controller, &stub, &tx, region_id); } + pub fn test_tablet_flow_controller_pending_compaction_bytes_impl( + flow_controller: &FlowController, + stub: &EngineStub, + tx: &mpsc::SyncSender, + 
region_id: u64, + ) { + // exceeds the threshold + stub.0 + .pending_compaction_bytes + .store(1000 * 1024 * 1024 * 1024, Ordering::Relaxed); + send_flow_info(tx, region_id); + // on start check forbids flow control + assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); + // once fall below the threshold, pass the on start check + stub.0 + .pending_compaction_bytes + .store(100 * 1024 * 1024 * 1024, Ordering::Relaxed); + send_flow_info(tx, region_id); + + stub.0 + .pending_compaction_bytes + .store(1000 * 1024 * 1024 * 1024, Ordering::Relaxed); + send_flow_info(tx, region_id); + assert!(flow_controller.discard_ratio(region_id) > f64::EPSILON); + + stub.0 + .pending_compaction_bytes + .store(1024 * 1024 * 1024, Ordering::Relaxed); + send_flow_info(tx, region_id); + assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); + + // unfreeze the control + stub.0 + .pending_compaction_bytes + .store(1024 * 1024, Ordering::Relaxed); + send_flow_info(tx, region_id); + assert!(flow_controller.discard_ratio(region_id) < f64::EPSILON); + + stub.0 + .pending_compaction_bytes + .store(1000000000 * 1024 * 1024 * 1024, Ordering::Relaxed); + send_flow_info(tx, region_id); + assert!(flow_controller.discard_ratio(region_id) > f64::EPSILON); + } + #[test] fn test_tablet_flow_controller_pending_compaction_bytes() { let (_dir, flow_controller, tx, reg) = create_tablet_flow_controller(); @@ -468,6 +627,11 @@ mod tests { tx.send(FlowInfo::L0Intra("default".to_string(), 0, region_id)) .unwrap(); - test_flow_controller_pending_compaction_bytes_impl(&flow_controller, &stub, &tx, region_id); + test_tablet_flow_controller_pending_compaction_bytes_impl( + &flow_controller, + &stub, + &tx, + region_id, + ); } } From e0e1cccb8bf6d059d97ea81111f54e3278c4d7a9 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 23 Aug 2023 20:26:05 +0800 Subject: [PATCH 0869/1149] tikv_util: update procinfo to hanlde no data in context switch 
metrics panel (#15415) close tikv/tikv#15413 update procinfo to hanlde no data in context switch metrics panel Signed-off-by: SpadeA-Tang --- Cargo.lock | 2 +- Cargo.toml | 2 +- components/resource_metering/Cargo.toml | 2 +- components/tikv_util/Cargo.toml | 2 +- tests/Cargo.toml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0663bfae493..bc8233ed509 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4294,7 +4294,7 @@ dependencies = [ [[package]] name = "procinfo" version = "0.4.2" -source = "git+https://github.com/tikv/procinfo-rs?rev=6599eb9dca74229b2c1fcc44118bef7eff127128#6599eb9dca74229b2c1fcc44118bef7eff127128" +source = "git+https://github.com/tikv/procinfo-rs?rev=7693954bd1dd86eb1709572fd7b62fd5f7ff2ea1#7693954bd1dd86eb1709572fd7b62fd5f7ff2ea1" dependencies = [ "byteorder", "libc 0.2.146", diff --git a/Cargo.toml b/Cargo.toml index d4d296214d2..5bc49b17e42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -197,7 +197,7 @@ cmake = { git = "https://github.com/rust-lang/cmake-rs" } sysinfo ={ git = "https://github.com/tikv/sysinfo", branch = "0.26-fix-cpu" } [target.'cfg(target_os = "linux")'.dependencies] -procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229b2c1fcc44118bef7eff127128" } +procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "7693954bd1dd86eb1709572fd7b62fd5f7ff2ea1" } # When you modify TiKV cooperatively with kvproto, this will be useful to submit the PR to TiKV and the PR to # kvproto at the same time. # After the PR to kvproto is merged, remember to comment this out and run `cargo update -p kvproto`. 
diff --git a/components/resource_metering/Cargo.toml b/components/resource_metering/Cargo.toml index 068b26483ff..f6776970aa0 100644 --- a/components/resource_metering/Cargo.toml +++ b/components/resource_metering/Cargo.toml @@ -23,7 +23,7 @@ slog-global = { workspace = true } tikv_util = { workspace = true } [target.'cfg(target_os = "linux")'.dependencies] -procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229b2c1fcc44118bef7eff127128" } +procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "7693954bd1dd86eb1709572fd7b62fd5f7ff2ea1" } [dev-dependencies] rand = "0.8" diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 35c4940ae70..6de354fa259 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -64,7 +64,7 @@ url = "2" yatp = { workspace = true } [target.'cfg(target_os = "linux")'.dependencies] -procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229b2c1fcc44118bef7eff127128" } +procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "7693954bd1dd86eb1709572fd7b62fd5f7ff2ea1" } page_size = "0.4" procfs = { version = "0.12", default-features = false } diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 79367c00631..158e56abcb1 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -117,7 +117,7 @@ txn_types = { workspace = true } uuid = { version = "0.8.1", features = ["serde", "v4"] } [target.'cfg(target_os = "linux")'.dependencies] -procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "6599eb9dca74229b2c1fcc44118bef7eff127128" } +procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "7693954bd1dd86eb1709572fd7b62fd5f7ff2ea1" } [dev-dependencies] arrow = "13.0" From 630ddc059a19dfee704d3ba80afa6e6f6e7483ba Mon Sep 17 00:00:00 2001 From: Connor Date: Wed, 23 Aug 2023 21:09:34 -0700 Subject: [PATCH 0870/1149] server: add back heap profile HTTP API and make it secure (#15408) close tikv/tikv#11161 Add back 
heap profile HTTP API and make it secure. The API is removed by #11162 due to a secure issue that can visit arbitrary files on the server. This PR makes it only show the file name instead of the absolute path, and adds a paranoid check to make sure the passed file name is in the set of heap profiles. Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/server/status_server/mod.rs | 41 +++++++++++++++++++++++------ src/server/status_server/profile.rs | 17 +++++++++--- 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 679f21fdf6c..b49fdce12af 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -40,8 +40,9 @@ use openssl::{ }; use pin_project::pin_project; pub use profile::{ - activate_heap_profile, deactivate_heap_profile, jeprof_heap_profile, list_heap_profiles, - read_file, start_one_cpu_profile, start_one_heap_profile, + activate_heap_profile, deactivate_heap_profile, heap_profiles_dir, jeprof_heap_profile, + list_heap_profiles, read_file, start_one_cpu_profile, start_one_heap_profile, + HEAP_PROFILE_REGEX, }; use prometheus::TEXT_FORMAT; use regex::Regex; @@ -207,10 +208,34 @@ where let use_jeprof = query_pairs.get("jeprof").map(|x| x.as_ref()) == Some("true"); let result = if let Some(name) = query_pairs.get("name") { - if use_jeprof { - jeprof_heap_profile(name) + let re = Regex::new(HEAP_PROFILE_REGEX).unwrap(); + if !re.is_match(name) { + let errmsg = format!("heap profile name {} is invalid", name); + return Ok(make_response(StatusCode::BAD_REQUEST, errmsg)); + } + let profiles = match list_heap_profiles() { + Ok(s) => s, + Err(e) => return Ok(make_response(StatusCode::INTERNAL_SERVER_ERROR, e)), + }; + if profiles.iter().any(|(f, _)| f == name) { + let dir = match heap_profiles_dir() { + Some(path) => path, + None => { + return Ok(make_response( + 
StatusCode::INTERNAL_SERVER_ERROR, + "heap profile is not active", + )); + } + }; + let path = dir.join(name.as_ref()); + if use_jeprof { + jeprof_heap_profile(path.to_str().unwrap()) + } else { + read_file(path.to_str().unwrap()) + } } else { - read_file(name) + let errmsg = format!("heap profile {} not found", name); + return Ok(make_response(StatusCode::BAD_REQUEST, errmsg)); } } else { let mut seconds = 10; @@ -649,9 +674,9 @@ where (Method::GET, "/debug/pprof/heap_deactivate") => { Self::deactivate_heap_prof(req) } - // (Method::GET, "/debug/pprof/heap") => { - // Self::dump_heap_prof_to_resp(req).await - // } + (Method::GET, "/debug/pprof/heap") => { + Self::dump_heap_prof_to_resp(req).await + } (Method::GET, "/config") => { Self::get_config(req, &cfg_controller).await } diff --git a/src/server/status_server/profile.rs b/src/server/status_server/profile.rs index b3d91d3bea6..dd49c394046 100644 --- a/src/server/status_server/profile.rs +++ b/src/server/status_server/profile.rs @@ -31,7 +31,8 @@ pub use self::test_utils::TEST_PROFILE_MUTEX; use self::test_utils::{activate_prof, deactivate_prof, dump_prof}; // File name suffix for periodically dumped heap profiles. -const HEAP_PROFILE_SUFFIX: &str = ".heap"; +pub const HEAP_PROFILE_SUFFIX: &str = ".heap"; +pub const HEAP_PROFILE_REGEX: &str = r"^[0-9]{6,6}\.heap$"; lazy_static! { // If it's locked it means there are already a heap or CPU profiling. 
@@ -244,9 +245,17 @@ pub fn jeprof_heap_profile(path: &str) -> Result, String> { Ok(output.stdout) } +pub fn heap_profiles_dir() -> Option { + PROFILE_ACTIVE + .lock() + .unwrap() + .as_ref() + .map(|(_, dir)| dir.path().to_owned()) +} + pub fn list_heap_profiles() -> Result, String> { - let path = match &*PROFILE_ACTIVE.lock().unwrap() { - Some((_, ref dir)) => dir.path().to_str().unwrap().to_owned(), + let path = match heap_profiles_dir() { + Some(path) => path.into_os_string().into_string().unwrap(), None => return Ok(vec![]), }; @@ -257,7 +266,7 @@ pub fn list_heap_profiles() -> Result, String> { Ok(x) => x, _ => continue, }; - let f = item.path().to_str().unwrap().to_owned(); + let f = item.file_name().to_str().unwrap().to_owned(); if !f.ends_with(HEAP_PROFILE_SUFFIX) { continue; } From 6560d758f9143dc5125b0c5c3b0eaadbfecffa3c Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Thu, 24 Aug 2023 13:59:04 +0800 Subject: [PATCH 0871/1149] raftstore-v2: fix compact range bugs that causes false positive clean tablet (#15332) ref tikv/tikv#12842 - Fix a bug of compact range that causes a dirty tablet being reported as clean. - Added an additional check to ensure trim's correctness. - Fix a bug that some tablets are not destroyed and block peer destroy progress. 
Signed-off-by: tabokie Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 6 +- components/engine_panic/src/compact.rs | 4 ++ components/engine_rocks/src/compact.rs | 4 ++ components/engine_traits/src/compact.rs | 3 + .../operation/command/admin/compact_log.rs | 42 +++++++++++--- components/raftstore-v2/src/operation/life.rs | 6 +- .../src/operation/ready/snapshot.rs | 2 + components/raftstore-v2/src/worker/tablet.rs | 12 ++++ components/test_raftstore/src/util.rs | 9 ++- tests/failpoints/cases/test_sst_recovery.rs | 4 +- .../raftstore/test_compact_after_delete.rs | 6 +- tests/integrations/raftstore/test_snap.rs | 55 ++++++++++++++++++- 12 files changed, 131 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bc8233ed509..abe174e638f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3107,7 +3107,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#0c78f4072d766b152e83b25d3068b5c72b5feca1" +source = "git+https://github.com/tikv/rust-rocksdb.git#d861ede96cc2aae3c2ed5ea1c1c71454130a325e" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3126,7 +3126,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#0c78f4072d766b152e83b25d3068b5c72b5feca1" +source = "git+https://github.com/tikv/rust-rocksdb.git#d861ede96cc2aae3c2ed5ea1c1c71454130a325e" dependencies = [ "bzip2-sys", "cc", @@ -5100,7 +5100,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#0c78f4072d766b152e83b25d3068b5c72b5feca1" +source = "git+https://github.com/tikv/rust-rocksdb.git#d861ede96cc2aae3c2ed5ea1c1c71454130a325e" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/components/engine_panic/src/compact.rs b/components/engine_panic/src/compact.rs index 988bec790de..f64c97ff5b0 100644 --- 
a/components/engine_panic/src/compact.rs +++ b/components/engine_panic/src/compact.rs @@ -44,6 +44,10 @@ impl CompactExt for PanicEngine { ) -> Result<()> { panic!() } + + fn check_in_range(&self, start: Option<&[u8]>, end: Option<&[u8]>) -> Result<()> { + panic!() + } } pub struct PanicCompactedEvent; diff --git a/components/engine_rocks/src/compact.rs b/components/engine_rocks/src/compact.rs index 199b7d9f3be..f64c9a7d49e 100644 --- a/components/engine_rocks/src/compact.rs +++ b/components/engine_rocks/src/compact.rs @@ -121,6 +121,10 @@ impl CompactExt for RocksEngine { db.compact_files_cf(handle, &opts, &files, output_level) .map_err(r2e) } + + fn check_in_range(&self, start: Option<&[u8]>, end: Option<&[u8]>) -> Result<()> { + self.as_inner().check_in_range(start, end).map_err(r2e) + } } #[cfg(test)] diff --git a/components/engine_traits/src/compact.rs b/components/engine_traits/src/compact.rs index 05590a1ff32..2a4341a6788 100644 --- a/components/engine_traits/src/compact.rs +++ b/components/engine_traits/src/compact.rs @@ -71,6 +71,9 @@ pub trait CompactExt: CfNamesExt { max_subcompactions: u32, exclude_l0: bool, ) -> Result<()>; + + // Check all data is in the range [start, end). + fn check_in_range(&self, start: Option<&[u8]>, end: Option<&[u8]>) -> Result<()>; } pub trait CompactedEvent: Send { diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 8920ea97e1d..93876475f5f 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -13,7 +13,13 @@ //! Updates truncated index, and compacts logs if the corresponding changes have //! been persisted in kvdb. 
-use std::path::PathBuf; +use std::{ + path::PathBuf, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, +}; use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use kvproto::raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse, RaftCmdRequest}; @@ -50,6 +56,10 @@ pub struct CompactLogContext { /// persisted. When persisted_apply is advanced, we need to notify tablet /// worker to destroy them. tombstone_tablets_wait_index: Vec, + /// Sometimes a tombstone tablet can be registered after tablet index is + /// advanced. We should not consider it as an active tablet otherwise it + /// might block peer destroy progress. + persisted_tablet_index: Arc, } impl CompactLogContext { @@ -60,6 +70,7 @@ impl CompactLogContext { last_applying_index, last_compacted_idx: 0, tombstone_tablets_wait_index: vec![], + persisted_tablet_index: AtomicU64::new(0).into(), } } @@ -379,7 +390,9 @@ impl Peer { )); } - /// Returns if there's any tombstone being removed. + /// Returns if there's any tombstone being removed. `persisted` state may + /// not be persisted yet, caller is responsible for actually destroying the + /// physical tablets afterwards. #[inline] pub fn remove_tombstone_tablets(&mut self, persisted: u64) -> bool { let compact_log_context = self.compact_log_context_mut(); @@ -398,11 +411,21 @@ impl Peer { } } + /// User can only increase this counter. + #[inline] + pub fn remember_persisted_tablet_index(&self) -> Arc { + self.compact_log_context().persisted_tablet_index.clone() + } + + /// Returns whether there's any tombstone tablet newer than persisted tablet + /// index. They might still be referenced by inflight apply and cannot be + /// destroyed. 
pub fn has_pending_tombstone_tablets(&self) -> bool { - !self - .compact_log_context() - .tombstone_tablets_wait_index - .is_empty() + let ctx = self.compact_log_context(); + let persisted = ctx.persisted_tablet_index.load(Ordering::Relaxed); + ctx.tombstone_tablets_wait_index + .iter() + .any(|i| *i > persisted) } #[inline] @@ -411,6 +434,8 @@ impl Peer { ctx: &StoreContext, task: &mut WriteTask, ) { + let applied_index = self.entry_storage().applied_index(); + self.remove_tombstone_tablets(applied_index); assert!( !self.has_pending_tombstone_tablets(), "{} all tombstone should be cleared before being destroyed.", @@ -421,7 +446,6 @@ impl Peer { None => return, }; let region_id = self.region_id(); - let applied_index = self.entry_storage().applied_index(); let sched = ctx.schedulers.tablet.clone(); let _ = sched.schedule(tablet::Task::prepare_destroy( tablet, @@ -557,13 +581,17 @@ impl Peer { } if self.remove_tombstone_tablets(new_persisted) { let sched = store_ctx.schedulers.tablet.clone(); + let counter = self.remember_persisted_tablet_index(); if !task.has_snapshot { task.persisted_cbs.push(Box::new(move || { let _ = sched.schedule(tablet::Task::destroy(region_id, new_persisted)); + // Writer guarantees no race between different callbacks. + counter.store(new_persisted, Ordering::Relaxed); })); } else { // In snapshot, the index is persisted, tablet can be destroyed directly. let _ = sched.schedule(tablet::Task::destroy(region_id, new_persisted)); + counter.store(new_persisted, Ordering::Relaxed); } } } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 52f00d137f8..e0e7f63785d 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -795,9 +795,13 @@ impl Peer { } // Wait for critical commands like split. 
if self.has_pending_tombstone_tablets() { + let applied_index = self.entry_storage().applied_index(); + let last_index = self.entry_storage().last_index(); info!( self.logger, - "postpone destroy because there're pending tombstone tablets" + "postpone destroy because there're pending tombstone tablets"; + "applied_index" => applied_index, + "last_index" => last_index, ); return true; } diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 17deed333c1..9e0ed449cef 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -343,10 +343,12 @@ impl Peer { } self.schedule_apply_fsm(ctx); if self.remove_tombstone_tablets(snapshot_index) { + let counter = self.remember_persisted_tablet_index(); let _ = ctx .schedulers .tablet .schedule(tablet::Task::destroy(region_id, snapshot_index)); + counter.store(snapshot_index, Ordering::Relaxed); } if let Some(msg) = self.split_pending_append_mut().take_append_message() { let _ = ctx.router.send_raft_message(msg); diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index 183bb33cd34..7c330353836 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -298,6 +298,8 @@ impl Runner { .spawn(async move { let range1 = Range::new(&[], &start_key); let range2 = Range::new(&end_key, keys::DATA_MAX_KEY); + // Note: Refer to https://github.com/facebook/rocksdb/pull/11468. There's could be + // some files missing from compaction if dynamic_level_bytes is off. for r in [range1, range2] { // When compaction filter is present, trivial move is disallowed. 
if let Err(e) = @@ -323,6 +325,16 @@ impl Runner { return; } } + if let Err(e) = tablet.check_in_range(Some(&start_key), Some(&end_key)) { + debug_assert!(false, "check_in_range failed {:?}, is titan enabled?", e); + error!( + logger, + "trim did not remove all dirty data"; + "path" => tablet.path(), + "err" => %e, + ); + return; + } // drop before callback. drop(tablet); fail_point!("tablet_trimmed_finished"); diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 02a74136bb6..f63c69f9631 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -81,15 +81,14 @@ pub fn must_get( } debug!("last try to get {}", log_wrappers::hex_encode_upper(key)); let res = engine.get_value_cf(cf, &keys::data_key(key)).unwrap(); - if value.is_none() && res.is_none() - || value.is_some() && res.is_some() && value.unwrap() == &*res.unwrap() - { + if value == res.as_ref().map(|r| r.as_ref()) { return; } panic!( - "can't get value {:?} for key {}", + "can't get value {:?} for key {}, actual={:?}", value.map(escape), - log_wrappers::hex_encode_upper(key) + log_wrappers::hex_encode_upper(key), + res ) } diff --git a/tests/failpoints/cases/test_sst_recovery.rs b/tests/failpoints/cases/test_sst_recovery.rs index a4c1f10b5ae..da5a3da1a32 100644 --- a/tests/failpoints/cases/test_sst_recovery.rs +++ b/tests/failpoints/cases/test_sst_recovery.rs @@ -105,7 +105,7 @@ fn test_sst_recovery_overlap_range_sst_exist() { must_get_equal(&engine1, b"7", b"val_1"); // Validate the damaged sst has been deleted. - compact_files_to_target_level(&engine1, true, 3).unwrap(); + compact_files_to_target_level(&engine1, true, 6).unwrap(); let files = engine1.as_inner().get_live_files(); assert_eq!(files.get_files_count(), 1); @@ -252,7 +252,7 @@ fn create_tikv_cluster_with_one_node_damaged() disturb_sst_file(&sst_path); // The sst file is damaged, so this action will fail. 
- assert_corruption(compact_files_to_target_level(&engine1, true, 3)); + assert_corruption(compact_files_to_target_level(&engine1, true, 6)); (cluster, pd_client, engine1) } diff --git a/tests/integrations/raftstore/test_compact_after_delete.rs b/tests/integrations/raftstore/test_compact_after_delete.rs index 6ba405bb918..a79fdfd4425 100644 --- a/tests/integrations/raftstore/test_compact_after_delete.rs +++ b/tests/integrations/raftstore/test_compact_after_delete.rs @@ -98,7 +98,8 @@ fn test_node_compact_after_delete_v2() { // disable it cluster.cfg.raft_store.region_compact_min_redundant_rows = 10000000; cluster.cfg.raft_store.region_compact_check_step = Some(2); - cluster.cfg.rocksdb.titan.enabled = true; + // TODO: v2 doesn't support titan. + // cluster.cfg.rocksdb.titan.enabled = true; cluster.run(); let region = cluster.get_region(b""); @@ -169,7 +170,8 @@ fn test_node_compact_after_update_v2() { cluster.cfg.raft_store.region_compact_redundant_rows_percent = 40; cluster.cfg.raft_store.region_compact_min_redundant_rows = 50; cluster.cfg.raft_store.region_compact_check_step = Some(2); - cluster.cfg.rocksdb.titan.enabled = true; + // TODO: titan is not supported in v2. 
+ // cluster.cfg.rocksdb.titan.enabled = true; cluster.run(); let region = cluster.get_region(b""); diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index 9eda281e9e4..0b71978f63b 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -227,8 +227,6 @@ fn test_server_snap_gc() { #[test_case(test_raftstore::new_node_cluster)] #[test_case(test_raftstore::new_server_cluster)] -#[test_case(test_raftstore_v2::new_node_cluster)] -#[test_case(test_raftstore_v2::new_server_cluster)] fn test_concurrent_snap() { let mut cluster = new_cluster(0, 3); // Test that the handling of snapshot is correct when there are multiple @@ -279,6 +277,59 @@ fn test_concurrent_snap() { must_get_equal(&cluster.get_engine(3), b"k4", b"v4"); } +#[test_case(test_raftstore_v2::new_node_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_concurrent_snap_v2() { + let mut cluster = new_cluster(0, 3); + // TODO: v2 doesn't support titan. + // Test that the handling of snapshot is correct when there are multiple + // snapshots which have overlapped region ranges arrive at the same + // raftstore. + // cluster.cfg.rocksdb.titan.enabled = true; + // Disable raft log gc in this test case. + cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(60); + // For raftstore v2, after split, follower delays first messages (see + // is_first_message() for details), so leader does not send snapshot to + // follower and CollectSnapshotFilter holds parent region snapshot forever. + // We need to set a short wait duration so that leader can send snapshot + // in time and thus CollectSnapshotFilter can send parent region snapshot. + cluster.cfg.raft_store.snap_wait_split_duration = ReadableDuration::millis(100); + + let pd_client = Arc::clone(&cluster.pd_client); + // Disable default max peer count check. 
+ pd_client.disable_default_operator(); + + let r1 = cluster.run_conf_change(); + cluster.must_put(b"k1", b"v1"); + pd_client.must_add_peer(r1, new_peer(2, 2)); + // Force peer 2 to be followers all the way. + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(r1, 2) + .msg_type(MessageType::MsgRequestVote) + .direction(Direction::Send), + )); + cluster.must_transfer_leader(r1, new_peer(1, 1)); + cluster.must_put(b"k3", b"v3"); + // Pile up snapshots of overlapped region ranges and deliver them all at once. + let (tx, rx) = mpsc::channel(); + cluster.add_recv_filter_on_node(3, Box::new(CollectSnapshotFilter::new(tx))); + pd_client.must_add_peer(r1, new_peer(3, 3)); + let region = cluster.get_region(b"k1"); + // Ensure the snapshot of range ("", "") is sent and piled in filter. + if let Err(e) = rx.recv_timeout(Duration::from_secs(1)) { + panic!("the snapshot is not sent before split, e: {:?}", e); + } + // Split the region range and then there should be another snapshot for the + // split ranges. + cluster.must_split(®ion, b"k2"); + must_get_equal(&cluster.get_engine(3), b"k3", b"v3"); + // Ensure the regions work after split. 
+ cluster.must_put(b"k11", b"v11"); + must_get_equal(&cluster.get_engine(3), b"k11", b"v11"); + cluster.must_put(b"k4", b"v4"); + must_get_equal(&cluster.get_engine(3), b"k4", b"v4"); +} + #[test_case(test_raftstore::new_node_cluster)] #[test_case(test_raftstore::new_server_cluster)] #[test_case(test_raftstore_v2::new_node_cluster)] From 3ae1fb4320737c71a1c9d3f8ee6a3b7a9af6f6ea Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 24 Aug 2023 16:36:35 +0800 Subject: [PATCH 0872/1149] scheduler: not panic in the case of unexepected dropped channel when shutting dowm (#15426) ref tikv/tikv#15202 not panic in the case of unexepected dropped channel when shutting dowm Signed-off-by: SpadeA-Tang --- src/storage/txn/scheduler.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 4df7033c21a..3c6a66c3941 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -1665,10 +1665,15 @@ impl TxnScheduler { // it may break correctness. // However, not release latch will cause deadlock which may ultimately block all // following txns, so we panic here. - panic!( - "response channel is unexpectedly dropped, tag {:?}, cid {}", - tag, cid - ); + // + // todo(spadea): Now, we only panic if it's not shutting down, although even in + // close, this behavior is not acceptable. 
+ if !tikv_util::thread_group::is_shutdown(!cfg!(test)) { + panic!( + "response channel is unexpectedly dropped, tag {:?}, cid {}", + tag, cid + ); + } } /// Returns whether it succeeds to write pessimistic locks to the in-memory From 8a44a2c4c11b3da9d776d2877f631922d3833933 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 24 Aug 2023 19:50:06 +0800 Subject: [PATCH 0873/1149] raftstore: disable duplicated mvcc key compaction check by default (#15427) close tikv/tikv#15282 disable duplicated mvcc key check compaction by default Signed-off-by: SpadeA-Tang --- components/raftstore-v2/src/operation/misc.rs | 2 +- components/raftstore/src/store/config.rs | 27 ++++++++++++++++--- components/raftstore/src/store/fsm/store.rs | 2 +- etc/config-template.toml | 9 +++++++ src/config/mod.rs | 3 +++ tests/integrations/config/mod.rs | 2 +- .../raftstore/test_compact_after_delete.rs | 4 ++- 7 files changed, 41 insertions(+), 8 deletions(-) diff --git a/components/raftstore-v2/src/operation/misc.rs b/components/raftstore-v2/src/operation/misc.rs index 867b4192dac..fafca29ea85 100644 --- a/components/raftstore-v2/src/operation/misc.rs +++ b/components/raftstore-v2/src/operation/misc.rs @@ -102,7 +102,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T> StoreFsmDelegate<'a, EK, ER, T> { self.store_ctx.cfg.region_compact_min_tombstones, self.store_ctx.cfg.region_compact_tombstones_percent, self.store_ctx.cfg.region_compact_min_redundant_rows, - self.store_ctx.cfg.region_compact_redundant_rows_percent, + self.store_ctx.cfg.region_compact_redundant_rows_percent(), ), })) { diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 817be7eb969..257480b4c25 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -140,7 +140,7 @@ pub struct Config { pub region_compact_min_redundant_rows: u64, /// Minimum percentage of redundant rows to trigger manual 
compaction. /// Should between 1 and 100. - pub region_compact_redundant_rows_percent: u64, + pub region_compact_redundant_rows_percent: Option, pub pd_heartbeat_tick_interval: ReadableDuration, pub pd_store_heartbeat_tick_interval: ReadableDuration, pub snap_mgr_gc_tick_interval: ReadableDuration, @@ -429,7 +429,7 @@ impl Default for Config { region_compact_min_tombstones: 10000, region_compact_tombstones_percent: 30, region_compact_min_redundant_rows: 50000, - region_compact_redundant_rows_percent: 20, + region_compact_redundant_rows_percent: None, pd_heartbeat_tick_interval: ReadableDuration::minutes(1), pd_store_heartbeat_tick_interval: ReadableDuration::secs(10), notify_capacity: 40960, @@ -581,6 +581,10 @@ impl Config { self.region_compact_check_step.unwrap() } + pub fn region_compact_redundant_rows_percent(&self) -> u64 { + self.region_compact_redundant_rows_percent.unwrap() + } + #[inline] pub fn warmup_entry_cache_enabled(&self) -> bool { self.max_entry_cache_warmup_duration.0 != Duration::from_secs(0) @@ -604,8 +608,11 @@ impl Config { if self.region_compact_check_step.is_none() { if raft_kv_v2 { self.region_compact_check_step = Some(5); + self.region_compact_redundant_rows_percent = Some(20); } else { self.region_compact_check_step = Some(100); + // Disable redundant rows check in default for v1. 
+ self.region_compact_redundant_rows_percent = Some(100); } } @@ -766,6 +773,15 @@ impl Config { )); } + let region_compact_redundant_rows_percent = + self.region_compact_redundant_rows_percent.unwrap(); + if !(1..=100).contains(®ion_compact_redundant_rows_percent) { + return Err(box_err!( + "region-compact-redundant-rows-percent must between 1 and 100, current value is {}", + region_compact_redundant_rows_percent + )); + } + if self.local_read_batch_size == 0 { return Err(box_err!("local-read-batch-size must be greater than 0")); } @@ -992,8 +1008,11 @@ impl Config { .with_label_values(&["region_compact_min_redundant_rows"]) .set(self.region_compact_min_redundant_rows as f64); CONFIG_RAFTSTORE_GAUGE - .with_label_values(&["region_compact_tombstones_percent"]) - .set(self.region_compact_tombstones_percent as f64); + .with_label_values(&["region_compact_redundant_rows_percent"]) + .set( + self.region_compact_redundant_rows_percent + .unwrap_or_default() as f64, + ); CONFIG_RAFTSTORE_GAUGE .with_label_values(&["pd_heartbeat_tick_interval"]) .set(self.pd_heartbeat_tick_interval.as_secs_f64()); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index c21ea65a589..df11ba51fc8 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -2525,7 +2525,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER self.ctx.cfg.region_compact_min_tombstones, self.ctx.cfg.region_compact_tombstones_percent, self.ctx.cfg.region_compact_min_redundant_rows, - self.ctx.cfg.region_compact_redundant_rows_percent, + self.ctx.cfg.region_compact_redundant_rows_percent(), ), }, )) { diff --git a/etc/config-template.toml b/etc/config-template.toml index 36d8d25d883..3c8a6015910 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -437,6 +437,15 @@ ## exceeds `region-compact-tombstones-percent`. 
# region-compact-tombstones-percent = 30 +## The minimum number of duplicated MVCC keys to trigger manual compaction. +# region-compact-min-redundant-rows = 50000 + +## The minimum percentage of duplicated MVCC keys to trigger manual compaction. +## It should be set between 1 and 100. Manual compaction is only triggered when the number of +## duplicated MVCC keys exceeds `region-compact-min-redundant-rows` and the percentage of duplicated MVCC keys +## exceeds `region-compact-redundant-rows-percent`. +# region-compact-redundant-rows-percent = 100 + ## Interval to check whether to start a manual compaction for Lock Column Family. ## If written bytes reach `lock-cf-compact-bytes-threshold` for Lock Column Family, TiKV will ## trigger a manual compaction for Lock Column Family. diff --git a/src/config/mod.rs b/src/config/mod.rs index 5c7f1424c38..ecb31c8aec6 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -5966,6 +5966,9 @@ mod tests { default_cfg .server .optimize_for(default_cfg.coprocessor.region_split_size()); + default_cfg + .raft_store + .optimize_for(default_cfg.storage.engine == EngineType::RaftKv2); default_cfg.security.redact_info_log = Some(false); default_cfg.coprocessor.region_max_size = Some(default_cfg.coprocessor.region_max_size()); default_cfg.coprocessor.region_max_keys = Some(default_cfg.coprocessor.region_max_keys()); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index a65d4cfb46c..8fdbaa00f25 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -200,7 +200,7 @@ fn test_serde_custom_tikv_config() { region_compact_min_tombstones: 999, region_compact_tombstones_percent: 33, region_compact_min_redundant_rows: 999, - region_compact_redundant_rows_percent: 33, + region_compact_redundant_rows_percent: Some(33), pd_heartbeat_tick_interval: ReadableDuration::minutes(12), pd_store_heartbeat_tick_interval: ReadableDuration::secs(12), notify_capacity: 12_345, diff --git 
a/tests/integrations/raftstore/test_compact_after_delete.rs b/tests/integrations/raftstore/test_compact_after_delete.rs index a79fdfd4425..24034c83192 100644 --- a/tests/integrations/raftstore/test_compact_after_delete.rs +++ b/tests/integrations/raftstore/test_compact_after_delete.rs @@ -36,6 +36,7 @@ fn test_compact_after_delete(cluster: &mut Cluster) { cluster.cfg.raft_store.region_compact_check_interval = ReadableDuration::millis(100); cluster.cfg.raft_store.region_compact_min_tombstones = 500; cluster.cfg.raft_store.region_compact_tombstones_percent = 50; + cluster.cfg.raft_store.region_compact_redundant_rows_percent = Some(1); cluster.cfg.raft_store.region_compact_check_step = Some(1); cluster.cfg.rocksdb.titan.enabled = true; cluster.run(); @@ -97,6 +98,7 @@ fn test_node_compact_after_delete_v2() { cluster.cfg.raft_store.region_compact_tombstones_percent = 50; // disable it cluster.cfg.raft_store.region_compact_min_redundant_rows = 10000000; + cluster.cfg.raft_store.region_compact_redundant_rows_percent = Some(100); cluster.cfg.raft_store.region_compact_check_step = Some(2); // TODO: v2 doesn't support titan. // cluster.cfg.rocksdb.titan.enabled = true; @@ -167,7 +169,7 @@ fn test_node_compact_after_update_v2() { cluster.cfg.raft_store.region_compact_check_interval = ReadableDuration::millis(100); // disable it cluster.cfg.raft_store.region_compact_min_tombstones = 1000000; - cluster.cfg.raft_store.region_compact_redundant_rows_percent = 40; + cluster.cfg.raft_store.region_compact_redundant_rows_percent = Some(40); cluster.cfg.raft_store.region_compact_min_redundant_rows = 50; cluster.cfg.raft_store.region_compact_check_step = Some(2); // TODO: titan is not supported in v2. From 25959655f33ac27985962887d25a0da593fd62c8 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Thu, 24 Aug 2023 22:48:35 +0800 Subject: [PATCH 0874/1149] server: fix memory trace's leak metrics (#15353) close tikv/tikv#15357 Correct the raft_router/apply_router's alive and leak metrics. 
Signed-off-by: tonyxuqqi --- components/server/src/memory.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/components/server/src/memory.rs b/components/server/src/memory.rs index 303ff257a78..fadf18f7534 100644 --- a/components/server/src/memory.rs +++ b/components/server/src/memory.rs @@ -19,9 +19,24 @@ impl MemoryTraceManager { for id in ids { let sub_trace = provider.sub_trace(id); let sub_trace_name = sub_trace.name(); - MEM_TRACE_SUM_GAUGE - .with_label_values(&[&format!("{}-{}", provider_name, sub_trace_name)]) - .set(sub_trace.sum() as i64) + let leaf_ids = sub_trace.get_children_ids(); + if leaf_ids.is_empty() { + MEM_TRACE_SUM_GAUGE + .with_label_values(&[&format!("{}-{}", provider_name, sub_trace_name)]) + .set(sub_trace.sum() as i64); + } else { + for leaf_id in leaf_ids { + let leaf = sub_trace.sub_trace(leaf_id); + MEM_TRACE_SUM_GAUGE + .with_label_values(&[&format!( + "{}-{}-{}", + provider_name, + sub_trace_name, + leaf.name(), + )]) + .set(leaf.sum() as i64); + } + } } MEM_TRACE_SUM_GAUGE From bea230d98c61de9847121a0f0bb9c4588b20e4de Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 25 Aug 2023 12:35:07 +0800 Subject: [PATCH 0875/1149] raftstore: fix unwrap panic of region_compact_redundant_rows_percent (#15440) close tikv/tikv#15438 fix unwrap panic of region_compact_redundant_rows_percent Signed-off-by: SpadeA-Tang --- components/raftstore/src/store/config.rs | 8 ++- src/config/mod.rs | 63 ++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 257480b4c25..f96ed2b7a45 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -608,9 +608,15 @@ impl Config { if self.region_compact_check_step.is_none() { if raft_kv_v2 { self.region_compact_check_step = Some(5); - 
self.region_compact_redundant_rows_percent = Some(20); } else { self.region_compact_check_step = Some(100); + } + } + + if self.region_compact_redundant_rows_percent.is_none() { + if raft_kv_v2 { + self.region_compact_redundant_rows_percent = Some(20); + } else { // Disable redundant rows check in default for v1. self.region_compact_redundant_rows_percent = Some(100); } diff --git a/src/config/mod.rs b/src/config/mod.rs index ecb31c8aec6..f7c338379ef 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -6428,4 +6428,67 @@ mod tests { Some(ReadableSize::gb(1)) ); } + + #[test] + fn test_compact_check_default() { + let content = r#" + [raftstore] + region-compact-check-step = 50 + "#; + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert_eq!(cfg.raft_store.region_compact_check_step.unwrap(), 50); + assert_eq!( + cfg.raft_store + .region_compact_redundant_rows_percent + .unwrap(), + 100 + ); + + let content = r#" + [raftstore] + region-compact-check-step = 50 + [storage] + engine = "partitioned-raft-kv" + "#; + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert_eq!(cfg.raft_store.region_compact_check_step.unwrap(), 50); + assert_eq!( + cfg.raft_store + .region_compact_redundant_rows_percent + .unwrap(), + 20 + ); + + let content = r#" + [raftstore] + region-compact-redundant-rows-percent = 50 + "#; + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert_eq!(cfg.raft_store.region_compact_check_step.unwrap(), 100); + assert_eq!( + cfg.raft_store + .region_compact_redundant_rows_percent + .unwrap(), + 50 + ); + + let content = r#" + [raftstore] + region-compact-redundant-rows-percent = 50 + [storage] + engine = "partitioned-raft-kv" + "#; + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert_eq!(cfg.raft_store.region_compact_check_step.unwrap(), 5); + assert_eq!( + cfg.raft_store + 
.region_compact_redundant_rows_percent + .unwrap(), + 50 + ); + } } From 40440210d81ea1770d5921475a51350f0bee50cd Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 25 Aug 2023 00:14:05 -0700 Subject: [PATCH 0876/1149] batch-system: use concurrent hashmap to avoid router cache (#15431) close tikv/tikv#15430 Use concurrent hashmap to avoid router cache occupying too much memory Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 5 +- components/batch-system/Cargo.toml | 1 + components/batch-system/src/router.rs | 171 +++++------------- components/batch-system/tests/cases/router.rs | 20 +- components/raftstore/src/store/fsm/store.rs | 6 - components/tikv_util/src/mpsc/mod.rs | 25 ++- 6 files changed, 65 insertions(+), 163 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index abe174e638f..3c44a639e38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -652,6 +652,7 @@ dependencies = [ "collections", "criterion", "crossbeam", + "dashmap", "derive_more", "fail", "file_system", @@ -1449,9 +1450,9 @@ dependencies = [ [[package]] name = "dashmap" -version = "5.1.0" +version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0834a35a3fce649144119e18da2a4d8ed12ef3862f47183fd46f625d072d96c" +checksum = "4c8858831f7781322e539ea39e72449c46b059638250c14344fec8d0aa6e539c" dependencies = [ "cfg-if 1.0.0", "num_cpus", diff --git a/components/batch-system/Cargo.toml b/components/batch-system/Cargo.toml index ac69d544a21..bd1ae6c56b4 100644 --- a/components/batch-system/Cargo.toml +++ b/components/batch-system/Cargo.toml @@ -10,6 +10,7 @@ test-runner = ["derive_more"] [dependencies] collections = { workspace = true } crossbeam = "0.8" +dashmap = "5.2" derive_more = { version = "0.99", optional = true } fail = "0.5" file_system = { workspace = true } diff --git a/components/batch-system/src/router.rs b/components/batch-system/src/router.rs index 119b7875506..4f886fe3b3d 100644 --- 
a/components/batch-system/src/router.rs +++ b/components/batch-system/src/router.rs @@ -1,21 +1,17 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. // #[PerformanceCriticalPath] -use std::{ - cell::Cell, - mem, - sync::{ - atomic::{AtomicBool, AtomicUsize, Ordering}, - Arc, Mutex, - }, +use std::sync::{ + atomic::{AtomicBool, AtomicUsize, Ordering}, + Arc, }; -use collections::HashMap; use crossbeam::channel::{SendError, TrySendError}; -use tikv_util::{debug, info, lru::LruCache, time::Instant, Either}; +use dashmap::DashMap; +use tikv_util::{debug, info, time::Instant, Either}; use crate::{ - fsm::{Fsm, FsmScheduler, FsmState}, + fsm::{Fsm, FsmScheduler}, mailbox::{BasicMailbox, Mailbox}, metrics::*, }; @@ -27,18 +23,14 @@ pub struct RouterTrace { pub leak: usize, } -struct NormalMailMap { - map: HashMap>, - // Count of Mailboxes that is stored in `map`. - alive_cnt: Arc, -} - enum CheckDoResult { NotExist, Invalid, Valid(T), } +const ROUTER_SHRINK_SIZE: usize = 1000; + /// Router routes messages to its target FSM's mailbox. /// /// In our abstract model, every batch system has two different kind of @@ -54,8 +46,7 @@ enum CheckDoResult { /// Normal FSM and control FSM can have different scheduler, but this is not /// required. pub struct Router { - normals: Arc>>, - caches: Cell>>, + normals: Arc>>, pub(super) control_box: BasicMailbox, // TODO: These two schedulers should be unified as single one. However // it's not possible to write FsmScheduler + FsmScheduler @@ -85,11 +76,7 @@ where state_cnt: Arc, ) -> Router { Router { - normals: Arc::new(Mutex::new(NormalMailMap { - map: HashMap::default(), - alive_cnt: Arc::default(), - })), - caches: Cell::new(LruCache::with_capacity_and_sample(1024, 7)), + normals: Arc::new(DashMap::default()), control_box, normal_scheduler, control_scheduler, @@ -106,72 +93,32 @@ where /// A helper function that tries to unify a common access pattern to /// mailbox. 
/// - /// Generally, when sending a message to a mailbox, cache should be - /// check first, if not found, lock should be acquired. - /// /// Returns None means there is no mailbox inside the normal registry. /// Some(None) means there is expected mailbox inside the normal registry /// but it returns None after apply the given function. Some(Some) means - /// the given function returns Some and cache is updated if it's invalid. + /// the given function returns Some. #[inline] fn check_do(&self, addr: u64, mut f: F) -> CheckDoResult where F: FnMut(&BasicMailbox) -> Option, { - let caches = unsafe { &mut *self.caches.as_ptr() }; - let mut connected = true; - if let Some(mailbox) = caches.get(&addr) { - match f(mailbox) { - Some(r) => return CheckDoResult::Valid(r), - None => { - connected = false; - } - } - } - - let (cnt, mailbox) = { - let mut boxes = self.normals.lock().unwrap(); - let cnt = boxes.map.len(); - let b = match boxes.map.get_mut(&addr) { - Some(mailbox) => mailbox.clone(), - None => { - drop(boxes); - if !connected { - caches.remove(&addr); - } - return CheckDoResult::NotExist; - } - }; - (cnt, b) - }; - if cnt > caches.capacity() || cnt < caches.capacity() / 2 { - caches.resize(cnt); - } - - let res = f(&mailbox); - match res { - Some(r) => { - caches.insert(addr, mailbox); - CheckDoResult::Valid(r) - } + let mailbox = match self.normals.get_mut(&addr) { + Some(mailbox) => mailbox, None => { - if !connected { - caches.remove(&addr); - } - CheckDoResult::Invalid + return CheckDoResult::NotExist; } + }; + match f(&mailbox) { + Some(r) => CheckDoResult::Valid(r), + None => CheckDoResult::Invalid, } } /// Register a mailbox with given address. 
pub fn register(&self, addr: u64, mailbox: BasicMailbox) { - let mut normals = self.normals.lock().unwrap(); - if let Some(mailbox) = normals.map.insert(addr, mailbox) { + if let Some(mailbox) = self.normals.insert(addr, mailbox) { mailbox.close(); } - normals - .alive_cnt - .store(normals.map.len(), Ordering::Relaxed); } /// Same as send a message and then register the mailbox. @@ -183,32 +130,22 @@ where mailbox: BasicMailbox, msg: N::Message, ) -> Result<(), (BasicMailbox, N::Message)> { - let mut normals = self.normals.lock().unwrap(); - // Send has to be done within lock, otherwise the message may be handled - // before the mailbox is register. + if let Some(mailbox) = self.normals.insert(addr, mailbox.clone()) { + mailbox.close(); + } if let Err(SendError(m)) = mailbox.force_send(msg, &self.normal_scheduler) { + self.normals.remove(&addr); return Err((mailbox, m)); } - if let Some(mailbox) = normals.map.insert(addr, mailbox) { - mailbox.close(); - } - normals - .alive_cnt - .store(normals.map.len(), Ordering::Relaxed); Ok(()) } pub fn register_all(&self, mailboxes: Vec<(u64, BasicMailbox)>) { - let mut normals = self.normals.lock().unwrap(); - normals.map.reserve(mailboxes.len()); for (addr, mailbox) in mailboxes { - if let Some(m) = normals.map.insert(addr, mailbox) { + if let Some(m) = self.normals.insert(addr, mailbox) { m.close(); } } - normals - .alive_cnt - .store(normals.map.len(), Ordering::Relaxed); } /// Get the mailbox of specified address. 
@@ -280,13 +217,11 @@ where pub fn force_send(&self, addr: u64, msg: N::Message) -> Result<(), SendError> { match self.send(addr, msg) { Ok(()) => Ok(()), - Err(TrySendError::Full(m)) => { - let caches = unsafe { &mut *self.caches.as_ptr() }; - caches - .get(&addr) - .unwrap() - .force_send(m, &self.normal_scheduler) - } + Err(TrySendError::Full(m)) => self + .normals + .get(&addr) + .unwrap() + .force_send(m, &self.normal_scheduler), Err(TrySendError::Disconnected(m)) => { if self.is_shutdown() { Ok(()) @@ -321,10 +256,9 @@ where /// Try to notify all normal FSMs a message. pub fn broadcast_normal(&self, mut msg_gen: impl FnMut() -> N::Message) { let timer = Instant::now_coarse(); - let mailboxes = self.normals.lock().unwrap(); - for mailbox in mailboxes.map.values() { + self.normals.iter().for_each(|mailbox| { let _ = mailbox.force_send(msg_gen(), &self.normal_scheduler); - } + }); BROADCAST_NORMAL_DURATION.observe(timer.saturating_elapsed_secs()); } @@ -332,12 +266,13 @@ where pub fn broadcast_shutdown(&self) { info!("broadcasting shutdown"); self.shutdown.store(true, Ordering::SeqCst); - unsafe { &mut *self.caches.as_ptr() }.clear(); - let mut mailboxes = self.normals.lock().unwrap(); - for (addr, mailbox) in mailboxes.map.drain() { + for e in self.normals.iter() { + let addr = e.key(); + let mailbox = e.value(); debug!("[region {}] shutdown mailbox", addr); mailbox.close(); } + self.normals.clear(); self.control_box.close(); self.normal_scheduler.shutdown(); self.control_scheduler.shutdown(); @@ -346,51 +281,32 @@ where /// Close the mailbox of address. 
pub fn close(&self, addr: u64) { info!("shutdown mailbox"; "region_id" => addr); - unsafe { &mut *self.caches.as_ptr() }.remove(&addr); - let mut mailboxes = self.normals.lock().unwrap(); - if let Some(mb) = mailboxes.map.remove(&addr) { + if let Some((_, mb)) = self.normals.remove(&addr) { mb.close(); } - mailboxes - .alive_cnt - .store(mailboxes.map.len(), Ordering::Relaxed); - } - - pub fn clear_cache(&self) { - unsafe { &mut *self.caches.as_ptr() }.clear(); + if self.normals.capacity() - self.normals.len() > ROUTER_SHRINK_SIZE { + self.normals.shrink_to_fit(); + } } pub fn state_cnt(&self) -> &Arc { &self.state_cnt } - pub fn alive_cnt(&self) -> Arc { - self.normals.lock().unwrap().alive_cnt.clone() + pub fn alive_cnt(&self) -> usize { + self.normals.len() } pub fn trace(&self) -> RouterTrace { - let alive = self.normals.lock().unwrap().alive_cnt.clone(); + let alive = self.alive_cnt(); let total = self.state_cnt.load(Ordering::Relaxed); - let alive = alive.load(Ordering::Relaxed); // 1 represents the control fsm. let leak = if total > alive + 1 { total - alive - 1 } else { 0 }; - let mailbox_unit = mem::size_of::<(u64, BasicMailbox)>(); - let state_unit = mem::size_of::>(); - // Every message in crossbeam sender needs 8 bytes to store state. - let message_unit = mem::size_of::() + 8; - // crossbeam unbounded channel sender has a list of blocks. Every block has 31 - // unit and every sender has at least one sender. - let sender_block_unit = 31; - RouterTrace { - alive: (mailbox_unit * 8 / 7 // hashmap uses 7/8 of allocated memory. - + state_unit + message_unit * sender_block_unit) - * alive, - leak: (state_unit + message_unit * sender_block_unit) * leak, - } + RouterTrace { alive, leak } } } @@ -398,7 +314,6 @@ impl Clone for Router { fn clone(&self) -> Router { Router { normals: self.normals.clone(), - caches: Cell::new(LruCache::with_capacity_and_sample(1024, 7)), control_box: self.control_box.clone(), // These two schedulers should be unified as single one. 
However // it's not possible to write FsmScheduler + FsmScheduler diff --git a/components/batch-system/tests/cases/router.rs b/components/batch-system/tests/cases/router.rs index d746dfad5cb..66d0770d544 100644 --- a/components/batch-system/tests/cases/router.rs +++ b/components/batch-system/tests/cases/router.rs @@ -143,25 +143,19 @@ fn test_router_trace() { router.close(addr); }; - let router_clone = router.clone(); + let mut mailboxes = vec![]; for i in 0..10 { register_runner(i); - // Read mailbox to cache. - router_clone.mailbox(i).unwrap(); + mailboxes.push(router.mailbox(i).unwrap()); } - assert_eq!(router.alive_cnt().load(Ordering::Relaxed), 10); + assert_eq!(router.alive_cnt(), 10); assert_eq!(router.state_cnt().load(Ordering::Relaxed), 11); - // Routers closed but exist in the cache. for i in 0..10 { close_runner(i); } - assert_eq!(router.alive_cnt().load(Ordering::Relaxed), 0); + assert_eq!(router.alive_cnt(), 0); assert_eq!(router.state_cnt().load(Ordering::Relaxed), 11); - for i in 0..1024 { - register_runner(i); - // Read mailbox to cache, closed routers should be evicted. 
- router_clone.mailbox(i).unwrap(); - } - assert_eq!(router.alive_cnt().load(Ordering::Relaxed), 1024); - assert_eq!(router.state_cnt().load(Ordering::Relaxed), 1025); + drop(mailboxes); + assert_eq!(router.alive_cnt(), 0); + assert_eq!(router.state_cnt().load(Ordering::Relaxed), 1); } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index df11ba51fc8..11167a4c395 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -468,10 +468,6 @@ where self.update_trace(); } - pub fn clear_cache(&self) { - self.router.clear_cache(); - } - fn update_trace(&self) { let router_trace = self.router.trace(); MEMTRACE_RAFT_ROUTER_ALIVE.trace(TraceEvent::Reset(router_trace.alive)); @@ -1847,8 +1843,6 @@ impl RaftBatchSystem { warn!("set thread priority for raftstore failed"; "error" => ?e); } self.workers = Some(workers); - // This router will not be accessed again, free all caches. - self.router.clear_cache(); Ok(()) } diff --git a/components/tikv_util/src/mpsc/mod.rs b/components/tikv_util/src/mpsc/mod.rs index 700691f1189..9a71dbc0c5e 100644 --- a/components/tikv_util/src/mpsc/mod.rs +++ b/components/tikv_util/src/mpsc/mod.rs @@ -8,9 +8,8 @@ pub mod future; pub mod priority_queue; use std::{ - cell::Cell, sync::{ - atomic::{AtomicBool, AtomicIsize, Ordering}, + atomic::{AtomicBool, AtomicIsize, AtomicUsize, Ordering}, Arc, }, time::Duration, @@ -208,7 +207,7 @@ const CHECK_INTERVAL: usize = 8; /// A sender of channel that limits the maximun pending messages count loosely. pub struct LooseBoundedSender { sender: Sender, - tried_cnt: Cell, + tried_cnt: AtomicUsize, limit: usize, } @@ -230,25 +229,23 @@ impl LooseBoundedSender { /// Send a message regardless its capacity limit. 
#[inline] pub fn force_send(&self, t: T) -> Result<(), SendError> { - let cnt = self.tried_cnt.get(); - self.tried_cnt.set(cnt + 1); + self.tried_cnt.fetch_add(1, Ordering::AcqRel); self.sender.send(t) } /// Attempts to send a message into the channel without blocking. #[inline] pub fn try_send(&self, t: T) -> Result<(), TrySendError> { - let cnt = self.tried_cnt.get(); let check_interval = || { fail_point!("loose_bounded_sender_check_interval", |_| 0); CHECK_INTERVAL }; - if cnt < check_interval() { - self.tried_cnt.set(cnt + 1); - } else if self.len() < self.limit { - self.tried_cnt.set(1); - } else { - return Err(TrySendError::Full(t)); + if self.tried_cnt.fetch_add(1, Ordering::AcqRel) >= check_interval() { + if self.len() < self.limit { + self.tried_cnt.store(1, Ordering::Release); + } else { + return Err(TrySendError::Full(t)); + } } match self.sender.send(t) { @@ -275,7 +272,7 @@ impl Clone for LooseBoundedSender { fn clone(&self) -> LooseBoundedSender { LooseBoundedSender { sender: self.sender.clone(), - tried_cnt: self.tried_cnt.clone(), + tried_cnt: AtomicUsize::new(0), limit: self.limit, } } @@ -287,7 +284,7 @@ pub fn loose_bounded(cap: usize) -> (LooseBoundedSender, Receiver) { ( LooseBoundedSender { sender, - tried_cnt: Cell::new(0), + tried_cnt: AtomicUsize::new(0), limit: cap, }, receiver, From 40b225f70c92db96baae7b85891c193c1674d2d4 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Fri, 25 Aug 2023 15:29:05 +0800 Subject: [PATCH 0877/1149] raftstore: fix meta inconsistency issue (#15423) close tikv/tikv#13311 Fix the possible meta inconsistency issue. 
Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/peer.rs | 62 ++++++++------ components/raftstore/src/store/fsm/store.rs | 3 +- .../raftstore/src/store/peer_storage.rs | 3 + tests/failpoints/cases/test_split_region.rs | 80 ++++++++++++++++++- 4 files changed, 121 insertions(+), 27 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index da91e26eb09..62a3a2650de 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -97,7 +97,7 @@ use crate::{ UnsafeRecoveryState, UnsafeRecoveryWaitApplySyncer, }, util, - util::{is_region_initialized, KeysInfoFormatter, LeaseState}, + util::{KeysInfoFormatter, LeaseState}, worker::{ Bucket, BucketRange, CleanupTask, ConsistencyCheckTask, GcSnapshotTask, RaftlogGcTask, ReadDelegate, ReadProgress, RegionTask, SplitCheckTask, @@ -322,6 +322,7 @@ where "replicate peer"; "region_id" => region_id, "peer_id" => peer.get_id(), + "store_id" => store_id, ); let mut region = metapb::Region::default(); @@ -2460,6 +2461,7 @@ where } }); + let is_initialized_peer = self.fsm.peer.is_initialized(); debug!( "handle raft message"; "region_id" => self.region_id(), @@ -2467,6 +2469,7 @@ where "message_type" => %util::MsgType(&msg), "from_peer_id" => msg.get_from_peer().get_id(), "to_peer_id" => msg.get_to_peer().get_id(), + "is_initialized_peer" => is_initialized_peer, ); if self.fsm.peer.pending_remove || self.fsm.stopped { @@ -3664,14 +3667,7 @@ where } let region_id = self.region_id(); - let is_initialized = self.fsm.peer.is_initialized(); - info!( - "starts destroy"; - "region_id" => region_id, - "peer_id" => self.fsm.peer_id(), - "merged_by_target" => merged_by_target, - "is_initialized" => is_initialized, - ); + let is_peer_initialized = self.fsm.peer.is_initialized(); // We can't destroy a peer which is handling snapshot. 
assert!(!self.fsm.peer.is_handling_snapshot()); @@ -3688,27 +3684,40 @@ where .snapshot_recovery_maybe_finish_wait_apply(/* force= */ true); } + (|| { + fail_point!( + "before_destroy_peer_on_peer_1003", + self.fsm.peer.peer_id() == 1003, + |_| {} + ); + })(); let mut meta = self.ctx.store_meta.lock().unwrap(); - let is_region_initialized_in_meta = meta - .regions - .get(®ion_id) - .map_or(false, |region| is_region_initialized(region)); - if !is_initialized && is_region_initialized_in_meta { - let region_in_meta = meta.regions.get(®ion_id).unwrap(); - error!( - "peer is destroyed inconsistently"; - "region_id" => region_id, + let is_latest_initialized = { + if let Some(latest_region_info) = meta.regions.get(®ion_id) { + util::is_region_initialized(latest_region_info) + } else { + false + } + }; + + if !is_peer_initialized && is_latest_initialized { + info!("skip destroy uninitialized peer as it's already initialized in meta"; + "region_id" => self.fsm.region_id(), "peer_id" => self.fsm.peer_id(), - "peers" => ?self.region().get_peers(), "merged_by_target" => merged_by_target, - "is_initialized" => is_initialized, - "is_region_initialized_in_meta" => is_region_initialized_in_meta, - "start_key_in_meta" => log_wrappers::Value::key(region_in_meta.get_start_key()), - "end_key_in_meta" => log_wrappers::Value::key(region_in_meta.get_end_key()), - "peers_in_meta" => ?region_in_meta.get_peers(), ); + return false; } + info!( + "starts destroy"; + "region_id" => self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + "merged_by_target" => merged_by_target, + "is_peer_initialized" => is_peer_initialized, + "is_latest_initialized" => is_latest_initialized, + ); + if meta.atomic_snap_regions.contains_key(&self.region_id()) { drop(meta); panic!( @@ -3764,7 +3773,7 @@ where self.ctx.router.close(region_id); self.fsm.stop(); - if is_initialized + if is_peer_initialized && !merged_by_target && meta .region_ranges @@ -3773,6 +3782,7 @@ where { panic!("{} meta corruption 
detected", self.fsm.peer.tag); } + if meta.regions.remove(®ion_id).is_none() && !merged_by_target { panic!("{} meta corruption detected", self.fsm.peer.tag) } @@ -4139,6 +4149,7 @@ where // Insert new regions and validation let mut is_uninitialized_peer_exist = false; + let self_store_id = self.ctx.store.get_id(); if let Some(r) = meta.regions.get(&new_region_id) { // Suppose a new node is added by conf change and the snapshot comes slowly. // Then, the region splits and the first vote message comes to the new node @@ -4160,6 +4171,7 @@ where "region_id" => new_region_id, "region" => ?new_region, "is_uninitialized_peer_exist" => is_uninitialized_peer_exist, + "store_id" => self_store_id, ); let (sender, mut new_peer) = match PeerFsm::create( diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 11167a4c395..53559bbe1b8 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -1955,7 +1955,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } info!( "region doesn't exist yet, wait for it to be split"; - "region_id" => region_id + "region_id" => region_id, + "to_peer_id" => msg.get_to_peer().get_id(), ); return Ok(CheckMsgStatus::FirstRequest); } diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index d89eafc3a46..a888929ca98 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -1017,6 +1017,9 @@ where // The `region` is updated after persisting in order to stay consistent with the // one in `StoreMeta::regions` (will be updated soon). // See comments in `apply_snapshot` for more details. 
+ (|| { + fail_point!("before_set_region_on_peer_3", self.peer_id == 3, |_| {}); + })(); self.set_region(res.region.clone()); } } diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 3520de4e3ad..dfd7002495c 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -1,5 +1,4 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. - use std::{ sync::{ atomic::{AtomicBool, Ordering}, @@ -41,6 +40,85 @@ use tikv_util::{ }; use txn_types::{Key, LastChange, PessimisticLock, TimeStamp}; +#[test] +fn test_meta_inconsistency() { + let mut cluster = new_server_cluster(0, 3); + cluster.cfg.raft_store.store_batch_system.pool_size = 2; + cluster.cfg.raft_store.store_batch_system.max_batch_size = Some(1); + cluster.cfg.raft_store.apply_batch_system.pool_size = 2; + cluster.cfg.raft_store.apply_batch_system.max_batch_size = Some(1); + cluster.cfg.raft_store.hibernate_regions = false; + cluster.cfg.raft_store.raft_log_gc_threshold = 1000; + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + let region_id = cluster.run_conf_change(); + pd_client.must_add_peer(region_id, new_peer(2, 2)); + cluster.must_transfer_leader(region_id, new_peer(1, 1)); + cluster.must_put(b"k1", b"v1"); + + // Add new peer on node 3, its snapshot apply is paused. + fail::cfg("before_set_region_on_peer_3", "pause").unwrap(); + pd_client.must_add_peer(region_id, new_peer(3, 3)); + + // Let only heartbeat msg to pass so a replicate peer could be created on node 3 + // for peer 1003. + let region_packet_filter_region_1000_peer_1003 = + RegionPacketFilter::new(1000, 3).skip(MessageType::MsgHeartbeat); + cluster + .sim + .wl() + .add_recv_filter(3, Box::new(region_packet_filter_region_1000_peer_1003)); + + // Trigger a region split to create region 1000 with peer 1001, 1002 and 1003. 
+ let region = cluster.get_region(b""); + cluster.must_split(®ion, b"k5"); + + // Scheduler a larger peed id heartbeat msg to trigger peer destroy for peer + // 1003, pause it before the meta.lock operation so new region insertions by + // region split could go first. + // Thus a inconsistency could happen because the destroy is handled + // by a uninitialized peer but the new initialized region info is inserted into + // the meta by region split. + fail::cfg("before_destroy_peer_on_peer_1003", "pause").unwrap(); + let new_region = cluster.get_region(b"k4"); + let mut larger_id_msg = Box::::default(); + larger_id_msg.set_region_id(1000); + larger_id_msg.set_to_peer(new_peer(3, 1113)); + larger_id_msg.set_region_epoch(new_region.get_region_epoch().clone()); + larger_id_msg + .mut_region_epoch() + .set_conf_ver(new_region.get_region_epoch().get_conf_ver() + 1); + larger_id_msg.set_from_peer(new_peer(1, 1001)); + let raft_message = larger_id_msg.mut_message(); + raft_message.set_msg_type(MessageType::MsgHeartbeat); + raft_message.set_from(1001); + raft_message.set_to(1113); + raft_message.set_term(6); + cluster.sim.wl().send_raft_msg(*larger_id_msg).unwrap(); + thread::sleep(Duration::from_millis(500)); + + // Let snapshot apply continue on peer 3 from region 0, then region split would + // be applied too. + fail::remove("before_set_region_on_peer_3"); + thread::sleep(Duration::from_millis(2000)); + + // Let self destroy continue after the region split is finished. 
+ fail::remove("before_destroy_peer_on_peer_1003"); + sleep_ms(1000); + + // Clear the network partition nemesis, trigger a new region split, panic would + // be encountered The thread 'raftstore-3-1::test_message_order_3' panicked + // at 'meta corrupted: no region for 1000 7A6B35 when creating 1004 + // region_id: 1004 from_peer { id: 1005 store_id: 1 } to_peer { id: 1007 + // store_id: 3 } message { msg_type: MsgRequestPreVote to: 1007 from: 1005 + // term: 6 log_term: 5 index: 5 commit: 5 commit_term: 5 } region_epoch { + // conf_ver: 3 version: 3 } end_key: 6B32'. + cluster.sim.wl().clear_recv_filters(3); + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + cluster.must_put(b"k1", b"v1"); +} + #[test] fn test_follower_slow_split() { let mut cluster = new_node_cluster(0, 3); From 503648f18312b8978f19b17f4e58b3f011bb3cb0 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 25 Aug 2023 18:42:35 +0800 Subject: [PATCH 0878/1149] *: add memory quota to resolved_ts::Resolver (#15400) ref tikv/tikv#14864 This is the first PR to fix OOM caused by Resolver tracking large txns. Resolver checks memory quota before tracking a lock, and returns false if it exceeds memory quota. 
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../backup-stream/src/subscription_track.rs | 19 ++- components/cdc/src/channel.rs | 87 ++------------ components/cdc/src/delegate.rs | 23 ++-- components/cdc/src/endpoint.rs | 49 ++++---- components/cdc/src/initializer.rs | 14 ++- components/cdc/src/lib.rs | 2 +- components/cdc/src/service.rs | 10 +- components/cdc/tests/mod.rs | 7 +- components/resolved_ts/src/endpoint.rs | 100 ++++++++++------ components/resolved_ts/src/resolver.rs | 87 +++++++++++--- components/server/src/server.rs | 9 +- components/server/src/server2.rs | 9 +- components/tikv_util/src/memory.rs | 113 +++++++++++++++++- 13 files changed, 347 insertions(+), 182 deletions(-) diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index e92759bc2b2..ef6e24d9d8f 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -9,7 +9,7 @@ use dashmap::{ use kvproto::metapb::Region; use raftstore::coprocessor::*; use resolved_ts::Resolver; -use tikv_util::{info, warn}; +use tikv_util::{info, memory::MemoryQuota, warn}; use txn_types::TimeStamp; use crate::{debug, metrics::TRACK_REGION, utils}; @@ -401,7 +401,7 @@ impl<'a> SubscriptionRef<'a> { } } -/// This enhanced version of `Resolver` allow some unordered lock events. +/// This enhanced version of `Resolver` allow some unordered lock events. /// The name "2-phase" means this is used for 2 *concurrency* phases of /// observing a region: /// 1. Doing the initial scanning. @@ -479,7 +479,8 @@ impl TwoPhaseResolver { if !self.in_phase_one() { warn!("backup stream tracking lock as if in phase one"; "start_ts" => %start_ts, "key" => %utils::redact(&key)) } - self.resolver.track_lock(start_ts, key, None) + // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. 
+ assert!(self.resolver.track_lock(start_ts, key, None)); } pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec) { @@ -487,7 +488,8 @@ impl TwoPhaseResolver { self.future_locks.push(FutureLock::Lock(key, start_ts)); return; } - self.resolver.track_lock(start_ts, key, None) + // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. + assert!(self.resolver.track_lock(start_ts, key, None)); } pub fn untrack_lock(&mut self, key: &[u8]) { @@ -501,7 +503,10 @@ impl TwoPhaseResolver { fn handle_future_lock(&mut self, lock: FutureLock) { match lock { - FutureLock::Lock(key, ts) => self.resolver.track_lock(ts, key, None), + FutureLock::Lock(key, ts) => { + // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. + assert!(self.resolver.track_lock(ts, key, None)); + } FutureLock::Unlock(key) => self.resolver.untrack_lock(&key, None), } } @@ -523,8 +528,10 @@ impl TwoPhaseResolver { } pub fn new(region_id: u64, stable_ts: Option) -> Self { + // TODO: limit the memory usage of the resolver. + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); Self { - resolver: Resolver::new(region_id), + resolver: Resolver::new(region_id, memory_quota), future_locks: Default::default(), stable_ts, } diff --git a/components/cdc/src/channel.rs b/components/cdc/src/channel.rs index b11799d87c1..6a8c3d5c3aa 100644 --- a/components/cdc/src/channel.rs +++ b/components/cdc/src/channel.rs @@ -1,13 +1,6 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{ - fmt, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, - }, - time::Duration, -}; +use std::{fmt, sync::Arc, time::Duration}; use futures::{ channel::mpsc::{ @@ -20,7 +13,9 @@ use futures::{ use grpcio::WriteFlags; use kvproto::cdcpb::{ChangeDataEvent, Event, ResolvedTs}; use protobuf::Message; -use tikv_util::{future::block_on_timeout, impl_display_as_debug, time::Instant, warn}; +use tikv_util::{ + future::block_on_timeout, impl_display_as_debug, memory::MemoryQuota, time::Instant, warn, +}; use crate::metrics::*; @@ -185,71 +180,7 @@ impl EventBatcher { } } -#[derive(Clone)] -pub struct MemoryQuota { - capacity: Arc, - in_use: Arc, -} - -impl MemoryQuota { - pub fn new(capacity: usize) -> MemoryQuota { - MemoryQuota { - capacity: Arc::new(AtomicUsize::new(capacity)), - in_use: Arc::new(AtomicUsize::new(0)), - } - } - - pub fn in_use(&self) -> usize { - self.in_use.load(Ordering::Relaxed) - } - - pub(crate) fn capacity(&self) -> usize { - self.capacity.load(Ordering::Acquire) - } - - pub(crate) fn set_capacity(&self, capacity: usize) { - self.capacity.store(capacity, Ordering::Release) - } - - fn alloc(&self, bytes: usize) -> bool { - let mut in_use_bytes = self.in_use.load(Ordering::Relaxed); - let capacity = self.capacity.load(Ordering::Acquire); - loop { - if in_use_bytes + bytes > capacity { - return false; - } - let new_in_use_bytes = in_use_bytes + bytes; - match self.in_use.compare_exchange_weak( - in_use_bytes, - new_in_use_bytes, - Ordering::Acquire, - Ordering::Relaxed, - ) { - Ok(_) => return true, - Err(current) => in_use_bytes = current, - } - } - } - - fn free(&self, bytes: usize) { - let mut in_use_bytes = self.in_use.load(Ordering::Relaxed); - loop { - // Saturating at the numeric bounds instead of overflowing. 
- let new_in_use_bytes = in_use_bytes - std::cmp::min(bytes, in_use_bytes); - match self.in_use.compare_exchange_weak( - in_use_bytes, - new_in_use_bytes, - Ordering::Acquire, - Ordering::Relaxed, - ) { - Ok(_) => return, - Err(current) => in_use_bytes = current, - } - } - } -} - -pub fn channel(buffer: usize, memory_quota: MemoryQuota) -> (Sink, Drain) { +pub fn channel(buffer: usize, memory_quota: Arc) -> (Sink, Drain) { let (unbounded_sender, unbounded_receiver) = unbounded(); let (bounded_sender, bounded_receiver) = bounded(buffer); ( @@ -304,7 +235,7 @@ impl_from_future_send_error! { pub struct Sink { unbounded_sender: UnboundedSender<(CdcEvent, usize)>, bounded_sender: Sender<(CdcEvent, usize)>, - memory_quota: MemoryQuota, + memory_quota: Arc, } impl Sink { @@ -354,7 +285,7 @@ impl Sink { pub struct Drain { unbounded_receiver: UnboundedReceiver<(CdcEvent, usize)>, bounded_receiver: Receiver<(CdcEvent, usize)>, - memory_quota: MemoryQuota, + memory_quota: Arc, } impl<'a> Drain { @@ -451,7 +382,7 @@ mod tests { type Send = Box Result<(), SendError>>; fn new_test_channel(buffer: usize, capacity: usize, force_send: bool) -> (Send, Drain) { - let memory_quota = MemoryQuota::new(capacity); + let memory_quota = Arc::new(MemoryQuota::new(capacity)); let (mut tx, rx) = channel(buffer, memory_quota); let mut flag = true; let send = move |event| { @@ -599,7 +530,7 @@ mod tests { // 1KB let max_pending_bytes = 1024; let buffer = max_pending_bytes / event.size(); - let memory_quota = MemoryQuota::new(max_pending_bytes as _); + let memory_quota = Arc::new(MemoryQuota::new(max_pending_bytes as _)); let (tx, _rx) = channel(buffer as _, memory_quota); for _ in 0..buffer { tx.unbounded_send(CdcEvent::Event(e.clone()), false) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index 4c8b2226f49..da5c26aad30 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -414,7 +414,10 @@ impl Delegate { for lock in mem::take(&mut 
pending.locks) { match lock { - PendingLock::Track { key, start_ts } => resolver.track_lock(start_ts, key, None), + PendingLock::Track { key, start_ts } => { + // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. + assert!(resolver.track_lock(start_ts, key, None)); + } PendingLock::Untrack { key } => resolver.untrack_lock(&key, None), } } @@ -822,7 +825,8 @@ impl Delegate { // In order to compute resolved ts, we must track inflight txns. match self.resolver { Some(ref mut resolver) => { - resolver.track_lock(row.start_ts.into(), row.key.clone(), None) + // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. + assert!(resolver.track_lock(row.start_ts.into(), row.key.clone(), None)); } None => { assert!(self.pending.is_some(), "region resolver not ready"); @@ -1151,9 +1155,10 @@ mod tests { use api_version::RawValue; use futures::{executor::block_on, stream::StreamExt}; use kvproto::{errorpb::Error as ErrorHeader, metapb::Region}; + use tikv_util::memory::MemoryQuota; use super::*; - use crate::channel::{channel, recv_timeout, MemoryQuota}; + use crate::channel::{channel, recv_timeout}; #[test] fn test_error() { @@ -1165,7 +1170,7 @@ mod tests { region.mut_region_epoch().set_conf_ver(2); let region_epoch = region.get_region_epoch().clone(); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (sink, mut drain) = crate::channel::channel(1, quota); let rx = drain.drain(); let request_id = 123; @@ -1182,7 +1187,8 @@ mod tests { let mut delegate = Delegate::new(region_id, Default::default()); delegate.subscribe(downstream).unwrap(); assert!(delegate.handle.is_observing()); - let resolver = Resolver::new(region_id); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let resolver = Resolver::new(region_id, memory_quota); assert!(delegate.on_region_ready(resolver, region).is_empty()); assert!(delegate.downstreams()[0].observed_range.all_key_covered); @@ 
-1333,7 +1339,8 @@ mod tests { region.mut_region_epoch().set_conf_ver(1); region.mut_region_epoch().set_version(1); { - let failures = delegate.on_region_ready(Resolver::new(1), region); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let failures = delegate.on_region_ready(Resolver::new(1, memory_quota), region); assert_eq!(failures.len(), 1); let id = failures[0].0.id; delegate.unsubscribe(id, None); @@ -1456,7 +1463,7 @@ mod tests { } assert_eq!(map.len(), 5); - let (sink, mut drain) = channel(1, MemoryQuota::new(1024)); + let (sink, mut drain) = channel(1, Arc::new(MemoryQuota::new(1024))); let downstream = Downstream { id: DownstreamId::new(), req_id: 1, @@ -1529,7 +1536,7 @@ mod tests { } assert_eq!(map.len(), 5); - let (sink, mut drain) = channel(1, MemoryQuota::new(1024)); + let (sink, mut drain) = channel(1, Arc::new(MemoryQuota::new(1024))); let downstream = Downstream { id: DownstreamId::new(), req_id: 1, diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 23a3e410467..72042bb5aec 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -41,6 +41,7 @@ use tikv::{ }; use tikv_util::{ debug, defer, error, impl_display_as_debug, info, + memory::MemoryQuota, mpsc::bounded, slow_log, sys::thread::ThreadBuildWrapper, @@ -56,7 +57,7 @@ use tokio::{ use txn_types::{TimeStamp, TxnExtra, TxnExtraScheduler}; use crate::{ - channel::{CdcEvent, MemoryQuota, SendError}, + channel::{CdcEvent, SendError}, delegate::{on_init_downstream, Delegate, Downstream, DownstreamId, DownstreamState}, initializer::Initializer, metrics::*, @@ -370,7 +371,7 @@ pub struct Endpoint { scan_speed_limiter: Limiter, max_scan_batch_bytes: usize, max_scan_batch_size: usize, - sink_memory_quota: MemoryQuota, + sink_memory_quota: Arc, old_value_cache: OldValueCache, resolved_region_heap: RefCell, @@ -401,7 +402,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, security_mgr: Arc, - sink_memory_quota: MemoryQuota, + 
sink_memory_quota: Arc, causal_ts_provider: Option>, ) -> Endpoint { let workers = Builder::new_multi_thread() @@ -1455,7 +1456,7 @@ mod tests { ConcurrencyManager::new(1.into()), env, security_mgr, - MemoryQuota::new(usize::MAX), + Arc::new(MemoryQuota::new(usize::MAX)), causal_ts_provider, ); @@ -1476,7 +1477,7 @@ mod tests { let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); suite.add_region(1, 100); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, mut rx) = channel::channel(1, quota); let mut rx = rx.drain(); @@ -1732,7 +1733,7 @@ mod tests { #[test] fn test_raftstore_is_busy() { - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, _rx) = channel::channel(1, quota); let mut suite = mock_endpoint(&CdcConfig::default(), None, ApiVersion::V1); @@ -1785,7 +1786,7 @@ mod tests { }; let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); suite.add_region(1, 100); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, mut rx) = channel::channel(1, quota); let mut rx = rx.drain(); @@ -1966,7 +1967,7 @@ mod tests { let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); suite.add_region(1, 100); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, mut rx) = channel::channel(1, quota); let mut rx = rx.drain(); let mut region = Region::default(); @@ -1999,7 +2000,8 @@ mod tests { downstream, conn_id, }); - let resolver = Resolver::new(1); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let resolver = Resolver::new(1, memory_quota); let observe_id = suite.endpoint.capture_regions[&1].handle.id; suite.on_region_ready(observe_id, resolver, region.clone()); suite.run(Task::MinTs { @@ -2035,7 +2037,8 @@ mod tests { downstream, conn_id, }); - let resolver = 
Resolver::new(2); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let resolver = Resolver::new(2, memory_quota); region.set_id(2); let observe_id = suite.endpoint.capture_regions[&2].handle.id; suite.on_region_ready(observe_id, resolver, region); @@ -2056,7 +2059,7 @@ mod tests { } // Register region 3 to another conn which is not support batch resolved ts. - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, mut rx2) = channel::channel(1, quota); let mut rx2 = rx2.drain(); let mut region = Region::default(); @@ -2084,7 +2087,8 @@ mod tests { downstream, conn_id, }); - let resolver = Resolver::new(3); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let resolver = Resolver::new(3, memory_quota); region.set_id(3); let observe_id = suite.endpoint.capture_regions[&3].handle.id; suite.on_region_ready(observe_id, resolver, region); @@ -2127,7 +2131,7 @@ mod tests { fn test_deregister() { let mut suite = mock_endpoint(&CdcConfig::default(), None, ApiVersion::V1); suite.add_region(1, 100); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, mut rx) = channel::channel(1, quota); let mut rx = rx.drain(); @@ -2279,7 +2283,7 @@ mod tests { // Open two connections a and b, registers region 1, 2 to conn a and // region 3 to conn b. 
let mut conn_rxs = vec![]; - let quota = channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); for region_ids in vec![vec![1, 2], vec![3]] { let (tx, rx) = channel::channel(1, quota.clone()); conn_rxs.push(rx); @@ -2311,7 +2315,8 @@ mod tests { downstream, conn_id, }); - let resolver = Resolver::new(region_id); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let resolver = Resolver::new(region_id, memory_quota); let observe_id = suite.endpoint.capture_regions[®ion_id].handle.id; let mut region = Region::default(); region.set_id(region_id); @@ -2392,7 +2397,7 @@ mod tests { fn test_deregister_conn_then_delegate() { let mut suite = mock_endpoint(&CdcConfig::default(), None, ApiVersion::V1); suite.add_region(1, 100); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); // Open conn a let (tx1, _rx1) = channel::channel(1, quota.clone()); @@ -2470,10 +2475,11 @@ mod tests { let mut region = Region::default(); region.id = 1; region.set_region_epoch(region_epoch_2); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); suite.run(Task::ResolverReady { observe_id, region: region.clone(), - resolver: Resolver::new(1), + resolver: Resolver::new(1, memory_quota), }); // Deregister deletgate due to epoch not match for conn b. 
@@ -2557,7 +2563,7 @@ mod tests { ..Default::default() }; let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, mut rx) = channel::channel(1, quota); let mut rx = rx.drain(); @@ -2596,8 +2602,9 @@ mod tests { conn_id, }); - let mut resolver = Resolver::new(id); - resolver.track_lock(TimeStamp::compose(0, id), vec![], None); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let mut resolver = Resolver::new(id, memory_quota); + assert!(resolver.track_lock(TimeStamp::compose(0, id), vec![], None)); let mut region = Region::default(); region.id = id; region.set_region_epoch(region_epoch); @@ -2646,7 +2653,7 @@ mod tests { }; let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); suite.add_region(1, 100); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (tx, mut rx) = channel::channel(1, quota); let mut rx = rx.drain(); diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 2c0884bb303..44b564ce663 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -36,6 +36,7 @@ use tikv_util::{ box_err, codec::number, debug, error, info, + memory::MemoryQuota, sys::inspector::{self_thread_inspector, ThreadInspector}, time::{Instant, Limiter}, warn, @@ -215,7 +216,9 @@ impl Initializer { "end_key" => log_wrappers::Value::key(snap.upper_bound().unwrap_or_default())); let mut resolver = if self.build_resolver { - Some(Resolver::new(region_id)) + // TODO: limit the memory usage of the resolver. 
+ let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + Some(Resolver::new(region_id, memory_quota)) } else { None }; @@ -418,7 +421,11 @@ impl Initializer { let key = Key::from_encoded_slice(encoded_key).into_raw().unwrap(); let lock = Lock::parse(value)?; match lock.lock_type { - LockType::Put | LockType::Delete => resolver.track_lock(lock.ts, key, None), + LockType::Put | LockType::Delete => { + // TODO: handle memory quota exceed, for now, quota is set to + // usize::MAX. + assert!(resolver.track_lock(lock.ts, key, None)); + } _ => (), }; } @@ -587,6 +594,7 @@ mod tests { TestEngineBuilder, }; use tikv_util::{ + memory::MemoryQuota, sys::thread::ThreadBuildWrapper, worker::{LazyWorker, Runnable}, }; @@ -629,7 +637,7 @@ mod tests { crate::channel::Drain, ) { let (receiver_worker, rx) = new_receiver_worker(); - let quota = crate::channel::MemoryQuota::new(usize::MAX); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); let (sink, drain) = crate::channel::channel(buffer, quota); let pool = Builder::new_multi_thread() diff --git a/components/cdc/src/lib.rs b/components/cdc/src/lib.rs index c913cefb92e..64f110f5c45 100644 --- a/components/cdc/src/lib.rs +++ b/components/cdc/src/lib.rs @@ -15,7 +15,7 @@ mod old_value; mod service; mod txn_source; -pub use channel::{recv_timeout, CdcEvent, MemoryQuota}; +pub use channel::{recv_timeout, CdcEvent}; pub use config::CdcConfigManager; pub use delegate::Delegate; pub use endpoint::{CdcTxnExtraScheduler, Endpoint, Task, Validate}; diff --git a/components/cdc/src/service.rs b/components/cdc/src/service.rs index d07b5283380..7478e3afbad 100644 --- a/components/cdc/src/service.rs +++ b/components/cdc/src/service.rs @@ -16,10 +16,10 @@ use kvproto::{ }, kvrpcpb::ApiVersion, }; -use tikv_util::{error, info, warn, worker::*}; +use tikv_util::{error, info, memory::MemoryQuota, warn, worker::*}; use crate::{ - channel::{channel, MemoryQuota, Sink, CDC_CHANNLE_CAPACITY}, + channel::{channel, Sink, CDC_CHANNLE_CAPACITY}, 
delegate::{Downstream, DownstreamId, DownstreamState, ObservedRange}, endpoint::{Deregister, Task}, }; @@ -244,14 +244,14 @@ impl EventFeedHeaders { #[derive(Clone)] pub struct Service { scheduler: Scheduler, - memory_quota: MemoryQuota, + memory_quota: Arc, } impl Service { /// Create a ChangeData service. /// /// It requires a scheduler of an `Endpoint` in order to schedule tasks. - pub fn new(scheduler: Scheduler, memory_quota: MemoryQuota) -> Service { + pub fn new(scheduler: Scheduler, memory_quota: Arc) -> Service { Service { scheduler, memory_quota, @@ -518,7 +518,7 @@ mod tests { use crate::channel::{recv_timeout, CdcEvent}; fn new_rpc_suite(capacity: usize) -> (Server, ChangeDataClient, ReceiverWrapper) { - let memory_quota = MemoryQuota::new(capacity); + let memory_quota = Arc::new(MemoryQuota::new(capacity)); let (scheduler, rx) = dummy_scheduler(); let cdc_service = Service::new(scheduler, memory_quota); let env = Arc::new(EnvBuilder::new().build()); diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index d2c4519d50d..ec479909793 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -6,7 +6,7 @@ use std::{ }; use causal_ts::CausalTsProvider; -use cdc::{recv_timeout, CdcObserver, Delegate, FeatureGate, MemoryQuota, Task, Validate}; +use cdc::{recv_timeout, CdcObserver, Delegate, FeatureGate, Task, Validate}; use collections::HashMap; use concurrency_manager::ConcurrencyManager; use engine_rocks::RocksEngine; @@ -26,6 +26,7 @@ use test_raftstore::*; use tikv::{config::CdcConfig, server::DEFAULT_CLUSTER_ID, storage::kv::LocalTablets}; use tikv_util::{ config::ReadableDuration, + memory::MemoryQuota, worker::{LazyWorker, Runnable}, HandyRwLock, }; @@ -183,7 +184,7 @@ impl TestSuiteBuilder { .push(Box::new(move || { create_change_data(cdc::Service::new( scheduler.clone(), - MemoryQuota::new(memory_quota), + Arc::new(MemoryQuota::new(memory_quota)), )) })); sim.txn_extra_schedulers.insert( @@ -223,7 +224,7 @@ impl 
TestSuiteBuilder { cm.clone(), env, sim.security_mgr.clone(), - MemoryQuota::new(usize::MAX), + Arc::new(MemoryQuota::new(usize::MAX)), sim.get_causal_ts_provider(*id), ); let mut updated_cfg = cfg.clone(); diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 5d0dbdcd689..36cd3030d2a 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -28,6 +28,7 @@ use raftstore::{ use security::SecurityManager; use tikv::config::ResolvedTsConfig; use tikv_util::{ + memory::MemoryQuota, warn, worker::{Runnable, RunnableWithTimer, Scheduler}, }; @@ -80,9 +81,9 @@ struct ObserveRegion { } impl ObserveRegion { - fn new(meta: Region, rrp: Arc) -> Self { + fn new(meta: Region, rrp: Arc, memory_quota: Arc) -> Self { ObserveRegion { - resolver: Resolver::with_read_progress(meta.id, Some(rrp)), + resolver: Resolver::with_read_progress(meta.id, Some(rrp), memory_quota), meta, handle: ObserveHandle::new(), resolver_status: ResolverStatus::Pending { @@ -93,8 +94,8 @@ impl ObserveRegion { } } - fn read_progress(&self) -> &RegionReadProgress { - self.resolver.read_progress.as_ref().unwrap() + fn read_progress(&self) -> &Arc { + self.resolver.read_progress().unwrap() } fn track_change_log(&mut self, change_logs: &[ChangeLog]) -> std::result::Result<(), String> { @@ -192,21 +193,29 @@ impl ObserveRegion { } }, ChangeLog::Rows { rows, index } => { - rows.iter().for_each(|row| match row { - ChangeRow::Prewrite { key, start_ts, .. } => self - .resolver - .track_lock(*start_ts, key.to_raw().unwrap(), Some(*index)), - ChangeRow::Commit { key, .. } => self - .resolver - .untrack_lock(&key.to_raw().unwrap(), Some(*index)), - // One pc command do not contains any lock, so just skip it - ChangeRow::OnePc { .. } => { - self.resolver.update_tracked_index(*index); - } - ChangeRow::IngestSsT => { - self.resolver.update_tracked_index(*index); + for row in rows { + match row { + ChangeRow::Prewrite { key, start_ts, .. 
} => { + if !self.resolver.track_lock( + *start_ts, + key.to_raw().unwrap(), + Some(*index), + ) { + return Err("memory quota exceed".to_owned()); + } + } + ChangeRow::Commit { key, .. } => self + .resolver + .untrack_lock(&key.to_raw().unwrap(), Some(*index)), + // One pc command do not contains any lock, so just skip it + ChangeRow::OnePc { .. } => { + self.resolver.update_tracked_index(*index); + } + ChangeRow::IngestSsT => { + self.resolver.update_tracked_index(*index); + } } - }); + } } } } @@ -215,7 +224,10 @@ impl ObserveRegion { Ok(()) } - fn track_scan_locks(&mut self, entries: Vec, apply_index: u64) { + /// Track locks in incoming scan entries. + /// Return false if resolver exceeds memory quota. + #[must_use] + fn track_scan_locks(&mut self, entries: Vec, apply_index: u64) -> bool { for es in entries { match es { ScanEntry::Lock(locks) => { @@ -223,8 +235,13 @@ impl ObserveRegion { panic!("region {:?} resolver has ready", self.meta.id) } for (key, lock) in locks { - self.resolver - .track_lock(lock.ts, key.to_raw().unwrap(), Some(apply_index)); + if !self.resolver.track_lock( + lock.ts, + key.to_raw().unwrap(), + Some(apply_index), + ) { + return false; + } } } ScanEntry::None => { @@ -237,18 +254,25 @@ impl ObserveRegion { tracked_index, .. } => { - locks.into_iter().for_each(|lock| match lock { - PendingLock::Track { key, start_ts } => { - self.resolver.track_lock( - start_ts, - key.to_raw().unwrap(), - Some(tracked_index), - ) + for lock in locks { + match lock { + PendingLock::Track { key, start_ts } => { + if !self.resolver.track_lock( + start_ts, + key.to_raw().unwrap(), + Some(tracked_index), + ) { + return false; + } + } + PendingLock::Untrack { key, .. } => { + self.resolver.untrack_lock( + &key.to_raw().unwrap(), + Some(tracked_index), + ) + } } - PendingLock::Untrack { key, .. 
} => self - .resolver - .untrack_lock(&key.to_raw().unwrap(), Some(tracked_index)), - }); + } tracked_index } ResolverStatus::Ready => { @@ -266,12 +290,14 @@ impl ObserveRegion { ScanEntry::TxnEntry(_) => panic!("unexpected entry type"), } } + true } } pub struct Endpoint { store_id: Option, cfg: ResolvedTsConfig, + memory_quota: Arc, advance_notify: Arc, store_meta: Arc>, region_read_progress: RegionReadProgressRegistry, @@ -321,6 +347,8 @@ where let ep = Self { store_id: Some(store_id), cfg: cfg.clone(), + // TODO: add memory quota to config. + memory_quota: Arc::new(MemoryQuota::new(std::usize::MAX)), advance_notify: Arc::new(Notify::new()), scheduler, store_meta, @@ -343,7 +371,7 @@ where "register observe region"; "region" => ?region ); - ObserveRegion::new(region.clone(), read_progress) + ObserveRegion::new(region.clone(), read_progress, self.memory_quota.clone()) } else { warn!( "try register unexit region"; @@ -537,6 +565,7 @@ where if observe_region.handle.id == observe_id { let logs = ChangeLog::encode_change_log(region_id, batch); if let Err(e) = observe_region.track_change_log(&logs) { + // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. drop(observe_region); self.re_register_region(region_id, observe_id, e); } @@ -561,7 +590,8 @@ where match self.regions.get_mut(®ion_id) { Some(observe_region) => { if observe_region.handle.id == observe_id { - observe_region.track_scan_locks(entries, apply_index); + // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. 
+ assert!(observe_region.track_scan_locks(entries, apply_index)); } } None => { @@ -904,7 +934,7 @@ where .next() .cloned() .map(TimeStamp::into_inner); - lock_num = Some(ob.resolver.locks_by_key.len()); + lock_num = Some(ob.resolver.num_locks()); } info!( "the max gap of safe-ts is large"; diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 799c5584723..4b04bf02322 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -4,7 +4,10 @@ use std::{cmp, collections::BTreeMap, sync::Arc}; use collections::{HashMap, HashSet}; use raftstore::store::RegionReadProgress; -use tikv_util::time::Instant; +use tikv_util::{ + memory::{HeapSize, MemoryQuota}, + time::Instant, +}; use txn_types::TimeStamp; use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; @@ -16,7 +19,7 @@ const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; pub struct Resolver { region_id: u64, // key -> start_ts - pub(crate) locks_by_key: HashMap, TimeStamp>, + locks_by_key: HashMap, TimeStamp>, // start_ts -> locked keys. lock_ts_heap: BTreeMap>>, // The timestamps that guarantees no more commit will happen before. @@ -24,11 +27,14 @@ pub struct Resolver { // The highest index `Resolver` had been tracked tracked_index: u64, // The region read progress used to utilize `resolved_ts` to serve stale read request - pub(crate) read_progress: Option>, + read_progress: Option>, // The timestamps that advance the resolved_ts when there is no more write. min_ts: TimeStamp, // Whether the `Resolver` is stopped stopped: bool, + + // The memory quota for the `Resolver` and its lock keys and timestamps. + memory_quota: Arc, } impl std::fmt::Debug for Resolver { @@ -39,27 +45,38 @@ impl std::fmt::Debug for Resolver { if let Some((ts, keys)) = far_lock { dt.field(&format_args!( - "far_lock={:?}", + "oldest_lock={:?}", keys.iter() // We must use Display format here or the redact won't take effect. 
.map(|k| format!("{}", log_wrappers::Value::key(k))) .collect::>() )); - dt.field(&format_args!("far_lock_ts={:?}", ts)); + dt.field(&format_args!("oldest_lock_ts={:?}", ts)); } dt.finish() } } +impl Drop for Resolver { + fn drop(&mut self) { + // Free memory quota used by locks_by_key. + for key in self.locks_by_key.keys() { + let bytes = key.heap_size(); + self.memory_quota.free(bytes); + } + } +} + impl Resolver { - pub fn new(region_id: u64) -> Resolver { - Resolver::with_read_progress(region_id, None) + pub fn new(region_id: u64, memory_quota: Arc) -> Resolver { + Resolver::with_read_progress(region_id, None, memory_quota) } pub fn with_read_progress( region_id: u64, read_progress: Option>, + memory_quota: Arc, ) -> Resolver { Resolver { region_id, @@ -70,6 +87,7 @@ impl Resolver { tracked_index: 0, min_ts: TimeStamp::zero(), stopped: false, + memory_quota, } } @@ -87,11 +105,9 @@ impl Resolver { pub fn size(&self) -> usize { self.locks_by_key.keys().map(|k| k.len()).sum::() - + self - .lock_ts_heap - .values() - .map(|h| h.iter().map(|k| k.len()).sum::()) - .sum::() + + self.locks_by_key.len() * std::mem::size_of::() + + self.lock_ts_heap.len() + * (std::mem::size_of::() + std::mem::size_of::>>()) } pub fn locks(&self) -> &BTreeMap>> { @@ -115,7 +131,8 @@ impl Resolver { self.tracked_index = index; } - pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec, index: Option) { + #[must_use] + pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec, index: Option) -> bool { if let Some(index) = index { self.update_tracked_index(index); } @@ -125,9 +142,14 @@ impl Resolver { start_ts, self.region_id ); + let bytes = key.as_slice().heap_size(); + if !self.memory_quota.alloc(bytes) { + return false; + } let key: Arc<[u8]> = key.into_boxed_slice().into(); self.locks_by_key.insert(key.clone(), start_ts); self.lock_ts_heap.entry(start_ts).or_default().insert(key); + true } pub fn untrack_lock(&mut self, key: &[u8], index: Option) { @@ -135,6 +157,8 @@ impl 
Resolver { self.update_tracked_index(index); } let start_ts = if let Some(start_ts) = self.locks_by_key.remove(key) { + let bytes = key.heap_size(); + self.memory_quota.free(bytes); start_ts } else { debug!("untrack a lock that was not tracked before"; "key" => &log_wrappers::Value::key(key)); @@ -230,6 +254,10 @@ impl Resolver { pub(crate) fn num_transactions(&self) -> u64 { self.lock_ts_heap.len() as u64 } + + pub(crate) fn read_progress(&self) -> Option<&Arc> { + self.read_progress.as_ref() + } } #[cfg(test)] @@ -300,11 +328,16 @@ mod tests { ]; for (i, case) in cases.into_iter().enumerate() { - let mut resolver = Resolver::new(1); + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let mut resolver = Resolver::new(1, memory_quota); for e in case.clone() { match e { Event::Lock(start_ts, key) => { - resolver.track_lock(start_ts.into(), key.into_raw().unwrap(), None) + assert!(resolver.track_lock( + start_ts.into(), + key.into_raw().unwrap(), + None + )); } Event::Unlock(key) => resolver.untrack_lock(&key.into_raw().unwrap(), None), Event::Resolve(min_ts, expect) => { @@ -319,4 +352,28 @@ mod tests { } } } + + #[test] + fn test_memory_quota() { + let memory_quota = Arc::new(MemoryQuota::new(1024)); + let mut resolver = Resolver::new(1, memory_quota.clone()); + let mut key = vec![0; 77]; + let mut ts = TimeStamp::default(); + while resolver.track_lock(ts, key.clone(), None) { + ts.incr(); + key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); + } + let remain = 1024 % key.len(); + assert_eq!(memory_quota.in_use(), 1024 - remain); + + let mut ts = TimeStamp::default(); + for _ in 0..5 { + ts.incr(); + key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); + resolver.untrack_lock(&key, None); + } + assert_eq!(memory_quota.in_use(), 1024 - 5 * key.len() - remain); + drop(resolver); + assert_eq!(memory_quota.in_use(), 0); + } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 7ff51474d7d..57afb85d5b5 100644 
--- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -28,7 +28,7 @@ use backup_stream::{ BackupStreamResolver, }; use causal_ts::CausalTsProviderImpl; -use cdc::{CdcConfigManager, MemoryQuota}; +use cdc::CdcConfigManager; use concurrency_manager::ConcurrencyManager; use engine_rocks::{from_rocks_compression_type, RocksEngine, RocksStatistics}; use engine_rocks_helper::sst_recovery::{RecoveryRunner, DEFAULT_CHECK_INTERVAL}; @@ -108,6 +108,7 @@ use tikv::{ use tikv_util::{ check_environment_variables, config::VersionTrack, + memory::MemoryQuota, mpsc as TikvMpsc, quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, sys::{disk, path_in_diff_mount_point, register_memory_usage_high_water, SysQuota}, @@ -266,7 +267,7 @@ struct Servers { node: Node, importer: Arc, cdc_scheduler: tikv_util::worker::Scheduler, - cdc_memory_quota: MemoryQuota, + cdc_memory_quota: Arc, rsmeter_pubsub_service: resource_metering::PubSubService, backup_stream_scheduler: Option>, debugger: DebuggerImpl>, LockManager, F>, @@ -986,7 +987,9 @@ where } // Start CDC. 
- let cdc_memory_quota = MemoryQuota::new(self.core.config.cdc.sink_memory_quota.0 as _); + let cdc_memory_quota = Arc::new(MemoryQuota::new( + self.core.config.cdc.sink_memory_quota.0 as _, + )); let cdc_endpoint = cdc::Endpoint::new( self.core.config.server.cluster_id, &self.core.config.cdc, diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index fe2b685313e..32d7ab14da9 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -32,7 +32,7 @@ use backup_stream::{ BackupStreamResolver, }; use causal_ts::CausalTsProviderImpl; -use cdc::{CdcConfigManager, MemoryQuota}; +use cdc::CdcConfigManager; use concurrency_manager::ConcurrencyManager; use engine_rocks::{from_rocks_compression_type, RocksEngine, RocksStatistics}; use engine_traits::{Engines, KvEngine, MiscExt, RaftEngine, TabletRegistry, CF_DEFAULT, CF_WRITE}; @@ -106,6 +106,7 @@ use tikv::{ use tikv_util::{ check_environment_variables, config::VersionTrack, + memory::MemoryQuota, mpsc as TikvMpsc, quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, sys::{disk, path_in_diff_mount_point, register_memory_usage_high_water, SysQuota}, @@ -243,7 +244,7 @@ struct TikvServer { env: Arc, cdc_worker: Option>>, cdc_scheduler: Option>, - cdc_memory_quota: Option, + cdc_memory_quota: Option>, backup_stream_scheduler: Option>, sst_worker: Option>>, quota_limiter: Arc, @@ -637,7 +638,9 @@ where Box::new(CdcConfigManager(cdc_scheduler.clone())), ); // Start cdc endpoint. 
- let cdc_memory_quota = MemoryQuota::new(self.core.config.cdc.sink_memory_quota.0 as _); + let cdc_memory_quota = Arc::new(MemoryQuota::new( + self.core.config.cdc.sink_memory_quota.0 as _, + )); let cdc_endpoint = cdc::Endpoint::new( self.core.config.server.cluster_id, &self.core.config.cdc, diff --git a/components/tikv_util/src/memory.rs b/components/tikv_util/src/memory.rs index 0a2f49461c5..17b6b23cf78 100644 --- a/components/tikv_util/src/memory.rs +++ b/components/tikv_util/src/memory.rs @@ -1,6 +1,9 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -use std::mem; +use std::{ + mem, + sync::atomic::{AtomicUsize, Ordering}, +}; use kvproto::{ encryptionpb::EncryptionMeta, @@ -28,6 +31,12 @@ pub trait HeapSize { } } +impl HeapSize for [u8] { + fn heap_size(&self) -> usize { + self.len() * mem::size_of::() + } +} + impl HeapSize for Region { fn heap_size(&self) -> usize { let mut size = self.start_key.capacity() + self.end_key.capacity(); @@ -65,3 +74,105 @@ impl HeapSize for RaftCmdRequest { + mem::size_of_val(&self.status_request) } } + +pub struct MemoryQuota { + capacity: AtomicUsize, + in_use: AtomicUsize, +} + +impl MemoryQuota { + pub fn new(capacity: usize) -> MemoryQuota { + MemoryQuota { + capacity: AtomicUsize::new(capacity), + in_use: AtomicUsize::new(0), + } + } + + pub fn in_use(&self) -> usize { + self.in_use.load(Ordering::Relaxed) + } + + pub fn capacity(&self) -> usize { + self.capacity.load(Ordering::Acquire) + } + + pub fn set_capacity(&self, capacity: usize) { + self.capacity.store(capacity, Ordering::Release) + } + + pub fn alloc(&self, bytes: usize) -> bool { + let mut in_use_bytes = self.in_use.load(Ordering::Relaxed); + let capacity = self.capacity.load(Ordering::Acquire); + loop { + if in_use_bytes + bytes > capacity { + return false; + } + let new_in_use_bytes = in_use_bytes + bytes; + match self.in_use.compare_exchange_weak( + in_use_bytes, + new_in_use_bytes, + Ordering::Acquire, + Ordering::Relaxed, + ) { + 
Ok(_) => return true, + Err(current) => in_use_bytes = current, + } + } + } + + pub fn free(&self, bytes: usize) { + let mut in_use_bytes = self.in_use.load(Ordering::Relaxed); + loop { + // Saturating at the numeric bounds instead of overflowing. + let new_in_use_bytes = in_use_bytes - std::cmp::min(bytes, in_use_bytes); + match self.in_use.compare_exchange_weak( + in_use_bytes, + new_in_use_bytes, + Ordering::Acquire, + Ordering::Relaxed, + ) { + Ok(_) => return, + Err(current) => in_use_bytes = current, + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_memory_quota() { + let quota = MemoryQuota::new(100); + assert!(quota.alloc(10)); + assert_eq!(quota.in_use(), 10); + assert!(!quota.alloc(100)); + assert_eq!(quota.in_use(), 10); + quota.free(5); + assert_eq!(quota.in_use(), 5); + assert!(quota.alloc(95)); + assert_eq!(quota.in_use(), 100); + quota.free(95); + assert_eq!(quota.in_use(), 5); + } + + #[test] + fn test_resize_memory_quota() { + let quota = MemoryQuota::new(100); + assert!(quota.alloc(10)); + assert_eq!(quota.in_use(), 10); + assert!(!quota.alloc(100)); + assert_eq!(quota.in_use(), 10); + quota.set_capacity(200); + assert!(quota.alloc(100)); + assert_eq!(quota.in_use(), 110); + quota.set_capacity(50); + assert!(!quota.alloc(100)); + assert_eq!(quota.in_use(), 110); + quota.free(100); + assert_eq!(quota.in_use(), 10); + assert!(quota.alloc(40)); + assert_eq!(quota.in_use(), 50); + } +} From f3b5bf51e9105fb5685ef23e454301f48fd27caf Mon Sep 17 00:00:00 2001 From: glorv Date: Mon, 28 Aug 2023 11:30:36 +0800 Subject: [PATCH 0879/1149] config: support changed adjust max-background-compactions dynamically (#15425) close tikv/tikv#15424 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/server/src/common.rs | 6 ++++- components/server/src/server.rs | 6 ++++- components/server/src/server2.rs | 6 ++++- src/config/mod.rs | 42 +++++++++++++++++++++++++------- 4 
files changed, 48 insertions(+), 12 deletions(-) diff --git a/components/server/src/common.rs b/components/server/src/common.rs index 165a1c8509e..c8cf879d905 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -762,7 +762,11 @@ impl ConfiguredRaftEngine for RocksEngine { fn register_config(&self, cfg_controller: &mut ConfigController) { cfg_controller.register( tikv::config::Module::Raftdb, - Box::new(DbConfigManger::new(self.clone(), DbType::Raft)), + Box::new(DbConfigManger::new( + cfg_controller.get_current().rocksdb, + self.clone(), + DbType::Raft, + )), ); } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 57afb85d5b5..72f7b936956 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1552,7 +1552,11 @@ impl TikvServer { let cfg_controller = self.cfg_controller.as_mut().unwrap(); cfg_controller.register( tikv::config::Module::Rocksdb, - Box::new(DbConfigManger::new(kv_engine.clone(), DbType::Kv)), + Box::new(DbConfigManger::new( + cfg_controller.get_current().rocksdb, + kv_engine.clone(), + DbType::Kv, + )), ); let reg = TabletRegistry::new( Box::new(SingletonFactory::new(kv_engine)), diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 32d7ab14da9..1289ffe848d 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1485,7 +1485,11 @@ impl TikvServer { let cfg_controller = self.cfg_controller.as_mut().unwrap(); cfg_controller.register( tikv::config::Module::Rocksdb, - Box::new(DbConfigManger::new(registry.clone(), DbType::Kv)), + Box::new(DbConfigManger::new( + cfg_controller.get_current().rocksdb, + registry.clone(), + DbType::Kv, + )), ); self.tablet_registry = Some(registry.clone()); raft_engine.register_config(cfg_controller); diff --git a/src/config/mod.rs b/src/config/mod.rs index f7c338379ef..38369b3ee93 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1903,13 +1903,14 @@ 
pub enum DbType { } pub struct DbConfigManger { + cfg: DbConfig, db: D, db_type: DbType, } impl DbConfigManger { - pub fn new(db: D, db_type: DbType) -> Self { - DbConfigManger { db, db_type } + pub fn new(cfg: DbConfig, db: D, db_type: DbType) -> Self { + DbConfigManger { cfg, db, db_type } } } @@ -1944,10 +1945,31 @@ impl DbConfigManger { _ => Err(format!("invalid cf {:?} for db {:?}", cf, self.db_type).into()), } } + + fn update_background_cfg( + &self, + max_background_jobs: i32, + max_background_flushes: i32, + ) -> Result<(), Box> { + assert!(max_background_jobs > 0 && max_background_flushes > 0); + let max_background_compacts = + std::cmp::max(max_background_jobs - max_background_flushes, 1); + self.db + .set_db_config(&[("max_background_jobs", &max_background_jobs.to_string())])?; + self.db.set_db_config(&[( + "max_background_flushes", + &max_background_flushes.to_string(), + )])?; + self.db.set_db_config(&[( + "max_background_compactions", + &max_background_compacts.to_string(), + )]) + } } impl ConfigManager for DbConfigManger { fn dispatch(&mut self, change: ConfigChange) -> Result<(), Box> { + self.cfg.update(change.clone())?; let change_str = format!("{:?}", change); let mut change: Vec<(String, ConfigValue)> = change.into_iter().collect(); let cf_config = change.drain_filter(|(name, _)| name.ends_with("cf")); @@ -2011,8 +2033,7 @@ impl ConfigManager for DbConfigManger { .next() { let max_background_jobs: i32 = background_jobs_config.1.into(); - self.db - .set_db_config(&[("max_background_jobs", &max_background_jobs.to_string())])?; + self.update_background_cfg(max_background_jobs, self.cfg.max_background_flushes)?; } if let Some(background_subcompactions_config) = change @@ -2029,10 +2050,7 @@ impl ConfigManager for DbConfigManger { .next() { let max_background_flushes: i32 = background_flushes_config.1.into(); - self.db.set_db_config(&[( - "max_background_flushes", - &max_background_flushes.to_string(), - )])?; + 
self.update_background_cfg(self.cfg.max_background_jobs, max_background_flushes)?; } if !change.is_empty() { @@ -4958,7 +4976,11 @@ mod tests { let cfg_controller = ConfigController::new(cfg); cfg_controller.register( Module::Rocksdb, - Box::new(DbConfigManger::new(engine.clone(), DbType::Kv)), + Box::new(DbConfigManger::new( + cfg_controller.get_current().rocksdb, + engine.clone(), + DbType::Kv, + )), ); let (scheduler, receiver) = dummy_scheduler(); cfg_controller.register( @@ -5108,6 +5130,7 @@ mod tests { .update_config("rocksdb.max-background-jobs", "8") .unwrap(); assert_eq!(db.get_db_options().get_max_background_jobs(), 8); + assert_eq!(db.get_db_options().get_max_background_compactions(), 6); // update max_background_flushes, set to a bigger value assert_eq!(db.get_db_options().get_max_background_flushes(), 2); @@ -5116,6 +5139,7 @@ mod tests { .update_config("rocksdb.max-background-flushes", "5") .unwrap(); assert_eq!(db.get_db_options().get_max_background_flushes(), 5); + assert_eq!(db.get_db_options().get_max_background_compactions(), 3); // update rate_bytes_per_sec assert_eq!( From e5efbe697455bd7814c6979df06a8ccf0189909a Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Mon, 28 Aug 2023 15:53:06 +0800 Subject: [PATCH 0880/1149] raftstore-v2: enable failpoint for raftstore v2 in stale-peer (#15421) ref tikv/tikv#15409 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- .../src/operation/command/admin/conf_change.rs | 9 +++++++++ components/raftstore/src/store/fsm/apply.rs | 4 ++-- tests/failpoints/cases/test_stale_peer.rs | 18 +++++++++++++----- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 2bd06fca6c2..c7b8481aa7c 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ 
b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -10,6 +10,7 @@ use std::time::Instant; use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; +use fail::fail_point; use kvproto::{ metapb::{self, PeerRole}, raft_cmdpb::{AdminRequest, AdminResponse, ChangePeerRequest, RaftCmdRequest}, @@ -392,6 +393,14 @@ impl Apply { match change_type { ConfChangeType::AddNode => { + let add_node_fp = || { + fail_point!( + "apply_on_add_node_1_2", + self.peer_id() == 2 && self.region_id() == 1, + |_| {} + ) + }; + add_node_fp(); PEER_ADMIN_CMD_COUNTER_VEC .with_label_values(&["add_peer", "all"]) .inc(); diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index e2b1cedc88d..0bc1ccf7d85 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -2114,14 +2114,14 @@ where match change_type { ConfChangeType::AddNode => { - let add_ndoe_fp = || { + let add_node_fp = || { fail_point!( "apply_on_add_node_1_2", self.id() == 2 && self.region_id() == 1, |_| {} ) }; - add_ndoe_fp(); + add_node_fp(); PEER_ADMIN_CMD_COUNTER_VEC .with_label_values(&["add_peer", "all"]) diff --git a/tests/failpoints/cases/test_stale_peer.rs b/tests/failpoints/cases/test_stale_peer.rs index 39fa09ef014..80c73f03a16 100644 --- a/tests/failpoints/cases/test_stale_peer.rs +++ b/tests/failpoints/cases/test_stale_peer.rs @@ -12,6 +12,7 @@ use kvproto::raft_serverpb::{PeerState, RaftLocalState, RaftMessage}; use pd_client::PdClient; use raft::eraftpb::MessageType; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, time::Instant, HandyRwLock}; #[test] @@ -44,7 +45,8 @@ fn test_one_node_leader_missing() { fail::remove(check_stale_state); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_update_localreader_after_removed() { let mut cluster = new_node_cluster(0, 6); let 
pd_client = cluster.pd_client.clone(); @@ -90,7 +92,8 @@ fn test_node_update_localreader_after_removed() { cluster.must_region_not_exist(r1, 2); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_stale_learner_restart() { let mut cluster = new_node_cluster(0, 2); cluster.pd_client.disable_default_operator(); @@ -133,9 +136,11 @@ fn test_stale_learner_restart() { must_get_equal(&cluster.get_engine(2), b"k2", b"v2"); } +/// pass /// Test if a peer can be destroyed through tombstone msg when applying /// snapshot. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_stale_peer_destroy_when_apply_snapshot() { let mut cluster = new_node_cluster(0, 3); configure_for_snapshot(&mut cluster.cfg); @@ -210,9 +215,11 @@ fn test_stale_peer_destroy_when_apply_snapshot() { must_get_none(&cluster.get_engine(3), b"k1"); } +/// pass /// Test if destroy a uninitialized peer through tombstone msg would allow a /// staled peer be created again. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_destroy_uninitialized_peer_when_there_exists_old_peer() { // 4 stores cluster. let mut cluster = new_node_cluster(0, 4); @@ -291,7 +298,8 @@ fn test_destroy_uninitialized_peer_when_there_exists_old_peer() { /// Logs scan are now moved to raftlog gc threads. The case is to test if logs /// are still cleaned up when there is stale logs before first index during /// destroy. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_destroy_clean_up_logs_with_unfinished_log_gc() { let mut cluster = new_node_cluster(0, 3); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(15); From c66bfe87c17a2892c5d7440cd30d17147b3fff15 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 29 Aug 2023 17:03:38 +0800 Subject: [PATCH 0881/1149] resolved_ts: re-register region if memory quota exceeded (#15411) close tikv/tikv#14864 Fix resolved ts OOM caused by Resolver tracking large txns. `ObserveRegion` is deregistered if it exceeds memory quota. It may cause higher CPU usage because of scanning locks, but it's better than OOM. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resolved_ts/src/endpoint.rs | 98 ++++++++++++------- components/resolved_ts/src/errors.rs | 53 +--------- components/resolved_ts/src/resolver.rs | 57 ++++++++--- components/resolved_ts/src/scanner.rs | 61 +++++++----- .../resolved_ts/tests/integrations/mod.rs | 92 ++++++++++++++++- components/resolved_ts/tests/mod.rs | 15 ++- src/config/mod.rs | 2 + tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 9 files changed, 254 insertions(+), 126 deletions(-) diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 36cd3030d2a..3c1ad9d8c8d 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -41,10 +41,12 @@ use crate::{ metrics::*, resolver::Resolver, scanner::{ScanEntry, ScanMode, ScanTask, ScannerPool}, + Error, Result, }; /// grace period for logging safe-ts and resolved-ts gap in slow log const SLOW_LOG_GRACE_PERIOD_MS: u64 = 1000; +const MEMORY_QUOTA_EXCEEDED_BACKOFF: Duration = Duration::from_secs(30); enum ResolverStatus { Pending { @@ -98,7 +100,7 @@ impl ObserveRegion { self.resolver.read_progress().unwrap() } - fn 
track_change_log(&mut self, change_logs: &[ChangeLog]) -> std::result::Result<(), String> { + fn track_change_log(&mut self, change_logs: &[ChangeLog]) -> Result<()> { match &mut self.resolver_status { ResolverStatus::Pending { locks, @@ -119,7 +121,7 @@ impl ObserveRegion { // TODO: for admin cmd that won't change the region meta like peer list // and key range (i.e. `CompactLog`, `ComputeHash`) we may not need to // return error - return Err(format!( + return Err(box_err!( "region met admin command {:?} while initializing resolver", req_type )); @@ -201,7 +203,7 @@ impl ObserveRegion { key.to_raw().unwrap(), Some(*index), ) { - return Err("memory quota exceed".to_owned()); + return Err(Error::MemoryQuotaExceeded); } } ChangeRow::Commit { key, .. } => self @@ -225,9 +227,7 @@ impl ObserveRegion { } /// Track locks in incoming scan entries. - /// Return false if resolver exceeds memory quota. - #[must_use] - fn track_scan_locks(&mut self, entries: Vec, apply_index: u64) -> bool { + fn track_scan_locks(&mut self, entries: Vec, apply_index: u64) -> Result<()> { for es in entries { match es { ScanEntry::Lock(locks) => { @@ -240,7 +240,7 @@ impl ObserveRegion { key.to_raw().unwrap(), Some(apply_index), ) { - return false; + return Err(Error::MemoryQuotaExceeded); } } } @@ -262,7 +262,7 @@ impl ObserveRegion { key.to_raw().unwrap(), Some(tracked_index), ) { - return false; + return Err(Error::MemoryQuotaExceeded); } } PendingLock::Untrack { key, .. } => { @@ -290,7 +290,7 @@ impl ObserveRegion { ScanEntry::TxnEntry(_) => panic!("unexpected entry type"), } } - true + Ok(()) } } @@ -347,8 +347,7 @@ where let ep = Self { store_id: Some(store_id), cfg: cfg.clone(), - // TODO: add memory quota to config. 
- memory_quota: Arc::new(MemoryQuota::new(std::usize::MAX)), + memory_quota: Arc::new(MemoryQuota::new(cfg.memory_quota.0 as usize)), advance_notify: Arc::new(Notify::new()), scheduler, store_meta, @@ -362,7 +361,7 @@ where ep } - fn register_region(&mut self, region: Region) { + fn register_region(&mut self, region: Region, backoff: Option) { let region_id = region.get_id(); assert!(self.regions.get(®ion_id).is_none()); let observe_region = { @@ -390,7 +389,7 @@ where .update_advance_resolved_ts_notify(self.advance_notify.clone()); self.regions.insert(region_id, observe_region); - let scan_task = self.build_scan_task(region, observe_handle, cancelled); + let scan_task = self.build_scan_task(region, observe_handle, cancelled, backoff); self.scanner_pool.spawn_task(scan_task); RTS_SCAN_TASKS.with_label_values(&["total"]).inc(); } @@ -400,6 +399,7 @@ where region: Region, observe_handle: ObserveHandle, cancelled: Arc, + backoff: Option, ) -> ScanTask { let scheduler = self.scheduler.clone(); let scheduler_error = self.scheduler.clone(); @@ -411,6 +411,7 @@ where mode: ScanMode::LockOnly, region, checkpoint_ts: TimeStamp::zero(), + backoff, is_cancelled: Box::new(move || cancelled.load(Ordering::Acquire)), send_entries: Box::new(move |entries, apply_index| { scheduler @@ -424,13 +425,16 @@ where RTS_SCAN_TASKS.with_label_values(&["finish"]).inc(); }), on_error: Some(Box::new(move |observe_id, _region, e| { - scheduler_error - .schedule(Task::ReRegisterRegion { - region_id, - observe_id, - cause: format!("met error while handle scan task {:?}", e), - }) - .unwrap_or_else(|schedule_err| warn!("schedule re-register task failed"; "err" => ?schedule_err, "re_register_cause" => ?e)); + if let Err(e) = scheduler_error.schedule(Task::ReRegisterRegion { + region_id, + observe_id, + cause: e, + }) { + warn!("schedule re-register task failed"; + "region_id" => region_id, + "observe_id" => ?observe_id, + "error" => ?e); + } RTS_SCAN_TASKS.with_label_values(&["abort"]).inc(); })), 
} @@ -476,7 +480,7 @@ where // the `Resolver`'s lock heap // - `PrepareMerge` and `RollbackMerge`, the key range is unchanged self.deregister_region(region_id); - self.register_region(incoming_region); + self.register_region(incoming_region, None); } } @@ -507,7 +511,13 @@ where } // Deregister current observed region and try to register it again. - fn re_register_region(&mut self, region_id: u64, observe_id: ObserveId, cause: String) { + fn re_register_region( + &mut self, + region_id: u64, + observe_id: ObserveId, + cause: Error, + backoff: Option, + ) { if let Some(observe_region) = self.regions.get(®ion_id) { if observe_region.handle.id != observe_id { warn!("resolved ts deregister region failed due to observe_id not match"); @@ -518,7 +528,7 @@ where "register region again"; "region_id" => region_id, "observe_id" => ?observe_id, - "cause" => cause + "cause" => ?cause ); self.deregister_region(region_id); let region; @@ -529,7 +539,7 @@ where None => return, } } - self.register_region(region); + self.register_region(region, backoff); } } @@ -565,9 +575,12 @@ where if observe_region.handle.id == observe_id { let logs = ChangeLog::encode_change_log(region_id, batch); if let Err(e) = observe_region.track_change_log(&logs) { - // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. drop(observe_region); - self.re_register_region(region_id, observe_id, e); + let backoff = match e { + Error::MemoryQuotaExceeded => Some(MEMORY_QUOTA_EXCEEDED_BACKOFF), + Error::Other(_) => None, + }; + self.re_register_region(region_id, observe_id, e, backoff); } } else { debug!("resolved ts CmdBatch discarded"; @@ -587,16 +600,23 @@ where entries: Vec, apply_index: u64, ) { - match self.regions.get_mut(®ion_id) { - Some(observe_region) => { - if observe_region.handle.id == observe_id { - // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. 
- assert!(observe_region.track_scan_locks(entries, apply_index)); + let mut is_memory_quota_exceeded = false; + if let Some(observe_region) = self.regions.get_mut(®ion_id) { + if observe_region.handle.id == observe_id { + if let Err(Error::MemoryQuotaExceeded) = + observe_region.track_scan_locks(entries, apply_index) + { + is_memory_quota_exceeded = true; } } - None => { - debug!("scan locks region not exist"; "region_id" => region_id, "observe_id" => ?observe_id); - } + } else { + debug!("scan locks region not exist"; + "region_id" => region_id, + "observe_id" => ?observe_id); + } + if is_memory_quota_exceeded { + let backoff = Some(MEMORY_QUOTA_EXCEEDED_BACKOFF); + self.re_register_region(region_id, observe_id, Error::MemoryQuotaExceeded, backoff); } } @@ -616,6 +636,8 @@ where warn!("resolved-ts config fails"; "error" => ?e); } else { self.advance_notify.notify_waiters(); + self.memory_quota + .set_capacity(self.cfg.memory_quota.0 as usize); info!( "resolved-ts config changed"; "prev" => prev, @@ -668,7 +690,7 @@ pub enum Task { ReRegisterRegion { region_id: u64, observe_id: ObserveId, - cause: String, + cause: Error, }, AdvanceResolvedTs { leader_resolver: LeadershipResolver, @@ -780,13 +802,13 @@ where match task { Task::RegionDestroyed(region) => self.region_destroyed(region), Task::RegionUpdated(region) => self.region_updated(region), - Task::RegisterRegion { region } => self.register_region(region), + Task::RegisterRegion { region } => self.register_region(region, None), Task::DeRegisterRegion { region_id } => self.deregister_region(region_id), Task::ReRegisterRegion { region_id, observe_id, cause, - } => self.re_register_region(region_id, observe_id, cause), + } => self.re_register_region(region_id, observe_id, cause, None), Task::AdvanceResolvedTs { leader_resolver } => { self.handle_advance_resolved_ts(leader_resolver) } @@ -897,7 +919,7 @@ where unresolved_count += 1; } ResolverStatus::Ready { .. 
} => { - lock_heap_size += observe_region.resolver.size(); + lock_heap_size += observe_region.resolver.approximate_heap_bytes(); resolved_count += 1; } } diff --git a/components/resolved_ts/src/errors.rs b/components/resolved_ts/src/errors.rs index d9845440c07..b4a59a2c7a0 100644 --- a/components/resolved_ts/src/errors.rs +++ b/components/resolved_ts/src/errors.rs @@ -1,62 +1,13 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -use std::io::Error as IoError; - -use engine_traits::Error as EngineTraitsError; -use kvproto::errorpb::Error as ErrorHeader; -use raftstore::Error as RaftstoreError; use thiserror::Error; -use tikv::storage::{ - kv::{Error as KvError, ErrorInner as EngineErrorInner}, - mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, - txn::{Error as TxnError, ErrorInner as TxnErrorInner}, -}; -use txn_types::Error as TxnTypesError; #[derive(Debug, Error)] pub enum Error { - #[error("IO error {0}")] - Io(#[from] IoError), - #[error("Engine error {0}")] - Kv(#[from] KvError), - #[error("Transaction error {0}")] - Txn(#[from] TxnError), - #[error("Mvcc error {0}")] - Mvcc(#[from] MvccError), - #[error("Request error {0:?}")] - Request(Box), - #[error("Engine traits error {0}")] - EngineTraits(#[from] EngineTraitsError), - #[error("Txn types error {0}")] - TxnTypes(#[from] TxnTypesError), - #[error("Raftstore error {0}")] - Raftstore(#[from] RaftstoreError), + #[error("Memory quota exceeded")] + MemoryQuotaExceeded, #[error("Other error {0}")] Other(#[from] Box), } -impl Error { - pub fn request(err: ErrorHeader) -> Error { - Error::Request(Box::new(err)) - } - - pub fn extract_error_header(self) -> ErrorHeader { - match self { - Error::Kv(KvError(box EngineErrorInner::Request(e))) - | Error::Txn(TxnError(box TxnErrorInner::Engine(KvError( - box EngineErrorInner::Request(e), - )))) - | Error::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::Kv( - KvError(box EngineErrorInner::Request(e)), - ))))) - | 
Error::Request(box e) => e, - other => { - let mut e = ErrorHeader::default(); - e.set_message(format!("{:?}", other)); - e - } - } - } -} - pub type Result = std::result::Result; diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 4b04bf02322..1b0a07bf8e2 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -13,6 +13,7 @@ use txn_types::TimeStamp; use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; +const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB // Resolver resolves timestamps that guarantee no more commit will happen before // the timestamp. @@ -61,10 +62,19 @@ impl std::fmt::Debug for Resolver { impl Drop for Resolver { fn drop(&mut self) { // Free memory quota used by locks_by_key. + let mut bytes = 0; + let num_locks = self.num_locks(); for key in self.locks_by_key.keys() { - let bytes = key.heap_size(); - self.memory_quota.free(bytes); + bytes += self.lock_heap_size(key); + } + if bytes > ON_DROP_WARN_HEAP_SIZE { + warn!("drop huge resolver"; + "region_id" => self.region_id, + "bytes" => bytes, + "num_locks" => num_locks, + ); } + self.memory_quota.free(bytes); } } @@ -103,13 +113,6 @@ impl Resolver { self.stopped } - pub fn size(&self) -> usize { - self.locks_by_key.keys().map(|k| k.len()).sum::() - + self.locks_by_key.len() * std::mem::size_of::() - + self.lock_ts_heap.len() - * (std::mem::size_of::() + std::mem::size_of::>>()) - } - pub fn locks(&self) -> &BTreeMap>> { &self.lock_ts_heap } @@ -131,6 +134,33 @@ impl Resolver { self.tracked_index = index; } + // Return an approximate heap memory usage in bytes. + pub fn approximate_heap_bytes(&self) -> usize { + // memory used by locks_by_key. + let memory_quota_in_use = self.memory_quota.in_use(); + + // memory used by lock_ts_heap. 
+ let memory_lock_ts_heap = self.lock_ts_heap.len() + * (std::mem::size_of::() + std::mem::size_of::>>()) + // memory used by HashSet> + + self.locks_by_key.len() * std::mem::size_of::>(); + + memory_quota_in_use + memory_lock_ts_heap + } + + fn lock_heap_size(&self, key: &[u8]) -> usize { + // A resolver has + // * locks_by_key: HashMap, TimeStamp> + // * lock_ts_heap: BTreeMap>> + // + // We only count memory used by locks_by_key. Because the majority of + // memory is consumed by keys, locks_by_key and lock_ts_heap shares + // the same Arc<[u8]>, so lock_ts_heap is negligible. Also, it's hard to + // track accurate memory usage of lock_ts_heap as a timestamp may have + // many keys. + key.heap_size() + std::mem::size_of::() + } + #[must_use] pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec, index: Option) -> bool { if let Some(index) = index { @@ -142,7 +172,7 @@ impl Resolver { start_ts, self.region_id ); - let bytes = key.as_slice().heap_size(); + let bytes = self.lock_heap_size(&key); if !self.memory_quota.alloc(bytes) { return false; } @@ -157,7 +187,7 @@ impl Resolver { self.update_tracked_index(index); } let start_ts = if let Some(start_ts) = self.locks_by_key.remove(key) { - let bytes = key.heap_size(); + let bytes = self.lock_heap_size(key); self.memory_quota.free(bytes); start_ts } else { @@ -358,12 +388,13 @@ mod tests { let memory_quota = Arc::new(MemoryQuota::new(1024)); let mut resolver = Resolver::new(1, memory_quota.clone()); let mut key = vec![0; 77]; + let lock_size = resolver.lock_heap_size(&key); let mut ts = TimeStamp::default(); while resolver.track_lock(ts, key.clone(), None) { ts.incr(); key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); } - let remain = 1024 % key.len(); + let remain = 1024 % lock_size; assert_eq!(memory_quota.in_use(), 1024 - remain); let mut ts = TimeStamp::default(); @@ -372,7 +403,7 @@ mod tests { key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); resolver.untrack_lock(&key, None); } - 
assert_eq!(memory_quota.in_use(), 1024 - 5 * key.len() - remain); + assert_eq!(memory_quota.in_use(), 1024 - 5 * lock_size - remain); drop(resolver); assert_eq!(memory_quota.in_use(), 0); } diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index 0ca74bda29d..e8665e9d860 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -45,6 +45,7 @@ pub struct ScanTask { pub mode: ScanMode, pub region: Region, pub checkpoint_ts: TimeStamp, + pub backoff: Option, pub is_cancelled: IsCancelledCallback, pub send_entries: OnEntriesCallback, pub on_error: Option, @@ -84,6 +85,18 @@ impl, E: KvEngine> ScannerPool { pub fn spawn_task(&self, mut task: ScanTask) { let cdc_handle = self.cdc_handle.clone(); let fut = async move { + if let Some(backoff) = task.backoff { + if let Err(e) = GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + backoff) + .compat() + .await + { + error!("failed to backoff"; "err" => ?e); + } + if (task.is_cancelled)() { + return; + } + } let snap = match Self::get_snapshot(&mut task, cdc_handle).await { Ok(snap) => snap, Err(e) => { @@ -193,37 +206,36 @@ impl, E: KvEngine> ScannerPool { error!("failed to backoff"; "err" => ?e); } if (task.is_cancelled)() { - return Err(Error::Other("scan task cancelled".into())); + return Err(box_err!("scan task cancelled")); } } let (cb, fut) = tikv_util::future::paired_future_callback(); let change_cmd = ChangeObserver::from_rts(task.region.id, task.handle.clone()); - cdc_handle.capture_change( - task.region.id, - task.region.get_region_epoch().clone(), - change_cmd, - Callback::read(Box::new(cb)), - )?; + cdc_handle + .capture_change( + task.region.id, + task.region.get_region_epoch().clone(), + change_cmd, + Callback::read(Box::new(cb)), + ) + .map_err(|e| Error::Other(box_err!("{:?}", e)))?; let mut resp = box_try!(fut.await); if resp.response.get_header().has_error() { let err = resp.response.take_header().take_error(); // These two 
errors can't handled by retrying since the epoch and observe id is // unchanged if err.has_epoch_not_match() || err.get_message().contains("stale observe id") { - return Err(Error::request(err)); + return Err(box_err!("get snapshot failed: {:?}", err)); } last_err = Some(err) } else { return Ok(resp.snapshot.unwrap()); } } - Err(Error::Other( - format!( - "backoff timeout after {} try, last error: {:?}", - GET_SNAPSHOT_RETRY_TIME, - last_err.unwrap() - ) - .into(), + Err(box_err!( + "backoff timeout after {} try, last error: {:?}", + GET_SNAPSHOT_RETRY_TIME, + last_err.unwrap() )) } @@ -232,12 +244,14 @@ impl, E: KvEngine> ScannerPool { start: Option<&Key>, _checkpoint_ts: TimeStamp, ) -> Result<(Vec<(Key, Lock)>, bool)> { - let (locks, has_remaining) = reader.scan_locks( - start, - None, - |lock| matches!(lock.lock_type, LockType::Put | LockType::Delete), - DEFAULT_SCAN_BATCH_SIZE, - )?; + let (locks, has_remaining) = reader + .scan_locks( + start, + None, + |lock| matches!(lock.lock_type, LockType::Put | LockType::Delete), + DEFAULT_SCAN_BATCH_SIZE, + ) + .map_err(|e| Error::Other(box_err!("{:?}", e)))?; Ok((locks, has_remaining)) } @@ -245,7 +259,10 @@ impl, E: KvEngine> ScannerPool { let mut entries = Vec::with_capacity(DEFAULT_SCAN_BATCH_SIZE); let mut has_remaining = true; while entries.len() < entries.capacity() { - match scanner.next_entry()? { + match scanner + .next_entry() + .map_err(|e| Error::Other(box_err!("{:?}", e)))? 
+ { Some(entry) => { entries.push(entry); } diff --git a/components/resolved_ts/tests/integrations/mod.rs b/components/resolved_ts/tests/integrations/mod.rs index 7802108b92b..634aa66c601 100644 --- a/components/resolved_ts/tests/integrations/mod.rs +++ b/components/resolved_ts/tests/integrations/mod.rs @@ -2,11 +2,12 @@ #[path = "../mod.rs"] mod testsuite; -use std::time::Duration; +use std::{sync::mpsc::channel, time::Duration}; use futures::executor::block_on; use kvproto::{kvrpcpb::*, metapb::RegionEpoch}; use pd_client::PdClient; +use resolved_ts::Task; use tempfile::Builder; use test_raftstore::sleep_ms; use test_sst_importer::*; @@ -141,3 +142,92 @@ fn test_dynamic_change_advance_ts_interval() { suite.stop(); } + +#[test] +fn test_change_log_memory_quota_exceeded() { + let mut suite = TestSuite::new(1); + let region = suite.cluster.get_region(&[]); + + suite.must_get_rts_ge( + region.id, + block_on(suite.cluster.pd_client.get_tso()).unwrap(), + ); + + // Set a small memory quota to trigger memory quota exceeded. + suite.must_change_memory_quota(1, 1); + let (k, v) = (b"k1", b"v"); + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.to_vec(); + mutation.value = v.to_vec(); + suite.must_kv_prewrite(region.id, vec![mutation], k.to_vec(), start_ts, false); + + // Resolved ts should not advance. 
+ let (tx, rx) = channel(); + suite.must_schedule_task( + 1, + Task::GetDiagnosisInfo { + region_id: 1, + log_locks: false, + min_start_ts: u64::MAX, + callback: Box::new(move |res| { + tx.send(res).unwrap(); + }), + }, + ); + let res = rx.recv_timeout(Duration::from_secs(5)).unwrap(); + assert_eq!(res.unwrap().1, 0, "{:?}", res); + + suite.stop(); +} + +#[test] +fn test_scan_log_memory_quota_exceeded() { + let mut suite = TestSuite::new(1); + let region = suite.cluster.get_region(&[]); + + suite.must_get_rts_ge( + region.id, + block_on(suite.cluster.pd_client.get_tso()).unwrap(), + ); + + let (k, v) = (b"k1", b"v"); + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.to_vec(); + mutation.value = v.to_vec(); + suite.must_kv_prewrite(region.id, vec![mutation], k.to_vec(), start_ts, false); + + // Set a small memory quota to trigger memory quota exceeded. + suite.must_change_memory_quota(1, 1); + // Split region + suite.cluster.must_split(®ion, k); + + let r1 = suite.cluster.get_region(&[]); + let r2 = suite.cluster.get_region(k); + let current_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + // Wait for scan log. + sleep_ms(500); + // Resolved ts of region1 should be advanced + suite.must_get_rts_ge(r1.id, current_ts); + + // Resolved ts should not advance. 
+ let (tx, rx) = channel(); + suite.must_schedule_task( + r2.id, + Task::GetDiagnosisInfo { + region_id: r2.id, + log_locks: false, + min_start_ts: u64::MAX, + callback: Box::new(move |res| { + tx.send(res).unwrap(); + }), + }, + ); + let res = rx.recv_timeout(Duration::from_secs(5)).unwrap(); + assert_eq!(res.unwrap().1, 0, "{:?}", res); + + suite.stop(); +} diff --git a/components/resolved_ts/tests/mod.rs b/components/resolved_ts/tests/mod.rs index 4e6226f8935..830e2156e9f 100644 --- a/components/resolved_ts/tests/mod.rs +++ b/components/resolved_ts/tests/mod.rs @@ -122,8 +122,21 @@ impl TestSuite { ); c }; + self.must_schedule_task(store_id, Task::ChangeConfig { change }); + } + + pub fn must_change_memory_quota(&self, store_id: u64, bytes: u64) { + let change = { + let mut c = std::collections::HashMap::default(); + c.insert("memory_quota".to_owned(), ConfigValue::Size(bytes)); + c + }; + self.must_schedule_task(store_id, Task::ChangeConfig { change }); + } + + pub fn must_schedule_task(&self, store_id: u64, task: Task) { let scheduler = self.endpoints.get(&store_id).unwrap().scheduler(); - scheduler.schedule(Task::ChangeConfig { change }).unwrap(); + scheduler.schedule(task).unwrap(); } pub fn must_kv_prewrite( diff --git a/src/config/mod.rs b/src/config/mod.rs index 38369b3ee93..d9b9263e928 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2955,6 +2955,7 @@ pub struct ResolvedTsConfig { pub advance_ts_interval: ReadableDuration, #[online_config(skip)] pub scan_lock_pool_size: usize, + pub memory_quota: ReadableSize, } impl ResolvedTsConfig { @@ -2975,6 +2976,7 @@ impl Default for ResolvedTsConfig { enable: true, advance_ts_interval: ReadableDuration::secs(20), scan_lock_pool_size: 2, + memory_quota: ReadableSize::mb(256), } } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 8fdbaa00f25..87b1830e4f6 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -851,6 +851,7 @@ fn 
test_serde_custom_tikv_config() { enable: true, advance_ts_interval: ReadableDuration::secs(5), scan_lock_pool_size: 1, + memory_quota: ReadableSize::mb(1), }; value.causal_ts = CausalTsConfig { renew_interval: ReadableDuration::millis(100), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 053e7c45939..94f9ef1ecf1 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -705,6 +705,7 @@ sink-memory-quota = "7MB" enable = true advance-ts-interval = "5s" scan-lock-pool-size = 1 +memory-quota = "1MB" [split] detect-times = 10 From 517522b5e77b8e0aae667790b2961d88fb61a23b Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 29 Aug 2023 18:57:37 +0800 Subject: [PATCH 0882/1149] raftstore-v2: support column family based write buffer manager (#15453) ref tikv/tikv#12842 support column family based write buffer manager Signed-off-by: SpadeA-Tang --- Cargo.lock | 6 +-- components/engine_traits/src/flush.rs | 5 ++ src/config/mod.rs | 49 ++++++++++++++++++-- tests/failpoints/cases/mod.rs | 1 + tests/failpoints/cases/test_engine.rs | 53 ++++++++++++++++++++++ tests/integrations/config/mod.rs | 5 ++ tests/integrations/config/test-custom.toml | 1 + 7 files changed, 114 insertions(+), 6 deletions(-) create mode 100644 tests/failpoints/cases/test_engine.rs diff --git a/Cargo.lock b/Cargo.lock index 3c44a639e38..162d1f3ae07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3108,7 +3108,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#d861ede96cc2aae3c2ed5ea1c1c71454130a325e" +source = "git+https://github.com/tikv/rust-rocksdb.git#b68565569d711d78f8ae0d24e2d2b59f0fd03ef1" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3127,7 +3127,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = 
"git+https://github.com/tikv/rust-rocksdb.git#d861ede96cc2aae3c2ed5ea1c1c71454130a325e" +source = "git+https://github.com/tikv/rust-rocksdb.git#b68565569d711d78f8ae0d24e2d2b59f0fd03ef1" dependencies = [ "bzip2-sys", "cc", @@ -5101,7 +5101,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#d861ede96cc2aae3c2ed5ea1c1c71454130a325e" +source = "git+https://github.com/tikv/rust-rocksdb.git#b68565569d711d78f8ae0d24e2d2b59f0fd03ef1" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index eebf0e7c32a..d0f9f892f34 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -204,6 +204,11 @@ impl PersistenceListener { /// /// `smallest_seqno` should be the smallest seqno of the memtable. pub fn on_memtable_sealed(&self, cf: String, smallest_seqno: u64) { + (|| { + fail_point!("on_memtable_sealed", |t| { + assert_eq!(t.unwrap().as_str(), cf); + }) + })(); // The correctness relies on the assumption that there will be only one // thread writting to the DB and increasing apply index. // Apply index will be set within DB lock, so it's correct even with manual diff --git a/src/config/mod.rs b/src/config/mod.rs index d9b9263e928..2494e84dfbd 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -110,6 +110,7 @@ const RAFT_ENGINE_MEMORY_LIMIT_RATE: f64 = 0.15; const WRITE_BUFFER_MEMORY_LIMIT_RATE: f64 = 0.2; // Too large will increase Raft Engine memory usage. const WRITE_BUFFER_MEMORY_LIMIT_MAX: u64 = ReadableSize::gb(8).0; +const DEFAULT_LOCK_BUFFER_MEMORY_LIMIT: u64 = ReadableSize::mb(32).0; /// Configs that actually took effect in the last run pub const LAST_CONFIG_FILE: &str = "last_tikv.toml"; @@ -311,6 +312,7 @@ macro_rules! 
cf_config { #[online_config(skip)] pub compression_per_level: [DBCompressionType; 7], pub write_buffer_size: Option, + pub write_buffer_limit: Option, pub max_write_buffer_number: i32, #[online_config(skip)] pub min_write_buffer_number_to_merge: i32, @@ -668,6 +670,7 @@ macro_rules! build_cf_opt { pub struct CfResources { pub cache: Cache, pub compaction_thread_limiters: HashMap<&'static str, ConcurrentTaskLimiter>, + pub write_buffer_managers: HashMap<&'static str, Arc>, } cf_config!(DefaultCfConfig); @@ -734,6 +737,7 @@ impl Default for DefaultCfConfig { ttl: None, periodic_compaction_seconds: None, titan: TitanCfConfig::default(), + write_buffer_limit: None, } } } @@ -832,6 +836,9 @@ impl DefaultCfConfig { } } cf_opts.set_titan_cf_options(&self.titan.build_opts()); + if let Some(write_buffer_manager) = shared.write_buffer_managers.get(CF_DEFAULT) { + cf_opts.set_write_buffer_manager(write_buffer_manager); + } cf_opts } } @@ -906,6 +913,7 @@ impl Default for WriteCfConfig { ttl: None, periodic_compaction_seconds: None, titan, + write_buffer_limit: None, } } } @@ -962,6 +970,9 @@ impl WriteCfConfig { .unwrap(); } cf_opts.set_titan_cf_options(&self.titan.build_opts()); + if let Some(write_buffer_manager) = shared.write_buffer_managers.get(CF_WRITE) { + cf_opts.set_write_buffer_manager(write_buffer_manager); + } cf_opts } } @@ -1028,6 +1039,7 @@ impl Default for LockCfConfig { ttl: None, periodic_compaction_seconds: None, titan, + write_buffer_limit: None, } } } @@ -1062,6 +1074,9 @@ impl LockCfConfig { .unwrap(); } cf_opts.set_titan_cf_options(&self.titan.build_opts()); + if let Some(write_buffer_manager) = shared.write_buffer_managers.get(CF_LOCK) { + cf_opts.set_write_buffer_manager(write_buffer_manager); + } cf_opts } } @@ -1127,6 +1142,7 @@ impl Default for RaftCfConfig { ttl: None, periodic_compaction_seconds: None, titan, + write_buffer_limit: None, } } } @@ -1385,9 +1401,12 @@ impl DbConfig { // strategy is consistent with single RocksDB. 
self.defaultcf.max_compactions.get_or_insert(1); self.writecf.max_compactions.get_or_insert(1); - if self.lockcf.write_buffer_size.is_none() { - self.lockcf.write_buffer_size = Some(ReadableSize::mb(4)); - } + self.lockcf + .write_buffer_size + .get_or_insert(ReadableSize::mb(4)); + self.lockcf + .write_buffer_limit + .get_or_insert(ReadableSize::mb(DEFAULT_LOCK_BUFFER_MEMORY_LIMIT)); } } } @@ -1510,9 +1529,29 @@ impl DbConfig { ConcurrentTaskLimiter::new(CF_RAFT, n), ); } + let mut write_buffer_managers = HashMap::default(); + self.lockcf.write_buffer_limit.map(|limit| { + write_buffer_managers.insert( + CF_LOCK, + Arc::new(WriteBufferManager::new(limit.0 as usize, 0f32, true)), + ) + }); + self.defaultcf.write_buffer_limit.map(|limit| { + write_buffer_managers.insert( + CF_DEFAULT, + Arc::new(WriteBufferManager::new(limit.0 as usize, 0f32, true)), + ) + }); + self.writecf.write_buffer_limit.map(|limit| { + write_buffer_managers.insert( + CF_WRITE, + Arc::new(WriteBufferManager::new(limit.0 as usize, 0f32, true)), + ) + }); CfResources { cache, compaction_thread_limiters, + write_buffer_managers, } } @@ -1556,6 +1595,9 @@ impl DbConfig { self.writecf.validate()?; self.raftcf.validate()?; self.titan.validate()?; + if self.raftcf.write_buffer_limit.is_some() { + return Err("raftcf does not support cf based write buffer manager".into()); + } if self.enable_unordered_write { if self.titan.enabled { return Err("RocksDB.unordered_write does not support Titan".into()); @@ -1660,6 +1702,7 @@ impl Default for RaftDefaultCfConfig { ttl: None, periodic_compaction_seconds: None, titan: TitanCfConfig::default(), + write_buffer_limit: None, } } } diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index 9c90211c073..9baa04d0b4f 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -10,6 +10,7 @@ mod test_coprocessor; mod test_disk_full; mod test_early_apply; mod test_encryption; +mod test_engine; mod test_gc_metrics; mod 
test_gc_worker; mod test_hibernate; diff --git a/tests/failpoints/cases/test_engine.rs b/tests/failpoints/cases/test_engine.rs new file mode 100644 index 00000000000..93d1c96597b --- /dev/null +++ b/tests/failpoints/cases/test_engine.rs @@ -0,0 +1,53 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; +use tikv_util::config::ReadableSize; + +fn dummy_string(len: usize) -> String { + String::from_utf8(vec![0; len]).unwrap() +} + +#[test] +fn test_write_buffer_manager() { + use test_raftstore_v2::*; + let count = 1; + let mut cluster = new_node_cluster(0, count); + cluster.cfg.rocksdb.lockcf.write_buffer_limit = Some(ReadableSize::kb(10)); + cluster.cfg.rocksdb.defaultcf.write_buffer_limit = Some(ReadableSize::kb(10)); + cluster.cfg.rocksdb.write_buffer_limit = Some(ReadableSize::kb(30)); + + // Let write buffer size small to make memtable request fewer memories. + // Otherwise, one single memory request can exceeds the write buffer limit set + // above. 
+ cluster.cfg.rocksdb.lockcf.write_buffer_size = Some(ReadableSize::kb(64)); + cluster.cfg.rocksdb.writecf.write_buffer_size = Some(ReadableSize::kb(64)); + cluster.cfg.rocksdb.defaultcf.write_buffer_size = Some(ReadableSize::kb(64)); + cluster.run(); + + let dummy = dummy_string(500); + let fp = "on_memtable_sealed"; + fail::cfg(fp, "return(lock)").unwrap(); + + for i in 0..10 { + let key = format!("key-{:03}", i); + for cf in &[CF_WRITE, CF_LOCK] { + cluster.must_put_cf(cf, key.as_bytes(), dummy.as_bytes()); + } + } + + fail::cfg(fp, "return(default)").unwrap(); + + for i in 0..10 { + let key = format!("key-{:03}", i); + for cf in &[CF_WRITE, CF_DEFAULT] { + cluster.must_put_cf(cf, key.as_bytes(), dummy.as_bytes()); + } + } + + fail::cfg(fp, "return(write)").unwrap(); + let dummy = dummy_string(1000); + for i in 0..10 { + let key = format!("key-{:03}", i); + cluster.must_put_cf(CF_WRITE, key.as_bytes(), dummy.as_bytes()); + } +} diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 87b1830e4f6..d3091e30eed 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -388,6 +388,7 @@ fn test_serde_custom_tikv_config() { max_compactions: Some(3), ttl: Some(ReadableDuration::days(10)), periodic_compaction_seconds: Some(ReadableDuration::days(10)), + write_buffer_limit: None, }, writecf: WriteCfConfig { block_size: ReadableSize::kb(12), @@ -461,6 +462,7 @@ fn test_serde_custom_tikv_config() { max_compactions: Some(3), ttl: Some(ReadableDuration::days(10)), periodic_compaction_seconds: Some(ReadableDuration::days(10)), + write_buffer_limit: None, }, lockcf: LockCfConfig { block_size: ReadableSize::kb(12), @@ -534,6 +536,7 @@ fn test_serde_custom_tikv_config() { max_compactions: Some(3), ttl: Some(ReadableDuration::days(10)), periodic_compaction_seconds: Some(ReadableDuration::days(10)), + write_buffer_limit: Some(ReadableSize::mb(16)), }, raftcf: RaftCfConfig { block_size: ReadableSize::kb(12), @@ -607,6 
+610,7 @@ fn test_serde_custom_tikv_config() { max_compactions: Some(3), ttl: Some(ReadableDuration::days(10)), periodic_compaction_seconds: Some(ReadableDuration::days(10)), + write_buffer_limit: None, }, titan: titan_db_config.clone(), }; @@ -695,6 +699,7 @@ fn test_serde_custom_tikv_config() { max_compactions: Some(3), ttl: None, periodic_compaction_seconds: None, + write_buffer_limit: None, }, titan: titan_db_config, }; diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 94f9ef1ecf1..653c3d2daef 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -435,6 +435,7 @@ compression-per-level = [ "lz4", ] write-buffer-size = "1MB" +write-buffer-limit = "16MB" max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 max-bytes-for-level-base = "12KB" From 4b3e33e6c208e445388c43a99a5707d03421f7bd Mon Sep 17 00:00:00 2001 From: ShuNing Date: Wed, 30 Aug 2023 10:21:37 +0800 Subject: [PATCH 0883/1149] pd_client: add backoff for the reconnect retries (#15429) ref tikv/pd#6556, close tikv/tikv#15428 pc_client: add store-level backoff for the reconnect retries Signed-off-by: nolouch Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/pd_client/src/client_v2.rs | 14 ++-- components/pd_client/src/metrics.rs | 27 +++++-- components/pd_client/src/util.rs | 100 +++++++++++++++++++------- 3 files changed, 99 insertions(+), 42 deletions(-) diff --git a/components/pd_client/src/client_v2.rs b/components/pd_client/src/client_v2.rs index 5b0d563f2b8..97b2702fc39 100644 --- a/components/pd_client/src/client_v2.rs +++ b/components/pd_client/src/client_v2.rs @@ -117,7 +117,7 @@ impl RawClient { /// Returns Ok(true) when a new connection is established. 
async fn maybe_reconnect(&mut self, ctx: &ConnectContext, force: bool) -> Result { - PD_RECONNECT_COUNTER_VEC.with_label_values(&["try"]).inc(); + PD_RECONNECT_COUNTER_VEC.try_connect.inc(); let start = Instant::now(); let members = self.members.clone(); @@ -135,21 +135,15 @@ impl RawClient { .await { Err(e) => { - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["failure"]) - .inc(); + PD_RECONNECT_COUNTER_VEC.failure.inc(); return Err(e); } Ok(None) => { - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["no-need"]) - .inc(); + PD_RECONNECT_COUNTER_VEC.no_need.inc(); return Ok(false); } Ok(Some(tuple)) => { - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["success"]) - .inc(); + PD_RECONNECT_COUNTER_VEC.success.inc(); tuple } }; diff --git a/components/pd_client/src/metrics.rs b/components/pd_client/src/metrics.rs index d92e334396a..4e185658f15 100644 --- a/components/pd_client/src/metrics.rs +++ b/components/pd_client/src/metrics.rs @@ -2,7 +2,7 @@ use lazy_static::lazy_static; use prometheus::*; -use prometheus_static_metric::{make_static_metric, register_static_histogram_vec}; +use prometheus_static_metric::*; make_static_metric! { pub label_enum PDRequestEventType { @@ -40,9 +40,20 @@ make_static_metric! { meta_storage_watch, } + pub label_enum PDReconnectEventKind { + success, + failure, + no_need, + cancel, + try_connect, + } + pub struct PDRequestEventHistogramVec: Histogram { "type" => PDRequestEventType, } + pub struct PDReconnectEventCounterVec: IntCounter { + "type" => PDReconnectEventKind, + } } lazy_static! { @@ -66,12 +77,14 @@ lazy_static! 
{ &["type"] ) .unwrap(); - pub static ref PD_RECONNECT_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( - "tikv_pd_reconnect_total", - "Total number of PD reconnections.", - &["type"] - ) - .unwrap(); + pub static ref PD_RECONNECT_COUNTER_VEC: PDReconnectEventCounterVec = + register_static_int_counter_vec!( + PDReconnectEventCounterVec, + "tikv_pd_reconnect_total", + "Total number of PD reconnections.", + &["type"] + ) + .unwrap(); pub static ref PD_PENDING_HEARTBEAT_GAUGE: IntGauge = register_int_gauge!( "tikv_pd_pending_heartbeat_total", "Total number of pending region heartbeat" diff --git a/components/pd_client/src/util.rs b/components/pd_client/src/util.rs index 5491a51c047..66b084d4998 100644 --- a/components/pd_client/src/util.rs +++ b/components/pd_client/src/util.rs @@ -50,6 +50,7 @@ const MAX_RETRY_TIMES: u64 = 5; // The max duration when retrying to connect to leader. No matter if the // MAX_RETRY_TIMES is reached. const MAX_RETRY_DURATION: Duration = Duration::from_secs(10); +const MAX_BACKOFF: Duration = Duration::from_secs(3); // FIXME: Use a request-independent way to handle reconnection. pub const REQUEST_RECONNECT_INTERVAL: Duration = Duration::from_secs(1); // 1s @@ -116,6 +117,7 @@ pub struct Inner { pub rg_resp: Option>, last_try_reconnect: Instant, + bo: ExponentialBackoff, } impl Inner { @@ -168,7 +170,6 @@ pub struct Client { pub(crate) inner: RwLock, pub feature_gate: FeatureGate, enable_forwarding: bool, - retry_interval: Duration, } impl Client { @@ -219,6 +220,7 @@ impl Client { pending_heartbeat: Arc::default(), pending_buckets: Arc::default(), last_try_reconnect: Instant::now(), + bo: ExponentialBackoff::new(retry_interval), tso, meta_storage, rg_sender: Either::Left(Some(rg_sender)), @@ -226,7 +228,6 @@ impl Client { }), feature_gate: FeatureGate::default(), enable_forwarding, - retry_interval, } } @@ -363,17 +364,15 @@ impl Client { /// Note: Retrying too quickly will return an error due to cancellation. 
/// Please always try to reconnect after sending the request first. pub async fn reconnect(&self, force: bool) -> Result<()> { - PD_RECONNECT_COUNTER_VEC.with_label_values(&["try"]).inc(); + PD_RECONNECT_COUNTER_VEC.try_connect.inc(); let start = Instant::now(); let future = { let inner = self.inner.rl(); - if start.saturating_duration_since(inner.last_try_reconnect) < self.retry_interval { + if start.saturating_duration_since(inner.last_try_reconnect) < inner.bo.get_interval() { // Avoid unnecessary updating. // Prevent a large number of reconnections in a short time. - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["cancel"]) - .inc(); + PD_RECONNECT_COUNTER_VEC.cancel.inc(); return Err(box_err!("cancel reconnection due to too small interval")); } let connector = PdConnector::new(inner.env.clone(), inner.security_mgr.clone()); @@ -394,36 +393,38 @@ impl Client { { let mut inner = self.inner.wl(); - if start.saturating_duration_since(inner.last_try_reconnect) < self.retry_interval { + if start.saturating_duration_since(inner.last_try_reconnect) < inner.bo.get_interval() { // There may be multiple reconnections that pass the read lock at the same time. // Check again in the write lock to avoid unnecessary updating. 
- PD_RECONNECT_COUNTER_VEC - .with_label_values(&["cancel"]) - .inc(); + PD_RECONNECT_COUNTER_VEC.cancel.inc(); return Err(box_err!("cancel reconnection due to too small interval")); } inner.last_try_reconnect = start; + inner.bo.next_backoff(); } slow_log!(start.saturating_elapsed(), "try reconnect pd"); let (client, target_info, members, tso) = match future.await { Err(e) => { - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["failure"]) - .inc(); + PD_RECONNECT_COUNTER_VEC.failure.inc(); return Err(e); } - Ok(None) => { - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["no-need"]) - .inc(); - return Ok(()); - } - Ok(Some(tuple)) => { - PD_RECONNECT_COUNTER_VEC - .with_label_values(&["success"]) - .inc(); - tuple + Ok(res) => { + // Reset the retry count. + { + let mut inner = self.inner.wl(); + inner.bo.reset() + } + match res { + None => { + PD_RECONNECT_COUNTER_VEC.no_need.inc(); + return Ok(()); + } + Some(tuple) => { + PD_RECONNECT_COUNTER_VEC.success.inc(); + tuple + } + } } }; @@ -900,6 +901,33 @@ impl PdConnector { } } +/// Simple backoff strategy. 
+struct ExponentialBackoff { + base: Duration, + interval: Duration, +} + +impl ExponentialBackoff { + pub fn new(base: Duration) -> Self { + Self { + base, + interval: base, + } + } + pub fn next_backoff(&mut self) -> Duration { + self.interval = std::cmp::min(self.interval * 2, MAX_BACKOFF); + self.interval + } + + pub fn get_interval(&self) -> Duration { + self.interval + } + + pub fn reset(&mut self) { + self.interval = self.base; + } +} + pub fn trim_http_prefix(s: &str) -> &str { s.trim_start_matches("http://") .trim_start_matches("https://") @@ -1045,8 +1073,11 @@ pub fn merge_bucket_stats, I: AsRef<[u8]>>( mod test { use kvproto::metapb::BucketStats; + use super::*; use crate::{merge_bucket_stats, util::find_bucket_index}; + const BASE_BACKOFF: Duration = Duration::from_millis(100); + #[test] fn test_merge_bucket_stats() { #[allow(clippy::type_complexity)] @@ -1162,4 +1193,23 @@ mod test { assert_eq!(find_bucket_index(b"k7", &keys), Some(4)); assert_eq!(find_bucket_index(b"k8", &keys), Some(4)); } + + #[test] + fn test_exponential_backoff() { + let mut backoff = ExponentialBackoff::new(BASE_BACKOFF); + assert_eq!(backoff.get_interval(), BASE_BACKOFF); + + assert_eq!(backoff.next_backoff(), 2 * BASE_BACKOFF); + assert_eq!(backoff.next_backoff(), Duration::from_millis(400)); + assert_eq!(backoff.get_interval(), Duration::from_millis(400)); + + // Should not exceed MAX_BACKOFF + for _ in 0..20 { + backoff.next_backoff(); + } + assert_eq!(backoff.get_interval(), MAX_BACKOFF); + + backoff.reset(); + assert_eq!(backoff.get_interval(), BASE_BACKOFF); + } } From 0bb270621f6d561560156c38cc21240ceae97c00 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Wed, 30 Aug 2023 14:44:08 +0800 Subject: [PATCH 0884/1149] coprocessor: skip transient read request (#15406) close tikv/tikv#15405 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- src/coprocessor/metrics.rs | 6 ++ 
src/coprocessor/tracker.rs | 175 +++++++++++++++++++++---------------- 2 files changed, 104 insertions(+), 77 deletions(-) diff --git a/src/coprocessor/metrics.rs b/src/coprocessor/metrics.rs index 64905b3dfba..02f45d35311 100644 --- a/src/coprocessor/metrics.rs +++ b/src/coprocessor/metrics.rs @@ -208,6 +208,12 @@ impl CopLocalMetrics { pub fn local_read_stats(&self) -> &ReadStats { &self.local_read_stats } + + #[cfg(test)] + pub fn clear(&mut self) { + self.local_read_stats.region_infos.clear(); + self.local_read_stats.region_buckets.clear(); + } } thread_local! { diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index 18eaa0b6e98..71d84388c3b 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -350,20 +350,24 @@ impl Tracker { false }; - tls_collect_query( - region_id, - peer, - start_key.as_encoded(), - end_key.as_encoded(), - reverse_scan, - ); - tls_collect_read_flow( - self.req_ctx.context.get_region_id(), - Some(start_key.as_encoded()), - Some(end_key.as_encoded()), - &total_storage_stats, - self.buckets.as_ref(), - ); + // only collect metrics for select and index, exclude transient read flow such + // like analyze and checksum. 
+ if self.req_ctx.tag == ReqTag::select || self.req_ctx.tag == ReqTag::index { + tls_collect_query( + region_id, + peer, + start_key.as_encoded(), + end_key.as_encoded(), + reverse_scan, + ); + tls_collect_read_flow( + self.req_ctx.context.get_region_id(), + Some(start_key.as_encoded()), + Some(end_key.as_encoded()), + &total_storage_stats, + self.buckets.as_ref(), + ); + } self.current_stage = TrackerState::Tracked; } @@ -443,69 +447,86 @@ mod tests { #[test] fn test_track() { - let mut context = kvrpcpb::Context::default(); - context.set_region_id(1); - - let mut req_ctx = ReqContext::new( - ReqTag::test, - context, - vec![], - Duration::from_secs(0), - None, - None, - TimeStamp::max(), - None, - PerfLevel::EnableCount, - ); - req_ctx.lower_bound = vec![ - 116, 128, 0, 0, 0, 0, 0, 0, 184, 95, 114, 128, 0, 0, 0, 0, 0, 70, 67, - ]; - req_ctx.upper_bound = vec![ - 116, 128, 0, 0, 0, 0, 0, 0, 184, 95, 114, 128, 0, 0, 0, 0, 0, 70, 167, - ]; - let mut track: Tracker = Tracker::new(req_ctx, Duration::default()); - let mut bucket = BucketMeta::default(); - bucket.region_id = 1; - bucket.version = 1; - bucket.keys = vec![ - vec![ - 116, 128, 0, 0, 0, 0, 0, 0, 255, 179, 95, 114, 128, 0, 0, 0, 0, 255, 0, 175, 155, - 0, 0, 0, 0, 0, 250, - ], - vec![ - 116, 128, 0, 255, 255, 255, 255, 255, 255, 254, 0, 0, 0, 0, 0, 0, 0, 248, - ], - ]; - bucket.sizes = vec![10]; - track.buckets = Some(Arc::new(bucket)); - - let mut stat = Statistics::default(); - stat.write.flow_stats.read_keys = 10; - track.total_storage_stats = stat; - - track.track(); - drop(track); - TLS_COP_METRICS.with(|m| { - assert_eq!( - 10, - m.borrow() - .local_read_stats() - .region_infos - .get(&1) - .unwrap() - .flow - .read_keys - ); - assert_eq!( - vec![10], - m.borrow() - .local_read_stats() - .region_buckets - .get(&1) - .unwrap() - .stats - .read_keys + let check = move |tag: ReqTag, flow: u64| { + let mut context = kvrpcpb::Context::default(); + context.set_region_id(1); + let mut req_ctx = ReqContext::new( 
+ tag, + context, + vec![], + Duration::from_secs(0), + None, + None, + TimeStamp::max(), + None, + PerfLevel::EnableCount, ); - }); + + req_ctx.lower_bound = vec![ + 116, 128, 0, 0, 0, 0, 0, 0, 184, 95, 114, 128, 0, 0, 0, 0, 0, 70, 67, + ]; + req_ctx.upper_bound = vec![ + 116, 128, 0, 0, 0, 0, 0, 0, 184, 95, 114, 128, 0, 0, 0, 0, 0, 70, 167, + ]; + let mut track: Tracker = Tracker::new(req_ctx, Duration::default()); + let mut bucket = BucketMeta::default(); + bucket.region_id = 1; + bucket.version = 1; + bucket.keys = vec![ + vec![ + 116, 128, 0, 0, 0, 0, 0, 0, 255, 179, 95, 114, 128, 0, 0, 0, 0, 255, 0, 175, + 155, 0, 0, 0, 0, 0, 250, + ], + vec![ + 116, 128, 0, 255, 255, 255, 255, 255, 255, 254, 0, 0, 0, 0, 0, 0, 0, 248, + ], + ]; + bucket.sizes = vec![10]; + track.buckets = Some(Arc::new(bucket)); + + let mut stat = Statistics::default(); + stat.write.flow_stats.read_keys = 10; + track.total_storage_stats = stat; + + track.track(); + drop(track); + TLS_COP_METRICS.with(|m| { + if flow > 0 { + assert_eq!( + flow as usize, + m.borrow() + .local_read_stats() + .region_infos + .get(&1) + .unwrap() + .flow + .read_keys + ); + assert_eq!( + flow, + m.borrow() + .local_read_stats() + .region_buckets + .get(&1) + .unwrap() + .stats + .read_keys[0] + ); + } else { + assert!(m.borrow().local_read_stats().region_infos.get(&1).is_none()); + assert!( + m.borrow() + .local_read_stats() + .region_buckets + .get(&1) + .is_none() + ); + } + + m.borrow_mut().clear(); + }); + }; + check(ReqTag::select, 10); + check(ReqTag::analyze_full_sampling, 0); } } From fb9a40d20dcfb9ceb7cecba9d471fa8575c05913 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Wed, 30 Aug 2023 15:18:38 +0800 Subject: [PATCH 0885/1149] raftstore-v2: init persisted_tablet_index on startup (#15441) ref tikv/tikv#12842 - Initialize `persisted_apply_index` on startup. 
Signed-off-by: tabokie --- .../raftstore-v2/src/operation/command/admin/compact_log.rs | 4 ++-- components/raftstore-v2/src/operation/life.rs | 4 ++++ components/raftstore-v2/src/raft/peer.rs | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 93876475f5f..d054234b46f 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -63,14 +63,14 @@ pub struct CompactLogContext { } impl CompactLogContext { - pub fn new(last_applying_index: u64) -> CompactLogContext { + pub fn new(last_applying_index: u64, persisted_applied: u64) -> CompactLogContext { CompactLogContext { skipped_ticks: 0, approximate_log_size: 0, last_applying_index, last_compacted_idx: 0, tombstone_tablets_wait_index: vec![], - persisted_tablet_index: AtomicU64::new(0).into(), + persisted_tablet_index: AtomicU64::new(persisted_applied).into(), } } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index e0e7f63785d..8fe1d2a07b3 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -797,11 +797,15 @@ impl Peer { if self.has_pending_tombstone_tablets() { let applied_index = self.entry_storage().applied_index(); let last_index = self.entry_storage().last_index(); + let persisted = self + .remember_persisted_tablet_index() + .load(std::sync::atomic::Ordering::Relaxed); info!( self.logger, "postpone destroy because there're pending tombstone tablets"; "applied_index" => applied_index, "last_index" => last_index, + "persisted_applied" => persisted, ); return true; } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 2f3a3376fe9..c3a80e3756c 100644 --- a/components/raftstore-v2/src/raft/peer.rs 
+++ b/components/raftstore-v2/src/raft/peer.rs @@ -158,6 +158,7 @@ impl Peer { let region_id = storage.region().get_id(); let tablet_index = storage.region_state().get_tablet_index(); let merge_context = MergeContext::from_region_state(&logger, storage.region_state()); + let persisted_applied = storage.apply_trace().persisted_apply_index(); let raft_group = RawNode::new(&raft_cfg, storage, &logger)?; let region = raft_group.store().region_state().get_region().clone(); @@ -184,7 +185,7 @@ impl Peer { self_stat: PeerStat::default(), peer_cache: vec![], peer_heartbeats: HashMap::default(), - compact_log_context: CompactLogContext::new(applied_index), + compact_log_context: CompactLogContext::new(applied_index, persisted_applied), merge_context: merge_context.map(|c| Box::new(c)), last_sent_snapshot_index: 0, raw_write_encoder: None, From 69b8ac5717119290ba721fae61edb894440a80fc Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 30 Aug 2023 17:30:39 +0800 Subject: [PATCH 0886/1149] raftstore-v2: consider unmatch between region range and tablet range for mvcc scan (#15455) ref tikv/tikv#14654 consider unmatch between region range and tablet range for mvcc scan --- components/engine_rocks/src/util.rs | 8 + src/server/debug2.rs | 240 ++++++------------------ tests/failpoints/cases/mod.rs | 1 + tests/failpoints/cases/test_debugger.rs | 147 +++++++++++++++ 4 files changed, 216 insertions(+), 180 deletions(-) create mode 100644 tests/failpoints/cases/test_debugger.rs diff --git a/components/engine_rocks/src/util.rs b/components/engine_rocks/src/util.rs index 225cd1d7f06..e4991419eed 100644 --- a/components/engine_rocks/src/util.rs +++ b/components/engine_rocks/src/util.rs @@ -3,6 +3,7 @@ use std::{ffi::CString, fs, path::Path, str::FromStr, sync::Arc}; use engine_traits::{Engines, Range, Result, CF_DEFAULT}; +use fail::fail_point; use rocksdb::{ load_latest_options, CColumnFamilyDescriptor, CFHandle, ColumnFamilyOptions, 
CompactionFilter, CompactionFilterContext, CompactionFilterDecision, CompactionFilterFactory, @@ -462,6 +463,13 @@ pub struct RangeCompactionFilterFactory(Arc); impl RangeCompactionFilterFactory { pub fn new(start_key: Box<[u8]>, end_key: Box<[u8]>) -> Self { + fail_point!("unlimited_range_compaction_filter", |_| { + let range = OwnedRange { + start_key: keys::data_key(b"").into_boxed_slice(), + end_key: keys::data_end_key(b"").into_boxed_slice(), + }; + Self(Arc::new(range)) + }); let range = OwnedRange { start_key, end_key }; Self(Arc::new(range)) } diff --git a/src/server/debug2.rs b/src/server/debug2.rs index e914b353760..cf17aea81eb 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -10,7 +10,7 @@ use engine_traits::{ TabletRegistry, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use futures::future::Future; -use keys::{data_key, DATA_MAX_KEY, DATA_PREFIX_KEY}; +use keys::{data_key, enc_end_key, enc_start_key, DATA_MAX_KEY, DATA_PREFIX_KEY}; use kvproto::{ debugpb::Db as DbType, kvrpcpb::MvccInfo, @@ -36,6 +36,34 @@ use crate::{ storage::mvcc::{MvccInfoCollector, MvccInfoScanner}, }; +// `key1` and `key2` should both be start_key or end_key. +fn smaller_key<'a>(key1: &'a [u8], key2: &'a [u8], is_end_key: bool) -> &'a [u8] { + if is_end_key && key1.is_empty() { + return key2; + } + if is_end_key && key2.is_empty() { + return key1; + } + if key1 < key2 { + return key1; + } + key2 +} + +// `key1` and `key2` should both be start_key or end_key. 
+fn larger_key<'a>(key1: &'a [u8], key2: &'a [u8], is_end_key: bool) -> &'a [u8] { + if is_end_key && key1.is_empty() { + return key1; + } + if is_end_key && key2.is_empty() { + return key2; + } + if key1 < key2 { + return key2; + } + key1 +} + // return the region containing the seek_key or the next region if not existed fn seek_region( seek_key: &[u8], @@ -98,11 +126,16 @@ impl MvccInfoIteratorV2 { )?; let tablet = tablet_cache.latest().unwrap(); + let region_start_key = enc_start_key(first_region_state.get_region()); + let region_end_key = enc_end_key(first_region_state.get_region()); + let iter_start = larger_key(start, ®ion_start_key, false); + let iter_end = smaller_key(end, ®ion_end_key, true); + assert!(!iter_start.is_empty() && !iter_start.is_empty()); let scanner = Some( MvccInfoScanner::new( |cf, opts| tablet.iterator_opt(cf, opts).map_err(|e| box_err!(e)), - if start.is_empty() { None } else { Some(start) }, - if end.is_empty() { None } else { Some(end) }, + Some(iter_start), + Some(iter_end), MvccInfoCollector::default(), ) .map_err(|e| -> Error { box_err!(e) })?, @@ -171,19 +204,16 @@ impl Iterator for MvccInfoIteratorV2 { ) .unwrap(); let tablet = tablet_cache.latest().unwrap(); + let region_start_key = enc_start_key(&self.cur_region); + let region_end_key = enc_end_key(&self.cur_region); + let iter_start = larger_key(&self.start, ®ion_start_key, false); + let iter_end = smaller_key(&self.end, ®ion_end_key, true); + assert!(!iter_start.is_empty() && !iter_start.is_empty()); self.scanner = Some( MvccInfoScanner::new( |cf, opts| tablet.iterator_opt(cf, opts).map_err(|e| box_err!(e)), - if self.start.is_empty() { - None - } else { - Some(self.start.as_bytes()) - }, - if self.end.is_empty() { - None - } else { - Some(self.end.as_bytes()) - }, + Some(iter_start), + Some(iter_end), MvccInfoCollector::default(), ) .unwrap(), @@ -1154,38 +1184,28 @@ fn deivde_regions_for_concurrency( Ok(regions_groups) } -// `key1` and `key2` should both be start_key or 
end_key. -fn smaller_key<'a>(key1: &'a [u8], key2: &'a [u8], end_key: bool) -> &'a [u8] { - if end_key && key1.is_empty() { - return key2; - } - if end_key && key2.is_empty() { - return key1; - } - if key1 < key2 { - return key1; - } - key2 -} +#[cfg(any(test, feature = "testexport"))] +pub fn new_debugger(path: &std::path::Path) -> DebuggerImplV2 { + use crate::{config::TikvConfig, server::KvEngineFactoryBuilder}; -// `key1` and `key2` should both be start_key or end_key. -fn larger_key<'a>(key1: &'a [u8], key2: &'a [u8], end_key: bool) -> &'a [u8] { - if end_key && key1.is_empty() { - return key1; - } - if end_key && key2.is_empty() { - return key2; - } - if key1 < key2 { - return key2; - } - key1 + let mut cfg = TikvConfig::default(); + cfg.storage.data_dir = path.to_str().unwrap().to_string(); + cfg.raft_store.raftdb_path = cfg.infer_raft_db_path(None).unwrap(); + cfg.raft_engine.mut_config().dir = cfg.infer_raft_engine_path(None).unwrap(); + let cache = cfg.storage.block_cache.build_shared_cache(); + let env = cfg.build_shared_rocks_env(None, None).unwrap(); + + let factory = KvEngineFactoryBuilder::new(env, &cfg, cache, None).build(); + let reg = TabletRegistry::new(Box::new(factory), path).unwrap(); + + let raft_engine = + raft_log_engine::RaftLogEngine::new(cfg.raft_engine.config(), None, None).unwrap(); + + DebuggerImplV2::new(reg, raft_engine, ConfigController::default()) } #[cfg(test)] mod tests { - use std::path::Path; - use collections::HashMap; use engine_traits::{ RaftEngineReadOnly, RaftLogBatch, SyncMutable, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_WRITE, @@ -1196,36 +1216,13 @@ mod tests { raft_serverpb::*, }; use raft::prelude::EntryType; - use raft_log_engine::RaftLogEngine; use raftstore::store::RAFT_INIT_LOG_INDEX; use tikv_util::store::new_peer; use super::*; - use crate::{ - config::TikvConfig, - server::KvEngineFactoryBuilder, - storage::{txn::tests::must_prewrite_put, TestEngineBuilder}, - }; - const INITIAL_TABLET_INDEX: u64 = 5; const 
INITIAL_APPLY_INDEX: u64 = 5; - fn new_debugger(path: &Path) -> DebuggerImplV2 { - let mut cfg = TikvConfig::default(); - cfg.storage.data_dir = path.to_str().unwrap().to_string(); - cfg.raft_store.raftdb_path = cfg.infer_raft_db_path(None).unwrap(); - cfg.raft_engine.mut_config().dir = cfg.infer_raft_engine_path(None).unwrap(); - let cache = cfg.storage.block_cache.build_shared_cache(); - let env = cfg.build_shared_rocks_env(None, None).unwrap(); - - let factory = KvEngineFactoryBuilder::new(env, &cfg, cache, None).build(); - let reg = TabletRegistry::new(Box::new(factory), path).unwrap(); - - let raft_engine = RaftLogEngine::new(cfg.raft_engine.config(), None, None).unwrap(); - - DebuggerImplV2::new(reg, raft_engine, ConfigController::default()) - } - impl DebuggerImplV2 { fn set_store_id(&self, store_id: u64) { let mut ident = self.get_store_ident().unwrap_or_default(); @@ -1458,123 +1455,6 @@ mod tests { debugger.region_size(region_id, cfs.clone()).unwrap_err(); } - // For simplicity, the format of the key is inline with data in - // prepare_data_on_disk - fn extract_key(key: &[u8]) -> &[u8] { - &key[1..4] - } - - // Prepare some data - // Data for each region: - // Region 1: k00 .. k04 - // Region 2: k05 .. k09 - // Region 3: k10 .. k14 - // Region 4: k15 .. k19 - // Region 5: k20 .. k24 - // Region 6: k26 .. 
k28 - fn prepare_data_on_disk(path: &Path) { - let mut cfg = TikvConfig::default(); - cfg.storage.data_dir = path.to_str().unwrap().to_string(); - cfg.raft_store.raftdb_path = cfg.infer_raft_db_path(None).unwrap(); - cfg.raft_engine.mut_config().dir = cfg.infer_raft_engine_path(None).unwrap(); - cfg.gc.enable_compaction_filter = false; - let cache = cfg.storage.block_cache.build_shared_cache(); - let env = cfg.build_shared_rocks_env(None, None).unwrap(); - - let factory = KvEngineFactoryBuilder::new(env, &cfg, cache, None).build(); - let reg = TabletRegistry::new(Box::new(factory), path).unwrap(); - - let raft_engine = RaftLogEngine::new(cfg.raft_engine.config(), None, None).unwrap(); - let mut wb = raft_engine.log_batch(5); - for i in 0..6 { - let mut region = metapb::Region::default(); - let start_key = format!("k{:02}", i * 5); - let end_key = format!("k{:02}", (i + 1) * 5); - region.set_id(i + 1); - region.set_start_key(start_key.into_bytes()); - region.set_end_key(end_key.into_bytes()); - let mut region_state = RegionLocalState::default(); - region_state.set_tablet_index(INITIAL_TABLET_INDEX); - if region.get_id() == 4 { - region_state.set_state(PeerState::Tombstone); - } else if region.get_id() == 6 { - region.set_start_key(b"k26".to_vec()); - region.set_end_key(b"k28".to_vec()); - } - region_state.set_region(region); - - let tablet_path = reg.tablet_path(i + 1, INITIAL_TABLET_INDEX); - // Use tikv_kv::RocksEngine instead of loading tablet from registry in order to - // use prewrite method to prepare mvcc data - let mut engine = TestEngineBuilder::new().path(tablet_path).build().unwrap(); - for i in i * 5..(i + 1) * 5 { - let key = format!("zk{:02}", i); - let val = format!("val{:02}", i); - // Use prewrite only is enough for preparing mvcc data - must_prewrite_put( - &mut engine, - key.as_bytes(), - val.as_bytes(), - key.as_bytes(), - 10, - ); - } - - wb.put_region_state(i + 1, INITIAL_APPLY_INDEX, ®ion_state) - .unwrap(); - } - raft_engine.consume(&mut wb, 
true).unwrap(); - } - - #[test] - fn test_scan_mvcc() { - let dir = test_util::temp_dir("test-debugger", false); - prepare_data_on_disk(dir.path()); - let debugger = new_debugger(dir.path()); - // Test scan with bad start, end or limit. - assert!(debugger.scan_mvcc(b"z", b"", 0).is_err()); - assert!(debugger.scan_mvcc(b"z", b"x", 3).is_err()); - - let verify_scanner = - |range, scanner: &mut dyn Iterator, MvccInfo)>>| { - for i in range { - let key = format!("k{:02}", i).into_bytes(); - assert_eq!(key, extract_key(&scanner.next().unwrap().unwrap().0)); - } - }; - - // full scann - let mut scanner = debugger.scan_mvcc(b"", b"", 100).unwrap(); - verify_scanner(0..15, &mut scanner); - verify_scanner(20..25, &mut scanner); - verify_scanner(26..28, &mut scanner); - assert!(scanner.next().is_none()); - - // Range has more elements than limit - let mut scanner = debugger.scan_mvcc(b"zk01", b"zk09", 5).unwrap(); - verify_scanner(1..6, &mut scanner); - assert!(scanner.next().is_none()); - - // Range has less elements than limit - let mut scanner = debugger.scan_mvcc(b"zk07", b"zk10", 10).unwrap(); - verify_scanner(7..10, &mut scanner); - assert!(scanner.next().is_none()); - - // Start from the key where no region contains it - let mut scanner = debugger.scan_mvcc(b"zk16", b"", 100).unwrap(); - verify_scanner(20..25, &mut scanner); - verify_scanner(26..28, &mut scanner); - assert!(scanner.next().is_none()); - - // Scan a range not existed in the cluster - let mut scanner = debugger.scan_mvcc(b"zk16", b"zk19", 100).unwrap(); - assert!(scanner.next().is_none()); - - // The end key is less than the start_key of the first region - let mut scanner = debugger.scan_mvcc(b"", b"zj", 100).unwrap(); - assert!(scanner.next().is_none()); - } - #[test] fn test_compact() { let dir = test_util::temp_dir("test-debugger", false); diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index 9baa04d0b4f..a9dbd36a81a 100644 --- a/tests/failpoints/cases/mod.rs +++ 
b/tests/failpoints/cases/mod.rs @@ -7,6 +7,7 @@ mod test_bootstrap; mod test_cmd_epoch_checker; mod test_conf_change; mod test_coprocessor; +mod test_debugger; mod test_disk_full; mod test_early_apply; mod test_encryption; diff --git a/tests/failpoints/cases/test_debugger.rs b/tests/failpoints/cases/test_debugger.rs new file mode 100644 index 00000000000..f70ebcb6d32 --- /dev/null +++ b/tests/failpoints/cases/test_debugger.rs @@ -0,0 +1,147 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::path::Path; + +use engine_traits::{RaftEngine, RaftLogBatch, TabletRegistry}; +use kvproto::{ + kvrpcpb::MvccInfo, + metapb, + raft_serverpb::{PeerState, RegionLocalState}, +}; +use raft_log_engine::RaftLogEngine; +use test_raftstore::new_peer; +use tikv::{ + config::TikvConfig, + server::{debug::Debugger, debug2::new_debugger, KvEngineFactoryBuilder}, + storage::{txn::tests::must_prewrite_put, TestEngineBuilder}, +}; + +const INITIAL_TABLET_INDEX: u64 = 5; +const INITIAL_APPLY_INDEX: u64 = 5; + +// Prepare some data +// Region meta range and rocksdb range of each region: +// Region 1: k01 .. k04 rocksdb: zk00 .. zk04 +// Region 2: k05 .. k09 rocksdb: zk05 .. zk09 +// Region 3: k10 .. k14 rocksdb: zk10 .. zk14 +// Region 4: k15 .. k19 rocksdb: zk15 .. zk19 +// Region 5: k20 .. k24 rocksdb: zk20 .. zk24 +// Region 6: k26 .. k27 rocksdb: zk25 .. 
zk29 +fn prepare_data_on_disk(path: &Path) { + let mut cfg = TikvConfig::default(); + cfg.storage.data_dir = path.to_str().unwrap().to_string(); + cfg.raft_store.raftdb_path = cfg.infer_raft_db_path(None).unwrap(); + cfg.raft_engine.mut_config().dir = cfg.infer_raft_engine_path(None).unwrap(); + cfg.gc.enable_compaction_filter = false; + let cache = cfg.storage.block_cache.build_shared_cache(); + let env = cfg.build_shared_rocks_env(None, None).unwrap(); + + let factory = KvEngineFactoryBuilder::new(env, &cfg, cache, None).build(); + let reg = TabletRegistry::new(Box::new(factory), path).unwrap(); + + let raft_engine = RaftLogEngine::new(cfg.raft_engine.config(), None, None).unwrap(); + let mut wb = raft_engine.log_batch(5); + for i in 0..6 { + let mut region = metapb::Region::default(); + let start_key = if i != 0 { + format!("k{:02}", i * 5) + } else { + String::from("k01") + }; + let end_key = format!("k{:02}", (i + 1) * 5); + region.set_id(i + 1); + region.set_start_key(start_key.into_bytes()); + region.set_end_key(end_key.into_bytes()); + let mut region_state = RegionLocalState::default(); + region_state.set_tablet_index(INITIAL_TABLET_INDEX); + if region.get_id() == 4 { + region_state.set_state(PeerState::Tombstone); + } else if region.get_id() == 6 { + region.set_start_key(b"k26".to_vec()); + region.set_end_key(b"k28".to_vec()); + } + // add dummy peer to pass verification + region.mut_peers().push(new_peer(0, 0)); + region_state.set_region(region); + + let tablet_path = reg.tablet_path(i + 1, INITIAL_TABLET_INDEX); + // Use tikv_kv::RocksEngine instead of loading tablet from registry in order to + // use prewrite method to prepare mvcc data + let mut engine = TestEngineBuilder::new().path(tablet_path).build().unwrap(); + for i in i * 5..(i + 1) * 5 { + let key = format!("zk{:02}", i); + let val = format!("val{:02}", i); + // Use prewrite only is enough for preparing mvcc data + must_prewrite_put( + &mut engine, + key.as_bytes(), + val.as_bytes(), + 
key.as_bytes(), + 10, + ); + } + + wb.put_region_state(i + 1, INITIAL_APPLY_INDEX, ®ion_state) + .unwrap(); + } + raft_engine.consume(&mut wb, true).unwrap(); +} + +// For simplicity, the format of the key is inline with data in +// prepare_data_on_disk +fn extract_key(key: &[u8]) -> &[u8] { + &key[1..4] +} + +#[test] +fn test_scan_mvcc() { + // We deliberately make region meta not match with rocksdb, set unlimited range + // compaction filter to avoid trim operation. + fail::cfg("unlimited_range_compaction_filter", "return").unwrap(); + + let dir = test_util::temp_dir("test-debugger", false); + prepare_data_on_disk(dir.path()); + let debugger = new_debugger(dir.path()); + // Test scan with bad start, end or limit. + assert!(debugger.scan_mvcc(b"z", b"", 0).is_err()); + assert!(debugger.scan_mvcc(b"z", b"x", 3).is_err()); + + let verify_scanner = + |range, scanner: &mut dyn Iterator, MvccInfo)>>| { + for i in range { + let key = format!("k{:02}", i).into_bytes(); + assert_eq!(key, extract_key(&scanner.next().unwrap().unwrap().0)); + } + }; + + // full scan + let mut scanner = debugger.scan_mvcc(b"", b"", 100).unwrap(); + verify_scanner(1..15, &mut scanner); + verify_scanner(20..25, &mut scanner); + verify_scanner(26..28, &mut scanner); + assert!(scanner.next().is_none()); + + // Range has more elements than limit + let mut scanner = debugger.scan_mvcc(b"zk01", b"zk09", 5).unwrap(); + verify_scanner(1..6, &mut scanner); + assert!(scanner.next().is_none()); + + // Range has less elements than limit + let mut scanner = debugger.scan_mvcc(b"zk07", b"zk10", 10).unwrap(); + verify_scanner(7..10, &mut scanner); + assert!(scanner.next().is_none()); + + // Start from the key where no region contains it + let mut scanner = debugger.scan_mvcc(b"zk16", b"", 100).unwrap(); + verify_scanner(20..25, &mut scanner); + verify_scanner(26..28, &mut scanner); + assert!(scanner.next().is_none()); + + // Scan a range not existed in the cluster + let mut scanner = 
debugger.scan_mvcc(b"zk16", b"zk19", 100).unwrap(); + assert!(scanner.next().is_none()); + + // The end key is less than the start_key of the first region + let mut scanner = debugger.scan_mvcc(b"", b"zj", 100).unwrap(); + assert!(scanner.next().is_none()); +} From 1669a72fac8176cc7a2be7fe10f43f1657d4c21f Mon Sep 17 00:00:00 2001 From: ekexium Date: Wed, 30 Aug 2023 17:45:40 +0800 Subject: [PATCH 0887/1149] txn: add logs for assertion failure (#12305) close tikv/tikv#12304 Add logs for assertion failure Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/storage/txn/actions/prewrite.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 90f739b8705..64e22a13585 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -239,6 +239,7 @@ impl LockStatus { } /// A single mutation to be prewritten. +#[derive(Debug)] struct PrewriteMutation<'a> { key: Key, value: Option, @@ -677,6 +678,12 @@ impl<'a> PrewriteMutation<'a> { if self.skip_constraint_check() { self.check_for_newer_version(reader)?; } + let (write, commit_ts) = write + .as_ref() + .map(|(w, ts)| (Some(w), Some(ts))) + .unwrap_or((None, None)); + error!("assertion failure"; "assertion" => ?self.assertion, "write" => ?write, + "commit_ts" => commit_ts, "mutation" => ?self); assertion_err?; } From b507aad3be0eaa6c96033ef7300605bda833bf54 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Thu, 31 Aug 2023 16:05:09 +0800 Subject: [PATCH 0888/1149] config: make split config can update (#15473) close tikv/tikv#15403 1. split config support to update dynamic. In past, the `optimize_for` function will set the config immutable. 
Signed-off-by: bufferflies <1045931706@qq.com> --- components/raftstore/src/store/worker/pd.rs | 2 +- .../src/store/worker/split_config.rs | 58 ++++++++++++++----- .../src/store/worker/split_controller.rs | 45 +++++++------- src/config/mod.rs | 12 ++-- tests/integrations/config/mod.rs | 6 +- tests/integrations/raftstore/test_stats.rs | 2 +- 6 files changed, 81 insertions(+), 44 deletions(-) diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index d812830569a..e8c8e2f575b 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -658,7 +658,7 @@ where // Register the region CPU records collector. if auto_split_controller .cfg - .region_cpu_overload_threshold_ratio + .region_cpu_overload_threshold_ratio() > 0.0 { region_cpu_records_collector = diff --git a/components/raftstore/src/store/worker/split_config.rs b/components/raftstore/src/store/worker/split_config.rs index 8fec853bb00..2d29bd21a89 100644 --- a/components/raftstore/src/store/worker/split_config.rs +++ b/components/raftstore/src/store/worker/split_config.rs @@ -68,18 +68,18 @@ pub fn get_sample_num() -> usize { #[serde(default)] #[serde(rename_all = "kebab-case")] pub struct SplitConfig { - pub qps_threshold: usize, + pub qps_threshold: Option, pub split_balance_score: f64, pub split_contained_score: f64, pub detect_times: u64, pub sample_num: usize, pub sample_threshold: u64, - pub byte_threshold: usize, + pub byte_threshold: Option, #[doc(hidden)] pub grpc_thread_cpu_overload_threshold_ratio: f64, #[doc(hidden)] pub unified_read_pool_thread_cpu_overload_threshold_ratio: f64, - pub region_cpu_overload_threshold_ratio: f64, + pub region_cpu_overload_threshold_ratio: Option, // deprecated. 
#[online_config(skip)] #[doc(hidden)] @@ -95,18 +95,18 @@ pub struct SplitConfig { impl Default for SplitConfig { fn default() -> SplitConfig { SplitConfig { - qps_threshold: DEFAULT_QPS_THRESHOLD, + qps_threshold: None, split_balance_score: DEFAULT_SPLIT_BALANCE_SCORE, split_contained_score: DEFAULT_SPLIT_CONTAINED_SCORE, detect_times: DEFAULT_DETECT_TIMES, sample_num: DEFAULT_SAMPLE_NUM, sample_threshold: DEFAULT_SAMPLE_THRESHOLD, - byte_threshold: DEFAULT_BYTE_THRESHOLD, + byte_threshold: None, grpc_thread_cpu_overload_threshold_ratio: DEFAULT_GRPC_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO, unified_read_pool_thread_cpu_overload_threshold_ratio: DEFAULT_UNIFIED_READ_POOL_THREAD_CPU_OVERLOAD_THRESHOLD_RATIO, - region_cpu_overload_threshold_ratio: REGION_CPU_OVERLOAD_THRESHOLD_RATIO, + region_cpu_overload_threshold_ratio: None, size_threshold: None, // deprecated. key_threshold: None, // deprecated. } @@ -124,7 +124,7 @@ impl SplitConfig { ("split_balance_score or split_contained_score should be between 0 and 1.").into(), ); } - if self.sample_num >= self.qps_threshold { + if self.sample_num >= self.qps_threshold() { return Err( ("sample_num should be less than qps_threshold for load-base-split.").into(), ); @@ -133,20 +133,52 @@ impl SplitConfig { || self.grpc_thread_cpu_overload_threshold_ratio < 0.0 || self.unified_read_pool_thread_cpu_overload_threshold_ratio > 1.0 || self.unified_read_pool_thread_cpu_overload_threshold_ratio < 0.0 - || self.region_cpu_overload_threshold_ratio > 1.0 - || self.region_cpu_overload_threshold_ratio < 0.0 + || self.region_cpu_overload_threshold_ratio() > 1.0 + || self.region_cpu_overload_threshold_ratio() < 0.0 { return Err(("threshold ratio should be between 0 and 1.").into()); } Ok(()) } + pub fn qps_threshold(&self) -> usize { + self.qps_threshold.unwrap_or(DEFAULT_QPS_THRESHOLD) + } + + pub fn byte_threshold(&self) -> usize { + self.byte_threshold.unwrap_or(DEFAULT_BYTE_THRESHOLD) + } + + pub fn 
region_cpu_overload_threshold_ratio(&self) -> f64 { + self.region_cpu_overload_threshold_ratio + .unwrap_or(REGION_CPU_OVERLOAD_THRESHOLD_RATIO) + } + pub fn optimize_for(&mut self, region_size: ReadableSize) { const LARGE_REGION_SIZE_IN_MB: u64 = 4096; - if region_size.as_mb() >= LARGE_REGION_SIZE_IN_MB { - self.qps_threshold = DEFAULT_BIG_REGION_QPS_THRESHOLD; - self.region_cpu_overload_threshold_ratio = BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO; - self.byte_threshold = DEFAULT_BIG_REGION_BYTE_THRESHOLD; + let big_size = region_size.as_mb() >= LARGE_REGION_SIZE_IN_MB; + if self.qps_threshold.is_none() { + self.qps_threshold = Some(if big_size { + DEFAULT_BIG_REGION_QPS_THRESHOLD + } else { + DEFAULT_QPS_THRESHOLD + }); + } + + if self.byte_threshold.is_none() { + self.byte_threshold = Some(if big_size { + DEFAULT_BIG_REGION_BYTE_THRESHOLD + } else { + DEFAULT_BYTE_THRESHOLD + }); + } + + if self.region_cpu_overload_threshold_ratio.is_none() { + self.region_cpu_overload_threshold_ratio = Some(if big_size { + BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO + } else { + REGION_CPU_OVERLOAD_THRESHOLD_RATIO + }); } } } diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index d432f264e01..4bbcc773763 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -608,7 +608,7 @@ impl AutoSplitController { } fn should_check_region_cpu(&self) -> bool { - self.cfg.region_cpu_overload_threshold_ratio > 0.0 + self.cfg.region_cpu_overload_threshold_ratio() > 0.0 } fn is_grpc_poll_busy(&self, avg_grpc_thread_usage: f64) -> bool { @@ -643,7 +643,7 @@ impl AutoSplitController { return false; } region_cpu_usage / unified_read_pool_thread_usage - >= self.cfg.region_cpu_overload_threshold_ratio + >= self.cfg.region_cpu_overload_threshold_ratio() } // collect the read stats from read_stats_vec and dispatch them to a Region @@ -787,9 +787,9 
@@ impl AutoSplitController { debug!("load base split params"; "region_id" => region_id, "qps" => qps, - "qps_threshold" => self.cfg.qps_threshold, + "qps_threshold" => self.cfg.qps_threshold(), "byte" => byte, - "byte_threshold" => self.cfg.byte_threshold, + "byte_threshold" => self.cfg.byte_threshold(), "cpu_usage" => cpu_usage, "is_region_busy" => is_region_busy, ); @@ -800,8 +800,8 @@ impl AutoSplitController { // 1. If the QPS or the byte does not meet the threshold, skip. // 2. If the Unified Read Pool or the region is not hot enough, skip. - if qps < self.cfg.qps_threshold - && byte < self.cfg.byte_threshold + if qps < self.cfg.qps_threshold() + && byte < self.cfg.byte_threshold() && (!is_unified_read_pool_busy || !is_region_busy) { self.recorders.remove_entry(®ion_id); @@ -917,13 +917,13 @@ impl AutoSplitController { pub fn refresh_and_check_cfg(&mut self) -> SplitConfigChange { let mut cfg_change = SplitConfigChange::Noop; if let Some(incoming) = self.cfg_tracker.any_new() { - if self.cfg.region_cpu_overload_threshold_ratio <= 0.0 - && incoming.region_cpu_overload_threshold_ratio > 0.0 + if self.cfg.region_cpu_overload_threshold_ratio() <= 0.0 + && incoming.region_cpu_overload_threshold_ratio() > 0.0 { cfg_change = SplitConfigChange::UpdateRegionCpuCollector(true); } - if self.cfg.region_cpu_overload_threshold_ratio > 0.0 - && incoming.region_cpu_overload_threshold_ratio <= 0.0 + if self.cfg.region_cpu_overload_threshold_ratio() > 0.0 + && incoming.region_cpu_overload_threshold_ratio() <= 0.0 { cfg_change = SplitConfigChange::UpdateRegionCpuCollector(false); } @@ -943,12 +943,12 @@ impl AutoSplitController { mod tests { use online_config::{ConfigChange, ConfigManager, ConfigValue}; use resource_metering::{RawRecord, TagInfos}; - use tikv_util::config::VersionTrack; + use tikv_util::config::{ReadableSize, VersionTrack}; use txn_types::Key; use super::*; use crate::store::worker::split_config::{ - DEFAULT_SAMPLE_NUM, REGION_CPU_OVERLOAD_THRESHOLD_RATIO, + 
BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, DEFAULT_SAMPLE_NUM, }; enum Position { @@ -1193,7 +1193,7 @@ mod tests { fn check_split_key(mode: &[u8], qps_stats: Vec, split_keys: Vec<&[u8]>) { let mode = String::from_utf8(Vec::from(mode)).unwrap(); let mut hub = AutoSplitController::default(); - hub.cfg.qps_threshold = 1; + hub.cfg.qps_threshold = Some(1); hub.cfg.sample_threshold = 0; for i in 0..10 { @@ -1226,7 +1226,7 @@ mod tests { ) { let mode = String::from_utf8(Vec::from(mode)).unwrap(); let mut hub = AutoSplitController::default(); - hub.cfg.qps_threshold = 1; + hub.cfg.qps_threshold = Some(1); hub.cfg.sample_threshold = 0; for i in 0..10 { @@ -1291,7 +1291,7 @@ mod tests { #[test] fn test_sample_key_num() { let mut hub = AutoSplitController::default(); - hub.cfg.qps_threshold = 2000; + hub.cfg.qps_threshold = Some(2000); hub.cfg.sample_num = 2000; hub.cfg.sample_threshold = 0; @@ -1608,7 +1608,8 @@ mod tests { #[test] fn test_refresh_and_check_cfg() { - let split_config = SplitConfig::default(); + let mut split_config = SplitConfig::default(); + split_config.optimize_for(ReadableSize::mb(5000)); let mut split_cfg_manager = SplitConfigManager::new(Arc::new(VersionTrack::new(split_config))); let mut auto_split_controller = @@ -1620,8 +1621,8 @@ mod tests { assert_eq!( auto_split_controller .cfg - .region_cpu_overload_threshold_ratio, - REGION_CPU_OVERLOAD_THRESHOLD_RATIO + .region_cpu_overload_threshold_ratio(), + BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO ); // Set to zero. 
dispatch_split_cfg_change( @@ -1636,7 +1637,7 @@ mod tests { assert_eq!( auto_split_controller .cfg - .region_cpu_overload_threshold_ratio, + .region_cpu_overload_threshold_ratio(), 0.0 ); assert_eq!( @@ -1647,7 +1648,7 @@ mod tests { dispatch_split_cfg_change( &mut split_cfg_manager, "region_cpu_overload_threshold_ratio", - ConfigValue::F64(REGION_CPU_OVERLOAD_THRESHOLD_RATIO), + ConfigValue::F64(0.1), ); assert_eq!( auto_split_controller.refresh_and_check_cfg(), @@ -1656,8 +1657,8 @@ mod tests { assert_eq!( auto_split_controller .cfg - .region_cpu_overload_threshold_ratio, - REGION_CPU_OVERLOAD_THRESHOLD_RATIO + .region_cpu_overload_threshold_ratio(), + 0.1 ); assert_eq!( auto_split_controller.refresh_and_check_cfg(), diff --git a/src/config/mod.rs b/src/config/mod.rs index 2494e84dfbd..8c0c04957b1 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -6139,12 +6139,12 @@ mod tests { assert_eq!(default_cfg.coprocessor.region_split_size(), SPLIT_SIZE); assert!(!default_cfg.coprocessor.enable_region_bucket()); - assert_eq!(default_cfg.split.qps_threshold, DEFAULT_QPS_THRESHOLD); + assert_eq!(default_cfg.split.qps_threshold(), DEFAULT_QPS_THRESHOLD); assert_eq!( - default_cfg.split.region_cpu_overload_threshold_ratio, + default_cfg.split.region_cpu_overload_threshold_ratio(), REGION_CPU_OVERLOAD_THRESHOLD_RATIO ); - assert_eq!(default_cfg.split.byte_threshold, DEFAULT_BYTE_THRESHOLD); + assert_eq!(default_cfg.split.byte_threshold(), DEFAULT_BYTE_THRESHOLD); let mut default_cfg = TikvConfig::default(); default_cfg.storage.engine = EngineType::RaftKv2; @@ -6154,15 +6154,15 @@ mod tests { RAFTSTORE_V2_SPLIT_SIZE ); assert_eq!( - default_cfg.split.qps_threshold, + default_cfg.split.qps_threshold(), DEFAULT_BIG_REGION_QPS_THRESHOLD ); assert_eq!( - default_cfg.split.region_cpu_overload_threshold_ratio, + default_cfg.split.region_cpu_overload_threshold_ratio(), BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO ); assert_eq!( - default_cfg.split.byte_threshold, + 
default_cfg.split.byte_threshold(), DEFAULT_BIG_REGION_BYTE_THRESHOLD ); assert!(default_cfg.coprocessor.enable_region_bucket()); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index d3091e30eed..c6e98e95c05 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -864,10 +864,14 @@ fn test_serde_custom_tikv_config() { renew_batch_max_size: 8192, alloc_ahead_buffer: ReadableDuration::millis(3000), }; + value + .split + .optimize_for(value.coprocessor.region_max_size()); value.resource_control = ResourceControlConfig { enabled: false }; let custom = read_file_in_project_dir("integrations/config/test-custom.toml"); - let load = toml::from_str(&custom).unwrap(); + let mut load: TikvConfig = toml::from_str(&custom).unwrap(); + load.split.optimize_for(load.coprocessor.region_max_size()); assert_eq_debug(&value, &load); let dump = toml::to_string_pretty(&load).unwrap(); diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index 67e5e261dab..d61d6a59182 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -575,7 +575,7 @@ pub fn test_rollback() { fn test_query_num(query: Box, is_raw_kv: bool) { let (mut cluster, client, mut ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); - cluster.cfg.split.qps_threshold = 0; + cluster.cfg.split.qps_threshold = Some(0); cluster.cfg.split.split_balance_score = 2.0; cluster.cfg.split.split_contained_score = 2.0; cluster.cfg.split.detect_times = 1; From 251df183b0d089d01e629791124f70c3cbb6fdbf Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 31 Aug 2023 16:24:39 +0800 Subject: [PATCH 0889/1149] raftstore-v2: reuse failpoint tests in async_io_test.rs (#15476) ref tikv/tikv#15409 reuse failpoint tests in async_io_test 
Signed-off-by: SpadeA-Tang --- .../raftstore-v2/src/operation/command/mod.rs | 1 + components/test_raftstore-v2/src/cluster.rs | 25 ++++++++++++++++++- components/test_raftstore/src/cluster.rs | 22 ++++++++-------- tests/failpoints/cases/test_async_io.rs | 17 ++++++++++--- .../cases/test_cmd_epoch_checker.rs | 9 +++---- tests/failpoints/cases/test_disk_full.rs | 20 +++++++-------- tests/failpoints/cases/test_merge.rs | 6 ++--- .../raftstore/test_joint_consensus.rs | 16 ++++-------- tests/integrations/raftstore/test_merge.rs | 6 ++--- 9 files changed, 74 insertions(+), 48 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 0fd88cc987b..c39f2412f32 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -583,6 +583,7 @@ impl Apply { fail::fail_point!("APPLY_COMMITTED_ENTRIES"); fail::fail_point!("on_handle_apply_1003", self.peer_id() == 1003, |_| {}); fail::fail_point!("on_handle_apply_2", self.peer_id() == 2, |_| {}); + fail::fail_point!("on_handle_apply", |_| {}); fail::fail_point!("on_handle_apply_store_1", self.store_id() == 1, |_| {}); let now = std::time::Instant::now(); let apply_wait_time = APPLY_TASK_WAIT_TIME_HISTOGRAM.local(); diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 08de4cc3aa1..8ede3290167 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -51,7 +51,7 @@ use tempfile::TempDir; use test_pd_client::TestPdClient; use test_raftstore::{ check_raft_cmd_request, is_error_response, new_admin_request, new_delete_cmd, - new_delete_range_cmd, new_get_cf_cmd, new_peer, new_prepare_merge, new_put_cf_cmd, + new_delete_range_cmd, new_get_cf_cmd, new_peer, new_prepare_merge, new_put_cf_cmd, new_put_cmd, new_region_detail_cmd, new_region_leader_cmd, new_request, new_status_request, 
new_store, new_tikv_config_with_api_ver, new_transfer_leader_cmd, sleep_ms, Config, Filter, FilterFactory, PartitionFilterFactory, RawEngine, @@ -1263,6 +1263,29 @@ impl, EK: KvEngine> Cluster { panic!("find no region for {}", log_wrappers::hex_encode_upper(key)); } + pub fn async_request( + &mut self, + mut req: RaftCmdRequest, + ) -> BoxFuture<'static, RaftCmdResponse> { + let region_id = req.get_header().get_region_id(); + let leader = self.leader_of_region(region_id).unwrap(); + req.mut_header().set_peer(leader.clone()); + self.sim + .wl() + .async_command_on_node(leader.get_store_id(), req) + } + + pub fn async_put( + &mut self, + key: &[u8], + value: &[u8], + ) -> Result> { + let mut region = self.get_region(key); + let reqs = vec![new_put_cmd(key, value)]; + let put = new_request(region.get_id(), region.take_region_epoch(), reqs, false); + Ok(self.async_request(put)) + } + pub fn must_put(&mut self, key: &[u8], value: &[u8]) { self.must_put_cf(CF_DEFAULT, key, value); } diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 23edf0efab1..e65028fe968 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -19,7 +19,7 @@ use engine_traits::{ WriteBatch, WriteBatchExt, CF_DEFAULT, CF_RAFT, }; use file_system::IoRateLimiter; -use futures::{self, channel::oneshot, executor::block_on, future::BoxFuture}; +use futures::{self, channel::oneshot, executor::block_on, future::BoxFuture, StreamExt}; use kvproto::{ errorpb::Error as PbError, kvrpcpb::{ApiVersion, Context, DiskFullOpt}, @@ -51,7 +51,6 @@ use tempfile::TempDir; use test_pd_client::TestPdClient; use tikv::server::Result as ServerResult; use tikv_util::{ - mpsc::future, thread_group::GroupProperties, time::{Instant, ThreadReadId}, worker::LazyWorker, @@ -969,7 +968,7 @@ impl Cluster { pub fn async_request( &mut self, req: RaftCmdRequest, - ) -> Result> { + ) -> Result> { self.async_request_with_opts(req, 
Default::default()) } @@ -977,21 +976,24 @@ impl Cluster { &mut self, mut req: RaftCmdRequest, opts: RaftCmdExtraOpts, - ) -> Result> { + ) -> Result> { let region_id = req.get_header().get_region_id(); let leader = self.leader_of_region(region_id).unwrap(); req.mut_header().set_peer(leader.clone()); - let (cb, rx) = make_cb(&req); + let (cb, mut rx) = make_cb(&req); self.sim .rl() .async_command_on_node_with_opts(leader.get_store_id(), req, cb, opts)?; - Ok(rx) + Ok(Box::pin(async move { + let fut = rx.next(); + fut.await.unwrap() + })) } pub fn async_exit_joint( &mut self, region_id: u64, - ) -> Result> { + ) -> Result> { let region = block_on(self.pd_client.get_region_by_id(region_id)) .unwrap() .unwrap(); @@ -1007,7 +1009,7 @@ impl Cluster { &mut self, key: &[u8], value: &[u8], - ) -> Result> { + ) -> Result> { let mut region = self.get_region(key); let reqs = vec![new_put_cmd(key, value)]; let put = new_request(region.get_id(), region.take_region_epoch(), reqs, false); @@ -1018,7 +1020,7 @@ impl Cluster { &mut self, region_id: u64, peer: metapb::Peer, - ) -> Result> { + ) -> Result> { let region = block_on(self.pd_client.get_region_by_id(region_id)) .unwrap() .unwrap(); @@ -1031,7 +1033,7 @@ impl Cluster { &mut self, region_id: u64, peer: metapb::Peer, - ) -> Result> { + ) -> Result> { let region = block_on(self.pd_client.get_region_by_id(region_id)) .unwrap() .unwrap(); diff --git a/tests/failpoints/cases/test_async_io.rs b/tests/failpoints/cases/test_async_io.rs index 3d53b9c5f14..8ce349805b0 100644 --- a/tests/failpoints/cases/test_async_io.rs +++ b/tests/failpoints/cases/test_async_io.rs @@ -8,13 +8,15 @@ use std::{ use pd_client::PdClient; use raft::eraftpb::MessageType; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::HandyRwLock; // Test if the entries can be committed and applied on followers even when // leader's io is paused. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_async_io_commit_without_leader_persist() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.cmd_batch_concurrent_ready_max_count = 0; cluster.cfg.raft_store.store_io_pool_size = 2; let pd_client = Arc::clone(&cluster.pd_client); @@ -49,9 +51,10 @@ fn test_async_io_commit_without_leader_persist() { /// Test if the leader delays its destroy after applying conf change to /// remove itself. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_async_io_delay_destroy_after_conf_change() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.store_io_pool_size = 2; let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -93,6 +96,9 @@ fn test_async_io_delay_destroy_after_conf_change() { /// Test if the peer can be destroyed when it receives a tombstone msg and /// its snapshot is persisting. +/// +/// Note: snapshot flow is changed, so partitioend-raft-kv does not support this +/// test. #[test] fn test_async_io_cannot_destroy_when_persist_snapshot() { let mut cluster = new_node_cluster(0, 3); @@ -176,6 +182,9 @@ fn test_async_io_cannot_destroy_when_persist_snapshot() { } /// Test if the peer can handle ready when its snapshot is persisting. +/// +/// Note: snapshot flow is changed, so partitioend-raft-kv does not support this +/// test. 
#[test] fn test_async_io_cannot_handle_ready_when_persist_snapshot() { let mut cluster = new_node_cluster(0, 3); diff --git a/tests/failpoints/cases/test_cmd_epoch_checker.rs b/tests/failpoints/cases/test_cmd_epoch_checker.rs index 73bc741d9bb..8af8e29f3ac 100644 --- a/tests/failpoints/cases/test_cmd_epoch_checker.rs +++ b/tests/failpoints/cases/test_cmd_epoch_checker.rs @@ -10,7 +10,7 @@ use kvproto::raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}; use raft::eraftpb::MessageType; use raftstore::store::msg::*; use test_raftstore::*; -use tikv_util::{mpsc::future, HandyRwLock}; +use tikv_util::{future::block_on_timeout, mpsc::future, HandyRwLock}; struct CbReceivers { proposed: mpsc::Receiver<()>, @@ -399,9 +399,7 @@ fn test_accept_proposal_during_conf_change() { let conf_change_fp = "apply_on_conf_change_all_1"; fail::cfg(conf_change_fp, "pause").unwrap(); let mut add_peer_rx = cluster.async_add_peer(r, new_peer(2, 2)).unwrap(); - add_peer_rx - .recv_timeout(Duration::from_millis(100)) - .unwrap_err(); + block_on_timeout(add_peer_rx.as_mut(), Duration::from_millis(100)).unwrap_err(); // Conf change doesn't affect proposals. let write_req = make_write_req(&mut cluster, b"k"); @@ -419,8 +417,7 @@ fn test_accept_proposal_during_conf_change() { fail::remove(conf_change_fp); assert!( - !add_peer_rx - .recv_timeout(Duration::from_secs(1)) + !block_on_timeout(add_peer_rx, Duration::from_secs(1)) .unwrap() .get_header() .has_error() diff --git a/tests/failpoints/cases/test_disk_full.rs b/tests/failpoints/cases/test_disk_full.rs index bd4271be12d..217269bb5b8 100644 --- a/tests/failpoints/cases/test_disk_full.rs +++ b/tests/failpoints/cases/test_disk_full.rs @@ -86,8 +86,8 @@ fn test_disk_full_leader_behaviors(usage: DiskUsage) { // Test new normal proposals won't be allowed when disk is full. 
let old_last_index = cluster.raft_local_state(1, 1).last_index; - let mut rx = cluster.async_put(b"k2", b"v2").unwrap(); - assert_disk_full(&rx.recv_timeout(Duration::from_secs(2)).unwrap()); + let rx = cluster.async_put(b"k2", b"v2").unwrap(); + assert_disk_full(&block_on_timeout(rx, Duration::from_secs(2)).unwrap()); let new_last_index = cluster.raft_local_state(1, 1).last_index; assert_eq!(old_last_index, new_last_index); @@ -299,8 +299,8 @@ fn test_majority_disk_full() { } // Normal proposals will be rejected because of majority peers' disk full. - let mut ch = cluster.async_put(b"k2", b"v2").unwrap(); - let resp = ch.recv_timeout(Duration::from_secs(1)).unwrap(); + let ch = cluster.async_put(b"k2", b"v2").unwrap(); + let resp = block_on_timeout(ch, Duration::from_secs(1)).unwrap(); assert_eq!(disk_full_stores(&resp), vec![2, 3]); // Proposals with special `DiskFullOpt`s can be accepted even if all peers are @@ -310,8 +310,8 @@ fn test_majority_disk_full() { let put = new_request(1, epoch.clone(), reqs, false); let mut opts = RaftCmdExtraOpts::default(); opts.disk_full_opt = DiskFullOpt::AllowedOnAlmostFull; - let mut ch = cluster.async_request_with_opts(put, opts).unwrap(); - let resp = ch.recv_timeout(Duration::from_secs(1)).unwrap(); + let ch = cluster.async_request_with_opts(put, opts).unwrap(); + let resp = block_on_timeout(ch, Duration::from_secs(1)).unwrap(); assert!(!resp.get_header().has_error()); // Reset disk full status for peer 2 and 3. 
2 follower reads must success @@ -335,8 +335,8 @@ fn test_majority_disk_full() { let put = new_request(1, epoch.clone(), reqs, false); let mut opts = RaftCmdExtraOpts::default(); opts.disk_full_opt = DiskFullOpt::AllowedOnAlmostFull; - let mut ch = cluster.async_request_with_opts(put, opts).unwrap(); - let resp = ch.recv_timeout(Duration::from_secs(10)).unwrap(); + let ch = cluster.async_request_with_opts(put, opts).unwrap(); + let resp = block_on_timeout(ch, Duration::from_secs(10)).unwrap(); assert_eq!(disk_full_stores(&resp), vec![2, 3]); // Peer 2 disk usage changes from already full to almost full. @@ -354,8 +354,8 @@ fn test_majority_disk_full() { let put = new_request(1, epoch, reqs, false); let mut opts = RaftCmdExtraOpts::default(); opts.disk_full_opt = DiskFullOpt::AllowedOnAlmostFull; - let mut ch = cluster.async_request_with_opts(put, opts).unwrap(); - let resp = ch.recv_timeout(Duration::from_secs(1)).unwrap(); + let ch = cluster.async_request_with_opts(put, opts).unwrap(); + let resp = block_on_timeout(ch, Duration::from_secs(1)).unwrap(); assert_eq!(disk_full_stores(&resp), vec![3]); for i in 0..3 { diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 3cc72d44da1..eb6b8a235e1 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -21,7 +21,7 @@ use raft::eraftpb::MessageType; use raftstore::store::*; use test_raftstore::*; use tikv::storage::{kv::SnapshotExt, Snapshot}; -use tikv_util::{config::*, time::Instant, HandyRwLock}; +use tikv_util::{config::*, future::block_on_timeout, time::Instant, HandyRwLock}; use txn_types::{Key, LastChange, PessimisticLock}; /// Test if merge is rollback as expected. 
@@ -1532,7 +1532,7 @@ fn test_retry_pending_prepare_merge_fail() { let mut rx = cluster.async_put(b"k1", b"v11").unwrap(); propose_rx.recv_timeout(Duration::from_secs(2)).unwrap(); - rx.recv_timeout(Duration::from_millis(200)).unwrap_err(); + block_on_timeout(rx.as_mut(), Duration::from_millis(200)).unwrap_err(); // Then, start merging. PrepareMerge should become pending because applied_index // is smaller than proposed_index. @@ -1546,7 +1546,7 @@ fn test_retry_pending_prepare_merge_fail() { fail::cfg("disk_already_full_peer_1", "return").unwrap(); fail::cfg("disk_already_full_peer_2", "return").unwrap(); fail::remove("on_handle_apply"); - let res = rx.recv_timeout(Duration::from_secs(1)).unwrap(); + let res = block_on_timeout(rx, Duration::from_secs(1)).unwrap(); assert!(!res.get_header().has_error(), "{:?}", res); propose_rx.recv_timeout(Duration::from_secs(2)).unwrap(); diff --git a/tests/integrations/raftstore/test_joint_consensus.rs b/tests/integrations/raftstore/test_joint_consensus.rs index 282d0d0525c..55def7a099b 100644 --- a/tests/integrations/raftstore/test_joint_consensus.rs +++ b/tests/integrations/raftstore/test_joint_consensus.rs @@ -10,7 +10,7 @@ use pd_client::PdClient; use raft::eraftpb::ConfChangeType; use raftstore::Result; use test_raftstore::*; -use tikv_util::{mpsc::future, store::find_peer}; +use tikv_util::{future::block_on_timeout, store::find_peer}; /// Tests multiple confchange commands can be done by one request #[test] @@ -164,24 +164,18 @@ fn test_request_in_joint_state() { // Isolated peer 2, so the old configuation can't reach quorum cluster.add_send_filter(IsolationFilterFactory::new(2)); - let mut rx = cluster + let rx = cluster .async_request(put_request(®ion, 1, b"k3", b"v3")) .unwrap(); - assert_eq!( - rx.recv_timeout(Duration::from_millis(100)), - Err(future::RecvTimeoutError::Timeout) - ); + block_on_timeout(rx, Duration::from_millis(100)).unwrap_err(); cluster.clear_send_filters(); // Isolated peer 3, so the new 
configuation can't reach quorum cluster.add_send_filter(IsolationFilterFactory::new(3)); - let mut rx = cluster + let rx = cluster .async_request(put_request(®ion, 1, b"k4", b"v4")) .unwrap(); - assert_eq!( - rx.recv_timeout(Duration::from_millis(100)), - Err(future::RecvTimeoutError::Timeout) - ); + block_on_timeout(rx, Duration::from_millis(100)).unwrap_err(); cluster.clear_send_filters(); // Leave joint diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index afc0c9afab4..ceb888a2b22 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -14,7 +14,7 @@ use raftstore::store::{Callback, LocksStatus}; use test_raftstore::*; use test_raftstore_macro::test_case; use tikv::storage::{kv::SnapshotExt, Snapshot}; -use tikv_util::{config::*, HandyRwLock}; +use tikv_util::{config::*, future::block_on_timeout, HandyRwLock}; use txn_types::{Key, LastChange, PessimisticLock}; /// Test if merge is working as expected in a general condition. @@ -1444,10 +1444,10 @@ fn test_merge_pessimistic_locks_when_gap_is_too_large() { // The gap is too large, so the previous merge should fail. And this new put // request should be allowed. 
- let mut res = cluster.async_put(b"k1", b"new_val").unwrap(); + let res = cluster.async_put(b"k1", b"new_val").unwrap(); cluster.clear_send_filters(); - res.recv_timeout(Duration::from_secs(5)).unwrap(); + block_on_timeout(res, Duration::from_secs(5)).unwrap(); assert_eq!(cluster.must_get(b"k1").unwrap(), b"new_val"); } From 437a68d7daff44ad243d24cb5caeee9fc29b3a5a Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 1 Sep 2023 10:14:09 +0800 Subject: [PATCH 0890/1149] storage: avoid duplicated Instant:now (#15489) close tikv/tikv#15490 avoid duplicated Instant:now Signed-off-by: SpadeA-Tang --- src/storage/mod.rs | 171 ++++++++++++++++++++++++++------------------- 1 file changed, 99 insertions(+), 72 deletions(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 0d4679fbe18..cb4057bfd7e 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -97,7 +97,7 @@ use tikv_util::{ deadline::Deadline, future::try_poll, quota_limiter::QuotaLimiter, - time::{duration_to_ms, Instant, ThreadReadId}, + time::{duration_to_ms, duration_to_sec, Instant, ThreadReadId}, }; use tracker::{ clear_tls_tracker_token, set_tls_tracker_token, with_tls_tracker, TrackedFuture, TrackerToken, @@ -645,7 +645,7 @@ impl Storage { Self::check_api_version(api_version, ctx.api_version, CMD, [key.as_encoded()])?; - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); // The bypass_locks and access_locks set will be checked at most once. // `TsSet::vec` is more efficient here. 
@@ -697,12 +697,15 @@ impl Storage { &statistics, buckets.as_ref(), ); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); let read_bytes = key.len() + result @@ -765,7 +768,7 @@ impl Storage { ids: Vec, trackers: Vec, consumer: P, - begin_instant: tikv_util::time::Instant, + begin_instant: Instant, ) -> impl Future> { const CMD: CommandKind = CommandKind::batch_get_command; // all requests in a batch have the same region, epoch, term, replica_read @@ -805,7 +808,7 @@ impl Storage { KV_COMMAND_KEYREAD_HISTOGRAM_STATIC .get(CMD) .observe(requests.len() as f64); - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let read_id = Some(ThreadReadId::new()); let mut statistics = Statistics::default(); let mut req_snaps = vec![]; @@ -1019,7 +1022,7 @@ impl Storage { keys.iter().map(Key::as_encoded), )?; - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let bypass_locks = TsSet::from_u64s(ctx.take_resolved_locks()); let access_locks = TsSet::from_u64s(ctx.take_committed_locks()); @@ -1086,12 +1089,15 @@ impl Storage { (result, stats) }); metrics::tls_collect_scan_details(CMD, &stats); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); let 
read_bytes = stats.cf_statistics(CF_DEFAULT).flow_stats.read_bytes + stats.cf_statistics(CF_LOCK).flow_stats.read_bytes @@ -1217,7 +1223,7 @@ impl Storage { if reverse_scan { std::mem::swap(&mut start_key, &mut end_key); } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let bypass_locks = TsSet::from_u64s(ctx.take_resolved_locks()); let access_locks = TsSet::from_u64s(ctx.take_committed_locks()); @@ -1296,12 +1302,15 @@ impl Storage { &statistics, buckets.as_ref(), ); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); res.map_err(Error::from).map(|results| { KV_COMMAND_KEYREAD_HISTOGRAM_STATIC @@ -1383,7 +1392,7 @@ impl Storage { // which resolves locks on regions, and boundary of regions will be out of range // of TiDB keys. 
- let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); concurrency_manager.update_max_ts(max_ts); let begin_instant = Instant::now(); @@ -1455,12 +1464,15 @@ impl Storage { &statistics, buckets.as_ref(), ); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); Ok(locks) }) @@ -1669,7 +1681,7 @@ impl Storage { Self::check_api_version(api_version, ctx.api_version, CMD, [&key])?; - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -1704,12 +1716,15 @@ impl Storage { &stats, buckets.as_ref(), ); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); r } } @@ -1776,7 +1791,7 @@ impl Storage { .map_err(Error::from)?; } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let read_id = Some(ThreadReadId::new()); let mut snaps = vec![]; for (mut req, id) in gets.into_iter().zip(ids) { @@ -1845,12 +1860,15 @@ impl Storage { } } + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - 
.observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); Ok(()) } .in_resource_metering_tag(resource_tag), @@ -1896,7 +1914,7 @@ impl Storage { Self::check_api_version(api_version, ctx.api_version, CMD, &keys)?; - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -1947,12 +1965,15 @@ impl Storage { KV_COMMAND_KEYREAD_HISTOGRAM_STATIC .get(CMD) .observe(stats.data.flow_stats.read_keys as f64); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); Ok(result) } } @@ -2028,7 +2049,7 @@ impl Storage { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); if let Err(e) = Self::check_causal_ts_flushed(&mut ctx, CMD).await { return callback(Err(e)); @@ -2140,7 +2161,7 @@ impl Storage { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); if let Err(e) = Self::check_causal_ts_flushed(&mut ctx, CMD).await { return callback(Err(e)); @@ -2205,7 +2226,7 @@ impl Storage { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); if let Err(e) = 
Self::check_causal_ts_flushed(&mut ctx, CMD).await { return callback(Err(e)); @@ -2266,7 +2287,7 @@ impl Storage { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let start_key = F::encode_raw_key_owned(start_key, None); let end_key = F::encode_raw_key_owned(end_key, None); @@ -2314,7 +2335,7 @@ impl Storage { if let Err(e) = deadline.check() { return callback(Err(Error::from(e))); } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); if let Err(e) = Self::check_causal_ts_flushed(&mut ctx, CMD).await { return callback(Err(e)); @@ -2400,7 +2421,7 @@ impl Storage { [(Some(&start_key), end_key.as_ref())], )?; - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -2477,12 +2498,15 @@ impl Storage { .get(CMD) .observe(statistics.data.flow_stats.read_keys as f64); metrics::tls_collect_scan_details(CMD, &statistics); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); result } @@ -2542,7 +2566,7 @@ impl Storage { .map(|range| (Some(range.get_start_key()), Some(range.get_end_key()))), )?; - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -2640,12 +2664,15 @@ impl Storage { .get(CMD) .observe(statistics.data.flow_stats.read_keys as f64); metrics::tls_collect_scan_details(CMD, &statistics); + let now = 
Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); Ok(result) } } @@ -2690,7 +2717,7 @@ impl Storage { Self::check_api_version(api_version, ctx.api_version, CMD, [&key])?; - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -2725,12 +2752,15 @@ impl Storage { &stats, buckets.as_ref(), ); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed_secs()); - SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed_secs()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); r } } @@ -2887,7 +2917,7 @@ impl Storage { range.set_end_key(end_key.into_encoded()); } - let command_duration = tikv_util::time::Instant::now(); + let command_duration = Instant::now(); let snap_ctx = SnapContext { pb_ctx: &ctx, ..Default::default() @@ -2898,7 +2928,7 @@ impl Storage { let store = RawStore::new(snapshot, api_version); let cf = Self::rawkv_cf("", api_version)?; - let begin_instant = tikv_util::time::Instant::now(); + let begin_instant = Instant::now(); let mut stats = Vec::with_capacity(ranges.len()); let ret = store .raw_checksum_ranges(cf, &ranges, &mut stats) @@ -2913,12 +2943,15 @@ impl Storage { buckets.as_ref(), ); }); + let now = Instant::now(); SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(CMD) - .observe(begin_instant.saturating_elapsed().as_secs_f64()); - 
SCHED_HISTOGRAM_VEC_STATIC - .get(CMD) - .observe(command_duration.saturating_elapsed().as_secs_f64()); + .observe(duration_to_sec( + now.saturating_duration_since(begin_instant), + )); + SCHED_HISTOGRAM_VEC_STATIC.get(CMD).observe(duration_to_sec( + now.saturating_duration_since(command_duration), + )); ret } @@ -3695,7 +3728,7 @@ pub mod test_util { &self, id: u64, res: Result<(Option>, Statistics)>, - _: tikv_util::time::Instant, + _: Instant, _source: String, ) { self.data.lock().unwrap().push(GetResult { @@ -3706,13 +3739,7 @@ pub mod test_util { } impl ResponseBatchConsumer>> for GetConsumer { - fn consume( - &self, - id: u64, - res: Result>>, - _: tikv_util::time::Instant, - _source: String, - ) { + fn consume(&self, id: u64, res: Result>>, _: Instant, _source: String) { self.data.lock().unwrap().push(GetResult { id, res }); } } From 87b2fe35aefc0d12e53ea0a471b5d9a7cb8606c9 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 1 Sep 2023 10:29:09 +0800 Subject: [PATCH 0891/1149] resolved_ts: shrink resolver lock map (#15484) close tikv/tikv#15458 Resolver owns a hash map to tracking locks and unlock events, and so for calculating resolved ts. However, it does not shrink map even after all lock are removed, this may result OOM if there are transactions that modify many rows across many regions. The total memory usage is proportional to the number of modified rows. 
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resolved_ts/src/lib.rs | 1 + components/resolved_ts/src/resolver.rs | 129 ++++++++++++++++++++++++- 2 files changed, 127 insertions(+), 3 deletions(-) diff --git a/components/resolved_ts/src/lib.rs b/components/resolved_ts/src/lib.rs index eef1211a580..f9eeb7c8b70 100644 --- a/components/resolved_ts/src/lib.rs +++ b/components/resolved_ts/src/lib.rs @@ -14,6 +14,7 @@ #![feature(box_patterns)] #![feature(result_flattening)] +#![feature(let_chains)] #[macro_use] extern crate tikv_util; diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 1b0a07bf8e2..6bee5efd2f6 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -1,6 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use std::{cmp, collections::BTreeMap, sync::Arc}; +use std::{cmp, collections::BTreeMap, sync::Arc, time::Duration}; use collections::{HashMap, HashSet}; use raftstore::store::RegionReadProgress; @@ -23,6 +23,8 @@ pub struct Resolver { locks_by_key: HashMap, TimeStamp>, // start_ts -> locked keys. lock_ts_heap: BTreeMap>>, + // The last shrink time. + last_aggressive_shrink_time: Instant, // The timestamps that guarantees no more commit will happen before. resolved_ts: TimeStamp, // The highest index `Resolver` had been tracked @@ -93,6 +95,7 @@ impl Resolver { resolved_ts: TimeStamp::zero(), locks_by_key: HashMap::default(), lock_ts_heap: BTreeMap::new(), + last_aggressive_shrink_time: Instant::now_coarse(), read_progress, tracked_index: 0, min_ts: TimeStamp::zero(), @@ -161,6 +164,23 @@ impl Resolver { key.heap_size() + std::mem::size_of::() } + fn shrink_ratio(&mut self, ratio: usize, timestamp: Option) { + // HashMap load factor is 87% approximately, leave some margin to avoid + // frequent rehash. 
+ // + // See https://github.com/rust-lang/hashbrown/blob/v0.14.0/src/raw/mod.rs#L208-L220 + const MIN_SHRINK_RATIO: usize = 2; + if self.locks_by_key.capacity() + > self.locks_by_key.len() * cmp::max(MIN_SHRINK_RATIO, ratio) + { + self.locks_by_key.shrink_to_fit(); + } + if let Some(ts) = timestamp && let Some(lock_set) = self.lock_ts_heap.get_mut(&ts) + && lock_set.capacity() > lock_set.len() * cmp::max(MIN_SHRINK_RATIO, ratio) { + lock_set.shrink_to_fit(); + } + } + #[must_use] pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec, index: Option) -> bool { if let Some(index) = index { @@ -201,13 +221,22 @@ impl Resolver { self.region_id, ); - let entry = self.lock_ts_heap.get_mut(&start_ts); - if let Some(locked_keys) = entry { + let mut shrink_ts = None; + if let Some(locked_keys) = self.lock_ts_heap.get_mut(&start_ts) { + // Only shrink large set, because committing a small transaction is + // fast and shrink adds unnecessary overhead. + const SHRINK_SET_CAPACITY: usize = 256; + if locked_keys.capacity() > SHRINK_SET_CAPACITY { + shrink_ts = Some(start_ts); + } locked_keys.remove(key); if locked_keys.is_empty() { self.lock_ts_heap.remove(&start_ts); } } + // Use a large ratio to amortize the cost of rehash. + let shrink_ratio = 8; + self.shrink_ratio(shrink_ratio, shrink_ts); } /// Try to advance resolved ts. @@ -215,11 +244,20 @@ impl Resolver { /// `min_ts` advances the resolver even if there is no write. /// Return None means the resolver is not initialized. pub fn resolve(&mut self, min_ts: TimeStamp, now: Option) -> TimeStamp { + // Use a small ratio to shrink the memory usage aggressively. 
+ const AGGRESSIVE_SHRINK_RATIO: usize = 2; + const AGGRESSIVE_SHRINK_INTERVAL: Duration = Duration::from_secs(10); + if self.last_aggressive_shrink_time.saturating_elapsed() > AGGRESSIVE_SHRINK_INTERVAL { + self.shrink_ratio(AGGRESSIVE_SHRINK_RATIO, None); + self.last_aggressive_shrink_time = Instant::now_coarse(); + } + // The `Resolver` is stopped, not need to advance, just return the current // `resolved_ts` if self.stopped { return self.resolved_ts; } + // Find the min start ts. let min_lock = self.lock_ts_heap.keys().next().cloned(); let has_lock = min_lock.is_some(); @@ -407,4 +445,89 @@ mod tests { drop(resolver); assert_eq!(memory_quota.in_use(), 0); } + + #[test] + fn test_untrack_lock_shrink_ratio() { + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let mut resolver = Resolver::new(1, memory_quota); + let mut key = vec![0; 16]; + let mut ts = TimeStamp::default(); + for _ in 0..1000 { + ts.incr(); + key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); + let _ = resolver.track_lock(ts, key.clone(), None); + } + assert!( + resolver.locks_by_key.capacity() >= 1000, + "{}", + resolver.locks_by_key.capacity() + ); + + let mut ts = TimeStamp::default(); + for _ in 0..901 { + ts.incr(); + key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); + resolver.untrack_lock(&key, None); + } + // shrink_to_fit may reserve some space in accordance with the resize + // policy, but it is expected to be less than 500. + assert!( + resolver.locks_by_key.capacity() < 500, + "{}, {}", + resolver.locks_by_key.capacity(), + resolver.locks_by_key.len(), + ); + + for _ in 0..99 { + ts.incr(); + key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); + resolver.untrack_lock(&key, None); + } + assert!( + resolver.locks_by_key.capacity() < 100, + "{}, {}", + resolver.locks_by_key.capacity(), + resolver.locks_by_key.len(), + ); + + // Trigger aggressive shrink. 
+ resolver.last_aggressive_shrink_time = Instant::now_coarse() - Duration::from_secs(600); + resolver.resolve(TimeStamp::new(0), None); + assert!( + resolver.locks_by_key.capacity() == 0, + "{}, {}", + resolver.locks_by_key.capacity(), + resolver.locks_by_key.len(), + ); + } + + #[test] + fn test_untrack_lock_set_shrink_ratio() { + let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); + let mut resolver = Resolver::new(1, memory_quota); + let mut key = vec![0; 16]; + let ts = TimeStamp::new(1); + for i in 0..1000usize { + key[0..8].copy_from_slice(&i.to_be_bytes()); + let _ = resolver.track_lock(ts, key.clone(), None); + } + assert!( + resolver.lock_ts_heap[&ts].capacity() >= 1000, + "{}", + resolver.lock_ts_heap[&ts].capacity() + ); + + for i in 0..990usize { + key[0..8].copy_from_slice(&i.to_be_bytes()); + resolver.untrack_lock(&key, None); + } + // shrink_to_fit may reserve some space in accordance with the resize + // policy, but it is expected to be less than 100. + assert!( + resolver.lock_ts_heap[&ts].capacity() < 500, + "{}, {}", + resolver.lock_ts_heap[&ts].capacity(), + resolver.lock_ts_heap[&ts].len(), + ); + } } From 32c030dcdb54e81718bce98b79f056a38cde9a7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Fri, 1 Sep 2023 10:45:39 +0800 Subject: [PATCH 0892/1149] raftstore: don't return is_witness while region not found (#15475) close tikv/tikv#15468 Return `RegionNotFound` while cannot find peer in the current store. 
Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/worker/read.rs | 19 ++++++-- tests/failpoints/cases/test_witness.rs | 47 +++++++++++++++++++ 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 0c4641770be..5d6ede9c193 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -824,10 +824,21 @@ where return Ok(None); } - // Check witness - if find_peer_by_id(&delegate.region, delegate.peer_id).map_or(true, |p| p.is_witness) { - TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.witness.inc()); - return Err(Error::IsWitness(region_id)); + match find_peer_by_id(&delegate.region, delegate.peer_id) { + // Check witness + Some(peer) => { + if peer.is_witness { + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.witness.inc()); + return Err(Error::IsWitness(region_id)); + } + } + // This (rarely) happen in witness disabled clusters while the conf change applied but + // region not removed. We shouldn't return `IsWitness` here because our client back off + // for a long time while encountering that. + None => { + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().reject_reason.no_region.inc()); + return Err(Error::RegionNotFound(region_id)); + } } // Check non-witness hasn't finish applying snapshot yet. 
diff --git a/tests/failpoints/cases/test_witness.rs b/tests/failpoints/cases/test_witness.rs index 02411ba1b76..33a62f0532b 100644 --- a/tests/failpoints/cases/test_witness.rs +++ b/tests/failpoints/cases/test_witness.rs @@ -16,6 +16,7 @@ fn test_witness_update_region_in_local_reader() { cluster.run(); let nodes = Vec::from_iter(cluster.get_node_ids()); assert_eq!(nodes.len(), 3); + assert_eq!(nodes[2], 3); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -64,6 +65,52 @@ fn test_witness_update_region_in_local_reader() { fail::remove("change_peer_after_update_region_store_3"); } +// This case is almost the same as `test_witness_update_region_in_local_reader`, +// but this omitted changing the peer to witness, for ensuring `peer_is_witness` +// won't be returned in a cluster without witnesses. +#[test] +fn test_witness_not_reported_while_disabled() { + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + assert_eq!(nodes[2], 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let peer_on_store1 = find_peer(®ion, nodes[0]).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1); + let peer_on_store3 = find_peer(®ion, nodes[2]).unwrap().clone(); + + cluster.must_put(b"k0", b"v0"); + + // update region but the peer is not destroyed yet + fail::cfg("change_peer_after_update_region_store_3", "pause").unwrap(); + + cluster + .pd_client + .must_remove_peer(region.get_id(), peer_on_store3.clone()); + + let region = block_on(pd_client.get_region_by_id(1)).unwrap().unwrap(); + let mut request = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![new_get_cmd(b"k0")], + false, + ); + request.mut_header().set_peer(peer_on_store3); + request.mut_header().set_replica_read(true); + 
+ let resp = cluster + .read(None, request.clone(), Duration::from_millis(100)) + .unwrap(); + assert!(resp.get_header().has_error()); + assert!(!resp.get_header().get_error().has_is_witness()); + fail::remove("change_peer_after_update_region_store_3"); +} + // Test the case witness pull voter_replicated_index when has pending compact // cmd. #[test] From fa3892be7ff7acad80cdac19bbe2f5bb1423f8ac Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 1 Sep 2023 11:54:39 +0800 Subject: [PATCH 0893/1149] server: track grpc threads memory throughput (#15488) ref tikv/tikv#8235 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/server/src/server.rs | 8 ++++++++ components/server/src/server2.rs | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 72f7b936956..8d44890e5a6 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -105,6 +105,7 @@ use tikv::{ Engine, Storage, }, }; +use tikv_alloc::{add_thread_memory_accessor, remove_thread_memory_accessor}; use tikv_util::{ check_environment_variables, config::VersionTrack, @@ -294,6 +295,13 @@ where EnvBuilder::new() .cq_count(config.server.grpc_concurrency) .name_prefix(thd_name!(GRPC_THREAD_PREFIX)) + .after_start(|| { + // SAFETY: we will call `remove_thread_memory_accessor` at before_stop. 
+ unsafe { add_thread_memory_accessor() }; + }) + .before_stop(|| { + remove_thread_memory_accessor(); + }) .build(), ); let pd_client = TikvServerCore::connect_to_pd_cluster( diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 1289ffe848d..2593035618d 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -103,6 +103,7 @@ use tikv::{ Engine, Storage, }, }; +use tikv_alloc::{add_thread_memory_accessor, remove_thread_memory_accessor}; use tikv_util::{ check_environment_variables, config::VersionTrack, @@ -289,6 +290,13 @@ where EnvBuilder::new() .cq_count(config.server.grpc_concurrency) .name_prefix(thd_name!(GRPC_THREAD_PREFIX)) + .after_start(|| { + // SAFETY: we will call `remove_thread_memory_accessor` at before_stop. + unsafe { add_thread_memory_accessor() }; + }) + .before_stop(|| { + remove_thread_memory_accessor(); + }) .build(), ); let pd_client = TikvServerCore::connect_to_pd_cluster( From a56fe6abdccdf98657eb880f1b55792bbabb29ac Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 4 Sep 2023 12:53:13 +0800 Subject: [PATCH 0894/1149] raftstore-v2: fix panic of dynamic changing write-buffer-limit (#15504) close tikv/tikv#15503 fix panic of dynamic changing write-buffer-limit Signed-off-by: SpadeA-Tang --- components/engine_panic/src/db_options.rs | 4 ++++ components/engine_rocks/src/db_options.rs | 8 ++++++++ components/engine_traits/src/db_options.rs | 1 + src/config/mod.rs | 13 ++++++++++--- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/components/engine_panic/src/db_options.rs b/components/engine_panic/src/db_options.rs index c081a5c1d12..05147ca06fb 100644 --- a/components/engine_panic/src/db_options.rs +++ b/components/engine_panic/src/db_options.rs @@ -40,6 +40,10 @@ impl DbOptions for PanicDbOptions { panic!() } + fn get_flush_size(&self) -> Result { + panic!() + } + fn set_rate_limiter_auto_tuned(&mut self, 
rate_limiter_auto_tuned: bool) -> Result<()> { panic!() } diff --git a/components/engine_rocks/src/db_options.rs b/components/engine_rocks/src/db_options.rs index c9ef2cfda98..38587663084 100644 --- a/components/engine_rocks/src/db_options.rs +++ b/components/engine_rocks/src/db_options.rs @@ -100,6 +100,14 @@ impl DbOptions for RocksDbOptions { Ok(()) } + fn get_flush_size(&self) -> Result { + if let Some(m) = self.0.get_write_buffer_manager() { + return Ok(m.flush_size() as u64); + } + + Err(box_err!("write buffer manager not found")) + } + fn set_flush_oldest_first(&mut self, f: bool) -> Result<()> { if let Some(m) = self.0.get_write_buffer_manager() { m.set_flush_oldest_first(f); diff --git a/components/engine_traits/src/db_options.rs b/components/engine_traits/src/db_options.rs index 2c6e9c3d4e8..9713c406978 100644 --- a/components/engine_traits/src/db_options.rs +++ b/components/engine_traits/src/db_options.rs @@ -21,6 +21,7 @@ pub trait DbOptions { fn get_rate_limiter_auto_tuned(&self) -> Option; fn set_rate_limiter_auto_tuned(&mut self, rate_limiter_auto_tuned: bool) -> Result<()>; fn set_flush_size(&mut self, f: usize) -> Result<()>; + fn get_flush_size(&self) -> Result; fn set_flush_oldest_first(&mut self, f: bool) -> Result<()>; fn set_titandb_options(&mut self, opts: &Self::TitanDbOptions); } diff --git a/src/config/mod.rs b/src/config/mod.rs index 8c0c04957b1..be2a52d9b07 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -110,7 +110,7 @@ const RAFT_ENGINE_MEMORY_LIMIT_RATE: f64 = 0.15; const WRITE_BUFFER_MEMORY_LIMIT_RATE: f64 = 0.2; // Too large will increase Raft Engine memory usage. 
const WRITE_BUFFER_MEMORY_LIMIT_MAX: u64 = ReadableSize::gb(8).0; -const DEFAULT_LOCK_BUFFER_MEMORY_LIMIT: u64 = ReadableSize::mb(32).0; +const DEFAULT_LOCK_BUFFER_MEMORY_LIMIT: ReadableSize = ReadableSize::mb(32); /// Configs that actually took effect in the last run pub const LAST_CONFIG_FILE: &str = "last_tikv.toml"; @@ -1406,7 +1406,7 @@ impl DbConfig { .get_or_insert(ReadableSize::mb(4)); self.lockcf .write_buffer_limit - .get_or_insert(ReadableSize::mb(DEFAULT_LOCK_BUFFER_MEMORY_LIMIT)); + .get_or_insert(DEFAULT_LOCK_BUFFER_MEMORY_LIMIT); } } } @@ -2061,7 +2061,8 @@ impl ConfigManager for DbConfigManger { .drain_filter(|(name, _)| name == "write_buffer_limit") .next() { - self.db.set_flush_size(size.1.into())?; + let size: ReadableSize = size.1.into(); + self.db.set_flush_size(size.0 as usize)?; } if let Some(f) = change @@ -5200,6 +5201,12 @@ mod tests { ReadableSize::mb(128).0 as i64 ); + cfg_controller + .update_config("rocksdb.write-buffer-limit", "10MB") + .unwrap(); + let flush_size = db.get_db_options().get_flush_size().unwrap(); + assert_eq!(flush_size, ReadableSize::mb(10).0); + // update some configs on default cf let cf_opts = db.get_options_cf(CF_DEFAULT).unwrap(); assert_eq!(cf_opts.get_disable_auto_compactions(), false); From 280b39c1fa0ec4bf85dae06561f2f792bf826e6a Mon Sep 17 00:00:00 2001 From: qupeng Date: Mon, 4 Sep 2023 15:44:13 +0800 Subject: [PATCH 0895/1149] cdc: enhance deregister protocol (#15485) close tikv/tikv#15487 Signed-off-by: qupeng --- components/cdc/src/endpoint.rs | 97 ++++++++++++++++++++++++++++++++-- components/cdc/src/service.rs | 23 ++++++-- 2 files changed, 112 insertions(+), 8 deletions(-) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 72042bb5aec..969d0cba0d9 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -80,6 +80,11 @@ pub enum Deregister { conn_id: ConnId, request_id: u64, }, + Region { + conn_id: ConnId, + request_id: u64, + region_id: u64, 
+ }, Downstream { conn_id: ConnId, request_id: u64, @@ -112,6 +117,16 @@ impl fmt::Debug for Deregister { .field("conn_id", conn_id) .field("request_id", request_id) .finish(), + Deregister::Region { + ref conn_id, + ref request_id, + ref region_id, + } => de + .field("deregister", &"region") + .field("conn_id", conn_id) + .field("request_id", request_id) + .field("region_id", region_id) + .finish(), Deregister::Downstream { ref conn_id, ref request_id, @@ -583,8 +598,20 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint { let conn = self.connections.get_mut(&conn_id).unwrap(); - for (region, downstream) in conn.unsubscribe_request(request_id) { - self.deregister_downstream(region, downstream, None); + for (region_id, downstream) in conn.unsubscribe_request(request_id) { + let err = Some(Error::Other("region not found".into())); + self.deregister_downstream(region_id, downstream, err); + } + } + Deregister::Region { + conn_id, + request_id, + region_id, + } => { + let conn = self.connections.get_mut(&conn_id).unwrap(); + if let Some(downstream) = conn.unsubscribe(request_id, region_id) { + let err = Some(Error::Other("region not found".into())); + self.deregister_downstream(region_id, downstream, err); } } Deregister::Downstream { @@ -1248,13 +1275,12 @@ impl, E: KvEngine, S: StoreRegionMeta + Send> Runnable for Endpoint { fn on_timeout(&mut self) { - CDC_ENDPOINT_PENDING_TASKS.set(self.scheduler.pending_tasks() as _); - // Reclaim resolved_region_heap memory. 
self.resolved_region_heap .borrow_mut() .reset_and_shrink_to(self.capture_regions.len()); + CDC_ENDPOINT_PENDING_TASKS.set(self.scheduler.pending_tasks() as _); CDC_CAPTURED_REGION_COUNT.set(self.capture_regions.len() as i64); CDC_REGION_RESOLVE_STATUS_GAUGE_VEC .with_label_values(&["unresolved"]) @@ -1262,6 +1288,7 @@ impl, E: KvEngine, S: StoreRegionMeta + Send> Runnable CDC_REGION_RESOLVE_STATUS_GAUGE_VEC .with_label_values(&["resolved"]) .set(self.resolved_region_count as _); + if self.min_resolved_ts != TimeStamp::max() { CDC_MIN_RESOLVED_TS_REGION.set(self.min_ts_region_id as i64); CDC_MIN_RESOLVED_TS.set(self.min_resolved_ts.physical() as i64); @@ -2841,5 +2868,67 @@ mod tests { })); assert_eq!(suite.connections[&conn_id].downstreams_count(), 0); assert_eq!(suite.capture_regions.len(), 0); + for _ in 0..2 { + let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) + .unwrap() + .unwrap(); + let check = matches!(cdc_event.0, CdcEvent::Event(e) if { + matches!(e.event, Some(Event_oneof_event::Error(ref err)) if { + err.has_region_not_found() + }) + }); + assert!(check); + } + + // Resubscribe the region. + suite.add_region(2, 100); + for i in 1..=2 { + req.set_request_id(1); + req.set_region_id(i); + let downstream = Downstream::new( + "".to_string(), + region_epoch.clone(), + 1, + conn_id, + ChangeDataRequestKvApi::TiDb, + false, + ObservedRange::default(), + ); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + }); + assert_eq!(suite.connections[&conn_id].downstreams_count(), i as usize); + } + + // Deregister regions one by one in the request. 
+ suite.run(Task::Deregister(Deregister::Region { + conn_id, + request_id: 1, + region_id: 1, + })); + assert_eq!(suite.connections[&conn_id].downstreams_count(), 1); + assert_eq!(suite.capture_regions.len(), 1); + + suite.run(Task::Deregister(Deregister::Region { + conn_id, + request_id: 1, + region_id: 2, + })); + assert_eq!(suite.connections[&conn_id].downstreams_count(), 0); + assert_eq!(suite.capture_regions.len(), 0); + + for _ in 0..2 { + let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) + .unwrap() + .unwrap(); + let check = matches!(cdc_event.0, CdcEvent::Event(e) if { + matches!(e.event, Some(Event_oneof_event::Error(ref err)) if { + err.has_region_not_found() + }) + }); + assert!(check); + } } } diff --git a/components/cdc/src/service.rs b/components/cdc/src/service.rs index 7478e3afbad..7cbf268f2b7 100644 --- a/components/cdc/src/service.rs +++ b/components/cdc/src/service.rs @@ -304,6 +304,13 @@ impl Service { scheduler.schedule(task).map_err(|e| format!("{:?}", e)) } + // ### Command types: + // * Register registers a region. 1) both `request_id` and `region_id` must be + // specified; 2) `request_id` can be 0 but `region_id` can not. + // * Deregister deregisters some regions in one same `request_id` or just one + // region. 1) if both `request_id` and `region_id` are specified, just + // deregister the region; 2) if only `request_id` is specified, all region + // subscriptions with the same `request_id` will be deregistered. 
fn handle_request( scheduler: &Scheduler, peer: &str, @@ -361,10 +368,18 @@ impl Service { request: ChangeDataRequest, conn_id: ConnId, ) -> Result<(), String> { - let task = Task::Deregister(Deregister::Request { - conn_id, - request_id: request.request_id, - }); + let task = if request.region_id != 0 { + Task::Deregister(Deregister::Region { + conn_id, + request_id: request.request_id, + region_id: request.region_id, + }) + } else { + Task::Deregister(Deregister::Request { + conn_id, + request_id: request.request_id, + }) + }; scheduler.schedule(task).map_err(|e| format!("{:?}", e)) } From 1cd6dda7d351ed969811ebdea1a52f30c97d7094 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 4 Sep 2023 16:14:15 +0800 Subject: [PATCH 0896/1149] raftstore-v2: reuse failpoint tests in test_early_apply.rs (#15501) ref tikv/tikv#15409 reuse failpoint tests in test_early_apply Signed-off-by: SpadeA-Tang --- components/test_raftstore/src/util.rs | 8 +++---- tests/failpoints/cases/test_early_apply.rs | 22 +++++++++++++------ tests/failpoints/cases/test_split_region.rs | 2 +- tests/failpoints/cases/test_stale_read.rs | 2 +- .../raftstore/test_early_apply.rs | 4 ++-- .../integrations/raftstore/test_lease_read.rs | 2 +- 6 files changed, 24 insertions(+), 16 deletions(-) diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index f63c69f9631..e88df1fb0ca 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -668,11 +668,11 @@ pub fn create_test_engine( ) } -pub fn configure_for_request_snapshot(cluster: &mut Cluster) { +pub fn configure_for_request_snapshot(config: &mut Config) { // We don't want to generate snapshots due to compact log. 
- cluster.cfg.raft_store.raft_log_gc_threshold = 1000; - cluster.cfg.raft_store.raft_log_gc_count_limit = Some(1000); - cluster.cfg.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(20)); + config.raft_store.raft_log_gc_threshold = 1000; + config.raft_store.raft_log_gc_count_limit = Some(1000); + config.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(20)); } pub fn configure_for_hibernate(config: &mut Config) { diff --git a/tests/failpoints/cases/test_early_apply.rs b/tests/failpoints/cases/test_early_apply.rs index a194ef74d8f..bf403fb4668 100644 --- a/tests/failpoints/cases/test_early_apply.rs +++ b/tests/failpoints/cases/test_early_apply.rs @@ -7,14 +7,16 @@ use std::sync::{ use raft::eraftpb::MessageType; use test_raftstore::*; +use test_raftstore_macro::test_case; // Test if a singleton can apply a log before persisting it. -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_singleton_cannot_early_apply() { - let mut cluster = new_node_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.pd_client.disable_default_operator(); // So compact log will not be triggered automatically. - configure_for_request_snapshot(&mut cluster); + configure_for_request_snapshot(&mut cluster.cfg); cluster.run(); // Put one key first to cache leader. @@ -33,13 +35,14 @@ fn test_singleton_cannot_early_apply() { must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_multi_early_apply() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); cluster.pd_client.disable_default_operator(); cluster.cfg.raft_store.store_batch_system.pool_size = 1; // So compact log will not be triggered automatically. 
- configure_for_request_snapshot(&mut cluster); + configure_for_request_snapshot(&mut cluster.cfg); cluster.run_conf_change(); // Check mixed regions can be scheduled correctly. @@ -68,9 +71,11 @@ fn test_multi_early_apply() { })), )); cluster.async_put(b"k4", b"v4").unwrap(); - // Sleep a while so that follower will send append response. + // Sleep a while so that follower will send append response sleep_ms(100); cluster.async_put(b"k11", b"v22").unwrap(); + // Sleep a while so that follower will send append response. + sleep_ms(100); // Now the store thread of store 1 pauses on `store_1_fp`. // Set `store_1_fp` again to make this store thread does not pause on it. // Then leader 1 will receive the append response and commit the log. @@ -92,6 +97,9 @@ fn test_multi_early_apply() { /// the peer to fix this issue. /// For simplicity, this test uses region merge to ensure that the apply state /// will be written to kv db before crash. +/// +/// Note: partitioned-raft-kv does not need this due to change in disk +/// persistence logic #[test] fn test_early_apply_yield_followed_with_many_entries() { let mut cluster = new_node_cluster(0, 3); diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index dfd7002495c..ed01386b528 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -752,7 +752,7 @@ impl Filter for CollectSnapshotFilter { #[test] fn test_split_duplicated_batch() { let mut cluster = new_node_cluster(0, 3); - configure_for_request_snapshot(&mut cluster); + configure_for_request_snapshot(&mut cluster.cfg); // Disable raft log gc in this test case. cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(60); // Use one thread to make it more possible to be fetched into one batch. 
diff --git a/tests/failpoints/cases/test_stale_read.rs b/tests/failpoints/cases/test_stale_read.rs index 523bb54f7cb..a9c6fa5d6e6 100644 --- a/tests/failpoints/cases/test_stale_read.rs +++ b/tests/failpoints/cases/test_stale_read.rs @@ -325,7 +325,7 @@ fn test_read_index_when_transfer_leader_2() { // Increase the election tick to make this test case running reliably. configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); // Stop log compaction to transfer leader with filter easier. - configure_for_request_snapshot(&mut cluster); + configure_for_request_snapshot(&mut cluster.cfg); let max_lease = Duration::from_secs(2); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(max_lease); diff --git a/tests/integrations/raftstore/test_early_apply.rs b/tests/integrations/raftstore/test_early_apply.rs index b30a861e2fe..44537e8b409 100644 --- a/tests/integrations/raftstore/test_early_apply.rs +++ b/tests/integrations/raftstore/test_early_apply.rs @@ -109,7 +109,7 @@ fn test_early_apply(mode: DataLost) { let mut cluster = new_node_cluster(0, 3); cluster.pd_client.disable_default_operator(); // So compact log will not be triggered automatically. - configure_for_request_snapshot(&mut cluster); + configure_for_request_snapshot(&mut cluster.cfg); cluster.run(); if mode == DataLost::LeaderCommit || mode == DataLost::AllLost { cluster.must_transfer_leader(1, new_peer(1, 1)); @@ -175,7 +175,7 @@ fn test_update_internal_apply_index() { let mut cluster = new_node_cluster(0, 4); cluster.pd_client.disable_default_operator(); // So compact log will not be triggered automatically. 
- configure_for_request_snapshot(&mut cluster); + configure_for_request_snapshot(&mut cluster.cfg); cluster.run(); cluster.must_transfer_leader(1, new_peer(3, 3)); cluster.must_put(b"k1", b"v1"); diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index 60c87fd4e00..abf17e01e9d 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -481,7 +481,7 @@ fn test_read_index_stale_in_suspect_lease() { configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); let max_lease = Duration::from_secs(2); // Stop log compaction to transfer leader with filter easier. - configure_for_request_snapshot(&mut cluster); + configure_for_request_snapshot(&mut cluster.cfg); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(max_lease); cluster.pd_client.disable_default_operator(); From 640143a2daba90bfcc9a3848d19887a7a2f39170 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Mon, 4 Sep 2023 17:48:43 +0800 Subject: [PATCH 0897/1149] raftstore: region initial size depends on the split resource . (#15456) close tikv/tikv#15457 there are three triggers will split the regions: 1. load split include sizekeys, load etc. In this cases, the new region should contains the data after split. 2. tidb split tables or partition table, such like `create table test.t1(id int,b int) shard_row_id_bits=4 partition by hash(id) partitions 2000`. In this cases , the new region shouldn't contains any data after split. 
Signed-off-by: bufferflies <1045931706@qq.com> --- Cargo.lock | 2 +- .../src/operation/command/admin/split.rs | 33 ++++++++-- components/raftstore-v2/src/operation/pd.rs | 2 + components/raftstore-v2/src/router/imp.rs | 2 +- components/raftstore-v2/src/router/message.rs | 3 + components/raftstore-v2/src/worker/pd/mod.rs | 11 +++- .../raftstore-v2/src/worker/pd/region.rs | 1 + .../raftstore-v2/src/worker/pd/split.rs | 8 +++ components/raftstore/src/router.rs | 1 + components/raftstore/src/store/fsm/apply.rs | 7 ++ components/raftstore/src/store/fsm/peer.rs | 43 ++++++++++--- components/raftstore/src/store/msg.rs | 1 + components/raftstore/src/store/worker/pd.rs | 19 ++++++ components/test_raftstore/src/cluster.rs | 1 + src/server/raftkv/raft_extension.rs | 1 + src/server/raftkv2/raft_extension.rs | 2 +- tests/failpoints/cases/test_split_region.rs | 64 +++++++++++++++++++ 17 files changed, 181 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 162d1f3ae07..4cd0882628b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2979,7 +2979,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#7b612d935bf96f9daf7a537db379bcc88b4644e0" +source = "git+https://github.com/pingcap/kvproto.git#ecdbf1f8c130089392a9bb5f86f7577deddfbed5" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index c744c1b9161..0f9cae7218d 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -75,6 +75,9 @@ pub struct SplitResult { // The index of the derived region in `regions` pub derived_index: usize, pub tablet_index: u64, + // new regions will share the region size if it's true. + // otherwise, the new region's size will be 0. 
+ pub share_source_region_size: bool, // Hack: in common case we should use generic, but split is an infrequent // event that performance is not critical. And using `Any` can avoid polluting // all existing code. @@ -148,6 +151,9 @@ pub struct RequestSplit { pub epoch: RegionEpoch, pub split_keys: Vec>, pub source: Cow<'static, str>, + // new regions will share the region size if it's true. + // otherwise, the new region's size will be 0. + pub share_source_region_size: bool, } #[derive(Debug)] @@ -235,6 +241,7 @@ impl Peer { { return true; } + fail_point!("on_split_region_check_tick", |_| true); if ctx.schedulers.split_check.is_busy() { return false; } @@ -336,7 +343,7 @@ impl Peer { ch.set_result(cmd_resp::new_error(e)); return; } - self.ask_batch_split_pd(ctx, rs.split_keys, ch); + self.ask_batch_split_pd(ctx, rs.split_keys, rs.share_source_region_size, ch); } pub fn on_request_half_split( @@ -479,6 +486,7 @@ impl Apply { let derived_req = &[derived_req]; let right_derive = split_reqs.get_right_derive(); + let share_source_region_size = split_reqs.get_share_source_region_size(); let reqs = if right_derive { split_reqs.get_requests().iter().chain(derived_req) } else { @@ -615,6 +623,7 @@ impl Apply { derived_index, tablet_index: log_index, tablet: Box::new(tablet), + share_source_region_size, }), )) } @@ -665,6 +674,7 @@ impl Peer { fail_point!("on_split", self.peer().get_store_id() == 3, |_| {}); let derived = &res.regions[res.derived_index]; + let share_source_region_size = res.share_source_region_size; let region_id = derived.get_id(); let region_locks = self.txn_context().split(&res.regions, derived); @@ -695,8 +705,14 @@ impl Peer { let new_region_count = res.regions.len() as u64; let control = self.split_flow_control_mut(); - let estimated_size = control.approximate_size.map(|v| v / new_region_count); - let estimated_keys = control.approximate_keys.map(|v| v / new_region_count); + // if share_source_region_size is true, it means the new region contains any 
+ // data from the origin region. + let mut share_size = None; + let mut share_keys = None; + if share_source_region_size { + share_size = control.approximate_size.map(|v| v / new_region_count); + share_keys = control.approximate_keys.map(|v| v / new_region_count); + } self.post_split(); @@ -714,8 +730,11 @@ impl Peer { // After split, the peer may need to update its metrics. let control = self.split_flow_control_mut(); control.may_skip_split_check = false; - control.approximate_size = estimated_size; - control.approximate_keys = estimated_keys; + if share_source_region_size { + control.approximate_size = share_size; + control.approximate_keys = share_keys; + } + self.add_pending_tick(PeerTick::SplitRegionCheck); } self.storage_mut().set_has_dirty_data(true); @@ -760,8 +779,8 @@ impl Peer { derived_region_id: region_id, check_split: last_region_id == new_region_id, scheduled: false, - approximate_size: estimated_size, - approximate_keys: estimated_keys, + approximate_size: share_size, + approximate_keys: share_keys, locks, })); diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 817b3aa6eb6..9bce8f3ba02 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -215,6 +215,7 @@ impl Peer { &self, ctx: &StoreContext, split_keys: Vec>, + share_source_region_size: bool, ch: CmdResChannel, ) { let task = pd::Task::AskBatchSplit { @@ -222,6 +223,7 @@ impl Peer { split_keys, peer: self.peer().clone(), right_derive: ctx.cfg.right_derive_when_split, + share_source_region_size, ch, }; if let Err(e) = ctx.schedulers.pd.schedule(task) { diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 9c6cca96ae4..23a8a3c7d4e 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -58,7 +58,7 @@ impl raftstore::coprocessor::StoreHandle for Store split_keys: Vec>, source: 
Cow<'static, str>, ) { - let (msg, _) = PeerMsg::request_split(region_epoch, split_keys, source.to_string()); + let (msg, _) = PeerMsg::request_split(region_epoch, split_keys, source.to_string(), true); let res = self.send(region_id, msg); if let Err(e) = res { warn!( diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index f09314b4f17..2d364af44e1 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -315,6 +315,7 @@ impl PeerMsg { epoch: metapb::RegionEpoch, split_keys: Vec>, source: String, + share_source_region_size: bool, ) -> (Self, CmdResSubscriber) { let (ch, sub) = CmdResChannel::pair(); ( @@ -323,6 +324,7 @@ impl PeerMsg { epoch, split_keys, source: source.into(), + share_source_region_size, }, ch, }, @@ -344,6 +346,7 @@ impl PeerMsg { epoch, split_keys, source: source.into(), + share_source_region_size: false, }, ch, }, diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index f89ea75b604..061a5ad5126 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -70,6 +70,7 @@ pub enum Task { split_keys: Vec>, peer: metapb::Peer, right_derive: bool, + share_source_region_size: bool, ch: CmdResChannel, }, ReportBatchSplit { @@ -324,7 +325,15 @@ where peer, right_derive, ch, - } => self.handle_ask_batch_split(region, split_keys, peer, right_derive, ch), + share_source_region_size, + } => self.handle_ask_batch_split( + region, + split_keys, + peer, + right_derive, + share_source_region_size, + ch, + ), Task::ReportBatchSplit { regions } => self.handle_report_batch_split(regions), Task::AutoSplit { split_infos } => self.handle_auto_split(split_infos), Task::UpdateMaxTimestamp { diff --git a/components/raftstore-v2/src/worker/pd/region.rs b/components/raftstore-v2/src/worker/pd/region.rs index e825dd54c32..763e12fff07 100644 --- 
a/components/raftstore-v2/src/worker/pd/region.rs +++ b/components/raftstore-v2/src/worker/pd/region.rs @@ -288,6 +288,7 @@ where epoch, split_keys: split_region.take_keys().into(), source: "pd".into(), + share_source_region_size: false, }, ch, } diff --git a/components/raftstore-v2/src/worker/pd/split.rs b/components/raftstore-v2/src/worker/pd/split.rs index bf13e01120a..7fec5a31bb6 100644 --- a/components/raftstore-v2/src/worker/pd/split.rs +++ b/components/raftstore-v2/src/worker/pd/split.rs @@ -17,10 +17,13 @@ fn new_batch_split_region_request( split_keys: Vec>, ids: Vec, right_derive: bool, + share_source_region_size: bool, ) -> AdminRequest { let mut req = AdminRequest::default(); req.set_cmd_type(AdminCmdType::BatchSplit); req.mut_splits().set_right_derive(right_derive); + req.mut_splits() + .set_share_source_region_size(share_source_region_size); let mut requests = Vec::with_capacity(ids.len()); for (mut id, key) in ids.into_iter().zip(split_keys) { let mut split = SplitRequest::default(); @@ -46,6 +49,7 @@ where split_keys: Vec>, peer: metapb::Peer, right_derive: bool, + share_source_region_size: bool, ch: CmdResChannel, ) { Self::ask_batch_split_imp( @@ -57,6 +61,7 @@ where split_keys, peer, right_derive, + share_source_region_size, Some(ch), ); } @@ -70,6 +75,7 @@ where split_keys: Vec>, peer: metapb::Peer, right_derive: bool, + share_source_region_size: bool, ch: Option, ) { if split_keys.is_empty() { @@ -98,6 +104,7 @@ where split_keys, resp.take_ids().into(), right_derive, + share_source_region_size, ); let region_id = region.get_id(); let epoch = region.take_region_epoch(); @@ -148,6 +155,7 @@ where vec![split_key], split_info.peer, true, + false, None, ); // Try to split the region on half within the given key diff --git a/components/raftstore/src/router.rs b/components/raftstore/src/router.rs index 3a76a5ad26f..09f389a2230 100644 --- a/components/raftstore/src/router.rs +++ b/components/raftstore/src/router.rs @@ -331,6 +331,7 @@ impl 
crate::coprocessor::StoreHandle for RaftRoute split_keys, callback: Callback::None, source, + share_source_region_size: true, }, ) { warn!( diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 0bc1ccf7d85..c170e5a35f9 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -272,6 +272,7 @@ pub enum ExecResult { regions: Vec, derived: Region, new_split_regions: HashMap, + share_source_region_size: bool, }, PrepareMerge { region: Region, @@ -2516,6 +2517,9 @@ where admin_req .mut_splits() .set_right_derive(split.get_right_derive()); + admin_req + .mut_split() + .set_share_source_region_size(split.get_share_source_region_size()); admin_req.mut_splits().mut_requests().push(split); // This method is executed only when there are unapplied entries after being // restarted. So there will be no callback, it's OK to return a response @@ -2560,6 +2564,7 @@ where derived.mut_region_epoch().set_version(new_version); let right_derive = split_reqs.get_right_derive(); + let share_source_region_size = split_reqs.get_share_source_region_size(); let mut regions = Vec::with_capacity(new_region_cnt + 1); // Note that the split requests only contain ids for new regions, so we need // to handle new regions and old region separately. 
@@ -2724,6 +2729,7 @@ where regions, derived, new_split_regions, + share_source_region_size, }), )) } @@ -7088,6 +7094,7 @@ mod tests { regions, derived: _, new_split_regions: _, + share_source_region_size: _, } = apply_res.exec_res.front().unwrap() { let r8 = regions.get(0).unwrap(); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 62a3a2650de..9f7934e806e 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -1049,8 +1049,15 @@ where split_keys, callback, source, + share_source_region_size, } => { - self.on_prepare_split_region(region_epoch, split_keys, callback, &source); + self.on_prepare_split_region( + region_epoch, + split_keys, + callback, + &source, + share_source_region_size, + ); } CasualMessage::ComputeHashResult { index, @@ -4042,6 +4049,7 @@ where derived: metapb::Region, regions: Vec, new_split_regions: HashMap, + share_source_region_size: bool, ) { fail_point!("on_split", self.ctx.store_id() == 3, |_| {}); @@ -4063,8 +4071,15 @@ where // Roughly estimate the size and keys for new regions. 
let new_region_count = regions.len() as u64; - let estimated_size = self.fsm.peer.approximate_size.map(|v| v / new_region_count); - let estimated_keys = self.fsm.peer.approximate_keys.map(|v| v / new_region_count); + let mut share_size = None; + let mut share_keys = None; + // if share_source_region_size is true, it means the new region contains any + // data from the origin region + if share_source_region_size { + share_size = self.fsm.peer.approximate_size.map(|v| v / new_region_count); + share_keys = self.fsm.peer.approximate_keys.map(|v| v / new_region_count); + } + let mut meta = self.ctx.store_meta.lock().unwrap(); meta.set_region( &self.ctx.coprocessor_host, @@ -4079,8 +4094,10 @@ where let is_leader = self.fsm.peer.is_leader(); if is_leader { - self.fsm.peer.approximate_size = estimated_size; - self.fsm.peer.approximate_keys = estimated_keys; + if share_source_region_size { + self.fsm.peer.approximate_size = share_size; + self.fsm.peer.approximate_keys = share_keys; + } self.fsm.peer.heartbeat_pd(self.ctx); // Notify pd immediately to let it update the region meta. info!( @@ -4215,8 +4232,8 @@ where new_peer.has_ready |= campaigned; if is_leader { - new_peer.peer.approximate_size = estimated_size; - new_peer.peer.approximate_keys = estimated_keys; + new_peer.peer.approximate_size = share_size; + new_peer.peer.approximate_keys = share_keys; *new_peer.peer.txn_ext.pessimistic_locks.write() = locks; // The new peer is likely to become leader, send a heartbeat immediately to // reduce client query miss. 
@@ -5043,7 +5060,13 @@ where derived, regions, new_split_regions, - } => self.on_ready_split_region(derived, regions, new_split_regions), + share_source_region_size, + } => self.on_ready_split_region( + derived, + regions, + new_split_regions, + share_source_region_size, + ), ExecResult::PrepareMerge { region, state } => { self.on_ready_prepare_merge(region, state) } @@ -5768,7 +5791,7 @@ where return; } - fail_point!("on_split_region_check_tick"); + fail_point!("on_split_region_check_tick", |_| {}); self.register_split_region_check_tick(); // To avoid frequent scan, we only add new scan tasks if all previous tasks @@ -5828,6 +5851,7 @@ where split_keys: Vec>, cb: Callback, source: &str, + share_source_region_size: bool, ) { info!( "on split"; @@ -5873,6 +5897,7 @@ where split_keys, peer: self.fsm.peer.peer.clone(), right_derive: self.ctx.cfg.right_derive_when_split, + share_source_region_size, callback: cb, }; if let Err(ScheduleError::Stopped(t)) = self.ctx.pd_scheduler.schedule(task) { diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 1ed8934e0f0..64c5be6d7e1 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -545,6 +545,7 @@ pub enum CasualMessage { split_keys: Vec>, callback: Callback, source: Cow<'static, str>, + share_source_region_size: bool, }, /// Hash result of ComputeHash command. diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index e8c8e2f575b..32fbdbc3145 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -141,6 +141,7 @@ where peer: metapb::Peer, // If true, right Region derives origin region_id. right_derive: bool, + share_source_region_size: bool, callback: Callback, }, AskBatchSplit { @@ -149,6 +150,7 @@ where peer: metapb::Peer, // If true, right Region derives origin region_id. 
right_derive: bool, + share_source_region_size: bool, callback: Callback, }, AutoSplit { @@ -1066,6 +1068,7 @@ where split_key: Vec, peer: metapb::Peer, right_derive: bool, + share_source_region_size: bool, callback: Callback, task: String, ) { @@ -1087,6 +1090,7 @@ where resp.get_new_region_id(), resp.take_new_peer_ids(), right_derive, + share_source_region_size, ); let region_id = region.get_id(); let epoch = region.take_region_epoch(); @@ -1121,6 +1125,7 @@ where mut split_keys: Vec>, peer: metapb::Peer, right_derive: bool, + share_source_region_size: bool, callback: Callback, task: String, remote: Remote, @@ -1146,6 +1151,7 @@ where split_keys, resp.take_ids().into(), right_derive, + share_source_region_size, ); let region_id = region.get_id(); let epoch = region.take_region_epoch(); @@ -1174,6 +1180,7 @@ where split_key: split_keys.pop().unwrap(), peer, right_derive, + share_source_region_size, callback, }; if let Err(ScheduleError::Stopped(t)) = scheduler.schedule(task) { @@ -1645,6 +1652,7 @@ where split_keys: split_region.take_keys().into(), callback: Callback::None, source: "pd".into(), + share_source_region_size: false, } } else { CasualMessage::HalfSplitRegion { @@ -2048,12 +2056,14 @@ where split_key, peer, right_derive, + share_source_region_size, callback, } => self.handle_ask_split( region, split_key, peer, right_derive, + share_source_region_size, callback, String::from("ask_split"), ), @@ -2062,6 +2072,7 @@ where split_keys, peer, right_derive, + share_source_region_size, callback, } => Self::handle_ask_batch_split( self.router.clone(), @@ -2071,6 +2082,7 @@ where split_keys, peer, right_derive, + share_source_region_size, callback, String::from("batch_split"), self.remote.clone(), @@ -2095,6 +2107,7 @@ where vec![split_key], split_info.peer, true, + false, Callback::None, String::from("auto_split"), remote.clone(), @@ -2385,6 +2398,7 @@ fn new_split_region_request( new_region_id: u64, peer_ids: Vec, right_derive: bool, + share_source_region_size: 
bool, ) -> AdminRequest { let mut req = AdminRequest::default(); req.set_cmd_type(AdminCmdType::Split); @@ -2392,6 +2406,8 @@ fn new_split_region_request( req.mut_split().set_new_region_id(new_region_id); req.mut_split().set_new_peer_ids(peer_ids); req.mut_split().set_right_derive(right_derive); + req.mut_split() + .set_share_source_region_size(share_source_region_size); req } @@ -2399,10 +2415,13 @@ fn new_batch_split_region_request( split_keys: Vec>, ids: Vec, right_derive: bool, + share_source_region_size: bool, ) -> AdminRequest { let mut req = AdminRequest::default(); req.set_cmd_type(AdminCmdType::BatchSplit); req.mut_splits().set_right_derive(right_derive); + req.mut_splits() + .set_share_source_region_size(share_source_region_size); let mut requests = Vec::with_capacity(ids.len()); for (mut id, key) in ids.into_iter().zip(split_keys) { let mut split = SplitRequest::default(); diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index e65028fe968..26fa2a47d5f 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1463,6 +1463,7 @@ impl Cluster { split_keys: vec![split_key], callback: cb, source: "test".into(), + share_source_region_size: false, }, ) .unwrap(); diff --git a/src/server/raftkv/raft_extension.rs b/src/server/raftkv/raft_extension.rs index d3178842489..733d60c838c 100644 --- a/src/server/raftkv/raft_extension.rs +++ b/src/server/raftkv/raft_extension.rs @@ -121,6 +121,7 @@ where split_keys, callback: raftstore::store::Callback::write(cb), source: source.into(), + share_source_region_size: false, }; let res = self.router.send_casual_msg(region_id, req); Box::pin(async move { diff --git a/src/server/raftkv2/raft_extension.rs b/src/server/raftkv2/raft_extension.rs index f2f433999b9..f6bb66e9e11 100644 --- a/src/server/raftkv2/raft_extension.rs +++ b/src/server/raftkv2/raft_extension.rs @@ -71,7 +71,7 @@ impl tikv_kv::RaftExtension for Extension split_keys: 
Vec>, source: String, ) -> futures::future::BoxFuture<'static, tikv_kv::Result>> { - let (msg, sub) = PeerMsg::request_split(region_epoch, split_keys, source); + let (msg, sub) = PeerMsg::request_split(region_epoch, split_keys, source, true); let res = self.router.check_send(region_id, msg); Box::pin(async move { res?; diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index ed01386b528..65c50793d7a 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -17,6 +17,7 @@ use kvproto::{ Mutation, Op, PessimisticLockRequest, PrewriteRequest, PrewriteRequestPessimisticAction::*, }, metapb::Region, + pdpb::CheckPolicy, raft_serverpb::{PeerState, RaftMessage}, tikvpb::TikvClient, }; @@ -31,6 +32,7 @@ use raftstore::{ Result, }; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{ config::{ReadableDuration, ReadableSize}, @@ -346,6 +348,68 @@ impl Filter for PrevoteRangeFilter { } } +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_region_size_after_split() { + let mut cluster = new_cluster(0, 1); + cluster.cfg.raft_store.right_derive_when_split = true; + cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(100); + cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(100); + cluster.cfg.raft_store.region_split_check_diff = Some(ReadableSize(10)); + let region_max_size = 1440; + let region_split_size = 960; + cluster.cfg.coprocessor.region_max_size = Some(ReadableSize(region_max_size)); + cluster.cfg.coprocessor.region_split_size = Some(ReadableSize(region_split_size)); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + let _r = cluster.run_conf_change(); + + // insert 20 key value pairs into the cluster. 
+ // from 000000001 to 000000020 + let mut range = 1..; + put_till_size(&mut cluster, region_max_size - 100, &mut range); + sleep_ms(100); + // disable check split. + fail::cfg("on_split_region_check_tick", "return").unwrap(); + let max_key = put_till_size(&mut cluster, region_max_size, &mut range); + // split by use key, split region 1 to region 1 and region 2. + // region 1: ["000000010",""] + // region 2: ["","000000010") + let region = pd_client.get_region(&max_key).unwrap(); + cluster.must_split(®ion, b"000000010"); + let size = cluster + .pd_client + .get_region_approximate_size(region.get_id()) + .unwrap_or_default(); + assert!(size >= region_max_size - 100, "{}", size); + + let region = pd_client.get_region(b"000000009").unwrap(); + let size1 = cluster + .pd_client + .get_region_approximate_size(region.get_id()) + .unwrap_or_default(); + assert_eq!(0, size1, "{}", size1); + + // split region by size check, the region 1 will be split to region 1 and region + // 3. and the region3 will contains one half region size data. + let region = pd_client.get_region(&max_key).unwrap(); + pd_client.split_region(region.clone(), CheckPolicy::Scan, vec![]); + sleep_ms(200); + let size2 = cluster + .pd_client + .get_region_approximate_size(region.get_id()) + .unwrap_or_default(); + assert!(size > size2, "{}:{}", size, size2); + fail::remove("on_split_region_check_tick"); + + let region = pd_client.get_region(b"000000010").unwrap(); + let size3 = cluster + .pd_client + .get_region_approximate_size(region.get_id()) + .unwrap_or_default(); + assert!(size3 > 0, "{}", size3); +} + // Test if a peer is created from splitting when another initialized peer with // the same region id has already existed. 
In previous implementation, it can be // created and panic will happen because there are two initialized peer with the From 02061bec4b8c2520eb2d5b003c064e3cd1a76a21 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 5 Sep 2023 15:09:43 +0800 Subject: [PATCH 0898/1149] raftstore-v2: limit the flush times during server stop (#15511) ref tikv/tikv#15461 limit the flush times during server stop Signed-off-by: SpadeA-Tang --- components/engine_traits/src/flush.rs | 2 +- .../src/operation/ready/apply_trace.rs | 15 ++++++++-- .../integrations/raftstore/test_bootstrap.rs | 30 +++++++++++++++++++ 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index d0f9f892f34..9344e84bb4e 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -234,7 +234,7 @@ impl PersistenceListener { /// /// `largest_seqno` should be the largest seqno of the generated file. pub fn on_flush_completed(&self, cf: &str, largest_seqno: u64, file_no: u64) { - fail_point!("on_flush_completed"); + fail_point!("on_flush_completed", |_| {}); // Maybe we should hook the compaction to avoid the file is compacted before // being recorded. 
let offset = data_cf_offset(cf); diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index d4743448d07..1601e1f01dd 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -46,7 +46,7 @@ use kvproto::{ use raftstore::store::{ util, ReadTask, TabletSnapManager, WriteTask, RAFT_INIT_LOG_INDEX, RAFT_INIT_LOG_TERM, }; -use slog::{info, trace, Logger}; +use slog::{info, trace, warn, Logger}; use tikv_util::{box_err, slog_panic, worker::Scheduler}; use crate::{ @@ -619,7 +619,18 @@ impl Peer { // flush the oldest cf one by one until we are under the replay count threshold loop { let replay_count = self.storage().estimate_replay_count(); - if replay_count < flush_threshold { + if replay_count < flush_threshold || tried_count == 3 { + // Ideally, the replay count should be 0 after three flush_oldest_cf. If not, + // there may exist bug, but it's not desireable to block here, so we at most try + // three times. 
+ if replay_count >= flush_threshold && tried_count == 3 { + warn!( + self.logger, + "after three flush_oldest_cf, the expected replay count still exceeds the threshold"; + "replay_count" => replay_count, + "threshold" => flush_threshold, + ); + } if flushed { let admin_flush = self.storage_mut().apply_trace_mut().admin.flushed; let (_, _, tablet_index) = ctx diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index b43a3d00d16..056641e1e3f 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -252,6 +252,36 @@ fn test_flush_before_stop() { .unwrap(); } +// test flush_before_close will not flush forever +#[test] +fn test_flush_before_stop2() { + use test_raftstore_v2::*; + + let mut cluster = new_server_cluster(0, 3); + cluster.run(); + + fail::cfg("flush_before_cluse_threshold", "return(10)").unwrap(); + fail::cfg("on_flush_completed", "return").unwrap(); + + for i in 0..20 { + let key = format!("k{:03}", i); + cluster.must_put_cf(CF_WRITE, key.as_bytes(), b"val"); + cluster.must_put_cf(CF_LOCK, key.as_bytes(), b"val"); + } + + let router = cluster.get_router(1).unwrap(); + let raft_engine = cluster.get_raft_engine(1); + + let (tx, rx) = sync_channel(1); + let msg = PeerMsg::FlushBeforeClose { tx }; + router.force_send(1, msg).unwrap(); + + rx.recv().unwrap(); + + let admin_flush = raft_engine.get_flushed_index(1, CF_RAFT).unwrap().unwrap(); + assert!(admin_flush < 10); +} + // We cannot use a flushed index to call `maybe_advance_admin_flushed` // consider a case: // 1. lock `k` with index 6 From 1c21d07f2bfb181993838f2ae3ed34dceff1b6cb Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 5 Sep 2023 15:41:11 +0800 Subject: [PATCH 0899/1149] resolved_ts: track pending lock memory usage (#15452) ref tikv/tikv#14864 * Fix resolved ts OOM caused by adding large txns locks to `ResolverStatus`. * Add initial scan backoff duration metrics. 
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: Connor --- components/resolved_ts/src/endpoint.rs | 340 +++++++++++------- components/resolved_ts/src/metrics.rs | 6 + components/resolved_ts/src/resolver.rs | 4 +- components/resolved_ts/src/scanner.rs | 4 +- .../resolved_ts/tests/failpoints/mod.rs | 45 +++ metrics/grafana/tikv_details.json | 73 ++++ 6 files changed, 339 insertions(+), 133 deletions(-) diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 3c1ad9d8c8d..fc3e24de1e4 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -28,7 +28,7 @@ use raftstore::{ use security::SecurityManager; use tikv::config::ResolvedTsConfig; use tikv_util::{ - memory::MemoryQuota, + memory::{HeapSize, MemoryQuota}, warn, worker::{Runnable, RunnableWithTimer, Scheduler}, }; @@ -41,7 +41,7 @@ use crate::{ metrics::*, resolver::Resolver, scanner::{ScanEntry, ScanMode, ScanTask, ScannerPool}, - Error, Result, + Error, Result, ON_DROP_WARN_HEAP_SIZE, }; /// grace period for logging safe-ts and resolved-ts gap in slow log @@ -53,10 +53,102 @@ enum ResolverStatus { tracked_index: u64, locks: Vec, cancelled: Arc, + memory_quota: Arc, }, Ready, } +impl Drop for ResolverStatus { + fn drop(&mut self) { + let ResolverStatus::Pending { + locks, + memory_quota, + .. + } = self else { + return; + }; + if locks.is_empty() { + return; + } + + // Free memory quota used by pending locks and unlocks. 
+ let mut bytes = 0; + let num_locks = locks.len(); + for lock in locks { + bytes += lock.heap_size(); + } + if bytes > ON_DROP_WARN_HEAP_SIZE { + warn!("drop huge ResolverStatus"; + "bytes" => bytes, + "num_locks" => num_locks, + "memory_quota_in_use" => memory_quota.in_use(), + "memory_quota_capacity" => memory_quota.capacity(), + ); + } + memory_quota.free(bytes); + } +} + +impl ResolverStatus { + fn push_pending_lock(&mut self, lock: PendingLock, region_id: u64) -> Result<()> { + let ResolverStatus::Pending { + locks, + memory_quota, + .. + } = self else { + panic!("region {:?} resolver has ready", region_id) + }; + // Check if adding a new lock or unlock will exceed the memory + // quota. + if !memory_quota.alloc(lock.heap_size()) { + fail::fail_point!("resolved_ts_on_pending_locks_memory_quota_exceeded"); + return Err(Error::MemoryQuotaExceeded); + } + locks.push(lock); + Ok(()) + } + + fn update_tracked_index(&mut self, index: u64, region_id: u64) { + let ResolverStatus::Pending { + tracked_index, + .. + } = self else { + panic!("region {:?} resolver has ready", region_id) + }; + assert!( + *tracked_index < index, + "region {}, tracked_index: {}, incoming index: {}", + region_id, + *tracked_index, + index + ); + *tracked_index = index; + } + + fn drain_pending_locks( + &mut self, + region_id: u64, + ) -> (u64, impl Iterator + '_) { + let ResolverStatus::Pending { + locks, + memory_quota, + tracked_index, + .. + } = self else { + panic!("region {:?} resolver has ready", region_id) + }; + // Must take locks, otherwise it may double free memory quota on drop. + let locks = std::mem::take(locks); + ( + *tracked_index, + locks.into_iter().map(|lock| { + memory_quota.free(lock.heap_size()); + lock + }), + ) + } +} + #[allow(dead_code)] enum PendingLock { Track { @@ -70,6 +162,16 @@ enum PendingLock { }, } +impl HeapSize for PendingLock { + fn heap_size(&self) -> usize { + match self { + PendingLock::Track { key, .. } | PendingLock::Untrack { key, .. 
} => { + key.as_encoded().heap_size() + } + } + } +} + // Records information related to observed region. // observe_id is used for avoiding ABA problems in incremental scan task, // advance resolved ts task, and command observing. @@ -85,13 +187,14 @@ struct ObserveRegion { impl ObserveRegion { fn new(meta: Region, rrp: Arc, memory_quota: Arc) -> Self { ObserveRegion { - resolver: Resolver::with_read_progress(meta.id, Some(rrp), memory_quota), + resolver: Resolver::with_read_progress(meta.id, Some(rrp), memory_quota.clone()), meta, handle: ObserveHandle::new(), resolver_status: ResolverStatus::Pending { tracked_index: 0, locks: vec![], cancelled: Arc::new(AtomicBool::new(false)), + memory_quota, }, } } @@ -101,122 +204,109 @@ impl ObserveRegion { } fn track_change_log(&mut self, change_logs: &[ChangeLog]) -> Result<()> { - match &mut self.resolver_status { - ResolverStatus::Pending { - locks, - tracked_index, - .. - } => { - for log in change_logs { - match log { - ChangeLog::Error(e) => { - debug!( - "skip change log error"; - "region" => self.meta.id, - "error" => ?e, - ); - continue; - } - ChangeLog::Admin(req_type) => { - // TODO: for admin cmd that won't change the region meta like peer list - // and key range (i.e. `CompactLog`, `ComputeHash`) we may not need to - // return error - return Err(box_err!( - "region met admin command {:?} while initializing resolver", - req_type - )); - } - ChangeLog::Rows { rows, index } => { - rows.iter().for_each(|row| match row { - ChangeRow::Prewrite { key, start_ts, .. } => { - locks.push(PendingLock::Track { - key: key.clone(), - start_ts: *start_ts, - }) - } + if matches!(self.resolver_status, ResolverStatus::Pending { .. 
}) { + for log in change_logs { + match log { + ChangeLog::Error(e) => { + debug!( + "skip change log error"; + "region" => self.meta.id, + "error" => ?e, + ); + continue; + } + ChangeLog::Admin(req_type) => { + // TODO: for admin cmd that won't change the region meta like peer list + // and key range (i.e. `CompactLog`, `ComputeHash`) we may not need to + // return error + return Err(box_err!( + "region met admin command {:?} while initializing resolver", + req_type + )); + } + ChangeLog::Rows { rows, index } => { + for row in rows { + let lock = match row { + ChangeRow::Prewrite { key, start_ts, .. } => PendingLock::Track { + key: key.clone(), + start_ts: *start_ts, + }, ChangeRow::Commit { key, start_ts, commit_ts, .. - } => locks.push(PendingLock::Untrack { + } => PendingLock::Untrack { key: key.clone(), start_ts: *start_ts, commit_ts: *commit_ts, - }), + }, // One pc command do not contains any lock, so just skip it - ChangeRow::OnePc { .. } => {} - ChangeRow::IngestSsT => {} - }); - assert!( - *tracked_index < *index, - "region {}, tracked_index: {}, incoming index: {}", - self.meta.id, - *tracked_index, - *index - ); - *tracked_index = *index; + ChangeRow::OnePc { .. 
} | ChangeRow::IngestSsT => continue, + }; + self.resolver_status.push_pending_lock(lock, self.meta.id)?; } + self.resolver_status + .update_tracked_index(*index, self.meta.id); } } } - ResolverStatus::Ready => { - for log in change_logs { - match log { - ChangeLog::Error(e) => { + } else { + for log in change_logs { + match log { + ChangeLog::Error(e) => { + debug!( + "skip change log error"; + "region" => self.meta.id, + "error" => ?e, + ); + continue; + } + ChangeLog::Admin(req_type) => match req_type { + AdminCmdType::Split + | AdminCmdType::BatchSplit + | AdminCmdType::PrepareMerge + | AdminCmdType::RollbackMerge + | AdminCmdType::CommitMerge => { + info!( + "region met split/merge command, stop tracking since key range changed, wait for re-register"; + "req_type" => ?req_type, + ); + // Stop tracking so that `tracked_index` larger than the split/merge + // command index won't be published until `RegionUpdate` event + // trigger the region re-register and re-scan the new key range + self.resolver.stop_tracking(); + } + _ => { debug!( - "skip change log error"; + "skip change log admin"; "region" => self.meta.id, - "error" => ?e, + "req_type" => ?req_type, ); - continue; } - ChangeLog::Admin(req_type) => match req_type { - AdminCmdType::Split - | AdminCmdType::BatchSplit - | AdminCmdType::PrepareMerge - | AdminCmdType::RollbackMerge - | AdminCmdType::CommitMerge => { - info!( - "region met split/merge command, stop tracking since key range changed, wait for re-register"; - "req_type" => ?req_type, - ); - // Stop tracking so that `tracked_index` larger than the split/merge - // command index won't be published until `RegionUpdate` event - // trigger the region re-register and re-scan the new key range - self.resolver.stop_tracking(); - } - _ => { - debug!( - "skip change log admin"; - "region" => self.meta.id, - "req_type" => ?req_type, - ); - } - }, - ChangeLog::Rows { rows, index } => { - for row in rows { - match row { - ChangeRow::Prewrite { key, start_ts, 
.. } => { - if !self.resolver.track_lock( - *start_ts, - key.to_raw().unwrap(), - Some(*index), - ) { - return Err(Error::MemoryQuotaExceeded); - } - } - ChangeRow::Commit { key, .. } => self - .resolver - .untrack_lock(&key.to_raw().unwrap(), Some(*index)), - // One pc command do not contains any lock, so just skip it - ChangeRow::OnePc { .. } => { - self.resolver.update_tracked_index(*index); - } - ChangeRow::IngestSsT => { - self.resolver.update_tracked_index(*index); + }, + ChangeLog::Rows { rows, index } => { + for row in rows { + match row { + ChangeRow::Prewrite { key, start_ts, .. } => { + if !self.resolver.track_lock( + *start_ts, + key.to_raw().unwrap(), + Some(*index), + ) { + return Err(Error::MemoryQuotaExceeded); } } + ChangeRow::Commit { key, .. } => self + .resolver + .untrack_lock(&key.to_raw().unwrap(), Some(*index)), + // One pc command do not contains any lock, so just skip it + ChangeRow::OnePc { .. } => { + self.resolver.update_tracked_index(*index); + } + ChangeRow::IngestSsT => { + self.resolver.update_tracked_index(*index); + } } } } @@ -247,38 +337,26 @@ impl ObserveRegion { ScanEntry::None => { // Update the `tracked_index` to the snapshot's `apply_index` self.resolver.update_tracked_index(apply_index); - let pending_tracked_index = - match std::mem::replace(&mut self.resolver_status, ResolverStatus::Ready) { - ResolverStatus::Pending { - locks, - tracked_index, - .. - } => { - for lock in locks { - match lock { - PendingLock::Track { key, start_ts } => { - if !self.resolver.track_lock( - start_ts, - key.to_raw().unwrap(), - Some(tracked_index), - ) { - return Err(Error::MemoryQuotaExceeded); - } - } - PendingLock::Untrack { key, .. 
} => { - self.resolver.untrack_lock( - &key.to_raw().unwrap(), - Some(tracked_index), - ) - } - } + let mut resolver_status = + std::mem::replace(&mut self.resolver_status, ResolverStatus::Ready); + let (pending_tracked_index, pending_locks) = + resolver_status.drain_pending_locks(self.meta.id); + for lock in pending_locks { + match lock { + PendingLock::Track { key, start_ts } => { + if !self.resolver.track_lock( + start_ts, + key.to_raw().unwrap(), + Some(pending_tracked_index), + ) { + return Err(Error::MemoryQuotaExceeded); } - tracked_index } - ResolverStatus::Ready => { - panic!("region {:?} resolver has ready", self.meta.id) - } - }; + PendingLock::Untrack { key, .. } => self + .resolver + .untrack_lock(&key.to_raw().unwrap(), Some(pending_tracked_index)), + } + } info!( "Resolver initialized"; "region" => self.meta.id, @@ -457,7 +535,7 @@ where // Stop observing data handle.stop_observing(); // Stop scanning data - if let ResolverStatus::Pending { cancelled, .. } = resolver_status { + if let ResolverStatus::Pending { ref cancelled, .. } = resolver_status { cancelled.store(true, Ordering::Release); } } else { diff --git a/components/resolved_ts/src/metrics.rs b/components/resolved_ts/src/metrics.rs index 15b3463f70e..74da743952c 100644 --- a/components/resolved_ts/src/metrics.rs +++ b/components/resolved_ts/src/metrics.rs @@ -138,4 +138,10 @@ lazy_static! 
{ "The minimal (non-zero) resolved ts gap for observe leader peers" ) .unwrap(); + pub static ref RTS_INITIAL_SCAN_BACKOFF_DURATION_HISTOGRAM: Histogram = register_histogram!( + "tikv_resolved_ts_initial_scan_backoff_duration_seconds", + "Bucketed histogram of resolved-ts initial scan backoff duration", + exponential_buckets(0.1, 2.0, 16).unwrap(), + ) + .unwrap(); } diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 6bee5efd2f6..405138d41cf 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -13,7 +13,7 @@ use txn_types::TimeStamp; use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; -const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB +pub(crate) const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB // Resolver resolves timestamps that guarantee no more commit will happen before // the timestamp. @@ -74,6 +74,8 @@ impl Drop for Resolver { "region_id" => self.region_id, "bytes" => bytes, "num_locks" => num_locks, + "memory_quota_in_use" => self.memory_quota.in_use(), + "memory_quota_capacity" => self.memory_quota.capacity(), ); } self.memory_quota.free(bytes); diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index e8665e9d860..615819db799 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -21,7 +21,7 @@ use txn_types::{Key, Lock, LockType, TimeStamp}; use crate::{ errors::{Error, Result}, - metrics::RTS_SCAN_DURATION_HISTOGRAM, + metrics::*, }; const DEFAULT_SCAN_BATCH_SIZE: usize = 1024; @@ -86,6 +86,7 @@ impl, E: KvEngine> ScannerPool { let cdc_handle = self.cdc_handle.clone(); let fut = async move { if let Some(backoff) = task.backoff { + RTS_INITIAL_SCAN_BACKOFF_DURATION_HISTOGRAM.observe(backoff.as_secs_f64()); if let Err(e) = GLOBAL_TIMER_HANDLE .delay(std::time::Instant::now() + backoff) .compat() @@ -113,6 
+114,7 @@ impl, E: KvEngine> ScannerPool { return; } }; + fail::fail_point!("resolved_ts_after_scanner_get_snapshot"); let start = Instant::now(); let apply_index = snap.get_apply_index().unwrap(); let mut entries = vec![]; diff --git a/components/resolved_ts/tests/failpoints/mod.rs b/components/resolved_ts/tests/failpoints/mod.rs index 808f5ed62ff..0c594ab1d1d 100644 --- a/components/resolved_ts/tests/failpoints/mod.rs +++ b/components/resolved_ts/tests/failpoints/mod.rs @@ -2,6 +2,11 @@ #[path = "../mod.rs"] mod testsuite; +use std::{ + sync::{mpsc::channel, Mutex}, + time::Duration, +}; + use futures::executor::block_on; use kvproto::kvrpcpb::*; use pd_client::PdClient; @@ -128,3 +133,43 @@ fn test_report_min_resolved_ts_disable() { fail::remove("mock_min_resolved_ts_interval_disable"); suite.stop(); } + +#[test] +fn test_pending_locks_memory_quota_exceeded() { + // Pause scan lock so that locks will be put in pending locks. + fail::cfg("resolved_ts_after_scanner_get_snapshot", "pause").unwrap(); + // Check if memory quota exceeded is triggered. + let (tx, rx) = channel(); + let tx = Mutex::new(tx); + fail::cfg_callback( + "resolved_ts_on_pending_locks_memory_quota_exceeded", + move || { + let sender = tx.lock().unwrap(); + sender.send(()).unwrap(); + }, + ) + .unwrap(); + + let mut suite = TestSuite::new(1); + let region = suite.cluster.get_region(&[]); + + // Must not trigger memory quota exceeded. + rx.recv_timeout(Duration::from_millis(100)).unwrap_err(); + + // Set a small memory quota to trigger memory quota exceeded. + suite.must_change_memory_quota(1, 1); + let (k, v) = (b"k1", b"v"); + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.to_vec(); + mutation.value = v.to_vec(); + suite.must_kv_prewrite(region.id, vec![mutation], k.to_vec(), start_ts, false); + + // Must trigger memory quota exceeded. 
+ rx.recv_timeout(Duration::from_secs(5)).unwrap(); + + fail::remove("resolved_ts_after_scanner_get_snapshot"); + fail::remove("resolved_ts_on_pending_locks_memory_quota_exceeded"); + suite.stop(); +} diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index d327041cd8a..c78540c601a 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -39471,6 +39471,79 @@ "yBucketNumber": null, "yBucketSize": null }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The backoff duration before starting initial scan", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 70 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 23763573950, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "metric": "", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Initial scan backoff duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, From 
6b91e4a2284296887c1a0eb32865e5d8ab90ebb7 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 5 Sep 2023 16:45:42 +0800 Subject: [PATCH 0900/1149] cdc: deregister delegate if memory quota exceeded (#15486) close tikv/tikv#15412 Similar to resolved-ts endpoint, cdc endpoint maintains resolvers for subscribed regions. These resolvers also need memory quota, otherwise they may cause OOM. This commit lets cdc endpoint deregister regions if they exceed memory quota. Signed-off-by: Neil Shen --- components/cdc/src/channel.rs | 3 + components/cdc/src/delegate.rs | 183 ++++++++--- components/cdc/src/endpoint.rs | 40 ++- components/cdc/src/errors.rs | 2 + components/cdc/src/initializer.rs | 56 +++- components/cdc/tests/failpoints/mod.rs | 1 + .../cdc/tests/failpoints/test_memory_quota.rs | 289 ++++++++++++++++++ components/cdc/tests/mod.rs | 11 +- components/resolved_ts/src/resolver.rs | 25 +- 9 files changed, 517 insertions(+), 93 deletions(-) create mode 100644 components/cdc/tests/failpoints/test_memory_quota.rs diff --git a/components/cdc/src/channel.rs b/components/cdc/src/channel.rs index 6a8c3d5c3aa..a3ddeeb9030 100644 --- a/components/cdc/src/channel.rs +++ b/components/cdc/src/channel.rs @@ -52,6 +52,9 @@ pub enum CdcEvent { impl CdcEvent { pub fn size(&self) -> u32 { + fail::fail_point!("cdc_event_size", |size| size + .map(|s| s.parse::().unwrap()) + .unwrap_or(0)); match self { CdcEvent::ResolvedTs(ref r) => { // For region id, it is unlikely to exceed 100,000,000 which is diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index da5c26aad30..e109b3368b4 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -28,9 +28,13 @@ use raftstore::{ store::util::compare_region_epoch, Error as RaftStoreError, }; -use resolved_ts::Resolver; +use resolved_ts::{Resolver, ON_DROP_WARN_HEAP_SIZE}; use tikv::storage::{txn::TxnEntry, Statistics}; -use tikv_util::{debug, info, warn}; +use tikv_util::{ + debug, info, + 
memory::{HeapSize, MemoryQuota}, + warn, +}; use txn_types::{Key, Lock, LockType, TimeStamp, WriteBatchFlags, WriteRef, WriteType}; use crate::{ @@ -226,16 +230,77 @@ impl Downstream { } } -#[derive(Default)] struct Pending { - pub downstreams: Vec, - pub locks: Vec, - pub pending_bytes: usize, + downstreams: Vec, + locks: Vec, + pending_bytes: usize, + memory_quota: Arc, +} + +impl Pending { + fn new(memory_quota: Arc) -> Pending { + Pending { + downstreams: vec![], + locks: vec![], + pending_bytes: 0, + memory_quota, + } + } + + fn push_pending_lock(&mut self, lock: PendingLock) -> Result<()> { + let bytes = lock.heap_size(); + if !self.memory_quota.alloc(bytes) { + return Err(Error::MemoryQuotaExceeded); + } + self.locks.push(lock); + self.pending_bytes += bytes; + CDC_PENDING_BYTES_GAUGE.add(bytes as i64); + Ok(()) + } + + fn on_region_ready(&mut self, resolver: &mut Resolver) -> Result<()> { + fail::fail_point!("cdc_pending_on_region_ready", |_| Err( + Error::MemoryQuotaExceeded + )); + // Must take locks, otherwise it may double free memory quota on drop. + for lock in mem::take(&mut self.locks) { + self.memory_quota.free(lock.heap_size()); + match lock { + PendingLock::Track { key, start_ts } => { + if !resolver.track_lock(start_ts, key, None) { + return Err(Error::MemoryQuotaExceeded); + } + } + PendingLock::Untrack { key } => resolver.untrack_lock(&key, None), + } + } + Ok(()) + } } impl Drop for Pending { fn drop(&mut self) { CDC_PENDING_BYTES_GAUGE.sub(self.pending_bytes as i64); + let locks = mem::take(&mut self.locks); + if locks.is_empty() { + return; + } + + // Free memory quota used by pending locks and unlocks. 
+ let mut bytes = 0; + let num_locks = locks.len(); + for lock in locks { + bytes += lock.heap_size(); + } + if bytes > ON_DROP_WARN_HEAP_SIZE { + warn!("cdc drop huge Pending"; + "bytes" => bytes, + "num_locks" => num_locks, + "memory_quota_in_use" => self.memory_quota.in_use(), + "memory_quota_capacity" => self.memory_quota.capacity(), + ); + } + self.memory_quota.free(bytes); } } @@ -244,6 +309,14 @@ enum PendingLock { Untrack { key: Vec }, } +impl HeapSize for PendingLock { + fn heap_size(&self) -> usize { + match self { + PendingLock::Track { key, .. } | PendingLock::Untrack { key } => key.heap_size(), + } + } +} + /// A CDC delegate of a raftstore region peer. /// /// It converts raft commands into CDC events and broadcast to downstreams. @@ -265,14 +338,18 @@ pub struct Delegate { impl Delegate { /// Create a Delegate the given region. - pub fn new(region_id: u64, txn_extra_op: Arc>) -> Delegate { + pub fn new( + region_id: u64, + txn_extra_op: Arc>, + memory_quota: Arc, + ) -> Delegate { Delegate { region_id, handle: ObserveHandle::new(), resolver: None, region: None, resolved_downstreams: Vec::new(), - pending: Some(Pending::default()), + pending: Some(Pending::new(memory_quota)), txn_extra_op, failed: false, } @@ -395,7 +472,7 @@ impl Delegate { &mut self, mut resolver: Resolver, region: Region, - ) -> Vec<(&Downstream, Error)> { + ) -> Result> { assert!( self.resolver.is_none(), "region {} resolver should not be ready", @@ -408,29 +485,24 @@ impl Delegate { } // Mark the delegate as initialized. - let mut pending = self.pending.take().unwrap(); - self.region = Some(region); info!("cdc region is ready"; "region_id" => self.region_id); + // Downstreams in pending must be moved to resolved_downstreams + // immediately and must not return in the middle, otherwise the delegate + // loses downstreams. 
+ let mut pending = self.pending.take().unwrap(); + self.resolved_downstreams = mem::take(&mut pending.downstreams); - for lock in mem::take(&mut pending.locks) { - match lock { - PendingLock::Track { key, start_ts } => { - // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. - assert!(resolver.track_lock(start_ts, key, None)); - } - PendingLock::Untrack { key } => resolver.untrack_lock(&key, None), - } - } + pending.on_region_ready(&mut resolver)?; self.resolver = Some(resolver); + self.region = Some(region); - self.resolved_downstreams = mem::take(&mut pending.downstreams); let mut failed_downstreams = Vec::new(); for downstream in self.downstreams() { if let Err(e) = self.check_epoch_on_ready(downstream) { failed_downstreams.push((downstream, e)); } } - failed_downstreams + Ok(failed_downstreams) } /// Try advance and broadcast resolved ts. @@ -611,16 +683,14 @@ impl Delegate { let mut txn_rows: HashMap, (EventRow, bool)> = HashMap::default(); let mut raw_rows: Vec = Vec::new(); for mut req in requests { - match req.get_cmd_type() { - CmdType::Put => { - self.sink_put( - req.take_put(), - is_one_pc, - &mut txn_rows, - &mut raw_rows, - &mut read_old_value, - )?; - } + let res = match req.get_cmd_type() { + CmdType::Put => self.sink_put( + req.take_put(), + is_one_pc, + &mut txn_rows, + &mut raw_rows, + &mut read_old_value, + ), CmdType::Delete => self.sink_delete(req.take_delete()), _ => { debug!( @@ -628,7 +698,12 @@ impl Delegate { "region_id" => self.region_id, "command" => ?req, ); + Ok(()) } + }; + if res.is_err() { + self.mark_failed(); + return res; } } @@ -825,18 +900,17 @@ impl Delegate { // In order to compute resolved ts, we must track inflight txns. match self.resolver { Some(ref mut resolver) => { - // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. 
- assert!(resolver.track_lock(row.start_ts.into(), row.key.clone(), None)); + if !resolver.track_lock(row.start_ts.into(), row.key.clone(), None) { + return Err(Error::MemoryQuotaExceeded); + } } None => { assert!(self.pending.is_some(), "region resolver not ready"); let pending = self.pending.as_mut().unwrap(); - pending.locks.push(PendingLock::Track { + pending.push_pending_lock(PendingLock::Track { key: row.key.clone(), start_ts: row.start_ts.into(), - }); - pending.pending_bytes += row.key.len(); - CDC_PENDING_BYTES_GAUGE.add(row.key.len() as i64); + })?; } } @@ -858,7 +932,7 @@ impl Delegate { Ok(()) } - fn sink_delete(&mut self, mut delete: DeleteRequest) { + fn sink_delete(&mut self, mut delete: DeleteRequest) -> Result<()> { match delete.cf.as_str() { "lock" => { let raw_key = Key::from_encoded(delete.take_key()).into_raw().unwrap(); @@ -866,11 +940,8 @@ impl Delegate { Some(ref mut resolver) => resolver.untrack_lock(&raw_key, None), None => { assert!(self.pending.is_some(), "region resolver not ready"); - let key_len = raw_key.len(); let pending = self.pending.as_mut().unwrap(); - pending.locks.push(PendingLock::Untrack { key: raw_key }); - pending.pending_bytes += key_len; - CDC_PENDING_BYTES_GAUGE.add(key_len as i64); + pending.push_pending_lock(PendingLock::Untrack { key: raw_key })?; } } } @@ -879,6 +950,7 @@ impl Delegate { panic!("invalid cf {}", other); } } + Ok(()) } fn sink_admin(&mut self, request: AdminRequest, mut response: AdminResponse) -> Result<()> { @@ -949,7 +1021,7 @@ impl Delegate { } fn stop_observing(&self) { - info!("stop observing"; "region_id" => self.region_id, "failed" => self.failed); + info!("cdc stop observing"; "region_id" => self.region_id, "failed" => self.failed); // Stop observe further events. self.handle.stop_observing(); // To inform transaction layer no more old values are required for the region. 
@@ -1184,12 +1256,18 @@ mod tests { ObservedRange::default(), ); downstream.set_sink(sink); - let mut delegate = Delegate::new(region_id, Default::default()); + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); + let mut delegate = Delegate::new(region_id, Default::default(), memory_quota); delegate.subscribe(downstream).unwrap(); assert!(delegate.handle.is_observing()); let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); let resolver = Resolver::new(region_id, memory_quota); - assert!(delegate.on_region_ready(resolver, region).is_empty()); + assert!( + delegate + .on_region_ready(resolver, region) + .unwrap() + .is_empty() + ); assert!(delegate.downstreams()[0].observed_range.all_key_covered); let rx_wrap = Cell::new(Some(rx)); @@ -1313,8 +1391,9 @@ mod tests { }; // Create a new delegate. + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); let txn_extra_op = Arc::new(AtomicCell::new(TxnExtraOp::Noop)); - let mut delegate = Delegate::new(1, txn_extra_op.clone()); + let mut delegate = Delegate::new(1, txn_extra_op.clone(), memory_quota); assert_eq!(txn_extra_op.load(), TxnExtraOp::Noop); assert!(delegate.handle.is_observing()); @@ -1340,7 +1419,9 @@ mod tests { region.mut_region_epoch().set_version(1); { let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); - let failures = delegate.on_region_ready(Resolver::new(1, memory_quota), region); + let failures = delegate + .on_region_ready(Resolver::new(1, memory_quota), region) + .unwrap(); assert_eq!(failures.len(), 1); let id = failures[0].0.id; delegate.unsubscribe(id, None); @@ -1431,8 +1512,9 @@ mod tests { Key::from_raw(b"d").into_encoded(), ) .unwrap(); + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); let txn_extra_op = Arc::new(AtomicCell::new(TxnExtraOp::Noop)); - let mut delegate = Delegate::new(1, txn_extra_op); + let mut delegate = Delegate::new(1, txn_extra_op, memory_quota); assert!(delegate.handle.is_observing()); let mut map = HashMap::default(); @@ 
-1500,8 +1582,9 @@ mod tests { Key::from_raw(b"f").into_encoded(), ) .unwrap(); + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); let txn_extra_op = Arc::new(AtomicCell::new(TxnExtraOp::Noop)); - let mut delegate = Delegate::new(1, txn_extra_op); + let mut delegate = Delegate::new(1, txn_extra_op, memory_quota); assert!(delegate.handle.is_observing()); let mut map = HashMap::default(); diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 969d0cba0d9..2b314f22443 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -736,7 +736,11 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint e.into_mut(), HashMapEntry::Vacant(e) => { is_new_delegate = true; - e.insert(Delegate::new(region_id, txn_extra_op)) + e.insert(Delegate::new( + region_id, + txn_extra_op, + self.sink_memory_quota.clone(), + )) } }; @@ -802,10 +806,11 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint { @@ -858,18 +863,26 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint { + for (downstream, e) in fails { + deregisters.push(Deregister::Downstream { + conn_id: downstream.get_conn_id(), + request_id: downstream.get_req_id(), + region_id, + downstream_id: downstream.get_id(), + err: Some(e), + }); + } + } + Err(e) => deregisters.push(Deregister::Delegate { region_id, - downstream_id: downstream.get_id(), - err: Some(e), - }); + observe_id, + err: e, + }), } } else { debug!("cdc stale region ready"; @@ -883,7 +896,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint Initializer { change_observer: ChangeObserver, cdc_handle: T, concurrency_semaphore: Arc, + memory_quota: Arc, ) -> Result<()> { fail_point!("cdc_before_initialize"); let _permit = concurrency_semaphore.acquire().await; @@ -173,7 +174,7 @@ impl Initializer { } match fut.await { - Ok(resp) => self.on_change_cmd_response(resp).await, + Ok(resp) => self.on_change_cmd_response(resp, memory_quota).await, Err(e) => Err(Error::Other(box_err!(e))), } } @@ -181,11 +182,13 @@ impl 
Initializer { pub(crate) async fn on_change_cmd_response( &mut self, mut resp: ReadResponse, + memory_quota: Arc, ) -> Result<()> { if let Some(region_snapshot) = resp.snapshot { assert_eq!(self.region_id, region_snapshot.get_region().get_id()); let region = region_snapshot.get_region().clone(); - self.async_incremental_scan(region_snapshot, region).await + self.async_incremental_scan(region_snapshot, region, memory_quota) + .await } else { assert!( resp.response.get_header().has_error(), @@ -201,6 +204,7 @@ impl Initializer { &mut self, snap: S, region: Region, + memory_quota: Arc, ) -> Result<()> { let downstream_id = self.downstream_id; let region_id = region.get_id(); @@ -216,8 +220,6 @@ impl Initializer { "end_key" => log_wrappers::Value::key(snap.upper_bound().unwrap_or_default())); let mut resolver = if self.build_resolver { - // TODO: limit the memory usage of the resolver. - let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); Some(Resolver::new(region_id, memory_quota)) } else { None @@ -422,9 +424,9 @@ impl Initializer { let lock = Lock::parse(value)?; match lock.lock_type { LockType::Put | LockType::Delete => { - // TODO: handle memory quota exceed, for now, quota is set to - // usize::MAX. 
- assert!(resolver.track_lock(lock.ts, key, None)); + if !resolver.track_lock(lock.ts, key, None) { + return Err(Error::MemoryQuotaExceeded); + } } _ => (), }; @@ -745,21 +747,37 @@ mod tests { } }); - block_on(initializer.async_incremental_scan(snap.clone(), region.clone())).unwrap(); + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); + block_on(initializer.async_incremental_scan( + snap.clone(), + region.clone(), + memory_quota.clone(), + )) + .unwrap(); check_result(); initializer .downstream_state .store(DownstreamState::Initializing); initializer.max_scan_batch_bytes = total_bytes; - block_on(initializer.async_incremental_scan(snap.clone(), region.clone())).unwrap(); + block_on(initializer.async_incremental_scan( + snap.clone(), + region.clone(), + memory_quota.clone(), + )) + .unwrap(); check_result(); initializer .downstream_state .store(DownstreamState::Initializing); initializer.build_resolver = false; - block_on(initializer.async_incremental_scan(snap.clone(), region.clone())).unwrap(); + block_on(initializer.async_incremental_scan( + snap.clone(), + region.clone(), + memory_quota.clone(), + )) + .unwrap(); loop { let task = rx.recv_timeout(Duration::from_millis(100)); @@ -772,7 +790,8 @@ mod tests { // Test cancellation. initializer.downstream_state.store(DownstreamState::Stopped); - block_on(initializer.async_incremental_scan(snap.clone(), region)).unwrap_err(); + block_on(initializer.async_incremental_scan(snap.clone(), region, memory_quota.clone())) + .unwrap_err(); // Cancel error should trigger a deregsiter. let mut region = Region::default(); @@ -784,14 +803,15 @@ mod tests { response: Default::default(), txn_extra_op: Default::default(), }; - block_on(initializer.on_change_cmd_response(resp.clone())).unwrap_err(); + block_on(initializer.on_change_cmd_response(resp.clone(), memory_quota.clone())) + .unwrap_err(); // Disconnect sink by dropping runtime (it also drops drain). 
drop(pool); initializer .downstream_state .store(DownstreamState::Initializing); - block_on(initializer.on_change_cmd_response(resp)).unwrap_err(); + block_on(initializer.on_change_cmd_response(resp, memory_quota)).unwrap_err(); worker.stop(); } @@ -819,8 +839,9 @@ mod tests { filter_loop, ); let th = pool.spawn(async move { + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); initializer - .async_incremental_scan(snap, Region::default()) + .async_incremental_scan(snap, Region::default(), memory_quota) .await .unwrap(); }); @@ -904,8 +925,9 @@ mod tests { let snap = engine.snapshot(Default::default()).unwrap(); let th = pool.spawn(async move { + let memory_qutoa = Arc::new(MemoryQuota::new(usize::MAX)); initializer - .async_incremental_scan(snap, Region::default()) + .async_incremental_scan(snap, Region::default(), memory_qutoa) .await .unwrap(); }); @@ -1017,12 +1039,14 @@ mod tests { let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); let raft_router = CdcRaftRouter(MockRaftStoreRouter::new()); let concurrency_semaphore = Arc::new(Semaphore::new(1)); + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); initializer.downstream_state.store(DownstreamState::Stopped); block_on(initializer.initialize( change_cmd, raft_router.clone(), concurrency_semaphore.clone(), + memory_quota.clone(), )) .unwrap_err(); @@ -1048,7 +1072,7 @@ mod tests { &concurrency_semaphore, ); let res = initializer - .initialize(change_cmd, raft_router, concurrency_semaphore) + .initialize(change_cmd, raft_router, concurrency_semaphore, memory_quota) .await; tx1.send(res).unwrap(); }); diff --git a/components/cdc/tests/failpoints/mod.rs b/components/cdc/tests/failpoints/mod.rs index 082b1c15f67..619ee200985 100644 --- a/components/cdc/tests/failpoints/mod.rs +++ b/components/cdc/tests/failpoints/mod.rs @@ -4,6 +4,7 @@ #![test_runner(test_util::run_failpoint_tests)] mod test_endpoint; +mod test_memory_quota; mod test_observe; mod test_register; mod test_resolve; diff 
--git a/components/cdc/tests/failpoints/test_memory_quota.rs b/components/cdc/tests/failpoints/test_memory_quota.rs new file mode 100644 index 00000000000..5b564ba61ec --- /dev/null +++ b/components/cdc/tests/failpoints/test_memory_quota.rs @@ -0,0 +1,289 @@ +// Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{sync::*, time::Duration}; + +use cdc::{Task, Validate}; +use futures::{executor::block_on, SinkExt}; +use grpcio::WriteFlags; +use kvproto::{cdcpb::*, kvrpcpb::*}; +use pd_client::PdClient; +use test_raftstore::*; + +use crate::{new_event_feed, TestSuiteBuilder}; + +#[test] +fn test_resolver_track_lock_memory_quota_exceeded() { + let mut cluster = new_server_cluster(1, 1); + // Increase the Raft tick interval to make this test case running reliably. + configure_for_lease_read(&mut cluster.cfg, Some(100), None); + let memory_quota = 1024; // 1KB + let mut suite = TestSuiteBuilder::new() + .cluster(cluster) + .memory_quota(memory_quota) + .build(); + + // Let CdcEvent size be 0 to effectively disable memory quota for CdcEvent. + fail::cfg("cdc_event_size", "return(0)").unwrap(); + + let req = suite.new_changedata_request(1); + let (mut req_tx, _event_feed_wrap, receive_event) = + new_event_feed(suite.get_region_cdc_client(1)); + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + let event = receive_event(false); + event.events.into_iter().for_each(|e| { + match e.event.unwrap() { + // Even if there is no write, + // it should always outputs an Initialized event. + Event_oneof_event::Entries(es) => { + assert!(es.entries.len() == 1, "{:?}", es); + let e = &es.entries[0]; + assert_eq!(e.get_type(), EventLogType::Initialized, "{:?}", es); + } + other => panic!("unknown event {:?}", other), + } + }); + + // Client must receive messages when there is no congest error. 
+ let key_size = memory_quota / 2; + let (k, v) = (vec![1; key_size], vec![5]); + // Prewrite + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.clone(); + mutation.value = v; + suite.must_kv_prewrite(1, vec![mutation], k, start_ts); + let mut events = receive_event(false).events.to_vec(); + assert_eq!(events.len(), 1, "{:?}", events); + match events.pop().unwrap().event.unwrap() { + Event_oneof_event::Entries(entries) => { + assert_eq!(entries.entries.len(), 1); + assert_eq!(entries.entries[0].get_type(), EventLogType::Prewrite); + } + other => panic!("unknown event {:?}", other), + } + + // Trigger congest error. + let key_size = memory_quota * 2; + let (k, v) = (vec![2; key_size], vec![5]); + // Prewrite + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.clone(); + mutation.value = v; + suite.must_kv_prewrite(1, vec![mutation], k, start_ts); + let mut events = receive_event(false).events.to_vec(); + assert_eq!(events.len(), 1, "{:?}", events); + match events.pop().unwrap().event.unwrap() { + Event_oneof_event::Error(e) => { + // Unknown errors are translated into region_not_found. + assert!(e.has_region_not_found(), "{:?}", e); + } + other => panic!("unknown event {:?}", other), + } + + // The delegate must be removed. 
+ let scheduler = suite.endpoints.values().next().unwrap().scheduler(); + let (tx, rx) = mpsc::channel(); + scheduler + .schedule(Task::Validate(Validate::Region( + 1, + Box::new(move |delegate| { + tx.send(delegate.is_none()).unwrap(); + }), + ))) + .unwrap(); + + assert!( + rx.recv_timeout(Duration::from_millis(1000)).unwrap(), + "find unexpected delegate" + ); + + suite.stop(); +} + +#[test] +fn test_pending_on_region_ready_memory_quota_exceeded() { + let mut cluster = new_server_cluster(1, 1); + // Increase the Raft tick interval to make this test case running reliably. + configure_for_lease_read(&mut cluster.cfg, Some(100), None); + let memory_quota = 1024; // 1KB + let mut suite = TestSuiteBuilder::new() + .cluster(cluster) + .memory_quota(memory_quota) + .build(); + + // Let CdcEvent size be 0 to effectively disable memory quota for CdcEvent. + fail::cfg("cdc_event_size", "return(0)").unwrap(); + + // Trigger memory quota exceeded error. + fail::cfg("cdc_pending_on_region_ready", "return").unwrap(); + let req = suite.new_changedata_request(1); + let (mut req_tx, _event_feed_wrap, receive_event) = + new_event_feed(suite.get_region_cdc_client(1)); + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + let event = receive_event(false); + event.events.into_iter().for_each(|e| { + match e.event.unwrap() { + // Even if there is no write, + // it should always outputs an Initialized event. + Event_oneof_event::Entries(es) => { + assert!(es.entries.len() == 1, "{:?}", es); + let e = &es.entries[0]; + assert_eq!(e.get_type(), EventLogType::Initialized, "{:?}", es); + } + other => panic!("unknown event {:?}", other), + } + }); + // MemoryQuotaExceeded error is triggered on_region_ready. + let mut events = receive_event(false).events.to_vec(); + assert_eq!(events.len(), 1, "{:?}", events); + match events.pop().unwrap().event.unwrap() { + Event_oneof_event::Error(e) => { + // Unknown errors are translated into region_not_found. 
+ assert!(e.has_region_not_found(), "{:?}", e); + } + other => panic!("unknown event {:?}", other), + } + + // The delegate must be removed. + let scheduler = suite.endpoints.values().next().unwrap().scheduler(); + let (tx, rx) = mpsc::channel(); + scheduler + .schedule(Task::Validate(Validate::Region( + 1, + Box::new(move |delegate| { + tx.send(delegate.is_none()).unwrap(); + }), + ))) + .unwrap(); + + assert!( + rx.recv_timeout(Duration::from_millis(1000)).unwrap(), + "find unexpected delegate" + ); + + fail::remove("cdc_incremental_scan_start"); + suite.stop(); +} + +#[test] +fn test_pending_push_lock_memory_quota_exceeded() { + let mut cluster = new_server_cluster(1, 1); + // Increase the Raft tick interval to make this test case running reliably. + configure_for_lease_read(&mut cluster.cfg, Some(100), None); + let memory_quota = 1024; // 1KB + let mut suite = TestSuiteBuilder::new() + .cluster(cluster) + .memory_quota(memory_quota) + .build(); + + // Let CdcEvent size be 0 to effectively disable memory quota for CdcEvent. + fail::cfg("cdc_event_size", "return(0)").unwrap(); + + // Pause scan so that no region can be initialized, and all locks will be + // put in pending locks. + fail::cfg("cdc_incremental_scan_start", "pause").unwrap(); + + let req = suite.new_changedata_request(1); + let (mut req_tx, _event_feed_wrap, receive_event) = + new_event_feed(suite.get_region_cdc_client(1)); + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + + // Trigger congest error. 
+ let key_size = memory_quota * 2; + let (k, v) = (vec![1; key_size], vec![5]); + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.clone(); + mutation.value = v; + suite.must_kv_prewrite(1, vec![mutation], k, start_ts); + let mut events = receive_event(false).events.to_vec(); + assert_eq!(events.len(), 1, "{:?}", events); + match events.pop().unwrap().event.unwrap() { + Event_oneof_event::Error(e) => { + // Unknown errors are translated into region_not_found. + assert!(e.has_region_not_found(), "{:?}", e); + } + other => panic!("unknown event {:?}", other), + } + + // The delegate must be removed. + let scheduler = suite.endpoints.values().next().unwrap().scheduler(); + let (tx, rx) = mpsc::channel(); + scheduler + .schedule(Task::Validate(Validate::Region( + 1, + Box::new(move |delegate| { + tx.send(delegate.is_none()).unwrap(); + }), + ))) + .unwrap(); + + assert!( + rx.recv_timeout(Duration::from_millis(1000)).unwrap(), + "find unexpected delegate" + ); + + fail::remove("cdc_incremental_scan_start"); + suite.stop(); +} + +#[test] +fn test_scan_lock_memory_quota_exceeded() { + let mut cluster = new_server_cluster(1, 1); + // Increase the Raft tick interval to make this test case running reliably. + configure_for_lease_read(&mut cluster.cfg, Some(100), None); + let memory_quota = 1024; // 1KB + let mut suite = TestSuiteBuilder::new() + .cluster(cluster) + .memory_quota(memory_quota) + .build(); + + // Let CdcEvent size be 0 to effectively disable memory quota for CdcEvent. + fail::cfg("cdc_event_size", "return(0)").unwrap(); + + // Put a lock that exceeds memory quota. 
+ let key_size = memory_quota * 2; + let (k, v) = (vec![1; key_size], vec![5]); + let start_ts = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.key = k.clone(); + mutation.value = v; + suite.must_kv_prewrite(1, vec![mutation], k, start_ts); + + // No region can be initialized. + let req = suite.new_changedata_request(1); + let (mut req_tx, _event_feed_wrap, receive_event) = + new_event_feed(suite.get_region_cdc_client(1)); + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + let mut events = receive_event(false).events.to_vec(); + assert_eq!(events.len(), 1, "{:?}", events); + match events.pop().unwrap().event.unwrap() { + Event_oneof_event::Error(e) => { + // Unknown errors are translated into region_not_found. + assert!(e.has_region_not_found(), "{:?}", e); + } + other => panic!("unknown event {:?}", other), + } + let scheduler = suite.endpoints.values().next().unwrap().scheduler(); + let (tx, rx) = mpsc::channel(); + scheduler + .schedule(Task::Validate(Validate::Region( + 1, + Box::new(move |delegate| { + tx.send(delegate.is_none()).unwrap(); + }), + ))) + .unwrap(); + + assert!( + rx.recv_timeout(Duration::from_millis(1000)).unwrap(), + "find unexpected delegate" + ); + + suite.stop(); +} diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index ec479909793..afd209af2d3 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -168,6 +168,7 @@ impl TestSuiteBuilder { let count = cluster.count; let pd_cli = cluster.pd_client.clone(); let mut endpoints = HashMap::default(); + let mut quotas = HashMap::default(); let mut obs = HashMap::default(); let mut concurrency_managers = HashMap::default(); // Hack! node id are generated from 1..count+1. @@ -177,15 +178,14 @@ impl TestSuiteBuilder { let mut sim = cluster.sim.wl(); // Register cdc service to gRPC server. 
+ let memory_quota = Arc::new(MemoryQuota::new(memory_quota)); + let memory_quota_ = memory_quota.clone(); let scheduler = worker.scheduler(); sim.pending_services .entry(id) .or_default() .push(Box::new(move || { - create_change_data(cdc::Service::new( - scheduler.clone(), - Arc::new(MemoryQuota::new(memory_quota)), - )) + create_change_data(cdc::Service::new(scheduler.clone(), memory_quota_.clone())) })); sim.txn_extra_schedulers.insert( id, @@ -200,6 +200,7 @@ impl TestSuiteBuilder { }, )); endpoints.insert(id, worker); + quotas.insert(id, memory_quota); } runner(&mut cluster); @@ -224,7 +225,7 @@ impl TestSuiteBuilder { cm.clone(), env, sim.security_mgr.clone(), - Arc::new(MemoryQuota::new(usize::MAX)), + quotas[id].clone(), sim.get_causal_ts_provider(*id), ); let mut updated_cfg = cfg.clone(); diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 405138d41cf..ef257ad4762 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -13,7 +13,7 @@ use txn_types::TimeStamp; use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; -pub(crate) const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB +pub const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB // Resolver resolves timestamps that guarantee no more commit will happen before // the timestamp. 
@@ -188,13 +188,16 @@ impl Resolver { if let Some(index) = index { self.update_tracked_index(index); } + let bytes = self.lock_heap_size(&key); debug!( - "track lock {}@{}, region {}", + "track lock {}@{}", &log_wrappers::Value::key(&key), - start_ts, - self.region_id + start_ts; + "region_id" => self.region_id, + "memory_in_use" => self.memory_quota.in_use(), + "memory_capacity" => self.memory_quota.capacity(), + "key_heap_size" => bytes, ); - let bytes = self.lock_heap_size(&key); if !self.memory_quota.alloc(bytes) { return false; } @@ -213,14 +216,18 @@ impl Resolver { self.memory_quota.free(bytes); start_ts } else { - debug!("untrack a lock that was not tracked before"; "key" => &log_wrappers::Value::key(key)); + debug!("untrack a lock that was not tracked before"; + "key" => &log_wrappers::Value::key(key), + "region_id" => self.region_id, + ); return; }; debug!( - "untrack lock {}@{}, region {}", + "untrack lock {}@{}", &log_wrappers::Value::key(key), - start_ts, - self.region_id, + start_ts; + "region_id" => self.region_id, + "memory_in_use" => self.memory_quota.in_use(), ); let mut shrink_ts = None; From 9bf96f921637f1823f8507f822a215dff55d50e1 Mon Sep 17 00:00:00 2001 From: ekexium Date: Wed, 6 Sep 2023 07:20:12 +0800 Subject: [PATCH 0901/1149] metrics: more logs and metrics for resolved-ts (#15416) ref tikv/tikv#15082 Add more logs and metrics for resolved-ts. 
Signed-off-by: ekexium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../backup-stream/src/subscription_track.rs | 6 +- components/cdc/src/delegate.rs | 4 +- components/cdc/src/initializer.rs | 4 +- components/concurrency_manager/src/lib.rs | 17 + .../concurrency_manager/src/lock_table.rs | 8 + components/raftstore/src/store/util.rs | 1 - components/resolved_ts/src/advance.rs | 9 +- components/resolved_ts/src/endpoint.rs | 563 +++++++++++++----- components/resolved_ts/src/metrics.rs | 68 ++- components/resolved_ts/src/resolver.rs | 112 +++- metrics/grafana/tikv_details.json | 12 +- 11 files changed, 615 insertions(+), 189 deletions(-) diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index ef6e24d9d8f..d6d49f0cf1c 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -8,7 +8,7 @@ use dashmap::{ }; use kvproto::metapb::Region; use raftstore::coprocessor::*; -use resolved_ts::Resolver; +use resolved_ts::{Resolver, TsSource}; use tikv_util::{info, memory::MemoryQuota, warn}; use txn_types::TimeStamp; @@ -516,7 +516,7 @@ impl TwoPhaseResolver { return min_ts.min(stable_ts); } - self.resolver.resolve(min_ts, None) + self.resolver.resolve(min_ts, None, TsSource::BackupStream) } pub fn resolved_ts(&self) -> TimeStamp { @@ -548,7 +548,7 @@ impl TwoPhaseResolver { // advance the internal resolver. // the start ts of initial scanning would be a safe ts for min ts // -- because is used to be a resolved ts. 
- self.resolver.resolve(ts, None); + self.resolver.resolve(ts, None, TsSource::BackupStream); } None => { warn!("BUG: a two-phase resolver is executing phase_one_done when not in phase one"; "resolver" => ?self) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index e109b3368b4..f7125aa8882 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -28,7 +28,7 @@ use raftstore::{ store::util::compare_region_epoch, Error as RaftStoreError, }; -use resolved_ts::{Resolver, ON_DROP_WARN_HEAP_SIZE}; +use resolved_ts::{Resolver, TsSource, ON_DROP_WARN_HEAP_SIZE}; use tikv::storage::{txn::TxnEntry, Statistics}; use tikv_util::{ debug, info, @@ -514,7 +514,7 @@ impl Delegate { } debug!("cdc try to advance ts"; "region_id" => self.region_id, "min_ts" => min_ts); let resolver = self.resolver.as_mut().unwrap(); - let resolved_ts = resolver.resolve(min_ts, None); + let resolved_ts = resolver.resolve(min_ts, None, TsSource::Cdc); debug!("cdc resolved ts updated"; "region_id" => self.region_id, "resolved_ts" => resolved_ts); Some(resolved_ts) diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 25b7175a08d..ef0b15caab9 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -23,7 +23,7 @@ use raftstore::{ msg::{Callback, ReadResponse}, }, }; -use resolved_ts::Resolver; +use resolved_ts::{Resolver, TsSource}; use tikv::storage::{ kv::Snapshot, mvcc::{DeltaScanner, ScannerBuilder}, @@ -467,7 +467,7 @@ impl Initializer { fn finish_building_resolver(&self, mut resolver: Resolver, region: Region) { let observe_id = self.observe_id; - let rts = resolver.resolve(TimeStamp::zero(), None); + let rts = resolver.resolve(TimeStamp::zero(), None, TsSource::Cdc); info!( "cdc resolver initialized and schedule resolver ready"; "region_id" => region.get_id(), diff --git a/components/concurrency_manager/src/lib.rs b/components/concurrency_manager/src/lib.rs index 
ce77cb87a42..1c6bdb8dbf1 100644 --- a/components/concurrency_manager/src/lib.rs +++ b/components/concurrency_manager/src/lib.rs @@ -124,6 +124,23 @@ impl ConcurrencyManager { }); min_lock_ts } + + pub fn global_min_lock(&self) -> Option<(TimeStamp, Key)> { + let mut min_lock: Option<(TimeStamp, Key)> = None; + // TODO: The iteration looks not so efficient. It's better to be optimized. + self.lock_table.for_each_kv(|key, handle| { + if let Some(curr_ts) = handle.with_lock(|lock| lock.as_ref().map(|l| l.ts)) { + if min_lock + .as_ref() + .map(|(ts, _)| ts > &curr_ts) + .unwrap_or(true) + { + min_lock = Some((curr_ts, key.clone())); + } + } + }); + min_lock + } } #[cfg(test)] diff --git a/components/concurrency_manager/src/lock_table.rs b/components/concurrency_manager/src/lock_table.rs index db6995fa1d0..8f4fb8952c3 100644 --- a/components/concurrency_manager/src/lock_table.rs +++ b/components/concurrency_manager/src/lock_table.rs @@ -115,6 +115,14 @@ impl LockTable { } } + pub fn for_each_kv(&self, mut f: impl FnMut(&Key, Arc)) { + for entry in self.0.iter() { + if let Some(handle) = entry.value().upgrade() { + f(entry.key(), handle); + } + } + } + /// Removes the key and its key handle from the map. 
pub fn remove(&self, key: &Key) { self.0.remove(key); diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 880a394fdae..3f34fe691ee 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1439,7 +1439,6 @@ impl RegionReadProgress { self.safe_ts() } - // Dump the `LeaderInfo` and the peer list pub fn get_core(&self) -> MutexGuard<'_, RegionReadProgressCore> { self.core.lock().unwrap() } diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index 4428ed01a35..59478f5affb 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -43,7 +43,7 @@ use tokio::{ }; use txn_types::TimeStamp; -use crate::{endpoint::Task, metrics::*}; +use crate::{endpoint::Task, metrics::*, TsSource}; pub(crate) const DEFAULT_CHECK_LEADER_TIMEOUT_DURATION: Duration = Duration::from_secs(5); // 5s const DEFAULT_GRPC_GZIP_COMPRESSION_LEVEL: usize = 2; @@ -57,7 +57,7 @@ pub struct AdvanceTsWorker { scheduler: Scheduler, /// The concurrency manager for transactions. It's needed for CDC to check /// locks when calculating resolved_ts. - concurrency_manager: ConcurrencyManager, + pub(crate) concurrency_manager: ConcurrencyManager, // cache the last pd tso, used to approximate the next timestamp w/o an actual TSO RPC pub(crate) last_pd_tso: Arc>>, @@ -114,15 +114,17 @@ impl AdvanceTsWorker { if let Ok(mut last_pd_tso) = last_pd_tso.try_lock() { *last_pd_tso = Some((min_ts, Instant::now())); } + let mut ts_source = TsSource::PdTso; // Sync with concurrency manager so that it can work correctly when // optimizations like async commit is enabled. // Note: This step must be done before scheduling `Task::MinTs` task, and the // resolver must be checked in or after `Task::MinTs`' execution. 
cm.update_max_ts(min_ts); - if let Some(min_mem_lock_ts) = cm.global_min_lock_ts() { + if let Some((min_mem_lock_ts, lock)) = cm.global_min_lock() { if min_mem_lock_ts < min_ts { min_ts = min_mem_lock_ts; + ts_source = TsSource::MemoryLock(lock); } } @@ -131,6 +133,7 @@ impl AdvanceTsWorker { if let Err(e) = scheduler.schedule(Task::ResolvedTsAdvanced { regions, ts: min_ts, + ts_source, }) { info!("failed to schedule advance event"; "err" => ?e); } diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index fc3e24de1e4..e2d2aec4f70 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -1,12 +1,13 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. use std::{ + cmp::min, collections::HashMap, fmt, marker::PhantomData, sync::{ atomic::{AtomicBool, Ordering}, - Arc, Mutex, + Arc, Mutex, MutexGuard, }, time::Duration, }; @@ -14,7 +15,7 @@ use std::{ use concurrency_manager::ConcurrencyManager; use engine_traits::KvEngine; use grpcio::Environment; -use kvproto::{metapb::Region, raft_cmdpb::AdminCmdType}; +use kvproto::{kvrpcpb::LeaderInfo, metapb::Region, raft_cmdpb::AdminCmdType}; use online_config::{self, ConfigChange, ConfigManager, OnlineConfig}; use pd_client::PdClient; use raftstore::{ @@ -22,7 +23,9 @@ use raftstore::{ router::CdcHandle, store::{ fsm::store::StoreRegionMeta, - util::{self, RegionReadProgress, RegionReadProgressRegistry}, + util::{ + self, ReadState, RegionReadProgress, RegionReadProgressCore, RegionReadProgressRegistry, + }, }, }; use security::SecurityManager; @@ -39,12 +42,12 @@ use crate::{ advance::{AdvanceTsWorker, LeadershipResolver, DEFAULT_CHECK_LEADER_TIMEOUT_DURATION}, cmd::{ChangeLog, ChangeRow}, metrics::*, - resolver::Resolver, + resolver::{LastAttempt, Resolver}, scanner::{ScanEntry, ScanMode, ScanTask, ScannerPool}, - Error, Result, ON_DROP_WARN_HEAP_SIZE, + Error, Result, TsSource, ON_DROP_WARN_HEAP_SIZE, }; -/// grace period for 
logging safe-ts and resolved-ts gap in slow log +/// grace period for identifying identifying slow resolved-ts and safe-ts. const SLOW_LOG_GRACE_PERIOD_MS: u64 = 1000; const MEMORY_QUOTA_EXCEEDED_BACKOFF: Duration = Duration::from_secs(30); @@ -386,6 +389,265 @@ pub struct Endpoint { _phantom: PhantomData<(T, E)>, } +// methods that are used for metrics and logging +impl Endpoint +where + T: 'static + CdcHandle, + E: KvEngine, + S: StoreRegionMeta, +{ + fn is_leader(&self, store_id: Option, leader_store_id: Option) -> bool { + store_id.is_some() && store_id == leader_store_id + } + + fn collect_stats(&mut self) -> Stats { + let store_id = self.get_or_init_store_id(); + let mut stats = Stats::default(); + self.region_read_progress.with(|registry| { + for (region_id, read_progress) in registry { + let (leader_info, leader_store_id) = read_progress.dump_leader_info(); + let core = read_progress.get_core(); + let resolved_ts = leader_info.get_read_state().get_safe_ts(); + let safe_ts = core.read_state().ts; + + if resolved_ts == 0 { + stats.zero_ts_count += 1; + continue; + } + + if self.is_leader(store_id, leader_store_id) { + // leader resolved-ts + if resolved_ts < stats.min_leader_resolved_ts.resolved_ts { + let resolver = self.regions.get(region_id).map(|x| &x.resolver); + stats + .min_leader_resolved_ts + .set(*region_id, resolver, &core, &leader_info); + } + } else { + // follower safe-ts + if safe_ts > 0 && safe_ts < stats.min_follower_safe_ts.safe_ts { + stats.min_follower_safe_ts.set(*region_id, &core); + } + + // follower resolved-ts + if resolved_ts < stats.min_follower_resolved_ts.resolved_ts { + stats.min_follower_resolved_ts.set(*region_id, &core); + } + } + } + }); + + stats.resolver = self.collect_resolver_stats(); + stats.cm_min_lock = self.advance_worker.concurrency_manager.global_min_lock(); + stats + } + + fn collect_resolver_stats(&mut self) -> ResolverStats { + let mut stats = ResolverStats::default(); + for observed_region in 
self.regions.values() { + match &observed_region.resolver_status { + ResolverStatus::Pending { locks, .. } => { + for l in locks { + match l { + PendingLock::Track { key, .. } => stats.heap_size += key.len() as i64, + PendingLock::Untrack { key, .. } => stats.heap_size += key.len() as i64, + } + } + stats.unresolved_count += 1; + } + ResolverStatus::Ready { .. } => { + stats.heap_size += observed_region.resolver.approximate_heap_bytes() as i64; + stats.resolved_count += 1; + } + } + } + stats + } + + fn update_metrics(&self, stats: &Stats) { + let now = self.approximate_now_tso(); + // general + if stats.min_follower_resolved_ts.resolved_ts < stats.min_leader_resolved_ts.resolved_ts { + RTS_MIN_RESOLVED_TS.set(stats.min_follower_resolved_ts.resolved_ts as i64); + RTS_MIN_RESOLVED_TS_GAP.set(now.saturating_sub( + TimeStamp::from(stats.min_follower_resolved_ts.resolved_ts).physical(), + ) as i64); + RTS_MIN_RESOLVED_TS_REGION.set(stats.min_follower_resolved_ts.region_id as i64); + } else { + RTS_MIN_RESOLVED_TS.set(stats.min_leader_resolved_ts.resolved_ts as i64); + RTS_MIN_RESOLVED_TS_GAP.set(now.saturating_sub( + TimeStamp::from(stats.min_leader_resolved_ts.resolved_ts).physical(), + ) as i64); + RTS_MIN_RESOLVED_TS_REGION.set(stats.min_leader_resolved_ts.region_id as i64); + } + RTS_ZERO_RESOLVED_TS.set(stats.zero_ts_count); + + RTS_LOCK_HEAP_BYTES_GAUGE.set(stats.resolver.heap_size); + RTS_REGION_RESOLVE_STATUS_GAUGE_VEC + .with_label_values(&["resolved"]) + .set(stats.resolver.resolved_count); + RTS_REGION_RESOLVE_STATUS_GAUGE_VEC + .with_label_values(&["unresolved"]) + .set(stats.resolver.unresolved_count); + + CONCURRENCY_MANAGER_MIN_LOCK_TS.set( + stats + .cm_min_lock + .clone() + .map(|(ts, _)| ts.into_inner()) + .unwrap_or_default() as i64, + ); + + // min follower safe ts + RTS_MIN_FOLLOWER_SAFE_TS_REGION.set(stats.min_follower_safe_ts.region_id as i64); + RTS_MIN_FOLLOWER_SAFE_TS.set(stats.min_follower_safe_ts.safe_ts as i64); + 
RTS_MIN_FOLLOWER_SAFE_TS_GAP.set( + now.saturating_sub(TimeStamp::from(stats.min_follower_safe_ts.safe_ts).physical()) + as i64, + ); + RTS_MIN_FOLLOWER_SAFE_TS_DURATION_TO_LAST_CONSUME_LEADER.set( + stats + .min_follower_safe_ts + .duration_to_last_consume_leader + .map(|x| x as i64) + .unwrap_or(-1), + ); + + // min leader resolved ts + RTS_MIN_LEADER_RESOLVED_TS.set(stats.min_leader_resolved_ts.resolved_ts as i64); + RTS_MIN_LEADER_RESOLVED_TS_REGION.set(stats.min_leader_resolved_ts.region_id as i64); + RTS_MIN_LEADER_RESOLVED_TS_REGION_MIN_LOCK_TS.set( + stats + .min_leader_resolved_ts + .min_lock + .as_ref() + .map(|(ts, _)| (*ts).into_inner() as i64) + .unwrap_or(-1), + ); + RTS_MIN_LEADER_RESOLVED_TS_GAP + .set(now.saturating_sub( + TimeStamp::from(stats.min_leader_resolved_ts.resolved_ts).physical(), + ) as i64); + RTS_MIN_LEADER_DUATION_TO_LAST_UPDATE_SAFE_TS.set( + stats + .min_leader_resolved_ts + .duration_to_last_update_ms + .map(|x| x as i64) + .unwrap_or(-1), + ); + + // min follower resolved ts + RTS_MIN_FOLLOWER_RESOLVED_TS.set(stats.min_follower_resolved_ts.resolved_ts as i64); + RTS_MIN_FOLLOWER_RESOLVED_TS_REGION.set(stats.min_follower_resolved_ts.region_id as i64); + RTS_MIN_FOLLOWER_RESOLVED_TS_GAP.set( + now.saturating_sub( + TimeStamp::from(stats.min_follower_resolved_ts.resolved_ts).physical(), + ) as i64, + ); + RTS_MIN_FOLLOWER_RESOLVED_TS_DURATION_TO_LAST_CONSUME_LEADER.set( + stats + .min_follower_resolved_ts + .duration_to_last_consume_leader + .map(|x| x as i64) + .unwrap_or(-1), + ); + } + + // Approximate a TSO from PD. It is better than local timestamp when clock skew + // exists. + // Returns the physical part. 
+ fn approximate_now_tso(&self) -> u64 { + self.advance_worker + .last_pd_tso + .try_lock() + .map(|opt| { + opt.map(|(pd_ts, instant)| { + pd_ts.physical() + instant.saturating_elapsed().as_millis() as u64 + }) + .unwrap_or_else(|| TimeStamp::physical_now()) + }) + .unwrap_or_else(|_| TimeStamp::physical_now()) + } + + fn log_slow_regions(&self, stats: &Stats) { + let expected_interval = min( + self.cfg.advance_ts_interval.as_millis(), + DEFAULT_CHECK_LEADER_TIMEOUT_DURATION.as_millis() as u64, + ) + self.cfg.advance_ts_interval.as_millis(); + let leader_threshold = expected_interval + SLOW_LOG_GRACE_PERIOD_MS; + let follower_threshold = 2 * expected_interval + SLOW_LOG_GRACE_PERIOD_MS; + let now = self.approximate_now_tso(); + + // min leader resolved ts + let min_leader_resolved_ts_gap = now + .saturating_sub(TimeStamp::from(stats.min_leader_resolved_ts.resolved_ts).physical()); + if min_leader_resolved_ts_gap > leader_threshold { + info!( + "the max gap of leader resolved-ts is large"; + "region_id" => stats.min_leader_resolved_ts.region_id, + "gap" => format!("{}ms", min_leader_resolved_ts_gap), + "read_state" => ?stats.min_leader_resolved_ts.read_state, + "applied_index" => stats.min_leader_resolved_ts.applied_index, + "min_lock" => ?stats.min_leader_resolved_ts.min_lock, + "lock_num" => stats.min_leader_resolved_ts.lock_num, + "txn_num" => stats.min_leader_resolved_ts.txn_num, + "min_memory_lock" => ?stats.cm_min_lock, + "duration_to_last_update_safe_ts" => match stats.min_leader_resolved_ts.duration_to_last_update_ms { + Some(d) => format!("{}ms", d), + None => "none".to_owned(), + }, + "last_resolve_attempt" => &stats.min_leader_resolved_ts.last_resolve_attempt, + ); + } + + // min follower safe ts + let min_follower_safe_ts_gap = + now.saturating_sub(TimeStamp::from(stats.min_follower_safe_ts.safe_ts).physical()); + if min_follower_safe_ts_gap > follower_threshold { + info!( + "the max gap of follower safe-ts is large"; + "region_id" => 
stats.min_follower_safe_ts.region_id, + "gap" => format!("{}ms", min_follower_safe_ts_gap), + "safe_ts" => stats.min_follower_safe_ts.safe_ts, + "resolved_ts" => stats.min_follower_safe_ts.resolved_ts, + "duration_to_last_consume_leader" => match stats.min_follower_safe_ts.duration_to_last_consume_leader { + Some(d) => format!("{}ms", d), + None => "none".to_owned(), + }, + "applied_index" => stats.min_follower_safe_ts.applied_index, + "latest_candidate" => ?stats.min_follower_safe_ts.latest_candidate, + "oldest_candidate" => ?stats.min_follower_safe_ts.oldest_candidate, + ); + } + + // min follower resolved ts + let min_follower_resolved_ts_gap = now + .saturating_sub(TimeStamp::from(stats.min_follower_resolved_ts.resolved_ts).physical()); + if min_follower_resolved_ts_gap > follower_threshold { + if stats.min_follower_resolved_ts.region_id == stats.min_follower_safe_ts.region_id { + info!( + "the max gap of follower resolved-ts is large; it's the same region that has the min safe-ts" + ); + } else { + info!( + "the max gap of follower resolved-ts is large"; + "region_id" => stats.min_follower_resolved_ts.region_id, + "gap" => format!("{}ms", min_follower_resolved_ts_gap), + "safe_ts" => stats.min_follower_resolved_ts.safe_ts, + "resolved_ts" => stats.min_follower_resolved_ts.resolved_ts, + "duration_to_last_consume_leader" => match stats.min_follower_resolved_ts.duration_to_last_consume_leader { + Some(d) => format!("{}ms", d), + None => "none".to_owned(), + }, + "applied_index" => stats.min_follower_resolved_ts.applied_index, + "latest_candidate" => ?stats.min_follower_resolved_ts.latest_candidate, + "oldest_candidate" => ?stats.min_follower_resolved_ts.oldest_candidate, + ); + } + } + } +} + impl Endpoint where T: 'static + CdcHandle, @@ -623,7 +885,12 @@ where // Update advanced resolved ts. // Must ensure all regions are leaders at the point of ts. 
- fn handle_resolved_ts_advanced(&mut self, regions: Vec, ts: TimeStamp) { + fn handle_resolved_ts_advanced( + &mut self, + regions: Vec, + ts: TimeStamp, + ts_source: TsSource, + ) { if regions.is_empty() { return; } @@ -631,7 +898,9 @@ where for region_id in regions.iter() { if let Some(observe_region) = self.regions.get_mut(region_id) { if let ResolverStatus::Ready = observe_region.resolver_status { - let _ = observe_region.resolver.resolve(ts, Some(now)); + let _ = observe_region + .resolver + .resolve(ts, Some(now), ts_source.clone()); } } } @@ -776,6 +1045,7 @@ pub enum Task { ResolvedTsAdvanced { regions: Vec, ts: TimeStamp, + ts_source: TsSource, }, ChangeLog { cmd_batch: Vec, @@ -830,10 +1100,12 @@ impl fmt::Debug for Task { Task::ResolvedTsAdvanced { ref regions, ref ts, + ref ts_source, } => de .field("name", &"advance_resolved_ts") .field("regions", ®ions) .field("ts", &ts) + .field("ts_source", &ts_source.label()) .finish(), Task::ChangeLog { .. } => de.field("name", &"change_log").finish(), Task::ScanLocks { @@ -890,9 +1162,11 @@ where Task::AdvanceResolvedTs { leader_resolver } => { self.handle_advance_resolved_ts(leader_resolver) } - Task::ResolvedTsAdvanced { regions, ts } => { - self.handle_resolved_ts_advanced(regions, ts) - } + Task::ResolvedTsAdvanced { + regions, + ts, + ts_source, + } => self.handle_resolved_ts_advanced(regions, ts, ts_source), Task::ChangeLog { cmd_batch } => self.handle_change_log(cmd_batch), Task::ScanLocks { region_id, @@ -928,6 +1202,138 @@ impl ConfigManager for ResolvedTsConfigManager { } } +#[derive(Default)] +struct Stats { + // stats for metrics + zero_ts_count: i64, + min_leader_resolved_ts: LeaderStats, + min_follower_safe_ts: FollowerStats, + min_follower_resolved_ts: FollowerStats, + resolver: ResolverStats, + // we don't care about min_safe_ts_leader, because safe_ts should be equal to resolved_ts in + // leaders + // The min memory lock in concurrency manager. 
+ cm_min_lock: Option<(TimeStamp, Key)>, +} + +struct LeaderStats { + region_id: u64, + resolved_ts: u64, + read_state: ReadState, + duration_to_last_update_ms: Option, + last_resolve_attempt: Option, + applied_index: u64, + // min lock in LOCK CF + min_lock: Option<(TimeStamp, Key)>, + lock_num: Option, + txn_num: Option, +} + +impl Default for LeaderStats { + fn default() -> Self { + Self { + region_id: 0, + resolved_ts: u64::MAX, + read_state: ReadState::default(), + duration_to_last_update_ms: None, + applied_index: 0, + last_resolve_attempt: None, + min_lock: None, + lock_num: None, + txn_num: None, + } + } +} + +impl LeaderStats { + fn set( + &mut self, + region_id: u64, + resolver: Option<&Resolver>, + region_read_progress: &MutexGuard<'_, RegionReadProgressCore>, + leader_info: &LeaderInfo, + ) { + *self = LeaderStats { + region_id, + resolved_ts: leader_info.get_read_state().get_safe_ts(), + read_state: region_read_progress.read_state().clone(), + duration_to_last_update_ms: region_read_progress + .last_instant_of_update_ts() + .map(|i| i.saturating_elapsed().as_millis() as u64), + last_resolve_attempt: resolver.and_then(|r| r.last_attempt.clone()), + min_lock: resolver.and_then(|r| { + r.oldest_transaction().map(|(ts, keys)| { + ( + *ts, + keys.iter() + .next() + .map(|k| Key::from_encoded_slice(k.as_ref())) + .unwrap_or_else(|| Key::from_encoded_slice("no_keys_found".as_ref())), + ) + }) + }), + applied_index: region_read_progress.applied_index(), + lock_num: resolver.map(|r| r.num_locks()), + txn_num: resolver.map(|r| r.num_transactions()), + }; + } +} + +struct FollowerStats { + region_id: u64, + resolved_ts: u64, + safe_ts: u64, + latest_candidate: Option, + oldest_candidate: Option, + applied_index: u64, + duration_to_last_consume_leader: Option, +} + +impl Default for FollowerStats { + fn default() -> Self { + Self { + region_id: 0, + safe_ts: u64::MAX, + resolved_ts: u64::MAX, + latest_candidate: None, + oldest_candidate: None, + applied_index: 0, 
+ duration_to_last_consume_leader: None, + } + } +} + +impl FollowerStats { + fn set( + &mut self, + region_id: u64, + region_read_progress: &MutexGuard<'_, RegionReadProgressCore>, + ) { + let read_state = region_read_progress.read_state(); + *self = FollowerStats { + region_id, + resolved_ts: region_read_progress + .get_leader_info() + .get_read_state() + .get_safe_ts(), + safe_ts: read_state.ts, + applied_index: region_read_progress.applied_index(), + latest_candidate: region_read_progress.pending_items().back().cloned(), + oldest_candidate: region_read_progress.pending_items().front().cloned(), + duration_to_last_consume_leader: region_read_progress + .last_instant_of_consume_leader() + .map(|i| i.saturating_elapsed().as_millis() as u64), + }; + } +} + +#[derive(Default)] +struct ResolverStats { + resolved_count: i64, + unresolved_count: i64, + heap_size: i64, +} + const METRICS_FLUSH_INTERVAL: u64 = 10_000; // 10s impl RunnableWithTimer for Endpoint @@ -937,138 +1343,9 @@ where S: StoreRegionMeta, { fn on_timeout(&mut self) { - let store_id = self.get_or_init_store_id(); - let (mut oldest_ts, mut oldest_region, mut zero_ts_count) = (u64::MAX, 0, 0); - let (mut oldest_leader_ts, mut oldest_leader_region) = (u64::MAX, 0); - let (mut oldest_safe_ts, mut oldest_safe_ts_region) = (u64::MAX, 0); - let mut oldest_duration_to_last_update_ms = 0; - let mut oldest_duration_to_last_consume_leader_ms = 0; - self.region_read_progress.with(|registry| { - for (region_id, read_progress) in registry { - let safe_ts = read_progress.safe_ts(); - if safe_ts > 0 && safe_ts < oldest_safe_ts { - oldest_safe_ts = safe_ts; - oldest_safe_ts_region = *region_id; - } - - let (leader_info, leader_store_id) = read_progress.dump_leader_info(); - // this is maximum resolved-ts pushed to region_read_progress, namely candidates - // of safe_ts. 
It may not be the safe_ts yet - let ts = leader_info.get_read_state().get_safe_ts(); - if ts == 0 { - zero_ts_count += 1; - continue; - } - if ts < oldest_ts { - oldest_ts = ts; - oldest_region = *region_id; - // use -1 to denote none. - oldest_duration_to_last_update_ms = read_progress - .get_core() - .last_instant_of_consume_leader() - .map(|t| t.saturating_elapsed().as_millis() as i64) - .unwrap_or(-1); - oldest_duration_to_last_consume_leader_ms = read_progress - .get_core() - .last_instant_of_consume_leader() - .map(|t| t.saturating_elapsed().as_millis() as i64) - .unwrap_or(-1); - } - - if let (Some(store_id), Some(leader_store_id)) = (store_id, leader_store_id) { - if leader_store_id == store_id && ts < oldest_leader_ts { - oldest_leader_ts = ts; - oldest_leader_region = *region_id; - } - } - } - }); - let mut lock_heap_size = 0; - let (mut resolved_count, mut unresolved_count) = (0, 0); - for observe_region in self.regions.values() { - match &observe_region.resolver_status { - ResolverStatus::Pending { locks, .. } => { - for l in locks { - match l { - PendingLock::Track { key, .. } => lock_heap_size += key.len(), - PendingLock::Untrack { key, .. } => lock_heap_size += key.len(), - } - } - unresolved_count += 1; - } - ResolverStatus::Ready { .. } => { - lock_heap_size += observe_region.resolver.approximate_heap_bytes(); - resolved_count += 1; - } - } - } - // approximate a TSO from PD. It is better than local timestamp when clock skew - // exists. 
- let now: u64 = self - .advance_worker - .last_pd_tso - .try_lock() - .map(|opt| { - opt.map(|(pd_ts, instant)| { - pd_ts.physical() + instant.saturating_elapsed().as_millis() as u64 - }) - .unwrap_or_else(|| TimeStamp::physical_now()) - }) - .unwrap_or_else(|_| TimeStamp::physical_now()); - - RTS_MIN_SAFE_TS.set(oldest_safe_ts as i64); - RTS_MIN_SAFE_TS_REGION.set(oldest_safe_ts_region as i64); - let safe_ts_gap = now.saturating_sub(TimeStamp::from(oldest_safe_ts).physical()); - if safe_ts_gap - > self.cfg.advance_ts_interval.as_millis() - + DEFAULT_CHECK_LEADER_TIMEOUT_DURATION.as_millis() as u64 - + SLOW_LOG_GRACE_PERIOD_MS - { - let mut lock_num = None; - let mut min_start_ts = None; - if let Some(ob) = self.regions.get(&oldest_safe_ts_region) { - min_start_ts = ob - .resolver - .locks() - .keys() - .next() - .cloned() - .map(TimeStamp::into_inner); - lock_num = Some(ob.resolver.num_locks()); - } - info!( - "the max gap of safe-ts is large"; - "gap" => safe_ts_gap, - "oldest_safe_ts" => ?oldest_safe_ts, - "region_id" => oldest_safe_ts_region, - "advance_ts_interval" => ?self.cfg.advance_ts_interval, - "lock_num" => lock_num, - "min_start_ts" => min_start_ts, - ); - } - RTS_MIN_SAFE_TS_GAP.set(safe_ts_gap as i64); - RTS_MIN_SAFE_TS_DUATION_TO_UPDATE_SAFE_TS.set(oldest_duration_to_last_update_ms); - RTS_MIN_SAFE_TS_DURATION_TO_LAST_CONSUME_LEADER - .set(oldest_duration_to_last_consume_leader_ms); - - RTS_MIN_RESOLVED_TS_REGION.set(oldest_region as i64); - RTS_MIN_RESOLVED_TS.set(oldest_ts as i64); - RTS_ZERO_RESOLVED_TS.set(zero_ts_count as i64); - RTS_MIN_RESOLVED_TS_GAP - .set(now.saturating_sub(TimeStamp::from(oldest_ts).physical()) as i64); - - RTS_MIN_LEADER_RESOLVED_TS_REGION.set(oldest_leader_region as i64); - RTS_MIN_LEADER_RESOLVED_TS.set(oldest_leader_ts as i64); - RTS_MIN_LEADER_RESOLVED_TS_GAP - .set(now.saturating_sub(TimeStamp::from(oldest_leader_ts).physical()) as i64); - - RTS_LOCK_HEAP_BYTES_GAUGE.set(lock_heap_size as i64); - 
RTS_REGION_RESOLVE_STATUS_GAUGE_VEC - .with_label_values(&["resolved"]) - .set(resolved_count as _); - RTS_REGION_RESOLVE_STATUS_GAUGE_VEC - .with_label_values(&["unresolved"]) - .set(unresolved_count as _); + let stats = self.collect_stats(); + self.update_metrics(&stats); + self.log_slow_regions(&stats); } fn get_interval(&self) -> Duration { diff --git a/components/resolved_ts/src/metrics.rs b/components/resolved_ts/src/metrics.rs index 74da743952c..02bb92f7887 100644 --- a/components/resolved_ts/src/metrics.rs +++ b/components/resolved_ts/src/metrics.rs @@ -38,7 +38,7 @@ lazy_static! { .unwrap(); pub static ref RTS_MIN_RESOLVED_TS_GAP: IntGauge = register_int_gauge!( "tikv_resolved_ts_min_resolved_ts_gap_millis", - "The minimal (non-zero) resolved ts gap for observed regions" + "The gap between now() and the minimal (non-zero) resolved ts" ) .unwrap(); pub static ref RTS_RESOLVED_FAIL_ADVANCE_VEC: IntCounterVec = register_int_counter_vec!( @@ -69,29 +69,29 @@ lazy_static! { "The minimal (non-zero) resolved ts for observed regions" ) .unwrap(); - pub static ref RTS_MIN_SAFE_TS_REGION: IntGauge = register_int_gauge!( - "tikv_resolved_ts_min_safe_ts_region", - "The region which has minimal safe ts" + pub static ref RTS_MIN_FOLLOWER_SAFE_TS_REGION: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_safe_ts_region", + "The region id of the follower that has minimal safe ts" ) .unwrap(); - pub static ref RTS_MIN_SAFE_TS: IntGauge = register_int_gauge!( - "tikv_resolved_ts_min_safe_ts", - "The minimal (non-zero) safe ts for observed regions" + pub static ref RTS_MIN_FOLLOWER_SAFE_TS: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_safe_ts", + "The minimal (non-zero) safe ts for followers" ) .unwrap(); - pub static ref RTS_MIN_SAFE_TS_GAP: IntGauge = register_int_gauge!( - "tikv_resolved_ts_min_safe_ts_gap_millis", - "The minimal (non-zero) safe ts gap for observed regions" + pub static ref RTS_MIN_FOLLOWER_SAFE_TS_GAP: IntGauge = 
register_int_gauge!( + "tikv_resolved_ts_min_follower_safe_ts_gap_millis", + "The gap between now() and the minimal (non-zero) safe ts for followers" ) .unwrap(); - pub static ref RTS_MIN_SAFE_TS_DUATION_TO_UPDATE_SAFE_TS: IntGauge = register_int_gauge!( - "tikv_resolved_ts_min_safe_ts_duration_to_update_safe_ts", - "The duration since last update_safe_ts() called by resolved-ts routine. -1 denotes None." + pub static ref RTS_MIN_LEADER_DUATION_TO_LAST_UPDATE_SAFE_TS: IntGauge = register_int_gauge!( + "tikv_resolved_ts_leader_min_resolved_ts_duration_to_last_update_safe_ts", + "The duration since last update_safe_ts() called by resolved-ts routine in the leader with min resolved ts. -1 denotes None." ) .unwrap(); - pub static ref RTS_MIN_SAFE_TS_DURATION_TO_LAST_CONSUME_LEADER: IntGauge = register_int_gauge!( - "tikv_resolved_ts_min_safe_ts_duration_to_last_consume_leader", - "The duration since last check_leader(). -1 denotes None." + pub static ref RTS_MIN_FOLLOWER_SAFE_TS_DURATION_TO_LAST_CONSUME_LEADER: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_safe_ts_duration_to_last_consume_leader", + "The duration since last check_leader() in the follower region with min safe ts. -1 denotes None." ) .unwrap(); pub static ref RTS_ZERO_RESOLVED_TS: IntGauge = register_int_gauge!( @@ -125,7 +125,17 @@ lazy_static! { .unwrap(); pub static ref RTS_MIN_LEADER_RESOLVED_TS_REGION: IntGauge = register_int_gauge!( "tikv_resolved_ts_min_leader_resolved_ts_region", - "The region which its leader peer has minimal resolved ts" + "The region whose leader peer has minimal resolved ts" + ) + .unwrap(); + pub static ref RTS_MIN_LEADER_RESOLVED_TS_REGION_MIN_LOCK_TS: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_leader_resolved_ts_region_min_lock_ts", + "The minimal lock ts for the region whose leader peer has minimal resolved ts. 0 means no lock. -1 means no region found." 
+ ) + .unwrap(); + pub static ref CONCURRENCY_MANAGER_MIN_LOCK_TS: IntGauge = register_int_gauge!( + "tikv_concurrency_manager_min_lock_ts", + "The minimal lock ts in concurrency manager. 0 means no lock." ) .unwrap(); pub static ref RTS_MIN_LEADER_RESOLVED_TS: IntGauge = register_int_gauge!( @@ -135,7 +145,29 @@ lazy_static! { .unwrap(); pub static ref RTS_MIN_LEADER_RESOLVED_TS_GAP: IntGauge = register_int_gauge!( "tikv_resolved_ts_min_leader_resolved_ts_gap_millis", - "The minimal (non-zero) resolved ts gap for observe leader peers" + "The gap between now() and the minimal (non-zero) resolved ts for leader peers" + ) + .unwrap(); + + // for min_follower_resolved_ts + pub static ref RTS_MIN_FOLLOWER_RESOLVED_TS_REGION: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_resolved_ts_region", + "The region id of the follower has minimal resolved ts" + ) + .unwrap(); + pub static ref RTS_MIN_FOLLOWER_RESOLVED_TS: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_resolved_ts", + "The minimal (non-zero) resolved ts for follower regions" + ) + .unwrap(); + pub static ref RTS_MIN_FOLLOWER_RESOLVED_TS_GAP: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_resolved_ts_gap_millis", + "The max gap of now() and the minimal (non-zero) resolved ts for follower regions" + ) + .unwrap(); + pub static ref RTS_MIN_FOLLOWER_RESOLVED_TS_DURATION_TO_LAST_CONSUME_LEADER: IntGauge = register_int_gauge!( + "tikv_resolved_ts_min_follower_resolved_ts_duration_to_last_consume_leader", + "The duration since last check_leader() in the follower region with min resolved ts. -1 denotes None." 
) .unwrap(); pub static ref RTS_INITIAL_SCAN_BACKOFF_DURATION_HISTOGRAM: Histogram = register_histogram!( diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index ef257ad4762..e0814176a92 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -8,13 +8,46 @@ use tikv_util::{ memory::{HeapSize, MemoryQuota}, time::Instant, }; -use txn_types::TimeStamp; +use txn_types::{Key, TimeStamp}; use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; pub const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB +#[derive(Clone)] +pub enum TsSource { + // A lock in LOCK CF + Lock(Arc<[u8]>), + // A memory lock in concurrency manager + MemoryLock(Key), + PdTso, + // The following sources can also come from PD or memory lock, but we care more about sources + // in resolved-ts. + BackupStream, + Cdc, +} + +impl TsSource { + pub fn label(&self) -> &str { + match self { + TsSource::Lock(_) => "lock", + TsSource::MemoryLock(_) => "rts_cm_min_lock", + TsSource::PdTso => "pd_tso", + TsSource::BackupStream => "backup_stream", + TsSource::Cdc => "cdc", + } + } + + pub fn key(&self) -> Option { + match self { + TsSource::Lock(k) => Some(Key::from_encoded_slice(k)), + TsSource::MemoryLock(k) => Some(k.clone()), + _ => None, + } + } +} + // Resolver resolves timestamps that guarantee no more commit will happen before // the timestamp. pub struct Resolver { @@ -22,7 +55,7 @@ pub struct Resolver { // key -> start_ts locks_by_key: HashMap, TimeStamp>, // start_ts -> locked keys. - lock_ts_heap: BTreeMap>>, + pub(crate) lock_ts_heap: BTreeMap>>, // The last shrink time. last_aggressive_shrink_time: Instant, // The timestamps that guarantees no more commit will happen before. @@ -35,14 +68,42 @@ pub struct Resolver { min_ts: TimeStamp, // Whether the `Resolver` is stopped stopped: bool, - // The memory quota for the `Resolver` and its lock keys and timestamps. 
memory_quota: Arc, + // The last attempt of resolve(), used for diagnosis. + pub(crate) last_attempt: Option, +} + +#[derive(Clone)] +pub(crate) struct LastAttempt { + success: bool, + ts: TimeStamp, + reason: TsSource, +} + +impl slog::Value for LastAttempt { + fn serialize( + &self, + _record: &slog::Record<'_>, + key: slog::Key, + serializer: &mut dyn slog::Serializer, + ) -> slog::Result { + serializer.emit_arguments( + key, + &format_args!( + "{{ success={}, ts={}, reason={}, key={:?} }}", + self.success, + self.ts, + self.reason.label(), + self.reason.key(), + ), + ) + } } impl std::fmt::Debug for Resolver { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let far_lock = self.lock_ts_heap.iter().next(); + let far_lock = self.oldest_transaction(); let mut dt = f.debug_tuple("Resolver"); dt.field(&format_args!("region={}", self.region_id)); @@ -103,6 +164,7 @@ impl Resolver { min_ts: TimeStamp::zero(), stopped: false, memory_quota, + last_attempt: None, } } @@ -252,7 +314,12 @@ impl Resolver { /// /// `min_ts` advances the resolver even if there is no write. /// Return None means the resolver is not initialized. - pub fn resolve(&mut self, min_ts: TimeStamp, now: Option) -> TimeStamp { + pub fn resolve( + &mut self, + min_ts: TimeStamp, + now: Option, + source: TsSource, + ) -> TimeStamp { // Use a small ratio to shrink the memory usage aggressively. const AGGRESSIVE_SHRINK_RATIO: usize = 2; const AGGRESSIVE_SHRINK_INTERVAL: Duration = Duration::from_secs(10); @@ -268,17 +335,36 @@ impl Resolver { } // Find the min start ts. - let min_lock = self.lock_ts_heap.keys().next().cloned(); + let min_lock = self + .oldest_transaction() + .and_then(|(ts, locks)| locks.iter().next().map(|lock| (*ts, lock))); let has_lock = min_lock.is_some(); - let min_start_ts = min_lock.unwrap_or(min_ts); + let min_start_ts = min_lock.map(|(ts, _)| ts).unwrap_or(min_ts); // No more commit happens before the ts. 
let new_resolved_ts = cmp::min(min_start_ts, min_ts); + // reason is the min source of the new resolved ts. + let reason = match (min_lock, min_ts) { + (Some(lock), min_ts) if lock.0 < min_ts => TsSource::Lock(lock.1.clone()), + (Some(_), _) => source, + (None, _) => source, + }; + if self.resolved_ts >= new_resolved_ts { - let label = if has_lock { "has_lock" } else { "stale_ts" }; RTS_RESOLVED_FAIL_ADVANCE_VEC - .with_label_values(&[label]) + .with_label_values(&[reason.label()]) .inc(); + self.last_attempt = Some(LastAttempt { + success: false, + ts: new_resolved_ts, + reason, + }); + } else { + self.last_attempt = Some(LastAttempt { + success: true, + ts: new_resolved_ts, + reason, + }) } // Resolved ts never decrease. @@ -335,6 +421,10 @@ impl Resolver { pub(crate) fn read_progress(&self) -> Option<&Arc> { self.read_progress.as_ref() } + + pub(crate) fn oldest_transaction(&self) -> Option<(&TimeStamp, &HashSet>)> { + self.lock_ts_heap.iter().next() + } } #[cfg(test)] @@ -419,7 +509,7 @@ mod tests { Event::Unlock(key) => resolver.untrack_lock(&key.into_raw().unwrap(), None), Event::Resolve(min_ts, expect) => { assert_eq!( - resolver.resolve(min_ts.into(), None), + resolver.resolve(min_ts.into(), None, TsSource::PdTso), expect.into(), "case {}", i @@ -501,7 +591,7 @@ mod tests { // Trigger aggressive shrink. 
resolver.last_aggressive_shrink_time = Instant::now_coarse() - Duration::from_secs(600); - resolver.resolve(TimeStamp::new(0), None); + resolver.resolve(TimeStamp::new(0), None, TsSource::PdTso); assert!( resolver.locks_by_key.capacity() == 0, "{}, {}", diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index c78540c601a..ceed5c6314c 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -39068,7 +39068,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between safe ts and current time", + "description": "The gap between now() and the minimal (non-zero) safe ts for followers", "editable": true, "error": false, "fieldConfig": { @@ -39119,7 +39119,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_safe_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "expr": "sum(tikv_resolved_ts_min_follower_safe_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -39132,7 +39132,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Max gap of safe-ts", + "title": "Max gap of follower safe-ts", "tooltip": { "msResolution": false, "shared": true, @@ -39292,7 +39292,7 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "The region that has minimal safe ts", + "description": "The region id of the follower that has minimal safe ts", "editable": true, "error": false, "fieldConfig": { @@ -39348,7 +39348,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_safe_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "expr": "sum(tikv_resolved_ts_min_follower_safe_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by 
(instance)", "format": "time_series", "hide": false, "interval": "", @@ -39362,7 +39362,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Min Safe TS Region", + "title": "Min Safe TS Follower Region", "tooltip": { "msResolution": false, "shared": true, From 1abc220dca85950a728c7be06f469870373fb463 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E8=B6=85?= Date: Wed, 6 Sep 2023 14:48:43 +0800 Subject: [PATCH 0902/1149] coprocessor: add SQL statement tracing in tikv slow log (#15514) close tikv/tikv#15513 coprocessor: add SQL statement tracing in tikv slow log Signed-off-by: Chao Wang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/coprocessor/tracker.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index 71d84388c3b..bb32a3a0e03 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -264,8 +264,11 @@ impl Tracker { .unwrap_or_default() }); + let source_stmt = self.req_ctx.context.get_source_stmt(); with_tls_tracker(|tracker| { info!(#"slow_log", "slow-query"; + "connection_id" => source_stmt.get_connection_id(), + "session_alias" => source_stmt.get_session_alias(), "region_id" => &self.req_ctx.context.get_region_id(), "remote_host" => &self.req_ctx.peer, "total_lifetime" => ?self.req_lifetime, From fd896513d1c1bf274cf11acae1a09b6034b3c149 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 7 Sep 2023 15:00:44 +0800 Subject: [PATCH 0903/1149] engine_rocks: trace all memtables including pinned (#15547) close tikv/tikv#15546 Signed-off-by: Neil Shen --- components/engine_rocks/src/rocks_metrics.rs | 15 ++++++++++++--- components/engine_rocks/src/rocks_metrics_defs.rs | 1 + metrics/grafana/tikv_details.json | 2 +- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/components/engine_rocks/src/rocks_metrics.rs b/components/engine_rocks/src/rocks_metrics.rs index 522696cb150..2b32af111ec 100644 --- 
a/components/engine_rocks/src/rocks_metrics.rs +++ b/components/engine_rocks/src/rocks_metrics.rs @@ -920,6 +920,7 @@ struct CfStats { blob_cache_size: Option, readers_mem: Option, mem_tables: Option, + mem_tables_all: Option, num_keys: Option, pending_compaction_bytes: Option, num_immutable_mem_table: Option, @@ -978,6 +979,9 @@ impl StatisticsReporter for RocksStatisticsReporter { if let Some(v) = db.get_property_int_cf(handle, ROCKSDB_CUR_SIZE_ALL_MEM_TABLES) { *cf_stats.mem_tables.get_or_insert_default() += v; } + if let Some(v) = db.get_property_int_cf(handle, ROCKSDB_SIZE_ALL_MEM_TABLES) { + *cf_stats.mem_tables_all.get_or_insert_default() += v; + } // TODO: add cache usage and pinned usage. if let Some(v) = db.get_property_int_cf(handle, ROCKSDB_ESTIMATE_NUM_KEYS) { *cf_stats.num_keys.get_or_insert_default() += v; @@ -1119,6 +1123,11 @@ impl StatisticsReporter for RocksStatisticsReporter { .with_label_values(&[&self.name, cf, "mem-tables"]) .set(v as i64); } + if let Some(v) = cf_stats.mem_tables_all { + STORE_ENGINE_MEMORY_GAUGE_VEC + .with_label_values(&[&self.name, cf, "mem-tables-all"]) + .set(v as i64); + } if let Some(v) = cf_stats.num_keys { STORE_ENGINE_ESTIMATE_NUM_KEYS_VEC .with_label_values(&[&self.name, cf]) @@ -1538,9 +1547,9 @@ lazy_static! 
{ "Number of times titan blob file sync is done", &["db"] ).unwrap(); - pub static ref STORE_ENGINE_BLOB_FILE_SYNCED: SimpleEngineTickerMetrics = - auto_flush_from!(STORE_ENGINE_BLOB_FILE_SYNCED_VEC, SimpleEngineTickerMetrics); - + pub static ref STORE_ENGINE_BLOB_FILE_SYNCED: SimpleEngineTickerMetrics = + auto_flush_from!(STORE_ENGINE_BLOB_FILE_SYNCED_VEC, SimpleEngineTickerMetrics); + pub static ref STORE_ENGINE_BLOB_CACHE_EFFICIENCY_VEC: IntCounterVec = register_int_counter_vec!( "tikv_engine_blob_cache_efficiency", "Efficiency of titan's blob cache", diff --git a/components/engine_rocks/src/rocks_metrics_defs.rs b/components/engine_rocks/src/rocks_metrics_defs.rs index 042949f1c09..5bbc6245c72 100644 --- a/components/engine_rocks/src/rocks_metrics_defs.rs +++ b/components/engine_rocks/src/rocks_metrics_defs.rs @@ -5,6 +5,7 @@ use rocksdb::{DBStatisticsHistogramType as HistType, DBStatisticsTickerType as T pub const ROCKSDB_TOTAL_SST_FILES_SIZE: &str = "rocksdb.total-sst-files-size"; pub const ROCKSDB_TABLE_READERS_MEM: &str = "rocksdb.estimate-table-readers-mem"; pub const ROCKSDB_CUR_SIZE_ALL_MEM_TABLES: &str = "rocksdb.cur-size-all-mem-tables"; +pub const ROCKSDB_SIZE_ALL_MEM_TABLES: &str = "rocksdb.size-all-mem-tables"; pub const ROCKSDB_ESTIMATE_NUM_KEYS: &str = "rocksdb.estimate-num-keys"; pub const ROCKSDB_PENDING_COMPACTION_BYTES: &str = "rocksdb.\ estimate-pending-compaction-bytes"; diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index ceed5c6314c..c31ee12b27b 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -31941,7 +31941,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_memory_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"mem-tables\"}) by (cf)", + "expr": "avg(tikv_engine_memory_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", 
type=\"mem-tables-all\"}) by (cf)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cf}}", From 23c89b3fd2d0395d868b76deb0a0c820c3e48aab Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 7 Sep 2023 15:15:44 +0800 Subject: [PATCH 0904/1149] *: let alloc API return result (#15529) ref tikv/tikv#15412 MemoryQuota alloc API returns result, make it more ergonomic. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../backup-stream/src/subscription_track.rs | 6 +-- components/cdc/src/channel.rs | 30 ++++++++----- components/cdc/src/delegate.rs | 14 ++---- components/cdc/src/endpoint.rs | 4 +- components/cdc/src/errors.rs | 3 +- components/cdc/src/initializer.rs | 4 +- components/resolved_ts/src/endpoint.rs | 36 +++++++--------- components/resolved_ts/src/errors.rs | 3 +- components/resolved_ts/src/resolver.rs | 26 +++++------ components/tikv_util/src/memory.rs | 43 +++++++++++-------- 10 files changed, 87 insertions(+), 82 deletions(-) diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index d6d49f0cf1c..4f44ec46853 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -480,7 +480,7 @@ impl TwoPhaseResolver { warn!("backup stream tracking lock as if in phase one"; "start_ts" => %start_ts, "key" => %utils::redact(&key)) } // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. - assert!(self.resolver.track_lock(start_ts, key, None)); + self.resolver.track_lock(start_ts, key, None).unwrap(); } pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec) { @@ -489,7 +489,7 @@ impl TwoPhaseResolver { return; } // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. 
- assert!(self.resolver.track_lock(start_ts, key, None)); + self.resolver.track_lock(start_ts, key, None).unwrap(); } pub fn untrack_lock(&mut self, key: &[u8]) { @@ -505,7 +505,7 @@ impl TwoPhaseResolver { match lock { FutureLock::Lock(key, ts) => { // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. - assert!(self.resolver.track_lock(ts, key, None)); + self.resolver.track_lock(ts, key, None).unwrap(); } FutureLock::Unlock(key) => self.resolver.untrack_lock(&key, None), } diff --git a/components/cdc/src/channel.rs b/components/cdc/src/channel.rs index a3ddeeb9030..b386c3561bb 100644 --- a/components/cdc/src/channel.rs +++ b/components/cdc/src/channel.rs @@ -14,7 +14,11 @@ use grpcio::WriteFlags; use kvproto::cdcpb::{ChangeDataEvent, Event, ResolvedTs}; use protobuf::Message; use tikv_util::{ - future::block_on_timeout, impl_display_as_debug, memory::MemoryQuota, time::Instant, warn, + future::block_on_timeout, + impl_display_as_debug, + memory::{MemoryQuota, MemoryQuotaExceeded}, + time::Instant, + warn, }; use crate::metrics::*; @@ -234,6 +238,12 @@ impl_from_future_send_error! { TrySendError<(CdcEvent, usize)>, } +impl From for SendError { + fn from(_: MemoryQuotaExceeded) -> Self { + SendError::Congested + } +} + #[derive(Clone)] pub struct Sink { unbounded_sender: UnboundedSender<(CdcEvent, usize)>, @@ -245,8 +255,8 @@ impl Sink { pub fn unbounded_send(&self, event: CdcEvent, force: bool) -> Result<(), SendError> { // Try it's best to send error events. 
let bytes = if !force { event.size() as usize } else { 0 }; - if bytes != 0 && !self.memory_quota.alloc(bytes) { - return Err(SendError::Congested); + if bytes != 0 { + self.memory_quota.alloc(bytes)?; } match self.unbounded_sender.unbounded_send((event, bytes)) { Ok(_) => Ok(()), @@ -265,9 +275,7 @@ impl Sink { let bytes = event.size(); total_bytes += bytes; } - if !self.memory_quota.alloc(total_bytes as _) { - return Err(SendError::Congested); - } + self.memory_quota.alloc(total_bytes as _)?; for event in events { let bytes = event.size() as usize; if let Err(e) = self.bounded_sender.feed((event, bytes)).await { @@ -570,9 +578,9 @@ mod tests { } } let memory_quota = rx.memory_quota.clone(); - assert_eq!(memory_quota.alloc(event.size() as _), false,); + memory_quota.alloc(event.size() as _).unwrap_err(); drop(rx); - assert_eq!(memory_quota.alloc(1024), true); + memory_quota.alloc(1024).unwrap(); } // Make sure memory quota is freed when tx is dropped before rx. { @@ -587,10 +595,10 @@ mod tests { } } let memory_quota = rx.memory_quota.clone(); - assert_eq!(memory_quota.alloc(event.size() as _), false,); + memory_quota.alloc(event.size() as _).unwrap_err(); drop(send); drop(rx); - assert_eq!(memory_quota.alloc(1024), true); + memory_quota.alloc(1024).unwrap(); } // Make sure sending message to a closed channel does not leak memory quota. { @@ -602,7 +610,7 @@ mod tests { send(CdcEvent::Event(e.clone())).unwrap_err(); } assert_eq!(memory_quota.in_use(), 0); - assert_eq!(memory_quota.alloc(1024), true); + memory_quota.alloc(1024).unwrap(); // Freeing bytes should not cause overflow. 
memory_quota.free(1024); diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index f7125aa8882..c82c4cb6f13 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -249,9 +249,7 @@ impl Pending { fn push_pending_lock(&mut self, lock: PendingLock) -> Result<()> { let bytes = lock.heap_size(); - if !self.memory_quota.alloc(bytes) { - return Err(Error::MemoryQuotaExceeded); - } + self.memory_quota.alloc(bytes)?; self.locks.push(lock); self.pending_bytes += bytes; CDC_PENDING_BYTES_GAUGE.add(bytes as i64); @@ -260,16 +258,14 @@ impl Pending { fn on_region_ready(&mut self, resolver: &mut Resolver) -> Result<()> { fail::fail_point!("cdc_pending_on_region_ready", |_| Err( - Error::MemoryQuotaExceeded + Error::MemoryQuotaExceeded(tikv_util::memory::MemoryQuotaExceeded) )); // Must take locks, otherwise it may double free memory quota on drop. for lock in mem::take(&mut self.locks) { self.memory_quota.free(lock.heap_size()); match lock { PendingLock::Track { key, start_ts } => { - if !resolver.track_lock(start_ts, key, None) { - return Err(Error::MemoryQuotaExceeded); - } + resolver.track_lock(start_ts, key, None)?; } PendingLock::Untrack { key } => resolver.untrack_lock(&key, None), } @@ -900,9 +896,7 @@ impl Delegate { // In order to compute resolved ts, we must track inflight txns. 
match self.resolver { Some(ref mut resolver) => { - if !resolver.track_lock(row.start_ts.into(), row.key.clone(), None) { - return Err(Error::MemoryQuotaExceeded); - } + resolver.track_lock(row.start_ts.into(), row.key.clone(), None)?; } None => { assert!(self.pending.is_some(), "region resolver not ready"); diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 2b314f22443..a5f00a08028 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -2644,7 +2644,9 @@ mod tests { let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); let mut resolver = Resolver::new(id, memory_quota); - assert!(resolver.track_lock(TimeStamp::compose(0, id), vec![], None)); + resolver + .track_lock(TimeStamp::compose(0, id), vec![], None) + .unwrap(); let mut region = Region::default(); region.id = id; region.set_region_epoch(region_epoch); diff --git a/components/cdc/src/errors.rs b/components/cdc/src/errors.rs index e44c39e3999..e7bd7605e7d 100644 --- a/components/cdc/src/errors.rs +++ b/components/cdc/src/errors.rs @@ -10,6 +10,7 @@ use tikv::storage::{ mvcc::{Error as MvccError, ErrorInner as MvccErrorInner}, txn::{Error as TxnError, ErrorInner as TxnErrorInner}, }; +use tikv_util::memory::MemoryQuotaExceeded; use txn_types::Error as TxnTypesError; use crate::channel::SendError; @@ -36,7 +37,7 @@ pub enum Error { #[error("Sink send error {0:?}")] Sink(#[from] SendError), #[error("Memory quota exceeded")] - MemoryQuotaExceeded, + MemoryQuotaExceeded(#[from] MemoryQuotaExceeded), } macro_rules! 
impl_from { diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index ef0b15caab9..31cda4b9e72 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -424,9 +424,7 @@ impl Initializer { let lock = Lock::parse(value)?; match lock.lock_type { LockType::Put | LockType::Delete => { - if !resolver.track_lock(lock.ts, key, None) { - return Err(Error::MemoryQuotaExceeded); - } + resolver.track_lock(lock.ts, key, None)?; } _ => (), }; diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index e2d2aec4f70..2a2f56eaadd 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -103,10 +103,10 @@ impl ResolverStatus { }; // Check if adding a new lock or unlock will exceed the memory // quota. - if !memory_quota.alloc(lock.heap_size()) { + memory_quota.alloc(lock.heap_size()).map_err(|e| { fail::fail_point!("resolved_ts_on_pending_locks_memory_quota_exceeded"); - return Err(Error::MemoryQuotaExceeded); - } + Error::MemoryQuotaExceeded(e) + })?; locks.push(lock); Ok(()) } @@ -292,13 +292,11 @@ impl ObserveRegion { for row in rows { match row { ChangeRow::Prewrite { key, start_ts, .. } => { - if !self.resolver.track_lock( + self.resolver.track_lock( *start_ts, key.to_raw().unwrap(), Some(*index), - ) { - return Err(Error::MemoryQuotaExceeded); - } + )?; } ChangeRow::Commit { key, .. 
} => self .resolver @@ -328,13 +326,11 @@ impl ObserveRegion { panic!("region {:?} resolver has ready", self.meta.id) } for (key, lock) in locks { - if !self.resolver.track_lock( + self.resolver.track_lock( lock.ts, key.to_raw().unwrap(), Some(apply_index), - ) { - return Err(Error::MemoryQuotaExceeded); - } + )?; } } ScanEntry::None => { @@ -347,13 +343,11 @@ impl ObserveRegion { for lock in pending_locks { match lock { PendingLock::Track { key, start_ts } => { - if !self.resolver.track_lock( + self.resolver.track_lock( start_ts, key.to_raw().unwrap(), Some(pending_tracked_index), - ) { - return Err(Error::MemoryQuotaExceeded); - } + )?; } PendingLock::Untrack { key, .. } => self .resolver @@ -924,7 +918,7 @@ where if let Err(e) = observe_region.track_change_log(&logs) { drop(observe_region); let backoff = match e { - Error::MemoryQuotaExceeded => Some(MEMORY_QUOTA_EXCEEDED_BACKOFF), + Error::MemoryQuotaExceeded(_) => Some(MEMORY_QUOTA_EXCEEDED_BACKOFF), Error::Other(_) => None, }; self.re_register_region(region_id, observe_id, e, backoff); @@ -947,13 +941,13 @@ where entries: Vec, apply_index: u64, ) { - let mut is_memory_quota_exceeded = false; + let mut memory_quota_exceeded = None; if let Some(observe_region) = self.regions.get_mut(®ion_id) { if observe_region.handle.id == observe_id { - if let Err(Error::MemoryQuotaExceeded) = + if let Err(Error::MemoryQuotaExceeded(e)) = observe_region.track_scan_locks(entries, apply_index) { - is_memory_quota_exceeded = true; + memory_quota_exceeded = Some(Error::MemoryQuotaExceeded(e)); } } } else { @@ -961,9 +955,9 @@ where "region_id" => region_id, "observe_id" => ?observe_id); } - if is_memory_quota_exceeded { + if let Some(e) = memory_quota_exceeded { let backoff = Some(MEMORY_QUOTA_EXCEEDED_BACKOFF); - self.re_register_region(region_id, observe_id, Error::MemoryQuotaExceeded, backoff); + self.re_register_region(region_id, observe_id, e, backoff); } } diff --git a/components/resolved_ts/src/errors.rs 
b/components/resolved_ts/src/errors.rs index b4a59a2c7a0..4e14c1d78d9 100644 --- a/components/resolved_ts/src/errors.rs +++ b/components/resolved_ts/src/errors.rs @@ -1,11 +1,12 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. use thiserror::Error; +use tikv_util::memory::MemoryQuotaExceeded; #[derive(Debug, Error)] pub enum Error { #[error("Memory quota exceeded")] - MemoryQuotaExceeded, + MemoryQuotaExceeded(#[from] MemoryQuotaExceeded), #[error("Other error {0}")] Other(#[from] Box), } diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index e0814176a92..9a62a0eea98 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -5,7 +5,7 @@ use std::{cmp, collections::BTreeMap, sync::Arc, time::Duration}; use collections::{HashMap, HashSet}; use raftstore::store::RegionReadProgress; use tikv_util::{ - memory::{HeapSize, MemoryQuota}, + memory::{HeapSize, MemoryQuota, MemoryQuotaExceeded}, time::Instant, }; use txn_types::{Key, TimeStamp}; @@ -245,8 +245,12 @@ impl Resolver { } } - #[must_use] - pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec, index: Option) -> bool { + pub fn track_lock( + &mut self, + start_ts: TimeStamp, + key: Vec, + index: Option, + ) -> Result<(), MemoryQuotaExceeded> { if let Some(index) = index { self.update_tracked_index(index); } @@ -260,13 +264,11 @@ impl Resolver { "memory_capacity" => self.memory_quota.capacity(), "key_heap_size" => bytes, ); - if !self.memory_quota.alloc(bytes) { - return false; - } + self.memory_quota.alloc(bytes)?; let key: Arc<[u8]> = key.into_boxed_slice().into(); self.locks_by_key.insert(key.clone(), start_ts); self.lock_ts_heap.entry(start_ts).or_default().insert(key); - true + Ok(()) } pub fn untrack_lock(&mut self, key: &[u8], index: Option) { @@ -500,11 +502,9 @@ mod tests { for e in case.clone() { match e { Event::Lock(start_ts, key) => { - assert!(resolver.track_lock( - start_ts.into(), - 
key.into_raw().unwrap(), - None - )); + resolver + .track_lock(start_ts.into(), key.into_raw().unwrap(), None) + .unwrap(); } Event::Unlock(key) => resolver.untrack_lock(&key.into_raw().unwrap(), None), Event::Resolve(min_ts, expect) => { @@ -527,7 +527,7 @@ mod tests { let mut key = vec![0; 77]; let lock_size = resolver.lock_heap_size(&key); let mut ts = TimeStamp::default(); - while resolver.track_lock(ts, key.clone(), None) { + while resolver.track_lock(ts, key.clone(), None).is_ok() { ts.incr(); key[0..8].copy_from_slice(&ts.into_inner().to_be_bytes()); } diff --git a/components/tikv_util/src/memory.rs b/components/tikv_util/src/memory.rs index 17b6b23cf78..291254c5227 100644 --- a/components/tikv_util/src/memory.rs +++ b/components/tikv_util/src/memory.rs @@ -75,16 +75,23 @@ impl HeapSize for RaftCmdRequest { } } +#[derive(Debug)] +pub struct MemoryQuotaExceeded; + +impl std::error::Error for MemoryQuotaExceeded {} + +impl_display_as_debug!(MemoryQuotaExceeded); + pub struct MemoryQuota { - capacity: AtomicUsize, in_use: AtomicUsize, + capacity: AtomicUsize, } impl MemoryQuota { pub fn new(capacity: usize) -> MemoryQuota { MemoryQuota { - capacity: AtomicUsize::new(capacity), in_use: AtomicUsize::new(0), + capacity: AtomicUsize::new(capacity), } } @@ -93,28 +100,28 @@ impl MemoryQuota { } pub fn capacity(&self) -> usize { - self.capacity.load(Ordering::Acquire) + self.capacity.load(Ordering::Relaxed) } pub fn set_capacity(&self, capacity: usize) { - self.capacity.store(capacity, Ordering::Release) + self.capacity.store(capacity, Ordering::Relaxed); } - pub fn alloc(&self, bytes: usize) -> bool { + pub fn alloc(&self, bytes: usize) -> Result<(), MemoryQuotaExceeded> { + let capacity = self.capacity.load(Ordering::Relaxed); let mut in_use_bytes = self.in_use.load(Ordering::Relaxed); - let capacity = self.capacity.load(Ordering::Acquire); loop { if in_use_bytes + bytes > capacity { - return false; + return Err(MemoryQuotaExceeded); } let new_in_use_bytes = 
in_use_bytes + bytes; match self.in_use.compare_exchange_weak( in_use_bytes, new_in_use_bytes, - Ordering::Acquire, + Ordering::Relaxed, Ordering::Relaxed, ) { - Ok(_) => return true, + Ok(_) => return Ok(()), Err(current) => in_use_bytes = current, } } @@ -128,7 +135,7 @@ impl MemoryQuota { match self.in_use.compare_exchange_weak( in_use_bytes, new_in_use_bytes, - Ordering::Acquire, + Ordering::Relaxed, Ordering::Relaxed, ) { Ok(_) => return, @@ -145,13 +152,13 @@ mod tests { #[test] fn test_memory_quota() { let quota = MemoryQuota::new(100); - assert!(quota.alloc(10)); + quota.alloc(10).unwrap(); assert_eq!(quota.in_use(), 10); - assert!(!quota.alloc(100)); + quota.alloc(100).unwrap_err(); assert_eq!(quota.in_use(), 10); quota.free(5); assert_eq!(quota.in_use(), 5); - assert!(quota.alloc(95)); + quota.alloc(95).unwrap(); assert_eq!(quota.in_use(), 100); quota.free(95); assert_eq!(quota.in_use(), 5); @@ -160,19 +167,19 @@ mod tests { #[test] fn test_resize_memory_quota() { let quota = MemoryQuota::new(100); - assert!(quota.alloc(10)); + quota.alloc(10).unwrap(); assert_eq!(quota.in_use(), 10); - assert!(!quota.alloc(100)); + quota.alloc(100).unwrap_err(); assert_eq!(quota.in_use(), 10); quota.set_capacity(200); - assert!(quota.alloc(100)); + quota.alloc(100).unwrap(); assert_eq!(quota.in_use(), 110); quota.set_capacity(50); - assert!(!quota.alloc(100)); + quota.alloc(100).unwrap_err(); assert_eq!(quota.in_use(), 110); quota.free(100); assert_eq!(quota.in_use(), 10); - assert!(quota.alloc(40)); + quota.alloc(40).unwrap(); assert_eq!(quota.in_use(), 50); } } From 87d0f7cf143524222b4b0d80a4a8c5e02d11cf67 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 7 Sep 2023 15:44:15 +0800 Subject: [PATCH 0905/1149] raftstore-v2: supplement read track metrics (#15508) ref tikv/tikv#15409 supplement read track metrics Signed-off-by: SpadeA-Tang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- 
components/raftstore-v2/src/fsm/peer.rs | 21 ++++++++++++------- .../raftstore-v2/src/operation/query/local.rs | 4 ++++ .../cases/test_read_execution_tracker.rs | 15 +++++++------ 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index f6b9217ecbf..d51d8eedb2a 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -9,7 +9,7 @@ use crossbeam::channel::TryRecvError; use encryption_export::DataKeyManager; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use kvproto::{errorpb, raft_cmdpb::RaftCmdResponse}; -use raftstore::store::{Config, TabletSnapManager, Transport}; +use raftstore::store::{Config, ReadCallback, TabletSnapManager, Transport}; use slog::{debug, info, trace, Logger}; use tikv_util::{ is_zero_duration, @@ -17,6 +17,7 @@ use tikv_util::{ slog_panic, time::{duration_to_sec, Instant}, }; +use tracker::{TrackerToken, GLOBAL_TRACKERS}; use crate::{ batch::StoreContext, @@ -206,11 +207,17 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, } #[inline] - fn on_receive_command(&self, send_time: Instant) { + fn on_receive_command(&self, send_time: Instant, read_token: Option) { + let propose_wait_time = send_time.saturating_elapsed(); self.store_ctx .raft_metrics .propose_wait_time - .observe(duration_to_sec(send_time.saturating_elapsed())); + .observe(duration_to_sec(propose_wait_time)); + if let Some(token) = read_token { + GLOBAL_TRACKERS.with_tracker(token, |tracker| { + tracker.metrics.read_index_propose_wait_nanos = propose_wait_time.as_nanos() as u64; + }); + } } fn on_tick(&mut self, tick: PeerTick) { @@ -243,17 +250,17 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, self.fsm.peer.on_raft_message(self.store_ctx, msg); } PeerMsg::RaftQuery(cmd) => { - self.on_receive_command(cmd.send_time); + self.on_receive_command(cmd.send_time, 
cmd.ch.read_tracker()); self.on_query(cmd.request, cmd.ch) } PeerMsg::AdminCommand(cmd) => { - self.on_receive_command(cmd.send_time); + self.on_receive_command(cmd.send_time, None); self.fsm .peer_mut() .on_admin_command(self.store_ctx, cmd.request, cmd.ch) } PeerMsg::SimpleWrite(write) => { - self.on_receive_command(write.send_time); + self.on_receive_command(write.send_time, None); self.fsm.peer_mut().on_simple_write( self.store_ctx, write.header, @@ -262,7 +269,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, ); } PeerMsg::UnsafeWrite(write) => { - self.on_receive_command(write.send_time); + self.on_receive_command(write.send_time, None); self.fsm .peer_mut() .on_unsafe_write(self.store_ctx, write.data); diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 36dbb26e4c7..2f074fdc04d 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -580,6 +580,10 @@ impl<'r> SnapRequestInspector<'r> { )); } + fail::fail_point!("perform_read_index", |_| Ok(ReadRequestPolicy::ReadIndex)); + + fail::fail_point!("perform_read_local", |_| Ok(ReadRequestPolicy::ReadLocal)); + let flags = WriteBatchFlags::from_bits_check(req.get_header().get_flags()); if flags.contains(WriteBatchFlags::STALE_READ) { return Ok(ReadRequestPolicy::StaleRead); diff --git a/tests/failpoints/cases/test_read_execution_tracker.rs b/tests/failpoints/cases/test_read_execution_tracker.rs index c5ff93a70c1..7351044b297 100644 --- a/tests/failpoints/cases/test_read_execution_tracker.rs +++ b/tests/failpoints/cases/test_read_execution_tracker.rs @@ -2,13 +2,13 @@ use kvproto::kvrpcpb::*; use test_coprocessor::{init_with_data, DagSelect, ProductTable}; -use test_raftstore::{ - kv_batch_read, kv_read, must_kv_commit, must_kv_prewrite, must_new_cluster_and_kv_client, -}; +use test_raftstore::{kv_batch_read, kv_read, must_kv_commit, 
must_kv_prewrite}; +use test_raftstore_macro::test_case; -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_read_execution_tracking() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let (k1, v1) = (b"k1".to_vec(), b"v1".to_vec()); let (k2, v2) = (b"k2".to_vec(), b"v2".to_vec()); @@ -104,18 +104,21 @@ fn test_read_execution_tracking() { ); }; - fail::cfg("perform_read_index", "return()").unwrap(); + // return read_index twich: one for local reader and one for raftstore + fail::cfg("perform_read_index", "2*return()").unwrap(); // should perform read index let resp = kv_read(&client, ctx.clone(), k1.clone(), 100); read_index_checker(resp.get_exec_details_v2().get_scan_detail_v2()); + fail::cfg("perform_read_index", "2*return()").unwrap(); // should perform read index let resp = kv_batch_read(&client, ctx, vec![k1, k2], 100); read_index_checker(resp.get_exec_details_v2().get_scan_detail_v2()); + fail::cfg("perform_read_index", "2*return()").unwrap(); // should perform read index let resp = client.coprocessor(&coprocessor_request).unwrap(); From 98eb383b41695b11a03e3d1ce471181f02bfc741 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 11 Sep 2023 17:06:14 +0800 Subject: [PATCH 0906/1149] raftstore-v2: fix chaos between on_memtable_sealed and on_flush_completed (#15543) close tikv/tikv#15534 fix chaos between on_memtable_sealed and on_flush_completed Signed-off-by: SpadeA-Tang --- Cargo.lock | 6 +- components/engine_rocks/src/event_listener.rs | 11 ++- components/engine_traits/src/flush.rs | 31 +++++-- tests/failpoints/cases/test_engine.rs | 88 ++++++++++++++++++- 4 files changed, 124 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4cd0882628b..7e09c3d2979 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3108,7 +3108,7 @@ dependencies = [ 
[[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#b68565569d711d78f8ae0d24e2d2b59f0fd03ef1" +source = "git+https://github.com/SpadeA-Tang/rust-rocksdb.git?branch=fix-sealed-chaos#f5121f48a1543c5d576ad7964c617f30f79a3d66" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3127,7 +3127,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#b68565569d711d78f8ae0d24e2d2b59f0fd03ef1" +source = "git+https://github.com/SpadeA-Tang/rust-rocksdb.git?branch=fix-sealed-chaos#f5121f48a1543c5d576ad7964c617f30f79a3d66" dependencies = [ "bzip2-sys", "cc", @@ -5101,7 +5101,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#b68565569d711d78f8ae0d24e2d2b59f0fd03ef1" +source = "git+https://github.com/SpadeA-Tang/rust-rocksdb.git?branch=fix-sealed-chaos#f5121f48a1543c5d576ad7964c617f30f79a3d66" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/components/engine_rocks/src/event_listener.rs b/components/engine_rocks/src/event_listener.rs index 9628c61c23f..03a40d005c8 100644 --- a/components/engine_rocks/src/event_listener.rs +++ b/components/engine_rocks/src/event_listener.rs @@ -194,8 +194,15 @@ impl rocksdb::EventListener for RocksPersistenceListener { fn on_memtable_sealed(&self, info: &MemTableInfo) { // Note: first_seqno is effectively the smallest seqno of memtable. // earliest_seqno has ambiguous semantics. 
- self.0 - .on_memtable_sealed(info.cf_name().to_string(), info.first_seqno()); + self.0.on_memtable_sealed( + info.cf_name().to_string(), + info.first_seqno(), + info.largest_seqno(), + ); + } + + fn on_flush_begin(&self, _: &FlushJobInfo) { + fail::fail_point!("on_flush_begin"); } fn on_flush_completed(&self, job: &FlushJobInfo) { diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index 9344e84bb4e..8590236e126 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -18,14 +18,17 @@ use std::{ atomic::{AtomicU64, Ordering}, Arc, Mutex, RwLock, }, + time::Duration, }; use kvproto::import_sstpb::SstMeta; -use slog_global::info; -use tikv_util::set_panic_mark; +use slog_global::{info, warn}; +use tikv_util::{set_panic_mark, time::Instant}; use crate::{data_cf_offset, RaftEngine, RaftLogBatch, DATA_CFS_LEN}; +const HEAVY_WORKER_THRESHOLD: Duration = Duration::from_millis(25); + #[derive(Debug)] pub struct ApplyProgress { cf: String, @@ -203,7 +206,11 @@ impl PersistenceListener { /// Called when memtable is frozen. /// /// `smallest_seqno` should be the smallest seqno of the memtable. - pub fn on_memtable_sealed(&self, cf: String, smallest_seqno: u64) { + /// + /// Note: After https://github.com/tikv/rocksdb/pull/347, rocksdb global lock will + /// be held during this method, so we should avoid do heavy things in it. 
+ pub fn on_memtable_sealed(&self, cf: String, smallest_seqno: u64, largest_seqno: u64) { + let t = Instant::now_coarse(); (|| { fail_point!("on_memtable_sealed", |t| { assert_eq!(t.unwrap().as_str(), cf); @@ -219,8 +226,9 @@ impl PersistenceListener { let flushed = prs.last_flushed[offset]; if flushed > smallest_seqno { panic!( - "sealed seqno has been flushed {} {} {} <= {}", - cf, apply_index, smallest_seqno, flushed + "sealed seqno conflict with latest flushed index, cf {}, + sealed smallest_seqno {}, sealed largest_seqno {}, last_flushed {}, apply_index {}", + cf, smallest_seqno, largest_seqno, flushed, apply_index, ); } prs.prs.push_back(ApplyProgress { @@ -228,6 +236,11 @@ impl PersistenceListener { apply_index, smallest_seqno, }); + if t.saturating_elapsed() > HEAVY_WORKER_THRESHOLD { + warn!( + "heavy work in on_memtable_sealed, the code should be reviewed"; + ); + } } /// Called a memtable finished flushing. @@ -244,7 +257,13 @@ impl PersistenceListener { if flushed >= largest_seqno { // According to facebook/rocksdb#11183, it's possible OnFlushCompleted can be // called out of order. But it's guaranteed files are installed in order. - info!("flush complete reorder found"; "flushed" => flushed, "largest_seqno" => largest_seqno, "file_no" => file_no, "cf" => cf); + info!( + "flush complete reorder found"; + "flushed" => flushed, + "largest_seqno" => largest_seqno, + "file_no" => file_no, + "cf" => cf + ); return; } prs.last_flushed[offset] = largest_seqno; diff --git a/tests/failpoints/cases/test_engine.rs b/tests/failpoints/cases/test_engine.rs index 93d1c96597b..073f7276419 100644 --- a/tests/failpoints/cases/test_engine.rs +++ b/tests/failpoints/cases/test_engine.rs @@ -1,6 +1,11 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; +use std::{ + sync::{mpsc::channel, Mutex}, + time::Duration, +}; + +use engine_traits::{MiscExt, CF_DEFAULT, CF_LOCK, CF_WRITE}; use tikv_util::config::ReadableSize; fn dummy_string(len: usize) -> String { @@ -51,3 +56,84 @@ fn test_write_buffer_manager() { cluster.must_put_cf(CF_WRITE, key.as_bytes(), dummy.as_bytes()); } } + +// The test mocks the senario before https://github.com/tikv/rocksdb/pull/347: +// note: before rocksdb/pull/347, lock is called before on_memtable_sealed. +// Case: +// Assume FlushMemtable cf1 (schedule flush task) and BackgroundCallFlush cf1 +// (execute flush task) are performed concurrently. +// t FlushMemtable cf1 BackgroundCallFlush cf1 +// 1. lock +// 2. convert memtable t2(seqno. 10-20) +// to immemtable +// 3. unlock +// 4. lock +// 5. pick memtables to flush: +// t1(0-10), t2(10-20) +// flush job(0-20) +// 6. finish flush +// 7. unlock +// 8. on_flush_completed: +// update last_flushed to 20 +// 9. on_memtable_sealed +// 10 > 20 *panic* +#[test] +fn test_rocksdb_listener() { + use test_raftstore_v2::*; + let count = 1; + let mut cluster = new_node_cluster(0, count); + // make flush thread num 1 to be easy to construct the case + cluster.cfg.rocksdb.max_background_flushes = 1; + cluster.run(); + + let r = cluster.get_region(b"k10"); + cluster.must_split(&r, b"k10"); + + for i in 0..20 { + let k = format!("k{:02}", i); + cluster.must_put(k.as_bytes(), b"val"); + } + + let r1 = cluster.get_region(b"k00").get_id(); + let r2 = cluster.get_region(b"k15").get_id(); + + let engine = cluster.get_engine(1); + let tablet1 = engine.get_tablet_by_id(r1).unwrap(); + let tablet2 = engine.get_tablet_by_id(r2).unwrap(); + + fail::cfg("on_flush_begin", "1*pause").unwrap(); + tablet1.flush_cf("default", false).unwrap(); // call flush 1 + std::thread::sleep(Duration::from_secs(1)); + + tablet2.flush_cf("default", false).unwrap(); // call flush 2 + for i in 20..30 { + let k = format!("k{:02}", i); 
+ cluster.must_put(k.as_bytes(), b"val"); + } + fail::cfg("on_memtable_sealed", "pause").unwrap(); + + let h = std::thread::spawn(move || { + tablet2.flush_cf("default", true).unwrap(); + }); + + let (tx, rx) = channel(); + let tx = Mutex::new(tx); + fail::cfg_callback("on_flush_completed", move || { + let _ = tx.lock().unwrap().send(true); // call flush 3 + }) + .unwrap(); + fail::remove("on_flush_begin"); + + let _ = rx.recv(); // flush 1 done + // Now, flush 1 has done, flush 3 is blocked at on_memtable_sealed. + // Before https://github.com/tikv/rocksdb/pull/347, unlock will be called + // before calling on_memtable_sealed, so flush 2 can pick the memtable sealed by + // flush 3 and thus make the order chaos. + // Now, unlock will not be called, so we have to remove failpoint to avoid + // deadlock. 2 seconds is long enough to make the test failed before + // rocksdb/pull/347. + std::thread::sleep(Duration::from_secs(2)); + fail::remove("on_memtable_sealed"); + + h.join().unwrap(); +} From 6f0d84e911a86837263b914e8b1ddba9a1da5232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 12 Sep 2023 08:49:39 +0800 Subject: [PATCH 0907/1149] sst_importer: don't cache rewritten files (#15502) close tikv/tikv#15483 The rewrite step of sst_importer::apply has been delayed to while iterating the file. 
Signed-off-by: hillium Co-authored-by: 3pointer --- components/sst_importer/src/sst_importer.rs | 42 +++--- .../tikv_util/src/codec/stream_event.rs | 109 ++++++++++++++-- src/import/sst_service.rs | 5 +- tests/integrations/import/mod.rs | 1 + tests/integrations/import/test_apply_log.rs | 72 ++++++++++ tests/integrations/import/util.rs | 123 +++++++++++++++++- 6 files changed, 322 insertions(+), 30 deletions(-) create mode 100644 tests/integrations/import/test_apply_log.rs diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 33f3c691a26..181f9d67b2f 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -642,7 +642,6 @@ impl SstImporter { async fn exec_download( &self, meta: &KvMeta, - rewrite_rule: &RewriteRule, ext_storage: Arc, speed_limiter: &Limiter, ) -> Result { @@ -690,9 +689,8 @@ impl SstImporter { .with_label_values(&["exec_download"]) .observe(start.saturating_elapsed().as_secs_f64()); - let rewrite_buff = self.rewrite_kv_file(buff, rewrite_rule)?; Ok(LoadedFile { - content: Arc::from(rewrite_buff.into_boxed_slice()), + content: Arc::from(buff.into_boxed_slice()), permit, }) } @@ -700,7 +698,6 @@ impl SstImporter { pub async fn do_read_kv_file( &self, meta: &KvMeta, - rewrite_rule: &RewriteRule, ext_storage: Arc, speed_limiter: &Limiter, ) -> Result { @@ -741,7 +738,7 @@ impl SstImporter { } cache - .get_or_try_init(|| self.exec_download(meta, rewrite_rule, ext_storage, speed_limiter)) + .get_or_try_init(|| self.exec_download(meta, ext_storage, speed_limiter)) .await?; Ok(CacheKvFile::Mem(cache)) } @@ -814,7 +811,6 @@ impl SstImporter { pub async fn read_from_kv_file( &self, meta: &KvMeta, - rewrite_rule: &RewriteRule, ext_storage: Arc, backend: &StorageBackend, speed_limiter: &Limiter, @@ -823,7 +819,7 @@ impl SstImporter { self.do_download_kv_file(meta, backend, speed_limiter) .await? 
} else { - self.do_read_kv_file(meta, rewrite_rule, ext_storage, speed_limiter) + self.do_read_kv_file(meta, ext_storage, speed_limiter) .await? }; match c { @@ -841,8 +837,7 @@ impl SstImporter { let mut buffer = Vec::new(); reader.read_to_end(&mut buffer)?; - let rewrite_buff = self.rewrite_kv_file(buffer, rewrite_rule)?; - Ok(Arc::from(rewrite_buff.into_boxed_slice())) + Ok(Arc::from(buffer.into_boxed_slice())) } } } @@ -940,7 +935,11 @@ impl SstImporter { // perform iteration and key rewrite. let mut new_buff = Vec::with_capacity(file_buff.len()); - let mut event_iter = EventIterator::new(file_buff.as_slice()); + let mut event_iter = EventIterator::with_rewriting( + file_buff.as_slice(), + rewrite_rule.get_old_key_prefix(), + rewrite_rule.get_new_key_prefix(), + ); let mut key = new_prefix.to_vec(); let new_prefix_data_key_len = key.len(); @@ -983,9 +982,14 @@ impl SstImporter { start_ts: u64, restore_ts: u64, file_buff: Arc<[u8]>, + rewrite_rule: &RewriteRule, mut build_fn: impl FnMut(Vec, Vec), ) -> Result> { - let mut event_iter = EventIterator::new(file_buff.as_ref()); + let mut event_iter = EventIterator::with_rewriting( + file_buff.as_ref(), + rewrite_rule.get_old_key_prefix(), + rewrite_rule.get_new_key_prefix(), + ); let mut smallest_key = None; let mut largest_key = None; let mut total_key = 0; @@ -1001,6 +1005,16 @@ impl SstImporter { event_iter.next()?; INPORTER_APPLY_COUNT.with_label_values(&["key_meet"]).inc(); + if !event_iter + .key() + .starts_with(rewrite_rule.get_new_key_prefix()) + { + return Err(Error::WrongKeyPrefix { + what: "do_apply_kv_file", + key: event_iter.key().to_vec(), + prefix: rewrite_rule.get_old_key_prefix().to_vec(), + }); + } let key = event_iter.key().to_vec(); let value = event_iter.value().to_vec(); let ts = Key::decode_ts_from(&key)?; @@ -1028,7 +1042,7 @@ impl SstImporter { largest_key = largest_key .map_or_else(|| Some(key.clone()), |v: Vec| Some(v.max(key.clone()))); } - if total_key != not_in_range { + if 
not_in_range != 0 || ts_not_expected != 0 { info!("build download request file done"; "total_keys" => %total_key, "ts_filtered_keys" => %ts_not_expected, @@ -2050,10 +2064,8 @@ mod tests { }; // test do_read_kv_file() - let rewrite_rule = &new_rewrite_rule(b"", b"", 12345); let output = block_on_external_io(importer.do_read_kv_file( &kv_meta, - rewrite_rule, ext_storage, &Limiter::new(f64::INFINITY), )) @@ -2163,7 +2175,6 @@ mod tests { }; let importer = SstImporter::new(&cfg, import_dir, Some(key_manager), ApiVersion::V1, false).unwrap(); - let rewrite_rule = &new_rewrite_rule(b"", b"", 12345); let ext_storage = { importer.wrap_kms( importer.external_storage_or_cache(&backend, "").unwrap(), @@ -2181,7 +2192,6 @@ mod tests { assert!(importer.import_support_download()); let output = block_on_external_io(importer.read_from_kv_file( &kv_meta, - rewrite_rule, ext_storage, &backend, &Limiter::new(f64::INFINITY), diff --git a/components/tikv_util/src/codec/stream_event.rs b/components/tikv_util/src/codec/stream_event.rs index 5b00cad6372..3c1a04f77e3 100644 --- a/components/tikv_util/src/codec/stream_event.rs +++ b/components/tikv_util/src/codec/stream_event.rs @@ -6,6 +6,13 @@ use bytes::{Buf, Bytes}; use crate::{codec::Result, Either}; +// Note: maybe allow them to be different lifetime. +// But not necessary for now, so keep it simple...? 
+pub struct Rewrite<'a> { + from: &'a [u8], + to: &'a [u8], +} + pub trait Iterator { fn next(&mut self) -> Result<()>; @@ -19,10 +26,12 @@ pub trait Iterator { pub struct EventIterator<'a> { buf: &'a [u8], offset: usize, - key_offset: usize, value_offset: usize, - key_len: usize, value_len: usize, + + key_buf: Vec, + + rewrite_rule: Option>, } impl EventIterator<'_> { @@ -30,10 +39,21 @@ impl EventIterator<'_> { EventIterator { buf, offset: 0, - key_offset: 0, - key_len: 0, + key_buf: vec![], value_offset: 0, value_len: 0, + rewrite_rule: None, + } + } + + pub fn with_rewriting<'a>(buf: &'a [u8], from: &'a [u8], to: &'a [u8]) -> EventIterator<'a> { + EventIterator { + buf, + offset: 0, + key_buf: vec![], + value_offset: 0, + value_len: 0, + rewrite_rule: Some(Rewrite { from, to }), } } @@ -42,14 +62,47 @@ impl EventIterator<'_> { self.offset += 4; result } + + fn consume_key_with_len(&mut self, key_len: usize) { + self.key_buf.clear(); + self.key_buf.reserve(key_len); + self.key_buf + .extend_from_slice(&self.buf[self.offset..self.offset + key_len]); + self.offset += key_len; + } + + fn move_to_next_key_with_rewrite(&mut self) { + let key_len = self.get_size() as usize; + let rewrite = self.rewrite_rule.as_ref().expect("rewrite rule not set"); + if key_len < rewrite.from.len() + || &self.buf[self.offset..self.offset + rewrite.from.len()] != rewrite.from + { + self.consume_key_with_len(key_len); + return; + } + self.key_buf.clear(); + self.key_buf + .reserve(rewrite.to.len() + key_len - rewrite.from.len()); + self.key_buf.extend_from_slice(rewrite.to); + self.key_buf + .extend_from_slice(&self.buf[self.offset + rewrite.from.len()..self.offset + key_len]); + self.offset += key_len; + } + + fn fetch_key_buffer_and_move_to_value(&mut self) { + if self.rewrite_rule.is_some() { + self.move_to_next_key_with_rewrite() + } else { + let key_len = self.get_size() as usize; + self.consume_key_with_len(key_len); + } + } } impl Iterator for EventIterator<'_> { fn next(&mut 
self) -> Result<()> { if self.valid() { - self.key_len = self.get_size() as usize; - self.key_offset = self.offset; - self.offset += self.key_len; + self.fetch_key_buffer_and_move_to_value(); self.value_len = self.get_size() as usize; self.value_offset = self.offset; @@ -63,7 +116,7 @@ impl Iterator for EventIterator<'_> { } fn key(&self) -> &[u8] { - &self.buf[self.key_offset..self.key_offset + self.key_len] + &self.key_buf[..] } fn value(&self) -> &[u8] { @@ -155,4 +208,44 @@ mod tests { } assert_eq!(count, index); } + + #[test] + fn test_rewrite() { + let mut rng = rand::thread_rng(); + let mut event = vec![]; + let mut keys = vec![]; + let mut vals = vec![]; + let count = 20; + + for _i in 0..count { + let should_rewrite = rng.gen::(); + let mut key: Vec = std::iter::once(if should_rewrite { b'k' } else { b'l' }) + .chain((0..100).map(|_| rng.gen_range(0..255))) + .collect(); + let val: Vec = (0..100).map(|_| rng.gen_range(0..255)).collect(); + let e = EventEncoder::encode_event(&key, &val); + for s in e { + event.extend_from_slice(s.as_ref()); + } + if should_rewrite { + key[0] = b'r'; + } + keys.push(key); + vals.push(val); + } + + let mut iter = EventIterator::with_rewriting(&event, b"k", b"r"); + + let mut index = 0_usize; + loop { + if !iter.valid() { + break; + } + iter.next().unwrap(); + assert_eq!(iter.key(), keys[index]); + assert_eq!(iter.value(), vals[index]); + index += 1; + } + assert_eq!(count, index); + } } diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 0c81873c130..6d40ffe959c 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -165,6 +165,9 @@ impl RequestCollector { } fn accept_kv(&mut self, cf: &str, is_delete: bool, k: Vec, v: Vec) { + debug!("Accepting KV."; "cf" => %cf, + "key" => %log_wrappers::Value::key(&k), + "value" => %log_wrappers::Value::key(&v)); // Need to skip the empty key/value that could break the transaction or cause // data corruption. 
see details at https://github.com/pingcap/tiflow/issues/5468. if k.is_empty() || (!is_delete && v.is_empty()) { @@ -567,7 +570,6 @@ impl ImportSstService { let buff = importer .read_from_kv_file( meta, - rule, ext_storage.clone(), req.get_storage_backend(), &limiter, @@ -579,6 +581,7 @@ impl ImportSstService { meta.get_start_ts(), meta.get_restore_ts(), buff, + rule, |k, v| collector.accept_kv(meta.get_cf(), meta.get_is_delete(), k, v), )? { if let Some(range) = range.as_mut() { diff --git a/tests/integrations/import/mod.rs b/tests/integrations/import/mod.rs index 96e2c655e18..4de0fa26472 100644 --- a/tests/integrations/import/mod.rs +++ b/tests/integrations/import/mod.rs @@ -1,4 +1,5 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. +mod test_apply_log; mod test_sst_service; mod util; diff --git a/tests/integrations/import/test_apply_log.rs b/tests/integrations/import/test_apply_log.rs new file mode 100644 index 00000000000..3d8cf85b02c --- /dev/null +++ b/tests/integrations/import/test_apply_log.rs @@ -0,0 +1,72 @@ +use engine_traits::CF_DEFAULT; +use external_storage_export::LocalStorage; +use kvproto::import_sstpb::ApplyRequest; +use tempfile::TempDir; + +use crate::import::util; + +#[test] +fn test_basic_apply() { + let (_cluster, ctx, tikv, import) = util::new_cluster_and_tikv_import_client(); + let tmp = TempDir::new().unwrap(); + let storage = LocalStorage::new(tmp.path()).unwrap(); + let default = [ + (b"k1", b"v1", 1), + (b"k2", b"v2", 2), + (b"k3", b"v3", 3), + (b"k4", b"v4", 4), + ]; + let default_rewritten = [(b"r1", b"v1", 1), (b"r2", b"v2", 2), (b"r3", b"v3", 3)]; + let mut sst_meta = util::make_plain_file(&storage, "file1.log", default.into_iter()); + util::register_range_for(&mut sst_meta, b"k1", b"k3a"); + let mut req = ApplyRequest::new(); + req.set_context(ctx.clone()); + req.set_rewrite_rules(vec![util::rewrite_for(&mut sst_meta, b"k", b"r")].into()); + req.set_metas(vec![sst_meta].into()); + 
req.set_storage_backend(util::local_storage(&tmp)); + import.apply(&req).unwrap(); + util::check_applied_kvs_cf(&tikv, &ctx, CF_DEFAULT, default_rewritten.into_iter()); +} + +#[test] +fn test_apply_twice() { + let (_cluster, ctx, tikv, import) = util::new_cluster_and_tikv_import_client(); + let tmp = TempDir::new().unwrap(); + let storage = LocalStorage::new(tmp.path()).unwrap(); + let default = [( + b"k1", + b"In this case, we are going to test write twice, but with different rewrite rule.", + 1, + )]; + let default_fst = [( + b"r1", + b"In this case, we are going to test write twice, but with different rewrite rule.", + 1, + )]; + let default_snd = [( + b"z1", + b"In this case, we are going to test write twice, but with different rewrite rule.", + 1, + )]; + + let mut sst_meta = util::make_plain_file(&storage, "file2.log", default.into_iter()); + util::register_range_for(&mut sst_meta, b"k1", b"k1a"); + let mut req = ApplyRequest::new(); + req.set_context(ctx.clone()); + req.set_rewrite_rules(vec![util::rewrite_for(&mut sst_meta, b"k", b"r")].into()); + req.set_metas(vec![sst_meta.clone()].into()); + req.set_storage_backend(util::local_storage(&tmp)); + import.apply(&req).unwrap(); + util::check_applied_kvs_cf(&tikv, &ctx, CF_DEFAULT, default_fst.into_iter()); + + util::register_range_for(&mut sst_meta, b"k1", b"k1a"); + req.set_rewrite_rules(vec![util::rewrite_for(&mut sst_meta, b"k", b"z")].into()); + req.set_metas(vec![sst_meta].into()); + import.apply(&req).unwrap(); + util::check_applied_kvs_cf( + &tikv, + &ctx, + CF_DEFAULT, + default_fst.into_iter().chain(default_snd.into_iter()), + ); +} diff --git a/tests/integrations/import/util.rs b/tests/integrations/import/util.rs index cc5d22d517d..d8a11d50746 100644 --- a/tests/integrations/import/util.rs +++ b/tests/integrations/import/util.rs @@ -1,16 +1,31 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{sync::Arc, thread, time::Duration}; - +use std::{ + io::{Cursor, Write}, + sync::Arc, + thread, + time::Duration, +}; + +use collections::HashMap; use engine_rocks::RocksEngine; -use futures::{executor::block_on, stream, SinkExt}; +use engine_traits::CF_DEFAULT; +use external_storage_export::{ExternalStorage, UnpinReader}; +use futures::{executor::block_on, io::Cursor as AsyncCursor, stream, SinkExt}; use grpcio::{ChannelBuilder, Environment, Result, WriteFlags}; -use kvproto::{import_sstpb::*, kvrpcpb::*, tikvpb::*}; +use kvproto::{ + brpb::{Local, StorageBackend}, + import_sstpb::{KvMeta, *}, + kvrpcpb::*, + tikvpb::*, +}; use security::SecurityConfig; +use tempfile::TempDir; use test_raftstore::*; use test_raftstore_v2::{Cluster as ClusterV2, ServerCluster as ServerClusterV2}; use tikv::config::TikvConfig; -use tikv_util::HandyRwLock; +use tikv_util::{codec::stream_event::EventEncoder, stream::block_on_external_io, HandyRwLock}; +use txn_types::Key; use uuid::Uuid; const CLEANUP_SST_MILLIS: u64 = 10; @@ -246,6 +261,40 @@ pub fn check_ingested_kvs_cf(tikv: &TikvClient, ctx: &Context, cf: &str, sst_ran } } +#[track_caller] +pub fn check_applied_kvs_cf, V: AsRef<[u8]> + std::fmt::Debug>( + tikv: &TikvClient, + ctx: &Context, + cf: &str, + entries: impl Iterator, +) { + let mut get = RawBatchGetRequest::default(); + get.set_cf(cf.to_owned()); + get.set_context(ctx.clone()); + let mut keymap = HashMap::default(); + for (key, value, ts) in entries { + let the_key = Key::from_raw(key.as_ref()) + .append_ts(ts.into()) + .into_encoded(); + keymap.insert(the_key.clone(), value); + get.mut_keys().push(the_key); + } + for pair in tikv.raw_batch_get(&get).unwrap().get_pairs() { + let entry = keymap.remove(pair.get_key()).expect("unexpected key"); + assert_eq!( + entry.as_ref(), + pair.get_value(), + "key is {:?}", + pair.get_key() + ); + } + assert!( + keymap.is_empty(), + "not all keys consumed, remained {:?}", + keymap + ); +} + pub fn check_ingested_txn_kvs( 
tikv: &TikvClient, ctx: &Context, @@ -273,3 +322,67 @@ pub fn check_sst_deleted(client: &ImportSstClient, meta: &SstMeta, data: &[u8]) } send_upload_sst(client, meta, data).unwrap(); } + +pub fn make_plain_file(storage: &dyn ExternalStorage, name: &str, kvs: I) -> KvMeta +where + I: Iterator, + K: AsRef<[u8]>, + V: AsRef<[u8]>, +{ + let mut buf = vec![]; + let mut file = Cursor::new(&mut buf); + let mut start_ts: Option = None; + for (key, value, ts) in kvs { + let the_key = Key::from_raw(key.as_ref()) + .append_ts(ts.into()) + .into_encoded(); + start_ts = Some(start_ts.map_or(ts, |ts0| ts0.min(ts))); + for segment in EventEncoder::encode_event(&the_key, value.as_ref()) { + file.write_all(segment.as_ref()).unwrap(); + } + } + file.flush().unwrap(); + let len = buf.len() as u64; + block_on_external_io(storage.write(name, UnpinReader(Box::new(AsyncCursor::new(buf))), len)) + .unwrap(); + let mut meta = KvMeta::new(); + meta.set_start_ts(start_ts.unwrap_or_default()); + meta.set_length(len); + meta.set_restore_ts(u64::MAX); + meta.set_compression_type(kvproto::brpb::CompressionType::Unknown); + meta.set_name(name.to_owned()); + meta.set_cf(CF_DEFAULT.to_owned()); + meta +} + +pub fn rewrite_for(meta: &mut KvMeta, old_prefix: &[u8], new_prefix: &[u8]) -> RewriteRule { + assert_eq!(old_prefix.len(), new_prefix.len()); + fn rewrite(key: &mut Vec, old_prefix: &[u8], new_prefix: &[u8]) { + assert!(key.starts_with(old_prefix)); + let len = old_prefix.len(); + key.splice(..len, new_prefix.iter().cloned()); + } + rewrite(meta.mut_start_key(), old_prefix, new_prefix); + rewrite(meta.mut_end_key(), old_prefix, new_prefix); + let mut rule = RewriteRule::default(); + rule.set_old_key_prefix(old_prefix.to_vec()); + rule.set_new_key_prefix(new_prefix.to_vec()); + rule +} + +pub fn register_range_for(meta: &mut KvMeta, start: &[u8], end: &[u8]) { + let start = Key::from_raw(start); + let end = Key::from_raw(end); + meta.set_start_key(start.into_encoded()); + 
meta.set_end_key(end.into_encoded()); +} + +pub fn local_storage(tmp: &TempDir) -> StorageBackend { + let mut backend = StorageBackend::default(); + backend.set_local({ + let mut local = Local::default(); + local.set_path(tmp.path().to_str().unwrap().to_owned()); + local + }); + backend +} From d830a58335839fe02434727f2d8b252a02ba386d Mon Sep 17 00:00:00 2001 From: lucasliang Date: Tue, 12 Sep 2023 18:04:41 +0800 Subject: [PATCH 0908/1149] [Dynamic Region] Supply extra test cases for `gc`. (#15544) ref tikv/tikv#15409 Supply extra test cases, including integration tests and unit tests for raftstore-v2 on `gc`. Signed-off-by: lucasliang --- tests/failpoints/cases/test_gc_worker.rs | 105 +++++++++++------------ tests/integrations/server/gc_worker.rs | 18 ++-- 2 files changed, 61 insertions(+), 62 deletions(-) diff --git a/tests/failpoints/cases/test_gc_worker.rs b/tests/failpoints/cases/test_gc_worker.rs index d24ec85f040..50b71b59f47 100644 --- a/tests/failpoints/cases/test_gc_worker.rs +++ b/tests/failpoints/cases/test_gc_worker.rs @@ -14,9 +14,10 @@ use raftstore::coprocessor::{ RegionInfo, RegionInfoCallback, RegionInfoProvider, Result as CopResult, SeekRegionCallback, }; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv::{ server::gc_worker::{ - AutoGcConfig, GcSafePointProvider, GcTask, Result as GcWorkerResult, TestGcRunner, + sync_gc, AutoGcConfig, GcSafePointProvider, GcTask, Result as GcWorkerResult, TestGcRunner, }, storage::{ kv::TestEngineBuilder, @@ -61,11 +62,38 @@ fn test_error_in_compaction_filter() { fail::remove(fp); } +#[derive(Clone)] +struct MockSafePointProvider; +impl GcSafePointProvider for MockSafePointProvider { + fn get_safe_point(&self) -> GcWorkerResult { + Ok(TimeStamp::from(0)) + } +} + +#[derive(Clone)] +struct MockRegionInfoProvider; +impl RegionInfoProvider for MockRegionInfoProvider { + fn seek_region(&self, _: &[u8], _: SeekRegionCallback) -> CopResult<()> { + Ok(()) + } + fn find_region_by_id( + &self, + _: 
u64, + _: RegionInfoCallback>, + ) -> CopResult<()> { + Ok(()) + } + fn get_regions_in_range(&self, _start_key: &[u8], _end_key: &[u8]) -> CopResult> { + Ok(vec![]) + } +} + // Test GC worker can receive and handle orphan versions emit from write CF's // compaction filter correctly. -#[test] +#[test_case(test_raftstore::must_new_and_configure_cluster)] +#[test_case(test_raftstore_v2::must_new_and_configure_cluster)] fn test_orphan_versions_from_compaction_filter() { - let (cluster, leader, ctx) = must_new_and_configure_cluster(|cluster| { + let (cluster, leader, ctx) = new_cluster(|cluster| { cluster.cfg.gc.enable_compaction_filter = true; cluster.cfg.gc.compaction_filter_skip_version_check = true; cluster.pd_client.disable_default_operator(); @@ -76,8 +104,20 @@ fn test_orphan_versions_from_compaction_filter() { let channel = ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader_store)); let client = TikvClient::new(channel); - init_compaction_filter(&cluster, leader_store); - let engine = cluster.engines.get(&leader_store).unwrap(); + // Call `start_auto_gc` like `cmd/src/server.rs` does. It will combine + // compaction filter and GC worker so that GC worker can help to process orphan + // versions on default CF. 
+ { + let sim = cluster.sim.rl(); + let gc_worker = sim.get_gc_worker(leader_store); + gc_worker + .start_auto_gc( + AutoGcConfig::new(MockSafePointProvider, MockRegionInfoProvider, 1), + Arc::new(AtomicU64::new(0)), + ) + .unwrap(); + } + let engine = cluster.get_engine(leader_store); let pk = b"k1".to_vec(); let large_value = vec![b'x'; 300]; @@ -91,22 +131,23 @@ fn test_orphan_versions_from_compaction_filter() { if start_ts < 40 { let key = Key::from_raw(b"k1").append_ts(start_ts.into()); let key = data_key(key.as_encoded()); - assert!(engine.kv.get_value(&key).unwrap().is_some()); + assert!(engine.get_value(&key).unwrap().is_some()); } } let fp = "write_compaction_filter_flush_write_batch"; fail::cfg(fp, "return").unwrap(); - let mut gc_runner = TestGcRunner::new(100); - gc_runner.gc_scheduler = cluster.sim.rl().get_gc_worker(1).scheduler(); - gc_runner.gc(&engine.kv); + let gc_safe_ponit = TimeStamp::from(100); + let gc_scheduler = cluster.sim.rl().get_gc_worker(1).scheduler(); + let region = cluster.get_region(&pk); + sync_gc(&gc_scheduler, region, gc_safe_ponit).unwrap(); 'IterKeys: for &start_ts in &[10, 20, 30] { let key = Key::from_raw(b"k1").append_ts(start_ts.into()); let key = data_key(key.as_encoded()); for _ in 0..100 { - if engine.kv.get_value(&key).unwrap().is_some() { + if engine.get_value(&key).unwrap().is_some() { thread::sleep(Duration::from_millis(20)); continue; } @@ -117,47 +158,3 @@ fn test_orphan_versions_from_compaction_filter() { fail::remove(fp); } - -// Call `start_auto_gc` like `cmd/src/server.rs` does. It will combine -// compaction filter and GC worker so that GC worker can help to process orphan -// versions on default CF. 
-fn init_compaction_filter(cluster: &Cluster, store_id: u64) { - #[derive(Clone)] - struct MockSafePointProvider; - impl GcSafePointProvider for MockSafePointProvider { - fn get_safe_point(&self) -> GcWorkerResult { - Ok(TimeStamp::from(0)) - } - } - - #[derive(Clone)] - struct MockRegionInfoProvider; - impl RegionInfoProvider for MockRegionInfoProvider { - fn seek_region(&self, _: &[u8], _: SeekRegionCallback) -> CopResult<()> { - Ok(()) - } - fn find_region_by_id( - &self, - _: u64, - _: RegionInfoCallback>, - ) -> CopResult<()> { - Ok(()) - } - fn get_regions_in_range( - &self, - _start_key: &[u8], - _end_key: &[u8], - ) -> CopResult> { - Ok(vec![]) - } - } - - let sim = cluster.sim.rl(); - let gc_worker = sim.get_gc_worker(store_id); - gc_worker - .start_auto_gc( - AutoGcConfig::new(MockSafePointProvider, MockRegionInfoProvider, 1), - Arc::new(AtomicU64::new(0)), - ) - .unwrap(); -} diff --git a/tests/integrations/server/gc_worker.rs b/tests/integrations/server/gc_worker.rs index cfadde84405..238102df6b6 100644 --- a/tests/integrations/server/gc_worker.rs +++ b/tests/integrations/server/gc_worker.rs @@ -7,15 +7,17 @@ use grpcio::{ChannelBuilder, Environment}; use keys::data_key; use kvproto::{kvrpcpb::*, tikvpb::TikvClient}; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv::server::gc_worker::sync_gc; use tikv_util::HandyRwLock; use txn_types::Key; // Since v5.0 GC bypasses Raft, which means GC scans/deletes records with // `keys::DATA_PREFIX`. This case ensures it's performed correctly. 
-#[test] +#[test_case(test_raftstore::must_new_cluster_mul)] +#[test_case(test_raftstore_v2::must_new_cluster_mul)] fn test_gc_bypass_raft() { - let (cluster, leader, ctx) = must_new_cluster_mul(2); + let (cluster, leader, ctx) = new_cluster(2); cluster.pd_client.disable_default_operator(); let env = Arc::new(Environment::new(1)); @@ -25,7 +27,7 @@ fn test_gc_bypass_raft() { let pk = b"k1".to_vec(); let value = vec![b'x'; 300]; - let engine = cluster.engines.get(&leader_store).unwrap(); + let engine = cluster.get_engine(leader_store); for &start_ts in &[10, 20, 30, 40] { let commit_ts = start_ts + 5; @@ -37,11 +39,11 @@ fn test_gc_bypass_raft() { let key = Key::from_raw(b"k1").append_ts(start_ts.into()); let key = data_key(key.as_encoded()); - assert!(engine.kv.get_value(&key).unwrap().is_some()); + assert!(engine.get_value(&key).unwrap().is_some()); let key = Key::from_raw(b"k1").append_ts(commit_ts.into()); let key = data_key(key.as_encoded()); - assert!(engine.kv.get_value_cf(CF_WRITE, &key).unwrap().is_some()); + assert!(engine.get_value_cf(CF_WRITE, &key).unwrap().is_some()); } let node_ids = cluster.get_node_ids(); @@ -53,16 +55,16 @@ fn test_gc_bypass_raft() { region.set_end_key(b"k2".to_vec()); sync_gc(&gc_sched, region, 200.into()).unwrap(); - let engine = cluster.engines.get(&store_id).unwrap(); + let engine = cluster.get_engine(store_id); for &start_ts in &[10, 20, 30] { let commit_ts = start_ts + 5; let key = Key::from_raw(b"k1").append_ts(start_ts.into()); let key = data_key(key.as_encoded()); - assert!(engine.kv.get_value(&key).unwrap().is_none()); + assert!(engine.get_value(&key).unwrap().is_none()); let key = Key::from_raw(b"k1").append_ts(commit_ts.into()); let key = data_key(key.as_encoded()); - assert!(engine.kv.get_value_cf(CF_WRITE, &key).unwrap().is_none()); + assert!(engine.get_value_cf(CF_WRITE, &key).unwrap().is_none()); } } } From db0304e65045fdc6701e8fe0db80416a0210e412 Mon Sep 17 00:00:00 2001 From: Spade A 
<71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 13 Sep 2023 07:43:38 +0800 Subject: [PATCH 0909/1149] *: update cargo.lock (#15573) close tikv/tikv#15579 update cargo.lock Signed-off-by: SpadeA-Tang --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7e09c3d2979..fb5e711d34d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3108,7 +3108,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/SpadeA-Tang/rust-rocksdb.git?branch=fix-sealed-chaos#f5121f48a1543c5d576ad7964c617f30f79a3d66" +source = "git+https://github.com/tikv/rust-rocksdb.git#fc38a5b427e6c9b351f835c641e2ee95b8ff8306" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3127,7 +3127,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/SpadeA-Tang/rust-rocksdb.git?branch=fix-sealed-chaos#f5121f48a1543c5d576ad7964c617f30f79a3d66" +source = "git+https://github.com/tikv/rust-rocksdb.git#fc38a5b427e6c9b351f835c641e2ee95b8ff8306" dependencies = [ "bzip2-sys", "cc", @@ -5101,7 +5101,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/SpadeA-Tang/rust-rocksdb.git?branch=fix-sealed-chaos#f5121f48a1543c5d576ad7964c617f30f79a3d66" +source = "git+https://github.com/tikv/rust-rocksdb.git#fc38a5b427e6c9b351f835c641e2ee95b8ff8306" dependencies = [ "libc 0.2.146", "librocksdb_sys", From d5d89ba60b07e508e4073b5460b192680c272213 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Wed, 13 Sep 2023 14:10:38 +0800 Subject: [PATCH 0910/1149] coprocessor: use the deadline in kvrpcpb::Context (#15564) close tikv/tikv#15565 Signed-off-by: lance6716 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/coprocessor/mod.rs | 46 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/src/coprocessor/mod.rs 
b/src/coprocessor/mod.rs index 140d3c0476e..fcd16f9b947 100644 --- a/src/coprocessor/mod.rs +++ b/src/coprocessor/mod.rs @@ -159,7 +159,11 @@ impl ReqContext { cache_match_version: Option, perf_level: PerfLevel, ) -> Self { - let deadline = Deadline::from_now(max_handle_duration); + let mut deadline_duration = max_handle_duration; + if context.max_execution_duration_ms > 0 { + deadline_duration = Duration::from_millis(context.max_execution_duration_ms); + } + let deadline = Deadline::from_now(deadline_duration); let bypass_locks = TsSet::from_u64s(context.take_resolved_locks()); let access_locks = TsSet::from_u64s(context.take_committed_locks()); let lower_bound = match ranges.first().as_ref() { @@ -235,6 +239,23 @@ lazy_static! { mod tests { use super::*; + fn default_req_ctx_with_ctx_duration( + context: kvrpcpb::Context, + max_handle_duration: Duration, + ) -> ReqContext { + ReqContext::new( + ReqTag::test, + context, + Vec::new(), + max_handle_duration, + None, + None, + TimeStamp::max(), + None, + PerfLevel::EnableCount, + ) + } + #[test] fn test_build_task_id() { let mut ctx = ReqContext::default_for_test(); @@ -246,4 +267,27 @@ mod tests { ctx.context.set_task_id(0); assert_eq!(ctx.build_task_id(), start_ts); } + + #[test] + fn test_deadline_from_req_ctx() { + let ctx = kvrpcpb::Context::default(); + let max_handle_duration = Duration::from_millis(100); + let req_ctx = default_req_ctx_with_ctx_duration(ctx, max_handle_duration); + // sleep at least 100ms + std::thread::sleep(Duration::from_millis(200)); + req_ctx + .deadline + .check() + .expect_err("deadline should exceed"); + + let mut ctx = kvrpcpb::Context::default(); + ctx.max_execution_duration_ms = 100_000; + let req_ctx = default_req_ctx_with_ctx_duration(ctx, max_handle_duration); + // sleep at least 100ms + std::thread::sleep(Duration::from_millis(200)); + req_ctx + .deadline + .check() + .expect("deadline should not exceed"); + } } From b75f55901e5defd5c87a10de2ca7088749c16b7f Mon Sep 17 00:00:00 
2001 From: YangKeao Date: Wed, 13 Sep 2023 17:19:38 +0800 Subject: [PATCH 0911/1149] tidb_query_datatype,collation: remove utf8mb4_0900_bin from need_restored_data (#15572) close tikv/tikv#15571 Signed-off-by: Yang Keao --- .../tidb_query_datatype/src/def/field_type.rs | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/components/tidb_query_datatype/src/def/field_type.rs b/components/tidb_query_datatype/src/def/field_type.rs index 06f4454b36d..8a56ac5ac68 100644 --- a/components/tidb_query_datatype/src/def/field_type.rs +++ b/components/tidb_query_datatype/src/def/field_type.rs @@ -140,7 +140,10 @@ impl Collation { } pub fn is_bin_collation(&self) -> bool { - matches!(self, Collation::Utf8Mb4Bin | Collation::Latin1Bin) + matches!( + self, + Collation::Utf8Mb4Bin | Collation::Latin1Bin | Collation::Utf8Mb40900Bin + ) } } @@ -333,6 +336,10 @@ pub trait FieldTypeAccessor { .map(|col| col.is_bin_collation()) .unwrap_or(false) || self.is_varchar_like()) + && self + .collation() + .map(|col| col != Collation::Utf8Mb40900Bin) + .unwrap_or(false) } } @@ -455,6 +462,7 @@ mod tests { use std::i32; use super::*; + use crate::builder::FieldTypeBuilder; fn field_types() -> Vec { vec![ @@ -583,4 +591,31 @@ mod tests { } } } + + #[test] + fn test_need_restored_data() { + let cases = vec![ + (FieldTypeTp::String, Collation::Binary, false), + (FieldTypeTp::VarString, Collation::Binary, false), + (FieldTypeTp::String, Collation::Utf8Mb4Bin, false), + (FieldTypeTp::VarString, Collation::Utf8Mb4Bin, true), + (FieldTypeTp::String, Collation::Utf8Mb4GeneralCi, true), + (FieldTypeTp::VarString, Collation::Utf8Mb4GeneralCi, true), + (FieldTypeTp::String, Collation::Utf8Mb4UnicodeCi, true), + (FieldTypeTp::VarString, Collation::Utf8Mb4UnicodeCi, true), + (FieldTypeTp::String, Collation::Utf8Mb40900AiCi, true), + (FieldTypeTp::VarString, Collation::Utf8Mb40900AiCi, true), + (FieldTypeTp::String, Collation::Utf8Mb40900Bin, false), + (FieldTypeTp::VarString, 
Collation::Utf8Mb40900Bin, false), + (FieldTypeTp::String, Collation::GbkBin, true), + (FieldTypeTp::VarString, Collation::GbkBin, true), + (FieldTypeTp::String, Collation::GbkChineseCi, true), + (FieldTypeTp::VarString, Collation::GbkChineseCi, true), + ]; + + for (tp, collation, result) in cases { + let ft = FieldTypeBuilder::new().tp(tp).collation(collation).build(); + assert_eq!(ft.need_restored_data(), result) + } + } } From 063c9cd64c8bcf0c2373358354994499d9edeb0b Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 14 Sep 2023 00:52:38 +0800 Subject: [PATCH 0912/1149] raftstore-v2: persist applied index after ingset sst (#15538) close tikv/tikv#15461 Signed-off-by: glorv Co-authored-by: tonyxuqqi --- .../raftstore-v2/src/operation/command/mod.rs | 6 + .../src/operation/command/write/ingest.rs | 12 +- .../src/operation/ready/apply_trace.rs | 244 ++++++++++++++++-- .../raftstore-v2/src/operation/ready/mod.rs | 10 +- components/raftstore-v2/src/raft/apply.rs | 14 +- .../src/router/internal_message.rs | 7 + components/raftstore-v2/src/router/mod.rs | 2 +- tests/failpoints/cases/test_import_service.rs | 76 +++++- 8 files changed, 341 insertions(+), 30 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index c39f2412f32..e579d22c6da 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -455,6 +455,11 @@ impl Peer { if is_leader { self.retry_pending_prepare_merge(ctx, apply_res.applied_index); } + if !apply_res.sst_applied_index.is_empty() { + self.storage_mut() + .apply_trace_mut() + .on_sst_ingested(&apply_res.sst_applied_index); + } self.on_data_modified(apply_res.modifications); self.handle_read_on_apply( ctx, @@ -866,6 +871,7 @@ impl Apply { apply_res.modifications = *self.modifications_mut(); apply_res.metrics = mem::take(&mut self.metrics); apply_res.bucket_stat = self.buckets.clone(); + 
apply_res.sst_applied_index = self.take_sst_applied_index(); let written_bytes = apply_res.metrics.written_bytes; let skip_report = || -> bool { diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index 7e8ed381ad0..92f5923d167 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -2,7 +2,7 @@ use collections::HashMap; use crossbeam::channel::TrySendError; -use engine_traits::{data_cf_offset, KvEngine, RaftEngine}; +use engine_traits::{data_cf_offset, KvEngine, RaftEngine, DATA_CFS_LEN}; use kvproto::import_sstpb::SstMeta; use raftstore::{ store::{check_sst_for_ingestion, metrics::PEER_WRITE_CMD_COUNTER, util}, @@ -16,7 +16,7 @@ use crate::{ batch::StoreContext, fsm::{ApplyResReporter, Store, StoreFsmDelegate}, raft::{Apply, Peer}, - router::{PeerMsg, StoreTick}, + router::{PeerMsg, SstApplyIndex, StoreTick}, worker::tablet, }; @@ -107,10 +107,12 @@ impl Peer { impl Apply { #[inline] pub fn apply_ingest(&mut self, index: u64, ssts: Vec) -> Result<()> { + fail::fail_point!("on_apply_ingest"); PEER_WRITE_CMD_COUNTER.ingest_sst.inc(); let mut infos = Vec::with_capacity(ssts.len()); let mut size: i64 = 0; let mut keys: u64 = 0; + let mut cf_indexes = [u64::MAX; DATA_CFS_LEN]; for sst in &ssts { // This may not be enough as ingest sst may not trigger flush at all. let off = data_cf_offset(sst.get_cf_name()); @@ -138,6 +140,7 @@ impl Apply { slog_panic!(self.logger, "corrupted sst"; "sst" => ?sst, "error" => ?e); } } + cf_indexes[off] = index; } if !infos.is_empty() { // Unlike v1, we can't batch ssts accross regions. 
@@ -154,6 +157,11 @@ impl Apply { self.metrics.size_diff_hint += size; self.metrics.written_bytes += size as u64; self.metrics.written_keys += keys; + for (cf_index, index) in cf_indexes.into_iter().enumerate() { + if index != u64::MAX { + self.push_sst_applied_index(SstApplyIndex { cf_index, index }); + } + } Ok(()) } } diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index 1601e1f01dd..af0257e763f 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -29,6 +29,7 @@ use std::{ cmp, + collections::VecDeque, path::Path, sync::{atomic::Ordering, mpsc::SyncSender, Mutex}, }; @@ -56,7 +57,7 @@ use crate::{ ready::snapshot::{install_tablet, recv_snap_path}, }, raft::{Peer, Storage}, - router::PeerMsg, + router::{PeerMsg, SstApplyIndex}, worker::tablet, Result, StoreRouter, }; @@ -138,7 +139,7 @@ impl engine_traits::StateStorage for StateStorage< /// Mapping from data cf to an u64 index. pub type DataTrace = [u64; DATA_CFS_LEN]; -#[derive(Clone, Copy, Default, Debug)] +#[derive(Clone, Default, Debug)] struct Progress { flushed: u64, /// The index of last entry that has modification to the CF. The value @@ -146,6 +147,20 @@ struct Progress { /// /// If `flushed` == `last_modified`, then all data in the CF is persisted. last_modified: u64, + // applied indexes ranges that represent sst is ingested but not flushed indexes. + pending_sst_ranges: VecDeque, +} + +// A range representing [start, end], upper bound inclusive for handling +// convenience. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct IndexRange(u64, u64); + +#[derive(Debug)] +// track the global flushed index related to the write task. +struct ReadyFlushedIndex { + ready_number: u64, + flushed_index: u64, } /// `ApplyTrace` is used to track the indexes of modifications and flushes. 
@@ -178,6 +193,9 @@ pub struct ApplyTrace { last_flush_trigger: u64, /// `true` means the raft cf record should be persisted in next ready. try_persist: bool, + // Because we persist the global flushed in the write task, so we should track + // the task and handle sst cleanup after the write task finished. + flushed_index_queue: VecDeque, } impl ApplyTrace { @@ -230,6 +248,25 @@ impl ApplyTrace { self.admin.last_modified = index; } + pub fn on_sst_ingested(&mut self, sst_applied_index: &[SstApplyIndex]) { + use std::cmp::Ordering; + for &SstApplyIndex { cf_index, index } in sst_applied_index { + let p = &mut self.data_cfs[cf_index]; + if p.flushed < index { + let max_idx = p.pending_sst_ranges.iter().last().map(|r| r.1).unwrap_or(0) + 1; + match max_idx.cmp(&index) { + Ordering::Less => { + p.pending_sst_ranges.push_back(IndexRange(index, index)); + } + Ordering::Equal => { + p.pending_sst_ranges.iter_mut().last().unwrap().1 = index; + } + _ => {} + } + } + } + } + pub fn persisted_apply_index(&self) -> u64 { self.persisted_applied } @@ -283,17 +320,45 @@ impl ApplyTrace { } }) .min(); + // At best effort, we can only advance the index to `mem_index`. let candidate = cmp::min(mem_index, min_flushed.unwrap_or(u64::MAX)); + // try advance the index if there are any sst ingestion next to the flushed + // index, and always trigger a flush if there is any sst ingestion. + let (candidate, has_ingested_sst) = self.advance_flushed_index_for_ingest(candidate); if candidate > self.admin.flushed { self.admin.flushed = candidate; - if self.admin.flushed > self.persisted_applied + 100 { + if has_ingested_sst || (self.admin.flushed > self.persisted_applied + 100) { self.try_persist = true; } } // TODO: persist admin.flushed every 10 minutes. 
} + fn advance_flushed_index_for_ingest(&mut self, mut max_index: u64) -> (u64, bool) { + let mut has_ingest = false; + loop { + let mut has_change = false; + for p in self.data_cfs.iter_mut() { + while let Some(r) = p.pending_sst_ranges.front_mut() { + if r.0 > max_index + 1 { + break; + } else if r.1 > max_index { + max_index = r.1; + has_change = true; + } + p.pending_sst_ranges.pop_front(); + has_ingest = true; + } + } + if !has_change { + break; + } + } + + (max_index, has_ingest) + } + /// Get the flushed indexes of all data CF that is needed when recoverying /// logs. /// @@ -348,6 +413,38 @@ impl ApplyTrace { fail_point!("should_persist_apply_trace", |_| true); self.try_persist } + + #[inline] + pub fn register_flush_task(&mut self, ready_number: u64, flushed_index: u64) { + assert!( + self.flushed_index_queue + .iter() + .last() + .map(|f| f.ready_number) + .unwrap_or(0) + < ready_number + ); + self.flushed_index_queue.push_back(ReadyFlushedIndex { + ready_number, + flushed_index, + }); + } + + #[inline] + pub fn take_flush_index(&mut self, ready_number: u64) -> Option { + use std::cmp::Ordering; + while let Some(r) = self.flushed_index_queue.pop_front() { + match r.ready_number.cmp(&ready_number) { + Ordering::Equal => return Some(r.flushed_index), + Ordering::Greater => { + self.flushed_index_queue.push_front(r); + break; + } + _ => {} + } + } + None + } } impl Storage { @@ -546,6 +643,7 @@ impl Storage { .unwrap(); trace.try_persist = false; trace.persisted_applied = trace.admin.flushed; + trace.register_flush_task(write_task.ready_number(), trace.admin.flushed); } } @@ -566,24 +664,7 @@ impl Peer { let apply_trace = self.storage_mut().apply_trace_mut(); apply_trace.on_flush(cf, index); apply_trace.maybe_advance_admin_flushed(apply_index); - let stale_ssts = self.sst_apply_state().stale_ssts(cf, index); - if stale_ssts.is_empty() { - return; - } - info!( - self.logger, - "schedule delete stale ssts after flush"; - "stale_ssts" => ?stale_ssts, - 
"apply_index" => apply_index, - "cf" => cf, - "flushed_index" => index, - ); - let _ = ctx - .schedulers - .tablet - .schedule(tablet::Task::CleanupImportSst( - stale_ssts.into_boxed_slice(), - )); + self.cleanup_stale_ssts(ctx, &[cf], index, apply_index); } pub fn on_data_modified(&mut self, modification: DataTrace) { @@ -598,6 +679,38 @@ impl Peer { apply_trace.maybe_advance_admin_flushed(apply_index); } + pub fn cleanup_stale_ssts( + &mut self, + ctx: &mut StoreContext, + cfs: &[&str], + index: u64, + apply_index: u64, + ) { + let mut stale_ssts = vec![]; + for cf in cfs { + let ssts = self.sst_apply_state().stale_ssts(cf, index); + if !ssts.is_empty() { + info!( + self.logger, + "schedule delete stale ssts after flush"; + "stale_ssts" => ?stale_ssts, + "apply_index" => apply_index, + "cf" => cf, + "flushed_index" => index, + ); + stale_ssts.extend(ssts); + } + } + if !stale_ssts.is_empty() { + _ = ctx + .schedulers + .tablet + .schedule(tablet::Task::CleanupImportSst( + stale_ssts.into_boxed_slice(), + )); + } + } + pub fn flush_before_close(&mut self, ctx: &StoreContext, tx: SyncSender<()>) { info!( self.logger, @@ -689,7 +802,7 @@ impl Peer { #[cfg(test)] mod tests { - use engine_traits::RaftEngineReadOnly; + use engine_traits::{CfName, RaftEngineReadOnly}; use kvproto::metapb::Peer; use tempfile::TempDir; @@ -809,6 +922,93 @@ mod tests { // Because modify is recorded, so we know there should be no admin // modification and index can be advanced. 
assert_eq!(5, trace.admin.flushed); + + fn range_equals(trace: &ApplyTrace, cf: &str, expected: Vec) { + let pending_ranges = &trace.data_cfs[data_cf_offset(cf)].pending_sst_ranges; + assert_eq!( + pending_ranges.len(), + expected.len(), + "actual: {:?}, expected: {:?}", + pending_ranges, + &expected + ); + pending_ranges + .iter() + .zip(expected.iter()) + .for_each(|(r, e)| { + assert_eq!(r, e); + }); + } + + trace.on_modify(CF_DEFAULT, 8); + let ingested_ssts_idx = + make_sst_apply_index(vec![(CF_DEFAULT, 6), (CF_WRITE, 6), (CF_WRITE, 7)]); + trace.on_sst_ingested(&ingested_ssts_idx); + range_equals(&trace, CF_DEFAULT, vec![IndexRange(6, 6)]); + range_equals(&trace, CF_WRITE, vec![IndexRange(6, 7)]); + trace.maybe_advance_admin_flushed(8); + assert_eq!(7, trace.admin.flushed); + for cf in [CF_DEFAULT, CF_WRITE] { + assert_eq!( + trace.data_cfs[data_cf_offset(cf)].pending_sst_ranges.len(), + 0 + ); + } + trace.on_modify(CF_DEFAULT, 10); + let ingested_ssts_idx = make_sst_apply_index(vec![(CF_DEFAULT, 10)]); + trace.on_sst_ingested(&ingested_ssts_idx); + trace.on_flush(CF_DEFAULT, 8); + trace.maybe_advance_admin_flushed(10); + assert_eq!(8, trace.admin.flushed); + range_equals(&trace, CF_DEFAULT, vec![IndexRange(10, 10)]); + + trace.on_modify(CF_DEFAULT, 16); + let ingested_ssts_idx = make_sst_apply_index(vec![ + (CF_DEFAULT, 11), + (CF_WRITE, 12), + (CF_LOCK, 13), + (CF_DEFAULT, 14), + (CF_WRITE, 14), + (CF_WRITE, 15), + (CF_LOCK, 16), + ]); + trace.on_sst_ingested(&ingested_ssts_idx); + range_equals( + &trace, + CF_DEFAULT, + vec![IndexRange(10, 11), IndexRange(14, 14)], + ); + range_equals( + &trace, + CF_WRITE, + vec![IndexRange(12, 12), IndexRange(14, 15)], + ); + range_equals( + &trace, + CF_LOCK, + vec![IndexRange(13, 13), IndexRange(16, 16)], + ); + trace.maybe_advance_admin_flushed(16); + assert_eq!(8, trace.admin.flushed); + + trace.on_flush(CF_DEFAULT, 9); + trace.maybe_advance_admin_flushed(16); + assert_eq!(16, trace.admin.flushed); + for cf in 
DATA_CFS { + assert_eq!( + trace.data_cfs[data_cf_offset(cf)].pending_sst_ranges.len(), + 0 + ); + } + } + + fn make_sst_apply_index(data: Vec<(CfName, u64)>) -> Vec { + data.into_iter() + .map(|d| SstApplyIndex { + cf_index: data_cf_offset(d.0), + index: d.1, + }) + .collect() } #[test] diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index b985fd69c27..ba7170ac8c8 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -31,7 +31,7 @@ use std::{ time::Instant, }; -use engine_traits::{KvEngine, RaftEngine}; +use engine_traits::{KvEngine, RaftEngine, DATA_CFS}; use error_code::ErrorCodeExt; use kvproto::{ raft_cmdpb::AdminCmdType, @@ -896,6 +896,14 @@ impl Peer { self.storage_mut() .entry_storage_mut() .update_cache_persisted(persisted_index); + if let Some(idx) = self + .storage_mut() + .apply_trace_mut() + .take_flush_index(ready_number) + { + let apply_index = self.flush_state().applied_index(); + self.cleanup_stale_ssts(ctx, DATA_CFS, idx, apply_index); + } if self.is_in_force_leader() { // forward commit index, the committed entries will be applied in diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index 3e660c4549c..f3aa5a541c1 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -20,7 +20,7 @@ use tikv_util::{log::SlogFormat, worker::Scheduler, yatp_pool::FuturePool}; use crate::{ operation::{AdminCmdResult, ApplyFlowControl, DataTrace}, - router::CmdResChannel, + router::{CmdResChannel, SstApplyIndex}, TabletTask, }; @@ -64,6 +64,7 @@ pub struct Apply { admin_cmd_result: Vec, flush_state: Arc, sst_apply_state: SstApplyState, + sst_applied_index: Vec, /// The flushed indexes of each column family before being restarted. 
/// /// If an apply index is less than the flushed index, the log can be @@ -138,6 +139,7 @@ impl Apply { res_reporter, flush_state, sst_apply_state, + sst_applied_index: vec![], log_recovery, metrics: ApplyMetrics::default(), buckets, @@ -308,6 +310,16 @@ impl Apply { &self.sst_apply_state } + #[inline] + pub fn push_sst_applied_index(&mut self, sst_index: SstApplyIndex) { + self.sst_applied_index.push(sst_index); + } + + #[inline] + pub fn take_sst_applied_index(&mut self) -> Vec { + mem::take(&mut self.sst_applied_index) + } + #[inline] pub fn log_recovery(&self) -> &Option> { &self.log_recovery diff --git a/components/raftstore-v2/src/router/internal_message.rs b/components/raftstore-v2/src/router/internal_message.rs index 6c8d1136b3a..7ac86c3f8c7 100644 --- a/components/raftstore-v2/src/router/internal_message.rs +++ b/components/raftstore-v2/src/router/internal_message.rs @@ -25,4 +25,11 @@ pub struct ApplyRes { pub modifications: DataTrace, pub metrics: ApplyMetrics, pub bucket_stat: Option, + pub sst_applied_index: Vec, +} + +#[derive(Copy, Clone, Debug)] +pub struct SstApplyIndex { + pub cf_index: usize, + pub index: u64, } diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs index 7630e35c2a5..83a2497b331 100644 --- a/components/raftstore-v2/src/router/mod.rs +++ b/components/raftstore-v2/src/router/mod.rs @@ -12,7 +12,7 @@ pub use self::response_channel::FlushChannel; pub use self::response_channel::FlushSubscriber; pub use self::{ imp::{RaftRouter, UnsafeRecoveryRouter}, - internal_message::ApplyRes, + internal_message::{ApplyRes, SstApplyIndex}, message::{PeerMsg, PeerTick, RaftRequest, StoreMsg, StoreTick}, response_channel::{ build_any_channel, AnyResChannel, AnyResSubscriber, BaseSubscriber, CmdResChannel, diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index a2487456108..e51c9862e47 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ 
b/tests/failpoints/cases/test_import_service.rs @@ -7,10 +7,10 @@ use std::{ use file_system::calc_crc32; use futures::{executor::block_on, stream, SinkExt}; -use grpcio::{Result, WriteFlags}; -use kvproto::import_sstpb::*; +use grpcio::{ChannelBuilder, Environment, Result, WriteFlags}; +use kvproto::{import_sstpb::*, tikvpb_grpc::TikvClient}; use tempfile::{Builder, TempDir}; -use test_raftstore::Simulator; +use test_raftstore::{must_raw_put, Simulator}; use test_sst_importer::*; use tikv::config::TikvConfig; use tikv_util::{config::ReadableSize, HandyRwLock}; @@ -455,3 +455,73 @@ fn sst_file_count(paths: &Vec) -> u64 { } count } + +#[test] +fn test_flushed_applied_index_after_ingset() { + // disable data flushed + fail::cfg("on_flush_completed", "return()").unwrap(); + // disable data flushed + let (mut cluster, ctx, _tikv, import) = open_cluster_and_tikv_import_client_v2(None); + let temp_dir = Builder::new().prefix("test_ingest_sst").tempdir().unwrap(); + let sst_path = temp_dir.path().join("test.sst"); + + // Create clients. + let env = Arc::new(Environment::new(1)); + let channel = ChannelBuilder::new(Arc::clone(&env)).connect(&cluster.sim.rl().get_addr(1)); + let client = TikvClient::new(channel); + + for i in 0..5 { + let sst_range = (i * 20, (i + 1) * 20); + let (mut meta, data) = gen_sst_file(sst_path.clone(), sst_range); + // No region id and epoch. + send_upload_sst(&import, &meta, &data).unwrap(); + let mut ingest = IngestRequest::default(); + ingest.set_context(ctx.clone()); + ingest.set_sst(meta.clone()); + meta.set_region_id(ctx.get_region_id()); + meta.set_region_epoch(ctx.get_region_epoch().clone()); + send_upload_sst(&import, &meta, &data).unwrap(); + ingest.set_sst(meta.clone()); + let resp = import.ingest(&ingest).unwrap(); + assert!(!resp.has_error(), "{:?}", resp.get_error()); + } + + // only 1 sst left because there is no more event to trigger a raft ready flush. 
+ let count = sst_file_count(&cluster.paths); + assert_eq!(1, count); + + for i in 5..8 { + let sst_range = (i * 20, (i + 1) * 20); + let (mut meta, data) = gen_sst_file(sst_path.clone(), sst_range); + // No region id and epoch. + send_upload_sst(&import, &meta, &data).unwrap(); + let mut ingest = IngestRequest::default(); + ingest.set_context(ctx.clone()); + ingest.set_sst(meta.clone()); + meta.set_region_id(ctx.get_region_id()); + meta.set_region_epoch(ctx.get_region_epoch().clone()); + send_upload_sst(&import, &meta, &data).unwrap(); + ingest.set_sst(meta.clone()); + let resp = import.ingest(&ingest).unwrap(); + assert!(!resp.has_error(), "{:?}", resp.get_error()); + } + + // ingest more sst files, unflushed index still be 1. + let count = sst_file_count(&cluster.paths); + assert_eq!(1, count); + + // file a write to trigger ready flush, even if the write is not flushed. + must_raw_put(&client, ctx, b"key1".to_vec(), b"value1".to_vec()); + let count = sst_file_count(&cluster.paths); + assert_eq!(0, count); + + // restart node, should not tirgger any ingest + fail::cfg("on_apply_ingest", "panic").unwrap(); + cluster.stop_node(1); + cluster.start().unwrap(); + let count = sst_file_count(&cluster.paths); + assert_eq!(0, count); + + fail::remove("on_apply_ingest"); + fail::remove("on_flush_completed"); +} From b172835345cb015572faabb2bc164d532ba8d62f Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Wed, 13 Sep 2023 19:57:08 -0700 Subject: [PATCH 0913/1149] add option to update config without persist (#15587) close tikv/tikv#15588 add option to update TiKV config without persist in status API "POST /config?persist=false|true" Signed-off-by: tonyxuqqi --- src/server/status_server/mod.rs | 88 ++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index b49fdce12af..98077d9e93f 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ 
-313,6 +313,18 @@ where req: Request, ) -> hyper::Result> { let mut body = Vec::new(); + let mut persist = true; + if let Some(query) = req.uri().query() { + let query_pairs: HashMap<_, _> = + url::form_urlencoded::parse(query.as_bytes()).collect(); + persist = match query_pairs.get("persist") { + Some(val) => match val.parse() { + Ok(val) => val, + Err(err) => return Ok(make_response(StatusCode::BAD_REQUEST, err.to_string())), + }, + None => true, + }; + } req.into_body() .try_for_each(|bytes| { body.extend(bytes); @@ -320,7 +332,11 @@ where }) .await?; Ok(match decode_json(&body) { - Ok(change) => match cfg_controller.update(change) { + Ok(change) => match if persist { + cfg_controller.update(change) + } else { + cfg_controller.update_without_persist(change) + } { Err(e) => { if let Some(e) = e.downcast_ref::() { make_response( @@ -1227,6 +1243,76 @@ mod tests { status_server.stop(); } + #[test] + fn test_update_config_endpoint() { + let test_config = |persist: bool| { + let temp_dir = tempfile::TempDir::new().unwrap(); + let mut config = TikvConfig::default(); + config.cfg_path = temp_dir + .path() + .join("tikv.toml") + .to_str() + .unwrap() + .to_string(); + let mut status_server = StatusServer::new( + 1, + ConfigController::new(config), + Arc::new(SecurityConfig::default()), + MockRouter, + temp_dir.path().to_path_buf(), + None, + GrpcServiceManager::dummy(), + ) + .unwrap(); + let addr = "127.0.0.1:0".to_owned(); + let _ = status_server.start(addr); + let client = Client::new(); + let uri = if persist { + Uri::builder() + .scheme("http") + .authority(status_server.listening_addr().to_string().as_str()) + .path_and_query("/config") + .build() + .unwrap() + } else { + Uri::builder() + .scheme("http") + .authority(status_server.listening_addr().to_string().as_str()) + .path_and_query("/config?persist=false") + .build() + .unwrap() + }; + let mut req = Request::new(Body::from("{\"coprocessor.region-split-size\": \"1GB\"}")); + *req.method_mut() = Method::POST; + 
*req.uri_mut() = uri.clone(); + let handle = status_server.thread_pool.spawn(async move { + let resp = client.request(req).await.unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + }); + block_on(handle).unwrap(); + + let client = Client::new(); + let handle2 = status_server.thread_pool.spawn(async move { + let resp = client.get(uri).await.unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let mut v = Vec::new(); + resp.into_body() + .try_for_each(|bytes| { + v.extend(bytes); + ok(()) + }) + .await + .unwrap(); + let resp_json = String::from_utf8_lossy(&v).to_string(); + assert!(resp_json.contains("\"region-split-size\":\"1GiB\"")); + }); + block_on(handle2).unwrap(); + status_server.stop(); + }; + test_config(true); + test_config(false); + } + #[cfg(feature = "failpoints")] #[test] fn test_status_service_fail_endpoints() { From 905ecd79ee9a30bcd8b9b1949c430062c4c3fd07 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 14 Sep 2023 12:07:39 +0800 Subject: [PATCH 0914/1149] tracker: add a warn log for deadline exceeded query (#15577) ref tikv/tikv#15566 Signed-off-by: glorv Co-authored-by: tonyxuqqi --- src/coprocessor/tracker.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index bb32a3a0e03..f6502c2459e 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -434,6 +434,36 @@ impl Drop for Tracker { if let TrackerState::ItemFinished(_) = self.current_stage { self.on_finish_all_items(); } + + if self.current_stage != TrackerState::AllItemFinished + && self.req_ctx.deadline.check().is_err() + { + // record deadline exceeded error log. 
+ let total_lifetime = self.request_begin_at.saturating_elapsed(); + let source_stmt = self.req_ctx.context.get_source_stmt(); + let first_range = self.req_ctx.ranges.first(); + let some_table_id = first_range.as_ref().map(|range| { + tidb_query_datatype::codec::table::decode_table_id(range.get_start()) + .unwrap_or_default() + }); + warn!("query deadline exceeded"; + "current_stage" => ?self.current_stage, + "connection_id" => source_stmt.get_connection_id(), + "session_alias" => source_stmt.get_session_alias(), + "region_id" => &self.req_ctx.context.get_region_id(), + "remote_host" => &self.req_ctx.peer, + "total_lifetime" => ?total_lifetime, + "wait_time" => ?self.wait_time, + "wait_time.schedule" => ?self.schedule_wait_time, + "wait_time.snapshot" => ?self.snapshot_wait_time, + "handler_build_time" => ?self.handler_build_time, + "total_process_time" => ?self.total_process_time, + "total_suspend_time" => ?self.total_suspend_time, + "txn_start_ts" => self.req_ctx.txn_start_ts, + "table_id" => some_table_id, + "tag" => self.req_ctx.tag.get_str(), + ); + } } } From 62c17991fd73269929bdfbd8e408710078e53351 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Wed, 13 Sep 2023 21:42:09 -0700 Subject: [PATCH 0915/1149] unsafe recovery: Enable force leader to rollback merge (#15578) close tikv/tikv#15580 Enable force leader to rollback merges when they are not able to proceed, previously, only regions with quorum can do this. 
Signed-off-by: Yang Zhang Co-authored-by: tonyxuqqi --- components/raftstore/src/store/fsm/peer.rs | 17 ++- components/raftstore/src/store/peer.rs | 4 +- .../failpoints/cases/test_unsafe_recovery.rs | 110 ++++++++++++++++++ 3 files changed, 129 insertions(+), 2 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 9f7934e806e..d61e6784295 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -4434,6 +4434,9 @@ where fn schedule_merge(&mut self) -> Result<()> { fail_point!("on_schedule_merge", |_| Ok(())); + fail_point!("on_schedule_merge_ret_err", |_| Err(Error::RegionNotFound( + 1 + ))); let (request, target_id) = { let state = self.fsm.peer.pending_merge_state.as_ref().unwrap(); let expect_region = state.get_target(); @@ -4557,6 +4560,17 @@ where "error_code" => %e.error_code(), ); self.rollback_merge(); + } else if let Some(ForceLeaderState::ForceLeader { .. }) = + &self.fsm.peer.force_leader + { + info!( + "failed to schedule merge, rollback in force leader state"; + "region_id" => self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + "err" => %e, + "error_code" => %e.error_code(), + ); + self.rollback_merge(); } } else if !is_learner(&self.fsm.peer.peer) { info!( @@ -5228,7 +5242,8 @@ where // error-prone if !(msg.has_admin_request() && (msg.get_admin_request().get_cmd_type() == AdminCmdType::ChangePeer - || msg.get_admin_request().get_cmd_type() == AdminCmdType::ChangePeerV2)) + || msg.get_admin_request().get_cmd_type() == AdminCmdType::ChangePeerV2 + || msg.get_admin_request().get_cmd_type() == AdminCmdType::RollbackMerge)) { return Err(Error::RecoveryInProgress(self.region_id())); } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 8c1a7ef61e9..8ef857bfa12 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -4237,7 +4237,9 @@ where // 
Should not propose normal in force leader state. // In `pre_propose_raft_command`, it rejects all the requests expect conf-change // if in force leader state. - if self.force_leader.is_some() { + if self.force_leader.is_some() + && req.get_admin_request().get_cmd_type() != AdminCmdType::RollbackMerge + { poll_ctx.raft_metrics.invalid_proposal.force_leader.inc(); panic!( "{} propose normal in force leader state {:?}", diff --git a/tests/failpoints/cases/test_unsafe_recovery.rs b/tests/failpoints/cases/test_unsafe_recovery.rs index cc33a01ff03..978489b5cd6 100644 --- a/tests/failpoints/cases/test_unsafe_recovery.rs +++ b/tests/failpoints/cases/test_unsafe_recovery.rs @@ -440,3 +440,113 @@ fn test_unsafe_recovery_demotion_reentrancy() { assert_eq!(demoted, true); fail::remove("on_handle_apply_store_1"); } + +#[test_case(test_raftstore::new_node_cluster)] +fn test_unsafe_recovery_rollback_merge() { + let mut cluster = new_cluster(0, 3); + cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40); + cluster.cfg.raft_store.merge_check_tick_interval = ReadableDuration::millis(20); + cluster.run(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 3); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + for i in 0..10 { + cluster.must_put(format!("k{}", i).as_bytes(), b"v"); + } + + // Block merge commit, let go of the merge prepare. + fail::cfg("on_schedule_merge_ret_err", "return()").unwrap(); + + let region = pd_client.get_region(b"k1").unwrap(); + cluster.must_split(®ion, b"k2"); + + let left = pd_client.get_region(b"k1").unwrap(); + let right = pd_client.get_region(b"k2").unwrap(); + + // Makes the leadership definite. 
+ let left_peer_2 = find_peer(&left, nodes[2]).unwrap().to_owned(); + let right_peer_2 = find_peer(&right, nodes[2]).unwrap().to_owned(); + cluster.must_transfer_leader(left.get_id(), left_peer_2); + cluster.must_transfer_leader(right.get_id(), right_peer_2); + cluster.must_try_merge(left.get_id(), right.get_id()); + + // Makes the group lose its quorum. + cluster.stop_node(nodes[1]); + cluster.stop_node(nodes[2]); + { + let put = new_put_cmd(b"k2", b"v2"); + let req = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![put], + true, + ); + // marjority is lost, can't propose command successfully. + cluster + .call_command_on_leader(req, Duration::from_millis(10)) + .unwrap_err(); + } + + cluster.must_enter_force_leader(left.get_id(), nodes[0], vec![nodes[1], nodes[2]]); + cluster.must_enter_force_leader(right.get_id(), nodes[0], vec![nodes[1], nodes[2]]); + + // Construct recovery plan. + let mut plan = pdpb::RecoveryPlan::default(); + + let left_demote_peers: Vec = left + .get_peers() + .iter() + .filter(|&peer| peer.get_store_id() != nodes[0]) + .cloned() + .collect(); + let mut left_demote = pdpb::DemoteFailedVoters::default(); + left_demote.set_region_id(left.get_id()); + left_demote.set_failed_voters(left_demote_peers.into()); + let right_demote_peers: Vec = right + .get_peers() + .iter() + .filter(|&peer| peer.get_store_id() != nodes[0]) + .cloned() + .collect(); + let mut right_demote = pdpb::DemoteFailedVoters::default(); + right_demote.set_region_id(right.get_id()); + right_demote.set_failed_voters(right_demote_peers.into()); + plan.mut_demotes().push(left_demote); + plan.mut_demotes().push(right_demote); + + // Triggers the unsafe recovery plan execution. 
+ pd_client.must_set_unsafe_recovery_plan(nodes[0], plan.clone()); + cluster.must_send_store_heartbeat(nodes[0]); + + let mut demoted = false; + for _ in 0..10 { + let new_left = block_on(pd_client.get_region_by_id(left.get_id())) + .unwrap() + .unwrap(); + let new_right = block_on(pd_client.get_region_by_id(right.get_id())) + .unwrap() + .unwrap(); + assert_eq!(new_left.get_peers().len(), 3); + assert_eq!(new_right.get_peers().len(), 3); + demoted = new_left + .get_peers() + .iter() + .filter(|peer| peer.get_store_id() != nodes[0]) + .all(|peer| peer.get_role() == metapb::PeerRole::Learner) + && new_right + .get_peers() + .iter() + .filter(|peer| peer.get_store_id() != nodes[0]) + .all(|peer| peer.get_role() == metapb::PeerRole::Learner); + if demoted { + break; + } + sleep_ms(100); + } + assert_eq!(demoted, true); + + fail::remove("on_schedule_merge_ret_err"); +} From e43a157c4a35034dfd705bdd94fac6d958e8a1ff Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 14 Sep 2023 16:10:39 +0800 Subject: [PATCH 0916/1149] resolved_ts: limit scanner memory usage (#15523) ref tikv/tikv#14864 * Break resolved ts scan entry into multiple tasks. * Limit concurrent resolved ts scan tasks. * Remove resolved ts dead code. 
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resolved_ts/src/endpoint.rs | 190 ++++++++---------- components/resolved_ts/src/metrics.rs | 5 + components/resolved_ts/src/resolver.rs | 27 ++- components/resolved_ts/src/scanner.rs | 222 +++++++++------------ src/config/mod.rs | 2 + tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 7 files changed, 198 insertions(+), 250 deletions(-) diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 2a2f56eaadd..34f00672fa7 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -5,15 +5,13 @@ use std::{ collections::HashMap, fmt, marker::PhantomData, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, Mutex, MutexGuard, - }, + sync::{Arc, Mutex, MutexGuard}, time::Duration, }; use concurrency_manager::ConcurrencyManager; use engine_traits::KvEngine; +use futures::channel::oneshot::{channel, Receiver, Sender}; use grpcio::Environment; use kvproto::{kvrpcpb::LeaderInfo, metapb::Region, raft_cmdpb::AdminCmdType}; use online_config::{self, ConfigChange, ConfigManager, OnlineConfig}; @@ -35,7 +33,7 @@ use tikv_util::{ warn, worker::{Runnable, RunnableWithTimer, Scheduler}, }; -use tokio::sync::Notify; +use tokio::sync::{Notify, Semaphore}; use txn_types::{Key, TimeStamp}; use crate::{ @@ -43,7 +41,7 @@ use crate::{ cmd::{ChangeLog, ChangeRow}, metrics::*, resolver::{LastAttempt, Resolver}, - scanner::{ScanEntry, ScanMode, ScanTask, ScannerPool}, + scanner::{ScanEntries, ScanTask, ScannerPool}, Error, Result, TsSource, ON_DROP_WARN_HEAP_SIZE, }; @@ -55,7 +53,7 @@ enum ResolverStatus { Pending { tracked_index: u64, locks: Vec, - cancelled: Arc, + cancelled: Option>, memory_quota: Arc, }, Ready, @@ -188,7 +186,12 @@ struct ObserveRegion { } impl ObserveRegion { - fn new(meta: Region, rrp: Arc, memory_quota: Arc) -> Self { + fn new( + 
meta: Region, + rrp: Arc, + memory_quota: Arc, + cancelled: Sender<()>, + ) -> Self { ObserveRegion { resolver: Resolver::with_read_progress(meta.id, Some(rrp), memory_quota.clone()), meta, @@ -196,7 +199,7 @@ impl ObserveRegion { resolver_status: ResolverStatus::Pending { tracked_index: 0, locks: vec![], - cancelled: Arc::new(AtomicBool::new(false)), + cancelled: Some(cancelled), memory_quota, }, } @@ -318,51 +321,45 @@ impl ObserveRegion { } /// Track locks in incoming scan entries. - fn track_scan_locks(&mut self, entries: Vec, apply_index: u64) -> Result<()> { - for es in entries { - match es { - ScanEntry::Lock(locks) => { - if let ResolverStatus::Ready = self.resolver_status { - panic!("region {:?} resolver has ready", self.meta.id) - } - for (key, lock) in locks { - self.resolver.track_lock( - lock.ts, - key.to_raw().unwrap(), - Some(apply_index), - )?; - } + fn track_scan_locks(&mut self, entries: ScanEntries, apply_index: u64) -> Result<()> { + match entries { + ScanEntries::Lock(locks) => { + if let ResolverStatus::Ready = self.resolver_status { + panic!("region {:?} resolver has ready", self.meta.id) } - ScanEntry::None => { - // Update the `tracked_index` to the snapshot's `apply_index` - self.resolver.update_tracked_index(apply_index); - let mut resolver_status = - std::mem::replace(&mut self.resolver_status, ResolverStatus::Ready); - let (pending_tracked_index, pending_locks) = - resolver_status.drain_pending_locks(self.meta.id); - for lock in pending_locks { - match lock { - PendingLock::Track { key, start_ts } => { - self.resolver.track_lock( - start_ts, - key.to_raw().unwrap(), - Some(pending_tracked_index), - )?; - } - PendingLock::Untrack { key, .. 
} => self - .resolver - .untrack_lock(&key.to_raw().unwrap(), Some(pending_tracked_index)), + for (key, lock) in locks { + self.resolver + .track_lock(lock.ts, key.to_raw().unwrap(), Some(apply_index))?; + } + } + ScanEntries::None => { + // Update the `tracked_index` to the snapshot's `apply_index` + self.resolver.update_tracked_index(apply_index); + let mut resolver_status = + std::mem::replace(&mut self.resolver_status, ResolverStatus::Ready); + let (pending_tracked_index, pending_locks) = + resolver_status.drain_pending_locks(self.meta.id); + for lock in pending_locks { + match lock { + PendingLock::Track { key, start_ts } => { + self.resolver.track_lock( + start_ts, + key.to_raw().unwrap(), + Some(pending_tracked_index), + )?; } + PendingLock::Untrack { key, .. } => self + .resolver + .untrack_lock(&key.to_raw().unwrap(), Some(pending_tracked_index)), } - info!( - "Resolver initialized"; - "region" => self.meta.id, - "observe_id" => ?self.handle.id, - "snapshot_index" => apply_index, - "pending_data_index" => pending_tracked_index, - ); } - ScanEntry::TxnEntry(_) => panic!("unexpected entry type"), + info!( + "Resolver initialized"; + "region" => self.meta.id, + "observe_id" => ?self.handle.id, + "snapshot_index" => apply_index, + "pending_data_index" => pending_tracked_index, + ); } } Ok(()) @@ -378,6 +375,7 @@ pub struct Endpoint { region_read_progress: RegionReadProgressRegistry, regions: HashMap, scanner_pool: ScannerPool, + scan_concurrency_semaphore: Arc, scheduler: Scheduler, advance_worker: AdvanceTsWorker, _phantom: PhantomData<(T, E)>, @@ -442,10 +440,7 @@ where match &observed_region.resolver_status { ResolverStatus::Pending { locks, .. } => { for l in locks { - match l { - PendingLock::Track { key, .. } => stats.heap_size += key.len() as i64, - PendingLock::Untrack { key, .. 
} => stats.heap_size += key.len() as i64, - } + stats.heap_size += l.heap_size() as i64; } stats.unresolved_count += 1; } @@ -477,6 +472,7 @@ where RTS_ZERO_RESOLVED_TS.set(stats.zero_ts_count); RTS_LOCK_HEAP_BYTES_GAUGE.set(stats.resolver.heap_size); + RTS_LOCK_QUOTA_IN_USE_BYTES_GAUGE.set(self.memory_quota.in_use() as i64); RTS_REGION_RESOLVE_STATUS_GAUGE_VEC .with_label_values(&["resolved"]) .set(stats.resolver.resolved_count); @@ -678,6 +674,7 @@ where region_read_progress.clone(), store_resolver_gc_interval, ); + let scan_concurrency_semaphore = Arc::new(Semaphore::new(cfg.incremental_scan_concurrency)); let ep = Self { store_id: Some(store_id), cfg: cfg.clone(), @@ -688,6 +685,7 @@ where region_read_progress, advance_worker, scanner_pool, + scan_concurrency_semaphore, regions: HashMap::default(), _phantom: PhantomData::default(), }; @@ -698,33 +696,28 @@ where fn register_region(&mut self, region: Region, backoff: Option) { let region_id = region.get_id(); assert!(self.regions.get(®ion_id).is_none()); - let observe_region = { - if let Some(read_progress) = self.region_read_progress.get(®ion_id) { - info!( - "register observe region"; - "region" => ?region - ); - ObserveRegion::new(region.clone(), read_progress, self.memory_quota.clone()) - } else { - warn!( - "try register unexit region"; - "region" => ?region, - ); - return; - } + let Some(read_progress) = self.region_read_progress.get(®ion_id) else { + warn!("try register nonexistent region"; "region" => ?region); + return; }; + info!("register observe region"; "region" => ?region); + let (cancelled_tx, cancelled_rx) = channel(); + let observe_region = ObserveRegion::new( + region.clone(), + read_progress, + self.memory_quota.clone(), + cancelled_tx, + ); let observe_handle = observe_region.handle.clone(); - let cancelled = match observe_region.resolver_status { - ResolverStatus::Pending { ref cancelled, .. 
} => cancelled.clone(), - ResolverStatus::Ready => panic!("resolved ts illeagal created observe region"), - }; observe_region .read_progress() .update_advance_resolved_ts_notify(self.advance_notify.clone()); self.regions.insert(region_id, observe_region); - let scan_task = self.build_scan_task(region, observe_handle, cancelled, backoff); - self.scanner_pool.spawn_task(scan_task); + let scan_task = self.build_scan_task(region, observe_handle, cancelled_rx, backoff); + let concurrency_semaphore = self.scan_concurrency_semaphore.clone(); + self.scanner_pool + .spawn_task(scan_task, concurrency_semaphore); RTS_SCAN_TASKS.with_label_values(&["total"]).inc(); } @@ -732,45 +725,17 @@ where &self, region: Region, observe_handle: ObserveHandle, - cancelled: Arc, + cancelled: Receiver<()>, backoff: Option, ) -> ScanTask { let scheduler = self.scheduler.clone(); - let scheduler_error = self.scheduler.clone(); - let region_id = region.id; - let observe_id = observe_handle.id; ScanTask { handle: observe_handle, - tag: String::new(), - mode: ScanMode::LockOnly, region, checkpoint_ts: TimeStamp::zero(), backoff, - is_cancelled: Box::new(move || cancelled.load(Ordering::Acquire)), - send_entries: Box::new(move |entries, apply_index| { - scheduler - .schedule(Task::ScanLocks { - region_id, - observe_id, - entries, - apply_index, - }) - .unwrap_or_else(|e| warn!("schedule resolved ts task failed"; "err" => ?e)); - RTS_SCAN_TASKS.with_label_values(&["finish"]).inc(); - }), - on_error: Some(Box::new(move |observe_id, _region, e| { - if let Err(e) = scheduler_error.schedule(Task::ReRegisterRegion { - region_id, - observe_id, - cause: e, - }) { - warn!("schedule re-register task failed"; - "region_id" => region_id, - "observe_id" => ?observe_id, - "error" => ?e); - } - RTS_SCAN_TASKS.with_label_values(&["abort"]).inc(); - })), + cancelled, + scheduler, } } @@ -778,7 +743,7 @@ where if let Some(observe_region) = self.regions.remove(®ion_id) { let ObserveRegion { handle, - 
resolver_status, + mut resolver_status, .. } = observe_region; @@ -791,8 +756,11 @@ where // Stop observing data handle.stop_observing(); // Stop scanning data - if let ResolverStatus::Pending { ref cancelled, .. } = resolver_status { - cancelled.store(true, Ordering::Release); + if let ResolverStatus::Pending { + ref mut cancelled, .. + } = resolver_status + { + let _ = cancelled.take(); } } else { debug!("deregister unregister region"; "region_id" => region_id); @@ -938,7 +906,7 @@ where &mut self, region_id: u64, observe_id: ObserveId, - entries: Vec, + entries: ScanEntries, apply_index: u64, ) { let mut memory_quota_exceeded = None; @@ -979,6 +947,8 @@ where self.advance_notify.notify_waiters(); self.memory_quota .set_capacity(self.cfg.memory_quota.0 as usize); + self.scan_concurrency_semaphore = + Arc::new(Semaphore::new(self.cfg.incremental_scan_concurrency)); info!( "resolved-ts config changed"; "prev" => prev, @@ -1047,7 +1017,7 @@ pub enum Task { ScanLocks { region_id: u64, observe_id: ObserveId, - entries: Vec, + entries: ScanEntries, apply_index: u64, }, ChangeConfig { diff --git a/components/resolved_ts/src/metrics.rs b/components/resolved_ts/src/metrics.rs index 02bb92f7887..fb751491d10 100644 --- a/components/resolved_ts/src/metrics.rs +++ b/components/resolved_ts/src/metrics.rs @@ -104,6 +104,11 @@ lazy_static! 
{ "Total bytes in memory of resolved-ts observed regions's lock heap" ) .unwrap(); + pub static ref RTS_LOCK_QUOTA_IN_USE_BYTES_GAUGE: IntGauge = register_int_gauge!( + "tikv_resolved_ts_memory_quota_in_use_bytes", + "Total bytes in memory of resolved-ts observed regions's lock heap" + ) + .unwrap(); pub static ref RTS_REGION_RESOLVE_STATUS_GAUGE_VEC: IntGaugeVec = register_int_gauge_vec!( "tikv_resolved_ts_region_resolve_status", "The status of resolved-ts observed regions", diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 9a62a0eea98..85e7acff4a4 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -10,7 +10,7 @@ use tikv_util::{ }; use txn_types::{Key, TimeStamp}; -use crate::metrics::RTS_RESOLVED_FAIL_ADVANCE_VEC; +use crate::metrics::*; const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; pub const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB @@ -203,16 +203,23 @@ impl Resolver { // Return an approximate heap memory usage in bytes. pub fn approximate_heap_bytes(&self) -> usize { - // memory used by locks_by_key. - let memory_quota_in_use = self.memory_quota.in_use(); - - // memory used by lock_ts_heap. 
- let memory_lock_ts_heap = self.lock_ts_heap.len() - * (std::mem::size_of::() + std::mem::size_of::>>()) - // memory used by HashSet> - + self.locks_by_key.len() * std::mem::size_of::>(); + if self.locks_by_key.is_empty() { + return 0; + } - memory_quota_in_use + memory_lock_ts_heap + const SAMPLE_COUNT: usize = 8; + let mut key_count = 0; + let mut key_bytes = 0; + for key in self.locks_by_key.keys() { + key_count += 1; + key_bytes += key.len(); + if key_count >= SAMPLE_COUNT { + break; + } + } + self.locks_by_key.len() * (key_bytes / key_count + std::mem::size_of::()) + + self.lock_ts_heap.len() + * (std::mem::size_of::() + std::mem::size_of::>>()) } fn lock_heap_size(&self, key: &[u8]) -> usize { diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index 615819db799..6c8c90dc38f 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -3,57 +3,79 @@ use std::{marker::PhantomData, sync::Arc, time::Duration}; use engine_traits::KvEngine; -use futures::compat::Future01CompatExt; -use kvproto::{kvrpcpb::ExtraOp as TxnExtraOp, metapb::Region}; +use futures::{channel::oneshot::Receiver, compat::Future01CompatExt, FutureExt}; +use kvproto::metapb::Region; use raftstore::{ - coprocessor::{ObserveHandle, ObserveId}, + coprocessor::ObserveHandle, router::CdcHandle, store::{fsm::ChangeObserver, msg::Callback, RegionSnapshot}, }; use tikv::storage::{ kv::{ScanMode as MvccScanMode, Snapshot}, - mvcc::{DeltaScanner, MvccReader, ScannerBuilder}, - txn::{TxnEntry, TxnEntryScanner}, + mvcc::MvccReader, +}; +use tikv_util::{ + sys::thread::ThreadBuildWrapper, time::Instant, timer::GLOBAL_TIMER_HANDLE, worker::Scheduler, +}; +use tokio::{ + runtime::{Builder, Runtime}, + sync::Semaphore, }; -use tikv_util::{sys::thread::ThreadBuildWrapper, time::Instant, timer::GLOBAL_TIMER_HANDLE}; -use tokio::runtime::{Builder, Runtime}; use txn_types::{Key, Lock, LockType, TimeStamp}; use crate::{ errors::{Error, 
Result}, metrics::*, + Task, }; -const DEFAULT_SCAN_BATCH_SIZE: usize = 1024; +const DEFAULT_SCAN_BATCH_SIZE: usize = 128; const GET_SNAPSHOT_RETRY_TIME: u32 = 3; const GET_SNAPSHOT_RETRY_BACKOFF_STEP: Duration = Duration::from_millis(100); -pub type BeforeStartCallback = Box; -pub type OnErrorCallback = Box; -pub type OnEntriesCallback = Box, u64) + Send>; -pub type IsCancelledCallback = Box bool + Send>; - -pub enum ScanMode { - LockOnly, - All, - AllWithOldValue, -} - pub struct ScanTask { pub handle: ObserveHandle, - pub tag: String, - pub mode: ScanMode, pub region: Region, pub checkpoint_ts: TimeStamp, pub backoff: Option, - pub is_cancelled: IsCancelledCallback, - pub send_entries: OnEntriesCallback, - pub on_error: Option, + pub cancelled: Receiver<()>, + pub scheduler: Scheduler, +} + +impl ScanTask { + async fn send_entries(&self, entries: ScanEntries, apply_index: u64) { + let task = Task::ScanLocks { + region_id: self.region.get_id(), + observe_id: self.handle.id, + entries, + apply_index, + }; + if let Err(e) = self.scheduler.schedule(task) { + warn!("resolved_ts scheduler send entries failed"; "err" => ?e); + } + } + + fn is_cancelled(&mut self) -> bool { + matches!(self.cancelled.try_recv(), Err(_) | Ok(Some(_))) + } + + fn on_error(&self, err: Error) { + if let Err(e) = self.scheduler.schedule(Task::ReRegisterRegion { + region_id: self.region.get_id(), + observe_id: self.handle.id, + cause: err, + }) { + warn!("schedule re-register task failed"; + "region_id" => self.region.get_id(), + "observe_id" => ?self.handle.id, + "error" => ?e); + } + RTS_SCAN_TASKS.with_label_values(&["abort"]).inc(); + } } #[derive(Debug)] -pub enum ScanEntry { - TxnEntry(Vec), +pub enum ScanEntries { Lock(Vec<(Key, Lock)>), None, } @@ -82,109 +104,66 @@ impl, E: KvEngine> ScannerPool { } } - pub fn spawn_task(&self, mut task: ScanTask) { + pub fn spawn_task(&self, mut task: ScanTask, concurrency_semaphore: Arc) { let cdc_handle = self.cdc_handle.clone(); let fut = async 
move { + tikv_util::defer!({ + RTS_SCAN_TASKS.with_label_values(&["finish"]).inc(); + }); if let Some(backoff) = task.backoff { RTS_INITIAL_SCAN_BACKOFF_DURATION_HISTOGRAM.observe(backoff.as_secs_f64()); - if let Err(e) = GLOBAL_TIMER_HANDLE + let mut backoff = GLOBAL_TIMER_HANDLE .delay(std::time::Instant::now() + backoff) .compat() - .await - { - error!("failed to backoff"; "err" => ?e); + .fuse(); + futures::select! { + res = backoff => if let Err(e) = res { + error!("failed to backoff"; "err" => ?e); + }, + _ = &mut task.cancelled => {} } - if (task.is_cancelled)() { + if task.is_cancelled() { return; } } + let _permit = concurrency_semaphore.acquire().await; + if task.is_cancelled() { + return; + } + fail::fail_point!("resolved_ts_before_scanner_get_snapshot"); let snap = match Self::get_snapshot(&mut task, cdc_handle).await { Ok(snap) => snap, Err(e) => { warn!("resolved_ts scan get snapshot failed"; "err" => ?e); - let ScanTask { - on_error, - region, - handle, - .. - } = task; - if let Some(on_error) = on_error { - on_error(handle.id, region, e); - } + task.on_error(e); return; } }; fail::fail_point!("resolved_ts_after_scanner_get_snapshot"); let start = Instant::now(); let apply_index = snap.get_apply_index().unwrap(); - let mut entries = vec![]; - match task.mode { - ScanMode::All | ScanMode::AllWithOldValue => { - let txn_extra_op = if let ScanMode::AllWithOldValue = task.mode { - TxnExtraOp::ReadOldValue - } else { - TxnExtraOp::Noop - }; - let mut scanner = ScannerBuilder::new(snap, TimeStamp::max()) - .range(None, None) - .build_delta_scanner(task.checkpoint_ts, txn_extra_op) - .unwrap(); - let mut done = false; - while !done && !(task.is_cancelled)() { - let (es, has_remaining) = match Self::scan_delta(&mut scanner) { - Ok(rs) => rs, - Err(e) => { - warn!("resolved_ts scan delta failed"; "err" => ?e); - let ScanTask { - on_error, - region, - handle, - .. 
- } = task; - if let Some(on_error) = on_error { - on_error(handle.id, region, e); - } - return; - } - }; - done = !has_remaining; - entries.push(ScanEntry::TxnEntry(es)); - } - } - ScanMode::LockOnly => { - let mut reader = MvccReader::new(snap, Some(MvccScanMode::Forward), false); - let mut done = false; - let mut start = None; - while !done && !(task.is_cancelled)() { - let (locks, has_remaining) = - match Self::scan_locks(&mut reader, start.as_ref(), task.checkpoint_ts) - { - Ok(rs) => rs, - Err(e) => { - warn!("resolved_ts scan lock failed"; "err" => ?e); - let ScanTask { - on_error, - region, - handle, - .. - } = task; - if let Some(on_error) = on_error { - on_error(handle.id, region, e); - } - return; - } - }; - done = !has_remaining; - if has_remaining { - start = Some(locks.last().unwrap().0.clone()) + let mut reader = MvccReader::new(snap, Some(MvccScanMode::Forward), false); + let mut done = false; + let mut start_key = None; + while !done && !task.is_cancelled() { + let (locks, has_remaining) = + match Self::scan_locks(&mut reader, start_key.as_ref(), task.checkpoint_ts) { + Ok(rs) => rs, + Err(e) => { + warn!("resolved_ts scan lock failed"; "err" => ?e); + task.on_error(e); + return; } - entries.push(ScanEntry::Lock(locks)); - } + }; + done = !has_remaining; + if has_remaining { + start_key = Some(locks.last().unwrap().0.clone()) } + task.send_entries(ScanEntries::Lock(locks), apply_index) + .await; } - entries.push(ScanEntry::None); RTS_SCAN_DURATION_HISTOGRAM.observe(start.saturating_elapsed().as_secs_f64()); - (task.send_entries)(entries, apply_index); + task.send_entries(ScanEntries::None, apply_index).await; }; self.workers.spawn(fut); } @@ -196,18 +175,21 @@ impl, E: KvEngine> ScannerPool { let mut last_err = None; for retry_times in 0..=GET_SNAPSHOT_RETRY_TIME { if retry_times != 0 { - if let Err(e) = GLOBAL_TIMER_HANDLE + let mut backoff = GLOBAL_TIMER_HANDLE .delay( std::time::Instant::now() + GET_SNAPSHOT_RETRY_BACKOFF_STEP 
.mul_f64(10_f64.powi(retry_times as i32 - 1)), ) .compat() - .await - { - error!("failed to backoff"; "err" => ?e); + .fuse(); + futures::select! { + res = backoff => if let Err(e) = res { + error!("failed to backoff"; "err" => ?e); + }, + _ = &mut task.cancelled => {} } - if (task.is_cancelled)() { + if task.is_cancelled() { return Err(box_err!("scan task cancelled")); } } @@ -256,24 +238,4 @@ impl, E: KvEngine> ScannerPool { .map_err(|e| Error::Other(box_err!("{:?}", e)))?; Ok((locks, has_remaining)) } - - fn scan_delta(scanner: &mut DeltaScanner) -> Result<(Vec, bool)> { - let mut entries = Vec::with_capacity(DEFAULT_SCAN_BATCH_SIZE); - let mut has_remaining = true; - while entries.len() < entries.capacity() { - match scanner - .next_entry() - .map_err(|e| Error::Other(box_err!("{:?}", e)))? - { - Some(entry) => { - entries.push(entry); - } - None => { - has_remaining = false; - break; - } - } - } - Ok((entries, has_remaining)) - } } diff --git a/src/config/mod.rs b/src/config/mod.rs index be2a52d9b07..4f9a9a01b4a 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3000,6 +3000,7 @@ pub struct ResolvedTsConfig { #[online_config(skip)] pub scan_lock_pool_size: usize, pub memory_quota: ReadableSize, + pub incremental_scan_concurrency: usize, } impl ResolvedTsConfig { @@ -3021,6 +3022,7 @@ impl Default for ResolvedTsConfig { advance_ts_interval: ReadableDuration::secs(20), scan_lock_pool_size: 2, memory_quota: ReadableSize::mb(256), + incremental_scan_concurrency: 6, } } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index c6e98e95c05..c6f787df9a7 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -857,6 +857,7 @@ fn test_serde_custom_tikv_config() { advance_ts_interval: ReadableDuration::secs(5), scan_lock_pool_size: 1, memory_quota: ReadableSize::mb(1), + incremental_scan_concurrency: 7, }; value.causal_ts = CausalTsConfig { renew_interval: ReadableDuration::millis(100), diff --git 
a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 653c3d2daef..ece8cabae49 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -707,6 +707,7 @@ enable = true advance-ts-interval = "5s" scan-lock-pool-size = 1 memory-quota = "1MB" +incremental-scan-concurrency = 7 [split] detect-times = 10 From 32f58924b825230d159714db63bed344e913a58a Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 15 Sep 2023 13:16:39 +0800 Subject: [PATCH 0917/1149] *: update rust-toolchain (#15584) close tikv/tikv#15581 Signed-off-by: glorv --- cmd/tikv-ctl/src/fork_readonly_tikv.rs | 1 + cmd/tikv-ctl/src/main.rs | 2 +- components/backup-stream/src/errors.rs | 4 +- .../backup-stream/src/metadata/client.rs | 5 +- components/backup-stream/src/router.rs | 4 +- .../backup-stream/src/subscription_track.rs | 2 +- components/backup-stream/src/utils.rs | 4 +- components/backup/src/endpoint.rs | 6 +-- components/batch-system/src/fsm.rs | 8 +++- components/case_macros/src/lib.rs | 10 ++-- components/cdc/src/delegate.rs | 2 +- components/cdc/src/endpoint.rs | 6 +-- .../concurrency_manager/src/lock_table.rs | 4 +- components/coprocessor_plugin_api/src/util.rs | 4 ++ components/encryption/src/config.rs | 9 +--- components/engine_rocks/src/logger.rs | 2 - components/engine_rocks/src/properties.rs | 15 +++--- .../engine_tirocks/src/properties/mvcc.rs | 2 +- .../engine_tirocks/src/properties/range.rs | 10 ++-- components/engine_traits/src/flush.rs | 2 +- components/engine_traits/src/lib.rs | 4 +- components/engine_traits/src/tablet.rs | 2 +- .../online_config_derive/src/lib.rs | 14 ++---- components/raftstore-v2/src/batch/store.rs | 6 ++- components/raftstore-v2/src/lib.rs | 1 + .../operation/command/admin/merge/prepare.rs | 4 +- .../src/operation/command/admin/split.rs | 4 +- .../command/admin/transfer_leader.rs | 20 ++++---- components/raftstore-v2/src/operation/life.rs | 8 +++- 
.../raftstore-v2/src/operation/query/local.rs | 4 +- .../src/operation/ready/apply_trace.rs | 2 +- .../src/operation/ready/snapshot.rs | 14 +++--- .../raftstore-v2/src/operation/txn_ext.rs | 4 +- .../src/operation/unsafe_recovery/demote.rs | 5 +- .../src/worker/cleanup/compact.rs | 16 +++++-- .../raftstore-v2/src/worker/pd/region.rs | 15 ++---- .../raftstore-v2/src/worker/pd/split.rs | 6 ++- components/raftstore-v2/src/worker/tablet.rs | 13 ++++- .../tests/integrations/cluster.rs | 4 +- .../raftstore/src/coprocessor/dispatcher.rs | 5 +- components/raftstore/src/errors.rs | 2 +- components/raftstore/src/lib.rs | 4 +- .../raftstore/src/store/async_io/write.rs | 6 ++- .../raftstore/src/store/entry_storage.rs | 8 ++-- components/raftstore/src/store/fsm/apply.rs | 18 +++---- components/raftstore/src/store/fsm/peer.rs | 8 ++-- components/raftstore/src/store/msg.rs | 24 +++++++--- components/raftstore/src/store/peer.rs | 48 +++++++++---------- .../raftstore/src/store/peer_storage.rs | 2 +- .../raftstore/src/store/region_snapshot.rs | 6 +-- .../raftstore/src/store/simple_write.rs | 24 +++++++--- components/raftstore/src/store/snap.rs | 4 +- components/raftstore/src/store/snap/io.rs | 4 +- components/raftstore/src/store/txn_ext.rs | 2 +- components/raftstore/src/store/util.rs | 3 +- components/raftstore/src/store/worker/pd.rs | 20 +++----- components/raftstore/src/store/worker/read.rs | 3 +- .../raftstore/src/store/worker/region.rs | 4 +- .../raftstore/src/store/worker/split_check.rs | 8 ++-- .../src/store/worker/split_controller.rs | 11 ++--- components/resolved_ts/src/cmd.rs | 6 +-- components/resolved_ts/src/endpoint.rs | 20 ++++---- components/resolved_ts/src/scanner.rs | 3 +- .../resource_control/src/resource_group.rs | 4 +- components/resource_metering/src/lib.rs | 2 +- components/resource_metering/src/model.rs | 2 +- .../src/recorder/sub_recorder/cpu.rs | 4 +- .../resource_metering/tests/recorder_test.rs | 12 ++--- components/server/src/common.rs | 4 +- 
components/snap_recovery/src/leader_keeper.rs | 4 +- components/sst_importer/src/import_mode2.rs | 2 +- components/sst_importer/src/sst_importer.rs | 17 +++---- components/sst_importer/src/util.rs | 3 +- components/test_coprocessor/src/store.rs | 2 +- .../example_plugin/src/lib.rs | 2 +- components/test_pd/src/server.rs | 8 +--- components/test_pd_client/src/pd.rs | 2 +- components/test_raftstore-v2/src/cluster.rs | 3 +- components/test_raftstore-v2/src/lib.rs | 2 + components/test_raftstore-v2/src/node.rs | 2 +- components/test_raftstore-v2/src/server.rs | 14 +++++- components/test_raftstore/src/lib.rs | 2 + components/test_raftstore/src/node.rs | 2 +- components/test_raftstore/src/server.rs | 8 +++- .../tidb_query_codegen/src/rpn_function.rs | 35 +++++++------- .../src/codec/collation/mod.rs | 2 +- .../tidb_query_datatype/src/codec/convert.rs | 12 ++--- .../src/codec/data_type/mod.rs | 2 +- .../src/codec/data_type/scalar.rs | 17 ++++--- .../tidb_query_datatype/src/codec/datum.rs | 8 ++-- .../src/codec/mysql/decimal.rs | 2 +- .../src/codec/mysql/duration.rs | 4 +- .../src/codec/mysql/json/comparison.rs | 4 +- .../src/codec/mysql/json/jcodec.rs | 8 ++-- .../src/codec/mysql/json/json_modify.rs | 2 +- .../src/codec/mysql/time/mod.rs | 10 ++-- .../src/codec/row/v2/row_slice.rs | 2 +- .../tidb_query_datatype/src/codec/table.rs | 2 +- .../src/index_scan_executor.rs | 4 +- components/tidb_query_executors/src/runner.rs | 18 +++---- .../src/selection_executor.rs | 4 +- .../src/util/aggr_executor.rs | 4 +- .../tidb_query_executors/src/util/mod.rs | 4 +- components/tidb_query_expr/src/impl_cast.rs | 2 +- .../tidb_query_expr/src/impl_miscellaneous.rs | 5 +- components/tidb_query_expr/src/impl_string.rs | 6 +-- components/tidb_query_expr/src/lib.rs | 2 + .../tidb_query_expr/src/types/expr_eval.rs | 11 ++--- components/tikv_kv/src/cursor.rs | 2 +- components/tikv_kv/src/lib.rs | 1 + components/tikv_util/src/logger/formatter.rs | 6 +-- components/tikv_util/src/lru.rs | 2 +- 
components/tikv_util/src/memory.rs | 2 +- .../src/metrics/allocator_metrics.rs | 2 +- components/tikv_util/src/mpsc/future.rs | 2 + components/tikv_util/src/sys/cpu_time.rs | 2 +- components/tikv_util/src/timer.rs | 4 +- components/txn_types/src/timestamp.rs | 10 +--- components/txn_types/src/types.rs | 18 ++----- rust-toolchain | 2 +- src/config/mod.rs | 20 ++++---- src/coprocessor/metrics.rs | 2 +- src/coprocessor/mod.rs | 2 + src/import/sst_service.rs | 6 +-- src/lib.rs | 3 +- src/server/debug2.rs | 2 +- src/server/gc_worker/compaction_filter.rs | 1 + src/server/gc_worker/gc_manager.rs | 8 ++-- src/server/gc_worker/gc_worker.rs | 14 ++---- src/server/lock_manager/deadlock.rs | 9 +--- src/server/raftkv/mod.rs | 5 +- src/server/raftkv2/mod.rs | 4 +- src/server/raftkv2/node.rs | 4 +- src/server/service/debug.rs | 1 - src/server/service/diagnostics/log.rs | 18 ++++--- src/server/service/diagnostics/sys.rs | 2 +- src/server/service/kv.rs | 1 - src/storage/lock_manager/lock_wait_context.rs | 12 ++--- .../lock_manager/lock_waiting_queue.rs | 7 +-- src/storage/metrics.rs | 2 +- src/storage/mod.rs | 32 ++++++------- src/storage/mvcc/reader/point_getter.rs | 2 +- src/storage/mvcc/reader/reader.rs | 21 ++++---- src/storage/mvcc/reader/scanner/forward.rs | 4 +- src/storage/raw/raw_mvcc.rs | 2 +- src/storage/txn/actions/prewrite.rs | 2 - src/storage/txn/commands/atomic_store.rs | 4 +- src/storage/txn/commands/prewrite.rs | 26 +++++----- src/storage/txn/latch.rs | 20 ++++---- src/storage/txn/sched_pool.rs | 2 +- .../benches/coprocessor_executors/util/mod.rs | 2 +- tests/benches/hierarchy/mvcc/mod.rs | 2 +- .../misc/coprocessor/codec/chunk/chunk.rs | 2 +- tests/benches/misc/raftkv/mod.rs | 2 + tests/benches/raftstore/mod.rs | 2 +- tests/failpoints/cases/mod.rs | 3 ++ tests/failpoints/cases/test_disk_full.rs | 8 ++-- tests/failpoints/cases/test_engine.rs | 1 + tests/failpoints/cases/test_hibernate.rs | 1 + tests/failpoints/cases/test_pd_client.rs | 1 + 
.../failpoints/cases/test_pd_client_legacy.rs | 1 + tests/failpoints/cases/test_rawkv.rs | 2 +- .../cases/test_read_execution_tracker.rs | 11 +++-- tests/failpoints/cases/test_split_region.rs | 3 +- tests/failpoints/cases/test_storage.rs | 4 +- tests/failpoints/cases/test_transaction.rs | 2 +- .../failpoints/cases/test_transfer_leader.rs | 4 +- tests/integrations/backup/mod.rs | 1 + tests/integrations/import/test_apply_log.rs | 2 +- tests/integrations/mod.rs | 2 + .../integrations/raftstore/test_bootstrap.rs | 4 +- .../raftstore/test_compact_lock_cf.rs | 4 +- tests/integrations/raftstore/test_stats.rs | 1 + 173 files changed, 584 insertions(+), 534 deletions(-) diff --git a/cmd/tikv-ctl/src/fork_readonly_tikv.rs b/cmd/tikv-ctl/src/fork_readonly_tikv.rs index ef3ae7f8023..d1a917f5624 100644 --- a/cmd/tikv-ctl/src/fork_readonly_tikv.rs +++ b/cmd/tikv-ctl/src/fork_readonly_tikv.rs @@ -265,6 +265,7 @@ where .map_err(|e| format!("copy({}, {}): {}", src.display(), dst.display(), e)) } +#[allow(clippy::permissions_set_readonly_false)] fn add_write_permission>(path: P) -> Result<(), String> { let path = path.as_ref(); let mut pmt = std::fs::metadata(path) diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 6baa1fe6c39..c1ab11cc507 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -1,7 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. -#![feature(once_cell)] #![feature(let_chains)] +#![feature(lazy_cell)] #[macro_use] extern crate log; diff --git a/components/backup-stream/src/errors.rs b/components/backup-stream/src/errors.rs index c3cc91da9ff..cc720d5aecc 100644 --- a/components/backup-stream/src/errors.rs +++ b/components/backup-stream/src/errors.rs @@ -158,7 +158,7 @@ where /// Like `errors.Annotate` in Go. /// Wrap an unknown error with [`Error::Other`]. -#[macro_export(crate)] +#[macro_export] macro_rules! 
annotate { ($inner: expr, $message: expr) => { { @@ -242,6 +242,7 @@ mod test { #[bench] // 2,685 ns/iter (+/- 194) + #[allow(clippy::unnecessary_literal_unwrap)] fn contextual_add_format_strings_directly(b: &mut test::Bencher) { b.iter(|| { let err = Error::Io(io::Error::new( @@ -305,6 +306,7 @@ mod test { #[bench] // 773 ns/iter (+/- 8) + #[allow(clippy::unnecessary_literal_unwrap)] fn baseline(b: &mut test::Bencher) { b.iter(|| { let err = Error::Io(io::Error::new( diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index 1fdc1b3b1e8..df8f0f025b1 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -663,11 +663,10 @@ impl MetadataClient { let cp = match r.len() { 0 => { let global_cp = self.global_checkpoint_of(task).await?; - let cp = match global_cp { + match global_cp { None => self.get_task_start_ts_checkpoint(task).await?, Some(cp) => cp, - }; - cp + } } _ => Checkpoint::from_kv(&r[0])?, }; diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index b0d3453c958..6ce8486109f 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -950,7 +950,9 @@ impl StreamTaskInfo { .last_flush_time .swap(Box::into_raw(Box::new(Instant::now())), Ordering::SeqCst); // manual gc last instant - unsafe { Box::from_raw(ptr) }; + unsafe { + let _ = Box::from_raw(ptr); + } } pub fn should_flush(&self, flush_interval: &Duration) -> bool { diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 4f44ec46853..0803ba1b99a 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -309,7 +309,7 @@ impl SubscriptionTracer { } }; - let mut subscription = sub.value_mut(); + let subscription = sub.value_mut(); let old_epoch = 
subscription.meta.get_region_epoch(); let new_epoch = new_region.get_region_epoch(); diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 1b150eaa1f0..52b6f0e9391 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -342,7 +342,7 @@ pub fn request_to_triple(mut req: Request) -> Either<(Vec, Vec, CfName), /// `try_send!(s: Scheduler, task: T)` tries to send a task to the scheduler, /// once meet an error, would report it, with the current file and line (so it /// is made as a macro). returns whether it success. -#[macro_export(crate)] +#[macro_export] macro_rules! try_send { ($s:expr, $task:expr) => { match $s.schedule($task) { @@ -366,7 +366,7 @@ macro_rules! try_send { /// `backup_stream_debug`. because once we enable debug log for all crates, it /// would soon get too verbose to read. using this macro now we can enable debug /// log level for the crate only (even compile time...). -#[macro_export(crate)] +#[macro_export] macro_rules! 
debug { ($($t: tt)+) => { if cfg!(feature = "backup-stream-debug") { diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index a4efc162092..d6330f49966 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -2493,8 +2493,8 @@ pub mod tests { fn test_backup_file_name() { let region = metapb::Region::default(); let store_id = 1; - let test_cases = vec!["s3", "local", "gcs", "azure", "hdfs"]; - let test_target = vec![ + let test_cases = ["s3", "local", "gcs", "azure", "hdfs"]; + let test_target = [ "1/0_0_000", "1/0_0_000", "1_0_0_000", @@ -2513,7 +2513,7 @@ pub mod tests { assert_eq!(target.to_string(), prefix_arr.join(delimiter)); } - let test_target = vec!["1/0_0", "1/0_0", "1_0_0", "1_0_0", "1_0_0"]; + let test_target = ["1/0_0", "1/0_0", "1_0_0", "1_0_0", "1_0_0"]; for (storage_name, target) in test_cases.iter().zip(test_target.iter()) { let key = None; let filename = backup_file_name(store_id, ®ion, key, storage_name); diff --git a/components/batch-system/src/fsm.rs b/components/batch-system/src/fsm.rs index 3fa5ad15a64..16113dde8e2 100644 --- a/components/batch-system/src/fsm.rs +++ b/components/batch-system/src/fsm.rs @@ -149,7 +149,9 @@ impl FsmState { Ok(_) => return, Err(Self::NOTIFYSTATE_DROP) => { let ptr = self.data.swap(ptr::null_mut(), Ordering::AcqRel); - unsafe { Box::from_raw(ptr) }; + unsafe { + let _ = Box::from_raw(ptr); + } return; } Err(s) => s, @@ -179,7 +181,9 @@ impl Drop for FsmState { fn drop(&mut self) { let ptr = self.data.swap(ptr::null_mut(), Ordering::SeqCst); if !ptr.is_null() { - unsafe { Box::from_raw(ptr) }; + unsafe { + let _ = Box::from_raw(ptr); + } } self.state_cnt.fetch_sub(1, Ordering::Relaxed); } diff --git a/components/case_macros/src/lib.rs b/components/case_macros/src/lib.rs index 057b68065d2..b779373a59d 100644 --- a/components/case_macros/src/lib.rs +++ b/components/case_macros/src/lib.rs @@ -5,12 +5,12 @@ use proc_macro::{Group, Literal, TokenStream, 
TokenTree}; macro_rules! transform_idents_in_stream_to_string { - ($stream:ident, $transform:expr) => { + ($stream:ident, $transform:ident) => { $stream .into_iter() .map(|token_tree| match token_tree { TokenTree::Ident(ref ident) => { - Literal::string(&$transform(ident.to_string())).into() + Literal::string(&$transform(&ident.to_string())).into() } // find all idents in `TokenGroup` apply and reconstruct the group TokenTree::Group(ref group) => TokenTree::Group(Group::new( @@ -20,7 +20,7 @@ macro_rules! transform_idents_in_stream_to_string { .into_iter() .map(|group_token_tree| { if let TokenTree::Ident(ref ident) = group_token_tree { - Literal::string(&$transform(ident.to_string())).into() + Literal::string(&$transform(&ident.to_string())).into() } else { group_token_tree } @@ -53,7 +53,7 @@ fn to_snake(s: &str) -> String { /// e.g. `HelloWorld` -> `hello-world` #[proc_macro] pub fn kebab_case(stream: TokenStream) -> TokenStream { - transform_idents_in_stream_to_string!(stream, |s: String| to_kebab(&s)) + transform_idents_in_stream_to_string!(stream, to_kebab) } /// Expands idents in the input stream as snake-case string literal @@ -61,5 +61,5 @@ pub fn kebab_case(stream: TokenStream) -> TokenStream { /// e.g. 
`HelloWorld` -> `hello_world` #[proc_macro] pub fn snake_case(stream: TokenStream) -> TokenStream { - transform_idents_in_stream_to_string!(stream, |s: String| to_snake(&s)) + transform_idents_in_stream_to_string!(stream, to_snake) } diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index c82c4cb6f13..18528fd08e9 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -1437,7 +1437,7 @@ mod tests { #[test] fn test_observed_range() { - for case in vec![ + for case in [ (b"".as_slice(), b"".as_slice(), false), (b"a", b"", false), (b"", b"b", false), diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index a5f00a08028..9d5601eba84 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -1015,10 +1015,10 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint( - &'m self, + pub fn find_first( + &self, start_key: Option<&Key>, end_key: Option<&Key>, mut pred: impl FnMut(Arc) -> Option, diff --git a/components/coprocessor_plugin_api/src/util.rs b/components/coprocessor_plugin_api/src/util.rs index 31d75610d75..06e8847402f 100644 --- a/components/coprocessor_plugin_api/src/util.rs +++ b/components/coprocessor_plugin_api/src/util.rs @@ -19,10 +19,14 @@ pub type PluginConstructorSignature = /// Type signature of the exported function with symbol /// [`PLUGIN_GET_BUILD_INFO_SYMBOL`]. +// emit this warn because to fix it need to change the data type which is a breaking change. +#[allow(improper_ctypes_definitions)] pub type PluginGetBuildInfoSignature = extern "C" fn() -> BuildInfo; /// Type signature of the exported function with symbol /// [`PLUGIN_GET_PLUGIN_INFO_SYMBOL`]. +// emit this warn because to fix it need to change the data type which is a breaking change. 
+#[allow(improper_ctypes_definitions)] pub type PluginGetPluginInfoSignature = extern "C" fn() -> PluginInfo; /// Automatically collected build information about the plugin that is exposed diff --git a/components/encryption/src/config.rs b/components/encryption/src/config.rs index 23e049e0df4..4455e4ce7cc 100644 --- a/components/encryption/src/config.rs +++ b/components/encryption/src/config.rs @@ -134,11 +134,12 @@ impl KmsConfig { } } -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "kebab-case", tag = "type")] pub enum MasterKeyConfig { // Store encryption metadata as plaintext. Data still get encrypted. Not allowed to use if // encryption is enabled. (i.e. when encryption_config.method != Plaintext). + #[default] Plaintext, // Pass master key from a file, with key encoded as a readable hex string. The file should end @@ -156,12 +157,6 @@ pub enum MasterKeyConfig { }, } -impl Default for MasterKeyConfig { - fn default() -> Self { - MasterKeyConfig::Plaintext - } -} - mod encryption_method_serde { use std::fmt; diff --git a/components/engine_rocks/src/logger.rs b/components/engine_rocks/src/logger.rs index 85f4de713ac..185411dcacf 100644 --- a/components/engine_rocks/src/logger.rs +++ b/components/engine_rocks/src/logger.rs @@ -3,7 +3,6 @@ use rocksdb::{DBInfoLogLevel as InfoLogLevel, Logger}; use tikv_util::{crit, debug, error, info, warn}; // TODO(yiwu): abstract the Logger interface. 
-#[derive(Default)] pub struct RocksdbLogger; impl Logger for RocksdbLogger { @@ -44,7 +43,6 @@ impl Logger for TabletLogger { } } -#[derive(Default)] pub struct RaftDbLogger; impl Logger for RaftDbLogger { diff --git a/components/engine_rocks/src/properties.rs b/components/engine_rocks/src/properties.rs index 87ccab9e5ab..700d7621dc6 100644 --- a/components/engine_rocks/src/properties.rs +++ b/components/engine_rocks/src/properties.rs @@ -144,10 +144,7 @@ pub struct RangeProperties { impl RangeProperties { pub fn get(&self, key: &[u8]) -> &RangeOffsets { - let idx = self - .offsets - .binary_search_by_key(&key, |&(ref k, _)| k) - .unwrap(); + let idx = self.offsets.binary_search_by_key(&key, |(k, _)| k).unwrap(); &self.offsets[idx].1 } @@ -205,11 +202,11 @@ impl RangeProperties { if start == end { return (0, 0); } - let start_offset = match self.offsets.binary_search_by_key(&start, |&(ref k, _)| k) { + let start_offset = match self.offsets.binary_search_by_key(&start, |(k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; - let end_offset = match self.offsets.binary_search_by_key(&end, |&(ref k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end, |(k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; @@ -227,7 +224,7 @@ impl RangeProperties { ) -> Vec<(Vec, RangeOffsets)> { let start_offset = match self .offsets - .binary_search_by_key(&start_key, |&(ref k, _)| k) + .binary_search_by_key(&start_key, |(ref k, _)| k) { Ok(idx) => { if idx == self.offsets.len() - 1 { @@ -239,7 +236,7 @@ impl RangeProperties { Err(next_idx) => next_idx, }; - let end_offset = match self.offsets.binary_search_by_key(&end_key, |&(ref k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end_key, |(ref k, _)| k) { Ok(idx) => { if idx == 0 { return vec![]; @@ -869,7 +866,7 @@ mod tests { let mut collector = MvccPropertiesCollector::new(KeyMode::Txn); b.iter(|| { - for &(ref k, ref v) in &entries { + 
for (k, v) in &entries { collector.add(k, v, DBEntryType::Put, 0, 0); } }); diff --git a/components/engine_tirocks/src/properties/mvcc.rs b/components/engine_tirocks/src/properties/mvcc.rs index 1ca170f33d5..66c96284ea3 100644 --- a/components/engine_tirocks/src/properties/mvcc.rs +++ b/components/engine_tirocks/src/properties/mvcc.rs @@ -356,7 +356,7 @@ mod tests { let mut collector = MvccPropertiesCollector::new(CStr::from_bytes_with_nul(b"\0").unwrap(), KeyMode::Txn); b.iter(|| { - for &(ref k, ref v) in &entries { + for (k, v) in &entries { collector.add(k, v, EntryType::kEntryPut, 0, 0).unwrap(); } }); diff --git a/components/engine_tirocks/src/properties/range.rs b/components/engine_tirocks/src/properties/range.rs index 59b9e68a6bb..e8a3411b02f 100644 --- a/components/engine_tirocks/src/properties/range.rs +++ b/components/engine_tirocks/src/properties/range.rs @@ -53,7 +53,7 @@ impl RangeProperties { pub fn get(&self, key: &[u8]) -> &RangeOffsets { let idx = self .offsets - .binary_search_by_key(&key, |&(ref k, _)| k) + .binary_search_by_key(&key, |(k, _)| k) .unwrap(); &self.offsets[idx].1 } @@ -112,11 +112,11 @@ impl RangeProperties { if start == end { return (0, 0); } - let start_offset = match self.offsets.binary_search_by_key(&start, |&(ref k, _)| k) { + let start_offset = match self.offsets.binary_search_by_key(&start, |(k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; - let end_offset = match self.offsets.binary_search_by_key(&end, |&(ref k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end, |(k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; @@ -134,7 +134,7 @@ impl RangeProperties { ) -> Vec<(Vec, RangeOffsets)> { let start_offset = match self .offsets - .binary_search_by_key(&start_key, |&(ref k, _)| k) + .binary_search_by_key(&start_key, |(k, _)| k) { Ok(idx) => { if idx == self.offsets.len() - 1 { @@ -146,7 +146,7 @@ impl RangeProperties { Err(next_idx) => next_idx, 
}; - let end_offset = match self.offsets.binary_search_by_key(&end_key, |&(ref k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end_key, |(k, _)| k) { Ok(idx) => { if idx == 0 { return vec![]; diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index 8590236e126..6449399cef8 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -119,7 +119,7 @@ impl SstApplyState { for sst in ssts { let cf_index = data_cf_offset(sst.get_cf_name()); if let Some(metas) = sst_list.get_mut(cf_index) { - metas.drain_filter(|entry| entry.sst.get_uuid() == sst.get_uuid()); + let _ = metas.extract_if(|entry| entry.sst.get_uuid() == sst.get_uuid()); } } } diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index e09b1b52733..0f89776e7fd 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -253,8 +253,8 @@ #![feature(assert_matches)] #![feature(linked_list_cursors)] #![feature(let_chains)] -#![feature(str_split_as_str)] -#![feature(drain_filter)] +#![feature(str_split_remainder)] +#![feature(extract_if)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index c88f1548513..64e6dcbd4b4 100644 --- a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -241,7 +241,7 @@ impl TabletRegistry { let mut parts = name.rsplit('_'); let suffix = parts.next()?.parse().ok()?; let id = parts.next()?.parse().ok()?; - let prefix = parts.as_str(); + let prefix = parts.remainder().unwrap_or(""); Some((prefix, id, suffix)) } diff --git a/components/online_config/online_config_derive/src/lib.rs b/components/online_config/online_config_derive/src/lib.rs index bb37aad5924..e48a540c6b8 100644 --- a/components/online_config/online_config_derive/src/lib.rs +++ b/components/online_config/online_config_derive/src/lib.rs @@ 
-330,15 +330,11 @@ fn is_option_type(ty: &Type) -> bool { // TODO store (with lazy static) the vec of string // TODO maybe optimization, reverse the order of segments fn extract_option_segment(path: &Path) -> Option<&PathSegment> { - let idents_of_path = path - .segments - .iter() - .into_iter() - .fold(String::new(), |mut acc, v| { - acc.push_str(&v.ident.to_string()); - acc.push('|'); - acc - }); + let idents_of_path = path.segments.iter().fold(String::new(), |mut acc, v| { + acc.push_str(&v.ident.to_string()); + acc.push('|'); + acc + }); vec!["Option|", "std|option|Option|", "core|option|Option|"] .into_iter() .find(|s| idents_of_path == *s) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 4c142a43abf..5f036c61020 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -489,7 +489,11 @@ impl StorePollerBuilder { self.remove_dir(&path)?; continue; } - let Some((prefix, region_id, tablet_index)) = self.tablet_registry.parse_tablet_name(&path) else { continue }; + let Some((prefix, region_id, tablet_index)) = + self.tablet_registry.parse_tablet_name(&path) + else { + continue; + }; if prefix == MERGE_SOURCE_PREFIX { continue; } diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 5b5e132b9ce..697d0525169 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -27,6 +27,7 @@ #![feature(box_into_inner)] #![feature(assert_matches)] #![feature(option_get_or_insert_default)] +#![allow(clippy::needless_pass_by_ref_mut)] mod batch; mod bootstrap; diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index d3d1896287c..76b71a8906c 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -343,7 
+343,9 @@ impl Peer { entry.get_data(), entry.get_index(), entry.get_term(), - ) else { continue }; + ) else { + continue; + }; let cmd_type = cmd.get_admin_request().get_cmd_type(); match cmd_type { AdminCmdType::TransferLeader diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 0f9cae7218d..2fe2b4b5735 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -1098,7 +1098,9 @@ mod test { } } - let AdminCmdResult::SplitRegion(SplitResult { tablet, .. }) = apply_res else { panic!() }; + let AdminCmdResult::SplitRegion(SplitResult { tablet, .. }) = apply_res else { + panic!() + }; // update cache let mut cache = apply.tablet_registry().get(parent_id).unwrap(); cache.set(*tablet.downcast().unwrap()); diff --git a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs index 4cdeba3bc41..f60b9828bbb 100644 --- a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs +++ b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs @@ -50,21 +50,21 @@ impl Peer { /// to target follower first to ensures it's ready to become leader. /// After that the real transfer leader process begin. /// - /// 1. pre_transfer_leader on leader: - /// Leader will send a MsgTransferLeader to follower. - /// 2. execute_transfer_leader on follower - /// If follower passes all necessary checks, it will reply an - /// ACK with type MsgTransferLeader and its promised applied index. - /// 3. ready_to_transfer_leader on leader: - /// Leader checks if it's appropriate to transfer leadership. If it - /// does, it calls raft transfer_leader API to do the remaining work. + /// 1. pre_transfer_leader on leader: Leader will send a MsgTransferLeader + /// to follower. + /// 2. 
execute_transfer_leader on follower If follower passes all necessary + /// checks, it will reply an ACK with type MsgTransferLeader and its + /// promised applied index. + /// 3. ready_to_transfer_leader on leader: Leader checks if it's appropriate + /// to transfer leadership. If it does, it calls raft transfer_leader API + /// to do the remaining work. /// /// Additional steps when there are remaining pessimistic /// locks to propose (detected in function on_transfer_leader_msg). /// 1. Leader firstly proposes pessimistic locks and then proposes a /// TransferLeader command. - /// 2. The follower applies the TransferLeader command and replies an - /// ACK with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. + /// 2. The follower applies the TransferLeader command and replies an ACK + /// with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. /// /// See also: tikv/rfcs#37. pub fn propose_transfer_leader( diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 8fe1d2a07b3..395774e17f1 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -637,8 +637,12 @@ impl Peer { let check_peer_id = check.get_check_peer().get_id(); let records = self.storage().region_state().get_merged_records(); let Some(record) = records.iter().find(|r| { - r.get_source_peers().iter().any(|p| p.get_id() == check_peer_id) - }) else { return }; + r.get_source_peers() + .iter() + .any(|p| p.get_id() == check_peer_id) + }) else { + return; + }; let source_index = record.get_source_index(); forward_destroy_to_source_peer(msg, |m| { let source_checkpoint = super::merge_source_path( diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 2f074fdc04d..5f6d589eca6 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -345,7 +345,9 @@ where 
match fut.await? { Some(query_res) => { if query_res.read().is_none() { - let QueryResult::Response(res) = query_res else { unreachable!() }; + let QueryResult::Response(res) = query_res else { + unreachable!() + }; // Get an error explicitly in header, // or leader reports KeyIsLocked error via read index. assert!( diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index af0257e763f..2b6c9c666e6 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -774,7 +774,7 @@ impl Peer { flushed = true; let flush_state = self.flush_state().clone(); - let mut apply_trace = self.storage_mut().apply_trace_mut(); + let apply_trace = self.storage_mut().apply_trace_mut(); let flushed_indexes = flush_state.as_ref().flushed_index(); for i in 0..flushed_indexes.len() { diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 9e0ed449cef..15caf5f0c84 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -570,10 +570,9 @@ impl Storage { pub fn cancel_generating_snap_due_to_compacted(&self, compact_to: u64) { let mut states = self.snap_states.borrow_mut(); states.retain(|id, state| { - let SnapState::Generating { - ref index, - .. - } = *state else { return true; }; + let SnapState::Generating { ref index, .. } = *state else { + return true; + }; let snap_index = index.load(Ordering::SeqCst); if snap_index == 0 || compact_to <= snap_index + 1 { return true; @@ -600,10 +599,9 @@ impl Storage { } let (mut snapshot, to_peer_id) = *res.unwrap(); if let Some(state) = self.snap_states.borrow_mut().get_mut(&to_peer_id) { - let SnapState::Generating { - ref index, - .. - } = *state else { return false }; + let SnapState::Generating { ref index, .. 
} = *state else { + return false; + }; if snapshot.get_metadata().get_index() < index.load(Ordering::SeqCst) { warn!( self.logger(), diff --git a/components/raftstore-v2/src/operation/txn_ext.rs b/components/raftstore-v2/src/operation/txn_ext.rs index 272b2526b39..6c3a9269a7f 100644 --- a/components/raftstore-v2/src/operation/txn_ext.rs +++ b/components/raftstore-v2/src/operation/txn_ext.rs @@ -266,7 +266,9 @@ impl Peer { self.logger, "propose {} locks before transferring leader", lock_count; ); - let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write(header, encoder.encode()).0 else {unreachable!()}; + let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write(header, encoder.encode()).0 else { + unreachable!() + }; self.on_simple_write(ctx, write.header, write.data, write.ch); true } diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs index 37962a45452..e7b3c8e62b8 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs @@ -100,7 +100,10 @@ impl Peer { failed_voters, target_index, demote_after_exit, - }) = self.unsafe_recovery_state() else { return }; + }) = self.unsafe_recovery_state() + else { + return; + }; if self.raft_group().raft.raft_log.applied < *target_index { return; diff --git a/components/raftstore-v2/src/worker/cleanup/compact.rs b/components/raftstore-v2/src/worker/cleanup/compact.rs index 7acdb943b91..feb519a04ad 100644 --- a/components/raftstore-v2/src/worker/cleanup/compact.rs +++ b/components/raftstore-v2/src/worker/cleanup/compact.rs @@ -97,8 +97,12 @@ where ) { Ok(mut region_ids) => { for region_id in region_ids.drain(..) 
{ - let Some(mut tablet_cache) = self.tablet_registry.get(region_id) else {continue}; - let Some(tablet) = tablet_cache.latest() else {continue}; + let Some(mut tablet_cache) = self.tablet_registry.get(region_id) else { + continue; + }; + let Some(tablet) = tablet_cache.latest() else { + continue; + }; for cf in &cf_names { if let Err(e) = tablet.compact_range_cf(cf, None, None, false, 1 /* threads */) @@ -143,8 +147,12 @@ fn collect_regions_to_compact( ); let mut regions_to_compact = vec![]; for id in region_ids { - let Some(mut tablet_cache) = reg.get(id) else {continue}; - let Some(tablet) = tablet_cache.latest() else {continue}; + let Some(mut tablet_cache) = reg.get(id) else { + continue; + }; + let Some(tablet) = tablet_cache.latest() else { + continue; + }; if tablet.auto_compactions_is_disabled().expect("cf") { info!( logger, diff --git a/components/raftstore-v2/src/worker/pd/region.rs b/components/raftstore-v2/src/worker/pd/region.rs index 763e12fff07..999eccb4962 100644 --- a/components/raftstore-v2/src/worker/pd/region.rs +++ b/components/raftstore-v2/src/worker/pd/region.rs @@ -113,10 +113,7 @@ where let approximate_keys = task.approximate_keys.unwrap_or_default(); let region_id = task.region.get_id(); - let peer_stat = self - .region_peers - .entry(region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(region_id).or_default(); peer_stat.approximate_size = approximate_size; peer_stat.approximate_keys = approximate_keys; @@ -373,10 +370,7 @@ where pub fn handle_update_read_stats(&mut self, mut stats: ReadStats) { for (region_id, region_info) in stats.region_infos.iter_mut() { - let peer_stat = self - .region_peers - .entry(*region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(*region_id).or_default(); peer_stat.read_bytes += region_info.flow.read_bytes as u64; peer_stat.read_keys += region_info.flow.read_keys as u64; self.store_stat.engine_total_bytes_read += 
region_info.flow.read_bytes as u64; @@ -398,10 +392,7 @@ where pub fn handle_update_write_stats(&mut self, mut stats: WriteStats) { for (region_id, region_info) in stats.region_infos.iter_mut() { - let peer_stat = self - .region_peers - .entry(*region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(*region_id).or_default(); peer_stat.query_stats.add_query_stats(®ion_info.0); self.store_stat .engine_total_query_num diff --git a/components/raftstore-v2/src/worker/pd/split.rs b/components/raftstore-v2/src/worker/pd/split.rs index 7fec5a31bb6..7bafb6c442a 100644 --- a/components/raftstore-v2/src/worker/pd/split.rs +++ b/components/raftstore-v2/src/worker/pd/split.rs @@ -142,8 +142,10 @@ where let f = async move { for split_info in split_infos { - let Ok(Some(region)) = - pd_client.get_region_by_id(split_info.region_id).await else { continue }; + let Ok(Some(region)) = pd_client.get_region_by_id(split_info.region_id).await + else { + continue; + }; // Try to split the region with the given split key. 
if let Some(split_key) = split_info.split_key { Self::ask_batch_split_imp( diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index 7c330353836..0b0429eb8d1 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -467,7 +467,8 @@ impl Runner { let Some(Some(tablet)) = self .tablet_registry .get(region_id) - .map(|mut cache| cache.latest().cloned()) else { + .map(|mut cache| cache.latest().cloned()) + else { warn!( self.logger, "flush memtable failed to acquire tablet"; @@ -555,7 +556,15 @@ impl Runner { } fn delete_range(&self, delete_range: Task) { - let Task::DeleteRange { region_id, tablet, cf, start_key, end_key, cb } = delete_range else { + let Task::DeleteRange { + region_id, + tablet, + cf, + start_key, + end_key, + cb, + } = delete_range + else { slog_panic!(self.logger, "unexpected task"; "task" => format!("{}", delete_range)) }; diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 5b3cc5feb93..a949725090d 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -137,7 +137,9 @@ impl TestRouter { match res { Ok(_) => return block_on(sub.result()).is_some(), Err(TrySendError::Disconnected(m)) => { - let PeerMsg::WaitFlush(ch) = m else { unreachable!() }; + let PeerMsg::WaitFlush(ch) = m else { + unreachable!() + }; match self .store_router() .send_control(StoreMsg::WaitFlush { region_id, ch }) diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index d082013cd2c..756b7dc399e 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -471,10 +471,7 @@ impl CoprocessorHost { BoxSplitCheckObserver::new(KeysCheckObserver::new(ch)), ); registry.register_split_check_observer(100, 
BoxSplitCheckObserver::new(HalfCheckObserver)); - registry.register_split_check_observer( - 400, - BoxSplitCheckObserver::new(TableCheckObserver::default()), - ); + registry.register_split_check_observer(400, BoxSplitCheckObserver::new(TableCheckObserver)); registry.register_admin_observer(100, BoxAdminObserver::new(SplitObserver)); CoprocessorHost { registry, cfg } } diff --git a/components/raftstore/src/errors.rs b/components/raftstore/src/errors.rs index d1597a77121..6cf83a6cf84 100644 --- a/components/raftstore/src/errors.rs +++ b/components/raftstore/src/errors.rs @@ -223,7 +223,7 @@ impl From for errorpb::Error { .mut_proposal_in_merging_mode() .set_region_id(region_id); } - Error::Transport(reason) if reason == DiscardReason::Full => { + Error::Transport(DiscardReason::Full) => { let mut server_is_busy_err = errorpb::ServerIsBusy::default(); server_is_busy_err.set_reason(RAFTSTORE_IS_BUSY.to_owned()); errorpb.set_server_is_busy(server_is_busy_err); diff --git a/components/raftstore/src/lib.rs b/components/raftstore/src/lib.rs index 1db5f79d226..197eaefeac7 100644 --- a/components/raftstore/src/lib.rs +++ b/components/raftstore/src/lib.rs @@ -5,11 +5,13 @@ #![feature(div_duration)] #![feature(min_specialization)] #![feature(box_patterns)] -#![feature(hash_drain_filter)] +#![feature(hash_extract_if)] #![feature(let_chains)] #![feature(assert_matches)] #![feature(type_alias_impl_trait)] +#![feature(impl_trait_in_assoc_type)] #![recursion_limit = "256"] +#![allow(clippy::needless_pass_by_ref_mut)] #[cfg(test)] extern crate test; diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index eedd5052bbb..12617bc28a2 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -419,7 +419,11 @@ where } self.state_size = 0; if let ExtraBatchWrite::V2(_) = self.extra_batch_write { - let ExtraBatchWrite::V2(lb) = mem::replace(&mut self.extra_batch_write, 
ExtraBatchWrite::None) else { unreachable!() }; + let ExtraBatchWrite::V2(lb) = + mem::replace(&mut self.extra_batch_write, ExtraBatchWrite::None) + else { + unreachable!() + }; wb.merge(lb).unwrap(); } } diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index c91c68538dd..95f099f77a7 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -1338,14 +1338,14 @@ pub mod tests { // Test the initial data structure size. let (tx, rx) = mpsc::sync_channel(8); let mut cache = EntryCache::new_with_cb(move |c: i64| tx.send(c).unwrap()); - assert_eq!(rx.try_recv().unwrap(), 896); + assert_eq!(rx.try_recv().unwrap(), 0); cache.append( 0, 0, &[new_padded_entry(101, 1, 1), new_padded_entry(102, 1, 2)], ); - assert_eq!(rx.try_recv().unwrap(), 3); + assert_eq!(rx.try_recv().unwrap(), 419); cache.prepend(vec![new_padded_entry(100, 1, 1)]); assert_eq!(rx.try_recv().unwrap(), 1); @@ -1371,7 +1371,7 @@ pub mod tests { // Test trace a dangle entry. let cached_entries = CachedEntries::new(vec![new_padded_entry(100, 1, 1)]); cache.trace_cached_entries(cached_entries); - assert_eq!(rx.try_recv().unwrap(), 1); + assert_eq!(rx.try_recv().unwrap(), 97); // Test trace an entry which is still in cache. let cached_entries = CachedEntries::new(vec![new_padded_entry(102, 3, 5)]); @@ -1398,7 +1398,7 @@ pub mod tests { assert_eq!(rx.try_recv().unwrap(), -7); drop(cache); - assert_eq!(rx.try_recv().unwrap(), -896); + assert_eq!(rx.try_recv().unwrap(), -512); } #[test] diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index c170e5a35f9..406c8d79d18 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -1262,9 +1262,9 @@ where apply_ctx.host.on_empty_cmd(&self.region, index, term); // 1. When a peer become leader, it will send an empty entry. - // 2. 
When a leader tries to read index during transferring leader, - // it will also propose an empty entry. But that entry will not contain - // any associated callback. So no need to clear callback. + // 2. When a leader tries to read index during transferring leader, it will also + // propose an empty entry. But that entry will not contain any associated + // callback. So no need to clear callback. while let Some(mut cmd) = self.pending_cmds.pop_normal(u64::MAX, term - 1) { if let Some(cb) = cmd.cb.take() { apply_ctx @@ -4787,12 +4787,12 @@ where // command may not read the writes of previous commands and break ACID. If // it's still leader, there are two possibility that mailbox is closed: // 1. The process is shutting down. - // 2. The leader is destroyed. A leader won't propose to destroy itself, so - // it should either destroyed by older leaders or newer leaders. Leader - // won't respond to read until it has applied to current term, so no - // command will be proposed until command from older leaders have applied, - // which will then stop it from accepting proposals. If the command is - // proposed by new leader, then it won't be able to propose new proposals. + // 2. The leader is destroyed. A leader won't propose to destroy itself, so it + // should either destroyed by older leaders or newer leaders. Leader won't + // respond to read until it has applied to current term, so no command will + // be proposed until command from older leaders have applied, which will then + // stop it from accepting proposals. If the command is proposed by new + // leader, then it won't be able to propose new proposals. // So only shutdown needs to be checked here. if !tikv_util::thread_group::is_shutdown(!cfg!(test)) { for p in apply.cbs.drain(..) 
{ diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index d61e6784295..36c4c7e8e5f 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -1015,10 +1015,10 @@ where // in snapshot recovery after we stopped all conf changes from PD. // if the follower slow than leader and has the pending conf change. // that's means - // 1. if the follower didn't finished the conf change - // => it cannot be chosen to be leader during recovery. - // 2. if the follower has been chosen to be leader - // => it already apply the pending conf change already. + // 1. if the follower didn't finished the conf change => it cannot be chosen to + // be leader during recovery. + // 2. if the follower has been chosen to be leader => it already apply the + // pending conf change already. return; } debug!( diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 64c5be6d7e1..a858b5afddd 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -169,19 +169,25 @@ where } pub fn has_proposed_cb(&self) -> bool { - let Callback::Write { proposed_cb, .. } = self else { return false; }; + let Callback::Write { proposed_cb, .. } = self else { + return false; + }; proposed_cb.is_some() } pub fn invoke_proposed(&mut self) { - let Callback::Write { proposed_cb, .. } = self else { return; }; + let Callback::Write { proposed_cb, .. } = self else { + return; + }; if let Some(cb) = proposed_cb.take() { cb(); } } pub fn invoke_committed(&mut self) { - let Callback::Write { committed_cb, .. } = self else { return; }; + let Callback::Write { committed_cb, .. } = self else { + return; + }; if let Some(cb) = committed_cb.take() { cb(); } @@ -195,12 +201,16 @@ where } pub fn take_proposed_cb(&mut self) -> Option { - let Callback::Write { proposed_cb, .. } = self else { return None; }; + let Callback::Write { proposed_cb, .. 
} = self else { + return None; + }; proposed_cb.take() } pub fn take_committed_cb(&mut self) -> Option { - let Callback::Write { committed_cb, .. } = self else { return None; }; + let Callback::Write { committed_cb, .. } = self else { + return None; + }; committed_cb.take() } } @@ -258,7 +268,9 @@ impl ReadCallback for Callback { } fn read_tracker(&self) -> Option { - let Callback::Read { tracker, .. } = self else { return None; }; + let Callback::Read { tracker, .. } = self else { + return None; + }; Some(*tracker) } } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 8ef857bfa12..aafd2f9695b 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -2314,14 +2314,14 @@ where CheckApplyingSnapStatus::Applying => { // If this peer is applying snapshot, we should not get a new ready. // There are two reasons in my opinion: - // 1. If we handle a new ready and persist the data(e.g. entries), - // we can not tell raft-rs that this ready has been persisted because - // the ready need to be persisted one by one from raft-rs's view. - // 2. When this peer is applying snapshot, the response msg should not - // be sent to leader, thus the leader will not send new entries to - // this peer. Although it's possible a new leader may send a AppendEntries - // msg to this peer, this possibility is very low. In most cases, there - // is no msg need to be handled. + // 1. If we handle a new ready and persist the data(e.g. entries), we can not + // tell raft-rs that this ready has been persisted because the ready need + // to be persisted one by one from raft-rs's view. + // 2. When this peer is applying snapshot, the response msg should not be sent + // to leader, thus the leader will not send new entries to this peer. + // Although it's possible a new leader may send a AppendEntries msg to this + // peer, this possibility is very low. In most cases, there is no msg need + // to be handled. 
// So we choose to not get a new ready which makes the logic more clear. debug!( "still applying snapshot, skip further handling"; @@ -4467,27 +4467,25 @@ where /// to target follower first to ensures it's ready to become leader. /// After that the real transfer leader process begin. /// - /// 1. pre_transfer_leader on leader: - /// Leader will send a MsgTransferLeader to follower. - /// 2. pre_ack_transfer_leader_msg on follower: - /// If follower passes all necessary checks, it will try to warmup - /// the entry cache. - /// 3. ack_transfer_leader_msg on follower: - /// When the entry cache has been warmed up or the operator is timeout, - /// the follower reply an ACK with type MsgTransferLeader and - /// its promised persistent index. + /// 1. pre_transfer_leader on leader: Leader will send a MsgTransferLeader + /// to follower. + /// 2. pre_ack_transfer_leader_msg on follower: If follower passes all + /// necessary checks, it will try to warmup the entry cache. + /// 3. ack_transfer_leader_msg on follower: When the entry cache has been + /// warmed up or the operator is timeout, the follower reply an ACK with + /// type MsgTransferLeader and its promised persistent index. /// /// Additional steps when there are remaining pessimistic /// locks to propose (detected in function on_transfer_leader_msg). /// 1. Leader firstly proposes pessimistic locks and then proposes a /// TransferLeader command. - /// 2. ack_transfer_leader_msg on follower again: - /// The follower applies the TransferLeader command and replies an - /// ACK with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. + /// 2. ack_transfer_leader_msg on follower again: The follower applies + /// the TransferLeader command and replies an ACK with special context + /// TRANSFER_LEADER_COMMAND_REPLY_CTX. /// - /// 4. ready_to_transfer_leader on leader: - /// Leader checks if it's appropriate to transfer leadership. If it - /// does, it calls raft transfer_leader API to do the remaining work. + /// 4. 
ready_to_transfer_leader on leader: Leader checks if it's appropriate + /// to transfer leadership. If it does, it calls raft transfer_leader API + /// to do the remaining work. /// /// See also: tikv/rfcs#37. fn propose_transfer_leader( @@ -5820,7 +5818,7 @@ mod tests { admin_req.clear_transfer_leader(); req.clear_admin_request(); - for (op, policy) in vec![ + for (op, policy) in [ (CmdType::Get, RequestPolicy::ReadLocal), (CmdType::Snap, RequestPolicy::ReadLocal), (CmdType::Put, RequestPolicy::ProposeNormal), @@ -5973,7 +5971,7 @@ mod tests { // (1, 4) and (1, 5) is not committed let entries = vec![(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (2, 6), (2, 7)]; - let committed = vec![(1, 1), (1, 2), (1, 3), (2, 6), (2, 7)]; + let committed = [(1, 1), (1, 2), (1, 3), (2, 6), (2, 7)]; for (index, term) in entries.clone() { if term != 1 { continue; diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index a888929ca98..1556338e9c0 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -96,7 +96,7 @@ impl PartialEq for SnapState { (&SnapState::Relax, &SnapState::Relax) | (&SnapState::ApplyAborted, &SnapState::ApplyAborted) | (&SnapState::Generating { .. }, &SnapState::Generating { .. 
}) => true, - (&SnapState::Applying(ref b1), &SnapState::Applying(ref b2)) => { + (SnapState::Applying(b1), SnapState::Applying(b2)) => { b1.load(Ordering::Relaxed) == b2.load(Ordering::Relaxed) } _ => false, diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index bc22dfbf586..40168707f6a 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -438,7 +438,7 @@ mod tests { (b"a9".to_vec(), b"v9".to_vec()), ]; - for &(ref k, ref v) in &base_data { + for (k, v) in &base_data { engines.kv.put(&data_key(k), v).unwrap(); } let store = new_peer_storage(engines, &r); @@ -482,11 +482,11 @@ mod tests { let mut data = vec![]; { let db = &engines.kv; - for &(ref k, level) in &levels { + for (k, level) in &levels { db.put(&data_key(k), k).unwrap(); db.flush_cfs(&[], true).unwrap(); data.push((k.to_vec(), k.to_vec())); - db.compact_files_in_range(Some(&data_key(k)), Some(&data_key(k)), Some(level)) + db.compact_files_in_range(Some(&data_key(k)), Some(&data_key(k)), Some(*level)) .unwrap(); } } diff --git a/components/raftstore/src/store/simple_write.rs b/components/raftstore/src/store/simple_write.rs index a303a586935..1d8341c1c0b 100644 --- a/components/raftstore/src/store/simple_write.rs +++ b/components/raftstore/src/store/simple_write.rs @@ -579,13 +579,17 @@ mod tests { SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); assert_eq!(*decoder.header(), *header); let write = decoder.next().unwrap(); - let SimpleWrite::Put(put) = write else { panic!("should be put") }; + let SimpleWrite::Put(put) = write else { + panic!("should be put") + }; assert_eq!(put.cf, CF_DEFAULT); assert_eq!(put.key, b"key"); assert_eq!(put.value, b""); let write = decoder.next().unwrap(); - let SimpleWrite::Delete(delete) = write else { panic!("should be delete") }; + let SimpleWrite::Delete(delete) = write else { + panic!("should be delete") + }; 
assert_eq!(delete.cf, CF_WRITE); assert_eq!(delete.key, &delete_key); assert_matches!(decoder.next(), None); @@ -593,14 +597,18 @@ mod tests { let (bytes, _) = req_encoder2.encode(); decoder = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); let write = decoder.next().unwrap(); - let SimpleWrite::DeleteRange(dr) = write else { panic!("should be delete range") }; + let SimpleWrite::DeleteRange(dr) = write else { + panic!("should be delete range") + }; assert_eq!(dr.cf, CF_LOCK); assert_eq!(dr.start_key, b"key"); assert_eq!(dr.end_key, b"key"); assert!(dr.notify_only); let write = decoder.next().unwrap(); - let SimpleWrite::DeleteRange(dr) = write else { panic!("should be delete range") }; + let SimpleWrite::DeleteRange(dr) = write else { + panic!("should be delete range") + }; assert_eq!(dr.cf, "cf"); assert_eq!(dr.start_key, b"key"); assert_eq!(dr.end_key, b"key"); @@ -626,7 +634,9 @@ mod tests { let mut decoder = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); let write = decoder.next().unwrap(); - let SimpleWrite::Ingest(ssts) = write else { panic!("should be ingest") }; + let SimpleWrite::Ingest(ssts) = write else { + panic!("should be ingest") + }; assert_eq!(exp, ssts); assert_matches!(decoder.next(), None); } @@ -715,7 +725,9 @@ mod tests { SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); assert_eq!(*decoder.header(), *header); let req = decoder.next().unwrap(); - let SimpleWrite::Put(put) = req else { panic!("should be put") }; + let SimpleWrite::Put(put) = req else { + panic!("should be put") + }; assert_eq!(put.cf, CF_DEFAULT); assert_eq!(put.key, b"key"); assert_eq!(put.value, b""); diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 6fe21fe9750..dcb98dd9cb2 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1323,7 +1323,7 @@ impl Write for Snapshot { } 
assert!(cf_file.size[self.cf_file_index] != 0); - let mut file_for_recving = cf_file + let file_for_recving = cf_file .file_for_recving .get_mut(self.cf_file_index) .unwrap(); @@ -2162,7 +2162,7 @@ impl TabletSnapManager { .stats .lock() .unwrap() - .drain_filter(|_, (_, stat)| stat.get_region_id() > 0) + .extract_if(|_, (_, stat)| stat.get_region_id() > 0) .map(|(_, (_, stat))| stat) .filter(|stat| stat.get_total_duration_sec() > 1) .collect(); diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 3cdee1e40f1..8fcaf826c6a 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -327,7 +327,7 @@ mod tests { for db_creater in db_creaters { let (_enc_dir, enc_opts) = gen_db_options_with_encryption("test_cf_build_and_apply_plain_files_enc"); - for db_opt in vec![None, Some(enc_opts)] { + for db_opt in [None, Some(enc_opts)] { let dir = Builder::new().prefix("test-snap-cf-db").tempdir().unwrap(); let db: KvTestEngine = db_creater(dir.path(), db_opt.clone(), None).unwrap(); // Collect keys via the key_callback into a collection. 
@@ -408,7 +408,7 @@ mod tests { for db_creater in db_creaters { let (_enc_dir, enc_opts) = gen_db_options_with_encryption("test_cf_build_and_apply_sst_files_enc"); - for db_opt in vec![None, Some(enc_opts)] { + for db_opt in [None, Some(enc_opts)] { let dir = Builder::new().prefix("test-snap-cf-db").tempdir().unwrap(); let db = db_creater(dir.path(), db_opt.clone(), None).unwrap(); let snap_cf_dir = Builder::new().prefix("test-snap-cf").tempdir().unwrap(); diff --git a/components/raftstore/src/store/txn_ext.rs b/components/raftstore/src/store/txn_ext.rs index 0091fd4e7bb..9c73be2b9eb 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -244,7 +244,7 @@ impl PeerPessimisticLocks { // Locks that are marked deleted still need to be moved to the new regions, // and the deleted mark should also be cleared. // Refer to the comment in `PeerPessimisticLocks` for details. - let removed_locks = self.map.drain_filter(|key, _| { + let removed_locks = self.map.extract_if(|key, _| { let key = &**key.as_encoded(); let (start_key, end_key) = (derived.get_start_key(), derived.get_end_key()); key < start_key || (!end_key.is_empty() && key >= end_key) diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 3f34fe691ee..ed2c70822c9 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -124,8 +124,7 @@ pub fn is_vote_msg(msg: &eraftpb::Message) -> bool { /// peer or not. // There could be two cases: // 1. Target peer already exists but has not established communication with leader yet -// 2. Target peer is added newly due to member change or region split, but it's not -// created yet +// 2. 
Target peer is added newly due to member change or region split, but it's not created yet // For both cases the region start key and end key are attached in RequestVote and // Heartbeat message for the store of that peer to check whether to create a new peer // when receiving these messages, or just to wait for a pending region split to perform diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 32fbdbc3145..cb067ca840b 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1710,10 +1710,7 @@ where fn handle_read_stats(&mut self, mut read_stats: ReadStats) { for (region_id, region_info) in read_stats.region_infos.iter_mut() { - let peer_stat = self - .region_peers - .entry(*region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(*region_id).or_default(); peer_stat.read_bytes += region_info.flow.read_bytes as u64; peer_stat.read_keys += region_info.flow.read_keys as u64; self.store_stat.engine_total_bytes_read += region_info.flow.read_bytes as u64; @@ -1735,10 +1732,7 @@ where fn handle_write_stats(&mut self, mut write_stats: WriteStats) { for (region_id, region_info) in write_stats.region_infos.iter_mut() { - let peer_stat = self - .region_peers - .entry(*region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(*region_id).or_default(); peer_stat.query_stats.add_query_stats(®ion_info.0); self.store_stat .engine_total_query_num @@ -2096,7 +2090,10 @@ where let f = async move { for split_info in split_infos { let Ok(Some(region)) = - pd_client.get_region_by_id(split_info.region_id).await else { continue }; + pd_client.get_region_by_id(split_info.region_id).await + else { + continue; + }; // Try to split the region with the given split key. 
if let Some(split_key) = split_info.split_key { Self::handle_ask_batch_split( @@ -2161,10 +2158,7 @@ where cpu_usage, ) = { let region_id = hb_task.region.get_id(); - let peer_stat = self - .region_peers - .entry(region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(region_id).or_default(); peer_stat.approximate_size = approximate_size; peer_stat.approximate_keys = approximate_keys; diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 5d6ede9c193..5a6e641f5dc 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -2155,11 +2155,12 @@ mod tests { let (notify_tx, notify_rx) = channel(); let (wait_spawn_tx, wait_spawn_rx) = channel(); let runtime = tokio::runtime::Runtime::new().unwrap(); - let _ = runtime.spawn(async move { + let handler = runtime.spawn(async move { wait_spawn_tx.send(()).unwrap(); notify.notified().await; notify_tx.send(()).unwrap(); }); + drop(handler); wait_spawn_rx.recv().unwrap(); thread::sleep(std::time::Duration::from_millis(500)); // Prevent lost notify. must_not_redirect(&mut reader, &rx, task); diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 068904b2a67..7a675646f5c 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -179,7 +179,7 @@ impl PendingDeleteRanges { ) -> Vec<(u64, Vec, Vec, u64)> { let ranges = self.find_overlap_ranges(start_key, end_key); - for &(_, ref s_key, ..) in &ranges { + for (_, s_key, ..) 
in &ranges { self.ranges.remove(s_key).unwrap(); } ranges @@ -1293,7 +1293,7 @@ pub(crate) mod tests { } }; - #[allow(dead_code)] + #[cfg(feature = "failpoints")] let must_not_finish = |ids: &[u64]| { for id in ids { let region_key = keys::region_state_key(*id); diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index 4ff853f70a0..468c06febd4 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -64,14 +64,14 @@ impl KeyEntry { impl PartialOrd for KeyEntry { fn partial_cmp(&self, rhs: &KeyEntry) -> Option { - // BinaryHeap is max heap, so we have to reverse order to get a min heap. - Some(self.key.cmp(&rhs.key).reverse()) + Some(self.cmp(rhs)) } } impl Ord for KeyEntry { fn cmp(&self, rhs: &KeyEntry) -> Ordering { - self.partial_cmp(rhs).unwrap() + // BinaryHeap is max heap, so we have to reverse order to get a min heap. + self.key.cmp(&rhs.key).reverse() } } @@ -287,7 +287,7 @@ impl Runner { region: &Region, bucket_ranges: &Vec, ) { - for (mut bucket, bucket_range) in &mut buckets.iter_mut().zip(bucket_ranges) { + for (bucket, bucket_range) in &mut buckets.iter_mut().zip(bucket_ranges) { let mut bucket_region = region.clone(); bucket_region.set_start_key(bucket_range.0.clone()); bucket_region.set_end_key(bucket_range.1.clone()); diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 4bbcc773763..9cf534c62b0 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -178,7 +178,7 @@ impl Samples { // evaluate the samples according to the given key range, it will update the // sample's left, right and contained counter. 
fn evaluate(&mut self, key_range: &KeyRange) { - for mut sample in self.0.iter_mut() { + for sample in self.0.iter_mut() { let order_start = if key_range.start_key.is_empty() { Ordering::Greater } else { @@ -496,10 +496,7 @@ pub struct WriteStats { impl WriteStats { pub fn add_query_num(&mut self, region_id: u64, kind: QueryKind) { - let query_stats = self - .region_infos - .entry(region_id) - .or_insert_with(QueryStats::default); + let query_stats = self.region_infos.entry(region_id).or_default(); query_stats.add_query_num(kind, 1); } @@ -988,8 +985,8 @@ mod tests { #[test] fn test_prefix_sum() { - let v = vec![1, 2, 3, 4, 5, 6, 7, 8, 9]; - let expect = vec![1, 3, 6, 10, 15, 21, 28, 36, 45]; + let v = [1, 2, 3, 4, 5, 6, 7, 8, 9]; + let expect = [1, 3, 6, 10, 15, 21, 28, 36, 45]; let pre = prefix_sum(v.iter(), |x| *x); for i in 0..v.len() { assert_eq!(expect[i], pre[i]); diff --git a/components/resolved_ts/src/cmd.rs b/components/resolved_ts/src/cmd.rs index 47d14304112..328f725edaa 100644 --- a/components/resolved_ts/src/cmd.rs +++ b/components/resolved_ts/src/cmd.rs @@ -213,13 +213,13 @@ fn group_row_changes(requests: Vec) -> (HashMap, bool) CF_WRITE => { if let Ok(ts) = key.decode_ts() { let key = key.truncate_ts().unwrap(); - let mut row = changes.entry(key).or_default(); + let row = changes.entry(key).or_default(); assert!(row.write.is_none()); row.write = Some(KeyOp::Put(Some(ts), value)); } } CF_LOCK => { - let mut row = changes.entry(key).or_default(); + let row = changes.entry(key).or_default(); assert!(row.lock.is_none()); row.lock = Some(KeyOp::Put(None, value)); } @@ -239,7 +239,7 @@ fn group_row_changes(requests: Vec) -> (HashMap, bool) match delete.cf.as_str() { CF_LOCK => { let key = Key::from_encoded(delete.take_key()); - let mut row = changes.entry(key).or_default(); + let row = changes.entry(key).or_default(); row.lock = Some(KeyOp::Delete); } "" | CF_WRITE | CF_DEFAULT => {} diff --git a/components/resolved_ts/src/endpoint.rs 
b/components/resolved_ts/src/endpoint.rs index 34f00672fa7..600da207ec4 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -65,7 +65,8 @@ impl Drop for ResolverStatus { locks, memory_quota, .. - } = self else { + } = self + else { return; }; if locks.is_empty() { @@ -96,7 +97,8 @@ impl ResolverStatus { locks, memory_quota, .. - } = self else { + } = self + else { panic!("region {:?} resolver has ready", region_id) }; // Check if adding a new lock or unlock will exceed the memory @@ -110,10 +112,7 @@ impl ResolverStatus { } fn update_tracked_index(&mut self, index: u64, region_id: u64) { - let ResolverStatus::Pending { - tracked_index, - .. - } = self else { + let ResolverStatus::Pending { tracked_index, .. } = self else { panic!("region {:?} resolver has ready", region_id) }; assert!( @@ -135,7 +134,8 @@ impl ResolverStatus { memory_quota, tracked_index, .. - } = self else { + } = self + else { panic!("region {:?} resolver has ready", region_id) }; // Must take locks, otherwise it may double free memory quota on drop. @@ -687,7 +687,7 @@ where scanner_pool, scan_concurrency_semaphore, regions: HashMap::default(), - _phantom: PhantomData::default(), + _phantom: PhantomData, }; ep.handle_advance_resolved_ts(leader_resolver); ep @@ -870,7 +870,6 @@ where // Tracking or untracking locks with incoming commands that corresponding // observe id is valid. 
- #[allow(clippy::drop_ref)] fn handle_change_log(&mut self, cmd_batch: Vec) { let size = cmd_batch.iter().map(|b| b.size()).sum::(); RTS_CHANNEL_PENDING_CMD_BYTES.sub(size as i64); @@ -884,7 +883,6 @@ where if observe_region.handle.id == observe_id { let logs = ChangeLog::encode_change_log(region_id, batch); if let Err(e) = observe_region.track_change_log(&logs) { - drop(observe_region); let backoff = match e { Error::MemoryQuotaExceeded(_) => Some(MEMORY_QUOTA_EXCEEDED_BACKOFF), Error::Other(_) => None, @@ -930,7 +928,7 @@ where } fn handle_advance_resolved_ts(&self, leader_resolver: LeadershipResolver) { - let regions = self.regions.keys().into_iter().copied().collect(); + let regions = self.regions.keys().copied().collect(); self.advance_worker.advance_ts_for_regions( regions, leader_resolver, diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index 6c8c90dc38f..ad052338fa2 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -100,7 +100,7 @@ impl, E: KvEngine> ScannerPool { Self { workers, cdc_handle, - _phantom: PhantomData::default(), + _phantom: PhantomData, } } @@ -168,6 +168,7 @@ impl, E: KvEngine> ScannerPool { self.workers.spawn(fut); } + #[allow(clippy::needless_pass_by_ref_mut)] async fn get_snapshot( task: &mut ScanTask, cdc_handle: T, diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index a356d30a7ac..0e40255b354 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -307,8 +307,8 @@ pub struct ResourceController { // 1. the priority factor is calculate based on read/write RU settings. // 2. 
for read request, we increase a constant virtual time delta at each `get_priority` call // because the cost can't be calculated at start, so we only increase a constant delta and - // increase the real cost after task is executed; but don't increase it at write because - // the cost is known so we just pre-consume it. + // increase the real cost after task is executed; but don't increase it at write because the + // cost is known so we just pre-consume it. is_read: bool, // Track the maximum ru quota used to calculate the factor of each resource group. // factor = max_ru_quota / group_ru_quota * 10.0 diff --git a/components/resource_metering/src/lib.rs b/components/resource_metering/src/lib.rs index ba8e2174e19..7b437ea4303 100644 --- a/components/resource_metering/src/lib.rs +++ b/components/resource_metering/src/lib.rs @@ -2,7 +2,7 @@ // TODO(mornyx): crate doc. -#![feature(hash_drain_filter)] +#![feature(hash_extract_if)] #![feature(core_intrinsics)] use std::{ diff --git a/components/resource_metering/src/model.rs b/components/resource_metering/src/model.rs index 6f7118ef9e1..03cd500eb2e 100644 --- a/components/resource_metering/src/model.rs +++ b/components/resource_metering/src/model.rs @@ -87,7 +87,7 @@ impl RawRecords { pdqselect::select_by(&mut buf, k, |a, b| b.cmp(a)); let kth = buf[k]; // Evict records with cpu time less or equal than `kth` - let evicted_records = self.records.drain_filter(|_, r| r.cpu_time <= kth); + let evicted_records = self.records.extract_if(|_, r| r.cpu_time <= kth); // Record evicted into others for (_, record) in evicted_records { others.merge(&record); diff --git a/components/resource_metering/src/recorder/sub_recorder/cpu.rs b/components/resource_metering/src/recorder/sub_recorder/cpu.rs index 8c4053a80ab..08675bb6153 100644 --- a/components/resource_metering/src/recorder/sub_recorder/cpu.rs +++ b/components/resource_metering/src/recorder/sub_recorder/cpu.rs @@ -9,7 +9,7 @@ use crate::{ localstorage::{LocalStorage, 
SharedTagInfos}, SubRecorder, }, - RawRecord, RawRecords, + RawRecords, }; /// An implementation of [SubRecorder] for collecting cpu statistics. @@ -37,7 +37,7 @@ impl SubRecorder for CpuRecorder { if *last_stat != cur_stat { let delta_ms = (cur_stat.total_cpu_time() - last_stat.total_cpu_time()) * 1_000.; - let record = records.entry(cur_tag).or_insert_with(RawRecord::default); + let record = records.entry(cur_tag).or_default(); record.cpu_time += delta_ms as u32; } thread_stat.stat = cur_stat; diff --git a/components/resource_metering/tests/recorder_test.rs b/components/resource_metering/tests/recorder_test.rs index daa371e7477..6e164b8e5e8 100644 --- a/components/resource_metering/tests/recorder_test.rs +++ b/components/resource_metering/tests/recorder_test.rs @@ -55,7 +55,7 @@ mod tests { if let Some(tag) = self.current_ctx { self.records .entry(tag.as_bytes().to_vec()) - .or_insert_with(RawRecord::default) + .or_default() .cpu_time += ms; } self.ops.push(op); @@ -140,7 +140,7 @@ mod tests { if let Ok(mut r) = self.records.lock() { for (tag, record) in records.records.iter() { r.entry(tag.extra_attachment.to_vec()) - .or_insert_with(RawRecord::default) + .or_default() .merge(record); } } @@ -156,10 +156,10 @@ mod tests { let mut records = self.records.lock().unwrap(); for k in expected.keys() { - records.entry(k.clone()).or_insert_with(RawRecord::default); + records.entry(k.clone()).or_default(); } for k in records.keys() { - expected.entry(k.clone()).or_insert_with(RawRecord::default); + expected.entry(k.clone()).or_default(); } for (k, expected_value) in expected { let value = records.get(&k).unwrap(); @@ -324,10 +324,10 @@ mod tests { fn merge( maps: impl IntoIterator, RawRecord>>, ) -> HashMap, RawRecord> { - let mut map = HashMap::default(); + let mut map: HashMap, RawRecord> = HashMap::default(); for m in maps { for (k, v) in m { - map.entry(k).or_insert_with(RawRecord::default).merge(&v); + map.entry(k).or_default().merge(&v); } } map diff --git 
a/components/server/src/common.rs b/components/server/src/common.rs index c8cf879d905..43b0314cbbe 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -558,7 +558,9 @@ impl EnginesResourceInfo { }); for (_, cache) in cached_latest_tablets.iter_mut() { - let Some(tablet) = cache.latest() else { continue }; + let Some(tablet) = cache.latest() else { + continue; + }; for cf in DATA_CFS { fetch_engine_cf(tablet, cf); } diff --git a/components/snap_recovery/src/leader_keeper.rs b/components/snap_recovery/src/leader_keeper.rs index 417d5becca3..48344fe5012 100644 --- a/components/snap_recovery/src/leader_keeper.rs +++ b/components/snap_recovery/src/leader_keeper.rs @@ -206,7 +206,7 @@ mod test { #[test] fn test_basic() { - let leaders = vec![1, 2, 3]; + let leaders = [1, 2, 3]; let mut store = MockStore::default(); store.regions = leaders.iter().copied().collect(); let mut lk = LeaderKeeper::::new(store, leaders); @@ -217,7 +217,7 @@ mod test { #[test] fn test_failure() { - let leaders = vec![1, 2, 3]; + let leaders = [1, 2, 3]; let mut store = MockStore::default(); store.regions = leaders.iter().copied().collect(); let mut lk = LeaderKeeper::::new(store, vec![1, 2, 3, 4]); diff --git a/components/sst_importer/src/import_mode2.rs b/components/sst_importer/src/import_mode2.rs index 70b7d7fac5e..4db29c47a6f 100644 --- a/components/sst_importer/src/import_mode2.rs +++ b/components/sst_importer/src/import_mode2.rs @@ -139,7 +139,7 @@ impl ImportModeSwitcherV2 { pub fn ranges_in_import(&self) -> HashSet { let inner = self.inner.lock().unwrap(); - HashSet::from_iter(inner.import_mode_ranges.keys().into_iter().cloned()) + HashSet::from_iter(inner.import_mode_ranges.keys().cloned()) } } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 181f9d67b2f..502a81ff6a6 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -367,8 +367,8 @@ 
impl SstImporter { // This method is blocking. It performs the following transformations before // writing to disk: // - // 1. only KV pairs in the *inclusive* range (`[start, end]`) are used. - // (set the range to `["", ""]` to import everything). + // 1. only KV pairs in the *inclusive* range (`[start, end]`) are used. (set + // the range to `["", ""]` to import everything). // 2. keys are rewritten according to the given rewrite rule. // // Both the range and rewrite keys are specified using origin keys. However, @@ -1541,7 +1541,7 @@ mod tests { let env = get_env(key_manager.clone(), None /* io_rate_limiter */).unwrap(); let db = new_test_engine_with_env(db_path.to_str().unwrap(), &[CF_DEFAULT], env); - let cases = vec![(0, 10), (5, 15), (10, 20), (0, 100)]; + let cases = [(0, 10), (5, 15), (10, 20), (0, 100)]; let mut ingested = Vec::new(); @@ -2055,13 +2055,10 @@ mod tests { false, ) .unwrap(); - let ext_storage = { - let inner = importer.wrap_kms( - importer.external_storage_or_cache(&backend, "").unwrap(), - false, - ); - inner - }; + let ext_storage = importer.wrap_kms( + importer.external_storage_or_cache(&backend, "").unwrap(), + false, + ); // test do_read_kv_file() let output = block_on_external_io(importer.do_read_kv_file( diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index ff7526172d5..654971b0d41 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs @@ -97,7 +97,8 @@ pub fn copy_sst_for_ingestion, Q: AsRef>( let mut pmts = file_system::metadata(clone)?.permissions(); if pmts.readonly() { - pmts.set_readonly(false); + use std::os::unix::fs::PermissionsExt; + pmts.set_mode(0o644); file_system::set_permissions(clone, pmts)?; } diff --git a/components/test_coprocessor/src/store.rs b/components/test_coprocessor/src/store.rs index 96f405d8f39..6763ea7bb1a 100644 --- a/components/test_coprocessor/src/store.rs +++ b/components/test_coprocessor/src/store.rs @@ -203,7 +203,7 @@ 
impl Store { } pub fn put(&mut self, ctx: Context, mut kv: Vec<(Vec, Vec)>) { - self.handles.extend(kv.iter().map(|&(ref k, _)| k.clone())); + self.handles.extend(kv.iter().map(|(k, _)| k.clone())); let pk = kv[0].0.clone(); let kv = kv .drain(..) diff --git a/components/test_coprocessor_plugin/example_plugin/src/lib.rs b/components/test_coprocessor_plugin/example_plugin/src/lib.rs index afcaa4962b9..d383797c069 100644 --- a/components/test_coprocessor_plugin/example_plugin/src/lib.rs +++ b/components/test_coprocessor_plugin/example_plugin/src/lib.rs @@ -18,4 +18,4 @@ impl CoprocessorPlugin for ExamplePlugin { } } -declare_plugin!(ExamplePlugin::default()); +declare_plugin!(ExamplePlugin); diff --git a/components/test_pd/src/server.rs b/components/test_pd/src/server.rs index 90a420fbba0..02833e030eb 100644 --- a/components/test_pd/src/server.rs +++ b/components/test_pd/src/server.rs @@ -128,12 +128,8 @@ impl Server { } #[allow(unused_mut)] -fn hijack_unary( - mock: &mut PdMock, - ctx: RpcContext<'_>, - sink: UnarySink, - f: F, -) where +fn hijack_unary(mock: &PdMock, ctx: RpcContext<'_>, sink: UnarySink, f: F) +where R: Send + 'static, F: Fn(&dyn PdMocker) -> Option>, { diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index c81230f6a16..58df5998758 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -1438,7 +1438,7 @@ impl TestPdClient { pub fn switch_replication_mode(&self, state: DrAutoSyncState, available_stores: Vec) { let mut cluster = self.cluster.wl(); let status = cluster.replication_status.as_mut().unwrap(); - let mut dr = status.mut_dr_auto_sync(); + let dr = status.mut_dr_auto_sync(); dr.state_id += 1; dr.set_state(state); dr.available_stores = available_stores; diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 8ede3290167..346813e7d1f 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ 
b/components/test_raftstore-v2/src/cluster.rs @@ -220,7 +220,7 @@ pub trait Simulator { None => { error!("call_query_on_node receives none response"; "request" => ?request); // Do not unwrap here, sometimes raftstore v2 may return none. - return Err(box_err!("receives none response {:?}", request)); + Err(box_err!("receives none response {:?}", request)) } } } @@ -1612,6 +1612,7 @@ impl, EK: KvEngine> Cluster { ) } + #[allow(clippy::let_underscore_future)] pub fn merge_region(&mut self, source: u64, target: u64, _cb: Callback) { // FIXME: callback is ignored. let mut req = self.new_prepare_merge(source, target); diff --git a/components/test_raftstore-v2/src/lib.rs b/components/test_raftstore-v2/src/lib.rs index 685affe45d0..45642df1e7f 100644 --- a/components/test_raftstore-v2/src/lib.rs +++ b/components/test_raftstore-v2/src/lib.rs @@ -3,6 +3,8 @@ #![feature(type_alias_impl_trait)] #![feature(return_position_impl_trait_in_trait)] #![feature(let_chains)] +#![allow(clippy::needless_pass_by_ref_mut)] +#![allow(clippy::arc_with_non_send_sync)] mod cluster; mod node; diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index d63ca0aa2f2..70b6ccb1407 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -258,7 +258,7 @@ impl Simulator for NodeCluster { ) } else { let trans = self.trans.core.lock().unwrap(); - let &(ref snap_mgr, _) = &trans.snap_paths[&node_id]; + let (snap_mgr, _) = &trans.snap_paths[&node_id]; (snap_mgr.clone(), None) }; self.snap_mgrs.insert(node_id, snap_mgr.clone()); diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 7b5d501a59f..a7d64591fe1 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -1006,7 +1006,18 @@ pub fn must_new_cluster_and_kv_client_mul( TikvClient, Context, ) { - let (cluster, leader, ctx) = must_new_cluster_mul(count); + 
must_new_cluster_with_cfg_and_kv_client_mul(count, |_| {}) +} + +pub fn must_new_cluster_with_cfg_and_kv_client_mul( + count: usize, + configure: impl FnMut(&mut Cluster, RocksEngine>), +) -> ( + Cluster, RocksEngine>, + TikvClient, + Context, +) { + let (cluster, leader, ctx) = must_new_and_configure_cluster_mul(count, configure); let env = Arc::new(Environment::new(1)); let channel = @@ -1015,6 +1026,7 @@ pub fn must_new_cluster_and_kv_client_mul( (cluster, client, ctx) } + pub fn must_new_cluster_mul( count: usize, ) -> ( diff --git a/components/test_raftstore/src/lib.rs b/components/test_raftstore/src/lib.rs index 04dfbd24de1..6f48c17190a 100644 --- a/components/test_raftstore/src/lib.rs +++ b/components/test_raftstore/src/lib.rs @@ -1,6 +1,8 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. #![feature(let_chains)] +#![allow(clippy::needless_pass_by_ref_mut)] +#![allow(clippy::arc_with_non_send_sync)] #[macro_use] extern crate lazy_static; diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index f429f27ff8b..8a9969c1913 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -281,7 +281,7 @@ impl Simulator for NodeCluster { (snap_mgr, Some(tmp)) } else { let trans = self.trans.core.lock().unwrap(); - let &(ref snap_mgr, _) = &trans.snap_paths[&node_id]; + let (snap_mgr, _) = &trans.snap_paths[&node_id]; (snap_mgr.clone(), None) }; diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 8d26bae968d..0df44b4e784 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -918,8 +918,14 @@ pub fn must_new_cluster_and_kv_client() -> (Cluster, TikvClient, pub fn must_new_cluster_and_kv_client_mul( count: usize, ) -> (Cluster, TikvClient, Context) { - let (cluster, leader, ctx) = must_new_cluster_mul(count); + must_new_cluster_with_cfg_and_kv_client_mul(count, |_| {}) +} +pub fn 
must_new_cluster_with_cfg_and_kv_client_mul( + count: usize, + configure: impl FnMut(&mut Cluster), +) -> (Cluster, TikvClient, Context) { + let (cluster, leader, ctx) = must_new_and_configure_cluster_mul(count, configure); let env = Arc::new(Environment::new(1)); let channel = ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); diff --git a/components/tidb_query_codegen/src/rpn_function.rs b/components/tidb_query_codegen/src/rpn_function.rs index 33976939c83..ea3017d5d02 100644 --- a/components/tidb_query_codegen/src/rpn_function.rs +++ b/components/tidb_query_codegen/src/rpn_function.rs @@ -1739,27 +1739,24 @@ mod tests_normal { /// Compare TokenStream with all white chars trimmed. fn assert_token_stream_equal(l: TokenStream, r: TokenStream) { - let result = l - .clone() - .into_iter() - .eq_by(r.clone().into_iter(), |x, y| match x { - TokenTree::Ident(x) => matches!(y, TokenTree::Ident(y) if x == y), - TokenTree::Literal(x) => { - matches!(y, TokenTree::Literal(y) if x.to_string() == y.to_string()) - } - TokenTree::Punct(x) => { - matches!(y, TokenTree::Punct(y) if x.to_string() == y.to_string()) - } - TokenTree::Group(x) => { - if let TokenTree::Group(y) = y { - assert_token_stream_equal(x.stream(), y.stream()); + let result = l.clone().into_iter().eq_by(r.clone(), |x, y| match x { + TokenTree::Ident(x) => matches!(y, TokenTree::Ident(y) if x == y), + TokenTree::Literal(x) => { + matches!(y, TokenTree::Literal(y) if x.to_string() == y.to_string()) + } + TokenTree::Punct(x) => { + matches!(y, TokenTree::Punct(y) if x.to_string() == y.to_string()) + } + TokenTree::Group(x) => { + if let TokenTree::Group(y) = y { + assert_token_stream_equal(x.stream(), y.stream()); - true - } else { - false - } + true + } else { + false } - }); + } + }); assert!(result, "expect: {:#?}, actual: {:#?}", &l, &r); } diff --git a/components/tidb_query_datatype/src/codec/collation/mod.rs b/components/tidb_query_datatype/src/codec/collation/mod.rs index 
22127e62f49..738e0020de7 100644 --- a/components/tidb_query_datatype/src/codec/collation/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/mod.rs @@ -251,7 +251,7 @@ where { #[inline] fn partial_cmp(&self, other: &Self) -> Option { - C::sort_compare(self.inner.as_ref(), other.inner.as_ref()).ok() + Some(self.cmp(other)) } } diff --git a/components/tidb_query_datatype/src/codec/convert.rs b/components/tidb_query_datatype/src/codec/convert.rs index 418841547ca..d2bbee78078 100644 --- a/components/tidb_query_datatype/src/codec/convert.rs +++ b/components/tidb_query_datatype/src/codec/convert.rs @@ -574,13 +574,13 @@ pub fn bytes_to_int_without_context(bytes: &[u8]) -> Result { if let Some(&c) = trimed.next() { if c == b'-' { negative = true; - } else if (b'0'..=b'9').contains(&c) { + } else if c.is_ascii_digit() { r = Some(i64::from(c) - i64::from(b'0')); } else if c != b'+' { return Ok(0); } - for c in trimed.take_while(|&c| (b'0'..=b'9').contains(c)) { + for c in trimed.take_while(|&c| c.is_ascii_digit()) { let cur = i64::from(*c - b'0'); r = r.and_then(|r| r.checked_mul(10)).and_then(|r| { if negative { @@ -605,13 +605,13 @@ pub fn bytes_to_uint_without_context(bytes: &[u8]) -> Result { let mut trimed = bytes.iter().skip_while(|&&b| b == b' ' || b == b'\t'); let mut r = Some(0u64); if let Some(&c) = trimed.next() { - if (b'0'..=b'9').contains(&c) { + if c.is_ascii_digit() { r = Some(u64::from(c) - u64::from(b'0')); } else if c != b'+' { return Ok(0); } - for c in trimed.take_while(|&c| (b'0'..=b'9').contains(c)) { + for c in trimed.take_while(|&c| c.is_ascii_digit()) { r = r .and_then(|r| r.checked_mul(10)) .and_then(|r| r.checked_add(u64::from(*c - b'0'))); @@ -856,7 +856,7 @@ pub fn get_valid_int_prefix_helper<'a>( if (c == '+' || c == '-') && i == 0 { continue; } - if ('0'..='9').contains(&c) { + if c.is_ascii_digit() { valid_len = i + 1; continue; } @@ -917,7 +917,7 @@ pub fn get_valid_float_prefix_helper<'a>( break; } e_idx = i - } else if 
!('0'..='9').contains(&c) { + } else if !c.is_ascii_digit() { break; } else { saw_digit = true; diff --git a/components/tidb_query_datatype/src/codec/data_type/mod.rs b/components/tidb_query_datatype/src/codec/data_type/mod.rs index 8ca36790824..b464b1119c8 100644 --- a/components/tidb_query_datatype/src/codec/data_type/mod.rs +++ b/components/tidb_query_datatype/src/codec/data_type/mod.rs @@ -248,7 +248,7 @@ macro_rules! impl_evaluable_type { } #[inline] - fn borrow_scalar_value_ref<'a>(v: ScalarValueRef<'a>) -> Option<&'a Self> { + fn borrow_scalar_value_ref(v: ScalarValueRef<'_>) -> Option<&Self> { match v { ScalarValueRef::$ty(x) => x, other => panic!( diff --git a/components/tidb_query_datatype/src/codec/data_type/scalar.rs b/components/tidb_query_datatype/src/codec/data_type/scalar.rs index c74423107e4..ff66ddc42ee 100644 --- a/components/tidb_query_datatype/src/codec/data_type/scalar.rs +++ b/components/tidb_query_datatype/src/codec/data_type/scalar.rs @@ -467,24 +467,23 @@ impl<'a> ScalarValueRef<'a> { impl<'a> Ord for ScalarValueRef<'a> { fn cmp(&self, other: &Self) -> Ordering { - self.partial_cmp(other) - .expect("Cannot compare two ScalarValueRef in different type") - } -} - -impl<'a> PartialOrd for ScalarValueRef<'a> { - fn partial_cmp(&self, other: &Self) -> Option { match_template_evaltype! { TT, match (self, other) { // v1 and v2 are `Option`. However, in MySQL NULL values are considered lower // than any non-NULL value, so using `Option::PartialOrd` directly is fine. 
- (ScalarValueRef::TT(v1), ScalarValueRef::TT(v2)) => Some(v1.cmp(v2)), - _ => None, + (ScalarValueRef::TT(v1), ScalarValueRef::TT(v2)) => v1.cmp(v2), + _ => panic!("Cannot compare two ScalarValueRef in different type"), } } } } +impl<'a> PartialOrd for ScalarValueRef<'a> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + impl<'a> PartialEq for ScalarValueRef<'a> { fn eq(&self, other: &ScalarValue) -> bool { self == &other.as_scalar_value_ref() diff --git a/components/tidb_query_datatype/src/codec/datum.rs b/components/tidb_query_datatype/src/codec/datum.rs index dde98003475..f91d204b3b0 100644 --- a/components/tidb_query_datatype/src/codec/datum.rs +++ b/components/tidb_query_datatype/src/codec/datum.rs @@ -668,7 +668,7 @@ impl Datum { Datum::F64(res) } } - (&Datum::Dec(ref l), &Datum::Dec(ref r)) => { + (Datum::Dec(l), Datum::Dec(r)) => { let dec: Result = (l + r).into(); return dec.map(Datum::Dec); } @@ -700,7 +700,7 @@ impl Datum { } (&Datum::U64(l), &Datum::U64(r)) => l.checked_sub(r).into(), (&Datum::F64(l), &Datum::F64(r)) => return Ok(Datum::F64(l - r)), - (&Datum::Dec(ref l), &Datum::Dec(ref r)) => { + (Datum::Dec(l), Datum::Dec(r)) => { let dec: Result = (l - r).into(); return dec.map(Datum::Dec); } @@ -724,7 +724,7 @@ impl Datum { } (&Datum::U64(l), &Datum::U64(r)) => l.checked_mul(r).into(), (&Datum::F64(l), &Datum::F64(r)) => return Ok(Datum::F64(l * r)), - (&Datum::Dec(ref l), &Datum::Dec(ref r)) => return Ok(Datum::Dec((l * r).unwrap())), + (Datum::Dec(l), Datum::Dec(r)) => return Ok(Datum::Dec((l * r).unwrap())), (l, r) => return Err(invalid_type!("{} can't multiply {}", l, r)), }; @@ -1179,7 +1179,7 @@ mod tests { | (&Datum::Null, &Datum::Null) | (&Datum::Time(_), &Datum::Time(_)) | (&Datum::Json(_), &Datum::Json(_)) => true, - (&Datum::Dec(ref d1), &Datum::Dec(ref d2)) => d1.prec_and_frac() == d2.prec_and_frac(), + (Datum::Dec(d1), Datum::Dec(d2)) => d1.prec_and_frac() == d2.prec_and_frac(), _ => false, } } diff 
--git a/components/tidb_query_datatype/src/codec/mysql/decimal.rs b/components/tidb_query_datatype/src/codec/mysql/decimal.rs index 143ec6c7760..8853a1d6a16 100644 --- a/components/tidb_query_datatype/src/codec/mysql/decimal.rs +++ b/components/tidb_query_datatype/src/codec/mysql/decimal.rs @@ -1872,7 +1872,7 @@ impl<'a> ConvertTo for JsonRef<'a> { fn first_non_digit(bs: &[u8], start_idx: usize) -> usize { bs.iter() .skip(start_idx) - .position(|c| !(b'0'..=b'9').contains(c)) + .position(|c| !c.is_ascii_digit()) .map_or_else(|| bs.len(), |s| s + start_idx) } diff --git a/components/tidb_query_datatype/src/codec/mysql/duration.rs b/components/tidb_query_datatype/src/codec/mysql/duration.rs index 7279f788146..4b735977712 100644 --- a/components/tidb_query_datatype/src/codec/mysql/duration.rs +++ b/components/tidb_query_datatype/src/codec/mysql/duration.rs @@ -629,14 +629,14 @@ impl Eq for Duration {} impl PartialOrd for Duration { #[inline] fn partial_cmp(&self, rhs: &Duration) -> Option { - self.nanos.partial_cmp(&rhs.nanos) + Some(self.cmp(rhs)) } } impl Ord for Duration { #[inline] fn cmp(&self, rhs: &Duration) -> Ordering { - self.partial_cmp(rhs).unwrap() + self.nanos.partial_cmp(&rhs.nanos).unwrap() } } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs b/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs index d9104385bc6..73e04885890 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs @@ -77,6 +77,8 @@ impl<'a> PartialEq for JsonRef<'a> { .map_or(false, |r| r == Ordering::Equal) } } + +#[allow(clippy::incorrect_partial_ord_impl_on_ord_type)] impl<'a> PartialOrd for JsonRef<'a> { // See `CompareBinary` in TiDB `types/json/binary_functions.go` fn partial_cmp(&self, right: &JsonRef<'_>) -> Option { @@ -197,7 +199,7 @@ impl PartialEq for Json { impl PartialOrd for Json { fn partial_cmp(&self, right: &Json) -> Option { - 
self.as_ref().partial_cmp(&right.as_ref()) + Some(self.cmp(right)) } } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs b/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs index 867d8ec2c20..f76b29790f9 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs @@ -28,9 +28,9 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryObject` in TiDB `types/json/binary.go` - fn write_json_obj_from_keys_values<'a>( + fn write_json_obj_from_keys_values( &mut self, - mut entries: Vec<(&[u8], JsonRef<'a>)>, + mut entries: Vec<(&[u8], JsonRef<'_>)>, ) -> Result<()> { entries.sort_by(|a, b| a.0.cmp(b.0)); // object: element-count size key-entry* value-entry* key* value* @@ -122,7 +122,7 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryArray` in TiDB `types/json/binary.go` - fn write_json_ref_array<'a>(&mut self, data: &[JsonRef<'a>]) -> Result<()> { + fn write_json_ref_array(&mut self, data: &[JsonRef<'_>]) -> Result<()> { let element_count = data.len(); let value_entries_len = VALUE_ENTRY_LEN * element_count; let values_len = data.iter().fold(0, |acc, v| acc + v.encoded_len()); @@ -167,7 +167,7 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryValElem` in TiDB `types/json/binary.go` - fn write_value_entry<'a>(&mut self, value_offset: &mut u32, v: &JsonRef<'a>) -> Result<()> { + fn write_value_entry(&mut self, value_offset: &mut u32, v: &JsonRef<'_>) -> Result<()> { let tp = v.get_type(); self.write_u8(tp as u8)?; match tp { diff --git a/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs index b359158d06b..3cc78270d60 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs @@ -41,7 +41,7 @@ impl<'a> JsonRef<'a> { } } let mut res = 
self.to_owned(); - for (expr, value) in path_expr_list.iter().zip(values.into_iter()) { + for (expr, value) in path_expr_list.iter().zip(values) { let modifier = BinaryModifier::new(res.as_ref()); res = match mt { ModifyType::Insert => modifier.insert(expr, value)?, diff --git a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs index 4c6c2f676d7..44228f2d88e 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs @@ -1094,7 +1094,7 @@ impl Time { ) } - fn try_into_chrono_datetime(self, ctx: &mut EvalContext) -> Result> { + fn try_into_chrono_datetime(self, ctx: &EvalContext) -> Result> { chrono_datetime( &ctx.cfg.tz, self.year(), @@ -2670,9 +2670,9 @@ mod tests { #[test] fn test_no_zero_in_date() -> Result<()> { - let cases = vec!["2019-01-00", "2019-00-01"]; + let cases = ["2019-01-00", "2019-00-01"]; - for &case in cases.iter() { + for case in cases { // Enable NO_ZERO_IN_DATE only. If zero-date is encountered, a warning is // produced. let mut ctx = EvalContext::from(TimeEnv { @@ -2817,7 +2817,7 @@ mod tests { let actual = Time::from_duration(&mut ctx, duration, TimeType::DateTime)?; let today = actual - .try_into_chrono_datetime(&mut ctx)? + .try_into_chrono_datetime(&ctx)? 
.checked_sub_signed(chrono::Duration::nanoseconds(duration.to_nanos())) .unwrap(); @@ -2837,7 +2837,7 @@ mod tests { let mut ctx = EvalContext::default(); for i in 2..10 { let actual = Time::from_local_time(&mut ctx, TimeType::DateTime, i % MAX_FSP)?; - let c_datetime = actual.try_into_chrono_datetime(&mut ctx)?; + let c_datetime = actual.try_into_chrono_datetime(&ctx)?; let now0 = c_datetime.timestamp_millis() as u64; let now1 = Utc::now().timestamp_millis() as u64; diff --git a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs index da117c96e2c..aa5eb3fc56f 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs @@ -298,7 +298,7 @@ impl<'a, T: PrimInt> LeBytes<'a, T> { fn new(slice: &'a [u8]) -> Self { Self { slice, - _marker: PhantomData::default(), + _marker: PhantomData, } } diff --git a/components/tidb_query_datatype/src/codec/table.rs b/components/tidb_query_datatype/src/codec/table.rs index 37becbfb801..81ef4b072c6 100644 --- a/components/tidb_query_datatype/src/codec/table.rs +++ b/components/tidb_query_datatype/src/codec/table.rs @@ -528,7 +528,7 @@ pub fn generate_index_data_for_test( let mut expect_row = HashMap::default(); let mut v: Vec<_> = indice .iter() - .map(|&(ref cid, ref value)| { + .map(|(cid, value)| { expect_row.insert( *cid, datum::encode_key(&mut EvalContext::default(), &[value.clone()]).unwrap(), diff --git a/components/tidb_query_executors/src/index_scan_executor.rs b/components/tidb_query_executors/src/index_scan_executor.rs index 3a5c53a4d09..5ebf8a031d3 100644 --- a/components/tidb_query_executors/src/index_scan_executor.rs +++ b/components/tidb_query_executors/src/index_scan_executor.rs @@ -611,8 +611,8 @@ impl IndexScanExecutorImpl { } #[inline] - fn build_operations<'a, 'b>( - &'b self, + fn build_operations<'a>( + &self, mut key_payload: &'a [u8], index_value: &'a [u8], ) 
-> Result<(DecodeHandleOp<'a>, DecodePartitionIdOp<'a>, RestoreData<'a>)> { diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 7c410befb25..27e52dde288 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -137,31 +137,31 @@ impl BatchExecutorsRunner<()> { .map_err(|e| other_err!("BatchProjectionExecutor: {}", e))?; } ExecType::TypeJoin => { - other_err!("Join executor not implemented"); + return Err(other_err!("Join executor not implemented")); } ExecType::TypeKill => { - other_err!("Kill executor not implemented"); + return Err(other_err!("Kill executor not implemented")); } ExecType::TypeExchangeSender => { - other_err!("ExchangeSender executor not implemented"); + return Err(other_err!("ExchangeSender executor not implemented")); } ExecType::TypeExchangeReceiver => { - other_err!("ExchangeReceiver executor not implemented"); + return Err(other_err!("ExchangeReceiver executor not implemented")); } ExecType::TypePartitionTableScan => { - other_err!("PartitionTableScan executor not implemented"); + return Err(other_err!("PartitionTableScan executor not implemented")); } ExecType::TypeSort => { - other_err!("Sort executor not implemented"); + return Err(other_err!("Sort executor not implemented")); } ExecType::TypeWindow => { - other_err!("Window executor not implemented"); + return Err(other_err!("Window executor not implemented")); } ExecType::TypeExpand => { - other_err!("Expand executor not implemented"); + return Err(other_err!("Expand executor not implemented")); } ExecType::TypeExpand2 => { - other_err!("Expand2 executor not implemented"); + return Err(other_err!("Expand2 executor not implemented")); } } } diff --git a/components/tidb_query_executors/src/selection_executor.rs b/components/tidb_query_executors/src/selection_executor.rs index bd65547109d..ffcb22671da 100644 --- a/components/tidb_query_executors/src/selection_executor.rs +++ 
b/components/tidb_query_executors/src/selection_executor.rs @@ -537,7 +537,7 @@ mod tests { }) .collect(); - for predicates in vec![ + for predicates in [ // Swap predicates should produce same results. vec![predicate[0](), predicate[1]()], vec![predicate[1](), predicate[0]()], @@ -572,7 +572,7 @@ mod tests { }) .collect(); - for predicates in vec![ + for predicates in [ // Swap predicates should produce same results. vec![predicate[0](), predicate[1](), predicate[2]()], vec![predicate[1](), predicate[2](), predicate[0]()], diff --git a/components/tidb_query_executors/src/util/aggr_executor.rs b/components/tidb_query_executors/src/util/aggr_executor.rs index 0535e8dbd83..a5d760dc80d 100644 --- a/components/tidb_query_executors/src/util/aggr_executor.rs +++ b/components/tidb_query_executors/src/util/aggr_executor.rs @@ -641,8 +641,8 @@ pub mod tests { )) as Box> }; - let test_paging_size = vec![2, 5, 7]; - let expect_call_num = vec![1, 3, 4]; + let test_paging_size = [2, 5, 7]; + let expect_call_num = [1, 3, 4]; let expect_row_num = vec![vec![4], vec![0, 0, 5], vec![0, 0, 0, 6]]; let executor_builders: Vec) -> _>> = vec![Box::new(exec_fast), Box::new(exec_slow)]; diff --git a/components/tidb_query_executors/src/util/mod.rs b/components/tidb_query_executors/src/util/mod.rs index ca05e49fcd3..db456a84883 100644 --- a/components/tidb_query_executors/src/util/mod.rs +++ b/components/tidb_query_executors/src/util/mod.rs @@ -28,13 +28,13 @@ pub fn ensure_columns_decoded( /// Evaluates expressions and outputs the result into the given Vec. Lifetime of /// the expressions are erased. 
-pub unsafe fn eval_exprs_decoded_no_lifetime<'a>( +pub unsafe fn eval_exprs_decoded_no_lifetime( ctx: &mut EvalContext, exprs: &[RpnExpression], schema: &[FieldType], input_physical_columns: &LazyBatchColumnVec, input_logical_rows: &[usize], - output: &mut Vec>, + output: &mut Vec>, ) -> Result<()> { unsafe fn erase_lifetime<'a, T: ?Sized>(v: &T) -> &'a T { &*(v as *const T) diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index 76e90f79c5b..b6619f9d8cc 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -6528,7 +6528,7 @@ mod tests { "cast_decimal_as_duration", ); - let values = vec![ + let values = [ Decimal::from_bytes(b"9995959").unwrap().unwrap(), Decimal::from_bytes(b"-9995959").unwrap().unwrap(), ]; diff --git a/components/tidb_query_expr/src/impl_miscellaneous.rs b/components/tidb_query_expr/src/impl_miscellaneous.rs index 5d2daed7f9a..663571804ae 100644 --- a/components/tidb_query_expr/src/impl_miscellaneous.rs +++ b/components/tidb_query_expr/src/impl_miscellaneous.rs @@ -58,7 +58,7 @@ pub fn inet_aton(addr: BytesRef) -> Result> { } let (mut byte_result, mut result, mut dot_count): (u64, u64, usize) = (0, 0, 0); for c in addr.chars() { - if ('0'..='9').contains(&c) { + if c.is_ascii_digit() { let digit = c as u64 - '0' as u64; byte_result = byte_result * 10 + digit; if byte_result > 255 { @@ -501,8 +501,9 @@ mod tests { (Some(hex("00000000")), Some(b"0.0.0.0".to_vec())), (Some(hex("0A000509")), Some(b"10.0.5.9".to_vec())), ( + // the output format has changed, see: https://github.com/rust-lang/rust/pull/112606 Some(hex("00000000000000000000000001020304")), - Some(b"::1.2.3.4".to_vec()), + Some(b"::102:304".to_vec()), ), ( Some(hex("00000000000000000000FFFF01020304")), diff --git a/components/tidb_query_expr/src/impl_string.rs b/components/tidb_query_expr/src/impl_string.rs index f3b9b03c287..45754d0a101 100644 --- 
a/components/tidb_query_expr/src/impl_string.rs +++ b/components/tidb_query_expr/src/impl_string.rs @@ -63,13 +63,13 @@ pub fn oct_string(s: BytesRef, writer: BytesWriter) -> Result { if let Some(&c) = trimmed.next() { if c == b'-' { negative = true; - } else if (b'0'..=b'9').contains(&c) { + } else if c.is_ascii_digit() { r = Some(u64::from(c) - u64::from(b'0')); } else if c != b'+' { return Ok(writer.write(Some(b"0".to_vec()))); } - for c in trimmed.take_while(|&c| (b'0'..=b'9').contains(c)) { + for c in trimmed.take_while(|&c| c.is_ascii_digit()) { r = r .and_then(|r| r.checked_mul(10)) .and_then(|r| r.checked_add(u64::from(*c - b'0'))); @@ -879,7 +879,7 @@ impl TrimDirection { } #[inline] -fn trim<'a, 'b>(string: &'a [u8], pattern: &'b [u8], direction: TrimDirection) -> &'a [u8] { +fn trim<'a>(string: &'a [u8], pattern: &[u8], direction: TrimDirection) -> &'a [u8] { if pattern.is_empty() { return string; } diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index c2ef6722148..40c1f485e54 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -10,6 +10,8 @@ #![allow(elided_lifetimes_in_paths)] // Necessary until rpn_fn accepts functions annotated with lifetimes. 
#![allow(incomplete_features)] +#![allow(clippy::needless_raw_string_hashes)] +#![allow(clippy::needless_return_with_question_mark)] #![feature(proc_macro_hygiene)] #![feature(specialization)] #![feature(test)] diff --git a/components/tidb_query_expr/src/types/expr_eval.rs b/components/tidb_query_expr/src/types/expr_eval.rs index b892333b0ef..e3ab7d35297 100644 --- a/components/tidb_query_expr/src/types/expr_eval.rs +++ b/components/tidb_query_expr/src/types/expr_eval.rs @@ -1091,16 +1091,13 @@ mod tests { use tipb::{Expr, ScalarFuncSig}; #[allow(clippy::trivially_copy_pass_by_ref)] - #[rpn_fn(capture = [metadata], metadata_mapper = prepare_a::)] - fn fn_a_nonnull( - metadata: &i64, - v: &Int, - ) -> Result> { + #[rpn_fn(capture = [metadata], metadata_mapper = prepare_a)] + fn fn_a_nonnull(metadata: &i64, v: &Int) -> Result> { assert_eq!(*metadata, 42); Ok(Some(v + *metadata)) } - fn prepare_a(_expr: &mut Expr) -> Result { + fn prepare_a(_expr: &mut Expr) -> Result { Ok(42) } @@ -1136,7 +1133,7 @@ mod tests { // fn_b: CastIntAsReal // fn_c: CastIntAsString Ok(match expr.get_sig() { - ScalarFuncSig::CastIntAsInt => fn_a_nonnull_fn_meta::(), + ScalarFuncSig::CastIntAsInt => fn_a_nonnull_fn_meta(), ScalarFuncSig::CastIntAsReal => fn_b_fn_meta::(), ScalarFuncSig::CastIntAsString => fn_c_fn_meta::(), _ => unreachable!(), diff --git a/components/tikv_kv/src/cursor.rs b/components/tikv_kv/src/cursor.rs index 576aa5cfa76..858edfffec2 100644 --- a/components/tikv_kv/src/cursor.rs +++ b/components/tikv_kv/src/cursor.rs @@ -605,7 +605,7 @@ mod tests { (b"a9".to_vec(), b"v9".to_vec()), ]; - for &(ref k, ref v) in &base_data { + for (k, v) in &base_data { engine.put(&data_key(k), v).unwrap(); } (r, base_data) diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 25f58352750..43e5f1bea05 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -9,6 +9,7 @@ #![feature(min_specialization)] #![feature(type_alias_impl_trait)] 
#![feature(associated_type_defaults)] +#![feature(impl_trait_in_assoc_type)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/tikv_util/src/logger/formatter.rs b/components/tikv_util/src/logger/formatter.rs index c53c5896519..b786d2aa681 100644 --- a/components/tikv_util/src/logger/formatter.rs +++ b/components/tikv_util/src/logger/formatter.rs @@ -11,9 +11,9 @@ where let mut start = 0; let bytes = file_name.as_bytes(); for (index, &b) in bytes.iter().enumerate() { - if (b'A'..=b'Z').contains(&b) - || (b'a'..=b'z').contains(&b) - || (b'0'..=b'9').contains(&b) + if b.is_ascii_uppercase() + || b.is_ascii_lowercase() + || b.is_ascii_digit() || b == b'.' || b == b'-' || b == b'_' diff --git a/components/tikv_util/src/lru.rs b/components/tikv_util/src/lru.rs index 76fad6e8a34..a2d0943df90 100644 --- a/components/tikv_util/src/lru.rs +++ b/components/tikv_util/src/lru.rs @@ -247,7 +247,7 @@ where HashMapEntry::Occupied(mut e) => { self.size_policy.on_remove(e.key(), &e.get().value); self.size_policy.on_insert(e.key(), &value); - let mut entry = e.get_mut(); + let entry = e.get_mut(); self.trace.promote(entry.record); entry.value = value; } diff --git a/components/tikv_util/src/memory.rs b/components/tikv_util/src/memory.rs index 291254c5227..a2897809683 100644 --- a/components/tikv_util/src/memory.rs +++ b/components/tikv_util/src/memory.rs @@ -33,7 +33,7 @@ pub trait HeapSize { impl HeapSize for [u8] { fn heap_size(&self) -> usize { - self.len() * mem::size_of::() + mem::size_of_val(self) } } diff --git a/components/tikv_util/src/metrics/allocator_metrics.rs b/components/tikv_util/src/metrics/allocator_metrics.rs index 260aa88ac8e..af22e411767 100644 --- a/components/tikv_util/src/metrics/allocator_metrics.rs +++ b/components/tikv_util/src/metrics/allocator_metrics.rs @@ -64,7 +64,7 @@ impl Collector for AllocStatsCollector { .set(dealloc as _); }); let mut g = self.memory_stats.collect(); - g.extend(self.allocation.collect().into_iter()); + 
g.extend(self.allocation.collect()); g } } diff --git a/components/tikv_util/src/mpsc/future.rs b/components/tikv_util/src/mpsc/future.rs index 4492e33a933..354ef74adb0 100644 --- a/components/tikv_util/src/mpsc/future.rs +++ b/components/tikv_util/src/mpsc/future.rs @@ -302,6 +302,8 @@ mod tests { use super::*; + // the JoinHandler is useless here, so just ignore this warning. + #[allow(clippy::let_underscore_future)] fn spawn_and_wait( rx_builder: impl FnOnce() -> S, ) -> (Runtime, Arc) { diff --git a/components/tikv_util/src/sys/cpu_time.rs b/components/tikv_util/src/sys/cpu_time.rs index 6ec1621c629..61608d1518f 100644 --- a/components/tikv_util/src/sys/cpu_time.rs +++ b/components/tikv_util/src/sys/cpu_time.rs @@ -333,7 +333,7 @@ mod tests { for _ in 0..num * 10 { std::thread::spawn(move || { loop { - let _ = (0..10_000_000).into_iter().sum::(); + let _ = (0..10_000_000).sum::(); } }); } diff --git a/components/tikv_util/src/timer.rs b/components/tikv_util/src/timer.rs index bb555e11794..a7a2b421ab0 100644 --- a/components/tikv_util/src/timer.rs +++ b/components/tikv_util/src/timer.rs @@ -81,14 +81,14 @@ impl Eq for TimeoutTask {} impl PartialOrd for TimeoutTask { fn partial_cmp(&self, other: &TimeoutTask) -> Option { - self.next_tick.partial_cmp(&other.next_tick) + Some(self.cmp(other)) } } impl Ord for TimeoutTask { fn cmp(&self, other: &TimeoutTask) -> Ordering { // TimeoutTask.next_tick must have same type of instants. - self.partial_cmp(other).unwrap() + self.next_tick.partial_cmp(&other.next_tick).unwrap() } } diff --git a/components/txn_types/src/timestamp.rs b/components/txn_types/src/timestamp.rs index fb0cd900123..79727575d60 100644 --- a/components/txn_types/src/timestamp.rs +++ b/components/txn_types/src/timestamp.rs @@ -118,9 +118,10 @@ impl slog::Value for TimeStamp { const TS_SET_USE_VEC_LIMIT: usize = 8; /// A hybrid immutable set for timestamps. 
-#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Default, Clone, PartialEq)] pub enum TsSet { /// When the set is empty, avoid the useless cloning of Arc. + #[default] Empty, /// `Vec` is suitable when the set is small or the set is barely used, and /// it doesn't worth converting a `Vec` into a `HashSet`. @@ -130,13 +131,6 @@ pub enum TsSet { Set(Arc>), } -impl Default for TsSet { - #[inline] - fn default() -> TsSet { - TsSet::Empty - } -} - impl TsSet { /// Create a `TsSet` from the given vec of timestamps. It will select the /// proper internal collection type according to the size. diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 624ac81212d..5305e3ec69a 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -451,7 +451,7 @@ impl From for Mutation { /// `OldValue` is used by cdc to read the previous value associated with some /// key during the prewrite process. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Default, Clone, PartialEq)] pub enum OldValue { /// A real `OldValue`. Value { value: Value }, @@ -460,18 +460,13 @@ pub enum OldValue { /// `None` means we don't found a previous value. None, /// The user doesn't care about the previous value. + #[default] Unspecified, /// Not sure whether the old value exists or not. users can seek CF_WRITE to /// the give position to take a look. SeekWrite(Key), } -impl Default for OldValue { - fn default() -> Self { - OldValue::Unspecified - } -} - impl OldValue { pub fn value(value: Value) -> Self { OldValue::Value { value } @@ -590,8 +585,9 @@ impl WriteBatchFlags { /// The position info of the last actual write (PUT or DELETE) of a LOCK record. /// Note that if the last change is a DELETE, its LastChange can be either /// Exist(which points to it) or NotExist. 
-#[derive(Clone, Eq, PartialEq, Debug)] +#[derive(Clone, Default, Eq, PartialEq, Debug)] pub enum LastChange { + #[default] Unknown, /// The pointer may point to a PUT or a DELETE record. Exist { @@ -647,12 +643,6 @@ impl LastChange { } } -impl Default for LastChange { - fn default() -> Self { - LastChange::Unknown - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/rust-toolchain b/rust-toolchain index 4e5f9a4d82b..c1eb62e26cb 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly-2022-11-15 +nightly-2023-08-15 diff --git a/src/config/mod.rs b/src/config/mod.rs index 4f9a9a01b4a..6b3332fb015 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1482,7 +1482,7 @@ impl DbConfig { opts.set_paranoid_checks(b); } if for_engine == EngineType::RaftKv { - opts.set_info_log(RocksdbLogger::default()); + opts.set_info_log(RocksdbLogger); } opts.set_info_log_level(self.info_log_level.into()); if self.titan.enabled { @@ -1858,7 +1858,7 @@ impl RaftDbConfig { opts.set_max_log_file_size(self.info_log_max_size.0); opts.set_log_file_time_to_roll(self.info_log_roll_time.as_secs()); opts.set_keep_log_file_num(self.info_log_keep_log_file_num); - opts.set_info_log(RaftDbLogger::default()); + opts.set_info_log(RaftDbLogger); opts.set_info_log_level(self.info_log_level.into()); opts.set_max_subcompactions(self.max_sub_compactions); opts.set_writable_file_max_buffer_size(self.writable_file_max_buffer_size.0 as i32); @@ -2015,7 +2015,7 @@ impl ConfigManager for DbConfigManger { self.cfg.update(change.clone())?; let change_str = format!("{:?}", change); let mut change: Vec<(String, ConfigValue)> = change.into_iter().collect(); - let cf_config = change.drain_filter(|(name, _)| name.ends_with("cf")); + let cf_config = change.extract_if(|(name, _)| name.ends_with("cf")); for (cf_name, cf_change) in cf_config { if let ConfigValue::Module(mut cf_change) = cf_change { // defaultcf -> default @@ -2040,7 +2040,7 @@ impl ConfigManager for DbConfigManger { } if let 
Some(rate_bytes_config) = change - .drain_filter(|(name, _)| name == "rate_bytes_per_sec") + .extract_if(|(name, _)| name == "rate_bytes_per_sec") .next() { let rate_bytes_per_sec: ReadableSize = rate_bytes_config.1.into(); @@ -2049,7 +2049,7 @@ impl ConfigManager for DbConfigManger { } if let Some(rate_bytes_config) = change - .drain_filter(|(name, _)| name == "rate_limiter_auto_tuned") + .extract_if(|(name, _)| name == "rate_limiter_auto_tuned") .next() { let rate_limiter_auto_tuned: bool = rate_bytes_config.1.into(); @@ -2058,7 +2058,7 @@ impl ConfigManager for DbConfigManger { } if let Some(size) = change - .drain_filter(|(name, _)| name == "write_buffer_limit") + .extract_if(|(name, _)| name == "write_buffer_limit") .next() { let size: ReadableSize = size.1.into(); @@ -2066,14 +2066,14 @@ impl ConfigManager for DbConfigManger { } if let Some(f) = change - .drain_filter(|(name, _)| name == "write_buffer_flush_oldest_first") + .extract_if(|(name, _)| name == "write_buffer_flush_oldest_first") .next() { self.db.set_flush_oldest_first(f.1.into())?; } if let Some(background_jobs_config) = change - .drain_filter(|(name, _)| name == "max_background_jobs") + .extract_if(|(name, _)| name == "max_background_jobs") .next() { let max_background_jobs: i32 = background_jobs_config.1.into(); @@ -2081,7 +2081,7 @@ impl ConfigManager for DbConfigManger { } if let Some(background_subcompactions_config) = change - .drain_filter(|(name, _)| name == "max_sub_compactions") + .extract_if(|(name, _)| name == "max_sub_compactions") .next() { let max_subcompactions: u32 = background_subcompactions_config.1.into(); @@ -2090,7 +2090,7 @@ impl ConfigManager for DbConfigManger { } if let Some(background_flushes_config) = change - .drain_filter(|(name, _)| name == "max_background_flushes") + .extract_if(|(name, _)| name == "max_background_flushes") .next() { let max_background_flushes: i32 = background_flushes_config.1.into(); diff --git a/src/coprocessor/metrics.rs 
b/src/coprocessor/metrics.rs index 02f45d35311..7d2d7e9e947 100644 --- a/src/coprocessor/metrics.rs +++ b/src/coprocessor/metrics.rs @@ -285,7 +285,7 @@ pub fn tls_collect_scan_details(cmd: ReqTag, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_insert_with(Default::default) + .or_default() .add(stats); }); } diff --git a/src/coprocessor/mod.rs b/src/coprocessor/mod.rs index fcd16f9b947..874917130e4 100644 --- a/src/coprocessor/mod.rs +++ b/src/coprocessor/mod.rs @@ -64,11 +64,13 @@ type HandlerStreamStepResult = Result<(Option, bool)>; #[async_trait] pub trait RequestHandler: Send { /// Processes current request and produces a response. + #[allow(clippy::diverging_sub_expression)] async fn handle_request(&mut self) -> Result> { panic!("unary request is not supported for this handler"); } /// Processes current request and produces streaming responses. + #[allow(clippy::diverging_sub_expression)] async fn handle_streaming_request(&mut self) -> HandlerStreamStepResult { panic!("streaming request is not supported for this handler"); } diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 6d40ffe959c..1a670c917ca 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -66,9 +66,9 @@ const REQUEST_WRITE_CONCURRENCY: usize = 16; /// bytes. In detail, they are: /// - 2 bytes for the request type (Tag+Value). /// - 2 bytes for every string or bytes field (Tag+Length), they are: -/// . + the key field -/// . + the value field -/// . + the CF field (None for CF_DEFAULT) +/// . + the key field +/// . + the value field +/// . + the CF field (None for CF_DEFAULT) /// - 2 bytes for the embedded message field `PutRequest` (Tag+Length). /// - 2 bytes for the request itself (which would be embedded into a /// [`RaftCmdRequest`].) 
diff --git a/src/lib.rs b/src/lib.rs index b3e9ebaf8e8..aafb099c6cc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,13 +23,14 @@ #![feature(proc_macro_hygiene)] #![feature(min_specialization)] #![feature(box_patterns)] -#![feature(drain_filter)] +#![feature(extract_if)] #![feature(deadline_api)] #![feature(let_chains)] #![feature(read_buf)] #![feature(type_alias_impl_trait)] #![allow(incomplete_features)] #![feature(return_position_impl_trait_in_trait)] +#![feature(impl_trait_in_assoc_type)] #[macro_use(fail_point)] extern crate fail; diff --git a/src/server/debug2.rs b/src/server/debug2.rs index cf17aea81eb..1ee1d108edc 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -1096,7 +1096,7 @@ fn get_tablet_cache( "tablet load failed, region_state {:?}", region_state.get_state() ); - return Err(box_err!(e)); + Err(box_err!(e)) } } } diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index 665824a1bac..fe5a252b8db 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -826,6 +826,7 @@ pub mod test_utils { use crate::storage::kv::RocksEngine as StorageRocksEngine; /// Do a global GC with the given safe point. + #[allow(clippy::needless_pass_by_ref_mut)] pub fn gc_by_compact(engine: &mut StorageRocksEngine, _: &[u8], safe_point: u64) { let engine = engine.get_rocksdb(); // Put a new key-value pair to ensure compaction can be triggered correctly. diff --git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index be18f8216d5..d2dc6532200 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -546,7 +546,9 @@ impl GcMan ) -> GcManagerResult> { // Get the information of the next region to do GC. 
let (region, next_key) = self.get_next_gc_context(from_key); - let Some(region) = region else { return Ok(None) }; + let Some(region) = region else { + return Ok(None); + }; let hex_start = format!("{:?}", log_wrappers::Value::key(region.get_start_key())); let hex_end = format!("{:?}", log_wrappers::Value::key(region.get_end_key())); @@ -807,7 +809,7 @@ mod tests { // Following code asserts gc_tasks == expected_gc_tasks. assert_eq!(gc_tasks.len(), expected_gc_tasks.len()); - let all_passed = gc_tasks.into_iter().zip(expected_gc_tasks.into_iter()).all( + let all_passed = gc_tasks.into_iter().zip(expected_gc_tasks).all( |((region, safe_point), (expect_region, expect_safe_point))| { region == expect_region && safe_point == expect_safe_point.into() }, @@ -884,7 +886,7 @@ mod tests { #[test] fn test_auto_gc_rewinding() { - for regions in vec![ + for regions in [ // First region starts with empty and last region ends with empty. vec![ (b"".to_vec(), b"1".to_vec(), 1), diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index c608470ba87..de40975632f 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -254,7 +254,7 @@ fn get_keys_in_region(keys: &mut Peekable>, region: &Region) -> Ve let mut keys_in_region = Vec::new(); loop { - let Some(key) = keys.peek() else {break}; + let Some(key) = keys.peek() else { break }; let key = key.as_encoded().as_slice(); if key < region.get_start_key() { @@ -552,7 +552,7 @@ impl GcRunner { let mut keys = keys.into_iter().peekable(); for region in regions { let mut raw_modifies = MvccRaw::new(); - let mut snapshot = self.get_snapshot(self.store_id, ®ion)?; + let snapshot = self.get_snapshot(self.store_id, ®ion)?; let mut keys_in_region = get_keys_in_region(&mut keys, ®ion).into_iter(); let mut next_gc_key = keys_in_region.next(); @@ -563,7 +563,7 @@ impl GcRunner { &range_start_key, &range_end_key, &mut raw_modifies, - &mut snapshot, + &snapshot, &mut gc_info, ) { 
GC_KEY_FAILURES.inc(); @@ -615,7 +615,7 @@ impl GcRunner { range_start_key: &Key, range_end_key: &Key, raw_modifies: &mut MvccRaw, - kv_snapshot: &mut ::Snap, + kv_snapshot: &::Snap, gc_info: &mut GcInfo, ) -> Result<()> { let start_key = key.clone().append_ts(safe_point.prev()); @@ -669,10 +669,7 @@ impl GcRunner { } pub fn mut_stats(&mut self, key_mode: GcKeyMode) -> &mut Statistics { - let stats = self - .stats_map - .entry(key_mode) - .or_insert_with(Default::default); + let stats = self.stats_map.entry(key_mode).or_default(); stats } @@ -2269,7 +2266,6 @@ mod tests { fn generate_keys(start: u64, end: u64) -> Vec { (start..end) - .into_iter() .map(|i| { let key = format!("k{:02}", i); Key::from_raw(key.as_bytes()) diff --git a/src/server/lock_manager/deadlock.rs b/src/server/lock_manager/deadlock.rs index 9583df80dd6..938dfaff8a6 100644 --- a/src/server/lock_manager/deadlock.rs +++ b/src/server/lock_manager/deadlock.rs @@ -361,20 +361,15 @@ impl DetectTable { } /// The role of the detector. -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, Default, PartialEq, Clone, Copy)] pub enum Role { /// The node is the leader of the detector. Leader, /// The node is a follower of the leader. 
+ #[default] Follower, } -impl Default for Role { - fn default() -> Role { - Role::Follower - } -} - impl From for Role { fn from(role: StateRole) -> Role { match role { diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 2074d469310..f5b36dffbac 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -306,6 +306,7 @@ struct WriteResFeed { unsafe impl Send for WriteResFeed {} impl WriteResFeed { + #[allow(clippy::arc_with_non_send_sync)] fn pair() -> (Self, WriteResSub) { let core = Arc::new(WriteResCore { ev: AtomicU8::new(0), @@ -581,7 +582,9 @@ where tx.notify(res); } rx.inspect(move |ev| { - let WriteEvent::Finished(res) = ev else { return }; + let WriteEvent::Finished(res) = ev else { + return; + }; match res { Ok(()) => { ASYNC_REQUESTS_COUNTER_VEC.write.success.inc(); diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index dacc90a91f0..81143e6c2be 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -291,7 +291,9 @@ impl tikv_kv::Engine for RaftKv2 { early_err: res.err(), }) .inspect(move |ev| { - let WriteEvent::Finished(res) = ev else { return }; + let WriteEvent::Finished(res) = ev else { + return; + }; match res { Ok(()) => { ASYNC_REQUESTS_COUNTER_VEC.write.success.inc(); diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index d9b17c5d35c..73a15983bd0 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -269,7 +269,9 @@ where /// Stops the Node. 
pub fn stop(&mut self) { let store_id = self.store.get_id(); - let Some((_, mut system)) = self.system.take() else { return }; + let Some((_, mut system)) = self.system.take() else { + return; + }; info!(self.logger, "stop raft store thread"; "store_id" => store_id); system.shutdown(); } diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index d0b715542d5..497d8240684 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -300,7 +300,6 @@ where let debugger = self.debugger.clone(); let res = self.pool.spawn(async move { - let req = req; debugger .compact( req.get_db(), diff --git a/src/server/service/diagnostics/log.rs b/src/server/service/diagnostics/log.rs index 8e77d65233e..413e36a6645 100644 --- a/src/server/service/diagnostics/log.rs +++ b/src/server/service/diagnostics/log.rs @@ -612,7 +612,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = vec![ + let expected = [ "2019/08/23 18:09:56.387 +08:00", "2019/08/23 18:09:56.387 +08:00", // for invalid line "2019/08/23 18:09:57.387 +08:00", @@ -639,7 +639,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = vec![ + let expected = [ "2019/08/23 18:09:56.387 +08:00", "2019/08/23 18:09:56.387 +08:00", // for invalid line "2019/08/23 18:09:57.387 +08:00", @@ -662,7 +662,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = vec!["2019/08/23 18:09:53.387 +08:00"] + let expected = ["2019/08/23 18:09:53.387 +08:00"] .iter() .map(|s| timestamp(s)) .collect::>(); @@ -671,7 +671,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# expected ); - for time in vec![0, i64::MAX].into_iter() { + for time in [0, i64::MAX].into_iter() { let log_iter = LogIterator::new( &log_file, timestamp("2019/08/23 18:09:53.387 +08:00"), @@ -680,7 +680,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = vec![ + let expected = [ 
"2019/08/23 18:09:58.387 +08:00", "2019/08/23 18:09:59.387 +08:00", "2019/08/23 18:10:06.387 +08:00", @@ -704,7 +704,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![regex::Regex::new(".*test-filter.*").unwrap()], ) .unwrap(); - let expected = vec![ + let expected = [ "2019/08/23 18:09:58.387 +08:00", "2019/08/23 18:10:06.387 +08:00", // for invalid line ] @@ -783,7 +783,7 @@ Some invalid logs 2: Welcome to TiKV - test-filter"# req.set_end_time(i64::MAX); req.set_levels(vec![LogLevel::Warn as _]); req.set_patterns(vec![".*test-filter.*".to_string()].into()); - let expected = vec![ + let expected = [ "2019/08/23 18:09:58.387 +08:00", "2019/08/23 18:11:58.387 +08:00", "2019/08/23 18:11:59.387 +08:00", // for invalid line @@ -796,9 +796,7 @@ Some invalid logs 2: Welcome to TiKV - test-filter"# s.collect::>() .await .into_iter() - .map(|mut resp| resp.take_messages().into_iter()) - .into_iter() - .flatten() + .flat_map(|mut resp| resp.take_messages().into_iter()) .map(|msg| msg.get_time()) .collect::>() }); diff --git a/src/server/service/diagnostics/sys.rs b/src/server/service/diagnostics/sys.rs index 8a84eaf6293..12494e9e7c4 100644 --- a/src/server/service/diagnostics/sys.rs +++ b/src/server/service/diagnostics/sys.rs @@ -601,7 +601,7 @@ mod tests { ] ); // memory - for name in vec!["virtual", "swap"].into_iter() { + for name in ["virtual", "swap"].into_iter() { let item = collector .iter() .find(|x| x.get_tp() == "memory" && x.get_name() == name); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 5a4327ba46e..6f1cf0eaa1f 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -889,7 +889,6 @@ impl Tikv for Service { forward_duplex!(self.proxy, batch_commands, ctx, stream, sink); let (tx, rx) = unbounded(WakePolicy::TillReach(GRPC_MSG_NOTIFY_SIZE)); - let ctx = Arc::new(ctx); let peer = ctx.peer(); let storage = self.storage.clone(); let copr = self.copr.clone(); diff --git 
a/src/storage/lock_manager/lock_wait_context.rs b/src/storage/lock_manager/lock_wait_context.rs index 32c99867a3f..1eba8cd81b7 100644 --- a/src/storage/lock_manager/lock_wait_context.rs +++ b/src/storage/lock_manager/lock_wait_context.rs @@ -387,9 +387,9 @@ mod tests { let res = rx.recv().unwrap().unwrap_err(); assert!(matches!( &res, - StorageError(box StorageErrorInner::Txn(TxnError( - box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::WriteConflict { .. })) - ))) + StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( + box MvccErrorInner::WriteConflict { .. }, + ))))) )); // The tx should be dropped. rx.recv().unwrap_err(); @@ -422,9 +422,9 @@ mod tests { let res = rx.recv().unwrap().unwrap_err(); assert!(matches!( &res, - StorageError(box StorageErrorInner::Txn(TxnError( - box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::KeyIsLocked(_))) - ))) + StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( + box MvccErrorInner::KeyIsLocked(_), + ))))) )); // Since the cancellation callback can fully execute only when it's successfully // removed from the lock waiting queues, it's impossible that `finish_request` diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs index a81248fe9e2..68e0118610a 100644 --- a/src/storage/lock_manager/lock_waiting_queue.rs +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -110,12 +110,7 @@ impl Eq for LockWaitEntry {} impl PartialOrd for LockWaitEntry { fn partial_cmp(&self, other: &Self) -> Option { - // Reverse it since the priority queue is a max heap and we want to pop the - // minimal. 
- other - .parameters - .start_ts - .partial_cmp(&self.parameters.start_ts) + Some(self.cmp(other)) } } diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index e9477b56b0f..d3b3e89a3f8 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -63,7 +63,7 @@ pub fn tls_collect_scan_details(cmd: CommandKind, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_insert_with(Default::default) + .or_default() .add(stats); }); } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index cb4057bfd7e..b8224df696b 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1946,7 +1946,7 @@ impl Storage { key_ranges.push(build_key_range(k.as_encoded(), k.as_encoded(), false)); (k, v) }) - .filter(|&(_, ref v)| !(v.is_ok() && v.as_ref().unwrap().is_none())) + .filter(|(_, v)| !(v.is_ok() && v.as_ref().unwrap().is_none())) .map(|(k, v)| match v { Ok(v) => { let (user_key, _) = F::decode_raw_key_owned(k, false).unwrap(); @@ -3892,9 +3892,9 @@ mod tests { let result = block_on(storage.get(Context::default(), Key::from_raw(b"x"), 100.into())); assert!(matches!( result, - Err(Error(box ErrorInner::Txn(txn::Error( - box txn::ErrorInner::Mvcc(mvcc::Error(box mvcc::ErrorInner::KeyIsLocked { .. })) - )))) + Err(Error(box ErrorInner::Txn(txn::Error(box txn::ErrorInner::Mvcc(mvcc::Error( + box mvcc::ErrorInner::KeyIsLocked { .. 
}, + )))))) )); } @@ -5744,7 +5744,7 @@ mod tests { ]; // Write key-value pairs one by one - for &(ref key, ref value) in &test_data { + for (key, value) in &test_data { storage .raw_put( ctx.clone(), @@ -5803,7 +5803,7 @@ mod tests { let mut total_bytes: u64 = 0; let mut is_first = true; // Write key-value pairs one by one - for &(ref key, ref value) in &test_data { + for (key, value) in &test_data { storage .raw_put( ctx.clone(), @@ -6116,7 +6116,7 @@ mod tests { #[test] fn test_raw_batch_put() { - for for_cas in vec![false, true].into_iter() { + for for_cas in [false, true].into_iter() { test_kv_format_impl!(test_raw_batch_put_impl(for_cas)); } } @@ -6245,7 +6245,7 @@ mod tests { ]; // Write key-value pairs one by one - for &(ref key, ref value) in &test_data { + for (key, value) in &test_data { storage .raw_put( ctx.clone(), @@ -6260,7 +6260,7 @@ mod tests { } // Verify pairs in a batch - let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); let results = test_data.into_iter().map(|(k, v)| Some((k, v))).collect(); expect_multi_values( results, @@ -6292,7 +6292,7 @@ mod tests { ]; // Write key-value pairs one by one - for &(ref key, ref value) in &test_data { + for (key, value) in &test_data { storage .raw_put( ctx.clone(), @@ -6310,7 +6310,7 @@ mod tests { let mut ids = vec![]; let cmds = test_data .iter() - .map(|&(ref k, _)| { + .map(|(k, _)| { let mut req = RawGetRequest::default(); req.set_context(ctx.clone()); req.set_key(k.clone()); @@ -6331,7 +6331,7 @@ mod tests { #[test] fn test_raw_batch_delete() { - for for_cas in vec![false, true].into_iter() { + for for_cas in [false, true].into_iter() { test_kv_format_impl!(test_raw_batch_delete_impl(for_cas)); } } @@ -6381,10 +6381,10 @@ mod tests { rx.recv().unwrap(); // Verify pairs exist - let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); let results 
= test_data .iter() - .map(|&(ref k, ref v)| Some((k.clone(), v.clone()))) + .map(|(k, v)| Some((k.clone(), v.clone()))) .collect(); expect_multi_values( results, @@ -6512,7 +6512,7 @@ mod tests { // Scan pairs with key only let mut results: Vec> = test_data .iter() - .map(|&(ref k, _)| Some((k.clone(), vec![]))) + .map(|(k, _)| Some((k.clone(), vec![]))) .collect(); expect_multi_values( results.clone(), @@ -6909,7 +6909,7 @@ mod tests { rx.recv().unwrap(); // Verify pairs exist - let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); let results = test_data.into_iter().map(|(k, v)| Some((k, v))).collect(); expect_multi_values( results, diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index cc4403229c1..474c789a31d 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -1287,7 +1287,7 @@ mod tests { let k = b"k"; // Write enough LOCK recrods - for start_ts in (1..30).into_iter().step_by(2) { + for start_ts in (1..30).step_by(2) { must_prewrite_lock(&mut engine, k, k, start_ts); must_commit(&mut engine, k, start_ts, start_ts + 1); } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 48158eda946..61a366c12ee 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -418,11 +418,10 @@ impl MvccReader { estimated_versions_to_last_change, } if estimated_versions_to_last_change >= SEEK_BOUND => { let key_with_ts = key.clone().append_ts(commit_ts); - let Some(value) = self - .snapshot - .get_cf(CF_WRITE, &key_with_ts)? else { - return Ok(None); - }; + let Some(value) = self.snapshot.get_cf(CF_WRITE, &key_with_ts)? 
+ else { + return Ok(None); + }; self.statistics.write.get += 1; let write = WriteRef::parse(&value)?.to_owned(); assert!( @@ -2421,7 +2420,7 @@ pub mod tests { engine.commit(k, 1, 2); // Write enough LOCK recrods - for start_ts in (6..30).into_iter().step_by(2) { + for start_ts in (6..30).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2430,7 +2429,7 @@ pub mod tests { engine.commit(k, 45, 46); // Write enough LOCK recrods - for start_ts in (50..80).into_iter().step_by(2) { + for start_ts in (50..80).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2485,7 +2484,7 @@ pub mod tests { let k = b"k"; // Write enough LOCK recrods - for start_ts in (6..30).into_iter().step_by(2) { + for start_ts in (6..30).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2522,7 +2521,7 @@ pub mod tests { engine.put(k, 1, 2); // 10 locks were put - for start_ts in (6..30).into_iter().step_by(2) { + for start_ts in (6..30).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2549,7 +2548,7 @@ pub mod tests { feature_gate.set_version("6.1.0").unwrap(); set_tls_feature_gate(feature_gate); engine.delete(k, 51, 52); - for start_ts in (56..80).into_iter().step_by(2) { + for start_ts in (56..80).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } let feature_gate = FeatureGate::default(); @@ -2581,7 +2580,7 @@ pub mod tests { let k = b"k"; engine.put(k, 1, 2); - for start_ts in (6..30).into_iter().step_by(2) { + for start_ts in (6..30).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } engine.rollback(k, 30); diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 3437a1e5432..2b0a8e13582 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -633,7 +633,7 @@ impl ScanPolicy for LatestEntryPolicy { fn scan_latest_handle_lock( current_user_key: Key, - cfg: &mut ScannerConfig, + cfg: &ScannerConfig, cursors: &mut Cursors, statistics: &mut Statistics, ) 
-> Result> { @@ -1636,7 +1636,7 @@ mod latest_kv_tests { must_prewrite_put(&mut engine, b"k4", b"v41", b"k4", 3); must_commit(&mut engine, b"k4", 3, 7); - for start_ts in (10..30).into_iter().step_by(2) { + for start_ts in (10..30).step_by(2) { must_prewrite_lock(&mut engine, b"k1", b"k1", start_ts); must_commit(&mut engine, b"k1", start_ts, start_ts + 1); must_prewrite_lock(&mut engine, b"k3", b"k1", start_ts); diff --git a/src/storage/raw/raw_mvcc.rs b/src/storage/raw/raw_mvcc.rs index 8c4ad5da08b..aa635827961 100644 --- a/src/storage/raw/raw_mvcc.rs +++ b/src/storage/raw/raw_mvcc.rs @@ -290,7 +290,7 @@ mod tests { RawEncodeSnapshot::from_snapshot(raw_mvcc_snapshot); // get_cf - for &(ref key, ref value, _) in &test_data[6..12] { + for (key, value, _) in &test_data[6..12] { let res = encode_snapshot.get_cf(CF_DEFAULT, &ApiV2::encode_raw_key(key, None)); assert_eq!(res.unwrap(), Some(value.to_owned())); } diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 64e22a13585..713155f9160 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -766,7 +766,6 @@ fn async_commit_timestamps( #[cfg(not(feature = "failpoints"))] let injected_fallback = false; - let max_commit_ts = max_commit_ts; if (!max_commit_ts.is_zero() && min_commit_ts > max_commit_ts) || injected_fallback { warn!("commit_ts is too large, fallback to normal 2PC"; "key" => log_wrappers::Value::key(key.as_encoded()), @@ -1875,7 +1874,6 @@ pub mod tests { // At most 12 ops per-case. 
let ops_count = rg.gen::() % 12; let ops = (0..ops_count) - .into_iter() .enumerate() .map(|(i, _)| { if i == 0 { diff --git a/src/storage/txn/commands/atomic_store.rs b/src/storage/txn/commands/atomic_store.rs index 9a54895e7e2..61dbdac6565 100644 --- a/src/storage/txn/commands/atomic_store.rs +++ b/src/storage/txn/commands/atomic_store.rs @@ -88,8 +88,8 @@ mod tests { fn test_atomic_process_write_impl() { let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); - let raw_keys = vec![b"ra", b"rz"]; - let raw_values = vec![b"valuea", b"valuez"]; + let raw_keys = [b"ra", b"rz"]; + let raw_values = [b"valuea", b"valuez"]; let ts_provider = super::super::test_util::gen_ts_provider(F::TAG); let mut modifies = vec![]; diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 10446db6292..2f39b29bc64 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -1853,9 +1853,7 @@ mod tests { .unwrap_err(); assert!(matches!( res, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::AlreadyExist { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::AlreadyExist { .. }))) )); assert_eq!(cm.max_ts().into_inner(), 15); @@ -1878,9 +1876,7 @@ mod tests { .unwrap_err(); assert!(matches!( res, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::WriteConflict { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::WriteConflict { .. }))) )); } @@ -2286,9 +2282,9 @@ mod tests { .unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::PessimisticLockNotFound { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { + .. 
+ }))) )); must_unlocked(&mut engine, b"k2"); // However conflict still won't be checked if there's a non-retry request @@ -2469,9 +2465,9 @@ mod tests { let err = prewrite_command(&mut engine, cm.clone(), &mut stat, cmd).unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::PessimisticLockNotFound { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { + .. + }))) )); // Passing keys in different order gets the same result: let cmd = PrewritePessimistic::with_defaults( @@ -2492,9 +2488,9 @@ mod tests { let err = prewrite_command(&mut engine, cm, &mut stat, cmd).unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::PessimisticLockNotFound { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { + .. + }))) )); // If the two keys are sent in different requests, it would be the client's duty diff --git a/src/storage/txn/latch.rs b/src/storage/txn/latch.rs index a662d9bab79..549d1d22636 100644 --- a/src/storage/txn/latch.rs +++ b/src/storage/txn/latch.rs @@ -224,7 +224,7 @@ impl Latches { keep_latches_for_next_cmd: Option<(u64, &Lock)>, ) -> Vec { // Used to - let dummy_vec = vec![]; + let dummy_vec = []; let (keep_latches_for_cid, mut keep_latches_it) = match keep_latches_for_next_cmd { Some((cid, lock)) => (Some(cid), lock.required_hashes.iter().peekable()), None => (None, dummy_vec.iter().peekable()), @@ -282,9 +282,9 @@ mod tests { fn test_wakeup() { let latches = Latches::new(256); - let keys_a = vec!["k1", "k3", "k5"]; + let keys_a = ["k1", "k3", "k5"]; let mut lock_a = Lock::new(keys_a.iter()); - let keys_b = vec!["k4", "k5", "k6"]; + let keys_b = ["k4", "k5", "k6"]; let mut lock_b = Lock::new(keys_b.iter()); let cid_a: u64 = 1; let cid_b: u64 = 2; @@ -310,9 +310,9 @@ mod tests { fn test_wakeup_by_multi_cmds() { let latches = Latches::new(256); - let keys_a = vec!["k1", "k2", "k3"]; - let 
keys_b = vec!["k4", "k5", "k6"]; - let keys_c = vec!["k3", "k4"]; + let keys_a = ["k1", "k2", "k3"]; + let keys_b = ["k4", "k5", "k6"]; + let keys_c = ["k3", "k4"]; let mut lock_a = Lock::new(keys_a.iter()); let mut lock_b = Lock::new(keys_b.iter()); let mut lock_c = Lock::new(keys_c.iter()); @@ -353,10 +353,10 @@ mod tests { fn test_wakeup_by_small_latch_slot() { let latches = Latches::new(5); - let keys_a = vec!["k1", "k2", "k3"]; - let keys_b = vec!["k6", "k7", "k8"]; - let keys_c = vec!["k3", "k4"]; - let keys_d = vec!["k7", "k10"]; + let keys_a = ["k1", "k2", "k3"]; + let keys_b = ["k6", "k7", "k8"]; + let keys_c = ["k3", "k4"]; + let keys_d = ["k7", "k10"]; let mut lock_a = Lock::new(keys_a.iter()); let mut lock_b = Lock::new(keys_b.iter()); let mut lock_c = Lock::new(keys_c.iter()); diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index 19736304373..2ca3ef145c8 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -267,7 +267,7 @@ pub fn tls_collect_scan_details(cmd: &'static str, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_insert_with(Default::default) + .or_default() .add(stats); }); } diff --git a/tests/benches/coprocessor_executors/util/mod.rs b/tests/benches/coprocessor_executors/util/mod.rs index 0a5708c74ce..3698860b4ea 100644 --- a/tests/benches/coprocessor_executors/util/mod.rs +++ b/tests/benches/coprocessor_executors/util/mod.rs @@ -147,7 +147,7 @@ where I: 'static, { fn partial_cmp(&self, other: &Self) -> Option { - self.get_name().partial_cmp(other.get_name()) + Some(self.cmp(other)) } } diff --git a/tests/benches/hierarchy/mvcc/mod.rs b/tests/benches/hierarchy/mvcc/mod.rs index 92dacfe6dc9..99f2c9ee1f4 100644 --- a/tests/benches/hierarchy/mvcc/mod.rs +++ b/tests/benches/hierarchy/mvcc/mod.rs @@ -61,7 +61,7 @@ where .unwrap(); } let write_data = WriteData::from_modifies(txn.into_modifies()); - let _ = tikv_kv::write(engine, &ctx, write_data, None); + let _ = 
futures::executor::block_on(tikv_kv::write(engine, &ctx, write_data, None)); let keys: Vec = kvs.iter().map(|(k, _)| Key::from_raw(k)).collect(); let snapshot = engine.snapshot(Default::default()).unwrap(); (snapshot, keys) diff --git a/tests/benches/misc/coprocessor/codec/chunk/chunk.rs b/tests/benches/misc/coprocessor/codec/chunk/chunk.rs index 4c033f2a80d..241284a7228 100644 --- a/tests/benches/misc/coprocessor/codec/chunk/chunk.rs +++ b/tests/benches/misc/coprocessor/codec/chunk/chunk.rs @@ -79,7 +79,7 @@ impl ChunkBuilder { pub fn build(self, tps: &[FieldType]) -> Chunk { let mut fields = Vec::with_capacity(tps.len()); let mut arrays: Vec> = Vec::with_capacity(tps.len()); - for (field_type, column) in tps.iter().zip(self.columns.into_iter()) { + for (field_type, column) in tps.iter().zip(self.columns) { match field_type.as_accessor().tp() { FieldTypeTp::Tiny | FieldTypeTp::Short diff --git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index d567edd5add..a545d9935e6 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -171,6 +171,7 @@ fn bench_async_snapshots_noop(b: &mut test::Bencher) { } #[bench] +#[allow(clippy::let_underscore_future)] fn bench_async_snapshot(b: &mut test::Bencher) { let leader = new_peer(2, 3); let mut region = Region::default(); @@ -205,6 +206,7 @@ fn bench_async_snapshot(b: &mut test::Bencher) { } #[bench] +#[allow(clippy::let_underscore_future)] fn bench_async_write(b: &mut test::Bencher) { let leader = new_peer(2, 3); let mut region = Region::default(); diff --git a/tests/benches/raftstore/mod.rs b/tests/benches/raftstore/mod.rs index 05c602824c2..e164d59f82a 100644 --- a/tests/benches/raftstore/mod.rs +++ b/tests/benches/raftstore/mod.rs @@ -12,7 +12,7 @@ const DEFAULT_DATA_SIZE: usize = 100_000; fn enc_write_kvs(db: &RocksEngine, kvs: &[(Vec, Vec)]) { let mut wb = db.write_batch(); - for &(ref k, ref v) in kvs { + for (k, v) in kvs { wb.put(&keys::data_key(k), v).unwrap(); } 
wb.write().unwrap(); diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index a9dbd36a81a..f40f40e6af1 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -1,5 +1,8 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. +#![allow(clippy::arc_with_non_send_sync)] +#![allow(clippy::unnecessary_mut_passed)] +#[allow(clippy::let_underscore_future)] mod test_async_fetch; mod test_async_io; mod test_backup; diff --git a/tests/failpoints/cases/test_disk_full.rs b/tests/failpoints/cases/test_disk_full.rs index 217269bb5b8..55c06d87b07 100644 --- a/tests/failpoints/cases/test_disk_full.rs +++ b/tests/failpoints/cases/test_disk_full.rs @@ -35,7 +35,7 @@ fn get_fp(usage: DiskUsage, store_id: u64) -> String { // check the region new leader is elected. fn assert_region_leader_changed( - cluster: &mut Cluster, + cluster: &Cluster, region_id: u64, original_leader: u64, ) { @@ -91,7 +91,7 @@ fn test_disk_full_leader_behaviors(usage: DiskUsage) { let new_last_index = cluster.raft_local_state(1, 1).last_index; assert_eq!(old_last_index, new_last_index); - assert_region_leader_changed(&mut cluster, 1, 1); + assert_region_leader_changed(&cluster, 1, 1); fail::remove(get_fp(usage, 1)); cluster.must_transfer_leader(1, new_peer(1, 1)); fail::cfg(get_fp(usage, 1), "return").unwrap(); @@ -199,7 +199,7 @@ fn test_disk_full_txn_behaviors(usage: DiskUsage) { DiskFullOpt::NotAllowedOnFull, ); assert!(res.get_region_error().has_disk_full()); - assert_region_leader_changed(&mut cluster, 1, 1); + assert_region_leader_changed(&cluster, 1, 1); fail::remove(get_fp(usage, 1)); cluster.must_transfer_leader(1, new_peer(1, 1)); @@ -393,7 +393,7 @@ fn test_disk_full_followers_with_hibernate_regions() { // check the region new leader is elected. 
fn assert_region_merged( - cluster: &mut Cluster, + cluster: &Cluster, left_region_key: &[u8], right_region_key: &[u8], ) { diff --git a/tests/failpoints/cases/test_engine.rs b/tests/failpoints/cases/test_engine.rs index 073f7276419..2dd5b6ac04b 100644 --- a/tests/failpoints/cases/test_engine.rs +++ b/tests/failpoints/cases/test_engine.rs @@ -57,6 +57,7 @@ fn test_write_buffer_manager() { } } +#[rustfmt::skip] // The test mocks the senario before https://github.com/tikv/rocksdb/pull/347: // note: before rocksdb/pull/347, lock is called before on_memtable_sealed. // Case: diff --git a/tests/failpoints/cases/test_hibernate.rs b/tests/failpoints/cases/test_hibernate.rs index 616a4e5e196..74561396593 100644 --- a/tests/failpoints/cases/test_hibernate.rs +++ b/tests/failpoints/cases/test_hibernate.rs @@ -93,6 +93,7 @@ fn test_break_leadership_on_restart() { // received, and become `GroupState::Ordered` after the proposal is received. // But they should keep wakeful for a while. #[test] +#[allow(clippy::let_underscore_future)] fn test_store_disconnect_with_hibernate() { let mut cluster = new_server_cluster(0, 3); let base_tick_ms = 50; diff --git a/tests/failpoints/cases/test_pd_client.rs b/tests/failpoints/cases/test_pd_client.rs index 0115d6d7ba5..201aafce6fb 100644 --- a/tests/failpoints/cases/test_pd_client.rs +++ b/tests/failpoints/cases/test_pd_client.rs @@ -43,6 +43,7 @@ macro_rules! request { } #[test] +#[allow(clippy::let_underscore_future)] fn test_pd_client_deadlock() { let (_server, client) = new_test_server_and_client(ReadableDuration::millis(100)); let pd_client_reconnect_fp = "pd_client_reconnect"; diff --git a/tests/failpoints/cases/test_pd_client_legacy.rs b/tests/failpoints/cases/test_pd_client_legacy.rs index ac427c29e69..583dad2ff34 100644 --- a/tests/failpoints/cases/test_pd_client_legacy.rs +++ b/tests/failpoints/cases/test_pd_client_legacy.rs @@ -43,6 +43,7 @@ macro_rules! 
request { } #[test] +#[allow(clippy::let_underscore_future)] fn test_pd_client_deadlock() { let (_server, client) = new_test_server_and_client(ReadableDuration::millis(100)); let client = Arc::new(client); diff --git a/tests/failpoints/cases/test_rawkv.rs b/tests/failpoints/cases/test_rawkv.rs index a795422c120..5ab7edb503f 100644 --- a/tests/failpoints/cases/test_rawkv.rs +++ b/tests/failpoints/cases/test_rawkv.rs @@ -208,7 +208,7 @@ fn test_leader_transfer() { #[test] fn test_region_merge() { let mut suite = TestSuite::new(3, ApiVersion::V2); - let keys = vec![b"rk0", b"rk1", b"rk2", b"rk3", b"rk4", b"rk5"]; + let keys = [b"rk0", b"rk1", b"rk2", b"rk3", b"rk4", b"rk5"]; suite.must_raw_put(keys[1], b"v1"); suite.must_raw_put(keys[3], b"v3"); diff --git a/tests/failpoints/cases/test_read_execution_tracker.rs b/tests/failpoints/cases/test_read_execution_tracker.rs index 7351044b297..dc6906b668a 100644 --- a/tests/failpoints/cases/test_read_execution_tracker.rs +++ b/tests/failpoints/cases/test_read_execution_tracker.rs @@ -4,11 +4,16 @@ use kvproto::kvrpcpb::*; use test_coprocessor::{init_with_data, DagSelect, ProductTable}; use test_raftstore::{kv_batch_read, kv_read, must_kv_commit, must_kv_prewrite}; use test_raftstore_macro::test_case; +use tikv_util::config::ReadableDuration; -#[test_case(test_raftstore::must_new_cluster_and_kv_client)] -#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore::must_new_cluster_with_cfg_and_kv_client_mul)] +#[test_case(test_raftstore_v2::must_new_cluster_with_cfg_and_kv_client_mul)] fn test_read_execution_tracking() { - let (_cluster, client, ctx) = new_cluster(); + let (_cluster, client, ctx) = new_cluster(1, |c| { + // set a small renew duration to avoid trigger pre-renew that can affact the + // metrics. 
+ c.cfg.tikv.raft_store.renew_leader_lease_advance_duration = ReadableDuration::millis(1); + }); let (k1, v1) = (b"k1".to_vec(), b"v1".to_vec()); let (k2, v2) = (b"k2".to_vec(), b"v2".to_vec()); diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 65c50793d7a..10a65271462 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -1426,8 +1426,7 @@ impl Filter for TeeFilter { // 2. the splitted region set has_dirty_data be true in `apply_snapshot` // 3. the splitted region schedule tablet trim task in `on_applied_snapshot` // with tablet index 5 -// 4. the splitted region received a snapshot sent from its -// leader +// 4. the splitted region received a snapshot sent from its leader // 5. after finishing applying this snapshot, the tablet index in storage // changed to 6 // 6. tablet trim complete and callbacked to raftstore diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 57047bef9d4..4668c24ad66 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -1620,9 +1620,7 @@ fn test_before_propose_deadline() { assert!( matches!( res, - Err(StorageError(box StorageErrorInner::Kv(KvError( - box KvErrorInner::Request(_), - )))) + Err(StorageError(box StorageErrorInner::Kv(KvError(box KvErrorInner::Request(_))))) ), "actual: {:?}", res diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 14f4161c7ae..4154a764d99 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -751,7 +751,7 @@ fn test_proposal_concurrent_with_conf_change_and_transfer_leader() { let handle = std::thread::spawn(move || { let mut mutations = vec![]; - for key in vec![b"key3".to_vec(), b"key4".to_vec()] { + for key in [b"key3".to_vec(), b"key4".to_vec()] { let mut mutation = 
kvproto::kvrpcpb::Mutation::default(); mutation.set_op(Op::Put); mutation.set_key(key); diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index 75eb62bab99..02fb8c046c8 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -361,8 +361,8 @@ fn test_read_lock_after_become_follower() { /// 1. Inserted 5 entries and make all stores commit and apply them. /// 2. Prevent the store 3 from append following logs. /// 3. Insert another 20 entries. -/// 4. Wait for some time so that part of the entry cache are compacted -/// on the leader(store 1). +/// 4. Wait for some time so that part of the entry cache are compacted on the +/// leader(store 1). macro_rules! run_cluster_for_test_warmup_entry_cache { ($cluster:expr) => { // Let the leader compact the entry cache. diff --git a/tests/integrations/backup/mod.rs b/tests/integrations/backup/mod.rs index 4cfd4be07be..bd5461e6134 100644 --- a/tests/integrations/backup/mod.rs +++ b/tests/integrations/backup/mod.rs @@ -492,6 +492,7 @@ fn test_backup_raw_meta() { } #[test] +#[allow(clippy::permissions_set_readonly_false)] fn test_invalid_external_storage() { let mut suite = TestSuite::new(1, 144 * 1024 * 1024, ApiVersion::V1); // Put some data. 
diff --git a/tests/integrations/import/test_apply_log.rs b/tests/integrations/import/test_apply_log.rs index 3d8cf85b02c..f821ffea2e7 100644 --- a/tests/integrations/import/test_apply_log.rs +++ b/tests/integrations/import/test_apply_log.rs @@ -67,6 +67,6 @@ fn test_apply_twice() { &tikv, &ctx, CF_DEFAULT, - default_fst.into_iter().chain(default_snd.into_iter()), + default_fst.into_iter().chain(default_snd), ); } diff --git a/tests/integrations/mod.rs b/tests/integrations/mod.rs index 2b68c0a8ba9..86ceb5369e7 100644 --- a/tests/integrations/mod.rs +++ b/tests/integrations/mod.rs @@ -4,6 +4,8 @@ #![feature(box_patterns)] #![feature(custom_test_frameworks)] #![test_runner(test_util::run_tests)] +#![allow(clippy::needless_pass_by_ref_mut)] +#![allow(clippy::extra_unused_type_parameters)] extern crate test; diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 056641e1e3f..30ea12a424b 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -287,8 +287,8 @@ fn test_flush_before_stop2() { // 1. lock `k` with index 6 // 2. on_applied_res => lockcf's last_modified = 6 // 3. flush lock cf => lockcf's flushed_index = 6 -// 4. batch {unlock `k`, write `k`} with index 7 -// (last_modified is updated in store but RocksDB is modified in apply. So, +// 4. batch {unlock `k`, write `k`} with index 7 (last_modified is updated in +// store but RocksDB is modified in apply. So, // before on_apply_res, the last_modified is not updated.) 
// // flush-before-close: diff --git a/tests/integrations/raftstore/test_compact_lock_cf.rs b/tests/integrations/raftstore/test_compact_lock_cf.rs index fbc7629c73f..56cb65cce87 100644 --- a/tests/integrations/raftstore/test_compact_lock_cf.rs +++ b/tests/integrations/raftstore/test_compact_lock_cf.rs @@ -5,13 +5,13 @@ use engine_traits::{MiscExt, CF_LOCK}; use test_raftstore::*; use tikv_util::config::*; -fn flush(cluster: &mut Cluster) { +fn flush(cluster: &Cluster) { for engines in cluster.engines.values() { engines.kv.flush_cf(CF_LOCK, true).unwrap(); } } -fn flush_then_check(cluster: &mut Cluster, interval: u64, written: bool) { +fn flush_then_check(cluster: &Cluster, interval: u64, written: bool) { flush(cluster); // Wait for compaction. sleep_ms(interval * 2); diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index d61d6a59182..13e718b269d 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -420,6 +420,7 @@ fn test_txn_query_stats_tmpl() { fail::remove("mock_collect_tick_interval"); } +#[allow(clippy::extra_unused_type_parameters)] fn raw_put( _cluster: &Cluster, client: &TikvClient, From 058336850ce52cd0eb2691931b92f10318529d09 Mon Sep 17 00:00:00 2001 From: qupeng Date: Fri, 15 Sep 2023 14:55:39 +0800 Subject: [PATCH 0918/1149] stablize case test_store_disconnect_with_hibernate (#15596) close tikv/tikv#15607 None Signed-off-by: qupeng Co-authored-by: tonyxuqqi --- components/raftstore/src/store/fsm/peer.rs | 5 +++++ tests/failpoints/cases/test_hibernate.rs | 10 +++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 36c4c7e8e5f..371e8cd8eb5 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2859,6 +2859,11 @@ where } fn reset_raft_tick(&mut self, state: GroupState) { + debug!( + 
"reset raft tick to {:?}", state; + "region_id"=> self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + ); self.fsm.reset_hibernate_state(state); self.fsm.missing_ticks = 0; self.fsm.peer.should_wake_up = false; diff --git a/tests/failpoints/cases/test_hibernate.rs b/tests/failpoints/cases/test_hibernate.rs index 74561396593..d8f73f312b6 100644 --- a/tests/failpoints/cases/test_hibernate.rs +++ b/tests/failpoints/cases/test_hibernate.rs @@ -102,10 +102,10 @@ fn test_store_disconnect_with_hibernate() { cluster.cfg.raft_store.raft_election_timeout_ticks = 10; cluster.cfg.raft_store.unreachable_backoff = ReadableDuration::millis(500); cluster.cfg.server.raft_client_max_backoff = ReadableDuration::millis(200); - // So the random election timeout will always be 10, which makes the case more - // stable. + // Use a small range but still random election timeouts, which makes the case + // more stable. cluster.cfg.raft_store.raft_min_election_timeout_ticks = 10; - cluster.cfg.raft_store.raft_max_election_timeout_ticks = 11; + cluster.cfg.raft_store.raft_max_election_timeout_ticks = 13; configure_for_hibernate(&mut cluster.cfg); cluster.pd_client.disable_default_operator(); let r = cluster.run_conf_change(); @@ -117,7 +117,7 @@ fn test_store_disconnect_with_hibernate() { must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); // Wait until all peers of region 1 hibernate. - thread::sleep(Duration::from_millis(base_tick_ms * 30)); + thread::sleep(Duration::from_millis(base_tick_ms * 40)); // Stop the region leader. fail::cfg("receive_raft_message_from_outside", "pause").unwrap(); @@ -129,7 +129,7 @@ fn test_store_disconnect_with_hibernate() { fail::remove("receive_raft_message_from_outside"); // Wait for a while. Peers of region 1 shouldn't hibernate. 
- thread::sleep(Duration::from_millis(base_tick_ms * 30)); + thread::sleep(Duration::from_millis(base_tick_ms * 40)); must_get_equal(&cluster.get_engine(2), b"k2", b"v2"); must_get_equal(&cluster.get_engine(3), b"k2", b"v2"); } From 820ed9395b97853145fea4a21d6d906cbcd4d2fb Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Sat, 16 Sep 2023 14:42:09 +0800 Subject: [PATCH 0919/1149] tikv-ctl v2: get_all_regions_in_store excludes `tombstone` (#15522) ref tikv/tikv#14654 get_all_regions_in_store should exclude tombstone Signed-off-by: SpadeA-Tang Co-authored-by: tonyxuqqi --- cmd/tikv-ctl/src/executor.rs | 15 ++++-- src/server/debug2.rs | 94 +++++++++++++++++++++++++++--------- 2 files changed, 80 insertions(+), 29 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index a145118acea..a20d6ce2602 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -1332,11 +1332,16 @@ impl DebugExecutor for DebuggerImplV2 { } fn get_region_size(&self, region: u64, cfs: Vec<&str>) -> Vec<(String, usize)> { - self.region_size(region, cfs) - .unwrap_or_else(|e| perror_and_exit("Debugger::region_size", e)) - .into_iter() - .map(|(cf, size)| (cf.to_owned(), size)) - .collect() + match self.region_size(region, cfs) { + Ok(v) => v + .into_iter() + .map(|(cf, size)| (cf.to_owned(), size)) + .collect(), + Err(e) => { + println!("Debugger::region_size: {}", e); + vec![] + } + } } fn get_region_info(&self, region: u64) -> RegionInfo { diff --git a/src/server/debug2.rs b/src/server/debug2.rs index 1ee1d108edc..7060b20bdb2 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -688,19 +688,19 @@ impl Debugger for DebuggerImplV2 { fn region_size>(&self, region_id: u64, cfs: Vec) -> Result> { match self.raft_engine.get_region_state(region_id, u64::MAX) { Ok(Some(region_state)) => { - if region_state.get_state() != PeerState::Normal { - return Err(Error::NotFound(format!( - "region {:?} has 
been deleted", - region_id - ))); - } let region = region_state.get_region(); + let state = region_state.get_state(); let start_key = &keys::data_key(region.get_start_key()); let end_key = &keys::data_end_key(region.get_end_key()); let mut sizes = vec![]; let mut tablet_cache = get_tablet_cache(&self.tablet_reg, region.id, Some(region_state))?; - let tablet = tablet_cache.latest().unwrap(); + let Some(tablet) = tablet_cache.latest() else { + return Err(Error::NotFound(format!( + "tablet not found, region_id={:?}, peer_state={:?}", + region_id, state + ))); + }; for cf in cfs { let mut size = 0; box_try!(tablet.scan(cf.as_ref(), start_key, end_key, false, |k, v| { @@ -731,7 +731,7 @@ impl Debugger for DebuggerImplV2 { )); } - let mut region_states = get_all_region_states_with_normal_state(&self.raft_engine); + let mut region_states = get_all_active_region_states(&self.raft_engine); region_states.sort_by(|r1, r2| { r1.get_region() @@ -786,12 +786,21 @@ impl Debugger for DebuggerImplV2 { fn get_all_regions_in_store(&self) -> Result> { let mut region_ids = vec![]; + let raft_engine = &self.raft_engine; self.raft_engine .for_each_raft_group::(&mut |region_id| { + let region_state = raft_engine + .get_region_state(region_id, u64::MAX) + .unwrap() + .unwrap(); + if region_state.state == PeerState::Tombstone { + return Ok(()); + } region_ids.push(region_id); Ok(()) }) .unwrap(); + region_ids.sort_unstable(); Ok(region_ids) } @@ -844,21 +853,29 @@ impl Debugger for DebuggerImplV2 { Err(e) => return Err(Error::EngineTrait(e)), }; - if region_state.state != PeerState::Normal { - return Err(Error::NotFound(format!("none region {:?}", region_id))); + let state = region_state.get_state(); + if state == PeerState::Tombstone { + return Err(Error::NotFound(format!( + "region {:?} is tombstone", + region_id + ))); } - let region = region_state.get_region(); - let start = keys::enc_start_key(region); - let end = keys::enc_end_key(region); - - let mut tablet_cache = - 
get_tablet_cache(&self.tablet_reg, region.id, Some(region_state.clone())).unwrap(); - let tablet = tablet_cache.latest().unwrap(); + let region = region_state.get_region().clone(); + let start = keys::enc_start_key(®ion); + let end = keys::enc_end_key(®ion); + + let mut tablet_cache = get_tablet_cache(&self.tablet_reg, region.id, Some(region_state))?; + let Some(tablet) = tablet_cache.latest() else { + return Err(Error::NotFound(format!( + "tablet not found, region_id={:?}, peer_state={:?}", + region_id, state + ))); + }; let mut res = dump_write_cf_properties(tablet, &start, &end)?; let mut res1 = dump_default_cf_properties(tablet, &start, &end)?; res.append(&mut res1); - let middle_key = match box_try!(get_region_approximate_middle(tablet, region)) { + let middle_key = match box_try!(get_region_approximate_middle(tablet, ®ion)) { Some(data_key) => keys::origin_key(&data_key).to_vec(), None => Vec::new(), }; @@ -1102,9 +1119,7 @@ fn get_tablet_cache( } } -fn get_all_region_states_with_normal_state( - raft_engine: &ER, -) -> Vec { +fn get_all_active_region_states(raft_engine: &ER) -> Vec { let mut region_states = vec![]; raft_engine .for_each_raft_group::(&mut |region_id| { @@ -1112,7 +1127,7 @@ fn get_all_region_states_with_normal_state( .get_region_state(region_id, u64::MAX) .unwrap() .unwrap(); - if region_state.state == PeerState::Normal { + if region_state.state != PeerState::Tombstone { region_states.push(region_state); } Ok(()) @@ -1133,7 +1148,7 @@ fn deivde_regions_for_concurrency( registry: &TabletRegistry, threads: u64, ) -> Result>> { - let region_states = get_all_region_states_with_normal_state(raft_engine); + let region_states = get_all_active_region_states(raft_engine); if threads == 1 { return Ok(vec![ @@ -1452,6 +1467,7 @@ mod tests { let mut wb = raft_engine.log_batch(10); wb.put_region_state(region_id, 10, &state).unwrap(); raft_engine.consume(&mut wb, true).unwrap(); + debugger.tablet_reg.remove(region_id); debugger.region_size(region_id, 
cfs.clone()).unwrap_err(); } @@ -1930,9 +1946,9 @@ mod tests { assert_eq!(region_info_2, region_info_2_before); } - #[test] // It tests that the latest apply state cannot be read as it is invisible // on persisted_applied + #[test] fn test_drop_unapplied_raftlog_2() { let dir = test_util::temp_dir("test-debugger", false); let debugger = new_debugger(dir.path()); @@ -1968,4 +1984,34 @@ mod tests { 80 ); } + + #[test] + fn test_get_all_regions_in_store() { + let dir = test_util::temp_dir("test-debugger", false); + let debugger = new_debugger(dir.path()); + let raft_engine = &debugger.raft_engine; + + init_region_state(raft_engine, 1, &[100, 101], 1); + init_region_state(raft_engine, 3, &[100, 101], 1); + init_region_state(raft_engine, 4, &[100, 101], 1); + + let mut lb = raft_engine.log_batch(3); + + let mut put_tombsotne_region = |region_id: u64| { + let mut region = metapb::Region::default(); + region.set_id(region_id); + let mut region_state = RegionLocalState::default(); + region_state.set_state(PeerState::Tombstone); + region_state.set_region(region.clone()); + lb.put_region_state(region_id, INITIAL_APPLY_INDEX, ®ion_state) + .unwrap(); + raft_engine.consume(&mut lb, true).unwrap(); + }; + + put_tombsotne_region(2); + put_tombsotne_region(5); + + let regions = debugger.get_all_regions_in_store().unwrap(); + assert_eq!(regions, vec![1, 3, 4]); + } } From 086965358f0109340b84261695fbeaccce3a62e2 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 18 Sep 2023 18:06:11 +0800 Subject: [PATCH 0920/1149] raftstore-v2: report async snapshot metrics to prometheus (#15562) ref tikv/tikv#15401 report async snapshot metrics to prometheus Signed-off-by: SpadeA-Tang --- .../raftstore-v2/src/operation/query/local.rs | 8 ++++- src/server/raftkv/mod.rs | 2 +- src/server/raftkv2/mod.rs | 32 ++++++++++++++++--- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/components/raftstore-v2/src/operation/query/local.rs 
b/components/raftstore-v2/src/operation/query/local.rs index 5f6d589eca6..fcc93636640 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -28,6 +28,7 @@ use raftstore::{ use slog::{debug, Logger}; use tikv_util::{box_err, codec::number::decode_u64, time::monotonic_raw_now, Either}; use time::Timespec; +use tracker::{get_tls_tracker_token, GLOBAL_TRACKERS}; use txn_types::WriteBatchFlags; use crate::{ @@ -335,7 +336,12 @@ where async move { let (mut fut, mut reader) = match res { - Either::Left(Ok(snap)) => return Ok(snap), + Either::Left(Ok(snap)) => { + GLOBAL_TRACKERS.with_tracker(get_tls_tracker_token(), |t| { + t.metrics.local_read = true; + }); + return Ok(snap); + } Either::Left(Err(e)) => return Err(e), Either::Right((fut, reader)) => (fut, reader), }; diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index f5b36dffbac..58287c2bb83 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -686,7 +686,7 @@ where tracker.metrics.read_index_propose_wait_nanos as f64 / 1_000_000_000.0, ); - // snapshot may be hanlded by lease read in raftstore + // snapshot may be handled by lease read in raftstore if tracker.metrics.read_index_confirm_wait_nanos > 0 { ASYNC_REQUESTS_DURATIONS_VEC .snapshot_read_index_confirm diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 81143e6c2be..9785e821312 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -28,6 +28,7 @@ use raftstore_v2::{ }; use tikv_kv::{Modify, WriteEvent}; use tikv_util::time::Instant; +use tracker::{get_tls_tracker_token, GLOBAL_TRACKERS}; use txn_types::{TxnExtra, TxnExtraScheduler, WriteBatchFlags}; use super::{ @@ -172,7 +173,7 @@ impl tikv_kv::Engine for RaftKv2 { .set_key_ranges(mem::take(&mut ctx.key_ranges).into()); } ASYNC_REQUESTS_COUNTER_VEC.snapshot.all.inc(); - let begin_instant = Instant::now_coarse(); + let begin_instant = Instant::now(); let mut 
header = new_request_header(ctx.pb_ctx); let mut flags = 0; @@ -200,9 +201,32 @@ impl tikv_kv::Engine for RaftKv2 { let res = f.await; match res { Ok(snap) => { - ASYNC_REQUESTS_DURATIONS_VEC - .snapshot - .observe(begin_instant.saturating_elapsed_secs()); + let elapse = begin_instant.saturating_elapsed_secs(); + let tracker = get_tls_tracker_token(); + GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { + if tracker.metrics.read_index_propose_wait_nanos > 0 { + ASYNC_REQUESTS_DURATIONS_VEC + .snapshot_read_index_propose_wait + .observe( + tracker.metrics.read_index_propose_wait_nanos as f64 + / 1_000_000_000.0, + ); + // snapshot may be handled by lease read in raftstore + if tracker.metrics.read_index_confirm_wait_nanos > 0 { + ASYNC_REQUESTS_DURATIONS_VEC + .snapshot_read_index_confirm + .observe( + tracker.metrics.read_index_confirm_wait_nanos as f64 + / 1_000_000_000.0, + ); + } + } else if tracker.metrics.local_read { + ASYNC_REQUESTS_DURATIONS_VEC + .snapshot_local_read + .observe(elapse); + } + }); + ASYNC_REQUESTS_DURATIONS_VEC.snapshot.observe(elapse); ASYNC_REQUESTS_COUNTER_VEC.snapshot.success.inc(); Ok(snap) } From 4a5fb7321ca2ee2bab0b31f6556c8fb196a590f4 Mon Sep 17 00:00:00 2001 From: glorv Date: Mon, 18 Sep 2023 18:24:11 +0800 Subject: [PATCH 0921/1149] test: make test test_destroy_missing more stable (#15616) close tikv/tikv#15615 Signed-off-by: glorv Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- components/raftstore-v2/src/worker/tablet.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index 0b0429eb8d1..ef9739226e7 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -602,6 +602,13 @@ impl Runner { } } +#[cfg(test)] +impl Runner { + pub fn get_running_task_count(&self) -> usize { + self.low_pri_pool.get_running_task_count() + } +} + impl Runnable for 
Runner where EK: KvEngine, @@ -822,6 +829,14 @@ mod tests { runner.run(Task::destroy(r_1, 100)); assert!(path.exists()); registry.remove(r_1); + // waiting for async `pause_background_work` to be finished, + // this task can block tablet's destroy. + for _i in 0..100 { + if runner.get_running_task_count() == 0 { + break; + } + std::thread::sleep(Duration::from_millis(5)); + } runner.on_timeout(); assert!(!path.exists()); assert!(runner.pending_destroy_tasks.is_empty()); From 2db4b895a1e82d32830493eb10cea30925f65c7e Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 20 Sep 2023 10:40:42 +0800 Subject: [PATCH 0922/1149] raftstore-v2: fix rollback merge and commit merge can happen simultaneously (#15625) ref tikv/tikv#15242 fix rollback merge and commit merge can happen simultaneously Signed-off-by: SpadeA-Tang --- .../operation/command/admin/merge/commit.rs | 10 +- .../operation/command/admin/merge/rollback.rs | 12 +- components/raftstore-v2/src/raft/peer.rs | 10 ++ tests/failpoints/cases/test_merge.rs | 147 ++++++++++++++++++ 4 files changed, 177 insertions(+), 2 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index 5bd92e3ea1c..5208dcc96a8 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -319,7 +319,7 @@ impl Peer { region ); assert!(!self.storage().has_dirty_data()); - if self.is_leader() { + if self.is_leader() && !self.leader_transferring() { let index = commit_of_merge(req.get_admin_request().get_commit_merge()); if self.proposal_control().is_merging() { // `on_admin_command` may delay our request indefinitely. 
It's better to check @@ -341,12 +341,19 @@ impl Peer { "res" => ?res, ); } else { + fail::fail_point!("on_propose_commit_merge_success"); return; } } let _ = store_ctx .router .force_send(source_id, PeerMsg::RejectCommitMerge { index }); + } else if self.leader_transferring() { + info!( + self.logger, + "not to propose commit merge when transferring leader"; + "transferee" => self.leader_transferee(), + ); } } else { info!( @@ -362,6 +369,7 @@ impl Peer { store_ctx: &mut StoreContext, req: RaftCmdRequest, ) -> Result { + (|| fail::fail_point!("propose_commit_merge_1", store_ctx.store_id == 1, |_| {}))(); let mut proposal_ctx = ProposalContext::empty(); proposal_ctx.insert(ProposalContext::COMMIT_MERGE); let data = req.write_to_bytes().unwrap(); diff --git a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs index cb45fdcf1cf..d931a295f4d 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs @@ -38,6 +38,7 @@ impl Peer { store_ctx: &mut StoreContext, index: u64, ) { + fail::fail_point!("on_reject_commit_merge_1", store_ctx.store_id == 1, |_| {}); let self_index = self.merge_context().and_then(|c| c.prepare_merge_index()); if self_index != Some(index) { info!( @@ -75,7 +76,7 @@ impl Apply { pub fn apply_rollback_merge( &mut self, req: &AdminRequest, - _index: u64, + index: u64, ) -> Result<(AdminResponse, AdminCmdResult)> { fail::fail_point!("apply_rollback_merge"); PEER_ADMIN_CMD_COUNTER.rollback_merge.all.inc(); @@ -95,6 +96,15 @@ impl Apply { "state" => ?merge_state, ); } + + let prepare_merge_commit = rollback.commit; + info!( + self.logger, + "execute RollbackMerge"; + "commit" => prepare_merge_commit, + "index" => index, + ); + let mut region = self.region().clone(); let version = region.get_region_epoch().get_version(); // Update version to avoid duplicated rollback 
requests. diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index c3a80e3756c..87d41de776c 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -862,6 +862,16 @@ impl Peer { ) } + #[inline] + pub fn leader_transferee(&self) -> u64 { + self.leader_transferee + } + + #[inline] + pub fn leader_transferring(&self) -> bool { + self.leader_transferee != raft::INVALID_ID + } + #[inline] pub fn long_uncommitted_threshold(&self) -> Duration { Duration::from_secs(self.long_uncommitted_threshold) diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index eb6b8a235e1..08b7474bb8e 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -3,6 +3,7 @@ use std::{ sync::{ atomic::{AtomicBool, Ordering}, + mpsc::{channel, Sender}, *, }, thread, @@ -19,6 +20,7 @@ use kvproto::{ use pd_client::PdClient; use raft::eraftpb::MessageType; use raftstore::store::*; +use raftstore_v2::router::PeerMsg; use test_raftstore::*; use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{config::*, future::block_on_timeout, time::Instant, HandyRwLock}; @@ -1706,3 +1708,148 @@ fn test_destroy_source_peer_while_merging() { must_get_equal(&cluster.get_engine(i), b"k5", b"v5"); } } + +struct MsgTimeoutFilter { + tx: Sender, +} + +impl Filter for MsgTimeoutFilter { + fn before(&self, msgs: &mut Vec) -> raftstore::Result<()> { + let mut res = Vec::with_capacity(msgs.len()); + for m in msgs.drain(..) { + if m.get_message().msg_type == MessageType::MsgTimeoutNow { + self.tx.send(m).unwrap(); + } else { + res.push(m); + } + } + + *msgs = res; + check_messages(msgs) + } +} + +// Concurrent execution between transfer leader and merge can cause rollback and +// commit merge at the same time before this fix which corrupt the region. 
+// It can happen as this: +// Assume at the begin, leader of source and target are both on node-1 +// 1. node-1 transfer leader to node-2: execute up to sending MsgTimeoutNow +// (leader_transferre has been set), but before becoming follower. +// 2. node-1 source region propose, and apply PrepareMerge +// 3. node-1 target region propose CommitMerge but fail (due to +// leader_transferre being set) +// 4. node-1 source region successfully proposed rollback merge +// 5. node-2 target region became leader and apply the first no-op entry +// 6. node-2 target region successfully proposed commit merge +// Now, rollback at source region and commit at target region are both proposed +// and will be executed which will cause region corrupt +#[test] +fn test_concurrent_between_transfer_leader_and_merge() { + use test_raftstore_v2::*; + let mut cluster = new_node_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + cluster.run(); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + for i in 0..3 { + must_get_equal(&cluster.get_engine(i + 1), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(i + 1), b"k3", b"v3"); + } + + let pd_client = Arc::clone(&cluster.pd_client); + let region = pd_client.get_region(b"k1").unwrap(); + cluster.must_split(®ion, b"k2"); + + let right = pd_client.get_region(b"k1").unwrap(); + let left = pd_client.get_region(b"k3").unwrap(); + cluster.must_transfer_leader( + left.get_id(), + left.get_peers() + .iter() + .find(|p| p.store_id == 1) + .cloned() + .unwrap(), + ); + + cluster.must_transfer_leader( + right.get_id(), + right + .get_peers() + .iter() + .find(|p| p.store_id == 1) + .cloned() + .unwrap(), + ); + + // Source region: 1, Target Region: 1000 + // Let target region in leader_transfering status by interceptting MsgTimeoutNow + // msg by using Filter. So we make node-1-1000 be in leader_transferring status + // for some time. 
+ let (tx, rx_msg) = channel(); + let filter = MsgTimeoutFilter { tx }; + cluster.add_send_filter_on_node(1, Box::new(filter)); + + pd_client.transfer_leader( + right.get_id(), + right + .get_peers() + .iter() + .find(|p| p.store_id == 2) + .cloned() + .unwrap(), + vec![], + ); + + let msg = rx_msg.recv().unwrap(); + + // Now, node-1-1000 is in leader_transferring status. After it reject proposing + // commit merge, make node-1-1 block before proposing rollback merge until + // node-2-1000 propose commit merge. + + fail::cfg("on_reject_commit_merge_1", "pause").unwrap(); + + let router = cluster.get_router(2).unwrap(); + let (tx, rx) = channel(); + let _ = fail::cfg_callback("propose_commit_merge_1", move || { + tx.send(()).unwrap(); + }); + + let (tx2, rx2) = channel(); + let _ = fail::cfg_callback("on_propose_commit_merge_success", move || { + tx2.send(()).unwrap(); + }); + + cluster.merge_region(left.get_id(), right.get_id(), Callback::None); + + // Actually, store 1 should not reach the line of propose_commit_merge_1 + let _ = rx.recv_timeout(Duration::from_secs(2)); + router + .force_send(msg.get_region_id(), PeerMsg::RaftMessage(Box::new(msg))) + .unwrap(); + + // Wait region 1 of node 2 to become leader + rx2.recv().unwrap(); + fail::remove("on_reject_commit_merge_1"); + + let timer = Instant::now(); + loop { + if right.get_region_epoch().get_version() + == cluster.get_region_epoch(right.get_id()).get_version() + { + if timer.saturating_elapsed() > Duration::from_secs(5) { + panic!("region {:?} is still not merged.", right); + } + } else { + break; + } + sleep_ms(10); + } + + let region = pd_client.get_region(b"k1").unwrap(); + assert_eq!(region.get_id(), right.get_id()); + assert_eq!(region.get_start_key(), right.get_start_key()); + assert_eq!(region.get_end_key(), left.get_end_key()); + + cluster.must_put(b"k4", b"v4"); +} From ec4a9002f153f86c609e902ba685eee7a1224e6c Mon Sep 17 00:00:00 2001 From: lucasliang Date: Wed, 20 Sep 2023 11:51:13 +0800 Subject: 
[PATCH 0923/1149] raftstore: upgrade tokio timer to fix insecure issues. (#15622) ref tikv/tikv#15621 Signed-off-by: lucasliang --- Cargo.lock | 42 ++++++++++++--------------------- Cargo.toml | 1 + components/tikv_util/Cargo.toml | 2 +- 3 files changed, 17 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fb5e711d34d..34f9c381958 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1290,7 +1290,7 @@ dependencies = [ "crossbeam-deque", "crossbeam-epoch", "crossbeam-queue", - "crossbeam-utils 0.8.8", + "crossbeam-utils", ] [[package]] @@ -1300,7 +1300,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" dependencies = [ "cfg-if 1.0.0", - "crossbeam-utils 0.8.8", + "crossbeam-utils", ] [[package]] @@ -1311,7 +1311,7 @@ checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ "cfg-if 1.0.0", "crossbeam-epoch", - "crossbeam-utils 0.8.8", + "crossbeam-utils", ] [[package]] @@ -1322,7 +1322,7 @@ checksum = "1145cf131a2c6ba0615079ab6a638f7e1973ac9c2634fcbeaaad6114246efe8c" dependencies = [ "autocfg", "cfg-if 1.0.0", - "crossbeam-utils 0.8.8", + "crossbeam-utils", "lazy_static", "memoffset 0.6.4", "scopeguard", @@ -1335,7 +1335,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f25d8400f4a7a5778f0e4e52384a48cbd9b5c495d110786187fc750075277a2" dependencies = [ "cfg-if 1.0.0", - "crossbeam-utils 0.8.8", + "crossbeam-utils", ] [[package]] @@ -1346,21 +1346,10 @@ checksum = "883a5821d7d079fcf34ac55f27a833ee61678110f6b97637cc74513c0d0b42fc" dependencies = [ "cfg-if 1.0.0", "crossbeam-epoch", - "crossbeam-utils 0.8.8", + "crossbeam-utils", "scopeguard", ] -[[package]] -name = "crossbeam-utils" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" -dependencies = [ - "autocfg", - "cfg-if 
0.1.10", - "lazy_static", -] - [[package]] name = "crossbeam-utils" version = "0.8.8" @@ -1989,7 +1978,7 @@ dependencies = [ "bcc", "collections", "crc32fast", - "crossbeam-utils 0.8.8", + "crossbeam-utils", "fs2", "lazy_static", "libc 0.2.146", @@ -4839,7 +4828,7 @@ checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" dependencies = [ "crossbeam-channel", "crossbeam-deque", - "crossbeam-utils 0.8.8", + "crossbeam-utils", "num_cpus", ] @@ -7136,11 +7125,10 @@ dependencies = [ [[package]] name = "tokio-executor" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb2d1b8f4548dbf5e1f7818512e9c406860678f29c300cdf0ebac72d1a3a1671" +version = "0.1.9" +source = "git+https://github.com/tikv/tokio?branch=tokio-timer-hotfix#4394380fa3c1f7f2c702a4ccc5ff01384746fdfd" dependencies = [ - "crossbeam-utils 0.7.2", + "crossbeam-utils", "futures 0.1.31", ] @@ -7201,9 +7189,9 @@ dependencies = [ [[package]] name = "tokio-timer" version = "0.2.13" -source = "git+https://github.com/tikv/tokio?branch=tokio-timer-hotfix#e8ac149d93f4a9bf49ea569d8d313ee40c5eb448" +source = "git+https://github.com/tikv/tokio?branch=tokio-timer-hotfix#4394380fa3c1f7f2c702a4ccc5ff01384746fdfd" dependencies = [ - "crossbeam-utils 0.7.2", + "crossbeam-utils", "futures 0.1.31", "slab", "tokio-executor", @@ -7377,7 +7365,7 @@ name = "tracker" version = "0.0.1" dependencies = [ "collections", - "crossbeam-utils 0.8.8", + "crossbeam-utils", "kvproto", "lazy_static", "parking_lot 0.12.1", @@ -7907,7 +7895,7 @@ source = "git+https://github.com/tikv/yatp.git?branch=master#5572a78702572087cab dependencies = [ "crossbeam-deque", "crossbeam-skiplist", - "crossbeam-utils 0.8.8", + "crossbeam-utils", "dashmap", "fail", "lazy_static", diff --git a/Cargo.toml b/Cargo.toml index 5bc49b17e42..c4c70e999be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -368,6 +368,7 @@ tipb = { git = "https://github.com/pingcap/tipb.git" } kvproto = { git = 
"https://github.com/pingcap/kvproto.git" } yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } +tokio-executor = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 6de354fa259..b502a701136 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -57,7 +57,7 @@ thiserror = "1.0" tikv_alloc = { workspace = true } time = "0.1" tokio = { version = "1.5", features = ["rt-multi-thread"] } -tokio-executor = "0.1" +tokio-executor = { workspace = true } tokio-timer = { workspace = true } tracker = { workspace = true } url = "2" From 76df17e2c67e139a79653293b566d604a94a0352 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Wed, 20 Sep 2023 14:22:43 +0800 Subject: [PATCH 0924/1149] log backup: fix the race of on events and do flush (#15618) close tikv/tikv#15602 Signed-off-by: 3pointer --- components/backup-stream/src/router.rs | 132 ++++++++++++++++++++++--- 1 file changed, 116 insertions(+), 16 deletions(-) diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 6ce8486109f..b2fd9acc743 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -708,22 +708,25 @@ impl TempFileKey { /// The full name of the file owns the key. 
fn temp_file_name(&self) -> String { + let timestamp = (|| { + fail::fail_point!("temp_file_name_timestamp", |t| t.map_or_else( + || TimeStamp::physical_now(), + |v| + // reduce the precision of timestamp + v.parse::().ok().map_or(0, |u| TimeStamp::physical_now() / u) + )); + TimeStamp::physical_now() + })(); + let uuid = uuid::Uuid::new_v4(); if self.is_meta { format!( - "meta_{:08}_{}_{:?}_{}.temp.log", - self.region_id, - self.cf, - self.cmd_type, - TimeStamp::physical_now(), + "meta_{:08}_{}_{:?}_{:?}_{}.temp.log", + self.region_id, self.cf, self.cmd_type, uuid, timestamp, ) } else { format!( - "{:08}_{:08}_{}_{:?}_{}.temp.log", - self.table_id, - self.region_id, - self.cf, - self.cmd_type, - TimeStamp::physical_now(), + "{:08}_{:08}_{}_{:?}_{:?}_{}.temp.log", + self.table_id, self.region_id, self.cf, self.cmd_type, uuid, timestamp, ) } } @@ -864,6 +867,7 @@ impl StreamTaskInfo { } async fn on_events_of_key(&self, key: TempFileKey, events: ApplyEvents) -> Result<()> { + fail::fail_point!("before_generate_temp_file"); if let Some(f) = self.files.read().await.get(&key) { self.total_size .fetch_add(f.lock().await.on_events(events).await?, Ordering::SeqCst); @@ -886,6 +890,7 @@ impl StreamTaskInfo { let f = w.get(&key).unwrap(); self.total_size .fetch_add(f.lock().await.on_events(events).await?, Ordering::SeqCst); + fail::fail_point!("after_write_to_file"); Ok(()) } @@ -970,7 +975,9 @@ impl StreamTaskInfo { pub async fn move_to_flushing_files(&self) -> Result<&Self> { // if flushing_files is not empty, which represents this flush is a retry // operation. - if !self.flushing_files.read().await.is_empty() { + if !self.flushing_files.read().await.is_empty() + || !self.flushing_meta_files.read().await.is_empty() + { return Ok(self); } @@ -1032,7 +1039,12 @@ impl StreamTaskInfo { // and push it into merged_file_info(DataFileGroup). 
file_info_clone.set_range_offset(stat_length); data_files_open.push({ - let file = shared_pool.open_raw_for_read(data_file.inner.path())?; + let file = shared_pool + .open_raw_for_read(data_file.inner.path()) + .context(format_args!( + "failed to open read file {:?}", + data_file.inner.path() + ))?; let compress_length = file.len().await?; stat_length += compress_length; file_info_clone.set_range_length(compress_length); @@ -1097,7 +1109,6 @@ impl StreamTaskInfo { .await?; self.merge_log(metadata, storage.clone(), &self.flushing_meta_files, true) .await?; - Ok(()) } @@ -1157,7 +1168,8 @@ impl StreamTaskInfo { UnpinReader(Box::new(Cursor::new(meta_buff))), buflen as _, ) - .await?; + .await + .context(format_args!("flush meta {:?}", meta_path))?; } Ok(()) } @@ -1191,13 +1203,14 @@ impl StreamTaskInfo { .await? .generate_metadata(store_id) .await?; + + fail::fail_point!("after_moving_to_flushing_files"); crate::metrics::FLUSH_DURATION .with_label_values(&["generate_metadata"]) .observe(sw.lap().as_secs_f64()); // flush log file to storage. self.flush_log(&mut metadata_info).await?; - // the field `min_resolved_ts` of metadata will be updated // only after flush is done. metadata_info.min_resolved_ts = metadata_info @@ -2413,4 +2426,91 @@ mod tests { let r = cfg_manager.dispatch(changed); assert!(r.is_err()); } + + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn test_flush_on_events_race() -> Result<()> { + let (tx, _rx) = dummy_scheduler(); + let tmp = std::env::temp_dir().join(format!("{}", uuid::Uuid::new_v4())); + let router = Arc::new(RouterInner::new( + tx, + Config { + prefix: tmp.clone(), + // disable auto flush. 
+ temp_file_size_limit: 1000, + temp_file_memory_quota: 2, + max_flush_interval: Duration::from_secs(300), + }, + )); + + let (task, _path) = task("race".to_owned()).await?; + must_register_table(router.as_ref(), task, 1).await; + router + .must_mut_task_info("race", |i| { + i.storage = Arc::new(NoopStorage::default()); + }) + .await; + let mut b = KvEventsBuilder::new(42, 0); + b.put_table(CF_DEFAULT, 1, b"k1", b"v1"); + let events_before_flush = b.finish(); + + b.put_table(CF_DEFAULT, 1, b"k1", b"v1"); + let events_after_flush = b.finish(); + + // make timestamp precision to 1 seconds. + fail::cfg("temp_file_name_timestamp", "return(1000)").unwrap(); + + let (trigger_tx, trigger_rx) = std::sync::mpsc::sync_channel(0); + let trigger_rx = std::sync::Mutex::new(trigger_rx); + + let (fp_tx, fp_rx) = std::sync::mpsc::sync_channel(0); + let fp_rx = std::sync::Mutex::new(fp_rx); + + let t = router.get_task_info("race").await.unwrap(); + let _ = router.on_events(events_before_flush).await; + + // make generate temp files ***happen after*** moving files to flushing_files + // and read flush file ***happen between*** genenrate file name and + // write kv to file. T1 is write thread. T2 is flush thread + // The order likes + // [T1] generate file name -> [T2] moving files to flushing_files -> [T1] write + // kv to file -> [T2] read flush file. + fail::cfg_callback("after_write_to_file", move || { + fp_tx.send(()).unwrap(); + }) + .unwrap(); + + fail::cfg_callback("before_generate_temp_file", move || { + trigger_rx.lock().unwrap().recv().unwrap(); + }) + .unwrap(); + + fail::cfg_callback("after_moving_to_flushing_files", move || { + trigger_tx.send(()).unwrap(); + fp_rx.lock().unwrap().recv().unwrap(); + }) + .unwrap(); + + // set flush status to true, because we disabled the auto flush. 
+ t.set_flushing_status(true); + let router_clone = router.clone(); + let _ = tokio::join!( + // do flush in another thread + tokio::spawn(async move { + router_clone.do_flush("race", 42, TimeStamp::max()).await; + }), + router.on_events(events_after_flush) + ); + fail::remove("after_write_to_file"); + fail::remove("before_generate_temp_file"); + fail::remove("after_moving_to_flushing_files"); + fail::remove("temp_file_name_timestamp"); + + // set flush status to true, because we disabled the auto flush. + t.set_flushing_status(true); + let res = router.do_flush("race", 42, TimeStamp::max()).await; + // this time flush should success. + assert!(res.is_some()); + assert_eq!(t.files.read().await.len(), 0,); + Ok(()) + } } From 641f9b8dab1d8770ef5fded564490f8dbc094b74 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Wed, 20 Sep 2023 14:42:13 +0800 Subject: [PATCH 0925/1149] metrics: make disk usage clearer in the grafana (#15583) close tikv/tikv#15582 add metrics for detail disk usage. Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/pd_client/src/metrics.rs | 24 +++++++++++-- .../src/operation/command/write/ingest.rs | 3 ++ .../raftstore-v2/src/worker/pd/store.rs | 30 ++++++++-------- components/raftstore/src/store/fsm/store.rs | 4 ++- components/raftstore/src/store/worker/pd.rs | 36 +++++++++---------- components/sst_importer/src/sst_importer.rs | 19 +++++++++- metrics/grafana/tikv_details.json | 29 ++++++++++++++- 7 files changed, 106 insertions(+), 39 deletions(-) diff --git a/components/pd_client/src/metrics.rs b/components/pd_client/src/metrics.rs index 4e185658f15..7e7121170d6 100644 --- a/components/pd_client/src/metrics.rs +++ b/components/pd_client/src/metrics.rs @@ -48,6 +48,20 @@ make_static_metric! 
{ try_connect, } + pub label_enum StoreSizeEventType { + capacity, + available, + used, + snap_size, + raft_size, + kv_size, + import_size, + } + + pub struct StoreSizeEventIntrVec: IntGauge { + "type" => StoreSizeEventType, + } + pub struct PDRequestEventHistogramVec: Histogram { "type" => PDRequestEventType, } @@ -101,8 +115,14 @@ lazy_static! { &["type"] ) .unwrap(); - pub static ref STORE_SIZE_GAUGE_VEC: IntGaugeVec = - register_int_gauge_vec!("tikv_store_size_bytes", "Size of storage.", &["type"]).unwrap(); + pub static ref STORE_SIZE_EVENT_INT_VEC: StoreSizeEventIntrVec = + register_static_int_gauge_vec!( + StoreSizeEventIntrVec, + "tikv_store_size_bytes", + "Size of storage.", + &["type"] + ) + .unwrap(); pub static ref REGION_READ_KEYS_HISTOGRAM: Histogram = register_histogram!( "tikv_region_read_keys", "Histogram of keys written for regions", diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index 92f5923d167..e963434fe83 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -4,6 +4,7 @@ use collections::HashMap; use crossbeam::channel::TrySendError; use engine_traits::{data_cf_offset, KvEngine, RaftEngine, DATA_CFS_LEN}; use kvproto::import_sstpb::SstMeta; +use pd_client::metrics::STORE_SIZE_EVENT_INT_VEC; use raftstore::{ store::{check_sst_for_ingestion, metrics::PEER_WRITE_CMD_COUNTER, util}, Result, @@ -39,6 +40,8 @@ impl Store { &mut self, ctx: &mut StoreContext, ) -> Result<()> { + let import_size = box_try!(ctx.sst_importer.get_total_size()); + STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(ctx.sst_importer.list_ssts()); if ssts.is_empty() { return Ok(()); diff --git a/components/raftstore-v2/src/worker/pd/store.rs b/components/raftstore-v2/src/worker/pd/store.rs index a5aad42d85c..b3fd3245be6 100644 --- 
a/components/raftstore-v2/src/worker/pd/store.rs +++ b/components/raftstore-v2/src/worker/pd/store.rs @@ -9,7 +9,7 @@ use kvproto::pdpb; use pd_client::{ metrics::{ REGION_READ_BYTES_HISTOGRAM, REGION_READ_KEYS_HISTOGRAM, REGION_WRITTEN_BYTES_HISTOGRAM, - REGION_WRITTEN_KEYS_HISTOGRAM, STORE_SIZE_GAUGE_VEC, + REGION_WRITTEN_KEYS_HISTOGRAM, STORE_SIZE_EVENT_INT_VEC, }, PdClient, }; @@ -263,15 +263,9 @@ where self.store_stat.region_bytes_read.flush(); self.store_stat.region_keys_read.flush(); - STORE_SIZE_GAUGE_VEC - .with_label_values(&["capacity"]) - .set(capacity as i64); - STORE_SIZE_GAUGE_VEC - .with_label_values(&["available"]) - .set(available as i64); - STORE_SIZE_GAUGE_VEC - .with_label_values(&["used"]) - .set(used_size as i64); + STORE_SIZE_EVENT_INT_VEC.capacity.set(capacity as i64); + STORE_SIZE_EVENT_INT_VEC.available.set(available as i64); + STORE_SIZE_EVENT_INT_VEC.used.set(used_size as i64); // Update slowness statistics self.update_slowness_in_store_stats(&mut stats, last_query_sum); @@ -473,12 +467,16 @@ where true }); let snap_size = self.snap_mgr.total_snap_size().unwrap(); - let used_size = snap_size - + kv_size - + self - .raft_engine - .get_engine_size() - .expect("raft engine used size"); + let raft_size = self + .raft_engine + .get_engine_size() + .expect("engine used size"); + + STORE_SIZE_EVENT_INT_VEC.kv_size.set(kv_size as i64); + STORE_SIZE_EVENT_INT_VEC.raft_size.set(raft_size as i64); + STORE_SIZE_EVENT_INT_VEC.snap_size.set(snap_size as i64); + + let used_size = snap_size + kv_size + raft_size; let mut available = capacity.checked_sub(used_size).unwrap_or_default(); // We only care about rocksdb SST file size, so we should check disk available // here. 
diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 53559bbe1b8..2434dfdd8e6 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -43,7 +43,7 @@ use kvproto::{ raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, RaftMessage, RegionLocalState}, replication_modepb::{ReplicationMode, ReplicationStatus}, }; -use pd_client::{Feature, FeatureGate, PdClient}; +use pd_client::{metrics::STORE_SIZE_EVENT_INT_VEC, Feature, FeatureGate, PdClient}; use protobuf::Message; use raft::StateRole; use resource_control::{channel::unbounded, ResourceGroupManager}; @@ -2791,6 +2791,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER fn on_cleanup_import_sst(&mut self) -> Result<()> { let mut delete_ssts = Vec::new(); let mut validate_ssts = Vec::new(); + let import_size = box_try!(self.ctx.importer.get_total_size()); + STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(self.ctx.importer.list_ssts()); if ssts.is_empty() { diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index cb067ca840b..6aa192bd28e 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1359,15 +1359,9 @@ where self.store_stat.region_bytes_read.flush(); self.store_stat.region_keys_read.flush(); - STORE_SIZE_GAUGE_VEC - .with_label_values(&["capacity"]) - .set(capacity as i64); - STORE_SIZE_GAUGE_VEC - .with_label_values(&["available"]) - .set(available as i64); - STORE_SIZE_GAUGE_VEC - .with_label_values(&["used"]) - .set(used_size as i64); + STORE_SIZE_EVENT_INT_VEC.capacity.set(capacity as i64); + STORE_SIZE_EVENT_INT_VEC.available.set(available as i64); + STORE_SIZE_EVENT_INT_VEC.used.set(used_size as i64); let slow_score = self.slow_score.get(); stats.set_slow_score(slow_score as u64); @@ -2590,15 +2584,21 @@ fn 
collect_engine_size( } else { store_info.capacity }; - let used_size = snap_mgr_size - + store_info - .kv_engine - .get_engine_used_size() - .expect("kv engine used size") - + store_info - .raft_engine - .get_engine_size() - .expect("raft engine used size"); + let raft_size = store_info + .raft_engine + .get_engine_size() + .expect("raft engine used size"); + + let kv_size = store_info + .kv_engine + .get_engine_used_size() + .expect("kv engine used size"); + + STORE_SIZE_EVENT_INT_VEC.raft_size.set(raft_size as i64); + STORE_SIZE_EVENT_INT_VEC.snap_size.set(snap_mgr_size as i64); + STORE_SIZE_EVENT_INT_VEC.kv_size.set(kv_size as i64); + + let used_size = snap_mgr_size + kv_size + raft_size; let mut available = capacity.checked_sub(used_size).unwrap_or_default(); // We only care about rocksdb SST file size, so we should check disk available // here. diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 502a81ff6a6..910cfa602dd 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -4,7 +4,7 @@ use std::{ borrow::Cow, collections::HashMap, fs::File, - io::{self, BufReader, Read}, + io::{self, BufReader, ErrorKind, Read}, ops::Bound, path::{Path, PathBuf}, sync::{ @@ -293,6 +293,23 @@ impl SstImporter { path.save } + pub fn get_total_size(&self) -> Result { + let mut total_size = 0; + for entry in file_system::read_dir(self.dir.get_root_dir())? 
{ + match entry.and_then(|e| e.metadata().map(|m| (e, m))) { + Ok((_, m)) => { + if !m.is_file() { + continue; + } + total_size += m.len(); + } + Err(e) if e.kind() == ErrorKind::NotFound => continue, + Err(e) => return Err(Error::from(e)), + }; + } + Ok(total_size) + } + pub fn create(&self, meta: &SstMeta) -> Result { match self.dir.create(meta, self.key_manager.clone()) { Ok(f) => { diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index c31ee12b27b..57c88782031 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -422,9 +422,36 @@ "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"used\"}) by (instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}}-used", "refId": "A", "step": 10 + }, + { + "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"kv_size\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}-kv_size", + "refId": "B", + "step": 10, + "hide": true + }, + { + "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"raft_size\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}-raft_size", + "refId": "C", + "step": 10, + "hide": true + }, + { + "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"import_size\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}-import_size", + "refId": "D", + "step": 10, + "hide": true } ], "thresholds": [], From 10f51d8478e488dcef026b4d2e7fdeea80f478eb Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 20 Sep 2023 14:55:43 +0800 Subject: [PATCH 
0926/1149] resolved_ts: remove hash set to save memory (#15554) close tikv/tikv#15553 The Resolver uses a hash set to keep track of locks associated with the same timestamp. When the length of the hash set reaches zero, it indicates that the transaction has been fully committed. To save memory, we can replace the hash set with an integer. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../backup-stream/src/subscription_track.rs | 26 ++- components/cdc/src/initializer.rs | 14 +- components/resolved_ts/src/endpoint.rs | 38 ++-- components/resolved_ts/src/resolver.rs | 210 ++++++++++++------ 4 files changed, 181 insertions(+), 107 deletions(-) diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 0803ba1b99a..2dae8ce745d 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -8,7 +8,7 @@ use dashmap::{ }; use kvproto::metapb::Region; use raftstore::coprocessor::*; -use resolved_ts::{Resolver, TsSource}; +use resolved_ts::{Resolver, TsSource, TxnLocks}; use tikv_util::{info, memory::MemoryQuota, warn}; use txn_types::TimeStamp; @@ -99,7 +99,7 @@ impl ActiveSubscription { pub enum CheckpointType { MinTs, StartTsOfInitialScan, - StartTsOfTxn(Option>), + StartTsOfTxn(Option<(TimeStamp, TxnLocks)>), } impl std::fmt::Debug for CheckpointType { @@ -109,10 +109,7 @@ impl std::fmt::Debug for CheckpointType { Self::StartTsOfInitialScan => write!(f, "StartTsOfInitialScan"), Self::StartTsOfTxn(arg0) => f .debug_tuple("StartTsOfTxn") - .field(&format_args!( - "{}", - utils::redact(&arg0.as_ref().map(|x| x.as_ref()).unwrap_or(&[])) - )) + .field(&format_args!("{:?}", arg0)) .finish(), } } @@ -466,9 +463,11 @@ impl std::fmt::Debug for FutureLock { impl TwoPhaseResolver { /// try to get one of the key of the oldest lock in the resolver. 
- pub fn sample_far_lock(&self) -> Option> { - let (_, keys) = self.resolver.locks().first_key_value()?; - keys.iter().next().cloned() + pub fn sample_far_lock(&self) -> Option<(TimeStamp, TxnLocks)> { + self.resolver + .locks() + .first_key_value() + .map(|(ts, txn_locks)| (*ts, txn_locks.clone())) } pub fn in_phase_one(&self) -> bool { @@ -572,6 +571,7 @@ mod test { use kvproto::metapb::{Region, RegionEpoch}; use raftstore::coprocessor::ObserveHandle; + use resolved_ts::TxnLocks; use txn_types::TimeStamp; use super::{SubscriptionTracer, TwoPhaseResolver}; @@ -674,7 +674,13 @@ mod test { ( region(4, 8, 1), 128.into(), - StartTsOfTxn(Some(Arc::from(b"Alpi".as_slice()))) + StartTsOfTxn(Some(( + TimeStamp::new(128), + TxnLocks { + lock_count: 1, + sample_lock: Some(Arc::from(b"Alpi".as_slice())), + } + ))) ), ] ); diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 31cda4b9e72..504eab621ff 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -575,7 +575,6 @@ mod tests { time::Duration, }; - use collections::HashSet; use engine_rocks::RocksEngine; use engine_traits::{MiscExt, CF_WRITE}; use futures::{executor::block_on, StreamExt}; @@ -584,6 +583,7 @@ mod tests { errorpb::Error as ErrorHeader, }; use raftstore::{coprocessor::ObserveHandle, router::CdcRaftRouter, store::RegionSnapshot}; + use resolved_ts::TxnLocks; use test_raftstore::MockRaftStoreRouter; use tikv::storage::{ kv::Engine, @@ -681,7 +681,7 @@ mod tests { fn test_initializer_build_resolver() { let mut engine = TestEngineBuilder::new().build_without_cache().unwrap(); - let mut expected_locks = BTreeMap::>>::new(); + let mut expected_locks = BTreeMap::::new(); // Only observe ["", "b\0x90"] let observed_range = ObservedRange::new( @@ -704,10 +704,12 @@ mod tests { total_bytes += v.len(); let ts = TimeStamp::new(i as _); must_prewrite_put(&mut engine, k, v, k, ts); - expected_locks - .entry(ts) - .or_default() - 
.insert(k.to_vec().into()); + let txn_locks = expected_locks.entry(ts).or_insert_with(|| { + let mut txn_locks = TxnLocks::default(); + txn_locks.sample_lock = Some(k.to_vec().into()); + txn_locks + }); + txn_locks.lock_count += 1; } let region = Region::default(); diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 600da207ec4..406d931ed7f 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -42,7 +42,7 @@ use crate::{ metrics::*, resolver::{LastAttempt, Resolver}, scanner::{ScanEntries, ScanTask, ScannerPool}, - Error, Result, TsSource, ON_DROP_WARN_HEAP_SIZE, + Error, Result, TsSource, TxnLocks, ON_DROP_WARN_HEAP_SIZE, }; /// grace period for identifying identifying slow resolved-ts and safe-ts. @@ -388,11 +388,11 @@ where E: KvEngine, S: StoreRegionMeta, { - fn is_leader(&self, store_id: Option, leader_store_id: Option) -> bool { - store_id.is_some() && store_id == leader_store_id - } - fn collect_stats(&mut self) -> Stats { + fn is_leader(store_id: Option, leader_store_id: Option) -> bool { + store_id.is_some() && store_id == leader_store_id + } + let store_id = self.get_or_init_store_id(); let mut stats = Stats::default(); self.region_read_progress.with(|registry| { @@ -407,10 +407,10 @@ where continue; } - if self.is_leader(store_id, leader_store_id) { + if is_leader(store_id, leader_store_id) { // leader resolved-ts if resolved_ts < stats.min_leader_resolved_ts.resolved_ts { - let resolver = self.regions.get(region_id).map(|x| &x.resolver); + let resolver = self.regions.get_mut(region_id).map(|x| &mut x.resolver); stats .min_leader_resolved_ts .set(*region_id, resolver, &core, &leader_info); @@ -1186,7 +1186,7 @@ struct LeaderStats { last_resolve_attempt: Option, applied_index: u64, // min lock in LOCK CF - min_lock: Option<(TimeStamp, Key)>, + min_lock: Option<(TimeStamp, TxnLocks)>, lock_num: Option, txn_num: Option, } @@ -1211,7 +1211,7 @@ impl LeaderStats { fn 
set( &mut self, region_id: u64, - resolver: Option<&Resolver>, + mut resolver: Option<&mut Resolver>, region_read_progress: &MutexGuard<'_, RegionReadProgressCore>, leader_info: &LeaderInfo, ) { @@ -1222,21 +1222,13 @@ impl LeaderStats { duration_to_last_update_ms: region_read_progress .last_instant_of_update_ts() .map(|i| i.saturating_elapsed().as_millis() as u64), - last_resolve_attempt: resolver.and_then(|r| r.last_attempt.clone()), - min_lock: resolver.and_then(|r| { - r.oldest_transaction().map(|(ts, keys)| { - ( - *ts, - keys.iter() - .next() - .map(|k| Key::from_encoded_slice(k.as_ref())) - .unwrap_or_else(|| Key::from_encoded_slice("no_keys_found".as_ref())), - ) - }) - }), + last_resolve_attempt: resolver.as_mut().and_then(|r| r.take_last_attempt()), + min_lock: resolver + .as_ref() + .and_then(|r| r.oldest_transaction().map(|(t, tk)| (*t, tk.clone()))), applied_index: region_read_progress.applied_index(), - lock_num: resolver.map(|r| r.num_locks()), - txn_num: resolver.map(|r| r.num_transactions()), + lock_num: resolver.as_ref().map(|r| r.num_locks()), + txn_num: resolver.as_ref().map(|r| r.num_transactions()), }; } } diff --git a/components/resolved_ts/src/resolver.rs b/components/resolved_ts/src/resolver.rs index 85e7acff4a4..239ef566605 100644 --- a/components/resolved_ts/src/resolver.rs +++ b/components/resolved_ts/src/resolver.rs @@ -2,7 +2,7 @@ use std::{cmp, collections::BTreeMap, sync::Arc, time::Duration}; -use collections::{HashMap, HashSet}; +use collections::{HashMap, HashMapEntry}; use raftstore::store::RegionReadProgress; use tikv_util::{ memory::{HeapSize, MemoryQuota, MemoryQuotaExceeded}, @@ -12,13 +12,12 @@ use txn_types::{Key, TimeStamp}; use crate::metrics::*; -const MAX_NUMBER_OF_LOCKS_IN_LOG: usize = 10; pub const ON_DROP_WARN_HEAP_SIZE: usize = 64 * 1024 * 1024; // 64MB #[derive(Clone)] pub enum TsSource { // A lock in LOCK CF - Lock(Arc<[u8]>), + Lock(TxnLocks), // A memory lock in concurrency manager MemoryLock(Key), PdTso, @@ 
-41,13 +40,38 @@ impl TsSource { pub fn key(&self) -> Option { match self { - TsSource::Lock(k) => Some(Key::from_encoded_slice(k)), + TsSource::Lock(locks) => locks + .sample_lock + .as_ref() + .map(|k| Key::from_encoded_slice(k)), TsSource::MemoryLock(k) => Some(k.clone()), _ => None, } } } +#[derive(Default, Clone, PartialEq, Eq)] +pub struct TxnLocks { + pub lock_count: usize, + // A sample key in a transaction. + pub sample_lock: Option>, +} + +impl std::fmt::Debug for TxnLocks { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TxnLocks") + .field("lock_count", &self.lock_count) + .field( + "sample_lock", + &self + .sample_lock + .as_ref() + .map(|k| log_wrappers::Value::key(k)), + ) + .finish() + } +} + // Resolver resolves timestamps that guarantee no more commit will happen before // the timestamp. pub struct Resolver { @@ -55,7 +79,7 @@ pub struct Resolver { // key -> start_ts locks_by_key: HashMap, TimeStamp>, // start_ts -> locked keys. - pub(crate) lock_ts_heap: BTreeMap>>, + lock_ts_heap: BTreeMap, // The last shrink time. last_aggressive_shrink_time: Instant, // The timestamps that guarantees no more commit will happen before. @@ -71,7 +95,7 @@ pub struct Resolver { // The memory quota for the `Resolver` and its lock keys and timestamps. memory_quota: Arc, // The last attempt of resolve(), used for diagnosis. - pub(crate) last_attempt: Option, + last_attempt: Option, } #[derive(Clone)] @@ -107,13 +131,14 @@ impl std::fmt::Debug for Resolver { let mut dt = f.debug_tuple("Resolver"); dt.field(&format_args!("region={}", self.region_id)); - if let Some((ts, keys)) = far_lock { + if let Some((ts, txn_locks)) = far_lock { + dt.field(&format_args!( + "oldest_lock_count={:?}", + txn_locks.lock_count + )); dt.field(&format_args!( - "oldest_lock={:?}", - keys.iter() - // We must use Display format here or the redact won't take effect. 
- .map(|k| format!("{}", log_wrappers::Value::key(k))) - .collect::>() + "oldest_lock_sample={:?}", + txn_locks.sample_lock )); dt.field(&format_args!("oldest_lock_ts={:?}", ts)); } @@ -180,7 +205,7 @@ impl Resolver { self.stopped } - pub fn locks(&self) -> &BTreeMap>> { + pub fn locks(&self) -> &BTreeMap { &self.lock_ts_heap } @@ -219,13 +244,13 @@ impl Resolver { } self.locks_by_key.len() * (key_bytes / key_count + std::mem::size_of::()) + self.lock_ts_heap.len() - * (std::mem::size_of::() + std::mem::size_of::>>()) + * (std::mem::size_of::() + std::mem::size_of::()) } fn lock_heap_size(&self, key: &[u8]) -> usize { // A resolver has // * locks_by_key: HashMap, TimeStamp> - // * lock_ts_heap: BTreeMap>> + // * lock_ts_heap: BTreeMap // // We only count memory used by locks_by_key. Because the majority of // memory is consumed by keys, locks_by_key and lock_ts_heap shares @@ -235,7 +260,7 @@ impl Resolver { key.heap_size() + std::mem::size_of::() } - fn shrink_ratio(&mut self, ratio: usize, timestamp: Option) { + fn shrink_ratio(&mut self, ratio: usize) { // HashMap load factor is 87% approximately, leave some margin to avoid // frequent rehash. // @@ -246,10 +271,6 @@ impl Resolver { { self.locks_by_key.shrink_to_fit(); } - if let Some(ts) = timestamp && let Some(lock_set) = self.lock_ts_heap.get_mut(&ts) - && lock_set.capacity() > lock_set.len() * cmp::max(MIN_SHRINK_RATIO, ratio) { - lock_set.shrink_to_fit(); - } } pub fn track_lock( @@ -273,8 +294,23 @@ impl Resolver { ); self.memory_quota.alloc(bytes)?; let key: Arc<[u8]> = key.into_boxed_slice().into(); - self.locks_by_key.insert(key.clone(), start_ts); - self.lock_ts_heap.entry(start_ts).or_default().insert(key); + match self.locks_by_key.entry(key) { + HashMapEntry::Occupied(_) => { + // Free memory quota because it's already in the map. + self.memory_quota.free(bytes); + } + HashMapEntry::Vacant(entry) => { + // Add lock count for the start ts. 
+ let txn_locks = self.lock_ts_heap.entry(start_ts).or_insert_with(|| { + let mut txn_locks = TxnLocks::default(); + txn_locks.sample_lock = Some(entry.key().clone()); + txn_locks + }); + txn_locks.lock_count += 1; + + entry.insert(start_ts); + } + } Ok(()) } @@ -301,22 +337,17 @@ impl Resolver { "memory_in_use" => self.memory_quota.in_use(), ); - let mut shrink_ts = None; - if let Some(locked_keys) = self.lock_ts_heap.get_mut(&start_ts) { - // Only shrink large set, because committing a small transaction is - // fast and shrink adds unnecessary overhead. - const SHRINK_SET_CAPACITY: usize = 256; - if locked_keys.capacity() > SHRINK_SET_CAPACITY { - shrink_ts = Some(start_ts); + if let Some(txn_locks) = self.lock_ts_heap.get_mut(&start_ts) { + if txn_locks.lock_count > 0 { + txn_locks.lock_count -= 1; } - locked_keys.remove(key); - if locked_keys.is_empty() { + if txn_locks.lock_count == 0 { self.lock_ts_heap.remove(&start_ts); } - } + }; // Use a large ratio to amortize the cost of rehash. let shrink_ratio = 8; - self.shrink_ratio(shrink_ratio, shrink_ts); + self.shrink_ratio(shrink_ratio); } /// Try to advance resolved ts. @@ -333,7 +364,7 @@ impl Resolver { const AGGRESSIVE_SHRINK_RATIO: usize = 2; const AGGRESSIVE_SHRINK_INTERVAL: Duration = Duration::from_secs(10); if self.last_aggressive_shrink_time.saturating_elapsed() > AGGRESSIVE_SHRINK_INTERVAL { - self.shrink_ratio(AGGRESSIVE_SHRINK_RATIO, None); + self.shrink_ratio(AGGRESSIVE_SHRINK_RATIO); self.last_aggressive_shrink_time = Instant::now_coarse(); } @@ -344,17 +375,17 @@ impl Resolver { } // Find the min start ts. - let min_lock = self - .oldest_transaction() - .and_then(|(ts, locks)| locks.iter().next().map(|lock| (*ts, lock))); + let min_lock = self.oldest_transaction(); let has_lock = min_lock.is_some(); - let min_start_ts = min_lock.map(|(ts, _)| ts).unwrap_or(min_ts); + let min_start_ts = min_lock.as_ref().map(|(ts, _)| **ts).unwrap_or(min_ts); // No more commit happens before the ts. 
let new_resolved_ts = cmp::min(min_start_ts, min_ts); // reason is the min source of the new resolved ts. let reason = match (min_lock, min_ts) { - (Some(lock), min_ts) if lock.0 < min_ts => TsSource::Lock(lock.1.clone()), + (Some((lock_ts, txn_locks)), min_ts) if *lock_ts < min_ts => { + TsSource::Lock(txn_locks.clone()) + } (Some(_), _) => source, (None, _) => source, }; @@ -400,21 +431,16 @@ impl Resolver { pub(crate) fn log_locks(&self, min_start_ts: u64) { // log lock with the minimum start_ts >= min_start_ts - if let Some((start_ts, keys)) = self + if let Some((start_ts, txn_locks)) = self .lock_ts_heap .range(TimeStamp::new(min_start_ts)..) .next() { - let keys_for_log = keys - .iter() - .map(|key| log_wrappers::Value::key(key)) - .take(MAX_NUMBER_OF_LOCKS_IN_LOG) - .collect::>(); info!( "locks with the minimum start_ts in resolver"; "region_id" => self.region_id, "start_ts" => start_ts, - "sampled_keys" => ?keys_for_log, + "txn_locks" => ?txn_locks, ); } } @@ -431,9 +457,13 @@ impl Resolver { self.read_progress.as_ref() } - pub(crate) fn oldest_transaction(&self) -> Option<(&TimeStamp, &HashSet>)> { + pub(crate) fn oldest_transaction(&self) -> Option<(&TimeStamp, &TxnLocks)> { self.lock_ts_heap.iter().next() } + + pub(crate) fn take_last_attempt(&mut self) -> Option { + self.last_attempt.take() + } } #[cfg(test)] @@ -608,32 +638,76 @@ mod tests { } #[test] - fn test_untrack_lock_set_shrink_ratio() { + fn test_idempotent_track_and_untrack_lock() { let memory_quota = Arc::new(MemoryQuota::new(std::usize::MAX)); let mut resolver = Resolver::new(1, memory_quota); let mut key = vec![0; 16]; - let ts = TimeStamp::new(1); - for i in 0..1000usize { - key[0..8].copy_from_slice(&i.to_be_bytes()); - let _ = resolver.track_lock(ts, key.clone(), None); + + // track_lock + let mut ts = TimeStamp::default(); + for c in 0..10 { + ts.incr(); + for k in 0..100u64 { + key[0..8].copy_from_slice(&k.to_be_bytes()); + key[8..16].copy_from_slice(&ts.into_inner().to_be_bytes()); + 
let _ = resolver.track_lock(ts, key.clone(), None); + } + let in_use1 = resolver.memory_quota.in_use(); + let key_count1 = resolver.locks_by_key.len(); + let txn_count1 = resolver.lock_ts_heap.len(); + let txn_lock_count1 = resolver.lock_ts_heap[&ts].lock_count; + assert!(in_use1 > 0); + assert_eq!(key_count1, (c + 1) * 100); + assert_eq!(txn_count1, c + 1); + + // Put same keys again, resolver internal state must be idempotent. + for k in 0..100u64 { + key[0..8].copy_from_slice(&k.to_be_bytes()); + key[8..16].copy_from_slice(&ts.into_inner().to_be_bytes()); + let _ = resolver.track_lock(ts, key.clone(), None); + } + let in_use2 = resolver.memory_quota.in_use(); + let key_count2 = resolver.locks_by_key.len(); + let txn_count2 = resolver.lock_ts_heap.len(); + let txn_lock_count2 = resolver.lock_ts_heap[&ts].lock_count; + assert_eq!(in_use1, in_use2); + assert_eq!(key_count1, key_count2); + assert_eq!(txn_count1, txn_count2); + assert_eq!(txn_lock_count1, txn_lock_count2); } - assert!( - resolver.lock_ts_heap[&ts].capacity() >= 1000, - "{}", - resolver.lock_ts_heap[&ts].capacity() - ); + assert_eq!(resolver.resolve(ts, None, TsSource::PdTso), 1.into()); - for i in 0..990usize { - key[0..8].copy_from_slice(&i.to_be_bytes()); - resolver.untrack_lock(&key, None); + // untrack_lock + let mut ts = TimeStamp::default(); + for _ in 0..10 { + ts.incr(); + for k in 0..100u64 { + key[0..8].copy_from_slice(&k.to_be_bytes()); + key[8..16].copy_from_slice(&ts.into_inner().to_be_bytes()); + resolver.untrack_lock(&key, None); + } + let in_use1 = resolver.memory_quota.in_use(); + let key_count1 = resolver.locks_by_key.len(); + let txn_count1 = resolver.lock_ts_heap.len(); + + // Unlock same keys again, resolver internal state must be idempotent. 
+ for k in 0..100u64 { + key[0..8].copy_from_slice(&k.to_be_bytes()); + key[8..16].copy_from_slice(&ts.into_inner().to_be_bytes()); + resolver.untrack_lock(&key, None); + } + let in_use2 = resolver.memory_quota.in_use(); + let key_count2 = resolver.locks_by_key.len(); + let txn_count2 = resolver.lock_ts_heap.len(); + assert_eq!(in_use1, in_use2); + assert_eq!(key_count1, key_count2); + assert_eq!(txn_count1, txn_count2); + + assert_eq!(resolver.resolve(ts, None, TsSource::PdTso), ts); } - // shrink_to_fit may reserve some space in accordance with the resize - // policy, but it is expected to be less than 100. - assert!( - resolver.lock_ts_heap[&ts].capacity() < 500, - "{}, {}", - resolver.lock_ts_heap[&ts].capacity(), - resolver.lock_ts_heap[&ts].len(), - ); + + assert_eq!(resolver.memory_quota.in_use(), 0); + assert_eq!(resolver.locks_by_key.len(), 0); + assert_eq!(resolver.lock_ts_heap.len(), 0); } } From 6971a4635b6b3a27b5be3db0fc4c8200d995d605 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Wed, 20 Sep 2023 13:09:12 -0700 Subject: [PATCH 0927/1149] upgrade flatbuffers from 2.1.2 to 23.5.26 to address security issue (#15628) ref tikv/tikv#15621 The security issue is https://github.com/google/flatbuffers/issues/6627. Upgrade flatbuffers from 2.1.2 to 23.5.26 to address it. 
Signed-off-by: tonyxuqqi Signed-off-by: Qi Xu Co-authored-by: Qi Xu --- Cargo.lock | 539 ++++++++++++++---- components/backup-stream/Cargo.toml | 2 +- components/backup-stream/src/router.rs | 1 + .../src/codec/mysql/time/mod.rs | 3 + .../src/codec/mysql/time/tz.rs | 4 + tests/Cargo.toml | 2 +- 6 files changed, 453 insertions(+), 98 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 34f9c381958..4f35ae6b935 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,7 +47,7 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43bb833f0bf979d8475d38fbf09ed3b8a55e1885fe93ad3f93239fc6a4f17b98" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.10", "once_cell", "version_check 0.9.4", ] @@ -59,6 +59,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" dependencies = [ "cfg-if 1.0.0", + "const-random", + "getrandom 0.2.10", "once_cell", "version_check 0.9.4", ] @@ -78,6 +80,21 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4f263788a35611fba42eb41ff811c5d0360c58b97402570312a350736e2542e" +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc 0.2.146", +] + [[package]] name = "ansi_term" version = "0.11.0" @@ -133,28 +150,215 @@ dependencies = [ [[package]] name = "arrow" -version = "13.0.0" +version = "46.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6bee230122beb516ead31935a61f683715f987c6f003eff44ad6986624105a" +checksum = 
"04a8801ebb147ad240b2d978d3ab9f73c9ccd4557ba6a03e7800496770ed10e0" dependencies = [ - "bitflags", + "ahash 0.8.3", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "895263144bd4a69751cbe6a34a53f26626e19770b313a9fa792c415cd0e78f11" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half 2.3.1", + "num 0.4.1", +] + +[[package]] +name = "arrow-array" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "226fdc6c3a4ae154a74c24091d36a90b514f0ed7112f5b8322c1d8f354d8e20d" +dependencies = [ + "ahash 0.8.3", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half 2.3.1", + "hashbrown 0.14.0", + "num 0.4.1", +] + +[[package]] +name = "arrow-buffer" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc4843af4dd679c2f35b69c572874da8fde33be53eb549a5fb128e7a4b763510" +dependencies = [ + "bytes", + "half 2.3.1", + "num 0.4.1", +] + +[[package]] +name = "arrow-cast" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e8b9990733a9b635f656efda3c9b8308c7a19695c9ec2c7046dd154f9b144b" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "chrono", + "half 2.3.1", + "lexical-core", + "num 0.4.1", +] + +[[package]] +name = "arrow-csv" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "646fbb4e11dd0afb8083e883f53117713b8caadb4413b3c9e63e3f535da3683c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", "chrono", "csv", - "flatbuffers", - 
"half", - "hex 0.4.2", - "indexmap", + "csv-core", "lazy_static", "lexical-core", - "multiversion", - "num 0.4.0", - "rand 0.8.5", "regex", +] + +[[package]] +name = "arrow-data" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da900f31ff01a0a84da0572209be72b2b6f980f3ea58803635de47913191c188" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half 2.3.1", + "num 0.4.1", +] + +[[package]] +name = "arrow-ipc" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2707a8d7ee2d345d045283ece3ae43416175873483e5d96319c929da542a0b1f" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", +] + +[[package]] +name = "arrow-json" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d1b91a63c356d14eedc778b76d66a88f35ac8498426bb0799a769a49a74a8b4" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half 2.3.1", + "indexmap 2.0.0", + "lexical-core", + "num 0.4.1", "serde", - "serde_derive", "serde_json", ] +[[package]] +name = "arrow-ord" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "584325c91293abbca7aaaabf8da9fe303245d641f5f4a18a6058dc68009c7ebf" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half 2.3.1", + "num 0.4.1", +] + +[[package]] +name = "arrow-row" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e32afc1329f7b372463b21c6ca502b07cf237e1ed420d87706c1770bb0ebd38" +dependencies = [ + "ahash 0.8.3", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half 2.3.1", + "hashbrown 0.14.0", +] + +[[package]] +name = "arrow-schema" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "b104f5daa730f00fde22adc03a12aa5a2ae9ccbbf99cbd53d284119ddc90e03d" + +[[package]] +name = "arrow-select" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b3ca55356d1eae07cf48808d8c462cea674393ae6ad1e0b120f40b422eb2b4" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num 0.4.1", +] + +[[package]] +name = "arrow-string" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af1433ce02590cae68da0a18ed3a3ed868ffac2c6f24c533ddd2067f7ee04b4a" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "num 0.4.1", + "regex", + "regex-syntax 0.7.5", +] + [[package]] name = "async-channel" version = "1.6.1" @@ -407,7 +611,7 @@ dependencies = [ "bytes", "dyn-clone", "futures 0.3.15", - "getrandom 0.2.3", + "getrandom 0.2.10", "http-types", "log", "paste", @@ -591,7 +795,7 @@ dependencies = [ "futures-io", "grpcio", "hex 0.4.2", - "indexmap", + "indexmap 1.9.3", "kvproto", "lazy_static", "log_wrappers", @@ -807,9 +1011,9 @@ checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" [[package]] name = "bytes" -version = "1.0.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b700ce4376041dcd0a327fd0097c41095743c4c8af8887265942faf1100bd040" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" dependencies = [ "serde", ] @@ -908,11 +1112,12 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.73" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ "jobserver", + "libc 0.2.146", ] [[package]] @@ -984,14 +1189,17 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 
[[package]] name = "chrono" -version = "0.4.11" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80094f509cf8b5ae86a4966a39b3ff66cd7e2a3e594accec3743ff3fabeab5b2" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ - "num-integer", + "android-tzdata", + "iana-time-zone", + "js-sys", "num-traits", "serde", - "time 0.1.42", + "wasm-bindgen", + "windows-targets", ] [[package]] @@ -1039,7 +1247,7 @@ dependencies = [ "atty", "bitflags", "clap_derive", - "indexmap", + "indexmap 1.9.3", "lazy_static", "os_str_bytes", "strsim 0.10.0", @@ -1138,6 +1346,28 @@ dependencies = [ "cache-padded", ] +[[package]] +name = "const-random" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e" +dependencies = [ + "const-random-macro", + "proc-macro-hack", +] + +[[package]] +name = "const-random-macro" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb" +dependencies = [ + "getrandom 0.2.10", + "once_cell", + "proc-macro-hack", + "tiny-keccak", +] + [[package]] name = "const_format" version = "0.2.30" @@ -1179,9 +1409,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.2" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpu-time" @@ -1360,6 +1590,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "crypto-common" version = "0.1.6" @@ -1781,6 
+2017,12 @@ dependencies = [ "termcolor", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.2.8" @@ -2063,13 +2305,12 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flatbuffers" -version = "2.1.2" +version = "23.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b428b715fdbdd1c364b84573b5fdc0f84f8e423661b9f398735278bc7f2b6a" +checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" dependencies = [ "bitflags", - "smallvec", - "thiserror", + "rustc_version 0.4.0", ] [[package]] @@ -2403,14 +2644,14 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.3" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if 1.0.0", "js-sys", "libc 0.2.146", - "wasi 0.10.2+wasi-snapshot-preview1", + "wasi 0.11.0+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -2514,7 +2755,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap", + "indexmap 1.9.3", "slab", "tokio", "tokio-util", @@ -2527,11 +2768,22 @@ version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +[[package]] +name = "half" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +dependencies = [ + "cfg-if 1.0.0", + "crunchy", + "num-traits", +] + [[package]] name = "hashbrown" -version = "0.9.1" +version = "0.12.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] name = "hashbrown" @@ -2751,6 +3003,29 @@ dependencies = [ "tokio-native-tls", ] +[[package]] +name = "iana-time-zone" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -2776,12 +3051,22 @@ checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" [[package]] name = "indexmap" -version = "1.6.2" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "824845a0bf897a9042383849b02c1bc219c2383772efcd5c6f9766fa4b81aef3" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", - "hashbrown 0.9.1", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown 0.14.0", ] [[package]] @@ -2798,7 +3083,7 @@ checksum = "16d4bde3a7105e59c66a4104cfe9606453af1c7a0eac78cb7d5bc263eb762a70" dependencies = [ "ahash 0.7.4", "atty", - "indexmap", + "indexmap 1.9.3", "itoa 1.0.1", "lazy_static", "log", @@ -2949,7 +3234,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "2d63b6407b66fc81fc539dccf3ddecb669f393c5101b6a2be3976c95099a06e8" dependencies = [ - "indexmap", + "indexmap 1.9.3", ] [[package]] @@ -3085,6 +3370,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "libm" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" + [[package]] name = "libmimalloc-sys" version = "0.1.21" @@ -3438,26 +3729,6 @@ dependencies = [ "serde", ] -[[package]] -name = "multiversion" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "025c962a3dd3cc5e0e520aa9c612201d127dcdf28616974961a649dca64f5373" -dependencies = [ - "multiversion-macros", -] - -[[package]] -name = "multiversion-macros" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.103", -] - [[package]] name = "mur3" version = "0.1.0" @@ -3604,15 +3875,15 @@ dependencies = [ [[package]] name = "num" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" dependencies = [ "num-bigint", - "num-complex 0.4.1", + "num-complex 0.4.4", "num-integer", "num-iter", - "num-rational 0.4.0", + "num-rational 0.4.1", "num-traits", ] @@ -3638,9 +3909,9 @@ dependencies = [ [[package]] name = "num-complex" -version = "0.4.1" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fbc387afefefd5e9e39493299f3069e14a140dd34dc19b4c1c1a8fddb6a790" +checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" dependencies = [ "num-traits", ] @@ -3668,9 +3939,9 @@ dependencies = [ [[package]] name = 
"num-integer" -version = "0.1.44" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" dependencies = [ "autocfg", "num-traits", @@ -3678,9 +3949,9 @@ dependencies = [ [[package]] name = "num-iter" -version = "0.1.42" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" dependencies = [ "autocfg", "num-integer", @@ -3700,9 +3971,9 @@ dependencies = [ [[package]] name = "num-rational" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d41702bd167c2df5520b384281bc111a4b5efcf7fbc4c9c222c815b07e0a6a6a" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" dependencies = [ "autocfg", "num-bigint", @@ -3712,11 +3983,12 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.14" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -3746,7 +4018,7 @@ checksum = "80e47cfc4c0a1a519d9a025ebfbac3a2439d1b5cdf397d72dcb79b11d9920dab" dependencies = [ "base64 0.13.0", "chrono", - "getrandom 0.2.3", + "getrandom 0.2.10", "http", "rand 0.8.5", "serde", @@ -4037,7 +4309,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a13a2fa9d0b63e5f22328828741e523766fff0ee9e779316902290dff3f824f" dependencies = [ "fixedbitset", - "indexmap", + "indexmap 1.9.3", ] [[package]] @@ -4769,7 +5041,7 @@ version = "0.6.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.10", ] [[package]] @@ -4862,19 +5134,19 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.10", "redox_syscall 0.2.11", ] [[package]] name = "regex" -version = "1.5.6" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" +checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.6.29", ] [[package]] @@ -4888,9 +5160,15 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.26" +version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] name = "remove_dir_all" @@ -5433,7 +5711,7 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622" dependencies = [ - "half", + "half 1.8.2", "serde", ] @@ -5463,7 +5741,7 @@ version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" dependencies = [ - "indexmap", + "indexmap 1.9.3", "itoa 0.4.4", "ryu", "serde", @@ -7073,6 +7351,15 @@ dependencies = [ "time-core", ] 
+[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinytemplate" version = "1.2.0" @@ -7274,7 +7561,7 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", - "indexmap", + "indexmap 1.9.3", "pin-project", "pin-project-lite", "rand 0.8.5", @@ -7512,7 +7799,7 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.10", "serde", ] @@ -7522,7 +7809,7 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "feb41e78f93363bb2df8b0e86a2ca30eed7806ea16ea0c790d757cf93f79be83" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.10", ] [[package]] @@ -7598,12 +7885,6 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b89c3ce4ce14bdc6fb6beaf9ec7928ca331de5df7e5ea278375642a2f478570d" -[[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -7742,6 +8023,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.32.0" @@ -7761,21 +8051,42 @@ 
version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm", + "windows_aarch64_gnullvm 0.42.0", "windows_aarch64_msvc 0.42.0", "windows_i686_gnu 0.42.0", "windows_i686_msvc 0.42.0", "windows_x86_64_gnu 0.42.0", - "windows_x86_64_gnullvm", + "windows_x86_64_gnullvm 0.42.0", "windows_x86_64_msvc 0.42.0", ] +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_msvc" version = "0.32.0" @@ -7788,6 +8099,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_i686_gnu" version = "0.32.0" @@ -7800,6 +8117,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_msvc" version = "0.32.0" @@ -7812,6 +8135,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_x86_64_gnu" version = "0.32.0" @@ -7824,12 +8153,24 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_msvc" version = "0.32.0" @@ -7842,6 +8183,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "winreg" version = "0.7.0" diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 8c1edc89a48..4f53c39b9db 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -51,7 +51,7 @@ futures-io = "0.3" grpcio = { workspace = true } hex = "0.4" # Fixing ahash cyclic dep: https://github.com/tkaitchuck/ahash/issues/95 -indexmap = "=1.6.2" +indexmap = "=1.9.3" kvproto = { workspace = true } lazy_static = "1.4" log_wrappers = { workspace = true } diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index b2fd9acc743..ae4b98b1687 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -731,6 +731,7 @@ impl TempFileKey { } } + #[allow(deprecated)] fn format_date_time(ts: u64, t: FormatType) -> impl Display { use chrono::prelude::*; let millis = TimeStamp::physical(ts.into()); diff --git a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs index 44228f2d88e..621d4384bcc 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs @@ -1342,6 +1342,7 @@ impl Time { Ok((((ymd << 17) | hms) << 24) | u64::from(self.micro())) } + #[allow(deprecated)] pub fn from_duration( ctx: &mut EvalContext, duration: Duration, @@ -1415,6 +1416,7 @@ impl Time { .ok_or_else(|| Error::incorrect_datetime_value(self)) } + #[allow(deprecated)] pub fn normalized(self, ctx: &mut EvalContext) -> Result { if self.get_time_type() == TimeType::Timestamp { return Ok(self); @@ -1500,6 +1502,7 @@ impl Time { + self.day()) as i32 } + #[allow(deprecated)] pub fn weekday(self) -> Weekday { let date = if self.month() == 0 { NaiveDate::from_ymd(self.year() as i32 - 1, 12, 1) diff --git 
a/components/tidb_query_datatype/src/codec/mysql/time/tz.rs b/components/tidb_query_datatype/src/codec/mysql/time/tz.rs index 25b35a90fc0..9dfc3ebf288 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/tz.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/tz.rs @@ -120,6 +120,7 @@ impl TimeZone for Tz { } } + #[allow(deprecated)] fn from_local_date(&self, local: &NaiveDate) -> LocalResult> { match *self { Tz::Local(ref offset) => offset @@ -134,6 +135,7 @@ impl TimeZone for Tz { } } + #[allow(deprecated)] fn from_local_datetime(&self, local: &NaiveDateTime) -> LocalResult> { match *self { Tz::Local(ref offset) => offset @@ -148,6 +150,7 @@ impl TimeZone for Tz { } } + #[allow(deprecated)] fn from_utc_date(&self, utc: &NaiveDate) -> Date { match *self { Tz::Local(ref offset) => { @@ -165,6 +168,7 @@ impl TimeZone for Tz { } } + #[allow(deprecated)] fn from_utc_datetime(&self, utc: &NaiveDateTime) -> DateTime { match *self { Tz::Local(ref offset) => { diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 158e56abcb1..0081d5e95bc 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -120,7 +120,7 @@ uuid = { version = "0.8.1", features = ["serde", "v4"] } procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "7693954bd1dd86eb1709572fd7b62fd5f7ff2ea1" } [dev-dependencies] -arrow = "13.0" +arrow = "46.0" byteorder = "1.2" # See https://bheisler.github.io/criterion.rs/book/user_guide/known_limitations.html for the usage # of `real_blackbox` feature. 
From 533b205efd231f13ca716e40a0cc33fa59ee6809 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 21 Sep 2023 14:37:43 +0800 Subject: [PATCH 0928/1149] raft-engine: update raft-engine to newest version (#15559) close tikv/tikv#15462 Signed-off-by: glorv Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 23 +++++++++++++++++------ components/raft_log_engine/Cargo.toml | 3 +++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4f35ae6b935..f4adccf26fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3927,6 +3927,17 @@ dependencies = [ "syn 1.0.103", ] +[[package]] +name = "num-derive" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e6a0fd4f737c707bd9086cc16c925f294943eb62eb71499e9fd4cf71f8b9f4e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.18", +] + [[package]] name = "num-format" version = "0.4.0" @@ -4756,8 +4767,8 @@ dependencies = [ [[package]] name = "raft-engine" -version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#de3ad04a2db9cdf795b1c82d7413b9b53bac92a8" +version = "0.4.1" +source = "git+https://github.com/tikv/raft-engine.git#fa56f891fdf0b1cb5b7849b7bee3c5dadbb96103" dependencies = [ "byteorder", "crc32fast", @@ -4773,7 +4784,7 @@ dependencies = [ "lz4-sys", "memmap2 0.7.0", "nix 0.26.2", - "num-derive", + "num-derive 0.4.0", "num-traits", "parking_lot 0.12.1", "prometheus", @@ -4790,8 +4801,8 @@ dependencies = [ [[package]] name = "raft-engine-ctl" -version = "0.3.0" -source = "git+https://github.com/tikv/raft-engine.git#de3ad04a2db9cdf795b1c82d7413b9b53bac92a8" +version = "0.4.1" +source = "git+https://github.com/tikv/raft-engine.git#fa56f891fdf0b1cb5b7849b7bee3c5dadbb96103" dependencies = [ "clap 3.1.6", "env_logger 0.10.0", @@ -6873,7 +6884,7 @@ dependencies = [ "match-template", "nom 7.1.0", "num 0.3.0", - "num-derive", + "num-derive 0.3.0", "num-traits", 
"ordered-float", "protobuf", diff --git a/components/raft_log_engine/Cargo.toml b/components/raft_log_engine/Cargo.toml index e643089a872..0e640991eea 100644 --- a/components/raft_log_engine/Cargo.toml +++ b/components/raft_log_engine/Cargo.toml @@ -4,6 +4,9 @@ version = "0.0.1" publish = false edition = "2021" +[features] +failpoints = ["raft-engine/failpoints"] + [dependencies] encryption = { workspace = true } engine_traits = { workspace = true } From 241b8f53d3b35ba6b0ff5d905527f93528af192a Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 21 Sep 2023 21:22:45 +0800 Subject: [PATCH 0929/1149] raftstore-v2: support online change lock write buffer limit (#15632) ref tikv/tikv#14320 support online change lock write buffer limit Signed-off-by: SpadeA-Tang --- Cargo.lock | 6 ++-- components/engine_rocks/src/cf_options.rs | 17 ++++++++++ src/config/configurable.rs | 17 ++++++++++ src/config/mod.rs | 38 +++++++++++++++++++++++ 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f4adccf26fc..f05b651b1ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3388,7 +3388,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#fc38a5b427e6c9b351f835c641e2ee95b8ff8306" +source = "git+https://github.com/tikv/rust-rocksdb.git#f04f4dd8eacc30e67c24bc2529a6d9c6edb85f8f" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3407,7 +3407,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#fc38a5b427e6c9b351f835c641e2ee95b8ff8306" +source = "git+https://github.com/tikv/rust-rocksdb.git#f04f4dd8eacc30e67c24bc2529a6d9c6edb85f8f" dependencies = [ "bzip2-sys", "cc", @@ -5379,7 +5379,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#fc38a5b427e6c9b351f835c641e2ee95b8ff8306" +source = 
"git+https://github.com/tikv/rust-rocksdb.git#f04f4dd8eacc30e67c24bc2529a6d9c6edb85f8f" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/components/engine_rocks/src/cf_options.rs b/components/engine_rocks/src/cf_options.rs index 1162c67f210..6a2372fb31f 100644 --- a/components/engine_rocks/src/cf_options.rs +++ b/components/engine_rocks/src/cf_options.rs @@ -40,6 +40,23 @@ impl RocksCfOptions { pub fn into_raw(self) -> RawCfOptions { self.0 } + + pub fn set_flush_size(&mut self, f: usize) -> Result<()> { + if let Some(m) = self.0.get_write_buffer_manager() { + m.set_flush_size(f); + } else { + return Err(box_err!("write buffer manager not found")); + } + Ok(()) + } + + pub fn get_flush_size(&self) -> Result { + if let Some(m) = self.0.get_write_buffer_manager() { + return Ok(m.flush_size() as u64); + } + + Err(box_err!("write buffer manager not found")) + } } impl Deref for RocksCfOptions { diff --git a/src/config/configurable.rs b/src/config/configurable.rs index 6fe9409c1c0..c92b01cf465 100644 --- a/src/config/configurable.rs +++ b/src/config/configurable.rs @@ -15,6 +15,7 @@ pub trait ConfigurableDb { fn set_rate_bytes_per_sec(&self, rate_bytes_per_sec: i64) -> ConfigRes; fn set_rate_limiter_auto_tuned(&self, auto_tuned: bool) -> ConfigRes; fn set_flush_size(&self, f: usize) -> ConfigRes; + fn set_cf_flush_size(&self, cf: &str, f: usize) -> ConfigRes; fn set_flush_oldest_first(&self, f: bool) -> ConfigRes; fn set_shared_block_cache_capacity(&self, capacity: usize) -> ConfigRes; fn set_high_priority_background_threads(&self, n: i32, allow_reduce: bool) -> ConfigRes; @@ -57,6 +58,11 @@ impl ConfigurableDb for RocksEngine { opt.set_flush_size(f).map_err(Box::from) } + fn set_cf_flush_size(&self, cf: &str, f: usize) -> ConfigRes { + let mut cf_option = self.get_options_cf(cf)?; + cf_option.set_flush_size(f).map_err(Box::from) + } + fn set_flush_oldest_first(&self, f: bool) -> ConfigRes { let mut opt = self.get_db_options(); 
opt.set_flush_oldest_first(f).map_err(Box::from) @@ -171,6 +177,17 @@ impl ConfigurableDb for TabletRegistry { }) } + fn set_cf_flush_size(&self, cf: &str, f: usize) -> ConfigRes { + loop_registry(self, |cache| { + if let Some(latest) = cache.latest() { + latest.set_cf_flush_size(cf, f)?; + Ok(false) + } else { + Ok(true) + } + }) + } + fn set_flush_oldest_first(&self, f: bool) -> ConfigRes { loop_registry(self, |cache| { if let Some(latest) = cache.latest() { diff --git a/src/config/mod.rs b/src/config/mod.rs index 6b3332fb015..9b8ecad50f9 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2031,6 +2031,15 @@ impl ConfigManager for DbConfigManger { cf_change.insert(name, value); } } + if let Some(f) = cf_change.remove("write_buffer_limit") { + if cf_name != CF_LOCK { + return Err( + "cf write buffer manager is only supportted for lock cf now".into() + ); + } + let size: ReadableSize = f.into(); + self.db.set_cf_flush_size(cf_name, size.0 as usize)?; + } if !cf_change.is_empty() { let cf_change = config_value_to_string(cf_change.into_iter().collect()); let cf_change_slice = config_to_slice(&cf_change); @@ -5167,6 +5176,7 @@ mod tests { cfg.rocksdb.defaultcf.block_cache_size = Some(ReadableSize::mb(8)); cfg.rocksdb.rate_bytes_per_sec = ReadableSize::mb(64); cfg.rocksdb.rate_limiter_auto_tuned = false; + cfg.rocksdb.lockcf.write_buffer_limit = Some(ReadableSize::mb(1)); cfg.validate().unwrap(); let (storage, cfg_controller, ..) 
= new_engines::(cfg); let db = storage.get_engine().get_rocksdb(); @@ -5209,6 +5219,34 @@ mod tests { let flush_size = db.get_db_options().get_flush_size().unwrap(); assert_eq!(flush_size, ReadableSize::mb(10).0); + cfg_controller + .update_config("rocksdb.lockcf.write-buffer-limit", "22MB") + .unwrap(); + let cf_opt = db.get_options_cf("lock").unwrap(); + let flush_size = cf_opt.get_flush_size().unwrap(); + assert_eq!(flush_size, ReadableSize::mb(22).0); + + cfg_controller + .update_config("rocksdb.lockcf.write-buffer-size", "102MB") + .unwrap(); + let cf_opt = db.get_options_cf("lock").unwrap(); + let bsize = cf_opt.get_write_buffer_size(); + assert_eq!(bsize, ReadableSize::mb(102).0); + + cfg_controller + .update_config("rocksdb.writecf.write-buffer-size", "102MB") + .unwrap(); + let cf_opt = db.get_options_cf("write").unwrap(); + let bsize = cf_opt.get_write_buffer_size(); + assert_eq!(bsize, ReadableSize::mb(102).0); + + cfg_controller + .update_config("rocksdb.defaultcf.write-buffer-size", "102MB") + .unwrap(); + let cf_opt = db.get_options_cf("default").unwrap(); + let bsize = cf_opt.get_write_buffer_size(); + assert_eq!(bsize, ReadableSize::mb(102).0); + // update some configs on default cf let cf_opts = db.get_options_cf(CF_DEFAULT).unwrap(); assert_eq!(cf_opts.get_disable_auto_compactions(), false); From 9b76ac97e1de01c1b0e70af406720b2c368d9624 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 21 Sep 2023 21:39:15 +0800 Subject: [PATCH 0930/1149] log-bakcup: make initial scan asynchronous (#15541) ref tikv/tikv#15410 This PR also removed some fields in `Endpoint`, now they should be in the `InitialDataLoader`. The latter will communicate with the former by messages. 
Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/backup-stream/src/endpoint.rs | 156 ++++++++----- components/backup-stream/src/event_loader.rs | 215 ++++++++---------- .../backup-stream/src/subscription_manager.rs | 213 +++++++++-------- .../backup-stream/src/subscription_track.rs | 2 + components/backup-stream/src/utils.rs | 64 +----- .../backup-stream/tests/integration/mod.rs | 22 ++ components/backup-stream/tests/suite.rs | 12 +- components/raftstore/src/router.rs | 32 ++- src/config/mod.rs | 5 + 9 files changed, 357 insertions(+), 364 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index b11259d5be6..834a40f8bdd 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -1,16 +1,24 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{any::Any, collections::HashSet, fmt, marker::PhantomData, sync::Arc, time::Duration}; +use std::{ + any::Any, + collections::HashSet, + fmt, + marker::PhantomData, + sync::{Arc, Mutex}, + time::Duration, +}; use concurrency_manager::ConcurrencyManager; use engine_traits::KvEngine; use error_code::ErrorCodeExt; -use futures::{stream::AbortHandle, FutureExt}; +use futures::{stream::AbortHandle, FutureExt, TryFutureExt}; use kvproto::{ brpb::{StreamBackupError, StreamBackupTaskInfo}, metapb::Region, }; use pd_client::PdClient; +use raft::StateRole; use raftstore::{ coprocessor::{CmdBatch, ObserveHandle, RegionInfoProvider}, router::CdcHandle, @@ -30,7 +38,7 @@ use tikv_util::{ use tokio::{ io::Result as TokioResult, runtime::{Handle, Runtime}, - sync::oneshot, + sync::{oneshot, Semaphore}, }; use tokio_stream::StreamExt; use txn_types::TimeStamp; @@ -60,7 +68,7 @@ const SLOW_EVENT_THRESHOLD: f64 = 120.0; /// task has fatal error. 
const CHECKPOINT_SAFEPOINT_TTL_IF_ERROR: u64 = 24; -pub struct Endpoint { +pub struct Endpoint { // Note: those fields are more like a shared context between components. // For now, we copied them everywhere, maybe we'd better extract them into a // context type. @@ -69,7 +77,6 @@ pub struct Endpoint { pub(crate) store_id: u64, pub(crate) regions: R, pub(crate) engine: PhantomData, - pub(crate) router: RT, pub(crate) pd_client: Arc, pub(crate) subs: SubscriptionTracer, pub(crate) concurrency_manager: ConcurrencyManager, @@ -78,8 +85,6 @@ pub struct Endpoint { pub range_router: Router, observer: BackupStreamObserver, pool: Runtime, - initial_scan_memory_quota: PendingMemoryQuota, - initial_scan_throughput_quota: Limiter, region_operator: RegionSubscriptionManager, failover_time: Option, // We holds the config before, even it is useless for now, @@ -92,17 +97,17 @@ pub struct Endpoint { /// This is used for simulating an asynchronous background worker. /// Each time we spawn a task, once time goes by, we abort that task. 
pub abort_last_storage_save: Option, + pub initial_scan_semaphore: Arc, } -impl Endpoint +impl Endpoint where R: RegionInfoProvider + 'static + Clone, E: KvEngine, - RT: CdcHandle + 'static, PDC: PdClient + 'static, S: MetaStore + 'static, { - pub fn new( + pub fn new + 'static>( store_id: u64, store: S, config: BackupStreamConfig, @@ -145,17 +150,21 @@ where info!("the endpoint of stream backup started"; "path" => %config.temp_path); let subs = SubscriptionTracer::default(); + let initial_scan_semaphore = Arc::new(Semaphore::new(config.initial_scan_concurrency)); let (region_operator, op_loop) = RegionSubscriptionManager::start( InitialDataLoader::new( - router.clone(), - accessor.clone(), range_router.clone(), subs.clone(), scheduler.clone(), - initial_scan_memory_quota.clone(), - pool.handle().clone(), - initial_scan_throughput_quota.clone(), + initial_scan_memory_quota, + initial_scan_throughput_quota, + // NOTE: in fact we can get rid of the `Arc`. Just need to warp the router when the + // scanner pool is created. But at that time the handle has been sealed in the + // `InitialScan` trait -- we cannot do that. 
+ Arc::new(Mutex::new(router)), + Arc::clone(&initial_scan_semaphore), ), + accessor.clone(), observer.clone(), meta_client.clone(), pd_client.clone(), @@ -166,6 +175,7 @@ where let mut checkpoint_mgr = CheckpointManager::default(); pool.spawn(checkpoint_mgr.spawn_subscription_mgr()); let ep = Endpoint { + initial_scan_semaphore, meta_client, range_router, scheduler, @@ -174,12 +184,9 @@ where store_id, regions: accessor, engine: PhantomData, - router, pd_client, subs, concurrency_manager, - initial_scan_memory_quota, - initial_scan_throughput_quota, region_operator, failover_time: None, config, @@ -191,12 +198,11 @@ where } } -impl Endpoint +impl Endpoint where S: MetaStore + 'static, R: RegionInfoProvider + Clone + 'static, E: KvEngine, - RT: CdcHandle + 'static, PDC: PdClient + 'static, { fn get_meta_client(&self) -> MetadataClient { @@ -494,20 +500,6 @@ where }); } - /// Make an initial data loader using the resource of the endpoint. - pub fn make_initial_loader(&self) -> InitialDataLoader { - InitialDataLoader::new( - self.router.clone(), - self.regions.clone(), - self.range_router.clone(), - self.subs.clone(), - self.scheduler.clone(), - self.initial_scan_memory_quota.clone(), - self.pool.handle().clone(), - self.initial_scan_throughput_quota.clone(), - ) - } - pub fn handle_watch_task(&self, op: TaskOp) { match op { TaskOp::AddTask(task) => { @@ -525,13 +517,12 @@ where } } - async fn observe_and_scan_region( + async fn observe_regions_in_range( &self, - init: InitialDataLoader, task: &StreamTask, start_key: Vec, end_key: Vec, - ) -> Result<()> { + ) { let start = Instant::now_coarse(); let success = self .observer @@ -549,7 +540,9 @@ where // directly and this would be fast. If this gets slow, maybe make it async // again. (Will that bring race conditions? say `Start` handled after // `ResfreshResolver` of some region.) 
- let range_init_result = init.initialize_range(start_key.clone(), end_key.clone()); + let range_init_result = self + .initialize_range(start_key.clone(), end_key.clone()) + .await; match range_init_result { Ok(()) => { info!("backup stream success to initialize"; @@ -561,6 +554,45 @@ where e.report("backup stream initialize failed"); } } + } + + /// initialize a range: it simply scan the regions with leader role and send + /// them to [`initialize_region`]. + pub async fn initialize_range(&self, start_key: Vec, end_key: Vec) -> Result<()> { + // Generally we will be very very fast to consume. + // Directly clone the initial data loader to the background thread looks a + // little heavier than creating a new channel. TODO: Perhaps we need a + // handle to the `InitialDataLoader`. Making it a `Runnable` worker might be a + // good idea. + let (tx, mut rx) = tokio::sync::mpsc::channel(1); + self.regions + .seek_region( + &start_key, + Box::new(move |i| { + // Ignore the error, this can only happen while the server is shutting down, the + // future has been canceled. + let _ = i + .filter(|r| r.role == StateRole::Leader) + .take_while(|r| r.region.start_key < end_key) + .try_for_each(|r| { + tx.blocking_send(ObserveOp::Start { + region: r.region.clone(), + }) + }); + }), + ) + .map_err(|err| { + Error::Other(box_err!( + "failed to seek region for start key {}: {}", + utils::redact(&start_key), + err + )) + })?; + // Don't reschedule this command: or once the endpoint's mailbox gets + // full, the system might deadlock. + while let Some(cmd) = rx.recv().await { + self.region_operator.request(cmd).await; + } Ok(()) } @@ -578,7 +610,6 @@ where /// Load the task into memory: this would make the endpint start to observe. 
fn load_task(&self, task: StreamTask) { let cli = self.meta_client.clone(); - let init = self.make_initial_loader(); let range_router = self.range_router.clone(); info!( @@ -621,10 +652,8 @@ where .await?; for (start_key, end_key) in ranges { - let init = init.clone(); - - self.observe_and_scan_region(init, &task, start_key, end_key) - .await? + self.observe_regions_in_range(&task, start_key, end_key) + .await } info!( "finish register backup stream ranges"; @@ -859,11 +888,16 @@ where } fn on_update_change_config(&mut self, cfg: BackupStreamConfig) { + let concurrency_diff = + cfg.initial_scan_concurrency as isize - self.config.initial_scan_concurrency as isize; info!( "update log backup config"; "config" => ?cfg, + "concurrency_diff" => concurrency_diff, ); self.range_router.udpate_config(&cfg); + self.update_semaphore_capacity(&self.initial_scan_semaphore, concurrency_diff); + self.config = cfg; } @@ -873,6 +907,24 @@ where self.pool.block_on(self.region_operator.request(op)); } + fn update_semaphore_capacity(&self, sema: &Arc, diff: isize) { + use std::cmp::Ordering::*; + match diff.cmp(&0) { + Less => { + self.pool.spawn( + Arc::clone(sema) + .acquire_many_owned(-diff as _) + // It is OK to trivially ignore the Error case (semaphore has been closed, we are shutting down the server.) 
+ .map_ok(|p| p.forget()), + ); + } + Equal => {} + Greater => { + sema.add_permits(diff as _); + } + } + } + pub fn run_task(&mut self, task: Task) { debug!("run backup stream task"; "task" => ?task, "store_id" => %self.store_id); let now = Instant::now_coarse(); @@ -1279,12 +1331,11 @@ impl Task { } } -impl Runnable for Endpoint +impl Runnable for Endpoint where S: MetaStore + 'static, R: RegionInfoProvider + Clone + 'static, E: KvEngine, - RT: CdcHandle + 'static, PDC: PdClient + 'static, { type Task = Task; @@ -1297,10 +1348,7 @@ where #[cfg(test)] mod test { use engine_rocks::RocksEngine; - use raftstore::{ - coprocessor::region_info_accessor::MockRegionInfoProvider, router::CdcRaftRouter, - }; - use test_raftstore::MockRaftStoreRouter; + use raftstore::coprocessor::region_info_accessor::MockRegionInfoProvider; use tikv_util::worker::dummy_scheduler; use crate::{ @@ -1315,13 +1363,9 @@ mod test { cli.insert_task_with_range(&task, &[]).await.unwrap(); fail::cfg("failed_to_get_tasks", "1*return").unwrap(); - Endpoint::< - _, - MockRegionInfoProvider, - RocksEngine, - CdcRaftRouter, - MockPdClient, - >::start_and_watch_tasks(cli, sched) + Endpoint::<_, MockRegionInfoProvider, RocksEngine, MockPdClient>::start_and_watch_tasks( + cli, sched, + ) .await .unwrap(); fail::remove("failed_to_get_tasks"); diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 1b663c0e982..bfb88d5cd5f 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -3,10 +3,9 @@ use std::{marker::PhantomData, sync::Arc, time::Duration}; use engine_traits::{KvEngine, CF_DEFAULT, CF_WRITE}; -use futures::executor::block_on; use kvproto::{kvrpcpb::ExtraOp, metapb::Region, raft_cmdpb::CmdType}; use raftstore::{ - coprocessor::{ObserveHandle, RegionInfoProvider}, + coprocessor::ObserveHandle, router::CdcHandle, store::{fsm::ChangeObserver, Callback}, }; @@ -21,22 +20,16 @@ use tikv_util::{ 
time::{Instant, Limiter}, worker::Scheduler, }; -use tokio::{ - runtime::Handle, - sync::{OwnedSemaphorePermit, Semaphore}, -}; +use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use txn_types::{Key, Lock, TimeStamp}; use crate::{ annotate, debug, - endpoint::ObserveOp, errors::{ContextualResultExt, Error, Result}, metrics, router::{ApplyEvent, ApplyEvents, Router}, subscription_track::{Ref, RefMut, SubscriptionTracer, TwoPhaseResolver}, - try_send, - utils::{self, RegionPager}, - Task, + utils, Task, }; const MAX_GET_SNAPSHOT_RETRY: usize = 5; @@ -60,10 +53,12 @@ impl PendingMemoryQuota { Self(Arc::new(Semaphore::new(quota))) } - pub fn pending(&self, size: usize) -> PendingMemory { + pub async fn pending(&self, size: usize) -> PendingMemory { PendingMemory( - Handle::current() - .block_on(self.0.clone().acquire_many_owned(size as _)) + self.0 + .clone() + .acquire_many_owned(size as _) + .await .expect("BUG: the semaphore is closed unexpectedly."), ) } @@ -175,121 +170,64 @@ impl EventLoader { } /// The context for loading incremental data between range. -/// Like [`cdc::Initializer`], but supports initialize over range. +/// Like [`cdc::Initializer`]. /// Note: maybe we can merge those two structures? -/// Note': maybe extract more fields to trait so it would be easier to test. #[derive(Clone)] -pub struct InitialDataLoader { +pub struct InitialDataLoader { // Note: maybe we can make it an abstract thing like `EventSink` with // method `async (KvEvent) -> Result<()>`? pub(crate) sink: Router, pub(crate) tracing: SubscriptionTracer, pub(crate) scheduler: Scheduler, - // Note: this is only for `init_range`, maybe make it an argument? - pub(crate) regions: R, - // Note: Maybe move those fields about initial scanning into some trait? - pub(crate) router: RT, + pub(crate) quota: PendingMemoryQuota, pub(crate) limit: Limiter, + // If there are too many concurrent initial scanning, the limit of disk speed or pending memory + // quota will probably be triggered. 
Then the whole scanning will be pretty slow. And when + // we are holding a iterator for a long time, the memtable may not be able to be flushed. + // Using this to restrict the possibility of that. + concurrency_limit: Arc, + + cdc_handle: H, - pub(crate) handle: Handle, _engine: PhantomData, } -impl InitialDataLoader +impl InitialDataLoader where E: KvEngine, - R: RegionInfoProvider + Clone + 'static, - RT: CdcHandle, + H: CdcHandle + Sync, { pub fn new( - router: RT, - regions: R, sink: Router, tracing: SubscriptionTracer, sched: Scheduler, quota: PendingMemoryQuota, - handle: Handle, limiter: Limiter, + cdc_handle: H, + concurrency_limit: Arc, ) -> Self { Self { - router, - regions, sink, tracing, scheduler: sched, _engine: PhantomData, quota, - handle, + cdc_handle, + concurrency_limit, limit: limiter, } } - pub fn observe_over_with_retry( + pub async fn capture_change( &self, region: &Region, - mut cmd: impl FnMut() -> ChangeObserver, + cmd: ChangeObserver, ) -> Result { - let mut last_err = None; - for _ in 0..MAX_GET_SNAPSHOT_RETRY { - let c = cmd(); - let r = self.observe_over(region, c); - match r { - Ok(s) => { - return Ok(s); - } - Err(e) => { - let can_retry = match e.without_context() { - Error::RaftRequest(pbe) => { - !(pbe.has_epoch_not_match() - || pbe.has_not_leader() - || pbe.get_message().contains("stale observe id") - || pbe.has_region_not_found()) - } - Error::RaftStore(raftstore::Error::RegionNotFound(_)) - | Error::RaftStore(raftstore::Error::NotLeader(..)) => false, - _ => true, - }; - e.report(format_args!( - "during getting initial snapshot for region {:?}; can retry = {}", - region, can_retry - )); - last_err = match last_err { - None => Some(e), - Some(err) => Some(Error::Contextual { - context: format!("and error {}", err), - inner_error: Box::new(e), - }), - }; - - if !can_retry { - break; - } - std::thread::sleep(Duration::from_secs(1)); - continue; - } - } - } - Err(last_err.expect("BUG: max retry time exceed but no error")) - } - - 
/// Start observe over some region. - /// This will register the region to the raftstore as observing, - /// and return the current snapshot of that region. - fn observe_over(&self, region: &Region, cmd: ChangeObserver) -> Result { - // There are 2 ways for getting the initial snapshot of a region: - // - the BR method: use the interface in the RaftKv interface, read the - // key-values directly. - // - the CDC method: use the raftstore message `SignificantMsg::CaptureChange` - // to register the region to CDC observer and get a snapshot at the same time. - // Registering the observer to the raftstore is necessary because we should only - // listen events from leader. In CDC, the change observer is - // per-delegate(i.e. per-region), we can create the command per-region here too. - let (callback, fut) = tikv_util::future::paired_future_callback::>(); - self.router + self.cdc_handle .capture_change( region.get_id(), region.get_region_epoch().clone(), @@ -315,7 +253,8 @@ where region.get_id() ))?; - let snap = block_on(fut) + let snap = fut + .await .map_err(|err| { annotate!( err, @@ -332,6 +271,54 @@ where Ok(snap) } + pub async fn observe_over_with_retry( + &self, + region: &Region, + mut cmd: impl FnMut() -> ChangeObserver, + ) -> Result { + let mut last_err = None; + for _ in 0..MAX_GET_SNAPSHOT_RETRY { + let c = cmd(); + let r = self.capture_change(region, c).await; + match r { + Ok(s) => { + return Ok(s); + } + Err(e) => { + let can_retry = match e.without_context() { + Error::RaftRequest(pbe) => { + !(pbe.has_epoch_not_match() + || pbe.has_not_leader() + || pbe.get_message().contains("stale observe id") + || pbe.has_region_not_found()) + } + Error::RaftStore(raftstore::Error::RegionNotFound(_)) + | Error::RaftStore(raftstore::Error::NotLeader(..)) => false, + _ => true, + }; + e.report(format_args!( + "during getting initial snapshot for region {:?}; can retry = {}", + region, can_retry + )); + last_err = match last_err { + None => Some(e), + Some(err) => 
Some(Error::Contextual { + context: format!("and error {}", err), + inner_error: Box::new(e), + }), + }; + + if !can_retry { + break; + } + tokio::time::sleep(Duration::from_secs(1)).await; + continue; + } + } + } + Err(last_err.expect("BUG: max retry time exceed but no error")) + } + fn with_resolver( &self, region: &Region, @@ -381,7 +368,7 @@ where f(v.value_mut().resolver()) } - fn scan_and_async_send( + async fn scan_and_async_send( &self, region: &Region, handle: &ObserveHandle, @@ -419,8 +406,8 @@ where let sink = self.sink.clone(); let event_size = events.size(); let sched = self.scheduler.clone(); - let permit = self.quota.pending(event_size); - self.limit.blocking_consume(disk_read as _); + let permit = self.quota.pending(event_size).await; + self.limit.consume(disk_read as _).await; debug!("sending events to router"; "size" => %event_size, "region" => %region_id); metrics::INCREMENTAL_SCAN_SIZE.observe(event_size as f64); metrics::INCREMENTAL_SCAN_DISK_READ.inc_by(disk_read as f64); @@ -434,7 +421,7 @@ where } } - pub fn do_initial_scan( + pub async fn do_initial_scan( &self, region: &Region, // We are using this handle for checking whether the initial scan is stale. @@ -442,18 +429,25 @@ where start_ts: TimeStamp, snap: impl Snapshot, ) -> Result { - let _guard = self.handle.enter(); let tr = self.tracing.clone(); let region_id = region.get_id(); let mut join_handles = Vec::with_capacity(8); + let permit = self + .concurrency_limit + .acquire() + .await + .expect("BUG: semaphore closed"); // It is ok to sink more data than needed. So scan to +inf TS for convenance. 
let event_loader = EventLoader::load_from(snap, start_ts, TimeStamp::max(), region)?; - let stats = self.scan_and_async_send(region, &handle, event_loader, &mut join_handles)?; + let stats = self + .scan_and_async_send(region, &handle, event_loader, &mut join_handles) + .await?; + drop(permit); - Handle::current() - .block_on(futures::future::try_join_all(join_handles)) + futures::future::try_join_all(join_handles) + .await .map_err(|err| annotate!(err, "tokio runtime failed to join consuming threads"))?; Self::with_resolver_by(&tr, region, &handle, |r| { @@ -467,31 +461,6 @@ where Ok(stats) } - - /// initialize a range: it simply scan the regions with leader role and send - /// them to [`initialize_region`]. - pub fn initialize_range(&self, start_key: Vec, end_key: Vec) -> Result<()> { - let mut pager = RegionPager::scan_from(self.regions.clone(), start_key, end_key); - loop { - let regions = pager.next_page(8)?; - debug!("scanning for entries in region."; "regions" => ?regions); - if regions.is_empty() { - break; - } - for r in regions { - // Note: Even we did the initial scanning, and blocking resolved ts from - // advancing, if the next_backup_ts was updated in some extreme condition, there - // is still little chance to lost data: For example, if a region cannot elect - // the leader for long time. (say, net work partition) At that time, we have - // nowhere to record the lock status of this region. - try_send!( - self.scheduler, - Task::ModifyObserve(ObserveOp::Start { region: r.region }) - ); - } - } - Ok(()) - } } #[cfg(test)] diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index e418d59029d..7aeecb775cc 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -1,15 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{ - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, - }, - time::Duration, -}; +use std::{sync::Arc, time::Duration}; -use crossbeam::channel::{Receiver as SyncReceiver, Sender as SyncSender}; -use crossbeam_channel::SendError; use engine_traits::KvEngine; use error_code::ErrorCodeExt; use futures::FutureExt; @@ -22,10 +14,11 @@ use raftstore::{ store::fsm::ChangeObserver, }; use tikv::storage::Statistics; -use tikv_util::{box_err, debug, info, time::Instant, warn, worker::Scheduler}; -use tokio::sync::mpsc::{channel, Receiver, Sender}; +use tikv_util::{ + box_err, debug, info, sys::thread::ThreadBuildWrapper, time::Instant, warn, worker::Scheduler, +}; +use tokio::sync::mpsc::{channel, error::SendError, Receiver, Sender}; use txn_types::TimeStamp; -use yatp::task::callback::Handle as YatpHandle; use crate::{ annotate, @@ -43,7 +36,7 @@ use crate::{ Task, }; -type ScanPool = yatp::ThreadPool; +type ScanPool = tokio::runtime::Runtime; const INITIAL_SCAN_FAILURE_MAX_RETRY_TIME: usize = 10; @@ -128,8 +121,9 @@ fn should_retry(err: &Error) -> bool { } /// the abstraction over a "DB" which provides the initial scanning. -trait InitialScan: Clone { - fn do_initial_scan( +#[async_trait::async_trait] +trait InitialScan: Clone + Sync + Send + 'static { + async fn do_initial_scan( &self, region: &Region, start_ts: TimeStamp, @@ -139,13 +133,13 @@ trait InitialScan: Clone { fn handle_fatal_error(&self, region: &Region, err: Error); } -impl InitialScan for InitialDataLoader +#[async_trait::async_trait] +impl InitialScan for InitialDataLoader where E: KvEngine, - R: RegionInfoProvider + Clone + 'static, - RT: CdcHandle, + RT: CdcHandle + Sync + 'static, { - fn do_initial_scan( + async fn do_initial_scan( &self, region: &Region, start_ts: TimeStamp, @@ -155,12 +149,14 @@ where let h = handle.clone(); // Note: we have external retry at `ScanCmd::exec_by_with_retry`, should we keep // retrying here? 
- let snap = self.observe_over_with_retry(region, move || { - ChangeObserver::from_pitr(region_id, handle.clone()) - })?; + let snap = self + .observe_over_with_retry(region, move || { + ChangeObserver::from_pitr(region_id, handle.clone()) + }) + .await?; #[cfg(feature = "failpoints")] fail::fail_point!("scan_after_get_snapshot"); - let stat = self.do_initial_scan(region, h, start_ts, snap)?; + let stat = self.do_initial_scan(region, h, start_ts, snap).await?; Ok(stat) } @@ -180,7 +176,7 @@ where impl ScanCmd { /// execute the initial scanning via the specificated [`InitialDataLoader`]. - fn exec_by(&self, initial_scan: impl InitialScan) -> Result<()> { + async fn exec_by(&self, initial_scan: impl InitialScan) -> Result<()> { let Self { region, handle, @@ -188,7 +184,9 @@ impl ScanCmd { .. } = self; let begin = Instant::now_coarse(); - let stat = initial_scan.do_initial_scan(region, *last_checkpoint, handle.clone())?; + let stat = initial_scan + .do_initial_scan(region, *last_checkpoint, handle.clone()) + .await?; info!("initial scanning finished!"; "takes" => ?begin.saturating_elapsed(), "from_ts" => %last_checkpoint, utils::slog_region(region)); utils::record_cf_stat("lock", &stat.lock); utils::record_cf_stat("write", &stat.write); @@ -197,17 +195,12 @@ impl ScanCmd { } /// execute the command, when meeting error, retrying. - fn exec_by_with_retry(self, init: impl InitialScan, cancel: &AtomicBool) { + async fn exec_by_with_retry(self, init: impl InitialScan) { let mut retry_time = INITIAL_SCAN_FAILURE_MAX_RETRY_TIME; loop { - if cancel.load(Ordering::SeqCst) { - return; - } - match self.exec_by(init.clone()) { + match self.exec_by(init.clone()).await { Err(err) if should_retry(&err) && retry_time > 0 => { - // NOTE: blocking this thread may stick the process. - // Maybe spawn a task to tokio and reschedule the task then? 
- std::thread::sleep(Duration::from_millis(500)); + tokio::time::sleep(Duration::from_millis(500)).await; warn!("meet retryable error"; "err" => %err, "retry_time" => retry_time); retry_time -= 1; continue; @@ -223,82 +216,62 @@ impl ScanCmd { } } -fn scan_executor_loop( - init: impl InitialScan, - cmds: SyncReceiver, - canceled: Arc, -) { - while let Ok(cmd) = cmds.recv() { - fail::fail_point!("execute_scan_command"); +async fn scan_executor_loop(init: impl InitialScan, mut cmds: Receiver) { + while let Some(cmd) = cmds.recv().await { debug!("handling initial scan request"; "region_id" => %cmd.region.get_id()); metrics::PENDING_INITIAL_SCAN_LEN .with_label_values(&["queuing"]) .dec(); - if canceled.load(Ordering::Acquire) { - return; + #[cfg(feature = "failpoints")] + { + let sleep = (|| { + fail::fail_point!("execute_scan_command_sleep_100", |_| { 100 }); + 0 + })(); + tokio::time::sleep(std::time::Duration::from_secs(sleep)).await; } - metrics::PENDING_INITIAL_SCAN_LEN - .with_label_values(&["executing"]) - .inc(); - cmd.exec_by_with_retry(init.clone(), &canceled); - metrics::PENDING_INITIAL_SCAN_LEN - .with_label_values(&["executing"]) - .dec(); + let init = init.clone(); + tokio::task::spawn(async move { + metrics::PENDING_INITIAL_SCAN_LEN + .with_label_values(&["executing"]) + .inc(); + cmd.exec_by_with_retry(init).await; + metrics::PENDING_INITIAL_SCAN_LEN + .with_label_values(&["executing"]) + .dec(); + }); } } /// spawn the executors in the scan pool. -/// we make workers thread instead of spawn scan task directly into the pool -/// because the [`InitialDataLoader`] isn't `Sync` hence we must use it very -/// carefully or rustc (along with tokio) would complain that we made a `!Send` -/// future. so we have moved the data loader to the synchronous context so its -/// reference won't be shared between threads any more. 
-fn spawn_executors(init: impl InitialScan + Send + 'static, number: usize) -> ScanPoolHandle { - let (tx, rx) = crossbeam::channel::bounded(MESSAGE_BUFFER_SIZE); +fn spawn_executors( + init: impl InitialScan + Send + Sync + 'static, + number: usize, +) -> ScanPoolHandle { + let (tx, rx) = tokio::sync::mpsc::channel(MESSAGE_BUFFER_SIZE); let pool = create_scan_pool(number); - let stopped = Arc::new(AtomicBool::new(false)); - for _ in 0..number { - let init = init.clone(); - let rx = rx.clone(); - let stopped = stopped.clone(); - pool.spawn(move |_: &mut YatpHandle<'_>| { - let _io_guard = file_system::WithIoType::new(file_system::IoType::Replication); - scan_executor_loop(init, rx, stopped); - }) - } - ScanPoolHandle { - tx, - _pool: pool, - stopped, - } + pool.spawn(async move { + scan_executor_loop(init, rx).await; + }); + ScanPoolHandle { tx, _pool: pool } } struct ScanPoolHandle { - tx: SyncSender, - stopped: Arc, + // Theoretically, we can get rid of the sender, and spawn a new task via initial loader in each + // thread. But that will make `SubscribeManager` holds a reference to the implementation of + // `InitialScan`, which will get the type information a mass. + tx: Sender, - // in fact, we won't use the pool any more. - // but we should hold the reference to the pool so it won't try to join the threads running. 
_pool: ScanPool, } -impl Drop for ScanPoolHandle { - fn drop(&mut self) { - self.stopped.store(true, Ordering::Release); - } -} - impl ScanPoolHandle { - fn request(&self, cmd: ScanCmd) -> std::result::Result<(), SendError> { - if self.stopped.load(Ordering::Acquire) { - warn!("scan pool is stopped, ignore the scan command"; "region" => %cmd.region.get_id()); - return Ok(()); - } + async fn request(&self, cmd: ScanCmd) -> std::result::Result<(), SendError> { metrics::PENDING_INITIAL_SCAN_LEN .with_label_values(&["queuing"]) .inc(); - self.tx.send(cmd) + self.tx.send(cmd).await } } @@ -348,11 +321,20 @@ where } } -/// Create a yatp pool for doing initial scanning. +/// Create a pool for doing initial scanning. fn create_scan_pool(num_threads: usize) -> ScanPool { - yatp::Builder::new("log-backup-scan") - .max_thread_count(num_threads) - .build_callback_pool() + tokio::runtime::Builder::new_multi_thread() + .with_sys_and_custom_hooks( + move || { + file_system::set_io_type(file_system::IoType::Replication); + }, + || {}, + ) + .thread_name("log-backup-scan") + .enable_time() + .worker_threads(num_threads) + .build() + .unwrap() } impl RegionSubscriptionManager @@ -367,22 +349,24 @@ where /// /// a two-tuple, the first is the handle to the manager, the second is the /// operator loop future. 
- pub fn start( - initial_loader: InitialDataLoader, + pub fn start( + initial_loader: InitialDataLoader, + regions: R, observer: BackupStreamObserver, meta_cli: MetadataClient, pd_client: Arc, scan_pool_size: usize, - resolver: BackupStreamResolver, + resolver: BackupStreamResolver, ) -> (Self, future![()]) where E: KvEngine, - RT: CdcHandle + 'static, + HInit: CdcHandle + Sync + 'static, + HChkLd: CdcHandle + 'static, { let (tx, rx) = channel(MESSAGE_BUFFER_SIZE); let scan_pool_handle = spawn_executors(initial_loader.clone(), scan_pool_size); let op = Self { - regions: initial_loader.regions.clone(), + regions, meta_cli, pd_client, range_router: initial_loader.sink.clone(), @@ -522,7 +506,8 @@ where region, self.get_last_checkpoint_of(&for_task, region).await?, handle.clone(), - ); + ) + .await; Result::Ok(()) } .await; @@ -567,7 +552,8 @@ where Err(Error::Other(box_err!("Nature is boring"))) }); let tso = self.get_last_checkpoint_of(&for_task, region).await?; - self.observe_over_with_initial_data_from_checkpoint(region, tso, handle.clone()); + self.observe_over_with_initial_data_from_checkpoint(region, tso, handle.clone()) + .await; } } Ok(()) @@ -702,13 +688,13 @@ where Ok(cp.ts) } - fn spawn_scan(&self, cmd: ScanCmd) { + async fn spawn_scan(&self, cmd: ScanCmd) { // we should not spawn initial scanning tasks to the tokio blocking pool // because it is also used for converting sync File I/O to async. (for now!) // In that condition, if we blocking for some resources(for example, the // `MemoryQuota`) at the block threads, we may meet some ghosty // deadlock. 
- let s = self.scan_pool_handle.request(cmd); + let s = self.scan_pool_handle.request(cmd).await; if let Err(err) = s { let region_id = err.0.region.get_id(); annotate!(err, "BUG: scan_pool closed") @@ -716,7 +702,7 @@ where } } - fn observe_over_with_initial_data_from_checkpoint( + async fn observe_over_with_initial_data_from_checkpoint( &self, region: &Region, last_checkpoint: TimeStamp, @@ -730,6 +716,7 @@ where last_checkpoint, _work: self.scans.clone().work(), }) + .await } fn find_task_by_region(&self, r: &Region) -> Option { @@ -748,8 +735,9 @@ mod test { #[derive(Clone, Copy)] struct NoopInitialScan; + #[async_trait::async_trait] impl InitialScan for NoopInitialScan { - fn do_initial_scan( + async fn do_initial_scan( &self, _region: &Region, _start_ts: txn_types::TimeStamp, @@ -787,17 +775,20 @@ mod test { let pool = spawn_executors(NoopInitialScan, 1); let wg = CallbackWaitGroup::new(); - fail::cfg("execute_scan_command", "sleep(100)").unwrap(); + fail::cfg("execute_scan_command_sleep_100", "return").unwrap(); for _ in 0..100 { let wg = wg.clone(); - pool.request(ScanCmd { - region: Default::default(), - handle: Default::default(), - last_checkpoint: Default::default(), - // Note: Maybe make here a Box or some other trait? - _work: wg.work(), - }) - .unwrap() + assert!( + pool._pool + .block_on(pool.request(ScanCmd { + region: Default::default(), + handle: Default::default(), + last_checkpoint: Default::default(), + // Note: Maybe make here a Box or some other trait? 
+ _work: wg.work(), + })) + .is_ok() + ) } should_finish_in(move || drop(pool), Duration::from_secs(5)); diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 2dae8ce745d..5a6b2e0753b 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -82,6 +82,7 @@ impl ActiveSubscription { self.handle.stop_observing(); } + #[cfg(test)] pub fn is_observing(&self) -> bool { self.handle.is_observing() } @@ -319,6 +320,7 @@ impl SubscriptionTracer { } /// check whether the region_id should be observed by this observer. + #[cfg(test)] pub fn is_observing(&self, region_id: u64) -> bool { let sub = self.0.get_mut(®ion_id); match sub { diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 52b6f0e9391..5e798a8428c 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -18,14 +18,12 @@ use std::{ use async_compression::{tokio::write::ZstdEncoder, Level}; use engine_rocks::ReadPerfInstant; use engine_traits::{CfName, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE}; -use futures::{channel::mpsc, executor::block_on, ready, task::Poll, FutureExt, StreamExt}; +use futures::{ready, task::Poll, FutureExt}; use kvproto::{ brpb::CompressionType, metapb::Region, raft_cmdpb::{CmdType, Request}, }; -use raft::StateRole; -use raftstore::{coprocessor::RegionInfoProvider, RegionInfo}; use tikv::storage::CfStatistics; use tikv_util::{ box_err, @@ -33,7 +31,6 @@ use tikv_util::{ self_thread_inspector, IoStat, ThreadInspector, ThreadInspectorImpl as OsInspector, }, time::Instant, - warn, worker::Scheduler, Either, }; @@ -79,65 +76,6 @@ pub fn redact(key: &impl AsRef<[u8]>) -> log_wrappers::Value<'_> { log_wrappers::Value::key(key.as_ref()) } -/// RegionPager seeks regions with leader role in the range. -pub struct RegionPager

{ - regions: P, - start_key: Vec, - end_key: Vec, - reach_last_region: bool, -} - -impl RegionPager

{ - pub fn scan_from(regions: P, start_key: Vec, end_key: Vec) -> Self { - Self { - regions, - start_key, - end_key, - reach_last_region: false, - } - } - - pub fn next_page(&mut self, size: usize) -> Result> { - if self.start_key >= self.end_key || self.reach_last_region { - return Ok(vec![]); - } - - let (mut tx, rx) = mpsc::channel(size); - let end_key = self.end_key.clone(); - self.regions - .seek_region( - &self.start_key, - Box::new(move |i| { - let r = i - .filter(|r| r.role == StateRole::Leader) - .take(size) - .take_while(|r| r.region.start_key < end_key) - .try_for_each(|r| tx.try_send(r.clone())); - if let Err(_err) = r { - warn!("failed to scan region and send to initlizer") - } - }), - ) - .map_err(|err| { - Error::Other(box_err!( - "failed to seek region for start key {}: {}", - redact(&self.start_key), - err - )) - })?; - let collected_regions = block_on(rx.collect::>()); - self.start_key = collected_regions - .last() - .map(|region| region.region.end_key.to_owned()) - // no leader region found. - .unwrap_or_default(); - if self.start_key.is_empty() { - self.reach_last_region = true; - } - Ok(collected_regions) - } -} - /// StopWatch is a utility for record time cost in multi-stage tasks. /// NOTE: Maybe it should be generic over somewhat Clock type? 
pub struct StopWatch(Instant); diff --git a/components/backup-stream/tests/integration/mod.rs b/components/backup-stream/tests/integration/mod.rs index a209572c6d8..79a756f684d 100644 --- a/components/backup-stream/tests/integration/mod.rs +++ b/components/backup-stream/tests/integration/mod.rs @@ -16,6 +16,7 @@ mod all { use futures::{Stream, StreamExt}; use pd_client::PdClient; use test_raftstore::IsolationFilterFactory; + use tikv::config::BackupStreamConfig; use tikv_util::{box_err, defer, info, HandyRwLock}; use tokio::time::timeout; use txn_types::{Key, TimeStamp}; @@ -430,4 +431,25 @@ mod all { round1.iter().map(|k| k.as_slice()), )) } + + #[test] + fn update_config() { + let suite = SuiteBuilder::new_named("network_partition") + .nodes(1) + .build(); + let mut basic_config = BackupStreamConfig::default(); + basic_config.initial_scan_concurrency = 4; + suite.run(|| Task::ChangeConfig(basic_config.clone())); + suite.wait_with(|e| { + assert_eq!(e.initial_scan_semaphore.available_permits(), 4,); + true + }); + + basic_config.initial_scan_concurrency = 16; + suite.run(|| Task::ChangeConfig(basic_config.clone())); + suite.wait_with(|e| { + assert_eq!(e.initial_scan_semaphore.available_permits(), 16,); + true + }); + } } diff --git a/components/backup-stream/tests/suite.rs b/components/backup-stream/tests/suite.rs index e1df628d76b..41a57f5858b 100644 --- a/components/backup-stream/tests/suite.rs +++ b/components/backup-stream/tests/suite.rs @@ -31,14 +31,11 @@ use kvproto::{ }; use pd_client::PdClient; use protobuf::parse_from_bytes; -use raftstore::{ - router::{CdcRaftRouter, ServerRaftStoreRouter}, - RegionInfoAccessor, -}; +use raftstore::{router::CdcRaftRouter, RegionInfoAccessor}; use resolved_ts::LeadershipResolver; use tempdir::TempDir; use test_pd_client::TestPdClient; -use test_raftstore::{new_server_cluster, Cluster, ServerCluster, SimulateTransport}; +use test_raftstore::{new_server_cluster, Cluster, ServerCluster}; use test_util::retry; use 
tikv::config::BackupStreamConfig; use tikv_util::{ @@ -57,11 +54,6 @@ pub type TestEndpoint = Endpoint< ErrorStore, RegionInfoAccessor, engine_test::kv::KvTestEngine, - CdcRaftRouter< - SimulateTransport< - ServerRaftStoreRouter, - >, - >, TestPdClient, >; diff --git a/components/raftstore/src/router.rs b/components/raftstore/src/router.rs index 09f389a2230..77d3a35e306 100644 --- a/components/raftstore/src/router.rs +++ b/components/raftstore/src/router.rs @@ -1,6 +1,9 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use std::borrow::Cow; +use std::{ + borrow::Cow, + sync::{Arc, Mutex}, +}; // #[PerformanceCriticalPath] use crossbeam::channel::TrySendError; @@ -406,6 +409,33 @@ where ) -> RaftStoreResult<()>; } +impl> CdcHandle for Arc> { + fn capture_change( + &self, + region_id: u64, + region_epoch: metapb::RegionEpoch, + change_observer: ChangeObserver, + callback: Callback<::Snapshot>, + ) -> RaftStoreResult<()> { + Mutex::lock(self).unwrap().capture_change( + region_id, + region_epoch, + change_observer, + callback, + ) + } + + fn check_leadership( + &self, + region_id: u64, + callback: Callback<::Snapshot>, + ) -> RaftStoreResult<()> { + Mutex::lock(self) + .unwrap() + .check_leadership(region_id, callback) + } +} + /// A wrapper of SignificantRouter that is specialized for implementing /// CdcHandle. 
#[derive(Clone)] diff --git a/src/config/mod.rs b/src/config/mod.rs index 9b8ecad50f9..8a2fa291ff1 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2833,6 +2833,7 @@ pub struct BackupStreamConfig { pub initial_scan_pending_memory_quota: ReadableSize, #[online_config(skip)] pub initial_scan_rate_limit: ReadableSize, + pub initial_scan_concurrency: usize, } impl BackupStreamConfig { @@ -2860,6 +2861,9 @@ impl BackupStreamConfig { ) .into()); } + if self.initial_scan_concurrency == 0 { + return Err("the `initial_scan_concurrency` shouldn't be zero".into()); + } Ok(()) } } @@ -2887,6 +2891,7 @@ impl Default for BackupStreamConfig { file_size_limit, initial_scan_pending_memory_quota: ReadableSize(quota_size as _), initial_scan_rate_limit: ReadableSize::mb(60), + initial_scan_concurrency: 6, temp_file_memory_quota: cache_size, } } From 6ff85fcc7a6384da445ef166b745ab998cc20b8d Mon Sep 17 00:00:00 2001 From: ShuNing Date: Fri, 22 Sep 2023 11:28:45 +0800 Subject: [PATCH 0931/1149] tests: fix unstable test_query_stats test (#15657) close tikv/tikv#15656 tests: fix unstable test_query_stats test Signed-off-by: nolouch --- tests/Cargo.toml | 2 +- tests/integrations/raftstore/test_stats.rs | 78 +++++++++++----------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 0081d5e95bc..f3928e97eb8 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -41,7 +41,7 @@ path = "benches/deadlock_detector/mod.rs" [features] default = ["failpoints", "testexport", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] -failpoints = ["fail/failpoints", "tikv/failpoints", "pd_client/failpoints"] +failpoints = ["fail/failpoints", "tikv/failpoints", "pd_client/failpoints", "raft_log_engine/failpoints"] cloud-aws = ["external_storage_export/cloud-aws"] cloud-gcp = ["external_storage_export/cloud-gcp"] cloud-azure = ["external_storage_export/cloud-azure"] diff --git 
a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index 13e718b269d..073382ced17 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -262,19 +262,10 @@ fn test_raw_query_stats_tmpl() { req.set_raw_get(get_req); req }); - batch_commands(&ctx, &client, get_command, &start_key); - assert!(check_split_key( - cluster, - F::encode_raw_key_owned(start_key.clone(), None).into_encoded(), - None - )); - if check_query_num_read( - cluster, - store_id, - region_id, - QueryKind::Get, - (i + 1) * 1000, - ) { + if i == 0 { + batch_commands(&ctx, &client, get_command, &start_key); + } + if check_query_num_read(cluster, store_id, region_id, QueryKind::Get, 1000) { flag = true; break; } @@ -284,14 +275,16 @@ fn test_raw_query_stats_tmpl() { fail::cfg("mock_hotspot_threshold", "return(0)").unwrap(); fail::cfg("mock_tick_interval", "return(0)").unwrap(); fail::cfg("mock_collect_tick_interval", "return(0)").unwrap(); - test_query_num::(raw_get, true); - test_query_num::(raw_batch_get, true); - test_query_num::(raw_scan, true); - test_query_num::(raw_batch_scan, true); + test_query_num::(raw_get, true, true); + test_query_num::(raw_batch_get, true, true); + test_query_num::(raw_scan, true, true); + test_query_num::(raw_batch_scan, true, true); if F::IS_TTL_ENABLED { - test_query_num::(raw_get_key_ttl, true); + test_query_num::(raw_get_key_ttl, true, true); } - test_query_num::(raw_batch_get_command, true); + // requests may failed caused by `EpochNotMatch` after split when auto split is + // enabled, disable it. 
+ test_query_num::(raw_batch_get_command, true, false); test_raw_delete_query::(); fail::remove("mock_tick_interval"); fail::remove("mock_hotspot_threshold"); @@ -385,19 +378,10 @@ fn test_txn_query_stats_tmpl() { req.set_get(get_req); req }); - batch_commands(&ctx, &client, get_command, &start_key); - assert!(check_split_key( - cluster, - Key::from_raw(&start_key).as_encoded().to_vec(), - None - )); - if check_query_num_read( - cluster, - store_id, - region_id, - QueryKind::Get, - (i + 1) * 1000, - ) { + if i == 0 { + batch_commands(&ctx, &client, get_command, &start_key); + } + if check_query_num_read(cluster, store_id, region_id, QueryKind::Get, 1000) { flag = true; break; } @@ -407,11 +391,13 @@ fn test_txn_query_stats_tmpl() { fail::cfg("mock_hotspot_threshold", "return(0)").unwrap(); fail::cfg("mock_tick_interval", "return(0)").unwrap(); fail::cfg("mock_collect_tick_interval", "return(0)").unwrap(); - test_query_num::(get, false); - test_query_num::(batch_get, false); - test_query_num::(scan, false); - test_query_num::(scan_lock, false); - test_query_num::(batch_get_command, false); + test_query_num::(get, false, true); + test_query_num::(batch_get, false, true); + test_query_num::(scan, false, true); + test_query_num::(scan_lock, false, true); + // requests may failed caused by `EpochNotMatch` after split when auto split is + // enabled, disable it. 
+ test_query_num::(batch_get_command, false, false); test_txn_delete_query::(); test_pessimistic_lock(); test_rollback(); @@ -573,15 +559,20 @@ pub fn test_rollback() { )); } -fn test_query_num(query: Box, is_raw_kv: bool) { +fn test_query_num(query: Box, is_raw_kv: bool, auto_split: bool) { let (mut cluster, client, mut ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); - cluster.cfg.split.qps_threshold = Some(0); + if auto_split { + cluster.cfg.split.qps_threshold = Some(0); + } else { + cluster.cfg.split.qps_threshold = Some(1000000); + } cluster.cfg.split.split_balance_score = 2.0; cluster.cfg.split.split_contained_score = 2.0; cluster.cfg.split.detect_times = 1; cluster.cfg.split.sample_threshold = 0; cluster.cfg.storage.set_api_version(F::TAG); + cluster.cfg.server.enable_request_batch = false; }); ctx.set_api_version(F::CLIENT_TAG); @@ -763,4 +754,13 @@ fn batch_commands( } }); rx.recv_timeout(Duration::from_secs(10)).unwrap(); + sleep_ms(100); + // triage metrics flush + for _ in 0..10 { + let mut req = ScanRequest::default(); + req.set_context(ctx.to_owned()); + req.start_key = start_key.to_owned(); + req.end_key = vec![]; + client.kv_scan(&req).unwrap(); + } } From 15d2c7dcd1780d11ee118e0b9b68ca06bf2bf388 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 22 Sep 2023 13:43:44 +0800 Subject: [PATCH 0932/1149] raftstore-v2: fix incorrect GC peer requests to source peer after merge (#15643) close tikv/tikv#15623 After merge, target region sends GC peer requests to removed source peers, however the region_id in requests is set to target region id incorrectly. As results, source region removed peers may be left forever. This commit fixes above issue by putting source removed_records to merged_records, so that region id can be set correctly. 
Signed-off-by: Neil Shen Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 2 +- components/raftstore-v2/src/batch/store.rs | 3 +- .../operation/command/admin/conf_change.rs | 15 +++- .../operation/command/admin/merge/commit.rs | 4 +- components/raftstore-v2/src/operation/life.rs | 65 ++++++++++++--- .../raftstore-v2/src/operation/ready/mod.rs | 11 ++- components/raftstore/src/store/config.rs | 7 ++ tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + tests/integrations/raftstore/test_life.rs | 4 +- tests/integrations/raftstore/test_merge.rs | 81 +++++++++++++++++++ 11 files changed, 168 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f05b651b1ad..0ba7b9d3499 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3253,7 +3253,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#ecdbf1f8c130089392a9bb5f86f7577deddfbed5" +source = "git+https://github.com/pingcap/kvproto.git#090f247be15c00a6000a4d23669ac3e95ea9fcd5" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 5f036c61020..73b65bc0904 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -1,7 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. use std::{ - cmp, ops::{Deref, DerefMut}, path::Path, sync::{ @@ -140,7 +139,7 @@ impl StoreContext { self.tick_batch[PeerTick::CheckLongUncommitted as usize].wait_duration = self.cfg.check_long_uncommitted_interval.0; self.tick_batch[PeerTick::GcPeer as usize].wait_duration = - 60 * cmp::min(Duration::from_secs(1), self.cfg.raft_base_tick_interval.0); + self.cfg.gc_peer_check_interval.0; } // Return None means it has passed unsafe vote period. 
diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index c7b8481aa7c..77ef6c823c1 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -609,10 +609,17 @@ impl Apply { ); removed_records.retain(|p| !updates.contains(&p.get_id())); merged_records.retain_mut(|r| { - let mut sources: Vec<_> = r.take_source_peers().into(); - sources.retain(|p| !updates.contains(&p.get_id())); - r.set_source_peers(sources.into()); - !r.get_source_peers().is_empty() + // Clean up source peers if they acknowledge GcPeerRequest. + let mut source_peers: Vec<_> = r.take_source_peers().into(); + source_peers.retain(|p| !updates.contains(&p.get_id())); + r.set_source_peers(source_peers.into()); + // Clean up source removed records (peers) if they acknowledge GcPeerRequest. + let mut source_removed_records: Vec<_> = r.take_source_removed_records().into(); + source_removed_records.retain(|p| !updates.contains(&p.get_id())); + r.set_source_removed_records(source_removed_records.into()); + // Clean up merged records if all source peers and source removed records are + // empty. 
+ !r.get_source_peers().is_empty() || !r.get_source_removed_records().is_empty() }); self.region_state_mut() .set_removed_records(removed_records.into()); diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index 5208dcc96a8..8e55f89a7d2 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -540,9 +540,6 @@ impl Apply { state.set_state(PeerState::Normal); assert!(!state.has_merge_state()); state.set_tablet_index(index); - let mut removed_records: Vec<_> = state.take_removed_records().into(); - removed_records.append(&mut source_state.get_removed_records().into()); - state.set_removed_records(removed_records.into()); let mut merged_records: Vec<_> = state.take_merged_records().into(); merged_records.append(&mut source_state.get_merged_records().into()); state.set_merged_records(merged_records.into()); @@ -550,6 +547,7 @@ impl Apply { merged_record.set_source_region_id(source_region.get_id()); merged_record.set_source_epoch(source_region.get_region_epoch().clone()); merged_record.set_source_peers(source_region.get_peers().into()); + merged_record.set_source_removed_records(source_state.get_removed_records().into()); merged_record.set_target_region_id(region.get_id()); merged_record.set_target_epoch(region.get_region_epoch().clone()); merged_record.set_target_peers(region.get_peers().into()); diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 395774e17f1..6b778ad6c4a 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -424,7 +424,13 @@ impl Store { }; if destroyed { if msg.get_is_tombstone() { + let msg_region_epoch = msg.get_region_epoch().clone(); if let Some(msg) = build_peer_destroyed_report(&mut msg) { + info!(self.logger(), "peer reports 
destroyed"; + "from_peer" => ?msg.get_from_peer(), + "from_region_epoch" => ?msg_region_epoch, + "region_id" => ?msg.get_region_id(), + "to_peer_id" => ?msg.get_to_peer().get_id()); let _ = ctx.trans.send(msg); } return false; @@ -581,7 +587,11 @@ impl Peer { .iter() .find(|p| p.id == msg.get_from_peer().get_id()) { - let tombstone_msg = self.tombstone_message_for_same_region(peer.clone()); + let tombstone_msg = self.tombstone_message( + self.region_id(), + self.region().get_region_epoch().clone(), + peer.clone(), + ); self.add_message(tombstone_msg); true } else { @@ -589,13 +599,24 @@ impl Peer { } } - fn tombstone_message_for_same_region(&self, peer: metapb::Peer) -> RaftMessage { - let region_id = self.region_id(); + fn tombstone_message( + &self, + region_id: u64, + region_epoch: metapb::RegionEpoch, + peer: metapb::Peer, + ) -> RaftMessage { let mut tombstone_message = RaftMessage::default(); + if self.region_id() != region_id { + // After merge, target region needs to GC peers of source region. 
+ let extra_msg = tombstone_message.mut_extra_msg(); + extra_msg.set_type(ExtraMessageType::MsgGcPeerRequest); + let check_peer = extra_msg.mut_check_gc_peer(); + check_peer.set_from_region_id(self.region_id()); + } tombstone_message.set_region_id(region_id); tombstone_message.set_from_peer(self.peer().clone()); tombstone_message.set_to_peer(peer); - tombstone_message.set_region_epoch(self.region().get_region_epoch().clone()); + tombstone_message.set_region_epoch(region_epoch); tombstone_message.set_is_tombstone(true); tombstone_message } @@ -604,6 +625,10 @@ impl Peer { match msg.get_to_peer().get_id().cmp(&self.peer_id()) { cmp::Ordering::Less => { if let Some(msg) = build_peer_destroyed_report(msg) { + info!(self.logger, "peer reports destroyed"; + "from_peer" => ?msg.get_from_peer(), + "from_region_epoch" => ?msg.get_region_epoch(), + "to_peer_id" => ?msg.get_to_peer().get_id()); self.add_message(msg); } } @@ -675,6 +700,7 @@ impl Peer { && state.get_merged_records().iter().all(|p| { p.get_source_peers() .iter() + .chain(p.get_source_removed_records()) .all(|p| p.get_id() != gc_peer_id) }) { @@ -699,18 +725,33 @@ impl Peer { } let mut need_gc_ids = Vec::with_capacity(5); let gc_context = self.gc_peer_context(); + let mut tombstone_removed_records = + |region_id, region_epoch: &metapb::RegionEpoch, peer: &metapb::Peer| { + need_gc_ids.push(peer.get_id()); + if gc_context.confirmed_ids.contains(&peer.get_id()) { + return; + } + + let msg = self.tombstone_message(region_id, region_epoch.clone(), peer.clone()); + // For leader, it's OK to send gc message immediately. + let _ = ctx.trans.send(msg); + }; for peer in state.get_removed_records() { - need_gc_ids.push(peer.get_id()); - if gc_context.confirmed_ids.contains(&peer.get_id()) { - continue; + tombstone_removed_records(self.region_id(), self.region().get_region_epoch(), peer); + } + // For merge, we need to + // 1. ask source removed peers to destroy. 
+ for record in state.get_merged_records() { + for peer in record.get_source_removed_records() { + tombstone_removed_records( + record.get_source_region_id(), + record.get_source_epoch(), + peer, + ); } - - let msg = self.tombstone_message_for_same_region(peer.clone()); - // For leader, it's OK to send gc message immediately. - let _ = ctx.trans.send(msg); } + // 2. ask target to check whether source should be deleted. for record in state.get_merged_records() { - // For merge, we ask target to check whether source should be deleted. for (source, target) in record .get_source_peers() .iter() diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index ba7170ac8c8..17845b5d0b8 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -418,9 +418,10 @@ impl Peer { return; } + let msg_type = msg.get_message().get_msg_type(); // This can be a message that sent when it's still a follower. Nevertheleast, // it's meaningless to continue to handle the request as callbacks are cleared. - if msg.get_message().get_msg_type() == MessageType::MsgReadIndex + if msg_type == MessageType::MsgReadIndex && self.is_leader() && (msg.get_message().get_from() == raft::INVALID_ID || msg.get_message().get_from() == self.peer_id()) @@ -429,14 +430,18 @@ impl Peer { return; } - if msg.get_message().get_msg_type() == MessageType::MsgReadIndex + if msg_type == MessageType::MsgReadIndex && self.is_leader() && self.on_step_read_index(ctx, msg.mut_message()) { // Read index has respond in `on_step_read_index`, // No need to step again. 
} else if let Err(e) = self.raft_group_mut().step(msg.take_message()) { - error!(self.logger, "raft step error"; "err" => ?e); + error!(self.logger, "raft step error"; + "from_peer" => ?msg.get_from_peer(), + "region_epoch" => ?msg.get_region_epoch(), + "message_type" => ?msg_type, + "err" => ?e); } else { let committed_index = self.raft_group().raft.raft_log.committed; self.report_commit_log_duration(ctx, pre_committed_index, committed_index); diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index f96ed2b7a45..95c4aed9349 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -169,6 +169,9 @@ pub struct Config { /// and try to alert monitoring systems, if there is any. pub abnormal_leader_missing_duration: ReadableDuration, pub peer_stale_state_check_interval: ReadableDuration, + /// Interval to check GC peers. + #[doc(hidden)] + pub gc_peer_check_interval: ReadableDuration, #[online_config(hidden)] pub leader_transfer_max_log_lag: u64, @@ -510,6 +513,7 @@ impl Default for Config { renew_leader_lease_advance_duration: ReadableDuration::secs(0), allow_unsafe_vote_after_start: false, report_region_buckets_tick_interval: ReadableDuration::secs(10), + gc_peer_check_interval: ReadableDuration::secs(60), max_snapshot_file_raw_size: ReadableSize::mb(100), unreachable_backoff: ReadableDuration::secs(10), // TODO: make its value reasonable @@ -1060,6 +1064,9 @@ impl Config { CONFIG_RAFTSTORE_GAUGE .with_label_values(&["leader_transfer_max_log_lag"]) .set(self.leader_transfer_max_log_lag as f64); + CONFIG_RAFTSTORE_GAUGE + .with_label_values(&["gc_peer_check_interval"]) + .set(self.gc_peer_check_interval.as_secs_f64()); CONFIG_RAFTSTORE_GAUGE .with_label_values(&["snap_apply_batch_size"]) diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index c6f787df9a7..1ac6e3840f1 100644 --- a/tests/integrations/config/mod.rs +++ 
b/tests/integrations/config/mod.rs @@ -212,6 +212,7 @@ fn test_serde_custom_tikv_config() { max_leader_missing_duration: ReadableDuration::hours(12), abnormal_leader_missing_duration: ReadableDuration::hours(6), peer_stale_state_check_interval: ReadableDuration::hours(2), + gc_peer_check_interval: ReadableDuration::days(1), leader_transfer_max_log_lag: 123, snap_apply_batch_size: ReadableSize::mb(12), snap_apply_copy_symlink: true, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index ece8cabae49..fe1fa066ae8 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -183,6 +183,7 @@ max-peer-down-duration = "12m" max-leader-missing-duration = "12h" abnormal-leader-missing-duration = "6h" peer-stale-state-check-interval = "2h" +gc-peer-check-interval = "1d" leader-transfer-max-log-lag = 123 snap-apply-batch-size = "12MB" snap-apply-copy-symlink = true diff --git a/tests/integrations/raftstore/test_life.rs b/tests/integrations/raftstore/test_life.rs index e940ca30a7c..f3b5704a586 100644 --- a/tests/integrations/raftstore/test_life.rs +++ b/tests/integrations/raftstore/test_life.rs @@ -11,7 +11,7 @@ use test_raftstore::{ new_learner_peer, new_peer, sleep_ms, Filter, FilterFactory, Simulator as S1, }; use test_raftstore_v2::Simulator as S2; -use tikv_util::{time::Instant, HandyRwLock}; +use tikv_util::{config::ReadableDuration, time::Instant, HandyRwLock}; struct ForwardFactory { node_id: u64, @@ -64,6 +64,7 @@ fn test_gc_peer_tiflash_engine() { let mut cluster_v1 = test_raftstore::new_node_cluster(1, 2); let mut cluster_v2 = test_raftstore_v2::new_node_cluster(1, 2); cluster_v1.cfg.raft_store.enable_v2_compatible_learner = true; + cluster_v2.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); cluster_v1.pd_client.disable_default_operator(); cluster_v2.pd_client.disable_default_operator(); let r11 = cluster_v1.run_conf_change(); @@ -144,6 +145,7 @@ 
fn test_gc_peer_tiflash_engine() { fn test_gc_removed_peer() { let mut cluster = test_raftstore::new_node_cluster(1, 2); cluster.cfg.raft_store.enable_v2_compatible_learner = true; + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); cluster.pd_client.disable_default_operator(); let region_id = cluster.run_conf_change(); diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index ceb888a2b22..0b17ff72ae7 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -1731,3 +1731,84 @@ fn test_prepare_merge_with_5_nodes_snapshot() { // Now leader should replicate more logs and figure out a safe index. pd_client.must_merge(left.get_id(), right.get_id()); } + +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_gc_peer_after_merge() { + let mut cluster = new_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.run(); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k3"); + + let left_peer_on_store1 = find_peer(&left, 1).unwrap().clone(); + cluster.must_transfer_leader(left.get_id(), left_peer_on_store1); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + let left_peer_on_store3 = find_peer(&left, 3).unwrap().clone(); + pd_client.must_remove_peer(left.get_id(), left_peer_on_store3); + must_get_none(&cluster.get_engine(3), b"k1"); + + let right_peer_on_store1 = find_peer(&right, 1).unwrap().clone(); + cluster.must_transfer_leader(right.get_id(), right_peer_on_store1); + let right_peer_on_store3 = find_peer(&right, 3).unwrap().clone(); + 
cluster.add_send_filter(IsolationFilterFactory::new(3)); + pd_client.must_remove_peer(right.get_id(), right_peer_on_store3.clone()); + + // So cluster becomes + // left region: 1(leader) 2 | + // right region: 1(leader) 2 | 3 (removed but not yet destroyed) + // | means isolation. + + // Merge right to left. + pd_client.must_merge(right.get_id(), left.get_id()); + let region_state = cluster.region_local_state(left.get_id(), 1); + assert!( + !region_state.get_merged_records()[0] + .get_source_removed_records() + .is_empty(), + "{:?}", + region_state + ); + assert!( + !region_state + .get_removed_records() + .iter() + .any(|p| p.get_id() == right_peer_on_store3.get_id()), + "{:?}", + region_state + ); + + // Cluster filters and wait for gc peer ticks. + cluster.clear_send_filters(); + sleep_ms(3 * cluster.cfg.raft_store.gc_peer_check_interval.as_millis()); + + // Right region replica on store 3 must be removed. + cluster.must_region_not_exist(right.get_id(), 3); + + let start = Instant::now(); + loop { + sleep_ms(cluster.cfg.raft_store.gc_peer_check_interval.as_millis()); + let region_state = cluster.region_local_state(left.get_id(), 1); + if (region_state.get_merged_records().is_empty() + || region_state.get_merged_records()[0] + .get_source_removed_records() + .is_empty()) + && region_state.get_removed_records().is_empty() + { + break; + } + if start.elapsed() > Duration::from_secs(5) { + panic!( + "source removed records and removed records must be empty, {:?}", + region_state + ); + } + } +} From bbfedd409b5965c04b9edcb34f0a0907c75d6dd2 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 22 Sep 2023 14:36:13 +0800 Subject: [PATCH 0933/1149] upgrade lz4-sys to 1.9.4 to tackle security issue (#15652) ref tikv/tikv#15621 upgrade lz4-sys to 1.9.4 to tackle security issue Signed-off-by: SpadeA-Tang --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
0ba7b9d3499..e9f937e3266 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3483,9 +3483,9 @@ dependencies = [ [[package]] name = "lz4-sys" -version = "1.9.2" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dca79aa95d8b3226213ad454d328369853be3a1382d89532a854f4d69640acae" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" dependencies = [ "cc", "libc 0.2.146", From 384aaeb381ffc8f9ac881432a00e437933777c55 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Mon, 25 Sep 2023 13:42:15 +0800 Subject: [PATCH 0934/1149] copr: fix cannot get the request source for resource control (#15606) close tikv/tikv#15663 copr: fix cannot get the request source for analyze with resource control Signed-off-by: nolouch Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../resource_control/src/resource_group.rs | 4 ++ src/server/service/kv.rs | 8 ++-- tests/integrations/raftstore/test_stats.rs | 40 +++++++++++++++++-- .../resource_metering/test_cpu.rs | 6 ++- 4 files changed, 49 insertions(+), 9 deletions(-) diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 0e40255b354..09e90e9dd01 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -239,6 +239,10 @@ impl ResourceGroupManager { rg: &str, request_source: &str, ) -> Option> { + fail_point!("only_check_source_task_name", |name| { + assert_eq!(name.clone().unwrap(), request_source.to_string()); + None + }); if let Some(group) = self.resource_groups.get(rg) { if !group.fallback_default { return group.get_resource_limiter(request_source); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 6f1cf0eaa1f..4a961eedf19 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1190,7 +1190,7 @@ fn handle_batch_commands_request( response_batch_commands_request(id, resp, 
tx.clone(), begin_instant, GrpcTypeKind::raw_get, source); } }, - Some(batch_commands_request::request::Cmd::Coprocessor(mut req)) => { + Some(batch_commands_request::request::Cmd::Coprocessor(req)) => { let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = resource_manager { resource_manager.consume_penalty(resource_control_ctx); @@ -1199,7 +1199,7 @@ fn handle_batch_commands_request( .with_label_values(&[resource_control_ctx.get_resource_group_name()]) .inc(); let begin_instant = Instant::now(); - let source = req.mut_context().take_request_source(); + let source = req.get_context().get_request_source().to_owned(); let resp = future_copr(copr, Some(peer.to_string()), req) .map_ok(|resp| { resp.map(oneof!(batch_commands_response::response::Cmd::Coprocessor)) @@ -1224,7 +1224,7 @@ fn handle_batch_commands_request( String::default(), ); } - $(Some(batch_commands_request::request::Cmd::$cmd(mut req)) => { + $(Some(batch_commands_request::request::Cmd::$cmd(req)) => { let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = resource_manager { resource_manager.consume_penalty(resource_control_ctx); @@ -1233,7 +1233,7 @@ fn handle_batch_commands_request( .with_label_values(&[resource_control_ctx.get_resource_group_name()]) .inc(); let begin_instant = Instant::now(); - let source = req.mut_context().take_request_source(); + let source = req.get_context().get_request_source().to_owned(); let resp = $future_fn($($arg,)* req) .map_ok(oneof!(batch_commands_response::response::Cmd::$cmd)) .map_err(|_| GRPC_MSG_FAIL_COUNTER.$metric_name.inc()); diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index 073382ced17..7701fe167c8 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -12,6 +12,7 @@ use futures::{executor::block_on, SinkExt, StreamExt}; use grpcio::*; use 
kvproto::{kvrpcpb::*, pdpb::QueryKind, tikvpb::*, tikvpb_grpc::TikvClient}; use pd_client::PdClient; +use test_coprocessor::{DagSelect, ProductTable}; use test_raftstore::*; use tikv_util::{config::*, store::QueryStats}; use txn_types::Key; @@ -388,9 +389,34 @@ fn test_txn_query_stats_tmpl() { } assert!(flag); }); + let batch_coprocessor: Box = + Box::new(|ctx, cluster, client, store_id, region_id, start_key| { + let mut flag = false; + for i in 0..3 { + let coprocessor: Box = Box::new(|ctx, _start_key| { + let mut req = BatchCommandsRequestRequest::new(); + let table = ProductTable::new(); + let mut cop_req = DagSelect::from(&table).build(); + cop_req.set_context(ctx.clone()); + req.set_coprocessor(cop_req); + req + }); + if i == 0 { + batch_commands(&ctx, &client, coprocessor, &start_key); + } + // here cannot read any data, so expect is 0. may need fix. here mainly used to + // verify the request source is as expect. + if check_query_num_read(cluster, store_id, region_id, QueryKind::Coprocessor, 0) { + flag = true; + break; + } + } + assert!(flag); + }); fail::cfg("mock_hotspot_threshold", "return(0)").unwrap(); fail::cfg("mock_tick_interval", "return(0)").unwrap(); fail::cfg("mock_collect_tick_interval", "return(0)").unwrap(); + fail::cfg("only_check_source_task_name", "return(test_stats)").unwrap(); test_query_num::(get, false, true); test_query_num::(batch_get, false, true); test_query_num::(scan, false, true); @@ -398,12 +424,14 @@ fn test_txn_query_stats_tmpl() { // requests may failed caused by `EpochNotMatch` after split when auto split is // enabled, disable it. 
test_query_num::(batch_get_command, false, false); + test_query_num::(batch_coprocessor, false, false); test_txn_delete_query::(); test_pessimistic_lock(); test_rollback(); fail::remove("mock_tick_interval"); fail::remove("mock_hotspot_threshold"); fail::remove("mock_collect_tick_interval"); + fail::remove("only_check_source_task_name"); } #[allow(clippy::extra_unused_type_parameters)] @@ -488,10 +516,11 @@ fn put( } fn test_pessimistic_lock() { - let (cluster, client, ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { + let (cluster, client, mut ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); }); + ctx.set_request_source("test_stats".to_owned()); let key = b"key2".to_vec(); let store_id = 1; put(&cluster, &client, &ctx, store_id, key.clone()); @@ -528,9 +557,10 @@ fn test_pessimistic_lock() { } pub fn test_rollback() { - let (cluster, client, ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { + let (cluster, client, mut ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); }); + ctx.set_request_source("test_stats".to_owned()); let key = b"key2".to_vec(); let store_id = 1; put(&cluster, &client, &ctx, store_id, key.clone()); @@ -575,6 +605,7 @@ fn test_query_num(query: Box, is_raw_kv: bool, auto_split: b cluster.cfg.server.enable_request_batch = false; }); ctx.set_api_version(F::CLIENT_TAG); + ctx.set_request_source("test_stats".to_owned()); let mut k = b"key".to_vec(); // When a peer becomes leader, it can't read before committing to current term. 
@@ -602,6 +633,7 @@ fn test_raw_delete_query() { cluster.cfg.storage.set_api_version(F::TAG); }); ctx.set_api_version(F::CLIENT_TAG); + ctx.set_request_source("test_stats".to_owned()); raw_put::(&cluster, &client, &ctx, store_id, k.clone()); // Raw Delete @@ -627,10 +659,10 @@ fn test_txn_delete_query() { let store_id = 1; { - let (cluster, client, ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { + let (cluster, client, mut ctx) = must_new_and_configure_cluster_and_kv_client(|cluster| { cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); }); - + ctx.set_request_source("test_stats".to_owned()); put(&cluster, &client, &ctx, store_id, k.clone()); // DeleteRange let mut delete_req = DeleteRangeRequest::default(); diff --git a/tests/integrations/resource_metering/test_cpu.rs b/tests/integrations/resource_metering/test_cpu.rs index c15bf445ed3..12d6fa4fbe0 100644 --- a/tests/integrations/resource_metering/test_cpu.rs +++ b/tests/integrations/resource_metering/test_cpu.rs @@ -12,6 +12,7 @@ use std::{ use concurrency_manager::ConcurrencyManager; use futures::{executor::block_on, StreamExt}; use kvproto::kvrpcpb::Context; +use resource_control::ResourceGroupManager; use test_coprocessor::{DagSelect, Insert, ProductTable, Store}; use tidb_query_datatype::codec::Datum; use tikv::{ @@ -95,7 +96,10 @@ pub fn test_reschedule_coprocessor() { let mut req = DagSelect::from(&table).build(); let mut ctx = Context::default(); ctx.set_resource_group_tag(tag.as_bytes().to_vec()); + ctx.set_request_source("test".to_owned()); req.set_context(ctx); + fail::cfg("only_check_source_task_name", "return(test)").unwrap(); + defer!(fail::remove("only_check_source_task_name")); assert!( !block_on(endpoint.parse_and_handle_unary_request(req, None)) .consume() @@ -229,7 +233,7 @@ fn setup_test_suite() -> (TestSuite, Store, Endpoint) cm, test_suite.get_tag_factory(), Arc::new(QuotaLimiter::default()), - None, + 
Some(Arc::new(ResourceGroupManager::default())), ); (test_suite, store, endpoint) } From e01c97891e6520f48e93a507d21c1f2ae0915dbf Mon Sep 17 00:00:00 2001 From: qupeng Date: Mon, 25 Sep 2023 16:42:16 +0800 Subject: [PATCH 0935/1149] resolved-ts: speed up advancing when stores get partitioned (#15567) close tikv/tikv#15679 Signed-off-by: qupeng --- components/resolved_ts/src/advance.rs | 100 ++++++++++-------- .../resolved_ts/tests/integrations/mod.rs | 31 +++++- 2 files changed, 86 insertions(+), 45 deletions(-) diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index 59478f5affb..dd6e9c2002c 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -167,10 +167,7 @@ pub struct LeadershipResolver { // store_id -> check leader request, record the request to each stores. store_req_map: HashMap, - // region_id -> region, cache the information of regions. - region_map: HashMap>, - // region_id -> peers id, record the responses. 
- resp_map: HashMap>, + progresses: HashMap, checking_regions: HashSet, valid_regions: HashSet, @@ -196,8 +193,7 @@ impl LeadershipResolver { region_read_progress, store_req_map: HashMap::default(), - region_map: HashMap::default(), - resp_map: HashMap::default(), + progresses: HashMap::default(), valid_regions: HashSet::default(), checking_regions: HashSet::default(), last_gc_time: Instant::now_coarse(), @@ -209,8 +205,7 @@ impl LeadershipResolver { let now = Instant::now_coarse(); if now - self.last_gc_time > self.gc_interval { self.store_req_map = HashMap::default(); - self.region_map = HashMap::default(); - self.resp_map = HashMap::default(); + self.progresses = HashMap::default(); self.valid_regions = HashSet::default(); self.checking_regions = HashSet::default(); self.last_gc_time = now; @@ -222,10 +217,7 @@ impl LeadershipResolver { v.regions.clear(); v.ts = 0; } - for v in self.region_map.values_mut() { - v.clear(); - } - for v in self.resp_map.values_mut() { + for v in self.progresses.values_mut() { v.clear(); } self.checking_regions.clear(); @@ -252,8 +244,7 @@ impl LeadershipResolver { let store_id = self.store_id; let valid_regions = &mut self.valid_regions; - let region_map = &mut self.region_map; - let resp_map = &mut self.resp_map; + let progresses = &mut self.progresses; let store_req_map = &mut self.store_req_map; let checking_regions = &mut self.checking_regions; for region_id in ®ions { @@ -275,13 +266,13 @@ impl LeadershipResolver { } let leader_info = core.get_leader_info(); + let prog = progresses + .entry(*region_id) + .or_insert_with(|| RegionProgress::new(peer_list.len())); let mut unvotes = 0; for peer in peer_list { if peer.store_id == store_id && peer.id == leader_id { - resp_map - .entry(*region_id) - .or_insert_with(|| Vec::with_capacity(peer_list.len())) - .push(store_id); + prog.resps.push(store_id); } else { // It's still necessary to check leader on learners even if they don't vote // because performing stale read on learners 
require it. @@ -299,15 +290,14 @@ impl LeadershipResolver { } } } + // Check `region_has_quorum` here because `store_map` can be empty, // in which case `region_has_quorum` won't be called any more. - if unvotes == 0 && region_has_quorum(peer_list, &resp_map[region_id]) { + if unvotes == 0 && region_has_quorum(peer_list, &prog.resps) { + prog.resolved = true; valid_regions.insert(*region_id); } else { - region_map - .entry(*region_id) - .or_insert_with(|| Vec::with_capacity(peer_list.len())) - .extend_from_slice(peer_list); + prog.peers.extend_from_slice(peer_list); } } }); @@ -321,7 +311,6 @@ impl LeadershipResolver { .values() .find(|req| !req.regions.is_empty()) .map_or(0, |req| req.regions[0].compute_size()); - let store_count = store_req_map.len(); let mut check_leader_rpcs = Vec::with_capacity(store_req_map.len()); for (store_id, req) in store_req_map { if req.regions.is_empty() { @@ -387,6 +376,7 @@ impl LeadershipResolver { .with_label_values(&["all"]) .observe(start.saturating_elapsed_secs()); }); + let rpc_count = check_leader_rpcs.len(); for _ in 0..rpc_count { // Use `select_all` to avoid the process getting blocked when some @@ -396,10 +386,16 @@ impl LeadershipResolver { match res { Ok((to_store, resp)) => { for region_id in resp.regions { - resp_map - .entry(region_id) - .or_insert_with(|| Vec::with_capacity(store_count)) - .push(to_store); + if let Some(prog) = progresses.get_mut(®ion_id) { + if prog.resolved { + continue; + } + prog.resps.push(to_store); + if region_has_quorum(&prog.peers, &prog.resps) { + prog.resolved = true; + valid_regions.insert(region_id); + } + } } } Err((to_store, reconnect, err)) => { @@ -409,24 +405,19 @@ impl LeadershipResolver { } } } - } - for (region_id, prs) in region_map { - if prs.is_empty() { - // The peer had the leadership before, but now it's no longer - // the case. Skip checking the region. 
- continue; - } - if let Some(resp) = resp_map.get(region_id) { - if resp.is_empty() { - // No response, maybe the peer lost leadership. - continue; - } - if region_has_quorum(prs, resp) { - valid_regions.insert(*region_id); - } + if valid_regions.len() >= progresses.len() { + break; } } - self.valid_regions.drain().collect() + let res: Vec = self.valid_regions.drain().collect(); + if res.len() != checking_regions.len() { + warn!( + "check leader returns valid regions different from checking regions"; + "valid_regions" => res.len(), + "checking_regions" => checking_regions.len(), + ); + } + res } } @@ -552,6 +543,27 @@ async fn get_tikv_client( Ok(cli) } +struct RegionProgress { + resolved: bool, + peers: Vec, + resps: Vec, +} + +impl RegionProgress { + fn new(len: usize) -> Self { + RegionProgress { + resolved: false, + peers: Vec::with_capacity(len), + resps: Vec::with_capacity(len), + } + } + fn clear(&mut self) { + self.resolved = false; + self.peers.clear(); + self.resps.clear(); + } +} + #[cfg(test)] mod tests { use std::{ diff --git a/components/resolved_ts/tests/integrations/mod.rs b/components/resolved_ts/tests/integrations/mod.rs index 634aa66c601..881d0b299f1 100644 --- a/components/resolved_ts/tests/integrations/mod.rs +++ b/components/resolved_ts/tests/integrations/mod.rs @@ -9,9 +9,10 @@ use kvproto::{kvrpcpb::*, metapb::RegionEpoch}; use pd_client::PdClient; use resolved_ts::Task; use tempfile::Builder; -use test_raftstore::sleep_ms; +use test_raftstore::{sleep_ms, IsolationFilterFactory}; use test_sst_importer::*; pub use testsuite::*; +use tikv_util::store::new_peer; #[test] fn test_resolved_ts_basic() { @@ -231,3 +232,31 @@ fn test_scan_log_memory_quota_exceeded() { suite.stop(); } + +// This case checks resolved ts can still be advanced quickly even if some TiKV +// stores are partitioned. 
+#[test] +fn test_store_partitioned() { + let mut suite = TestSuite::new(3); + let r = suite.cluster.get_region(&[]); + suite.cluster.must_transfer_leader(r.id, new_peer(1, 1)); + suite.must_get_rts_ge(r.id, block_on(suite.cluster.pd_client.get_tso()).unwrap()); + + suite + .cluster + .add_send_filter(IsolationFilterFactory::new(3)); + let tso = block_on(suite.cluster.pd_client.get_tso()).unwrap(); + for _ in 0..50 { + let rts = suite.region_resolved_ts(r.id).unwrap(); + if rts > tso { + if rts.physical() - tso.physical() < 3000 { + break; + } else { + panic!("resolved ts doesn't advance in time") + } + } + sleep_ms(100); + } + + suite.stop(); +} From b95f5cd0353506d728d0a50b7a898b503de072e1 Mon Sep 17 00:00:00 2001 From: glorv Date: Mon, 25 Sep 2023 17:07:47 +0800 Subject: [PATCH 0936/1149] build: add missing failpoint feature for raft-engine (#15676) ref tikv/tikv#15462 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.toml | 2 +- tests/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c4c70e999be..81be4d36906 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ snmalloc = ["tikv_alloc/snmalloc"] portable = ["engine_rocks/portable"] sse = ["engine_rocks/sse"] mem-profiling = ["tikv_alloc/mem-profiling"] -failpoints = ["fail/failpoints", "raftstore/failpoints", "tikv_util/failpoints", "engine_rocks/failpoints"] +failpoints = ["fail/failpoints", "raftstore/failpoints", "tikv_util/failpoints", "engine_rocks/failpoints", "raft_log_engine/failpoints"] cloud-aws = ["encryption_export/cloud-aws", "sst_importer/cloud-aws"] cloud-gcp = ["encryption_export/cloud-gcp", "sst_importer/cloud-gcp"] cloud-azure = ["encryption_export/cloud-azure", "sst_importer/cloud-azure"] diff --git a/tests/Cargo.toml b/tests/Cargo.toml index f3928e97eb8..0081d5e95bc 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -41,7 +41,7 @@ path = 
"benches/deadlock_detector/mod.rs" [features] default = ["failpoints", "testexport", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] -failpoints = ["fail/failpoints", "tikv/failpoints", "pd_client/failpoints", "raft_log_engine/failpoints"] +failpoints = ["fail/failpoints", "tikv/failpoints", "pd_client/failpoints"] cloud-aws = ["external_storage_export/cloud-aws"] cloud-gcp = ["external_storage_export/cloud-gcp"] cloud-azure = ["external_storage_export/cloud-azure"] From 8fb721ef18a9e1ba354e5a91d780ed6647641ab9 Mon Sep 17 00:00:00 2001 From: glorv Date: Tue, 26 Sep 2023 07:01:45 +0800 Subject: [PATCH 0937/1149] raftstore-v2: adjust lockcf default write buffer size and limit (#15678) close tikv/tikv#15630 Signed-off-by: glorv Co-authored-by: tonyxuqqi --- src/config/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 8a2fa291ff1..63e36a543dc 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -110,7 +110,7 @@ const RAFT_ENGINE_MEMORY_LIMIT_RATE: f64 = 0.15; const WRITE_BUFFER_MEMORY_LIMIT_RATE: f64 = 0.2; // Too large will increase Raft Engine memory usage. 
const WRITE_BUFFER_MEMORY_LIMIT_MAX: u64 = ReadableSize::gb(8).0; -const DEFAULT_LOCK_BUFFER_MEMORY_LIMIT: ReadableSize = ReadableSize::mb(32); +const DEFAULT_LOCK_BUFFER_MEMORY_LIMIT: ReadableSize = ReadableSize::mb(128); /// Configs that actually took effect in the last run pub const LAST_CONFIG_FILE: &str = "last_tikv.toml"; @@ -1403,7 +1403,7 @@ impl DbConfig { self.writecf.max_compactions.get_or_insert(1); self.lockcf .write_buffer_size - .get_or_insert(ReadableSize::mb(4)); + .get_or_insert(ReadableSize::mb(32)); self.lockcf .write_buffer_limit .get_or_insert(DEFAULT_LOCK_BUFFER_MEMORY_LIMIT); From 312e0fb7f9f77e6002d0a336a58e84f3c4c12216 Mon Sep 17 00:00:00 2001 From: glorv Date: Tue, 26 Sep 2023 13:22:46 +0800 Subject: [PATCH 0938/1149] *: Revert "*: update rust-toolchain (#15584)" (#15683) close tikv/tikv#15653 Signed-off-by: glorv Signed-off-by: tonyxuqqi Co-authored-by: tonyxuqqi --- Cargo.lock | 627 ++---------------- cmd/tikv-ctl/src/fork_readonly_tikv.rs | 1 - cmd/tikv-ctl/src/main.rs | 2 +- components/backup-stream/Cargo.toml | 2 +- components/backup-stream/src/errors.rs | 4 +- .../backup-stream/src/metadata/client.rs | 5 +- components/backup-stream/src/router.rs | 5 +- .../backup-stream/src/subscription_track.rs | 2 +- components/backup-stream/src/utils.rs | 4 +- components/backup/src/endpoint.rs | 6 +- components/batch-system/src/fsm.rs | 8 +- components/case_macros/src/lib.rs | 10 +- components/cdc/src/delegate.rs | 2 +- components/cdc/src/endpoint.rs | 6 +- .../concurrency_manager/src/lock_table.rs | 4 +- components/coprocessor_plugin_api/src/util.rs | 4 - components/encryption/src/config.rs | 9 +- components/engine_rocks/src/logger.rs | 2 + components/engine_rocks/src/properties.rs | 15 +- .../engine_tirocks/src/properties/mvcc.rs | 2 +- .../engine_tirocks/src/properties/range.rs | 10 +- components/engine_traits/src/flush.rs | 2 +- components/engine_traits/src/lib.rs | 4 +- components/engine_traits/src/tablet.rs | 2 +- 
.../online_config_derive/src/lib.rs | 14 +- components/raftstore-v2/src/batch/store.rs | 6 +- components/raftstore-v2/src/lib.rs | 1 - .../operation/command/admin/merge/prepare.rs | 4 +- .../src/operation/command/admin/split.rs | 4 +- .../command/admin/transfer_leader.rs | 20 +- components/raftstore-v2/src/operation/life.rs | 8 +- .../raftstore-v2/src/operation/query/local.rs | 4 +- .../src/operation/ready/apply_trace.rs | 2 +- .../src/operation/ready/snapshot.rs | 14 +- .../raftstore-v2/src/operation/txn_ext.rs | 4 +- .../src/operation/unsafe_recovery/demote.rs | 5 +- .../src/worker/cleanup/compact.rs | 16 +- .../raftstore-v2/src/worker/pd/region.rs | 15 +- .../raftstore-v2/src/worker/pd/split.rs | 6 +- components/raftstore-v2/src/worker/tablet.rs | 13 +- .../tests/integrations/cluster.rs | 4 +- .../raftstore/src/coprocessor/dispatcher.rs | 5 +- components/raftstore/src/errors.rs | 2 +- components/raftstore/src/lib.rs | 4 +- .../raftstore/src/store/async_io/write.rs | 6 +- .../raftstore/src/store/entry_storage.rs | 8 +- components/raftstore/src/store/fsm/apply.rs | 18 +- components/raftstore/src/store/fsm/peer.rs | 8 +- components/raftstore/src/store/msg.rs | 24 +- components/raftstore/src/store/peer.rs | 48 +- .../raftstore/src/store/peer_storage.rs | 2 +- .../raftstore/src/store/region_snapshot.rs | 6 +- .../raftstore/src/store/simple_write.rs | 24 +- components/raftstore/src/store/snap.rs | 4 +- components/raftstore/src/store/snap/io.rs | 4 +- components/raftstore/src/store/txn_ext.rs | 2 +- components/raftstore/src/store/util.rs | 3 +- components/raftstore/src/store/worker/pd.rs | 20 +- components/raftstore/src/store/worker/read.rs | 3 +- .../raftstore/src/store/worker/region.rs | 4 +- .../raftstore/src/store/worker/split_check.rs | 8 +- .../src/store/worker/split_controller.rs | 11 +- components/resolved_ts/src/cmd.rs | 6 +- components/resolved_ts/src/endpoint.rs | 20 +- components/resolved_ts/src/scanner.rs | 3 +- .../resource_control/src/resource_group.rs | 
6 +- components/resource_metering/src/lib.rs | 2 +- components/resource_metering/src/model.rs | 2 +- .../src/recorder/sub_recorder/cpu.rs | 4 +- .../resource_metering/tests/recorder_test.rs | 12 +- components/server/src/common.rs | 4 +- components/snap_recovery/src/leader_keeper.rs | 4 +- components/sst_importer/src/import_mode2.rs | 2 +- components/sst_importer/src/sst_importer.rs | 17 +- components/sst_importer/src/util.rs | 3 +- components/test_coprocessor/src/store.rs | 2 +- .../example_plugin/src/lib.rs | 2 +- components/test_pd/src/server.rs | 8 +- components/test_pd_client/src/pd.rs | 2 +- components/test_raftstore-v2/src/cluster.rs | 3 +- components/test_raftstore-v2/src/lib.rs | 2 - components/test_raftstore-v2/src/node.rs | 2 +- components/test_raftstore-v2/src/server.rs | 14 +- components/test_raftstore/src/lib.rs | 2 - components/test_raftstore/src/node.rs | 2 +- components/test_raftstore/src/server.rs | 8 +- .../tidb_query_codegen/src/rpn_function.rs | 35 +- .../src/codec/collation/mod.rs | 2 +- .../tidb_query_datatype/src/codec/convert.rs | 12 +- .../src/codec/data_type/mod.rs | 2 +- .../src/codec/data_type/scalar.rs | 17 +- .../tidb_query_datatype/src/codec/datum.rs | 8 +- .../src/codec/mysql/decimal.rs | 2 +- .../src/codec/mysql/duration.rs | 4 +- .../src/codec/mysql/json/comparison.rs | 4 +- .../src/codec/mysql/json/jcodec.rs | 8 +- .../src/codec/mysql/json/json_modify.rs | 2 +- .../src/codec/mysql/time/mod.rs | 13 +- .../src/codec/mysql/time/tz.rs | 4 - .../src/codec/row/v2/row_slice.rs | 2 +- .../tidb_query_datatype/src/codec/table.rs | 2 +- .../src/index_scan_executor.rs | 4 +- components/tidb_query_executors/src/runner.rs | 18 +- .../src/selection_executor.rs | 4 +- .../src/util/aggr_executor.rs | 4 +- .../tidb_query_executors/src/util/mod.rs | 4 +- components/tidb_query_expr/src/impl_cast.rs | 2 +- .../tidb_query_expr/src/impl_miscellaneous.rs | 5 +- components/tidb_query_expr/src/impl_string.rs | 6 +- components/tidb_query_expr/src/lib.rs | 2 
- .../tidb_query_expr/src/types/expr_eval.rs | 11 +- components/tikv_kv/src/cursor.rs | 2 +- components/tikv_kv/src/lib.rs | 1 - components/tikv_util/src/logger/formatter.rs | 6 +- components/tikv_util/src/lru.rs | 2 +- components/tikv_util/src/memory.rs | 2 +- .../src/metrics/allocator_metrics.rs | 2 +- components/tikv_util/src/mpsc/future.rs | 2 - components/tikv_util/src/sys/cpu_time.rs | 2 +- components/tikv_util/src/timer.rs | 4 +- components/txn_types/src/timestamp.rs | 10 +- components/txn_types/src/types.rs | 18 +- rust-toolchain | 2 +- src/config/mod.rs | 20 +- src/coprocessor/metrics.rs | 2 +- src/coprocessor/mod.rs | 2 - src/import/sst_service.rs | 6 +- src/lib.rs | 3 +- src/server/debug2.rs | 2 +- src/server/gc_worker/compaction_filter.rs | 1 - src/server/gc_worker/gc_manager.rs | 8 +- src/server/gc_worker/gc_worker.rs | 14 +- src/server/lock_manager/deadlock.rs | 9 +- src/server/raftkv/mod.rs | 5 +- src/server/raftkv2/mod.rs | 4 +- src/server/raftkv2/node.rs | 4 +- src/server/service/debug.rs | 1 + src/server/service/diagnostics/log.rs | 18 +- src/server/service/diagnostics/sys.rs | 2 +- src/server/service/kv.rs | 1 + src/storage/lock_manager/lock_wait_context.rs | 12 +- .../lock_manager/lock_waiting_queue.rs | 7 +- src/storage/metrics.rs | 2 +- src/storage/mod.rs | 32 +- src/storage/mvcc/reader/point_getter.rs | 2 +- src/storage/mvcc/reader/reader.rs | 21 +- src/storage/mvcc/reader/scanner/forward.rs | 4 +- src/storage/raw/raw_mvcc.rs | 2 +- src/storage/txn/actions/prewrite.rs | 2 + src/storage/txn/commands/atomic_store.rs | 4 +- src/storage/txn/commands/prewrite.rs | 26 +- src/storage/txn/latch.rs | 20 +- src/storage/txn/sched_pool.rs | 2 +- tests/Cargo.toml | 1 - .../benches/coprocessor_executors/util/mod.rs | 2 +- tests/benches/hierarchy/mvcc/mod.rs | 2 +- .../misc/coprocessor/codec/chunk/chunk.rs | 176 ----- .../misc/coprocessor/codec/chunk/mod.rs | 140 ---- tests/benches/misc/coprocessor/codec/mod.rs | 1 - tests/benches/misc/raftkv/mod.rs | 2 - 
tests/benches/raftstore/mod.rs | 2 +- tests/failpoints/cases/mod.rs | 3 - tests/failpoints/cases/test_disk_full.rs | 8 +- tests/failpoints/cases/test_engine.rs | 1 - tests/failpoints/cases/test_hibernate.rs | 1 - tests/failpoints/cases/test_merge.rs | 13 +- tests/failpoints/cases/test_pd_client.rs | 1 - .../failpoints/cases/test_pd_client_legacy.rs | 1 - tests/failpoints/cases/test_rawkv.rs | 2 +- .../cases/test_read_execution_tracker.rs | 11 +- tests/failpoints/cases/test_split_region.rs | 3 +- tests/failpoints/cases/test_storage.rs | 4 +- tests/failpoints/cases/test_transaction.rs | 2 +- .../failpoints/cases/test_transfer_leader.rs | 4 +- tests/integrations/backup/mod.rs | 1 - tests/integrations/import/test_apply_log.rs | 2 +- tests/integrations/mod.rs | 2 - .../integrations/raftstore/test_bootstrap.rs | 4 +- .../raftstore/test_compact_lock_cf.rs | 4 +- tests/integrations/raftstore/test_stats.rs | 1 - 180 files changed, 600 insertions(+), 1486 deletions(-) delete mode 100644 tests/benches/misc/coprocessor/codec/chunk/chunk.rs delete mode 100644 tests/benches/misc/coprocessor/codec/chunk/mod.rs diff --git a/Cargo.lock b/Cargo.lock index e9f937e3266..124a87f069e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,7 +47,7 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43bb833f0bf979d8475d38fbf09ed3b8a55e1885fe93ad3f93239fc6a4f17b98" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.3", "once_cell", "version_check 0.9.4", ] @@ -59,8 +59,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" dependencies = [ "cfg-if 1.0.0", - "const-random", - "getrandom 0.2.10", "once_cell", "version_check 0.9.4", ] @@ -80,21 +78,6 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4f263788a35611fba42eb41ff811c5d0360c58b97402570312a350736e2542e" -[[package]] -name = "android-tzdata" -version 
= "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc 0.2.146", -] - [[package]] name = "ansi_term" version = "0.11.0" @@ -148,217 +131,6 @@ dependencies = [ "nodrop", ] -[[package]] -name = "arrow" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04a8801ebb147ad240b2d978d3ab9f73c9ccd4557ba6a03e7800496770ed10e0" -dependencies = [ - "ahash 0.8.3", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", -] - -[[package]] -name = "arrow-arith" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "895263144bd4a69751cbe6a34a53f26626e19770b313a9fa792c415cd0e78f11" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "half 2.3.1", - "num 0.4.1", -] - -[[package]] -name = "arrow-array" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "226fdc6c3a4ae154a74c24091d36a90b514f0ed7112f5b8322c1d8f354d8e20d" -dependencies = [ - "ahash 0.8.3", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "half 2.3.1", - "hashbrown 0.14.0", - "num 0.4.1", -] - -[[package]] -name = "arrow-buffer" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4843af4dd679c2f35b69c572874da8fde33be53eb549a5fb128e7a4b763510" -dependencies = [ - "bytes", - "half 2.3.1", - "num 0.4.1", -] - -[[package]] -name = "arrow-cast" -version = "46.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "35e8b9990733a9b635f656efda3c9b8308c7a19695c9ec2c7046dd154f9b144b" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "chrono", - "half 2.3.1", - "lexical-core", - "num 0.4.1", -] - -[[package]] -name = "arrow-csv" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "646fbb4e11dd0afb8083e883f53117713b8caadb4413b3c9e63e3f535da3683c" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "csv", - "csv-core", - "lazy_static", - "lexical-core", - "regex", -] - -[[package]] -name = "arrow-data" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da900f31ff01a0a84da0572209be72b2b6f980f3ea58803635de47913191c188" -dependencies = [ - "arrow-buffer", - "arrow-schema", - "half 2.3.1", - "num 0.4.1", -] - -[[package]] -name = "arrow-ipc" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2707a8d7ee2d345d045283ece3ae43416175873483e5d96319c929da542a0b1f" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "flatbuffers", -] - -[[package]] -name = "arrow-json" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d1b91a63c356d14eedc778b76d66a88f35ac8498426bb0799a769a49a74a8b4" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "half 2.3.1", - "indexmap 2.0.0", - "lexical-core", - "num 0.4.1", - "serde", - "serde_json", -] - -[[package]] -name = "arrow-ord" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "584325c91293abbca7aaaabf8da9fe303245d641f5f4a18a6058dc68009c7ebf" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - 
"arrow-schema", - "arrow-select", - "half 2.3.1", - "num 0.4.1", -] - -[[package]] -name = "arrow-row" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e32afc1329f7b372463b21c6ca502b07cf237e1ed420d87706c1770bb0ebd38" -dependencies = [ - "ahash 0.8.3", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "half 2.3.1", - "hashbrown 0.14.0", -] - -[[package]] -name = "arrow-schema" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b104f5daa730f00fde22adc03a12aa5a2ae9ccbbf99cbd53d284119ddc90e03d" - -[[package]] -name = "arrow-select" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73b3ca55356d1eae07cf48808d8c462cea674393ae6ad1e0b120f40b422eb2b4" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "num 0.4.1", -] - -[[package]] -name = "arrow-string" -version = "46.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af1433ce02590cae68da0a18ed3a3ed868ffac2c6f24c533ddd2067f7ee04b4a" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "num 0.4.1", - "regex", - "regex-syntax 0.7.5", -] - [[package]] name = "async-channel" version = "1.6.1" @@ -611,7 +383,7 @@ dependencies = [ "bytes", "dyn-clone", "futures 0.3.15", - "getrandom 0.2.10", + "getrandom 0.2.3", "http-types", "log", "paste", @@ -795,7 +567,7 @@ dependencies = [ "futures-io", "grpcio", "hex 0.4.2", - "indexmap 1.9.3", + "indexmap", "kvproto", "lazy_static", "log_wrappers", @@ -1011,9 +783,9 @@ checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" [[package]] name = "bytes" -version = "1.5.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +checksum = 
"b700ce4376041dcd0a327fd0097c41095743c4c8af8887265942faf1100bd040" dependencies = [ "serde", ] @@ -1112,12 +884,11 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.83" +version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" dependencies = [ "jobserver", - "libc 0.2.146", ] [[package]] @@ -1189,17 +960,14 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.31" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +checksum = "80094f509cf8b5ae86a4966a39b3ff66cd7e2a3e594accec3743ff3fabeab5b2" dependencies = [ - "android-tzdata", - "iana-time-zone", - "js-sys", + "num-integer", "num-traits", "serde", - "wasm-bindgen", - "windows-targets", + "time 0.1.42", ] [[package]] @@ -1247,7 +1015,7 @@ dependencies = [ "atty", "bitflags", "clap_derive", - "indexmap 1.9.3", + "indexmap", "lazy_static", "os_str_bytes", "strsim 0.10.0", @@ -1346,28 +1114,6 @@ dependencies = [ "cache-padded", ] -[[package]] -name = "const-random" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e" -dependencies = [ - "const-random-macro", - "proc-macro-hack", -] - -[[package]] -name = "const-random-macro" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb" -dependencies = [ - "getrandom 0.2.10", - "once_cell", - "proc-macro-hack", - "tiny-keccak", -] - [[package]] name = "const_format" version = "0.2.30" @@ -1409,9 +1155,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.4" 
+version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" [[package]] name = "cpu-time" @@ -1590,12 +1336,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - [[package]] name = "crypto-common" version = "0.1.6" @@ -2017,12 +1757,6 @@ dependencies = [ "termcolor", ] -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - [[package]] name = "errno" version = "0.2.8" @@ -2303,16 +2037,6 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" -[[package]] -name = "flatbuffers" -version = "23.5.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" -dependencies = [ - "bitflags", - "rustc_version 0.4.0", -] - [[package]] name = "flate2" version = "1.0.11" @@ -2644,14 +2368,14 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ "cfg-if 1.0.0", "js-sys", "libc 0.2.146", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi 0.10.2+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -2755,7 +2479,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap 1.9.3", + "indexmap", "slab", "tokio", 
"tokio-util", @@ -2768,22 +2492,11 @@ version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" -[[package]] -name = "half" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" -dependencies = [ - "cfg-if 1.0.0", - "crunchy", - "num-traits", -] - [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" [[package]] name = "hashbrown" @@ -3003,29 +2716,6 @@ dependencies = [ "tokio-native-tls", ] -[[package]] -name = "iana-time-zone" -version = "0.1.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - [[package]] name = "ident_case" version = "1.0.1" @@ -3051,22 +2741,12 @@ checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" [[package]] name = "indexmap" -version = "1.9.3" +version = "1.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +checksum = "824845a0bf897a9042383849b02c1bc219c2383772efcd5c6f9766fa4b81aef3" dependencies = [ "autocfg", - "hashbrown 0.12.3", -] - -[[package]] -name = "indexmap" -version = "2.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" -dependencies = [ - "equivalent", - "hashbrown 0.14.0", + "hashbrown 0.9.1", ] [[package]] @@ -3083,7 +2763,7 @@ checksum = "16d4bde3a7105e59c66a4104cfe9606453af1c7a0eac78cb7d5bc263eb762a70" dependencies = [ "ahash 0.7.4", "atty", - "indexmap 1.9.3", + "indexmap", "itoa 1.0.1", "lazy_static", "log", @@ -3234,7 +2914,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d63b6407b66fc81fc539dccf3ddecb669f393c5101b6a2be3976c95099a06e8" dependencies = [ - "indexmap 1.9.3", + "indexmap", ] [[package]] @@ -3274,70 +2954,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" -[[package]] -name = "lexical-core" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92912c4af2e7d9075be3e5e3122c4d7263855fa6cce34fbece4dd08e5884624d" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f518eed87c3be6debe6d26b855c97358d8a11bf05acec137e5f53080f5ad2dd8" -dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-parse-integer" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc852ec67c6538bbb2b9911116a385b24510e879a69ab516e6a151b15a79168" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-util" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c72a9d52c5c4e62fa2cdc2cb6c694a39ae1382d9c2a17a466f18e272a0930eb1" -dependencies = 
[ - "static_assertions", -] - -[[package]] -name = "lexical-write-float" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a89ec1d062e481210c309b672f73a0567b7855f21e7d2fae636df44d12e97f9" -dependencies = [ - "lexical-util", - "lexical-write-integer", - "static_assertions", -] - -[[package]] -name = "lexical-write-integer" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "094060bd2a7c2ff3a16d5304a6ae82727cb3cc9d1c70f813cc73f744c319337e" -dependencies = [ - "lexical-util", - "static_assertions", -] - [[package]] name = "libc" version = "0.1.12" @@ -3370,12 +2986,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "libm" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" - [[package]] name = "libmimalloc-sys" version = "0.1.21" @@ -3866,35 +3476,10 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab3e176191bc4faad357e3122c4747aa098ac880e88b168f106386128736cf4a" dependencies = [ - "num-complex 0.3.0", - "num-integer", - "num-iter", - "num-rational 0.3.0", - "num-traits", -] - -[[package]] -name = "num" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" -dependencies = [ - "num-bigint", - "num-complex 0.4.4", + "num-complex", "num-integer", "num-iter", - "num-rational 0.4.1", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" -dependencies = [ - "autocfg", - "num-integer", + "num-rational", "num-traits", ] @@ -3907,15 +3492,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-complex" -version = "0.4.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" -dependencies = [ - "num-traits", -] - [[package]] name = "num-derive" version = "0.3.0" @@ -3950,9 +3526,9 @@ dependencies = [ [[package]] name = "num-integer" -version = "0.1.45" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" dependencies = [ "autocfg", "num-traits", @@ -3960,9 +3536,9 @@ dependencies = [ [[package]] name = "num-iter" -version = "0.1.43" +version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59" dependencies = [ "autocfg", "num-integer", @@ -3980,26 +3556,13 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-rational" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" -dependencies = [ - "autocfg", - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" -version = "0.2.16" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" dependencies = [ "autocfg", - "libm", ] [[package]] @@ -4029,7 +3592,7 @@ checksum = "80e47cfc4c0a1a519d9a025ebfbac3a2439d1b5cdf397d72dcb79b11d9920dab" dependencies = [ "base64 0.13.0", "chrono", - "getrandom 0.2.10", + "getrandom 0.2.3", "http", "rand 0.8.5", "serde", @@ -4320,7 +3883,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"4a13a2fa9d0b63e5f22328828741e523766fff0ee9e779316902290dff3f824f" dependencies = [ "fixedbitset", - "indexmap 1.9.3", + "indexmap", ] [[package]] @@ -5052,7 +4615,7 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.3", ] [[package]] @@ -5145,19 +4708,19 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.3", "redox_syscall 0.2.11", ] [[package]] name = "regex" -version = "1.7.3" +version = "1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" +checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.6.29", + "regex-syntax", ] [[package]] @@ -5171,15 +4734,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.29" +version = "0.6.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - -[[package]] -name = "regex-syntax" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" +checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" [[package]] name = "remove_dir_all" @@ -5722,7 +5279,7 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622" dependencies = [ - "half 1.8.2", + "half", "serde", ] @@ -5752,7 +5309,7 @@ version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" dependencies = [ - "indexmap 1.9.3", + "indexmap", "itoa 0.4.4", "ryu", "serde", @@ -6684,7 +6241,6 @@ name = "tests" version = "0.0.1" dependencies = [ "api_version", - "arrow", "async-trait", "batch-system", "byteorder", @@ -6883,7 +6439,7 @@ dependencies = [ "log_wrappers", "match-template", "nom 7.1.0", - "num 0.3.0", + "num", "num-derive 0.3.0", "num-traits", "ordered-float", @@ -6945,7 +6501,7 @@ dependencies = [ "hex 0.4.2", "log_wrappers", "match-template", - "num 0.3.0", + "num", "num-traits", "openssl", "panic_hook", @@ -7362,15 +6918,6 @@ dependencies = [ "time-core", ] -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - [[package]] name = "tinytemplate" version = "1.2.0" @@ -7572,7 +7119,7 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", - "indexmap 1.9.3", + "indexmap", "pin-project", "pin-project-lite", "rand 0.8.5", @@ -7810,7 +7357,7 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.3", "serde", ] @@ -7820,7 +7367,7 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "feb41e78f93363bb2df8b0e86a2ca30eed7806ea16ea0c790d757cf93f79be83" dependencies = [ - "getrandom 0.2.10", + "getrandom 0.2.3", ] [[package]] @@ -7896,6 +7443,12 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b89c3ce4ce14bdc6fb6beaf9ec7928ca331de5df7e5ea278375642a2f478570d" +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -8034,15 +7587,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" -dependencies = [ - "windows-targets", -] - [[package]] name = "windows-sys" version = "0.32.0" @@ -8062,42 +7606,21 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm 0.42.0", + "windows_aarch64_gnullvm", "windows_aarch64_msvc 0.42.0", "windows_i686_gnu 0.42.0", "windows_i686_msvc 0.42.0", "windows_x86_64_gnu 0.42.0", - "windows_x86_64_gnullvm 0.42.0", + "windows_x86_64_gnullvm", "windows_x86_64_msvc 0.42.0", ] -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", -] - [[package]] name = "windows_aarch64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - [[package]] name = "windows_aarch64_msvc" version = "0.32.0" @@ -8110,12 +7633,6 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - [[package]] name = "windows_i686_gnu" version = "0.32.0" @@ -8128,12 +7645,6 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - [[package]] name = "windows_i686_msvc" version = "0.32.0" @@ -8146,12 +7657,6 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - [[package]] name = "windows_x86_64_gnu" version = "0.32.0" @@ -8164,24 +7669,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - [[package]] name = "windows_x86_64_gnullvm" version = "0.42.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - [[package]] name = "windows_x86_64_msvc" version = "0.32.0" @@ -8194,12 +7687,6 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - [[package]] name = "winreg" version = "0.7.0" diff --git a/cmd/tikv-ctl/src/fork_readonly_tikv.rs b/cmd/tikv-ctl/src/fork_readonly_tikv.rs index d1a917f5624..ef3ae7f8023 100644 --- a/cmd/tikv-ctl/src/fork_readonly_tikv.rs +++ b/cmd/tikv-ctl/src/fork_readonly_tikv.rs @@ -265,7 +265,6 @@ where .map_err(|e| format!("copy({}, {}): {}", src.display(), dst.display(), e)) } -#[allow(clippy::permissions_set_readonly_false)] fn add_write_permission>(path: P) -> Result<(), String> { let path = path.as_ref(); let mut pmt = std::fs::metadata(path) diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index c1ab11cc507..6baa1fe6c39 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -1,7 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. 
+#![feature(once_cell)] #![feature(let_chains)] -#![feature(lazy_cell)] #[macro_use] extern crate log; diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 4f53c39b9db..8c1edc89a48 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -51,7 +51,7 @@ futures-io = "0.3" grpcio = { workspace = true } hex = "0.4" # Fixing ahash cyclic dep: https://github.com/tkaitchuck/ahash/issues/95 -indexmap = "=1.9.3" +indexmap = "=1.6.2" kvproto = { workspace = true } lazy_static = "1.4" log_wrappers = { workspace = true } diff --git a/components/backup-stream/src/errors.rs b/components/backup-stream/src/errors.rs index cc720d5aecc..c3cc91da9ff 100644 --- a/components/backup-stream/src/errors.rs +++ b/components/backup-stream/src/errors.rs @@ -158,7 +158,7 @@ where /// Like `errors.Annotate` in Go. /// Wrap an unknown error with [`Error::Other`]. -#[macro_export] +#[macro_export(crate)] macro_rules! annotate { ($inner: expr, $message: expr) => { { @@ -242,7 +242,6 @@ mod test { #[bench] // 2,685 ns/iter (+/- 194) - #[allow(clippy::unnecessary_literal_unwrap)] fn contextual_add_format_strings_directly(b: &mut test::Bencher) { b.iter(|| { let err = Error::Io(io::Error::new( @@ -306,7 +305,6 @@ mod test { #[bench] // 773 ns/iter (+/- 8) - #[allow(clippy::unnecessary_literal_unwrap)] fn baseline(b: &mut test::Bencher) { b.iter(|| { let err = Error::Io(io::Error::new( diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index df8f0f025b1..1fdc1b3b1e8 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -663,10 +663,11 @@ impl MetadataClient { let cp = match r.len() { 0 => { let global_cp = self.global_checkpoint_of(task).await?; - match global_cp { + let cp = match global_cp { None => self.get_task_start_ts_checkpoint(task).await?, Some(cp) => cp, - } + }; + cp } _ => 
Checkpoint::from_kv(&r[0])?, }; diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index ae4b98b1687..1786d513dc8 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -731,7 +731,6 @@ impl TempFileKey { } } - #[allow(deprecated)] fn format_date_time(ts: u64, t: FormatType) -> impl Display { use chrono::prelude::*; let millis = TimeStamp::physical(ts.into()); @@ -956,9 +955,7 @@ impl StreamTaskInfo { .last_flush_time .swap(Box::into_raw(Box::new(Instant::now())), Ordering::SeqCst); // manual gc last instant - unsafe { - let _ = Box::from_raw(ptr); - } + unsafe { Box::from_raw(ptr) }; } pub fn should_flush(&self, flush_interval: &Duration) -> bool { diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 5a6b2e0753b..c70ad9c8038 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -307,7 +307,7 @@ impl SubscriptionTracer { } }; - let subscription = sub.value_mut(); + let mut subscription = sub.value_mut(); let old_epoch = subscription.meta.get_region_epoch(); let new_epoch = new_region.get_region_epoch(); diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 5e798a8428c..974b1762cf2 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -280,7 +280,7 @@ pub fn request_to_triple(mut req: Request) -> Either<(Vec, Vec, CfName), /// `try_send!(s: Scheduler, task: T)` tries to send a task to the scheduler, /// once meet an error, would report it, with the current file and line (so it /// is made as a macro). returns whether it success. -#[macro_export] +#[macro_export(crate)] macro_rules! try_send { ($s:expr, $task:expr) => { match $s.schedule($task) { @@ -304,7 +304,7 @@ macro_rules! try_send { /// `backup_stream_debug`. 
because once we enable debug log for all crates, it /// would soon get too verbose to read. using this macro now we can enable debug /// log level for the crate only (even compile time...). -#[macro_export] +#[macro_export(crate)] macro_rules! debug { ($($t: tt)+) => { if cfg!(feature = "backup-stream-debug") { diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index d6330f49966..a4efc162092 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -2493,8 +2493,8 @@ pub mod tests { fn test_backup_file_name() { let region = metapb::Region::default(); let store_id = 1; - let test_cases = ["s3", "local", "gcs", "azure", "hdfs"]; - let test_target = [ + let test_cases = vec!["s3", "local", "gcs", "azure", "hdfs"]; + let test_target = vec![ "1/0_0_000", "1/0_0_000", "1_0_0_000", @@ -2513,7 +2513,7 @@ pub mod tests { assert_eq!(target.to_string(), prefix_arr.join(delimiter)); } - let test_target = ["1/0_0", "1/0_0", "1_0_0", "1_0_0", "1_0_0"]; + let test_target = vec!["1/0_0", "1/0_0", "1_0_0", "1_0_0", "1_0_0"]; for (storage_name, target) in test_cases.iter().zip(test_target.iter()) { let key = None; let filename = backup_file_name(store_id, ®ion, key, storage_name); diff --git a/components/batch-system/src/fsm.rs b/components/batch-system/src/fsm.rs index 16113dde8e2..3fa5ad15a64 100644 --- a/components/batch-system/src/fsm.rs +++ b/components/batch-system/src/fsm.rs @@ -149,9 +149,7 @@ impl FsmState { Ok(_) => return, Err(Self::NOTIFYSTATE_DROP) => { let ptr = self.data.swap(ptr::null_mut(), Ordering::AcqRel); - unsafe { - let _ = Box::from_raw(ptr); - } + unsafe { Box::from_raw(ptr) }; return; } Err(s) => s, @@ -181,9 +179,7 @@ impl Drop for FsmState { fn drop(&mut self) { let ptr = self.data.swap(ptr::null_mut(), Ordering::SeqCst); if !ptr.is_null() { - unsafe { - let _ = Box::from_raw(ptr); - } + unsafe { Box::from_raw(ptr) }; } self.state_cnt.fetch_sub(1, Ordering::Relaxed); } diff --git 
a/components/case_macros/src/lib.rs b/components/case_macros/src/lib.rs index b779373a59d..057b68065d2 100644 --- a/components/case_macros/src/lib.rs +++ b/components/case_macros/src/lib.rs @@ -5,12 +5,12 @@ use proc_macro::{Group, Literal, TokenStream, TokenTree}; macro_rules! transform_idents_in_stream_to_string { - ($stream:ident, $transform:ident) => { + ($stream:ident, $transform:expr) => { $stream .into_iter() .map(|token_tree| match token_tree { TokenTree::Ident(ref ident) => { - Literal::string(&$transform(&ident.to_string())).into() + Literal::string(&$transform(ident.to_string())).into() } // find all idents in `TokenGroup` apply and reconstruct the group TokenTree::Group(ref group) => TokenTree::Group(Group::new( @@ -20,7 +20,7 @@ macro_rules! transform_idents_in_stream_to_string { .into_iter() .map(|group_token_tree| { if let TokenTree::Ident(ref ident) = group_token_tree { - Literal::string(&$transform(&ident.to_string())).into() + Literal::string(&$transform(ident.to_string())).into() } else { group_token_tree } @@ -53,7 +53,7 @@ fn to_snake(s: &str) -> String { /// e.g. `HelloWorld` -> `hello-world` #[proc_macro] pub fn kebab_case(stream: TokenStream) -> TokenStream { - transform_idents_in_stream_to_string!(stream, to_kebab) + transform_idents_in_stream_to_string!(stream, |s: String| to_kebab(&s)) } /// Expands idents in the input stream as snake-case string literal @@ -61,5 +61,5 @@ pub fn kebab_case(stream: TokenStream) -> TokenStream { /// e.g. 
`HelloWorld` -> `hello_world` #[proc_macro] pub fn snake_case(stream: TokenStream) -> TokenStream { - transform_idents_in_stream_to_string!(stream, to_snake) + transform_idents_in_stream_to_string!(stream, |s: String| to_snake(&s)) } diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index 18528fd08e9..c82c4cb6f13 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -1437,7 +1437,7 @@ mod tests { #[test] fn test_observed_range() { - for case in [ + for case in vec![ (b"".as_slice(), b"".as_slice(), false), (b"a", b"", false), (b"", b"b", false), diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 9d5601eba84..a5f00a08028 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -1015,10 +1015,10 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint( - &self, + pub fn find_first<'m, T>( + &'m self, start_key: Option<&Key>, end_key: Option<&Key>, mut pred: impl FnMut(Arc) -> Option, diff --git a/components/coprocessor_plugin_api/src/util.rs b/components/coprocessor_plugin_api/src/util.rs index 06e8847402f..31d75610d75 100644 --- a/components/coprocessor_plugin_api/src/util.rs +++ b/components/coprocessor_plugin_api/src/util.rs @@ -19,14 +19,10 @@ pub type PluginConstructorSignature = /// Type signature of the exported function with symbol /// [`PLUGIN_GET_BUILD_INFO_SYMBOL`]. -// emit this warn because to fix it need to change the data type which is a breaking change. -#[allow(improper_ctypes_definitions)] pub type PluginGetBuildInfoSignature = extern "C" fn() -> BuildInfo; /// Type signature of the exported function with symbol /// [`PLUGIN_GET_PLUGIN_INFO_SYMBOL`]. -// emit this warn because to fix it need to change the data type which is a breaking change. 
-#[allow(improper_ctypes_definitions)] pub type PluginGetPluginInfoSignature = extern "C" fn() -> PluginInfo; /// Automatically collected build information about the plugin that is exposed diff --git a/components/encryption/src/config.rs b/components/encryption/src/config.rs index 4455e4ce7cc..23e049e0df4 100644 --- a/components/encryption/src/config.rs +++ b/components/encryption/src/config.rs @@ -134,12 +134,11 @@ impl KmsConfig { } } -#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "kebab-case", tag = "type")] pub enum MasterKeyConfig { // Store encryption metadata as plaintext. Data still get encrypted. Not allowed to use if // encryption is enabled. (i.e. when encryption_config.method != Plaintext). - #[default] Plaintext, // Pass master key from a file, with key encoded as a readable hex string. The file should end @@ -157,6 +156,12 @@ pub enum MasterKeyConfig { }, } +impl Default for MasterKeyConfig { + fn default() -> Self { + MasterKeyConfig::Plaintext + } +} + mod encryption_method_serde { use std::fmt; diff --git a/components/engine_rocks/src/logger.rs b/components/engine_rocks/src/logger.rs index 185411dcacf..85f4de713ac 100644 --- a/components/engine_rocks/src/logger.rs +++ b/components/engine_rocks/src/logger.rs @@ -3,6 +3,7 @@ use rocksdb::{DBInfoLogLevel as InfoLogLevel, Logger}; use tikv_util::{crit, debug, error, info, warn}; // TODO(yiwu): abstract the Logger interface. 
+#[derive(Default)] pub struct RocksdbLogger; impl Logger for RocksdbLogger { @@ -43,6 +44,7 @@ impl Logger for TabletLogger { } } +#[derive(Default)] pub struct RaftDbLogger; impl Logger for RaftDbLogger { diff --git a/components/engine_rocks/src/properties.rs b/components/engine_rocks/src/properties.rs index 700d7621dc6..87ccab9e5ab 100644 --- a/components/engine_rocks/src/properties.rs +++ b/components/engine_rocks/src/properties.rs @@ -144,7 +144,10 @@ pub struct RangeProperties { impl RangeProperties { pub fn get(&self, key: &[u8]) -> &RangeOffsets { - let idx = self.offsets.binary_search_by_key(&key, |(k, _)| k).unwrap(); + let idx = self + .offsets + .binary_search_by_key(&key, |&(ref k, _)| k) + .unwrap(); &self.offsets[idx].1 } @@ -202,11 +205,11 @@ impl RangeProperties { if start == end { return (0, 0); } - let start_offset = match self.offsets.binary_search_by_key(&start, |(k, _)| k) { + let start_offset = match self.offsets.binary_search_by_key(&start, |&(ref k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; - let end_offset = match self.offsets.binary_search_by_key(&end, |(k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end, |&(ref k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; @@ -224,7 +227,7 @@ impl RangeProperties { ) -> Vec<(Vec, RangeOffsets)> { let start_offset = match self .offsets - .binary_search_by_key(&start_key, |(ref k, _)| k) + .binary_search_by_key(&start_key, |&(ref k, _)| k) { Ok(idx) => { if idx == self.offsets.len() - 1 { @@ -236,7 +239,7 @@ impl RangeProperties { Err(next_idx) => next_idx, }; - let end_offset = match self.offsets.binary_search_by_key(&end_key, |(ref k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end_key, |&(ref k, _)| k) { Ok(idx) => { if idx == 0 { return vec![]; @@ -866,7 +869,7 @@ mod tests { let mut collector = MvccPropertiesCollector::new(KeyMode::Txn); b.iter(|| { - for (k, v) in &entries { + for 
&(ref k, ref v) in &entries { collector.add(k, v, DBEntryType::Put, 0, 0); } }); diff --git a/components/engine_tirocks/src/properties/mvcc.rs b/components/engine_tirocks/src/properties/mvcc.rs index 66c96284ea3..1ca170f33d5 100644 --- a/components/engine_tirocks/src/properties/mvcc.rs +++ b/components/engine_tirocks/src/properties/mvcc.rs @@ -356,7 +356,7 @@ mod tests { let mut collector = MvccPropertiesCollector::new(CStr::from_bytes_with_nul(b"\0").unwrap(), KeyMode::Txn); b.iter(|| { - for (k, v) in &entries { + for &(ref k, ref v) in &entries { collector.add(k, v, EntryType::kEntryPut, 0, 0).unwrap(); } }); diff --git a/components/engine_tirocks/src/properties/range.rs b/components/engine_tirocks/src/properties/range.rs index e8a3411b02f..59b9e68a6bb 100644 --- a/components/engine_tirocks/src/properties/range.rs +++ b/components/engine_tirocks/src/properties/range.rs @@ -53,7 +53,7 @@ impl RangeProperties { pub fn get(&self, key: &[u8]) -> &RangeOffsets { let idx = self .offsets - .binary_search_by_key(&key, |(k, _)| k) + .binary_search_by_key(&key, |&(ref k, _)| k) .unwrap(); &self.offsets[idx].1 } @@ -112,11 +112,11 @@ impl RangeProperties { if start == end { return (0, 0); } - let start_offset = match self.offsets.binary_search_by_key(&start, |(k, _)| k) { + let start_offset = match self.offsets.binary_search_by_key(&start, |&(ref k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; - let end_offset = match self.offsets.binary_search_by_key(&end, |(k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end, |&(ref k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; @@ -134,7 +134,7 @@ impl RangeProperties { ) -> Vec<(Vec, RangeOffsets)> { let start_offset = match self .offsets - .binary_search_by_key(&start_key, |(k, _)| k) + .binary_search_by_key(&start_key, |&(ref k, _)| k) { Ok(idx) => { if idx == self.offsets.len() - 1 { @@ -146,7 +146,7 @@ impl RangeProperties { Err(next_idx) => 
next_idx, }; - let end_offset = match self.offsets.binary_search_by_key(&end_key, |(k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end_key, |&(ref k, _)| k) { Ok(idx) => { if idx == 0 { return vec![]; diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index 6449399cef8..8590236e126 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -119,7 +119,7 @@ impl SstApplyState { for sst in ssts { let cf_index = data_cf_offset(sst.get_cf_name()); if let Some(metas) = sst_list.get_mut(cf_index) { - let _ = metas.extract_if(|entry| entry.sst.get_uuid() == sst.get_uuid()); + metas.drain_filter(|entry| entry.sst.get_uuid() == sst.get_uuid()); } } } diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 0f89776e7fd..e09b1b52733 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -253,8 +253,8 @@ #![feature(assert_matches)] #![feature(linked_list_cursors)] #![feature(let_chains)] -#![feature(str_split_remainder)] -#![feature(extract_if)] +#![feature(str_split_as_str)] +#![feature(drain_filter)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index 64e6dcbd4b4..c88f1548513 100644 --- a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -241,7 +241,7 @@ impl TabletRegistry { let mut parts = name.rsplit('_'); let suffix = parts.next()?.parse().ok()?; let id = parts.next()?.parse().ok()?; - let prefix = parts.remainder().unwrap_or(""); + let prefix = parts.as_str(); Some((prefix, id, suffix)) } diff --git a/components/online_config/online_config_derive/src/lib.rs b/components/online_config/online_config_derive/src/lib.rs index e48a540c6b8..bb37aad5924 100644 --- a/components/online_config/online_config_derive/src/lib.rs +++ 
b/components/online_config/online_config_derive/src/lib.rs @@ -330,11 +330,15 @@ fn is_option_type(ty: &Type) -> bool { // TODO store (with lazy static) the vec of string // TODO maybe optimization, reverse the order of segments fn extract_option_segment(path: &Path) -> Option<&PathSegment> { - let idents_of_path = path.segments.iter().fold(String::new(), |mut acc, v| { - acc.push_str(&v.ident.to_string()); - acc.push('|'); - acc - }); + let idents_of_path = path + .segments + .iter() + .into_iter() + .fold(String::new(), |mut acc, v| { + acc.push_str(&v.ident.to_string()); + acc.push('|'); + acc + }); vec!["Option|", "std|option|Option|", "core|option|Option|"] .into_iter() .find(|s| idents_of_path == *s) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 73b65bc0904..cd5ae8f42f7 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -488,11 +488,7 @@ impl StorePollerBuilder { self.remove_dir(&path)?; continue; } - let Some((prefix, region_id, tablet_index)) = - self.tablet_registry.parse_tablet_name(&path) - else { - continue; - }; + let Some((prefix, region_id, tablet_index)) = self.tablet_registry.parse_tablet_name(&path) else { continue }; if prefix == MERGE_SOURCE_PREFIX { continue; } diff --git a/components/raftstore-v2/src/lib.rs b/components/raftstore-v2/src/lib.rs index 697d0525169..5b5e132b9ce 100644 --- a/components/raftstore-v2/src/lib.rs +++ b/components/raftstore-v2/src/lib.rs @@ -27,7 +27,6 @@ #![feature(box_into_inner)] #![feature(assert_matches)] #![feature(option_get_or_insert_default)] -#![allow(clippy::needless_pass_by_ref_mut)] mod batch; mod bootstrap; diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index 76b71a8906c..d3d1896287c 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ 
b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -343,9 +343,7 @@ impl Peer { entry.get_data(), entry.get_index(), entry.get_term(), - ) else { - continue; - }; + ) else { continue }; let cmd_type = cmd.get_admin_request().get_cmd_type(); match cmd_type { AdminCmdType::TransferLeader diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 2fe2b4b5735..0f9cae7218d 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -1098,9 +1098,7 @@ mod test { } } - let AdminCmdResult::SplitRegion(SplitResult { tablet, .. }) = apply_res else { - panic!() - }; + let AdminCmdResult::SplitRegion(SplitResult { tablet, .. }) = apply_res else { panic!() }; // update cache let mut cache = apply.tablet_registry().get(parent_id).unwrap(); cache.set(*tablet.downcast().unwrap()); diff --git a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs index f60b9828bbb..4cdeba3bc41 100644 --- a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs +++ b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs @@ -50,21 +50,21 @@ impl Peer { /// to target follower first to ensures it's ready to become leader. /// After that the real transfer leader process begin. /// - /// 1. pre_transfer_leader on leader: Leader will send a MsgTransferLeader - /// to follower. - /// 2. execute_transfer_leader on follower If follower passes all necessary - /// checks, it will reply an ACK with type MsgTransferLeader and its - /// promised applied index. - /// 3. ready_to_transfer_leader on leader: Leader checks if it's appropriate - /// to transfer leadership. If it does, it calls raft transfer_leader API - /// to do the remaining work. + /// 1. 
pre_transfer_leader on leader: + /// Leader will send a MsgTransferLeader to follower. + /// 2. execute_transfer_leader on follower + /// If follower passes all necessary checks, it will reply an + /// ACK with type MsgTransferLeader and its promised applied index. + /// 3. ready_to_transfer_leader on leader: + /// Leader checks if it's appropriate to transfer leadership. If it + /// does, it calls raft transfer_leader API to do the remaining work. /// /// Additional steps when there are remaining pessimistic /// locks to propose (detected in function on_transfer_leader_msg). /// 1. Leader firstly proposes pessimistic locks and then proposes a /// TransferLeader command. - /// 2. The follower applies the TransferLeader command and replies an ACK - /// with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. + /// 2. The follower applies the TransferLeader command and replies an + /// ACK with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. /// /// See also: tikv/rfcs#37. pub fn propose_transfer_leader( diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 6b778ad6c4a..4d1a59de0a6 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -662,12 +662,8 @@ impl Peer { let check_peer_id = check.get_check_peer().get_id(); let records = self.storage().region_state().get_merged_records(); let Some(record) = records.iter().find(|r| { - r.get_source_peers() - .iter() - .any(|p| p.get_id() == check_peer_id) - }) else { - return; - }; + r.get_source_peers().iter().any(|p| p.get_id() == check_peer_id) + }) else { return }; let source_index = record.get_source_index(); forward_destroy_to_source_peer(msg, |m| { let source_checkpoint = super::merge_source_path( diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index fcc93636640..ea802650f3d 100644 --- 
a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -351,9 +351,7 @@ where match fut.await? { Some(query_res) => { if query_res.read().is_none() { - let QueryResult::Response(res) = query_res else { - unreachable!() - }; + let QueryResult::Response(res) = query_res else { unreachable!() }; // Get an error explicitly in header, // or leader reports KeyIsLocked error via read index. assert!( diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index 2b6c9c666e6..af0257e763f 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -774,7 +774,7 @@ impl Peer { flushed = true; let flush_state = self.flush_state().clone(); - let apply_trace = self.storage_mut().apply_trace_mut(); + let mut apply_trace = self.storage_mut().apply_trace_mut(); let flushed_indexes = flush_state.as_ref().flushed_index(); for i in 0..flushed_indexes.len() { diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 15caf5f0c84..9e0ed449cef 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -570,9 +570,10 @@ impl Storage { pub fn cancel_generating_snap_due_to_compacted(&self, compact_to: u64) { let mut states = self.snap_states.borrow_mut(); states.retain(|id, state| { - let SnapState::Generating { ref index, .. } = *state else { - return true; - }; + let SnapState::Generating { + ref index, + .. 
+ } = *state else { return true; }; let snap_index = index.load(Ordering::SeqCst); if snap_index == 0 || compact_to <= snap_index + 1 { return true; @@ -599,9 +600,10 @@ impl Storage { } let (mut snapshot, to_peer_id) = *res.unwrap(); if let Some(state) = self.snap_states.borrow_mut().get_mut(&to_peer_id) { - let SnapState::Generating { ref index, .. } = *state else { - return false; - }; + let SnapState::Generating { + ref index, + .. + } = *state else { return false }; if snapshot.get_metadata().get_index() < index.load(Ordering::SeqCst) { warn!( self.logger(), diff --git a/components/raftstore-v2/src/operation/txn_ext.rs b/components/raftstore-v2/src/operation/txn_ext.rs index 6c3a9269a7f..272b2526b39 100644 --- a/components/raftstore-v2/src/operation/txn_ext.rs +++ b/components/raftstore-v2/src/operation/txn_ext.rs @@ -266,9 +266,7 @@ impl Peer { self.logger, "propose {} locks before transferring leader", lock_count; ); - let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write(header, encoder.encode()).0 else { - unreachable!() - }; + let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write(header, encoder.encode()).0 else {unreachable!()}; self.on_simple_write(ctx, write.header, write.data, write.ch); true } diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs index e7b3c8e62b8..37962a45452 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs @@ -100,10 +100,7 @@ impl Peer { failed_voters, target_index, demote_after_exit, - }) = self.unsafe_recovery_state() - else { - return; - }; + }) = self.unsafe_recovery_state() else { return }; if self.raft_group().raft.raft_log.applied < *target_index { return; diff --git a/components/raftstore-v2/src/worker/cleanup/compact.rs b/components/raftstore-v2/src/worker/cleanup/compact.rs index feb519a04ad..7acdb943b91 100644 --- 
a/components/raftstore-v2/src/worker/cleanup/compact.rs +++ b/components/raftstore-v2/src/worker/cleanup/compact.rs @@ -97,12 +97,8 @@ where ) { Ok(mut region_ids) => { for region_id in region_ids.drain(..) { - let Some(mut tablet_cache) = self.tablet_registry.get(region_id) else { - continue; - }; - let Some(tablet) = tablet_cache.latest() else { - continue; - }; + let Some(mut tablet_cache) = self.tablet_registry.get(region_id) else {continue}; + let Some(tablet) = tablet_cache.latest() else {continue}; for cf in &cf_names { if let Err(e) = tablet.compact_range_cf(cf, None, None, false, 1 /* threads */) @@ -147,12 +143,8 @@ fn collect_regions_to_compact( ); let mut regions_to_compact = vec![]; for id in region_ids { - let Some(mut tablet_cache) = reg.get(id) else { - continue; - }; - let Some(tablet) = tablet_cache.latest() else { - continue; - }; + let Some(mut tablet_cache) = reg.get(id) else {continue}; + let Some(tablet) = tablet_cache.latest() else {continue}; if tablet.auto_compactions_is_disabled().expect("cf") { info!( logger, diff --git a/components/raftstore-v2/src/worker/pd/region.rs b/components/raftstore-v2/src/worker/pd/region.rs index 999eccb4962..763e12fff07 100644 --- a/components/raftstore-v2/src/worker/pd/region.rs +++ b/components/raftstore-v2/src/worker/pd/region.rs @@ -113,7 +113,10 @@ where let approximate_keys = task.approximate_keys.unwrap_or_default(); let region_id = task.region.get_id(); - let peer_stat = self.region_peers.entry(region_id).or_default(); + let peer_stat = self + .region_peers + .entry(region_id) + .or_insert_with(PeerStat::default); peer_stat.approximate_size = approximate_size; peer_stat.approximate_keys = approximate_keys; @@ -370,7 +373,10 @@ where pub fn handle_update_read_stats(&mut self, mut stats: ReadStats) { for (region_id, region_info) in stats.region_infos.iter_mut() { - let peer_stat = self.region_peers.entry(*region_id).or_default(); + let peer_stat = self + .region_peers + .entry(*region_id) + 
.or_insert_with(PeerStat::default); peer_stat.read_bytes += region_info.flow.read_bytes as u64; peer_stat.read_keys += region_info.flow.read_keys as u64; self.store_stat.engine_total_bytes_read += region_info.flow.read_bytes as u64; @@ -392,7 +398,10 @@ where pub fn handle_update_write_stats(&mut self, mut stats: WriteStats) { for (region_id, region_info) in stats.region_infos.iter_mut() { - let peer_stat = self.region_peers.entry(*region_id).or_default(); + let peer_stat = self + .region_peers + .entry(*region_id) + .or_insert_with(PeerStat::default); peer_stat.query_stats.add_query_stats(®ion_info.0); self.store_stat .engine_total_query_num diff --git a/components/raftstore-v2/src/worker/pd/split.rs b/components/raftstore-v2/src/worker/pd/split.rs index 7bafb6c442a..7fec5a31bb6 100644 --- a/components/raftstore-v2/src/worker/pd/split.rs +++ b/components/raftstore-v2/src/worker/pd/split.rs @@ -142,10 +142,8 @@ where let f = async move { for split_info in split_infos { - let Ok(Some(region)) = pd_client.get_region_by_id(split_info.region_id).await - else { - continue; - }; + let Ok(Some(region)) = + pd_client.get_region_by_id(split_info.region_id).await else { continue }; // Try to split the region with the given split key. 
if let Some(split_key) = split_info.split_key { Self::ask_batch_split_imp( diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index ef9739226e7..206e87b3a8e 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -467,8 +467,7 @@ impl Runner { let Some(Some(tablet)) = self .tablet_registry .get(region_id) - .map(|mut cache| cache.latest().cloned()) - else { + .map(|mut cache| cache.latest().cloned()) else { warn!( self.logger, "flush memtable failed to acquire tablet"; @@ -556,15 +555,7 @@ impl Runner { } fn delete_range(&self, delete_range: Task) { - let Task::DeleteRange { - region_id, - tablet, - cf, - start_key, - end_key, - cb, - } = delete_range - else { + let Task::DeleteRange { region_id, tablet, cf, start_key, end_key, cb } = delete_range else { slog_panic!(self.logger, "unexpected task"; "task" => format!("{}", delete_range)) }; diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index a949725090d..5b3cc5feb93 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -137,9 +137,7 @@ impl TestRouter { match res { Ok(_) => return block_on(sub.result()).is_some(), Err(TrySendError::Disconnected(m)) => { - let PeerMsg::WaitFlush(ch) = m else { - unreachable!() - }; + let PeerMsg::WaitFlush(ch) = m else { unreachable!() }; match self .store_router() .send_control(StoreMsg::WaitFlush { region_id, ch }) diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index 756b7dc399e..d082013cd2c 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -471,7 +471,10 @@ impl CoprocessorHost { BoxSplitCheckObserver::new(KeysCheckObserver::new(ch)), ); registry.register_split_check_observer(100, 
BoxSplitCheckObserver::new(HalfCheckObserver)); - registry.register_split_check_observer(400, BoxSplitCheckObserver::new(TableCheckObserver)); + registry.register_split_check_observer( + 400, + BoxSplitCheckObserver::new(TableCheckObserver::default()), + ); registry.register_admin_observer(100, BoxAdminObserver::new(SplitObserver)); CoprocessorHost { registry, cfg } } diff --git a/components/raftstore/src/errors.rs b/components/raftstore/src/errors.rs index 6cf83a6cf84..d1597a77121 100644 --- a/components/raftstore/src/errors.rs +++ b/components/raftstore/src/errors.rs @@ -223,7 +223,7 @@ impl From for errorpb::Error { .mut_proposal_in_merging_mode() .set_region_id(region_id); } - Error::Transport(DiscardReason::Full) => { + Error::Transport(reason) if reason == DiscardReason::Full => { let mut server_is_busy_err = errorpb::ServerIsBusy::default(); server_is_busy_err.set_reason(RAFTSTORE_IS_BUSY.to_owned()); errorpb.set_server_is_busy(server_is_busy_err); diff --git a/components/raftstore/src/lib.rs b/components/raftstore/src/lib.rs index 197eaefeac7..1db5f79d226 100644 --- a/components/raftstore/src/lib.rs +++ b/components/raftstore/src/lib.rs @@ -5,13 +5,11 @@ #![feature(div_duration)] #![feature(min_specialization)] #![feature(box_patterns)] -#![feature(hash_extract_if)] +#![feature(hash_drain_filter)] #![feature(let_chains)] #![feature(assert_matches)] #![feature(type_alias_impl_trait)] -#![feature(impl_trait_in_assoc_type)] #![recursion_limit = "256"] -#![allow(clippy::needless_pass_by_ref_mut)] #[cfg(test)] extern crate test; diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 12617bc28a2..eedd5052bbb 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -419,11 +419,7 @@ where } self.state_size = 0; if let ExtraBatchWrite::V2(_) = self.extra_batch_write { - let ExtraBatchWrite::V2(lb) = - mem::replace(&mut 
self.extra_batch_write, ExtraBatchWrite::None) - else { - unreachable!() - }; + let ExtraBatchWrite::V2(lb) = mem::replace(&mut self.extra_batch_write, ExtraBatchWrite::None) else { unreachable!() }; wb.merge(lb).unwrap(); } } diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index 95f099f77a7..c91c68538dd 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -1338,14 +1338,14 @@ pub mod tests { // Test the initial data structure size. let (tx, rx) = mpsc::sync_channel(8); let mut cache = EntryCache::new_with_cb(move |c: i64| tx.send(c).unwrap()); - assert_eq!(rx.try_recv().unwrap(), 0); + assert_eq!(rx.try_recv().unwrap(), 896); cache.append( 0, 0, &[new_padded_entry(101, 1, 1), new_padded_entry(102, 1, 2)], ); - assert_eq!(rx.try_recv().unwrap(), 419); + assert_eq!(rx.try_recv().unwrap(), 3); cache.prepend(vec![new_padded_entry(100, 1, 1)]); assert_eq!(rx.try_recv().unwrap(), 1); @@ -1371,7 +1371,7 @@ pub mod tests { // Test trace a dangle entry. let cached_entries = CachedEntries::new(vec![new_padded_entry(100, 1, 1)]); cache.trace_cached_entries(cached_entries); - assert_eq!(rx.try_recv().unwrap(), 97); + assert_eq!(rx.try_recv().unwrap(), 1); // Test trace an entry which is still in cache. let cached_entries = CachedEntries::new(vec![new_padded_entry(102, 3, 5)]); @@ -1398,7 +1398,7 @@ pub mod tests { assert_eq!(rx.try_recv().unwrap(), -7); drop(cache); - assert_eq!(rx.try_recv().unwrap(), -512); + assert_eq!(rx.try_recv().unwrap(), -896); } #[test] diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 406c8d79d18..c170e5a35f9 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -1262,9 +1262,9 @@ where apply_ctx.host.on_empty_cmd(&self.region, index, term); // 1. When a peer become leader, it will send an empty entry. - // 2. 
When a leader tries to read index during transferring leader, it will also - // propose an empty entry. But that entry will not contain any associated - // callback. So no need to clear callback. + // 2. When a leader tries to read index during transferring leader, + // it will also propose an empty entry. But that entry will not contain + // any associated callback. So no need to clear callback. while let Some(mut cmd) = self.pending_cmds.pop_normal(u64::MAX, term - 1) { if let Some(cb) = cmd.cb.take() { apply_ctx @@ -4787,12 +4787,12 @@ where // command may not read the writes of previous commands and break ACID. If // it's still leader, there are two possibility that mailbox is closed: // 1. The process is shutting down. - // 2. The leader is destroyed. A leader won't propose to destroy itself, so it - // should either destroyed by older leaders or newer leaders. Leader won't - // respond to read until it has applied to current term, so no command will - // be proposed until command from older leaders have applied, which will then - // stop it from accepting proposals. If the command is proposed by new - // leader, then it won't be able to propose new proposals. + // 2. The leader is destroyed. A leader won't propose to destroy itself, so + // it should either destroyed by older leaders or newer leaders. Leader + // won't respond to read until it has applied to current term, so no + // command will be proposed until command from older leaders have applied, + // which will then stop it from accepting proposals. If the command is + // proposed by new leader, then it won't be able to propose new proposals. // So only shutdown needs to be checked here. if !tikv_util::thread_group::is_shutdown(!cfg!(test)) { for p in apply.cbs.drain(..) 
{ diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 371e8cd8eb5..30ba0c3059d 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -1015,10 +1015,10 @@ where // in snapshot recovery after we stopped all conf changes from PD. // if the follower slow than leader and has the pending conf change. // that's means - // 1. if the follower didn't finished the conf change => it cannot be chosen to - // be leader during recovery. - // 2. if the follower has been chosen to be leader => it already apply the - // pending conf change already. + // 1. if the follower didn't finished the conf change + // => it cannot be chosen to be leader during recovery. + // 2. if the follower has been chosen to be leader + // => it already apply the pending conf change already. return; } debug!( diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index a858b5afddd..64c5be6d7e1 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -169,25 +169,19 @@ where } pub fn has_proposed_cb(&self) -> bool { - let Callback::Write { proposed_cb, .. } = self else { - return false; - }; + let Callback::Write { proposed_cb, .. } = self else { return false; }; proposed_cb.is_some() } pub fn invoke_proposed(&mut self) { - let Callback::Write { proposed_cb, .. } = self else { - return; - }; + let Callback::Write { proposed_cb, .. } = self else { return; }; if let Some(cb) = proposed_cb.take() { cb(); } } pub fn invoke_committed(&mut self) { - let Callback::Write { committed_cb, .. } = self else { - return; - }; + let Callback::Write { committed_cb, .. } = self else { return; }; if let Some(cb) = committed_cb.take() { cb(); } @@ -201,16 +195,12 @@ where } pub fn take_proposed_cb(&mut self) -> Option { - let Callback::Write { proposed_cb, .. } = self else { - return None; - }; + let Callback::Write { proposed_cb, .. 
} = self else { return None; }; proposed_cb.take() } pub fn take_committed_cb(&mut self) -> Option { - let Callback::Write { committed_cb, .. } = self else { - return None; - }; + let Callback::Write { committed_cb, .. } = self else { return None; }; committed_cb.take() } } @@ -268,9 +258,7 @@ impl ReadCallback for Callback { } fn read_tracker(&self) -> Option { - let Callback::Read { tracker, .. } = self else { - return None; - }; + let Callback::Read { tracker, .. } = self else { return None; }; Some(*tracker) } } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index aafd2f9695b..8ef857bfa12 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -2314,14 +2314,14 @@ where CheckApplyingSnapStatus::Applying => { // If this peer is applying snapshot, we should not get a new ready. // There are two reasons in my opinion: - // 1. If we handle a new ready and persist the data(e.g. entries), we can not - // tell raft-rs that this ready has been persisted because the ready need - // to be persisted one by one from raft-rs's view. - // 2. When this peer is applying snapshot, the response msg should not be sent - // to leader, thus the leader will not send new entries to this peer. - // Although it's possible a new leader may send a AppendEntries msg to this - // peer, this possibility is very low. In most cases, there is no msg need - // to be handled. + // 1. If we handle a new ready and persist the data(e.g. entries), + // we can not tell raft-rs that this ready has been persisted because + // the ready need to be persisted one by one from raft-rs's view. + // 2. When this peer is applying snapshot, the response msg should not + // be sent to leader, thus the leader will not send new entries to + // this peer. Although it's possible a new leader may send a AppendEntries + // msg to this peer, this possibility is very low. In most cases, there + // is no msg need to be handled. 
// So we choose to not get a new ready which makes the logic more clear. debug!( "still applying snapshot, skip further handling"; @@ -4467,25 +4467,27 @@ where /// to target follower first to ensures it's ready to become leader. /// After that the real transfer leader process begin. /// - /// 1. pre_transfer_leader on leader: Leader will send a MsgTransferLeader - /// to follower. - /// 2. pre_ack_transfer_leader_msg on follower: If follower passes all - /// necessary checks, it will try to warmup the entry cache. - /// 3. ack_transfer_leader_msg on follower: When the entry cache has been - /// warmed up or the operator is timeout, the follower reply an ACK with - /// type MsgTransferLeader and its promised persistent index. + /// 1. pre_transfer_leader on leader: + /// Leader will send a MsgTransferLeader to follower. + /// 2. pre_ack_transfer_leader_msg on follower: + /// If follower passes all necessary checks, it will try to warmup + /// the entry cache. + /// 3. ack_transfer_leader_msg on follower: + /// When the entry cache has been warmed up or the operator is timeout, + /// the follower reply an ACK with type MsgTransferLeader and + /// its promised persistent index. /// /// Additional steps when there are remaining pessimistic /// locks to propose (detected in function on_transfer_leader_msg). /// 1. Leader firstly proposes pessimistic locks and then proposes a /// TransferLeader command. - /// 2. ack_transfer_leader_msg on follower again: The follower applies - /// the TransferLeader command and replies an ACK with special context - /// TRANSFER_LEADER_COMMAND_REPLY_CTX. + /// 2. ack_transfer_leader_msg on follower again: + /// The follower applies the TransferLeader command and replies an + /// ACK with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. /// - /// 4. ready_to_transfer_leader on leader: Leader checks if it's appropriate - /// to transfer leadership. If it does, it calls raft transfer_leader API - /// to do the remaining work. + /// 4. 
ready_to_transfer_leader on leader: + /// Leader checks if it's appropriate to transfer leadership. If it + /// does, it calls raft transfer_leader API to do the remaining work. /// /// See also: tikv/rfcs#37. fn propose_transfer_leader( @@ -5818,7 +5820,7 @@ mod tests { admin_req.clear_transfer_leader(); req.clear_admin_request(); - for (op, policy) in [ + for (op, policy) in vec![ (CmdType::Get, RequestPolicy::ReadLocal), (CmdType::Snap, RequestPolicy::ReadLocal), (CmdType::Put, RequestPolicy::ProposeNormal), @@ -5971,7 +5973,7 @@ mod tests { // (1, 4) and (1, 5) is not committed let entries = vec![(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (2, 6), (2, 7)]; - let committed = [(1, 1), (1, 2), (1, 3), (2, 6), (2, 7)]; + let committed = vec![(1, 1), (1, 2), (1, 3), (2, 6), (2, 7)]; for (index, term) in entries.clone() { if term != 1 { continue; diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 1556338e9c0..a888929ca98 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -96,7 +96,7 @@ impl PartialEq for SnapState { (&SnapState::Relax, &SnapState::Relax) | (&SnapState::ApplyAborted, &SnapState::ApplyAborted) | (&SnapState::Generating { .. }, &SnapState::Generating { .. 
}) => true, - (SnapState::Applying(b1), SnapState::Applying(b2)) => { + (&SnapState::Applying(ref b1), &SnapState::Applying(ref b2)) => { b1.load(Ordering::Relaxed) == b2.load(Ordering::Relaxed) } _ => false, diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index 40168707f6a..bc22dfbf586 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -438,7 +438,7 @@ mod tests { (b"a9".to_vec(), b"v9".to_vec()), ]; - for (k, v) in &base_data { + for &(ref k, ref v) in &base_data { engines.kv.put(&data_key(k), v).unwrap(); } let store = new_peer_storage(engines, &r); @@ -482,11 +482,11 @@ mod tests { let mut data = vec![]; { let db = &engines.kv; - for (k, level) in &levels { + for &(ref k, level) in &levels { db.put(&data_key(k), k).unwrap(); db.flush_cfs(&[], true).unwrap(); data.push((k.to_vec(), k.to_vec())); - db.compact_files_in_range(Some(&data_key(k)), Some(&data_key(k)), Some(*level)) + db.compact_files_in_range(Some(&data_key(k)), Some(&data_key(k)), Some(level)) .unwrap(); } } diff --git a/components/raftstore/src/store/simple_write.rs b/components/raftstore/src/store/simple_write.rs index 1d8341c1c0b..a303a586935 100644 --- a/components/raftstore/src/store/simple_write.rs +++ b/components/raftstore/src/store/simple_write.rs @@ -579,17 +579,13 @@ mod tests { SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); assert_eq!(*decoder.header(), *header); let write = decoder.next().unwrap(); - let SimpleWrite::Put(put) = write else { - panic!("should be put") - }; + let SimpleWrite::Put(put) = write else { panic!("should be put") }; assert_eq!(put.cf, CF_DEFAULT); assert_eq!(put.key, b"key"); assert_eq!(put.value, b""); let write = decoder.next().unwrap(); - let SimpleWrite::Delete(delete) = write else { - panic!("should be delete") - }; + let SimpleWrite::Delete(delete) = write else { panic!("should be delete") }; 
assert_eq!(delete.cf, CF_WRITE); assert_eq!(delete.key, &delete_key); assert_matches!(decoder.next(), None); @@ -597,18 +593,14 @@ mod tests { let (bytes, _) = req_encoder2.encode(); decoder = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); let write = decoder.next().unwrap(); - let SimpleWrite::DeleteRange(dr) = write else { - panic!("should be delete range") - }; + let SimpleWrite::DeleteRange(dr) = write else { panic!("should be delete range") }; assert_eq!(dr.cf, CF_LOCK); assert_eq!(dr.start_key, b"key"); assert_eq!(dr.end_key, b"key"); assert!(dr.notify_only); let write = decoder.next().unwrap(); - let SimpleWrite::DeleteRange(dr) = write else { - panic!("should be delete range") - }; + let SimpleWrite::DeleteRange(dr) = write else { panic!("should be delete range") }; assert_eq!(dr.cf, "cf"); assert_eq!(dr.start_key, b"key"); assert_eq!(dr.end_key, b"key"); @@ -634,9 +626,7 @@ mod tests { let mut decoder = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); let write = decoder.next().unwrap(); - let SimpleWrite::Ingest(ssts) = write else { - panic!("should be ingest") - }; + let SimpleWrite::Ingest(ssts) = write else { panic!("should be ingest") }; assert_eq!(exp, ssts); assert_matches!(decoder.next(), None); } @@ -725,9 +715,7 @@ mod tests { SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); assert_eq!(*decoder.header(), *header); let req = decoder.next().unwrap(); - let SimpleWrite::Put(put) = req else { - panic!("should be put") - }; + let SimpleWrite::Put(put) = req else { panic!("should be put") }; assert_eq!(put.cf, CF_DEFAULT); assert_eq!(put.key, b"key"); assert_eq!(put.value, b""); diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index dcb98dd9cb2..6fe21fe9750 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1323,7 +1323,7 @@ impl Write for Snapshot { } 
assert!(cf_file.size[self.cf_file_index] != 0); - let file_for_recving = cf_file + let mut file_for_recving = cf_file .file_for_recving .get_mut(self.cf_file_index) .unwrap(); @@ -2162,7 +2162,7 @@ impl TabletSnapManager { .stats .lock() .unwrap() - .extract_if(|_, (_, stat)| stat.get_region_id() > 0) + .drain_filter(|_, (_, stat)| stat.get_region_id() > 0) .map(|(_, (_, stat))| stat) .filter(|stat| stat.get_total_duration_sec() > 1) .collect(); diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 8fcaf826c6a..3cdee1e40f1 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -327,7 +327,7 @@ mod tests { for db_creater in db_creaters { let (_enc_dir, enc_opts) = gen_db_options_with_encryption("test_cf_build_and_apply_plain_files_enc"); - for db_opt in [None, Some(enc_opts)] { + for db_opt in vec![None, Some(enc_opts)] { let dir = Builder::new().prefix("test-snap-cf-db").tempdir().unwrap(); let db: KvTestEngine = db_creater(dir.path(), db_opt.clone(), None).unwrap(); // Collect keys via the key_callback into a collection. 
@@ -408,7 +408,7 @@ mod tests { for db_creater in db_creaters { let (_enc_dir, enc_opts) = gen_db_options_with_encryption("test_cf_build_and_apply_sst_files_enc"); - for db_opt in [None, Some(enc_opts)] { + for db_opt in vec![None, Some(enc_opts)] { let dir = Builder::new().prefix("test-snap-cf-db").tempdir().unwrap(); let db = db_creater(dir.path(), db_opt.clone(), None).unwrap(); let snap_cf_dir = Builder::new().prefix("test-snap-cf").tempdir().unwrap(); diff --git a/components/raftstore/src/store/txn_ext.rs b/components/raftstore/src/store/txn_ext.rs index 9c73be2b9eb..0091fd4e7bb 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -244,7 +244,7 @@ impl PeerPessimisticLocks { // Locks that are marked deleted still need to be moved to the new regions, // and the deleted mark should also be cleared. // Refer to the comment in `PeerPessimisticLocks` for details. - let removed_locks = self.map.extract_if(|key, _| { + let removed_locks = self.map.drain_filter(|key, _| { let key = &**key.as_encoded(); let (start_key, end_key) = (derived.get_start_key(), derived.get_end_key()); key < start_key || (!end_key.is_empty() && key >= end_key) diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index ed2c70822c9..3f34fe691ee 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -124,7 +124,8 @@ pub fn is_vote_msg(msg: &eraftpb::Message) -> bool { /// peer or not. // There could be two cases: // 1. Target peer already exists but has not established communication with leader yet -// 2. Target peer is added newly due to member change or region split, but it's not created yet +// 2. 
Target peer is added newly due to member change or region split, but it's not +// created yet // For both cases the region start key and end key are attached in RequestVote and // Heartbeat message for the store of that peer to check whether to create a new peer // when receiving these messages, or just to wait for a pending region split to perform diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 6aa192bd28e..606576b22e4 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1704,7 +1704,10 @@ where fn handle_read_stats(&mut self, mut read_stats: ReadStats) { for (region_id, region_info) in read_stats.region_infos.iter_mut() { - let peer_stat = self.region_peers.entry(*region_id).or_default(); + let peer_stat = self + .region_peers + .entry(*region_id) + .or_insert_with(PeerStat::default); peer_stat.read_bytes += region_info.flow.read_bytes as u64; peer_stat.read_keys += region_info.flow.read_keys as u64; self.store_stat.engine_total_bytes_read += region_info.flow.read_bytes as u64; @@ -1726,7 +1729,10 @@ where fn handle_write_stats(&mut self, mut write_stats: WriteStats) { for (region_id, region_info) in write_stats.region_infos.iter_mut() { - let peer_stat = self.region_peers.entry(*region_id).or_default(); + let peer_stat = self + .region_peers + .entry(*region_id) + .or_insert_with(PeerStat::default); peer_stat.query_stats.add_query_stats(®ion_info.0); self.store_stat .engine_total_query_num @@ -2084,10 +2090,7 @@ where let f = async move { for split_info in split_infos { let Ok(Some(region)) = - pd_client.get_region_by_id(split_info.region_id).await - else { - continue; - }; + pd_client.get_region_by_id(split_info.region_id).await else { continue }; // Try to split the region with the given split key. 
if let Some(split_key) = split_info.split_key { Self::handle_ask_batch_split( @@ -2152,7 +2155,10 @@ where cpu_usage, ) = { let region_id = hb_task.region.get_id(); - let peer_stat = self.region_peers.entry(region_id).or_default(); + let peer_stat = self + .region_peers + .entry(region_id) + .or_insert_with(PeerStat::default); peer_stat.approximate_size = approximate_size; peer_stat.approximate_keys = approximate_keys; diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 5a6e641f5dc..5d6ede9c193 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -2155,12 +2155,11 @@ mod tests { let (notify_tx, notify_rx) = channel(); let (wait_spawn_tx, wait_spawn_rx) = channel(); let runtime = tokio::runtime::Runtime::new().unwrap(); - let handler = runtime.spawn(async move { + let _ = runtime.spawn(async move { wait_spawn_tx.send(()).unwrap(); notify.notified().await; notify_tx.send(()).unwrap(); }); - drop(handler); wait_spawn_rx.recv().unwrap(); thread::sleep(std::time::Duration::from_millis(500)); // Prevent lost notify. must_not_redirect(&mut reader, &rx, task); diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 7a675646f5c..068904b2a67 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -179,7 +179,7 @@ impl PendingDeleteRanges { ) -> Vec<(u64, Vec, Vec, u64)> { let ranges = self.find_overlap_ranges(start_key, end_key); - for (_, s_key, ..) in &ranges { + for &(_, ref s_key, ..) 
in &ranges { self.ranges.remove(s_key).unwrap(); } ranges @@ -1293,7 +1293,7 @@ pub(crate) mod tests { } }; - #[cfg(feature = "failpoints")] + #[allow(dead_code)] let must_not_finish = |ids: &[u64]| { for id in ids { let region_key = keys::region_state_key(*id); diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index 468c06febd4..4ff853f70a0 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -64,14 +64,14 @@ impl KeyEntry { impl PartialOrd for KeyEntry { fn partial_cmp(&self, rhs: &KeyEntry) -> Option { - Some(self.cmp(rhs)) + // BinaryHeap is max heap, so we have to reverse order to get a min heap. + Some(self.key.cmp(&rhs.key).reverse()) } } impl Ord for KeyEntry { fn cmp(&self, rhs: &KeyEntry) -> Ordering { - // BinaryHeap is max heap, so we have to reverse order to get a min heap. - self.key.cmp(&rhs.key).reverse() + self.partial_cmp(rhs).unwrap() } } @@ -287,7 +287,7 @@ impl Runner { region: &Region, bucket_ranges: &Vec, ) { - for (bucket, bucket_range) in &mut buckets.iter_mut().zip(bucket_ranges) { + for (mut bucket, bucket_range) in &mut buckets.iter_mut().zip(bucket_ranges) { let mut bucket_region = region.clone(); bucket_region.set_start_key(bucket_range.0.clone()); bucket_region.set_end_key(bucket_range.1.clone()); diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 9cf534c62b0..4bbcc773763 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -178,7 +178,7 @@ impl Samples { // evaluate the samples according to the given key range, it will update the // sample's left, right and contained counter. 
fn evaluate(&mut self, key_range: &KeyRange) { - for sample in self.0.iter_mut() { + for mut sample in self.0.iter_mut() { let order_start = if key_range.start_key.is_empty() { Ordering::Greater } else { @@ -496,7 +496,10 @@ pub struct WriteStats { impl WriteStats { pub fn add_query_num(&mut self, region_id: u64, kind: QueryKind) { - let query_stats = self.region_infos.entry(region_id).or_default(); + let query_stats = self + .region_infos + .entry(region_id) + .or_insert_with(QueryStats::default); query_stats.add_query_num(kind, 1); } @@ -985,8 +988,8 @@ mod tests { #[test] fn test_prefix_sum() { - let v = [1, 2, 3, 4, 5, 6, 7, 8, 9]; - let expect = [1, 3, 6, 10, 15, 21, 28, 36, 45]; + let v = vec![1, 2, 3, 4, 5, 6, 7, 8, 9]; + let expect = vec![1, 3, 6, 10, 15, 21, 28, 36, 45]; let pre = prefix_sum(v.iter(), |x| *x); for i in 0..v.len() { assert_eq!(expect[i], pre[i]); diff --git a/components/resolved_ts/src/cmd.rs b/components/resolved_ts/src/cmd.rs index 328f725edaa..47d14304112 100644 --- a/components/resolved_ts/src/cmd.rs +++ b/components/resolved_ts/src/cmd.rs @@ -213,13 +213,13 @@ fn group_row_changes(requests: Vec) -> (HashMap, bool) CF_WRITE => { if let Ok(ts) = key.decode_ts() { let key = key.truncate_ts().unwrap(); - let row = changes.entry(key).or_default(); + let mut row = changes.entry(key).or_default(); assert!(row.write.is_none()); row.write = Some(KeyOp::Put(Some(ts), value)); } } CF_LOCK => { - let row = changes.entry(key).or_default(); + let mut row = changes.entry(key).or_default(); assert!(row.lock.is_none()); row.lock = Some(KeyOp::Put(None, value)); } @@ -239,7 +239,7 @@ fn group_row_changes(requests: Vec) -> (HashMap, bool) match delete.cf.as_str() { CF_LOCK => { let key = Key::from_encoded(delete.take_key()); - let row = changes.entry(key).or_default(); + let mut row = changes.entry(key).or_default(); row.lock = Some(KeyOp::Delete); } "" | CF_WRITE | CF_DEFAULT => {} diff --git a/components/resolved_ts/src/endpoint.rs 
b/components/resolved_ts/src/endpoint.rs index 406d931ed7f..9de21b27d9e 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -65,8 +65,7 @@ impl Drop for ResolverStatus { locks, memory_quota, .. - } = self - else { + } = self else { return; }; if locks.is_empty() { @@ -97,8 +96,7 @@ impl ResolverStatus { locks, memory_quota, .. - } = self - else { + } = self else { panic!("region {:?} resolver has ready", region_id) }; // Check if adding a new lock or unlock will exceed the memory @@ -112,7 +110,10 @@ impl ResolverStatus { } fn update_tracked_index(&mut self, index: u64, region_id: u64) { - let ResolverStatus::Pending { tracked_index, .. } = self else { + let ResolverStatus::Pending { + tracked_index, + .. + } = self else { panic!("region {:?} resolver has ready", region_id) }; assert!( @@ -134,8 +135,7 @@ impl ResolverStatus { memory_quota, tracked_index, .. - } = self - else { + } = self else { panic!("region {:?} resolver has ready", region_id) }; // Must take locks, otherwise it may double free memory quota on drop. @@ -687,7 +687,7 @@ where scanner_pool, scan_concurrency_semaphore, regions: HashMap::default(), - _phantom: PhantomData, + _phantom: PhantomData::default(), }; ep.handle_advance_resolved_ts(leader_resolver); ep @@ -870,6 +870,7 @@ where // Tracking or untracking locks with incoming commands that corresponding // observe id is valid. 
+ #[allow(clippy::drop_ref)] fn handle_change_log(&mut self, cmd_batch: Vec) { let size = cmd_batch.iter().map(|b| b.size()).sum::(); RTS_CHANNEL_PENDING_CMD_BYTES.sub(size as i64); @@ -883,6 +884,7 @@ where if observe_region.handle.id == observe_id { let logs = ChangeLog::encode_change_log(region_id, batch); if let Err(e) = observe_region.track_change_log(&logs) { + drop(observe_region); let backoff = match e { Error::MemoryQuotaExceeded(_) => Some(MEMORY_QUOTA_EXCEEDED_BACKOFF), Error::Other(_) => None, @@ -928,7 +930,7 @@ where } fn handle_advance_resolved_ts(&self, leader_resolver: LeadershipResolver) { - let regions = self.regions.keys().copied().collect(); + let regions = self.regions.keys().into_iter().copied().collect(); self.advance_worker.advance_ts_for_regions( regions, leader_resolver, diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index ad052338fa2..6c8c90dc38f 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -100,7 +100,7 @@ impl, E: KvEngine> ScannerPool { Self { workers, cdc_handle, - _phantom: PhantomData, + _phantom: PhantomData::default(), } } @@ -168,7 +168,6 @@ impl, E: KvEngine> ScannerPool { self.workers.spawn(fut); } - #[allow(clippy::needless_pass_by_ref_mut)] async fn get_snapshot( task: &mut ScanTask, cdc_handle: T, diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 09e90e9dd01..a4b30e3d4ad 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -240,7 +240,7 @@ impl ResourceGroupManager { request_source: &str, ) -> Option> { fail_point!("only_check_source_task_name", |name| { - assert_eq!(name.clone().unwrap(), request_source.to_string()); + assert_eq!(&name.unwrap(), request_source); None }); if let Some(group) = self.resource_groups.get(rg) { @@ -311,8 +311,8 @@ pub struct ResourceController { // 1. 
the priority factor is calculate based on read/write RU settings. // 2. for read request, we increase a constant virtual time delta at each `get_priority` call // because the cost can't be calculated at start, so we only increase a constant delta and - // increase the real cost after task is executed; but don't increase it at write because the - // cost is known so we just pre-consume it. + // increase the real cost after task is executed; but don't increase it at write because + // the cost is known so we just pre-consume it. is_read: bool, // Track the maximum ru quota used to calculate the factor of each resource group. // factor = max_ru_quota / group_ru_quota * 10.0 diff --git a/components/resource_metering/src/lib.rs b/components/resource_metering/src/lib.rs index 7b437ea4303..ba8e2174e19 100644 --- a/components/resource_metering/src/lib.rs +++ b/components/resource_metering/src/lib.rs @@ -2,7 +2,7 @@ // TODO(mornyx): crate doc. -#![feature(hash_extract_if)] +#![feature(hash_drain_filter)] #![feature(core_intrinsics)] use std::{ diff --git a/components/resource_metering/src/model.rs b/components/resource_metering/src/model.rs index 03cd500eb2e..6f7118ef9e1 100644 --- a/components/resource_metering/src/model.rs +++ b/components/resource_metering/src/model.rs @@ -87,7 +87,7 @@ impl RawRecords { pdqselect::select_by(&mut buf, k, |a, b| b.cmp(a)); let kth = buf[k]; // Evict records with cpu time less or equal than `kth` - let evicted_records = self.records.extract_if(|_, r| r.cpu_time <= kth); + let evicted_records = self.records.drain_filter(|_, r| r.cpu_time <= kth); // Record evicted into others for (_, record) in evicted_records { others.merge(&record); diff --git a/components/resource_metering/src/recorder/sub_recorder/cpu.rs b/components/resource_metering/src/recorder/sub_recorder/cpu.rs index 08675bb6153..8c4053a80ab 100644 --- a/components/resource_metering/src/recorder/sub_recorder/cpu.rs +++ 
b/components/resource_metering/src/recorder/sub_recorder/cpu.rs @@ -9,7 +9,7 @@ use crate::{ localstorage::{LocalStorage, SharedTagInfos}, SubRecorder, }, - RawRecords, + RawRecord, RawRecords, }; /// An implementation of [SubRecorder] for collecting cpu statistics. @@ -37,7 +37,7 @@ impl SubRecorder for CpuRecorder { if *last_stat != cur_stat { let delta_ms = (cur_stat.total_cpu_time() - last_stat.total_cpu_time()) * 1_000.; - let record = records.entry(cur_tag).or_default(); + let record = records.entry(cur_tag).or_insert_with(RawRecord::default); record.cpu_time += delta_ms as u32; } thread_stat.stat = cur_stat; diff --git a/components/resource_metering/tests/recorder_test.rs b/components/resource_metering/tests/recorder_test.rs index 6e164b8e5e8..daa371e7477 100644 --- a/components/resource_metering/tests/recorder_test.rs +++ b/components/resource_metering/tests/recorder_test.rs @@ -55,7 +55,7 @@ mod tests { if let Some(tag) = self.current_ctx { self.records .entry(tag.as_bytes().to_vec()) - .or_default() + .or_insert_with(RawRecord::default) .cpu_time += ms; } self.ops.push(op); @@ -140,7 +140,7 @@ mod tests { if let Ok(mut r) = self.records.lock() { for (tag, record) in records.records.iter() { r.entry(tag.extra_attachment.to_vec()) - .or_default() + .or_insert_with(RawRecord::default) .merge(record); } } @@ -156,10 +156,10 @@ mod tests { let mut records = self.records.lock().unwrap(); for k in expected.keys() { - records.entry(k.clone()).or_default(); + records.entry(k.clone()).or_insert_with(RawRecord::default); } for k in records.keys() { - expected.entry(k.clone()).or_default(); + expected.entry(k.clone()).or_insert_with(RawRecord::default); } for (k, expected_value) in expected { let value = records.get(&k).unwrap(); @@ -324,10 +324,10 @@ mod tests { fn merge( maps: impl IntoIterator, RawRecord>>, ) -> HashMap, RawRecord> { - let mut map: HashMap, RawRecord> = HashMap::default(); + let mut map = HashMap::default(); for m in maps { for (k, v) in m { - 
map.entry(k).or_default().merge(&v); + map.entry(k).or_insert_with(RawRecord::default).merge(&v); } } map diff --git a/components/server/src/common.rs b/components/server/src/common.rs index 43b0314cbbe..c8cf879d905 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -558,9 +558,7 @@ impl EnginesResourceInfo { }); for (_, cache) in cached_latest_tablets.iter_mut() { - let Some(tablet) = cache.latest() else { - continue; - }; + let Some(tablet) = cache.latest() else { continue }; for cf in DATA_CFS { fetch_engine_cf(tablet, cf); } diff --git a/components/snap_recovery/src/leader_keeper.rs b/components/snap_recovery/src/leader_keeper.rs index 48344fe5012..417d5becca3 100644 --- a/components/snap_recovery/src/leader_keeper.rs +++ b/components/snap_recovery/src/leader_keeper.rs @@ -206,7 +206,7 @@ mod test { #[test] fn test_basic() { - let leaders = [1, 2, 3]; + let leaders = vec![1, 2, 3]; let mut store = MockStore::default(); store.regions = leaders.iter().copied().collect(); let mut lk = LeaderKeeper::::new(store, leaders); @@ -217,7 +217,7 @@ mod test { #[test] fn test_failure() { - let leaders = [1, 2, 3]; + let leaders = vec![1, 2, 3]; let mut store = MockStore::default(); store.regions = leaders.iter().copied().collect(); let mut lk = LeaderKeeper::::new(store, vec![1, 2, 3, 4]); diff --git a/components/sst_importer/src/import_mode2.rs b/components/sst_importer/src/import_mode2.rs index 4db29c47a6f..70b7d7fac5e 100644 --- a/components/sst_importer/src/import_mode2.rs +++ b/components/sst_importer/src/import_mode2.rs @@ -139,7 +139,7 @@ impl ImportModeSwitcherV2 { pub fn ranges_in_import(&self) -> HashSet { let inner = self.inner.lock().unwrap(); - HashSet::from_iter(inner.import_mode_ranges.keys().cloned()) + HashSet::from_iter(inner.import_mode_ranges.keys().into_iter().cloned()) } } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 910cfa602dd..5530862e6a3 100644 --- 
a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -384,8 +384,8 @@ impl SstImporter { // This method is blocking. It performs the following transformations before // writing to disk: // - // 1. only KV pairs in the *inclusive* range (`[start, end]`) are used. (set - // the range to `["", ""]` to import everything). + // 1. only KV pairs in the *inclusive* range (`[start, end]`) are used. + // (set the range to `["", ""]` to import everything). // 2. keys are rewritten according to the given rewrite rule. // // Both the range and rewrite keys are specified using origin keys. However, @@ -1558,7 +1558,7 @@ mod tests { let env = get_env(key_manager.clone(), None /* io_rate_limiter */).unwrap(); let db = new_test_engine_with_env(db_path.to_str().unwrap(), &[CF_DEFAULT], env); - let cases = [(0, 10), (5, 15), (10, 20), (0, 100)]; + let cases = vec![(0, 10), (5, 15), (10, 20), (0, 100)]; let mut ingested = Vec::new(); @@ -2072,10 +2072,13 @@ mod tests { false, ) .unwrap(); - let ext_storage = importer.wrap_kms( - importer.external_storage_or_cache(&backend, "").unwrap(), - false, - ); + let ext_storage = { + let inner = importer.wrap_kms( + importer.external_storage_or_cache(&backend, "").unwrap(), + false, + ); + inner + }; // test do_read_kv_file() let output = block_on_external_io(importer.do_read_kv_file( diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index 654971b0d41..ff7526172d5 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs @@ -97,8 +97,7 @@ pub fn copy_sst_for_ingestion, Q: AsRef>( let mut pmts = file_system::metadata(clone)?.permissions(); if pmts.readonly() { - use std::os::unix::fs::PermissionsExt; - pmts.set_mode(0o644); + pmts.set_readonly(false); file_system::set_permissions(clone, pmts)?; } diff --git a/components/test_coprocessor/src/store.rs b/components/test_coprocessor/src/store.rs index 6763ea7bb1a..96f405d8f39 100644 
--- a/components/test_coprocessor/src/store.rs +++ b/components/test_coprocessor/src/store.rs @@ -203,7 +203,7 @@ impl Store { } pub fn put(&mut self, ctx: Context, mut kv: Vec<(Vec, Vec)>) { - self.handles.extend(kv.iter().map(|(k, _)| k.clone())); + self.handles.extend(kv.iter().map(|&(ref k, _)| k.clone())); let pk = kv[0].0.clone(); let kv = kv .drain(..) diff --git a/components/test_coprocessor_plugin/example_plugin/src/lib.rs b/components/test_coprocessor_plugin/example_plugin/src/lib.rs index d383797c069..afcaa4962b9 100644 --- a/components/test_coprocessor_plugin/example_plugin/src/lib.rs +++ b/components/test_coprocessor_plugin/example_plugin/src/lib.rs @@ -18,4 +18,4 @@ impl CoprocessorPlugin for ExamplePlugin { } } -declare_plugin!(ExamplePlugin); +declare_plugin!(ExamplePlugin::default()); diff --git a/components/test_pd/src/server.rs b/components/test_pd/src/server.rs index 02833e030eb..90a420fbba0 100644 --- a/components/test_pd/src/server.rs +++ b/components/test_pd/src/server.rs @@ -128,8 +128,12 @@ impl Server { } #[allow(unused_mut)] -fn hijack_unary(mock: &PdMock, ctx: RpcContext<'_>, sink: UnarySink, f: F) -where +fn hijack_unary( + mock: &mut PdMock, + ctx: RpcContext<'_>, + sink: UnarySink, + f: F, +) where R: Send + 'static, F: Fn(&dyn PdMocker) -> Option>, { diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index 58df5998758..c81230f6a16 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -1438,7 +1438,7 @@ impl TestPdClient { pub fn switch_replication_mode(&self, state: DrAutoSyncState, available_stores: Vec) { let mut cluster = self.cluster.wl(); let status = cluster.replication_status.as_mut().unwrap(); - let dr = status.mut_dr_auto_sync(); + let mut dr = status.mut_dr_auto_sync(); dr.state_id += 1; dr.set_state(state); dr.available_stores = available_stores; diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs 
index 346813e7d1f..8ede3290167 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -220,7 +220,7 @@ pub trait Simulator { None => { error!("call_query_on_node receives none response"; "request" => ?request); // Do not unwrap here, sometimes raftstore v2 may return none. - Err(box_err!("receives none response {:?}", request)) + return Err(box_err!("receives none response {:?}", request)); } } } @@ -1612,7 +1612,6 @@ impl, EK: KvEngine> Cluster { ) } - #[allow(clippy::let_underscore_future)] pub fn merge_region(&mut self, source: u64, target: u64, _cb: Callback) { // FIXME: callback is ignored. let mut req = self.new_prepare_merge(source, target); diff --git a/components/test_raftstore-v2/src/lib.rs b/components/test_raftstore-v2/src/lib.rs index 45642df1e7f..685affe45d0 100644 --- a/components/test_raftstore-v2/src/lib.rs +++ b/components/test_raftstore-v2/src/lib.rs @@ -3,8 +3,6 @@ #![feature(type_alias_impl_trait)] #![feature(return_position_impl_trait_in_trait)] #![feature(let_chains)] -#![allow(clippy::needless_pass_by_ref_mut)] -#![allow(clippy::arc_with_non_send_sync)] mod cluster; mod node; diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index 70b6ccb1407..d63ca0aa2f2 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -258,7 +258,7 @@ impl Simulator for NodeCluster { ) } else { let trans = self.trans.core.lock().unwrap(); - let (snap_mgr, _) = &trans.snap_paths[&node_id]; + let &(ref snap_mgr, _) = &trans.snap_paths[&node_id]; (snap_mgr.clone(), None) }; self.snap_mgrs.insert(node_id, snap_mgr.clone()); diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index a7d64591fe1..7b5d501a59f 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -1006,18 +1006,7 @@ pub fn must_new_cluster_and_kv_client_mul( 
TikvClient, Context, ) { - must_new_cluster_with_cfg_and_kv_client_mul(count, |_| {}) -} - -pub fn must_new_cluster_with_cfg_and_kv_client_mul( - count: usize, - configure: impl FnMut(&mut Cluster, RocksEngine>), -) -> ( - Cluster, RocksEngine>, - TikvClient, - Context, -) { - let (cluster, leader, ctx) = must_new_and_configure_cluster_mul(count, configure); + let (cluster, leader, ctx) = must_new_cluster_mul(count); let env = Arc::new(Environment::new(1)); let channel = @@ -1026,7 +1015,6 @@ pub fn must_new_cluster_with_cfg_and_kv_client_mul( (cluster, client, ctx) } - pub fn must_new_cluster_mul( count: usize, ) -> ( diff --git a/components/test_raftstore/src/lib.rs b/components/test_raftstore/src/lib.rs index 6f48c17190a..04dfbd24de1 100644 --- a/components/test_raftstore/src/lib.rs +++ b/components/test_raftstore/src/lib.rs @@ -1,8 +1,6 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. #![feature(let_chains)] -#![allow(clippy::needless_pass_by_ref_mut)] -#![allow(clippy::arc_with_non_send_sync)] #[macro_use] extern crate lazy_static; diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 8a9969c1913..f429f27ff8b 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -281,7 +281,7 @@ impl Simulator for NodeCluster { (snap_mgr, Some(tmp)) } else { let trans = self.trans.core.lock().unwrap(); - let (snap_mgr, _) = &trans.snap_paths[&node_id]; + let &(ref snap_mgr, _) = &trans.snap_paths[&node_id]; (snap_mgr.clone(), None) }; diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 0df44b4e784..8d26bae968d 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -918,14 +918,8 @@ pub fn must_new_cluster_and_kv_client() -> (Cluster, TikvClient, pub fn must_new_cluster_and_kv_client_mul( count: usize, ) -> (Cluster, TikvClient, Context) { - 
must_new_cluster_with_cfg_and_kv_client_mul(count, |_| {}) -} + let (cluster, leader, ctx) = must_new_cluster_mul(count); -pub fn must_new_cluster_with_cfg_and_kv_client_mul( - count: usize, - configure: impl FnMut(&mut Cluster), -) -> (Cluster, TikvClient, Context) { - let (cluster, leader, ctx) = must_new_and_configure_cluster_mul(count, configure); let env = Arc::new(Environment::new(1)); let channel = ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); diff --git a/components/tidb_query_codegen/src/rpn_function.rs b/components/tidb_query_codegen/src/rpn_function.rs index ea3017d5d02..33976939c83 100644 --- a/components/tidb_query_codegen/src/rpn_function.rs +++ b/components/tidb_query_codegen/src/rpn_function.rs @@ -1739,24 +1739,27 @@ mod tests_normal { /// Compare TokenStream with all white chars trimmed. fn assert_token_stream_equal(l: TokenStream, r: TokenStream) { - let result = l.clone().into_iter().eq_by(r.clone(), |x, y| match x { - TokenTree::Ident(x) => matches!(y, TokenTree::Ident(y) if x == y), - TokenTree::Literal(x) => { - matches!(y, TokenTree::Literal(y) if x.to_string() == y.to_string()) - } - TokenTree::Punct(x) => { - matches!(y, TokenTree::Punct(y) if x.to_string() == y.to_string()) - } - TokenTree::Group(x) => { - if let TokenTree::Group(y) = y { - assert_token_stream_equal(x.stream(), y.stream()); + let result = l + .clone() + .into_iter() + .eq_by(r.clone().into_iter(), |x, y| match x { + TokenTree::Ident(x) => matches!(y, TokenTree::Ident(y) if x == y), + TokenTree::Literal(x) => { + matches!(y, TokenTree::Literal(y) if x.to_string() == y.to_string()) + } + TokenTree::Punct(x) => { + matches!(y, TokenTree::Punct(y) if x.to_string() == y.to_string()) + } + TokenTree::Group(x) => { + if let TokenTree::Group(y) = y { + assert_token_stream_equal(x.stream(), y.stream()); - true - } else { - false + true + } else { + false + } } - } - }); + }); assert!(result, "expect: {:#?}, actual: {:#?}", &l, &r); } diff 
--git a/components/tidb_query_datatype/src/codec/collation/mod.rs b/components/tidb_query_datatype/src/codec/collation/mod.rs index 738e0020de7..22127e62f49 100644 --- a/components/tidb_query_datatype/src/codec/collation/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/mod.rs @@ -251,7 +251,7 @@ where { #[inline] fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) + C::sort_compare(self.inner.as_ref(), other.inner.as_ref()).ok() } } diff --git a/components/tidb_query_datatype/src/codec/convert.rs b/components/tidb_query_datatype/src/codec/convert.rs index d2bbee78078..418841547ca 100644 --- a/components/tidb_query_datatype/src/codec/convert.rs +++ b/components/tidb_query_datatype/src/codec/convert.rs @@ -574,13 +574,13 @@ pub fn bytes_to_int_without_context(bytes: &[u8]) -> Result { if let Some(&c) = trimed.next() { if c == b'-' { negative = true; - } else if c.is_ascii_digit() { + } else if (b'0'..=b'9').contains(&c) { r = Some(i64::from(c) - i64::from(b'0')); } else if c != b'+' { return Ok(0); } - for c in trimed.take_while(|&c| c.is_ascii_digit()) { + for c in trimed.take_while(|&c| (b'0'..=b'9').contains(c)) { let cur = i64::from(*c - b'0'); r = r.and_then(|r| r.checked_mul(10)).and_then(|r| { if negative { @@ -605,13 +605,13 @@ pub fn bytes_to_uint_without_context(bytes: &[u8]) -> Result { let mut trimed = bytes.iter().skip_while(|&&b| b == b' ' || b == b'\t'); let mut r = Some(0u64); if let Some(&c) = trimed.next() { - if c.is_ascii_digit() { + if (b'0'..=b'9').contains(&c) { r = Some(u64::from(c) - u64::from(b'0')); } else if c != b'+' { return Ok(0); } - for c in trimed.take_while(|&c| c.is_ascii_digit()) { + for c in trimed.take_while(|&c| (b'0'..=b'9').contains(c)) { r = r .and_then(|r| r.checked_mul(10)) .and_then(|r| r.checked_add(u64::from(*c - b'0'))); @@ -856,7 +856,7 @@ pub fn get_valid_int_prefix_helper<'a>( if (c == '+' || c == '-') && i == 0 { continue; } - if c.is_ascii_digit() { + if ('0'..='9').contains(&c) 
{ valid_len = i + 1; continue; } @@ -917,7 +917,7 @@ pub fn get_valid_float_prefix_helper<'a>( break; } e_idx = i - } else if !c.is_ascii_digit() { + } else if !('0'..='9').contains(&c) { break; } else { saw_digit = true; diff --git a/components/tidb_query_datatype/src/codec/data_type/mod.rs b/components/tidb_query_datatype/src/codec/data_type/mod.rs index b464b1119c8..8ca36790824 100644 --- a/components/tidb_query_datatype/src/codec/data_type/mod.rs +++ b/components/tidb_query_datatype/src/codec/data_type/mod.rs @@ -248,7 +248,7 @@ macro_rules! impl_evaluable_type { } #[inline] - fn borrow_scalar_value_ref(v: ScalarValueRef<'_>) -> Option<&Self> { + fn borrow_scalar_value_ref<'a>(v: ScalarValueRef<'a>) -> Option<&'a Self> { match v { ScalarValueRef::$ty(x) => x, other => panic!( diff --git a/components/tidb_query_datatype/src/codec/data_type/scalar.rs b/components/tidb_query_datatype/src/codec/data_type/scalar.rs index ff66ddc42ee..c74423107e4 100644 --- a/components/tidb_query_datatype/src/codec/data_type/scalar.rs +++ b/components/tidb_query_datatype/src/codec/data_type/scalar.rs @@ -467,23 +467,24 @@ impl<'a> ScalarValueRef<'a> { impl<'a> Ord for ScalarValueRef<'a> { fn cmp(&self, other: &Self) -> Ordering { + self.partial_cmp(other) + .expect("Cannot compare two ScalarValueRef in different type") + } +} + +impl<'a> PartialOrd for ScalarValueRef<'a> { + fn partial_cmp(&self, other: &Self) -> Option { match_template_evaltype! { TT, match (self, other) { // v1 and v2 are `Option`. However, in MySQL NULL values are considered lower // than any non-NULL value, so using `Option::PartialOrd` directly is fine. 
- (ScalarValueRef::TT(v1), ScalarValueRef::TT(v2)) => v1.cmp(v2), - _ => panic!("Cannot compare two ScalarValueRef in different type"), + (ScalarValueRef::TT(v1), ScalarValueRef::TT(v2)) => Some(v1.cmp(v2)), + _ => None, } } } } -impl<'a> PartialOrd for ScalarValueRef<'a> { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - impl<'a> PartialEq for ScalarValueRef<'a> { fn eq(&self, other: &ScalarValue) -> bool { self == &other.as_scalar_value_ref() diff --git a/components/tidb_query_datatype/src/codec/datum.rs b/components/tidb_query_datatype/src/codec/datum.rs index f91d204b3b0..dde98003475 100644 --- a/components/tidb_query_datatype/src/codec/datum.rs +++ b/components/tidb_query_datatype/src/codec/datum.rs @@ -668,7 +668,7 @@ impl Datum { Datum::F64(res) } } - (Datum::Dec(l), Datum::Dec(r)) => { + (&Datum::Dec(ref l), &Datum::Dec(ref r)) => { let dec: Result = (l + r).into(); return dec.map(Datum::Dec); } @@ -700,7 +700,7 @@ impl Datum { } (&Datum::U64(l), &Datum::U64(r)) => l.checked_sub(r).into(), (&Datum::F64(l), &Datum::F64(r)) => return Ok(Datum::F64(l - r)), - (Datum::Dec(l), Datum::Dec(r)) => { + (&Datum::Dec(ref l), &Datum::Dec(ref r)) => { let dec: Result = (l - r).into(); return dec.map(Datum::Dec); } @@ -724,7 +724,7 @@ impl Datum { } (&Datum::U64(l), &Datum::U64(r)) => l.checked_mul(r).into(), (&Datum::F64(l), &Datum::F64(r)) => return Ok(Datum::F64(l * r)), - (Datum::Dec(l), Datum::Dec(r)) => return Ok(Datum::Dec((l * r).unwrap())), + (&Datum::Dec(ref l), &Datum::Dec(ref r)) => return Ok(Datum::Dec((l * r).unwrap())), (l, r) => return Err(invalid_type!("{} can't multiply {}", l, r)), }; @@ -1179,7 +1179,7 @@ mod tests { | (&Datum::Null, &Datum::Null) | (&Datum::Time(_), &Datum::Time(_)) | (&Datum::Json(_), &Datum::Json(_)) => true, - (Datum::Dec(d1), Datum::Dec(d2)) => d1.prec_and_frac() == d2.prec_and_frac(), + (&Datum::Dec(ref d1), &Datum::Dec(ref d2)) => d1.prec_and_frac() == d2.prec_and_frac(), _ => false, } } diff 
--git a/components/tidb_query_datatype/src/codec/mysql/decimal.rs b/components/tidb_query_datatype/src/codec/mysql/decimal.rs index 8853a1d6a16..143ec6c7760 100644 --- a/components/tidb_query_datatype/src/codec/mysql/decimal.rs +++ b/components/tidb_query_datatype/src/codec/mysql/decimal.rs @@ -1872,7 +1872,7 @@ impl<'a> ConvertTo for JsonRef<'a> { fn first_non_digit(bs: &[u8], start_idx: usize) -> usize { bs.iter() .skip(start_idx) - .position(|c| !c.is_ascii_digit()) + .position(|c| !(b'0'..=b'9').contains(c)) .map_or_else(|| bs.len(), |s| s + start_idx) } diff --git a/components/tidb_query_datatype/src/codec/mysql/duration.rs b/components/tidb_query_datatype/src/codec/mysql/duration.rs index 4b735977712..7279f788146 100644 --- a/components/tidb_query_datatype/src/codec/mysql/duration.rs +++ b/components/tidb_query_datatype/src/codec/mysql/duration.rs @@ -629,14 +629,14 @@ impl Eq for Duration {} impl PartialOrd for Duration { #[inline] fn partial_cmp(&self, rhs: &Duration) -> Option { - Some(self.cmp(rhs)) + self.nanos.partial_cmp(&rhs.nanos) } } impl Ord for Duration { #[inline] fn cmp(&self, rhs: &Duration) -> Ordering { - self.nanos.partial_cmp(&rhs.nanos).unwrap() + self.partial_cmp(rhs).unwrap() } } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs b/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs index 73e04885890..d9104385bc6 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/comparison.rs @@ -77,8 +77,6 @@ impl<'a> PartialEq for JsonRef<'a> { .map_or(false, |r| r == Ordering::Equal) } } - -#[allow(clippy::incorrect_partial_ord_impl_on_ord_type)] impl<'a> PartialOrd for JsonRef<'a> { // See `CompareBinary` in TiDB `types/json/binary_functions.go` fn partial_cmp(&self, right: &JsonRef<'_>) -> Option { @@ -199,7 +197,7 @@ impl PartialEq for Json { impl PartialOrd for Json { fn partial_cmp(&self, right: &Json) -> Option { - 
Some(self.cmp(right)) + self.as_ref().partial_cmp(&right.as_ref()) } } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs b/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs index f76b29790f9..867d8ec2c20 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs @@ -28,9 +28,9 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryObject` in TiDB `types/json/binary.go` - fn write_json_obj_from_keys_values( + fn write_json_obj_from_keys_values<'a>( &mut self, - mut entries: Vec<(&[u8], JsonRef<'_>)>, + mut entries: Vec<(&[u8], JsonRef<'a>)>, ) -> Result<()> { entries.sort_by(|a, b| a.0.cmp(b.0)); // object: element-count size key-entry* value-entry* key* value* @@ -122,7 +122,7 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryArray` in TiDB `types/json/binary.go` - fn write_json_ref_array(&mut self, data: &[JsonRef<'_>]) -> Result<()> { + fn write_json_ref_array<'a>(&mut self, data: &[JsonRef<'a>]) -> Result<()> { let element_count = data.len(); let value_entries_len = VALUE_ENTRY_LEN * element_count; let values_len = data.iter().fold(0, |acc, v| acc + v.encoded_len()); @@ -167,7 +167,7 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryValElem` in TiDB `types/json/binary.go` - fn write_value_entry(&mut self, value_offset: &mut u32, v: &JsonRef<'_>) -> Result<()> { + fn write_value_entry<'a>(&mut self, value_offset: &mut u32, v: &JsonRef<'a>) -> Result<()> { let tp = v.get_type(); self.write_u8(tp as u8)?; match tp { diff --git a/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs b/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs index 3cc78270d60..b359158d06b 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/json_modify.rs @@ -41,7 +41,7 @@ impl<'a> JsonRef<'a> { } } let mut res = 
self.to_owned(); - for (expr, value) in path_expr_list.iter().zip(values) { + for (expr, value) in path_expr_list.iter().zip(values.into_iter()) { let modifier = BinaryModifier::new(res.as_ref()); res = match mt { ModifyType::Insert => modifier.insert(expr, value)?, diff --git a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs index 621d4384bcc..4c6c2f676d7 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs @@ -1094,7 +1094,7 @@ impl Time { ) } - fn try_into_chrono_datetime(self, ctx: &EvalContext) -> Result> { + fn try_into_chrono_datetime(self, ctx: &mut EvalContext) -> Result> { chrono_datetime( &ctx.cfg.tz, self.year(), @@ -1342,7 +1342,6 @@ impl Time { Ok((((ymd << 17) | hms) << 24) | u64::from(self.micro())) } - #[allow(deprecated)] pub fn from_duration( ctx: &mut EvalContext, duration: Duration, @@ -1416,7 +1415,6 @@ impl Time { .ok_or_else(|| Error::incorrect_datetime_value(self)) } - #[allow(deprecated)] pub fn normalized(self, ctx: &mut EvalContext) -> Result { if self.get_time_type() == TimeType::Timestamp { return Ok(self); @@ -1502,7 +1500,6 @@ impl Time { + self.day()) as i32 } - #[allow(deprecated)] pub fn weekday(self) -> Weekday { let date = if self.month() == 0 { NaiveDate::from_ymd(self.year() as i32 - 1, 12, 1) @@ -2673,9 +2670,9 @@ mod tests { #[test] fn test_no_zero_in_date() -> Result<()> { - let cases = ["2019-01-00", "2019-00-01"]; + let cases = vec!["2019-01-00", "2019-00-01"]; - for case in cases { + for &case in cases.iter() { // Enable NO_ZERO_IN_DATE only. If zero-date is encountered, a warning is // produced. let mut ctx = EvalContext::from(TimeEnv { @@ -2820,7 +2817,7 @@ mod tests { let actual = Time::from_duration(&mut ctx, duration, TimeType::DateTime)?; let today = actual - .try_into_chrono_datetime(&ctx)? + .try_into_chrono_datetime(&mut ctx)? 
.checked_sub_signed(chrono::Duration::nanoseconds(duration.to_nanos())) .unwrap(); @@ -2840,7 +2837,7 @@ mod tests { let mut ctx = EvalContext::default(); for i in 2..10 { let actual = Time::from_local_time(&mut ctx, TimeType::DateTime, i % MAX_FSP)?; - let c_datetime = actual.try_into_chrono_datetime(&ctx)?; + let c_datetime = actual.try_into_chrono_datetime(&mut ctx)?; let now0 = c_datetime.timestamp_millis() as u64; let now1 = Utc::now().timestamp_millis() as u64; diff --git a/components/tidb_query_datatype/src/codec/mysql/time/tz.rs b/components/tidb_query_datatype/src/codec/mysql/time/tz.rs index 9dfc3ebf288..25b35a90fc0 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/tz.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/tz.rs @@ -120,7 +120,6 @@ impl TimeZone for Tz { } } - #[allow(deprecated)] fn from_local_date(&self, local: &NaiveDate) -> LocalResult> { match *self { Tz::Local(ref offset) => offset @@ -135,7 +134,6 @@ impl TimeZone for Tz { } } - #[allow(deprecated)] fn from_local_datetime(&self, local: &NaiveDateTime) -> LocalResult> { match *self { Tz::Local(ref offset) => offset @@ -150,7 +148,6 @@ impl TimeZone for Tz { } } - #[allow(deprecated)] fn from_utc_date(&self, utc: &NaiveDate) -> Date { match *self { Tz::Local(ref offset) => { @@ -168,7 +165,6 @@ impl TimeZone for Tz { } } - #[allow(deprecated)] fn from_utc_datetime(&self, utc: &NaiveDateTime) -> DateTime { match *self { Tz::Local(ref offset) => { diff --git a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs index aa5eb3fc56f..da117c96e2c 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs @@ -298,7 +298,7 @@ impl<'a, T: PrimInt> LeBytes<'a, T> { fn new(slice: &'a [u8]) -> Self { Self { slice, - _marker: PhantomData, + _marker: PhantomData::default(), } } diff --git 
a/components/tidb_query_datatype/src/codec/table.rs b/components/tidb_query_datatype/src/codec/table.rs index 81ef4b072c6..37becbfb801 100644 --- a/components/tidb_query_datatype/src/codec/table.rs +++ b/components/tidb_query_datatype/src/codec/table.rs @@ -528,7 +528,7 @@ pub fn generate_index_data_for_test( let mut expect_row = HashMap::default(); let mut v: Vec<_> = indice .iter() - .map(|(cid, value)| { + .map(|&(ref cid, ref value)| { expect_row.insert( *cid, datum::encode_key(&mut EvalContext::default(), &[value.clone()]).unwrap(), diff --git a/components/tidb_query_executors/src/index_scan_executor.rs b/components/tidb_query_executors/src/index_scan_executor.rs index 5ebf8a031d3..3a5c53a4d09 100644 --- a/components/tidb_query_executors/src/index_scan_executor.rs +++ b/components/tidb_query_executors/src/index_scan_executor.rs @@ -611,8 +611,8 @@ impl IndexScanExecutorImpl { } #[inline] - fn build_operations<'a>( - &self, + fn build_operations<'a, 'b>( + &'b self, mut key_payload: &'a [u8], index_value: &'a [u8], ) -> Result<(DecodeHandleOp<'a>, DecodePartitionIdOp<'a>, RestoreData<'a>)> { diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 27e52dde288..7c410befb25 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -137,31 +137,31 @@ impl BatchExecutorsRunner<()> { .map_err(|e| other_err!("BatchProjectionExecutor: {}", e))?; } ExecType::TypeJoin => { - return Err(other_err!("Join executor not implemented")); + other_err!("Join executor not implemented"); } ExecType::TypeKill => { - return Err(other_err!("Kill executor not implemented")); + other_err!("Kill executor not implemented"); } ExecType::TypeExchangeSender => { - return Err(other_err!("ExchangeSender executor not implemented")); + other_err!("ExchangeSender executor not implemented"); } ExecType::TypeExchangeReceiver => { - return Err(other_err!("ExchangeReceiver executor not 
implemented")); + other_err!("ExchangeReceiver executor not implemented"); } ExecType::TypePartitionTableScan => { - return Err(other_err!("PartitionTableScan executor not implemented")); + other_err!("PartitionTableScan executor not implemented"); } ExecType::TypeSort => { - return Err(other_err!("Sort executor not implemented")); + other_err!("Sort executor not implemented"); } ExecType::TypeWindow => { - return Err(other_err!("Window executor not implemented")); + other_err!("Window executor not implemented"); } ExecType::TypeExpand => { - return Err(other_err!("Expand executor not implemented")); + other_err!("Expand executor not implemented"); } ExecType::TypeExpand2 => { - return Err(other_err!("Expand2 executor not implemented")); + other_err!("Expand2 executor not implemented"); } } } diff --git a/components/tidb_query_executors/src/selection_executor.rs b/components/tidb_query_executors/src/selection_executor.rs index ffcb22671da..bd65547109d 100644 --- a/components/tidb_query_executors/src/selection_executor.rs +++ b/components/tidb_query_executors/src/selection_executor.rs @@ -537,7 +537,7 @@ mod tests { }) .collect(); - for predicates in [ + for predicates in vec![ // Swap predicates should produce same results. vec![predicate[0](), predicate[1]()], vec![predicate[1](), predicate[0]()], @@ -572,7 +572,7 @@ mod tests { }) .collect(); - for predicates in [ + for predicates in vec![ // Swap predicates should produce same results. 
vec![predicate[0](), predicate[1](), predicate[2]()], vec![predicate[1](), predicate[2](), predicate[0]()], diff --git a/components/tidb_query_executors/src/util/aggr_executor.rs b/components/tidb_query_executors/src/util/aggr_executor.rs index a5d760dc80d..0535e8dbd83 100644 --- a/components/tidb_query_executors/src/util/aggr_executor.rs +++ b/components/tidb_query_executors/src/util/aggr_executor.rs @@ -641,8 +641,8 @@ pub mod tests { )) as Box> }; - let test_paging_size = [2, 5, 7]; - let expect_call_num = [1, 3, 4]; + let test_paging_size = vec![2, 5, 7]; + let expect_call_num = vec![1, 3, 4]; let expect_row_num = vec![vec![4], vec![0, 0, 5], vec![0, 0, 0, 6]]; let executor_builders: Vec) -> _>> = vec![Box::new(exec_fast), Box::new(exec_slow)]; diff --git a/components/tidb_query_executors/src/util/mod.rs b/components/tidb_query_executors/src/util/mod.rs index db456a84883..ca05e49fcd3 100644 --- a/components/tidb_query_executors/src/util/mod.rs +++ b/components/tidb_query_executors/src/util/mod.rs @@ -28,13 +28,13 @@ pub fn ensure_columns_decoded( /// Evaluates expressions and outputs the result into the given Vec. Lifetime of /// the expressions are erased. 
-pub unsafe fn eval_exprs_decoded_no_lifetime( +pub unsafe fn eval_exprs_decoded_no_lifetime<'a>( ctx: &mut EvalContext, exprs: &[RpnExpression], schema: &[FieldType], input_physical_columns: &LazyBatchColumnVec, input_logical_rows: &[usize], - output: &mut Vec>, + output: &mut Vec>, ) -> Result<()> { unsafe fn erase_lifetime<'a, T: ?Sized>(v: &T) -> &'a T { &*(v as *const T) diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index b6619f9d8cc..76e90f79c5b 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -6528,7 +6528,7 @@ mod tests { "cast_decimal_as_duration", ); - let values = [ + let values = vec![ Decimal::from_bytes(b"9995959").unwrap().unwrap(), Decimal::from_bytes(b"-9995959").unwrap().unwrap(), ]; diff --git a/components/tidb_query_expr/src/impl_miscellaneous.rs b/components/tidb_query_expr/src/impl_miscellaneous.rs index 663571804ae..5d2daed7f9a 100644 --- a/components/tidb_query_expr/src/impl_miscellaneous.rs +++ b/components/tidb_query_expr/src/impl_miscellaneous.rs @@ -58,7 +58,7 @@ pub fn inet_aton(addr: BytesRef) -> Result> { } let (mut byte_result, mut result, mut dot_count): (u64, u64, usize) = (0, 0, 0); for c in addr.chars() { - if c.is_ascii_digit() { + if ('0'..='9').contains(&c) { let digit = c as u64 - '0' as u64; byte_result = byte_result * 10 + digit; if byte_result > 255 { @@ -501,9 +501,8 @@ mod tests { (Some(hex("00000000")), Some(b"0.0.0.0".to_vec())), (Some(hex("0A000509")), Some(b"10.0.5.9".to_vec())), ( - // the output format has changed, see: https://github.com/rust-lang/rust/pull/112606 Some(hex("00000000000000000000000001020304")), - Some(b"::102:304".to_vec()), + Some(b"::1.2.3.4".to_vec()), ), ( Some(hex("00000000000000000000FFFF01020304")), diff --git a/components/tidb_query_expr/src/impl_string.rs b/components/tidb_query_expr/src/impl_string.rs index 45754d0a101..f3b9b03c287 100644 --- 
a/components/tidb_query_expr/src/impl_string.rs +++ b/components/tidb_query_expr/src/impl_string.rs @@ -63,13 +63,13 @@ pub fn oct_string(s: BytesRef, writer: BytesWriter) -> Result { if let Some(&c) = trimmed.next() { if c == b'-' { negative = true; - } else if c.is_ascii_digit() { + } else if (b'0'..=b'9').contains(&c) { r = Some(u64::from(c) - u64::from(b'0')); } else if c != b'+' { return Ok(writer.write(Some(b"0".to_vec()))); } - for c in trimmed.take_while(|&c| c.is_ascii_digit()) { + for c in trimmed.take_while(|&c| (b'0'..=b'9').contains(c)) { r = r .and_then(|r| r.checked_mul(10)) .and_then(|r| r.checked_add(u64::from(*c - b'0'))); @@ -879,7 +879,7 @@ impl TrimDirection { } #[inline] -fn trim<'a>(string: &'a [u8], pattern: &[u8], direction: TrimDirection) -> &'a [u8] { +fn trim<'a, 'b>(string: &'a [u8], pattern: &'b [u8], direction: TrimDirection) -> &'a [u8] { if pattern.is_empty() { return string; } diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index 40c1f485e54..c2ef6722148 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -10,8 +10,6 @@ #![allow(elided_lifetimes_in_paths)] // Necessary until rpn_fn accepts functions annotated with lifetimes. 
#![allow(incomplete_features)] -#![allow(clippy::needless_raw_string_hashes)] -#![allow(clippy::needless_return_with_question_mark)] #![feature(proc_macro_hygiene)] #![feature(specialization)] #![feature(test)] diff --git a/components/tidb_query_expr/src/types/expr_eval.rs b/components/tidb_query_expr/src/types/expr_eval.rs index e3ab7d35297..b892333b0ef 100644 --- a/components/tidb_query_expr/src/types/expr_eval.rs +++ b/components/tidb_query_expr/src/types/expr_eval.rs @@ -1091,13 +1091,16 @@ mod tests { use tipb::{Expr, ScalarFuncSig}; #[allow(clippy::trivially_copy_pass_by_ref)] - #[rpn_fn(capture = [metadata], metadata_mapper = prepare_a)] - fn fn_a_nonnull(metadata: &i64, v: &Int) -> Result> { + #[rpn_fn(capture = [metadata], metadata_mapper = prepare_a::)] + fn fn_a_nonnull( + metadata: &i64, + v: &Int, + ) -> Result> { assert_eq!(*metadata, 42); Ok(Some(v + *metadata)) } - fn prepare_a(_expr: &mut Expr) -> Result { + fn prepare_a(_expr: &mut Expr) -> Result { Ok(42) } @@ -1133,7 +1136,7 @@ mod tests { // fn_b: CastIntAsReal // fn_c: CastIntAsString Ok(match expr.get_sig() { - ScalarFuncSig::CastIntAsInt => fn_a_nonnull_fn_meta(), + ScalarFuncSig::CastIntAsInt => fn_a_nonnull_fn_meta::(), ScalarFuncSig::CastIntAsReal => fn_b_fn_meta::(), ScalarFuncSig::CastIntAsString => fn_c_fn_meta::(), _ => unreachable!(), diff --git a/components/tikv_kv/src/cursor.rs b/components/tikv_kv/src/cursor.rs index 858edfffec2..576aa5cfa76 100644 --- a/components/tikv_kv/src/cursor.rs +++ b/components/tikv_kv/src/cursor.rs @@ -605,7 +605,7 @@ mod tests { (b"a9".to_vec(), b"v9".to_vec()), ]; - for (k, v) in &base_data { + for &(ref k, ref v) in &base_data { engine.put(&data_key(k), v).unwrap(); } (r, base_data) diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 43e5f1bea05..25f58352750 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -9,7 +9,6 @@ #![feature(min_specialization)] #![feature(type_alias_impl_trait)] 
#![feature(associated_type_defaults)] -#![feature(impl_trait_in_assoc_type)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/tikv_util/src/logger/formatter.rs b/components/tikv_util/src/logger/formatter.rs index b786d2aa681..c53c5896519 100644 --- a/components/tikv_util/src/logger/formatter.rs +++ b/components/tikv_util/src/logger/formatter.rs @@ -11,9 +11,9 @@ where let mut start = 0; let bytes = file_name.as_bytes(); for (index, &b) in bytes.iter().enumerate() { - if b.is_ascii_uppercase() - || b.is_ascii_lowercase() - || b.is_ascii_digit() + if (b'A'..=b'Z').contains(&b) + || (b'a'..=b'z').contains(&b) + || (b'0'..=b'9').contains(&b) || b == b'.' || b == b'-' || b == b'_' diff --git a/components/tikv_util/src/lru.rs b/components/tikv_util/src/lru.rs index a2d0943df90..76fad6e8a34 100644 --- a/components/tikv_util/src/lru.rs +++ b/components/tikv_util/src/lru.rs @@ -247,7 +247,7 @@ where HashMapEntry::Occupied(mut e) => { self.size_policy.on_remove(e.key(), &e.get().value); self.size_policy.on_insert(e.key(), &value); - let entry = e.get_mut(); + let mut entry = e.get_mut(); self.trace.promote(entry.record); entry.value = value; } diff --git a/components/tikv_util/src/memory.rs b/components/tikv_util/src/memory.rs index a2897809683..291254c5227 100644 --- a/components/tikv_util/src/memory.rs +++ b/components/tikv_util/src/memory.rs @@ -33,7 +33,7 @@ pub trait HeapSize { impl HeapSize for [u8] { fn heap_size(&self) -> usize { - mem::size_of_val(self) + self.len() * mem::size_of::() } } diff --git a/components/tikv_util/src/metrics/allocator_metrics.rs b/components/tikv_util/src/metrics/allocator_metrics.rs index af22e411767..260aa88ac8e 100644 --- a/components/tikv_util/src/metrics/allocator_metrics.rs +++ b/components/tikv_util/src/metrics/allocator_metrics.rs @@ -64,7 +64,7 @@ impl Collector for AllocStatsCollector { .set(dealloc as _); }); let mut g = self.memory_stats.collect(); - g.extend(self.allocation.collect()); + 
g.extend(self.allocation.collect().into_iter()); g } } diff --git a/components/tikv_util/src/mpsc/future.rs b/components/tikv_util/src/mpsc/future.rs index 354ef74adb0..4492e33a933 100644 --- a/components/tikv_util/src/mpsc/future.rs +++ b/components/tikv_util/src/mpsc/future.rs @@ -302,8 +302,6 @@ mod tests { use super::*; - // the JoinHandler is useless here, so just ignore this warning. - #[allow(clippy::let_underscore_future)] fn spawn_and_wait( rx_builder: impl FnOnce() -> S, ) -> (Runtime, Arc) { diff --git a/components/tikv_util/src/sys/cpu_time.rs b/components/tikv_util/src/sys/cpu_time.rs index 61608d1518f..6ec1621c629 100644 --- a/components/tikv_util/src/sys/cpu_time.rs +++ b/components/tikv_util/src/sys/cpu_time.rs @@ -333,7 +333,7 @@ mod tests { for _ in 0..num * 10 { std::thread::spawn(move || { loop { - let _ = (0..10_000_000).sum::(); + let _ = (0..10_000_000).into_iter().sum::(); } }); } diff --git a/components/tikv_util/src/timer.rs b/components/tikv_util/src/timer.rs index a7a2b421ab0..bb555e11794 100644 --- a/components/tikv_util/src/timer.rs +++ b/components/tikv_util/src/timer.rs @@ -81,14 +81,14 @@ impl Eq for TimeoutTask {} impl PartialOrd for TimeoutTask { fn partial_cmp(&self, other: &TimeoutTask) -> Option { - Some(self.cmp(other)) + self.next_tick.partial_cmp(&other.next_tick) } } impl Ord for TimeoutTask { fn cmp(&self, other: &TimeoutTask) -> Ordering { // TimeoutTask.next_tick must have same type of instants. - self.next_tick.partial_cmp(&other.next_tick).unwrap() + self.partial_cmp(other).unwrap() } } diff --git a/components/txn_types/src/timestamp.rs b/components/txn_types/src/timestamp.rs index 79727575d60..fb0cd900123 100644 --- a/components/txn_types/src/timestamp.rs +++ b/components/txn_types/src/timestamp.rs @@ -118,10 +118,9 @@ impl slog::Value for TimeStamp { const TS_SET_USE_VEC_LIMIT: usize = 8; /// A hybrid immutable set for timestamps. 
-#[derive(Debug, Default, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum TsSet { /// When the set is empty, avoid the useless cloning of Arc. - #[default] Empty, /// `Vec` is suitable when the set is small or the set is barely used, and /// it doesn't worth converting a `Vec` into a `HashSet`. @@ -131,6 +130,13 @@ pub enum TsSet { Set(Arc>), } +impl Default for TsSet { + #[inline] + fn default() -> TsSet { + TsSet::Empty + } +} + impl TsSet { /// Create a `TsSet` from the given vec of timestamps. It will select the /// proper internal collection type according to the size. diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 5305e3ec69a..624ac81212d 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -451,7 +451,7 @@ impl From for Mutation { /// `OldValue` is used by cdc to read the previous value associated with some /// key during the prewrite process. -#[derive(Debug, Default, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum OldValue { /// A real `OldValue`. Value { value: Value }, @@ -460,13 +460,18 @@ pub enum OldValue { /// `None` means we don't found a previous value. None, /// The user doesn't care about the previous value. - #[default] Unspecified, /// Not sure whether the old value exists or not. users can seek CF_WRITE to /// the give position to take a look. SeekWrite(Key), } +impl Default for OldValue { + fn default() -> Self { + OldValue::Unspecified + } +} + impl OldValue { pub fn value(value: Value) -> Self { OldValue::Value { value } @@ -585,9 +590,8 @@ impl WriteBatchFlags { /// The position info of the last actual write (PUT or DELETE) of a LOCK record. /// Note that if the last change is a DELETE, its LastChange can be either /// Exist(which points to it) or NotExist. 
-#[derive(Clone, Default, Eq, PartialEq, Debug)] +#[derive(Clone, Eq, PartialEq, Debug)] pub enum LastChange { - #[default] Unknown, /// The pointer may point to a PUT or a DELETE record. Exist { @@ -643,6 +647,12 @@ impl LastChange { } } +impl Default for LastChange { + fn default() -> Self { + LastChange::Unknown + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/rust-toolchain b/rust-toolchain index c1eb62e26cb..4e5f9a4d82b 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly-2023-08-15 +nightly-2022-11-15 diff --git a/src/config/mod.rs b/src/config/mod.rs index 63e36a543dc..8318556483e 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1482,7 +1482,7 @@ impl DbConfig { opts.set_paranoid_checks(b); } if for_engine == EngineType::RaftKv { - opts.set_info_log(RocksdbLogger); + opts.set_info_log(RocksdbLogger::default()); } opts.set_info_log_level(self.info_log_level.into()); if self.titan.enabled { @@ -1858,7 +1858,7 @@ impl RaftDbConfig { opts.set_max_log_file_size(self.info_log_max_size.0); opts.set_log_file_time_to_roll(self.info_log_roll_time.as_secs()); opts.set_keep_log_file_num(self.info_log_keep_log_file_num); - opts.set_info_log(RaftDbLogger); + opts.set_info_log(RaftDbLogger::default()); opts.set_info_log_level(self.info_log_level.into()); opts.set_max_subcompactions(self.max_sub_compactions); opts.set_writable_file_max_buffer_size(self.writable_file_max_buffer_size.0 as i32); @@ -2015,7 +2015,7 @@ impl ConfigManager for DbConfigManger { self.cfg.update(change.clone())?; let change_str = format!("{:?}", change); let mut change: Vec<(String, ConfigValue)> = change.into_iter().collect(); - let cf_config = change.extract_if(|(name, _)| name.ends_with("cf")); + let cf_config = change.drain_filter(|(name, _)| name.ends_with("cf")); for (cf_name, cf_change) in cf_config { if let ConfigValue::Module(mut cf_change) = cf_change { // defaultcf -> default @@ -2049,7 +2049,7 @@ impl ConfigManager for DbConfigManger { } if let 
Some(rate_bytes_config) = change - .extract_if(|(name, _)| name == "rate_bytes_per_sec") + .drain_filter(|(name, _)| name == "rate_bytes_per_sec") .next() { let rate_bytes_per_sec: ReadableSize = rate_bytes_config.1.into(); @@ -2058,7 +2058,7 @@ impl ConfigManager for DbConfigManger { } if let Some(rate_bytes_config) = change - .extract_if(|(name, _)| name == "rate_limiter_auto_tuned") + .drain_filter(|(name, _)| name == "rate_limiter_auto_tuned") .next() { let rate_limiter_auto_tuned: bool = rate_bytes_config.1.into(); @@ -2067,7 +2067,7 @@ impl ConfigManager for DbConfigManger { } if let Some(size) = change - .extract_if(|(name, _)| name == "write_buffer_limit") + .drain_filter(|(name, _)| name == "write_buffer_limit") .next() { let size: ReadableSize = size.1.into(); @@ -2075,14 +2075,14 @@ impl ConfigManager for DbConfigManger { } if let Some(f) = change - .extract_if(|(name, _)| name == "write_buffer_flush_oldest_first") + .drain_filter(|(name, _)| name == "write_buffer_flush_oldest_first") .next() { self.db.set_flush_oldest_first(f.1.into())?; } if let Some(background_jobs_config) = change - .extract_if(|(name, _)| name == "max_background_jobs") + .drain_filter(|(name, _)| name == "max_background_jobs") .next() { let max_background_jobs: i32 = background_jobs_config.1.into(); @@ -2090,7 +2090,7 @@ impl ConfigManager for DbConfigManger { } if let Some(background_subcompactions_config) = change - .extract_if(|(name, _)| name == "max_sub_compactions") + .drain_filter(|(name, _)| name == "max_sub_compactions") .next() { let max_subcompactions: u32 = background_subcompactions_config.1.into(); @@ -2099,7 +2099,7 @@ impl ConfigManager for DbConfigManger { } if let Some(background_flushes_config) = change - .extract_if(|(name, _)| name == "max_background_flushes") + .drain_filter(|(name, _)| name == "max_background_flushes") .next() { let max_background_flushes: i32 = background_flushes_config.1.into(); diff --git a/src/coprocessor/metrics.rs 
b/src/coprocessor/metrics.rs index 7d2d7e9e947..02f45d35311 100644 --- a/src/coprocessor/metrics.rs +++ b/src/coprocessor/metrics.rs @@ -285,7 +285,7 @@ pub fn tls_collect_scan_details(cmd: ReqTag, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_default() + .or_insert_with(Default::default) .add(stats); }); } diff --git a/src/coprocessor/mod.rs b/src/coprocessor/mod.rs index 874917130e4..fcd16f9b947 100644 --- a/src/coprocessor/mod.rs +++ b/src/coprocessor/mod.rs @@ -64,13 +64,11 @@ type HandlerStreamStepResult = Result<(Option, bool)>; #[async_trait] pub trait RequestHandler: Send { /// Processes current request and produces a response. - #[allow(clippy::diverging_sub_expression)] async fn handle_request(&mut self) -> Result> { panic!("unary request is not supported for this handler"); } /// Processes current request and produces streaming responses. - #[allow(clippy::diverging_sub_expression)] async fn handle_streaming_request(&mut self) -> HandlerStreamStepResult { panic!("streaming request is not supported for this handler"); } diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 1a670c917ca..6d40ffe959c 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -66,9 +66,9 @@ const REQUEST_WRITE_CONCURRENCY: usize = 16; /// bytes. In detail, they are: /// - 2 bytes for the request type (Tag+Value). /// - 2 bytes for every string or bytes field (Tag+Length), they are: -/// . + the key field -/// . + the value field -/// . + the CF field (None for CF_DEFAULT) +/// . + the key field +/// . + the value field +/// . + the CF field (None for CF_DEFAULT) /// - 2 bytes for the embedded message field `PutRequest` (Tag+Length). /// - 2 bytes for the request itself (which would be embedded into a /// [`RaftCmdRequest`].) 
diff --git a/src/lib.rs b/src/lib.rs index aafb099c6cc..b3e9ebaf8e8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,14 +23,13 @@ #![feature(proc_macro_hygiene)] #![feature(min_specialization)] #![feature(box_patterns)] -#![feature(extract_if)] +#![feature(drain_filter)] #![feature(deadline_api)] #![feature(let_chains)] #![feature(read_buf)] #![feature(type_alias_impl_trait)] #![allow(incomplete_features)] #![feature(return_position_impl_trait_in_trait)] -#![feature(impl_trait_in_assoc_type)] #[macro_use(fail_point)] extern crate fail; diff --git a/src/server/debug2.rs b/src/server/debug2.rs index 7060b20bdb2..4230828dff1 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -1113,7 +1113,7 @@ fn get_tablet_cache( "tablet load failed, region_state {:?}", region_state.get_state() ); - Err(box_err!(e)) + return Err(box_err!(e)); } } } diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index fe5a252b8db..665824a1bac 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -826,7 +826,6 @@ pub mod test_utils { use crate::storage::kv::RocksEngine as StorageRocksEngine; /// Do a global GC with the given safe point. - #[allow(clippy::needless_pass_by_ref_mut)] pub fn gc_by_compact(engine: &mut StorageRocksEngine, _: &[u8], safe_point: u64) { let engine = engine.get_rocksdb(); // Put a new key-value pair to ensure compaction can be triggered correctly. diff --git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index d2dc6532200..be18f8216d5 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -546,9 +546,7 @@ impl GcMan ) -> GcManagerResult> { // Get the information of the next region to do GC. 
let (region, next_key) = self.get_next_gc_context(from_key); - let Some(region) = region else { - return Ok(None); - }; + let Some(region) = region else { return Ok(None) }; let hex_start = format!("{:?}", log_wrappers::Value::key(region.get_start_key())); let hex_end = format!("{:?}", log_wrappers::Value::key(region.get_end_key())); @@ -809,7 +807,7 @@ mod tests { // Following code asserts gc_tasks == expected_gc_tasks. assert_eq!(gc_tasks.len(), expected_gc_tasks.len()); - let all_passed = gc_tasks.into_iter().zip(expected_gc_tasks).all( + let all_passed = gc_tasks.into_iter().zip(expected_gc_tasks.into_iter()).all( |((region, safe_point), (expect_region, expect_safe_point))| { region == expect_region && safe_point == expect_safe_point.into() }, @@ -886,7 +884,7 @@ mod tests { #[test] fn test_auto_gc_rewinding() { - for regions in [ + for regions in vec![ // First region starts with empty and last region ends with empty. vec![ (b"".to_vec(), b"1".to_vec(), 1), diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index de40975632f..c608470ba87 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -254,7 +254,7 @@ fn get_keys_in_region(keys: &mut Peekable>, region: &Region) -> Ve let mut keys_in_region = Vec::new(); loop { - let Some(key) = keys.peek() else { break }; + let Some(key) = keys.peek() else {break}; let key = key.as_encoded().as_slice(); if key < region.get_start_key() { @@ -552,7 +552,7 @@ impl GcRunner { let mut keys = keys.into_iter().peekable(); for region in regions { let mut raw_modifies = MvccRaw::new(); - let snapshot = self.get_snapshot(self.store_id, ®ion)?; + let mut snapshot = self.get_snapshot(self.store_id, ®ion)?; let mut keys_in_region = get_keys_in_region(&mut keys, ®ion).into_iter(); let mut next_gc_key = keys_in_region.next(); @@ -563,7 +563,7 @@ impl GcRunner { &range_start_key, &range_end_key, &mut raw_modifies, - &snapshot, + &mut snapshot, &mut gc_info, ) { 
GC_KEY_FAILURES.inc(); @@ -615,7 +615,7 @@ impl GcRunner { range_start_key: &Key, range_end_key: &Key, raw_modifies: &mut MvccRaw, - kv_snapshot: &::Snap, + kv_snapshot: &mut ::Snap, gc_info: &mut GcInfo, ) -> Result<()> { let start_key = key.clone().append_ts(safe_point.prev()); @@ -669,7 +669,10 @@ impl GcRunner { } pub fn mut_stats(&mut self, key_mode: GcKeyMode) -> &mut Statistics { - let stats = self.stats_map.entry(key_mode).or_default(); + let stats = self + .stats_map + .entry(key_mode) + .or_insert_with(Default::default); stats } @@ -2266,6 +2269,7 @@ mod tests { fn generate_keys(start: u64, end: u64) -> Vec { (start..end) + .into_iter() .map(|i| { let key = format!("k{:02}", i); Key::from_raw(key.as_bytes()) diff --git a/src/server/lock_manager/deadlock.rs b/src/server/lock_manager/deadlock.rs index 938dfaff8a6..9583df80dd6 100644 --- a/src/server/lock_manager/deadlock.rs +++ b/src/server/lock_manager/deadlock.rs @@ -361,15 +361,20 @@ impl DetectTable { } /// The role of the detector. -#[derive(Debug, Default, PartialEq, Clone, Copy)] +#[derive(Debug, PartialEq, Clone, Copy)] pub enum Role { /// The node is the leader of the detector. Leader, /// The node is a follower of the leader. 
- #[default] Follower, } +impl Default for Role { + fn default() -> Role { + Role::Follower + } +} + impl From for Role { fn from(role: StateRole) -> Role { match role { diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 58287c2bb83..82563666f04 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -306,7 +306,6 @@ struct WriteResFeed { unsafe impl Send for WriteResFeed {} impl WriteResFeed { - #[allow(clippy::arc_with_non_send_sync)] fn pair() -> (Self, WriteResSub) { let core = Arc::new(WriteResCore { ev: AtomicU8::new(0), @@ -582,9 +581,7 @@ where tx.notify(res); } rx.inspect(move |ev| { - let WriteEvent::Finished(res) = ev else { - return; - }; + let WriteEvent::Finished(res) = ev else { return }; match res { Ok(()) => { ASYNC_REQUESTS_COUNTER_VEC.write.success.inc(); diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 9785e821312..5183ecd6567 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -315,9 +315,7 @@ impl tikv_kv::Engine for RaftKv2 { early_err: res.err(), }) .inspect(move |ev| { - let WriteEvent::Finished(res) = ev else { - return; - }; + let WriteEvent::Finished(res) = ev else { return }; match res { Ok(()) => { ASYNC_REQUESTS_COUNTER_VEC.write.success.inc(); diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index 73a15983bd0..d9b17c5d35c 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -269,9 +269,7 @@ where /// Stops the Node. 
pub fn stop(&mut self) { let store_id = self.store.get_id(); - let Some((_, mut system)) = self.system.take() else { - return; - }; + let Some((_, mut system)) = self.system.take() else { return }; info!(self.logger, "stop raft store thread"; "store_id" => store_id); system.shutdown(); } diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index 497d8240684..d0b715542d5 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -300,6 +300,7 @@ where let debugger = self.debugger.clone(); let res = self.pool.spawn(async move { + let req = req; debugger .compact( req.get_db(), diff --git a/src/server/service/diagnostics/log.rs b/src/server/service/diagnostics/log.rs index 413e36a6645..8e77d65233e 100644 --- a/src/server/service/diagnostics/log.rs +++ b/src/server/service/diagnostics/log.rs @@ -612,7 +612,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = [ + let expected = vec![ "2019/08/23 18:09:56.387 +08:00", "2019/08/23 18:09:56.387 +08:00", // for invalid line "2019/08/23 18:09:57.387 +08:00", @@ -639,7 +639,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = [ + let expected = vec![ "2019/08/23 18:09:56.387 +08:00", "2019/08/23 18:09:56.387 +08:00", // for invalid line "2019/08/23 18:09:57.387 +08:00", @@ -662,7 +662,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = ["2019/08/23 18:09:53.387 +08:00"] + let expected = vec!["2019/08/23 18:09:53.387 +08:00"] .iter() .map(|s| timestamp(s)) .collect::>(); @@ -671,7 +671,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# expected ); - for time in [0, i64::MAX].into_iter() { + for time in vec![0, i64::MAX].into_iter() { let log_iter = LogIterator::new( &log_file, timestamp("2019/08/23 18:09:53.387 +08:00"), @@ -680,7 +680,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = [ + let expected = vec![ 
"2019/08/23 18:09:58.387 +08:00", "2019/08/23 18:09:59.387 +08:00", "2019/08/23 18:10:06.387 +08:00", @@ -704,7 +704,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![regex::Regex::new(".*test-filter.*").unwrap()], ) .unwrap(); - let expected = [ + let expected = vec![ "2019/08/23 18:09:58.387 +08:00", "2019/08/23 18:10:06.387 +08:00", // for invalid line ] @@ -783,7 +783,7 @@ Some invalid logs 2: Welcome to TiKV - test-filter"# req.set_end_time(i64::MAX); req.set_levels(vec![LogLevel::Warn as _]); req.set_patterns(vec![".*test-filter.*".to_string()].into()); - let expected = [ + let expected = vec![ "2019/08/23 18:09:58.387 +08:00", "2019/08/23 18:11:58.387 +08:00", "2019/08/23 18:11:59.387 +08:00", // for invalid line @@ -796,7 +796,9 @@ Some invalid logs 2: Welcome to TiKV - test-filter"# s.collect::>() .await .into_iter() - .flat_map(|mut resp| resp.take_messages().into_iter()) + .map(|mut resp| resp.take_messages().into_iter()) + .into_iter() + .flatten() .map(|msg| msg.get_time()) .collect::>() }); diff --git a/src/server/service/diagnostics/sys.rs b/src/server/service/diagnostics/sys.rs index 12494e9e7c4..8a84eaf6293 100644 --- a/src/server/service/diagnostics/sys.rs +++ b/src/server/service/diagnostics/sys.rs @@ -601,7 +601,7 @@ mod tests { ] ); // memory - for name in ["virtual", "swap"].into_iter() { + for name in vec!["virtual", "swap"].into_iter() { let item = collector .iter() .find(|x| x.get_tp() == "memory" && x.get_name() == name); diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 4a961eedf19..77f92d33d95 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -889,6 +889,7 @@ impl Tikv for Service { forward_duplex!(self.proxy, batch_commands, ctx, stream, sink); let (tx, rx) = unbounded(WakePolicy::TillReach(GRPC_MSG_NOTIFY_SIZE)); + let ctx = Arc::new(ctx); let peer = ctx.peer(); let storage = self.storage.clone(); let copr = self.copr.clone(); diff --git 
a/src/storage/lock_manager/lock_wait_context.rs b/src/storage/lock_manager/lock_wait_context.rs index 1eba8cd81b7..32c99867a3f 100644 --- a/src/storage/lock_manager/lock_wait_context.rs +++ b/src/storage/lock_manager/lock_wait_context.rs @@ -387,9 +387,9 @@ mod tests { let res = rx.recv().unwrap().unwrap_err(); assert!(matches!( &res, - StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( - box MvccErrorInner::WriteConflict { .. }, - ))))) + StorageError(box StorageErrorInner::Txn(TxnError( + box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::WriteConflict { .. })) + ))) )); // The tx should be dropped. rx.recv().unwrap_err(); @@ -422,9 +422,9 @@ mod tests { let res = rx.recv().unwrap().unwrap_err(); assert!(matches!( &res, - StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( - box MvccErrorInner::KeyIsLocked(_), - ))))) + StorageError(box StorageErrorInner::Txn(TxnError( + box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::KeyIsLocked(_))) + ))) )); // Since the cancellation callback can fully execute only when it's successfully // removed from the lock waiting queues, it's impossible that `finish_request` diff --git a/src/storage/lock_manager/lock_waiting_queue.rs b/src/storage/lock_manager/lock_waiting_queue.rs index 68e0118610a..a81248fe9e2 100644 --- a/src/storage/lock_manager/lock_waiting_queue.rs +++ b/src/storage/lock_manager/lock_waiting_queue.rs @@ -110,7 +110,12 @@ impl Eq for LockWaitEntry {} impl PartialOrd for LockWaitEntry { fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) + // Reverse it since the priority queue is a max heap and we want to pop the + // minimal. 
+ other + .parameters + .start_ts + .partial_cmp(&self.parameters.start_ts) } } diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index d3b3e89a3f8..e9477b56b0f 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -63,7 +63,7 @@ pub fn tls_collect_scan_details(cmd: CommandKind, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_default() + .or_insert_with(Default::default) .add(stats); }); } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index b8224df696b..cb4057bfd7e 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1946,7 +1946,7 @@ impl Storage { key_ranges.push(build_key_range(k.as_encoded(), k.as_encoded(), false)); (k, v) }) - .filter(|(_, v)| !(v.is_ok() && v.as_ref().unwrap().is_none())) + .filter(|&(_, ref v)| !(v.is_ok() && v.as_ref().unwrap().is_none())) .map(|(k, v)| match v { Ok(v) => { let (user_key, _) = F::decode_raw_key_owned(k, false).unwrap(); @@ -3892,9 +3892,9 @@ mod tests { let result = block_on(storage.get(Context::default(), Key::from_raw(b"x"), 100.into())); assert!(matches!( result, - Err(Error(box ErrorInner::Txn(txn::Error(box txn::ErrorInner::Mvcc(mvcc::Error( - box mvcc::ErrorInner::KeyIsLocked { .. }, - )))))) + Err(Error(box ErrorInner::Txn(txn::Error( + box txn::ErrorInner::Mvcc(mvcc::Error(box mvcc::ErrorInner::KeyIsLocked { .. 
})) + )))) )); } @@ -5744,7 +5744,7 @@ mod tests { ]; // Write key-value pairs one by one - for (key, value) in &test_data { + for &(ref key, ref value) in &test_data { storage .raw_put( ctx.clone(), @@ -5803,7 +5803,7 @@ mod tests { let mut total_bytes: u64 = 0; let mut is_first = true; // Write key-value pairs one by one - for (key, value) in &test_data { + for &(ref key, ref value) in &test_data { storage .raw_put( ctx.clone(), @@ -6116,7 +6116,7 @@ mod tests { #[test] fn test_raw_batch_put() { - for for_cas in [false, true].into_iter() { + for for_cas in vec![false, true].into_iter() { test_kv_format_impl!(test_raw_batch_put_impl(for_cas)); } } @@ -6245,7 +6245,7 @@ mod tests { ]; // Write key-value pairs one by one - for (key, value) in &test_data { + for &(ref key, ref value) in &test_data { storage .raw_put( ctx.clone(), @@ -6260,7 +6260,7 @@ mod tests { } // Verify pairs in a batch - let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); let results = test_data.into_iter().map(|(k, v)| Some((k, v))).collect(); expect_multi_values( results, @@ -6292,7 +6292,7 @@ mod tests { ]; // Write key-value pairs one by one - for (key, value) in &test_data { + for &(ref key, ref value) in &test_data { storage .raw_put( ctx.clone(), @@ -6310,7 +6310,7 @@ mod tests { let mut ids = vec![]; let cmds = test_data .iter() - .map(|(k, _)| { + .map(|&(ref k, _)| { let mut req = RawGetRequest::default(); req.set_context(ctx.clone()); req.set_key(k.clone()); @@ -6331,7 +6331,7 @@ mod tests { #[test] fn test_raw_batch_delete() { - for for_cas in [false, true].into_iter() { + for for_cas in vec![false, true].into_iter() { test_kv_format_impl!(test_raw_batch_delete_impl(for_cas)); } } @@ -6381,10 +6381,10 @@ mod tests { rx.recv().unwrap(); // Verify pairs exist - let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); let results = 
test_data .iter() - .map(|(k, v)| Some((k.clone(), v.clone()))) + .map(|&(ref k, ref v)| Some((k.clone(), v.clone()))) .collect(); expect_multi_values( results, @@ -6512,7 +6512,7 @@ mod tests { // Scan pairs with key only let mut results: Vec> = test_data .iter() - .map(|(k, _)| Some((k.clone(), vec![]))) + .map(|&(ref k, _)| Some((k.clone(), vec![]))) .collect(); expect_multi_values( results.clone(), @@ -6909,7 +6909,7 @@ mod tests { rx.recv().unwrap(); // Verify pairs exist - let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); let results = test_data.into_iter().map(|(k, v)| Some((k, v))).collect(); expect_multi_values( results, diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index 474c789a31d..cc4403229c1 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -1287,7 +1287,7 @@ mod tests { let k = b"k"; // Write enough LOCK recrods - for start_ts in (1..30).step_by(2) { + for start_ts in (1..30).into_iter().step_by(2) { must_prewrite_lock(&mut engine, k, k, start_ts); must_commit(&mut engine, k, start_ts, start_ts + 1); } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 61a366c12ee..48158eda946 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -418,10 +418,11 @@ impl MvccReader { estimated_versions_to_last_change, } if estimated_versions_to_last_change >= SEEK_BOUND => { let key_with_ts = key.clone().append_ts(commit_ts); - let Some(value) = self.snapshot.get_cf(CF_WRITE, &key_with_ts)? - else { - return Ok(None); - }; + let Some(value) = self + .snapshot + .get_cf(CF_WRITE, &key_with_ts)? 
else { + return Ok(None); + }; self.statistics.write.get += 1; let write = WriteRef::parse(&value)?.to_owned(); assert!( @@ -2420,7 +2421,7 @@ pub mod tests { engine.commit(k, 1, 2); // Write enough LOCK recrods - for start_ts in (6..30).step_by(2) { + for start_ts in (6..30).into_iter().step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2429,7 +2430,7 @@ pub mod tests { engine.commit(k, 45, 46); // Write enough LOCK recrods - for start_ts in (50..80).step_by(2) { + for start_ts in (50..80).into_iter().step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2484,7 +2485,7 @@ pub mod tests { let k = b"k"; // Write enough LOCK recrods - for start_ts in (6..30).step_by(2) { + for start_ts in (6..30).into_iter().step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2521,7 +2522,7 @@ pub mod tests { engine.put(k, 1, 2); // 10 locks were put - for start_ts in (6..30).step_by(2) { + for start_ts in (6..30).into_iter().step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2548,7 +2549,7 @@ pub mod tests { feature_gate.set_version("6.1.0").unwrap(); set_tls_feature_gate(feature_gate); engine.delete(k, 51, 52); - for start_ts in (56..80).step_by(2) { + for start_ts in (56..80).into_iter().step_by(2) { engine.lock(k, start_ts, start_ts + 1); } let feature_gate = FeatureGate::default(); @@ -2580,7 +2581,7 @@ pub mod tests { let k = b"k"; engine.put(k, 1, 2); - for start_ts in (6..30).step_by(2) { + for start_ts in (6..30).into_iter().step_by(2) { engine.lock(k, start_ts, start_ts + 1); } engine.rollback(k, 30); diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 2b0a8e13582..3437a1e5432 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -633,7 +633,7 @@ impl ScanPolicy for LatestEntryPolicy { fn scan_latest_handle_lock( current_user_key: Key, - cfg: &ScannerConfig, + cfg: &mut ScannerConfig, cursors: &mut Cursors, statistics: &mut Statistics, ) 
-> Result> { @@ -1636,7 +1636,7 @@ mod latest_kv_tests { must_prewrite_put(&mut engine, b"k4", b"v41", b"k4", 3); must_commit(&mut engine, b"k4", 3, 7); - for start_ts in (10..30).step_by(2) { + for start_ts in (10..30).into_iter().step_by(2) { must_prewrite_lock(&mut engine, b"k1", b"k1", start_ts); must_commit(&mut engine, b"k1", start_ts, start_ts + 1); must_prewrite_lock(&mut engine, b"k3", b"k1", start_ts); diff --git a/src/storage/raw/raw_mvcc.rs b/src/storage/raw/raw_mvcc.rs index aa635827961..8c4ad5da08b 100644 --- a/src/storage/raw/raw_mvcc.rs +++ b/src/storage/raw/raw_mvcc.rs @@ -290,7 +290,7 @@ mod tests { RawEncodeSnapshot::from_snapshot(raw_mvcc_snapshot); // get_cf - for (key, value, _) in &test_data[6..12] { + for &(ref key, ref value, _) in &test_data[6..12] { let res = encode_snapshot.get_cf(CF_DEFAULT, &ApiV2::encode_raw_key(key, None)); assert_eq!(res.unwrap(), Some(value.to_owned())); } diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 713155f9160..64e22a13585 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -766,6 +766,7 @@ fn async_commit_timestamps( #[cfg(not(feature = "failpoints"))] let injected_fallback = false; + let max_commit_ts = max_commit_ts; if (!max_commit_ts.is_zero() && min_commit_ts > max_commit_ts) || injected_fallback { warn!("commit_ts is too large, fallback to normal 2PC"; "key" => log_wrappers::Value::key(key.as_encoded()), @@ -1874,6 +1875,7 @@ pub mod tests { // At most 12 ops per-case. 
let ops_count = rg.gen::() % 12; let ops = (0..ops_count) + .into_iter() .enumerate() .map(|(i, _)| { if i == 0 { diff --git a/src/storage/txn/commands/atomic_store.rs b/src/storage/txn/commands/atomic_store.rs index 61dbdac6565..9a54895e7e2 100644 --- a/src/storage/txn/commands/atomic_store.rs +++ b/src/storage/txn/commands/atomic_store.rs @@ -88,8 +88,8 @@ mod tests { fn test_atomic_process_write_impl() { let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); - let raw_keys = [b"ra", b"rz"]; - let raw_values = [b"valuea", b"valuez"]; + let raw_keys = vec![b"ra", b"rz"]; + let raw_values = vec![b"valuea", b"valuez"]; let ts_provider = super::super::test_util::gen_ts_provider(F::TAG); let mut modifies = vec![]; diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 2f39b29bc64..10446db6292 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -1853,7 +1853,9 @@ mod tests { .unwrap_err(); assert!(matches!( res, - Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::AlreadyExist { .. }))) + Error(box ErrorInner::Mvcc(MvccError( + box MvccErrorInner::AlreadyExist { .. } + ))) )); assert_eq!(cm.max_ts().into_inner(), 15); @@ -1876,7 +1878,9 @@ mod tests { .unwrap_err(); assert!(matches!( res, - Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::WriteConflict { .. }))) + Error(box ErrorInner::Mvcc(MvccError( + box MvccErrorInner::WriteConflict { .. } + ))) )); } @@ -2282,9 +2286,9 @@ mod tests { .unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { - .. - }))) + Error(box ErrorInner::Mvcc(MvccError( + box MvccErrorInner::PessimisticLockNotFound { .. 
} + ))) )); must_unlocked(&mut engine, b"k2"); // However conflict still won't be checked if there's a non-retry request @@ -2465,9 +2469,9 @@ mod tests { let err = prewrite_command(&mut engine, cm.clone(), &mut stat, cmd).unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { - .. - }))) + Error(box ErrorInner::Mvcc(MvccError( + box MvccErrorInner::PessimisticLockNotFound { .. } + ))) )); // Passing keys in different order gets the same result: let cmd = PrewritePessimistic::with_defaults( @@ -2488,9 +2492,9 @@ mod tests { let err = prewrite_command(&mut engine, cm, &mut stat, cmd).unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { - .. - }))) + Error(box ErrorInner::Mvcc(MvccError( + box MvccErrorInner::PessimisticLockNotFound { .. } + ))) )); // If the two keys are sent in different requests, it would be the client's duty diff --git a/src/storage/txn/latch.rs b/src/storage/txn/latch.rs index 549d1d22636..a662d9bab79 100644 --- a/src/storage/txn/latch.rs +++ b/src/storage/txn/latch.rs @@ -224,7 +224,7 @@ impl Latches { keep_latches_for_next_cmd: Option<(u64, &Lock)>, ) -> Vec { // Used to - let dummy_vec = []; + let dummy_vec = vec![]; let (keep_latches_for_cid, mut keep_latches_it) = match keep_latches_for_next_cmd { Some((cid, lock)) => (Some(cid), lock.required_hashes.iter().peekable()), None => (None, dummy_vec.iter().peekable()), @@ -282,9 +282,9 @@ mod tests { fn test_wakeup() { let latches = Latches::new(256); - let keys_a = ["k1", "k3", "k5"]; + let keys_a = vec!["k1", "k3", "k5"]; let mut lock_a = Lock::new(keys_a.iter()); - let keys_b = ["k4", "k5", "k6"]; + let keys_b = vec!["k4", "k5", "k6"]; let mut lock_b = Lock::new(keys_b.iter()); let cid_a: u64 = 1; let cid_b: u64 = 2; @@ -310,9 +310,9 @@ mod tests { fn test_wakeup_by_multi_cmds() { let latches = Latches::new(256); - let keys_a = ["k1", "k2", "k3"]; - let 
keys_b = ["k4", "k5", "k6"]; - let keys_c = ["k3", "k4"]; + let keys_a = vec!["k1", "k2", "k3"]; + let keys_b = vec!["k4", "k5", "k6"]; + let keys_c = vec!["k3", "k4"]; let mut lock_a = Lock::new(keys_a.iter()); let mut lock_b = Lock::new(keys_b.iter()); let mut lock_c = Lock::new(keys_c.iter()); @@ -353,10 +353,10 @@ mod tests { fn test_wakeup_by_small_latch_slot() { let latches = Latches::new(5); - let keys_a = ["k1", "k2", "k3"]; - let keys_b = ["k6", "k7", "k8"]; - let keys_c = ["k3", "k4"]; - let keys_d = ["k7", "k10"]; + let keys_a = vec!["k1", "k2", "k3"]; + let keys_b = vec!["k6", "k7", "k8"]; + let keys_c = vec!["k3", "k4"]; + let keys_d = vec!["k7", "k10"]; let mut lock_a = Lock::new(keys_a.iter()); let mut lock_b = Lock::new(keys_b.iter()); let mut lock_c = Lock::new(keys_c.iter()); diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index 2ca3ef145c8..19736304373 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -267,7 +267,7 @@ pub fn tls_collect_scan_details(cmd: &'static str, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_default() + .or_insert_with(Default::default) .add(stats); }); } diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 0081d5e95bc..aa0c2c29dec 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -120,7 +120,6 @@ uuid = { version = "0.8.1", features = ["serde", "v4"] } procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "7693954bd1dd86eb1709572fd7b62fd5f7ff2ea1" } [dev-dependencies] -arrow = "46.0" byteorder = "1.2" # See https://bheisler.github.io/criterion.rs/book/user_guide/known_limitations.html for the usage # of `real_blackbox` feature. 
diff --git a/tests/benches/coprocessor_executors/util/mod.rs b/tests/benches/coprocessor_executors/util/mod.rs index 3698860b4ea..0a5708c74ce 100644 --- a/tests/benches/coprocessor_executors/util/mod.rs +++ b/tests/benches/coprocessor_executors/util/mod.rs @@ -147,7 +147,7 @@ where I: 'static, { fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) + self.get_name().partial_cmp(other.get_name()) } } diff --git a/tests/benches/hierarchy/mvcc/mod.rs b/tests/benches/hierarchy/mvcc/mod.rs index 99f2c9ee1f4..92dacfe6dc9 100644 --- a/tests/benches/hierarchy/mvcc/mod.rs +++ b/tests/benches/hierarchy/mvcc/mod.rs @@ -61,7 +61,7 @@ where .unwrap(); } let write_data = WriteData::from_modifies(txn.into_modifies()); - let _ = futures::executor::block_on(tikv_kv::write(engine, &ctx, write_data, None)); + let _ = tikv_kv::write(engine, &ctx, write_data, None); let keys: Vec = kvs.iter().map(|(k, _)| Key::from_raw(k)).collect(); let snapshot = engine.snapshot(Default::default()).unwrap(); (snapshot, keys) diff --git a/tests/benches/misc/coprocessor/codec/chunk/chunk.rs b/tests/benches/misc/coprocessor/codec/chunk/chunk.rs deleted file mode 100644 index 241284a7228..00000000000 --- a/tests/benches/misc/coprocessor/codec/chunk/chunk.rs +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
- -use std::sync::Arc; - -use arrow::{ - array, - datatypes::{self, DataType, Field}, - record_batch::RecordBatch, -}; -use tidb_query_datatype::{codec::Datum, prelude::*, FieldTypeFlag, FieldTypeTp}; -use tipb::FieldType; - -pub struct Chunk { - pub data: RecordBatch, -} - -impl Chunk { - pub fn get_datum(&self, col_id: usize, row_id: usize, field_type: &FieldType) -> Datum { - if self.data.column(col_id).is_null(row_id) { - return Datum::Null; - } - - match field_type.as_accessor().tp() { - FieldTypeTp::Tiny - | FieldTypeTp::Short - | FieldTypeTp::Int24 - | FieldTypeTp::Long - | FieldTypeTp::LongLong - | FieldTypeTp::Year => { - if field_type - .as_accessor() - .flag() - .contains(FieldTypeFlag::UNSIGNED) - { - let data = self - .data - .column(col_id) - .as_any() - .downcast_ref::() - .unwrap(); - - Datum::U64(data.value(row_id)) - } else { - let data = self - .data - .column(col_id) - .as_any() - .downcast_ref::() - .unwrap(); - - Datum::I64(data.value(row_id)) - } - } - FieldTypeTp::Float | FieldTypeTp::Double => { - let data = self - .data - .column(col_id) - .as_any() - .downcast_ref::() - .unwrap(); - Datum::F64(data.value(row_id)) - } - _ => unreachable!(), - } - } -} - -pub struct ChunkBuilder { - columns: Vec, -} - -impl ChunkBuilder { - pub fn new(cols: usize, rows: usize) -> ChunkBuilder { - ChunkBuilder { - columns: vec![ColumnsBuilder::new(rows); cols], - } - } - - pub fn build(self, tps: &[FieldType]) -> Chunk { - let mut fields = Vec::with_capacity(tps.len()); - let mut arrays: Vec> = Vec::with_capacity(tps.len()); - for (field_type, column) in tps.iter().zip(self.columns) { - match field_type.as_accessor().tp() { - FieldTypeTp::Tiny - | FieldTypeTp::Short - | FieldTypeTp::Int24 - | FieldTypeTp::Long - | FieldTypeTp::LongLong - | FieldTypeTp::Year => { - if field_type - .as_accessor() - .flag() - .contains(FieldTypeFlag::UNSIGNED) - { - let (f, d) = column.into_u64_array(); - fields.push(f); - arrays.push(d); - } else { - let (f, d) = 
column.into_i64_array(); - fields.push(f); - arrays.push(d); - } - } - FieldTypeTp::Float | FieldTypeTp::Double => { - let (f, d) = column.into_f64_array(); - fields.push(f); - arrays.push(d); - } - _ => unreachable!(), - }; - } - let schema = datatypes::Schema::new(fields); - let batch = RecordBatch::try_new(Arc::new(schema), arrays).unwrap(); - Chunk { data: batch } - } - - pub fn append_datum(&mut self, col_id: usize, data: Datum) { - self.columns[col_id].append_datum(data) - } -} - -#[derive(Clone)] -pub struct ColumnsBuilder { - data: Vec, -} - -impl ColumnsBuilder { - fn new(rows: usize) -> ColumnsBuilder { - ColumnsBuilder { - data: Vec::with_capacity(rows), - } - } - - fn append_datum(&mut self, data: Datum) { - self.data.push(data) - } - - fn into_i64_array(self) -> (Field, Arc) { - let field = Field::new("", DataType::Int64, true); - let mut data: Vec> = Vec::with_capacity(self.data.len()); - for v in self.data { - match v { - Datum::Null => data.push(None), - Datum::I64(v) => data.push(Some(v)), - _ => unreachable!(), - } - } - (field, Arc::new(array::PrimitiveArray::from(data))) - } - - fn into_u64_array(self) -> (Field, Arc) { - let field = Field::new("", DataType::UInt64, true); - let mut data: Vec> = Vec::with_capacity(self.data.len()); - for v in self.data { - match v { - Datum::Null => data.push(None), - Datum::U64(v) => data.push(Some(v)), - _ => unreachable!(), - } - } - (field, Arc::new(array::PrimitiveArray::from(data))) - } - - fn into_f64_array(self) -> (Field, Arc) { - let field = Field::new("", DataType::Float64, true); - let mut data: Vec> = Vec::with_capacity(self.data.len()); - for v in self.data { - match v { - Datum::Null => data.push(None), - Datum::F64(v) => data.push(Some(v)), - _ => unreachable!(), - } - } - (field, Arc::new(array::PrimitiveArray::from(data))) - } -} diff --git a/tests/benches/misc/coprocessor/codec/chunk/mod.rs b/tests/benches/misc/coprocessor/codec/chunk/mod.rs deleted file mode 100644 index 
f956e2cb14e..00000000000 --- a/tests/benches/misc/coprocessor/codec/chunk/mod.rs +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. - -mod chunk; - -use test::Bencher; -use tidb_query_datatype::{ - codec::{ - chunk::{Chunk, ChunkEncoder}, - datum::Datum, - mysql::*, - }, - FieldTypeTp, -}; -use tipb::FieldType; - -#[bench] -fn bench_encode_chunk(b: &mut Bencher) { - let rows = 1024; - let fields: Vec = vec![ - FieldTypeTp::LongLong.into(), - FieldTypeTp::LongLong.into(), - FieldTypeTp::VarChar.into(), - FieldTypeTp::VarChar.into(), - FieldTypeTp::NewDecimal.into(), - FieldTypeTp::Json.into(), - ]; - let mut chunk = Chunk::new(&fields, rows); - for row_id in 0..rows { - let s = format!("{}.123435", row_id); - let bs = Datum::Bytes(s.as_bytes().to_vec()); - let dec = Datum::Dec(s.parse().unwrap()); - let json = Datum::Json(Json::from_string(s).unwrap()); - chunk.append_datum(0, &Datum::Null).unwrap(); - chunk.append_datum(1, &Datum::I64(row_id as i64)).unwrap(); - chunk.append_datum(2, &bs).unwrap(); - chunk.append_datum(3, &bs).unwrap(); - chunk.append_datum(4, &dec).unwrap(); - chunk.append_datum(5, &json).unwrap(); - } - - b.iter(|| { - let mut buf = vec![]; - buf.write_chunk(&chunk).unwrap(); - }); -} - -#[bench] -fn bench_chunk_build_tidb(b: &mut Bencher) { - let rows = 1024; - let fields: Vec = vec![FieldTypeTp::LongLong.into(), FieldTypeTp::LongLong.into()]; - - b.iter(|| { - let mut chunk = Chunk::new(&fields, rows); - for row_id in 0..rows { - chunk.append_datum(0, &Datum::Null).unwrap(); - chunk.append_datum(1, &Datum::I64(row_id as i64)).unwrap(); - } - }); -} - -#[bench] -fn bench_chunk_build_official(b: &mut Bencher) { - let rows = 1024; - let fields: Vec = vec![FieldTypeTp::LongLong.into(), FieldTypeTp::LongLong.into()]; - - b.iter(|| { - let mut chunk = chunk::ChunkBuilder::new(fields.len(), rows); - for row_id in 0..rows { - chunk.append_datum(0, Datum::Null); - chunk.append_datum(1, Datum::I64(row_id 
as i64)); - } - chunk.build(&fields); - }); -} - -#[bench] -fn bench_chunk_iter_tidb(b: &mut Bencher) { - let rows = 1024; - let fields: Vec = vec![FieldTypeTp::LongLong.into(), FieldTypeTp::Double.into()]; - let mut chunk = Chunk::new(&fields, rows); - for row_id in 0..rows { - if row_id & 1 == 0 { - chunk.append_datum(0, &Datum::Null).unwrap(); - } else { - chunk.append_datum(0, &Datum::I64(row_id as i64)).unwrap(); - } - chunk.append_datum(1, &Datum::F64(row_id as f64)).unwrap(); - } - - b.iter(|| { - let mut col1 = 0; - let mut col2 = 0.0; - for row in chunk.iter() { - col1 += match row.get_datum(0, &fields[0]).unwrap() { - Datum::I64(v) => v, - Datum::Null => 0, - _ => unreachable!(), - }; - col2 += match row.get_datum(1, &fields[1]).unwrap() { - Datum::F64(v) => v, - _ => unreachable!(), - }; - } - assert_eq!(col1, 262_144); - assert!(!(523_776.0 - col2).is_normal()); - }); -} - -#[bench] -fn bench_chunk_iter_official(b: &mut Bencher) { - let rows = 1024; - let fields: Vec = vec![FieldTypeTp::LongLong.into(), FieldTypeTp::Double.into()]; - let mut chunk = chunk::ChunkBuilder::new(fields.len(), rows); - for row_id in 0..rows { - if row_id & 1 == 0 { - chunk.append_datum(0, Datum::Null); - } else { - chunk.append_datum(0, Datum::I64(row_id as i64)); - } - - chunk.append_datum(1, Datum::F64(row_id as f64)); - } - let chunk = chunk.build(&fields); - b.iter(|| { - let (mut col1, mut col2) = (0, 0.0); - for row_id in 0..chunk.data.num_rows() { - col1 += match chunk.get_datum(0, row_id, &fields[0]) { - Datum::I64(v) => v, - Datum::Null => 0, - _ => unreachable!(), - }; - col2 += match chunk.get_datum(1, row_id, &fields[1]) { - Datum::F64(v) => v, - _ => unreachable!(), - }; - } - assert_eq!(col1, 262_144); - assert!(!(523_776.0 - col2).is_normal()); - }); -} diff --git a/tests/benches/misc/coprocessor/codec/mod.rs b/tests/benches/misc/coprocessor/codec/mod.rs index 274ec362377..082f1c55894 100644 --- a/tests/benches/misc/coprocessor/codec/mod.rs +++ 
b/tests/benches/misc/coprocessor/codec/mod.rs @@ -1,6 +1,5 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. -mod chunk; mod mysql; use byteorder::{BigEndian, ByteOrder, LittleEndian}; diff --git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index a545d9935e6..d567edd5add 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -171,7 +171,6 @@ fn bench_async_snapshots_noop(b: &mut test::Bencher) { } #[bench] -#[allow(clippy::let_underscore_future)] fn bench_async_snapshot(b: &mut test::Bencher) { let leader = new_peer(2, 3); let mut region = Region::default(); @@ -206,7 +205,6 @@ fn bench_async_snapshot(b: &mut test::Bencher) { } #[bench] -#[allow(clippy::let_underscore_future)] fn bench_async_write(b: &mut test::Bencher) { let leader = new_peer(2, 3); let mut region = Region::default(); diff --git a/tests/benches/raftstore/mod.rs b/tests/benches/raftstore/mod.rs index e164d59f82a..05c602824c2 100644 --- a/tests/benches/raftstore/mod.rs +++ b/tests/benches/raftstore/mod.rs @@ -12,7 +12,7 @@ const DEFAULT_DATA_SIZE: usize = 100_000; fn enc_write_kvs(db: &RocksEngine, kvs: &[(Vec, Vec)]) { let mut wb = db.write_batch(); - for (k, v) in kvs { + for &(ref k, ref v) in kvs { wb.put(&keys::data_key(k), v).unwrap(); } wb.write().unwrap(); diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index f40f40e6af1..a9dbd36a81a 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -1,8 +1,5 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
-#![allow(clippy::arc_with_non_send_sync)] -#![allow(clippy::unnecessary_mut_passed)] -#[allow(clippy::let_underscore_future)] mod test_async_fetch; mod test_async_io; mod test_backup; diff --git a/tests/failpoints/cases/test_disk_full.rs b/tests/failpoints/cases/test_disk_full.rs index 55c06d87b07..217269bb5b8 100644 --- a/tests/failpoints/cases/test_disk_full.rs +++ b/tests/failpoints/cases/test_disk_full.rs @@ -35,7 +35,7 @@ fn get_fp(usage: DiskUsage, store_id: u64) -> String { // check the region new leader is elected. fn assert_region_leader_changed( - cluster: &Cluster, + cluster: &mut Cluster, region_id: u64, original_leader: u64, ) { @@ -91,7 +91,7 @@ fn test_disk_full_leader_behaviors(usage: DiskUsage) { let new_last_index = cluster.raft_local_state(1, 1).last_index; assert_eq!(old_last_index, new_last_index); - assert_region_leader_changed(&cluster, 1, 1); + assert_region_leader_changed(&mut cluster, 1, 1); fail::remove(get_fp(usage, 1)); cluster.must_transfer_leader(1, new_peer(1, 1)); fail::cfg(get_fp(usage, 1), "return").unwrap(); @@ -199,7 +199,7 @@ fn test_disk_full_txn_behaviors(usage: DiskUsage) { DiskFullOpt::NotAllowedOnFull, ); assert!(res.get_region_error().has_disk_full()); - assert_region_leader_changed(&cluster, 1, 1); + assert_region_leader_changed(&mut cluster, 1, 1); fail::remove(get_fp(usage, 1)); cluster.must_transfer_leader(1, new_peer(1, 1)); @@ -393,7 +393,7 @@ fn test_disk_full_followers_with_hibernate_regions() { // check the region new leader is elected. 
fn assert_region_merged( - cluster: &Cluster, + cluster: &mut Cluster, left_region_key: &[u8], right_region_key: &[u8], ) { diff --git a/tests/failpoints/cases/test_engine.rs b/tests/failpoints/cases/test_engine.rs index 2dd5b6ac04b..073f7276419 100644 --- a/tests/failpoints/cases/test_engine.rs +++ b/tests/failpoints/cases/test_engine.rs @@ -57,7 +57,6 @@ fn test_write_buffer_manager() { } } -#[rustfmt::skip] // The test mocks the senario before https://github.com/tikv/rocksdb/pull/347: // note: before rocksdb/pull/347, lock is called before on_memtable_sealed. // Case: diff --git a/tests/failpoints/cases/test_hibernate.rs b/tests/failpoints/cases/test_hibernate.rs index d8f73f312b6..d2eb9aa10dd 100644 --- a/tests/failpoints/cases/test_hibernate.rs +++ b/tests/failpoints/cases/test_hibernate.rs @@ -93,7 +93,6 @@ fn test_break_leadership_on_restart() { // received, and become `GroupState::Ordered` after the proposal is received. // But they should keep wakeful for a while. #[test] -#[allow(clippy::let_underscore_future)] fn test_store_disconnect_with_hibernate() { let mut cluster = new_server_cluster(0, 3); let base_tick_ms = 50; diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 08b7474bb8e..0c16819082b 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -1710,7 +1710,8 @@ fn test_destroy_source_peer_while_merging() { } struct MsgTimeoutFilter { - tx: Sender, + // wrap with mutex to make tx Sync. + tx: Mutex>, } impl Filter for MsgTimeoutFilter { @@ -1718,7 +1719,7 @@ impl Filter for MsgTimeoutFilter { let mut res = Vec::with_capacity(msgs.len()); for m in msgs.drain(..) { if m.get_message().msg_type == MessageType::MsgTimeoutNow { - self.tx.send(m).unwrap(); + self.tx.lock().unwrap().send(m).unwrap(); } else { res.push(m); } @@ -1787,7 +1788,7 @@ fn test_concurrent_between_transfer_leader_and_merge() { // msg by using Filter. 
So we make node-1-1000 be in leader_transferring status // for some time. let (tx, rx_msg) = channel(); - let filter = MsgTimeoutFilter { tx }; + let filter = MsgTimeoutFilter { tx: Mutex::new(tx) }; cluster.add_send_filter_on_node(1, Box::new(filter)); pd_client.transfer_leader( @@ -1811,13 +1812,15 @@ fn test_concurrent_between_transfer_leader_and_merge() { let router = cluster.get_router(2).unwrap(); let (tx, rx) = channel(); + let tx = Mutex::new(tx); let _ = fail::cfg_callback("propose_commit_merge_1", move || { - tx.send(()).unwrap(); + tx.lock().unwrap().send(()).unwrap(); }); let (tx2, rx2) = channel(); + let tx2 = Mutex::new(tx2); let _ = fail::cfg_callback("on_propose_commit_merge_success", move || { - tx2.send(()).unwrap(); + tx2.lock().unwrap().send(()).unwrap(); }); cluster.merge_region(left.get_id(), right.get_id(), Callback::None); diff --git a/tests/failpoints/cases/test_pd_client.rs b/tests/failpoints/cases/test_pd_client.rs index 201aafce6fb..0115d6d7ba5 100644 --- a/tests/failpoints/cases/test_pd_client.rs +++ b/tests/failpoints/cases/test_pd_client.rs @@ -43,7 +43,6 @@ macro_rules! request { } #[test] -#[allow(clippy::let_underscore_future)] fn test_pd_client_deadlock() { let (_server, client) = new_test_server_and_client(ReadableDuration::millis(100)); let pd_client_reconnect_fp = "pd_client_reconnect"; diff --git a/tests/failpoints/cases/test_pd_client_legacy.rs b/tests/failpoints/cases/test_pd_client_legacy.rs index 583dad2ff34..ac427c29e69 100644 --- a/tests/failpoints/cases/test_pd_client_legacy.rs +++ b/tests/failpoints/cases/test_pd_client_legacy.rs @@ -43,7 +43,6 @@ macro_rules! 
request { } #[test] -#[allow(clippy::let_underscore_future)] fn test_pd_client_deadlock() { let (_server, client) = new_test_server_and_client(ReadableDuration::millis(100)); let client = Arc::new(client); diff --git a/tests/failpoints/cases/test_rawkv.rs b/tests/failpoints/cases/test_rawkv.rs index 5ab7edb503f..a795422c120 100644 --- a/tests/failpoints/cases/test_rawkv.rs +++ b/tests/failpoints/cases/test_rawkv.rs @@ -208,7 +208,7 @@ fn test_leader_transfer() { #[test] fn test_region_merge() { let mut suite = TestSuite::new(3, ApiVersion::V2); - let keys = [b"rk0", b"rk1", b"rk2", b"rk3", b"rk4", b"rk5"]; + let keys = vec![b"rk0", b"rk1", b"rk2", b"rk3", b"rk4", b"rk5"]; suite.must_raw_put(keys[1], b"v1"); suite.must_raw_put(keys[3], b"v3"); diff --git a/tests/failpoints/cases/test_read_execution_tracker.rs b/tests/failpoints/cases/test_read_execution_tracker.rs index dc6906b668a..7351044b297 100644 --- a/tests/failpoints/cases/test_read_execution_tracker.rs +++ b/tests/failpoints/cases/test_read_execution_tracker.rs @@ -4,16 +4,11 @@ use kvproto::kvrpcpb::*; use test_coprocessor::{init_with_data, DagSelect, ProductTable}; use test_raftstore::{kv_batch_read, kv_read, must_kv_commit, must_kv_prewrite}; use test_raftstore_macro::test_case; -use tikv_util::config::ReadableDuration; -#[test_case(test_raftstore::must_new_cluster_with_cfg_and_kv_client_mul)] -#[test_case(test_raftstore_v2::must_new_cluster_with_cfg_and_kv_client_mul)] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_read_execution_tracking() { - let (_cluster, client, ctx) = new_cluster(1, |c| { - // set a small renew duration to avoid trigger pre-renew that can affact the - // metrics. 
- c.cfg.tikv.raft_store.renew_leader_lease_advance_duration = ReadableDuration::millis(1); - }); + let (_cluster, client, ctx) = new_cluster(); let (k1, v1) = (b"k1".to_vec(), b"v1".to_vec()); let (k2, v2) = (b"k2".to_vec(), b"v2".to_vec()); diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 10a65271462..65c50793d7a 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -1426,7 +1426,8 @@ impl Filter for TeeFilter { // 2. the splitted region set has_dirty_data be true in `apply_snapshot` // 3. the splitted region schedule tablet trim task in `on_applied_snapshot` // with tablet index 5 -// 4. the splitted region received a snapshot sent from its leader +// 4. the splitted region received a snapshot sent from its +// leader // 5. after finishing applying this snapshot, the tablet index in storage // changed to 6 // 6. tablet trim complete and callbacked to raftstore diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 4668c24ad66..57047bef9d4 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -1620,7 +1620,9 @@ fn test_before_propose_deadline() { assert!( matches!( res, - Err(StorageError(box StorageErrorInner::Kv(KvError(box KvErrorInner::Request(_))))) + Err(StorageError(box StorageErrorInner::Kv(KvError( + box KvErrorInner::Request(_), + )))) ), "actual: {:?}", res diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 4154a764d99..14f4161c7ae 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -751,7 +751,7 @@ fn test_proposal_concurrent_with_conf_change_and_transfer_leader() { let handle = std::thread::spawn(move || { let mut mutations = vec![]; - for key in [b"key3".to_vec(), b"key4".to_vec()] { + for key in vec![b"key3".to_vec(), b"key4".to_vec()] { 
let mut mutation = kvproto::kvrpcpb::Mutation::default(); mutation.set_op(Op::Put); mutation.set_key(key); diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index 02fb8c046c8..75eb62bab99 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -361,8 +361,8 @@ fn test_read_lock_after_become_follower() { /// 1. Inserted 5 entries and make all stores commit and apply them. /// 2. Prevent the store 3 from append following logs. /// 3. Insert another 20 entries. -/// 4. Wait for some time so that part of the entry cache are compacted on the -/// leader(store 1). +/// 4. Wait for some time so that part of the entry cache are compacted +/// on the leader(store 1). macro_rules! run_cluster_for_test_warmup_entry_cache { ($cluster:expr) => { // Let the leader compact the entry cache. diff --git a/tests/integrations/backup/mod.rs b/tests/integrations/backup/mod.rs index bd5461e6134..4cfd4be07be 100644 --- a/tests/integrations/backup/mod.rs +++ b/tests/integrations/backup/mod.rs @@ -492,7 +492,6 @@ fn test_backup_raw_meta() { } #[test] -#[allow(clippy::permissions_set_readonly_false)] fn test_invalid_external_storage() { let mut suite = TestSuite::new(1, 144 * 1024 * 1024, ApiVersion::V1); // Put some data. 
diff --git a/tests/integrations/import/test_apply_log.rs b/tests/integrations/import/test_apply_log.rs index f821ffea2e7..3d8cf85b02c 100644 --- a/tests/integrations/import/test_apply_log.rs +++ b/tests/integrations/import/test_apply_log.rs @@ -67,6 +67,6 @@ fn test_apply_twice() { &tikv, &ctx, CF_DEFAULT, - default_fst.into_iter().chain(default_snd), + default_fst.into_iter().chain(default_snd.into_iter()), ); } diff --git a/tests/integrations/mod.rs b/tests/integrations/mod.rs index 86ceb5369e7..2b68c0a8ba9 100644 --- a/tests/integrations/mod.rs +++ b/tests/integrations/mod.rs @@ -4,8 +4,6 @@ #![feature(box_patterns)] #![feature(custom_test_frameworks)] #![test_runner(test_util::run_tests)] -#![allow(clippy::needless_pass_by_ref_mut)] -#![allow(clippy::extra_unused_type_parameters)] extern crate test; diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 30ea12a424b..056641e1e3f 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -287,8 +287,8 @@ fn test_flush_before_stop2() { // 1. lock `k` with index 6 // 2. on_applied_res => lockcf's last_modified = 6 // 3. flush lock cf => lockcf's flushed_index = 6 -// 4. batch {unlock `k`, write `k`} with index 7 (last_modified is updated in -// store but RocksDB is modified in apply. So, +// 4. batch {unlock `k`, write `k`} with index 7 +// (last_modified is updated in store but RocksDB is modified in apply. So, // before on_apply_res, the last_modified is not updated.) 
// // flush-before-close: diff --git a/tests/integrations/raftstore/test_compact_lock_cf.rs b/tests/integrations/raftstore/test_compact_lock_cf.rs index 56cb65cce87..fbc7629c73f 100644 --- a/tests/integrations/raftstore/test_compact_lock_cf.rs +++ b/tests/integrations/raftstore/test_compact_lock_cf.rs @@ -5,13 +5,13 @@ use engine_traits::{MiscExt, CF_LOCK}; use test_raftstore::*; use tikv_util::config::*; -fn flush(cluster: &Cluster) { +fn flush(cluster: &mut Cluster) { for engines in cluster.engines.values() { engines.kv.flush_cf(CF_LOCK, true).unwrap(); } } -fn flush_then_check(cluster: &Cluster, interval: u64, written: bool) { +fn flush_then_check(cluster: &mut Cluster, interval: u64, written: bool) { flush(cluster); // Wait for compaction. sleep_ms(interval * 2); diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index 7701fe167c8..60f10936f2d 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -434,7 +434,6 @@ fn test_txn_query_stats_tmpl() { fail::remove("only_check_source_task_name"); } -#[allow(clippy::extra_unused_type_parameters)] fn raw_put( _cluster: &Cluster, client: &TikvClient, From 74f82f651654dba267438782af8756ccb65e7fda Mon Sep 17 00:00:00 2001 From: SeaRise Date: Tue, 26 Sep 2023 16:03:16 +0800 Subject: [PATCH 0939/1149] expr: fix wrong result of 0 / decimal and 0 % decimal (#15675) close tikv/tikv#15631 Signed-off-by: SeaRise --- .../src/codec/mysql/decimal.rs | 41 ++++++++++++++----- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/components/tidb_query_datatype/src/codec/mysql/decimal.rs b/components/tidb_query_datatype/src/codec/mysql/decimal.rs index 143ec6c7760..bc18d7192f9 100644 --- a/components/tidb_query_datatype/src/codec/mysql/decimal.rs +++ b/components/tidb_query_datatype/src/codec/mysql/decimal.rs @@ -590,17 +590,24 @@ fn do_div_mod_impl( rhs: &Decimal, mut frac_incr: u8, do_mod: bool, + result_frac_cnt: Option, ) 
-> Option> { let r_frac_cnt = word_cnt!(rhs.frac_cnt) * DIGITS_PER_WORD; let (r_idx, r_prec) = rhs.remove_leading_zeroes(rhs.int_cnt + r_frac_cnt); if r_prec == 0 { + // short-circuit everything: rhs == 0 return None; } let l_frac_cnt = word_cnt!(lhs.frac_cnt) * DIGITS_PER_WORD; let (l_idx, l_prec) = lhs.remove_leading_zeroes(lhs.int_cnt + l_frac_cnt); if l_prec == 0 { - return Some(Res::Ok(Decimal::zero())); + // short-circuit everything: lhs == 0 + if let Some(result_frac) = result_frac_cnt { + return Some(Res::Ok(Decimal::new(0, result_frac, false))); + } else { + return Some(Res::Ok(Decimal::zero())); + } } frac_incr = frac_incr.saturating_sub(l_frac_cnt - lhs.frac_cnt + r_frac_cnt - rhs.frac_cnt); @@ -784,8 +791,9 @@ fn do_div_mod_impl( Some(res) } +#[allow(dead_code)] fn do_div_mod(lhs: &Decimal, rhs: &Decimal, frac_incr: u8, do_mod: bool) -> Option> { - do_div_mod_impl(lhs, rhs, frac_incr, do_mod) + do_div_mod_impl(lhs, rhs, frac_incr, do_mod, None) } /// `do_mul` multiplies two decimals. 
@@ -1704,7 +1712,7 @@ impl Decimal { fn div(&self, rhs: &Decimal, frac_incr: u8) -> Option> { let result_frac_cnt = cmp::min(self.result_frac_cnt.saturating_add(frac_incr), MAX_FRACTION); - let mut res = do_div_mod(self, rhs, frac_incr, false); + let mut res = do_div_mod_impl(self, rhs, frac_incr, false, Some(result_frac_cnt)); if let Some(ref mut dec) = res { dec.result_frac_cnt = result_frac_cnt; } @@ -2362,7 +2370,7 @@ impl<'a, 'b> Rem<&'a Decimal> for &'b Decimal { type Output = Option>; fn rem(self, rhs: &'a Decimal) -> Self::Output { let result_frac_cnt = cmp::max(self.result_frac_cnt, rhs.result_frac_cnt); - let mut res = do_div_mod_impl(self, rhs, 0, true); + let mut res = do_div_mod_impl(self, rhs, 0, true, Some(result_frac_cnt)); if let Some(ref mut dec) = res { dec.result_frac_cnt = result_frac_cnt; } @@ -3545,17 +3553,28 @@ mod tests { assert_eq!(res, rem_exp.map(|s| s.to_owned())); } - let div_cases = vec![( - "-43791957044243810000000000000000000000000000000000000000000000000000000000000", - "-0.0000000000000000000000000000000000000000000000000012867433602814482", - Res::Overflow( - "34033171179267041433424155279291553259014210153022524070386565694757521640", + let div_cases = vec![ + ( + "-43791957044243810000000000000000000000000000000000000000000000000000000000000", + "-0.0000000000000000000000000000000000000000000000000012867433602814482", + Res::Overflow( + "34033171179267041433424155279291553259014210153022524070386565694757521640", + ), ), - )]; - for (lhs_str, rhs_str, rem_exp) in div_cases { + ("0", "0.5", Res::Ok("0.0000")), + ]; + for (lhs_str, rhs_str, div_exp) in div_cases { let lhs: Decimal = lhs_str.parse().unwrap(); let rhs: Decimal = rhs_str.parse().unwrap(); let res = (&lhs / &rhs).unwrap().map(|d| d.to_string()); + assert_eq!(res, div_exp.map(|s| s.to_owned())) + } + + let rem_cases = vec![("0", "0.5", Res::Ok("0.0"))]; + for (lhs_str, rhs_str, rem_exp) in rem_cases { + let lhs: Decimal = lhs_str.parse().unwrap(); + let rhs: Decimal 
= rhs_str.parse().unwrap(); + let res = (lhs % rhs).unwrap().map(|d| d.to_string()); assert_eq!(res, rem_exp.map(|s| s.to_owned())) } } From 977888de9b218abd56928ab51e0f78a5b13c9063 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 26 Sep 2023 16:30:16 +0800 Subject: [PATCH 0940/1149] raftstore-v2: fix "failed to get merge entries" panic (#15649) close tikv/tikv#15633 fix "failed to get merge entries" panic Signed-off-by: SpadeA-Tang --- components/raftstore-v2/src/fsm/peer.rs | 1 + .../operation/command/admin/compact_log.rs | 2 + .../operation/command/admin/merge/commit.rs | 10 +- .../operation/command/admin/merge/prepare.rs | 2 + .../operation/command/admin/merge/rollback.rs | 18 +- .../raftstore-v2/src/operation/query/mod.rs | 1 + .../src/operation/ready/apply_trace.rs | 2 +- components/raftstore-v2/src/raft/peer.rs | 11 +- components/raftstore/src/store/fsm/peer.rs | 1 + components/test_raftstore-v2/src/util.rs | 38 +++- components/test_raftstore/src/cluster.rs | 5 +- tests/failpoints/cases/test_merge.rs | 179 +++++++++++++++--- .../integrations/raftstore/test_bootstrap.rs | 6 +- 13 files changed, 224 insertions(+), 52 deletions(-) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index d51d8eedb2a..872b2c4e7e6 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -196,6 +196,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, self.schedule_tick(PeerTick::SplitRegionCheck); self.schedule_tick(PeerTick::PdHeartbeat); self.schedule_tick(PeerTick::CompactLog); + self.fsm.peer.on_check_merge(self.store_ctx); if self.fsm.peer.storage().is_initialized() { self.fsm.peer.schedule_apply_fsm(self.store_ctx); } diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index d054234b46f..1c4538ab51e 100644 --- 
a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -145,6 +145,8 @@ impl Peer { store_ctx: &mut StoreContext, force: bool, ) { + fail::fail_point!("maybe_propose_compact_log", |_| {}); + // As leader, we would not keep caches for the peers that didn't response // heartbeat in the last few seconds. That happens probably because // another TiKV is down. In this case if we do not clean up the cache, diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index 8e55f89a7d2..bec0265ffc3 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -172,6 +172,7 @@ impl Peer { &mut self, store_ctx: &mut StoreContext, ) { + fail::fail_point!("on_schedule_merge", |_| {}); fail::fail_point!( "ask_target_peer_to_commit_merge_2", self.region_id() == 2, @@ -198,7 +199,7 @@ impl Peer { Ok(ents) => ents, Err(e) => slog_panic!( self.logger, - "failed to get merge entires"; + "failed to get merge entries"; "err" => ?e, "low" => low, "commit" => state.get_commit() @@ -261,6 +262,7 @@ impl Peer { store_ctx: &mut StoreContext, req: RaftCmdRequest, ) { + fail::fail_point!("on_ask_commit_merge", |_| {}); let expected_epoch = req.get_header().get_region_epoch(); let merge = req.get_admin_request().get_commit_merge(); assert!(merge.has_source_state() && merge.get_source_state().has_merge_state()); @@ -736,6 +738,12 @@ impl Peer { store_ctx: &mut StoreContext, mut res: CommitMergeResult, ) { + fail::fail_point!( + "on_apply_res_commit_merge_2", + self.peer().store_id == 2, + |_| {} + ); + let region = res.region_state.get_region(); assert!( res.source.get_end_key() == region.get_end_key() diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs 
b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index d3d1896287c..6ff982eea8c 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -812,6 +812,8 @@ impl Peer { store_ctx: &mut StoreContext, res: PrepareMergeResult, ) { + fail::fail_point!("on_apply_res_prepare_merge"); + let region = res.region_state.get_region().clone(); { let mut meta = store_ctx.store_meta.lock().unwrap(); diff --git a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs index d931a295f4d..adc49a928b3 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/rollback.rs @@ -4,9 +4,8 @@ use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use kvproto::{ - metapb, raft_cmdpb::{AdminCmdType, AdminRequest, AdminResponse}, - raft_serverpb::PeerState, + raft_serverpb::{PeerState, RegionLocalState}, }; use raftstore::{ coprocessor::RegionChangeReason, @@ -28,7 +27,7 @@ use crate::{ #[derive(Debug)] pub struct RollbackMergeResult { commit: u64, - region: metapb::Region, + region_state: RegionLocalState, } impl Peer { @@ -118,7 +117,7 @@ impl Apply { AdminResponse::default(), AdminCmdResult::RollbackMerge(RollbackMergeResult { commit: rollback.get_commit(), - region, + region_state: self.region_state().clone(), }), )) } @@ -131,6 +130,7 @@ impl Peer { store_ctx: &mut StoreContext, res: RollbackMergeResult, ) { + let region = res.region_state.get_region(); assert_ne!(res.commit, 0); let current = self.merge_context().and_then(|c| c.prepare_merge_index()); if current != Some(res.commit) { @@ -143,21 +143,21 @@ impl Peer { } { let mut meta = store_ctx.store_meta.lock().unwrap(); - meta.set_region(&res.region, true, &self.logger); - let (reader, _) = 
meta.readers.get_mut(&res.region.get_id()).unwrap(); + meta.set_region(region, true, &self.logger); + let (reader, _) = meta.readers.get_mut(®ion.get_id()).unwrap(); self.set_region( &store_ctx.coprocessor_host, reader, - res.region.clone(), + region.clone(), RegionChangeReason::RollbackMerge, self.storage().region_state().get_tablet_index(), ); } - let region_state = self.storage().region_state().clone(); let region_id = self.region_id(); self.state_changes_mut() - .put_region_state(region_id, res.commit, ®ion_state) + .put_region_state(region_id, res.commit, &res.region_state) .unwrap(); + self.storage_mut().set_region_state(res.region_state); self.set_has_extra_write(); self.rollback_merge(store_ctx); diff --git a/components/raftstore-v2/src/operation/query/mod.rs b/components/raftstore-v2/src/operation/query/mod.rs index 2f1b1cd0138..10f6e3279c3 100644 --- a/components/raftstore-v2/src/operation/query/mod.rs +++ b/components/raftstore-v2/src/operation/query/mod.rs @@ -471,6 +471,7 @@ impl Peer { // Only leaders need to update applied_term. 
if progress_to_be_updated && self.is_leader() { if applied_term == self.term() { + fail::fail_point!("on_applied_current_term"); ctx.coprocessor_host .on_applied_current_term(StateRole::Leader, self.region()); } diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index af0257e763f..e839089837d 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -718,7 +718,7 @@ impl Peer { ); let region_id = self.region_id(); let flush_threshold: u64 = (|| { - fail_point!("flush_before_cluse_threshold", |t| { + fail_point!("flush_before_close_threshold", |t| { t.unwrap().parse::().unwrap() }); 50 diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 87d41de776c..4ff47c4b4bb 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -231,6 +231,14 @@ impl Peer { unsafe_recovery_state: None, }; + // If merge_context is not None, it means the PrepareMerge is applied before + // restart. So we have to neter prepare merge again to prevent all proposals + // except for RollbackMerge. + if let Some(ref state) = peer.merge_context { + peer.proposal_control + .enter_prepare_merge(state.prepare_merge_index().unwrap()); + } + // If this region has only one peer and I am the one, campaign directly. let region = peer.region(); if region.get_peers().len() == 1 @@ -265,9 +273,6 @@ impl Peer { } /// Set the region of a peer. - /// - /// This will update the region of the peer, caller must ensure the region - /// has been preserved in a durable device. 
pub fn set_region( &mut self, host: &CoprocessorHost, diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 30ba0c3059d..513e9c0636a 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -4602,6 +4602,7 @@ where } fn on_ready_prepare_merge(&mut self, region: metapb::Region, state: MergeState) { + fail_point!("on_apply_res_prepare_merge"); { let mut meta = self.ctx.store_meta.lock().unwrap(); meta.set_region( diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index 805394b1ea0..d83dff12e9a 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -1,6 +1,12 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{fmt::Write, path::Path, sync::Arc, thread, time::Duration}; +use std::{ + fmt::Write, + path::Path, + sync::Arc, + thread, + time::{Duration, Instant}, +}; use encryption_export::{data_key_manager_from_config, DataKeyManager}; use engine_rocks::{RocksEngine, RocksStatistics}; @@ -18,7 +24,7 @@ use raftstore::{store::ReadResponse, Result}; use rand::{prelude::SliceRandom, RngCore}; use server::common::ConfiguredRaftEngine; use tempfile::TempDir; -use test_raftstore::{new_get_cmd, new_put_cf_cmd, new_request, new_snap_cmd, Config}; +use test_raftstore::{new_get_cmd, new_put_cf_cmd, new_request, new_snap_cmd, sleep_ms, Config}; use tikv::{ server::KvEngineFactoryBuilder, storage::{ @@ -27,7 +33,8 @@ use tikv::{ }, }; use tikv_util::{ - config::ReadableDuration, escape, future::block_on_timeout, worker::LazyWorker, HandyRwLock, + config::ReadableDuration, escape, future::block_on_timeout, time::InstantExt, + worker::LazyWorker, HandyRwLock, }; use txn_types::Key; @@ -447,3 +454,28 @@ pub fn wait_down_peers, EK: KvEngine>( peers, count, peer ); } + +pub fn wait_region_epoch_change, EK: KvEngine>( + cluster: &Cluster, + waited_region: 
&metapb::Region, + timeout: Duration, +) { + let timer = Instant::now(); + loop { + if waited_region.get_region_epoch().get_version() + == cluster + .get_region_epoch(waited_region.get_id()) + .get_version() + { + if timer.saturating_elapsed() > timeout { + panic!( + "region {:?}, region epoch is still not changed.", + waited_region + ); + } + } else { + break; + } + sleep_ms(10); + } +} diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 26fa2a47d5f..2a4082893e7 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -4,7 +4,10 @@ use std::{ collections::hash_map::Entry as MapEntry, error::Error as StdError, result, - sync::{mpsc, Arc, Mutex, RwLock}, + sync::{ + mpsc::{self}, + Arc, Mutex, RwLock, + }, thread, time::Duration, }; diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 0c16819082b..861e4a658ce 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -3,7 +3,7 @@ use std::{ sync::{ atomic::{AtomicBool, Ordering}, - mpsc::{channel, Sender}, + mpsc::{channel, sync_channel, Sender}, *, }, thread, @@ -22,14 +22,16 @@ use raft::eraftpb::MessageType; use raftstore::store::*; use raftstore_v2::router::PeerMsg; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{config::*, future::block_on_timeout, time::Instant, HandyRwLock}; use txn_types::{Key, LastChange, PessimisticLock}; /// Test if merge is rollback as expected. 
-#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_node_merge_rollback() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); @@ -53,8 +55,16 @@ fn test_node_merge_rollback() { let schedule_merge_fp = "on_schedule_merge"; fail::cfg(schedule_merge_fp, "return()").unwrap(); - // The call is finished when prepare_merge is applied. - cluster.must_try_merge(region.get_id(), target_region.get_id()); + let (tx, rx) = channel(); + let tx = Mutex::new(tx); + fail::cfg_callback("on_apply_res_prepare_merge", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + + cluster.merge_region(region.get_id(), target_region.get_id(), Callback::None); + // PrepareMerge is applied. + rx.recv().unwrap(); // Add a peer to trigger rollback. pd_client.must_add_peer(right.get_id(), new_peer(3, 5)); @@ -74,12 +84,7 @@ fn test_node_merge_rollback() { region.mut_region_epoch().set_version(4); for i in 1..3 { must_get_equal(&cluster.get_engine(i), b"k11", b"v11"); - let state_key = keys::region_state_key(region.get_id()); - let state: RegionLocalState = cluster - .get_engine(i) - .get_msg_cf(CF_RAFT, &state_key) - .unwrap() - .unwrap(); + let state = cluster.region_local_state(region.get_id(), i); assert_eq!(state.get_state(), PeerState::Normal); assert_eq!(*state.get_region(), region); } @@ -88,7 +93,10 @@ fn test_node_merge_rollback() { fail::cfg(schedule_merge_fp, "return()").unwrap(); let target_region = pd_client.get_region(b"k3").unwrap(); - cluster.must_try_merge(region.get_id(), target_region.get_id()); + cluster.merge_region(region.get_id(), target_region.get_id(), Callback::None); + // PrepareMerge is applied. + rx.recv().unwrap(); + let mut region = pd_client.get_region(b"k1").unwrap(); // Split to trigger rollback. 
@@ -103,12 +111,7 @@ fn test_node_merge_rollback() { region.mut_region_epoch().set_version(6); for i in 1..3 { must_get_equal(&cluster.get_engine(i), b"k12", b"v12"); - let state_key = keys::region_state_key(region.get_id()); - let state: RegionLocalState = cluster - .get_engine(i) - .get_msg_cf(CF_RAFT, &state_key) - .unwrap() - .unwrap(); + let state = cluster.region_local_state(region.get_id(), i); assert_eq!(state.get_state(), PeerState::Normal); assert_eq!(*state.get_region(), region); } @@ -1835,19 +1838,7 @@ fn test_concurrent_between_transfer_leader_and_merge() { rx2.recv().unwrap(); fail::remove("on_reject_commit_merge_1"); - let timer = Instant::now(); - loop { - if right.get_region_epoch().get_version() - == cluster.get_region_epoch(right.get_id()).get_version() - { - if timer.saturating_elapsed() > Duration::from_secs(5) { - panic!("region {:?} is still not merged.", right); - } - } else { - break; - } - sleep_ms(10); - } + wait_region_epoch_change(&cluster, &right, Duration::from_secs(5)); let region = pd_client.get_region(b"k1").unwrap(); assert_eq!(region.get_id(), right.get_id()); @@ -1856,3 +1847,129 @@ fn test_concurrent_between_transfer_leader_and_merge() { cluster.must_put(b"k4", b"v4"); } + +struct MsgVoteFilter {} + +impl Filter for MsgVoteFilter { + fn before(&self, msgs: &mut Vec) -> raftstore::Result<()> { + msgs.retain(|m| { + let msg_type = m.get_message().msg_type; + msg_type != MessageType::MsgRequestPreVote && msg_type != MessageType::MsgRequestVote + }); + check_messages(msgs) + } +} + +// Before the fix of this PR (#15649), after prepare merge, raft cmd can still +// be proposed if restart is involved. If the proposed raft cmd is CompactLog, +// panic can occur during fetch entries: see issue https://github.com/tikv/tikv/issues/15633. +// Consider the case: +// 1. node-1 apply PrepareMerge (assume log index 30), so it's in is_merging +// status which reject all proposals except for Rollback Merge +// 2. 
node-1 advance persisted_apply to 30 +// 3. node-1 restart and became leader. Now, it's not in is_merging status, so +// proposals can be proposed +// 4. node-1 propose CompactLog, replicate it to other nodes, and commit +// 5. node-0 apply PrepareMerge +// 6. node-0 apply CompactLog +// 6. node-0 fetches raft log entries which is required by +// AdminCmdType::CommitMerge and panic (due to compacted) +#[test] +fn test_restart_may_lose_merging_state() { + use test_raftstore_v2::*; + let mut cluster = new_node_cluster(0, 2); + configure_for_merge(&mut cluster.cfg); + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(12); + cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(10); + cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(10); + cluster.cfg.raft_store.merge_check_tick_interval = ReadableDuration::millis(10); + + cluster.run(); + fail::cfg("maybe_propose_compact_log", "return").unwrap(); + fail::cfg("on_ask_commit_merge", "return").unwrap(); + fail::cfg("flush_before_close_threshold", "return(0)").unwrap(); + + let (tx, rx) = channel(); + let tx = Mutex::new(tx); + fail::cfg_callback("on_apply_res_prepare_merge", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + + let region = cluster.get_region(b""); + cluster.must_split(®ion, b"k20"); + + let source = cluster.get_region(b"k05"); + let target = cluster.get_region(b"k25"); + + cluster.add_send_filter_on_node(2, Box::new(MsgVoteFilter {})); + + cluster.must_transfer_leader( + source.id, + source + .get_peers() + .iter() + .find(|p| p.store_id == 1) + .cloned() + .unwrap(), + ); + cluster.must_transfer_leader( + target.id, + target + .get_peers() + .iter() + .find(|p| p.store_id == 1) + .cloned() + .unwrap(), + ); + + for i in 0..20 { + let k = format!("k{:02}", i); + cluster.must_put(k.as_bytes(), b"val"); + } + + cluster.merge_region(source.id, target.id, Callback::None); + + rx.recv().unwrap(); + let router = 
cluster.get_router(1).unwrap(); + let (tx, rx) = sync_channel(1); + let msg = PeerMsg::FlushBeforeClose { tx }; + router.force_send(source.id, msg).unwrap(); + rx.recv().unwrap(); + + let (tx, rx) = channel(); + let tx = Mutex::new(tx); + fail::cfg_callback("on_apply_res_commit_merge_2", move || { + tx.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + + cluster.stop_node(1); + // Need to avoid propose commit merge, before node 1 becomes leader. Otherwise, + // the commit merge will be rejected. + let (tx2, rx2) = channel(); + let tx2 = Mutex::new(tx2); + fail::cfg_callback("on_applied_current_term", move || { + tx2.lock().unwrap().send(()).unwrap(); + }) + .unwrap(); + + fail::remove("maybe_propose_compact_log"); + cluster.run_node(1).unwrap(); + + // we have two regions. + rx2.recv().unwrap(); + rx2.recv().unwrap(); + fail::remove("on_ask_commit_merge"); + // wait node 2 to apply commit merge + rx.recv_timeout(Duration::from_secs(10)).unwrap(); + + wait_region_epoch_change(&cluster, &target, Duration::from_secs(5)); + + let region = cluster.get_region(b"k1"); + assert_eq!(region.get_id(), target.get_id()); + assert_eq!(region.get_start_key(), source.get_start_key()); + assert_eq!(region.get_end_key(), target.get_end_key()); + + cluster.must_put(b"k400", b"v400"); +} diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 056641e1e3f..bca389b26e6 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -216,7 +216,7 @@ fn test_flush_before_stop() { let region = cluster.get_region(b"k60"); cluster.must_split(®ion, b"k070"); - fail::cfg("flush_before_cluse_threshold", "return(10)").unwrap(); + fail::cfg("flush_before_close_threshold", "return(10)").unwrap(); for i in 0..100 { let key = format!("k{:03}", i); @@ -260,7 +260,7 @@ fn test_flush_before_stop2() { let mut cluster = new_server_cluster(0, 3); cluster.run(); - 
fail::cfg("flush_before_cluse_threshold", "return(10)").unwrap(); + fail::cfg("flush_before_close_threshold", "return(10)").unwrap(); fail::cfg("on_flush_completed", "return").unwrap(); for i in 0..20 { @@ -331,7 +331,7 @@ fn test_flush_index_exceed_last_modified() { ) .unwrap(); - fail::cfg("flush_before_cluse_threshold", "return(1)").unwrap(); + fail::cfg("flush_before_close_threshold", "return(1)").unwrap(); let router = cluster.get_router(1).unwrap(); let (tx, rx) = sync_channel(1); let msg = PeerMsg::FlushBeforeClose { tx }; From 9307f7ccfdf11c1047f833f888cbd77487b1c707 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 26 Sep 2023 18:06:17 +0800 Subject: [PATCH 0941/1149] raftstore-v2: fix MergedRecords not being cleaned up (#15650) close tikv/tikv#15644 MergedRecords were not being properly cleaned up, causing unnecessary bloating of RegionLocalState and continuous sending of GcPeerRequest by raftstore. This commit addresses the issue by enhancing the handling of GcPeerRequests, ensuring that target region followers forward GcPeerRequests to the source peer. The source peer or store then reports GcPeerResponse accordingly. 
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- .../operation/command/admin/conf_change.rs | 4 +- components/raftstore-v2/src/operation/life.rs | 22 ++- components/test_raftstore-v2/src/cluster.rs | 44 +++++ tests/integrations/raftstore/test_life.rs | 19 +- tests/integrations/raftstore/test_merge.rs | 170 +++++++++++++++--- 5 files changed, 214 insertions(+), 45 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 77ef6c823c1..55cee490e52 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -604,8 +604,8 @@ impl Apply { "update gc peer"; "index" => log_index, "updates" => ?updates, - "gc_peers" => ?removed_records, - "merged_peers" => ?merged_records + "removed_records" => ?removed_records, + "merged_records" => ?merged_records ); removed_records.retain(|p| !updates.contains(&p.get_id())); merged_records.retain_mut(|r| { diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 4d1a59de0a6..8591d5daf23 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -681,6 +681,10 @@ impl Peer { let _ = router.send_raft_message(m.into()); }, ); + } else { + // Source peer is already destroyed. Forward to store, and let + // it report GcPeer response. + let _ = ctx.router.send_raft_message(m.into()); } }); } @@ -748,15 +752,23 @@ impl Peer { } // 2. ask target to check whether source should be deleted. 
for record in state.get_merged_records() { - for (source, target) in record - .get_source_peers() - .iter() - .zip(record.get_target_peers()) - { + for source in record.get_source_peers() { need_gc_ids.push(source.get_id()); if gc_context.confirmed_ids.contains(&source.get_id()) { continue; } + let Some(target) = record + .get_target_peers() + .iter() + .find(|p| p.get_store_id() == source.get_store_id()) + else { + panic!( + "[region {}] {} target peer not found, {:?}", + self.region_id(), + self.peer_id(), + state + ); + }; let mut msg = RaftMessage::default(); msg.set_region_id(record.get_target_region_id()); diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 8ede3290167..9d61918bd1f 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -1689,6 +1689,50 @@ impl, EK: KvEngine> Cluster { } } + pub fn must_empty_region_removed_records(&mut self, region_id: u64) { + let timer = Instant::now(); + loop { + thread::sleep(Duration::from_millis(100)); + + let leader = match self.leader_of_region(region_id) { + None => continue, + Some(l) => l, + }; + let region_state = self.region_local_state(region_id, leader.get_store_id()); + if region_state.get_removed_records().is_empty() { + return; + } + if timer.saturating_elapsed() > Duration::from_secs(5) { + panic!( + "merged records and removed records must be empty, {:?}", + region_state + ); + } + } + } + + pub fn must_empty_region_merged_records(&mut self, region_id: u64) { + let timer = Instant::now(); + loop { + thread::sleep(Duration::from_millis(100)); + + let leader = match self.leader_of_region(region_id) { + None => continue, + Some(l) => l, + }; + let region_state = self.region_local_state(region_id, leader.get_store_id()); + if region_state.get_merged_records().is_empty() { + return; + } + if timer.saturating_elapsed() > Duration::from_secs(5) { + panic!( + "merged records and removed records must be 
empty, {:?}", + region_state + ); + } + } + } + pub fn get_snap_dir(&self, node_id: u64) -> String { self.sim.rl().get_snap_dir(node_id) } diff --git a/tests/integrations/raftstore/test_life.rs b/tests/integrations/raftstore/test_life.rs index f3b5704a586..809904c7f46 100644 --- a/tests/integrations/raftstore/test_life.rs +++ b/tests/integrations/raftstore/test_life.rs @@ -7,9 +7,7 @@ use std::{ use kvproto::raft_serverpb::{ExtraMessageType, PeerState, RaftMessage}; use raftstore::errors::Result; -use test_raftstore::{ - new_learner_peer, new_peer, sleep_ms, Filter, FilterFactory, Simulator as S1, -}; +use test_raftstore::{new_learner_peer, new_peer, Filter, FilterFactory, Simulator as S1}; use test_raftstore_v2::Simulator as S2; use tikv_util::{config::ReadableDuration, time::Instant, HandyRwLock}; @@ -125,20 +123,7 @@ fn test_gc_peer_tiflash_engine() { .must_remove_peer(r21, new_learner_peer(2, 10)); // Make sure leader cleans up removed_records. - let start = Instant::now(); - loop { - sleep_ms(500); - if cluster_v2 - .region_local_state(r21, 1) - .get_removed_records() - .is_empty() - { - break; - } - if start.saturating_elapsed() > Duration::from_secs(5) { - panic!("timeout"); - } - } + cluster_v2.must_empty_region_removed_records(r21); } #[test] diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 0b17ff72ae7..080724b15a7 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -6,7 +6,7 @@ use api_version::{test_kv_format_impl, KvFormat}; use engine_traits::{CF_LOCK, CF_WRITE}; use kvproto::{ raft_cmdpb::CmdType, - raft_serverpb::{PeerState, RaftMessage, RegionLocalState}, + raft_serverpb::{ExtraMessageType, PeerState, RaftMessage, RegionLocalState}, }; use pd_client::PdClient; use raft::eraftpb::{ConfChangeType, MessageType}; @@ -1733,7 +1733,7 @@ fn test_prepare_merge_with_5_nodes_snapshot() { } #[test_case(test_raftstore_v2::new_node_cluster)] -fn 
test_gc_peer_after_merge() { +fn test_gc_source_removed_records_after_merge() { let mut cluster = new_cluster(0, 3); configure_for_merge(&mut cluster.cfg); cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); @@ -1792,23 +1792,151 @@ fn test_gc_peer_after_merge() { // Right region replica on store 3 must be removed. cluster.must_region_not_exist(right.get_id(), 3); - let start = Instant::now(); - loop { - sleep_ms(cluster.cfg.raft_store.gc_peer_check_interval.as_millis()); - let region_state = cluster.region_local_state(left.get_id(), 1); - if (region_state.get_merged_records().is_empty() - || region_state.get_merged_records()[0] - .get_source_removed_records() - .is_empty()) - && region_state.get_removed_records().is_empty() - { - break; - } - if start.elapsed() > Duration::from_secs(5) { - panic!( - "source removed records and removed records must be empty, {:?}", - region_state - ); - } - } + // Right region must clean up removed and merged records. + cluster.must_empty_region_merged_records(left.get_id()); + cluster.must_empty_region_removed_records(left.get_id()); +} + +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_gc_source_peers_forward_by_target_peer_after_merge() { + let mut cluster = new_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + cluster.cfg.raft_store.raft_log_gc_threshold = 40; + cluster.cfg.raft_store.raft_log_gc_count_limit = Some(40); + cluster.cfg.raft_store.merge_max_log_gap = 15; + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.run(); + + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k3"); + + let left_peer_on_store1 = find_peer(&left, 1).unwrap().clone(); + cluster.must_transfer_leader(left.get_id(), left_peer_on_store1); + let right_peer_on_store1 = 
find_peer(&right, 1).unwrap().clone(); + cluster.must_transfer_leader(right.get_id(), right_peer_on_store1); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k3", b"v3"); + // Use DropMessageFilter to drop messages to store 3 without reporting error. + cluster.add_recv_filter_on_node( + 3, + Box::new(DropMessageFilter::new(Arc::new(|m| { + // Do not drop MsgAvailabilityRequest and MsgAvailabilityResponse + // messages, otherwise merge is blocked. + matches!( + m.get_extra_msg().get_type(), + ExtraMessageType::MsgAvailabilityRequest + | ExtraMessageType::MsgAvailabilityResponse + ) + }))), + ); + + // So cluster becomes + // left region: 1(leader) 2 | 3 + // right region: 1(leader) 2 | 3 + // | means isolation. + + // Merge left to right and remove left peer on store 3. + pd_client.must_merge(left.get_id(), right.get_id()); + let right_peer_on_store3 = find_peer(&right, 3).unwrap().clone(); + pd_client.must_remove_peer(right.get_id(), right_peer_on_store3); + let region_state = cluster.region_local_state(right.get_id(), 1); + assert!( + !region_state.get_merged_records().is_empty(), + "{:?}", + region_state + ); + + // So cluster becomes + // left region: merged + // right region: 1(leader) 2 | 3 (removed but not yet destroyed) + // | means isolation. + + let state1 = cluster.truncated_state(right.get_id(), 1); + (0..50).for_each(|i| cluster.must_put(b"k2", format!("v{}", i).as_bytes())); + // Wait to trigger compact raft log + cluster.wait_log_truncated(right.get_id(), 1, state1.get_index() + 1); + + // Cluster filters and wait for gc peer ticks. + cluster.clear_recv_filter_on_node(3); + sleep_ms(3 * cluster.cfg.raft_store.gc_peer_check_interval.as_millis()); + + // Left region replica on store 3 must be removed. + cluster.must_region_not_exist(left.get_id(), 3); + // Right region must clean up removed and merged records. 
+ cluster.must_empty_region_merged_records(right.get_id()); + cluster.must_empty_region_removed_records(right.get_id()); +} + +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_gc_source_peers_forward_by_store_after_merge() { + let mut cluster = new_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.run(); + + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k3"); + + let left_peer_on_store1 = find_peer(&left, 1).unwrap().clone(); + cluster.must_transfer_leader(left.get_id(), left_peer_on_store1); + let right_peer_on_store1 = find_peer(&right, 1).unwrap().clone(); + cluster.must_transfer_leader(right.get_id(), right_peer_on_store1); + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k3", b"v3"); + // Drop GcPeerResponse. + cluster.add_recv_filter_on_node( + 1, + Box::new(DropMessageFilter::new(Arc::new(|m| { + m.get_extra_msg().get_type() != ExtraMessageType::MsgGcPeerResponse + }))), + ); + + // So cluster becomes + // left region: 1(leader) 2 | 3 + // right region: 1(leader) 2 | 3 + // | means isolation. + + // Merge left to right and remove left peer on store 3. + pd_client.must_merge(left.get_id(), right.get_id()); + let right_peer_on_store3 = find_peer(&right, 3).unwrap().clone(); + pd_client.must_remove_peer(right.get_id(), right_peer_on_store3); + // Right region replica on store 3 must be removed. 
+ cluster.must_region_not_exist(right.get_id(), 3); + let region_state = cluster.region_local_state(right.get_id(), 1); + assert!( + !region_state.get_merged_records().is_empty(), + "{:?}", + region_state + ); + assert!( + !region_state.get_removed_records().is_empty(), + "{:?}", + region_state + ); + + // So cluster becomes + // left region: merged + // right region: 1(leader) 2 | 3 (destroyed but not yet cleaned in removed + // records) + // | means isolation. + + // Cluster filters and wait for gc peer ticks. + cluster.clear_recv_filter_on_node(1); + sleep_ms(3 * cluster.cfg.raft_store.gc_peer_check_interval.as_millis()); + + // Right region must clean up removed and merged records. + cluster.must_empty_region_merged_records(right.get_id()); + cluster.must_empty_region_removed_records(right.get_id()); } From df263d287dbdc8397030a3437ee97c918c43abb4 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 27 Sep 2023 15:34:47 +0800 Subject: [PATCH 0942/1149] raftstore-v2: check gc peer after commit merge (#15693) close tikv/tikv#15672 This commit addresses the issue of orphan peers remaining in TiKV due to the absence of GcPeer tick registration after commit merge. The lack of regular checks on removed_records and merged_records can lead to delays in detecting and resolving these issues. To improve this, we have implemented a solution that ensures TiKV registers the GcPeer tick after commit merge. This change enables regular checks on the removed_records and merged_records, preventing them from being overlooked for an extended period. 
Signed-off-by: Neil Shen --- .../operation/command/admin/merge/commit.rs | 1 + tests/integrations/raftstore/test_merge.rs | 31 +++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index bec0265ffc3..e95a13600fb 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -829,6 +829,7 @@ impl Peer { "target_region" => ?self.region(), ); self.add_pending_tick(PeerTick::SplitRegionCheck); + self.maybe_schedule_gc_peer_tick(); } } diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 080724b15a7..8d93d2c5a5c 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -1940,3 +1940,34 @@ fn test_gc_source_peers_forward_by_store_after_merge() { cluster.must_empty_region_merged_records(right.get_id()); cluster.must_empty_region_removed_records(right.get_id()); } + +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_gc_merged_record_in_time() { + let mut cluster = new_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(100); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.run(); + + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k3"); + + let left_peer_on_store1 = find_peer(&left, 1).unwrap().clone(); + cluster.must_transfer_leader(left.get_id(), left_peer_on_store1); + let right_peer_on_store1 = find_peer(&right, 1).unwrap().clone(); + cluster.must_transfer_leader(right.get_id(), right_peer_on_store1); + + // Wait enough time to trigger gc peer, and if there is nothing to gc, + // leader 
skips registering gc peer tick. + sleep_ms(3 * cluster.cfg.raft_store.gc_peer_check_interval.as_millis()); + + // Merge left to right. + pd_client.must_merge(left.get_id(), right.get_id()); + + // Once merge complete, gc peer tick should be registered and merged record + // will be cleaned up in time. + cluster.must_empty_region_merged_records(right.get_id()); +} From 73bc4012f0ea5c49870639ccf353d1de5382025f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 27 Sep 2023 17:45:16 +0800 Subject: [PATCH 0943/1149] sst_importer: impl SuspendImport interface (#15612) close tikv/tikv#15611 Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 2 +- components/error_code/src/sst_importer.rs | 5 +- components/raftstore/src/store/util.rs | 2 +- components/sst_importer/src/errors.rs | 16 +++ src/import/sst_service.rs | 99 ++++++++++++++++++- tests/integrations/import/test_sst_service.rs | 94 ++++++++++++++++++ 6 files changed, 212 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 124a87f069e..b3842f92752 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2933,7 +2933,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#090f247be15c00a6000a4d23669ac3e95ea9fcd5" +source = "git+https://github.com/pingcap/kvproto.git#87bebcc0d071a18cbbd94a4fc02de9c4988af815" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/error_code/src/sst_importer.rs b/components/error_code/src/sst_importer.rs index 001f4f146f6..117400e8aff 100644 --- a/components/error_code/src/sst_importer.rs +++ b/components/error_code/src/sst_importer.rs @@ -22,5 +22,8 @@ define_error_codes!( TTL_LEN_NOT_EQUALS_TO_PAIRS => ("TtlLenNotEqualsToPairs", "", ""), INCOMPATIBLE_API_VERSION => ("IncompatibleApiVersion", "", ""), INVALID_KEY_MODE => ("InvalidKeyMode", "", ""), - 
RESOURCE_NOT_ENOUTH => ("ResourceNotEnough", "", "") + RESOURCE_NOT_ENOUTH => ("ResourceNotEnough", "", ""), + SUSPENDED => ("Suspended", + "this request has been suspended.", + "Probably there are some export tools don't support exporting data inserted by `ingest`(say, snapshot backup). Check the user manual and stop them.") ); diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 3f34fe691ee..519d486102c 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -306,7 +306,7 @@ pub fn compare_region_epoch( // tells TiDB with a epoch not match error contains the latest target Region // info, TiDB updates its region cache and sends requests to TiKV B, // and TiKV B has not applied commit merge yet, since the region epoch in - // request is higher than TiKV B, the request must be denied due to epoch + // request is higher than TiKV B, the request must be suspended due to epoch // not match, so it does not read on a stale snapshot, thus avoid the // KeyNotInRegion error. let current_epoch = region.get_region_epoch(); diff --git a/components/sst_importer/src/errors.rs b/components/sst_importer/src/errors.rs index 7ff940fff12..acca7523427 100644 --- a/components/sst_importer/src/errors.rs +++ b/components/sst_importer/src/errors.rs @@ -2,6 +2,7 @@ use std::{ error::Error as StdError, io::Error as IoError, num::ParseIntError, path::PathBuf, result, + time::Duration, }; use encryption::Error as EncryptionError; @@ -31,6 +32,7 @@ pub fn error_inc(type_: &str, err: &Error) { Error::BadFormat(..) => "bad_format", Error::Encryption(..) => "encryption", Error::CodecError(..) => "codec", + Error::Suspended { .. 
} => "suspended", _ => return, }; IMPORTER_ERROR_VEC.with_label_values(&[type_, label]).inc(); @@ -125,6 +127,9 @@ pub enum Error { #[error("resource is not enough {0}")] ResourceNotEnough(String), + + #[error("imports are suspended for {time_to_lease_expire:?}")] + Suspended { time_to_lease_expire: Duration }, } impl Error { @@ -160,6 +165,16 @@ impl From for import_sstpb::Error { err.set_store_error(import_err); err.set_message(format!("{}", e)); } + Error::Suspended { + time_to_lease_expire, + } => { + let mut store_err = errorpb::Error::default(); + let mut server_is_busy = errorpb::ServerIsBusy::default(); + server_is_busy.set_backoff_ms(time_to_lease_expire.as_millis() as _); + store_err.set_server_is_busy(server_is_busy); + err.set_store_error(store_err); + err.set_message(format!("{}", e)); + } _ => { err.set_message(format!("{}", e)); } @@ -197,6 +212,7 @@ impl ErrorCodeExt for Error { Error::IncompatibleApiVersion => error_code::sst_importer::INCOMPATIBLE_API_VERSION, Error::InvalidKeyMode { .. } => error_code::sst_importer::INVALID_KEY_MODE, Error::ResourceNotEnough(_) => error_code::sst_importer::RESOURCE_NOT_ENOUTH, + Error::Suspended { .. 
} => error_code::sst_importer::SUSPENDED, } } } diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 6d40ffe959c..68403e226f8 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -5,7 +5,10 @@ use std::{ convert::identity, future::Future, path::PathBuf, - sync::{Arc, Mutex}, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, Mutex, + }, time::Duration, }; @@ -20,7 +23,8 @@ use kvproto::{ errorpb, import_sstpb::{ Error as ImportPbError, ImportSst, Range, RawWriteRequest_oneof_chunk as RawChunk, SstMeta, - SwitchMode, WriteRequest_oneof_chunk as Chunk, *, + SuspendImportRpcRequest, SuspendImportRpcResponse, SwitchMode, + WriteRequest_oneof_chunk as Chunk, *, }, kvrpcpb::Context, }; @@ -41,7 +45,7 @@ use tikv_util::{ HandyRwLock, }; use tokio::{runtime::Runtime, time::sleep}; -use txn_types::{Key, WriteRef, WriteType}; +use txn_types::{Key, TimeStamp, WriteRef, WriteType}; use super::{ make_rpc_error, @@ -49,6 +53,7 @@ use super::{ }; use crate::{ import::duplicate_detect::DuplicateDetector, + send_rpc_response, server::CONFIG_ROCKSDB_GAUGE, storage::{self, errors::extract_region_error_from_error}, }; @@ -80,6 +85,10 @@ const WIRE_EXTRA_BYTES: usize = 12; /// [`raft_writer::ThrottledTlsEngineWriter`]. There aren't too many items held /// in the writer. So we can run the GC less frequently. const WRITER_GC_INTERVAL: Duration = Duration::from_secs(300); +/// The max time of suspending requests. +/// This may save us from some client sending insane value to the server. +const SUSPEND_REQUEST_MAX_SECS: u64 = // 6h + 6 * 60 * 60; fn transfer_error(err: storage::Error) -> ImportPbError { let mut e = ImportPbError::default(); @@ -121,6 +130,9 @@ pub struct ImportSstService { // it's some iff multi-rocksdb is enabled store_meta: Option>>>, resource_manager: Option>, + + // When less than now, don't accept any requests. 
+ suspend_req_until: Arc, } struct RequestCollector { @@ -356,6 +368,7 @@ impl ImportSstService { writer, store_meta, resource_manager, + suspend_req_until: Arc::new(AtomicU64::new(0)), } } @@ -619,6 +632,47 @@ impl ImportSstService { Ok(range) } + + /// Check whether we should suspend the current request. + fn check_suspend(&self) -> Result<()> { + let now = TimeStamp::physical_now(); + let suspend_until = self.suspend_req_until.load(Ordering::SeqCst); + if now < suspend_until { + Err(Error::Suspended { + time_to_lease_expire: Duration::from_millis(suspend_until - now), + }) + } else { + Ok(()) + } + } + + /// suspend requests for a period. + /// + /// # returns + /// + /// whether for now, the requests has already been suspended. + pub fn suspend_requests(&self, for_time: Duration) -> bool { + let now = TimeStamp::physical_now(); + let last_suspend_until = self.suspend_req_until.load(Ordering::SeqCst); + let suspended = now < last_suspend_until; + let suspend_until = TimeStamp::physical_now() + for_time.as_millis() as u64; + self.suspend_req_until + .store(suspend_until, Ordering::SeqCst); + suspended + } + + /// allow all requests to enter. + /// + /// # returns + /// + /// whether requests has already been previously suspended. 
+ pub fn allow_requests(&self) -> bool { + let now = TimeStamp::physical_now(); + let last_suspend_until = self.suspend_req_until.load(Ordering::SeqCst); + let suspended = now < last_suspend_until; + self.suspend_req_until.store(0, Ordering::SeqCst); + suspended + } } #[macro_export] @@ -993,6 +1047,10 @@ impl ImportSst for ImportSstService { ) { let label = "ingest"; let timer = Instant::now_coarse(); + if let Err(err) = self.check_suspend() { + ctx.spawn(async move { crate::send_rpc_response!(Err(err), sink, label, timer) }); + return; + } let mut resp = IngestResponse::default(); let region_id = req.get_context().get_region_id(); @@ -1036,6 +1094,10 @@ impl ImportSst for ImportSstService { ) { let label = "multi-ingest"; let timer = Instant::now_coarse(); + if let Err(err) = self.check_suspend() { + ctx.spawn(async move { crate::send_rpc_response!(Err(err), sink, label, timer) }); + return; + } let mut resp = IngestResponse::default(); if let Some(errorpb) = self.check_write_stall(req.get_context().get_region_id()) { @@ -1240,6 +1302,37 @@ impl ImportSst for ImportSstService { RawChunk, new_raw_writer ); + + fn suspend_import_rpc( + &mut self, + ctx: RpcContext<'_>, + req: SuspendImportRpcRequest, + sink: UnarySink, + ) { + let label = "suspend_import_rpc"; + let timer = Instant::now_coarse(); + + if req.should_suspend_imports && req.get_duration_in_secs() > SUSPEND_REQUEST_MAX_SECS { + ctx.spawn(async move { + send_rpc_response!(Err(Error::Io( + std::io::Error::new(std::io::ErrorKind::InvalidInput, + format!("you are going to suspend the import RPCs too long. 
(for {} seconds, max acceptable duration is {} seconds)", + req.get_duration_in_secs(), SUSPEND_REQUEST_MAX_SECS)))), sink, label, timer); + }); + return; + } + + let suspended = if req.should_suspend_imports { + info!("suspend incoming import RPCs."; "for_second" => req.get_duration_in_secs(), "caller" => req.get_caller()); + self.suspend_requests(Duration::from_secs(req.get_duration_in_secs())) + } else { + info!("allow incoming import RPCs."; "caller" => req.get_caller()); + self.allow_requests() + }; + let mut resp = SuspendImportRpcResponse::default(); + resp.set_already_suspended(suspended); + ctx.spawn(async move { send_rpc_response!(Ok(resp), sink, label, timer) }); + } } // add error statistics from pb error response diff --git a/tests/integrations/import/test_sst_service.rs b/tests/integrations/import/test_sst_service.rs index 22ab9c7d7fe..6c56ab0018b 100644 --- a/tests/integrations/import/test_sst_service.rs +++ b/tests/integrations/import/test_sst_service.rs @@ -555,3 +555,97 @@ fn test_duplicate_and_close() { req.set_mode(SwitchMode::Normal); import.switch_mode(&req).unwrap(); } + +#[test] +fn test_suspend_import() { + let (_cluster, ctx, tikv, import) = new_cluster_and_tikv_import_client(); + let sst_range = (0, 10); + let write = |sst_range: (u8, u8)| { + let mut meta = new_sst_meta(0, 0); + meta.set_region_id(ctx.get_region_id()); + meta.set_region_epoch(ctx.get_region_epoch().clone()); + + let mut keys = vec![]; + let mut values = vec![]; + for i in sst_range.0..sst_range.1 { + keys.push(vec![i]); + values.push(vec![i]); + } + send_write_sst(&import, &meta, keys, values, 1) + }; + let ingest = |sst_meta: &SstMeta| { + let mut ingest = IngestRequest::default(); + ingest.set_context(ctx.clone()); + ingest.set_sst(sst_meta.clone()); + import.ingest(&ingest) + }; + let multi_ingest = |sst_metas: &[SstMeta]| { + let mut multi_ingest = MultiIngestRequest::default(); + multi_ingest.set_context(ctx.clone()); + 
multi_ingest.set_ssts(sst_metas.to_vec().into()); + import.multi_ingest(&multi_ingest) + }; + let suspendctl = |for_time| { + let mut req = SuspendImportRpcRequest::default(); + req.set_caller("test_suspend_import".to_owned()); + if for_time == 0 { + req.set_should_suspend_imports(false); + } else { + req.set_should_suspend_imports(true); + req.set_duration_in_secs(for_time); + } + req + }; + + let write_res = write(sst_range).unwrap(); + assert_eq!(write_res.metas.len(), 1); + let sst = write_res.metas[0].clone(); + + assert!( + !import + .suspend_import_rpc(&suspendctl(6000)) + .unwrap() + .already_suspended + ); + let write_res = write(sst_range); + write_res.unwrap(); + let ingest_res = ingest(&sst); + assert_to_string_contains!(ingest_res.unwrap_err(), "Suspended"); + let multi_ingest_res = multi_ingest(&[sst.clone()]); + assert_to_string_contains!(multi_ingest_res.unwrap_err(), "Suspended"); + + assert!( + import + .suspend_import_rpc(&suspendctl(0)) + .unwrap() + .already_suspended + ); + + let ingest_res = ingest(&sst); + assert!(ingest_res.is_ok(), "{:?} => {:?}", sst, ingest_res); + + check_ingested_txn_kvs(&tikv, &ctx, sst_range, 2); + + // test timeout. + assert!( + !import + .suspend_import_rpc(&suspendctl(1)) + .unwrap() + .already_suspended + ); + let sst_range = (10, 20); + let write_res = write(sst_range); + let sst = write_res.unwrap().metas; + let res = multi_ingest(&sst); + assert_to_string_contains!(res.unwrap_err(), "Suspended"); + std::thread::sleep(Duration::from_secs(1)); + multi_ingest(&sst).unwrap(); + + // check an insane value should be rejected. 
+ import + .suspend_import_rpc(&suspendctl(u64::MAX - 42)) + .unwrap_err(); + let sst_range = (20, 30); + let ssts = write(sst_range).unwrap(); + multi_ingest(ssts.get_metas()).unwrap(); +} From 4814a6129b8a4ae122bb6152c140a064787456bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 28 Sep 2023 11:34:48 +0800 Subject: [PATCH 0944/1149] compaction_guard: split SST when detected possible huge compaction (#15379) close tikv/tikv#15058 This PR make the compaction guard splits SSTs when it find that there are possible huge compactions. It works by iterating the next of the output level (Let is be level L+1), when a SST crosses such a huge key range that making L+1 contains size greater than the `max-compaction-size`. Signed-off-by: hillium Co-authored-by: tonyxuqqi --- .../engine_rocks/src/sst_partitioner.rs | 2 + .../engine_traits/src/sst_partitioner.rs | 2 + .../raftstore/src/store/compaction_guard.rs | 314 ++++++++++++++++-- src/config/mod.rs | 1 + 4 files changed, 293 insertions(+), 26 deletions(-) diff --git a/components/engine_rocks/src/sst_partitioner.rs b/components/engine_rocks/src/sst_partitioner.rs index fc1dcd40270..f642a94f28f 100644 --- a/components/engine_rocks/src/sst_partitioner.rs +++ b/components/engine_rocks/src/sst_partitioner.rs @@ -23,6 +23,8 @@ impl rocksdb::SstPartitionerFactory output_level: context.output_level, smallest_key: context.smallest_key, largest_key: context.largest_key, + next_level_boundaries: context.next_level_boundaries.clone(), + next_level_sizes: context.next_level_sizes.clone(), }; self.0.create_partitioner(&ctx).map(RocksSstPartitioner) } diff --git a/components/engine_traits/src/sst_partitioner.rs b/components/engine_traits/src/sst_partitioner.rs index bc6ec13a4eb..4a8ee9e71bc 100644 --- a/components/engine_traits/src/sst_partitioner.rs +++ b/components/engine_traits/src/sst_partitioner.rs @@ -22,6 +22,8 @@ pub struct SstPartitionerContext<'a> { pub 
output_level: i32, pub smallest_key: &'a [u8], pub largest_key: &'a [u8], + pub next_level_boundaries: Vec<&'a [u8]>, + pub next_level_sizes: Vec, } pub trait SstPartitioner { diff --git a/components/raftstore/src/store/compaction_guard.rs b/components/raftstore/src/store/compaction_guard.rs index efee09be906..138d730fa29 100644 --- a/components/raftstore/src/store/compaction_guard.rs +++ b/components/raftstore/src/store/compaction_guard.rs @@ -23,10 +23,16 @@ pub struct CompactionGuardGeneratorFactory { cf_name: CfNames, provider: P, min_output_file_size: u64, + max_compaction_size: u64, } impl CompactionGuardGeneratorFactory

{ - pub fn new(cf: CfName, provider: P, min_output_file_size: u64) -> Result { + pub fn new( + cf: CfName, + provider: P, + min_output_file_size: u64, + max_compaction_size: u64, + ) -> Result { let cf_name = match cf { CF_DEFAULT => CfNames::default, CF_LOCK => CfNames::lock, @@ -43,6 +49,7 @@ impl CompactionGuardGeneratorFactory

{ cf_name, provider, min_output_file_size, + max_compaction_size, }) } } @@ -72,6 +79,15 @@ impl SstPartitionerFactory use_guard: false, boundaries: vec![], pos: 0, + next_level_pos: 0, + next_level_boundaries: context + .next_level_boundaries + .iter() + .map(|v| v.to_vec()) + .collect(), + next_level_size: context.next_level_sizes.clone(), + current_next_level_size: 0, + max_compaction_size: self.max_compaction_size, }) } } @@ -86,7 +102,20 @@ pub struct CompactionGuardGenerator { use_guard: bool, // The boundary keys are exclusive. boundaries: Vec>, + /// The SST boundaries overlapped with the compaction input at the next + /// level of output level (let we call it L+2). When the output level is the + /// bottom-most level(usually L6), this will be empty. The boundaries + /// are the first key of the first sst concatenating with all ssts' end key. + next_level_boundaries: Vec>, + /// The size of each "segment" of L+2. If the `next_level_boundaries`(let we + /// call it NLB) isn't empty, `next_level_size` will have length + /// `NLB.len() - 1`, and at the position `N` stores the size of range + /// `[NLB[N], NLB[N+1]]` in L+2. + next_level_size: Vec, pos: usize, + next_level_pos: usize, + current_next_level_size: u64, + max_compaction_size: u64, } impl CompactionGuardGenerator

{ @@ -153,27 +182,52 @@ impl SstPartitioner for CompactionGuardGenerator

{ if !self.use_guard { return SstPartitionerResult::NotRequired; } - let mut pos = self.pos; - let mut skip_count = 0; - while pos < self.boundaries.len() && self.boundaries[pos].as_slice() <= req.prev_user_key { - pos += 1; - skip_count += 1; - if skip_count >= COMPACTION_GUARD_MAX_POS_SKIP { - let prev_user_key = req.prev_user_key.to_vec(); - pos = match self.boundaries.binary_search(&prev_user_key) { - Ok(search_pos) => search_pos + 1, - Err(search_pos) => search_pos, - }; - break; - } + self.pos = seek_to(&self.boundaries, req.prev_user_key, self.pos); + // Generally this shall be a noop... because each time we are moving the cursor + // to the previous key. + let left_next_level_pos = seek_to( + &self.next_level_boundaries, + req.prev_user_key, + self.next_level_pos, + ); + let right_next_level_pos = seek_to( + &self.next_level_boundaries, + req.current_user_key, + left_next_level_pos, + ); + // The cursor has been moved. + if right_next_level_pos > left_next_level_pos { + self.current_next_level_size += self.next_level_size + [left_next_level_pos..right_next_level_pos - 1] + .iter() + .map(|x| *x as u64) + .sum::(); } - self.pos = pos; - if pos < self.boundaries.len() && self.boundaries[pos].as_slice() <= req.current_user_key { - if req.current_output_file_size >= self.min_output_file_size { + self.next_level_pos = right_next_level_pos; + + if self.pos < self.boundaries.len() + && self.boundaries[self.pos].as_slice() <= req.current_user_key + { + if req.current_output_file_size >= self.min_output_file_size + // Or, the output file may make a huge compaction even greater than the max compaction size. 
+ || self.current_next_level_size >= self.max_compaction_size + { COMPACTION_GUARD_ACTION_COUNTER .get(self.cf_name) .partition .inc(); + // The current pointer status should be like (let * be the current pos, ^ be + // where the previous user key is): + // boundaries: A B C D + // size: 1 3 2 + // ^ * + // You will notice that the previous user key is between B and C, which indices + // that there must still be something between previous user key and C. + // We still set `current_next_level_size` to zero here, so the segment will be + // forgotten. I think that will be acceptable given generally a segment won't be + // greater than the `max-sst-size`, which is tiny comparing to the + // `max-compaction-size` usually. + self.current_next_level_size = 0; SstPartitionerResult::Required } else { COMPACTION_GUARD_ACTION_COUNTER @@ -193,10 +247,28 @@ impl SstPartitioner for CompactionGuardGenerator

{ } } +fn seek_to(all_data: &Vec>, target_key: &[u8], from_pos: usize) -> usize { + let mut pos = from_pos; + let mut skip_count = 0; + while pos < all_data.len() && all_data[pos].as_slice() <= target_key { + pos += 1; + skip_count += 1; + if skip_count >= COMPACTION_GUARD_MAX_POS_SKIP { + pos = match all_data.binary_search_by(|probe| probe.as_slice().cmp(target_key)) { + Ok(search_pos) => search_pos + 1, + Err(search_pos) => search_pos, + }; + break; + } + } + pos +} + #[cfg(test)] mod tests { - use std::str; + use std::{path::Path, str}; + use collections::HashMap; use engine_rocks::{ raw::{BlockBasedOptions, DBCompressionType}, util::new_engine_opt, @@ -212,6 +284,13 @@ mod tests { use super::*; use crate::coprocessor::region_info_accessor::MockRegionInfoProvider; + impl CompactionGuardGenerator { + fn reset_next_level_size_state(&mut self) { + self.current_next_level_size = 0; + self.next_level_pos = 0; + } + } + #[test] fn test_compaction_guard_non_data() { let mut guard = CompactionGuardGenerator { @@ -224,6 +303,11 @@ mod tests { use_guard: false, boundaries: vec![], pos: 0, + current_next_level_size: 0, + next_level_pos: 0, + next_level_boundaries: vec![], + next_level_size: vec![], + max_compaction_size: 1 << 30, }; guard.smallest_key = keys::LOCAL_MIN_KEY.to_vec(); @@ -267,8 +351,16 @@ mod tests { provider: MockRegionInfoProvider::new(vec![]), initialized: true, use_guard: true, - boundaries: vec![b"bbb".to_vec(), b"ccc".to_vec()], + boundaries: vec![b"bbb".to_vec(), b"ccc".to_vec(), b"ddd".to_vec()], pos: 0, + current_next_level_size: 0, + next_level_pos: 0, + next_level_boundaries: (0..10) + .map(|x| format!("bbb{:02}", x).into_bytes()) + .chain((0..100).map(|x| format!("cccz{:03}", x).into_bytes())) + .collect(), + next_level_size: [&[1 << 18; 99][..], &[1 << 28; 10][..]].concat(), + max_compaction_size: 1 << 30, // 1GB }; // Crossing region boundary. 
let mut req = SstPartitionerRequest { @@ -277,7 +369,11 @@ mod tests { current_output_file_size: 32 << 20, }; assert_eq!(guard.should_partition(&req), SstPartitionerResult::Required); + assert_eq!(guard.next_level_pos, 10); assert_eq!(guard.pos, 0); + assert_eq!(guard.current_next_level_size, 0); + guard.reset_next_level_size_state(); + // Output file size too small. req = SstPartitionerRequest { prev_user_key: b"bba", @@ -289,6 +385,10 @@ mod tests { SstPartitionerResult::NotRequired ); assert_eq!(guard.pos, 0); + assert_eq!(guard.next_level_pos, 10); + assert_eq!(guard.current_next_level_size, 9 << 18); + guard.reset_next_level_size_state(); + // Not crossing boundary. req = SstPartitionerRequest { prev_user_key: b"aaa", @@ -300,6 +400,9 @@ mod tests { SstPartitionerResult::NotRequired ); assert_eq!(guard.pos, 0); + assert_eq!(guard.next_level_pos, 0); + guard.reset_next_level_size_state(); + // Move position req = SstPartitionerRequest { prev_user_key: b"cca", @@ -308,6 +411,30 @@ mod tests { }; assert_eq!(guard.should_partition(&req), SstPartitionerResult::Required); assert_eq!(guard.pos, 1); + assert_eq!(guard.next_level_pos, 110); + guard.reset_next_level_size_state(); + + // Move next level posistion + req = SstPartitionerRequest { + prev_user_key: b"cccz000", + current_user_key: b"cccz042", + current_output_file_size: 1 << 20, + }; + assert_eq!( + guard.should_partition(&req), + SstPartitionerResult::NotRequired + ); + assert_eq!(guard.pos, 2); + assert_eq!(guard.next_level_pos, 53); + + req = SstPartitionerRequest { + prev_user_key: b"cccz090", + current_user_key: b"dde", + current_output_file_size: 1 << 20, + }; + assert_eq!(guard.should_partition(&req), SstPartitionerResult::Required); + assert_eq!(guard.pos, 2); + assert_eq!(guard.next_level_pos, 110); } #[test] @@ -339,6 +466,11 @@ mod tests { b"aaa15".to_vec(), ], pos: 0, + current_next_level_size: 0, + next_level_pos: 0, + next_level_boundaries: vec![], + next_level_size: vec![], + 
max_compaction_size: 1 << 30, }; // Binary search meet exact match. guard.pos = 0; @@ -365,15 +497,23 @@ mod tests { const MIN_OUTPUT_FILE_SIZE: u64 = 1024; const MAX_OUTPUT_FILE_SIZE: u64 = 4096; + const MAX_COMPACTION_SIZE: u64 = 10240; fn new_test_db(provider: MockRegionInfoProvider) -> (RocksEngine, TempDir) { let temp_dir = TempDir::new().unwrap(); let mut cf_opts = RocksCfOptions::default(); + cf_opts.set_max_bytes_for_level_base(MAX_OUTPUT_FILE_SIZE); + cf_opts.set_max_bytes_for_level_multiplier(5); cf_opts.set_target_file_size_base(MAX_OUTPUT_FILE_SIZE); cf_opts.set_sst_partitioner_factory(RocksSstPartitionerFactory( - CompactionGuardGeneratorFactory::new(CF_DEFAULT, provider, MIN_OUTPUT_FILE_SIZE) - .unwrap(), + CompactionGuardGeneratorFactory::new( + CF_DEFAULT, + provider, + MIN_OUTPUT_FILE_SIZE, + MAX_COMPACTION_SIZE, + ) + .unwrap(), )); cf_opts.set_disable_auto_compactions(true); cf_opts.compression_per_level(&[ @@ -412,6 +552,16 @@ mod tests { ret } + fn get_sst_files(dir: &Path) -> Vec { + let files = dir.read_dir().unwrap(); + let mut sst_files = files + .map(|entry| entry.unwrap().path().to_str().unwrap().to_owned()) + .filter(|entry| entry.ends_with(".sst")) + .collect::>(); + sst_files.sort(); + sst_files + } + #[test] fn test_compaction_guard_with_rocks() { let provider = MockRegionInfoProvider::new(vec![ @@ -463,11 +613,7 @@ mod tests { ) .unwrap(); - let files = dir.path().read_dir().unwrap(); - let mut sst_files = files - .map(|entry| entry.unwrap().path().to_str().unwrap().to_owned()) - .filter(|entry| entry.ends_with(".sst")) - .collect::>(); + let mut sst_files = get_sst_files(dir.path()); sst_files.sort(); assert_eq!(3, sst_files.len()); assert_eq!(collect_keys(&sst_files[0]), [b"za1", b"zb1", b"zb2"]); @@ -477,4 +623,120 @@ mod tests { ); assert_eq!(collect_keys(&sst_files[2]), [b"zc6"]); } + + fn simple_regions() -> MockRegionInfoProvider { + MockRegionInfoProvider::new(vec![ + Region { + id: 1, + start_key: b"a".to_vec(), + end_key: 
b"b".to_vec(), + ..Default::default() + }, + Region { + id: 2, + start_key: b"b".to_vec(), + end_key: b"c".to_vec(), + ..Default::default() + }, + Region { + id: 3, + start_key: b"c".to_vec(), + end_key: b"d".to_vec(), + ..Default::default() + }, + ]) + } + + #[test] + fn test_next_level_compaction() { + let provider = simple_regions(); + let (db, _dir) = new_test_db(provider); + assert_eq!(b"z", DATA_PREFIX_KEY); + let tiny_value = [b'v'; 1]; + let value = vec![b'v'; 1024 * 10]; + ['a', 'b', 'c'] + .into_iter() + .flat_map(|x| (1..10).map(move |n| format!("z{x}{n}").into_bytes())) + .for_each(|key| db.put(&key, &value).unwrap()); + db.flush_cfs(&[], true).unwrap(); + db.compact_files_in_range(None, None, Some(2)).unwrap(); + db.put(b"za0", &tiny_value).unwrap(); + db.put(b"zd0", &tiny_value).unwrap(); + db.flush_cfs(&[], true).unwrap(); + db.compact_files_in_range(None, None, Some(1)).unwrap(); + + let level_1 = &level_files(&db)[&1]; + assert_eq!(level_1.len(), 2, "{:?}", level_1); + assert_eq!(level_1[0].smallestkey, b"za0", "{:?}", level_1); + assert_eq!(level_1[0].largestkey, b"za0", "{:?}", level_1); + assert_eq!(level_1[1].smallestkey, b"zd0", "{:?}", level_1); + assert_eq!(level_1[1].largestkey, b"zd0", "{:?}", level_1); + } + + #[test] + fn test_next_level_compaction_no_split() { + let provider = simple_regions(); + let (db, _dir) = new_test_db(provider); + assert_eq!(b"z", DATA_PREFIX_KEY); + let tiny_value = [b'v'; 1]; + let value = vec![b'v'; 1024 * 10]; + ['a', 'b', 'c'] + .into_iter() + .flat_map(|x| (1..10).map(move |n| format!("z{x}{n}").into_bytes())) + .for_each(|key| db.put(&key, &value).unwrap()); + db.flush_cfs(&[], true).unwrap(); + db.compact_files_in_range(None, None, Some(2)).unwrap(); + // So... the next-level size will be almost 1024 * 9, which doesn't exceeds the + // compaction size limit. 
+ db.put(b"za0", &tiny_value).unwrap(); + db.put(b"za9", &tiny_value).unwrap(); + db.flush_cfs(&[], true).unwrap(); + db.compact_files_in_range(None, None, Some(1)).unwrap(); + + let level_1 = &level_files(&db)[&1]; + assert_eq!(level_1.len(), 1, "{:?}", level_1); + assert_eq!(level_1[0].smallestkey, b"za0", "{:?}", level_1); + assert_eq!(level_1[0].largestkey, b"za9", "{:?}", level_1); + db.compact_range(None, None, false, 1).unwrap(); + + // So... the next-level size will be almost 1024 * 15, which should reach the + // limit. + db.put(b"za30", &tiny_value).unwrap(); + db.put(b"zb90", &tiny_value).unwrap(); + db.flush_cfs(&[], true).unwrap(); + db.compact_files_in_range(None, None, Some(1)).unwrap(); + + let level_1 = &level_files(&db)[&1]; + assert_eq!(level_1.len(), 2, "{:?}", level_1); + assert_eq!(level_1[0].smallestkey, b"za30", "{:?}", level_1); + assert_eq!(level_1[1].largestkey, b"zb90", "{:?}", level_1); + } + + #[derive(Debug)] + #[allow(dead_code)] + struct OwnedSstFileMetadata { + name: String, + size: usize, + smallestkey: Vec, + largestkey: Vec, + } + + #[allow(unused)] + fn level_files(db: &RocksEngine) -> HashMap> { + let db = db.as_inner(); + let cf = db.cf_handle("default").unwrap(); + let md = db.get_column_family_meta_data(cf); + let mut res: HashMap> = HashMap::default(); + for (i, level) in md.get_levels().into_iter().enumerate() { + for file in level.get_files() { + res.entry(i).or_default().push(OwnedSstFileMetadata { + name: file.get_name(), + size: file.get_size(), + smallestkey: file.get_smallestkey().to_owned(), + largestkey: file.get_largestkey().to_owned(), + }); + } + } + res + } } diff --git a/src/config/mod.rs b/src/config/mod.rs index 8318556483e..d18d6f8cda0 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -645,6 +645,7 @@ macro_rules! 
build_cf_opt { $cf_name, provider.clone(), $opt.compaction_guard_min_output_file_size.0, + $opt.max_compaction_bytes.0, ) .unwrap(); cf_opts.set_sst_partitioner_factory(factory); From 56091d5998745f7c741d1c6fa8aa1ba281e990ed Mon Sep 17 00:00:00 2001 From: lijie Date: Thu, 28 Sep 2023 11:48:41 +0800 Subject: [PATCH 0945/1149] chore: bump version to 7.5.0-alpha (#15708) Signed-off-by: lijie --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b3842f92752..a10755f5a7f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6526,7 +6526,7 @@ dependencies = [ [[package]] name = "tikv" -version = "7.4.0-alpha" +version = "7.5.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index 81be4d36906..4d8cefa9fa4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tikv" -version = "7.4.0-alpha" +version = "7.5.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From a7db07d72dcbf2c938ebd0b4661270fdc95f9a43 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 28 Sep 2023 17:40:50 +0800 Subject: [PATCH 0946/1149] raftstore-v2: gc removed_records and merged_records on tombstone store (#15677) close tikv/tikv#15669 Let leader directly GC removed_records and merged_records on tombstone store, instead of sending GcPeerRequests to such store. 
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/fsm/peer.rs | 3 ++ components/raftstore-v2/src/operation/life.rs | 31 ++++++++++++ .../raftstore-v2/src/operation/ready/mod.rs | 7 +++ components/raftstore-v2/src/router/message.rs | 5 ++ components/test_pd_client/src/pd.rs | 4 +- components/test_raftstore-v2/src/server.rs | 5 ++ components/test_raftstore/src/server.rs | 4 +- components/tikv_kv/src/raft_extension.rs | 3 ++ src/server/lock_manager/deadlock.rs | 13 +---- src/server/lock_manager/mod.rs | 4 +- src/server/metrics.rs | 1 + src/server/raft_client.rs | 16 +++--- src/server/raftkv2/raft_extension.rs | 5 ++ src/server/resolve.rs | 50 +++++++++++++++++-- src/server/server.rs | 6 +-- tests/failpoints/cases/mod.rs | 1 + tests/failpoints/cases/test_life.rs | 36 +++++++++++++ .../config/dynamic/pessimistic_txn.rs | 16 +----- tests/integrations/server/raft_client.rs | 44 ++++++++-------- 19 files changed, 188 insertions(+), 66 deletions(-) create mode 100644 tests/failpoints/cases/test_life.rs diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 872b2c4e7e6..54729787271 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -315,6 +315,9 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, PeerMsg::StoreUnreachable { to_store_id } => { self.fsm.peer_mut().on_store_unreachable(to_store_id) } + PeerMsg::StoreMaybeTombstone { store_id } => { + self.fsm.peer_mut().on_store_maybe_tombstone(store_id) + } PeerMsg::SnapshotSent { to_peer_id, status } => { self.fsm.peer_mut().on_snapshot_sent(to_peer_id, status) } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 8591d5daf23..84bded8a9bb 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs 
@@ -713,6 +713,37 @@ impl Peer { ctx.confirmed_ids.push(gc_peer_id); } + // Clean up removed and merged records for peers on tombstone stores, + // otherwise it may keep sending gc peer request to the tombstone store. + pub fn on_store_maybe_tombstone_gc_peer(&mut self, store_id: u64) { + let mut peers_on_tombstone = vec![]; + let state = self.storage().region_state(); + for peer in state.get_removed_records() { + if peer.get_store_id() == store_id { + peers_on_tombstone.push(peer.clone()); + } + } + for record in state.get_merged_records() { + for peer in record.get_source_peers() { + if peer.get_store_id() == store_id { + peers_on_tombstone.push(peer.clone()); + } + } + } + if peers_on_tombstone.is_empty() { + return; + } + info!(self.logger, "gc peer on tombstone store"; + "tombstone_store_id" => store_id, + "peers" => ?peers_on_tombstone); + let ctx = self.gc_peer_context_mut(); + for peer in peers_on_tombstone { + if !ctx.confirmed_ids.contains(&peer.get_id()) { + ctx.confirmed_ids.push(peer.get_id()); + } + } + } + // Removes deleted peers from region state by proposing a `UpdateGcPeer` // command. 
pub fn on_gc_peer_tick(&mut self, ctx: &mut StoreContext) { diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 17845b5d0b8..1ff07f2ccc1 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -247,6 +247,13 @@ impl Peer { } } + pub fn on_store_maybe_tombstone(&mut self, store_id: u64) { + if !self.is_leader() { + return; + } + self.on_store_maybe_tombstone_gc_peer(store_id); + } + pub fn on_raft_message( &mut self, ctx: &mut StoreContext, diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 2d364af44e1..16d43970e7a 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -197,6 +197,11 @@ pub enum PeerMsg { StoreUnreachable { to_store_id: u64, }, + // A store may be tombstone. Use it with caution, it also means store not + // found, PD can not distinguish them now, as PD may delete tombstone stores. + StoreMaybeTombstone { + store_id: u64, + }, /// Reports whether the snapshot sending is successful or not. SnapshotSent { to_peer_id: u64, diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index c81230f6a16..a9141bf6299 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -547,7 +547,9 @@ impl PdCluster { fn get_store(&self, store_id: u64) -> Result { match self.stores.get(&store_id) { Some(s) if s.store.get_id() != 0 => Ok(s.store.clone()), - _ => Err(box_err!("store {} not found", store_id)), + // Matches PD error message. 
+ // See https://github.com/tikv/pd/blob/v7.3.0/server/grpc_service.go#L777-L780 + _ => Err(box_err!("invalid store ID {}, not found", store_id)), } } diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 7b5d501a59f..299e93eb746 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -222,6 +222,11 @@ impl RaftExtension for TestExtension { self.extension.report_store_unreachable(store_id) } + #[inline] + fn report_store_maybe_tombstone(&self, store_id: u64) { + self.extension.report_store_maybe_tombstone(store_id) + } + #[inline] fn report_snapshot_status( &self, diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 8d26bae968d..0002f36d647 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -113,8 +113,8 @@ impl StoreAddrResolver for AddressMap { fn resolve( &self, store_id: u64, - cb: Box) + Send>, - ) -> ServerResult<()> { + cb: Box) + Send>, + ) -> resolve::Result<()> { let addr = self.get(store_id); match addr { Some(addr) => cb(Ok(addr)), diff --git a/components/tikv_kv/src/raft_extension.rs b/components/tikv_kv/src/raft_extension.rs index 26c9e687ef6..7ab4c1c030d 100644 --- a/components/tikv_kv/src/raft_extension.rs +++ b/components/tikv_kv/src/raft_extension.rs @@ -32,6 +32,9 @@ pub trait RaftExtension: Clone + Send { /// Report the target store is unreachable. fn report_store_unreachable(&self, _store_id: u64) {} + /// Report the target store may be tombstone. + fn report_store_maybe_tombstone(&self, _store_id: u64) {} + /// Report the status of snapshot. 
fn report_snapshot_status(&self, _region_id: u64, _to_peer_id: u64, _status: SnapshotStatus) {} diff --git a/src/server/lock_manager/deadlock.rs b/src/server/lock_manager/deadlock.rs index 9583df80dd6..fd749cc3175 100644 --- a/src/server/lock_manager/deadlock.rs +++ b/src/server/lock_manager/deadlock.rs @@ -1119,7 +1119,7 @@ pub mod tests { use tikv_util::worker::FutureWorker; use super::*; - use crate::server::resolve::Callback; + use crate::server::resolve; #[test] fn test_detect_table() { @@ -1467,15 +1467,6 @@ pub mod tests { impl PdClient for MockPdClient {} - #[derive(Clone)] - pub(crate) struct MockResolver; - - impl StoreAddrResolver for MockResolver { - fn resolve(&self, _store_id: u64, _cb: Callback) -> Result<()> { - Err(Error::Other(box_err!("unimplemented"))) - } - } - fn start_deadlock_detector( host: &mut CoprocessorHost, ) -> (FutureWorker, Scheduler) { @@ -1485,7 +1476,7 @@ pub mod tests { let detector_runner = Detector::new( 1, Arc::new(MockPdClient {}), - MockResolver {}, + resolve::MockStoreAddrResolver::default(), Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()), waiter_mgr_scheduler, &Config::default(), diff --git a/src/server/lock_manager/mod.rs b/src/server/lock_manager/mod.rs index 243d533a0e5..c42531ae0fd 100644 --- a/src/server/lock_manager/mod.rs +++ b/src/server/lock_manager/mod.rs @@ -318,7 +318,7 @@ mod tests { use self::{deadlock::tests::*, metrics::*, waiter_manager::tests::*}; use super::*; - use crate::storage::lock_manager::LockDigest; + use crate::{server::resolve::MockStoreAddrResolver, storage::lock_manager::LockDigest}; fn start_lock_manager() -> LockManager { let mut coprocessor_host = CoprocessorHost::::default(); @@ -336,7 +336,7 @@ mod tests { .start( 1, Arc::new(MockPdClient {}), - MockResolver {}, + MockStoreAddrResolver::default(), Arc::new(SecurityManager::new(&SecurityConfig::default()).unwrap()), &cfg, ) diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 2745be59a71..122748cdfa9 
100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -86,6 +86,7 @@ make_auto_flush_static_metric! { failed, success, tombstone, + not_found, } pub label_enum ReplicaReadLockCheckResult { diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index f30e5b36045..b120011c490 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -46,8 +46,11 @@ use tikv_util::{ use yatp::{task::future::TaskCell, ThreadPool}; use crate::server::{ - self, load_statistics::ThreadLoadPool, metrics::*, snap::Task as SnapTask, Config, - StoreAddrResolver, + load_statistics::ThreadLoadPool, + metrics::*, + resolve::{Error as ResolveError, Result as ResolveResult}, + snap::Task as SnapTask, + Config, StoreAddrResolver, }; pub struct MetadataSourceStoreId {} @@ -642,7 +645,7 @@ where S: StoreAddrResolver, R: RaftExtension + Unpin + 'static, { - fn resolve(&self) -> impl Future> { + fn resolve(&self) -> impl Future> { let (tx, rx) = oneshot::channel(); let store_id = self.store_id; let res = self.builder.resolver.resolve( @@ -673,7 +676,7 @@ where res?; match rx.await { Ok(a) => a, - Err(_) => Err(server::Error::Other( + Err(_) => Err(ResolveError::Other( "failed to receive resolve result".into(), )), } @@ -824,8 +827,7 @@ async fn start( RESOLVE_STORE_COUNTER.with_label_values(&["failed"]).inc(); back_end.clear_pending_message("resolve"); error_unknown!(?e; "resolve store address failed"; "store_id" => back_end.store_id,); - // TOMBSTONE - if format!("{}", e).contains("has been removed") { + if let ResolveError::StoreTombstone(_) = e { let mut pool = pool.lock().unwrap(); if let Some(s) = pool.connections.remove(&(back_end.store_id, conn_id)) { s.set_conn_state(ConnState::Disconnected); @@ -940,7 +942,7 @@ struct CachedQueue { /// ```text /// for m in msgs { /// if !raft_client.send(m) { -/// // handle error. +/// // handle error. 
/// } /// } /// raft_client.flush(); diff --git a/src/server/raftkv2/raft_extension.rs b/src/server/raftkv2/raft_extension.rs index f6bb66e9e11..8b15c73fb65 100644 --- a/src/server/raftkv2/raft_extension.rs +++ b/src/server/raftkv2/raft_extension.rs @@ -49,6 +49,11 @@ impl tikv_kv::RaftExtension for Extension .send_control(StoreMsg::StoreUnreachable { to_store_id }); } + fn report_store_maybe_tombstone(&self, store_id: u64) { + self.router + .broadcast_normal(|| PeerMsg::StoreMaybeTombstone { store_id }); + } + fn report_snapshot_status( &self, region_id: u64, diff --git a/src/server/resolve.rs b/src/server/resolve.rs index c831ff28d17..013511183e2 100644 --- a/src/server/resolve.rs +++ b/src/server/resolve.rs @@ -1,6 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. use std::{ + error::Error as StdError, fmt::{self, Display, Formatter}, sync::{Arc, Mutex}, }; @@ -9,16 +10,28 @@ use collections::HashMap; use kvproto::replication_modepb::ReplicationMode; use pd_client::{take_peer_address, PdClient}; use raftstore::store::GlobalReplicationState; +use thiserror::Error; use tikv_kv::RaftExtension; use tikv_util::{ + info, time::Instant, worker::{Runnable, Scheduler, Worker}, }; -use super::{metrics::*, Result}; +use super::metrics::*; const STORE_ADDRESS_REFRESH_SECONDS: u64 = 60; +#[derive(Debug, Error)] +pub enum Error { + #[error("{0:?}")] + Other(#[from] Box), + #[error("store {0} has been removed")] + StoreTombstone(u64), +} + +pub type Result = std::result::Result; + pub type Callback = Box) + Send>; pub fn store_address_refresh_interval_secs() -> u64 { @@ -95,9 +108,21 @@ where // it explicitly. 
Err(pd_client::Error::StoreTombstone(_)) => { RESOLVE_STORE_COUNTER_STATIC.tombstone.inc(); - return Err(box_err!("store {} has been removed", store_id)); + self.router.report_store_maybe_tombstone(store_id); + return Err(Error::StoreTombstone(store_id)); + } + Err(e) => { + // Tombstone store may be removed manually or automatically + // after 30 days of deletion. PD returns + // "invalid store ID %d, not found" for such store id. + // See https://github.com/tikv/pd/blob/v7.3.0/server/grpc_service.go#L777-L780 + if format!("{:?}", e).contains("not found") { + RESOLVE_STORE_COUNTER_STATIC.not_found.inc(); + info!("resolve store not found"; "store_id" => store_id); + self.router.report_store_maybe_tombstone(store_id); + } + return Err(box_err!(e)); } - Err(e) => return Err(box_err!(e)), }; let mut group_id = None; let mut state = self.state.lock().unwrap(); @@ -181,6 +206,25 @@ impl StoreAddrResolver for PdStoreAddrResolver { } } +#[derive(Clone)] +pub struct MockStoreAddrResolver { + pub resolve_fn: Arc Result<()> + Send + Sync>, +} + +impl StoreAddrResolver for MockStoreAddrResolver { + fn resolve(&self, store_id: u64, cb: Callback) -> Result<()> { + (self.resolve_fn)(store_id, cb) + } +} + +impl Default for MockStoreAddrResolver { + fn default() -> MockStoreAddrResolver { + MockStoreAddrResolver { + resolve_fn: Arc::new(|_, _| unimplemented!()), + } + } +} + #[cfg(test)] mod tests { use std::{net::SocketAddr, ops::Sub, str::FromStr, sync::Arc, thread, time::Duration}; diff --git a/src/server/server.rs b/src/server/server.rs index 948930ae7ae..a886f1232f4 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -533,8 +533,8 @@ mod tests { use super::{ super::{ - resolve::{Callback as ResolveCallback, StoreAddrResolver}, - Config, Result, + resolve::{self, Callback as ResolveCallback, StoreAddrResolver}, + Config, }, *, }; @@ -552,7 +552,7 @@ mod tests { } impl StoreAddrResolver for MockResolver { - fn resolve(&self, _: u64, cb: ResolveCallback) -> 
Result<()> { + fn resolve(&self, _: u64, cb: ResolveCallback) -> resolve::Result<()> { if self.quick_fail.load(Ordering::SeqCst) { return Err(box_err!("quick fail")); } diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index a9dbd36a81a..ed2b8d79f9c 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -17,6 +17,7 @@ mod test_gc_worker; mod test_hibernate; mod test_import_service; mod test_kv_service; +mod test_life; mod test_local_read; mod test_memory_usage_limit; mod test_merge; diff --git a/tests/failpoints/cases/test_life.rs b/tests/failpoints/cases/test_life.rs new file mode 100644 index 00000000000..2bc833075c6 --- /dev/null +++ b/tests/failpoints/cases/test_life.rs @@ -0,0 +1,36 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::sync::Arc; + +use test_raftstore::*; +use test_raftstore_macro::test_case; +use tikv_util::config::ReadableDuration; + +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_gc_peer_on_tombstone_store() { + let mut cluster = new_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + cluster.run(); + cluster.must_put(b"k1", b"v1"); + + let region = cluster.get_region(b"k1"); + + let peer_on_store1 = find_peer(®ion, 1).unwrap().clone(); + let peer_on_store3 = find_peer(®ion, 3).unwrap().clone(); + cluster.must_transfer_leader(region.get_id(), peer_on_store1); + cluster.add_send_filter(IsolationFilterFactory::new(3)); + pd_client.must_remove_peer(region.get_id(), peer_on_store3); + + // Immediately invalidate store address cache. + fail::cfg("mock_store_refresh_interval_secs", "return(0)").unwrap(); + + // Shutdown store 3 and wait for gc peer ticks. 
+ cluster.stop_node(3); + cluster.clear_send_filters(); + sleep_ms(3 * cluster.cfg.raft_store.gc_peer_check_interval.as_millis()); + + cluster.must_empty_region_removed_records(region.get_id()); +} diff --git a/tests/integrations/config/dynamic/pessimistic_txn.rs b/tests/integrations/config/dynamic/pessimistic_txn.rs index 7af5455a199..dc88bbd93a3 100644 --- a/tests/integrations/config/dynamic/pessimistic_txn.rs +++ b/tests/integrations/config/dynamic/pessimistic_txn.rs @@ -9,11 +9,7 @@ use security::SecurityManager; use test_pd_client::TestPdClient; use tikv::{ config::*, - server::{ - lock_manager::*, - resolve::{Callback, StoreAddrResolver}, - Error, Result, - }, + server::{lock_manager::*, resolve}, }; use tikv_util::config::ReadableDuration; @@ -27,14 +23,6 @@ fn test_config_validate() { invalid_cfg.validate().unwrap_err(); } -#[derive(Clone)] -struct MockResolver; -impl StoreAddrResolver for MockResolver { - fn resolve(&self, _store_id: u64, _cb: Callback) -> Result<()> { - Err(Error::Other(box_err!("unimplemented"))) - } -} - fn setup( cfg: TikvConfig, ) -> ( @@ -50,7 +38,7 @@ fn setup( .start( 1, pd_client, - MockResolver, + resolve::MockStoreAddrResolver::default(), security_mgr, &cfg.pessimistic_txn, ) diff --git a/tests/integrations/server/raft_client.rs b/tests/integrations/server/raft_client.rs index aad9ab7ceb1..2b51bb1f21b 100644 --- a/tests/integrations/server/raft_client.rs +++ b/tests/integrations/server/raft_client.rs @@ -21,8 +21,8 @@ use kvproto::{ use raft::eraftpb::Entry; use raftstore::errors::DiscardReason; use tikv::server::{ - self, load_statistics::ThreadLoadPool, raftkv::RaftRouterWrap, resolve, resolve::Callback, - Config, ConnectionBuilder, RaftClient, StoreAddrResolver, TestRaftStoreRouter, + load_statistics::ThreadLoadPool, raftkv::RaftRouterWrap, resolve, Config, ConnectionBuilder, + RaftClient, StoreAddrResolver, TestRaftStoreRouter, }; use tikv_kv::{FakeExtension, RaftExtension}; use tikv_util::{ @@ -32,24 +32,6 @@ use 
tikv_util::{ use super::*; -#[derive(Clone)] -pub struct StaticResolver { - port: u16, -} - -impl StaticResolver { - fn new(port: u16) -> StaticResolver { - StaticResolver { port } - } -} - -impl StoreAddrResolver for StaticResolver { - fn resolve(&self, _store_id: u64, cb: Callback) -> server::Result<()> { - cb(Ok(format!("localhost:{}", self.port))); - Ok(()) - } -} - fn get_raft_client(router: R, resolver: T) -> RaftClient where R: RaftExtension + Unpin + 'static, @@ -75,8 +57,16 @@ where RaftClient::new(0, builder) } -fn get_raft_client_by_port(port: u16) -> RaftClient { - get_raft_client(FakeExtension, StaticResolver::new(port)) +fn get_raft_client_by_port(port: u16) -> RaftClient { + get_raft_client( + FakeExtension, + resolve::MockStoreAddrResolver { + resolve_fn: Arc::new(move |_, cb| { + cb(Ok(format!("localhost:{}", port))); + Ok(()) + }), + }, + ) } #[derive(Clone)] @@ -177,7 +167,15 @@ fn test_raft_client_reconnect() { let (significant_msg_sender, _significant_msg_receiver) = mpsc::channel(); let router = TestRaftStoreRouter::new(tx, significant_msg_sender); let wrap = RaftRouterWrap::new(router); - let mut raft_client = get_raft_client(wrap, StaticResolver::new(port)); + let mut raft_client = get_raft_client( + wrap, + resolve::MockStoreAddrResolver { + resolve_fn: Arc::new(move |_, cb| { + cb(Ok(format!("localhost:{}", port))); + Ok(()) + }), + }, + ); (0..50).for_each(|_| raft_client.send(RaftMessage::default()).unwrap()); raft_client.flush(); From fda1b5caf19f3ee87ab26c4458c64a6b3f3ea5ca Mon Sep 17 00:00:00 2001 From: Juan Grande Date: Thu, 28 Sep 2023 02:55:50 -0700 Subject: [PATCH 0947/1149] logger: added thread_id to logs (#15638) close tikv/tikv#13395 Added thread_id to logs Signed-off-by: Juan Grande Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tikv_util/src/logger/mod.rs | 126 +++++++++++++++++++------ 1 file changed, 96 insertions(+), 30 deletions(-) diff --git 
a/components/tikv_util/src/logger/mod.rs b/components/tikv_util/src/logger/mod.rs index 5ebe9468a50..c321f56a1b5 100644 --- a/components/tikv_util/src/logger/mod.rs +++ b/components/tikv_util/src/logger/mod.rs @@ -6,6 +6,7 @@ mod formatter; use std::{ env, fmt, io::{self, BufWriter}, + num::NonZeroU64, path::{Path, PathBuf}, sync::{ atomic::{AtomicUsize, Ordering}, @@ -15,7 +16,10 @@ use std::{ }; use log::{self, SetLoggerError}; -use slog::{self, slog_o, Drain, FnValue, Key, OwnedKVList, PushFnValue, Record, KV}; +use slog::{ + self, slog_o, Drain, FnValue, Key, OwnedKV, OwnedKVList, PushFnValue, Record, + SendSyncRefUnwindSafeKV, KV, +}; pub use slog::{FilterFn, Level}; use slog_async::{Async, AsyncGuard, OverflowStrategy}; use slog_term::{Decorator, PlainDecorator, RecordDecorator}; @@ -85,7 +89,7 @@ where }; let filtered = GlobalLevelFilter::new(drain.filter(filter).fuse()); - (slog::Logger::root(filtered, slog_o!()), Some(guard)) + (slog::Logger::root(filtered, get_values()), Some(guard)) } else { let drain = LogAndFuse(Mutex::new(drain)); let drain = SlowLogFilter { @@ -93,7 +97,7 @@ where inner: drain, }; let filtered = GlobalLevelFilter::new(drain.filter(filter).fuse()); - (slog::Logger::root(filtered, slog_o!()), None) + (slog::Logger::root(filtered, get_values()), None) }; set_global_logger(level, init_stdlog, logger, guard) @@ -628,6 +632,18 @@ fn write_log_fields( Ok(()) } +fn format_thread_id(thread_id: NonZeroU64) -> String { + format!("{:#0x}", thread_id) +} + +fn get_values() -> OwnedKV { + slog_o!( + "thread_id" => FnValue(|_| { + format_thread_id(std::thread::current().id().as_u64()) + }) + ) +} + struct Serializer<'a> { decorator: &'a mut dyn RecordDecorator, } @@ -679,7 +695,7 @@ impl<'a> slog::Serializer for Serializer<'a> { #[cfg(test)] mod tests { - use std::{cell::RefCell, io, io::Write, str::from_utf8}; + use std::{cell::RefCell, io, io::Write, str::from_utf8, sync::RwLock, time::Duration}; use chrono::DateTime; use regex::Regex; @@ -705,8 
+721,6 @@ mod tests { } fn log_format_cases(logger: slog::Logger) { - use std::time::Duration; - // Empty message is not recommend, just for test purpose here. slog_info!(logger, ""); slog_info!(logger, "Welcome"); @@ -763,21 +777,25 @@ mod tests { fn test_log_format_text() { let decorator = PlainSyncDecorator::new(TestWriter); let drain = TikvFormat::new(decorator, true).fuse(); - let logger = slog::Logger::root_typed(drain, slog_o!()).into_erased(); + let logger = slog::Logger::root_typed(drain, get_values()).into_erased(); log_format_cases(logger); - let expect = r#"[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:469] [] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:469] [Welcome] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:470] ["Welcome TiKV"] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:471] [欢迎] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:472] ["欢迎 TiKV"] -[2019/01/15 13:40:39.615 +08:00] [INFO] [mod.rs:455] ["failed to fetch URL"] [backoff=3s] [attempt=3] [url=http://example.com] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:460] ["failed to \"fetch\" [URL]: http://example.com"] -[2019/01/15 13:40:39.619 +08:00] [DEBUG] [mod.rs:463] ["Slow query"] ["process keys"=1500] [duration=123ns] [sql="SELECT * FROM TABLE WHERE ID=\"abc\""] -[2019/01/15 13:40:39.619 +08:00] [WARN] [mod.rs:473] [Type] [Other=-inf] [Score=inf] [Counter=NaN] -[2019/01/16 16:56:04.854 +08:00] [INFO] [mod.rs:391] ["more type tests"] [str_array="[\"💖\", \"�\", \"☺☻☹\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"\\\\x80\\\\x80\\\\x80\\\\x80\", \"XML\"]"] [u8=34] [is_None=None] [is_false=false] [is_true=true] ["store ids"="[1, 2, 3]"] [url-peers="[\"peer1\", \"peer 2\"]"] [urls="[\"http://xxx.com:2347\", \"http://xxx.com:2432\"]"] [field2="in quote"] [field1=no_quote] -"#; + let thread_id = format_thread_id(std::thread::current().id().as_u64()); + let expect = format!( + r#"[2019/01/15 13:40:39.619 +08:00] [INFO] 
[mod.rs:469] [] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:469] [Welcome] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:470] ["Welcome TiKV"] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:471] [欢迎] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:472] ["欢迎 TiKV"] [thread_id={0}] +[2019/01/15 13:40:39.615 +08:00] [INFO] [mod.rs:455] ["failed to fetch URL"] [backoff=3s] [attempt=3] [url=http://example.com] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:460] ["failed to \"fetch\" [URL]: http://example.com"] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [DEBUG] [mod.rs:463] ["Slow query"] ["process keys"=1500] [duration=123ns] [sql="SELECT * FROM TABLE WHERE ID=\"abc\""] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [WARN] [mod.rs:473] [Type] [Other=-inf] [Score=inf] [Counter=NaN] [thread_id={0}] +[2019/01/16 16:56:04.854 +08:00] [INFO] [mod.rs:391] ["more type tests"] [str_array="[\"💖\", \"�\", \"☺☻☹\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"\\\\x80\\\\x80\\\\x80\\\\x80\", \"XML\"]"] [u8=34] [is_None=None] [is_false=false] [is_true=true] ["store ids"="[1, 2, 3]"] [url-peers="[\"peer1\", \"peer 2\"]"] [urls="[\"http://xxx.com:2347\", \"http://xxx.com:2432\"]"] [field2="in quote"] [field1=no_quote] [thread_id={0}] +"#, + thread_id + ); BUFFER.with(|buffer| { let mut buffer = buffer.borrow_mut(); @@ -811,21 +829,25 @@ mod tests { fn test_log_format_json() { use serde_json::{from_str, Value}; let drain = Mutex::new(json_format(TestWriter, true)).map(slog::Fuse); - let logger = slog::Logger::root_typed(drain, slog_o!()).into_erased(); + let logger = slog::Logger::root_typed(drain, get_values()).into_erased(); log_format_cases(logger); - let expect = r#"{"time":"2020/05/16 15:49:52.449 +08:00","level":"INFO","caller":"mod.rs:469","message":""} -{"time":"2020/05/16 15:49:52.450 
+08:00","level":"INFO","caller":"mod.rs:469","message":"Welcome"} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:470","message":"Welcome TiKV"} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:471","message":"欢迎"} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:472","message":"欢迎 TiKV"} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:455","message":"failed to fetch URL","backoff":"3s","attempt":3,"url":"http://example.com"} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:460","message":"failed to \"fetch\" [URL]: http://example.com"} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"DEBUG","caller":"mod.rs:463","message":"Slow query","process keys":1500,"duration":"123ns","sql":"SELECT * FROM TABLE WHERE ID=\"abc\""} -{"time":"2020/05/16 15:49:52.450 +08:00","level":"WARN","caller":"mod.rs:473","message":"Type","Other":null,"Score":null,"Counter":null} -{"time":"2020/05/16 15:49:52.451 +08:00","level":"INFO","caller":"mod.rs:391","message":"more type tests","str_array":"[\"💖\", \"�\", \"☺☻☹\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"\\\\x80\\\\x80\\\\x80\\\\x80\", \"XML\"]","u8":34,"is_None":null,"is_false":false,"is_true":true,"store ids":"[1, 2, 3]","url-peers":"[\"peer1\", \"peer 2\"]","urls":"[\"http://xxx.com:2347\", \"http://xxx.com:2432\"]","field2":"in quote","field1":"no_quote"} -"#; + let thread_id = format_thread_id(std::thread::current().id().as_u64()); + let expect = format!( + r#"{{"time":"2020/05/16 15:49:52.449 +08:00","level":"INFO","caller":"mod.rs:469","message":"","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:469","message":"Welcome","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:470","message":"Welcome TiKV","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 
+08:00","level":"INFO","caller":"mod.rs:471","message":"欢迎","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:472","message":"欢迎 TiKV","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:455","message":"failed to fetch URL","backoff":"3s","attempt":3,"url":"http://example.com","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:460","message":"failed to \"fetch\" [URL]: http://example.com","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"DEBUG","caller":"mod.rs:463","message":"Slow query","process keys":1500,"duration":"123ns","sql":"SELECT * FROM TABLE WHERE ID=\"abc\"","thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"WARN","caller":"mod.rs:473","message":"Type","Other":null,"Score":null,"Counter":null,"thread_id":"{0}"}} +{{"time":"2020/05/16 15:49:52.451 +08:00","level":"INFO","caller":"mod.rs:391","message":"more type tests","str_array":"[\"💖\", \"�\", \"☺☻☹\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"\\\\x80\\\\x80\\\\x80\\\\x80\", \"XML\"]","u8":34,"is_None":null,"is_false":false,"is_true":true,"store ids":"[1, 2, 3]","url-peers":"[\"peer1\", \"peer 2\"]","urls":"[\"http://xxx.com:2347\", \"http://xxx.com:2432\"]","field2":"in quote","field1":"no_quote","thread_id":"{0}"}} +"#, + thread_id + ); BUFFER.with(|buffer| { let mut buffer = buffer.borrow_mut(); @@ -1074,4 +1096,48 @@ mod tests { } }); } + + static THREAD_SAFE_BUFFER: RwLock> = RwLock::new(Vec::new()); + + struct ThreadSafeWriter; + impl Write for ThreadSafeWriter { + fn write(&mut self, data: &[u8]) -> io::Result { + let mut buffer = THREAD_SAFE_BUFFER.write().unwrap(); + buffer.write(data) + } + + fn flush(&mut self) -> io::Result<()> { + let mut buffer = THREAD_SAFE_BUFFER.write().unwrap(); + buffer.flush() + } + } + + #[test] + fn test_threadid() { + let drain = 
TikvFormat::new(PlainSyncDecorator::new(ThreadSafeWriter), true).fuse(); + let logger = slog::Logger::root_typed(drain, get_values()).into_erased(); + + slog_info!(logger, "Hello from the first thread"); + let this_threadid = thread::current().id().as_u64(); + let this_threadid = format_thread_id(this_threadid); + + let handle = thread::spawn(move || { + slog_info!(logger, "Hello from the second thread"); + }); + let other_threadid = handle.thread().id().as_u64(); + let other_threadid = format_thread_id(other_threadid); + handle.join().unwrap(); + + let expected = vec![this_threadid, other_threadid]; + + let re = Regex::new(r"\[thread_id=(.*?)\]").unwrap(); + let buffer = THREAD_SAFE_BUFFER.read().unwrap(); + let output = from_utf8(&buffer).unwrap(); + let actual: Vec<&str> = output + .lines() + .map(|line| re.captures(line).unwrap()) + .map(|captures| captures.get(1).unwrap().as_str()) + .collect(); + assert_eq!(expected, actual); + } } From 58253e8b7cea59b414511753b75dd7fc980d99af Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Sun, 8 Oct 2023 13:03:22 +0800 Subject: [PATCH 0948/1149] raftstore: split bucket if the increment flow reach the limit (#15637) close tikv/tikv#15636 there are three reason may cause the bucket not split: 1. split check tick will refresh bucket info even info the bucket version not change 2. the suspect buckets only conside the increment flow 3. all the bucket increment flows are reset if one bucket is updated. To solve this, bucket stats only record the increment flow and reset it after meta size updated. 
Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- components/pd_client/src/lib.rs | 9 + .../raftstore-v2/src/operation/bucket.rs | 536 ++++++++++++------ components/raftstore-v2/src/worker/pd/mod.rs | 6 +- .../raftstore-v2/src/worker/pd/region.rs | 22 +- components/raftstore/src/store/fsm/peer.rs | 25 +- components/raftstore/src/store/util.rs | 14 + 6 files changed, 391 insertions(+), 221 deletions(-) diff --git a/components/pd_client/src/lib.rs b/components/pd_client/src/lib.rs index 7a9d2cd2a61..21ae61ccd61 100644 --- a/components/pd_client/src/lib.rs +++ b/components/pd_client/src/lib.rs @@ -211,6 +211,15 @@ impl BucketStat { } } + pub fn clean_stats(&mut self, idx: usize) { + self.stats.write_keys[idx] = 0; + self.stats.write_bytes[idx] = 0; + self.stats.read_qps[idx] = 0; + self.stats.write_qps[idx] = 0; + self.stats.read_keys[idx] = 0; + self.stats.read_bytes[idx] = 0; + } + pub fn split(&mut self, idx: usize) { assert!(idx != 0); // inherit the traffic stats for splited bucket diff --git a/components/raftstore-v2/src/operation/bucket.rs b/components/raftstore-v2/src/operation/bucket.rs index 432ea72456a..242b9a9b33b 100644 --- a/components/raftstore-v2/src/operation/bucket.rs +++ b/components/raftstore-v2/src/operation/bucket.rs @@ -11,10 +11,10 @@ use kvproto::{ }; use pd_client::{BucketMeta, BucketStat}; use raftstore::{ - coprocessor::RegionChangeEvent, + coprocessor::{Config, RegionChangeEvent}, store::{util, Bucket, BucketRange, ReadProgress, SplitCheckTask, Transport}, }; -use slog::{error, info, warn}; +use slog::{error, info}; use crate::{ batch::StoreContext, @@ -26,15 +26,13 @@ use crate::{ #[derive(Debug, Clone, Default)] pub struct BucketStatsInfo { + // the stats is increment flow. bucket_stat: Option, - // the last buckets records the stats that the recently refreshed. - last_bucket_stat: Option, // the report bucket stat records the increment stats after last report pd. 
// it will be reset after report pd. report_bucket_stat: Option, - // last bucket count. - // BucketStat.meta is Arc so it cannot be used for last bucket count - last_bucket_count: usize, + // avoid the version roll back, it record the last bucket version if bucket stat isn't none. + last_bucket_version: u64, } impl BucketStatsInfo { @@ -42,55 +40,33 @@ impl BucketStatsInfo { /// diff_size_threshold. pub fn gen_bucket_range_for_update( &self, - diff_size_threshold: u64, + region_bucket_max_size: u64, ) -> Option> { let region_buckets = self.bucket_stat.as_ref()?; let stats = ®ion_buckets.stats; let keys = ®ion_buckets.meta.keys; + let sizes = ®ion_buckets.meta.sizes; - let empty_last_keys = vec![]; - let empty_last_stats = metapb::BucketStats::default(); - let (last_keys, last_stats, stats_reset) = self - .last_bucket_stat - .as_ref() - .map(|b| { - ( - &b.meta.keys, - &b.stats, - region_buckets.create_time != b.create_time, - ) - }) - .unwrap_or((&empty_last_keys, &empty_last_stats, false)); - - let mut bucket_ranges = vec![]; - let mut j = 0; + let mut suspect_bucket_ranges = vec![]; assert_eq!(keys.len(), stats.write_bytes.len() + 1); for i in 0..stats.write_bytes.len() { - let mut diff_in_bytes = stats.write_bytes[i]; - while j < last_keys.len() && keys[i] > last_keys[j] { - j += 1; - } - if j < last_keys.len() && keys[i] == last_keys[j] { - if !stats_reset { - diff_in_bytes -= last_stats.write_bytes[j]; - } - j += 1; - } - if diff_in_bytes >= diff_size_threshold { - bucket_ranges.push(BucketRange(keys[i].clone(), keys[i + 1].clone())); + let estimated_bucket_size = stats.write_bytes[i] + sizes[i]; + if estimated_bucket_size >= region_bucket_max_size { + suspect_bucket_ranges.push(BucketRange(keys[i].clone(), keys[i + 1].clone())); } } - Some(bucket_ranges) + Some(suspect_bucket_ranges) } #[inline] pub fn version(&self) -> u64 { self.bucket_stat .as_ref() - .or(self.last_bucket_stat.as_ref()) .map(|b| b.meta.version) + .or(Some(self.last_bucket_version)) 
.unwrap_or_default() } + #[inline] pub fn add_bucket_flow(&mut self, delta: &Option) { if let (Some(buckets), Some(report_buckets), Some(delta)) = ( @@ -105,21 +81,18 @@ impl BucketStatsInfo { #[inline] pub fn set_bucket_stat(&mut self, buckets: Option) { - if let Some(b) = self.bucket_stat.take() { - self.last_bucket_stat = Some(b); - } - self.report_bucket_stat = buckets.clone(); - self.bucket_stat = buckets; - self.last_bucket_count = self - .bucket_stat - .as_ref() - .map_or(0, |bucket_stat| bucket_stat.meta.keys.len() - 1); - } - - #[inline] - pub fn clear_bucket_stat(&mut self) { - if let Some(bucket) = self.report_bucket_stat.as_mut() { - bucket.clear_stats(); + self.bucket_stat = buckets.clone(); + if let Some(new_buckets) = buckets { + self.last_bucket_version = new_buckets.meta.version; + let mut new_report_buckets = BucketStat::from_meta(new_buckets.meta); + if let Some(old) = &mut self.report_bucket_stat { + new_report_buckets.merge(old); + *old = new_report_buckets; + } else { + self.report_bucket_stat = Some(new_report_buckets); + } + } else { + self.report_bucket_stat = None; } } @@ -136,142 +109,163 @@ impl BucketStatsInfo { &self.bucket_stat } - #[inline] - pub fn last_bucket_count(&self) -> usize { - self.last_bucket_count - } -} - -impl Peer { - #[inline] - pub fn on_refresh_region_buckets( + pub fn on_refresh_region_buckets( &mut self, - store_ctx: &mut StoreContext, + cfg: &Config, + next_bucket_version: u64, + buckets: Vec, region_epoch: RegionEpoch, - mut buckets: Vec, + region: metapb::Region, bucket_ranges: Option>, - ) { - // bucket version layout - // term logical counter - // |-----------|-----------| - // high bits low bits - // term: given 10s election timeout, the 32 bit means 1362 year running time - let gen_bucket_version = |term, current_version| { - let current_version_term = current_version >> 32; - let bucket_version: u64 = if current_version_term == term { - current_version + 1 - } else { - if term > u32::MAX.into() { - error!( 
- self.logger, - "unexpected term {} more than u32::MAX. Bucket - version will be backward.", - term - ); - } - term << 32 - }; - bucket_version - }; - - let region = self.region(); - let current_version = self.region_buckets_info().version(); - let next_bucket_version = gen_bucket_version(self.term(), current_version); - let mut is_first_refresh = true; - let mut change_bucket_version = false; - let mut region_buckets: BucketStat; - + ) -> bool { + let change_bucket_version: bool; // The region buckets reset after this region happened split or merge. // The message should be dropped if it's epoch is lower than the regions. // The bucket ranges is none when the region buckets is also none. // So this condition indicates that the region buckets needs to refresh not // renew. - if let (Some(bucket_ranges), Some(peer_region_buckets)) = - (bucket_ranges, self.region_buckets_info().bucket_stat()) - { - is_first_refresh = false; + if let Some(bucket_ranges) = bucket_ranges&&self.bucket_stat.is_some(){ assert_eq!(buckets.len(), bucket_ranges.len()); - let mut meta_idx = 0; - region_buckets = peer_region_buckets.clone(); - let mut meta = (*region_buckets.meta).clone(); - meta.region_epoch = region_epoch; - for (bucket, bucket_range) in buckets.into_iter().zip(bucket_ranges) { - // the bucket ranges maybe need to split or merge not all the meta keys, so it - // needs to find the first keys. 
- while meta_idx < meta.keys.len() && meta.keys[meta_idx] != bucket_range.0 { - meta_idx += 1; - } - // meta_idx can't be not the last entry (which is end key) - if meta_idx >= meta.keys.len() - 1 { - warn!( - self.logger, - "can't find the bucket key"; - "bucket_range_key" => log_wrappers::Value::key(&bucket_range.0)); - break; - } - // the bucket size is small and does not have split keys, - // then it should be merged with its left neighbor - let region_bucket_merge_size = store_ctx - .coprocessor_host - .cfg - .region_bucket_merge_size_ratio - * (store_ctx.coprocessor_host.cfg.region_bucket_size.0 as f64); - if bucket.keys.is_empty() && bucket.size <= (region_bucket_merge_size as u64) { - meta.sizes[meta_idx] = bucket.size; - // the region has more than one bucket - // and the left neighbor + current bucket size is not very big - if meta.keys.len() > 2 - && meta_idx != 0 - && meta.sizes[meta_idx - 1] + bucket.size - < store_ctx.coprocessor_host.cfg.region_bucket_size.0 * 2 - { - // bucket is too small - region_buckets.left_merge(meta_idx); - meta.left_merge(meta_idx); - change_bucket_version = true; - continue; - } - } else { - // update size - meta.sizes[meta_idx] = bucket.size / (bucket.keys.len() + 1) as u64; - // insert new bucket keys (split the original bucket) - for bucket_key in bucket.keys { - meta_idx += 1; - region_buckets.split(meta_idx); - meta.split(meta_idx, bucket_key); - change_bucket_version = true; - } - } + change_bucket_version=self.update_buckets(cfg, next_bucket_version, buckets, region_epoch, &bucket_ranges); + }else{ + change_bucket_version = true; + // when the region buckets is none, the exclusive buckets includes all the + // bucket keys. 
+ self.init_buckets(cfg, next_bucket_version, buckets, region_epoch, region); + } + change_bucket_version + } + + fn update_buckets( + &mut self, + cfg: &Config, + next_bucket_version: u64, + buckets: Vec, + region_epoch: RegionEpoch, + bucket_ranges: &Vec, + ) -> bool { + let origin_region_buckets = self.bucket_stat.as_ref().unwrap(); + let mut change_bucket_version = false; + let mut meta_idx = 0; + let mut region_buckets = origin_region_buckets.clone(); + let mut meta = (*region_buckets.meta).clone(); + meta.region_epoch = region_epoch; + + // bucket stats will clean if the bucket size is updated. + for (bucket, bucket_range) in buckets.into_iter().zip(bucket_ranges) { + // the bucket ranges maybe need to split or merge not all the meta keys, so it + // needs to find the first keys. + while meta_idx < meta.keys.len() && meta.keys[meta_idx] != bucket_range.0 { meta_idx += 1; } - if self.region_buckets_info().last_bucket_count() != region_buckets.meta.keys.len() - 1 - { - change_bucket_version = true; + // meta_idx can't be not the last entry (which is end key) + if meta_idx >= meta.keys.len() - 1 { + break; } - if change_bucket_version { - meta.version = next_bucket_version; + // the bucket size is small and does not have split keys, + // then it should be merged with its left neighbor + let region_bucket_merge_size = + cfg.region_bucket_merge_size_ratio * (cfg.region_bucket_size.0 as f64); + if bucket.keys.is_empty() && bucket.size <= (region_bucket_merge_size as u64) { + meta.sizes[meta_idx] = bucket.size; + region_buckets.clean_stats(meta_idx); + // the region has more than one bucket + // and the left neighbor + current bucket size is not very big + if meta.keys.len() > 2 + && meta_idx != 0 + && meta.sizes[meta_idx - 1] + bucket.size < cfg.region_bucket_size.0 * 2 + { + // bucket is too small + region_buckets.left_merge(meta_idx); + meta.left_merge(meta_idx); + change_bucket_version = true; + continue; + } + } else { + // update size + meta.sizes[meta_idx] = 
bucket.size / (bucket.keys.len() + 1) as u64; + region_buckets.clean_stats(meta_idx); + // insert new bucket keys (split the original bucket) + for bucket_key in bucket.keys { + meta_idx += 1; + region_buckets.split(meta_idx); + meta.split(meta_idx, bucket_key); + change_bucket_version = true; + } } - region_buckets.meta = Arc::new(meta); - } else { - // when the region buckets is none, the exclusive buckets includes all the - // bucket keys. - assert_eq!(buckets.len(), 1); - change_bucket_version = true; - let bucket_keys = buckets.pop().unwrap().keys; - let bucket_count = bucket_keys.len() + 1; - let mut meta = BucketMeta { - region_id: self.region_id(), - region_epoch, - version: next_bucket_version, - keys: bucket_keys, - sizes: vec![store_ctx.coprocessor_host.cfg.region_bucket_size.0; bucket_count], - }; - // padding the boundary keys and initialize the flow. - meta.keys.insert(0, region.get_start_key().to_vec()); - meta.keys.push(region.get_end_key().to_vec()); - region_buckets = BucketStat::from_meta(Arc::new(meta)); + meta_idx += 1; + } + if change_bucket_version { + meta.version = next_bucket_version; } + region_buckets.meta = Arc::new(meta); + self.set_bucket_stat(Some(region_buckets)); + change_bucket_version + } + + fn init_buckets( + &mut self, + cfg: &Config, + next_bucket_version: u64, + mut buckets: Vec, + region_epoch: RegionEpoch, + region: metapb::Region, + ) { + // when the region buckets is none, the exclusive buckets includes all the + // bucket keys. + assert_eq!(buckets.len(), 1); + let bucket_keys = buckets.pop().unwrap().keys; + let bucket_count = bucket_keys.len() + 1; + let mut meta = BucketMeta { + region_id: region.get_id(), + region_epoch, + version: next_bucket_version, + keys: bucket_keys, + sizes: vec![cfg.region_bucket_size.0; bucket_count], + }; + // padding the boundary keys and initialize the flow. 
+ meta.keys.insert(0, region.get_start_key().to_vec()); + meta.keys.push(region.get_end_key().to_vec()); + let bucket_stats = BucketStat::from_meta(Arc::new(meta)); + self.set_bucket_stat(Some(bucket_stats)); + } +} +impl Peer { + #[inline] + pub fn on_refresh_region_buckets( + &mut self, + store_ctx: &mut StoreContext, + region_epoch: RegionEpoch, + buckets: Vec, + bucket_ranges: Option>, + ) { + if self.term() > u32::MAX.into() { + error!( + self.logger, + "unexpected term {} more than u32::MAX. Bucket version will be backward.", + self.term() + ); + } + + let current_version = self.region_buckets_info().version(); + let next_bucket_version = util::gen_bucket_version(self.term(), current_version); + // let mut is_first_refresh = true; + let region = self.region().clone(); + let change_bucket_version = self.region_buckets_info_mut().on_refresh_region_buckets( + &store_ctx.coprocessor_host.cfg, + next_bucket_version, + buckets, + region_epoch, + region, + bucket_ranges, + ); + let region_buckets = self + .region_buckets_info() + .bucket_stat() + .as_ref() + .unwrap() + .clone(); let buckets_count = region_buckets.meta.keys.len() - 1; if change_bucket_version { // TODO: we may need to make it debug once the coprocessor timeout is resolved. @@ -281,17 +275,18 @@ impl Peer { "bucket_version" => next_bucket_version, "buckets_count" => buckets_count, "estimated_region_size" => region_buckets.meta.total_size(), - "first_refresh" => is_first_refresh, ); + } else { + // it means the buckets key range not any change, so don't need to refresh. 
+ return; } + store_ctx.coprocessor_host.on_region_changed( - region, + self.region(), RegionChangeEvent::UpdateBuckets(buckets_count), self.state_role(), ); let meta = region_buckets.meta.clone(); - self.region_buckets_info_mut() - .set_bucket_stat(Some(region_buckets.clone())); { let mut store_meta = store_ctx.store_meta.lock().unwrap(); if let Some(reader) = store_meta.readers.get_mut(&self.region_id()) { @@ -302,13 +297,13 @@ impl Peer { if let Some(apply_scheduler) = self.apply_scheduler() { apply_scheduler.send(ApplyTask::RefreshBucketStat(region_buckets.meta.clone())); } + if !self.is_leader() { + return; + } let version = region_buckets.meta.version; let keys = region_buckets.meta.keys.clone(); // Notify followers to flush their relevant memtables let peers = self.region().get_peers().to_vec(); - if !self.is_leader() { - return; - } for p in peers { if p == *self.peer() || p.is_witness { continue; @@ -397,9 +392,9 @@ impl Peer { if !ctx.coprocessor_host.cfg.enable_region_bucket() { return None; } - let bucket_update_diff_size_threshold = ctx.coprocessor_host.cfg.region_bucket_size.0 / 2; + let region_bucket_max_size = ctx.coprocessor_host.cfg.region_bucket_size.0 * 2; self.region_buckets_info() - .gen_bucket_range_for_update(bucket_update_diff_size_threshold) + .gen_bucket_range_for_update(region_bucket_max_size) } } @@ -448,3 +443,178 @@ where self.schedule_tick(PeerTick::ReportBuckets); } } + +#[cfg(test)] +mod tests { + use super::*; + + // create BucketStatsInfo include three keys: ["","100","200",""]. 
+ fn mock_bucket_stats_info() -> BucketStatsInfo { + let mut bucket_stats_info = BucketStatsInfo::default(); + let cfg = Config::default(); + let next_bucket_version = 1; + let bucket_ranges = None; + let mut region_epoch = RegionEpoch::default(); + region_epoch.set_conf_ver(1); + region_epoch.set_version(1); + let mut region = metapb::Region::default(); + region.set_id(1); + + let mut buckets = vec![]; + let mut bucket = Bucket::default(); + bucket.keys.push(vec![100]); + bucket.keys.push(vec![200]); + buckets.insert(0, bucket); + + let _ = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets, + region_epoch, + region, + bucket_ranges, + ); + bucket_stats_info + } + + #[test] + pub fn test_version() { + let mut bucket_stats_info = mock_bucket_stats_info(); + assert_eq!(1, bucket_stats_info.version()); + bucket_stats_info.set_bucket_stat(None); + assert_eq!(1, bucket_stats_info.version()); + + let mut meta = BucketMeta::default(); + meta.version = 2; + meta.keys.push(vec![]); + meta.keys.push(vec![]); + let bucket_stat = BucketStat::from_meta(Arc::new(meta)); + bucket_stats_info.set_bucket_stat(Some(bucket_stat)); + assert_eq!(2, bucket_stats_info.version()); + } + + #[test] + pub fn test_insert_new_buckets() { + let bucket_stats_info = mock_bucket_stats_info(); + + let cfg = Config::default(); + let bucket_stat = bucket_stats_info.bucket_stat.unwrap(); + assert_eq!( + vec![vec![], vec![100], vec![200], vec![]], + bucket_stat.meta.keys + ); + for i in 0..bucket_stat.stats.write_bytes.len() { + assert_eq!(cfg.region_bucket_size.0, bucket_stat.meta.sizes[i]); + assert_eq!(0, bucket_stat.stats.write_bytes[i]); + } + } + + #[test] + pub fn test_report_buckets() { + let mut bucket_stats_info = mock_bucket_stats_info(); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + let mut delta_bucket_stats = bucket_stats.clone(); + delta_bucket_stats.write_key(&[1], 1); + delta_bucket_stats.write_key(&[201], 1); + 
bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats.clone())); + let bucket_stats = bucket_stats_info.report_bucket_stat(); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + + let report_bucket_stats = bucket_stats_info.report_bucket_stat(); + assert_eq!(vec![0, 0, 0], report_bucket_stats.stats.write_bytes); + bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats)); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + } + + #[test] + pub fn test_spilt_and_merge_buckets() { + let mut bucket_stats_info = mock_bucket_stats_info(); + let next_bucket_version = 2; + let mut region = metapb::Region::default(); + region.set_id(1); + let cfg = Config::default(); + let bucket_size = cfg.region_bucket_size.0; + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + let region_epoch = bucket_stats.meta.region_epoch.clone(); + + // step1: update buckets flow + let mut delta_bucket_stats = bucket_stats.clone(); + delta_bucket_stats.write_key(&[1], 1); + delta_bucket_stats.write_key(&[201], 1); + bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats)); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + + // step2: tick not affect anything + let bucket_ranges = Some(vec![]); + let buckets = vec![]; + let mut change_bucket_version = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets, + region_epoch.clone(), + region.clone(), + bucket_ranges, + ); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert!(!change_bucket_version); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + + // step3: split key 50 + let mut bucket_ranges = Some(vec![BucketRange(vec![], vec![100])]); + let mut bucket = Bucket::default(); + bucket.keys = vec![vec![50]]; + bucket.size = bucket_size; + let mut buckets = vec![bucket]; + change_bucket_version = bucket_stats_info.on_refresh_region_buckets( + 
&cfg, + next_bucket_version, + buckets.clone(), + region_epoch.clone(), + region.clone(), + bucket_ranges.clone(), + ); + assert!(change_bucket_version); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert_eq!( + vec![vec![], vec![50], vec![100], vec![200], vec![]], + bucket_stats.meta.keys + ); + assert_eq!( + vec![bucket_size / 2, bucket_size / 2, bucket_size, bucket_size], + bucket_stats.meta.sizes + ); + assert_eq!(vec![0, 0, 0, 2], bucket_stats.stats.write_bytes); + + // step4: merge [50-100] to [0-50], + bucket_ranges = Some(vec![BucketRange(vec![50], vec![100])]); + let mut bucket = Bucket::default(); + bucket.keys = vec![]; + bucket.size = 0; + buckets = vec![bucket]; + change_bucket_version = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets, + region_epoch, + region, + bucket_ranges, + ); + assert!(change_bucket_version); + + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert_eq!( + vec![vec![], vec![100], vec![200], vec![]], + bucket_stats.meta.keys + ); + assert_eq!( + vec![bucket_size / 2, bucket_size, bucket_size], + bucket_stats.meta.sizes + ); + assert_eq!(vec![0, 0, 2], bucket_stats.stats.write_bytes); + + // report buckets doesn't be affected by the split and merge. + let report_bucket_stats = bucket_stats_info.report_bucket_stat(); + assert_eq!(vec![4, 0, 2], report_bucket_stats.stats.write_bytes); + } +} diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 061a5ad5126..77915dd0378 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -57,7 +57,6 @@ pub enum Task { }, // In region.rs. 
RegionHeartbeat(RegionHeartbeatTask), - ReportRegionBuckets(BucketStat), UpdateReadStats(ReadStats), UpdateWriteStats(WriteStats), UpdateRegionCpuRecords(Arc), @@ -85,6 +84,7 @@ pub enum Task { initial_status: u64, txn_ext: Arc, }, + // BucketStat is the delta write flow of the bucket. ReportBuckets(BucketStat), ReportMinResolvedTs { store_id: u64, @@ -123,7 +123,6 @@ impl Display for Task { hb_task.region, hb_task.peer.get_id(), ), - Task::ReportRegionBuckets(ref buckets) => write!(f, "report buckets: {:?}", buckets), Task::UpdateReadStats(ref stats) => { write!(f, "update read stats: {stats:?}") } @@ -314,7 +313,6 @@ where write_io_rates, } => self.handle_update_store_infos(cpu_usages, read_io_rates, write_io_rates), Task::RegionHeartbeat(task) => self.handle_region_heartbeat(task), - Task::ReportRegionBuckets(buckets) => self.handle_report_region_buckets(buckets), Task::UpdateReadStats(stats) => self.handle_update_read_stats(stats), Task::UpdateWriteStats(stats) => self.handle_update_write_stats(stats), Task::UpdateRegionCpuRecords(records) => self.handle_update_region_cpu_records(records), @@ -341,7 +339,7 @@ where initial_status, txn_ext, } => self.handle_update_max_timestamp(region_id, initial_status, txn_ext), - Task::ReportBuckets(buckets) => self.handle_report_region_buckets(buckets), + Task::ReportBuckets(delta_buckets) => self.handle_report_region_buckets(delta_buckets), Task::ReportMinResolvedTs { store_id, min_resolved_ts, diff --git a/components/raftstore-v2/src/worker/pd/region.rs b/components/raftstore-v2/src/worker/pd/region.rs index 763e12fff07..d3ef54bd75a 100644 --- a/components/raftstore-v2/src/worker/pd/region.rs +++ b/components/raftstore-v2/src/worker/pd/region.rs @@ -339,9 +339,9 @@ where self.is_hb_receiver_scheduled = true; } - pub fn handle_report_region_buckets(&mut self, region_buckets: BucketStat) { - let region_id = region_buckets.meta.region_id; - self.merge_buckets(region_buckets); + pub fn handle_report_region_buckets(&mut self, 
delta_buckets: BucketStat) { + let region_id = delta_buckets.meta.region_id; + self.merge_buckets(delta_buckets); let report_buckets = self.region_buckets.get_mut(®ion_id).unwrap(); let last_report_ts = if report_buckets.last_report_ts.is_zero() { self.start_ts @@ -388,8 +388,8 @@ where .engine_total_query_num .add_query_stats(®ion_info.query_stats.0); } - for (_, region_buckets) in std::mem::take(&mut stats.region_buckets) { - self.merge_buckets(region_buckets); + for (_, delta_buckets) in std::mem::take(&mut stats.region_buckets) { + self.merge_buckets(delta_buckets); } if !stats.region_infos.is_empty() { self.stats_monitor.maybe_send_read_stats(stats); @@ -424,18 +424,18 @@ where } } - fn merge_buckets(&mut self, mut buckets: BucketStat) { - let region_id = buckets.meta.region_id; + fn merge_buckets(&mut self, mut delta: BucketStat) { + let region_id = delta.meta.region_id; self.region_buckets .entry(region_id) .and_modify(|report_bucket| { let current = &mut report_bucket.current_stat; - if current.meta < buckets.meta { - std::mem::swap(current, &mut buckets); + if current.meta < delta.meta { + std::mem::swap(current, &mut delta); } - current.merge(&buckets); + current.merge(&delta); }) - .or_insert_with(|| ReportBucket::new(buckets)); + .or_insert_with(|| ReportBucket::new(delta)); } fn calculate_region_cpu_records( diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 513e9c0636a..b6d7f8fcfcc 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -5970,27 +5970,6 @@ where } }; - // bucket version layout - // term logical counter - // |-----------|-----------| - // high bits low bits - // term: given 10s election timeout, the 32 bit means 1362 year running time - let gen_bucket_version = |term, current_version| { - let current_version_term = current_version >> 32; - let bucket_version: u64 = if current_version_term == term { - current_version + 1 - } else 
{ - if term > u32::MAX.into() { - error!( - "unexpected term {} more than u32::MAX. Bucket version will be backward.", - term - ); - } - term << 32 - }; - bucket_version - }; - let region = self.fsm.peer.region(); if util::is_epoch_stale(®ion_epoch, region.get_region_epoch()) { info!( @@ -6042,7 +6021,7 @@ where region_buckets = self.fsm.peer.region_buckets.clone().unwrap(); let mut meta = (*region_buckets.meta).clone(); if !buckets.is_empty() { - meta.version = gen_bucket_version(self.fsm.peer.term(), current_version); + meta.version = util::gen_bucket_version(self.fsm.peer.term(), current_version); } meta.region_epoch = region_epoch; for (bucket, bucket_range) in buckets.into_iter().zip(bucket_ranges) { @@ -6096,7 +6075,7 @@ where let mut meta = BucketMeta { region_id: self.fsm.region_id(), region_epoch, - version: gen_bucket_version(self.fsm.peer.term(), current_version), + version: util::gen_bucket_version(self.fsm.peer.term(), current_version), keys: bucket_keys, sizes: vec![self.ctx.coprocessor_host.cfg.region_bucket_size.0; bucket_count], }; diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 519d486102c..d9076a67d8a 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -160,6 +160,20 @@ pub fn new_empty_snapshot( snapshot } +pub fn gen_bucket_version(term: u64, current_version: u64) -> u64 { + // term logical counter + // |-----------|-----------| + // high bits low bits + // term: given 10s election timeout, the 32 bit means 1362 year running time + let current_version_term = current_version >> 32; + let bucket_version: u64 = if current_version_term == term { + current_version + 1 + } else { + term << 32 + }; + bucket_version +} + const STR_CONF_CHANGE_ADD_NODE: &str = "AddNode"; const STR_CONF_CHANGE_REMOVE_NODE: &str = "RemoveNode"; const STR_CONF_CHANGE_ADDLEARNER_NODE: &str = "AddLearner"; From 64d2129a0c21bc1e8521c38dd144a327baa88965 Mon Sep 17 00:00:00 2001 
From: glorv Date: Tue, 10 Oct 2023 13:01:53 +0800 Subject: [PATCH 0949/1149] config: set a longer rocksdb io limiter smooth window for raft-v2 (#15734) ref tikv/tikv#11470 Signed-off-by: glorv --- Cargo.lock | 6 +++--- cmd/tikv-ctl/src/main.rs | 2 +- src/config/mod.rs | 21 ++++++++++++++++++--- src/server/engine_factory.rs | 2 +- tests/integrations/storage/test_titan.rs | 4 +++- 5 files changed, 26 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a10755f5a7f..c221af119e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2998,7 +2998,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#f04f4dd8eacc30e67c24bc2529a6d9c6edb85f8f" +source = "git+https://github.com/tikv/rust-rocksdb.git#b747689e1b94cb1507872e898b83553447e8f8de" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -3017,7 +3017,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#f04f4dd8eacc30e67c24bc2529a6d9c6edb85f8f" +source = "git+https://github.com/tikv/rust-rocksdb.git#b747689e1b94cb1507872e898b83553447e8f8de" dependencies = [ "bzip2-sys", "cc", @@ -4936,7 +4936,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#f04f4dd8eacc30e67c24bc2529a6d9c6edb85f8f" +source = "git+https://github.com/tikv/rust-rocksdb.git#b747689e1b94cb1507872e898b83553447e8f8de" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 6baa1fe6c39..df17e81f1ef 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -1048,7 +1048,7 @@ fn build_rocks_opts(cfg: &TikvConfig) -> engine_rocks::RocksDbOptions { .unwrap() .map(Arc::new); let env = get_env(key_manager, None /* io_rate_limiter */).unwrap(); - let resource = cfg.rocksdb.build_resources(env); + let resource = cfg.rocksdb.build_resources(env, 
cfg.storage.engine); cfg.rocksdb.build_opt(&resource, cfg.storage.engine) } diff --git a/src/config/mod.rs b/src/config/mod.rs index d18d6f8cda0..911308809c6 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1412,14 +1412,25 @@ impl DbConfig { } } - pub fn build_resources(&self, env: Arc) -> DbResources { + pub fn build_resources(&self, env: Arc, engine: EngineType) -> DbResources { let rate_limiter = if self.rate_bytes_per_sec.0 > 0 { + // for raft-v2, we use a longer window to make the compaction io smoother + let (tune_per_secs, window_size, recent_size) = match engine { + // 1s tune duraion, long term window is 5m, short term window is 30s. + // this is the default settings. + EngineType::RaftKv => (1, 300, 30), + // 5s tune duraion, long term window is 1h, short term window is 5m + EngineType::RaftKv2 => (5, 720, 60), + }; Some(Arc::new(RateLimiter::new_writeampbased_with_auto_tuned( self.rate_bytes_per_sec.0 as i64, (self.rate_limiter_refill_period.as_millis() * 1000) as i64, 10, // fairness self.rate_limiter_mode, self.rate_limiter_auto_tuned, + tune_per_secs, + window_size, + recent_size, ))) } else { None @@ -4844,7 +4855,9 @@ mod tests { fn test_rocks_rate_limit_zero() { let mut tikv_cfg = TikvConfig::default(); tikv_cfg.rocksdb.rate_bytes_per_sec = ReadableSize(0); - let resource = tikv_cfg.rocksdb.build_resources(Arc::new(Env::default())); + let resource = tikv_cfg + .rocksdb + .build_resources(Arc::new(Env::default()), tikv_cfg.storage.engine); tikv_cfg .rocksdb .build_opt(&resource, tikv_cfg.storage.engine); @@ -5008,7 +5021,9 @@ mod tests { Arc, ) { assert_eq!(F::TAG, cfg.storage.api_version()); - let resource = cfg.rocksdb.build_resources(Arc::default()); + let resource = cfg + .rocksdb + .build_resources(Arc::default(), cfg.storage.engine); let engine = RocksDBEngine::new( &cfg.storage.data_dir, Some(cfg.rocksdb.build_opt(&resource, cfg.storage.engine)), diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 
85de282b137..3593c01ca7f 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -56,7 +56,7 @@ impl KvEngineFactoryBuilder { flow_listener: None, sst_recovery_sender: None, encryption_key_manager: key_manager, - db_resources: config.rocksdb.build_resources(env), + db_resources: config.rocksdb.build_resources(env, config.storage.engine), cf_resources: config.rocksdb.build_cf_resources(cache), state_storage: None, lite: false, diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 9c3eeec0c83..4bb8fee4087 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -159,7 +159,9 @@ fn test_delete_files_in_range_for_titan() { cfg.rocksdb.defaultcf.titan.min_gc_batch_size = ReadableSize(0); cfg.rocksdb.defaultcf.titan.discardable_ratio = 0.4; cfg.rocksdb.defaultcf.titan.min_blob_size = ReadableSize(0); - let resource = cfg.rocksdb.build_resources(Default::default()); + let resource = cfg + .rocksdb + .build_resources(Default::default(), cfg.storage.engine); let kv_db_opts = cfg.rocksdb.build_opt(&resource, cfg.storage.engine); let kv_cfs_opts = cfg.rocksdb.build_cf_opts( &cfg.rocksdb.build_cf_resources(cache), From 905e8bffbee3a289198b31de70e418c101f3be78 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 10 Oct 2023 14:01:54 +0800 Subject: [PATCH 0950/1149] raftstore: disable region bucket for raftstore v1 by default (#15740) ref tikv/tikv#15719 disable region bucket for raftstore v1 by default Signed-off-by: SpadeA-Tang --- .../raftstore/src/coprocessor/config.rs | 21 ++++++++++--------- src/config/mod.rs | 11 +++++++--- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/components/raftstore/src/coprocessor/config.rs b/components/raftstore/src/coprocessor/config.rs index e1246e8d59d..b1dc3830bbb 100644 --- a/components/raftstore/src/coprocessor/config.rs +++ 
b/components/raftstore/src/coprocessor/config.rs @@ -168,7 +168,7 @@ impl Config { Ok(()) } - pub fn validate(&mut self) -> Result<()> { + pub fn validate(&mut self, raft_kv_v2: bool) -> Result<()> { if self.region_split_keys.is_none() { self.region_split_keys = Some((self.region_split_size().as_mb_f64() * 10000.0) as u64); } @@ -199,8 +199,9 @@ impl Config { None => self.region_max_keys = Some(self.region_split_keys() / 2 * 3), } let res = self.validate_bucket_size(); - // If it's OK to enable bucket, we will prefer to enable it if useful. - if let Ok(()) = res && self.enable_region_bucket.is_none() { + // If it's OK to enable bucket, we will prefer to enable it if useful for + // raftstore-v2. + if let Ok(()) = res && self.enable_region_bucket.is_none() && raft_kv_v2 { let useful = self.region_split_size() >= self.region_bucket_size * 2; self.enable_region_bucket = Some(useful); } else if let Err(e) = res && self.enable_region_bucket() { @@ -237,39 +238,39 @@ mod tests { #[test] fn test_config_validate() { let mut cfg = Config::default(); - cfg.validate().unwrap(); + cfg.validate(false).unwrap(); cfg = Config::default(); cfg.region_max_size = Some(ReadableSize(10)); cfg.region_split_size = Some(ReadableSize(20)); - cfg.validate().unwrap_err(); + cfg.validate(false).unwrap_err(); cfg = Config::default(); cfg.region_max_size = None; cfg.region_split_size = Some(ReadableSize(20)); - cfg.validate().unwrap(); + cfg.validate(false).unwrap(); assert_eq!(cfg.region_max_size, Some(ReadableSize(30))); cfg = Config::default(); cfg.region_max_keys = Some(10); cfg.region_split_keys = Some(20); - cfg.validate().unwrap_err(); + cfg.validate(false).unwrap_err(); cfg = Config::default(); cfg.region_max_keys = None; cfg.region_split_keys = Some(20); - cfg.validate().unwrap(); + cfg.validate(false).unwrap(); assert_eq!(cfg.region_max_keys, Some(30)); cfg = Config::default(); cfg.enable_region_bucket = Some(false); cfg.region_split_size = Some(ReadableSize(20)); 
cfg.region_bucket_size = ReadableSize(30); - cfg.validate().unwrap(); + cfg.validate(false).unwrap(); cfg = Config::default(); cfg.region_split_size = Some(ReadableSize::mb(20)); - cfg.validate().unwrap(); + cfg.validate(false).unwrap(); assert_eq!(cfg.region_split_keys, Some(200000)); } } diff --git a/src/config/mod.rs b/src/config/mod.rs index 911308809c6..0eb006363f0 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3725,7 +3725,8 @@ impl TikvConfig { self.raft_engine.validate()?; self.server.validate()?; self.pd.validate()?; - self.coprocessor.validate()?; + self.coprocessor + .validate(self.storage.engine == EngineType::RaftKv2)?; self.raft_store.validate( self.coprocessor.region_split_size(), self.coprocessor.enable_region_bucket(), @@ -6238,21 +6239,25 @@ mod tests { let mut default_cfg = TikvConfig::default(); default_cfg.coprocessor.region_split_size = Some(ReadableSize::mb(500)); default_cfg.coprocessor.optimize_for(false); - default_cfg.coprocessor.validate().unwrap(); + default_cfg.coprocessor.validate(false).unwrap(); assert_eq!( default_cfg.coprocessor.region_split_size(), ReadableSize::mb(500) ); + assert!(!default_cfg.coprocessor.enable_region_bucket()); + default_cfg.coprocessor.validate(true).unwrap(); assert!(default_cfg.coprocessor.enable_region_bucket()); let mut default_cfg = TikvConfig::default(); default_cfg.coprocessor.region_split_size = Some(ReadableSize::mb(500)); default_cfg.coprocessor.optimize_for(true); - default_cfg.coprocessor.validate().unwrap(); + default_cfg.coprocessor.validate(false).unwrap(); assert_eq!( default_cfg.coprocessor.region_split_size(), ReadableSize::mb(500) ); + assert!(!default_cfg.coprocessor.enable_region_bucket()); + default_cfg.coprocessor.validate(true).unwrap(); assert!(default_cfg.coprocessor.enable_region_bucket()); } From 88aaaa3e7b1e194d389fee6a9831f7491d7f9acd Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 10 Oct 2023 05:18:25 -0500 Subject: [PATCH 0951/1149] status_server: Add symbol 
service to support remote fetching symbolized heap profile (#15695) close tikv/tikv#15732 Jeprof supports generating the svg by remote fetching, so we can add a symbol service following the [pprof format](https://gperftools.github.io/gperftools/pprof_remote_servers.html), then with ` jeprof --show_bytes http://:20180/debug/pprof/heap --svg` it can simply get the heap profiling svg from remote. With this PR, we can get rid of the limitation that the heap profile must be processed with the corresponding tikv binary and perl runtime which is used by `jeprof`. Later, we only need to install `jeprof` and `perl` in tidb_dashboard environment and collect the heap profile just like how CPU profile does. Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 22 +++--- src/server/status_server/mod.rs | 136 ++++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c221af119e9..fccff7d7822 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,9 +89,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.26" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7825f6833612eb2414095684fcf6c635becf3ce97fe48cf6421321e93bfbd53c" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" [[package]] name = "api_version" @@ -777,9 +777,9 @@ checksum = "cdead85bdec19c194affaeeb670c0e41fe23de31459efd1c174d049269cf02cc" [[package]] name = "byteorder" -version = "1.3.4" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "bytes" @@ -3168,9 +3168,9 @@ dependencies = [ [[package]] name = "memmap2" -version = "0.5.3" +version = "0.5.10" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f" +checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" dependencies = [ "libc 0.2.146", ] @@ -5834,7 +5834,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac457d054f793cedfde6f32d21d692b8351cfec9084fefd0470c0373f6d799bc" dependencies = [ "debugid", - "memmap2 0.5.3", + "memmap2 0.5.10", "stable_deref_trait", "uuid 1.2.1", ] @@ -7237,9 +7237,13 @@ dependencies = [ [[package]] name = "twox-hash" -version = "1.5.0" +version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bfd5b7557925ce778ff9b9ef90e3ade34c524b5ff10e239c69a42d546d2af56" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if 1.0.0", + "static_assertions", +] [[package]] name = "txn_types" diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 98077d9e93f..3e68b0b6310 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -3,6 +3,7 @@ /// Provides profilers for TiKV. mod profile; use std::{ + env::args, error::Error as StdError, net::SocketAddr, path::PathBuf, @@ -308,6 +309,83 @@ where }) } + async fn get_cmdline(_req: Request) -> hyper::Result> { + let args = args().into_iter().fold(String::new(), |mut a, b| { + a.push_str(&b); + a.push('\x00'); + a + }); + let response = Response::builder() + .header("Content-Type", mime::TEXT_PLAIN.to_string()) + .header("X-Content-Type-Options", "nosniff") + .body(args.into()) + .unwrap(); + Ok(response) + } + + async fn get_symbol_count(req: Request) -> hyper::Result> { + assert_eq!(req.method(), Method::GET); + // We don't know how many symbols we have, but we + // do have symbol information. pprof only cares whether + // this number is 0 (no symbols available) or > 0. 
+ let text = "num_symbols: 1\n"; + let response = Response::builder() + .header("Content-Type", mime::TEXT_PLAIN.to_string()) + .header("X-Content-Type-Options", "nosniff") + .header("Content-Length", text.len()) + .body(text.into()) + .unwrap(); + Ok(response) + } + + // The request and response format follows pprof remote server + // https://gperftools.github.io/gperftools/pprof_remote_servers.html + // Here is the go pprof implementation: + // https://github.com/golang/go/blob/3857a89e7eb872fa22d569e70b7e076bec74ebbb/src/net/http/pprof/pprof.go#L191 + async fn get_symbol(req: Request) -> hyper::Result> { + assert_eq!(req.method(), Method::POST); + let mut text = String::new(); + let body_bytes = hyper::body::to_bytes(req.into_body()).await?; + let body = String::from_utf8(body_bytes.to_vec()).unwrap(); + + // The request body is a list of addr to be resolved joined by '+'. + // Resolve addrs with addr2line and write the symbols each per line in + // response. + for pc in body.split('+') { + let addr = usize::from_str_radix(pc.trim_start_matches("0x"), 16).unwrap_or(0); + if addr == 0 { + info!("invalid addr: {}", addr); + continue; + } + + // Would be multiple symbols if inlined. 
+ let mut syms = vec![]; + backtrace::resolve(addr as *mut std::ffi::c_void, |sym| { + let name = sym + .name() + .unwrap_or_else(|| backtrace::SymbolName::new(b"")); + syms.push(name.to_string()); + }); + + if !syms.is_empty() { + // join inline functions with '--' + let f = syms.join("--"); + // should be + text.push_str(format!("{:#x} {}\n", addr, f).as_str()); + } else { + info!("can't resolve mapped addr: {:#x}", addr); + text.push_str(format!("{:#x} ??\n", addr).as_str()); + } + } + let response = Response::builder() + .header("Content-Type", mime::TEXT_PLAIN.to_string()) + .header("X-Content-Type-Options", "nosniff") + .header("Content-Length", text.len()) + .body(text.into()) + .unwrap(); + Ok(response) + } + async fn update_config( cfg_controller: ConfigController, req: Request, @@ -693,6 +771,11 @@ where (Method::GET, "/debug/pprof/heap") => { Self::dump_heap_prof_to_resp(req).await } + (Method::GET, "/debug/pprof/cmdline") => Self::get_cmdline(req).await, + (Method::GET, "/debug/pprof/symbol") => { + Self::get_symbol_count(req).await + } + (Method::POST, "/debug/pprof/symbol") => Self::get_symbol(req).await, (Method::GET, "/config") => { Self::get_config(req, &cfg_controller).await } @@ -1658,6 +1741,59 @@ mod tests { status_server.stop(); } + #[test] + fn test_pprof_symbol_service() { + let _test_guard = TEST_PROFILE_MUTEX.lock().unwrap(); + let temp_dir = tempfile::TempDir::new().unwrap(); + let mut status_server = StatusServer::new( + 1, + ConfigController::default(), + Arc::new(SecurityConfig::default()), + MockRouter, + temp_dir.path().to_path_buf(), + None, + GrpcServiceManager::dummy(), + ) + .unwrap(); + let addr = "127.0.0.1:0".to_owned(); + let _ = status_server.start(addr); + let client = Client::new(); + + let mut addr = None; + backtrace::trace(|f| { + addr = Some(f.ip()); + false + }); + assert!(addr.is_some()); + + let uri = Uri::builder() + .scheme("http") + .authority(status_server.listening_addr().to_string().as_str()) + 
.path_and_query("/debug/pprof/symbol") + .build() + .unwrap(); + let req = Request::builder() + .method(Method::POST) + .uri(uri) + .body(Body::from(format!("{:p}", addr.unwrap()))) + .unwrap(); + let handle = status_server + .thread_pool + .spawn(async move { client.request(req).await.unwrap() }); + let resp = block_on(handle).unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body_bytes = block_on(hyper::body::to_bytes(resp.into_body())).unwrap(); + assert!( + String::from_utf8(body_bytes.as_ref().to_owned()) + .unwrap() + .split(' ') + .last() + .unwrap() + .starts_with("backtrace::backtrace") + ); + status_server.stop(); + } + #[test] fn test_metrics() { let _test_guard = TEST_PROFILE_MUTEX.lock().unwrap(); From 262845cefc4810aa8bdcdc7ec18fa3d4469547de Mon Sep 17 00:00:00 2001 From: lucasliang Date: Wed, 11 Oct 2023 13:27:24 +0800 Subject: [PATCH 0952/1149] raftstore-v2: support to make protection when disk full. (#15558) close tikv/tikv#15170 This pr is used to protect `raftstore-v2` when disk full. And all checking and validation is transplant from `raftstore`. 
--- components/raftstore-v2/src/batch/store.rs | 8 +- components/raftstore-v2/src/fsm/peer.rs | 1 + .../operation/command/admin/merge/prepare.rs | 36 +- .../src/operation/command/admin/mod.rs | 57 ++- .../src/operation/command/admin/split.rs | 16 + .../command/admin/transfer_leader.rs | 2 +- .../raftstore-v2/src/operation/command/mod.rs | 6 + .../src/operation/command/write/mod.rs | 10 +- components/raftstore-v2/src/operation/life.rs | 326 +++++++++++++- components/raftstore-v2/src/operation/pd.rs | 2 +- .../raftstore-v2/src/operation/query/lease.rs | 2 +- .../raftstore-v2/src/operation/ready/mod.rs | 75 +++- .../raftstore-v2/src/operation/txn_ext.rs | 16 +- components/raftstore-v2/src/raft/peer.rs | 21 +- components/raftstore-v2/src/router/message.rs | 11 + components/raftstore/src/store/mod.rs | 4 +- components/raftstore/src/store/peer.rs | 9 + components/test_raftstore-v2/src/cluster.rs | 31 +- components/test_raftstore-v2/src/util.rs | 111 ++++- src/server/raftkv2/mod.rs | 1 + tests/failpoints/cases/test_disk_full.rs | 401 +++++++++--------- .../integrations/raftstore/test_stale_read.rs | 2 +- 22 files changed, 897 insertions(+), 251 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index cd5ae8f42f7..5ed84c70937 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -47,7 +47,7 @@ use tikv_util::{ box_err, config::{Tracker, VersionTrack}, log::SlogFormat, - sys::SysQuota, + sys::{disk::get_disk_status, SysQuota}, time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant, Limiter}, timer::{SteadyTimer, GLOBAL_TIMER_HANDLE}, worker::{Builder, LazyWorker, Scheduler, Worker}, @@ -104,6 +104,10 @@ pub struct StoreContext { /// Disk usage for the store itself. pub self_disk_usage: DiskUsage, + // TODO: how to remove offlined stores? + /// Disk usage for other stores. The store itself is not included. 
+ /// Only contains items which is not `DiskUsage::Normal`. + pub store_disk_usages: HashMap, pub snap_mgr: TabletSnapManager, pub global_stat: GlobalStoreStat, @@ -228,6 +232,7 @@ impl PollHandler PeerFsmDelegate<'a, EK, ER, write.header, write.data, write.ch, + Some(write.disk_full_opt), ); } PeerMsg::UnsafeWrite(write) => { diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index 6ff982eea8c..4a5875f7097 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -219,22 +219,7 @@ impl Peer { if r.is_ok() { self.proposal_control_mut().set_pending_prepare_merge(false); } else { - // Match v1::post_propose_fail. - // If we just failed to propose PrepareMerge, the pessimistic locks status - // may become MergingRegion incorrectly. So, we have to revert it here. - // Note: The `is_merging` check from v1 is removed because proposed - // `PrepareMerge` rejects all writes (in `ProposalControl::check_conflict`). - assert!( - !self.proposal_control().is_merging(), - "{}", - SlogFormat(&self.logger) - ); - self.take_merge_context(); - self.proposal_control_mut().set_pending_prepare_merge(false); - let mut pessimistic_locks = self.txn_context().ext().pessimistic_locks.write(); - if pessimistic_locks.status == LocksStatus::MergingRegion { - pessimistic_locks.status = LocksStatus::Normal; - } + self.post_prepare_merge_fail(); } r } @@ -707,6 +692,25 @@ impl Peer { self.propose(store_ctx, cmd.write_to_bytes().unwrap())?; Ok(()) } + + pub fn post_prepare_merge_fail(&mut self) { + // Match v1::post_propose_fail. + // If we just failed to propose PrepareMerge, the pessimistic locks status + // may become MergingRegion incorrectly. So, we have to revert it here. 
+ // Note: The `is_merging` check from v1 is removed because proposed + // `PrepareMerge` rejects all writes (in `ProposalControl::check_conflict`). + assert!( + !self.proposal_control().is_merging(), + "{}", + SlogFormat(&self.logger) + ); + self.take_merge_context(); + self.proposal_control_mut().set_pending_prepare_merge(false); + let mut pessimistic_locks = self.txn_context().ext().pessimistic_locks.write(); + if pessimistic_locks.status == LocksStatus::MergingRegion { + pessimistic_locks.status = LocksStatus::Normal; + } + } } impl Apply { diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index d59a564c696..9d7fee55ae4 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -12,6 +12,7 @@ use compact_log::CompactLogResult; use conf_change::{ConfChangeResult, UpdateGcPeersResult}; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ + kvrpcpb::DiskFullOpt, metapb::{PeerRole, Region}, raft_cmdpb::{AdminCmdType, RaftCmdRequest}, raft_serverpb::{ExtraMessageType, FlushMemtable, RaftMessage}, @@ -33,13 +34,13 @@ use raftstore::{ }, Error, }; -use slog::{error, info}; +use slog::{debug, error, info}; use split::SplitResult; pub use split::{ report_split_init_finish, temp_split_path, RequestHalfSplit, RequestSplit, SplitFlowControl, SplitInit, SplitPendingAppend, SPLIT_PREFIX, }; -use tikv_util::{box_err, log::SlogFormat, slog_panic}; +use tikv_util::{box_err, log::SlogFormat, slog_panic, sys::disk::DiskUsage}; use txn_types::WriteBatchFlags; use self::flashback::FlashbackResult; @@ -103,6 +104,18 @@ impl Peer { let pre_transfer_leader = cmd_type == AdminCmdType::TransferLeader && !WriteBatchFlags::from_bits_truncate(req.get_header().get_flags()) .contains(WriteBatchFlags::TRANSFER_LEADER_PROPOSAL); + let is_conf_change = apply::is_conf_change_cmd(&req); + + // Check whether the admin request can 
be proposed when disk full. + let can_skip_check = is_transfer_leader || pre_transfer_leader || is_conf_change; + if !can_skip_check && let Err(e) = + self.check_proposal_with_disk_full_opt(ctx, DiskFullOpt::AllowedOnAlmostFull) + { + let resp = cmd_resp::new_error(e); + ch.report_error(resp); + self.post_propose_fail(cmd_type); + return; + } // The admin request is rejected because it may need to update epoch checker // which introduces an uncertainty and may breaks the correctness of epoch @@ -134,9 +147,11 @@ impl Peer { ch.report_error(resp); return; } + // Prepare Merge need to be broadcast to as many as followers when disk full. + self.on_prepare_merge(cmd_type, ctx); // To maintain propose order, we need to make pending proposal first. self.propose_pending_writes(ctx); - let res = if apply::is_conf_change_cmd(&req) { + let res = if is_conf_change { self.propose_conf_change(ctx, req) } else { // propose other admin command. @@ -258,6 +273,42 @@ impl Peer { self.post_propose_command(ctx, res, vec![ch], true); } + fn on_prepare_merge( + &mut self, + cmd_type: AdminCmdType, + ctx: &StoreContext, + ) { + let is_merge_cmd = + cmd_type == AdminCmdType::PrepareMerge || cmd_type == AdminCmdType::RollbackMerge; + let has_disk_full_peers = self.abnormal_peer_context().disk_full_peers().is_empty(); + let proposal_index = self.next_proposal_index(); + if is_merge_cmd + && (!matches!(ctx.self_disk_usage, DiskUsage::Normal) || !has_disk_full_peers) + { + self.has_region_merge_proposal = true; + self.region_merge_proposal_index = proposal_index; + let mut peers = vec![]; + self.abnormal_peer_context_mut() + .disk_full_peers_mut() + .peers_mut() + .iter_mut() + .for_each(|(k, v)| { + if !matches!(v.0, DiskUsage::AlreadyFull) { + v.1 = true; + peers.push(*k); + } + }); + debug!( + self.logger, + "adjust max inflight msgs"; + "cmd_type" => ?cmd_type, + "raft_max_inflight_msgs" => ctx.cfg.raft_max_inflight_msgs, + "region" => self.region_id() + ); + 
self.adjust_peers_max_inflight_msgs(&peers, ctx.cfg.raft_max_inflight_msgs); + } + } + fn start_pre_flush( &mut self, ctx: &mut StoreContext, diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index 0f9cae7218d..cfbd7678c17 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -35,6 +35,7 @@ use engine_traits::{ use fail::fail_point; use futures::channel::oneshot; use kvproto::{ + kvrpcpb::DiskFullOpt, metapb::{self, Region, RegionEpoch}, pdpb::CheckPolicy, raft_cmdpb::{AdminRequest, AdminResponse, RaftCmdRequest, SplitRequest}, @@ -332,6 +333,14 @@ impl Peer { )))); return; } + // Check whether the admin request can be proposed when disk full. + if let Err(e) = + self.check_proposal_with_disk_full_opt(ctx, DiskFullOpt::AllowedOnAlmostFull) + { + info!(self.logger, "disk is full, skip split"; "err" => ?e); + ch.set_result(cmd_resp::new_error(e)); + return; + } if let Err(e) = util::validate_split_region( self.region_id(), self.peer_id(), @@ -365,6 +374,13 @@ impl Peer { info!(self.logger, "not leader, skip."); return; } + // Check whether the admin request can be proposed when disk full. 
+ if let Err(e) = + self.check_proposal_with_disk_full_opt(ctx, DiskFullOpt::AllowedOnAlmostFull) + { + info!(self.logger, "disk is full, skip half split"; "err" => ?e); + return; + } let region = self.region(); if util::is_epoch_stale(&rhs.epoch, region.get_region_epoch()) { diff --git a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs index 4cdeba3bc41..bf9cb426255 100644 --- a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs +++ b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs @@ -118,7 +118,7 @@ impl Peer { transferee } - fn pre_transfer_leader(&mut self, peer: &metapb::Peer) -> bool { + pub fn pre_transfer_leader(&mut self, peer: &metapb::Peer) -> bool { if self.raft_group().raft.has_pending_conf() { info!( self.logger, diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index e579d22c6da..70cdbfda237 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -481,6 +481,12 @@ impl Peer { } self.check_unsafe_recovery_state(ctx); } + + pub fn post_propose_fail(&mut self, cmd_type: AdminCmdType) { + if cmd_type == AdminCmdType::PrepareMerge { + self.post_prepare_merge_fail(); + } + } } #[derive(Debug)] diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index a9d8bd664fe..6eacc75c0f1 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -5,7 +5,7 @@ use engine_traits::{ }; use fail::fail_point; use futures::channel::oneshot; -use kvproto::raft_cmdpb::RaftRequestHeader; +use kvproto::{kvrpcpb::DiskFullOpt, raft_cmdpb::RaftRequestHeader}; use raftstore::{ store::{ cmd_resp, @@ -42,6 +42,7 @@ impl Peer { header: Box, data: 
SimpleWriteBinary, ch: CmdResChannel, + disk_full_opt: Option, ) { if !self.serving() { apply::notify_req_region_removed(self.region_id(), ch); @@ -59,6 +60,13 @@ impl Peer { ch.report_error(resp); return; } + // Check whether the write request can be proposed with the given disk full + // option. + if let Some(opt) = disk_full_opt && let Err(e) = self.check_proposal_with_disk_full_opt(ctx, opt) { + let resp = cmd_resp::new_error(e); + ch.report_error(resp); + return; + } // To maintain propose order, we need to make pending proposal first. self.propose_pending_writes(ctx); if let Some(conflict) = self.proposal_control_mut().check_conflict(None) { diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 84bded8a9bb..5828a7bb661 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -26,28 +26,34 @@ //! `merged_records`, to avoid race between destroy and merge, leader needs to //! ask target peer to destroy source peer. 
-use std::{cmp, mem}; +use std::{cmp, collections::HashSet, mem}; use batch_system::BasicMailbox; use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; use kvproto::{ - metapb::{self, Region}, + kvrpcpb::DiskFullOpt, + metapb::{self, PeerRole, Region}, raft_cmdpb::{AdminCmdType, RaftCmdRequest}, raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, RaftMessage}, }; -use raftstore::store::{ - fsm::{ - apply, - life::{build_peer_destroyed_report, forward_destroy_to_source_peer}, - Proposal, +use raft::eraftpb::MessageType; +use raftstore::{ + store::{ + fsm::{ + apply, + life::{build_peer_destroyed_report, forward_destroy_to_source_peer}, + Proposal, + }, + metrics::RAFT_PEER_PENDING_DURATION, + util, DiskFullPeers, Transport, WriteTask, }, - metrics::RAFT_PEER_PENDING_DURATION, - util, Transport, WriteTask, + Error, Result, }; use slog::{debug, error, info, warn}; use tikv_util::{ store::find_peer, + sys::disk::DiskUsage, time::{duration_to_sec, Instant}, }; @@ -126,16 +132,22 @@ pub struct AbnormalPeerContext { pending_peers: Vec<(u64, Instant)>, /// A inaccurate cache about which peer is marked as down. down_peers: Vec, + // disk full peer set. + disk_full_peers: DiskFullPeers, + // show whether an already disk full TiKV appears in the potential majority set. + dangerous_majority_set: bool, } impl AbnormalPeerContext { #[inline] pub fn is_empty(&self) -> bool { - self.pending_peers.is_empty() && self.down_peers.is_empty() + self.pending_peers.is_empty() && self.down_peers.is_empty() /* && self.disk_full_peers.is_empty() */ } #[inline] pub fn reset(&mut self) { + // No need to refresh disk_full_peers as it will be refreshed + // automatically when the disk usage updated. 
self.pending_peers.clear(); self.down_peers.clear(); } @@ -174,6 +186,26 @@ impl AbnormalPeerContext { RAFT_PEER_PENDING_DURATION.observe(elapsed); }); } + + #[inline] + pub fn disk_full_peers(&self) -> &DiskFullPeers { + &self.disk_full_peers + } + + #[inline] + pub fn disk_full_peers_mut(&mut self) -> &mut DiskFullPeers { + &mut self.disk_full_peers + } + + #[inline] + pub fn is_dangerous_majority_set(&self) -> bool { + self.dangerous_majority_set + } + + #[inline] + pub fn setup_dangerous_majority_set(&mut self, is_dangerous: bool) { + self.dangerous_majority_set = is_dangerous; + } } #[derive(Default)] @@ -415,6 +447,20 @@ impl Store { ctx.raft_metrics.message_dropped.stale_msg.inc(); return false; } + // Check whether this message should be dropped when disk full. + let msg_type = msg.get_message().get_msg_type(); + if matches!(ctx.self_disk_usage, DiskUsage::AlreadyFull) + && MessageType::MsgTimeoutNow == msg_type + { + debug!( + self.logger(), + "skip {:?} because of disk full", msg_type; + "region_id" => region_id, "peer_id" => to_peer.id, + ); + ctx.raft_metrics.message_dropped.disk_full.inc(); + return false; + } + let destroyed = match check_if_to_peer_destroyed(&ctx.engine, &msg, self.store_id()) { Ok(d) => d, Err(e) => { @@ -836,6 +882,266 @@ impl Peer { self.maybe_schedule_gc_peer_tick(); } + pub fn adjust_peers_max_inflight_msgs(&mut self, peers: &[u64], raft_max_inflight_msgs: usize) { + peers.iter().for_each(|id| { + self.raft_group_mut() + .raft + .adjust_max_inflight_msgs(*id, raft_max_inflight_msgs); + debug!( + self.logger, + "adjust max inflight msgs"; + "raft_max_inflight_msgs" => raft_max_inflight_msgs, + "peer_id" => id + ); + }); + } + + // Check disk usages for the peer itself and other peers in the raft group. + // The return value indicates whether the proposal is allowed or not. 
+ pub fn check_proposal_with_disk_full_opt( + &mut self, + ctx: &StoreContext, + disk_full_opt: DiskFullOpt, + ) -> Result<()> { + let leader_allowed = match ctx.self_disk_usage { + DiskUsage::Normal => true, + DiskUsage::AlmostFull => !matches!(disk_full_opt, DiskFullOpt::NotAllowedOnFull), + DiskUsage::AlreadyFull => false, + }; + let mut disk_full_stores = Vec::new(); + let abnormal_peer_context = self.abnormal_peer_context(); + let disk_full_peers = abnormal_peer_context.disk_full_peers(); + if !leader_allowed { + disk_full_stores.push(ctx.store_id); + // Try to transfer leader to a node with disk usage normal to maintain write + // availability. If majority node is disk full, to transfer leader or not is not + // necessary. Note: Need to exclude learner node. + if !disk_full_peers.majority() { + let target_peer = self + .region() + .get_peers() + .iter() + .find(|x| { + !disk_full_peers.has(x.get_id()) + && x.get_id() != self.peer_id() + && !self + .abnormal_peer_context() + .down_peers() + .contains(&x.get_id()) + && !matches!(x.get_role(), PeerRole::Learner) + }) + .cloned(); + if let Some(p) = target_peer { + debug!( + self.logger, + "try to transfer leader because of current leader disk full"; + "region_id" => self.region().get_id(), + "peer_id" => self.peer_id(), + "target_peer_id" => p.get_id(), + ); + self.pre_transfer_leader(&p); + } + } + } else { + // Check followers. + if disk_full_peers.is_empty() { + return Ok(()); + } + if !abnormal_peer_context.is_dangerous_majority_set() { + if !disk_full_peers.majority() { + return Ok(()); + } + // Majority peers are in disk full status but the request carries a special + // flag. 
+ if matches!(disk_full_opt, DiskFullOpt::AllowedOnAlmostFull) + && disk_full_peers.peers().values().any(|x| x.1) + { + return Ok(()); + } + } + for peer in self.region().get_peers() { + let (peer_id, store_id) = (peer.get_id(), peer.get_store_id()); + if disk_full_peers.peers().get(&peer_id).is_some() { + disk_full_stores.push(store_id); + } + } + } + let errmsg = format!( + "propose failed: tikv disk full, cmd diskFullOpt={:?}, leader diskUsage={:?}", + disk_full_opt, ctx.self_disk_usage + ); + Err(Error::DiskFull(disk_full_stores, errmsg)) + } + + pub fn clear_disk_full_peers(&mut self, ctx: &StoreContext) { + let disk_full_peers = mem::take(self.abnormal_peer_context_mut().disk_full_peers_mut()); + let raft = &mut self.raft_group_mut().raft; + for peer in disk_full_peers.peers().iter() { + raft.adjust_max_inflight_msgs(*peer.0, ctx.cfg.raft_max_inflight_msgs); + } + } + + pub fn refill_disk_full_peers(&mut self, ctx: &StoreContext) { + self.clear_disk_full_peers(ctx); + debug!( + self.logger, + "region id {}, peer id {}, store id {}: refill disk full peers when peer disk usage status changed or merge triggered", + self.region().get_id(), + self.peer_id(), + ctx.store_id, + ); + + // Collect disk full peers and all peers' `next_idx` to find a potential quorum. + let peers_len = self.region().get_peers().len(); + let mut normal_peers = HashSet::default(); + let mut next_idxs = Vec::with_capacity(peers_len); + let mut min_peer_index = u64::MAX; + for peer in self.region().get_peers() { + let (peer_id, store_id) = (peer.get_id(), peer.get_store_id()); + let usage = ctx.store_disk_usages.get(&store_id); + if usage.is_none() { + // Always treat the leader itself as normal. + normal_peers.insert(peer_id); + } + if let Some(pr) = self.raft_group().raft.prs().get(peer_id) { + // status 3-normal, 2-almostfull, 1-alreadyfull, only for simplying the sort + // func belowing. 
+ let mut status = 3; + if let Some(usg) = usage { + status = match usg { + DiskUsage::Normal => 3, + DiskUsage::AlmostFull => 2, + DiskUsage::AlreadyFull => 1, + }; + } + + if !self.abnormal_peer_context().down_peers().contains(&peer_id) { + next_idxs.push((peer_id, pr.next_idx, usage, status)); + if min_peer_index > pr.next_idx { + min_peer_index = pr.next_idx; + } + } + } + } + if self.has_region_merge_proposal { + debug!( + self.logger, + "region id {}, peer id {}, store id {} has a merge request, with region_merge_proposal_index {}", + self.region_id(), + self.peer_id(), + ctx.store_id, + self.region_merge_proposal_index + ); + if min_peer_index > self.region_merge_proposal_index { + self.has_region_merge_proposal = false; + } + } + + if normal_peers.len() == peers_len { + return; + } + + // Reverse sort peers based on `next_idx`, `usage` and `store healthy status`, + // then try to get a potential quorum. + next_idxs.sort_by(|x, y| { + if x.3 == y.3 { + y.1.cmp(&x.1) + } else { + y.3.cmp(&x.3) + } + }); + + let majority = !self.raft_group().raft.prs().has_quorum(&normal_peers); + self.abnormal_peer_context_mut() + .disk_full_peers_mut() + .set_majority(majority); + // Here set all peers can be sent when merging. + for &(peer, _, usage, ..) 
in &next_idxs { + if let Some(usage) = usage { + if self.has_region_merge_proposal && !matches!(*usage, DiskUsage::AlreadyFull) { + self.abnormal_peer_context_mut() + .disk_full_peers_mut() + .peers_mut() + .insert(peer, (*usage, true)); + self.raft_group_mut() + .raft + .adjust_max_inflight_msgs(peer, ctx.cfg.raft_max_inflight_msgs); + debug!( + self.logger, + "refill disk full peer max inflight to {} on a merging region: region id {}, peer id {}", + ctx.cfg.raft_max_inflight_msgs, + self.region_id(), + peer + ); + } else { + self.abnormal_peer_context_mut() + .disk_full_peers_mut() + .peers_mut() + .insert(peer, (*usage, false)); + self.raft_group_mut().raft.adjust_max_inflight_msgs(peer, 0); + debug!( + self.logger, + "refill disk full peer max inflight to {} on region without merging: region id {}, peer id {}", + 0, + self.region_id(), + peer + ); + } + } + } + + if !self.abnormal_peer_context().disk_full_peers().majority() { + // Less than majority peers are in disk full status. + return; + } + + let (mut potential_quorum, mut quorum_ok) = (HashSet::default(), false); + let mut is_dangerous_set = false; + for &(peer_id, _, _, status) in &next_idxs { + potential_quorum.insert(peer_id); + + if status == 1 { + // already full peer. + is_dangerous_set = true; + } + + if self.raft_group().raft.prs().has_quorum(&potential_quorum) { + quorum_ok = true; + break; + } + } + + self.abnormal_peer_context_mut() + .setup_dangerous_majority_set(is_dangerous_set); + + // For the Peer with AlreadFull in potential quorum set, we still need to send + // logs to it. To support incoming configure change. + if quorum_ok { + let has_region_merge_proposal = self.has_region_merge_proposal; + let peers = self + .abnormal_peer_context_mut() + .disk_full_peers_mut() + .peers_mut(); + let mut inflight_peers = vec![]; + for peer in potential_quorum { + if let Some(x) = peers.get_mut(&peer) { + // It can help to establish a quorum. 
+ x.1 = true; + // for merge region, all peers have been set to the max. + if !has_region_merge_proposal { + inflight_peers.push(peer); + } + } + } + debug!( + self.logger, + "refill disk full peer max inflight to 1 in potential quorum set: region id {}", + self.region_id(), + ); + self.adjust_peers_max_inflight_msgs(&inflight_peers, 1); + } + } + /// A peer can be destroyed in four cases: /// /// 1. Received a gc message; diff --git a/components/raftstore-v2/src/operation/pd.rs b/components/raftstore-v2/src/operation/pd.rs index 9bce8f3ba02..8e392755c5e 100644 --- a/components/raftstore-v2/src/operation/pd.rs +++ b/components/raftstore-v2/src/operation/pd.rs @@ -103,7 +103,7 @@ impl Peer { let task = pd::Task::RegionHeartbeat(pd::RegionHeartbeatTask { term: self.term(), region: self.region().clone(), - down_peers: self.collect_down_peers(ctx.cfg.max_peer_down_duration.0), + down_peers: self.collect_down_peers(ctx), peer: self.peer().clone(), pending_peers: self.collect_pending_peers(ctx), written_bytes: self.self_stat().written_bytes, diff --git a/components/raftstore-v2/src/operation/query/lease.rs b/components/raftstore-v2/src/operation/query/lease.rs index 84a8ad09ed3..189986f93d2 100644 --- a/components/raftstore-v2/src/operation/query/lease.rs +++ b/components/raftstore-v2/src/operation/query/lease.rs @@ -168,7 +168,7 @@ impl Peer { header.set_term(self.term()); let empty_data = SimpleWriteEncoder::with_capacity(0).encode(); let (ch, _) = CmdResChannel::pair(); - self.on_simple_write(ctx, header, empty_data, ch); + self.on_simple_write(ctx, header, empty_data, ch, None); } /// response the read index request diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 1ff07f2ccc1..3ceb8693c0b 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -54,6 +54,7 @@ use tikv_util::{ log::SlogFormat, slog_panic, store::find_peer, + 
sys::disk::DiskUsage, time::{duration_to_sec, monotonic_raw_now, Duration}, }; @@ -265,6 +266,7 @@ impl Peer { "message_type" => %util::MsgType(&msg), "from_peer_id" => msg.get_from_peer().get_id(), "to_peer_id" => msg.get_to_peer().get_id(), + "disk_usage" => ?msg.disk_usage, ); if self.pause_for_replay() && msg.get_message().get_msg_type() == MessageType::MsgAppend { ctx.raft_metrics.message_dropped.recovery.inc(); @@ -287,6 +289,9 @@ impl Peer { return; } } + + self.handle_reported_disk_usage(ctx, &msg); + if msg.get_to_peer().get_store_id() != self.peer().get_store_id() { ctx.raft_metrics.message_dropped.mismatch_store_id.inc(); return; @@ -515,7 +520,11 @@ impl Peer { /// /// If the recipient can't be found, `None` is returned. #[inline] - fn build_raft_message(&mut self, msg: eraftpb::Message) -> Option { + fn build_raft_message( + &mut self, + msg: eraftpb::Message, + disk_usage: DiskUsage, + ) -> Option { let to_peer = match self.peer_from_cache(msg.to) { Some(p) => p, None => { @@ -530,6 +539,8 @@ impl Peer { }; let mut raft_msg = self.prepare_raft_message(); + // Fill in the disk usage. 
+ raft_msg.set_disk_usage(disk_usage); raft_msg.set_to_peer(to_peer); if msg.from != self.peer().id { @@ -772,8 +783,9 @@ impl Peer { if !ready.messages().is_empty() { debug_assert!(self.is_leader()); + let disk_usage = ctx.self_disk_usage; for msg in ready.take_messages() { - if let Some(msg) = self.build_raft_message(msg) { + if let Some(msg) = self.build_raft_message(msg, disk_usage) { self.send_raft_message_on_leader(ctx, msg); } } @@ -802,10 +814,11 @@ impl Peer { self.on_advance_persisted_apply_index(ctx, prev_persisted, &mut write_task); if !ready.persisted_messages().is_empty() { + let disk_usage = ctx.self_disk_usage; write_task.messages = ready .take_persisted_messages() .into_iter() - .flat_map(|m| self.build_raft_message(m)) + .flat_map(|m| self.build_raft_message(m, disk_usage)) .collect(); } if self.has_pending_messages() { @@ -1069,6 +1082,16 @@ impl Peer { // Exit entry cache warmup state when the peer becomes leader. self.entry_storage_mut().clear_entry_cache_warmup_state(); + if !ctx.store_disk_usages.is_empty() { + self.refill_disk_full_peers(ctx); + debug!( + self.logger, + "become leader refills disk full peers to {:?}", + self.abnormal_peer_context().disk_full_peers(); + "region_id" => self.region_id(), + ); + } + self.region_heartbeat_pd(ctx); self.add_pending_tick(PeerTick::CompactLog); self.add_pending_tick(PeerTick::SplitRegionCheck); @@ -1209,6 +1232,52 @@ impl Peer { ); } } + + fn handle_reported_disk_usage( + &mut self, + ctx: &mut StoreContext, + msg: &RaftMessage, + ) { + let store_id = msg.get_from_peer().get_store_id(); + let peer_id = msg.get_from_peer().get_id(); + let disk_full_peers = self.abnormal_peer_context().disk_full_peers(); + let refill_disk_usages = if matches!(msg.disk_usage, DiskUsage::Normal) { + ctx.store_disk_usages.remove(&store_id); + if !self.is_leader() { + return; + } + disk_full_peers.has(peer_id) + } else { + ctx.store_disk_usages.insert(store_id, msg.disk_usage); + if !self.is_leader() { + return; + } + + 
disk_full_peers.is_empty() + || disk_full_peers + .get(peer_id) + .map_or(true, |x| x != msg.disk_usage) + }; + + if refill_disk_usages || self.has_region_merge_proposal { + let prev = disk_full_peers.get(peer_id); + if Some(msg.disk_usage) != prev { + info!( + self.logger, + "reported disk usage changes {:?} -> {:?}", prev, msg.disk_usage; + "region_id" => self.region_id(), + "peer_id" => peer_id, + ); + } + self.refill_disk_full_peers(ctx); + debug!( + self.logger, + "raft message refills disk full peers to {:?}", + self.abnormal_peer_context().disk_full_peers(); + "region_id" => self.region_id(), + ); + } + } } impl Storage { diff --git a/components/raftstore-v2/src/operation/txn_ext.rs b/components/raftstore-v2/src/operation/txn_ext.rs index 272b2526b39..4c875a675ef 100644 --- a/components/raftstore-v2/src/operation/txn_ext.rs +++ b/components/raftstore-v2/src/operation/txn_ext.rs @@ -9,7 +9,11 @@ use std::sync::{atomic::Ordering, Arc}; use crossbeam::atomic::AtomicCell; use engine_traits::{KvEngine, RaftEngine, CF_LOCK}; -use kvproto::{kvrpcpb::ExtraOp, metapb::Region, raft_cmdpb::RaftRequestHeader}; +use kvproto::{ + kvrpcpb::{DiskFullOpt, ExtraOp}, + metapb::Region, + raft_cmdpb::RaftRequestHeader, +}; use parking_lot::RwLockWriteGuard; use raft::eraftpb; use raftstore::store::{ @@ -266,8 +270,14 @@ impl Peer { self.logger, "propose {} locks before transferring leader", lock_count; ); - let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write(header, encoder.encode()).0 else {unreachable!()}; - self.on_simple_write(ctx, write.header, write.data, write.ch); + let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write_with_opt(header, encoder.encode(), DiskFullOpt::AllowedOnAlmostFull).0 else {unreachable!()}; + self.on_simple_write( + ctx, + write.header, + write.data, + write.ch, + Some(write.disk_full_opt), + ); true } } diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 4ff47c4b4bb..2c8b8cef1db 100644 --- 
a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -32,6 +32,7 @@ use tikv_util::{slog_panic, time::duration_to_sec}; use super::storage::Storage; use crate::{ + batch::StoreContext, fsm::ApplyScheduler, operation::{ AbnormalPeerContext, AsyncWriter, BucketStatsInfo, CompactLogContext, DestroyProgress, @@ -126,6 +127,10 @@ pub struct Peer { abnormal_peer_context: AbnormalPeerContext, + // region merge logic need to be broadcast to all followers when disk full happens. + pub has_region_merge_proposal: bool, + pub region_merge_proposal_index: u64, + /// Force leader state is only used in online recovery when the majority of /// peers are missing. In this state, it forces one peer to become leader /// out of accordance with Raft election rule, and forbids any @@ -227,6 +232,8 @@ impl Peer { pending_messages: vec![], gc_peer_context: GcPeerContext::default(), abnormal_peer_context: AbnormalPeerContext::default(), + has_region_merge_proposal: false, + region_merge_proposal_index: 0_u64, force_leader_state: None, unsafe_recovery_state: None, }; @@ -600,7 +607,7 @@ impl Peer { ) } - pub fn collect_down_peers(&mut self, max_duration: Duration) -> Vec { + pub fn collect_down_peers(&mut self, ctx: &StoreContext) -> Vec { let mut down_peers = Vec::new(); let mut down_peer_ids = Vec::new(); let now = Instant::now(); @@ -610,7 +617,7 @@ impl Peer { } if let Some(instant) = self.peer_heartbeats.get(&p.get_id()) { let elapsed = now.saturating_duration_since(*instant); - if elapsed >= max_duration { + if elapsed >= ctx.cfg.max_peer_down_duration.0 { let mut stats = pdpb::PeerStats::default(); stats.set_peer(p.clone()); stats.set_down_seconds(elapsed.as_secs()); @@ -619,8 +626,11 @@ impl Peer { } } } + let exist_down_peers = !down_peer_ids.is_empty(); *self.abnormal_peer_context_mut().down_peers_mut() = down_peer_ids; - // TODO: `refill_disk_full_peers` + if exist_down_peers { + self.refill_disk_full_peers(ctx); + } down_peers } @@ -925,6 
+935,11 @@ impl Peer { self.last_sent_snapshot_index } + #[inline] + pub fn next_proposal_index(&self) -> u64 { + self.raft_group.raft.raft_log.last_index() + 1 + } + #[inline] pub fn index_term(&self, idx: u64) -> u64 { match self.raft_group.raft.raft_log.term(idx) { diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 16d43970e7a..830286bb142 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -6,6 +6,7 @@ use std::sync::{mpsc::SyncSender, Arc}; use collections::HashSet; use kvproto::{ import_sstpb::SstMeta, + kvrpcpb::DiskFullOpt, metapb, metapb::RegionEpoch, pdpb, @@ -134,6 +135,7 @@ pub struct SimpleWrite { pub header: Box, pub data: SimpleWriteBinary, pub ch: CmdResChannel, + pub disk_full_opt: DiskFullOpt, } #[derive(Debug)] @@ -296,6 +298,14 @@ impl PeerMsg { pub fn simple_write( header: Box, data: SimpleWriteBinary, + ) -> (Self, CmdResSubscriber) { + PeerMsg::simple_write_with_opt(header, data, DiskFullOpt::default()) + } + + pub fn simple_write_with_opt( + header: Box, + data: SimpleWriteBinary, + disk_full_opt: DiskFullOpt, ) -> (Self, CmdResSubscriber) { let (ch, sub) = CmdResChannel::pair(); ( @@ -304,6 +314,7 @@ impl PeerMsg { header, data, ch, + disk_full_opt, }), sub, ) diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index efd149e7c41..0ca99efffc4 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -57,8 +57,8 @@ pub use self::{ }, peer::{ can_amend_read, get_sync_log_from_request, make_transfer_leader_response, - propose_read_index, should_renew_lease, Peer, PeerStat, ProposalContext, ProposalQueue, - RequestInspector, RequestPolicy, TRANSFER_LEADER_COMMAND_REPLY_CTX, + propose_read_index, should_renew_lease, DiskFullPeers, Peer, PeerStat, ProposalContext, + ProposalQueue, RequestInspector, RequestPolicy, TRANSFER_LEADER_COMMAND_REPLY_CTX, 
}, peer_storage::{ clear_meta, do_snapshot, write_initial_apply_state, write_initial_raft_state, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 8ef857bfa12..e9350ba7bb0 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -5049,6 +5049,15 @@ impl DiskFullPeers { pub fn majority(&self) -> bool { self.majority } + pub fn set_majority(&mut self, majority: bool) { + self.majority = majority; + } + pub fn peers(&self) -> &HashMap { + &self.peers + } + pub fn peers_mut(&mut self) -> &mut HashMap { + &mut self.peers + } pub fn has(&self, peer_id: u64) -> bool { !self.peers.is_empty() && self.peers.contains_key(&peer_id) } diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 9d61918bd1f..496f8cc87dc 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -37,7 +37,7 @@ use pd_client::PdClient; use raftstore::{ store::{ cmd_resp, initial_region, region_meta::RegionMeta, util::check_key_in_region, Bucket, - BucketRange, Callback, RegionSnapshot, TabletSnapManager, WriteResponse, + BucketRange, Callback, RaftCmdExtraOpts, RegionSnapshot, TabletSnapManager, WriteResponse, INIT_EPOCH_CONF_VER, INIT_EPOCH_VER, }, Error, Result, @@ -283,9 +283,18 @@ pub trait Simulator { } fn async_command_on_node( + &mut self, + node_id: u64, + request: RaftCmdRequest, + ) -> BoxFuture<'static, RaftCmdResponse> { + self.async_command_on_node_with_opts(node_id, request, RaftCmdExtraOpts::default()) + } + + fn async_command_on_node_with_opts( &mut self, node_id: u64, mut request: RaftCmdRequest, + opts: RaftCmdExtraOpts, ) -> BoxFuture<'static, RaftCmdResponse> { let region_id = request.get_header().get_region_id(); @@ -316,7 +325,11 @@ pub trait Simulator { _ => unreachable!(), } } - PeerMsg::simple_write(Box::new(request.take_header()), write_encoder.encode()) + 
PeerMsg::simple_write_with_opt( + Box::new(request.take_header()), + write_encoder.encode(), + opts.disk_full_opt, + ) }; self.async_peer_msg_on_node(node_id, region_id, msg) @@ -1275,6 +1288,20 @@ impl, EK: KvEngine> Cluster { .async_command_on_node(leader.get_store_id(), req) } + pub fn async_request_with_opts( + &mut self, + mut req: RaftCmdRequest, + opts: RaftCmdExtraOpts, + ) -> Result> { + let region_id = req.get_header().get_region_id(); + let leader = self.leader_of_region(region_id).unwrap(); + req.mut_header().set_peer(leader.clone()); + Ok(self + .sim + .wl() + .async_command_on_node_with_opts(leader.get_store_id(), req, opts)) + } + pub fn async_put( &mut self, key: &[u8], diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index d83dff12e9a..af2bab26183 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -14,16 +14,19 @@ use engine_test::raft::RaftTestEngine; use engine_traits::{CfName, KvEngine, TabletRegistry, CF_DEFAULT}; use file_system::IoRateLimiter; use futures::future::BoxFuture; +use grpcio::{ChannelBuilder, Environment}; use kvproto::{ encryptionpb::EncryptionMethod, - kvrpcpb::Context, + kvrpcpb::{Context, DiskFullOpt, GetResponse, Mutation, PrewriteResponse}, metapb, raft_cmdpb::{CmdType, RaftCmdRequest, RaftCmdResponse}, + tikvpb::TikvClient, }; use raftstore::{store::ReadResponse, Result}; use rand::{prelude::SliceRandom, RngCore}; use server::common::ConfiguredRaftEngine; use tempfile::TempDir; +use test_pd_client::TestPdClient; use test_raftstore::{new_get_cmd, new_put_cf_cmd, new_request, new_snap_cmd, sleep_ms, Config}; use tikv::{ server::KvEngineFactoryBuilder, @@ -479,3 +482,109 @@ pub fn wait_region_epoch_change, EK: KvEngine>( sleep_ms(10); } } + +pub struct PeerClient { + pub cli: TikvClient, + pub ctx: Context, +} + +impl PeerClient { + pub fn new( + cluster: &Cluster, EK>, + region_id: u64, + peer: metapb::Peer, + ) -> PeerClient 
{ + let cli = { + let env = Arc::new(Environment::new(1)); + let channel = + ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(peer.get_store_id())); + TikvClient::new(channel) + }; + let ctx = { + let epoch = cluster.get_region_epoch(region_id); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_peer(peer); + ctx.set_region_epoch(epoch); + ctx + }; + PeerClient { cli, ctx } + } + + pub fn kv_read(&self, key: Vec, ts: u64) -> GetResponse { + test_raftstore::kv_read(&self.cli, self.ctx.clone(), key, ts) + } + + pub fn must_kv_read_equal(&self, key: Vec, val: Vec, ts: u64) { + test_raftstore::must_kv_read_equal(&self.cli, self.ctx.clone(), key, val, ts) + } + + pub fn must_kv_write(&self, pd_client: &TestPdClient, kvs: Vec, pk: Vec) -> u64 { + test_raftstore::must_kv_write(pd_client, &self.cli, self.ctx.clone(), kvs, pk) + } + + pub fn must_kv_prewrite(&self, muts: Vec, pk: Vec, ts: u64) { + test_raftstore::must_kv_prewrite(&self.cli, self.ctx.clone(), muts, pk, ts) + } + + pub fn try_kv_prewrite( + &self, + muts: Vec, + pk: Vec, + ts: u64, + opt: DiskFullOpt, + ) -> PrewriteResponse { + let mut ctx = self.ctx.clone(); + ctx.disk_full_opt = opt; + test_raftstore::try_kv_prewrite(&self.cli, ctx, muts, pk, ts) + } + + pub fn must_kv_prewrite_async_commit(&self, muts: Vec, pk: Vec, ts: u64) { + test_raftstore::must_kv_prewrite_with( + &self.cli, + self.ctx.clone(), + muts, + pk, + ts, + 0, + true, + false, + ) + } + + pub fn must_kv_prewrite_one_pc(&self, muts: Vec, pk: Vec, ts: u64) { + test_raftstore::must_kv_prewrite_with( + &self.cli, + self.ctx.clone(), + muts, + pk, + ts, + 0, + false, + true, + ) + } + + pub fn must_kv_commit(&self, keys: Vec>, start_ts: u64, commit_ts: u64) { + test_raftstore::must_kv_commit( + &self.cli, + self.ctx.clone(), + keys, + start_ts, + commit_ts, + commit_ts, + ) + } + + pub fn must_kv_rollback(&self, keys: Vec>, start_ts: u64) { + test_raftstore::must_kv_rollback(&self.cli, self.ctx.clone(), keys, 
start_ts) + } + + pub fn must_kv_pessimistic_lock(&self, key: Vec, ts: u64) { + test_raftstore::must_kv_pessimistic_lock(&self.cli, self.ctx.clone(), key, ts) + } + + pub fn must_kv_pessimistic_rollback(&self, key: Vec, ts: u64) { + test_raftstore::must_kv_pessimistic_rollback(&self.cli, self.ctx.clone(), key, ts, ts) + } +} diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 5183ecd6567..a80cdda392f 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -304,6 +304,7 @@ impl tikv_kv::Engine for RaftKv2 { data, ch, send_time: Instant::now_coarse(), + disk_full_opt: batch.disk_full_opt, }); let res = self .router diff --git a/tests/failpoints/cases/test_disk_full.rs b/tests/failpoints/cases/test_disk_full.rs index 217269bb5b8..d8b3fadb054 100644 --- a/tests/failpoints/cases/test_disk_full.rs +++ b/tests/failpoints/cases/test_disk_full.rs @@ -5,12 +5,12 @@ use std::{thread, time::Duration}; use kvproto::{ disk_usage::DiskUsage, kvrpcpb::{DiskFullOpt, Op}, - metapb::Region, raft_cmdpb::*, }; use raft::eraftpb::MessageType; use raftstore::store::msg::*; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, future::block_on_timeout, time::Instant}; fn assert_disk_full(resp: &RaftCmdResponse) { @@ -34,148 +34,147 @@ fn get_fp(usage: DiskUsage, store_id: u64) -> String { } // check the region new leader is elected. -fn assert_region_leader_changed( - cluster: &mut Cluster, - region_id: u64, - original_leader: u64, -) { - let timer = Instant::now(); - loop { - if timer.saturating_elapsed() > Duration::from_secs(5) { - panic!("Leader cannot change when the only disk full node is leader"); +macro_rules! 
assert_region_leader_changed { + ($cluster:expr, $region_id:expr, $original_leader:expr) => {{ + let timer = Instant::now(); + loop { + if timer.saturating_elapsed() > Duration::from_secs(5) { + panic!("Leader cannot change when the only disk full node is leader"); + } + let new_leader = $cluster.query_leader(1, $region_id, Duration::from_secs(1)); + if new_leader.is_none() { + sleep_ms(10); + continue; + } + if new_leader.unwrap().get_id() == $original_leader { + sleep_ms(10); + continue; + } else { + break; + } } - let new_leader = cluster.query_leader(1, region_id, Duration::from_secs(1)); - if new_leader.is_none() { - sleep_ms(10); - continue; - } - if new_leader.unwrap().get_id() == original_leader { - sleep_ms(10); - continue; - } else { - break; - } - } + }}; } -fn ensure_disk_usage_is_reported( - cluster: &mut Cluster, - peer_id: u64, - store_id: u64, - region: &Region, -) { - let peer = new_peer(store_id, peer_id); - let key = region.get_start_key(); - let ch = async_read_on_peer(cluster, peer, region.clone(), key, true, true); - block_on_timeout(ch, Duration::from_secs(1)).unwrap(); +macro_rules! ensure_disk_usage_is_reported { + ($cluster:expr, $peer_id:expr, $store_id:expr, $region:expr) => {{ + let peer = new_peer($store_id, $peer_id); + let key = $region.get_start_key(); + let ch = async_read_on_peer($cluster, peer, $region.clone(), key, true, true); + block_on_timeout(ch, Duration::from_secs(1)).unwrap(); + }}; } -fn test_disk_full_leader_behaviors(usage: DiskUsage) { - let mut cluster = new_node_cluster(0, 3); - cluster.pd_client.disable_default_operator(); - cluster.run(); - - // To ensure all replicas are not pending. 
- cluster.must_put(b"k1", b"v1"); - must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); - must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); - must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); - - cluster.must_transfer_leader(1, new_peer(1, 1)); - fail::cfg(get_fp(usage, 1), "return").unwrap(); - - // Test new normal proposals won't be allowed when disk is full. - let old_last_index = cluster.raft_local_state(1, 1).last_index; - let rx = cluster.async_put(b"k2", b"v2").unwrap(); - assert_disk_full(&block_on_timeout(rx, Duration::from_secs(2)).unwrap()); - let new_last_index = cluster.raft_local_state(1, 1).last_index; - assert_eq!(old_last_index, new_last_index); - - assert_region_leader_changed(&mut cluster, 1, 1); - fail::remove(get_fp(usage, 1)); - cluster.must_transfer_leader(1, new_peer(1, 1)); - fail::cfg(get_fp(usage, 1), "return").unwrap(); - - // merge/split is only allowed on disk almost full. - if usage != DiskUsage::AlreadyFull { - // Test split must be allowed when disk is full. - let region = cluster.get_region(b"k1"); - cluster.must_split(®ion, b"k1"); +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_disk_full_leader_behaviors() { + for usage in [DiskUsage::AlmostFull, DiskUsage::AlreadyFull] { + let mut cluster = new_cluster(0, 3); + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); // set gc duration for v2 + cluster.pd_client.disable_default_operator(); + cluster.run(); + + // To ensure all replicas are not pending. + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + fail::cfg(get_fp(usage, 1), "return").unwrap(); + + // Test new normal proposals won't be allowed when disk is full. 
+ let old_last_index = cluster.raft_local_state(1, 1).last_index; + let rx = cluster.async_put(b"k2", b"v2").unwrap(); + assert_disk_full(&block_on_timeout(rx, Duration::from_secs(2)).unwrap()); + let new_last_index = cluster.raft_local_state(1, 1).last_index; + assert_eq!(old_last_index, new_last_index); + + assert_region_leader_changed!(&cluster, 1, 1); + fail::remove(get_fp(usage, 1)); + cluster.must_transfer_leader(1, new_peer(1, 1)); + fail::cfg(get_fp(usage, 1), "return").unwrap(); + + // merge/split is only allowed on disk almost full. + if usage != DiskUsage::AlreadyFull { + // Test split must be allowed when disk is full. + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k1"); + } + // Test transfer leader should be allowed. + cluster.must_transfer_leader(1, new_peer(2, 2)); + + // Transfer the leadership back to store 1. + fail::remove(get_fp(usage, 1)); + cluster.must_transfer_leader(1, new_peer(1, 1)); + fail::cfg(get_fp(usage, 1), "return").unwrap(); + + // Test remove peer should be allowed. + cluster.pd_client.must_remove_peer(1, new_peer(3, 3)); + // Sleep for a while until the disk usage and peer changes have been synced. + thread::sleep(Duration::from_secs(1)); + must_get_none(&cluster.get_engine(3), b"k1"); + + // Test add peer should be allowed. It must be a higher peer-id in v2. + cluster.pd_client.must_add_peer(1, new_peer(3, 4)); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + + fail::remove(get_fp(usage, 1)); + // Sleep for a while before next case to make it clear. + thread::sleep(Duration::from_secs(1)); } - // Test transfer leader should be allowed. - cluster.must_transfer_leader(1, new_peer(2, 2)); - - // Transfer the leadership back to store 1. - fail::remove(get_fp(usage, 1)); - cluster.must_transfer_leader(1, new_peer(1, 1)); - fail::cfg(get_fp(usage, 1), "return").unwrap(); - - // Test remove peer should be allowed. 
- cluster.pd_client.must_remove_peer(1, new_peer(3, 3)); - must_get_none(&cluster.get_engine(3), b"k1"); - - // Test add peer should be allowed. - cluster.pd_client.must_add_peer(1, new_peer(3, 3)); - must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); - - fail::remove(get_fp(usage, 1)); -} - -#[test] -fn test_disk_full_for_region_leader() { - test_disk_full_leader_behaviors(DiskUsage::AlmostFull); - test_disk_full_leader_behaviors(DiskUsage::AlreadyFull); -} - -fn test_disk_full_follower_behaviors(usage: DiskUsage) { - let mut cluster = new_node_cluster(0, 3); - cluster.pd_client.disable_default_operator(); - cluster.run(); - - // To ensure all replicas are not pending. - cluster.must_put(b"k1", b"v1"); - must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); - must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); - must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); - - cluster.must_transfer_leader(1, new_peer(1, 1)); - fail::cfg(get_fp(usage, 2), "return").unwrap(); - - // Test followers will reject pre-transfer-leader command. - let epoch = cluster.get_region_epoch(1); - let transfer = new_admin_request(1, &epoch, new_transfer_leader_cmd(new_peer(2, 2))); - cluster - .call_command_on_leader(transfer, Duration::from_secs(3)) - .unwrap(); - assert_eq!(cluster.leader_of_region(1).unwrap(), new_peer(1, 1)); - cluster.must_put(b"k2", b"v2"); - - // Test leader shouldn't append entries to disk full followers. - let old_last_index = cluster.raft_local_state(1, 2).last_index; - cluster.must_put(b"k3", b"v3"); - let new_last_index = cluster.raft_local_state(1, 2).last_index; - assert_eq!(old_last_index, new_last_index); - must_get_none(&cluster.get_engine(2), b"k3"); - - // Test followers will response votes when disk is full. 
- cluster.add_send_filter(CloneFilterFactory( - RegionPacketFilter::new(1, 1) - .direction(Direction::Send) - .msg_type(MessageType::MsgRequestVoteResponse), - )); - cluster.must_transfer_leader(1, new_peer(3, 3)); - - fail::remove(get_fp(usage, 2)); } -#[test] -fn test_disk_full_for_region_follower() { - test_disk_full_follower_behaviors(DiskUsage::AlmostFull); - test_disk_full_follower_behaviors(DiskUsage::AlreadyFull); +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_disk_full_follower_behaviors() { + for usage in [DiskUsage::AlmostFull, DiskUsage::AlreadyFull] { + let mut cluster = new_cluster(0, 3); + cluster.pd_client.disable_default_operator(); + cluster.run(); + + // To ensure all replicas are not pending. + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(1), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + fail::cfg(get_fp(usage, 2), "return").unwrap(); + + // Test followers will reject pre-transfer-leader command. + let epoch = cluster.get_region_epoch(1); + let transfer = new_admin_request(1, &epoch, new_transfer_leader_cmd(new_peer(2, 2))); + cluster + .call_command_on_leader(transfer, Duration::from_secs(3)) + .unwrap(); + assert_eq!(cluster.leader_of_region(1).unwrap(), new_peer(1, 1)); + cluster.must_put(b"k2", b"v2"); + + // Test leader shouldn't append entries to disk full followers. + let old_last_index = cluster.raft_local_state(1, 2).last_index; + cluster.must_put(b"k3", b"v3"); + let new_last_index = cluster.raft_local_state(1, 2).last_index; + assert_eq!(old_last_index, new_last_index); + must_get_none(&cluster.get_engine(2), b"k3"); + + // Test followers will response votes when disk is full. 
+ cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(1, 1) + .direction(Direction::Send) + .msg_type(MessageType::MsgRequestVoteResponse), + )); + cluster.must_transfer_leader(1, new_peer(3, 3)); + + fail::remove(get_fp(usage, 2)); + } } -fn test_disk_full_txn_behaviors(usage: DiskUsage) { - let mut cluster = new_server_cluster(0, 3); +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] +fn test_disk_full_txn_behaviors() { + let usage = DiskUsage::AlmostFull; + let mut cluster = new_cluster(0, 3); cluster.pd_client.disable_default_operator(); cluster.run(); @@ -199,7 +198,7 @@ fn test_disk_full_txn_behaviors(usage: DiskUsage) { DiskFullOpt::NotAllowedOnFull, ); assert!(res.get_region_error().has_disk_full()); - assert_region_leader_changed(&mut cluster, 1, 1); + assert_region_leader_changed!(&cluster, 1, 1); fail::remove(get_fp(usage, 1)); cluster.must_transfer_leader(1, new_peer(1, 1)); @@ -269,16 +268,13 @@ fn test_disk_full_txn_behaviors(usage: DiskUsage) { fail::remove(get_fp(usage, 1)); } -#[test] -fn test_disk_full_for_txn_operations() { - test_disk_full_txn_behaviors(DiskUsage::AlmostFull); -} - -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_majority_disk_full() { - let mut cluster = new_node_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // To ensure the thread has full store disk usage infomation. cluster.cfg.raft_store.store_batch_system.pool_size = 1; + cluster.cfg.raft_store.gc_peer_check_interval = ReadableDuration::millis(500); // set gc duration for v2 cluster.pd_client.disable_default_operator(); cluster.run(); @@ -295,7 +291,7 @@ fn test_majority_disk_full() { // To ensure followers have reported disk usages to the leader. 
for i in 1..3 { fail::cfg(get_fp(DiskUsage::AlmostFull, i + 1), "return").unwrap(); - ensure_disk_usage_is_reported(&mut cluster, i + 1, i + 1, ®ion); + ensure_disk_usage_is_reported!(&mut cluster, i + 1, i + 1, ®ion); } // Normal proposals will be rejected because of majority peers' disk full. @@ -319,14 +315,14 @@ fn test_majority_disk_full() { // new disk usages are reported. for i in 1..3 { fail::remove(get_fp(DiskUsage::AlmostFull, i + 1)); - ensure_disk_usage_is_reported(&mut cluster, i + 1, i + 1, ®ion); + ensure_disk_usage_is_reported!(&mut cluster, i + 1, i + 1, ®ion); must_get_equal(&cluster.get_engine(i + 1), b"k3", b"v3"); } // To ensure followers have reported disk usages to the leader. for i in 1..3 { fail::cfg(get_fp(DiskUsage::AlreadyFull, i + 1), "return").unwrap(); - ensure_disk_usage_is_reported(&mut cluster, i + 1, i + 1, ®ion); + ensure_disk_usage_is_reported!(&mut cluster, i + 1, i + 1, ®ion); } // Proposals with special `DiskFullOpt`s will still be rejected if majority @@ -342,10 +338,12 @@ fn test_majority_disk_full() { // Peer 2 disk usage changes from already full to almost full. fail::remove(get_fp(DiskUsage::AlreadyFull, 2)); fail::cfg(get_fp(DiskUsage::AlmostFull, 2), "return").unwrap(); - ensure_disk_usage_is_reported(&mut cluster, 2, 2, ®ion); + ensure_disk_usage_is_reported!(&mut cluster, 2, 2, ®ion); - // Configuration change should be alloed. + // Configuration change should be allowed. cluster.pd_client.must_remove_peer(1, new_peer(2, 2)); + // Sleep for a while until the disk usage and peer changes have been synced. + thread::sleep(Duration::from_secs(1)); // After the last configuration change is applied, the raft group will be like // `[(1, DiskUsage::AlmostFull), (3, DiskUsage::AlreadyFull)]`. 
So no more @@ -364,9 +362,10 @@ fn test_majority_disk_full() { } } -#[test] +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_disk_full_followers_with_hibernate_regions() { - let mut cluster = new_node_cluster(0, 2); + let mut cluster = new_cluster(0, 2); // To ensure the thread has full store disk usage infomation. cluster.cfg.raft_store.store_batch_system.pool_size = 1; cluster.pd_client.disable_default_operator(); @@ -391,31 +390,13 @@ fn test_disk_full_followers_with_hibernate_regions() { must_get_equal(&cluster.get_engine(2), b"k2", b"v2"); } -// check the region new leader is elected. -fn assert_region_merged( - cluster: &mut Cluster, - left_region_key: &[u8], - right_region_key: &[u8], -) { - let timer = Instant::now(); - loop { - if timer.saturating_elapsed() > Duration::from_secs(5) { - panic!("region merge failed"); - } - let region_left = cluster.get_region(left_region_key); - let region_right = cluster.get_region(right_region_key); - if region_left.get_id() != region_right.get_id() { - sleep_ms(10); - continue; - } else { - break; - } - } -} - -#[test] +// #[test_case(test_raftstore_v2::new_server_cluster)] +// FIXME: #[test_case(test_raftstore_v2::new_server_cluster)] +// In v2 `must_try_merge` always return error. Also the last `must_merge` +// sometimes cannot get an updated min_matched. +#[test_case(test_raftstore::new_server_cluster)] fn test_merge_on_majority_disk_full() { - let mut cluster = new_server_cluster(0, 3); + let mut cluster = new_cluster(0, 3); // To ensure the thread has full store disk usage infomation. 
cluster.cfg.raft_store.store_batch_system.pool_size = 1; cluster.pd_client.disable_default_operator(); @@ -448,23 +429,42 @@ fn test_merge_on_majority_disk_full() { fail::cfg(get_fp(DiskUsage::AlmostFull, i), "return").unwrap(); } for peer in region1.get_peers().iter() { - ensure_disk_usage_is_reported(&mut cluster, peer.get_id(), peer.get_store_id(), ®ion1); + ensure_disk_usage_is_reported!(&mut cluster, peer.get_id(), peer.get_store_id(), ®ion1); } for peer in region2.get_peers().iter() { - ensure_disk_usage_is_reported(&mut cluster, peer.get_id(), peer.get_store_id(), ®ion2); + ensure_disk_usage_is_reported!(&mut cluster, peer.get_id(), peer.get_store_id(), ®ion2); } cluster.must_try_merge(region1.get_id(), region2.get_id()); - assert_region_merged(&mut cluster, b"k1", b"k3"); + + // check the region new leader is elected. + let assert_region_merged = |left_region_key: &[u8], right_region_key: &[u8]| { + let timer = Instant::now(); + loop { + if timer.saturating_elapsed() > Duration::from_secs(5) { + panic!("region merge failed"); + } + let region_left = cluster.get_region(left_region_key); + let region_right = cluster.get_region(right_region_key); + if region_left.get_id() != region_right.get_id() { + sleep_ms(10); + continue; + } else { + break; + } + } + }; + assert_region_merged(b"k1", b"k3"); for i in 1..3 { fail::remove(get_fp(DiskUsage::AlmostFull, i)); } } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_almost_and_already_full_behavior() { - let mut cluster = new_server_cluster(0, 5); + let mut cluster = new_cluster(0, 5); // To ensure the thread has full store disk usage infomation. 
cluster.cfg.raft_store.store_batch_system.pool_size = 1; cluster.pd_client.disable_default_operator(); @@ -481,7 +481,7 @@ fn test_almost_and_already_full_behavior() { fail::cfg(get_fp(DiskUsage::AlreadyFull, i), "return").unwrap(); } for i in 1..5 { - ensure_disk_usage_is_reported(&mut cluster, i + 1, i + 1, ®ion); + ensure_disk_usage_is_reported!(&mut cluster, i + 1, i + 1, ®ion); } let lead_client = PeerClient::new(&cluster, 1, new_peer(1, 1)); @@ -521,29 +521,10 @@ fn test_almost_and_already_full_behavior() { } } -fn wait_down_peers_reported( - cluster: &Cluster, - total_down_count: u64, - target_report_peer: u64, -) { - let mut peers = cluster.get_down_peers(); - let timer = Instant::now(); - loop { - if timer.saturating_elapsed() > Duration::from_secs(5) { - panic!("Leader cannot change when the only disk full node is leader"); - } - - if peers.len() == total_down_count as usize && peers.contains_key(&target_report_peer) { - return; - } - sleep_ms(10); - peers = cluster.get_down_peers(); - } -} - -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_down_node_when_disk_full() { - let mut cluster = new_server_cluster(0, 5); + let mut cluster = new_cluster(0, 5); // To ensure the thread has full store disk usage infomation. 
cluster.cfg.raft_store.store_batch_system.pool_size = 1; cluster.cfg.raft_store.max_peer_down_duration = ReadableDuration::secs(1); @@ -555,7 +536,7 @@ fn test_down_node_when_disk_full() { let region = cluster.get_region(b"k1"); for i in 3..6 { fail::cfg(get_fp(DiskUsage::AlmostFull, i), "return").unwrap(); - ensure_disk_usage_is_reported(&mut cluster, i, i, ®ion); + ensure_disk_usage_is_reported!(&mut cluster, i, i, ®ion); } let lead_client = PeerClient::new(&cluster, 1, new_peer(1, 1)); @@ -574,7 +555,23 @@ fn test_down_node_when_disk_full() { ); cluster.stop_node(2); - wait_down_peers_reported(&cluster, 1, 2u64); + + let wait_down_peers_reported = |total_down_count: u64, target_report_peer: u64| { + let mut peers = cluster.get_down_peers(); + let timer = Instant::now(); + loop { + if timer.saturating_elapsed() > Duration::from_secs(5) { + panic!("Leader cannot change when the only disk full node is leader"); + } + + if peers.len() == total_down_count as usize && peers.contains_key(&target_report_peer) { + return; + } + sleep_ms(10); + peers = cluster.get_down_peers(); + } + }; + wait_down_peers_reported(1u64, 2u64); let prewrite_ts = get_tso(&cluster.pd_client); let res = lead_client.try_kv_prewrite( diff --git a/tests/integrations/raftstore/test_stale_read.rs b/tests/integrations/raftstore/test_stale_read.rs index 24e13003f7e..5de9bda1f64 100644 --- a/tests/integrations/raftstore/test_stale_read.rs +++ b/tests/integrations/raftstore/test_stale_read.rs @@ -8,7 +8,7 @@ use kvproto::{ metapb::{Peer, Region}, tikvpb_grpc::TikvClient, }; -use test_raftstore::{must_get_equal, new_mutation, new_peer, PeerClient}; +use test_raftstore::{must_get_equal, new_mutation, new_peer}; use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, time::Instant}; From e29d3a989d73f0a1c1534114dc530d3c3200d38d Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 11 Oct 2023 15:09:25 +0800 Subject: [PATCH 0953/1149] raftstore-v2: fix non-deterministic region merge 
(#15697) close tikv/tikv#15682 This commit addresses the issue where a "region corrupted" error still occurs in certain scenarios despite PR #15625 resolving the problem in the transfer leader scenario. The root cause of the issue is the non-deterministic nature of commit merge and rollback merge, allowing transient errors during propose to trigger the problem again. To fix this issue, the proposed solution ensures that TiKV only initiates rollback merge when either the target region is not found or the epoch has increased. Signed-off-by: Neil Shen Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../operation/command/admin/merge/commit.rs | 106 +++++++++--------- .../tests/failpoints/test_merge.rs | 9 +- components/raftstore/src/store/peer.rs | 2 + tests/failpoints/cases/test_merge.rs | 94 +++++++++++++++- 4 files changed, 153 insertions(+), 58 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index e95a13600fb..b12ba9eaf9d 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -178,6 +178,11 @@ impl Peer { self.region_id() == 2, |_| {} ); + fail::fail_point!( + "ask_target_peer_to_commit_merge_store_1", + store_ctx.store_id == 1, + |_| {} + ); let state = self.applied_merge_state().unwrap(); let target = state.get_target(); let target_id = target.get_id(); @@ -295,7 +300,10 @@ impl Peer { target_id: self.region_id(), }, ); - } else if util::is_epoch_stale(expected_epoch, region.get_region_epoch()) { + return; + } + // current region_epoch > region epoch in commit merge. 
+ if util::is_epoch_stale(expected_epoch, region.get_region_epoch()) { info!( self.logger, "reject commit merge because of stale"; @@ -306,63 +314,51 @@ impl Peer { let _ = store_ctx .router .force_send(source_id, PeerMsg::RejectCommitMerge { index }); - } else if expected_epoch == region.get_region_epoch() { - assert!( - util::is_sibling_regions(source_region, region), - "{}: {:?}, {:?}", - SlogFormat(&self.logger), - source_region, - region - ); - assert!( - region_on_same_stores(source_region, region), - "{:?}, {:?}", - source_region, - region - ); - assert!(!self.storage().has_dirty_data()); - if self.is_leader() && !self.leader_transferring() { - let index = commit_of_merge(req.get_admin_request().get_commit_merge()); - if self.proposal_control().is_merging() { - // `on_admin_command` may delay our request indefinitely. It's better to check - // directly. - info!( - self.logger, - "reject commit merge because of target is merging with another region"; - ); - } else { - let (ch, res) = CmdResChannel::pair(); - self.on_admin_command(store_ctx, req, ch); - if let Some(res) = res.take_result() - && res.get_header().has_error() - { - error!( - self.logger, - "failed to propose commit merge"; - "source" => source_id, - "res" => ?res, - ); - } else { - fail::fail_point!("on_propose_commit_merge_success"); - return; - } - } - let _ = store_ctx - .router - .force_send(source_id, PeerMsg::RejectCommitMerge { index }); - } else if self.leader_transferring() { - info!( - self.logger, - "not to propose commit merge when transferring leader"; - "transferee" => self.leader_transferee(), - ); - } - } else { + return; + } + // current region_epoch < region epoch in commit merge. 
+ if util::is_epoch_stale(region.get_region_epoch(), expected_epoch) { info!( self.logger, - "ignore commit merge because self epoch is stale"; + "target region still not catch up, skip."; "source" => ?source_region, + "target_region_epoch" => ?expected_epoch, + "exist_region_epoch" => ?self.region().get_region_epoch(), ); + return; + } + assert!( + util::is_sibling_regions(source_region, region), + "{}: {:?}, {:?}", + SlogFormat(&self.logger), + source_region, + region + ); + assert!( + region_on_same_stores(source_region, region), + "{:?}, {:?}", + source_region, + region + ); + assert!(!self.storage().has_dirty_data()); + let (ch, res) = CmdResChannel::pair(); + self.on_admin_command(store_ctx, req, ch); + if let Some(res) = res.take_result() + && res.get_header().has_error() + { + error!( + self.logger, + "failed to propose commit merge"; + "source" => source_id, + "res" => ?res, + ); + fail::fail_point!( + "on_propose_commit_merge_fail_store_1", + store_ctx.store_id == 1, + |_| {} + ); + } else { + fail::fail_point!("on_propose_commit_merge_success"); } } @@ -691,6 +687,8 @@ impl Peer { info!( self.logger, "become follower for new logs"; + "first_log_term" => first.term, + "first_log_index" => first.index, "new_log_term" => last_log.term, "new_log_index" => last_log.index, "term" => self.term(), diff --git a/components/raftstore-v2/tests/failpoints/test_merge.rs b/components/raftstore-v2/tests/failpoints/test_merge.rs index 890b8c5e27a..11fe666b49b 100644 --- a/components/raftstore-v2/tests/failpoints/test_merge.rs +++ b/components/raftstore-v2/tests/failpoints/test_merge.rs @@ -7,7 +7,7 @@ use std::{ use engine_traits::Peekable; use raftstore_v2::router::{PeerMsg, PeerTick}; -use tikv_util::store::new_peer; +use tikv_util::{config::ReadableDuration, info, store::new_peer}; use crate::cluster::{ life_helper::assert_peer_not_exist, @@ -179,7 +179,9 @@ fn test_rollback() { // Target is merging. 
#[test] fn test_merge_conflict_0() { - let mut cluster = Cluster::default(); + let mut cluster = Cluster::with_configs(1, None, None, |cfg| { + cfg.merge_check_tick_interval = ReadableDuration::millis(100); + }); let store_id = cluster.node(0).id(); let router = &mut cluster.routers[0]; @@ -216,6 +218,7 @@ fn test_merge_conflict_0() { format!("k{}", region_3_id).as_bytes(), false, ); + info!("regions: {:?}, {:?}, {:?}", region_1, region_2, region_3); // pause merge progress of 2+3. let fp = fail::FailGuard::new("apply_commit_merge", "pause"); @@ -236,9 +239,9 @@ fn test_merge_conflict_0() { .unwrap(); let region_2 = cluster.routers[0].region_detail(region_2.get_id()); merge_region(&cluster, 0, region_1, peer_1, region_2, false); + drop(fp); // wait for rollback. rx.recv_timeout(std::time::Duration::from_secs(1)).unwrap(); - drop(fp); fail::remove("apply_rollback_merge"); // Check region 1 is not merged and can serve writes. diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index e9350ba7bb0..85b8798bfb1 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -1086,6 +1086,8 @@ where // of term explicitly to get correct metadata. 
info!( "become follower for new logs"; + "first_log_term" => first.term, + "first_log_index" => first.index, "new_log_term" => last_log.term, "new_log_index" => last_log.index, "term" => self.term(), diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 861e4a658ce..ffbd69dc05e 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -20,7 +20,7 @@ use kvproto::{ use pd_client::PdClient; use raft::eraftpb::MessageType; use raftstore::store::*; -use raftstore_v2::router::PeerMsg; +use raftstore_v2::router::{PeerMsg, PeerTick}; use test_raftstore::*; use test_raftstore_macro::test_case; use tikv::storage::{kv::SnapshotExt, Snapshot}; @@ -1848,6 +1848,98 @@ fn test_concurrent_between_transfer_leader_and_merge() { cluster.must_put(b"k4", b"v4"); } +#[test] +fn test_deterministic_commit_rollback_merge() { + use test_raftstore_v2::*; + let mut cluster = new_node_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + // Use a large election tick to stable test. + configure_for_lease_read(&mut cluster.cfg, None, Some(1000)); + // Use 2 threads for polling peers, so that they can run concurrently. 
+ cluster.cfg.raft_store.store_batch_system.pool_size = 2; + cluster.cfg.raft_store.store_batch_system.max_batch_size = Some(1); + cluster.run(); + + let pd_client = Arc::clone(&cluster.pd_client); + let region = pd_client.get_region(b"k1").unwrap(); + cluster.must_split(®ion, b"k2"); + + let left = pd_client.get_region(b"k1").unwrap(); + let right = pd_client.get_region(b"k3").unwrap(); + let right_1 = find_peer(&right, 1).unwrap().clone(); + cluster.must_transfer_leader(right.get_id(), right_1); + let left_2 = find_peer(&left, 2).unwrap().clone(); + cluster.must_transfer_leader(left.get_id(), left_2); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + for i in 0..3 { + must_get_equal(&cluster.get_engine(i + 1), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(i + 1), b"k3", b"v3"); + } + + // Delay 1003 apply by dropping append response, so that proposal will fail + // due to applied_term != current_term. + let target_region_id = left.get_id(); + cluster.add_recv_filter_on_node( + 1, + Box::new(DropMessageFilter::new(Arc::new(move |m| { + if m.get_region_id() == target_region_id { + return m.get_message().get_msg_type() != MessageType::MsgAppendResponse; + } + true + }))), + ); + + let left_1 = find_peer(&left, 1).unwrap().clone(); + cluster.must_transfer_leader(left.get_id(), left_1); + + // left(1000) <- right(1). + let (tx1, rx1) = channel(); + let (tx2, rx2) = channel(); + let tx1 = Mutex::new(tx1); + let rx2 = Mutex::new(rx2); + fail::cfg_callback("on_propose_commit_merge_fail_store_1", move || { + tx1.lock().unwrap().send(()).unwrap(); + rx2.lock().unwrap().recv().unwrap(); + }) + .unwrap(); + cluster.merge_region(right.get_id(), left.get_id(), Callback::None); + + // Wait for target fails to propose commit merge. + rx1.recv_timeout(Duration::from_secs(5)).unwrap(); + // Let target apply continue, and new AskCommitMerge messages will propose + // commit merge successfully. 
+ cluster.clear_recv_filter_on_node(1); + + // Trigger a CheckMerge tick, so source will send a AskCommitMerge again. + fail::cfg("ask_target_peer_to_commit_merge_store_1", "pause").unwrap(); + let router = cluster.get_router(1).unwrap(); + router + .check_send(1, PeerMsg::Tick(PeerTick::CheckMerge)) + .unwrap(); + + // Send RejectCommitMerge to source. + tx2.send(()).unwrap(); + fail::remove("on_propose_commit_merge_fail_store_1"); + + // Wait for target applies to current term. + cluster.must_put(b"k1", b"v11"); + + // By remove the failpoint, CheckMerge tick sends a AskCommitMerge again. + fail::remove("ask_target_peer_to_commit_merge_store_1"); + // At this point, source region will propose rollback merge if commit merge + // is not deterministic. + + // Wait for source handle commit or rollback merge. + wait_region_epoch_change(&cluster, &left, Duration::from_secs(5)); + + // No matter commit merge or rollback merge, cluster must be available to + // process requests + cluster.must_put(b"k0", b"v0"); + cluster.must_put(b"k4", b"v4"); +} + struct MsgVoteFilter {} impl Filter for MsgVoteFilter { From 08a2d654549105104bb701179586256402dbcadd Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 12 Oct 2023 12:10:55 +0800 Subject: [PATCH 0954/1149] coprocessor: do not treat deadline exceeded error as other error (#15709) ref tikv/tikv#15566 Signed-off-by: glorv --- src/coprocessor/dag/mod.rs | 50 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/src/coprocessor/dag/mod.rs b/src/coprocessor/dag/mod.rs index 31a6df181d5..bd077c5c0ba 100644 --- a/src/coprocessor/dag/mod.rs +++ b/src/coprocessor/dag/mod.rs @@ -143,7 +143,9 @@ fn handle_qe_response( can_be_cached: bool, data_version: Option, ) -> Result { - use tidb_query_common::error::ErrorInner; + use tidb_query_common::error::{ErrorInner, EvaluateError}; + + use crate::coprocessor::Error; match result { Ok((sel_resp, range)) => { @@ -162,6 +164,7 @@ fn 
handle_qe_response( } Err(err) => match *err.0 { ErrorInner::Storage(err) => Err(err.into()), + ErrorInner::Evaluate(EvaluateError::DeadlineExceeded) => Err(Error::DeadlineExceeded), ErrorInner::Evaluate(err) => { let mut resp = Response::default(); let mut sel_resp = SelectResponse::default(); @@ -179,7 +182,9 @@ fn handle_qe_response( fn handle_qe_stream_response( result: tidb_query_common::Result<(Option<(StreamResponse, IntervalRange)>, bool)>, ) -> Result<(Option, bool)> { - use tidb_query_common::error::ErrorInner; + use tidb_query_common::error::{ErrorInner, EvaluateError}; + + use crate::coprocessor::Error; match result { Ok((Some((s_resp, range)), finished)) => { @@ -192,6 +197,7 @@ fn handle_qe_stream_response( Ok((None, finished)) => Ok((None, finished)), Err(err) => match *err.0 { ErrorInner::Storage(err) => Err(err.into()), + ErrorInner::Evaluate(EvaluateError::DeadlineExceeded) => Err(Error::DeadlineExceeded), ErrorInner::Evaluate(err) => { let mut resp = Response::default(); let mut s_resp = StreamResponse::default(); @@ -203,3 +209,43 @@ fn handle_qe_stream_response( }, } } + +#[cfg(test)] +mod tests { + use anyhow::anyhow; + use protobuf::Message; + use tidb_query_common::error::{Error as CommonError, EvaluateError, StorageError}; + + use super::*; + use crate::coprocessor::Error; + + #[test] + fn test_handle_qe_response() { + // Ok Response + let ok_res = Ok((SelectResponse::default(), None)); + let res = handle_qe_response(ok_res, true, Some(1)).unwrap(); + assert!(res.can_be_cached); + assert_eq!(res.get_cache_last_version(), 1); + let mut select_res = SelectResponse::new(); + Message::merge_from_bytes(&mut select_res, res.get_data()).unwrap(); + assert!(!select_res.has_error()); + + // Storage Error + let storage_err = CommonError::from(StorageError(anyhow!("unknown"))); + let res = handle_qe_response(Err(storage_err), false, None); + assert!(matches!(res, Err(Error::Other(_)))); + + // Evaluate Error + let err = 
CommonError::from(EvaluateError::DeadlineExceeded); + let res = handle_qe_response(Err(err), false, None); + assert!(matches!(res, Err(Error::DeadlineExceeded))); + + let err = CommonError::from(EvaluateError::InvalidCharacterString { + charset: "test".into(), + }); + let res = handle_qe_response(Err(err), false, None).unwrap(); + let mut select_res = SelectResponse::new(); + Message::merge_from_bytes(&mut select_res, res.get_data()).unwrap(); + assert_eq!(select_res.get_error().get_code(), 1300); + } +} From 2d7616e3f8e3d254bbfa8d82f3980547073d1948 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 12 Oct 2023 12:25:55 +0800 Subject: [PATCH 0955/1149] raftstore-v2: adjust max-background-flushes default value (#15723) ref tikv/tikv#14470 Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/config/mod.rs | 185 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 129 insertions(+), 56 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 0eb006363f0..74f25a22ef6 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -244,22 +244,30 @@ const RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS: BackgroundJobLimits = BackgroundJobL // `defaults` serves as an upper bound for returning limits. fn get_background_job_limits_impl( + engine_type: EngineType, cpu_num: u32, defaults: &BackgroundJobLimits, ) -> BackgroundJobLimits { // At the minimum, we should have two background jobs: one for flush and one for // compaction. Otherwise, the number of background jobs should not exceed // cpu_num - 1. - let max_background_jobs = cmp::max(2, cmp::min(defaults.max_background_jobs, cpu_num - 1)); + let mut max_background_jobs = cmp::max(2, cmp::min(defaults.max_background_jobs, cpu_num - 1)); // Scale flush threads proportionally to cpu cores. Also make sure the number of // flush threads doesn't exceed total jobs. 
let max_background_flushes = cmp::min( (max_background_jobs + 3) / 4, defaults.max_background_flushes, ); - // Cap max_sub_compactions to allow at least two compactions. - let max_compactions = max_background_jobs - max_background_flushes; + + // set the default compaction threads differently for v1 and v2: + // v1: cap max_sub_compactions to allow at least two compactions. + // v2: decrease the compaction threads to make the qps more stable. + let max_compactions = match engine_type { + EngineType::RaftKv => max_background_jobs - max_background_flushes, + EngineType::RaftKv2 => (max_background_jobs + 7) / 8, + }; let max_sub_compactions: u32 = (max_compactions - 1).clamp(1, defaults.max_sub_compactions); + max_background_jobs = max_background_flushes + max_compactions; // Maximum background GC threads for Titan let max_titan_background_gc = cmp::min(defaults.max_titan_background_gc, cpu_num); @@ -271,9 +279,12 @@ fn get_background_job_limits_impl( } } -fn get_background_job_limits(defaults: &BackgroundJobLimits) -> BackgroundJobLimits { +fn get_background_job_limits( + engine_type: EngineType, + defaults: &BackgroundJobLimits, +) -> BackgroundJobLimits { let cpu_num = cmp::max(SysQuota::cpu_cores_quota() as u32, 1); - get_background_job_limits_impl(cpu_num, defaults) + get_background_job_limits_impl(engine_type, cpu_num, defaults) } macro_rules! 
cf_config { @@ -1308,19 +1319,14 @@ pub struct DbResources { impl Default for DbConfig { fn default() -> DbConfig { - let bg_job_limits = get_background_job_limits(&KVDB_DEFAULT_BACKGROUND_JOB_LIMITS); - let titan_config = TitanDbConfig { - max_background_gc: bg_job_limits.max_titan_background_gc as i32, - ..Default::default() - }; DbConfig { wal_recovery_mode: DBRecoveryMode::PointInTime, wal_dir: "".to_owned(), wal_ttl_seconds: 0, wal_size_limit: ReadableSize::kb(0), max_total_wal_size: None, - max_background_jobs: bg_job_limits.max_background_jobs as i32, - max_background_flushes: bg_job_limits.max_background_flushes as i32, + max_background_jobs: 0, + max_background_flushes: 0, max_manifest_file_size: ReadableSize::mb(128), create_if_missing: true, max_open_files: 40960, @@ -1339,7 +1345,7 @@ impl Default for DbConfig { rate_limiter_auto_tuned: true, bytes_per_sync: ReadableSize::mb(1), wal_bytes_per_sync: ReadableSize::kb(512), - max_sub_compactions: bg_job_limits.max_sub_compactions, + max_sub_compactions: 0, writable_file_max_buffer_size: ReadableSize::mb(1), use_direct_io_for_flush_and_compaction: false, enable_pipelined_write: false, @@ -1354,7 +1360,7 @@ impl Default for DbConfig { writecf: WriteCfConfig::default(), lockcf: LockCfConfig::default(), raftcf: RaftCfConfig::default(), - titan: titan_config, + titan: TitanDbConfig::default(), } } } @@ -1410,6 +1416,19 @@ impl DbConfig { .get_or_insert(DEFAULT_LOCK_BUFFER_MEMORY_LIMIT); } } + let bg_job_limits = get_background_job_limits(engine, &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS); + if self.max_background_jobs == 0 { + self.max_background_jobs = bg_job_limits.max_background_jobs as i32; + } + if self.max_background_flushes == 0 { + self.max_background_flushes = bg_job_limits.max_background_flushes as i32; + } + if self.max_sub_compactions == 0 { + self.max_sub_compactions = bg_job_limits.max_sub_compactions; + } + if self.titan.max_background_gc == 0 { + self.titan.max_background_gc = 
bg_job_limits.max_titan_background_gc as i32; + } } pub fn build_resources(&self, env: Arc, engine: EngineType) -> DbResources { @@ -1807,7 +1826,9 @@ pub struct RaftDbConfig { impl Default for RaftDbConfig { fn default() -> RaftDbConfig { - let bg_job_limits = get_background_job_limits(&RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS); + // raftdb should only be used for raftkv + let bg_job_limits = + get_background_job_limits(EngineType::RaftKv, &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS); let titan_config = TitanDbConfig { max_background_gc: bg_job_limits.max_titan_background_gc as i32, ..Default::default() @@ -5913,61 +5934,95 @@ mod tests { #[test] fn test_background_job_limits() { - // cpu num = 1 + for engine in [EngineType::RaftKv, EngineType::RaftKv2] { + // cpu num = 1 + assert_eq!( + get_background_job_limits_impl( + engine, + 1, // cpu_num + &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS + ), + BackgroundJobLimits { + max_background_jobs: 2, + max_background_flushes: 1, + max_sub_compactions: 1, + max_titan_background_gc: 1, + } + ); + assert_eq!( + get_background_job_limits_impl( + engine, + 1, // cpu_num + &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS + ), + BackgroundJobLimits { + max_background_jobs: 2, + max_background_flushes: 1, + max_sub_compactions: 1, + max_titan_background_gc: 1, + } + ); + // cpu num = 2 + assert_eq!( + get_background_job_limits_impl( + EngineType::RaftKv, + 2, // cpu_num + &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS + ), + BackgroundJobLimits { + max_background_jobs: 2, + max_background_flushes: 1, + max_sub_compactions: 1, + max_titan_background_gc: 2, + } + ); + assert_eq!( + get_background_job_limits_impl( + EngineType::RaftKv, + 2, // cpu_num + &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS + ), + BackgroundJobLimits { + max_background_jobs: 2, + max_background_flushes: 1, + max_sub_compactions: 1, + max_titan_background_gc: 2, + } + ); + } + + // cpu num = 4 assert_eq!( get_background_job_limits_impl( - 1, // cpu_num + EngineType::RaftKv, + 4, // cpu_num 
&KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { - max_background_jobs: 2, - max_background_flushes: 1, - max_sub_compactions: 1, - max_titan_background_gc: 1, - } - ); - assert_eq!( - get_background_job_limits_impl( - 1, // cpu_num - &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS - ), - BackgroundJobLimits { - max_background_jobs: 2, + max_background_jobs: 3, max_background_flushes: 1, max_sub_compactions: 1, - max_titan_background_gc: 1, + max_titan_background_gc: 4, } ); - // cpu num = 2 assert_eq!( get_background_job_limits_impl( - 2, // cpu_num + EngineType::RaftKv2, + 4, // cpu_num &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { max_background_jobs: 2, max_background_flushes: 1, max_sub_compactions: 1, - max_titan_background_gc: 2, - } - ); - assert_eq!( - get_background_job_limits_impl( - 2, // cpu_num - &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS - ), - BackgroundJobLimits { - max_background_jobs: 2, - max_background_flushes: 1, - max_sub_compactions: 1, - max_titan_background_gc: 2, + max_titan_background_gc: 4, } ); - // cpu num = 4 assert_eq!( get_background_job_limits_impl( + EngineType::RaftKv, 4, // cpu_num - &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS + &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { max_background_jobs: 3, @@ -5976,33 +6031,36 @@ mod tests { max_titan_background_gc: 4, } ); + // cpu num = 8 assert_eq!( get_background_job_limits_impl( - 4, // cpu_num - &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS + EngineType::RaftKv, + 8, // cpu_num + &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { - max_background_jobs: 3, - max_background_flushes: 1, - max_sub_compactions: 1, + max_background_jobs: 7, + max_background_flushes: 2, + max_sub_compactions: 3, max_titan_background_gc: 4, } ); - // cpu num = 8 assert_eq!( get_background_job_limits_impl( + EngineType::RaftKv2, 8, // cpu_num &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { - max_background_jobs: 7, + max_background_jobs: 3, max_background_flushes: 2, - 
max_sub_compactions: 3, + max_sub_compactions: 1, max_titan_background_gc: 4, } ); assert_eq!( get_background_job_limits_impl( + EngineType::RaftKv, 8, // cpu_num &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS ), @@ -6011,6 +6069,7 @@ mod tests { // cpu num = 16 assert_eq!( get_background_job_limits_impl( + EngineType::RaftKv, 16, // cpu_num &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), @@ -6018,6 +6077,20 @@ mod tests { ); assert_eq!( get_background_job_limits_impl( + EngineType::RaftKv2, + 16, // cpu_num + &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS + ), + BackgroundJobLimits { + max_background_jobs: 5, + max_background_flushes: 3, + max_sub_compactions: 1, + max_titan_background_gc: 4, + } + ); + assert_eq!( + get_background_job_limits_impl( + EngineType::RaftKv, 16, // cpu_num &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS ), From 272fcd04f645479c4fdc265e3083c250796c60df Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 12 Oct 2023 14:18:25 +0800 Subject: [PATCH 0956/1149] raftstore-v2: avoid follower forwarding propose msg (#15704) ref tikv/tikv#14390 avoid follower forwarding propose msg Signed-off-by: SpadeA-Tang Co-authored-by: tonyxuqqi --- .../src/operation/command/write/mod.rs | 31 ++---- components/raftstore-v2/src/operation/mod.rs | 4 +- components/raftstore/src/store/fsm/apply.rs | 1 - .../raftstore/src/store/simple_write.rs | 30 +----- tests/failpoints/cases/test_transaction.rs | 101 +++++++++++++++++- 5 files changed, 112 insertions(+), 55 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index 6eacc75c0f1..cc71533a29a 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -12,7 +12,7 @@ use raftstore::{ fsm::{apply, MAX_PROPOSAL_SIZE_RATIO}, metrics::PEER_WRITE_CMD_COUNTER, msg::ErrorCallback, - util::{self, NORMAL_REQ_CHECK_CONF_VER, 
NORMAL_REQ_CHECK_VER}, + util::{self}, }, Error, Result, }; @@ -80,13 +80,10 @@ impl Peer { ch.report_error(resp); return; } - // ProposalControl is reliable only when applied to current term. - let call_proposed_on_success = self.applied_to_current_term(); let mut encoder = SimpleWriteReqEncoder::new( header, data, (ctx.cfg.raft_entry_max_size.0 as f64 * MAX_PROPOSAL_SIZE_RATIO) as usize, - call_proposed_on_success, ); encoder.add_response_channel(ch); self.set_has_ready(); @@ -106,7 +103,6 @@ impl Peer { Box::::default(), data, ctx.cfg.raft_entry_max_size.0 as usize, - false, ) .encode() .0 @@ -118,30 +114,17 @@ impl Peer { pub fn propose_pending_writes(&mut self, ctx: &mut StoreContext) { if let Some(encoder) = self.simple_write_encoder_mut().take() { - let call_proposed_on_success = if encoder.notify_proposed() { - // The request has pass conflict check and called all proposed callbacks. + let header = encoder.header(); + let res = self.validate_command(header, None, &mut ctx.raft_metrics); + let call_proposed_on_success = if matches!(res, Err(Error::EpochNotMatch { .. })) { false } else { - // Epoch may have changed since last check. - let from_epoch = encoder.header().get_region_epoch(); - let res = util::compare_region_epoch( - from_epoch, - self.region(), - NORMAL_REQ_CHECK_CONF_VER, - NORMAL_REQ_CHECK_VER, - true, - ); - if let Err(e) = res { - // TODO: query sibling regions. - ctx.raft_metrics.invalid_proposal.epoch_not_match.inc(); - encoder.encode().1.report_error(cmd_resp::new_error(e)); - return; - } - // Only when it applies to current term, the epoch check can be reliable. 
self.applied_to_current_term() }; + let (data, chs) = encoder.encode(); - let res = self.propose(ctx, data); + let res = res.and_then(|_| self.propose(ctx, data)); + fail_point!("after_propose_pending_writes"); self.post_propose_command(ctx, res, chs, call_proposed_on_success); diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 8ce592dd753..6d5cba9fff8 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -87,7 +87,7 @@ pub mod test_util { let mut header = Box::::default(); header.set_region_id(region_id); header.set_region_epoch(region_epoch); - let req_encoder = SimpleWriteReqEncoder::new(header, encoder.encode(), 512, false); + let req_encoder = SimpleWriteReqEncoder::new(header, encoder.encode(), 512); let (bin, _) = req_encoder.encode(); let mut e = Entry::default(); e.set_entry_type(EntryType::EntryNormal); @@ -112,7 +112,7 @@ pub mod test_util { let mut header = Box::::default(); header.set_region_id(region_id); header.set_region_epoch(region_epoch); - let req_encoder = SimpleWriteReqEncoder::new(header, encoder.encode(), 512, false); + let req_encoder = SimpleWriteReqEncoder::new(header, encoder.encode(), 512); let (bin, _) = req_encoder.encode(); let mut e = Entry::default(); e.set_entry_type(EntryType::EntryNormal); diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index c170e5a35f9..038171d9715 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -5745,7 +5745,6 @@ mod tests { self.header.clone(), bin, 1000, - false, ); let (bytes, _) = req_encoder.encode(); self.entry.set_data(bytes.into()); diff --git a/components/raftstore/src/store/simple_write.rs b/components/raftstore/src/store/simple_write.rs index a303a586935..dd461e61867 100644 --- a/components/raftstore/src/store/simple_write.rs +++ 
b/components/raftstore/src/store/simple_write.rs @@ -49,7 +49,6 @@ where channels: Vec, size_limit: usize, write_type: WriteType, - notify_proposed: bool, } impl SimpleWriteReqEncoder @@ -57,14 +56,10 @@ where C: ErrorCallback + WriteCallback, { /// Create a request encoder. - /// - /// If `notify_proposed` is true, channels will be called `notify_proposed` - /// when it's appended. pub fn new( header: Box, bin: SimpleWriteBinary, size_limit: usize, - notify_proposed: bool, ) -> SimpleWriteReqEncoder { let mut buf = Vec::with_capacity(256); buf.push(MAGIC_PREFIX); @@ -77,7 +72,6 @@ where channels: vec![], size_limit, write_type: bin.write_type, - notify_proposed, } } @@ -112,18 +106,10 @@ where } #[inline] - pub fn add_response_channel(&mut self, mut ch: C) { - if self.notify_proposed { - ch.notify_proposed(); - } + pub fn add_response_channel(&mut self, ch: C) { self.channels.push(ch); } - #[inline] - pub fn notify_proposed(&self) -> bool { - self.notify_proposed - } - #[inline] pub fn header(&self) -> &RaftRequestHeader { &self.header @@ -558,7 +544,6 @@ mod tests { header.clone(), bin, usize::MAX, - false, ); let mut encoder = SimpleWriteEncoder::with_capacity(512); @@ -570,7 +555,6 @@ mod tests { header.clone(), bin, 0, - false, ); let (bytes, _) = req_encoder.encode(); @@ -619,9 +603,8 @@ mod tests { .collect(); encoder.ingest(exp.clone()); let bin = encoder.encode(); - let req_encoder = SimpleWriteReqEncoder::>::new( - header, bin, 0, false, - ); + let req_encoder = + SimpleWriteReqEncoder::>::new(header, bin, 0); let (bytes, _) = req_encoder.encode(); let mut decoder = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); @@ -683,7 +666,6 @@ mod tests { header.clone(), bin.clone(), 512, - false, ); let mut header2 = Box::::default(); @@ -700,7 +682,6 @@ mod tests { header.clone(), bin2.clone(), 512, - false, ); assert!(!req_encoder2.amend(&header, &bin)); @@ -735,7 +716,6 @@ mod tests { header.clone(), 
SimpleWriteEncoder::with_capacity(512).encode(), 512, - false, ); let (bin, _) = req_encoder.encode(); assert_eq!( @@ -753,7 +733,6 @@ mod tests { header.clone(), encoder.encode(), 512, - false, ); let (bin, _) = req_encoder.encode(); let req = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bin, 0, 0) @@ -771,7 +750,6 @@ mod tests { header.clone(), encoder.encode(), 512, - false, ); let (bin, _) = req_encoder.encode(); let req = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bin, 0, 0) @@ -788,7 +766,6 @@ mod tests { header.clone(), encoder.encode(), 512, - false, ); let (bin, _) = req_encoder.encode(); let req = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bin, 0, 0) @@ -816,7 +793,6 @@ mod tests { header, encoder.encode(), 512, - false, ); let (bin, _) = req_encoder.encode(); let req = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bin, 0, 0) diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 14f4161c7ae..0b6e6269e95 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -2,6 +2,7 @@ use std::{ sync::{ + atomic::{AtomicBool, Ordering}, mpsc::{channel, sync_channel}, Arc, Mutex, }, @@ -9,13 +10,15 @@ use std::{ time::Duration, }; -use futures::executor::block_on; +use engine_traits::CF_DEFAULT; +use futures::{executor::block_on, StreamExt}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ kvrpcpb::{ self as pb, AssertionLevel, Context, GetRequest, Op, PessimisticLockRequest, PrewriteRequest, PrewriteRequestPessimisticAction::*, }, + raft_serverpb::RaftMessage, tikvpb::TikvClient, }; use raft::prelude::{ConfChangeType, MessageType}; @@ -45,7 +48,9 @@ use tikv::{ Snapshot, TestEngineBuilder, TestStorageBuilderApiV1, }, }; +use tikv_kv::{Engine, Modify, WriteData, WriteEvent}; use tikv_util::{ + config::ReadableDuration, store::{new_peer, peer::new_incoming_voter}, HandyRwLock, }; @@ -803,3 +808,97 @@ fn 
test_next_last_change_info_called_when_gc() { assert_eq!(h.join().unwrap().unwrap().as_slice(), b"v"); } + +fn must_put(ctx: &Context, engine: &E, key: &[u8], value: &[u8]) { + engine.put(ctx, Key::from_raw(key), value.to_vec()).unwrap(); +} + +fn must_delete(ctx: &Context, engine: &E, key: &[u8]) { + engine.delete(ctx, Key::from_raw(key)).unwrap(); +} + +// Before the fix, a proposal can be proposed twice, which is caused by that +// write proposal validation and propose are not atomic. So a raft message with +// higher term between them can make the proposal goes to msg proposal +// forwarding logic. However, raft proposal forawrd logic is not compatible with +// the raft store, as the failed proposal makes client retry. The retried +// proposal coupled with forward proposal makes the propsal applied twice. +#[test] +fn test_forbid_forward_propose() { + use test_raftstore_v2::*; + let count = 3; + let mut cluster = new_server_cluster(0, count); + cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(10); + cluster.cfg.raft_store.store_batch_system.pool_size = 2; + cluster.run(); + + let region = cluster.get_region(b""); + let peer1 = new_peer(1, 1); + let peer2 = new_peer(2, 2); + cluster.must_transfer_leader(region.id, peer2.clone()); + let storage = cluster.sim.rl().storages[&1].clone(); + let storage2 = cluster.sim.rl().storages[&2].clone(); + + let p = Arc::new(AtomicBool::new(false)); + let p2 = p.clone(); + let (tx, rx) = channel(); + let tx = Mutex::new(tx); + cluster.add_recv_filter_on_node( + 2, + Box::new(DropMessageFilter::new(Arc::new(move |_| { + if p2.load(Ordering::Relaxed) { + tx.lock().unwrap().send(()).unwrap(); + // One msg is enough + p2.store(false, Ordering::Relaxed); + true + } else { + false + } + }))), + ); + + let k = Key::from_raw(b"k"); + let mut ctx = Context::default(); + ctx.set_region_id(region.get_id()); + ctx.set_region_epoch(region.get_region_epoch().clone()); + ctx.set_peer(peer2); + + // block node when 
collecting message to make async write proposal and a raft + // message with higher term occured in a single batch. + fail::cfg("on_peer_collect_message_2", "pause").unwrap(); + let mut res = storage2.async_write( + &ctx, + WriteData::from_modifies(vec![Modify::Put(CF_DEFAULT, k.clone(), b"val".to_vec())]), + WriteEvent::EVENT_PROPOSED, + None, + ); + + // Make node 1 become leader + let router = cluster.get_router(1).unwrap(); + let mut raft_msg = RaftMessage::default(); + raft_msg.set_region_id(1); + raft_msg.set_to_peer(peer1.clone()); + raft_msg.set_region_epoch(region.get_region_epoch().clone()); + raft_msg + .mut_message() + .set_msg_type(MessageType::MsgTimeoutNow); + router.send_raft_message(Box::new(raft_msg)).unwrap(); + + std::thread::sleep(Duration::from_secs(1)); + + ctx.set_peer(peer1); + must_put(&ctx, &storage, b"k", b"val"); + must_delete(&ctx, &storage, b"k"); + + p.store(true, Ordering::Release); + rx.recv().unwrap(); + // Ensure the msg is sent by router. + std::thread::sleep(Duration::from_millis(100)); + fail::remove("on_peer_collect_message_2"); + + let r = block_on(async { res.next().await }).unwrap(); + assert!(matches!(r, WriteEvent::Finished(Err { .. }))); + + std::thread::sleep(Duration::from_secs(1)); + assert_eq!(cluster.get(k.as_encoded()), None); +} From b3ffab6d4e4fc3278eec51df88b8571724ba12c5 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Thu, 12 Oct 2023 19:34:56 +0800 Subject: [PATCH 0957/1149] [Dynamic Regions] Supplement extra uts for test_storage.rs. (#15750) ref tikv/tikv#15409 This pr contains several necessary uts and some enhancements for `raftstore-v2`: - Supply extra test cases, including integration tests and unit tests for raftstore-v2 on `storage`. - Transplant the necessary options on setting `deadline` for `SimpleWrite` in raftstore-v2. 
Signed-off-by: lucasliang --- components/raftstore-v2/src/fsm/peer.rs | 2 +- .../src/operation/command/write/mod.rs | 24 ++++-- .../raftstore-v2/src/operation/txn_ext.rs | 9 ++- components/raftstore-v2/src/router/message.rs | 11 ++- components/test_raftstore-v2/src/cluster.rs | 11 ++- src/server/raftkv2/mod.rs | 50 +++++++++---- tests/failpoints/cases/test_storage.rs | 73 +++++++++++-------- 7 files changed, 117 insertions(+), 63 deletions(-) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 6896f8caa5e..1734b46b25a 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -267,7 +267,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, write.header, write.data, write.ch, - Some(write.disk_full_opt), + Some(write.extra_opts), ); } PeerMsg::UnsafeWrite(write) => { diff --git a/components/raftstore-v2/src/operation/command/write/mod.rs b/components/raftstore-v2/src/operation/command/write/mod.rs index cc71533a29a..5806614e192 100644 --- a/components/raftstore-v2/src/operation/command/write/mod.rs +++ b/components/raftstore-v2/src/operation/command/write/mod.rs @@ -5,7 +5,7 @@ use engine_traits::{ }; use fail::fail_point; use futures::channel::oneshot; -use kvproto::{kvrpcpb::DiskFullOpt, raft_cmdpb::RaftRequestHeader}; +use kvproto::raft_cmdpb::RaftRequestHeader; use raftstore::{ store::{ cmd_resp, @@ -13,6 +13,7 @@ use raftstore::{ metrics::PEER_WRITE_CMD_COUNTER, msg::ErrorCallback, util::{self}, + RaftCmdExtraOpts, }, Error, Result, }; @@ -42,7 +43,7 @@ impl Peer { header: Box, data: SimpleWriteBinary, ch: CmdResChannel, - disk_full_opt: Option, + extra_opts: Option, ) { if !self.serving() { apply::notify_req_region_removed(self.region_id(), ch); @@ -60,12 +61,19 @@ impl Peer { ch.report_error(resp); return; } - // Check whether the write request can be proposed with the given disk full - // option. 
- if let Some(opt) = disk_full_opt && let Err(e) = self.check_proposal_with_disk_full_opt(ctx, opt) { - let resp = cmd_resp::new_error(e); - ch.report_error(resp); - return; + if let Some(opts) = extra_opts { + if let Some(Err(e)) = opts.deadline.map(|deadline| deadline.check()) { + let resp = cmd_resp::new_error(e.into()); + ch.report_error(resp); + return; + } + // Check whether the write request can be proposed with the given disk full + // option. + if let Err(e) = self.check_proposal_with_disk_full_opt(ctx, opts.disk_full_opt) { + let resp = cmd_resp::new_error(e); + ch.report_error(resp); + return; + } } // To maintain propose order, we need to make pending proposal first. self.propose_pending_writes(ctx); diff --git a/components/raftstore-v2/src/operation/txn_ext.rs b/components/raftstore-v2/src/operation/txn_ext.rs index 4c875a675ef..6a379b9a1a2 100644 --- a/components/raftstore-v2/src/operation/txn_ext.rs +++ b/components/raftstore-v2/src/operation/txn_ext.rs @@ -17,7 +17,7 @@ use kvproto::{ use parking_lot::RwLockWriteGuard; use raft::eraftpb; use raftstore::store::{ - LocksStatus, PeerPessimisticLocks, TxnExt, TRANSFER_LEADER_COMMAND_REPLY_CTX, + LocksStatus, PeerPessimisticLocks, RaftCmdExtraOpts, TxnExt, TRANSFER_LEADER_COMMAND_REPLY_CTX, }; use slog::{error, info, Logger}; @@ -270,13 +270,16 @@ impl Peer { self.logger, "propose {} locks before transferring leader", lock_count; ); - let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write_with_opt(header, encoder.encode(), DiskFullOpt::AllowedOnAlmostFull).0 else {unreachable!()}; + let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write_with_opt(header, encoder.encode(), RaftCmdExtraOpts { + disk_full_opt: DiskFullOpt::AllowedOnAlmostFull, + ..Default::default() + }).0 else {unreachable!()}; self.on_simple_write( ctx, write.header, write.data, write.ch, - Some(write.disk_full_opt), + Some(write.extra_opts), ); true } diff --git a/components/raftstore-v2/src/router/message.rs 
b/components/raftstore-v2/src/router/message.rs index 830286bb142..c9da5241fa8 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -6,7 +6,6 @@ use std::sync::{mpsc::SyncSender, Arc}; use collections::HashSet; use kvproto::{ import_sstpb::SstMeta, - kvrpcpb::DiskFullOpt, metapb, metapb::RegionEpoch, pdpb, @@ -15,7 +14,7 @@ use kvproto::{ }; use raftstore::store::{ fsm::ChangeObserver, metrics::RaftEventDurationType, simple_write::SimpleWriteBinary, - util::LatencyInspector, FetchedLogs, GenSnapRes, TabletSnapKey, + util::LatencyInspector, FetchedLogs, GenSnapRes, RaftCmdExtraOpts, TabletSnapKey, UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryWaitApplySyncer, }; @@ -135,7 +134,7 @@ pub struct SimpleWrite { pub header: Box, pub data: SimpleWriteBinary, pub ch: CmdResChannel, - pub disk_full_opt: DiskFullOpt, + pub extra_opts: RaftCmdExtraOpts, } #[derive(Debug)] @@ -299,13 +298,13 @@ impl PeerMsg { header: Box, data: SimpleWriteBinary, ) -> (Self, CmdResSubscriber) { - PeerMsg::simple_write_with_opt(header, data, DiskFullOpt::default()) + PeerMsg::simple_write_with_opt(header, data, RaftCmdExtraOpts::default()) } pub fn simple_write_with_opt( header: Box, data: SimpleWriteBinary, - disk_full_opt: DiskFullOpt, + extra_opts: RaftCmdExtraOpts, ) -> (Self, CmdResSubscriber) { let (ch, sub) = CmdResChannel::pair(); ( @@ -314,7 +313,7 @@ impl PeerMsg { header, data, ch, - disk_full_opt, + extra_opts, }), sub, ) diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 496f8cc87dc..53ff2c0f0b6 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -69,6 +69,9 @@ use tikv_util::{ }; use txn_types::WriteBatchFlags; +// MAX duration waiting for releasing store metas, default: 10s. 
+const MAX_WAIT_RELEASE_INTERVAL: u32 = 1000; + // We simulate 3 or 5 nodes, each has a store. // Sometimes, we use fixed id to test, which means the id // isn't allocated by pd, and node id, store id are same. @@ -328,7 +331,7 @@ pub trait Simulator { PeerMsg::simple_write_with_opt( Box::new(request.take_header()), write_encoder.encode(), - opts.disk_full_opt, + opts, ) }; @@ -1874,15 +1877,17 @@ impl, EK: KvEngine> Cluster { } self.leaders.clear(); for store_meta in self.store_metas.values() { - while Arc::strong_count(store_meta) != 1 { + // Limits the loop count of checking. + let mut idx = 0; + while Arc::strong_count(store_meta) != 1 && idx < MAX_WAIT_RELEASE_INTERVAL { std::thread::sleep(Duration::from_millis(10)); + idx += 1; } } self.store_metas.clear(); for sst_worker in self.sst_workers.drain(..) { sst_worker.stop_worker(); } - debug!("all nodes are shut down."); } diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index a80cdda392f..a9f7eb7586e 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -19,7 +19,13 @@ use kvproto::{ }; pub use node::NodeV2; pub use raft_extension::Extension; -use raftstore::store::{util::encode_start_ts_into_flag_data, RegionSnapshot}; +use raftstore::{ + store::{ + cmd_resp, msg::ErrorCallback, util::encode_start_ts_into_flag_data, RaftCmdExtraOpts, + RegionSnapshot, + }, + Error, +}; use raftstore_v2::{ router::{ message::SimpleWrite, CmdResChannelBuilder, CmdResEvent, CmdResStream, PeerMsg, RaftRouter, @@ -265,6 +271,17 @@ impl tikv_kv::Engine for RaftKv2 { let region_id = ctx.region_id; ASYNC_REQUESTS_COUNTER_VEC.write.all.inc(); + + let inject_region_not_found = (|| { + // If rid is some, only the specified region reports error. + // If rid is None, all regions report error. 
+ fail_point!("raftkv_early_error_report", |rid| -> bool { + rid.and_then(|rid| rid.parse().ok()) + .map_or(true, |rid: u64| rid == region_id) + }); + false + })(); + let begin_instant = Instant::now_coarse(); let mut header = Box::new(new_request_header(ctx)); let mut flags = 0; @@ -299,18 +316,25 @@ impl tikv_kv::Engine for RaftKv2 { }); } let (ch, sub) = builder.build(); - let msg = PeerMsg::SimpleWrite(SimpleWrite { - header, - data, - ch, - send_time: Instant::now_coarse(), - disk_full_opt: batch.disk_full_opt, - }); - let res = self - .router - .store_router() - .check_send(region_id, msg) - .map_err(tikv_kv::Error::from); + let res = if inject_region_not_found { + ch.report_error(cmd_resp::new_error(Error::RegionNotFound(region_id))); + Err(tikv_kv::Error::from(Error::RegionNotFound(region_id))) + } else { + let msg = PeerMsg::SimpleWrite(SimpleWrite { + header, + data, + ch, + send_time: Instant::now_coarse(), + extra_opts: RaftCmdExtraOpts { + deadline: batch.deadline, + disk_full_opt: batch.disk_full_opt, + }, + }); + self.router + .store_router() + .check_send(region_id, msg) + .map_err(tikv_kv::Error::from) + }; (Transform { resp: CmdResStream::new(sub), early_err: res.err(), diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 57047bef9d4..533d8d0abd4 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -24,6 +24,7 @@ use kvproto::{ }; use resource_control::ResourceGroupManager; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv::{ config::{ConfigController, Module}, storage::{ @@ -44,10 +45,11 @@ use tikv::{ use tikv_util::{future::paired_future_callback, worker::dummy_scheduler, HandyRwLock}; use txn_types::{Key, Mutation, TimeStamp}; -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_scheduler_leader_change_twice() { let snapshot_fp = "scheduler_async_snapshot_finish"; - 
let mut cluster = new_server_cluster(0, 2); + let mut cluster = new_cluster(0, 2); cluster.run(); let region0 = cluster.get_region(b""); let peers = region0.get_peers(); @@ -108,10 +110,11 @@ fn test_scheduler_leader_change_twice() { } } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_server_catching_api_error() { let raftkv_fp = "raftkv_early_error_report"; - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let region = cluster.get_region(b""); let leader = region.get_peers()[0].clone(); @@ -168,10 +171,11 @@ fn test_server_catching_api_error() { must_get_equal(&cluster.get_engine(1), b"k3", b"v3"); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_raftkv_early_error_report() { let raftkv_fp = "raftkv_early_error_report"; - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); cluster.must_split(&cluster.get_region(b"k0"), b"k1"); @@ -233,10 +237,12 @@ fn test_raftkv_early_error_report() { fail::remove(raftkv_fp); } -#[test] +// FIXME: #[test_case(test_raftstore_v2::new_server_cluster)] +// Raftstore-v2 not support get the storage engine, returning `None` currently. 
+#[test_case(test_raftstore::new_server_cluster)] fn test_scale_scheduler_pool() { let snapshot_fp = "scheduler_start_execute"; - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let origin_pool_size = cluster.cfg.storage.scheduler_worker_pool_size; @@ -332,9 +338,10 @@ fn test_scale_scheduler_pool() { fail::remove(snapshot_fp); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_scheduler_pool_auto_switch_for_resource_ctl() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster @@ -1090,9 +1097,10 @@ fn test_async_apply_prewrite_impl( } } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_async_apply_prewrite() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster @@ -1149,7 +1157,6 @@ fn test_async_apply_prewrite() { true, true, ); - test_async_apply_prewrite_impl( &storage, ctx.clone(), @@ -1188,9 +1195,10 @@ fn test_async_apply_prewrite() { ); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_async_apply_prewrite_fallback() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster @@ -1378,9 +1386,10 @@ fn test_async_apply_prewrite_1pc_impl( } } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_async_apply_prewrite_1pc() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster @@ -1405,9 +1414,10 @@ fn test_async_apply_prewrite_1pc() { test_async_apply_prewrite_1pc_impl(&storage, ctx, b"key", b"value2", 20, true); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] 
+#[test_case(test_raftstore_v2::new_server_cluster)] fn test_atomic_cas_lock_by_latch() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster @@ -1493,9 +1503,10 @@ fn test_atomic_cas_lock_by_latch() { assert_eq!(b"v2".to_vec(), ret); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_before_async_write_deadline() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster @@ -1532,12 +1543,13 @@ fn test_before_async_write_deadline() { )); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_deadline_exceeded_on_get_and_batch_get() { use tikv_util::time::Instant; use tracker::INVALID_TRACKER_TOKEN; - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster @@ -1591,9 +1603,10 @@ fn test_deadline_exceeded_on_get_and_batch_get() { fail::remove("after-snapshot"); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_before_propose_deadline() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster.sim.read().unwrap().storages[&1].clone(); @@ -1629,9 +1642,10 @@ fn test_before_propose_deadline() { ); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] +#[test_case(test_raftstore_v2::new_server_cluster)] fn test_resolve_lock_deadline() { - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let engine = cluster.sim.read().unwrap().storages[&1].clone(); @@ -1789,10 +1803,11 @@ fn test_mvcc_concurrent_commit_and_rollback_at_shutdown() { assert_eq!(get_resp.value, v); } -#[test] +#[test_case(test_raftstore::new_server_cluster)] 
+#[test_case(test_raftstore_v2::new_server_cluster)] fn test_raw_put_deadline() { let deadline_fp = "deadline_check_fail"; - let mut cluster = new_server_cluster(0, 1); + let mut cluster = new_cluster(0, 1); cluster.run(); let region = cluster.get_region(b""); let leader = region.get_peers()[0].clone(); From b4e0bf7bab6ad395b74c0be938119d82ded4cd2a Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 12 Oct 2023 21:16:56 -0500 Subject: [PATCH 0958/1149] raftstore: Introduce failed state for unsafe recovery to fix rollback merge timeout (#15635) close tikv/tikv#15629 Introduce failed state for unsafe recovery to fix rollback merge timeout. To rollback merge, it has to be in force leader state when performing online recovery. Force leader state would exit after executing the plan no matter succeeded or failed. While rollback merge is triggered on check merge tick periodically. So there is a chance that check merge can't always be in the time window of being force leader state. To solve that, let it skip exiting force leader state when failed to demote, so later rollback merge can be triggered. 
Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/fsm/peer.rs | 7 ++- .../src/operation/unsafe_recovery/demote.rs | 2 + .../operation/unsafe_recovery/force_leader.rs | 15 ++++- .../src/operation/unsafe_recovery/report.rs | 20 ++++-- components/raftstore/src/store/fsm/peer.rs | 63 +++++++++++++------ .../raftstore/src/store/unsafe_recovery.rs | 16 +++-- .../failpoints/cases/test_unsafe_recovery.rs | 44 ++++++++++++- 7 files changed, 132 insertions(+), 35 deletions(-) diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 1734b46b25a..94506a8a19f 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -382,9 +382,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, syncer, failed_stores, ), - PeerMsg::ExitForceLeaderState => { - self.fsm.peer_mut().on_exit_force_leader(self.store_ctx) - } + PeerMsg::ExitForceLeaderState => self + .fsm + .peer_mut() + .on_exit_force_leader(self.store_ctx, false), PeerMsg::ExitForceLeaderStateCampaign => { self.fsm.peer_mut().on_exit_force_leader_campaign() } diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs index 37962a45452..20a42b9f978 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs @@ -75,6 +75,7 @@ impl Peer { "Unsafe recovery, fail to finish demotion"; "err" => ?resp.get_header().get_error(), ); + *self.unsafe_recovery_state_mut() = Some(UnsafeRecoveryState::Failed); return; } *self.unsafe_recovery_state_mut() = Some(UnsafeRecoveryState::DemoteFailedVoters { @@ -129,6 +130,7 @@ impl Peer { "Unsafe recovery, fail to exit joint state"; "err" => ?resp.get_header().get_error(), ); + *self.unsafe_recovery_state_mut()= 
Some(UnsafeRecoveryState::Failed); } } else { error!(self.logger, diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs b/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs index ba7e391dbef..e6af0fddb7b 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs @@ -5,7 +5,9 @@ use std::mem; use collections::HashSet; use engine_traits::{KvEngine, RaftEngine}; use raft::{eraftpb::MessageType, StateRole, Storage}; -use raftstore::store::{util::LeaseState, ForceLeaderState, UnsafeRecoveryForceLeaderSyncer}; +use raftstore::store::{ + util::LeaseState, ForceLeaderState, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryState, +}; use slog::{info, warn}; use tikv_util::time::Instant as TiInstant; @@ -182,11 +184,20 @@ impl Peer { self.set_has_ready(); } - pub fn on_exit_force_leader(&mut self, ctx: &StoreContext) { + // TODO: add exit force leader check tick for raftstore v2 + pub fn on_exit_force_leader(&mut self, ctx: &StoreContext, force: bool) { if !self.has_force_leader() { return; } + if let Some(UnsafeRecoveryState::Failed) = self.unsafe_recovery_state() && !force { + // Skip force leader if the plan failed, so wait for the next retry of plan with force leader state holding + info!( + self.logger, "skip exiting force leader state" + ); + return; + } + info!(self.logger, "exit force leader state"); *self.force_leader_mut() = None; // leader lease shouldn't be renewed in force leader state. 
diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs index 7173d00363a..90c8e3db34d 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs @@ -44,11 +44,19 @@ impl Peer { self.raft_group().raft.raft_log.committed }; - *self.unsafe_recovery_state_mut() = Some(UnsafeRecoveryState::WaitApply { - target_index, - syncer, - }); - self.unsafe_recovery_maybe_finish_wait_apply(!self.serving()); + if target_index > self.raft_group().raft.raft_log.applied { + info!( + self.logger, + "Unsafe recovery, start wait apply"; + "target_index" => target_index, + "applied" => self.raft_group().raft.raft_log.applied, + ); + *self.unsafe_recovery_state_mut() = Some(UnsafeRecoveryState::WaitApply { + target_index, + syncer, + }); + self.unsafe_recovery_maybe_finish_wait_apply(!self.serving()); + } } pub fn unsafe_recovery_maybe_finish_wait_apply(&mut self, force: bool) { @@ -113,7 +121,7 @@ impl Peer { Some(UnsafeRecoveryState::DemoteFailedVoters { .. 
}) => { self.unsafe_recovery_maybe_finish_demote_failed_voters(ctx) } - Some(UnsafeRecoveryState::Destroy(_)) | None => {} + Some(UnsafeRecoveryState::Destroy(_)) | Some(UnsafeRecoveryState::Failed) | None => {} } } } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index b6d7f8fcfcc..584db92e8be 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -824,6 +824,8 @@ where target_index: self.fsm.peer.raft_group.raft.raft_log.last_index(), demote_after_exit: true, }); + } else { + self.fsm.peer.unsafe_recovery_state = Some(UnsafeRecoveryState::Failed); } } else { self.unsafe_recovery_demote_failed_voters(syncer, failed_voters); @@ -863,6 +865,8 @@ where target_index: self.fsm.peer.raft_group.raft.raft_log.last_index(), demote_after_exit: false, }); + } else { + self.fsm.peer.unsafe_recovery_state = Some(UnsafeRecoveryState::Failed); } } else { warn!( @@ -913,13 +917,22 @@ where self.fsm.peer.raft_group.raft.raft_log.committed }; - self.fsm.peer.unsafe_recovery_state = Some(UnsafeRecoveryState::WaitApply { - target_index, - syncer, - }); - self.fsm - .peer - .unsafe_recovery_maybe_finish_wait_apply(/* force= */ self.fsm.stopped); + if target_index > self.fsm.peer.raft_group.raft.raft_log.applied { + info!( + "Unsafe recovery, start wait apply"; + "region_id" => self.region().get_id(), + "peer_id" => self.fsm.peer_id(), + "target_index" => target_index, + "applied" => self.fsm.peer.raft_group.raft.raft_log.applied, + ); + self.fsm.peer.unsafe_recovery_state = Some(UnsafeRecoveryState::WaitApply { + target_index, + syncer, + }); + self.fsm + .peer + .unsafe_recovery_maybe_finish_wait_apply(/* force= */ self.fsm.stopped); + } } // func be invoked firstly after assigned leader by BR, wait all leader apply to @@ -1466,7 +1479,7 @@ where } => { self.on_enter_pre_force_leader(syncer, failed_stores); } - SignificantMsg::ExitForceLeaderState => self.on_exit_force_leader(), 
+ SignificantMsg::ExitForceLeaderState => self.on_exit_force_leader(false), SignificantMsg::UnsafeRecoveryDemoteFailedVoters { syncer, failed_voters, @@ -1700,10 +1713,19 @@ where self.fsm.has_ready = true; } - fn on_exit_force_leader(&mut self) { + fn on_exit_force_leader(&mut self, force: bool) { if self.fsm.peer.force_leader.is_none() { return; } + if let Some(UnsafeRecoveryState::Failed) = self.fsm.peer.unsafe_recovery_state && !force { + // Skip force leader if the plan failed, so wait for the next retry of plan with force leader state holding + info!( + "skip exiting force leader state"; + "region_id" => self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + ); + return; + } info!( "exit force leader state"; @@ -1712,7 +1734,7 @@ where ); self.fsm.peer.force_leader = None; // make sure it's not hibernated - assert_eq!(self.fsm.hibernate_state.group_state(), GroupState::Ordered); + assert_ne!(self.fsm.hibernate_state.group_state(), GroupState::Idle); // leader lease shouldn't be renewed in force leader state. assert_eq!( self.fsm.peer.leader_lease().inspect(None), @@ -2274,7 +2296,10 @@ where } } // Destroy does not need be processed, the state is cleaned up together with peer. - Some(_) | None => {} + Some(UnsafeRecoveryState::Destroy { .. }) + | Some(UnsafeRecoveryState::Failed) + | Some(UnsafeRecoveryState::WaitInitialize(..)) + | None => {} } } @@ -6360,13 +6385,6 @@ where return; } - if let Some(ForceLeaderState::ForceLeader { time, .. }) = self.fsm.peer.force_leader { - // Clean up the force leader state after a timeout, since the PD recovery - // process may have been aborted for some reasons. 
- if time.saturating_elapsed() > UNSAFE_RECOVERY_STATE_TIMEOUT { - self.on_exit_force_leader(); - } - } if let Some(state) = &mut self.fsm.peer.unsafe_recovery_state { let unsafe_recovery_state_timeout_failpoint = || -> bool { fail_point!("unsafe_recovery_state_timeout", |_| true); @@ -6379,6 +6397,15 @@ where { info!("timeout, abort unsafe recovery"; "state" => ?state); state.abort(); + self.fsm.peer.unsafe_recovery_state = None; + } + } + + if let Some(ForceLeaderState::ForceLeader { time, .. }) = self.fsm.peer.force_leader { + // Clean up the force leader state after a timeout, since the PD recovery + // process may have been aborted for some reasons. + if time.saturating_elapsed() > UNSAFE_RECOVERY_STATE_TIMEOUT { + self.on_exit_force_leader(true); } } diff --git a/components/raftstore/src/store/unsafe_recovery.rs b/components/raftstore/src/store/unsafe_recovery.rs index f98fcaea581..28943ae7339 100644 --- a/components/raftstore/src/store/unsafe_recovery.rs +++ b/components/raftstore/src/store/unsafe_recovery.rs @@ -241,7 +241,7 @@ pub struct UnsafeRecoveryForceLeaderSyncer(Arc); impl UnsafeRecoveryForceLeaderSyncer { pub fn new(report_id: u64, router: Arc) -> Self { let inner = InvokeClosureOnDrop(Some(Box::new(move || { - info!("Unsafe recovery, force leader finished."); + info!("Unsafe recovery, force leader finished."; "report_id" => report_id); start_unsafe_recovery_report(router, report_id, false); }))); UnsafeRecoveryForceLeaderSyncer(Arc::new(inner)) @@ -260,11 +260,11 @@ impl UnsafeRecoveryExecutePlanSyncer { let abort = Arc::new(Mutex::new(false)); let abort_clone = abort.clone(); let closure = InvokeClosureOnDrop(Some(Box::new(move || { - info!("Unsafe recovery, plan execution finished"); if *abort_clone.lock().unwrap() { - warn!("Unsafe recovery, plan execution aborted"); + warn!("Unsafe recovery, plan execution aborted"; "report_id" => report_id); return; } + info!("Unsafe recovery, plan execution finished"; "report_id" => report_id); 
start_unsafe_recovery_report(router, report_id, true); }))); UnsafeRecoveryExecutePlanSyncer { @@ -330,7 +330,7 @@ impl UnsafeRecoveryWaitApplySyncer { let abort_clone = abort.clone(); let closure = InvokeClosureOnDrop(Some(Box::new(move || { if *abort_clone.lock().unwrap() { - warn!("Unsafe recovery, wait apply aborted"); + warn!("Unsafe recovery, wait apply aborted"; "report_id" => report_id); return; } info!("Unsafe recovery, wait apply finished"); @@ -363,7 +363,7 @@ impl UnsafeRecoveryFillOutReportSyncer { let reports = Arc::new(Mutex::new(vec![])); let reports_clone = reports.clone(); let closure = InvokeClosureOnDrop(Some(Box::new(move || { - info!("Unsafe recovery, peer reports collected"); + info!("Unsafe recovery, peer reports collected"; "report_id" => report_id); let mut store_report = StoreReport::default(); { let mut reports_ptr = reports_clone.lock().unwrap(); @@ -420,6 +420,9 @@ pub enum UnsafeRecoveryState { }, Destroy(UnsafeRecoveryExecutePlanSyncer), WaitInitialize(UnsafeRecoveryExecutePlanSyncer), + // DemoteFailedVoter may fail due to some reasons. It's just a marker to avoid exiting force + // leader state + Failed, } impl UnsafeRecoveryState { @@ -429,6 +432,7 @@ impl UnsafeRecoveryState { UnsafeRecoveryState::DemoteFailedVoters { syncer, .. } | UnsafeRecoveryState::Destroy(syncer) | UnsafeRecoveryState::WaitInitialize(syncer) => syncer.time, + UnsafeRecoveryState::Failed => return false, }; time.saturating_elapsed() >= timeout } @@ -439,6 +443,7 @@ impl UnsafeRecoveryState { UnsafeRecoveryState::DemoteFailedVoters { syncer, .. } | UnsafeRecoveryState::Destroy(syncer) | UnsafeRecoveryState::WaitInitialize(syncer) => &syncer.abort, + UnsafeRecoveryState::Failed => return true, }; *abort.lock().unwrap() } @@ -449,6 +454,7 @@ impl UnsafeRecoveryState { UnsafeRecoveryState::DemoteFailedVoters { syncer, .. 
} | UnsafeRecoveryState::Destroy(syncer) | UnsafeRecoveryState::WaitInitialize(syncer) => syncer.abort(), + UnsafeRecoveryState::Failed => (), } } } diff --git a/tests/failpoints/cases/test_unsafe_recovery.rs b/tests/failpoints/cases/test_unsafe_recovery.rs index 978489b5cd6..9e5a5dffcd9 100644 --- a/tests/failpoints/cases/test_unsafe_recovery.rs +++ b/tests/failpoints/cases/test_unsafe_recovery.rs @@ -458,7 +458,7 @@ fn test_unsafe_recovery_rollback_merge() { } // Block merge commit, let go of the merge prepare. - fail::cfg("on_schedule_merge_ret_err", "return()").unwrap(); + fail::cfg("on_schedule_merge", "return()").unwrap(); let region = pd_client.get_region(b"k1").unwrap(); cluster.must_split(®ion, b"k2"); @@ -521,6 +521,48 @@ fn test_unsafe_recovery_rollback_merge() { pd_client.must_set_unsafe_recovery_plan(nodes[0], plan.clone()); cluster.must_send_store_heartbeat(nodes[0]); + // Can't propose demotion as it's in merging mode + let mut store_report = None; + for _ in 0..20 { + store_report = pd_client.must_get_store_report(nodes[0]); + if store_report.is_some() { + break; + } + sleep_ms(100); + } + assert_ne!(store_report, None); + let has_force_leader = store_report + .unwrap() + .get_peer_reports() + .iter() + .any(|p| p.get_is_force_leader()); + // Force leader is not exited due to demotion failure + assert!(has_force_leader); + + fail::remove("on_schedule_merge"); + fail::cfg("on_schedule_merge_ret_err", "return()").unwrap(); + + // Make sure merge check is scheduled, and rollback merge is triggered + sleep_ms(50); + + // Re-triggers the unsafe recovery plan execution. 
+ pd_client.must_set_unsafe_recovery_plan(nodes[0], plan); + cluster.must_send_store_heartbeat(nodes[0]); + let mut store_report = None; + for _ in 0..20 { + store_report = pd_client.must_get_store_report(nodes[0]); + if store_report.is_some() { + break; + } + sleep_ms(100); + } + assert_ne!(store_report, None); + // No force leader + for peer_report in store_report.unwrap().get_peer_reports() { + assert!(!peer_report.get_is_force_leader()); + } + + // Demotion is done let mut demoted = false; for _ in 0..10 { let new_left = block_on(pd_client.get_region_by_id(left.get_id())) From cb27f24b89c8107c9ead57be5016ee779996ac25 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Mon, 16 Oct 2023 12:36:27 +0800 Subject: [PATCH 0959/1149] retry leader read when stale read encounters data not ready (#15726) ref tikv/tikv#14553 Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../raftstore/src/store/worker/metrics.rs | 20 ++ components/raftstore/src/store/worker/read.rs | 330 +++++++++++++++--- tests/failpoints/cases/test_kv_service.rs | 57 ++- .../cases/test_replica_stale_read.rs | 18 +- 4 files changed, 358 insertions(+), 67 deletions(-) diff --git a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index fd3f54d239d..8dca3bcfd44 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -70,6 +70,8 @@ make_static_metric! { pub struct LocalReadMetrics { pub local_executed_requests: LocalIntCounter, pub local_executed_stale_read_requests: LocalIntCounter, + pub local_executed_stale_read_fallback_success_requests: LocalIntCounter, + pub local_executed_stale_read_fallback_failure_requests: LocalIntCounter, pub local_executed_replica_read_requests: LocalIntCounter, pub local_executed_snapshot_cache_hit: LocalIntCounter, pub reject_reason: LocalReadRejectCounter, @@ -82,6 +84,8 @@ thread_local! 
{ LocalReadMetrics { local_executed_requests: LOCAL_READ_EXECUTED_REQUESTS.local(), local_executed_stale_read_requests: LOCAL_READ_EXECUTED_STALE_READ_REQUESTS.local(), + local_executed_stale_read_fallback_success_requests: LOCAL_READ_EXECUTED_STALE_READ_FALLBACK_SUCCESS_REQUESTS.local(), + local_executed_stale_read_fallback_failure_requests: LOCAL_READ_EXECUTED_STALE_READ_FALLBACK_FAILURE_REQUESTS.local(), local_executed_replica_read_requests: LOCAL_READ_EXECUTED_REPLICA_READ_REQUESTS.local(), local_executed_snapshot_cache_hit: LOCAL_READ_EXECUTED_CACHE_REQUESTS.local(), reject_reason: LocalReadRejectCounter::from(&LOCAL_READ_REJECT_VEC), @@ -100,6 +104,10 @@ pub fn maybe_tls_local_read_metrics_flush() { if m.last_flush_time.saturating_elapsed() >= Duration::from_millis(METRICS_FLUSH_INTERVAL) { m.local_executed_requests.flush(); m.local_executed_stale_read_requests.flush(); + m.local_executed_stale_read_fallback_success_requests + .flush(); + m.local_executed_stale_read_fallback_failure_requests + .flush(); m.local_executed_replica_read_requests.flush(); m.local_executed_snapshot_cache_hit.flush(); m.reject_reason.flush(); @@ -189,6 +197,18 @@ lazy_static! { "Total number of stale read requests directly executed by local reader." ) .unwrap(); + pub static ref LOCAL_READ_EXECUTED_STALE_READ_FALLBACK_SUCCESS_REQUESTS: IntCounter = + register_int_counter!( + "tikv_raftstore_local_read_executed_stale_read_fallback_success_requests", + "Total number of stale read requests executed by local leader peer as snapshot read." + ) + .unwrap(); + pub static ref LOCAL_READ_EXECUTED_STALE_READ_FALLBACK_FAILURE_REQUESTS: IntCounter = + register_int_counter!( + "tikv_raftstore_local_read_executed_stale_read_fallback_failure_requests", + "Total number of stale read requests failed to be executed by local leader peer as snapshot read." 
+ ) + .unwrap(); pub static ref LOCAL_READ_EXECUTED_REPLICA_READ_REQUESTS: IntCounter = register_int_counter!( "tikv_raftstore_local_read_executed_replica_read_requests", "Total number of stale read requests directly executed by local reader." diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 5d6ede9c193..2d54c00baa6 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -30,7 +30,7 @@ use tikv_util::{ }; use time::Timespec; use tracker::GLOBAL_TRACKERS; -use txn_types::TimeStamp; +use txn_types::{TimeStamp, WriteBatchFlags}; use super::metrics::*; use crate::{ @@ -974,80 +974,155 @@ where cmd.callback.set_result(read_resp); } + /// Try to handle the read request using local read, if the leader is valid + /// the read response is returned, otherwise None is returned. + fn try_local_leader_read( + &mut self, + req: &RaftCmdRequest, + delegate: &mut CachedReadDelegate, + read_id: Option, + snap_updated: &mut bool, + last_valid_ts: Timespec, + ) -> Option> { + let mut local_read_ctx = LocalReadContext::new(&mut self.snap_cache, read_id); + + (*snap_updated) = + local_read_ctx.maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); + + let snapshot_ts = local_read_ctx.snapshot_ts().unwrap(); + if !delegate.is_in_leader_lease(snapshot_ts) { + return None; + } + + let region = Arc::clone(&delegate.region); + let mut response = delegate.execute(req, ®ion, None, Some(local_read_ctx)); + if let Some(snap) = response.snapshot.as_mut() { + snap.bucket_meta = delegate.bucket_meta.clone(); + } + // Try renew lease in advance + delegate.maybe_renew_lease_advance(&self.router, snapshot_ts); + Some(response) + } + + /// Try to handle the stale read request, if the read_ts < safe_ts the read + /// response is returned, otherwise the raft command response with + /// `DataIsNotReady` error is returned. 
+ fn try_local_stale_read( + &mut self, + req: &RaftCmdRequest, + delegate: &mut CachedReadDelegate, + snap_updated: &mut bool, + last_valid_ts: Timespec, + ) -> std::result::Result, RaftCmdResponse> { + let read_ts = decode_u64(&mut req.get_header().get_flag_data()).unwrap(); + delegate.check_stale_read_safe(read_ts)?; + + // Stale read does not use cache, so we pass None for read_id + let mut local_read_ctx = LocalReadContext::new(&mut self.snap_cache, None); + (*snap_updated) = + local_read_ctx.maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); + + let region = Arc::clone(&delegate.region); + // Getting the snapshot + let mut response = delegate.execute(req, ®ion, None, Some(local_read_ctx)); + if let Some(snap) = response.snapshot.as_mut() { + snap.bucket_meta = delegate.bucket_meta.clone(); + } + // Double check in case `safe_ts` change after the first check and before + // getting snapshot + delegate.check_stale_read_safe(read_ts)?; + + TLS_LOCAL_READ_METRICS.with(|m| m.borrow_mut().local_executed_stale_read_requests.inc()); + Ok(response) + } + pub fn propose_raft_command( &mut self, read_id: Option, - req: RaftCmdRequest, + mut req: RaftCmdRequest, cb: Callback, ) { match self.pre_propose_raft_command(&req) { Ok(Some((mut delegate, policy))) => { - let snap_updated; + let mut snap_updated = false; let last_valid_ts = delegate.last_valid_ts; let mut response = match policy { // Leader can read local if and only if it is in lease. 
RequestPolicy::ReadLocal => { - let mut local_read_ctx = - LocalReadContext::new(&mut self.snap_cache, read_id); - - snap_updated = local_read_ctx - .maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); - - let snapshot_ts = local_read_ctx.snapshot_ts().unwrap(); - if !delegate.is_in_leader_lease(snapshot_ts) { + if let Some(read_resp) = self.try_local_leader_read( + &req, + &mut delegate, + read_id, + &mut snap_updated, + last_valid_ts, + ) { + read_resp + } else { fail_point!("localreader_before_redirect", |_| {}); // Forward to raftstore. self.redirect(RaftCommand::new(req, cb)); return; } - - let region = Arc::clone(&delegate.region); - let mut response = - delegate.execute(&req, ®ion, None, Some(local_read_ctx)); - if let Some(snap) = response.snapshot.as_mut() { - snap.bucket_meta = delegate.bucket_meta.clone(); - } - // Try renew lease in advance - delegate.maybe_renew_lease_advance(&self.router, snapshot_ts); - response } // Replica can serve stale read if and only if its `safe_ts` >= `read_ts` RequestPolicy::StaleRead => { - let read_ts = decode_u64(&mut req.get_header().get_flag_data()).unwrap(); - if let Err(resp) = delegate.check_stale_read_safe(read_ts) { - cb.set_result(ReadResponse { - response: resp, - snapshot: None, - txn_extra_op: TxnExtraOp::Noop, - }); - return; + match self.try_local_stale_read( + &req, + &mut delegate, + &mut snap_updated, + last_valid_ts, + ) { + Ok(read_resp) => read_resp, + Err(err_resp) => { + // It's safe to change the header of the `RaftCmdRequest`, as it + // would not affect the `SnapCtx` used in upper layer like. + let unset_stale_flag = req.get_header().get_flags() + & (!WriteBatchFlags::STALE_READ.bits()); + req.mut_header().set_flags(unset_stale_flag); + let mut inspector = Inspector { + delegate: &delegate, + }; + // The read request could be handled using snapshot read if the + // local peer is a valid leader. 
+ let allow_fallback_leader_read = inspector + .inspect(&req) + .map_or(false, |r| r == RequestPolicy::ReadLocal); + if !allow_fallback_leader_read { + cb.set_result(ReadResponse { + response: err_resp, + snapshot: None, + txn_extra_op: TxnExtraOp::Noop, + }); + return; + } + if let Some(read_resp) = self.try_local_leader_read( + &req, + &mut delegate, + None, + &mut snap_updated, + last_valid_ts, + ) { + TLS_LOCAL_READ_METRICS.with(|m| { + m.borrow_mut() + .local_executed_stale_read_fallback_success_requests + .inc() + }); + read_resp + } else { + TLS_LOCAL_READ_METRICS.with(|m| { + m.borrow_mut() + .local_executed_stale_read_fallback_failure_requests + .inc() + }); + cb.set_result(ReadResponse { + response: err_resp, + snapshot: None, + txn_extra_op: TxnExtraOp::Noop, + }); + return; + } + } } - - // Stale read does not use cache, so we pass None for read_id - let mut local_read_ctx = LocalReadContext::new(&mut self.snap_cache, None); - snap_updated = local_read_ctx - .maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); - - let region = Arc::clone(&delegate.region); - // Getting the snapshot - let mut response = - delegate.execute(&req, ®ion, None, Some(local_read_ctx)); - if let Some(snap) = response.snapshot.as_mut() { - snap.bucket_meta = delegate.bucket_meta.clone(); - } - // Double check in case `safe_ts` change after the first check and before - // getting snapshot - if let Err(resp) = delegate.check_stale_read_safe(read_ts) { - cb.set_result(ReadResponse { - response: resp, - snapshot: None, - txn_extra_op: TxnExtraOp::Noop, - }); - return; - } - TLS_LOCAL_READ_METRICS - .with(|m| m.borrow_mut().local_executed_stale_read_requests.inc()); - response } _ => unreachable!(), }; @@ -1598,6 +1673,8 @@ mod tests { read_progress.update_safe_ts(1, 1); assert_eq!(read_progress.safe_ts(), 1); + // Expire lease manually to avoid local retry on leader peer. 
+ lease.expire(); let data = { let mut d = [0u8; 8]; (&mut d[..]).encode_u64(2).unwrap(); @@ -1755,13 +1832,14 @@ mod tests { assert_eq!(kv_engine.path(), tablet.path()); } - fn prepare_read_delegate( + fn prepare_read_delegate_with_lease( store_id: u64, region_id: u64, term: u64, pr_ids: Vec, region_epoch: RegionEpoch, store_meta: Arc>, + max_lease: Duration, ) { let mut region = metapb::Region::default(); region.set_id(region_id); @@ -1770,7 +1848,7 @@ mod tests { let leader = prs[0].clone(); region.set_region_epoch(region_epoch); - let mut lease = Lease::new(Duration::seconds(1), Duration::milliseconds(250)); // 1s is long enough. + let mut lease = Lease::new(max_lease, Duration::milliseconds(250)); // 1s is long enough. let read_progress = Arc::new(RegionReadProgress::new(®ion, 1, 1, 1)); // Register region @@ -1799,6 +1877,25 @@ mod tests { } } + fn prepare_read_delegate( + store_id: u64, + region_id: u64, + term: u64, + pr_ids: Vec, + region_epoch: RegionEpoch, + store_meta: Arc>, + ) { + prepare_read_delegate_with_lease( + store_id, + region_id, + term, + pr_ids, + region_epoch, + store_meta, + Duration::seconds(1), + ) + } + #[test] fn test_snap_across_regions() { let store_id = 2; @@ -2165,4 +2262,123 @@ mod tests { must_not_redirect(&mut reader, &rx, task); notify_rx.recv().unwrap(); } + + #[test] + fn test_stale_read_local_leader_fallback() { + let store_id = 2; + let store_meta = Arc::new(Mutex::new(StoreMeta::new(0))); + let (_tmp, mut reader, rx) = new_reader( + "test-stale-local-leader-fallback", + store_id, + store_meta.clone(), + ); + reader.kv_engine.put(b"key", b"value").unwrap(); + + let epoch13 = { + let mut ep = metapb::RegionEpoch::default(); + ep.set_conf_ver(1); + ep.set_version(3); + ep + }; + let term6 = 6; + + // Register region1. + let pr_ids1 = vec![2, 3, 4]; + let prs1 = new_peers(store_id, pr_ids1.clone()); + // Ensure the leader lease is long enough so the fallback would work. 
+ prepare_read_delegate_with_lease( + store_id, + 1, + term6, + pr_ids1.clone(), + epoch13.clone(), + store_meta.clone(), + Duration::seconds(10), + ); + let leader1 = prs1[0].clone(); + + // Local read. + let mut cmd = RaftCmdRequest::default(); + let mut header = RaftRequestHeader::default(); + header.set_region_id(1); + header.set_peer(leader1); + header.set_region_epoch(epoch13.clone()); + header.set_term(term6); + header.set_flags(header.get_flags() | WriteBatchFlags::STALE_READ.bits()); + cmd.set_header(header.clone()); + let mut req = Request::default(); + req.set_cmd_type(CmdType::Snap); + cmd.set_requests(vec![req].into()); + + // A peer can serve read_ts < safe_ts. + let safe_ts = TimeStamp::compose(2, 0); + { + let mut meta = store_meta.lock().unwrap(); + let delegate = meta.readers.get_mut(&1).unwrap(); + delegate + .read_progress + .update_safe_ts(1, safe_ts.into_inner()); + assert_eq!(delegate.read_progress.safe_ts(), safe_ts.into_inner()); + } + let read_ts_1 = TimeStamp::compose(1, 0); + let mut data = [0u8; 8]; + (&mut data[..]).encode_u64(read_ts_1.into_inner()).unwrap(); + header.set_flag_data(data.into()); + cmd.set_header(header.clone()); + let (snap_tx, snap_rx) = channel(); + let task = RaftCommand::::new( + cmd.clone(), + Callback::read(Box::new(move |resp: ReadResponse| { + snap_tx.send(resp).unwrap(); + })), + ); + must_not_redirect(&mut reader, &rx, task); + snap_rx.recv().unwrap().snapshot.unwrap(); + + // When read_ts > safe_ts, the leader peer could still serve if its lease is + // valid. 
+ let read_ts_2 = TimeStamp::compose(safe_ts.physical() + 201, 0); + let mut data = [0u8; 8]; + (&mut data[..]).encode_u64(read_ts_2.into_inner()).unwrap(); + header.set_flag_data(data.into()); + cmd.set_header(header.clone()); + let (snap_tx, snap_rx) = channel(); + let task = RaftCommand::::new( + cmd.clone(), + Callback::read(Box::new(move |resp: ReadResponse| { + snap_tx.send(resp).unwrap(); + })), + ); + must_not_redirect(&mut reader, &rx, task); + snap_rx.recv().unwrap().snapshot.unwrap(); + + // The fallback would not happen if the lease is not valid. + prepare_read_delegate_with_lease( + store_id, + 1, + term6, + pr_ids1, + epoch13, + store_meta, + Duration::milliseconds(1), + ); + thread::sleep(std::time::Duration::from_millis(50)); + let (snap_tx, snap_rx) = channel(); + let task2 = RaftCommand::::new( + cmd.clone(), + Callback::read(Box::new(move |resp: ReadResponse| { + snap_tx.send(resp).unwrap(); + })), + ); + must_not_redirect(&mut reader, &rx, task2); + assert!( + snap_rx + .recv() + .unwrap() + .response + .get_header() + .get_error() + .has_data_is_not_ready() + ); + } } diff --git a/tests/failpoints/cases/test_kv_service.rs b/tests/failpoints/cases/test_kv_service.rs index f3831bb984b..00f5c3c778e 100644 --- a/tests/failpoints/cases/test_kv_service.rs +++ b/tests/failpoints/cases/test_kv_service.rs @@ -5,9 +5,10 @@ use std::{sync::Arc, time::Duration}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{kvrpcpb::*, tikvpb::TikvClient}; use test_raftstore::{ - must_kv_prewrite, must_new_cluster_and_kv_client, must_new_cluster_mul, - try_kv_prewrite_with_impl, + configure_for_lease_read, must_kv_commit, must_kv_prewrite, must_new_cluster_and_kv_client, + must_new_cluster_mul, new_server_cluster, try_kv_prewrite_with_impl, }; +use tikv_util::{config::ReadableDuration, HandyRwLock}; #[test] fn test_batch_get_memory_lock() { @@ -103,3 +104,55 @@ fn test_undetermined_write_err() { // The previous panic hasn't been captured. 
assert!(std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| drop(cluster))).is_err()); } +#[test] +fn test_stale_read_on_local_leader() { + let mut cluster = new_server_cluster(0, 1); + // Increase the election tick to make this test case running reliably. + configure_for_lease_read(&mut cluster.cfg, Some(50), Some(10_000)); + let max_lease = Duration::from_secs(2); + cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration(max_lease); + cluster.pd_client.disable_default_operator(); + cluster.run(); + + let region_id = 1; + let leader = cluster.leader_of_region(region_id).unwrap(); + let epoch = cluster.get_region_epoch(region_id); + let mut ctx = Context::default(); + ctx.set_region_id(region_id); + ctx.set_peer(leader.clone()); + ctx.set_region_epoch(epoch); + let env = Arc::new(Environment::new(1)); + let channel = + ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); + let client = TikvClient::new(channel); + + let (k, v) = (b"key".to_vec(), b"value".to_vec()); + let v1 = b"value1".to_vec(); + + // Write record. + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(k.clone()); + mutation.set_value(v.clone()); + must_kv_prewrite(&client, ctx.clone(), vec![mutation], k.clone(), 10); + must_kv_commit(&client, ctx.clone(), vec![k.clone()], 10, 30, 30); + + // Prewrite and leave a lock. + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(k.clone()); + mutation.set_value(v1); + must_kv_prewrite(&client, ctx.clone(), vec![mutation], k.clone(), 50); + + let mut req = GetRequest::default(); + req.set_context(ctx); + req.set_key(k); + req.version = 40; + req.mut_context().set_stale_read(true); + + // The stale read should fallback and succeed on the leader peer. 
+ let resp = client.kv_get(&req).unwrap(); + assert!(resp.error.is_none()); + assert!(resp.region_error.is_none()); + assert_eq!(v, resp.get_value()); +} diff --git a/tests/failpoints/cases/test_replica_stale_read.rs b/tests/failpoints/cases/test_replica_stale_read.rs index b7d436d92d7..cb986250d82 100644 --- a/tests/failpoints/cases/test_replica_stale_read.rs +++ b/tests/failpoints/cases/test_replica_stale_read.rs @@ -288,9 +288,11 @@ fn test_update_resoved_ts_before_apply_index() { sleep_ms(100); // The leader can't handle stale read with `commit_ts2` because its `safe_ts` - // can't update due to its `apply_index` not update + // can't update due to its `apply_index` not update. + // The request would be handled as a snapshot read on the valid leader peer + // after fallback. let resp = leader_client.kv_read(b"key1".to_vec(), commit_ts2); - assert!(resp.get_region_error().has_data_is_not_ready(),); + assert_eq!(resp.get_value(), b"value2"); // The follower can't handle stale read with `commit_ts2` because it don't // have enough data let resp = follower_client2.kv_read(b"key1".to_vec(), commit_ts2); @@ -667,10 +669,10 @@ fn test_stale_read_future_ts_not_update_max_ts() { b"key1".to_vec(), ); - // Perform stale read with a future ts should return error + // Perform stale read with a future ts, the stale read could be processed + // falling back to snapshot read on the leader peer. 
let read_ts = get_tso(&pd_client) + 10000000; - let resp = leader_client.kv_read(b"key1".to_vec(), read_ts); - assert!(resp.get_region_error().has_data_is_not_ready()); + leader_client.must_kv_read_equal(b"key1".to_vec(), b"value1".to_vec(), read_ts); // The `max_ts` should not updated by the stale read request, so we can prewrite // and commit `async_commit` transaction with a ts that smaller than the @@ -687,10 +689,10 @@ fn test_stale_read_future_ts_not_update_max_ts() { leader_client.must_kv_commit(vec![b"key2".to_vec()], prewrite_ts, commit_ts); leader_client.must_kv_read_equal(b"key2".to_vec(), b"value1".to_vec(), get_tso(&pd_client)); - // Perform stale read with a future ts should return error + // Perform stale read with a future ts, the stale read could be processed + // falling back to snapshot read on the leader peer. let read_ts = get_tso(&pd_client) + 10000000; - let resp = leader_client.kv_read(b"key1".to_vec(), read_ts); - assert!(resp.get_region_error().has_data_is_not_ready()); + leader_client.must_kv_read_equal(b"key2".to_vec(), b"value1".to_vec(), read_ts); // The `max_ts` should not updated by the stale read request, so 1pc transaction // with a ts that smaller than the `read_ts` should not be fallbacked to 2pc From 9fb1ce63a079cd486f0fc4661ff28abb76d0e734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 16 Oct 2023 18:18:29 +0800 Subject: [PATCH 0960/1149] snap_restore: Abort last recover region (#15685) close tikv/tikv#15684 This PR will make `recover_region` return `ABORTED` once there are new `recover_region` RPCs in. 
Signed-off-by: hillium Co-authored-by: qupeng --- Cargo.toml | 2 +- components/raftstore/src/store/transport.rs | 16 +- components/snap_recovery/Cargo.toml | 7 + components/snap_recovery/src/leader_keeper.rs | 16 +- components/snap_recovery/src/services.rs | 147 ++++++++++++++++-- 5 files changed, 165 insertions(+), 23 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4d8cefa9fa4..bd2b4946950 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -334,7 +334,7 @@ resource_metering = { path = "components/resource_metering" } security = { path = "components/security" } server = { path = "components/server" } service = { path = "components/service" } -snap_recovery = { path = "components/snap_recovery" } +snap_recovery = { path = "components/snap_recovery", default-features = false } sst_importer = { path = "components/sst_importer", default-features = false } test_backup = { path = "components/test_backup" } test_coprocessor = { path = "components/test_coprocessor", default-features = false } diff --git a/components/raftstore/src/store/transport.rs b/components/raftstore/src/store/transport.rs index 7f10e7cd249..2ca19fbe5fe 100644 --- a/components/raftstore/src/store/transport.rs +++ b/components/raftstore/src/store/transport.rs @@ -1,7 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. // #[PerformanceCriticalPath] -use std::sync::mpsc; +use std::sync::{mpsc, Mutex}; use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{KvEngine, RaftEngine, Snapshot}; @@ -46,6 +46,13 @@ where fn significant_send(&self, region_id: u64, msg: SignificantMsg) -> Result<()>; } +impl<'a, T: SignificantRouter, EK: KvEngine> SignificantRouter for &'a Mutex { + #[inline] + fn significant_send(&self, region_id: u64, msg: SignificantMsg) -> Result<()> { + Mutex::lock(self).unwrap().significant_send(region_id, msg) + } +} + /// Routes proposal to target region. 
pub trait ProposalRouter where @@ -79,6 +86,13 @@ where } } +impl<'a, EK: KvEngine, T: CasualRouter> CasualRouter for &'a Mutex { + #[inline] + fn send(&self, region_id: u64, msg: CasualMessage) -> Result<()> { + CasualRouter::send(&*Mutex::lock(self).unwrap(), region_id, msg) + } +} + impl SignificantRouter for RaftRouter where EK: KvEngine, diff --git a/components/snap_recovery/Cargo.toml b/components/snap_recovery/Cargo.toml index 8b0b0ec4c3a..23cbdcfe098 100644 --- a/components/snap_recovery/Cargo.toml +++ b/components/snap_recovery/Cargo.toml @@ -5,6 +5,13 @@ edition = "2021" publish = false # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] +test-engine-kv-rocksdb = ["tikv/test-engine-kv-rocksdb"] +test-engine-raft-raft-engine = ["tikv/test-engine-raft-raft-engine"] +test-engines-rocksdb = ["tikv/test-engines-rocksdb"] +test-engines-panic = ["tikv/test-engines-panic"] + [dependencies] chrono = "0.4" encryption = { workspace = true } diff --git a/components/snap_recovery/src/leader_keeper.rs b/components/snap_recovery/src/leader_keeper.rs index 417d5becca3..ca2623c82ca 100644 --- a/components/snap_recovery/src/leader_keeper.rs +++ b/components/snap_recovery/src/leader_keeper.rs @@ -9,18 +9,17 @@ use std::{ use engine_traits::KvEngine; use futures::compat::Future01CompatExt; -use itertools::Itertools; use raftstore::{ errors::{Error, Result}, store::{Callback, CasualMessage, CasualRouter, SignificantMsg, SignificantRouter}, }; use tikv_util::{future::paired_future_callback, timer::GLOBAL_TIMER_HANDLE}; -pub struct LeaderKeeper { +pub struct LeaderKeeper<'a, EK, Router: 'a> { router: Router, not_leader: HashSet, - _ek: PhantomData, + _ek: PhantomData<&'a EK>, } #[derive(Default)] @@ -51,10 +50,10 @@ impl std::fmt::Debug for StepResult { } } -impl LeaderKeeper +impl<'a, EK, Router> LeaderKeeper<'a, EK, Router> where EK: KvEngine, - 
Router: CasualRouter + SignificantRouter + 'static, + Router: CasualRouter + SignificantRouter + 'a, { pub fn new(router: Router, to_keep: impl IntoIterator) -> Self { Self { @@ -85,8 +84,9 @@ where const CONCURRENCY: usize = 256; let r = Mutex::new(StepResult::default()); let success = Mutex::new(HashSet::new()); - for batch in &self.not_leader.iter().chunks(CONCURRENCY) { - let tasks = batch.map(|region_id| async { + let regions = self.not_leader.iter().copied().collect::>(); + for batch in regions.as_slice().chunks(CONCURRENCY) { + let tasks = batch.iter().map(|region_id| async { match self.check_leader(*region_id).await { Ok(_) => { success.lock().unwrap().insert(*region_id); @@ -150,7 +150,7 @@ mod test { leaders: RefCell>, } - impl LeaderKeeper { + impl<'a, EK, Router> LeaderKeeper<'a, EK, Router> { fn mut_router(&mut self) -> &mut Router { &mut self.router } diff --git a/components/snap_recovery/src/services.rs b/components/snap_recovery/src/services.rs index 10f82d64917..daf6e7ed30f 100644 --- a/components/snap_recovery/src/services.rs +++ b/components/snap_recovery/src/services.rs @@ -2,8 +2,14 @@ use std::{ error::Error as StdError, + fmt::Display, + future::Future, result, - sync::mpsc::{sync_channel, SyncSender}, + sync::{ + atomic::{AtomicBool, Ordering}, + mpsc::{sync_channel, SyncSender}, + Arc, Mutex, + }, thread::Builder, time::Instant, }; @@ -17,10 +23,12 @@ use engine_traits::{CfNamesExt, CfOptionsExt, Engines, Peekable, RaftEngine}; use futures::{ channel::mpsc, executor::{ThreadPool, ThreadPoolBuilder}, + stream::{AbortHandle, Aborted}, FutureExt, SinkExt, StreamExt, }; use grpcio::{ - ClientStreamingSink, RequestStream, RpcContext, ServerStreamingSink, UnarySink, WriteFlags, + ClientStreamingSink, RequestStream, RpcContext, RpcStatus, RpcStatusCode, ServerStreamingSink, + UnarySink, WriteFlags, }; use kvproto::{raft_serverpb::StoreIdent, recoverdatapb::*}; use raftstore::{ @@ -65,6 +73,44 @@ pub struct RecoveryService { engines: Engines, 
router: RaftRouter, threads: ThreadPool, + + /// The handle to last call of recover region RPC. + /// + /// We need to make sure the execution of keeping leader exits before next + /// `RecoverRegion` rpc gets in. Or the previous call may stuck at keep + /// leader forever, once the second caller request the leader to be at + /// another store. + // NOTE: Perhaps it would be better to abort the procedure as soon as the client + // stream has been closed, but yet it seems there isn't such hook like + // `on_client_go` for us, and the current implementation only start + // work AFTER the client closes their sender part(!) + last_recovery_region_rpc: Arc>>, +} + +struct RecoverRegionState { + start_at: Instant, + finished: Arc, + abort: AbortHandle, +} + +impl RecoverRegionState { + /// Create the state by wrapping a execution of recover region. + fn wrap_task, T>( + task: F, + ) -> (Self, impl Future>) { + let finished = Arc::new(AtomicBool::new(false)); + let (cancelable_task, abort) = futures::future::abortable(task); + let state = Self { + start_at: Instant::now(), + finished: Arc::clone(&finished), + abort, + }; + (state, async move { + let res = cancelable_task.await; + finished.store(true, Ordering::SeqCst); + res + }) + } } impl RecoveryService { @@ -99,6 +145,7 @@ impl RecoveryService { engines, router, threads, + last_recovery_region_rpc: Arc::default(), } } @@ -140,6 +187,34 @@ impl RecoveryService { Ok(store_id) } + fn abort_last_recover_region(&self, place: impl Display) { + let mut last_state_lock = self.last_recovery_region_rpc.lock().unwrap(); + Self::abort_last_recover_region_of(place, &mut last_state_lock) + } + + fn replace_last_recover_region(&self, place: impl Display, new_state: RecoverRegionState) { + let mut last_state_lock = self.last_recovery_region_rpc.lock().unwrap(); + Self::abort_last_recover_region_of(place, &mut last_state_lock); + *last_state_lock = Some(new_state); + } + + fn abort_last_recover_region_of( + place: impl Display, + 
last_state_lock: &mut Option, + ) { + if let Some(last_state) = last_state_lock.take() { + info!("Another task enter, checking last task."; + "finished" => ?last_state.finished, + "start_before" => ?last_state.start_at.elapsed(), + "abort_by" => %place, + ); + if !last_state.finished.load(Ordering::SeqCst) { + last_state.abort.abort(); + warn!("Last task not finished, aborting it."); + } + } + } + // a new wait apply syncer share with all regions, // when all region reached the target index, share reference decreased to 0, // trigger closure to send finish info back. @@ -190,7 +265,7 @@ impl RecoverData for RecoveryService { // 1. br start to ready region meta fn read_region_meta( &mut self, - _ctx: RpcContext<'_>, + ctx: RpcContext<'_>, _req: ReadRegionMetaRequest, mut sink: ServerStreamingSink, ) { @@ -215,6 +290,11 @@ impl RecoverData for RecoveryService { } }); + // Hacking: Sometimes, the client may omit the RPC call to `recover_region` if + // no leader should be register to some (unfortunate) store. So we abort + // last recover region here too, anyway this RPC implies a consequent + // `recover_region` for now. 
+ self.abort_last_recover_region(format_args!("read_region_meta by {}", ctx.peer())); self.threads.spawn_ok(send_task); } @@ -222,11 +302,11 @@ impl RecoverData for RecoveryService { // assign region leader and wait leader apply to last log fn recover_region( &mut self, - _ctx: RpcContext<'_>, + ctx: RpcContext<'_>, mut stream: RequestStream, sink: ClientStreamingSink, ) { - let raft_router = self.router.clone(); + let mut raft_router = Mutex::new(self.router.clone()); let store_id = self.get_store_id(); info!("start to recover the region"); let task = async move { @@ -241,17 +321,15 @@ impl RecoverData for RecoveryService { } } - let mut lk = LeaderKeeper::new(raft_router.clone(), leaders.clone()); + let mut lk = LeaderKeeper::new(&raft_router, leaders.clone()); // We must use the tokio runtime here because there isn't a `block_in_place` // like thing in the futures executor. It simply panics when block // on the block_on context. // It is also impossible to directly `await` here, because that will make // borrowing to the raft router crosses the await point. 
- tokio::runtime::Builder::new_current_thread() - .build() - .expect("failed to build temporary tokio runtime.") - .block_on(lk.elect_and_wait_all_ready()); + lk.elect_and_wait_all_ready().await; info!("all region leader assigned done"; "count" => %leaders.len()); + drop(lk); let now = Instant::now(); // wait apply to the last log @@ -260,7 +338,7 @@ impl RecoverData for RecoveryService { let (tx, rx) = sync_channel(1); REGION_EVENT_COUNTER.start_wait_leader_apply.inc(); let wait_apply = SnapshotRecoveryWaitApplySyncer::new(region_id, tx.clone()); - if let Err(e) = raft_router.significant_send( + if let Err(e) = raft_router.get_mut().unwrap().significant_send( region_id, SignificantMsg::SnapshotRecoveryWaitApply(wait_apply.clone()), ) { @@ -277,6 +355,10 @@ impl RecoverData for RecoveryService { for (rid, rx) in leaders.iter().zip(rx_apply) { if let Some(rx) = rx { CURRENT_WAIT_APPLY_LEADER.set(*rid as _); + // FIXME: we cannot the former RPC when we get stuck at here. + // Perhaps we need to make `SnapshotRecoveryWaitApplySyncer` be able to support + // asynchronous channels. But for now, waiting seems won't cause live lock, so + // we are keeping it unchanged. match rx.recv() { Ok(region_id) => { debug!("leader apply to last log"; "region_id" => region_id); @@ -301,10 +383,20 @@ impl RecoverData for RecoveryService { Err(e) => error!("failed to get store id"; "error" => ?e), }; - let _ = sink.success(resp).await; + resp }; - self.threads.spawn_ok(task); + let (state, task) = RecoverRegionState::wrap_task(task); + self.replace_last_recover_region(format!("recover_region by {}", ctx.peer()), state); + self.threads.spawn_ok(async move { + let res = match task.await { + Ok(resp) => sink.success(resp), + Err(Aborted) => sink.fail(RpcStatus::new(RpcStatusCode::ABORTED)), + }; + if let Err(err) = res.await { + warn!("failed to response recover region rpc"; "err" => %err); + } + }); } // 3. 
ensure all region peer/follower apply to last @@ -381,3 +473,32 @@ impl RecoverData for RecoveryService { self.threads.spawn_ok(send_task); } } + +#[cfg(test)] +mod test { + use std::{sync::atomic::Ordering, time::Duration}; + + use futures::never::Never; + + use super::RecoverRegionState; + + #[test] + fn test_state() { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_time() + .build() + .unwrap(); + let (state, task) = RecoverRegionState::wrap_task(futures::future::pending::()); + let hnd = rt.spawn(task); + state.abort.abort(); + rt.block_on(async { tokio::time::timeout(Duration::from_secs(10), hnd).await }) + .unwrap() + .unwrap() + .unwrap_err(); + + let (state, task) = RecoverRegionState::wrap_task(futures::future::ready(42)); + assert_eq!(state.finished.load(Ordering::SeqCst), false); + assert_eq!(rt.block_on(task), Ok(42)); + assert_eq!(state.finished.load(Ordering::SeqCst), true); + } +} From 8c7d9e3b7d71b012fdf2a7e50423b61af1bf6092 Mon Sep 17 00:00:00 2001 From: glorv Date: Mon, 16 Oct 2023 21:00:29 +0800 Subject: [PATCH 0961/1149] config: adjust rocksdb background compaction threads (#15769) ref tikv/tikv#14470 Signed-off-by: glorv Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/config/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 74f25a22ef6..d1fb1e4f8d8 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -264,7 +264,7 @@ fn get_background_job_limits_impl( // v2: decrease the compaction threads to make the qps more stable. 
let max_compactions = match engine_type { EngineType::RaftKv => max_background_jobs - max_background_flushes, - EngineType::RaftKv2 => (max_background_jobs + 7) / 8, + EngineType::RaftKv2 => (max_background_jobs + 3) / 4, }; let max_sub_compactions: u32 = (max_compactions - 1).clamp(1, defaults.max_sub_compactions); max_background_jobs = max_background_flushes + max_compactions; @@ -6052,7 +6052,7 @@ mod tests { &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { - max_background_jobs: 3, + max_background_jobs: 4, max_background_flushes: 2, max_sub_compactions: 1, max_titan_background_gc: 4, @@ -6082,9 +6082,9 @@ mod tests { &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS ), BackgroundJobLimits { - max_background_jobs: 5, + max_background_jobs: 6, max_background_flushes: 3, - max_sub_compactions: 1, + max_sub_compactions: 2, max_titan_background_gc: 4, } ); From d8756403ef730142d7eb5b3b79567b1576d5ed50 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Mon, 16 Oct 2023 12:56:00 -0500 Subject: [PATCH 0962/1149] import: write RPC will check region epoch before continue (#15013) close tikv/tikv#15003 Signed-off-by: lance6716 Co-authored-by: tonyxuqqi --- Makefile | 8 + .../src/operation/command/write/ingest.rs | 9 +- components/raftstore/src/store/fsm/store.rs | 87 ++------ components/raftstore/src/store/msg.rs | 6 - .../raftstore/src/store/worker/cleanup.rs | 19 +- .../raftstore/src/store/worker/cleanup_sst.rs | 120 +----------- components/server/src/server.rs | 2 + components/server/src/server2.rs | 2 + components/sst_importer/src/import_file.rs | 49 +++-- components/sst_importer/src/lib.rs | 2 +- components/sst_importer/src/sst_importer.rs | 8 +- components/test_raftstore-v2/src/server.rs | 1 + components/test_raftstore/src/server.rs | 1 + src/import/sst_service.rs | 185 +++++++++++++++++- 14 files changed, 271 insertions(+), 228 deletions(-) diff --git a/Makefile b/Makefile index bb1d7316e1b..ce8d4e8b793 100644 --- a/Makefile +++ b/Makefile @@ -406,6 +406,14 @@ 
docker_test: ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ make test +docker_shell: + docker build -f Dockerfile.test \ + -t ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ + . + docker run -it -v $(shell pwd):/tikv \ + ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ + /bin/bash + ## The driver for script/run-cargo.sh ## ---------------------------------- diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index e963434fe83..3d39c9a7369 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -43,6 +43,11 @@ impl Store { let import_size = box_try!(ctx.sst_importer.get_total_size()); STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(ctx.sst_importer.list_ssts()); + // filter old version SSTs + let ssts: Vec<_> = ssts + .into_iter() + .filter(|sst| sst.api_version >= sst_importer::API_VERSION_2) + .collect(); if ssts.is_empty() { return Ok(()); } @@ -50,9 +55,9 @@ impl Store { let mut region_ssts: HashMap<_, Vec<_>> = HashMap::default(); for sst in ssts { region_ssts - .entry(sst.get_region_id()) + .entry(sst.meta.get_region_id()) .or_default() - .push(sst); + .push(sst.meta); } let ranges = ctx.sst_importer.ranges_in_import(); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 2434dfdd8e6..33010a993a2 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -36,14 +36,13 @@ use futures::{compat::Future01CompatExt, FutureExt}; use grpcio_health::HealthService; use keys::{self, data_end_key, data_key, enc_end_key, enc_start_key}; use kvproto::{ - import_sstpb::{SstMeta, SwitchMode}, metapb::{self, Region, RegionEpoch}, pdpb::{self, QueryStats, StoreStats}, raft_cmdpb::{AdminCmdType, AdminRequest}, raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, 
RaftMessage, RegionLocalState}, replication_modepb::{ReplicationMode, ReplicationStatus}, }; -use pd_client::{metrics::STORE_SIZE_EVENT_INT_VEC, Feature, FeatureGate, PdClient}; +use pd_client::{Feature, FeatureGate, PdClient}; use protobuf::Message; use raft::StateRole; use resource_control::{channel::unbounded, ResourceGroupManager}; @@ -810,9 +809,6 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> } } StoreMsg::CompactedEvent(event) => self.on_compaction_finished(event), - StoreMsg::ValidateSstResult { invalid_ssts } => { - self.on_validate_sst_result(invalid_ssts) - } StoreMsg::ClearRegionSizeInRange { start_key, end_key } => { self.clear_region_size_in_range(&start_key, &end_key) } @@ -1652,12 +1648,7 @@ impl RaftBatchSystem { ); let compact_runner = CompactRunner::new(engines.kv.clone()); - let cleanup_sst_runner = CleanupSstRunner::new( - meta.get_id(), - self.router.clone(), - Arc::clone(&importer), - Arc::clone(&pd_client), - ); + let cleanup_sst_runner = CleanupSstRunner::new(Arc::clone(&importer)); let gc_snapshot_runner = GcSnapshotRunner::new( meta.get_id(), self.router.clone(), // RaftRouter @@ -2755,44 +2746,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER, T> { - fn on_validate_sst_result(&mut self, ssts: Vec) { - if ssts.is_empty() || self.ctx.importer.get_mode() == SwitchMode::Import { - return; - } - // A stale peer can still ingest a stale Sst before it is - // destroyed. We need to make sure that no stale peer exists. 
- let mut delete_ssts = Vec::new(); - { - let meta = self.ctx.store_meta.lock().unwrap(); - for sst in ssts { - if !meta.regions.contains_key(&sst.get_region_id()) { - delete_ssts.push(sst); - } - } - } - if delete_ssts.is_empty() { - return; - } - - let task = CleanupSstTask::DeleteSst { ssts: delete_ssts }; - if let Err(e) = self - .ctx - .cleanup_scheduler - .schedule(CleanupTask::CleanupSst(task)) - { - error!( - "schedule to delete ssts failed"; - "store_id" => self.fsm.store.id, - "err" => ?e, - ); - } - } - fn on_cleanup_import_sst(&mut self) -> Result<()> { let mut delete_ssts = Vec::new(); - let mut validate_ssts = Vec::new(); - let import_size = box_try!(self.ctx.importer.get_total_size()); - STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(self.ctx.importer.list_ssts()); if ssts.is_empty() { @@ -2801,15 +2756,22 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER { let meta = self.ctx.store_meta.lock().unwrap(); for sst in ssts { - if let Some(r) = meta.regions.get(&sst.get_region_id()) { + if sst.api_version < sst_importer::API_VERSION_2 { + // SST of old versions are created by old TiKV and have different prerequisite + // we can't delete them here. They can only be deleted manually + continue; + } + if let Some(r) = meta.regions.get(&sst.meta.get_region_id()) { let region_epoch = r.get_region_epoch(); - if util::is_epoch_stale(sst.get_region_epoch(), region_epoch) { + if util::is_epoch_stale(sst.meta.get_region_epoch(), region_epoch) { // If the SST epoch is stale, it will not be ingested anymore. - delete_ssts.push(sst); + delete_ssts.push(sst.meta); } } else { - // If the peer doesn't exist, we need to validate the SST through PD. - validate_ssts.push(sst); + // The write RPC of import sst service have make sure the region do exist at the + // write time, and now the region is not found, sst can be + // deleted because it won't be used by ingest in future. 
+ delete_ssts.push(sst.meta); } } } @@ -2829,27 +2791,6 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } - // When there is an import job running, the region which this sst belongs may - // has not been split from the origin region because the apply thread is so busy - // that it can not apply SplitRequest as soon as possible. So we can not - // delete this sst file. - if !validate_ssts.is_empty() && self.ctx.importer.get_mode() != SwitchMode::Import { - let task = CleanupSstTask::ValidateSst { - ssts: validate_ssts, - }; - if let Err(e) = self - .ctx - .cleanup_scheduler - .schedule(CleanupTask::CleanupSst(task)) - { - error!( - "schedule to validate ssts failed"; - "store_id" => self.fsm.store.id, - "err" => ?e, - ); - } - } - Ok(()) } diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 64c5be6d7e1..f7bf7f6d297 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -10,7 +10,6 @@ use engine_traits::{CompactedEvent, KvEngine, Snapshot}; use futures::channel::mpsc::UnboundedSender; use kvproto::{ brpb::CheckAdminResponse, - import_sstpb::SstMeta, kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp}, metapb, metapb::RegionEpoch, @@ -824,10 +823,6 @@ where { RaftMessage(InspectedRaftMessage), - ValidateSstResult { - invalid_ssts: Vec, - }, - // Clear region size and keys for all regions in the range, so we can force them to // re-calculate their size later. ClearRegionSizeInRange { @@ -884,7 +879,6 @@ where write!(fmt, "Store {} is unreachable", store_id) } StoreMsg::CompactedEvent(ref event) => write!(fmt, "CompactedEvent cf {}", event.cf()), - StoreMsg::ValidateSstResult { .. 
} => write!(fmt, "Validate SST Result"), StoreMsg::ClearRegionSizeInRange { ref start_key, ref end_key, diff --git a/components/raftstore/src/store/worker/cleanup.rs b/components/raftstore/src/store/worker/cleanup.rs index 632e85f40cc..726b7abe5ce 100644 --- a/components/raftstore/src/store/worker/cleanup.rs +++ b/components/raftstore/src/store/worker/cleanup.rs @@ -3,7 +3,6 @@ use std::fmt::{self, Display, Formatter}; use engine_traits::{KvEngine, RaftEngine}; -use pd_client::PdClient; use tikv_util::worker::Runnable; use super::{ @@ -11,7 +10,6 @@ use super::{ cleanup_sst::{Runner as CleanupSstRunner, Task as CleanupSstTask}, compact::{Runner as CompactRunner, Task as CompactTask}, }; -use crate::store::StoreRouter; pub enum Task { Compact(CompactTask), @@ -29,29 +27,26 @@ impl Display for Task { } } -pub struct Runner +pub struct Runner where E: KvEngine, R: RaftEngine, - S: StoreRouter, { compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, } -impl Runner +impl Runner where E: KvEngine, R: RaftEngine, - C: PdClient, - S: StoreRouter, { pub fn new( compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, - ) -> Runner { + ) -> Runner { Runner { compact, cleanup_sst, @@ -60,12 +55,10 @@ where } } -impl Runnable for Runner +impl Runnable for Runner where E: KvEngine, R: RaftEngine, - C: PdClient, - S: StoreRouter, { type Task = Task; diff --git a/components/raftstore/src/store/worker/cleanup_sst.rs b/components/raftstore/src/store/worker/cleanup_sst.rs index 8174b872f4b..44f188e6f8f 100644 --- a/components/raftstore/src/store/worker/cleanup_sst.rs +++ b/components/raftstore/src/store/worker/cleanup_sst.rs @@ -1,62 +1,30 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{error::Error, fmt, marker::PhantomData, sync::Arc}; +use std::{fmt, sync::Arc}; -use engine_traits::KvEngine; -use kvproto::{import_sstpb::SstMeta, metapb::Region}; -use pd_client::PdClient; +use kvproto::import_sstpb::SstMeta; use sst_importer::SstImporter; -use tikv_util::{error, worker::Runnable}; - -use crate::store::{util::is_epoch_stale, StoreMsg, StoreRouter}; - -type Result = std::result::Result>; +use tikv_util::worker::Runnable; pub enum Task { DeleteSst { ssts: Vec }, - ValidateSst { ssts: Vec }, } impl fmt::Display for Task { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Task::DeleteSst { ref ssts } => write!(f, "Delete {} ssts", ssts.len()), - Task::ValidateSst { ref ssts } => write!(f, "Validate {} ssts", ssts.len()), } } } -pub struct Runner -where - EK: KvEngine, - S: StoreRouter, -{ - store_id: u64, - store_router: S, +pub struct Runner { importer: Arc, - pd_client: Arc, - _engine: PhantomData, } -impl Runner -where - EK: KvEngine, - C: PdClient, - S: StoreRouter, -{ - pub fn new( - store_id: u64, - store_router: S, - importer: Arc, - pd_client: Arc, - ) -> Runner { - Runner { - store_id, - store_router, - importer, - pd_client, - _engine: PhantomData, - } +impl Runner { + pub fn new(importer: Arc) -> Runner { + Runner { importer } } /// Deletes SST files from the importer. @@ -65,78 +33,9 @@ where let _ = self.importer.delete(sst); } } - - fn get_region_by_meta(&self, sst: &SstMeta) -> Result { - // The SST meta has been delivered with a range, use it directly. - // For now, no case will reach this. But this still could be a guard for - // reducing the superise in the future... - if !sst.get_range().get_start().is_empty() || !sst.get_range().get_end().is_empty() { - return self - .pd_client - .get_region(sst.get_range().get_start()) - .map_err(Into::into); - } - // Once there isn't range provided. - let query_by_start_key_of_full_meta = || { - let start_key = self - .importer - .load_start_key_by_meta::(sst)? 
- .ok_or_else(|| -> Box { - "failed to load start key from sst, the sst might be empty".into() - })?; - let region = self.pd_client.get_region(&start_key)?; - Result::Ok(region) - }; - query_by_start_key_of_full_meta() - .map_err(|err| - format!("failed to load full sst meta from disk for {:?} and there isn't extra information provided: {err}", sst.get_uuid()).into() - ) - } - - /// Validates whether the SST is stale or not. - fn handle_validate_sst(&self, ssts: Vec) { - let store_id = self.store_id; - let mut invalid_ssts = Vec::new(); - for sst in ssts { - match self.get_region_by_meta(&sst) { - Ok(r) => { - // The region id may or may not be the same as the - // SST file, but it doesn't matter, because the - // epoch of a range will not decrease anyway. - if is_epoch_stale(r.get_region_epoch(), sst.get_region_epoch()) { - // Region has not been updated. - continue; - } - if r.get_id() == sst.get_region_id() - && r.get_peers().iter().any(|p| p.get_store_id() == store_id) - { - // The SST still belongs to this store. - continue; - } - invalid_ssts.push(sst); - } - Err(e) => { - error!("get region failed"; "err" => %e); - } - } - } - - // We need to send back the result to check for the stale - // peer, which may ingest the stale SST before it is - // destroyed. 
- let msg = StoreMsg::ValidateSstResult { invalid_ssts }; - if let Err(e) = self.store_router.send(msg) { - error!(%e; "send validate sst result failed"); - } - } } -impl Runnable for Runner -where - EK: KvEngine, - C: PdClient, - S: StoreRouter, -{ +impl Runnable for Runner { type Task = Task; fn run(&mut self, task: Task) { @@ -144,9 +43,6 @@ where Task::DeleteSst { ssts } => { self.handle_delete_sst(ssts); } - Task::ValidateSst { ssts } => { - self.handle_validate_sst(ssts); - } } } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 8d44890e5a6..a4b6276a587 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -366,6 +366,7 @@ where router.clone(), config.coprocessor.clone(), )); + let region_info_accessor = RegionInfoAccessor::new(coprocessor_host.as_mut().unwrap()); // Initialize concurrency manager @@ -1080,6 +1081,7 @@ where servers.importer.clone(), None, self.resource_manager.clone(), + Arc::new(self.region_info_accessor.clone()), ); let import_cfg_mgr = import_service.get_config_manager(); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 2593035618d..65d02f58c08 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -948,6 +948,7 @@ where backup_worker.start(backup_endpoint); // Import SST service. 
+ let region_info_accessor = self.region_info_accessor.as_ref().unwrap().clone(); let import_service = ImportSstService::new( self.core.config.import.clone(), self.core.config.raft_store.raft_entry_max_size, @@ -956,6 +957,7 @@ where servers.importer.clone(), Some(self.router.as_ref().unwrap().store_meta().clone()), self.resource_manager.clone(), + Arc::new(region_info_accessor), ); let import_cfg_mgr = import_service.get_config_manager(); diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index b270d26a411..ae81cf01646 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -440,7 +440,7 @@ impl ImportDir { Ok(real_key.map(ToOwned::to_owned)) } - pub fn list_ssts(&self) -> Result> { + pub fn list_ssts(&self) -> Result> { let mut ssts = Vec::new(); for e in file_system::read_dir(&self.root_dir)? { let e = e?; @@ -458,20 +458,33 @@ impl ImportDir { } const SST_SUFFIX: &str = ".sst"; - +// version 2: compared to version 1 which is the default version, we will check +// epoch of request and local region in write API. +pub const API_VERSION_2: i32 = 2; + +/// sst_meta_to_path will encode the filepath with default api version (current +/// is 2). So when the SstMeta is created in old version of TiKV and filepath +/// will not correspond to the real file, in the deletion logic we can't remove +/// these files. 
pub fn sst_meta_to_path(meta: &SstMeta) -> Result { Ok(PathBuf::from(format!( - "{}_{}_{}_{}_{}{}", + "{}_{}_{}_{}_{}_{}{}", UuidBuilder::from_slice(meta.get_uuid())?.build(), meta.get_region_id(), meta.get_region_epoch().get_conf_ver(), meta.get_region_epoch().get_version(), meta.get_cf_name(), + API_VERSION_2, SST_SUFFIX, ))) } -pub fn parse_meta_from_path>(path: P) -> Result { +pub struct SstMetaWithApiVersion { + pub meta: SstMeta, + pub api_version: i32, // in future we may move api_version into SstMeta +} + +pub fn parse_meta_from_path>(path: P) -> Result { let path = path.as_ref(); let file_name = match path.file_name().and_then(|n| n.to_str()) { Some(name) => name, @@ -500,7 +513,11 @@ pub fn parse_meta_from_path>(path: P) -> Result { // cf_name to path. meta.set_cf_name(elems[4].to_owned()); } - Ok(meta) + let mut api_version = 1; + if elems.len() > 5 { + api_version = elems[5].parse()?; + } + Ok(SstMetaWithApiVersion { meta, api_version }) } #[cfg(test)] @@ -520,11 +537,12 @@ mod test { meta.mut_region_epoch().set_version(3); let path = sst_meta_to_path(&meta).unwrap(); - let expected_path = format!("{}_1_2_3_default.sst", uuid); + let expected_path = format!("{}_1_2_3_default_2.sst", uuid); assert_eq!(path.to_str().unwrap(), &expected_path); - let new_meta = parse_meta_from_path(path).unwrap(); - assert_eq!(meta, new_meta); + let meta_with_ver = parse_meta_from_path(path).unwrap(); + assert_eq!(meta, meta_with_ver.meta); + assert_eq!(2, meta_with_ver.api_version); } #[test] @@ -543,8 +561,9 @@ mod test { meta.get_region_epoch().get_version(), SST_SUFFIX, )); - let new_meta = parse_meta_from_path(path).unwrap(); - assert_eq!(meta, new_meta); + let meta_with_ver = parse_meta_from_path(path).unwrap(); + assert_eq!(meta, meta_with_ver.meta); + assert_eq!(1, meta_with_ver.api_version); } #[cfg(feature = "test-engines-rocksdb")] @@ -596,14 +615,20 @@ mod test { w.finish().unwrap(); dp.save(arcmgr.as_deref()).unwrap(); let mut ssts = dir.list_ssts().unwrap(); - 
ssts.iter_mut().for_each(|meta| { + ssts.iter_mut().for_each(|meta_with_ver| { + let meta = &mut meta_with_ver.meta; let start = dir .load_start_key_by_meta::(meta, arcmgr.clone()) .unwrap() .unwrap(); meta.mut_range().set_start(start) }); - assert_eq!(ssts, vec![meta]); + assert_eq!( + ssts.iter() + .map(|meta_with_ver| { meta_with_ver.meta.clone() }) + .collect(), + vec![meta] + ); } #[test] diff --git a/components/sst_importer/src/lib.rs b/components/sst_importer/src/lib.rs index 0cfc3bab774..ff137005b09 100644 --- a/components/sst_importer/src/lib.rs +++ b/components/sst_importer/src/lib.rs @@ -27,7 +27,7 @@ pub mod sst_importer; pub use self::{ config::{Config, ConfigManager}, errors::{error_inc, Error, Result}, - import_file::sst_meta_to_path, + import_file::{sst_meta_to_path, API_VERSION_2}, import_mode2::range_overlaps, sst_importer::SstImporter, sst_writer::{RawSstWriter, TxnSstWriter}, diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 5530862e6a3..f36016eb309 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -51,7 +51,7 @@ use txn_types::{Key, TimeStamp, WriteRef}; use crate::{ caching::cache_map::{CacheMap, ShareOwned}, - import_file::{ImportDir, ImportFile}, + import_file::{ImportDir, ImportFile, SstMetaWithApiVersion}, import_mode::{ImportModeSwitcher, RocksDbMetricsFn}, import_mode2::{HashRange, ImportModeSwitcherV2}, metrics::*, @@ -1387,7 +1387,7 @@ impl SstImporter { /// List the basic information of the current SST files. /// The information contains UUID, region ID, region Epoch. /// Other fields may be left blank. 
- pub fn list_ssts(&self) -> Result> { + pub fn list_ssts(&self) -> Result> { self.dir.list_ssts() } @@ -1587,9 +1587,9 @@ mod tests { for sst in &ssts { ingested .iter() - .find(|s| s.get_uuid() == sst.get_uuid()) + .find(|s| s.get_uuid() == sst.meta.get_uuid()) .unwrap(); - dir.delete(sst, key_manager.as_deref()).unwrap(); + dir.delete(&sst.meta, key_manager.as_deref()).unwrap(); } assert!(dir.list_ssts().unwrap().is_empty()); } diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 299e93eb746..5073304e17a 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -561,6 +561,7 @@ impl ServerCluster { Arc::clone(&importer), Some(store_meta), resource_manager.clone(), + Arc::new(region_info_accessor.clone()), ); // Create deadlock service. diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 0002f36d647..f5c64fa86e9 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -451,6 +451,7 @@ impl ServerCluster { Arc::clone(&importer), None, resource_manager.clone(), + Arc::new(region_info_accessor.clone()), ); // Create deadlock service. 
diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 68403e226f8..6f9f22c9cb4 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -15,6 +15,7 @@ use std::{ use engine_traits::{CompactExt, MiscExt, CF_DEFAULT, CF_WRITE}; use file_system::{set_io_type, IoType}; use futures::{sink::SinkExt, stream::TryStreamExt, FutureExt, TryFutureExt}; +use futures_executor::block_on; use grpcio::{ ClientStreamingSink, RequestStream, RpcContext, ServerStreamingSink, UnarySink, WriteFlags, }; @@ -27,7 +28,9 @@ use kvproto::{ WriteRequest_oneof_chunk as Chunk, *, }, kvrpcpb::Context, + metapb::RegionEpoch, }; +use raftstore::{coprocessor::RegionInfoProvider, store::util::is_epoch_stale, RegionInfoAccessor}; use raftstore_v2::StoreMeta; use resource_control::{with_resource_limiter, ResourceGroupManager}; use sst_importer::{ @@ -39,7 +42,7 @@ use tikv_kv::{ }; use tikv_util::{ config::ReadableSize, - future::create_stream_with_buffer, + future::{create_stream_with_buffer, paired_future_callback}, sys::thread::ThreadBuildWrapper, time::{Instant, Limiter}, HandyRwLock, @@ -124,6 +127,7 @@ pub struct ImportSstService { limiter: Limiter, task_slots: Arc>>, raft_entry_max_size: ReadableSize, + region_info_accessor: Arc, writer: raft_writer::ThrottledTlsEngineWriter, @@ -318,6 +322,7 @@ impl ImportSstService { importer: Arc, store_meta: Option>>>, resource_manager: Option>, + region_info_accessor: Arc, ) -> Self { let props = tikv_util::thread_group::current_properties(); let eng = Mutex::new(engine.clone()); @@ -365,6 +370,7 @@ impl ImportSstService { limiter: Limiter::new(f64::INFINITY), task_slots: Arc::new(Mutex::new(HashSet::default())), raft_entry_max_size, + region_info_accessor, writer, store_meta, resource_manager, @@ -675,6 +681,59 @@ impl ImportSstService { } } +fn check_local_region_stale( + region_id: u64, + epoch: &RegionEpoch, + region_info_accessor: Arc, +) -> Result<()> { + let (cb, f) = paired_future_callback(); + 
region_info_accessor + .find_region_by_id(region_id, cb) + .map_err(|e| { + Error::Engine(format!("failed to find region {} err {:?}", region_id, e).into()) + })?; + match block_on(f)? { + Some(local_region_info) => { + let local_region_epoch = local_region_info.region.region_epoch.unwrap(); + + // TODO(lance6717): we should only need to check conf_ver because we require all + // peers have SST on the disk, and does not care about which one is + // leader. But since check_sst_for_ingestion also checks epoch version, + // we just keep it here for now. + + // when local region epoch is stale, client can retry write later + if is_epoch_stale(&local_region_epoch, epoch) { + return Err(Error::Engine( + format!("request region {} is ahead of local region, local epoch {:?}, request epoch {:?}, please retry write later", + region_id, local_region_epoch, epoch).into(), + )); + } + // when local region epoch is ahead, client need to rescan region from PD to get + // latest region later + if is_epoch_stale(epoch, &local_region_epoch) { + return Err(Error::Engine( + format!("request region {} is staler than local region, local epoch {:?}, request epoch {:?}, please rescan region later", + region_id, local_region_epoch, epoch).into(), + )); + } + + // not match means to rescan + Ok(()) + } + None => { + // when region not found, we can't tell whether it's stale or ahead, so we just + // return the safest case + Err(Error::Engine( + format!( + "region {} is not found, please rescan region later", + region_id + ) + .into(), + )) + } + } +} + #[macro_export] macro_rules! impl_write { ($fn:ident, $req_ty:ident, $resp_ty:ident, $chunk_ty:ident, $writer_fn:ident) => { @@ -686,6 +745,7 @@ macro_rules! 
impl_write { ) { let import = self.importer.clone(); let tablets = self.tablets.clone(); + let region_info_accessor = self.region_info_accessor.clone(); let (rx, buf_driver) = create_stream_with_buffer(stream, self.cfg.rl().stream_channel_window); let mut rx = rx.map_err(Error::from); @@ -713,7 +773,15 @@ macro_rules! impl_write { } _ => return Err(Error::InvalidChunk), }; + // wait the region epoch on this TiKV to catch up with the epoch + // in request, which comes from PD and represents the majority + // peers' status. let region_id = meta.get_region_id(); + check_local_region_stale( + region_id, + meta.get_region_epoch(), + region_info_accessor, + )?; let tablet = match tablets.get(region_id) { Some(t) => t, None => { @@ -1387,19 +1455,30 @@ fn write_needs_restore(write: &[u8]) -> bool { #[cfg(test)] mod test { - use std::collections::HashMap; + use std::{ + collections::HashMap, + sync::{Arc, Mutex}, + }; use engine_traits::{CF_DEFAULT, CF_WRITE}; use kvproto::{ kvrpcpb::Context, - metapb::RegionEpoch, + metapb::{Region, RegionEpoch}, raft_cmdpb::{RaftCmdRequest, Request}, }; - use protobuf::Message; + use protobuf::{Message, SingularPtrField}; + use raft::StateRole::Follower; + use raftstore::{ + coprocessor::{region_info_accessor::Callback, RegionInfoProvider}, + RegionInfo, + }; use tikv_kv::{Modify, WriteData}; use txn_types::{Key, TimeStamp, Write, WriteBatchFlags, WriteType}; - use crate::{import::sst_service::RequestCollector, server::raftkv}; + use crate::{ + import::sst_service::{check_local_region_stale, RequestCollector}, + server::raftkv, + }; fn write(key: &[u8], ty: WriteType, commit_ts: u64, start_ts: u64) -> (Vec, Vec) { let k = Key::from_raw(key).append_ts(TimeStamp::new(commit_ts)); @@ -1683,4 +1762,100 @@ mod test { } assert_eq!(total, 100); } + + #[test] + fn test_write_rpc_check_region_epoch() { + struct MockRegionInfoProvider { + map: Mutex>, + } + impl RegionInfoProvider for MockRegionInfoProvider { + fn find_region_by_id( + &self, + 
region_id: u64, + callback: Callback>, + ) -> Result<(), raftstore::coprocessor::Error> { + callback(self.map.lock().unwrap().get(®ion_id).cloned()); + Ok(()) + } + } + + let mock_provider = Arc::new(MockRegionInfoProvider { + map: Mutex::new(HashMap::new()), + }); + + let mut req_epoch = RegionEpoch { + conf_ver: 10, + version: 10, + ..Default::default() + }; + // test for region not found + let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); + assert!(result.is_err()); + // check error message contains "rescan region later", client will match this + // string pattern + assert!( + result + .unwrap_err() + .to_string() + .contains("rescan region later") + ); + + let mut local_region_info = RegionInfo { + region: Region { + id: 1, + region_epoch: SingularPtrField::some(req_epoch.clone()), + ..Default::default() + }, + role: Follower, + buckets: 1, + }; + mock_provider + .map + .lock() + .unwrap() + .insert(1, local_region_info.clone()); + // test the local region epoch is same as request + let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); + result.unwrap(); + + // test the local region epoch is ahead of request + local_region_info + .region + .region_epoch + .as_mut() + .unwrap() + .conf_ver = 11; + mock_provider + .map + .lock() + .unwrap() + .insert(1, local_region_info.clone()); + let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); + assert!(result.is_err()); + // check error message contains "rescan region later", client will match this + // string pattern + assert!( + result + .unwrap_err() + .to_string() + .contains("rescan region later") + ); + + req_epoch.conf_ver = 11; + let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); + result.unwrap(); + + // test the local region epoch is staler than request + req_epoch.version = 12; + let result = check_local_region_stale(1, &req_epoch, mock_provider); + assert!(result.is_err()); + // check error message contains 
"retry write later", client will match this + // string pattern + assert!( + result + .unwrap_err() + .to_string() + .contains("retry write later") + ); + } } From 6e826308b9ca246ee5572bcdd24e6b26fd19c156 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Mon, 16 Oct 2023 12:28:57 -0700 Subject: [PATCH 0963/1149] add more metrics for slow commit log diagnostics (#15716) ref tikv/tikv#15175 Add more metrics for slow commit log duration investigation. In this PR, it adds raft message process wait duration and exposes raft message recv by store counter. Together with raft-engine write duration, we can further narrow reason of the commit log duration. With this PR, we still cannot tell if the slowness comes from network or raft-client's (grpc client). Signed-off-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore-v2/src/batch/store.rs | 8 +- components/raftstore-v2/src/fsm/peer.rs | 6 +- components/raftstore-v2/src/operation/life.rs | 6 +- .../raftstore-v2/src/operation/ready/mod.rs | 10 +- components/raftstore-v2/src/router/message.rs | 2 +- components/raftstore/src/store/fsm/peer.rs | 15 +- components/raftstore/src/store/fsm/store.rs | 24 ++- .../raftstore/src/store/local_metrics.rs | 5 + components/raftstore/src/store/metrics.rs | 7 + components/raftstore/src/store/msg.rs | 4 +- metrics/grafana/tikv_details.json | 194 +++++++++++++++++- src/server/server.rs | 11 +- tests/failpoints/cases/test_merge.rs | 5 +- 13 files changed, 261 insertions(+), 36 deletions(-) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 5ed84c70937..23e41914012 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -990,16 +990,16 @@ impl StoreRouter { msg: Box, ) -> std::result::Result<(), TrySendError>> { let id = msg.get_region_id(); - let peer_msg = PeerMsg::RaftMessage(msg); + let peer_msg = 
PeerMsg::RaftMessage(msg, Some(TiInstant::now())); let store_msg = match self.router.try_send(id, peer_msg) { Either::Left(Ok(())) => return Ok(()), - Either::Left(Err(TrySendError::Full(PeerMsg::RaftMessage(m)))) => { + Either::Left(Err(TrySendError::Full(PeerMsg::RaftMessage(m, _)))) => { return Err(TrySendError::Full(m)); } - Either::Left(Err(TrySendError::Disconnected(PeerMsg::RaftMessage(m)))) => { + Either::Left(Err(TrySendError::Disconnected(PeerMsg::RaftMessage(m, _)))) => { return Err(TrySendError::Disconnected(m)); } - Either::Right(PeerMsg::RaftMessage(m)) => StoreMsg::RaftMessage(m), + Either::Right(PeerMsg::RaftMessage(m, _)) => StoreMsg::RaftMessage(m), _ => unreachable!(), }; match self.router.send_control(store_msg) { diff --git a/components/raftstore-v2/src/fsm/peer.rs b/components/raftstore-v2/src/fsm/peer.rs index 94506a8a19f..47a1aee1ef4 100644 --- a/components/raftstore-v2/src/fsm/peer.rs +++ b/components/raftstore-v2/src/fsm/peer.rs @@ -247,8 +247,10 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> PeerFsmDelegate<'a, EK, ER, pub fn on_msgs(&mut self, peer_msgs_buf: &mut Vec) { for msg in peer_msgs_buf.drain(..) { match msg { - PeerMsg::RaftMessage(msg) => { - self.fsm.peer.on_raft_message(self.store_ctx, msg); + PeerMsg::RaftMessage(msg, send_time) => { + self.fsm + .peer + .on_raft_message(self.store_ctx, msg, send_time); } PeerMsg::RaftQuery(cmd) => { self.on_receive_command(cmd.send_time, cmd.ch.read_tracker()); diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 5828a7bb661..00df317f73a 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -416,8 +416,8 @@ impl Store { ); let region_id = msg.get_region_id(); // The message can be sent when the peer is being created, so try send it first. 
- let mut msg = if let Err(TrySendError::Disconnected(PeerMsg::RaftMessage(m))) = - ctx.router.send(region_id, PeerMsg::RaftMessage(msg)) + let mut msg = if let Err(TrySendError::Disconnected(PeerMsg::RaftMessage(m, _))) = + ctx.router.send(region_id, PeerMsg::RaftMessage(msg, None)) { m } else { @@ -562,7 +562,7 @@ impl Store { if from_peer.id != raft::INVALID_ID { // For now the peer only exists in memory. It will persist its states when // handling its first readiness. - let _ = ctx.router.send(region_id, PeerMsg::RaftMessage(msg)); + let _ = ctx.router.send(region_id, PeerMsg::RaftMessage(msg, None)); } true } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 3ceb8693c0b..a2697f29f02 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -55,7 +55,7 @@ use tikv_util::{ slog_panic, store::find_peer, sys::disk::DiskUsage, - time::{duration_to_sec, monotonic_raw_now, Duration}, + time::{duration_to_sec, monotonic_raw_now, Duration, Instant as TiInstant}, }; pub use self::{ @@ -259,6 +259,7 @@ impl Peer { &mut self, ctx: &mut StoreContext, mut msg: Box, + send_time: Option, ) { debug!( self.logger, @@ -268,6 +269,13 @@ impl Peer { "to_peer_id" => msg.get_to_peer().get_id(), "disk_usage" => ?msg.disk_usage, ); + if let Some(send_time) = send_time { + let process_wait_time = send_time.saturating_elapsed(); + ctx.raft_metrics + .process_wait_time + .observe(duration_to_sec(process_wait_time)); + } + if self.pause_for_replay() && msg.get_message().get_msg_type() == MessageType::MsgAppend { ctx.raft_metrics.message_dropped.recovery.inc(); return; diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index c9da5241fa8..59d1edd8198 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -157,7 +157,7 @@ pub enum PeerMsg { 
/// Raft message is the message sent between raft nodes in the same /// raft group. Messages need to be redirected to raftstore if target /// peer doesn't exist. - RaftMessage(Box), + RaftMessage(Box, Option), /// Query won't change any state. A typical query is KV read. In most cases, /// it will be processed using lease or read index. RaftQuery(RaftRequest), diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 584db92e8be..7504f746abe 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -206,7 +206,7 @@ where let callback = match msg { PeerMsg::RaftCommand(cmd) => cmd.callback, PeerMsg::CasualMessage(CasualMessage::SplitRegion { callback, .. }) => callback, - PeerMsg::RaftMessage(im) => { + PeerMsg::RaftMessage(im, _) => { raft_messages_size += im.heap_size; continue; } @@ -617,10 +617,16 @@ where let count = msgs.len(); for m in msgs.drain(..) { match m { - PeerMsg::RaftMessage(msg) => { + PeerMsg::RaftMessage(msg, sent_time) => { + if let Some(sent_time) = sent_time { + let wait_time = sent_time.saturating_elapsed().as_secs_f64(); + self.ctx.raft_metrics.process_wait_time.observe(wait_time); + } + if !self.ctx.coprocessor_host.on_raft_message(&msg.msg) { continue; } + if let Err(e) = self.on_raft_message(msg) { error!(%e; "handle raft message err"; @@ -4298,7 +4304,10 @@ where .pending_msgs .swap_remove_front(|m| m.get_to_peer() == &meta_peer) { - let peer_msg = PeerMsg::RaftMessage(InspectedRaftMessage { heap_size: 0, msg }); + let peer_msg = PeerMsg::RaftMessage( + InspectedRaftMessage { heap_size: 0, msg }, + Some(TiInstant::now()), + ); if let Err(e) = self.ctx.router.force_send(new_region_id, peer_msg) { warn!("handle first requset failed"; "region_id" => region_id, "error" => ?e); } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 33010a993a2..3a22ef8434d 100644 --- 
a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -382,7 +382,10 @@ where for e in msg.get_message().get_entries() { heap_size += bytes_capacity(&e.data) + bytes_capacity(&e.context); } - let peer_msg = PeerMsg::RaftMessage(InspectedRaftMessage { heap_size, msg }); + let peer_msg = PeerMsg::RaftMessage( + InspectedRaftMessage { heap_size, msg }, + Some(TiInstant::now()), + ); let event = TraceEvent::Add(heap_size); let send_failed = Cell::new(true); @@ -397,13 +400,13 @@ where send_failed.set(false); return Ok(()); } - Either::Left(Err(TrySendError::Full(PeerMsg::RaftMessage(im)))) => { + Either::Left(Err(TrySendError::Full(PeerMsg::RaftMessage(im, _)))) => { return Err(TrySendError::Full(im.msg)); } - Either::Left(Err(TrySendError::Disconnected(PeerMsg::RaftMessage(im)))) => { + Either::Left(Err(TrySendError::Disconnected(PeerMsg::RaftMessage(im, _)))) => { return Err(TrySendError::Disconnected(im.msg)); } - Either::Right(PeerMsg::RaftMessage(im)) => StoreMsg::RaftMessage(im), + Either::Right(PeerMsg::RaftMessage(im, _)) => StoreMsg::RaftMessage(im), _ => unreachable!(), }; match self.send_control(store_msg) { @@ -2067,14 +2070,18 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER }); let region_id = msg.msg.get_region_id(); - let msg = match self.ctx.router.send(region_id, PeerMsg::RaftMessage(msg)) { + let msg = match self + .ctx + .router + .send(region_id, PeerMsg::RaftMessage(msg, None)) + { Ok(()) => { forwarded.set(true); return Ok(()); } Err(TrySendError::Full(_)) => return Ok(()), Err(TrySendError::Disconnected(_)) if self.ctx.router.is_shutdown() => return Ok(()), - Err(TrySendError::Disconnected(PeerMsg::RaftMessage(im))) => im.msg, + Err(TrySendError::Disconnected(PeerMsg::RaftMessage(im, None))) => im.msg, Err(_) => unreachable!(), }; @@ -2146,7 +2153,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER check_msg_status == 
CheckMsgStatus::NewPeerFirst, )? { // Peer created, send the message again. - let peer_msg = PeerMsg::RaftMessage(InspectedRaftMessage { heap_size, msg }); + let peer_msg = + PeerMsg::RaftMessage(InspectedRaftMessage { heap_size, msg }, None); if self.ctx.router.send(region_id, peer_msg).is_ok() { forwarded.set(true); } @@ -2169,7 +2177,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER store_meta.pending_msgs.push(msg); } else { drop(store_meta); - let peer_msg = PeerMsg::RaftMessage(InspectedRaftMessage { heap_size, msg }); + let peer_msg = PeerMsg::RaftMessage(InspectedRaftMessage { heap_size, msg }, None); if let Err(e) = self.ctx.router.force_send(region_id, peer_msg) { warn!("handle first request failed"; "region_id" => region_id, "error" => ?e); } else { diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index 5460a57ae0f..aceacdb81ee 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -112,7 +112,10 @@ pub struct RaftMetrics { // local histogram pub store_time: LocalHistogram, + // the wait time for processing a raft command pub propose_wait_time: LocalHistogram, + // the wait time for processing a raft message + pub process_wait_time: LocalHistogram, pub process_ready: LocalHistogram, pub event_time: RaftEventDurationVec, pub peer_msg_len: LocalHistogram, @@ -152,6 +155,7 @@ impl RaftMetrics { raft_log_gc_skipped: RaftLogGcSkippedCounterVec::from(&RAFT_LOG_GC_SKIPPED_VEC), store_time: STORE_TIME_HISTOGRAM.local(), propose_wait_time: REQUEST_WAIT_TIME_HISTOGRAM.local(), + process_wait_time: RAFT_MESSAGE_WAIT_TIME_HISTOGRAM.local(), process_ready: PEER_RAFT_PROCESS_DURATION .with_label_values(&["ready"]) .local(), @@ -190,6 +194,7 @@ impl RaftMetrics { self.store_time.flush(); self.propose_wait_time.flush(); + self.process_wait_time.flush(); self.process_ready.flush(); self.event_time.flush(); 
self.peer_msg_len.flush(); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index a5aa164e63e..a4f2b7820cb 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -551,6 +551,13 @@ lazy_static! { exponential_buckets(0.00001, 2.0, 26).unwrap() ).unwrap(); + pub static ref RAFT_MESSAGE_WAIT_TIME_HISTOGRAM: Histogram = + register_histogram!( + "tikv_raftstore_raft_msg_wait_time_duration_secs", + "Bucketed histogram of raft message wait time duration.", + exponential_buckets(0.00001, 2.0, 26).unwrap() + ).unwrap(); + pub static ref PEER_GC_RAFT_LOG_COUNTER: IntCounter = register_int_counter!( "tikv_raftstore_gc_raft_log_total", diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index f7bf7f6d297..a92e5169549 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -740,7 +740,7 @@ pub enum PeerMsg { /// Raft message is the message sent between raft nodes in the same /// raft group. Messages need to be redirected to raftstore if target /// peer doesn't exist. - RaftMessage(InspectedRaftMessage), + RaftMessage(InspectedRaftMessage, Option), /// Raft command is the command that is expected to be proposed by the /// leader of the target raft group. If it's failed to be sent, callback /// usually needs to be called before dropping in case of resource leak. @@ -778,7 +778,7 @@ impl ResourceMetered for PeerMsg {} impl fmt::Debug for PeerMsg { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - PeerMsg::RaftMessage(_) => write!(fmt, "Raft Message"), + PeerMsg::RaftMessage(..) => write!(fmt, "Raft Message"), PeerMsg::RaftCommand(_) => write!(fmt, "Raft Command"), PeerMsg::Tick(tick) => write! 
{ fmt, diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 57c88782031..f2654ba3da1 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -179,6 +179,14 @@ "interval": "", "legendFormat": "Apply Duration .99", "refId": "E" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_raft_msg_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "hide": false, + "interval": "", + "legendFormat": "Raft Message Wait .99", + "refId": "F" } ], "thresholds": [], @@ -5819,7 +5827,7 @@ "fillGradient": 0, "gridPos": { "h": 9, - "w": 24, + "w": 12, "x": 0, "y": 37 }, @@ -5908,6 +5916,111 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The count of gRPC raft message", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 37 + }, + "hiddenSeries": false, + "id": 24763573092, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tikv_raftstore_message_recv_by_store{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, store)", + "format": "time_series", + 
"interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} - {{store}}", + "metric": "tikv_raftstore_message_recv_by_store", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "gRPC message count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "repeat": null, @@ -13892,7 +14005,7 @@ "format": "heatmap", "intervalFactor": 2, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", + "metric": "tikv_raftstore_apply_wait_time_duration_secs_bucket", "refId": "A", "step": 4 } @@ -14070,7 +14183,7 @@ "interval": "", "intervalFactor": 2, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", + "metric": "tikv_raftstore_store_write_handle_msg_duration_secs_bucket", "refId": "A", "step": 4 } @@ -14144,7 +14257,7 @@ "interval": "", "intervalFactor": 2, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", + "metric": "tikv_raftstore_store_write_trigger_wb_bytes_bucket", "refId": "A", "step": 4 } @@ -14333,7 +14446,7 @@ "format": "time_series", "intervalFactor": 2, "legendFormat": "store-{{type}}", - "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", + "metric": "tikv_raftstore_store_perf_context_time_duration_secs_bucket", "refId": "A", "step": 4 }, @@ -14387,6 +14500,77 @@ "align": false, "alignLevel": null } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": 
"#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The wait time of each raft message", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 62 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 1977, + "legend": { + "show": false + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "expr": "sum(delta(tikv_raftstore_raft_msg_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "format": "heatmap", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "metric": "tikv_raftstore_raft_msg_wait_time_duration_secs_bucket", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Raft message wait duration", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null } ], "repeat": null, diff --git a/src/server/server.rs b/src/server/server.rs index a886f1232f4..09782be4e16 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -437,6 +437,7 @@ pub mod test_router { use engine_rocks::{RocksEngine, RocksSnapshot}; use kvproto::raft_serverpb::RaftMessage; use raftstore::{router::RaftStoreRouter, store::*, Result as RaftStoreResult}; + use tikv_util::time::Instant as TiInstant; use super::*; @@ -496,12 +497,10 @@ pub mod test_router { impl RaftStoreRouter for TestRaftStoreRouter { fn send_raft_msg(&self, msg: RaftMessage) -> RaftStoreResult<()> { - let _ = self - .tx - 
.send(Either::Left(PeerMsg::RaftMessage(InspectedRaftMessage { - heap_size: 0, - msg, - }))); + let _ = self.tx.send(Either::Left(PeerMsg::RaftMessage( + InspectedRaftMessage { heap_size: 0, msg }, + Some(TiInstant::now()), + ))); Ok(()) } diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index ffbd69dc05e..eb15c7e16fa 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -1831,7 +1831,10 @@ fn test_concurrent_between_transfer_leader_and_merge() { // Actually, store 1 should not reach the line of propose_commit_merge_1 let _ = rx.recv_timeout(Duration::from_secs(2)); router - .force_send(msg.get_region_id(), PeerMsg::RaftMessage(Box::new(msg))) + .force_send( + msg.get_region_id(), + PeerMsg::RaftMessage(Box::new(msg), None), + ) .unwrap(); // Wait region 1 of node 2 to become leader From 356ae2416bb53b1e104bc82ba536a56fad3fc47c Mon Sep 17 00:00:00 2001 From: 3pointer Date: Tue, 17 Oct 2023 10:52:58 +0800 Subject: [PATCH 0964/1149] s3: support backup with session token and assume role (#15722) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/tikv#15781, close pingcap/tidb#39832 Signed-off-by: 3pointer Signed-off-by: 3pointer Co-authored-by: 山岚 <36239017+YuJuncen@users.noreply.github.com> --- components/cloud/aws/src/s3.rs | 85 +++++++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 11 deletions(-) diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index a7ea47ec9d2..96031c91f06 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -1,5 +1,9 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{error::Error as StdError, io, time::Duration}; +use std::{ + error::Error as StdError, + io, + time::{Duration, SystemTime}, +}; use async_trait::async_trait; use cloud::{ @@ -16,6 +20,7 @@ pub use kvproto::brpb::{Bucket as InputBucket, CloudDynamic, S3 as InputConfig}; use rusoto_core::{request::DispatchSignedRequest, ByteStream, RusotoError}; use rusoto_credential::{ProvideAwsCredentials, StaticProvider}; use rusoto_s3::{util::AddressingStyle, *}; +use rusoto_sts::{StsAssumeRoleSessionCredentialsProvider, StsClient}; use thiserror::Error; use tikv_util::{debug, stream::error_stream, time::Instant}; use tokio::time::{sleep, timeout}; @@ -29,6 +34,7 @@ pub const STORAGE_VENDOR_NAME_AWS: &str = "aws"; pub struct AccessKeyPair { pub access_key: StringNonEmpty, pub secret_access_key: StringNonEmpty, + pub session_token: Option, } impl std::fmt::Debug for AccessKeyPair { @@ -36,6 +42,7 @@ impl std::fmt::Debug for AccessKeyPair { f.debug_struct("AccessKeyPair") .field("access_key", &self.access_key) .field("secret_access_key", &"?") + .field("session_token", &self.session_token) .finish() } } @@ -51,6 +58,8 @@ pub struct Config { storage_class: Option, multi_part_size: usize, object_lock_enabled: bool, + role_arn: Option, + external_id: Option, } impl Config { @@ -66,6 +75,8 @@ impl Config { storage_class: None, multi_part_size: MINIMUM_PART_SIZE, object_lock_enabled: false, + role_arn: None, + external_id: None, } } @@ -78,12 +89,16 @@ impl Config { let access_key_opt = attrs.get("access_key"); let access_key_pair = if let Some(access_key) = access_key_opt { let secret_access_key = attrs.get("secret_access_key").unwrap_or(def).clone(); + let session_token = attrs + .get("session_token") + .and_then(|x| StringNonEmpty::opt(x.to_string())); Some(AccessKeyPair { access_key: StringNonEmpty::required_field(access_key.clone(), "access_key")?, secret_access_key: StringNonEmpty::required_field( secret_access_key, "secret_access_key", )?, + session_token, }) } else { 
None @@ -99,6 +114,8 @@ impl Config { sse_kms_key_id: StringNonEmpty::opt(attrs.get("sse_kms_key_id").unwrap_or(def).clone()), multi_part_size: MINIMUM_PART_SIZE, object_lock_enabled: false, + role_arn: StringNonEmpty::opt(attrs.get("role_arn").unwrap_or(def).clone()), + external_id: StringNonEmpty::opt(attrs.get("external_id").unwrap_or(def).clone()), }) } @@ -114,13 +131,17 @@ impl Config { }; let access_key_pair = match StringNonEmpty::opt(input.access_key) { None => None, - Some(ak) => Some(AccessKeyPair { - access_key: ak, - secret_access_key: StringNonEmpty::required_field( - input.secret_access_key, - "secret_access_key", - )?, - }), + Some(ak) => { + let session_token = StringNonEmpty::opt(input.session_token); + Some(AccessKeyPair { + access_key: ak, + secret_access_key: StringNonEmpty::required_field( + input.secret_access_key, + "secret_access_key", + )?, + session_token, + }) + } }; Ok(Config { storage_class, @@ -132,6 +153,8 @@ impl Config { sse_kms_key_id: StringNonEmpty::opt(input.sse_kms_key_id), multi_part_size: MINIMUM_PART_SIZE, object_lock_enabled: input.object_lock_enabled, + role_arn: StringNonEmpty::opt(input.role_arn), + external_id: StringNonEmpty::opt(input.external_id), }) } } @@ -198,20 +221,59 @@ impl S3Storage { Ok(S3Storage { config, client }) } + fn maybe_assume_role( + config: Config, + cred_provider: P, + dispatcher: D, + ) -> io::Result + where + P: ProvideAwsCredentials + Send + Sync + 'static, + D: DispatchSignedRequest + Send + Sync + 'static, + { + if config.role_arn.is_some() { + // try use role arn anyway with current creds when it's not nil. + let bucket_region = none_to_empty(config.bucket.region.clone()); + let bucket_endpoint = config.bucket.endpoint.clone(); + let region = util::get_region(&bucket_region, &none_to_empty(bucket_endpoint))?; + // cannot use the same dispatcher because of move, so use another http client. 
+ let sts = StsClient::new_with(util::new_http_client()?, cred_provider, region); + let duration_since_epoch = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap(); + let timestamp_secs = duration_since_epoch.as_secs(); + let cred_provider = StsAssumeRoleSessionCredentialsProvider::new( + sts, + String::clone(config.role_arn.as_deref().unwrap()), + format!("{}", timestamp_secs), + config.external_id.as_deref().map(String::clone), + // default duration is 15min + None, + None, + None, + ); + Self::new_creds_dispatcher(config, dispatcher, cred_provider) + } else { + // or just use original cred_provider to access s3. + Self::new_creds_dispatcher(config, dispatcher, cred_provider) + } + } + pub fn with_request_dispatcher(config: Config, dispatcher: D) -> io::Result where D: DispatchSignedRequest + Send + Sync + 'static, { // static credentials are used with minio if let Some(access_key_pair) = &config.access_key_pair { - let cred_provider = StaticProvider::new_minimal( + let cred_provider = StaticProvider::new( (*access_key_pair.access_key).to_owned(), (*access_key_pair.secret_access_key).to_owned(), + access_key_pair.session_token.as_deref().map(String::clone), + None, ); - Self::new_creds_dispatcher(config, dispatcher, cred_provider) + Self::maybe_assume_role(config, cred_provider, dispatcher) } else { let cred_provider = util::CredentialsProvider::new()?; - Self::new_creds_dispatcher(config, dispatcher, cred_provider) + Self::maybe_assume_role(config, cred_provider, dispatcher) } } @@ -637,6 +699,7 @@ mod tests { config.access_key_pair = Some(AccessKeyPair { access_key: StringNonEmpty::required("abc".to_string()).unwrap(), secret_access_key: StringNonEmpty::required("xyz".to_string()).unwrap(), + session_token: Some(StringNonEmpty::required("token".to_string()).unwrap()), }); let mut s = S3Storage::new(config.clone()).unwrap(); // set a less than 5M value not work From f5d269496dba61827fd25dbfeec975b2c3f1af5a Mon Sep 17 00:00:00 2001 From: 
MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Tue, 17 Oct 2023 14:33:29 +0800 Subject: [PATCH 0965/1149] tikv_util: Support customizing evict policy and operations without promoting for LruCache (#15747) ref tikv/tikv#11187 This PR makes the `LruCache` in `tikv_util` to support customizing how to determine an entries in the cache should be evicted. This is part of solving the issue #11187, which needs a `TxnStatusCache`. The `TxnStatusCache` is desinged to use `LruCache` internally, with ability to get or insert items without promoting items to the head (most-recently-used) position. This PR adds `get_no_promote` and `insert_if_not_exist` functions to `LruCache`. Signed-off-by: MyonKeminta Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tikv_util/src/lru.rs | 198 +++++++++++++++++++++++++++++--- 1 file changed, 183 insertions(+), 15 deletions(-) diff --git a/components/tikv_util/src/lru.rs b/components/tikv_util/src/lru.rs index 76fad6e8a34..b5bfdfbf7d5 100644 --- a/components/tikv_util/src/lru.rs +++ b/components/tikv_util/src/lru.rs @@ -135,6 +135,10 @@ impl Trace { r.key.as_ptr().read() } } + + fn get_tail(&self) -> &K { + unsafe { self.tail.as_ref().prev.as_ref().key.assume_init_ref() } + } } impl Drop for Trace { @@ -174,14 +178,51 @@ impl SizePolicy for CountTracker { } } -pub struct LruCache +/// Some [`EvictPolicy`] may need to know what the entry bing popped out is to +/// determine if it really can be popped. But there is performance cost to +/// always get the tail entry. So we pass this interface to the `should_evict` +/// function. An implementation of `EvictPolicy` can read the tail entry only +/// when it really needs. +pub trait GetTailEntry { + fn get_tail_entry(&self) -> Option<(&K, &V)>; +} + +/// An [`EvictPolicy`] defines how the [`LruCache`] should determine an entry +/// at the tail should be popped out. 
+pub trait EvictPolicy { + fn should_evict( + &self, + current_size: usize, + capacity: usize, + get_tail_entry: &impl GetTailEntry, + ) -> bool; +} + +/// The default [`EvictPolicy`] of [`LruCache`], which pops out entries at the +/// tail when the limit specified by `capacity` is exceeded. +pub struct EvictOnFull; + +impl EvictPolicy for EvictOnFull { + fn should_evict( + &self, + current_size: usize, + capacity: usize, + _: &impl GetTailEntry, + ) -> bool { + capacity < current_size + } +} + +pub struct LruCache where T: SizePolicy, + E: EvictPolicy, { map: HashMap>, trace: Trace, capacity: usize, size_policy: T, + evict_policy: E, } impl LruCache @@ -189,18 +230,30 @@ where T: SizePolicy, { pub fn with_capacity_sample_and_trace( - mut capacity: usize, + capacity: usize, sample_mask: usize, size_policy: T, ) -> LruCache { + Self::new(capacity, sample_mask, size_policy, EvictOnFull) + } +} + +impl LruCache +where + T: SizePolicy, + E: EvictPolicy, +{ + pub fn new(mut capacity: usize, sample_mask: usize, size_policy: T, evict_policy: E) -> Self { + // The capacity is at least 1. if capacity == 0 { capacity = 1; } - LruCache { + Self { map: HashMap::default(), trace: Trace::new(sample_mask), capacity, size_policy, + evict_policy, } } @@ -215,10 +268,18 @@ where self.trace.clear(); self.size_policy.on_reset(0); } + + /// Get the capacity limited on the `LruCache`. #[inline] pub fn capacity(&self) -> usize { self.capacity } + + /// Get the capacity actually allocated by the internal data structure. 
+ #[inline] + pub fn internal_allocated_capacity(&self) -> usize { + self.map.capacity() + } } impl LruCache @@ -234,25 +295,36 @@ where } } -impl LruCache +impl LruCache where K: Eq + Hash + Clone + std::fmt::Debug, T: SizePolicy, + E: EvictPolicy, { #[inline] - pub fn insert(&mut self, key: K, value: V) { + fn insert_impl(&mut self, key: K, value: V, replace: bool) -> bool { + let mut inserted = true; let mut old_key = None; let current_size = SizePolicy::::current(&self.size_policy); + // In case the current size exactly equals to capacity, we also expect to reuse + // tail when inserting. Use `current_size + 1` to include the case. + let should_evict_on_insert = + self.evict_policy + .should_evict(current_size + 1, self.capacity, self); match self.map.entry(key) { HashMapEntry::Occupied(mut e) => { - self.size_policy.on_remove(e.key(), &e.get().value); - self.size_policy.on_insert(e.key(), &value); - let mut entry = e.get_mut(); - self.trace.promote(entry.record); - entry.value = value; + if replace { + self.size_policy.on_remove(e.key(), &e.get().value); + self.size_policy.on_insert(e.key(), &value); + let mut entry = e.get_mut(); + self.trace.promote(entry.record); + entry.value = value; + } else { + inserted = false; + } } HashMapEntry::Vacant(v) => { - let record = if self.capacity <= current_size { + let record = if should_evict_on_insert { let res = self.trace.reuse_tail(v.key().clone()); old_key = Some(res.0); res.1 @@ -274,7 +346,8 @@ where // Perhaps we can reject entries larger than capacity goes in the LRU cache, but // that is impossible for now: the `SizePolicy` trait doesn't provide the // interface of querying the actual size of an item. - self.evict_until_fit() + self.evict_until_fit(); + inserted } fn evict_until_fit(&mut self) { @@ -283,7 +356,7 @@ where let current_size = self.size_policy.current(); // Should we keep at least one entry? So our users won't lose their fresh record // once it exceeds the capacity. 
- if current_size <= cap || self.map.is_empty() { + if !self.evict_policy.should_evict(current_size, cap, self) || self.map.is_empty() { break; } let key = self.trace.remove_tail(); @@ -292,6 +365,18 @@ where } } + #[inline] + pub fn insert(&mut self, key: K, value: V) { + self.insert_impl(key, value, true); + } + + /// Insert an entry if the key doesn't exist before. The existing entry + /// won't be replaced and won't be promoted to the most-recent place. + #[inline] + pub fn insert_if_not_exist(&mut self, key: K, value: V) -> bool { + self.insert_impl(key, value, false) + } + #[inline] pub fn remove(&mut self, key: &K) -> Option { if let Some(v) = self.map.remove(key) { @@ -313,6 +398,12 @@ where } } + /// Get an item by key without promoting the item. + #[inline] + pub fn get_no_promote(&self, key: &K) -> Option<&V> { + self.map.get(key).map(|v| &v.value) + } + #[inline] pub fn get_mut(&mut self, key: &K) -> Option<&mut V> { match self.map.get_mut(key) { @@ -355,17 +446,37 @@ where } } -unsafe impl Send for LruCache +impl GetTailEntry for LruCache +where + K: Eq + Hash + Clone + std::fmt::Debug, + T: SizePolicy, + E: EvictPolicy, +{ + fn get_tail_entry(&self) -> Option<(&K, &V)> { + if self.is_empty() { + return None; + } + + let k = self.trace.get_tail(); + self.map + .get_key_value(k) + .map(|(k, entry)| (k, &entry.value)) + } +} + +unsafe impl Send for LruCache where K: Send, V: Send, T: Send + SizePolicy, + E: Send + EvictPolicy, { } -impl Drop for LruCache +impl Drop for LruCache where T: SizePolicy, + E: EvictPolicy, { fn drop(&mut self) { self.clear(); @@ -626,4 +737,61 @@ mod tests { assert!(cache.size() <= 42); } } + + #[test] + fn test_get_no_promote() { + let mut cache = LruCache::with_capacity_sample_and_trace(3, 0, CountTracker::default()); + cache.insert(1, 1); + cache.insert(2, 2); + cache.insert(3, 3); + assert_eq!(cache.size(), 3); + assert_eq!(*cache.get_no_promote(&1).unwrap(), 1); + cache.insert(4, 4); + assert_eq!(cache.size(), 3); + // Key 
1 is not promoted, so it's popped out first. + assert!(cache.get_no_promote(&1).is_none()); + // Other entries are not affected. + assert_eq!(*cache.get_no_promote(&2).unwrap(), 2); + assert_eq!(*cache.get_no_promote(&3).unwrap(), 3); + assert_eq!(*cache.get_no_promote(&4).unwrap(), 4); + } + + #[test] + fn test_insert_if_not_exist() { + let mut cache = LruCache::with_capacity_sample_and_trace(4, 0, CountTracker::default()); + assert!(cache.insert_if_not_exist(1, 1)); + assert!(cache.insert_if_not_exist(2, 2)); + assert!(cache.insert_if_not_exist(3, 3)); + assert_eq!(cache.size(), 3); + assert_eq!(*cache.get_no_promote(&1).unwrap(), 1); + assert_eq!(*cache.get_no_promote(&2).unwrap(), 2); + assert_eq!(*cache.get_no_promote(&3).unwrap(), 3); + + assert!(!cache.insert_if_not_exist(1, 11)); + // Not updated. + assert_eq!(*cache.get_no_promote(&1).unwrap(), 1); + + assert!(cache.insert_if_not_exist(4, 4)); + assert!(!cache.insert_if_not_exist(2, 22)); + // Not updated. + assert_eq!(*cache.get_no_promote(&2).unwrap(), 2); + + assert_eq!(cache.size(), 4); + assert!(cache.insert_if_not_exist(5, 5)); + assert_eq!(cache.size(), 4); + // key 1 is not promoted, so it's first popped out. + assert!(cache.get_no_promote(&1).is_none()); + assert_eq!(*cache.get_no_promote(&2).unwrap(), 2); + + assert!(cache.insert_if_not_exist(6, 6)); + assert_eq!(cache.size(), 4); + // key 2 is not promoted either, so it's first popped out. 
+ assert!(cache.get_no_promote(&2).is_none()); + assert_eq!(*cache.get_no_promote(&3).unwrap(), 3); + + assert!(cache.insert_if_not_exist(7, 7)); + assert_eq!(cache.size(), 4); + assert!(cache.get_no_promote(&3).is_none()); + assert_eq!(*cache.get_no_promote(&4).unwrap(), 4); + } } From 8f8da90e0fca0a9adacc77f5a1edc11e59872573 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Wed, 18 Oct 2023 00:12:28 -0500 Subject: [PATCH 0966/1149] Revert "import: write RPC will check region epoch before continue" (#15787) close tikv/tikv#15791 Signed-off-by: lance6716 --- Makefile | 8 - .../src/operation/command/write/ingest.rs | 9 +- components/raftstore/src/store/fsm/store.rs | 87 ++++++-- components/raftstore/src/store/msg.rs | 6 + .../raftstore/src/store/worker/cleanup.rs | 19 +- .../raftstore/src/store/worker/cleanup_sst.rs | 120 +++++++++++- components/server/src/server.rs | 2 - components/server/src/server2.rs | 2 - components/sst_importer/src/import_file.rs | 49 ++--- components/sst_importer/src/lib.rs | 2 +- components/sst_importer/src/sst_importer.rs | 8 +- components/test_raftstore-v2/src/server.rs | 1 - components/test_raftstore/src/server.rs | 1 - src/import/sst_service.rs | 185 +----------------- 14 files changed, 228 insertions(+), 271 deletions(-) diff --git a/Makefile b/Makefile index ce8d4e8b793..bb1d7316e1b 100644 --- a/Makefile +++ b/Makefile @@ -406,14 +406,6 @@ docker_test: ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ make test -docker_shell: - docker build -f Dockerfile.test \ - -t ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ - . 
- docker run -it -v $(shell pwd):/tikv \ - ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ - /bin/bash - ## The driver for script/run-cargo.sh ## ---------------------------------- diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index 3d39c9a7369..e963434fe83 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -43,11 +43,6 @@ impl Store { let import_size = box_try!(ctx.sst_importer.get_total_size()); STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(ctx.sst_importer.list_ssts()); - // filter old version SSTs - let ssts: Vec<_> = ssts - .into_iter() - .filter(|sst| sst.api_version >= sst_importer::API_VERSION_2) - .collect(); if ssts.is_empty() { return Ok(()); } @@ -55,9 +50,9 @@ impl Store { let mut region_ssts: HashMap<_, Vec<_>> = HashMap::default(); for sst in ssts { region_ssts - .entry(sst.meta.get_region_id()) + .entry(sst.get_region_id()) .or_default() - .push(sst.meta); + .push(sst); } let ranges = ctx.sst_importer.ranges_in_import(); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 3a22ef8434d..aa8fa7c318e 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -36,13 +36,14 @@ use futures::{compat::Future01CompatExt, FutureExt}; use grpcio_health::HealthService; use keys::{self, data_end_key, data_key, enc_end_key, enc_start_key}; use kvproto::{ + import_sstpb::{SstMeta, SwitchMode}, metapb::{self, Region, RegionEpoch}, pdpb::{self, QueryStats, StoreStats}, raft_cmdpb::{AdminCmdType, AdminRequest}, raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, RaftMessage, RegionLocalState}, replication_modepb::{ReplicationMode, ReplicationStatus}, }; -use pd_client::{Feature, FeatureGate, PdClient}; +use 
pd_client::{metrics::STORE_SIZE_EVENT_INT_VEC, Feature, FeatureGate, PdClient}; use protobuf::Message; use raft::StateRole; use resource_control::{channel::unbounded, ResourceGroupManager}; @@ -812,6 +813,9 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> } } StoreMsg::CompactedEvent(event) => self.on_compaction_finished(event), + StoreMsg::ValidateSstResult { invalid_ssts } => { + self.on_validate_sst_result(invalid_ssts) + } StoreMsg::ClearRegionSizeInRange { start_key, end_key } => { self.clear_region_size_in_range(&start_key, &end_key) } @@ -1651,7 +1655,12 @@ impl RaftBatchSystem { ); let compact_runner = CompactRunner::new(engines.kv.clone()); - let cleanup_sst_runner = CleanupSstRunner::new(Arc::clone(&importer)); + let cleanup_sst_runner = CleanupSstRunner::new( + meta.get_id(), + self.router.clone(), + Arc::clone(&importer), + Arc::clone(&pd_client), + ); let gc_snapshot_runner = GcSnapshotRunner::new( meta.get_id(), self.router.clone(), // RaftRouter @@ -2754,8 +2763,44 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER, T> { + fn on_validate_sst_result(&mut self, ssts: Vec) { + if ssts.is_empty() || self.ctx.importer.get_mode() == SwitchMode::Import { + return; + } + // A stale peer can still ingest a stale Sst before it is + // destroyed. We need to make sure that no stale peer exists. 
+ let mut delete_ssts = Vec::new(); + { + let meta = self.ctx.store_meta.lock().unwrap(); + for sst in ssts { + if !meta.regions.contains_key(&sst.get_region_id()) { + delete_ssts.push(sst); + } + } + } + if delete_ssts.is_empty() { + return; + } + + let task = CleanupSstTask::DeleteSst { ssts: delete_ssts }; + if let Err(e) = self + .ctx + .cleanup_scheduler + .schedule(CleanupTask::CleanupSst(task)) + { + error!( + "schedule to delete ssts failed"; + "store_id" => self.fsm.store.id, + "err" => ?e, + ); + } + } + fn on_cleanup_import_sst(&mut self) -> Result<()> { let mut delete_ssts = Vec::new(); + let mut validate_ssts = Vec::new(); + let import_size = box_try!(self.ctx.importer.get_total_size()); + STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(self.ctx.importer.list_ssts()); if ssts.is_empty() { @@ -2764,22 +2809,15 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER { let meta = self.ctx.store_meta.lock().unwrap(); for sst in ssts { - if sst.api_version < sst_importer::API_VERSION_2 { - // SST of old versions are created by old TiKV and have different prerequisite - // we can't delete them here. They can only be deleted manually - continue; - } - if let Some(r) = meta.regions.get(&sst.meta.get_region_id()) { + if let Some(r) = meta.regions.get(&sst.get_region_id()) { let region_epoch = r.get_region_epoch(); - if util::is_epoch_stale(sst.meta.get_region_epoch(), region_epoch) { + if util::is_epoch_stale(sst.get_region_epoch(), region_epoch) { // If the SST epoch is stale, it will not be ingested anymore. - delete_ssts.push(sst.meta); + delete_ssts.push(sst); } } else { - // The write RPC of import sst service have make sure the region do exist at the - // write time, and now the region is not found, sst can be - // deleted because it won't be used by ingest in future. - delete_ssts.push(sst.meta); + // If the peer doesn't exist, we need to validate the SST through PD. 
+ validate_ssts.push(sst); } } } @@ -2799,6 +2837,27 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } + // When there is an import job running, the region which this sst belongs may + // has not been split from the origin region because the apply thread is so busy + // that it can not apply SplitRequest as soon as possible. So we can not + // delete this sst file. + if !validate_ssts.is_empty() && self.ctx.importer.get_mode() != SwitchMode::Import { + let task = CleanupSstTask::ValidateSst { + ssts: validate_ssts, + }; + if let Err(e) = self + .ctx + .cleanup_scheduler + .schedule(CleanupTask::CleanupSst(task)) + { + error!( + "schedule to validate ssts failed"; + "store_id" => self.fsm.store.id, + "err" => ?e, + ); + } + } + Ok(()) } diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index a92e5169549..a33ca0e476e 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -10,6 +10,7 @@ use engine_traits::{CompactedEvent, KvEngine, Snapshot}; use futures::channel::mpsc::UnboundedSender; use kvproto::{ brpb::CheckAdminResponse, + import_sstpb::SstMeta, kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp}, metapb, metapb::RegionEpoch, @@ -823,6 +824,10 @@ where { RaftMessage(InspectedRaftMessage), + ValidateSstResult { + invalid_ssts: Vec, + }, + // Clear region size and keys for all regions in the range, so we can force them to // re-calculate their size later. ClearRegionSizeInRange { @@ -879,6 +884,7 @@ where write!(fmt, "Store {} is unreachable", store_id) } StoreMsg::CompactedEvent(ref event) => write!(fmt, "CompactedEvent cf {}", event.cf()), + StoreMsg::ValidateSstResult { .. 
} => write!(fmt, "Validate SST Result"), StoreMsg::ClearRegionSizeInRange { ref start_key, ref end_key, diff --git a/components/raftstore/src/store/worker/cleanup.rs b/components/raftstore/src/store/worker/cleanup.rs index 726b7abe5ce..632e85f40cc 100644 --- a/components/raftstore/src/store/worker/cleanup.rs +++ b/components/raftstore/src/store/worker/cleanup.rs @@ -3,6 +3,7 @@ use std::fmt::{self, Display, Formatter}; use engine_traits::{KvEngine, RaftEngine}; +use pd_client::PdClient; use tikv_util::worker::Runnable; use super::{ @@ -10,6 +11,7 @@ use super::{ cleanup_sst::{Runner as CleanupSstRunner, Task as CleanupSstTask}, compact::{Runner as CompactRunner, Task as CompactTask}, }; +use crate::store::StoreRouter; pub enum Task { Compact(CompactTask), @@ -27,26 +29,29 @@ impl Display for Task { } } -pub struct Runner +pub struct Runner where E: KvEngine, R: RaftEngine, + S: StoreRouter, { compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, } -impl Runner +impl Runner where E: KvEngine, R: RaftEngine, + C: PdClient, + S: StoreRouter, { pub fn new( compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, - ) -> Runner { + ) -> Runner { Runner { compact, cleanup_sst, @@ -55,10 +60,12 @@ where } } -impl Runnable for Runner +impl Runnable for Runner where E: KvEngine, R: RaftEngine, + C: PdClient, + S: StoreRouter, { type Task = Task; diff --git a/components/raftstore/src/store/worker/cleanup_sst.rs b/components/raftstore/src/store/worker/cleanup_sst.rs index 44f188e6f8f..8174b872f4b 100644 --- a/components/raftstore/src/store/worker/cleanup_sst.rs +++ b/components/raftstore/src/store/worker/cleanup_sst.rs @@ -1,30 +1,62 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{fmt, sync::Arc}; +use std::{error::Error, fmt, marker::PhantomData, sync::Arc}; -use kvproto::import_sstpb::SstMeta; +use engine_traits::KvEngine; +use kvproto::{import_sstpb::SstMeta, metapb::Region}; +use pd_client::PdClient; use sst_importer::SstImporter; -use tikv_util::worker::Runnable; +use tikv_util::{error, worker::Runnable}; + +use crate::store::{util::is_epoch_stale, StoreMsg, StoreRouter}; + +type Result = std::result::Result>; pub enum Task { DeleteSst { ssts: Vec }, + ValidateSst { ssts: Vec }, } impl fmt::Display for Task { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Task::DeleteSst { ref ssts } => write!(f, "Delete {} ssts", ssts.len()), + Task::ValidateSst { ref ssts } => write!(f, "Validate {} ssts", ssts.len()), } } } -pub struct Runner { +pub struct Runner +where + EK: KvEngine, + S: StoreRouter, +{ + store_id: u64, + store_router: S, importer: Arc, + pd_client: Arc, + _engine: PhantomData, } -impl Runner { - pub fn new(importer: Arc) -> Runner { - Runner { importer } +impl Runner +where + EK: KvEngine, + C: PdClient, + S: StoreRouter, +{ + pub fn new( + store_id: u64, + store_router: S, + importer: Arc, + pd_client: Arc, + ) -> Runner { + Runner { + store_id, + store_router, + importer, + pd_client, + _engine: PhantomData, + } } /// Deletes SST files from the importer. @@ -33,9 +65,78 @@ impl Runner { let _ = self.importer.delete(sst); } } + + fn get_region_by_meta(&self, sst: &SstMeta) -> Result { + // The SST meta has been delivered with a range, use it directly. + // For now, no case will reach this. But this still could be a guard for + // reducing the superise in the future... + if !sst.get_range().get_start().is_empty() || !sst.get_range().get_end().is_empty() { + return self + .pd_client + .get_region(sst.get_range().get_start()) + .map_err(Into::into); + } + // Once there isn't range provided. 
+ let query_by_start_key_of_full_meta = || { + let start_key = self + .importer + .load_start_key_by_meta::(sst)? + .ok_or_else(|| -> Box { + "failed to load start key from sst, the sst might be empty".into() + })?; + let region = self.pd_client.get_region(&start_key)?; + Result::Ok(region) + }; + query_by_start_key_of_full_meta() + .map_err(|err| + format!("failed to load full sst meta from disk for {:?} and there isn't extra information provided: {err}", sst.get_uuid()).into() + ) + } + + /// Validates whether the SST is stale or not. + fn handle_validate_sst(&self, ssts: Vec) { + let store_id = self.store_id; + let mut invalid_ssts = Vec::new(); + for sst in ssts { + match self.get_region_by_meta(&sst) { + Ok(r) => { + // The region id may or may not be the same as the + // SST file, but it doesn't matter, because the + // epoch of a range will not decrease anyway. + if is_epoch_stale(r.get_region_epoch(), sst.get_region_epoch()) { + // Region has not been updated. + continue; + } + if r.get_id() == sst.get_region_id() + && r.get_peers().iter().any(|p| p.get_store_id() == store_id) + { + // The SST still belongs to this store. + continue; + } + invalid_ssts.push(sst); + } + Err(e) => { + error!("get region failed"; "err" => %e); + } + } + } + + // We need to send back the result to check for the stale + // peer, which may ingest the stale SST before it is + // destroyed. 
+ let msg = StoreMsg::ValidateSstResult { invalid_ssts }; + if let Err(e) = self.store_router.send(msg) { + error!(%e; "send validate sst result failed"); + } + } } -impl Runnable for Runner { +impl Runnable for Runner +where + EK: KvEngine, + C: PdClient, + S: StoreRouter, +{ type Task = Task; fn run(&mut self, task: Task) { @@ -43,6 +144,9 @@ impl Runnable for Runner { Task::DeleteSst { ssts } => { self.handle_delete_sst(ssts); } + Task::ValidateSst { ssts } => { + self.handle_validate_sst(ssts); + } } } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index a4b6276a587..8d44890e5a6 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -366,7 +366,6 @@ where router.clone(), config.coprocessor.clone(), )); - let region_info_accessor = RegionInfoAccessor::new(coprocessor_host.as_mut().unwrap()); // Initialize concurrency manager @@ -1081,7 +1080,6 @@ where servers.importer.clone(), None, self.resource_manager.clone(), - Arc::new(self.region_info_accessor.clone()), ); let import_cfg_mgr = import_service.get_config_manager(); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 65d02f58c08..2593035618d 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -948,7 +948,6 @@ where backup_worker.start(backup_endpoint); // Import SST service. 
- let region_info_accessor = self.region_info_accessor.as_ref().unwrap().clone(); let import_service = ImportSstService::new( self.core.config.import.clone(), self.core.config.raft_store.raft_entry_max_size, @@ -957,7 +956,6 @@ where servers.importer.clone(), Some(self.router.as_ref().unwrap().store_meta().clone()), self.resource_manager.clone(), - Arc::new(region_info_accessor), ); let import_cfg_mgr = import_service.get_config_manager(); diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index ae81cf01646..b270d26a411 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -440,7 +440,7 @@ impl ImportDir { Ok(real_key.map(ToOwned::to_owned)) } - pub fn list_ssts(&self) -> Result> { + pub fn list_ssts(&self) -> Result> { let mut ssts = Vec::new(); for e in file_system::read_dir(&self.root_dir)? { let e = e?; @@ -458,33 +458,20 @@ impl ImportDir { } const SST_SUFFIX: &str = ".sst"; -// version 2: compared to version 1 which is the default version, we will check -// epoch of request and local region in write API. -pub const API_VERSION_2: i32 = 2; - -/// sst_meta_to_path will encode the filepath with default api version (current -/// is 2). So when the SstMeta is created in old version of TiKV and filepath -/// will not correspond to the real file, in the deletion logic we can't remove -/// these files. 
+ pub fn sst_meta_to_path(meta: &SstMeta) -> Result { Ok(PathBuf::from(format!( - "{}_{}_{}_{}_{}_{}{}", + "{}_{}_{}_{}_{}{}", UuidBuilder::from_slice(meta.get_uuid())?.build(), meta.get_region_id(), meta.get_region_epoch().get_conf_ver(), meta.get_region_epoch().get_version(), meta.get_cf_name(), - API_VERSION_2, SST_SUFFIX, ))) } -pub struct SstMetaWithApiVersion { - pub meta: SstMeta, - pub api_version: i32, // in future we may move api_version into SstMeta -} - -pub fn parse_meta_from_path>(path: P) -> Result { +pub fn parse_meta_from_path>(path: P) -> Result { let path = path.as_ref(); let file_name = match path.file_name().and_then(|n| n.to_str()) { Some(name) => name, @@ -513,11 +500,7 @@ pub fn parse_meta_from_path>(path: P) -> Result 5 { - api_version = elems[5].parse()?; - } - Ok(SstMetaWithApiVersion { meta, api_version }) + Ok(meta) } #[cfg(test)] @@ -537,12 +520,11 @@ mod test { meta.mut_region_epoch().set_version(3); let path = sst_meta_to_path(&meta).unwrap(); - let expected_path = format!("{}_1_2_3_default_2.sst", uuid); + let expected_path = format!("{}_1_2_3_default.sst", uuid); assert_eq!(path.to_str().unwrap(), &expected_path); - let meta_with_ver = parse_meta_from_path(path).unwrap(); - assert_eq!(meta, meta_with_ver.meta); - assert_eq!(2, meta_with_ver.api_version); + let new_meta = parse_meta_from_path(path).unwrap(); + assert_eq!(meta, new_meta); } #[test] @@ -561,9 +543,8 @@ mod test { meta.get_region_epoch().get_version(), SST_SUFFIX, )); - let meta_with_ver = parse_meta_from_path(path).unwrap(); - assert_eq!(meta, meta_with_ver.meta); - assert_eq!(1, meta_with_ver.api_version); + let new_meta = parse_meta_from_path(path).unwrap(); + assert_eq!(meta, new_meta); } #[cfg(feature = "test-engines-rocksdb")] @@ -615,20 +596,14 @@ mod test { w.finish().unwrap(); dp.save(arcmgr.as_deref()).unwrap(); let mut ssts = dir.list_ssts().unwrap(); - ssts.iter_mut().for_each(|meta_with_ver| { - let meta = &mut meta_with_ver.meta; + 
ssts.iter_mut().for_each(|meta| { let start = dir .load_start_key_by_meta::(meta, arcmgr.clone()) .unwrap() .unwrap(); meta.mut_range().set_start(start) }); - assert_eq!( - ssts.iter() - .map(|meta_with_ver| { meta_with_ver.meta.clone() }) - .collect(), - vec![meta] - ); + assert_eq!(ssts, vec![meta]); } #[test] diff --git a/components/sst_importer/src/lib.rs b/components/sst_importer/src/lib.rs index ff137005b09..0cfc3bab774 100644 --- a/components/sst_importer/src/lib.rs +++ b/components/sst_importer/src/lib.rs @@ -27,7 +27,7 @@ pub mod sst_importer; pub use self::{ config::{Config, ConfigManager}, errors::{error_inc, Error, Result}, - import_file::{sst_meta_to_path, API_VERSION_2}, + import_file::sst_meta_to_path, import_mode2::range_overlaps, sst_importer::SstImporter, sst_writer::{RawSstWriter, TxnSstWriter}, diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index f36016eb309..5530862e6a3 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -51,7 +51,7 @@ use txn_types::{Key, TimeStamp, WriteRef}; use crate::{ caching::cache_map::{CacheMap, ShareOwned}, - import_file::{ImportDir, ImportFile, SstMetaWithApiVersion}, + import_file::{ImportDir, ImportFile}, import_mode::{ImportModeSwitcher, RocksDbMetricsFn}, import_mode2::{HashRange, ImportModeSwitcherV2}, metrics::*, @@ -1387,7 +1387,7 @@ impl SstImporter { /// List the basic information of the current SST files. /// The information contains UUID, region ID, region Epoch. /// Other fields may be left blank. 
- pub fn list_ssts(&self) -> Result> { + pub fn list_ssts(&self) -> Result> { self.dir.list_ssts() } @@ -1587,9 +1587,9 @@ mod tests { for sst in &ssts { ingested .iter() - .find(|s| s.get_uuid() == sst.meta.get_uuid()) + .find(|s| s.get_uuid() == sst.get_uuid()) .unwrap(); - dir.delete(&sst.meta, key_manager.as_deref()).unwrap(); + dir.delete(sst, key_manager.as_deref()).unwrap(); } assert!(dir.list_ssts().unwrap().is_empty()); } diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 5073304e17a..299e93eb746 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -561,7 +561,6 @@ impl ServerCluster { Arc::clone(&importer), Some(store_meta), resource_manager.clone(), - Arc::new(region_info_accessor.clone()), ); // Create deadlock service. diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index f5c64fa86e9..0002f36d647 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -451,7 +451,6 @@ impl ServerCluster { Arc::clone(&importer), None, resource_manager.clone(), - Arc::new(region_info_accessor.clone()), ); // Create deadlock service. 
diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 6f9f22c9cb4..68403e226f8 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -15,7 +15,6 @@ use std::{ use engine_traits::{CompactExt, MiscExt, CF_DEFAULT, CF_WRITE}; use file_system::{set_io_type, IoType}; use futures::{sink::SinkExt, stream::TryStreamExt, FutureExt, TryFutureExt}; -use futures_executor::block_on; use grpcio::{ ClientStreamingSink, RequestStream, RpcContext, ServerStreamingSink, UnarySink, WriteFlags, }; @@ -28,9 +27,7 @@ use kvproto::{ WriteRequest_oneof_chunk as Chunk, *, }, kvrpcpb::Context, - metapb::RegionEpoch, }; -use raftstore::{coprocessor::RegionInfoProvider, store::util::is_epoch_stale, RegionInfoAccessor}; use raftstore_v2::StoreMeta; use resource_control::{with_resource_limiter, ResourceGroupManager}; use sst_importer::{ @@ -42,7 +39,7 @@ use tikv_kv::{ }; use tikv_util::{ config::ReadableSize, - future::{create_stream_with_buffer, paired_future_callback}, + future::create_stream_with_buffer, sys::thread::ThreadBuildWrapper, time::{Instant, Limiter}, HandyRwLock, @@ -127,7 +124,6 @@ pub struct ImportSstService { limiter: Limiter, task_slots: Arc>>, raft_entry_max_size: ReadableSize, - region_info_accessor: Arc, writer: raft_writer::ThrottledTlsEngineWriter, @@ -322,7 +318,6 @@ impl ImportSstService { importer: Arc, store_meta: Option>>>, resource_manager: Option>, - region_info_accessor: Arc, ) -> Self { let props = tikv_util::thread_group::current_properties(); let eng = Mutex::new(engine.clone()); @@ -370,7 +365,6 @@ impl ImportSstService { limiter: Limiter::new(f64::INFINITY), task_slots: Arc::new(Mutex::new(HashSet::default())), raft_entry_max_size, - region_info_accessor, writer, store_meta, resource_manager, @@ -681,59 +675,6 @@ impl ImportSstService { } } -fn check_local_region_stale( - region_id: u64, - epoch: &RegionEpoch, - region_info_accessor: Arc, -) -> Result<()> { - let (cb, f) = paired_future_callback(); - 
region_info_accessor - .find_region_by_id(region_id, cb) - .map_err(|e| { - Error::Engine(format!("failed to find region {} err {:?}", region_id, e).into()) - })?; - match block_on(f)? { - Some(local_region_info) => { - let local_region_epoch = local_region_info.region.region_epoch.unwrap(); - - // TODO(lance6717): we should only need to check conf_ver because we require all - // peers have SST on the disk, and does not care about which one is - // leader. But since check_sst_for_ingestion also checks epoch version, - // we just keep it here for now. - - // when local region epoch is stale, client can retry write later - if is_epoch_stale(&local_region_epoch, epoch) { - return Err(Error::Engine( - format!("request region {} is ahead of local region, local epoch {:?}, request epoch {:?}, please retry write later", - region_id, local_region_epoch, epoch).into(), - )); - } - // when local region epoch is ahead, client need to rescan region from PD to get - // latest region later - if is_epoch_stale(epoch, &local_region_epoch) { - return Err(Error::Engine( - format!("request region {} is staler than local region, local epoch {:?}, request epoch {:?}, please rescan region later", - region_id, local_region_epoch, epoch).into(), - )); - } - - // not match means to rescan - Ok(()) - } - None => { - // when region not found, we can't tell whether it's stale or ahead, so we just - // return the safest case - Err(Error::Engine( - format!( - "region {} is not found, please rescan region later", - region_id - ) - .into(), - )) - } - } -} - #[macro_export] macro_rules! impl_write { ($fn:ident, $req_ty:ident, $resp_ty:ident, $chunk_ty:ident, $writer_fn:ident) => { @@ -745,7 +686,6 @@ macro_rules! 
impl_write { ) { let import = self.importer.clone(); let tablets = self.tablets.clone(); - let region_info_accessor = self.region_info_accessor.clone(); let (rx, buf_driver) = create_stream_with_buffer(stream, self.cfg.rl().stream_channel_window); let mut rx = rx.map_err(Error::from); @@ -773,15 +713,7 @@ macro_rules! impl_write { } _ => return Err(Error::InvalidChunk), }; - // wait the region epoch on this TiKV to catch up with the epoch - // in request, which comes from PD and represents the majority - // peers' status. let region_id = meta.get_region_id(); - check_local_region_stale( - region_id, - meta.get_region_epoch(), - region_info_accessor, - )?; let tablet = match tablets.get(region_id) { Some(t) => t, None => { @@ -1455,30 +1387,19 @@ fn write_needs_restore(write: &[u8]) -> bool { #[cfg(test)] mod test { - use std::{ - collections::HashMap, - sync::{Arc, Mutex}, - }; + use std::collections::HashMap; use engine_traits::{CF_DEFAULT, CF_WRITE}; use kvproto::{ kvrpcpb::Context, - metapb::{Region, RegionEpoch}, + metapb::RegionEpoch, raft_cmdpb::{RaftCmdRequest, Request}, }; - use protobuf::{Message, SingularPtrField}; - use raft::StateRole::Follower; - use raftstore::{ - coprocessor::{region_info_accessor::Callback, RegionInfoProvider}, - RegionInfo, - }; + use protobuf::Message; use tikv_kv::{Modify, WriteData}; use txn_types::{Key, TimeStamp, Write, WriteBatchFlags, WriteType}; - use crate::{ - import::sst_service::{check_local_region_stale, RequestCollector}, - server::raftkv, - }; + use crate::{import::sst_service::RequestCollector, server::raftkv}; fn write(key: &[u8], ty: WriteType, commit_ts: u64, start_ts: u64) -> (Vec, Vec) { let k = Key::from_raw(key).append_ts(TimeStamp::new(commit_ts)); @@ -1762,100 +1683,4 @@ mod test { } assert_eq!(total, 100); } - - #[test] - fn test_write_rpc_check_region_epoch() { - struct MockRegionInfoProvider { - map: Mutex>, - } - impl RegionInfoProvider for MockRegionInfoProvider { - fn find_region_by_id( - &self, - 
region_id: u64, - callback: Callback>, - ) -> Result<(), raftstore::coprocessor::Error> { - callback(self.map.lock().unwrap().get(®ion_id).cloned()); - Ok(()) - } - } - - let mock_provider = Arc::new(MockRegionInfoProvider { - map: Mutex::new(HashMap::new()), - }); - - let mut req_epoch = RegionEpoch { - conf_ver: 10, - version: 10, - ..Default::default() - }; - // test for region not found - let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); - assert!(result.is_err()); - // check error message contains "rescan region later", client will match this - // string pattern - assert!( - result - .unwrap_err() - .to_string() - .contains("rescan region later") - ); - - let mut local_region_info = RegionInfo { - region: Region { - id: 1, - region_epoch: SingularPtrField::some(req_epoch.clone()), - ..Default::default() - }, - role: Follower, - buckets: 1, - }; - mock_provider - .map - .lock() - .unwrap() - .insert(1, local_region_info.clone()); - // test the local region epoch is same as request - let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); - result.unwrap(); - - // test the local region epoch is ahead of request - local_region_info - .region - .region_epoch - .as_mut() - .unwrap() - .conf_ver = 11; - mock_provider - .map - .lock() - .unwrap() - .insert(1, local_region_info.clone()); - let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); - assert!(result.is_err()); - // check error message contains "rescan region later", client will match this - // string pattern - assert!( - result - .unwrap_err() - .to_string() - .contains("rescan region later") - ); - - req_epoch.conf_ver = 11; - let result = check_local_region_stale(1, &req_epoch, mock_provider.clone()); - result.unwrap(); - - // test the local region epoch is staler than request - req_epoch.version = 12; - let result = check_local_region_stale(1, &req_epoch, mock_provider); - assert!(result.is_err()); - // check error message contains 
"retry write later", client will match this - // string pattern - assert!( - result - .unwrap_err() - .to_string() - .contains("retry write later") - ); - } } From 7953ea518ca2768cc22c847bf10e7890063c6549 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Tue, 17 Oct 2023 22:25:58 -0700 Subject: [PATCH 0967/1149] raftstore-v2: Allow rollback merge during unsafe recovery for raftstore v2 (#15780) ref tikv/tikv#15580 Allow rollback merge during unsafe recovery for raftstore v2 Signed-off-by: Yang Zhang Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../operation/command/admin/merge/commit.rs | 2 +- .../operation/command/admin/merge/prepare.rs | 2 +- .../src/operation/command/admin/mod.rs | 8 ++- .../raftstore-v2/src/operation/command/mod.rs | 13 ++-- components/raftstore/src/store/peer.rs | 1 + .../failpoints/cases/test_unsafe_recovery.rs | 63 +++---------------- 6 files changed, 25 insertions(+), 64 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index b12ba9eaf9d..da26a423a97 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -371,7 +371,7 @@ impl Peer { let mut proposal_ctx = ProposalContext::empty(); proposal_ctx.insert(ProposalContext::COMMIT_MERGE); let data = req.write_to_bytes().unwrap(); - self.propose_with_ctx(store_ctx, data, proposal_ctx.to_vec()) + self.propose_with_ctx(store_ctx, data, proposal_ctx) } } diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index 4a5875f7097..5de1c4cfe01 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -214,7 +214,7 @@ impl Peer { let mut proposal_ctx = 
ProposalContext::empty(); proposal_ctx.insert(ProposalContext::PREPARE_MERGE); let data = req.write_to_bytes().unwrap(); - self.propose_with_ctx(store_ctx, data, proposal_ctx.to_vec()) + self.propose_with_ctx(store_ctx, data, proposal_ctx) }); if r.is_ok() { self.proposal_control_mut().set_pending_prepare_merge(false); diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index 9d7fee55ae4..b861f86f859 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -30,7 +30,7 @@ use raftstore::{ cmd_resp, fsm::{apply, apply::validate_batch_split}, msg::ErrorCallback, - Transport, + ProposalContext, Transport, }, Error, }; @@ -237,10 +237,14 @@ impl Peer { } } AdminCmdType::CompactLog => self.propose_compact_log(ctx, req), - AdminCmdType::UpdateGcPeer | AdminCmdType::RollbackMerge => { + AdminCmdType::UpdateGcPeer => { let data = req.write_to_bytes().unwrap(); self.propose(ctx, data) } + AdminCmdType::RollbackMerge => { + let data = req.write_to_bytes().unwrap(); + self.propose_with_ctx(ctx, data, ProposalContext::ROLLBACK_MERGE) + } AdminCmdType::PrepareMerge => self.propose_prepare_merge(ctx, req), AdminCmdType::CommitMerge => self.propose_commit_merge(ctx, req), AdminCmdType::PrepareFlashback | AdminCmdType::FinishFlashback => { diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 70cdbfda237..b93ea700f80 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -43,7 +43,7 @@ use raftstore::{ }, msg::ErrorCallback, util::{self, check_flashback_state}, - Config, Transport, WriteCallback, + Config, ProposalContext, Transport, WriteCallback, }, Error, Result, }; @@ -202,7 +202,8 @@ impl Peer { // progress less error-prone. 
if !(admin_type.is_some() && (admin_type.unwrap() == AdminCmdType::ChangePeer - || admin_type.unwrap() == AdminCmdType::ChangePeerV2)) + || admin_type.unwrap() == AdminCmdType::ChangePeerV2 + || admin_type.unwrap() == AdminCmdType::RollbackMerge)) { return Err(Error::RecoveryInProgress(self.region_id())); } @@ -239,7 +240,7 @@ impl Peer { store_ctx: &mut StoreContext, data: Vec, ) -> Result { - self.propose_with_ctx(store_ctx, data, vec![]) + self.propose_with_ctx(store_ctx, data, ProposalContext::empty()) } #[inline] @@ -247,12 +248,12 @@ impl Peer { &mut self, store_ctx: &mut StoreContext, data: Vec, - proposal_ctx: Vec, + proposal_ctx: ProposalContext, ) -> Result { // Should not propose normal in force leader state. // In `pre_propose_raft_command`, it rejects all the requests expect // conf-change if in force leader state. - if self.has_force_leader() { + if self.has_force_leader() && proposal_ctx != ProposalContext::ROLLBACK_MERGE { store_ctx.raft_metrics.invalid_proposal.force_leader.inc(); panic!( "[{}] {} propose normal in force leader state {:?}", @@ -274,7 +275,7 @@ impl Peer { }); } let last_index = self.raft_group().raft.raft_log.last_index(); - self.raft_group_mut().propose(proposal_ctx, data)?; + self.raft_group_mut().propose(proposal_ctx.to_vec(), data)?; if self.raft_group().raft.raft_log.last_index() == last_index { // The message is dropped silently, this usually due to leader absence // or transferring leader. Both cases can be considered as NotLeader error. diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 85b8798bfb1..e72d32f8e91 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -242,6 +242,7 @@ bitflags! 
{ const SPLIT = 0b0000_0010; const PREPARE_MERGE = 0b0000_0100; const COMMIT_MERGE = 0b0000_1000; + const ROLLBACK_MERGE = 0b0001_0000; } } diff --git a/tests/failpoints/cases/test_unsafe_recovery.rs b/tests/failpoints/cases/test_unsafe_recovery.rs index 9e5a5dffcd9..95d45c8e99c 100644 --- a/tests/failpoints/cases/test_unsafe_recovery.rs +++ b/tests/failpoints/cases/test_unsafe_recovery.rs @@ -442,6 +442,7 @@ fn test_unsafe_recovery_demotion_reentrancy() { } #[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] fn test_unsafe_recovery_rollback_merge() { let mut cluster = new_cluster(0, 3); cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40); @@ -471,11 +472,15 @@ fn test_unsafe_recovery_rollback_merge() { let right_peer_2 = find_peer(&right, nodes[2]).unwrap().to_owned(); cluster.must_transfer_leader(left.get_id(), left_peer_2); cluster.must_transfer_leader(right.get_id(), right_peer_2); - cluster.must_try_merge(left.get_id(), right.get_id()); + cluster.try_merge(left.get_id(), right.get_id()); + let right_peer_0 = find_peer(&right, nodes[0]).unwrap().to_owned(); + pd_client.must_remove_peer(right.get_id(), right_peer_0); + cluster.must_remove_region(nodes[0], right.get_id()); // Makes the group lose its quorum. cluster.stop_node(nodes[1]); cluster.stop_node(nodes[2]); + fail::remove("on_schedule_merge"); { let put = new_put_cmd(b"k2", b"v2"); let req = new_request( @@ -491,7 +496,8 @@ fn test_unsafe_recovery_rollback_merge() { } cluster.must_enter_force_leader(left.get_id(), nodes[0], vec![nodes[1], nodes[2]]); - cluster.must_enter_force_leader(right.get_id(), nodes[0], vec![nodes[1], nodes[2]]); + // Allow rollback merge to finish. + sleep_ms(100); // Construct recovery plan. 
let mut plan = pdpb::RecoveryPlan::default(); @@ -505,23 +511,12 @@ fn test_unsafe_recovery_rollback_merge() { let mut left_demote = pdpb::DemoteFailedVoters::default(); left_demote.set_region_id(left.get_id()); left_demote.set_failed_voters(left_demote_peers.into()); - let right_demote_peers: Vec = right - .get_peers() - .iter() - .filter(|&peer| peer.get_store_id() != nodes[0]) - .cloned() - .collect(); - let mut right_demote = pdpb::DemoteFailedVoters::default(); - right_demote.set_region_id(right.get_id()); - right_demote.set_failed_voters(right_demote_peers.into()); plan.mut_demotes().push(left_demote); - plan.mut_demotes().push(right_demote); // Triggers the unsafe recovery plan execution. pd_client.must_set_unsafe_recovery_plan(nodes[0], plan.clone()); cluster.must_send_store_heartbeat(nodes[0]); - // Can't propose demotion as it's in merging mode let mut store_report = None; for _ in 0..20 { store_report = pd_client.must_get_store_report(nodes[0]); @@ -531,58 +526,18 @@ fn test_unsafe_recovery_rollback_merge() { sleep_ms(100); } assert_ne!(store_report, None); - let has_force_leader = store_report - .unwrap() - .get_peer_reports() - .iter() - .any(|p| p.get_is_force_leader()); - // Force leader is not exited due to demotion failure - assert!(has_force_leader); - - fail::remove("on_schedule_merge"); - fail::cfg("on_schedule_merge_ret_err", "return()").unwrap(); - - // Make sure merge check is scheduled, and rollback merge is triggered - sleep_ms(50); - - // Re-triggers the unsafe recovery plan execution. 
- pd_client.must_set_unsafe_recovery_plan(nodes[0], plan); - cluster.must_send_store_heartbeat(nodes[0]); - let mut store_report = None; - for _ in 0..20 { - store_report = pd_client.must_get_store_report(nodes[0]); - if store_report.is_some() { - break; - } - sleep_ms(100); - } - assert_ne!(store_report, None); - // No force leader - for peer_report in store_report.unwrap().get_peer_reports() { - assert!(!peer_report.get_is_force_leader()); - } - // Demotion is done let mut demoted = false; for _ in 0..10 { let new_left = block_on(pd_client.get_region_by_id(left.get_id())) .unwrap() .unwrap(); - let new_right = block_on(pd_client.get_region_by_id(right.get_id())) - .unwrap() - .unwrap(); assert_eq!(new_left.get_peers().len(), 3); - assert_eq!(new_right.get_peers().len(), 3); demoted = new_left .get_peers() .iter() .filter(|peer| peer.get_store_id() != nodes[0]) - .all(|peer| peer.get_role() == metapb::PeerRole::Learner) - && new_right - .get_peers() - .iter() - .filter(|peer| peer.get_store_id() != nodes[0]) - .all(|peer| peer.get_role() == metapb::PeerRole::Learner); + .all(|peer| peer.get_role() == metapb::PeerRole::Learner); if demoted { break; } From 3f53e5976c2c1e8578897b73f46424fdf700bfe6 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Thu, 19 Oct 2023 15:10:59 +0800 Subject: [PATCH 0968/1149] storage/txn: Add txn_status_cache module (#15749) ref tikv/tikv#11187 This PR adds a module named txn_status_cache. It provides the type `TxnStatusCache`, which will be then put into scheduler to cache start_ts->commit_ts information of recently-committed transactions. This is part of fixing issue #11187 and will also be used for possible future optimizations such as checking txn status locally. 
Signed-off-by: MyonKeminta Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tikv_util/src/lru.rs | 11 +- src/storage/metrics.rs | 15 + src/storage/txn/mod.rs | 1 + src/storage/txn/txn_status_cache.rs | 961 ++++++++++++++++++++++++++++ 4 files changed, 983 insertions(+), 5 deletions(-) create mode 100644 src/storage/txn/txn_status_cache.rs diff --git a/components/tikv_util/src/lru.rs b/components/tikv_util/src/lru.rs index b5bfdfbf7d5..302bfc9264b 100644 --- a/components/tikv_util/src/lru.rs +++ b/components/tikv_util/src/lru.rs @@ -178,11 +178,12 @@ impl SizePolicy for CountTracker { } } -/// Some [`EvictPolicy`] may need to know what the entry bing popped out is to -/// determine if it really can be popped. But there is performance cost to -/// always get the tail entry. So we pass this interface to the `should_evict` -/// function. An implementation of `EvictPolicy` can read the tail entry only -/// when it really needs. +/// Some [`EvictPolicy`] (e.g. the `TxnStatusCache` in +/// `tikv::storage::txn::txn_status_cache` module) may need to know what the +/// entry bing popped out is to determine if it really can be popped. But there +/// is performance cost to always get the tail entry. So we pass this interface +/// to the `should_evict` function. An implementation of `EvictPolicy` can read +/// the tail entry only when it really needs. pub trait GetTailEntry { fn get_tail_entry(&self) -> Option<(&K, &V)>; } diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index e9477b56b0f..cf7956d76b7 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -370,6 +370,13 @@ make_static_metric! { keys, }, } + + pub struct TxnStatusCacheSizeGauge: IntGauge { + "type" => { + used, + allocated, + } + } } lazy_static! { @@ -601,4 +608,12 @@ lazy_static! 
{ exponential_buckets(1.0, 2.0, 16).unwrap() ) .unwrap(); + + pub static ref SCHED_TXN_STATUS_CACHE_SIZE: TxnStatusCacheSizeGauge = register_static_int_gauge_vec!( + TxnStatusCacheSizeGauge, + "tikv_scheduler_txn_status_cache_size", + "Statistics of size and capacity of txn status cache (represented in count of entries)", + &["type"] + ) + .unwrap(); } diff --git a/src/storage/txn/mod.rs b/src/storage/txn/mod.rs index 640c534fc86..8c30ae0a068 100644 --- a/src/storage/txn/mod.rs +++ b/src/storage/txn/mod.rs @@ -6,6 +6,7 @@ pub mod commands; pub mod flow_controller; pub mod sched_pool; pub mod scheduler; +pub mod txn_status_cache; mod actions; mod latch; diff --git a/src/storage/txn/txn_status_cache.rs b/src/storage/txn/txn_status_cache.rs new file mode 100644 index 00000000000..2428bbb99c5 --- /dev/null +++ b/src/storage/txn/txn_status_cache.rs @@ -0,0 +1,961 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module implements a cache for the status of recent finished +//! transactions. When a transaction is committed or rolled back, we store the +//! information in the cache for a while. Later, in some cases, one can find +//! the transaction status without accessing the physical storage. This helps +//! to quickly find out the transaction status in some cases. +//! +//! > **Note:** +//! > * Currently, only committed transactions are cached. We may also cache +//! > rolled-back transactions in the future. +//! > * Currently, the cache is only used to filter unnecessary stale prewrite +//! > requests. We will also consider use the cache for other purposes in the +//! > future. +//! +//! ## Why we need this? +//! +//! ### For filtering out unwanted late-arrived stale prewrite requests +//! +//! This solves a problem which has a complicated background. +//! +//! There's such an optimization in pessimistic transactions when TiKV runs +//! accompanied with TiDB: non-unique index keys don't need to be pessimistic- +//! 
locked, and WRITE CF don't need to be checked either when prewriting. The +//! correctness in case there's any kinds of conflicts will be protected by +//! the corresponding row key, as the index key is never written without +//! writing the corresponding row key. +//! +//! However, it's later found to be problematic, especially with async commit +//! and 1PC, as the prewrite requests on these index keys lost its idempotency. +//! You can see [this issue](https://github.com/tikv/tikv/issues/11187) to see +//! how it causes problems, including those that affects transaction +//! correctness. +//! +//! The problem happens when the prewrite request to the same index key is +//! sent more than once. Our first solution is to add a `is_retry_request` flag +//! to the second (or even more) requests, which is sent due to retrying from +//! the client side. But it's still imperfect, considering that it's +//! theoretically possible that the original request arrives to TiKV later than +//! the retried one. In fact, we once observed this happens in an environment +//! where the network is terribly unstable. +//! +//! Our second solution, additional to the previous one, is to use this cache. +//! Each committed transaction should be guaranteed to be kept in the cache for +//! [a long-enough time](CACHE_ITEMS_REQUIRED_KEEP_TIME). When a prewrite +//! request is received, it should check the cache before executing. If it finds +//! its belonging transaction is already committed, it won't skip constraint +//! check in WRITE CF. Note that if the index key is already committed but the +//! transaction info is not cached, then a late-arrived prewrite request cannot +//! be protected by this mechanism. This means we shouldn't miss any cacheable +//! transactions, and it is the reason why committed transactions should be +//! cached for *a long-enough time*. +//! +//! Unfortunately, the solution is still imperfect. As it's already known, it +//! 
may still be problematic due to the following reasons: +//! +//! 1. We don't have mechanism to refuse requests that have +//! past more than [CACHE_ITEMS_REQUIRED_KEEP_TIME] since they were sent. +//! 2. To prevent the cache from consuming too much more memory than expected, +//! we have a limit to the capacity (though the limit is very large), and it's +//! configurable (so the cache can be disabled, see how the `capacity` parameter +//! of function [TxnStatusCache::new] is used) as a way to escape from potential +//! faults. +//! 3. The cache can't be synced across different TiKV instances. +//! +//! The third case above needs detailed explanation to be clarified. This is +//! an example of the problem: +//! +//! 1. Client try to send prewrite request to TiKV A, who has the leader of the +//! region containing a index key. The request is not received by TiKV and the +//! client retries. +//! 2. The leader is transferred to TiKV B, and the retries prewrite request +//! is sent to it and processed successfully. +//! 3. The transaction is committed on TiKV B, not being known by TiKV A. +//! 4. The leader transferred back to TiKV A. +//! 5. The original request arrives to TiKV A and being executed. As the +//! status of the transaction is not in the cache in TiKV A, the prewrite +//! request will be handled in normal way, skipping constraint checks. +//! +//! As of the time when this module is written, the above remaining cases have +//! not yet been handled, considering the extremely low possibility to happen +//! and high complexity to fix. +//! +//! The perfect and most elegant way to fix all of these problem is never to +//! skip constraint checks or never skipping pessimistic locks for index keys. +//! Or to say, totally remove the optimization mentioned above on index keys. +//! But for historical reason, this may lead to significant performance +//! regression in existing clusters. +//! +//! ### For read data locked by large transactions more efficiently +//! 
+//! * Note: the `TxnStatusCache` is designed prepared for this usage, but not +//! used yet for now. +//! +//! Consider the case that a very-large transaction locked a lot of keys after +//! prewriting, while many simple reads and writes executes frequently, thus +//! these simple transactions frequently meets the lock left by the large +//! transaction. It will be very inefficient for these small transactions to +//! come back to the client and start resolve lock procedure. Even if the client +//! side has the cache of that transaction, it still wastes an RTT. +//! +//! There would be more possibilities if we have such a cache in TiKV side: for +//! read requests, it can check the cache to know whether it can read from the +//! lock; and for write requests, if it finds the transaction of that lock is +//! already committed, it can merge together the resolve-lock-committing and the +//! write operation that the request needs to perform. + +use std::{ + sync::{atomic::AtomicU64, Arc}, + time::{Duration, SystemTime, UNIX_EPOCH}, +}; + +use crossbeam::utils::CachePadded; +use parking_lot::Mutex; +use tikv_util::{ + lru, + lru::{GetTailEntry, LruCache}, +}; +use txn_types::TimeStamp; + +use crate::storage::metrics::*; + +const TXN_STATUS_CACHE_SLOTS: usize = 128; + +/// An cache item should be kept for at least this time. +/// Actually this should be guaranteed only for committed transactions. See +/// [this section](# +/// for-filtering-out-unwanted-late-arrived-stale-prewrite-requests) for details +/// about why this is needed. +const CACHE_ITEMS_REQUIRED_KEEP_TIME: Duration = Duration::from_secs(30); + +struct CacheEntry { + commit_ts: TimeStamp, + /// The system timestamp in milliseconds when the entry is inserted to the + /// cache. + insert_time: u64, +} + +/// Defines the policy to evict expired entries from the cache. +/// [`TxnStatusCache`] needs to keep entries for a while, so the common +/// policy that only limiting capacity is not proper to be used here. 
+struct TxnStatusCacheEvictPolicy { + required_keep_time_millis: u64, + #[cfg(test)] + simulated_system_time: Option>, +} + +impl TxnStatusCacheEvictPolicy { + fn new( + required_keep_time: Duration, + #[allow(unused_variables)] simulated_system_time: Option>, + ) -> Self { + Self { + required_keep_time_millis: required_keep_time.as_millis() as u64, + #[cfg(test)] + simulated_system_time, + } + } + + #[inline] + #[cfg(not(test))] + fn now(&self) -> SystemTime { + SystemTime::now() + } + + /// When used in tests, the system time can be simulated by controlling the + /// field `simulated_system_time`. + #[inline] + #[cfg(test)] + fn now(&self) -> SystemTime { + // Always get the system time to simulate the latency. + let now = SystemTime::now(); + if let Some(pseudo_system_time) = &self.simulated_system_time { + UNIX_EPOCH + + std::time::Duration::from_millis( + pseudo_system_time.load(std::sync::atomic::Ordering::Acquire), + ) + } else { + now + } + } +} + +impl lru::EvictPolicy for TxnStatusCacheEvictPolicy { + fn should_evict( + &self, + current_size: usize, + capacity: usize, + get_tail_entry: &impl GetTailEntry, + ) -> bool { + // See how much time has been elapsed since the tail entry is inserted. + // If it's long enough, remove it. + if let Some((_, v)) = get_tail_entry.get_tail_entry() { + if self.now().duration_since(UNIX_EPOCH).unwrap().as_millis() as u64 + > self.required_keep_time_millis + v.insert_time + { + return true; + } + } + + // If the capacity limit is exceeded, remove it. + current_size > capacity + } +} + +type TxnStatusCacheSlot = + LruCache; + +/// The cache for storing transaction status. It holds recent +/// `start_ts` -> `commit_ts` pairs for a while, which can be useful for quickly +/// but not strictly determining transaction status. +/// +/// `TxnStatusCache` is divided into several slots +/// to make the lock more fine-grained. Each slot uses an [`LruCache`] as the +/// internal implementation, with customized evict policy. 
However, we do not +/// always adopt the LRU behavior. Some operation to an existing entry in the +/// cache won't promote it to the most-recent place. +/// +/// Note that the `TxnStatusCache` updates metrics in some operations assuming +/// there's at most one instance of `TxnStatusCache` in a process. +pub struct TxnStatusCache { + slots: Vec>>, + is_enabled: bool, +} + +unsafe impl Sync for TxnStatusCache {} + +impl TxnStatusCache { + fn new_impl( + slots: usize, + required_keep_time: Duration, + capacity: usize, + simulated_system_time: Option>, + ) -> Self { + if capacity == 0 { + return Self { + slots: vec![], + is_enabled: false, + }; + } + + // The limit of the LruCache of each slot. + let allowed_capacity_per_slot = capacity / slots; + // The total memory allocated initially by the LruCache's internal data + // structure for all slots. + + let mut initial_allocated_capacity_total = 0; + let res = Self { + slots: (0..slots) + .map(|_| { + let cache = LruCache::new( + allowed_capacity_per_slot, + 0, + lru::CountTracker::default(), + TxnStatusCacheEvictPolicy::new( + required_keep_time, + simulated_system_time.clone(), + ), + ); + let allocated_capacity = cache.internal_allocated_capacity(); + initial_allocated_capacity_total += allocated_capacity; + Mutex::new(cache).into() + }) + .collect(), + is_enabled: true, + }; + SCHED_TXN_STATUS_CACHE_SIZE + .allocated + .set(initial_allocated_capacity_total as i64); + res + } + + pub fn new(capacity: usize) -> Self { + Self::with_slots_and_time_limit( + TXN_STATUS_CACHE_SLOTS, + CACHE_ITEMS_REQUIRED_KEEP_TIME, + capacity, + ) + } + + #[cfg(test)] + pub fn new_for_test() -> Self { + // 1M capacity should be enough for tests. 
+ Self::with_slots_and_time_limit(16, CACHE_ITEMS_REQUIRED_KEEP_TIME, 1 << 20) + } + + pub fn with_slots_and_time_limit( + slots: usize, + required_keep_time: Duration, + capacity: usize, + ) -> Self { + Self::new_impl(slots, required_keep_time, capacity, None) + } + + /// Create a `TxnStatusCache` instance for test purpose, with simulating + /// system time enabled. This helps when testing functionalities that are + /// related to system time. + /// + /// An `AtomicU64` will be returned. Store timestamps + /// in milliseconds in it to control the time. + #[cfg(test)] + fn with_simulated_system_time( + slots: usize, + requried_keep_time: Duration, + capacity: usize, + ) -> (Self, Arc) { + let system_time = Arc::new(AtomicU64::new(0)); + let res = Self::new_impl( + slots, + requried_keep_time, + capacity, + Some(system_time.clone()), + ); + (res, system_time) + } + + fn slot_index(&self, start_ts: TimeStamp) -> usize { + fxhash::hash(&start_ts) % self.slots.len() + } + + /// Insert a transaction status into the cache. The current system time + /// should be passed from outside to avoid getting system time repeatedly + /// when multiple items is being inserted. + /// + /// If the transaction's information is already in the cache, it will + /// **NOT** be promoted to the most-recent place of the internal LRU. + pub fn insert(&self, start_ts: TimeStamp, commit_ts: TimeStamp, now: SystemTime) { + if !self.is_enabled { + return; + } + + let insert_time = now.duration_since(UNIX_EPOCH).unwrap().as_millis() as u64; + let mut slot = self.slots[self.slot_index(start_ts)].lock(); + let previous_size = slot.size(); + let previous_allocated = slot.internal_allocated_capacity(); + slot.insert_if_not_exist( + start_ts, + CacheEntry { + commit_ts, + insert_time, + }, + ); + let size = slot.size(); + let allocated = slot.internal_allocated_capacity(); + // Update statistics. + // CAUTION: Assuming that only one TxnStatusCache instance is in a TiKV process. 
+ SCHED_TXN_STATUS_CACHE_SIZE + .used + .add(size as i64 - previous_size as i64); + SCHED_TXN_STATUS_CACHE_SIZE + .allocated + .add(allocated as i64 - previous_allocated as i64); + } + + /// Try to get an item from the cache, without promoting the item (if + /// exists) to the most recent place. + pub fn get_no_promote(&self, start_ts: TimeStamp) -> Option { + if !self.is_enabled { + return None; + } + + let slot = self.slots[self.slot_index(start_ts)].lock(); + slot.get_no_promote(&start_ts).map(|entry| entry.commit_ts) + } + + pub fn get(&self, start_ts: TimeStamp) -> Option { + if !self.is_enabled { + return None; + } + + let mut slot = self.slots[self.slot_index(start_ts)].lock(); + slot.get(&start_ts).map(|entry| entry.commit_ts) + } +} + +#[cfg(test)] +mod tests { + use std::{ + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + time::{Duration, Instant, SystemTime}, + }; + + use rand::{prelude::SliceRandom, Rng}; + + use super::*; + + fn bench_insert_impl(b: &mut test::Bencher, init_size: usize) { + let (c, time) = TxnStatusCache::with_simulated_system_time( + TXN_STATUS_CACHE_SLOTS, + Duration::from_millis(init_size as u64), + 1 << 20, + ); + let start_time = SystemTime::now(); + // Spread these items evenly in a specific time limit, so that every time + // a new item is inserted, an item will be popped out. + for i in 1..=init_size { + c.insert( + (i as u64).into(), + (i as u64 + 1).into(), + start_time + Duration::from_millis(i as u64), + ); + } + let mut current_time_shift = (init_size + 1) as u64; + b.iter(|| { + let simulated_now = start_time + Duration::from_millis(current_time_shift); + // Simulate the system time advancing. 
+ time.store( + simulated_now + .duration_since(UNIX_EPOCH) + .unwrap() + .as_millis() as u64, + Ordering::Release, + ); + c.insert( + current_time_shift.into(), + (current_time_shift + 1).into(), + simulated_now, + ); + current_time_shift += 1; + }); + test::black_box(&c); + } + + fn bench_get_impl(b: &mut test::Bencher, init_size: usize) { + let c = TxnStatusCache::with_slots_and_time_limit( + TXN_STATUS_CACHE_SLOTS, + CACHE_ITEMS_REQUIRED_KEEP_TIME, + 1 << 20, + ); + let now = SystemTime::now(); + for i in 1..=init_size { + c.insert( + (i as u64).into(), + (i as u64 + 1).into(), + now + Duration::from_millis(i as u64), + ); + } + let rand_range = if init_size == 0 { 10000 } else { init_size } as u64; + b.iter(|| { + let ts = rand::thread_rng().gen_range(0u64, rand_range); + let res = c.get_no_promote(ts.into()); + test::black_box(&res); + }) + } + + #[bench] + fn bench_insert_empty(b: &mut test::Bencher) { + bench_insert_impl(b, 0); + } + + #[bench] + fn bench_insert_100000(b: &mut test::Bencher) { + bench_insert_impl(b, 100000); + } + + #[bench] + fn bench_get_empty(b: &mut test::Bencher) { + bench_get_impl(b, 0); + } + + #[bench] + fn bench_get_100000(b: &mut test::Bencher) { + bench_get_impl(b, 100000); + } + + /// A simple statistic tool for collecting a set of data and calculating the + /// average, stddev, and percentiles (by using a linear histogram). + /// Data is collected in u128, and results are given in f64. 
+ struct SimpleStatistics { + sum: u128, + sum_square: u128, + count: usize, + bucket_width: u128, + buckets: Vec, + } + + impl SimpleStatistics { + fn new(bucket_width: u128) -> Self { + Self { + sum: 0, + sum_square: 0, + count: 0, + bucket_width, + buckets: vec![], + } + } + + /// Merge another instance into the current one + fn add(&mut self, other: Self) { + self.sum += other.sum; + self.sum_square += other.sum_square; + self.count += other.count; + assert_eq!(self.bucket_width, other.bucket_width); + if self.buckets.len() < other.buckets.len() { + self.buckets.resize(other.buckets.len(), 0); + } + for (count, other_count) in self.buckets.iter_mut().zip(other.buckets.iter()) { + *count += *other_count + } + } + + fn avg(&self) -> f64 { + self.sum as f64 / (self.count as f64) + } + + fn stddev(&self) -> f64 { + let avg = self.avg(); + let sum_sqr_diff: f64 = + (self.sum_square as f64) - (self.sum as f64 * avg * 2.0) + avg * self.count as f64; + (sum_sqr_diff / (self.count - 1) as f64).sqrt() + } + + /// Calculate the percentile value at specified position (should be in + /// range [0, 1]) + fn percentile(&self, position: f64) -> f64 { + let mut bucket = self.buckets.len(); + let mut prefix_sum = self.count; + while bucket > 0 { + bucket -= 1; + prefix_sum -= self.buckets[bucket]; + let prefix_percentile = prefix_sum as f64 / self.count as f64; + if prefix_percentile <= position { + assert_le!(prefix_sum as f64, position * self.count as f64); + assert_lt!( + position * self.count as f64, + (prefix_sum + self.buckets[bucket]) as f64 + ); + break; + } + } + + bucket as f64 * self.bucket_width as f64 + + (position * self.count as f64 - prefix_sum as f64) * self.bucket_width as f64 + / self.buckets[bucket] as f64 + } + + fn observe(&mut self, value: u128) { + self.sum += value; + self.sum_square += value * value; + self.count += 1; + let bucket = (value / self.bucket_width) as usize; + if self.buckets.len() <= bucket { + self.buckets.resize(bucket + 1, 0); + } + 
self.buckets[bucket] += 1; + } + } + + fn bench_concurrent_impl( + name: &str, + threads: usize, + function: impl Fn(u64) -> T + Send + Sync + 'static, + ) { + let start_time = Instant::now(); + // Run the benchmark code repeatedly for 10 seconds. + const TIME_LIMIT: Duration = Duration::from_secs(10); + let iteration = Arc::new(AtomicU64::new(0)); + + // Make the lifetime checker happy. + let function = Arc::new(function); + + let mut handles = Vec::with_capacity(threads); + for _ in 0..threads { + let f = function.clone(); + let iteration = iteration.clone(); + let handle = std::thread::spawn(move || { + let mut stats = SimpleStatistics::new(20); + loop { + if start_time.elapsed() > TIME_LIMIT { + break; + } + let i = iteration.fetch_add(1, Ordering::SeqCst); + let iter_start_time = Instant::now(); + test::black_box(f(i)); + let duration = iter_start_time.elapsed(); + stats.observe(duration.as_nanos()); + } + stats + }); + handles.push(handle); + } + + let mut total_stats = SimpleStatistics::new(20); + for h in handles { + total_stats.add(h.join().unwrap()); + } + + println!( + "benchmark {}: duration per iter: avg: {:?}, stddev: {:?}, percentile .99: {:?}, percentile .999: {:?}", + name, + Duration::from_nanos(total_stats.avg() as u64), + Duration::from_nanos(total_stats.stddev() as u64), + Duration::from_nanos(total_stats.percentile(0.99) as u64), + Duration::from_nanos(total_stats.percentile(0.999) as u64), + ); + } + + fn bench_txn_status_cache_concurrent_impl( + threads: usize, + init_size: usize, + simulate_contention: bool, + get_before_insert: bool, + ) { + let slots = if simulate_contention { + 1 + } else { + TXN_STATUS_CACHE_SLOTS + }; + let (c, time) = TxnStatusCache::with_simulated_system_time( + slots, + Duration::from_millis(init_size as u64), + 1 << 20, + ); + let start_time = SystemTime::now(); + for i in 1..=init_size { + c.insert( + (i as u64).into(), + (i as u64 + 1).into(), + start_time + Duration::from_millis(i as u64), + ); + } + + let name 
= format!( + "bench_concurrent_{}_{}_size{}{}", + if get_before_insert { + "get_and_insert" + } else { + "insert" + }, + threads, + init_size, + if simulate_contention { + "_contention" + } else { + "" + }, + ); + + bench_concurrent_impl(&name, threads, move |iter| { + let time_shift = init_size as u64 + iter; + let now = start_time + Duration::from_millis(time_shift); + time.store( + now.duration_since(UNIX_EPOCH).unwrap().as_millis() as u64, + Ordering::Release, + ); + + if get_before_insert { + test::black_box(c.get_no_promote(time_shift.into())); + } + c.insert(time_shift.into(), (time_shift + 1).into(), now); + test::black_box(&c); + }); + } + + #[bench] + #[ignore] + fn bench_txn_status_cache_concurrent(_b: &mut test::Bencher) { + // This case is implemented to run the concurrent benchmark in a handy way + // just like running other normal benchmarks. However, it doesn't seem + // to be possible to benchmark an operation in concurrent way by using + // either the built-in bencher or criterion. + // Here we test it in our own way without using the built-in bencher, + // and output the result by stdout. + // When you need to run this benchmark, comment out the `#[ignore]` and + // add --nocapture in your benchmark command line to get the result. 
+ bench_txn_status_cache_concurrent_impl(16, 10000, false, false); + bench_txn_status_cache_concurrent_impl(16, 10000, true, false); + bench_txn_status_cache_concurrent_impl(16, 10000, false, true); + bench_txn_status_cache_concurrent_impl(16, 10000, true, true); + bench_txn_status_cache_concurrent_impl(64, 10000, false, false); + bench_txn_status_cache_concurrent_impl(64, 10000, true, false); + bench_txn_status_cache_concurrent_impl(64, 10000, false, true); + bench_txn_status_cache_concurrent_impl(64, 10000, true, true); + } + + #[test] + fn test_insert_and_get() { + let c = TxnStatusCache::new_for_test(); + assert!(c.get_no_promote(1.into()).is_none()); + + let now = SystemTime::now(); + + c.insert(1.into(), 2.into(), now); + assert_eq!(c.get_no_promote(1.into()).unwrap(), 2.into()); + c.insert(3.into(), 4.into(), now); + assert_eq!(c.get_no_promote(3.into()).unwrap(), 4.into()); + + // This won't actually happen, since a transaction will never have commit info + // with two different commit_ts. We just use this to check replacing + // won't happen. 
+ c.insert(1.into(), 4.into(), now); + assert_eq!(c.get_no_promote(1.into()).unwrap(), 2.into()); + + let mut start_ts_list: Vec<_> = (1..100).step_by(2).map(TimeStamp::from).collect(); + start_ts_list.shuffle(&mut rand::thread_rng()); + for &start_ts in &start_ts_list { + let commit_ts = start_ts.next(); + c.insert(start_ts, commit_ts, now); + } + start_ts_list.shuffle(&mut rand::thread_rng()); + for &start_ts in &start_ts_list { + let commit_ts = start_ts.next(); + assert_eq!(c.get_no_promote(start_ts).unwrap(), commit_ts); + } + } + + #[test] + fn test_evicting_expired() { + let (c, time) = + TxnStatusCache::with_simulated_system_time(1, Duration::from_millis(1000), 1000); + let time_base = SystemTime::now(); + let set_time = |offset_millis: u64| { + time.store( + time_base.duration_since(UNIX_EPOCH).unwrap().as_millis() as u64 + offset_millis, + Ordering::Release, + ) + }; + let now = || UNIX_EPOCH + Duration::from_millis(time.load(Ordering::Acquire)); + + set_time(0); + assert_lt!( + time_base.duration_since(now()).unwrap(), + Duration::from_millis(1) + ); + + c.insert(1.into(), 2.into(), now()); + set_time(1); + c.insert(3.into(), 4.into(), now()); + set_time(2); + c.insert(5.into(), 6.into(), now()); + // Size should be calculated by count. + assert_eq!(c.slots[0].lock().size(), 3); + + // Insert entry 1 again. So if entry 1 is the first one to be popped out, it + // verifies that inserting an existing key won't promote it. + c.insert(1.into(), 2.into(), now()); + + // All the 3 entries are kept + assert_eq!(c.get_no_promote(1.into()).unwrap(), 2.into()); + assert_eq!(c.get_no_promote(3.into()).unwrap(), 4.into()); + assert_eq!(c.get_no_promote(5.into()).unwrap(), 6.into()); + + set_time(1001); + c.insert(7.into(), 8.into(), now()); + // Entry 1 will be popped out. 
+ assert!(c.get_no_promote(1.into()).is_none()); + assert_eq!(c.get_no_promote(3.into()).unwrap(), 4.into()); + assert_eq!(c.get_no_promote(5.into()).unwrap(), 6.into()); + set_time(1004); + c.insert(9.into(), 10.into(), now()); + // It pops more than 1 entries if there are many expired items at the tail. + // Entry 3 and 5 will be popped out. + assert!(c.get_no_promote(1.into()).is_none()); + assert!(c.get_no_promote(3.into()).is_none()); + assert!(c.get_no_promote(5.into()).is_none()); + assert_eq!(c.get_no_promote(7.into()).unwrap(), 8.into()); + assert_eq!(c.get_no_promote(9.into()).unwrap(), 10.into()); + + // Now the cache's contents are: + // 7@1001, 9@1004 + // Test `get` promotes an entry and entries are not in order on insert time. + assert_eq!(c.get(7.into()).unwrap(), 8.into()); + set_time(2003); + c.insert(11.into(), 12.into(), now()); + assert_eq!(c.get_no_promote(7.into()).unwrap(), 8.into()); + assert_eq!(c.get_no_promote(9.into()).unwrap(), 10.into()); + assert_eq!(c.get_no_promote(11.into()).unwrap(), 12.into()); + + set_time(2005); + c.insert(13.into(), 14.into(), now()); + assert!(c.get_no_promote(7.into()).is_none()); + assert!(c.get_no_promote(9.into()).is_none()); + assert_eq!(c.get_no_promote(11.into()).unwrap(), 12.into()); + + // Now the cache's contents are: + // 11@2003, 13@2005 + // Test inserting existed entries. + // According to the implementation of LruCache, though it won't do any update to + // the content, it still check the tail to see if anything can be + // evicted. + set_time(3004); + c.insert(13.into(), 14.into(), now()); + assert!(c.get_no_promote(11.into()).is_none()); + assert_eq!(c.get_no_promote(13.into()).unwrap(), 14.into()); + + set_time(3006); + c.insert(13.into(), 14.into(), now()); + assert!(c.get_no_promote(13.into()).is_none()); + + // Now the cache is empty. + c.insert(15.into(), 16.into(), now()); + set_time(3008); + c.insert(17.into(), 18.into(), now()); + // Test inserting existed entry doesn't promote it. 
+ // Re-insert 15. + set_time(3009); + c.insert(15.into(), 16.into(), now()); + set_time(4007); + c.insert(19.into(), 20.into(), now()); + // 15's insert time is not updated, and is at the tail of the LRU, so it should + // be popped. + assert!(c.get_no_promote(15.into()).is_none()); + assert_eq!(c.get_no_promote(17.into()).unwrap(), 18.into()); + + // Now the cache's contents are: + // 17@3008, 19@4007 + // Test system time being changed, which can lead to current time being less + // than entries' insert time. + set_time(2000); + c.insert(21.into(), 22.into(), now()); + assert_eq!(c.get_no_promote(17.into()).unwrap(), 18.into()); + assert_eq!(c.get_no_promote(19.into()).unwrap(), 20.into()); + assert_eq!(c.get_no_promote(21.into()).unwrap(), 22.into()); + set_time(3500); + c.insert(23.into(), 24.into(), now()); + assert_eq!(c.get_no_promote(21.into()).unwrap(), 22.into()); + assert_eq!(c.get(17.into()).unwrap(), 18.into()); + assert_eq!(c.get(19.into()).unwrap(), 20.into()); + assert_eq!(c.get(23.into()).unwrap(), 24.into()); + // `get` promotes the entries, and entry 21 is put to the tail. + c.insert(23.into(), 24.into(), now()); + assert_eq!(c.get_no_promote(17.into()).unwrap(), 18.into()); + assert_eq!(c.get_no_promote(19.into()).unwrap(), 20.into()); + assert!(c.get_no_promote(21.into()).is_none()); + assert_eq!(c.get_no_promote(23.into()).unwrap(), 24.into()); + + // Now the cache's contents are: + // 17@3008, 19@4007, 23@3500 + // The time passed to `insert` may differ from the time fetched in + // the `TxnStatusCacheEvictPolicy` as they are fetched at different time. 
+ set_time(4009); + // Insert with time 4007, but check with time 4009 + c.insert(25.into(), 26.into(), now() - Duration::from_millis(2)); + assert!(c.get_no_promote(17.into()).is_none()); + assert_eq!(c.get_no_promote(19.into()).unwrap(), 20.into()); + + // The cache's contents: + // 19@4007, 23@3500, 25@4007 + set_time(4010); + c.insert(27.into(), 28.into(), now()); + // The cache's contents: + // 19@4007, 23@3500, 25@4007, 27@4010 + + // It's also possible to check with a lower time considering that system time + // may be changed. Insert with time 5018, but check with time 5008 + set_time(5008); + c.insert(29.into(), 30.into(), now() + Duration::from_millis(10)); + assert!(c.get_no_promote(19.into()).is_none()); + assert!(c.get_no_promote(23.into()).is_none()); + assert!(c.get_no_promote(25.into()).is_none()); + assert_eq!(c.get_no_promote(27.into()).unwrap(), 28.into()); + assert_eq!(c.get_no_promote(29.into()).unwrap(), 30.into()); + + // Now the the cache's contents are: + // 27@4010, 29@5018 + // Considering the case that system time is being changed, it's even + // possible that the entry being inserted is already expired + // comparing to the current time. It doesn't matter whether the + // entry will be dropped immediately or not. We just ensure it won't + // trigger more troubles. 
+ set_time(7000); + c.insert(31.into(), 32.into(), now() - Duration::from_millis(1001)); + assert!(c.get_no_promote(27.into()).is_none()); + assert!(c.get_no_promote(29.into()).is_none()); + assert!(c.get_no_promote(31.into()).is_none()); + assert_eq!(c.slots[0].lock().size(), 0); + } + + #[test] + fn test_setting_capacity() { + let c = TxnStatusCache::new_impl(2, Duration::from_millis(1000), 10, None); + assert!(c.is_enabled); + assert_eq!(c.slots.len(), 2); + assert_eq!(c.slots[0].lock().capacity(), 5); + assert_eq!(c.slots[1].lock().capacity(), 5); + + let c = TxnStatusCache::new_impl(2, Duration::from_millis(1000), 0, None); + assert!(!c.is_enabled); + assert_eq!(c.slots.len(), 0); + // All operations are noops and won't cause panic or return any incorrect + // result. + c.insert(1.into(), 2.into(), SystemTime::now()); + assert!(c.get_no_promote(1.into()).is_none()); + assert!(c.get(1.into()).is_none()); + } + + #[test] + fn test_evicting_by_capacity() { + let (c, time) = + TxnStatusCache::with_simulated_system_time(1, Duration::from_millis(1000), 5); + let time_base = SystemTime::now(); + let set_time = |offset_millis: u64| { + time.store( + time_base.duration_since(UNIX_EPOCH).unwrap().as_millis() as u64 + offset_millis, + Ordering::Release, + ) + }; + let now = || UNIX_EPOCH + Duration::from_millis(time.load(Ordering::Acquire)); + + set_time(0); + c.insert(1.into(), 2.into(), now()); + set_time(2); + c.insert(3.into(), 4.into(), now()); + set_time(4); + c.insert(5.into(), 6.into(), now()); + set_time(6); + c.insert(7.into(), 8.into(), now()); + + // The cache can keep at most 5 entries. + set_time(8); + c.insert(9.into(), 10.into(), now()); + // Entry 1 not evicted. 5 entries in the cache currently + assert_eq!(c.slots[0].lock().len(), 5); + assert_eq!(c.get_no_promote(1.into()).unwrap(), 2.into()); + set_time(10); + c.insert(11.into(), 12.into(), now()); + // Entry 1 evicted. Still 5 entries in the cache. 
+ assert_eq!(c.slots[0].lock().len(), 5); + assert!(c.get_no_promote(1.into()).is_none()); + assert_eq!(c.get_no_promote(3.into()).unwrap(), 4.into()); + + // Nothing will be evicted after trying to insert an existing key. + c.insert(11.into(), 12.into(), now()); + assert_eq!(c.slots[0].lock().len(), 5); + assert_eq!(c.get_no_promote(3.into()).unwrap(), 4.into()); + + // Current contents (key@time): + // 3@2, 5@4, 7@6. 9@8, 11@10 + // Evicting by time works as well. + set_time(1005); + c.insert(13.into(), 14.into(), now()); + assert_eq!(c.slots[0].lock().len(), 4); + assert!(c.get_no_promote(3.into()).is_none()); + assert!(c.get_no_promote(5.into()).is_none()); + assert_eq!(c.get_no_promote(7.into()).unwrap(), 8.into()); + + // Reorder the entries by `get` to prepare for testing the next case. + assert_eq!(c.get(7.into()).unwrap(), 8.into()); + assert_eq!(c.get(9.into()).unwrap(), 10.into()); + assert_eq!(c.get(11.into()).unwrap(), 12.into()); + + c.insert(15.into(), 16.into(), now()); + // Current contents: + // 13@1005, 7@6. 9@8, 11@10, 15@1005 + assert_eq!(c.slots[0].lock().len(), 5); + // Expired entries that are not the tail can be evicted after the tail + // is evicted due to capacity exceeded. + set_time(1011); + c.insert(17.into(), 18.into(), now()); + assert_eq!(c.slots[0].lock().len(), 2); + assert!(c.get_no_promote(13.into()).is_none()); + assert!(c.get_no_promote(7.into()).is_none()); + assert!(c.get_no_promote(9.into()).is_none()); + assert!(c.get_no_promote(11.into()).is_none()); + assert_eq!(c.get(15.into()).unwrap(), 16.into()); + assert_eq!(c.get(17.into()).unwrap(), 18.into()); + } +} From b48812fef7e9dc7ec7d2cd0d178ccbc6e7c9c2cb Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Sat, 21 Oct 2023 01:36:29 +0800 Subject: [PATCH 0969/1149] *: remove unused dyn and grpc external storage (#15808) close tikv/tikv#15807 `CloudDynamic` was initially designed to facilitate out-of-process access to external storages in a cloud environment. 
However, it has never been utilized in either production or testing scenarios, rendering it essentially dead code that adds unnecessary complexity. It would be best to remove it altogether. Also, it removes features "cloud-gcp", "cloud-aws" and "cloud-azure" for most crates, as they are always enabled, and there is no reason to enable and disable them at compile time. Signed-off-by: Neil Shen --- Cargo.lock | 71 +---- Cargo.toml | 10 +- cmd/tikv-ctl/Cargo.toml | 5 - components/backup-stream/Cargo.toml | 3 +- components/backup-stream/src/router.rs | 7 +- components/backup/Cargo.toml | 8 +- components/backup/src/endpoint.rs | 5 +- components/backup/src/service.rs | 2 +- components/backup/src/writer.rs | 7 +- components/cloud/aws/src/s3.rs | 107 +------- components/cloud/azure/src/azblob.rs | 84 +----- components/cloud/gcp/src/gcs.rs | 74 +----- components/cloud/src/blob.rs | 15 -- components/external_storage/Cargo.toml | 27 +- .../{export => }/examples/scli.rs | 62 +---- components/external_storage/export/Cargo.toml | 96 ------- .../export/src/bin/tikv-cloud-storage.rs | 61 ----- .../external_storage/export/src/dylib.rs | 247 ------------------ .../export/src/grpc_service.rs | 131 ---------- components/external_storage/export/src/lib.rs | 15 -- .../external_storage/export/src/request.rs | 91 ------- .../external_storage/src/dylib_client.rs | 169 ------------ .../{export => }/src/export.rs | 163 +----------- .../external_storage/src/grpc_client.rs | 134 ---------- components/external_storage/src/lib.rs | 8 +- components/external_storage/src/request.rs | 101 ------- components/raftstore-v2/Cargo.toml | 4 - components/raftstore/Cargo.toml | 6 +- components/sst_importer/Cargo.toml | 9 +- .../sst_importer/src/caching/storage_cache.rs | 4 +- components/sst_importer/src/sst_importer.rs | 62 +++-- components/sst_importer/src/util.rs | 2 +- components/test_backup/Cargo.toml | 8 +- components/test_backup/src/lib.rs | 2 +- tests/Cargo.toml | 7 +- 
tests/failpoints/cases/test_import_service.rs | 2 +- tests/integrations/backup/mod.rs | 2 +- tests/integrations/import/test_apply_log.rs | 2 +- tests/integrations/import/test_sst_service.rs | 2 +- tests/integrations/import/util.rs | 2 +- 40 files changed, 101 insertions(+), 1716 deletions(-) rename components/external_storage/{export => }/examples/scli.rs (75%) delete mode 100644 components/external_storage/export/Cargo.toml delete mode 100644 components/external_storage/export/src/bin/tikv-cloud-storage.rs delete mode 100644 components/external_storage/export/src/dylib.rs delete mode 100644 components/external_storage/export/src/grpc_service.rs delete mode 100644 components/external_storage/export/src/lib.rs delete mode 100644 components/external_storage/export/src/request.rs delete mode 100644 components/external_storage/src/dylib_client.rs rename components/external_storage/{export => }/src/export.rs (54%) delete mode 100644 components/external_storage/src/grpc_client.rs delete mode 100644 components/external_storage/src/request.rs diff --git a/Cargo.lock b/Cargo.lock index fccff7d7822..f222631d772 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -506,7 +506,6 @@ dependencies = [ "engine_traits", "error_code", "external_storage", - "external_storage_export", "file_system", "futures 0.3.15", "futures-util", @@ -560,7 +559,6 @@ dependencies = [ "error_code", "etcd-client", "external_storage", - "external_storage_export", "fail", "file_system", "futures 0.3.15", @@ -1835,44 +1833,6 @@ dependencies = [ [[package]] name = "external_storage" version = "0.0.1" -dependencies = [ - "async-compression", - "async-trait", - "bytes", - "encryption", - "engine_traits", - "fail", - "ffi-support", - "file_system", - "futures 0.3.15", - "futures-executor", - "futures-io", - "futures-util", - "grpcio", - "kvproto", - "lazy_static", - "libloading", - "matches", - "openssl", - "prometheus", - "protobuf", - "rand 0.8.5", - "rusoto_core", - "rust-ini", - "slog", - "slog-global", - 
"structopt", - "tempfile", - "tikv_alloc", - "tikv_util", - "tokio", - "tokio-util", - "url", -] - -[[package]] -name = "external_storage_export" -version = "0.0.1" dependencies = [ "async-compression", "async-trait", @@ -1881,30 +1841,23 @@ dependencies = [ "cloud", "encryption", "engine_traits", - "external_storage", - "ffi-support", "file_system", "futures 0.3.15", - "futures-executor", "futures-io", "futures-util", "gcp", - "grpcio", "kvproto", "lazy_static", - "libc 0.2.146", - "libloading", "matches", - "nix 0.24.1", - "once_cell", - "protobuf", + "openssl", + "prometheus", + "rand 0.8.5", "rust-ini", - "signal-hook", "slog", "slog-global", - "slog-term", "structopt", "tempfile", + "tikv_alloc", "tikv_util", "tokio", "tokio-util", @@ -1937,16 +1890,6 @@ dependencies = [ "instant", ] -[[package]] -name = "ffi-support" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f85d4d1be103c0b2d86968f0b0690dc09ac0ba205b90adb0389b552869e5000e" -dependencies = [ - "lazy_static", - "log", -] - [[package]] name = "file_system" version = "0.1.0" @@ -5689,7 +5632,7 @@ dependencies = [ "engine_test", "engine_traits", "error_code", - "external_storage_export", + "external_storage", "file_system", "futures 0.3.15", "futures-util", @@ -6004,7 +5947,7 @@ dependencies = [ "concurrency_manager", "crc64fast", "engine_traits", - "external_storage_export", + "external_storage", "file_system", "futures 0.3.15", "futures-executor", @@ -6259,7 +6202,7 @@ dependencies = [ "engine_test", "engine_traits", "error_code", - "external_storage_export", + "external_storage", "fail", "file_system", "futures 0.3.15", diff --git a/Cargo.toml b/Cargo.toml index bd2b4946950..32b2d858b6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,16 +22,14 @@ portable = ["engine_rocks/portable"] sse = ["engine_rocks/sse"] mem-profiling = ["tikv_alloc/mem-profiling"] failpoints = ["fail/failpoints", "raftstore/failpoints", "tikv_util/failpoints", 
"engine_rocks/failpoints", "raft_log_engine/failpoints"] -cloud-aws = ["encryption_export/cloud-aws", "sst_importer/cloud-aws"] -cloud-gcp = ["encryption_export/cloud-gcp", "sst_importer/cloud-gcp"] -cloud-azure = ["encryption_export/cloud-azure", "sst_importer/cloud-azure"] +cloud-aws = ["encryption_export/cloud-aws"] +cloud-gcp = ["encryption_export/cloud-gcp"] +cloud-azure = ["encryption_export/cloud-azure"] testexport = ["raftstore/testexport", "api_version/testexport", "causal_ts/testexport", "engine_traits/testexport", "engine_rocks/testexport", "engine_panic/testexport"] test-engine-kv-rocksdb = ["engine_test/test-engine-kv-rocksdb"] test-engine-raft-raft-engine = ["engine_test/test-engine-raft-raft-engine"] test-engines-rocksdb = ["engine_test/test-engines-rocksdb"] test-engines-panic = ["engine_test/test-engines-panic"] -cloud-storage-grpc = ["sst_importer/cloud-storage-grpc"] -cloud-storage-dylib = ["sst_importer/cloud-storage-dylib"] pprof-fp = ["pprof/frame-pointer"] # for testing configure propegate to other crates @@ -239,7 +237,6 @@ members = [ # "components/engine_tirocks", "components/error_code", "components/external_storage", - "components/external_storage/export", "components/file_system", "components/into_other", "components/keys", @@ -314,7 +311,6 @@ engine_traits = { path = "components/engine_traits" } engine_traits_tests = { path = "components/engine_traits_tests", default-features = false } error_code = { path = "components/error_code" } external_storage = { path = "components/external_storage" } -external_storage_export = { path = "components/external_storage/export" } file_system = { path = "components/file_system" } gcp = { path = "components/cloud/gcp" } into_other = { path = "components/into_other" } diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index a36e72b3c64..0a630ebc023 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -17,18 +17,13 @@ mem-profiling = ["tikv/mem-profiling"] failpoints = 
["tikv/failpoints"] cloud-aws = [ "encryption_export/cloud-aws", - "backup/cloud-aws", ] cloud-gcp = [ "encryption_export/cloud-gcp", - "backup/cloud-gcp", ] cloud-azure = [ "encryption_export/cloud-azure", - "backup/cloud-azure", ] -cloud-storage-grpc = ["backup/cloud-storage-grpc"] -cloud-storage-dylib = ["backup/cloud-storage-dylib"] test-engine-kv-rocksdb = [ "tikv/test-engine-kv-rocksdb" ] diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 8c1edc89a48..12979eab212 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -43,7 +43,6 @@ error_code = { workspace = true } # Also we need wait until https://github.com/etcdv3/etcd-client/pull/43/files to be merged. etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "41d393c32a7a7c728550cee1d9a138dafe6f3e27", features = ["pub-response-field", "tls-openssl-vendored"], optional = true } external_storage = { workspace = true } -external_storage_export = { workspace = true } fail = "0.5" file_system = { workspace = true } futures = "0.3" @@ -78,7 +77,7 @@ tikv_kv = { workspace = true } tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread", "macros", "time", "sync"] } tokio-stream = "0.1" -tokio-util = { version = "0.7", features = ["compat"] } +tokio-util = { version = "0.7", features = ["compat"] } tonic = { version = "0.8", optional = true } txn_types = { workspace = true } uuid = "0.8" diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 1786d513dc8..05f1a053392 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -14,8 +14,7 @@ use std::{ }; use engine_traits::{CfName, CF_DEFAULT, CF_LOCK, CF_WRITE}; -use external_storage::{BackendConfig, UnpinReader}; -use external_storage_export::{create_storage, ExternalStorage}; +use external_storage::{create_storage, BackendConfig, ExternalStorage, 
UnpinReader}; use futures::io::Cursor; use kvproto::{ brpb::{ @@ -1868,7 +1867,7 @@ mod tests { #[tokio::test] async fn test_do_flush() { let tmp_dir = tempfile::tempdir().unwrap(); - let backend = external_storage_export::make_local_backend(tmp_dir.path()); + let backend = external_storage::make_local_backend(tmp_dir.path()); let mut task_info = StreamBackupTaskInfo::default(); task_info.set_storage(backend); let stream_task = StreamTask { @@ -2241,7 +2240,7 @@ mod tests { async fn test_update_global_checkpoint() -> Result<()> { // create local storage let tmp_dir = tempfile::tempdir().unwrap(); - let backend = external_storage_export::make_local_backend(tmp_dir.path()); + let backend = external_storage::make_local_backend(tmp_dir.path()); // build a StreamTaskInfo let mut task_info = StreamBackupTaskInfo::default(); diff --git a/components/backup/Cargo.toml b/components/backup/Cargo.toml index 225a88a3e8f..03b6e439879 100644 --- a/components/backup/Cargo.toml +++ b/components/backup/Cargo.toml @@ -5,12 +5,7 @@ edition = "2021" publish = false [features] -default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] -cloud-aws = ["external_storage_export/cloud-aws"] -cloud-gcp = ["external_storage_export/cloud-gcp"] -cloud-azure = ["external_storage_export/cloud-azure"] -cloud-storage-grpc = ["external_storage_export/cloud-storage-grpc"] -cloud-storage-dylib = ["external_storage_export/cloud-storage-dylib"] +default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] test-engine-kv-rocksdb = [ "tikv/test-engine-kv-rocksdb" ] @@ -45,7 +40,6 @@ engine_rocks = { workspace = true } engine_traits = { workspace = true } error_code = { workspace = true } external_storage = { workspace = true } -external_storage_export = { workspace = true } file_system = { workspace = true } futures = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } diff --git a/components/backup/src/endpoint.rs 
b/components/backup/src/endpoint.rs index a4efc162092..cc4d0bf0e28 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -12,8 +12,7 @@ use async_channel::SendError; use causal_ts::{CausalTsProvider, CausalTsProviderImpl}; use concurrency_manager::ConcurrencyManager; use engine_traits::{name_to_cf, raw_ttl::ttl_current_ts, CfName, KvEngine, SstCompressionType}; -use external_storage::{BackendConfig, HdfsConfig}; -use external_storage_export::{create_storage, ExternalStorage}; +use external_storage::{create_storage, BackendConfig, ExternalStorage, HdfsConfig}; use futures::{channel::mpsc::*, executor::block_on}; use kvproto::{ brpb::*, @@ -1302,7 +1301,7 @@ pub mod tests { use api_version::{api_v2::RAW_KEY_PREFIX, dispatch_api_version, KvFormat, RawValue}; use collections::HashSet; use engine_traits::MiscExt; - use external_storage_export::{make_local_backend, make_noop_backend}; + use external_storage::{make_local_backend, make_noop_backend}; use file_system::{IoOp, IoRateLimiter, IoType}; use futures::{executor::block_on, stream::StreamExt}; use kvproto::metapb; diff --git a/components/backup/src/service.rs b/components/backup/src/service.rs index 237234c061e..8420b7ded9c 100644 --- a/components/backup/src/service.rs +++ b/components/backup/src/service.rs @@ -144,7 +144,7 @@ mod tests { use std::{sync::Arc, time::Duration}; use engine_rocks::RocksEngine; - use external_storage_export::make_local_backend; + use external_storage::make_local_backend; use tikv::storage::txn::tests::{must_commit, must_prewrite_put}; use tikv_util::worker::{dummy_scheduler, ReceiverWrapper}; use txn_types::TimeStamp; diff --git a/components/backup/src/writer.rs b/components/backup/src/writer.rs index 715c4f68291..dfbe36b60cf 100644 --- a/components/backup/src/writer.rs +++ b/components/backup/src/writer.rs @@ -7,7 +7,7 @@ use engine_traits::{ CfName, ExternalSstFileInfo, KvEngine, SstCompressionType, SstExt, SstWriter, SstWriterBuilder, CF_DEFAULT, 
CF_WRITE, }; -use external_storage_export::{ExternalStorage, UnpinReader}; +use external_storage::{ExternalStorage, UnpinReader}; use file_system::Sha256Reader; use futures_util::io::AllowStdIo; use kvproto::{ @@ -485,9 +485,8 @@ mod tests { .build() .unwrap(); let db = rocks.get_rocksdb(); - let backend = external_storage_export::make_local_backend(temp.path()); - let storage = - external_storage_export::create_storage(&backend, Default::default()).unwrap(); + let backend = external_storage::make_local_backend(temp.path()); + let storage = external_storage::create_storage(&backend, Default::default()).unwrap(); // Test empty file. let mut r = kvproto::metapb::Region::default(); diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index 96031c91f06..73ddf479fd8 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -16,7 +16,7 @@ use futures_util::{ io::{AsyncRead, AsyncReadExt}, stream::TryStreamExt, }; -pub use kvproto::brpb::{Bucket as InputBucket, CloudDynamic, S3 as InputConfig}; +pub use kvproto::brpb::{Bucket as InputBucket, S3 as InputConfig}; use rusoto_core::{request::DispatchSignedRequest, ByteStream, RusotoError}; use rusoto_credential::{ProvideAwsCredentials, StaticProvider}; use rusoto_s3::{util::AddressingStyle, *}; @@ -80,45 +80,6 @@ impl Config { } } - pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { - let bucket = BucketConf::from_cloud_dynamic(cloud_dynamic)?; - let attrs = &cloud_dynamic.attrs; - let def = &String::new(); - let force_path_style_str = attrs.get("force_path_style").unwrap_or(def).clone(); - let force_path_style = force_path_style_str == "true" || force_path_style_str == "True"; - let access_key_opt = attrs.get("access_key"); - let access_key_pair = if let Some(access_key) = access_key_opt { - let secret_access_key = attrs.get("secret_access_key").unwrap_or(def).clone(); - let session_token = attrs - .get("session_token") - .and_then(|x| 
StringNonEmpty::opt(x.to_string())); - Some(AccessKeyPair { - access_key: StringNonEmpty::required_field(access_key.clone(), "access_key")?, - secret_access_key: StringNonEmpty::required_field( - secret_access_key, - "secret_access_key", - )?, - session_token, - }) - } else { - None - }; - let storage_class = bucket.storage_class.clone(); - Ok(Config { - bucket, - storage_class, - sse: StringNonEmpty::opt(attrs.get("sse").unwrap_or(def).clone()), - acl: StringNonEmpty::opt(attrs.get("acl").unwrap_or(def).clone()), - access_key_pair, - force_path_style, - sse_kms_key_id: StringNonEmpty::opt(attrs.get("sse_kms_key_id").unwrap_or(def).clone()), - multi_part_size: MINIMUM_PART_SIZE, - object_lock_enabled: false, - role_arn: StringNonEmpty::opt(attrs.get("role_arn").unwrap_or(def).clone()), - external_id: StringNonEmpty::opt(attrs.get("external_id").unwrap_or(def).clone()), - }) - } - pub fn from_input(input: InputConfig) -> io::Result { let storage_class = StringNonEmpty::opt(input.storage_class); let endpoint = StringNonEmpty::opt(input.endpoint); @@ -185,10 +146,6 @@ impl S3Storage { Self::new(Config::from_input(input)?) } - pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { - Self::new(Config::from_cloud_dynamic(cloud_dynamic)?) - } - pub fn set_multi_part_size(&mut self, mut size: usize) { if size < MINIMUM_PART_SIZE { // default multi_part_size is 5MB, S3 cannot allow a smaller size. @@ -352,7 +309,7 @@ impl From> for UploadError { } } -/// try_read_exact tries to read exact length data as the buffer size. +/// try_read_exact tries to read exact length data as the buffer size. /// like [`std::io::Read::read_exact`], but won't return `UnexpectedEof` when /// cannot read anything more from the `Read`. once returning a size less than /// the buffer length, implies a EOF was meet, or nothing read. 
@@ -934,66 +891,6 @@ mod tests { ); } - #[test] - fn test_config_round_trip() { - let mut input = InputConfig::default(); - input.set_bucket("bucket".to_owned()); - input.set_prefix("backup 02/prefix/".to_owned()); - input.set_region("us-west-2".to_owned()); - let c1 = Config::from_input(input.clone()).unwrap(); - let c2 = Config::from_cloud_dynamic(&cloud_dynamic_from_input(input)).unwrap(); - assert_eq!(c1.bucket.bucket, c2.bucket.bucket); - assert_eq!(c1.bucket.prefix, c2.bucket.prefix); - assert_eq!(c1.bucket.region, c2.bucket.region); - assert_eq!( - c1.bucket.region, - StringNonEmpty::opt("us-west-2".to_owned()) - ); - } - - fn cloud_dynamic_from_input(mut s3: InputConfig) -> CloudDynamic { - let mut bucket = InputBucket::default(); - if !s3.endpoint.is_empty() { - bucket.endpoint = s3.take_endpoint(); - } - if !s3.region.is_empty() { - bucket.region = s3.take_region(); - } - if !s3.prefix.is_empty() { - bucket.prefix = s3.take_prefix(); - } - if !s3.storage_class.is_empty() { - bucket.storage_class = s3.take_storage_class(); - } - if !s3.bucket.is_empty() { - bucket.bucket = s3.take_bucket(); - } - let mut attrs = std::collections::HashMap::new(); - if !s3.sse.is_empty() { - attrs.insert("sse".to_owned(), s3.take_sse()); - } - if !s3.acl.is_empty() { - attrs.insert("acl".to_owned(), s3.take_acl()); - } - if !s3.access_key.is_empty() { - attrs.insert("access_key".to_owned(), s3.take_access_key()); - } - if !s3.secret_access_key.is_empty() { - attrs.insert("secret_access_key".to_owned(), s3.take_secret_access_key()); - } - if !s3.sse_kms_key_id.is_empty() { - attrs.insert("sse_kms_key_id".to_owned(), s3.take_sse_kms_key_id()); - } - if s3.force_path_style { - attrs.insert("force_path_style".to_owned(), "true".to_owned()); - } - let mut cd = CloudDynamic::default(); - cd.set_provider_name("aws".to_owned()); - cd.set_attrs(attrs); - cd.set_bucket(bucket); - cd - } - #[tokio::test] async fn test_try_read_exact() { use std::io::{self, Cursor, Read}; diff --git 
a/components/cloud/azure/src/azblob.rs b/components/cloud/azure/src/azblob.rs index d88020aa944..5a806c54faf 100644 --- a/components/cloud/azure/src/azblob.rs +++ b/components/cloud/azure/src/azblob.rs @@ -24,11 +24,8 @@ use futures_util::{ stream::StreamExt, TryStreamExt, }; -pub use kvproto::brpb::{ - AzureBlobStorage as InputConfig, AzureCustomerKey, Bucket as InputBucket, CloudDynamic, -}; +pub use kvproto::brpb::{AzureBlobStorage as InputConfig, AzureCustomerKey, Bucket as InputBucket}; use oauth2::{ClientId, ClientSecret}; -use openssl::sha::Sha256; use tikv_util::{ debug, stream::{retry, RetryError}, @@ -62,18 +59,6 @@ struct EncryptionCustomer { encryption_key_sha256: String, } -impl EncryptionCustomer { - fn new(encryption_key: &str) -> Self { - let mut hasher = Sha256::new(); - hasher.update(encryption_key.as_bytes()); - let encryption_key_sha256 = base64::encode(hasher.finish()); - EncryptionCustomer { - encryption_key: base64::encode(encryption_key), - encryption_key_sha256, - } - } -} - impl From for EncryptionCustomer { fn from(value: AzureCustomerKey) -> Self { EncryptionCustomer { @@ -164,28 +149,6 @@ impl Config { env::var(ENV_SHARED_KEY).ok().and_then(StringNonEmpty::opt) } - pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { - let bucket = BucketConf::from_cloud_dynamic(cloud_dynamic)?; - let attrs = &cloud_dynamic.attrs; - let def = &String::new(); - - Ok(Config { - bucket, - account_name: StringNonEmpty::opt(attrs.get("account_name").unwrap_or(def).clone()), - shared_key: StringNonEmpty::opt(attrs.get("shared_key").unwrap_or(def).clone()), - sas_token: StringNonEmpty::opt(attrs.get("sas_token").unwrap_or(def).clone()), - credential_info: Self::load_credential_info(), - env_account_name: Self::load_env_account_name(), - env_shared_key: Self::load_env_shared_key(), - encryption_scope: StringNonEmpty::opt( - attrs.get("encryption_scope").unwrap_or(def).clone(), - ), - encryption_customer: attrs - .get("encryption_key") - 
.map(|encryption_key| EncryptionCustomer::new(encryption_key)), - }) - } - pub fn from_input(input: InputConfig) -> io::Result { let bucket = BucketConf { endpoint: StringNonEmpty::opt(input.endpoint), @@ -574,10 +537,6 @@ impl AzureStorage { }) } - pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { - Self::new(Config::from_cloud_dynamic(cloud_dynamic)?) - } - pub fn new(config: Config) -> io::Result { Self::check_config(&config)?; @@ -900,47 +859,6 @@ mod tests { assert_eq!(get_size, size); } - #[test] - fn test_config_round_trip() { - let mut input = InputConfig::default(); - input.set_bucket("bucket".to_owned()); - input.set_prefix("backup 02/prefix/".to_owned()); - input.set_account_name("user".to_owned()); - let c1 = Config::from_input(input.clone()).unwrap(); - let c2 = Config::from_cloud_dynamic(&cloud_dynamic_from_input(input)).unwrap(); - assert_eq!(c1.bucket.bucket, c2.bucket.bucket); - assert_eq!(c1.bucket.prefix, c2.bucket.prefix); - assert_eq!(c1.account_name, c2.account_name); - } - - fn cloud_dynamic_from_input(mut azure: InputConfig) -> CloudDynamic { - let mut bucket = InputBucket::default(); - if !azure.endpoint.is_empty() { - bucket.endpoint = azure.take_endpoint(); - } - if !azure.prefix.is_empty() { - bucket.prefix = azure.take_prefix(); - } - if !azure.storage_class.is_empty() { - bucket.storage_class = azure.take_storage_class(); - } - if !azure.bucket.is_empty() { - bucket.bucket = azure.take_bucket(); - } - let mut attrs = std::collections::HashMap::new(); - if !azure.account_name.is_empty() { - attrs.insert("account_name".to_owned(), azure.take_account_name()); - } - if !azure.shared_key.is_empty() { - attrs.insert("shared_key".to_owned(), azure.take_shared_key()); - } - let mut cd = CloudDynamic::default(); - cd.set_provider_name("azure".to_owned()); - cd.set_attrs(attrs); - cd.set_bucket(bucket); - cd - } - #[test] fn test_config_check() { { diff --git a/components/cloud/gcp/src/gcs.rs 
b/components/cloud/gcp/src/gcs.rs index c43e4e63969..56cd317c3f8 100644 --- a/components/cloud/gcp/src/gcs.rs +++ b/components/cloud/gcp/src/gcs.rs @@ -14,7 +14,7 @@ use futures_util::{ use http::HeaderValue; use hyper::{client::HttpConnector, Body, Client, Request, Response, StatusCode}; use hyper_tls::HttpsConnector; -pub use kvproto::brpb::{Bucket as InputBucket, CloudDynamic, Gcs as InputConfig}; +pub use kvproto::brpb::{Bucket as InputBucket, Gcs as InputConfig}; use tame_gcs::{ common::{PredefinedAcl, StorageClass}, objects::{InsertObjectOptional, Metadata, Object}, @@ -54,35 +54,6 @@ impl Config { io::Error::new(io::ErrorKind::InvalidInput, "missing credentials") } - pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { - let bucket = BucketConf::from_cloud_dynamic(cloud_dynamic)?; - let attrs = &cloud_dynamic.attrs; - let def = &String::new(); - let predefined_acl = parse_predefined_acl(attrs.get("predefined_acl").unwrap_or(def)) - .or_invalid_input("invalid predefined_acl")?; - let storage_class = parse_storage_class(&none_to_empty(bucket.storage_class.clone())) - .or_invalid_input("invalid storage_class")?; - - let credentials_blob_opt = StringNonEmpty::opt( - attrs - .get("credentials_blob") - .unwrap_or(&"".to_string()) - .to_string(), - ); - let svc_info = if let Some(cred) = credentials_blob_opt { - Some(deserialize_service_account_info(cred)?) - } else { - None - }; - - Ok(Config { - bucket, - predefined_acl, - svc_info, - storage_class, - }) - } - pub fn from_input(input: InputConfig) -> io::Result { let endpoint = StringNonEmpty::opt(input.endpoint); let bucket = BucketConf { @@ -241,10 +212,6 @@ impl GcsStorage { Self::new(Config::from_input(input)?) } - pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { - Self::new(Config::from_cloud_dynamic(cloud_dynamic)?) - } - /// Create a new GCS storage for the given config. 
pub fn new(config: Config) -> io::Result { let svc_access = if let Some(si) = &config.svc_info { @@ -618,17 +585,6 @@ mod tests { ); } - #[test] - fn test_config_round_trip() { - let mut input = InputConfig::default(); - input.set_bucket("bucket".to_owned()); - input.set_prefix("backup 02/prefix/".to_owned()); - let c1 = Config::from_input(input.clone()).unwrap(); - let c2 = Config::from_cloud_dynamic(&cloud_dynamic_from_input(input)).unwrap(); - assert_eq!(c1.bucket.bucket, c2.bucket.bucket); - assert_eq!(c1.bucket.prefix, c2.bucket.prefix); - } - enum ThrottleReadState { Spawning, Emitting, @@ -706,32 +662,4 @@ mod tests { assert_eq!(dst.len(), BENCH_READ_SIZE) }) } - - fn cloud_dynamic_from_input(mut gcs: InputConfig) -> CloudDynamic { - let mut bucket = InputBucket::default(); - if !gcs.endpoint.is_empty() { - bucket.endpoint = gcs.take_endpoint(); - } - if !gcs.prefix.is_empty() { - bucket.prefix = gcs.take_prefix(); - } - if !gcs.storage_class.is_empty() { - bucket.storage_class = gcs.take_storage_class(); - } - if !gcs.bucket.is_empty() { - bucket.bucket = gcs.take_bucket(); - } - let mut attrs = std::collections::HashMap::new(); - if !gcs.predefined_acl.is_empty() { - attrs.insert("predefined_acl".to_owned(), gcs.take_predefined_acl()); - } - if !gcs.credentials_blob.is_empty() { - attrs.insert("credentials_blob".to_owned(), gcs.take_credentials_blob()); - } - let mut cd = CloudDynamic::default(); - cd.set_provider_name("gcp".to_owned()); - cd.set_attrs(attrs); - cd.set_bucket(bucket); - cd - } } diff --git a/components/cloud/src/blob.rs b/components/cloud/src/blob.rs index 84ca77042d7..a0b5c26953c 100644 --- a/components/cloud/src/blob.rs +++ b/components/cloud/src/blob.rs @@ -4,7 +4,6 @@ use std::{io, marker::Unpin, pin::Pin, task::Poll}; use async_trait::async_trait; use futures_io::AsyncRead; -pub use kvproto::brpb::CloudDynamic; pub trait BlobConfig: 'static + Send + Sync { fn name(&self) -> &'static str; @@ -177,20 +176,6 @@ impl BucketConf { Ok(u) } 
} - - pub fn from_cloud_dynamic(cloud_dynamic: &CloudDynamic) -> io::Result { - let bucket = cloud_dynamic.bucket.clone().into_option().ok_or_else(|| { - io::Error::new(io::ErrorKind::Other, "Required field bucket is missing") - })?; - - Ok(Self { - endpoint: StringNonEmpty::opt(bucket.endpoint), - bucket: StringNonEmpty::required_field(bucket.bucket, "bucket")?, - prefix: StringNonEmpty::opt(bucket.prefix), - storage_class: StringNonEmpty::opt(bucket.storage_class), - region: StringNonEmpty::opt(bucket.region), - }) - } } pub fn none_to_empty(opt: Option) -> String { diff --git a/components/external_storage/Cargo.toml b/components/external_storage/Cargo.toml index aed49aad3ab..25a852f9d5c 100644 --- a/components/external_storage/Cargo.toml +++ b/components/external_storage/Cargo.toml @@ -4,39 +4,24 @@ version = "0.0.1" edition = "2021" publish = false -[features] -cloud-storage-dylib = [ - "ffi-support", - "libloading", - "protobuf", -] -cloud-storage-grpc = [ - "grpcio", -] -failpoints = ["fail/failpoints"] - [dependencies] async-compression = { version = "0.3.14", features = ["futures-io", "zstd"] } async-trait = "0.1" -bytes = "1.0" +aws = { workspace = true } +azure = { workspace = true } +cloud = { workspace = true } encryption = { workspace = true } engine_traits = { workspace = true } -fail = "0.5" -ffi-support = { optional = true, version = "0.4.2" } file_system = { workspace = true } futures = "0.3" -futures-executor = "0.3" futures-io = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } -grpcio = { workspace = true, optional = true } +gcp = { workspace = true } kvproto = { workspace = true } lazy_static = "1.3" -libloading = { optional = true, version = "0.7.0" } openssl = "0.10" prometheus = { version = "0.13", default-features = false, features = ["nightly", "push"] } -protobuf = { optional = true, version = "2" } rand = "0.8" -rusoto_core = "0.46.0" slog = { workspace = true } # better to not use slog-global, but 
pass in the logger slog-global = { workspace = true } @@ -52,3 +37,7 @@ rust-ini = "0.14.0" structopt = "0.3" tempfile = "3.1" tokio = { version = "1.5", features = ["macros"] } + +[[example]] +name = "scli" +path = "examples/scli.rs" diff --git a/components/external_storage/export/examples/scli.rs b/components/external_storage/examples/scli.rs similarity index 75% rename from components/external_storage/export/examples/scli.rs rename to components/external_storage/examples/scli.rs index 0ab54721b29..9621f840e6c 100644 --- a/components/external_storage/export/examples/scli.rs +++ b/components/external_storage/examples/scli.rs @@ -6,19 +6,13 @@ use std::{ path::Path, }; -#[cfg(feature = "cloud-azure")] -use external_storage_export::make_azblob_backend; -#[cfg(feature = "cloud-gcp")] -use external_storage_export::make_gcs_backend; -#[cfg(feature = "cloud-aws")] -use external_storage_export::make_s3_backend; -use external_storage_export::{ - create_storage, make_cloud_backend, make_hdfs_backend, make_local_backend, make_noop_backend, - ExternalStorage, UnpinReader, +use external_storage::{ + create_storage, make_azblob_backend, make_gcs_backend, make_hdfs_backend, make_local_backend, + make_noop_backend, make_s3_backend, ExternalStorage, UnpinReader, }; use futures_util::io::{copy, AllowStdIo}; use ini::ini::Ini; -use kvproto::brpb::{AzureBlobStorage, Bucket, CloudDynamic, Gcs, StorageBackend, S3}; +use kvproto::brpb::{AzureBlobStorage, Gcs, StorageBackend, S3}; use structopt::{clap::arg_enum, StructOpt}; use tikv_util::stream::block_on_external_io; use tokio::runtime::Runtime; @@ -32,7 +26,6 @@ arg_enum! 
{ S3, GCS, Azure, - Cloud, } } @@ -67,8 +60,6 @@ pub struct Opt { /// Remote path prefix #[structopt(short = "x", long)] prefix: Option, - #[structopt(long)] - cloud_name: Option, #[structopt(subcommand)] command: Command, } @@ -82,35 +73,6 @@ enum Command { Load, } -fn create_cloud_storage(opt: &Opt) -> Result { - let mut bucket = Bucket::default(); - if let Some(endpoint) = &opt.endpoint { - bucket.endpoint = endpoint.to_string(); - } - if let Some(region) = &opt.region { - bucket.region = region.to_string(); - } - if let Some(bucket_name) = &opt.bucket { - bucket.bucket = bucket_name.to_string(); - } else { - return Err(Error::new(ErrorKind::Other, "missing bucket")); - } - if let Some(prefix) = &opt.prefix { - bucket.prefix = prefix.to_string(); - } - let mut config = CloudDynamic::default(); - config.set_bucket(bucket); - let mut attrs = std::collections::HashMap::new(); - if let Some(credential_file) = &opt.credential_file { - attrs.insert("credential_file".to_owned(), credential_file.clone()); - } - config.set_attrs(attrs); - if let Some(cloud_name) = &opt.cloud_name { - config.provider_name = cloud_name.clone(); - } - Ok(make_cloud_backend(config)) -} - fn create_s3_storage(opt: &Opt) -> Result { let mut config = S3::default(); @@ -150,10 +112,7 @@ fn create_s3_storage(opt: &Opt) -> Result { if let Some(prefix) = &opt.prefix { config.prefix = prefix.to_string(); } - #[cfg(feature = "cloud-aws")] - return Ok(make_s3_backend(config)); - #[cfg(not(feature = "cloud-aws"))] - return Err(Error::new(ErrorKind::Other, "missing feature")); + Ok(make_s3_backend(config)) } fn create_gcs_storage(opt: &Opt) -> Result { @@ -173,10 +132,7 @@ fn create_gcs_storage(opt: &Opt) -> Result { if let Some(prefix) = &opt.prefix { config.prefix = prefix.to_string(); } - #[cfg(feature = "cloud-gcp")] - return Ok(make_gcs_backend(config)); - #[cfg(not(feature = "cloud-gcp"))] - return Err(Error::new(ErrorKind::Other, "missing feature")); + Ok(make_gcs_backend(config)) } fn 
create_azure_storage(opt: &Opt) -> Result { @@ -212,10 +168,7 @@ fn create_azure_storage(opt: &Opt) -> Result { if let Some(prefix) = &opt.prefix { config.prefix = prefix.to_string(); } - #[cfg(feature = "cloud-azure")] - return Ok(make_azblob_backend(config)); - #[cfg(not(feature = "cloud-azure"))] - return Err(Error::new(ErrorKind::Other, "missing feature")); + Ok(make_azblob_backend(config)) } fn process() -> Result<()> { @@ -228,7 +181,6 @@ fn process() -> Result<()> { StorageType::S3 => create_s3_storage(&opt)?, StorageType::GCS => create_gcs_storage(&opt)?, StorageType::Azure => create_azure_storage(&opt)?, - StorageType::Cloud => create_cloud_storage(&opt)?, }), Default::default(), )?; diff --git a/components/external_storage/export/Cargo.toml b/components/external_storage/export/Cargo.toml deleted file mode 100644 index 6537eaf8995..00000000000 --- a/components/external_storage/export/Cargo.toml +++ /dev/null @@ -1,96 +0,0 @@ -[package] -name = "external_storage_export" -version = "0.0.1" -edition = "2021" -publish = false - -[[bin]] -name = "tikv-cloud-storage" -path = "src/bin/tikv-cloud-storage.rs" -required-features = ["cloud-storage-grpc"] - -[lib] -name = "external_storage_export" -# Experimental feature to load the cloud storage code dynamically -# crate-type = ["lib", "cdylib"] - -[features] -default = ["cloud-gcp", "cloud-aws", "cloud-azure"] -cloud-aws = ["aws"] -cloud-gcp = ["gcp"] -cloud-azure = ["azure"] -cloud-storage-dylib = [ - "external_storage/cloud-storage-dylib", - "ffi-support", - "file_system", - "futures", - "libloading", - "lazy_static", - "once_cell", - "protobuf", - "slog", - "slog-global", - "tokio", - "tokio-util", -] -cloud-storage-grpc = [ - "external_storage/cloud-storage-grpc", - "grpcio", - "file_system", - "futures", - "futures-executor", - "libc", - "signal-hook", - "slog", - "slog-global", - "slog-term", - "tokio", - "tokio-util", -] - -[dependencies] -async-compression = { version = "0.3.14", features = ["futures-io", 
"zstd"] } -async-trait = "0.1" -aws = { optional = true, workspace = true } -azure = { optional = true, workspace = true } -cloud = { workspace = true } -encryption = { workspace = true } -engine_traits = { workspace = true } -external_storage = { workspace = true } -ffi-support = { optional = true, version = "0.4.2" } -file_system = { workspace = true, optional = true } -futures = { optional = true, version = "0.3" } -futures-executor = { optional = true, version = "0.3" } -futures-io = { version = "0.3" } -futures-util = { version = "0.3", default-features = false, features = ["io"] } -gcp = { optional = true, workspace = true } -grpcio = { workspace = true, optional = true } -kvproto = { workspace = true } -lazy_static = { optional = true, version = "1.3" } -libloading = { optional = true, version = "0.7.0" } -once_cell = { optional = true, version = "1.3.1" } -protobuf = { optional = true, version = "2" } -slog-global = { optional = true, workspace = true } -tikv_util = { workspace = true } -tokio = { version = "1.5", features = ["time", "rt", "net"], optional = true } -tokio-util = { version = "0.7", features = ["compat"], optional = true } -url = "2.0" - -[dev-dependencies] -futures-util = { version = "0.3", default-features = false, features = ["io"] } -matches = "0.1.8" -rust-ini = "0.14.0" -structopt = "0.3" -tempfile = "3.1" -tokio = { version = "1.5", features = ["time"] } - -[[example]] -name = "scli" -path = "examples/scli.rs" - -[target.'cfg(unix)'.dependencies] -nix = { optional = true, version = "0.24" } -signal-hook = { optional = true, version = "0.3" } -libc = { optional = true, version = "0.2" } -slog = { optional = true, version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-term = { optional = true, version = "2.4" } diff --git a/components/external_storage/export/src/bin/tikv-cloud-storage.rs b/components/external_storage/export/src/bin/tikv-cloud-storage.rs deleted file mode 100644 index 07cd8507948..00000000000 
--- a/components/external_storage/export/src/bin/tikv-cloud-storage.rs +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. - -use external_storage_export::new_service; -use grpcio::{self}; -use slog::{self}; -use slog_global::{info, warn}; -use tikv_util::logger::{self}; - -fn build_logger(drainer: D, log_level: slog::Level) -where - D: slog::Drain + Send + 'static, - ::Err: std::fmt::Display, -{ - // use async drainer and init std log. - logger::init_log(drainer, log_level, true, true, vec![], 100).unwrap_or_else(|e| { - println!("failed to initialize log: {}", e); - }); -} - -fn main() { - println!("starting GRPC cloud-storage service"); - let decorator = slog_term::PlainDecorator::new(std::io::stdout()); - let drain = slog_term::CompactFormat::new(decorator).build(); - build_logger(drain, slog::Level::Debug); - warn!("redirect grpcio logging"); - grpcio::redirect_log(); - info!("slog logging"); - let service = new_service().expect("GRPC service creation for tikv-cloud-storage"); - wait::for_signal(); - info!("service {:?}", service); -} - -#[cfg(unix)] -mod wait { - use libc::c_int; - use signal_hook::{ - consts::{SIGHUP, SIGINT, SIGTERM, SIGUSR1, SIGUSR2}, - iterator::Signals, - Signals, - }; - use slog_global::info; - - pub fn for_signal() { - let mut signals = Signals::new(&[SIGTERM, SIGINT, SIGHUP]).unwrap(); - for signal in &mut signals { - match signal { - SIGTERM | SIGINT | SIGHUP => { - info!("receive signal {}, stopping server...", signal); - break; - } - // TODO: handle more signals - _ => unreachable!(), - } - } - } -} - -#[cfg(not(unix))] -mod wait { - pub fn for_signal() {} -} diff --git a/components/external_storage/export/src/dylib.rs b/components/external_storage/export/src/dylib.rs deleted file mode 100644 index 308973de95e..00000000000 --- a/components/external_storage/export/src/dylib.rs +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
- -use std::sync::Mutex; - -use anyhow::Context; -use kvproto::brpb as proto; -pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; -use lazy_static::lazy_static; -use once_cell::sync::OnceCell; -use protobuf::{self}; -use slog_global::{error, info}; -use tokio::runtime::{Builder, Runtime}; - -use crate::request::{restore_receiver, write_receiver}; - -static RUNTIME: OnceCell = OnceCell::new(); -lazy_static! { - static ref RUNTIME_INIT: Mutex<()> = Mutex::new(()); -} - -/// # Safety -/// Deref data pointer, thus unsafe -#[no_mangle] -pub extern "C" fn external_storage_init(error: &mut ffi_support::ExternError) { - ffi_support::call_with_result(error, || { - (|| -> anyhow::Result<()> { - let guarded = RUNTIME_INIT.lock().unwrap(); - if RUNTIME.get().is_some() { - return Ok(()); - } - let runtime = Builder::new() - .basic_scheduler() - .thread_name("external-storage-dylib") - .core_threads(1) - .enable_all() - .build() - .context("build runtime")?; - if RUNTIME.set(runtime).is_err() { - error!("runtime already set") - } - #[allow(clippy::unit_arg)] - Ok(*guarded) - })() - .context("external_storage_init") - .map_err(anyhow_to_extern_err) - }) -} - -/// # Safety -/// Deref data pointer, thus unsafe -#[no_mangle] -pub unsafe extern "C" fn external_storage_write( - data: *const u8, - len: i32, - error: &mut ffi_support::ExternError, -) { - ffi_support::call_with_result(error, || { - (|| -> anyhow::Result<()> { - let runtime = RUNTIME - .get() - .context("must first call external_storage_init")?; - let buffer = get_buffer(data, len); - let req: proto::ExternalStorageWriteRequest = protobuf::parse_from_bytes(buffer)?; - info!("write request {:?}", req.get_object_name()); - write_receiver(&runtime, req) - })() - .context("external_storage_write") - .map_err(anyhow_to_extern_err) - }) -} - -/// # Safety -/// Deref data pointer, thus unsafe -pub unsafe extern "C" fn external_storage_restore( - data: *const u8, - len: i32, - error: &mut ffi_support::ExternError, -) 
{ - ffi_support::call_with_result(error, || { - (|| -> anyhow::Result<()> { - let runtime = RUNTIME - .get() - .context("must first call external_storage_init")?; - let buffer = get_buffer(data, len); - let req: proto::ExternalStorageRestoreRequest = protobuf::parse_from_bytes(buffer)?; - info!("restore request {:?}", req.get_object_name()); - Ok(restore_receiver(runtime, req)?) - })() - .context("external_storage_restore") - .map_err(anyhow_to_extern_err) - }) -} - -unsafe fn get_buffer<'a>(data: *const u8, len: i32) -> &'a [u8] { - assert!(len >= 0, "Bad buffer len: {}", len); - if len == 0 { - // This will still fail, but as a bad protobuf format. - &[] - } else { - assert!(!data.is_null(), "Unexpected null data pointer"); - std::slice::from_raw_parts(data, len as usize) - } -} - -fn anyhow_to_extern_err(e: anyhow::Error) -> ffi_support::ExternError { - ffi_support::ExternError::new_error(ffi_support::ErrorCode::new(1), format!("{:?}", e)) -} - -pub mod staticlib { - use std::{ - io::{self}, - sync::Arc, - }; - - use external_storage::{ - dylib_client::extern_to_io_err, - request::{ - anyhow_to_io_log_error, file_name_for_write, restore_sender, write_sender, DropPath, - }, - ExternalStorage, - }; - use futures_io::AsyncRead; - use protobuf::Message; - use tikv_util::time::Limiter; - - use super::*; - - struct ExternalStorageClient { - backend: Backend, - runtime: Arc, - name: &'static str, - url: url::Url, - } - - pub fn new_client( - backend: Backend, - name: &'static str, - url: url::Url, - ) -> io::Result> { - let runtime = Builder::new() - .basic_scheduler() - .thread_name("external-storage-dylib-client") - .core_threads(1) - .enable_all() - .build()?; - external_storage_init_ffi()?; - Ok(Box::new(ExternalStorageClient { - runtime: Arc::new(runtime), - backend, - name, - url, - }) as _) - } - - impl ExternalStorage for ExternalStorageClient { - fn name(&self) -> &'static str { - self.name - } - - fn url(&self) -> io::Result { - Ok(self.url.clone()) - } - - 
fn write( - &self, - name: &str, - reader: Box, - content_length: u64, - ) -> io::Result<()> { - info!("external storage writing"); - (|| -> anyhow::Result<()> { - let file_path = file_name_for_write(&self.name, &name); - let req = write_sender( - &self.runtime, - self.backend.clone(), - file_path.clone(), - name, - reader, - content_length, - )?; - let bytes = req.write_to_bytes()?; - info!("write request"); - external_storage_write_ffi(bytes)?; - DropPath(file_path); - Ok(()) - })() - .context("external storage write") - .map_err(anyhow_to_io_log_error) - } - - fn read(&self, _name: &str) -> crate::ExternalData<'_> { - unimplemented!("use restore instead of read") - } - - fn restore( - &self, - storage_name: &str, - restore_name: std::path::PathBuf, - expected_length: u64, - speed_limiter: &Limiter, - ) -> io::Result<()> { - info!("external storage restore"); - let req = restore_sender( - self.backend.clone(), - storage_name, - restore_name, - expected_length, - speed_limiter, - )?; - let bytes = req.write_to_bytes()?; - external_storage_restore_ffi(bytes) - } - } - - fn external_storage_write_ffi(bytes: Vec) -> io::Result<()> { - let mut e = ffi_support::ExternError::default(); - unsafe { - external_storage_write(bytes.as_ptr(), bytes.len() as i32, &mut e); - } - if e.get_code() != ffi_support::ErrorCode::SUCCESS { - Err(extern_to_io_err(e)) - } else { - Ok(()) - } - } - - fn external_storage_restore_ffi(bytes: Vec) -> io::Result<()> { - let mut e = ffi_support::ExternError::default(); - unsafe { - external_storage_restore(bytes.as_ptr(), bytes.len() as i32, &mut e); - } - if e.get_code() != ffi_support::ErrorCode::SUCCESS { - Err(extern_to_io_err(e)) - } else { - Ok(()) - } - } - - fn external_storage_init_ffi() -> io::Result<()> { - let mut e = ffi_support::ExternError::default(); - external_storage_init(&mut e); - if e.get_code() != ffi_support::ErrorCode::SUCCESS { - return Err(extern_to_io_err(e)); - } - Ok(()) - } -} diff --git 
a/components/external_storage/export/src/grpc_service.rs b/components/external_storage/export/src/grpc_service.rs deleted file mode 100644 index 7ef2bd093d1..00000000000 --- a/components/external_storage/export/src/grpc_service.rs +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. - -use std::{ - io::{self, ErrorKind}, - sync::Arc, -}; - -use anyhow::Context; -use external_storage::request::anyhow_to_io_log_error; -use grpcio::{self}; -use kvproto::brpb as proto; -use slog_global::{error, info}; -use tokio::runtime::{Builder, Runtime}; - -use crate::request::{restore_receiver, write_receiver}; - -#[derive(Debug)] -pub struct SocketService { - server: grpcio::Server, - listener: std::os::unix::net::UnixListener, -} - -pub fn new_service() -> io::Result { - (|| -> anyhow::Result { - let env = Arc::new(grpcio::EnvBuilder::new().build()); - let storage_service = Service::new().context("new storage service")?; - let builder = grpcio::ServerBuilder::new(env) - .register_service(proto::create_external_storage(storage_service)); - let grpc_socket_path = "/tmp/grpc-external-storage.sock"; - let socket_addr = format!("unix:{}", grpc_socket_path); - let socket_path = std::path::PathBuf::from(grpc_socket_path); - // Keep the listener in scope: otherwise the socket is destroyed - let listener = bind_socket(&socket_path).context("GRPC new service create socket")?; - let mut server = builder - .bind(socket_addr, 0) - .build() - .context("GRPC build server")?; - server.start(); - let (..) = server.bind_addrs().next().context("GRPC bind server")?; - Ok(SocketService { server, listener }) - })() - .context("new service") - .map_err(anyhow_to_io_log_error) -} - -/// Service handles the RPC messages for the `ExternalStorage` service. -#[derive(Clone)] -pub struct Service { - runtime: Arc, -} - -impl Service { - /// Create a new backup service. 
- pub fn new() -> io::Result { - let runtime = Arc::new( - Builder::new() - .basic_scheduler() - .thread_name("external-storage-grpc-service") - .core_threads(1) - .enable_all() - .build()?, - ); - Ok(Service { runtime }) - } -} - -impl proto::ExternalStorage for Service { - fn save( - &mut self, - _ctx: grpcio::RpcContext, - req: proto::ExternalStorageWriteRequest, - sink: grpcio::UnarySink, - ) { - info!("write request {:?}", req.get_object_name()); - let result = write_receiver(&self.runtime, req); - match result { - Ok(_) => { - let rsp = proto::ExternalStorageWriteResponse::default(); - info!("success write"); - sink.success(rsp); - } - Err(e) => { - error!("write {}", e); - sink.fail(make_rpc_error(anyhow_to_io_log_error(e))); - } - } - } - - fn restore( - &mut self, - _ctx: grpcio::RpcContext, - req: proto::ExternalStorageRestoreRequest, - sink: grpcio::UnarySink, - ) { - info!( - "restore request {:?} {:?}", - req.get_object_name(), - req.get_restore_name() - ); - let result = restore_receiver(&self.runtime, req); - match result { - Ok(_) => { - let rsp = proto::ExternalStorageRestoreResponse::default(); - info!("success restore"); - sink.success(rsp); - } - Err(e) => { - error!("restore {}", e); - sink.fail(make_rpc_error(e)); - } - } - } -} - -pub fn make_rpc_error(err: io::Error) -> grpcio::RpcStatus { - grpcio::RpcStatus::new( - match err.kind() { - ErrorKind::NotFound => grpcio::RpcStatusCode::NOT_FOUND, - ErrorKind::InvalidInput => grpcio::RpcStatusCode::INVALID_ARGUMENT, - ErrorKind::PermissionDenied => grpcio::RpcStatusCode::UNAUTHENTICATED, - _ => grpcio::RpcStatusCode::UNKNOWN, - }, - Some(format!("{:?}", err)), - ) -} - -fn bind_socket(socket_path: &std::path::Path) -> anyhow::Result { - let msg = format!("bind socket {:?}", &socket_path); - info!("{}", msg); - std::os::unix::net::UnixListener::bind(&socket_path).context(msg) -} diff --git a/components/external_storage/export/src/lib.rs b/components/external_storage/export/src/lib.rs deleted file 
mode 100644 index e04e5beb695..00000000000 --- a/components/external_storage/export/src/lib.rs +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. - -mod export; -pub use export::*; - -#[cfg(feature = "cloud-storage-grpc")] -mod grpc_service; -#[cfg(feature = "cloud-storage-grpc")] -pub use grpc_service::new_service; - -#[cfg(feature = "cloud-storage-dylib")] -mod dylib; - -#[cfg(any(feature = "cloud-storage-grpc", feature = "cloud-storage-dylib"))] -mod request; diff --git a/components/external_storage/export/src/request.rs b/components/external_storage/export/src/request.rs deleted file mode 100644 index 5623c0732d7..00000000000 --- a/components/external_storage/export/src/request.rs +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. - -use std::io::{self}; - -use anyhow::Context; -use external_storage::request::file_name_for_write; -use file_system::File; -use futures::executor::block_on; -use futures_io::AsyncRead; -use kvproto::brpb as proto; -pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; -use slog_global::info; -use tikv_util::time::Limiter; -use tokio::runtime::Runtime; -use tokio_util::compat::Tokio02AsyncReadCompatExt; - -use crate::export::{create_storage_no_client, read_external_storage_into_file, ExternalStorage}; - -pub fn write_receiver( - runtime: &Runtime, - req: proto::ExternalStorageWriteRequest, -) -> anyhow::Result<()> { - let storage_backend = req.get_storage_backend(); - let object_name = req.get_object_name(); - let content_length = req.get_content_length(); - let storage = create_storage_no_client(storage_backend).context("create storage")?; - let file_path = file_name_for_write(storage.name(), object_name); - let reader = runtime - .enter(|| block_on(open_file_as_async_read(file_path))) - .context("open file")?; - storage - .write(object_name, reader, content_length) - .context("storage write") -} - -pub fn restore_receiver( - 
runtime: &Runtime, - req: proto::ExternalStorageRestoreRequest, -) -> io::Result<()> { - let object_name = req.get_object_name(); - let storage_backend = req.get_storage_backend(); - let file_name = std::path::PathBuf::from(req.get_restore_name()); - let expected_length = req.get_content_length(); - runtime.enter(|| { - block_on(restore_inner( - storage_backend, - object_name, - file_name, - expected_length, - )) - }) -} - -pub async fn restore_inner( - storage_backend: &proto::StorageBackend, - object_name: &str, - file_name: std::path::PathBuf, - expected_length: u64, -) -> io::Result<()> { - let storage = create_storage_no_client(&storage_backend)?; - // TODO: support encryption. The service must be launched with or sent a - // DataKeyManager - let output: &mut dyn io::Write = &mut File::create(file_name)?; - // the minimum speed of reading data, in bytes/second. - // if reading speed is slower than this rate, we will stop with - // a "TimedOut" error. - // (at 8 KB/s for a 2 MB buffer, this means we timeout after 4m16s.) - const MINIMUM_READ_SPEED: usize = 8192; - let limiter = Limiter::new(f64::INFINITY); - let x = read_external_storage_into_file( - &mut storage.read(object_name), - output, - &limiter, - expected_length, - None, - MINIMUM_READ_SPEED, - ) - .await; - x -} - -async fn open_file_as_async_read( - file_path: std::path::PathBuf, -) -> anyhow::Result> { - info!("open file {:?}", &file_path); - let f = tokio::fs::File::open(file_path) - .await - .context("open file")?; - let reader: Box = Box::new(Box::pin(f.compat())); - Ok(reader) -} diff --git a/components/external_storage/src/dylib_client.rs b/components/external_storage/src/dylib_client.rs deleted file mode 100644 index 9e2748c2011..00000000000 --- a/components/external_storage/src/dylib_client.rs +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
- -use std::{ - io::{self, ErrorKind}, - sync::Arc, -}; - -use anyhow::Context; -use futures_io::AsyncRead; -pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; -use protobuf::{self, Message}; -use slog_global::info; -use tikv_util::time::Limiter; -use tokio::runtime::{Builder, Runtime}; - -use crate::{ - request::{ - anyhow_to_io_log_error, file_name_for_write, restore_sender, write_sender, DropPath, - }, - ExternalStorage, -}; - -struct ExternalStorageClient { - backend: Backend, - runtime: Arc, - library: libloading::Library, - name: &'static str, - url: url::Url, -} - -pub fn new_client( - backend: Backend, - name: &'static str, - url: url::Url, -) -> io::Result> { - let runtime = Builder::new() - .basic_scheduler() - .thread_name("external-storage-dylib-client") - .core_threads(1) - .enable_all() - .build()?; - let library = unsafe { - libloading::Library::new( - std::path::Path::new("./") - .join(libloading::library_filename("external_storage_export")), - ) - .map_err(libloading_err_to_io)? 
- }; - external_storage_init_ffi_dynamic(&library)?; - Ok(Box::new(ExternalStorageClient { - runtime: Arc::new(runtime), - backend, - library, - name, - url, - }) as _) -} - -impl ExternalStorage for ExternalStorageClient { - fn name(&self) -> &'static str { - self.name - } - - fn url(&self) -> io::Result { - Ok(self.url.clone()) - } - - fn write( - &self, - name: &str, - reader: Box, - content_length: u64, - ) -> io::Result<()> { - info!("external storage writing"); - (|| -> anyhow::Result<()> { - let file_path = file_name_for_write(&self.name, &name); - let req = write_sender( - &self.runtime, - self.backend.clone(), - file_path.clone(), - name, - reader, - content_length, - )?; - let bytes = req.write_to_bytes()?; - info!("write request"); - call_ffi_dynamic(&self.library, b"external_storage_write", bytes)?; - DropPath(file_path); - Ok(()) - })() - .context("external storage write") - .map_err(anyhow_to_io_log_error) - } - - fn read(&self, _name: &str) -> crate::ExternalData<'_> { - unimplemented!("use restore instead of read") - } - - fn restore( - &self, - storage_name: &str, - restore_name: std::path::PathBuf, - expected_length: u64, - speed_limiter: &Limiter, - ) -> io::Result<()> { - info!("external storage restore"); - let req = restore_sender( - self.backend.clone(), - storage_name, - restore_name, - expected_length, - speed_limiter, - )?; - let bytes = req.write_to_bytes()?; - call_ffi_dynamic(&self.library, b"external_storage_restore", bytes) - } -} - -pub fn extern_to_io_err(e: ffi_support::ExternError) -> io::Error { - io::Error::new(io::ErrorKind::Other, format!("{:?}", e)) -} - -type FfiInitFn<'a> = - libloading::Symbol<'a, unsafe extern "C" fn(error: &mut ffi_support::ExternError) -> ()>; -type FfiFn<'a> = libloading::Symbol< - 'a, - unsafe extern "C" fn(error: &mut ffi_support::ExternError, bytes: Vec) -> (), ->; - -fn external_storage_init_ffi_dynamic(library: &libloading::Library) -> io::Result<()> { - let mut e = 
ffi_support::ExternError::default(); - unsafe { - let func: FfiInitFn = library - .get(b"external_storage_init") - .map_err(libloading_err_to_io)?; - func(&mut e); - } - if e.get_code() != ffi_support::ErrorCode::SUCCESS { - return Err(extern_to_io_err(e)); - } - Ok(()) -} - -fn call_ffi_dynamic( - library: &libloading::Library, - fn_name: &[u8], - bytes: Vec, -) -> io::Result<()> { - let mut e = ffi_support::ExternError::default(); - unsafe { - let func: FfiFn = library.get(fn_name).map_err(libloading_err_to_io)?; - func(&mut e, bytes); - } - if e.get_code() != ffi_support::ErrorCode::SUCCESS { - return Err(extern_to_io_err(e)); - } - Ok(()) -} - -fn libloading_err_to_io(e: libloading::Error) -> io::Error { - // TODO: custom error type - let kind = match e { - libloading::Error::DlOpen { .. } | libloading::Error::DlOpenUnknown => { - ErrorKind::AddrNotAvailable - } - _ => ErrorKind::Other, - }; - io::Error::new(kind, format!("{}", e)) -} diff --git a/components/external_storage/export/src/export.rs b/components/external_storage/src/export.rs similarity index 54% rename from components/external_storage/export/src/export.rs rename to components/external_storage/src/export.rs index ad31dc363ae..5b69a793c12 100644 --- a/components/external_storage/export/src/export.rs +++ b/components/external_storage/src/export.rs @@ -1,41 +1,23 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. -//! To use External storage with protobufs as an application, import this -//! module. external_storage contains the actual library code -//! 
Cloud provider backends are under components/cloud use std::{io, path::Path, sync::Arc}; use async_trait::async_trait; -#[cfg(feature = "cloud-aws")] pub use aws::{Config as S3Config, S3Storage}; -#[cfg(feature = "cloud-azure")] pub use azure::{AzureStorage, Config as AzureConfig}; -#[cfg(any(feature = "cloud-storage-dylib", feature = "cloud-storage-grpc"))] -use cloud::blob::BlobConfig; use cloud::blob::{BlobStorage, PutResource}; use encryption::DataKeyManager; -#[cfg(feature = "cloud-storage-dylib")] -use external_storage::dylib_client; -#[cfg(feature = "cloud-storage-grpc")] -use external_storage::grpc_client; -pub use external_storage::{ - compression_reader_dispatcher, encrypt_wrap_reader, read_external_storage_info_buff, - read_external_storage_into_file, record_storage_create, BackendConfig, ExternalData, - ExternalStorage, HdfsStorage, LocalStorage, NoopStorage, RestoreConfig, UnpinReader, - MIN_READ_SPEED, +use gcp::GcsStorage; +use kvproto::brpb::{ + AzureBlobStorage, Gcs, Noop, StorageBackend, StorageBackend_oneof_backend as Backend, S3, }; -#[cfg(feature = "cloud-gcp")] -pub use gcp::{Config as GcsConfig, GcsStorage}; -pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; -#[cfg(any(feature = "cloud-gcp", feature = "cloud-aws", feature = "cloud-azure"))] -use kvproto::brpb::{AzureBlobStorage, Gcs, S3}; -use kvproto::brpb::{CloudDynamic, Noop, StorageBackend}; use tikv_util::time::{Instant, Limiter}; -#[cfg(feature = "cloud-storage-dylib")] -use tikv_util::warn; -#[cfg(feature = "cloud-storage-dylib")] -use crate::dylib; +use crate::{ + compression_reader_dispatcher, encrypt_wrap_reader, read_external_storage_into_file, + record_storage_create, BackendConfig, ExternalData, ExternalStorage, HdfsStorage, LocalStorage, + NoopStorage, RestoreConfig, UnpinReader, +}; pub fn create_storage( storage_backend: &StorageBackend, @@ -48,20 +30,6 @@ pub fn create_storage( } } -// when the flag cloud-storage-dylib or cloud-storage-grpc is set create_storage 
-// is automatically wrapped with a client This function is used by the -// library/server to avoid any wrapping -pub fn create_storage_no_client( - storage_backend: &StorageBackend, - config: BackendConfig, -) -> io::Result> { - if let Some(backend) = &storage_backend.backend { - create_backend_inner(backend, config) - } else { - Err(bad_storage_backend(storage_backend)) - } -} - fn bad_storage_backend(storage_backend: &StorageBackend) -> io::Error { io::Error::new( io::ErrorKind::NotFound, @@ -77,93 +45,11 @@ fn bad_backend(backend: Backend) -> io::Error { bad_storage_backend(&storage_backend) } -#[cfg(any(feature = "cloud-gcp", feature = "cloud-aws", feature = "cloud-azure"))] fn blob_store(store: Blob) -> Box { Box::new(BlobStore::new(store)) as Box } -#[cfg(feature = "cloud-storage-grpc")] -pub fn create_backend(backend: &Backend) -> io::Result> { - match create_config(backend) { - Some(config) => { - let conf = config?; - grpc_client::new_client(backend.clone(), conf.name(), conf.url()?) - } - None => Err(bad_backend(backend.clone())), - } -} - -#[cfg(feature = "cloud-storage-dylib")] -pub fn create_backend(backend: &Backend) -> io::Result> { - match create_config(backend) { - Some(config) => { - let conf = config?; - let r = dylib_client::new_client(backend.clone(), conf.name(), conf.url()?); - match r { - Err(e) if e.kind() == io::ErrorKind::AddrNotAvailable => { - warn!("could not open dll for external_storage_export"); - dylib::staticlib::new_client(backend.clone(), conf.name(), conf.url()?) 
- } - _ => r, - } - } - None => Err(bad_backend(backend.clone())), - } -} - -#[cfg(all( - not(feature = "cloud-storage-grpc"), - not(feature = "cloud-storage-dylib") -))] -pub fn create_backend( - backend: &Backend, - config: BackendConfig, -) -> io::Result> { - create_backend_inner(backend, config) -} - -#[cfg(any(feature = "cloud-storage-dylib", feature = "cloud-storage-grpc"))] -fn create_config(backend: &Backend) -> Option>> { - match backend { - #[cfg(feature = "cloud-aws")] - Backend::S3(config) => { - let conf = S3Config::from_input(config.clone()); - Some(conf.map(|c| Box::new(c) as Box)) - } - #[cfg(feature = "cloud-gcp")] - Backend::Gcs(config) => { - let conf = GcsConfig::from_input(config.clone()); - Some(conf.map(|c| Box::new(c) as Box)) - } - #[cfg(feature = "cloud-azure")] - Backend::AzureBlobStorage(config) => { - let conf = AzureConfig::from_input(config.clone()); - Some(conf.map(|c| Box::new(c) as Box)) - } - Backend::CloudDynamic(dyn_backend) => match dyn_backend.provider_name.as_str() { - #[cfg(feature = "cloud-aws")] - "aws" | "s3" => { - let conf = S3Config::from_cloud_dynamic(&dyn_backend); - Some(conf.map(|c| Box::new(c) as Box)) - } - #[cfg(feature = "cloud-gcp")] - "gcp" | "gcs" => { - let conf = GcsConfig::from_cloud_dynamic(&dyn_backend); - Some(conf.map(|c| Box::new(c) as Box)) - } - #[cfg(feature = "cloud-azure")] - "azure" | "azblob" => { - let conf = AzureConfig::from_cloud_dynamic(&dyn_backend); - Some(conf.map(|c| Box::new(c) as Box)) - } - _ => None, - }, - _ => None, - } -} - -/// Create a new storage from the given storage backend description. -fn create_backend_inner( +fn create_backend( backend: &Backend, backend_config: BackendConfig, ) -> io::Result> { @@ -176,30 +62,18 @@ fn create_backend_inner( Backend::Hdfs(hdfs) => { Box::new(HdfsStorage::new(&hdfs.remote, backend_config.hdfs_config)?) 
} - Backend::Noop(_) => { - Box::::default() as Box - } - #[cfg(feature = "cloud-aws")] + Backend::Noop(_) => Box::::default() as Box, Backend::S3(config) => { let mut s = S3Storage::from_input(config.clone())?; s.set_multi_part_size(backend_config.s3_multi_part_size); blob_store(s) } - #[cfg(feature = "cloud-gcp")] Backend::Gcs(config) => blob_store(GcsStorage::from_input(config.clone())?), - #[cfg(feature = "cloud-azure")] Backend::AzureBlobStorage(config) => blob_store(AzureStorage::from_input(config.clone())?), - Backend::CloudDynamic(dyn_backend) => match dyn_backend.provider_name.as_str() { - #[cfg(feature = "cloud-aws")] - "aws" | "s3" => blob_store(S3Storage::from_cloud_dynamic(dyn_backend)?), - #[cfg(feature = "cloud-gcp")] - "gcp" | "gcs" => blob_store(GcsStorage::from_cloud_dynamic(dyn_backend)?), - #[cfg(feature = "cloud-azure")] - "azure" | "azblob" => blob_store(AzureStorage::from_cloud_dynamic(dyn_backend)?), - _ => { - return Err(bad_backend(Backend::CloudDynamic(dyn_backend.clone()))); - } - }, + Backend::CloudDynamic(dyn_backend) => { + // CloudDynamic backend is no longer supported. 
+ return Err(bad_backend(Backend::CloudDynamic(dyn_backend.clone()))); + } #[allow(unreachable_patterns)] _ => return Err(bad_backend(backend.clone())), }; @@ -207,7 +81,6 @@ fn create_backend_inner( Ok(storage) } -#[cfg(feature = "cloud-aws")] // Creates a S3 `StorageBackend` pub fn make_s3_backend(config: S3) -> StorageBackend { let mut backend = StorageBackend::default(); @@ -236,26 +109,18 @@ pub fn make_noop_backend() -> StorageBackend { backend } -#[cfg(feature = "cloud-gcp")] pub fn make_gcs_backend(config: Gcs) -> StorageBackend { let mut backend = StorageBackend::default(); backend.set_gcs(config); backend } -#[cfg(feature = "cloud-azure")] pub fn make_azblob_backend(config: AzureBlobStorage) -> StorageBackend { let mut backend = StorageBackend::default(); backend.set_azure_blob_storage(config); backend } -pub fn make_cloud_backend(config: CloudDynamic) -> StorageBackend { - let mut backend = StorageBackend::default(); - backend.set_cloud_dynamic(config); - backend -} - #[cfg(test)] mod tests { use tempfile::Builder; diff --git a/components/external_storage/src/grpc_client.rs b/components/external_storage/src/grpc_client.rs deleted file mode 100644 index e836d8fb58a..00000000000 --- a/components/external_storage/src/grpc_client.rs +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
- -use std::{ - io::{self, ErrorKind}, - sync::Arc, -}; - -use anyhow::Context; -use futures_io::AsyncRead; -use grpcio::{self}; -use kvproto::brpb as proto; -pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; -use tikv_util::time::Limiter; -use tokio::runtime::{Builder, Runtime}; - -use crate::{ - request::{ - anyhow_to_io_log_error, file_name_for_write, restore_sender, write_sender, DropPath, - }, - ExternalStorage, -}; - -struct ExternalStorageClient { - backend: Backend, - runtime: Arc, - rpc: proto::ExternalStorageClient, - name: &'static str, - url: url::Url, -} - -pub fn new_client( - backend: Backend, - name: &'static str, - url: url::Url, -) -> io::Result> { - let runtime = Builder::new() - .basic_scheduler() - .thread_name("external-storage-grpc-client") - .core_threads(1) - .enable_all() - .build()?; - Ok(Box::new(ExternalStorageClient { - backend, - runtime: Arc::new(runtime), - rpc: new_rpc_client()?, - name, - url, - })) -} - -fn new_rpc_client() -> io::Result { - let env = Arc::new(grpcio::EnvBuilder::new().build()); - let grpc_socket_path = "/tmp/grpc-external-storage.sock"; - let socket_addr = format!("unix:{}", grpc_socket_path); - let channel = grpcio::ChannelBuilder::new(env).connect(&socket_addr); - Ok(proto::ExternalStorageClient::new(channel)) -} - -impl ExternalStorage for ExternalStorageClient { - fn name(&self) -> &'static str { - self.name - } - - fn url(&self) -> io::Result { - Ok(self.url.clone()) - } - - fn write( - &self, - name: &str, - reader: Box, - content_length: u64, - ) -> io::Result<()> { - info!("external storage writing"); - (|| -> anyhow::Result<()> { - let file_path = file_name_for_write(&self.name, &name); - let req = write_sender( - &self.runtime, - self.backend.clone(), - file_path.clone(), - name, - reader, - content_length, - )?; - info!("grpc write request"); - self.rpc - .save(&req) - .map_err(rpc_error_to_io) - .context("rpc write")?; - info!("grpc write request finished"); - DropPath(file_path); - 
Ok(()) - })() - .context("external storage write") - .map_err(anyhow_to_io_log_error) - } - - fn read(&self, _name: &str) -> crate::ExternalData<'_> { - unimplemented!("use restore instead of read") - } - - fn restore( - &self, - storage_name: &str, - restore_name: std::path::PathBuf, - expected_length: u64, - speed_limiter: &Limiter, - ) -> io::Result<()> { - info!("external storage restore"); - let req = restore_sender( - self.backend.clone(), - storage_name, - restore_name, - expected_length, - speed_limiter, - )?; - self.rpc.restore(&req).map_err(rpc_error_to_io).map(|_| ()) - } -} - -pub fn rpc_error_to_io(err: grpcio::Error) -> io::Error { - let msg = format!("{}", err); - match err { - grpcio::Error::RpcFailure(status) => match status.status { - grpcio::RpcStatusCode::NOT_FOUND => io::Error::new(ErrorKind::NotFound, msg), - grpcio::RpcStatusCode::INVALID_ARGUMENT => io::Error::new(ErrorKind::InvalidInput, msg), - grpcio::RpcStatusCode::UNAUTHENTICATED => { - io::Error::new(ErrorKind::PermissionDenied, msg) - } - _ => io::Error::new(ErrorKind::Other, msg), - }, - _ => io::Error::new(ErrorKind::Other, msg), - } -} diff --git a/components/external_storage/src/lib.rs b/components/external_storage/src/lib.rs index dd021f14bf8..082073abe4f 100644 --- a/components/external_storage/src/lib.rs +++ b/components/external_storage/src/lib.rs @@ -40,12 +40,8 @@ mod noop; pub use noop::NoopStorage; mod metrics; use metrics::EXT_STORAGE_CREATE_HISTOGRAM; -#[cfg(feature = "cloud-storage-dylib")] -pub mod dylib_client; -#[cfg(feature = "cloud-storage-grpc")] -pub mod grpc_client; -#[cfg(any(feature = "cloud-storage-dylib", feature = "cloud-storage-grpc"))] -pub mod request; +mod export; +pub use export::*; pub fn record_storage_create(start: Instant, storage: &dyn ExternalStorage) { EXT_STORAGE_CREATE_HISTOGRAM diff --git a/components/external_storage/src/request.rs b/components/external_storage/src/request.rs deleted file mode 100644 index 7f1a81d49b7..00000000000 --- 
a/components/external_storage/src/request.rs +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. - -use std::io::{self, ErrorKind}; - -use anyhow::Context; -use futures::executor::block_on; -use futures_io::{AsyncRead, AsyncWrite}; -use kvproto::brpb as proto; -pub use kvproto::brpb::StorageBackend_oneof_backend as Backend; -use tikv_util::time::Limiter; -use tokio::runtime::Runtime; -use tokio_util::compat::Tokio02AsyncReadCompatExt; - -pub fn write_sender( - runtime: &Runtime, - backend: Backend, - file_path: std::path::PathBuf, - name: &str, - reader: Box, - content_length: u64, -) -> io::Result { - (|| -> anyhow::Result { - // TODO: the reader should write direct to the file_path - // currently it is copying into an intermediate buffer - // Writing to a file here uses up disk space - // But as a positive it gets the backup data out of the DB the fastest - // Currently this waits for the file to be completely written before sending to - // storage - runtime.enter(|| { - block_on(async { - let msg = |action: &str| format!("{} file {:?}", action, &file_path); - let f = tokio::fs::File::create(file_path.clone()) - .await - .context(msg("create"))?; - let mut writer: Box = Box::new(Box::pin(f.compat())); - futures_util::io::copy(reader, &mut writer) - .await - .context(msg("copy")) - }) - })?; - let mut req = proto::ExternalStorageWriteRequest::default(); - req.set_object_name(name.to_string()); - req.set_content_length(content_length); - let mut sb = proto::StorageBackend::default(); - sb.backend = Some(backend); - req.set_storage_backend(sb); - Ok(req) - })() - .context("write_sender") - .map_err(anyhow_to_io_log_error) -} - -pub fn restore_sender( - backend: Backend, - storage_name: &str, - restore_name: std::path::PathBuf, - expected_length: u64, - _speed_limiter: &Limiter, -) -> io::Result { - // TODO: send speed_limiter - let mut req = proto::ExternalStorageRestoreRequest::default(); - 
req.set_object_name(storage_name.to_string()); - let restore_str = restore_name.to_str().ok_or_else(|| { - io::Error::new( - ErrorKind::InvalidData, - format!("could not convert to str {:?}", &restore_name), - ) - })?; - req.set_restore_name(restore_str.to_string()); - req.set_content_length(expected_length); - let mut sb = proto::StorageBackend::default(); - sb.backend = Some(backend); - req.set_storage_backend(sb); - Ok(req) -} - -pub fn anyhow_to_io_log_error(err: anyhow::Error) -> io::Error { - let string = format!("{:#}", &err); - match err.downcast::() { - Ok(e) => { - // It will be difficult to propagate the context - // without changing the error type to anyhow or a custom TiKV error - error!("{}", string); - e - } - Err(_) => io::Error::new(ErrorKind::Other, string), - } -} - -pub fn file_name_for_write(storage_name: &str, object_name: &str) -> std::path::PathBuf { - let full_name = format!("{}-{}", storage_name, object_name); - std::env::temp_dir().join(full_name) -} - -pub struct DropPath(pub std::path::PathBuf); - -impl Drop for DropPath { - fn drop(&mut self) { - let _ = std::fs::remove_file(&self.0); - } -} diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index d281c0eca69..2bd7737ade4 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -24,10 +24,6 @@ test-engines-panic = [ "engine_test/test-engines-panic", ] -cloud-aws = ["raftstore/cloud-aws"] -cloud-gcp = ["raftstore/cloud-gcp"] -cloud-azure = ["raftstore/cloud-azure"] - [dependencies] batch-system = { workspace = true } bytes = "1.0" diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 27380a52882..1933bad6da9 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -23,10 +23,6 @@ test-engines-panic = [ "engine_test/test-engines-panic", ] -cloud-aws = ["sst_importer/cloud-aws"] -cloud-gcp = ["sst_importer/cloud-gcp"] -cloud-azure = ["sst_importer/cloud-azure"] 
- [dependencies] batch-system = { workspace = true } bitflags = "1.0.1" @@ -39,7 +35,7 @@ crc32fast = "1.2" crossbeam = "0.8" derivative = "2" encryption = { workspace = true } -engine_rocks = { workspace = true, optional = true } +engine_rocks = { workspace = true, optional = true } # Should be [dev-dependencies] but we need to control the features # https://github.com/rust-lang/cargo/issues/6915 diff --git a/components/sst_importer/Cargo.toml b/components/sst_importer/Cargo.toml index d292b44606e..f4f2504a8b3 100644 --- a/components/sst_importer/Cargo.toml +++ b/components/sst_importer/Cargo.toml @@ -5,12 +5,7 @@ edition = "2021" publish = false [features] -default = ["cloud-aws", "cloud-gcp", "cloud-azure", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] -cloud-aws = ["external_storage_export/cloud-aws"] -cloud-gcp = ["external_storage_export/cloud-gcp"] -cloud-azure = ["external_storage_export/cloud-azure"] -cloud-storage-grpc = ["external_storage_export/cloud-storage-grpc"] -cloud-storage-dylib = ["external_storage_export/cloud-storage-dylib"] +default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] test-engines-rocksdb = [ "engine_test/test-engines-rocksdb", @@ -34,7 +29,7 @@ encryption = { workspace = true } engine_rocks = { workspace = true } engine_traits = { workspace = true } error_code = { workspace = true } -external_storage_export = { workspace = true } +external_storage ={ workspace = true } file_system = { workspace = true } futures = { version = "0.3", features = ["thread-pool"] } futures-util = { version = "0.3", default-features = false, features = ["io"] } diff --git a/components/sst_importer/src/caching/storage_cache.rs b/components/sst_importer/src/caching/storage_cache.rs index 23732545b92..585772c2552 100644 --- a/components/sst_importer/src/caching/storage_cache.rs +++ b/components/sst_importer/src/caching/storage_cache.rs @@ -2,7 +2,7 @@ use std::sync::Arc; -use external_storage_export::ExternalStorage; +use 
external_storage::ExternalStorage; use kvproto::brpb::StorageBackend; use super::cache_map::{MakeCache, ShareOwned}; @@ -31,7 +31,7 @@ impl StoragePool { fn create(backend: &StorageBackend, size: usize) -> Result { let mut r = Vec::with_capacity(size); for _ in 0..size { - let s = external_storage_export::create_storage(backend, Default::default())?; + let s = external_storage::create_storage(backend, Default::default())?; r.push(Arc::from(s)); } Ok(Self(r.into_boxed_slice())) diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 5530862e6a3..5cf9f1c6573 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -23,7 +23,7 @@ use engine_traits::{ IterOptions, Iterator, KvEngine, RefIterable, SstCompressionType, SstExt, SstMetaInfo, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, CF_WRITE, }; -use external_storage_export::{ +use external_storage::{ compression_reader_dispatcher, encrypt_wrap_reader, ExternalStorage, RestoreConfig, }; use file_system::{get_io_rate_limiter, IoType, OpenOptions}; @@ -470,7 +470,7 @@ impl SstImporter { backend: &StorageBackend, support_kms: bool, speed_limiter: &Limiter, - restore_config: external_storage_export::RestoreConfig, + restore_config: external_storage::RestoreConfig, ) -> Result<()> { self._download_rt .block_on(self.async_download_file_from_external_storage( @@ -496,7 +496,7 @@ impl SstImporter { // TODO: pass a config to support hdfs let ext_storage = if cache_id.is_empty() { EXT_STORAGE_CACHE_COUNT.with_label_values(&["skip"]).inc(); - let s = external_storage_export::create_storage(backend, Default::default())?; + let s = external_storage::create_storage(backend, Default::default())?; Arc::from(s) } else { self.cached_storage.cached_or_create(cache_id, backend)? 
@@ -513,7 +513,7 @@ impl SstImporter { support_kms: bool, speed_limiter: &Limiter, cache_key: &str, - restore_config: external_storage_export::RestoreConfig, + restore_config: external_storage::RestoreConfig, ) -> Result<()> { let start_read = Instant::now(); if let Some(p) = dst_file.parent() { @@ -659,7 +659,7 @@ impl SstImporter { async fn exec_download( &self, meta: &KvMeta, - ext_storage: Arc, + ext_storage: Arc, speed_limiter: &Limiter, ) -> Result { let start = Instant::now(); @@ -684,7 +684,7 @@ impl SstImporter { Some((meta.get_range_offset(), range_length)) } }; - let restore_config = external_storage_export::RestoreConfig { + let restore_config = external_storage::RestoreConfig { range, compression_type: Some(meta.get_compression_type()), expected_sha256, @@ -715,7 +715,7 @@ impl SstImporter { pub async fn do_read_kv_file( &self, meta: &KvMeta, - ext_storage: Arc, + ext_storage: Arc, speed_limiter: &Limiter, ) -> Result { let start = Instant::now(); @@ -764,18 +764,16 @@ impl SstImporter { &self, ext_storage: Arc, support_kms: bool, - ) -> Arc { + ) -> Arc { // kv-files needn't are decrypted with KMS when download currently because these // files are not encrypted when log-backup. It is different from // sst-files because sst-files is encrypted when saved with rocksdb env // with KMS. to do: support KMS when log-backup and restore point. match (support_kms, self.key_manager.clone()) { - (true, Some(key_manager)) => { - Arc::new(external_storage_export::EncryptedExternalStorage { - key_manager, - storage: ext_storage, - }) - } + (true, Some(key_manager)) => Arc::new(external_storage::EncryptedExternalStorage { + key_manager, + storage: ext_storage, + }), _ => ext_storage, } } @@ -784,7 +782,7 @@ impl SstImporter { &self, file_length: u64, file_name: &str, - ext_storage: Arc, + ext_storage: Arc, speed_limiter: &Limiter, restore_config: RestoreConfig, ) -> Result> { @@ -806,12 +804,12 @@ impl SstImporter { encrypt_wrap_reader(file_crypter, inner)? 
}; - let r = external_storage_export::read_external_storage_info_buff( + let r = external_storage::read_external_storage_info_buff( &mut reader, speed_limiter, file_length, expected_sha256, - external_storage_export::MIN_READ_SPEED, + external_storage::MIN_READ_SPEED, ) .await; let url = ext_storage.url()?.to_string(); @@ -828,7 +826,7 @@ impl SstImporter { pub async fn read_from_kv_file( &self, meta: &KvMeta, - ext_storage: Arc, + ext_storage: Arc, backend: &StorageBackend, speed_limiter: &Limiter, ) -> Result> { @@ -893,7 +891,7 @@ impl SstImporter { } else { Some((offset, range_length)) }; - let restore_config = external_storage_export::RestoreConfig { + let restore_config = external_storage::RestoreConfig { range, compression_type: Some(meta.compression_type), expected_sha256, @@ -1124,7 +1122,7 @@ impl SstImporter { iv: meta.cipher_iv.to_owned(), }); - let restore_config = external_storage_export::RestoreConfig { + let restore_config = external_storage::RestoreConfig { file_crypter, ..Default::default() }; @@ -1500,7 +1498,7 @@ mod tests { collect, EncryptionMethod, Error as TraitError, ExternalSstFileInfo, Iterable, Iterator, RefIterable, SstReader, SstWriter, CF_DEFAULT, DATA_CFS, }; - use external_storage_export::read_external_storage_info_buff; + use external_storage::read_external_storage_info_buff; use file_system::File; use online_config::{ConfigManager, OnlineConfig}; use openssl::hash::{Hasher, MessageDigest}; @@ -1707,7 +1705,7 @@ mod tests { meta.mut_region_epoch().set_conf_ver(5); meta.mut_region_epoch().set_version(6); - let backend = external_storage_export::make_local_backend(ext_sst_dir.path()); + let backend = external_storage::make_local_backend(ext_sst_dir.path()); Ok((ext_sst_dir, backend, meta)) } @@ -1755,7 +1753,7 @@ mod tests { kv_meta.set_length(len as _); kv_meta.set_sha256(sha256.finish().unwrap().to_vec()); - let backend = external_storage_export::make_local_backend(ext_dir.path()); + let backend = 
external_storage::make_local_backend(ext_dir.path()); Ok((ext_dir, backend, kv_meta, buff.buffer().to_vec())) } @@ -1824,7 +1822,7 @@ mod tests { meta.mut_region_epoch().set_conf_ver(5); meta.mut_region_epoch().set_version(6); - let backend = external_storage_export::make_local_backend(ext_sst_dir.path()); + let backend = external_storage::make_local_backend(ext_sst_dir.path()); Ok((ext_sst_dir, backend, meta)) } @@ -1870,7 +1868,7 @@ mod tests { meta.mut_region_epoch().set_conf_ver(5); meta.mut_region_epoch().set_version(6); - let backend = external_storage_export::make_local_backend(ext_sst_dir.path()); + let backend = external_storage::make_local_backend(ext_sst_dir.path()); Ok((ext_sst_dir, backend, meta)) } @@ -1904,7 +1902,7 @@ mod tests { hasher.update(data).unwrap(); let hash256 = hasher.finish().unwrap().to_vec(); - block_on_external_io(external_storage_export::read_external_storage_into_file( + block_on_external_io(external_storage::read_external_storage_into_file( &mut input, &mut output, &Limiter::new(f64::INFINITY), @@ -1922,7 +1920,7 @@ mod tests { let mut input = pending::>().into_async_read(); let mut output = Vec::new(); - let err = block_on_external_io(external_storage_export::read_external_storage_into_file( + let err = block_on_external_io(external_storage::read_external_storage_into_file( &mut input, &mut output, &Limiter::new(f64::INFINITY), @@ -2139,7 +2137,7 @@ mod tests { }; // test read all of the file. - let restore_config = external_storage_export::RestoreConfig { + let restore_config = external_storage::RestoreConfig { expected_sha256: Some(kv_meta.get_sha256().to_vec()), ..Default::default() }; @@ -2162,7 +2160,7 @@ mod tests { // test read range of the file. let (offset, len) = (5, 16); - let restore_config = external_storage_export::RestoreConfig { + let restore_config = external_storage::RestoreConfig { range: Some((offset, len)), ..Default::default() }; @@ -2250,7 +2248,7 @@ mod tests { // perform download file into .temp dir. 
let file_name = "sample.sst"; let path = importer.dir.get_import_path(file_name).unwrap(); - let restore_config = external_storage_export::RestoreConfig::default(); + let restore_config = external_storage::RestoreConfig::default(); importer .download_file_from_external_storage( meta.get_length(), @@ -2285,7 +2283,7 @@ mod tests { .unwrap(); let path = importer.dir.get_import_path(kv_meta.get_name()).unwrap(); - let restore_config = external_storage_export::RestoreConfig { + let restore_config = external_storage::RestoreConfig { expected_sha256: Some(kv_meta.get_sha256().to_vec()), ..Default::default() }; @@ -2747,7 +2745,7 @@ mod tests { let cfg = Config::default(); let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); let db = create_sst_test_engine().unwrap(); - let backend = external_storage_export::make_local_backend(ext_sst_dir.path()); + let backend = external_storage::make_local_backend(ext_sst_dir.path()); let result = importer.download::( &meta, diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index ff7526172d5..4adfe3db51e 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs @@ -4,7 +4,7 @@ use std::path::Path; use encryption::DataKeyManager; use engine_traits::EncryptionKeyManager; -use external_storage_export::ExternalStorage; +use external_storage::ExternalStorage; use file_system::File; use super::Result; diff --git a/components/test_backup/Cargo.toml b/components/test_backup/Cargo.toml index 59300f993e3..c13b3008df9 100644 --- a/components/test_backup/Cargo.toml +++ b/components/test_backup/Cargo.toml @@ -4,12 +4,6 @@ version = "0.0.1" edition = "2021" publish = false -[features] -default = ["cloud-aws", "cloud-gcp", "cloud-azure"] -cloud-aws = ["external_storage_export/cloud-aws"] -cloud-gcp = ["external_storage_export/cloud-gcp"] -cloud-azure = ["external_storage_export/cloud-azure"] - [dependencies] api_version = { workspace = true } 
backup = { workspace = true } @@ -17,7 +11,7 @@ collections = { workspace = true } concurrency_manager = { workspace = true } crc64fast = "0.1" engine_traits = { workspace = true } -external_storage_export = { workspace = true } +external_storage ={ workspace = true } file_system = { workspace = true } futures = "0.3" futures-executor = "0.3" diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index 3a5800e989b..6c6eae961d7 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -12,7 +12,7 @@ use api_version::{dispatch_api_version, keyspace::KvPair, ApiV1, KvFormat, RawVa use backup::Task; use collections::HashMap; use engine_traits::{CfName, IterOptions, CF_DEFAULT, CF_WRITE, DATA_KEY_PREFIX_LEN}; -use external_storage_export::make_local_backend; +use external_storage::make_local_backend; use futures::{channel::mpsc as future_mpsc, executor::block_on}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{brpb::*, kvrpcpb::*, tikvpb::TikvClient}; diff --git a/tests/Cargo.toml b/tests/Cargo.toml index aa0c2c29dec..79f5439736d 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -40,11 +40,8 @@ harness = false path = "benches/deadlock_detector/mod.rs" [features] -default = ["failpoints", "testexport", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] +default = ["failpoints", "testexport", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] failpoints = ["fail/failpoints", "tikv/failpoints", "pd_client/failpoints"] -cloud-aws = ["external_storage_export/cloud-aws"] -cloud-gcp = ["external_storage_export/cloud-gcp"] -cloud-azure = ["external_storage_export/cloud-azure"] testexport = ["raftstore/testexport", "tikv/testexport", "pd_client/testexport"] profiling = ["profiler/profiling"] @@ -130,7 +127,7 @@ criterion-cpu-time = "0.1" engine_rocks = { workspace = true } engine_test = { workspace = true } engine_traits = { workspace = true } 
-external_storage_export = { workspace = true } +external_storage ={ workspace = true } file_system = { workspace = true } hyper = { version = "0.14", default-features = false, features = ["runtime"] } keys = { workspace = true } diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index e51c9862e47..010d12177b6 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ b/tests/failpoints/cases/test_import_service.rs @@ -46,7 +46,7 @@ fn test_download_sst_blocking_sst_writer() { // Now perform a proper download. let mut download = DownloadRequest::default(); download.set_sst(meta.clone()); - download.set_storage_backend(external_storage_export::make_local_backend(temp_dir.path())); + download.set_storage_backend(external_storage::make_local_backend(temp_dir.path())); download.set_name("test.sst".to_owned()); download.mut_sst().mut_range().set_start(vec![sst_range.1]); download diff --git a/tests/integrations/backup/mod.rs b/tests/integrations/backup/mod.rs index 4cfd4be07be..f89ef0c6faa 100644 --- a/tests/integrations/backup/mod.rs +++ b/tests/integrations/backup/mod.rs @@ -3,7 +3,7 @@ use std::{fs::File, time::Duration}; use engine_traits::{CF_DEFAULT, CF_WRITE}; -use external_storage_export::{create_storage, make_local_backend}; +use external_storage::{create_storage, make_local_backend}; use file_system::calc_crc32_bytes; use futures::{executor::block_on, AsyncReadExt, StreamExt}; use kvproto::{ diff --git a/tests/integrations/import/test_apply_log.rs b/tests/integrations/import/test_apply_log.rs index 3d8cf85b02c..0b11a12002e 100644 --- a/tests/integrations/import/test_apply_log.rs +++ b/tests/integrations/import/test_apply_log.rs @@ -1,5 +1,5 @@ use engine_traits::CF_DEFAULT; -use external_storage_export::LocalStorage; +use external_storage::LocalStorage; use kvproto::import_sstpb::ApplyRequest; use tempfile::TempDir; diff --git a/tests/integrations/import/test_sst_service.rs 
b/tests/integrations/import/test_sst_service.rs index 6c56ab0018b..2eb1c10c72d 100644 --- a/tests/integrations/import/test_sst_service.rs +++ b/tests/integrations/import/test_sst_service.rs @@ -298,7 +298,7 @@ fn test_download_sst() { // Checks that downloading a non-existing storage returns error. let mut download = DownloadRequest::default(); download.set_sst(meta.clone()); - download.set_storage_backend(external_storage_export::make_local_backend(temp_dir.path())); + download.set_storage_backend(external_storage::make_local_backend(temp_dir.path())); download.set_name("missing.sst".to_owned()); let result = import.download(&download).unwrap(); diff --git a/tests/integrations/import/util.rs b/tests/integrations/import/util.rs index d8a11d50746..96ebc071bbc 100644 --- a/tests/integrations/import/util.rs +++ b/tests/integrations/import/util.rs @@ -10,7 +10,7 @@ use std::{ use collections::HashMap; use engine_rocks::RocksEngine; use engine_traits::CF_DEFAULT; -use external_storage_export::{ExternalStorage, UnpinReader}; +use external_storage::{ExternalStorage, UnpinReader}; use futures::{executor::block_on, io::Cursor as AsyncCursor, stream, SinkExt}; use grpcio::{ChannelBuilder, Environment, Result, WriteFlags}; use kvproto::{ From 6b0a515edcd6bffe362c05f82babe26fae51e99d Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 20 Oct 2023 12:54:29 -0500 Subject: [PATCH 0970/1149] server: make cpu and heap profiling can be running concurrently (#15761) close tikv/tikv#15760 Make cpu and heap profiling can be running concurrently. 
Beside, change the behavior of - "debug/pprof/heap_activate": do not dump heap profile periodically by default - "debug/pprof/heap": dump heap profile without any delay and use embedded jeprof script Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/server/status_server/jeprof.in | 5727 +++++++++++++++++++++++++++ src/server/status_server/mod.rs | 45 +- src/server/status_server/profile.rs | 185 +- 3 files changed, 5855 insertions(+), 102 deletions(-) create mode 100644 src/server/status_server/jeprof.in diff --git a/src/server/status_server/jeprof.in b/src/server/status_server/jeprof.in new file mode 100644 index 00000000000..cadf15d7d8e --- /dev/null +++ b/src/server/status_server/jeprof.in @@ -0,0 +1,5727 @@ +#! /usr/bin/env perl + +# Copyright (c) 1998-2007, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# --- +# Program for printing the profile generated by common/profiler.cc, +# or by the heap profiler (common/debugallocation.cc) +# +# The profile contains a sequence of entries of the form: +# +# This program parses the profile, and generates user-readable +# output. +# +# Examples: +# +# % tools/jeprof "program" "profile" +# Enters "interactive" mode +# +# % tools/jeprof --text "program" "profile" +# Generates one line per procedure +# +# % tools/jeprof --gv "program" "profile" +# Generates annotated call-graph and displays via "gv" +# +# % tools/jeprof --gv --focus=Mutex "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# +# % tools/jeprof --gv --focus=Mutex --ignore=string "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# and does not match "string" +# +# % tools/jeprof --list=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --list= pattern. The listing is +# annotated with the flat and cumulative sample counts at each line. +# +# % tools/jeprof --disasm=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --disasm= pattern. The listing is +# annotated with the flat and cumulative sample counts at each PC value. +# +# TODO: Use color to indicate files? 
+ +use strict; +use warnings; +use Getopt::Long; +use Cwd; + +my $JEPROF_VERSION = "unknown"; +my $PPROF_VERSION = "2.0"; + +# These are the object tools we use which can come from a +# user-specified location using --tools, from the JEPROF_TOOLS +# environment variable, or from the environment. +my %obj_tool_map = ( + "objdump" => "objdump", + "nm" => "nm", + "addr2line" => "addr2line", + "c++filt" => "c++filt", + ## ConfigureObjTools may add architecture-specific entries: + #"nm_pdb" => "nm-pdb", # for reading windows (PDB-format) executables + #"addr2line_pdb" => "addr2line-pdb", # ditto + #"otool" => "otool", # equivalent of objdump on OS X +); +# NOTE: these are lists, so you can put in commandline flags if you want. +my @DOT = ("dot"); # leave non-absolute, since it may be in /usr/local +my @GV = ("gv"); +my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread +my @KCACHEGRIND = ("kcachegrind"); +my @PS2PDF = ("ps2pdf"); +# These are used for dynamic profiles +my @URL_FETCHER = ("curl", "-s", "--fail"); + +# These are the web pages that servers need to support for dynamic profiles +my $HEAP_PAGE = "/pprof/heap"; +my $PROFILE_PAGE = "/pprof/profile"; # must support cgi-param "?seconds=#" +my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param + # ?seconds=#&event=x&period=n +my $GROWTH_PAGE = "/pprof/growth"; +my $CONTENTION_PAGE = "/pprof/contention"; +my $WALL_PAGE = "/pprof/wall(?:\\?.*)?"; # accepts options like namefilter +my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; +my $CENSUSPROFILE_PAGE = "/pprof/censusprofile(?:\\?.*)?"; # must support cgi-param + # "?seconds=#", + # "?tags_regexp=#" and + # "?type=#". +my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST +my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; + +# These are the web pages that can be named on the command line. +# All the alternatives must begin with /. +my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" . 
+ "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" . + "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)"; + +# default binary name +my $UNKNOWN_BINARY = "(unknown)"; + +# There is a pervasive dependency on the length (in hex characters, +# i.e., nibbles) of an address, distinguishing between 32-bit and +# 64-bit profiles. To err on the safe size, default to 64-bit here: +my $address_length = 16; + +my $dev_null = "/dev/null"; +if (! -e $dev_null && $^O =~ /MSWin/) { # $^O is the OS perl was built for + $dev_null = "nul"; +} + +# A list of paths to search for shared object files +my @prefix_list = (); + +# Special routine name that should not have any symbols. +# Used as separator to parse "addr2line -i" output. +my $sep_symbol = '_fini'; +my $sep_address = undef; + +##### Argument parsing ##### + +sub usage_string { + return < + is a space separated list of profile names. +jeprof [options] + is a list of profile files where each file contains + the necessary symbol mappings as well as profile data (likely generated + with --raw). +jeprof [options] + is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE + + Each name can be: + /path/to/profile - a path to a profile file + host:port[/] - a location of a service to get profile from + + The / can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile, + $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, + $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. + For instance: + jeprof http://myserver.com:80$HEAP_PAGE + If / is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). +jeprof --symbols + Maps addresses to symbol names. In this mode, stdin should be a + list of library mappings, in the same format as is found in the heap- + and cpu-profile files (this loosely matches that of /proc/self/maps + on linux), followed by a list of hex addresses to map, one per line. 
+ + For more help with querying remote servers, including how to add the + necessary server-side support code, see this filename (or one like it): + + /usr/doc/gperftools-$PPROF_VERSION/pprof_remote_servers.html + +Options: + --cum Sort by cumulative data + --base= Subtract from before display + --interactive Run in interactive mode (interactive "help" gives help) [default] + --seconds= Length of time for dynamic profiles [default=30 secs] + --add_lib= Read additional symbols and line info from the given library + --lib_prefix=

Comma separated list of library path prefixes + +Reporting Granularity: + --addresses Report at address level + --lines Report at source line level + --functions Report at function level [default] + --files Report at source file level + +Output type: + --text Generate text report + --callgrind Generate callgrind format to stdout + --gv Generate Postscript and display + --evince Generate PDF and display + --web Generate SVG and display + --list= Generate source listing of matching routines + --disasm= Generate disassembly of matching routines + --symbols Print demangled symbol names found at given addresses + --dot Generate DOT file to stdout + --ps Generate Postcript to stdout + --pdf Generate PDF to stdout + --svg Generate SVG to stdout + --gif Generate GIF to stdout + --raw Generate symbolized jeprof data (useful with remote fetch) + --collapsed Generate collapsed stacks for building flame graphs + (see http://www.brendangregg.com/flamegraphs.html) + +Heap-Profile Options: + --inuse_space Display in-use (mega)bytes [default] + --inuse_objects Display in-use objects + --alloc_space Display allocated (mega)bytes + --alloc_objects Display allocated objects + --show_bytes Display space in bytes + --drop_negative Ignore negative differences + +Contention-profile options: + --total_delay Display total delay at each region [default] + --contentions Display number of delays at each region + --mean_delay Display mean delay at each region + +Call-graph Options: + --nodecount= Show at most so many nodes [default=80] + --nodefraction= Hide nodes below *total [default=.005] + --edgefraction= Hide edges below *total [default=.001] + --maxdegree= Max incoming/outgoing edges per node [default=8] + --focus= Focus on backtraces with nodes matching + --thread= Show profile for thread + --ignore= Ignore backtraces with nodes matching + --scale= Set GV scaling [default=0] + --heapcheck Make nodes with non-0 object counts + (i.e. 
direct leak generators) more visible + --retain= Retain only nodes that match + --exclude= Exclude all nodes that match + +Miscellaneous: + --tools=[,...] \$PATH for object tool pathnames + --test Run unit tests + --help This message + --version Version information + --debug-syms-by-id (Linux only) Find debug symbol files by build ID as well as by name + +Environment Variables: + JEPROF_TMPDIR Profiles directory. Defaults to \$HOME/jeprof + JEPROF_TOOLS Prefix for object tools pathnames + +Examples: + +jeprof /bin/ls ls.prof + Enters "interactive" mode +jeprof --text /bin/ls ls.prof + Outputs one line per procedure +jeprof --web /bin/ls ls.prof + Displays annotated call-graph in web browser +jeprof --gv /bin/ls ls.prof + Displays annotated call-graph via 'gv' +jeprof --gv --focus=Mutex /bin/ls ls.prof + Restricts to code paths including a .*Mutex.* entry +jeprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof + Code paths including Mutex but not string +jeprof --list=getdir /bin/ls ls.prof + (Per-line) annotated source listing for getdir() +jeprof --disasm=getdir /bin/ls ls.prof + (Per-PC) annotated disassembly for getdir() + +jeprof http://localhost:1234/ + Enters "interactive" mode +jeprof --text localhost:1234 + Outputs one line per procedure for localhost:1234 +jeprof --raw localhost:1234 > ./local.raw +jeprof --text ./local.raw + Fetches a remote profile for later analysis and then + analyzes it in text mode. +EOF +} + +sub version_string { + return < \$main::opt_help, + "version!" => \$main::opt_version, + "cum!" => \$main::opt_cum, + "base=s" => \$main::opt_base, + "seconds=i" => \$main::opt_seconds, + "add_lib=s" => \$main::opt_lib, + "lib_prefix=s" => \$main::opt_lib_prefix, + "functions!" => \$main::opt_functions, + "lines!" => \$main::opt_lines, + "addresses!" => \$main::opt_addresses, + "files!" => \$main::opt_files, + "text!" => \$main::opt_text, + "callgrind!" 
=> \$main::opt_callgrind, + "list=s" => \$main::opt_list, + "disasm=s" => \$main::opt_disasm, + "symbols!" => \$main::opt_symbols, + "gv!" => \$main::opt_gv, + "evince!" => \$main::opt_evince, + "web!" => \$main::opt_web, + "dot!" => \$main::opt_dot, + "ps!" => \$main::opt_ps, + "pdf!" => \$main::opt_pdf, + "svg!" => \$main::opt_svg, + "gif!" => \$main::opt_gif, + "raw!" => \$main::opt_raw, + "collapsed!" => \$main::opt_collapsed, + "interactive!" => \$main::opt_interactive, + "nodecount=i" => \$main::opt_nodecount, + "nodefraction=f" => \$main::opt_nodefraction, + "edgefraction=f" => \$main::opt_edgefraction, + "maxdegree=i" => \$main::opt_maxdegree, + "focus=s" => \$main::opt_focus, + "thread=s" => \$main::opt_thread, + "ignore=s" => \$main::opt_ignore, + "scale=i" => \$main::opt_scale, + "heapcheck" => \$main::opt_heapcheck, + "retain=s" => \$main::opt_retain, + "exclude=s" => \$main::opt_exclude, + "inuse_space!" => \$main::opt_inuse_space, + "inuse_objects!" => \$main::opt_inuse_objects, + "alloc_space!" => \$main::opt_alloc_space, + "alloc_objects!" => \$main::opt_alloc_objects, + "show_bytes!" => \$main::opt_show_bytes, + "drop_negative!" => \$main::opt_drop_negative, + "total_delay!" => \$main::opt_total_delay, + "contentions!" => \$main::opt_contentions, + "mean_delay!" => \$main::opt_mean_delay, + "tools=s" => \$main::opt_tools, + "test!" => \$main::opt_test, + "debug!" => \$main::opt_debug, + "debug-syms-by-id!" 
=> \$main::opt_debug_syms_by_id, + # Undocumented flags used only by unittests: + "test_stride=i" => \$main::opt_test_stride, + ) || usage("Invalid option(s)"); + + # Deal with the standard --help and --version + if ($main::opt_help) { + print usage_string(); + exit(0); + } + + if ($main::opt_version) { + print version_string(); + exit(0); + } + + # Disassembly/listing/symbols mode requires address-level info + if ($main::opt_disasm || $main::opt_list || $main::opt_symbols) { + $main::opt_functions = 0; + $main::opt_lines = 0; + $main::opt_addresses = 1; + $main::opt_files = 0; + } + + # Check heap-profiling flags + if ($main::opt_inuse_space + + $main::opt_inuse_objects + + $main::opt_alloc_space + + $main::opt_alloc_objects > 1) { + usage("Specify at most on of --inuse/--alloc options"); + } + + # Check output granularities + my $grains = + $main::opt_functions + + $main::opt_lines + + $main::opt_addresses + + $main::opt_files + + 0; + if ($grains > 1) { + usage("Only specify one output granularity option"); + } + if ($grains == 0) { + $main::opt_functions = 1; + } + + # Check output modes + my $modes = + $main::opt_text + + $main::opt_callgrind + + ($main::opt_list eq '' ? 0 : 1) + + ($main::opt_disasm eq '' ? 0 : 1) + + ($main::opt_symbols == 0 ? 
0 : 1) + + $main::opt_gv + + $main::opt_evince + + $main::opt_web + + $main::opt_dot + + $main::opt_ps + + $main::opt_pdf + + $main::opt_svg + + $main::opt_gif + + $main::opt_raw + + $main::opt_collapsed + + $main::opt_interactive + + 0; + if ($modes > 1) { + usage("Only specify one output mode"); + } + if ($modes == 0) { + if (-t STDOUT) { # If STDOUT is a tty, activate interactive mode + $main::opt_interactive = 1; + } else { + $main::opt_text = 1; + } + } + + if ($main::opt_test) { + RunUnitTests(); + # Should not return + exit(1); + } + + # Binary name and profile arguments list + $main::prog = ""; + @main::pfile_args = (); + + # Remote profiling without a binary (using $SYMBOL_PAGE instead) + if (@ARGV > 0) { + if (IsProfileURL($ARGV[0])) { + $main::use_symbol_page = 1; + } elsif (IsSymbolizedProfileFile($ARGV[0])) { + $main::use_symbolized_profile = 1; + $main::prog = $UNKNOWN_BINARY; # will be set later from the profile file + } + } + + if ($main::use_symbol_page || $main::use_symbolized_profile) { + # We don't need a binary! + my %disabled = ('--lines' => $main::opt_lines, + '--disasm' => $main::opt_disasm); + for my $option (keys %disabled) { + usage("$option cannot be used without a binary") if $disabled{$option}; + } + # Set $main::prog later... 
+ scalar(@ARGV) || usage("Did not specify profile file"); + } elsif ($main::opt_symbols) { + # --symbols needs a binary-name (to run nm on, etc) but not profiles + $main::prog = shift(@ARGV) || usage("Did not specify program"); + } else { + $main::prog = shift(@ARGV) || usage("Did not specify program"); + scalar(@ARGV) || usage("Did not specify profile file"); + } + + # Parse profile file/location arguments + foreach my $farg (@ARGV) { + if ($farg =~ m/(.*)\@([0-9]+)(|\/.*)$/ ) { + my $machine = $1; + my $num_machines = $2; + my $path = $3; + for (my $i = 0; $i < $num_machines; $i++) { + unshift(@main::pfile_args, "$i.$machine$path"); + } + } else { + unshift(@main::pfile_args, $farg); + } + } + + if ($main::use_symbol_page) { + unless (IsProfileURL($main::pfile_args[0])) { + error("The first profile should be a remote form to use $SYMBOL_PAGE\n"); + } + CheckSymbolPage(); + $main::prog = FetchProgramName(); + } elsif (!$main::use_symbolized_profile) { # may not need objtools! + ConfigureObjTools($main::prog) + } + + # Break the opt_lib_prefix into the prefix_list array + @prefix_list = split (',', $main::opt_lib_prefix); + + # Remove trailing / from the prefixes, in the list to prevent + # searching things like /my/path//lib/mylib.so + foreach (@prefix_list) { + s|/+$||; + } + + # Flag to prevent us from trying over and over to use + # elfutils if it's not installed (used only with + # --debug-syms-by-id option). + $main::gave_up_on_elfutils = 0; +} + +sub FilterAndPrint { + my ($profile, $symbols, $libs, $thread) = @_; + + # Get total data in profile + my $total = TotalProfile($profile); + + # Remove uniniteresting stack items + $profile = RemoveUninterestingFrames($symbols, $profile); + + # Focus? + if ($main::opt_focus ne '') { + $profile = FocusProfile($symbols, $profile, $main::opt_focus); + } + + # Ignore? 
+ if ($main::opt_ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); + } + + my $calls = ExtractCalls($symbols, $profile); + + # Reduce profiles to required output granularity, and also clean + # each stack trace so a given entry exists at most once. + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + # Print + if (!$main::opt_interactive) { + if ($main::opt_disasm) { + PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); + } elsif ($main::opt_list) { + PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0); + } elsif ($main::opt_text) { + # Make sure the output is empty when have nothing to report + # (only matters when --heapcheck is given but we must be + # compatible with old branches that did not pass --heapcheck always): + if ($total != 0) { + printf("Total%s: %s %s\n", + (defined($thread) ? " (t$thread)" : ""), + Unparse($total), Units()); + } + PrintText($symbols, $flat, $cumulative, -1); + } elsif ($main::opt_raw) { + PrintSymbolizedProfile($symbols, $profile, $main::prog); + } elsif ($main::opt_collapsed) { + PrintCollapsedStacks($symbols, $profile); + } elsif ($main::opt_callgrind) { + PrintCallgrind($calls); + } else { + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), ""); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), ""); + } elsif ($main::opt_web) { + my $tmp = TempName($main::next_tmpfile, "svg"); + RunWeb($tmp); + # The command we run might hand the file name off + # to an already running browser instance and then exit. + # Normally, we'd remove $tmp on exit (right now), + # but fork a child to remove $tmp a little later, so that the + # browser has time to load it first. 
+ delete $main::tempnames{$tmp}; + if (fork() == 0) { + sleep 5; + unlink($tmp); + exit(0); + } + } + } else { + cleanup(); + exit(1); + } + } + } else { + InteractiveMode($profile, $symbols, $libs, $total); + } +} + +sub Main() { + Init(); + $main::collected_profile = undef; + @main::profile_files = (); + $main::op_time = time(); + + # Printing symbols is special and requires a lot less info that most. + if ($main::opt_symbols) { + PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin + return; + } + + # Fetch all profile data + FetchDynamicProfiles(); + + # this will hold symbols that we read from the profile files + my $symbol_map = {}; + + # Read one profile, pick the last item on the list + my $data = ReadProfile($main::prog, pop(@main::profile_files)); + my $profile = $data->{profile}; + my $pcs = $data->{pcs}; + my $libs = $data->{libs}; # Info about main program and shared libraries + $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); + + # Add additional profiles, if available. 
+ if (scalar(@main::profile_files) > 0) { + foreach my $pname (@main::profile_files) { + my $data2 = ReadProfile($main::prog, $pname); + $profile = AddProfile($profile, $data2->{profile}); + $pcs = AddPcs($pcs, $data2->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); + } + } + + # Subtract base from profile, if specified + if ($main::opt_base ne '') { + my $base = ReadProfile($main::prog, $main::opt_base); + $profile = SubtractProfile($profile, $base->{profile}); + $pcs = AddPcs($pcs, $base->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); + } + + # Collect symbols + my $symbols; + if ($main::use_symbolized_profile) { + $symbols = FetchSymbols($pcs, $symbol_map); + } elsif ($main::use_symbol_page) { + $symbols = FetchSymbols($pcs); + } else { + # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, + # which may differ from the data from subsequent profiles, especially + # if they were run on different machines. Use appropriate libs for + # each pc somehow. + $symbols = ExtractSymbols($libs, $pcs); + } + + if (!defined($main::opt_thread)) { + FilterAndPrint($profile, $symbols, $libs); + } + if (defined($data->{threads})) { + foreach my $thread (sort { $a <=> $b } keys(%{$data->{threads}})) { + if (defined($main::opt_thread) && + ($main::opt_thread eq '*' || $main::opt_thread == $thread)) { + my $thread_profile = $data->{threads}{$thread}; + FilterAndPrint($thread_profile, $symbols, $libs, $thread); + } + } + } + + cleanup(); + exit(0); +} + +##### Entry Point ##### + +Main(); + +# Temporary code to detect if we're running on a Goobuntu system. 
+# These systems don't have the right stuff installed for the special +# Readline libraries to work, so as a temporary workaround, we default +# to using the normal stdio code, rather than the fancier readline-based +# code +sub ReadlineMightFail { + if (-e '/lib/libtermcap.so.2') { + return 0; # libtermcap exists, so readline should be okay + } else { + return 1; + } +} + +sub RunGV { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + if (!system(ShellEscape(@GV, "--version") . " >$dev_null 2>&1")) { + # Options using double dash are supported by this gv version. + # Also, turn on noantialias to better handle bug in gv for + # postscript files with large dimensions. + # TODO: Maybe we should not pass the --noantialias flag + # if the gv version is known to work properly without the flag. + system(ShellEscape(@GV, "--scale=$main::opt_scale", "--noantialias", $fname) + . $bg); + } else { + # Old gv version - only supports options that use single dash. + print STDERR ShellEscape(@GV, "-scale", $main::opt_scale) . "\n"; + system(ShellEscape(@GV, "-scale", "$main::opt_scale", $fname) . $bg); + } +} + +sub RunEvince { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + system(ShellEscape(@EVINCE, $fname) . $bg); +} + +sub RunWeb { + my $fname = shift; + print STDERR "Loading web page file:///$fname\n"; + + if (`uname` =~ /Darwin/) { + # OS X: open will use standard preference for SVG files. + system("/usr/bin/open", $fname); + return; + } + + # Some kind of Unix; try generic symlinks, then specific browsers. + # (Stop once we find one.) + # Works best if the browser is already running. 
+ my @alt = ( + "/etc/alternatives/gnome-www-browser", + "/etc/alternatives/x-www-browser", + "google-chrome", + "firefox", + ); + foreach my $b (@alt) { + if (system($b, $fname) == 0) { + return; + } + } + + print STDERR "Could not load web browser.\n"; +} + +sub RunKcachegrind { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + print STDERR "Starting '@KCACHEGRIND " . $fname . $bg . "'\n"; + system(ShellEscape(@KCACHEGRIND, $fname) . $bg); +} + + +##### Interactive helper routines ##### + +sub InteractiveMode { + $| = 1; # Make output unbuffered for interactive mode + my ($orig_profile, $symbols, $libs, $total) = @_; + + print STDERR "Welcome to jeprof! For help, type 'help'.\n"; + + # Use ReadLine if it's installed and input comes from a console. + if ( -t STDIN && + !ReadlineMightFail() && + defined(eval {require Term::ReadLine}) ) { + my $term = new Term::ReadLine 'jeprof'; + while ( defined ($_ = $term->readline('(jeprof) '))) { + $term->addhistory($_) if /\S/; + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit + } + } + } else { # don't have readline + while (1) { + print STDERR "(jeprof) "; + $_ = ; + last if ! defined $_ ; + s/\r//g; # turn windows-looking lines into unix-looking lines + + # Save some flags that might be reset by InteractiveCommand() + my $save_opt_lines = $main::opt_lines; + + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit + } + + # Restore flags + $main::opt_lines = $save_opt_lines; + } + } +} + +# Takes two args: orig profile, and command to run. 
+# Returns 1 if we should keep going, or 0 if we were asked to quit +sub InteractiveCommand { + my($orig_profile, $symbols, $libs, $total, $command) = @_; + $_ = $command; # just to make future m//'s easier + if (!defined($_)) { + print STDERR "\n"; + return 0; + } + if (m/^\s*quit/) { + return 0; + } + if (m/^\s*help/) { + InteractiveHelpMessage(); + return 1; + } + # Clear all the mode options -- mode is controlled by "$command" + $main::opt_text = 0; + $main::opt_callgrind = 0; + $main::opt_disasm = 0; + $main::opt_list = 0; + $main::opt_gv = 0; + $main::opt_evince = 0; + $main::opt_cum = 0; + + if (m/^\s*(text|top)(\d*)\s*(.*)/) { + $main::opt_text = 1; + + my $line_limit = ($2 ne "") ? int($2) : 10; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($3); + + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintText($symbols, $flat, $cumulative, $line_limit); + return 1; + } + if (m/^\s*callgrind\s*([^ \n]*)/) { + $main::opt_callgrind = 1; + + # Get derived profiles + my $calls = ExtractCalls($symbols, $orig_profile); + my $filename = $1; + if ( $1 eq '' ) { + $filename = TempName($main::next_tmpfile, "callgrind"); + } + PrintCallgrind($calls, $filename); + if ( $1 eq '' ) { + RunKcachegrind($filename, " & "); + $main::next_tmpfile++; + } + + return 1; + } + if (m/^\s*(web)?list\s*(.+)/) { + my $html = (defined($1) && ($1 eq "web")); + $main::opt_list = 1; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($2); + + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintListing($total, $libs, $flat, $cumulative, $routine, $html); + 
return 1; + } + if (m/^\s*disasm\s*(.+)/) { + $main::opt_disasm = 1; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($1); + + # Process current profile to account for various settings + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintDisassembly($libs, $flat, $cumulative, $routine); + return 1; + } + if (m/^\s*(gv|web|evince)\s*(.*)/) { + $main::opt_gv = 0; + $main::opt_evince = 0; + $main::opt_web = 0; + if ($1 eq "gv") { + $main::opt_gv = 1; + } elsif ($1 eq "evince") { + $main::opt_evince = 1; + } elsif ($1 eq "web") { + $main::opt_web = 1; + } + + my $focus; + my $ignore; + ($focus, $ignore) = ParseInteractiveArgs($2); + + # Process current profile to account for various settings + my $profile = ProcessProfile($total, $orig_profile, $symbols, + $focus, $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), " &"); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), " &"); + } elsif ($main::opt_web) { + RunWeb(TempName($main::next_tmpfile, "svg")); + } + $main::next_tmpfile++; + } + return 1; + } + if (m/^\s*$/) { + return 1; + } + print STDERR "Unknown command: try 'help'.\n"; + return 1; +} + + +sub ProcessProfile { + my $total_count = shift; + my $orig_profile = shift; + my $symbols = shift; + my $focus = shift; + my $ignore = shift; + + # Process current profile to account for various settings + my $profile = $orig_profile; + printf("Total: %s %s\n", Unparse($total_count), Units()); + if ($focus ne '') { + $profile = FocusProfile($symbols, 
$profile, $focus); + my $focus_count = TotalProfile($profile); + printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n", + $focus, + Unparse($focus_count), Units(), + Unparse($total_count), ($focus_count*100.0) / $total_count); + } + if ($ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $ignore); + my $ignore_count = TotalProfile($profile); + printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n", + $ignore, + Unparse($ignore_count), Units(), + Unparse($total_count), + ($ignore_count*100.0) / $total_count); + } + + return $profile; +} + +sub InteractiveHelpMessage { + print STDERR <{$k}; + my @addrs = split(/\n/, $k); + if ($#addrs >= 0) { + my $depth = $#addrs + 1; + # int(foo / 2**32) is the only reliable way to get rid of bottom + # 32 bits on both 32- and 64-bit systems. + print pack('L*', $count & 0xFFFFFFFF, int($count / 2**32)); + print pack('L*', $depth & 0xFFFFFFFF, int($depth / 2**32)); + + foreach my $full_addr (@addrs) { + my $addr = $full_addr; + $addr =~ s/0x0*//; # strip off leading 0x, zeroes + if (length($addr) > 16) { + print STDERR "Invalid address in profile: $full_addr\n"; + next; + } + my $low_addr = substr($addr, -8); # get last 8 hex chars + my $high_addr = substr($addr, -16, 8); # get up to 8 more hex chars + print pack('L*', hex('0x' . $low_addr), hex('0x' . $high_addr)); + } + } + } +} + +# Print symbols and profile data +sub PrintSymbolizedProfile { + my $symbols = shift; + my $profile = shift; + my $prog = shift; + + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + + print '--- ', $symbol_marker, "\n"; + if (defined($prog)) { + print 'binary=', $prog, "\n"; + } + while (my ($pc, $name) = each(%{$symbols})) { + my $sep = ' '; + print '0x', $pc; + # We have a list of function names, which include the inlined + # calls. They are separated (and terminated) by --, which is + # illegal in function names. 
+ for (my $j = 2; $j <= $#{$name}; $j += 3) { + print $sep, $name->[$j]; + $sep = '--'; + } + print "\n"; + } + print '---', "\n"; + + my $profile_marker; + if ($main::profile_type eq 'heap') { + $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } elsif ($main::profile_type eq 'growth') { + $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } elsif ($main::profile_type eq 'contention') { + $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } else { # elsif ($main::profile_type eq 'cpu') + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } + + print '--- ', $profile_marker, "\n"; + if (defined($main::collected_profile)) { + # if used with remote fetch, simply dump the collected profile to output. + open(SRC, "<$main::collected_profile"); + while () { + print $_; + } + close(SRC); + } else { + # --raw/http: For everything to work correctly for non-remote profiles, we + # would need to extend PrintProfileData() to handle all possible profile + # types, re-enable the code that is currently disabled in ReadCPUProfile() + # and FixCallerAddresses(), and remove the remote profile dumping code in + # the block above. + die "--raw/http: jeprof can only dump remote profiles for --raw\n"; + # dump a cpu-format profile to standard out + PrintProfileData($profile); + } +} + +# Print text output +sub PrintText { + my $symbols = shift; + my $flat = shift; + my $cumulative = shift; + my $line_limit = shift; + + my $total = TotalProfile($flat); + + # Which profile to sort by? + my $s = $main::opt_cum ? 
$cumulative : $flat; + + my $running_sum = 0; + my $lines = 0; + foreach my $k (sort { GetEntry($s, $b) <=> GetEntry($s, $a) || $a cmp $b } + keys(%{$cumulative})) { + my $f = GetEntry($flat, $k); + my $c = GetEntry($cumulative, $k); + $running_sum += $f; + + my $sym = $k; + if (exists($symbols->{$k})) { + $sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1]; + if ($main::opt_addresses) { + $sym = $k . " " . $sym; + } + } + + if ($f != 0 || $c != 0) { + printf("%8s %6s %6s %8s %6s %s\n", + Unparse($f), + Percent($f, $total), + Percent($running_sum, $total), + Unparse($c), + Percent($c, $total), + $sym); + } + $lines++; + last if ($line_limit >= 0 && $lines >= $line_limit); + } +} + +# Callgrind format has a compression for repeated function and file +# names. You show the name the first time, and just use its number +# subsequently. This can cut down the file to about a third or a +# quarter of its uncompressed size. $key and $val are the key/value +# pair that would normally be printed by callgrind; $map is a map from +# value to number. +sub CompressedCGName { + my($key, $val, $map) = @_; + my $idx = $map->{$val}; + # For very short keys, providing an index hurts rather than helps. + if (length($val) <= 3) { + return "$key=$val\n"; + } elsif (defined($idx)) { + return "$key=($idx)\n"; + } else { + # scalar(keys $map) gives the number of items in the map. + $idx = scalar(keys(%{$map})) + 1; + $map->{$val} = $idx; + return "$key=($idx) $val\n"; + } +} + +# Print the call graph in a way that's suiteable for callgrind. 
+sub PrintCallgrind { + my $calls = shift; + my $filename; + my %filename_to_index_map; + my %fnname_to_index_map; + + if ($main::opt_interactive) { + $filename = shift; + print STDERR "Writing callgrind file to '$filename'.\n" + } else { + $filename = "&STDOUT"; + } + open(CG, ">$filename"); + printf CG ("events: Hits\n\n"); + foreach my $call ( map { $_->[0] } + sort { $a->[1] cmp $b ->[1] || + $a->[2] <=> $b->[2] } + map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; + [$_, $1, $2] } + keys %$calls ) { + my $count = int($calls->{$call}); + $call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; + my ( $caller_file, $caller_line, $caller_function, + $callee_file, $callee_line, $callee_function ) = + ( $1, $2, $3, $5, $6, $7 ); + + # TODO(csilvers): for better compression, collect all the + # caller/callee_files and functions first, before printing + # anything, and only compress those referenced more than once. + printf CG CompressedCGName("fl", $caller_file, \%filename_to_index_map); + printf CG CompressedCGName("fn", $caller_function, \%fnname_to_index_map); + if (defined $6) { + printf CG CompressedCGName("cfl", $callee_file, \%filename_to_index_map); + printf CG CompressedCGName("cfn", $callee_function, \%fnname_to_index_map); + printf CG ("calls=$count $callee_line\n"); + } + printf CG ("$caller_line $count\n\n"); + } +} + +# Print disassembly for all all routines that match $main::opt_disasm +sub PrintDisassembly { + my $libs = shift; + my $flat = shift; + my $cumulative = shift; + my $disasm_opts = shift; + + my $total = TotalProfile($flat); + + foreach my $lib (@{$libs}) { + my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts); + my $offset = AddressSub($lib->[1], $lib->[3]); + foreach my $routine (sort ByName keys(%{$symbol_table})) { + my $start_addr = $symbol_table->{$routine}->[0]; + my $end_addr = $symbol_table->{$routine}->[1]; + # See if there are any samples in this routine + my $length = hex(AddressSub($end_addr, 
$start_addr)); + my $addr = AddressAdd($start_addr, $offset); + for (my $i = 0; $i < $length; $i++) { + if (defined($cumulative->{$addr})) { + PrintDisassembledFunction($lib->[0], $offset, + $routine, $flat, $cumulative, + $start_addr, $end_addr, $total); + last; + } + $addr = AddressInc($addr); + } + } + } +} + +# Return reference to array of tuples of the form: +# [start_address, filename, linenumber, instruction, limit_address] +# E.g., +# ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"] +sub Disassemble { + my $prog = shift; + my $offset = shift; + my $start_addr = shift; + my $end_addr = shift; + + my $objdump = $obj_tool_map{"objdump"}; + my $cmd = ShellEscape($objdump, "-C", "-d", "-l", "--no-show-raw-insn", + "--start-address=0x$start_addr", + "--stop-address=0x$end_addr", $prog); + open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); + my @result = (); + my $filename = ""; + my $linenumber = -1; + my $last = ["", "", "", ""]; + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + chop; + if (m|\s*([^:\s]+):(\d+)\s*$|) { + # Location line of the form: + # : + $filename = $1; + $linenumber = $2; + } elsif (m/^ +([0-9a-f]+):\s*(.*)/) { + # Disassembly line -- zero-extend address to full length + my $addr = HexExtend($1); + my $k = AddressAdd($addr, $offset); + $last->[4] = $k; # Store ending address for previous instruction + $last = [$k, $filename, $linenumber, $2, $end_addr]; + push(@result, $last); + } + } + close(OBJDUMP); + return @result; +} + +# The input file should contain lines of the form /proc/maps-like +# output (same format as expected from the profiles) or that looks +# like hex addresses (like "0xDEADBEEF"). We will parse all +# /proc/maps output, and for all the hex addresses, we will output +# "short" symbol names, one per line, in the same order as the input. +sub PrintSymbols { + my $maps_and_symbols_file = shift; + + # ParseLibraries expects pcs to be in a set. Fine by us... 
+ my @pclist = (); # pcs in sorted order + my $pcs = {}; + my $map = ""; + foreach my $line (<$maps_and_symbols_file>) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ($line =~ /\b(0x[0-9a-f]+)\b/i) { + push(@pclist, HexExtend($1)); + $pcs->{$pclist[-1]} = 1; + } else { + $map .= $line; + } + } + + my $libs = ParseLibraries($main::prog, $map, $pcs); + my $symbols = ExtractSymbols($libs, $pcs); + + foreach my $pc (@pclist) { + # ->[0] is the shortname, ->[2] is the full name + print(($symbols->{$pc}->[0] || "??") . "\n"); + } +} + + +# For sorting functions by name +sub ByName { + return ShortFunctionName($a) cmp ShortFunctionName($b); +} + +# Print source-listing for all all routines that match $list_opts +sub PrintListing { + my $total = shift; + my $libs = shift; + my $flat = shift; + my $cumulative = shift; + my $list_opts = shift; + my $html = shift; + + my $output = \*STDOUT; + my $fname = ""; + + if ($html) { + # Arrange to write the output to a temporary file + $fname = TempName($main::next_tmpfile, "html"); + $main::next_tmpfile++; + if (!open(TEMP, ">$fname")) { + print STDERR "$fname: $!\n"; + return; + } + $output = \*TEMP; + print $output HtmlListingHeader(); + printf $output ("
%s
Total: %s %s
\n", + $main::prog, Unparse($total), Units()); + } + + my $listed = 0; + foreach my $lib (@{$libs}) { + my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts); + my $offset = AddressSub($lib->[1], $lib->[3]); + foreach my $routine (sort ByName keys(%{$symbol_table})) { + # Print if there are any samples in this routine + my $start_addr = $symbol_table->{$routine}->[0]; + my $end_addr = $symbol_table->{$routine}->[1]; + my $length = hex(AddressSub($end_addr, $start_addr)); + my $addr = AddressAdd($start_addr, $offset); + for (my $i = 0; $i < $length; $i++) { + if (defined($cumulative->{$addr})) { + $listed += PrintSource( + $lib->[0], $offset, + $routine, $flat, $cumulative, + $start_addr, $end_addr, + $html, + $output); + last; + } + $addr = AddressInc($addr); + } + } + } + + if ($html) { + if ($listed > 0) { + print $output HtmlListingFooter(); + close($output); + RunWeb($fname); + } else { + close($output); + unlink($fname); + } + } +} + +sub HtmlListingHeader { + return <<'EOF'; + + + +Pprof listing + + + + +EOF +} + +sub HtmlListingFooter { + return <<'EOF'; + + +EOF +} + +sub HtmlEscape { + my $text = shift; + $text =~ s/&/&/g; + $text =~ s//>/g; + return $text; +} + +# Returns the indentation of the line, if it has any non-whitespace +# characters. Otherwise, returns -1. +sub Indentation { + my $line = shift; + if (m/^(\s*)\S/) { + return length($1); + } else { + return -1; + } +} + +# If the symbol table contains inlining info, Disassemble() may tag an +# instruction with a location inside an inlined function. But for +# source listings, we prefer to use the location in the function we +# are listing. So use MapToSymbols() to fetch full location +# information for each instruction and then pick out the first +# location from a location list (location list contains callers before +# callees in case of inlining). 
+# +# After this routine has run, each entry in $instructions contains: +# [0] start address +# [1] filename for function we are listing +# [2] line number for function we are listing +# [3] disassembly +# [4] limit address +# [5] most specific filename (may be different from [1] due to inlining) +# [6] most specific line number (may be different from [2] due to inlining) +sub GetTopLevelLineNumbers { + my ($lib, $offset, $instructions) = @_; + my $pcs = []; + for (my $i = 0; $i <= $#{$instructions}; $i++) { + push(@{$pcs}, $instructions->[$i]->[0]); + } + my $symbols = {}; + MapToSymbols($lib, $offset, $pcs, $symbols); + for (my $i = 0; $i <= $#{$instructions}; $i++) { + my $e = $instructions->[$i]; + push(@{$e}, $e->[1]); + push(@{$e}, $e->[2]); + my $addr = $e->[0]; + my $sym = $symbols->{$addr}; + if (defined($sym)) { + if ($#{$sym} >= 2 && $sym->[1] =~ m/^(.*):(\d+)$/) { + $e->[1] = $1; # File name + $e->[2] = $2; # Line number + } + } + } +} + +# Print source-listing for one routine +sub PrintSource { + my $prog = shift; + my $offset = shift; + my $routine = shift; + my $flat = shift; + my $cumulative = shift; + my $start_addr = shift; + my $end_addr = shift; + my $html = shift; + my $output = shift; + + # Disassemble all instructions (just to get line numbers) + my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + GetTopLevelLineNumbers($prog, $offset, \@instructions); + + # Hack 1: assume that the first source file encountered in the + # disassembly contains the routine + my $filename = undef; + for (my $i = 0; $i <= $#instructions; $i++) { + if ($instructions[$i]->[2] >= 0) { + $filename = $instructions[$i]->[1]; + last; + } + } + if (!defined($filename)) { + print STDERR "no filename found in $routine\n"; + return 0; + } + + # Hack 2: assume that the largest line number from $filename is the + # end of the procedure. 
This is typically safe since if P1 contains + # an inlined call to P2, then P2 usually occurs earlier in the + # source file. If this does not work, we might have to compute a + # density profile or just print all regions we find. + my $lastline = 0; + for (my $i = 0; $i <= $#instructions; $i++) { + my $f = $instructions[$i]->[1]; + my $l = $instructions[$i]->[2]; + if (($f eq $filename) && ($l > $lastline)) { + $lastline = $l; + } + } + + # Hack 3: assume the first source location from "filename" is the start of + # the source code. + my $firstline = 1; + for (my $i = 0; $i <= $#instructions; $i++) { + if ($instructions[$i]->[1] eq $filename) { + $firstline = $instructions[$i]->[2]; + last; + } + } + + # Hack 4: Extend last line forward until its indentation is less than + # the indentation we saw on $firstline + my $oldlastline = $lastline; + { + if (!open(FILE, "<$filename")) { + print STDERR "$filename: $!\n"; + return 0; + } + my $l = 0; + my $first_indentation = -1; + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + $l++; + my $indent = Indentation($_); + if ($l >= $firstline) { + if ($first_indentation < 0 && $indent >= 0) { + $first_indentation = $indent; + last if ($first_indentation == 0); + } + } + if ($l >= $lastline && $indent >= 0) { + if ($indent >= $first_indentation) { + $lastline = $l+1; + } else { + last; + } + } + } + close(FILE); + } + + # Assign all samples to the range $firstline,$lastline, + # Hack 4: If an instruction does not occur in the range, its samples + # are moved to the next instruction that occurs in the range. 
+ my $samples1 = {}; # Map from line number to flat count + my $samples2 = {}; # Map from line number to cumulative count + my $running1 = 0; # Unassigned flat counts + my $running2 = 0; # Unassigned cumulative counts + my $total1 = 0; # Total flat counts + my $total2 = 0; # Total cumulative counts + my %disasm = (); # Map from line number to disassembly + my $running_disasm = ""; # Unassigned disassembly + my $skip_marker = "---\n"; + if ($html) { + $skip_marker = ""; + for (my $l = $firstline; $l <= $lastline; $l++) { + $disasm{$l} = ""; + } + } + my $last_dis_filename = ''; + my $last_dis_linenum = -1; + my $last_touched_line = -1; # To detect gaps in disassembly for a line + foreach my $e (@instructions) { + # Add up counts for all address that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } + + if ($html) { + my $dis = sprintf(" %6s %6s \t\t%8s: %s ", + HtmlPrintNumber($c1), + HtmlPrintNumber($c2), + UnparseAddress($offset, $e->[0]), + CleanDisassembly($e->[3])); + + # Append the most specific source line associated with this instruction + if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) }; + $dis = HtmlEscape($dis); + my $f = $e->[5]; + my $l = $e->[6]; + if ($f ne $last_dis_filename) { + $dis .= sprintf("%s:%d", + HtmlEscape(CleanFileName($f)), $l); + } elsif ($l ne $last_dis_linenum) { + # De-emphasize the unchanged file name portion + $dis .= sprintf("%s" . 
+ ":%d", + HtmlEscape(CleanFileName($f)), $l); + } else { + # De-emphasize the entire location + $dis .= sprintf("%s:%d", + HtmlEscape(CleanFileName($f)), $l); + } + $last_dis_filename = $f; + $last_dis_linenum = $l; + $running_disasm .= $dis; + $running_disasm .= "\n"; + } + + $running1 += $c1; + $running2 += $c2; + $total1 += $c1; + $total2 += $c2; + my $file = $e->[1]; + my $line = $e->[2]; + if (($file eq $filename) && + ($line >= $firstline) && + ($line <= $lastline)) { + # Assign all accumulated samples to this line + AddEntry($samples1, $line, $running1); + AddEntry($samples2, $line, $running2); + $running1 = 0; + $running2 = 0; + if ($html) { + if ($line != $last_touched_line && $disasm{$line} ne '') { + $disasm{$line} .= "\n"; + } + $disasm{$line} .= $running_disasm; + $running_disasm = ''; + $last_touched_line = $line; + } + } + } + + # Assign any leftover samples to $lastline + AddEntry($samples1, $lastline, $running1); + AddEntry($samples2, $lastline, $running2); + if ($html) { + if ($lastline != $last_touched_line && $disasm{$lastline} ne '') { + $disasm{$lastline} .= "\n"; + } + $disasm{$lastline} .= $running_disasm; + } + + if ($html) { + printf $output ( + "

%s

%s\n
\n" .
+      "Total:%6s %6s (flat / cumulative %s)\n",
+      HtmlEscape(ShortFunctionName($routine)),
+      HtmlEscape(CleanFileName($filename)),
+      Unparse($total1),
+      Unparse($total2),
+      Units());
+  } else {
+    printf $output (
+      "ROUTINE ====================== %s in %s\n" .
+      "%6s %6s Total %s (flat / cumulative)\n",
+      ShortFunctionName($routine),
+      CleanFileName($filename),
+      Unparse($total1),
+      Unparse($total2),
+      Units());
+  }
+  if (!open(FILE, "<$filename")) {
+    print STDERR "$filename: $!\n";
+    return 0;
+  }
+  my $l = 0;
+  while () {
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+    $l++;
+    if ($l >= $firstline - 5 &&
+        (($l <= $oldlastline + 5) || ($l <= $lastline))) {
+      chop;
+      my $text = $_;
+      if ($l == $firstline) { print $output $skip_marker; }
+      my $n1 = GetEntry($samples1, $l);
+      my $n2 = GetEntry($samples2, $l);
+      if ($html) {
+        # Emit a span that has one of the following classes:
+        #    livesrc -- has samples
+        #    deadsrc -- has disassembly, but with no samples
+        #    nop     -- has no matching disasembly
+        # Also emit an optional span containing disassembly.
+        my $dis = $disasm{$l};
+        my $asm = "";
+        if (defined($dis) && $dis ne '') {
+          $asm = "" . $dis . "";
+        }
+        my $source_class = (($n1 + $n2 > 0)
+                            ? "livesrc"
+                            : (($asm ne "") ? "deadsrc" : "nop"));
+        printf $output (
+          "%5d " .
+          "%6s %6s %s%s\n",
+          $l, $source_class,
+          HtmlPrintNumber($n1),
+          HtmlPrintNumber($n2),
+          HtmlEscape($text),
+          $asm);
+      } else {
+        printf $output(
+          "%6s %6s %4d: %s\n",
+          UnparseAlt($n1),
+          UnparseAlt($n2),
+          $l,
+          $text);
+      }
+      if ($l == $lastline)  { print $output $skip_marker; }
+    };
+  }
+  close(FILE);
+  if ($html) {
+    print $output "
\n"; + } + return 1; +} + +# Return the source line for the specified file/linenumber. +# Returns undef if not found. +sub SourceLine { + my $file = shift; + my $line = shift; + + # Look in cache + if (!defined($main::source_cache{$file})) { + if (100 < scalar keys(%main::source_cache)) { + # Clear the cache when it gets too big + $main::source_cache = (); + } + + # Read all lines from the file + if (!open(FILE, "<$file")) { + print STDERR "$file: $!\n"; + $main::source_cache{$file} = []; # Cache the negative result + return undef; + } + my $lines = []; + push(@{$lines}, ""); # So we can use 1-based line numbers as indices + while () { + push(@{$lines}, $_); + } + close(FILE); + + # Save the lines in the cache + $main::source_cache{$file} = $lines; + } + + my $lines = $main::source_cache{$file}; + if (($line < 0) || ($line > $#{$lines})) { + return undef; + } else { + return $lines->[$line]; + } +} + +# Print disassembly for one routine with interspersed source if available +sub PrintDisassembledFunction { + my $prog = shift; + my $offset = shift; + my $routine = shift; + my $flat = shift; + my $cumulative = shift; + my $start_addr = shift; + my $end_addr = shift; + my $total = shift; + + # Disassemble all instructions + my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + + # Make array of counts per instruction + my @flat_count = (); + my @cum_count = (); + my $flat_total = 0; + my $cum_total = 0; + foreach my $e (@instructions) { + # Add up counts for all address that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } + push(@flat_count, $c1); + push(@cum_count, $c2); + $flat_total += $c1; + $cum_total += $c2; + } + + # Print header with total counts + printf("ROUTINE ====================== %s\n" . 
+ "%6s %6s %s (flat, cumulative) %.1f%% of total\n", + ShortFunctionName($routine), + Unparse($flat_total), + Unparse($cum_total), + Units(), + ($cum_total * 100.0) / $total); + + # Process instructions in order + my $current_file = ""; + for (my $i = 0; $i <= $#instructions; ) { + my $e = $instructions[$i]; + + # Print the new file name whenever we switch files + if ($e->[1] ne $current_file) { + $current_file = $e->[1]; + my $fname = $current_file; + $fname =~ s|^\./||; # Trim leading "./" + + # Shorten long file names + if (length($fname) >= 58) { + $fname = "..." . substr($fname, -55); + } + printf("-------------------- %s\n", $fname); + } + + # TODO: Compute range of lines to print together to deal with + # small reorderings. + my $first_line = $e->[2]; + my $last_line = $first_line; + my %flat_sum = (); + my %cum_sum = (); + for (my $l = $first_line; $l <= $last_line; $l++) { + $flat_sum{$l} = 0; + $cum_sum{$l} = 0; + } + + # Find run of instructions for this range of source lines + my $first_inst = $i; + while (($i <= $#instructions) && + ($instructions[$i]->[2] >= $first_line) && + ($instructions[$i]->[2] <= $last_line)) { + $e = $instructions[$i]; + $flat_sum{$e->[2]} += $flat_count[$i]; + $cum_sum{$e->[2]} += $cum_count[$i]; + $i++; + } + my $last_inst = $i - 1; + + # Print source lines + for (my $l = $first_line; $l <= $last_line; $l++) { + my $line = SourceLine($current_file, $l); + if (!defined($line)) { + $line = "?\n"; + next; + } else { + $line =~ s/^\s+//; + } + printf("%6s %6s %5d: %s", + UnparseAlt($flat_sum{$l}), + UnparseAlt($cum_sum{$l}), + $l, + $line); + } + + # Print disassembly + for (my $x = $first_inst; $x <= $last_inst; $x++) { + my $e = $instructions[$x]; + printf("%6s %6s %8s: %6s\n", + UnparseAlt($flat_count[$x]), + UnparseAlt($cum_count[$x]), + UnparseAddress($offset, $e->[0]), + CleanDisassembly($e->[3])); + } + } +} + +# Print DOT graph +sub PrintDot { + my $prog = shift; + my $symbols = shift; + my $raw = shift; + my $flat = 
shift; + my $cumulative = shift; + my $overall_total = shift; + + # Get total + my $local_total = TotalProfile($flat); + my $nodelimit = int($main::opt_nodefraction * $local_total); + my $edgelimit = int($main::opt_edgefraction * $local_total); + my $nodecount = $main::opt_nodecount; + + # Find nodes to include + my @list = (sort { abs(GetEntry($cumulative, $b)) <=> + abs(GetEntry($cumulative, $a)) + || $a cmp $b } + keys(%{$cumulative})); + my $last = $nodecount - 1; + if ($last > $#list) { + $last = $#list; + } + while (($last >= 0) && + (abs(GetEntry($cumulative, $list[$last])) <= $nodelimit)) { + $last--; + } + if ($last < 0) { + print STDERR "No nodes to print\n"; + return 0; + } + + if ($nodelimit > 0 || $edgelimit > 0) { + printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n", + Unparse($nodelimit), Units(), + Unparse($edgelimit), Units()); + } + + # Open DOT output file + my $output; + my $escaped_dot = ShellEscape(@DOT); + my $escaped_ps2pdf = ShellEscape(@PS2PDF); + if ($main::opt_gv) { + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "ps")); + $output = "| $escaped_dot -Tps2 >$escaped_outfile"; + } elsif ($main::opt_evince) { + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "pdf")); + $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - $escaped_outfile"; + } elsif ($main::opt_ps) { + $output = "| $escaped_dot -Tps2"; + } elsif ($main::opt_pdf) { + $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - -"; + } elsif ($main::opt_web || $main::opt_svg) { + # We need to post-process the SVG, so write to a temporary file always. 
+ my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "svg")); + $output = "| $escaped_dot -Tsvg >$escaped_outfile"; + } elsif ($main::opt_gif) { + $output = "| $escaped_dot -Tgif"; + } else { + $output = ">&STDOUT"; + } + open(DOT, $output) || error("$output: $!\n"); + + # Title + printf DOT ("digraph \"%s; %s %s\" {\n", + $prog, + Unparse($overall_total), + Units()); + if ($main::opt_pdf) { + # The output is more printable if we set the page size for dot. + printf DOT ("size=\"8,11\"\n"); + } + printf DOT ("node [width=0.375,height=0.25];\n"); + + # Print legend + printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," . + "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n", + $prog, + sprintf("Total %s: %s", Units(), Unparse($overall_total)), + sprintf("Focusing on: %s", Unparse($local_total)), + sprintf("Dropped nodes with <= %s abs(%s)", + Unparse($nodelimit), Units()), + sprintf("Dropped edges with <= %s %s", + Unparse($edgelimit), Units()) + ); + + # Print nodes + my %node = (); + my $nextnode = 1; + foreach my $a (@list[0..$last]) { + # Pick font size + my $f = GetEntry($flat, $a); + my $c = GetEntry($cumulative, $a); + + my $fs = 8; + if ($local_total > 0) { + $fs = 8 + (50.0 * sqrt(abs($f * 1.0 / $local_total))); + } + + $node{$a} = $nextnode++; + my $sym = $a; + $sym =~ s/\s+/\\n/g; + $sym =~ s/::/\\n/g; + + # Extra cumulative info to print for non-leaves + my $extra = ""; + if ($f != $c) { + $extra = sprintf("\\rof %s (%s)", + Unparse($c), + Percent($c, $local_total)); + } + my $style = ""; + if ($main::opt_heapcheck) { + if ($f > 0) { + # make leak-causing nodes more visible (add a background) + $style = ",style=filled,fillcolor=gray" + } elsif ($f < 0) { + # make anti-leak-causing nodes (which almost never occur) + # stand out as well (triple border) + $style = ",peripheries=3" + } + } + + printf DOT ("N%d [label=\"%s\\n%s (%s)%s\\r" . 
+ "\",shape=box,fontsize=%.1f%s];\n", + $node{$a}, + $sym, + Unparse($f), + Percent($f, $local_total), + $extra, + $fs, + $style, + ); + } + + # Get edges and counts per edge + my %edge = (); + my $n; + my $fullname_to_shortname_map = {}; + FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); + foreach my $k (keys(%{$raw})) { + # TODO: omit low %age edges + $n = $raw->{$k}; + my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); + for (my $i = 1; $i <= $#translated; $i++) { + my $src = $translated[$i]; + my $dst = $translated[$i-1]; + #next if ($src eq $dst); # Avoid self-edges? + if (exists($node{$src}) && exists($node{$dst})) { + my $edge_label = "$src\001$dst"; + if (!exists($edge{$edge_label})) { + $edge{$edge_label} = 0; + } + $edge{$edge_label} += $n; + } + } + } + + # Print edges (process in order of decreasing counts) + my %indegree = (); # Number of incoming edges added per node so far + my %outdegree = (); # Number of outgoing edges added per node so far + foreach my $e (sort { $edge{$b} <=> $edge{$a} } keys(%edge)) { + my @x = split(/\001/, $e); + $n = $edge{$e}; + + # Initialize degree of kept incoming and outgoing edges if necessary + my $src = $x[0]; + my $dst = $x[1]; + if (!exists($outdegree{$src})) { $outdegree{$src} = 0; } + if (!exists($indegree{$dst})) { $indegree{$dst} = 0; } + + my $keep; + if ($indegree{$dst} == 0) { + # Keep edge if needed for reachability + $keep = 1; + } elsif (abs($n) <= $edgelimit) { + # Drop if we are below --edgefraction + $keep = 0; + } elsif ($outdegree{$src} >= $main::opt_maxdegree || + $indegree{$dst} >= $main::opt_maxdegree) { + # Keep limited number of in/out edges per node + $keep = 0; + } else { + $keep = 1; + } + + if ($keep) { + $outdegree{$src}++; + $indegree{$dst}++; + + # Compute line width based on edge count + my $fraction = abs($local_total ? 
(3 * ($n / $local_total)) : 0); + if ($fraction > 1) { $fraction = 1; } + my $w = $fraction * 2; + if ($w < 1 && ($main::opt_web || $main::opt_svg)) { + # SVG output treats line widths < 1 poorly. + $w = 1; + } + + # Dot sometimes segfaults if given edge weights that are too large, so + # we cap the weights at a large value + my $edgeweight = abs($n) ** 0.7; + if ($edgeweight > 100000) { $edgeweight = 100000; } + $edgeweight = int($edgeweight); + + my $style = sprintf("setlinewidth(%f)", $w); + if ($x[1] =~ m/\(inline\)/) { + $style .= ",dashed"; + } + + # Use a slightly squashed function of the edge count as the weight + printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n", + $node{$x[0]}, + $node{$x[1]}, + Unparse($n), + $edgeweight, + $style); + } + } + + print DOT ("}\n"); + close(DOT); + + if ($main::opt_web || $main::opt_svg) { + # Rewrite SVG to be more usable inside web browser. + RewriteSvg(TempName($main::next_tmpfile, "svg")); + } + + return 1; +} + +sub RewriteSvg { + my $svgfile = shift; + + open(SVG, $svgfile) || die "open temp svg: $!"; + my @svg = ; + close(SVG); + unlink $svgfile; + my $svg = join('', @svg); + + # Dot's SVG output is + # + # + # + # ... + # + # + # + # Change it to + # + # + # $svg_javascript + # + # + # ... + # + # + # + + # Fix width, height; drop viewBox. + $svg =~ s/(?s) above first + my $svg_javascript = SvgJavascript(); + my $viewport = "\n"; + $svg =~ s/ above . + $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/; + $svg =~ s/$svgfile") || die "open $svgfile: $!"; + print SVG $svg; + close(SVG); + } +} + +sub SvgJavascript { + return <<'EOF'; + +EOF +} + +# Provides a map from fullname to shortname for cases where the +# shortname is ambiguous. The symlist has both the fullname and +# shortname for all symbols, which is usually fine, but sometimes -- +# such as overloaded functions -- two different fullnames can map to +# the same shortname. In that case, we use the address of the +# function to disambiguate the two. 
This function fills in a map that +# maps fullnames to modified shortnames in such cases. If a fullname +# is not present in the map, the 'normal' shortname provided by the +# symlist is the appropriate one to use. +sub FillFullnameToShortnameMap { + my $symbols = shift; + my $fullname_to_shortname_map = shift; + my $shortnames_seen_once = {}; + my $shortnames_seen_more_than_once = {}; + + foreach my $symlist (values(%{$symbols})) { + # TODO(csilvers): deal with inlined symbols too. + my $shortname = $symlist->[0]; + my $fullname = $symlist->[2]; + if ($fullname !~ /<[0-9a-fA-F]+>$/) { # fullname doesn't end in an address + next; # the only collisions we care about are when addresses differ + } + if (defined($shortnames_seen_once->{$shortname}) && + $shortnames_seen_once->{$shortname} ne $fullname) { + $shortnames_seen_more_than_once->{$shortname} = 1; + } else { + $shortnames_seen_once->{$shortname} = $fullname; + } + } + + foreach my $symlist (values(%{$symbols})) { + my $shortname = $symlist->[0]; + my $fullname = $symlist->[2]; + # TODO(csilvers): take in a list of addresses we care about, and only + # store in the map if $symlist->[1] is in that list. Saves space. + next if defined($fullname_to_shortname_map->{$fullname}); + if (defined($shortnames_seen_more_than_once->{$shortname})) { + if ($fullname =~ /<0*([^>]*)>$/) { # fullname has address at end of it + $fullname_to_shortname_map->{$fullname} = "$shortname\@$1"; + } + } + } +} + +# Return a small number that identifies the argument. +# Multiple calls with the same argument will return the same number. +# Calls with different arguments will return different numbers. 
+sub ShortIdFor { + my $key = shift; + my $id = $main::uniqueid{$key}; + if (!defined($id)) { + $id = keys(%main::uniqueid) + 1; + $main::uniqueid{$key} = $id; + } + return $id; +} + +# Translate a stack of addresses into a stack of symbols +sub TranslateStack { + my $symbols = shift; + my $fullname_to_shortname_map = shift; + my $k = shift; + + my @addrs = split(/\n/, $k); + my @result = (); + for (my $i = 0; $i <= $#addrs; $i++) { + my $a = $addrs[$i]; + + # Skip large addresses since they sometimes show up as fake entries on RH9 + if (length($a) > 8 && $a gt "7fffffffffffffff") { + next; + } + + if ($main::opt_disasm || $main::opt_list) { + # We want just the address for the key + push(@result, $a); + next; + } + + my $symlist = $symbols->{$a}; + if (!defined($symlist)) { + $symlist = [$a, "", $a]; + } + + # We can have a sequence of symbols for a particular entry + # (more than one symbol in the case of inlining). Callers + # come before callees in symlist, so walk backwards since + # the translated stack should contain callees before callers. + for (my $j = $#{$symlist}; $j >= 2; $j -= 3) { + my $func = $symlist->[$j-2]; + my $fileline = $symlist->[$j-1]; + my $fullfunc = $symlist->[$j]; + if (defined($fullname_to_shortname_map->{$fullfunc})) { + $func = $fullname_to_shortname_map->{$fullfunc}; + } + if ($j > 2) { + $func = "$func (inline)"; + } + + # Do not merge nodes corresponding to Callback::Run since that + # causes confusing cycles in dot display. Instead, we synthesize + # a unique name for this frame per caller. + if ($func =~ m/Callback.*::Run$/) { + my $caller = ($i > 0) ? $addrs[$i-1] : 0; + $func = "Run#" . ShortIdFor($caller); + } + + if ($main::opt_addresses) { + push(@result, "$a $func $fileline"); + } elsif ($main::opt_lines) { + if ($func eq '??' 
&& $fileline eq '??:0') { + push(@result, "$a"); + } else { + push(@result, "$func $fileline"); + } + } elsif ($main::opt_functions) { + if ($func eq '??') { + push(@result, "$a"); + } else { + push(@result, $func); + } + } elsif ($main::opt_files) { + if ($fileline eq '??:0' || $fileline eq '') { + push(@result, "$a"); + } else { + my $f = $fileline; + $f =~ s/:\d+$//; + push(@result, $f); + } + } else { + push(@result, $a); + last; # Do not print inlined info + } + } + } + + # print join(",", @addrs), " => ", join(",", @result), "\n"; + return @result; +} + +# Generate percent string for a number and a total +sub Percent { + my $num = shift; + my $tot = shift; + if ($tot != 0) { + return sprintf("%.1f%%", $num * 100.0 / $tot); + } else { + return ($num == 0) ? "nan" : (($num > 0) ? "+inf" : "-inf"); + } +} + +# Generate pretty-printed form of number +sub Unparse { + my $num = shift; + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + if ($main::opt_inuse_objects || $main::opt_alloc_objects) { + return sprintf("%d", $num); + } else { + if ($main::opt_show_bytes) { + return sprintf("%d", $num); + } else { + return sprintf("%.1f", $num / 1048576.0); + } + } + } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { + return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds + } else { + return sprintf("%d", $num); + } +} + +# Alternate pretty-printed form: 0 maps to "." 
+sub UnparseAlt { + my $num = shift; + if ($num == 0) { + return "."; + } else { + return Unparse($num); + } +} + +# Alternate pretty-printed form: 0 maps to "" +sub HtmlPrintNumber { + my $num = shift; + if ($num == 0) { + return ""; + } else { + return Unparse($num); + } +} + +# Return output units +sub Units { + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + if ($main::opt_inuse_objects || $main::opt_alloc_objects) { + return "objects"; + } else { + if ($main::opt_show_bytes) { + return "B"; + } else { + return "MB"; + } + } + } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { + return "seconds"; + } else { + return "samples"; + } +} + +##### Profile manipulation code ##### + +# Generate flattened profile: +# If count is charged to stack [a,b,c,d], in generated profile, +# it will be charged to [a] +sub FlatProfile { + my $profile = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + if ($#addrs >= 0) { + AddEntry($result, $addrs[0], $count); + } + } + return $result; +} + +# Generate cumulative profile: +# If count is charged to stack [a,b,c,d], in generated profile, +# it will be charged to [a], [b], [c], [d] +sub CumulativeProfile { + my $profile = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + foreach my $a (@addrs) { + AddEntry($result, $a, $count); + } + } + return $result; +} + +# If the second-youngest PC on the stack is always the same, returns +# that pc. Otherwise, returns undef. 
+sub IsSecondPcAlwaysTheSame { + my $profile = shift; + + my $second_pc = undef; + foreach my $k (keys(%{$profile})) { + my @addrs = split(/\n/, $k); + if ($#addrs < 1) { + return undef; + } + if (not defined $second_pc) { + $second_pc = $addrs[1]; + } else { + if ($second_pc ne $addrs[1]) { + return undef; + } + } + } + return $second_pc; +} + +sub ExtractSymbolNameInlineStack { + my $symbols = shift; + my $address = shift; + + my @stack = (); + + if (exists $symbols->{$address}) { + my @localinlinestack = @{$symbols->{$address}}; + for (my $i = $#localinlinestack; $i > 0; $i-=3) { + my $file = $localinlinestack[$i-1]; + my $fn = $localinlinestack[$i-0]; + + if ($file eq "?" || $file eq ":0") { + $file = "??:0"; + } + if ($fn eq '??') { + # If we can't get the symbol name, at least use the file information. + $fn = $file; + } + my $suffix = "[inline]"; + if ($i == 2) { + $suffix = ""; + } + push (@stack, $fn.$suffix); + } + } + else { + # If we can't get a symbol name, at least fill in the address. + push (@stack, $address); + } + + return @stack; +} + +sub ExtractSymbolLocation { + my $symbols = shift; + my $address = shift; + # 'addr2line' outputs "??:0" for unknown locations; we do the + # same to be consistent. + my $location = "??:0:unknown"; + if (exists $symbols->{$address}) { + my $file = $symbols->{$address}->[1]; + if ($file eq "?") { + $file = "??:0" + } + $location = $file . ":" . $symbols->{$address}->[0]; + } + return $location; +} + +# Extracts a graph of calls. 
+sub ExtractCalls { + my $symbols = shift; + my $profile = shift; + + my $calls = {}; + while( my ($stack_trace, $count) = each %$profile ) { + my @address = split(/\n/, $stack_trace); + my $destination = ExtractSymbolLocation($symbols, $address[0]); + AddEntry($calls, $destination, $count); + for (my $i = 1; $i <= $#address; $i++) { + my $source = ExtractSymbolLocation($symbols, $address[$i]); + my $call = "$source -> $destination"; + AddEntry($calls, $call, $count); + $destination = $source; + } + } + + return $calls; +} + +sub FilterFrames { + my $symbols = shift; + my $profile = shift; + + if ($main::opt_retain eq '' && $main::opt_exclude eq '') { + return $profile; + } + + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my @path = (); + foreach my $a (@addrs) { + my $sym; + if (exists($symbols->{$a})) { + $sym = $symbols->{$a}->[0]; + } else { + $sym = $a; + } + if ($main::opt_retain ne '' && $sym !~ m/$main::opt_retain/) { + next; + } + if ($main::opt_exclude ne '' && $sym =~ m/$main::opt_exclude/) { + next; + } + push(@path, $a); + } + if (scalar(@path) > 0) { + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + } + + return $result; +} + +sub PrintCollapsedStacks { + my $symbols = shift; + my $profile = shift; + + while (my ($stack_trace, $count) = each %$profile) { + my @address = split(/\n/, $stack_trace); + my @names = reverse ( map { ExtractSymbolNameInlineStack($symbols, $_) } @address ); + printf("%s %d\n", join(";", @names), $count); + } +} + +sub RemoveUninterestingFrames { + my $symbols = shift; + my $profile = shift; + + # List of function names to skip + my %skip = (); + my $skip_regexp = 'NOMATCH'; + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + foreach my $name ('@JEMALLOC_PREFIX@calloc', + 'cfree', + '@JEMALLOC_PREFIX@malloc', + 'je_malloc_default', + 'newImpl', + 'void* newImpl', + 'fallbackNewImpl', + 'void* 
fallbackNewImpl', + '@JEMALLOC_PREFIX@free', + '@JEMALLOC_PREFIX@memalign', + '@JEMALLOC_PREFIX@posix_memalign', + '@JEMALLOC_PREFIX@aligned_alloc', + 'pvalloc', + '@JEMALLOC_PREFIX@valloc', + '@JEMALLOC_PREFIX@realloc', + '@JEMALLOC_PREFIX@mallocx', + '@JEMALLOC_PREFIX@rallocx', + 'do_rallocx', + '@JEMALLOC_PREFIX@xallocx', + '@JEMALLOC_PREFIX@dallocx', + '@JEMALLOC_PREFIX@sdallocx', + '@JEMALLOC_PREFIX@sdallocx_noflags', + 'tc_calloc', + 'tc_cfree', + 'tc_malloc', + 'tc_free', + 'tc_memalign', + 'tc_posix_memalign', + 'tc_pvalloc', + 'tc_valloc', + 'tc_realloc', + 'tc_new', + 'tc_delete', + 'tc_newarray', + 'tc_deletearray', + 'tc_new_nothrow', + 'tc_newarray_nothrow', + 'do_malloc', + '::do_malloc', # new name -- got moved to an unnamed ns + '::do_malloc_or_cpp_alloc', + 'DoSampledAllocation', + 'simple_alloc::allocate', + '__malloc_alloc_template::allocate', + '__builtin_delete', + '__builtin_new', + '__builtin_vec_delete', + '__builtin_vec_new', + 'operator new', + 'operator new[]', + # The entry to our memory-allocation routines on OS X + 'malloc_zone_malloc', + 'malloc_zone_calloc', + 'malloc_zone_valloc', + 'malloc_zone_realloc', + 'malloc_zone_memalign', + 'malloc_zone_free', + # These mark the beginning/end of our custom sections + '__start_google_malloc', + '__stop_google_malloc', + '__start_malloc_hook', + '__stop_malloc_hook') { + $skip{$name} = 1; + $skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything + } + # TODO: Remove TCMalloc once everything has been + # moved into the tcmalloc:: namespace and we have flushed + # old code out of the system. 
+ $skip_regexp = "TCMalloc|^tcmalloc::"; + } elsif ($main::profile_type eq 'contention') { + foreach my $vname ('base::RecordLockProfileData', + 'base::SubmitMutexProfileData', + 'base::SubmitSpinLockProfileData', + 'Mutex::Unlock', + 'Mutex::UnlockSlow', + 'Mutex::ReaderUnlock', + 'MutexLock::~MutexLock', + 'SpinLock::Unlock', + 'SpinLock::SlowUnlock', + 'SpinLockHolder::~SpinLockHolder') { + $skip{$vname} = 1; + } + } elsif ($main::profile_type eq 'cpu') { + # Drop signal handlers used for CPU profile collection + # TODO(dpeng): this should not be necessary; it's taken + # care of by the general 2nd-pc mechanism below. + foreach my $name ('ProfileData::Add', # historical + 'ProfileData::prof_handler', # historical + 'CpuProfiler::prof_handler', + '__FRAME_END__', + '__pthread_sighandler', + '__restore') { + $skip{$name} = 1; + } + } else { + # Nothing skipped for unknown types + } + + if ($main::profile_type eq 'cpu') { + # If all the second-youngest program counters are the same, + # this STRONGLY suggests that it is an artifact of measurement, + # i.e., stack frames pushed by the CPU profiler signal handler. + # Hence, we delete them. + # (The topmost PC is read from the signal structure, not from + # the stack, so it does not get involved.) 
+ while (my $second_pc = IsSecondPcAlwaysTheSame($profile)) { + my $result = {}; + my $func = ''; + if (exists($symbols->{$second_pc})) { + $second_pc = $symbols->{$second_pc}->[0]; + } + print STDERR "Removing $second_pc from all stack traces.\n"; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + splice @addrs, 1, 1; + my $reduced_path = join("\n", @addrs); + AddEntry($result, $reduced_path, $count); + } + $profile = $result; + } + } + + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my @path = (); + foreach my $a (@addrs) { + if (exists($symbols->{$a})) { + my $func = $symbols->{$a}->[0]; + if ($skip{$func} || ($func =~ m/$skip_regexp/)) { + # Throw away the portion of the backtrace seen so far, under the + # assumption that previous frames were for functions internal to the + # allocator. + @path = (); + next; + } + } + push(@path, $a); + } + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + + $result = FilterFrames($symbols, $result); + + return $result; +} + +# Reduce profile to granularity given by user +sub ReduceProfile { + my $symbols = shift; + my $profile = shift; + my $result = {}; + my $fullname_to_shortname_map = {}; + FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); + my @path = (); + my %seen = (); + $seen{''} = 1; # So that empty keys are skipped + foreach my $e (@translated) { + # To avoid double-counting due to recursion, skip a stack-trace + # entry if it has already been seen + if (!$seen{$e}) { + $seen{$e} = 1; + push(@path, $e); + } + } + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + return $result; +} + +# Does the specified symbol array match the regexp? 
+sub SymbolMatches { + my $sym = shift; + my $re = shift; + if (defined($sym)) { + for (my $i = 0; $i < $#{$sym}; $i += 3) { + if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) { + return 1; + } + } + } + return 0; +} + +# Focus only on paths involving specified regexps +sub FocusProfile { + my $symbols = shift; + my $profile = shift; + my $focus = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + foreach my $a (@addrs) { + # Reply if it matches either the address/shortname/fileline + if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) { + AddEntry($result, $k, $count); + last; + } + } + } + return $result; +} + +# Focus only on paths not involving specified regexps +sub IgnoreProfile { + my $symbols = shift; + my $profile = shift; + my $ignore = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my $matched = 0; + foreach my $a (@addrs) { + # Reply if it matches either the address/shortname/fileline + if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) { + $matched = 1; + last; + } + } + if (!$matched) { + AddEntry($result, $k, $count); + } + } + return $result; +} + +# Get total count in profile +sub TotalProfile { + my $profile = shift; + my $result = 0; + foreach my $k (keys(%{$profile})) { + $result += $profile->{$k}; + } + return $result; +} + +# Add A to B +sub AddProfile { + my $A = shift; + my $B = shift; + + my $R = {}; + # add all keys in A + foreach my $k (keys(%{$A})) { + my $v = $A->{$k}; + AddEntry($R, $k, $v); + } + # add all keys in B + foreach my $k (keys(%{$B})) { + my $v = $B->{$k}; + AddEntry($R, $k, $v); + } + return $R; +} + +# Merges symbol maps +sub MergeSymbols { + my $A = shift; + my $B = shift; + + my $R = {}; + foreach my $k (keys(%{$A})) { + $R->{$k} = $A->{$k}; + } + if (defined($B)) { + foreach my $k (keys(%{$B})) { + $R->{$k} = $B->{$k}; + } + } + return 
$R; +} + + +# Add A to B +sub AddPcs { + my $A = shift; + my $B = shift; + + my $R = {}; + # add all keys in A + foreach my $k (keys(%{$A})) { + $R->{$k} = 1 + } + # add all keys in B + foreach my $k (keys(%{$B})) { + $R->{$k} = 1 + } + return $R; +} + +# Subtract B from A +sub SubtractProfile { + my $A = shift; + my $B = shift; + + my $R = {}; + foreach my $k (keys(%{$A})) { + my $v = $A->{$k} - GetEntry($B, $k); + if ($v < 0 && $main::opt_drop_negative) { + $v = 0; + } + AddEntry($R, $k, $v); + } + if (!$main::opt_drop_negative) { + # Take care of when subtracted profile has more entries + foreach my $k (keys(%{$B})) { + if (!exists($A->{$k})) { + AddEntry($R, $k, 0 - $B->{$k}); + } + } + } + return $R; +} + +# Get entry from profile; zero if not present +sub GetEntry { + my $profile = shift; + my $k = shift; + if (exists($profile->{$k})) { + return $profile->{$k}; + } else { + return 0; + } +} + +# Add entry to specified profile +sub AddEntry { + my $profile = shift; + my $k = shift; + my $n = shift; + if (!exists($profile->{$k})) { + $profile->{$k} = 0; + } + $profile->{$k} += $n; +} + +# Add a stack of entries to specified profile, and add them to the $pcs +# list. +sub AddEntries { + my $profile = shift; + my $pcs = shift; + my $stack = shift; + my $count = shift; + my @k = (); + + foreach my $e (split(/\s+/, $stack)) { + my $pc = HexExtend($e); + $pcs->{$pc} = 1; + push @k, $pc; + } + AddEntry($profile, (join "\n", @k), $count); +} + +##### Code to profile a server dynamically ##### + +sub CheckSymbolPage { + my $url = SymbolPageURL(); + my $command = ShellEscape(@URL_FETCHER, $url); + open(SYMBOL, "$command |") or error($command); + my $line = ; + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + close(SYMBOL); + unless (defined($line)) { + error("$url doesn't exist\n"); + } + + if ($line =~ /^num_symbols:\s+(\d+)$/) { + if ($1 == 0) { + error("Stripped binary. 
No symbols available.\n"); + } + } else { + error("Failed to get the number of symbols from $url\n"); + } +} + +sub IsProfileURL { + my $profile_name = shift; + if (-f $profile_name) { + printf STDERR "Using local file $profile_name.\n"; + return 0; + } + return 1; +} + +sub ParseProfileURL { + my $profile_name = shift; + + if (!defined($profile_name) || $profile_name eq "") { + return (); + } + + # Split profile URL - matches all non-empty strings, so no test. + $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,; + + my $proto = $1 || "http://"; + my $hostport = $2; + my $prefix = $3; + my $profile = $4 || "/"; + + my $host = $hostport; + $host =~ s/:.*//; + + my $baseurl = "$proto$hostport$prefix"; + return ($host, $baseurl, $profile); +} + +# We fetch symbols from the first profile argument. +sub SymbolPageURL { + my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); + return "$baseURL$SYMBOL_PAGE"; +} + +sub FetchProgramName() { + my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); + my $url = "$baseURL$PROGRAM_NAME_PAGE"; + my $command_line = ShellEscape(@URL_FETCHER, $url); + open(CMDLINE, "$command_line |") or error($command_line); + my $cmdline = ; + $cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines + close(CMDLINE); + error("Failed to get program name from $url\n") unless defined($cmdline); + $cmdline =~ s/\x00.+//; # Remove argv[1] and the arguments after it. + $cmdline =~ s!\n!!g; # Remove LFs. + return $cmdline; +} + +# Gee, curl's -L (--location) option isn't reliable at least +# with its 7.12.3 version. Curl will forget to post data if +# there is a redirection. This function is a workaround for +# curl. Redirection happens on borg hosts.
+sub ResolveRedirectionForCurl { + my $url = shift; + my $command_line = ShellEscape(@URL_FETCHER, "--head", $url); + open(CMDLINE, "$command_line |") or error($command_line); + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (/^Location: (.*)/) { + $url = $1; + } + } + close(CMDLINE); + return $url; +} + +# Add a timeout flag to URL_FETCHER. Returns a new list. +sub AddFetchTimeout { + my $timeout = shift; + my @fetcher = @_; + if (defined($timeout)) { + if (join(" ", @fetcher) =~ m/\bcurl -s/) { + push(@fetcher, "--max-time", sprintf("%d", $timeout)); + } elsif (join(" ", @fetcher) =~ m/\brpcget\b/) { + push(@fetcher, sprintf("--deadline=%d", $timeout)); + } + } + return @fetcher; +} + +# Reads a symbol map from the file handle name given as $1, returning +# the resulting symbol map. Also processes variables relating to symbols. +# Currently, the only variable processed is 'binary=' which updates +# $main::prog to have the correct program name. +sub ReadSymbols { + my $in = shift; + my $map = {}; + while (<$in>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Removes all the leading zeroes from the symbols, see comment below. + if (m/^0x0*([0-9a-f]+)\s+(.+)/) { + $map->{$1} = $2; + } elsif (m/^---/) { + last; + } elsif (m/^([a-z][^=]*)=(.*)$/ ) { + my ($variable, $value) = ($1, $2); + for ($variable, $value) { + s/^\s+//; + s/\s+$//; + } + if ($variable eq "binary") { + if ($main::prog ne $UNKNOWN_BINARY && $main::prog ne $value) { + printf STDERR ("Warning: Mismatched binary name '%s', using '%s'.\n", + $main::prog, $value); + } + $main::prog = $value; + } else { + printf STDERR ("Ignoring unknown variable in symbols list: " .
+ "'%s' = '%s'\n", $variable, $value); + } + } + } + return $map; +} + +sub URLEncode { + my $str = shift; + $str =~ s/([^A-Za-z0-9\-_.!~*'()])/ sprintf "%%%02x", ord $1 /eg; + return $str; +} + +sub AppendSymbolFilterParams { + my $url = shift; + my @params = (); + if ($main::opt_retain ne '') { + push(@params, sprintf("retain=%s", URLEncode($main::opt_retain))); + } + if ($main::opt_exclude ne '') { + push(@params, sprintf("exclude=%s", URLEncode($main::opt_exclude))); + } + if (scalar @params > 0) { + $url = sprintf("%s?%s", $url, join("&", @params)); + } + return $url; +} + +# Fetches and processes symbols to prepare them for use in the profile output +# code. If the optional 'symbol_map' arg is not given, fetches symbols from +# $SYMBOL_PAGE for all PC values found in profile. Otherwise, the raw symbols +# are assumed to have already been fetched into 'symbol_map' and are simply +# extracted and processed. +sub FetchSymbols { + my $pcset = shift; + my $symbol_map = shift; + + my %seen = (); + my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq + + if (!defined($symbol_map)) { + my $post_data = join("+", sort((map {"0x" . "$_"} @pcs))); + + open(POSTFILE, ">$main::tmpfile_sym"); + print POSTFILE $post_data; + close(POSTFILE); + + my $url = SymbolPageURL(); + + my $command_line; + if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) { + $url = ResolveRedirectionForCurl($url); + $url = AppendSymbolFilterParams($url); + $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym", + $url); + } else { + $url = AppendSymbolFilterParams($url); + $command_line = (ShellEscape(@URL_FETCHER, "--post", $url) + . " < " . ShellEscape($main::tmpfile_sym)); + } + # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. 
+ my $escaped_cppfilt = ShellEscape($obj_tool_map{"c++filt"}); + open(SYMBOL, "$command_line | $escaped_cppfilt |") or error($command_line); + $symbol_map = ReadSymbols(*SYMBOL{IO}); + close(SYMBOL); + } + + my $symbols = {}; + foreach my $pc (@pcs) { + my $fullname; + # For 64 bits binaries, symbols are extracted with 8 leading zeroes. + # Then /symbol reads the long symbols in as uint64, and outputs + # the result with a "0x%08llx" format which get rid of the zeroes. + # By removing all the leading zeroes in both $pc and the symbols from + # /symbol, the symbols match and are retrievable from the map. + my $shortpc = $pc; + $shortpc =~ s/^0*//; + # Each line may have a list of names, which includes the function + # and also other functions it has inlined. They are separated (in + # PrintSymbolizedProfile), by --, which is illegal in function names. + my $fullnames; + if (defined($symbol_map->{$shortpc})) { + $fullnames = $symbol_map->{$shortpc}; + } else { + $fullnames = "0x" . $pc; # Just use addresses + } + my $sym = []; + $symbols->{$pc} = $sym; + foreach my $fullname (split("--", $fullnames)) { + my $name = ShortFunctionName($fullname); + push(@{$sym}, $name, "?", $fullname); + } + } + return $symbols; +} + +sub BaseName { + my $file_name = shift; + $file_name =~ s!^.*/!!; # Remove directory name + return $file_name; +} + +sub MakeProfileBaseName { + my ($binary_name, $profile_name) = @_; + my ($host, $baseURL, $path) = ParseProfileURL($profile_name); + my $binary_shortname = BaseName($binary_name); + return sprintf("%s.%s.%s", + $binary_shortname, $main::op_time, $host); +} + +sub FetchDynamicProfile { + my $binary_name = shift; + my $profile_name = shift; + my $fetch_name_only = shift; + my $encourage_patience = shift; + + if (!IsProfileURL($profile_name)) { + return $profile_name; + } else { + my ($host, $baseURL, $path) = ParseProfileURL($profile_name); + if ($path eq "" || $path eq "/") { + # Missing type specifier defaults to cpu-profile + $path = 
$PROFILE_PAGE; + } + + my $profile_file = MakeProfileBaseName($binary_name, $profile_name); + + my $url = "$baseURL$path"; + my $fetch_timeout = undef; + if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) { + if ($path =~ m/[?]/) { + $url .= "&"; + } else { + $url .= "?"; + } + $url .= sprintf("seconds=%d", $main::opt_seconds); + $fetch_timeout = $main::opt_seconds * 1.01 + 60; + # Set $profile_type for consumption by PrintSymbolizedProfile. + $main::profile_type = 'cpu'; + } else { + # For non-CPU profiles, we add a type-extension to + # the target profile file name. + my $suffix = $path; + $suffix =~ s,/,.,g; + $profile_file .= $suffix; + # Set $profile_type for consumption by PrintSymbolizedProfile. + if ($path =~ m/$HEAP_PAGE/) { + $main::profile_type = 'heap'; + } elsif ($path =~ m/$GROWTH_PAGE/) { + $main::profile_type = 'growth'; + } elsif ($path =~ m/$CONTENTION_PAGE/) { + $main::profile_type = 'contention'; + } + } + + my $profile_dir = $ENV{"JEPROF_TMPDIR"} || ($ENV{HOME} . "/jeprof"); + if (! -d $profile_dir) { + mkdir($profile_dir) + || die("Unable to create profile directory $profile_dir: $!\n"); + } + my $tmp_profile = "$profile_dir/.tmp.$profile_file"; + my $real_profile = "$profile_dir/$profile_file"; + + if ($fetch_name_only > 0) { + return $real_profile; + } + + my @fetcher = AddFetchTimeout($fetch_timeout, @URL_FETCHER); + my $cmd = ShellEscape(@fetcher, $url) . " > " . 
ShellEscape($tmp_profile); + if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){ + print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; + if ($encourage_patience) { + print STDERR "Be patient...\n"; + } + } else { + print STDERR "Fetching $path profile from $url to\n ${real_profile}\n"; + } + + (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); + (system("mv", $tmp_profile, $real_profile) == 0) || error("Unable to rename profile\n"); + print STDERR "Wrote profile to $real_profile\n"; + $main::collected_profile = $real_profile; + return $main::collected_profile; + } +} + +# Collect profiles in parallel +sub FetchDynamicProfiles { + my $items = scalar(@main::pfile_args); + my $levels = log($items) / log(2); + + if ($items == 1) { + $main::profile_files[0] = FetchDynamicProfile($main::prog, $main::pfile_args[0], 0, 1); + } else { + # math rounding issues + if ((2 ** $levels) < $items) { + $levels++; + } + my $count = scalar(@main::pfile_args); + for (my $i = 0; $i < $count; $i++) { + $main::profile_files[$i] = FetchDynamicProfile($main::prog, $main::pfile_args[$i], 1, 0); + } + print STDERR "Fetching $count profiles, Be patient...\n"; + FetchDynamicProfilesRecurse($levels, 0, 0); + $main::collected_profile = join(" \\\n ", @main::profile_files); + } +} + +# Recursively fork a process to get enough processes +# collecting profiles +sub FetchDynamicProfilesRecurse { + my $maxlevel = shift; + my $level = shift; + my $position = shift; + + if (my $pid = fork()) { + $position = 0 | ($position << 1); + TryCollectProfile($maxlevel, $level, $position); + wait; + } else { + $position = 1 | ($position << 1); + TryCollectProfile($maxlevel, $level, $position); + cleanup(); + exit(0); + } +} + +# Collect a single profile +sub TryCollectProfile { + my $maxlevel = shift; + my $level = shift; + my $position = shift; + + if ($level >= ($maxlevel - 1)) { + if ($position < scalar(@main::pfile_args)) { + 
FetchDynamicProfile($main::prog, $main::pfile_args[$position], 0, 0); + } + } else { + FetchDynamicProfilesRecurse($maxlevel, $level+1, $position); + } +} + +##### Parsing code ##### + +# Provide a small streaming-read module to handle very large +# cpu-profile files. Stream in chunks along a sliding window. +# Provides an interface to get one 'slot', correctly handling +# endian-ness differences. A slot is one 32-bit or 64-bit word +# (depending on the input profile). We tell endianness and bit-size +# for the profile by looking at the first 8 bytes: in cpu profiles, +# the second slot is always 3 (we'll accept anything that's not 0). +BEGIN { + package CpuProfileStream; + + sub new { + my ($class, $file, $fname) = @_; + my $self = { file => $file, + base => 0, + stride => 512 * 1024, # must be a multiple of bitsize/8 + slots => [], + unpack_code => "", # N for big-endian, V for little + perl_is_64bit => 1, # matters if profile is 64-bit + }; + bless $self, $class; + # Let unittests adjust the stride + if ($main::opt_test_stride > 0) { + $self->{stride} = $main::opt_test_stride; + } + # Read the first two slots to figure out bitsize and endianness. + my $slots = $self->{slots}; + my $str; + read($self->{file}, $str, 8); + # Set the global $address_length based on what we see here. + # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars). + $address_length = ($str eq (chr(0)x8)) ? 16 : 8; + if ($address_length == 8) { + if (substr($str, 6, 2) eq chr(0)x2) { + $self->{unpack_code} = 'V'; # Little-endian. + } elsif (substr($str, 4, 2) eq chr(0)x2) { + $self->{unpack_code} = 'N'; # Big-endian + } else { + ::error("$fname: header size >= 2**16\n"); + } + @$slots = unpack($self->{unpack_code} . "*", $str); + } else { + # If we're a 64-bit profile, check if we're a 64-bit-capable + # perl. Otherwise, each slot will be represented as a float + # instead of an int64, losing precision and making all the + # 64-bit addresses wrong. 
We won't complain yet, but will + # later if we ever see a value that doesn't fit in 32 bits. + my $has_q = 0; + eval { $has_q = pack("Q", "1") ? 1 : 1; }; + if (!$has_q) { + $self->{perl_is_64bit} = 0; + } + read($self->{file}, $str, 8); + if (substr($str, 4, 4) eq chr(0)x4) { + # We'd love to use 'Q', but it's a) not universal, b) not endian-proof. + $self->{unpack_code} = 'V'; # Little-endian. + } elsif (substr($str, 0, 4) eq chr(0)x4) { + $self->{unpack_code} = 'N'; # Big-endian + } else { + ::error("$fname: header size >= 2**32\n"); + } + my @pair = unpack($self->{unpack_code} . "*", $str); + # Since we know one of the pair is 0, it's fine to just add them. + @$slots = (0, $pair[0] + $pair[1]); + } + return $self; + } + + # Load more data when we access slots->get(X) which is not yet in memory. + sub overflow { + my ($self) = @_; + my $slots = $self->{slots}; + $self->{base} += $#$slots + 1; # skip over data we're replacing + my $str; + read($self->{file}, $str, $self->{stride}); + if ($address_length == 8) { # the 32-bit case + # This is the easy case: unpack provides 32-bit unpacking primitives. + @$slots = unpack($self->{unpack_code} . "*", $str); + } else { + # We need to unpack 32 bits at a time and combine. + my @b32_values = unpack($self->{unpack_code} . "*", $str); + my @b64_values = (); + for (my $i = 0; $i < $#b32_values; $i += 2) { + # TODO(csilvers): if this is a 32-bit perl, the math below + # could end up in a too-large int, which perl will promote + # to a double, losing necessary precision. Deal with that. + # Right now, we just die. 
+ my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]); + if ($self->{unpack_code} eq 'N') { # big-endian + ($lo, $hi) = ($hi, $lo); + } + my $value = $lo + $hi * (2**32); + if (!$self->{perl_is_64bit} && # check value is exactly represented + (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) { + ::error("Need a 64-bit perl to process this 64-bit profile.\n"); + } + push(@b64_values, $value); + } + @$slots = @b64_values; + } + } + + # Access the i-th long in the file (logically), or -1 at EOF. + sub get { + my ($self, $idx) = @_; + my $slots = $self->{slots}; + while ($#$slots >= 0) { + if ($idx < $self->{base}) { + # The only time we expect a reference to $slots[$i - something] + # after referencing $slots[$i] is reading the very first header. + # Since $stride > |header|, that shouldn't cause any lookback + # errors. And everything after the header is sequential. + print STDERR "Unexpected look-back reading CPU profile"; + return -1; # shrug, don't know what better to return + } elsif ($idx > $self->{base} + $#$slots) { + $self->overflow(); + } else { + return $slots->[$idx - $self->{base}]; + } + } + # If we get here, $slots is [], which means we've reached EOF + return -1; # unique since slots is supposed to hold unsigned numbers + } +} + +# Reads the top, 'header' section of a profile, and returns the last +# line of the header, commonly called a 'header line'. The header +# section of a profile consists of zero or more 'command' lines that +# are instructions to jeprof, which jeprof executes when reading the +# header. All 'command' lines start with a %. After the command +# lines is the 'header line', which is a profile-specific line that +# indicates what type of profile it is, and perhaps other global +# information about the profile. For instance, here's a header line +# for a heap profile: +# heap profile: 53: 38236 [ 5525: 1284029] @ heapprofile +# For historical reasons, the CPU profile does not contain a text- +# readable header line. 
If the profile looks like a CPU profile, +# this function returns "". If no header line could be found, this +# function returns undef. +# +# The following commands are recognized: +# %warn -- emit the rest of this line to stderr, prefixed by 'WARNING:' +# +# The input file should be in binmode. +sub ReadProfileHeader { + local *PROFILE = shift; + my $firstchar = ""; + my $line = ""; + read(PROFILE, $firstchar, 1); + seek(PROFILE, -1, 1); # unread the firstchar + if ($firstchar !~ /[[:print:]]/) { # is not a text character + return ""; + } + while (defined($line = )) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ($line =~ /^%warn\s+(.*)/) { # 'warn' command + # Note this matches both '%warn blah\n' and '%warn\n'. + print STDERR "WARNING: $1\n"; # print the rest of the line + } elsif ($line =~ /^%/) { + print STDERR "Ignoring unknown command from profile header: $line"; + } else { + # End of commands, must be the header line. + return $line; + } + } + return undef; # got to EOF without seeing a header line +} + +sub IsSymbolizedProfileFile { + my $file_name = shift; + if (!(-e $file_name) || !(-r $file_name)) { + return 0; + } + # Check if the file contains a symbol-section marker. 
+ open(TFILE, "<$file_name"); + binmode TFILE; + my $firstline = ReadProfileHeader(*TFILE); + close(TFILE); + if (!$firstline) { + return 0; + } + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + return $firstline =~ /^--- *$symbol_marker/; +} + +# Parse profile generated by common/profiler.cc and return a reference +# to a map: +# $result->{version} Version number of profile file +# $result->{period} Sampling period (in microseconds) +# $result->{profile} Profile object +# $result->{threads} Map of thread IDs to profile objects +# $result->{map} Memory map info from profile +# $result->{pcs} Hash of all PC values seen, key is hex address +sub ReadProfile { + my $prog = shift; + my $fname = shift; + my $result; # return value + + $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $contention_marker = $&; + $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $growth_marker = $&; + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $profile_marker = $&; + $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $heap_marker = $&; + + # Look at first line to see if it is a heap or a CPU profile. + # CPU profile may start with no header at all, and just binary data + # (starting with \0\0\0\0) -- in that case, don't try to read the + # whole firstline, since it may be gigabytes(!) of data. 
+ open(PROFILE, "<$fname") || error("$fname: $!\n"); + binmode PROFILE; # New perls do UTF-8 processing + my $header = ReadProfileHeader(*PROFILE); + if (!defined($header)) { # means "at EOF" + error("Profile is empty.\n"); + } + + my $symbols; + if ($header =~ m/^--- *$symbol_marker/o) { + # Verify that the user asked for a symbolized profile + if (!$main::use_symbolized_profile) { + # we have both a binary and symbolized profiles, abort + error("FATAL ERROR: Symbolized profile\n $fname\ncannot be used with " . + "a binary arg. Try again without passing\n $prog\n"); + } + # Read the symbol section of the symbolized profile file. + $symbols = ReadSymbols(*PROFILE{IO}); + # Read the next line to get the header for the remaining profile. + $header = ReadProfileHeader(*PROFILE) || ""; + } + + if ($header =~ m/^--- *($heap_marker|$growth_marker)/o) { + # Skip "--- ..." line for profile types that have their own headers. + $header = ReadProfileHeader(*PROFILE) || ""; + } + + $main::profile_type = ''; + + if ($header =~ m/^heap profile:.*$growth_marker/o) { + $main::profile_type = 'growth'; + $result = ReadHeapProfile($prog, *PROFILE, $header); + } elsif ($header =~ m/^heap profile:/) { + $main::profile_type = 'heap'; + $result = ReadHeapProfile($prog, *PROFILE, $header); + } elsif ($header =~ m/^heap/) { + $main::profile_type = 'heap'; + $result = ReadThreadedHeapProfile($prog, $fname, $header); + } elsif ($header =~ m/^--- *$contention_marker/o) { + $main::profile_type = 'contention'; + $result = ReadSynchProfile($prog, *PROFILE); + } elsif ($header =~ m/^--- *Stacks:/) { + print STDERR + "Old format contention profile: mistakenly reports " . 
+ "condition variable signals as lock contentions.\n"; + $main::profile_type = 'contention'; + $result = ReadSynchProfile($prog, *PROFILE); + } elsif ($header =~ m/^--- *$profile_marker/) { + # the binary cpu profile data starts immediately after this line + $main::profile_type = 'cpu'; + $result = ReadCPUProfile($prog, $fname, *PROFILE); + } else { + if (defined($symbols)) { + # a symbolized profile contains a format we don't recognize, bail out + error("$fname: Cannot recognize profile section after symbols.\n"); + } + # no ascii header present -- must be a CPU profile + $main::profile_type = 'cpu'; + $result = ReadCPUProfile($prog, $fname, *PROFILE); + } + + close(PROFILE); + + # if we got symbols along with the profile, return those as well + if (defined($symbols)) { + $result->{symbols} = $symbols; + } + + return $result; +} + +# Subtract one from caller pc so we map back to call instr. +# However, don't do this if we're reading a symbolized profile +# file, in which case the subtract-one was done when the file +# was written. +# +# We apply the same logic to all readers, though ReadCPUProfile uses an +# independent implementation. +sub FixCallerAddresses { + my $stack = shift; + # --raw/http: Always subtract one from pc's, because PrintSymbolizedProfile() + # dumps unadjusted profiles. + { + $stack =~ /(\s)/; + my $delimiter = $1; + my @addrs = split(' ', $stack); + my @fixedaddrs; + $#fixedaddrs = $#addrs; + if ($#addrs >= 0) { + $fixedaddrs[0] = $addrs[0]; + } + for (my $i = 1; $i <= $#addrs; $i++) { + $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1"); + } + return join $delimiter, @fixedaddrs; + } +} + +# CPU profile reader +sub ReadCPUProfile { + my $prog = shift; + my $fname = shift; # just used for logging + local *PROFILE = shift; + my $version; + my $period; + my $i; + my $profile = {}; + my $pcs = {}; + + # Parse string into array of slots. + my $slots = CpuProfileStream->new(*PROFILE, $fname); + + # Read header. 
The current header version is a 5-element structure + # containing: + # 0: header count (always 0) + # 1: header "words" (after this one: 3) + # 2: format version (0) + # 3: sampling period (usec) + # 4: unused padding (always 0) + if ($slots->get(0) != 0 ) { + error("$fname: not a profile file, or old format profile file\n"); + } + $i = 2 + $slots->get(1); + $version = $slots->get(2); + $period = $slots->get(3); + # Do some sanity checking on these header values. + if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) { + error("$fname: not a profile file, or corrupted profile file\n"); + } + + # Parse profile + while ($slots->get($i) != -1) { + my $n = $slots->get($i++); + my $d = $slots->get($i++); + if ($d > (2**16)) { # TODO(csilvers): what's a reasonable max-stack-depth? + my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8)); + print STDERR "At index $i (address $addr):\n"; + error("$fname: stack trace depth >= 2**32\n"); + } + if ($slots->get($i) == 0) { + # End of profile data marker + $i += $d; + last; + } + + # Make key out of the stack entries + my @k = (); + for (my $j = 0; $j < $d; $j++) { + my $pc = $slots->get($i+$j); + # Subtract one from caller pc so we map back to call instr. 
+ $pc--; + $pc = sprintf("%0*x", $address_length, $pc); + $pcs->{$pc} = 1; + push @k, $pc; + } + + AddEntry($profile, (join "\n", @k), $n); + $i += $d; + } + + # Parse map + my $map = ''; + seek(PROFILE, $i * 4, 0); + read(PROFILE, $map, (stat PROFILE)[7]); + + my $r = {}; + $r->{version} = $version; + $r->{period} = $period; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + + return $r; +} + +sub HeapProfileIndex { + my $index = 1; + if ($main::opt_inuse_space) { + $index = 1; + } elsif ($main::opt_inuse_objects) { + $index = 0; + } elsif ($main::opt_alloc_space) { + $index = 3; + } elsif ($main::opt_alloc_objects) { + $index = 2; + } + return $index; +} + +sub ReadMappedLibraries { + my $fh = shift; + my $map = ""; + # Read the /proc/self/maps data + while (<$fh>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + $map .= $_; + } + return $map; +} + +sub ReadMemoryMap { + my $fh = shift; + my $map = ""; + # Read /proc/self/maps data as formatted by DumpAddressMap() + my $buildvar = ""; + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Parse "build=" specification if supplied + if (m/^\s*build=(.*)\n/) { + $buildvar = $1; + } + + # Expand "$build" variable if available + $_ =~ s/\$build\b/$buildvar/g; + + $map .= $_; + } + return $map; +} + +sub AdjustSamples { + my ($sample_adjustment, $sampling_algorithm, $n1, $s1, $n2, $s2) = @_; + if ($sample_adjustment) { + if ($sampling_algorithm == 2) { + # Remote-heap version 2 + # The sampling frequency is the rate of a Poisson process. 
+ # This means that the probability of sampling an allocation of + # size X with sampling rate Y is 1 - exp(-X/Y) + if ($n1 != 0) { + my $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n1 *= $scale_factor; + $s1 *= $scale_factor; + } + if ($n2 != 0) { + my $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n2 *= $scale_factor; + $s2 *= $scale_factor; + } + } else { + # Remote-heap version 1 + my $ratio; + $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + if ($ratio < 1) { + $n1 /= $ratio; + $s1 /= $ratio; + } + $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + if ($ratio < 1) { + $n2 /= $ratio; + $s2 /= $ratio; + } + } + } + return ($n1, $s1, $n2, $s2); +} + +sub ReadHeapProfile { + my $prog = shift; + local *PROFILE = shift; + my $header = shift; + + my $index = HeapProfileIndex(); + + # Find the type of this profile. The header line looks like: + # heap profile: 1246: 8800744 [ 1246: 8800744] @ /266053 + # There are two pairs , the first inuse objects/space, and the + # second allocated objects/space. This is followed optionally by a profile + # type, and if that is present, optionally by a sampling frequency. + # For remote heap profiles (v1): + # The interpretation of the sampling frequency is that the profiler, for + # each sample, calculates a uniformly distributed random integer less than + # the given value, and records the next sample after that many bytes have + # been allocated. Therefore, the expected sample interval is half of the + # given frequency. By default, if not specified, the expected sample + # interval is 128KB. Only remote-heap-page profiles are adjusted for + # sample size. + # For remote heap profiles (v2): + # The sampling frequency is the rate of a Poisson process. 
This means that + # the probability of sampling an allocation of size X with sampling rate Y + # is 1 - exp(-X/Y) + # For version 2, a typical header line might look like this: + # heap profile: 1922: 127792360 [ 1922: 127792360] @ _v2/524288 + # the trailing number (524288) is the sampling rate. (Version 1 showed + # double the 'rate' here) + my $sampling_algorithm = 0; + my $sample_adjustment = 0; + chomp($header); + my $type = "unknown"; + if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") { + if (defined($6) && ($6 ne '')) { + $type = $6; + my $sample_period = $8; + # $type is "heapprofile" for profiles generated by the + # heap-profiler, and either "heap" or "heap_v2" for profiles + # generated by sampling directly within tcmalloc. It can also + # be "growth" for heap-growth profiles. The first is typically + # found for profiles generated locally, and the others for + # remote profiles. + if (($type eq "heapprofile") || ($type !~ /heap/) ) { + # No need to adjust for the sampling rate with heap-profiler-derived data + $sampling_algorithm = 0; + } elsif ($type =~ /_v2/) { + $sampling_algorithm = 2; # version 2 sampling + if (defined($sample_period) && ($sample_period ne '')) { + $sample_adjustment = int($sample_period); + } + } else { + $sampling_algorithm = 1; # version 1 sampling + if (defined($sample_period) && ($sample_period ne '')) { + $sample_adjustment = int($sample_period)/2; + } + } + } else { + # We detect whether or not this is a remote-heap profile by checking + # that the total-allocated stats ($n2,$s2) are exactly the + # same as the in-use stats ($n1,$s1). It is remotely conceivable + # that a non-remote-heap profile may pass this check, but it is hard + # to imagine how that could happen. + # In this case it's so old it's guaranteed to be remote-heap version 1. 
+ my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); + if (($n1 == $n2) && ($s1 == $s2)) { + # This is likely to be a remote-heap based sample profile + $sampling_algorithm = 1; + } + } + } + + if ($sampling_algorithm > 0) { + # For remote-heap generated profiles, adjust the counts and sizes to + # account for the sample rate (we sample once every 128KB by default). + if ($sample_adjustment == 0) { + # Turn on profile adjustment. + $sample_adjustment = 128*1024; + print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n"; + } else { + printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n", + $sample_adjustment); + } + if ($sampling_algorithm > 1) { + # We don't bother printing anything for the original version (version 1) + printf STDERR "Heap version $sampling_algorithm\n"; + } + } + + my $profile = {}; + my $pcs = {}; + my $map = ""; + + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (/^MAPPED_LIBRARIES:/) { + $map .= ReadMappedLibraries(*PROFILE); + last; + } + + if (/^--- Memory map:/) { + $map .= ReadMemoryMap(*PROFILE); + last; + } + + # Read entry of the form: + # : [: ] @ a1 a2 a3 ... an + s/^\s*//; + s/\s*$//; + if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) { + my $stack = $5; + my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); + my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm, + $n1, $s1, $n2, $s2); + AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); + } + } + + my $r = {}; + $r->{version} = "heap"; + $r->{period} = 1; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +sub ReadThreadedHeapProfile { + my ($prog, $fname, $header) = @_; + + my $index = HeapProfileIndex(); + my $sampling_algorithm = 0; + my $sample_adjustment = 0; + chomp($header); + my $type = "unknown"; + # Assuming a very specific type of header for now. 
+ if ($header =~ m"^heap_v2/(\d+)") { + $type = "_v2"; + $sampling_algorithm = 2; + $sample_adjustment = int($1); + } + if ($type ne "_v2" || !defined($sample_adjustment)) { + die "Threaded heap profiles require v2 sampling with a sample rate\n"; + } + + my $profile = {}; + my $thread_profiles = {}; + my $pcs = {}; + my $map = ""; + my $stack = ""; + + while () { + s/\r//g; + if (/^MAPPED_LIBRARIES:/) { + $map .= ReadMappedLibraries(*PROFILE); + last; + } + + if (/^--- Memory map:/) { + $map .= ReadMemoryMap(*PROFILE); + last; + } + + # Read entry of the form: + # @ a1 a2 ... an + # t*: : [: ] + # t1: : [: ] + # ... + # tn: : [: ] + s/^\s*//; + s/\s*$//; + if (m/^@\s+(.*)$/) { + $stack = $1; + } elsif (m/^\s*(t(\*|\d+)):\s+(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]$/) { + if ($stack eq "") { + # Still in the header, so this is just a per-thread summary. + next; + } + my $thread = $2; + my ($n1, $s1, $n2, $s2) = ($3, $4, $5, $6); + my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm, + $n1, $s1, $n2, $s2); + if ($thread eq "*") { + AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); + } else { + if (!exists($thread_profiles->{$thread})) { + $thread_profiles->{$thread} = {}; + } + AddEntries($thread_profiles->{$thread}, $pcs, + FixCallerAddresses($stack), $counts[$index]); + } + } + } + + my $r = {}; + $r->{version} = "heap"; + $r->{period} = 1; + $r->{profile} = $profile; + $r->{threads} = $thread_profiles; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +sub ReadSynchProfile { + my $prog = shift; + local *PROFILE = shift; + my $header = shift; + + my $map = ''; + my $profile = {}; + my $pcs = {}; + my $sampling_period = 1; + my $cyclespernanosec = 2.8; # Default assumption for old binaries + my $seen_clockrate = 0; + my $line; + + my $index = 0; + if ($main::opt_total_delay) { + $index = 0; + } elsif ($main::opt_contentions) { + $index = 1; + } elsif ($main::opt_mean_delay) { + $index = 2; + } 
+ + while ( $line = ) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) { + my ($cycles, $count, $stack) = ($1, $2, $3); + + # Convert cycles to nanoseconds + $cycles /= $cyclespernanosec; + + # Adjust for sampling done by application + $cycles *= $sampling_period; + $count *= $sampling_period; + + my @values = ($cycles, $count, $cycles / $count); + AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]); + + } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ || + $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) { + my ($cycles, $stack) = ($1, $2); + if ($cycles !~ /^\d+$/) { + next; + } + + # Convert cycles to nanoseconds + $cycles /= $cyclespernanosec; + + # Adjust for sampling done by application + $cycles *= $sampling_period; + + AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles); + + } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) { + my ($variable, $value) = ($1,$2); + for ($variable, $value) { + s/^\s+//; + s/\s+$//; + } + if ($variable eq "cycles/second") { + $cyclespernanosec = $value / 1e9; + $seen_clockrate = 1; + } elsif ($variable eq "sampling period") { + $sampling_period = $value; + } elsif ($variable eq "ms since reset") { + # Currently nothing is done with this value in jeprof + # So we just silently ignore it for now + } elsif ($variable eq "discarded samples") { + # Currently nothing is done with this value in jeprof + # So we just silently ignore it for now + } else { + printf STDERR ("Ignoring unnknown variable in /contention output: " . 
+ "'%s' = '%s'\n",$variable,$value); + } + } else { + # Memory map entry + $map .= $line; + } + } + + if (!$seen_clockrate) { + printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n", + $cyclespernanosec); + } + + my $r = {}; + $r->{version} = 0; + $r->{period} = $sampling_period; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +# Given a hex value in the form "0x1abcd" or "1abcd", return either +# "0001abcd" or "000000000001abcd", depending on the current (global) +# address length. +sub HexExtend { + my $addr = shift; + + $addr =~ s/^(0x)?0*//; + my $zeros_needed = $address_length - length($addr); + if ($zeros_needed < 0) { + printf STDERR "Warning: address $addr is longer than address length $address_length\n"; + return $addr; + } + return ("0" x $zeros_needed) . $addr; +} + +##### Symbol extraction ##### + +# Aggressively search the lib_prefix values for the given library +# If all else fails, just return the name of the library unmodified. +# If the lib_prefix is "/my/path,/other/path" and $file is "/lib/dir/mylib.so" +# it will search the following locations in this order, until it finds a file: +# /my/path/lib/dir/mylib.so +# /other/path/lib/dir/mylib.so +# /my/path/dir/mylib.so +# /other/path/dir/mylib.so +# /my/path/mylib.so +# /other/path/mylib.so +# /lib/dir/mylib.so (returned as last resort) +sub FindLibrary { + my $file = shift; + my $suffix = $file; + + # Search for the library as described above + do { + foreach my $prefix (@prefix_list) { + my $fullpath = $prefix . $suffix; + if (-e $fullpath) { + return $fullpath; + } + } + } while ($suffix =~ s|^/[^/]+/|/|); + return $file; +} + +# Return path to library with debugging symbols. +# For libc libraries, the copy in /usr/lib/debug contains debugging symbols +sub DebuggingLibrary { + my $file = shift; + + if ($file !~ m|^/|) { + return undef; + } + + # Find debug symbol file if it's named after the library's name. 
+ + if (-f "/usr/lib/debug$file") { + if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file\n"; } + return "/usr/lib/debug$file"; + } elsif (-f "/usr/lib/debug$file.debug") { + if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file.debug\n"; } + return "/usr/lib/debug$file.debug"; + } + + if(!$main::opt_debug_syms_by_id) { + if($main::opt_debug) { print STDERR "no debug symbols found for $file\n" }; + return undef; + } + + # Find debug file if it's named after the library's build ID. + + my $readelf = ''; + if (!$main::gave_up_on_elfutils) { + $readelf = qx/eu-readelf -n ${file}/; + if ($?) { + print STDERR "Cannot run eu-readelf. To use --debug-syms-by-id you must be on Linux, with elfutils installed.\n"; + $main::gave_up_on_elfutils = 1; + return undef; + } + my $buildID = $1 if $readelf =~ /Build ID: ([A-Fa-f0-9]+)/s; + if (defined $buildID && length $buildID > 0) { + my $symbolFile = '/usr/lib/debug/.build-id/' . substr($buildID, 0, 2) . '/' . substr($buildID, 2) . '.debug'; + if (-e $symbolFile) { + if($main::opt_debug) { print STDERR "found debug symbol file $symbolFile for $file\n" }; + return $symbolFile; + } else { + if($main::opt_debug) { print STDERR "no debug symbol file found for $file, build ID: $buildID\n" }; + return undef; + } + } + } + + if($main::opt_debug) { print STDERR "no debug symbols found for $file, build ID unknown\n" }; + return undef; +} + + +# Parse text section header of a library using objdump +sub ParseTextSectionHeaderFromObjdump { + my $lib = shift; + + my $size = undef; + my $vma; + my $file_offset; + # Get objdump output from the library file to figure out how to + # map between mapped addresses and addresses in the library. 
+ my $cmd = ShellEscape($obj_tool_map{"objdump"}, "-h", $lib); + open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Idx Name Size VMA LMA File off Algn + # 10 .text 00104b2c 420156f0 420156f0 000156f0 2**4 + # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file + # offset may still be 8. But AddressSub below will still handle that. + my @x = split; + if (($#x >= 6) && ($x[1] eq '.text')) { + $size = $x[2]; + $vma = $x[3]; + $file_offset = $x[5]; + last; + } + } + close(OBJDUMP); + + if (!defined($size)) { + return undef; + } + + my $r = {}; + $r->{size} = $size; + $r->{vma} = $vma; + $r->{file_offset} = $file_offset; + + return $r; +} + +# Parse text section header of a library using otool (on OS X) +sub ParseTextSectionHeaderFromOtool { + my $lib = shift; + + my $size = undef; + my $vma = undef; + my $file_offset = undef; + # Get otool output from the library file to figure out how to + # map between mapped addresses and addresses in the library. + my $command = ShellEscape($obj_tool_map{"otool"}, "-l", $lib); + open(OTOOL, "$command |") || error("$command: $!\n"); + my $cmd = ""; + my $sectname = ""; + my $segname = ""; + foreach my $line () { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + # Load command <#> + # cmd LC_SEGMENT + # [...] + # Section + # sectname __text + # segname __TEXT + # addr 0x000009f8 + # size 0x00018b9e + # offset 2552 + # align 2^2 (4) + # We will need to strip off the leading 0x from the hex addresses, + # and convert the offset into hex. 
+ if ($line =~ /Load command/) { + $cmd = ""; + $sectname = ""; + $segname = ""; + } elsif ($line =~ /Section/) { + $sectname = ""; + $segname = ""; + } elsif ($line =~ /cmd (\w+)/) { + $cmd = $1; + } elsif ($line =~ /sectname (\w+)/) { + $sectname = $1; + } elsif ($line =~ /segname (\w+)/) { + $segname = $1; + } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") && + $sectname eq "__text" && + $segname eq "__TEXT")) { + next; + } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) { + $vma = $1; + } elsif ($line =~ /\bsize 0x([0-9a-fA-F]+)/) { + $size = $1; + } elsif ($line =~ /\boffset ([0-9]+)/) { + $file_offset = sprintf("%016x", $1); + } + if (defined($vma) && defined($size) && defined($file_offset)) { + last; + } + } + close(OTOOL); + + if (!defined($vma) || !defined($size) || !defined($file_offset)) { + return undef; + } + + my $r = {}; + $r->{size} = $size; + $r->{vma} = $vma; + $r->{file_offset} = $file_offset; + + return $r; +} + +sub ParseTextSectionHeader { + # obj_tool_map("otool") is only defined if we're in a Mach-O environment + if (defined($obj_tool_map{"otool"})) { + my $r = ParseTextSectionHeaderFromOtool(@_); + if (defined($r)){ + return $r; + } + } + # If otool doesn't work, or we don't have it, fall back to objdump + return ParseTextSectionHeaderFromObjdump(@_); +} + +# Split /proc/pid/maps dump into a list of libraries +sub ParseLibraries { + return if $main::use_symbol_page; # We don't need libraries info. + my $prog = Cwd::abs_path(shift); + my $map = shift; + my $pcs = shift; + + my $result = []; + my $h = "[a-f0-9]+"; + my $zero_offset = HexExtend("0"); + + my $buildvar = ""; + foreach my $l (split("\n", $map)) { + if ($l =~ m/^\s*build=(.*)$/) { + $buildvar = $1; + } + + my $start; + my $finish; + my $offset; + my $lib; + if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) { + # Full line from /proc/self/maps. 
Example: + # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = HexExtend($3); + $lib = $4; + $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths + } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) { + # Cooked line from DumpAddressMap. Example: + # 40000000-40015000: /lib/ld-2.3.2.so + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = $zero_offset; + $lib = $3; + } elsif (($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+)$/i) && ($4 eq $prog)) { + # PIEs and address space randomization do not play well with our + # default assumption that main executable is at lowest + # addresses. So we're detecting main executable in + # /proc/self/maps as well. + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = HexExtend($3); + $lib = $4; + $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths + } + # FreeBSD 10.0 virtual memory map /proc/curproc/map as defined in + # function procfs_doprocmap (sys/fs/procfs/procfs_map.c) + # + # Example: + # 0x800600000 0x80061a000 26 0 0xfffff800035a0000 r-x 75 33 0x1004 COW NC vnode /libexec/ld-elf.s + # o.1 NCH -1 + elsif ($l =~ /^(0x$h)\s(0x$h)\s\d+\s\d+\s0x$h\sr-x\s\d+\s\d+\s0x\d+\s(COW|NCO)\s(NC|NNC)\svnode\s(\S+\.so(\.\d+)*)/) { + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = $zero_offset; + $lib = FindLibrary($5); + + } else { + next; + } + + # Expand "$build" variable if available + $lib =~ s/\$build\b/$buildvar/g; + + $lib = FindLibrary($lib); + + # Check for pre-relocated libraries, which use pre-relocated symbol tables + # and thus require adjusting the offset that we'll use to translate + # VM addresses into symbol table addresses. + # Only do this if we're not going to fetch the symbol table from a + # debugging copy of the library. 
+ if (!DebuggingLibrary($lib)) { + my $text = ParseTextSectionHeader($lib); + if (defined($text)) { + my $vma_offset = AddressSub($text->{vma}, $text->{file_offset}); + $offset = AddressAdd($offset, $vma_offset); + } + } + + if($main::opt_debug) { printf STDERR "$start:$finish ($offset) $lib\n"; } + push(@{$result}, [$lib, $start, $finish, $offset]); + } + + # Append special entry for additional library (not relocated) + if ($main::opt_lib ne "") { + my $text = ParseTextSectionHeader($main::opt_lib); + if (defined($text)) { + my $start = $text->{vma}; + my $finish = AddressAdd($start, $text->{size}); + + push(@{$result}, [$main::opt_lib, $start, $finish, $start]); + } + } + + # Append special entry for the main program. This covers + # 0..max_pc_value_seen, so that we assume pc values not found in one + # of the library ranges will be treated as coming from the main + # program binary. + my $min_pc = HexExtend("0"); + my $max_pc = $min_pc; # find the maximal PC value in any sample + foreach my $pc (keys(%{$pcs})) { + if (HexExtend($pc) gt $max_pc) { $max_pc = HexExtend($pc); } + } + push(@{$result}, [$prog, $min_pc, $max_pc, $zero_offset]); + + return $result; +} + +# Add two hex addresses of length $address_length. +# Run jeprof --test for unit test if this is changed. +sub AddressAdd { + my $addr1 = shift; + my $addr2 = shift; + my $sum; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $sum = (hex($addr1)+hex($addr2)) % (0x10000000 * 16); + return sprintf("%08x", $sum); + + } else { + # Do the addition in 7-nibble chunks to trivialize carry handling. 
+ + if ($main::opt_debug and $main::opt_test) { + print STDERR "AddressAdd $addr1 + $addr2 = "; + } + + my $a1 = substr($addr1,-7); + $addr1 = substr($addr1,0,-7); + my $a2 = substr($addr2,-7); + $addr2 = substr($addr2,0,-7); + $sum = hex($a1) + hex($a2); + my $c = 0; + if ($sum > 0xfffffff) { + $c = 1; + $sum -= 0x10000000; + } + my $r = sprintf("%07x", $sum); + + $a1 = substr($addr1,-7); + $addr1 = substr($addr1,0,-7); + $a2 = substr($addr2,-7); + $addr2 = substr($addr2,0,-7); + $sum = hex($a1) + hex($a2) + $c; + $c = 0; + if ($sum > 0xfffffff) { + $c = 1; + $sum -= 0x10000000; + } + $r = sprintf("%07x", $sum) . $r; + + $sum = hex($addr1) + hex($addr2) + $c; + if ($sum > 0xff) { $sum -= 0x100; } + $r = sprintf("%02x", $sum) . $r; + + if ($main::opt_debug and $main::opt_test) { print STDERR "$r\n"; } + + return $r; + } +} + + +# Subtract two hex addresses of length $address_length. +# Run jeprof --test for unit test if this is changed. +sub AddressSub { + my $addr1 = shift; + my $addr2 = shift; + my $diff; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $diff = (hex($addr1)-hex($addr2)) % (0x10000000 * 16); + return sprintf("%08x", $diff); + + } else { + # Do the addition in 7-nibble chunks to trivialize borrow handling. + # if ($main::opt_debug) { print STDERR "AddressSub $addr1 - $addr2 = "; } + + my $a1 = hex(substr($addr1,-7)); + $addr1 = substr($addr1,0,-7); + my $a2 = hex(substr($addr2,-7)); + $addr2 = substr($addr2,0,-7); + my $b = 0; + if ($a2 > $a1) { + $b = 1; + $a1 += 0x10000000; + } + $diff = $a1 - $a2; + my $r = sprintf("%07x", $diff); + + $a1 = hex(substr($addr1,-7)); + $addr1 = substr($addr1,0,-7); + $a2 = hex(substr($addr2,-7)) + $b; + $addr2 = substr($addr2,0,-7); + $b = 0; + if ($a2 > $a1) { + $b = 1; + $a1 += 0x10000000; + } + $diff = $a1 - $a2; + $r = sprintf("%07x", $diff) . 
$r; + + $a1 = hex($addr1); + $a2 = hex($addr2) + $b; + if ($a2 > $a1) { $a1 += 0x100; } + $diff = $a1 - $a2; + $r = sprintf("%02x", $diff) . $r; + + # if ($main::opt_debug) { print STDERR "$r\n"; } + + return $r; + } +} + +# Increment a hex addresses of length $address_length. +# Run jeprof --test for unit test if this is changed. +sub AddressInc { + my $addr = shift; + my $sum; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $sum = (hex($addr)+1) % (0x10000000 * 16); + return sprintf("%08x", $sum); + + } else { + # Do the addition in 7-nibble chunks to trivialize carry handling. + # We are always doing this to step through the addresses in a function, + # and will almost never overflow the first chunk, so we check for this + # case and exit early. + + # if ($main::opt_debug) { print STDERR "AddressInc $addr1 = "; } + + my $a1 = substr($addr,-7); + $addr = substr($addr,0,-7); + $sum = hex($a1) + 1; + my $r = sprintf("%07x", $sum); + if ($sum <= 0xfffffff) { + $r = $addr . $r; + # if ($main::opt_debug) { print STDERR "$r\n"; } + return HexExtend($r); + } else { + $r = "0000000"; + } + + $a1 = substr($addr,-7); + $addr = substr($addr,0,-7); + $sum = hex($a1) + 1; + $r = sprintf("%07x", $sum) . $r; + if ($sum <= 0xfffffff) { + $r = $addr . $r; + # if ($main::opt_debug) { print STDERR "$r\n"; } + return HexExtend($r); + } else { + $r = "00000000000000"; + } + + $sum = hex($addr) + 1; + if ($sum > 0xff) { $sum -= 0x100; } + $r = sprintf("%02x", $sum) . $r; + + # if ($main::opt_debug) { print STDERR "$r\n"; } + return $r; + } +} + +# Extract symbols for all PC values found in profile +sub ExtractSymbols { + my $libs = shift; + my $pcset = shift; + + my $symbols = {}; + + # Map each PC value to the containing library. To make this faster, + # we sort libraries by their starting pc value (highest first), and + # advance through the libraries as we advance the pc. 
Sometimes the + # addresses of libraries may overlap with the addresses of the main + # binary, so to make sure the libraries 'win', we iterate over the + # libraries in reverse order (which assumes the binary doesn't start + # in the middle of a library, which seems a fair assumption). + my @pcs = (sort { $a cmp $b } keys(%{$pcset})); # pcset is 0-extended strings + foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) { + my $libname = $lib->[0]; + my $start = $lib->[1]; + my $finish = $lib->[2]; + my $offset = $lib->[3]; + + # Use debug library if it exists + my $debug_libname = DebuggingLibrary($libname); + if ($debug_libname) { + $libname = $debug_libname; + } + + # Get list of pcs that belong in this library. + my $contained = []; + my ($start_pc_index, $finish_pc_index); + # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index]. + for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0; + $finish_pc_index--) { + last if $pcs[$finish_pc_index - 1] le $finish; + } + # Find smallest start_pc_index such that $start <= $pc[$start_pc_index]. + for ($start_pc_index = $finish_pc_index; $start_pc_index > 0; + $start_pc_index--) { + last if $pcs[$start_pc_index - 1] lt $start; + } + # This keeps PC values higher than $pc[$finish_pc_index] in @pcs, + # in case there are overlaps in libraries and the main binary. 
+ @{$contained} = splice(@pcs, $start_pc_index, + $finish_pc_index - $start_pc_index); + # Map to symbols + MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols); + } + + return $symbols; +} + +# Map list of PC values to symbols for a given image +sub MapToSymbols { + my $image = shift; + my $offset = shift; + my $pclist = shift; + my $symbols = shift; + + my $debug = 0; + + # Ignore empty binaries + if ($#{$pclist} < 0) { return; } + + # Figure out the addr2line command to use + my $addr2line = $obj_tool_map{"addr2line"}; + my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image); + if (exists $obj_tool_map{"addr2line_pdb"}) { + $addr2line = $obj_tool_map{"addr2line_pdb"}; + $cmd = ShellEscape($addr2line, "--demangle", "-f", "-C", "-e", $image); + } + + # If "addr2line" isn't installed on the system at all, just use + # nm to get what info we can (function names, but not line numbers). + if (system(ShellEscape($addr2line, "--help") . " >$dev_null 2>&1") != 0) { + MapSymbolsWithNM($image, $offset, $pclist, $symbols); + return; + } + + # "addr2line -i" can produce a variable number of lines per input + # address, with no separator that allows us to tell when data for + # the next address starts. So we find the address for a special + # symbol (_fini) and interleave this address between all real + # addresses passed to addr2line. The name of this special symbol + # can then be used as a separator. + $sep_address = undef; # May be filled in by MapSymbolsWithNM() + my $nm_symbols = {}; + MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols); + if (defined($sep_address)) { + # Only add " -i" to addr2line if the binary supports it. + # addr2line --help returns 0, but not if it sees an unknown flag first. 
+ if (system("$cmd -i --help >$dev_null 2>&1") == 0) { + $cmd .= " -i"; + } else { + $sep_address = undef; # no need for sep_address if we don't support -i + } + } + + # Make file with all PC values with intervening 'sep_address' so + # that we can reliably detect the end of inlined function list + open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n"); + if ($debug) { print("---- $image ---\n"); } + for (my $i = 0; $i <= $#{$pclist}; $i++) { + # addr2line always reads hex addresses, and does not need '0x' prefix. + if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); } + printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); + if (defined($sep_address)) { + printf ADDRESSES ("%s\n", $sep_address); + } + } + close(ADDRESSES); + if ($debug) { + print("----\n"); + system("cat", $main::tmpfile_sym); + print("----\n"); + system("$cmd < " . ShellEscape($main::tmpfile_sym)); + print("----\n"); + } + + open(SYMBOLS, "$cmd <" . ShellEscape($main::tmpfile_sym) . " |") + || error("$cmd: $!\n"); + my $count = 0; # Index in pclist + while () { + # Read fullfunction and filelineinfo from next pair of lines + s/\r?\n$//g; + my $fullfunction = $_; + $_ = ; + s/\r?\n$//g; + my $filelinenum = $_; + + if (defined($sep_address) && $fullfunction eq $sep_symbol) { + # Terminating marker for data for this address + $count++; + next; + } + + $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths + + my $pcstr = $pclist->[$count]; + my $function = ShortFunctionName($fullfunction); + my $nms = $nm_symbols->{$pcstr}; + if (defined($nms)) { + if ($fullfunction eq '??') { + # nm found a symbol for us. + $function = $nms->[0]; + $fullfunction = $nms->[2]; + } else { + # MapSymbolsWithNM tags each routine with its starting address, + # useful in case the image has multiple occurrences of this + # routine. (It uses a syntax that resembles template parameters, + # that are automatically stripped out by ShortFunctionName().) 
+ # addr2line does not provide the same information. So we check + # if nm disambiguated our symbol, and if so take the annotated + # (nm) version of the routine-name. TODO(csilvers): this won't + # catch overloaded, inlined symbols, which nm doesn't see. + # Better would be to do a check similar to nm's, in this fn. + if ($nms->[2] =~ m/^\Q$function\E/) { # sanity check it's the right fn + $function = $nms->[0]; + $fullfunction = $nms->[2]; + } + } + } + + # Prepend to accumulated symbols for pcstr + # (so that caller comes before callee) + my $sym = $symbols->{$pcstr}; + if (!defined($sym)) { + $sym = []; + $symbols->{$pcstr} = $sym; + } + unshift(@{$sym}, $function, $filelinenum, $fullfunction); + if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } + if (!defined($sep_address)) { + # Inlining is off, so this entry ends immediately + $count++; + } + } + close(SYMBOLS); +} + +# Use nm to map the list of referenced PCs to symbols. Return true iff we +# are able to read procedure information via nm. +sub MapSymbolsWithNM { + my $image = shift; + my $offset = shift; + my $pclist = shift; + my $symbols = shift; + + # Get nm output sorted by increasing address + my $symbol_table = GetProcedureBoundaries($image, "."); + if (!%{$symbol_table}) { + return 0; + } + # Start addresses are already the right length (8 or 16 hex digits). + my @names = sort { $symbol_table->{$a}->[0] cmp $symbol_table->{$b}->[0] } + keys(%{$symbol_table}); + + if ($#names < 0) { + # No symbols: just use addresses + foreach my $pc (@{$pclist}) { + my $pcstr = "0x" . 
$pc; + $symbols->{$pc} = [$pcstr, "?", $pcstr]; + } + return 0; + } + + # Sort addresses so we can do a join against nm output + my $index = 0; + my $fullname = $names[0]; + my $name = ShortFunctionName($fullname); + foreach my $pc (sort { $a cmp $b } @{$pclist}) { + # Adjust for mapped offset + my $mpc = AddressSub($pc, $offset); + while (($index < $#names) && ($mpc ge $symbol_table->{$fullname}->[1])){ + $index++; + $fullname = $names[$index]; + $name = ShortFunctionName($fullname); + } + if ($mpc lt $symbol_table->{$fullname}->[1]) { + $symbols->{$pc} = [$name, "?", $fullname]; + } else { + my $pcstr = "0x" . $pc; + $symbols->{$pc} = [$pcstr, "?", $pcstr]; + } + } + return 1; +} + +sub ShortFunctionName { + my $function = shift; + while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types + while ($function =~ s/<[^<>]*>//g) { } # Remove template arguments + $function =~ s/^.*\s+(\w+::)/$1/; # Remove leading type + return $function; +} + +# Trim overly long symbols found in disassembler output +sub CleanDisassembly { + my $d = shift; + while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax) + while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments + return $d; +} + +# Clean file name for display +sub CleanFileName { + my ($f) = @_; + $f =~ s|^/proc/self/cwd/||; + $f =~ s|^\./||; + return $f; +} + +# Make address relative to section and clean up for display +sub UnparseAddress { + my ($offset, $address) = @_; + $address = AddressSub($address, $offset); + $address =~ s/^0x//; + $address =~ s/^0*//; + return $address; +} + +##### Miscellaneous ##### + +# Find the right versions of the above object tools to use. The +# argument is the program file being analyzed, and should be an ELF +# 32-bit or ELF 64-bit executable file. 
The location of the tools +# is determined by considering the following options in this order: +# 1) --tools option, if set +# 2) JEPROF_TOOLS environment variable, if set +# 3) the environment +sub ConfigureObjTools { + my $prog_file = shift; + + # Check for the existence of $prog_file because /usr/bin/file does not + # predictably return error status in prod. + (-e $prog_file) || error("$prog_file does not exist.\n"); + + my $file_type = undef; + if (-e "/usr/bin/file") { + # Follow symlinks (at least for systems where "file" supports that). + my $escaped_prog_file = ShellEscape($prog_file); + $file_type = `/usr/bin/file -L $escaped_prog_file 2>$dev_null || + /usr/bin/file $escaped_prog_file`; + } elsif ($^O == "MSWin32") { + $file_type = "MS Windows"; + } else { + print STDERR "WARNING: Can't determine the file type of $prog_file"; + } + + if ($file_type =~ /64-bit/) { + # Change $address_length to 16 if the program file is ELF 64-bit. + # We can't detect this from many (most?) heap or lock contention + # profiles, since the actual addresses referenced are generally in low + # memory even for 64-bit programs. + $address_length = 16; + } + + if ($file_type =~ /MS Windows/) { + # For windows, we provide a version of nm and addr2line as part of + # the opensource release, which is capable of parsing + # Windows-style PDB executables. It should live in the path, or + # in the same directory as jeprof. + $obj_tool_map{"nm_pdb"} = "nm-pdb"; + $obj_tool_map{"addr2line_pdb"} = "addr2line-pdb"; + } + + if ($file_type =~ /Mach-O/) { + # OS X uses otool to examine Mach-O files, rather than objdump. + $obj_tool_map{"otool"} = "otool"; + $obj_tool_map{"addr2line"} = "false"; # no addr2line + $obj_tool_map{"objdump"} = "false"; # no objdump + } + + # Go fill in %obj_tool_map with the pathnames to use: + foreach my $tool (keys %obj_tool_map) { + $obj_tool_map{$tool} = ConfigureTool($obj_tool_map{$tool}); + } +} + +# Returns the path of a caller-specified object tool. 
If --tools or +# JEPROF_TOOLS are specified, then returns the full path to the tool +# with that prefix. Otherwise, returns the path unmodified (which +# means we will look for it on PATH). +sub ConfigureTool { + my $tool = shift; + my $path; + + # --tools (or $JEPROF_TOOLS) is a comma separated list, where each + # item is either a) a pathname prefix, or b) a map of the form + # :. First we look for an entry of type (b) for our + # tool. If one is found, we use it. Otherwise, we consider all the + # pathname prefixes in turn, until one yields an existing file. If + # none does, we use a default path. + my $tools = $main::opt_tools || $ENV{"JEPROF_TOOLS"} || ""; + if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) { + $path = $2; + # TODO(csilvers): sanity-check that $path exists? Hard if it's relative. + } elsif ($tools ne '') { + foreach my $prefix (split(',', $tools)) { + next if ($prefix =~ /:/); # ignore "tool:fullpath" entries in the list + if (-x $prefix . $tool) { + $path = $prefix . $tool; + last; + } + } + if (!$path) { + error("No '$tool' found with prefix specified by " . + "--tools (or \$JEPROF_TOOLS) '$tools'\n"); + } + } else { + # ... otherwise use the version that exists in the same directory as + # jeprof. If there's nothing there, use $PATH. + $0 =~ m,[^/]*$,; # this is everything after the last slash + my $dirname = $`; # this is everything up to and including the last slash + if (-x "$dirname$tool") { + $path = "$dirname$tool"; + } else { + $path = $tool; + } + } + if ($main::opt_debug) { print STDERR "Using '$path' for '$tool'.\n"; } + return $path; +} + +sub ShellEscape { + my @escaped_words = (); + foreach my $word (@_) { + my $escaped_word = $word; + if ($word =~ m![^a-zA-Z0-9/.,_=-]!) 
{ # check for anything not in whitelist + $escaped_word =~ s/'/'\\''/; + $escaped_word = "'$escaped_word'"; + } + push(@escaped_words, $escaped_word); + } + return join(" ", @escaped_words); +} + +sub cleanup { + unlink($main::tmpfile_sym); + unlink(keys %main::tempnames); + + # We leave any collected profiles in $HOME/jeprof in case the user wants + # to look at them later. We print a message informing them of this. + if ((scalar(@main::profile_files) > 0) && + defined($main::collected_profile)) { + if (scalar(@main::profile_files) == 1) { + print STDERR "Dynamically gathered profile is in $main::collected_profile\n"; + } + print STDERR "If you want to investigate this profile further, you can do:\n"; + print STDERR "\n"; + print STDERR " jeprof \\\n"; + print STDERR " $main::prog \\\n"; + print STDERR " $main::collected_profile\n"; + print STDERR "\n"; + } +} + +sub sighandler { + cleanup(); + exit(1); +} + +sub error { + my $msg = shift; + print STDERR $msg; + cleanup(); + exit(1); +} + + +# Run $nm_command and get all the resulting procedure boundaries whose +# names match "$regexp" and returns them in a hashtable mapping from +# procedure name to a two-element vector of [start address, end address] +sub GetProcedureBoundariesViaNm { + my $escaped_nm_command = shift; # shell-escaped + my $regexp = shift; + + my $symbol_table = {}; + open(NM, "$escaped_nm_command |") || error("$escaped_nm_command: $!\n"); + my $last_start = "0"; + my $routine = ""; + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (m/^\s*([0-9a-f]+) (.) (..*)/) { + my $start_val = $1; + my $type = $2; + my $this_routine = $3; + + # It's possible for two symbols to share the same address, if + # one is a zero-length variable (like __start_google_malloc) or + # one symbol is a weak alias to another (like __libc_malloc). + # In such cases, we want to ignore all values except for the + # actual symbol, which in nm-speak has type "T". 
The logic + # below does this, though it's a bit tricky: what happens when + # we have a series of lines with the same address, is the first + # one gets queued up to be processed. However, it won't + # *actually* be processed until later, when we read a line with + # a different address. That means that as long as we're reading + # lines with the same address, we have a chance to replace that + # item in the queue, which we do whenever we see a 'T' entry -- + # that is, a line with type 'T'. If we never see a 'T' entry, + # we'll just go ahead and process the first entry (which never + # got touched in the queue), and ignore the others. + if ($start_val eq $last_start && $type =~ /t/i) { + # We are the 'T' symbol at this address, replace previous symbol. + $routine = $this_routine; + next; + } elsif ($start_val eq $last_start) { + # We're not the 'T' symbol at this address, so ignore us. + next; + } + + if ($this_routine eq $sep_symbol) { + $sep_address = HexExtend($start_val); + } + + # Tag this routine with the starting address in case the image + # has multiple occurrences of this routine. We use a syntax + # that resembles template parameters that are automatically + # stripped out by ShortFunctionName() + $this_routine .= "<$start_val>"; + + if (defined($routine) && $routine =~ m/$regexp/) { + $symbol_table->{$routine} = [HexExtend($last_start), + HexExtend($start_val)]; + } + $last_start = $start_val; + $routine = $this_routine; + } elsif (m/^Loaded image name: (.+)/) { + # The win32 nm workalike emits information about the binary it is using. + if ($main::opt_debug) { print STDERR "Using Image $1\n"; } + } elsif (m/^PDB file name: (.+)/) { + # The win32 nm workalike emits information about the pdb it is using. + if ($main::opt_debug) { print STDERR "Using PDB $1\n"; } + } + } + close(NM); + # Handle the last line in the nm output. Unfortunately, we don't know + # how big this last symbol is, because we don't know how big the file + # is. 
For now, we just give it a size of 0. + # TODO(csilvers): do better here. + if (defined($routine) && $routine =~ m/$regexp/) { + $symbol_table->{$routine} = [HexExtend($last_start), + HexExtend($last_start)]; + } + return $symbol_table; +} + +# Gets the procedure boundaries for all routines in "$image" whose names +# match "$regexp" and returns them in a hashtable mapping from procedure +# name to a two-element vector of [start address, end address]. +# Will return an empty map if nm is not installed or not working properly. +sub GetProcedureBoundaries { + my $image = shift; + my $regexp = shift; + + # If $image doesn't start with /, then put ./ in front of it. This works + # around an obnoxious bug in our probing of nm -f behavior. + # "nm -f $image" is supposed to fail on GNU nm, but if: + # + # a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND + # b. you have a.out in your current directory (a not uncommon occurrence) + # + # then "nm -f $image" succeeds because -f only looks at the first letter of + # the argument, which looks valid because it's [BbSsPp], and then since + # there's no image provided, it looks for a.out and finds it. + # + # This regex makes sure that $image starts with . or /, forcing the -f + # parsing to fail since . and / are not valid formats. + $image =~ s#^[^/]#./$&#; + + # For libc libraries, the copy in /usr/lib/debug contains debugging symbols + my $debugging = DebuggingLibrary($image); + if ($debugging) { + $image = $debugging; + } + + my $nm = $obj_tool_map{"nm"}; + my $cppfilt = $obj_tool_map{"c++filt"}; + + # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm + # binary doesn't support --demangle. In addition, for OS X we need + # to use the -f flag to get 'flat' nm output (otherwise we don't sort + # properly and get incorrect results). Unfortunately, GNU nm uses -f + # in an incompatible way. So first we test whether our nm supports + # --demangle and -f. 
+ my $demangle_flag = ""; + my $cppfilt_flag = ""; + my $to_devnull = ">$dev_null 2>&1"; + if (system(ShellEscape($nm, "--demangle", $image) . $to_devnull) == 0) { + # In this mode, we do "nm --demangle " + $demangle_flag = "--demangle"; + $cppfilt_flag = ""; + } elsif (system(ShellEscape($cppfilt, $image) . $to_devnull) == 0) { + # In this mode, we do "nm | c++filt" + $cppfilt_flag = " | " . ShellEscape($cppfilt); + }; + my $flatten_flag = ""; + if (system(ShellEscape($nm, "-f", $image) . $to_devnull) == 0) { + $flatten_flag = "-f"; + } + + # Finally, in the case $imagie isn't a debug library, we try again with + # -D to at least get *exported* symbols. If we can't use --demangle, + # we use c++filt instead, if it exists on this system. + my @nm_commands = (ShellEscape($nm, "-n", $flatten_flag, $demangle_flag, + $image) . " 2>$dev_null $cppfilt_flag", + ShellEscape($nm, "-D", "-n", $flatten_flag, $demangle_flag, + $image) . " 2>$dev_null $cppfilt_flag", + # 6nm is for Go binaries + ShellEscape("6nm", "$image") . " 2>$dev_null | sort", + ); + + # If the executable is an MS Windows PDB-format executable, we'll + # have set up obj_tool_map("nm_pdb"). In this case, we actually + # want to use both unix nm and windows-specific nm_pdb, since + # PDB-format executables can apparently include dwarf .o files. + if (exists $obj_tool_map{"nm_pdb"}) { + push(@nm_commands, + ShellEscape($obj_tool_map{"nm_pdb"}, "--demangle", $image) + . " 2>$dev_null"); + } + + foreach my $nm_command (@nm_commands) { + my $symbol_table = GetProcedureBoundariesViaNm($nm_command, $regexp); + return $symbol_table if (%{$symbol_table}); + } + my $symbol_table = {}; + return $symbol_table; +} + + +# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings. +# To make them more readable, we add underscores at interesting places. +# This routine removes the underscores, producing the canonical representation +# used by jeprof to represent addresses, particularly in the tested routines. 
+sub CanonicalHex { + my $arg = shift; + return join '', (split '_',$arg); +} + + +# Unit test for AddressAdd: +sub AddressAddUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressAddUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressAdd ($row->[0], $row->[1]); + if ($sum ne $row->[2]) { + printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, + $row->[0], $row->[1], $row->[2]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressAdd 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressAdd (CanonicalHex($row->[0]), CanonicalHex($row->[1])); + my $expected = join '', (split '_',$row->[2]); + if ($sum ne CanonicalHex($row->[2])) { + printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, + $row->[0], $row->[1], $row->[2]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressAdd 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Unit test for AddressSub: +sub AddressSubUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressSubUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. 
Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressSub ($row->[0], $row->[1]); + if ($sum ne $row->[3]) { + printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, + $row->[0], $row->[1], $row->[3]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressSub 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressSub (CanonicalHex($row->[0]), CanonicalHex($row->[1])); + if ($sum ne CanonicalHex($row->[3])) { + printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, + $row->[0], $row->[1], $row->[3]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressSub 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Unit test for AddressInc: +sub AddressIncUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressIncUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. 
Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressInc ($row->[0]); + if ($sum ne $row->[4]) { + printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, + $row->[0], $row->[4]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressInc 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressInc (CanonicalHex($row->[0])); + if ($sum ne CanonicalHex($row->[4])) { + printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, + $row->[0], $row->[4]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressInc 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Driver for unit tests. +# Currently just the address add/subtract/increment routines for 64-bit. +sub RunUnitTests { + my $error_count = 0; + + # This is a list of tuples [a, b, a+b, a-b, a+1] + my $unit_test_data_8 = [ + [qw(aaaaaaaa 50505050 fafafafa 5a5a5a5a aaaaaaab)], + [qw(50505050 aaaaaaaa fafafafa a5a5a5a6 50505051)], + [qw(ffffffff aaaaaaaa aaaaaaa9 55555555 00000000)], + [qw(00000001 ffffffff 00000000 00000002 00000002)], + [qw(00000001 fffffff0 fffffff1 00000011 00000002)], + ]; + my $unit_test_data_16 = [ + # The implementation handles data in 7-nibble chunks, so those are the + # interesting boundaries. 
+ [qw(aaaaaaaa 50505050 + 00_000000f_afafafa 00_0000005_a5a5a5a 00_000000a_aaaaaab)], + [qw(50505050 aaaaaaaa + 00_000000f_afafafa ff_ffffffa_5a5a5a6 00_0000005_0505051)], + [qw(ffffffff aaaaaaaa + 00_000001a_aaaaaa9 00_0000005_5555555 00_0000010_0000000)], + [qw(00000001 ffffffff + 00_0000010_0000000 ff_ffffff0_0000002 00_0000000_0000002)], + [qw(00000001 fffffff0 + 00_000000f_ffffff1 ff_ffffff0_0000011 00_0000000_0000002)], + + [qw(00_a00000a_aaaaaaa 50505050 + 00_a00000f_afafafa 00_a000005_a5a5a5a 00_a00000a_aaaaaab)], + [qw(0f_fff0005_0505050 aaaaaaaa + 0f_fff000f_afafafa 0f_ffefffa_5a5a5a6 0f_fff0005_0505051)], + [qw(00_000000f_fffffff 01_800000a_aaaaaaa + 01_800001a_aaaaaa9 fe_8000005_5555555 00_0000010_0000000)], + [qw(00_0000000_0000001 ff_fffffff_fffffff + 00_0000000_0000000 00_0000000_0000002 00_0000000_0000002)], + [qw(00_0000000_0000001 ff_fffffff_ffffff0 + ff_fffffff_ffffff1 00_0000000_0000011 00_0000000_0000002)], + ]; + + $error_count += AddressAddUnitTest($unit_test_data_8, $unit_test_data_16); + $error_count += AddressSubUnitTest($unit_test_data_8, $unit_test_data_16); + $error_count += AddressIncUnitTest($unit_test_data_8, $unit_test_data_16); + if ($error_count > 0) { + print STDERR $error_count, " errors: FAILED\n"; + } else { + print STDERR "PASS\n"; + } + exit ($error_count); +} \ No newline at end of file diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 3e68b0b6310..b76454ffab8 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -40,11 +40,7 @@ use openssl::{ x509::X509, }; use pin_project::pin_project; -pub use profile::{ - activate_heap_profile, deactivate_heap_profile, heap_profiles_dir, jeprof_heap_profile, - list_heap_profiles, read_file, start_one_cpu_profile, start_one_heap_profile, - HEAP_PROFILE_REGEX, -}; +use profile::*; use prometheus::TEXT_FORMAT; use regex::Regex; use resource_control::ResourceGroupManager; @@ -170,16 +166,22 @@ where Ok(val) => val, Err(err) 
=> return Ok(make_response(StatusCode::BAD_REQUEST, err.to_string())), }, - None => 60, + None => 0, }; - let interval = Duration::from_secs(interval); - let period = GLOBAL_TIMER_HANDLE - .interval(Instant::now() + interval, interval) - .compat() - .map_ok(|_| ()) - .map_err(|_| TIMER_CANCELED.to_owned()) - .into_stream(); + let period = if interval == 0 { + None + } else { + let interval = Duration::from_secs(interval); + Some( + GLOBAL_TIMER_HANDLE + .interval(Instant::now() + interval, interval) + .compat() + .map_ok(|_| ()) + .map_err(|_| TIMER_CANCELED.to_owned()) + .into_stream(), + ) + }; let (tx, rx) = oneshot::channel(); let callback = move || tx.send(()).unwrap_or_default(); let res = Handle::current().spawn(activate_heap_profile(period, store_path, callback)); @@ -201,7 +203,6 @@ where Ok(make_response(StatusCode::OK, body)) } - #[allow(dead_code)] async fn dump_heap_prof_to_resp(req: Request) -> hyper::Result> { let query = req.uri().query().unwrap_or(""); let query_pairs: HashMap<_, _> = url::form_urlencoded::parse(query.as_bytes()).collect(); @@ -239,21 +240,7 @@ where return Ok(make_response(StatusCode::BAD_REQUEST, errmsg)); } } else { - let mut seconds = 10; - if let Some(s) = query_pairs.get("seconds") { - match s.parse() { - Ok(val) => seconds = val, - Err(_) => { - let errmsg = "request should have seconds argument".to_owned(); - return Ok(make_response(StatusCode::BAD_REQUEST, errmsg)); - } - } - } - let timer = GLOBAL_TIMER_HANDLE.delay(Instant::now() + Duration::from_secs(seconds)); - let end = Compat01As03::new(timer) - .map_err(|_| TIMER_CANCELED.to_owned()) - .into_future(); - start_one_heap_profile(end, use_jeprof).await + dump_one_heap_profile() }; match result { diff --git a/src/server/status_server/profile.rs b/src/server/status_server/profile.rs index dd49c394046..3941c6c12b6 100644 --- a/src/server/status_server/profile.rs +++ b/src/server/status_server/profile.rs @@ -1,11 +1,11 @@ // Copyright 2021 TiKV Project Authors. 
Licensed under Apache-2.0. use std::{ fs::{File, Metadata}, - io::Read, + io::{Read, Write}, path::PathBuf, pin::Pin, - process::Command, - sync::Mutex as StdMutex, + process::{Command, Stdio}, + sync::Mutex, time::{Duration, UNIX_EPOCH}, }; @@ -23,7 +23,6 @@ use regex::Regex; use tempfile::{NamedTempFile, TempDir}; #[cfg(not(test))] use tikv_alloc::{activate_prof, deactivate_prof, dump_prof}; -use tokio::sync::{Mutex, MutexGuard}; #[cfg(test)] pub use self::test_utils::TEST_PROFILE_MUTEX; @@ -35,10 +34,10 @@ pub const HEAP_PROFILE_SUFFIX: &str = ".heap"; pub const HEAP_PROFILE_REGEX: &str = r"^[0-9]{6,6}\.heap$"; lazy_static! { - // If it's locked it means there are already a heap or CPU profiling. - static ref PROFILE_MUTEX: Mutex<()> = Mutex::new(()); - // The channel is used to deactivate a profiling. - static ref PROFILE_ACTIVE: StdMutex, TempDir)>> = StdMutex::new(None); + // If it's some it means there are already a CPU profiling. + static ref CPU_PROFILE_ACTIVE: Mutex> = Mutex::new(None); + // If it's some it means there are already a heap profiling. The channel is used to deactivate a profiling. + static ref HEAP_PROFILE_ACTIVE: Mutex>, TempDir)>> = Mutex::new(None); // To normalize thread names. static ref THREAD_NAME_RE: Regex = @@ -48,32 +47,26 @@ lazy_static! 
{ type OnEndFn = Box Result + Send + 'static>; -struct ProfileGuard<'a, I, T> { - _guard: MutexGuard<'a, ()>, +struct ProfileRunner { item: Option, on_end: Option>, end: BoxFuture<'static, Result<(), String>>, } -impl<'a, I, T> Unpin for ProfileGuard<'a, I, T> {} +impl Unpin for ProfileRunner {} -impl<'a, I, T> ProfileGuard<'a, I, T> { +impl ProfileRunner { fn new( on_start: F1, on_end: F2, end: BoxFuture<'static, Result<(), String>>, - ) -> Result, String> + ) -> Result where F1: FnOnce() -> Result, F2: FnOnce(I) -> Result + Send + 'static, { - let _guard = match PROFILE_MUTEX.try_lock() { - Ok(guard) => guard, - _ => return Err("Already in Profiling".to_owned()), - }; let item = on_start()?; - Ok(ProfileGuard { - _guard, + Ok(ProfileRunner { item: Some(item), on_end: Some(Box::new(on_end) as OnEndFn), end, @@ -81,7 +74,7 @@ impl<'a, I, T> ProfileGuard<'a, I, T> { } } -impl<'a, I, T> Future for ProfileGuard<'a, I, T> { +impl Future for ProfileRunner { type Output = Result; fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { match self.end.as_mut().poll(cx) { @@ -99,34 +92,22 @@ impl<'a, I, T> Future for ProfileGuard<'a, I, T> { } } -/// Trigger a heap profie and return the content. -#[allow(dead_code)] -pub async fn start_one_heap_profile(end: F, use_jeprof: bool) -> Result, String> -where - F: Future> + Send + 'static, -{ - let on_start = || activate_prof().map_err(|e| format!("activate_prof: {}", e)); - - let on_end = move |_| { - deactivate_prof().map_err(|e| format!("deactivate_prof: {}", e))?; - let f = NamedTempFile::new().map_err(|e| format!("create tmp file fail: {}", e))?; - let path = f.path().to_str().unwrap(); - dump_prof(path).map_err(|e| format!("dump_prof: {}", e))?; - if use_jeprof { - jeprof_heap_profile(path) - } else { - read_file(path) - } - }; - - ProfileGuard::new(on_start, on_end, end.boxed())?.await +/// Trigger a heap profile and return the content. 
+pub fn dump_one_heap_profile() -> Result, String> { + if HEAP_PROFILE_ACTIVE.lock().unwrap().is_none() { + return Err("heap profiling is not activated".to_owned()); + } + let f = NamedTempFile::new().map_err(|e| format!("create tmp file fail: {}", e))?; + let path = f.path().to_str().unwrap(); + dump_prof(path).map_err(|e| format!("dump_prof: {}", e))?; + read_file(path) } /// Activate heap profile and call `callback` if successfully. /// `deactivate_heap_profile` can only be called after it's notified from /// `callback`. pub async fn activate_heap_profile( - dump_period: S, + dump_period: Option, store_path: PathBuf, callback: F, ) -> Result<(), String> @@ -134,6 +115,10 @@ where S: Stream> + Send + Unpin + 'static, F: FnOnce() + Send + 'static, { + if HEAP_PROFILE_ACTIVE.lock().unwrap().is_some() { + return Err("Already in Heap Profiling".to_owned()); + } + let (tx, rx) = oneshot::channel(); let dir = tempfile::Builder::new() .prefix("heap-") @@ -142,40 +127,55 @@ where let dir_path = dir.path().to_str().unwrap().to_owned(); let on_start = move || { - let mut activate = PROFILE_ACTIVE.lock().unwrap(); + let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); assert!(activate.is_none()); + *activate = Some((Some(tx), dir)); activate_prof().map_err(|e| format!("activate_prof: {}", e))?; - *activate = Some((tx, dir)); callback(); info!("periodical heap profiling is started"); Ok(()) }; let on_end = |_| { - deactivate_heap_profile(); - deactivate_prof().map_err(|e| format!("deactivate_prof: {}", e)) + let res = deactivate_prof().map_err(|e| format!("deactivate_prof: {}", e)); + *HEAP_PROFILE_ACTIVE.lock().unwrap() = None; + res }; let end = async move { - select! { - _ = rx.fuse() => { - info!("periodical heap profiling is canceled"); - Ok(()) - }, - res = dump_heap_profile_periodically(dump_period, dir_path).fuse() => { - warn!("the heap profiling dump loop shouldn't break"; "res" => ?res); - res + if let Some(dump_period) = dump_period { + select! 
{ + _ = rx.fuse() => { + info!("periodical heap profiling is canceled"); + Ok(()) + }, + res = dump_heap_profile_periodically(dump_period, dir_path).fuse() => { + warn!("the heap profiling dump loop shouldn't break"; "res" => ?res); + res + } } + } else { + let _ = rx.await; + info!("periodical heap profiling is canceled"); + Ok(()) } }; - ProfileGuard::new(on_start, on_end, end.boxed())?.await + ProfileRunner::new(on_start, on_end, end.boxed())?.await } /// Deactivate heap profile. Return `false` if it hasn't been activated. pub fn deactivate_heap_profile() -> bool { - let mut activate = PROFILE_ACTIVE.lock().unwrap(); - activate.take().is_some() + let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); + match activate.as_mut() { + Some((tx, _)) => { + if let Some(tx) = tx.take() { + let _ = tx.send(()); + } + true + } + None => false, + } } /// Trigger one cpu profile. @@ -187,7 +187,14 @@ pub async fn start_one_cpu_profile( where F: Future> + Send + 'static, { + if CPU_PROFILE_ACTIVE.lock().unwrap().is_some() { + return Err("Already in CPU Profiling".to_owned()); + } + let on_start = || { + let mut activate = CPU_PROFILE_ACTIVE.lock().unwrap(); + assert!(activate.is_none()); + *activate = Some(()); let guard = pprof::ProfilerGuardBuilder::default() .frequency(frequency) .blocklist(&["libc", "libgcc", "pthread", "vdso"]) @@ -218,10 +225,13 @@ where .flamegraph(&mut body) .map_err(|e| format!("generate flamegraph from report fail: {}", e))?; } + drop(guard); + *CPU_PROFILE_ACTIVE.lock().unwrap() = None; + Ok(body) }; - ProfileGuard::new(on_start, on_end, end.boxed())?.await + ProfileRunner::new(on_start, on_end, end.boxed())?.await } pub fn read_file(path: &str) -> Result, String> { @@ -234,9 +244,26 @@ pub fn read_file(path: &str) -> Result, String> { pub fn jeprof_heap_profile(path: &str) -> Result, String> { info!("using jeprof to process {}", path); - let output = Command::new("./jeprof") - .args(["--show_bytes", "./bin/tikv-server", path, "--svg"]) - 
.output() + let bin = std::env::current_exe().map_err(|e| format!("get current exe path fail: {}", e))?; + let mut jeprof = Command::new("perl") + .args([ + "/dev/stdin", + "--show_bytes", + &bin.as_os_str().to_string_lossy(), + path, + "--svg", + ]) + .stdin(Stdio::piped()) + .spawn() + .map_err(|e| format!("spawn jeprof fail: {}", e))?; + jeprof + .stdin + .take() + .unwrap() + .write_all(include_bytes!("jeprof.in")) + .unwrap(); + let output = jeprof + .wait_with_output() .map_err(|e| format!("jeprof: {}", e))?; if !output.status.success() { let stderr = std::str::from_utf8(&output.stderr).unwrap_or("invalid utf8"); @@ -246,7 +273,7 @@ pub fn jeprof_heap_profile(path: &str) -> Result, String> { } pub fn heap_profiles_dir() -> Option { - PROFILE_ACTIVE + HEAP_PROFILE_ACTIVE .lock() .unwrap() .as_ref() @@ -381,7 +408,7 @@ mod tests { .build() .unwrap(); - let expected = "Already in Profiling"; + let expected = "Already in CPU Profiling"; let (tx1, rx1) = oneshot::channel(); let rx1 = rx1.map_err(|_| "channel canceled".to_owned()); @@ -393,17 +420,29 @@ mod tests { let res2 = rt.spawn(start_one_cpu_profile(rx2, 99, false)); assert_eq!(block_on(res2).unwrap().unwrap_err(), expected); - let (_tx2, rx2) = oneshot::channel(); - let rx2 = rx2.map_err(|_| "channel canceled".to_owned()); - let res2 = rt.spawn(start_one_heap_profile(rx2, false)); - assert_eq!(block_on(res2).unwrap().unwrap_err(), expected); + drop(tx1); + block_on(res1).unwrap().unwrap_err(); + + let expected = "Already in Heap Profiling"; + + let (tx1, rx1) = mpsc::channel(1); + let res1 = rt.spawn(activate_heap_profile( + Some(rx1), + std::env::temp_dir(), + || {}, + )); + thread::sleep(Duration::from_millis(100)); let (_tx2, rx2) = mpsc::channel(1); - let res2 = rt.spawn(activate_heap_profile(rx2, std::env::temp_dir(), || {})); + let res2 = rt.spawn(activate_heap_profile( + Some(rx2), + std::env::temp_dir(), + || {}, + )); assert_eq!(block_on(res2).unwrap().unwrap_err(), expected); drop(tx1); - 
block_on(res1).unwrap().unwrap_err(); + block_on(res1).unwrap().unwrap(); } #[test] @@ -416,7 +455,7 @@ mod tests { // Test activated profiling can be stopped by canceling the period stream. let (tx, rx) = mpsc::channel(1); - let res = rt.spawn(activate_heap_profile(rx, std::env::temp_dir(), || {})); + let res = rt.spawn(activate_heap_profile(Some(rx), std::env::temp_dir(), || {})); drop(tx); block_on(res).unwrap().unwrap(); @@ -427,7 +466,7 @@ mod tests { let (_tx, _rx) = mpsc::channel(1); let res = rt.spawn(activate_heap_profile( - _rx, + Some(_rx), std::env::temp_dir(), on_activated, )); @@ -446,7 +485,7 @@ mod tests { // Test heap profiling can be stopped by sending an error. let (mut tx, rx) = mpsc::channel(1); - let res = rt.spawn(activate_heap_profile(rx, std::env::temp_dir(), || {})); + let res = rt.spawn(activate_heap_profile(Some(rx), std::env::temp_dir(), || {})); block_on(tx.send(Err("test".to_string()))).unwrap(); block_on(res).unwrap().unwrap_err(); @@ -457,7 +496,7 @@ mod tests { let (_tx, _rx) = mpsc::channel(1); let res = rt.spawn(activate_heap_profile( - _rx, + Some(_rx), std::env::temp_dir(), on_activated, )); From 19e1d949eb3521d88ed80d100738576fa0b2570c Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 23 Oct 2023 14:12:30 +0800 Subject: [PATCH 0971/1149] raftstore: fix an OOM issue by paginate scan unapplied config changes (#15806) close tikv/tikv#15770 Before start election, raft-rs has to check if there is any unapplied conf change entry. In the current implementation, this needs to scan logs from [unapplied_index, committed_index]. It essentially takes unbounded memory when raft peers that has many unapplied logs. To fix the issue, TiKV can paginate scan raft log which has a fixed memory usage upper bound. 
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f222631d772..ff8db731924 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4259,7 +4259,7 @@ dependencies = [ [[package]] name = "raft" version = "0.7.0" -source = "git+https://github.com/tikv/raft-rs?branch=master#9d360a3b0cdb691da8e500a4f73c457b605a1d73" +source = "git+https://github.com/tikv/raft-rs?branch=master#f60fb9e143e5b93f7db8917ea376cda04effcbb4" dependencies = [ "bytes", "fxhash", @@ -4318,7 +4318,7 @@ dependencies = [ [[package]] name = "raft-proto" version = "0.7.0" -source = "git+https://github.com/tikv/raft-rs?branch=master#9d360a3b0cdb691da8e500a4f73c457b605a1d73" +source = "git+https://github.com/tikv/raft-rs?branch=master#f60fb9e143e5b93f7db8917ea376cda04effcbb4" dependencies = [ "bytes", "protobuf", From 6075c3a00da1ea4bf9c2f1cc9d802ec44dfa0df5 Mon Sep 17 00:00:00 2001 From: TonsnakeLin <87681388+TonsnakeLin@users.noreply.github.com> Date: Mon, 23 Oct 2023 16:05:02 +0800 Subject: [PATCH 0972/1149] raftstore: calculate group id for every peer when it is syn-recover status (#15785) close tikv/tikv#15784 Signed-off-by: TonsnakeLin Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/peer.rs | 35 ++++---- components/test_pd_client/src/pd.rs | 22 ++++- .../raftstore/test_replication_mode.rs | 86 +++++++++++++++++-- 3 files changed, 119 insertions(+), 24 deletions(-) diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index e72d32f8e91..cab88a26585 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -975,29 +975,32 @@ where pub fn switch_replication_mode(&mut self, state: &Mutex) { self.replication_sync = false; let guard = state.lock().unwrap(); - let enable_group_commit = if 
guard.status().get_mode() == ReplicationMode::Majority { - self.replication_mode_version = 0; - self.dr_auto_sync_state = DrAutoSyncState::Async; - false - } else { - self.dr_auto_sync_state = guard.status().get_dr_auto_sync().get_state(); - self.replication_mode_version = guard.status().get_dr_auto_sync().state_id; - match guard.status().get_dr_auto_sync().get_state() { - // SyncRecover will enable group commit after it catches up logs. - DrAutoSyncState::Async | DrAutoSyncState::SyncRecover => false, - _ => true, - } - }; + let (enable_group_commit, calculate_group_id) = + if guard.status().get_mode() == ReplicationMode::Majority { + self.replication_mode_version = 0; + self.dr_auto_sync_state = DrAutoSyncState::Async; + (false, false) + } else { + self.dr_auto_sync_state = guard.status().get_dr_auto_sync().get_state(); + self.replication_mode_version = guard.status().get_dr_auto_sync().state_id; + match guard.status().get_dr_auto_sync().get_state() { + // SyncRecover will enable group commit after it catches up logs. + DrAutoSyncState::Async => (false, false), + DrAutoSyncState::SyncRecover => (false, true), + _ => (true, true), + } + }; drop(guard); - self.switch_group_commit(enable_group_commit, state); + self.switch_group_commit(enable_group_commit, calculate_group_id, state); } fn switch_group_commit( &mut self, enable_group_commit: bool, + calculate_group_id: bool, state: &Mutex, ) { - if enable_group_commit { + if enable_group_commit || calculate_group_id { let mut guard = state.lock().unwrap(); let ids = mem::replace( guard.calculate_commit_group( @@ -5141,7 +5144,7 @@ where // should enable group commit to promise `IntegrityOverLabel`. then safe // to switch to the `Sync` phase. 
if self.dr_auto_sync_state == DrAutoSyncState::SyncRecover { - self.switch_group_commit(true, &ctx.global_replication_state) + self.switch_group_commit(true, true, &ctx.global_replication_state) } self.replication_sync = true; } diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index a9141bf6299..7f00cf35ccd 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -1437,15 +1437,33 @@ impl TestPdClient { cluster.replication_status = Some(status); } - pub fn switch_replication_mode(&self, state: DrAutoSyncState, available_stores: Vec) { + pub fn switch_replication_mode( + &self, + state: Option, + available_stores: Vec, + ) { let mut cluster = self.cluster.wl(); let status = cluster.replication_status.as_mut().unwrap(); + if state.is_none() { + status.set_mode(ReplicationMode::Majority); + let mut dr = status.mut_dr_auto_sync(); + dr.state_id += 1; + return; + } let mut dr = status.mut_dr_auto_sync(); dr.state_id += 1; - dr.set_state(state); + dr.set_state(state.unwrap()); dr.available_stores = available_stores; } + pub fn switch_to_drautosync_mode(&self) { + let mut cluster = self.cluster.wl(); + let status = cluster.replication_status.as_mut().unwrap(); + status.set_mode(ReplicationMode::DrAutoSync); + let mut dr = status.mut_dr_auto_sync(); + dr.state_id += 1; + } + pub fn region_replication_status(&self, region_id: u64) -> RegionReplicationStatus { self.cluster .rl() diff --git a/tests/integrations/raftstore/test_replication_mode.rs b/tests/integrations/raftstore/test_replication_mode.rs index 367ac63aabb..38054c1a995 100644 --- a/tests/integrations/raftstore/test_replication_mode.rs +++ b/tests/integrations/raftstore/test_replication_mode.rs @@ -34,6 +34,18 @@ fn run_cluster(cluster: &mut Cluster) { cluster.must_put(b"k1", b"v0"); } +fn prepare_labels(cluster: &mut Cluster) { + cluster.add_label(1, "dc", "dc1"); + cluster.add_label(2, "dc", "dc1"); + cluster.add_label(3, "dc", "dc2"); + 
cluster.add_label(1, "zone", "z1"); + cluster.add_label(2, "zone", "z2"); + cluster.add_label(3, "zone", "z3"); + cluster.add_label(1, "host", "h1"); + cluster.add_label(2, "host", "h2"); + cluster.add_label(3, "host", "h3"); +} + /// When using DrAutoSync replication mode, data should be replicated to /// different labels before committed. #[test] @@ -119,7 +131,7 @@ fn test_sync_recover_after_apply_snapshot() { // swith to async cluster .pd_client - .switch_replication_mode(DrAutoSyncState::Async, vec![]); + .switch_replication_mode(Some(DrAutoSyncState::Async), vec![]); rx.recv_timeout(Duration::from_millis(100)).unwrap(); must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); thread::sleep(Duration::from_millis(100)); @@ -136,7 +148,7 @@ fn test_sync_recover_after_apply_snapshot() { cluster .pd_client - .switch_replication_mode(DrAutoSyncState::SyncRecover, vec![]); + .switch_replication_mode(Some(DrAutoSyncState::SyncRecover), vec![]); thread::sleep(Duration::from_millis(100)); // Add node 3 back, snapshot will apply cluster.clear_send_filters(); @@ -265,7 +277,7 @@ fn test_switching_replication_mode() { cluster .pd_client - .switch_replication_mode(DrAutoSyncState::Async, vec![]); + .switch_replication_mode(Some(DrAutoSyncState::Async), vec![]); rx.recv_timeout(Duration::from_millis(100)).unwrap(); must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); thread::sleep(Duration::from_millis(100)); @@ -275,7 +287,7 @@ fn test_switching_replication_mode() { cluster .pd_client - .switch_replication_mode(DrAutoSyncState::SyncRecover, vec![]); + .switch_replication_mode(Some(DrAutoSyncState::SyncRecover), vec![]); thread::sleep(Duration::from_millis(100)); let mut request = new_request( region.get_id(), @@ -331,7 +343,7 @@ fn test_replication_mode_allowlist() { run_cluster(&mut cluster); cluster .pd_client - .switch_replication_mode(DrAutoSyncState::Async, vec![1]); + .switch_replication_mode(Some(DrAutoSyncState::Async), vec![1]); 
thread::sleep(Duration::from_millis(100)); // 2,3 are paused, so it should not be able to write. @@ -357,7 +369,7 @@ fn test_replication_mode_allowlist() { // clear allowlist. cluster .pd_client - .switch_replication_mode(DrAutoSyncState::Async, vec![]); + .switch_replication_mode(Some(DrAutoSyncState::Async), vec![]); rx.recv_timeout(Duration::from_millis(100)).unwrap(); must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); } @@ -456,6 +468,68 @@ fn test_migrate_replication_mode() { assert_eq!(state.state, RegionReplicationState::IntegrityOverLabel); } +#[test] +fn test_migrate_majority_to_drautosync() { + // 1. start cluster, enable dr-auto-sync and set labels. + let mut cluster = new_server_cluster(0, 3); + cluster.pd_client.disable_default_operator(); + cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); + cluster.cfg.raft_store.raft_log_gc_threshold = 10; + prepare_labels(&mut cluster); + cluster.run(); + cluster.must_transfer_leader(1, new_peer(1, 1)); + cluster.must_put(b"k1", b"v0"); + cluster.pd_client.configure_dr_auto_sync("dc"); + thread::sleep(Duration::from_millis(100)); + let region = cluster.get_region(b"k1"); + let mut request = new_request( + region.get_id(), + region.get_region_epoch().clone(), + vec![new_put_cf_cmd("default", b"k2", b"v2")], + false, + ); + request.mut_header().set_peer(new_peer(1, 1)); + let (cb, mut rx) = make_cb(&request); + cluster + .sim + .rl() + .async_command_on_node(1, request, cb) + .unwrap(); + assert_eq!(rx.recv_timeout(Duration::from_millis(100)).is_ok(), true); + must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); + let state = cluster.pd_client.region_replication_status(region.get_id()); + assert_eq!(state.state_id, 1); + assert_eq!(state.state, RegionReplicationState::IntegrityOverLabel); + + // 2. swith to marjority mode. + cluster.pd_client.switch_replication_mode(None, vec![]); + thread::sleep(Duration::from_millis(150)); + + // 3. 
spilt the region and make a new region, the regions status must be + // SimpleMajority. + cluster.must_split(®ion, b"m1"); + thread::sleep(Duration::from_millis(150)); + cluster.must_put(b"n4", b"v4"); + must_get_equal(&cluster.get_engine(1), b"n4", b"v4"); + let region_m = cluster.get_region(b"n4"); + let region_k = cluster.get_region(b"k1"); + + // 4. switch to dy-auto-sync mode, the new region generated at marjority mode + // becomes IntegrityOverLabel again. + cluster.pd_client.switch_to_drautosync_mode(); + thread::sleep(Duration::from_millis(100)); + let state_m = cluster + .pd_client + .region_replication_status(region_m.get_id()); + let state_k = cluster + .pd_client + .region_replication_status(region_k.get_id()); + assert_eq!(state_m.state_id, 3); + assert_eq!(state_m.state, RegionReplicationState::IntegrityOverLabel); + assert_eq!(state_k.state_id, 3); + assert_eq!(state_k.state, RegionReplicationState::IntegrityOverLabel); +} + /// Tests if labels are loaded correctly after rolling start. 
#[test] fn test_loading_label_after_rolling_start() { From 22364f2c3e936b5c3325ea724573559b12013688 Mon Sep 17 00:00:00 2001 From: Shenghui Wu <793703860@qq.com> Date: Mon, 23 Oct 2023 16:38:02 +0800 Subject: [PATCH 0973/1149] copr: fix cast_bytes_like_as_duration encoding failed (#15819) close tikv/tikv#15820 fix cast_bytes_like_as_duration encoding failed Signed-off-by: wshwsh12 <793703860@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tidb_query_expr/src/impl_cast.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index 76e90f79c5b..16e33e71d13 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -1038,10 +1038,10 @@ fn cast_bytes_like_as_duration( val: &[u8], overflow_as_null: bool, ) -> Result> { - let val = std::str::from_utf8(val).map_err(Error::Encoding)?; + let val = String::from_utf8_lossy(val); let result = Duration::parse_consider_overflow( ctx, - val, + &val, extra.ret_field_type.get_decimal() as i8, overflow_as_null, ); @@ -6450,6 +6450,7 @@ mod tests { b"-17:51:04.78", b"17:51:04.78", b"-17:51:04.78", + b"\x92\x6b", ]; test_as_duration_helper( From 9a0504d89ac33e87538ef8781a4871795c9a3ba4 Mon Sep 17 00:00:00 2001 From: lijie Date: Mon, 23 Oct 2023 20:00:31 +0800 Subject: [PATCH 0974/1149] chore: bump version to 7.6.0-alpha (#15810) Signed-off-by: lijie --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ff8db731924..52408df1ab2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6469,7 +6469,7 @@ dependencies = [ [[package]] name = "tikv" -version = "7.5.0-alpha" +version = "7.6.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index 32b2d858b6a..edebbc46f1d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ 
[package] name = "tikv" -version = "7.5.0-alpha" +version = "7.6.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From e254b8ad44b4d346d71bd2241776035a6c674c6d Mon Sep 17 00:00:00 2001 From: lucasliang Date: Tue, 24 Oct 2023 00:56:02 +0800 Subject: [PATCH 0975/1149] metrics: Add extra necessary metrics for monitoring SlowTrend performance. (#15797) ref tikv/tikv#15271 This pr supply extra necessary metrics for `SlowTrend`, which is useful for users to monitor the performance of `SlowTrend`, including: 1. Slow Trend, shows the changing rate of the slowness on I/O operations. 'value > 0' means the related store might has a slow trend. 2. AVG Sampling Latency, records the sampling latency of recent queries. A larger value indicates that the store is more likely to be the slowest store. Only when one of TiKV node has been marked with **Slow Trend**, and its severity reaches the limitation, it will be chosen as the slow node. 3. QPS of each store, store slow trend result, showing the QPS of each store. 4. QPS Changing Rate, shows the changing trend of QPS on each store. 'value < 0' means the QPS has a dropping trend. 
Signed-off-by: lucasliang --- metrics/grafana/tikv_details.json | 392 +++++++++++++++++++++++++++++- 1 file changed, 390 insertions(+), 2 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index f2654ba3da1..fced6f6bab4 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -1580,7 +1580,8 @@ "intervalFactor": 2, "legendFormat": "{{instance}} - buckets", "refId": "B", - "step": 10 + "step": 10, + "hide": true } ], "thresholds": [], @@ -14520,7 +14521,7 @@ "defaults": {}, "overrides": [] }, - "gridPos": { + "gridPos": { "h": 7, "w": 12, "x": 0, @@ -48363,6 +48364,393 @@ ], "title": "Backup Log", "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 52 + }, + "id": 24763573238, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The changing trend of the slowness on I/O operations. 
'value > 0' means the related store might has a slow trend.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 53 + }, + "hiddenSeries": false, + "id": 24763574116, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "editorMode": "code", + "expr": "tikv_raftstore_slow_trend{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Slow Trend", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The changing trend of QPS on each store. 
'value < 0' means the QPS has a dropping trend.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 53 + }, + "hiddenSeries": false, + "id": 24763574117, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "editorMode": "code", + "expr": "tikv_raftstore_slow_trend_result{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "QPS Changing Trend", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The sampling latency of recent queries. 
A larger value indicates that the store is more likely to be the slowest store.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 60 + }, + "hiddenSeries": false, + "id": 24763574115, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "editorMode": "code", + "expr": "tikv_raftstore_slow_trend_l0{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [ + { + "value": 275000, + "colorMode": "critical", + "op": "gt", + "fill": false, + "line": true, + "visible": true, + "yaxis": "left" + } + ], + "timeRegions": [], + "title": "AVG Sampling Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The QPS of each store.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 60 + }, + "hiddenSeries": 
false, + "id": 24763573970, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "editorMode": "code", + "expr": "tikv_raftstore_slow_trend_result_value{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "QPS of each store", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "Slow Trend Statistics", + "type": "row" } ], "refresh": "1m", From f3bfe13c6c479fa209de308684dd7e595631527e Mon Sep 17 00:00:00 2001 From: Connor Date: Wed, 25 Oct 2023 17:50:03 +0800 Subject: [PATCH 0976/1149] grafana: Fix wrong scheduler command variables of grafana in cloud env (#15833) close tikv/tikv#15832 Fix wrong scheduler command variables of grafana in cloud env by adding a `\b` to regex to make sure it's at the word boundary. 
Signed-off-by: Connor1996 --- metrics/grafana/tikv_details.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index fced6f6bab4..4a72d3c204a 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -48858,7 +48858,7 @@ "refId": "StandardVariableQuery" }, "refresh": 1, - "regex": "/type=\"([^\"]+)\"/", + "regex": "/\btype=\"([^\"]+)\"/", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", From c6adb042c9a05fb0051bc34d11fb5d4ab5273033 Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 26 Oct 2023 11:32:03 +0800 Subject: [PATCH 0977/1149] raftstore: Fix group commit is mistakenly enabled in sync recover state (#15830) close tikv/tikv#15817 When splitting a region, group commit is mistakenly enabled in the sync-recover state. If the region is in joint state and demoting voter is down, the commit condition can't meet. Fix group commit is mistakenly enabled in sync recover state Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/peer.rs | 5 +- components/test_pd_client/src/pd.rs | 9 +-- .../raftstore/test_replication_mode.rs | 71 +++++++++++++++++-- 3 files changed, 72 insertions(+), 13 deletions(-) diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index cab88a26585..abe14bf7c2a 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -966,7 +966,10 @@ where return; } self.replication_mode_version = state.status().get_dr_auto_sync().state_id; - let enable = state.status().get_dr_auto_sync().get_state() != DrAutoSyncState::Async; + let enable = !matches!( + state.status().get_dr_auto_sync().get_state(), + DrAutoSyncState::Async | DrAutoSyncState::SyncRecover + ); self.raft_group.raft.enable_group_commit(enable); self.dr_auto_sync_state = 
state.status().get_dr_auto_sync().get_state(); } diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index 7f00cf35ccd..341495cdb52 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -1450,20 +1450,13 @@ impl TestPdClient { dr.state_id += 1; return; } + status.set_mode(ReplicationMode::DrAutoSync); let mut dr = status.mut_dr_auto_sync(); dr.state_id += 1; dr.set_state(state.unwrap()); dr.available_stores = available_stores; } - pub fn switch_to_drautosync_mode(&self) { - let mut cluster = self.cluster.wl(); - let status = cluster.replication_status.as_mut().unwrap(); - status.set_mode(ReplicationMode::DrAutoSync); - let mut dr = status.mut_dr_auto_sync(); - dr.state_id += 1; - } - pub fn region_replication_status(&self, region_id: u64) -> RegionReplicationStatus { self.cluster .rl() diff --git a/tests/integrations/raftstore/test_replication_mode.rs b/tests/integrations/raftstore/test_replication_mode.rs index 38054c1a995..76059fa8f87 100644 --- a/tests/integrations/raftstore/test_replication_mode.rs +++ b/tests/integrations/raftstore/test_replication_mode.rs @@ -1,6 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use std::{sync::Arc, thread, time::Duration}; +use std::{iter::FromIterator, sync::Arc, thread, time::Duration}; use kvproto::replication_modepb::*; use pd_client::PdClient; @@ -99,6 +99,67 @@ fn test_dr_auto_sync() { assert_eq!(state.state, RegionReplicationState::IntegrityOverLabel); } +// When in sync recover state, and the region is in joint state. The leave joint +// state should be committed successfully. 
+#[test] +fn test_sync_recover_joint_state() { + let mut cluster = new_server_cluster(0, 5); + cluster.pd_client.disable_default_operator(); + cluster.pd_client.configure_dr_auto_sync("zone"); + cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(50); + cluster.cfg.raft_store.raft_log_gc_threshold = 1; + cluster.add_label(1, "zone", "ES"); + cluster.add_label(2, "zone", "ES"); + cluster.add_label(3, "zone", "ES"); + cluster.add_label(4, "zone", "WS"); // old dr + cluster.add_label(5, "zone", "WS"); // new dr + + let pd_client = Arc::clone(&cluster.pd_client); + let region_id = cluster.run_conf_change(); + let nodes = Vec::from_iter(cluster.get_node_ids()); + assert_eq!(nodes.len(), 5); + cluster.must_put(b"k1", b"v1"); + + cluster + .pd_client + .switch_replication_mode(Some(DrAutoSyncState::Async), vec![]); + + pd_client.must_add_peer(region_id, new_peer(2, 2)); + pd_client.must_add_peer(region_id, new_peer(3, 3)); + pd_client.must_add_peer(region_id, new_peer(4, 4)); + pd_client.must_add_peer(region_id, new_learner_peer(5, 5)); + + // Make one node down + cluster.stop_node(4); + + // Switch to sync recover + cluster + .pd_client + .switch_replication_mode(Some(DrAutoSyncState::SyncRecover), vec![]); + + cluster.must_put(b"k2", b"v2"); + assert_eq!(cluster.must_get(b"k2").unwrap(), b"v2"); + + // Enter joint, now we have C_old(1, 2, 3, 4) and C_new(1, 2, 3, 5) + pd_client.must_joint_confchange( + region_id, + vec![ + (ConfChangeType::AddLearnerNode, new_learner_peer(4, 4)), + (ConfChangeType::AddNode, new_peer(5, 5)), + ], + ); + + let region = pd_client.get_region(b"k1").unwrap(); + cluster.must_split(®ion, b"k2"); + let left = pd_client.get_region(b"k1").unwrap(); + let right = pd_client.get_region(b"k2").unwrap(); + assert_ne!(left.get_id(), right.get_id()); + + // Leave joint + pd_client.must_leave_joint(left.get_id()); + pd_client.must_leave_joint(right.get_id()); +} + #[test] fn test_sync_recover_after_apply_snapshot() { let 
mut cluster = prepare_cluster(); @@ -501,7 +562,7 @@ fn test_migrate_majority_to_drautosync() { assert_eq!(state.state_id, 1); assert_eq!(state.state, RegionReplicationState::IntegrityOverLabel); - // 2. swith to marjority mode. + // 2. switch to majority mode. cluster.pd_client.switch_replication_mode(None, vec![]); thread::sleep(Duration::from_millis(150)); @@ -514,9 +575,11 @@ fn test_migrate_majority_to_drautosync() { let region_m = cluster.get_region(b"n4"); let region_k = cluster.get_region(b"k1"); - // 4. switch to dy-auto-sync mode, the new region generated at marjority mode + // 4. switch to dy-auto-sync mode, the new region generated at majority mode // becomes IntegrityOverLabel again. - cluster.pd_client.switch_to_drautosync_mode(); + cluster + .pd_client + .switch_replication_mode(Some(DrAutoSyncState::SyncRecover), vec![]); thread::sleep(Duration::from_millis(100)); let state_m = cluster .pd_client From 12c2cf10982979e2f47b37c1561f1e02ade62526 Mon Sep 17 00:00:00 2001 From: tongjian <1045931706@qq.com> Date: Thu, 26 Oct 2023 12:39:33 +0800 Subject: [PATCH 0978/1149] raftstore: improve the bucket split strategy (#15798) close tikv/tikv#13671 there are three reason may cause the bucket not split: 1. split check tick will refresh bucket info even info the bucket version not change 2. the suspect buckets only conside the increment flow 3. all the bucket increment flows are reset if one bucket is updated. To solve this, bucket stats only record the increment flow and reset it after meta size updated. 
Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../raftstore-v2/src/operation/bucket.rs | 391 +---------------- components/raftstore-v2/src/operation/mod.rs | 1 - components/raftstore-v2/src/raft/peer.rs | 11 +- components/raftstore/src/store/fsm/apply.rs | 6 +- components/raftstore/src/store/fsm/peer.rs | 211 +++------- components/raftstore/src/store/mod.rs | 18 +- components/raftstore/src/store/peer.rs | 37 +- components/raftstore/src/store/worker/mod.rs | 3 +- components/raftstore/src/store/worker/read.rs | 6 +- .../raftstore/src/store/worker/split_check.rs | 396 +++++++++++++++++- .../raftstore/test_split_region.rs | 13 +- 11 files changed, 508 insertions(+), 585 deletions(-) diff --git a/components/raftstore-v2/src/operation/bucket.rs b/components/raftstore-v2/src/operation/bucket.rs index 242b9a9b33b..920a4e68e8c 100644 --- a/components/raftstore-v2/src/operation/bucket.rs +++ b/components/raftstore-v2/src/operation/bucket.rs @@ -6,12 +6,12 @@ use std::sync::Arc; use engine_traits::{KvEngine, RaftEngine}; use kvproto::{ - metapb::{self, RegionEpoch}, + metapb::RegionEpoch, raft_serverpb::{ExtraMessageType, RaftMessage, RefreshBuckets}, }; -use pd_client::{BucketMeta, BucketStat}; +use pd_client::BucketMeta; use raftstore::{ - coprocessor::{Config, RegionChangeEvent}, + coprocessor::RegionChangeEvent, store::{util, Bucket, BucketRange, ReadProgress, SplitCheckTask, Transport}, }; use slog::{error, info}; @@ -24,213 +24,6 @@ use crate::{ worker::pd, }; -#[derive(Debug, Clone, Default)] -pub struct BucketStatsInfo { - // the stats is increment flow. - bucket_stat: Option, - // the report bucket stat records the increment stats after last report pd. - // it will be reset after report pd. - report_bucket_stat: Option, - // avoid the version roll back, it record the last bucket version if bucket stat isn't none. 
- last_bucket_version: u64, -} - -impl BucketStatsInfo { - /// returns all bucket ranges those's write_bytes exceed the given - /// diff_size_threshold. - pub fn gen_bucket_range_for_update( - &self, - region_bucket_max_size: u64, - ) -> Option> { - let region_buckets = self.bucket_stat.as_ref()?; - let stats = ®ion_buckets.stats; - let keys = ®ion_buckets.meta.keys; - let sizes = ®ion_buckets.meta.sizes; - - let mut suspect_bucket_ranges = vec![]; - assert_eq!(keys.len(), stats.write_bytes.len() + 1); - for i in 0..stats.write_bytes.len() { - let estimated_bucket_size = stats.write_bytes[i] + sizes[i]; - if estimated_bucket_size >= region_bucket_max_size { - suspect_bucket_ranges.push(BucketRange(keys[i].clone(), keys[i + 1].clone())); - } - } - Some(suspect_bucket_ranges) - } - - #[inline] - pub fn version(&self) -> u64 { - self.bucket_stat - .as_ref() - .map(|b| b.meta.version) - .or(Some(self.last_bucket_version)) - .unwrap_or_default() - } - - #[inline] - pub fn add_bucket_flow(&mut self, delta: &Option) { - if let (Some(buckets), Some(report_buckets), Some(delta)) = ( - self.bucket_stat.as_mut(), - self.report_bucket_stat.as_mut(), - delta, - ) { - buckets.merge(delta); - report_buckets.merge(delta); - } - } - - #[inline] - pub fn set_bucket_stat(&mut self, buckets: Option) { - self.bucket_stat = buckets.clone(); - if let Some(new_buckets) = buckets { - self.last_bucket_version = new_buckets.meta.version; - let mut new_report_buckets = BucketStat::from_meta(new_buckets.meta); - if let Some(old) = &mut self.report_bucket_stat { - new_report_buckets.merge(old); - *old = new_report_buckets; - } else { - self.report_bucket_stat = Some(new_report_buckets); - } - } else { - self.report_bucket_stat = None; - } - } - - #[inline] - pub fn report_bucket_stat(&mut self) -> BucketStat { - let current = self.report_bucket_stat.as_mut().unwrap(); - let delta = current.clone(); - current.clear_stats(); - delta - } - - #[inline] - pub fn bucket_stat(&self) -> &Option { - 
&self.bucket_stat - } - - pub fn on_refresh_region_buckets( - &mut self, - cfg: &Config, - next_bucket_version: u64, - buckets: Vec, - region_epoch: RegionEpoch, - region: metapb::Region, - bucket_ranges: Option>, - ) -> bool { - let change_bucket_version: bool; - // The region buckets reset after this region happened split or merge. - // The message should be dropped if it's epoch is lower than the regions. - // The bucket ranges is none when the region buckets is also none. - // So this condition indicates that the region buckets needs to refresh not - // renew. - if let Some(bucket_ranges) = bucket_ranges&&self.bucket_stat.is_some(){ - assert_eq!(buckets.len(), bucket_ranges.len()); - change_bucket_version=self.update_buckets(cfg, next_bucket_version, buckets, region_epoch, &bucket_ranges); - }else{ - change_bucket_version = true; - // when the region buckets is none, the exclusive buckets includes all the - // bucket keys. - self.init_buckets(cfg, next_bucket_version, buckets, region_epoch, region); - } - change_bucket_version - } - - fn update_buckets( - &mut self, - cfg: &Config, - next_bucket_version: u64, - buckets: Vec, - region_epoch: RegionEpoch, - bucket_ranges: &Vec, - ) -> bool { - let origin_region_buckets = self.bucket_stat.as_ref().unwrap(); - let mut change_bucket_version = false; - let mut meta_idx = 0; - let mut region_buckets = origin_region_buckets.clone(); - let mut meta = (*region_buckets.meta).clone(); - meta.region_epoch = region_epoch; - - // bucket stats will clean if the bucket size is updated. - for (bucket, bucket_range) in buckets.into_iter().zip(bucket_ranges) { - // the bucket ranges maybe need to split or merge not all the meta keys, so it - // needs to find the first keys. 
- while meta_idx < meta.keys.len() && meta.keys[meta_idx] != bucket_range.0 { - meta_idx += 1; - } - // meta_idx can't be not the last entry (which is end key) - if meta_idx >= meta.keys.len() - 1 { - break; - } - // the bucket size is small and does not have split keys, - // then it should be merged with its left neighbor - let region_bucket_merge_size = - cfg.region_bucket_merge_size_ratio * (cfg.region_bucket_size.0 as f64); - if bucket.keys.is_empty() && bucket.size <= (region_bucket_merge_size as u64) { - meta.sizes[meta_idx] = bucket.size; - region_buckets.clean_stats(meta_idx); - // the region has more than one bucket - // and the left neighbor + current bucket size is not very big - if meta.keys.len() > 2 - && meta_idx != 0 - && meta.sizes[meta_idx - 1] + bucket.size < cfg.region_bucket_size.0 * 2 - { - // bucket is too small - region_buckets.left_merge(meta_idx); - meta.left_merge(meta_idx); - change_bucket_version = true; - continue; - } - } else { - // update size - meta.sizes[meta_idx] = bucket.size / (bucket.keys.len() + 1) as u64; - region_buckets.clean_stats(meta_idx); - // insert new bucket keys (split the original bucket) - for bucket_key in bucket.keys { - meta_idx += 1; - region_buckets.split(meta_idx); - meta.split(meta_idx, bucket_key); - change_bucket_version = true; - } - } - meta_idx += 1; - } - if change_bucket_version { - meta.version = next_bucket_version; - } - region_buckets.meta = Arc::new(meta); - self.set_bucket_stat(Some(region_buckets)); - change_bucket_version - } - - fn init_buckets( - &mut self, - cfg: &Config, - next_bucket_version: u64, - mut buckets: Vec, - region_epoch: RegionEpoch, - region: metapb::Region, - ) { - // when the region buckets is none, the exclusive buckets includes all the - // bucket keys. 
- assert_eq!(buckets.len(), 1); - let bucket_keys = buckets.pop().unwrap().keys; - let bucket_count = bucket_keys.len() + 1; - let mut meta = BucketMeta { - region_id: region.get_id(), - region_epoch, - version: next_bucket_version, - keys: bucket_keys, - sizes: vec![cfg.region_bucket_size.0; bucket_count], - }; - // padding the boundary keys and initialize the flow. - meta.keys.insert(0, region.get_start_key().to_vec()); - meta.keys.push(region.get_end_key().to_vec()); - let bucket_stats = BucketStat::from_meta(Arc::new(meta)); - self.set_bucket_stat(Some(bucket_stats)); - } -} - impl Peer { #[inline] pub fn on_refresh_region_buckets( @@ -250,14 +43,13 @@ impl Peer { let current_version = self.region_buckets_info().version(); let next_bucket_version = util::gen_bucket_version(self.term(), current_version); - // let mut is_first_refresh = true; let region = self.region().clone(); let change_bucket_version = self.region_buckets_info_mut().on_refresh_region_buckets( &store_ctx.coprocessor_host.cfg, next_bucket_version, buckets, region_epoch, - region, + ®ion, bucket_ranges, ); let region_buckets = self @@ -443,178 +235,3 @@ where self.schedule_tick(PeerTick::ReportBuckets); } } - -#[cfg(test)] -mod tests { - use super::*; - - // create BucketStatsInfo include three keys: ["","100","200",""]. 
- fn mock_bucket_stats_info() -> BucketStatsInfo { - let mut bucket_stats_info = BucketStatsInfo::default(); - let cfg = Config::default(); - let next_bucket_version = 1; - let bucket_ranges = None; - let mut region_epoch = RegionEpoch::default(); - region_epoch.set_conf_ver(1); - region_epoch.set_version(1); - let mut region = metapb::Region::default(); - region.set_id(1); - - let mut buckets = vec![]; - let mut bucket = Bucket::default(); - bucket.keys.push(vec![100]); - bucket.keys.push(vec![200]); - buckets.insert(0, bucket); - - let _ = bucket_stats_info.on_refresh_region_buckets( - &cfg, - next_bucket_version, - buckets, - region_epoch, - region, - bucket_ranges, - ); - bucket_stats_info - } - - #[test] - pub fn test_version() { - let mut bucket_stats_info = mock_bucket_stats_info(); - assert_eq!(1, bucket_stats_info.version()); - bucket_stats_info.set_bucket_stat(None); - assert_eq!(1, bucket_stats_info.version()); - - let mut meta = BucketMeta::default(); - meta.version = 2; - meta.keys.push(vec![]); - meta.keys.push(vec![]); - let bucket_stat = BucketStat::from_meta(Arc::new(meta)); - bucket_stats_info.set_bucket_stat(Some(bucket_stat)); - assert_eq!(2, bucket_stats_info.version()); - } - - #[test] - pub fn test_insert_new_buckets() { - let bucket_stats_info = mock_bucket_stats_info(); - - let cfg = Config::default(); - let bucket_stat = bucket_stats_info.bucket_stat.unwrap(); - assert_eq!( - vec![vec![], vec![100], vec![200], vec![]], - bucket_stat.meta.keys - ); - for i in 0..bucket_stat.stats.write_bytes.len() { - assert_eq!(cfg.region_bucket_size.0, bucket_stat.meta.sizes[i]); - assert_eq!(0, bucket_stat.stats.write_bytes[i]); - } - } - - #[test] - pub fn test_report_buckets() { - let mut bucket_stats_info = mock_bucket_stats_info(); - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); - let mut delta_bucket_stats = bucket_stats.clone(); - delta_bucket_stats.write_key(&[1], 1); - delta_bucket_stats.write_key(&[201], 1); - 
bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats.clone())); - let bucket_stats = bucket_stats_info.report_bucket_stat(); - assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); - - let report_bucket_stats = bucket_stats_info.report_bucket_stat(); - assert_eq!(vec![0, 0, 0], report_bucket_stats.stats.write_bytes); - bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats)); - assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); - } - - #[test] - pub fn test_spilt_and_merge_buckets() { - let mut bucket_stats_info = mock_bucket_stats_info(); - let next_bucket_version = 2; - let mut region = metapb::Region::default(); - region.set_id(1); - let cfg = Config::default(); - let bucket_size = cfg.region_bucket_size.0; - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); - let region_epoch = bucket_stats.meta.region_epoch.clone(); - - // step1: update buckets flow - let mut delta_bucket_stats = bucket_stats.clone(); - delta_bucket_stats.write_key(&[1], 1); - delta_bucket_stats.write_key(&[201], 1); - bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats)); - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); - assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); - - // step2: tick not affect anything - let bucket_ranges = Some(vec![]); - let buckets = vec![]; - let mut change_bucket_version = bucket_stats_info.on_refresh_region_buckets( - &cfg, - next_bucket_version, - buckets, - region_epoch.clone(), - region.clone(), - bucket_ranges, - ); - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); - assert!(!change_bucket_version); - assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); - - // step3: split key 50 - let mut bucket_ranges = Some(vec![BucketRange(vec![], vec![100])]); - let mut bucket = Bucket::default(); - bucket.keys = vec![vec![50]]; - bucket.size = bucket_size; - let mut buckets = vec![bucket]; - change_bucket_version = bucket_stats_info.on_refresh_region_buckets( - 
&cfg, - next_bucket_version, - buckets.clone(), - region_epoch.clone(), - region.clone(), - bucket_ranges.clone(), - ); - assert!(change_bucket_version); - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); - assert_eq!( - vec![vec![], vec![50], vec![100], vec![200], vec![]], - bucket_stats.meta.keys - ); - assert_eq!( - vec![bucket_size / 2, bucket_size / 2, bucket_size, bucket_size], - bucket_stats.meta.sizes - ); - assert_eq!(vec![0, 0, 0, 2], bucket_stats.stats.write_bytes); - - // step4: merge [50-100] to [0-50], - bucket_ranges = Some(vec![BucketRange(vec![50], vec![100])]); - let mut bucket = Bucket::default(); - bucket.keys = vec![]; - bucket.size = 0; - buckets = vec![bucket]; - change_bucket_version = bucket_stats_info.on_refresh_region_buckets( - &cfg, - next_bucket_version, - buckets, - region_epoch, - region, - bucket_ranges, - ); - assert!(change_bucket_version); - - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); - assert_eq!( - vec![vec![], vec![100], vec![200], vec![]], - bucket_stats.meta.keys - ); - assert_eq!( - vec![bucket_size / 2, bucket_size, bucket_size], - bucket_stats.meta.sizes - ); - assert_eq!(vec![0, 0, 2], bucket_stats.stats.write_bytes); - - // report buckets doesn't be affected by the split and merge. 
- let report_bucket_stats = bucket_stats_info.report_bucket_stat(); - assert_eq!(vec![4, 0, 2], report_bucket_stats.stats.write_bytes); - } -} diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 6d5cba9fff8..9ccf08d6d54 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -24,7 +24,6 @@ pub use ready::{ }; pub(crate) use self::{ - bucket::BucketStatsInfo, command::SplitInit, query::{LocalReader, ReadDelegatePair, SharedReadTablet}, txn_ext::TxnContext, diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 2c8b8cef1db..9b095b872e7 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -23,8 +23,9 @@ use raftstore::{ fsm::ApplyMetrics, metrics::RAFT_PEER_PENDING_DURATION, util::{Lease, RegionReadProgress}, - Config, EntryStorage, ForceLeaderState, PeerStat, ProposalQueue, ReadDelegate, - ReadIndexQueue, ReadProgress, TabletSnapManager, UnsafeRecoveryState, WriteTask, + BucketStatsInfo, Config, EntryStorage, ForceLeaderState, PeerStat, ProposalQueue, + ReadDelegate, ReadIndexQueue, ReadProgress, TabletSnapManager, UnsafeRecoveryState, + WriteTask, }, }; use slog::{debug, info, Logger}; @@ -35,9 +36,9 @@ use crate::{ batch::StoreContext, fsm::ApplyScheduler, operation::{ - AbnormalPeerContext, AsyncWriter, BucketStatsInfo, CompactLogContext, DestroyProgress, - GcPeerContext, MergeContext, ProposalControl, ReplayWatch, SimpleWriteReqEncoder, - SplitFlowControl, SplitPendingAppend, TxnContext, + AbnormalPeerContext, AsyncWriter, CompactLogContext, DestroyProgress, GcPeerContext, + MergeContext, ProposalControl, ReplayWatch, SimpleWriteReqEncoder, SplitFlowControl, + SplitPendingAppend, TxnContext, }, router::{ApplyTask, CmdResChannel, PeerTick, QueryResChannel}, Result, diff --git a/components/raftstore/src/store/fsm/apply.rs 
b/components/raftstore/src/store/fsm/apply.rs index 038171d9715..339dff68e76 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -679,7 +679,7 @@ where exec_res: results, metrics: mem::take(&mut delegate.metrics), applied_term: delegate.applied_term, - bucket_stat: delegate.buckets.clone().map(Box::new), + bucket_stat: delegate.buckets.clone(), }); if !self.kv_wb().is_empty() { // Pending writes not flushed, need to set seqno to following ApplyRes later @@ -3874,7 +3874,7 @@ where pub applied_term: u64, pub exec_res: VecDeque>, pub metrics: ApplyMetrics, - pub bucket_stat: Option>, + pub bucket_stat: Option, pub write_seqno: Vec, } @@ -6929,7 +6929,7 @@ mod tests { router.schedule_task(1, Msg::apply(apply2)); let res = fetch_apply_res(&rx); - let bucket_version = res.bucket_stat.unwrap().as_ref().meta.version; + let bucket_version = res.bucket_stat.unwrap().meta.version; assert_eq!(bucket_version, 2); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 7504f746abe..49f558250ee 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -41,7 +41,7 @@ use kvproto::{ replication_modepb::{DrAutoSyncState, ReplicationMode}, }; use parking_lot::RwLockWriteGuard; -use pd_client::{new_bucket_stats, BucketMeta, BucketStat}; +use pd_client::BucketMeta; use protobuf::Message; use raft::{ self, @@ -2327,10 +2327,11 @@ where return; } let applied_index = res.apply_state.applied_index; - let buckets = self.fsm.peer.region_buckets.as_mut(); - if let (Some(delta), Some(buckets)) = (res.bucket_stat, buckets) { - buckets.merge(&delta); - } + self.fsm + .peer + .region_buckets_info_mut() + .add_bucket_flow(&res.bucket_stat); + self.fsm.has_ready |= self.fsm.peer.post_apply( self.ctx, res.apply_state, @@ -5989,7 +5990,7 @@ where fn on_refresh_region_buckets( &mut self, region_epoch: RegionEpoch, - mut buckets: Vec, + buckets: 
Vec, bucket_ranges: Option>, _cb: Callback, ) { @@ -6017,14 +6018,14 @@ where // test purpose #[cfg(any(test, feature = "testexport"))] { - let default_buckets = BucketStat::default(); test_only_callback( _cb, self.fsm .peer - .region_buckets + .region_buckets_info() + .bucket_stat() .as_ref() - .unwrap_or(&default_buckets) + .unwrap() .meta .clone(), ); @@ -6032,108 +6033,53 @@ where return; } - let mut current_version = self + let current_version = self.fsm.peer.region_buckets_info().version(); + let next_bucket_version = util::gen_bucket_version(self.fsm.peer.term(), current_version); + let region = self.region().clone(); + let change_bucket_version = self .fsm .peer - .region_buckets + .region_buckets_info_mut() + .on_refresh_region_buckets( + &self.ctx.coprocessor_host.cfg, + next_bucket_version, + buckets, + region_epoch, + ®ion, + bucket_ranges, + ); + let region_buckets = self + .fsm + .peer + .region_buckets_info() + .bucket_stat() .as_ref() - .map(|b| b.meta.version) - .unwrap_or_default(); - if current_version == 0 { - current_version = self - .fsm - .peer - .last_region_buckets - .as_ref() - .map(|b| b.meta.version) - .unwrap_or_default(); - } - let mut region_buckets: BucketStat; - if let Some(bucket_ranges) = bucket_ranges { - assert_eq!(buckets.len(), bucket_ranges.len()); - let mut i = 0; - region_buckets = self.fsm.peer.region_buckets.clone().unwrap(); - let mut meta = (*region_buckets.meta).clone(); - if !buckets.is_empty() { - meta.version = util::gen_bucket_version(self.fsm.peer.term(), current_version); - } - meta.region_epoch = region_epoch; - for (bucket, bucket_range) in buckets.into_iter().zip(bucket_ranges) { - while i < meta.keys.len() && meta.keys[i] != bucket_range.0 { - i += 1; - } - assert!(i != meta.keys.len()); - // the bucket size is small and does not have split keys, - // then it should be merged with its left neighbor - let region_bucket_merge_size = - self.ctx.coprocessor_host.cfg.region_bucket_merge_size_ratio - * 
(self.ctx.coprocessor_host.cfg.region_bucket_size.0 as f64); - if bucket.keys.is_empty() && bucket.size <= (region_bucket_merge_size as u64) { - meta.sizes[i] = bucket.size; - // i is not the last entry (which is end key) - assert!(i < meta.keys.len() - 1); - // the region has more than one bucket - // and the left neighbor + current bucket size is not very big - if meta.keys.len() > 2 - && i != 0 - && meta.sizes[i - 1] + bucket.size - < self.ctx.coprocessor_host.cfg.region_bucket_size.0 * 2 - { - // bucket is too small - region_buckets.left_merge(i); - meta.left_merge(i); - continue; - } - } else { - // update size - meta.sizes[i] = bucket.size / (bucket.keys.len() + 1) as u64; - // insert new bucket keys (split the original bucket) - for bucket_key in bucket.keys { - i += 1; - region_buckets.split(i); - meta.split(i, bucket_key); - } - } - i += 1; - } - region_buckets.meta = Arc::new(meta); - } else { - debug!( - "refresh_region_buckets re-generates buckets"; + .unwrap() + .clone(); + let buckets_count = region_buckets.meta.keys.len() - 1; + if change_bucket_version { + // TODO: we may need to make it debug once the coprocessor timeout is resolved. + info!( + "finished on_refresh_region_buckets"; "region_id" => self.fsm.region_id(), + "buckets_count" => buckets_count, + "buckets_size" => ?region_buckets.meta.sizes, ); - assert_eq!(buckets.len(), 1); - let bucket_keys = buckets.pop().unwrap().keys; - let bucket_count = bucket_keys.len() + 1; - - let mut meta = BucketMeta { - region_id: self.fsm.region_id(), - region_epoch, - version: util::gen_bucket_version(self.fsm.peer.term(), current_version), - keys: bucket_keys, - sizes: vec![self.ctx.coprocessor_host.cfg.region_bucket_size.0; bucket_count], - }; - meta.keys.insert(0, region.get_start_key().to_vec()); - meta.keys.push(region.get_end_key().to_vec()); - region_buckets = BucketStat::from_meta(Arc::new(meta)); + } else { + // it means the buckets key range not any change, so don't need to refresh. 
+ test_only_callback(_cb, region_buckets.meta); + return; } - - let buckets_count = region_buckets.meta.keys.len() - 1; self.ctx.coprocessor_host.on_region_changed( - region, + self.region(), RegionChangeEvent::UpdateBuckets(buckets_count), self.fsm.peer.get_role(), ); let keys = region_buckets.meta.keys.clone(); - let old_region_buckets: Option = - self.fsm.peer.region_buckets.replace(region_buckets); - self.fsm.peer.last_region_buckets = old_region_buckets; + let version = region_buckets.meta.version; let mut store_meta = self.ctx.store_meta.lock().unwrap(); - let version = self.fsm.peer.region_buckets.as_ref().unwrap().meta.version; if let Some(reader) = store_meta.readers.get_mut(&self.fsm.region_id()) { - reader.update(ReadProgress::region_buckets( - self.fsm.peer.region_buckets.as_ref().unwrap().meta.clone(), - )); + reader.update(ReadProgress::region_buckets(region_buckets.meta.clone())); } // Notify followers to refresh their buckets version @@ -6154,19 +6100,9 @@ where .send_extra_message(extra_msg, &mut self.ctx.trans, &p); } } - - debug!( - "finished on_refresh_region_buckets"; - "region_id" => self.fsm.region_id(), - "buckets_count" => buckets_count, - "buckets_size" => ?self.fsm.peer.region_buckets.as_ref().unwrap().meta.sizes, - ); // test purpose #[cfg(any(test, feature = "testexport"))] - test_only_callback( - _cb, - self.fsm.peer.region_buckets.as_ref().unwrap().meta.clone(), - ); + test_only_callback(_cb, region_buckets.meta); } pub fn on_msg_refresh_buckets(&mut self, msg: RaftMessage) { @@ -6205,50 +6141,11 @@ where if !self.ctx.coprocessor_host.cfg.enable_region_bucket() { return None; } - let region_buckets = self.fsm.peer.region_buckets.as_ref()?; - let stats = ®ion_buckets.stats; - let keys = ®ion_buckets.meta.keys; - - let empty_last_keys = vec![]; - let empty_last_stats = metapb::BucketStats::default(); - let (last_keys, last_stats, stats_reset) = self - .fsm + let region_bucket_max_size = self.ctx.coprocessor_host.cfg.region_bucket_size.0 
* 2; + self.fsm .peer - .last_region_buckets - .as_ref() - .map(|b| { - ( - &b.meta.keys, - &b.stats, - region_buckets.create_time != b.create_time, - ) - }) - .unwrap_or((&empty_last_keys, &empty_last_stats, false)); - - let mut bucket_ranges = vec![]; - let mut j = 0; - assert_eq!(keys.len(), stats.write_bytes.len() + 1); - for i in 0..stats.write_bytes.len() { - let mut diff_in_bytes = stats.write_bytes[i]; - while j < last_keys.len() && keys[i] > last_keys[j] { - j += 1; - } - if j < last_keys.len() && keys[i] == last_keys[j] { - if !stats_reset { - diff_in_bytes -= last_stats.write_bytes[j]; - } - j += 1; - } - - // if the bucket's write_bytes exceed half of the configured region_bucket_size, - // add it to the bucket_ranges for checking update - let bucket_update_diff_size_threshold = - self.ctx.coprocessor_host.cfg.region_bucket_size.0 / 2; - if diff_in_bytes >= bucket_update_diff_size_threshold { - bucket_ranges.push(BucketRange(keys[i].clone(), keys[i + 1].clone())); - } - } - Some(bucket_ranges) + .region_buckets_info() + .gen_bucket_range_for_update(region_bucket_max_size) } fn on_schedule_half_split_region( @@ -6544,7 +6441,7 @@ where fn on_report_region_buckets_tick(&mut self) { if !self.fsm.peer.is_leader() - || self.fsm.peer.region_buckets.is_none() + || self.fsm.peer.region_buckets_info().bucket_stat().is_none() || self.fsm.hibernate_state.group_state() == GroupState::Idle { return; @@ -6552,11 +6449,11 @@ where let region_id = self.region_id(); let peer_id = self.fsm.peer_id(); - let region_buckets = self.fsm.peer.region_buckets.as_mut().unwrap(); + let region_buckets = self.fsm.peer.region_buckets_info_mut().report_bucket_stat(); if let Err(e) = self .ctx .pd_scheduler - .schedule(PdTask::ReportBuckets(region_buckets.clone())) + .schedule(PdTask::ReportBuckets(region_buckets)) { error!( "failed to report region buckets"; @@ -6565,8 +6462,6 @@ where "err" => ?e, ); } - // todo: it will delete in next pr. 
- region_buckets.stats = new_bucket_stats(®ion_buckets.meta); self.register_report_region_buckets_tick(); } @@ -6640,7 +6535,7 @@ where self.fsm.peer.approximate_keys = Some(self.fsm.peer.approximate_keys.unwrap_or_default() + keys); - if let Some(buckets) = &mut self.fsm.peer.region_buckets { + if let Some(buckets) = &mut self.fsm.peer.region_buckets_info_mut().bucket_stat_mut() { buckets.ingest_sst(keys, size); } // The ingested file may be overlapped with the data in engine, so we need to diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 0ca99efffc4..4cae84d1d25 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -85,14 +85,14 @@ pub use self::{ util::{RegionReadProgress, RegionReadProgressRegistry}, worker::{ metrics as worker_metrics, need_compact, AutoSplitController, BatchComponent, Bucket, - BucketRange, CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, CompactThreshold, - FlowStatistics, FlowStatsReporter, KeyEntry, LocalReadContext, LocalReader, - LocalReaderCore, PdStatsMonitor, PdTask, ReadDelegate, ReadExecutor, ReadExecutorProvider, - ReadProgress, ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, - SplitConfig, SplitConfigManager, SplitInfo, StoreMetaDelegate, StoreStatsReporter, - TrackVer, WriteStats, WriterContoller, BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, - DEFAULT_BIG_REGION_BYTE_THRESHOLD, DEFAULT_BIG_REGION_QPS_THRESHOLD, - DEFAULT_BYTE_THRESHOLD, DEFAULT_QPS_THRESHOLD, NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, - REGION_CPU_OVERLOAD_THRESHOLD_RATIO, + BucketRange, BucketStatsInfo, CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, + CompactThreshold, FlowStatistics, FlowStatsReporter, KeyEntry, LocalReadContext, + LocalReader, LocalReaderCore, PdStatsMonitor, PdTask, ReadDelegate, ReadExecutor, + ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, + SplitCheckRunner, SplitCheckTask, 
SplitConfig, SplitConfigManager, SplitInfo, + StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, WriterContoller, + BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, DEFAULT_BIG_REGION_BYTE_THRESHOLD, + DEFAULT_BIG_REGION_QPS_THRESHOLD, DEFAULT_BYTE_THRESHOLD, DEFAULT_QPS_THRESHOLD, + NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, REGION_CPU_OVERLOAD_THRESHOLD_RATIO, }, }; diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index abe14bf7c2a..5511c976062 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -43,7 +43,7 @@ use kvproto::{ }, }; use parking_lot::RwLockUpgradableReadGuard; -use pd_client::{BucketStat, INVALID_ID}; +use pd_client::INVALID_ID; use protobuf::Message; use raft::{ self, @@ -80,6 +80,7 @@ use super::{ self, check_req_region_epoch, is_initial_msg, AdminCmdEpochState, ChangePeerI, ConfChangeKind, Lease, LeaseState, NORMAL_REQ_CHECK_CONF_VER, NORMAL_REQ_CHECK_VER, }, + worker::BucketStatsInfo, DestroyPeerJob, LocalReadContext, }; use crate::{ @@ -781,9 +782,8 @@ where persisted_number: u64, /// The context of applying snapshot. apply_snap_ctx: Option, - /// region buckets. - pub region_buckets: Option, - pub last_region_buckets: Option, + /// region buckets info in this region. + region_buckets_info: BucketStatsInfo, /// lead_transferee if this peer(leader) is in a leadership transferring. 
pub lead_transferee: u64, pub unsafe_recovery_state: Option, @@ -932,8 +932,7 @@ where unpersisted_ready: None, persisted_number: 0, apply_snap_ctx: None, - region_buckets: None, - last_region_buckets: None, + region_buckets_info: BucketStatsInfo::default(), lead_transferee: raft::INVALID_ID, unsafe_recovery_state: None, snapshot_recovery_state: None, @@ -1321,6 +1320,16 @@ where self.get_store().region() } + #[inline] + pub fn region_buckets_info_mut(&mut self) -> &mut BucketStatsInfo { + &mut self.region_buckets_info + } + + #[inline] + pub fn region_buckets_info(&self) -> &BucketStatsInfo { + &self.region_buckets_info + } + /// Check whether the peer can be hibernated. /// /// This should be used with `check_after_tick` to get a correct conclusion. @@ -2849,7 +2858,10 @@ where commit_term, committed_entries, cbs, - self.region_buckets.as_ref().map(|b| b.meta.clone()), + self.region_buckets_info() + .bucket_stat() + .as_ref() + .map(|b| b.meta.clone()), ); apply.on_schedule(&ctx.raft_metrics); self.mut_store() @@ -3389,10 +3401,7 @@ where } pub fn reset_region_buckets(&mut self) { - if self.region_buckets.is_some() { - self.last_region_buckets = self.region_buckets.take(); - self.region_buckets = None; - } + self.region_buckets_info_mut().set_bucket_stat(None); } /// Try to renew leader lease. 
@@ -4709,7 +4718,11 @@ where let mut resp = reader.execute(&req, &Arc::new(region), read_index, None); if let Some(snap) = resp.snapshot.as_mut() { snap.txn_ext = Some(self.txn_ext.clone()); - snap.bucket_meta = self.region_buckets.as_ref().map(|b| b.meta.clone()); + snap.bucket_meta = self + .region_buckets_info() + .bucket_stat() + .as_ref() + .map(|s| s.meta.clone()); } resp.txn_extra_op = self.txn_extra_op.load(); cmd_resp::bind_term(&mut resp.response, self.term()); diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index e79f37a4bc4..c6783238520 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -42,7 +42,8 @@ pub use self::{ }, region::{Runner as RegionRunner, Task as RegionTask}, split_check::{ - Bucket, BucketRange, KeyEntry, Runner as SplitCheckRunner, Task as SplitCheckTask, + Bucket, BucketRange, BucketStatsInfo, KeyEntry, Runner as SplitCheckRunner, + Task as SplitCheckTask, }, split_config::{ SplitConfig, SplitConfigManager, BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 2d54c00baa6..2694481494f 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -440,7 +440,11 @@ impl ReadDelegate { read_progress: peer.read_progress.clone(), pending_remove: false, wait_data: false, - bucket_meta: peer.region_buckets.as_ref().map(|b| b.meta.clone()), + bucket_meta: peer + .region_buckets_info() + .bucket_stat() + .as_ref() + .map(|b| b.meta.clone()), track_ver: TrackVer::new(), } } diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index 4ff853f70a0..94708e84f7a 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -5,6 +5,7 @@ use std::{ 
collections::BinaryHeap, fmt::{self, Display, Formatter}, mem, + sync::Arc, }; use engine_traits::{ @@ -12,21 +13,23 @@ use engine_traits::{ }; use file_system::{IoType, WithIoType}; use itertools::Itertools; -use kvproto::{metapb::Region, pdpb::CheckPolicy}; +use kvproto::{ + metapb::{Region, RegionEpoch}, + pdpb::CheckPolicy, +}; use online_config::{ConfigChange, OnlineConfig}; +use pd_client::{BucketMeta, BucketStat}; use tikv_util::{ box_err, debug, error, info, keybuilder::KeyBuilder, warn, worker::Runnable, Either, }; use txn_types::Key; use super::metrics::*; -#[cfg(any(test, feature = "testexport"))] -use crate::coprocessor::Config; use crate::{ coprocessor::{ dispatcher::StoreHandle, split_observer::{is_valid_split_key, strip_timestamp_if_exists}, - CoprocessorHost, SplitCheckerHost, + Config, CoprocessorHost, SplitCheckerHost, }, Result, }; @@ -144,6 +147,216 @@ pub struct Bucket { pub size: u64, } +#[derive(Debug, Clone, Default)] +pub struct BucketStatsInfo { + // the stats is increment flow. + bucket_stat: Option, + // the report bucket stat records the increment stats after last report pd. + // it will be reset after report pd. + report_bucket_stat: Option, + // avoid the version roll back, it record the last bucket version if bucket stat isn't none. + last_bucket_version: u64, +} + +impl BucketStatsInfo { + /// returns all bucket ranges those's write_bytes exceed the given + /// diff_size_threshold. 
+ pub fn gen_bucket_range_for_update( + &self, + region_bucket_max_size: u64, + ) -> Option> { + let region_buckets = self.bucket_stat.as_ref()?; + let stats = ®ion_buckets.stats; + let keys = ®ion_buckets.meta.keys; + let sizes = ®ion_buckets.meta.sizes; + + let mut suspect_bucket_ranges = vec![]; + assert_eq!(keys.len(), stats.write_bytes.len() + 1); + for i in 0..stats.write_bytes.len() { + let estimated_bucket_size = stats.write_bytes[i] + sizes[i]; + if estimated_bucket_size >= region_bucket_max_size { + suspect_bucket_ranges.push(BucketRange(keys[i].clone(), keys[i + 1].clone())); + } + } + Some(suspect_bucket_ranges) + } + + #[inline] + pub fn version(&self) -> u64 { + self.bucket_stat + .as_ref() + .map_or(self.last_bucket_version, |b| b.meta.version) + } + + #[inline] + pub fn add_bucket_flow(&mut self, delta: &Option) { + if let (Some(buckets), Some(report_buckets), Some(delta)) = ( + self.bucket_stat.as_mut(), + self.report_bucket_stat.as_mut(), + delta, + ) { + buckets.merge(delta); + report_buckets.merge(delta); + } + } + + #[inline] + pub fn set_bucket_stat(&mut self, buckets: Option) { + self.bucket_stat = buckets.clone(); + if let Some(new_buckets) = buckets { + self.last_bucket_version = new_buckets.meta.version; + let mut new_report_buckets = BucketStat::from_meta(new_buckets.meta); + if let Some(old) = &mut self.report_bucket_stat { + new_report_buckets.merge(old); + *old = new_report_buckets; + } else { + self.report_bucket_stat = Some(new_report_buckets); + } + } else { + self.report_bucket_stat = None; + } + } + + #[inline] + pub fn report_bucket_stat(&mut self) -> BucketStat { + let current = self.report_bucket_stat.as_mut().unwrap(); + let delta = current.clone(); + current.clear_stats(); + delta + } + + #[inline] + pub fn bucket_stat(&self) -> &Option { + &self.bucket_stat + } + + #[inline] + pub fn bucket_stat_mut(&mut self) -> Option<&mut BucketStat> { + self.bucket_stat.as_mut() + } + + pub fn on_refresh_region_buckets( + &mut self, + 
cfg: &Config, + next_bucket_version: u64, + buckets: Vec, + region_epoch: RegionEpoch, + region: &Region, + bucket_ranges: Option>, + ) -> bool { + let change_bucket_version: bool; + // The region buckets reset after this region happened split or merge. + // The message should be dropped if it's epoch is lower than the regions. + // The bucket ranges is none when the region buckets is also none. + // So this condition indicates that the region buckets needs to refresh not + // renew. + if let Some(bucket_ranges) = bucket_ranges&&self.bucket_stat.is_some(){ + assert_eq!(buckets.len(), bucket_ranges.len()); + change_bucket_version=self.update_buckets(cfg, next_bucket_version, buckets, region_epoch, &bucket_ranges); + }else{ + change_bucket_version = true; + // when the region buckets is none, the exclusive buckets includes all the + // bucket keys. + self.init_buckets(cfg, next_bucket_version, buckets, region_epoch, region); + } + change_bucket_version + } + + fn update_buckets( + &mut self, + cfg: &Config, + next_bucket_version: u64, + buckets: Vec, + region_epoch: RegionEpoch, + bucket_ranges: &Vec, + ) -> bool { + let origin_region_buckets = self.bucket_stat.as_ref().unwrap(); + let mut change_bucket_version = false; + let mut meta_idx = 0; + let mut region_buckets = origin_region_buckets.clone(); + let mut meta = (*region_buckets.meta).clone(); + meta.region_epoch = region_epoch; + + // bucket stats will clean if the bucket size is updated. + for (bucket, bucket_range) in buckets.into_iter().zip(bucket_ranges) { + // the bucket ranges maybe need to split or merge not all the meta keys, so it + // needs to find the first keys. 
+ while meta_idx < meta.keys.len() && meta.keys[meta_idx] != bucket_range.0 { + meta_idx += 1; + } + // meta_idx can't be not the last entry (which is end key) + if meta_idx >= meta.keys.len() - 1 { + break; + } + // the bucket size is small and does not have split keys, + // then it should be merged with its left neighbor + let region_bucket_merge_size = + cfg.region_bucket_merge_size_ratio * (cfg.region_bucket_size.0 as f64); + if bucket.keys.is_empty() && bucket.size <= (region_bucket_merge_size as u64) { + meta.sizes[meta_idx] = bucket.size; + region_buckets.clean_stats(meta_idx); + // the region has more than one bucket + // and the left neighbor + current bucket size is not very big + if meta.keys.len() > 2 + && meta_idx != 0 + && meta.sizes[meta_idx - 1] + bucket.size < cfg.region_bucket_size.0 * 2 + { + // bucket is too small + region_buckets.left_merge(meta_idx); + meta.left_merge(meta_idx); + change_bucket_version = true; + continue; + } + } else { + // update size + meta.sizes[meta_idx] = bucket.size / (bucket.keys.len() + 1) as u64; + region_buckets.clean_stats(meta_idx); + // insert new bucket keys (split the original bucket) + for bucket_key in bucket.keys { + meta_idx += 1; + region_buckets.split(meta_idx); + meta.split(meta_idx, bucket_key); + change_bucket_version = true; + } + } + meta_idx += 1; + } + if change_bucket_version { + meta.version = next_bucket_version; + } + region_buckets.meta = Arc::new(meta); + self.set_bucket_stat(Some(region_buckets)); + change_bucket_version + } + + fn init_buckets( + &mut self, + cfg: &Config, + next_bucket_version: u64, + mut buckets: Vec, + region_epoch: RegionEpoch, + region: &Region, + ) { + // when the region buckets is none, the exclusive buckets includes all the + // bucket keys. 
+ assert_eq!(buckets.len(), 1); + let bucket_keys = buckets.pop().unwrap().keys; + let bucket_count = bucket_keys.len() + 1; + let mut meta = BucketMeta { + region_id: region.get_id(), + region_epoch, + version: next_bucket_version, + keys: bucket_keys, + sizes: vec![cfg.region_bucket_size.0; bucket_count], + }; + // padding the boundary keys and initialize the flow. + meta.keys.insert(0, region.get_start_key().to_vec()); + meta.keys.push(region.get_end_key().to_vec()); + let bucket_stats = BucketStat::from_meta(Arc::new(meta)); + self.set_bucket_stat(Some(bucket_stats)); + } +} + pub enum Task { SplitCheckTask { region: Region, @@ -702,3 +915,178 @@ where } } } + +#[cfg(test)] +mod tests { + use super::*; + + // create BucketStatsInfo include three keys: ["","100","200",""]. + fn mock_bucket_stats_info() -> BucketStatsInfo { + let mut bucket_stats_info = BucketStatsInfo::default(); + let cfg = Config::default(); + let next_bucket_version = 1; + let bucket_ranges = None; + let mut region_epoch = RegionEpoch::default(); + region_epoch.set_conf_ver(1); + region_epoch.set_version(1); + let mut region = Region::default(); + region.set_id(1); + + let mut buckets = vec![]; + let mut bucket = Bucket::default(); + bucket.keys.push(vec![100]); + bucket.keys.push(vec![200]); + buckets.insert(0, bucket); + + let _ = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets, + region_epoch, + ®ion, + bucket_ranges, + ); + bucket_stats_info + } + + #[test] + pub fn test_version() { + let mut bucket_stats_info = mock_bucket_stats_info(); + assert_eq!(1, bucket_stats_info.version()); + bucket_stats_info.set_bucket_stat(None); + assert_eq!(1, bucket_stats_info.version()); + + let mut meta = BucketMeta::default(); + meta.version = 2; + meta.keys.push(vec![]); + meta.keys.push(vec![]); + let bucket_stat = BucketStat::from_meta(Arc::new(meta)); + bucket_stats_info.set_bucket_stat(Some(bucket_stat)); + assert_eq!(2, bucket_stats_info.version()); + } + + 
#[test] + pub fn test_insert_new_buckets() { + let bucket_stats_info = mock_bucket_stats_info(); + + let cfg = Config::default(); + let bucket_stat = bucket_stats_info.bucket_stat.unwrap(); + assert_eq!( + vec![vec![], vec![100], vec![200], vec![]], + bucket_stat.meta.keys + ); + for i in 0..bucket_stat.stats.write_bytes.len() { + assert_eq!(cfg.region_bucket_size.0, bucket_stat.meta.sizes[i]); + assert_eq!(0, bucket_stat.stats.write_bytes[i]); + } + } + + #[test] + pub fn test_report_buckets() { + let mut bucket_stats_info = mock_bucket_stats_info(); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + let mut delta_bucket_stats = bucket_stats.clone(); + delta_bucket_stats.write_key(&[1], 1); + delta_bucket_stats.write_key(&[201], 1); + bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats.clone())); + let bucket_stats = bucket_stats_info.report_bucket_stat(); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + + let report_bucket_stats = bucket_stats_info.report_bucket_stat(); + assert_eq!(vec![0, 0, 0], report_bucket_stats.stats.write_bytes); + bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats)); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + } + + #[test] + pub fn test_spilt_and_merge_buckets() { + let mut bucket_stats_info = mock_bucket_stats_info(); + let next_bucket_version = 2; + let mut region = Region::default(); + region.set_id(1); + let cfg = Config::default(); + let bucket_size = cfg.region_bucket_size.0; + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + let region_epoch = bucket_stats.meta.region_epoch.clone(); + + // step1: update buckets flow + let mut delta_bucket_stats = bucket_stats.clone(); + delta_bucket_stats.write_key(&[1], 1); + delta_bucket_stats.write_key(&[201], 1); + bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats)); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert_eq!(vec![2, 0, 2], 
bucket_stats.stats.write_bytes); + + // step2: tick not affect anything + let bucket_ranges = Some(vec![]); + let buckets = vec![]; + let mut change_bucket_version = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets, + region_epoch.clone(), + ®ion, + bucket_ranges, + ); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert!(!change_bucket_version); + assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); + + // step3: split key 50 + let mut bucket_ranges = Some(vec![BucketRange(vec![], vec![100])]); + let mut bucket = Bucket::default(); + bucket.keys = vec![vec![50]]; + bucket.size = bucket_size; + let mut buckets = vec![bucket]; + change_bucket_version = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets.clone(), + region_epoch.clone(), + ®ion, + bucket_ranges.clone(), + ); + assert!(change_bucket_version); + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert_eq!( + vec![vec![], vec![50], vec![100], vec![200], vec![]], + bucket_stats.meta.keys + ); + assert_eq!( + vec![bucket_size / 2, bucket_size / 2, bucket_size, bucket_size], + bucket_stats.meta.sizes + ); + assert_eq!(vec![0, 0, 0, 2], bucket_stats.stats.write_bytes); + + // step4: merge [50-100] to [0-50], + bucket_ranges = Some(vec![BucketRange(vec![50], vec![100])]); + let mut bucket = Bucket::default(); + bucket.keys = vec![]; + bucket.size = 0; + buckets = vec![bucket]; + change_bucket_version = bucket_stats_info.on_refresh_region_buckets( + &cfg, + next_bucket_version, + buckets, + region_epoch, + ®ion, + bucket_ranges, + ); + assert!(change_bucket_version); + + let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + assert_eq!( + vec![vec![], vec![100], vec![200], vec![]], + bucket_stats.meta.keys + ); + assert_eq!( + vec![bucket_size / 2, bucket_size, bucket_size], + bucket_stats.meta.sizes + ); + assert_eq!(vec![0, 0, 2], bucket_stats.stats.write_bytes); + + 
// report buckets doesn't be affected by the split and merge. + let report_bucket_stats = bucket_stats_info.report_bucket_stat(); + assert_eq!(vec![4, 0, 2], report_bucket_stats.stats.write_bytes); + } +} diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index c0f75487998..35ee1838865 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -976,14 +976,13 @@ fn test_refresh_region_bucket_keys() { cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); + // case: init bucket info cluster.must_put(b"k11", b"v1"); let mut region = pd_client.get_region(b"k11").unwrap(); - let bucket = Bucket { keys: vec![b"k11".to_vec()], size: 1024 * 1024 * 200, }; - let mut expected_buckets = metapb::Buckets::default(); expected_buckets.set_keys(bucket.clone().keys.into()); expected_buckets @@ -997,6 +996,8 @@ fn test_refresh_region_bucket_keys() { Option::None, Some(expected_buckets.clone()), ); + + // case: bucket range should refresh if epoch changed let conf_ver = region.get_region_epoch().get_conf_ver() + 1; region.mut_region_epoch().set_conf_ver(conf_ver); @@ -1018,6 +1019,7 @@ fn test_refresh_region_bucket_keys() { ); assert_eq!(bucket_version2, bucket_version + 1); + // case: stale epoch will not refresh buckets info let conf_ver = 0; region.mut_region_epoch().set_conf_ver(conf_ver); let bucket_version3 = cluster.refresh_region_bucket_keys( @@ -1028,6 +1030,7 @@ fn test_refresh_region_bucket_keys() { ); assert_eq!(bucket_version3, bucket_version2); + // case: bucket split // now the buckets is ["", "k12", ""]. 
further split ["", k12], [k12, ""] // buckets into more buckets let region = pd_client.get_region(b"k11").unwrap(); @@ -1066,6 +1069,7 @@ fn test_refresh_region_bucket_keys() { ); assert_eq!(bucket_version4, bucket_version3 + 1); + // case: merge buckets // remove k11~k12, k12~k121, k122~[] bucket let buckets = vec![ Bucket { @@ -1107,7 +1111,7 @@ fn test_refresh_region_bucket_keys() { assert_eq!(bucket_version5, bucket_version4 + 1); - // split the region + // case: split the region pd_client.must_split_region(region, pdpb::CheckPolicy::Usekey, vec![b"k11".to_vec()]); let mut buckets = vec![Bucket { keys: vec![b"k10".to_vec()], @@ -1132,7 +1136,7 @@ fn test_refresh_region_bucket_keys() { cluster.refresh_region_bucket_keys(®ion, buckets, None, Some(expected_buckets.clone())); assert_eq!(bucket_version6, bucket_version5 + 1); - // merge the region + // case: merge the region pd_client.must_merge(left_id, right.get_id()); let region = pd_client.get_region(b"k10").unwrap(); let buckets = vec![Bucket { @@ -1145,6 +1149,7 @@ fn test_refresh_region_bucket_keys() { cluster.refresh_region_bucket_keys(®ion, buckets, None, Some(expected_buckets.clone())); assert_eq!(bucket_version7, bucket_version6 + 1); + // case: nothing changed let bucket_version8 = cluster.refresh_region_bucket_keys( ®ion, vec![], From 763069ed660ffe31e5e57586bc7ccfba94cb8f71 Mon Sep 17 00:00:00 2001 From: tongjian <1045931706@qq.com> Date: Thu, 26 Oct 2023 18:17:04 +0800 Subject: [PATCH 0979/1149] raftstore: make release work (#15850) close tikv/tikv#15851 Signed-off-by: bufferflies <1045931706@qq.com> --- components/raftstore/src/store/fsm/peer.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 49f558250ee..14ad09dbde8 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -6067,6 +6067,7 @@ where ); } else { // it means the buckets key range not any 
change, so don't need to refresh. + #[cfg(any(test, feature = "testexport"))] test_only_callback(_cb, region_buckets.meta); return; } From 69ef88b2e9d036d5975973c4d6d5a15278bcb2e0 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Fri, 27 Oct 2023 12:12:33 +0800 Subject: [PATCH 0980/1149] import: write RPC will check region epoch before continue (#15795) close tikv/tikv#15003 Signed-off-by: lance6716 Signed-off-by: tonyxuqqi Co-authored-by: tonyxuqqi --- Makefile | 8 + components/error_code/src/sst_importer.rs | 4 +- .../src/operation/command/write/ingest.rs | 9 +- components/raftstore/src/store/fsm/store.rs | 103 +++------ components/raftstore/src/store/msg.rs | 6 - .../raftstore/src/store/worker/cleanup.rs | 19 +- .../raftstore/src/store/worker/cleanup_sst.rs | 120 +---------- components/server/src/server.rs | 2 + components/server/src/server2.rs | 2 + components/sst_importer/src/errors.rs | 8 + components/sst_importer/src/import_file.rs | 50 +++-- components/sst_importer/src/lib.rs | 2 +- components/sst_importer/src/sst_importer.rs | 12 +- components/test_raftstore-v2/src/server.rs | 1 + components/test_raftstore/src/server.rs | 1 + src/import/sst_service.rs | 197 ++++++++++++++++-- 16 files changed, 300 insertions(+), 244 deletions(-) diff --git a/Makefile b/Makefile index bb1d7316e1b..ce8d4e8b793 100644 --- a/Makefile +++ b/Makefile @@ -406,6 +406,14 @@ docker_test: ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ make test +docker_shell: + docker build -f Dockerfile.test \ + -t ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ + . 
+ docker run -it -v $(shell pwd):/tikv \ + ${DEV_DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ + /bin/bash + ## The driver for script/run-cargo.sh ## ---------------------------------- diff --git a/components/error_code/src/sst_importer.rs b/components/error_code/src/sst_importer.rs index 117400e8aff..9e568ee00c1 100644 --- a/components/error_code/src/sst_importer.rs +++ b/components/error_code/src/sst_importer.rs @@ -25,5 +25,7 @@ define_error_codes!( RESOURCE_NOT_ENOUTH => ("ResourceNotEnough", "", ""), SUSPENDED => ("Suspended", "this request has been suspended.", - "Probably there are some export tools don't support exporting data inserted by `ingest`(say, snapshot backup). Check the user manual and stop them.") + "Probably there are some export tools don't support exporting data inserted by `ingest`(say, snapshot backup). Check the user manual and stop them."), + REQUEST_TOO_NEW => ("RequestTooNew", "", ""), + REQUEST_TOO_OLD => ("RequestTooOld", "", "") ); diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index e963434fe83..45247b3f36f 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -43,6 +43,11 @@ impl Store { let import_size = box_try!(ctx.sst_importer.get_total_size()); STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(ctx.sst_importer.list_ssts()); + // filter old version SSTs + let ssts: Vec<_> = ssts + .into_iter() + .filter(|sst| sst.1 >= sst_importer::API_VERSION_2) + .collect(); if ssts.is_empty() { return Ok(()); } @@ -50,9 +55,9 @@ impl Store { let mut region_ssts: HashMap<_, Vec<_>> = HashMap::default(); for sst in ssts { region_ssts - .entry(sst.get_region_id()) + .entry(sst.0.get_region_id()) .or_default() - .push(sst); + .push(sst.0); } let ranges = ctx.sst_importer.ranges_in_import(); diff --git 
a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index aa8fa7c318e..2efcbf87b09 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -14,7 +14,7 @@ use std::{ atomic::{AtomicU64, Ordering}, Arc, Mutex, }, - time::{Duration, Instant}, + time::{Duration, Instant, SystemTime}, u64, }; @@ -36,14 +36,13 @@ use futures::{compat::Future01CompatExt, FutureExt}; use grpcio_health::HealthService; use keys::{self, data_end_key, data_key, enc_end_key, enc_start_key}; use kvproto::{ - import_sstpb::{SstMeta, SwitchMode}, metapb::{self, Region, RegionEpoch}, pdpb::{self, QueryStats, StoreStats}, raft_cmdpb::{AdminCmdType, AdminRequest}, raft_serverpb::{ExtraMessage, ExtraMessageType, PeerState, RaftMessage, RegionLocalState}, replication_modepb::{ReplicationMode, ReplicationStatus}, }; -use pd_client::{metrics::STORE_SIZE_EVENT_INT_VEC, Feature, FeatureGate, PdClient}; +use pd_client::{Feature, FeatureGate, PdClient}; use protobuf::Message; use raft::StateRole; use resource_control::{channel::unbounded, ResourceGroupManager}; @@ -813,9 +812,6 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> } } StoreMsg::CompactedEvent(event) => self.on_compaction_finished(event), - StoreMsg::ValidateSstResult { invalid_ssts } => { - self.on_validate_sst_result(invalid_ssts) - } StoreMsg::ClearRegionSizeInRange { start_key, end_key } => { self.clear_region_size_in_range(&start_key, &end_key) } @@ -1655,12 +1651,7 @@ impl RaftBatchSystem { ); let compact_runner = CompactRunner::new(engines.kv.clone()); - let cleanup_sst_runner = CleanupSstRunner::new( - meta.get_id(), - self.router.clone(), - Arc::clone(&importer), - Arc::clone(&pd_client), - ); + let cleanup_sst_runner = CleanupSstRunner::new(Arc::clone(&importer)); let gc_snapshot_runner = GcSnapshotRunner::new( meta.get_id(), self.router.clone(), // RaftRouter @@ -2762,62 +2753,47 @@ impl<'a, EK: KvEngine, ER: 
RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } -impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER, T> { - fn on_validate_sst_result(&mut self, ssts: Vec) { - if ssts.is_empty() || self.ctx.importer.get_mode() == SwitchMode::Import { - return; - } - // A stale peer can still ingest a stale Sst before it is - // destroyed. We need to make sure that no stale peer exists. - let mut delete_ssts = Vec::new(); - { - let meta = self.ctx.store_meta.lock().unwrap(); - for sst in ssts { - if !meta.regions.contains_key(&sst.get_region_id()) { - delete_ssts.push(sst); - } - } - } - if delete_ssts.is_empty() { - return; - } - - let task = CleanupSstTask::DeleteSst { ssts: delete_ssts }; - if let Err(e) = self - .ctx - .cleanup_scheduler - .schedule(CleanupTask::CleanupSst(task)) - { - error!( - "schedule to delete ssts failed"; - "store_id" => self.fsm.store.id, - "err" => ?e, - ); - } - } +// we will remove 1-week old version 1 SST files. +const VERSION_1_SST_CLEANUP_DURATION: Duration = Duration::from_secs(7 * 24 * 60 * 60); +impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER, T> { fn on_cleanup_import_sst(&mut self) -> Result<()> { let mut delete_ssts = Vec::new(); - let mut validate_ssts = Vec::new(); - let import_size = box_try!(self.ctx.importer.get_total_size()); - STORE_SIZE_EVENT_INT_VEC.import_size.set(import_size as i64); let ssts = box_try!(self.ctx.importer.list_ssts()); if ssts.is_empty() { return Ok(()); } + let now = SystemTime::now(); { let meta = self.ctx.store_meta.lock().unwrap(); for sst in ssts { - if let Some(r) = meta.regions.get(&sst.get_region_id()) { + if let Some(r) = meta.regions.get(&sst.0.get_region_id()) { let region_epoch = r.get_region_epoch(); - if util::is_epoch_stale(sst.get_region_epoch(), region_epoch) { + if util::is_epoch_stale(sst.0.get_region_epoch(), region_epoch) { // If the SST epoch is stale, it will not be ingested anymore. 
- delete_ssts.push(sst); + delete_ssts.push(sst.0); } + } else if sst.1 >= sst_importer::API_VERSION_2 { + // The write RPC of import sst service have make sure the region do exist at + // the write time, and now the region is not found, + // sst can be deleted because it won't be used by + // ingest in future. + delete_ssts.push(sst.0); } else { - // If the peer doesn't exist, we need to validate the SST through PD. - validate_ssts.push(sst); + // in the old protocol, we can't easily know if the SST will be used in the + // committed raft log, so we only delete the SST + // files that has not be modified for 1 week. + if let Ok(duration) = now.duration_since(sst.2) { + if duration > VERSION_1_SST_CLEANUP_DURATION { + warn!( + "found 1-week old SST file of version 1, will delete it"; + "sst_meta" => ?sst.0, + "last_modified" => ?sst.2 + ); + delete_ssts.push(sst.0); + } + } } } } @@ -2837,27 +2813,6 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } - // When there is an import job running, the region which this sst belongs may - // has not been split from the origin region because the apply thread is so busy - // that it can not apply SplitRequest as soon as possible. So we can not - // delete this sst file. 
- if !validate_ssts.is_empty() && self.ctx.importer.get_mode() != SwitchMode::Import { - let task = CleanupSstTask::ValidateSst { - ssts: validate_ssts, - }; - if let Err(e) = self - .ctx - .cleanup_scheduler - .schedule(CleanupTask::CleanupSst(task)) - { - error!( - "schedule to validate ssts failed"; - "store_id" => self.fsm.store.id, - "err" => ?e, - ); - } - } - Ok(()) } diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index a33ca0e476e..a92e5169549 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -10,7 +10,6 @@ use engine_traits::{CompactedEvent, KvEngine, Snapshot}; use futures::channel::mpsc::UnboundedSender; use kvproto::{ brpb::CheckAdminResponse, - import_sstpb::SstMeta, kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp}, metapb, metapb::RegionEpoch, @@ -824,10 +823,6 @@ where { RaftMessage(InspectedRaftMessage), - ValidateSstResult { - invalid_ssts: Vec, - }, - // Clear region size and keys for all regions in the range, so we can force them to // re-calculate their size later. ClearRegionSizeInRange { @@ -884,7 +879,6 @@ where write!(fmt, "Store {} is unreachable", store_id) } StoreMsg::CompactedEvent(ref event) => write!(fmt, "CompactedEvent cf {}", event.cf()), - StoreMsg::ValidateSstResult { .. 
} => write!(fmt, "Validate SST Result"), StoreMsg::ClearRegionSizeInRange { ref start_key, ref end_key, diff --git a/components/raftstore/src/store/worker/cleanup.rs b/components/raftstore/src/store/worker/cleanup.rs index 632e85f40cc..726b7abe5ce 100644 --- a/components/raftstore/src/store/worker/cleanup.rs +++ b/components/raftstore/src/store/worker/cleanup.rs @@ -3,7 +3,6 @@ use std::fmt::{self, Display, Formatter}; use engine_traits::{KvEngine, RaftEngine}; -use pd_client::PdClient; use tikv_util::worker::Runnable; use super::{ @@ -11,7 +10,6 @@ use super::{ cleanup_sst::{Runner as CleanupSstRunner, Task as CleanupSstTask}, compact::{Runner as CompactRunner, Task as CompactTask}, }; -use crate::store::StoreRouter; pub enum Task { Compact(CompactTask), @@ -29,29 +27,26 @@ impl Display for Task { } } -pub struct Runner +pub struct Runner where E: KvEngine, R: RaftEngine, - S: StoreRouter, { compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, } -impl Runner +impl Runner where E: KvEngine, R: RaftEngine, - C: PdClient, - S: StoreRouter, { pub fn new( compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, - ) -> Runner { + ) -> Runner { Runner { compact, cleanup_sst, @@ -60,12 +55,10 @@ where } } -impl Runnable for Runner +impl Runnable for Runner where E: KvEngine, R: RaftEngine, - C: PdClient, - S: StoreRouter, { type Task = Task; diff --git a/components/raftstore/src/store/worker/cleanup_sst.rs b/components/raftstore/src/store/worker/cleanup_sst.rs index 8174b872f4b..44f188e6f8f 100644 --- a/components/raftstore/src/store/worker/cleanup_sst.rs +++ b/components/raftstore/src/store/worker/cleanup_sst.rs @@ -1,62 +1,30 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{error::Error, fmt, marker::PhantomData, sync::Arc}; +use std::{fmt, sync::Arc}; -use engine_traits::KvEngine; -use kvproto::{import_sstpb::SstMeta, metapb::Region}; -use pd_client::PdClient; +use kvproto::import_sstpb::SstMeta; use sst_importer::SstImporter; -use tikv_util::{error, worker::Runnable}; - -use crate::store::{util::is_epoch_stale, StoreMsg, StoreRouter}; - -type Result = std::result::Result>; +use tikv_util::worker::Runnable; pub enum Task { DeleteSst { ssts: Vec }, - ValidateSst { ssts: Vec }, } impl fmt::Display for Task { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Task::DeleteSst { ref ssts } => write!(f, "Delete {} ssts", ssts.len()), - Task::ValidateSst { ref ssts } => write!(f, "Validate {} ssts", ssts.len()), } } } -pub struct Runner -where - EK: KvEngine, - S: StoreRouter, -{ - store_id: u64, - store_router: S, +pub struct Runner { importer: Arc, - pd_client: Arc, - _engine: PhantomData, } -impl Runner -where - EK: KvEngine, - C: PdClient, - S: StoreRouter, -{ - pub fn new( - store_id: u64, - store_router: S, - importer: Arc, - pd_client: Arc, - ) -> Runner { - Runner { - store_id, - store_router, - importer, - pd_client, - _engine: PhantomData, - } +impl Runner { + pub fn new(importer: Arc) -> Runner { + Runner { importer } } /// Deletes SST files from the importer. @@ -65,78 +33,9 @@ where let _ = self.importer.delete(sst); } } - - fn get_region_by_meta(&self, sst: &SstMeta) -> Result { - // The SST meta has been delivered with a range, use it directly. - // For now, no case will reach this. But this still could be a guard for - // reducing the superise in the future... - if !sst.get_range().get_start().is_empty() || !sst.get_range().get_end().is_empty() { - return self - .pd_client - .get_region(sst.get_range().get_start()) - .map_err(Into::into); - } - // Once there isn't range provided. - let query_by_start_key_of_full_meta = || { - let start_key = self - .importer - .load_start_key_by_meta::(sst)? 
- .ok_or_else(|| -> Box { - "failed to load start key from sst, the sst might be empty".into() - })?; - let region = self.pd_client.get_region(&start_key)?; - Result::Ok(region) - }; - query_by_start_key_of_full_meta() - .map_err(|err| - format!("failed to load full sst meta from disk for {:?} and there isn't extra information provided: {err}", sst.get_uuid()).into() - ) - } - - /// Validates whether the SST is stale or not. - fn handle_validate_sst(&self, ssts: Vec) { - let store_id = self.store_id; - let mut invalid_ssts = Vec::new(); - for sst in ssts { - match self.get_region_by_meta(&sst) { - Ok(r) => { - // The region id may or may not be the same as the - // SST file, but it doesn't matter, because the - // epoch of a range will not decrease anyway. - if is_epoch_stale(r.get_region_epoch(), sst.get_region_epoch()) { - // Region has not been updated. - continue; - } - if r.get_id() == sst.get_region_id() - && r.get_peers().iter().any(|p| p.get_store_id() == store_id) - { - // The SST still belongs to this store. - continue; - } - invalid_ssts.push(sst); - } - Err(e) => { - error!("get region failed"; "err" => %e); - } - } - } - - // We need to send back the result to check for the stale - // peer, which may ingest the stale SST before it is - // destroyed. 
- let msg = StoreMsg::ValidateSstResult { invalid_ssts }; - if let Err(e) = self.store_router.send(msg) { - error!(%e; "send validate sst result failed"); - } - } } -impl Runnable for Runner -where - EK: KvEngine, - C: PdClient, - S: StoreRouter, -{ +impl Runnable for Runner { type Task = Task; fn run(&mut self, task: Task) { @@ -144,9 +43,6 @@ where Task::DeleteSst { ssts } => { self.handle_delete_sst(ssts); } - Task::ValidateSst { ssts } => { - self.handle_validate_sst(ssts); - } } } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 8d44890e5a6..a4b6276a587 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -366,6 +366,7 @@ where router.clone(), config.coprocessor.clone(), )); + let region_info_accessor = RegionInfoAccessor::new(coprocessor_host.as_mut().unwrap()); // Initialize concurrency manager @@ -1080,6 +1081,7 @@ where servers.importer.clone(), None, self.resource_manager.clone(), + Arc::new(self.region_info_accessor.clone()), ); let import_cfg_mgr = import_service.get_config_manager(); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 2593035618d..65d02f58c08 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -948,6 +948,7 @@ where backup_worker.start(backup_endpoint); // Import SST service. 
+ let region_info_accessor = self.region_info_accessor.as_ref().unwrap().clone(); let import_service = ImportSstService::new( self.core.config.import.clone(), self.core.config.raft_store.raft_entry_max_size, @@ -956,6 +957,7 @@ where servers.importer.clone(), Some(self.router.as_ref().unwrap().store_meta().clone()), self.resource_manager.clone(), + Arc::new(region_info_accessor), ); let import_cfg_mgr = import_service.get_config_manager(); diff --git a/components/sst_importer/src/errors.rs b/components/sst_importer/src/errors.rs index acca7523427..e03288bb3e1 100644 --- a/components/sst_importer/src/errors.rs +++ b/components/sst_importer/src/errors.rs @@ -118,6 +118,12 @@ pub enum Error { #[error("Importing a SST file with imcompatible api version")] IncompatibleApiVersion, + #[error("{0}, please retry write later")] + RequestTooNew(String), + + #[error("{0}, please rescan region later")] + RequestTooOld(String), + #[error("Key mode mismatched with the request mode, writer: {:?}, storage: {:?}, key: {}", .writer, .storage_api_version, .key)] InvalidKeyMode { writer: SstWriterType, @@ -213,6 +219,8 @@ impl ErrorCodeExt for Error { Error::InvalidKeyMode { .. } => error_code::sst_importer::INVALID_KEY_MODE, Error::ResourceNotEnough(_) => error_code::sst_importer::RESOURCE_NOT_ENOUTH, Error::Suspended { .. 
} => error_code::sst_importer::SUSPENDED, + Error::RequestTooNew(_) => error_code::sst_importer::REQUEST_TOO_NEW, + Error::RequestTooOld(_) => error_code::sst_importer::REQUEST_TOO_OLD, } } } diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index b270d26a411..b3b7c051ce4 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -6,6 +6,7 @@ use std::{ io::{self, Write}, path::{Path, PathBuf}, sync::Arc, + time::SystemTime, }; use api_version::api_v2::TIDB_RANGES_COMPLEMENT; @@ -440,7 +441,7 @@ impl ImportDir { Ok(real_key.map(ToOwned::to_owned)) } - pub fn list_ssts(&self) -> Result> { + pub fn list_ssts(&self) -> Result> { let mut ssts = Vec::new(); for e in file_system::read_dir(&self.root_dir)? { let e = e?; @@ -449,7 +450,10 @@ impl ImportDir { } let path = e.path(); match parse_meta_from_path(&path) { - Ok(sst) => ssts.push(sst), + Ok(sst) => { + let last_modify = e.metadata()?.modified()?; + ssts.push((sst.0, sst.1, last_modify)) + } Err(e) => error!(%e; "path_to_sst_meta failed"; "path" => %path.display(),), } } @@ -458,20 +462,28 @@ impl ImportDir { } const SST_SUFFIX: &str = ".sst"; - +// version 2: compared to version 1 which is the default version, we will check +// epoch of request and local region in write API. +pub const API_VERSION_2: i32 = 2; + +/// sst_meta_to_path will encode the filepath with default api version (current +/// is 2). So when the SstMeta is created in old version of TiKV and filepath +/// will not correspond to the real file, in the deletion logic we can't remove +/// these files. 
pub fn sst_meta_to_path(meta: &SstMeta) -> Result { Ok(PathBuf::from(format!( - "{}_{}_{}_{}_{}{}", + "{}_{}_{}_{}_{}_{}{}", UuidBuilder::from_slice(meta.get_uuid())?.build(), meta.get_region_id(), meta.get_region_epoch().get_conf_ver(), meta.get_region_epoch().get_version(), meta.get_cf_name(), + API_VERSION_2, SST_SUFFIX, ))) } -pub fn parse_meta_from_path>(path: P) -> Result { +pub fn parse_meta_from_path>(path: P) -> Result<(SstMeta, i32)> { let path = path.as_ref(); let file_name = match path.file_name().and_then(|n| n.to_str()) { Some(name) => name, @@ -500,7 +512,11 @@ pub fn parse_meta_from_path>(path: P) -> Result { // cf_name to path. meta.set_cf_name(elems[4].to_owned()); } - Ok(meta) + let mut api_version = 1; + if elems.len() > 5 { + api_version = elems[5].parse()?; + } + Ok((meta, api_version)) } #[cfg(test)] @@ -520,11 +536,12 @@ mod test { meta.mut_region_epoch().set_version(3); let path = sst_meta_to_path(&meta).unwrap(); - let expected_path = format!("{}_1_2_3_default.sst", uuid); + let expected_path = format!("{}_1_2_3_default_2.sst", uuid); assert_eq!(path.to_str().unwrap(), &expected_path); - let new_meta = parse_meta_from_path(path).unwrap(); - assert_eq!(meta, new_meta); + let meta_with_ver = parse_meta_from_path(path).unwrap(); + assert_eq!(meta, meta_with_ver.0); + assert_eq!(2, meta_with_ver.1); } #[test] @@ -543,8 +560,9 @@ mod test { meta.get_region_epoch().get_version(), SST_SUFFIX, )); - let new_meta = parse_meta_from_path(path).unwrap(); - assert_eq!(meta, new_meta); + let meta_with_ver = parse_meta_from_path(path).unwrap(); + assert_eq!(meta, meta_with_ver.0); + assert_eq!(1, meta_with_ver.1); } #[cfg(feature = "test-engines-rocksdb")] @@ -596,14 +614,20 @@ mod test { w.finish().unwrap(); dp.save(arcmgr.as_deref()).unwrap(); let mut ssts = dir.list_ssts().unwrap(); - ssts.iter_mut().for_each(|meta| { + ssts.iter_mut().for_each(|meta_with_ver| { + let meta = &mut meta_with_ver.0; let start = dir .load_start_key_by_meta::(meta, 
arcmgr.clone()) .unwrap() .unwrap(); meta.mut_range().set_start(start) }); - assert_eq!(ssts, vec![meta]); + assert_eq!( + ssts.iter() + .map(|meta_with_ver| { meta_with_ver.0.clone() }) + .collect(), + vec![meta] + ); } #[test] diff --git a/components/sst_importer/src/lib.rs b/components/sst_importer/src/lib.rs index 0cfc3bab774..ff137005b09 100644 --- a/components/sst_importer/src/lib.rs +++ b/components/sst_importer/src/lib.rs @@ -27,7 +27,7 @@ pub mod sst_importer; pub use self::{ config::{Config, ConfigManager}, errors::{error_inc, Error, Result}, - import_file::sst_meta_to_path, + import_file::{sst_meta_to_path, API_VERSION_2}, import_mode2::range_overlaps, sst_importer::SstImporter, sst_writer::{RawSstWriter, TxnSstWriter}, diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 5cf9f1c6573..358bc0545de 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -11,7 +11,7 @@ use std::{ atomic::{AtomicU64, Ordering}, Arc, }, - time::Duration, + time::{Duration, SystemTime}, }; use collections::HashSet; @@ -1383,9 +1383,9 @@ impl SstImporter { } /// List the basic information of the current SST files. - /// The information contains UUID, region ID, region Epoch. - /// Other fields may be left blank. - pub fn list_ssts(&self) -> Result> { + /// The information contains UUID, region ID, region Epoch, api version, + /// last modified time. Other fields may be left blank. 
+ pub fn list_ssts(&self) -> Result> { self.dir.list_ssts() } @@ -1585,9 +1585,9 @@ mod tests { for sst in &ssts { ingested .iter() - .find(|s| s.get_uuid() == sst.get_uuid()) + .find(|s| s.get_uuid() == sst.0.get_uuid()) .unwrap(); - dir.delete(sst, key_manager.as_deref()).unwrap(); + dir.delete(&sst.0, key_manager.as_deref()).unwrap(); } assert!(dir.list_ssts().unwrap().is_empty()); } diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 299e93eb746..5073304e17a 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -561,6 +561,7 @@ impl ServerCluster { Arc::clone(&importer), Some(store_meta), resource_manager.clone(), + Arc::new(region_info_accessor.clone()), ); // Create deadlock service. diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 0002f36d647..f5c64fa86e9 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -451,6 +451,7 @@ impl ServerCluster { Arc::clone(&importer), None, resource_manager.clone(), + Arc::new(region_info_accessor.clone()), ); // Create deadlock service. 
diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 68403e226f8..92e73ca9f8f 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -27,6 +27,12 @@ use kvproto::{ WriteRequest_oneof_chunk as Chunk, *, }, kvrpcpb::Context, + metapb::RegionEpoch, +}; +use raftstore::{ + coprocessor::{RegionInfo, RegionInfoProvider}, + store::util::is_epoch_stale, + RegionInfoAccessor, }; use raftstore_v2::StoreMeta; use resource_control::{with_resource_limiter, ResourceGroupManager}; @@ -39,7 +45,7 @@ use tikv_kv::{ }; use tikv_util::{ config::ReadableSize, - future::create_stream_with_buffer, + future::{create_stream_with_buffer, paired_future_callback}, sys::thread::ThreadBuildWrapper, time::{Instant, Limiter}, HandyRwLock, @@ -124,6 +130,7 @@ pub struct ImportSstService { limiter: Limiter, task_slots: Arc>>, raft_entry_max_size: ReadableSize, + region_info_accessor: Arc, writer: raft_writer::ThrottledTlsEngineWriter, @@ -318,6 +325,7 @@ impl ImportSstService { importer: Arc, store_meta: Option>>>, resource_manager: Option>, + region_info_accessor: Arc, ) -> Self { let props = tikv_util::thread_group::current_properties(); let eng = Mutex::new(engine.clone()); @@ -365,6 +373,7 @@ impl ImportSstService { limiter: Limiter::new(f64::INFINITY), task_slots: Arc::new(Mutex::new(HashSet::default())), raft_entry_max_size, + region_info_accessor, writer, store_meta, resource_manager, @@ -675,6 +684,45 @@ impl ImportSstService { } } +fn check_local_region_stale( + region_id: u64, + epoch: &RegionEpoch, + local_region_info: Option, +) -> Result<()> { + match local_region_info { + Some(local_region_info) => { + let local_region_epoch = local_region_info.region.region_epoch.unwrap(); + + // when local region epoch is stale, client can retry write later + if is_epoch_stale(&local_region_epoch, epoch) { + return Err(Error::RequestTooNew(format!( + "request region {} is ahead of local region, local epoch {:?}, request epoch {:?}, please retry write later", + 
region_id, local_region_epoch, epoch + ))); + } + // when local region epoch is ahead, client need to rescan region from PD to get + // latest region later + if is_epoch_stale(epoch, &local_region_epoch) { + return Err(Error::RequestTooOld(format!( + "request region {} is staler than local region, local epoch {:?}, request epoch {:?}", + region_id, local_region_epoch, epoch + ))); + } + + // not match means to rescan + Ok(()) + } + None => { + // when region not found, we can't tell whether it's stale or ahead, so we just + // return the safest case + Err(Error::RequestTooOld(format!( + "region {} is not found", + region_id + ))) + } + } +} + #[macro_export] macro_rules! impl_write { ($fn:ident, $req_ty:ident, $resp_ty:ident, $chunk_ty:ident, $writer_fn:ident) => { @@ -686,6 +734,7 @@ macro_rules! impl_write { ) { let import = self.importer.clone(); let tablets = self.tablets.clone(); + let region_info_accessor = self.region_info_accessor.clone(); let (rx, buf_driver) = create_stream_with_buffer(stream, self.cfg.rl().stream_channel_window); let mut rx = rx.map_err(Error::from); @@ -694,8 +743,11 @@ macro_rules! impl_write { let label = stringify!($fn); let resource_manager = self.resource_manager.clone(); let handle_task = async move { - let res = async move { - let first_req = rx.try_next().await?; + let (res, rx) = async move { + let first_req = match rx.try_next().await { + Ok(r) => r, + Err(e) => return (Err(e), Some(rx)), + }; let (meta, resource_limiter) = match first_req { Some(r) => { let limiter = resource_manager.as_ref().and_then(|m| { @@ -708,18 +760,49 @@ macro_rules! 
impl_write { }); match r.chunk { Some($chunk_ty::Meta(m)) => (m, limiter), - _ => return Err(Error::InvalidChunk), + _ => return (Err(Error::InvalidChunk), Some(rx)), } } - _ => return Err(Error::InvalidChunk), + _ => return (Err(Error::InvalidChunk), Some(rx)), }; + // wait the region epoch on this TiKV to catch up with the epoch + // in request, which comes from PD and represents the majority + // peers' status. let region_id = meta.get_region_id(); + let (cb, f) = paired_future_callback(); + if let Err(e) = region_info_accessor + .find_region_by_id(region_id, cb) + .map_err(|e| { + // when region not found, we can't tell whether it's stale or ahead, so + // we just return the safest case + Error::RequestTooOld(format!( + "failed to find region {} err {:?}", + region_id, e + )) + }) + { + return (Err(e), Some(rx)); + }; + let res = match f.await { + Ok(r) => r, + Err(e) => return (Err(From::from(e)), Some(rx)), + }; + if let Err(e) = + check_local_region_stale(region_id, meta.get_region_epoch(), res) + { + return (Err(e), Some(rx)); + }; + let tablet = match tablets.get(region_id) { Some(t) => t, None => { - return Err(Error::Engine( - format!("region {} not found", region_id).into(), - )); + return ( + Err(Error::RequestTooOld(format!( + "region {} not found", + region_id + ))), + Some(rx), + ); } }; @@ -727,10 +810,10 @@ macro_rules! impl_write { Ok(w) => w, Err(e) => { error!("build writer failed {:?}", e); - return Err(Error::InvalidChunk); + return (Err(Error::InvalidChunk), Some(rx)); } }; - let (writer, resource_limiter) = rx + let result = rx .try_fold( (writer, resource_limiter), |(mut writer, limiter), req| async move { @@ -747,7 +830,11 @@ macro_rules! impl_write { .map(|w| (w, limiter)) }, ) - .await?; + .await; + let (writer, resource_limiter) = match result { + Ok(r) => r, + Err(e) => return (Err(e), None), + }; let finish_fn = async { let metas = writer.finish()?; @@ -756,13 +843,18 @@ macro_rules! 
impl_write { }; let metas: Result<_> = with_resource_limiter(finish_fn, resource_limiter).await; - let metas = metas?; + let metas = match metas { + Ok(r) => r, + Err(e) => return (Err(e), None), + }; let mut resp = $resp_ty::default(); resp.set_metas(metas.into()); - Ok(resp) + (Ok(resp), None) } .await; $crate::send_rpc_response!(res, sink, label, timer); + // don't drop rx before send response + _ = rx; }; self.threads.spawn(buf_driver); @@ -1392,14 +1484,19 @@ mod test { use engine_traits::{CF_DEFAULT, CF_WRITE}; use kvproto::{ kvrpcpb::Context, - metapb::RegionEpoch, + metapb::{Region, RegionEpoch}, raft_cmdpb::{RaftCmdRequest, Request}, }; - use protobuf::Message; + use protobuf::{Message, SingularPtrField}; + use raft::StateRole::Follower; + use raftstore::RegionInfo; use tikv_kv::{Modify, WriteData}; use txn_types::{Key, TimeStamp, Write, WriteBatchFlags, WriteType}; - use crate::{import::sst_service::RequestCollector, server::raftkv}; + use crate::{ + import::sst_service::{check_local_region_stale, RequestCollector}, + server::raftkv, + }; fn write(key: &[u8], ty: WriteType, commit_ts: u64, start_ts: u64) -> (Vec, Vec) { let k = Key::from_raw(key).append_ts(TimeStamp::new(commit_ts)); @@ -1683,4 +1780,72 @@ mod test { } assert_eq!(total, 100); } + + #[test] + fn test_write_rpc_check_region_epoch() { + let mut req_epoch = RegionEpoch { + conf_ver: 10, + version: 10, + ..Default::default() + }; + // test for region not found + let result = check_local_region_stale(1, &req_epoch, None); + assert!(result.is_err()); + // check error message contains "rescan region later", client will match this + // string pattern + assert!( + result + .unwrap_err() + .to_string() + .contains("rescan region later") + ); + + let mut local_region_info = RegionInfo { + region: Region { + id: 1, + region_epoch: SingularPtrField::some(req_epoch.clone()), + ..Default::default() + }, + role: Follower, + buckets: 1, + }; + // test the local region epoch is same as request + let result 
= check_local_region_stale(1, &req_epoch, Some(local_region_info.clone())); + result.unwrap(); + + // test the local region epoch is ahead of request + local_region_info + .region + .region_epoch + .as_mut() + .unwrap() + .conf_ver = 11; + let result = check_local_region_stale(1, &req_epoch, Some(local_region_info.clone())); + assert!(result.is_err()); + // check error message contains "rescan region later", client will match this + // string pattern + assert!( + result + .unwrap_err() + .to_string() + .contains("rescan region later") + ); + + req_epoch.conf_ver = 11; + let result = check_local_region_stale(1, &req_epoch, Some(local_region_info.clone())); + result.unwrap(); + + // test the local region epoch is staler than request + req_epoch.version = 12; + let result = check_local_region_stale(1, &req_epoch, Some(local_region_info)); + assert!(result.is_err()); + // check error message contains "retry write later", client will match this + // string pattern + assert!( + result + .unwrap_err() + .to_string() + .contains("retry write later") + ); + } } From f5361da27d6f74070b0641eb72e1f09bd47d5b65 Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 27 Oct 2023 14:41:04 +0800 Subject: [PATCH 0981/1149] doc: Add CPU and heap profiling HTTP API doc (#15852) ref tikv/tikv#15732 Add CPU and heap profiling HTTP API doc Signed-off-by: Connor1996 Signed-off-by: tonyxuqqi Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- doc/http.md | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 doc/http.md diff --git a/doc/http.md b/doc/http.md new file mode 100644 index 00000000000..5aff02e75ea --- /dev/null +++ b/doc/http.md @@ -0,0 +1,142 @@ +# HTTP API + +In the context of the following line: `TIKV_ADDRESS=$TIKV_IP:$TIKV_STATUS_PORT` + +By default: + +- `TIKV_IP` should be set to `127.0.0.1` +- `TIKV_STATUS_PORT` should be set to `20180` + +## CPU Profiling + 
+Collect and export CPU profiling data within a specified time range. + +```bash +curl -H 'Content-Type:' -X GET 'http://$TIKV_ADDRESS/debug/pprof/profile?seconds=&frequency=' +``` + +#### Parameters + +- **seconds** (optional): Specifies the number of seconds to collect CPU profiling data. + - Default: 10 + - Example: `?seconds=20` + +- **frequency** (optional): Specifies the sampling frequency for CPU profiling data. + - Default: 99 + - Example: `?frequency=100` + +- **type** (optional): Specifies the Content-Type of the response. + - Options: `application/protobuf` for raw profile data, any other types for flame graph. + - Default: `N/A` + - Example: `-H "Content-Type:application/protobuf"` + +#### Response + +The server will return CPU profiling data. The response format is determined by the Content-Type in the request header and can be either raw profile data in protobuf format or flame graph in SVG format. + +The raw profile data can be handled by `pprof` tool. For example, use `go tool pprof --http=0.0.0.0:1234 xxx.proto` to open a interactive web browser. + +## Activate Heap Profiling + +Activate heap profiling of jemalloc. When activated, jemalloc would collect memory usage at malloc, demalloc, etc., walking the call stack to capture a backtrace. So it would affect performance in some extent. + +```bash +curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap_activate?interval=' +``` + +#### Parameters + +- **interval** (optional): Specifies the interval (in seconds) for dumping heap profiles in a temporary directory under TiKV data directory. If set to 0, period dumping is disable. You can dump heap profiles manually by the other API. + - Default: 0 + - Example: `?interval=60` + +#### Response + +A confirmation message indicating whether heap profiling activation was successful. If it has been already activated, it would return a error message without any side effect. + +## Deactivate Heap Profiling + +Deactivate the currently running heap profiling. 
+ +```bash +curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap_deactivate' +``` + +#### Response + +If heap profiling is active, it will be stopped. The server will return a message indicating whether the deactivation was successful. +If heap profiling is not currently active, the server will return a message indicating that no heap profiling is running. + +## List Heap Profiles + +List available heap profiling profiles which are periodically dumped when activated by `heap_activate` API with `interval` specified. + +Note that, once deactivation is performed, all existing profiles will be deleted. + +```bash +curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap_list' +``` + +#### Response + +It will return a list of profiles, each represented as a file name and last modification timestamp, in plain text format. The profiles are sorted in reverse order based on their modification timestamps. + +If there are no available heap profiles or heap profiling is inactive, the server will return an empty list. + +## Retrieve Heap Profile + +Collect and export heap profiling data. + +Note that, heap profile is not like CPU profile which is collected within the specified time range right after the request. Instead, heap profile is just a snapshot of the accumulated memory usage at the time of request, as the memory usage is always being collected once activated. + +```bash +curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap?name=&jeprof=' +``` + +#### Parameters + +- **name** (optional): Specifies the name of the heap profile to retrieve. If not specified, a heap profile will be retrieved. + - Default: `` + - Example: `?name=000001.heap` + +- **jeprof** (optional): Indicates whether to use Jeprof to process the heap profile to generate call graph. It needs `perl` being installed. + - Default: false + - Example: `?jeprof=true` + +#### Response + +The server will return heap profiling data. The response format is determined by the `jeprof` parameter. 
If true, the response will be a call graph in SVG format. Otherwise, the response will be raw profile data in jemalloc dedicated format. + +## Heap Profile Symbolization + +The heap profile retrieved by `heap` API by default is a raw profile data in jemalloc dedicated format, which should be handled by `jeporf` to visualize. + +There are two ways to generate a call graph in SVG format from the raw profile data: + +- local: by provided profile and use TiKV binary to resolve symbols + +```bash +jeprof --svg +``` + +- remote: by latest heap profile retrieved by HTTP and use symbolization service provided by TiKV to resolve symbols + +```bash +jeprof --svg http://$TIKV_ADDRESS/debug/pprof/heap +``` + +To support the remote way, TiKV provides a symbolization service to resolve symbols from memory addresses. Jeprof would implicitly call the `.../debug/pprof/symbol` to map call stack's addresses to corresponding function names. For most of the cases, you don't need to +it explicitly. But if you want to use it for other purposes, you can refer as follows. + +```bash +curl -X POST -d '' 'http://$TIKV_ADDRESS/debug/pprof/symbol' +``` + +#### Parameters + +- **address_list** (required): A list of memory addresses to be resolved. The addresses should be provided in hexadecimal format(whether or not start with '0x' is okay), separated by a '+' character. + +#### Response + +A list of resolved symbols in plain text. Each line represented as a hexadecimal address followed by the corresponding function name. If a memory address cannot be resolved, it will be marked with "??". 
+ From 913f783a62b5940186cca6239e4129b2cb3094ea Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 27 Oct 2023 17:06:34 +0800 Subject: [PATCH 0982/1149] metrics: fix TiKV Detail command regex escape issue (#15858) ref tikv/tikv#15832 Signed-off-by: Neil Shen --- metrics/grafana/tikv_details.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 4a72d3c204a..8a43cb24554 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -48858,7 +48858,7 @@ "refId": "StandardVariableQuery" }, "refresh": 1, - "regex": "/\btype=\"([^\"]+)\"/", + "regex": "/\\btype=\"([^\"]+)\"/", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", @@ -48956,4 +48956,4 @@ "title": "Test-Cluster-TiKV-Details", "uid": "RDVQiEzZz", "version": 1 -} \ No newline at end of file +} From 36ff6881787f66a5fb234fd1795983910522c2fa Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 27 Oct 2023 17:50:05 +0800 Subject: [PATCH 0983/1149] metrics: fix 2 incorrect grafana expression (#15860) close tikv/tikv#15859 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/alertmanager/tikv.accelerate.rules.yml | 4 ++-- metrics/alertmanager/tikv.rules.yml | 4 ++-- metrics/grafana/performance_read.json | 4 ++-- metrics/grafana/tikv_details.json | 10 +++++----- metrics/grafana/tikv_fast_tune.json | 10 +++++----- metrics/grafana/tikv_trouble_shooting.json | 8 ++++---- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/metrics/alertmanager/tikv.accelerate.rules.yml b/metrics/alertmanager/tikv.accelerate.rules.yml index 4bc48336c60..e5ad2daa8cf 100644 --- a/metrics/alertmanager/tikv.accelerate.rules.yml +++ b/metrics/alertmanager/tikv.accelerate.rules.yml @@ -32,7 +32,7 @@ groups: - record: tikv_pd_request_duration_seconds:avg:1m expr: sum(rate(tikv_pd_request_duration_seconds_sum{instance=~".*"}[1m])) by (type) / 
sum(rate(tikv_pd_request_duration_seconds_count{instance=~".*"}[1m])) by (type) - record: tikv_coprocessor_request_wait_seconds:p95:1m - expr: histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{instance=~".*"}[1m])) by (le, instance,req)) + expr: histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{instance=~".*", type="all"}[1m])) by (le, instance,req)) - record: tikv_grpc_msg_duration_seconds:avg:1m expr: sum(rate(tikv_grpc_msg_duration_seconds_sum{instance=~".*"}[1m])) by (type) / sum(rate(tikv_grpc_msg_duration_seconds_count[1m])) by (type) - record: tikv_raftstore_apply_wait_time_duration_secs:p99:1m @@ -48,7 +48,7 @@ groups: - record: tikv_coprocessor_request_duration_seconds:1m expr: sum(rate(tikv_coprocessor_request_duration_seconds_bucket{instance=~".*"}[1m])) by (le) - record: tikv_futurepool_pending_task:1m - expr: sum(rate(tikv_futurepool_pending_task_total{instance=~".*"}[1m])) by (name) + expr: sum(avg_over_time(tikv_futurepool_pending_task_total{instance=~".*"}[1m])) by (name) - record: tikv_storage_engine_async_request:1m expr: sum(rate(tikv_storage_engine_async_request_total{instance=~".*", status!~"all|success"}[1m])) by (status) - record: tikv_thread_cpu_seconds_nogrpc:1m diff --git a/metrics/alertmanager/tikv.rules.yml b/metrics/alertmanager/tikv.rules.yml index e43ca401d42..bc092562773 100644 --- a/metrics/alertmanager/tikv.rules.yml +++ b/metrics/alertmanager/tikv.rules.yml @@ -98,12 +98,12 @@ groups: summary: TiKV async request write duration seconds more than 1s - alert: TiKV_coprocessor_request_wait_seconds - expr: histogram_quantile(0.9999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket[1m])) by (le, instance, req)) > 10 + expr: histogram_quantile(0.9999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{type="all"}[1m])) by (le, instance, req)) > 10 for: 1m labels: env: ENV_LABELS_ENV level: critical - expr: histogram_quantile(0.9999, 
sum(rate(tikv_coprocessor_request_wait_seconds_bucket[1m])) by (le, instance, req)) > 10 + expr: histogram_quantile(0.9999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{type="all"}[1m])) by (le, instance, req)) > 10 annotations: description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ $value }}' value: '{{ $value }}' diff --git a/metrics/grafana/performance_read.json b/metrics/grafana/performance_read.json index caa2635d34c..aaf24de396e 100644 --- a/metrics/grafana/performance_read.json +++ b/metrics/grafana/performance_read.json @@ -2686,14 +2686,14 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le,req))", + "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"all\"}[1m])) by (le,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{req}}-100%", "refId": "D" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le,req))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"all\"}[1m])) by (le,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{req}}-99%", diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 8a43cb24554..cc89e8aeae5 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -297,7 +297,7 @@ }, { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "expr": "histogram_quantile(0.99, 
sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le))", "hide": false, "interval": "", "legendFormat": "Cop Wait .99", @@ -25117,7 +25117,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", + "expr": "sum(avg_over_time(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{name}}", @@ -26236,14 +26236,14 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{req}}-100%", "refId": "D" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{req}}-99%", @@ -26340,7 +26340,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by 
(le, instance,req))", + "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le, instance,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}-{{req}}", diff --git a/metrics/grafana/tikv_fast_tune.json b/metrics/grafana/tikv_fast_tune.json index 85e9d5c7f02..f5c3a634c77 100644 --- a/metrics/grafana/tikv_fast_tune.json +++ b/metrics/grafana/tikv_fast_tune.json @@ -2712,7 +2712,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sched-worker-.*\"}[1m]))", + "expr": "sum(avg_over_time(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sched-worker-.*\"}[1m]))", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -5629,7 +5629,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"select|index\"}[1m])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"select|index\", type=\"all\"}[1m])) by (le))", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -5645,14 +5645,14 @@ "refId": "A" }, { - "expr": "histogram_quantile(0.999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"select|index\"}[1m])) by (le))", + "expr": "histogram_quantile(0.999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"select|index\", type=\"all\"}[1m])) by (le))", "format": "time_series", "intervalFactor": 1, "legendFormat": "duration-999%", "refId": "B" }, { - "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"select|index\"}[1m])) by (le))", + "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"select|index\", type=\"all\"}[1m])) by (le))", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -5763,7 +5763,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cop-normal\"}[1m]))", + "expr": "sum(avg_over_time(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cop-normal\"}[1m]))", "format": "time_series", "hide": false, "intervalFactor": 1, diff --git a/metrics/grafana/tikv_trouble_shooting.json b/metrics/grafana/tikv_trouble_shooting.json index bf1fd5baacf..f4f5261ad3c 100644 --- a/metrics/grafana/tikv_trouble_shooting.json +++ b/metrics/grafana/tikv_trouble_shooting.json @@ -3995,14 +3995,14 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.9999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "expr": "histogram_quantile(0.9999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{req}}-99.99%", "refId": "D" }, { - 
"expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{req}}-99%", @@ -4010,7 +4010,7 @@ "step": 4 }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{req}}-95%", @@ -4234,7 +4234,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance,req))", + "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le, instance,req))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}-{{req}}", From 0a34c6f4798e6462739152d2364a202996009984 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:32:05 +0800 Subject: [PATCH 0984/1149] txn: Fix to the prewrite requests retry problem by using TxnStatusCache (#15658) ref tikv/tikv#11187 Signed-off-by: MyonKeminta Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> 
--- components/test_raftstore-v2/src/util.rs | 2 + components/test_raftstore/src/util.rs | 106 +++- src/storage/config.rs | 10 + src/storage/mod.rs | 505 ++++++++++++++++++ src/storage/mvcc/metrics.rs | 16 + .../txn/commands/acquire_pessimistic_lock.rs | 1 + .../acquire_pessimistic_lock_resumed.rs | 3 + src/storage/txn/commands/atomic_store.rs | 6 +- .../txn/commands/check_secondary_locks.rs | 14 +- src/storage/txn/commands/check_txn_status.rs | 10 + src/storage/txn/commands/cleanup.rs | 1 + src/storage/txn/commands/commit.rs | 1 + src/storage/txn/commands/compare_and_swap.rs | 8 +- .../txn/commands/flashback_to_version.rs | 1 + src/storage/txn/commands/mod.rs | 12 +- src/storage/txn/commands/pause.rs | 1 + .../txn/commands/pessimistic_rollback.rs | 3 + src/storage/txn/commands/prewrite.rs | 48 +- src/storage/txn/commands/resolve_lock.rs | 10 +- src/storage/txn/commands/resolve_lock_lite.rs | 6 + src/storage/txn/commands/rollback.rs | 1 + src/storage/txn/commands/txn_heart_beat.rs | 8 +- src/storage/txn/scheduler.rs | 26 + src/storage/txn/txn_status_cache.rs | 17 + tests/failpoints/cases/test_kv_service.rs | 113 +++- tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 27 files changed, 912 insertions(+), 19 deletions(-) diff --git a/components/test_raftstore-v2/src/util.rs b/components/test_raftstore-v2/src/util.rs index af2bab26183..315150e29c2 100644 --- a/components/test_raftstore-v2/src/util.rs +++ b/components/test_raftstore-v2/src/util.rs @@ -544,6 +544,7 @@ impl PeerClient { &self.cli, self.ctx.clone(), muts, + vec![], pk, ts, 0, @@ -557,6 +558,7 @@ impl PeerClient { &self.cli, self.ctx.clone(), muts, + vec![], pk, ts, 0, diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index e88df1fb0ca..ff47525ea37 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -958,6 +958,7 @@ pub fn must_kv_prewrite_with( client: &TikvClient, ctx: Context, 
muts: Vec, + pessimistic_actions: Vec, pk: Vec, ts: u64, for_update_ts: u64, @@ -967,7 +968,7 @@ pub fn must_kv_prewrite_with( let mut prewrite_req = PrewriteRequest::default(); prewrite_req.set_context(ctx); if for_update_ts != 0 { - prewrite_req.pessimistic_actions = vec![DoPessimisticCheck; muts.len()]; + prewrite_req.pessimistic_actions = pessimistic_actions; } prewrite_req.set_mutations(muts.into_iter().collect()); prewrite_req.primary_lock = pk; @@ -994,6 +995,7 @@ pub fn try_kv_prewrite_with( client: &TikvClient, ctx: Context, muts: Vec, + pessimistic_actions: Vec, pk: Vec, ts: u64, for_update_ts: u64, @@ -1004,6 +1006,7 @@ pub fn try_kv_prewrite_with( client, ctx, muts, + pessimistic_actions, pk, ts, for_update_ts, @@ -1017,6 +1020,7 @@ pub fn try_kv_prewrite_with_impl( client: &TikvClient, ctx: Context, muts: Vec, + pessimistic_actions: Vec, pk: Vec, ts: u64, for_update_ts: u64, @@ -1026,7 +1030,7 @@ pub fn try_kv_prewrite_with_impl( let mut prewrite_req = PrewriteRequest::default(); prewrite_req.set_context(ctx); if for_update_ts != 0 { - prewrite_req.pessimistic_actions = vec![DoPessimisticCheck; muts.len()]; + prewrite_req.pessimistic_actions = pessimistic_actions; } prewrite_req.set_mutations(muts.into_iter().collect()); prewrite_req.primary_lock = pk; @@ -1046,7 +1050,7 @@ pub fn try_kv_prewrite( pk: Vec, ts: u64, ) -> PrewriteResponse { - try_kv_prewrite_with(client, ctx, muts, pk, ts, 0, false, false) + try_kv_prewrite_with(client, ctx, muts, vec![], pk, ts, 0, false, false) } pub fn try_kv_prewrite_pessimistic( @@ -1056,7 +1060,18 @@ pub fn try_kv_prewrite_pessimistic( pk: Vec, ts: u64, ) -> PrewriteResponse { - try_kv_prewrite_with(client, ctx, muts, pk, ts, ts, false, false) + let len = muts.len(); + try_kv_prewrite_with( + client, + ctx, + muts, + vec![DoPessimisticCheck; len], + pk, + ts, + ts, + false, + false, + ) } pub fn must_kv_prewrite( @@ -1066,7 +1081,7 @@ pub fn must_kv_prewrite( pk: Vec, ts: u64, ) { - must_kv_prewrite_with(client, 
ctx, muts, pk, ts, 0, false, false) + must_kv_prewrite_with(client, ctx, muts, vec![], pk, ts, 0, false, false) } pub fn must_kv_prewrite_pessimistic( @@ -1076,7 +1091,18 @@ pub fn must_kv_prewrite_pessimistic( pk: Vec, ts: u64, ) { - must_kv_prewrite_with(client, ctx, muts, pk, ts, ts, false, false) + let len = muts.len(); + must_kv_prewrite_with( + client, + ctx, + muts, + vec![DoPessimisticCheck; len], + pk, + ts, + ts, + false, + false, + ) } pub fn must_kv_commit( @@ -1232,6 +1258,50 @@ pub fn must_check_txn_status( resp } +pub fn must_kv_have_locks( + client: &TikvClient, + ctx: Context, + ts: u64, + start_key: &[u8], + end_key: &[u8], + expected_locks: &[( + // key + &[u8], + Op, + // start_ts + u64, + // for_update_ts + u64, + )], +) { + let mut req = ScanLockRequest::default(); + req.set_context(ctx); + req.set_limit(100); + req.set_start_key(start_key.to_vec()); + req.set_end_key(end_key.to_vec()); + req.set_max_version(ts); + let resp = client.kv_scan_lock(&req).unwrap(); + assert!(!resp.has_region_error(), "{:?}", resp.get_region_error()); + assert!(resp.error.is_none(), "{:?}", resp.get_error()); + + assert_eq!( + resp.locks.len(), + expected_locks.len(), + "lock count not match, expected: {:?}; got: {:?}", + expected_locks, + resp.locks + ); + + for (lock_info, (expected_key, expected_op, expected_start_ts, expected_for_update_ts)) in + resp.locks.into_iter().zip(expected_locks.iter()) + { + assert_eq!(lock_info.get_key(), *expected_key); + assert_eq!(lock_info.get_lock_type(), *expected_op); + assert_eq!(lock_info.get_lock_version(), *expected_start_ts); + assert_eq!(lock_info.get_lock_for_update_ts(), *expected_for_update_ts); + } +} + pub fn get_tso(pd_client: &TestPdClient) -> u64 { block_on(pd_client.get_tso()).unwrap().into_inner() } @@ -1440,11 +1510,31 @@ impl PeerClient { } pub fn must_kv_prewrite_async_commit(&self, muts: Vec, pk: Vec, ts: u64) { - must_kv_prewrite_with(&self.cli, self.ctx.clone(), muts, pk, ts, 0, true, false) + 
must_kv_prewrite_with( + &self.cli, + self.ctx.clone(), + muts, + vec![], + pk, + ts, + 0, + true, + false, + ) } pub fn must_kv_prewrite_one_pc(&self, muts: Vec, pk: Vec, ts: u64) { - must_kv_prewrite_with(&self.cli, self.ctx.clone(), muts, pk, ts, 0, false, true) + must_kv_prewrite_with( + &self.cli, + self.ctx.clone(), + muts, + vec![], + pk, + ts, + 0, + false, + true, + ) } pub fn must_kv_commit(&self, keys: Vec>, start_ts: u64, commit_ts: u64) { diff --git a/src/storage/config.rs b/src/storage/config.rs index a40db2c424b..91c98ebf57b 100644 --- a/src/storage/config.rs +++ b/src/storage/config.rs @@ -31,6 +31,13 @@ const DEFAULT_SCHED_PENDING_WRITE_MB: u64 = 100; const DEFAULT_RESERVED_SPACE_GB: u64 = 5; const DEFAULT_RESERVED_RAFT_SPACE_GB: u64 = 1; +// In tests, we've observed 1.2M entries in the TxnStatusCache. We +// conservatively set the limit to 5M entries in total. +// As TxnStatusCache have 128 slots by default. We round it to 5.12M. +// This consumes at most around 300MB memory theoretically, but usually it's +// much less as it's hard to see the capacity being used up. +const DEFAULT_TXN_STATUS_CACHE_CAPACITY: usize = 40_000 * 128; + // Block cache capacity used when TikvConfig isn't validated. It should only // occur in tests. 
const FALLBACK_BLOCK_CACHE_CAPACITY: ReadableSize = ReadableSize::mb(128); @@ -76,6 +83,8 @@ pub struct Config { pub background_error_recovery_window: ReadableDuration, /// Interval to check TTL for all SSTs, pub ttl_check_poll_interval: ReadableDuration, + #[online_config(skip)] + pub txn_status_cache_capacity: usize, #[online_config(submodule)] pub flow_control: FlowControlConfig, #[online_config(submodule)] @@ -105,6 +114,7 @@ impl Default for Config { api_version: 1, enable_ttl: false, ttl_check_poll_interval: ReadableDuration::hours(12), + txn_status_cache_capacity: DEFAULT_TXN_STATUS_CACHE_CAPACITY, flow_control: FlowControlConfig::default(), block_cache: BlockCacheConfig::default(), io_rate_limit: IoRateLimitConfig::default(), diff --git a/src/storage/mod.rs b/src/storage/mod.rs index cb4057bfd7e..cc48d9e36e3 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3853,6 +3853,7 @@ mod tests { commands, commands::{AcquirePessimisticLock, Prewrite}, tests::must_rollback, + txn_status_cache::TxnStatusCache, Error as TxnError, ErrorInner as TxnErrorInner, }, types::{PessimisticLockKeyResult, PessimisticLockResults}, @@ -3884,6 +3885,7 @@ mod tests { statistics: &mut Statistics::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .unwrap(); @@ -10869,4 +10871,507 @@ mod tests { // Prewrite still succeeds rx.recv().unwrap().unwrap(); } + + #[test] + fn test_prewrite_cached_committed_transaction_do_not_skip_constraint_check() { + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) + .build() + .unwrap(); + let cm = storage.concurrency_manager.clone(); + let k1 = Key::from_raw(b"k1"); + let pk = b"pk"; + // Simulate the case that the current TiKV instance have a non-unique + // index key of a pessimistic transaction. It won't be pessimistic + // locked, and prewrite skips constraint checks. 
+ // Simulate the case that a prewrite is performed twice, with async + // commit enabled, and max_ts changes when the second request arrives. + + // A retrying prewrite request arrives. + cm.update_max_ts(20.into()); + let mut ctx = Context::default(); + ctx.set_is_retry_request(true); + let (tx, rx) = channel(); + storage + .sched_txn_command( + commands::PrewritePessimistic::new( + vec![( + Mutation::make_put(k1.clone(), b"v".to_vec()), + SkipPessimisticCheck, + )], + pk.to_vec(), + 10.into(), + 3000, + 10.into(), + 1, + 11.into(), + 0.into(), + Some(vec![]), + false, + AssertionLevel::Off, + vec![], + ctx, + ), + Box::new(move |res| { + tx.send(res).unwrap(); + }), + ) + .unwrap(); + + let res = rx.recv().unwrap().unwrap(); + assert_eq!(res.min_commit_ts, 21.into()); + + // Commit it. + let (tx, rx) = channel(); + storage + .sched_txn_command( + commands::Commit::new(vec![k1.clone()], 10.into(), 21.into(), Context::default()), + expect_ok_callback(tx, 0), + ) + .unwrap(); + rx.recv().unwrap(); + + // The txn's status is cached + assert_eq!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(10.into()) + .unwrap(), + 21.into() + ); + + // Check committed; push max_ts to 30 + assert_eq!( + block_on(storage.get(Context::default(), k1.clone(), 30.into())) + .unwrap() + .0, + Some(b"v".to_vec()) + ); + + let (tx, rx) = channel(); + storage + .sched_txn_command( + commands::PrewritePessimistic::new( + vec![( + Mutation::make_put(k1.clone(), b"v".to_vec()), + SkipPessimisticCheck, + )], + pk.to_vec(), + 10.into(), + 3000, + 10.into(), + 1, + 11.into(), + 0.into(), + Some(vec![]), + false, + AssertionLevel::Off, + vec![], + Context::default(), + ), + Box::new(move |res| { + tx.send(res).unwrap(); + }), + ) + .unwrap(); + let res = rx.recv().unwrap().unwrap(); + assert_eq!(res.min_commit_ts, 21.into()); + + // Key must not be locked. 
+ assert_eq!( + block_on(storage.get(Context::default(), k1, 50.into())) + .unwrap() + .0, + Some(b"v".to_vec()) + ); + } + + #[test] + fn test_updating_txn_status_cache() { + let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) + .build() + .unwrap(); + let cm = storage.concurrency_manager.clone(); + + // Commit + let (tx, rx) = channel(); + storage + .sched_txn_command( + commands::PrewritePessimistic::new( + vec![( + Mutation::make_put(Key::from_raw(b"k1"), b"v1".to_vec()), + SkipPessimisticCheck, + )], + b"k1".to_vec(), + 10.into(), + 3000, + 10.into(), + 1, + 11.into(), + 0.into(), + Some(vec![]), + false, + AssertionLevel::Off, + vec![], + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(10.into()) + .is_none() + ); + + storage + .sched_txn_command( + commands::Commit::new( + vec![Key::from_raw(b"k1")], + 10.into(), + 20.into(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert_eq!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(10.into()) + .unwrap(), + 20.into() + ); + + // Unsuccessful commit won't update cache + storage + .sched_txn_command( + commands::Commit::new( + vec![Key::from_raw(b"k2")], + 30.into(), + 40.into(), + Context::default(), + ), + expect_fail_callback(tx, 0, |_| ()), + ) + .unwrap(); + rx.recv().unwrap(); + assert!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(30.into()) + .is_none() + ); + + // 1PC update + let (tx, rx) = channel(); + cm.update_max_ts(59.into()); + storage + .sched_txn_command( + Prewrite::new( + vec![Mutation::make_put(Key::from_raw(b"k3"), b"v3".to_vec())], + b"k3".to_vec(), + 50.into(), + 3000, + false, + 1, + 51.into(), + 0.into(), + Some(vec![]), + true, + AssertionLevel::Off, + Context::default(), + ), + Box::new(move |res| { + tx.send(res).unwrap(); + }), + ) + .unwrap(); + let 
res = rx.recv().unwrap().unwrap(); + assert_eq!(res.one_pc_commit_ts, 60.into()); + assert_eq!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(50.into()) + .unwrap(), + 60.into() + ); + + // Resolve lock commit + let (tx, rx) = channel(); + storage + .sched_txn_command( + Prewrite::new( + vec![Mutation::make_put(Key::from_raw(b"k4"), b"v4".to_vec())], + b"pk".to_vec(), + 70.into(), + 3000, + false, + 1, + 0.into(), + 0.into(), + None, + false, + AssertionLevel::Off, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + + storage + .sched_txn_command( + commands::ResolveLockReadPhase::new( + vec![(TimeStamp::from(70), TimeStamp::from(80))] + .into_iter() + .collect(), + None, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert_eq!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(70.into()) + .unwrap(), + 80.into() + ); + + // Resolve lock lite + storage + .sched_txn_command( + Prewrite::new( + vec![Mutation::make_put(Key::from_raw(b"k5"), b"v5".to_vec())], + b"pk".to_vec(), + 90.into(), + 3000, + false, + 1, + 0.into(), + 0.into(), + None, + false, + AssertionLevel::Off, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + + storage + .sched_txn_command( + commands::ResolveLockLite::new( + 90.into(), + 100.into(), + vec![Key::from_raw(b"k5")], + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert_eq!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(90.into()) + .unwrap(), + 100.into() + ); + + // CheckTxnStatus: uncommitted transaction + storage + .sched_txn_command( + commands::CheckTxnStatus::new( + Key::from_raw(b"k1"), + 9.into(), + 110.into(), + 110.into(), + true, + false, + false, + false, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + 
assert!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(9.into()) + .is_none() + ); + + // CheckTxnStatus: committed transaction + storage.sched.get_txn_status_cache().remove(10.into()); + storage + .sched_txn_command( + commands::CheckTxnStatus::new( + Key::from_raw(b"k1"), + 10.into(), + 110.into(), + 110.into(), + true, + false, + false, + false, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert_eq!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(10.into()) + .unwrap(), + 20.into() + ); + + // CheckSecondaryLocks: uncommitted transaction + storage + .sched_txn_command( + Prewrite::new( + vec![Mutation::make_put(Key::from_raw(b"k6"), b"v6".to_vec())], + b"pk".to_vec(), + 120.into(), + 3000, + false, + 1, + 0.into(), + 0.into(), + Some(vec![]), + false, + AssertionLevel::Off, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + + // Lock exists but the transaction status is still unknown + storage + .sched_txn_command( + commands::CheckSecondaryLocks::new( + vec![Key::from_raw(b"k6")], + 120.into(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(120.into()) + .is_none() + ); + + // One of the lock doesn't exist so the transaction becomes rolled-back status. 
+ storage + .sched_txn_command( + commands::CheckSecondaryLocks::new( + vec![Key::from_raw(b"k6"), Key::from_raw(b"k7")], + 120.into(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(120.into()) + .is_none() + ); + + // CheckSecondaryLocks: committed transaction + storage + .sched_txn_command( + Prewrite::new( + vec![ + Mutation::make_put(Key::from_raw(b"k8"), b"v8".to_vec()), + Mutation::make_put(Key::from_raw(b"k9"), b"v9".to_vec()), + ], + b"pk".to_vec(), + 130.into(), + 3000, + false, + 1, + 0.into(), + 0.into(), + Some(vec![]), + false, + AssertionLevel::Off, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + // Commit one of the key + storage + .sched_txn_command( + commands::Commit::new( + vec![Key::from_raw(b"k9")], + 130.into(), + 140.into(), + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert_eq!( + storage + .sched + .get_txn_status_cache() + .remove(130.into()) + .unwrap(), + 140.into() + ); + + storage + .sched_txn_command( + commands::CheckSecondaryLocks::new( + vec![Key::from_raw(b"k8"), Key::from_raw(b"k9")], + 130.into(), + Context::default(), + ), + expect_ok_callback(tx, 0), + ) + .unwrap(); + rx.recv().unwrap(); + assert_eq!( + storage + .sched + .get_txn_status_cache() + .get_no_promote(130.into()) + .unwrap(), + 140.into() + ); + } } diff --git a/src/storage/mvcc/metrics.rs b/src/storage/mvcc/metrics.rs index 3c4bda63f7e..22d2760a769 100644 --- a/src/storage/mvcc/metrics.rs +++ b/src/storage/mvcc/metrics.rs @@ -51,6 +51,13 @@ make_static_metric! { pub struct MvccPrewriteAssertionPerfCounterVec: IntCounter { "type" => MvccPrewriteAssertionPerfKind, } + + pub struct MvccPrewriteRequestAfterCommitCounterVec: IntCounter { + "type" => { + non_retry_req, + retry_req, + }, + } } lazy_static! 
{ @@ -104,4 +111,13 @@ lazy_static! { ) .unwrap() }; + pub static ref MVCC_PREWRITE_REQUEST_AFTER_COMMIT_COUNTER_VEC: MvccPrewriteRequestAfterCommitCounterVec = { + register_static_int_counter_vec!( + MvccPrewriteRequestAfterCommitCounterVec, + "tikv_storage_mvcc_prewrite_request_after_commit_counter", + "Counter of prewrite requests of already-committed transactions that are determined by checking TxnStatucCache", + &["type"] + ) + .unwrap() + }; } diff --git a/src/storage/txn/commands/acquire_pessimistic_lock.rs b/src/storage/txn/commands/acquire_pessimistic_lock.rs index 58c33706bbc..ceb7957c926 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock.rs @@ -183,6 +183,7 @@ impl WriteCommand for AcquirePessimisticLock new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnProposed, + known_txn_status: vec![], }) } } diff --git a/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs b/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs index 7640edd7c0c..a1e2e6fc119 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs @@ -194,6 +194,7 @@ impl WriteCommand for AcquirePessimisticLockR new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnProposed, + known_txn_status: vec![], }) } } @@ -239,6 +240,7 @@ mod tests { txn::{ commands::pessimistic_rollback::tests::must_success as must_pessimistic_rollback, tests::{must_commit, must_pessimistic_locked, must_prewrite_put, must_rollback}, + txn_status_cache::TxnStatusCache, }, TestEngineBuilder, }; @@ -275,6 +277,7 @@ mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .unwrap(); diff --git a/src/storage/txn/commands/atomic_store.rs b/src/storage/txn/commands/atomic_store.rs index 9a54895e7e2..4bca5d514c5 100644 --- 
a/src/storage/txn/commands/atomic_store.rs +++ b/src/storage/txn/commands/atomic_store.rs @@ -63,6 +63,7 @@ impl WriteCommand for RawAtomicStore { new_acquired_locks: vec![], lock_guards: raw_ext.into_iter().map(|r| r.key_guard).collect(), response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } @@ -77,7 +78,9 @@ mod tests { use super::*; use crate::storage::{ - lock_manager::MockLockManager, txn::scheduler::get_raw_ext, Statistics, TestEngineBuilder, + lock_manager::MockLockManager, + txn::{scheduler::get_raw_ext, txn_status_cache::TxnStatusCache}, + Statistics, TestEngineBuilder, }; #[test] @@ -116,6 +119,7 @@ mod tests { statistics: &mut statistic, async_apply_prewrite: false, raw_ext, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let cmd: Command = cmd.into(); let write_result = cmd.process_write(snap, context).unwrap(); diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index 92985c4d90d..ceb169f79b2 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -201,6 +201,12 @@ impl WriteCommand for CheckSecondaryLocks { } } + let write_result_known_txn_status = + if let SecondaryLocksStatus::Committed(commit_ts) = &result { + vec![(self.start_ts, *commit_ts)] + } else { + vec![] + }; let mut rows = 0; if let SecondaryLocksStatus::RolledBack = &result { // One row is mutated only when a secondary lock is rolled back. 
@@ -220,6 +226,7 @@ impl WriteCommand for CheckSecondaryLocks { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: write_result_known_txn_status, }) } } @@ -235,7 +242,10 @@ pub mod tests { kv::TestEngineBuilder, lock_manager::MockLockManager, mvcc::tests::*, - txn::{commands::WriteCommand, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*}, + txn::{ + commands::WriteCommand, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*, + txn_status_cache::TxnStatusCache, + }, Engine, }; @@ -265,6 +275,7 @@ pub mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .unwrap(); @@ -303,6 +314,7 @@ pub mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .unwrap(); diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index dc99ebf3b01..9e9a6cc0895 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -131,6 +131,12 @@ impl WriteCommand for CheckTxnStatus { let mut released_locks = ReleasedLocks::new(); released_locks.push(released); + let write_result_known_txn_status = if let TxnStatus::Committed { commit_ts } = &txn_status + { + vec![(self.lock_ts, *commit_ts)] + } else { + vec![] + }; let pr = ProcessResult::TxnStatus { txn_status }; let new_acquired_locks = txn.take_new_locks(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); @@ -145,6 +151,7 @@ impl WriteCommand for CheckTxnStatus { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: write_result_known_txn_status, }) } } @@ -168,6 +175,7 @@ pub mod tests { commands::{pessimistic_rollback, WriteCommand, WriteContext}, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*, + 
txn_status_cache::TxnStatusCache, }, types::TxnStatus, ProcessResult, TestEngineBuilder, @@ -211,6 +219,7 @@ pub mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .unwrap(); @@ -259,6 +268,7 @@ pub mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .map(|r| { diff --git a/src/storage/txn/commands/cleanup.rs b/src/storage/txn/commands/cleanup.rs index 302c4fe1308..886094a7f34 100644 --- a/src/storage/txn/commands/cleanup.rs +++ b/src/storage/txn/commands/cleanup.rs @@ -80,6 +80,7 @@ impl WriteCommand for Cleanup { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } diff --git a/src/storage/txn/commands/commit.rs b/src/storage/txn/commands/commit.rs index 4f05df8fe83..8daff9b2aee 100644 --- a/src/storage/txn/commands/commit.rs +++ b/src/storage/txn/commands/commit.rs @@ -80,6 +80,7 @@ impl WriteCommand for Commit { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![(self.lock_ts, self.commit_ts)], }) } } diff --git a/src/storage/txn/commands/compare_and_swap.rs b/src/storage/txn/commands/compare_and_swap.rs index ca9213b57d3..3725de47273 100644 --- a/src/storage/txn/commands/compare_and_swap.rs +++ b/src/storage/txn/commands/compare_and_swap.rs @@ -117,6 +117,7 @@ impl WriteCommand for RawCompareAndSwap { new_acquired_locks: vec![], lock_guards, response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } @@ -134,8 +135,9 @@ mod tests { use super::*; use crate::storage::{ - lock_manager::MockLockManager, txn::scheduler::get_raw_ext, Engine, Statistics, - TestEngineBuilder, + lock_manager::MockLockManager, + txn::{scheduler::get_raw_ext, txn_status_cache::TxnStatusCache}, + Engine, Statistics, TestEngineBuilder, }; #[test] @@ 
-215,6 +217,7 @@ mod tests { statistics: &mut statistic, async_apply_prewrite: false, raw_ext, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let ret = cmd.cmd.process_write(snap, context)?; match ret.pr { @@ -269,6 +272,7 @@ mod tests { statistics: &mut statistic, async_apply_prewrite: false, raw_ext, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let cmd: Command = cmd.into(); let write_result = cmd.process_write(snap, context).unwrap(); diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index 37d288fa266..efbeefa2494 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -185,6 +185,7 @@ impl WriteCommand for FlashbackToVersion { new_acquired_locks: vec![], lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index 5896d6562f1..dabef707e61 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -70,7 +70,7 @@ use crate::storage::{ }, metrics, mvcc::{Lock as MvccLock, MvccReader, ReleasedLock, SnapshotReader}, - txn::{latch, ProcessResult, Result}, + txn::{latch, txn_status_cache::TxnStatusCache, ProcessResult, Result}, types::{ MvccInfo, PessimisticLockParameters, PessimisticLockResults, PrewriteResult, SecondaryLocksStatus, StorageCallbackType, TxnStatus, @@ -422,6 +422,12 @@ pub struct WriteResult { pub new_acquired_locks: Vec, pub lock_guards: Vec, pub response_policy: ResponsePolicy, + /// The txn status that can be inferred by the successful writing. This will + /// be used to update the cache. + /// + /// Currently only commit_ts of committed transactions will be collected. + /// Rolled-back transactions may also be collected in the future. 
+ pub known_txn_status: Vec<(TimeStamp, TimeStamp)>, } pub struct WriteResultLockInfo { @@ -573,6 +579,7 @@ pub struct WriteContext<'a, L: LockManager> { pub statistics: &'a mut Statistics, pub async_apply_prewrite: bool, pub raw_ext: Option, // use for apiv2 + pub txn_status_cache: &'a TxnStatusCache, } pub struct ReaderWithStats<'a, S: Snapshot> { @@ -823,6 +830,7 @@ pub mod test_util { statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let ret = cmd.cmd.process_write(snap, context)?; let res = match ret.pr { @@ -983,6 +991,7 @@ pub mod test_util { statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let ret = cmd.cmd.process_write(snap, context)?; @@ -1008,6 +1017,7 @@ pub mod test_util { statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let ret = cmd.cmd.process_write(snap, context)?; diff --git a/src/storage/txn/commands/pause.rs b/src/storage/txn/commands/pause.rs index 5d3aa7f6d2f..1f5d40b2d4e 100644 --- a/src/storage/txn/commands/pause.rs +++ b/src/storage/txn/commands/pause.rs @@ -53,6 +53,7 @@ impl WriteCommand for Pause { new_acquired_locks: vec![], lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } diff --git a/src/storage/txn/commands/pessimistic_rollback.rs b/src/storage/txn/commands/pessimistic_rollback.rs index 4e0bf8c8c56..531eb256c40 100644 --- a/src/storage/txn/commands/pessimistic_rollback.rs +++ b/src/storage/txn/commands/pessimistic_rollback.rs @@ -96,6 +96,7 @@ impl WriteCommand for PessimisticRollback { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } @@ -116,6 +117,7 @@ pub mod tests { commands::{WriteCommand, WriteContext}, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*, + txn_status_cache::TxnStatusCache, }, TestEngineBuilder, }; @@ 
-146,6 +148,7 @@ pub mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let result = command.process_write(snapshot, write_context).unwrap(); write(engine, &ctx, result.to_be_write.modifies); diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 10446db6292..34c98dab156 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -24,7 +24,7 @@ use crate::storage::{ kv::WriteData, lock_manager::LockManager, mvcc::{ - has_data_in_range, Error as MvccError, ErrorInner as MvccErrorInner, MvccTxn, + has_data_in_range, metrics::*, Error as MvccError, ErrorInner as MvccErrorInner, MvccTxn, Result as MvccResult, SnapshotReader, TxnCommitRecord, }, txn::{ @@ -489,6 +489,36 @@ impl Prewriter { snapshot: impl Snapshot, mut context: WriteContext<'_, impl LockManager>, ) -> Result { + // Handle special cases about retried prewrite requests for pessimistic + // transactions. + if let TransactionKind::Pessimistic(_) = self.kind.txn_kind() { + if let Some(commit_ts) = context.txn_status_cache.get_no_promote(self.start_ts) { + fail_point!("before_prewrite_txn_status_cache_hit"); + if self.ctx.is_retry_request { + MVCC_PREWRITE_REQUEST_AFTER_COMMIT_COUNTER_VEC + .retry_req + .inc(); + } else { + MVCC_PREWRITE_REQUEST_AFTER_COMMIT_COUNTER_VEC + .non_retry_req + .inc(); + } + warn!("prewrite request received due to transaction is known to be already committed"; "start_ts" => %self.start_ts, "commit_ts" => %commit_ts); + // In normal cases if the transaction is committed, then the key should have + // been already prewritten successfully. But in order to + // simplify code as well as prevent possible corner cases or + // special cases in the future, we disallow skipping constraint + // check in this case. 
+ // We regard this request as a retried request no matter if it really is (the + // original request may arrive later than retried request due to + // network latency, in which case we'd better handle it like a + // retried request). + self.ctx.is_retry_request = true; + } else { + fail_point!("before_prewrite_txn_status_cache_miss"); + } + } + self.kind .can_skip_constraint_check(&mut self.mutations, &snapshot, &mut context)?; self.check_max_ts_synced(&snapshot)?; @@ -748,6 +778,11 @@ impl Prewriter { new_acquired_locks, lock_guards, response_policy: ResponsePolicy::OnApplied, + known_txn_status: if !one_pc_commit_ts.is_zero() { + vec![(self.start_ts, one_pc_commit_ts)] + } else { + vec![] + }, } } else { // Skip write stage if some keys are locked. @@ -768,6 +803,7 @@ impl Prewriter { new_acquired_locks: vec![], lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], } }; @@ -1002,6 +1038,7 @@ mod tests { must_acquire_pessimistic_lock, must_acquire_pessimistic_lock_err, must_commit, must_prewrite_put_err_impl, must_prewrite_put_impl, must_rollback, }, + txn_status_cache::TxnStatusCache, Error, ErrorInner, }, types::TxnStatus, @@ -1647,6 +1684,7 @@ mod tests { statistics: &mut Statistics::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), } }; } @@ -1818,6 +1856,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: case.async_apply_prewrite, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let mut engine = TestEngineBuilder::new().build().unwrap(); let snap = engine.snapshot(Default::default()).unwrap(); @@ -1932,6 +1971,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let snap = engine.snapshot(Default::default()).unwrap(); let result = cmd.cmd.process_write(snap, context).unwrap(); @@ -1960,6 +2000,7 @@ mod tests { statistics: &mut 
statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let snap = engine.snapshot(Default::default()).unwrap(); let result = cmd.cmd.process_write(snap, context).unwrap(); @@ -2043,6 +2084,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let snap = engine.snapshot(Default::default()).unwrap(); let result = cmd.cmd.process_write(snap, context).unwrap(); @@ -2075,6 +2117,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let snap = engine.snapshot(Default::default()).unwrap(); let result = cmd.cmd.process_write(snap, context).unwrap(); @@ -2345,6 +2388,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let snap = engine.snapshot(Default::default()).unwrap(); assert!(prewrite_cmd.cmd.process_write(snap, context).is_err()); @@ -2369,6 +2413,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let snap = engine.snapshot(Default::default()).unwrap(); assert!(prewrite_cmd.cmd.process_write(snap, context).is_err()); @@ -2575,6 +2620,7 @@ mod tests { statistics: &mut statistics, async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }; let snap = engine.snapshot(Default::default()).unwrap(); let res = prewrite_cmd.cmd.process_write(snap, context).unwrap(); diff --git a/src/storage/txn/commands/resolve_lock.rs b/src/storage/txn/commands/resolve_lock.rs index f3d141807e8..cd01fc60475 100644 --- a/src/storage/txn/commands/resolve_lock.rs +++ b/src/storage/txn/commands/resolve_lock.rs @@ -83,6 +83,7 @@ impl WriteCommand for ResolveLock { let mut scan_key = self.scan_key.take(); let rows = key_locks.len(); 
let mut released_locks = ReleasedLocks::new(); + let mut known_txn_status = vec![]; for (current_key, current_lock) in key_locks { txn.start_ts = current_lock.ts; reader.start_ts = current_lock.ts; @@ -103,7 +104,10 @@ impl WriteCommand for ResolveLock { // type. They could be left if the transaction is finally committed and // pessimistic conflict retry happens during execution. match commit(&mut txn, &mut reader, current_key.clone(), commit_ts) { - Ok(res) => res, + Ok(res) => { + known_txn_status.push((current_lock.ts, commit_ts)); + res + } Err(MvccError(box MvccErrorInner::TxnLockNotFound { .. })) if current_lock.is_pessimistic_lock() => { @@ -125,6 +129,9 @@ impl WriteCommand for ResolveLock { } } + known_txn_status.sort(); + known_txn_status.dedup(); + let pr = if scan_key.is_none() { ProcessResult::Res } else { @@ -151,6 +158,7 @@ impl WriteCommand for ResolveLock { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status, }) } } diff --git a/src/storage/txn/commands/resolve_lock_lite.rs b/src/storage/txn/commands/resolve_lock_lite.rs index 63fe201596d..318e5d57313 100644 --- a/src/storage/txn/commands/resolve_lock_lite.rs +++ b/src/storage/txn/commands/resolve_lock_lite.rs @@ -63,6 +63,11 @@ impl WriteCommand for ResolveLockLite { }); } + let known_txn_status = if !self.commit_ts.is_zero() { + vec![(self.start_ts, self.commit_ts)] + } else { + vec![] + }; let new_acquired_locks = txn.take_new_locks(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); @@ -76,6 +81,7 @@ impl WriteCommand for ResolveLockLite { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status, }) } } diff --git a/src/storage/txn/commands/rollback.rs b/src/storage/txn/commands/rollback.rs index f3b674f4916..df60767e716 100644 --- a/src/storage/txn/commands/rollback.rs +++ b/src/storage/txn/commands/rollback.rs @@ -71,6 +71,7 @@ 
impl WriteCommand for Rollback { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } diff --git a/src/storage/txn/commands/txn_heart_beat.rs b/src/storage/txn/commands/txn_heart_beat.rs index 448395fc436..c900464099a 100644 --- a/src/storage/txn/commands/txn_heart_beat.rs +++ b/src/storage/txn/commands/txn_heart_beat.rs @@ -96,6 +96,7 @@ impl WriteCommand for TxnHeartBeat { new_acquired_locks, lock_guards: vec![], response_policy: ResponsePolicy::OnApplied, + known_txn_status: vec![], }) } } @@ -111,7 +112,10 @@ pub mod tests { kv::TestEngineBuilder, lock_manager::MockLockManager, mvcc::tests::*, - txn::{commands::WriteCommand, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*}, + txn::{ + commands::WriteCommand, scheduler::DEFAULT_EXECUTION_DURATION_LIMIT, tests::*, + txn_status_cache::TxnStatusCache, + }, Engine, }; @@ -143,6 +147,7 @@ pub mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .unwrap(); @@ -185,6 +190,7 @@ pub mod tests { statistics: &mut Default::default(), async_apply_prewrite: false, raw_ext: None, + txn_status_cache: &TxnStatusCache::new_for_test(), }, ) .is_err() diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 3c6a66c3941..36492f22701 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -83,6 +83,7 @@ use crate::{ flow_controller::FlowController, latch::{Latches, Lock}, sched_pool::{tls_collect_query, tls_collect_scan_details, SchedPool}, + txn_status_cache::TxnStatusCache, Error, ErrorInner, ProcessResult, }, types::StorageCallback, @@ -293,6 +294,8 @@ struct TxnSchedulerInner { quota_limiter: Arc, resource_manager: Option>, feature_gate: FeatureGate, + + txn_status_cache: TxnStatusCache, } #[inline] @@ -484,6 +487,7 @@ impl TxnScheduler { quota_limiter, resource_manager, feature_gate, + txn_status_cache: 
TxnStatusCache::new(config.txn_status_cache_capacity), }); slow_log!( @@ -815,6 +819,7 @@ impl TxnScheduler { pipelined: bool, async_apply_prewrite: bool, new_acquired_locks: Vec, + known_txn_status: Vec<(TimeStamp, TimeStamp)>, tag: CommandKind, metadata: TaskMetadata<'_>, sched_details: &SchedulerDetails, @@ -837,6 +842,17 @@ impl TxnScheduler { debug!("write command finished"; "cid" => cid, "pipelined" => pipelined, "async_apply_prewrite" => async_apply_prewrite); drop(lock_guards); + + if result.is_ok() && !known_txn_status.is_empty() { + // Update cache before calling the callback. + // Reversing the order can lead to test failures as the cache may still + // remain not updated after receiving signal from the callback. + let now = std::time::SystemTime::now(); + for (start_ts, commit_ts) in known_txn_status { + self.inner.txn_status_cache.insert(start_ts, commit_ts, now); + } + } + let tctx = self.inner.dequeue_task_context(cid); let mut do_wake_up = !tctx.woken_up_resumable_lock_requests.is_empty(); @@ -1258,6 +1274,7 @@ impl TxnScheduler { statistics: &mut sched_details.stat, async_apply_prewrite: self.inner.enable_async_apply_prewrite, raw_ext, + txn_status_cache: &self.inner.txn_status_cache, }; let begin_instant = Instant::now(); let res = unsafe { @@ -1328,6 +1345,7 @@ impl TxnScheduler { new_acquired_locks, lock_guards, response_policy, + known_txn_status, } = match deadline .check() .map_err(StorageError::from) @@ -1406,6 +1424,7 @@ impl TxnScheduler { false, false, new_acquired_locks, + known_txn_status, tag, metadata, sched_details, @@ -1441,6 +1460,7 @@ impl TxnScheduler { false, false, new_acquired_locks, + known_txn_status, tag, metadata, sched_details, @@ -1636,6 +1656,7 @@ impl TxnScheduler { pipelined, is_async_apply_prewrite, new_acquired_locks, + known_txn_status, tag, metadata, sched_details, @@ -1879,6 +1900,11 @@ impl TxnScheduler { .push_lock_wait(entry, Default::default()); } } + + #[cfg(test)] + pub fn get_txn_status_cache(&self) -> 
&TxnStatusCache { + &self.inner.txn_status_cache + } } pub async fn get_raw_ext( diff --git a/src/storage/txn/txn_status_cache.rs b/src/storage/txn/txn_status_cache.rs index 2428bbb99c5..ab50bd0412e 100644 --- a/src/storage/txn/txn_status_cache.rs +++ b/src/storage/txn/txn_status_cache.rs @@ -371,6 +371,23 @@ impl TxnStatusCache { let mut slot = self.slots[self.slot_index(start_ts)].lock(); slot.get(&start_ts).map(|entry| entry.commit_ts) } + + /// Remove an entry from the cache. We usually don't need to remove anything + /// from the `TxnStatusCache`, but it's useful in tests to construct cache- + /// miss cases. + #[cfg(test)] + pub fn remove(&self, start_ts: TimeStamp) -> Option { + if !self.is_enabled { + return None; + } + + let res = { + let mut slot = self.slots[self.slot_index(start_ts)].lock(); + slot.remove(&start_ts).map(|e| e.commit_ts) + }; + debug_assert!(self.get_no_promote(start_ts).is_none()); + res + } } #[cfg(test)] diff --git a/tests/failpoints/cases/test_kv_service.rs b/tests/failpoints/cases/test_kv_service.rs index 00f5c3c778e..2ec1109edd4 100644 --- a/tests/failpoints/cases/test_kv_service.rs +++ b/tests/failpoints/cases/test_kv_service.rs @@ -3,10 +3,14 @@ use std::{sync::Arc, time::Duration}; use grpcio::{ChannelBuilder, Environment}; -use kvproto::{kvrpcpb::*, tikvpb::TikvClient}; +use kvproto::{ + kvrpcpb::{PrewriteRequestPessimisticAction::SkipPessimisticCheck, *}, + tikvpb::TikvClient, +}; use test_raftstore::{ - configure_for_lease_read, must_kv_commit, must_kv_prewrite, must_new_cluster_and_kv_client, - must_new_cluster_mul, new_server_cluster, try_kv_prewrite_with_impl, + configure_for_lease_read, must_kv_commit, must_kv_have_locks, must_kv_prewrite, + must_kv_prewrite_with, must_new_cluster_and_kv_client, must_new_cluster_mul, + new_server_cluster, try_kv_prewrite_with, try_kv_prewrite_with_impl, }; use tikv_util::{config::ReadableDuration, HandyRwLock}; @@ -92,6 +96,7 @@ fn test_undetermined_write_err() { &client, ctx, 
vec![mutation], + vec![], b"k".to_vec(), 10, 0, @@ -156,3 +161,105 @@ fn test_stale_read_on_local_leader() { assert!(resp.region_error.is_none()); assert_eq!(v, resp.get_value()); } + +#[test] +fn test_storage_do_not_update_txn_status_cache_on_write_error() { + let cache_hit_fp = "before_prewrite_txn_status_cache_hit"; + let cache_miss_fp = "before_prewrite_txn_status_cache_miss"; + + let (cluster, leader, ctx) = must_new_cluster_mul(1); + let env = Arc::new(Environment::new(1)); + let channel = ChannelBuilder::new(env) + .connect(&cluster.sim.read().unwrap().get_addr(leader.get_store_id())); + let client = TikvClient::new(channel); + + let pk = b"pk".to_vec(); + + // Case 1: Test write successfully. + + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(b"k1".to_vec()); + mutation.set_value(b"v1".to_vec()); + must_kv_prewrite_with( + &client, + ctx.clone(), + vec![mutation.clone()], + vec![SkipPessimisticCheck], + pk.clone(), + 10, + 10, + true, + false, + ); + must_kv_commit(&client, ctx.clone(), vec![b"k1".to_vec()], 10, 15, 15); + + // Expect cache hit + fail::cfg(cache_miss_fp, "panic").unwrap(); + must_kv_prewrite_with( + &client, + ctx.clone(), + vec![mutation], + vec![SkipPessimisticCheck], + pk.clone(), + 10, + 10, + true, + false, + ); + // Key not locked. + must_kv_have_locks(&client, ctx.clone(), 19, b"k1", b"k2", &[]); + fail::remove(cache_miss_fp); + + // Case 2: Write failed. 
+ + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(b"k2".to_vec()); + mutation.set_value(b"v2".to_vec()); + + try_kv_prewrite_with( + &client, + ctx.clone(), + vec![mutation.clone()], + vec![SkipPessimisticCheck], + pk.clone(), + 20, + 20, + true, + false, + ); + fail::cfg("raftkv_early_error_report", "return").unwrap(); + let mut commit_req = CommitRequest::default(); + commit_req.set_context(ctx.clone()); + commit_req.set_start_version(20); + commit_req.set_commit_version(25); + commit_req.set_keys(vec![b"k2".to_vec()].into()); + let commit_resp = client.kv_commit(&commit_req).unwrap(); + assert!(commit_resp.has_region_error()); + fail::remove("raftkv_early_error_report"); + must_kv_have_locks( + &client, + ctx.clone(), + 29, + b"k2", + b"k3", + &[(b"k2", Op::Put, 20, 20)], + ); + + // Expect cache miss + fail::cfg(cache_hit_fp, "panic").unwrap(); + try_kv_prewrite_with( + &client, + ctx.clone(), + vec![mutation], + vec![SkipPessimisticCheck], + pk, + 20, + 20, + true, + false, + ); + must_kv_have_locks(&client, ctx, 29, b"k2", b"k3", &[(b"k2", Op::Put, 20, 20)]); + fail::remove(cache_hit_fp); +} diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 1ac6e3840f1..2f4f5ba7695 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -761,6 +761,7 @@ fn test_serde_custom_tikv_config() { other_priority: IoPriority::Low, }, background_error_recovery_window: ReadableDuration::hours(1), + txn_status_cache_capacity: 1000, }; value.coprocessor = CopConfig { split_region_on_table: false, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index fe1fa066ae8..1bb52fad5fc 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -101,6 +101,7 @@ reserve-space = "10GB" reserve-raft-space = "2GB" enable-ttl = true ttl-check-poll-interval = "0s" +txn-status-cache-capacity = 1000 
[storage.block-cache] capacity = "40GB" From 4093bda19289c12e201d6d940f18aa5beca0975a Mon Sep 17 00:00:00 2001 From: YangKeao Date: Wed, 1 Nov 2023 01:35:06 +0800 Subject: [PATCH 0985/1149] tidb_query_expr: fix the behavior of `field` function (#15879) close tikv/tikv#15878 Signed-off-by: Yang Keao --- components/tidb_query_expr/src/impl_string.rs | 52 +++++++++++++++---- components/tidb_query_expr/src/lib.rs | 10 +++- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/components/tidb_query_expr/src/impl_string.rs b/components/tidb_query_expr/src/impl_string.rs index f3b9b03c287..25c9294d533 100644 --- a/components/tidb_query_expr/src/impl_string.rs +++ b/components/tidb_query_expr/src/impl_string.rs @@ -635,15 +635,22 @@ fn field(args: &[Option<&T>]) -> Result #[rpn_fn(nullable, varg, min_args = 1)] #[inline] -fn field_bytes(args: &[Option]) -> Result> { +fn field_bytes(args: &[Option]) -> Result> { Ok(Some(match args[0] { // As per the MySQL doc, if the first argument is NULL, this function always returns 0. 
None => 0, - Some(val) => args - .iter() - .skip(1) - .position(|&i| i == Some(val)) - .map_or(0, |pos| (pos + 1) as i64), + Some(val) => { + for (pos, arg) in args.iter().enumerate().skip(1) { + if arg.is_none() { + continue; + } + match C::sort_compare(val, arg.unwrap()) { + Ok(Ordering::Equal) => return Ok(Some(pos as i64)), + _ => continue, + } + } + 0 + } })) } @@ -3214,6 +3221,7 @@ mod tests { Some(b"baz".to_vec()), ], Some(1), + Collation::Utf8Mb4Bin, ), ( vec![ @@ -3223,6 +3231,7 @@ mod tests { Some(b"hello".to_vec()), ], Some(0), + Collation::Utf8Mb4Bin, ), ( vec![ @@ -3232,6 +3241,7 @@ mod tests { Some(b"hello".to_vec()), ], Some(3), + Collation::Utf8Mb4Bin, ), ( vec![ @@ -3244,6 +3254,7 @@ mod tests { Some(b"Hello".to_vec()), ], Some(6), + Collation::Utf8Mb4Bin, ), ( vec![ @@ -3252,14 +3263,37 @@ mod tests { Some(b"Hello World!".to_vec()), ], Some(0), + Collation::Utf8Mb4Bin, + ), + ( + vec![None, None, Some(b"Hello World!".to_vec())], + Some(0), + Collation::Utf8Mb4Bin, + ), + ( + vec![Some(b"Hello World!".to_vec())], + Some(0), + Collation::Utf8Mb4Bin, + ), + ( + vec![ + Some(b"a".to_vec()), + Some(b"A".to_vec()), + Some(b"a".to_vec()), + ], + Some(1), + Collation::Utf8Mb4GeneralCi, ), - (vec![None, None, Some(b"Hello World!".to_vec())], Some(0)), - (vec![Some(b"Hello World!".to_vec())], Some(0)), ]; - for (args, expect_output) in test_cases { + for (args, expect_output, collation) in test_cases { let output = RpnFnScalarEvaluator::new() .push_params(args) + .return_field_type( + FieldTypeBuilder::new() + .tp(FieldTypeTp::Long) + .collation(collation), + ) .evaluate(ScalarFuncSig::FieldString) .unwrap(); assert_eq!(output, expect_output); diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index c2ef6722148..61fb3612b63 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -409,6 +409,14 @@ fn map_lower_utf8_sig(value: ScalarFuncSig, children: &[Expr]) -> Result Result { 
+ Ok(match_template_collator! { + TT, match ret_field_type.as_accessor().collation().map_err(tidb_query_datatype::codec::Error::from)? { + Collation::TT => field_bytes_fn_meta::() + } + }) +} + #[rustfmt::skip] fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { let value = expr.get_sig(); @@ -787,7 +795,7 @@ fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { ScalarFuncSig::Locate3Args => locate_3_args_fn_meta(), ScalarFuncSig::FieldInt => field_fn_meta::(), ScalarFuncSig::FieldReal => field_fn_meta::(), - ScalarFuncSig::FieldString => field_bytes_fn_meta(), + ScalarFuncSig::FieldString => map_field_string_sig(ft)?, ScalarFuncSig::Elt => elt_fn_meta(), ScalarFuncSig::MakeSet => make_set_fn_meta(), ScalarFuncSig::Space => space_fn_meta(), From 2a24cfc4b25de341cf6b93727d11dce0c0648a5b Mon Sep 17 00:00:00 2001 From: Alex Feinberg Date: Tue, 31 Oct 2023 11:54:36 -0700 Subject: [PATCH 0986/1149] rafstore, engine_rocks: periodic full compaction (#12729) (#15853) ref tikv/tikv#12729 Signed-off-by: Alex Feinberg Co-authored-by: lucasliang --- Cargo.lock | 2 + components/online_config/Cargo.toml | 1 + components/online_config/src/lib.rs | 7 + components/raftstore/Cargo.toml | 1 + components/raftstore/src/store/config.rs | 16 +- components/raftstore/src/store/fsm/store.rs | 72 ++++- components/raftstore/src/store/metrics.rs | 1 + components/raftstore/src/store/msg.rs | 2 + .../raftstore/src/store/worker/compact.rs | 75 +++++- .../raftstore/src/store/worker/metrics.rs | 5 + components/tikv_util/src/config.rs | 248 ++++++++++++++++++ tests/integrations/config/mod.rs | 4 +- 12 files changed, 429 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 52408df1ab2..09459fd9123 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3565,6 +3565,7 @@ checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" name = "online_config" version = "0.1.0" dependencies = [ + "chrono", "online_config_derive", "serde", "serde_derive", @@ -4359,6 +4360,7 @@ 
dependencies = [ "byteorder", "bytes", "causal_ts", + "chrono", "collections", "concurrency_manager", "crc32fast", diff --git a/components/online_config/Cargo.toml b/components/online_config/Cargo.toml index 9d67f1cf1de..47e8996391c 100644 --- a/components/online_config/Cargo.toml +++ b/components/online_config/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" publish = false [dependencies] +chrono = "0.4" online_config_derive = { path = "./online_config_derive" } serde = { version = "1.0", features = ["derive"] } diff --git a/components/online_config/src/lib.rs b/components/online_config/src/lib.rs index 45694305a5f..5fec0cea9bc 100644 --- a/components/online_config/src/lib.rs +++ b/components/online_config/src/lib.rs @@ -5,9 +5,12 @@ use std::{ fmt::{self, Debug, Display, Formatter}, }; +use chrono::{FixedOffset, NaiveTime}; pub use online_config_derive::*; pub type ConfigChange = HashMap; +pub type OffsetTime = (NaiveTime, FixedOffset); +pub type Schedule = Vec; #[derive(Clone, PartialEq)] pub enum ConfigValue { @@ -21,6 +24,8 @@ pub enum ConfigValue { Bool(bool), String(String), Module(ConfigChange), + OffsetTime(OffsetTime), + Schedule(Schedule), Skip, None, } @@ -38,6 +43,8 @@ impl Display for ConfigValue { ConfigValue::Bool(v) => write!(f, "{}", v), ConfigValue::String(v) => write!(f, "{}", v), ConfigValue::Module(v) => write!(f, "{:?}", v), + ConfigValue::OffsetTime((t, o)) => write!(f, "{} {}", t, o), + ConfigValue::Schedule(v) => write!(f, "{:?}", v), ConfigValue::Skip => write!(f, "ConfigValue::Skip"), ConfigValue::None => write!(f, ""), } diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 1933bad6da9..3a8caa421e5 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -29,6 +29,7 @@ bitflags = "1.0.1" byteorder = "1.2" bytes = "1.0" causal_ts = { workspace = true } +chrono = "0.4" collections = { workspace = true } concurrency_manager = { workspace = true } crc32fast = "1.2" diff --git 
a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 95c4aed9349..3d1b58a6e75 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize}; use serde_with::with_prefix; use tikv_util::{ box_err, - config::{ReadableDuration, ReadableSize, VersionTrack}, + config::{ReadableDuration, ReadableSchedule, ReadableSize, VersionTrack}, error, info, sys::SysQuota, warn, @@ -152,6 +152,15 @@ pub struct Config { pub lock_cf_compact_interval: ReadableDuration, pub lock_cf_compact_bytes_threshold: ReadableSize, + /// Hours of the day during which we may execute a periodic full compaction. + /// If not set or empty, periodic full compaction will not run. In toml this + /// should be a list of timesin "HH:MM" format with an optional timezone + /// offset. If no timezone is specified, local timezone is used. E.g., + /// `["23:00 +0000", "03:00 +0700"]` or `["23:00", "03:00"]`. + pub periodic_full_compact_start_times: ReadableSchedule, + /// Do not start a full compaction if cpu utilization exceeds this number. + pub periodic_full_compact_start_max_cpu: f64, + #[online_config(skip)] pub notify_capacity: usize, pub messages_per_tick: usize, @@ -435,6 +444,11 @@ impl Default for Config { region_compact_redundant_rows_percent: None, pd_heartbeat_tick_interval: ReadableDuration::minutes(1), pd_store_heartbeat_tick_interval: ReadableDuration::secs(10), + // Disable periodic full compaction by default. + periodic_full_compact_start_times: ReadableSchedule::default(), + // If periodic full compaction is enabled, do not start a full compaction + // if the CPU utilization is over 10%. 
+ periodic_full_compact_start_max_cpu: 0.1, notify_capacity: 40960, snap_mgr_gc_tick_interval: ReadableDuration::minutes(1), snap_gc_timeout: ReadableDuration::hours(4), diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 2efcbf87b09..950768055e4 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -59,8 +59,11 @@ use tikv_util::{ mpsc::{self, LooseBoundedSender, Receiver}, slow_log, store::{find_peer, region_on_stores}, - sys as sys_util, - sys::disk::{get_disk_status, DiskUsage}, + sys::{ + self as sys_util, + cpu_time::ProcessStat, + disk::{get_disk_status, DiskUsage}, + }, time::{duration_to_sec, monotonic_raw_now, Instant as TiInstant}, timer::SteadyTimer, warn, @@ -117,6 +120,10 @@ pub const PENDING_MSG_CAP: usize = 100; pub const ENTRY_CACHE_EVICT_TICK_DURATION: Duration = Duration::from_secs(1); pub const MULTI_FILES_SNAPSHOT_FEATURE: Feature = Feature::require(6, 1, 0); // it only makes sense for large region +// Every 30 minutes, check if we can run full compaction. This allows the config +// setting `periodic_full_compact_start_max_cpu` to be changed dynamically. 
+const PERIODIC_FULL_COMPACT_TICK_INTERVAL_DURATION: Duration = Duration::from_secs(30 * 60); + pub struct StoreInfo { pub kv_engine: EK, pub raft_engine: ER, @@ -768,6 +775,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> StoreTick::SnapGc => self.on_snap_mgr_gc(), StoreTick::CompactLockCf => self.on_compact_lock_cf(), StoreTick::CompactCheck => self.on_compact_check_tick(), + StoreTick::PeriodicFullCompact => self.on_full_compact_tick(), StoreTick::ConsistencyCheck => self.on_consistency_check_tick(), StoreTick::CleanupImportSst => self.on_cleanup_import_sst_tick(), } @@ -858,6 +866,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> self.fsm.store.start_time = Some(time::get_time()); self.register_cleanup_import_sst_tick(); self.register_compact_check_tick(); + self.register_full_compact_tick(); self.register_pd_store_heartbeat_tick(); self.register_compact_lock_cf_tick(); self.register_snap_mgr_gc_tick(); @@ -2436,6 +2445,65 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } + fn register_full_compact_tick(&self) { + if !self.ctx.cfg.periodic_full_compact_start_times.is_empty() { + self.ctx.schedule_store_tick( + StoreTick::PeriodicFullCompact, + PERIODIC_FULL_COMPACT_TICK_INTERVAL_DURATION, + ) + } + } + + fn on_full_compact_tick(&mut self) { + self.register_full_compact_tick(); + + let local_time = chrono::Local::now(); + if !self + .ctx + .cfg + .periodic_full_compact_start_times + .is_scheduled_this_hour(&local_time) + { + debug!( + "full compaction may not run at this time"; + "local_time" => ?local_time, + "periodic_full_compact_start_times" => ?self.ctx.cfg.periodic_full_compact_start_times, + ); + return; + } + + if self.ctx.global_stat.stat.is_busy.load(Ordering::SeqCst) { + warn!("full compaction may not run at this time, `is_busy` flag is true",); + return; + } + + let mut proc_stats = ProcessStat::cur_proc_stat().unwrap(); + let cpu_usage = 
proc_stats.cpu_usage().unwrap(); + let max_start_cpu_usage = self.ctx.cfg.periodic_full_compact_start_max_cpu; + if cpu_usage > max_start_cpu_usage { + warn!( + "full compaction may not run at this time, cpu usage is above max"; + "cpu_usage" => cpu_usage, + "threshold" => max_start_cpu_usage, + ); + return; + } + + // Attempt executing a periodic full compaction. + // Note that full compaction will not run if other compaction tasks are running. + if let Err(e) = self + .ctx + .cleanup_scheduler + .schedule(CleanupTask::Compact(CompactTask::PeriodicFullCompact)) + { + error!( + "failed to schedule a periodic full compaction"; + "store_id" => self.fsm.store.id, + "err" => ?e + ); + } + } + fn register_compact_check_tick(&self) { self.ctx.schedule_store_tick( StoreTick::CompactCheck, diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index a4f2b7820cb..8f7bc8af226 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -214,6 +214,7 @@ make_static_metric! 
{ pub label_enum RaftEventDurationType { compact_check, + periodic_full_compact, pd_store_heartbeat, snap_gc, compact_lock_cf, diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index a92e5169549..0d703143a08 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -435,6 +435,7 @@ impl PeerTick { #[derive(Debug, Clone, Copy)] pub enum StoreTick { CompactCheck, + PeriodicFullCompact, PdStoreHeartbeat, SnapGc, CompactLockCf, @@ -447,6 +448,7 @@ impl StoreTick { pub fn tag(self) -> RaftEventDurationType { match self { StoreTick::CompactCheck => RaftEventDurationType::compact_check, + StoreTick::PeriodicFullCompact => RaftEventDurationType::periodic_full_compact, StoreTick::PdStoreHeartbeat => RaftEventDurationType::pd_store_heartbeat, StoreTick::SnapGc => RaftEventDurationType::snap_gc, StoreTick::CompactLockCf => RaftEventDurationType::compact_lock_cf, diff --git a/components/raftstore/src/store/worker/compact.rs b/components/raftstore/src/store/worker/compact.rs index 3b2a2ec0404..abdbaf5e938 100644 --- a/components/raftstore/src/store/worker/compact.rs +++ b/components/raftstore/src/store/worker/compact.rs @@ -11,11 +11,13 @@ use fail::fail_point; use thiserror::Error; use tikv_util::{box_try, error, info, time::Instant, warn, worker::Runnable}; -use super::metrics::COMPACT_RANGE_CF; +use super::metrics::{COMPACT_RANGE_CF, FULL_COMPACT}; type Key = Vec; pub enum Task { + PeriodicFullCompact, + Compact { cf_name: String, start_key: Option, // None means smallest key @@ -58,6 +60,7 @@ impl CompactThreshold { impl Display for Task { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { + Task::PeriodicFullCompact => f.debug_struct("FullCompact").finish(), Task::Compact { ref cf_name, ref start_key, @@ -127,6 +130,31 @@ where Runner { engine } } + /// Periodic full compaction. + /// + /// NOTE this is a highly experimental feature! 
+ /// + /// TODO: Do not start if there is heavy I/O. + /// TODO: Make it possible to rate limit, pause, or abort this by compacting + /// a range at a time. + pub fn full_compact(&mut self) -> Result<(), Error> { + fail_point!("on_full_compact"); + info!("full compaction started"); + let timer = Instant::now(); + let full_compact_timer = FULL_COMPACT.start_coarse_timer(); + box_try!(self.engine.compact_range( + None, None, // Compact the entire key range. + true, // no other compaction will run when this is running + 1, // number of threads threads + )); + full_compact_timer.observe_duration(); + info!( + "full compaction finished"; + "time_takes" => ?timer.saturating_elapsed(), + ); + Ok(()) + } + /// Sends a compact range command to RocksDB to compact the range of the cf. pub fn compact_range_cf( &mut self, @@ -163,6 +191,11 @@ where fn run(&mut self, task: Task) { match task { + Task::PeriodicFullCompact => { + if let Err(e) = self.full_compact() { + error!("periodic full compaction failed"; "err" => %e); + } + } Task::Compact { cf_name, start_key, @@ -456,4 +489,44 @@ mod tests { .unwrap(); assert_eq!(ranges_need_to_compact, expected_ranges); } + + #[test] + fn test_full_compact_deletes() { + let tmp_dir = Builder::new().prefix("test").tempdir().unwrap(); + let engine = open_db(tmp_dir.path().to_str().unwrap()); + let mut runner = Runner::new(engine.clone()); + + // mvcc_put 0..5 + for i in 0..5 { + let (k, v) = (format!("k{}", i), format!("value{}", i)); + mvcc_put(&engine, k.as_bytes(), v.as_bytes(), 1.into(), 2.into()); + } + engine.flush_cf(CF_WRITE, true).unwrap(); + + let (start, end) = (data_key(b"k0"), data_key(b"k5")); + let stats = engine + .get_range_stats(CF_WRITE, &start, &end) + .unwrap() + .unwrap(); + assert_eq!(stats.num_entries, stats.num_versions); + + for i in 0..5 { + let k = format!("k{}", i); + delete(&engine, k.as_bytes(), 3.into()); + } + engine.flush_cf(CF_WRITE, true).unwrap(); + + let stats = engine + .get_range_stats(CF_WRITE, 
&start, &end) + .unwrap() + .unwrap(); + assert_eq!(stats.num_entries - stats.num_versions, 5); + + runner.run(Task::PeriodicFullCompact); + let stats = engine + .get_range_stats(CF_WRITE, &start, &end) + .unwrap() + .unwrap(); + assert_eq!(stats.num_entries - stats.num_versions, 0); + } } diff --git a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index 8dca3bcfd44..bdf24459011 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -160,6 +160,11 @@ lazy_static! { &["cf"] ) .unwrap(); + pub static ref FULL_COMPACT: Histogram = register_histogram!( + "tikv_storage_full_compact_duration_seconds", + "Bucketed histogram of full compaction for the storage." + ) + .unwrap(); pub static ref REGION_HASH_HISTOGRAM: Histogram = register_histogram!( "tikv_raftstore_hash_duration_seconds", "Bucketed histogram of raftstore hash computation duration" diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index c3d240d3c4f..39e143fc04c 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -15,6 +15,10 @@ use std::{ time::Duration, }; +use chrono::{ + format::{self, Fixed, Item, Parsed}, + DateTime, FixedOffset, Local, NaiveTime, TimeZone, Timelike, +}; use online_config::ConfigValue; use serde::{ de::{self, Unexpected, Visitor}, @@ -522,6 +526,166 @@ impl<'de> Deserialize<'de> for ReadableDuration { } } +#[derive(Clone, Debug, Copy, PartialEq)] +pub struct ReadableOffsetTime(pub NaiveTime, pub FixedOffset); + +impl From for ConfigValue { + fn from(ot: ReadableOffsetTime) -> ConfigValue { + ConfigValue::OffsetTime((ot.0, ot.1)) + } +} + +impl From for ReadableOffsetTime { + fn from(c: ConfigValue) -> ReadableOffsetTime { + if let ConfigValue::OffsetTime(ot) = c { + ReadableOffsetTime(ot.0, ot.1) + } else { + panic!("expect: ConfigValue::OffsetTime, got: {:?}", c) + } + } +} + +#[derive(Clone, 
Debug, PartialEq, Serialize, Deserialize, Default)] +pub struct ReadableSchedule(pub Vec); + +impl From for ConfigValue { + fn from(otv: ReadableSchedule) -> ConfigValue { + ConfigValue::Schedule(otv.0.into_iter().map(|ot| (ot.0, ot.1)).collect::>()) + } +} + +impl From for ReadableSchedule { + fn from(c: ConfigValue) -> ReadableSchedule { + if let ConfigValue::Schedule(otv) = c { + ReadableSchedule( + otv.into_iter() + .map(|(o, t)| ReadableOffsetTime(o, t)) + .collect::>(), + ) + } else { + panic!("expect: ConfigValue::Schedule, got: {:?}", c) + } + } +} + +impl ReadableSchedule { + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn is_scheduled_this_hour(&self, datetime: &DateTime) -> bool { + self.0.iter().any(|time| time.hour_matches(datetime)) + } + + pub fn is_scheduled_this_hour_minute(&self, datetime: &DateTime) -> bool { + self.0 + .iter() + .any(|time| time.hour_minutes_matches(datetime)) + } +} + +impl FromStr for ReadableOffsetTime { + type Err = String; + + fn from_str(ot_str: &str) -> Result { + let (time, offset) = if let Some((time_str, offset_str)) = ot_str.split_once(' ') { + let time = NaiveTime::parse_from_str(time_str, "%H:%M").map_err(|e| e.to_string())?; + let offset = parse_offset(offset_str)?; + (time, offset) + } else { + let time = NaiveTime::parse_from_str(ot_str, "%H:%M").map_err(|e| e.to_string())?; + (time, local_offset()) + }; + Ok(ReadableOffsetTime(time, offset)) + } +} + +/// Returns the `FixedOffset` for the timezone this `tikv` server has been +/// configured to use. +fn local_offset() -> FixedOffset { + let &offset = Local::now().offset(); + offset +} + +/// Parses the offset specified by `str`. +/// Note: `FixedOffset` in latest `chrono` implements `FromStr`. Once we are +/// able to upgrade to it (`components/tidb_query_datatype` requires a large +/// refactoring that is outside the scope of this PR), we can remove this +/// method. 
+fn parse_offset(offset_str: &str) -> Result { + let mut parsed = Parsed::new(); + format::parse( + &mut parsed, + offset_str, + [Item::Fixed(Fixed::TimezoneOffsetZ)].iter(), + ) + .map_err(|e| e.to_string())?; + parsed.to_fixed_offset().map_err(|e| e.to_string()) +} + +impl fmt::Display for ReadableOffsetTime { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {}", self.0, self.1) + } +} + +impl ReadableOffsetTime { + /// Converts `datetime` from `Tz` to the same timezone as this instance and + /// returns `true` if the hour of the day is matches hour of this + /// instance. + pub fn hour_matches(&self, datetime: &DateTime) -> bool { + self.convert_to_this_offset(datetime).hour() == self.0.hour() + } + + /// Converts `datetime` from `Tz` to the same timezone as this instance and + /// returns `true` if hours and minutes match this instance. + pub fn hour_minutes_matches(&self, datetime: &DateTime) -> bool { + let time = self.convert_to_this_offset(datetime); + time.hour() == self.0.hour() && time.minute() == self.0.minute() + } + + fn convert_to_this_offset(&self, datetime: &DateTime) -> NaiveTime { + datetime.with_timezone(&self.1).time() + } +} + +impl Serialize for ReadableOffsetTime { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut buffer = String::new(); + write!(buffer, "{}", self).unwrap(); + serializer.serialize_str(&buffer) + } +} + +impl<'de> Deserialize<'de> for ReadableOffsetTime { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct OffTimeVisitor; + + impl<'de> Visitor<'de> for OffTimeVisitor { + type Value = ReadableOffsetTime; + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_str("valid duration") + } + + fn visit_str(self, off_time_str: &str) -> Result + where + E: de::Error, + { + off_time_str.parse().map_err(E::custom) + } + } + + deserializer.deserialize_str(OffTimeVisitor) + } +} + pub fn 
normalize_path>(path: P) -> PathBuf { use std::path::Component; let mut components = path.as_ref().components().peekable(); @@ -1771,6 +1935,90 @@ mod tests { assert!(toml::from_str::("d = 23").is_err()); } + #[test] + fn test_readable_offset_time() { + let decode_cases = vec![ + ( + "23:00 +0000", + ReadableOffsetTime( + NaiveTime::from_hms_opt(23, 00, 00).unwrap(), + FixedOffset::east_opt(0).unwrap(), + ), + ), + ( + "03:00", + ReadableOffsetTime(NaiveTime::from_hms_opt(3, 00, 00).unwrap(), local_offset()), + ), + ( + "13:23 +09:30", + ReadableOffsetTime( + NaiveTime::from_hms_opt(13, 23, 00).unwrap(), + FixedOffset::east_opt(3600 * 9 + 1800).unwrap(), + ), + ), + ( + "09:30 -08:00", + ReadableOffsetTime( + NaiveTime::from_hms_opt(9, 30, 00).unwrap(), + FixedOffset::west_opt(3600 * 8).unwrap(), + ), + ), + ]; + for (encoded, expected) in decode_cases { + let actual = encoded.parse::().unwrap_or_else(|e| { + panic!( + "error parsing encoded={} expected={} error={}", + encoded, expected, e + ) + }); + assert_eq!(actual, expected); + } + let time = ReadableOffsetTime( + NaiveTime::from_hms_opt(9, 30, 00).unwrap(), + FixedOffset::west_opt(0).unwrap(), + ); + assert_eq!(format!("{}", time), "09:30:00 +00:00"); + let dt = DateTime::parse_from_rfc3339("2023-10-27T09:39:57-00:00").unwrap(); + assert!(time.hour_matches(&dt)); + assert!(!time.hour_minutes_matches(&dt)); + let dt = DateTime::parse_from_rfc3339("2023-10-27T09:30:57-00:00").unwrap(); + assert!(time.hour_minutes_matches(&dt)); + } + + #[test] + fn test_readable_schedule() { + let schedule = ReadableSchedule( + vec!["09:30 +00:00", "23:00 +00:00"] + .into_iter() + .flat_map(ReadableOffsetTime::from_str) + .collect::>(), + ); + + let time_a = DateTime::parse_from_rfc3339("2023-10-27T09:30:57-00:00").unwrap(); + let time_b = DateTime::parse_from_rfc3339("2023-10-28T09:00:57-00:00").unwrap(); + let time_c = DateTime::parse_from_rfc3339("2023-10-27T23:15:00-00:00").unwrap(); + let time_d = 
DateTime::parse_from_rfc3339("2023-10-27T23:00:00-00:00").unwrap(); + let time_e = DateTime::parse_from_rfc3339("2023-10-27T20:00:00-00:00").unwrap(); + + // positives for schedule by hour + assert!(schedule.is_scheduled_this_hour(&time_a)); + assert!(schedule.is_scheduled_this_hour(&time_b)); + assert!(schedule.is_scheduled_this_hour(&time_c)); + assert!(schedule.is_scheduled_this_hour(&time_d)); + + // negatives for schedule by hour + assert!(!schedule.is_scheduled_this_hour(&time_e)); + + // positives for schedule by hour and minute + assert!(schedule.is_scheduled_this_hour_minute(&time_a)); + assert!(schedule.is_scheduled_this_hour_minute(&time_d)); + + // negatives for schedule by hour and minute + assert!(!schedule.is_scheduled_this_hour_minute(&time_b)); + assert!(!schedule.is_scheduled_this_hour_minute(&time_c)); + assert!(!schedule.is_scheduled_this_hour_minute(&time_e)); + } + #[test] fn test_canonicalize_path() { let tmp = Builder::new() diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 2f4f5ba7695..dc61cb1b5aa 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -37,7 +37,7 @@ use tikv::{ BlockCacheConfig, Config as StorageConfig, EngineType, FlowControlConfig, IoRateLimitConfig, }, }; -use tikv_util::config::{LogFormat, ReadableDuration, ReadableSize}; +use tikv_util::config::{LogFormat, ReadableDuration, ReadableSchedule, ReadableSize}; mod dynamic; mod test_config_client; @@ -270,6 +270,8 @@ fn test_serde_custom_tikv_config() { slow_trend_unsensitive_result: 0.5, enable_v2_compatible_learner: false, unsafe_disable_check_quorum: false, + periodic_full_compact_start_times: ReadableSchedule::default(), + periodic_full_compact_start_max_cpu: 0.1, }; value.pd = PdConfig::new(vec!["example.com:443".to_owned()]); let titan_cf_config = TitanCfConfig { From b2300932ccde2347104c674632d2e473d89d6fa3 Mon Sep 17 00:00:00 2001 From: qupeng Date: Wed, 1 Nov 2023 14:54:37 +0800 Subject: [PATCH 
0987/1149] cdc: limit cdc event fetching speed to reduce RocksDB read load (#15849) close tikv/tikv#11390 None Signed-off-by: qupeng --- components/cdc/src/endpoint.rs | 50 ++++++++++++++++++++-- components/cdc/src/initializer.rs | 28 +++++++----- src/config/mod.rs | 5 +++ tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 5 files changed, 71 insertions(+), 14 deletions(-) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index a5f00a08028..e62650c77c6 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -384,6 +384,7 @@ pub struct Endpoint { workers: Runtime, scan_concurrency_semaphore: Arc, scan_speed_limiter: Limiter, + fetch_speed_limiter: Limiter, max_scan_batch_bytes: usize, max_scan_batch_size: usize, sink_memory_quota: Arc, @@ -439,11 +440,16 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint 0 { + let scan_speed_limiter = Limiter::new(if config.incremental_scan_speed_limit.0 > 0 { config.incremental_scan_speed_limit.0 as f64 } else { f64::INFINITY }); + let fetch_speed_limiter = Limiter::new(if config.incremental_fetch_speed_limit.0 > 0 { + config.incremental_fetch_speed_limit.0 as f64 + } else { + f64::INFINITY + }); CDC_SINK_CAP.set(sink_memory_quota.capacity() as i64); // For scan efficiency, the scan batch bytes should be around 1MB. 
@@ -469,7 +475,8 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, E: KvEngine, S: StoreRegionMeta> Endpoint 0 { + self.config.incremental_fetch_speed_limit.0 as f64 + } else { + f64::INFINITY + }; + + self.fetch_speed_limiter.set_speed_limit(new_speed_limit); + } } pub fn set_max_scan_batch_size(&mut self, max_scan_batch_size: usize) { @@ -793,7 +809,8 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint { pub(crate) request_id: u64, pub(crate) checkpoint_ts: TimeStamp, - pub(crate) speed_limiter: Limiter, + pub(crate) scan_speed_limiter: Limiter, + pub(crate) fetch_speed_limiter: Limiter, + pub(crate) max_scan_batch_bytes: usize, pub(crate) max_scan_batch_size: usize, @@ -404,16 +406,14 @@ impl Initializer { perf_delta, } = self.do_scan(scanner, old_value_cursors, &mut entries)?; - CDC_SCAN_BYTES.inc_by(emit as _); TLS_CDC_PERF_STATS.with(|x| *x.borrow_mut() += perf_delta); tls_flush_perf_stats(); - let require = if let Some(bytes) = disk_read { + if let Some(bytes) = disk_read { CDC_SCAN_DISK_READ_BYTES.inc_by(bytes as _); - bytes - } else { - perf_delta.block_read_byte as usize - }; - self.speed_limiter.consume(require).await; + self.scan_speed_limiter.consume(bytes).await; + } + CDC_SCAN_BYTES.inc_by(emit as _); + self.fetch_speed_limiter.consume(emit as _).await; if let Some(resolver) = resolver { // Track the locks. @@ -624,7 +624,8 @@ mod tests { } fn mock_initializer( - speed_limit: usize, + scan_limit: usize, + fetch_limit: usize, buffer: usize, engine: Option, kv_api: ChangeDataRequestKvApi, @@ -665,7 +666,8 @@ mod tests { conn_id: ConnId::new(), request_id: 0, checkpoint_ts: 1.into(), - speed_limiter: Limiter::new(speed_limit as _), + scan_speed_limiter: Limiter::new(scan_limit as _), + fetch_speed_limiter: Limiter::new(fetch_limit as _), max_scan_batch_bytes: 1024 * 1024, max_scan_batch_size: 1024, build_resolver: true, @@ -717,6 +719,7 @@ mod tests { // Buffer must be large enough to unblock async incremental scan. 
let buffer = 1000; let (mut worker, pool, mut initializer, rx, mut drain) = mock_initializer( + total_bytes, total_bytes, buffer, engine.kv_engine(), @@ -832,6 +835,7 @@ mod tests { // Buffer must be large enough to unblock async incremental scan. let buffer = 1000; let (mut worker, pool, mut initializer, _rx, mut drain) = mock_initializer( + total_bytes, total_bytes, buffer, engine.kv_engine(), @@ -914,6 +918,7 @@ mod tests { // Do incremental scan with different `hint_min_ts` values. for checkpoint_ts in [200, 100, 150] { let (mut worker, pool, mut initializer, _rx, mut drain) = mock_initializer( + usize::MAX, usize::MAX, 1000, engine.kv_engine(), @@ -979,6 +984,7 @@ mod tests { let total_bytes = 1; let buffer = 1; let (mut worker, _pool, mut initializer, rx, _drain) = mock_initializer( + total_bytes, total_bytes, buffer, None, @@ -1034,7 +1040,7 @@ mod tests { let total_bytes = 1; let buffer = 1; let (mut worker, pool, mut initializer, _rx, _drain) = - mock_initializer(total_bytes, buffer, None, kv_api, false); + mock_initializer(total_bytes, total_bytes, buffer, None, kv_api, false); let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); let raft_router = CdcRaftRouter(MockRaftStoreRouter::new()); diff --git a/src/config/mod.rs b/src/config/mod.rs index d1fb1e4f8d8..237ac3c7a72 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2940,7 +2940,11 @@ pub struct CdcConfig { #[online_config(skip)] pub incremental_scan_threads: usize, pub incremental_scan_concurrency: usize, + /// Limit scan speed based on disk I/O traffic. pub incremental_scan_speed_limit: ReadableSize, + /// Limit scan speed based on memory accesing traffic. + #[doc(hidden)] + pub incremental_fetch_speed_limit: ReadableSize, /// `TsFilter` can increase speed and decrease resource usage when /// incremental content is much less than total content. 
However in /// other cases, `TsFilter` can make performance worse because it needs @@ -2979,6 +2983,7 @@ impl Default for CdcConfig { // TiCDC requires a SSD, the typical write speed of SSD // is more than 500MB/s, so 128MB/s is enough. incremental_scan_speed_limit: ReadableSize::mb(128), + incremental_fetch_speed_limit: ReadableSize::mb(512), incremental_scan_ts_filter_ratio: 0.2, tso_worker_threads: 1, // 512MB memory for CDC sink. diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index dc61cb1b5aa..1239aa53fb8 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -851,6 +851,7 @@ fn test_serde_custom_tikv_config() { incremental_scan_threads: 3, incremental_scan_concurrency: 4, incremental_scan_speed_limit: ReadableSize(7), + incremental_fetch_speed_limit: ReadableSize(8), incremental_scan_ts_filter_ratio: 0.7, tso_worker_threads: 2, old_value_cache_memory_quota: ReadableSize::mb(14), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 1bb52fad5fc..ef7a4809168 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -699,6 +699,7 @@ hibernate-regions-compatible = false incremental-scan-threads = 3 incremental-scan-concurrency = 4 incremental-scan-speed-limit = 7 +incremental-fetch-speed-limit = 8 incremental-scan-ts-filter-ratio = 0.7 tso-worker-threads = 2 old-value-cache-memory-quota = "14MB" From 78d835d91b07fb5c18e1158c21841fd43116bc02 Mon Sep 17 00:00:00 2001 From: Smilencer Date: Wed, 1 Nov 2023 16:29:07 +0800 Subject: [PATCH 0988/1149] makefile: update cargo sort installation to remove dependency on rust-toolchain (#15854) close tikv/tikv#15818 update cargo sort installation to remove dependency on rust-toolchain Signed-off-by: Smityz Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/Makefile b/Makefile index ce8d4e8b793..103c502036e 100644 --- a/Makefile +++ b/Makefile @@ -331,7 +331,7 @@ unset-override: pre-format: unset-override @rustup component add rustfmt - @which cargo-sort &> /dev/null || cargo install -q cargo-sort + @which cargo-sort &> /dev/null || cargo +nightly install -q cargo-sort format: pre-format @cargo fmt From 4c369d2cdc19acd336226e49cdd6c903f3a47ab5 Mon Sep 17 00:00:00 2001 From: qupeng Date: Thu, 2 Nov 2023 14:24:37 +0800 Subject: [PATCH 0989/1149] cdc: incremental scans acquire snapshots before semaphores to avoid useless queue (#15865) close tikv/tikv#15866 cdc: incremental scans acquire snapshots before semaphores to avoid useless queue Signed-off-by: qupeng --- components/cdc/src/delegate.rs | 7 ++- components/cdc/src/endpoint.rs | 7 +-- components/cdc/src/initializer.rs | 93 ++++++++++--------------------- 3 files changed, 37 insertions(+), 70 deletions(-) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index c82c4cb6f13..780cfe8dea6 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -423,10 +423,15 @@ impl Delegate { downstream.state.store(DownstreamState::Stopped); let error_event = error.clone(); if let Err(err) = downstream.sink_error_event(region_id, error_event) { - warn!("cdc broadcast error failed"; + warn!("cdc send region error failed"; "region_id" => region_id, "error" => ?err, "origin_error" => ?error, "downstream_id" => ?downstream.id, "downstream" => ?downstream.peer, "request_id" => downstream.req_id, "conn_id" => ?downstream.conn_id); + } else { + info!("cdc send region error success"; + "region_id" => region_id, "origin_error" => ?error, + "downstream_id" => ?downstream.id, "downstream" => ?downstream.peer, + "request_id" => downstream.req_id, "conn_id" => ?downstream.conn_id); } Ok(()) }; diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index e62650c77c6..82233af8f14 100644 --- 
a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -809,6 +809,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, E: KvEngine, S: StoreRegionMeta> Endpoint { CDC_SCAN_TASKS.with_label_values(&["finish"]).inc(); } diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index bd8f5e4e637..2882d2e975e 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -35,7 +35,7 @@ use tikv_kv::Iterator; use tikv_util::{ box_err, codec::number, - debug, error, info, + debug, defer, error, info, memory::MemoryQuota, sys::inspector::{self_thread_inspector, ThreadInspector}, time::{Instant, Limiter}, @@ -90,6 +90,7 @@ pub(crate) struct Initializer { pub(crate) request_id: u64, pub(crate) checkpoint_ts: TimeStamp, + pub(crate) scan_concurrency_semaphore: Arc, pub(crate) scan_speed_limiter: Limiter, pub(crate) fetch_speed_limiter: Limiter, @@ -109,30 +110,9 @@ impl Initializer { &mut self, change_observer: ChangeObserver, cdc_handle: T, - concurrency_semaphore: Arc, memory_quota: Arc, ) -> Result<()> { fail_point!("cdc_before_initialize"); - let _permit = concurrency_semaphore.acquire().await; - - // When downstream_state is Stopped, it means the corresponding delegate - // is stopped. The initialization can be safely canceled. - // - // Acquiring a permit may take some time, it is possible that - // initialization can be canceled. - if self.downstream_state.load() == DownstreamState::Stopped { - info!("cdc async incremental scan canceled"; - "region_id" => self.region_id, - "downstream_id" => ?self.downstream_id, - "observe_id" => ?self.observe_id, - "conn_id" => ?self.conn_id); - return Err(box_err!("scan canceled")); - } - - CDC_SCAN_TASKS.with_label_values(&["ongoing"]).inc(); - tikv_util::defer!({ - CDC_SCAN_TASKS.with_label_values(&["ongoing"]).dec(); - }); // To avoid holding too many snapshots and holding them too long, // we need to acquire scan concurrency permit before taking snapshot. 
@@ -187,8 +167,8 @@ impl Initializer { memory_quota: Arc, ) -> Result<()> { if let Some(region_snapshot) = resp.snapshot { - assert_eq!(self.region_id, region_snapshot.get_region().get_id()); let region = region_snapshot.get_region().clone(); + assert_eq!(self.region_id, region.get_id()); self.async_incremental_scan(region_snapshot, region, memory_quota) .await } else { @@ -208,10 +188,29 @@ impl Initializer { region: Region, memory_quota: Arc, ) -> Result<()> { - let downstream_id = self.downstream_id; + let scan_concurrency_semaphore = self.scan_concurrency_semaphore.clone(); + let _permit = scan_concurrency_semaphore.acquire().await; + CDC_SCAN_TASKS.with_label_values(&["ongoing"]).inc(); + defer!(CDC_SCAN_TASKS.with_label_values(&["ongoing"]).dec()); + let region_id = region.get_id(); + let downstream_id = self.downstream_id; let observe_id = self.observe_id; + let conn_id = self.conn_id; let kv_api = self.kv_api; + let on_cancel = || -> Result<()> { + info!("cdc async incremental scan canceled"; + "region_id" => region_id, + "downstream_id" => ?downstream_id, + "observe_id" => ?observe_id, + "conn_id" => ?conn_id); + Err(box_err!("scan canceled")) + }; + + if self.downstream_state.load() == DownstreamState::Stopped { + return on_cancel(); + } + self.observed_range.update_region_key_range(®ion); debug!("cdc async incremental scan"; "region_id" => region_id, @@ -260,7 +259,6 @@ impl Initializer { }; fail_point!("cdc_incremental_scan_start"); - let conn_id = self.conn_id; let mut done = false; let start = Instant::now_coarse(); @@ -270,15 +268,6 @@ impl Initializer { DownstreamState::Initializing | DownstreamState::Stopped )); - let on_cancel = || -> Result<()> { - info!("cdc async incremental scan canceled"; - "region_id" => region_id, - "downstream_id" => ?downstream_id, - "observe_id" => ?observe_id, - "conn_id" => ?conn_id); - Err(box_err!("scan canceled")) - }; - while !done { // When downstream_state is Stopped, it means the corresponding // delegate is 
stopped. The initialization can be safely canceled. @@ -666,6 +655,7 @@ mod tests { conn_id: ConnId::new(), request_id: 0, checkpoint_ts: 1.into(), + scan_concurrency_semaphore: Arc::new(Semaphore::new(1)), scan_speed_limiter: Limiter::new(scan_limit as _), fetch_speed_limiter: Limiter::new(fetch_limit as _), max_scan_batch_bytes: 1024 * 1024, @@ -1044,51 +1034,26 @@ mod tests { let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); let raft_router = CdcRaftRouter(MockRaftStoreRouter::new()); - let concurrency_semaphore = Arc::new(Semaphore::new(1)); let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); initializer.downstream_state.store(DownstreamState::Stopped); - block_on(initializer.initialize( - change_cmd, - raft_router.clone(), - concurrency_semaphore.clone(), - memory_quota.clone(), - )) - .unwrap_err(); - - let (tx, rx) = sync_channel(1); - let concurrency_semaphore_ = concurrency_semaphore.clone(); - pool.spawn(async move { - let _permit = concurrency_semaphore_.acquire().await; - tx.send(()).unwrap(); - tx.send(()).unwrap(); - tx.send(()).unwrap(); - }); - rx.recv_timeout(Duration::from_millis(200)).unwrap(); + block_on(initializer.initialize(change_cmd, raft_router.clone(), memory_quota.clone())) + .unwrap_err(); let (tx1, rx1) = sync_channel(1); let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); pool.spawn(async move { // Migrated to 2021 migration. This let statement is probably not needed, see // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html - let _ = ( - &initializer, - &change_cmd, - &raft_router, - &concurrency_semaphore, - ); let res = initializer - .initialize(change_cmd, raft_router, concurrency_semaphore, memory_quota) + .initialize(change_cmd, raft_router, memory_quota) .await; tx1.send(res).unwrap(); }); - // Must timeout because there is no enough permit. 
- rx1.recv_timeout(Duration::from_millis(200)).unwrap_err(); - // Release the permit - rx.recv_timeout(Duration::from_millis(200)).unwrap(); + // Shouldn't timeout, gets an error instead. let res = rx1.recv_timeout(Duration::from_millis(200)).unwrap(); - res.unwrap_err(); + assert!(res.is_err()); worker.stop(); } From f0ce447adf4e9d1bde7c7b1f9560a34d9fe77705 Mon Sep 17 00:00:00 2001 From: Xiaoya Wei Date: Thu, 2 Nov 2023 14:39:37 +0800 Subject: [PATCH 0990/1149] fuzz: Bump afl version (#15848) close tikv/tikv#15847 bump afl version Bump afl to a new version that can successfully builds on MacOS. close tikv/tikv#15847 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 93 ++++++++++++++++++++++++++++++++++---- fuzz/fuzzer-afl/Cargo.toml | 2 +- 2 files changed, 85 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 09459fd9123..f56c8ff1395 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -31,13 +31,13 @@ checksum = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" [[package]] name = "afl" -version = "0.6.0" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59206260f98d163b3ca42fb29fe551dbcda1d43cf70a244066b2a0666a8fb2a9" +checksum = "330d7251127b228cb4187ac2373dc37f615d65199f93b5443edeeed839fff5df" dependencies = [ - "cc", - "clap 2.33.0", - "rustc_version 0.2.3", + "home", + "libc 0.2.146", + "rustc_version 0.4.0", "xdg", ] @@ -2515,6 +2515,15 @@ dependencies = [ "digest 0.10.6", ] +[[package]] +name = "home" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +dependencies = [ + "windows-sys 0.48.0", +] + [[package]] name = "honggfuzz" version = "0.5.47" @@ -7555,21 +7564,51 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm", + "windows_aarch64_gnullvm 0.42.0", "windows_aarch64_msvc 0.42.0", "windows_i686_gnu 0.42.0", "windows_i686_msvc 0.42.0", "windows_x86_64_gnu 0.42.0", - "windows_x86_64_gnullvm", + "windows_x86_64_gnullvm 0.42.0", "windows_x86_64_msvc 0.42.0", ] +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_msvc" version = "0.32.0" @@ -7582,6 +7621,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_i686_gnu" version = "0.32.0" @@ 
-7594,6 +7639,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_msvc" version = "0.32.0" @@ -7606,6 +7657,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_x86_64_gnu" version = "0.32.0" @@ -7618,12 +7675,24 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_msvc" version = "0.32.0" @@ -7636,6 +7705,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" +[[package]] +name = 
"windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "winreg" version = "0.7.0" @@ -7672,9 +7747,9 @@ dependencies = [ [[package]] name = "xdg" -version = "2.2.0" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d089681aa106a86fade1b0128fb5daf07d5867a509ab036d99988dec80429a57" +checksum = "213b7324336b53d2414b2db8537e56544d981803139155afa84f76eeebb7a546" [[package]] name = "xml-rs" diff --git a/fuzz/fuzzer-afl/Cargo.toml b/fuzz/fuzzer-afl/Cargo.toml index 6c97305a253..5e9894fba3e 100644 --- a/fuzz/fuzzer-afl/Cargo.toml +++ b/fuzz/fuzzer-afl/Cargo.toml @@ -8,4 +8,4 @@ fuzz-targets = { path = "../targets" } # AFL only works for x86 targets [target.'cfg(all(not(target_os = "windows"), target_arch = "x86_64"))'.dependencies] -afl = "0.6" +afl = "0.14" From 9f46cdc90756f63e76320c2f51c7ed4a2ab5e7d2 Mon Sep 17 00:00:00 2001 From: tongjian <1045931706@qq.com> Date: Fri, 3 Nov 2023 11:42:39 +0800 Subject: [PATCH 0991/1149] test: fix the flaky test for `test_gen_split_check_bucket_ranges ` (#15876) close tikv/tikv#15862 Signed-off-by: bufferflies <1045931706@qq.com> Signed-off-by: tongjian <1045931706@qq.com> Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- components/raftstore/src/store/fsm/peer.rs | 5 ++ components/test_raftstore/src/cluster.rs | 2 +- .../raftstore/test_split_region.rs | 48 +++++++++---------- 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 14ad09dbde8..5a7223dcaa3 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -6206,6 +6206,11 @@ where cb(peer_stat); } } + + // only check the suspect buckets, not split region. 
+ if source == "bucket" { + return; + } let task = SplitCheckTask::split_check_key_range( region.clone(), start_key, diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 2a4082893e7..a08f858c031 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1938,7 +1938,7 @@ impl Cluster { start_key: None, end_key: None, policy: CheckPolicy::Scan, - source: "test", + source: "bucket", cb, }, ) diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 35ee1838865..5439e5c8ba2 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -1162,9 +1162,9 @@ fn test_refresh_region_bucket_keys() { #[test] fn test_gen_split_check_bucket_ranges() { - let count = 5; - let mut cluster = new_server_cluster(0, count); - cluster.cfg.coprocessor.region_bucket_size = ReadableSize(5); + let mut cluster = new_server_cluster(0, 1); + let region_bucket_size = ReadableSize::kb(1); + cluster.cfg.coprocessor.region_bucket_size = region_bucket_size; cluster.cfg.coprocessor.enable_region_bucket = Some(true); // disable report buckets; as it will reset the user traffic stats to randomize // the test result @@ -1174,14 +1174,15 @@ fn test_gen_split_check_bucket_ranges() { cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); - cluster.must_put(b"k11", b"v1"); - let region = pd_client.get_region(b"k11").unwrap(); + let mut range = 1..; + let mid_key = put_till_size(&mut cluster, region_bucket_size.0, &mut range); + let second_key = put_till_size(&mut cluster, region_bucket_size.0, &mut range); + let region = pd_client.get_region(&second_key).unwrap(); let bucket = Bucket { - keys: vec![b"k11".to_vec()], - size: 1024 * 1024 * 200, + keys: vec![mid_key.clone()], + size: region_bucket_size.0 * 2, }; - let mut expected_buckets = metapb::Buckets::default(); 
expected_buckets.set_keys(bucket.clone().keys.into()); expected_buckets @@ -1197,32 +1198,28 @@ fn test_gen_split_check_bucket_ranges() { Option::None, Some(expected_buckets.clone()), ); - cluster.must_put(b"k10", b"v1"); - cluster.must_put(b"k12", b"v1"); - let expected_bucket_ranges = vec![ - BucketRange(vec![], b"k11".to_vec()), - BucketRange(b"k11".to_vec(), vec![]), - ]; + // put some data into the right buckets, so the bucket range will be check by + // split check. + let latest_key = put_till_size(&mut cluster, region_bucket_size.0 + 100, &mut range); + let expected_bucket_ranges = vec![BucketRange(mid_key.clone(), vec![])]; cluster.send_half_split_region_message(®ion, Some(expected_bucket_ranges)); - // set fsm.peer.last_bucket_regions + // reset bucket stats. cluster.refresh_region_bucket_keys( ®ion, buckets, Option::None, Some(expected_buckets.clone()), ); - // because the diff between last_bucket_regions and bucket_regions is zero, - // bucket range for split check should be empty. 
- let expected_bucket_ranges = vec![]; - cluster.send_half_split_region_message(®ion, Some(expected_bucket_ranges)); - // split the region - pd_client.must_split_region(region, pdpb::CheckPolicy::Usekey, vec![b"k11".to_vec()]); + thread::sleep(Duration::from_millis(100)); + cluster.send_half_split_region_message(®ion, Some(vec![])); - let left = pd_client.get_region(b"k10").unwrap(); - let right = pd_client.get_region(b"k12").unwrap(); + // split the region + pd_client.must_split_region(region, pdpb::CheckPolicy::Usekey, vec![second_key]); + let left = pd_client.get_region(&mid_key).unwrap(); + let right = pd_client.get_region(&latest_key).unwrap(); if right.get_id() == 1 { // the bucket_ranges should be None to refresh the bucket cluster.send_half_split_region_message(&right, None); @@ -1230,11 +1227,10 @@ fn test_gen_split_check_bucket_ranges() { // the bucket_ranges should be None to refresh the bucket cluster.send_half_split_region_message(&left, None); } - + thread::sleep(Duration::from_millis(300)); // merge the region pd_client.must_merge(left.get_id(), right.get_id()); - let region = pd_client.get_region(b"k10").unwrap(); - // the bucket_ranges should be None to refresh the bucket + let region = pd_client.get_region(&mid_key).unwrap(); cluster.send_half_split_region_message(®ion, None); } From e0fe14d57136f645457bcf14ee5ae1a478be04b8 Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 3 Nov 2023 16:14:09 +0800 Subject: [PATCH 0992/1149] titan: update titan to avoid manifest io mutex (#15914) close tikv/tikv#15351 titan: update titan to avoid manifest io mutex Signed-off-by: Connor1996 --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f56c8ff1395..fba26935d1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2950,7 +2950,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#b747689e1b94cb1507872e898b83553447e8f8de" +source 
= "git+https://github.com/tikv/rust-rocksdb.git#aa41eb102d373f56846be88ffd250c2b581b48d4" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2969,7 +2969,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#b747689e1b94cb1507872e898b83553447e8f8de" +source = "git+https://github.com/tikv/rust-rocksdb.git#aa41eb102d373f56846be88ffd250c2b581b48d4" dependencies = [ "bzip2-sys", "cc", @@ -4890,7 +4890,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#b747689e1b94cb1507872e898b83553447e8f8de" +source = "git+https://github.com/tikv/rust-rocksdb.git#aa41eb102d373f56846be88ffd250c2b581b48d4" dependencies = [ "libc 0.2.146", "librocksdb_sys", From 07141aad5a591306cacbe18aafc2c755d35a70bc Mon Sep 17 00:00:00 2001 From: tongjian <1045931706@qq.com> Date: Mon, 6 Nov 2023 10:27:39 +0800 Subject: [PATCH 0993/1149] server: make grpc metrics consistent (#15906) ref tikv/tikv#15803 the grpc duration should not include the stream sink. 
Signed-off-by: bufferflies <1045931706@qq.com> --- src/server/service/kv.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 77f92d33d95..8426143d502 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -491,8 +491,8 @@ impl Tikv for Service { let future = future_copr(&self.copr, Some(ctx.peer()), req); let task = async move { let resp = future.await?.consume(); - sink.success(resp).await?; let elapsed = begin_instant.saturating_elapsed(); + sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .coprocessor .observe(elapsed.as_secs_f64()); @@ -529,8 +529,8 @@ impl Tikv for Service { let future = future_raw_coprocessor(&self.copr_v2, &self.storage, req); let task = async move { let resp = future.await?; - sink.success(resp).await?; let elapsed = begin_instant.saturating_elapsed(); + sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .raw_coprocessor .observe(elapsed.as_secs_f64()); @@ -580,8 +580,8 @@ impl Tikv for Service { if let Err(e) = res { resp.set_error(format!("{}", e)); } - sink.success(resp).await?; let elapsed = begin_instant.saturating_elapsed(); + sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .unsafe_destroy_range .observe(elapsed.as_secs_f64()); @@ -863,10 +863,10 @@ impl Tikv for Service { } } } - sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .split_region .observe(begin_instant.saturating_elapsed().as_secs_f64()); + sink.success(resp).await?; ServerResult::Ok(()) } .map_err(|e| { @@ -1015,6 +1015,9 @@ impl Tikv for Service { .schedule(CheckLeaderTask::CheckLeader { leaders, cb }) .map_err(|e| Error::Other(format!("{}", e).into()))?; let regions = resp.await?; + GRPC_MSG_HISTOGRAM_STATIC + .check_leader + .observe(begin_instant.saturating_elapsed().as_secs_f64()); let mut resp = CheckLeaderResponse::default(); resp.set_ts(ts); resp.set_regions(regions); @@ -1026,10 +1029,6 @@ impl Tikv for Service { } return 
Err(Error::from(e)); } - let elapsed = begin_instant.saturating_elapsed(); - GRPC_MSG_HISTOGRAM_STATIC - .check_leader - .observe(elapsed.as_secs_f64()); ServerResult::Ok(()) } .map_err(move |e| { From 1a726454bcaa3f9087e02fe92bcf3ad7f779f07f Mon Sep 17 00:00:00 2001 From: lance6716 Date: Mon, 6 Nov 2023 10:47:10 +0800 Subject: [PATCH 0994/1149] sst_importer: join can fallback to version 1 filename (#15913) close tikv/tikv#15912 Signed-off-by: lance6716 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/sst_importer/src/import_file.rs | 121 ++++++++++++-------- components/sst_importer/src/sst_importer.rs | 46 +++----- 2 files changed, 89 insertions(+), 78 deletions(-) diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index b3b7c051ce4..850df867da8 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -13,8 +13,7 @@ use api_version::api_v2::TIDB_RANGES_COMPLEMENT; use encryption::{DataKeyManager, EncrypterWriter}; use engine_rocks::{get_env, RocksSstReader}; use engine_traits::{ - iter_option, EncryptionKeyManager, IterOptions, Iterator, KvEngine, RefIterable, SstExt, - SstMetaInfo, SstReader, + iter_option, EncryptionKeyManager, Iterator, KvEngine, RefIterable, SstMetaInfo, SstReader, }; use file_system::{get_io_rate_limiter, sync_dir, File, OpenOptions}; use keys::data_key; @@ -261,17 +260,36 @@ impl ImportDir { }) } - pub fn join(&self, meta: &SstMeta) -> Result { + pub fn join_for_write(&self, meta: &SstMeta) -> Result { let file_name = sst_meta_to_path(meta)?; self.get_import_path(file_name.to_str().unwrap()) } + /// Different with join_for_write, join_for_read will also handle the api + /// version 1 filenames which can be generated by old version TiKV. 
+ pub fn join_for_read(&self, meta: &SstMeta) -> Result { + let file_name = sst_meta_to_path(meta)?; + let files_result = self.get_import_path(file_name.to_str().unwrap()); + // if files does not exists, it means the SstMeta is generated by old version + // TiKV, we try sst_meta_to_path_v1 + match files_result { + Ok(path) => { + if path.save.exists() { + return Ok(path); + } + let file_name = sst_meta_to_path_v1(meta)?; + self.get_import_path(file_name.to_str().unwrap()) + } + Err(e) => Err(e), + } + } + pub fn create( &self, meta: &SstMeta, key_manager: Option>, ) -> Result { - let path = self.join(meta)?; + let path = self.join_for_write(meta)?; if path.save.exists() { return Err(Error::FileExists(path.save, "create SST upload cache")); } @@ -290,7 +308,7 @@ impl ImportDir { } pub fn delete(&self, meta: &SstMeta, manager: Option<&DataKeyManager>) -> Result { - let path = self.join(meta)?; + let path = self.join_for_read(meta)?; self.delete_file(&path.save, manager)?; self.delete_file(&path.temp, manager)?; self.delete_file(&path.clone, manager)?; @@ -298,7 +316,7 @@ impl ImportDir { } pub fn exist(&self, meta: &SstMeta) -> Result { - let path = self.join(meta)?; + let path = self.join_for_read(meta)?; Ok(path.save.exists()) } @@ -307,7 +325,7 @@ impl ImportDir { meta: &SstMeta, key_manager: Option>, ) -> Result { - let path = self.join(meta)?; + let path = self.join_for_read(meta)?; let path_str = path.save.to_str().unwrap(); let env = get_env(key_manager, get_io_rate_limiter())?; let sst_reader = RocksSstReader::open_with_env(path_str, Some(env))?; @@ -334,7 +352,7 @@ impl ImportDir { // otherwise we are upgrade/downgrade between V1 and V2 // this can be done if all keys are written by TiDB _ => { - let path = self.join(meta)?; + let path = self.join_for_read(meta)?; let path_str = path.save.to_str().unwrap(); let env = get_env(key_manager.clone(), get_io_rate_limiter())?; let sst_reader = RocksSstReader::open_with_env(path_str, Some(env))?; @@ -382,7 +400,7 @@ 
impl ImportDir { let mut paths = HashMap::new(); let mut ingest_bytes = 0; for info in metas { - let path = self.join(&info.meta)?; + let path = self.join_for_read(&info.meta)?; let cf = info.meta.get_cf_name(); super::prepare_sst_for_ingestion(&path.save, &path.clone, key_manager.as_deref())?; ingest_bytes += info.total_bytes; @@ -407,7 +425,7 @@ impl ImportDir { key_manager: Option>, ) -> Result<()> { for meta in metas { - let path = self.join(meta)?; + let path = self.join_for_read(meta)?; let path_str = path.save.to_str().unwrap(); let env = get_env(key_manager.clone(), get_io_rate_limiter())?; let sst_reader = RocksSstReader::open_with_env(path_str, Some(env))?; @@ -416,31 +434,6 @@ impl ImportDir { Ok(()) } - pub fn load_start_key_by_meta( - &self, - meta: &SstMeta, - km: Option>, - ) -> Result>> { - let path = self.join(meta)?; - let r = match km { - Some(km) => E::SstReader::open_encrypted(&path.save.to_string_lossy(), km)?, - None => E::SstReader::open(&path.save.to_string_lossy())?, - }; - let opts = IterOptions::new(None, None, false); - let mut i = r.iter(opts)?; - if !i.seek_to_first()? || !i.valid()? { - return Ok(None); - } - // Should we warn if the key doesn't start with the prefix key? (Is that - // possible?) - // Also note this brings implicit coupling between this and - // RocksEngine. Perhaps it is better to make the engine to provide - // decode functions. Anyway we have directly used the RocksSstReader - // somewhere... This won't make things worse. - let real_key = i.key().strip_prefix(keys::DATA_PREFIX_KEY); - Ok(real_key.map(ToOwned::to_owned)) - } - pub fn list_ssts(&self) -> Result> { let mut ssts = Vec::new(); for e in file_system::read_dir(&self.root_dir)? 
{ @@ -483,6 +476,18 @@ pub fn sst_meta_to_path(meta: &SstMeta) -> Result { ))) } +pub fn sst_meta_to_path_v1(meta: &SstMeta) -> Result { + Ok(PathBuf::from(format!( + "{}_{}_{}_{}_{}{}", + UuidBuilder::from_slice(meta.get_uuid())?.build(), + meta.get_region_id(), + meta.get_region_epoch().get_conf_ver(), + meta.get_region_epoch().get_version(), + meta.get_cf_name(), + SST_SUFFIX, + ))) +} + pub fn parse_meta_from_path>(path: P) -> Result<(SstMeta, i32)> { let path = path.as_ref(); let file_name = match path.file_name().and_then(|n| n.to_str()) { @@ -521,6 +526,8 @@ pub fn parse_meta_from_path>(path: P) -> Result<(SstMeta, i32)> { #[cfg(test)] mod test { + use std::fs; + use engine_traits::CF_DEFAULT; use super::*; @@ -565,6 +572,35 @@ mod test { assert_eq!(1, meta_with_ver.1); } + #[test] + fn test_join_for_rw() { + use tempfile::TempDir; + use uuid::Uuid; + + let tmp = TempDir::new().unwrap(); + let dir = ImportDir::new(tmp.path()).unwrap(); + let mut meta = SstMeta::default(); + meta.set_uuid(Uuid::new_v4().as_bytes().to_vec()); + let filename_v1 = sst_meta_to_path_v1(&meta).unwrap(); + let path_v1 = tmp.path().join(filename_v1); + + let got = dir + .join_for_read(&meta) + .expect("fallback to version 1 because version 2 file does not exist"); + assert_eq!(got.save, path_v1); + + let filename_v2 = sst_meta_to_path(&meta).unwrap(); + let path_v2 = tmp.path().join(filename_v2); + fs::File::create(&path_v2).expect("create empty file"); + let got = dir.join_for_read(&meta).expect("read should succeed"); + assert_eq!(got.save, path_v2); + fs::remove_file(path_v2).expect("delete file"); + + fs::File::create(&path_v1).expect("create empty file"); + let got = dir.join_for_read(&meta).expect("read should succeed"); + assert_eq!(got.save, path_v1); + } + #[cfg(feature = "test-engines-rocksdb")] fn test_path_with_range_and_km(km: Option) { use engine_rocks::{RocksEngine, RocksSstWriterBuilder}; @@ -613,21 +649,6 @@ mod test { .unwrap(); w.finish().unwrap(); 
dp.save(arcmgr.as_deref()).unwrap(); - let mut ssts = dir.list_ssts().unwrap(); - ssts.iter_mut().for_each(|meta_with_ver| { - let meta = &mut meta_with_ver.0; - let start = dir - .load_start_key_by_meta::(meta, arcmgr.clone()) - .unwrap() - .unwrap(); - meta.mut_range().set_start(start) - }); - assert_eq!( - ssts.iter() - .map(|meta_with_ver| { meta_with_ver.0.clone() }) - .collect(), - vec![meta] - ); } #[test] diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 358bc0545de..7e1de9cf44e 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -289,7 +289,7 @@ impl SstImporter { } pub fn get_path(&self, meta: &SstMeta) -> PathBuf { - let path = self.dir.join(meta).unwrap(); + let path = self.dir.join_for_read(meta).unwrap(); path.save } @@ -1114,7 +1114,7 @@ impl SstImporter { engine: E, ext: DownloadExt<'_>, ) -> Result> { - let path = self.dir.join(meta)?; + let path = self.dir.join_for_write(meta)?; let file_crypter = crypter.map(|c| FileEncryptionInfo { method: to_engine_encryption_method(c.cipher_type), @@ -1389,20 +1389,10 @@ impl SstImporter { self.dir.list_ssts() } - /// Load the start key by a metadata. - /// This will open the internal SST and try to load the first user key. - /// (For RocksEngine, that is the key without the 'z' prefix.) - /// When the SST is empty or the first key cannot be parsed as user key, - /// return None. 
- pub fn load_start_key_by_meta(&self, meta: &SstMeta) -> Result>> { - self.dir - .load_start_key_by_meta::(meta, self.key_manager.clone()) - } - pub fn new_txn_writer(&self, db: &E, meta: SstMeta) -> Result> { let mut default_meta = meta.clone(); default_meta.set_cf_name(CF_DEFAULT.to_owned()); - let default_path = self.dir.join(&default_meta)?; + let default_path = self.dir.join_for_write(&default_meta)?; let default = E::SstWriterBuilder::new() .set_db(db) .set_cf(CF_DEFAULT) @@ -1412,7 +1402,7 @@ impl SstImporter { let mut write_meta = meta; write_meta.set_cf_name(CF_WRITE.to_owned()); - let write_path = self.dir.join(&write_meta)?; + let write_path = self.dir.join_for_write(&write_meta)?; let write = E::SstWriterBuilder::new() .set_db(db) .set_cf(CF_WRITE) @@ -1438,7 +1428,7 @@ impl SstImporter { mut meta: SstMeta, ) -> Result> { meta.set_cf_name(CF_DEFAULT.to_owned()); - let default_path = self.dir.join(&meta)?; + let default_path = self.dir.join_for_write(&meta)?; let default = E::SstWriterBuilder::new() .set_db(db) .set_cf(CF_DEFAULT) @@ -1519,7 +1509,7 @@ mod tests { let mut meta = SstMeta::default(); meta.set_uuid(Uuid::new_v4().as_bytes().to_vec()); - let path = dir.join(&meta).unwrap(); + let path = dir.join_for_write(&meta).unwrap(); // Test ImportDir::create() { @@ -2333,7 +2323,7 @@ mod tests { assert_eq!(range.get_end(), b"t123_r13"); // verifies that the file is saved to the correct place. - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; let sst_file_metadata = sst_file_path.metadata().unwrap(); assert!(sst_file_metadata.is_file()); assert_eq!(sst_file_metadata.len(), meta.get_length()); @@ -2393,7 +2383,7 @@ mod tests { assert_eq!(range.get_end(), b"t123_r13"); // verifies that the file is saved to the correct place. 
- let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; let sst_file_metadata = sst_file_path.metadata().unwrap(); assert!(sst_file_metadata.is_file()); assert_eq!(sst_file_metadata.len(), meta.get_length()); @@ -2443,7 +2433,7 @@ mod tests { // verifies that the file is saved to the correct place. // (the file size may be changed, so not going to check the file size) - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; assert!(sst_file_path.is_file()); // verifies the SST content is correct. @@ -2488,7 +2478,7 @@ mod tests { // verifies that the file is saved to the correct place. // (the file size may be changed, so not going to check the file size) - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; assert!(sst_file_path.is_file()); // verifies the SST content is correct. @@ -2532,7 +2522,7 @@ mod tests { // verifies that the file is saved to the correct place. // (the file size may be changed, so not going to check the file size) - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; assert!(sst_file_path.is_file()); // verifies the SST content is correct. @@ -2674,7 +2664,7 @@ mod tests { // verifies that the file is saved to the correct place. // (the file size is changed, so not going to check the file size) - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; assert!(sst_file_path.is_file()); // verifies the SST content is correct. @@ -2718,7 +2708,7 @@ mod tests { assert_eq!(range.get_end(), b"t5_r07"); // verifies that the file is saved to the correct place. 
- let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; assert!(sst_file_path.is_file()); // verifies the SST content is correct. @@ -2851,7 +2841,7 @@ mod tests { assert_eq!(range.get_end(), b"d"); // verifies that the file is saved to the correct place. - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; let sst_file_metadata = sst_file_path.metadata().unwrap(); assert!(sst_file_metadata.is_file()); assert_eq!(sst_file_metadata.len(), meta.get_length()); @@ -2910,7 +2900,7 @@ mod tests { assert_eq!(range.get_end(), b"c\x00"); // verifies that the file is saved to the correct place. - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; let sst_file_metadata = sst_file_path.metadata().unwrap(); assert!(sst_file_metadata.is_file()); @@ -2965,7 +2955,7 @@ mod tests { assert_eq!(range.get_end(), b"c"); // verifies that the file is saved to the correct place. - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; let sst_file_metadata = sst_file_path.metadata().unwrap(); assert!(sst_file_metadata.is_file()); @@ -3011,7 +3001,7 @@ mod tests { .unwrap(); // verifies the SST is compressed using Snappy. - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; assert!(sst_file_path.is_file()); let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); @@ -3058,7 +3048,7 @@ mod tests { // verifies SST compression algorithm... 
for meta in metas { - let sst_file_path = importer.dir.join(&meta).unwrap().save; + let sst_file_path = importer.dir.join_for_read(&meta).unwrap().save; assert!(sst_file_path.is_file()); let sst_reader = new_sst_reader(sst_file_path.to_str().unwrap(), None); From 6a2c9733a8873089561d2b05545fbbb85fb96f5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Tue, 7 Nov 2023 12:41:11 +0800 Subject: [PATCH 0995/1149] log-backup: use row-level memory usage statistic for initial scan (#15872) close tikv/tikv#15714 Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/backup-stream/src/endpoint.rs | 8 +- components/backup-stream/src/event_loader.rs | 129 ++++++----- components/backup-stream/src/lib.rs | 2 +- components/backup-stream/src/router.rs | 9 + .../backup-stream/tests/failpoints/mod.rs | 77 +++++-- .../backup-stream/tests/integration/mod.rs | 76 +++---- components/backup-stream/tests/suite.rs | 212 +++++++++++------- components/tikv_util/src/memory.rs | 52 ++++- 8 files changed, 362 insertions(+), 203 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 834a40f8bdd..6c19edc9f93 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -29,6 +29,7 @@ use tikv_util::{ box_err, config::ReadableDuration, debug, defer, info, + memory::MemoryQuota, sys::thread::ThreadBuildWrapper, time::{Instant, Limiter}, warn, @@ -51,7 +52,7 @@ use crate::{ GetCheckpointResult, RegionIdWithVersion, Subscription, }, errors::{Error, Result}, - event_loader::{InitialDataLoader, PendingMemoryQuota}, + event_loader::InitialDataLoader, future, metadata::{store::MetaStore, MetadataClient, MetadataEvent, StreamTask}, metrics::{self, TaskStatus}, @@ -139,8 +140,9 @@ where pool.spawn(Self::starts_flush_ticks(range_router.clone())); - let 
initial_scan_memory_quota = - PendingMemoryQuota::new(config.initial_scan_pending_memory_quota.0 as _); + let initial_scan_memory_quota = Arc::new(MemoryQuota::new( + config.initial_scan_pending_memory_quota.0 as _, + )); let limit = if config.initial_scan_rate_limit.0 > 0 { config.initial_scan_rate_limit.0 as f64 } else { diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index bfb88d5cd5f..0a957ea87ed 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -12,15 +12,16 @@ use raftstore::{ use tikv::storage::{ kv::StatisticsSummary, mvcc::{DeltaScanner, ScannerBuilder}, - txn::{EntryBatch, TxnEntry, TxnEntryScanner}, + txn::{TxnEntry, TxnEntryScanner}, Snapshot, Statistics, }; use tikv_util::{ box_err, + memory::{MemoryQuota, OwnedAllocated}, time::{Instant, Limiter}, worker::Scheduler, }; -use tokio::sync::{OwnedSemaphorePermit, Semaphore}; +use tokio::sync::Semaphore; use txn_types::{Key, Lock, TimeStamp}; use crate::{ @@ -34,41 +35,17 @@ use crate::{ const MAX_GET_SNAPSHOT_RETRY: usize = 5; -#[derive(Clone)] -pub struct PendingMemoryQuota(Arc); - -impl std::fmt::Debug for PendingMemoryQuota { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PendingMemoryQuota") - .field("remain", &self.0.available_permits()) - .field("total", &self.0) - .finish() - } -} - -pub struct PendingMemory(OwnedSemaphorePermit); - -impl PendingMemoryQuota { - pub fn new(quota: usize) -> Self { - Self(Arc::new(Semaphore::new(quota))) - } - - pub async fn pending(&self, size: usize) -> PendingMemory { - PendingMemory( - self.0 - .clone() - .acquire_many_owned(size as _) - .await - .expect("BUG: the semaphore is closed unexpectedly."), - ) - } +struct ScanResult { + more: bool, + out_of_memory: bool, + statistics: Statistics, } /// EventLoader transforms data from the snapshot into ApplyEvent. 
pub struct EventLoader { scanner: DeltaScanner, // pooling the memory. - entry_batch: EntryBatch, + entry_batch: Vec, } const ENTRY_BATCH_SIZE: usize = 1024; @@ -97,20 +74,48 @@ impl EventLoader { Ok(Self { scanner, - entry_batch: EntryBatch::with_capacity(ENTRY_BATCH_SIZE), + entry_batch: Vec::with_capacity(ENTRY_BATCH_SIZE), }) } + fn scan_result(&mut self, more: bool) -> ScanResult { + ScanResult { + more, + out_of_memory: false, + statistics: self.scanner.take_statistics(), + } + } + + fn out_of_memory(&mut self) -> ScanResult { + ScanResult { + more: true, + out_of_memory: true, + statistics: self.scanner.take_statistics(), + } + } + /// Scan a batch of events from the snapshot, and save them into the /// internal buffer. - fn fill_entries(&mut self) -> Result { + fn fill_entries(&mut self, memory_quota: &mut OwnedAllocated) -> Result { assert!( self.entry_batch.is_empty(), - "EventLoader: the entry batch isn't empty when filling entries, which is error-prone, please call `omit_entries` first. (len = {})", + "EventLoader: the entry batch isn't empty when filling entries, which is error-prone, please call `emit_entries_to` first. (len = {})", self.entry_batch.len() ); - self.scanner.scan_entries(&mut self.entry_batch)?; - Ok(self.scanner.take_statistics()) + let batch = &mut self.entry_batch; + while batch.len() < batch.capacity() { + match self.scanner.next_entry()? { + Some(entry) => { + let size = entry.size(); + batch.push(entry); + if memory_quota.alloc(size).is_err() { + return Ok(self.out_of_memory()); + } + } + None => return Ok(self.scan_result(false)), + } + } + Ok(self.scan_result(true)) } /// Drain the internal buffer, converting them to the [`ApplyEvents`], @@ -120,7 +125,7 @@ impl EventLoader { result: &mut ApplyEvents, resolver: &mut TwoPhaseResolver, ) -> Result<()> { - for entry in self.entry_batch.drain() { + for entry in self.entry_batch.drain(..) 
{ match entry { TxnEntry::Prewrite { default: (key, value), @@ -180,7 +185,7 @@ pub struct InitialDataLoader { pub(crate) tracing: SubscriptionTracer, pub(crate) scheduler: Scheduler, - pub(crate) quota: PendingMemoryQuota, + pub(crate) quota: Arc, pub(crate) limit: Limiter, // If there are too many concurrent initial scanning, the limit of disk speed or pending memory // quota will probably be triggered. Then the whole scanning will be pretty slow. And when @@ -202,7 +207,7 @@ where sink: Router, tracing: SubscriptionTracer, sched: Scheduler, - quota: PendingMemoryQuota, + quota: Arc, limiter: Limiter, cdc_handle: H, concurrency_limit: Arc, @@ -384,40 +389,44 @@ where let mut events = ApplyEvents::with_capacity(1024, region.id); // Note: the call of `fill_entries` is the only step which would read the disk. // we only need to record the disk throughput of this. - let (stat, disk_read) = - utils::with_record_read_throughput(|| event_loader.fill_entries()); - // We must use the size of entry batch here to check whether we have progress. - // Or we may exit too early if there are only records: - // - can be inlined to `write` CF (hence it won't be written to default CF) - // - are prewritten. (hence it will only contains `Prewrite` records). 
- // In this condition, ALL records generate no ApplyEvent(only lock change), - // and we would exit after the first run of loop :( - let no_progress = event_loader.entry_batch.is_empty(); - let stat = stat?; + let mut allocated = OwnedAllocated::new(Arc::clone(&self.quota)); + let (res, disk_read) = + utils::with_record_read_throughput(|| event_loader.fill_entries(&mut allocated)); + let res = res?; self.with_resolver(region, handle, |r| { event_loader.emit_entries_to(&mut events, r) })?; - if no_progress { - metrics::INITIAL_SCAN_DURATION.observe(start.saturating_elapsed_secs()); - return Ok(stats.stat); - } - stats.add_statistics(&stat); + stats.add_statistics(&res.statistics); let region_id = region.get_id(); let sink = self.sink.clone(); let event_size = events.size(); let sched = self.scheduler.clone(); - let permit = self.quota.pending(event_size).await; self.limit.consume(disk_read as _).await; debug!("sending events to router"; "size" => %event_size, "region" => %region_id); metrics::INCREMENTAL_SCAN_SIZE.observe(event_size as f64); metrics::INCREMENTAL_SCAN_DISK_READ.inc_by(disk_read as f64); metrics::HEAP_MEMORY.add(event_size as _); + fail::fail_point!("scan_and_async_send::about_to_consume"); join_handles.push(tokio::spawn(async move { utils::handle_on_event_result(&sched, sink.on_events(events).await); metrics::HEAP_MEMORY.sub(event_size as _); + drop(allocated); debug!("apply event done"; "size" => %event_size, "region" => %region_id); - drop(permit); })); + if !res.more { + metrics::INITIAL_SCAN_DURATION.observe(start.saturating_elapsed_secs()); + return Ok(stats.stat); + } + if res.out_of_memory { + futures::future::try_join_all(join_handles.drain(..)) + .await + .map_err(|err| { + annotate!( + err, + "failed to join tokio runtime during out-of-memory-quota" + ) + })?; + } } } @@ -465,10 +474,13 @@ where #[cfg(test)] mod tests { + use std::sync::Arc; + use futures::executor::block_on; use kvproto::metapb::*; use tikv::storage::{txn::tests::*, 
TestEngineBuilder}; use tikv_kv::SnapContext; + use tikv_util::memory::{MemoryQuota, OwnedAllocated}; use txn_types::TimeStamp; use super::EventLoader; @@ -498,10 +510,13 @@ mod tests { let snap = block_on(async { tikv_kv::snapshot(&mut engine, SnapContext::default()).await }) .unwrap(); + let quota_inf = Arc::new(MemoryQuota::new(usize::MAX)); let mut loader = EventLoader::load_from(snap, TimeStamp::zero(), TimeStamp::max(), &r).unwrap(); - let (r, data_load) = with_record_read_throughput(|| loader.fill_entries()); + let (r, data_load) = with_record_read_throughput(|| { + loader.fill_entries(&mut OwnedAllocated::new(quota_inf)) + }); r.unwrap(); let mut events = ApplyEvents::with_capacity(1024, 42); let mut res = TwoPhaseResolver::new(42, None); diff --git a/components/backup-stream/src/lib.rs b/components/backup-stream/src/lib.rs index 3d4690d7f48..0402e5d2ee3 100644 --- a/components/backup-stream/src/lib.rs +++ b/components/backup-stream/src/lib.rs @@ -10,7 +10,7 @@ mod endpoint; pub mod errors; mod event_loader; pub mod metadata; -pub(crate) mod metrics; +pub mod metrics; pub mod observer; pub mod router; mod service; diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 05f1a053392..849a503e21b 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -539,6 +539,15 @@ impl RouterInner { let task_info = self.get_task_info(&task).await?; task_info.on_events(events).await?; let file_size_limit = self.temp_file_size_limit.load(Ordering::SeqCst); + #[cfg(features = "failpoints")] + { + let delayed = (|| { + fail::fail_point!("router_on_event_delay_ms", |v| { + v.and_then(|v| v.parse::().ok()).unwrap_or(0) + }) + })(); + tokio::time::sleep(Duration::from_millis(delayed)).await; + } // When this event make the size of temporary files exceeds the size limit, make // a flush. 
Note that we only flush if the size is less than the limit before diff --git a/components/backup-stream/tests/failpoints/mod.rs b/components/backup-stream/tests/failpoints/mod.rs index ff9b9f82ba1..8dfc21529e4 100644 --- a/components/backup-stream/tests/failpoints/mod.rs +++ b/components/backup-stream/tests/failpoints/mod.rs @@ -9,7 +9,13 @@ pub use suite::*; mod all { - use std::time::Duration; + use std::{ + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + time::Duration, + }; use backup_stream::{ metadata::{ @@ -19,7 +25,7 @@ mod all { GetCheckpointResult, RegionCheckpointOperation, RegionSet, Task, }; use futures::executor::block_on; - use tikv_util::defer; + use tikv_util::{config::ReadableSize, defer}; use super::{ make_record_key, make_split_key_at_record, mutation, run_async_test, SuiteBuilder, @@ -30,7 +36,7 @@ mod all { let mut suite = SuiteBuilder::new_named("basic").build(); fail::cfg("try_start_observe", "1*return").unwrap(); - run_async_test(async { + let (round1, round2) = run_async_test(async { // write data before the task starting, for testing incremental scanning. 
let round1 = suite.write_records(0, 128, 1).await; suite.must_register_task(1, "test_basic"); @@ -38,13 +44,13 @@ mod all { let round2 = suite.write_records(256, 128, 1).await; suite.force_flush_files("test_basic"); suite.wait_for_flush(); - suite - .check_for_write_records( - suite.flushed_files.path(), - round1.union(&round2).map(Vec::as_slice), - ) - .await; + (round1, round2) }); + suite.check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(Vec::as_slice), + ); + suite.cluster.shutdown(); } #[test] @@ -97,10 +103,10 @@ mod all { let keys2 = run_async_test(suite.write_records(256, 128, 1)); suite.force_flush_files("region_failure"); suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), keys.union(&keys2).map(|s| s.as_slice()), - )); + ); } #[test] fn initial_scan_failure() { @@ -121,10 +127,10 @@ mod all { let keys2 = run_async_test(suite.write_records(256, 128, 1)); suite.force_flush_files("initial_scan_failure"); suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), keys.union(&keys2).map(|s| s.as_slice()), - )); + ); } #[test] fn failed_during_refresh_region() { @@ -147,10 +153,10 @@ mod all { let keys2 = run_async_test(suite.write_records(256, 128, 1)); suite.force_flush_files("fail_to_refresh_region"); suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), keys.union(&keys2).map(|s| s.as_slice()), - )); + ); let leader = suite.cluster.leader_of_region(1).unwrap().store_id; let (tx, rx) = std::sync::mpsc::channel(); suite.endpoints[&leader] @@ -212,12 +218,7 @@ mod all { let items = run_async_test(suite.write_records(0, 128, 1)); suite.force_flush_files("retry_abort"); suite.wait_for_flush(); - run_async_test( - suite.check_for_write_records( - suite.flushed_files.path(), - 
items.iter().map(Vec::as_slice), - ), - ); + suite.check_for_write_records(suite.flushed_files.path(), items.iter().map(Vec::as_slice)); } #[test] fn failure_and_split() { @@ -240,12 +241,42 @@ mod all { let round2 = run_async_test(suite.write_records(256, 128, 1)); suite.force_flush_files("failure_and_split"); suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), round1.union(&round2).map(Vec::as_slice), - )); + ); let cp = suite.global_checkpoint(); assert!(cp > 512, "it is {}", cp); suite.cluster.shutdown(); } + + #[test] + fn memory_quota() { + let mut suite = SuiteBuilder::new_named("memory_quota") + .cfg(|cfg| cfg.initial_scan_pending_memory_quota = ReadableSize::kb(2)) + .build(); + let keys = run_async_test(suite.write_records(0, 128, 1)); + let failed = Arc::new(AtomicBool::new(false)); + fail::cfg("router_on_event_delay_ms", "6*return(1000)").unwrap(); + fail::cfg_callback("scan_and_async_send::about_to_consume", { + let failed = failed.clone(); + move || { + let v = backup_stream::metrics::HEAP_MEMORY.get(); + // Not greater than max key length * concurrent initial scan number. 
+ if v > 4096 * 6 { + println!("[[ FAILED ]] The memory usage is {v} which exceeds the quota"); + failed.store(true, Ordering::SeqCst); + } + } + }) + .unwrap(); + suite.must_register_task(1, "memory_quota"); + suite.force_flush_files("memory_quota"); + suite.wait_for_flush(); + suite.check_for_write_records( + suite.flushed_files.path(), + keys.iter().map(|v| v.as_slice()), + ); + assert!(!failed.load(Ordering::SeqCst)); + } } diff --git a/components/backup-stream/tests/integration/mod.rs b/components/backup-stream/tests/integration/mod.rs index 79a756f684d..395159060c1 100644 --- a/components/backup-stream/tests/integration/mod.rs +++ b/components/backup-stream/tests/integration/mod.rs @@ -28,20 +28,19 @@ mod all { #[test] fn with_split() { let mut suite = SuiteBuilder::new_named("with_split").build(); - run_async_test(async { + let (round1, round2) = run_async_test(async { let round1 = suite.write_records(0, 128, 1).await; suite.must_split(&make_split_key_at_record(1, 42)); suite.must_register_task(1, "test_with_split"); let round2 = suite.write_records(256, 128, 1).await; - suite.force_flush_files("test_with_split"); - suite.wait_for_flush(); - suite - .check_for_write_records( - suite.flushed_files.path(), - round1.union(&round2).map(Vec::as_slice), - ) - .await; + (round1, round2) }); + suite.force_flush_files("test_with_split"); + suite.wait_for_flush(); + suite.check_for_write_records( + suite.flushed_files.path(), + round1.union(&round2).map(Vec::as_slice), + ); suite.cluster.shutdown(); } @@ -63,7 +62,7 @@ mod all { #[test] fn with_split_txn() { let mut suite = SuiteBuilder::new_named("split_txn").build(); - run_async_test(async { + let (commit_ts, start_ts, keys) = run_async_test(async { let start_ts = suite.cluster.pd_client.get_tso().await.unwrap(); let keys = (1..1960).map(|i| make_record_key(1, i)).collect::>(); suite.must_kv_prewrite( @@ -76,26 +75,25 @@ mod all { start_ts, ); let commit_ts = suite.cluster.pd_client.get_tso().await.unwrap(); - 
suite.commit_keys(keys[1913..].to_vec(), start_ts, commit_ts); - suite.must_register_task(1, "test_split_txn"); - suite.commit_keys(keys[..1913].to_vec(), start_ts, commit_ts); - suite.force_flush_files("test_split_txn"); - suite.wait_for_flush(); - let keys_encoded = keys - .iter() - .map(|v| { - Key::from_raw(v.as_slice()) - .append_ts(commit_ts) - .into_encoded() - }) - .collect::>(); - suite - .check_for_write_records( - suite.flushed_files.path(), - keys_encoded.iter().map(Vec::as_slice), - ) - .await; + (commit_ts, start_ts, keys) }); + suite.commit_keys(keys[1913..].to_vec(), start_ts, commit_ts); + suite.must_register_task(1, "test_split_txn"); + suite.commit_keys(keys[..1913].to_vec(), start_ts, commit_ts); + suite.force_flush_files("test_split_txn"); + suite.wait_for_flush(); + let keys_encoded = keys + .iter() + .map(|v| { + Key::from_raw(v.as_slice()) + .append_ts(commit_ts) + .into_encoded() + }) + .collect::>(); + suite.check_for_write_records( + suite.flushed_files.path(), + keys_encoded.iter().map(Vec::as_slice), + ); suite.cluster.shutdown(); } @@ -111,10 +109,10 @@ mod all { let round2 = run_async_test(suite.write_records(256, 128, 1)); suite.force_flush_files("test_leader_down"); suite.wait_for_flush(); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), round1.union(&round2).map(Vec::as_slice), - )); + ); suite.cluster.shutdown(); } @@ -346,10 +344,10 @@ mod all { } assert_eq!(items.last().unwrap().end_key, Vec::::default()); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), round1.union(&round2).map(|x| x.as_slice()), - )); + ); } #[test] @@ -373,18 +371,18 @@ mod all { .unwrap(); suite.sync(); std::thread::sleep(Duration::from_secs(2)); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), round1.iter().map(|x| x.as_slice()), - )); + ); assert!(suite.global_checkpoint() > 
256); suite.force_flush_files("r"); suite.wait_for_flush(); assert!(suite.global_checkpoint() > 512); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), round1.union(&round2).map(|x| x.as_slice()), - )); + ); } #[test] @@ -426,10 +424,10 @@ mod all { ts, cps ); - run_async_test(suite.check_for_write_records( + suite.check_for_write_records( suite.flushed_files.path(), round1.iter().map(|k| k.as_slice()), - )) + ) } #[test] diff --git a/components/backup-stream/tests/suite.rs b/components/backup-stream/tests/suite.rs index 41a57f5858b..0e4038d07a0 100644 --- a/components/backup-stream/tests/suite.rs +++ b/components/backup-stream/tests/suite.rs @@ -2,7 +2,8 @@ use std::{ collections::{HashMap, HashSet}, - path::Path, + fmt::Display, + path::{Path, PathBuf}, sync::Arc, time::Duration, }; @@ -30,7 +31,6 @@ use kvproto::{ tikvpb::*, }; use pd_client::PdClient; -use protobuf::parse_from_bytes; use raftstore::{router::CdcRaftRouter, RegionInfoAccessor}; use resolved_ts::LeadershipResolver; use tempdir::TempDir; @@ -43,13 +43,25 @@ use tikv_util::{ number::NumberEncoder, stream_event::{EventIterator, Iterator}, }, - info, + debug, info, worker::LazyWorker, HandyRwLock, }; use txn_types::{Key, TimeStamp, WriteRef}; use walkdir::WalkDir; +#[derive(Debug)] +pub struct FileSegments { + path: PathBuf, + segments: Vec<(usize, usize)>, +} + +#[derive(Default, Debug)] +pub struct LogFiles { + default_cf: Vec, + write_cf: Vec, +} + pub type TestEndpoint = Endpoint< ErrorStore, RegionInfoAccessor, @@ -453,7 +465,12 @@ impl Suite { for ts in (from..(from + n)).map(|x| x * 2) { let ts = ts as u64; let key = make_record_key(for_table, ts); - let muts = vec![mutation(key.clone(), b"hello, world".to_vec())]; + let value = if ts % 4 == 0 { + b"hello, world".to_vec() + } else { + [0xdd; 4096].to_vec() + }; + let muts = vec![mutation(key.clone(), value)]; let enc_key = Key::from_raw(&key).into_encoded(); let region = 
self.cluster.get_region_id(&enc_key); let start_ts = self.cluster.pd_client.get_tso().await.unwrap(); @@ -510,45 +527,53 @@ impl Suite { } } - pub fn load_metadata_for_write_records( - &self, - path: &Path, - ) -> HashMap> { - let mut meta_map: HashMap> = HashMap::new(); - for entry in WalkDir::new(path) { - let entry = entry.unwrap(); - if entry.file_type().is_file() - && entry - .file_name() - .to_str() - .map_or(false, |s| s.ends_with(".meta")) - { - let content = std::fs::read(entry.path()).unwrap(); - let meta = parse_from_bytes::(content.as_ref()).unwrap(); - for g in meta.file_groups.into_iter() { - let path = g.path.split('/').last().unwrap(); - for f in g.data_files_info.into_iter() { - let file_info = meta_map.get_mut(path); - if let Some(v) = file_info { - v.push(( - f.range_offset as usize, - (f.range_offset + f.range_length) as usize, - )); + pub fn get_files_to_check(&self, path: &Path) -> std::io::Result { + let mut res = LogFiles::default(); + for entry in WalkDir::new(path.join("v1/backupmeta")) { + let entry = entry?; + println!("reading {}", entry.path().display()); + if entry.file_name().to_str().unwrap().ends_with(".meta") { + let content = std::fs::read(entry.path())?; + let meta = protobuf::parse_from_bytes::(&content)?; + for fg in meta.get_file_groups() { + let mut default_segs = vec![]; + let mut write_segs = vec![]; + for file in fg.get_data_files_info() { + let v = if file.cf == "default" || file.cf.is_empty() { + Some(&mut default_segs) + } else if file.cf == "write" { + Some(&mut write_segs) } else { - let v = vec![( - f.range_offset as usize, - (f.range_offset + f.range_length) as usize, - )]; - meta_map.insert(String::from(path), v); - } + None + }; + v.into_iter().for_each(|v| { + v.push(( + file.get_range_offset() as usize, + (file.get_range_offset() + file.get_range_length()) as usize, + )) + }); + } + let p = path.join(fg.get_path()); + if !default_segs.is_empty() { + res.default_cf.push(FileSegments { + path: p.clone(), + 
segments: default_segs, + }) + } + if !write_segs.is_empty() { + res.write_cf.push(FileSegments { + path: p, + segments: write_segs, + }) } } } } - meta_map + Ok(res) } - pub async fn check_for_write_records<'a>( + #[track_caller] + pub fn check_for_write_records<'a>( &self, path: &Path, key_set: impl std::iter::Iterator, @@ -557,45 +582,72 @@ impl Suite { let n = remain_keys.len(); let mut extra_key = 0; let mut extra_len = 0; - let meta_map = self.load_metadata_for_write_records(path); - for entry in WalkDir::new(path) { - let entry = entry.unwrap(); - println!("checking: {:?}", entry); - if entry.file_type().is_file() - && entry - .file_name() - .to_str() - .map_or(false, |s| s.ends_with(".log")) - { - let buf = std::fs::read(entry.path()).unwrap(); - let file_infos = meta_map.get(entry.file_name().to_str().unwrap()).unwrap(); - for &file_info in file_infos { - let mut decoder = ZstdDecoder::new(Vec::new()); - let pbuf: &[u8] = &buf[file_info.0..file_info.1]; - decoder.write_all(pbuf).await.unwrap(); - decoder.flush().await.unwrap(); - decoder.close().await.unwrap(); - let content = decoder.into_inner(); - - let mut iter = EventIterator::new(&content); - loop { - if !iter.valid() { - break; - } - iter.next().unwrap(); - if !remain_keys.remove(iter.key()) { - extra_key += 1; - extra_len += iter.key().len() + iter.value().len(); - } + let files = self.get_files_to_check(path).unwrap_or_default(); + let mut default_keys = HashSet::new(); + let content_of = |buf: &[u8], range: (usize, usize)| { + let mut decoder = ZstdDecoder::new(Vec::new()); + let pbuf: &[u8] = &buf[range.0..range.1]; + run_async_test(async { + decoder.write_all(pbuf).await.unwrap(); + decoder.flush().await.unwrap(); + decoder.close().await.unwrap(); + }); + decoder.into_inner() + }; + for entry in files.write_cf { + debug!("checking write: {:?}", entry); + + let buf = std::fs::read(&entry.path).unwrap(); + for &file_info in entry.segments.iter() { + let data = content_of(&buf, file_info); + let 
mut iter = EventIterator::new(&data); + loop { + if !iter.valid() { + break; + } + iter.next().unwrap(); + if !remain_keys.remove(iter.key()) { + extra_key += 1; + extra_len += iter.key().len() + iter.value().len(); + } + + let value = iter.value(); + let wf = WriteRef::parse(value).unwrap(); + if wf.short_value.is_none() { + let mut key = Key::from_encoded_slice(iter.key()).truncate_ts().unwrap(); + key.append_ts_inplace(wf.start_ts); - let value = iter.value(); - let wf = WriteRef::parse(value).unwrap(); + default_keys.insert(key.into_encoded()); + } else { assert_eq!(wf.short_value, Some(b"hello, world" as &[u8])); } } } } + for entry in files.default_cf { + debug!("checking default: {:?}", entry); + + let buf = std::fs::read(&entry.path).unwrap(); + for &file_info in entry.segments.iter() { + let data = content_of(&buf, file_info); + let mut iter = EventIterator::new(&data); + loop { + if !iter.valid() { + break; + } + iter.next().unwrap(); + if !default_keys.remove(iter.key()) { + extra_key += 1; + extra_len += iter.key().len() + iter.value().len(); + } + + let value = iter.value(); + assert_eq!(value, &[0xdd; 4096]); + } + } + } + if extra_key != 0 { println!( "check_for_write_records of “{}”: extra {} keys ({:.02}% of recorded keys), extra {} bytes.", @@ -605,17 +657,19 @@ impl Suite { extra_len ) } - if !remain_keys.is_empty() { - panic!( - "not all keys are recorded: it remains {:?} (total = {})", - remain_keys - .iter() - .take(3) - .map(|v| hex::encode(v)) - .collect::>(), - remain_keys.len() - ); - } + assert_empty(&remain_keys, "not all keys are recorded"); + assert_empty(&default_keys, "some keys don't have default entry"); + } +} + +#[track_caller] +fn assert_empty(v: &HashSet>, msg: impl Display) { + if !v.is_empty() { + panic!( + "{msg}: it remains {:?}... 
(total = {})", + v.iter().take(3).map(|v| hex::encode(v)).collect::>(), + v.len() + ); } } diff --git a/components/tikv_util/src/memory.rs b/components/tikv_util/src/memory.rs index 291254c5227..15ffece4425 100644 --- a/components/tikv_util/src/memory.rs +++ b/components/tikv_util/src/memory.rs @@ -2,7 +2,10 @@ use std::{ mem, - sync::atomic::{AtomicUsize, Ordering}, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, }; use kvproto::{ @@ -87,6 +90,32 @@ pub struct MemoryQuota { capacity: AtomicUsize, } +pub struct OwnedAllocated { + allocated: usize, + from: Arc, +} + +impl OwnedAllocated { + pub fn new(target: Arc) -> Self { + Self { + allocated: 0, + from: target, + } + } + + pub fn alloc(&mut self, bytes: usize) -> Result<(), MemoryQuotaExceeded> { + self.from.alloc(bytes)?; + self.allocated += bytes; + Ok(()) + } +} + +impl Drop for OwnedAllocated { + fn drop(&mut self) { + self.from.free(self.allocated) + } +} + impl MemoryQuota { pub fn new(capacity: usize) -> MemoryQuota { MemoryQuota { @@ -182,4 +211,25 @@ mod tests { quota.alloc(40).unwrap(); assert_eq!(quota.in_use(), 50); } + + #[test] + fn test_allocated() { + let quota = Arc::new(MemoryQuota::new(100)); + let mut allocated = OwnedAllocated::new(Arc::clone("a)); + allocated.alloc(42).unwrap(); + assert_eq!(quota.in_use(), 42); + quota.alloc(59).unwrap_err(); + allocated.alloc(16).unwrap(); + assert_eq!(quota.in_use(), 58); + let mut allocated2 = OwnedAllocated::new(Arc::clone("a)); + allocated2.alloc(8).unwrap(); + allocated2.alloc(40).unwrap_err(); + assert_eq!(quota.in_use(), 66); + quota.alloc(4).unwrap(); + assert_eq!(quota.in_use(), 70); + drop(allocated); + assert_eq!(quota.in_use(), 12); + drop(allocated2); + assert_eq!(quota.in_use(), 4); + } } From 7fc3684c91f9a40ca351fc8a1c894871ad926f92 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 7 Nov 2023 14:54:41 -0600 Subject: [PATCH 0996/1149] raftstore: enhance split check (#15900) close tikv/tikv#15863 Signed-off-by: tonyxuqqi --- 
components/raftstore-v2/src/router/imp.rs | 14 +- .../raftstore/src/coprocessor/dispatcher.rs | 26 ++- .../src/coprocessor/split_check/keys.rs | 56 +++++-- .../src/coprocessor/split_check/size.rs | 43 ++++- components/raftstore/src/router.rs | 8 +- components/raftstore/src/store/fsm/peer.rs | 109 +++++++------ components/raftstore/src/store/msg.rs | 22 ++- components/raftstore/src/store/peer.rs | 150 ++++++++++++++---- .../raftstore/src/store/worker/split_check.rs | 32 +++- components/test_raftstore/src/util.rs | 40 ++++- components/tikv_util/src/log.rs | 12 ++ tests/failpoints/cases/test_split_region.rs | 62 ++++++++ .../raftstore/test_split_region.rs | 2 +- 13 files changed, 457 insertions(+), 119 deletions(-) diff --git a/components/raftstore-v2/src/router/imp.rs b/components/raftstore-v2/src/router/imp.rs index 23a8a3c7d4e..e7a63f6d48f 100644 --- a/components/raftstore-v2/src/router/imp.rs +++ b/components/raftstore-v2/src/router/imp.rs @@ -43,12 +43,18 @@ impl AsyncReadNotifier for StoreRouter { } impl raftstore::coprocessor::StoreHandle for StoreRouter { - fn update_approximate_size(&self, region_id: u64, size: u64) { - let _ = self.send(region_id, PeerMsg::UpdateRegionSize { size }); + // TODO: add splitable logic in raftstore-v2 + fn update_approximate_size(&self, region_id: u64, size: Option, _may_split: Option) { + if let Some(size) = size { + let _ = self.send(region_id, PeerMsg::UpdateRegionSize { size }); + } } - fn update_approximate_keys(&self, region_id: u64, keys: u64) { - let _ = self.send(region_id, PeerMsg::UpdateRegionKeys { keys }); + // TODO: add splitable logic in raftstore-v2 + fn update_approximate_keys(&self, region_id: u64, keys: Option, _may_split: Option) { + if let Some(keys) = keys { + let _ = self.send(region_id, PeerMsg::UpdateRegionKeys { keys }); + } } fn ask_split( diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index d082013cd2c..c7d6731d3e9 100644 --- 
a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -19,8 +19,8 @@ use crate::store::BucketRange; /// A handle for coprocessor to schedule some command back to raftstore. pub trait StoreHandle: Clone + Send { - fn update_approximate_size(&self, region_id: u64, size: u64); - fn update_approximate_keys(&self, region_id: u64, keys: u64); + fn update_approximate_size(&self, region_id: u64, size: Option, splitable: Option); + fn update_approximate_keys(&self, region_id: u64, keys: Option, splitable: Option); fn ask_split( &self, region_id: u64, @@ -48,11 +48,13 @@ pub trait StoreHandle: Clone + Send { pub enum SchedTask { UpdateApproximateSize { region_id: u64, - size: u64, + splitable: Option, + size: Option, }, UpdateApproximateKeys { region_id: u64, - keys: u64, + splitable: Option, + keys: Option, }, AskSplit { region_id: u64, @@ -75,12 +77,20 @@ pub enum SchedTask { } impl StoreHandle for std::sync::mpsc::SyncSender { - fn update_approximate_size(&self, region_id: u64, size: u64) { - let _ = self.try_send(SchedTask::UpdateApproximateSize { region_id, size }); + fn update_approximate_size(&self, region_id: u64, size: Option, splitable: Option) { + let _ = self.try_send(SchedTask::UpdateApproximateSize { + region_id, + splitable, + size, + }); } - fn update_approximate_keys(&self, region_id: u64, keys: u64) { - let _ = self.try_send(SchedTask::UpdateApproximateKeys { region_id, keys }); + fn update_approximate_keys(&self, region_id: u64, keys: Option, splitable: Option) { + let _ = self.try_send(SchedTask::UpdateApproximateKeys { + region_id, + splitable, + keys, + }); } fn ask_split( diff --git a/components/raftstore/src/coprocessor/split_check/keys.rs b/components/raftstore/src/coprocessor/split_check/keys.rs index 2c0e71dd8cb..d6a49175441 100644 --- a/components/raftstore/src/coprocessor/split_check/keys.rs +++ b/components/raftstore/src/coprocessor/split_check/keys.rs @@ -157,9 +157,11 @@ impl 
SplitCheckObserver for KeysCheckObserver } }; - self.router.update_approximate_keys(region_id, region_keys); + self.router + .update_approximate_keys(region_id, Some(region_keys), None); REGION_KEYS_HISTOGRAM.observe(region_keys as f64); + // if bucket checker using scan is added, to utilize the scan, // add keys checker as well for free // It has the assumption that the size's checker is before the keys's check in @@ -299,12 +301,28 @@ mod tests { None, )); // keys has not reached the max_keys 100 yet. - match rx.try_recv() { - Ok(SchedTask::UpdateApproximateSize { region_id, .. }) - | Ok(SchedTask::UpdateApproximateKeys { region_id, .. }) => { - assert_eq!(region_id, region.get_id()); + let mut recv_cnt = 0; + loop { + match rx.try_recv() { + Ok(SchedTask::UpdateApproximateSize { + region_id, + splitable, + .. + }) + | Ok(SchedTask::UpdateApproximateKeys { + region_id, + splitable, + .. + }) => { + assert_eq!(region_id, region.get_id()); + assert!(splitable.is_none()); + recv_cnt += 1; + if recv_cnt == 2 { + break; + } + } + others => panic!("expect recv empty, but got {:?}", others), } - others => panic!("expect recv empty, but got {:?}", others), } put_data(&engine, 90, 160, true); @@ -403,12 +421,28 @@ mod tests { None, )); // keys has not reached the max_keys 100 yet. - match rx.try_recv() { - Ok(SchedTask::UpdateApproximateSize { region_id, .. }) - | Ok(SchedTask::UpdateApproximateKeys { region_id, .. }) => { - assert_eq!(region_id, region.get_id()); + let mut recv_cnt = 0; + loop { + match rx.try_recv() { + Ok(SchedTask::UpdateApproximateSize { + region_id, + splitable, + .. + }) + | Ok(SchedTask::UpdateApproximateKeys { + region_id, + splitable, + .. 
+ }) => { + assert_eq!(region_id, region.get_id()); + assert!(splitable.is_none()); + recv_cnt += 1; + if recv_cnt == 2 { + break; + } + } + others => panic!("expect recv empty, but got {:?}", others), } - others => panic!("expect recv empty, but got {:?}", others), } put_data(&engine, 90, 160, true); diff --git a/components/raftstore/src/coprocessor/split_check/size.rs b/components/raftstore/src/coprocessor/split_check/size.rs index 4b320bef1b6..e5048a83826 100644 --- a/components/raftstore/src/coprocessor/split_check/size.rs +++ b/components/raftstore/src/coprocessor/split_check/size.rs @@ -158,13 +158,14 @@ impl SplitCheckObserver for SizeCheckObserver }; // send it to raftstore to update region approximate size - self.router.update_approximate_size(region_id, region_size); + self.router + .update_approximate_size(region_id, Some(region_size), None); + let need_split_region = region_size >= host.cfg.region_max_size().0; let need_bucket_checker = host.cfg.enable_region_bucket() && region_size >= 2 * host.cfg.region_bucket_size.0; REGION_SIZE_HISTOGRAM.observe(region_size as f64); - let need_split_region = region_size >= host.cfg.region_max_size().0; if need_split_region || need_bucket_checker { // when it's a large region use approximate way to produce split keys if need_split_region { @@ -265,11 +266,23 @@ pub mod tests { exp_split_keys: Vec>, ignore_split_keys: bool, ) { + let mut split = false; loop { match rx.try_recv() { - Ok(SchedTask::UpdateApproximateKeys { region_id, .. }) - | Ok(SchedTask::UpdateApproximateSize { region_id, .. }) - | Ok(SchedTask::RefreshRegionBuckets { region_id, .. }) => { + Ok(SchedTask::UpdateApproximateKeys { + region_id, + splitable, + .. + }) + | Ok(SchedTask::UpdateApproximateSize { + region_id, + splitable, + .. + }) => { + assert_eq!(region_id, exp_region.get_id()); + split = split || splitable.unwrap_or(false); + } + Ok(SchedTask::RefreshRegionBuckets { region_id, .. 
}) => { assert_eq!(region_id, exp_region.get_id()); } Ok(SchedTask::AskSplit { @@ -283,6 +296,7 @@ pub mod tests { if !ignore_split_keys { assert_eq!(split_keys, exp_split_keys); } + assert!(split); break; } others => panic!("expect split check result, but got {:?}", others), @@ -303,11 +317,23 @@ pub mod tests { exp_region: &Region, exp_split_keys_count: usize, ) { + let mut split = false; loop { match rx.try_recv() { - Ok(SchedTask::UpdateApproximateSize { region_id, .. }) - | Ok(SchedTask::UpdateApproximateKeys { region_id, .. }) - | Ok(SchedTask::RefreshRegionBuckets { region_id, .. }) => { + Ok(SchedTask::UpdateApproximateSize { + region_id, + splitable, + .. + }) + | Ok(SchedTask::UpdateApproximateKeys { + region_id, + splitable, + .. + }) => { + assert_eq!(region_id, exp_region.get_id()); + split = split || splitable.unwrap_or(false); + } + Ok(SchedTask::RefreshRegionBuckets { region_id, .. }) => { assert_eq!(region_id, exp_region.get_id()); } Ok(SchedTask::AskSplit { @@ -319,6 +345,7 @@ pub mod tests { assert_eq!(region_id, exp_region.get_id()); assert_eq!(®ion_epoch, exp_region.get_region_epoch()); assert_eq!(split_keys.len(), exp_split_keys_count); + assert!(split); break; } others => panic!("expect split check result, but got {:?}", others), diff --git a/components/raftstore/src/router.rs b/components/raftstore/src/router.rs index 77d3a35e306..fd50357fa38 100644 --- a/components/raftstore/src/router.rs +++ b/components/raftstore/src/router.rs @@ -289,11 +289,11 @@ impl RaftStoreRouter for RaftRouter { // duplicated codes. 
impl crate::coprocessor::StoreHandle for RaftRouter { - fn update_approximate_size(&self, region_id: u64, size: u64) { + fn update_approximate_size(&self, region_id: u64, size: Option, splitable: Option) { if let Err(e) = CasualRouter::send( self, region_id, - CasualMessage::RegionApproximateSize { size }, + CasualMessage::RegionApproximateSize { size, splitable }, ) { warn!( "failed to send approximate region size"; @@ -304,11 +304,11 @@ impl crate::coprocessor::StoreHandle for RaftRoute } } - fn update_approximate_keys(&self, region_id: u64, keys: u64) { + fn update_approximate_keys(&self, region_id: u64, keys: Option, splitable: Option) { if let Err(e) = CasualRouter::send( self, region_id, - CasualMessage::RegionApproximateKeys { keys }, + CasualMessage::RegionApproximateKeys { keys, splitable }, ) { warn!( "failed to send approximate region keys"; diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 5a7223dcaa3..03e31938aa0 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -51,7 +51,7 @@ use raft::{ use smallvec::SmallVec; use tikv_alloc::trace::TraceEvent; use tikv_util::{ - box_err, debug, defer, error, escape, info, is_zero_duration, + box_err, debug, defer, error, escape, info, info_or_debug, is_zero_duration, mpsc::{self, LooseBoundedSender, Receiver}, store::{find_peer, find_peer_by_id, is_learner, region_on_same_stores}, sys::disk::DiskUsage, @@ -1085,11 +1085,11 @@ where } => { self.on_hash_computed(index, context, hash); } - CasualMessage::RegionApproximateSize { size } => { - self.on_approximate_region_size(size); + CasualMessage::RegionApproximateSize { size, splitable } => { + self.on_approximate_region_size(size, splitable); } - CasualMessage::RegionApproximateKeys { keys } => { - self.on_approximate_region_keys(keys); + CasualMessage::RegionApproximateKeys { keys, splitable } => { + self.on_approximate_region_keys(keys, splitable); } 
CasualMessage::RefreshRegionBuckets { region_epoch, @@ -1367,9 +1367,7 @@ where } fn on_clear_region_size(&mut self) { - self.fsm.peer.approximate_size = None; - self.fsm.peer.approximate_keys = None; - self.fsm.peer.may_skip_split_check = false; + self.fsm.peer.split_check_trigger.on_clear_region_size(); self.register_split_region_check_tick(); } @@ -4113,8 +4111,18 @@ where // if share_source_region_size is true, it means the new region contains any // data from the origin region if share_source_region_size { - share_size = self.fsm.peer.approximate_size.map(|v| v / new_region_count); - share_keys = self.fsm.peer.approximate_keys.map(|v| v / new_region_count); + share_size = self + .fsm + .peer + .split_check_trigger + .approximate_size + .map(|v| v / new_region_count); + share_keys = self + .fsm + .peer + .split_check_trigger + .approximate_keys + .map(|v| v / new_region_count); } let mut meta = self.ctx.store_meta.lock().unwrap(); @@ -4126,14 +4134,11 @@ where ); self.fsm.peer.post_split(); - // It's not correct anymore, so set it to false to schedule a split check task. - self.fsm.peer.may_skip_split_check = false; - let is_leader = self.fsm.peer.is_leader(); if is_leader { if share_source_region_size { - self.fsm.peer.approximate_size = share_size; - self.fsm.peer.approximate_keys = share_keys; + self.fsm.peer.split_check_trigger.approximate_size = share_size; + self.fsm.peer.split_check_trigger.approximate_keys = share_keys; } self.fsm.peer.heartbeat_pd(self.ctx); // Notify pd immediately to let it update the region meta. 
@@ -4162,7 +4167,6 @@ where if meta.region_ranges.remove(&last_key).is_none() { panic!("{} original region should exist", self.fsm.peer.tag); } - let last_region_id = regions.last().unwrap().get_id(); for (new_region, locks) in regions.into_iter().zip(region_locks) { let new_region_id = new_region.get_id(); @@ -4269,8 +4273,8 @@ where new_peer.has_ready |= campaigned; if is_leader { - new_peer.peer.approximate_size = share_size; - new_peer.peer.approximate_keys = share_keys; + new_peer.peer.split_check_trigger.approximate_size = share_size; + new_peer.peer.split_check_trigger.approximate_keys = share_keys; *new_peer.peer.txn_ext.pessimistic_locks.write() = locks; // The new peer is likely to become leader, send a heartbeat immediately to // reduce client query miss. @@ -4288,11 +4292,6 @@ where .insert(new_region_id, ReadDelegate::from_peer(new_peer.get_peer())); meta.region_read_progress .insert(new_region_id, new_peer.peer.read_progress.clone()); - if last_region_id == new_region_id { - // To prevent from big region, the right region needs run split - // check again after split. - new_peer.peer.size_diff_hint = self.ctx.cfg.region_split_check_diff().0; - } let mailbox = BasicMailbox::new(sender, new_peer, self.ctx.router.state_cnt().clone()); self.ctx.router.register(new_region_id, mailbox); self.ctx @@ -4787,7 +4786,7 @@ where // make approximate size and keys updated in time. // the reason why follower need to update is that there is a issue that after // merge and then transfer leader, the new leader may have stale size and keys. 
- self.fsm.peer.size_diff_hint = self.ctx.cfg.region_split_check_diff().0; + self.fsm.peer.split_check_trigger.reset_skip_check(); self.fsm.peer.reset_region_buckets(); if self.fsm.peer.is_leader() { info!( @@ -5248,6 +5247,14 @@ where &mut self, msg: &RaftCmdRequest, ) -> Result> { + // failpoint + fail_point!( + "fail_pre_propose_split", + msg.has_admin_request() + && msg.get_admin_request().get_cmd_type() == AdminCmdType::BatchSplit, + |_| Err(Error::Other(box_err!("fail_point"))) + ); + // Check store_id, make sure that the msg is dispatched to the right place. if let Err(e) = util::check_store_id(msg.get_header(), self.store_id()) { self.ctx @@ -5472,7 +5479,10 @@ where return; } Err(e) => { - debug!( + // log for admin requests + let is_admin_request = msg.has_admin_request(); + info_or_debug!( + is_admin_request; "failed to propose"; "region_id" => self.region_id(), "peer_id" => self.fsm.peer_id(), @@ -5840,9 +5850,11 @@ where // whether the region should split. // We assume that `may_skip_split_check` is only set true after the split check // task is scheduled. - if self.fsm.peer.may_skip_split_check - && self.fsm.peer.compaction_declined_bytes < self.ctx.cfg.region_split_check_diff().0 - && self.fsm.peer.size_diff_hint < self.ctx.cfg.region_split_check_diff().0 + if self + .fsm + .peer + .split_check_trigger + .should_skip(self.ctx.cfg.region_split_check_diff().0) { return; } @@ -5857,6 +5869,11 @@ where return; } + // To avoid run the check if it's splitting. + if self.fsm.peer.is_splitting() { + return; + } + // When Lightning or BR is importing data to TiKV, their ingest-request may fail // because of region-epoch not matched. So we hope TiKV do not check region size // and split region during importing. @@ -5895,10 +5912,7 @@ where ); return; } - self.fsm.peer.size_diff_hint = 0; - self.fsm.peer.compaction_declined_bytes = 0; - // the task is scheduled, next tick may skip it. 
- self.fsm.peer.may_skip_split_check = true; + self.fsm.peer.split_check_trigger.post_triggered(); } fn on_prepare_split_region( @@ -5974,15 +5988,21 @@ where } } - fn on_approximate_region_size(&mut self, size: u64) { - self.fsm.peer.approximate_size = Some(size); + fn on_approximate_region_size(&mut self, size: Option, splitable: Option) { + self.fsm + .peer + .split_check_trigger + .on_approximate_region_size(size, splitable); self.register_split_region_check_tick(); self.register_pd_heartbeat_tick(); fail_point!("on_approximate_region_size"); } - fn on_approximate_region_keys(&mut self, keys: u64) { - self.fsm.peer.approximate_keys = Some(keys); + fn on_approximate_region_keys(&mut self, keys: Option, splitable: Option) { + self.fsm + .peer + .split_check_trigger + .on_approximate_region_keys(keys, splitable); self.register_split_region_check_tick(); self.register_pd_heartbeat_tick(); } @@ -6130,8 +6150,10 @@ where } fn on_compaction_declined_bytes(&mut self, declined_bytes: u64) { - self.fsm.peer.compaction_declined_bytes += declined_bytes; - if self.fsm.peer.compaction_declined_bytes >= self.ctx.cfg.region_split_check_diff().0 { + self.fsm.peer.split_check_trigger.compaction_declined_bytes += declined_bytes; + if self.fsm.peer.split_check_trigger.compaction_declined_bytes + >= self.ctx.cfg.region_split_check_diff().0 + { UPDATE_REGION_SIZE_BY_COMPACTION_COUNTER.inc(); } self.register_split_region_check_tick(); @@ -6536,17 +6558,14 @@ where size += sst.total_bytes; keys += sst.total_kvs; } - self.fsm.peer.approximate_size = - Some(self.fsm.peer.approximate_size.unwrap_or_default() + size); - self.fsm.peer.approximate_keys = - Some(self.fsm.peer.approximate_keys.unwrap_or_default() + keys); + self.fsm + .peer + .split_check_trigger + .on_ingest_sst_result(size, keys); if let Some(buckets) = &mut self.fsm.peer.region_buckets_info_mut().bucket_stat_mut() { buckets.ingest_sst(keys, size); } - // The ingested file may be overlapped with the data in engine, so we 
need to - // check it again to get the accurate value. - self.fsm.peer.may_skip_split_check = false; if self.fsm.peer.is_leader() { self.on_pd_heartbeat_tick(); self.register_split_region_check_tick(); diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 0d703143a08..249c550db14 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -559,12 +559,14 @@ pub enum CasualMessage { /// Approximate size of target region. This message can only be sent by /// split-check thread. RegionApproximateSize { - size: u64, + size: Option, + splitable: Option, }, /// Approximate key count of target region. RegionApproximateKeys { - keys: u64, + keys: Option, + splitable: Option, }, CompactionDeclinedBytes { bytes: u64, @@ -649,11 +651,19 @@ impl fmt::Debug for CasualMessage { KeysInfoFormatter(split_keys.iter()), source, ), - CasualMessage::RegionApproximateSize { size } => { - write!(fmt, "Region's approximate size [size: {:?}]", size) + CasualMessage::RegionApproximateSize { size, splitable } => { + write!( + fmt, + "Region's approximate size [size: {:?}], [splitable: {:?}]", + size, splitable + ) } - CasualMessage::RegionApproximateKeys { keys } => { - write!(fmt, "Region's approximate keys [keys: {:?}]", keys) + CasualMessage::RegionApproximateKeys { keys, splitable } => { + write!( + fmt, + "Region's approximate keys [keys: {:?}], [splitable: {:?}", + keys, splitable + ) } CasualMessage::CompactionDeclinedBytes { bytes } => { write!(fmt, "compaction declined bytes {}", bytes) diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 5511c976062..da67784e734 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -571,6 +571,119 @@ pub fn can_amend_read( false } +/// The SplitCheckTrigger maintains the internal status to determine +/// if a split check task should be triggered. 
+#[derive(Default, Debug)] +pub struct SplitCheckTrigger { + /// An inaccurate difference in region size since last reset. + /// It is used to decide whether split check is needed. + size_diff_hint: u64, + /// An inaccurate difference in region size after compaction. + /// It is used to trigger check split to update approximate size and keys + /// after space reclamation of deleted entries. + pub compaction_declined_bytes: u64, + /// Approximate size of the region. + pub approximate_size: Option, + may_split_size: Option, + /// Approximate keys of the region. + pub approximate_keys: Option, + may_split_keys: Option, + /// Whether this region has scheduled a split check task. If we just + /// splitted the region or ingested one file which may be overlapped + /// with the existed data, reset the flag so that the region can be + /// splitted again. + may_skip_split_check: bool, +} + +impl SplitCheckTrigger { + pub fn should_skip(&self, threshold: u64) -> bool { + self.may_skip_split_check + && self.compaction_declined_bytes < threshold + && self.size_diff_hint < threshold + } + + pub fn post_triggered(&mut self) { + self.size_diff_hint = 0; + self.compaction_declined_bytes = 0; + // The task is scheduled, the next tick may skip it only when the size and keys + // are small. + // If either size or keys are big enough to do a split, + // keep split check tick until split is done + if !matches!(self.may_split_size, Some(true)) && !matches!(self.may_split_keys, Some(true)) + { + self.may_skip_split_check = true; + } + } + + pub fn post_split(&mut self) { + self.size_diff_hint = 0; + self.may_split_keys = None; + self.may_split_size = None; + // It's not correct anymore, so set it to false to schedule a split check task. 
+ self.may_skip_split_check = false; + } + + pub fn add_size_diff(&mut self, size_diff: i64) { + let diff = self.size_diff_hint as i64 + size_diff; + self.size_diff_hint = cmp::max(diff, 0) as u64; + } + + pub fn reset_skip_check(&mut self) { + self.may_skip_split_check = false; + } + + pub fn on_clear_region_size(&mut self) { + self.approximate_size = None; + self.approximate_keys = None; + self.may_split_size = None; + self.may_split_keys = None; + self.may_skip_split_check = false; + } + + pub fn on_approximate_region_size(&mut self, size: Option, splitable: Option) { + // If size is none, it means no estimated size + if size.is_some() { + self.approximate_size = size; + } + + if splitable.is_some() { + self.may_split_size = splitable; + } + + // If the region is truly splitable, + // may_skip_split_check should be false + if matches!(splitable, Some(true)) { + self.may_skip_split_check = false; + } + } + + pub fn on_approximate_region_keys(&mut self, keys: Option, splitable: Option) { + // if keys is none, it means no estimated keys + if keys.is_some() { + self.approximate_keys = keys; + } + + if splitable.is_some() { + self.may_split_keys = splitable; + } + + // If the region is truly splitable, + // may_skip_split_check should be false + if matches!(splitable, Some(true)) { + self.may_skip_split_check = false; + } + } + + pub fn on_ingest_sst_result(&mut self, size: u64, keys: u64) { + self.approximate_size = Some(self.approximate_size.unwrap_or_default() + size); + self.approximate_keys = Some(self.approximate_keys.unwrap_or_default() + keys); + + // The ingested file may be overlapped with the data in engine, so we need to + // check it again to get the accurate value. + self.may_skip_split_check = false; + } +} + #[derive(Getters, MutGetters)] pub struct Peer where @@ -658,25 +771,10 @@ where pub peers_start_pending_time: Vec<(u64, Instant)>, /// A inaccurate cache about which peer is marked as down. 
down_peer_ids: Vec, - - /// An inaccurate difference in region size since last reset. - /// It is used to decide whether split check is needed. - pub size_diff_hint: u64, + /// the split check trigger + pub split_check_trigger: SplitCheckTrigger, /// The count of deleted keys since last reset. delete_keys_hint: u64, - /// An inaccurate difference in region size after compaction. - /// It is used to trigger check split to update approximate size and keys - /// after space reclamation of deleted entries. - pub compaction_declined_bytes: u64, - /// Approximate size of the region. - pub approximate_size: Option, - /// Approximate keys of the region. - pub approximate_keys: Option, - /// Whether this region has scheduled a split check task. If we just - /// splitted the region or ingested one file which may be overlapped - /// with the existed data, reset the flag so that the region can be - /// splitted again. - pub may_skip_split_check: bool, /// The state for consistency check. pub consistency_state: ConsistencyState, @@ -862,12 +960,8 @@ where wait_data_peers: Vec::default(), peers_start_pending_time: vec![], down_peer_ids: vec![], - size_diff_hint: 0, + split_check_trigger: SplitCheckTrigger::default(), delete_keys_hint: 0, - approximate_size: None, - approximate_keys: None, - may_skip_split_check: false, - compaction_declined_bytes: 0, leader_unreachable: false, pending_remove: false, wait_data, @@ -3361,8 +3455,8 @@ where self.peer_stat.written_keys += apply_metrics.written_keys; self.peer_stat.written_bytes += apply_metrics.written_bytes; self.delete_keys_hint += apply_metrics.delete_keys_hint; - let diff = self.size_diff_hint as i64 + apply_metrics.size_diff_hint; - self.size_diff_hint = cmp::max(diff, 0) as u64; + self.split_check_trigger + .add_size_diff(apply_metrics.size_diff_hint); if self.has_pending_snapshot() && self.ready_to_handle_pending_snap() { has_ready = true; @@ -3394,9 +3488,9 @@ where } pub fn post_split(&mut self) { - // Reset 
delete_keys_hint and size_diff_hint. self.delete_keys_hint = 0; - self.size_diff_hint = 0; + self.split_check_trigger.post_split(); + self.reset_region_buckets(); } @@ -5211,8 +5305,8 @@ where pending_peers: self.collect_pending_peers(ctx), written_bytes: self.peer_stat.written_bytes, written_keys: self.peer_stat.written_keys, - approximate_size: self.approximate_size, - approximate_keys: self.approximate_keys, + approximate_size: self.split_check_trigger.approximate_size, + approximate_keys: self.split_check_trigger.approximate_keys, replication_status: self.region_replication_status(ctx), wait_data_peers: self.wait_data_peers.clone(), }); diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index 94708e84f7a..e3c0042acf0 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -695,6 +695,19 @@ impl Runner { }; if !split_keys.is_empty() { + // Notify peer that if the region is truly splitable. 
+ // If it's truly splitable, then skip_split_check should be false; + self.router.update_approximate_size( + region.get_id(), + None, + Some(!split_keys.is_empty()), + ); + self.router.update_approximate_keys( + region.get_id(), + None, + Some(!split_keys.is_empty()), + ); + let region_epoch = region.get_region_epoch().clone(); self.router .ask_split(region_id, region_epoch, split_keys, "split checker".into()); @@ -736,6 +749,7 @@ impl Runner { } else { (!host.enable_region_bucket(), &empty_bucket) }; + let mut split_keys = vec![]; MergedIterator::<::Iterator>::new( tablet, LARGE_CFS, start_key, end_key, false, @@ -748,6 +762,7 @@ impl Runner { let mut skip_on_kv = false; while let Some(e) = iter.next() { if skip_on_kv && skip_check_bucket { + split_keys = host.split_keys(); return; } if !skip_on_kv && host.on_kv(region, &e) { @@ -810,6 +825,8 @@ impl Runner { } } + split_keys = host.split_keys(); + // if we scan the whole range, we can update approximate size and keys with // accurate value. 
if is_key_range { @@ -823,8 +840,17 @@ impl Runner { "bucket_count" => buckets.len(), "bucket_size" => bucket_size, ); - self.router.update_approximate_size(region.get_id(), size); - self.router.update_approximate_keys(region.get_id(), keys); + + self.router.update_approximate_size( + region.get_id(), + Some(size), + Some(!split_keys.is_empty()), + ); + self.router.update_approximate_keys( + region.get_id(), + Some(keys), + Some(!split_keys.is_empty()), + ); })?; if host.enable_region_bucket() { @@ -839,7 +865,7 @@ impl Runner { } timer.observe_duration(); - Ok(host.split_keys()) + Ok(split_keys) } fn change_cfg(&mut self, change: ConfigChange) { diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index ff47525ea37..5eb7d97796e 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -17,7 +17,7 @@ use engine_rocks::{config::BlobRunMode, RocksEngine, RocksSnapshot, RocksStatist use engine_test::raft::RaftTestEngine; use engine_traits::{ CfName, CfNamesExt, Engines, Iterable, KvEngine, Peekable, RaftEngineDebug, RaftEngineReadOnly, - CF_DEFAULT, CF_RAFT, + CF_DEFAULT, CF_RAFT, CF_WRITE, }; use file_system::IoRateLimiter; use futures::{executor::block_on, future::BoxFuture, StreamExt}; @@ -783,6 +783,14 @@ pub fn put_till_size( put_cf_till_size(cluster, CF_DEFAULT, limit, range) } +pub fn put_till_count( + cluster: &mut Cluster, + limit: u64, + range: &mut dyn Iterator, +) -> Vec { + put_cf_till_count(cluster, CF_WRITE, limit, range) +} + pub fn put_cf_till_size( cluster: &mut Cluster, cf: &'static str, @@ -815,6 +823,36 @@ pub fn put_cf_till_size( key.into_bytes() } +pub fn put_cf_till_count( + cluster: &mut Cluster, + cf: &'static str, + limit: u64, + range: &mut dyn Iterator, +) -> Vec { + assert!(limit > 0); + let mut len = 0; + let mut rng = rand::thread_rng(); + let mut key = String::new(); + let mut value = vec![0; 64]; + while len < limit { + let batch_size = std::cmp::min(5, 
limit - len); + let mut reqs = vec![]; + for _ in 0..batch_size { + key.clear(); + let key_id = range.next().unwrap(); + write!(key, "{:09}", key_id).unwrap(); + rng.fill_bytes(&mut value); + reqs.push(new_put_cf_cmd(cf, key.as_bytes(), &value)); + } + len += batch_size; + cluster.batch_put(key.as_bytes(), reqs).unwrap(); + // Approximate size of memtable is inaccurate for small data, + // we flush it to SST so we can use the size properties instead. + cluster.must_flush_cf(cf, true); + } + key.into_bytes() +} + pub fn new_mutation(op: Op, k: &[u8], v: &[u8]) -> Mutation { let mut mutation = Mutation::default(); mutation.set_op(op); diff --git a/components/tikv_util/src/log.rs b/components/tikv_util/src/log.rs index fd351eecbd4..91bd5013c1e 100644 --- a/components/tikv_util/src/log.rs +++ b/components/tikv_util/src/log.rs @@ -83,6 +83,18 @@ macro_rules! trace(($($args:tt)+) => { ::slog_global::trace!($($args)+) };); +/// Logs a infor or debug level message using the slog global logger. +#[macro_export] +macro_rules! 
info_or_debug{ + ($cond:expr; $($args:tt)+) => { + if $cond { + info!($($args)+) + } else { + debug!($($args)+) + } + }; +} + use std::fmt::{self, Display, Write}; use slog::{BorrowedKV, OwnedKVList, Record, KV}; diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 65c50793d7a..2ef3d499d22 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -1549,3 +1549,65 @@ fn test_split_region_with_no_valid_split_keys() { rx.recv_timeout(Duration::from_secs(5)).unwrap(); rx.try_recv().unwrap_err(); } + +/// This test case test if a split failed for some reason, +/// it can continue run split check and eventually the split will finish +#[test_case(test_raftstore::new_node_cluster)] +fn test_split_by_split_check_on_size() { + let mut cluster = new_cluster(0, 1); + cluster.cfg.raft_store.right_derive_when_split = true; + cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(50); + cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(100); + cluster.cfg.raft_store.region_split_check_diff = Some(ReadableSize(10)); + let region_max_size = 1440; + let region_split_size = 960; + cluster.cfg.coprocessor.region_max_size = Some(ReadableSize(region_max_size)); + cluster.cfg.coprocessor.region_split_size = Some(ReadableSize(region_split_size)); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + let _r = cluster.run_conf_change(); + + // make first split fail + // 1*return means it would run "return" action once + fail::cfg("fail_pre_propose_split", "1*return").unwrap(); + + // Insert region_max_size into the cluster. 
+ // It should trigger the split + let mut range = 1..; + let key = put_till_size(&mut cluster, region_max_size / 2, &mut range); + let region = pd_client.get_region(&key).unwrap(); + put_till_size(&mut cluster, region_max_size / 2 + 100, &mut range); + // waiting the split, + cluster.wait_region_split(®ion); +} + +/// This test case test if a split failed for some reason, +/// it can continue run split check and eventually the split will finish +#[test_case(test_raftstore::new_node_cluster)] +fn test_split_by_split_check_on_keys() { + let mut cluster = new_cluster(0, 1); + cluster.cfg.raft_store.right_derive_when_split = true; + cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(50); + cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(100); + cluster.cfg.raft_store.region_split_check_diff = Some(ReadableSize(10)); + let region_max_keys = 15; + let region_split_keys = 10; + cluster.cfg.coprocessor.region_max_keys = Some(region_max_keys); + cluster.cfg.coprocessor.region_split_keys = Some(region_split_keys); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + let _r = cluster.run_conf_change(); + + // make first split fail + // 1*return means it would run "return" action once + fail::cfg("fail_pre_propose_split", "1*return").unwrap(); + + // Insert region_max_size into the cluster. 
+ // It should trigger the split + let mut range = 1..; + let key = put_till_count(&mut cluster, region_max_keys / 2, &mut range); + let region = pd_client.get_region(&key).unwrap(); + put_till_count(&mut cluster, region_max_keys / 2 + 3, &mut range); + // waiting the split, + cluster.wait_region_split(®ion); +} diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 5439e5c8ba2..3affbadec4b 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -609,7 +609,7 @@ fn test_node_split_region_after_reboot_with_config_change() { sleep_ms(200); assert_eq!(pd_client.get_split_count(), 0); - // change the config to make the region splittable + // change the config to make the region splitable cluster.cfg.coprocessor.region_max_size = Some(ReadableSize(region_max_size / 3)); cluster.cfg.coprocessor.region_split_size = Some(ReadableSize(region_split_size / 3)); cluster.cfg.coprocessor.region_bucket_size = ReadableSize(region_split_size / 3); From a932082fe406dee928cd1823e2177cc7cc62ed28 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Wed, 8 Nov 2023 15:32:12 +0800 Subject: [PATCH 0997/1149] server: change the log level to debug for cop error response (#15882) ref tikv/tikv#15881 Change the coprocessor error response log level to DEBUG Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/coprocessor/endpoint.rs | 105 +++++++++++++++--------------------- src/read_pool.rs | 4 ++ 2 files changed, 48 insertions(+), 61 deletions(-) diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 7a12c7493e5..01cb571e8aa 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -544,8 +544,9 @@ impl Endpoint { if let Err(busy_err) = self.read_pool.check_busy_threshold(Duration::from_millis( req.get_context().get_busy_threshold_ms() as u64, )) { - let mut resp = 
coppb::Response::default(); - resp.mut_region_error().set_server_is_busy(busy_err); + let mut pb_error = errorpb::Error::new(); + pb_error.set_server_is_busy(busy_err); + let resp = make_error_response(Error::Region(pb_error)); return Either::Left(async move { resp.into() }); } @@ -810,77 +811,59 @@ impl Endpoint { } } +macro_rules! make_error_response_common { + ($resp:expr, $tag:expr, $e:expr) => {{ + match $e { + Error::Region(e) => { + $tag = storage::get_tag_from_header(&e); + $resp.set_region_error(e); + } + Error::Locked(info) => { + $tag = "meet_lock"; + $resp.set_locked(info); + } + Error::DeadlineExceeded => { + $tag = "deadline_exceeded"; + $resp.set_other_error($e.to_string()); + } + Error::MaxPendingTasksExceeded => { + $tag = "max_pending_tasks_exceeded"; + let mut server_is_busy_err = errorpb::ServerIsBusy::default(); + server_is_busy_err.set_reason($e.to_string()); + let mut errorpb = errorpb::Error::default(); + errorpb.set_message($e.to_string()); + errorpb.set_server_is_busy(server_is_busy_err); + $resp.set_region_error(errorpb); + } + Error::Other(_) => { + $tag = "other"; + warn!("unexpected other error encountered processing coprocessor task"; + "error" => ?&$e, + ); + $resp.set_other_error($e.to_string()); + } + }; + COPR_REQ_ERROR.with_label_values(&[$tag]).inc(); + }}; +} + fn make_error_batch_response(batch_resp: &mut coppb::StoreBatchTaskResponse, e: Error) { - warn!( + debug!( "batch cop task error-response"; "err" => %e ); let tag; - match e { - Error::Region(e) => { - tag = storage::get_tag_from_header(&e); - batch_resp.set_region_error(e); - } - Error::Locked(info) => { - tag = "meet_lock"; - batch_resp.set_locked(info); - } - Error::DeadlineExceeded => { - tag = "deadline_exceeded"; - batch_resp.set_other_error(e.to_string()); - } - Error::MaxPendingTasksExceeded => { - tag = "max_pending_tasks_exceeded"; - let mut server_is_busy_err = errorpb::ServerIsBusy::default(); - server_is_busy_err.set_reason(e.to_string()); - let mut errorpb 
= errorpb::Error::default(); - errorpb.set_message(e.to_string()); - errorpb.set_server_is_busy(server_is_busy_err); - batch_resp.set_region_error(errorpb); - } - Error::Other(_) => { - tag = "other"; - batch_resp.set_other_error(e.to_string()); - } - }; - COPR_REQ_ERROR.with_label_values(&[tag]).inc(); + make_error_response_common!(batch_resp, tag, e); } fn make_error_response(e: Error) -> coppb::Response { - warn!( + debug!( "error-response"; "err" => %e ); - let mut resp = coppb::Response::default(); let tag; - match e { - Error::Region(e) => { - tag = storage::get_tag_from_header(&e); - resp.set_region_error(e); - } - Error::Locked(info) => { - tag = "meet_lock"; - resp.set_locked(info); - } - Error::DeadlineExceeded => { - tag = "deadline_exceeded"; - resp.set_other_error(e.to_string()); - } - Error::MaxPendingTasksExceeded => { - tag = "max_pending_tasks_exceeded"; - let mut server_is_busy_err = errorpb::ServerIsBusy::default(); - server_is_busy_err.set_reason(e.to_string()); - let mut errorpb = errorpb::Error::default(); - errorpb.set_message(e.to_string()); - errorpb.set_server_is_busy(server_is_busy_err); - resp.set_region_error(errorpb); - } - Error::Other(_) => { - tag = "other"; - resp.set_other_error(e.to_string()); - } - }; - COPR_REQ_ERROR.with_label_values(&[tag]).inc(); + let mut resp = coppb::Response::default(); + make_error_response_common!(resp, tag, e); resp } diff --git a/src/read_pool.rs b/src/read_pool.rs index a5898ea4f63..7821f2f946e 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -312,6 +312,10 @@ impl ReadPoolHandle { let mut busy_err = errorpb::ServerIsBusy::default(); busy_err.set_reason("estimated wait time exceeds threshold".to_owned()); busy_err.estimated_wait_ms = u32::try_from(estimated_wait.as_millis()).unwrap_or(u32::MAX); + warn!("Already many pending tasks in the read queue, task is rejected"; + "busy_threshold" => ?&busy_threshold, + "busy_err" => ?&busy_err, + ); Err(busy_err) } } From 
f574ec0830f88a42290f1721a919b6a293cd7cfa Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 8 Nov 2023 15:56:12 +0800 Subject: [PATCH 0998/1149] raftstore: gc uninitialized stale peer after merge (#15934) close tikv/tikv#15919 A "stale peer" refers to a peer that still exists on a TiKV node but has been removed from the raft group, typically through a confchange operation. TiKV performs regular checks and validations on its peers to ensure that no such stale peer exists. However, the current stale peer check is not enough when dealing with uninitialized stale peers that its region has been merged. These uninitialized stale peers are left indefinitely, consuming CPU, memory, and blocking resolved ts. This commit introduces an in-memory state for peers whose raft messages create uninitialized stale peers. The stale peer then sends a MsgCheckStalePeer message to the corresponding peer, validating whether it should be removed. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/peer.rs | 5 ++ components/raftstore/src/store/fsm/store.rs | 5 ++ components/raftstore/src/store/peer.rs | 13 ++++- components/raftstore/src/store/worker/pd.rs | 10 +++- tests/integrations/raftstore/test_merge.rs | 57 +++++++++++++++++++ .../integrations/raftstore/test_stale_peer.rs | 47 +++++++++++++++ 6 files changed, 134 insertions(+), 3 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 03e31938aa0..942514153c7 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -286,6 +286,7 @@ where region, meta_peer, wait_data, + None, )?, tick_registry: [false; PeerTick::VARIANT_COUNT], missing_ticks: 0, @@ -316,6 +317,7 @@ where engines: Engines, region_id: u64, peer: metapb::Peer, + create_by_peer: metapb::Peer, ) -> Result> { // We will remove tombstone key when apply snapshot 
info!( @@ -323,6 +325,8 @@ where "region_id" => region_id, "peer_id" => peer.get_id(), "store_id" => store_id, + "create_by_peer_id" => create_by_peer.get_id(), + "create_by_peer_store_id" => create_by_peer.get_store_id(), ); let mut region = metapb::Region::default(); @@ -342,6 +346,7 @@ where ®ion, peer, false, + Some(create_by_peer), )?, tick_registry: [false; PeerTick::VARIANT_COUNT], missing_ticks: 0, diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 950768055e4..6227e28cd19 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -681,6 +681,8 @@ where "region_id" => region_id, "current_region_epoch" => ?cur_epoch, "msg_type" => ?msg_type, + "to_peer_id" => ?from_peer.get_id(), + "to_peer_store_id" => ?from_peer.get_store_id(), ); self.raft_metrics.message_dropped.stale_msg.inc(); @@ -699,6 +701,8 @@ where error!(?e; "send gc message failed"; "region_id" => region_id, + "to_peer_id" => ?from_peer.get_id(), + "to_peer_store_id" => ?from_peer.get_store_id(), ); } } @@ -2385,6 +2389,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER self.ctx.engines.clone(), region_id, target.clone(), + msg.get_from_peer().clone(), )?; // WARNING: The checking code must be above this line. diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index da67784e734..90676411bfc 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -708,6 +708,8 @@ where pub peer_heartbeats: HashMap, /// Record the waiting data status of each follower or learner peer. pub wait_data_peers: Vec, + /// This peer is created by a raft message from `create_by_peer`. 
+ create_by_peer: Option, proposals: ProposalQueue>, leader_missing_time: Option, @@ -904,6 +906,7 @@ where region: &metapb::Region, peer: metapb::Peer, wait_data: bool, + create_by_peer: Option, ) -> Result> { let peer_id = peer.get_id(); if peer_id == raft::INVALID_ID { @@ -958,6 +961,7 @@ where peer_cache: RefCell::new(HashMap::default()), peer_heartbeats: HashMap::default(), wait_data_peers: Vec::default(), + create_by_peer, peers_start_pending_time: vec![], down_peer_ids: vec![], split_check_trigger: SplitCheckTrigger::default(), @@ -5436,9 +5440,16 @@ where &mut self, ctx: &mut PollContext, ) { - if self.check_stale_conf_ver < self.region().get_region_epoch().get_conf_ver() { + if self.check_stale_conf_ver < self.region().get_region_epoch().get_conf_ver() + || self.region().get_region_epoch().get_conf_ver() == 0 + { self.check_stale_conf_ver = self.region().get_region_epoch().get_conf_ver(); self.check_stale_peers = self.region().get_peers().to_vec(); + if let Some(create_by_peer) = self.create_by_peer.as_ref() { + // Push create_by_peer in case the peer is removed before + // initialization which has no peer in region. + self.check_stale_peers.push(create_by_peer.clone()); + } } for peer in &self.check_stale_peers { if peer.get_id() == self.peer_id() { diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 606576b22e4..79810289669 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1567,8 +1567,14 @@ where } } Ok(None) => { - // splitted Region has not yet reported to PD. - // TODO: handle merge + // Splitted region has not yet reported to PD. + // + // Or region has been merged. This case is handled by + // message `MsgCheckStalePeer`, stale peers will be + // removed eventually. 
+ PD_VALIDATE_PEER_COUNTER_VEC + .with_label_values(&["region not found"]) + .inc(); } Err(e) => { error!("get region failed"; "err" => ?e); diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 8d93d2c5a5c..7d964c03319 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -407,6 +407,63 @@ fn test_node_check_merged_message() { must_get_none(&engine3, b"v5"); } +/// Test if an uninitialized stale peer will be handled properly after merge. +#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_gc_uninitialized_peer_after_merge() { + let mut cluster = new_cluster(0, 4); + configure_for_merge(&mut cluster.cfg); + ignore_merge_target_integrity(&mut cluster.cfg, &cluster.pd_client); + cluster.cfg.raft_store.raft_election_timeout_ticks = 5; + cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40); + cluster.cfg.raft_store.max_leader_missing_duration = ReadableDuration::millis(150); + cluster.cfg.raft_store.abnormal_leader_missing_duration = ReadableDuration::millis(100); + cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(100); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.run_conf_change(); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + + // test if an uninitialized stale peer before conf removal is destroyed + // automatically + let region = pd_client.get_region(b"k1").unwrap(); + pd_client.must_add_peer(region.get_id(), new_peer(2, 2)); + pd_client.must_add_peer(region.get_id(), new_peer(3, 3)); + + cluster.must_split(®ion, b"k2"); + let left = pd_client.get_region(b"k1").unwrap(); + let right = pd_client.get_region(b"k2").unwrap(); + + // Block snapshot messages, so that new peers will never be initialized. 
+ cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(left.get_id(), 4) + .msg_type(MessageType::MsgSnapshot) + .direction(Direction::Recv), + )); + // Add peer (4,4), remove peer (4,4) and then merge regions. + // Peer (4,4) will be an an uninitialized stale peer. + pd_client.must_add_peer(left.get_id(), new_peer(4, 4)); + cluster.must_region_exist(left.get_id(), 4); + cluster.add_send_filter(IsolationFilterFactory::new(4)); + pd_client.must_remove_peer(left.get_id(), new_peer(4, 4)); + pd_client.must_merge(left.get_id(), right.get_id()); + cluster.clear_send_filters(); + + // Wait for the peer (4,4) to be destroyed. + sleep_ms( + 2 * cluster + .cfg + .raft_store + .max_leader_missing_duration + .as_millis(), + ); + cluster.must_region_not_exist(left.get_id(), 4); +} + // Test if a merge handled properly when there is a unfinished slow split before // merge. // No v2, it requires all peers to be available to check trim status. diff --git a/tests/integrations/raftstore/test_stale_peer.rs b/tests/integrations/raftstore/test_stale_peer.rs index e12584d6c60..f76373756f9 100644 --- a/tests/integrations/raftstore/test_stale_peer.rs +++ b/tests/integrations/raftstore/test_stale_peer.rs @@ -6,8 +6,10 @@ use std::{sync::Arc, thread, time::*}; use engine_traits::{Peekable, CF_RAFT}; use kvproto::raft_serverpb::{PeerState, RegionLocalState}; +use pd_client::PdClient; use raft::eraftpb::MessageType; use test_raftstore::*; +use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, HandyRwLock}; /// A helper function for testing the behaviour of the gc of stale peer @@ -310,3 +312,48 @@ fn test_stale_learner_with_read_index() { let state: RegionLocalState = engine3.get_msg_cf(CF_RAFT, &state_key).unwrap().unwrap(); assert_eq!(state.get_state(), PeerState::Tombstone); } + +/// Test if an uninitialized stale peer will be removed after restart. 
+#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_restart_gc_uninitialized_peer_after_merge() { + let mut cluster = new_cluster(0, 4); + configure_for_merge(&mut cluster.cfg); + ignore_merge_target_integrity(&mut cluster.cfg, &cluster.pd_client); + cluster.cfg.raft_store.raft_election_timeout_ticks = 5; + cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40); + cluster.cfg.raft_store.max_leader_missing_duration = ReadableDuration::millis(150); + cluster.cfg.raft_store.abnormal_leader_missing_duration = ReadableDuration::millis(100); + cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(100); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.run_conf_change(); + + cluster.must_put(b"k1", b"v1"); + + // test if an uninitialized stale peer before conf removal is destroyed + // automatically + let region = pd_client.get_region(b"k1").unwrap(); + pd_client.must_add_peer(region.get_id(), new_peer(2, 2)); + pd_client.must_add_peer(region.get_id(), new_peer(3, 3)); + + // Block snapshot messages, so that new peers will never be initialized. + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(region.get_id(), 4) + .msg_type(MessageType::MsgSnapshot) + .direction(Direction::Recv), + )); + // Add peer (4,4), remove peer (4,4) and then merge regions. + // Peer (4,4) will be an an uninitialized stale peer. + pd_client.must_add_peer(region.get_id(), new_peer(4, 4)); + cluster.must_region_exist(region.get_id(), 4); + cluster.add_send_filter(IsolationFilterFactory::new(4)); + pd_client.must_remove_peer(region.get_id(), new_peer(4, 4)); + + // An uninitialized stale peer is removed automatically after restart. 
+ cluster.stop_node(4); + cluster.run_node(4).unwrap(); + cluster.must_region_not_exist(region.get_id(), 4); +} From 1819bb8f077aba4f4a28eb0aa89d43fcd3f873d7 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 9 Nov 2023 15:33:43 +0800 Subject: [PATCH 0999/1149] resource_control: add quota limiter per priority (#15918) ref tikv/tikv#15917 Signed-off-by: glorv --- components/backup/src/endpoint.rs | 2 +- components/resource_control/src/future.rs | 25 +- .../resource_control/src/resource_group.rs | 256 +++++++++++++++++- components/resource_control/src/service.rs | 7 +- components/resource_control/src/worker.rs | 20 +- src/coprocessor/endpoint.rs | 8 + src/import/sst_service.rs | 4 +- src/storage/mod.rs | 18 ++ src/storage/txn/scheduler.rs | 4 + 9 files changed, 319 insertions(+), 25 deletions(-) diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index cc4d0bf0e28..5c243a1e8d8 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -927,7 +927,7 @@ impl Endpoint { let sst_max_size = self.config_manager.0.read().unwrap().sst_max_size.0; let limit = self.softlimit.limit(); let resource_limiter = self.resource_ctl.as_ref().and_then(|r| { - r.get_resource_limiter(&request.resource_group_name, &request.source_tag) + r.get_background_resource_limiter(&request.resource_group_name, &request.source_tag) }); self.pool.borrow_mut().spawn(async move { diff --git a/components/resource_control/src/future.rs b/components/resource_control/src/future.rs index fd98fc9a092..a935c3b41fa 100644 --- a/components/resource_control/src/future.rs +++ b/components/resource_control/src/future.rs @@ -16,7 +16,7 @@ use tokio_timer::Delay; use crate::{ resource_group::{ResourceConsumeType, ResourceController}, - resource_limiter::ResourceLimiter, + resource_limiter::{ResourceLimiter, ResourceType}, }; const MAX_WAIT_DURATION: Duration = Duration::from_secs(10); @@ -125,13 +125,24 @@ impl Future for LimitedFuture { if this.res.is_ready() 
{ return std::mem::replace(this.res, Poll::Pending); } - let last_io_bytes = match get_thread_io_bytes_stats() { - Ok(b) => Some(b), - Err(e) => { - warn!("load thread io bytes failed"; "err" => e); - None + // get io stats is very expensive, so we only do so if only io control is + // enabled. + let mut last_io_bytes = None; + if this + .resource_limiter + .get_limiter(ResourceType::Io) + .get_rate_limit() + .is_finite() + { + match get_thread_io_bytes_stats() { + Ok(b) => { + last_io_bytes = Some(b); + } + Err(e) => { + warn!("load thread io bytes failed"; "err" => e); + } } - }; + } let start = Instant::now(); let res = this.f.poll(cx); let dur = start.saturating_elapsed(); diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index a4b30e3d4ad..b7e7ca28705 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -22,6 +22,7 @@ use kvproto::{ resource_manager::{GroupMode, ResourceGroup as PbResourceGroup}, }; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard}; +use strum::{EnumCount, EnumIter, IntoEnumIterator}; use tikv_util::{info, time::Instant}; use yatp::queue::priority::TaskPriorityProvider; @@ -56,21 +57,70 @@ pub enum ResourceConsumeType { IoBytes(u64), } +#[derive(Copy, Clone, Eq, PartialEq, EnumCount, EnumIter)] +#[repr(usize)] +pub enum TaskPriority { + High = 0, + Medium = 1, + Low = 2, +} + +impl TaskPriority { + pub fn as_str(&self) -> &'static str { + match *self { + TaskPriority::High => "high", + TaskPriority::Medium => "medium", + TaskPriority::Low => "low", + } + } +} + +impl From for TaskPriority { + fn from(value: u32) -> Self { + // map the resource group priority value (1,8,16) to (Low,Medium,High) + if value < 6 { + Self::Low + } else if value < 11 { + Self::Medium + } else { + Self::High + } + } +} + /// ResourceGroupManager manages the metadata of each resource group. 
pub struct ResourceGroupManager { pub(crate) resource_groups: DashMap, + // the count of all groups, a fast path because call `DashMap::len` is a little slower. + group_count: AtomicU64, registry: RwLock>>, // auto incremental version generator used for mark the background // resource limiter has changed. version_generator: AtomicU64, + // the shared resource limiter of each priority + priority_limiters: [Arc; TaskPriority::COUNT], } impl Default for ResourceGroupManager { fn default() -> Self { + let priority_limiters = TaskPriority::iter() + .map(|p| { + Arc::new(ResourceLimiter::new( + p.as_str().to_owned(), + f64::INFINITY, + f64::INFINITY, + 0, + )) + }) + .collect::>() + .try_into() + .unwrap(); let manager = Self { resource_groups: Default::default(), + group_count: AtomicU64::new(0), registry: Default::default(), version_generator: AtomicU64::new(0), + priority_limiters, }; // init the default resource group by default. @@ -90,6 +140,11 @@ impl Default for ResourceGroupManager { } impl ResourceGroupManager { + #[inline] + pub fn get_group_count(&self) -> u64 { + self.group_count.load(Ordering::Relaxed) + } + fn get_ru_setting(rg: &PbResourceGroup, is_read: bool) -> u64 { match (rg.get_mode(), is_read) { // RU mode, read and write use the same setting. 
@@ -129,8 +184,13 @@ impl ResourceGroupManager { .and_then(|g| g.limiter.clone()); let limiter = self.build_resource_limiter(&rg, prev_limiter); - self.resource_groups - .insert(group_name, ResourceGroup::new(rg, limiter)); + if self + .resource_groups + .insert(group_name, ResourceGroup::new(rg, limiter)) + .is_none() + { + self.group_count.fetch_add(1, Ordering::Relaxed); + } } fn build_resource_limiter( @@ -161,6 +221,7 @@ impl ResourceGroupManager { if self.resource_groups.remove(&group_name).is_some() { deregister_metrics(name); info!("remove resource group"; "name"=> name); + self.group_count.fetch_sub(1, Ordering::Relaxed); } } @@ -184,6 +245,8 @@ impl ResourceGroupManager { controller.remove_resource_group(name.as_bytes()); } }); + self.group_count + .fetch_sub(removed_names.len() as u64, Ordering::Relaxed); } } @@ -234,24 +297,79 @@ impl ResourceGroupManager { } } + // only enable priority quota limiter when there is at least 1 user-defined + // resource group. + #[inline] + fn enable_priority_limiter(&self) -> bool { + self.get_group_count() > 1 + } + + // Always return the background resource limiter if any; + // Only return the foregroup limiter when priority is enabled. pub fn get_resource_limiter( &self, rg: &str, request_source: &str, + override_priority: u64, + ) -> Option> { + let (limiter, group_priority) = + self.get_background_resource_limiter_with_priority(rg, request_source); + if limiter.is_some() { + return limiter; + } + + // if there is only 1 resource group, priority quota limiter is useless so just + // return None for better performance. + if !self.enable_priority_limiter() { + return None; + } + + // request priority has higher priority, 0 means priority is not set. + let mut task_priority = override_priority as u32; + if task_priority == 0 { + task_priority = group_priority; + } + Some(self.priority_limiters[TaskPriority::from(task_priority) as usize].clone()) + } + + // return a ResourceLimiter for background tasks only. 
+ pub fn get_background_resource_limiter( + &self, + rg: &str, + request_source: &str, ) -> Option> { + self.get_background_resource_limiter_with_priority(rg, request_source) + .0 + } + + fn get_background_resource_limiter_with_priority( + &self, + rg: &str, + request_source: &str, + ) -> (Option>, u32) { fail_point!("only_check_source_task_name", |name| { assert_eq!(&name.unwrap(), request_source); - None + (None, 8) }); + let mut group_priority = None; if let Some(group) = self.resource_groups.get(rg) { + group_priority = Some(group.group.priority); if !group.fallback_default { - return group.get_resource_limiter(request_source); + return ( + group.get_background_resource_limiter(request_source), + group.group.priority, + ); } } - self.resource_groups + let default_group = self + .resource_groups .get(DEFAULT_RESOURCE_GROUP_NAME) - .and_then(|g| g.get_resource_limiter(request_source)) + .unwrap(); + ( + default_group.get_background_resource_limiter(request_source), + group_priority.unwrap_or(default_group.group.priority), + ) } } @@ -286,7 +404,10 @@ impl ResourceGroup { .get_fill_rate() } - fn get_resource_limiter(&self, request_source: &str) -> Option> { + fn get_background_resource_limiter( + &self, + request_source: &str, + ) -> Option> { self.limiter.as_ref().and_then(|limiter| { // the source task name is the last part of `request_source` separated by "_" // the request_source is @@ -871,6 +992,35 @@ pub(crate) mod tests { ); } + #[test] + fn test_resource_group_crud() { + let resource_manager = ResourceGroupManager::default(); + assert_eq!(resource_manager.get_group_count(), 1); + + let group1 = new_resource_group_ru("test1".into(), 100, HIGH_PRIORITY); + resource_manager.add_resource_group(group1); + assert_eq!(resource_manager.get_group_count(), 2); + + let group2 = new_resource_group_ru("test2".into(), 200, LOW_PRIORITY); + resource_manager.add_resource_group(group2); + assert_eq!(resource_manager.get_group_count(), 3); + + let group1 = 
new_resource_group_ru("test1".into(), 150, HIGH_PRIORITY); + resource_manager.add_resource_group(group1.clone()); + assert_eq!(resource_manager.get_group_count(), 3); + assert_eq!( + resource_manager.get_resource_group("test1").unwrap().group, + group1 + ); + + resource_manager.remove_resource_group("test2"); + assert!(resource_manager.get_resource_group("test2").is_none()); + assert_eq!(resource_manager.get_group_count(), 2); + + resource_manager.remove_resource_group("test2"); + assert_eq!(resource_manager.get_group_count(), 2); + } + #[test] fn test_resource_group_priority() { let resource_manager = ResourceGroupManager::default(); @@ -1165,4 +1315,96 @@ pub(crate) mod tests { assert_eq!(metadata1.group_name(), group_name.as_bytes()); } } + + #[test] + fn test_get_resource_limiter() { + let mgr = ResourceGroupManager::default(); + + let default_group = new_background_resource_group_ru( + "default".into(), + 200, + MEDIUM_PRIORITY, + vec!["br".into(), "stats".into()], + ); + mgr.add_resource_group(default_group); + let default_limiter = mgr + .get_resource_group("default") + .unwrap() + .limiter + .clone() + .unwrap(); + + assert!(mgr.get_resource_limiter("default", "query", 0).is_none()); + assert!( + mgr.get_resource_limiter("default", "query", HIGH_PRIORITY as u64) + .is_none() + ); + + let group1 = new_resource_group("test1".into(), true, 100, 100, HIGH_PRIORITY); + mgr.add_resource_group(group1); + + let bg_group = new_background_resource_group_ru( + "bg".into(), + 50, + LOW_PRIORITY, + vec!["ddl".into(), "stats".into()], + ); + mgr.add_resource_group(bg_group); + let bg_limiter = mgr + .get_resource_group("bg") + .unwrap() + .limiter + .clone() + .unwrap(); + + assert!( + mgr.get_background_resource_limiter("test1", "ddl") + .is_none() + ); + assert!(Arc::ptr_eq( + &mgr.get_background_resource_limiter("test1", "stats") + .unwrap(), + &default_limiter + )); + + assert!(Arc::ptr_eq( + &mgr.get_background_resource_limiter("bg", "stats").unwrap(), + &bg_limiter 
+ )); + assert!(mgr.get_background_resource_limiter("bg", "br").is_none()); + assert!( + mgr.get_background_resource_limiter("bg", "invalid") + .is_none() + ); + + assert!(Arc::ptr_eq( + &mgr.get_background_resource_limiter("unknown", "stats") + .unwrap(), + &default_limiter + )); + + assert!(Arc::ptr_eq( + &mgr.get_resource_limiter("test1", "stats", 0).unwrap(), + &default_limiter + )); + assert!(Arc::ptr_eq( + &mgr.get_resource_limiter("test1", "query", 0).unwrap(), + &mgr.priority_limiters[0] + )); + assert!(Arc::ptr_eq( + &mgr.get_resource_limiter("test1", "query", LOW_PRIORITY as u64) + .unwrap(), + &mgr.priority_limiters[2] + )); + + assert!(Arc::ptr_eq( + &mgr.get_resource_limiter("default", "query", LOW_PRIORITY as u64) + .unwrap(), + &mgr.priority_limiters[2] + )); + assert!(Arc::ptr_eq( + &mgr.get_resource_limiter("unknown", "query", 0).unwrap(), + &mgr.priority_limiters[1] + )); + } } diff --git a/components/resource_control/src/service.rs b/components/resource_control/src/service.rs index 5ecac9d74c4..2c2bbdc5549 100644 --- a/components/resource_control/src/service.rs +++ b/components/resource_control/src/service.rs @@ -565,7 +565,10 @@ pub mod tests { s_clone.report_ru_metrics().await; }); // Mock consume. 
- let bg_limiter = s.manager.get_resource_limiter("background", "br").unwrap(); + let bg_limiter = s + .manager + .get_background_resource_limiter("background", "br") + .unwrap(); bg_limiter.consume( Duration::from_secs(2), IoBytes { @@ -584,7 +587,7 @@ pub mod tests { s.manager.add_resource_group(background_group); let new_bg_limiter = s .manager - .get_resource_limiter("background", "lightning") + .get_background_resource_limiter("background", "lightning") .unwrap(); new_bg_limiter.consume( Duration::from_secs(5), diff --git a/components/resource_control/src/worker.rs b/components/resource_control/src/worker.rs index deb1b2e44de..7bc76691e1f 100644 --- a/components/resource_control/src/worker.rs +++ b/components/resource_control/src/worker.rs @@ -340,7 +340,11 @@ mod tests { let resource_ctl = Arc::new(ResourceGroupManager::default()); let rg1 = new_resource_group_ru("test".into(), 1000, 14); resource_ctl.add_resource_group(rg1); - assert!(resource_ctl.get_resource_limiter("test", "br").is_none()); + assert!( + resource_ctl + .get_background_resource_limiter("test", "br") + .is_none() + ); let test_provider = TestResourceStatsProvider::new(8.0, 10000.0); let mut worker = @@ -351,10 +355,12 @@ mod tests { resource_ctl.add_resource_group(default_bg); assert!( resource_ctl - .get_resource_limiter("default", "lightning") + .get_background_resource_limiter("default", "lightning") .is_none() ); - let limiter = resource_ctl.get_resource_limiter("default", "br").unwrap(); + let limiter = resource_ctl + .get_background_resource_limiter("default", "br") + .unwrap(); assert!( limiter .get_limiter(ResourceType::Cpu) @@ -513,13 +519,15 @@ mod tests { let default = new_background_resource_group_ru("default".into(), 2000, 8, vec!["br".into()]); resource_ctl.add_resource_group(default); - let new_limiter = resource_ctl.get_resource_limiter("default", "br").unwrap(); + let new_limiter = resource_ctl + .get_background_resource_limiter("default", "br") + .unwrap(); 
assert_eq!(&*new_limiter as *const _, &*limiter as *const _); let bg = new_background_resource_group_ru("background".into(), 1000, 15, vec!["br".into()]); resource_ctl.add_resource_group(bg); let bg_limiter = resource_ctl - .get_resource_limiter("background", "br") + .get_background_resource_limiter("background", "br") .unwrap(); reset_quota(&mut worker, 5.0, 7000.0, Duration::from_secs(1)); @@ -581,7 +589,7 @@ mod tests { new_background_resource_group_ru("background".into(), 1000, 15, vec!["br".into()]); resource_ctl.add_resource_group(new_bg); let new_bg_limiter = resource_ctl - .get_resource_limiter("background", "br") + .get_background_resource_limiter("background", "br") .unwrap(); assert_ne!(&*bg_limiter as *const _, &*new_bg_limiter as *const _); assert!( diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 01cb571e8aa..005a18938de 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -511,6 +511,10 @@ impl Endpoint { .get_resource_control_context() .get_resource_group_name(), req_ctx.context.get_request_source(), + req_ctx + .context + .get_resource_control_context() + .get_override_priority(), ) }); // box the tracker so that moving it is cheap. @@ -757,6 +761,10 @@ impl Endpoint { .get_resource_control_context() .get_resource_group_name(), req_ctx.context.get_request_source(), + req_ctx + .context + .get_resource_control_context() + .get_override_priority(), ) }); let key_ranges = req_ctx diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 92e73ca9f8f..2dc4f76b194 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -751,7 +751,7 @@ macro_rules! 
impl_write { let (meta, resource_limiter) = match first_req { Some(r) => { let limiter = resource_manager.as_ref().and_then(|m| { - m.get_resource_limiter( + m.get_background_resource_limiter( r.get_context() .get_resource_control_context() .get_resource_group_name(), @@ -1060,7 +1060,7 @@ impl ImportSst for ImportSstService { let tablets = self.tablets.clone(); let start = Instant::now(); let resource_limiter = self.resource_manager.as_ref().and_then(|r| { - r.get_resource_limiter( + r.get_background_resource_limiter( req.get_context() .get_resource_control_context() .get_resource_group_name(), diff --git a/src/storage/mod.rs b/src/storage/mod.rs index cc48d9e36e3..c0d6e6fc4a3 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -609,6 +609,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -782,6 +783,10 @@ impl Storage { .get_resource_control_context() .get_resource_group_name(), requests[0].get_context().get_request_source(), + requests[0] + .get_context() + .get_resource_control_context() + .get_override_priority(), ) }); let concurrency_manager = self.concurrency_manager.clone(); @@ -978,6 +983,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -1170,6 +1176,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -1346,6 +1353,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + 
ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -1662,6 +1670,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -1754,6 +1763,10 @@ impl Storage { .get_resource_control_context() .get_resource_group_name(), gets[0].get_context().get_request_source(), + gets[0] + .get_context() + .get_resource_control_context() + .get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -1893,6 +1906,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -2399,6 +2413,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -2536,6 +2551,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -2698,6 +2714,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); @@ -2879,6 +2896,7 @@ impl Storage { r.get_resource_limiter( ctx.get_resource_control_context().get_resource_group_name(), ctx.get_request_source(), + ctx.get_resource_control_context().get_override_priority(), ) }); let priority_tag = get_priority_tag(priority); diff --git 
a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 36492f22701..995c361e163 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -1236,6 +1236,10 @@ impl TxnScheduler { .get_resource_control_context() .get_resource_group_name(), task.cmd.ctx().get_request_source(), + task.cmd + .ctx() + .get_resource_control_context() + .get_override_priority(), ) }); let mut sample = quota_limiter.new_sample(true); From 7be1b17d72870bb95fcb7443b1f945f9c59657fa Mon Sep 17 00:00:00 2001 From: ShuNing Date: Thu, 9 Nov 2023 18:26:12 +0800 Subject: [PATCH 1000/1149] *: make unified-pool use FuturePool (#15925) close tikv/tikv#15924 *: make unified-pool use FuturePool Signed-off-by: nolouch Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tikv_util/src/worker/pool.rs | 34 ++--- .../tikv_util/src/yatp_pool/future_pool.rs | 23 +++- components/tikv_util/src/yatp_pool/mod.rs | 15 ++- src/read_pool.rs | 127 ++++++------------ 4 files changed, 86 insertions(+), 113 deletions(-) diff --git a/components/tikv_util/src/worker/pool.rs b/components/tikv_util/src/worker/pool.rs index c3919e42619..9ef827b007a 100644 --- a/components/tikv_util/src/worker/pool.rs +++ b/components/tikv_util/src/worker/pool.rs @@ -7,7 +7,7 @@ use std::{ future::Future, sync::{ atomic::{AtomicBool, AtomicUsize, Ordering}, - Arc, Mutex, + Arc, }, time::{Duration, Instant}, }; @@ -20,13 +20,13 @@ use futures::{ stream::StreamExt, }; use prometheus::IntGauge; -use yatp::{Remote, ThreadPool}; +use yatp::Remote; use super::metrics::*; use crate::{ future::{block_on_timeout, poll_future_notify}, timer::GLOBAL_TIMER_HANDLE, - yatp_pool::{DefaultTicker, YatpPoolBuilder}, + yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}, }; #[derive(PartialEq)] @@ -222,7 +222,7 @@ impl LazyWorker { } pub fn remote(&self) -> Remote { - self.worker.remote.clone() + self.worker.remote() } } @@ -301,11 +301,8 @@ impl> Builder { let pool = 
YatpPoolBuilder::new(DefaultTicker::default()) .name_prefix(self.name) .thread_count(self.thread_count, self.thread_count, self.thread_count) - .build_single_level_pool(); - let remote = pool.remote().clone(); - let pool = Arc::new(Mutex::new(Some(pool))); + .build_future_pool(); Worker { - remote, stop: Arc::new(AtomicBool::new(false)), pool, counter: Arc::new(AtomicUsize::new(0)), @@ -318,8 +315,7 @@ impl> Builder { /// A worker that can schedule time consuming tasks. #[derive(Clone)] pub struct Worker { - pool: Arc>>>, - remote: Remote, + pool: FuturePool, pending_capacity: usize, counter: Arc, stop: Arc, @@ -371,7 +367,7 @@ impl Worker { .interval(std::time::Instant::now(), interval) .compat(); let stop = self.stop.clone(); - self.remote.spawn(async move { + let _ = self.pool.spawn(async move { while !stop.load(Ordering::Relaxed) && let Some(Ok(_)) = interval.next().await { @@ -389,7 +385,7 @@ impl Worker { .interval(std::time::Instant::now(), interval) .compat(); let stop = self.stop.clone(); - self.remote.spawn(async move { + let _ = self.pool.spawn(async move { while !stop.load(Ordering::Relaxed) && let Some(Ok(_)) = interval.next().await { @@ -403,7 +399,7 @@ impl Worker { where F: Future + Send + 'static, { - self.remote.spawn(f); + let _ = self.pool.spawn(f); } fn delay_notify(tx: UnboundedSender>, timeout: Duration) { @@ -438,10 +434,8 @@ impl Worker { /// Stops the worker thread. pub fn stop(&self) { - if let Some(pool) = self.pool.lock().unwrap().take() { - self.stop.store(true, Ordering::Release); - pool.shutdown(); - } + self.stop.store(true, Ordering::Release); + self.pool.shutdown(); } /// Checks if underlying worker can't handle task immediately. 
@@ -451,7 +445,7 @@ impl Worker { } pub fn remote(&self) -> Remote { - self.remote.clone() + self.pool.remote().clone() } fn start_impl( @@ -461,7 +455,7 @@ impl Worker { metrics_pending_task_count: IntGauge, ) { let counter = self.counter.clone(); - self.remote.spawn(async move { + let _ = self.pool.spawn(async move { let mut handle = RunnableWrapper { inner: runner }; while let Some(msg) = receiver.next().await { match msg { @@ -488,7 +482,7 @@ impl Worker { let counter = self.counter.clone(); let timeout = runner.get_interval(); Self::delay_notify(tx.clone(), timeout); - self.remote.spawn(async move { + let _ = self.pool.spawn(async move { let mut handle = RunnableWrapper { inner: runner }; while let Some(msg) = receiver.next().await { match msg { diff --git a/components/tikv_util/src/yatp_pool/future_pool.rs b/components/tikv_util/src/yatp_pool/future_pool.rs index 827ffbbdce2..c6a34b2673b 100644 --- a/components/tikv_util/src/yatp_pool/future_pool.rs +++ b/components/tikv_util/src/yatp_pool/future_pool.rs @@ -56,7 +56,7 @@ impl FuturePool { pool, env, pool_size: AtomicUsize::new(pool_size), - max_tasks, + max_tasks: AtomicUsize::new(max_tasks), }), } } @@ -119,6 +119,11 @@ impl FuturePool { pub fn shutdown(&self) { self.inner.pool.shutdown(); } + + // Get a remote queue for spawning tasks without owning the thread pool. + pub fn remote(&self) -> &yatp::Remote { + self.inner.pool.remote() + } } struct PoolInner { @@ -126,13 +131,20 @@ struct PoolInner { env: Env, // for accessing pool_size config since yatp doesn't offer such getter. 
pool_size: AtomicUsize, - max_tasks: usize, + max_tasks: AtomicUsize, } impl PoolInner { #[inline] fn scale_pool_size(&self, thread_count: usize) { self.pool.scale_workers(thread_count); + let mut max_tasks = self.max_tasks.load(Ordering::Acquire); + if max_tasks != std::usize::MAX { + max_tasks = max_tasks + .saturating_div(self.pool_size.load(Ordering::Acquire)) + .saturating_mul(thread_count); + self.max_tasks.store(max_tasks, Ordering::Release); + } self.pool_size.store(thread_count, Ordering::Release); } @@ -148,15 +160,16 @@ impl PoolInner { max_tasks: 100, })); - if self.max_tasks == std::usize::MAX { + let max_tasks = self.max_tasks.load(Ordering::Acquire); + if max_tasks == std::usize::MAX { return Ok(()); } let current_tasks = self.get_running_task_count(); - if current_tasks >= self.max_tasks { + if current_tasks >= max_tasks { Err(Full { current_tasks, - max_tasks: self.max_tasks, + max_tasks, }) } else { Ok(()) diff --git a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index fc80e69cd84..2752f3f3c51 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -369,7 +369,7 @@ impl YatpPoolBuilder { FuturePool::from_pool(pool, &name, size, task) } - pub fn build_single_level_pool(self) -> ThreadPool { + fn build_single_level_pool(self) -> ThreadPool { let (builder, runner) = self.create_builder(); builder.build_with_queue_and_runner( yatp::queue::QueueType::SingleLevel, @@ -377,7 +377,18 @@ impl YatpPoolBuilder { ) } - pub fn build_multi_level_pool(self) -> ThreadPool { + pub fn build_multi_level_future_pool(self) -> FuturePool { + let name = self + .name_prefix + .clone() + .unwrap_or_else(|| "yatp_pool".to_string()); + let size = self.core_thread_count; + let task = self.max_tasks; + let pool = self.build_multi_level_pool(); + FuturePool::from_pool(pool, &name, size, task) + } + + fn build_multi_level_pool(self) -> ThreadPool { let name = self .name_prefix .clone() 
diff --git a/src/read_pool.rs b/src/read_pool.rs index 7821f2f946e..8f5a459c5bc 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -27,9 +27,7 @@ use tikv_util::{ yatp_pool::{self, CleanupMethod, FuturePool, PoolTicker, YatpPoolBuilder}, }; use tracker::TrackedFuture; -use yatp::{ - metrics::MULTILEVEL_LEVEL_ELAPSED, pool::Remote, queue::Extras, task::future::TaskCell, -}; +use yatp::{metrics::MULTILEVEL_LEVEL_ELAPSED, queue::Extras}; use self::metrics::*; use crate::{ @@ -55,11 +53,9 @@ pub enum ReadPool { read_pool_low: FuturePool, }, Yatp { - pool: yatp::ThreadPool, + pool: FuturePool, + // deprecated. will remove in the v8.x. running_tasks: IntGauge, - running_threads: IntGauge, - max_tasks: usize, - pool_size: usize, resource_ctl: Option>, time_slice_inspector: Arc, }, @@ -80,17 +76,11 @@ impl ReadPool { ReadPool::Yatp { pool, running_tasks, - running_threads, - max_tasks, - pool_size, resource_ctl, time_slice_inspector, } => ReadPoolHandle::Yatp { - remote: pool.remote().clone(), + remote: pool.clone(), running_tasks: running_tasks.clone(), - running_threads: running_threads.clone(), - max_tasks: *max_tasks, - pool_size: *pool_size, resource_ctl: resource_ctl.clone(), time_slice_inspector: time_slice_inspector.clone(), }, @@ -106,11 +96,8 @@ pub enum ReadPoolHandle { read_pool_low: FuturePool, }, Yatp { - remote: Remote, + remote: FuturePool, running_tasks: IntGauge, - running_threads: IntGauge, - max_tasks: usize, - pool_size: usize, resource_ctl: Option>, time_slice_inspector: Arc, }, @@ -145,19 +132,10 @@ impl ReadPoolHandle { ReadPoolHandle::Yatp { remote, running_tasks, - max_tasks, resource_ctl, .. } => { let running_tasks = running_tasks.clone(); - // Note that the running task number limit is not strict. - // If several tasks are spawned at the same time while the running task number - // is close to the limit, they may all pass this check and the number of running - // tasks may exceed the limit. 
- if running_tasks.get() as usize >= *max_tasks { - return Err(ReadPoolError::UnifiedReadPoolFull); - } - running_tasks.inc(); let fixed_level = match priority { CommandPri::High => Some(0), @@ -167,31 +145,26 @@ impl ReadPoolHandle { let group_name = metadata.group_name().to_owned(); let mut extras = Extras::new_multilevel(task_id, fixed_level); extras.set_metadata(metadata.to_vec()); - let task_cell = if let Some(resource_ctl) = resource_ctl { - TaskCell::new( - TrackedFuture::new(with_resource_limiter( - ControlledFuture::new( - async move { - f.await; - running_tasks.dec(); - }, - resource_ctl.clone(), - group_name, - ), - resource_limiter, - )), - extras, - ) + if let Some(resource_ctl) = resource_ctl { + let fut = TrackedFuture::new(with_resource_limiter( + ControlledFuture::new( + async move { + f.await; + running_tasks.dec(); + }, + resource_ctl.clone(), + group_name, + ), + resource_limiter, + )); + remote.spawn_with_extras(fut, extras)?; } else { - TaskCell::new( - TrackedFuture::new(async move { - f.await; - running_tasks.dec(); - }), - extras, - ) - }; - remote.spawn(task_cell); + let fut = async move { + f.await; + running_tasks.dec(); + }; + remote.spawn_with_extras(fut, extras)?; + } } } Ok(()) @@ -231,7 +204,7 @@ impl ReadPoolHandle { ReadPoolHandle::FuturePools { read_pool_normal, .. } => read_pool_normal.get_pool_size(), - ReadPoolHandle::Yatp { pool_size, .. } => *pool_size, + ReadPoolHandle::Yatp { remote, .. } => remote.get_pool_size(), } } @@ -241,10 +214,10 @@ impl ReadPoolHandle { read_pool_normal, .. } => read_pool_normal.get_running_task_count() / read_pool_normal.get_pool_size(), ReadPoolHandle::Yatp { + remote, running_tasks, - pool_size, .. - } => running_tasks.get() as usize / *pool_size, + } => running_tasks.get() as usize / remote.get_pool_size(), } } @@ -253,19 +226,8 @@ impl ReadPoolHandle { ReadPoolHandle::FuturePools { .. } => { unreachable!() } - ReadPoolHandle::Yatp { - remote, - running_threads, - max_tasks, - pool_size, - .. 
- } => { - remote.scale_workers(max_thread_count); - *max_tasks = max_tasks - .saturating_div(*pool_size) - .saturating_mul(max_thread_count); - running_threads.set(max_thread_count as i64); - *pool_size = max_thread_count; + ReadPoolHandle::Yatp { remote, .. } => { + remote.scale_pool_size(max_thread_count); } } } @@ -469,6 +431,11 @@ pub fn build_yatp_read_pool_with_name( config.max_thread_count, ), ) + .max_tasks( + config + .max_tasks_per_worker + .saturating_mul(config.max_thread_count), + ) .after_start(move || { let engine = raftkv.lock().unwrap().clone(); set_tls_engine(engine); @@ -478,21 +445,15 @@ pub fn build_yatp_read_pool_with_name( destroy_tls_engine::(); }); let pool = if let Some(ref r) = resource_ctl { - builder.build_priority_pool(r.clone()) + builder.build_priority_future_pool(r.clone()) } else { - builder.build_multi_level_pool() + builder.build_multi_level_future_pool() }; let time_slice_inspector = Arc::new(TimeSliceInspector::new(&unified_read_pool_name)); ReadPool::Yatp { pool, running_tasks: UNIFIED_READ_POOL_RUNNING_TASKS .with_label_values(&[&unified_read_pool_name]), - running_threads: UNIFIED_READ_POOL_RUNNING_THREADS - .with_label_values(&[&unified_read_pool_name]), - max_tasks: config - .max_tasks_per_worker - .saturating_mul(config.max_thread_count), - pool_size: config.max_thread_count, resource_ctl, time_slice_inspector, } @@ -761,12 +722,6 @@ mod metrics { &["name"] ) .unwrap(); - pub static ref UNIFIED_READ_POOL_RUNNING_THREADS: IntGaugeVec = register_int_gauge_vec!( - "tikv_unified_read_pool_thread_count", - "The number of running threads in the unified read pool", - &["name"] - ) - .unwrap(); } } @@ -826,7 +781,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) { - Err(ReadPoolError::UnifiedReadPoolFull) => {} + Err(ReadPoolError::FuturePoolFull(..)) => {} _ => panic!("should return full error"), } tx1.send(()).unwrap(); @@ -875,7 +830,7 
@@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) { - Err(ReadPoolError::UnifiedReadPoolFull) => {} + Err(ReadPoolError::FuturePoolFull(..)) => {} _ => panic!("should return full error"), } @@ -888,7 +843,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task5, CommandPri::Normal, 5, TaskMetadata::default(), None) { - Err(ReadPoolError::UnifiedReadPoolFull) => {} + Err(ReadPoolError::FuturePoolFull(..)) => {} _ => panic!("should return full error"), } } @@ -931,7 +886,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) { - Err(ReadPoolError::UnifiedReadPoolFull) => {} + Err(ReadPoolError::FuturePoolFull(..)) => {} _ => panic!("should return full error"), } @@ -948,7 +903,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task5, CommandPri::Normal, 5, TaskMetadata::default(), None) { - Err(ReadPoolError::UnifiedReadPoolFull) => {} + Err(ReadPoolError::FuturePoolFull(..)) => {} _ => panic!("should return full error"), } } From 58aed5779d75900f5f1d4a429de93a1149da647c Mon Sep 17 00:00:00 2001 From: lucasliang Date: Thu, 9 Nov 2023 23:18:42 +0800 Subject: [PATCH 1001/1149] alert: tackle the false-postive case where alerting `gc not work`. 
(#15948) close tikv/tikv#15796 Signed-off-by: lucasliang Co-authored-by: tonyxuqqi --- metrics/alertmanager/tikv.rules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/alertmanager/tikv.rules.yml b/metrics/alertmanager/tikv.rules.yml index bc092562773..aa8530df45f 100644 --- a/metrics/alertmanager/tikv.rules.yml +++ b/metrics/alertmanager/tikv.rules.yml @@ -15,7 +15,7 @@ groups: - alert: TiKV_GC_can_not_work expr: sum(increase(tikv_gcworker_gc_tasks_vec{task="gc"}[1d])) < 1 and (sum(increase(tikv_gc_compaction_filter_perform[1d])) < 1 and sum(increase(tikv_engine_event_total{db="kv", cf="write", type="compaction"}[1d])) >= 1) - for: 1m + for: 5m labels: env: ENV_LABELS_ENV level: emergency From 157e09d1b109a79e3353ca6d32bef1d4ef80dca7 Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 10 Nov 2023 11:24:42 +0800 Subject: [PATCH 1002/1149] server: Introduce heap profiling config (#15883) close tikv/tikv#15958 introduce heap profiling config Signed-off-by: Connor1996 Co-authored-by: tonyxuqqi --- cmd/tikv-server/src/main.rs | 3 + components/raftstore/src/store/config.rs | 23 ++- components/server/src/server.rs | 5 +- components/server/src/server2.rs | 3 +- components/server/src/setup.rs | 2 - components/tikv_alloc/src/default.rs | 9 ++ components/tikv_alloc/src/jemalloc.rs | 69 +++++++-- etc/config-template.toml | 12 ++ src/config/mod.rs | 142 ++++++++++++++++-- src/server/config.rs | 13 +- src/server/status_server/mod.rs | 1 + src/server/status_server/profile.rs | 12 +- tests/integrations/config/mod.rs | 17 +-- .../config/test-cache-compatible.toml | 2 + tests/integrations/config/test-custom.toml | 8 +- tests/integrations/config/test-default.toml | 2 + 16 files changed, 256 insertions(+), 67 deletions(-) diff --git a/cmd/tikv-server/src/main.rs b/cmd/tikv-server/src/main.rs index 9fdcad81c58..3f4372c32cc 100644 --- a/cmd/tikv-server/src/main.rs +++ b/cmd/tikv-server/src/main.rs @@ -217,6 +217,9 @@ fn main() { process::exit(1) } + // Init 
memory related settings. + config.memory.init(); + let (service_event_tx, service_event_rx) = tikv_util::mpsc::unbounded(); // pipe for controling service match config.storage.engine { EngineType::RaftKv => server::server::run_tikv(config, service_event_tx, service_event_rx), diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 3d1b58a6e75..facaa1514d8 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -104,12 +104,11 @@ pub struct Config { pub max_manual_flush_rate: f64, // When a peer is not responding for this time, leader will not keep entry cache for it. pub raft_entry_cache_life_time: ReadableDuration, - // Deprecated! The configuration has no effect. - // They are preserved for compatibility check. // When a peer is newly added, reject transferring leader to the peer for a while. #[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been removed. It has no effect"] pub raft_reject_transfer_leader_duration: ReadableDuration, /// Whether to disable checking quorum for the raft group. This will make @@ -329,27 +328,26 @@ pub struct Config { pub io_reschedule_concurrent_max_count: usize, pub io_reschedule_hotpot_duration: ReadableDuration, - // Deprecated! Batch is done in raft client. #[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been removed. Batch is done in raft client."] pub raft_msg_flush_interval: ReadableDuration, - // Deprecated! These configuration has been moved to Coprocessor. - // They are preserved for compatibility check. 
#[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been moved to coprocessor.region_max_size."] pub region_max_size: ReadableSize, #[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been moved to coprocessor.region_split_size."] pub region_split_size: ReadableSize, - // Deprecated! The time to clean stale peer safely can be decided based on RocksDB snapshot - // sequence number. #[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been removed. The time to clean stale peer safely can be decided based on RocksDB snapshot sequence number."] pub clean_stale_peer_delay: ReadableDuration, // Interval to inspect the latency of raftstore for slow store detection. @@ -409,6 +407,7 @@ pub struct Config { } impl Default for Config { + #[allow(deprecated)] fn default() -> Config { Config { prevote: true, diff --git a/components/server/src/server.rs b/components/server/src/server.rs index a4b6276a587..006750fd518 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -75,7 +75,9 @@ use security::SecurityManager; use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use snap_recovery::RecoveryService; use tikv::{ - config::{ConfigController, DbConfigManger, DbType, LogConfigManager, TikvConfig}, + config::{ + ConfigController, DbConfigManger, DbType, LogConfigManager, MemoryConfigManager, TikvConfig, + }, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, import::{ImportSstService, SstImporter}, @@ -506,6 +508,7 @@ where ); cfg_controller.register(tikv::config::Module::Log, Box::new(LogConfigManager)); + cfg_controller.register(tikv::config::Module::Memory, Box::new(MemoryConfigManager)); // Create cdc. 
let mut cdc_worker = Box::new(LazyWorker::new("cdc")); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 65d02f58c08..fdbb18b6205 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -73,7 +73,7 @@ use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use tikv::{ config::{ loop_registry, ConfigController, ConfigurableDb, DbConfigManger, DbType, LogConfigManager, - TikvConfig, + MemoryConfigManager, TikvConfig, }, coprocessor::{self, MEMTRACE_ROOT as MEMTRACE_COPROCESSOR}, coprocessor_v2, @@ -441,6 +441,7 @@ where ); cfg_controller.register(tikv::config::Module::Log, Box::new(LogConfigManager)); + cfg_controller.register(tikv::config::Module::Memory, Box::new(MemoryConfigManager)); let lock_mgr = LockManager::new(&self.core.config.pessimistic_txn); cfg_controller.register( diff --git a/components/server/src/setup.rs b/components/server/src/setup.rs index b758b9e39df..b11ffbc45b6 100644 --- a/components/server/src/setup.rs +++ b/components/server/src/setup.rs @@ -245,12 +245,10 @@ pub fn initial_metric(cfg: &MetricConfig) { pub fn overwrite_config_with_cmd_args(config: &mut TikvConfig, matches: &ArgMatches<'_>) { if let Some(level) = matches.value_of("log-level") { config.log.level = logger::get_level_by_string(level).unwrap().into(); - config.log_level = slog::Level::Info.into(); } if let Some(file) = matches.value_of("log-file") { config.log.file.filename = file.to_owned(); - config.log_file = "".to_owned(); } if let Some(addr) = matches.value_of("addr") { diff --git a/components/tikv_alloc/src/default.rs b/components/tikv_alloc/src/default.rs index 2674331c3cd..5133d76e172 100644 --- a/components/tikv_alloc/src/default.rs +++ b/components/tikv_alloc/src/default.rs @@ -8,6 +8,7 @@ use crate::AllocStats; pub fn dump_stats() -> String { String::new() } + pub fn dump_prof(_path: &str) -> ProfResult<()> { Err(ProfError::MemProfilingNotEnabled) } @@ -24,6 +25,14 @@ pub fn 
deactivate_prof() -> ProfResult<()> { Err(ProfError::MemProfilingNotEnabled) } +pub fn set_prof_sample(_rate: u64) -> ProfResult<()> { + Err(ProfError::MemProfilingNotEnabled) +} + +pub fn is_profiling_active() -> bool { + false +} + /// # Safety /// /// It is safe. The unsafe marker is just for matching the function signature. diff --git a/components/tikv_alloc/src/jemalloc.rs b/components/tikv_alloc/src/jemalloc.rs index 876afa9fcd5..245f6280b71 100644 --- a/components/tikv_alloc/src/jemalloc.rs +++ b/components/tikv_alloc/src/jemalloc.rs @@ -133,7 +133,7 @@ pub fn remove_thread_memory_accessor() { use std::thread::ThreadId; -pub use self::profiling::{activate_prof, deactivate_prof, dump_prof}; +pub use self::profiling::*; pub fn dump_stats() -> String { let mut buf = Vec::with_capacity(1024); @@ -311,6 +311,21 @@ mod profiling { // C string should end with a '\0'. const PROF_ACTIVE: &[u8] = b"prof.active\0"; const PROF_DUMP: &[u8] = b"prof.dump\0"; + const PROF_RESET: &[u8] = b"prof.reset\0"; + const OPT_PROF: &[u8] = b"opt.prof\0"; + + pub fn set_prof_sample(rate: u64) -> ProfResult<()> { + let rate = (rate as f64).log2().ceil() as usize; + unsafe { + if let Err(e) = tikv_jemalloc_ctl::raw::write(PROF_RESET, rate) { + return Err(ProfError::JemallocError(format!( + "failed to set prof sample: {}", + e + ))); + } + } + Ok(()) + } pub fn activate_prof() -> ProfResult<()> { unsafe { @@ -351,22 +366,44 @@ mod profiling { Ok(()) } + pub fn is_profiling_active() -> bool { + match unsafe { tikv_jemalloc_ctl::raw::read(PROF_ACTIVE) } { + Err(e) => { + panic!("is_profiling_active: {:?}", e); + } + Ok(prof) => prof, + } + } + + pub fn is_profiling_enabled() -> bool { + match unsafe { tikv_jemalloc_ctl::raw::read(OPT_PROF) } { + Err(e) => { + // Shouldn't be possible since mem-profiling is set + panic!("is_profiling_enabled: {:?}", e); + } + Ok(prof) => prof, + } + } + #[cfg(test)] mod tests { use std::fs; use tempfile::Builder; - const OPT_PROF: &[u8] = b"opt.prof\0"; + 
use super::*; - fn is_profiling_on() -> bool { - match unsafe { tikv_jemalloc_ctl::raw::read(OPT_PROF) } { - Err(e) => { - // Shouldn't be possible since mem-profiling is set - panic!("is_profiling_on: {:?}", e); - } - Ok(prof) => prof, - } + #[test] + #[ignore = "#ifdef MALLOC_CONF"] + fn test_profiling_active() { + // Make sure somebody has turned on profiling + assert!(is_profiling_enabled(), "set MALLOC_CONF=prof:true"); + activate_prof().unwrap(); + assert!(is_profiling_active()); + deactivate_prof().unwrap(); + assert!(!is_profiling_active()); + + super::set_prof_sample(512 * 1024 * 1024).unwrap(); } // Only trigger this test with jemallocs `opt.prof` set to @@ -382,7 +419,7 @@ mod profiling { #[ignore = "#ifdef MALLOC_CONF"] fn test_profiling_memory_ifdef_malloc_conf() { // Make sure somebody has turned on profiling - assert!(is_profiling_on(), "set MALLOC_CONF=prof:true"); + assert!(is_profiling_enabled(), "set MALLOC_CONF=prof:true"); let dir = Builder::new() .prefix("test_profiling_memory") @@ -391,11 +428,11 @@ mod profiling { let os_path = dir.path().to_path_buf().join("test1.dump").into_os_string(); let path = os_path.into_string().unwrap(); - super::dump_prof(&path).unwrap(); + dump_prof(&path).unwrap(); let os_path = dir.path().to_path_buf().join("test2.dump").into_os_string(); let path = os_path.into_string().unwrap(); - super::dump_prof(&path).unwrap(); + dump_prof(&path).unwrap(); let files = fs::read_dir(dir.path()).unwrap().count(); assert_eq!(files, 2); @@ -431,4 +468,10 @@ mod profiling { pub fn deactivate_prof() -> ProfResult<()> { Err(ProfError::MemProfilingNotEnabled) } + pub fn set_prof_sample(_rate: u64) -> ProfResult<()> { + Err(ProfError::MemProfilingNotEnabled) + } + pub fn is_profiling_active() -> bool { + false + } } diff --git a/etc/config-template.toml b/etc/config-template.toml index 3c8a6015910..3e55004feb2 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -83,6 +83,18 @@ ## maximum number of old log files 
to retain # max-backups = 0 +[memory] +## Whether enable the heap profiling which may have a bit performance overhead about 2% for the +## default sample rate. +# enable-heap-profiling = true + +## Average interval between allocation samples, as measured in bytes of allocation activity. +## Increasing the sampling interval decreases profile fidelity, but also decreases the +## computational overhead. +## The default sample interval is 512 KB. It only accepts power of two, otherwise it will be +## rounded up to the next power of two. +# profiling-sample-per-bytes = "512KB" + ## Configurations for the single thread pool serving read requests. [readpool.unified] ## The minimal working thread count of the thread pool. diff --git a/src/config/mod.rs b/src/config/mod.rs index 237ac3c7a72..b192a7ac5f7 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -82,6 +82,7 @@ use crate::{ server::{ gc_worker::{GcConfig, RawCompactionFilterFactory, WriteCompactionFilterFactory}, lock_manager::Config as PessimisticTxnConfig, + status_server::HEAP_PROFILE_ACTIVE, ttl::TtlCompactionFilterFactory, Config as ServerConfig, CONFIG_ROCKSDB_GAUGE, }, @@ -1263,10 +1264,10 @@ pub struct DbConfig { #[serde(with = "rocks_config::rate_limiter_mode_serde")] #[online_config(skip)] pub rate_limiter_mode: DBRateLimiterMode, - // deprecated. use rate_limiter_auto_tuned. - #[online_config(skip)] + #[online_config(hidden)] #[doc(hidden)] #[serde(skip_serializing)] + #[deprecated = "The configuration has been removed. Use `rate_limiter_auto_tuned` instead"] pub auto_tuned: Option, pub rate_limiter_auto_tuned: bool, pub bytes_per_sync: ReadableSize, @@ -1318,6 +1319,7 @@ pub struct DbResources { } impl Default for DbConfig { + #[allow(deprecated)] fn default() -> DbConfig { DbConfig { wal_recovery_mode: DBRecoveryMode::PointInTime, @@ -2965,13 +2967,15 @@ pub struct CdcConfig { pub old_value_cache_memory_quota: ReadableSize, // Deprecated! preserved for compatibility check. 
- #[online_config(skip)] + #[online_config(hidden)] #[doc(hidden)] #[serde(skip_serializing)] + #[deprecated = "The configuration has been removed."] pub old_value_cache_size: usize, } impl Default for CdcConfig { + #[allow(deprecated)] fn default() -> Self { Self { min_ts_interval: ReadableDuration::secs(1), @@ -3211,6 +3215,72 @@ impl ConfigManager for LogConfigManager { } } +#[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] +#[serde(default)] +#[serde(rename_all = "kebab-case")] +pub struct MemoryConfig { + // Whether enable the heap profiling which may have a bit performance overhead about 2% for the + // default sample rate. + pub enable_heap_profiling: bool, + + // Average interval between allocation samples, as measured in bytes of allocation activity. + // Increasing the sampling interval decreases profile fidelity, but also decreases the + // computational overhead. + // The default sample interval is 512 KB. It only accepts power of two, otherwise it will be + // rounded up to the next power of two. 
+ pub profiling_sample_per_bytes: ReadableSize, +} + +impl Default for MemoryConfig { + fn default() -> Self { + Self { + enable_heap_profiling: true, + profiling_sample_per_bytes: ReadableSize::kb(512), + } + } +} + +impl MemoryConfig { + pub fn init(&self) { + if self.enable_heap_profiling { + let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); + if let Err(e) = tikv_alloc::activate_prof() { + error!("failed to enable heap profiling"; "err" => ?e); + return; + } + *activate = Some(None); + tikv_alloc::set_prof_sample(self.profiling_sample_per_bytes.0).unwrap(); + } + } +} + +pub struct MemoryConfigManager; + +impl ConfigManager for MemoryConfigManager { + fn dispatch(&mut self, changes: ConfigChange) -> CfgResult<()> { + if let Some(ConfigValue::Bool(enable)) = changes.get("enable_heap_profiling") { + if *enable { + let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); + // already enabled by HTTP API, do nothing + if activate.is_none() { + tikv_alloc::activate_prof()?; + *activate = Some(None); + } + } else { + let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); + tikv_alloc::deactivate_prof()?; + *activate = None; + } + } + + if let Some(ConfigValue::Size(sample_rate)) = changes.get("profiling_sample_per_bytes") { + tikv_alloc::set_prof_sample(*sample_rate).unwrap(); + } + info!("update memory config"; "config" => ?changes); + Ok(()) + } +} + #[derive(Clone, Serialize, Deserialize, PartialEq, Debug, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] @@ -3261,21 +3331,29 @@ pub struct TikvConfig { #[online_config(hidden)] pub cfg_path: String, - // Deprecated! These configuration has been moved to LogConfig. - // They are preserved for compatibility check. 
#[doc(hidden)] - #[online_config(skip)] + #[online_config(hidden)] + #[serde(skip_serializing)] + #[deprecated = "The configuration has been moved to log.level."] pub log_level: LogLevel, #[doc(hidden)] - #[online_config(skip)] + #[online_config(hidden)] + #[serde(skip_serializing)] + #[deprecated = "The configuration has been moved to log.file.filename."] pub log_file: String, #[doc(hidden)] - #[online_config(skip)] + #[online_config(hidden)] + #[serde(skip_serializing)] + #[deprecated = "The configuration has been moved to log.format."] pub log_format: LogFormat, - #[online_config(skip)] + #[online_config(hidden)] + #[serde(skip_serializing)] + #[deprecated = "The configuration has been moved to log.file.max_days."] pub log_rotation_timespan: ReadableDuration, #[doc(hidden)] - #[online_config(skip)] + #[online_config(hidden)] + #[serde(skip_serializing)] + #[deprecated = "The configuration has been moved to log.file.max_size."] pub log_rotation_size: ReadableSize, #[online_config(skip)] @@ -3306,6 +3384,9 @@ pub struct TikvConfig { #[online_config(submodule)] pub log: LogConfig, + #[online_config(submodule)] + pub memory: MemoryConfig, + #[online_config(submodule)] pub quota: QuotaConfig, @@ -3383,6 +3464,7 @@ pub struct TikvConfig { } impl Default for TikvConfig { + #[allow(deprecated)] fn default() -> TikvConfig { TikvConfig { cfg_path: "".to_owned(), @@ -3399,6 +3481,7 @@ impl Default for TikvConfig { memory_usage_limit: None, memory_usage_high_water: 0.9, log: LogConfig::default(), + memory: MemoryConfig::default(), quota: QuotaConfig::default(), readpool: ReadPoolConfig::default(), server: ServerConfig::default(), @@ -3777,6 +3860,7 @@ impl TikvConfig { // As the init of `logger` is very early, this adjust needs to be separated and // called immediately after parsing the command line. 
+ #[allow(deprecated)] pub fn logger_compatible_adjust(&mut self) { let default_tikv_cfg = TikvConfig::default(); let default_log_cfg = LogConfig::default(); @@ -3828,6 +3912,7 @@ impl TikvConfig { } } + #[allow(deprecated)] pub fn compatible_adjust(&mut self) { let default_raft_store = RaftstoreConfig::default(); let default_coprocessor = CopConfig::default(); @@ -4435,6 +4520,7 @@ pub enum Module { BackupStream, Quota, Log, + Memory, Unknown(String), } @@ -4463,6 +4549,7 @@ impl From<&str> for Module { "resource_metering" => Module::ResourceMetering, "quota" => Module::Quota, "log" => Module::Log, + "memory" => Module::Memory, n => Module::Unknown(n.to_owned()), } } @@ -4766,7 +4853,7 @@ mod tests { assert_eq!(last_cfg_metadata.modified().unwrap(), first_modified); // write to file when config is the inequivalent of last one. - cfg.log_level = slog::Level::Warning.into(); + cfg.log.level = slog::Level::Warning.into(); persist_config(&cfg).unwrap(); last_cfg_metadata = last_cfg_path.metadata().unwrap(); assert_ne!(last_cfg_metadata.modified().unwrap(), first_modified); @@ -5364,7 +5451,7 @@ mod tests { } #[test] - fn test_change_logconfig() { + fn test_change_log_config() { let (cfg, _dir) = TikvConfig::with_tmp().unwrap(); let cfg_controller = ConfigController::new(cfg); @@ -5386,6 +5473,37 @@ mod tests { ); } + #[test] + #[cfg(feature = "mem-profiling")] + fn test_change_memory_config() { + let (cfg, _dir) = TikvConfig::with_tmp().unwrap(); + let cfg_controller = ConfigController::new(cfg); + + cfg_controller.register(Module::Memory, Box::new(MemoryConfigManager)); + cfg_controller + .update_config("memory.enable_heap_profiling", "false") + .unwrap(); + assert_eq!(tikv_alloc::is_profiling_active(), false); + cfg_controller + .update_config("memory.enable_heap_profiling", "true") + .unwrap(); + assert_eq!(tikv_alloc::is_profiling_active(), true); + + cfg_controller + .update_config("memory.profiling_sample_per_bytes", "1MB") + .unwrap(); + assert_eq!( + 
cfg_controller + .get_current() + .memory + .profiling_sample_per_bytes, + ReadableSize::mb(1), + ); + cfg_controller + .update_config("memory.profiling_sample_per_bytes", "invalid") + .unwrap_err(); + } + #[test] fn test_dispatch_titan_blob_run_mode_config() { let mut cfg = TikvConfig::default(); diff --git a/src/server/config.rs b/src/server/config.rs index 013d1a66238..4e66e5802c0 100644 --- a/src/server/config.rs +++ b/src/server/config.rs @@ -187,26 +187,27 @@ pub struct Config { #[online_config(skip)] pub labels: HashMap, - // deprecated. use readpool.coprocessor.xx_concurrency. #[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been moved to readpool.coprocessor.*_concurrency."] pub end_point_concurrency: Option, - // deprecated. use readpool.coprocessor.stack_size. #[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been moved to readpool.coprocessor.stack_size."] pub end_point_stack_size: Option, - // deprecated. use readpool.coprocessor.max_tasks_per_worker_xx. 
#[doc(hidden)] #[serde(skip_serializing)] - #[online_config(skip)] + #[online_config(hidden)] + #[deprecated = "The configuration has been moved to readpool.coprocessor.max_tasks_per_worker_*."] pub end_point_max_tasks: Option, } impl Default for Config { + #[allow(deprecated)] fn default() -> Config { let cpu_num = SysQuota::cpu_cores_quota(); let background_thread_count = if cpu_num > 16.0 { 3 } else { 2 }; diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index b76454ffab8..60b267a6d94 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -40,6 +40,7 @@ use openssl::{ x509::X509, }; use pin_project::pin_project; +pub use profile::HEAP_PROFILE_ACTIVE; use profile::*; use prometheus::TEXT_FORMAT; use regex::Regex; diff --git a/src/server/status_server/profile.rs b/src/server/status_server/profile.rs index 3941c6c12b6..dbf819b35fe 100644 --- a/src/server/status_server/profile.rs +++ b/src/server/status_server/profile.rs @@ -37,7 +37,7 @@ lazy_static! { // If it's some it means there are already a CPU profiling. static ref CPU_PROFILE_ACTIVE: Mutex> = Mutex::new(None); // If it's some it means there are already a heap profiling. The channel is used to deactivate a profiling. - static ref HEAP_PROFILE_ACTIVE: Mutex>, TempDir)>> = Mutex::new(None); + pub static ref HEAP_PROFILE_ACTIVE: Mutex, TempDir)>>> = Mutex::new(None); // To normalize thread names. 
static ref THREAD_NAME_RE: Regex = @@ -129,7 +129,7 @@ where let on_start = move || { let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); assert!(activate.is_none()); - *activate = Some((Some(tx), dir)); + *activate = Some(Some((tx, dir))); activate_prof().map_err(|e| format!("activate_prof: {}", e))?; callback(); info!("periodical heap profiling is started"); @@ -168,9 +168,11 @@ where pub fn deactivate_heap_profile() -> bool { let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); match activate.as_mut() { - Some((tx, _)) => { - if let Some(tx) = tx.take() { + Some(tx) => { + if let Some((tx, _)) = tx.take() { let _ = tx.send(()); + } else { + *activate = None; } true } @@ -277,7 +279,7 @@ pub fn heap_profiles_dir() -> Option { .lock() .unwrap() .as_ref() - .map(|(_, dir)| dir.path().to_owned()) + .and_then(|v| v.as_ref().map(|(_, dir)| dir.path().to_owned())) } pub fn list_heap_profiles() -> Result, String> { diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 1239aa53fb8..2ab4ce5cc09 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -65,7 +65,6 @@ fn read_file_in_project_dir(path: &str) -> String { #[test] fn test_serde_custom_tikv_config() { let mut value = TikvConfig::default(); - value.log_rotation_timespan = ReadableDuration::days(1); value.log.level = Level::Critical.into(); value.log.file.filename = "foo".to_owned(); value.log.format = LogFormat::Json; @@ -77,6 +76,8 @@ fn test_serde_custom_tikv_config() { value.abort_on_panic = true; value.memory_usage_limit = Some(ReadableSize::gb(10)); value.memory_usage_high_water = 0.65; + value.memory.enable_heap_profiling = false; + value.memory.profiling_sample_per_bytes = ReadableSize::mb(1); value.server = ServerConfig { cluster_id: 0, // KEEP IT ZERO, it is skipped by serde. 
addr: "example.com:443".to_owned(), @@ -103,9 +104,6 @@ fn test_serde_custom_tikv_config() { grpc_stream_initial_window_size: ReadableSize(12_345), grpc_keepalive_time: ReadableDuration::secs(3), grpc_keepalive_timeout: ReadableDuration::secs(60), - end_point_concurrency: None, - end_point_max_tasks: None, - end_point_stack_size: None, end_point_recursion_limit: 100, end_point_stream_channel_size: 16, end_point_batch_row_limit: 64, @@ -125,6 +123,7 @@ fn test_serde_custom_tikv_config() { forward_max_connections_per_address: 5, reject_messages_on_memory_ratio: 0.8, simplify_metrics: false, + ..Default::default() }; value.readpool = ReadPoolConfig { unified: UnifiedReadPoolConfig { @@ -191,11 +190,9 @@ fn test_serde_custom_tikv_config() { raft_engine_purge_interval: ReadableDuration::minutes(20), max_manual_flush_rate: 5.0, raft_entry_cache_life_time: ReadableDuration::secs(12), - raft_reject_transfer_leader_duration: ReadableDuration::secs(3), split_region_check_tick_interval: ReadableDuration::secs(12), region_split_check_diff: Some(ReadableSize::mb(20)), region_compact_check_interval: ReadableDuration::secs(12), - clean_stale_peer_delay: ReadableDuration::secs(0), region_compact_check_step: Some(1_234), region_compact_min_tombstones: 999, region_compact_tombstones_percent: 33, @@ -231,8 +228,6 @@ fn test_serde_custom_tikv_config() { use_delete_range: true, snap_generator_pool_size: 2, cleanup_import_sst_interval: ReadableDuration::minutes(12), - region_max_size: ReadableSize(0), - region_split_size: ReadableSize(0), local_read_batch_size: 33, apply_batch_system, store_batch_system, @@ -253,7 +248,6 @@ fn test_serde_custom_tikv_config() { io_reschedule_hotpot_duration: ReadableDuration::secs(4321), inspect_interval: ReadableDuration::millis(444), report_min_resolved_ts_interval: ReadableDuration::millis(233), - raft_msg_flush_interval: ReadableDuration::micros(250), check_leader_lease_interval: ReadableDuration::millis(123), renew_leader_lease_advance_duration: 
ReadableDuration::millis(456), reactive_memory_lock_tick_interval: ReadableDuration::millis(566), @@ -272,6 +266,7 @@ fn test_serde_custom_tikv_config() { unsafe_disable_check_quorum: false, periodic_full_compact_start_times: ReadableSchedule::default(), periodic_full_compact_start_max_cpu: 0.1, + ..Default::default() }; value.pd = PdConfig::new(vec!["example.com:443".to_owned()]); let titan_cf_config = TitanCfConfig { @@ -318,7 +313,6 @@ fn test_serde_custom_tikv_config() { rate_bytes_per_sec: ReadableSize::kb(1), rate_limiter_refill_period: ReadableDuration::millis(10), rate_limiter_mode: DBRateLimiterMode::AllIo, - auto_tuned: None, rate_limiter_auto_tuned: false, bytes_per_sync: ReadableSize::mb(1), wal_bytes_per_sync: ReadableSize::kb(32), @@ -616,6 +610,7 @@ fn test_serde_custom_tikv_config() { write_buffer_limit: None, }, titan: titan_db_config.clone(), + ..Default::default() }; value.raftdb = RaftDbConfig { info_log_level: LogLevel::Info, @@ -846,7 +841,6 @@ fn test_serde_custom_tikv_config() { }; value.cdc = CdcConfig { min_ts_interval: ReadableDuration::secs(4), - old_value_cache_size: 0, hibernate_regions_compatible: false, incremental_scan_threads: 3, incremental_scan_concurrency: 4, @@ -856,6 +850,7 @@ fn test_serde_custom_tikv_config() { tso_worker_threads: 2, old_value_cache_memory_quota: ReadableSize::mb(14), sink_memory_quota: ReadableSize::mb(7), + ..Default::default() }; value.resolved_ts = ResolvedTsConfig { enable: true, diff --git a/tests/integrations/config/test-cache-compatible.toml b/tests/integrations/config/test-cache-compatible.toml index 9fce88833ed..f91b5cdafc3 100644 --- a/tests/integrations/config/test-cache-compatible.toml +++ b/tests/integrations/config/test-cache-compatible.toml @@ -2,6 +2,8 @@ [log.file] +[memory] + [readpool.coprocessor] [readpool.storage] diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index ef7a4809168..a9772e285af 100644 --- 
a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -1,9 +1,5 @@ -log-level = "info" -log-file = "" -log-format = "text" slow-log-file = "slow_foo" slow-log-threshold = "1s" -log-rotation-timespan = "1d" panic-when-unexpected-key-or-data = true abort-on-panic = true memory-usage-limit = "10GB" @@ -19,6 +15,10 @@ max-size = 1 max-backups = 2 max-days = 3 +[memory] +enable-heap-profiling = false +profiling-sample-per-bytes = "1MB" + [readpool.unified] min-thread-count = 5 max-thread-count = 10 diff --git a/tests/integrations/config/test-default.toml b/tests/integrations/config/test-default.toml index 23e53b9daf3..ca1abc0081b 100644 --- a/tests/integrations/config/test-default.toml +++ b/tests/integrations/config/test-default.toml @@ -2,6 +2,8 @@ [log.file] +[memory] + [readpool.unified] [readpool.storage] From 5f2f5e7e4d6266e4df891dd6c49f2153b6bcfdff Mon Sep 17 00:00:00 2001 From: qupeng Date: Fri, 10 Nov 2023 17:26:13 +0800 Subject: [PATCH 1003/1149] cdc: notify pending tasks if associated regions change (#15947) close tikv/tikv#15910 Signed-off-by: qupeng Signed-off-by: qupeng Co-authored-by: Ping Yu --- components/cdc/src/endpoint.rs | 7 +++- components/cdc/src/initializer.rs | 41 +++++++++++++++---- components/cdc/src/observer.rs | 30 ++++++++------ .../cdc/tests/failpoints/test_endpoint.rs | 26 ++++++++++++ .../cdc/tests/failpoints/test_register.rs | 6 ++- 5 files changed, 86 insertions(+), 24 deletions(-) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 82233af8f14..e62650c77c6 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -809,7 +809,6 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, E: KvEngine, S: StoreRegionMeta> Endpoint { CDC_SCAN_TASKS.with_label_values(&["finish"]).inc(); } diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 2882d2e975e..ef39a693e3e 100644 --- a/components/cdc/src/initializer.rs +++ 
b/components/cdc/src/initializer.rs @@ -90,7 +90,6 @@ pub(crate) struct Initializer { pub(crate) request_id: u64, pub(crate) checkpoint_ts: TimeStamp, - pub(crate) scan_concurrency_semaphore: Arc, pub(crate) scan_speed_limiter: Limiter, pub(crate) fetch_speed_limiter: Limiter, @@ -110,9 +109,11 @@ impl Initializer { &mut self, change_observer: ChangeObserver, cdc_handle: T, + concurrency_semaphore: Arc, memory_quota: Arc, ) -> Result<()> { fail_point!("cdc_before_initialize"); + let _permit = concurrency_semaphore.acquire().await; // To avoid holding too many snapshots and holding them too long, // we need to acquire scan concurrency permit before taking snapshot. @@ -188,8 +189,6 @@ impl Initializer { region: Region, memory_quota: Arc, ) -> Result<()> { - let scan_concurrency_semaphore = self.scan_concurrency_semaphore.clone(); - let _permit = scan_concurrency_semaphore.acquire().await; CDC_SCAN_TASKS.with_label_values(&["ongoing"]).inc(); defer!(CDC_SCAN_TASKS.with_label_values(&["ongoing"]).dec()); @@ -655,7 +654,6 @@ mod tests { conn_id: ConnId::new(), request_id: 0, checkpoint_ts: 1.into(), - scan_concurrency_semaphore: Arc::new(Semaphore::new(1)), scan_speed_limiter: Limiter::new(scan_limit as _), fetch_speed_limiter: Limiter::new(fetch_limit as _), max_scan_batch_bytes: 1024 * 1024, @@ -1034,26 +1032,51 @@ mod tests { let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); let raft_router = CdcRaftRouter(MockRaftStoreRouter::new()); + let concurrency_semaphore = Arc::new(Semaphore::new(1)); let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); initializer.downstream_state.store(DownstreamState::Stopped); - block_on(initializer.initialize(change_cmd, raft_router.clone(), memory_quota.clone())) - .unwrap_err(); + block_on(initializer.initialize( + change_cmd, + raft_router.clone(), + concurrency_semaphore.clone(), + memory_quota.clone(), + )) + .unwrap_err(); + + let (tx, rx) = sync_channel(1); + let concurrency_semaphore_ = 
concurrency_semaphore.clone(); + pool.spawn(async move { + let _permit = concurrency_semaphore_.acquire().await; + tx.send(()).unwrap(); + tx.send(()).unwrap(); + tx.send(()).unwrap(); + }); + rx.recv_timeout(Duration::from_millis(200)).unwrap(); let (tx1, rx1) = sync_channel(1); let change_cmd = ChangeObserver::from_cdc(1, ObserveHandle::new()); pool.spawn(async move { // Migrated to 2021 migration. This let statement is probably not needed, see // https://doc.rust-lang.org/edition-guide/rust-2021/disjoint-capture-in-closures.html + let _ = ( + &initializer, + &change_cmd, + &raft_router, + &concurrency_semaphore, + ); let res = initializer - .initialize(change_cmd, raft_router, memory_quota) + .initialize(change_cmd, raft_router, concurrency_semaphore, memory_quota) .await; tx1.send(res).unwrap(); }); + // Must timeout because there is no enough permit. + rx1.recv_timeout(Duration::from_millis(200)).unwrap_err(); - // Shouldn't timeout, gets an error instead. + // Release the permit + rx.recv_timeout(Duration::from_millis(200)).unwrap(); let res = rx1.recv_timeout(Duration::from_millis(200)).unwrap(); - assert!(res.is_err()); + res.unwrap_err(); worker.stop(); } diff --git a/components/cdc/src/observer.rs b/components/cdc/src/observer.rs index aac2842e404..cfcedfeb59d 100644 --- a/components/cdc/src/observer.rs +++ b/components/cdc/src/observer.rs @@ -177,20 +177,26 @@ impl RegionChangeObserver for CdcObserver { event: RegionChangeEvent, _: StateRole, ) { - if let RegionChangeEvent::Destroy = event { - let region_id = ctx.region().get_id(); - if let Some(observe_id) = self.is_subscribed(region_id) { - // Unregister all downstreams. 
- let store_err = RaftStoreError::RegionNotFound(region_id); - let deregister = Deregister::Delegate { - region_id, - observe_id, - err: CdcError::request(store_err.into()), - }; - if let Err(e) = self.sched.schedule(Task::Deregister(deregister)) { - error!("cdc schedule cdc task failed"; "error" => ?e); + match event { + RegionChangeEvent::Destroy + | RegionChangeEvent::Update( + RegionChangeReason::Split | RegionChangeReason::CommitMerge, + ) => { + let region_id = ctx.region().get_id(); + if let Some(observe_id) = self.is_subscribed(region_id) { + // Unregister all downstreams. + let store_err = RaftStoreError::RegionNotFound(region_id); + let deregister = Deregister::Delegate { + region_id, + observe_id, + err: CdcError::request(store_err.into()), + }; + if let Err(e) = self.sched.schedule(Task::Deregister(deregister)) { + error!("cdc schedule cdc task failed"; "error" => ?e); + } } } + _ => {} } } } diff --git a/components/cdc/tests/failpoints/test_endpoint.rs b/components/cdc/tests/failpoints/test_endpoint.rs index f7cc387625d..42977cc3856 100644 --- a/components/cdc/tests/failpoints/test_endpoint.rs +++ b/components/cdc/tests/failpoints/test_endpoint.rs @@ -569,3 +569,29 @@ fn test_cdc_stream_multiplexing() { } assert!(request_2_ready); } + +// This case tests pending regions can still get region split/merge +// notifications. 
+#[test] +fn test_cdc_notify_pending_regions() { + let cluster = new_server_cluster(0, 1); + cluster.pd_client.disable_default_operator(); + let mut suite = TestSuiteBuilder::new().cluster(cluster).build(); + let region = suite.cluster.get_region(&[]); + let rid = region.id; + let (mut req_tx, _, receive_event) = new_event_feed_v2(suite.get_region_cdc_client(rid)); + + fail::cfg("cdc_before_initialize", "pause").unwrap(); + let mut req = suite.new_changedata_request(rid); + req.request_id = 1; + block_on(req_tx.send((req, WriteFlags::default()))).unwrap(); + + thread::sleep(Duration::from_millis(100)); + suite.cluster.must_split(®ion, b"x"); + let event = receive_event(false); + matches!( + event.get_events()[0].event, + Some(Event_oneof_event::Error(ref e)) if e.has_region_not_found(), + ); + fail::remove("cdc_before_initialize"); +} diff --git a/components/cdc/tests/failpoints/test_register.rs b/components/cdc/tests/failpoints/test_register.rs index 4558397f8a9..2b6be3744af 100644 --- a/components/cdc/tests/failpoints/test_register.rs +++ b/components/cdc/tests/failpoints/test_register.rs @@ -165,7 +165,11 @@ fn test_connections_register_impl() { let mut events = receive_event(false).events.to_vec(); match events.pop().unwrap().event.unwrap() { Event_oneof_event::Error(err) => { - assert!(err.has_epoch_not_match(), "{:?}", err); + assert!( + err.has_epoch_not_match() || err.has_region_not_found(), + "{:?}", + err + ); } other => panic!("unknown event {:?}", other), } From 91b35fb8d3f8507e8fcb4217ce1de5169d202764 Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 10 Nov 2023 20:39:42 +0800 Subject: [PATCH 1004/1149] resource_control: support automatically tuning priority resource limiters (#15929) close tikv/tikv#15917 Signed-off-by: glorv --- components/resource_control/src/future.rs | 8 +- components/resource_control/src/lib.rs | 14 +- components/resource_control/src/metrics.rs | 6 + .../resource_control/src/resource_group.rs | 29 +- 
.../resource_control/src/resource_limiter.rs | 28 +- components/resource_control/src/worker.rs | 356 +++++++++++++++++- components/server/src/server.rs | 3 +- components/server/src/server2.rs | 3 +- components/tikv_util/src/yatp_pool/metrics.rs | 4 +- components/tikv_util/src/yatp_pool/mod.rs | 60 ++- src/read_pool.rs | 42 ++- src/server/service/kv.rs | 62 +-- src/storage/mod.rs | 23 +- src/storage/txn/sched_pool.rs | 30 +- src/storage/txn/scheduler.rs | 29 +- tests/failpoints/cases/test_storage.rs | 4 +- 16 files changed, 603 insertions(+), 98 deletions(-) diff --git a/components/resource_control/src/future.rs b/components/resource_control/src/future.rs index a935c3b41fa..53bca48b301 100644 --- a/components/resource_control/src/future.rs +++ b/components/resource_control/src/future.rs @@ -274,7 +274,13 @@ mod tests { .name_prefix("test") .build_future_pool(); - let resource_limiter = Arc::new(ResourceLimiter::new("".into(), f64::INFINITY, 1000.0, 0)); + let resource_limiter = Arc::new(ResourceLimiter::new( + "".into(), + f64::INFINITY, + 1000.0, + 0, + true, + )); fn spawn_and_wait(pool: &FuturePool, f: F, limiter: Arc) where diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index 6cfd24914a1..a7b4cf03192 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -1,6 +1,7 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
#![feature(test)] #![feature(local_key_cell_methods)] +#![feature(array_zip)] use std::sync::Arc; @@ -10,8 +11,8 @@ use serde::{Deserialize, Serialize}; mod resource_group; pub use resource_group::{ - ResourceConsumeType, ResourceController, ResourceGroupManager, TaskMetadata, - MIN_PRIORITY_UPDATE_INTERVAL, + priority_from_task_meta, ResourceConsumeType, ResourceController, ResourceGroupManager, + TaskMetadata, MIN_PRIORITY_UPDATE_INTERVAL, }; mod future; @@ -29,7 +30,9 @@ pub use channel::ResourceMetered; mod resource_limiter; pub use resource_limiter::ResourceLimiter; use tikv_util::worker::Worker; -use worker::{GroupQuotaAdjustWorker, BACKGROUND_LIMIT_ADJUST_DURATION}; +use worker::{ + GroupQuotaAdjustWorker, PriorityLimiterAdjustWorker, BACKGROUND_LIMIT_ADJUST_DURATION, +}; mod metrics; pub mod worker; @@ -66,10 +69,13 @@ pub fn start_periodic_tasks( bg_worker.spawn_async_task(async move { resource_mgr_service_clone.watch_resource_groups().await; }); - // spawn a task to auto adjust background quota limiter. + // spawn a task to auto adjust background quota limiter and priority quota + // limiter. let mut worker = GroupQuotaAdjustWorker::new(mgr.clone(), io_bandwidth); + let mut priority_worker = PriorityLimiterAdjustWorker::new(mgr.clone()); bg_worker.spawn_interval_task(BACKGROUND_LIMIT_ADJUST_DURATION, move || { worker.adjust_quota(); + priority_worker.adjust(); }); // spawn a task to periodically upload resource usage statistics to PD. bg_worker.spawn_async_task(async move { diff --git a/components/resource_control/src/metrics.rs b/components/resource_control/src/metrics.rs index 16338f41c6c..c9404092501 100644 --- a/components/resource_control/src/metrics.rs +++ b/components/resource_control/src/metrics.rs @@ -22,6 +22,12 @@ lazy_static! 
{ &["name"] ) .unwrap(); + pub static ref PRIORITY_QUOTA_LIMIT_VEC: IntGaugeVec = register_int_gauge_vec!( + "tikv_resource_control_priority_quota_limit", + "The quota limiter for each priority in resource control", + &["priority"] + ) + .unwrap(); } pub fn deregister_metrics(name: &str) { diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index b7e7ca28705..b45a9833bb8 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -41,7 +41,6 @@ const DEFAULT_MAX_RU_QUOTA: u64 = 10_000; /// The maximum RU quota that can be configured. const MAX_RU_QUOTA: u64 = i32::MAX as u64; -#[cfg(test)] const LOW_PRIORITY: u32 = 1; const MEDIUM_PRIORITY: u32 = 8; #[cfg(test)] @@ -57,7 +56,7 @@ pub enum ResourceConsumeType { IoBytes(u64), } -#[derive(Copy, Clone, Eq, PartialEq, EnumCount, EnumIter)] +#[derive(Copy, Clone, Eq, PartialEq, EnumCount, EnumIter, Debug)] #[repr(usize)] pub enum TaskPriority { High = 0, @@ -110,6 +109,7 @@ impl Default for ResourceGroupManager { f64::INFINITY, f64::INFINITY, 0, + false, )) }) .collect::>() @@ -206,6 +206,7 @@ impl ResourceGroupManager { f64::INFINITY, f64::INFINITY, version, + true, ))) }) } else { @@ -304,6 +305,14 @@ impl ResourceGroupManager { self.get_group_count() > 1 } + /// return the priority of target resource group. + #[inline] + pub fn get_resource_group_priority(&self, group: &str) -> u32 { + self.resource_groups + .get(group) + .map_or(LOW_PRIORITY, |g| g.group.priority) + } + // Always return the background resource limiter if any; // Only return the foregroup limiter when priority is enabled. 
pub fn get_resource_limiter( @@ -371,6 +380,11 @@ impl ResourceGroupManager { group_priority.unwrap_or(default_group.group.priority), ) } + + #[inline] + pub fn get_priority_resource_limiters(&self) -> [Arc; 3] { + self.priority_limiters.clone() + } } pub(crate) struct ResourceGroup { @@ -708,7 +722,7 @@ impl<'a> TaskMetadata<'a> { self.metadata.into_owned() } - fn override_priority(&self) -> u32 { + pub fn override_priority(&self) -> u32 { if self.metadata.is_empty() { return 0; } @@ -734,6 +748,15 @@ impl<'a> TaskMetadata<'a> { } } +// return the TaskPriority value from task metadata. +// This function is used for handling thread pool task waiting metrics. +pub fn priority_from_task_meta(meta: &[u8]) -> usize { + let priority = TaskMetadata::from_bytes(meta).override_priority(); + // mapping (high(15), medium(8), low(1)) -> (0, 1, 2) + debug_assert!(priority <= 16); + TaskPriority::from(priority) as usize +} + impl TaskPriorityProvider for ResourceController { fn priority_of(&self, extras: &yatp::queue::Extras) -> u64 { let metadata = TaskMetadata::from_bytes(extras.metadata()); diff --git a/components/resource_control/src/resource_limiter.rs b/components/resource_control/src/resource_limiter.rs index 8898b4eba23..bce6867ac2e 100644 --- a/components/resource_control/src/resource_limiter.rs +++ b/components/resource_control/src/resource_limiter.rs @@ -39,6 +39,8 @@ pub struct ResourceLimiter { name: String, version: u64, limiters: [QuotaLimiter; ResourceType::COUNT], + // whether the resource limiter is a background limiter or priority limiter. 
+ is_background: bool, } impl std::fmt::Debug for ResourceLimiter { @@ -48,16 +50,27 @@ impl std::fmt::Debug for ResourceLimiter { } impl ResourceLimiter { - pub fn new(name: String, cpu_limit: f64, io_limit: f64, version: u64) -> Self { + pub fn new( + name: String, + cpu_limit: f64, + io_limit: f64, + version: u64, + is_background: bool, + ) -> Self { let cpu_limiter = QuotaLimiter::new(cpu_limit); let io_limiter = QuotaLimiter::new(io_limit); Self { name, version, limiters: [cpu_limiter, io_limiter], + is_background, } } + pub fn is_background(&self) -> bool { + self.is_background + } + pub fn consume(&self, cpu_time: Duration, io_bytes: IoBytes) -> Duration { let cpu_dur = self.limiters[ResourceType::Cpu as usize].consume(cpu_time.as_micros() as u64); @@ -86,7 +99,7 @@ impl ResourceLimiter { } pub(crate) fn get_limit_statistics(&self, ty: ResourceType) -> GroupStatistics { - let (total_consumed, total_wait_dur_us, read_consumed, write_consumed) = + let (total_consumed, total_wait_dur_us, read_consumed, write_consumed, request_count) = self.limiters[ty as usize].get_statistics(); GroupStatistics { version: self.version, @@ -94,6 +107,7 @@ impl ResourceLimiter { total_wait_dur_us, read_consumed, write_consumed, + request_count, } } } @@ -104,6 +118,7 @@ pub(crate) struct QuotaLimiter { total_wait_dur_us: AtomicU64, read_bytes: AtomicU64, write_bytes: AtomicU64, + req_count: AtomicU64, } impl QuotaLimiter { @@ -113,6 +128,7 @@ impl QuotaLimiter { total_wait_dur_us: AtomicU64::new(0), read_bytes: AtomicU64::new(0), write_bytes: AtomicU64::new(0), + req_count: AtomicU64::new(0), } } @@ -128,12 +144,13 @@ impl QuotaLimiter { self.limiter.set_speed_limit(limit); } - fn get_statistics(&self) -> (u64, u64, u64, u64) { + fn get_statistics(&self) -> (u64, u64, u64, u64, u64) { ( self.limiter.total_bytes_consumed() as u64, self.total_wait_dur_us.load(Ordering::Relaxed), self.read_bytes.load(Ordering::Relaxed), self.write_bytes.load(Ordering::Relaxed), + 
self.req_count.load(Ordering::Relaxed), ) } @@ -146,6 +163,7 @@ impl QuotaLimiter { self.total_wait_dur_us .fetch_add(dur.as_micros() as u64, Ordering::Relaxed); } + self.req_count.fetch_add(1, Ordering::Relaxed); dur } @@ -162,6 +180,7 @@ impl QuotaLimiter { self.total_wait_dur_us .fetch_add(dur.as_micros() as u64, Ordering::Relaxed); } + self.req_count.fetch_add(1, Ordering::Relaxed); dur } } @@ -173,6 +192,7 @@ pub struct GroupStatistics { pub total_wait_dur_us: u64, pub read_consumed: u64, pub write_consumed: u64, + pub request_count: u64, } impl std::ops::Sub for GroupStatistics { @@ -184,6 +204,7 @@ impl std::ops::Sub for GroupStatistics { total_wait_dur_us: self.total_wait_dur_us.saturating_sub(rhs.total_wait_dur_us), read_consumed: self.read_consumed.saturating_sub(rhs.read_consumed), write_consumed: self.write_consumed.saturating_sub(rhs.write_consumed), + request_count: self.request_count.saturating_sub(rhs.request_count), } } } @@ -198,6 +219,7 @@ impl std::ops::Div for GroupStatistics { total_wait_dur_us: (self.total_wait_dur_us as f64 / rhs) as u64, read_consumed: (self.read_consumed as f64 / rhs) as u64, write_consumed: (self.write_consumed as f64 / rhs) as u64, + request_count: (self.request_count as f64 / rhs) as u64, } } } diff --git a/components/resource_control/src/worker.rs b/components/resource_control/src/worker.rs index 7bc76691e1f..79dea73d0ae 100644 --- a/components/resource_control/src/worker.rs +++ b/components/resource_control/src/worker.rs @@ -9,16 +9,19 @@ use std::{ }; use file_system::{fetch_io_bytes, IoBytes, IoType}; -use strum::EnumCount; +use prometheus::Histogram; +use strum::{EnumCount, IntoEnumIterator}; use tikv_util::{ + debug, sys::{cpu_time::ProcessStat, SysQuota}, time::Instant, warn, + yatp_pool::metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC, }; use crate::{ metrics::*, - resource_group::ResourceGroupManager, + resource_group::{ResourceGroupManager, TaskPriority}, resource_limiter::{GroupStatistics, ResourceLimiter, 
ResourceType}, }; @@ -39,7 +42,7 @@ pub struct SysQuotaGetter { process_stat: ProcessStat, prev_io_stats: [IoBytes; IoType::COUNT], prev_io_ts: Instant, - io_bandwidth: u64, + io_bandwidth: f64, } impl ResourceStatsProvider for SysQuotaGetter { @@ -55,7 +58,7 @@ impl ResourceStatsProvider for SysQuotaGetter { } ResourceType::Io => { let mut stats = ResourceUsageStats { - total_quota: self.io_bandwidth as f64, + total_quota: self.io_bandwidth, current_used: 0.0, }; let now = Instant::now_coarse(); @@ -97,7 +100,7 @@ impl GroupQuotaAdjustWorker { process_stat: ProcessStat::cur_proc_stat().unwrap(), prev_io_stats: [IoBytes::default(); IoType::COUNT], prev_io_ts: Instant::now_coarse(), - io_bandwidth, + io_bandwidth: io_bandwidth as f64, }; Self::with_quota_getter(resource_ctl, resource_quota_getter) } @@ -295,6 +298,235 @@ struct GroupStats { expect_cost_rate: f64, } +/// PriorityLimiterAdjustWorker automically adjust the quota of each priority +/// limiter based on the statistics data during a certain period of time. +/// In general, caller should call this function in a fixed interval. 
+pub struct PriorityLimiterAdjustWorker { + resource_ctl: Arc, + trackers: [PriorityLimiterStatsTracker; 3], + resource_quota_getter: R, + last_adjust_time: Instant, + is_last_low_cpu: bool, + is_last_single_group: bool, +} + +impl PriorityLimiterAdjustWorker { + pub fn new(resource_ctl: Arc) -> Self { + let resource_quota_getter = SysQuotaGetter { + process_stat: ProcessStat::cur_proc_stat().unwrap(), + prev_io_stats: [IoBytes::default(); IoType::COUNT], + prev_io_ts: Instant::now_coarse(), + io_bandwidth: f64::INFINITY, + }; + Self::with_quota_getter(resource_ctl, resource_quota_getter) + } +} + +impl PriorityLimiterAdjustWorker { + fn with_quota_getter( + resource_ctl: Arc, + resource_quota_getter: R, + ) -> Self { + let priorities: [_; 3] = TaskPriority::iter().collect::>().try_into().unwrap(); + let trackers = resource_ctl + .get_priority_resource_limiters() + .zip(priorities) + .map(|(l, p)| PriorityLimiterStatsTracker::new(l, p.as_str())); + Self { + resource_ctl, + trackers, + resource_quota_getter, + last_adjust_time: Instant::now_coarse(), + is_last_low_cpu: true, + is_last_single_group: true, + } + } + pub fn adjust(&mut self) { + let now = Instant::now_coarse(); + let dur = now.saturating_duration_since(self.last_adjust_time); + if dur < Duration::from_secs(1) { + warn!("adjust duration too small, skip adjustment."; "dur" => ?dur); + return; + } + self.last_adjust_time = now; + + // fast path for only the default resource group which means resource + // control is not used at all. 
+ let group_count = self.resource_ctl.get_group_count(); + if group_count == 1 { + if self.is_last_single_group { + return; + } + self.is_last_single_group = true; + self.trackers.iter().skip(1).for_each(|t| { + t.limiter + .get_limiter(ResourceType::Cpu) + .set_rate_limit(f64::INFINITY) + }); + return; + } + self.is_last_single_group = false; + + let stats: [_; 3] = + std::array::from_fn(|i| self.trackers[i].get_and_update_last_stats(dur.as_secs_f64())); + + let process_cpu_stats = match self + .resource_quota_getter + .get_current_stats(ResourceType::Cpu) + { + Ok(s) => s, + Err(e) => { + warn!("get process total cpu failed; skip adjusment."; "err" => ?e); + return; + } + }; + + if process_cpu_stats.current_used < process_cpu_stats.total_quota * 0.5 { + if self.is_last_low_cpu { + return; + } + self.is_last_low_cpu = true; + self.trackers.iter().skip(1).for_each(|t| { + t.limiter + .get_limiter(ResourceType::Cpu) + .set_rate_limit(f64::INFINITY); + // 0 represent infinity + PRIORITY_QUOTA_LIMIT_VEC + .get_metric_with_label_values(&[t.priority]) + .unwrap() + .set(0); + }); + return; + } + self.is_last_low_cpu = false; + + let total_reqs: u64 = stats.iter().map(|s| s.req_count).sum(); + let max_reqs = stats.iter().map(|s| s.req_count).max().unwrap(); + // there is only 1 active priority, do not restrict. + if total_reqs * 99 / 100 <= max_reqs { + self.trackers + .iter() + .skip(1) + .for_each(|t: &PriorityLimiterStatsTracker| { + t.limiter + .get_limiter(ResourceType::Cpu) + .set_rate_limit(f64::INFINITY) + }); + return; + } + + let real_cpu_total: f64 = stats.iter().map(|s| s.cpu_secs).sum(); + let expect_pool_cpu_total = real_cpu_total * (process_cpu_stats.total_quota * 0.95) + / process_cpu_stats.current_used; + let mut limits = [0.0; 2]; + let level_expected: [_; 3] = + std::array::from_fn(|i| stats[i].cpu_secs + stats[i].wait_secs); + // substract the cpu time usage for priority high. 
+ let mut expect_cpu_time_total = expect_pool_cpu_total - level_expected[0]; + + // still reserve a minimal cpu quota + let minimal_quota = process_cpu_stats.total_quota / MICROS_PER_SEC * 0.05; + for i in 1..self.trackers.len() { + if expect_cpu_time_total < minimal_quota { + expect_cpu_time_total = minimal_quota; + } + let limit = expect_cpu_time_total * MICROS_PER_SEC; + self.trackers[i] + .limiter + .get_limiter(ResourceType::Cpu) + .set_rate_limit(limit); + PRIORITY_QUOTA_LIMIT_VEC + .get_metric_with_label_values(&[self.trackers[i].priority]) + .unwrap() + .set(limit as i64); + limits[i - 1] = limit; + expect_cpu_time_total -= level_expected[i]; + } + debug!("adjsut cpu limiter by priority"; "cpu_quota" => process_cpu_stats.total_quota, "process_cpu" => process_cpu_stats.current_used, "expected_cpu" => ?level_expected, + "limits" => ?limits, "limit_cpu_total" => expect_pool_cpu_total, "pool_cpu_cost" => real_cpu_total); + } +} + +#[derive(Debug)] +struct LimiterStats { + // QuotaLimiter consumed cpu secs in total + cpu_secs: f64, + // QuotaLimiter waited secs in total. + wait_secs: f64, + // the total number of tasks that are scheduled. + req_count: u64, +} + +struct HistogramTracker { + metrics: Histogram, + last_sum: f64, + last_count: u64, +} + +impl HistogramTracker { + fn new(metrics: Histogram) -> Self { + let last_sum = metrics.get_sample_sum(); + let last_count = metrics.get_sample_count(); + Self { + metrics, + last_sum, + last_count, + } + } + + fn get_and_upate_statistics(&mut self) -> (f64, u64) { + let cur_sum = self.metrics.get_sample_sum(); + let cur_count = self.metrics.get_sample_count(); + let res = (cur_sum - self.last_sum, cur_count - self.last_count); + self.last_sum = cur_sum; + self.last_count = cur_count; + res + } +} + +struct PriorityLimiterStatsTracker { + priority: &'static str, + limiter: Arc, + last_stats: GroupStatistics, + // unified-read-pool and schedule-worker-pool wait duration metrics. 
+ task_wait_dur_trakcers: [HistogramTracker; 2], +} + +impl PriorityLimiterStatsTracker { + fn new(limiter: Arc, priority: &'static str) -> Self { + let task_wait_dur_trakcers = + ["unified-read-pool", "sched-worker-priority"].map(|pool_name| { + HistogramTracker::new( + YATP_POOL_SCHEDULE_WAIT_DURATION_VEC + .get_metric_with_label_values(&[pool_name, priority]) + .unwrap(), + ) + }); + let last_stats = limiter.get_limit_statistics(ResourceType::Cpu); + Self { + priority, + limiter, + last_stats, + task_wait_dur_trakcers, + } + } + + fn get_and_update_last_stats(&mut self, dur_secs: f64) -> LimiterStats { + let cur_stats = self.limiter.get_limit_statistics(ResourceType::Cpu); + let stats_delta = (cur_stats - self.last_stats) / dur_secs; + self.last_stats = cur_stats; + let wait_stats: [_; 2] = + std::array::from_fn(|i| self.task_wait_dur_trakcers[i].get_and_upate_statistics()); + let schedule_wait_dur_secs = wait_stats.iter().map(|s| s.0).sum::() / dur_secs; + LimiterStats { + cpu_secs: stats_delta.total_consumed as f64 / MICROS_PER_SEC, + wait_secs: stats_delta.total_wait_dur_us as f64 / MICROS_PER_SEC + + schedule_wait_dur_secs, + req_count: stats_delta.request_count, + } + } +} + #[cfg(test)] mod tests { use std::time::Duration; @@ -658,4 +890,118 @@ mod tests { }, ); } + + #[test] + fn test_adjust_priority_resource_limiter() { + let resource_ctl = Arc::new(ResourceGroupManager::default()); + let priority_limiters = resource_ctl.get_priority_resource_limiters(); + let test_provider = TestResourceStatsProvider::new(8.0, f64::INFINITY); + let mut worker = + PriorityLimiterAdjustWorker::with_quota_getter(resource_ctl.clone(), test_provider); + + let reset_quota = |worker: &mut PriorityLimiterAdjustWorker, + cpu: f64| { + worker.resource_quota_getter.cpu_used = cpu; + worker.last_adjust_time = Instant::now_coarse() - Duration::from_secs(10); + priority_limiters[1] + .get_limiter(ResourceType::Cpu) + .set_rate_limit(f64::INFINITY); + priority_limiters[2] + 
.get_limiter(ResourceType::Cpu) + .set_rate_limit(f64::INFINITY); + }; + + fn check(val: f64, expected: f64) { + assert!( + (val.is_infinite() && expected.is_infinite()) + || (expected * 0.99 < val && val < expected * 1.01), + "actual: {}, expected: {}", + val, + expected + ); + } + + let check_limiter = |high: f64, medium: f64, low: f64| { + check( + priority_limiters[0] + .get_limiter(ResourceType::Cpu) + .get_rate_limit(), + high * MICROS_PER_SEC, + ); + check( + priority_limiters[1] + .get_limiter(ResourceType::Cpu) + .get_rate_limit(), + medium * MICROS_PER_SEC, + ); + check( + priority_limiters[2] + .get_limiter(ResourceType::Cpu) + .get_rate_limit(), + low * MICROS_PER_SEC, + ); + }; + + // only default group, always return infinity. + reset_quota(&mut worker, 6.4); + priority_limiters[1].consume(Duration::from_secs(50), IoBytes::default()); + worker.adjust(); + check_limiter(f64::INFINITY, f64::INFINITY, f64::INFINITY); + + let rg1 = new_resource_group_ru("test_high".into(), 1000, 16); + resource_ctl.add_resource_group(rg1); + let rg2 = new_resource_group_ru("test_low".into(), 2000, 1); + resource_ctl.add_resource_group(rg2); + + reset_quota(&mut worker, 6.4); + priority_limiters[1].consume(Duration::from_secs(64), IoBytes::default()); + worker.adjust(); + check_limiter(f64::INFINITY, f64::INFINITY, f64::INFINITY); + + reset_quota(&mut worker, 6.4); + for _i in 0..100 { + priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default()); + priority_limiters[1].consume(Duration::from_millis(400), IoBytes::default()); + } + worker.adjust(); + check_limiter(f64::INFINITY, 5.2, 1.2); + + reset_quota(&mut worker, 6.4); + for _i in 0..100 { + priority_limiters[0].consume(Duration::from_millis(120), IoBytes::default()); + priority_limiters[1].consume(Duration::from_millis(200), IoBytes::default()); + } + worker.adjust(); + check_limiter(f64::INFINITY, 2.6, 0.6); + + reset_quota(&mut worker, 6.4); + for _i in 0..100 { + 
priority_limiters[2].consume(Duration::from_millis(200), IoBytes::default()); + } + worker.adjust(); + check_limiter(f64::INFINITY, f64::INFINITY, f64::INFINITY); + + reset_quota(&mut worker, 8.0); + for _i in 0..100 { + priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default()); + priority_limiters[1].consume(Duration::from_millis(240), IoBytes::default()); + priority_limiters[2].consume(Duration::from_millis(320), IoBytes::default()); + } + worker.adjust(); + check_limiter(f64::INFINITY, 5.2, 2.8); + + reset_quota(&mut worker, 6.0); + for _i in 0..100 { + priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default()); + priority_limiters[2].consume(Duration::from_millis(360), IoBytes::default()); + } + worker.adjust(); + check_limiter(f64::INFINITY, 5.2, 5.2); + + // duration too small, unchanged. + worker.resource_quota_getter.cpu_used = 6.0; + worker.last_adjust_time = Instant::now_coarse() - Duration::from_millis(500); + worker.adjust(); + check_limiter(f64::INFINITY, 5.2, 5.2); + } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 006750fd518..72e09a9f8d8 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -70,7 +70,7 @@ use raftstore::{ RaftRouterCompactedEventSender, }; use resolved_ts::{LeadershipResolver, Task}; -use resource_control::ResourceGroupManager; +use resource_control::{priority_from_task_meta, ResourceGroupManager}; use security::SecurityManager; use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use snap_recovery::RecoveryService; @@ -558,6 +558,7 @@ where engines.engine.clone(), resource_ctl, CleanupMethod::Remote(self.core.background_worker.remote()), + Some(Arc::new(priority_from_task_meta)), )) } else { None diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index fdbb18b6205..eab384871e6 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ 
-67,7 +67,7 @@ use raftstore_v2::{ StateStorage, }; use resolved_ts::Task; -use resource_control::ResourceGroupManager; +use resource_control::{priority_from_task_meta, ResourceGroupManager}; use security::SecurityManager; use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use tikv::{ @@ -469,6 +469,7 @@ where engines.engine.clone(), resource_ctl, CleanupMethod::Remote(self.core.background_worker.remote()), + Some(Arc::new(priority_from_task_meta)), )) } else { None diff --git a/components/tikv_util/src/yatp_pool/metrics.rs b/components/tikv_util/src/yatp_pool/metrics.rs index 8ae1aa8910e..efb1379dcc7 100644 --- a/components/tikv_util/src/yatp_pool/metrics.rs +++ b/components/tikv_util/src/yatp_pool/metrics.rs @@ -19,8 +19,8 @@ lazy_static! { pub static ref YATP_POOL_SCHEDULE_WAIT_DURATION_VEC: HistogramVec = register_histogram_vec!( "tikv_yatp_pool_schedule_wait_duration", "Histogram of yatp pool schedule wait duration.", - &["name"], - exponential_buckets(1e-5, 4.0, 12).unwrap() // 10us ~ 41s + &["name", "priority"], + exponential_buckets(1e-5, 2.0, 18).unwrap() // 10us ~ 2.5s ) .unwrap(); } diff --git a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index 2752f3f3c51..3cb237bad15 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -1,14 +1,14 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
mod future_pool; -mod metrics; +pub mod metrics; use std::sync::Arc; use fail::fail_point; pub use future_pool::{Full, FuturePool}; use futures::{compat::Stream01CompatExt, StreamExt}; -use prometheus::{local::LocalHistogram, Histogram}; +use prometheus::{local::LocalHistogram, Histogram, HistogramOpts}; use yatp::{ pool::{CloneRunnerBuilder, Local, Remote, Runner}, queue::{multilevel, priority, Extras, QueueType, TaskCell as _}, @@ -165,7 +165,10 @@ pub struct YatpPoolRunner { before_pause: Option>, // Statistics about the schedule wait duration. - schedule_wait_duration: LocalHistogram, + // local histogram for high,medium,low priority tasks. + schedule_wait_durations: [LocalHistogram; 3], + // return the index of `schedule_wait_durations` from task metadata. + metric_idx_from_task_meta: Arc usize + Send + Sync>, } impl Runner for YatpPoolRunner { @@ -190,12 +193,12 @@ impl Runner for YatpPoolRunner { fn handle(&mut self, local: &mut Local, mut task_cell: Self::TaskCell) -> bool { let extras = task_cell.mut_extras(); if let Some(schedule_time) = extras.schedule_time() { - self.schedule_wait_duration - .observe(schedule_time.elapsed().as_secs_f64()); + let idx = (*self.metric_idx_from_task_meta)(extras.metadata()); + self.schedule_wait_durations[idx].observe(schedule_time.elapsed().as_secs_f64()); } let finished = self.inner.handle(local, task_cell); if self.ticker.try_tick() { - self.schedule_wait_duration.flush(); + self.schedule_wait_durations.iter().for_each(|m| m.flush()); } finished } @@ -229,7 +232,8 @@ impl YatpPoolRunner { after_start: Option>, before_stop: Option>, before_pause: Option>, - schedule_wait_duration: Histogram, + schedule_wait_durations: [Histogram; 3], + metric_idx_from_task_meta: Arc usize + Send + Sync>, ) -> Self { YatpPoolRunner { inner, @@ -238,7 +242,8 @@ impl YatpPoolRunner { after_start, before_stop, before_pause, - schedule_wait_duration: schedule_wait_duration.local(), + schedule_wait_durations: schedule_wait_durations.map(|m| 
m.local()), + metric_idx_from_task_meta, } } } @@ -256,6 +261,10 @@ pub struct YatpPoolBuilder { max_tasks: usize, cleanup_method: CleanupMethod, + // whether to tracker task scheduling wait duration + enable_task_wait_metrics: bool, + metric_idx_from_task_meta: Option usize + Send + Sync>>, + #[cfg(test)] background_cleanup_hook: Option>, } @@ -275,6 +284,9 @@ impl YatpPoolBuilder { max_tasks: std::usize::MAX, cleanup_method: CleanupMethod::InPlace, + enable_task_wait_metrics: false, + metric_idx_from_task_meta: None, + #[cfg(test)] background_cleanup_hook: None, } @@ -344,6 +356,19 @@ impl YatpPoolBuilder { self } + pub fn enable_task_wait_metrics(mut self) -> Self { + self.enable_task_wait_metrics = true; + self + } + + pub fn metric_idx_from_task_meta( + mut self, + f: Arc usize + Send + Sync>, + ) -> Self { + self.metric_idx_from_task_meta = Some(f); + self + } + pub fn build_future_pool(self) -> FuturePool { let name = self .name_prefix @@ -480,15 +505,24 @@ impl YatpPoolBuilder { let after_start = self.after_start.take(); let before_stop = self.before_stop.take(); let before_pause = self.before_pause.take(); - let schedule_wait_duration = - metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[&name]); + let schedule_wait_durations = if self.enable_task_wait_metrics { + ["high", "medium", "low"].map(|p| { + metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[&name, p]) + }) + } else { + std::array::from_fn(|_| Histogram::with_opts(HistogramOpts::new("_", "_")).unwrap()) + }; + let metric_idx_from_task_meta = self + .metric_idx_from_task_meta + .unwrap_or_else(|| Arc::new(|_| 0)); let read_pool_runner = YatpPoolRunner::new( Default::default(), self.ticker.clone(), after_start, before_stop, before_pause, - schedule_wait_duration, + schedule_wait_durations, + metric_idx_from_task_meta, ); (builder, read_pool_runner) } @@ -511,6 +545,7 @@ mod tests { let name = "test_record_schedule_wait_duration"; let pool = 
YatpPoolBuilder::new(DefaultTicker::default()) .name_prefix(name) + .enable_task_wait_metrics() .build_single_level_pool(); let (tx, rx) = mpsc::channel(); for _ in 0..3 { @@ -529,7 +564,8 @@ mod tests { } // Drop the pool so the local metrics are flushed. drop(pool); - let histogram = metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[name]); + let histogram = + metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[name, "high"]); assert_eq!(histogram.get_sample_count() as u32, 6, "{:?}", histogram); } diff --git a/src/read_pool.rs b/src/read_pool.rs index 8f5a459c5bc..32be95698da 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -395,6 +395,7 @@ pub fn build_yatp_read_pool( engine: E, resource_ctl: Option>, cleanup_method: CleanupMethod, + metric_idx_from_task_meta_fn: Option usize + Send + Sync + 'static>>, ) -> ReadPool { let unified_read_pool_name = get_unified_read_pool_name(); build_yatp_read_pool_with_name( @@ -404,6 +405,7 @@ pub fn build_yatp_read_pool( resource_ctl, cleanup_method, unified_read_pool_name, + metric_idx_from_task_meta_fn, ) } @@ -414,9 +416,10 @@ pub fn build_yatp_read_pool_with_name( resource_ctl: Option>, cleanup_method: CleanupMethod, unified_read_pool_name: String, + metric_idx_from_task_meta_fn: Option usize + Send + Sync + 'static>>, ) -> ReadPool { let raftkv = Arc::new(Mutex::new(engine)); - let builder = YatpPoolBuilder::new(ReporterTicker { reporter }) + let mut builder = YatpPoolBuilder::new(ReporterTicker { reporter }) .name_prefix(&unified_read_pool_name) .cleanup_method(cleanup_method) .stack_size(config.stack_size.0 as usize) @@ -444,6 +447,12 @@ pub fn build_yatp_read_pool_with_name( .before_stop(|| unsafe { destroy_tls_engine::(); }); + if let Some(metric_idx_from_task_meta_fn) = metric_idx_from_task_meta_fn { + builder = builder + .enable_task_wait_metrics() + .metric_idx_from_task_meta(metric_idx_from_task_meta_fn); + } + let pool = if let Some(ref r) = resource_ctl { 
builder.build_priority_future_pool(r.clone()) } else { @@ -755,8 +764,14 @@ mod tests { // max running tasks number should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); - let pool = - build_yatp_read_pool(&config, DummyReporter, engine, None, CleanupMethod::InPlace); + let pool = build_yatp_read_pool( + &config, + DummyReporter, + engine, + None, + CleanupMethod::InPlace, + None, + ); let gen_task = || { let (tx, rx) = oneshot::channel::<()>(); @@ -803,8 +818,14 @@ mod tests { // max running tasks number should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); - let pool = - build_yatp_read_pool(&config, DummyReporter, engine, None, CleanupMethod::InPlace); + let pool = build_yatp_read_pool( + &config, + DummyReporter, + engine, + None, + CleanupMethod::InPlace, + None, + ); let gen_task = || { let (tx, rx) = oneshot::channel::<()>(); @@ -859,8 +880,14 @@ mod tests { // max running tasks number should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); - let pool = - build_yatp_read_pool(&config, DummyReporter, engine, None, CleanupMethod::InPlace); + let pool = build_yatp_read_pool( + &config, + DummyReporter, + engine, + None, + CleanupMethod::InPlace, + None, + ); let gen_task = || { let (tx, rx) = oneshot::channel::<()>(); @@ -986,6 +1013,7 @@ mod tests { resource_manager, CleanupMethod::InPlace, name.clone(), + None, ); let gen_task = || { diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 8426143d502..4a7395222f7 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -192,14 +192,14 @@ macro_rules! 
handle_request { handle_request!($fn_name, $future_name, $req_ty, $resp_ty, no_time_detail); }; ($fn_name: ident, $future_name: ident, $req_ty: ident, $resp_ty: ident, $time_detail: tt) => { - fn $fn_name(&mut self, ctx: RpcContext<'_>, req: $req_ty, sink: UnarySink<$resp_ty>) { + fn $fn_name(&mut self, ctx: RpcContext<'_>, mut req: $req_ty, sink: UnarySink<$resp_ty>) { forward_unary!(self.proxy, $fn_name, ctx, req, sink); let begin_instant = Instant::now(); let source = req.get_context().get_request_source().to_owned(); - let resource_control_ctx = req.get_context().get_resource_control_context(); + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -229,6 +229,20 @@ macro_rules! handle_request { } } +// consume resource group penalty and set explicit group priority +// We override the override_priority here to make handling tasks easier. +fn consume_penalty_and_set_priority( + resource_manager: &Arc, + resource_control_ctx: &mut ResourceControlContext, +) { + resource_manager.consume_penalty(resource_control_ctx); + if resource_control_ctx.get_override_priority() == 0 { + let prioirty = resource_manager + .get_resource_group_priority(resource_control_ctx.get_resource_group_name()); + resource_control_ctx.override_priority = prioirty as u64; + } +} + macro_rules! 
set_total_time { ($resp:ident, $duration:expr,no_time_detail) => {}; ($resp:ident, $duration:expr,has_time_detail) => { @@ -476,12 +490,12 @@ impl Tikv for Service { ctx.spawn(task); } - fn coprocessor(&mut self, ctx: RpcContext<'_>, req: Request, sink: UnarySink) { + fn coprocessor(&mut self, ctx: RpcContext<'_>, mut req: Request, sink: UnarySink) { forward_unary!(self.proxy, coprocessor, ctx, req, sink); let source = req.get_context().get_request_source().to_owned(); - let resource_control_ctx = req.get_context().get_resource_control_context(); + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -513,13 +527,13 @@ impl Tikv for Service { fn raw_coprocessor( &mut self, ctx: RpcContext<'_>, - req: RawCoprocessorRequest, + mut req: RawCoprocessorRequest, sink: UnarySink, ) { let source = req.get_context().get_request_source().to_owned(); - let resource_control_ctx = req.get_context().get_resource_control_context(); + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -602,13 +616,13 @@ impl Tikv for Service { fn coprocessor_stream( &mut self, ctx: RpcContext<'_>, - req: Request, + mut req: Request, mut sink: ServerStreamingSink, ) { let begin_instant = Instant::now(); - let resource_control_ctx = req.get_context().get_resource_control_context(); + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let 
Some(resource_manager) = &self.resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1148,10 +1162,10 @@ fn handle_batch_commands_request( let resp = future::ok(batch_commands_response::Response::default()); response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::invalid, String::default()); }, - Some(batch_commands_request::request::Cmd::Get(req)) => { - let resource_control_ctx = req.get_context().get_resource_control_context(); + Some(batch_commands_request::request::Cmd::Get(mut req)) => { + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1169,10 +1183,10 @@ fn handle_batch_commands_request( response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::kv_get, source); } }, - Some(batch_commands_request::request::Cmd::RawGet(req)) => { - let resource_control_ctx = req.get_context().get_resource_control_context(); + Some(batch_commands_request::request::Cmd::RawGet(mut req)) => { + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1190,10 +1204,10 @@ fn handle_batch_commands_request( response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::raw_get, source); 
} }, - Some(batch_commands_request::request::Cmd::Coprocessor(req)) => { - let resource_control_ctx = req.get_context().get_resource_control_context(); + Some(batch_commands_request::request::Cmd::Coprocessor(mut req)) => { + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1224,10 +1238,10 @@ fn handle_batch_commands_request( String::default(), ); } - $(Some(batch_commands_request::request::Cmd::$cmd(req)) => { - let resource_control_ctx = req.get_context().get_resource_control_context(); + $(Some(batch_commands_request::request::Cmd::$cmd(mut req)) => { + let resource_control_ctx = req.mut_context().mut_resource_control_context(); if let Some(resource_manager) = resource_manager { - resource_manager.consume_penalty(resource_control_ctx); + consume_penalty_and_set_priority(resource_manager, resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index c0d6e6fc4a3..c89a767a80b 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3344,7 +3344,8 @@ impl TestStorageBuilder { } else { None }; - + let manager = Arc::new(ResourceGroupManager::default()); + let resource_ctl = manager.derive_controller("test".into(), false); Storage::from_engine( self.engine, &self.config, @@ -3362,11 +3363,8 @@ impl TestStorageBuilder { Arc::new(QuotaLimiter::default()), latest_feature_gate(), ts_provider, - Some(Arc::new(ResourceController::new_for_test( - "test".to_owned(), - false, - ))), - None, + Some(resource_ctl), + Some(manager), ) } @@ -3379,7 +3377,8 @@ impl TestStorageBuilder { 
&crate::config::StorageReadPoolConfig::default_for_test(), engine.clone(), ); - + let manager = Arc::new(ResourceGroupManager::default()); + let resource_ctl = manager.derive_controller("test".into(), false); Storage::from_engine( engine, &self.config, @@ -3397,16 +3396,14 @@ impl TestStorageBuilder { Arc::new(QuotaLimiter::default()), latest_feature_gate(), None, - Some(Arc::new(ResourceController::new_for_test( - "test".to_owned(), - false, - ))), - None, + Some(resource_ctl), + Some(manager), ) } pub fn build_for_resource_controller( self, + resource_manager: Arc, resource_controller: Arc, ) -> Result, L, F>> { let engine = TxnTestEngine { @@ -3436,7 +3433,7 @@ impl TestStorageBuilder { latest_feature_gate(), None, Some(resource_controller), - None, + Some(resource_manager), ) } } diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index 19736304373..8674a581c72 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -12,7 +12,10 @@ use kvproto::{kvrpcpb::CommandPri, pdpb::QueryKind}; use pd_client::{Feature, FeatureGate}; use prometheus::local::*; use raftstore::store::WriteStats; -use resource_control::{ControlledFuture, ResourceController, TaskMetadata}; +use resource_control::{ + priority_from_task_meta, with_resource_limiter, ControlledFuture, ResourceController, + ResourceGroupManager, TaskMetadata, +}; use tikv_util::{ sys::SysQuota, yatp_pool::{Full, FuturePool, PoolTicker, YatpPoolBuilder}, @@ -101,6 +104,7 @@ impl VanillaQueue { struct PriorityQueue { worker_pool: FuturePool, resource_ctl: Arc, + resource_mgr: Arc, } impl PriorityQueue { @@ -118,15 +122,23 @@ impl PriorityQueue { // TODO: maybe use a better way to generate task_id let task_id = rand::random::(); let group_name = metadata.group_name().to_owned(); + let resource_limiter = self.resource_mgr.get_resource_limiter( + unsafe { std::str::from_utf8_unchecked(&group_name) }, + "", + metadata.override_priority() as u64, + ); let mut extras = 
Extras::new_multilevel(task_id, fixed_level); extras.set_metadata(metadata.to_vec()); self.worker_pool.spawn_with_extras( - ControlledFuture::new( - async move { - f.await; - }, - self.resource_ctl.clone(), - group_name, + with_resource_limiter( + ControlledFuture::new( + async move { + f.await; + }, + self.resource_ctl.clone(), + group_name, + ), + resource_limiter, ), extras, ) @@ -155,6 +167,7 @@ impl SchedPool { reporter: R, feature_gate: FeatureGate, resource_ctl: Option>, + resource_mgr: Option>, ) -> Self { let builder = |pool_size: usize, name_prefix: &str| { let engine = Arc::new(Mutex::new(engine.clone())); @@ -181,6 +194,8 @@ impl SchedPool { destroy_tls_engine::(); tls_flush(&reporter); }) + .enable_task_wait_metrics() + .metric_idx_from_task_meta(Arc::new(priority_from_task_meta)) }; let vanilla = VanillaQueue { worker_pool: builder(pool_size, "sched-worker-pool").build_future_pool(), @@ -191,6 +206,7 @@ impl SchedPool { worker_pool: builder(pool_size, "sched-worker-priority") .build_priority_future_pool(r.clone()), resource_ctl: r.clone(), + resource_mgr: resource_mgr.unwrap(), }); let queue_type = if resource_ctl.is_some() { QueueType::Dynamic diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 995c361e163..6d087d894df 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -472,6 +472,7 @@ impl TxnScheduler { reporter, feature_gate.clone(), resource_ctl, + resource_manager.clone(), ), control_mutex: Arc::new(tokio::sync::Mutex::new(false)), lock_mgr, @@ -1300,10 +1301,14 @@ impl TxnScheduler { // TODO: write bytes can be a bit inaccurate due to error requests or in-memory // pessimistic locks. 
sample.add_write_bytes(write_bytes); - // estimate the cpu time for write by the schdule cpu time and write bytes - let expected_dur = (sample.cpu_time() + Duration::from_micros(write_bytes as u64)) - * SCHEDULER_CPU_TIME_FACTOR; if let Some(limiter) = resource_limiter { + let expected_dur = if limiter.is_background() { + // estimate the cpu time for write by the schduling cpu time and write bytes + (sample.cpu_time() + Duration::from_micros(write_bytes as u64)) + * SCHEDULER_CPU_TIME_FACTOR + } else { + sample.cpu_time() + }; limiter .async_consume( expected_dur, @@ -2032,6 +2037,8 @@ mod tests { enable_async_apply_prewrite: false, ..Default::default() }; + let resource_manager = Arc::new(ResourceGroupManager::default()); + let controller = resource_manager.derive_controller("test".into(), false); ( TxnScheduler::new( engine.clone(), @@ -2049,11 +2056,8 @@ mod tests { ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), latest_feature_gate(), - Some(Arc::new(ResourceController::new_for_test( - "test".to_owned(), - true, - ))), - None, + Some(controller), + Some(resource_manager), ), engine, ) @@ -2388,6 +2392,8 @@ mod tests { }; let feature_gate = FeatureGate::default(); feature_gate.set_version("6.0.0").unwrap(); + let resource_manager = Arc::new(ResourceGroupManager::default()); + let controller = resource_manager.derive_controller("test".into(), false); let scheduler = TxnScheduler::new( engine, @@ -2405,11 +2411,8 @@ mod tests { ResourceTagFactory::new_for_test(), Arc::new(QuotaLimiter::default()), feature_gate.clone(), - Some(Arc::new(ResourceController::new_for_test( - "test".to_owned(), - true, - ))), - None, + Some(controller), + Some(resource_manager), ); // Use sync mode if pipelined_pessimistic_lock is false. 
assert_eq!(scheduler.pessimistic_lock_mode(), PessimisticLockMode::Sync); diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index 533d8d0abd4..fec1ccc931d 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -352,12 +352,12 @@ fn test_scheduler_pool_auto_switch_for_resource_ctl() { .get(&1) .unwrap() .clone(); - let resource_manager = ResourceGroupManager::default(); + let resource_manager = Arc::new(ResourceGroupManager::default()); let resource_ctl = resource_manager.derive_controller("test".to_string(), true); let storage = TestStorageBuilderApiV1::from_engine_and_lock_mgr(engine, MockLockManager::new()) .config(cluster.cfg.tikv.storage.clone()) - .build_for_resource_controller(resource_ctl) + .build_for_resource_controller(resource_manager.clone(), resource_ctl) .unwrap(); let region = cluster.get_region(b"k1"); From eb28cf9927017b82d930ade81e1f844790f17ed3 Mon Sep 17 00:00:00 2001 From: glorv Date: Mon, 13 Nov 2023 15:06:44 +0800 Subject: [PATCH 1005/1149] test: do not capture test output in make test_with_nextest (#15968) ref tikv/tikv#15967 Signed-off-by: glorv --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 103c502036e..18ea19e4887 100644 --- a/Makefile +++ b/Makefile @@ -316,7 +316,7 @@ test: # Run tests with nextest. ifndef CUSTOM_TEST_COMMAND -test_with_nextest: export CUSTOM_TEST_COMMAND=nextest run +test_with_nextest: export CUSTOM_TEST_COMMAND=nextest run --nocapture endif test_with_nextest: export RUSTDOCFLAGS="-Z unstable-options --persist-doctests" test_with_nextest: From e29eae2fdc984042671c93541499cb4c20241066 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 13 Nov 2023 16:14:15 +0800 Subject: [PATCH 1006/1149] *: allow dynamic link openssl library (#15944) close tikv/tikv#15943 Currently, TiKV binaries are statically linked to the OpenSSL library, preventing the use of the host system's OpenSSL. 
This commit adds an option to build TiKV with dynamic linking of the OpenSSL library, enabling TiKV to utilize the host system's OpenSSL. This is particularly useful in FIPS scenarios where TiKV needs to delegate cryptographic operations to the host FIPS OpenSSL. By default, this option is disabled, and TiKV continues to statically link the OpenSSL library. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .dockerignore | 3 -- Cargo.lock | 6 +-- Cargo.toml | 17 +++++--- Dockerfile.FIPS | 43 +++++++++++++++++++ Makefile | 26 ++++++++--- cmd/tikv-ctl/Cargo.toml | 1 + cmd/tikv-server/Cargo.toml | 1 + components/backup-stream/Cargo.toml | 2 +- components/encryption/Cargo.toml | 3 ++ components/encryption/export/Cargo.toml | 1 + components/encryption/src/io.rs | 13 +++++- scripts/check-bins.py | 57 +++++++++++++++++++------ src/lib.rs | 8 +++- 13 files changed, 147 insertions(+), 34 deletions(-) create mode 100644 Dockerfile.FIPS diff --git a/.dockerignore b/.dockerignore index b0a83d43c41..4afd9fdf497 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,3 @@ -# This file is almost the same as .gitignore expect the next line. 
-.git - # OSX leaves these everywhere on SMB shares ._* diff --git a/Cargo.lock b/Cargo.lock index fba26935d1f..89fa63ed848 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2950,7 +2950,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#aa41eb102d373f56846be88ffd250c2b581b48d4" +source = "git+https://github.com/tikv/rust-rocksdb.git#bd84144327cfb22bee21b6043673d12b90415e24" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2969,7 +2969,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#aa41eb102d373f56846be88ffd250c2b581b48d4" +source = "git+https://github.com/tikv/rust-rocksdb.git#bd84144327cfb22bee21b6043673d12b90415e24" dependencies = [ "bzip2-sys", "cc", @@ -4890,7 +4890,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#aa41eb102d373f56846be88ffd250c2b581b48d4" +source = "git+https://github.com/tikv/rust-rocksdb.git#bd84144327cfb22bee21b6043673d12b90415e24" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/Cargo.toml b/Cargo.toml index edebbc46f1d..82846e98acf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,17 @@ test-engine-raft-raft-engine = ["engine_test/test-engine-raft-raft-engine"] test-engines-rocksdb = ["engine_test/test-engines-rocksdb"] test-engines-panic = ["engine_test/test-engines-panic"] pprof-fp = ["pprof/frame-pointer"] +openssl-vendored = [ + "openssl/vendored", + "hyper-tls/vendored", + # NB: the "openssl" feature does not make grpcio-sys v0.10 depends on + # openssl-sys, and it can not find the static openssl built by openssl-sys. + # Enabling "grpcio/openssl-vendored" explicitly makes grpcio-sys depends on + # openssl-sys and correctly links to the static openssl. + "grpcio/openssl-vendored", + # NB: Enable SM4 support if OpenSSL is built from source and statically linked. 
+ "encryption_export/sm4", +] # for testing configure propegate to other crates # https://stackoverflow.com/questions/41700543/can-we-share-test-utilites-between-crates @@ -358,7 +369,7 @@ tracker = { path = "components/tracker" } txn_types = { path = "components/txn_types" } # External libs raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } -grpcio = { version = "0.10.4", default-features = false, features = ["openssl-vendored", "protobuf-codec", "nightly"] } +grpcio = { version = "0.10.4", default-features = false, features = ["openssl", "protobuf-codec", "nightly"] } grpcio-health = { version = "0.10.4", default-features = false, features = ["protobuf-codec"] } tipb = { git = "https://github.com/pingcap/tipb.git" } kvproto = { git = "https://github.com/pingcap/kvproto.git" } @@ -380,10 +391,6 @@ opt-level = 1 debug = false opt-level = 1 -[profile.dev.package.tirocks-sys] -debug = false -opt-level = 1 - [profile.dev.package.tests] debug = 1 opt-level = 1 diff --git a/Dockerfile.FIPS b/Dockerfile.FIPS new file mode 100644 index 00000000000..fe34ab00f65 --- /dev/null +++ b/Dockerfile.FIPS @@ -0,0 +1,43 @@ +# This Docker image contains a minimal build environment for a FIPS compliant TiKV. + +FROM redhat/ubi8-minimal:8.6 as builder + +RUN microdnf install -y openssl-devel + +RUN microdnf install -y \ + gcc \ + gcc-c++ \ + libstdc++-static \ + make \ + cmake \ + perl \ + git \ + findutils \ + curl \ + python3 && \ + microdnf clean all + +# Install Rustup +RUN curl https://sh.rustup.rs -sSf | sh -s -- --no-modify-path --default-toolchain none -y +ENV PATH /root/.cargo/bin/:$PATH + +# Checkout TiKV source code. +WORKDIR /tikv +COPY .git .git +ARG GIT_HASH +RUN git checkout ${GIT_HASH} && git checkout . + +# Do not static link OpenSSL. 
+ENV ENABLE_FIPS 1 +RUN make build_dist_release + +# Export to a clean image +FROM redhat/ubi8-minimal:8.6 +COPY --from=builder /tikv/target/release/tikv-server /tikv-server +COPY --from=builder /tikv/target/release/tikv-ctl /tikv-ctl + +RUN microdnf install -y openssl + +EXPOSE 20160 20180 + +ENTRYPOINT ["/tikv-server"] diff --git a/Makefile b/Makefile index 18ea19e4887..b54d4403669 100644 --- a/Makefile +++ b/Makefile @@ -120,6 +120,19 @@ ENABLE_FEATURES += cloud-gcp ENABLE_FEATURES += cloud-azure endif +export DOCKER_FILE ?= Dockerfile +export DOCKER_IMAGE_NAME ?= pingcap/tikv +export DOCKER_IMAGE_TAG ?= latest +export DEV_DOCKER_IMAGE_NAME ?= pingcap/tikv_dev +export ENABLE_FIPS ?= 0 + +ifeq ($(ENABLE_FIPS),1) +DOCKER_IMAGE_TAG := ${DOCKER_IMAGE_TAG}-fips +DOCKER_FILE := ${DOCKER_FILE}.FIPS +else +ENABLE_FEATURES += openssl-vendored +endif + PROJECT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) BIN_PATH = $(CURDIR)/bin @@ -135,10 +148,6 @@ export TIKV_BUILD_GIT_HASH ?= $(shell git rev-parse HEAD 2> /dev/null || echo ${ export TIKV_BUILD_GIT_TAG ?= $(shell git describe --tag || echo ${BUILD_INFO_GIT_FALLBACK}) export TIKV_BUILD_GIT_BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD 2> /dev/null || echo ${BUILD_INFO_GIT_FALLBACK}) -export DOCKER_IMAGE_NAME ?= "pingcap/tikv" -export DOCKER_IMAGE_TAG ?= "latest" -export DEV_DOCKER_IMAGE_NAME ?= "pingcap/tikv_dev" - # Turn on cargo pipelining to add more build parallelism. This has shown decent # speedups in TiKV. 
# @@ -155,6 +164,12 @@ ifeq ($(TIKV_BUILD_RUSTC_TARGET),aarch64-unknown-linux-gnu) export RUSTFLAGS := $(RUSTFLAGS) -Ctarget-feature=-outline-atomics endif +ifeq ($(shell basename $(shell which python 2>/dev/null)),python) +PY := python +else +PY := python3 +endif + # Almost all the rules in this Makefile are PHONY # Declaring a rule as PHONY could improve correctness # But probably instead just improves performance by a little bit @@ -248,7 +263,7 @@ dist_release: @mkdir -p ${BIN_PATH} @cp -f ${CARGO_TARGET_DIR}/release/tikv-ctl ${CARGO_TARGET_DIR}/release/tikv-server ${BIN_PATH}/ ifeq ($(shell uname),Linux) # Macs binary isn't elf format - @python scripts/check-bins.py --features "${ENABLE_FEATURES}" --check-release ${BIN_PATH}/tikv-ctl ${BIN_PATH}/tikv-server + $(PY) scripts/check-bins.py --features "${ENABLE_FEATURES}" --check-release ${BIN_PATH}/tikv-ctl ${BIN_PATH}/tikv-server endif # Build with release flag as if it were for distribution, but without @@ -393,6 +408,7 @@ error-code: etc/error_code.toml docker: docker build \ -t ${DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_TAG} \ + -f ${DOCKER_FILE} \ --build-arg GIT_HASH=${TIKV_BUILD_GIT_HASH} \ --build-arg GIT_TAG=${TIKV_BUILD_GIT_TAG} \ --build-arg GIT_BRANCH=${TIKV_BUILD_GIT_BRANCH} \ diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index 0a630ebc023..e16fadf0836 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -24,6 +24,7 @@ cloud-gcp = [ cloud-azure = [ "encryption_export/cloud-azure", ] +openssl-vendored = ["tikv/openssl-vendored"] test-engine-kv-rocksdb = [ "tikv/test-engine-kv-rocksdb" ] diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index 409dc84a62d..ef278854dd7 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -18,6 +18,7 @@ failpoints = ["server/failpoints"] cloud-aws = ["server/cloud-aws"] cloud-gcp = ["server/cloud-gcp"] cloud-azure = ["server/cloud-azure"] +openssl-vendored = ["tikv/openssl-vendored"] 
test-engine-kv-rocksdb = [ "server/test-engine-kv-rocksdb" ] diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 12979eab212..1308d10966f 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -41,7 +41,7 @@ engine_traits = { workspace = true } error_code = { workspace = true } # We cannot update the etcd-client to latest version because of the cyclic requirement. # Also we need wait until https://github.com/etcdv3/etcd-client/pull/43/files to be merged. -etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "41d393c32a7a7c728550cee1d9a138dafe6f3e27", features = ["pub-response-field", "tls-openssl-vendored"], optional = true } +etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "41d393c32a7a7c728550cee1d9a138dafe6f3e27", features = ["pub-response-field", "tls-openssl"], optional = true } external_storage = { workspace = true } fail = "0.5" file_system = { workspace = true } diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index 021c9f23002..336f2e1854f 100644 --- a/components/encryption/Cargo.toml +++ b/components/encryption/Cargo.toml @@ -6,6 +6,9 @@ publish = false [features] failpoints = ["fail/failpoints"] +# openssl/vendored is necssary in order to conditionally building SM4 encryption +# support, as SM4 is disabled on various openssl distributions, such as Rocky Linux 9. 
+sm4 = ["openssl/vendored"] [dependencies] async-trait = "0.1" diff --git a/components/encryption/export/Cargo.toml b/components/encryption/export/Cargo.toml index c1891a93480..829e33ae5aa 100644 --- a/components/encryption/export/Cargo.toml +++ b/components/encryption/export/Cargo.toml @@ -9,6 +9,7 @@ default = ["cloud-aws", "cloud-gcp", "cloud-azure"] cloud-aws = ["aws"] cloud-gcp = [] cloud-azure = ["azure"] +sm4 = ["encryption/sm4"] [dependencies] async-trait = "0.1" diff --git a/components/encryption/src/io.rs b/components/encryption/src/io.rs index d7b7eb76b8a..dc326e78427 100644 --- a/components/encryption/src/io.rs +++ b/components/encryption/src/io.rs @@ -390,7 +390,18 @@ pub fn create_aes_ctr_crypter( EncryptionMethod::Aes128Ctr => OCipher::aes_128_ctr(), EncryptionMethod::Aes192Ctr => OCipher::aes_192_ctr(), EncryptionMethod::Aes256Ctr => OCipher::aes_256_ctr(), - EncryptionMethod::Sm4Ctr => OCipher::sm4_ctr(), + EncryptionMethod::Sm4Ctr => { + #[cfg(feature = "sm4")] + { + OCipher::sm4_ctr() + } + #[cfg(not(feature = "sm4"))] + { + return Err(box_err!( + "sm4-ctr is not supported by dynamically linked openssl" + )); + } + } }; let crypter = OCrypter::new(cipher, mode, key, Some(iv.as_slice()))?; Ok((cipher, crypter)) diff --git a/scripts/check-bins.py b/scripts/check-bins.py index 1255472a76a..421a4df5ef4 100644 --- a/scripts/check-bins.py +++ b/scripts/check-bins.py @@ -21,6 +21,22 @@ SYS_LIB = ["libstdc++"] +def ensure_link(args, require_static, libs): + p = os.popen("uname") + if "Linux" not in p.readline(): + return + for bin in args: + p = os.popen("ldd " + bin) + requires = set(l.split()[0] for l in p.readlines()) + for lib in libs: + if any(lib in r for r in requires): + if require_static: + pr("error: %s should not requires dynamic library %s\n" % (bin, lib)) + sys.exit(1) + elif not require_static: + pr("error: %s should requires dynamic library %s\n" % (bin, lib)) + sys.exit(1) + def pr(s): if sys.stdout.isatty(): sys.stdout.write("\x1b[2K\r" 
+ s) @@ -72,6 +88,24 @@ def check_sse(executable): print("fix this by building tikv with ROCKSDB_SYS_SSE=1") sys.exit(1) +def is_openssl_vendored_enabled(features): + return "openssl-vendored" in features + +def check_openssl(executable, is_static_link): + openssl_libs = ["libcrypto", "libssl"] + ensure_link([executable], is_static_link, openssl_libs) + if is_static_link: + return + openssl_symbols = ["EVP_", "OPENSSL"] + p = os.popen('nm %s | grep -iE " (t|T) (%s)"' % (executable, "|".join(openssl_symbols))) + lines = p.readlines() + if lines: + pr( + "error: %s contains OpenSSL symbol %s in text section:\n%s\n" + % (executable, openssl_symbols, "".join(lines)) + ) + sys.exit(1) + def check_tests(features): if not is_jemalloc_enabled(features): print("jemalloc not enabled, skip check!") @@ -95,28 +129,22 @@ def check_tests(features): pr("Checking binary %s" % name) check_jemalloc(executable) + check_openssl(executable, True) pr("") print("Done, takes %.2fs." % (time.time() - start)) -def ensure_link(args): - p = os.popen("uname") - if "Linux" not in p.readline(): - return - for bin in args: - p = os.popen("ldd " + bin) - requires = set(l.split()[0] for l in p.readlines()) - for lib in SYS_LIB: - if any(lib in r for r in requires): - pr("error: %s should not requires dynamic library %s\n" % (bin, lib)) - sys.exit(1) - def check_release(enabled_features, args): - ensure_link(args) + # Ensure statically link SYS_LIB. 
+ ensure_link(args, True, SYS_LIB) checked_features = [] if is_jemalloc_enabled(enabled_features): checked_features.append("jemalloc") if is_sse_enabled(enabled_features): checked_features.append("SSE4.2") + if is_openssl_vendored_enabled(enabled_features): + checked_features.append("static-link-openssl") + else: + checked_features.append("dynamic-link-openssl") if not checked_features: print("Both jemalloc and SSE4.2 are disabled, skip check") return @@ -127,7 +155,8 @@ def check_release(enabled_features, args): check_jemalloc(arg) if is_sse_enabled(enabled_features): check_sse(arg) - pr("%s %s \033[32menabled\033[0m\n" % (arg, " ".join(checked_features))) + check_openssl(arg, is_openssl_vendored_enabled(enabled_features)) + pr("%s [%s] \033[32menabled\033[0m\n" % (arg, " ".join(checked_features))) def main(): argv = sys.argv diff --git a/src/lib.rs b/src/lib.rs index b3e9ebaf8e8..a0ccff3c8cb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -79,8 +79,12 @@ pub fn tikv_version_info(build_time: Option<&str>) -> String { } /// return the build version of tikv-server -pub fn tikv_build_version() -> &'static str { - env!("CARGO_PKG_VERSION") +pub fn tikv_build_version() -> String { + if option_env!("ENABLE_FIPS").map_or(false, |v| v == "1") { + format!("{}-{}", env!("CARGO_PKG_VERSION"), "fips") + } else { + env!("CARGO_PKG_VERSION").to_owned() + } } /// Prints the tikv version information to the standard output. From a50e36052a8c3c470b6ba96ab515c7a7ab067525 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 13 Nov 2023 16:38:14 +0800 Subject: [PATCH 1007/1149] backup-stream: remove dead code (#15976) close tikv/tikv#15975 Since v7.1.0, TiKV no longer connect PD etcd API directly, instead TiKV uses PD meta storage API. The code and crates guarded by "metastore-etcd" feature became dead code essentially. 
Signed-off-by: Neil Shen --- Cargo.lock | 314 +-------- components/backup-stream/Cargo.toml | 7 - components/backup-stream/src/errors.rs | 25 - components/backup-stream/src/metadata/mod.rs | 2 - .../backup-stream/src/metadata/store/etcd.rs | 627 ------------------ .../src/metadata/store/lazy_etcd.rs | 316 --------- .../backup-stream/src/metadata/store/mod.rs | 8 - components/error_code/src/backup_stream.rs | 3 - 8 files changed, 3 insertions(+), 1299 deletions(-) delete mode 100644 components/backup-stream/src/metadata/store/etcd.rs delete mode 100644 components/backup-stream/src/metadata/store/lazy_etcd.rs diff --git a/Cargo.lock b/Cargo.lock index 89fa63ed848..d629d2ac18a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -184,17 +184,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58982858be7540a465c790b95aaea6710e5139bf8956b1d1344d014fa40100b0" dependencies = [ - "async-stream-impl 0.2.0", - "futures-core", -] - -[[package]] -name = "async-stream" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e" -dependencies = [ - "async-stream-impl 0.3.3", + "async-stream-impl", "futures-core", ] @@ -209,17 +199,6 @@ dependencies = [ "syn 1.0.103", ] -[[package]] -name = "async-stream-impl" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.103", -] - [[package]] name = "async-trait" version = "0.1.58" @@ -299,51 +278,6 @@ dependencies = [ "uuid 0.8.2", ] -[[package]] -name = "axum" -version = "0.5.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acee9fd5073ab6b045a275b3e709c163dd36c90685219cb21804a147b58dba43" -dependencies = [ - "async-trait", - "axum-core", - "bitflags", - "bytes", - "futures-util", - "http", - 
"http-body", - "hyper", - "itoa 1.0.1", - "matchit", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "serde", - "sync_wrapper", - "tokio", - "tower", - "tower-http", - "tower-layer", - "tower-service", -] - -[[package]] -name = "axum-core" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37e5939e02c56fecd5c017c37df4238c0a839fa76b7f97acdd7efb804fd181cc" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http", - "http-body", - "mime", - "tower-layer", - "tower-service", -] - [[package]] name = "azure" version = "0.0.1" @@ -546,7 +480,6 @@ dependencies = [ "async-compression", "async-trait", "bytes", - "cfg-if 1.0.0", "chrono", "concurrency_manager", "crossbeam", @@ -557,7 +490,6 @@ dependencies = [ "engine_test", "engine_traits", "error_code", - "etcd-client", "external_storage", "fail", "file_system", @@ -599,7 +531,6 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "tonic", "txn_types", "url", "uuid 0.8.2", @@ -698,7 +629,7 @@ dependencies = [ "lazy_static", "lazycell", "peeking_take_while", - "prettyplease 0.2.6", + "prettyplease", "proc-macro2", "quote", "regex", @@ -1798,25 +1729,6 @@ dependencies = [ "tikv_alloc", ] -[[package]] -name = "etcd-client" -version = "0.10.2" -source = "git+https://github.com/pingcap/etcd-client?rev=41d393c32a7a7c728550cee1d9a138dafe6f3e27#41d393c32a7a7c728550cee1d9a138dafe6f3e27" -dependencies = [ - "http", - "hyper", - "hyper-openssl", - "openssl", - "prost", - "tokio", - "tokio-stream", - "tonic", - "tonic-build", - "tower", - "tower-service", - "visible", -] - [[package]] name = "event-listener" version = "2.5.1" @@ -1974,12 +1886,6 @@ dependencies = [ "syn 1.0.103", ] -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - [[package]] name = "flate2" version = "1.0.11" @@ -2557,12 +2463,6 
@@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "http-range-header" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bfe8eed0a9285ef776bb792479ea3834e8b94e13d615c2f66d03dd50a435a29" - [[package]] name = "http-types" version = "2.12.0" @@ -2643,18 +2543,6 @@ dependencies = [ "tower-layer", ] -[[package]] -name = "hyper-timeout" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" -dependencies = [ - "hyper", - "pin-project-lite", - "tokio", - "tokio-io-timeout", -] - [[package]] name = "hyper-tls" version = "0.5.0" @@ -3076,12 +2964,6 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" -[[package]] -name = "matchit" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73cbba799671b762df5a175adf59ce145165747bb891505c43d09aefbbf38beb" - [[package]] name = "md-5" version = "0.9.1" @@ -3829,16 +3711,6 @@ dependencies = [ "ucd-trie", ] -[[package]] -name = "petgraph" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a13a2fa9d0b63e5f22328828741e523766fff0ee9e779316902290dff3f824f" -dependencies = [ - "fixedbitset", - "indexmap", -] - [[package]] name = "phf" version = "0.9.0" @@ -4002,16 +3874,6 @@ version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" -[[package]] -name = "prettyplease" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c142c0e46b57171fe0c528bee8c5b7569e80f0c17e377cd0e30ea57dbc11bb51" -dependencies = [ - "proc-macro2", - "syn 1.0.103", -] - [[package]] name = "prettyplease" version = "0.2.6" @@ -4131,61 +3993,6 @@ dependencies 
= [ "syn 1.0.103", ] -[[package]] -name = "prost" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0841812012b2d4a6145fae9a6af1534873c32aa67fff26bd09f8fa42c83f95a" -dependencies = [ - "bytes", - "prost-derive", -] - -[[package]] -name = "prost-build" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d8b442418ea0822409d9e7d047cbf1e7e9e1760b172bf9982cf29d517c93511" -dependencies = [ - "bytes", - "heck 0.4.1", - "itertools", - "lazy_static", - "log", - "multimap", - "petgraph", - "prettyplease 0.1.21", - "prost", - "prost-types", - "regex", - "syn 1.0.103", - "tempfile", - "which", -] - -[[package]] -name = "prost-derive" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "164ae68b6587001ca506d3bf7f1000bfa248d0e1217b618108fba4ec1d0cc306" -dependencies = [ - "anyhow", - "itertools", - "proc-macro2", - "quote", - "syn 1.0.103", -] - -[[package]] -name = "prost-types" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "747761bc3dc48f9a34553bf65605cf6cb6288ba219f3450b4275dbd81539551a" -dependencies = [ - "bytes", - "prost", -] - [[package]] name = "protobuf" version = "2.8.0" @@ -6484,7 +6291,7 @@ version = "7.6.0-alpha" dependencies = [ "anyhow", "api_version", - "async-stream 0.2.0", + "async-stream", "async-trait", "backtrace", "batch-system", @@ -6931,16 +6738,6 @@ dependencies = [ "futures 0.1.31", ] -[[package]] -name = "tokio-io-timeout" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b74022ada614a1b4834de765f9bb43877f910cc8ce4be40e89042c9223a8bf" -dependencies = [ - "pin-project-lite", - "tokio", -] - [[package]] name = "tokio-macros" version = "1.7.0" @@ -7020,90 +6817,6 @@ dependencies = [ "serde", ] -[[package]] -name = "tonic" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "55b9af819e54b8f33d453655bef9b9acc171568fb49523078d0cc4e7484200ec" -dependencies = [ - "async-stream 0.3.3", - "async-trait", - "axum", - "base64 0.13.0", - "bytes", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "hyper", - "hyper-timeout", - "percent-encoding", - "pin-project", - "prost", - "prost-derive", - "tokio", - "tokio-stream", - "tokio-util", - "tower", - "tower-layer", - "tower-service", - "tracing", - "tracing-futures", -] - -[[package]] -name = "tonic-build" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c6fd7c2581e36d63388a9e04c350c21beb7a8b059580b2e93993c526899ddc" -dependencies = [ - "prettyplease 0.1.21", - "proc-macro2", - "prost-build", - "quote", - "syn 1.0.103", -] - -[[package]] -name = "tower" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" -dependencies = [ - "futures-core", - "futures-util", - "indexmap", - "pin-project", - "pin-project-lite", - "rand 0.8.5", - "slab", - "tokio", - "tokio-util", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "tower-http" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c530c8675c1dbf98facee631536fa116b5fb6382d7dd6dc1b118d970eafe3ba" -dependencies = [ - "bitflags", - "bytes", - "futures-core", - "futures-util", - "http", - "http-body", - "http-range-header", - "pin-project-lite", - "tower", - "tower-layer", - "tower-service", -] - [[package]] name = "tower-layer" version = "0.3.1" @@ -7123,7 +6836,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01ebdc2bb4498ab1ab5f5b73c5803825e60199229ccba0698170e3be0e7f959f" dependencies = [ "cfg-if 1.0.0", - "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -7149,16 +6861,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = 
"tracing-futures" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" -dependencies = [ - "pin-project", - "tracing", -] - [[package]] name = "tracker" version = "0.0.1" @@ -7358,16 +7060,6 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" -[[package]] -name = "visible" -version = "0.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a044005fd5c0fc1ebd79c622e5606431c6b879a6a19acafb754be9926a2de73e" -dependencies = [ - "quote", - "syn 1.0.103", -] - [[package]] name = "waker-fn" version = "1.1.0" diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 1308d10966f..141954686c3 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -11,8 +11,6 @@ test-engines-rocksdb = ["tikv/test-engines-rocksdb"] failpoints = ["tikv/failpoints", "fail/failpoints"] backup-stream-debug = [] -metastore-etcd = ["tonic", "etcd-client"] - [[test]] name = "integration" path = "tests/integration/mod.rs" @@ -30,7 +28,6 @@ harness = true async-compression = { version = "0.3.14", features = ["tokio", "zstd"] } async-trait = { version = "0.1" } bytes = "1" -cfg-if = "1" chrono = "0.4" concurrency_manager = { workspace = true } crossbeam = "0.8" @@ -39,9 +36,6 @@ dashmap = "5" engine_rocks = { workspace = true } engine_traits = { workspace = true } error_code = { workspace = true } -# We cannot update the etcd-client to latest version because of the cyclic requirement. -# Also we need wait until https://github.com/etcdv3/etcd-client/pull/43/files to be merged. 
-etcd-client = { git = "https://github.com/pingcap/etcd-client", rev = "41d393c32a7a7c728550cee1d9a138dafe6f3e27", features = ["pub-response-field", "tls-openssl"], optional = true } external_storage = { workspace = true } fail = "0.5" file_system = { workspace = true } @@ -78,7 +72,6 @@ tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread", "macros", "time", "sync"] } tokio-stream = "0.1" tokio-util = { version = "0.7", features = ["compat"] } -tonic = { version = "0.8", optional = true } txn_types = { workspace = true } uuid = "0.8" yatp = { workspace = true } diff --git a/components/backup-stream/src/errors.rs b/components/backup-stream/src/errors.rs index c3cc91da9ff..67461e2978b 100644 --- a/components/backup-stream/src/errors.rs +++ b/components/backup-stream/src/errors.rs @@ -5,8 +5,6 @@ use std::{ }; use error_code::ErrorCodeExt; -#[cfg(feature = "metastore-etcd")] -use etcd_client::Error as EtcdError; use grpcio::Error as GrpcError; use kvproto::{errorpb::Error as StoreError, metapb::*}; use pd_client::Error as PdError; @@ -22,9 +20,6 @@ use crate::{endpoint::Task, metrics}; pub enum Error { #[error("gRPC meet error {0}")] Grpc(#[from] GrpcError), - #[cfg(feature = "metasotre-etcd")] - #[error("Etcd meet error {0}")] - Etcd(#[from] EtcdErrorExt), #[error("Protobuf meet error {0}")] Protobuf(#[from] ProtobufError), #[error("No such task {task_name:?}")] @@ -54,30 +49,10 @@ pub enum Error { Other(#[from] Box), } -#[cfg(feature = "metastore-etcd")] -impl From for Error { - fn from(value: EtcdError) -> Self { - Self::Etcd(value.into()) - } -} - -#[cfg(feature = "metastore-etcd")] -#[derive(ThisError, Debug)] -pub enum EtcdErrorExt { - #[error("{0}")] - Normal(#[from] EtcdError), - #[error("the watch canceled")] - WatchCanceled, - #[error("the required revision has been compacted, current is {current}")] - RevisionCompacted { current: i64 }, -} - impl ErrorCodeExt for Error { fn error_code(&self) -> error_code::ErrorCode { 
use error_code::backup_stream::*; match self { - #[cfg(feature = "metastore-etcd")] - Error::Etcd(_) => ETCD, Error::Protobuf(_) => PROTO, Error::NoSuchTask { .. } => NO_SUCH_TASK, Error::MalformedMetadata(_) => MALFORMED_META, diff --git a/components/backup-stream/src/metadata/mod.rs b/components/backup-stream/src/metadata/mod.rs index a96e2f9bcb6..1150c2932bd 100644 --- a/components/backup-stream/src/metadata/mod.rs +++ b/components/backup-stream/src/metadata/mod.rs @@ -8,5 +8,3 @@ pub mod store; pub mod test; pub use client::{Checkpoint, CheckpointProvider, MetadataClient, MetadataEvent, StreamTask}; -#[cfg(feature = "metastore-etcd")] -pub use store::lazy_etcd::{ConnectionConfig, LazyEtcdClient}; diff --git a/components/backup-stream/src/metadata/store/etcd.rs b/components/backup-stream/src/metadata/store/etcd.rs deleted file mode 100644 index 62a246a08ef..00000000000 --- a/components/backup-stream/src/metadata/store/etcd.rs +++ /dev/null @@ -1,627 +0,0 @@ -// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. - -use std::{ - cmp::Ordering, - collections::{HashMap, HashSet}, - pin::Pin, - sync::{Arc, Weak}, - time::Duration, -}; - -use async_trait::async_trait; -use etcd_client::{ - Client, Compare, CompareOp, DeleteOptions, EventType, GetOptions, Member, PutOptions, - SortOrder, SortTarget, Txn, TxnOp, WatchOptions, -}; -use futures::StreamExt; -use tikv_util::{info, warn}; -use tokio::sync::Mutex; -use tokio_stream::Stream; - -use super::{ - GetExtra, GetResponse, Keys, KvChangeSubscription, KvEventType, MetaStore, Snapshot, - TransactionOp, -}; -use crate::{ - annotate, - errors::{Error, EtcdErrorExt, Result}, - metadata::{ - keys::{KeyValue, MetaKey}, - metrics::METADATA_KEY_OPERATION, - store::{KvEvent, Subscription}, - }, -}; -// Can we get rid of the mutex? (which means, we must use a singleton client.) -// Or make a pool of clients? 
-#[derive(Clone)] -pub struct EtcdStore(Arc>); - -#[derive(Default)] -pub(super) struct TopologyUpdater { - last_urls: HashSet, - client: Weak>, - - // back off configs - pub(super) loop_interval: Duration, - pub(super) loop_failure_back_off: Duration, -} - -impl std::fmt::Debug for TopologyUpdater { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("TopologyUpdater") - .field("last_urls", &self.last_urls) - .finish() - } -} - -#[async_trait] -pub(super) trait ClusterInfoProvider { - async fn get_members(&mut self) -> Result>; - async fn add_endpoint(&mut self, endpoint: &str) -> Result<()>; - async fn remove_endpoint(&mut self, endpoint: &str) -> Result<()>; -} - -#[async_trait] -impl ClusterInfoProvider for Client { - async fn get_members(&mut self) -> Result> { - let result = self.member_list().await?; - Ok(result.members().to_vec()) - } - - async fn add_endpoint(&mut self, endpoint: &str) -> Result<()> { - Client::add_endpoint(self, endpoint) - .await - .map_err(|err| annotate!(err, "during adding the endpoint {}", endpoint))?; - Ok(()) - } - - async fn remove_endpoint(&mut self, endpoint: &str) -> Result<()> { - Client::remove_endpoint(self, endpoint) - .await - .map_err(|err| annotate!(err, "during removing the endpoint {}", endpoint))?; - Ok(()) - } -} - -#[derive(Debug, Clone, Copy)] -enum DiffType { - Add, - Remove, -} - -#[derive(Clone)] -struct Diff { - diff_type: DiffType, - url: String, -} - -impl std::fmt::Debug for Diff { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let syn = match self.diff_type { - DiffType::Add => "+", - DiffType::Remove => "-", - }; - write!(f, "{}{}", syn, self.url) - } -} - -impl TopologyUpdater { - // Note: we may require the initial endpoints from the arguments directly. - // So the internal map won't get inconsistent when the cluster config changed - // during initializing. 
- // But that is impossible for now because we cannot query the node ID before - // connecting. - pub fn new(cluster_ref: Weak>) -> Self { - Self { - last_urls: Default::default(), - client: cluster_ref, - - loop_interval: Duration::from_secs(60), - loop_failure_back_off: Duration::from_secs(10), - } - } - - pub fn init(&mut self, members: impl Iterator) { - for mem in members { - self.last_urls.insert(mem); - } - } - - fn diff(&self, incoming: &[Member]) -> Vec { - let newer = incoming - .iter() - .flat_map(|mem| mem.client_urls().iter()) - .collect::>(); - let mut result = vec![]; - for url in &newer { - if !self.last_urls.contains(*url) { - result.push(Diff { - diff_type: DiffType::Add, - url: String::clone(url), - }) - } - } - for url in &self.last_urls { - if !newer.contains(url) { - result.push(Diff { - diff_type: DiffType::Remove, - url: String::clone(url), - }) - } - } - result - } - - fn apply(&mut self, diff: &Diff) -> Option { - match diff.diff_type { - DiffType::Add => match self.last_urls.insert(diff.url.clone()) { - true => None, - false => Some(format!( - "the member to adding with url {} overrides existing urls.", - diff.url - )), - }, - DiffType::Remove => match self.last_urls.remove(&diff.url) { - true => None, - false => Some(format!( - "the member to remove with url {} hasn't been added.", - diff.url - )), - }, - } - } - - async fn update_topology_by(&mut self, cli: &mut C, diff: &Diff) -> Result<()> { - match diff.diff_type { - DiffType::Add => cli.add_endpoint(&diff.url).await?, - DiffType::Remove => cli.remove_endpoint(&diff.url).await?, - } - Ok(()) - } - - async fn do_update(&mut self, cli: &mut C) -> Result<()> { - let cluster = cli.get_members().await?; - let diffs = self.diff(cluster.as_slice()); - if !diffs.is_empty() { - info!("log backup updating store topology."; "diffs" => ?diffs, "current_state" => ?self); - } - for diff in diffs { - match self.apply(&diff) { - Some(warning) => { - warn!("log backup meet some wrong status when 
updating PD clients, skipping this update."; "warn" => %warning); - } - None => self.update_topology_by(cli, &diff).await?, - } - } - Result::Ok(()) - } - - pub(super) async fn update_topology_loop(&mut self) { - while let Some(cli) = self.client.upgrade() { - let mut lock = cli.lock().await; - let result = self.do_update(&mut lock).await; - drop(lock); - match result { - Ok(_) => tokio::time::sleep(self.loop_interval).await, - Err(err) => { - err.report("during updating etcd topology"); - tokio::time::sleep(self.loop_failure_back_off).await; - } - } - } - } - - pub async fn main_loop(mut self) { - info!("log backup topology updater finish initialization."; "current_state" => ?self); - self.update_topology_loop().await - } -} - -impl EtcdStore { - pub fn connect, S: AsRef<[E]>>(endpoints: S) -> Self { - // TODO remove block_on - let cli = - futures::executor::block_on(etcd_client::Client::connect(&endpoints, None)).unwrap(); - Self(Arc::new(Mutex::new(cli))) - } - - pub fn inner(&self) -> &Arc> { - &self.0 - } -} - -impl From for EtcdStore { - fn from(cli: etcd_client::Client) -> Self { - Self(Arc::new(Mutex::new(cli))) - } -} - -impl From for KvEventType { - fn from(e: EventType) -> Self { - match e { - EventType::Put => Self::Put, - EventType::Delete => Self::Delete, - } - } -} - -impl From for KeyValue { - fn from(kv: etcd_client::KeyValue) -> Self { - // TODO: we can move out the vector in the KeyValue struct here. (instead of - // copying.) But that isn't possible for now because: - // - The raw KV pair(defined by the protocol buffer of etcd) is private. - // - That did could be exported by `pub-fields` feature of the client. However - // that feature isn't published in theirs Cargo.toml (Is that a mistake?). - // - Indeed, we can use `mem::transmute` here because `etcd_client::KeyValue` - // has `#[repr(transparent)]`. But before here become a known bottle neck, I'm - // not sure whether it's worthwhile for involving unsafe code. 
- KeyValue(MetaKey(kv.key().to_owned()), kv.value().to_owned()) - } -} - -/// Prepare the etcd options required by the keys. -/// Return the start key for requesting. -macro_rules! prepare_opt { - ($opt:ident, $keys:expr) => { - match $keys { - Keys::Prefix(key) => { - $opt = $opt.with_prefix(); - key - } - Keys::Range(key, end_key) => { - $opt = $opt.with_range(end_key); - key - } - Keys::Key(key) => key, - } - }; -} - -#[async_trait] -impl MetaStore for EtcdStore { - type Snap = EtcdSnapshot; - - async fn snapshot(&self) -> Result { - let status = self.0.lock().await.status().await?; - Ok(EtcdSnapshot { - store: self.clone(), - revision: status.header().unwrap().revision(), - }) - } - - async fn watch(&self, keys: Keys, start_rev: i64) -> Result { - let mut opt = WatchOptions::new(); - let key = prepare_opt!(opt, keys); - opt = opt.with_start_revision(start_rev); - let (mut watcher, stream) = self.0.lock().await.watch(key, Some(opt)).await?; - Ok(Subscription { - stream: Box::pin(stream.flat_map( - |events| -> Pin> + Send>> { - match events { - Err(err) => Box::pin(tokio_stream::once(Err(err.into()))), - Ok(events) => { - if events.compact_revision() > 0 && events.canceled() { - return Box::pin(tokio_stream::once(Err(Error::Etcd( - EtcdErrorExt::RevisionCompacted { - current: events.compact_revision(), - }, - )))); - } - if events.canceled() { - return Box::pin(tokio_stream::once(Err(Error::Etcd( - EtcdErrorExt::WatchCanceled, - )))); - } - Box::pin(tokio_stream::iter( - // TODO: remove the copy here via access the protobuf field - // directly. 
- #[allow(clippy::unnecessary_to_owned)] - events.events().to_owned().into_iter().filter_map(|event| { - let kv = event.kv()?; - Some(Ok(KvEvent { - kind: event.event_type().into(), - pair: kv.clone().into(), - })) - }), - )) - } - } - }, - )), - cancel: Box::pin(async move { - if let Err(err) = watcher.cancel().await { - warn!("failed to cancel watch stream!"; "err" => %err); - } - }), - }) - } - - async fn txn(&self, t: super::Transaction) -> Result<()> { - let mut cli = self.0.lock().await; - let txns = Self::make_txn(&mut cli, t).await?; - for txn in txns { - cli.txn(txn).await?; - } - Ok(()) - } - - async fn set(&self, pair: KeyValue) -> Result<()> { - self.0.lock().await.put(pair.0, pair.1, None).await?; - Ok(()) - } - - async fn delete(&self, keys: Keys) -> Result<()> { - let mut opt = DeleteOptions::new(); - let key = prepare_opt!(opt, keys); - - self.0.lock().await.delete(key, Some(opt)).await?; - Ok(()) - } - - async fn txn_cond(&self, txn: super::CondTransaction) -> Result<()> { - let mut cli = self.0.lock().await; - let txn = Self::make_conditional_txn(&mut cli, txn).await?; - cli.txn(txn).await?; - Ok(()) - } -} - -impl EtcdStore { - fn collect_leases_needed(txn: &super::Transaction) -> HashSet { - txn.ops - .iter() - .filter_map(|op| match op { - TransactionOp::Put(_, opt) if opt.ttl.as_secs() > 0 => Some(opt.ttl), - _ => None, - }) - .collect() - } - - async fn make_leases( - cli: &mut Client, - needed: HashSet, - ) -> Result> { - let mut map = HashMap::with_capacity(needed.len()); - for lease_time in needed { - let lease_id = cli.lease_grant(lease_time.as_secs() as _, None).await?.id(); - map.insert(lease_time, lease_id); - } - Ok(map) - } - - fn partition_txns(mut txn: super::Transaction, leases: HashMap) -> Vec { - txn.ops - .chunks_mut(128) - .map(|txn| Txn::default().and_then(Self::to_txn(txn, &leases))) - .collect() - } - - fn to_compare(cond: super::Condition) -> Compare { - let op = match cond.result { - Ordering::Less => CompareOp::Less, - 
Ordering::Equal => CompareOp::Equal, - Ordering::Greater => CompareOp::Greater, - }; - Compare::value(cond.over_key, op, cond.arg) - } - - /// Convert the transaction operations to etcd transaction ops. - fn to_txn(ops: &mut [super::TransactionOp], leases: &HashMap) -> Vec { - ops.iter_mut().map(|op| match op { - TransactionOp::Put(key, opt) => { - let opts = if opt.ttl.as_secs() > 0 { - let lease = leases.get(&opt.ttl); - match lease { - None => { - warn!("lease not found, the request key may not have a ttl"; "dur" => ?opt.ttl); - None - } - Some(lease_id) => { - Some(PutOptions::new().with_lease(*lease_id)) - } - } - } else { - None - }; - TxnOp::put(key.take_key(), key.take_value(), opts) - }, - TransactionOp::Delete(rng) => { - let rng = std::mem::replace(rng, Keys::Key(MetaKey(vec![]))); - let mut opt = DeleteOptions::new(); - let key = prepare_opt!(opt, rng); - TxnOp::delete(key, Some(opt)) - }, - }).collect::>() - } - - /// Make a conditional txn. - /// For now, this wouldn't split huge transaction into smaller ones, - /// so when playing with etcd in PD, conditional transaction should be - /// small. - async fn make_conditional_txn( - cli: &mut Client, - mut txn: super::CondTransaction, - ) -> Result { - let cond = Self::to_compare(txn.cond); - - let mut leases_needed = Self::collect_leases_needed(&txn.success); - leases_needed.extend(Self::collect_leases_needed(&txn.failure).into_iter()); - let leases = Self::make_leases(cli, leases_needed).await?; - let success = Self::to_txn(&mut txn.success.ops, &leases); - let failure = Self::to_txn(&mut txn.failure.ops, &leases); - Ok(Txn::new().when([cond]).and_then(success).or_else(failure)) - } - - async fn make_txn(cli: &mut Client, etcd_txn: super::Transaction) -> Result> { - let (put_cnt, delete_cnt) = etcd_txn.ops.iter().fold((0, 0), |(p, d), item| match item { - TransactionOp::Put(..) 
=> (p + 1, d), - TransactionOp::Delete(_) => (p, d + 1), - }); - METADATA_KEY_OPERATION - .with_label_values(&["put"]) - .inc_by(put_cnt); - METADATA_KEY_OPERATION - .with_label_values(&["del"]) - .inc_by(delete_cnt); - let needed_leases = Self::collect_leases_needed(&etcd_txn); - let leases = Self::make_leases(cli, needed_leases).await?; - let txns = Self::partition_txns(etcd_txn, leases); - Ok(txns) - } -} - -pub struct EtcdSnapshot { - store: EtcdStore, - revision: i64, -} - -#[async_trait] -impl Snapshot for EtcdSnapshot { - async fn get_extra(&self, keys: Keys, extra: GetExtra) -> Result { - let mut opt = GetOptions::new(); - let key = prepare_opt!(opt, keys); - opt = opt.with_revision(self.revision); - if extra.desc_order { - opt = opt.with_sort(SortTarget::Key, SortOrder::Descend); - } - if extra.limit > 0 { - opt = opt.with_limit(extra.limit as _); - } - let resp = self.store.0.lock().await.get(key.0, Some(opt)).await?; - Ok(GetResponse { - kvs: resp - .kvs() - .iter() - .map(|kv| KeyValue(MetaKey(kv.key().to_owned()), kv.value().to_owned())) - .collect(), - more: resp.more(), - }) - } - - fn revision(&self) -> i64 { - self.revision - } -} - -#[cfg(test)] -mod test { - use std::{ - collections::{HashMap, HashSet}, - fmt::Display, - sync::Arc, - time::Duration, - }; - - use async_trait::async_trait; - use etcd_client::{proto::PbMember, Member}; - use tokio::{sync::Mutex, time::timeout}; - - use super::{ClusterInfoProvider, TopologyUpdater}; - use crate::errors::Result; - - #[derive(Default, Debug)] - struct FakeCluster { - id_alloc: u64, - members: HashMap, - endpoints: HashSet, - } - - #[async_trait] - impl ClusterInfoProvider for FakeCluster { - async fn get_members(&mut self) -> Result> { - let members = self.members.values().cloned().collect(); - Ok(members) - } - - async fn add_endpoint(&mut self, endpoint: &str) -> Result<()> { - self.endpoints.insert(endpoint.to_owned()); - Ok(()) - } - - async fn remove_endpoint(&mut self, endpoint: &str) -> 
Result<()> { - self.endpoints.remove(endpoint); - Ok(()) - } - } - - impl FakeCluster { - fn new_id(&mut self) -> u64 { - let i = self.id_alloc; - self.id_alloc += 1; - i - } - - fn init_with_member(&mut self, n: usize) -> Vec { - let mut endpoints = Vec::with_capacity(n); - for _ in 0..n { - let mem = self.add_member(); - let url = format!("fakestore://{}", mem); - self.endpoints.insert(url.clone()); - endpoints.push(url); - } - endpoints - } - - fn add_member(&mut self) -> u64 { - let id = self.new_id(); - let mut mem = PbMember::default(); - mem.id = id; - mem.client_ur_ls = vec![format!("fakestore://{}", id)]; - // Safety: `Member` is #[repr(transparent)]. - self.members.insert(id, unsafe { std::mem::transmute(mem) }); - id - } - - fn remove_member(&mut self, id: u64) -> bool { - self.members.remove(&id).is_some() - } - - fn check_consistency(&self, message: impl Display) { - let urls = self - .members - .values() - .flat_map(|mem| mem.client_urls().iter().cloned()) - .collect::>(); - assert_eq!( - urls, self.endpoints, - "{}: consistency check not passed.", - message - ); - } - } - - #[test] - fn test_topology_updater() { - let mut c = FakeCluster::default(); - let eps = c.init_with_member(3); - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .unwrap(); - - let sc = Arc::new(Mutex::new(c)); - let mut tu = TopologyUpdater::new(Arc::downgrade(&sc)); - tu.loop_failure_back_off = Duration::ZERO; - tu.loop_interval = Duration::from_millis(100); - tu.init(eps.into_iter()); - - { - let mut sc = sc.blocking_lock(); - sc.check_consistency("after init"); - sc.add_member(); - rt.block_on(tu.do_update(&mut sc)).unwrap(); - sc.check_consistency("adding nodes"); - sc.add_member(); - sc.add_member(); - rt.block_on(tu.do_update(&mut sc)).unwrap(); - sc.check_consistency("adding more nodes"); - assert!(sc.remove_member(0), "{:?}", sc); - rt.block_on(tu.do_update(&mut sc)).unwrap(); - sc.check_consistency("removing nodes"); - } - - drop(sc); 
- rt.block_on(async { timeout(Duration::from_secs(1), tu.update_topology_loop()).await }) - .unwrap() - } -} diff --git a/components/backup-stream/src/metadata/store/lazy_etcd.rs b/components/backup-stream/src/metadata/store/lazy_etcd.rs deleted file mode 100644 index 7dacf45e697..00000000000 --- a/components/backup-stream/src/metadata/store/lazy_etcd.rs +++ /dev/null @@ -1,316 +0,0 @@ -// Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. - -use std::{ - sync::Arc, - time::{Duration, SystemTime}, -}; - -use etcd_client::{ConnectOptions, Error as EtcdError, OpenSslClientConfig}; -use futures::Future; -use openssl::{ - pkey::PKey, - x509::{verify::X509VerifyFlags, X509}, -}; -use security::SecurityManager; -use tikv_util::{ - info, - stream::{RetryError, RetryExt}, - warn, -}; -use tokio::sync::Mutex as AsyncMutex; - -use super::{ - etcd::{EtcdSnapshot, TopologyUpdater}, - EtcdStore, MetaStore, -}; -use crate::errors::{ContextualResultExt, Result}; - -const RPC_TIMEOUT: Duration = Duration::from_secs(5); - -#[derive(Clone)] -pub struct LazyEtcdClient(Arc>); - -#[derive(Clone)] -pub struct ConnectionConfig { - pub tls: Arc, - pub keep_alive_interval: Duration, - pub keep_alive_timeout: Duration, -} - -impl Default for ConnectionConfig { - fn default() -> Self { - Self { - tls: Default::default(), - keep_alive_interval: Duration::from_secs(10), - keep_alive_timeout: Duration::from_secs(3), - } - } -} - -impl std::fmt::Debug for ConnectionConfig { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("ConnectionConfig") - .field("keep_alive_interval", &self.keep_alive_interval) - .field("keep_alive_timeout", &self.keep_alive_timeout) - .finish() - } -} - -impl ConnectionConfig { - /// Convert the config to the connection option. 
- fn to_connection_options(&self) -> ConnectOptions { - let mut opts = ConnectOptions::new(); - if let Some(tls) = &self - .tls - .client_suite() - .map_err(|err| warn!("failed to load client suite!"; "err" => %err)) - .ok() - { - opts = opts.with_openssl_tls( - OpenSslClientConfig::default() - .ca_cert_pem(&tls.ca) - // Some of users may prefer using multi-level self-signed certs. - // In this scenario, we must set this flag or openssl would probably complain it cannot found the root CA. - // (Because the flags we provide allows users providing exactly one CA cert.) - // We haven't make it configurable because it is enabled in gRPC by default too. - // TODO: Perhaps implement grpc-io based etcd client, fully remove the difference between gRPC TLS and our custom TLS? - .manually(|c| c.cert_store_mut().set_flags(X509VerifyFlags::PARTIAL_CHAIN)) - .manually(|c| { - let mut client_certs= X509::stack_from_pem(&tls.client_cert)?; - let client_key = PKey::private_key_from_pem(&tls.client_key.0)?; - if !client_certs.is_empty() { - c.set_certificate(&client_certs[0])?; - } - if client_certs.len() > 1 { - for i in client_certs.drain(1..) { - c.add_extra_chain_cert(i)?; - } - } - c.set_private_key(&client_key)?; - Ok(()) - }), - ) - } - opts = opts - .with_keep_alive(self.keep_alive_interval, self.keep_alive_timeout) - .with_keep_alive_while_idle(false) - .with_timeout(RPC_TIMEOUT); - - opts - } -} - -impl LazyEtcdClient { - pub fn new(endpoints: &[String], conf: ConnectionConfig) -> Self { - let mut inner = LazyEtcdClientInner::new(endpoints, conf); - inner.normalize_urls(); - Self(Arc::new(AsyncMutex::new(inner))) - } - - // For testing -- check whether the endpoints are properly normalized. 
- #[cfg(test)] - pub(super) fn endpoints(&self) -> Vec { - self.0.blocking_lock().endpoints.clone() - } - - async fn get_cli(&self) -> Result { - let mut l = self.0.lock().await; - l.get_cli().await.cloned() - } -} - -#[derive(Clone)] -pub struct LazyEtcdClientInner { - conf: ConnectionConfig, - endpoints: Vec, - - last_modified: Option, - cli: Option, -} - -impl LazyEtcdClientInner { - fn new(endpoints: &[String], conf: ConnectionConfig) -> Self { - LazyEtcdClientInner { - conf, - endpoints: endpoints.iter().map(ToString::to_string).collect(), - last_modified: None, - cli: None, - } - } -} - -fn etcd_error_is_retryable(etcd_err: &EtcdError) -> bool { - match etcd_err { - EtcdError::InvalidArgs(_) - | EtcdError::InvalidUri(_) - | EtcdError::Utf8Error(_) - | EtcdError::InvalidHeaderValue(_) - | EtcdError::EndpointError(_) - | EtcdError::OpenSsl(_) => false, - EtcdError::TransportError(_) - | EtcdError::IoError(_) - | EtcdError::WatchError(_) - | EtcdError::LeaseKeepAliveError(_) - | EtcdError::ElectError(_) => true, - EtcdError::GRpcStatus(grpc) => matches!( - grpc.code(), - tonic::Code::Unavailable - | tonic::Code::Aborted - | tonic::Code::Internal - | tonic::Code::ResourceExhausted - ), - } -} - -#[derive(Debug)] -struct RetryableEtcdError(EtcdError); - -impl RetryError for RetryableEtcdError { - fn is_retryable(&self) -> bool { - etcd_error_is_retryable(&self.0) - } -} - -impl From for RetryableEtcdError { - fn from(e: EtcdError) -> Self { - Self(e) - } -} - -pub async fn retry(mut action: impl FnMut() -> F) -> Result -where - F: Future>, -{ - use futures::TryFutureExt; - let r = tikv_util::stream::retry_ext( - move || action().err_into::(), - RetryExt::default().with_fail_hook(|err| info!("retry it"; "err" => ?err)), - ) - .await; - r.map_err(|err| err.0.into()) -} - -impl LazyEtcdClientInner { - fn normalize_urls(&mut self) { - let enabled_tls = self.conf.tls.client_suite().is_ok(); - for endpoint in self.endpoints.iter_mut() { - // Don't touch them when the 
schemes already provided. - // Given etcd is based on gRPC (which relies on HTTP/2), - // there shouldn't be other schemes available (Hopefully...) - if endpoint.starts_with("http://") || endpoint.starts_with("https://") { - continue; - } - let expected_scheme = if enabled_tls { "https" } else { "http" }; - *endpoint = format!("{}://{}", expected_scheme, endpoint) - } - info!("log backup normalized etcd endpoints"; "endpoints" => ?self.endpoints); - } - - async fn connect(&mut self) -> Result<&EtcdStore> { - let store = retry(|| { - // For now, the interface of the `etcd_client` doesn't us to control - // how to create channels when connecting, hence we cannot update the tls config - // at runtime, now what we did is manually check that each time we are getting - // the clients. - etcd_client::Client::connect( - self.endpoints.clone(), - Some(self.conf.to_connection_options()), - ) - }) - .await - .context("during connecting to the etcd")?; - let store = EtcdStore::from(store); - let mut updater = TopologyUpdater::new(Arc::downgrade(store.inner())); - self.cli = Some(store); - updater.init(self.endpoints.iter().cloned()); - tokio::task::spawn(updater.main_loop()); - Ok(self.cli.as_ref().unwrap()) - } - - pub async fn get_cli(&mut self) -> Result<&EtcdStore> { - let modified = self.conf.tls.get_config().is_modified(&mut self.last_modified) - // Don't reload once we cannot check whether it is modified. - // Because when TLS disabled, this would always fail. 
- .unwrap_or(false); - if !modified && self.cli.is_some() { - return Ok(self.cli.as_ref().unwrap()); - } - info!("log backup reconnecting to the etcd service."; "tls_modified" => %modified, "connected_before" => %self.cli.is_some()); - self.connect().await - } -} - -#[async_trait::async_trait] -impl MetaStore for LazyEtcdClient { - type Snap = EtcdSnapshot; - - async fn snapshot(&self) -> Result { - self.get_cli().await?.snapshot().await - } - - async fn watch( - &self, - keys: super::Keys, - start_rev: i64, - ) -> Result { - self.get_cli().await?.watch(keys, start_rev).await - } - - async fn txn(&self, txn: super::Transaction) -> Result<()> { - self.get_cli().await?.txn(txn).await - } - - async fn txn_cond(&self, txn: super::CondTransaction) -> Result<()> { - self.get_cli().await?.txn_cond(txn).await - } -} - -#[cfg(test)] -mod tests { - use std::{fs::File, io::Write, path::PathBuf, sync::Arc}; - - use security::{SecurityConfig, SecurityManager}; - use tempfile::TempDir; - - use super::LazyEtcdClient; - use crate::{errors::Result, metadata::ConnectionConfig}; - - #[test] - fn test_normalize_url() -> Result<()> { - let endpoints = ["http://pd-1".to_owned(), "pd-2".to_owned()]; - let le = LazyEtcdClient::new(&endpoints, Default::default()); - assert_eq!(le.endpoints(), &["http://pd-1", "http://pd-2"]); - - let tempdir = TempDir::new()?; - let write_all = |path: &PathBuf, content| { - let mut f = File::create(path)?; - f.write_all(content)?; - Result::Ok(()) - }; - let ca = tempdir.path().join("ca"); - let cert = tempdir.path().join("cert"); - let key = tempdir.path().join("key"); - write_all(&ca, b"CA :3")?; - write_all(&cert, b"Cert :D")?; - write_all(&key, b"Key X)")?; - - let cfg = SecurityConfig { - ca_path: ca.to_string_lossy().into_owned(), - cert_path: cert.to_string_lossy().into_owned(), - key_path: key.to_string_lossy().into_owned(), - - ..Default::default() - }; - let sm = SecurityManager::new(&cfg).unwrap(); - let endpoints = ["https://pd-1".to_owned(), 
"pd-2".to_owned()]; - let le = LazyEtcdClient::new( - &endpoints, - ConnectionConfig { - tls: Arc::new(sm), - ..Default::default() - }, - ); - assert_eq!(le.endpoints(), &["https://pd-1", "https://pd-2"]); - Result::Ok(()) - } -} diff --git a/components/backup-stream/src/metadata/store/mod.rs b/components/backup-stream/src/metadata/store/mod.rs index 7cecda9720e..00f18c47042 100644 --- a/components/backup-stream/src/metadata/store/mod.rs +++ b/components/backup-stream/src/metadata/store/mod.rs @@ -1,13 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -cfg_if::cfg_if! { - if #[cfg(feature = "metastore-etcd")] { - pub mod etcd; - pub mod lazy_etcd; - pub use etcd::EtcdStore; - } -} - // Note: these mods also used for integration tests, // so we cannot compile them only when `#[cfg(test)]`. // (See https://github.com/rust-lang/rust/issues/84629) diff --git a/components/error_code/src/backup_stream.rs b/components/error_code/src/backup_stream.rs index a4b28b0e9ee..78cb544746d 100644 --- a/components/error_code/src/backup_stream.rs +++ b/components/error_code/src/backup_stream.rs @@ -3,9 +3,6 @@ define_error_codes! { "KV:LogBackup:", - ETCD => ("Etcd", - "Error during requesting the meta store(etcd)", - "Please check the connectivity between TiKV and PD."), PROTO => ("Proto", "Error during decode / encoding protocol buffer messages", "Please check the version of TiKV / BR are compatible, or whether data is corrupted." 
From 03499fff2e2bcb32cf33f76bc92f8b1cf3b4f0a3 Mon Sep 17 00:00:00 2001 From: Smilencer Date: Tue, 14 Nov 2023 14:26:14 +0800 Subject: [PATCH 1008/1149] metric: add ttl expire metric (#15949) close tikv/tikv#15873 metric: add ttl expire metric Signed-off-by: Smityz Signed-off-by: Smilencer Co-authored-by: Ping Yu Co-authored-by: Ping Yu --- metrics/grafana/tikv_details.json | 228 +++++++++++++++++++++++- src/server/ttl/ttl_compaction_filter.rs | 47 ++++- 2 files changed, 266 insertions(+), 9 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index cc89e8aeae5..3efa0e31066 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -8375,7 +8375,227 @@ "h": 7, "w": 12, "x": 0, - "y": 6 + "y": 62 + }, + "hiddenSeries": false, + "id": 23763573818, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_ttl_expire_kv_count_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 10, + "legendFormat": "{{instance}}", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TTL expire count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + 
"mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:35", + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:36", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 62 + }, + "hiddenSeries": false, + "id": 23763573819, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tikv_ttl_expire_kv_size_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 10, + "legendFormat": "{{instance}}", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TTL expire size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + 
"mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:35", + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:36", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 69 }, "id": 6985, "legend": { @@ -8471,7 +8691,7 @@ "h": 7, "w": 12, "x": 12, - "y": 6 + "y": 69 }, "id": 6987, "legend": { @@ -8564,7 +8784,7 @@ "h": 7, "w": 12, "x": 0, - "y": 13 + "y": 76 }, "id": 6986, "legend": { @@ -8691,7 +8911,7 @@ "h": 7, "w": 12, "x": 12, - "y": 13 + "y": 76 }, "id": 7326, "interval": null, diff --git a/src/server/ttl/ttl_compaction_filter.rs b/src/server/ttl/ttl_compaction_filter.rs index 06fc6981cf2..be4f0df6cf4 100644 --- a/src/server/ttl/ttl_compaction_filter.rs +++ b/src/server/ttl/ttl_compaction_filter.rs @@ -11,9 +11,23 @@ use engine_rocks::{ RocksTtlProperties, }; use engine_traits::raw_ttl::ttl_current_ts; +use prometheus::*; use crate::server::metrics::TTL_CHECKER_ACTIONS_COUNTER_VEC; +lazy_static! 
{ + pub static ref TTL_EXPIRE_KV_SIZE_COUNTER: IntCounter = register_int_counter!( + "tikv_ttl_expire_kv_size_total", + "Total size of rawkv ttl expire", + ) + .unwrap(); + pub static ref TTL_EXPIRE_KV_COUNT_COUNTER: IntCounter = register_int_counter!( + "tikv_ttl_expire_kv_count_total", + "Total number of rawkv ttl expire", + ) + .unwrap(); +} + #[derive(Default)] pub struct TtlCompactionFilterFactory { _phantom: PhantomData, @@ -41,10 +55,7 @@ impl CompactionFilterFactory for TtlCompactionFilterFactory { } let name = CString::new("ttl_compaction_filter").unwrap(); - let filter = TtlCompactionFilter:: { - ts: current, - _phantom: PhantomData, - }; + let filter = TtlCompactionFilter::::new(); Some((name, filter)) } @@ -56,6 +67,28 @@ impl CompactionFilterFactory for TtlCompactionFilterFactory { pub struct TtlCompactionFilter { ts: u64, _phantom: PhantomData, + expire_count: u64, + expire_size: u64, +} + +impl Drop for TtlCompactionFilter { + fn drop(&mut self) { + // Accumulate counters would slightly improve performance as prometheus counters + // are atomic variables underlying + TTL_EXPIRE_KV_SIZE_COUNTER.inc_by(self.expire_size); + TTL_EXPIRE_KV_COUNT_COUNTER.inc_by(self.expire_count); + } +} + +impl TtlCompactionFilter { + fn new() -> Self { + Self { + ts: ttl_current_ts(), + _phantom: PhantomData, + expire_count: 0, + expire_size: 0, + } + } } impl CompactionFilter for TtlCompactionFilter { @@ -83,7 +116,11 @@ impl CompactionFilter for TtlCompactionFilter { Ok(RawValue { expire_ts: Some(expire_ts), .. 
- }) if expire_ts <= self.ts => CompactionFilterDecision::Remove, + }) if expire_ts <= self.ts => { + self.expire_size += key.len() as u64 + value.len() as u64; + self.expire_count += 1; + CompactionFilterDecision::Remove + } Err(err) => { TTL_CHECKER_ACTIONS_COUNTER_VEC .with_label_values(&["ts_error"]) From d4eabb0fbb8854b970605ff84b58277e0410036b Mon Sep 17 00:00:00 2001 From: lucasliang Date: Tue, 14 Nov 2023 15:14:14 +0800 Subject: [PATCH 1009/1149] makefile: polish the validation of python path. (#15981) close tikv/tikv#15980 Signed-off-by: lucasliang --- Makefile | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index b54d4403669..632794f3208 100644 --- a/Makefile +++ b/Makefile @@ -164,11 +164,8 @@ ifeq ($(TIKV_BUILD_RUSTC_TARGET),aarch64-unknown-linux-gnu) export RUSTFLAGS := $(RUSTFLAGS) -Ctarget-feature=-outline-atomics endif -ifeq ($(shell basename $(shell which python 2>/dev/null)),python) -PY := python -else -PY := python3 -endif +# If both python and python3 are installed, it will choose python as a preferred option. 
+PYTHON := $(shell command -v python 2> /dev/null || command -v python3 2> /dev/null) # Almost all the rules in this Makefile are PHONY # Declaring a rule as PHONY could improve correctness @@ -263,7 +260,7 @@ dist_release: @mkdir -p ${BIN_PATH} @cp -f ${CARGO_TARGET_DIR}/release/tikv-ctl ${CARGO_TARGET_DIR}/release/tikv-server ${BIN_PATH}/ ifeq ($(shell uname),Linux) # Macs binary isn't elf format - $(PY) scripts/check-bins.py --features "${ENABLE_FEATURES}" --check-release ${BIN_PATH}/tikv-ctl ${BIN_PATH}/tikv-server + $(PYTHON) scripts/check-bins.py --features "${ENABLE_FEATURES}" --check-release ${BIN_PATH}/tikv-ctl ${BIN_PATH}/tikv-server endif # Build with release flag as if it were for distribution, but without From 36e2154f12e85ce5edc0a47d03757d826c37ac64 Mon Sep 17 00:00:00 2001 From: Smilencer Date: Tue, 14 Nov 2023 16:29:14 +0800 Subject: [PATCH 1010/1149] raftstore: Inherit raftdb-path when upgrading to raft-engine (#15809) close tikv/tikv#15771 Inherit raftdb-path when upgrading to raft-engine, and support to custom set raft-engine dir when upgrading Signed-off-by: Smilencer Signed-off-by: Smityz Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: lucasliang --- components/tikv_util/src/config.rs | 324 ++++++++++++++++++++++++++++- src/config/mod.rs | 250 +++++++++++++++++++++- 2 files changed, 560 insertions(+), 14 deletions(-) diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index 39e143fc04c..7b3e6cd2469 100644 --- a/components/tikv_util/src/config.rs +++ b/components/tikv_util/src/config.rs @@ -1588,9 +1588,10 @@ macro_rules! numeric_enum_serializing_mod { /// States: /// 1. Init - Only source directory contains Raft data. /// 2. Migrating - A marker file contains the path of source directory. The -/// source directory contains a complete copy of Raft data. Target -/// directory may exist. 3. Completed - Only target directory contains Raft -/// data. 
Marker file may exist. +/// source directory contains a complete copy of Raft data. Target +/// directory may exist. +/// 3. Completed - Only target directory contains Raft data. Marker file may +/// exist. pub struct RaftDataStateMachine { root: PathBuf, in_progress_marker: PathBuf, @@ -1681,7 +1682,7 @@ impl RaftDataStateMachine { pub fn after_dump_data(&mut self) { assert!(Self::data_exists(&self.source)); assert!(Self::data_exists(&self.target)); - Self::must_remove(&self.source); // Enters the `Completed` state. + Self::must_remove_except(&self.source, &self.target); // Enters the `Completed` state. Self::must_remove(&self.in_progress_marker); } @@ -1733,6 +1734,31 @@ impl RaftDataStateMachine { } } + // Remove all files and directories under `remove_path` except `retain_path`. + fn must_remove_except(remove_path: &Path, retain_path: &Path) { + if !remove_path.exists() { + info!("Path not exists"; "path" => %remove_path.display()); + return; + } + if !remove_path.is_dir() { + info!("Path is not a directory, so remove directly"; "path" => %remove_path.display()); + Self::must_remove(remove_path); + return; + } + if !retain_path.starts_with(remove_path) { + info!("Removing directory as retain path is not under remove path"; "retain path" => %retain_path.display(), "remove path" => %remove_path.display()); + Self::must_remove(remove_path); + return; + } + + for entry in fs::read_dir(remove_path).unwrap() { + let sub_path = entry.unwrap().path(); + if sub_path != retain_path { + Self::must_remove(&sub_path); + } + } + } + fn must_rename_dir(from: &Path, to: &Path) { fs::rename(from, to).unwrap(); let mut dir = to.to_path_buf(); @@ -1740,11 +1766,35 @@ impl RaftDataStateMachine { Self::sync_dir(&dir); } - fn data_exists(path: &Path) -> bool { - if !path.exists() || !path.is_dir() { + #[inline] + fn dir_exists(path: &Path) -> bool { + path.exists() && path.is_dir() + } + + pub fn raftengine_exists(path: &Path) -> bool { + if !Self::dir_exists(path) { return false; } 
- fs::read_dir(path).unwrap().next().is_some() + fs::read_dir(path).unwrap().any(|entry| { + if let Ok(e) = entry { + let p = e.path(); + p.is_file() && p.extension().map_or(false, |ext| ext == "raftlog") + } else { + false + } + }) + } + + pub fn raftdb_exists(path: &Path) -> bool { + if !Self::dir_exists(path) { + return false; + } + let current_file_path = path.join("CURRENT"); + current_file_path.exists() && current_file_path.is_file() + } + + pub fn data_exists(path: &Path) -> bool { + Self::raftengine_exists(path) || Self::raftdb_exists(path) } fn sync_dir(dir: &Path) { @@ -2348,6 +2398,98 @@ yyy = 100 ); } + #[test] + fn test_raft_engine_switch() { + // default setting, raft-db and raft-engine are not in the same place, need + // dump raft data from raft-db + let dir = tempfile::Builder::new().tempdir().unwrap(); + let root = dir.path().join("root"); + let source = root.join("source"); + fs::create_dir_all(&source).unwrap(); + let raftdb_data = source.join("CURRENT"); + fs::File::create(raftdb_data).unwrap(); + let target = root.join("target"); + fs::create_dir_all(&target).unwrap(); + let mut state = RaftDataStateMachine::new( + root.to_str().unwrap(), + source.to_str().unwrap(), + target.to_str().unwrap(), + ); + state.validate(true).unwrap(); + let should_dump = state.before_open_target(); + assert!(should_dump); + fs::remove_dir_all(&root).unwrap(); + + // raft-db is eventually moved, can't dump from raft-db + let dir = tempfile::Builder::new().tempdir().unwrap(); + let root = dir.path().join("root"); + let source = root.join("source"); + fs::create_dir_all(&source).unwrap(); + let target = root.join("target"); + fs::create_dir_all(&target).unwrap(); + state = RaftDataStateMachine::new( + root.to_str().unwrap(), + source.to_str().unwrap(), + target.to_str().unwrap(), + ); + state.validate(true).unwrap_err(); + fs::remove_dir_all(&root).unwrap(); + + // when setting raft-db dir, raft-engine dir is not set, raft-engine dir + // inherit from raft-db dir, 
need to dump raft data from raft-db + let dir = tempfile::Builder::new().tempdir().unwrap(); + let root = dir.path().join("root"); + let source = root.join("source"); + fs::create_dir_all(&source).unwrap(); + let raftdb_data = source.join("CURRENT"); + fs::File::create(raftdb_data).unwrap(); + let target = source.join("target"); + fs::create_dir_all(&target).unwrap(); + state = RaftDataStateMachine::new( + root.to_str().unwrap(), + source.to_str().unwrap(), + target.to_str().unwrap(), + ); + state.validate(true).unwrap(); + let should_dump = state.before_open_target(); + assert!(should_dump); + fs::remove_dir_all(&root).unwrap(); + + // inherit scenario raft-db is eventually moved, can't dump from raft-db + let dir = tempfile::Builder::new().tempdir().unwrap(); + let root = dir.path().join("root"); + let source = root.join("source"); + fs::create_dir_all(&source).unwrap(); + let target = source.join("target"); + fs::create_dir_all(&target).unwrap(); + state = RaftDataStateMachine::new( + root.to_str().unwrap(), + source.to_str().unwrap(), + target.to_str().unwrap(), + ); + state.validate(true).unwrap_err(); + fs::remove_dir_all(&root).unwrap(); + + // raft-db dump from raft-engine + let dir = tempfile::Builder::new().tempdir().unwrap(); + let root = dir.path().join("root"); + let source = root.join("source"); + fs::create_dir_all(&source).unwrap(); + let raftdb_data = source.join("CURRENT"); + fs::File::create(raftdb_data).unwrap(); + let target = source.join("target"); + fs::create_dir_all(&target).unwrap(); + let mut state = RaftDataStateMachine::new( + root.to_str().unwrap(), + source.to_str().unwrap(), + target.to_str().unwrap(), + ); + state.validate(true).unwrap(); + let should_dump = state.before_open_target(); + assert!(should_dump); + fs::remove_dir_all(&root).unwrap(); + } + #[test] fn test_raft_data_migration() { fn run_migration(root: &Path, source: &Path, target: &Path, check: F) { @@ -2370,12 +2512,15 @@ yyy = 100 fs::write(&marker, 
backup_marker).unwrap(); } - let source_file = source.join("file"); - let target_file = target.join("file"); + let mut source_file = source.join("CURRENT"); + let target_file = target.join("0000000000000001.raftlog"); if !target.exists() { fs::create_dir_all(target).unwrap(); check(); } + if !source_file.exists() { + source_file = source.join("0000000000000001.raftlog"); + } fs::copy(source_file, target_file).unwrap(); check(); state.after_dump_data_with_check(&check); @@ -2407,7 +2552,7 @@ yyy = 100 let target = root.join("target"); fs::create_dir_all(&target).unwrap(); // Write some data into source. - let source_file = source.join("file"); + let source_file = source.join("CURRENT"); File::create(source_file).unwrap(); let backup = dir.path().join("backup"); @@ -2423,4 +2568,163 @@ yyy = 100 copy_dir(&backup, &root).unwrap(); }); } + + #[test] + fn test_must_remove_except() { + fn create_raftdb(path: &Path) { + fs::create_dir(path).unwrap(); + // CURRENT file as the marker of raftdb. + let raftdb_data = path.join("CURRENT"); + fs::File::create(raftdb_data).unwrap(); + } + + fn create_raftengine(path: &Path) { + fs::create_dir(path).unwrap(); + let raftengine_data = path.join("raftengine_data"); + fs::File::create(raftengine_data).unwrap(); + } + + fn create_test_root(path: &Path) { + fs::create_dir(path).unwrap(); + } + + fn raftengine_must_exist(path: &Path) { + assert!(path.exists()); + let raftengine_data = path.join("raftengine_data"); + assert!(raftengine_data.exists()); + } + + fn raftdb_must_not_exist(path: &Path) { + assert!(!path.exists()); + let raftdb_data = path.join("raftdb_data"); + assert!(!raftdb_data.exists()); + } + let test_dir = tempfile::Builder::new() + .tempdir() + .unwrap() + .into_path() + .join("test_must_remove_except"); + + // before: + // test_must_remove_except + // ├── raftdb + // │ └── raftdb_data + // └── raftengine + // └── raftengine_data + // + // after: + // test_must_remove_except + // └── raftengine + // └── raftengine_data 
+ create_test_root(&test_dir); + let raftdb_dir = test_dir.join("raftdb"); + let raftengine_dir = test_dir.join("raftengine"); + create_raftdb(&raftdb_dir); + create_raftengine(&raftengine_dir); + RaftDataStateMachine::must_remove_except(&raftdb_dir, &raftengine_dir); + raftengine_must_exist(&raftengine_dir); + raftdb_must_not_exist(&raftdb_dir); + fs::remove_dir_all(&test_dir).unwrap(); + + // before: + // test_must_remove_except/ + // └── raftdb + // ├── raftdb_data + // └── raftengine + // └── raftengine_data + // + // after: + // test_must_remove_except/ + // └── raftdb + // └── raftengine + // └── raftengine_data + create_test_root(&test_dir); + let raftdb_dir = test_dir.join("raftdb"); + let raftengine_dir = raftdb_dir.join("raftengine"); + create_raftdb(&raftdb_dir); + create_raftengine(&raftengine_dir); + RaftDataStateMachine::must_remove_except(&raftdb_dir, &raftengine_dir); + raftengine_must_exist(&raftengine_dir); + assert!(!test_dir.join("raftdb/raftdb_data").exists()); + fs::remove_dir_all(&test_dir).unwrap(); + + // before: + // test_must_remove_except/ + // └── raftengine + // ├── raftdb + // │ └── raftdb_data + // └── raftengine_data + // + // after: + // test_must_remove_except/ + // └── raftengine + // └── raftengine_data + create_test_root(&test_dir); + let raftengine_dir = test_dir.join("raftengine"); + let raftdb_dir = raftengine_dir.join("raftdb"); + create_raftengine(&raftengine_dir); + create_raftdb(&raftdb_dir); + RaftDataStateMachine::must_remove_except(&raftdb_dir, &raftengine_dir); + raftengine_must_exist(&raftengine_dir); + raftdb_must_not_exist(&raftdb_dir); + fs::remove_dir_all(&test_dir).unwrap(); + + // before: + // test_must_remove_except/ + // ├── raftdb_data + // └── raftengine + // └── raftengine_data + // + // after: + // test_must_remove_except/ + // └── raftengine + // └── raftengine_data + create_test_root(&test_dir); + let raftdb_data = test_dir.join("raftdb_data"); + fs::File::create(raftdb_data).unwrap(); + let 
raftengine_dir = test_dir.join("raftengine"); + create_raftengine(&raftengine_dir); + RaftDataStateMachine::must_remove_except(&test_dir, &raftengine_dir); + raftengine_must_exist(&raftengine_dir); + assert!(!test_dir.join("raftdb_data").exists()); + fs::remove_dir_all(&test_dir).unwrap(); + } + + #[test] + fn test_raft_data_exist() { + fn clear_dir(path: &PathBuf) { + if path.exists() { + fs::remove_dir_all(path).unwrap(); + } + fs::create_dir(path).unwrap(); + } + let test_dir = tempfile::Builder::new().tempdir().unwrap().into_path(); + + clear_dir(&test_dir); + fs::File::create(test_dir.join("0000000000000001.raftlog")).unwrap(); + assert!(RaftDataStateMachine::raftengine_exists(&test_dir)); + + clear_dir(&test_dir); + fs::File::create(test_dir.join("0000000000000001.raftlog")).unwrap(); + fs::File::create(test_dir.join("trash")).unwrap(); + assert!(RaftDataStateMachine::raftengine_exists(&test_dir)); + + clear_dir(&test_dir); + fs::File::create(test_dir.join("raftlog")).unwrap(); + assert!(!RaftDataStateMachine::raftengine_exists(&test_dir)); + + clear_dir(&test_dir); + assert!(!RaftDataStateMachine::raftengine_exists(&test_dir)); + + clear_dir(&test_dir); + fs::File::create(test_dir.join("CURRENT")).unwrap(); + assert!(RaftDataStateMachine::raftdb_exists(&test_dir)); + + clear_dir(&test_dir); + fs::File::create(test_dir.join("NOT_CURRENT")).unwrap(); + assert!(!RaftDataStateMachine::raftdb_exists(&test_dir)); + + clear_dir(&test_dir); + assert!(!RaftDataStateMachine::raftdb_exists(&test_dir)); + } } diff --git a/src/config/mod.rs b/src/config/mod.rs index b192a7ac5f7..4846b9465fc 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3544,8 +3544,24 @@ impl TikvConfig { .unwrap() .to_owned(); } - self.raft_store.raftdb_path = self.infer_raft_db_path(None)?; - self.raft_engine.config.dir = self.infer_raft_engine_path(None)?; + + match ( + self.raft_store.raftdb_path.is_empty(), + self.raft_engine.config.dir.is_empty(), + ) { + (false, true) => { + // If 
raftdb_path is specified, raft_engine_path will inherit it, this will be + // useful when updating from older version. + self.raft_engine.config.dir = + self.infer_raft_engine_path(Some(self.raft_store.raftdb_path.as_str()))?; + self.raft_store.raftdb_path = self.infer_raft_db_path(None)?; + } + _ => { + self.raft_store.raftdb_path = self.infer_raft_db_path(None)?; + self.raft_engine.config.dir = self.infer_raft_engine_path(None)?; + } + } + if self.log_backup.temp_path.is_empty() { self.log_backup.temp_path = config::canonicalize_sub_path(&self.storage.data_dir, "log-backup-temp")?; @@ -4075,7 +4091,10 @@ impl TikvConfig { last_cfg.raftdb.wal_dir, self.raftdb.wal_dir )); } - if last_raft_engine_dir != self.raft_engine.config.dir { + + if RaftDataStateMachine::raftengine_exists(Path::new(&last_raft_engine_dir)) + && last_raft_engine_dir != self.raft_engine.config.dir + { return Err(format!( "raft engine dir have been changed, former is '{}', \ current is '{}', please check if it is expected.", @@ -4733,6 +4752,21 @@ mod tests { }, }; + fn create_mock_raftdb(path: &Path) { + fs::create_dir_all(path).unwrap(); + fs::File::create(path.join("CURRENT")).unwrap(); + } + + fn create_mock_raftengine(path: &Path) { + fs::create_dir_all(path).unwrap(); + fs::File::create(path.join("0000000000000001.raftlog")).unwrap(); + } + + fn create_mock_kv_data(path: &Path) { + fs::create_dir_all(path.join("db")).unwrap(); + fs::File::create(path.join("db").join("CURRENT")).unwrap(); + } + #[test] fn test_case_macro() { let h = kebab_case!(HelloWorld); @@ -4783,7 +4817,8 @@ mod tests { tikv_cfg.raft_engine.mut_config().dir = "/raft/wal_dir".to_owned(); tikv_cfg.validate().unwrap(); - tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap_err(); + // no actual raft engine data + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); last_cfg.raft_engine.mut_config().dir = "/raft/wal_dir".to_owned(); tikv_cfg.validate().unwrap(); @@ -4834,6 +4869,213 @@ mod tests { ); } } + + let test_dir = 
tempfile::Builder::new() + .tempdir() + .unwrap() + .into_path() + .join("unittest_raft_engine_dir"); + let data_dir = test_dir.join("data"); + + // simulate tikv restart + // enable raft engine: true + // need dump data from raftdb: false + // custom raft dir: true + { + let raft_dir = test_dir.join("raft"); + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = raft_dir.to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + tikv_cfg.raft_engine.mut_config().dir = raft_dir.to_str().unwrap().to_owned(); + tikv_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_raftengine(&raft_dir); + create_mock_kv_data(&data_dir); + + tikv_cfg.validate().unwrap(); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); + fs::remove_dir_all(&test_dir).unwrap(); + } + + // simulate tikv restart + // enable raft engine: true + // need dump data from raftdb: false + // custom raft dir: false + { + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + tikv_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + tikv_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_kv_data(&data_dir); + create_mock_raftengine(&data_dir.join("raft-engine")); + + tikv_cfg.validate().unwrap(); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); + 
fs::remove_dir_all(&test_dir).unwrap(); + } + + // simulate tikv update + // enable raft engine: true + // need dump data from raftdb: true + // custom raft dir: false + { + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + tikv_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + tikv_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_kv_data(&data_dir); + create_mock_raftdb(&data_dir.join("raft")); + + tikv_cfg.validate().unwrap(); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); + fs::remove_dir_all(&test_dir).unwrap(); + } + + // multi raft engine dir + { + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + tikv_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + tikv_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_kv_data(&data_dir); + create_mock_raftdb(&data_dir.join("raft")); + create_mock_raftengine(&data_dir.join("raft-engine")); + + tikv_cfg.validate().unwrap_err(); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); + fs::remove_dir_all(&test_dir).unwrap(); + } + + // simulate tikv update with custom raft dir + // enable raft engine: true + // need dump data from 
raftdb: true + // custom raft dir: false + { + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = test_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + tikv_cfg.raft_engine.mut_config().dir = + test_dir.join("raft-engine").to_str().unwrap().to_owned(); + tikv_cfg.raft_store.raftdb_path = test_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_kv_data(&data_dir); + create_mock_raftdb(&test_dir.join("raft")); + + tikv_cfg.validate().unwrap(); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); + fs::remove_dir_all(&test_dir).unwrap(); + } + + // simulate tikv update with custom raft dir + // enable raft engine: true + // need dump data from raftdb: true + // custom raft dir: false + { + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = test_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + tikv_cfg.raft_engine.mut_config().dir = "".to_owned(); + tikv_cfg.raft_store.raftdb_path = test_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_kv_data(&data_dir); + create_mock_raftdb(&test_dir.join("raft")); + + tikv_cfg.validate().unwrap(); + assert_eq!( + tikv_cfg.raft_engine.config.dir, + test_dir.join("raft").join("raft-engine").to_str().unwrap() + ); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); + fs::remove_dir_all(&test_dir).unwrap(); + } + + // simulate tikv downgrade to raftdb + // need dump data from raft-engine + // custom raft dir: false + { + 
tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + last_cfg.raft_engine.enable = true; + + tikv_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + tikv_cfg.raft_engine.enable = false; + tikv_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_kv_data(&data_dir); + create_mock_raftengine(&data_dir.join("raft-engine")); + + tikv_cfg.validate().unwrap(); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap(); + fs::remove_dir_all(&test_dir).unwrap(); + } + + { + tikv_cfg = TikvConfig::default(); + last_cfg = TikvConfig::default(); + + last_cfg.raft_engine.mut_config().dir = + data_dir.join("raft-engine").to_str().unwrap().to_owned(); + last_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + last_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + last_cfg.raft_engine.enable = true; + + tikv_cfg.raft_engine.mut_config().dir = "".to_owned(); + tikv_cfg.raft_engine.enable = false; + tikv_cfg.raft_store.raftdb_path = data_dir.join("raft").to_str().unwrap().to_owned(); + tikv_cfg.storage.data_dir = data_dir.to_str().unwrap().to_owned(); + + create_mock_kv_data(&data_dir); + create_mock_raftengine(&data_dir.join("raft-engine")); + + tikv_cfg.validate().unwrap_err(); + tikv_cfg.check_critical_cfg_with(&last_cfg).unwrap_err(); + fs::remove_dir_all(&test_dir).unwrap(); + } } #[test] From 8c3676cb28a38b675368ae1cdd6f9ac0d15e05fb Mon Sep 17 00:00:00 2001 From: Lanqing Yang Date: Tue, 14 Nov 2023 22:32:16 -0800 Subject: [PATCH 1011/1149] chore: clean up fixme on latch.rs (#15974) close tikv/tikv#15973 
clean up fixme on latch.rs Signed-off-by: lyang24 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/storage/txn/latch.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/storage/txn/latch.rs b/src/storage/txn/latch.rs index a662d9bab79..c76d71d7c7a 100644 --- a/src/storage/txn/latch.rs +++ b/src/storage/txn/latch.rs @@ -62,8 +62,6 @@ impl Latch { } self.waiting.push_front(item); } - // FIXME: remove this clippy attribute once https://github.com/rust-lang/rust-clippy/issues/6784 is fixed. - #[allow(clippy::manual_flatten)] for it in self.waiting.iter_mut() { if let Some((v, _)) = it { if *v == key_hash { From c9052f9bca8ef290bbd860f6439ef806c0149a1e Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 15 Nov 2023 15:53:17 +0800 Subject: [PATCH 1012/1149] fips: enable OpenSSL FIPS mode on TiKV start if it's eligible (#15983) close tikv/tikv#15982 In this commit, a new crate, "fips", is introduced. This crate includes utilities designed to enable OpenSSL FIPS mode, catering to various OpenSSL releases. This commit ensures that TiKV starts with OpenSSL FIPS mode enabled if it is built with an environment variable "ENABLE_FIPS=1". 
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 13 ++++++++ Cargo.toml | 7 +++- cmd/tikv-ctl/Cargo.toml | 1 + cmd/tikv-ctl/src/main.rs | 6 ++++ cmd/tikv-server/Cargo.toml | 1 + cmd/tikv-server/src/main.rs | 14 ++++++++ components/backup-stream/Cargo.toml | 2 +- components/cloud/Cargo.toml | 2 +- components/cloud/azure/Cargo.toml | 2 +- components/encryption/Cargo.toml | 2 +- components/encryption/export/Cargo.toml | 2 +- components/external_storage/Cargo.toml | 2 +- components/file_system/Cargo.toml | 2 +- components/fips/Cargo.toml | 12 +++++++ components/fips/build.rs | 32 ++++++++++++++++++ components/fips/src/lib.rs | 44 +++++++++++++++++++++++++ components/raftstore/Cargo.toml | 2 +- components/server/src/server.rs | 9 ----- components/server/src/server2.rs | 9 ----- components/server/src/setup.rs | 1 - components/sst_importer/Cargo.toml | 2 +- components/tidb_query_expr/Cargo.toml | 2 +- components/tikv_util/Cargo.toml | 2 +- scripts/check-bins.py | 2 +- src/lib.rs | 2 +- 25 files changed, 142 insertions(+), 33 deletions(-) create mode 100644 components/fips/Cargo.toml create mode 100644 components/fips/build.rs create mode 100644 components/fips/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index d629d2ac18a..390b748cddd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1866,6 +1866,16 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "fips" +version = "0.0.1" +dependencies = [ + "openssl", + "openssl-sys", + "slog", + "slog-global", +] + [[package]] name = "fix-hidden-lifetime-bug" version = "0.2.5" @@ -6317,6 +6327,7 @@ dependencies = [ "example_coprocessor_plugin", "fail", "file_system", + "fips", "flate2", "futures 0.3.15", "futures-executor", @@ -6433,6 +6444,7 @@ dependencies = [ "engine_traits", "error_code", "file_system", + "fips", "futures 0.3.15", "gag", "grpcio", @@ -6509,6 +6521,7 @@ dependencies = [ "clap 2.33.0", "encryption_export", "engine_traits", + "fips", 
"keys", "kvproto", "raft-engine", diff --git a/Cargo.toml b/Cargo.toml index 82846e98acf..2d26bf6afe4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -78,6 +78,7 @@ engine_traits_tests = { workspace = true } error_code = { workspace = true } fail = "0.5" file_system = { workspace = true } +fips = { workspace = true } flate2 = { version = "1.0", default-features = false, features = ["zlib"] } futures = { version = "0.3", features = ["thread-pool", "compat"] } futures-executor = "0.3.1" @@ -111,7 +112,7 @@ notify = "4" num-traits = "0.2.14" num_cpus = "1" online_config = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } parking_lot = "0.12" paste = "1.0" pd_client = { workspace = true } @@ -249,6 +250,7 @@ members = [ "components/error_code", "components/external_storage", "components/file_system", + "components/fips", "components/into_other", "components/keys", "components/log_wrappers", @@ -323,6 +325,7 @@ engine_traits_tests = { path = "components/engine_traits_tests", default-feature error_code = { path = "components/error_code" } external_storage = { path = "components/external_storage" } file_system = { path = "components/file_system" } +fips = { path = "components/fips" } gcp = { path = "components/cloud/gcp" } into_other = { path = "components/into_other" } keys = { path = "components/keys" } @@ -378,6 +381,8 @@ tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hot tokio-executor = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +openssl = "0.10" +openssl-sys = "0.9" [profile.dev.package.grpcio-sys] debug = false diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index e16fadf0836..82553a4b45a 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml 
@@ -54,6 +54,7 @@ engine_rocks = { workspace = true } engine_traits = { workspace = true } error_code = { workspace = true } file_system = { workspace = true } +fips = { workspace = true } futures = "0.3" gag = "1.0" grpcio = { workspace = true } diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index df17e81f1ef..a3961bbc928 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -61,11 +61,17 @@ mod fork_readonly_tikv; mod util; fn main() { + // OpenSSL FIPS mode should be enabled at the very start. + fips::maybe_enable(); + let opt = Opt::from_args(); // Initialize logger. init_ctl_logger(&opt.log_level); + // Print OpenSSL FIPS mode status. + fips::log_status(); + // Initialize configuration and security manager. let cfg_path = opt.config.as_ref(); let mut cfg = cfg_path.map_or_else( diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index ef278854dd7..6f916d7476d 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -36,6 +36,7 @@ pprof-fp = ["tikv/pprof-fp"] clap = "2.32" encryption_export = { workspace = true } engine_traits = { workspace = true } +fips = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } raft-engine = { git = "https://github.com/tikv/raft-engine.git" } diff --git a/cmd/tikv-server/src/main.rs b/cmd/tikv-server/src/main.rs index 3f4372c32cc..01354906b46 100644 --- a/cmd/tikv-server/src/main.rs +++ b/cmd/tikv-server/src/main.rs @@ -13,6 +13,9 @@ use tikv::{ }; fn main() { + // OpenSSL FIPS mode should be enabled at the very start. + fips::maybe_enable(); + let build_timestamp = option_env!("TIKV_BUILD_TIME"); let version_info = tikv::tikv_version_info(build_timestamp); @@ -217,6 +220,17 @@ fn main() { process::exit(1) } + // Sets the global logger ASAP. + // It is okay to use the config w/o `validate()`, + // because `initial_logger()` handles various conditions. + server::setup::initial_logger(&config); + + // Print version information. 
+ tikv::log_tikv_info(build_timestamp); + + // Print OpenSSL FIPS mode status. + fips::log_status(); + // Init memory related settings. config.memory.init(); diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 141954686c3..6a5a0edbba5 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -49,7 +49,7 @@ kvproto = { workspace = true } lazy_static = "1.4" log_wrappers = { workspace = true } online_config = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } pd_client = { workspace = true } pin-project = "1.0" prometheus = { version = "0.13", default-features = false, features = ["nightly"] } diff --git a/components/cloud/Cargo.toml b/components/cloud/Cargo.toml index 3931370390e..b5f1e8faffd 100644 --- a/components/cloud/Cargo.toml +++ b/components/cloud/Cargo.toml @@ -11,7 +11,7 @@ error_code = { workspace = true } futures-io = "0.3" kvproto = { workspace = true } lazy_static = "1.3" -openssl = "0.10" +openssl = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } rusoto_core = "0.46.0" diff --git a/components/cloud/azure/Cargo.toml b/components/cloud/azure/Cargo.toml index 04f00c4bb60..7dd98224a73 100644 --- a/components/cloud/azure/Cargo.toml +++ b/components/cloud/azure/Cargo.toml @@ -21,7 +21,7 @@ futures = "0.3" futures-util = { version = "0.3", default-features = false, features = ["io"] } kvproto = { workspace = true } oauth2 = { version = "4.0.0", default-features = false } -openssl = { version = "0.10.50" } +openssl = { workspace = true } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" slog = { workspace = true } diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index 336f2e1854f..9698618a4ba 100644 --- a/components/encryption/Cargo.toml +++ b/components/encryption/Cargo.toml @@ -28,7 +28,7 @@ hex = 
"0.4.2" kvproto = { workspace = true } lazy_static = "1.3" online_config = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } rand = "0.8" diff --git a/components/encryption/export/Cargo.toml b/components/encryption/export/Cargo.toml index 829e33ae5aa..1a7b64eb7be 100644 --- a/components/encryption/export/Cargo.toml +++ b/components/encryption/export/Cargo.toml @@ -21,7 +21,7 @@ encryption = { workspace = true } error_code = { workspace = true } file_system = { workspace = true } kvproto = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } protobuf = { version = "2.8", features = ["bytes"] } slog = { workspace = true } # better to not use slog-global, but pass in the logger diff --git a/components/external_storage/Cargo.toml b/components/external_storage/Cargo.toml index 25a852f9d5c..69de83e5474 100644 --- a/components/external_storage/Cargo.toml +++ b/components/external_storage/Cargo.toml @@ -19,7 +19,7 @@ futures-util = { version = "0.3", default-features = false, features = ["io"] } gcp = { workspace = true } kvproto = { workspace = true } lazy_static = "1.3" -openssl = "0.10" +openssl = { workspace = true } prometheus = { version = "0.13", default-features = false, features = ["nightly", "push"] } rand = "0.8" slog = { workspace = true } diff --git a/components/file_system/Cargo.toml b/components/file_system/Cargo.toml index fbd96c3c348..ef7df46936d 100644 --- a/components/file_system/Cargo.toml +++ b/components/file_system/Cargo.toml @@ -15,7 +15,7 @@ fs2 = "0.4" lazy_static = "1.3" libc = "0.2" online_config = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } parking_lot = "0.12" prometheus = { version = "0.13", features = ["nightly"] } prometheus-static-metric = "0.5" diff --git a/components/fips/Cargo.toml b/components/fips/Cargo.toml new file mode 100644 index 00000000000..ab0d2aa1cf7 
--- /dev/null +++ b/components/fips/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "fips" +version = "0.0.1" +edition = "2021" +publish = false + +[dependencies] +openssl = { workspace = true } +openssl-sys = { workspace = true } +slog = { workspace = true } +# better to not use slog-global, but pass in the logger +slog-global = { workspace = true } diff --git a/components/fips/build.rs b/components/fips/build.rs new file mode 100644 index 00000000000..5bfe4920c2d --- /dev/null +++ b/components/fips/build.rs @@ -0,0 +1,32 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::env; + +fn main() { + if !option_env!("ENABLE_FIPS").map_or(false, |v| v == "1") { + println!("cargo:rustc-cfg=disable_fips"); + return; + } + if let Ok(version) = env::var("DEP_OPENSSL_VERSION_NUMBER") { + let version = u64::from_str_radix(&version, 16).unwrap(); + + #[allow(clippy::unusual_byte_groupings)] + // Follow OpenSSL numeric release version identifier style: + // MNNFFPPS: major minor fix patch status + // See https://github.com/openssl/openssl/blob/OpenSSL_1_0_0-stable/crypto/opensslv.h + if version >= 0x3_00_00_00_0 { + println!("cargo:rustc-cfg=ossl3"); + } else { + println!("cargo:rustc-cfg=ossl1"); + } + } else { + panic!( + " + +The DEP_OPENSSL_VERSION_NUMBER environment variable is not found. +Please make sure \"openssl-sys\" is in fips's dependencies. + +" + ) + } +} diff --git a/components/fips/src/lib.rs b/components/fips/src/lib.rs new file mode 100644 index 00000000000..b466401af4f --- /dev/null +++ b/components/fips/src/lib.rs @@ -0,0 +1,44 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::sync::atomic::{AtomicUsize, Ordering}; + +static FIPS_VERSION: AtomicUsize = AtomicUsize::new(0); + +/// Enable OpenSSL FIPS mode if `can_enable` returns true. +/// It should be called at the very start of a program. 
+pub fn maybe_enable() { + if !can_enable() { + return; + } + #[cfg(ossl1)] + { + openssl::fips::enable(true).unwrap(); + FIPS_VERSION.store(1, Ordering::SeqCst); + return; + } + #[cfg(ossl3)] + { + std::mem::forget(openssl::provider::Provider::load(None, "fips").unwrap()); + FIPS_VERSION.store(3, Ordering::SeqCst); + return; + } + #[allow(unreachable_code)] + { + slog_global::warn!("OpenSSL FIPS mode is disabled unexpectedly"); + } +} + +/// Return true if it is built for FIPS mode. +pub fn can_enable() -> bool { + !cfg!(disable_fips) +} + +/// Prints OpenSSL FIPS mode status. +pub fn log_status() { + let ver = FIPS_VERSION.load(Ordering::SeqCst); + if ver == 0 { + slog_global::info!("OpenSSL FIPS mode is disabled"); + } else { + slog_global::info!("OpenSSL FIPS mode is enabled"; "openssl_major_version" => ver); + } +} diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 3a8caa421e5..81e809a0205 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -59,7 +59,7 @@ log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug log_wrappers = { workspace = true } memory_trace_macros = { workspace = true } online_config = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } ordered-float = "2.6" parking_lot = "0.12" pd_client = { workspace = true } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 72e09a9f8d8..625d9b7cb4f 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -198,15 +198,6 @@ pub fn run_tikv( service_event_tx: TikvMpsc::Sender, service_event_rx: TikvMpsc::Receiver, ) { - // Sets the global logger ASAP. - // It is okay to use the config w/o `validate()`, - // because `initial_logger()` handles various conditions. - initial_logger(&config); - - // Print version information. 
- let build_timestamp = option_env!("TIKV_BUILD_TIME"); - tikv::log_tikv_info(build_timestamp); - // Print resource quota. SysQuota::log_quota(); CPU_CORES_QUOTA_GAUGE.set(SysQuota::cpu_cores_quota()); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index eab384871e6..1e170abb1c3 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -194,15 +194,6 @@ pub fn run_tikv( service_event_tx: TikvMpsc::Sender, service_event_rx: TikvMpsc::Receiver, ) { - // Sets the global logger ASAP. - // It is okay to use the config w/o `validate()`, - // because `initial_logger()` handles various conditions. - initial_logger(&config); - - // Print version information. - let build_timestamp = option_env!("TIKV_BUILD_TIME"); - tikv::log_tikv_info(build_timestamp); - // Print resource quota. SysQuota::log_quota(); CPU_CORES_QUOTA_GAUGE.set(SysQuota::cpu_cores_quota()); diff --git a/components/server/src/setup.rs b/components/server/src/setup.rs index b11ffbc45b6..0228e0c7f28 100644 --- a/components/server/src/setup.rs +++ b/components/server/src/setup.rs @@ -74,7 +74,6 @@ fn make_engine_log_path(path: &str, sub_path: &str, filename: &str) -> String { }) } -#[allow(dead_code)] pub fn initial_logger(config: &TikvConfig) { fail::fail_point!("mock_force_uninitial_logger", |_| { LOG_INITIALIZED.store(false, Ordering::SeqCst); diff --git a/components/sst_importer/Cargo.toml b/components/sst_importer/Cargo.toml index f4f2504a8b3..b501e509a8a 100644 --- a/components/sst_importer/Cargo.toml +++ b/components/sst_importer/Cargo.toml @@ -39,7 +39,7 @@ kvproto = { workspace = true } lazy_static = "1.3" log_wrappers = { workspace = true } online_config = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } prometheus = { version = "0.13", default-features = false } protobuf = { version = "2.8", features = ["bytes"] } rand = "0.8" diff --git a/components/tidb_query_expr/Cargo.toml b/components/tidb_query_expr/Cargo.toml 
index 8a178401905..e09c0cd96de 100644 --- a/components/tidb_query_expr/Cargo.toml +++ b/components/tidb_query_expr/Cargo.toml @@ -17,7 +17,7 @@ log_wrappers = { workspace = true } match-template = "0.0.1" num = { version = "0.3", default-features = false } num-traits = "0.2" -openssl = { version = "0.10" } +openssl = { workspace = true } protobuf = "2" rand = "0.8.3" regex = "1.1" diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index b502a701136..0b6fc5978cb 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -37,7 +37,7 @@ nix = "0.24" num-traits = "0.2" num_cpus = "1" online_config = { workspace = true } -openssl = "0.10" +openssl = { workspace = true } parking_lot_core = "0.9.1" pin-project = "1.0" prometheus = { version = "0.13", features = ["nightly"] } diff --git a/scripts/check-bins.py b/scripts/check-bins.py index 421a4df5ef4..cbc748af958 100644 --- a/scripts/check-bins.py +++ b/scripts/check-bins.py @@ -14,7 +14,7 @@ "online_config", "online_config_derive", "tidb_query_codegen", "panic_hook", "fuzz", "fuzzer_afl", "fuzzer_honggfuzz", "fuzzer_libfuzzer", "coprocessor_plugin_api", "example_coprocessor_plugin", "memory_trace_macros", "case_macros", - "tracker", "test_raftstore_macro" + "tracker", "test_raftstore_macro", "fips" } JEMALLOC_SYMBOL = ["je_arena_boot", " malloc"] diff --git a/src/lib.rs b/src/lib.rs index a0ccff3c8cb..b300399e30a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -80,7 +80,7 @@ pub fn tikv_version_info(build_time: Option<&str>) -> String { /// return the build version of tikv-server pub fn tikv_build_version() -> String { - if option_env!("ENABLE_FIPS").map_or(false, |v| v == "1") { + if fips::can_enable() { format!("{}-{}", env!("CARGO_PKG_VERSION"), "fips") } else { env!("CARGO_PKG_VERSION").to_owned() From 26f36d4b54a70267e029ee324a3aa267688bab2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mustafa=20Ate=C5=9F=20Uzun?= Date: Wed, 15 Nov 2023 11:08:16 +0300 Subject: [PATCH 
1013/1149] chore: fix error message typo (#15363) ref tikv/tikv#15990 fix error message typo Co-authored-by: Xinye Tao Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/pd_client/src/util.rs | 2 +- components/tikv_kv/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/components/pd_client/src/util.rs b/components/pd_client/src/util.rs index 66b084d4998..329448a6ac6 100644 --- a/components/pd_client/src/util.rs +++ b/components/pd_client/src/util.rs @@ -436,7 +436,7 @@ impl Client { } } -/// The context of sending requets. +/// The context of sending request. pub struct Request { remain_request_count: usize, request_sent: usize, diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 25f58352750..1fe61b78633 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -553,7 +553,7 @@ pub enum ErrorInner { Request(ErrorHeader), #[error("timeout after {0:?}")] Timeout(Duration), - #[error("an empty requets")] + #[error("an empty request")] EmptyRequest, #[error("key is locked (backoff or cleanup) {0:?}")] KeyIsLocked(kvproto::kvrpcpb::LockInfo), From 6a6188076637de06698f0709118d4dca02eaa4c8 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Thu, 16 Nov 2023 10:18:47 +0800 Subject: [PATCH 1014/1149] raftstore: fine-tune SlowScore. 
(#15991) ref tikv/tikv#15909 Signed-off-by: lucasliang --- components/raftstore/src/store/util.rs | 35 ++++++++++++++++++--- components/raftstore/src/store/worker/pd.rs | 4 ++- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index d9076a67d8a..d1ef3fde75a 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1733,13 +1733,38 @@ pub struct RaftstoreDuration { } impl RaftstoreDuration { + #[inline] pub fn sum(&self) -> std::time::Duration { - self.store_wait_duration.unwrap_or_default() - + self.store_process_duration.unwrap_or_default() + self.delays_on_disk_io(true) + self.delays_on_net_io() + } + + #[inline] + /// Returns the delayed duration on Disk I/O. + pub fn delays_on_disk_io(&self, include_wait_duration: bool) -> std::time::Duration { + let duration = self.store_process_duration.unwrap_or_default() + self.store_write_duration.unwrap_or_default() - + self.store_commit_duration.unwrap_or_default() - + self.apply_wait_duration.unwrap_or_default() - + self.apply_process_duration.unwrap_or_default() + + self.apply_process_duration.unwrap_or_default(); + if include_wait_duration { + duration + + self.store_wait_duration.unwrap_or_default() + + self.apply_wait_duration.unwrap_or_default() + } else { + duration + } + } + + #[inline] + /// Returns the delayed duration on Network I/O. + /// + /// Normally, it can be reflected by the duraiton on + /// `store_commit_duraiton`. + pub fn delays_on_net_io(&self) -> std::time::Duration { + // The `store_commit_duration` serves as an indicator for latency + // during the duration of transferring Raft logs to peers and appending + // logs. In most scenarios, instances of latency fluctuations in the + // network are reflected by this duration. Hence, it is selected as a + // representative of network latency. 
+ self.store_commit_duration.unwrap_or_default() } } diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 79810289669..5e97adf8d3e 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -2261,7 +2261,9 @@ where } => self.handle_update_max_timestamp(region_id, initial_status, txn_ext), Task::QueryRegionLeader { region_id } => self.handle_query_region_leader(region_id), Task::UpdateSlowScore { id, duration } => { - self.slow_score.record(id, duration.sum()); + // Fine-tuned, `SlowScore` only takes the I/O jitters on the disk into account. + self.slow_score + .record(id, duration.delays_on_disk_io(false)); self.slow_trend_cause.record( tikv_util::time::duration_to_us(duration.store_wait_duration.unwrap()), Instant::now(), From a0dbe2d0b893489015fc99ae73c6646f7989fe32 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 16 Nov 2023 12:53:16 +0800 Subject: [PATCH 1015/1149] resource_control: do not force set override priority at handle gRPC request (#16003) close tikv/tikv#15994 Signed-off-by: glorv --- .../resource_control/src/resource_group.rs | 21 ++++++- src/server/service/kv.rs | 62 +++++++------------ 2 files changed, 44 insertions(+), 39 deletions(-) diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index b45a9833bb8..7e6d4279a25 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -77,7 +77,10 @@ impl TaskPriority { impl From for TaskPriority { fn from(value: u32) -> Self { // map the resource group priority value (1,8,16) to (Low,Medium,High) - if value < 6 { + // 0 means the priority is not set, so map it to medium by default. 
+ if value == 0 { + Self::Medium + } else if value < 6 { Self::Low } else if value < 11 { Self::Medium @@ -1430,4 +1433,20 @@ pub(crate) mod tests { &mgr.priority_limiters[1] )); } + + #[test] + fn test_task_priority() { + use TaskPriority::*; + let cases = [ + (0, Medium), + (1, Low), + (7, Medium), + (8, Medium), + (15, High), + (16, High), + ]; + for (value, priority) in cases { + assert_eq!(TaskPriority::from(value), priority); + } + } } diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 4a7395222f7..8426143d502 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -192,14 +192,14 @@ macro_rules! handle_request { handle_request!($fn_name, $future_name, $req_ty, $resp_ty, no_time_detail); }; ($fn_name: ident, $future_name: ident, $req_ty: ident, $resp_ty: ident, $time_detail: tt) => { - fn $fn_name(&mut self, ctx: RpcContext<'_>, mut req: $req_ty, sink: UnarySink<$resp_ty>) { + fn $fn_name(&mut self, ctx: RpcContext<'_>, req: $req_ty, sink: UnarySink<$resp_ty>) { forward_unary!(self.proxy, $fn_name, ctx, req, sink); let begin_instant = Instant::now(); let source = req.get_context().get_request_source().to_owned(); - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -229,20 +229,6 @@ macro_rules! handle_request { } } -// consume resource group penalty and set explicit group priority -// We override the override_priority here to make handling tasks easier. 
-fn consume_penalty_and_set_priority( - resource_manager: &Arc, - resource_control_ctx: &mut ResourceControlContext, -) { - resource_manager.consume_penalty(resource_control_ctx); - if resource_control_ctx.get_override_priority() == 0 { - let prioirty = resource_manager - .get_resource_group_priority(resource_control_ctx.get_resource_group_name()); - resource_control_ctx.override_priority = prioirty as u64; - } -} - macro_rules! set_total_time { ($resp:ident, $duration:expr,no_time_detail) => {}; ($resp:ident, $duration:expr,has_time_detail) => { @@ -490,12 +476,12 @@ impl Tikv for Service { ctx.spawn(task); } - fn coprocessor(&mut self, ctx: RpcContext<'_>, mut req: Request, sink: UnarySink) { + fn coprocessor(&mut self, ctx: RpcContext<'_>, req: Request, sink: UnarySink) { forward_unary!(self.proxy, coprocessor, ctx, req, sink); let source = req.get_context().get_request_source().to_owned(); - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -527,13 +513,13 @@ impl Tikv for Service { fn raw_coprocessor( &mut self, ctx: RpcContext<'_>, - mut req: RawCoprocessorRequest, + req: RawCoprocessorRequest, sink: UnarySink, ) { let source = req.get_context().get_request_source().to_owned(); - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC 
.with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -616,13 +602,13 @@ impl Tikv for Service { fn coprocessor_stream( &mut self, ctx: RpcContext<'_>, - mut req: Request, + req: Request, mut sink: ServerStreamingSink, ) { let begin_instant = Instant::now(); - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = &self.resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1162,10 +1148,10 @@ fn handle_batch_commands_request( let resp = future::ok(batch_commands_response::Response::default()); response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::invalid, String::default()); }, - Some(batch_commands_request::request::Cmd::Get(mut req)) => { - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + Some(batch_commands_request::request::Cmd::Get(req)) => { + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1183,10 +1169,10 @@ fn handle_batch_commands_request( response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::kv_get, source); } }, - Some(batch_commands_request::request::Cmd::RawGet(mut req)) => { - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + Some(batch_commands_request::request::Cmd::RawGet(req)) => { + let resource_control_ctx = req.get_context().get_resource_control_context(); if 
let Some(resource_manager) = resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1204,10 +1190,10 @@ fn handle_batch_commands_request( response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::raw_get, source); } }, - Some(batch_commands_request::request::Cmd::Coprocessor(mut req)) => { - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + Some(batch_commands_request::request::Cmd::Coprocessor(req)) => { + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1238,10 +1224,10 @@ fn handle_batch_commands_request( String::default(), ); } - $(Some(batch_commands_request::request::Cmd::$cmd(mut req)) => { - let resource_control_ctx = req.mut_context().mut_resource_control_context(); + $(Some(batch_commands_request::request::Cmd::$cmd(req)) => { + let resource_control_ctx = req.get_context().get_resource_control_context(); if let Some(resource_manager) = resource_manager { - consume_penalty_and_set_priority(resource_manager, resource_control_ctx); + resource_manager.consume_penalty(resource_control_ctx); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) From 1cd4753e02a72eb840def71c53fa1bdfafff28de Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 16 Nov 2023 16:39:46 +0800 Subject: [PATCH 1016/1149] titan: Introduce titan zstd dict compression (#15979) close tikv/tikv#5743 Introduce titan zstd dict compression Signed-off-by: Connor1996 Signed-off-by: 
Connor Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: lucasliang --- etc/config-template.toml | 7 +++++++ src/config/mod.rs | 12 ++++++++++++ tests/integrations/config/mod.rs | 4 ++++ tests/integrations/config/test-custom.toml | 2 ++ 4 files changed, 25 insertions(+) diff --git a/etc/config-template.toml b/etc/config-template.toml index 3e55004feb2..22e60693921 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -939,6 +939,13 @@ ## default: lz4 # blob-file-compression = "lz4" +## Set blob file zstd dictionary compression, default(0) will use zstd compression. +## It is recommended to set the dictionary size to values such as 4k or 16k. Additionally, +## the sample data size to train dictionary is of size 100X dictionary size innerly. +## It has no effect when `blob-file-compression` is not `zstd`. +## default: 0 +# zstd-dict-size = 0 + ## Specifics cache size for blob records ## default: 0 # blob-cache-size = "0GB" diff --git a/src/config/mod.rs b/src/config/mod.rs index 4846b9465fc..4c37ad0eafe 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -137,6 +137,8 @@ pub struct TitanCfConfig { #[online_config(skip)] pub blob_file_compression: CompressionType, #[online_config(skip)] + pub zstd_dict_size: ReadableSize, + #[online_config(skip)] pub blob_cache_size: ReadableSize, #[online_config(skip)] pub min_gc_batch_size: ReadableSize, @@ -170,6 +172,7 @@ impl Default for TitanCfConfig { Self { min_blob_size: ReadableSize::kb(1), // disable titan default blob_file_compression: CompressionType::Lz4, + zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), @@ -190,6 +193,15 @@ impl TitanCfConfig { let mut opts = RocksTitanDbOptions::new(); opts.set_min_blob_size(self.min_blob_size.0); opts.set_blob_file_compression(self.blob_file_compression.into()); + // To try zstd dict compression, set 
dict size to 4k, sample size to 100X dict + // size + opts.set_compression_options( + -14, // window_bits + 32767, // level + 0, // strategy + self.zstd_dict_size.0 as i32, // zstd dict size + self.zstd_dict_size.0 as i32 * 100, // zstd sample size + ); opts.set_blob_cache(self.blob_cache_size.0 as usize, -1, false, 0.0); opts.set_min_gc_batch_size(self.min_gc_batch_size.0); opts.set_max_gc_batch_size(self.max_gc_batch_size.0); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 2ab4ce5cc09..180ef37a94a 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -272,6 +272,7 @@ fn test_serde_custom_tikv_config() { let titan_cf_config = TitanCfConfig { min_blob_size: ReadableSize(2018), blob_file_compression: CompressionType::Zstd, + zstd_dict_size: ReadableSize::kb(16), blob_cache_size: ReadableSize::gb(12), min_gc_batch_size: ReadableSize::kb(12), max_gc_batch_size: ReadableSize::mb(12), @@ -432,6 +433,7 @@ fn test_serde_custom_tikv_config() { titan: TitanCfConfig { min_blob_size: ReadableSize(1024), // default value blob_file_compression: CompressionType::Lz4, + zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), @@ -506,6 +508,7 @@ fn test_serde_custom_tikv_config() { titan: TitanCfConfig { min_blob_size: ReadableSize(1024), // default value blob_file_compression: CompressionType::Lz4, + zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), @@ -580,6 +583,7 @@ fn test_serde_custom_tikv_config() { titan: TitanCfConfig { min_blob_size: ReadableSize(1024), // default value blob_file_compression: CompressionType::Lz4, + zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), diff --git 
a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index a9772e285af..0fe5df168cc 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -349,6 +349,7 @@ periodic-compaction-seconds = "10d" [rocksdb.defaultcf.titan] min-blob-size = "2018B" blob-file-compression = "zstd" +zstd-dict-size = "16KB" blob-cache-size = "12GB" min-gc-batch-size = "12KB" max-gc-batch-size = "12MB" @@ -609,6 +610,7 @@ max-compactions = 3 [raftdb.defaultcf.titan] min-blob-size = "2018B" blob-file-compression = "zstd" +zstd-dict-size = "16KB" blob-cache-size = "12GB" min-gc-batch-size = "12KB" max-gc-batch-size = "12MB" From 98e6f20be5cb2f817b56e050e97aa6df9222fb04 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Thu, 16 Nov 2023 15:23:16 -0800 Subject: [PATCH 1017/1149] config: Fix titan blob-run-mode setting (#15988) close tikv/tikv#15978, close tikv/tikv#15987 Fix titan config blob-run-mode's from implementation. Signed-off-by: tonyxuqqi --- components/engine_rocks/src/config.rs | 11 +++++++---- src/config/mod.rs | 23 ++++++++++++++++++++++- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/components/engine_rocks/src/config.rs b/components/engine_rocks/src/config.rs index e121a1cea18..d55c5cb3dfc 100644 --- a/components/engine_rocks/src/config.rs +++ b/components/engine_rocks/src/config.rs @@ -340,9 +340,9 @@ pub enum BlobRunMode { impl From for ConfigValue { fn from(mode: BlobRunMode) -> ConfigValue { let str_value = match mode { - BlobRunMode::Normal => "normal", - BlobRunMode::ReadOnly => "read-only", - BlobRunMode::Fallback => "fallback", + BlobRunMode::Normal => "kNormal", + BlobRunMode::ReadOnly => "kReadOnly", + BlobRunMode::Fallback => "kFallback", }; ConfigValue::String(str_value.into()) } @@ -366,8 +366,11 @@ impl FromStr for BlobRunMode { "normal" => Ok(BlobRunMode::Normal), "read-only" => Ok(BlobRunMode::ReadOnly), "fallback" => Ok(BlobRunMode::Fallback), + "kNormal" => 
Ok(BlobRunMode::Normal), + "kReadOnly" => Ok(BlobRunMode::ReadOnly), + "kFallback" => Ok(BlobRunMode::Fallback), m => Err(format!( - "expect: normal, read-only or fallback, got: {:?}", + "expect: normal, kNormal, read-only, kReadOnly, kFallback or fallback, got: {:?}", m )), } diff --git a/src/config/mod.rs b/src/config/mod.rs index 4c37ad0eafe..6620a485264 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -5775,7 +5775,28 @@ mod tests { let diff = config_value_to_string(diff.into_iter().collect()); assert_eq!(diff.len(), 1); assert_eq!(diff[0].0.as_str(), "blob_run_mode"); - assert_eq!(diff[0].1.as_str(), "fallback"); + assert_eq!(diff[0].1.as_str(), "kFallback"); + } + + #[test] + fn test_update_titan_blob_run_mode_config() { + let mut cfg = TikvConfig::default(); + cfg.rocksdb.titan.enabled = true; + let (_, cfg_controller, ..) = new_engines::(cfg); + for run_mode in [ + "kFallback", + "kNormal", + "kReadOnly", + "fallback", + "normal", + "read-only", + ] { + let change = HashMap::from([( + "rocksdb.defaultcf.titan.blob-run-mode".to_string(), + run_mode.to_string(), + )]); + cfg_controller.update_without_persist(change).unwrap(); + } } #[test] From 2631094f5afb3d112d18b32ee1c9f79a68c73216 Mon Sep 17 00:00:00 2001 From: Liqi Geng Date: Fri, 17 Nov 2023 13:59:18 +0800 Subject: [PATCH 1018/1149] Copr: do not change expression type for `Lower` (#16010) close tikv/tikv#16009 Copr: do not change expression type for `Lower` Signed-off-by: gengliqi --- components/tidb_query_expr/src/impl_string.rs | 37 ------------------- components/tidb_query_expr/src/lib.rs | 26 ++----------- 2 files changed, 3 insertions(+), 60 deletions(-) diff --git a/components/tidb_query_expr/src/impl_string.rs b/components/tidb_query_expr/src/impl_string.rs index 25c9294d533..fb330f91e03 100644 --- a/components/tidb_query_expr/src/impl_string.rs +++ b/components/tidb_query_expr/src/impl_string.rs @@ -2941,43 +2941,6 @@ mod tests { #[test] fn test_lower() { - // Test non-binary string 
case - let cases = vec![ - (Some(b"HELLO".to_vec()), Some(b"hello".to_vec())), - (Some(b"123".to_vec()), Some(b"123".to_vec())), - ( - Some("CAFÉ".as_bytes().to_vec()), - Some("café".as_bytes().to_vec()), - ), - ( - Some("数据库".as_bytes().to_vec()), - Some("数据库".as_bytes().to_vec()), - ), - ( - Some("НОЧЬ НА ОКРАИНЕ МОСКВЫ".as_bytes().to_vec()), - Some("ночь на окраине москвы".as_bytes().to_vec()), - ), - ( - Some("قاعدة البيانات".as_bytes().to_vec()), - Some("قاعدة البيانات".as_bytes().to_vec()), - ), - (None, None), - ]; - - for (arg, exp) in cases { - let output = RpnFnScalarEvaluator::new() - .push_param_with_field_type( - arg.clone(), - FieldTypeBuilder::new() - .tp(FieldTypeTp::VarString) - .charset(CHARSET_UTF8MB4) - .build(), - ) - .evaluate(ScalarFuncSig::Lower) - .unwrap(); - assert_eq!(output, exp); - } - // Test binary string case let cases = vec![ (Some(b"hello".to_vec()), Some(b"hello".to_vec())), diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index 61fb3612b63..f1aae1de746 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -357,27 +357,7 @@ pub fn map_unary_minus_int_func(value: ScalarFuncSig, children: &[Expr]) -> Resu } } -fn map_lower_sig(value: ScalarFuncSig, children: &[Expr]) -> Result { - if children.len() != 1 { - return Err(other_err!( - "ScalarFunction {:?} (params = {}) is not supported in batch mode", - value, - children.len() - )); - } - if children[0].get_field_type().is_binary_string_like() { - Ok(lower_fn_meta()) - } else { - let ret_field_type = children[0].get_field_type(); - Ok(match_template_charset! { - TT, match Charset::from_name(ret_field_type.get_charset()).map_err(tidb_query_datatype::codec::Error::from)? 
{ - Charset::TT => lower_utf8_fn_meta::(), - } - }) - } -} - -fn map_upper_sig(value: ScalarFuncSig, children: &[Expr]) -> Result { +fn map_upper_utf8_sig(value: ScalarFuncSig, children: &[Expr]) -> Result { if children.len() != 1 { return Err(other_err!( "ScalarFunction {:?} (params = {}) is not supported in batch mode", @@ -787,10 +767,10 @@ fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { ScalarFuncSig::Insert => insert_fn_meta(), ScalarFuncSig::InsertUtf8 => insert_utf8_fn_meta(), ScalarFuncSig::RightUtf8 => right_utf8_fn_meta(), - ScalarFuncSig::UpperUtf8 => map_upper_sig(value, children)?, + ScalarFuncSig::UpperUtf8 => map_upper_utf8_sig(value, children)?, ScalarFuncSig::Upper => upper_fn_meta(), - ScalarFuncSig::Lower => map_lower_sig(value, children)?, ScalarFuncSig::LowerUtf8 => map_lower_utf8_sig(value, children)?, + ScalarFuncSig::Lower => lower_fn_meta(), ScalarFuncSig::Locate2Args => locate_2_args_fn_meta(), ScalarFuncSig::Locate3Args => locate_3_args_fn_meta(), ScalarFuncSig::FieldInt => field_fn_meta::(), From eb7b1437714cb08cc185cebbfbd2cdc24e627343 Mon Sep 17 00:00:00 2001 From: Connor Date: Mon, 20 Nov 2023 20:29:40 +0800 Subject: [PATCH 1019/1149] titan: update titan to fix incorrect blob file size and change default value (#16020) close tikv/tikv#15971 fix titan incorrect blob file size metric and change default value Signed-off-by: Connor1996 --- Cargo.lock | 6 +++--- etc/config-template.toml | 8 ++++---- src/config/mod.rs | 9 ++++++--- tests/integrations/config/mod.rs | 20 ++++++++------------ tests/integrations/config/test-custom.toml | 4 ++-- 5 files changed, 23 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 390b748cddd..8ca813e3988 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2848,7 +2848,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#bd84144327cfb22bee21b6043673d12b90415e24" +source = 
"git+https://github.com/tikv/rust-rocksdb.git#1cdf55ba2fd2b132e8cd549146b96205ba4721ad" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2867,7 +2867,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#bd84144327cfb22bee21b6043673d12b90415e24" +source = "git+https://github.com/tikv/rust-rocksdb.git#1cdf55ba2fd2b132e8cd549146b96205ba4721ad" dependencies = [ "bzip2-sys", "cc", @@ -4707,7 +4707,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#bd84144327cfb22bee21b6043673d12b90415e24" +source = "git+https://github.com/tikv/rust-rocksdb.git#1cdf55ba2fd2b132e8cd549146b96205ba4721ad" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/etc/config-template.toml b/etc/config-template.toml index 22e60693921..cfa8e30af48 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -673,8 +673,8 @@ # enabled = false ## Maximum number of threads of `Titan` background gc jobs. -## default: 4 -# max-background-gc = 4 +## default: 1 +# max-background-gc = 1 ## Options for "Default" Column Family, which stores actual user data. [rocksdb.defaultcf] @@ -936,8 +936,8 @@ ## lz4: kLZ4Compression ## lz4hc: kLZ4HCCompression ## zstd: kZSTD -## default: lz4 -# blob-file-compression = "lz4" +## default: zstd +# blob-file-compression = "zstd" ## Set blob file zstd dictionary compression, default(0) will use zstd compression. ## It is recommended to set the dictionary size to values such as 4k or 16k. 
Additionally, diff --git a/src/config/mod.rs b/src/config/mod.rs index 6620a485264..1c29c0637ee 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -150,6 +150,7 @@ pub struct TitanCfConfig { #[online_config(skip)] #[doc(hidden)] #[serde(skip_serializing)] + #[deprecated = "Titan doesn't need to sample anymore"] pub sample_ratio: Option, #[online_config(skip)] pub merge_small_file_threshold: ReadableSize, @@ -160,18 +161,19 @@ pub struct TitanCfConfig { pub range_merge: bool, #[online_config(skip)] pub max_sorted_runs: i32, - // deprecated. #[online_config(skip)] #[doc(hidden)] #[serde(skip_serializing)] + #[deprecated = "The feature is removed"] pub gc_merge_rewrite: bool, } impl Default for TitanCfConfig { + #[allow(deprecated)] fn default() -> Self { Self { min_blob_size: ReadableSize::kb(1), // disable titan default - blob_file_compression: CompressionType::Lz4, + blob_file_compression: CompressionType::Zstd, zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), @@ -214,6 +216,7 @@ impl TitanCfConfig { opts } + #[allow(deprecated)] fn validate(&self) -> Result<(), Box> { if self.gc_merge_rewrite { return Err( @@ -1211,7 +1214,7 @@ impl Default for TitanDbConfig { enabled: false, dirname: "".to_owned(), disable_gc: false, - max_background_gc: 4, + max_background_gc: 1, purge_obsolete_files_period: ReadableDuration::secs(10), } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 180ef37a94a..3afcac261a4 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -271,19 +271,18 @@ fn test_serde_custom_tikv_config() { value.pd = PdConfig::new(vec!["example.com:443".to_owned()]); let titan_cf_config = TitanCfConfig { min_blob_size: ReadableSize(2018), - blob_file_compression: CompressionType::Zstd, + blob_file_compression: CompressionType::Lz4, zstd_dict_size: ReadableSize::kb(16), blob_cache_size: ReadableSize::gb(12), 
min_gc_batch_size: ReadableSize::kb(12), max_gc_batch_size: ReadableSize::mb(12), discardable_ratio: 0.00156, - sample_ratio: None, merge_small_file_threshold: ReadableSize::kb(21), blob_run_mode: BlobRunMode::Fallback, level_merge: true, range_merge: true, max_sorted_runs: 100, - gc_merge_rewrite: false, + ..Default::default() }; let titan_db_config = TitanDbConfig { enabled: true, @@ -432,19 +431,18 @@ fn test_serde_custom_tikv_config() { force_consistency_checks: true, titan: TitanCfConfig { min_blob_size: ReadableSize(1024), // default value - blob_file_compression: CompressionType::Lz4, + blob_file_compression: CompressionType::Zstd, zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), discardable_ratio: 0.5, - sample_ratio: None, merge_small_file_threshold: ReadableSize::mb(8), blob_run_mode: BlobRunMode::ReadOnly, level_merge: false, range_merge: true, max_sorted_runs: 20, - gc_merge_rewrite: false, + ..Default::default() }, prop_size_index_distance: 4000000, prop_keys_index_distance: 40000, @@ -507,19 +505,18 @@ fn test_serde_custom_tikv_config() { force_consistency_checks: true, titan: TitanCfConfig { min_blob_size: ReadableSize(1024), // default value - blob_file_compression: CompressionType::Lz4, + blob_file_compression: CompressionType::Zstd, zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), discardable_ratio: 0.5, - sample_ratio: None, merge_small_file_threshold: ReadableSize::mb(8), blob_run_mode: BlobRunMode::ReadOnly, // default value level_merge: false, range_merge: true, max_sorted_runs: 20, - gc_merge_rewrite: false, + ..Default::default() }, prop_size_index_distance: 4000000, prop_keys_index_distance: 40000, @@ -582,19 +579,18 @@ fn test_serde_custom_tikv_config() { force_consistency_checks: true, titan: TitanCfConfig { min_blob_size: 
ReadableSize(1024), // default value - blob_file_compression: CompressionType::Lz4, + blob_file_compression: CompressionType::Zstd, zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), min_gc_batch_size: ReadableSize::mb(16), max_gc_batch_size: ReadableSize::mb(64), discardable_ratio: 0.5, - sample_ratio: None, merge_small_file_threshold: ReadableSize::mb(8), blob_run_mode: BlobRunMode::ReadOnly, // default value level_merge: false, range_merge: true, max_sorted_runs: 20, - gc_merge_rewrite: false, + ..Default::default() }, prop_size_index_distance: 4000000, prop_keys_index_distance: 40000, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 0fe5df168cc..94184def8fb 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -348,7 +348,7 @@ periodic-compaction-seconds = "10d" [rocksdb.defaultcf.titan] min-blob-size = "2018B" -blob-file-compression = "zstd" +blob-file-compression = "lz4" zstd-dict-size = "16KB" blob-cache-size = "12GB" min-gc-batch-size = "12KB" @@ -609,7 +609,7 @@ max-compactions = 3 [raftdb.defaultcf.titan] min-blob-size = "2018B" -blob-file-compression = "zstd" +blob-file-compression = "lz4" zstd-dict-size = "16KB" blob-cache-size = "12GB" min-gc-batch-size = "12KB" From 9993a63993e721105a2d52b949cd60481967173f Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 22 Nov 2023 12:31:11 +0800 Subject: [PATCH 1020/1149] Dockerfile: update base image (#16033) close tikv/tikv#16032 Signed-off-by: Neil Shen --- Dockerfile.FIPS | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Dockerfile.FIPS b/Dockerfile.FIPS index fe34ab00f65..03195d4cf5b 100644 --- a/Dockerfile.FIPS +++ b/Dockerfile.FIPS @@ -1,21 +1,22 @@ # This Docker image contains a minimal build environment for a FIPS compliant TiKV. 
-FROM redhat/ubi8-minimal:8.6 as builder +FROM rockylinux:9 as builder -RUN microdnf install -y openssl-devel +RUN dnf install -y openssl-devel -RUN microdnf install -y \ +RUN dnf install -y \ gcc \ gcc-c++ \ - libstdc++-static \ make \ cmake \ perl \ git \ findutils \ curl \ - python3 && \ - microdnf clean all + python3 --allowerasing && \ + dnf --enablerepo=crb install -y \ + libstdc++-static && \ + dnf clean all # Install Rustup RUN curl https://sh.rustup.rs -sSf | sh -s -- --no-modify-path --default-toolchain none -y @@ -32,12 +33,13 @@ ENV ENABLE_FIPS 1 RUN make build_dist_release # Export to a clean image -FROM redhat/ubi8-minimal:8.6 -COPY --from=builder /tikv/target/release/tikv-server /tikv-server -COPY --from=builder /tikv/target/release/tikv-ctl /tikv-ctl +FROM rockylinux:9-minimal RUN microdnf install -y openssl +COPY --from=builder /tikv/target/release/tikv-server /tikv-server +COPY --from=builder /tikv/target/release/tikv-ctl /tikv-ctl + EXPOSE 20160 20180 ENTRYPOINT ["/tikv-server"] From 86d4a49848cd920e081c2cf0249977149835d1c5 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 22 Nov 2023 15:08:41 +0800 Subject: [PATCH 1021/1149] raftstore: check stale peer on leader missing (#16038) close tikv/tikv#11847, close tikv/tikv#15520, close pingcap/tidb#39130 Stale peers can impede TiKV store resolved ts and impact RTO for essential functions. Default 2-hour interval for stale peer check is insufficient for stale reads, flashbacks, and ebs backup. To mitigate this, we speed up stale read check by allowing TiKV to check for stale peers every 10 minutes in the event that a leader is missing. 
Signed-off-by: Neil Shen Co-authored-by: tonyxuqqi --- components/raftstore/src/store/fsm/peer.rs | 33 +++++++---- .../raftstore/src/store/local_metrics.rs | 5 +- components/raftstore/src/store/metrics.rs | 5 ++ components/raftstore/src/store/peer.rs | 14 +++-- metrics/grafana/tikv_details.json | 9 +++ tests/integrations/raftstore/test_merge.rs | 59 +++++++++++++++++++ 6 files changed, 107 insertions(+), 18 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 942514153c7..ee2daf1c3c8 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -6395,19 +6395,26 @@ where fail_point!("peer_check_stale_state", state != StaleState::Valid, |_| {}); match state { StaleState::Valid => (), - StaleState::LeaderMissing => { - warn!( - "leader missing longer than abnormal_leader_missing_duration"; - "region_id" => self.fsm.region_id(), - "peer_id" => self.fsm.peer_id(), - "expect" => %self.ctx.cfg.abnormal_leader_missing_duration, - ); - self.ctx - .raft_metrics - .leader_missing - .lock() - .unwrap() - .insert(self.region_id()); + StaleState::LeaderMissing | StaleState::MaybeLeaderMissing => { + if state == StaleState::LeaderMissing { + warn!( + "leader missing longer than abnormal_leader_missing_duration"; + "region_id" => self.fsm.region_id(), + "peer_id" => self.fsm.peer_id(), + "expect" => %self.ctx.cfg.abnormal_leader_missing_duration, + ); + self.ctx + .raft_metrics + .leader_missing + .lock() + .unwrap() + .insert(self.region_id()); + } + + // It's very likely that this is a stale peer. To prevent + // resolved ts from being blocked for too long, we check stale + // peer eagerly. 
+ self.fsm.peer.bcast_check_stale_peer_message(self.ctx); } StaleState::ToValidate => { // for peer B in case 1 above diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index aceacdb81ee..7207ac7869d 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -4,7 +4,7 @@ use std::sync::{Arc, Mutex}; use collections::HashSet; -use prometheus::local::LocalHistogram; +use prometheus::local::{LocalHistogram, LocalIntCounter}; use raft::eraftpb::MessageType; use tikv_util::time::{Duration, Instant}; use tracker::{Tracker, TrackerToken, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}; @@ -135,6 +135,7 @@ pub struct RaftMetrics { // local statistics for slowness pub stat_commit_log: RaftCommitLogStatistics, + pub check_stale_peer: LocalIntCounter, pub leader_missing: Arc>>, last_flush_time: Instant, @@ -172,6 +173,7 @@ impl RaftMetrics { wf_commit_log: STORE_WF_COMMIT_LOG_DURATION_HISTOGRAM.local(), wf_commit_not_persist_log: STORE_WF_COMMIT_NOT_PERSIST_LOG_DURATION_HISTOGRAM.local(), stat_commit_log: RaftCommitLogStatistics::default(), + check_stale_peer: CHECK_STALE_PEER_COUNTER.local(), leader_missing: Arc::default(), last_flush_time: Instant::now_coarse(), } @@ -211,6 +213,7 @@ impl RaftMetrics { self.wf_commit_not_persist_log.flush(); } + self.check_stale_peer.flush(); let mut missing = self.leader_missing.lock().unwrap(); LEADER_MISSING.set(missing.len() as i64); missing.clear(); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 8f7bc8af226..c2aff17a907 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -659,6 +659,11 @@ lazy_static! { "Total number of leader missed region." ).unwrap(); + pub static ref CHECK_STALE_PEER_COUNTER: IntCounter = register_int_counter!( + "tikv_raftstore_check_stale_peer", + "Total number of checking stale peers." 
+ ).unwrap(); + pub static ref INGEST_SST_DURATION_SECONDS: Histogram = register_histogram!( "tikv_snapshot_ingest_sst_duration_seconds", diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 90676411bfc..17d8e51f4cf 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -127,6 +127,7 @@ pub enum StaleState { Valid, ToValidate, LeaderMissing, + MaybeLeaderMissing, } #[derive(Debug)] @@ -2117,7 +2118,6 @@ where self.leader_missing_time = None; return StaleState::Valid; } - let naive_peer = !self.is_initialized() || !self.raft_group.raft.promotable(); // Updates the `leader_missing_time` according to the current state. // // If we are checking this it means we suspect the leader might be missing. @@ -2137,13 +2137,18 @@ where StaleState::ToValidate } Some(instant) - if instant.saturating_elapsed() >= ctx.cfg.abnormal_leader_missing_duration.0 - && !naive_peer => + if instant.saturating_elapsed() >= ctx.cfg.abnormal_leader_missing_duration.0 => { // A peer is considered as in the leader missing state // if it's initialized but is isolated from its leader or // something bad happens that the raft group can not elect a leader. - StaleState::LeaderMissing + if self.is_initialized() && self.raft_group.raft.promotable() { + StaleState::LeaderMissing + } else { + // Uninitialized peer and learner may not have leader info, + // even if there is a valid leader. 
+ StaleState::MaybeLeaderMissing + } } _ => StaleState::Valid, } @@ -5440,6 +5445,7 @@ where &mut self, ctx: &mut PollContext, ) { + ctx.raft_metrics.check_stale_peer.inc(); if self.check_stale_conf_ver < self.region().get_region_epoch().get_conf_ver() || self.region().get_region_epoch().get_conf_ver() == 0 { diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 3efa0e31066..fab335a8fd2 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -40597,6 +40597,15 @@ "legendFormat": "{{instance}}-{{reason}}", "refId": "A", "step": 10 + }, + { + "expr": "sum(delta(tikv_raftstore_check_stale_peer{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{instance}}-stale-peer", + "refId": "B", + "step": 10 } ], "thresholds": [], diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 7d964c03319..8482feb8481 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -464,6 +464,65 @@ fn test_node_gc_uninitialized_peer_after_merge() { cluster.must_region_not_exist(left.get_id(), 4); } +/// Test leader missing should issue check stale peer requests. 
+#[test_case(test_raftstore::new_node_cluster)] +// #[test_case(test_raftstore_v2::new_node_cluster)] +fn test_node_gc_uninitialized_peer_after_merge_on_leader_missing() { + let mut cluster = new_cluster(0, 4); + configure_for_merge(&mut cluster.cfg); + ignore_merge_target_integrity(&mut cluster.cfg, &cluster.pd_client); + cluster.cfg.raft_store.raft_election_timeout_ticks = 5; + cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(40); + cluster.cfg.raft_store.peer_stale_state_check_interval = ReadableDuration::millis(100); + cluster.cfg.raft_store.abnormal_leader_missing_duration = ReadableDuration::millis(100); + // Set a large max_leader_missing_duration so that check stale peer will + // only be triggered by leader missing. + cluster.cfg.raft_store.max_leader_missing_duration = ReadableDuration::hours(1); + + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.run_conf_change(); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + + // test if an uninitialized stale peer before conf removal is destroyed + // automatically + let region = pd_client.get_region(b"k1").unwrap(); + pd_client.must_add_peer(region.get_id(), new_peer(2, 2)); + pd_client.must_add_peer(region.get_id(), new_peer(3, 3)); + + cluster.must_split(®ion, b"k2"); + let left = pd_client.get_region(b"k1").unwrap(); + let right = pd_client.get_region(b"k2").unwrap(); + + // Block snapshot messages, so that new peers will never be initialized. + cluster.add_send_filter(CloneFilterFactory( + RegionPacketFilter::new(left.get_id(), 4) + .msg_type(MessageType::MsgSnapshot) + .direction(Direction::Recv), + )); + // Add peer (4,4), remove peer (4,4) and then merge regions. + // Peer (4,4) will be an an uninitialized stale peer. 
+ pd_client.must_add_peer(left.get_id(), new_peer(4, 4)); + cluster.must_region_exist(left.get_id(), 4); + cluster.add_send_filter(IsolationFilterFactory::new(4)); + pd_client.must_remove_peer(left.get_id(), new_peer(4, 4)); + pd_client.must_merge(left.get_id(), right.get_id()); + cluster.clear_send_filters(); + + // Wait for the peer (4,4) to be destroyed. + sleep_ms( + 3 * cluster + .cfg + .raft_store + .abnormal_leader_missing_duration + .as_millis(), + ); + cluster.must_region_not_exist(left.get_id(), 4); +} + // Test if a merge handled properly when there is a unfinished slow split before // merge. // No v2, it requires all peers to be available to check trim status. From dce0e55ad79d91ce10a5c6a886c3264d75d7a5ac Mon Sep 17 00:00:00 2001 From: Alex Feinberg Date: Wed, 22 Nov 2023 12:50:40 -0800 Subject: [PATCH 1022/1149] raftstore: make full compaction incremental, pause when load is high (#15995) ref tikv/tikv#15271 Makes full compaction incremental, by range. Currently regions' ranges are used as increments. Run a predicate ("load-check") function before starting full compaction and between each incremental range. If the function evaluates to false, pause with exponential backoff (up to a maximum duration) until it evaluates to true. If periodic full compaction is enabled, poll process CPU stats every 30 seconds to determine usage for the "load-check" function. If usage exceeds a certain threshold before full compaction starts, compaction will not be started, and if started, full compaction will be paused. This cpu usage is also exported as ``tikv_storage_process_stat_cpu_usage`` gauge metric. 
Signed-off-by: Alex Feinberg --- components/raftstore/src/store/fsm/store.rs | 121 ++++-- components/raftstore/src/store/metrics.rs | 1 + components/raftstore/src/store/mod.rs | 6 +- components/raftstore/src/store/msg.rs | 2 + .../raftstore/src/store/worker/compact.rs | 275 ++++++++++-- .../raftstore/src/store/worker/metrics.rs | 15 + components/raftstore/src/store/worker/mod.rs | 5 +- metrics/grafana/tikv_details.json | 400 ++++++++++++++++++ 8 files changed, 772 insertions(+), 53 deletions(-) diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 6227e28cd19..1f72bcd9c90 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -107,9 +107,10 @@ use crate::{ ReadDelegate, RefreshConfigRunner, RefreshConfigTask, RegionRunner, RegionTask, SplitCheckTask, }, - Callback, CasualMessage, CompactThreshold, GlobalReplicationState, InspectedRaftMessage, - MergeResultKind, PdTask, PeerMsg, PeerTick, RaftCommand, SignificantMsg, SnapManager, - StoreMsg, StoreTick, + worker_metrics::PROCESS_STAT_CPU_USAGE, + Callback, CasualMessage, CompactThreshold, FullCompactController, GlobalReplicationState, + InspectedRaftMessage, MergeResultKind, PdTask, PeerMsg, PeerTick, RaftCommand, + SignificantMsg, SnapManager, StoreMsg, StoreTick, }, Error, Result, }; @@ -121,8 +122,11 @@ pub const ENTRY_CACHE_EVICT_TICK_DURATION: Duration = Duration::from_secs(1); pub const MULTI_FILES_SNAPSHOT_FEATURE: Feature = Feature::require(6, 1, 0); // it only makes sense for large region // Every 30 minutes, check if we can run full compaction. This allows the config -// setting `periodic_full_compact_start_max_cpu` to be changed dynamically. +// setting `periodic_full_compact_start_times` to be changed dynamically. 
const PERIODIC_FULL_COMPACT_TICK_INTERVAL_DURATION: Duration = Duration::from_secs(30 * 60); +// If periodic full compaction is enabled (`periodic_full_compact_start_times` +// is set), sample load metrics every 10 minutes. +const LOAD_STATS_WINDOW_DURATION: Duration = Duration::from_secs(10 * 60); pub struct StoreInfo { pub kv_engine: EK, @@ -582,6 +586,8 @@ where pub pending_latency_inspect: Vec, pub safe_point: Arc, + + pub process_stat: Option, } impl PollContext @@ -780,6 +786,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> StoreTick::CompactLockCf => self.on_compact_lock_cf(), StoreTick::CompactCheck => self.on_compact_check_tick(), StoreTick::PeriodicFullCompact => self.on_full_compact_tick(), + StoreTick::LoadMetricsWindow => self.on_load_metrics_window_tick(), StoreTick::ConsistencyCheck => self.on_consistency_check_tick(), StoreTick::CleanupImportSst => self.on_cleanup_import_sst_tick(), } @@ -871,6 +878,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> self.register_cleanup_import_sst_tick(); self.register_compact_check_tick(); self.register_full_compact_tick(); + self.register_load_metrics_window_tick(); self.register_pd_store_heartbeat_tick(); self.register_compact_lock_cf_tick(); self.register_snap_mgr_gc_tick(); @@ -1468,6 +1476,7 @@ where sync_write_worker, pending_latency_inspect: vec![], safe_point: self.safe_point.clone(), + process_stat: None, }; ctx.update_ticks_timeout(); let tag = format!("[store {}]", ctx.store.get_id()); @@ -1625,7 +1634,7 @@ impl RaftBatchSystem { } else { None }; - + let bgworker_remote = background_worker.remote(); let workers = Workers { pd_worker, background_worker, @@ -1663,7 +1672,7 @@ impl RaftBatchSystem { ReadRunner::new(self.router.clone(), engines.raft.clone()), ); - let compact_runner = CompactRunner::new(engines.kv.clone()); + let compact_runner = CompactRunner::new(engines.kv.clone(), bgworker_remote); let cleanup_sst_runner = 
CleanupSstRunner::new(Arc::clone(&importer)); let gc_snapshot_runner = GcSnapshotRunner::new( meta.get_id(), @@ -2450,6 +2459,26 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } + fn register_load_metrics_window_tick(&self) { + // For now, we will only gather these metrics is periodic full compaction is + // enabled. + if !self.ctx.cfg.periodic_full_compact_start_times.is_empty() { + self.ctx + .schedule_store_tick(StoreTick::LoadMetricsWindow, LOAD_STATS_WINDOW_DURATION) + } + } + + fn on_load_metrics_window_tick(&mut self) { + self.register_load_metrics_window_tick(); + + let proc_stat = self + .ctx + .process_stat + .get_or_insert_with(|| ProcessStat::cur_proc_stat().unwrap()); + let cpu_usage: f64 = proc_stat.cpu_usage().unwrap(); + PROCESS_STAT_CPU_USAGE.set(cpu_usage); + } + fn register_full_compact_tick(&self) { if !self.ctx.cfg.periodic_full_compact_start_times.is_empty() { self.ctx.schedule_store_tick( @@ -2477,30 +2506,26 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER return; } - if self.ctx.global_stat.stat.is_busy.load(Ordering::SeqCst) { - warn!("full compaction may not run at this time, `is_busy` flag is true",); + let compact_predicate_fn = self.is_low_load_for_full_compact(); + // Do not start if the load is high. + if !compact_predicate_fn() { return; } - let mut proc_stats = ProcessStat::cur_proc_stat().unwrap(); - let cpu_usage = proc_stats.cpu_usage().unwrap(); - let max_start_cpu_usage = self.ctx.cfg.periodic_full_compact_start_max_cpu; - if cpu_usage > max_start_cpu_usage { - warn!( - "full compaction may not run at this time, cpu usage is above max"; - "cpu_usage" => cpu_usage, - "threshold" => max_start_cpu_usage, - ); - return; - } + let ranges = self.ranges_for_full_compact(); + + let compact_load_controller = + FullCompactController::new(1, 15 * 60, Box::new(compact_predicate_fn)); // Attempt executing a periodic full compaction. 
- // Note that full compaction will not run if other compaction tasks are running. - if let Err(e) = self - .ctx - .cleanup_scheduler - .schedule(CleanupTask::Compact(CompactTask::PeriodicFullCompact)) - { + // Note that full compaction will not run if another full compact tasks has + // started. + if let Err(e) = self.ctx.cleanup_scheduler.schedule(CleanupTask::Compact( + CompactTask::PeriodicFullCompact { + ranges, + compact_load_controller, + }, + )) { error!( "failed to schedule a periodic full compaction"; "store_id" => self.fsm.store.id, @@ -2509,6 +2534,52 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } + /// Use ranges assigned to each region as increments for full compaction. + fn ranges_for_full_compact(&self) -> Vec<(Vec, Vec)> { + let meta = self.ctx.store_meta.lock().unwrap(); + let mut ranges = Vec::with_capacity(meta.regions.len()); + + for region in meta.regions.values() { + let start_key = keys::enc_start_key(region); + let end_key = keys::enc_end_key(region); + ranges.push((start_key, end_key)) + } + ranges + } + + /// Returns a predicate `Fn` which is evaluated: + /// 1. Before full compaction runs: if `false`, we return and wait for the + /// next full compaction tick + /// (`PERIODIC_FULL_COMPACT_TICK_INTERVAL_DURATION`) before starting. If + /// true, we begin full compaction, which means the first incremental range + /// will be compactecd. See: ``StoreFsmDelegate::on_full_compact_tick`` + /// in this file. + /// + /// 2. After each incremental range finishes and before next one (if any) + /// starts. If `false`, we pause compaction and wait. See: + /// `CompactRunner::full_compact` in `worker/compact.rs`. 
+ fn is_low_load_for_full_compact(&self) -> impl Fn() -> bool { + let max_start_cpu_usage = self.ctx.cfg.periodic_full_compact_start_max_cpu; + let global_stat = self.ctx.global_stat.clone(); + move || { + if global_stat.stat.is_busy.load(Ordering::SeqCst) { + warn!("full compaction may not run at this time, `is_busy` flag is true",); + return false; + } + + let cpu_usage = PROCESS_STAT_CPU_USAGE.get(); + if cpu_usage > max_start_cpu_usage { + warn!( + "full compaction may not run at this time, cpu usage is above max"; + "cpu_usage" => cpu_usage, + "threshold" => max_start_cpu_usage, + ); + return false; + } + true + } + } + fn register_compact_check_tick(&self) { self.ctx.schedule_store_tick( StoreTick::CompactCheck, diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index c2aff17a907..d8282cfa486 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -215,6 +215,7 @@ make_static_metric! { pub label_enum RaftEventDurationType { compact_check, periodic_full_compact, + load_metrics_window, pd_store_heartbeat, snap_gc, compact_lock_cf, diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 4cae84d1d25..123289c2057 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -86,9 +86,9 @@ pub use self::{ worker::{ metrics as worker_metrics, need_compact, AutoSplitController, BatchComponent, Bucket, BucketRange, BucketStatsInfo, CachedReadDelegate, CheckLeaderRunner, CheckLeaderTask, - CompactThreshold, FlowStatistics, FlowStatsReporter, KeyEntry, LocalReadContext, - LocalReader, LocalReaderCore, PdStatsMonitor, PdTask, ReadDelegate, ReadExecutor, - ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, + CompactThreshold, FlowStatistics, FlowStatsReporter, FullCompactController, KeyEntry, + LocalReadContext, LocalReader, LocalReaderCore, PdStatsMonitor, PdTask, 
ReadDelegate, + ReadExecutor, ReadExecutorProvider, ReadProgress, ReadStats, RefreshConfigTask, RegionTask, SplitCheckRunner, SplitCheckTask, SplitConfig, SplitConfigManager, SplitInfo, StoreMetaDelegate, StoreStatsReporter, TrackVer, WriteStats, WriterContoller, BIG_REGION_CPU_OVERLOAD_THRESHOLD_RATIO, DEFAULT_BIG_REGION_BYTE_THRESHOLD, diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 249c550db14..bf3a0998647 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -436,6 +436,7 @@ impl PeerTick { pub enum StoreTick { CompactCheck, PeriodicFullCompact, + LoadMetricsWindow, PdStoreHeartbeat, SnapGc, CompactLockCf, @@ -454,6 +455,7 @@ impl StoreTick { StoreTick::CompactLockCf => RaftEventDurationType::compact_lock_cf, StoreTick::ConsistencyCheck => RaftEventDurationType::consistency_check, StoreTick::CleanupImportSst => RaftEventDurationType::cleanup_import_sst, + StoreTick::LoadMetricsWindow => RaftEventDurationType::load_metrics_window, } } } diff --git a/components/raftstore/src/store/worker/compact.rs b/components/raftstore/src/store/worker/compact.rs index abdbaf5e938..45fd7e586e7 100644 --- a/components/raftstore/src/store/worker/compact.rs +++ b/components/raftstore/src/store/worker/compact.rs @@ -4,19 +4,33 @@ use std::{ collections::VecDeque, error::Error as StdError, fmt::{self, Display, Formatter}, + sync::atomic::{AtomicBool, Ordering}, + time::Duration, }; use engine_traits::{KvEngine, RangeStats, CF_WRITE}; use fail::fail_point; +use futures_util::compat::Future01CompatExt; use thiserror::Error; -use tikv_util::{box_try, error, info, time::Instant, warn, worker::Runnable}; +use tikv_util::{ + box_try, debug, error, info, time::Instant, timer::GLOBAL_TIMER_HANDLE, warn, worker::Runnable, +}; +use yatp::Remote; -use super::metrics::{COMPACT_RANGE_CF, FULL_COMPACT}; +use super::metrics::{ + COMPACT_RANGE_CF, FULL_COMPACT, FULL_COMPACT_INCREMENTAL, FULL_COMPACT_PAUSE, 
+}; type Key = Vec; +static FULL_COMPACTION_IN_PROCESS: AtomicBool = AtomicBool::new(false); + pub enum Task { - PeriodicFullCompact, + PeriodicFullCompact { + // Ranges, or empty if we wish to compact the entire store + ranges: Vec<(Key, Key)>, + compact_load_controller: FullCompactController, + }, Compact { cf_name: String, @@ -34,6 +48,65 @@ pub enum Task { }, } +type CompactPredicateFn = Box bool + Send + Sync>; + +pub struct FullCompactController { + /// Initial delay between retries for ``FullCompactController::pause``. + pub initial_pause_duration_secs: u64, + /// Max delay between retries. + pub max_pause_duration_secs: u64, + /// Predicate function to evaluate that indicates if we can proceed with + /// full compaction. + pub incremental_compaction_pred: CompactPredicateFn, +} + +impl fmt::Debug for FullCompactController { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct("FullCompactController") + .field( + "initial_pause_duration_secs", + &self.initial_pause_duration_secs, + ) + .field("max_pause_duration_secs", &self.max_pause_duration_secs) + .finish() + } +} +impl FullCompactController { + pub fn new( + initial_pause_duration_secs: u64, + max_pause_duration_secs: u64, + incremental_compaction_pred: CompactPredicateFn, + ) -> Self { + Self { + initial_pause_duration_secs, + max_pause_duration_secs, + incremental_compaction_pred, + } + } + + /// Pause until `incremental_compaction_pred` evaluates to `true`: delay + /// using exponential backoff (initial value + /// `initial_pause_duration_secs`, max value `max_pause_duration_secs`) + /// between retries. 
+ pub async fn pause(&self) -> Result<(), Error> { + let mut duration_secs = self.initial_pause_duration_secs; + loop { + box_try!( + GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + Duration::from_secs(duration_secs)) + .compat() + .await + ); + if (self.incremental_compaction_pred)() { + break; + }; + duration_secs = self.max_pause_duration_secs.max(duration_secs * 2); + } + Ok(()) + } +} + +#[derive(Debug)] pub struct CompactThreshold { pub tombstones_num_threshold: u64, pub tombstones_percent_threshold: u64, @@ -60,7 +133,24 @@ impl CompactThreshold { impl Display for Task { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { - Task::PeriodicFullCompact => f.debug_struct("FullCompact").finish(), + Task::PeriodicFullCompact { + ref ranges, + ref compact_load_controller, + } => f + .debug_struct("PeriodicFullCompact") + .field( + "ranges", + &( + ranges + .first() + .map(|k| log_wrappers::Value::key(k.0.as_slice())), + ranges + .last() + .map(|k| log_wrappers::Value::key(k.1.as_slice())), + ), + ) + .field("compact_load_controller", compact_load_controller) + .finish(), Task::Compact { ref cf_name, ref start_key, @@ -120,33 +210,83 @@ pub enum Error { pub struct Runner { engine: E, + remote: Remote, } impl Runner where E: KvEngine, { - pub fn new(engine: E) -> Runner { - Runner { engine } + pub fn new(engine: E, remote: Remote) -> Runner { + Runner { engine, remote } } /// Periodic full compaction. + /// Note: this does not accept a `&self` due to async lifetime issues. /// - /// NOTE this is a highly experimental feature! + /// NOTE this is an experimental feature! /// - /// TODO: Do not start if there is heavy I/O. - /// TODO: Make it possible to rate limit, pause, or abort this by compacting - /// a range at a time. - pub fn full_compact(&mut self) -> Result<(), Error> { + /// TODO: Support stopping a full compaction. 
+ async fn full_compact( + engine: E, + ranges: Vec<(Key, Key)>, + compact_controller: FullCompactController, + ) -> Result<(), Error> { fail_point!("on_full_compact"); info!("full compaction started"); + let mut ranges: VecDeque<_> = ranges + .iter() + .map(|(start, end)| (Some(start.as_slice()), Some(end.as_slice()))) + .collect(); + if ranges.is_empty() { + ranges.push_front((None, None)) + } + let timer = Instant::now(); let full_compact_timer = FULL_COMPACT.start_coarse_timer(); - box_try!(self.engine.compact_range( - None, None, // Compact the entire key range. - true, // no other compaction will run when this is running - 1, // number of threads threads - )); + + while let Some(range) = ranges.pop_front() { + debug!( + "incremental range full compaction started"; + "start_key" => ?range.0.map(log_wrappers::Value::key), + "end_key" => ?range.1.map(log_wrappers::Value::key), + ); + let incremental_timer = FULL_COMPACT_INCREMENTAL.start_coarse_timer(); + box_try!(engine.compact_range( + range.0, range.1, // Compact the entire key range. + false, // non-exclusive + 1, // number of threads threads + )); + incremental_timer.observe_duration(); + debug!( + "finished incremental range full compaction"; + "remaining" => ranges.len(), + ); + // If there is at least one range remaining in `ranges` remaining, evaluate + // `compact_controller.incremental_compaction_pred`. If `true`, proceed to next + // range; otherwise, pause this task + // (see `FullCompactController::pause` for details) until predicate + // evaluates to true. 
+ if let Some(next_range) = ranges.front() { + if !(compact_controller.incremental_compaction_pred)() { + info!("pausing full compaction before next increment"; + "finished_start_key" => ?range.0.map(log_wrappers::Value::key), + "finished_end_key" => ?range.1.map(log_wrappers::Value::key), + "next_range_start_key" => ?next_range.0.map(log_wrappers::Value::key), + "next_range_end_key" => ?next_range.1.map(log_wrappers::Value::key), + "remaining" => ranges.len(), + ); + let pause_started = Instant::now(); + let pause_timer = FULL_COMPACT_PAUSE.start_coarse_timer(); + compact_controller.pause().await?; + pause_timer.observe_duration(); + info!("resuming incremental full compaction"; + "paused" => ?pause_started.saturating_elapsed(), + ); + } + } + } + full_compact_timer.observe_duration(); info!( "full compaction finished"; @@ -191,10 +331,28 @@ where fn run(&mut self, task: Task) { match task { - Task::PeriodicFullCompact => { - if let Err(e) = self.full_compact() { - error!("periodic full compaction failed"; "err" => %e); - } + Task::PeriodicFullCompact { + ranges, + compact_load_controller, + } => { + // Since periodic full compaction is submitted as a task to the background + // worker pool, verify we will not start full compaction if + // another full compaction is running in the background. 
+ if FULL_COMPACTION_IN_PROCESS.load(Ordering::SeqCst) + || FULL_COMPACTION_IN_PROCESS.swap(true, Ordering::SeqCst) + { + info!("full compaction is already in process, not starting"); + return; + }; + let engine = self.engine.clone(); + self.remote.spawn(async move { + if let Err(e) = + Self::full_compact(engine, ranges, compact_load_controller).await + { + error!("periodic full compaction failed"; "err" => %e); + } + FULL_COMPACTION_IN_PROCESS.store(false, Ordering::SeqCst); + }); } Task::Compact { cf_name, @@ -315,10 +473,19 @@ mod tests { }; use keys::data_key; use tempfile::Builder; + use tikv_util::yatp_pool::{DefaultTicker, FuturePool, YatpPoolBuilder}; use txn_types::{Key, TimeStamp, Write, WriteType}; use super::*; + fn make_compact_runner(engine: E) -> (FuturePool, Runner) + where + E: KvEngine, + { + let pool = YatpPoolBuilder::new(DefaultTicker::default()).build_future_pool(); + (pool.clone(), Runner::new(engine, pool.remote().clone())) + } + #[test] fn test_compact_range() { let path = Builder::new() @@ -326,8 +493,7 @@ mod tests { .tempdir() .unwrap(); let db = new_engine(path.path().to_str().unwrap(), &[CF_DEFAULT]).unwrap(); - - let mut runner = Runner::new(db.clone()); + let (_pool, mut runner) = make_compact_runner(db.clone()); // Generate the first SST file. 
let mut wb = db.write_batch(); @@ -494,7 +660,7 @@ mod tests { fn test_full_compact_deletes() { let tmp_dir = Builder::new().prefix("test").tempdir().unwrap(); let engine = open_db(tmp_dir.path().to_str().unwrap()); - let mut runner = Runner::new(engine.clone()); + let (_pool, mut runner) = make_compact_runner(engine.clone()); // mvcc_put 0..5 for i in 0..5 { @@ -522,7 +688,68 @@ mod tests { .unwrap(); assert_eq!(stats.num_entries - stats.num_versions, 5); - runner.run(Task::PeriodicFullCompact); + runner.run(Task::PeriodicFullCompact { + ranges: Vec::new(), + compact_load_controller: FullCompactController::new(0, 0, Box::new(|| true)), + }); + std::thread::sleep(Duration::from_millis(500)); + let stats = engine + .get_range_stats(CF_WRITE, &start, &end) + .unwrap() + .unwrap(); + assert_eq!(stats.num_entries - stats.num_versions, 0); + } + + #[test] + fn test_full_compact_incremental_pausable() { + let tmp_dir = Builder::new().prefix("test").tempdir().unwrap(); + let engine = open_db(tmp_dir.path().to_str().unwrap()); + let (_pool, mut runner) = make_compact_runner(engine.clone()); + + // mvcc_put 0..100 + for i in 0..100 { + let (k, v) = (format!("k{}", i), format!("value{}", i)); + mvcc_put(&engine, k.as_bytes(), v.as_bytes(), 1.into(), 2.into()); + } + engine.flush_cf(CF_WRITE, true).unwrap(); + + let (start, end) = (data_key(b"k0"), data_key(b"k5")); + let stats = engine + .get_range_stats(CF_WRITE, &start, &end) + .unwrap() + .unwrap(); + assert_eq!(stats.num_entries, stats.num_versions); + + for i in 0..100 { + let k = format!("k{}", i); + delete(&engine, k.as_bytes(), 3.into()); + } + engine.flush_cf(CF_WRITE, true).unwrap(); + + let stats = engine + .get_range_stats(CF_WRITE, &start, &end) + .unwrap() + .unwrap(); + assert_eq!(stats.num_entries - stats.num_versions, 100); + + let started_at = Instant::now(); + let pred_fn: CompactPredicateFn = + Box::new(move || Instant::now() - started_at > Duration::from_millis(500)); + let ranges = vec![ + 
(data_key(b"k0"), data_key(b"k25")), + (data_key(b"k25"), data_key(b"k50")), + (data_key(b"k50"), data_key(b"k100")), + ]; + runner.run(Task::PeriodicFullCompact { + ranges, + compact_load_controller: FullCompactController::new(1, 5, pred_fn), + }); + let stats = engine + .get_range_stats(CF_WRITE, &start, &end) + .unwrap() + .unwrap(); + assert_eq!(stats.num_entries - stats.num_versions, 100); + std::thread::sleep(Duration::from_secs(2)); let stats = engine .get_range_stats(CF_WRITE, &start, &end) .unwrap() diff --git a/components/raftstore/src/store/worker/metrics.rs b/components/raftstore/src/store/worker/metrics.rs index bdf24459011..2b10bc3e053 100644 --- a/components/raftstore/src/store/worker/metrics.rs +++ b/components/raftstore/src/store/worker/metrics.rs @@ -165,6 +165,21 @@ lazy_static! { "Bucketed histogram of full compaction for the storage." ) .unwrap(); + pub static ref FULL_COMPACT_INCREMENTAL: Histogram = register_histogram!( + "tikv_storage_full_compact_increment_duration_seconds", + "Bucketed histogram of full compaction increments for the storage." + ) + .unwrap(); + pub static ref FULL_COMPACT_PAUSE: Histogram = register_histogram!( + "tikv_storage_full_compact_pause_duration_seconds", + "Bucketed histogram of full compaction pauses for the storage." 
+ ) + .unwrap(); + pub static ref PROCESS_STAT_CPU_USAGE: Gauge = register_gauge!( + "tikv_storage_process_stat_cpu_usage", + "CPU usage measured over a 30 second window", + ) + .unwrap(); pub static ref REGION_HASH_HISTOGRAM: Histogram = register_histogram!( "tikv_raftstore_hash_duration_seconds", "Bucketed histogram of raftstore hash computation duration" diff --git a/components/raftstore/src/store/worker/mod.rs b/components/raftstore/src/store/worker/mod.rs index c6783238520..c47461d62ff 100644 --- a/components/raftstore/src/store/worker/mod.rs +++ b/components/raftstore/src/store/worker/mod.rs @@ -23,7 +23,10 @@ pub use self::{ cleanup::{Runner as CleanupRunner, Task as CleanupTask}, cleanup_snapshot::{Runner as GcSnapshotRunner, Task as GcSnapshotTask}, cleanup_sst::{Runner as CleanupSstRunner, Task as CleanupSstTask}, - compact::{need_compact, CompactThreshold, Runner as CompactRunner, Task as CompactTask}, + compact::{ + need_compact, CompactThreshold, FullCompactController, Runner as CompactRunner, + Task as CompactTask, + }, consistency_check::{Runner as ConsistencyCheckRunner, Task as ConsistencyCheckTask}, pd::{ new_change_peer_v2_request, FlowStatistics, FlowStatsReporter, HeartbeatTask, diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index fab335a8fd2..b5d45d2fea0 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -19282,6 +19282,406 @@ "yBucketNumber": null, "yBucketSize": null }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 67 + }, + "hiddenSeries": false, + "id": 24763574239, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 
1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": false, + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_full_compact_duration_seconds_bucket[5m])) by (le))", + "instant": false, + "interval": "", + "legendFormat": "", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Full compaction duration seconds", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:86", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:87", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + }, + "timeFrom": null, + "timeShift": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 75 + }, + "hiddenSeries": false, + "id": 24763574241, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + 
"steppedLine": false, + "targets": [ + { + "exemplar": false, + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_full_compact_pause_duration_seconds_bucket[5m])) by (le))", + "instant": false, + "interval": "", + "legendFormat": "", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Full compaction pause duration ", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:86", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:87", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 75 + }, + "hiddenSeries": false, + "id": 24763574240, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": false, + "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_full_compact_increment_duration_seconds_bucket[5m])) by (le))", + "instant": false, + "interval": "", + "legendFormat": "", + "queryType": 
"randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Full compaction per-increment duration ", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:86", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:87", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "unit": "percentunit" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 67 + }, + "hiddenSeries": false, + "id": 24763574242, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": false, + "expr": "tikv_storage_process_stat_cpu_usage", + "instant": false, + "interval": "", + "legendFormat": "", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Process Stat Cpu Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + 
"name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:86", + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:87", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, From 1b097636c91c7fc7556f3f3f4b850057eeeb4576 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Wed, 22 Nov 2023 20:26:41 -0800 Subject: [PATCH 1023/1149] update default value of region_compact_redundant_rows_percent (#16051) ref tikv/tikv#15282 Change the default value of raftstore.redundant-rows-percent-threshold to 20 from 100. This would triggers a compaction when a region has 20% entries as stale MVCC versions. Signed-off-by: tonyxuqqi Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- components/raftstore/src/store/config.rs | 11 +---------- etc/config-template.toml | 2 +- src/config/mod.rs | 2 +- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index facaa1514d8..62de6b57faf 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -440,7 +440,7 @@ impl Default for Config { region_compact_min_tombstones: 10000, region_compact_tombstones_percent: 30, region_compact_min_redundant_rows: 50000, - region_compact_redundant_rows_percent: None, + region_compact_redundant_rows_percent: Some(20), pd_heartbeat_tick_interval: ReadableDuration::minutes(1), pd_store_heartbeat_tick_interval: ReadableDuration::secs(10), // Disable periodic full compaction by default. 
@@ -630,15 +630,6 @@ impl Config { } } - if self.region_compact_redundant_rows_percent.is_none() { - if raft_kv_v2 { - self.region_compact_redundant_rows_percent = Some(20); - } else { - // Disable redundant rows check in default for v1. - self.region_compact_redundant_rows_percent = Some(100); - } - } - // When use raft kv v2, we can set raft log gc size limit to a smaller value to // avoid too many entry logs in cache. // The snapshot support to increment snapshot sst, so the old snapshot files diff --git a/etc/config-template.toml b/etc/config-template.toml index cfa8e30af48..9e253e8ff79 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -456,7 +456,7 @@ ## It should be set between 1 and 100. Manual compaction is only triggered when the number of ## duplicated MVCC keys exceeds `region-compact-min-redundant-rows` and the percentage of duplicated MVCC keys ## exceeds `region-compact-redundant-rows-percent`. -# region-compact-redundant-rows-percent = 100 +# region-compact-redundant-rows-percent = 20 ## Interval to check whether to start a manual compaction for Lock Column Family. ## If written bytes reach `lock-cf-compact-bytes-threshold` for Lock Column Family, TiKV will diff --git a/src/config/mod.rs b/src/config/mod.rs index 1c29c0637ee..27f38abee4a 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -7058,7 +7058,7 @@ mod tests { cfg.raft_store .region_compact_redundant_rows_percent .unwrap(), - 100 + 20 ); let content = r#" From 5ada40f64fd51796bf69aa0f676c954cc5a5599f Mon Sep 17 00:00:00 2001 From: lucasliang Date: Thu, 23 Nov 2023 16:11:42 +0800 Subject: [PATCH 1024/1149] raftstore: backport the detection of network-io from raftstore-v2. (#15908) ref tikv/tikv#15909 Make raftstore perspect the jitters of network-io by backporting the implementation from raftstore-v2. 
Signed-off-by: lucasliang --- components/raftstore/src/store/config.rs | 9 ++ components/raftstore/src/store/fsm/store.rs | 10 +- components/raftstore/src/store/peer.rs | 15 +- components/raftstore/src/store/worker/pd.rs | 171 ++++++++++++-------- tests/integrations/config/mod.rs | 1 + 5 files changed, 134 insertions(+), 72 deletions(-) diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 62de6b57faf..73c0bb760bc 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -357,6 +357,8 @@ pub struct Config { pub slow_trend_unsensitive_cause: f64, // The unsensitive(increase it to reduce sensitiveness) of the result-trend detection pub slow_trend_unsensitive_result: f64, + // The sensitiveness of slowness on network-io. + pub slow_trend_network_io_factor: f64, // Interval to report min resolved ts, if it is zero, it means disabled. pub report_min_resolved_ts_interval: ReadableDuration, @@ -521,6 +523,7 @@ impl Default for Config { // make it `10.0` to reduce a bit sensitiveness because SpikeFilter is disabled slow_trend_unsensitive_cause: 10.0, slow_trend_unsensitive_result: 0.5, + slow_trend_network_io_factor: 0.0, report_min_resolved_ts_interval: ReadableDuration::secs(1), check_leader_lease_interval: ReadableDuration::secs(0), renew_leader_lease_advance_duration: ReadableDuration::secs(0), @@ -933,6 +936,12 @@ impl Config { )); } + if self.slow_trend_network_io_factor < 0.0 { + return Err(box_err!( + "slow_trend_network_io_factor must be greater than 0" + )); + } + Ok(()) } diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 1f72bcd9c90..cfc0bb6e6a4 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -846,6 +846,9 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> mut inspector, } => { 
inspector.record_store_wait(send_time.saturating_elapsed()); + inspector.record_store_commit(self.ctx.raft_metrics.stat_commit_log.avg()); + // Reset the stat_commit_log and wait it to be refreshed in the next tick. + self.ctx.raft_metrics.stat_commit_log.reset(); self.ctx.pending_latency_inspect.push(inspector); } StoreMsg::UnsafeRecoveryReport(report) => self.store_heartbeat_pd(Some(report)), @@ -2841,16 +2844,17 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER fn on_wake_up_regions(&self, abnormal_stores: Vec) { info!("try to wake up all hibernated regions in this store"; "to_all" => abnormal_stores.is_empty()); + let store_id = self.ctx.store_id(); let meta = self.ctx.store_meta.lock().unwrap(); - for region_id in meta.regions.keys() { - let region = &meta.regions[region_id]; + + for (region_id, region) in &meta.regions { // Check whether the current region is not found on abnormal stores. If so, // this region is not the target to be awaken. if !region_on_stores(region, &abnormal_stores) { continue; } let peer = { - match find_peer(region, self.ctx.store_id()) { + match find_peer(region, store_id) { None => continue, Some(p) => p.clone(), } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 17d8e51f4cf..185ab9d2a92 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -1838,7 +1838,7 @@ where let has_snap_task = self.get_store().has_gen_snap_task(); let pre_commit_index = self.raft_group.raft.raft_log.committed; self.raft_group.step(m)?; - self.report_commit_log_duration(pre_commit_index, &ctx.raft_metrics); + self.report_commit_log_duration(pre_commit_index, &mut ctx.raft_metrics); let mut for_balance = false; if !has_snap_task && self.get_store().has_gen_snap_task() { @@ -1883,7 +1883,7 @@ where } } - fn report_commit_log_duration(&self, pre_commit_index: u64, metrics: &RaftMetrics) { + fn report_commit_log_duration(&self, 
pre_commit_index: u64, metrics: &mut RaftMetrics) { if !metrics.waterfall_metrics || self.proposals.is_empty() { return; } @@ -1903,10 +1903,15 @@ where &metrics.wf_commit_not_persist_log }; for tracker in trackers { - tracker.observe(now, hist, |t| { + // Collect the metrics related to commit_log + // durations. + let duration = tracker.observe(now, hist, |t| { t.metrics.commit_not_persisted = !commit_persisted; &mut t.metrics.wf_commit_log_nanos }); + metrics + .stat_commit_log + .record(Duration::from_nanos(duration)); } } } @@ -3125,7 +3130,7 @@ where let pre_commit_index = self.raft_group.raft.raft_log.committed; self.raft_group.on_persist_ready(self.persisted_number); self.report_persist_log_duration(pre_persist_index, &ctx.raft_metrics); - self.report_commit_log_duration(pre_commit_index, &ctx.raft_metrics); + self.report_commit_log_duration(pre_commit_index, &mut ctx.raft_metrics); let persist_index = self.raft_group.raft.raft_log.persisted; self.mut_store().update_cache_persisted(persist_index); @@ -3170,7 +3175,7 @@ where let pre_commit_index = self.raft_group.raft.raft_log.committed; let mut light_rd = self.raft_group.advance_append(ready); self.report_persist_log_duration(pre_persist_index, &ctx.raft_metrics); - self.report_commit_log_duration(pre_commit_index, &ctx.raft_metrics); + self.report_commit_log_duration(pre_commit_index, &mut ctx.raft_metrics); let persist_index = self.raft_group.raft.raft_log.persisted; if self.is_in_force_leader() { diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 5e97adf8d3e..b73198ecc75 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -919,6 +919,78 @@ impl SlowScore { } } +struct SlowTrendStatistics { + net_io_factor: f64, + /// Detector to detect NetIo&DiskIo jitters. + slow_cause: Trend, + /// Reactor as an assistant detector to detect the QPS jitters. 
+ slow_result: Trend, + slow_result_recorder: RequestPerSecRecorder, +} + +impl SlowTrendStatistics { + #[inline] + fn new(cfg: &Config) -> Self { + Self { + slow_cause: Trend::new( + // Disable SpikeFilter for now + Duration::from_secs(0), + STORE_SLOW_TREND_MISC_GAUGE_VEC.with_label_values(&["spike_filter_value"]), + STORE_SLOW_TREND_MISC_GAUGE_VEC.with_label_values(&["spike_filter_count"]), + Duration::from_secs(180), + Duration::from_secs(30), + Duration::from_secs(120), + Duration::from_secs(600), + 1, + tikv_util::time::duration_to_us(Duration::from_micros(500)), + STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC.with_label_values(&["L1"]), + STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC.with_label_values(&["L2"]), + cfg.slow_trend_unsensitive_cause, + ), + slow_result: Trend::new( + // Disable SpikeFilter for now + Duration::from_secs(0), + STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC.with_label_values(&["spike_filter_value"]), + STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC.with_label_values(&["spike_filter_count"]), + Duration::from_secs(120), + Duration::from_secs(15), + Duration::from_secs(60), + Duration::from_secs(300), + 1, + 2000, + STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC + .with_label_values(&["L1"]), + STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC + .with_label_values(&["L2"]), + cfg.slow_trend_unsensitive_result, + ), + slow_result_recorder: RequestPerSecRecorder::new(), + net_io_factor: cfg.slow_trend_network_io_factor, /* FIXME: add extra parameter in + * Config to control it. */ + } + } + + #[inline] + fn record(&mut self, duration: RaftstoreDuration) { + // TODO: It's more appropriate to divide the factor into `Disk IO factor` and + // `Net IO factor`. + // Currently, when `network ratio == 1`, it summarizes all factors by `sum` + // simplily, approved valid to common cases when there exists IO jitters on + // Network or Disk. 
+ let latency = || -> u64 { + if self.net_io_factor as u64 >= 1 { + return tikv_util::time::duration_to_us(duration.sum()); + } + let disk_io_latency = + tikv_util::time::duration_to_us(duration.delays_on_disk_io(true)) as f64; + let network_io_latency = + tikv_util::time::duration_to_us(duration.delays_on_net_io()) as f64; + (disk_io_latency + network_io_latency * self.net_io_factor) as u64 + }(); + self.slow_cause.record(latency, Instant::now()); + } +} + pub struct Runner where EK: KvEngine, @@ -949,9 +1021,7 @@ where snap_mgr: SnapManager, remote: Remote, slow_score: SlowScore, - slow_trend_cause: Trend, - slow_trend_result: Trend, - slow_trend_result_recorder: RequestPerSecRecorder, + slow_trend: SlowTrendStatistics, // The health status of the store is updated by the slow score mechanism. health_service: Option, @@ -1020,39 +1090,7 @@ where snap_mgr, remote, slow_score: SlowScore::new(cfg.inspect_interval.0), - slow_trend_cause: Trend::new( - // Disable SpikeFilter for now - Duration::from_secs(0), - STORE_SLOW_TREND_MISC_GAUGE_VEC.with_label_values(&["spike_filter_value"]), - STORE_SLOW_TREND_MISC_GAUGE_VEC.with_label_values(&["spike_filter_count"]), - Duration::from_secs(180), - Duration::from_secs(30), - Duration::from_secs(120), - Duration::from_secs(600), - 1, - tikv_util::time::duration_to_us(Duration::from_micros(500)), - STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC.with_label_values(&["L1"]), - STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC.with_label_values(&["L2"]), - cfg.slow_trend_unsensitive_cause, - ), - slow_trend_result: Trend::new( - // Disable SpikeFilter for now - Duration::from_secs(0), - STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC.with_label_values(&["spike_filter_value"]), - STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC.with_label_values(&["spike_filter_count"]), - Duration::from_secs(120), - Duration::from_secs(15), - Duration::from_secs(60), - Duration::from_secs(300), - 1, - 2000, - 
STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC - .with_label_values(&["L1"]), - STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC - .with_label_values(&["L2"]), - cfg.slow_trend_unsensitive_result, - ), - slow_trend_result_recorder: RequestPerSecRecorder::new(), + slow_trend: SlowTrendStatistics::new(cfg), health_service, curr_health_status: ServingStatus::Serving, coprocessor_host, @@ -1330,7 +1368,8 @@ where .engine_total_query_num .sub_query_stats(&self.store_stat.engine_last_query_num); let total_query_num = self - .slow_trend_result_recorder + .slow_trend + .slow_result_recorder .record_and_get_current_rps(res.get_all_query_num(), Instant::now()); stats.set_query_stats(res.0); @@ -1457,16 +1496,17 @@ where stats: &mut pdpb::StoreStats, total_query_num: Option, ) { - let slow_trend_cause_rate = self.slow_trend_cause.increasing_rate(); + let slow_trend_cause_rate = self.slow_trend.slow_cause.increasing_rate(); STORE_SLOW_TREND_GAUGE.set(slow_trend_cause_rate); let mut slow_trend = pdpb::SlowTrend::default(); slow_trend.set_cause_rate(slow_trend_cause_rate); - slow_trend.set_cause_value(self.slow_trend_cause.l0_avg()); + slow_trend.set_cause_value(self.slow_trend.slow_cause.l0_avg()); if let Some(total_query_num) = total_query_num { - self.slow_trend_result + self.slow_trend + .slow_result .record(total_query_num as u64, Instant::now()); - slow_trend.set_result_value(self.slow_trend_result.l0_avg()); - let slow_trend_result_rate = self.slow_trend_result.increasing_rate(); + slow_trend.set_result_value(self.slow_trend.slow_result.l0_avg()); + let slow_trend_result_rate = self.slow_trend.slow_result.increasing_rate(); slow_trend.set_result_rate(slow_trend_result_rate); STORE_SLOW_TREND_RESULT_GAUGE.set(slow_trend_result_rate); STORE_SLOW_TREND_RESULT_VALUE_GAUGE.set(total_query_num); @@ -1479,23 +1519,25 @@ where } fn write_slow_trend_metrics(&mut self) { - STORE_SLOW_TREND_L0_GAUGE.set(self.slow_trend_cause.l0_avg()); - 
STORE_SLOW_TREND_L1_GAUGE.set(self.slow_trend_cause.l1_avg()); - STORE_SLOW_TREND_L2_GAUGE.set(self.slow_trend_cause.l2_avg()); - STORE_SLOW_TREND_L0_L1_GAUGE.set(self.slow_trend_cause.l0_l1_rate()); - STORE_SLOW_TREND_L1_L2_GAUGE.set(self.slow_trend_cause.l1_l2_rate()); - STORE_SLOW_TREND_L1_MARGIN_ERROR_GAUGE.set(self.slow_trend_cause.l1_margin_error_base()); - STORE_SLOW_TREND_L2_MARGIN_ERROR_GAUGE.set(self.slow_trend_cause.l2_margin_error_base()); + STORE_SLOW_TREND_L0_GAUGE.set(self.slow_trend.slow_cause.l0_avg()); + STORE_SLOW_TREND_L1_GAUGE.set(self.slow_trend.slow_cause.l1_avg()); + STORE_SLOW_TREND_L2_GAUGE.set(self.slow_trend.slow_cause.l2_avg()); + STORE_SLOW_TREND_L0_L1_GAUGE.set(self.slow_trend.slow_cause.l0_l1_rate()); + STORE_SLOW_TREND_L1_L2_GAUGE.set(self.slow_trend.slow_cause.l1_l2_rate()); + STORE_SLOW_TREND_L1_MARGIN_ERROR_GAUGE + .set(self.slow_trend.slow_cause.l1_margin_error_base()); + STORE_SLOW_TREND_L2_MARGIN_ERROR_GAUGE + .set(self.slow_trend.slow_cause.l2_margin_error_base()); // Report results of all slow Trends. 
- STORE_SLOW_TREND_RESULT_L0_GAUGE.set(self.slow_trend_result.l0_avg()); - STORE_SLOW_TREND_RESULT_L1_GAUGE.set(self.slow_trend_result.l1_avg()); - STORE_SLOW_TREND_RESULT_L2_GAUGE.set(self.slow_trend_result.l2_avg()); - STORE_SLOW_TREND_RESULT_L0_L1_GAUGE.set(self.slow_trend_result.l0_l1_rate()); - STORE_SLOW_TREND_RESULT_L1_L2_GAUGE.set(self.slow_trend_result.l1_l2_rate()); + STORE_SLOW_TREND_RESULT_L0_GAUGE.set(self.slow_trend.slow_result.l0_avg()); + STORE_SLOW_TREND_RESULT_L1_GAUGE.set(self.slow_trend.slow_result.l1_avg()); + STORE_SLOW_TREND_RESULT_L2_GAUGE.set(self.slow_trend.slow_result.l2_avg()); + STORE_SLOW_TREND_RESULT_L0_L1_GAUGE.set(self.slow_trend.slow_result.l0_l1_rate()); + STORE_SLOW_TREND_RESULT_L1_L2_GAUGE.set(self.slow_trend.slow_result.l1_l2_rate()); STORE_SLOW_TREND_RESULT_L1_MARGIN_ERROR_GAUGE - .set(self.slow_trend_result.l1_margin_error_base()); + .set(self.slow_trend.slow_result.l1_margin_error_base()); STORE_SLOW_TREND_RESULT_L2_MARGIN_ERROR_GAUGE - .set(self.slow_trend_result.l2_margin_error_base()); + .set(self.slow_trend.slow_result.l2_margin_error_base()); } fn handle_report_batch_split(&self, regions: Vec) { @@ -2264,10 +2306,7 @@ where // Fine-tuned, `SlowScore` only takes the I/O jitters on the disk into account. self.slow_score .record(id, duration.delays_on_disk_io(false)); - self.slow_trend_cause.record( - tikv_util::time::duration_to_us(duration.store_wait_duration.unwrap()), - Instant::now(), - ); + self.slow_trend.record(duration); } Task::RegionCpuRecords(records) => self.handle_region_cpu_records(records), Task::ReportMinResolvedTs { @@ -2296,7 +2335,7 @@ where { fn on_timeout(&mut self) { // Record a fairly great value when timeout - self.slow_trend_cause.record(500_000, Instant::now()); + self.slow_trend.slow_cause.record(500_000, Instant::now()); // The health status is recovered to serving as long as any tick // does not timeout. 
@@ -2335,8 +2374,6 @@ where let inspector = LatencyInspector::new( id, Box::new(move |id, duration| { - let dur = duration.sum(); - STORE_INSPECT_DURATION_HISTOGRAM .with_label_values(&["store_process"]) .observe(tikv_util::time::duration_to_sec( @@ -2347,9 +2384,15 @@ where .observe(tikv_util::time::duration_to_sec( duration.store_wait_duration.unwrap(), )); + STORE_INSPECT_DURATION_HISTOGRAM + .with_label_values(&["store_commit"]) + .observe(tikv_util::time::duration_to_sec( + duration.store_commit_duration.unwrap(), + )); + STORE_INSPECT_DURATION_HISTOGRAM .with_label_values(&["all"]) - .observe(tikv_util::time::duration_to_sec(dur)); + .observe(tikv_util::time::duration_to_sec(duration.sum())); if let Err(e) = scheduler.schedule(Task::UpdateSlowScore { id, duration }) { warn!("schedule pd task failed"; "err" => ?e); } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 3afcac261a4..4bb75e0a95f 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -262,6 +262,7 @@ fn test_serde_custom_tikv_config() { check_request_snapshot_interval: ReadableDuration::minutes(1), slow_trend_unsensitive_cause: 10.0, slow_trend_unsensitive_result: 0.5, + slow_trend_network_io_factor: 0.0, enable_v2_compatible_learner: false, unsafe_disable_check_quorum: false, periodic_full_compact_start_times: ReadableSchedule::default(), From b23787ca7a44401157e8fcbf3a126dfaf3833af3 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 23 Nov 2023 16:37:11 +0800 Subject: [PATCH 1025/1149] readpool: fix pending tasks counter (#16031) ref tikv/tikv#16026 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/read_pool.rs | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/read_pool.rs b/src/read_pool.rs index 32be95698da..22a11cb2b41 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -26,7 +26,6 @@ use tikv_util::{ 
worker::{Runnable, RunnableWithTimer, Scheduler, Worker}, yatp_pool::{self, CleanupMethod, FuturePool, PoolTicker, YatpPoolBuilder}, }; -use tracker::TrackedFuture; use yatp::{metrics::MULTILEVEL_LEVEL_ELAPSED, queue::Extras}; use self::metrics::*; @@ -145,8 +144,9 @@ impl ReadPoolHandle { let group_name = metadata.group_name().to_owned(); let mut extras = Extras::new_multilevel(task_id, fixed_level); extras.set_metadata(metadata.to_vec()); + let running_tasks1 = running_tasks.clone(); if let Some(resource_ctl) = resource_ctl { - let fut = TrackedFuture::new(with_resource_limiter( + let fut = with_resource_limiter( ControlledFuture::new( async move { f.await; @@ -156,14 +156,20 @@ impl ReadPoolHandle { group_name, ), resource_limiter, - )); - remote.spawn_with_extras(fut, extras)?; + ); + remote.spawn_with_extras(fut, extras).map_err(|e| { + running_tasks1.dec(); + e + })?; } else { let fut = async move { f.await; running_tasks.dec(); }; - remote.spawn_with_extras(fut, extras)?; + remote.spawn_with_extras(fut, extras).map_err(|e| { + running_tasks1.dec(); + e + })?; } } } @@ -764,12 +770,14 @@ mod tests { // max running tasks number should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); - let pool = build_yatp_read_pool( + let name = "test-yatp-full"; + let pool = build_yatp_read_pool_with_name( &config, DummyReporter, engine, None, CleanupMethod::InPlace, + name.to_owned(), None, ); @@ -805,6 +813,12 @@ mod tests { handle .spawn(task4, CommandPri::Normal, 4, TaskMetadata::default(), None) .unwrap(); + assert_eq!( + UNIFIED_READ_POOL_RUNNING_TASKS + .with_label_values(&[name]) + .get(), + 2 + ); } #[test] From bc1ae3043718418b43ef69d0b04190c0de721278 Mon Sep 17 00:00:00 2001 From: Hu# Date: Fri, 24 Nov 2023 15:23:43 +0800 Subject: [PATCH 1026/1149] pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837) ref tikv/tikv#15184 - The min-resolved-ts will report periodically, no need to do retires - 
support dynamic change `min-resolved-ts` report interval Signed-off-by: husharp Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/pd_client/src/client.rs | 4 +- components/raftstore-v2/src/batch/store.rs | 1 - components/raftstore-v2/src/worker/pd/mod.rs | 15 ++---- .../tests/integrations/cluster.rs | 2 +- components/raftstore/src/store/config.rs | 9 ++-- components/raftstore/src/store/fsm/store.rs | 37 ++++++++++++-- components/raftstore/src/store/metrics.rs | 1 + components/raftstore/src/store/msg.rs | 2 + components/raftstore/src/store/worker/pd.rs | 48 ++----------------- .../resolved_ts/tests/failpoints/mod.rs | 5 +- components/test_raftstore/src/util.rs | 12 +++++ etc/config-template.toml | 3 ++ tests/integrations/config/mod.rs | 2 +- tests/integrations/config/test-custom.toml | 2 +- 14 files changed, 69 insertions(+), 74 deletions(-) diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 06ea6e9055d..80958e151d0 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -1098,9 +1098,7 @@ impl PdClient for RpcClient { }) as PdFuture<_> }; - self.pd_client - .request(req, executor, LEADER_CHANGE_RETRY) - .execute() + self.pd_client.request(req, executor, NO_RETRY).execute() } fn report_region_buckets(&self, bucket_stat: &BucketStat, period: Duration) -> PdFuture<()> { diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 23e41914012..73c4461024e 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -811,7 +811,6 @@ impl StoreSystem { causal_ts_provider, workers.pd.scheduler(), auto_split_controller, - store_meta.lock().unwrap().region_read_progress.clone(), collector_reg_handle, grpc_service_mgr, self.logger.clone(), diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 
77915dd0378..7e07d26e61f 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -14,9 +14,9 @@ use pd_client::{BucketStat, PdClient}; use raftstore::store::{ metrics::STORE_INSPECT_DURATION_HISTOGRAM, util::{KeysInfoFormatter, LatencyInspector, RaftstoreDuration}, - AutoSplitController, Config, FlowStatsReporter, PdStatsMonitor, ReadStats, - RegionReadProgressRegistry, SplitInfo, StoreStatsReporter, TabletSnapManager, TxnExt, - WriteStats, NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, + AutoSplitController, Config, FlowStatsReporter, PdStatsMonitor, ReadStats, SplitInfo, + StoreStatsReporter, TabletSnapManager, TxnExt, WriteStats, + NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, }; use resource_metering::{Collector, CollectorRegHandle, RawRecords}; use service::service_manager::GrpcServiceManager; @@ -245,7 +245,6 @@ where causal_ts_provider: Option>, // used for rawkv apiv2 pd_scheduler: Scheduler, auto_split_controller: AutoSplitController, - region_read_progress: RegionReadProgressRegistry, collector_reg_handle: CollectorRegHandle, grpc_service_manager: GrpcServiceManager, logger: Logger, @@ -255,16 +254,10 @@ where let store_heartbeat_interval = cfg.value().pd_store_heartbeat_tick_interval.0; let mut stats_monitor = PdStatsMonitor::new( store_heartbeat_interval / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, - cfg.value().report_min_resolved_ts_interval.0, cfg.value().inspect_interval.0, PdReporter::new(pd_scheduler, logger.clone()), ); - stats_monitor.start( - auto_split_controller, - region_read_progress, - collector_reg_handle, - store_id, - )?; + stats_monitor.start(auto_split_controller, collector_reg_handle)?; let slowness_stats = slowness::SlownessStatistics::new(&cfg.value()); Ok(Self { store_id, diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 5b3cc5feb93..88ad9a0e380 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ 
b/components/raftstore-v2/tests/integrations/cluster.rs @@ -515,6 +515,7 @@ pub fn disable_all_auto_ticks(cfg: &mut Config) { cfg.region_compact_check_interval = ReadableDuration::ZERO; cfg.pd_heartbeat_tick_interval = ReadableDuration::ZERO; cfg.pd_store_heartbeat_tick_interval = ReadableDuration::ZERO; + cfg.pd_report_min_resolved_ts_interval = ReadableDuration::ZERO; cfg.snap_mgr_gc_tick_interval = ReadableDuration::ZERO; cfg.lock_cf_compact_interval = ReadableDuration::ZERO; cfg.peer_stale_state_check_interval = ReadableDuration::ZERO; @@ -524,7 +525,6 @@ pub fn disable_all_auto_ticks(cfg: &mut Config) { cfg.merge_check_tick_interval = ReadableDuration::ZERO; cfg.cleanup_import_sst_interval = ReadableDuration::ZERO; cfg.inspect_interval = ReadableDuration::ZERO; - cfg.report_min_resolved_ts_interval = ReadableDuration::ZERO; cfg.reactive_memory_lock_tick_interval = ReadableDuration::ZERO; cfg.report_region_buckets_tick_interval = ReadableDuration::ZERO; cfg.check_long_uncommitted_interval = ReadableDuration::ZERO; diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 73c0bb760bc..b09afb3c6e1 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -142,6 +142,7 @@ pub struct Config { pub region_compact_redundant_rows_percent: Option, pub pd_heartbeat_tick_interval: ReadableDuration, pub pd_store_heartbeat_tick_interval: ReadableDuration, + pub pd_report_min_resolved_ts_interval: ReadableDuration, pub snap_mgr_gc_tick_interval: ReadableDuration, pub snap_gc_timeout: ReadableDuration, /// The duration of snapshot waits for region split. It prevents leader from @@ -360,9 +361,6 @@ pub struct Config { // The sensitiveness of slowness on network-io. pub slow_trend_network_io_factor: f64, - // Interval to report min resolved ts, if it is zero, it means disabled. 
- pub report_min_resolved_ts_interval: ReadableDuration, - /// Interval to check whether to reactivate in-memory pessimistic lock after /// being disabled before transferring leader. pub reactive_memory_lock_tick_interval: ReadableDuration, @@ -445,6 +443,7 @@ impl Default for Config { region_compact_redundant_rows_percent: Some(20), pd_heartbeat_tick_interval: ReadableDuration::minutes(1), pd_store_heartbeat_tick_interval: ReadableDuration::secs(10), + pd_report_min_resolved_ts_interval: ReadableDuration::secs(1), // Disable periodic full compaction by default. periodic_full_compact_start_times: ReadableSchedule::default(), // If periodic full compaction is enabled, do not start a full compaction @@ -524,7 +523,6 @@ impl Default for Config { slow_trend_unsensitive_cause: 10.0, slow_trend_unsensitive_result: 0.5, slow_trend_network_io_factor: 0.0, - report_min_resolved_ts_interval: ReadableDuration::secs(1), check_leader_lease_interval: ReadableDuration::secs(0), renew_leader_lease_advance_duration: ReadableDuration::secs(0), allow_unsafe_vote_after_start: false, @@ -1042,6 +1040,9 @@ impl Config { CONFIG_RAFTSTORE_GAUGE .with_label_values(&["pd_store_heartbeat_tick_interval"]) .set(self.pd_store_heartbeat_tick_interval.as_secs_f64()); + CONFIG_RAFTSTORE_GAUGE + .with_label_values(&["pd_report_min_resolved_ts_interval"]) + .set(self.pd_report_min_resolved_ts_interval.as_secs_f64()); CONFIG_RAFTSTORE_GAUGE .with_label_values(&["snap_mgr_gc_tick_interval"]) .set(self.snap_mgr_gc_tick_interval.as_secs_f64()); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index cfc0bb6e6a4..bef74e3ed29 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -789,6 +789,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> StoreTick::LoadMetricsWindow => self.on_load_metrics_window_tick(), StoreTick::ConsistencyCheck => self.on_consistency_check_tick(), 
StoreTick::CleanupImportSst => self.on_cleanup_import_sst_tick(), + StoreTick::PdReportMinResolvedTs => self.on_pd_report_min_resolved_ts_tick(), } let elapsed = timer.saturating_elapsed(); self.ctx @@ -883,6 +884,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> self.register_full_compact_tick(); self.register_load_metrics_window_tick(); self.register_pd_store_heartbeat_tick(); + self.register_pd_report_min_resolved_ts_tick(); self.register_compact_lock_cf_tick(); self.register_snap_mgr_gc_tick(); self.register_consistency_check_tick(); @@ -1702,7 +1704,6 @@ impl RaftBatchSystem { &cfg, )?; - let region_read_progress = store_meta.lock().unwrap().region_read_progress.clone(); let mut builder = RaftPollerBuilder { cfg, store: meta, @@ -1739,7 +1740,6 @@ impl RaftBatchSystem { mgr, pd_client, collector_reg_handle, - region_read_progress, health_service, causal_ts_provider, snap_generator_pool, @@ -1758,7 +1758,6 @@ impl RaftBatchSystem { snap_mgr: SnapManager, pd_client: Arc, collector_reg_handle: CollectorRegHandle, - region_read_progress: RegionReadProgressRegistry, health_service: Option, causal_ts_provider: Option>, // used for rawkv apiv2 snap_generator_pool: FuturePool, @@ -1850,7 +1849,6 @@ impl RaftBatchSystem { snap_mgr, workers.pd_worker.remote(), collector_reg_handle, - region_read_progress, health_service, coprocessor_host, causal_ts_provider, @@ -2678,6 +2676,25 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } + fn report_min_resolved_ts(&self) { + let read_progress = { + let meta = self.ctx.store_meta.lock().unwrap(); + meta.region_read_progress().clone() + }; + let min_resolved_ts = read_progress.get_min_resolved_ts(); + + let task = PdTask::ReportMinResolvedTs { + store_id: self.fsm.store.id, + min_resolved_ts, + }; + if let Err(e) = self.ctx.pd_scheduler.schedule(task) { + error!("failed to send min resolved ts to pd worker"; + "store_id" => self.fsm.store.id, + "err" => ?e + ); + } + 
} + fn store_heartbeat_pd(&mut self, report: Option) { let mut stats = StoreStats::default(); @@ -2784,6 +2801,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER self.register_pd_store_heartbeat_tick(); } + fn on_pd_report_min_resolved_ts_tick(&mut self) { + self.report_min_resolved_ts(); + self.register_pd_report_min_resolved_ts_tick(); + } + fn on_snap_mgr_gc(&mut self) { // refresh multi_snapshot_files enable flag self.ctx.snap_mgr.set_enable_multi_snapshot_files( @@ -2888,6 +2910,13 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER ); } + fn register_pd_report_min_resolved_ts_tick(&self) { + self.ctx.schedule_store_tick( + StoreTick::PdReportMinResolvedTs, + self.ctx.cfg.pd_report_min_resolved_ts_interval.0, + ); + } + fn register_snap_mgr_gc_tick(&self) { self.ctx .schedule_store_tick(StoreTick::SnapGc, self.ctx.cfg.snap_mgr_gc_tick_interval.0) diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index d8282cfa486..908b650469c 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -217,6 +217,7 @@ make_static_metric! 
{ periodic_full_compact, load_metrics_window, pd_store_heartbeat, + pd_report_min_resolved_ts, snap_gc, compact_lock_cf, consistency_check, diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index bf3a0998647..52aed7d424f 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -442,6 +442,7 @@ pub enum StoreTick { CompactLockCf, ConsistencyCheck, CleanupImportSst, + PdReportMinResolvedTs, } impl StoreTick { @@ -456,6 +457,7 @@ impl StoreTick { StoreTick::ConsistencyCheck => RaftEventDurationType::consistency_check, StoreTick::CleanupImportSst => RaftEventDurationType::cleanup_import_sst, StoreTick::LoadMetricsWindow => RaftEventDurationType::load_metrics_window, + StoreTick::PdReportMinResolvedTs => RaftEventDurationType::pd_report_min_resolved_ts, } } } diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index b73198ecc75..b5bb189d84b 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -68,7 +68,7 @@ use crate::{ AutoSplitController, ReadStats, SplitConfigChange, WriteStats, }, Callback, CasualMessage, Config, PeerMsg, RaftCmdExtraOpts, RaftCommand, RaftRouter, - RegionReadProgressRegistry, SnapManager, StoreInfo, StoreMsg, TxnExt, + SnapManager, StoreInfo, StoreMsg, TxnExt, }, }; @@ -450,16 +450,6 @@ fn default_collect_tick_interval() -> Duration { DEFAULT_COLLECT_TICK_INTERVAL } -fn config(interval: Duration) -> Duration { - fail_point!("mock_min_resolved_ts_interval", |_| { - Duration::from_millis(50) - }); - fail_point!("mock_min_resolved_ts_interval_disable", |_| { - Duration::from_millis(0) - }); - interval -} - #[inline] fn convert_record_pairs(m: HashMap) -> RecordPairVec { m.into_iter() @@ -562,7 +552,6 @@ where collect_store_infos_interval: Duration, load_base_split_check_interval: Duration, collect_tick_interval: Duration, - report_min_resolved_ts_interval: 
Duration, inspect_latency_interval: Duration, } @@ -570,12 +559,7 @@ impl StatsMonitor where T: StoreStatsReporter, { - pub fn new( - interval: Duration, - report_min_resolved_ts_interval: Duration, - inspect_latency_interval: Duration, - reporter: T, - ) -> Self { + pub fn new(interval: Duration, inspect_latency_interval: Duration, reporter: T) -> Self { StatsMonitor { reporter, handle: None, @@ -587,7 +571,6 @@ where DEFAULT_LOAD_BASE_SPLIT_CHECK_INTERVAL, interval, ), - report_min_resolved_ts_interval: config(report_min_resolved_ts_interval), // Use `inspect_latency_interval` as the minimal limitation for collecting tick. collect_tick_interval: cmp::min( inspect_latency_interval, @@ -602,9 +585,7 @@ where pub fn start( &mut self, mut auto_split_controller: AutoSplitController, - region_read_progress: RegionReadProgressRegistry, collector_reg_handle: CollectorRegHandle, - store_id: u64, ) -> Result<(), io::Error> { if self.collect_tick_interval < cmp::min( @@ -625,9 +606,6 @@ where let load_base_split_check_interval = self .load_base_split_check_interval .div_duration_f64(tick_interval) as u64; - let report_min_resolved_ts_interval = self - .report_min_resolved_ts_interval - .div_duration_f64(tick_interval) as u64; let update_latency_stats_interval = self .inspect_latency_interval .div_duration_f64(tick_interval) as u64; @@ -686,12 +664,6 @@ where &mut region_cpu_records_collector, ); } - if is_enable_tick(timer_cnt, report_min_resolved_ts_interval) { - reporter.report_min_resolved_ts( - store_id, - region_read_progress.get_min_resolved_ts(), - ); - } if is_enable_tick(timer_cnt, update_latency_stats_interval) { reporter.update_latency_stats(timer_cnt); } @@ -1050,7 +1022,6 @@ where snap_mgr: SnapManager, remote: Remote, collector_reg_handle: CollectorRegHandle, - region_read_progress: RegionReadProgressRegistry, health_service: Option, coprocessor_host: CoprocessorHost, causal_ts_provider: Option>, // used for rawkv apiv2 @@ -1060,16 +1031,10 @@ where let 
interval = store_heartbeat_interval / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT; let mut stats_monitor = StatsMonitor::new( interval, - cfg.report_min_resolved_ts_interval.0, cfg.inspect_interval.0, WrappedScheduler(scheduler.clone()), ); - if let Err(e) = stats_monitor.start( - auto_split_controller, - region_read_progress, - collector_reg_handle, - store_id, - ) { + if let Err(e) = stats_monitor.start(auto_split_controller, collector_reg_handle) { error!("failed to start stats collector, error = {:?}", e); } @@ -2686,8 +2651,6 @@ mod tests { use engine_test::{kv::KvTestEngine, raft::RaftTestEngine}; use tikv_util::worker::LazyWorker; - use crate::store::fsm::StoreMeta; - struct RunnerTest { store_stat: Arc>, stats_monitor: StatsMonitor>, @@ -2701,17 +2664,12 @@ mod tests { ) -> RunnerTest { let mut stats_monitor = StatsMonitor::new( Duration::from_secs(interval), - Duration::from_secs(0), Duration::from_secs(interval), WrappedScheduler(scheduler), ); - let store_meta = Arc::new(Mutex::new(StoreMeta::new(0))); - let region_read_progress = store_meta.lock().unwrap().region_read_progress.clone(); if let Err(e) = stats_monitor.start( AutoSplitController::default(), - region_read_progress, CollectorRegHandle::new_for_test(), - 1, ) { error!("failed to start stats collector, error = {:?}", e); } diff --git a/components/resolved_ts/tests/failpoints/mod.rs b/components/resolved_ts/tests/failpoints/mod.rs index 0c594ab1d1d..64b58e0ed22 100644 --- a/components/resolved_ts/tests/failpoints/mod.rs +++ b/components/resolved_ts/tests/failpoints/mod.rs @@ -63,15 +63,14 @@ fn test_report_min_resolved_ts() { fail::cfg("mock_collect_tick_interval", "return(0)").unwrap(); fail::cfg("mock_min_resolved_ts_interval", "return(0)").unwrap(); let mut suite = TestSuite::new(1); - // default config is 1s assert_eq!( suite .cluster .cfg .tikv .raft_store - .report_min_resolved_ts_interval, - ReadableDuration::secs(1) + .pd_report_min_resolved_ts_interval, + ReadableDuration::millis(50) ); let 
region = suite.cluster.get_region(&[]); let ts1 = suite.cluster.pd_client.get_min_resolved_ts(); diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 5eb7d97796e..0bb948f13c9 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -19,6 +19,7 @@ use engine_traits::{ CfName, CfNamesExt, Engines, Iterable, KvEngine, Peekable, RaftEngineDebug, RaftEngineReadOnly, CF_DEFAULT, CF_RAFT, CF_WRITE, }; +use fail::fail_point; use file_system::IoRateLimiter; use futures::{executor::block_on, future::BoxFuture, StreamExt}; use grpcio::{ChannelBuilder, Environment}; @@ -173,9 +174,20 @@ pub fn new_tikv_config_with_api_ver(cluster_id: u64, api_ver: ApiVersion) -> Tik let mut cfg = TEST_CONFIG.clone(); cfg.server.cluster_id = cluster_id; cfg.storage.set_api_version(api_ver); + cfg.raft_store.pd_report_min_resolved_ts_interval = config(ReadableDuration::secs(1)); cfg } +fn config(interval: ReadableDuration) -> ReadableDuration { + fail_point!("mock_min_resolved_ts_interval", |_| { + ReadableDuration::millis(50) + }); + fail_point!("mock_min_resolved_ts_interval_disable", |_| { + ReadableDuration::millis(0) + }); + interval +} + // Create a base request. pub fn new_base_request(region_id: u64, epoch: RegionEpoch, read_quorum: bool) -> RaftCmdRequest { let mut req = RaftCmdRequest::default(); diff --git a/etc/config-template.toml b/etc/config-template.toml index 9e253e8ff79..75c7eab0c10 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -394,6 +394,9 @@ ## Store heartbeat tick interval for reporting to PD. # pd-store-heartbeat-tick-interval = "10s" +## Store min resolved ts tick interval for reporting to PD. +# pd-report-min-resolved-ts-interval = "1s" + ## The threshold of triggering Region split check. ## When Region size change exceeds this config, TiKV will check whether the Region should be split ## or not. 
To reduce the cost of scanning data in the checking process, you can set the value to diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 4bb75e0a95f..d49f5e50c0d 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -200,6 +200,7 @@ fn test_serde_custom_tikv_config() { region_compact_redundant_rows_percent: Some(33), pd_heartbeat_tick_interval: ReadableDuration::minutes(12), pd_store_heartbeat_tick_interval: ReadableDuration::secs(12), + pd_report_min_resolved_ts_interval: ReadableDuration::millis(233), notify_capacity: 12_345, snap_mgr_gc_tick_interval: ReadableDuration::minutes(12), snap_gc_timeout: ReadableDuration::hours(12), @@ -247,7 +248,6 @@ fn test_serde_custom_tikv_config() { io_reschedule_concurrent_max_count: 1234, io_reschedule_hotpot_duration: ReadableDuration::secs(4321), inspect_interval: ReadableDuration::millis(444), - report_min_resolved_ts_interval: ReadableDuration::millis(233), check_leader_lease_interval: ReadableDuration::millis(123), renew_leader_lease_advance_duration: ReadableDuration::millis(456), reactive_memory_lock_tick_interval: ReadableDuration::millis(566), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 94184def8fb..30a501b1cee 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -173,6 +173,7 @@ region-compact-min-redundant-rows = 999 region-compact-redundant-rows-percent = 33 pd-heartbeat-tick-interval = "12m" pd-store-heartbeat-tick-interval = "12s" +pd-report-min-resolved-ts-interval = "233ms" snap-mgr-gc-tick-interval = "12m" snap-gc-timeout = "12h" snap-wait-split-duration = "12h" @@ -228,7 +229,6 @@ reactive-memory-lock-tick-interval = "566ms" reactive-memory-lock-timeout-tick = 8 check-long-uncommitted-interval = "1s" long-uncommitted-base-threshold = "1s" -report-min-resolved-ts-interval = "233ms" report-region-buckets-tick-interval = 
"1234s" max-snapshot-file-raw-size = "10GB" unreachable-backoff = "111s" From 04b857fa7fa6dc153fd2b82aac7009b9c4eb6aa5 Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 24 Nov 2023 18:04:16 +0800 Subject: [PATCH 1027/1149] test: enable fail-point cases in test_kv_service for raftstore-v2 (#15479) ref tikv/tikv#15409 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 16 ++++++++-------- src/server/raftkv2/mod.rs | 16 ++++++++++++++-- tests/failpoints/cases/test_kv_service.rs | 23 +++++++++++++++-------- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8ca813e3988..2ebbbef2f8a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -31,9 +31,9 @@ checksum = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" [[package]] name = "afl" -version = "0.14.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "330d7251127b228cb4187ac2373dc37f615d65199f93b5443edeeed839fff5df" +checksum = "8c80b57a86234ee3e9238f5f2d33d37f8fd5c7ff168c07f2d5147d410e86db33" dependencies = [ "home", "libc 0.2.146", @@ -3370,9 +3370,9 @@ dependencies = [ [[package]] name = "num-integer" -version = "0.1.44" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" dependencies = [ "autocfg", "num-traits", @@ -3380,9 +3380,9 @@ dependencies = [ [[package]] name = "num-iter" -version = "0.1.42" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" dependencies = [ "autocfg", "num-integer", @@ -3402,9 +3402,9 @@ dependencies = [ [[package]] name = "num-traits" -version = 
"0.2.14" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ "autocfg", ] diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index a9f7eb7586e..321a6614350 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -202,9 +202,21 @@ impl tikv_kv::Engine for RaftKv2 { let mut cmd = RaftCmdRequest::default(); cmd.set_header(header); cmd.set_requests(vec![req].into()); - let f = self.router.snapshot(cmd); + let res: tikv_kv::Result<()> = (|| { + fail_point!("raftkv_async_snapshot_err", |_| { + Err(box_err!("injected error for async_snapshot")) + }); + Ok(()) + })(); + let f = if res.is_err() { + None + } else { + Some(self.router.snapshot(cmd)) + }; + async move { - let res = f.await; + res?; + let res = f.unwrap().await; match res { Ok(snap) => { let elapse = begin_instant.saturating_elapsed_secs(); diff --git a/tests/failpoints/cases/test_kv_service.rs b/tests/failpoints/cases/test_kv_service.rs index 2ec1109edd4..c8777282787 100644 --- a/tests/failpoints/cases/test_kv_service.rs +++ b/tests/failpoints/cases/test_kv_service.rs @@ -9,14 +9,16 @@ use kvproto::{ }; use test_raftstore::{ configure_for_lease_read, must_kv_commit, must_kv_have_locks, must_kv_prewrite, - must_kv_prewrite_with, must_new_cluster_and_kv_client, must_new_cluster_mul, - new_server_cluster, try_kv_prewrite_with, try_kv_prewrite_with_impl, + must_kv_prewrite_with, must_new_cluster_mul, new_server_cluster, try_kv_prewrite_with, + try_kv_prewrite_with_impl, }; +use test_raftstore_macro::test_case; use tikv_util::{config::ReadableDuration, HandyRwLock}; -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_batch_get_memory_lock() { - let (_cluster, client, ctx) = 
must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let mut req = BatchGetRequest::default(); req.set_context(ctx); @@ -32,9 +34,10 @@ fn test_batch_get_memory_lock() { fail::remove("raftkv_async_snapshot_err"); } -#[test] +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] fn test_kv_scan_memory_lock() { - let (_cluster, client, ctx) = must_new_cluster_and_kv_client(); + let (_cluster, client, ctx) = new_cluster(); let mut req = ScanRequest::default(); req.set_context(ctx); @@ -50,9 +53,10 @@ fn test_kv_scan_memory_lock() { fail::remove("raftkv_async_snapshot_err"); } -#[test] +#[test_case(test_raftstore::must_new_cluster_mul)] +#[test_case(test_raftstore_v2::must_new_cluster_mul)] fn test_snapshot_not_block_grpc() { - let (cluster, leader, ctx) = must_new_cluster_mul(1); + let (cluster, leader, ctx) = new_cluster(1); let env = Arc::new(Environment::new(1)); let channel = ChannelBuilder::new(env) .keepalive_time(Duration::from_millis(500)) @@ -77,6 +81,8 @@ fn test_snapshot_not_block_grpc() { fail::remove("after-snapshot"); } +// the result notify mechanism is different in raft-v2, so no need to add a +// equivalent case for v2. #[test] fn test_undetermined_write_err() { let (cluster, leader, ctx) = must_new_cluster_mul(1); @@ -109,6 +115,7 @@ fn test_undetermined_write_err() { // The previous panic hasn't been captured. 
assert!(std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| drop(cluster))).is_err()); } + #[test] fn test_stale_read_on_local_leader() { let mut cluster = new_server_cluster(0, 1); From 3f7c63646ef5ea842f7ce6552826976feda2f609 Mon Sep 17 00:00:00 2001 From: Hu# Date: Fri, 24 Nov 2023 18:40:14 +0800 Subject: [PATCH 1028/1149] ctl: backoff load key range in finish flashback when meet `notLeader` or `regionNotFound` (#16058) close tikv/tikv#15712 Root: After `PrepareFlashback` the region and the region leader transfer, when executing `FinishFlashback` will meet `notLeader`. Since the tikv ctl retry `FinishFlashback` for the same peer, it just keeps doing useless retries. Solution: neet to support backoff load key range to identify peer in finish flashback when meet `notLeader` or `regionNotFound` Signed-off-by: husharp Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- cmd/tikv-ctl/src/executor.rs | 10 +++++----- cmd/tikv-ctl/src/main.rs | 18 ++++++++++++++++-- src/server/debug.rs | 16 ++++++++++------ 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index a20d6ce2602..3e4e505a32a 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -715,7 +715,7 @@ pub trait DebugExecutor { _key_range: KeyRange, _start_ts: u64, _commit_ts: u64, - ) -> Result<(), KeyRange>; + ) -> Result<(), (KeyRange, grpcio::Error)>; fn get_region_read_progress(&self, region_id: u64, log: bool, min_start_ts: u64); } @@ -948,7 +948,7 @@ impl DebugExecutor for DebugClient { key_range: KeyRange, start_ts: u64, commit_ts: u64, - ) -> Result<(), KeyRange> { + ) -> Result<(), (KeyRange, grpcio::Error)> { let mut req = FlashbackToVersionRequest::default(); req.set_version(version); req.set_region_id(region_id); @@ -963,7 +963,7 @@ impl DebugExecutor for DebugClient { "flashback key_range {:?} with start_ts {:?}, commit_ts {:?} need to retry, err is {:?}", 
key_range, start_ts, commit_ts, err ); - Err(key_range) + Err((key_range, err)) } } } @@ -1293,7 +1293,7 @@ where _key_range: KeyRange, _start_ts: u64, _commit_ts: u64, - ) -> Result<(), KeyRange> { + ) -> Result<(), (KeyRange, grpcio::Error)> { unimplemented!("only available for remote mode"); } @@ -1515,7 +1515,7 @@ impl DebugExecutor for DebuggerImplV2 { _key_range: KeyRange, _start_ts: u64, _commit_ts: u64, - ) -> Result<(), KeyRange> { + ) -> Result<(), (KeyRange, grpcio::Error)> { unimplemented!("only available for remote mode"); } diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index a3961bbc928..b57a99f8345 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -912,7 +912,7 @@ fn flashback_whole_cluster( .await { Ok(res) => { - if let Err(key_range) = res { + if let Err((key_range, _)) = res { // Retry specific key range to prepare flashback. let stale_key_range = (key_range.start_key.clone(), key_range.end_key.clone()); let mut key_range_to_prepare = key_range_to_prepare.write().unwrap(); @@ -992,7 +992,21 @@ fn flashback_whole_cluster( { Ok(res) => match res { Ok(_) => break, - Err(_) => { + Err((key_range, err)) => { + // Retry `NotLeader` or `RegionNotFound`. + if err.to_string().contains("not leader") || err.to_string().contains("not found") { + // When finished `PrepareFlashback`, the region may change leader in the `flashback in progress` + // Neet to retry specific key range to finish flashback. + let stale_key_range = (key_range.start_key.clone(), key_range.end_key.clone()); + let mut key_range_to_finish = key_range_to_finish.write().unwrap(); + // Remove stale key range. + key_range_to_finish.remove(&stale_key_range); + load_key_range(&pd_client, stale_key_range.0.clone(), stale_key_range.1.clone()) + .into_iter().for_each(|(key_range, region_info)| { + // Need to update `key_range_to_finish` to replace stale key range. 
+ key_range_to_finish.insert(key_range, region_info); + }); + } thread::sleep(Duration::from_micros(WAIT_APPLY_FLASHBACK_STATE)); continue; } diff --git a/src/server/debug.rs b/src/server/debug.rs index 9e01852455c..70e1df855d5 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -1111,9 +1111,11 @@ async fn async_key_range_flashback_to_version ?resp.get_error(), "region_err" => ?resp.get_region_error()); - return Err(Error::FlashbackFailed( - "exec prepare flashback failed.".into(), - )); + return Err(Error::FlashbackFailed(format!( + "exec prepare flashback failed: resp err is: {:?}, region err is: {:?}", + resp.get_error(), + resp.get_region_error() + ))); } } else { let mut req = kvrpcpb::FlashbackToVersionRequest::new(); @@ -1127,9 +1129,11 @@ async fn async_key_range_flashback_to_version ?resp.get_error(), "region_err" => ?resp.get_region_error()); - return Err(Error::FlashbackFailed( - "exec finish flashback failed.".into(), - )); + return Err(Error::FlashbackFailed(format!( + "exec finish flashback failed: resp err is: {:?}, region err is: {:?}", + resp.get_error(), + resp.get_region_error() + ))); } } Ok(()) From 53e05485b0030f2b26905d4c83a3463be561ff49 Mon Sep 17 00:00:00 2001 From: tongjian Date: Mon, 27 Nov 2023 10:16:13 +0800 Subject: [PATCH 1029/1149] server: grpc metrics consider request group priority (#15911) ref tikv/tikv#15803 Signed-off-by: bufferflies <1045931706@qq.com> Signed-off-by: GitHub Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/grafana/tikv_details.json | 30 +++++++++++++ src/server/metrics.rs | 26 ++++++++++- src/server/service/batch.rs | 44 +++++++++++++++--- src/server/service/kv.rs | 65 +++++++++++++++++++++++---- src/storage/mod.rs | 75 ++++++++++++++++++++++--------- 5 files changed, 201 insertions(+), 39 deletions(-) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index b5d45d2fea0..f73a59cf377 100644 --- 
a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -4990,6 +4990,16 @@ "metric": "tikv_grpc_msg_duration_seconds_bucket", "refId": "A", "step": 10 + }, + { + "expr": "sum(rate(tikv_grpc_msg_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (type,priority)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{type}}--{{priority}}", + "metric": "tikv_grpc_msg_duration_seconds_bucket", + "refId": "B", + "step": 10, + "hide": true } ], "thresholds": [], @@ -5200,6 +5210,16 @@ "legendFormat": "{{type}}", "refId": "A", "step": 10 + }, + { + "expr": "histogram_quantile(0.99, sum(rate(tikv_grpc_msg_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (le, type,priority))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}--{{priority}}", + "refId": "B", + "step": 10, + "hide": true } ], "thresholds": [], @@ -5305,6 +5325,16 @@ "legendFormat": "{{type}}", "refId": "A", "step": 10 + }, + { + "expr": "sum(rate(tikv_grpc_msg_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type,priority) / sum(rate(tikv_grpc_msg_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type,priority)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}--{{priority}}", + "refId": "B", + "step": 10, + "hide": true } ], "thresholds": [], diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 122748cdfa9..cef725c3f28 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -99,6 +99,13 @@ make_auto_flush_static_metric! 
{ fail, } + pub label_enum ResourcePriority { + high, + medium, + low, + unknown, + } + pub struct GcCommandCounterVec: LocalIntCounter { "type" => GcCommandKind, } @@ -134,6 +141,7 @@ make_auto_flush_static_metric! { pub struct GrpcMsgHistogramVec: LocalHistogram { "type" => GrpcTypeKind, + "priority" => ResourcePriority, } pub struct ReplicaReadLockCheckHistogramVec: LocalHistogram { @@ -234,7 +242,7 @@ lazy_static! { pub static ref GRPC_MSG_HISTOGRAM_VEC: HistogramVec = register_histogram_vec!( "tikv_grpc_msg_duration_seconds", "Bucketed histogram of grpc server messages", - &["type"], + &["type","priority"], exponential_buckets(5e-5, 2.0, 22).unwrap() // 50us ~ 104s ) .unwrap(); @@ -600,3 +608,19 @@ pub fn record_request_source_metrics(source: String, duration: Duration) { } }); } + +impl From for ResourcePriority { + fn from(priority: u64) -> Self { + // the mapping definition of priority in TIDB repo, + // see: https://github.com/tikv/tikv/blob/a0dbe2d0b893489015fc99ae73c6646f7989fe32/components/resource_control/src/resource_group.rs#L79-L89 + if priority == 0 { + Self::unknown + } else if priority < 6 { + Self::low + } else if priority < 11 { + Self::medium + } else { + Self::high + } + } +} diff --git a/src/server/service/batch.rs b/src/server/service/batch.rs index ba377bed4d2..3cc9a45e9dc 100644 --- a/src/server/service/batch.rs +++ b/src/server/service/batch.rs @@ -12,7 +12,7 @@ use tracker::{with_tls_tracker, RequestInfo, RequestType, Tracker, TrackerToken, use crate::{ server::{ - metrics::{GrpcTypeKind, REQUEST_BATCH_SIZE_HISTOGRAM_VEC}, + metrics::{GrpcTypeKind, ResourcePriority, REQUEST_BATCH_SIZE_HISTOGRAM_VEC}, service::kv::{batch_commands_response, GrpcRequestDuration, MeasuredSingleResponse}, }, storage::{ @@ -162,6 +162,7 @@ impl ResponseBatchConsumer<(Option>, Statistics)> for GetCommandResponse res: Result<(Option>, Statistics)>, begin: Instant, request_source: String, + resource_priority: ResourcePriority, ) { let mut resp = 
GetResponse::default(); if let Some(err) = extract_region_error(&res) { @@ -185,9 +186,13 @@ impl ResponseBatchConsumer<(Option>, Statistics)> for GetCommandResponse cmd: Some(batch_commands_response::response::Cmd::Get(resp)), ..Default::default() }; - let mesure = - GrpcRequestDuration::new(begin, GrpcTypeKind::kv_batch_get_command, request_source); - let task = MeasuredSingleResponse::new(id, res, mesure); + let measure = GrpcRequestDuration::new( + begin, + GrpcTypeKind::kv_batch_get_command, + request_source, + resource_priority, + ); + let task = MeasuredSingleResponse::new(id, res, measure); if self.tx.send_with(task, WakePolicy::Immediately).is_err() { error!("KvService response batch commands fail"); } @@ -201,6 +206,7 @@ impl ResponseBatchConsumer>> for GetCommandResponseConsumer { res: Result>>, begin: Instant, request_source: String, + resource_priority: ResourcePriority, ) { let mut resp = RawGetResponse::default(); if let Some(err) = extract_region_error(&res) { @@ -216,9 +222,13 @@ impl ResponseBatchConsumer>> for GetCommandResponseConsumer { cmd: Some(batch_commands_response::response::Cmd::RawGet(resp)), ..Default::default() }; - let mesure = - GrpcRequestDuration::new(begin, GrpcTypeKind::raw_batch_get_command, request_source); - let task = MeasuredSingleResponse::new(id, res, mesure); + let measure = GrpcRequestDuration::new( + begin, + GrpcTypeKind::raw_batch_get_command, + request_source, + resource_priority, + ); + let task = MeasuredSingleResponse::new(id, res, measure); if self.tx.send_with(task, WakePolicy::Immediately).is_err() { error!("KvService response batch commands fail"); } @@ -241,6 +251,15 @@ fn future_batch_get_command( .zip(gets.iter()) .map(|(id, req)| (*id, req.get_context().get_request_source().to_string())) .collect(); + + let group_priority = gets + .first() + .unwrap() + .get_context() + .get_resource_control_context() + .get_override_priority(); + let resource_priority = ResourcePriority::from(group_priority); + let res = 
storage.batch_get_command( gets, requests, @@ -266,6 +285,7 @@ fn future_batch_get_command( begin_instant, GrpcTypeKind::kv_batch_get_command, source, + resource_priority, ); let task = MeasuredSingleResponse::new(id, res, measure); if tx.send_with(task, WakePolicy::Immediately).is_err() { @@ -292,6 +312,15 @@ fn future_batch_raw_get_command( .zip(gets.iter()) .map(|(id, req)| (*id, req.get_context().get_request_source().to_string())) .collect(); + + let group_priority = gets + .first() + .unwrap() + .get_context() + .get_resource_control_context() + .get_override_priority(); + let resource_priority = ResourcePriority::from(group_priority); + let res = storage.raw_batch_get_command( gets, requests, @@ -312,6 +341,7 @@ fn future_batch_raw_get_command( begin_instant, GrpcTypeKind::raw_batch_get_command, source, + resource_priority, ); let task = MeasuredSingleResponse::new(id, res, measure); if tx.send_with(task, WakePolicy::Immediately).is_err() { diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 8426143d502..01aae59fe18 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -198,8 +198,10 @@ macro_rules! handle_request { let source = req.get_context().get_request_source().to_owned(); let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = &self.resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority= ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -212,6 +214,7 @@ macro_rules! 
handle_request { sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .$fn_name + .get(resource_group_priority) .observe(elapsed.as_secs_f64()); record_request_source_metrics(source, elapsed); ServerResult::Ok(()) @@ -430,6 +433,7 @@ impl Tikv for Service { sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .kv_prepare_flashback_to_version + .unknown .observe(elapsed.as_secs_f64()); record_request_source_metrics(source, elapsed); ServerResult::Ok(()) @@ -461,6 +465,7 @@ impl Tikv for Service { sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .kv_flashback_to_version + .unknown .observe(elapsed.as_secs_f64()); record_request_source_metrics(source, elapsed); ServerResult::Ok(()) @@ -480,9 +485,13 @@ impl Tikv for Service { forward_unary!(self.proxy, coprocessor, ctx, req, sink); let source = req.get_context().get_request_source().to_owned(); let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = &self.resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority = + ResourcePriority::from(resource_control_ctx.override_priority); } + GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) .inc(); @@ -495,6 +504,7 @@ impl Tikv for Service { sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .coprocessor + .get(resource_group_priority) .observe(elapsed.as_secs_f64()); record_request_source_metrics(source, elapsed); ServerResult::Ok(()) @@ -518,8 +528,11 @@ impl Tikv for Service { ) { let source = req.get_context().get_request_source().to_owned(); let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = &self.resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority = + 
ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -533,6 +546,7 @@ impl Tikv for Service { sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .raw_coprocessor + .get(resource_group_priority) .observe(elapsed.as_secs_f64()); record_request_source_metrics(source, elapsed); ServerResult::Ok(()) @@ -584,6 +598,7 @@ impl Tikv for Service { sink.success(resp).await?; GRPC_MSG_HISTOGRAM_STATIC .unsafe_destroy_range + .unknown .observe(elapsed.as_secs_f64()); record_request_source_metrics(source, elapsed); ServerResult::Ok(()) @@ -607,8 +622,11 @@ impl Tikv for Service { ) { let begin_instant = Instant::now(); let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = &self.resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority = + ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -628,6 +646,7 @@ impl Tikv for Service { Ok(_) => { GRPC_MSG_HISTOGRAM_STATIC .coprocessor_stream + .get(resource_group_priority) .observe(begin_instant.saturating_elapsed().as_secs_f64()); let _ = sink.close().await; } @@ -865,6 +884,7 @@ impl Tikv for Service { } GRPC_MSG_HISTOGRAM_STATIC .split_region + .unknown .observe(begin_instant.saturating_elapsed().as_secs_f64()); sink.success(resp).await?; ServerResult::Ok(()) @@ -1017,6 +1037,7 @@ impl Tikv for Service { let regions = resp.await?; GRPC_MSG_HISTOGRAM_STATIC .check_leader + .unknown .observe(begin_instant.saturating_elapsed().as_secs_f64()); let mut resp = CheckLeaderResponse::default(); resp.set_ts(ts); @@ -1029,6 +1050,11 @@ impl Tikv for Service { } return Err(Error::from(e)); } + let elapsed = begin_instant.saturating_elapsed(); + 
GRPC_MSG_HISTOGRAM_STATIC + .check_leader + .unknown + .observe(elapsed.as_secs_f64()); ServerResult::Ok(()) } .map_err(move |e| { @@ -1098,6 +1124,7 @@ fn response_batch_commands_request( begin: Instant, label: GrpcTypeKind, source: String, + resource_priority: ResourcePriority, ) where MemoryTraceGuard: From, F: Future> + Send + 'static, @@ -1108,6 +1135,7 @@ fn response_batch_commands_request( begin, label, source, + resource_priority, }; let task = MeasuredSingleResponse::new(id, resp, measure); if let Err(e) = tx.send_with(task, WakePolicy::Immediately) { @@ -1146,15 +1174,18 @@ fn handle_batch_commands_request( // For some invalid requests. let begin_instant = Instant::now(); let resp = future::ok(batch_commands_response::Response::default()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::invalid, String::default()); + response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::invalid, String::default(), ResourcePriority::unknown); }, Some(batch_commands_request::request::Cmd::Get(req)) => { let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority = ResourcePriority::from(resource_control_ctx.override_priority); } + GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[ resource_control_ctx.get_resource_group_name()]) .inc(); if batcher.as_mut().map_or(false, |req_batch| { req_batch.can_batch_get(&req) @@ -1166,13 +1197,15 @@ fn handle_batch_commands_request( let resp = future_get(storage, req) .map_ok(oneof!(batch_commands_response::response::Cmd::Get)) .map_err(|_| GRPC_MSG_FAIL_COUNTER.kv_get.inc()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::kv_get, source); + 
response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::kv_get, source,resource_group_priority); } }, Some(batch_commands_request::request::Cmd::RawGet(req)) => { let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority = ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1187,17 +1220,19 @@ fn handle_batch_commands_request( let resp = future_raw_get(storage, req) .map_ok(oneof!(batch_commands_response::response::Cmd::RawGet)) .map_err(|_| GRPC_MSG_FAIL_COUNTER.raw_get.inc()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::raw_get, source); + response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::raw_get, source,resource_group_priority); } }, Some(batch_commands_request::request::Cmd::Coprocessor(req)) => { let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority = ResourcePriority::from(resource_control_ctx.override_priority ); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) - .inc(); + .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .inc(); let begin_instant = Instant::now(); let source = req.get_context().get_request_source().to_owned(); let resp = future_copr(copr, Some(peer.to_string()), req) @@ -1205,7 +1240,7 @@ fn handle_batch_commands_request( resp.map(oneof!(batch_commands_response::response::Cmd::Coprocessor)) }) .map_err(|_| 
GRPC_MSG_FAIL_COUNTER.coprocessor.inc()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::coprocessor, source); + response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::coprocessor, source,resource_group_priority); }, Some(batch_commands_request::request::Cmd::Empty(req)) => { let begin_instant = Instant::now(); @@ -1222,12 +1257,15 @@ fn handle_batch_commands_request( begin_instant, GrpcTypeKind::invalid, String::default(), + ResourcePriority::unknown, ); } $(Some(batch_commands_request::request::Cmd::$cmd(req)) => { let resource_control_ctx = req.get_context().get_resource_control_context(); + let mut resource_group_priority = ResourcePriority::unknown; if let Some(resource_manager) = resource_manager { resource_manager.consume_penalty(resource_control_ctx); + resource_group_priority = ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC .with_label_values(&[resource_control_ctx.get_resource_group_name()]) @@ -1237,7 +1275,7 @@ fn handle_batch_commands_request( let resp = $future_fn($($arg,)* req) .map_ok(oneof!(batch_commands_response::response::Cmd::$cmd)) .map_err(|_| GRPC_MSG_FAIL_COUNTER.$metric_name.inc()); - response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::$metric_name, source); + response_batch_commands_request(id, resp, tx.clone(), begin_instant, GrpcTypeKind::$metric_name, source,resource_group_priority); })* Some(batch_commands_request::request::Cmd::Import(_)) => unimplemented!(), } @@ -1287,10 +1325,12 @@ fn handle_measures_for_batch_commands(measures: &mut MeasuredBatchResponse) { label, begin, source, + resource_priority, } = measure; let elapsed = now.saturating_duration_since(begin); GRPC_MSG_HISTOGRAM_STATIC .get(label) + .get(resource_priority) .observe(elapsed.as_secs_f64()); record_request_source_metrics(source, elapsed); let exec_details = resp.cmd.as_mut().and_then(|cmd| match cmd { @@ -2233,13 
+2273,20 @@ pub struct GrpcRequestDuration { pub begin: Instant, pub label: GrpcTypeKind, pub source: String, + pub resource_priority: ResourcePriority, } impl GrpcRequestDuration { - pub fn new(begin: Instant, label: GrpcTypeKind, source: String) -> Self { + pub fn new( + begin: Instant, + label: GrpcTypeKind, + source: String, + resource_priority: ResourcePriority, + ) -> Self { GrpcRequestDuration { begin, label, source, + resource_priority, } } } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index c89a767a80b..2bdc07625ee 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -121,7 +121,7 @@ pub use self::{ use self::{kv::SnapContext, test_util::latest_feature_gate}; use crate::{ read_pool::{ReadPool, ReadPoolHandle}, - server::lock_manager::waiter_manager, + server::{lock_manager::waiter_manager, metrics::ResourcePriority}, storage::{ config::Config, kv::{with_tls_engine, Modify, WriteData}, @@ -776,17 +776,20 @@ impl Storage { let priority = requests[0].get_context().get_priority(); let metadata = TaskMetadata::from_ctx(requests[0].get_context().get_resource_control_context()); + let resource_group_name = requests[0] + .get_context() + .get_resource_control_context() + .get_resource_group_name(); + let group_priority = requests[0] + .get_context() + .get_resource_control_context() + .get_override_priority(); + let resource_priority = ResourcePriority::from(group_priority); let resource_limiter = self.resource_manager.as_ref().and_then(|r| { r.get_resource_limiter( - requests[0] - .get_context() - .get_resource_control_context() - .get_resource_group_name(), + resource_group_name, requests[0].get_context().get_request_source(), - requests[0] - .get_context() - .get_resource_control_context() - .get_override_priority(), + group_priority, ) }); let concurrency_manager = self.concurrency_manager.clone(); @@ -862,7 +865,7 @@ impl Storage { snap_ctx } Err(e) => { - consumer.consume(id, Err(e), begin_instant, source); + consumer.consume(id, Err(e), 
begin_instant, source, resource_priority); continue; } }; @@ -901,7 +904,13 @@ impl Storage { ) = req_snap; let snap_res = snap.await; if let Err(e) = deadline.check() { - consumer.consume(id, Err(Error::from(e)), begin_instant, source); + consumer.consume( + id, + Err(Error::from(e)), + begin_instant, + source, + resource_priority, + ); continue; } @@ -933,6 +942,7 @@ impl Storage { .map(|v| (v, stat)), begin_instant, source, + resource_priority, ); } Err(e) => { @@ -941,12 +951,13 @@ impl Storage { Err(Error::from(txn::Error::from(e))), begin_instant, source, + resource_priority, ); } } }), Err(e) => { - consumer.consume(id, Err(e), begin_instant, source); + consumer.consume(id, Err(e), begin_instant, source, resource_priority); } } } @@ -1756,17 +1767,20 @@ impl Storage { // all requests in a batch have the same region, epoch, term, replica_read let priority = gets[0].get_context().get_priority(); let metadata = TaskMetadata::from_ctx(gets[0].get_context().get_resource_control_context()); + let resource_group_name = gets[0] + .get_context() + .get_resource_control_context() + .get_resource_group_name(); + let group_priority = gets[0] + .get_context() + .get_resource_control_context() + .get_override_priority(); + let resource_priority = ResourcePriority::from(group_priority); let resource_limiter = self.resource_manager.as_ref().and_then(|r| { r.get_resource_limiter( - gets[0] - .get_context() - .get_resource_control_context() - .get_resource_group_name(), + resource_group_name, gets[0].get_context().get_request_source(), - gets[0] - .get_context() - .get_resource_control_context() - .get_override_priority(), + group_priority, ) }); let priority_tag = get_priority_tag(priority); @@ -1848,6 +1862,7 @@ impl Storage { .map_err(Error::from), begin_instant, ctx.take_request_source(), + resource_priority, ); tls_collect_read_flow( ctx.get_region_id(), @@ -1863,12 +1878,19 @@ impl Storage { Err(e), begin_instant, ctx.take_request_source(), + resource_priority, ); } } } 
Err(e) => { - consumer.consume(id, Err(e), begin_instant, ctx.take_request_source()); + consumer.consume( + id, + Err(e), + begin_instant, + ctx.take_request_source(), + resource_priority, + ); } } } @@ -3445,6 +3467,7 @@ pub trait ResponseBatchConsumer: Send { res: Result, begin: Instant, request_source: String, + resource_priority: ResourcePriority, ); } @@ -3745,6 +3768,7 @@ pub mod test_util { res: Result<(Option>, Statistics)>, _: Instant, _source: String, + _resource_priority: ResourcePriority, ) { self.data.lock().unwrap().push(GetResult { id, @@ -3754,7 +3778,14 @@ pub mod test_util { } impl ResponseBatchConsumer>> for GetConsumer { - fn consume(&self, id: u64, res: Result>>, _: Instant, _source: String) { + fn consume( + &self, + id: u64, + res: Result>>, + _: Instant, + _source: String, + _resource_priority: ResourcePriority, + ) { self.data.lock().unwrap().push(GetResult { id, res }); } } From 3b30d692c5ed0c5bdd8922a4129a715300182ed1 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 27 Nov 2023 12:08:43 +0800 Subject: [PATCH 1030/1149] cdc: limit pending scan tasks (#16048) close tikv/tikv#16035 When TiCDC starts changefeed, it may send numerous requests leading to the creation of numerous scan tasks. However, the initial surge of scan tasks may cause OOM. This commit aims to resolve the issue by implementing a mechanism that allows TiKV to reject requests when the number of pending tasks reaches a certain limit. 
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/cdc/src/endpoint.rs | 155 +++++++++++++++++++-- src/config/mod.rs | 24 ++++ tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 5 +- 4 files changed, 170 insertions(+), 15 deletions(-) diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index e62650c77c6..e1a985d4e98 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -5,7 +5,10 @@ use std::{ cmp::{Ord, Ordering as CmpOrdering, PartialOrd, Reverse}, collections::BinaryHeap, fmt, - sync::{Arc, Mutex as StdMutex}, + sync::{ + atomic::{AtomicIsize, Ordering}, + Arc, Mutex as StdMutex, + }, time::Duration, }; @@ -382,6 +385,8 @@ pub struct Endpoint { // Incremental scan workers: Runtime, + // The total number of scan tasks including running and pending. + scan_task_counter: Arc, scan_concurrency_semaphore: Arc, scan_speed_limiter: Limiter, fetch_speed_limiter: Limiter, @@ -475,6 +480,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint, E: KvEngine, S: StoreRegionMeta> Endpoint self.config.incremental_scan_concurrency_limit as isize { + debug!("cdc rejects registration, too many scan tasks"; + "region_id" => region_id, + "conn_id" => ?conn_id, + "req_id" => request_id, + "scan_task_count" => scan_task_count, + "incremental_scan_concurrency_limit" => self.config.incremental_scan_concurrency_limit, + ); + // To avoid OOM (e.g., https://github.com/tikv/tikv/issues/16035), + // TiKV needs to reject and return error immediately. + // + // TODO: TiKV is supposed to return a "busy" error, but for the sake + // of compatibility, it returns a "region not found" error. 
+ let _ = downstream.sink_region_not_found(region_id); + return; + } + let txn_extra_op = match self.store_meta.lock().unwrap().reader(region_id) { Some(reader) => reader.txn_extra_op.clone(), None => { @@ -842,6 +870,7 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint Task { + fn set_conn_version_task(conn_id: ConnId, version: semver::Version) -> Task { Task::SetConnVersion { conn_id, version, @@ -1541,7 +1570,7 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); - suite.run(set_conn_verion_task( + suite.run(set_conn_version_task( conn_id, FeatureGate::batch_resolved_ts(), )); @@ -1828,7 +1857,10 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); - suite.run(set_conn_verion_task(conn_id, semver::Version::new(0, 0, 0))); + suite.run(set_conn_version_task( + conn_id, + semver::Version::new(0, 0, 0), + )); let mut req_header = Header::default(); req_header.set_cluster_id(0); @@ -1880,7 +1912,7 @@ mod tests { // Enable batch resolved ts in the test. let version = FeatureGate::batch_resolved_ts(); - suite.run(set_conn_verion_task(conn_id, version)); + suite.run(set_conn_version_task(conn_id, version)); let mut req_header = Header::default(); req_header.set_cluster_id(0); @@ -2016,6 +2048,97 @@ mod tests { } } + #[test] + fn test_too_many_scan_tasks() { + let cfg = CdcConfig { + min_ts_interval: ReadableDuration(Duration::from_secs(60)), + incremental_scan_concurrency: 1, + incremental_scan_concurrency_limit: 1, + ..Default::default() + }; + let mut suite = mock_endpoint(&cfg, None, ApiVersion::V1); + + // Pause scan task runtime. 
+ suite.endpoint.workers = Builder::new_multi_thread() + .worker_threads(1) + .build() + .unwrap(); + let (pause_tx, pause_rx) = std::sync::mpsc::channel::<()>(); + suite.endpoint.workers.spawn(async move { + let _ = pause_rx.recv(); + }); + + suite.add_region(1, 100); + let quota = Arc::new(MemoryQuota::new(usize::MAX)); + let (tx, mut rx) = channel::channel(1, quota); + let mut rx = rx.drain(); + + let conn = Conn::new(tx, String::new()); + let conn_id = conn.get_id(); + suite.run(Task::OpenConn { conn }); + + // Enable batch resolved ts in the test. + let version = FeatureGate::batch_resolved_ts(); + suite.run(set_conn_version_task(conn_id, version)); + + let mut req_header = Header::default(); + req_header.set_cluster_id(0); + let mut req = ChangeDataRequest::default(); + req.set_region_id(1); + req.set_request_id(1); + let region_epoch = req.get_region_epoch().clone(); + let downstream = Downstream::new( + "".to_string(), + region_epoch.clone(), + 1, + conn_id, + ChangeDataRequestKvApi::TiDb, + false, + ObservedRange::default(), + ); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + }); + assert_eq!(suite.endpoint.capture_regions.len(), 1); + + // Test too many scan tasks error. 
+ req.set_request_id(2); + let downstream = Downstream::new( + "".to_string(), + region_epoch, + 2, + conn_id, + ChangeDataRequestKvApi::TiDb, + false, + ObservedRange::default(), + ); + suite.run(Task::Register { + request: req.clone(), + downstream, + conn_id, + }); + let cdc_event = channel::recv_timeout(&mut rx, Duration::from_millis(500)) + .unwrap() + .unwrap(); + if let CdcEvent::Event(mut e) = cdc_event.0 { + assert_eq!(e.region_id, 1); + assert_eq!(e.request_id, 2); + let event = e.event.take().unwrap(); + match event { + Event_oneof_event::Error(err) => { + assert!(err.has_region_not_found()); + } + other => panic!("unknown event {:?}", other), + } + } else { + panic!("unknown cdc event {:?}", cdc_event); + } + + drop(pause_tx); + } + #[test] fn test_raw_causal_min_ts() { let sleep_interval = Duration::from_secs(1); @@ -2062,7 +2185,7 @@ mod tests { // Enable batch resolved ts in the test. let version = FeatureGate::batch_resolved_ts(); - suite.run(set_conn_verion_task(conn_id, version)); + suite.run(set_conn_version_task(conn_id, version)); let mut req_header = Header::default(); req_header.set_cluster_id(0); @@ -2151,7 +2274,10 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); - suite.run(set_conn_verion_task(conn_id, semver::Version::new(4, 0, 5))); + suite.run(set_conn_version_task( + conn_id, + semver::Version::new(4, 0, 5), + )); req.set_region_id(3); req.set_request_id(3); @@ -2222,7 +2348,10 @@ mod tests { let conn = Conn::new(tx, String::new()); let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); - suite.run(set_conn_verion_task(conn_id, semver::Version::new(0, 0, 0))); + suite.run(set_conn_version_task( + conn_id, + semver::Version::new(0, 0, 0), + )); let mut req_header = Header::default(); req_header.set_cluster_id(0); @@ -2375,7 +2504,7 @@ mod tests { let conn_id = conn.get_id(); suite.run(Task::OpenConn { conn }); let version = 
FeatureGate::batch_resolved_ts(); - suite.run(set_conn_verion_task(conn_id, version)); + suite.run(set_conn_version_task(conn_id, version)); for region_id in region_ids { suite.add_region(region_id, 100); @@ -2488,7 +2617,7 @@ mod tests { let conn_a = Conn::new(tx1, String::new()); let conn_id_a = conn_a.get_id(); suite.run(Task::OpenConn { conn: conn_a }); - suite.run(set_conn_verion_task( + suite.run(set_conn_version_task( conn_id_a, semver::Version::new(0, 0, 0), )); @@ -2499,7 +2628,7 @@ mod tests { let conn_b = Conn::new(tx2, String::new()); let conn_id_b = conn_b.get_id(); suite.run(Task::OpenConn { conn: conn_b }); - suite.run(set_conn_verion_task( + suite.run(set_conn_version_task( conn_id_b, semver::Version::new(0, 0, 0), )); @@ -2656,7 +2785,7 @@ mod tests { suite.run(Task::OpenConn { conn }); // Enable batch resolved ts in the test. let version = FeatureGate::batch_resolved_ts(); - suite.run(set_conn_verion_task(conn_id, version)); + suite.run(set_conn_version_task(conn_id, version)); let mut req_header = Header::default(); req_header.set_cluster_id(0); @@ -2749,7 +2878,7 @@ mod tests { suite.run(Task::OpenConn { conn }); let version = FeatureGate::batch_resolved_ts(); - suite.run(set_conn_verion_task(conn_id, version)); + suite.run(set_conn_version_task(conn_id, version)); let mut req_header = Header::default(); req_header.set_cluster_id(0); diff --git a/src/config/mod.rs b/src/config/mod.rs index 27f38abee4a..a862d01ace4 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2956,7 +2956,12 @@ pub struct CdcConfig { // TODO(hi-rustin): Consider resizing the thread pool based on `incremental_scan_threads`. #[online_config(skip)] pub incremental_scan_threads: usize, + // The number of scan tasks that is allowed to run concurrently. pub incremental_scan_concurrency: usize, + // The number of scan tasks that is allowed to be created. 
In other words, + // there will be at most `incremental_scan_concurrency_limit - incremental_scan_concurrency` + // number of scan tasks that is waitting to run. + pub incremental_scan_concurrency_limit: usize, /// Limit scan speed based on disk I/O traffic. pub incremental_scan_speed_limit: ReadableSize, /// Limit scan speed based on memory accesing traffic. @@ -2999,6 +3004,8 @@ impl Default for CdcConfig { incremental_scan_threads: 4, // At most 6 concurrent running tasks. incremental_scan_concurrency: 6, + // At most 10000 tasks can exist simultaneously. + incremental_scan_concurrency_limit: 10000, // TiCDC requires a SSD, the typical write speed of SSD // is more than 500MB/s, so 128MB/s is enough. incremental_scan_speed_limit: ReadableSize::mb(128), @@ -3040,6 +3047,14 @@ impl CdcConfig { ); self.incremental_scan_concurrency = self.incremental_scan_threads } + if self.incremental_scan_concurrency_limit < self.incremental_scan_concurrency { + warn!( + "cdc.incremental-scan-concurrency-limit must be larger than cdc.incremental-scan-concurrency, + change it to {}", + self.incremental_scan_concurrency + ); + self.incremental_scan_concurrency_limit = self.incremental_scan_concurrency + } if self.incremental_scan_ts_filter_ratio < 0.0 || self.incremental_scan_ts_filter_ratio > 1.0 { @@ -6806,6 +6821,15 @@ mod tests { let mut cfg: TikvConfig = toml::from_str(content).unwrap(); cfg.validate().unwrap(); + let content = r#" + [cdc] + incremental-scan-concurrency = 6 + incremental-scan-concurrency-limit = 0 + "#; + let mut cfg: TikvConfig = toml::from_str(content).unwrap(); + cfg.validate().unwrap(); + assert!(cfg.cdc.incremental_scan_concurrency_limit >= cfg.cdc.incremental_scan_concurrency); + let content = r#" [storage] engine = "partitioned-raft-kv" diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index d49f5e50c0d..5e7e4529c40 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -845,6 +845,7 @@ fn 
test_serde_custom_tikv_config() { hibernate_regions_compatible: false, incremental_scan_threads: 3, incremental_scan_concurrency: 4, + incremental_scan_concurrency_limit: 5, incremental_scan_speed_limit: ReadableSize(7), incremental_fetch_speed_limit: ReadableSize(8), incremental_scan_ts_filter_ratio: 0.7, diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 30a501b1cee..d1e83663c24 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -16,8 +16,8 @@ max-backups = 2 max-days = 3 [memory] -enable-heap-profiling = false -profiling-sample-per-bytes = "1MB" +enable-heap-profiling = false +profiling-sample-per-bytes = "1MB" [readpool.unified] min-thread-count = 5 @@ -700,6 +700,7 @@ old-value-cache-size = 0 hibernate-regions-compatible = false incremental-scan-threads = 3 incremental-scan-concurrency = 4 +incremental-scan-concurrency-limit = 5 incremental-scan-speed-limit = 7 incremental-fetch-speed-limit = 8 incremental-scan-ts-filter-ratio = 0.7 From 9711e316e3c0581b3703ae2b485ef9abdbbb718f Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 27 Nov 2023 14:37:15 +0800 Subject: [PATCH 1031/1149] raftstore: update apply state even if peer is removed (#16060) close tikv/tikv#16069, close pingcap/tidb#48802 When a peer is removed, it is necessary to update its apply state because this peer may be simultaneously taking a snapshot. An outdated apply state will invalidate the coprocessor cache assumption and potentially lead to a violation of linearizability (returning stale cache). 
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/apply.rs | 146 +++++++++++++++++++- 1 file changed, 143 insertions(+), 3 deletions(-) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 339dff68e76..1639f441e38 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -662,9 +662,7 @@ where results: VecDeque>, ) { if self.host.pre_persist(&delegate.region, true, None) { - if !delegate.pending_remove { - delegate.maybe_write_apply_state(self); - } + delegate.maybe_write_apply_state(self); self.commit_opt(delegate, false); } else { debug!("do not persist when finish_for"; @@ -5532,6 +5530,21 @@ mod tests { ) } + fn cb_conf_change( + idx: u64, + term: u64, + tx: Sender, + ) -> Proposal> { + proposal( + true, + idx, + term, + Callback::write(Box::new(move |resp: WriteResponse| { + tx.send(resp.response).unwrap(); + })), + ) + } + struct EntryBuilder { entry: Entry, req: RaftCmdRequest, @@ -5659,6 +5672,14 @@ mod tests { self } + fn conf_change(mut self, changes: Vec) -> EntryBuilder { + let mut req = AdminRequest::default(); + req.set_cmd_type(AdminCmdType::ChangePeerV2); + req.mut_change_peer_v2().set_changes(changes.into()); + self.req.set_admin_request(req); + self + } + fn build(mut self) -> Entry { self.entry .set_data(self.req.write_to_bytes().unwrap().into()); @@ -7656,6 +7677,125 @@ mod tests { system.shutdown(); } + // When a peer is removed, it is necessary to update its apply state because + // this peer may be simultaneously taking a snapshot. An outdated apply state + // invalidates the coprocessor cache assumption (apply state must match data + // in the snapshot) and potentially lead to a violation of linearizability + // (returning stale cache). 
+ #[test] + fn test_conf_change_remove_node_update_apply_state() { + let (_path, engine) = create_tmp_engine("test-delegate"); + let (_import_dir, importer) = create_tmp_importer("test-delegate"); + let peer_id = 3; + let mut reg = Registration { + id: peer_id, + term: 1, + ..Default::default() + }; + reg.region.set_id(1); + reg.region.set_end_key(b"k5".to_vec()); + reg.region.mut_region_epoch().set_version(3); + let peers = vec![new_peer(2, 3), new_peer(4, 5), new_learner_peer(6, 7)]; + reg.region.set_peers(peers.into()); + let (tx, apply_res_rx) = mpsc::channel(); + let sender = Box::new(TestNotifier { tx }); + let coprocessor_host = CoprocessorHost::::default(); + let (region_scheduler, _) = dummy_scheduler(); + let cfg = Arc::new(VersionTrack::new(Config::default())); + let (router, mut system) = create_apply_batch_system(&cfg.value(), None); + let pending_create_peers = Arc::new(Mutex::new(HashMap::default())); + let builder = super::Builder:: { + tag: "test-store".to_owned(), + cfg, + sender, + importer, + region_scheduler, + coprocessor_host, + engine: engine.clone(), + router: router.clone(), + store_id: 2, + pending_create_peers, + }; + system.spawn("test-conf-change".to_owned(), builder); + + router.schedule_task(1, Msg::Registration(reg.dup())); + + let mut index_id = 1; + let epoch = reg.region.get_region_epoch().to_owned(); + + // Write some data. 
+ let (capture_tx, capture_rx) = mpsc::channel(); + let put_entry = EntryBuilder::new(index_id, 1) + .put(b"k1", b"v1") + .epoch(epoch.get_conf_ver(), epoch.get_version()) + .build(); + router.schedule_task( + 1, + Msg::apply(apply( + peer_id, + 1, + 1, + vec![put_entry], + vec![cb(index_id, 1, capture_tx)], + )), + ); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + let initial_state: RaftApplyState = engine + .get_msg_cf(CF_RAFT, &keys::apply_state_key(1)) + .unwrap() + .unwrap(); + assert_ne!(initial_state.get_applied_index(), 0); + match apply_res_rx.recv_timeout(Duration::from_secs(3)) { + Ok(PeerMsg::ApplyRes { + res: TaskRes::Apply(apply_res), + }) => assert_eq!(apply_res.apply_state, initial_state), + e => panic!("unexpected result: {:?}", e), + } + index_id += 1; + + // Remove itself. + let (capture_tx, capture_rx) = mpsc::channel(); + let mut remove_node = ChangePeerRequest::default(); + remove_node.set_change_type(ConfChangeType::RemoveNode); + remove_node.set_peer(new_peer(2, 3)); + let conf_change = EntryBuilder::new(index_id, 1) + .conf_change(vec![remove_node]) + .epoch(epoch.get_conf_ver(), epoch.get_version()) + .build(); + router.schedule_task( + 1, + Msg::apply(apply( + peer_id, + 1, + 1, + vec![conf_change], + vec![cb_conf_change(index_id, 1, capture_tx)], + )), + ); + let resp = capture_rx.recv_timeout(Duration::from_secs(3)).unwrap(); + assert!(!resp.get_header().has_error(), "{:?}", resp); + + let apply_state: RaftApplyState = engine + .get_msg_cf(CF_RAFT, &keys::apply_state_key(1)) + .unwrap() + .unwrap(); + match apply_res_rx.recv_timeout(Duration::from_secs(3)) { + Ok(PeerMsg::ApplyRes { + res: TaskRes::Apply(apply_res), + }) => assert_eq!(apply_res.apply_state, apply_state), + e => panic!("unexpected result: {:?}", e), + } + assert!( + apply_state.get_applied_index() > initial_state.get_applied_index(), + "\n{:?}\n{:?}", + apply_state, + initial_state + ); + 
+ system.shutdown(); + } + #[test] fn pending_cmd_leak() { let res = panic_hook::recover_safe(|| { From 88542955b6953815e1f5ca53071f60084f61632d Mon Sep 17 00:00:00 2001 From: Connor Date: Mon, 27 Nov 2023 15:52:15 +0800 Subject: [PATCH 1032/1149] sst_importer: Use generic sst reader for importer (#16059) ref tikv/tikv#15986 use generic sst reader for importer Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 1 + components/engine_panic/src/sst.rs | 10 +- components/engine_rocks/src/encryption.rs | 13 +- components/engine_rocks/src/lib.rs | 6 +- components/engine_rocks/src/sst.rs | 54 +++--- components/engine_traits/src/sst.rs | 6 +- components/engine_traits_tests/src/sst.rs | 13 +- components/raftstore-v2/src/batch/store.rs | 8 +- components/raftstore-v2/src/fsm/apply.rs | 2 +- components/raftstore-v2/src/operation/mod.rs | 4 +- components/raftstore-v2/src/raft/apply.rs | 6 +- components/raftstore-v2/src/worker/tablet.rs | 4 +- .../raftstore/src/store/compaction_guard.rs | 3 +- components/raftstore/src/store/fsm/apply.rs | 8 +- components/raftstore/src/store/fsm/store.rs | 6 +- .../raftstore/src/store/worker/cleanup.rs | 4 +- .../raftstore/src/store/worker/cleanup_sst.rs | 11 +- components/server/src/server.rs | 2 +- components/server/src/server2.rs | 2 +- components/sst_importer/src/import_file.rs | 34 ++-- components/sst_importer/src/sst_importer.rs | 181 +++++++++++------- components/sst_importer/src/sst_writer.rs | 5 +- components/test_raftstore/src/server.rs | 2 +- components/tikv_kv/Cargo.toml | 1 + components/tikv_kv/src/rocksdb_engine.rs | 5 +- src/import/sst_service.rs | 12 +- src/server/node.rs | 4 +- src/server/raftkv2/node.rs | 4 +- 28 files changed, 229 insertions(+), 182 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2ebbbef2f8a..d191ca34188 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6556,6 +6556,7 @@ version = "0.1.0" dependencies = [ "backtrace", "collections", + "encryption", 
"engine_panic", "engine_rocks", "engine_test", diff --git a/components/engine_panic/src/sst.rs b/components/engine_panic/src/sst.rs index 119cd5884a3..2e51c0b87b5 100644 --- a/components/engine_panic/src/sst.rs +++ b/components/engine_panic/src/sst.rs @@ -18,18 +18,18 @@ impl SstExt for PanicEngine { pub struct PanicSstReader; impl SstReader for PanicSstReader { - fn open(path: &str) -> Result { - panic!() - } - fn open_encrypted( + fn open( path: &str, - mgr: Arc, + mgr: Option>, ) -> Result { panic!() } fn verify_checksum(&self) -> Result<()> { panic!() } + fn kv_count_and_size(&self) -> (u64, u64) { + panic!() + } } impl RefIterable for PanicSstReader { diff --git a/components/engine_rocks/src/encryption.rs b/components/engine_rocks/src/encryption.rs index 4dbe3ab10d2..58d359b39df 100644 --- a/components/engine_rocks/src/encryption.rs +++ b/components/engine_rocks/src/encryption.rs @@ -2,7 +2,6 @@ use std::{io::Result, sync::Arc}; -use encryption::{self, DataKeyManager}; use engine_traits::{EncryptionKeyManager, EncryptionMethod, FileEncryptionInfo}; use rocksdb::{ DBEncryptionMethod, EncryptionKeyManager as DBEncryptionKeyManager, @@ -12,16 +11,16 @@ use rocksdb::{ use crate::{r2e, raw::Env}; // Use engine::Env directly since Env is not abstracted. 
-pub(crate) fn get_env( +pub(crate) fn get_env( base_env: Option>, - key_manager: Option>, -) -> engine_traits::Result> { - let base_env = base_env.unwrap_or_else(|| Arc::new(Env::default())); + key_manager: Option>, +) -> engine_traits::Result>> { if let Some(manager) = key_manager { - Ok(Arc::new( + let base_env = base_env.unwrap_or_else(|| Arc::new(Env::default())); + Ok(Some(Arc::new( Env::new_key_managed_encrypted_env(base_env, WrappedEncryptionKeyManager { manager }) .map_err(r2e)?, - )) + ))) } else { Ok(base_env) } diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index b5561b3de42..3226a4592f0 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -117,10 +117,10 @@ pub use flow_control_factors::*; pub mod raw; -pub fn get_env( - key_manager: Option>, +pub fn get_env( + key_manager: Option>, limiter: Option>, ) -> engine_traits::Result> { let env = encryption::get_env(None /* base_env */, key_manager)?; - file_system::get_env(Some(env), limiter) + file_system::get_env(env, limiter) } diff --git a/components/engine_rocks/src/sst.rs b/components/engine_rocks/src/sst.rs index 145fa9a7bce..0a234983aa3 100644 --- a/components/engine_rocks/src/sst.rs +++ b/components/engine_rocks/src/sst.rs @@ -3,21 +3,18 @@ use std::{path::PathBuf, sync::Arc}; use engine_traits::{ - EncryptionKeyManager, Error, ExternalSstFileInfo, IterOptions, Iterator, RefIterable, Result, - SstCompressionType, SstExt, SstMetaInfo, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, + Error, ExternalSstFileInfo, IterOptions, Iterator, RefIterable, Result, SstCompressionType, + SstExt, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, }; use fail::fail_point; -use kvproto::import_sstpb::SstMeta; +use file_system::get_io_rate_limiter; use rocksdb::{ rocksdb::supported_compression, ColumnFamilyOptions, DBCompressionType, DBIterator, Env, EnvOptions, ExternalSstFileInfo as RawExternalSstFileInfo, SequentialFile, 
SstFileReader, SstFileWriter, DB, }; -use tikv_util::box_err; -use crate::{ - encryption::WrappedEncryptionKeyManager, engine::RocksEngine, options::RocksReadOptions, r2e, -}; +use crate::{engine::RocksEngine, get_env, options::RocksReadOptions, r2e}; impl SstExt for RocksEngine { type SstReader = RocksSstReader; @@ -30,19 +27,6 @@ pub struct RocksSstReader { } impl RocksSstReader { - pub fn sst_meta_info(&self, sst: SstMeta) -> SstMetaInfo { - let mut meta = SstMetaInfo { - total_kvs: 0, - total_bytes: 0, - meta: sst, - }; - self.inner.read_table_properties(|p| { - meta.total_kvs = p.num_entries(); - meta.total_bytes = p.raw_key_size() + p.raw_value_size(); - }); - meta - } - pub fn open_with_env(path: &str, env: Option>) -> Result { let mut cf_options = ColumnFamilyOptions::new(); if let Some(env) = env { @@ -63,20 +47,26 @@ impl RocksSstReader { } impl SstReader for RocksSstReader { - fn open(path: &str) -> Result { - Self::open_with_env(path, None) - } - fn open_encrypted(path: &str, mgr: Arc) -> Result { - let env = Env::new_key_managed_encrypted_env( - Arc::default(), - WrappedEncryptionKeyManager::new(mgr), - ) - .map_err(|err| Error::Other(box_err!("failed to open encrypted env: {}", err)))?; - Self::open_with_env(path, Some(Arc::new(env))) + fn open( + path: &str, + mgr: Option>, + ) -> Result { + let env = get_env(mgr, get_io_rate_limiter())?; + Self::open_with_env(path, Some(env)) } + fn verify_checksum(&self) -> Result<()> { - self.inner.verify_checksum().map_err(r2e)?; - Ok(()) + self.inner.verify_checksum().map_err(r2e) + } + + fn kv_count_and_size(&self) -> (u64, u64) { + let mut count = 0; + let mut bytes = 0; + self.inner.read_table_properties(|p| { + count = p.num_entries(); + bytes = p.raw_key_size() + p.raw_value_size(); + }); + (count, bytes) } } diff --git a/components/engine_traits/src/sst.rs b/components/engine_traits/src/sst.rs index 4a728df1e97..dccd3a2523d 100644 --- a/components/engine_traits/src/sst.rs +++ 
b/components/engine_traits/src/sst.rs @@ -20,10 +20,10 @@ pub trait SstExt: Sized { } /// SstReader is used to read an SST file. -pub trait SstReader: RefIterable + Sized { - fn open(path: &str) -> Result; - fn open_encrypted(path: &str, mgr: Arc) -> Result; +pub trait SstReader: RefIterable + Sized + Send { + fn open(path: &str, mgr: Option>) -> Result; fn verify_checksum(&self) -> Result<()>; + fn kv_count_and_size(&self) -> (u64, u64); } /// SstWriter is used to create sst files that can be added to database later. diff --git a/components/engine_traits_tests/src/sst.rs b/components/engine_traits_tests/src/sst.rs index 26ed686aad4..629c81df528 100644 --- a/components/engine_traits_tests/src/sst.rs +++ b/components/engine_traits_tests/src/sst.rs @@ -4,6 +4,7 @@ use std::fs; +use encryption::DataKeyManager; use engine_test::kv::KvTestEngine; use engine_traits::{ Error, ExternalSstFileInfo, IterOptions, Iterator, RefIterable, Result, SstExt, SstReader, @@ -48,7 +49,7 @@ fn basic() -> Result<()> { sst_writer.put(b"k1", b"v1")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open(&sst_path)?; + let sst_reader = ::SstReader::open::(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -77,7 +78,7 @@ fn forward() -> Result<()> { sst_writer.put(b"k2", b"v2")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open(&sst_path)?; + let sst_reader = ::SstReader::open::(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -114,7 +115,7 @@ fn reverse() -> Result<()> { sst_writer.put(b"k2", b"v2")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open(&sst_path)?; + let sst_reader = ::SstReader::open::(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_last()?; @@ -152,7 +153,7 @@ fn delete() -> Result<()> { sst_writer.delete(b"k1")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open(&sst_path)?; 
+ let sst_reader = ::SstReader::open::(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -206,7 +207,7 @@ fn same_key() -> Result<()> { sst_writer.finish()?; - let sst_reader = ::SstReader::open(&sst_path)?; + let sst_reader = ::SstReader::open::(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -248,7 +249,7 @@ fn reverse_key() -> Result<()> { sst_writer.finish()?; - let sst_reader = ::SstReader::open(&sst_path)?; + let sst_reader = ::SstReader::open::(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 73c4461024e..a637eca704b 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -112,7 +112,7 @@ pub struct StoreContext { pub snap_mgr: TabletSnapManager, pub global_stat: GlobalStoreStat, pub store_stat: LocalStoreStat, - pub sst_importer: Arc, + pub sst_importer: Arc>, pub key_manager: Option>, /// Inspector for latency inspecting @@ -366,7 +366,7 @@ struct StorePollerBuilder { shutdown: Arc, snap_mgr: TabletSnapManager, global_stat: GlobalStoreStat, - sst_importer: Arc, + sst_importer: Arc>, key_manager: Option>, node_start_time: Timespec, // monotonic_raw_now } @@ -386,7 +386,7 @@ impl StorePollerBuilder { shutdown: Arc, snap_mgr: TabletSnapManager, coprocessor_host: CoprocessorHost, - sst_importer: Arc, + sst_importer: Arc>, key_manager: Option>, node_start_time: Timespec, // monotonic_raw_now ) -> Self { @@ -694,7 +694,7 @@ impl StoreSystem { collector_reg_handle: CollectorRegHandle, background: Worker, pd_worker: LazyWorker, - sst_importer: Arc, + sst_importer: Arc>, key_manager: Option>, grpc_service_mgr: GrpcServiceManager, resource_ctl: Option>, diff --git a/components/raftstore-v2/src/fsm/apply.rs 
b/components/raftstore-v2/src/fsm/apply.rs index e55c143a33a..49530fcd6df 100644 --- a/components/raftstore-v2/src/fsm/apply.rs +++ b/components/raftstore-v2/src/fsm/apply.rs @@ -87,7 +87,7 @@ impl ApplyFsm { log_recovery: Option>, applied_term: u64, buckets: Option, - sst_importer: Arc, + sst_importer: Arc>, coprocessor_host: CoprocessorHost, logger: Logger, ) -> (ApplyScheduler, Self) { diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 9ccf08d6d54..24d025c0a4d 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -36,7 +36,7 @@ pub mod test_util { Arc, }; - use engine_traits::{CfName, CF_DEFAULT}; + use engine_traits::{CfName, KvEngine, CF_DEFAULT}; use kvproto::{kvrpcpb::ApiVersion, metapb::RegionEpoch, raft_cmdpb::RaftRequestHeader}; use raft::prelude::{Entry, EntryType}; use raftstore::store::simple_write::SimpleWriteEncoder; @@ -46,7 +46,7 @@ pub mod test_util { use super::{CatchUpLogs, SimpleWriteReqEncoder}; use crate::{fsm::ApplyResReporter, router::ApplyRes}; - pub fn create_tmp_importer() -> (TempDir, Arc) { + pub fn create_tmp_importer() -> (TempDir, Arc>) { let dir = TempDir::new().unwrap(); let importer = Arc::new( SstImporter::new(&Default::default(), dir.path(), None, ApiVersion::V1, true).unwrap(), diff --git a/components/raftstore-v2/src/raft/apply.rs b/components/raftstore-v2/src/raft/apply.rs index f3aa5a541c1..35959dd8aea 100644 --- a/components/raftstore-v2/src/raft/apply.rs +++ b/components/raftstore-v2/src/raft/apply.rs @@ -76,7 +76,7 @@ pub struct Apply { res_reporter: R, read_scheduler: Scheduler>, - sst_importer: Arc, + sst_importer: Arc>, observe: Observe, coprocessor_host: CoprocessorHost, @@ -102,7 +102,7 @@ impl Apply { log_recovery: Option>, applied_term: u64, buckets: Option, - sst_importer: Arc, + sst_importer: Arc>, coprocessor_host: CoprocessorHost, tablet_scheduler: Scheduler>, high_priority_pool: FuturePool, @@ 
-335,7 +335,7 @@ impl Apply { } #[inline] - pub fn sst_importer(&self) -> &SstImporter { + pub fn sst_importer(&self) -> &SstImporter { &self.sst_importer } diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index 206e87b3a8e..b2a6d46e39c 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -235,7 +235,7 @@ impl Task { pub struct Runner { tablet_registry: TabletRegistry, - sst_importer: Arc, + sst_importer: Arc>, snap_mgr: TabletSnapManager, logger: Logger, @@ -252,7 +252,7 @@ pub struct Runner { impl Runner { pub fn new( tablet_registry: TabletRegistry, - sst_importer: Arc, + sst_importer: Arc>, snap_mgr: TabletSnapManager, logger: Logger, ) -> Self { diff --git a/components/raftstore/src/store/compaction_guard.rs b/components/raftstore/src/store/compaction_guard.rs index 138d730fa29..f63a257c9f5 100644 --- a/components/raftstore/src/store/compaction_guard.rs +++ b/components/raftstore/src/store/compaction_guard.rs @@ -269,6 +269,7 @@ mod tests { use std::{path::Path, str}; use collections::HashMap; + use encryption::DataKeyManager; use engine_rocks::{ raw::{BlockBasedOptions, DBCompressionType}, util::new_engine_opt, @@ -541,7 +542,7 @@ mod tests { } fn collect_keys(path: &str) -> Vec> { - let reader = RocksSstReader::open(path).unwrap(); + let reader = RocksSstReader::open::(path, None).unwrap(); let mut sst_reader = reader.iter(IterOptions::default()).unwrap(); let mut valid = sst_reader.seek_to_first().unwrap(); let mut ret = vec![]; diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 1639f441e38..252249b74b2 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -392,7 +392,7 @@ where tag: String, timer: Option, host: CoprocessorHost, - importer: Arc, + importer: Arc>, region_scheduler: Scheduler>, router: ApplyRouter, notifier: Box>, @@ 
-475,7 +475,7 @@ where pub fn new( tag: String, host: CoprocessorHost, - importer: Arc, + importer: Arc>, region_scheduler: Scheduler>, engine: EK, router: ApplyRouter, @@ -4659,7 +4659,7 @@ pub struct Builder { tag: String, cfg: Arc>, coprocessor_host: CoprocessorHost, - importer: Arc, + importer: Arc>, region_scheduler: Scheduler::Snapshot>>, engine: EK, sender: Box>, @@ -5060,7 +5060,7 @@ mod tests { (path, engine) } - pub fn create_tmp_importer(path: &str) -> (TempDir, Arc) { + pub fn create_tmp_importer(path: &str) -> (TempDir, Arc>) { let dir = Builder::new().prefix(path).tempdir().unwrap(); let importer = Arc::new( SstImporter::new( diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index bef74e3ed29..8c8919df67e 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -536,7 +536,7 @@ where pub region_scheduler: Scheduler>, pub apply_router: ApplyRouter, pub router: RaftRouter, - pub importer: Arc, + pub importer: Arc>, pub store_meta: Arc>, pub feature_gate: FeatureGate, /// region_id -> (peer_id, is_splitting) @@ -1209,7 +1209,7 @@ pub struct RaftPollerBuilder { pub region_scheduler: Scheduler>, apply_router: ApplyRouter, pub router: RaftRouter, - pub importer: Arc, + pub importer: Arc>, pub store_meta: Arc>, pub pending_create_peers: Arc>>, snap_mgr: SnapManager, @@ -1599,7 +1599,7 @@ impl RaftBatchSystem { pd_worker: LazyWorker>, store_meta: Arc>, coprocessor_host: CoprocessorHost, - importer: Arc, + importer: Arc>, split_check_scheduler: Scheduler, background_worker: Worker, auto_split_controller: AutoSplitController, diff --git a/components/raftstore/src/store/worker/cleanup.rs b/components/raftstore/src/store/worker/cleanup.rs index 726b7abe5ce..da2f004f47c 100644 --- a/components/raftstore/src/store/worker/cleanup.rs +++ b/components/raftstore/src/store/worker/cleanup.rs @@ -33,7 +33,7 @@ where R: RaftEngine, { compact: CompactRunner, - 
cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, } @@ -44,7 +44,7 @@ where { pub fn new( compact: CompactRunner, - cleanup_sst: CleanupSstRunner, + cleanup_sst: CleanupSstRunner, gc_snapshot: GcSnapshotRunner, ) -> Runner { Runner { diff --git a/components/raftstore/src/store/worker/cleanup_sst.rs b/components/raftstore/src/store/worker/cleanup_sst.rs index 44f188e6f8f..ca139a562a2 100644 --- a/components/raftstore/src/store/worker/cleanup_sst.rs +++ b/components/raftstore/src/store/worker/cleanup_sst.rs @@ -2,6 +2,7 @@ use std::{fmt, sync::Arc}; +use engine_traits::KvEngine; use kvproto::import_sstpb::SstMeta; use sst_importer::SstImporter; use tikv_util::worker::Runnable; @@ -18,12 +19,12 @@ impl fmt::Display for Task { } } -pub struct Runner { - importer: Arc, +pub struct Runner { + importer: Arc>, } -impl Runner { - pub fn new(importer: Arc) -> Runner { +impl Runner { + pub fn new(importer: Arc>) -> Self { Runner { importer } } @@ -35,7 +36,7 @@ impl Runner { } } -impl Runnable for Runner { +impl Runnable for Runner { type Task = Task; fn run(&mut self, task: Task) { diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 625d9b7cb4f..ed72d9ca12e 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -259,7 +259,7 @@ struct Servers { lock_mgr: LockManager, server: LocalServer, node: Node, - importer: Arc, + importer: Arc>, cdc_scheduler: tikv_util::worker::Scheduler, cdc_memory_quota: Arc, rsmeter_pubsub_service: resource_metering::PubSubService, diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 1e170abb1c3..081d4b8f915 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -255,7 +255,7 @@ struct TikvEngines { struct Servers { lock_mgr: LockManager, server: LocalServer, - importer: Arc, + importer: Arc>, rsmeter_pubsub_service: resource_metering::PubSubService, } diff --git 
a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index 850df867da8..7ae91d64b35 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -4,6 +4,7 @@ use std::{ collections::HashMap, fmt, io::{self, Write}, + marker::PhantomData, path::{Path, PathBuf}, sync::Arc, time::SystemTime, @@ -11,11 +12,10 @@ use std::{ use api_version::api_v2::TIDB_RANGES_COMPLEMENT; use encryption::{DataKeyManager, EncrypterWriter}; -use engine_rocks::{get_env, RocksSstReader}; use engine_traits::{ iter_option, EncryptionKeyManager, Iterator, KvEngine, RefIterable, SstMetaInfo, SstReader, }; -use file_system::{get_io_rate_limiter, sync_dir, File, OpenOptions}; +use file_system::{sync_dir, File, OpenOptions}; use keys::data_key; use kvproto::{import_sstpb::*, kvrpcpb::ApiVersion}; use tikv_util::time::Instant; @@ -215,17 +215,19 @@ impl Drop for ImportFile { /// The file being written is stored in `$root/.temp/$file_name`. After writing /// is completed, the file is moved to `$root/$file_name`. The file generated /// from the ingestion process will be placed in `$root/.clone/$file_name`. 
-pub struct ImportDir { +pub struct ImportDir { root_dir: PathBuf, temp_dir: PathBuf, clone_dir: PathBuf, + + _phantom: PhantomData, } -impl ImportDir { +impl ImportDir { const TEMP_DIR: &'static str = ".temp"; const CLONE_DIR: &'static str = ".clone"; - pub fn new>(root: P) -> Result { + pub fn new>(root: P) -> Result { let root_dir = root.as_ref().to_owned(); let temp_dir = root_dir.join(Self::TEMP_DIR); let clone_dir = root_dir.join(Self::CLONE_DIR); @@ -241,6 +243,7 @@ impl ImportDir { root_dir, temp_dir, clone_dir, + _phantom: PhantomData, }) } @@ -327,10 +330,14 @@ impl ImportDir { ) -> Result { let path = self.join_for_read(meta)?; let path_str = path.save.to_str().unwrap(); - let env = get_env(key_manager, get_io_rate_limiter())?; - let sst_reader = RocksSstReader::open_with_env(path_str, Some(env))?; + let sst_reader = E::SstReader::open(path_str, key_manager)?; // TODO: check the length and crc32 of ingested file. - let meta_info = sst_reader.sst_meta_info(meta.to_owned()); + let (count, size) = sst_reader.kv_count_and_size(); + let meta_info = SstMetaInfo { + total_kvs: count, + total_bytes: size, + meta: meta.to_owned(), + }; Ok(meta_info) } @@ -354,8 +361,7 @@ impl ImportDir { _ => { let path = self.join_for_read(meta)?; let path_str = path.save.to_str().unwrap(); - let env = get_env(key_manager.clone(), get_io_rate_limiter())?; - let sst_reader = RocksSstReader::open_with_env(path_str, Some(env))?; + let sst_reader = E::SstReader::open(path_str, key_manager.clone())?; for &(start, end) in TIDB_RANGES_COMPLEMENT { let opt = iter_option(&data_key(start), &data_key(end), false); @@ -377,7 +383,7 @@ impl ImportDir { Ok(true) } - pub fn ingest( + pub fn ingest( &self, metas: &[SstMetaInfo], engine: &E, @@ -427,8 +433,7 @@ impl ImportDir { for meta in metas { let path = self.join_for_read(meta)?; let path_str = path.save.to_str().unwrap(); - let env = get_env(key_manager.clone(), get_io_rate_limiter())?; - let sst_reader = 
RocksSstReader::open_with_env(path_str, Some(env))?; + let sst_reader = E::SstReader::open(path_str, key_manager.clone())?; sst_reader.verify_checksum()?; } Ok(()) @@ -528,6 +533,7 @@ pub fn parse_meta_from_path>(path: P) -> Result<(SstMeta, i32)> { mod test { use std::fs; + use engine_rocks::RocksEngine; use engine_traits::CF_DEFAULT; use super::*; @@ -578,7 +584,7 @@ mod test { use uuid::Uuid; let tmp = TempDir::new().unwrap(); - let dir = ImportDir::new(tmp.path()).unwrap(); + let dir = ImportDir::::new(tmp.path()).unwrap(); let mut meta = SstMeta::default(); meta.set_uuid(Uuid::new_v4().as_bytes().to_vec()); let filename_v1 = sst_meta_to_path_v1(&meta).unwrap(); diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 7e1de9cf44e..ab4512de692 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -17,7 +17,6 @@ use std::{ use collections::HashSet; use dashmap::{mapref::entry::Entry, DashMap}; use encryption::{to_engine_encryption_method, DataKeyManager}; -use engine_rocks::{get_env, RocksSstReader}; use engine_traits::{ name_to_cf, util::check_key_in_range, CfName, EncryptionKeyManager, FileEncryptionInfo, IterOptions, Iterator, KvEngine, RefIterable, SstCompressionType, SstExt, SstMetaInfo, @@ -26,7 +25,7 @@ use engine_traits::{ use external_storage::{ compression_reader_dispatcher, encrypt_wrap_reader, ExternalStorage, RestoreConfig, }; -use file_system::{get_io_rate_limiter, IoType, OpenOptions}; +use file_system::{IoType, OpenOptions}; use kvproto::{ brpb::{CipherInfo, StorageBackend}, import_sstpb::{Range, *}, @@ -153,8 +152,8 @@ impl CacheKvFile { } /// SstImporter manages SST files that are waiting for ingesting. -pub struct SstImporter { - dir: ImportDir, +pub struct SstImporter { + dir: ImportDir, key_manager: Option>, switcher: Either, // TODO: lift api_version as a type parameter. 
@@ -169,14 +168,14 @@ pub struct SstImporter { mem_limit: Arc, } -impl SstImporter { +impl SstImporter { pub fn new>( cfg: &Config, root: P, key_manager: Option>, api_version: ApiVersion, raft_kv_v2: bool, - ) -> Result { + ) -> Result { let switcher = if raft_kv_v2 { Either::Right(ImportModeSwitcherV2::new(cfg)) } else { @@ -281,7 +280,7 @@ impl SstImporter { } } - pub fn start_switch_mode_check(&self, executor: &Handle, db: Option) { + pub fn start_switch_mode_check(&self, executor: &Handle, db: Option) { match &self.switcher { Either::Left(switcher) => switcher.start(executor, db.unwrap()), Either::Right(switcher) => switcher.start(executor), @@ -355,7 +354,7 @@ impl SstImporter { .check_api_version(metas, self.key_manager.clone(), self.api_version) } - pub fn ingest(&self, metas: &[SstMetaInfo], engine: &E) -> Result<()> { + pub fn ingest(&self, metas: &[SstMetaInfo], engine: &E) -> Result<()> { match self .dir .ingest(metas, engine, self.key_manager.clone(), self.api_version) @@ -395,7 +394,7 @@ impl SstImporter { // // This method returns the *inclusive* key range (`[start, end]`) of SST // file created, or returns None if the SST is empty. 
- pub async fn download_ext( + pub async fn download_ext( &self, meta: &SstMeta, backend: &StorageBackend, @@ -413,7 +412,7 @@ impl SstImporter { "rewrite_rule" => ?rewrite_rule, "speed_limit" => speed_limiter.speed_limit(), ); - let r = self.do_download_ext::( + let r = self.do_download_ext( meta, backend, name, @@ -435,7 +434,7 @@ impl SstImporter { } } - pub fn enter_normal_mode(&self, db: E, mf: RocksDbMetricsFn) -> Result { + pub fn enter_normal_mode(&self, db: E, mf: RocksDbMetricsFn) -> Result { if let Either::Left(ref switcher) = self.switcher { switcher.enter_normal_mode(&db, mf) } else { @@ -443,7 +442,7 @@ impl SstImporter { } } - pub fn enter_import_mode(&self, db: E, mf: RocksDbMetricsFn) -> Result { + pub fn enter_import_mode(&self, db: E, mf: RocksDbMetricsFn) -> Result { if let Either::Left(ref switcher) = self.switcher { switcher.enter_import_mode(&db, mf) } else { @@ -1081,7 +1080,7 @@ impl SstImporter { // raw download, without ext, compatibility to old tests. #[cfg(test)] - fn download( + fn download( &self, meta: &SstMeta, backend: &StorageBackend, @@ -1103,7 +1102,7 @@ impl SstImporter { )) } - async fn do_download_ext( + async fn do_download_ext( &self, meta: &SstMeta, backend: &StorageBackend, @@ -1140,10 +1139,8 @@ impl SstImporter { .await?; // now validate the SST file. - let env = get_env(self.key_manager.clone(), get_io_rate_limiter())?; - // Use abstracted SstReader after Env is abstracted. 
let dst_file_name = path.temp.to_str().unwrap(); - let sst_reader = RocksSstReader::open_with_env(dst_file_name, Some(env))?; + let sst_reader = E::SstReader::open(dst_file_name, self.key_manager.clone())?; sst_reader.verify_checksum()?; // undo key rewrite so we could compare with the keys inside SST @@ -1389,7 +1386,7 @@ impl SstImporter { self.dir.list_ssts() } - pub fn new_txn_writer(&self, db: &E, meta: SstMeta) -> Result> { + pub fn new_txn_writer(&self, db: &E, meta: SstMeta) -> Result> { let mut default_meta = meta.clone(); default_meta.set_cf_name(CF_DEFAULT.to_owned()); let default_path = self.dir.join_for_write(&default_meta)?; @@ -1422,11 +1419,7 @@ impl SstImporter { )) } - pub fn new_raw_writer( - &self, - db: &E, - mut meta: SstMeta, - ) -> Result> { + pub fn new_raw_writer(&self, db: &E, mut meta: SstMeta) -> Result> { meta.set_cf_name(CF_DEFAULT.to_owned()); let default_path = self.dir.join_for_write(&meta)?; let default = E::SstWriterBuilder::new() @@ -1484,6 +1477,7 @@ mod tests { usize, }; + use engine_rocks::get_env; use engine_traits::{ collect, EncryptionMethod, Error as TraitError, ExternalSstFileInfo, Iterable, Iterator, RefIterable, SstReader, SstWriter, CF_DEFAULT, DATA_CFS, @@ -2005,7 +1999,8 @@ mod tests { ..Default::default() }; let import_dir = tempfile::tempdir().unwrap(); - let importer = SstImporter::new(&cfg, import_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, import_dir, None, ApiVersion::V1, false).unwrap(); let mem_limit_old = importer.mem_limit.load(Ordering::SeqCst); // create new config and get the diff config. @@ -2052,7 +2047,7 @@ mod tests { // create importer object. let import_dir = tempfile::tempdir().unwrap(); let (_, key_manager) = new_key_manager_for_test(); - let importer = SstImporter::new( + let importer = SstImporter::::new( &Config::default(), import_dir, Some(key_manager), @@ -2110,7 +2105,7 @@ mod tests { // create importer object. 
let import_dir = tempfile::tempdir().unwrap(); let (_, key_manager) = new_key_manager_for_test(); - let importer = SstImporter::new( + let importer = SstImporter::::new( &Config::default(), import_dir, Some(key_manager), @@ -2178,8 +2173,14 @@ mod tests { memory_use_ratio: 0.0, ..Default::default() }; - let importer = - SstImporter::new(&cfg, import_dir, Some(key_manager), ApiVersion::V1, false).unwrap(); + let importer = SstImporter::::new( + &cfg, + import_dir, + Some(key_manager), + ApiVersion::V1, + false, + ) + .unwrap(); let ext_storage = { importer.wrap_kms( importer.external_storage_or_cache(&backend, "").unwrap(), @@ -2226,7 +2227,7 @@ mod tests { // create importer object. let import_dir = tempfile::tempdir().unwrap(); let (_, key_manager) = new_key_manager_for_test(); - let importer = SstImporter::new( + let importer = SstImporter::::new( &Config::default(), import_dir, Some(key_manager.clone()), @@ -2263,7 +2264,7 @@ mod tests { let (_, key_manager) = new_key_manager_for_test(); let import_dir = tempfile::tempdir().unwrap(); - let importer = SstImporter::new( + let importer = SstImporter::::new( &Config::default(), import_dir, Some(key_manager), @@ -2303,11 +2304,13 @@ mod tests { // performs the download. 
let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2353,7 +2356,7 @@ mod tests { let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); let (temp_dir, key_manager) = new_key_manager_for_test(); - let importer = SstImporter::new( + let importer = SstImporter::::new( &cfg, &importer_dir, Some(key_manager.clone()), @@ -2367,7 +2370,7 @@ mod tests { let db = new_test_engine_with_env(db_path.to_str().unwrap(), DATA_CFS, env.clone()); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2412,11 +2415,13 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2457,14 +2462,16 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); // creates a sample SST file. let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file_txn_default().unwrap(); let db = create_sst_test_engine().unwrap(); let _ = importer - .download::( + .download( &meta, &backend, "sample_default.sst", @@ -2501,14 +2508,16 @@ mod tests { // performs the download. 
let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); // creates a sample SST file. let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file_txn_write().unwrap(); let db = create_sst_test_engine().unwrap(); let _ = importer - .download::( + .download( &meta, &backend, "sample_write.sst", @@ -2568,11 +2577,12 @@ mod tests { let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); let importer = - SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2640,14 +2650,16 @@ mod tests { let (_ext_sst_dir, backend, mut meta) = create_sample_external_sst_file().unwrap(); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); // note: the range doesn't contain the DATA_PREFIX 'z'. 
meta.mut_range().set_start(b"t123_r02".to_vec()); meta.mut_range().set_end(b"t123_r12".to_vec()); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2686,13 +2698,15 @@ mod tests { let (_ext_sst_dir, backend, mut meta) = create_sample_external_sst_file().unwrap(); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); meta.mut_range().set_start(b"t5_r02".to_vec()); meta.mut_range().set_end(b"t5_r12".to_vec()); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2733,11 +2747,13 @@ mod tests { meta.set_uuid(vec![0u8; 16]); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); let backend = external_storage::make_local_backend(ext_sst_dir.path()); - let result = importer.download::( + let result = importer.download( &meta, &backend, "sample.sst", @@ -2758,12 +2774,14 @@ mod tests { let (_ext_sst_dir, backend, mut meta) = create_sample_external_sst_file().unwrap(); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); meta.mut_range().set_start(vec![b'x']); meta.mut_range().set_end(vec![b'y']); - let result = importer.download::( + let result = importer.download( &meta, &backend, "sample.sst", @@ -2784,10 +2802,12 @@ mod tests { 
let (_ext_sst_dir, backend, meta) = create_sample_external_sst_file().unwrap(); let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); let db = create_sst_test_engine().unwrap(); - let result = importer.download::( + let result = importer.download( &meta, &backend, "sample.sst", @@ -2821,11 +2841,12 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, api_version, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, api_version, false).unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2880,11 +2901,12 @@ mod tests { // performs the download. let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, api_version, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, api_version, false).unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2935,11 +2957,12 @@ mod tests { // performs the download. 
let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, api_version, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, api_version, false).unwrap(); let db = create_sst_test_engine().unwrap(); let range = importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -2983,12 +3006,13 @@ mod tests { let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); let mut importer = - SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); importer.set_compression_type(CF_DEFAULT, Some(SstCompressionType::Snappy)); let db = create_sst_test_engine().unwrap(); importer - .download::( + .download( &meta, &backend, "sample.sst", @@ -3016,12 +3040,13 @@ mod tests { let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); let mut importer = - SstImporter::new(&cfg, &importer_dir, None, ApiVersion::V1, false).unwrap(); + SstImporter::::new(&cfg, &importer_dir, None, ApiVersion::V1, false) + .unwrap(); importer.set_compression_type(CF_DEFAULT, Some(SstCompressionType::Zstd)); let db_path = importer_dir.path().join("db"); let db = new_test_engine(db_path.to_str().unwrap(), DATA_CFS); - let mut w = importer.new_txn_writer::(&db, meta).unwrap(); + let mut w = importer.new_txn_writer(&db, meta).unwrap(); let mut batch = WriteBatch::default(); let mut pairs = vec![]; @@ -3064,12 +3089,18 @@ mod tests { #[test] fn test_import_support_download() { let import_dir = tempfile::tempdir().unwrap(); - let importer = - SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1, false).unwrap(); + let importer = SstImporter::::new( + &Config::default(), + import_dir, + None, + ApiVersion::V1, + false, + ) + .unwrap(); assert_eq!(importer.import_support_download(), false); let import_dir = tempfile::tempdir().unwrap(); - 
let importer = SstImporter::new( + let importer = SstImporter::::new( &Config { memory_use_ratio: 0.0, ..Default::default() @@ -3087,8 +3118,14 @@ mod tests { fn test_inc_mem_and_check() { // create importer object. let import_dir = tempfile::tempdir().unwrap(); - let importer = - SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1, false).unwrap(); + let importer = SstImporter::::new( + &Config::default(), + import_dir, + None, + ApiVersion::V1, + false, + ) + .unwrap(); assert_eq!(importer.mem_use.load(Ordering::SeqCst), 0); // test inc_mem_and_check() and dec_mem() successfully. @@ -3115,8 +3152,14 @@ mod tests { #[test] fn test_dashmap_lock() { let import_dir = tempfile::tempdir().unwrap(); - let importer = - SstImporter::new(&Config::default(), import_dir, None, ApiVersion::V1, false).unwrap(); + let importer = SstImporter::::new( + &Config::default(), + import_dir, + None, + ApiVersion::V1, + false, + ) + .unwrap(); let key = "file1"; let r = Arc::new(OnceCell::new()); diff --git a/components/sst_importer/src/sst_writer.rs b/components/sst_importer/src/sst_writer.rs index f6f896a0923..1c6b06902a4 100644 --- a/components/sst_importer/src/sst_writer.rs +++ b/components/sst_importer/src/sst_writer.rs @@ -301,7 +301,7 @@ mod tests { use crate::{Config, SstImporter}; // Return the temp dir path to avoid it drop out of the scope. 
- fn new_writer Result>( + fn new_writer, &RocksEngine, SstMeta) -> Result>( f: F, api_version: ApiVersion, ) -> (W, TempDir) { @@ -310,7 +310,8 @@ mod tests { let importer_dir = tempfile::tempdir().unwrap(); let cfg = Config::default(); - let importer = SstImporter::new(&cfg, &importer_dir, None, api_version, false).unwrap(); + let importer = + SstImporter::::new(&cfg, &importer_dir, None, api_version, false).unwrap(); let db_path = importer_dir.path().join("db"); let db = new_test_engine(db_path.to_str().unwrap(), DATA_CFS); (f(&importer, &db, meta).unwrap(), importer_dir) diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index f5c64fa86e9..20e651ea1dc 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -147,7 +147,7 @@ pub struct ServerCluster { addrs: AddressMap, pub storages: HashMap, pub region_info_accessors: HashMap, - pub importers: HashMap>, + pub importers: HashMap>>, pub pending_services: HashMap, pub coprocessor_hooks: HashMap, pub health_services: HashMap, diff --git a/components/tikv_kv/Cargo.toml b/components/tikv_kv/Cargo.toml index 7d517de2cba..6df829ad925 100644 --- a/components/tikv_kv/Cargo.toml +++ b/components/tikv_kv/Cargo.toml @@ -27,6 +27,7 @@ test-engines-panic = [ [dependencies] backtrace = "0.3" collections = { workspace = true } +encryption = { workspace = true } engine_panic = { workspace = true } engine_rocks = { workspace = true } engine_test = { workspace = true } diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 21099974d2d..332168a4e93 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -12,6 +12,7 @@ use std::{ }; use collections::HashMap; +use encryption::DataKeyManager; pub use engine_rocks::RocksSnapshot; use engine_rocks::{ get_env, RocksCfOptions, RocksDbOptions, RocksEngine as BaseRocksEngine, RocksEngineIterator, @@ -127,7 
+128,9 @@ impl RocksEngine { let worker = Worker::new("engine-rocksdb"); let mut db_opts = db_opts.unwrap_or_default(); if io_rate_limiter.is_some() { - db_opts.set_env(get_env(None /* key_manager */, io_rate_limiter).unwrap()); + db_opts.set_env( + get_env::(None /* key_manager */, io_rate_limiter).unwrap(), + ); } let db = engine_rocks::util::new_engine_opt(&path, db_opts, cfs_opts)?; diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 2dc4f76b194..d5b5c7c4103 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -126,7 +126,7 @@ pub struct ImportSstService { tablets: LocalTablets, engine: E, threads: Arc, - importer: Arc, + importer: Arc>, limiter: Limiter, task_slots: Arc>>, raft_entry_max_size: ReadableSize, @@ -322,7 +322,7 @@ impl ImportSstService { raft_entry_max_size: ReadableSize, engine: E, tablets: LocalTablets, - importer: Arc, + importer: Arc>, store_meta: Option>>>, resource_manager: Option>, region_info_accessor: Arc, @@ -350,7 +350,7 @@ impl ImportSstService { if let LocalTablets::Singleton(tablet) = &tablets { importer.start_switch_mode_check(threads.handle(), Some(tablet.clone())); } else { - importer.start_switch_mode_check::(threads.handle(), None); + importer.start_switch_mode_check(threads.handle(), None); } let writer = raft_writer::ThrottledTlsEngineWriter::default(); @@ -385,7 +385,7 @@ impl ImportSstService { self.cfg.clone() } - async fn tick(importer: Arc, cfg: ConfigManager) { + async fn tick(importer: Arc>, cfg: ConfigManager) { loop { sleep(Duration::from_secs(10)).await; @@ -563,7 +563,7 @@ impl ImportSstService { async fn apply_imp( mut req: ApplyRequest, - importer: Arc, + importer: Arc>, writer: raft_writer::ThrottledTlsEngineWriter, limiter: Limiter, max_raft_size: usize, @@ -1098,7 +1098,7 @@ impl ImportSst for ImportSstService { }; let res = with_resource_limiter( - importer.download_ext::( + importer.download_ext( req.get_sst(), req.get_storage_backend(), req.get_name(), diff --git 
a/src/server/node.rs b/src/server/node.rs index 228f679ed14..fb2f28d9c1a 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -167,7 +167,7 @@ where pd_worker: LazyWorker>, store_meta: Arc>, coprocessor_host: CoprocessorHost, - importer: Arc, + importer: Arc>, split_check_scheduler: Scheduler, auto_split_controller: AutoSplitController, concurrency_manager: ConcurrencyManager, @@ -455,7 +455,7 @@ where pd_worker: LazyWorker>, store_meta: Arc>, coprocessor_host: CoprocessorHost, - importer: Arc, + importer: Arc>, split_check_scheduler: Scheduler, auto_split_controller: AutoSplitController, concurrency_manager: ConcurrencyManager, diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index d9b17c5d35c..5fce5c0024b 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -113,7 +113,7 @@ where pd_worker: LazyWorker, store_cfg: Arc>, state: &Mutex, - sst_importer: Arc, + sst_importer: Arc>, key_manager: Option>, grpc_service_mgr: GrpcServiceManager, ) -> Result<()> @@ -218,7 +218,7 @@ where background: Worker, pd_worker: LazyWorker, store_cfg: Arc>, - sst_importer: Arc, + sst_importer: Arc>, key_manager: Option>, grpc_service_mgr: GrpcServiceManager, ) -> Result<()> From d96284cb29969ef8bd046b4d6f576b91fc3e3287 Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 28 Nov 2023 15:32:46 +0800 Subject: [PATCH 1033/1149] encryption: remove useless `EncryptionKeyManager` trait (#16086) ref tikv/tikv#15986 remove useless EncryptionKeyManager trait Signed-off-by: Connor1996 --- Cargo.lock | 3 +- cmd/tikv-ctl/src/main.rs | 7 +-- components/encryption/Cargo.toml | 1 - components/encryption/export/src/lib.rs | 6 +- components/encryption/src/crypter.rs | 59 +++++++++++------- components/encryption/src/lib.rs | 5 +- components/encryption/src/manager/mod.rs | 49 +++++++-------- components/engine_panic/Cargo.toml | 1 + components/engine_panic/src/sst.rs | 6 +- components/engine_rocks/src/encryption.rs | 20 +++--- components/engine_rocks/src/lib.rs 
| 5 +- components/engine_rocks/src/sst.rs | 6 +- components/engine_traits/Cargo.toml | 1 + components/engine_traits/src/encryption.rs | 62 ------------------- components/engine_traits/src/lib.rs | 2 - components/engine_traits/src/sst.rs | 5 +- components/engine_traits_tests/src/ctor.rs | 2 +- components/engine_traits_tests/src/sst.rs | 13 ++-- components/external_storage/src/lib.rs | 5 +- components/raft_log_engine/src/engine.rs | 7 ++- .../src/operation/ready/snapshot.rs | 5 +- .../raftstore/src/store/compaction_guard.rs | 3 +- components/raftstore/src/store/snap.rs | 6 +- components/raftstore/src/store/snap/io.rs | 12 ++-- components/sst_importer/src/import_file.rs | 4 +- components/sst_importer/src/sst_importer.rs | 15 ++--- components/sst_importer/src/util.rs | 5 +- components/tikv_kv/src/rocksdb_engine.rs | 5 +- src/server/tablet_snap.rs | 2 +- 29 files changed, 126 insertions(+), 196 deletions(-) delete mode 100644 components/engine_traits/src/encryption.rs diff --git a/Cargo.lock b/Cargo.lock index d191ca34188..9c93540d3ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1465,7 +1465,6 @@ dependencies = [ "crc32fast", "crossbeam", "derive_more", - "engine_traits", "error_code", "fail", "file_system", @@ -1520,6 +1519,7 @@ dependencies = [ name = "engine_panic" version = "0.0.1" dependencies = [ + "encryption", "engine_traits", "kvproto", "raft", @@ -1612,6 +1612,7 @@ version = "0.0.1" dependencies = [ "case_macros", "collections", + "encryption", "error_code", "fail", "file_system", diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index b57a99f8345..5ed1bcbd9cc 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -21,11 +21,10 @@ use std::{ use collections::HashMap; use encryption_export::{ - create_backend, data_key_manager_from_config, from_engine_encryption_method, DataKeyManager, - DecrypterReader, Iv, + create_backend, data_key_manager_from_config, DataKeyManager, DecrypterReader, Iv, }; use engine_rocks::get_env; -use 
engine_traits::{EncryptionKeyManager, Peekable}; +use engine_traits::Peekable; use file_system::calc_crc32; use futures::{executor::block_on, future::try_join_all}; use gag::BufferRedirect; @@ -166,7 +165,7 @@ fn main() { let infile1 = Path::new(infile).canonicalize().unwrap(); let file_info = key_manager.get_file(infile1.to_str().unwrap()).unwrap(); - let mthd = from_engine_encryption_method(file_info.method); + let mthd = file_info.method; if mthd == EncryptionMethod::Plaintext { println!( "{} is not encrypted, skip to decrypt it into {}", diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index 9698618a4ba..0f2eac6ad5a 100644 --- a/components/encryption/Cargo.toml +++ b/components/encryption/Cargo.toml @@ -18,7 +18,6 @@ cloud = { workspace = true } crc32fast = "1.2" crossbeam = "0.8" derive_more = "0.99.3" -engine_traits = { workspace = true } error_code = { workspace = true } fail = "0.5" file_system = { workspace = true } diff --git a/components/encryption/export/src/lib.rs b/components/encryption/export/src/lib.rs index 8820402be6b..a36406d44ea 100644 --- a/components/encryption/export/src/lib.rs +++ b/components/encryption/export/src/lib.rs @@ -9,9 +9,9 @@ use cloud::kms::Config as CloudConfig; #[cfg(feature = "cloud-aws")] pub use encryption::KmsBackend; pub use encryption::{ - clean_up_dir, clean_up_trash, from_engine_encryption_method, trash_dir_all, AzureConfig, - Backend, DataKeyImporter, DataKeyManager, DataKeyManagerArgs, DecrypterReader, - EncryptionConfig, Error, FileConfig, Iv, KmsConfig, MasterKeyConfig, Result, + clean_up_dir, clean_up_trash, trash_dir_all, AzureConfig, Backend, DataKeyImporter, + DataKeyManager, DataKeyManagerArgs, DecrypterReader, EncryptionConfig, Error, FileConfig, Iv, + KmsConfig, MasterKeyConfig, Result, }; use encryption::{cloud_convert_error, FileBackend, PlaintextBackend}; use tikv_util::{box_err, error, info}; diff --git a/components/encryption/src/crypter.rs 
b/components/encryption/src/crypter.rs index 3940d392be6..aafbe7cf88f 100644 --- a/components/encryption/src/crypter.rs +++ b/components/encryption/src/crypter.rs @@ -1,8 +1,9 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. +use std::fmt::{self, Debug, Formatter}; + use byteorder::{BigEndian, ByteOrder}; use cloud::kms::PlainKey; -use engine_traits::EncryptionMethod as EtEncryptionMethod; use kvproto::encryptionpb::EncryptionMethod; use openssl::symm::{self, Cipher as OCipher}; use rand::{rngs::OsRng, RngCore}; @@ -10,28 +11,6 @@ use tikv_util::box_err; use crate::{Error, Result}; -pub fn to_engine_encryption_method(method: EncryptionMethod) -> EtEncryptionMethod { - match method { - EncryptionMethod::Plaintext => EtEncryptionMethod::Plaintext, - EncryptionMethod::Aes128Ctr => EtEncryptionMethod::Aes128Ctr, - EncryptionMethod::Aes192Ctr => EtEncryptionMethod::Aes192Ctr, - EncryptionMethod::Aes256Ctr => EtEncryptionMethod::Aes256Ctr, - EncryptionMethod::Sm4Ctr => EtEncryptionMethod::Sm4Ctr, - EncryptionMethod::Unknown => EtEncryptionMethod::Unknown, - } -} - -pub fn from_engine_encryption_method(method: EtEncryptionMethod) -> EncryptionMethod { - match method { - EtEncryptionMethod::Plaintext => EncryptionMethod::Plaintext, - EtEncryptionMethod::Aes128Ctr => EncryptionMethod::Aes128Ctr, - EtEncryptionMethod::Aes192Ctr => EncryptionMethod::Aes192Ctr, - EtEncryptionMethod::Aes256Ctr => EncryptionMethod::Aes256Ctr, - EtEncryptionMethod::Sm4Ctr => EncryptionMethod::Sm4Ctr, - EtEncryptionMethod::Unknown => EncryptionMethod::Unknown, - } -} - pub fn get_method_key_length(method: EncryptionMethod) -> usize { match method { EncryptionMethod::Plaintext => 0, @@ -43,6 +22,40 @@ pub fn get_method_key_length(method: EncryptionMethod) -> usize { } } +#[derive(Clone, PartialEq)] +pub struct FileEncryptionInfo { + pub method: EncryptionMethod, + pub key: Vec, + pub iv: Vec, +} +impl Default for FileEncryptionInfo { + fn default() -> Self { + 
FileEncryptionInfo { + method: EncryptionMethod::Unknown, + key: vec![], + iv: vec![], + } + } +} + +impl Debug for FileEncryptionInfo { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "FileEncryptionInfo [method={:?}, key=...<{} bytes>, iv=...<{} bytes>]", + self.method, + self.key.len(), + self.iv.len() + ) + } +} + +impl FileEncryptionInfo { + pub fn is_empty(&self) -> bool { + self.key.is_empty() && self.iv.is_empty() + } +} + // IV's the length should be 12 btyes for GCM mode. const GCM_IV_12: usize = 12; // IV's the length should be 16 btyes for CTR mode. diff --git a/components/encryption/src/lib.rs b/components/encryption/src/lib.rs index 38c38108dc5..2a9ad4c6f44 100644 --- a/components/encryption/src/lib.rs +++ b/components/encryption/src/lib.rs @@ -16,10 +16,7 @@ use std::{io::ErrorKind, path::Path}; pub use self::{ config::*, - crypter::{ - from_engine_encryption_method, to_engine_encryption_method, verify_encryption_config, - AesGcmCrypter, Iv, - }, + crypter::{verify_encryption_config, AesGcmCrypter, FileEncryptionInfo, Iv}, encrypted_file::EncryptedFile, errors::{cloud_convert_error, Error, Result, RetryCodedError}, file_dict_file::FileDictionaryFile, diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index f3594e8a96b..f5a203e9626 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -13,9 +13,6 @@ use std::{ }; use crossbeam::channel::{self, select, tick}; -use engine_traits::{ - EncryptionKeyManager, EncryptionMethod as EtEncryptionMethod, FileEncryptionInfo, -}; use fail::fail_point; use file_system::File; use kvproto::encryptionpb::{DataKey, EncryptionMethod, FileDictionary, FileInfo, KeyDictionary}; @@ -24,7 +21,7 @@ use tikv_util::{box_err, debug, error, info, sys::thread::StdThreadBuildWrapper, use crate::{ config::EncryptionConfig, - crypter::{self, Iv}, + crypter::{self, FileEncryptionInfo, Iv}, 
encrypted_file::EncryptedFile, file_dict_file::FileDictionaryFile, io::{DecrypterReader, EncrypterWriter}, @@ -661,9 +658,9 @@ impl DataKeyManager { }; EncrypterWriter::new( writer, - crypter::from_engine_encryption_method(file.method), + file.method, &file.key, - if file.method == EtEncryptionMethod::Plaintext { + if file.method == EncryptionMethod::Plaintext { debug_assert!(file.iv.is_empty()); Iv::Empty } else { @@ -691,9 +688,9 @@ impl DataKeyManager { let file = self.get_file(fname)?; DecrypterReader::new( reader, - crypter::from_engine_encryption_method(file.method), + file.method, &file.key, - if file.method == EtEncryptionMethod::Plaintext { + if file.method == EncryptionMethod::Plaintext { debug_assert!(file.iv.is_empty()); Iv::Empty } else { @@ -767,11 +764,7 @@ impl DataKeyManager { } } }; - let encrypted_file = FileEncryptionInfo { - key, - method: crypter::to_engine_encryption_method(method), - iv, - }; + let encrypted_file = FileEncryptionInfo { key, method, iv }; Ok(Some(encrypted_file)) } @@ -844,8 +837,8 @@ impl DataKeyManager { } /// Return which method this manager is using. - pub fn encryption_method(&self) -> engine_traits::EncryptionMethod { - crypter::to_engine_encryption_method(self.method) + pub fn encryption_method(&self) -> EncryptionMethod { + self.method } /// For tests. @@ -869,9 +862,9 @@ impl Drop for DataKeyManager { } } -impl EncryptionKeyManager for DataKeyManager { +impl DataKeyManager { // Get key to open existing file. 
- fn get_file(&self, fname: &str) -> IoResult { + pub fn get_file(&self, fname: &str) -> IoResult { match self.get_file_exists(fname) { Ok(Some(result)) => Ok(result), Ok(None) => { @@ -881,7 +874,7 @@ impl EncryptionKeyManager for DataKeyManager { let method = EncryptionMethod::Plaintext; Ok(FileEncryptionInfo { key: vec![], - method: crypter::to_engine_encryption_method(method), + method, iv: file.iv, }) } @@ -889,21 +882,25 @@ impl EncryptionKeyManager for DataKeyManager { } } - fn new_file(&self, fname: &str) -> IoResult { + pub fn new_file(&self, fname: &str) -> IoResult { let (_, data_key) = self.dicts.current_data_key(); let key = data_key.get_key().to_owned(); let file = self.dicts.new_file(fname, self.method, true)?; let encrypted_file = FileEncryptionInfo { key, - method: crypter::to_engine_encryption_method(file.method), + method: file.method, iv: file.get_iv().to_owned(), }; Ok(encrypted_file) } - // See comments of `remove_dir` for more details when using this with a - // directory. - fn delete_file(&self, fname: &str, physical_fname: Option<&str>) -> IoResult<()> { + // Can be used with both file and directory. See comments of `remove_dir` for + // more details when using this with a directory. + // + // `physical_fname` is a hint when `fname` was renamed physically. + // Depending on the implementation, providing false negative or false + // positive value may result in leaking encryption keys. 
+ pub fn delete_file(&self, fname: &str, physical_fname: Option<&str>) -> IoResult<()> { fail_point!("key_manager_fails_before_delete_file", |_| IoResult::Err( io::ErrorKind::Other.into() )); @@ -924,7 +921,7 @@ impl EncryptionKeyManager for DataKeyManager { Ok(()) } - fn link_file(&self, src_fname: &str, dst_fname: &str) -> IoResult<()> { + pub fn link_file(&self, src_fname: &str, dst_fname: &str) -> IoResult<()> { let src_path = Path::new(src_fname); let dst_path = Path::new(dst_fname); if src_path.is_dir() { @@ -1120,8 +1117,8 @@ impl<'a> Drop for DataKeyImporter<'a> { #[cfg(test)] mod tests { - use engine_traits::EncryptionMethod as EtEncryptionMethod; use file_system::{remove_file, File}; + use kvproto::encryptionpb::EncryptionMethod; use matches::assert_matches; use tempfile::TempDir; use test_util::create_test_key_file; @@ -1243,7 +1240,7 @@ mod tests { let foo3 = manager.get_file("foo").unwrap(); assert_eq!(foo1, foo3); let bar = manager.new_file("bar").unwrap(); - assert_eq!(bar.method, EtEncryptionMethod::Plaintext); + assert_eq!(bar.method, EncryptionMethod::Plaintext); } // When enabling encryption, using insecure master key is not allowed. 
diff --git a/components/engine_panic/Cargo.toml b/components/engine_panic/Cargo.toml index f5da1dad550..7c41290993c 100644 --- a/components/engine_panic/Cargo.toml +++ b/components/engine_panic/Cargo.toml @@ -11,6 +11,7 @@ testexport = [] [dependencies] engine_traits = { workspace = true } kvproto = { workspace = true } +encryption = { workspace = true } raft = { workspace = true } tikv_alloc = { workspace = true } # FIXME: Remove this dep from the engine_traits interface diff --git a/components/engine_panic/src/sst.rs b/components/engine_panic/src/sst.rs index 2e51c0b87b5..59c23e67636 100644 --- a/components/engine_panic/src/sst.rs +++ b/components/engine_panic/src/sst.rs @@ -2,6 +2,7 @@ use std::{marker::PhantomData, path::PathBuf, sync::Arc}; +use ::encryption::DataKeyManager; use engine_traits::{ CfName, ExternalSstFileInfo, IterOptions, Iterable, Iterator, RefIterable, Result, SstCompressionType, SstExt, SstReader, SstWriter, SstWriterBuilder, @@ -18,10 +19,7 @@ impl SstExt for PanicEngine { pub struct PanicSstReader; impl SstReader for PanicSstReader { - fn open( - path: &str, - mgr: Option>, - ) -> Result { + fn open(path: &str, mgr: Option>) -> Result { panic!() } fn verify_checksum(&self) -> Result<()> { diff --git a/components/engine_rocks/src/encryption.rs b/components/engine_rocks/src/encryption.rs index 58d359b39df..75dc407e3c3 100644 --- a/components/engine_rocks/src/encryption.rs +++ b/components/engine_rocks/src/encryption.rs @@ -2,18 +2,18 @@ use std::{io::Result, sync::Arc}; -use engine_traits::{EncryptionKeyManager, EncryptionMethod, FileEncryptionInfo}; +use encryption::{DataKeyManager, FileEncryptionInfo}; +use kvproto::encryptionpb::EncryptionMethod; use rocksdb::{ - DBEncryptionMethod, EncryptionKeyManager as DBEncryptionKeyManager, - FileEncryptionInfo as DBFileEncryptionInfo, + DBEncryptionMethod, EncryptionKeyManager, FileEncryptionInfo as DBFileEncryptionInfo, }; use crate::{r2e, raw::Env}; // Use engine::Env directly since Env is not 
abstracted. -pub(crate) fn get_env( +pub(crate) fn get_env( base_env: Option>, - key_manager: Option>, + key_manager: Option>, ) -> engine_traits::Result>> { if let Some(manager) = key_manager { let base_env = base_env.unwrap_or_else(|| Arc::new(Env::default())); @@ -26,17 +26,17 @@ pub(crate) fn get_env( } } -pub struct WrappedEncryptionKeyManager { - manager: Arc, +pub struct WrappedEncryptionKeyManager { + manager: Arc, } -impl WrappedEncryptionKeyManager { - pub fn new(manager: Arc) -> Self { +impl WrappedEncryptionKeyManager { + pub fn new(manager: Arc) -> Self { Self { manager } } } -impl DBEncryptionKeyManager for WrappedEncryptionKeyManager { +impl EncryptionKeyManager for WrappedEncryptionKeyManager { fn get_file(&self, fname: &str) -> Result { self.manager .get_file(fname) diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index 3226a4592f0..5afa5452344 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -113,12 +113,13 @@ pub use rocksdb::{ }; pub mod flow_control_factors; +use ::encryption::DataKeyManager; pub use flow_control_factors::*; pub mod raw; -pub fn get_env( - key_manager: Option>, +pub fn get_env( + key_manager: Option>, limiter: Option>, ) -> engine_traits::Result> { let env = encryption::get_env(None /* base_env */, key_manager)?; diff --git a/components/engine_rocks/src/sst.rs b/components/engine_rocks/src/sst.rs index 0a234983aa3..1030b7aa17f 100644 --- a/components/engine_rocks/src/sst.rs +++ b/components/engine_rocks/src/sst.rs @@ -2,6 +2,7 @@ use std::{path::PathBuf, sync::Arc}; +use ::encryption::DataKeyManager; use engine_traits::{ Error, ExternalSstFileInfo, IterOptions, Iterator, RefIterable, Result, SstCompressionType, SstExt, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, @@ -47,10 +48,7 @@ impl RocksSstReader { } impl SstReader for RocksSstReader { - fn open( - path: &str, - mgr: Option>, - ) -> Result { + fn open(path: &str, mgr: Option>) -> Result { 
let env = get_env(mgr, get_io_rate_limiter())?; Self::open_with_env(path, Some(env)) } diff --git a/components/engine_traits/Cargo.toml b/components/engine_traits/Cargo.toml index 2d11b59f623..8e8812ec6e2 100644 --- a/components/engine_traits/Cargo.toml +++ b/components/engine_traits/Cargo.toml @@ -20,6 +20,7 @@ lazy_static = "1.0" log_wrappers = { workspace = true } protobuf = "2" raft = { workspace = true } +encryption = { workspace = true } serde = "1.0" slog = { workspace = true } slog-global = { workspace = true } diff --git a/components/engine_traits/src/encryption.rs b/components/engine_traits/src/encryption.rs deleted file mode 100644 index 7376e2d5592..00000000000 --- a/components/engine_traits/src/encryption.rs +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. - -use std::{ - fmt::{self, Debug, Formatter}, - io::Result, -}; - -pub trait EncryptionKeyManager: Sync + Send { - fn get_file(&self, fname: &str) -> Result; - fn new_file(&self, fname: &str) -> Result; - /// Can be used with both file and directory. - /// - /// `physical_fname` is a hint when `fname` was renamed physically. - /// Depending on the implementation, providing false negative or false - /// positive value may result in leaking encryption keys. 
- fn delete_file(&self, fname: &str, physical_fname: Option<&str>) -> Result<()>; - fn link_file(&self, src_fname: &str, dst_fname: &str) -> Result<()>; -} - -#[derive(Clone, PartialEq)] -pub struct FileEncryptionInfo { - pub method: EncryptionMethod, - pub key: Vec, - pub iv: Vec, -} -impl Default for FileEncryptionInfo { - fn default() -> Self { - FileEncryptionInfo { - method: EncryptionMethod::Unknown, - key: vec![], - iv: vec![], - } - } -} - -impl Debug for FileEncryptionInfo { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!( - f, - "FileEncryptionInfo [method={:?}, key=...<{} bytes>, iv=...<{} bytes>]", - self.method, - self.key.len(), - self.iv.len() - ) - } -} - -impl FileEncryptionInfo { - pub fn is_empty(&self) -> bool { - self.key.is_empty() && self.iv.is_empty() - } -} - -#[derive(Copy, Clone, Debug, PartialEq)] -pub enum EncryptionMethod { - Unknown = 0, - Plaintext = 1, - Aes128Ctr = 2, - Aes192Ctr = 3, - Aes256Ctr = 4, - Sm4Ctr = 5, -} diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index e09b1b52733..9cf4c22dd82 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -295,8 +295,6 @@ mod sst; pub use crate::sst::*; mod write_batch; pub use crate::write_batch::*; -mod encryption; -pub use crate::encryption::*; mod mvcc_properties; mod sst_partitioner; pub use crate::sst_partitioner::*; diff --git a/components/engine_traits/src/sst.rs b/components/engine_traits/src/sst.rs index dccd3a2523d..036c8999e3f 100644 --- a/components/engine_traits/src/sst.rs +++ b/components/engine_traits/src/sst.rs @@ -2,9 +2,10 @@ use std::{path::PathBuf, sync::Arc}; +use encryption::DataKeyManager; use kvproto::import_sstpb::SstMeta; -use crate::{errors::Result, EncryptionKeyManager, RefIterable}; +use crate::{errors::Result, RefIterable}; #[derive(Clone, Debug)] pub struct SstMetaInfo { @@ -21,7 +22,7 @@ pub trait SstExt: Sized { /// SstReader is used to read an SST file. 
pub trait SstReader: RefIterable + Sized + Send { - fn open(path: &str, mgr: Option>) -> Result; + fn open(path: &str, mgr: Option>) -> Result; fn verify_checksum(&self) -> Result<()>; fn kv_count_and_size(&self) -> (u64, u64); } diff --git a/components/engine_traits_tests/src/ctor.rs b/components/engine_traits_tests/src/ctor.rs index 5d987d64858..ba3154d9267 100644 --- a/components/engine_traits_tests/src/ctor.rs +++ b/components/engine_traits_tests/src/ctor.rs @@ -9,7 +9,7 @@ use engine_test::{ ctor::{CfOptions, DbOptions, KvEngineConstructorExt}, kv::KvTestEngine, }; -use engine_traits::{EncryptionKeyManager, KvEngine, Peekable, SyncMutable, ALL_CFS, CF_DEFAULT}; +use engine_traits::{KvEngine, Peekable, SyncMutable, ALL_CFS, CF_DEFAULT}; use super::tempdir; diff --git a/components/engine_traits_tests/src/sst.rs b/components/engine_traits_tests/src/sst.rs index 629c81df528..77258e649ff 100644 --- a/components/engine_traits_tests/src/sst.rs +++ b/components/engine_traits_tests/src/sst.rs @@ -4,7 +4,6 @@ use std::fs; -use encryption::DataKeyManager; use engine_test::kv::KvTestEngine; use engine_traits::{ Error, ExternalSstFileInfo, IterOptions, Iterator, RefIterable, Result, SstExt, SstReader, @@ -49,7 +48,7 @@ fn basic() -> Result<()> { sst_writer.put(b"k1", b"v1")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open::(&sst_path, None)?; + let sst_reader = ::SstReader::open(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -78,7 +77,7 @@ fn forward() -> Result<()> { sst_writer.put(b"k2", b"v2")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open::(&sst_path, None)?; + let sst_reader = ::SstReader::open(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -115,7 +114,7 @@ fn reverse() -> Result<()> { sst_writer.put(b"k2", b"v2")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open::(&sst_path, None)?; + let sst_reader = 
::SstReader::open(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_last()?; @@ -153,7 +152,7 @@ fn delete() -> Result<()> { sst_writer.delete(b"k1")?; sst_writer.finish()?; - let sst_reader = ::SstReader::open::(&sst_path, None)?; + let sst_reader = ::SstReader::open(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -207,7 +206,7 @@ fn same_key() -> Result<()> { sst_writer.finish()?; - let sst_reader = ::SstReader::open::(&sst_path, None)?; + let sst_reader = ::SstReader::open(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; @@ -249,7 +248,7 @@ fn reverse_key() -> Result<()> { sst_writer.finish()?; - let sst_reader = ::SstReader::open::(&sst_path, None)?; + let sst_reader = ::SstReader::open(&sst_path, None)?; let mut iter = sst_reader.iter(IterOptions::default()).unwrap(); iter.seek_to_first()?; diff --git a/components/external_storage/src/lib.rs b/components/external_storage/src/lib.rs index 082073abe4f..05dbf6f965d 100644 --- a/components/external_storage/src/lib.rs +++ b/components/external_storage/src/lib.rs @@ -17,8 +17,7 @@ use std::{ use async_compression::futures::bufread::ZstdDecoder; use async_trait::async_trait; -use encryption::{from_engine_encryption_method, DecrypterReader, Iv}; -use engine_traits::FileEncryptionInfo; +use encryption::{DecrypterReader, FileEncryptionInfo, Iv}; use file_system::File; use futures::io::BufReader; use futures_io::AsyncRead; @@ -249,7 +248,7 @@ pub fn encrypt_wrap_reader( let input = match file_crypter { Some(x) => Box::new(DecrypterReader::new( reader, - from_engine_encryption_method(x.method), + x.method, &x.key, Iv::from_slice(&x.iv)?, )?), diff --git a/components/raft_log_engine/src/engine.rs b/components/raft_log_engine/src/engine.rs index 1f19a161b09..c71b9fd65d9 100644 --- a/components/raft_log_engine/src/engine.rs +++ 
b/components/raft_log_engine/src/engine.rs @@ -10,12 +10,13 @@ use std::{ use codec::number::NumberCodec; use encryption::{DataKeyManager, DecrypterReader, EncrypterWriter}; use engine_traits::{ - CacheStats, EncryptionKeyManager, EncryptionMethod, PerfContextExt, PerfContextKind, PerfLevel, - RaftEngine, RaftEngineDebug, RaftEngineReadOnly, RaftLogBatch as RaftLogBatchTrait, Result, - CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, + CacheStats, PerfContextExt, PerfContextKind, PerfLevel, RaftEngine, RaftEngineDebug, + RaftEngineReadOnly, RaftLogBatch as RaftLogBatchTrait, Result, CF_DEFAULT, CF_LOCK, CF_RAFT, + CF_WRITE, }; use file_system::{IoOp, IoRateLimiter, IoType, WithIoType}; use kvproto::{ + encryptionpb::EncryptionMethod, metapb::Region, raft_serverpb::{ RaftApplyState, RaftLocalState, RegionLocalState, StoreIdent, StoreRecoverState, diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index 9e0ed449cef..c29399ac6a0 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -30,10 +30,7 @@ use std::{ }; use encryption_export::DataKeyManager; -use engine_traits::{ - EncryptionKeyManager, KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, - ALL_CFS, -}; +use engine_traits::{KvEngine, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, ALL_CFS}; use fail::fail_point; use kvproto::{ metapb::PeerRole, diff --git a/components/raftstore/src/store/compaction_guard.rs b/components/raftstore/src/store/compaction_guard.rs index f63a257c9f5..161a8f9c4db 100644 --- a/components/raftstore/src/store/compaction_guard.rs +++ b/components/raftstore/src/store/compaction_guard.rs @@ -269,7 +269,6 @@ mod tests { use std::{path::Path, str}; use collections::HashMap; - use encryption::DataKeyManager; use engine_rocks::{ raw::{BlockBasedOptions, DBCompressionType}, util::new_engine_opt, @@ -542,7 +541,7 @@ mod tests { 
} fn collect_keys(path: &str) -> Vec> { - let reader = RocksSstReader::open::(path, None).unwrap(); + let reader = RocksSstReader::open(path, None).unwrap(); let mut sst_reader = reader.iter(IterOptions::default()).unwrap(); let mut valid = sst_reader.seek_to_first().unwrap(); let mut ret = vec![]; diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 6fe21fe9750..a857cbffdfd 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -15,8 +15,8 @@ use std::{ }; use collections::{HashMap, HashMapEntry as Entry}; -use encryption::{create_aes_ctr_crypter, from_engine_encryption_method, DataKeyManager, Iv}; -use engine_traits::{CfName, EncryptionKeyManager, KvEngine, CF_DEFAULT, CF_LOCK, CF_WRITE}; +use encryption::{create_aes_ctr_crypter, DataKeyManager, Iv}; +use engine_traits::{CfName, KvEngine, CF_DEFAULT, CF_LOCK, CF_WRITE}; use error_code::{self, ErrorCode, ErrorCodeExt}; use fail::fail_point; use file_system::{ @@ -614,7 +614,7 @@ impl Snapshot { if let Some(mgr) = &s.mgr.encryption_key_manager { let enc_info = mgr.new_file(&file_paths[idx])?; - let mthd = from_engine_encryption_method(enc_info.method); + let mthd = enc_info.method; if mthd != EncryptionMethod::Plaintext { let file_for_recving = cf_file.file_for_recving.last_mut().unwrap(); file_for_recving.encrypter = Some( diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 3cdee1e40f1..952f49baf44 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -8,12 +8,10 @@ use std::{ usize, }; -use encryption::{ - from_engine_encryption_method, DataKeyManager, DecrypterReader, EncrypterWriter, Iv, -}; +use encryption::{DataKeyManager, DecrypterReader, EncrypterWriter, Iv}; use engine_traits::{ - CfName, EncryptionKeyManager, Error as EngineError, Iterable, KvEngine, Mutable, - SstCompressionType, SstWriter, SstWriterBuilder, 
WriteBatch, + CfName, Error as EngineError, Iterable, KvEngine, Mutable, SstCompressionType, SstWriter, + SstWriterBuilder, WriteBatch, }; use kvproto::encryptionpb::EncryptionMethod; use tikv_util::{ @@ -60,7 +58,7 @@ where if let Some(key_mgr) = key_mgr { let enc_info = box_try!(key_mgr.new_file(path)); - let mthd = from_engine_encryption_method(enc_info.method); + let mthd = enc_info.method; if mthd != EncryptionMethod::Plaintext { let writer = box_try!(EncrypterWriter::new( file.take().unwrap(), @@ -287,7 +285,7 @@ pub fn get_decrypter_reader( encryption_key_manager: &DataKeyManager, ) -> Result, Error> { let enc_info = box_try!(encryption_key_manager.get_file(file)); - let mthd = from_engine_encryption_method(enc_info.method); + let mthd = enc_info.method; debug!( "get_decrypter_reader gets enc_info for {:?}, method: {:?}", file, mthd diff --git a/components/sst_importer/src/import_file.rs b/components/sst_importer/src/import_file.rs index 7ae91d64b35..a8fdea6a564 100644 --- a/components/sst_importer/src/import_file.rs +++ b/components/sst_importer/src/import_file.rs @@ -12,9 +12,7 @@ use std::{ use api_version::api_v2::TIDB_RANGES_COMPLEMENT; use encryption::{DataKeyManager, EncrypterWriter}; -use engine_traits::{ - iter_option, EncryptionKeyManager, Iterator, KvEngine, RefIterable, SstMetaInfo, SstReader, -}; +use engine_traits::{iter_option, Iterator, KvEngine, RefIterable, SstMetaInfo, SstReader}; use file_system::{sync_dir, File, OpenOptions}; use keys::data_key; use kvproto::{import_sstpb::*, kvrpcpb::ApiVersion}; diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index ab4512de692..6eef07b1ebc 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -16,11 +16,11 @@ use std::{ use collections::HashSet; use dashmap::{mapref::entry::Entry, DashMap}; -use encryption::{to_engine_encryption_method, DataKeyManager}; +use encryption::{DataKeyManager, 
FileEncryptionInfo}; use engine_traits::{ - name_to_cf, util::check_key_in_range, CfName, EncryptionKeyManager, FileEncryptionInfo, - IterOptions, Iterator, KvEngine, RefIterable, SstCompressionType, SstExt, SstMetaInfo, - SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, CF_WRITE, + name_to_cf, util::check_key_in_range, CfName, IterOptions, Iterator, KvEngine, RefIterable, + SstCompressionType, SstExt, SstMetaInfo, SstReader, SstWriter, SstWriterBuilder, CF_DEFAULT, + CF_WRITE, }; use external_storage::{ compression_reader_dispatcher, encrypt_wrap_reader, ExternalStorage, RestoreConfig, @@ -1116,7 +1116,7 @@ impl SstImporter { let path = self.dir.join_for_write(meta)?; let file_crypter = crypter.map(|c| FileEncryptionInfo { - method: to_engine_encryption_method(c.cipher_type), + method: c.cipher_type, key: c.cipher_key, iv: meta.cipher_iv.to_owned(), }); @@ -1479,11 +1479,12 @@ mod tests { use engine_rocks::get_env; use engine_traits::{ - collect, EncryptionMethod, Error as TraitError, ExternalSstFileInfo, Iterable, Iterator, - RefIterable, SstReader, SstWriter, CF_DEFAULT, DATA_CFS, + collect, Error as TraitError, ExternalSstFileInfo, Iterable, Iterator, RefIterable, + SstReader, SstWriter, CF_DEFAULT, DATA_CFS, }; use external_storage::read_external_storage_info_buff; use file_system::File; + use kvproto::encryptionpb::EncryptionMethod; use online_config::{ConfigManager, OnlineConfig}; use openssl::hash::{Hasher, MessageDigest}; use tempfile::Builder; diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index 4adfe3db51e..121daf49ea8 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs @@ -3,7 +3,6 @@ use std::path::Path; use encryption::DataKeyManager; -use engine_traits::EncryptionKeyManager; use external_storage::ExternalStorage; use file_system::File; @@ -127,8 +126,8 @@ mod tests { RocksTitanDbOptions, }; use engine_traits::{ - CfName, CfOptions, DbOptions, EncryptionKeyManager, 
ImportExt, Peekable, SstWriter, - SstWriterBuilder, TitanCfOptions, CF_DEFAULT, + CfName, CfOptions, DbOptions, ImportExt, Peekable, SstWriter, SstWriterBuilder, + TitanCfOptions, CF_DEFAULT, }; use tempfile::Builder; use test_util::encryption::new_test_key_manager; diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 332168a4e93..21099974d2d 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -12,7 +12,6 @@ use std::{ }; use collections::HashMap; -use encryption::DataKeyManager; pub use engine_rocks::RocksSnapshot; use engine_rocks::{ get_env, RocksCfOptions, RocksDbOptions, RocksEngine as BaseRocksEngine, RocksEngineIterator, @@ -128,9 +127,7 @@ impl RocksEngine { let worker = Worker::new("engine-rocksdb"); let mut db_opts = db_opts.unwrap_or_default(); if io_rate_limiter.is_some() { - db_opts.set_env( - get_env::(None /* key_manager */, io_rate_limiter).unwrap(), - ); + db_opts.set_env(get_env(None /* key_manager */, io_rate_limiter).unwrap()); } let db = engine_rocks::util::new_engine_opt(&path, db_opts, cfs_opts)?; diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index ca869f5c761..997a932be9d 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -35,7 +35,7 @@ use std::{ use collections::HashMap; use crc64fast::Digest; use encryption_export::{DataKeyImporter, DataKeyManager}; -use engine_traits::{Checkpointer, EncryptionKeyManager, KvEngine, TabletRegistry}; +use engine_traits::{Checkpointer, KvEngine, TabletRegistry}; use file_system::{IoType, OpenOptions, WithIoType}; use futures::{ future::FutureExt, From 6bb3d2eca36932e6545b6f00f0d5728354c45acf Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 28 Nov 2023 20:19:17 +0800 Subject: [PATCH 1034/1149] titan: update titan to fix compaction filter (#16092) close tikv/tikv#16091 update titan to fix compaction filter Signed-off-by: Connor1996 --- Cargo.lock | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9c93540d3ee..4211e2bfdf4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2849,7 +2849,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#1cdf55ba2fd2b132e8cd549146b96205ba4721ad" +source = "git+https://github.com/tikv/rust-rocksdb.git#c4b7047314a9b27926a1b7b25d2e6d1a37a48d2b" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2868,7 +2868,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#1cdf55ba2fd2b132e8cd549146b96205ba4721ad" +source = "git+https://github.com/tikv/rust-rocksdb.git#c4b7047314a9b27926a1b7b25d2e6d1a37a48d2b" dependencies = [ "bzip2-sys", "cc", @@ -4708,7 +4708,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#1cdf55ba2fd2b132e8cd549146b96205ba4721ad" +source = "git+https://github.com/tikv/rust-rocksdb.git#c4b7047314a9b27926a1b7b25d2e6d1a37a48d2b" dependencies = [ "libc 0.2.146", "librocksdb_sys", From dd567e60799e38f2e250ad4c4b3054c4ef794014 Mon Sep 17 00:00:00 2001 From: glorv Date: Wed, 29 Nov 2023 15:20:51 +0800 Subject: [PATCH 1035/1149] readpool: gate future-pool running tasks per priority (#16049) close tikv/tikv#16026 Signed-off-by: glorv Co-authored-by: tongjian --- Cargo.lock | 1 + components/resource_control/src/lib.rs | 4 +- .../resource_control/src/resource_group.rs | 221 ++---------------- components/resource_control/src/worker.rs | 38 +-- components/server/src/server.rs | 4 +- components/server/src/server2.rs | 4 +- components/tikv_util/Cargo.toml | 1 + components/tikv_util/src/lib.rs | 1 + components/tikv_util/src/resource_control.rs | 191 +++++++++++++++ .../tikv_util/src/yatp_pool/future_pool.rs | 50 +++- components/tikv_util/src/yatp_pool/metrics.rs | 2 +- components/tikv_util/src/yatp_pool/mod.rs | 28 +-- 
src/config/mod.rs | 1 - src/read_pool.rs | 60 +++-- src/storage/txn/sched_pool.rs | 6 +- 15 files changed, 345 insertions(+), 267 deletions(-) create mode 100644 components/tikv_util/src/resource_control.rs diff --git a/Cargo.lock b/Cargo.lock index 4211e2bfdf4..146e9aa04ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6639,6 +6639,7 @@ dependencies = [ "slog-global", "slog-json", "slog-term", + "strum 0.20.0", "sysinfo", "tempfile", "thiserror", diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index a7b4cf03192..917718e8409 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -11,9 +11,9 @@ use serde::{Deserialize, Serialize}; mod resource_group; pub use resource_group::{ - priority_from_task_meta, ResourceConsumeType, ResourceController, ResourceGroupManager, - TaskMetadata, MIN_PRIORITY_UPDATE_INTERVAL, + ResourceConsumeType, ResourceController, ResourceGroupManager, MIN_PRIORITY_UPDATE_INTERVAL, }; +pub use tikv_util::resource_control::*; mod future; pub use future::{with_resource_limiter, ControlledFuture}; diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index 7e6d4279a25..d6933d0a383 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -1,7 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - borrow::Cow, cell::Cell, cmp::{max, min}, collections::HashSet, @@ -22,8 +21,11 @@ use kvproto::{ resource_manager::{GroupMode, ResourceGroup as PbResourceGroup}, }; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard}; -use strum::{EnumCount, EnumIter, IntoEnumIterator}; -use tikv_util::{info, time::Instant}; +use tikv_util::{ + info, + resource_control::{TaskMetadata, TaskPriority, DEFAULT_RESOURCE_GROUP_NAME}, + time::Instant, +}; use yatp::queue::priority::TaskPriorityProvider; use crate::{metrics::deregister_metrics, resource_limiter::ResourceLimiter}; @@ -34,13 +36,12 @@ const DEFAULT_PRIORITY_PER_READ_TASK: u64 = 50; const TASK_EXTRA_FACTOR_BY_LEVEL: [u64; 3] = [0, 20, 100]; /// duration to update the minimal priority value of each resource group. pub const MIN_PRIORITY_UPDATE_INTERVAL: Duration = Duration::from_secs(1); -/// default resource group name -const DEFAULT_RESOURCE_GROUP_NAME: &str = "default"; /// default value of max RU quota. const DEFAULT_MAX_RU_QUOTA: u64 = 10_000; /// The maximum RU quota that can be configured. const MAX_RU_QUOTA: u64 = i32::MAX as u64; +#[cfg(test)] const LOW_PRIORITY: u32 = 1; const MEDIUM_PRIORITY: u32 = 8; #[cfg(test)] @@ -56,40 +57,6 @@ pub enum ResourceConsumeType { IoBytes(u64), } -#[derive(Copy, Clone, Eq, PartialEq, EnumCount, EnumIter, Debug)] -#[repr(usize)] -pub enum TaskPriority { - High = 0, - Medium = 1, - Low = 2, -} - -impl TaskPriority { - pub fn as_str(&self) -> &'static str { - match *self { - TaskPriority::High => "high", - TaskPriority::Medium => "medium", - TaskPriority::Low => "low", - } - } -} - -impl From for TaskPriority { - fn from(value: u32) -> Self { - // map the resource group priority value (1,8,16) to (Low,Medium,High) - // 0 means the priority is not set, so map it to medium by default. 
- if value == 0 { - Self::Medium - } else if value < 6 { - Self::Low - } else if value < 11 { - Self::Medium - } else { - Self::High - } - } -} - /// ResourceGroupManager manages the metadata of each resource group. pub struct ResourceGroupManager { pub(crate) resource_groups: DashMap, @@ -100,24 +67,20 @@ pub struct ResourceGroupManager { // resource limiter has changed. version_generator: AtomicU64, // the shared resource limiter of each priority - priority_limiters: [Arc; TaskPriority::COUNT], + priority_limiters: [Arc; TaskPriority::PRIORITY_COUNT], } impl Default for ResourceGroupManager { fn default() -> Self { - let priority_limiters = TaskPriority::iter() - .map(|p| { - Arc::new(ResourceLimiter::new( - p.as_str().to_owned(), - f64::INFINITY, - f64::INFINITY, - 0, - false, - )) - }) - .collect::>() - .try_into() - .unwrap(); + let priority_limiters = TaskPriority::priorities().map(|p| { + Arc::new(ResourceLimiter::new( + p.as_str().to_owned(), + f64::INFINITY, + f64::INFINITY, + 0, + false, + )) + }); let manager = Self { resource_groups: Default::default(), group_count: AtomicU64::new(0), @@ -308,14 +271,6 @@ impl ResourceGroupManager { self.get_group_count() > 1 } - /// return the priority of target resource group. - #[inline] - pub fn get_resource_group_priority(&self, group: &str) -> u32 { - self.resource_groups - .get(group) - .map_or(LOW_PRIORITY, |g| g.group.priority) - } - // Always return the background resource limiter if any; // Only return the foregroup limiter when priority is enabled. 
pub fn get_resource_limiter( @@ -385,7 +340,9 @@ impl ResourceGroupManager { } #[inline] - pub fn get_priority_resource_limiters(&self) -> [Arc; 3] { + pub fn get_priority_resource_limiters( + &self, + ) -> [Arc; TaskPriority::PRIORITY_COUNT] { self.priority_limiters.clone() } } @@ -525,7 +482,9 @@ impl ResourceController { let mut max_ru_quota = self.max_ru_quota.lock().unwrap(); // skip to adjust max ru if it is the "default" group and the ru config eq // MAX_RU_QUOTA - if ru_quota > *max_ru_quota && (name != b"default" || ru_quota < MAX_RU_QUOTA) { + if ru_quota > *max_ru_quota + && (name != DEFAULT_RESOURCE_GROUP_NAME.as_bytes() || ru_quota < MAX_RU_QUOTA) + { *max_ru_quota = ru_quota; // adjust all group weight because the current value is too small. self.adjust_all_resource_group_factors(ru_quota); @@ -668,101 +627,9 @@ impl ResourceController { } } -const OVERRIDE_PRIORITY_MASK: u8 = 0b1000_0000; -const RESOURCE_GROUP_NAME_MASK: u8 = 0b0100_0000; - -#[derive(Clone, Default)] -pub struct TaskMetadata<'a> { - // The first byte is a bit map to indicate which field exists, - // then append override priority if nonzero, - // then append resource group name if not default - metadata: Cow<'a, [u8]>, -} - -impl<'a> TaskMetadata<'a> { - pub fn deep_clone(&self) -> TaskMetadata<'static> { - TaskMetadata { - metadata: Cow::Owned(self.metadata.to_vec()), - } - } - - pub fn from_ctx(ctx: &ResourceControlContext) -> Self { - let mut mask = 0; - let mut buf = vec![]; - if ctx.override_priority != 0 { - mask |= OVERRIDE_PRIORITY_MASK; - } - if !ctx.resource_group_name.is_empty() - && ctx.resource_group_name != DEFAULT_RESOURCE_GROUP_NAME - { - mask |= RESOURCE_GROUP_NAME_MASK; - } - if mask == 0 { - // if all are default value, no need to write anything to save copy cost - return Self { - metadata: Cow::Owned(buf), - }; - } - buf.push(mask); - if mask & OVERRIDE_PRIORITY_MASK != 0 { - buf.extend_from_slice(&(ctx.override_priority as u32).to_ne_bytes()); - } - if mask & 
RESOURCE_GROUP_NAME_MASK != 0 { - buf.extend_from_slice(ctx.resource_group_name.as_bytes()); - } - Self { - metadata: Cow::Owned(buf), - } - } - - fn from_bytes(bytes: &'a [u8]) -> Self { - Self { - metadata: Cow::Borrowed(bytes), - } - } - - pub fn to_vec(self) -> Vec { - self.metadata.into_owned() - } - - pub fn override_priority(&self) -> u32 { - if self.metadata.is_empty() { - return 0; - } - if self.metadata[0] & OVERRIDE_PRIORITY_MASK == 0 { - return 0; - } - u32::from_ne_bytes(self.metadata[1..5].try_into().unwrap()) - } - - pub fn group_name(&self) -> &[u8] { - if self.metadata.is_empty() { - return DEFAULT_RESOURCE_GROUP_NAME.as_bytes(); - } - if self.metadata[0] & RESOURCE_GROUP_NAME_MASK == 0 { - return DEFAULT_RESOURCE_GROUP_NAME.as_bytes(); - } - let start = if self.metadata[0] & OVERRIDE_PRIORITY_MASK != 0 { - 5 - } else { - 1 - }; - &self.metadata[start..] - } -} - -// return the TaskPriority value from task metadata. -// This function is used for handling thread pool task waiting metrics. 
-pub fn priority_from_task_meta(meta: &[u8]) -> usize { - let priority = TaskMetadata::from_bytes(meta).override_priority(); - // mapping (high(15), medium(8), low(1)) -> (0, 1, 2) - debug_assert!(priority <= 16); - TaskPriority::from(priority) as usize -} - impl TaskPriorityProvider for ResourceController { fn priority_of(&self, extras: &yatp::queue::Extras) -> u64 { - let metadata = TaskMetadata::from_bytes(extras.metadata()); + let metadata = TaskMetadata::from(extras.metadata()); self.resource_group(metadata.group_name()).get_priority( extras.current_level() as usize, if metadata.override_priority() == 0 { @@ -1316,32 +1183,6 @@ pub(crate) mod tests { assert!(v5 < v1); } - #[test] - fn test_task_metadata() { - let cases = [ - ("default", 0u32), - ("default", 6u32), - ("test", 0u32), - ("test", 15u32), - ]; - - let metadata = TaskMetadata::from_ctx(&ResourceControlContext::default()); - assert_eq!(metadata.group_name(), b"default"); - for (group_name, priority) in cases { - let metadata = TaskMetadata::from_ctx(&ResourceControlContext { - resource_group_name: group_name.to_string(), - override_priority: priority as u64, - ..Default::default() - }); - assert_eq!(metadata.override_priority(), priority); - assert_eq!(metadata.group_name(), group_name.as_bytes()); - let vec = metadata.to_vec(); - let metadata1 = TaskMetadata::from_bytes(&vec); - assert_eq!(metadata1.override_priority(), priority); - assert_eq!(metadata1.group_name(), group_name.as_bytes()); - } - } - #[test] fn test_get_resource_limiter() { let mgr = ResourceGroupManager::default(); @@ -1433,20 +1274,4 @@ pub(crate) mod tests { &mgr.priority_limiters[1] )); } - - #[test] - fn test_task_priority() { - use TaskPriority::*; - let cases = [ - (0, Medium), - (1, Low), - (7, Medium), - (8, Medium), - (15, High), - (16, High), - ]; - for (value, priority) in cases { - assert_eq!(TaskPriority::from(value), priority); - } - } } diff --git a/components/resource_control/src/worker.rs 
b/components/resource_control/src/worker.rs index 79dea73d0ae..2ea72f132ee 100644 --- a/components/resource_control/src/worker.rs +++ b/components/resource_control/src/worker.rs @@ -10,9 +10,10 @@ use std::{ use file_system::{fetch_io_bytes, IoBytes, IoType}; use prometheus::Histogram; -use strum::{EnumCount, IntoEnumIterator}; +use strum::EnumCount; use tikv_util::{ debug, + resource_control::TaskPriority, sys::{cpu_time::ProcessStat, SysQuota}, time::Instant, warn, @@ -21,13 +22,17 @@ use tikv_util::{ use crate::{ metrics::*, - resource_group::{ResourceGroupManager, TaskPriority}, + resource_group::ResourceGroupManager, resource_limiter::{GroupStatistics, ResourceLimiter, ResourceType}, }; pub const BACKGROUND_LIMIT_ADJUST_DURATION: Duration = Duration::from_secs(10); const MICROS_PER_SEC: f64 = 1_000_000.0; +// the minimal schedule wait duration due to the overhead of queue. +// We should exclude this cause when calculate the estimated total wait +// duration. +const MINIMAL_SCHEDULE_WAIT_SECS: f64 = 0.000_005; //5us pub struct ResourceUsageStats { total_quota: f64, @@ -303,7 +308,7 @@ struct GroupStats { /// In general, caller should call this function in a fixed interval. 
pub struct PriorityLimiterAdjustWorker { resource_ctl: Arc, - trackers: [PriorityLimiterStatsTracker; 3], + trackers: [PriorityLimiterStatsTracker; TaskPriority::PRIORITY_COUNT], resource_quota_getter: R, last_adjust_time: Instant, is_last_low_cpu: bool, @@ -327,10 +332,9 @@ impl PriorityLimiterAdjustWorker { resource_ctl: Arc, resource_quota_getter: R, ) -> Self { - let priorities: [_; 3] = TaskPriority::iter().collect::>().try_into().unwrap(); let trackers = resource_ctl .get_priority_resource_limiters() - .zip(priorities) + .zip(TaskPriority::priorities()) .map(|(l, p)| PriorityLimiterStatsTracker::new(l, p.as_str())); Self { resource_ctl, @@ -367,8 +371,8 @@ impl PriorityLimiterAdjustWorker { } self.is_last_single_group = false; - let stats: [_; 3] = - std::array::from_fn(|i| self.trackers[i].get_and_update_last_stats(dur.as_secs_f64())); + let stats: [_; TaskPriority::PRIORITY_COUNT] = + array::from_fn(|i| self.trackers[i].get_and_update_last_stats(dur.as_secs_f64())); let process_cpu_stats = match self .resource_quota_getter @@ -415,12 +419,13 @@ impl PriorityLimiterAdjustWorker { return; } - let real_cpu_total: f64 = stats.iter().map(|s| s.cpu_secs).sum(); + let cpu_duration: [_; TaskPriority::PRIORITY_COUNT] = array::from_fn(|i| stats[i].cpu_secs); + let real_cpu_total: f64 = cpu_duration.iter().sum(); let expect_pool_cpu_total = real_cpu_total * (process_cpu_stats.total_quota * 0.95) / process_cpu_stats.current_used; let mut limits = [0.0; 2]; - let level_expected: [_; 3] = - std::array::from_fn(|i| stats[i].cpu_secs + stats[i].wait_secs); + let level_expected: [_; TaskPriority::PRIORITY_COUNT] = + array::from_fn(|i| stats[i].cpu_secs + stats[i].wait_secs); // substract the cpu time usage for priority high. 
let mut expect_cpu_time_total = expect_pool_cpu_total - level_expected[0]; @@ -442,8 +447,10 @@ impl PriorityLimiterAdjustWorker { limits[i - 1] = limit; expect_cpu_time_total -= level_expected[i]; } - debug!("adjsut cpu limiter by priority"; "cpu_quota" => process_cpu_stats.total_quota, "process_cpu" => process_cpu_stats.current_used, "expected_cpu" => ?level_expected, - "limits" => ?limits, "limit_cpu_total" => expect_pool_cpu_total, "pool_cpu_cost" => real_cpu_total); + debug!("adjsut cpu limiter by priority"; "cpu_quota" => process_cpu_stats.total_quota, + "process_cpu" => process_cpu_stats.current_used, "expected_cpu" => ?level_expected, + "cpu_costs" => ?cpu_duration, "limits" => ?limits, + "limit_cpu_total" => expect_pool_cpu_total, "pool_cpu_cost" => real_cpu_total); } } @@ -516,12 +523,15 @@ impl PriorityLimiterStatsTracker { let stats_delta = (cur_stats - self.last_stats) / dur_secs; self.last_stats = cur_stats; let wait_stats: [_; 2] = - std::array::from_fn(|i| self.task_wait_dur_trakcers[i].get_and_upate_statistics()); + array::from_fn(|i| self.task_wait_dur_trakcers[i].get_and_upate_statistics()); let schedule_wait_dur_secs = wait_stats.iter().map(|s| s.0).sum::() / dur_secs; + let expected_wait_dur_secs = stats_delta.request_count as f64 * MINIMAL_SCHEDULE_WAIT_SECS; + let normed_schedule_wait_dur_secs = + (schedule_wait_dur_secs - expected_wait_dur_secs).max(0.0); LimiterStats { cpu_secs: stats_delta.total_consumed as f64 / MICROS_PER_SEC, wait_secs: stats_delta.total_wait_dur_us as f64 / MICROS_PER_SEC - + schedule_wait_dur_secs, + + normed_schedule_wait_dur_secs, req_count: stats_delta.request_count, } } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index ed72d9ca12e..059cda0bb91 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -70,7 +70,7 @@ use raftstore::{ RaftRouterCompactedEventSender, }; use resolved_ts::{LeadershipResolver, Task}; -use 
resource_control::{priority_from_task_meta, ResourceGroupManager}; +use resource_control::ResourceGroupManager; use security::SecurityManager; use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use snap_recovery::RecoveryService; @@ -549,7 +549,7 @@ where engines.engine.clone(), resource_ctl, CleanupMethod::Remote(self.core.background_worker.remote()), - Some(Arc::new(priority_from_task_meta)), + true, )) } else { None diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 081d4b8f915..636a4bc9282 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -67,7 +67,7 @@ use raftstore_v2::{ StateStorage, }; use resolved_ts::Task; -use resource_control::{priority_from_task_meta, ResourceGroupManager}; +use resource_control::ResourceGroupManager; use security::SecurityManager; use service::{service_event::ServiceEvent, service_manager::GrpcServiceManager}; use tikv::{ @@ -460,7 +460,7 @@ where engines.engine.clone(), resource_ctl, CleanupMethod::Remote(self.core.background_worker.remote()), - Some(Arc::new(priority_from_task_meta)), + true, )) } else { None diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 0b6fc5978cb..9250dd03cb0 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -52,6 +52,7 @@ slog-async = "2.3" slog-global = { workspace = true } slog-json = "2.3" slog-term = "2.4" +strum = { version = "0.20", features = ["derive"] } sysinfo = "0.26" thiserror = "1.0" tikv_alloc = { workspace = true } diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index b8aa578a878..cdcfc4673c9 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -54,6 +54,7 @@ pub mod memory; pub mod metrics; pub mod mpsc; pub mod quota_limiter; +pub mod resource_control; pub mod store; pub mod stream; pub mod sys; diff --git a/components/tikv_util/src/resource_control.rs 
b/components/tikv_util/src/resource_control.rs new file mode 100644 index 00000000000..c7b46c2ddab --- /dev/null +++ b/components/tikv_util/src/resource_control.rs @@ -0,0 +1,191 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +/// This mod provide some utility types and functions for resource control. +use std::borrow::Cow; + +use kvproto::kvrpcpb::ResourceControlContext; +use strum::{EnumCount, EnumIter}; + +/// default resource group name +pub const DEFAULT_RESOURCE_GROUP_NAME: &str = "default"; + +const OVERRIDE_PRIORITY_MASK: u8 = 0b1000_0000; +const RESOURCE_GROUP_NAME_MASK: u8 = 0b0100_0000; + +#[derive(Clone, Default)] +pub struct TaskMetadata<'a> { + // The first byte is a bit map to indicate which field exists, + // then append override priority if nonzero, + // then append resource group name if not default + metadata: Cow<'a, [u8]>, +} + +impl<'a> TaskMetadata<'a> { + pub fn deep_clone(&self) -> TaskMetadata<'static> { + TaskMetadata { + metadata: Cow::Owned(self.metadata.to_vec()), + } + } + + pub fn from_ctx(ctx: &ResourceControlContext) -> Self { + let mut mask = 0; + let mut buf = vec![]; + if ctx.override_priority != 0 { + mask |= OVERRIDE_PRIORITY_MASK; + } + if !ctx.resource_group_name.is_empty() + && ctx.resource_group_name != DEFAULT_RESOURCE_GROUP_NAME + { + mask |= RESOURCE_GROUP_NAME_MASK; + } + if mask == 0 { + // if all are default value, no need to write anything to save copy cost + return Self { + metadata: Cow::Owned(buf), + }; + } + buf.push(mask); + if mask & OVERRIDE_PRIORITY_MASK != 0 { + buf.extend_from_slice(&(ctx.override_priority as u32).to_ne_bytes()); + } + if mask & RESOURCE_GROUP_NAME_MASK != 0 { + buf.extend_from_slice(ctx.resource_group_name.as_bytes()); + } + Self { + metadata: Cow::Owned(buf), + } + } + + pub fn to_vec(self) -> Vec { + self.metadata.into_owned() + } + + pub fn override_priority(&self) -> u32 { + if self.metadata.is_empty() { + return 0; + } + if self.metadata[0] & 
OVERRIDE_PRIORITY_MASK == 0 { + return 0; + } + u32::from_ne_bytes(self.metadata[1..5].try_into().unwrap()) + } + + pub fn group_name(&self) -> &[u8] { + if self.metadata.is_empty() { + return DEFAULT_RESOURCE_GROUP_NAME.as_bytes(); + } + if self.metadata[0] & RESOURCE_GROUP_NAME_MASK == 0 { + return DEFAULT_RESOURCE_GROUP_NAME.as_bytes(); + } + let start = if self.metadata[0] & OVERRIDE_PRIORITY_MASK != 0 { + 5 + } else { + 1 + }; + &self.metadata[start..] + } +} + +impl<'a> From<&'a [u8]> for TaskMetadata<'a> { + fn from(bytes: &'a [u8]) -> Self { + Self { + metadata: Cow::Borrowed(bytes), + } + } +} + +// return the TaskPriority value from task metadata. +pub fn priority_from_task_meta(meta: &[u8]) -> TaskPriority { + let priority = TaskMetadata::from(meta).override_priority(); + // mapping (high(15), medium(8), low(1)) -> (0, 1, 2) + debug_assert!(priority <= 16); + TaskPriority::from(priority) +} + +#[derive(Copy, Clone, Eq, PartialEq, EnumCount, EnumIter, Debug)] +#[repr(usize)] +pub enum TaskPriority { + High = 0, + Medium = 1, + Low = 2, +} + +impl TaskPriority { + // reexport enum count, caller can use it without importing `EnumCount`. + pub const PRIORITY_COUNT: usize = Self::COUNT; + pub fn as_str(&self) -> &'static str { + match *self { + TaskPriority::High => "high", + TaskPriority::Medium => "medium", + TaskPriority::Low => "low", + } + } + + pub fn priorities() -> [Self; Self::COUNT] { + use TaskPriority::*; + [High, Medium, Low] + } +} + +impl From for TaskPriority { + fn from(value: u32) -> Self { + // map the resource group priority value (1,8,16) to (Low,Medium,High) + // 0 means the priority is not set, so map it to medium by default. 
+ if value == 0 { + Self::Medium + } else if value < 6 { + Self::Low + } else if value < 11 { + Self::Medium + } else { + Self::High + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_task_metadata() { + let cases = [ + ("default", 0u32), + ("default", 6u32), + ("test", 0u32), + ("test", 15u32), + ]; + + let metadata = TaskMetadata::from_ctx(&ResourceControlContext::default()); + assert_eq!(metadata.group_name(), b"default"); + for (group_name, priority) in cases { + let metadata = TaskMetadata::from_ctx(&ResourceControlContext { + resource_group_name: group_name.to_string(), + override_priority: priority as u64, + ..Default::default() + }); + assert_eq!(metadata.override_priority(), priority); + assert_eq!(metadata.group_name(), group_name.as_bytes()); + let vec = metadata.to_vec(); + let metadata1 = TaskMetadata::from(vec.as_slice()); + assert_eq!(metadata1.override_priority(), priority); + assert_eq!(metadata1.group_name(), group_name.as_bytes()); + } + } + + #[test] + fn test_task_priority() { + use TaskPriority::*; + let cases = [ + (0, Medium), + (1, Low), + (7, Medium), + (8, Medium), + (15, High), + (16, High), + ]; + for (value, priority) in cases { + assert_eq!(TaskPriority::from(value), priority); + } + } +} diff --git a/components/tikv_util/src/yatp_pool/future_pool.rs b/components/tikv_util/src/yatp_pool/future_pool.rs index c6a34b2673b..75d65fe4641 100644 --- a/components/tikv_util/src/yatp_pool/future_pool.rs +++ b/components/tikv_util/src/yatp_pool/future_pool.rs @@ -17,13 +17,15 @@ use prometheus::{IntCounter, IntGauge}; use tracker::TrackedFuture; use yatp::{queue::Extras, task::future}; +use crate::resource_control::{priority_from_task_meta, TaskPriority}; + pub type ThreadPool = yatp::ThreadPool; use super::metrics; #[derive(Clone)] struct Env { - metrics_running_task_count: IntGauge, + metrics_running_task_count_by_priority: [IntGauge; TaskPriority::PRIORITY_COUNT], metrics_handled_task_count: IntCounter, } @@ -46,8 
+48,9 @@ impl crate::AssertSync for FuturePool {} impl FuturePool { pub fn from_pool(pool: ThreadPool, name: &str, pool_size: usize, max_tasks: usize) -> Self { let env = Env { - metrics_running_task_count: metrics::FUTUREPOOL_RUNNING_TASK_VEC - .with_label_values(&[name]), + metrics_running_task_count_by_priority: TaskPriority::priorities().map(|p| { + metrics::FUTUREPOOL_RUNNING_TASK_VEC.with_label_values(&[name, p.as_str()]) + }), metrics_handled_task_count: metrics::FUTUREPOOL_HANDLED_TASK_VEC .with_label_values(&[name]), }; @@ -71,6 +74,16 @@ impl FuturePool { self.inner.scale_pool_size(thread_count) } + #[inline] + pub fn set_max_tasks_per_worker(&self, tasks_per_thread: usize) { + self.inner.set_max_tasks_per_worker(tasks_per_thread); + } + + #[inline] + pub fn get_max_tasks_count(&self) -> usize { + self.inner.max_tasks.load(Ordering::Relaxed) + } + /// Gets current running task count. #[inline] pub fn get_running_task_count(&self) -> usize { @@ -148,13 +161,25 @@ impl PoolInner { self.pool_size.store(thread_count, Ordering::Release); } + fn set_max_tasks_per_worker(&self, max_tasks_per_thread: usize) { + let max_tasks = self + .pool_size + .load(Ordering::Acquire) + .saturating_mul(max_tasks_per_thread); + self.max_tasks.store(max_tasks, Ordering::Release); + } + fn get_running_task_count(&self) -> usize { // As long as different future pool has different name prefix, we can safely use // the value in metrics. 
- self.env.metrics_running_task_count.get() as usize + self.env + .metrics_running_task_count_by_priority + .iter() + .map(|r| r.get()) + .sum::() as usize } - fn gate_spawn(&self) -> Result<(), Full> { + fn gate_spawn(&self, current_tasks: usize) -> Result<(), Full> { fail_point!("future_pool_spawn_full", |_| Err(Full { current_tasks: 100, max_tasks: 100, @@ -165,7 +190,6 @@ impl PoolInner { return Ok(()); } - let current_tasks = self.get_running_task_count(); if current_tasks >= max_tasks { Err(Full { current_tasks, @@ -181,9 +205,14 @@ impl PoolInner { F: Future + Send + 'static, { let metrics_handled_task_count = self.env.metrics_handled_task_count.clone(); - let metrics_running_task_count = self.env.metrics_running_task_count.clone(); + let task_priority = extras + .as_ref() + .map(|m| priority_from_task_meta(m.metadata())) + .unwrap_or(TaskPriority::Medium); + let metrics_running_task_count = + self.env.metrics_running_task_count_by_priority[task_priority as usize].clone(); - self.gate_spawn()?; + self.gate_spawn(metrics_running_task_count.get() as usize)?; metrics_running_task_count.inc(); @@ -210,9 +239,10 @@ impl PoolInner { F::Output: Send, { let metrics_handled_task_count = self.env.metrics_handled_task_count.clone(); - let metrics_running_task_count = self.env.metrics_running_task_count.clone(); + let metrics_running_task_count = + self.env.metrics_running_task_count_by_priority[TaskPriority::Medium as usize].clone(); - self.gate_spawn()?; + self.gate_spawn(metrics_running_task_count.get() as usize)?; let (tx, rx) = oneshot::channel(); metrics_running_task_count.inc(); diff --git a/components/tikv_util/src/yatp_pool/metrics.rs b/components/tikv_util/src/yatp_pool/metrics.rs index efb1379dcc7..a3e68b260db 100644 --- a/components/tikv_util/src/yatp_pool/metrics.rs +++ b/components/tikv_util/src/yatp_pool/metrics.rs @@ -7,7 +7,7 @@ lazy_static! 
{ pub static ref FUTUREPOOL_RUNNING_TASK_VEC: IntGaugeVec = register_int_gauge_vec!( "tikv_futurepool_pending_task_total", "Current future_pool pending + running tasks.", - &["name"] + &["name", "priority"] ) .unwrap(); pub static ref FUTUREPOOL_HANDLED_TASK_VEC: IntCounterVec = register_int_counter_vec!( diff --git a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index 3cb237bad15..0b4cffbdc14 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -17,6 +17,7 @@ use yatp::{ }; use crate::{ + resource_control::{priority_from_task_meta, TaskPriority}, thread_group::GroupProperties, time::{Duration, Instant}, timer::GLOBAL_TIMER_HANDLE, @@ -166,9 +167,7 @@ pub struct YatpPoolRunner { // Statistics about the schedule wait duration. // local histogram for high,medium,low priority tasks. - schedule_wait_durations: [LocalHistogram; 3], - // return the index of `schedule_wait_durations` from task metadata. 
- metric_idx_from_task_meta: Arc usize + Send + Sync>, + schedule_wait_durations: [LocalHistogram; TaskPriority::PRIORITY_COUNT], } impl Runner for YatpPoolRunner { @@ -193,7 +192,7 @@ impl Runner for YatpPoolRunner { fn handle(&mut self, local: &mut Local, mut task_cell: Self::TaskCell) -> bool { let extras = task_cell.mut_extras(); if let Some(schedule_time) = extras.schedule_time() { - let idx = (*self.metric_idx_from_task_meta)(extras.metadata()); + let idx = priority_from_task_meta(extras.metadata()) as usize; self.schedule_wait_durations[idx].observe(schedule_time.elapsed().as_secs_f64()); } let finished = self.inner.handle(local, task_cell); @@ -232,8 +231,7 @@ impl YatpPoolRunner { after_start: Option>, before_stop: Option>, before_pause: Option>, - schedule_wait_durations: [Histogram; 3], - metric_idx_from_task_meta: Arc usize + Send + Sync>, + schedule_wait_durations: [Histogram; TaskPriority::PRIORITY_COUNT], ) -> Self { YatpPoolRunner { inner, @@ -243,7 +241,6 @@ impl YatpPoolRunner { before_stop, before_pause, schedule_wait_durations: schedule_wait_durations.map(|m| m.local()), - metric_idx_from_task_meta, } } } @@ -356,8 +353,8 @@ impl YatpPoolBuilder { self } - pub fn enable_task_wait_metrics(mut self) -> Self { - self.enable_task_wait_metrics = true; + pub fn enable_task_wait_metrics(mut self, enable: bool) -> Self { + self.enable_task_wait_metrics = enable; self } @@ -506,15 +503,13 @@ impl YatpPoolBuilder { let before_stop = self.before_stop.take(); let before_pause = self.before_pause.take(); let schedule_wait_durations = if self.enable_task_wait_metrics { - ["high", "medium", "low"].map(|p| { - metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[&name, p]) + TaskPriority::priorities().map(|p| { + metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC + .with_label_values(&[&name, p.as_str()]) }) } else { std::array::from_fn(|_| Histogram::with_opts(HistogramOpts::new("_", "_")).unwrap()) }; - let metric_idx_from_task_meta = self - 
.metric_idx_from_task_meta - .unwrap_or_else(|| Arc::new(|_| 0)); let read_pool_runner = YatpPoolRunner::new( Default::default(), self.ticker.clone(), @@ -522,7 +517,6 @@ impl YatpPoolBuilder { before_stop, before_pause, schedule_wait_durations, - metric_idx_from_task_meta, ); (builder, read_pool_runner) } @@ -545,7 +539,7 @@ mod tests { let name = "test_record_schedule_wait_duration"; let pool = YatpPoolBuilder::new(DefaultTicker::default()) .name_prefix(name) - .enable_task_wait_metrics() + .enable_task_wait_metrics(true) .build_single_level_pool(); let (tx, rx) = mpsc::channel(); for _ in 0..3 { @@ -565,7 +559,7 @@ mod tests { // Drop the pool so the local metrics are flushed. drop(pool); let histogram = - metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[name, "high"]); + metrics::YATP_POOL_SCHEDULE_WAIT_DURATION_VEC.with_label_values(&[name, "medium"]); assert_eq!(histogram.get_sample_count() as u32, 6, "{:?}", histogram); } diff --git a/src/config/mod.rs b/src/config/mod.rs index a862d01ace4..e5df8c3e153 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -2234,7 +2234,6 @@ pub struct UnifiedReadPoolConfig { pub max_thread_count: usize, #[online_config(skip)] pub stack_size: ReadableSize, - #[online_config(skip)] pub max_tasks_per_worker: usize, pub auto_adjust_pool_size: bool, // FIXME: Add more configs when they are effective in yatp diff --git a/src/read_pool.rs b/src/read_pool.rs index 22a11cb2b41..111d3f0ce8a 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -17,10 +17,11 @@ use kvproto::{errorpb, kvrpcpb::CommandPri}; use online_config::{ConfigChange, ConfigManager, ConfigValue, Result as CfgResult}; use prometheus::{core::Metric, Histogram, IntCounter, IntGauge}; use resource_control::{ - with_resource_limiter, ControlledFuture, ResourceController, ResourceLimiter, TaskMetadata, + with_resource_limiter, ControlledFuture, ResourceController, ResourceLimiter, }; use thiserror::Error; use tikv_util::{ + 
resource_control::TaskMetadata, sys::{cpu_time::ProcessStat, SysQuota}, time::Instant, worker::{Runnable, RunnableWithTimer, Scheduler, Worker}, @@ -238,6 +239,17 @@ impl ReadPoolHandle { } } + pub fn set_max_tasks_per_worker(&self, tasks_per_thread: usize) { + match self { + ReadPoolHandle::FuturePools { .. } => { + unreachable!() + } + ReadPoolHandle::Yatp { remote, .. } => { + remote.set_max_tasks_per_worker(tasks_per_thread); + } + } + } + pub fn get_ewma_time_slice(&self) -> Option { match self { ReadPoolHandle::FuturePools { .. } => None, @@ -401,7 +413,7 @@ pub fn build_yatp_read_pool( engine: E, resource_ctl: Option>, cleanup_method: CleanupMethod, - metric_idx_from_task_meta_fn: Option usize + Send + Sync + 'static>>, + enable_task_wait_metrics: bool, ) -> ReadPool { let unified_read_pool_name = get_unified_read_pool_name(); build_yatp_read_pool_with_name( @@ -411,7 +423,7 @@ pub fn build_yatp_read_pool( resource_ctl, cleanup_method, unified_read_pool_name, - metric_idx_from_task_meta_fn, + enable_task_wait_metrics, ) } @@ -422,10 +434,10 @@ pub fn build_yatp_read_pool_with_name( resource_ctl: Option>, cleanup_method: CleanupMethod, unified_read_pool_name: String, - metric_idx_from_task_meta_fn: Option usize + Send + Sync + 'static>>, + enable_task_wait_metrics: bool, ) -> ReadPool { let raftkv = Arc::new(Mutex::new(engine)); - let mut builder = YatpPoolBuilder::new(ReporterTicker { reporter }) + let builder = YatpPoolBuilder::new(ReporterTicker { reporter }) .name_prefix(&unified_read_pool_name) .cleanup_method(cleanup_method) .stack_size(config.stack_size.0 as usize) @@ -452,12 +464,8 @@ pub fn build_yatp_read_pool_with_name( }) .before_stop(|| unsafe { destroy_tls_engine::(); - }); - if let Some(metric_idx_from_task_meta_fn) = metric_idx_from_task_meta_fn { - builder = builder - .enable_task_wait_metrics() - .metric_idx_from_task_meta(metric_idx_from_task_meta_fn); - } + }) + .enable_task_wait_metrics(enable_task_wait_metrics); let pool = if let 
Some(ref r) = resource_ctl { builder.build_priority_future_pool(r.clone()) @@ -563,6 +571,9 @@ impl Runnable for ReadPoolConfigRunner { self.cur_thread_count = self.core_thread_count; } } + Task::MaxTasksPerWorker(s) => { + self.handle.set_max_tasks_per_worker(s); + } } } } @@ -647,6 +658,7 @@ impl ReadPoolConfigRunner { enum Task { PoolSize(usize), AutoAdjust(bool), + MaxTasksPerWorker(usize), } impl std::fmt::Display for Task { @@ -654,6 +666,7 @@ impl std::fmt::Display for Task { match self { Task::PoolSize(s) => write!(f, "PoolSize({})", *s), Task::AutoAdjust(s) => write!(f, "AutoAdjust({})", *s), + Task::MaxTasksPerWorker(s) => write!(f, "MaxTasksPerWorker({})", *s), } } } @@ -706,6 +719,10 @@ impl ConfigManager for ReadPoolConfigManager { if let Some(ConfigValue::Bool(b)) = unified.get("auto_adjust_pool_size") { self.scheduler.schedule(Task::AutoAdjust(*b))?; } + if let Some(ConfigValue::Usize(max_tasks)) = unified.get("max_tasks_per_worker") { + self.scheduler + .schedule(Task::MaxTasksPerWorker(*max_tasks))?; + } } info!( "readpool config changed"; @@ -745,6 +762,8 @@ mod tests { use std::{thread, time::Duration}; use futures::channel::oneshot; + use futures_executor::block_on; + use kvproto::kvrpcpb::ResourceControlContext; use raftstore::store::{ReadStats, WriteStats}; use resource_control::ResourceGroupManager; @@ -778,7 +797,7 @@ mod tests { None, CleanupMethod::InPlace, name.to_owned(), - None, + false, ); let gen_task = || { @@ -838,7 +857,7 @@ mod tests { engine, None, CleanupMethod::InPlace, - None, + false, ); let gen_task = || { @@ -891,7 +910,7 @@ mod tests { max_tasks_per_worker: 1, ..Default::default() }; - // max running tasks number should be 2*1 = 2 + // max running tasks number for each priority should be 2*1 = 2 let engine = TestEngineBuilder::new().build().unwrap(); let pool = build_yatp_read_pool( @@ -900,7 +919,7 @@ mod tests { engine, None, CleanupMethod::InPlace, - None, + false, ); let gen_task = || { @@ -931,6 +950,15 @@ mod tests { 
_ => panic!("should return full error"), } + // spawn a high-priority task, should not return Full error. + let (task_high, tx_h) = gen_task(); + let mut ctx = ResourceControlContext::default(); + ctx.override_priority = 16; // high priority + let metadata = TaskMetadata::from_ctx(&ctx); + let f = handle.spawn_handle(task_high, CommandPri::Normal, 6, metadata, None); + tx_h.send(()).unwrap(); + block_on(f).unwrap(); + tx1.send(()).unwrap(); tx2.send(()).unwrap(); thread::sleep(Duration::from_millis(300)); @@ -1027,7 +1055,7 @@ mod tests { resource_manager, CleanupMethod::InPlace, name.clone(), - None, + false, ); let gen_task = || { diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index 8674a581c72..c6d7b477db0 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -13,8 +13,7 @@ use pd_client::{Feature, FeatureGate}; use prometheus::local::*; use raftstore::store::WriteStats; use resource_control::{ - priority_from_task_meta, with_resource_limiter, ControlledFuture, ResourceController, - ResourceGroupManager, TaskMetadata, + with_resource_limiter, ControlledFuture, ResourceController, ResourceGroupManager, TaskMetadata, }; use tikv_util::{ sys::SysQuota, @@ -194,8 +193,7 @@ impl SchedPool { destroy_tls_engine::(); tls_flush(&reporter); }) - .enable_task_wait_metrics() - .metric_idx_from_task_meta(Arc::new(priority_from_task_meta)) + .enable_task_wait_metrics(true) }; let vanilla = VanillaQueue { worker_pool: builder(pool_size, "sched-worker-pool").build_future_pool(), From bc971dd5afa11ad6bd3a1641972e6b321636d6dc Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 29 Nov 2023 15:41:49 +0800 Subject: [PATCH 1036/1149] metrics: generate Grafana dashboards with python (#15857) close tikv/tikv#12796 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Makefile | 1 + metrics/grafana/README.md | 11 + metrics/grafana/common.py | 1087 + 
metrics/grafana/tikv_details.dashboard.py | 8602 ++++ metrics/grafana/tikv_details.json | 47480 +++++++++++++------- metrics/grafana/tikv_details.json.sha256 | 1 + scripts/check-dashboards | 14 + scripts/gen-tikv-details-dashboard | 29 + 8 files changed, 42190 insertions(+), 15035 deletions(-) create mode 100644 metrics/grafana/README.md create mode 100644 metrics/grafana/common.py create mode 100644 metrics/grafana/tikv_details.dashboard.py create mode 100644 metrics/grafana/tikv_details.json.sha256 create mode 100755 scripts/check-dashboards create mode 100755 scripts/gen-tikv-details-dashboard diff --git a/Makefile b/Makefile index 632794f3208..d7b0940fd5c 100644 --- a/Makefile +++ b/Makefile @@ -360,6 +360,7 @@ pre-clippy: unset-override clippy: pre-clippy @./scripts/check-redact-log @./scripts/check-log-style + @./scripts/check-dashboards @./scripts/check-docker-build @./scripts/check-license @./scripts/clippy-all diff --git a/metrics/grafana/README.md b/metrics/grafana/README.md new file mode 100644 index 00000000000..dec76a67529 --- /dev/null +++ b/metrics/grafana/README.md @@ -0,0 +1,11 @@ +# TiKV Grafana Dashboard + +The "TiKV Details" dashboard is generated by the `tikv_details.dashboard.py` +Python script. + +## Updating the Dashboard + +To add or update panels on the dashboard, make your changes in +`tikv_details.dashboard.py` and then run `./scripts/gen-tikv-details-dashboard`. + +Please avoid manually modifying `tikv_details.json`. 
diff --git a/metrics/grafana/common.py b/metrics/grafana/common.py new file mode 100644 index 00000000000..cb6757bee93 --- /dev/null +++ b/metrics/grafana/common.py @@ -0,0 +1,1087 @@ +from typing import Optional, Union + +import attr +from attr.validators import in_, instance_of +from grafanalib import formatunits as UNITS +from grafanalib.core import ( + NULL_AS_ZERO, + TIME_SERIES_TARGET_FORMAT, + DataSourceInput, + Graph, + GraphThreshold, + GridPos, + Heatmap, + HeatmapColor, + Legend, + Panel, + RowPanel, + SeriesOverride, + Stat, + StatValueMappings, + Target, + Template, + TimeSeries, + Tooltip, + YAxes, + YAxis, +) + +DATASOURCE_INPUT = DataSourceInput( + name="DS_TEST-CLUSTER", + label="test-cluster", + pluginId="prometheus", + pluginName="Prometheus", +) +DATASOURCE = f"${{{DATASOURCE_INPUT.name}}}" + + +@attr.s +class Expr(object): + """ + A prometheus expression that matches the following grammar: + + expr ::= ( + [aggr_param,] + [func]( + + [{,}] + [[]] + ) + ) [by (,)] [extra_expr] + """ + + metric: str = attr.ib(validator=instance_of(str)) + aggr_op: str = attr.ib( + default="", + validator=in_( + [ + "", + "sum", + "min", + "max", + "avg", + "group", + "stddev", + "stdvar", + "count", + "count_values", + "bottomk", + "topk", + "quantile", + ] + ), + ) + aggr_param: str = attr.ib(default="", validator=instance_of(str)) + func: str = attr.ib(default="", validator=instance_of(str)) + range_selector: str = attr.ib(default="", validator=instance_of(str)) + label_selectors: list[str] = attr.ib(default=[], validator=instance_of(list)) + by_labels: list[str] = attr.ib(default=[], validator=instance_of(list)) + default_label_selectors: list[str] = attr.ib( + default=[ + r'k8s_cluster="$k8s_cluster"', + r'tidb_cluster="$tidb_cluster"', + r'instance=~"$instance"', + ], + validator=instance_of(list), + ) + skip_default_instance: bool = attr.ib(default=False, validator=instance_of(bool)) + extra_expr: str = attr.ib(default="", validator=instance_of(str)) + + 
def __str__(self) -> str: + aggr_opeator = self.aggr_op if self.aggr_op else "" + aggr_param = self.aggr_param + "," if self.aggr_param else "" + by_clause = ( + "by ({})".format(", ".join(self.by_labels)) if self.by_labels else "" + ) + func = self.func if self.func else "" + label_selectors = self.default_label_selectors + self.label_selectors + if self.skip_default_instance: + # Remove instance=~"$instance" + label_selectors = [l for l in label_selectors if "$instance" not in l] + assert all( + ("=" in item or "~" in item) for item in label_selectors + ), f"Not all items contain '=' or '~', invalid {self.label_selectors}" + instant_selectors = ( + "{{{}}}".format(",".join(label_selectors)) if label_selectors else "" + ) + range_selector = f"[{self.range_selector}]" if self.range_selector else "" + extra_expr = self.extra_expr if self.extra_expr else "" + return f"""{aggr_opeator}({aggr_param}{func}( + {self.metric} + {instant_selectors} + {range_selector} +)) {by_clause} {extra_expr}""" + + def aggregate( + self, + aggr_op: str, + aggr_param: str = "", + by_labels: list[str] = [], + label_selectors: list[str] = [], + ) -> "Expr": + self.aggr_op = aggr_op + self.aggr_param = aggr_param + self.by_labels = by_labels + self.label_selectors = label_selectors + return self + + def function( + self, + func: str, + label_selectors: list[str] = [], + range_selector: str = "", + ) -> "Expr": + self.func = func + self.label_selectors = label_selectors + self.range_selector = range_selector + return self + + def extra( + self, + extra_expr: Optional[str] = None, + default_label_selectors: Optional[list[str]] = None, + ) -> "Expr": + if extra_expr is not None: + self.extra_expr = extra_expr + if default_label_selectors is not None: + self.default_label_selectors = default_label_selectors + return self + + def skip_default_instance_selector(self) -> "Expr": + self.skip_default_instance = True + return self + + +def expr_aggr( + metric: str, + aggr_op: str, + aggr_param: str = 
"", + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the aggregation of a metric. + + Example: + + sum(( + tikv_store_size_bytes + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + )) by (instance) + """ + expr = Expr(metric=metric) + expr.aggregate( + aggr_op, + aggr_param=aggr_param, + by_labels=by_labels, + label_selectors=label_selectors, + ) + return expr + + +def expr_sum( + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the sum of a metric. + + Example: + + sum(( + tikv_store_size_bytes + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + )) by (instance) + """ + return expr_aggr( + metric, "sum", label_selectors=label_selectors, by_labels=by_labels + ) + + +def expr_avg( + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the avg of a metric. + + Example: + + avg(( + tikv_store_size_bytes + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + )) by (instance) + """ + return expr_aggr( + metric, "avg", label_selectors=label_selectors, by_labels=by_labels + ) + + +def expr_max( + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the max of a metric. + + Example: + + max(( + tikv_store_size_bytes + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + )) by (instance) + """ + return expr_aggr( + metric, "max", label_selectors=label_selectors, by_labels=by_labels + ) + + +def expr_min( + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the min of a metric. 
+ + Example: + + min(( + tikv_store_size_bytes + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + )) by (instance) + """ + return expr_aggr( + metric, "min", label_selectors=label_selectors, by_labels=by_labels + ) + + +def expr_aggr_func( + metric: str, + aggr_op: str, + func: str, + aggr_param: str = "", + label_selectors: list[str] = [], + range_selector: str = "", + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the aggregation of function of a metric. + + Example: + + expr_aggr_func( + tikv_grpc_msg_duration_seconds_count, "sum", "rate", lables_selectors=['type!="kv_gc"'] + ) + + sum(rate( + tikv_grpc_msg_duration_seconds_count + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + [$__rate_interval] + )) by (instance) + """ + expr = Expr(metric=metric) + expr.aggregate( + aggr_op, + aggr_param=aggr_param, + by_labels=by_labels, + ) + expr.function( + func, + label_selectors=label_selectors, + range_selector=range_selector, + ) + return expr + + +def expr_sum_rate( + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the sum of rate of a metric. + + Example: + + sum(rate( + tikv_grpc_msg_duration_seconds_count + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + [$__rate_interval] + )) by (instance) + """ + # $__rate_interval is a Grafana variable that is specialized for Prometheus + # rate and increase function. 
+ # See https://grafana.com/blog/2020/09/28/new-in-grafana-7.2-__rate_interval-for-prometheus-rate-queries-that-just-work/ + return expr_aggr_func( + metric=metric, + aggr_op="sum", + func="rate", + label_selectors=label_selectors, + range_selector="$__rate_interval", + by_labels=by_labels, + ) + + +def expr_sum_delta( + metric: str, + label_selectors: list[str] = [], + range_selector: str = "$__rate_interval", + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the sum of delta of a metric. + + Example: + + sum(delta( + tikv_grpc_msg_duration_seconds_count + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + [$__rate_interval] + )) by (instance) + """ + return expr_aggr_func( + metric=metric, + aggr_op="sum", + func="delta", + label_selectors=label_selectors, + range_selector=range_selector, + by_labels=by_labels, + ) + + +def expr_sum_increase( + metric: str, + label_selectors: list[str] = [], + range_selector: str = "$__rate_interval", + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the sum of increase of a metric. + + Example: + + sum(increase( + tikv_grpc_msg_duration_seconds_count + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + [$__rate_interval] + )) by (instance) + """ + return expr_aggr_func( + metric=metric, + aggr_op="sum", + func="increase", + label_selectors=label_selectors, + range_selector=range_selector, + by_labels=by_labels, + ) + + +def expr_sum_aggr_over_time( + metric: str, + aggr: str, + range_selector: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the sum of average value of all points in the specified interval of a metric. 
+ + Example: + + sum(avg_over_time( + tikv_grpc_msg_duration_seconds_count + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + [1m] + )) by (instance) + """ + return expr_aggr_func( + metric=metric, + aggr_op="sum", + func=f"{aggr}_over_time", + label_selectors=label_selectors, + range_selector=range_selector, + by_labels=by_labels, + ) + + +def expr_max_rate( + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the max of rate of a metric. + + Example: + + max(rate( + tikv_thread_voluntary_context_switches + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + [$__rate_interval] + )) by (name) + """ + # $__rate_interval is a Grafana variable that is specialized for Prometheus + # rate and increase function. + # See https://grafana.com/blog/2020/09/28/new-in-grafana-7.2-__rate_interval-for-prometheus-rate-queries-that-just-work/ + return expr_aggr_func( + metric=metric, + aggr_op="max", + func="rate", + label_selectors=label_selectors, + range_selector="$__rate_interval", + by_labels=by_labels, + ) + + +def expr_count_rate( + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> Expr: + """ + Calculate the count of rate of a metric. + + Example: + + count(rate( + tikv_thread_cpu_seconds_total + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",name=~"sst_.*"} + [$__rate_interval] + )) by (instance) + """ + # $__rate_interval is a Grafana variable that is specialized for Prometheus + # rate and increase function. 
+ # See https://grafana.com/blog/2020/09/28/new-in-grafana-7.2-__rate_interval-for-prometheus-rate-queries-that-just-work/ + return expr_aggr_func( + metric=metric, + aggr_op="count", + func="rate", + label_selectors=label_selectors, + range_selector="$__rate_interval", + by_labels=by_labels, + ) + + +def expr_simple( + metric: str, + label_selectors: list[str] = [], +) -> Expr: + """ + Query an instant vector of a metric. + + Example: + + tikv_grpc_msg_duration_seconds_count + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + """ + expr = Expr(metric=metric) + expr.function("", label_selectors=label_selectors) + return expr + + +def expr_operator(lhs: Union[Expr, str], operator: str, rhs: Union[Expr, str]) -> str: + return f"""({lhs} {operator} {rhs})""" + + +def expr_histogram_quantile( + quantile: float, + metrics: str, + label_selectors: list[str] = [], + by_labels: list[str] = [], +) -> Expr: + """ + Query a quantile of a histogram metric. + + Example: + + histogram_quantile(0.99, sum(rate( + tikv_grpc_msg_duration_seconds_bucket + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance",type!="kv_gc"} + [$__rate_interval] + )) by (le)) + """ + # sum(rate(metrics_bucket{label_selectors}[$__rate_interval])) by (le) + assert not metrics.endswith( + "_bucket" + ), f"'{metrics}' should not specify '_bucket' suffix manually" + by_labels = list(filter(lambda label: label != "le", by_labels)) + sum_rate_of_buckets = expr_sum_rate( + metrics + "_bucket", + label_selectors=label_selectors, + by_labels=by_labels + ["le"], + ) + # histogram_quantile({quantile}, {sum_rate_of_buckets}) + return expr_aggr( + metric=f"{sum_rate_of_buckets}", + aggr_op="histogram_quantile", + aggr_param=f"{quantile}", + label_selectors=[], + by_labels=[], + ).extra( + # Do not attach default label selector again. 
+ default_label_selectors=[] + ) + + +def expr_topk( + k: int, + metrics: str, +) -> Expr: + """ + Query topk of a metric. + + Example: + + topk(20, tikv_thread_voluntary_context_switches) + """ + # topk({k}, {metric}) + return expr_aggr( + metric=metrics, + aggr_op="topk", + aggr_param=f"{k}", + label_selectors=[], + by_labels=[], + ).extra( + # Do not attach default label selector again. + default_label_selectors=[] + ) + + +def expr_histogram_avg( + metrics: str, + label_selectors: list[str] = [], + by_labels: list[str] = ["instance"], +) -> str: + """ + Query the avg of a histogram metric. + + Example: + + sum(rate( + tikv_grpc_msg_duration_seconds_sum + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance"} + [$__rate_interval] + )) / sum(rate( + tikv_grpc_msg_duration_seconds_count + {k8s_cluster="$k8s_cluster",tidb_cluster="$tidb_cluster",instance=~"$instance"} + [$__rate_interval] + )) + """ + for suffix in ["_bucket", "_count", "_sum"]: + assert not metrics.endswith( + suffix + ), f"'{metrics}' should not specify '{suffix}' suffix manually" + + return expr_operator( + expr_sum_rate( + metrics + "_sum", + label_selectors=label_selectors, + by_labels=by_labels, + ), + "/", + expr_sum_rate( + metrics + "_count", + label_selectors=label_selectors, + by_labels=by_labels, + ), + ) + + +def target( + expr: Union[Expr, str], + legend_format: Optional[str] = None, + hide=False, + data_source=DATASOURCE, + interval_factor=1, # Prefer "high" resolution +) -> Target: + if legend_format is None and isinstance(expr, Expr) and expr.by_labels: + legend_format = "-".join(map(lambda x: "{{" + f"{x}" + "}}", expr.by_labels)) + return Target( + expr=f"{expr}", + hide=hide, + legendFormat=legend_format, + intervalFactor=interval_factor, + datasource=data_source, + ) + + +def template( + name, + query, + data_source, + hide, + regex=None, + multi=False, + include_all=False, + all_value=None, +) -> Template: + return Template( + dataSource=data_source, + 
hide=hide, + label=name, + multi=multi, + name=name, + query=query, + refresh=2, + sort=1, + type="query", + useTags=False, + regex=regex, + includeAll=include_all, + allValue=all_value, + ) + + +class Layout: + # Rows are always 24 "units" wide. + ROW_WIDTH = 24 + PANEL_HEIGHT = 7 + row_panel: RowPanel + current_row_y_pos: int + current_row_x_pos: int + + def __init__(self, title, collapsed=True, repeat: Optional[str] = None) -> None: + extraJson = None + if repeat: + extraJson = {"repeat": repeat} + title = f"{title} - ${repeat}" + self.current_row_y_pos = 0 + self.current_row_x_pos = 0 + self.row_panel = RowPanel( + title=title, + gridPos=GridPos(h=self.PANEL_HEIGHT, w=self.ROW_WIDTH, x=0, y=0), + collapsed=collapsed, + extraJson=extraJson, + ) + + def row(self, panels: list[Panel], width: int = ROW_WIDTH): + """Start a new row and evenly scales panels width""" + count = len(panels) + if count == 0: + return panels + width = width // count + remain = self.ROW_WIDTH % count + x = self.current_row_x_pos % self.ROW_WIDTH + for panel in panels: + panel.gridPos = GridPos( + h=self.PANEL_HEIGHT, + w=width, + x=x, + y=self.current_row_y_pos, + ) + x += width + panels[-1].gridPos.w += remain + self.row_panel.panels.extend(panels) + self.current_row_y_pos += self.PANEL_HEIGHT + self.current_row_x_pos = x + + def half_row(self, panels: list[Panel]): + self.row(panels, self.ROW_WIDTH // 2) + + +def timeseries_panel( + title, + targets, + legend_calcs=["max", "last"], + unit="s", + draw_style="line", + line_width=1, + fill_opacity=10, + gradient_mode="opacity", + tooltip_mode="multi", + legend_display_mode="table", + legend_placement="right", + description=None, + data_source=DATASOURCE, +) -> TimeSeries: + return TimeSeries( + title=title, + dataSource=data_source, + description=description, + targets=targets, + legendCalcs=legend_calcs, + drawStyle=draw_style, + lineWidth=line_width, + fillOpacity=fill_opacity, + gradientMode=gradient_mode, + unit=unit, + 
tooltipMode=tooltip_mode, + legendDisplayMode=legend_display_mode, + legendPlacement=legend_placement, + ) + + +def yaxis(format: str, log_base=1) -> YAxis: + assert format not in [ + UNITS.BYTES, + UNITS.BITS, + UNITS.KILO_BYTES, + UNITS.MEGA_BYTES, + UNITS.GIGA_BYTES, + UNITS.TERA_BYTES, + UNITS.PETA_BYTES, + UNITS.BYTES_SEC, + UNITS.KILO_BYTES_SEC, + UNITS.MEGA_BYTES_SEC, + UNITS.GIGA_BYTES_SEC, + UNITS.TERA_BYTES_SEC, + UNITS.PETA_BYTES_SEC, + ], "Must not use SI bytes" + return YAxis(format=format, logBase=log_base) + + +def yaxes(left_format: str, right_format: Optional[str] = None, log_base=1) -> YAxes: + ya = YAxes(left=yaxis(left_format, log_base=log_base)) + if right_format is not None: + ya.right = yaxis(right_format, log_base=log_base) + return ya + + +def graph_legend( + avg=False, + current=True, + max=True, + min=False, + show=True, + total=False, + align_as_table=True, + hide_empty=True, + hide_zero=True, + right_side=True, + side_width=None, + sort_desc=True, +) -> Legend: + sort = "max" if max else "current" + return Legend( + avg=avg, + current=current, + max=max, + min=min, + show=show, + total=total, + alignAsTable=align_as_table, + hideEmpty=hide_empty, + hideZero=hide_zero, + rightSide=right_side, + sideWidth=side_width, + sort=sort, + sortDesc=sort_desc, + ) + + +def graph_panel( + title: str, + targets: list[Target], + description=None, + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + legend=None, + tooltip=Tooltip(shared=True, valueType="individual"), + lines=True, + line_width=1, + fill=1, + fill_gradient=1, + stack=False, + thresholds: list[GraphThreshold] = [], + series_overrides: list[SeriesOverride] = [], + data_source=DATASOURCE, + null_point_mode=NULL_AS_ZERO, +) -> Panel: + # extraJson add patches grafanalib result. + extraJson = {} + if fill_gradient != 0: + # fillGradient is only valid when fill is 1. + if fill == 0: + fill = 1 + # fillGradient is not set correctly in grafanalib(0.7.0), so we need to + # set it manually. 
+ # TODO: remove it when grafanalib fix this. + extraJson["fillGradient"] = 1 + for target in targets: + # Make sure target is in time_series format. + target.format = TIME_SERIES_TARGET_FORMAT + + return Graph( + title=title, + dataSource=data_source, + description=description, + targets=targets, + yAxes=yaxes, + legend=legend if legend else graph_legend(), + lines=lines, + bars=not lines, + lineWidth=line_width, + fill=fill, + fillGradient=fill_gradient, + stack=stack, + nullPointMode=null_point_mode, + thresholds=thresholds, + tooltip=tooltip, + seriesOverrides=series_overrides, + # Do not specify max max data points, let Grafana decide. + maxDataPoints=None, + extraJson=extraJson, + ) + + +def series_override( + alias: str, + bars: bool = False, + lines: bool = True, + yaxis: int = 1, + fill: int = 1, + zindex: int = 0, + dashes: Optional[bool] = None, + dash_length: Optional[int] = None, + space_length: Optional[int] = None, + transform_negative_y: bool = False, +) -> SeriesOverride: + class SeriesOverridePatch(SeriesOverride): + dashes_override: Optional[bool] + dash_length_override: Optional[int] + space_length_override: Optional[int] + transform_negative_y: bool + + def __init__(self, *args, **kwargs) -> None: + self.dashes_override = kwargs["dashes"] + if self.dashes_override is None: + del kwargs["dashes"] + self.dash_length_override = kwargs["dashLength"] + if self.dash_length_override is None: + del kwargs["dashLength"] + self.space_length_override = kwargs["spaceLength"] + if self.space_length_override is None: + del kwargs["spaceLength"] + self.transform_negative_y = kwargs["transform_negative_y"] + del kwargs["transform_negative_y"] + super().__init__(*args, **kwargs) + + def to_json_data(self): + data = super().to_json_data() + # The default 'null' color makes it transparent, remove it. + del data["color"] + # The default 'null' makes it a transparent line, remove it. 
+ if self.dashes_override is None: + del data["dashes"] + if self.dash_length_override is None: + del data["dashLength"] + if self.space_length_override is None: + del data["spaceLength"] + # Add missing transform. + if self.transform_negative_y: + data["transform"] = "negative-Y" + return data + + return SeriesOverridePatch( + alias=alias, + bars=bars, + lines=lines, + yaxis=yaxis, + fill=fill, + zindex=zindex, + dashes=dashes, + dashLength=dash_length, + spaceLength=space_length, + transform_negative_y=transform_negative_y, + ) + + +def heatmap_color() -> HeatmapColor: + return HeatmapColor( + cardColor="#b4ff00", + colorScale="sqrt", + colorScheme="interpolateSpectral", + exponent=0.5, + mode="spectrum", + max=None, + min=None, + ) + + +def heatmap_panel( + title: str, + metric: str, + description=None, + label_selectors: list[str] = [], + yaxis=yaxis(UNITS.NO_FORMAT), + tooltip=Tooltip(shared=True, valueType="individual"), + color=heatmap_color(), + decimals=1, + data_source=DATASOURCE, +) -> Panel: + assert metric.endswith( + "_bucket" + ), f"'{metric}' should be a histogram metric with '_bucket' suffix" + t = target( + expr=expr_sum_rate(metric, label_selectors=label_selectors, by_labels=["le"]), + ) + # Make sure targets are in heatmap format. + t.format = "heatmap" + # Heatmap target legendFormat should be "{{le}}" + t.legendFormat = "{{le}}" + # Overrides yaxis decimal places. + yaxis.decimals = decimals + return Heatmap( + title=title, + dataSource=data_source, + description=description, + targets=[t], + yAxis=yaxis, + color=color, + dataFormat="tsbuckets", + yBucketBound="upper", + tooltip=tooltip, + extraJson={"tooltip": {"showHistogram": True}}, + hideZeroBuckets=True, + # Limit data points, because too many data points slows browser when + # the resolution is too high. 
+ # See: https://grafana.com/blog/2020/06/23/how-to-visualize-prometheus-histograms-in-grafana/ + maxDataPoints=512, + ) + + +def stat_panel( + title: str, + targets: list[Target], + description=None, + format=UNITS.NONE_FORMAT, + graph_mode="none", + decimals: Optional[int] = None, + mappings: Optional[StatValueMappings] = None, + text_mode: str = "auto", + data_source=DATASOURCE, +) -> Panel: + for target in targets: + # Make sure target is in time_series format. + target.format = TIME_SERIES_TARGET_FORMAT + return Stat( + title=title, + dataSource=data_source, + description=description, + targets=targets, + format=format, + graphMode=graph_mode, + reduceCalc="lastNotNull", + decimals=decimals, + mappings=mappings, + textMode=text_mode, + ) + + +def graph_panel_histogram_quantiles( + title: str, + description: str, + yaxes: YAxes, + metric: str, + label_selectors: list[str] = [], + by_labels: list[str] = [], + hide_avg=False, + hide_count=False, +) -> Panel: + """ + Return a graph panel that shows histogram quantiles of a metric. 
+ + Targets: + - 99.99% quantile + - 99% quantile + - avg + - count + """ + + def legend(prefix, labels): + if not labels: + return prefix + else: + return "-".join([prefix] + ["{{%s}}" % lb for lb in labels]) + + return graph_panel( + title=title, + description=description, + yaxes=yaxes, + targets=[ + target( + expr=expr_histogram_quantile( + 0.9999, + f"{metric}", + label_selectors=label_selectors, + by_labels=by_labels, + ), + legend_format=legend("99.99%", by_labels), + ), + target( + expr=expr_histogram_quantile( + 0.99, + f"{metric}", + label_selectors=label_selectors, + by_labels=by_labels, + ), + legend_format=legend("99%", by_labels), + ), + target( + expr=expr_histogram_avg( + metric, + label_selectors=label_selectors, + by_labels=by_labels, + ), + legend_format=legend("avg", by_labels), + hide=hide_avg, + ), + target( + expr=expr_sum_rate( + f"{metric}_count", + label_selectors=label_selectors, + by_labels=by_labels, + ), + legend_format=legend("count", by_labels), + hide=hide_count, + ), + ], + series_overrides=[ + series_override( + alias="count", + fill=2, + yaxis=2, + zindex=-3, + dashes=True, + dash_length=1, + space_length=1, + transform_negative_y=True, + ), + series_override( + alias="avg", + fill=7, + ), + ], + ) + + +def heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title: str, + heatmap_description: str, + graph_title: str, + graph_description: str, + yaxis_format: str, + metric: str, + label_selectors=[], + graph_by_labels=[], +) -> list[Panel]: + return [ + heatmap_panel( + title=heatmap_title, + description=heatmap_description, + yaxis=yaxis(format=yaxis_format), + metric=f"{metric}_bucket", + label_selectors=label_selectors, + ), + graph_panel_histogram_quantiles( + title=graph_title, + description=graph_description, + metric=f"{metric}", + yaxes=yaxes(left_format=yaxis_format), + by_labels=graph_by_labels, + hide_count=True, + ), + ] diff --git a/metrics/grafana/tikv_details.dashboard.py 
b/metrics/grafana/tikv_details.dashboard.py new file mode 100644 index 00000000000..ade81f717fd --- /dev/null +++ b/metrics/grafana/tikv_details.dashboard.py @@ -0,0 +1,8602 @@ +import os +import sys + +sys.path.append(os.path.dirname(__file__)) + +from common import ( + DATASOURCE, + DATASOURCE_INPUT, + Layout, + expr_avg, + expr_count_rate, + expr_histogram_avg, + expr_histogram_quantile, + expr_max, + expr_max_rate, + expr_min, + expr_operator, + expr_simple, + expr_sum, + expr_sum_aggr_over_time, + expr_sum_delta, + expr_sum_increase, + expr_sum_rate, + expr_topk, + graph_legend, + graph_panel, + graph_panel_histogram_quantiles, + heatmap_panel, + heatmap_panel_graph_panel_histogram_quantile_pairs, + series_override, + stat_panel, + target, + template, + yaxes, + yaxis, +) +from grafanalib import formatunits as UNITS +from grafanalib.core import ( + GRAPH_TOOLTIP_MODE_SHARED_CROSSHAIR, + HIDE_VARIABLE, + NULL_AS_NULL, + SHOW, + Dashboard, + GraphThreshold, + RowPanel, + StatValueMappingItem, + StatValueMappings, + Templating, +) + +#### Metrics Definition Start #### + + +def Templates() -> Templating: + return Templating( + list=[ + template( + name="k8s_cluster", + query="label_values(tikv_engine_block_cache_size_bytes, k8s_cluster)", + data_source=DATASOURCE, + hide=HIDE_VARIABLE, + ), + template( + name="tidb_cluster", + query='label_values(tikv_engine_block_cache_size_bytes{k8s_cluster ="$k8s_cluster"}, tidb_cluster)', + data_source=DATASOURCE, + hide=HIDE_VARIABLE, + ), + template( + name="db", + query='label_values(tikv_engine_block_cache_size_bytes{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster"}, db)', + data_source=DATASOURCE, + hide=SHOW, + multi=True, + include_all=True, + ), + template( + name="command", + query='query_result(tikv_storage_command_total{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster"} != 0)', + data_source=DATASOURCE, + hide=SHOW, + regex='/\\btype="([^"]+)"/', + multi=True, + include_all=True, + ), + template( + 
name="instance", + query='label_values(tikv_engine_size_bytes{k8s_cluster ="$k8s_cluster", tidb_cluster="$tidb_cluster"}, instance)', + data_source=DATASOURCE, + hide=SHOW, + include_all=True, + all_value=".*", + ), + template( + name="titan_db", + query='label_values(tikv_engine_titandb_num_live_blob_file{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster"}, db)', + data_source=DATASOURCE, + hide=HIDE_VARIABLE, + multi=True, + include_all=True, + ), + ] + ) + + +def Duration() -> RowPanel: + layout = Layout(title="Duration") + layout.row( + [ + graph_panel( + title="Write Pipeline Duration", + description="Write Pipeline Composition", + yaxes=yaxes(left_format=UNITS.SECONDS), + lines=False, + stack=True, + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, "tikv_raftstore_append_log_duration_seconds" + ), + legend_format="Write Raft Log .99", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_request_wait_time_duration_secs", + ), + legend_format="Propose Wait .99", + ), + target( + expr=expr_histogram_quantile( + 0.99, "tikv_raftstore_apply_wait_time_duration_secs" + ), + legend_format="Apply Wait .99", + ), + target( + expr=expr_histogram_quantile( + 0.99, "tikv_raftstore_commit_log_duration_seconds" + ), + legend_format="Replicate Raft Log .99", + ), + target( + expr=expr_histogram_quantile( + 0.99, "tikv_raftstore_apply_log_duration_seconds" + ), + legend_format="Apply Duration .99", + ), + ], + ), + graph_panel( + title="Cop Read Duration", + description="Read Duration Composition", + yaxes=yaxes(left_format=UNITS.SECONDS), + lines=False, + stack=True, + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_storage_engine_async_request_duration_seconds", + ['type="snapshot"'], + ), + legend_format="Get Snapshot .99", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_coprocessor_request_wait_seconds", + ['type="all"'], + ), + legend_format="Cop Wait .99", + ), + target( + 
expr=expr_histogram_quantile( + 0.95, "tikv_coprocessor_request_handle_seconds" + ), + legend_format="Cop Handle .99", + ), + ], + ), + ] + ) + return layout.row_panel + + +def Cluster() -> RowPanel: + layout = Layout(title="Cluster") + layout.row( + [ + graph_panel( + title="Store size", + description="The storage size per TiKV instance", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + fill=1, + stack=True, + legend=graph_legend(max=False), + targets=[ + target( + expr=expr_sum( + "tikv_store_size_bytes", + label_selectors=['type = "used"'], + ), + ), + ], + ), + graph_panel( + title="Available size", + description="The available capacity size of each TiKV instance", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + fill=1, + stack=True, + legend=graph_legend(max=False), + targets=[ + target( + expr=expr_sum( + "tikv_store_size_bytes", + label_selectors=['type="available"'], + ), + ), + ], + ), + graph_panel( + title="Capacity size", + description="The capacity size per TiKV instance", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + fill=1, + stack=True, + legend=graph_legend(max=False), + targets=[ + target( + expr=expr_sum( + "tikv_store_size_bytes", + label_selectors=['type="capacity"'], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="CPU", + description="The CPU usage of each TiKV instance", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "process_cpu_seconds_total", + ), + ), + ], + ), + graph_panel( + title="Memory", + description="The memory usage per TiKV instance", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum("process_resident_memory_bytes"), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="IO utilization", + description="The I/O utilization per TiKV instance", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "node_disk_io_time_seconds_total", + ), + legend_format=r"{{instance}}-{{device}}", + ), + ], 
+ ), + graph_panel( + title="MBps", + description="The total bytes of read and write in each TiKV instance", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=['type="wal_file_bytes"'], + ), + legend_format=r"{{instance}}-write", + ), + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=['type=~"bytes_read|iter_bytes_read"'], + ), + legend_format=r"{{instance}}-read", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="QPS", + description="The number of leaders on each TiKV instance", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_msg_duration_seconds_count", + label_selectors=['type!="kv_gc"'], + ), + legend_format=r"{{instance}}-{{type}}", + ), + ], + ), + graph_panel( + title="Errps", + description="The total number of the gRPC message failures", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_msg_fail_total", + label_selectors=['type!="kv_gc"'], + ), + legend_format=r"{{instance}}-grpc-msg-fail", + ), + target( + expr=expr_sum_delta( + "tikv_pd_heartbeat_message_total", + label_selectors=['type="noop"'], + ).extra(extra_expr="< 1"), + legend_format=r"{{instance}}-pd-heartbeat", + ), + target( + expr=expr_sum_rate( + "tikv_critical_error_total", + by_labels=["instance", "type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Leader", + description="The number of leaders on each TiKV instance", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_region_count", + label_selectors=['type="leader"'], + ), + ), + ], + ), + graph_panel( + title="Region", + description="The number of Regions and Buckets on each TiKV instance", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_region_count", + label_selectors=['type="region"'], + ), + ), + target( + expr=expr_sum( + "tikv_raftstore_region_count", + 
label_selectors=['type="buckets"'], + ), + legend_format=r"{{instance}}-buckets", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Uptime", + description="TiKV uptime since the last restart", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_operator( + "time()", "-", expr_simple("process_start_time_seconds") + ), + legend_format=r"{{instance}}", + ), + ], + ) + ] + ) + return layout.row_panel + + +def Errors() -> RowPanel: + layout = Layout(title="Errors") + layout.row( + [ + graph_panel( + title="Critical error", + targets=[ + target( + expr=expr_sum_rate( + "tikv_critical_error_total", + by_labels=["instance", "type"], + ), + ), + ], + thresholds=[GraphThreshold(value=0.0)], + ) + ] + ) + layout.row( + [ + graph_panel( + title="Server is busy", + description=""" +Indicates occurrences of events that make the TiKV instance unavailable +temporarily, such as Write Stall, Channel Full, Scheduler Busy, and Coprocessor +Full""", + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_too_busy_total", + ), + legend_format=r"scheduler-{{instance}}", + ), + target( + expr=expr_sum_rate( + "tikv_channel_full_total", + by_labels=["instance", "type"], + ), + legend_format=r"channelfull-{{instance}}-{{type}}", + ), + target( + expr=expr_sum_rate( + "tikv_coprocessor_request_error", + label_selectors=['type="full"'], + ), + legend_format=r"coprocessor-{{instance}}", + ), + target( + expr=expr_avg( + "tikv_engine_write_stall", + label_selectors=[ + 'type="write_stall_percentile99"', + 'db=~"$db"', + ], + by_labels=["instance", "db"], + ), + legend_format=r"stall-{{instance}}-{{db}}", + ), + target( + expr=expr_sum_rate( + "tikv_raftstore_store_write_msg_block_wait_duration_seconds_count", + ), + legend_format=r"store-write-channelfull-{{instance}}", + ), + ], + ), + graph_panel( + title="Server report failures", + description="The total number of reporting failure messages", + targets=[ + target( + expr=expr_sum_rate( + 
"tikv_server_report_failure_msg_total", + by_labels=["type", "instance", "store_id"], + ), + legend_format=r"{{instance}}-{{type}}-to-{{store_id}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raftstore error", + description="The number of different raftstore errors on each TiKV instance", + targets=[ + target( + expr=expr_sum_rate( + "tikv_storage_engine_async_request_total", + label_selectors=['status!~"success|all"'], + by_labels=["instance", "status"], + ), + ), + ], + ), + graph_panel( + title="Scheduler error", + description="The number of scheduler errors per type on each TiKV instance", + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_stage_total", + label_selectors=['stage=~"snapshot_err|prepare_write_err"'], + by_labels=["instance", "stage"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Coprocessor error", + description="The number of different coprocessor errors on each TiKV instance", + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_request_error", + by_labels=["instance", "reason"], + ), + ), + ], + ), + graph_panel( + title="gRPC message error", + description="The number of gRPC message errors per type on each TiKV instance", + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_msg_fail_total", + by_labels=["instance", "type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Leader drop", + description="The count of dropped leaders per TiKV instance", + targets=[ + target( + expr=expr_sum_delta( + "tikv_raftstore_region_count", + label_selectors=['type="leader"'], + ), + ), + ], + ), + graph_panel( + title="Leader missing", + description="The count of missing leaders per TiKV instance", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_leader_missing", + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Damaged files", + description="RocksDB damaged SST files", + targets=[ + target( + 
expr=expr_simple("tikv_rocksdb_damaged_files"), + legend_format=r"{{instance}}-existed", + ), + target( + expr=expr_simple("tikv_rocksdb_damaged_files_deleted"), + legend_format=r"{{instance}}-deleted", + ), + ], + ), + graph_panel( + title="Log Replication Rejected", + description="The count of Log Replication Reject caused by follower memory insufficient", + targets=[ + target( + expr=expr_sum_rate( + "tikv_server_raft_append_rejects", + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def Server() -> RowPanel: + layout = Layout(title="Server") + layout.row( + [ + graph_panel( + title="CF size", + description="The size of each column family", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum("tikv_engine_size_bytes", by_labels=["type"]), + ), + ], + ), + graph_panel( + title="Channel full", + description="The total number of channel full errors on each TiKV instance", + targets=[ + target( + expr=expr_sum_rate( + "tikv_channel_full_total", by_labels=["instance", "type"] + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Active written leaders", + description="The number of leaders being written on each TiKV instance", + targets=[ + target( + expr=expr_sum_rate( + "tikv_region_written_keys_count", + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Approximate region size", + metric="tikv_raftstore_region_size_bucket", + yaxis=yaxis(format=UNITS.BYTES_IEC), + ), + graph_panel_histogram_quantiles( + title="Approximate region size", + description="The approximate Region size", + metric="tikv_raftstore_region_size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + hide_count=True, + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Region written bytes", + metric="tikv_region_written_bytes_bucket", + yaxis=yaxis(format=UNITS.BYTES_IEC), + ), + graph_panel( + title="Region average written bytes", + description="The average rate of writing bytes to Regions per TiKV instance", + 
yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_histogram_avg("tikv_region_written_bytes"), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Region written keys", + metric="tikv_region_written_keys_bucket", + ), + graph_panel( + title="Region average written keys", + description="The average rate of written keys to Regions per TiKV instance", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_histogram_avg("tikv_region_written_keys"), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Hibernate Peers", + description="The number of peers in hibernated state", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_hibernated_peer_state", + by_labels=["instance", "state"], + ), + ), + ], + ), + graph_panel( + title="Memory trace", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_simple( + "tikv_server_mem_trace_sum", + label_selectors=['name=~"raftstore-.*"'], + ), + legend_format="{{instance}}-{{name}}", + ), + target( + expr=expr_simple( + "raft_engine_memory_usage", + ), + legend_format="{{instance}}-raft-engine", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raft Entry Cache Evicts", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raft_entries_evict_bytes", + ), + ), + ], + ), + graph_panel( + title="Resolve address duration", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_server_address_resolve_duration_secs", + by_labels=["instance"], + ), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="99% Thread Pool Schedule Wait Duration", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_yatp_pool_schedule_wait_duration", + by_labels=["name"], + 
), + legend_format="{{name}}", + ), + ], + thresholds=[GraphThreshold(value=1.0)], + ), + graph_panel( + title="Average Thread Pool Schedule Wait Duration", + description="The average rate of written keys to Regions per TiKV instance", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + targets=[ + target( + expr=expr_histogram_avg( + "tikv_yatp_pool_schedule_wait_duration", + by_labels=["name"], + ), + legend_format="{{name}}", + ), + ], + thresholds=[GraphThreshold(value=1.0)], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Disk IO time per second", + yaxes=yaxes(left_format=UNITS.NANO_SECONDS), + lines=False, + stack=True, + targets=[ + target( + expr=expr_sum_rate( + "tikv_storage_rocksdb_perf", + label_selectors=['metric="block_read_time"'], + by_labels=["req"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_coprocessor_rocksdb_perf", + label_selectors=['metric="block_read_time"'], + by_labels=["req"], + ), + legend_format="copr-{{req}}", + ), + ], + ), + graph_panel( + title="Disk IO bytes per second", + yaxes=yaxes(left_format=UNITS.NANO_SECONDS), + lines=False, + stack=True, + targets=[ + target( + expr=expr_sum_rate( + "tikv_storage_rocksdb_perf", + label_selectors=['metric="block_read_byte"'], + by_labels=["req"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_coprocessor_rocksdb_perf", + label_selectors=['metric="block_read_byte"'], + by_labels=["req"], + ), + legend_format="copr-{{req}}", + ), + ], + ), + ] + ) + return layout.row_panel + + +def gRPC() -> RowPanel: + layout = Layout(title="gRPC") + layout.row( + [ + graph_panel( + title="gRPC message count", + description="The count of different kinds of gRPC message", + yaxes=yaxes(left_format=UNITS.REQUESTS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_msg_duration_seconds_count", + label_selectors=['type!="kv_gc"'], + by_labels=["type"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_grpc_msg_duration_seconds_count", + label_selectors=['type!="kv_gc"'], + 
by_labels=["type", "priority"], + ), + hide=True, + ), + ], + ), + graph_panel( + title="gRPC message failed", + description="The count of different kinds of gRPC message which is failed", + yaxes=yaxes(left_format=UNITS.REQUESTS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_msg_fail_total", + label_selectors=['type!="kv_gc"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title=r"99% gRPC message duration", + description=r"The 99% percentile of execution time of gRPC message", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_grpc_msg_duration_seconds", + label_selectors=['type!="kv_gc"'], + by_labels=["type"], + ), + legend_format="{{type}}", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_grpc_msg_duration_seconds", + label_selectors=['type!="kv_gc"'], + by_labels=["type", "priority"], + ), + legend_format="{{type}}-{{priority}}", + hide=True, + ), + ], + ), + graph_panel( + title="Average gRPC message duration", + description="The average execution time of gRPC message", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + targets=[ + target( + expr=expr_histogram_avg( + "tikv_grpc_msg_duration_seconds", + by_labels=["type"], + ), + legend_format="{{type}}", + ), + target( + expr=expr_histogram_avg( + "tikv_grpc_msg_duration_seconds", + by_labels=["type", "priority"], + ), + legend_format="{{type}}-{{priority}}", + hide=True, + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="gRPC batch size", + description=r"The 99% percentile of execution time of gRPC message", + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_server_grpc_req_batch_size", + ), + legend_format=r"99% request", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_server_grpc_resp_batch_size", + ), + legend_format=r"99% response", + ), + target( + expr=expr_histogram_avg( + 
"tikv_server_grpc_req_batch_size", + by_labels=[], # override default by instance. + ), + legend_format="avg request", + ), + target( + expr=expr_histogram_avg( + "tikv_server_grpc_resp_batch_size", + by_labels=[], # override default by instance. + ), + legend_format="avg response", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_server_request_batch_size", + ), + legend_format=r"99% kv get batch", + ), + target( + expr=expr_histogram_avg( + "tikv_server_request_batch_size", + by_labels=[], # override default by instance. + ), + legend_format="avg kv batch", + ), + ], + ), + graph_panel( + title="raft message batch size", + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_server_raft_message_batch_size", + ), + legend_format=r"99%", + ), + target( + expr=expr_histogram_avg( + "tikv_server_raft_message_batch_size", + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="gRPC request sources QPS", + description="The QPS of different sources of gRPC request", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_request_source_counter_vec", + by_labels=["source"], + ), + ), + ], + ), + graph_panel( + title="gRPC request sources duration", + description="The duration of different sources of gRPC request", + yaxes=yaxes(left_format=UNITS.SECONDS), + lines=False, + stack=True, + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_request_source_duration_vec", + by_labels=["source"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="gRPC resource group QPS", + description="The QPS of different resource groups of gRPC request", + targets=[ + target( + expr=expr_sum_rate( + "tikv_grpc_resource_group_total", by_labels=["name"] + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def ThreadCPU() -> RowPanel: + layout = Layout(title="Thread CPU") + layout.row( + [ + graph_panel( + 
title="Raft store CPU", + description="The CPU utilization of raftstore thread", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"(raftstore|rs)_.*"'], + ), + ), + ], + ), + graph_panel( + title="Async apply CPU", + description="The CPU utilization of async apply", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"apply_[0-9]+"'], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Store writer CPU", + description="The CPU utilization of store writer thread", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"store_write.*"'], + ), + ), + ], + thresholds=[GraphThreshold(value=0.8)], + ), + graph_panel( + title="gRPC poll CPU", + description="The CPU utilization of gRPC", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"grpc.*"'], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Scheduler worker CPU", + description="The CPU utilization of scheduler worker", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"sched_.*"'], + ), + ), + ], + thresholds=[GraphThreshold(value=3.6)], + ), + graph_panel( + title="Storage ReadPool CPU", + description="The CPU utilization of readpool", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"store_read_norm.*"'], + ), + legend_format="{{instance}}-normal", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"store_read_high.*"'], + ), + 
legend_format="{{instance}}-high", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"store_read_low.*"'], + ), + legend_format="{{instance}}-low", + ), + ], + thresholds=[GraphThreshold(value=3.6)], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Unified read pool CPU", + description="The CPU utilization of the unified read pool", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"unified_read_po.*"'], + ), + ), + ], + thresholds=[GraphThreshold(value=7.2)], + ), + graph_panel( + title="RocksDB CPU", + description="The CPU utilization of RocksDB", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"rocksdb.*"'], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Coprocessor CPU", + description="The CPU utilization of coprocessor", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"cop_normal.*"'], + ), + legend_format="{{instance}}-normal", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"cop_high.*"'], + ), + legend_format="{{instance}}-high", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"cop_low.*"'], + ), + legend_format="{{instance}}-low", + ), + ], + thresholds=[GraphThreshold(value=7.2)], + ), + graph_panel( + title="GC worker CPU", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"gc_worker.*"'], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Background Worker CPU", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + 
"tikv_thread_cpu_seconds_total", + label_selectors=['name=~"background.*"'], + ), + ), + ], + ), + graph_panel( + title="Raftlog fetch Worker CPU", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"raftlog_fetch.*"'], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Import CPU", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"sst_.*"'], + ), + ), + ], + ), + graph_panel( + title="Backup CPU", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"(backup-worker|bkwkr|backup_endpoint).*"' + ], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="CDC worker CPU", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"cdcwkr.*"'], + ), + legend_format="{{instance}}-worker", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"tso"'], + ), + legend_format="{{instance}}-tso", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"cdc_.*"'], + ), + legend_format="{{instance}}-endpoint", + ), + ], + ), + graph_panel( + title="TSO Worker CPU", + description="The CPU utilization of raftstore thread", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"tso_worker"'], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def TTL() -> RowPanel: + layout = Layout(title="TTL") + layout.row( + [ + graph_panel( + title="TTL expire count", + targets=[ + target( + expr=expr_sum_rate( + "tikv_ttl_expire_kv_count_total", + ), + 
legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="TTL expire size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_ttl_expire_kv_size_total", + ) + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="TTL check progress", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_operator( + expr_sum_rate( + "tikv_ttl_checker_processed_regions", + ), + "/", + expr_sum_rate( + "tikv_raftstore_region_count", + label_selectors=['type="region"'], + ), + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="TTL checker actions", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_ttl_checker_actions", by_labels=["type"] + ) + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="TTL checker compact duration", + description="The time consumed when executing GC tasks", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_ttl_checker_compact_duration", + ), + stat_panel( + title="TTL checker poll interval", + format=UNITS.MILLI_SECONDS, + targets=[ + target( + expr=expr_max( + "tikv_ttl_checker_poll_interval", + label_selectors=['type="tikv_gc_run_interval"'], + by_labels=[], # override default by instance. 
+ ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def PD() -> RowPanel: + layout = Layout(title="PD") + layout.row( + [ + graph_panel( + title="PD requests", + description="The count of requests that TiKV sends to PD", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_pd_request_duration_seconds_count", + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="PD request duration (average)", + description="The time consumed by requests that TiKV sends to PD", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_avg( + "tikv_pd_request_duration_seconds", + by_labels=["type"], + ), + legend_format="{{type}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="PD heartbeats", + description="The total number of PD heartbeat messages", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_pd_heartbeat_message_total", + by_labels=["type"], + ), + ), + target( + expr=expr_sum( + "tikv_pd_pending_heartbeat_total", + ), + legend_format="{{instance}}-pending", + ), + ], + ), + graph_panel( + title="PD validate peers", + description="The total number of peers validated by the PD worker", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_pd_validate_peer_total", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="PD reconnection", + description="The count of reconnection between TiKV and PD", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum_delta( + "tikv_pd_reconnect_total", + range_selector="$__rate_interval", + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="PD forward status", + description="The forward status of PD client", + targets=[ + target( + expr=expr_simple( + "tikv_pd_request_forwarded", + ), + legend_format="{{instance}}-{{host}}", + ), + ], + ), + ] + ) + layout.row( + [ + 
graph_panel( + title="Pending TSO Requests", + description="The number of TSO requests waiting in the queue.", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum( + "tikv_pd_pending_tso_request_total", + ), + ), + ], + ), + graph_panel( + title="Store Slow Score", + description="The slow score of stores", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_slow_score", + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Inspected duration per server", + description="The duration that recorded by inspecting messages.", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_inspect_duration_seconds", + by_labels=["instance", "type"], + ), + legend_format="{{instance}}-{{type}}", + ), + ], + ) + ] + ) + return layout.row_panel + + +def IOBreakdown() -> RowPanel: + layout = Layout(title="IO Breakdown") + layout.row( + [ + graph_panel( + title="Write IO bytes", + description="The throughput of disk write per IO type", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_io_bytes", + label_selectors=['op="write"'], + by_labels=["type"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_io_bytes", + label_selectors=['op="write"'], + by_labels=[], # override default by instance. + ), + legend_format="total", + ), + ], + ), + graph_panel( + title="Read IO bytes", + description="The throughput of disk read per IO type", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_io_bytes", + label_selectors=['op="read"'], + by_labels=["type"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_io_bytes", + label_selectors=['op="read"'], + by_labels=[], # override default by instance. 
+ ), + legend_format="total", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="IO threshold", + description="The threshold of disk IOs per priority", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_rate_limiter_max_bytes_per_sec", + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="Rate Limiter Request Wait Duration", + description="IO rate limiter request wait duration.", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_rate_limiter_request_wait_duration_seconds", + by_labels=["type"], + ), + legend_format=r"{{type}}-99%", + ), + target( + expr=expr_histogram_avg( + "tikv_rate_limiter_request_wait_duration_seconds", + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + return layout.row_panel + + +def RaftWaterfall() -> RowPanel: + layout = Layout(title="Raft Waterfall") + layout.row( + [ + graph_panel_histogram_quantiles( + title="Storage async write duration", + description="The time consumed by processing asynchronous write requests", + yaxes=yaxes(left_format=UNITS.SECONDS, right_format=UNITS.NONE_FORMAT), + metric="tikv_storage_engine_async_request_duration_seconds", + label_selectors=['type="write"'], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Store duration", + description="The store time duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_duration_secs", + ), + graph_panel_histogram_quantiles( + title="Apply duration", + description="The apply time duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_apply_duration_secs", + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Store propose wait duration", + description="The propose wait time duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + 
metric="tikv_raftstore_request_wait_time_duration_secs", + ), + graph_panel_histogram_quantiles( + title="Store batch wait duration", + description="The batch wait time duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_batch_wait_duration_seconds", + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Store send to write queue duration", + description="The send-to-write-queue time duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_send_to_queue_duration_seconds", + ), + graph_panel_histogram_quantiles( + title="Store send proposal duration", + description="The send raft message of the proposal duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_send_proposal_duration_seconds", + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Store write kv db end duration", + description="The write kv db end duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_write_kvdb_end_duration_seconds", + ), + graph_panel_histogram_quantiles( + title="Store before write duration", + description="The before write time duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_before_write_duration_seconds", + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Store persist duration", + description="The persist duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_persist_duration_seconds", + ), + graph_panel_histogram_quantiles( + title="Store write end duration", + description="The write end duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_write_end_duration_seconds", + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Store commit but not persist 
duration", + description="The commit but not persist duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds", + ), + graph_panel_histogram_quantiles( + title="Store commit and persist duration", + description="The commit and persist duration of each request", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_raftstore_store_wf_commit_log_duration_seconds", + ), + ] + ) + return layout.row_panel + + +def RaftIO() -> RowPanel: + layout = Layout(title="Raft IO") + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Process ready duration", + heatmap_description="The time consumed for peer processes to be ready in Raft", + graph_title="99% Process ready duration per server", + graph_description="The time consumed for peer processes to be ready in Raft", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_raft_process_duration_secs", + label_selectors=['type="ready"'], + ) + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Store write loop duration", + heatmap_description="The time duration of store write loop when store-io-pool-size is not zero.", + graph_title="99% Store write loop duration per server", + graph_description="The time duration of store write loop on each TiKV instance when store-io-pool-size is not zero.", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_store_write_loop_duration_seconds", + ) + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Append log duration", + heatmap_description="The time consumed when Raft appends log", + graph_title="99% Commit log duration per server", + graph_description="The time consumed when Raft commits log on each TiKV instance", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_append_log_duration_seconds", + ) + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Commit 
log duration", + heatmap_description="The time consumed when Raft commits log", + graph_title="99% Commit log duration per server", + graph_description="The time consumed when Raft commits log on each TiKV instance", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_commit_log_duration_seconds", + ) + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Apply log duration", + heatmap_description="The time consumed when Raft applies log", + graph_title="99% Apply log duration per server", + graph_description="The time consumed for Raft to apply logs per TiKV instance", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_apply_log_duration_seconds", + ) + ) + layout.row( + [ + graph_panel( + title="Store io task reschedule", + description="The throughput of disk write per IO type", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_io_reschedule_region_total", + ), + legend_format="rechedule-{{instance}}", + ), + target( + expr=expr_sum( + "tikv_raftstore_io_reschedule_pending_tasks_total", + ), + legend_format="pending-task-{{instance}}", + ), + ], + ), + graph_panel( + title="99% Write task block duration per server", + description="The time consumed when store write task block on each TiKV instance", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_store_write_msg_block_wait_duration_seconds", + by_labels=["instance"], + ), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + return layout.row_panel + + +def RaftPropose() -> RowPanel: + layout = Layout(title="Raft Propose") + layout.row( + [ + graph_panel( + title="Raft proposals per ready", + description="The proposal count of a Regions in a tick", + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_apply_proposal", + by_labels=["instance"], + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="Raft read/write proposals", + description="The 
number of proposals per type", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_proposal_total", + label_selectors=['type=~"local_read|normal|read_index"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raft read proposals per server", + description="The number of read proposals which are made by each TiKV instance", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_proposal_total", + label_selectors=['type=~"local_read|read_index"'], + ), + ), + ], + ), + graph_panel( + title="Raft write proposals per server", + description="The number of write proposals which are made by each TiKV instance", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_proposal_total", + label_selectors=['type=~"normal"'], + ), + ), + ], + ), + ] + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Propose wait duration", + heatmap_description="The wait time of each proposal", + graph_title="99% Propose wait duration per server", + graph_description="The wait time of each proposal in each TiKV instance", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_request_wait_time_duration_secs", + ) + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Store write wait duration", + heatmap_description="The wait time of each store write task", + graph_title="99% Store write wait duration per server", + graph_description="The wait time of each store write task in each TiKV instance", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_store_write_task_wait_duration_secs", + ) + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Apply wait duration", + heatmap_description="The wait time of each apply task", + graph_title="99% Apply wait duration per server", + 
graph_description="The wait time of each apply task in each TiKV instance", + yaxis_format=UNITS.SECONDS, + metric="tikv_raftstore_apply_wait_time_duration_secs", + ) + ) + layout.row( + [ + heatmap_panel( + title="Store write handle msg duration", + description="The handle duration of each store write task msg", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_raftstore_store_write_handle_msg_duration_secs_bucket", + ), + heatmap_panel( + title="Store write trigger size", + description="The distribution of write trigger size", + yaxis=yaxis(format=UNITS.BYTES_IEC), + metric="tikv_raftstore_store_write_trigger_wb_bytes_bucket", + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raft propose speed", + description="The rate at which peers propose logs", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_propose_log_size_sum", + ), + ), + ], + ), + graph_panel( + title="Perf Context duration", + description="The rate at which peers propose logs", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_store_perf_context_time_duration_secs", + by_labels=["type"], + ), + legend_format="store-{{type}}", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_apply_perf_context_time_duration_secs", + by_labels=["type"], + ), + legend_format="apply-{{type}}", + ), + ], + ), + ] + ) + return layout.row_panel + + +def RaftProcess() -> RowPanel: + layout = Layout(title="Raft Process") + layout.row( + [ + graph_panel( + title="Ready handled", + description="The count of different ready type of Raft", + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_ready_handled_total", + by_labels=["type"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_process_duration_secs_count", + label_selectors=['type="ready"'], + by_labels=[], # overwrite default by instance. 
+ ), + legend_format="count", + ), + ], + ), + graph_panel( + title="Max duration of raft store events", + description="The max time consumed by raftstore events", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.999999, + "tikv_raftstore_event_duration", + by_labels=["type"], + ), + legend_format="{{type}}", + ), + target( + expr=expr_histogram_quantile( + 0.999999, + "tikv_broadcast_normal_duration_seconds", + ), + legend_format="broadcast_normal", + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Replica read lock checking duration", + description="Replica read lock checking duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_replica_read_lock_check_duration_seconds_bucket", + ), + heatmap_panel( + title="Peer msg length distribution", + description="The length of peer msgs for each round handling", + metric="tikv_raftstore_peer_msg_len_bucket", + ), + ] + ) + return layout.row_panel + + +def RaftMessage() -> RowPanel: + layout = Layout(title="Raft Message") + layout.row( + [ + graph_panel( + title="Sent messages per server", + description="The number of Raft messages sent by each TiKV instance", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_sent_message_total", + ), + ), + ], + ), + graph_panel( + title="Flush messages per server", + description="The number of Raft messages flushed by each TiKV instance", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_server_raft_message_flush_total", + by_labels=["instance", "reason"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Receive messages per server", + description="The number of Raft messages received by each TiKV instance", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_server_raft_message_recv_total", + ), + ), + ], + ), + graph_panel( + 
title="Messages", + description="The number of different types of Raft messages that are sent", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_sent_message_total", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Vote", + description="The total number of vote messages that are sent in Raft", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_sent_message_total", + label_selectors=['type="vote"'], + ), + ), + ], + ), + graph_panel( + title="Raft dropped messages", + description="The number of dropped Raft messages per type", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_dropped_message_total", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def RaftAdmin() -> RowPanel: + layout = Layout(title="Raft Admin") + layout.row( + [ + graph_panel( + title="Admin proposals", + description="The number of admin proposals", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_proposal_total", + label_selectors=['type=~"conf_change|transfer_leader"'], + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="Admin apply", + description="The number of the processed apply command", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_admin_cmd_total", + label_selectors=['type!="compact"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Check split", + description="The number of raftstore split checks", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_check_split_total", + label_selectors=['type!="ignore"'], + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="99.99% Check split 
duration", + description="The time consumed when running split check in .9999", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.9999, + "tikv_raftstore_check_split_duration_seconds", + by_labels=["instance"], + ), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Load base split event", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum_delta( + "tikv_load_base_split_event", + range_selector="1m", + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="Load base split duration", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.80, + "tikv_load_base_split_duration_seconds", + by_labels=["instance"], + ), + legend_format="80%-{{instance}}", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_load_base_split_duration_seconds", + by_labels=["instance"], + ), + legend_format="99%-{{instance}}", + ), + target( + expr=expr_histogram_avg( + "tikv_load_base_split_duration_seconds", + by_labels=["instance"], + ), + legend_format="avg-{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Peer in Flashback State", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_peer_in_flashback_state", + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def RaftLog() -> RowPanel: + layout = Layout(title="Raft Log") + layout.row( + [ + graph_panel( + title="Raft log GC write duration", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=10), + targets=[ + target( + expr=expr_histogram_quantile( + 0.9999, + "tikv_raftstore_raft_log_gc_write_duration_secs", + by_labels=["instance"], + ), + legend_format="99.99%-{{instance}}", + ), + target( + expr=expr_histogram_avg( + "tikv_raftstore_raft_log_gc_write_duration_secs", + by_labels=["instance"], + ), + legend_format="avg-{{instance}}", + ), + ], + ), + graph_panel( + title="Raft log GC kv sync duration", + 
yaxes=yaxes(left_format=UNITS.SECONDS, log_base=10), + targets=[ + target( + expr=expr_histogram_quantile( + 0.9999, + "tikv_raftstore_raft_log_kv_sync_duration_secs", + by_labels=["instance"], + ), + legend_format="99.99%-{{instance}}", + ), + target( + expr=expr_histogram_avg( + "tikv_raftstore_raft_log_kv_sync_duration_secs", + by_labels=["instance"], + ), + legend_format="avg-{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raft log GC write operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_log_gc_write_duration_secs_count", + ), + ), + ], + ), + graph_panel( + title="Raft log GC seek operations ", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_log_gc_seek_operations_count", + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raft log lag", + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_log_lag_sum", + ), + ), + ], + ), + graph_panel( + title="Raft log gc skipped", + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_log_gc_skipped", + by_labels=["instance", "reason"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raft log GC failed", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_raft_log_gc_failed", + ), + ), + ], + ), + graph_panel( + title="Raft log fetch ", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_entry_fetches", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Raft log async fetch task duration", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=10), + targets=[ + target( + expr=expr_histogram_quantile( + 0.9999, + "tikv_raftstore_entry_fetches_task_duration_seconds", + ), + legend_format="99.99%", + ), + target( + expr=expr_histogram_avg( + 
"tikv_raftstore_entry_fetches_task_duration_seconds", + by_labels=["instance"], + ), + legend_format="avg-{{instance}}", + ), + target( + expr=expr_sum( + "tikv_worker_pending_task_total", + label_selectors=['name=~"raftlog-fetch-worker"'], + ), + legend_format="pending-task", + ), + ], + series_overrides=[ + series_override( + alias="/pending-task/", + yaxis=2, + transform_negative_y=True, + ), + ], + ), + ] + ) + return layout.row_panel + + +def LocalReader() -> RowPanel: + layout = Layout(title="Local Reader") + layout.row( + [ + graph_panel( + title="Raft log async fetch task duration", + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_local_read_reject_total", + by_labels=["instance", "reason"], + ), + legend_format="{{instance}}-reject-by-{{reason}}", + ), + target( + expr=expr_sum_rate( + "tikv_raftstore_local_read_executed_requests", + ), + legend_format="{{instance}}-total", + ), + target( + expr=expr_sum_rate( + "tikv_raftstore_local_read_executed_stale_read_requests", + ), + legend_format="{{instance}}-stale-read", + ), + ], + series_overrides=[ + series_override( + alias="/.*-total/", + yaxis=2, + ), + ], + ), + ] + ) + return layout.row_panel + + +def UnifiedReadPool() -> RowPanel: + layout = Layout(title="Unified Read Pool") + layout.row( + [ + graph_panel( + title="Time used by level", + description="The time used by each level in the unified read pool per second. 
Level 0 refers to small queries.", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_sum_rate( + "tikv_multilevel_level_elapsed", + label_selectors=['name="unified-read-pool"'], + by_labels=["level"], + ), + ), + ], + ), + graph_panel( + title="Level 0 chance", + description="The chance that level 0 (small) tasks are scheduled in the unified read pool.", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_simple( + "tikv_multilevel_level0_chance", + label_selectors=['name="unified-read-pool"'], + ), + legend_format="{{type}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Running tasks", + description="The number of concurrently running tasks in the unified read pool.", + targets=[ + target( + expr=expr_sum_aggr_over_time( + "tikv_unified_read_pool_running_tasks", + "avg", + "1m", + ), + ), + ], + ), + heatmap_panel( + title="Unified Read Pool Wait Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_yatp_pool_schedule_wait_duration_bucket", + label_selectors=['name=~"unified-read.*"'], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Duration of One Time Slice", + description="Unified read pool task execution time during one schedule.", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + metric="tikv_yatp_task_poll_duration", + hide_count=True, + ), + graph_panel_histogram_quantiles( + title="Task Execute Duration", + description="Unified read pool task total execution duration.", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + metric="tikv_yatp_task_exec_duration", + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Task Schedule Times", + description="Task schedule number of times.", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT, log_base=2), + metric="tikv_yatp_task_execute_times", + hide_count=True, + ), + ] + ) + return layout.row_panel + + +def Storage() -> RowPanel: + layout = 
Layout(title="Storage") + layout.row( + [ + graph_panel( + title="Storage command total", + description="The total count of different kinds of commands received", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC, log_base=10), + targets=[ + target( + expr=expr_sum_rate( + "tikv_storage_command_total", + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="Storage async request error", + description="The total number of engine asynchronous request errors", + targets=[ + target( + expr=expr_sum_rate( + "tikv_storage_engine_async_request_total", + label_selectors=['status!~"all|success"'], + by_labels=["status"], + ), + ), + ], + ), + ] + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Storage async write duration", + heatmap_description="The time consumed by processing asynchronous write requests", + graph_title="Storage async write duration", + graph_description="The storage async write duration", + yaxis_format=UNITS.SECONDS, + metric="tikv_storage_engine_async_request_duration_seconds", + label_selectors=['type="write"'], + ), + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Storage async snapshot duration", + heatmap_description="The time consumed by processing asynchronous snapshot requests", + graph_title="Storage async snapshot duration", + graph_description="The storage async snapshot duration", + yaxis_format=UNITS.SECONDS, + metric="tikv_storage_engine_async_request_duration_seconds", + label_selectors=['type="snapshot"'], + ), + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Storage async snapshot duration (pure local read)", + heatmap_description="The storage async snapshot duration without the involving of raftstore", + graph_title="Storage async snapshot duration (pure local read)", + graph_description="The storage async snapshot duration without the involving of raftstore", + yaxis_format=UNITS.SECONDS, + 
metric="tikv_storage_engine_async_request_duration_seconds", + label_selectors=['type="snapshot_local_read"'], + ), + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Read index propose wait duration", + heatmap_description="Read index propose wait duration associated with async snapshot", + graph_title="Read index propose wait duration", + graph_description="Read index propose wait duration associated with async snapshot", + yaxis_format=UNITS.SECONDS, + metric="tikv_storage_engine_async_request_duration_seconds", + label_selectors=['type="snapshot_read_index_propose_wait"'], + ), + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Read index confirm duration", + heatmap_description="Read index confirm duration associated with async snapshot", + graph_title="Read index confirm duration", + graph_description="Read index confirm duration associated with async snapshot", + yaxis_format=UNITS.SECONDS, + metric="tikv_storage_engine_async_request_duration_seconds", + label_selectors=['type="snapshot_read_index_confirm"'], + ), + ) + layout.row( + [ + graph_panel( + title="Process Stat Cpu Usage", + description="CPU usage measured over a 30 second window", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum( + "tikv_storage_process_stat_cpu_usage", + ), + ), + ], + ), + graph_panel_histogram_quantiles( + title="Full compaction duration seconds", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_storage_full_compact_duration_seconds", + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Full compaction pause duration", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_storage_full_compact_pause_duration_seconds", + hide_count=True, + ), + graph_panel_histogram_quantiles( + title="Full compaction per-increment duration", + description="", + 
yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_storage_full_compact_increment_duration_seconds", + hide_count=True, + ), + ] + ) + return layout.row_panel + + +def FlowControl() -> RowPanel: + layout = Layout(title="Flow Control") + layout.row( + [ + graph_panel( + title="Scheduler flow", + description="", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_scheduler_write_flow", + ), + legend_format="write-{{instance}}", + ), + target( + expr=expr_sum( + "tikv_scheduler_throttle_flow", + ).extra(" != 0"), + legend_format="throttle-{{instance}}", + ), + ], + ), + graph_panel( + title="Scheduler discard ratio", + description="", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum( + "tikv_scheduler_discard_ratio", + by_labels=["type"], + ).extra(" / 10000000"), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Throttle duration", + metric="tikv_scheduler_throttle_duration_seconds_bucket", + yaxis=yaxis(format=UNITS.SECONDS), + ), + graph_panel( + title="Scheduler throttled CF", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_simple( + "tikv_scheduler_throttle_cf", + ).extra(" != 0"), + legend_format="{{instance}}-{{cf}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Flow controller actions", + description="", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_throttle_action_total", + by_labels=["type", "cf"], + ), + ), + ], + ), + graph_panel( + title="Flush/L0 flow", + description="", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_scheduler_l0_flow", + by_labels=["instance", "cf"], + ), + legend_format="{{cf}}_l0_flow-{{instance}}", + ), + target( + expr=expr_sum( + "tikv_scheduler_flush_flow", + by_labels=["instance", "cf"], + ), + legend_format="{{cf}}_flush_flow-{{instance}}", + ), + target( + expr=expr_sum( + 
"tikv_scheduler_l0_flow", + ), + legend_format="total_l0_flow-{{instance}}", + ), + target( + expr=expr_sum( + "tikv_scheduler_flush_flow", + ), + legend_format="total_flush_flow-{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Flow controller factors", + description="", + targets=[ + target( + expr=expr_max( + "tikv_scheduler_l0", + ), + legend_format="l0-{{instance}}", + ), + target( + expr=expr_max( + "tikv_scheduler_memtable", + ), + legend_format="memtable-{{instance}}", + ), + target( + expr=expr_max( + "tikv_scheduler_l0_avg", + ), + legend_format="avg_l0-{{instance}}", + ), + ], + ), + graph_panel( + title="Compaction pending bytes", + description="", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_engine_pending_compaction_bytes", + label_selectors=['db="kv"'], + by_labels=["cf"], + ), + ), + target( + expr=expr_sum( + "tikv_scheduler_pending_compaction_bytes", + by_labels=["cf"], + ).extra(" / 10000000"), + legend_format="pending-bytes-{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Txn command throttled duration", + description="Throttle time for txn storage commands in 1 minute.", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_sum_rate( + "tikv_txn_command_throttle_time_total", + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="Non-txn command throttled duration", + description="Throttle time for non-txn related processing like analyze or dag in 1 minute.", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_sum_rate( + "tikv_non_txn_command_throttle_time_total", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def SchedulerCommands() -> RowPanel: + layout = Layout(title="Scheduler", repeat="command") + layout.row( + [ + graph_panel( + title="Scheduler stage total", + description="The total number of commands on each stage in commit 
command", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_too_busy_total", + label_selectors=['type="$command"'], + ), + legend_format="busy-{{instance}}", + ), + target( + expr=expr_sum_rate( + "tikv_scheduler_stage_total", + label_selectors=['type="$command"'], + by_labels=["stage"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Scheduler command duration", + description="The time consumed when executing commit command", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_scheduler_command_duration_seconds", + label_selectors=['type="$command"'], + hide_count=True, + ), + graph_panel_histogram_quantiles( + title="Scheduler latch wait duration", + description="The time which is caused by latch wait in commit command", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_scheduler_latch_wait_duration_seconds", + label_selectors=['type="$command"'], + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Scheduler keys read", + description="The count of keys read by a commit command", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + metric="tikv_scheduler_kv_command_key_read", + label_selectors=['type="$command"'], + hide_count=True, + ), + graph_panel_histogram_quantiles( + title="Scheduler keys written", + description="The count of keys written by a commit command", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + metric="tikv_scheduler_kv_command_key_write", + label_selectors=['type="$command"'], + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel( + title="Scheduler scan details", + description="The keys scan details of each CF when executing commit command", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_kv_scan_details", + label_selectors=['req="$command"'], + by_labels=["tag"], + ), + ), + ], + ), + graph_panel( + title="Scheduler scan 
details [lock]", + description="The keys scan details of lock CF when executing commit command", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_kv_scan_details", + label_selectors=['req="$command", cf="lock"'], + by_labels=["tag"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Scheduler scan details [write]", + description="The keys scan details of write CF when executing commit command", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_kv_scan_details", + label_selectors=['req="$command", cf="write"'], + by_labels=["tag"], + ), + ), + ], + ), + graph_panel( + title="Scheduler scan details [default]", + description="The keys scan details of default CF when executing commit command", + yaxes=yaxes(left_format=UNITS.NONE_FORMAT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_kv_scan_details", + label_selectors=['req="$command", cf="default"'], + by_labels=["tag"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Scheduler command read duration", + description="The time consumed on reading when executing commit command", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_scheduler_processing_read_duration_seconds", + label_selectors=['type="$command"'], + hide_count=True, + ), + heatmap_panel( + title="Check memory locks duration", + description="The time consumed on checking memory locks", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_storage_check_mem_lock_duration_seconds_bucket", + label_selectors=['type="$command"'], + ), + ] + ) + return layout.row_panel + + +def Scheduler() -> RowPanel: + layout = Layout(title="Scheduler") + layout.row( + [ + graph_panel( + title="Scheduler stage total", + description="The total number of commands on each stage", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + 
"tikv_scheduler_too_busy_total", + by_labels=["stage"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_scheduler_stage_total", + by_labels=["stage"], + ), + ), + ], + ), + graph_panel( + title="Scheduler writing bytes", + description="The total writing bytes of commands on each stage", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_scheduler_writing_bytes", + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Scheduler priority commands", + description="The count of different priority commands", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_scheduler_commands_pri_total", + by_labels=["priority"], + ), + ), + ], + ), + graph_panel( + title="Scheduler pending commands", + description="The count of pending commands per TiKV instance", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum( + "tikv_scheduler_contex_total", + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Txn Scheduler Pool Wait Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_yatp_pool_schedule_wait_duration_bucket", + label_selectors=['name=~"sched-worker.*"'], + ), + ] + ) + return layout.row_panel + + +def GC() -> RowPanel: + layout = Layout(title="GC") + layout.row( + [ + graph_panel( + title="GC tasks", + description="The count of GC tasks processed by gc_worker", + targets=[ + target( + expr=expr_sum_rate( + "tikv_gcworker_gc_tasks_vec", + by_labels=["task"], + ), + legend_format="total-{{task}}", + ), + target( + expr=expr_sum_rate( + "tikv_storage_gc_skipped_counter", + by_labels=["task"], + ), + legend_format="skipped-{{task}}", + ), + target( + expr=expr_sum_rate( + "tikv_gcworker_gc_task_fail_vec", + by_labels=["task"], + ), + legend_format="failed-{{task}}", + ), + target( + expr=expr_sum_rate( + "tikv_gc_worker_too_busy", + by_labels=[], + ), + legend_format="gcworker-too-busy", + ), + ], + ), + 
graph_panel_histogram_quantiles( + title="GC tasks duration", + description="The time consumed when executing GC tasks", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_gcworker_gc_task_duration_vec", + label_selectors=['type="$command"'], + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel( + title="TiDB GC seconds", + description="The GC duration", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 1, "tidb_tikvclient_gc_seconds", by_labels=["instance"] + ).skip_default_instance_selector(), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="TiDB GC worker actions", + description="The count of TiDB GC worker actions", + targets=[ + target( + expr=expr_sum_rate( + "tidb_tikvclient_gc_worker_actions_total", + by_labels=["type"], + ).skip_default_instance_selector(), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="ResolveLocks Progress", + description="Progress of ResolveLocks, the first phase of GC", + targets=[ + target( + expr=expr_max( + "tidb_tikvclient_range_task_stats", + label_selectors=['type=~"resolve-locks.*"'], + by_labels=["result"], + ).skip_default_instance_selector(), + ), + ], + ), + graph_panel( + title="TiKV Auto GC Progress", + description="Progress of TiKV's GC", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_operator( + expr_sum( + "tikv_gcworker_autogc_processed_regions", + label_selectors=['type="scan"'], + ), + "/", + expr_sum( + "tikv_raftstore_region_count", + label_selectors=['type="region"'], + ), + ), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="GC speed", + description="keys / second", + targets=[ + target( + expr=expr_sum_rate( + "tikv_storage_mvcc_gc_delete_versions_sum", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_keys/s", + ), + ], + ), + graph_panel( + title="TiKV Auto GC SafePoint", + description="SafePoint used for TiKV's 
Auto GC", + yaxes=yaxes(left_format=UNITS.DATE_TIME_ISO), + targets=[ + target( + expr=expr_max( + "tikv_gcworker_autogc_safe_point", + ) + .extra("/ (2^18)") + .skip_default_instance_selector(), + ), + ], + ), + ] + ) + layout.half_row( + [ + stat_panel( + title="GC lifetime", + description="The lifetime of TiDB GC", + format=UNITS.SECONDS, + targets=[ + target( + expr=expr_max( + "tidb_tikvclient_gc_config", + label_selectors=['type="tikv_gc_life_time"'], + by_labels=[], + ).skip_default_instance_selector(), + ), + ], + ), + stat_panel( + title="GC interval", + description="The interval of TiDB GC", + format=UNITS.SECONDS, + targets=[ + target( + expr=expr_max( + "tidb_tikvclient_gc_config", + label_selectors=['type="tikv_gc_run_interval"'], + by_labels=[], + ).skip_default_instance_selector(), + ), + ], + ), + ] + ) + layout.half_row( + [ + graph_panel( + title="GC in Compaction Filter", + description="Keys handled in GC compaction filter", + targets=[ + target( + expr=expr_sum_rate( + "tikv_gc_compaction_filtered", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_filtered", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_filter_skip", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_skipped", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_mvcc_rollback", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_mvcc-rollback/mvcc-lock", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_filter_orphan_versions", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_orphan-versions", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_filter_perform", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_performed-times", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_failure", + by_labels=["key_mode", "type"], + ), + legend_format="{{key_mode}}_failure-{{type}}", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_filter_mvcc_deletion_met", + by_labels=["key_mode"], 
+ ), + legend_format="{{key_mode}}_mvcc-deletion-met", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_filter_mvcc_deletion_handled", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_mvcc-deletion-handled", + ), + target( + expr=expr_sum_rate( + "tikv_gc_compaction_filter_mvcc_deletion_wasted", + by_labels=["key_mode"], + ), + legend_format="{{key_mode}}_mvcc-deletion-wasted", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="GC scan write details", + description="GC scan write details", + targets=[ + target( + expr=expr_sum_rate( + "tikv_gcworker_gc_keys", + label_selectors=['cf="write"'], + by_labels=["key_mode", "tag"], + ), + ), + ], + ), + graph_panel( + title="GC scan default details", + description="GC scan default details", + targets=[ + target( + expr=expr_sum_rate( + "tikv_gcworker_gc_keys", + label_selectors=['cf="default"'], + by_labels=["key_mode", "tag"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def Snapshot() -> RowPanel: + layout = Layout(title="Snapshot") + layout.row( + [ + graph_panel( + title="Rate snapshot message", + description="The rate of Raft snapshot messages sent", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum_delta( + "tikv_raftstore_raft_sent_message_total", + range_selector="1m", + label_selectors=['type="snapshot"'], + ), + ), + ], + ), + graph_panel( + title="Snapshot state count", + description="The number of snapshots in different states", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_snapshot_traffic_total", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="99% Snapshot generation wait duration", + description="The time snapshot generation tasks waited to be scheduled. 
", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_snapshot_generation_wait_duration_seconds", + by_labels=["instance"], + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="99% Handle snapshot duration", + description="The time consumed when handling snapshots", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_server_send_snapshot_duration_seconds", + ), + legend_format="send", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_snapshot_duration_seconds", + label_selectors=['type="apply"'], + ), + legend_format="apply", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_raftstore_snapshot_duration_seconds", + label_selectors=['type="generate"'], + ), + legend_format="generate", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="99.99% Snapshot size", + description="The snapshot size (P99.99).9999", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_histogram_quantile( + 0.9999, + "tikv_snapshot_size", + ), + legend_format="size", + ), + ], + ), + graph_panel( + title="99.99% Snapshot KV count", + description="The number of KV within a snapshot in .9999", + targets=[ + target( + expr=expr_histogram_quantile( + 0.9999, + "tikv_snapshot_kv_count", + ), + legend_format="count", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Snapshot Actions", + description="Action stats for snapshot generating and applying", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum_delta( + "tikv_raftstore_snapshot_total", + range_selector="1m", + by_labels=["type", "status"], + ), + ), + target( + expr=expr_sum_delta( + "tikv_raftstore_clean_region_count", + range_selector="1m", + by_labels=["type", "status"], + ), + legend_format="clean-region-by-{{type}}", + ), + ], + ), + graph_panel( + title="Snapshot 
transport speed", + description="The speed of sending or receiving snapshot", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_snapshot_limit_transport_bytes", + by_labels=["instance", "type"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_snapshot_limit_generate_bytes", + ), + legend_format="{{instance}}-generate", + ), + ], + ), + ] + ) + return layout.row_panel + + +def Task() -> RowPanel: + layout = Layout(title="Task") + layout.row( + [ + graph_panel( + title="Worker handled tasks", + description="The number of tasks handled by worker", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_worker_handled_task_total", + by_labels=["name"], + ), + ), + ], + ), + graph_panel( + title="Worker pending tasks", + description="Current pending and running tasks of worker", + targets=[ + target( + expr=expr_sum( + "tikv_worker_pending_task_total", + by_labels=["name"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="FuturePool handled tasks", + description="The number of tasks handled by future_pool", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_futurepool_handled_task_total", + by_labels=["name"], + ), + ), + ], + ), + graph_panel( + title="FuturePool pending tasks", + description="Current pending and running tasks of future_pool", + targets=[ + target( + expr=expr_sum_aggr_over_time( + "tikv_futurepool_pending_task_total", + "avg", + range_selector="1m", + by_labels=["name"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def CoprocessorOverview() -> RowPanel: + layout = Layout(title="Coprocessor Overview") + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Request duration", + heatmap_description="The time consumed to handle coprocessor read requests", + graph_title="Request duration", + graph_description="The time consumed to handle coprocessor read 
requests", + yaxis_format=UNITS.SECONDS, + metric="tikv_coprocessor_request_duration_seconds", + graph_by_labels=["req"], + ), + ) + layout.row( + [ + graph_panel( + title="Total Requests", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_request_duration_seconds_count", + by_labels=["req"], + ), + ), + ], + ), + graph_panel( + title="Total Request Errors", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_request_error", + by_labels=["reason"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="KV Cursor Operations", + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_scan_keys_sum", + by_labels=["req"], + ), + ), + ], + ), + graph_panel_histogram_quantiles( + title="KV Cursor Operations", + description="", + metric="tikv_coprocessor_scan_keys", + yaxes=yaxes(left_format=UNITS.SHORT), + by_labels=["req"], + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel( + title="Total RocksDB Perf Statistics", + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_rocksdb_perf", + label_selectors=['metric="internal_delete_skipped_count"'], + by_labels=["req"], + ), + legend_format="delete_skipped-{{req}}", + ), + ], + ), + graph_panel( + title="Total Response Size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_response_bytes", + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def CoprocessorDetail() -> RowPanel: + layout = Layout(title="Coprocessor Detail") + layout.row( + [ + graph_panel_histogram_quantiles( + title="Handle duration", + description="The time consumed when handling coprocessor requests", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_coprocessor_request_handle_seconds", + by_labels=["req"], + hide_avg=True, + hide_count=True, + ), + graph_panel_histogram_quantiles( + title="Handle duration by store", + 
description="The time consumed to handle coprocessor requests per TiKV instance", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_coprocessor_request_handle_seconds", + by_labels=["req", "instance"], + hide_avg=True, + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Wait duration", + description="The time consumed when coprocessor requests are wait for being handled", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_coprocessor_request_wait_seconds", + label_selectors=['type="all"'], + by_labels=["req"], + hide_avg=True, + hide_count=True, + ), + graph_panel_histogram_quantiles( + title="Wait duration by store", + description="The time consumed when coprocessor requests are wait for being handled in each TiKV instance", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_coprocessor_request_wait_seconds", + label_selectors=['type="all"'], + by_labels=["req", "instance"], + hide_avg=True, + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel( + title="Total DAG Requests", + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_dag_request_count", + by_labels=["vec_type"], + ), + ), + ], + ), + graph_panel( + title="Total DAG Executors", + description="The total number of DAG executors", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_executor_count", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Total Ops Details (Table Scan)", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_scan_details", + label_selectors=['req="select"'], + by_labels=["tag"], + ), + ), + ], + ), + graph_panel( + title="Total Ops Details (Index Scan)", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_scan_details", + label_selectors=['req="index"'], + by_labels=["tag"], + ), + ), + ], + 
), + ] + ) + layout.row( + [ + graph_panel( + title="Total Ops Details by CF (Table Scan)", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_scan_details", + label_selectors=['req="select"'], + by_labels=["cf", "tag"], + ), + ), + ], + ), + graph_panel( + title="Total Ops Details by CF (Index Scan)", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_scan_details", + label_selectors=['req="index"'], + by_labels=["cf", "tag"], + ), + ), + ], + ), + ] + ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Memory lock checking duration", + heatmap_description="The time consumed on checking memory locks for coprocessor requests", + graph_title="Memory lock checking duration", + graph_description="The time consumed on checking memory locks for coprocessor requests", + yaxis_format=UNITS.SECONDS, + metric="tikv_coprocessor_mem_lock_check_duration_seconds", + ), + ) + return layout.row_panel + + +def Threads() -> RowPanel: + layout = Layout(title="Threads") + layout.row( + [ + graph_panel( + title="Threads state", + targets=[ + target( + expr=expr_sum( + "tikv_threads_state", + by_labels=["instance", "state"], + ), + ), + target( + expr=expr_sum( + "tikv_threads_state", + by_labels=["instance"], + ), + legend_format="{{instance}}-total", + ), + ], + ), + graph_panel( + title="Threads IO", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_topk( + 20, + "%s" + % expr_sum_rate( + "tikv_threads_io_bytes_total", + by_labels=["name", "io"], + ).extra("> 1024"), + ), + legend_format="{{name}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Thread Voluntary Context Switches", + targets=[ + target( + expr=expr_topk( + 20, + "%s" + % expr_max_rate( + "tikv_thread_voluntary_context_switches", + by_labels=["name"], + ).extra("> 100"), + ), + legend_format="{{name}}", + ), + ], + ), 
+ graph_panel( + title="Thread Nonvoluntary Context Switches", + targets=[ + target( + expr=expr_topk( + 20, + "%s" + % expr_max_rate( + "tikv_thread_nonvoluntary_context_switches", + by_labels=["name"], + ).extra("> 100"), + ), + legend_format="{{name}}", + ), + ], + ), + ] + ) + return layout.row_panel + + +def RocksDB() -> RowPanel: + layout = Layout(title="RocksDB", repeat="db") + layout.row( + [ + graph_panel( + title="Get operations", + description="The count of get operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_memtable_efficiency", + label_selectors=[ + 'db="$db"', + 'type="memtable_hit"', + ], + by_labels=[], # override default by instance. + ), + legend_format="memtable", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type=~"block_cache_data_hit|block_cache_filter_hit"', + ], + by_labels=[], # override default by instance. + ), + legend_format="block_cache", + ), + target( + expr=expr_sum_rate( + "tikv_engine_get_served", + label_selectors=[ + 'db="$db"', + 'type="get_hit_l0"', + ], + by_labels=[], # override default by instance. + ), + legend_format="l0", + ), + target( + expr=expr_sum_rate( + "tikv_engine_get_served", + label_selectors=[ + 'db="$db"', + 'type="get_hit_l1"', + ], + by_labels=[], # override default by instance. + ), + legend_format="l1", + ), + target( + expr=expr_sum_rate( + "tikv_engine_get_served", + label_selectors=[ + 'db="$db"', + 'type="get_hit_l2_and_up"', + ], + by_labels=[], # override default by instance. + ), + legend_format="l2_and_up", + ), + ], + ), + graph_panel( + title="Get duration", + description="The time consumed when executing get operations", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS, log_base=2), + targets=[ + target( + expr=expr_max( + "tikv_engine_get_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="get_max"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_get_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="get_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_get_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="get_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_get_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="get_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Seek operations", + description="The count of seek operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_locate", + label_selectors=[ + 'db="$db"', + 'type="number_db_seek"', + ], + by_labels=[], # override default by instance. + ), + legend_format="seek", + ), + target( + expr=expr_sum_rate( + "tikv_engine_locate", + label_selectors=[ + 'db="$db"', + 'type="number_db_seek_found"', + ], + by_labels=[], # override default by instance. + ), + legend_format="seek_found", + ), + target( + expr=expr_sum_rate( + "tikv_engine_locate", + label_selectors=[ + 'db="$db"', + 'type="number_db_next"', + ], + by_labels=[], # override default by instance. + ), + legend_format="next", + ), + target( + expr=expr_sum_rate( + "tikv_engine_locate", + label_selectors=[ + 'db="$db"', + 'type="number_db_next_found"', + ], + by_labels=[], # override default by instance. + ), + legend_format="next_found", + ), + target( + expr=expr_sum_rate( + "tikv_engine_locate", + label_selectors=[ + 'db="$db"', + 'type="number_db_prev"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="prev", + ), + target( + expr=expr_sum_rate( + "tikv_engine_locate", + label_selectors=[ + 'db="$db"', + 'type="number_db_prev_found"', + ], + by_labels=[], # override default by instance. + ), + legend_format="prev_found", + ), + ], + ), + graph_panel( + title="Seek duration", + description="The time consumed when executing seek operation", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS, log_base=2), + targets=[ + target( + expr=expr_max( + "tikv_engine_seek_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="seek_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_seek_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="seek_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_seek_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="seek_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_seek_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="seek_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Write operations", + description="The count of write operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_write_served", + label_selectors=[ + 'db="$db"', + 'type=~"write_done_by_self|write_done_by_other"', + ], + by_labels=[], # override default by instance. + ), + legend_format="done", + ), + target( + expr=expr_sum_rate( + "tikv_engine_write_served", + label_selectors=[ + 'db="$db"', + 'type="write_timeout"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="timeout", + ), + target( + expr=expr_sum_rate( + "tikv_engine_write_served", + label_selectors=[ + 'db="$db"', + 'type="write_with_wal"', + ], + by_labels=[], # override default by instance. + ), + legend_format="with_wal", + ), + ], + ), + graph_panel( + title="Write duration", + description="The time consumed when executing write operation", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS, log_base=2), + targets=[ + target( + expr=expr_max( + "tikv_engine_write_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_write_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_write_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_write_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="WAL sync operations", + description="The count of WAL sync operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_wal_file_synced", + label_selectors=[ + 'db="$db"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="sync", + ), + ], + ), + graph_panel( + title="Write WAL duration", + description="The time consumed when executing write wal operation", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS, log_base=2), + targets=[ + target( + expr=expr_max( + "tikv_engine_write_wal_time_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_wal_micros_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_write_wal_time_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_wal_micros_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_write_wal_time_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_wal_micros_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_write_wal_time_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="write_wal_micros_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Compaction operations", + description="The count of compaction and flush operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_event_total", + label_selectors=[ + 'db="$db"', + ], + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="WAL sync duration", + description="The time consumed when executing WAL sync operation", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS, log_base=10), + targets=[ + target( + expr=expr_max( + "tikv_engine_wal_file_sync_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="wal_file_sync_max"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_wal_file_sync_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="wal_file_sync_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_wal_file_sync_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="wal_file_sync_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_wal_file_sync_micro_seconds", + label_selectors=[ + 'db="$db"', + 'type="wal_file_sync_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Compaction guard actions", + description="Compaction guard actions", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_raftstore_compaction_guard_action_total", + label_selectors=[ + 'cf=~"default|write"', + ], + by_labels=["cf", " type"], + ), + ), + ], + ), + graph_panel( + title="Compaction duration", + description="The time consumed when executing the compaction and flush operations", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS, log_base=2), + targets=[ + target( + expr=expr_max( + "tikv_engine_compaction_time", + label_selectors=[ + 'db="$db"', + 'type="compaction_time_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_compaction_time", + label_selectors=[ + 'db="$db"', + 'type="compaction_time_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_compaction_time", + label_selectors=[ + 'db="$db"', + 'type="compaction_time_percentile95"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_compaction_time", + label_selectors=[ + 'db="$db"', + 'type="compaction_time_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="SST read duration", + description="The time consumed when reading SST files", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS, log_base=2), + targets=[ + target( + expr=expr_max( + "tikv_engine_sst_read_micros", + label_selectors=[ + 'db="$db"', + 'type="sst_read_micros_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_sst_read_micros", + label_selectors=[ + 'db="$db"', + 'type="sst_read_micros_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_sst_read_micros", + label_selectors=[ + 'db="$db"', + 'type="sst_read_micros_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_sst_read_micros", + label_selectors=[ + 'db="$db"', + 'type="sst_read_micros_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + graph_panel( + title="Compaction reason", + description=None, + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_compaction_reason", + label_selectors=[ + 'db="$db"', + ], + by_labels=["cf", "reason"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Block cache size", + description="The block cache size. 
Broken down by column family if shared block cache is disabled.", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_topk( + 20, + "%s" + % expr_avg( + "tikv_engine_block_cache_size_bytes", + label_selectors=[ + 'db="$db"', + ], + by_labels=["cf", "instance"], + ), + ), + legend_format="{{instance}}-{{cf}}", + ), + ], + ), + graph_panel( + title="Memtable hit", + description="The hit rate of memtable", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_memtable_efficiency", + label_selectors=[ + 'db="$db"', + 'type="memtable_hit"', + ], + by_labels=[], # override default by instance. + ), + "/", + expr_operator( + expr_sum_rate( + "tikv_engine_memtable_efficiency", + label_selectors=[ + 'db="$db"', + 'type="memtable_hit"', + ], + by_labels=[], # override default by instance. + ), + "+", + expr_sum_rate( + "tikv_engine_memtable_efficiency", + label_selectors=[ + 'db="$db"', + 'type="memtable_miss"', + ], + by_labels=[], # override default by instance. + ), + ), + ), + legend_format="hit", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Block cache flow", + description="The flow of different kinds of block cache operations", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC, log_base=10), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="block_cache_byte_read"', + ], + by_labels=[], # override default by instance. + ), + legend_format="total_read", + ), + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="block_cache_byte_write"', + ], + by_labels=[], # override default by instance. + ), + legend_format="total_written", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_data_bytes_insert"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="data_insert", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_filter_bytes_insert"', + ], + by_labels=[], # override default by instance. + ), + legend_format="filter_insert", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_filter_bytes_evict"', + ], + by_labels=[], # override default by instance. + ), + legend_format="filter_evict", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_index_bytes_insert"', + ], + by_labels=[], # override default by instance. + ), + legend_format="index_insert", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_index_bytes_evict"', + ], + by_labels=[], # override default by instance. + ), + legend_format="index_evict", + ), + ], + ), + graph_panel( + title="Block cache hit", + description="The hit rate of block cache", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_hit"', + ], + by_labels=[], # override default by instance. + ), + "/", + expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_hit"', + ], + by_labels=[], # override default by instance. + ), + "+", + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_miss"', + ], + by_labels=[], # override default by instance. + ), + ), + ), + legend_format="all", + ), + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_data_hit"', + ], + by_labels=[], # override default by instance. 
+ ), + "/", + expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_data_hit"', + ], + by_labels=[], # override default by instance. + ), + "+", + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_data_miss"', + ], + by_labels=[], # override default by instance. + ), + ), + ), + legend_format="data", + ), + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_filter_hit"', + ], + by_labels=[], # override default by instance. + ), + "/", + expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_filter_hit"', + ], + by_labels=[], # override default by instance. + ), + "+", + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_filter_miss"', + ], + by_labels=[], # override default by instance. + ), + ), + ), + legend_format="filter", + ), + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_index_hit"', + ], + by_labels=[], # override default by instance. + ), + "/", + expr_operator( + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_index_hit"', + ], + by_labels=[], # override default by instance. + ), + "+", + expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_index_miss"', + ], + by_labels=[], # override default by instance. + ), + ), + ), + legend_format="index", + ), + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_bloom_efficiency", + label_selectors=[ + 'db="$db"', + 'type="bloom_prefix_useful"', + ], + by_labels=[], # override default by instance. 
+ ), + "/", + expr_sum_rate( + "tikv_engine_bloom_efficiency", + label_selectors=[ + 'db="$db"', + 'type="bloom_prefix_checked"', + ], + by_labels=[], # override default by instance. + ), + ), + legend_format="bloom prefix", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Keys flow", + description="The flow of different kinds of operations on keys", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="keys_read"', + ], + by_labels=[], # override default by instance. + ), + legend_format="read", + ), + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="keys_written"', + ], + by_labels=[], # override default by instance. + ), + legend_format="written", + ), + target( + expr=expr_sum_rate( + "tikv_engine_compaction_num_corrupt_keys", + label_selectors=[ + 'db="$db"', + ], + by_labels=[], # override default by instance. + ), + legend_format="corrupt", + ), + ], + ), + graph_panel( + title="Block cache operations", + description="The count of different kinds of block cache operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_add"', + ], + by_labels=[], # override default by instance. + ), + legend_format="total_add", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_data_add"', + ], + by_labels=[], # override default by instance. + ), + legend_format="data_add", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_filter_add"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="filter_add", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_index_add"', + ], + by_labels=[], # override default by instance. + ), + legend_format="index_add", + ), + target( + expr=expr_sum_rate( + "tikv_engine_cache_efficiency", + label_selectors=[ + 'db="$db"', + 'type="block_cache_add_failures"', + ], + by_labels=[], # override default by instance. + ), + legend_format="add_failures", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Read flow", + description="The flow rate of read operations per type", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="bytes_read"', + ], + by_labels=[], # override default by instance. + ), + legend_format="get", + ), + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="iter_bytes_read"', + ], + by_labels=[], # override default by instance. + ), + legend_format="scan", + ), + ], + ), + graph_panel( + title="Total keys", + description="The count of keys in each column family", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_sum( + "tikv_engine_estimate_num_keys", + label_selectors=[ + 'db="$db"', + ], + by_labels=["cf"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Write flow", + description="The flow of different kinds of write operations", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="wal_file_bytes"', + ], + by_labels=[], # override default by instance. + ), + legend_format="wal", + ), + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="bytes_written"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="write", + ), + ], + ), + graph_panel( + title="Bytes / Read", + description="The bytes per read", + yaxes=yaxes(left_format=UNITS.BYTES_IEC, log_base=10), + targets=[ + target( + expr=expr_max( + "tikv_engine_bytes_per_read", + label_selectors=[ + 'db="$db"', + 'type="bytes_per_read_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_bytes_per_read", + label_selectors=[ + 'db="$db"', + 'type="bytes_per_read_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_bytes_per_read", + label_selectors=[ + 'db="$db"', + 'type="bytes_per_read_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_bytes_per_read", + label_selectors=[ + 'db="$db"', + 'type="bytes_per_read_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Compaction flow", + description="The flow rate of compaction operations per type", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_compaction_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="bytes_read"', + ], + by_labels=[], # override default by instance. + ), + legend_format="read", + ), + target( + expr=expr_sum_rate( + "tikv_engine_compaction_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="bytes_written"', + ], + by_labels=[], # override default by instance. + ), + legend_format="written", + ), + target( + expr=expr_sum_rate( + "tikv_engine_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="flush_write_bytes"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="flushed", + ), + ], + ), + graph_panel( + title="Bytes / Write", + description="The bytes per write", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_max( + "tikv_engine_bytes_per_write", + label_selectors=['db="$db"', 'type="bytes_per_write_max"'], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_bytes_per_write", + label_selectors=[ + 'db="$db"', + 'type="bytes_per_write_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_bytes_per_write", + label_selectors=[ + 'db="$db"', + 'type="bytes_per_write_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_bytes_per_write", + label_selectors=[ + 'db="$db"', + 'type="bytes_per_write_average"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Read amplification", + description="The read amplification per TiKV instance", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_read_amp_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="read_amp_total_read_bytes"', + ], + ), + "/", + expr_sum_rate( + "tikv_engine_read_amp_flow_bytes", + label_selectors=[ + 'db="$db"', + 'type="read_amp_estimate_useful_bytes"', + ], + ), + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="Compaction pending bytes", + description="The pending bytes to be compacted", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_engine_pending_compaction_bytes", + label_selectors=['db="$db"'], + by_labels=["cf"], + ), + legend_format="{{cf}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Number of snapshots", + description="The number of snapshot of each TiKV instance", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_simple( + "tikv_engine_num_snapshots", + label_selectors=['db="$db"'], + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="Compression ratio", + description="The compression ratio of each level", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_avg( + "tikv_engine_compression_ratio", + label_selectors=['db="$db"'], + by_labels=["cf", "level"], + ), + legend_format="{{cf}}-L{{level}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Number files at each level", + description="The number of SST files for different column families in each level", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_avg( + "tikv_engine_num_files_at_level", + label_selectors=['db="$db"'], + by_labels=["cf", "level"], + ), + legend_format="{{cf}}-L{{level}}", + ), + ], + ), + graph_panel( + 
title="Oldest snapshots duration", + description="The time that the oldest unreleased snapshot survivals", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_simple( + "tikv_engine_oldest_snapshot_duration", + label_selectors=['db="$db"'], + ), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Stall conditions changed of each CF", + description="Stall conditions changed of each column family", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_simple( + "tikv_engine_stall_conditions_changed", + label_selectors=['db="$db"'], + ), + legend_format="{{instance}}-{{cf}}-{{type}}", + ), + ], + ), + graph_panel_histogram_quantiles( + title="Ingest SST duration seconds", + description="The time consumed when ingesting SST files", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_snapshot_ingest_sst_duration_seconds", + label_selectors=['db="$db"'], + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel( + title="Write Stall Reason", + description=None, + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_write_stall_reason", + label_selectors=['db="$db"'], + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="Write stall duration", + description="The time which is caused by write stall", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_max( + "tikv_engine_write_stall", + label_selectors=['db="$db"', 'type="write_stall_max"'], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + target( + expr=expr_avg( + "tikv_engine_write_stall", + label_selectors=[ + 'db="$db"', + 'type="write_stall_percentile99"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="99%", + ), + target( + expr=expr_avg( + "tikv_engine_write_stall", + label_selectors=[ + 'db="$db"', + 'type="write_stall_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_write_stall", + label_selectors=['db="$db"', 'type="write_stall_average"'], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Ingestion picked level", + description="The level that the external file ingests into", + yaxis=yaxis(format=UNITS.SHORT), + metric="tikv_engine_ingestion_picked_level_bucket", + label_selectors=['db="$db"'], + ), + graph_panel( + title="Memtable size", + description="The memtable size of each column family", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_engine_memory_bytes", + label_selectors=['db="$db"', 'type="mem-tables-all"'], + by_labels=["cf"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def RaftEngine() -> RowPanel: + layout = Layout(title="Raft Engine") + layout.row( + [ + graph_panel( + title="Operation", + description="The count of operations per second", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "raft_engine_write_apply_duration_seconds_count", + by_labels=[], # override default by instance. + ), + legend_format="write", + ), + target( + expr=expr_sum_rate( + "raft_engine_read_entry_duration_seconds_count", + by_labels=[], # override default by instance. + ), + legend_format="read_entry", + ), + target( + expr=expr_sum_rate( + "raft_engine_read_message_duration_seconds_count", + by_labels=[], # override default by instance. 
+ ), + legend_format="read_message", + ), + ], + ), + graph_panel_histogram_quantiles( + title="Write Duration", + description="The time used in write operation", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="raft_engine_write_duration_seconds", + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel( + title="Flow", + description="The I/O flow rate", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "raft_engine_write_size_sum", + by_labels=[], # override default by instance. + ), + legend_format="write", + ), + target( + expr=expr_sum_rate( + "raft_engine_background_rewrite_bytes_sum", + by_labels=["type"], + ), + legend_format="rewrite-{{type}}", + ), + ], + ), + graph_panel( + title="Write Duration Breakdown (99%)", + description="99% duration breakdown of write operation", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, "raft_engine_write_preprocess_duration_seconds" + ), + legend_format="wait", + ), + target( + expr=expr_histogram_quantile( + 0.99, "raft_engine_write_leader_duration_seconds" + ), + legend_format="wal", + ), + target( + expr=expr_histogram_quantile( + 0.99, "raft_engine_write_apply_duration_seconds" + ), + legend_format="apply", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Bytes / Written", + description="The bytes per write", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + metric="raft_engine_write_size", + hide_count=True, + ), + graph_panel( + title="WAL Duration Breakdown (999%)", + description="999% duration breakdown of WAL write operation", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.999, "raft_engine_write_leader_duration_seconds" + ), + legend_format="total", + ), + target( + expr=expr_histogram_quantile( + 0.999, "raft_engine_sync_log_duration_seconds" + ), + legend_format="sync", + ), + target( + expr=expr_histogram_quantile( + 
0.999, "raft_engine_allocate_log_duration_seconds" + ), + legend_format="allocate", + ), + target( + expr=expr_histogram_quantile( + 0.999, "raft_engine_rotate_log_duration_seconds" + ), + legend_format="rotate", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="File Count", + description="The average number of files", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_avg( + "raft_engine_log_file_count", + by_labels=["type"], + ), + ), + target( + expr=expr_avg( + "raft_engine_swap_file_count", + by_labels=[], # override default by instance. + ), + legend_format="swap", + ), + target( + expr=expr_avg( + "raft_engine_recycled_file_count", + by_labels=["type"], + ), + legend_format="{{type}}-recycle", + ), + ], + ), + graph_panel( + title="Other Durations (99%)", + description="The 99% duration of operations other than write", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + targets=[ + target( + expr=expr_histogram_quantile( + 0.999, "raft_engine_read_entry_duration_seconds" + ), + legend_format="read_entry", + ), + target( + expr=expr_histogram_quantile( + 0.999, "raft_engine_read_message_duration_seconds" + ), + legend_format="read_message", + ), + target( + expr=expr_histogram_quantile( + 0.999, "raft_engine_purge_duration_seconds" + ), + legend_format="purge", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Entry Count", + description="The average number of log entries", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_avg( + "raft_engine_log_entry_count", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def Titan() -> RowPanel: + layout = Layout(title="Titan", repeat="titan_db") + layout.row( + [ + graph_panel( + title="Blob file count", + targets=[ + target( + expr=expr_sum( + "tikv_engine_titandb_num_live_blob_file", + label_selectors=['db="$titan_db"'], + by_labels=[], # override default by instance. 
+ ), + legend_format="live blob file num", + ), + target( + expr=expr_sum( + "tikv_engine_titandb_num_obsolete_blob_file", + label_selectors=['db="$titan_db"'], + by_labels=[], # override default by instance. + ), + legend_format="obsolete blob file num", + ), + ], + ), + graph_panel( + title="Blob file size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_engine_titandb_live_blob_file_size", + label_selectors=['db="$titan_db"'], + by_labels=[], # override default by instance. + ), + legend_format="live blob file size", + ), + target( + expr=expr_sum( + "tikv_engine_titandb_obsolete_blob_file_size", + label_selectors=['db="$titan_db"'], + by_labels=[], # override default by instance. + ), + legend_format="obsolete blob file size", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Live blob size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_engine_titandb_live_blob_size", + label_selectors=['db="$titan_db"'], + ), + legend_format="live blob size", + ), + ], + ), + graph_panel( + title="Blob cache hit", + description="The hit rate of block cache", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_operator( + expr_sum_rate( + "tikv_engine_blob_cache_efficiency", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_cache_hit"', + ], + by_labels=[], # override default by instance. + ), + "/", + expr_operator( + expr_sum_rate( + "tikv_engine_blob_cache_efficiency", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_cache_hit"', + ], + by_labels=[], # override default by instance. + ), + "+", + expr_sum_rate( + "tikv_engine_blob_cache_efficiency", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_cache_miss"', + ], + by_labels=[], # override default by instance. 
+ ), + ), + ), + legend_format="all", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Iter touched blob file count", + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_iter_touch_blob_file_count", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_iter_touch_blob_file_count_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_iter_touch_blob_file_count", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_iter_touch_blob_file_count_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_iter_touch_blob_file_count", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_iter_touch_blob_file_count_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_iter_touch_blob_file_count", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_iter_touch_blob_file_count_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob cache size", + description="The blob cache size.", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_topk( + 20, + "%s" + % expr_avg( + "tikv_engine_blob_cache_size_bytes", + label_selectors=['db="$titan_db"'], + by_labels=["cf", "instance"], + ), + ), + legend_format="{{instance}}-{{cf}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob key size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_key_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_key_size_average"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_key_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_key_size_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_key_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_key_size_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_key_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_key_size_max"', + ], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob value size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_value_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_value_size_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_value_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_value_size_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_value_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_value_size_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_value_size", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_value_size_max"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="max", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob get operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_locate", + label_selectors=[ + 'db="$titan_db"', + 'type="number_blob_get"', + ], + by_labels=[], # override default by instance. + ), + legend_format="get", + ), + ], + ), + graph_panel( + title="Blob get duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_get_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_average"'], + by_labels=["type"], + ), + legend_format="avg-{{type}}", + ), + target( + expr=expr_avg( + "tikv_engine_blob_get_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile95"', + ], + by_labels=["type"], + ), + legend_format="95%-{{type}}", + ), + target( + expr=expr_avg( + "tikv_engine_blob_get_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile99"', + ], + by_labels=["type"], + ), + legend_format="99%-{{type}}", + ), + target( + expr=expr_max( + "tikv_engine_blob_get_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_max"'], + by_labels=["type"], + ), + legend_format="max-{{type}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob file discardable ratio distribution", + targets=[ + target( + expr=expr_sum( + "tikv_engine_titandb_blob_file_discardable_ratio", + label_selectors=['db="$titan_db"'], + by_labels=["ratio"], + ), + ), + ], + ), + graph_panel( + title="Blob iter operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_locate", + label_selectors=[ + 'db="$titan_db"', + 'type="number_blob_seek"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="seek", + ), + target( + expr=expr_sum_rate( + "tikv_engine_blob_locate", + label_selectors=[ + 'db="$titan_db"', + 'type="number_blob_prev"', + ], + by_labels=[], # override default by instance. + ), + legend_format="prev", + ), + target( + expr=expr_sum_rate( + "tikv_engine_blob_locate", + label_selectors=[ + 'db="$titan_db"', + 'type="number_blob_next"', + ], + by_labels=[], # override default by instance. + ), + legend_format="next", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob seek duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_seek_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_average"'], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_seek_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_seek_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_seek_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_max"'], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob next duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_next_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_average"'], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_next_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile95"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_next_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_next_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_max"'], + by_labels=[], # override default by instance. + ), + legend_format="max", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob prev duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_prev_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_average"'], + by_labels=["type"], + ), + legend_format="avg-{{type}}", + ), + target( + expr=expr_avg( + "tikv_engine_blob_prev_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile95"', + ], + by_labels=["type"], + ), + legend_format="95%-{{type}}", + ), + target( + expr=expr_avg( + "tikv_engine_blob_prev_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type=~".*_percentile99"', + ], + by_labels=["type"], + ), + legend_format="99%-{{type}}", + ), + target( + expr=expr_max( + "tikv_engine_blob_prev_micros_seconds", + label_selectors=['db="$titan_db"', 'type=~".*_max"'], + by_labels=["type"], + ), + legend_format="max-{{type}}", + ), + ], + ), + graph_panel( + title="Blob keys flow", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_flow_bytes", + label_selectors=['db="$titan_db"', 'type=~"keys.*"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob file read duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_file_read_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_read_micros_average"', + ], + by_labels=["type"], + ), + 
legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_file_read_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_read_micros_percentile99"', + ], + by_labels=["type"], + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_file_read_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_read_micros_percentile95"', + ], + by_labels=["type"], + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_file_read_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_read_micros_max"', + ], + by_labels=["type"], + ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob bytes flow", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_flow_bytes", + label_selectors=['db="$titan_db"', 'type=~"bytes.*"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob file write duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_file_write_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_write_micros_average"', + ], + by_labels=["type"], + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_file_write_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_write_micros_percentile99"', + ], + by_labels=["type"], + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_file_write_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_write_micros_percentile95"', + ], + by_labels=["type"], + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_file_write_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_write_micros_max"', + ], + by_labels=["type"], + ), + legend_format="max", + ), + ], + ), + graph_panel( + 
title="Blob file sync operations", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_file_synced", + label_selectors=['db="$titan_db"'], + by_labels=[], # override default by instance. + ), + legend_format="sync", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob GC action", + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_gc_action_count", + label_selectors=['db="$titan_db"'], + by_labels=["type"], + ), + ), + ], + ), + graph_panel( + title="Blob file sync duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_file_sync_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_sync_micros_average"', + ], + by_labels=["type"], + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_file_sync_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_sync_micros_percentile95"', + ], + by_labels=["type"], + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_file_sync_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_sync_micros_percentile99"', + ], + by_labels=["type"], + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_file_sync_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_file_sync_micros_max"', + ], + by_labels=["type"], + ), + legend_format="max", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob GC duration", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_gc_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_micros_average"', + ], + by_labels=["type"], + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_gc_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_micros_percentile95"', + ], + 
by_labels=["type"], + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_gc_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_micros_percentile99"', + ], + by_labels=["type"], + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_gc_micros_seconds", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_micros_max"', + ], + by_labels=["type"], + ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob GC keys flow", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_gc_flow_bytes", + label_selectors=['db="$titan_db"', 'type=~"keys.*"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob GC input file size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_gc_input_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_input_file_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_gc_input_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_input_file_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_gc_input_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_input_file_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_gc_input_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_input_file_max"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob GC bytes flow", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_gc_flow_bytes", + label_selectors=['db="$titan_db"', 'type=~"bytes.*"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Blob GC output file size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_engine_blob_gc_output_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_output_file_average"', + ], + by_labels=[], # override default by instance. + ), + legend_format="avg", + ), + target( + expr=expr_avg( + "tikv_engine_blob_gc_output_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_output_file_percentile95"', + ], + by_labels=[], # override default by instance. + ), + legend_format="95%", + ), + target( + expr=expr_avg( + "tikv_engine_blob_gc_output_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_output_file_percentile99"', + ], + by_labels=[], # override default by instance. + ), + legend_format="99%", + ), + target( + expr=expr_max( + "tikv_engine_blob_gc_output_file", + label_selectors=[ + 'db="$titan_db"', + 'type="blob_gc_output_file_max"', + ], + by_labels=[], # override default by instance. 
+ ), + legend_format="max", + ), + ], + ), + graph_panel( + title="Blob GC file count", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_engine_blob_gc_file_count", + label_selectors=['db="$titan_db"'], + by_labels=["type"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def PessimisticLocking() -> RowPanel: + layout = Layout(title="Pessimistic Locking") + layout.row( + [ + graph_panel( + title="Lock Manager Thread CPU", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"waiter_manager.*"'], + by_labels=["instance", "name"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"deadlock_detect.*"'], + by_labels=["instance", "name"], + ), + ), + ], + ), + graph_panel( + title="Lock Manager Handled tasks", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_lock_manager_task_counter", + by_labels=["type"], + ), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Waiter lifetime duration", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + metric="tikv_lock_manager_waiter_lifetime_duration", + hide_count=True, + ), + graph_panel( + title="Lock Waiting Queue", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_sum_aggr_over_time( + "tikv_lock_manager_wait_table_status", + "max", + "30s", + by_labels=["type"], + ), + ), + target( + expr=expr_sum_aggr_over_time( + "tikv_lock_wait_queue_entries_gauge_vec", + "max", + "30s", + by_labels=["type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Deadlock detect duration", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + metric="tikv_lock_manager_detect_duration", + hide_count=True, + ), + graph_panel( + title="Detect error", + 
yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_lock_manager_error_counter", by_labels=["type"] + ), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Deadlock detector leader", + targets=[ + target( + expr=expr_sum_aggr_over_time( + "tikv_lock_manager_detector_leader_heartbeat", + "max", + "30s", + ), + ) + ], + ), + graph_panel( + title="Total pessimistic locks memory size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_simple("tikv_pessimistic_lock_memory_size"), + legend_format="{{instance}}", + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="In-memory pessimistic locking result", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_in_memory_pessimistic_locking", by_labels=["result"] + ), + ) + ], + ), + graph_panel( + title="Pessimistic lock activities", + description="The number of active keys and waiters.", + targets=[ + target( + expr=expr_sum( + "tikv_lock_wait_queue_entries_gauge_vec", by_labels=["type"] + ), + ) + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Lengths of lock wait queues when transaction enqueues", + description="The length includes the entering transaction itself", + yaxis=yaxis(format=UNITS.SHORT), + metric="tikv_lock_wait_queue_length_bucket", + ) + ] + ) + return layout.row_panel + + +def PointInTimeRestore() -> RowPanel: + layout = Layout(title="Point In Time Restore") + layout.row( + [ + graph_panel( + title="CPU Usage", + description=None, + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"sst_.*"', + ], + ), + ), + ], + ), + graph_panel( + title="P99 RPC Duration", + description=None, + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=1), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_import_rpc_duration", + label_selectors=[ + 
'request="apply"', + ], + ), + legend_format="total-99", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_import_apply_duration", + label_selectors=[ + 'type=~"queue|exec_download"', + ], + by_labels=["le", "type"], + ), + legend_format="(DL){{type}}-99", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_import_engine_request", + by_labels=["le", "type"], + ), + legend_format="(AP){{type}}-99", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Import RPC Ops", + description="", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_rpc_duration_count", + label_selectors=[ + 'request="apply"', + ], + by_labels=["instance", "request"], + ), + ), + target( + expr=expr_sum_rate( + "tikv_import_rpc_duration_count", + label_selectors=[ + 'request!="switch_mode"', + ], + by_labels=["request"], + ), + legend_format="total-{{request}}", + ), + ], + ), + graph_panel( + title="Cache Events", + description=None, + yaxes=yaxes(left_format=UNITS.COUNTS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_apply_cache_event", + label_selectors=[], + by_labels=["type", "instance"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Overall RPC Duration", + description=None, + yaxis=yaxis(format=UNITS.SECONDS, log_base=1), + metric="tikv_import_rpc_duration_bucket", + label_selectors=[ + 'request="apply"', + ], + ), + heatmap_panel( + title="Read File into Memory Duration", + description=None, + yaxis=yaxis(format=UNITS.SECONDS, log_base=1), + metric="tikv_import_apply_duration_bucket", + label_selectors=[ + 'type="exec_download"', + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Queuing Time", + description=None, + yaxis=yaxis(format=UNITS.SECONDS, log_base=1), + metric="tikv_import_engine_request_bucket", + label_selectors=[ + 'type="queuing"', + ], + ), + graph_panel( + title="Apply Request Throughput", + description=None, + 
yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_apply_bytes_sum", + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Downloaded File Size", + description=None, + yaxis=yaxis(format=UNITS.BYTES_IEC), + metric="tikv_import_download_bytes_bucket", + ), + heatmap_panel( + title="Apply Batch Size", + description=None, + yaxis=yaxis(format=UNITS.BYTES_IEC), + metric="tikv_import_apply_bytes_bucket", + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Blocked by Concurrency Time", + description=None, + yaxis=yaxis(format=UNITS.SECONDS, log_base=1), + metric="tikv_import_engine_request_bucket", + label_selectors=[ + 'type="get_permit"', + ], + ), + graph_panel( + title="Apply Request Speed", + description=None, + yaxes=yaxes( + left_format=UNITS.OPS_PER_SEC, + log_base=1, + ), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_applier_event", + label_selectors=[ + 'type="begin_req"', + ], + by_labels=["instance", "type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Cached File in Memory", + description=None, + yaxes=yaxes(left_format=UNITS.BYTES_IEC, log_base=1), + targets=[ + target( + expr=expr_sum("tikv_import_apply_cached_bytes"), + ), + ], + ), + graph_panel( + title="Engine Requests Unfinished", + description=None, + yaxes=yaxes( + left_format=UNITS.SHORT, + log_base=1, + ), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_applier_event", + label_selectors=[ + 'type!="begin_req"', + ], + by_labels=["instance", "type"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Apply Time", + description=None, + yaxis=yaxis(format=UNITS.SECONDS, log_base=1), + metric="tikv_import_engine_request_bucket", + label_selectors=[ + 'type="apply"', + ], + ), + graph_panel( + title="Raft Store Memory Usage", + description="", + yaxes=yaxes(left_format=UNITS.BYTES_IEC, log_base=1), + targets=[ + target( + expr=expr_sum( + 
"tikv_server_mem_trace_sum", + label_selectors=[ + 'name=~"raftstore-.*"', + ], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def ResolvedTS() -> RowPanel: + layout = Layout(title="Resolved TS") + layout.row( + [ + graph_panel( + title="Resolved TS Worker CPU", + description="The CPU utilization of resolved ts worker", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"resolved_ts.*"', + ], + ), + ) + ], + ), + graph_panel( + title="Advance ts Worker CPU", + description="The CPU utilization of advance ts worker", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"advance_ts.*"', + ], + ), + ) + ], + ), + graph_panel( + title="Scan lock Worker CPU", + description="The CPU utilization of scan lock worker", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"inc_scan.*"', + ], + ), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Max gap of resolved-ts", + description="The gap between resolved ts (the maximum candidate of safe-ts) and current time.", + yaxes=yaxes(left_format=UNITS.MILLI_SECONDS), + targets=[ + target( + expr=expr_sum( + "tikv_resolved_ts_min_resolved_ts_gap_millis", + ), + ) + ], + ), + graph_panel( + title="Max gap of follower safe-ts", + description="The gap between now() and the minimal (non-zero) safe ts for followers", + yaxes=yaxes(left_format=UNITS.MILLI_SECONDS), + targets=[ + target( + expr=expr_sum( + "tikv_resolved_ts_min_follower_safe_ts_gap_millis", + ), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Min Resolved TS Region", + description="The region that has minimal resolved ts", + targets=[ + target( + expr=expr_sum( + "tikv_resolved_ts_min_resolved_ts_region", + ), + ) + ], + ), + 
graph_panel( + title="Min Safe TS Follower Region", + description="The region id of the follower that has minimal safe ts", + targets=[ + target( + expr=expr_sum( + "tikv_resolved_ts_min_follower_safe_ts_region", + ), + ) + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Check leader duration", + description="The time consumed when handle a check leader request", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_resolved_ts_check_leader_duration_seconds_bucket", + ), + graph_panel( + title="Max gap of resolved-ts in region leaders", + description="The gap between resolved ts of leaders and current time", + yaxes=yaxes(left_format=UNITS.MILLI_SECONDS), + targets=[ + target( + expr=expr_sum( + "tikv_resolved_ts_min_leader_resolved_ts_gap_millis", + ), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="99% CheckLeader request region count", + description="Bucketed histogram of region count in a check leader request", + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_check_leader_request_item_count", + by_labels=["instance"], + ), + legend_format="{{instance}}", + ) + ], + ), + heatmap_panel( + title="Initial scan backoff duration", + description="The backoff duration before starting initial scan", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket", + ), + ] + ) + layout.row( + [ + graph_panel( + title="Lock heap size", + description="Total bytes in memory of resolved-ts observe regions's lock heap", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_resolved_ts_lock_heap_bytes", + ), + ) + ], + ), + graph_panel( + title="Min Leader Resolved TS Region", + description="The region that its leader has minimal resolved ts.", + targets=[ + target( + expr=expr_sum( + "tikv_resolved_ts_min_leader_resolved_ts_region", + ), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Observe region status", + description="The 
status of resolved-ts observe regions", + targets=[ + target( + expr=expr_sum( + "tikv_resolved_ts_region_resolve_status", + by_labels=["type"], + ), + ) + ], + ), + graph_panel( + title="Fail advance ts count", + description="The count of fail to advance resolved-ts", + targets=[ + target( + expr=expr_sum_delta( + "tikv_resolved_ts_fail_advance_count", + by_labels=["instance", "reason"], + ), + ), + target( + expr=expr_sum_delta( + "tikv_raftstore_check_stale_peer", + by_labels=["instance"], + ), + legend_format="{{instance}}-stale-peer", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="99% CheckLeader request size", + description="Bucketed histogram of the check leader request size", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_check_leader_request_size_bytes", + by_labels=["instance"], + ), + legend_format="{{instance}}", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_check_leader_request_item_count", + by_labels=["instance"], + ), + legend_format="{{instance}}-check-num", + ), + ], + ), + graph_panel( + title="Pending command size", + description="Total bytes of pending commands in the channel", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_avg( + "tikv_resolved_ts_channel_penging_cmd_bytes_total", + ), + ) + ], + ), + ] + ) + return layout.row_panel + + +def Memory() -> RowPanel: + layout = Layout(title="Memory") + layout.row( + [ + graph_panel( + title="Allocator Stats", + description=None, + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum( + "tikv_allocator_stats", by_labels=["instance", "type"] + ) + ) + ], + ), + graph_panel( + title="Send Allocated(+) / Release Received(-) Bytes Rate", + description=None, + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_operator( + expr_sum_rate( + "tikv_allocator_thread_allocation", + label_selectors=['type="alloc"'], + 
by_labels=["thread_name"], + ), + "-", + expr_sum_rate( + "tikv_allocator_thread_allocation", + label_selectors=['type="dealloc"'], + by_labels=["thread_name"], + ), + ), + legend_format="{{thread_name}}", + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Newly Allocated Bytes by Thread", + description=None, + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_allocator_thread_allocation", + label_selectors=['type="alloc"'], + by_labels=["thread_name"], + ), + ) + ], + ), + graph_panel( + title="Recently Released Bytes by Thread", + description=None, + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_allocator_thread_allocation", + label_selectors=['type="dealloc"'], + by_labels=["thread_name"], + ), + ) + ], + ), + ] + ) + return layout.row_panel + + +def BackupImport() -> RowPanel: + layout = Layout(title="Backup & Import") + layout.row( + [ + graph_panel( + title="Backup CPU Utilization", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"b.*k.*w.*k.*"', + ], + ), + legend_format="backup-{{instance}}", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"backup_io"', + ], + ), + legend_format="backup-io-{{instance}}", + ), + target( + expr=expr_simple( + "tikv_backup_softlimit", + ), + legend_format="backup-auto-throttle-{{instance}}", + ), + ], + ), + graph_panel( + title="Backup Thread Count", + targets=[ + target( + expr=expr_sum( + "tikv_backup_thread_pool_size", + ), + ), + ], + ), + graph_panel( + title="Backup Errors", + description="", + targets=[ + target( + expr=expr_sum_delta( + "tikv_backup_error_counter", + by_labels=["instance", "error"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Backup Write CF SST Size", + yaxis=yaxis(format=UNITS.BYTES_IEC), + 
metric="tikv_backup_range_size_bytes_bucket", + label_selectors=['cf="write"'], + ), + heatmap_panel( + title="Backup Default CF SST Size", + yaxis=yaxis(format=UNITS.BYTES_IEC), + metric="tikv_backup_range_size_bytes_bucket", + label_selectors=['cf="default"'], + ), + graph_panel( + title="Backup SST Generation Throughput", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_backup_range_size_bytes_sum", + by_labels=[], # override default by instance. + ), + legend_format="total", + ), + target( + expr=expr_sum_rate( + "tikv_backup_range_size_bytes_sum", + by_labels=["instance", "cf"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Backup Scan SST Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_backup_range_duration_seconds_bucket", + label_selectors=['type="snapshot"'], + ), + heatmap_panel( + title="Backup Scan SST Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_backup_range_duration_seconds_bucket", + label_selectors=['type="scan"'], + ), + heatmap_panel( + title="Backup Save SST Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_backup_range_duration_seconds_bucket", + label_selectors=['type=~"save.*"'], + ), + graph_panel( + title="Backup SST Duration", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.999, + "tikv_backup_range_duration_seconds", + by_labels=["type"], + ), + legend_format="{{type}}-99.9%", + ), + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_backup_range_duration_seconds", + by_labels=["type"], + ), + legend_format="{{type}}-99%", + ), + target( + expr=expr_operator( + expr_sum( + "tikv_backup_range_duration_seconds_sum", + by_labels=["type"], + ), + "/", + expr_sum( + "tikv_backup_range_duration_seconds_count", + by_labels=["type"], + ), + ), + legend_format="{{type}}-avg", + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="External Storage Create 
Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_external_storage_create_seconds_bucket", + ), + graph_panel_histogram_quantiles( + title="External Storage Create Duration", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_external_storage_create_seconds", + hide_avg=True, + hide_count=True, + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Checksum Request Duration", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_coprocessor_request_duration_seconds", + label_selectors=['req=~"analyze.*|checksum.*"'], + by_labels=["req"], + hide_avg=True, + hide_count=True, + ), + graph_panel( + title="IO Utilization", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "node_disk_io_time_seconds_total", + by_labels=["instance", "device"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Import CPU Utilization", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"sst_.*"'], + by_labels=["instance"], + ), + legend_format="import-{{instance}}", + ), + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"sst_.*"'], + by_labels=["instance", "tid"], + ).extra("> 0"), + legend_format="import-{{instance}}-{{tid}}", + hide=True, + ), + target( + expr=expr_count_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"sst_.*"'], + ), + legend_format="import-count-{{instance}}", + hide=True, + ), + ], + ), + graph_panel( + title="Import Thread Count", + targets=[ + target( + expr=expr_count_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=['name=~"sst_.*"'], + by_labels=["instance"], + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="Import Errors", + targets=[ + target( + expr=expr_sum_delta( + "tikv_import_error_counter", + by_labels=["type", "error", 
"instance"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel_histogram_quantiles( + title="Import RPC Duration", + description="", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_import_rpc_duration", + by_labels=["request"], + hide_count=True, + ), + graph_panel( + title="Import RPC Ops", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_rpc_duration_count", + label_selectors=['request!="switch_mode"'], + by_labels=["request"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Import Write/Download RPC Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_import_rpc_duration_bucket", + label_selectors=['request=~"download|write"'], + ), + heatmap_panel( + title="Import Wait Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_import_download_duration_bucket", + label_selectors=['type="queue"'], + ), + heatmap_panel( + title="Import Read SST Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_import_download_duration_bucket", + label_selectors=['type="read"'], + ), + heatmap_panel( + title="Import Rewrite SST Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_import_download_duration_bucket", + label_selectors=['type="rewrite"'], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Import Ingest RPC Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_import_rpc_duration_bucket", + label_selectors=['request=~"ingest"'], + ), + heatmap_panel( + title="Import Ingest SST Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_import_ingest_duration_bucket", + label_selectors=['type=~"ingest"'], + ), + heatmap_panel( + title="Import Ingest SST Bytes", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_import_ingest_byte_bucket", + ), + graph_panel( + title="Import Download SST Throughput", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + 
"tikv_import_download_bytes_sum", + ), + ), + target( + expr=expr_sum_rate( + "tikv_import_download_bytes_sum", + by_labels=[], + ), + legend_format="total", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Import Local Write keys", + targets=[ + target( + expr=expr_sum_delta( + "tikv_import_local_write_keys", + by_labels=["type", "instance"], + ), + ), + ], + ), + graph_panel( + title="Import Local Write bytes", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_import_local_write_bytes", + by_labels=["type", "instance"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="TTL Expired", + targets=[ + target( + expr=expr_sum( + "tikv_backup_raw_expired_count", + ), + ), + target( + expr=expr_sum( + "tikv_backup_raw_expired_count", + by_labels=[], + ), + legend_format="sum", + ), + ], + ), + graph_panel( + title="cloud request", + description="", + yaxes=yaxes(left_format=UNITS.SHORT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_cloud_request_duration_seconds_count", + by_labels=["cloud", "req"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +def Encryption() -> RowPanel: + layout = Layout(title="Encryption") + layout.row( + [ + graph_panel( + title="Encryption data keys", + description="Total number of encryption data keys in use", + targets=[ + target( + expr=expr_sum( + "tikv_encryption_data_key_storage_total", + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + title="Encrypted files", + description="Number of files being encrypted", + targets=[ + target( + expr=expr_sum( + "tikv_encryption_file_num", + ), + legend_format="{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Encryption initialized", + description="Flag to indicate if encryption is initialized", + targets=[ + target( + expr=expr_simple( + "tikv_encryption_is_initialized", + ), + legend_format="{{instance}}", + ), + ], + ), + graph_panel( + 
title="Encryption meta files size", + description="Total size of encryption meta files", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_simple( + "tikv_encryption_meta_file_size_bytes", + ), + legend_format="{{name}}-{{instance}}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Encrypt/decrypt data nanos", + description="", + targets=[ + target( + expr=expr_sum_rate( + "tikv_coprocessor_rocksdb_perf", + label_selectors=[ + 'metric="encrypt_data_nanos"', + ], + by_labels=["req"], + ), + legend_format="encrypt-{{req}}", + ), + target( + expr=expr_sum_rate( + "tikv_coprocessor_rocksdb_perf", + label_selectors=[ + 'metric="decrypt_data_nanos"', + ], + by_labels=["req"], + ), + legend_format="decrypt-{{req}}", + ), + ], + ), + graph_panel_histogram_quantiles( + title="Read/write encryption meta duration", + description="Writing or reading file duration (second)", + yaxes=yaxes(left_format=UNITS.SECONDS), + metric="tikv_encryption_write_read_file_duration_seconds", + hide_count=True, + ), + ] + ) + return layout.row_panel + + +def BackupLog() -> RowPanel: + layout = Layout(title="Backup Log") + layout.row( + [ + stat_panel( + title="Endpoint Status", + targets=[ + target( + expr=expr_simple("tikv_log_backup_enabled"), + legend_format="{{ instance }}", + ), + ], + mappings=[ + StatValueMappings( + StatValueMappingItem("Disabled", "0", "red"), + StatValueMappingItem("Enabled", "1", "green"), + ), + ], + ), + stat_panel( + title="Task Status", + targets=[ + target( + expr=expr_min("tikv_log_backup_task_status"), + ), + ], + mappings=[ + StatValueMappings( + StatValueMappingItem("Running", "0", "green"), + StatValueMappingItem("Paused", "1", "yellow"), + StatValueMappingItem("Error", "2", "red"), + ), + ], + ), + stat_panel( + title="Advancer Owner", + text_mode="name", + targets=[ + target( + expr="tidb_log_backup_advancer_owner > 0", + legend_format="{{ instance }}", + ), + ], + ), + stat_panel( + title="Average Flush Size", 
+ description="The average flush size of last 30mins.", + format=UNITS.BYTES_IEC, + targets=[ + target( + expr=expr_operator( + expr_sum_increase( + "tikv_log_backup_flush_file_size_sum", + range_selector="30m", + ), + "/", + expr_sum_increase( + "tikv_log_backup_flush_duration_sec_count", + label_selectors=['stage=~"save_files"'], + range_selector="30m", + ), + ), + legend_format="{{ instance }}", + ), + ], + ), + ] + ) + layout.row( + [ + stat_panel( + title="Flushed Files (Last 30m) Per Host", + description="The current total flushed file number of this run.", + decimals=0, + targets=[ + target( + expr=expr_sum_delta( + "tikv_log_backup_flush_file_size_count", + range_selector="30m", + ).extra("> 0"), + ), + ], + ), + stat_panel( + title="Flush Times (Last 30m)", + description="This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + decimals=0, + targets=[ + target( + expr=expr_sum_delta( + "tikv_log_backup_flush_duration_sec_count", + range_selector="30m", + label_selectors=['stage=~"save_files"'], + ), + ), + ], + ), + stat_panel( + title="Total Flushed Size (Last 30m)", + description="This is the summary of the size has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + format=UNITS.BYTES_IEC, + targets=[ + target( + expr=expr_sum_delta( + "tikv_log_backup_flush_file_size_sum", + range_selector="30m", + ), + ), + ], + ), + stat_panel( + title="Flush Files (Last 30m)", + description="This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + decimals=0, + targets=[ + target( + expr=expr_sum_delta( + "tikv_log_backup_flush_file_size_count", + range_selector="30m", + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + 
title="CPU Usage", + description="The CPU utilization of log backup threads. \n**(Note this is the average usage for a period of time, some peak of CPU usage may be lost.)**", + yaxes=yaxes(left_format=UNITS.PERCENT_UNIT), + targets=[ + target( + expr=expr_sum_rate( + "tikv_thread_cpu_seconds_total", + label_selectors=[ + 'name=~"backup_stream|log-backup-scan(-[0-9]+)?"' + ], + ), + ) + ], + ), + graph_panel( + title="Handle Event Rate", + description="", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_log_backup_handle_kv_batch_sum", + ), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Initial Scan Generate Event Throughput", + description="The data rate of initial scanning emitting events.", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_log_backup_incremental_scan_bytes_sum", + ), + ) + ], + ), + graph_panel( + title="Abnormal Checkpoint TS Lag", + description=None, + yaxes=yaxes(left_format=UNITS.MILLI_SECONDS), + targets=[ + target( + expr=expr_operator( + "time() * 1000", + "-", + expr_max( + "tidb_log_backup_last_checkpoint", by_labels=["task"] + ).extra("/ 262144 > 0"), + ), + legend_format="{{ task }}", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Memory Of Events", + description="The estimated memory usage by the streaming backup module.", + yaxes=yaxes(left_format=UNITS.BYTES_IEC), + targets=[ + target( + expr=expr_sum("tikv_log_backup_heap_memory"), + ) + ], + ), + graph_panel( + title="Observed Region Count", + description="", + targets=[ + target( + expr=expr_sum("tikv_log_backup_observed_region"), + ), + target( + expr=expr_sum( + "tikv_log_backup_observed_region", + ), + legend_format="{{instance}}-total", + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Errors", + description="The errors met when backing up.\n**They are retryable, don't worry.**", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + 
targets=[ + target( + expr=expr_sum_delta( + "tikv_log_backup_errors", + range_selector="1m", + by_labels=["type", "instance"], + ), + ), + ], + ), + graph_panel( + title="Fatal Errors", + description="The errors met when backing up.", + yaxes=yaxes(left_format=UNITS.OPS_PER_MIN), + targets=[ + target( + expr=expr_sum_delta( + "tikv_log_backup_fatal_errors", + range_selector="1m", + by_labels=["type", "instance"], + ), + ), + ], + ), + graph_panel( + title="Checkpoint TS of Tasks", + description=None, + yaxes=yaxes(left_format=UNITS.DATE_TIME_ISO_TODAY), + null_point_mode=NULL_AS_NULL, + targets=[ + target( + expr=expr_max( + "tidb_log_backup_last_checkpoint", by_labels=["task"] + ).extra("/ 262144 > 0"), + ), + target(expr="time() * 1000", legend_format="Current Time"), + ], + series_overrides=[ + series_override( + alias="Current Time", + fill=0, + dashes=True, + ), + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Flush Duration", + description="The duration of flushing a batch of file.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_log_backup_flush_duration_sec_bucket", + label_selectors=['stage=~"save_files"'], + ), + heatmap_panel( + title="Initial scanning duration", + description="The duration of scanning the initial data from local DB and transform them into apply events.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_log_backup_initial_scan_duration_sec_bucket", + ), + heatmap_panel( + title="Convert Raft Event duration", + description="The duration of converting a raft request into a apply event.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_log_backup_event_handle_duration_sec_bucket", + label_selectors=['stage=~"to_stream_event"'], + ), + heatmap_panel( + title="Wait for Lock Duration", + description="The duration of waiting the mutex of the controller.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_log_backup_event_handle_duration_sec_bucket", + label_selectors=['stage=~"get_router_lock"'], + ), + ] + ) + 
layout.row( + [ + heatmap_panel( + title="Command Batch Size", + description="The number of KV-modify of each raft command observed.", + yaxis=yaxis(format=UNITS.SHORT), + metric="tikv_log_backup_handle_kv_batch_bucket", + ), + heatmap_panel( + title="Save to Temp File Duration", + description="The total cost of saving an event into temporary file.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_log_backup_event_handle_duration_sec_bucket", + label_selectors=['stage=~"save_to_temp_file"'], + ), + heatmap_panel( + title="Write to Temp File Duration", + description="The total cost of writing a event into temporary file.\nComparing to the ***Save*** duration, it doesn't contain the time cost of routing the task by range / task.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_log_backup_on_event_duration_seconds_bucket", + label_selectors=['stage="write_to_tempfile"'], + ), + heatmap_panel( + title="System Write Call Duration", + description="The duration of collecting metadata and call the UNIX system call *write* for each event.", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_log_backup_on_event_duration_seconds_bucket", + label_selectors=['stage="syscall_write"'], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Internal Message Type", + description="The internal message type count.", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC, log_base=2), + targets=[ + target( + expr=expr_sum_rate( + "tikv_log_backup_interal_actor_acting_duration_sec_count", + by_labels=["message"], + ), + ) + ], + ), + graph_panel( + title="Internal Message Handling Duration (P99)", + description="The internal handling message duration.", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tikv_log_backup_interal_actor_acting_duration_sec", + by_labels=["message"], + ), + legend_format="{{message}}", + ) + ], + ), + graph_panel( + title="Internal Message Handling Duration (P90)", + description="The internal handling 
message duration.", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.9, + "tikv_log_backup_interal_actor_acting_duration_sec", + by_labels=["message"], + ), + legend_format="{{message}}", + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Initial Scan RocksDB Throughput", + description="The internal read throughput of RocksDB during initial scanning. This panel can roughly present the read through to the hard disk of initial scanning.", + yaxes=yaxes(left_format=UNITS.BYTES_SEC_IEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_log_backup_initial_scan_operations", + label_selectors=['op=~"read_bytes"'], + by_labels=["cf"], + ), + ) + ], + ), + graph_panel( + title="Initial Scan RocksDB Operation", + description="Misc statistics of RocksDB during initial scanning.", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_log_backup_initial_scan_operations", + label_selectors=['op!~"read_bytes"'], + by_labels=["cf", "op"], + ).extra("> 0"), + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Initial Scanning Trigger Reason", + description="The reason of triggering initial scanning.", + targets=[ + target( + expr=expr_sum_rate( + "tikv_log_backup_initial_scan_reason", + by_labels=["reason"], + ), + ) + ], + ), + graph_panel( + title="Region Checkpoint Key Putting", + description="", + yaxes=yaxes(left_format=UNITS.COUNTS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_log_backup_metadata_key_operation", + by_labels=["type"], + ), + ) + ], + ), + ] + ) + layout.row( + [ + heatmap_panel( + title="Request Checkpoint Batch Size", + metric="tidb_log_backup_advancer_batch_size_bucket", + label_selectors=['type="checkpoint"'], + ), + heatmap_panel( + title="Tick Duration", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tidb_log_backup_advancer_tick_duration_sec_bucket", + label_selectors=['step="tick"'], + ), + ] + ) + layout.row( + [ + 
graph_panel( + title="Region Checkpoint Failure Reason", + description="The reason of advancer failed to be advanced.", + targets=[ + target( + expr=expr_sum_rate( + "tidb_log_backup_region_request_failure", + label_selectors=['reason!="retryable-scan-region"'], + by_labels=["reason"], + ), + ), + ], + ), + graph_panel( + title="Request Result", + description="The result of getting region checkpoints.", + targets=[ + target( + expr=expr_sum_rate( + "tidb_log_backup_region_request", + by_labels=["result"], + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Tick Duration (P99)", + description="The internal handling message duration.", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.99, + "tidb_log_backup_advancer_tick_duration_sec", + by_labels=["step"], + ), + legend_format="{{ step }}", + ) + ], + ), + graph_panel( + title="Tick Duration (P90)", + description="The internal handling message duration.", + yaxes=yaxes(left_format=UNITS.SECONDS), + targets=[ + target( + expr=expr_histogram_quantile( + 0.9, + "tidb_log_backup_advancer_tick_duration_sec", + by_labels=["step"], + ), + legend_format="{{ step }}", + ) + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="Get Region Operation Count", + description="The frequent of getting region level checkpoint.", + targets=[ + target( + expr=expr_sum_rate( + "tidb_log_backup_advancer_tick_duration_sec_count", + label_selectors=['step="get-regions-in-range"'], + by_labels=["step", "instance"], + ), + ) + ], + ), + graph_panel( + title="Try Advance Trigger Time", + description="The variant of checkpoint group.", + targets=[ + target( + expr=expr_sum_rate( + "tidb_log_backup_advancer_tick_duration_sec_count", + label_selectors=['step="try-advance"'], + by_labels=["step", "instance"], + ), + ) + ], + ), + ] + ) + return layout.row_panel + + +def SlowTrendStatistics() -> RowPanel: + layout = Layout(title="Slow Trend Statistics") + layout.row( + [ + 
graph_panel( + title="Slow Trend", + description="The changing trend of the slowness on I/O operations. 'value > 0' means the related store might have a slow trend.", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_slow_trend", + ), + ), + ], + ), + graph_panel( + title="QPS Changing Trend", + description="The changing trend of QPS on each store. 'value < 0' means the QPS has a dropping trend.", + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_slow_trend_result", + ), + ), + ], + ), + ] + ) + layout.row( + [ + graph_panel( + title="AVG Sampling Latency", + description="The sampling latency of recent queries. A larger value indicates that the store is more likely to be the slowest store.", + yaxes=yaxes(left_format=UNITS.MICRO_SECONDS), + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_slow_trend_l0", + ), + ), + ], + ), + graph_panel( + title="QPS of each store", + description="The QPS of each store.", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum( + "tikv_raftstore_slow_trend_result_value", + ), + ), + ], + ), + ] + ) + return layout.row_panel + + +#### Metrics Definition End #### + + +dashboard = Dashboard( + title="Test-Cluster-TiKV-Details", + uid="RDVQiEzZz", + timezone="browser", + refresh="1m", + inputs=[DATASOURCE_INPUT], + editable=True, + graphTooltip=GRAPH_TOOLTIP_MODE_SHARED_CROSSHAIR, + templating=Templates(), + panels=[ + Duration(), + Cluster(), + Errors(), + Server(), + gRPC(), + ThreadCPU(), + TTL(), + PD(), + IOBreakdown(), + RaftWaterfall(), + RaftIO(), + RaftPropose(), + RaftProcess(), + RaftMessage(), + RaftAdmin(), + RaftLog(), + LocalReader(), + UnifiedReadPool(), + Storage(), + FlowControl(), + SchedulerCommands(), + Scheduler(), + GC(), + Snapshot(), + Task(), + CoprocessorOverview(), + CoprocessorDetail(), + Threads(), + RocksDB(), + RaftEngine(), + Titan(), + PessimisticLocking(), + PointInTimeRestore(), + ResolvedTS(), + Memory(), + BackupImport(), + Encryption(), + 
BackupLog(), + SlowTrendStatistics(), + ], +).auto_panel_ids() diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index f73a59cf377..c36a81d522a 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -1,122 +1,102 @@ { "__inputs": [ { - "name": "DS_TEST-CLUSTER", - "label": "test-cluster", "description": "", - "type": "datasource", + "label": "test-cluster", + "name": "DS_TEST-CLUSTER", "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "7.5.11" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "panel", - "id": "heatmap", - "name": "Heatmap", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "" - }, - { - "type": "panel", - "id": "stat", - "name": "Stat", - "version": "" - }, - { - "type": "panel", - "id": "table", - "name": "Table", - "version": "" + "pluginName": "Prometheus", + "type": "datasource" } ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "${DS_TEST-CLUSTER}", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] + "list": [] }, + "description": "", "editable": true, "gnetId": null, "graphTooltip": 1, + "hideControls": false, "id": null, - "iteration": 1689914850671, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, "y": 0 }, - "id": 13620, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": true, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Write Duration Composition", + "description": "Write Pipeline Composition", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 1 + "y": 0 }, - "hiddenSeries": false, - "id": 12842, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -124,85 +104,115 @@ "lines": false, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": true, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_append_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "Write Raft Log .99", - "refId": "A", - "step": 4 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_request_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "Propose Wait .99", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "Apply Wait .99", - "refId": "C" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_commit_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "Replicate Raft Log .99", - "refId": "D" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "Apply Duration .99", - "refId": "E" - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_raft_msg_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "hide": false, - "interval": "", - "legendFormat": "Raft Message Wait .99", - "refId": "F" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write Pipeline Duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -210,6 +220,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -218,6 +229,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -228,42 +240,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": true, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Read Duration Composition", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, 
+ "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 1 + "y": 0 }, - "hiddenSeries": false, - "id": 12970, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -271,61 +297,85 @@ "lines": false, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": true, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "Get Snapshot .99", - "refId": "A", - "step": 4 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "Cop Wait .99", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_handle_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.95,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "Cop Handle .99", - "refId": "C" + "metric": "", + "query": "histogram_quantile(0.95,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Cop Read Duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -333,6 +383,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -341,6 +392,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -351,54 +403,92 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Duration", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 1 + "y": 0 }, - "id": 2742, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The storage size per TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + 
"threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 5, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 8, "x": 0, - "y": 1 + "y": 0 }, - "hiddenSeries": false, - "id": 56, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": true, @@ -410,72 +500,57 @@ "values": true }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"used\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-used", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"kv_size\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-kv_size", - "refId": "B", - "step": 10, - "hide": true - }, - { - "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"raft_size\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-raft_size", - "refId": "C", - "step": 10, - "hide": true - }, - { - 
"expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"import_size\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_store_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type = \"used\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-import_size", - "refId": "D", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum((\n tikv_store_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type = \"used\"}\n \n)) by (instance) ", + "refId": "", "step": 10, - "hide": true + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -483,14 +558,16 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -501,38 +578,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The available capacity size of each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + 
"threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 5, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 8, "x": 8, - "y": 1 + "y": 0 }, - "hiddenSeries": false, - "id": 1706, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": true, @@ -544,45 +633,57 @@ "values": true }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"available\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_store_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"available\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_store_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"available\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, 
"title": "Available size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -590,14 +691,16 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -608,38 +711,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The capacity size per TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 5, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 8, "x": 16, - "y": 1 + "y": 0 }, - "hiddenSeries": false, - "id": 1707, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": true, @@ -651,45 +766,57 @@ "values": true }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, 
"points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_store_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"capacity\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_store_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"capacity\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_store_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"capacity\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Capacity size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -697,14 +824,16 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -715,38 +844,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU usage of each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + 
"defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 9 + "y": 7 }, - "hiddenSeries": false, - "id": 1708, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -760,43 +901,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(process_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", job=~\".*tikv\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n process_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n process_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": 
"", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -804,14 +957,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -822,44 +977,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The memory usage per TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 9 + "y": 7 }, - "hiddenSeries": false, - "id": 1709, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -867,43 +1034,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": 
"null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(process_resident_memory_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=~\".*tikv\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n process_resident_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n process_resident_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Memory", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -911,14 +1090,16 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -929,38 +1110,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The I/O utilization per TiKV 
instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 17 + "y": 14 }, - "hiddenSeries": false, - "id": 1710, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -974,43 +1167,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(node_disk_io_time_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n node_disk_io_time_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{device}}", - "refId": "A", - "step": 10 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{device}}", + "metric": "", + "query": "sum(rate(\n node_disk_io_time_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "IO utilization", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1018,14 +1223,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -1036,44 +1243,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total bytes of read and write in each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 17 + "y": 14 }, - "hiddenSeries": false, - "id": 1711, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ 
-1081,52 +1300,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"kv\", type=\"wal_file_bytes\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"wal_file_bytes\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-write", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"wal_file_bytes\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"kv\", type=~\"bytes_read|iter_bytes_read\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"bytes_read|iter_bytes_read\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, 
"legendFormat": "{{instance}}-read", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"bytes_read|iter_bytes_read\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "MBps", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1134,14 +1371,16 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -1152,46 +1391,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The QPS per command in each TiKV instance", + "description": "The number of leaders on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 25 + "y": 21 }, - "hiddenSeries": false, - "id": 1713, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - 
"hideEmpty": false, - "hideZero": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -1199,44 +1448,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_grpc_msg_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (instance,type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{type}}", - "refId": "A", - "step": 10 + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "QPS", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, 
"type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1244,14 +1504,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -1262,34 +1524,44 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of the gRPC message failures", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 25 + "y": 21 }, - "hiddenSeries": false, - "id": 1712, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -1301,7 +1573,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -1309,58 +1581,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, 
"targets": [ { - "expr": "sum(rate(tikv_grpc_msg_fail_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_msg_fail_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-grpc-msg-fail", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_grpc_msg_fail_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(delta(tikv_pd_heartbeat_message_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"noop\"}[1m])) by (instance) < 1", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_pd_heartbeat_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"noop\"}\n [$__rate_interval]\n)) by (instance) < 1", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-pd-heartbeat", - "refId": "B" + "metric": "", + "query": "sum(delta(\n tikv_pd_heartbeat_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"noop\"}\n [$__rate_interval]\n)) by (instance) < 1", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_critical_error_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n 
tikv_critical_error_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{type}}", - "refId": "C" + "metric": "", + "query": "sum(rate(\n tikv_critical_error_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Errps", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1368,14 +1667,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -1386,44 +1687,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe number of leaders on each TiKV instance", + "description": "The number of leaders on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + 
"h": 7, "w": 12, "x": 0, - "y": 33 + "y": 28 }, - "hiddenSeries": false, - "id": 1715, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -1431,57 +1744,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_raftstore_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"leader\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"leader\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 - }, - { - "expr": "delta(tikv_raftstore_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"leader\"}[30s]) < -10", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B" + "metric": "", + "query": "sum((\n tikv_raftstore_region_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"leader\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Leader", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1489,14 +1800,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -1507,44 +1820,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of Regions and Buckets on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 33 + "y": 28 }, - "hiddenSeries": false, - "id": 1714, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": 
true, "total": false, "values": true @@ -1552,52 +1877,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_raftstore_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"region\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_raftstore_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"buckets\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"buckets\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - buckets", - "refId": "B", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-buckets", + 
"metric": "", + "query": "sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"buckets\"}\n \n)) by (instance) ", + "refId": "", "step": 10, - "hide": true + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Region", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1605,62 +1948,76 @@ }, "yaxes": [ { - "format": "short", - "label": "", + "decimals": null, + "format": "none", + "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": null, "description": "TiKV uptime since the last restart", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 41 + "y": 35 }, - "hiddenSeries": false, - "id": 4106, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "max": false, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": 
false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -1668,43 +2025,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "(time() - process_start_time_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=~\".*tikv\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(time() - ((\n process_start_time_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "(time() - ((\n process_start_time_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Uptime", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1712,14 +2081,16 @@ }, "yaxes": [ { - "format": "dtdurations", - "label": "", + "decimals": null, + "format": "s", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, 
"format": "short", "label": null, "logBase": 1, @@ -1730,87 +2101,107 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Cluster", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 2 + "y": 0 }, - "id": 2743, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Critical error alert", - "noDataState": "no_data", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.0, + "yaxis": "left" + } + ] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": 
"rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 24, "x": 0, - "y": 3 + "y": 0 }, - "hiddenSeries": false, - "id": 2741, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -1818,28 +2209,39 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_critical_error_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_critical_error_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}-{{type}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_critical_error_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [ @@ -1848,22 +2250,23 @@ "fill": true, "line": true, "op": "gt", - "value": 0, - "visible": true + 
"value": 0.0, + "yaxis": "left" } ], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Critical error", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -1871,14 +2274,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -1889,34 +2294,44 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "Indicates occurrences of events that make the TiKV instance unavailable temporarily, such as Write Stall, Channel Full, Scheduler Busy, and Coprocessor Full", + "description": "\nIndicates occurrences of events that make the TiKV instance unavailable\ntemporarily, such as Write Stall, Channel Full, Scheduler Busy, and Coprocessor\nFull", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 10 + "y": 7 }, - "hiddenSeries": false, - "id": 1584, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -1928,7 +2343,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": 
true, "total": false, "values": true @@ -1936,77 +2351,115 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_too_busy_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "scheduler-{{instance}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_channel_full_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_channel_full_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "channelfull-{{instance}}-{{type}}", "metric": "", - "refId": "B", - "step": 4 + "query": "sum(rate(\n 
tikv_channel_full_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_coprocessor_request_error{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type='full'}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"full\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "coprocessor-{{instance}}", "metric": "", - "refId": "C", - "step": 4 + "query": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"full\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_stall{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write_stall_percentile99\", db=~\"$db\"}) by (instance, db)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write_stall_percentile99\",db=~\"$db\"}\n \n)) by (instance, db) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "stall-{{instance}}-{{db}}", - "refId": "D" + "metric": "", + "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write_stall_percentile99\",db=~\"$db\"}\n \n)) by (instance, db) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - 
"expr": "sum(rate(tikv_raftstore_store_write_msg_block_wait_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_write_msg_block_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "store-write-channelfull-{{instance}}", - "refId": "E" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_write_msg_block_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Server is busy", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2014,86 +2467,64 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "10s", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "10s", - "handler": 1, - 
"message": "TiKV server report failures", - "name": "server report failures alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of reporting failure messages", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 10 + "y": 7 }, - "hiddenSeries": false, - "id": 18, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -2113,53 +2544,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_server_report_failure_msg_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type,instance,store_id)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_report_failure_msg_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance, store_id) ", "format": "time_series", - "intervalFactor": 2, - 
"legendFormat": "{{instance}} - {{type}} - to - {{store_id}}", - "metric": "tikv_server_raft_store_msg_total", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0, - "visible": true + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}-to-{{store_id}}", + "metric": "", + "query": "sum(rate(\n tikv_server_report_failure_msg_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance, store_id) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Server report failures", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2167,14 +2600,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -2185,34 +2620,44 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of different raftstore errors on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + 
"threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 17 + "y": 14 }, - "hiddenSeries": false, - "id": 1718, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -2232,44 +2677,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_storage_engine_async_request_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", status!~\"success|all\"}[1m])) by (instance, status)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",status!~\"success|all\"}\n [$__rate_interval]\n)) by (instance, status) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{status}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_storage_engine_async_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",status!~\"success|all\"}\n [$__rate_interval]\n)) by (instance, status) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raftstore error", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 
2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2277,52 +2733,64 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of scheduler errors per type on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 17 + "y": 14 }, - "hiddenSeries": false, - "id": 1719, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -2342,44 +2810,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(tikv_scheduler_stage_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"snapshot_err|prepare_write_err\"}[1m])) by (instance, stage)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"snapshot_err|prepare_write_err\"}\n [$__rate_interval]\n)) by (instance, stage) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{stage}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"snapshot_err|prepare_write_err\"}\n [$__rate_interval]\n)) by (instance, stage) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler error", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2387,52 +2866,64 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of different coprocessor errors on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": 
{}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 24 + "y": 21 }, - "hiddenSeries": false, - "id": 1720, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -2452,44 +2943,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_request_error{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{reason}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - 
"timeRegions": [], "timeShift": null, "title": "Coprocessor error", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2497,52 +2999,64 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of gRPC message errors per type on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 24 + "y": 21 }, - "hiddenSeries": false, - "id": 1721, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -2562,44 +3076,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, 
"seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_grpc_msg_fail_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_msg_fail_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{type}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_grpc_msg_fail_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC message error", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2607,52 +3132,64 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of dropped leaders per TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + 
"defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 31 + "y": 28 }, - "hiddenSeries": false, - "id": 1722, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -2664,7 +3201,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -2672,48 +3209,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_raftstore_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"leader\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"leader\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "B" + "metric": "", + "query": "sum(delta(\n tikv_raftstore_region_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"leader\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Leader drop", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2721,14 +3265,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -2739,34 +3285,44 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of missing leaders per TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 31 + "y": 28 }, - "hiddenSeries": false, - "id": 1723, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -2778,7 +3334,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -2786,48 +3342,55 @@ 
"lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_raftstore_leader_missing{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_leader_missing\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "B" + "metric": "", + "query": "sum((\n tikv_raftstore_leader_missing\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Leader missing", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2835,14 +3398,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ 
-2853,88 +3418,127 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "RocksDB damaged SST files", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 38 + "y": 35 }, - "hiddenSeries": false, - "id": 23763572510, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_rocksdb_damaged_files{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_rocksdb_damaged_files\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "format": 
"time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}-existed", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "((\n tikv_rocksdb_damaged_files\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "tikv_rocksdb_damaged_files_deleted{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_rocksdb_damaged_files_deleted\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-deleted", - "refId": "B" + "metric": "", + "query": "((\n tikv_rocksdb_damaged_files_deleted\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Damaged files", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -2942,7 +3546,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -2950,6 +3555,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -2960,80 +3566,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "the count of Log Replication Reject caused by follower memory 
insufficient", + "description": "The count of Log Replication Reject caused by follower memory insufficient", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 38 + "y": 35 }, - "hiddenSeries": false, - "id": 23763572588, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_server_raft_append_rejects{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_raft_append_rejects\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, 
"interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}-memory", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_server_raft_append_rejects\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Log Replication Rejected", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -3041,7 +3679,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -3049,6 +3688,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -3059,55 +3699,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Errors", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 3 + "y": 0 }, - "id": 2744, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The size of each 
column family", "editable": true, "error": false, - "fill": 3, - "grid": {}, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 3 + "y": 0 }, - "id": 33, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideZero": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -3115,39 +3798,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_engine_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_engine_size_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "CF size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -3155,14 +3854,16 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -3173,71 +3874,44 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "datasourceId": 1, - "model": { - "expr": "sum(rate(tikv_channel_full_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{type}}", - "metric": "", - "refId": "A", - "step": 10 - }, - "params": [ - "A", - "10s", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "10s", - "handler": 1, - "message": "TiKV channel full", - "name": "TiKV channel full alert", - "noDataState": "ok", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", "description": "The total number of channel full errors on each TiKV instance", "editable": true, 
"error": false, - "fill": 3, - "grid": {}, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 3 + "y": 0 }, - "id": 22, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -3249,7 +3923,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -3257,48 +3931,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_channel_full_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_channel_full_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{type}}", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", "metric": "", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0 + "query": "sum(rate(\n 
tikv_channel_full_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Channel full", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -3306,14 +3987,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -3324,39 +4007,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of leaders being written on each TiKV instance", "editable": true, "error": false, - "fill": 0, - "grid": {}, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 11 + "y": 7 }, - "id": 75, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": 
true @@ -3364,40 +4064,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_region_written_keys_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_region_written_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_region_written_keys_bucket", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_region_written_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Active written leaders", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -3405,90 +4120,180 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": 
null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 1073741824 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "B", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "approximate region size alert", - "noDataState": "no_data", - "notifications": [] + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 33, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_region_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_region_size_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Approximate region size", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The approximate Region size", "editable": true, "error": false, - "fill": 0, - "grid": {}, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 11 + "y": 14 }, - "id": 1481, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -3496,58 +4301,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + 
"alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_region_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_region_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", "metric": "", - "refId": "B", - "step": 10 + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_region_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_raftstore_region_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_region_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", "metric": "", - "refId": "C", - "step": 10 + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_region_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_raftstore_region_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_raftstore_region_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_region_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_region_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "refId": "D", - "step": 10 + "query": "(sum(rate(\n tikv_raftstore_region_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_region_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_region_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": 
"time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_region_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Approximate Region size", + "title": "Approximate region size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -3555,320 +4425,180 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 19 - }, - "id": 3638, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": false, - "rightSide": true, - "show": false, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": 
"sum(rate(tikv_raftstore_region_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", - "metric": "", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Approximate Region size Histogram", - "tooltip": { - "msResolution": false, - "shared": false, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "histogram", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The average rate of writing bytes to Regions per TiKV instance", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 19 - }, - "id": 58, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(tikv_region_written_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (instance) / 
sum(rate(tikv_region_written_bytes_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_regi", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Region average written bytes", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "cards": { - "cardPadding": null, - "cardRound": null + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", - "colorScheme": "interpolateOranges", + "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, - "dataFormat": "timeseries", + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 27 + "y": 21 }, "heatmap": {}, - "hideZeroBuckets": false, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, "highlightCards": true, - "id": 3646, + "id": 35, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, 
"targets": [ { - "expr": "sum(rate(tikv_region_written_bytes_bucket[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_regi", - "refId": "A", - "step": 10 + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_region_written_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_region_written_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Region written bytes", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, - "format": "decbytes", + "decimals": 1, + "format": "bytes", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The average rate of written keys to Regions per TiKV instance", + "description": "The average rate of writing bytes to Regions per TiKV instance", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + 
} + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 27 + "y": 21 }, - "id": 57, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -3876,40 +4606,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "connected", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_region_written_keys_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance) / sum(rate(tikv_region_written_keys_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_region_written_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_region_written_bytes_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, 
"legendFormat": "{{instance}}", - "metric": "tikv_region_written_keys_bucket", - "refId": "A", - "step": 10 + "metric": "", + "query": "(sum(rate(\n tikv_region_written_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_region_written_bytes_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Region average written keys", + "title": "Region average written bytes", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -3917,7 +4662,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -3925,6 +4671,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -3935,10 +4682,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -3946,135 +4694,204 @@ "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", - "colorScheme": "interpolateOranges", + "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, - "dataFormat": "timeseries", + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 28 }, "heatmap": {}, - "hideZeroBuckets": false, + "height": null, 
+ "hideTimeOverride": false, + "hideZeroBuckets": true, "highlightCards": true, - "id": 3647, + "id": 37, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(rate(tikv_region_written_keys_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_region_written_keys_bucket", - "refId": "A", - "step": 10 + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_region_written_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_region_written_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Region written keys", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, - "format": "short", + "decimals": 1, + "format": "none", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { 
"aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The size of requests into request batch per TiKV instance", + "description": "The average rate of written keys to Regions per TiKV instance", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 28 }, - "id": 3720, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_server_request_batch_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type) / sum(rate(tikv_server_request_batch_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{type}} avg", - "refId": "A" - }, 
- { - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_request_batch_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_region_written_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_region_written_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}} 99", - "refId": "B" + "legendFormat": "{{instance}}", + "metric": "", + "query": "(sum(rate(\n tikv_region_written_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_region_written_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Request batch input", + "title": "Region average written keys", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4082,14 +4899,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "bytes", "label": null, - "logBase": 10, + "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -4100,38 +4919,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - 
"dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe number of peers in hibernated state", + "description": "The number of peers in hibernated state", "editable": true, "error": false, - "fill": 0, - "grid": {}, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 43 + "y": 35 }, - "id": 3730, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -4139,45 +4976,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_raftstore_hibernated_peer_state{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance, state)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_hibernated_peer_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, state) ", 
"format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{state}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_raftstore_hibernated_peer_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, state) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Hibernate Peers", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4185,14 +5032,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -4203,24 +5052,44 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 43 + "y": 35 }, - "id": 7266, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -4231,50 +5100,79 @@ "min": false, "rightSide": true, "show": true, + 
"sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_server_mem_trace_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"raftstore-.*\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_server_mem_trace_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftstore-.*\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}-{{name}}", - "refId": "A" + "metric": "", + "query": "((\n tikv_server_mem_trace_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftstore-.*\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "raft_engine_memory_usage{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n raft_engine_memory_usage\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}-raft-engine", - "refId": "B" + "metric": "", + "query": "((\n raft_engine_memory_usage\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": 
[], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Memory trace", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4282,6 +5180,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -4290,78 +5189,123 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 51 + "y": 42 }, - "id": 9560, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + 
"repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(tikv_raft_entries_evict_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raft_entries_evict_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_raft_entries_evict_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft Entry Cache Evicts", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4369,7 +5313,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -4377,53 +5322,67 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] 
+ } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 51 + "y": 42 }, - "hiddenSeries": false, - "id": 12971, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -4431,44 +5390,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_address_resolve_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_address_resolve_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": 
"histogram_quantile(0.99,(\n sum(rate(\n tikv_server_address_resolve_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Resolve address duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4476,6 +5446,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -4484,6 +5455,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -4494,79 +5466,130 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 60 + "y": 49 }, - "hiddenSeries": false, - "id": 23763572581, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, + "current": true, + "hideEmpty": true, "hideZero": true, - "max": false, + "max": true, 
"min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_yatp_pool_schedule_wait_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, name))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{name}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Thread Pool Schedule Wait Duration", "tooltip": { + "msResolution": true, "shared": true, 
"sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4574,17 +5597,19 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, - "max": "30", + "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -4592,79 +5617,130 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "The average rate of written keys to Regions per TiKV instance", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 60 + "y": 49 }, - "hiddenSeries": false, - "id": 23763572692, + "height": null, + "hideTimeOverride": false, + "id": 44, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, + "current": true, + "hideEmpty": true, "hideZero": true, - "max": false, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + 
"nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_yatp_pool_schedule_wait_duration_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name) / sum(rate(tikv_yatp_pool_schedule_wait_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_yatp_pool_schedule_wait_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) / sum(rate(\n tikv_yatp_pool_schedule_wait_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{name}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "(sum(rate(\n tikv_yatp_pool_schedule_wait_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) / sum(rate(\n tikv_yatp_pool_schedule_wait_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) )", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": 
"Average Thread Pool Schedule Wait Duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4672,17 +5748,19 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, - "max": "30", + "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -4690,92 +5768,127 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": true, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 68 + "y": 56 }, - "hiddenSeries": false, - "id": 23763572784, + "height": null, + "hideTimeOverride": false, + "id": 45, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": false, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": 
[] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*/", - "stack": "A" - } - ], - "spaceLength": 10, - "stack": false, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": true, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_storage_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", metric=\"block_read_time\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_time\"}\n [$__rate_interval]\n)) by (req) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{req}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_storage_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_time\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", metric=\"block_read_time\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_time\"}\n [$__rate_interval]\n)) by (req) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "copr-{{req}}", - "queryType": "randomWalk", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_time\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Disk IO time per second", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4783,6 +5896,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ns", "label": null, "logBase": 1, @@ -4791,6 +5905,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -4801,90 +5916,127 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": true, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 68 + "y": 56 }, - "hiddenSeries": false, - "id": 23763572785, + "height": null, + "hideTimeOverride": false, + "id": 46, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": false, "linewidth": 1, - "nullPointMode": "null", + 
"links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*/", - "stack": "A" - } - ], - "spaceLength": 10, - "stack": false, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": true, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_storage_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", metric=\"block_read_byte\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_byte\"}\n [$__rate_interval]\n)) by (req) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{req}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_storage_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_byte\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", metric=\"block_read_byte\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_byte\"}\n [$__rate_interval]\n)) by (req) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, 
"legendFormat": "copr-{{req}}", - "queryType": "randomWalk", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"block_read_byte\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Disk IO bytes per second", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -4892,7 +6044,8 @@ }, "yaxes": [ { - "format": "binBps", + "decimals": null, + "format": "ns", "label": null, "logBase": 1, "max": null, @@ -4900,6 +6053,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -4910,57 +6064,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Server", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 4 + "y": 0 }, - "id": 2745, + "height": null, + "hideTimeOverride": false, + "id": 47, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of different kinds of gRPC message", + "editable": true, + "error": false, "fieldConfig": { - 
"defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 5 + "y": 0 }, - "hiddenSeries": false, - "id": 95, + "height": null, + "hideTimeOverride": false, + "id": 48, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -4968,53 +6163,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_grpc_msg_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 
10 + "metric": "", + "query": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_grpc_msg_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (type,priority)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type, priority) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}--{{priority}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "B", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-{{priority}}", + "metric": "", + "query": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type, priority) ", + "refId": "", "step": 10, - "hide": true + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC message count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5022,7 +6234,8 @@ }, "yaxes": [ { - "format": "ops", + "decimals": null, + "format": "reqps", "label": null, "logBase": 1, "max": null, @@ -5030,6 +6243,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5040,41 +6254,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": 
false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of different kinds of gRPC message which is failed", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 5 + "y": 0 }, - "hiddenSeries": false, - "id": 107, + "height": null, + "hideTimeOverride": false, + "id": 49, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5082,43 +6311,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_grpc_msg_fail_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_msg_fail_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type) 
", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_grpc_msg_fail_total", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_grpc_msg_fail_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC message failed", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5126,7 +6367,8 @@ }, "yaxes": [ { - "format": "ops", + "decimals": null, + "format": "reqps", "label": null, "logBase": 1, "max": null, @@ -5134,6 +6376,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5144,43 +6387,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The execution time of gRPC message", + "description": "The 99% percentile of execution time of gRPC message", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 13 + "y": 7 }, - "hiddenSeries": false, - "id": 98, + "height": null, + "hideTimeOverride": false, + "id": 
50, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5188,53 +6444,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_grpc_msg_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_grpc_msg_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_grpc_msg_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_grpc_msg_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (le, type,priority))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_grpc_msg_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type, priority, le) \n \n \n)) ", "format": "time_series", + "hide": true, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}}--{{priority}}", - "refId": "B", + "intervalFactor": 1, + "legendFormat": "{{type}}-{{priority}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_grpc_msg_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (type, priority, le) \n \n \n)) ", + "refId": "", "step": 10, - "hide": true + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% gRPC message duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5242,14 +6515,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, - "logBase": 10, + "logBase": 2, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5260,42 +6535,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "The average execution time of gRPC message", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": 
"absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 13 + "y": 7 }, - "hiddenSeries": false, - "id": 2532, + "height": null, + "hideTimeOverride": false, + "id": 51, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5303,53 +6592,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_grpc_msg_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type) / sum(rate(tikv_grpc_msg_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_grpc_msg_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) 
)", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "(sum(rate(\n tikv_grpc_msg_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_grpc_msg_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type,priority) / sum(rate(tikv_grpc_msg_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type,priority)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_grpc_msg_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, priority) / sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, priority) )", "format": "time_series", + "hide": true, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}}--{{priority}}", - "refId": "B", + "intervalFactor": 1, + "legendFormat": "{{type}}-{{priority}}", + "metric": "", + "query": "(sum(rate(\n tikv_grpc_msg_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, priority) / sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, priority) )", + "refId": "", 
"step": 10, - "hide": true + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Average gRPC message duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5357,6 +6663,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, @@ -5365,6 +6672,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5375,42 +6683,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "The 99% percentile of execution time of gRPC message", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 14 }, - "hiddenSeries": false, - "id": 2533, + "height": null, + "hideTimeOverride": false, + "id": 52, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5418,78 +6740,130 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": 
"null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_grpc_req_batch_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_grpc_req_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "99% request", - "refId": "A", - "step": 10 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_grpc_req_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_grpc_resp_batch_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_grpc_resp_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99% response", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_server_grpc_resp_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_server_grpc_req_batch_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_server_grpc_req_batch_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_server_grpc_req_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_grpc_req_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg request", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_server_grpc_req_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_grpc_req_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_server_grpc_resp_batch_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_server_grpc_resp_batch_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_server_grpc_resp_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n 
tikv_server_grpc_resp_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg response", - "refId": "D" + "metric": "", + "query": "(sum(rate(\n tikv_server_grpc_resp_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_grpc_resp_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_request_batch_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_request_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99% kv get batch", - "refId": "E" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_request_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_server_request_batch_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_server_request_batch_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"(sum(rate(\n tikv_server_request_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_request_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg kv batch", - "refId": "F" + "metric": "", + "query": "(sum(rate(\n tikv_server_request_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_request_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC batch size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5497,7 +6871,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -5505,6 +6880,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5515,42 +6891,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + 
"grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 14 }, - "hiddenSeries": false, - "id": 2534, + "height": null, + "hideTimeOverride": false, + "id": 53, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5558,50 +6948,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_raft_message_batch_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_message_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "99%", - "refId": "A", - "step": 10 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_message_batch_size_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_server_raft_message_batch_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_server_raft_message_batch_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_server_raft_message_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_raft_message_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_server_raft_message_batch_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_server_raft_message_batch_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "raft message batch size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5609,7 +7019,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -5617,6 +7028,7 @@ "show": true }, { + "decimals": null, 
"format": "short", "label": null, "logBase": 1, @@ -5627,41 +7039,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The QPS of different sources of gRPC request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 29 + "y": 21 }, - "hiddenSeries": false, - "id": 23763572858, + "height": null, + "hideTimeOverride": false, + "id": 54, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5669,45 +7096,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_grpc_request_source_counter_vec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (source)", + "datasource": "${DS_TEST-CLUSTER}", + 
"expr": "sum(rate(\n tikv_grpc_request_source_counter_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (source) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 10 + "intervalFactor": 1, + "legendFormat": "{{source}}", + "metric": "", + "query": "sum(rate(\n tikv_grpc_request_source_counter_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (source) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC request sources QPS", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5715,6 +7152,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -5723,6 +7161,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5733,41 +7172,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": true, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The duration of different sources of gRPC request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 
7, "w": 12, "x": 12, - "y": 29 + "y": 21 }, - "hiddenSeries": false, - "id": 23763572859, + "height": null, + "hideTimeOverride": false, + "id": 55, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5775,45 +7229,55 @@ "lines": false, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": true, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_grpc_request_source_duration_vec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (source)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_request_source_duration_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (source) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 10 + "intervalFactor": 1, + "legendFormat": "{{source}}", + "metric": "", + "query": "sum(rate(\n tikv_grpc_request_source_duration_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (source) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], 
"timeShift": null, "title": "gRPC request sources duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -5821,7 +7285,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "s", "label": null, "logBase": 1, "max": null, @@ -5829,6 +7294,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -5839,41 +7305,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The QPS of different resource groups of gRPC request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 9, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 37 + "y": 28 }, - "hiddenSeries": false, - "id": 23763573090, + "height": null, + "hideTimeOverride": false, + "id": 56, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -5881,150 +7362,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": 
true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_grpc_resource_group_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_grpc_resource_group_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 10 + "intervalFactor": 1, + "legendFormat": "{{name}}", + "metric": "", + "query": "sum(rate(\n tikv_grpc_resource_group_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC resource group QPS", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - 
"description": "The count of gRPC raft message", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 37 - }, - "hiddenSeries": false, - "id": 24763573092, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(tikv_raftstore_message_recv_by_store{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, store)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{store}}", - "metric": "tikv_raftstore_message_recv_by_store", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "gRPC message count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -6032,7 +7418,8 @@ }, "yaxes": [ { - "format": "ops", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -6040,6 +7427,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -6050,105 +7438,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "gRPC", + 
"transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 5 + "y": 0 }, - "id": 2746, + "height": null, + "hideTimeOverride": false, + "id": 57, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 1.7 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "datasourceId": 1, - "model": { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"(raftstore|rs)_.*\"}[1m])) by (instance)", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 20 - }, - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "60s", - "handler": 1, - "message": "TiKV raftstore thread CPU usage is high", - "name": "TiKV raft store CPU alert", - "noDataState": "ok", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", "description": "The CPU utilization of raftstore thread", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + 
"threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 5 + "y": 0 }, - "hiddenSeries": false, - "id": 61, + "height": null, + "hideTimeOverride": false, + "id": 58, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -6156,53 +7537,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"(raftstore|rs)_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(raftstore|rs)_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 1.7, - "visible": true + "metric": "", + "query": "sum(rate(\n 
tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(raftstore|rs)_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft store CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -6210,6 +7593,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -6218,6 +7602,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -6228,79 +7613,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 1.8 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "1m", - "handler": 1, - "message": "TiKV async apply thread CPU usage is high", - "name": "TiKV async apply CPU alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of async apply", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": 
"rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 5 + "y": 0 }, - "hiddenSeries": false, - "id": 79, + "height": null, + "hideTimeOverride": false, + "id": 59, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -6308,53 +7670,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"apply_[0-9]+\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"apply_[0-9]+\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 1.8, - "visible": true + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"apply_[0-9]+\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Async apply CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -6362,14 +7726,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -6380,81 +7746,65 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "alertRuleTags": {}, - "conditions": [ - { - "evaluator": { - "params": [ - 0.8 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "60s", - "handler": 1, - "message": "TiKV store writer thread CPU usage is high", - "name": "Store writer CPU alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of store writer thread", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" + } + ] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": 
null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 12 + "y": 7 }, - "hiddenSeries": false, - "id": 13115, + "height": null, + "hideTimeOverride": false, + "id": 60, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -6462,30 +7812,39 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"store_write.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_write.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_write.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [ @@ -6495,22 +7854,22 @@ "line": true, "op": "gt", "value": 0.8, - "visible": true + "yaxis": "left" } ], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store writer CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -6518,6 +7877,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -6526,6 +7886,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -6536,76 +7897,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 3.6 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "1m", - "handler": 1, - "message": "TiKV gRPC poll thread CPU usage is high", - "name": "TiKV gRPC poll CPU alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of gRPC", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + 
"threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 12 + "y": 7 }, - "hiddenSeries": false, - "id": 105, + "height": null, + "hideTimeOverride": false, + "id": 61, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -6613,51 +7954,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"grpc.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"grpc.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 3.6, - "visible": true + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"grpc.*\"}\n 
[$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "gRPC poll CPU", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -6665,6 +8010,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -6673,6 +8019,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -6683,69 +8030,53 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 3.6 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "1m", - "handler": 1, - "message": "TiKV scheduler worker thread CPU usage is high", - "name": "TiKV scheduler worker CPU alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of scheduler worker", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 3.6, + "yaxis": "left" + } + ] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": 
{}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 19 + "y": 14 }, - "hiddenSeries": false, - "id": 64, + "height": null, + "hideTimeOverride": false, + "id": 62, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -6757,7 +8088,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -6765,28 +8096,39 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sched_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [ @@ -6796,22 +8138,22 @@ "line": true, "op": "gt", "value": 3.6, - "visible": true + "yaxis": "left" } ], "timeFrom": 
null, - "timeRegions": [], "timeShift": null, "title": "Scheduler worker CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -6819,6 +8161,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -6827,6 +8170,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -6837,69 +8181,53 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 3.6 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "1m", - "handler": 1, - "message": "TiKV Storage ReadPool thread CPU usage is high", - "name": "TiKV Storage ReadPool CPU alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of readpool", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 3.6, + "yaxis": "left" + } + ] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 19 + "y": 14 }, - "hiddenSeries": false, - "id": 
1908, + "height": null, + "hideTimeOverride": false, + "id": 63, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -6911,7 +8239,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -6919,46 +8247,69 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"store_read_norm.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_norm.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - normal", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-normal", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_norm.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
name=~\"store_read_high.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_high.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - high", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "B", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-high", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_high.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"store_read_low.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_low.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - low", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "C", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-low", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_low.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [ @@ -6968,22 +8319,22 @@ "line": true, "op": "gt", "value": 3.6, - "visible": true + "yaxis": "left" } ], "timeFrom": null, - "timeRegions": 
[], "timeShift": null, "title": "Storage ReadPool CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -6991,6 +8342,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -6999,6 +8351,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7009,80 +8362,65 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 7.2 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "1m", - "handler": 1, - "message": "TiKV unified read pool thread CPU usage is high", - "name": "Unified read pool CPU alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of the unified read pool", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 7.2, + "yaxis": "left" + } + ] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 26 + "y": 21 }, - "hiddenSeries": false, - "id": 4287, + 
"height": null, + "hideTimeOverride": false, + "id": 64, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7090,28 +8428,39 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"unified_read_po.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified_read_po.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified_read_po.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [ @@ -7121,22 +8470,22 @@ "line": true, "op": "gt", "value": 7.2, - "visible": true + "yaxis": "left" } ], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Unified read pool CPU", "tooltip": 
{ - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -7144,6 +8493,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -7152,6 +8502,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7162,44 +8513,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of RocksDB", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 26 + "y": 21 }, - "hiddenSeries": false, - "id": 69, + "height": null, + "hideTimeOverride": false, + "id": 65, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7207,59 +8570,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": 
"7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"rocksdb.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"rocksdb.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [ - { - "colorMode": "warning", - "fill": true, - "line": true, - "op": "gt", - "value": 1 - }, - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"rocksdb.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "RocksDB CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -7267,6 +8626,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -7275,6 +8635,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7285,80 +8646,65 @@ ], "yaxis": { "align": false, - 
"alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 7.2 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "1m", - "handler": 1, - "message": "TiKV Coprocessor thread CPU alert", - "name": "TiKV Coprocessor CPU alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The CPU utilization of coprocessor", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 7.2, + "yaxis": "left" + } + ] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 33 + "y": 28 }, - "hiddenSeries": false, - "id": 78, + "height": null, + "hideTimeOverride": false, + "id": 66, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7366,46 +8712,69 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, 
"percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cop_normal.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_normal.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - normal", - "refId": "A", - "step": 4 + "intervalFactor": 1, + "legendFormat": "{{instance}}-normal", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_normal.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cop_high.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_high.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - high", - "refId": "B", - "step": 4 + "intervalFactor": 1, + "legendFormat": "{{instance}}-high", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_high.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cop_low.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_low.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - low", - "refId": "C", - "step": 4 + "intervalFactor": 1, + "legendFormat": "{{instance}}-low", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_low.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [ @@ -7415,22 +8784,22 @@ "line": true, "op": "gt", "value": 7.2, - "visible": true + "yaxis": "left" } ], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Coprocessor CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -7438,6 +8807,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -7446,6 +8816,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7456,40 +8827,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - 
"dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 33 + "y": 28 }, - "hiddenSeries": false, - "id": 2531, + "height": null, + "hideTimeOverride": false, + "id": 67, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7497,42 +8884,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"gc_worker.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"gc_worker.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": 
false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"gc_worker.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "GC worker CPU", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -7540,6 +8940,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -7548,6 +8949,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7558,42 +8960,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of split check", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 40 + "y": 35 }, + "height": null, + "hideTimeOverride": false, "id": 68, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, 
"sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7601,41 +9017,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"background.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"background.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"background.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "BackGround Worker CPU", + "title": "Background Worker CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -7643,14 +9073,16 @@ }, "yaxes": 
[ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7661,42 +9093,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 40 + "y": 35 }, - "id": 692, + "height": null, + "hideTimeOverride": false, + "id": 69, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7704,67 +9150,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/import-count.*/", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftlog_fetch.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "import-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - }, - { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance, tid) > 0", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "import-{{instance}}-{{tid}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "C", - "step": 4 - }, - { - "expr": "count(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "import-count-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "D", - "step": 4 + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftlog_fetch.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Import CPU", + "title": "Raftlog fetch Worker CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": 
null, "mode": "time", "name": null, "show": true, @@ -7772,6 +9206,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -7780,6 +9215,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7790,42 +9226,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 47 + "y": 42 }, - "id": 691, + "height": null, + "hideTimeOverride": false, + "id": 70, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7833,77 +9283,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/backup-count.*/", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"(backup-worker|bkwkr).*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "backup-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - }, - { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_endpoint\"}[1m])) by (instance)", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "backup-endpoint", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "B", - "step": 4 - }, - { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"(backup-worker|bkwkr).*\"}[1m])) by (instance, tid) > 0", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "backup-{{instance}}-{{tid}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "C", - "step": 4 - }, - { - "expr": "sum(tikv_backup_thread_pool_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by(instance)", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "backup-count-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "D", - "step": 4 + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n 
[$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Backup Worker CPU", + "title": "Import CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -7911,6 +9339,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -7919,6 +9348,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -7929,38 +9359,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 47 + "y": 42 }, - "id": 62, + "height": null, + "hideTimeOverride": false, + "id": 71, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -7968,53 +9416,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + 
"options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*tso/", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cdcwkr.*\"}[1m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - worker", - "refId": "A", - "step": 4 - }, - { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"tso\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(backup-worker|bkwkr|backup_endpoint).*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - tso", - "refId": "B", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(backup-worker|bkwkr|backup_endpoint).*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "CDC worker CPU", + "title": "Backup CPU", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, 
"mode": "time", "name": null, "show": true, @@ -8022,6 +9472,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -8030,7 +9481,8 @@ "show": true }, { - "format": "percentunit", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -8040,38 +9492,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 54 + "y": 49 }, - "id": 60, + "height": null, + "hideTimeOverride": false, + "id": 72, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -8079,40 +9549,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"cdc_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdcwkr.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-worker", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdcwkr.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-tso", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdc_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-endpoint", + "metric": "", + "query": "sum(rate(\n 
tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdc_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "CDC endpoint CPU", + "title": "CDC worker CPU", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -8120,6 +9635,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -8128,6 +9644,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -8138,40 +9655,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "The CPU utilization of raftstore thread", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 55 + "y": 49 }, - "hiddenSeries": false, - "id": 23763572511, + "height": null, + "hideTimeOverride": false, + "id": 73, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -8179,45 
+9712,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, - "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"raftlog_fetch.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso_worker\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso_worker\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Raftlog fetch Worker CPU", + "title": "TSO Worker CPU", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -8225,6 +9768,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -8233,6 +9777,7 @@ "show": true }, { + "decimals": 
null, "format": "short", "label": null, "logBase": 1, @@ -8243,44 +9788,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Thread CPU", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 74, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The CPU utilization of TSO worker", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 61 + "y": 0 }, - "hiddenSeries": false, - "id": 9962, + "height": null, + "hideTimeOverride": false, + "id": 75, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -8288,63 +9887,55 @@ "lines": true, "linewidth": 
1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"tso_worker\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_ttl_expire_kv_count_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [ - { - "colorMode": "warning", - "fill": true, - "line": true, - "op": "gt", - "value": 0.3, - "yaxis": "left" - }, - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0.8, - "yaxis": "left" + "metric": "", + "query": "sum(rate(\n tikv_ttl_expire_kv_count_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "TSO Worker CPU", + "title": "TTL expire count", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, 
"mode": "time", "name": null, "show": true, @@ -8352,7 +9943,8 @@ }, "yaxes": [ { - "format": "percentunit", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -8360,6 +9952,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -8370,100 +9963,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } - } - ], - "repeat": null, - "title": "Thread CPU", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 6 - }, - "id": 6946, - "panels": [ + }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 62 + "x": 12, + "y": 0 }, - "hiddenSeries": false, - "id": 23763573818, + "height": null, + "hideTimeOverride": false, + "id": 76, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": 
null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_ttl_expire_kv_count_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_ttl_expire_kv_size_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 10, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "E" + "metric": "", + "query": "sum(rate(\n tikv_ttl_expire_kv_size_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "TTL expire count", + "title": "TTL expire size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -8471,16 +10076,16 @@ }, "yaxes": [ { - "$$hashKey": "object:35", - "format": "short", - "label": "", + "decimals": null, + "format": "bytes", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:36", + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -8491,89 +10096,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - 
"description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 62 + "x": 0, + "y": 7 }, - "hiddenSeries": false, - "id": 23763573819, + "height": null, + "hideTimeOverride": false, + "id": 77, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_ttl_expire_kv_size_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_ttl_checker_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n 
[$__rate_interval]\n)) by (instance) )", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 10, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "E" + "metric": "", + "query": "(sum(rate(\n tikv_ttl_checker_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "TTL expire size", + "title": "TTL check progress", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -8581,16 +10209,16 @@ }, "yaxes": [ { - "$$hashKey": "object:35", - "format": "decbytes", - "label": "", + "decimals": null, + "format": "percentunit", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "$$hashKey": "object:36", + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -8601,82 +10229,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": 
null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 69 + "x": 12, + "y": 7 }, - "id": 6985, + "height": null, + "hideTimeOverride": false, + "id": 78, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_ttl_checker_processed_regions{instance=~\"$instance\"}) by (instance) / sum(tikv_raftstore_region_count{instance=~\"$instance\", type=\"region\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_ttl_checker_actions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 10, - "legendFormat": "{{instance}}", - "refId": "E" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_ttl_checker_actions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "TTL check progress", 
+ "title": "TTL checker actions", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -8684,14 +10342,16 @@ }, "yaxes": [ { - "format": "percentunit", - "label": "", + "decimals": null, + "format": "ops", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -8702,194 +10362,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": "The time consumed when executing GC tasks", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 69 + "x": 0, + "y": 14 }, - "id": 6987, + "height": null, + "hideTimeOverride": false, + "id": 79, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": 
false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(tikv_ttl_checker_actions{instance=~\"$instance\"}[30s])) by (type)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "E" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "TTL checker actions", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ { - "format": "ops", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The time consumed when executing GC tasks", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 76 - }, - "id": 6986, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - 
"percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_ttl_checker_compact_duration_bucket{instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "max", - "metric": "tikv_storage_command_total", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_ttl_checker_compact_duration_bucket{instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "metric": "tikv_storage_gc_skipped_counter", - "refId": "B", - "step": 4 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + 
"refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_ttl_checker_compact_duration_bucket{instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_ttl_checker_compact_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_ttl_checker_compact_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", - "refId": "C" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_ttl_checker_compact_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_ttl_checker_compact_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_ttl_checker_compact_duration_sum{instance=~\"$instance\"}[1m])) / sum(rate(tikv_ttl_checker_compact_duration_count{instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_ttl_checker_compact_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "average", - "refId": "D" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_ttl_checker_compact_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], 
"thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "TTL checker compact duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -8897,6 +10543,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -8905,6 +10552,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -8915,144 +10563,173 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], "datasource": "${DS_TEST-CLUSTER}", - "description": "", - "format": "ms", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": "" + }, + "unit": "ms" + }, + "overrides": [] }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 76 + "y": 14 }, - "id": 7326, + "height": null, + "hideTimeOverride": false, + "id": 80, "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "pluginVersion": "6.1.6", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true + 
"maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "tableColumn": "", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "max(tikv_ttl_checker_poll_interval{instance=~\"$instance\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_ttl_checker_poll_interval\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"tikv_gc_run_interval\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "metric": "tikv_storage_command_total", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": null, + "metric": "", + "query": "max((\n tikv_ttl_checker_poll_interval\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"tikv_gc_run_interval\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], - "thresholds": "", "timeFrom": null, "timeShift": null, "title": "TTL checker poll interval", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "transformations": [], + "transparent": false, + "type": "stat" } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "TTL", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 7 + "y": 0 }, - "id": 2747, + "height": null, + 
"hideTimeOverride": false, + "id": 81, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of requests that TiKV sends to PD", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 7 + "y": 0 }, - "hiddenSeries": false, - "id": 1069, + "height": null, + "hideTimeOverride": false, + "id": 82, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9060,42 +10737,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_pd_request_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n 
tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ type }}", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "PD requests", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9103,7 +10793,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -9111,6 +10802,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -9121,41 +10813,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed by requests that TiKV sends to PD", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 7 + "y": 0 }, - 
"hiddenSeries": false, - "id": 1070, + "height": null, + "hideTimeOverride": false, + "id": 83, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9163,42 +10870,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_pd_request_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type) / sum(rate(tikv_pd_request_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_pd_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ type }}", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "metric": "", + "query": "(sum(rate(\n tikv_pd_request_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "PD request duration (average)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9206,6 +10926,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -9214,6 +10935,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -9224,41 +10946,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe total number of PD heartbeat messages", + "description": "The total number of PD heartbeat messages", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 15 + "y": 7 }, - "hiddenSeries": false, - "id": 1215, + "height": null, + "hideTimeOverride": false, + "id": 84, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": 
true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9266,54 +11003,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "pending", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_pd_heartbeat_message_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_pd_heartbeat_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ type }}", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_pd_heartbeat_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "tikv_pd_pending_heartbeat_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_pd_pending_heartbeat_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "pending", - "refId": "B" + "legendFormat": "{{instance}}-pending", + "metric": "", + "query": "sum((\n tikv_pd_pending_heartbeat_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "PD heartbeats", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9321,6 +11074,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -9329,51 +11083,67 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of peers validated by the PD worker", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 15 + "y": 7 }, - "hiddenSeries": false, - "id": 1396, + "height": null, + "hideTimeOverride": false, + "id": 85, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": 
false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9381,43 +11151,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_pd_validate_peer_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_pd_validate_peer_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ type }}", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_pd_validate_peer_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "PD validate peers", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9425,6 +11207,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -9433,6 +11216,7 @@ "show": true }, { 
+ "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -9443,41 +11227,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe count of reconnections between TiKV and PD", + "description": "The count of reconnection between TiKV and PD", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 23 + "y": 14 }, - "hiddenSeries": false, - "id": 7985, + "height": null, + "hideTimeOverride": false, + "id": 86, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9485,42 +11284,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_pd_reconnect_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) 
by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_pd_reconnect_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ type }}", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(delta(\n tikv_pd_reconnect_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "PD reconnections", + "title": "PD reconnection", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9528,6 +11340,7 @@ }, "yaxes": [ { + "decimals": null, "format": "opm", "label": null, "logBase": 1, @@ -9536,7 +11349,8 @@ "show": true }, { - "format": "opm", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -9546,42 +11360,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe forward status of PD client", + "description": "The forward status of PD client", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + 
"threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 23 + "y": 14 }, - "hiddenSeries": false, - "id": 8376, + "height": null, + "hideTimeOverride": false, + "id": 87, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9589,42 +11417,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_pd_request_forwarded{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_pd_request_forwarded\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{host}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "((\n tikv_pd_request_forwarded\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "PD forward status", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": 
[], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9632,7 +11473,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -9640,7 +11482,8 @@ "show": true }, { - "format": "opm", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -9650,44 +11493,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of TSO requests waiting in the queue.", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 31 + "y": 21 }, - "hiddenSeries": false, - "id": 9963, + "height": null, + "hideTimeOverride": false, + "id": 88, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9695,46 +11550,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": 
null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_pd_pending_tso_request_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_pd_pending_tso_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum((\n tikv_pd_pending_tso_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Pending TSO Requests", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9742,7 +11606,8 @@ }, "yaxes": [ { - "format": "none", + "decimals": null, + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -9750,6 +11615,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -9760,42 +11626,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The slow score of stores", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { 
+ "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 31 + "y": 21 }, - "hiddenSeries": false, - "id": 10365, + "height": null, + "hideTimeOverride": false, + "id": 89, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -9803,44 +11683,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_raftstore_slow_score{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_slow_score\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum((\n tikv_raftstore_slow_score\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + 
"target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store Slow Score", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9848,7 +11739,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -9856,7 +11748,8 @@ "show": true }, { - "format": "opm", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -9866,88 +11759,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The duration that recorded by inspecting messages.", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 12, + "w": 24, "x": 0, - "y": 38 + "y": 28 }, - "hiddenSeries": false, - "id": 10366, + "height": null, + "hideTimeOverride": false, + "id": 90, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + 
"nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_inspect_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_inspect_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{type}}", - "refId": "A", - "step": 4 + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_inspect_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Inspected duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -9955,6 +11872,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -9963,6 +11881,7 @@ "show": 
true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -9973,58 +11892,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "PD", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 8 + "y": 0 }, - "id": 5265, + "height": null, + "hideTimeOverride": false, + "id": 91, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The throughput of disk write per IO type", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 32 + "y": 0 }, - "hiddenSeries": false, - "id": 5993, + "height": null, + "hideTimeOverride": false, + "id": 92, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -10032,51 +11991,70 @@ "lines": true, "linewidth": 1, "links": [], - 
"nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_io_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", op=\"write\"}[45s])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_io_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", op=\"write\"}[45s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "total", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], 
"timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write IO bytes", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10084,14 +12062,16 @@ }, "yaxes": [ { - "format": "Bps", - "label": "", + "decimals": null, + "format": "binBps", + "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -10102,42 +12082,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The throughput of disk read per IO type", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 32 + "y": 0 }, - "hiddenSeries": false, - "id": 5994, + "height": null, + "hideTimeOverride": false, + "id": 93, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -10145,51 +12139,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as 
zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_io_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", op=\"read\"}[45s])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_io_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", op=\"read\"}[45s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "total", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Read IO bytes", "tooltip": { - "msResolution": false, + 
"msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10197,14 +12210,16 @@ }, "yaxes": [ { - "format": "Bps", - "label": "", + "decimals": null, + "format": "binBps", + "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -10215,42 +12230,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The threshold of disk IOs per priority", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 39 + "y": 7 }, - "hiddenSeries": false, - "id": 5995, + "height": null, + "hideTimeOverride": false, + "id": 94, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -10258,43 +12287,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - 
"pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_rate_limiter_max_bytes_per_sec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_rate_limiter_max_bytes_per_sec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "avg((\n tikv_rate_limiter_max_bytes_per_sec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "IO threshold", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10302,14 +12343,16 @@ }, "yaxes": [ { - "format": "Bps", - "label": "", + "decimals": null, + "format": "binBps", + "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -10320,41 +12363,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "IO rate limiter request wait duration.", "editable": true, "error": false, "fieldConfig": { - "defaults": 
{}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 39 + "y": 7 }, - "hiddenSeries": false, - "id": 7225, + "height": null, + "hideTimeOverride": false, + "id": 95, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -10363,50 +12420,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_rate_limiter_request_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": " {{type}}-99%", - "refId": "A", - "step": 4 + "legendFormat": "{{type}}-99%", 
+ "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "rate(tikv_rate_limiter_request_wait_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s]) / rate(tikv_rate_limiter_request_wait_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": " {{type}}-avg", - "refId": "B" + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Rate Limiter Request Wait Duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", 
"xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10414,6 +12491,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -10422,6 +12500,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -10432,75 +12511,128 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "IO Breakdown", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 9 + "y": 0 }, - "id": 13117, + "height": null, + "hideTimeOverride": false, + "id": 96, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed by processing asynchronous write requests", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 9, + "h": 7, "w": 24, "x": 0, - "y": 10 + "y": 0 }, - "hiddenSeries": false, - "id": 13132, + "height": null, + "hideTimeOverride": false, + "id": 97, + "interval": null, + "isNew": 
true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -10508,60 +12640,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{instance=~\"$instance\", type=\"write\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{instance=~\"$instance\", type=\"write\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{instance=~\"$instance\", type=\"write\"}[30s])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{instance=~\"$instance\", type=\"write\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": 
"", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_count{instance=~\"$instance\", type=\"write\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Storage async write duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10569,6 +12734,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -10577,7 +12743,8 @@ "show": true }, { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -10587,60 +12754,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - 
"99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The store time duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 19 + "y": 7 }, - "hiddenSeries": false, - "id": 13257, + "height": null, + "hideTimeOverride": false, + "id": 98, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -10648,60 +12841,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - 
"spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_duration_secs_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_duration_secs_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_duration_secs_sum{instance=~\"$instance\"}[30s])) / 
sum(rate(tikv_raftstore_store_duration_secs_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_duration_secs_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": 
false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10709,6 +12935,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -10717,6 +12944,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -10727,60 +12955,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The apply time duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 19 + "y": 7 }, - "hiddenSeries": false, - "id": 13259, + "height": null, + "hideTimeOverride": false, + "id": 99, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, 
"renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -10788,60 +13042,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_apply_duration_secs_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_duration_secs_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": 
"histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_apply_duration_secs_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_apply_duration_secs_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_apply_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_apply_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_apply_duration_secs_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_apply_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_apply_duration_secs_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Apply duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10849,6 +13136,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -10857,6 +13145,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -10867,60 +13156,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The propose wait time duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 27 + "y": 14 }, - "hiddenSeries": false, - "id": 13261, + "height": null, + "hideTimeOverride": false, + "id": 100, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": 
false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -10928,60 +13243,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_request_wait_time_duration_secs_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_request_wait_time_duration_secs_bucket{instance=~\"$instance\"}[30s])) 
by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_request_wait_time_duration_secs_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_request_wait_time_duration_secs_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": 
"sum(rate(tikv_raftstore_request_wait_time_duration_secs_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store propose wait duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -10989,6 +13337,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -10997,6 +13346,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11007,60 +13357,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The batch wait time duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + 
"threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 27 + "y": 14 }, - "hiddenSeries": false, - "id": 13263, + "height": null, + "hideTimeOverride": false, + "id": 101, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -11068,60 +13444,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", 
"hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_batch_wait_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_batch_wait_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_batch_wait_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store batch wait duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -11129,6 +13538,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -11137,6 +13547,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11147,60 +13558,86 @@ ], 
"yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The send-to-write-queue time duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 21 }, - "hiddenSeries": false, - "id": 13265, + "height": null, + "hideTimeOverride": false, + "id": 102, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -11208,60 +13645,93 @@ }, { "alias": "avg", - "fill": 7 + 
"bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + 
"refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_send_to_queue_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_send_to_queue_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_send_to_queue_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store send to write queue duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -11269,6 +13739,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -11277,6 +13748,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11287,60 +13759,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The send raft message of the proposal duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 21 }, - "hiddenSeries": false, - "id": 23763572857, + "height": null, + "hideTimeOverride": false, + "id": 103, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + 
"sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -11348,60 +13846,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, 
sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_send_proposal_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store send proposal duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -11409,6 +13940,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -11417,6 +13949,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11427,60 +13960,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The write kv db end duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - 
"overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 43 + "y": 28 }, - "hiddenSeries": false, - "id": 13269, + "height": null, + "hideTimeOverride": false, + "id": 104, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -11488,60 +14047,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n 
tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store write kv db end duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - 
"buckets": null, "mode": "time", "name": null, "show": true, @@ -11549,6 +14141,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -11557,6 +14150,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11567,60 +14161,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The before write time duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 43 + "y": 28 }, - "hiddenSeries": false, - "id": 13267, + "height": null, + "hideTimeOverride": false, + "id": 105, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + 
"repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -11628,60 +14248,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, 
"legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_before_write_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": 
"time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store before write duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -11689,6 +14342,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -11697,6 +14351,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11707,60 +14362,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The persist duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 51 + "y": 35 }, - "hiddenSeries": false, - "id": 13273, + "height": null, + "hideTimeOverride": false, + "id": 106, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": 
false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -11768,60 +14449,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_persist_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_persist_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_persist_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_persist_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n 
tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_persist_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store persist duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -11829,6 +14543,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -11837,6 +14552,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11847,60 +14563,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The write end duration of each request", + "editable": true, + "error": false, "fieldConfig": 
{ - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 51 + "y": 35 }, - "hiddenSeries": false, - "id": 13271, + "height": null, + "hideTimeOverride": false, + "id": 107, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -11908,60 +14650,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_write_end_duration_seconds_count{instance=~\"$instance\"}[30s]))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store write end duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": 
null, "show": true, @@ -11969,6 +14744,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -11977,6 +14753,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -11987,60 +14764,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The commit but not persist duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 59 + "y": 42 }, - "hiddenSeries": false, - "id": 13277, + "height": null, + "hideTimeOverride": false, + "id": 108, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, 
"seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -12048,60 +14851,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, 
"legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store commit but not persist duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -12109,6 +14945,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -12117,6 +14954,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -12127,60 +14965,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "99%": "#eab839", - "999%": "dark-red", - "count": "rgb(33, 250, 2)" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The commit and persist duration of each request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 59 + "y": 42 }, - "hiddenSeries": false, - 
"id": 13275, + "height": null, + "hideTimeOverride": false, + "id": 109, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "count", + "bars": false, "dashLength": 1, "dashes": true, "fill": 2, + "fillBelowTo": null, + "lines": true, "spaceLength": 1, "transform": "negative-Y", "yaxis": 2, @@ -12188,60 +15052,93 @@ }, { "alias": "avg", - "fill": 7 + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n 
sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_bucket{instance=~\"$instance\"}[30s])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_sum{instance=~\"$instance\"}[30s])) / sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n 
tikv_raftstore_store_wf_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_store_wf_commit_log_duration_seconds_count{instance=~\"$instance\"}[30s]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store commit and persist duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -12249,6 +15146,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -12257,6 +15155,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -12267,25 +15166,53 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft Waterfall", + 
"transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 10 + "y": 0 }, - "id": 2748, + "height": null, + "hideTimeOverride": false, + "id": 110, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -12295,61 +15222,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed for peer processes to be ready in Raft", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 10 + "y": 0 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 13279, + "id": 111, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_raftstore_raft_process_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type='ready'}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - 
"intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "C", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Process ready duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -12358,32 +15318,44 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed for peer processes to be ready in Raft", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 10 + "y": 0 }, - "hiddenSeries": false, - "id": 13281, + "height": null, + "hideTimeOverride": false, + "id": 112, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": 
false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -12397,54 +15369,123 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_raft_process_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type='ready'}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "C", - "step": 4 - } - ], - "thresholds": [ + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 1, - "yaxis": "left" + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Process ready duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -12452,14 +15493,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -12470,10 +15513,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -12483,65 +15527,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, - "dashes": false, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time duration of store write loop when store-io-pool-size is not zero.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 17 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, 
- "id": 13283, + "id": 113, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tikv_raftstore_store_write_loop_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Store write loop duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -12550,36 +15623,50 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", 
"description": "The time duration of store write loop on each TiKV instance when store-io-pool-size is not zero.", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 17 + "y": 7 }, - "hiddenSeries": false, - "id": 13285, + "height": null, + "hideTimeOverride": false, + "id": 114, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -12587,45 +15674,123 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, 
sum(rate(tikv_raftstore_store_write_loop_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", + "hide": true, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} ", - "refId": "A", - "step": 4 + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Store write loop duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -12633,6 +15798,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -12641,6 +15807,7 @@ "show": true }, { + "decimals": null, "format": "short", 
"label": null, "logBase": 1, @@ -12651,10 +15818,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -12664,62 +15832,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when Raft appends log", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 24 + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 39, + "id": 115, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_raftstore_append_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Append 
log duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -12728,36 +15928,50 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when Raft appends log on each TiKV instance", + "description": "The time consumed when Raft commits log on each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 24 + "y": 14 }, - "hiddenSeries": false, - "id": 13376, + "height": null, + "hideTimeOverride": false, + "id": 116, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -12765,78 +15979,123 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", 
"options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_append_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} ", - "refId": "A", - "step": 4 - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_write_kvdb_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "hide": true, - "interval": "", - "legendFormat": "kvdb-{{instance}}", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) 
by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_write_raftdb_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "raftdb-{{instance}}", - "refId": "C" + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_write_send_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_append_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "sendmsg-{{instance}}", - "refId": "D" + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n 
tikv_raftstore_append_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_write_callback_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": true, + "instant": false, "interval": "", - "legendFormat": "callback-{{instance}}", - "refId": "E" + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "99% Append log duration per server", + "title": "99% Commit log duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -12844,6 +16103,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -12852,6 +16112,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -12862,10 +16123,11 @@ ], 
"yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -12875,135 +16137,270 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when Raft commits log", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 31 + "y": 21 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 3690, + "id": 117, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_raftstore_commit_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Commit log duration", "tooltip": { - "show": true, - "showHistogram": 
false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when Raft commits log on each TiKV instance", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 31 + "y": 21 }, - "hiddenSeries": false, - "id": 3688, + "height": null, + "hideTimeOverride": false, + "id": 118, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, 
"points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_commit_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": 
"histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Commit log duration per server", "tooltip": { + 
"msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13011,6 +16408,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -13019,6 +16417,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -13029,10 +16428,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -13042,62 +16442,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, - "mode": "spectrum" + "max": null, + "min": null, + "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when Raft applies log", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 38 + "y": 28 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 31, + "id": 119, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_raftstore_apply_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Apply log duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -13106,36 +16538,50 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed for Raft to apply logs per TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 38 + "y": 28 }, - "hiddenSeries": false, - "id": 32, + "height": null, + "hideTimeOverride": false, + "id": 120, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, 
"rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -13143,43 +16589,123 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_apply_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_apply_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": " {{instance}}", - "refId": "A", - "step": 4 + "hide": 
true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Apply log duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13187,6 +16713,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -13195,6 +16722,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -13205,89 +16733,127 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The throughput of disk write per IO type", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 45 + "y": 35 }, - "hiddenSeries": false, - "id": 13382, + "height": null, + "hideTimeOverride": false, + "id": 121, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": 
false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_raftstore_io_reschedule_region_total{instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_io_reschedule_region_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "rechedule-{{instance}}", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_raftstore_io_reschedule_region_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(tikv_raftstore_io_reschedule_pending_tasks_total{instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_io_reschedule_pending_tasks_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "pending-task-{{instance}}", - "refId": "B" + "metric": "", + "query": "sum((\n tikv_raftstore_io_reschedule_pending_tasks_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Store io task reschedule", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13295,7 +16861,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -13303,6 +16870,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -13313,81 +16881,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when store write task block on each TiKV instance", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 45 + "y": 35 }, - "hiddenSeries": false, - "id": 13380, + "height": null, + "hideTimeOverride": false, + "id": 122, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + 
"maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_write_msg_block_wait_duration_seconds_bucket{instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_msg_block_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_msg_block_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Write task block duration per server", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13395,6 +16994,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -13403,6 +17003,7 @@ "show": true }, { + "decimals": null, "format": "short", 
"label": null, "logBase": 1, @@ -13413,57 +17014,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft IO", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 11 + "y": 0 }, - "id": 2751, + "height": null, + "hideTimeOverride": false, + "id": 123, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The proposal count of all Regions in a mio tick", + "description": "The proposal count of a Regions in a tick", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 11 + "y": 0 }, - "hiddenSeries": false, - "id": 108, + "height": null, + "hideTimeOverride": false, + "id": 124, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -13471,43 +17113,55 @@ "lines": true, 
"linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_proposal_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_proposal_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "refId": "A", - "step": 4 + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_proposal_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft proposals per ready", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13515,7 +17169,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -13523,6 +17178,7 @@ "show": true }, { + "decimals": 
null, "format": "short", "label": null, "logBase": 1, @@ -13533,89 +17189,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of proposals per type", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 11 + "y": 0 }, - "hiddenSeries": false, - "id": 7, + "height": null, + "hideTimeOverride": false, + "id": 125, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_proposal_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"local_read|normal|read_index\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + 
"expr": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|normal|read_index\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_raftstore_proposal_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|normal|read_index\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft read/write proposals", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13623,6 +17302,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -13631,6 +17311,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -13641,89 +17322,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of read proposals which are made by each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 
0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 18 + "y": 7 }, - "hiddenSeries": false, - "id": 119, + "height": null, + "hideTimeOverride": false, + "id": 126, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_proposal_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"local_read|read_index\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|read_index\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|read_index\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } 
], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft read proposals per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13731,6 +17435,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -13739,6 +17444,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -13749,89 +17455,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of write proposals which are made by each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 18 + "y": 7 }, - "hiddenSeries": false, - "id": 120, + "height": null, + "hideTimeOverride": false, + "id": 127, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + 
"nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_proposal_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"normal\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"normal\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_proposal_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"normal\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft write proposals per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -13839,6 +17568,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -13847,6 +17577,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -13857,10 +17588,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + 
"alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -13870,62 +17602,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The wait time of each proposal", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 25 + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 41, + "id": 128, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_raftstore_request_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Propose wait duration", "tooltip": { - 
"show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -13934,80 +17698,174 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The wait time of each proposal in each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 25 + "y": 14 }, - "hiddenSeries": false, - "id": 42, + "height": null, + "hideTimeOverride": false, + "id": 129, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": 
"7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_request_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + 
"legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], 
"timeShift": null, "title": "99% Propose wait duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -14015,6 +17873,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -14023,6 +17882,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -14033,10 +17893,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -14046,65 +17907,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, - "dashes": false, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The wait time of each store write task", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 32 + "y": 21 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 13524, + "id": 130, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tikv_raftstore_store_write_task_wait_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_request_wait_time_duration_secs_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Store write wait duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -14113,82 +18003,174 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The wait time of each store write task in each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 32 + "y": 21 
}, - "hiddenSeries": false, - "id": 13522, + "height": null, + "hideTimeOverride": false, + "id": 131, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_write_task_wait_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - 
"step": 4 + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Store write wait duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -14196,6 +18178,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -14204,6 +18187,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -14214,10 +18198,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -14227,61 +18212,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": "The wait time of each apply task", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { 
+ "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 39 + "y": 28 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 2535, + "id": 132, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_raftstore_apply_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_apply_wait_time_duration_secs_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Apply wait duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - 
"splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -14290,79 +18308,174 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "The wait time of each apply task in each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 39 + "y": 28 }, - "hiddenSeries": false, - "id": 2536, + "height": null, + "hideTimeOverride": false, + "id": 133, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + 
"yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Apply wait duration per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -14370,6 +18483,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ 
-14378,6 +18492,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -14388,10 +18503,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -14401,71 +18517,101 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, - "dashes": false, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The handle duration of each store write task msg", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 46 + "y": 35 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763572700, + "id": 134, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tikv_raftstore_store_write_handle_msg_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_write_handle_msg_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_store_write_handle_msg_duration_secs_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n 
tikv_raftstore_store_write_handle_msg_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Store write handle msg duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -14475,58 +18621,88 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, - "dashes": false, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The distribution of write trigger size", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 46 + "y": 35 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763572701, + "id": 135, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": 
"sum(delta(tikv_raftstore_store_write_trigger_wb_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_write_trigger_wb_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "metric": "tikv_raftstore_store_write_trigger_wb_bytes_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_write_trigger_wb_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Store write trigger size", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { + "decimals": 1, "format": "bytes", "label": null, "logBase": 1, @@ -14541,32 +18717,49 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The rate at which peers propose logs", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": 
"rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 54 + "y": 42 }, - "hiddenSeries": false, - "id": 1975, + "height": null, + "hideTimeOverride": false, + "id": 136, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -14575,41 +18768,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(rate(tikv_raftstore_propose_log_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_propose_log_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_propose_log_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft propose speed", "tooltip": { + "msResolution": true, 
"shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -14618,112 +18825,146 @@ "yaxes": [ { "decimals": null, - "format": "short", - "label": "bytes/s", + "format": "binBps", + "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "The rate at which peers propose logs", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 54 + "y": 42 }, - "hiddenSeries": false, - "id": 1976, + "height": null, + "hideTimeOverride": false, + "id": 137, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 
5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_store_perf_context_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_perf_context_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "store-{{type}}", - "metric": "tikv_raftstore_store_perf_context_time_duration_secs_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_perf_context_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_perf_context_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_perf_context_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "apply-{{type}}", - "refId": "B", - "step": 4 + "metric": "", + 
"query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_perf_context_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Perf Context duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -14731,6 +18972,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -14739,6 +18981,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -14749,184 +18992,169 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } - }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The wait time of each raft message", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 62 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 1977, - "legend": { - "show": false - }, - "links": [], - "reverseYBuckets": false, - "targets": [ - { - "expr": "sum(delta(tikv_raftstore_raft_msg_wait_time_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "metric": "tikv_raftstore_raft_msg_wait_time_duration_secs_bucket", - "refId": "A", - 
"step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Raft message wait duration", - "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft Propose", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 12 + "y": 0 }, - "id": 2749, + "height": null, + "hideTimeOverride": false, + "id": 138, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of different ready type of Raft", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 12 + "y": 0 }, - "hiddenSeries": false, - "id": 5, + "height": null, + "hideTimeOverride": false, + "id": 139, + "interval": null, + "isNew": true, "legend": { 
"alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_raft_ready_handled_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_ready_handled_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_raftstore_raft_ready_handled_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_ready_handled_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_raftstore_raft_process_duration_secs_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"ready\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Ready handled", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -14934,7 +19162,8 @@ }, "yaxes": [ { - "format": "ops", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -14942,6 +19171,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -14952,38 +19182,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The max time consumed by raftstore events", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 
12 + "y": 0 }, - "hiddenSeries": false, - "id": 123, + "height": null, + "hideTimeOverride": false, + "id": 140, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -14997,64 +19239,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(1.0, sum(rate(tikv_raftstore_event_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999999,(\n sum(rate(\n tikv_raftstore_event_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "C", - "step": 4 + "metric": "", + "query": "histogram_quantile(0.999999,(\n sum(rate(\n tikv_raftstore_event_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(1.0, sum(rate(tikv_broadcast_normal_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999999,(\n sum(rate(\n tikv_broadcast_normal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "broadcast_normal", - "refId": "A", - "step": 4 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 1 + "metric": "", + "query": "histogram_quantile(0.999999,(\n sum(rate(\n tikv_broadcast_normal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Max duration of raft store events", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15062,14 +19310,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15080,10 +19330,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -15093,67 +19344,101 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed 
for checking memory locks for replica reads", + "description": "Replica read lock checking duration", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 7235, + "id": 141, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_replica_read_lock_check_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_replica_read_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "C", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_replica_read_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Replica read lock checking duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] 
}, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -15163,163 +19448,244 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The length of peer msgs for each round handling", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763572958, + "id": 142, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tikv_raftstore_peer_msg_len_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_peer_msg_len_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "C", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_peer_msg_len_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Peer msg length distribution", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "none", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft Process", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 13 + "y": 0 }, - "id": 2750, + "height": null, + "hideTimeOverride": false, + "id": 143, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of Raft messages sent by each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - 
"fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 29 + "y": 0 }, - "hiddenSeries": false, - "id": 1615, + "height": null, + "hideTimeOverride": false, + "id": 144, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_raft_sent_message_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) 
by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Sent messages per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15327,6 +19693,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -15335,6 +19702,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15345,89 +19713,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of Raft messages flushed by each TiKV instance", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 29 + "y": 0 }, - "hiddenSeries": false, - "id": 1616, + "height": null, + "hideTimeOverride": false, + "id": 145, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + 
"maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_server_raft_message_flush_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_raft_message_flush_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{reason}}", - "metric": "tikv_server_raft_message_flush_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_server_raft_message_flush_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Flush messages per server", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15435,6 +19826,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -15443,6 +19835,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15453,41 +19846,56 @@ ], 
"yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of Raft messages received by each TiKV instance", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 36 + "y": 7 }, - "hiddenSeries": false, - "id": 106, + "height": null, + "hideTimeOverride": false, + "id": 146, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -15495,42 +19903,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_server_raft_message_recv_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_raft_message_recv_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_server_raft_message_recv_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Receive messages per server", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15538,6 +19959,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -15546,6 +19968,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15556,88 +19979,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of different types of Raft messages that are sent", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 36 + "y": 7 }, - "hiddenSeries": false, - "id": 11, + "height": null, + "hideTimeOverride": 
false, + "id": 147, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_raft_sent_message_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Messages", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": 
"graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15645,6 +20092,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -15653,6 +20101,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15663,88 +20112,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of vote messages that are sent in Raft", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 43 + "y": 14 }, - "hiddenSeries": false, - "id": 25, + "height": null, + "hideTimeOverride": false, + "id": 148, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, 
"steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_raft_sent_message_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"vote\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"vote\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"vote\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Vote", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15752,14 +20225,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15770,88 +20245,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of dropped Raft messages per type", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", 
+ "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 43 + "y": 14 }, - "hiddenSeries": false, - "id": 1309, + "height": null, + "hideTimeOverride": false, + "id": 149, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_raft_dropped_message_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_dropped_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_dropped_message_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft dropped messages", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15859,6 +20358,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -15867,6 +20367,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15877,95 +20378,154 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft Message", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 14 + "y": 0 }, - "id": 2752, + "height": null, + "hideTimeOverride": false, + "id": 150, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of admin proposals", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": 
null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 11 + "y": 0 }, - "id": 76, + "height": null, + "hideTimeOverride": false, + "id": 151, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_proposal_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"conf_change|transfer_leader\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"conf_change|transfer_leader\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_raftstore_proposal_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"conf_change|transfer_leader\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 
10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Admin proposals", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -15973,6 +20533,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -15981,6 +20542,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -15991,79 +20553,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of the processed apply command", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 11 + "y": 0 }, - "id": 77, + "height": null, + "hideTimeOverride": false, + "id": 152, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, 
"percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_admin_cmd_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", status=\"success\", type!=\"compact\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_admin_cmd_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"compact\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_raftstore_admin_cmd_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_admin_cmd_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"compact\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Admin apply", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16071,14 +20666,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -16089,79 +20686,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": 
"${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The number of raftstore split checksss", + "description": "The number of raftstore split checks", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 18 + "y": 7 }, - "id": 70, + "height": null, + "hideTimeOverride": false, + "id": 153, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_check_split_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"ignore\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_check_split_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"ignore\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - 
"metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_check_split_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"ignore\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Check split", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16169,6 +20799,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -16177,6 +20808,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -16187,80 +20819,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when running split check in .9999", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 18 + "y": 7 }, - "id": 71, + "height": null, + "hideTimeOverride": false, + "id": 154, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", 
"sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.9999, sum(rate(tikv_raftstore_check_split_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_check_split_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_check_split_duration_seconds_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_check_split_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99.99% Check split duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16268,14 
+20932,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -16286,94 +20952,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { "defaults": { - "custom": {} - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 25 + "y": 14 }, - "hiddenSeries": false, - "id": 3636, + "height": null, + "hideTimeOverride": false, + "id": 155, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sideWidth": 400, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.3.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_load_base_split_event[1m])) by (type)", + 
"datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_load_base_split_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 - }, - { - "expr": "", - "interval": "", - "legendFormat": "", - "refId": "B" + "metric": "", + "query": "sum(delta(\n tikv_load_base_split_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Load base split event", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16381,6 +21065,7 @@ }, "yaxes": [ { + "decimals": null, "format": "opm", "label": null, "logBase": 1, @@ -16389,6 +21074,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -16399,105 +21085,142 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 
0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 36 + "x": 12, + "y": 14 }, - "hiddenSeries": false, - "id": 23763572060, + "height": null, + "hideTimeOverride": false, + "id": 156, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.80, sum(rate(tikv_load_base_split_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.8,(\n sum(rate(\n tikv_load_base_split_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "80%-{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "histogram_quantile(0.8,(\n sum(rate(\n tikv_load_base_split_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + 
"refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.90, sum(rate(tikv_load_base_split_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_load_base_split_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "99%-{{instance}}", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_load_base_split_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_load_base_split_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_load_base_split_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_load_base_split_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "avg-{{instance}}", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_load_base_split_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_load_base_split_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Load base split duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16505,6 +21228,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -16513,6 +21237,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -16523,87 +21248,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 36 + "w": 24, + "x": 0, + "y": 21 }, - "hiddenSeries": false, - "id": 23763573619, + "height": null, + "hideTimeOverride": false, + "id": 157, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sideWidth": 300, + 
"sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_raftstore_peer_in_flashback_state{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_peer_in_flashback_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_peer_in_flashback_state", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum((\n tikv_raftstore_peer_in_flashback_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Peer in Flashback State", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16611,7 +21361,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, 
"max": null, @@ -16619,6 +21370,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -16629,166 +21381,98 @@ ], "yaxis": { "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 25 - }, - "hiddenSeries": false, - "id": 3637, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 300, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.3.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "tikv_read_qps_topn{order=\"0\"}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "TopN QPS exceeds threshold", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null + 
"alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft Admin", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 15 + "y": 0 }, - "id": 12797, + "height": null, + "hideTimeOverride": false, + "id": 158, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 16 + "y": 0 }, - "hiddenSeries": false, - "id": 12882, + "height": null, + "hideTimeOverride": false, + "id": 159, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -16796,66 +21480,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", 
"options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_raft_log_gc_write_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "99% - {{instance}}", - "refId": "A", - "step": 10 - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_raftstore_raft_log_gc_write_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "95% - {{instance}}", - "refId": "B", - "step": 10 + "intervalFactor": 1, + "legendFormat": "99.99%-{{instance}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_raft_log_gc_write_duration_secs_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance) / 
sum(rate(tikv_raftstore_raft_log_gc_write_duration_secs_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "avg - {{instance}}", - "refId": "C", - "step": 10 + "intervalFactor": 1, + "legendFormat": "avg-{{instance}}", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft log GC write duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16863,6 +21551,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 10, @@ -16871,6 +21560,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -16881,43 +21571,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": 
false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 16 + "y": 0 }, - "hiddenSeries": false, - "id": 12886, + "height": null, + "hideTimeOverride": false, + "id": 160, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -16925,66 +21628,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_raft_log_kv_sync_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "99% - {{instance}}", - "refId": "A", - "step": 10 - }, - { - "exemplar": true, - 
"expr": "histogram_quantile(0.95, sum(rate(tikv_raftstore_raft_log_kv_sync_duration_secs_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_log_kv_sync_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "95% - {{instance}}", - "refId": "B", - "step": 10 + "intervalFactor": 1, + "legendFormat": "99.99%-{{instance}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_log_kv_sync_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_raft_log_kv_sync_duration_secs_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance) / sum(rate(tikv_raftstore_raft_log_kv_sync_duration_secs_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_raft_log_kv_sync_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_raft_log_kv_sync_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "avg - {{instance}}", - 
"refId": "C", - "step": 10 + "intervalFactor": 1, + "legendFormat": "avg-{{instance}}", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_raft_log_kv_sync_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_raft_log_kv_sync_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft log GC kv sync duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -16992,6 +21699,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 10, @@ -17000,6 +21708,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17010,91 +21719,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 24 + "y": 7 }, - "hiddenSeries": false, - "id": 12881, + "height": null, + "hideTimeOverride": false, + "id": 161, + "interval": 
null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_raft_log_gc_write_duration_secs_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_log_gc_write_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Raft log GC write operations ", + "title": "Raft log GC write operations", "tooltip": { - "msResolution": false, + 
"msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17102,6 +21832,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -17110,6 +21841,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17120,91 +21852,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 24 + "y": 7 }, - "hiddenSeries": false, - "id": 12884, + "height": null, + "hideTimeOverride": false, + "id": 162, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": 
false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_raft_log_gc_seek_operations_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_log_gc_seek_operations_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_log_gc_seek_operations_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft log GC seek operations ", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17212,6 +21965,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -17220,6 +21974,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17230,91 +21985,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, 
- "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 32 + "y": 14 }, - "hiddenSeries": false, - "id": 12887, + "height": null, + "hideTimeOverride": false, + "id": 163, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_log_lag_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_log_lag_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": 
"{{instance}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_log_lag_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft log lag", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17322,6 +22098,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -17330,6 +22107,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17340,91 +22118,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 32 + "y": 14 }, - "hiddenSeries": false, - "id": 12975, + "height": null, + "hideTimeOverride": false, + "id": 164, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, 
"show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_raft_log_gc_skipped{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_log_gc_skipped\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{reason}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{reason}}", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_log_gc_skipped\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft log gc skipped", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, 
"mode": "time", "name": null, "show": true, @@ -17432,6 +22231,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -17440,6 +22240,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17450,91 +22251,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 40 + "y": 21 }, - "hiddenSeries": false, - "id": 12974, + "height": null, + "hideTimeOverride": false, + "id": 165, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - 
"expr": "sum(rate(tikv_raftstore_raft_log_gc_failed{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_raft_log_gc_failed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_raft_log_gc_failed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft log GC failed", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17542,6 +22364,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -17550,6 +22373,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17560,91 +22384,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + 
"fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 40 + "y": 21 }, - "hiddenSeries": false, - "id": 23763572229, + "height": null, + "hideTimeOverride": false, + "id": 166, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_entry_fetches{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_entry_fetches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_raftstore_check_split_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_raftstore_entry_fetches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Raft log fetch", + "title": "Raft log fetch ", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17652,6 +22497,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -17660,6 +22506,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17670,124 +22517,153 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 48 + "y": 28 }, - "hiddenSeries": false, - "id": 23763572555, + "height": null, + "hideTimeOverride": false, + "id": 167, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, + "current": true, + "hideEmpty": true, "hideZero": true, - "max": false, + "max": true, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": 
false, - "values": false + "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": false + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { "alias": "/pending-task/", + "bars": false, + "fill": 1, + "fillBelowTo": null, + "lines": true, "transform": "negative-Y", - "yaxis": 2 + "yaxis": 2, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_entry_fetches_task_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "99%", - "refId": "A", - "step": 10 - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_raftstore_entry_fetches_task_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_entry_fetches_task_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "95%", - "refId": "B", - "step": 10 + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_entry_fetches_task_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_raftstore_entry_fetches_task_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_raftstore_entry_fetches_task_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_entry_fetches_task_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_entry_fetches_task_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "C", - "step": 10 + "intervalFactor": 1, + "legendFormat": "avg-{{instance}}", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_entry_fetches_task_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_entry_fetches_task_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(tikv_worker_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\", name=~\"raftlog-fetch-worker\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_worker_pending_task_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftlog-fetch-worker\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "pending-task", - "refId": "D" + "metric": "", + "query": "sum((\n tikv_worker_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftlog-fetch-worker\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft log async fetch task duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17804,6 +22680,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17814,48 +22691,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft Log", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 16 + "y": 0 }, - "id": 2753, + "height": null, + "hideTimeOverride": false, + "id": 168, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of rejections from the 
local read thread and The number of total requests", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 12, + "w": 24, "x": 0, - "y": 12 + "y": 0 }, - "id": 2292, + "height": null, + "hideTimeOverride": false, + "id": 169, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -17863,56 +22790,95 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [ { - "alias": "/.*-total/i", - "yaxis": 2 + "alias": "/.*-total/", + "bars": false, + "fill": 1, + "fillBelowTo": null, + "lines": true, + "yaxis": 2, + "zindex": 0 } ], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_local_read_reject_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_local_read_reject_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", "format": "time_series", - "intervalFactor": 2, + "hide": 
false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-reject-by-{{reason}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_local_read_reject_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_raftstore_local_read_executed_requests{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_local_read_executed_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-total", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_local_read_executed_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_raftstore_local_read_executed_stale_read_requests{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_local_read_executed_stale_read_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-stale-read", - "refId": "C" + "metric": "", + "query": "sum(rate(\n tikv_raftstore_local_read_executed_stale_read_requests\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Local reader requests", + "title": "Raft log async fetch task duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -17920,7 +22886,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -17928,6 +22895,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -17938,86 +22906,154 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, - "title": "Local reader", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Local Reader", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 17 + "y": 0 }, - "id": 4200, + "height": null, + "hideTimeOverride": false, + "id": 170, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time used by each level in the unified read pool per second. 
Level 0 refers to small queries.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 13 + "y": 0 }, - "id": 4194, + "height": null, + "hideTimeOverride": false, + "id": 171, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sideWidth": 250, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_multilevel_level_elapsed{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (level)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_multilevel_level_elapsed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n [$__rate_interval]\n)) by (level) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{level}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_multilevel_level_elapsed\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n [$__rate_interval]\n)) by (level) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Time used by level", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18025,7 +23061,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -18033,6 +23070,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18043,69 +23081,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The chance that level 0 (small) tasks are scheduled in the unified read pool.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 13 + "y": 0 }, - "id": 4196, + "height": null, + "hideTimeOverride": false, + "id": 172, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + 
"maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_multilevel_level0_chance{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_multilevel_level0_chance\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{type}}", + "metric": "", + "query": "((\n tikv_multilevel_level0_chance\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Level 0 chance", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18113,6 +23194,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -18121,6 +23203,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18131,70 +23214,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", 
"description": "The number of concurrently running tasks in the unified read pool.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 7 }, - "id": 4198, + "height": null, + "hideTimeOverride": false, + "id": 173, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(avg_over_time(tikv_unified_read_pool_running_tasks{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(avg_over_time(\n tikv_unified_read_pool_running_tasks\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "sum(avg_over_time(\n tikv_unified_read_pool_running_tasks\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Running tasks", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18202,6 +23327,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -18210,6 +23336,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18220,10 +23347,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -18233,158 +23361,270 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 26 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763572469, + "id": 174, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_yatp_pool_schedule_wait_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
name=~\"unified-read.*\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified-read.*\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified-read.*\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Unified Read Pool Wait Duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Unified read pool task execution time during one schedule.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, 
"gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 34 + "y": 14 }, - "id": 4199, + "height": null, + "hideTimeOverride": false, + "id": 175, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.50, sum(rate(tikv_yatp_task_poll_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "50%", - "refId": "A" + "legendFormat": "99.99%", + "metric": "", + "query": 
"histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_yatp_task_poll_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_yatp_task_poll_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_yatp_task_poll_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "refId": "C" + "legendFormat": "avg", + "metric": "", + 
"query": "(sum(rate(\n tikv_yatp_task_poll_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_yatp_task_poll_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": false, + "hide": true, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "999%", - "refId": "D" + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Duration of One Time Slice", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18392,6 +23632,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, @@ -18400,6 +23641,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18410,101 +23652,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + 
"cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Unified read pool task total execution duration.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 34 + "y": 14 }, - "id": 4202, + "height": null, + "hideTimeOverride": false, + "id": 176, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.50, sum(rate(tikv_yatp_task_exec_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + 
"datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "50%", - "refId": "A" + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_yatp_task_exec_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_yatp_task_exec_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_yatp_task_exec_duration_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "refId": "C" + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_yatp_task_exec_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_yatp_task_exec_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": false, + "hide": true, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "999%", - "refId": "D" + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Task Execute Duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - 
"buckets": null, "mode": "time", "name": null, "show": true, @@ -18512,6 +23833,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, @@ -18520,6 +23842,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18530,101 +23853,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Task schedule number of times.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 42 + "y": 21 }, - "id": 4204, + "height": null, + "hideTimeOverride": false, + "id": 177, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": 
-3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.50, sum(rate(tikv_yatp_task_execute_times_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "50%", - "refId": "A" + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_yatp_task_execute_times_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_yatp_task_execute_times_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_yatp_task_execute_times_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "refId": "C" + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_yatp_task_execute_times_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tikv_yatp_task_execute_times_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=\"unified-read-pool\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": false, + "hide": true, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "999%", - "refId": "D" + "legendFormat": "count", + "metric": "", + 
"query": "sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Task Schedule Times", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18632,6 +24034,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 2, @@ -18640,6 +24043,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18650,43 +24054,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Unified Read Pool", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 18 + "y": 0 }, - "id": 2754, + "height": null, + "hideTimeOverride": false, + "id": 178, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total count of different kinds of commands received", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + 
"grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 14 + "y": 0 }, - "id": 2, + "height": null, + "hideTimeOverride": false, + "id": 179, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -18698,7 +24145,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -18706,39 +24153,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_storage_command_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_command_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_storage_command_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Storage command total", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 
0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18746,14 +24209,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 10, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18764,28 +24229,44 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of engine asynchronous request errors", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 14 + "y": 0 }, - "id": 8, + "height": null, + "hideTimeOverride": false, + "id": 180, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, @@ -18797,7 +24278,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -18805,40 +24286,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(tikv_storage_engine_async_request_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", status!~\"all|success\"}[1m])) by (status)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",status!~\"all|success\"}\n [$__rate_interval]\n)) by (status) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{status}}", - "metric": "tikv_raftstore_raft_process_duration_secs_bucket", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",status!~\"all|success\"}\n [$__rate_interval]\n)) by (status) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Storage async request error", "tooltip": { "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -18846,7 +24342,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -18854,6 +24351,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -18864,10 +24362,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -18877,215 +24376,575 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - 
"description": "The time consumed by processing asynchronous snapshot requests", + "description": "The time consumed by processing asynchronous write requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 15, + "id": 181, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Storage async snapshot duration", + "title": "Storage async write duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + 
"transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { - "cards": { - "cardPadding": null, - "cardRound": null + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The storage async write duration", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed by processing asynchronous write requests", "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 0, - "y": 22 + "x": 12, + "y": 7 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 109, + "height": null, + "hideTimeOverride": false, + "id": 182, + "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "reverseYBuckets": false, - "targets": [ + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as 
zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ { - "expr": "sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A", - "step": 4 + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, 
+ "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + 
"step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Storage async write duration", "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, "show": true, - "showHistogram": false + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed by processing asynchronous snapshot requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 183, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) 
", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Storage async snapshot duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The storage async snapshot duration", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 14 }, - "hiddenSeries": false, - "id": 20000, + "height": null, + "hideTimeOverride": false, + "id": 184, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": 
true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "99%", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "95%", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": 
"", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Storage async snapshot duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -19093,6 +24952,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -19101,6 +24961,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -19111,10 +24972,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -19124,129 +24986,302 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", 
"exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The storage async snapshot duration without the involving of raftstore", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 48 + "y": 21 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 31111, + "id": 185, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Storage async snapshot duration (pure local read)", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, 
+ "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { - "cards": { - "cardPadding": null, - "cardRound": null + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The storage async snapshot duration without the involving of raftstore", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "Read index propose wait duration associated with async snapshot", "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 0, - "y": 60 + "x": 12, + "y": 21 }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 31112, + "height": null, + "hideTimeOverride": false, + "id": 186, + "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": 
[], - "reverseYBuckets": false, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_propose_wait\"}[1m])) by (le)", - "format": "heatmap", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "refId": "A", - "step": 4 + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Read index propose wait duration", + "title": "Storage async snapshot duration (pure local read)", "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, "show": true, - "splitFactor": null + "values": [] }, - "yBucketBound": "auto", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -19256,232 +25291,270 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "Read index confirm duration associated with async snapshot", + "description": "Read index propose wait duration associated with async snapshot", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 72 + "y": 28 }, "heatmap": {}, + "height": null, + 
"hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 31113, + "id": 187, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_storage_engine_async_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Read index confirm duration", + "title": "Read index propose wait duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + 
"show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "Read index propose wait duration associated with async snapshot", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "s" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 0, - "y": 67 + "x": 12, + "y": 28 }, - "hiddenSeries": false, - "id": 24763574239, + "height": null, + "hideTimeOverride": false, + "id": 188, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + 
"fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": false, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_full_compact_duration_seconds_bucket[5m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Full compaction duration seconds", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, { - "$$hashKey": "object:86", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "$$hashKey": "object:87", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - }, - "timeFrom": null, - "timeShift": null - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": { - "unit": "s" + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 75 - }, - "hiddenSeries": false, - "id": 24763574241, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null 
as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ { - "exemplar": false, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_full_compact_pause_duration_seconds_bucket[5m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, "instant": false, "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Full compaction pause duration ", + "title": "Read index propose wait duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -19489,7 +25562,7 @@ }, "yaxes": [ { - "$$hashKey": "object:86", + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -19498,190 +25571,295 @@ "show": true }, { - "$$hashKey": "object:87", - "format": "s", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - 
"dashes": false, + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": "Read index confirm duration associated with async snapshot", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "s" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 12, - "y": 75 + "x": 0, + "y": 35 }, - "hiddenSeries": false, - "id": 24763574240, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 189, + "interval": null, "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true + "show": false }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": false, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_full_compact_increment_duration_seconds_bucket[5m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) 
", + "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Full compaction per-increment duration ", + "title": "Read index confirm duration", "tooltip": { + "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, - "type": "graph", - "xaxis": { - "buckets": null, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "$$hashKey": "object:86", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:87", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "Read index confirm duration associated with async snapshot", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "percentunit" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - 
"fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 67 + "y": 35 }, - "hiddenSeries": false, - "id": 24763574242, + "height": null, + "hideTimeOverride": false, + "id": 190, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": false, - "expr": "tikv_storage_process_stat_cpu_usage", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, "instant": false, "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Process Stat Cpu Usage", + "title": "Read index confirm duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -19689,8 +25867,8 @@ }, "yaxes": [ { - "$$hashKey": "object:86", - "format": "percentunit", + "decimals": null, + "format": "s", "label": null, "logBase": 1, "max": null, @@ -19698,106 +25876,123 @@ "show": true }, { - "$$hashKey": "object:87", - "format": "s", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The storage async write duration", + "description": "CPU usage measured over a 30 second window", + "editable": true, + "error": 
false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 12, - "y": 22 + "x": 0, + "y": 42 }, - "hiddenSeries": false, - "id": 20001, + "height": null, + "hideTimeOverride": false, + "id": 191, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le))", - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "refId": "A" - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) by (le))", - "hide": false, - 
"interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" - }, - { - "exemplar": true, - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_storage_process_stat_cpu_usage\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", - "refId": "C" + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum((\n tikv_storage_process_stat_cpu_usage\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Storage async write duration", + "title": "Process Stat Cpu Usage", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -19805,7 +26000,8 @@ }, "yaxes": [ { - "format": "s", + "decimals": null, + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -19813,6 +26009,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -19823,95 +26020,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The storage async snapshot duration without the involving of raftstore", + 
"description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 48 + "y": 42 }, - "hiddenSeries": false, - "id": 31114, + "height": null, + "hideTimeOverride": false, + "id": 192, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, 
sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "refId": "A" + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": 
"sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_local_read\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_full_compact_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_storage_full_compact_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": 
"Storage async snapshot duration (pure local read)", + "title": "Full compaction duration seconds", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -19919,6 +26201,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -19927,6 +26210,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -19937,95 +26221,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Read index propose wait duration associated with async snapshot", + "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 12, - "y": 60 + "x": 0, + "y": 49 }, - "hiddenSeries": false, - "id": 31115, + "height": null, + "hideTimeOverride": false, + "id": 193, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as 
zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_propose_wait\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "refId": "A" + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_propose_wait\"}[1m])) by (le))", + 
"datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_propose_wait\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_propose_wait\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Read index propose wait duration", + "title": "Full compaction pause duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -20033,6 +26402,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -20041,6 +26411,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20051,95 +26422,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Read index confirm duration associated with async snapshot", + "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + 
"threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 72 + "y": 49 }, - "hiddenSeries": false, - "id": 31116, + "height": null, + "hideTimeOverride": false, + "id": 194, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "refId": "A" + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_storage_engine_async_request_duration_seconds_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m])) / sum(rate(tikv_storage_engine_async_request_duration_seconds_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot_read_index_confirm\"}[1m]))", + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "avg", - "refId": "C" + "metric": "", + "query": "(sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Read index confirm duration", + "title": "Full compaction per-increment duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": 
"time", "name": null, "show": true, @@ -20147,6 +26603,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -20155,6 +26612,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20165,116 +26623,169 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Storage", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 19 + "y": 0 }, - "id": 9160, + "height": null, + "hideTimeOverride": false, + "id": 195, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 20 + "y": 0 }, - "height": "", - "hiddenSeries": false, - "id": 9552, + "height": null, + "hideTimeOverride": false, + "id": 196, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, 
"sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_scheduler_write_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_write_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "write-{{instance}}", "metric": "", - "refId": "A", - "step": 40 + "query": "sum((\n tikv_scheduler_write_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_scheduler_throttle_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance) != 0", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_throttle_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) != 0", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "throttle-{{instance}}", "metric": "", - "refId": "B", - "step": 40 + "query": 
"sum((\n tikv_scheduler_throttle_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) != 0", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler flow", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -20282,6 +26793,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -20290,6 +26802,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20300,92 +26813,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 20 + "y": 0 }, - "height": "", - "hiddenSeries": false, - "id": 9553, + "height": null, + "hideTimeOverride": false, + "id": 197, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - 
"linewidth": 2, + "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_scheduler_discard_ratio{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance) / 10000000", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_discard_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / 10000000", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}}", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", "metric": "", - "refId": "A", - "step": 40 + "query": "sum((\n tikv_scheduler_discard_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / 10000000", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler discard ratio", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -20393,14 +26926,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ 
-20411,10 +26946,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -20424,150 +26960,202 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, - "min": 0, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 28 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 11512, + "id": 198, + "interval": null, "legend": { - "show": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_scheduler_throttle_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_throttle_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_scheduler_throttle_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Throttle duration", 
"tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, - "bars": true, + "bars": false, "cacheTimeout": null, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The count of pending commands per TiKV instance", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 7 }, - "height": "", - "hiddenSeries": false, - "id": 11906, + "height": null, + "hideTimeOverride": false, + "id": 199, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, - "lines": false, - "linewidth": 2, + "lines": true, + "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + 
"nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_scheduler_throttle_cf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"} != 0", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_scheduler_throttle_cf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) != 0", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{cf}}", "metric": "", - "refId": "A", - "step": 40 + "query": "((\n tikv_scheduler_throttle_cf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) != 0", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler throttled CF", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -20575,14 +27163,16 @@ }, "yaxes": [ { - "format": "none", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20593,35 +27183,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": 
"${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 36 + "y": 14 }, - "hiddenSeries": false, - "id": 9947, + "height": null, + "hideTimeOverride": false, + "id": 200, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -20635,43 +27240,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_throttle_action_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (cf, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_throttle_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, cf) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}-{{cf}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 10 + "metric": 
"", + "query": "sum(rate(\n tikv_scheduler_throttle_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, cf) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Flow controller actions", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -20679,6 +27296,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -20687,6 +27305,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20697,120 +27316,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 36 + "y": 14 }, - "height": "", - "hiddenSeries": false, - "id": 10338, + "height": null, + "hideTimeOverride": false, + "id": 201, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - 
"linewidth": 2, + "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_scheduler_l0_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance, cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{cf}}_l0_flow-{{instance}}", "metric": "", - "refId": "D", - "step": 40 + "query": "sum((\n tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_scheduler_flush_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance, cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{cf}}_flush_flow-{{instance}}", "metric": "", - "refId": "E", - "step": 40 + "query": "sum((\n tikv_scheduler_flush_flow\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_scheduler_l0_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "hide": true, - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "total_l0_flow-{{instance}}", "metric": "", - "refId": "B", - "step": 40 + "query": "sum((\n tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_scheduler_flush_flow{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "hide": true, - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "total_flush_flow-{{instance}}", "metric": "", - "refId": "C", - "step": 40 + "query": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Flush/L0 flow", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + 
"transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -20818,6 +27474,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -20826,6 +27483,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20836,109 +27494,142 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 44 + "y": 21 }, - "height": "", - "hiddenSeries": false, - "id": 9944, + "height": null, + "hideTimeOverride": false, + "id": 202, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": 
null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_scheduler_l0{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_scheduler_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "l0-{{instance}}", "metric": "", - "refId": "A", - "step": 40 + "query": "max((\n tikv_scheduler_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_scheduler_memtable{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_scheduler_memtable\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "memtable-{{instance}}", "metric": "", - "refId": "B", - "step": 40 + "query": "max((\n tikv_scheduler_memtable\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_scheduler_l0_avg{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_scheduler_l0_avg\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, 
"legendFormat": "avg_l0-{{instance}}", "metric": "", - "refId": "C", - "step": 40 + "query": "max((\n tikv_scheduler_l0_avg\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Flow controller factors", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -20946,7 +27637,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -20954,6 +27646,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -20964,41 +27657,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 44 + "y": 21 }, - "hiddenSeries": false, - "id": 9946, + "height": null, + "hideTimeOverride": false, + "id": 203, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": 
"current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -21006,65 +27714,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "cf", - "yaxis": 2 - }, - { - "alias": "pending-bytes", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_engine_pending_compaction_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"kv\"}) by (cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"kv\"}\n \n)) by (cf) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{cf}}", - "metric": "tikv_engine_pending_compaction_bytes", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"kv\"}\n \n)) by (cf) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "max(tikv_scheduler_pending_compaction_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance) / 10000000", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (cf) / 10000000", "format": 
"time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "pending-bytes-{{instance}}", - "metric": "tikv_engine_pending_compaction_bytes", - "refId": "B", - "step": 10 + "metric": "", + "query": "sum((\n tikv_scheduler_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (cf) / 10000000", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compaction pending bytes", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21072,6 +27785,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -21080,90 +27794,123 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "30", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Throttle time for txn storage commands in 1 minute.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 52 + "y": 28 }, - "hiddenSeries": false, - "id": 23763572363, + "height": null, + "hideTimeOverride": false, + "id": 204, + "interval": null, + "isNew": true, "legend": { 
"alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "total": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_txn_command_throttle_time_total{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_txn_command_throttle_time_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_txn_command_throttle_time_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Txn command throttled duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, 
"show": true, @@ -21171,7 +27918,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -21179,6 +27927,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -21189,80 +27938,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Throttle time for non-txn related processing like analyze or dag in 1 minute.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 52 + "y": 28 }, - "hiddenSeries": false, - "id": 23763572365, + "height": null, + "hideTimeOverride": false, + "id": 205, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "total": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": 
false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_non_txn_command_throttle_time_total{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_non_txn_command_throttle_time_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_non_txn_command_throttle_time_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Non-txn command throttled duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21270,7 +28051,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -21278,6 +28060,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -21288,57 +28071,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Flow Control", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": 
"absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 20 + "y": 0 }, - "id": 2756, + "height": null, + "hideTimeOverride": false, + "id": 206, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of commands on each stage in commit command", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, + "h": 7, "w": 24, "x": 0, - "y": 18 + "y": 0 }, - "height": "400", - "hiddenSeries": false, - "id": 168, + "height": null, + "hideTimeOverride": false, + "id": 207, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -21346,58 +28170,70 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 1, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" - } - }, + "repeat": null, + "repeatDirection": null, 
"seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_too_busy_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "busy", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "busy-{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_scheduler_stage_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (stage)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (stage) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{stage}}", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (stage) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler stage total", "tooltip": { + "msResolution": true, 
"shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21405,6 +28241,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -21413,6 +28250,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -21423,116 +28261,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 28 + "y": 7 }, - "hiddenSeries": false, - "id": 3, + "height": null, + "hideTimeOverride": false, + "id": 208, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": 
"flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } - }, - "seriesOverrides": [], - "spaceLength": 10, + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_scheduler_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", "metric": "", - "refId": "A", - "step": 10 + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_scheduler_command_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n 
sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", "metric": "", - "refId": "B", - "step": 10 + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_scheduler_command_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) / sum(rate(tikv_scheduler_command_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_scheduler_command_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "refId": "C", - "step": 10 + "query": "(sum(rate(\n tikv_scheduler_command_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_command_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler command duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21540,6 +28442,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -21548,6 +28451,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -21558,116 +28462,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time which is caused by latch wait in commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + 
"fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 7 }, - "hiddenSeries": false, - "id": 194, + "height": null, + "hideTimeOverride": false, + "id": 209, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } - }, - "seriesOverrides": [], - "spaceLength": 10, + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_scheduler_latch_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", "metric": "", - "refId": "A", - "step": 10 + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_scheduler_latch_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", "metric": "", - "refId": "B", - "step": 10 + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_scheduler_latch_wait_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) / 
sum(rate(tikv_scheduler_latch_wait_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "refId": "C", - "step": 10 + "query": "(sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler 
latch wait duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21675,6 +28643,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -21683,6 +28652,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -21693,116 +28663,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of keys read by a commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 36 + "y": 14 }, - "hiddenSeries": false, - "id": 195, + "height": null, + "hideTimeOverride": false, + "id": 210, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + 
"dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } - }, - "seriesOverrides": [], - "spaceLength": 10, + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_scheduler_kv_command_key_read_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", - "metric": "kv_command_key", - "refId": "A", - "step": 10 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_scheduler_kv_command_key_read_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", "metric": "", - "refId": "B", - "step": 10 + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_scheduler_kv_command_key_read_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) / sum(rate(tikv_scheduler_kv_command_key_read_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_scheduler_kv_command_key_read_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "refId": "C", - "step": 10 + "query": "(sum(rate(\n tikv_scheduler_kv_command_key_read_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n 
[$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler keys read", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21810,7 +28844,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -21818,6 +28853,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -21828,116 +28864,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of keys written by a commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": 
"absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 36 + "y": 14 }, - "hiddenSeries": false, - "id": 373, + "height": null, + "hideTimeOverride": false, + "id": 211, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } - }, - "seriesOverrides": [], - "spaceLength": 10, + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_scheduler_kv_command_key_write_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", - "metric": "kv_command_key", - "refId": "A", - "step": 10 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_scheduler_kv_command_key_write_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", "metric": "", - "refId": "B", - "step": 10 + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_scheduler_kv_command_key_write_sum{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) / sum(rate(tikv_scheduler_kv_command_key_write_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_scheduler_kv_command_key_write_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "refId": "C", - "step": 10 + "query": "(sum(rate(\n tikv_scheduler_kv_command_key_write_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], 
"timeShift": null, "title": "Scheduler keys written", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -21945,7 +29045,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -21953,6 +29054,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -21963,93 +29065,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The keys scan details of each CF when executing commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 44 + "y": 21 }, - "id": 560, + "height": null, + "hideTimeOverride": false, + "id": 212, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + 
"alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_kv_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"$command\"}[1m])) by (tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{tag}}", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\"}\n [$__rate_interval]\n)) by (tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler scan details", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22057,7 +29178,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -22065,6 +29187,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -22075,93 +29198,112 @@ ], "yaxis": { 
"align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The keys scan details of lock CF when executing commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 44 + "y": 21 }, - "id": 675, + "height": null, + "hideTimeOverride": false, + "id": 213, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_kv_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"$command\", cf=\"lock\"}[1m])) by (tag)", + 
"datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"lock\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{tag}}", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"lock\"}\n [$__rate_interval]\n)) by (tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler scan details [lock]", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22169,7 +29311,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -22177,6 +29320,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -22187,93 +29331,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The keys scan details of write CF when executing commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + 
"threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 52 + "y": 28 }, - "id": 829, + "height": null, + "hideTimeOverride": false, + "id": 214, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_kv_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"$command\", cf=\"write\"}[1m])) by (tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"write\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{tag}}", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", 
cf=\"write\"}\n [$__rate_interval]\n)) by (tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler scan details [write]", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22281,7 +29444,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -22289,6 +29453,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -22299,93 +29464,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The keys scan details of default CF when executing commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 52 + "y": 28 }, - "id": 830, + "height": null, + "hideTimeOverride": false, + "id": 215, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 
1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_kv_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"$command\", cf=\"default\"}[1m])) by (tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"default\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{tag}}", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"default\"}\n [$__rate_interval]\n)) by (tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler scan details [default]", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22393,7 +29577,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + 
"format": "none", "label": null, "logBase": 1, "max": null, @@ -22401,6 +29586,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -22411,125 +29597,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed on reading when executing commit command", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 63 + "y": 35 }, - "hiddenSeries": false, - "id": 23763572710, + "height": null, + "hideTimeOverride": false, + "id": 216, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": 
"count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } - }, - "seriesOverrides": [], - "spaceLength": 10, + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_scheduler_processing_read_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "refId": "A", - "step": 10 + "query": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tikv_scheduler_processing_read_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_scheduler_processing_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "95%", + "intervalFactor": 1, + "legendFormat": "avg", "metric": "", - "refId": "B", - "step": 10 + "query": "(sum(rate(\n tikv_scheduler_processing_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_scheduler_processing_read_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) / sum(rate(tikv_scheduler_processing_read_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
type=\"$command\"}[1m])) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": false, + "hide": true, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "avg", + "intervalFactor": 1, + "legendFormat": "count", "metric": "", - "refId": "C", - "step": 10 + "query": "sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler command read duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22537,6 +29778,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -22545,6 +29787,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -22555,10 +29798,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -22568,72 +29812,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed on checking memory locks", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", 
+ "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 63 + "y": 35 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 7236, + "id": 217, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, "repeat": null, "repeatDirection": null, - "repeatedByRow": true, "reverseYBuckets": false, - "scopedVars": { - "command": { - "selected": false, - "text": "acquire_pessimistic_lock", - "value": "acquire_pessimistic_lock" - } - }, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_storage_check_mem_lock_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"$command\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_check_mem_lock_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_storage_check_mem_lock_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Check memory locks duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": 
null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -22641,47 +29907,93 @@ } ], "repeat": "command", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Scheduler - $command", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 26 + "y": 0 }, - "id": 2755, + "height": null, + "hideTimeOverride": false, + "id": 218, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of commands on each stage", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, + "h": 7, "w": 12, "x": 0, - "y": 15 + "y": 0 }, - "height": "400", - "id": 167, + "height": null, + "hideTimeOverride": false, + "id": 219, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -22689,47 
+30001,70 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 1, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_too_busy_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (stage)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "busy", - "refId": "A", - "step": 20 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{stage}}", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_scheduler_stage_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (stage)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{stage}}", - "refId": "B", - "step": 20 + "metric": "", + "query": "sum(rate(\n tikv_scheduler_stage_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler stage total", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22737,6 +30072,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -22745,6 +30081,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -22755,36 +30092,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total writing bytes of commands on each stage", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, + "h": 7, "w": 12, "x": 12, - "y": 15 + "y": 0 }, - "height": "400", - "id": 3834, + "height": null, + "hideTimeOverride": false, + "id": 220, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -22792,39 +30149,55 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 1, - "nullPointMode": "null", + "maxDataPoints": 
null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_scheduler_writing_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_writing_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 20 + "metric": "", + "query": "sum((\n tikv_scheduler_writing_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler writing bytes", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22832,6 +30205,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -22840,7 +30214,8 @@ "show": true }, { - "format": "bytes", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -22850,81 +30225,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, 
"description": "The count of different priority commands", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 25 + "y": 7 }, - "height": "", - "id": 1, + "height": null, + "hideTimeOverride": false, + "id": 221, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_scheduler_commands_pri_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (priority)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_commands_pri_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (priority) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{priority}}", "metric": "", - "refId": "A", - "step": 40 + "query": "sum(rate(\n 
tikv_scheduler_commands_pri_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (priority) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler priority commands", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -22932,6 +30338,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -22940,6 +30347,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -22950,123 +30358,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 300 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "120s", - "handler": 1, - "message": "TiKV scheduler context total", - "name": "scheduler pending commands alert", - "noDataState": "ok", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of pending commands per TiKV instance", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - 
"h": 8, + "h": 7, "w": 12, "x": 12, - "y": 25 + "y": 7 }, - "height": "", - "id": 193, + "height": null, + "hideTimeOverride": false, + "id": 222, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_scheduler_contex_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_contex_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "refId": "A", - "step": 40 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 300 + "query": "sum((\n tikv_scheduler_contex_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scheduler pending commands", "tooltip": { - "msResolution": false, + 
"msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23074,6 +30471,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -23082,6 +30480,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -23092,10 +30491,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -23105,108 +30505,188 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 45 + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763572468, + "id": 223, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_yatp_pool_schedule_wait_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sched-worker.*\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched-worker.*\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched-worker.*\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Txn Scheduler Pool Wait Duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Scheduler", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 27 + "y": 0 }, - "id": 2758, + "height": null, + "hideTimeOverride": false, + "id": 224, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { 
"aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of GC tasks processed by gc_worker", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 46 + "y": 0 }, - "id": 121, + "height": null, + "hideTimeOverride": false, + "id": 225, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -23214,62 +30694,100 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_gcworker_gc_tasks_vec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (task)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gcworker_gc_tasks_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "total-{{task}}", - "metric": 
"tikv_storage_command_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_gcworker_gc_tasks_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_storage_gc_skipped_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (task)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_gc_skipped_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "skipped-{{task}}", - "metric": "tikv_storage_gc_skipped_counter", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_storage_gc_skipped_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gcworker_gc_task_fail_vec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (task)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gcworker_gc_task_fail_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "failed-{{task}}", - "refId": "C" + "metric": "", + "query": "sum(rate(\n tikv_gcworker_gc_task_fail_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": 
"sum(rate(tikv_gc_worker_too_busy{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_worker_too_busy\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "gcworker-too-busy", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_gc_worker_too_busy\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "GC tasks", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23277,7 +30795,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -23285,6 +30804,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -23295,35 +30815,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing GC tasks", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 
12, - "y": 46 + "y": 0 }, - "id": 2224, + "height": null, + "hideTimeOverride": false, + "id": 226, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -23331,62 +30872,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_gcworker_gc_task_duration_vec_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, task))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "max-{{task}}", - "metric": "tikv_storage_command_total", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + 
"query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_gcworker_gc_task_duration_vec_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, task))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%-{{task}}", - "metric": "tikv_storage_gc_skipped_counter", - "refId": "B", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_gcworker_gc_task_duration_vec_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, task))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_gcworker_gc_task_duration_vec_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", - 
"intervalFactor": 2, - "legendFormat": "95%-{{task}}", - "refId": "C" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_gcworker_gc_task_duration_vec_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gcworker_gc_task_duration_vec_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (task) / sum(rate(tikv_gcworker_gc_task_duration_vec_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (task)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "average-{{task}}", - "refId": "D" + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "GC tasks duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23394,6 
+30996,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -23402,6 +31005,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -23412,76 +31016,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The GC duration", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 53 + "y": 7 }, - "id": 969, + "height": null, + "hideTimeOverride": false, + "id": 227, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1.0, sum(rate(tidb_tikvclient_gc_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (instance, le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(1,(\n sum(rate(\n 
tidb_tikvclient_gc_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 40 + "metric": "", + "query": "histogram_quantile(1,(\n sum(rate(\n tidb_tikvclient_gc_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "TiDB GC seconds", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23489,6 +31129,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -23497,6 +31138,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -23507,79 +31149,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, "description": "The count of TiDB GC worker actions", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 53 + "y": 7 }, - "id": 966, + 
"height": null, + "hideTimeOverride": false, + "id": 228, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_tikvclient_gc_worker_actions_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_tikvclient_gc_worker_actions_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tidb_tikvclient_gc_worker_actions_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "TiDB GC worker actions", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, 
@@ -23587,14 +31262,16 @@ }, "yaxes": [ { - "format": "short", - "label": "", + "decimals": null, + "format": "none", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -23605,35 +31282,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, "description": "Progress of ResolveLocks, the first phase of GC", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 60 + "y": 14 }, - "id": 2823, + "height": null, + "hideTimeOverride": false, + "id": 229, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -23641,41 +31339,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tidb_tikvclient_range_task_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
type=~\"resolve-locks.*\"}) by (result)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tidb_tikvclient_range_task_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=~\"resolve-locks.*\"}\n \n)) by (result) ", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{result}}", - "metric": "tikv_storage_command_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "max((\n tidb_tikvclient_range_task_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=~\"resolve-locks.*\"}\n \n)) by (result) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "ResolveLocks Progress", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23683,7 +31395,7 @@ }, "yaxes": [ { - "decimals": 0, + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -23692,45 +31404,67 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, "description": "Progress of TiKV's GC", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 60 + "y": 14 }, - 
"id": 2821, + "height": null, + "hideTimeOverride": false, + "id": 230, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -23738,41 +31472,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_gcworker_autogc_processed_regions{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"scan\"}) by (instance) / sum(tikv_raftstore_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"region\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum((\n tikv_gcworker_autogc_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n \n)) by (instance) / sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n \n)) by (instance) )", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_storage_command_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "(sum((\n tikv_gcworker_autogc_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n \n)) 
by (instance) / sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n \n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "TiKV Auto GC Progress", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23784,92 +31532,128 @@ "format": "percentunit", "label": null, "logBase": 1, - "max": "1.1", + "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "keys / second", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 67 + "y": 21 }, - "id": 2589, + "height": null, + "hideTimeOverride": false, + "id": 231, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, 
+ "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_storage_mvcc_gc_delete_versions_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_mvcc_gc_delete_versions_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{key_mode}}_keys/s", - "refId": "E" + "metric": "", + "query": "sum(rate(\n tikv_storage_mvcc_gc_delete_versions_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "GC speed", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23877,14 +31661,16 @@ }, "yaxes": [ { - "format": "short", - "label": "", + "decimals": null, + "format": "none", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -23895,35 +31681,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": 
false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, "description": "SafePoint used for TiKV's Auto GC", - "fill": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 67 + "y": 21 }, - "id": 2822, + "height": null, + "hideTimeOverride": false, + "id": 232, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -23931,41 +31738,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_gcworker_autogc_safe_point{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}) by (instance) / (2^18)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_gcworker_autogc_safe_point\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n \n)) by (instance) / (2^18)", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_storage_command_total", - "refId": "A", - 
"step": 4 + "metric": "", + "query": "max((\n tikv_gcworker_autogc_safe_point\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n \n)) by (instance) / (2^18)", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "TiKV Auto GC SafePoint", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -23982,216 +31803,217 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, - "description": " \tThe lifetime of TiDB GC", + "description": "The lifetime of TiDB GC", "editable": true, "error": false, - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": "" + }, + "unit": "s" + }, + "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 74 + "y": 28 }, - "id": 27, + "height": null, + "hideTimeOverride": false, + "id": 233, "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": 
[ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "tableColumn": "", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "max(tidb_tikvclient_gc_config{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"tikv_gc_life_time\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_life_time\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "refId": "A", - "step": 60 + "intervalFactor": 1, + "legendFormat": null, + "metric": "", + "query": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_life_time\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], - "thresholds": "", + "timeFrom": null, + "timeShift": null, "title": "GC lifetime", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "transformations": [], + "transparent": false, + "type": "stat" }, { "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, "description": "The interval of TiDB GC", "editable": true, "error": false, - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - 
"thresholdMarkers": true + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": "" + }, + "unit": "s" + }, + "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 74 + "y": 28 }, - "id": 28, + "height": null, + "hideTimeOverride": false, + "id": 234, "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "tableColumn": "", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "max(tidb_tikvclient_gc_config{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"tikv_gc_run_interval\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_run_interval\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 60 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": null, + "metric": "", + "query": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_run_interval\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], - 
"thresholds": "", + "timeFrom": null, + "timeShift": null, "title": "GC interval", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "transformations": [], + "transparent": false, + "type": "stat" }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, "description": "Keys handled in GC compaction filter", - "fill": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 74 + "y": 35 }, - "id": 6596, + "height": null, + "hideTimeOverride": false, + "id": 235, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -24199,97 +32021,175 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_gc_compaction_filtered{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + 
"expr": "sum(rate(\n tikv_gc_compaction_filtered\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{key_mode}}_filtered", - "metric": "tikv_storage_command_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filtered\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_skip{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_filter_skip\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_skipped", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filter_skip\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_mvcc_rollback{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_mvcc_rollback\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_mvcc-rollback/mvcc-lock", - 
"refId": "C" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_mvcc_rollback\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_orphan_versions{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_filter_orphan_versions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_orphan-versions", - "refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filter_orphan_versions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_perform{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_filter_perform\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_performed-times", - "refId": "E" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filter_perform\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_failure{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode,type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode, type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_failure-{{type}}", - "refId": "F" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode, type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_mvcc_deletion_met{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_met\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_mvcc-deletion-met", - "refId": "G" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_met\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_mvcc_deletion_handled{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_handled\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_mvcc-deletion-handled", - "refId": "H" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_handled\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_gc_compaction_filter_mvcc_deletion_wasted{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (key_mode)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_wasted\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{key_mode}}_mvcc-deletion-wasted", - "refId": "I" + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_wasted\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "GC in Compaction Filter", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -24298,7 +32198,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -24306,45 +32206,67 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 
} }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, "description": "GC scan write details", - "fill": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 86 + "y": 42 }, - "id": 8767, + "height": null, + "hideTimeOverride": false, + "id": 236, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -24352,37 +32274,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_gcworker_gc_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\", cf=\"write\"}[1m])) by (key_mode,tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": 
"{{key_mode}}_{{tag}}", - "refId": "A" + "legendFormat": "{{key_mode}}-{{tag}}", + "metric": "", + "query": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "GC scan write details", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -24391,7 +32331,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -24399,45 +32339,67 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, "description": "GC scan default details", - "fill": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 86 + "y": 42 }, - "id": 8768, + "height": null, + "hideTimeOverride": false, + "id": 237, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": 
"max", "sortDesc": true, "total": false, "values": true @@ -24445,37 +32407,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_gcworker_gc_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\", cf=\"default\"}[1m])) by (key_mode,tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{key_mode}}_{{tag}}", - "refId": "A" + "legendFormat": "{{key_mode}}-{{tag}}", + "metric": "", + "query": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "GC scan default details", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -24484,7 +32464,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -24492,69 +32472,109 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, 
"logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "GC", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 28 + "y": 0 }, - "id": 2759, + "height": null, + "hideTimeOverride": false, + "id": 238, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The rate of Raft snapshot messages sent", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 31 + "y": 0 }, - "hiddenSeries": false, - "id": 35, + "height": null, + "hideTimeOverride": false, + "id": 239, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -24562,43 +32582,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": 
"null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_raftstore_raft_sent_message_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [1m]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": " ", - "refId": "A", - "step": 60 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [1m]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Rate snapshot message", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -24606,6 +32638,7 @@ }, "yaxes": [ { + "decimals": null, "format": "opm", "label": null, "logBase": 1, @@ -24614,6 +32647,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -24624,43 +32658,56 @@ ], "yaxis": { 
"align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The number of snapshots in different states", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 31 + "y": 0 }, - "hiddenSeries": false, - "id": 38, + "height": null, + "hideTimeOverride": false, + "id": 240, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -24668,44 +32715,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, - "steppedLine": true, + "steppedLine": false, "targets": [ { - "expr": "sum(tikv_raftstore_snapshot_traffic_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_snapshot_traffic_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", "metric": "", - "refId": "A", - "step": 60 + "query": "sum((\n tikv_raftstore_snapshot_traffic_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Snapshot state count", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -24713,7 +32771,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -24721,6 +32780,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -24731,43 +32791,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time snapshot generation tasks waited to be scheduled. 
", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 38 + "y": 7 }, - "hiddenSeries": false, - "id": 37, + "height": null, + "hideTimeOverride": false, + "id": 241, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -24775,45 +32848,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_snapshot_generation_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_generation_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - 
"intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 60 + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_generation_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Snapshot generation wait duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -24821,6 +32904,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -24829,6 +32913,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -24839,43 +32924,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when handling snapshots", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 38 + "y": 7 }, - "hiddenSeries": false, - "id": 23763573704, + "height": null, + "hideTimeOverride": false, + "id": 242, + "interval": null, + "isNew": true, "legend": { 
"alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -24883,59 +32981,85 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_server_send_snapshot_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_send_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "send", - "refId": "A", - "step": 60 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_send_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_snapshot_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"apply\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "apply", - "refId": "B", - "step": 60 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_snapshot_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"generate\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"generate\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "generate", - "refId": "C", - "step": 60 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"generate\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% Handle snapshot duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": 
"individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -24943,6 +33067,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -24951,6 +33076,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -24961,43 +33087,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The snapshot size (P99.99).9999", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 45 + "y": 14 }, - "hiddenSeries": false, - "id": 44, + "height": null, + "hideTimeOverride": false, + "id": 243, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25005,44 +33144,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, 
"steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.9999, sum(rate(tikv_snapshot_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "size", - "metric": "tikv_snapshot_size_bucket", - "refId": "A", - "step": 40 + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99.99% Snapshot size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25050,6 +33200,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -25058,6 +33209,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -25068,43 +33220,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The number of KV within a snapshot in .9999", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + 
"steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 45 + "y": 14 }, - "hiddenSeries": false, - "id": 43, + "height": null, + "hideTimeOverride": false, + "id": 244, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25112,44 +33277,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.9999, sum(rate(tikv_snapshot_kv_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_kv_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "count", - "metric": "tikv_snapshot_kv_count_bucket", - "refId": "A", - "step": 40 + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_kv_count_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99.99% Snapshot KV count", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25157,7 +33333,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -25165,6 +33342,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -25175,43 +33353,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Action stats for snapshot generating and applying", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 52 + "y": 21 }, - "hiddenSeries": false, - "id": 36, + "height": null, + "hideTimeOverride": false, + "id": 245, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25219,49 +33410,70 @@ 
"lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_raftstore_snapshot_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type, status)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_raftstore_snapshot_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{type}}-{{status}}", - "refId": "A" + "metric": "", + "query": "sum(delta(\n tikv_raftstore_snapshot_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(delta(tikv_raftstore_clean_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_raftstore_clean_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "clean-region-by-{{type}}", - "refId": "B" + "metric": "", + "query": "sum(delta(\n tikv_raftstore_clean_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", + 
"refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Snapshot Actions", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25269,61 +33481,76 @@ }, "yaxes": [ { + "decimals": null, "format": "opm", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The speed of sending or receiving snapshot", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 52 + "y": 21 }, - "hiddenSeries": false, - "id": 4201, + "height": null, + "hideTimeOverride": false, + "id": 246, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25331,52 +33558,70 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": 
null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(tikv_snapshot_limit_transport_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_snapshot_limit_transport_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}--{{type}}", - "metric": "tikv_snapshot_limit_transport_bytes", - "refId": "A", - "step": 40 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_snapshot_limit_transport_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "rate(tikv_snapshot_limit_generate_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", - "hide": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_snapshot_limit_generate_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}--generate", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{instance}}-generate", + "metric": "", + "query": "sum(rate(\n tikv_snapshot_limit_generate_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Snapshot transport speed", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25384,74 +33629,118 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Snapshot", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 29 + "y": 0 }, - "id": 2760, + "height": null, + "hideTimeOverride": false, + "id": 247, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of tasks handled by worker", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } 
+ } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 0 }, - "id": 59, + "height": null, + "hideTimeOverride": false, + "id": 248, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25459,39 +33748,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_worker_handled_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_worker_handled_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{name}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_worker_handled_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": 
[], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Worker handled tasks", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25499,14 +33804,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -25517,40 +33824,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tCurrent pending and running tasks of worker", + "description": "Current pending and running tasks of worker", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 0 }, - "id": 1395, + "height": null, + "hideTimeOverride": false, + "id": 249, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25558,39 +33881,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": 
{ + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_worker_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_worker_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (name) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{name}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum((\n tikv_worker_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Worker pending tasks", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25598,14 +33937,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -25616,40 +33957,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of tasks handled by future_pool", "editable": true, 
"error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 29 + "y": 7 }, - "id": 1876, + "height": null, + "hideTimeOverride": false, + "id": 250, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25657,39 +34014,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_futurepool_handled_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_futurepool_handled_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{name}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_futurepool_handled_task_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "FuturePool handled tasks", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25697,14 +34070,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -25715,40 +34090,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "Current pending and running tasks of future_pool", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 29 + "y": 7 }, - "id": 1877, + "height": null, + "hideTimeOverride": false, + "id": 251, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25756,39 +34147,55 @@ "lines": true, "linewidth": 1, "links": [], - 
"nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(avg_over_time(tikv_futurepool_pending_task_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(avg_over_time(\n tikv_futurepool_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (name) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{name}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(avg_over_time(\n tikv_futurepool_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "FuturePool pending tasks", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25796,14 +34203,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -25814,100 +34223,150 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + 
"repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Task", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 30 + "y": 0 }, - "id": 2757, + "height": null, + "hideTimeOverride": false, + "id": 252, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#5195ce", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed to handle coprocessor read requests", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 22 + "y": 0 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 3062, + "id": 253, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": false, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": 
"sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Request duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -25916,33 +34375,50 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": "The time consumed to handle coprocessor read requests", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": 
"rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 22 + "y": 0 }, - "id": 16, + "height": null, + "hideTimeOverride": false, + "id": 254, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -25950,46 +34426,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, 
+ "legendFormat": "99.99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-100%", - "refId": "E" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-99%", - "refId": "A", - "step": 4 + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Request duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 1, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -25997,56 +34550,76 @@ }, "yaxes": [ { - "decimals": 1, + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": 
"${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 30 + "y": 7 }, - "id": 74, + "height": null, + "hideTimeOverride": false, + "id": 255, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26054,41 +34627,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_request_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{req}}", - "metric": "tikv_coprocessor_request_error", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n 
tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total Requests", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26096,56 +34683,76 @@ }, "yaxes": [ { - "decimals": 1, + "decimals": null, "format": "ops", - "label": "", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 30 + "y": 7 }, - "id": 3128, + "height": null, + "hideTimeOverride": false, + "id": 256, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26153,41 +34760,55 @@ "lines": true, 
"linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_request_error{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{reason}}", - "metric": "tikv_coprocessor_request_error", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total Request Errors", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26195,56 +34816,76 @@ }, "yaxes": [ { - "decimals": 1, + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - 
"alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 37 + "y": 14 }, - "id": 52, + "height": null, + "hideTimeOverride": false, + "id": 257, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26252,38 +34893,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_scan_keys_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{req}}", - 
"refId": "D" + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Total KV Cursor Operations", + "title": "KV Cursor Operations", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26291,56 +34949,76 @@ }, "yaxes": [ { - "decimals": 0, - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 37 + "y": 14 }, - "id": 3129, + "height": null, + "hideTimeOverride": false, + "id": 258, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", 
"sortDesc": true, "total": false, "values": true @@ -26348,45 +35026,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, avg(rate(tikv_coprocessor_scan_keys_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, req)) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "100%-{{req}}", - "refId": "D" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, avg(rate(tikv_coprocessor_scan_keys_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\"}[1m])) by (le, req)) ", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%-{{req}}", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "format": "time_series", + "hide": true, + "instant": 
false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "KV Cursor Operations", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26394,57 +35150,76 @@ }, "yaxes": [ { - "decimals": 0, + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 44 + "y": 21 }, - "id": 2118, + "height": null, + "hideTimeOverride": false, + "id": 259, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": 
"current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26452,46 +35227,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "key_skipped", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\" ,metric=\"internal_delete_skipped_count\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"internal_delete_skipped_count\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "delete_skipped-{{req}}", - "metric": "scan_details", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"internal_delete_skipped_count\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total RocksDB Perf Statistics", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26499,57 +35283,76 @@ }, "yaxes": [ { 
- "decimals": 1, - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { "decimals": null, "format": "short", - "label": "", + "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 44 + "y": 21 }, - "id": 551, + "height": null, + "hideTimeOverride": false, + "id": 260, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26557,41 +35360,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_response_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + 
"expr": "sum(rate(\n tikv_coprocessor_response_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "size", + "intervalFactor": 1, + "legendFormat": "{{instance}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_coprocessor_response_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total Response Size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26599,72 +35416,118 @@ }, "yaxes": [ { - "decimals": 0, - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Coprocessor Overview", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 31 + "y": 0 }, - "id": 3197, + "height": null, + "hideTimeOverride": 
false, + "id": 261, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when handling coprocessor requests", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 23 + "y": 0 }, - "id": 113, + "height": null, + "hideTimeOverride": false, + "id": 262, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26672,46 +35535,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": 
"histogram_quantile(1, sum(rate(tikv_coprocessor_request_handle_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-100%", - "refId": "E" + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_handle_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-99%", - "refId": "A", - "step": 4 + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Handle duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 1, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": 
false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26719,58 +35659,76 @@ }, "yaxes": [ { - "decimals": 1, + "decimals": null, "format": "s", - "label": "", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "decimals": 1, + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The time consumed to handle coprocessor requests per TiKV instance (P95)", + "description": "The time consumed to handle coprocessor requests per TiKV instance", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 23 + "y": 0 }, - "id": 117, + "height": null, + "hideTimeOverride": false, + "id": 263, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26778,39 +35736,123 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - 
"spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_handle_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{req}}-{{instance}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{req}}-{{instance}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}-{{instance}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-{{req}}", - "refId": "B", - "step": 4 + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}-{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, 
instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "95% Handle duration by store", + "title": "Handle duration by store", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26818,56 +35860,76 @@ }, "yaxes": [ { - "decimals": 1, + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when coprocessor requests are wait for being handled", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 30 + "y": 7 }, - "id": 111, + "height": null, + "hideTimeOverride": false, + "id": 264, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26875,46 +35937,123 @@ "lines": true, "linewidth": 1, "links": [], + 
"maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n 
\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-100%", - "refId": "D" + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n 
[$__rate_interval]\n)) by (req) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-99%", - "refId": "A", - "step": 4 + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Wait duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -26922,56 +36061,76 @@ }, "yaxes": [ { - "decimals": 1, + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when coprocessor requests are wait for being handled in each TiKV instance", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 30 + "y": 7 }, - "id": 116, + "height": null, 
+ "hideTimeOverride": false, + "id": 265, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -26979,39 +36138,123 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.95, sum(rate(tikv_coprocessor_request_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"all\"}[1m])) by (le, instance,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{req}}-{{instance}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n 
tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{req}}-{{instance}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}-{{instance}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-{{req}}", - "refId": "B", - "step": 4 + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}-{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "95% Wait duration by store", + "title": "Wait duration by store", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27019,57 +36262,76 @@ }, "yaxes": [ { - "decimals": 1, + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "decimals": 1, + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, 
"editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 37 + "y": 14 }, - "id": 3195, + "height": null, + "hideTimeOverride": false, + "id": 266, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -27077,41 +36339,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_dag_request_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (vec_type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_dag_request_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (vec_type) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{vec_type}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_coprocessor_dag_request_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (vec_type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total DAG Requests", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27119,57 +36395,76 @@ }, "yaxes": [ { - "decimals": 1, - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The total number of DAG executors", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 37 + "y": 14 }, - "id": 3264, + "height": null, + "hideTimeOverride": false, + "id": 267, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -27177,41 +36472,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + 
"minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_executor_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_executor_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_coprocessor_executor_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total DAG Executors", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27219,56 +36528,76 @@ }, "yaxes": [ { - "decimals": 1, - "format": "short", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": 
false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 9, + "h": 7, "w": 12, "x": 0, - "y": 44 + "y": 21 }, - "id": 552, + "height": null, + "hideTimeOverride": false, + "id": 268, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -27276,41 +36605,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"select\"}[1m])) by (tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{tag}}", - "metric": "scan_details", - "refId": "B", - "step": 4 + "metric": "", + 
"query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total Ops Details (Table Scan)", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27318,56 +36661,76 @@ }, "yaxes": [ { - "decimals": 1, - "format": "short", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 9, + "h": 7, "w": 12, "x": 12, - "y": 44 + "y": 21 }, - "id": 3263, + "height": null, + "hideTimeOverride": false, + "id": 269, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": 
true @@ -27375,41 +36738,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"index\"}[1m])) by (tag)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{tag}}", - "metric": "scan_details", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total Ops Details (Index Scan)", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27417,56 +36794,76 @@ }, "yaxes": [ { - "decimals": 1, - "format": "short", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": 
null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 9, + "h": 7, "w": 12, "x": 0, - "y": 53 + "y": 28 }, - "id": 122, + "height": null, + "hideTimeOverride": false, + "id": 270, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -27474,42 +36871,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"select\"}[1m])) by (tag,cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (cf, tag) ", 
"format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{cf}}-{{tag}}", - "metric": "scan_details", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (cf, tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total Ops Details by CF (Table Scan)", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27517,56 +36927,76 @@ }, "yaxes": [ { - "decimals": 1, - "format": "short", + "decimals": null, + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 9, + "h": 7, "w": 12, "x": 12, - "y": 53 + "y": 28 }, - "id": 554, + "height": null, + "hideTimeOverride": false, + "id": 271, + "interval": null, + "isNew": true, "legend": { "alignAsTable": 
true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -27574,43 +37004,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": "cf", - "repeatDirection": "h", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_scan_details{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=\"index\"}[1m])) by (tag,cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (cf, tag) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{cf}}-{{tag}}", - "metric": "scan_details", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (cf, tag) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total Ops Details by CF (Index Scan)", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": 
null, "mode": "time", "name": null, "show": true, @@ -27618,144 +37060,423 @@ }, "yaxes": [ { - "decimals": 1, - "format": "short", + "decimals": null, + "format": "opm", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#5195ce", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, - "min": 0, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed on checking memory locks for coprocessor requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 119 + "y": 35 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 7594, + "id": 272, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_mem_lock_check_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Memory lock checking duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed on checking memory locks for coprocessor requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 35 + }, + "height": null, + "hideTimeOverride": false, + "id": 273, + 
"interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n 
tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory lock checking duration", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Coprocessor Detail", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 32 + "y": 0 }, - "id": 2761, + "height": null, + "hideTimeOverride": false, + "id": 274, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + 
"threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 24 + "y": 0 }, - "id": 2108, + "height": null, + "hideTimeOverride": false, + "id": 275, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -27763,45 +37484,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, - "points": true, + "pointradius": 5, + "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_threads_state{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance, state)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, state) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}-{{state}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, state) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_threads_state{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-total", - "refId": "B" + "metric": "", + "query": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Threads state", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27809,6 +37555,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -27817,6 +37564,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -27827,28 +37575,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 24 + "y": 0 }, - "id": 2258, + "height": null, + "hideTimeOverride": false, + "id": 276, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": 
true, "max": true, "min": false, "rightSide": true, @@ -27862,40 +37632,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, - "points": true, + "pointradius": 5, + "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "topk(20, sum(rate(tikv_threads_io_bytes_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s])) by (name, io) > 1024)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "topk(20,(\n sum(rate(\n tikv_threads_io_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, io) > 1024\n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{name}}-{{io}}", - "refId": "A", - "step": 4 + "legendFormat": "{{name}}", + "metric": "", + "query": "topk(20,(\n sum(rate(\n tikv_threads_io_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, io) > 1024\n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Threads IO", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27903,7 +37688,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -27911,6 +37697,7 @@ "show": true }, { + 
"decimals": null, "format": "short", "label": null, "logBase": 1, @@ -27921,28 +37708,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 31 + "y": 7 }, - "id": 2660, + "height": null, + "hideTimeOverride": false, + "id": 277, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -27956,40 +37765,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, - "points": true, + "pointradius": 5, + "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "topk(20, max(rate(tikv_thread_voluntary_context_switches{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s])) by (name) > 200)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "topk(20,(\n max(rate(\n tikv_thread_voluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", "format": "time_series", "hide": 
false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{name}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "topk(20,(\n max(rate(\n tikv_thread_voluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Thread Voluntary Context Switches", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -27997,6 +37821,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -28005,6 +37830,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28015,28 +37841,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 31 + "y": 7 }, - "id": 2661, + "height": null, + "hideTimeOverride": false, + "id": 278, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -28050,40 +37898,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": 
"null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, - "points": true, + "pointradius": 5, + "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "topk(20, max(rate(tikv_thread_nonvoluntary_context_switches{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s])) by (name) > 100)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "topk(20,(\n max(rate(\n tikv_thread_nonvoluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{name}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "topk(20,(\n max(rate(\n tikv_thread_nonvoluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Thread Nonvoluntary Context Switches", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -28091,6 +37954,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -28099,6 +37963,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28109,57 +37974,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } 
} ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Threads", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 33 + "y": 0 }, - "id": 2762, + "height": null, + "hideTimeOverride": false, + "id": 279, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of get operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 59 + "y": 0 }, - "hiddenSeries": false, - "id": 138, + "height": null, + "hideTimeOverride": false, + "id": 280, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -28167,83 +38073,115 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + 
"alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_memtable_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"memtable_hit\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "memtable", "metric": "", - "refId": "B", - "step": 10 + "query": "sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=~\"block_cache_data_hit|block_cache_filter_hit\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"block_cache_data_hit|block_cache_filter_hit\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "block_cache", "metric": "", - "refId": "E", - "step": 10 + "query": "sum(rate(\n 
tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"block_cache_data_hit|block_cache_filter_hit\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_get_served{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"get_hit_l0\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l0\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "l0", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l0\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_get_served{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"get_hit_l1\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l1\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "l1", - "refId": "C", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l1\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": 
"sum(rate(tikv_engine_get_served{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"get_hit_l2_and_up\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l2_and_up\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "l2_and_up", - "refId": "F", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l2_and_up\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Get operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -28251,6 +38189,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -28259,6 +38198,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28269,41 +38209,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing get operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": 
"rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 59 + "y": 0 }, - "hiddenSeries": false, - "id": 82, + "height": null, + "hideTimeOverride": false, + "id": 281, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -28311,73 +38266,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_get_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"get_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "A", - "step": 10 + "metric": "", + "query": "max((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_max\"}\n \n)) ", + 
"refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_get_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"get_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_get_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"get_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_get_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"get_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_get_micro_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Get duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -28385,7 +38367,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 2, "max": null, @@ -28393,6 +38376,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28403,41 +38387,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of seek operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 67 + "y": 7 }, - "hiddenSeries": false, - "id": 129, + "height": null, + "hideTimeOverride": false, + "id": 282, + 
"interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -28445,95 +38444,130 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"number_db_seek\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "seek", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"number_db_seek_found\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + 
"expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek_found\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "seek_found", "metric": "", - "refId": "B", - "step": 10 + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek_found\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"number_db_next\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "next", "metric": "", - "refId": "C", - "step": 10 + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"number_db_next_found\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next_found\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", 
+ "intervalFactor": 1, "legendFormat": "next_found", "metric": "", - "refId": "D", - "step": 10 + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next_found\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"number_db_prev\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "prev", "metric": "", - "refId": "E", - "step": 10 + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"number_db_prev_found\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev_found\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "prev_found", "metric": "", - "refId": "F", - "step": 10 + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev_found\"}\n [$__rate_interval]\n)) ", + 
"refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Seek operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -28541,6 +38575,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -28549,6 +38584,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28559,41 +38595,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing seek operation", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 67 + "y": 7 }, - "hiddenSeries": false, - "id": 125, + "height": null, + "hideTimeOverride": false, + "id": 283, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -28601,73 +38652,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", 
"options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_seek_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"seek_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "A", - "step": 10 + "metric": "", + "query": "max((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_seek_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"seek_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_seek_micro_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_seek_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"seek_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_seek_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"seek_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Seek duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, 
"value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -28675,7 +38753,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 2, "max": null, @@ -28683,6 +38762,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28693,41 +38773,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of write operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 75 + "y": 14 }, - "hiddenSeries": false, - "id": 139, + "height": null, + "hideTimeOverride": false, + "id": 284, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -28735,65 +38830,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": 
{ - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_write_served{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=~\"write_done_by_self|write_done_by_other\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"write_done_by_self|write_done_by_other\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "done", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"write_done_by_self|write_done_by_other\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_write_served{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"write_timeout\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_timeout\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "timeout", - "refId": "B", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_timeout\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + 
"target": "" }, { - "expr": "sum(rate(tikv_engine_write_served{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"write_with_wal\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_with_wal\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "with_wal", - "refId": "C", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_with_wal\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -28801,6 +38916,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -28809,6 +38925,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28819,41 +38936,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing write operation", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + 
"threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 75 + "y": 14 }, - "hiddenSeries": false, - "id": 126, + "height": null, + "hideTimeOverride": false, + "id": 285, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -28861,73 +38993,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_write_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "A", - "step": 10 + "metric": "", + "query": "max((\n tikv_engine_write_micro_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
db=\"$db\",type=\"write_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -28935,7 +39094,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 2, "max": null, @@ -28943,6 +39103,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -28953,41 +39114,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe count of WAL sync operations", + "description": "The count of WAL sync operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 
112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 83 + "y": 21 }, - "hiddenSeries": false, - "id": 137, + "height": null, + "hideTimeOverride": false, + "id": 286, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -28995,50 +39171,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_wal_file_synced{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_wal_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "sync", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_engine_wal_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": 
null, "title": "WAL sync operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29046,6 +39227,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -29054,6 +39236,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -29064,41 +39247,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing write wal operation", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 83 + "y": 21 }, - "hiddenSeries": false, - "id": 130, + "height": null, + "hideTimeOverride": false, + "id": 287, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29106,73 +39304,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - 
"pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_write_wal_time_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_wal_micros_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "A", - "step": 10 + "metric": "", + "query": "max((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_wal_time_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_wal_micros_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_write_wal_time_micro_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_wal_time_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_wal_micros_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_wal_time_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"write_wal_micros_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": 
null, - "timeRegions": [], "timeShift": null, "title": "Write WAL duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29180,7 +39405,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 2, "max": null, @@ -29188,6 +39414,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -29198,41 +39425,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of compaction and flush operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 91 + "y": 28 }, - "hiddenSeries": false, - "id": 128, + "height": null, + "hideTimeOverride": false, + "id": 288, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29240,50 +39482,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": 
true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_event_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_event_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "metric": "tikv_engine_event_total", - "refId": "B", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_event_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compaction operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29291,6 +39538,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -29299,6 +39547,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -29309,41 +39558,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, 
"datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing WAL sync operation", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 91 + "y": 28 }, - "hiddenSeries": false, - "id": 135, + "height": null, + "hideTimeOverride": false, + "id": 289, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29351,74 +39615,100 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_wal_file_sync_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"wal_file_sync_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_wal_file_sync_micro_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "A", - "step": 10 + "metric": "", + "query": "max((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_wal_file_sync_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"wal_file_sync_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_wal_file_sync_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"wal_file_sync_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_wal_file_sync_micro_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"wal_file_sync_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "WAL sync duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29426,14 +39716,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 10, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -29444,41 +39736,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + 
"cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "Compaction guard actions", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 99 + "y": 35 }, - "hiddenSeries": false, - "id": 2453, + "height": null, + "hideTimeOverride": false, + "id": 290, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29486,50 +39793,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_raftstore_compaction_guard_action_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", cf=~\"default|write\"}[1m])) by (cf, type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_compaction_guard_action_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=~\"default|write\"}\n [$__rate_interval]\n)) by (cf, type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cf}}-{{type}}", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{cf}}-{{ type}}", "metric": "", - "refId": "B", - "step": 10 + "query": "sum(rate(\n tikv_raftstore_compaction_guard_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=~\"default|write\"}\n [$__rate_interval]\n)) by (cf, type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compaction guard actions", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29537,6 +39849,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -29545,6 +39858,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -29555,41 +39869,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when executing the compaction and flush operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, 
"w": 12, "x": 12, - "y": 99 + "y": 35 }, - "hiddenSeries": false, - "id": 136, + "height": null, + "hideTimeOverride": false, + "id": 291, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29597,74 +39926,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_compaction_time{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"compaction_time_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", "metric": "", - "refId": "A", - "step": 10 + "query": "max((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_compaction_time{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"compaction_time_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_compaction_time{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"compaction_time_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_compaction_time{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"compaction_time_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_compaction_time\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compaction duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29672,7 +40027,8 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 2, "max": null, @@ -29680,6 +40036,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -29690,41 +40047,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time consumed when reading SST files", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 107 + "y": 42 }, - "hiddenSeries": false, - "id": 140, + "height": null, + 
"hideTimeOverride": false, + "id": 292, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29732,77 +40104,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_sst_read_micros{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"sst_read_micros_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", "metric": "", - "refId": "A", - "step": 10 + "query": "max((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_sst_read_micros{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"sst_read_micros_percentile99\"})", + 
"datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "refId": "B", - "step": 10 + "query": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_sst_read_micros{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"sst_read_micros_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", "metric": "", - "refId": "C", - "step": 10 + "query": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_sst_read_micros{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"sst_read_micros_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "refId": "D", - "step": 10 + "query": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "SST read duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29810,14 +40205,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 10, + "logBase": 2, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -29828,40 +40225,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 107 + "y": 42 }, - "hiddenSeries": false, - "id": 2451, + "height": null, + "hideTimeOverride": false, + "id": 293, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": 
true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29869,51 +40282,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_compaction_reason{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m])) by (cf, reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_compaction_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (cf, reason) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{cf}} - {{reason}}", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{cf}}-{{reason}}", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_engine_compaction_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (cf, reason) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compaction reason", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -29921,59 
+40338,76 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The block cache size. Broken down by column family if shared block cache is disabled.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 115 + "y": 49 }, - "hiddenSeries": false, - "id": 102, + "height": null, + "hideTimeOverride": false, + "id": 294, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -29981,49 +40415,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, 
"seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "topk(20, avg(tikv_engine_block_cache_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}) by(cf, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "topk(20,(\n avg((\n tikv_engine_block_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, instance) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{cf}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "topk(20,(\n avg((\n tikv_engine_block_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, instance) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Block cache size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30031,6 +40471,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -30039,6 +40480,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -30049,41 +40491,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The hit rate of memtable", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + 
"steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 115 + "y": 49 }, - "hiddenSeries": false, - "id": 88, + "height": null, + "hideTimeOverride": false, + "id": 295, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -30091,49 +40548,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "connected", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_memtable_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"memtable_hit\"}[1m])) / (sum(rate(tikv_engine_memtable_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", db=\"$db\", type=\"memtable_hit\"}[1m])) + sum(rate(tikv_engine_memtable_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", db=\"$db\", type=\"memtable_miss\"}[1m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_memtable_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_miss\"}\n [$__rate_interval]\n)) ))", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "hit", - "refId": "A", - "step": 10 + "metric": "", + "query": "(sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Memtable hit", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30141,60 +40604,76 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "ops", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - 
"show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The flow of different kinds of block cache operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 123 + "y": 56 }, - "height": "", - "hiddenSeries": false, - "id": 467, + "height": null, + "hideTimeOverride": false, + "id": 296, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -30202,116 +40681,145 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
db=\"$db\", type=\"block_cache_byte_read\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "total_read", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_read\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_byte_write\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_write\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "total_written", - "refId": "C", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_write\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_data_bytes_insert\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_bytes_insert\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "data_insert", "metric": "", - "refId": "D", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_bytes_insert\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_filter_bytes_insert\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_insert\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "filter_insert", "metric": "", - "refId": "B", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_insert\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_filter_bytes_evict\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_evict\"}\n 
[$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "filter_evict", "metric": "", - "refId": "E", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_evict\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_index_bytes_insert\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_insert\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "index_insert", "metric": "", - "refId": "F", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_insert\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_index_bytes_evict\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_evict\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, 
"legendFormat": "index_evict", "metric": "", - "refId": "G", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_evict\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Block cache flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30319,59 +40827,76 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 10, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The hit rate of block cache", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 123 + "y": 56 }, - "hiddenSeries": false, - "id": 80, + "height": null, + "hideTimeOverride": false, + "id": 297, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + 
"hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -30379,87 +40904,115 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "connected", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_hit\"}[1m])) / (sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_hit\"}[1m])) + sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_miss\"}[1m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_miss\"}\n [$__rate_interval]\n)) ))", 
"format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "all", "metric": "", - "refId": "A", - "step": 10 + "query": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_data_hit\"}[1m])) / (sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_data_hit\"}[1m])) + sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_data_miss\"}[1m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_miss\"}\n 
[$__rate_interval]\n)) ))", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "data", "metric": "", - "refId": "D", - "step": 10 + "query": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_filter_hit\"}[1m])) / (sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_filter_hit\"}[1m])) + sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_filter_miss\"}[1m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_miss\"}\n [$__rate_interval]\n)) ))", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "filter", "metric": "", - "refId": "B", - "step": 10 + "query": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_index_hit\"}[1m])) / (sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_index_hit\"}[1m])) + sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_index_miss\"}[1m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n 
[$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_miss\"}\n [$__rate_interval]\n)) ))", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "index", "metric": "", - "refId": "C", - "step": 10 + "query": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_bloom_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"bloom_prefix_useful\"}[1m])) / sum(rate(tikv_engine_bloom_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"bloom_prefix_checked\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_useful\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_checked\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 1, "legendFormat": "bloom prefix", "metric": "", - "refId": "E", - "step": 10 + "query": "(sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_useful\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_checked\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Block cache hit", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30467,60 +41020,76 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "ops", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The flow of different kinds of operations on keys", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 131 + "y": 63 }, - "height": "", - 
"hiddenSeries": false, - "id": 132, + "height": null, + "hideTimeOverride": false, + "id": 298, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -30528,72 +41097,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"keys_read\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "read", - "refId": "B", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_read\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", db=\"$db\", type=\"keys_written\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_written\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "written", - "refId": "C", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_written\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_compaction_num_corrupt_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_compaction_num_corrupt_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "corrupt", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_engine_compaction_num_corrupt_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Keys flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30601,59 +41183,76 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, 
"logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of different kinds of block cache operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 131 + "y": 63 }, - "hiddenSeries": false, - "id": 468, + "height": null, + "hideTimeOverride": false, + "id": 299, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -30661,86 +41260,115 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { 
- "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_add\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "total_add", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_data_add\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_add\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "data_add", "metric": "", - "refId": "C", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_add\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_filter_add\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + 
"expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_add\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "filter_add", "metric": "", - "refId": "D", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_add\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_index_add\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_add\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "index_add", "metric": "", - "refId": "E", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_add\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"block_cache_add_failures\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add_failures\"}\n 
[$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "add_failures", "metric": "", - "refId": "B", - "step": 10 + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add_failures\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Block cache operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30748,6 +41376,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -30756,6 +41385,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -30766,42 +41396,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The flow rate of read operations per type", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 139 + "y": 70 }, - "height": "", - "hiddenSeries": false, - "id": 85, + "height": null, + "hideTimeOverride": false, + "id": 300, + "interval": null, + "isNew": true, "legend": { 
"alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -30809,61 +41453,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"bytes_read\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "get", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"iter_bytes_read\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"iter_bytes_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "scan", - "refId": "C", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"iter_bytes_read\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Read flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30871,59 +41524,76 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of keys in each column family", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 139 + "y": 70 }, - "hiddenSeries": 
false, - "id": 131, + "height": null, + "hideTimeOverride": false, + "id": 301, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -30931,51 +41601,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_engine_estimate_num_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}) by (cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_estimate_num_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{cf}}", - "metric": "tikv_engine_estimate_num_keys", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_engine_estimate_num_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total keys", "tooltip": { + "msResolution": true, "shared": true, "sort": 
0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -30983,60 +41657,76 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The flow of different kinds of write operations", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 147 + "y": 77 }, - "height": "", - "hiddenSeries": false, - "id": 86, + "height": null, + "hideTimeOverride": false, + "id": 302, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31044,59 +41734,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": 
false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"wal_file_bytes\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_bytes\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "wal", - "refId": "C", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_bytes\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"bytes_written\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "write", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], 
"timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31104,59 +41805,76 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The bytes per read", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 147 + "y": 77 }, - "hiddenSeries": false, - "id": 133, + "height": null, + "hideTimeOverride": false, + "id": 303, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31164,74 +41882,100 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", 
"options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_bytes_per_read{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_read_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "A", - "step": 10 + "metric": "", + "query": "max((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_bytes_per_read{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_read_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_bytes_per_read\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_bytes_per_read{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_read_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_bytes_per_read{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_read_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Bytes / Read", "tooltip": { + 
"msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31239,14 +41983,16 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 10, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -31257,41 +42003,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The flow rate of compaction operations per type", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 155 + "y": 84 }, - "hiddenSeries": false, - "id": 90, + "height": null, + "hideTimeOverride": false, + "id": 304, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31299,68 +42060,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", 
"pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_compaction_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"bytes_read\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "read", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_compaction_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"bytes_written\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "written", - "refId": "C", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n 
[$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"flush_write_bytes\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"flush_write_bytes\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "flushed", - "refId": "B", - "step": 10 + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"flush_write_bytes\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compaction flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31368,59 +42146,76 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "Bps", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The bytes per write", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": 
{ + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 155 + "y": 84 }, - "hiddenSeries": false, - "id": 134, + "height": null, + "hideTimeOverride": false, + "id": 305, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31428,74 +42223,100 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_bytes_per_write{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_write_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "A", - "step": 10 + "metric": 
"", + "query": "max((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_bytes_per_write{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_write_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_bytes_per_write{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_write_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "C", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": 
"avg(tikv_engine_bytes_per_write{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\",type=\"bytes_per_write_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "D", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Bytes / Write", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31503,14 +42324,16 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, - "logBase": 10, + "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -31521,41 +42344,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The read amplification per TiKV instance \t", + "description": "The read amplification per TiKV instance", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, 
"fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 163 + "y": 91 }, - "hiddenSeries": false, - "id": 518, + "height": null, + "hideTimeOverride": false, + "id": 306, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31563,51 +42401,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_read_amp_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"read_amp_total_read_bytes\"}[1m])) by (instance) / sum(rate(tikv_engine_read_amp_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", db=\"$db\", type=\"read_amp_estimate_useful_bytes\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_total_read_bytes\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n 
tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_estimate_useful_bytes\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "refId": "A", - "step": 10 + "query": "(sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_total_read_bytes\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_estimate_useful_bytes\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Read amplication", + "title": "Read amplification", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31615,59 +42457,76 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The pending bytes to be compacted", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + 
"mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 163 + "y": 91 }, - "hiddenSeries": false, - "id": 127, + "height": null, + "hideTimeOverride": false, + "id": 307, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31675,51 +42534,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_engine_pending_compaction_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}) by (cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{cf}}", - "metric": "tikv_engine_pending_compaction_bytes", - "refId": "A", - "step": 10 + "metric": "", + 
"query": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compaction pending bytes", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31727,59 +42590,76 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "Bps", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of snapshot of each TiKV instance", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 171 + "y": 98 }, - "hiddenSeries": false, - "id": 516, + "height": null, + "hideTimeOverride": false, + "id": 308, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", 
"sortDesc": true, "total": false, "values": true @@ -31787,51 +42667,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_engine_num_snapshots{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_engine_num_snapshots\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "refId": "A", - "step": 10 + "query": "((\n tikv_engine_num_snapshots\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Number of snapshots", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31839,59 +42723,76 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": 
null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The compression ratio of each level", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 171 + "y": 98 }, - "hiddenSeries": false, - "id": 863, + "height": null, + "hideTimeOverride": false, + "id": 309, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -31899,51 +42800,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_compression_ratio{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}) by (cf, level)", + 
"datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_compression_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{cf}} - level - {{level}}", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{cf}}-L{{level}}", "metric": "", - "refId": "A", - "step": 10 + "query": "avg((\n tikv_engine_compression_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Compression ratio", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -31951,57 +42856,76 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The number of SST files for different column families in each level", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, 
"gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 179 + "y": 105 }, - "hiddenSeries": false, - "id": 2002, + "height": null, + "hideTimeOverride": false, + "id": 310, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -32009,48 +42933,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_num_files_at_level{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}) by (cf, level)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_num_files_at_level\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "cf-{{cf}}, level-{{level}}", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{cf}}-L{{level}}", + "metric": "", + "query": "avg((\n tikv_engine_num_files_at_level\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], 
"timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Number files at each level", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32058,6 +42989,7 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32066,6 +42998,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32076,41 +43009,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time that the oldest unreleased snapshot survivals", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 179 + "y": 105 }, - "hiddenSeries": false, - "id": 517, + "height": null, + "hideTimeOverride": false, + "id": 311, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -32118,51 +43066,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": 
true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_engine_oldest_snapshot_duration{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_engine_oldest_snapshot_duration\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_engine_oldest_snapshot_duration", - "refId": "A", - "step": 10 + "metric": "", + "query": "((\n tikv_engine_oldest_snapshot_duration\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Oldest snapshots duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32170,58 +43122,76 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": 
null, "datasource": "${DS_TEST-CLUSTER}", "description": "Stall conditions changed of each column family", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 187 + "y": 112 }, - "hiddenSeries": false, - "id": 2381, + "height": null, + "hideTimeOverride": false, + "id": 312, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -32229,48 +43199,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_engine_stall_conditions_changed{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_engine_stall_conditions_changed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "format": "time_series", - 
"intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{cf}}-{{type}}", - "refId": "B" + "metric": "", + "query": "((\n tikv_engine_stall_conditions_changed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Stall conditions changed of each CF", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32278,6 +43255,7 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32286,6 +43264,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32296,39 +43275,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when ingesting SST files", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 187 + "y": 112 }, - "hiddenSeries": false, - "id": 2003, + "height": null, + "hideTimeOverride": false, + "id": 313, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, 
"rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -32336,55 +43332,123 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } - }, - "seriesOverrides": [], - "spaceLength": 10, + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_snapshot_ingest_sst_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n 
[$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_snapshot_ingest_sst_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", db=\"$db\"}[1m])) / sum(rate(tikv_snapshot_ingest_sst_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "average", - "refId": "B" + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Ingest SST duration seconds", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32392,6 +43456,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -32400,6 +43465,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32410,40 +43476,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": 
null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 195 + "y": 119 }, - "hiddenSeries": false, - "id": 2452, + "height": null, + "hideTimeOverride": false, + "id": 314, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -32451,51 +43533,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(increase(tikv_engine_write_stall_reason{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_write_stall_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", "metric": "", - "refId": "A", - "step": 10 + "query": "sum(rate(\n tikv_engine_write_stall_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n 
[$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write Stall Reason", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32503,59 +43589,76 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The time which is caused by write stall", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 195 + "y": 119 }, - "hiddenSeries": false, - "id": 87, + "height": null, + "hideTimeOverride": false, + "id": 315, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -32563,77 +43666,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + 
"maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(tikv_engine_write_stall{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"write_stall_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", "metric": "", - "refId": "A", - "step": 10 + "query": "max((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_stall{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"write_stall_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "refId": "B", - "step": 10 + "query": "avg((\n tikv_engine_write_stall\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_stall{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"write_stall_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", "metric": "", - "refId": "C", - "step": 10 + "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_write_stall{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"write_stall_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "refId": "D", - "step": 10 + "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write stall duration", "tooltip": { + "msResolution": true, "shared": true, 
"sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32641,14 +43767,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 10, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32659,10 +43787,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -32672,71 +43801,94 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The level that the external file ingests into", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 203 + "y": 126 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 12712, + "id": 316, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tikv_engine_ingestion_picked_level_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_ingestion_picked_level_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_engine_ingestion_picked_level_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Ingestion picked level", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -32745,35 +43897,50 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The memtable size of each column family", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 203 + "y": 126 }, - "hiddenSeries": false, - "id": 103, + "height": null, + 
"hideTimeOverride": false, + "id": 317, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -32781,49 +43948,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_memory_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$db\", type=\"mem-tables-all\"}) by (cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"mem-tables-all\"}\n \n)) by (cf) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{cf}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "avg((\n tikv_engine_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"mem-tables-all\"}\n \n)) by (cf) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Memtable size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": 
"individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32831,6 +44004,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -32839,6 +44013,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32849,54 +44024,97 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": "db", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "RocksDB - $db", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 34 + "y": 0 }, - "id": 12802, + "height": null, + "hideTimeOverride": false, + "id": 318, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The count of operations per second", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 32 + "y": 0 }, - "hiddenSeries": false, - "id": 12892, + "height": null, + "hideTimeOverride": false, + "id": 319, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, 
"current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -32904,58 +44122,86 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(raft_engine_write_apply_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_write_apply_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "write", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n raft_engine_write_apply_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(raft_engine_read_entry_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_read_entry_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + 
"format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "read_entry", - "refId": "B" + "metric": "", + "query": "sum(rate(\n raft_engine_read_entry_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(raft_engine_read_message_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_read_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "read_message", - "refId": "C" + "metric": "", + "query": "sum(rate(\n raft_engine_read_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Operation", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -32963,6 +44209,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -32971,6 +44218,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -32981,38 +44229,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", 
"description": "The time used in write operation", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 32 + "y": 0 }, - "hiddenSeries": false, - "id": 12893, + "height": null, + "hideTimeOverride": false, + "id": 320, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33020,66 +44285,124 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(raft_engine_write_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / 
sum(rate(raft_engine_write_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "avg", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(raft_engine_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "95%", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(raft_engine_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n raft_engine_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "99%", - "refId": "C" + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n raft_engine_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(raft_engine_write_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "hide": false, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "D" + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write 
Duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33087,14 +44410,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33105,38 +44430,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The I/O flow rate", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 40 + "y": 7 }, - "hiddenSeries": false, - "id": 12896, + "height": null, + "hideTimeOverride": false, + "id": 321, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33144,50 +44486,71 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + 
"repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(raft_engine_write_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "write", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(raft_engine_background_rewrite_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_background_rewrite_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "rewrite {{type}}", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "rewrite-{{type}}", + "metric": "", + "query": "sum(rate(\n raft_engine_background_rewrite_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], 
+ "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33195,6 +44558,7 @@ }, "yaxes": [ { + "decimals": null, "format": "binBps", "label": null, "logBase": 1, @@ -33203,6 +44567,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33213,38 +44578,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "99% duration breakdown of write operation", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 40 + "y": 7 }, - "hiddenSeries": false, - "id": 12895, + "height": null, + "hideTimeOverride": false, + "id": 322, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33252,58 +44634,86 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, 
"steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(raft_engine_write_preprocess_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_preprocess_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "wait", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_preprocess_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(raft_engine_write_leader_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "wal", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, 
sum(rate(raft_engine_write_apply_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_apply_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "apply", - "refId": "C" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_apply_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Write Duration Breakdown (99%)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33311,14 +44721,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33329,38 +44741,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The bytes per write", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + 
"threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 48 + "y": 14 }, - "hiddenSeries": false, - "id": 12898, + "height": null, + "hideTimeOverride": false, + "id": 323, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33368,58 +44797,124 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(raft_engine_write_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(raft_engine_write_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_size_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "avg", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(raft_engine_write_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(raft_engine_write_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_size_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_write_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, "interval": "", - "legendFormat": "999%", - "refId": "C" + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n raft_engine_write_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Bytes / Written", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33427,6 +44922,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -33435,6 +44931,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33445,38 +44942,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": 
null, "datasource": "${DS_TEST-CLUSTER}", "description": "999% duration breakdown of WAL write operation", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 48 + "y": 14 }, - "hiddenSeries": false, - "id": 12933, + "height": null, + "hideTimeOverride": false, + "id": 324, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33484,66 +44998,101 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(raft_engine_write_leader_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) 
", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "total", - "refId": "D" + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(raft_engine_sync_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_sync_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "sync", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_sync_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(raft_engine_allocate_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_allocate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "allocate", - 
"refId": "G" + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_allocate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(raft_engine_rotate_log_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_rotate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "rotate", - "refId": "H" + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_rotate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "WAL Duration Breakdown (999%)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33551,14 +45100,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33569,38 +45120,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - 
"dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The average number of files", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 56 + "y": 21 }, - "hiddenSeries": false, - "id": 12899, + "height": null, + "hideTimeOverride": false, + "id": 325, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33608,55 +45176,86 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "avg(raft_engine_log_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n raft_engine_log_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, 
"legendFormat": "{{type}}", - "refId": "A" + "metric": "", + "query": "avg((\n raft_engine_log_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "avg(raft_engine_swap_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n raft_engine_swap_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "swap", - "refId": "B" + "metric": "", + "query": "avg((\n raft_engine_swap_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "avg(raft_engine_recycled_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n raft_engine_recycled_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}} - recycle", - "refId": "C" + "legendFormat": "{{type}}-recycle", + "metric": "", + "query": "avg((\n raft_engine_recycled_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "File Count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", 
"xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33664,6 +45263,7 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33672,6 +45272,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33682,38 +45283,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The 99% duration of operations other than write", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 56 + "y": 21 }, - "hiddenSeries": false, - "id": 12897, + "height": null, + "hideTimeOverride": false, + "id": 326, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33721,58 +45339,86 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - 
"exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(raft_engine_read_entry_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_entry_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "read_entry", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_entry_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(raft_engine_read_message_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "read_message", - "refId": "D" + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, 
sum(rate(raft_engine_purge_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_purge_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "purge", - "refId": "E" + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_purge_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Other Durations (99%)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33780,6 +45426,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, @@ -33788,6 +45435,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33798,38 +45446,55 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The average number of log entries", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 
27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 64 + "y": 28 }, - "hiddenSeries": false, - "id": 12934, + "height": null, + "hideTimeOverride": false, + "id": 327, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -33837,43 +45502,56 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "avg(raft_engine_log_entry_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n raft_engine_log_entry_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "avg((\n raft_engine_log_entry_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Entry Count", "tooltip": { + 
"msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33881,6 +45559,7 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33889,6 +45568,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -33899,99 +45579,169 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Raft Engine", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 35 + "y": 0 }, - "id": 3301, + "height": null, + "hideTimeOverride": false, + "id": 328, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 0, - "y": 27 + "y": 0 }, - "id": 3555, + "height": null, + "hideTimeOverride": false, + "id": 329, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + 
"hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_engine_titandb_num_live_blob_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_titandb_num_live_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "live blob file num", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_engine_titandb_num_live_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_engine_titandb_num_obsolete_blob_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_titandb_num_obsolete_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "format": "time_series", "hide": false, + 
"instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "obsolete blob file num", - "refId": "B" + "metric": "", + "query": "sum((\n tikv_engine_titandb_num_obsolete_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob file count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -33999,7 +45749,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -34007,6 +45758,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34017,83 +45769,127 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 12, - "y": 27 + "y": 0 }, - "id": 3557, + "height": null, + "hideTimeOverride": false, + "id": 330, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, 
"linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_engine_titandb_live_blob_file_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_titandb_live_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "live blob file size", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_engine_titandb_live_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(tikv_engine_titandb_obsolete_blob_file_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_titandb_obsolete_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "obsolete blob file size", - "refId": "B" + "metric": "", + "query": "sum((\n 
tikv_engine_titandb_obsolete_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob file size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34101,6 +45897,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -34109,6 +45906,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34119,130 +45917,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "fill": 1, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 33 - }, - "id": 3523, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "paceLength": 10, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(tikv_engine_titandb_live_blob_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "live blob size", - "refId": "A" + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + 
"defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Live blob size", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The blob cache size.", "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 34 + "x": 0, + "y": 7 }, - "id": 4655, + "height": null, + "hideTimeOverride": false, + "id": 331, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -34250,38 +45974,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, 
"targets": [ { - "expr": "topk(20, avg(tikv_engine_blob_cache_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"}) by(cf, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_titandb_live_blob_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-{{cf}}", - "refId": "A", - "step": 10 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "live blob size", + "metric": "", + "query": "sum((\n tikv_engine_titandb_live_blob_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob cache size", + "title": "Live blob size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34289,6 +46030,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -34297,6 +46039,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34307,35 +46050,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The hit rate of block cache", - "fill": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + 
"threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 12, - "y": 33 + "y": 7 }, - "id": 4020, + "height": null, + "hideTimeOverride": false, + "id": 332, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -34343,47 +46107,55 @@ "lines": true, "linewidth": 1, "links": [], - "maxPerRow": 2, - "nullPointMode": "connected", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_cache_hit\"}[1m])) / (sum(rate(tikv_engine_blob_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", db=\"$titan_db\", type=\"blob_cache_hit\"}[1m])) + sum(rate(tikv_engine_blob_cache_efficiency{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", db=\"$titan_db\", type=\"blob_cache_miss\"}[1m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n 
tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_miss\"}\n [$__rate_interval]\n)) ))", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "all", "metric": "", - "refId": "A", - "step": 10 + "query": "(sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob cache hit", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34391,50 +46163,76 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "ops", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { 
"aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 0, - "y": 39 + "y": 14 }, - "id": 4023, + "height": null, + "hideTimeOverride": false, + "id": 333, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -34442,69 +46240,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_iter_touch_blob_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_iter_touch_blob_file_count_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_average\"}\n \n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_iter_touch_blob_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_iter_touch_blob_file_count_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile95\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "95%", - "refId": "C" + "metric": "", + "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_iter_touch_blob_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_iter_touch_blob_file_count_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile99\"}\n \n)) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "99%", - "refId": "D" + "metric": "", + "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_iter_touch_blob_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_iter_touch_blob_file_count_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_max\"}\n \n)) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "max", - "refId": "B" + "metric": "", + "query": "max((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Iter touched blob file count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34512,7 +46341,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ 
-34520,6 +46350,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34530,76 +46361,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "The blob cache size.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 12, - "y": 39 + "y": 14 }, - "id": 4025, + "height": null, + "hideTimeOverride": false, + "id": 334, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_engine_titandb_blob_file_discardable_ratio{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"}) by (ratio)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"topk(20,(\n avg((\n tikv_engine_blob_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (cf, instance) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ratio}}", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{cf}}", + "metric": "", + "query": "topk(20,(\n avg((\n tikv_engine_blob_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (cf, instance) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob file discardable ratio distribution", + "title": "Blob cache size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34607,7 +46474,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -34615,6 +46483,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34625,32 +46494,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 0, - "y": 45 + "y": 21 }, - 
"id": 3414, + "height": null, + "hideTimeOverride": false, + "id": 335, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -34658,69 +46551,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_key_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_key_size_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_average\"}\n \n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_key_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", 
type=\"blob_key_size_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile95\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "95%", - "refId": "C" + "metric": "", + "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_key_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_key_size_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile99\"}\n \n)) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "99%", - "refId": "D" + "metric": "", + "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_key_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_key_size_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_max\"}\n \n)) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, 
"legendFormat": "max", - "refId": "B" + "metric": "", + "query": "max((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob key size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34728,14 +46652,16 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34746,97 +46672,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 12, - "y": 45 + "y": 21 }, - "id": 3446, + "height": null, + "hideTimeOverride": false, + "id": 336, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + 
"maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_value_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_value_size_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_average\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_value_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_value_size_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile95\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "95%", - "refId": "B" + "metric": "", + "query": "avg((\n tikv_engine_blob_value_size\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_value_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_value_size_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile99\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "99%", - "refId": "C" + "metric": "", + "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_value_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_value_size_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_max\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "max", - "refId": "D" + "metric": "", + "query": "max((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob value size", "tooltip": { + "msResolution": true, "shared": true, 
"sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34844,14 +46830,16 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34862,32 +46850,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 0, - "y": 51 + "y": 28 }, - "id": 3746, + "height": null, + "hideTimeOverride": false, + "id": 337, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -34895,47 +46907,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, 
"seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"number_blob_get\"}[2m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_get\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "get", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_get\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob get operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -34943,6 +46963,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -34951,6 +46972,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -34961,97 +46983,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + 
"threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 12, - "y": 51 + "y": 28 }, - "id": 3655, + "height": null, + "hideTimeOverride": false, + "id": 338, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_get_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_average\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{type}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_get_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_get_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile95\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", - "refId": "B" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%-{{type}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_get_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile99\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", - "refId": "C" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{type}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + 
"target": "" }, { - "expr": "max(tikv_engine_blob_get_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_max\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "max", - "refId": "D" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max-{{type}}", + "metric": "", + "query": "max((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob get duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35059,14 +47141,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35077,94 +47161,275 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": 
"rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, "x": 0, - "y": 57 + "y": 35 }, - "id": 3338, + "height": null, + "hideTimeOverride": false, + "id": 339, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_engine_titandb_blob_file_discardable_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (ratio) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ratio}}", + "metric": "", + "query": "sum((\n tikv_engine_titandb_blob_file_discardable_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (ratio) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Blob file discardable ratio distribution", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + 
"transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 35 + }, + "height": null, + "hideTimeOverride": false, + "id": 340, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", 
type=\"number_blob_seek\"}[2m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_seek\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "seek", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_seek\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_blob_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"number_blob_prev\"}[2m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_prev\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "prev", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_prev\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_engine_blob_locate{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"number_blob_next\"}[2m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_next\"}\n [$__rate_interval]\n)) ", "format": 
"time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "next", - "refId": "C" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_next\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob iter operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35172,6 +47437,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -35180,6 +47446,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35190,32 +47457,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 57 + "x": 0, + "y": 42 }, - "id": 3412, + "height": null, + "hideTimeOverride": false, + "id": 341, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": 
false, "values": true @@ -35223,66 +47514,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_seek_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_average\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_seek_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile95\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "B" + "metric": "", + "query": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_seek_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile99\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "C" + "metric": "", + "query": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_seek_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_max\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "D" + "metric": "", + "query": "max((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) 
", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob seek duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35290,14 +47615,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35308,32 +47635,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 63 + "x": 12, + "y": 42 }, - "id": 4092, + "height": null, + "hideTimeOverride": false, + "id": 342, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -35341,66 +47692,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, 
"percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_next_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_average\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_next_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile95\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "95%", - "refId": "B" + "metric": "", + "query": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n 
\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_next_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile99\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "C" + "metric": "", + "query": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_next_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_max\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "max", - "refId": "D" + "metric": "", + "query": "max((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob next duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, 
"type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35408,14 +47793,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35426,32 +47813,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 63 + "x": 0, + "y": 49 }, - "id": 4093, + "height": null, + "hideTimeOverride": false, + "id": 343, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -35459,66 +47870,100 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, 
"stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_prev_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_average\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{type}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_prev_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile95\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95%", - "refId": "B" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%-{{type}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_prev_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", db=\"$titan_db\", type=~\".*_percentile99\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99%", - "refId": "C" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{type}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_prev_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\".*_max\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "max", - "refId": "D" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max-{{type}}", + "metric": "", + "query": "max((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob prev duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": 
"time", "name": null, "show": true, @@ -35526,14 +47971,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35544,76 +47991,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 69 + "x": 12, + "y": 49 }, - "id": 3645, + "height": null, + "hideTimeOverride": false, + "id": 344, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(tikv_engine_blob_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\"keys.*\"}[30s])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob keys flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35621,7 +48104,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -35629,6 +48113,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35639,76 +48124,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": 
null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 69 + "x": 0, + "y": 56 }, - "id": 3643, + "height": null, + "hideTimeOverride": false, + "id": 345, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\"bytes.*\"}[2m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_average\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_average\"}\n \n)) 
by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile99\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile95\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_max\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_file_read_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_max\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob bytes flow", + "title": "Blob file read duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35716,7 +48282,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -35724,6 +48291,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35734,97 +48302,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 75 + "x": 12, + "y": 56 }, - "id": 3657, + "height": null, + "hideTimeOverride": false, + "id": 346, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, 
+ "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_file_read_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_read_micros_average\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "avg", - "refId": "A" - }, - { - "expr": "avg(tikv_engine_blob_file_read_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_read_micros_percentile99\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "99%", - "refId": "B" - }, - { - "expr": "avg(tikv_engine_blob_file_read_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_read_micros_percentile95\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "95%", - "refId": "C" - }, - { - "expr": "max(tikv_engine_blob_file_read_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_read_micros_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "max", - 
"refId": "D" + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob file read duration", + "title": "Blob bytes flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35832,14 +48415,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "binBps", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35850,97 +48435,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 75 + "x": 0, + "y": 63 }, - "id": 3408, + "height": null, + "hideTimeOverride": false, + "id": 347, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": 
true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_file_write_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_write_micros_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_average\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_average\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_file_write_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_write_micros_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile99\"}\n \n)) by (type) 
", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "refId": "B" + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_file_write_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_write_micros_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile95\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "95%", - "refId": "C" + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "max(tikv_engine_blob_file_write_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_write_micros_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_max\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": 
"max", - "refId": "D" + "metric": "", + "query": "max((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_max\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob file write duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -35948,14 +48593,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -35966,76 +48613,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 81 + "x": 12, + "y": 63 }, - "id": 3651, + "height": null, + "hideTimeOverride": false, + "id": 348, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, 
"links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_file_synced{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"}[2m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "sync", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob file sync operations", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36043,6 +48726,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -36051,6 +48735,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -36061,97 +48746,112 @@ ], "yaxis": { "align": false, - 
"alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 81 + "x": 0, + "y": 70 }, - "id": 3653, + "height": null, + "hideTimeOverride": false, + "id": 349, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_file_sync_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_sync_micros_average\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "A" - }, - { - "expr": "avg(tikv_engine_blob_file_sync_micros_seconds{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_sync_micros_percentile95\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" - }, - { - "expr": "avg(tikv_engine_blob_file_sync_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_sync_micros_percentile99\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "99%", - "refId": "C" - }, - { - "expr": "max(tikv_engine_blob_file_sync_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_file_sync_micros_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_gc_action_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "max", - "refId": "D" + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_gc_action_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob file sync duration", + "title": "Blob GC action", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36159,14 +48859,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "none", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, 
"format": "short", "label": null, "logBase": 1, @@ -36177,77 +48879,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 87 + "x": 12, + "y": 70 }, - "id": 5018, + "height": null, + "hideTimeOverride": false, + "id": 350, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_gc_action_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"}[2m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_average\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_average\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile95\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile99\"}\n \n)) by (type) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "B" + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile99\"}\n \n)) by (type) ", + 
"refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_max\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_max\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob GC action", + "title": "Blob file sync duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36255,7 +49037,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -36263,6 +49046,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -36273,97 +49057,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 
12, - "y": 87 + "x": 0, + "y": 77 }, - "id": 3410, + "height": null, + "hideTimeOverride": false, + "id": 351, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_gc_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_micros_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_average\"}\n \n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_average\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_gc_micros_seconds{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_micros_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile95\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "95%", - "refId": "B" + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_gc_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_micros_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile99\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "99%", - "refId": "C" + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_gc_micros_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_micros_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_gc_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_max\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "max", - "refId": "D" + "metric": "", + "query": "max((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_max\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob GC duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36371,14 +49215,16 @@ }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -36389,76 +49235,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 93 + "x": 12, + "y": 77 }, - "id": 3649, + "height": null, + "hideTimeOverride": false, + "id": 352, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, 
"current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_gc_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\"keys.*\"}[30s])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob GC keys flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - 
"buckets": null, "mode": "time", "name": null, "show": true, @@ -36466,7 +49348,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -36474,8 +49357,9 @@ "show": true }, { - "format": "decbytes", - "label": "", + "decimals": null, + "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, @@ -36484,76 +49368,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 93 + "x": 0, + "y": 84 }, - "id": 3340, + "height": null, + "hideTimeOverride": false, + "id": 353, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, 
"steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_gc_flow_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=~\"bytes.*\"}[30s])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile95\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile99\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n 
tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_max\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob GC bytes flow", + "title": "Blob GC input file size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36561,7 +49526,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -36569,8 +49535,9 @@ "show": true }, { - "format": "decbytes", - "label": "", + "decimals": null, + "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, @@ -36579,97 +49546,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 
1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 99 + "x": 12, + "y": 84 }, - "id": 4021, + "height": null, + "hideTimeOverride": false, + "id": 354, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_gc_input_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_input_file_average\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "A" - }, - { - "expr": "avg(tikv_engine_blob_gc_input_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_input_file_percentile95\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "95%", - "refId": "B" - }, - { - "expr": "avg(tikv_engine_blob_gc_input_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", 
type=\"blob_gc_input_file_percentile99\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "99%", - "refId": "C" - }, - { - "expr": "max(tikv_engine_blob_gc_input_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_input_file_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "max", - "refId": "D" + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Blob GC input file size", + "title": "Blob GC bytes flow", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36677,7 +49659,8 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -36685,6 +49668,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -36695,97 +49679,157 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + 
"defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 12, - "y": 99 + "x": 0, + "y": 91 }, - "id": 4022, + "height": null, + "hideTimeOverride": false, + "id": 355, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_engine_blob_gc_output_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_output_file_average\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_average\"}\n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "avg", - "refId": "A" + "metric": "", + "query": "avg((\n 
tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_gc_output_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_output_file_percentile95\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile95\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "95%", - "refId": "B" + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "avg(tikv_engine_blob_gc_output_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_output_file_percentile99\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile99\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "99%", - "refId": "C" + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": 
"max(tikv_engine_blob_gc_outputt_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\", type=\"blob_gc_output_file_max\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_max\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "max", - "refId": "D" + "metric": "", + "query": "max((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Blob GC output file size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36793,7 +49837,8 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -36801,6 +49846,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -36811,77 +49857,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 
112, 0.22)" + }, "gridPos": { - "h": 6, + "h": 7, "w": 12, - "x": 0, - "y": 105 + "x": 12, + "y": 91 }, - "id": 3344, + "height": null, + "hideTimeOverride": false, + "id": 356, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", - "paceLength": 10, + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "scopedVars": { - "titan_db": { - "selected": false, - "text": "kv", - "value": "kv" - } - }, + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_engine_blob_gc_file_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", db=\"$titan_db\"}[2m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_gc_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_gc_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, 
"title": "Blob GC file count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -36889,6 +49970,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -36897,6 +49979,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -36907,43 +49990,86 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": "titan_db", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Titan - $titan_db", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 36 + "y": 0 }, - "id": 2820, + "height": null, + "hideTimeOverride": false, + "id": 357, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, - "fill": 0, - "grid": {}, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 29 + "y": 0 }, - "id": 2991, + "height": null, + "hideTimeOverride": false, + "id": 358, + "interval": null, + "isNew": 
true, "legend": { "alignAsTable": true, "avg": false, @@ -36955,7 +50081,7 @@ "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -36963,47 +50089,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"waiter_manager.*\"}[1m])) by (instance, name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"waiter_manager.*\"}\n [$__rate_interval]\n)) by (instance, name) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{name}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"waiter_manager.*\"}\n [$__rate_interval]\n)) by (instance, name) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"deadlock_detect.*\"}[1m])) by (instance, name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"deadlock_detect.*\"}\n [$__rate_interval]\n)) by (instance, name) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{name}}", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"deadlock_detect.*\"}\n [$__rate_interval]\n)) by (instance, name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Lock Manager Thread CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37011,6 +50160,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -37019,6 +50169,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37029,39 +50180,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 29 + "y": 0 }, - "id": 2877, + "height": null, + "hideTimeOverride": false, + "id": 359, + "interval": 
null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -37069,39 +50237,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_lock_manager_task_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_lock_manager_task_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_lock_manager_task_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Lock Manager Handled tasks", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37109,14 +50293,16 @@ }, "yaxes": [ { 
+ "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37127,31 +50313,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 37 + "y": 7 }, - "id": 2993, - "interval": "", + "height": null, + "hideTimeOverride": false, + "id": 360, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, - "hideZero": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -37165,53 +50370,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, 
"steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_lock_manager_waiter_lifetime_duration_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_lock_manager_waiter_lifetime_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "A", - "step": 10 + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_lock_manager_waiter_lifetime_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) 
", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.9999, sum(rate(tikv_lock_manager_waiter_lifetime_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", + "hide": true, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", - "refId": "C" + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Waiter lifetime duration", "tooltip": { + 
"msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37219,6 +50494,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, @@ -37227,6 +50503,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37237,39 +50514,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 37 + "y": 7 }, - "id": 4018, + "height": null, + "hideTimeOverride": false, + "id": 361, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -37277,46 +50571,70 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - 
"expr": "sum(max_over_time(tikv_lock_manager_wait_table_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[15s])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(max_over_time(\n tikv_lock_manager_wait_table_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(max_over_time(\n tikv_lock_manager_wait_table_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(max_over_time(tikv_lock_wait_queue_entries_gauge_vec{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[15s])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(max_over_time(\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", + "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "B" + "metric": "", + "query": "sum(max_over_time(\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Lock Waiting Queue", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, 
@@ -37324,14 +50642,16 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37342,30 +50662,50 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 45 + "y": 14 }, - "id": 2995, + "height": null, + "hideTimeOverride": false, + "id": 362, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, @@ -37379,44 +50719,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, 
"steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_lock_manager_detect_duration_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) / sum(rate(tikv_lock_manager_detect_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "A" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_lock_manager_detect_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "99%", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + 
}, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_lock_manager_detect_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_detect_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_lock_manager_detect_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_detect_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_lock_manager_detect_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_lock_manager_detect_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Deadlock detect duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37424,6 +50843,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 2, @@ -37432,6 
+50852,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37442,39 +50863,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 45 + "y": 14 }, - "id": 2934, + "height": null, + "hideTimeOverride": false, + "id": 363, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -37482,39 +50920,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_lock_manager_error_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_lock_manager_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (type) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_lock_manager_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Detect error", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37522,14 +50976,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37540,84 +50996,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, - "bars": true, + "bars": false, "cacheTimeout": null, - "dashLength": 10, - "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 0, + "description": null, "editable": true, "error": false, - "fill": 0, - "grid": {}, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 53 + "y": 21 }, - "id": 4019, + "height": null, + "hideTimeOverride": false, + "id": 364, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, - "hideZero": 
false, - "max": false, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "6.1.6", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(max_over_time(tikv_lock_manager_detector_leader_heartbeat{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[15s])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(max_over_time(\n tikv_lock_manager_detector_leader_heartbeat\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(max_over_time(\n tikv_lock_manager_detector_leader_heartbeat\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Deadlock detector leader", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37625,57 +51109,76 @@ }, "yaxes": [ { - 
"decimals": 0, + "decimals": null, "format": "none", - "label": "", + "label": null, "logBase": 1, - "max": "2", - "min": "0", - "show": false + "max": null, + "min": null, + "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 65 + "y": 21 }, - "hiddenSeries": false, - "id": 23763572093, + "height": null, + "hideTimeOverride": false, + "id": 365, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -37683,44 +51186,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - 
"expr": "tikv_pessimistic_lock_memory_size{tidb_cluster=\"$tidb_cluster\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_pessimistic_lock_memory_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "((\n tikv_pessimistic_lock_memory_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Total pessimistic locks memory size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37728,6 +51242,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -37736,6 +51251,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37746,45 +51262,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 73 + "y": 28 }, - "hiddenSeries": false, - "id": 
23763572094, + "height": null, + "hideTimeOverride": false, + "id": 366, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -37792,45 +51319,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_in_memory_pessimistic_locking{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (result)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_in_memory_pessimistic_locking\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{result}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_in_memory_pessimistic_locking\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "In-memory pessimistic locking result", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + 
"transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37838,14 +51375,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37856,46 +51395,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The number of active keys and waiters.", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 75 + "y": 28 }, - "hiddenSeries": false, - "id": 23763573091, + "height": null, + "hideTimeOverride": false, + "id": 367, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, - "hideZero": false, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -37903,45 +51452,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - 
"spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_lock_wait_queue_entries_gauge_vec{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum((\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Pessimistic lock activities", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -37949,14 +51508,16 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -37967,10 +51528,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -37978,159 +51540,246 @@ "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", - "colorScheme": "interpolateOranges", + "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The length includes the entering transaction 
itself", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 83 + "y": 35 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573092, + "id": 368, + "interval": null, "legend": { "show": false }, "links": [], - "pluginVersion": "7.5.11", + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_lock_wait_queue_length_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_lock_wait_queue_length_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_lock_wait_queue_length_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Lengths of lock wait queues when transaction enqueues", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + 
"decimals": 1, "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Pessimistic Locking", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 37 + "y": 0 }, - "id": 23763573235, + "height": null, + "hideTimeOverride": false, + "id": 369, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "percentunit" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, + "w": 12, "x": 0, - "y": 45 + "y": 0 }, - "hiddenSeries": false, - "id": 23763573350, + "height": null, + "hideTimeOverride": false, + "id": 370, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": 
false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" - }, - { - "hide": false, - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "CPU Usage", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -38138,6 +51787,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -38146,6 +51796,7 @@ "show": true }, { + 
"decimals": null, "format": "short", "label": null, "logBase": 1, @@ -38156,117 +51807,142 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "(AP)apply-99": "#88509f", - "(AP)get_permit-99": "#922870", - "(AP)queuing-99": "#9d0041", - "(DL)exec_download-99": "#73a0fe", - "(DL)queue-99": "#7d78ce", - "exec_download-99": "light-orange", - "get_permit-99": "red", - "queuing-99": "blue", - "total-99": "rgb(252, 252, 252)" - }, - "bars": true, - "dashLength": 10, - "dashes": false, + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { "h": 7, - "w": 9, - "x": 6, - "y": 45 + "w": 12, + "x": 12, + "y": 0 }, - "hiddenSeries": false, - "id": 23763573351, + "height": null, + "hideTimeOverride": false, + "id": 371, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total-99", - 
"bars": false, - "fill": 2, - "lines": true, - "linewidth": 0, - "stack": false, - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": true, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, avg(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[1m])) by (le, request))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "total-99", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, avg(rate(tikv_import_apply_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"queue|exec_download\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"queue|exec_download\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "(DL){{type}}-99", - "refId": "C" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"queue|exec_download\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, avg(rate(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "(AP){{type}}-99", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "P99 RPC Duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -38274,6 +51950,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -38282,7 +51959,8 @@ "show": true }, { - "format": "s", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -38292,41 +51970,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - 
"decimals": 1, "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 9, - "x": 15, - "y": 45 + "w": 12, + "x": 0, + "y": 7 }, - "hiddenSeries": false, - "id": 23763573352, + "height": null, + "hideTimeOverride": false, + "id": 372, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -38334,56 +52027,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[$__rate_interval])) by (instance, request)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (instance, request) ", "format": "time_series", "hide": false, + "instant": false, 
"interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}} :: {{request}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 10 + "legendFormat": "{{instance}}-{{request}}", + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (instance, request) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request!=\"switch_mode\"}[30s])) by (request)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", "format": "time_series", - "hide": true, + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "total - {{request}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "B", - "step": 10 + "legendFormat": "total-{{request}}", + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Import RPC Ops", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -38391,6 +52098,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -38399,6 +52107,7 @@ "show": true }, { + "decimals": null, "format": "short", 
"label": null, "logBase": 1, @@ -38409,78 +52118,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "cps" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 6, - "x": 0, - "y": 52 + "h": 7, + "w": 12, + "x": 12, + "y": 7 }, - "hiddenSeries": false, - "id": 23763573032, + "height": null, + "hideTimeOverride": false, + "id": 373, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_import_apply_cache_event{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (instance, type)", + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_apply_cache_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}} :: {{type}}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{type}}-{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_import_apply_cache_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Cache Events", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -38488,6 +52231,7 @@ }, "yaxes": [ { + "decimals": null, "format": "cps", "label": null, "logBase": 1, @@ -38496,6 +52240,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -38506,285 +52251,424 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, - "cardRound": 2 + "cardRound": null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 6, - "x": 6, - "y": 52 + "h": 7, + "w": 12, + "x": 0, + "y": 14 }, "heatmap": {}, + "height": 
null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573348, + "id": 374, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=\"apply\"}[$__rate_interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Overall RPC Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, 
"yBucketSize": null }, { + "cacheTimeout": null, "cards": { "cardPadding": null, - "cardRound": 2 + "cardRound": null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 6, + "h": 7, + "w": 12, "x": 12, - "y": 52 + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573558, + "id": 375, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_apply_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"exec_download\"}[$__rate_interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"exec_download\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"exec_download\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + 
"timeShift": null, "title": "Read File into Memory Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { "cardPadding": null, - "cardRound": 2 + "cardRound": null }, "color": { - "cardColor": "#37872D", + "cardColor": "#b4ff00", "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 6, - "x": 18, - "y": 52 + "h": 7, + "w": 12, + "x": 0, + "y": 21 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573229, + "id": 376, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"queuing\"}[$__rate_interval])) by (le)", + 
"datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queuing\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queuing\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Queuing Time", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "bytes" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 60 + 
"h": 7, + "w": 12, + "x": 12, + "y": 21 }, - "hiddenSeries": false, - "id": 23763573349, + "height": null, + "hideTimeOverride": false, + "id": 377, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tikv_import_apply_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_apply_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_apply_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Apply Request Throughput", "tooltip": { + 
"msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -38792,6 +52676,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -38800,6 +52685,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -38810,287 +52696,424 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, - "cardRound": 2 + "cardRound": null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", - "colorScheme": "interpolateBlues", + "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 6, - "x": 6, - "y": 60 + "h": 7, + "w": 12, + "x": 0, + "y": 28 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573344, + "id": 378, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_download_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_download_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_download_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Downloaded File Size", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, - "format": "decbytes", + "decimals": 1, + "format": "bytes", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { "cardPadding": null, - "cardRound": 2 + "cardRound": null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", - "colorScheme": "interpolatePurples", + "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 6, + "h": 7, + "w": 12, "x": 12, - "y": 60 + "y": 28 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573233, 
+ "id": 379, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_apply_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__rate_interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_apply_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_apply_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Apply Batch Size", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": null, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, - "format": "decbytes", + "decimals": 1, + "format": "bytes", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { "cardPadding": null, - "cardRound": 2 + "cardRound": 
null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 6, - "x": 18, - "y": 60 + "h": 7, + "w": 12, + "x": 0, + "y": 35 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573230, + "id": 380, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"get_permit\"}[$__rate_interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"get_permit\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"get_permit\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Blocked by Concurrency Time", "tooltip": { - "show": true, - "showHistogram": true + 
"msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "ops" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 5, - "w": 6, - "x": 0, - "y": 66 + "h": 7, + "w": 12, + "x": 12, + "y": 35 }, - "hiddenSeries": false, - "id": 23763573118, + "height": null, + "hideTimeOverride": false, + "id": 381, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": 
"7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tikv_import_applier_event{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"begin_req\"}[$__rate_interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}} :: {{type}}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Apply Request Speed", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39098,6 +53121,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -39106,6 +53130,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39116,78 +53141,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + 
"editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "decbytes" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 6, - "x": 6, - "y": 68 + "h": 7, + "w": 12, + "x": 0, + "y": 42 }, - "hiddenSeries": false, - "id": 23763573346, + "height": null, + "hideTimeOverride": false, + "id": 382, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_import_apply_cached_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_import_apply_cached_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "queryType": 
"randomWalk", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_import_apply_cached_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Cached File in Memory", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39195,7 +53254,8 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -39203,6 +53263,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39213,76 +53274,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 6, + "h": 7, + "w": 12, "x": 12, - "y": 68 + "y": 42 }, - "hiddenSeries": false, - "id": 23763573119, + "height": null, + "hideTimeOverride": false, + "id": 383, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + 
"sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "increase(tikv_import_applier_event{instance=~\"$instance\", type!=\"begin_req\"}[$__rate_interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 3, - "legendFormat": "{{instance}} :: {{type}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Engine Requests Unfinished", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39290,6 +53387,7 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39298,6 
+53396,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39308,149 +53407,216 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, - "cardRound": 2 + "cardRound": null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 6, - "x": 18, - "y": 68 + "h": 7, + "w": 12, + "x": 0, + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 23763573231, + "id": 384, + "interval": null, "legend": { "show": false }, - "pluginVersion": "7.5.11", + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_import_engine_request_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"apply\"}[$__rate_interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_engine_request_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Apply Time", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": null, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", + "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "bytes" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 5, - "w": 6, - "x": 0, - "y": 71 + "h": 7, + "w": 12, + "x": 12, + "y": 49 }, - "hiddenSeries": false, - "id": 23763573449, + "height": null, + "hideTimeOverride": false, + "id": 385, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, 
"lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_server_mem_trace_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"raftstore-.*\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_server_mem_trace_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftstore-.*\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_server_mem_trace_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftstore-.*\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Raft Store Memory Usage", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39458,6 +53624,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -39466,6 +53633,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39476,59 +53644,98 @@ ], "yaxis": { "align": false, - 
"alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Point In Time Restore", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 38 + "y": 0 }, - "id": 8389, + "height": null, + "hideTimeOverride": false, + "id": 386, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of resolved ts worker", + "description": "The CPU utilization of resolved ts worker", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 8, "x": 0, - "y": 39 + "y": 0 }, - "hiddenSeries": false, - "id": 8385, + "height": null, + "hideTimeOverride": false, + "id": 387, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -39536,44 +53743,55 @@ "lines": true, "linewidth": 1, "links": [], - 
"nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"resolved_ts.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"resolved_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"resolved_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Resolved TS Worker CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39581,14 +53799,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": 
null, "logBase": 1, @@ -39599,44 +53819,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of advance ts worker", + "description": "The CPU utilization of advance ts worker", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 8, "x": 8, - "y": 39 + "y": 0 }, - "hiddenSeries": false, - "id": 9162, + "height": null, + "hideTimeOverride": false, + "id": 388, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -39644,44 +53876,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"advance_ts.*\"}[1m])) by 
(instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"advance_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-tso", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"advance_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Advance ts Worker CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39689,14 +53932,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39707,44 +53952,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": " \tThe CPU utilization of scan lock worker", + "description": "The CPU utilization of scan lock worker", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + 
"grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 8, "x": 16, - "y": 39 + "y": 0 }, - "hiddenSeries": false, - "id": 9164, + "height": null, + "hideTimeOverride": false, + "id": 389, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -39752,44 +54009,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"inc_scan.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"inc_scan.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}-scan", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"inc_scan.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Scan lock Worker CPU", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39797,14 +54065,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39815,43 +54085,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The gap between resolved ts (the maximum candidate of safe-ts) and current time.", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 47 + "y": 7 }, - "hiddenSeries": false, - "id": 8387, + "height": null, + "hideTimeOverride": false, + "id": 390, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": 
false, "values": true @@ -39859,43 +54142,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_resolved_ts_min_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_resolved_ts_min_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 60 + "metric": "", + "query": "sum((\n tikv_resolved_ts_min_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Max gap of resolved-ts", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -39903,6 +54198,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ms", "label": null, "logBase": 1, @@ -39911,6 +54207,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -39921,43 +54218,56 @@ ], "yaxis": { 
"align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The gap between now() and the minimal (non-zero) safe ts for followers", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 47 + "y": 7 }, - "hiddenSeries": false, - "id": 23763573805, + "height": null, + "hideTimeOverride": false, + "id": 391, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -39965,45 +54275,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_follower_safe_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_resolved_ts_min_follower_safe_ts_gap_millis\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 60 + "metric": "", + "query": "sum((\n tikv_resolved_ts_min_follower_safe_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Max gap of follower safe-ts", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40011,6 +54331,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ms", "label": null, "logBase": 1, @@ -40019,6 +54340,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -40029,44 +54351,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The region that has minimal resolved ts", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 55 + "y": 14 }, - "hiddenSeries": false, - "id": 23763572078, 
+ "height": null, + "hideTimeOverride": false, + "id": 392, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -40074,51 +54408,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_resolved_ts_min_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum((\n tikv_resolved_ts_min_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Min Resolved TS Region", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": 
"cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40126,15 +54464,17 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -40144,44 +54484,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The region id of the follower that has minimal safe ts", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 55 + "y": 14 }, - "hiddenSeries": false, - "id": 23763573804, + "height": null, + "hideTimeOverride": false, + "id": 393, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -40189,51 +54541,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - 
"pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_follower_safe_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_resolved_ts_min_follower_safe_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum((\n tikv_resolved_ts_min_follower_safe_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Min Safe TS Follower Region", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40241,15 +54597,17 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -40259,10 +54617,11 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { "cardPadding": null, "cardRound": null @@ -40272,135 +54631,94 @@ 
"colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, + "max": null, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The time consumed when handle a check leader request", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 63 + "y": 21 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 9168, + "id": 394, + "interval": null, "legend": { "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "sum(delta(tikv_resolved_ts_check_leader_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_resolved_ts_check_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_resolved_ts_check_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Check leader duration", "tooltip": { - "show": true, - "showHistogram": false + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, + 
"transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 0, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The backoff duration before starting initial scan", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 70 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 23763573950, - "legend": { - "show": false - }, - "links": [], - "reverseYBuckets": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(rate(tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", - "format": "heatmap", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{le}}", - "metric": "", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Initial scan backoff duration", - "tooltip": { + "mode": "time", + "name": null, "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { - "decimals": 0, + "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -40409,37 +54727,50 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": 
null, "datasource": "${DS_TEST-CLUSTER}", "description": "The gap between resolved ts of leaders and current time", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 63 + "y": 21 }, - "hiddenSeries": false, - "id": 23763572077, + "height": null, + "hideTimeOverride": false, + "id": 395, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -40447,45 +54778,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_gap_millis{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": 
"", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 60 + "metric": "", + "query": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Max gap of resolved-ts in region leaders", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40493,6 +54834,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ms", "label": null, "logBase": 1, @@ -40501,6 +54843,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -40511,43 +54854,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Bucketed histogram of region count in a check leader request", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 71 + "y": 28 }, - "hiddenSeries": false, - "id": 12308, + "height": null, + "hideTimeOverride": false, + "id": 396, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + 
"hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -40555,45 +54911,55 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_snapshot_size_bucket", - "refId": "A", - "step": 40 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% CheckLeader request region count", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + 
"transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40601,7 +54967,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -40609,6 +54976,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -40619,44 +54987,160 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The backoff duration before starting initial scan", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 397, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Initial scan backoff duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The region that its leader has minimal resolved ts.", + "description": "Total bytes in memory of resolved-ts observe regions's lock heap", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 12, - "y": 71 + "x": 0, + "y": 35 }, - "hiddenSeries": false, - "id": 23763572079, + "height": null, + "hideTimeOverride": false, + "id": 398, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": 
false, "values": true @@ -40664,51 +55148,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_resolved_ts_min_leader_resolved_ts_region{tidb_cluster=~\"$tidb_cluster.*\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_resolved_ts_lock_heap_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "legendFormat": "{{instance}}", + "metric": "", + "query": "avg((\n tikv_resolved_ts_lock_heap_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Min Leader Resolved TS Region", + "title": "Lock heap size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40716,15 +55204,17 @@ }, "yaxes": [ { - "format": "none", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, 
"max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -40734,44 +55224,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "Total bytes in memory of resolved-ts observe regions's lock heap", + "description": "The region that its leader has minimal resolved ts.", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 0, - "y": 79 + "x": 12, + "y": 35 }, - "hiddenSeries": false, - "id": 8379, + "height": null, + "hideTimeOverride": false, + "id": 399, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -40779,43 +55281,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": 
false, "targets": [ { - "expr": "avg(tikv_resolved_ts_lock_heap_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Lock heap size", + "title": "Min Leader Resolved TS Region", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40823,14 +55337,16 @@ }, "yaxes": [ { - "format": "bytes", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -40841,44 +55357,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The status of resolved-ts observe regions", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + 
"fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 12, - "y": 79 + "x": 0, + "y": 42 }, - "hiddenSeries": false, - "id": 8377, + "height": null, + "hideTimeOverride": false, + "id": 400, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -40886,49 +55414,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(tikv_resolved_ts_region_resolve_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_resolved_ts_region_resolve_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum((\n tikv_resolved_ts_region_resolve_status\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Observe region status", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -40936,14 +55470,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -40954,45 +55490,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The count of fail to advance resolved-ts", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 0, - "y": 87 + "x": 12, + "y": 42 }, - "hiddenSeries": false, - "id": 9166, + "height": null, + "hideTimeOverride": false, + "id": 401, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": 
"max", "sortDesc": true, "total": false, "values": true @@ -41000,58 +55547,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "lines": false - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(delta(tikv_resolved_ts_fail_advance_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_resolved_ts_fail_advance_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-{{reason}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "sum(delta(\n tikv_resolved_ts_fail_advance_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(delta(tikv_raftstore_check_stale_peer{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_raftstore_check_stale_peer\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + 
"instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}-stale-peer", - "refId": "B", - "step": 10 + "metric": "", + "query": "sum(delta(\n tikv_raftstore_check_stale_peer\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Fail advance ts count", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41059,14 +55618,16 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41077,43 +55638,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Bucketed histogram of the check leader request size", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 12, - "y": 87 + "x": 0, + "y": 49 }, - "hiddenSeries": false, - "id": 8383, + "height": null, + "hideTimeOverride": false, + "id": 402, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, 
"avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -41121,53 +55695,70 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "metric": "tikv_snapshot_size_bucket", - "refId": "A", - "step": 40 + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_check_leader_request_item_count_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", - "hide": true, + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}-check-num", - "refId": "B" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "99% CheckLeader request size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41175,6 +55766,7 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, @@ -41183,6 +55775,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41193,44 +55786,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "Total bytes of pending commands in the channel", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 
112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 0, - "y": 95 + "x": 12, + "y": 49 }, - "hiddenSeries": false, - "id": 8381, + "height": null, + "hideTimeOverride": false, + "id": 403, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -41238,43 +55843,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(tikv_resolved_ts_channel_penging_cmd_bytes_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_resolved_ts_channel_penging_cmd_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", - "step": 10 + "metric": "", + "query": "avg((\n tikv_resolved_ts_channel_penging_cmd_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, 
"title": "Pending command size", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41282,14 +55899,16 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41300,53 +55919,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], - "title": "Resolved-TS", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Resolved TS", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 39 + "y": 0 }, - "id": 2763, + "height": null, + "hideTimeOverride": false, + "id": 404, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 44 + "y": 0 }, - "hiddenSeries": false, - "id": 
23763573729, + "height": null, + "hideTimeOverride": false, + "id": 405, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -41354,42 +56018,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_allocator_stats{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_allocator_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, type) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", + "metric": "", + "query": "sum((\n tikv_allocator_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Allocator Stats", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, 
"mode": "time", "name": null, "show": true, @@ -41397,7 +56074,8 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -41405,6 +56083,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41415,85 +56094,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "binBps" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 44 + "y": 0 }, - "hiddenSeries": false, - "id": 23763573730, + "height": null, + "hideTimeOverride": false, + "id": 406, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { 
- "exemplar": true, - "expr": "sum(rate(tikv_allocator_thread_allocation{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"alloc\"}[$__rate_interval])) by (thread_name) - sum(rate(tikv_allocator_thread_allocation{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"dealloc\"}[$__rate_interval])) by (thread_name) != 0", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) - sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) )", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{thread_name}}", - "refId": "A" + "metric": "", + "query": "(sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) - sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Send Allocated(+) / Release Received(-) Bytes Rate", "tooltip": { + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41501,6 +56207,7 @@ }, "yaxes": [ { + "decimals": null, "format": "binBps", "label": null, "logBase": 1, @@ -41509,6 +56216,7 
@@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41519,83 +56227,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 51 + "y": 7 }, - "hiddenSeries": false, - "id": 2696, + "height": null, + "hideTimeOverride": false, + "id": 407, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, - "show": false, - "sort": "current", + "show": true, + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tikv_allocator_thread_allocation{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
type=\"alloc\"}[$__rate_interval])) by (thread_name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{thread_name}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Newly Allocated Bytes by Thread", "tooltip": { + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41603,7 +56340,8 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -41611,6 +56349,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41621,83 +56360,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": 
"rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 51 + "y": 7 }, - "hiddenSeries": false, - "id": 23763573731, + "height": null, + "hideTimeOverride": false, + "id": 408, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, - "show": false, - "sort": "current", + "show": true, + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(delta(tikv_allocator_thread_allocation{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"alloc\"}[$__rate_interval])) by (thread_name)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{thread_name}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) ", + "refId": "", + "step": 10, + 
"target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Recently Released Bytes by Thread", "tooltip": { + "msResolution": true, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41705,7 +56473,8 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -41713,6 +56482,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41723,60 +56493,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Memory", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 40 + "y": 0 }, - "id": 3922, + "height": null, + "hideTimeOverride": false, + "id": 409, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - 
"fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, - "w": 12, + "w": 8, "x": 0, - "y": 65 + "y": 0 }, - "hiddenSeries": false, - "id": 3924, + "height": null, + "hideTimeOverride": false, + "id": 410, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -41784,71 +56592,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/backup-auto-throttle/", - "fill": 5, - "fillGradient": 2, - "linewidth": 0 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"b.*k.*w.*k.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"b.*k.*w.*k.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "intervalFactor": 2, + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "backup-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"b.*k.*w.*k.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_io\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_io\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", - "hide": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "backup-io-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "B", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_io\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "tikv_backup_softlimit{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_backup_softlimit\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "backup-auto-throttle-{{instance}}", - "refId": "C" + "metric": "", + "query": "((\n tikv_backup_softlimit\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Backup CPU Utilization", "tooltip": { - "msResolution": 
false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -41856,6 +56678,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -41864,6 +56687,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -41874,144 +56698,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": "${DS_TEST-CLUSTER}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 4, - "x": 12, - "y": 65 - }, - "id": 3926, - "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true - }, - "styles": [ - { - "alias": "Time", - "align": "auto", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "date" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sum(tikv_backup_thread_pool_size{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by(instance)", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Backup Thread Count", - "transform": "timeseries_aggregations", - "type": "table" - }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, 
"fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 + "h": 7, + "w": 8, + "x": 8, + "y": 0 }, - "hiddenSeries": false, - "id": 23763571993, + "height": null, + "hideTimeOverride": false, + "id": 411, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_cloud_request_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (cloud, req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_backup_thread_pool_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{cloud}}-{{req}}", - "refId": "A" + "intervalFactor": 1, + 
"legendFormat": "{{instance}}", + "metric": "", + "query": "sum((\n tikv_backup_thread_pool_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "cloud request", + "title": "Backup Thread Count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -42019,7 +56811,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -42027,6 +56820,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -42037,81 +56831,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 65 + "y": 0 }, - "hiddenSeries": false, - "id": 5264, + "height": null, + "hideTimeOverride": false, + "id": 412, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, 
"lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "delta(tikv_backup_error_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_backup_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, error) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}-{{error}}", - "refId": "D" + "metric": "", + "query": "sum(delta(\n tikv_backup_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, error) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Backup Errors", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -42119,184 +56944,232 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + 
"alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 8, "x": 0, - "y": 72 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 3927, + "id": 413, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_backup_range_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", cf=\"write\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Backup Write CF SST Size", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "bytes", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 8, "x": 8, - "y": 72 + "y": 7 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5266, + "id": 414, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 
512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_backup_range_size_bytes_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", cf=\"default\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Backup Default CF SST Size", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "bytes", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -42305,34 +57178,49 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": 
"absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 72 + "y": 7 }, - "hiddenSeries": false, - "id": 3929, + "height": null, + "hideTimeOverride": false, + "id": 415, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -42341,60 +57229,70 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_backup_range_size_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "total", "metric": "", - "refId": "A", - "step": 4 + "query": "sum(rate(\n tikv_backup_range_size_bytes_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "rate(tikv_backup_range_size_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, cf) ", "format": "time_series", "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}} {{cf}}", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{cf}}", "metric": "", - "refId": "B", - "step": 4 + "query": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, cf) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Backup SST Generation Throughput", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -42402,7 +57300,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -42410,7 +57309,8 @@ "show": true }, { - "format": "Bps", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -42420,247 +57320,316 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": 
"#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, - "w": 4, + "w": 6, "x": 0, - "y": 79 + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5597, + "id": 416, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"snapshot\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, 
"title": "Backup Scan SST Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, - "x": 4, - "y": 79 + "x": 6, + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 3931, + "id": 417, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": 
"max(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"scan\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Backup Scan SST Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + 
"editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, - "x": 10, - "y": 79 + "x": 12, + "y": 14 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 6905, + "id": 418, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"save.*\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"save.*\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"save.*\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Backup Save SST Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + 
"transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -42669,34 +57638,49 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 8, - "x": 16, - "y": 79 + "w": 6, + "x": 18, + "y": 14 }, - "hiddenSeries": false, - "id": 3928, + "height": null, + "hideTimeOverride": false, + "id": 419, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "max", "sortDesc": true, "total": false, @@ -42705,60 +57689,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, 
sum(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}} - 99%", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-99.9%", "metric": "", - "refId": "A", - "step": 4 + "query": "histogram_quantile(0.999,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_backup_range_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}} - 95%", - "refId": "B", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": 
"sum(rate(tikv_backup_range_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type) / sum(rate(tikv_backup_range_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum((\n tikv_backup_range_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / sum((\n tikv_backup_range_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}} - avg", - "refId": "C", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-avg", + "metric": "", + "query": "(sum((\n tikv_backup_range_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / sum((\n tikv_backup_range_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) )", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Backup SST Duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -42766,6 +57775,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -42774,6 +57784,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -42784,85 +57795,108 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { 
+ "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 86 + "y": 21 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 3930, + "id": 420, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_external_storage_create_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_external_storage_create_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "External Storage Create Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -42871,37 +57905,50 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 86 + "y": 21 }, - "id": 4936, + "height": null, + "hideTimeOverride": false, + "id": 421, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -42909,47 +57956,123 @@ "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + 
"maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_external_storage_create_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}-100%", - "refId": "E" + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_external_storage_create_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"histogram_quantile(0.99,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}-99%", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_external_storage_create_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_external_storage_create_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + 
"legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "External Storage Create Duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 1, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -42966,53 +58089,67 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "grid": {}, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 93 + "y": 28 }, - "id": 5267, + "height": null, + "hideTimeOverride": false, + "id": 422, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -43020,47 +58157,123 @@ "lines": true, 
"linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", req=~\"analyze.*|checksum.*\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) )", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-100%", - "refId": "E" + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.99, sum(rate(tikv_coprocessor_request_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", req=~\"analyze.*|checksum.*\"}[1m])) by (le,req))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{req}}-99%", - "refId": "A", - "step": 4 + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Checksum Request Duration", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, - "sort": 1, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -43077,94 +58290,123 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": 
null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 93 + "y": 28 }, - "id": 5269, + "height": null, + "hideTimeOverride": false, + "id": 423, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, - "points": true, + "pointradius": 5, + "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(node_disk_io_time_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n node_disk_io_time_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, device) ", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - {{device}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{device}}", + "metric": "", + "query": "sum(rate(\n node_disk_io_time_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, device) ", + "refId": "", + "step": 10, + "target": "" } ], 
"thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "IO Utilization", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -43172,6 +58414,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -43180,6 +58423,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -43190,42 +58434,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, + "description": null, "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "grid": {}, "gridPos": { "h": 7, - "w": 12, + "w": 8, "x": 0, - "y": 100 + "y": 35 }, - "id": 5925, + "height": null, + "hideTimeOverride": false, + "id": 424, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -43233,67 +58491,85 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - 
"pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/import-count.*/", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "import-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance, tid) > 0", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance, tid) > 0", "format": "time_series", "hide": true, - "intervalFactor": 2, - "legendFormat": "backup-{{instance}}-{{tid}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "C", - "step": 4 + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "import-{{instance}}-{{tid}}", + "metric": "", + "query": 
"sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance, tid) > 0", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "count(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "import-count-{{instance}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "D", - "step": 4 + "metric": "", + "query": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Import CPU Utilization", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -43301,6 +58577,7 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, @@ -43309,6 +58586,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -43319,145 +58597,245 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], + "aliasColors": {}, + "bars": false, + "cacheTimeout": 
null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fontSize": "100%", "gridPos": { "h": 7, - "w": 4, - "x": 12, - "y": 100 + "w": 8, + "x": 8, + "y": 35 + }, + "height": null, + "hideTimeOverride": false, + "id": 425, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, - "id": 5926, + "lines": true, + "linewidth": 1, "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] }, - "styles": [ - { - "alias": "Time", - "align": "auto", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "date" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "count(rate(tikv_thread_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", name=~\"sst_.*\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Import Thread Count", - "transform": "timeseries_aggregations", - "type": "table" + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 
0.22)" + }, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 100 + "y": 35 }, - "id": 5932, + "height": null, + "hideTimeOverride": false, + "id": 426, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "delta(tikv_import_error_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_import_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, error, instance) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}} {{error}} {{instance}}", - "refId": "D" + "legendFormat": "{{type}}-{{error}}-{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_import_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, error, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Import Errors", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], 
+ "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -43465,100 +58843,200 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 107 + "y": 42 }, - "id": 5931, + "height": null, + "hideTimeOverride": false, + "id": 427, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + 
"dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, avg(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, request))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{request}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{request}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + 
"expr": "(sum(rate(\n tikv_import_rpc_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) / sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) )", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{request}}-99%", - "refId": "A" + "legendFormat": "avg-{{request}}", + "metric": "", + "query": "(sum(rate(\n tikv_import_rpc_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) / sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.5, sum(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, request, instance))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) ", "format": "time_series", "hide": true, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{request}}-50%", - "refId": "B" + "legendFormat": "count-{{request}}", + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Import RPC Duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 
0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -43566,6 +59044,7 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, @@ -43574,6 +59053,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -43584,39 +59064,56 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 107 + "y": 42 }, - "id": 6267, + "height": null, + "hideTimeOverride": false, + "id": 428, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -43624,51 +59121,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, 
"targets": [ { - "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request!=\"switch_mode\"}[30s])) by (instance, request)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}} - {{request}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(rate(tikv_import_rpc_duration_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request!=\"switch_mode\"}[30s])) by (request)", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "total - {{request}}", - "metric": "tikv_grpc_msg_duration_seconds_bucket", - "refId": "B", - "step": 10 + "legendFormat": "{{request}}", + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Import RPC Ops", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -43676,6 +59177,7 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, @@ -43684,6 +59186,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -43694,571 +59197,732 @@ ], "yaxis": { "align": false, - 
"alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 114 + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5930, + "id": 429, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=~\"download|write\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"download|write\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"download|write\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Import Write/Download RPC Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 114 + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5929, + "id": 430, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": 
[], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_import_download_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"queue\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queue\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queue\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Import Wait Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", 
"exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 12, - "y": 114 + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 6906, + "id": 431, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_import_download_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"read\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"read\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"read\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Import Read SST Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": 
true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 18, - "y": 114 + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5928, + "id": 432, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_import_download_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"rewrite\"}[1m])) 
by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"rewrite\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"rewrite\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Import Rewrite SST Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } 
+ } }, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 121 + "y": 56 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5939, + "id": 433, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_import_rpc_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", request=~\"ingest\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Import Ingest RPC Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, 
"format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 121 + "y": 56 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5938, + "id": 434, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_import_ingest_duration_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"ingest\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_ingest_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + 
"intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_ingest_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Import Ingest SST Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 12, - "y": 121 + "y": 56 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 5937, + "id": 435, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, 
- "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "expr": "max(rate(tikv_import_ingest_byte{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_ingest_byte_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, - "intervalFactor": 2, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_ingest_byte_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Import Ingest SST Bytes", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -44267,79 +59931,121 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - 
"defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 6, "x": 18, - "y": 121 + "y": 56 }, - "id": 5927, + "height": null, + "hideTimeOverride": false, + "id": 436, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, - "rightSide": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_import_download_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_download_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_import_download_bytes_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_import_download_bytes_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_download_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "total", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_import_download_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Import Download SST Throughput", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -44347,7 +60053,8 @@ }, "yaxes": [ { - "format": "Bps", + "decimals": null, + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -44355,7 +60062,8 @@ "show": true }, { - "format": "Bps", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -44365,77 +60073,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + 
"steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 128 + "y": 63 }, - "id": 12309, + "height": null, + "hideTimeOverride": false, + "id": 437, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "delta(tikv_import_local_write_keys{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_import_local_write_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}} {{instance}}", - "refId": "D" + "legendFormat": "{{type}}-{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_import_local_write_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - 
"timeRegions": [], "timeShift": null, "title": "Import Local Write keys", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -44443,95 +60186,280 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, - "description": "", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 63 + }, + "height": null, + "hideTimeOverride": false, + "id": 438, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + 
"repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_local_write_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_import_local_write_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Import Local Write bytes", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 128 + "x": 0, 
+ "y": 70 }, - "id": 12310, + "height": null, + "hideTimeOverride": false, + "id": 439, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "delta(tikv_import_local_write_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum((\n tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}} {{instance}}", - "refId": "D" + "legendFormat": "sum", + "metric": "", + "query": "sum((\n 
tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Import Local Write bytes", + "title": "TTL Expired", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -44539,104 +60467,132 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The accumulated TTL expired KV count during backup", + "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 135 + "x": 12, + "y": 70 }, - "hiddenSeries": false, - "id": 23763572861, + "height": null, + "hideTimeOverride": false, + "id": 440, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, 
"show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(tikv_backup_raw_expired_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "hide": true, - "interval": "", - "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" - }, - { - "exemplar": true, - "expr": "sum(tikv_backup_raw_expired_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_cloud_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cloud, req) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "sum", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{cloud}}-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_cloud_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cloud, req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "TTL Expired", + "title": "cloud request", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": 
"individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -44644,6 +60600,7 @@ }, "yaxes": [ { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -44652,6 +60609,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -44662,84 +60620,154 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Backup & Import", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 41 + "y": 0 }, - "id": 4466, + "height": null, + "hideTimeOverride": false, + "id": 441, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Total number of encryption data keys in use", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 58 + "y": 0 }, - "id": 4464, + "height": null, + "hideTimeOverride": false, + "id": 442, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": 
true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_data_key_storage_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_encryption_data_key_storage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_encryption_data_key_storage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Encryption data keys", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -44747,8 +60775,8 @@ }, "yaxes": [ { - "decimals": 0, - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -44756,7 +60784,7 @@ "show": true }, { - "decimals": 0, + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -44767,69 +60795,112 @@ 
], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Number of files being encrypted", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 58 + "y": 0 }, - "id": 4554, + "height": null, + "hideTimeOverride": false, + "id": 443, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_file_num{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_encryption_file_num\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + 
"query": "sum((\n tikv_encryption_file_num\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Encrypted files", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -44837,7 +60908,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -44845,6 +60917,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -44855,69 +60928,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Flag to indicate if encryption is initialized", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 66 + "y": 7 }, - "id": 4555, + "height": null, + "hideTimeOverride": false, + "id": 444, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, - "max": false, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": 
null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_is_initialized{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_encryption_is_initialized\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "((\n tikv_encryption_is_initialized\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Encryption initialized", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -44925,8 +61041,8 @@ }, "yaxes": [ { - "decimals": 0, - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -44934,7 +61050,7 @@ "show": true }, { - "decimals": 0, + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -44945,69 +61061,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Total size of encryption meta files", + "editable": true, + "error": false, + 
"fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 66 + "y": 7 }, - "id": 4556, + "height": null, + "hideTimeOverride": false, + "id": 445, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tikv_encryption_meta_file_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_encryption_meta_file_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "{{name}}-{{instance}}", - "refId": "A" + "metric": "", + "query": "((\n tikv_encryption_meta_file_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, 
"title": "Encryption meta files size", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -45015,7 +61174,8 @@ }, "yaxes": [ { - "format": "decbytes", + "decimals": null, + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -45023,6 +61183,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -45033,76 +61194,127 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 0, - "y": 74 + "y": 14 }, - "id": 4557, + "height": null, + "hideTimeOverride": false, + "id": 446, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, 
"stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\" ,metric=\"encrypt_data_nanos\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"encrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "encrypt-{{req}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"encrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_coprocessor_rocksdb_perf{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\" ,metric=\"decrypt_data_nanos\"}[1m])) by (req)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"decrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", + "hide": false, + "instant": false, + "interval": "", "intervalFactor": 1, "legendFormat": "decrypt-{{req}}", - "refId": "B" + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"decrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Encrypt/decrypt data nanos", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], 
+ "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -45110,7 +61322,8 @@ }, "yaxes": [ { - "format": "short", + "decimals": null, + "format": "none", "label": null, "logBase": 1, "max": null, @@ -45118,6 +61331,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -45128,87 +61342,180 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Writing or reading file duration (second)", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 8, + "h": 7, "w": 12, "x": 12, - "y": 74 + "y": 14 }, - "id": 4559, + "height": null, + "hideTimeOverride": false, + "id": 447, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + 
"spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tikv_encryption_write_read_file_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "max-{{type}}-{{operation}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + 
"step": 10, + "target": "" }, { - "expr": "histogram_quantile(0.95, sum(rate(tikv_encryption_write_read_file_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_encryption_write_read_file_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "95%-{{type}}-{{operation}}", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_encryption_write_read_file_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(rate(tikv_encryption_write_read_file_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation) / sum(rate(tikv_encryption_write_read_file_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, operation)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", + "hide": true, + "instant": false, 
"interval": "", - "intervalFactor": 2, - "legendFormat": "avg-{{type}}-{{operation}}", - "refId": "C" + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Read/write encryption meta duration", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -45225,6 +61532,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -45235,139 +61543,197 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Encryption", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 42 + "y": 0 }, - "id": 13016, + "height": null, + "hideTimeOverride": false, + "id": 448, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, + "custom": {}, + "decimals": null, "mappings": [ { - "from": "", - "id": 1, - "text": "Disabled", - "to": "", - "type": 1, - 
"value": "0" - }, - { - "from": "", - "id": 2, - "text": "Enabled", - "to": "", - "type": 1, - "value": "1" + "options": { + "0": { + "color": "red", + "index": null, + "text": "Disabled" + }, + "1": { + "color": "green", + "index": null, + "text": "Enabled" + } + }, + "type": "value" } ], - "noValue": "Disabled", + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "rgba(0, 0, 0, 0.2)", - "value": null - }, - { - "color": "dark-red", - "value": 0 - }, - { - "color": "dark-green", - "value": 1 - } - ] - } + "steps": "" + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 5, + "h": 7, + "w": 6, "x": 0, - "y": 55 + "y": 0 }, - "id": 14361, + "height": null, + "hideTimeOverride": false, + "id": 449, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "last" + "lastNotNull" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "tikv_log_backup_enabled{instance=~\"$instance\"}", - "instant": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_log_backup_enabled\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{ instance }}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "((\n tikv_log_backup_enabled\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Endpoint Status", "transformations": [], + "transparent": false, 
"type": "stat" }, { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The average flush size of last 30mins.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "custom": {}, + "decimals": null, + "mappings": [ + { + "options": { + "0": { + "color": "green", + "index": null, + "text": "Running" + }, + "1": { + "color": "yellow", + "index": null, + "text": "Paused" + }, + "2": { + "color": "red", + "index": null, + "text": "Error" + } + }, + "type": "value" + } + ], + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": "" }, - "unit": "bytes" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 5, - "y": 55 + "h": 7, + "w": 6, + "x": 6, + "y": 0 }, - "id": 14507, + "height": null, + "hideTimeOverride": false, + "id": 450, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "none", @@ -45380,55 +61746,69 @@ "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "increase(tikv_log_backup_flush_file_size_sum{instance=~\"$instance\"}[30m]) / on(instance) increase(tikv_log_backup_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "min((\n tikv_log_backup_task_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", - "legendFormat": "{{ instance }}", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "min((\n 
tikv_log_backup_task_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Average Flush Size ", + "title": "Task Status", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The current total flushed file number of this run.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": "" }, - "unit": "short" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 13, - "y": 55 + "h": 7, + "w": 6, + "x": 12, + "y": 0 }, - "id": 14363, + "height": null, + "hideTimeOverride": false, + "id": 451, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "none", @@ -45441,55 +61821,69 @@ "fields": "", "values": false }, - "text": {}, - "textMode": "auto" + "textMode": "name" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "round(increase(tikv_log_backup_flush_file_size_count{instance=~\"$instance\"}[30m]))", - "instant": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "tidb_log_backup_advancer_owner > 0", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{ instance }}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "tidb_log_backup_advancer_owner > 0", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - 
"title": "Flushed Files (Last 30m) Per Host", + "title": "Advancer Owner", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot. \n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "description": "The average flush size of last 30mins.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": "" }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 2, - "w": 3, - "x": 21, - "y": 55 + "h": 7, + "w": 6, + "x": 18, + "y": 0 }, - "id": 14508, + "height": null, + "hideTimeOverride": false, + "id": 452, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "none", @@ -45502,55 +61896,69 @@ "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "round(sum(increase(tikv_log_backup_flush_duration_sec_count{stage=~\"save_files\",instance=~\"$instance\"}[30m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(increase(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) / sum(increase(\n tikv_log_backup_flush_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) )", + "format": "time_series", "hide": false, - "instant": true, + 
"instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{ instance }}", - "refId": "B" + "metric": "", + "query": "(sum(increase(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) / sum(increase(\n tikv_log_backup_flush_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Flush Times (Last 30m)", + "title": "Average Flush Size", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "This is the summary of the size has been flushed, summered by the data each TiKV has flushed since last boot. \n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "description": "The current total flushed file number of this run.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "custom": {}, + "decimals": 0, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": "" }, - "unit": "bytes" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, - "w": 3, - "x": 21, - "y": 57 + "h": 7, + "w": 6, + "x": 0, + "y": 7 }, - "id": 14362, + "height": null, + "hideTimeOverride": false, + "id": 453, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "none", @@ -45563,213 +61971,219 @@ "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": 
"sum(increase(tikv_log_backup_flush_file_size_sum{instance=~\"$instance\"}[30m]))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) > 0", + "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", - "legendFormat": "{{ instance }}", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) > 0", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total Flushed Size (Last 30m)", + "title": "Flushed Files (Last 30m) Per Host", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "from": "", - "id": 1, - "text": "Running", - "to": "", - "type": 1, - "value": "0" - }, - { - "from": "", - "id": 2, - "text": "Paused", - "to": "", - "type": 1, - "value": "1" - }, - { - "from": "", - "id": 3, - "text": "Error", - "to": "", - "type": 1, - "value": "2" - } - ], - "noValue": "Disabled", + "custom": {}, + "decimals": 0, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "rgba(0, 0, 0, 0.2)", - "value": null - }, - { - "color": "dark-green", - "value": 0 - }, - { - "color": "#EAB839", - "value": 1 - }, - { - "color": "dark-red", - "value": 2 - } - ] - } + "steps": "" + }, + 
"unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 2, - "x": 0, - "y": 59 + "h": 7, + "w": 6, + "x": 6, + "y": 7 }, - "id": 14907, + "height": null, + "hideTimeOverride": false, + "id": 454, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "last" + "lastNotNull" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "min(tikv_log_backup_task_status{instance=~\"$instance\"})", - "instant": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_log_backup_flush_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ instance }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_log_backup_flush_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Task Status", + "title": "Flush Times (Last 30m)", "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "This is the summary of the size has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - 
"color": { - "mode": "thresholds" - }, - "mappings": [], - "min": 1, + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "dark-blue", - "value": null - } - ] + "steps": "" }, - "unit": "none" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 3, - "x": 2, - "y": 59 + "h": 7, + "w": 6, + "x": 12, + "y": 7 }, - "id": 15361, + "height": null, + "hideTimeOverride": false, + "id": 455, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "none", - "justifyMode": "center", + "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "last" + "lastNotNull" ], "fields": "", "values": false }, - "text": {}, - "textMode": "name" + "textMode": "auto" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "tidb_log_backup_advancer_owner > 0", - "instant": true, + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ instance }}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Advancer Owner", + "title": "Total Flushed Size (Last 30m)", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": 
"This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot. \n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "custom": {}, + "decimals": 0, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": "" }, - "unit": "short" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, - "w": 3, - "x": 21, - "y": 60 + "h": 7, + "w": 6, + "x": 18, + "y": 7 }, - "id": 14911, + "height": null, + "hideTimeOverride": false, + "id": 456, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "none", @@ -45782,62 +62196,82 @@ "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.11", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "round(sum(increase(tikv_log_backup_flush_file_size_count{instance=~\"$instance\"}[30m])))", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", + "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", - "legendFormat": "{{ instance }}", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[30m]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Flush Files (Last 30m)", + "transformations": [], + "transparent": false, "type": "stat" }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The CPU utilization of log backup threads. \n**(Note this is the average usage for a period of time, some peak of CPU usage may be lost.)**", + "description": "The CPU utilization of log backup threads. \n**(Note this is the average usage for a period of time, some peak of CPU usage may be lost.)**", "editable": true, "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, - "fill": 0, - "fillGradient": 0, - "grid": {}, "gridPos": { - "h": 10, - "w": 6, + "h": 7, + "w": 12, "x": 0, - "y": 63 + "y": 14 }, - "hiddenSeries": false, - "id": 13262, + "height": null, + "hideTimeOverride": false, + "id": 457, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, - "avg": true, + "avg": false, "current": true, "hideEmpty": true, + "hideZero": true, "max": true, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -45845,47 +62279,55 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, 
"renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_thread_cpu_seconds_total{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}[2m])) by (instance)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{name}}", - "metric": "tikv_thread_cpu_seconds_total", - "refId": "A", - "step": 4 + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "CPU Usage", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -45893,14 +62335,16 @@ }, "yaxes": [ { + "decimals": null, "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -45911,81 +62355,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + 
"cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 63 + "h": 7, + "w": 12, + "x": 12, + "y": 14 }, - "hiddenSeries": false, - "id": 12843, + "height": null, + "hideTimeOverride": false, + "id": 458, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, - "avg": true, + "avg": false, "current": true, - "max": false, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tikv_log_backup_handle_kv_batch_sum{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_handle_kv_batch_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + 
"intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_handle_kv_batch_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Handle Event Rate", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -45993,14 +62468,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -46011,79 +62488,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The data rate of initial scanning emitting events.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, - "w": 6, - "x": 12, - "y": 63 + "h": 7, + "w": 12, + "x": 0, + "y": 21 }, - "hiddenSeries": false, - "id": 14135, + "height": null, + "hideTimeOverride": false, + "id": 459, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, - "avg": true, - "current": false, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, + 
"rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tikv_log_backup_incremental_scan_bytes_sum{instance=~\"$instance\"}[$__rate_interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_incremental_scan_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_incremental_scan_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Initial Scan Generate Event Throughput", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -46091,6 +62601,7 @@ }, "yaxes": [ { + "decimals": null, "format": "binBps", "label": null, "logBase": 1, @@ -46099,140 +62610,123 @@ "show": 
true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "alert": { - "alertRuleTags": {}, - "conditions": [ - { - "evaluator": { - "params": [ - 600000 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "5m", - "frequency": "1m", - "handler": 1, - "name": "Checkpoint Lag Too Huge", - "noDataState": "no_data", - "notifications": [] - }, "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "ms" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 63 + "h": 7, + "w": 12, + "x": 12, + "y": 21 }, - "hiddenSeries": false, - "id": 14774, + "height": null, + "hideTimeOverride": false, + "id": 460, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true 
+ "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "time() * 1000 - max(tidb_log_backup_last_checkpoint / 262144 > 0) by (task)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(time() * 1000 - max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0)", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{ task }}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "time() * 1000", - "hide": true, - "interval": "", - "legendFormat": "Current Time", - "refId": "B" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 600000, - "visible": true + "metric": "", + "query": "(time() * 1000 - max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0)", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Abnormal Checkpoint TS Lag", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -46240,14 +62734,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ms", "label": null, "logBase": 1, - "max": "3000000", - "min": "0", + "max": null, + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -46258,81 +62754,112 @@ ], "yaxis": { 
"align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The estimated memory usage by the streaming backup module.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, - "w": 6, + "h": 7, + "w": 12, "x": 0, - "y": 73 + "y": 28 }, - "hiddenSeries": false, - "id": 13100, + "height": null, + "hideTimeOverride": false, + "id": 461, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_log_backup_heap_memory{instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_log_backup_heap_memory\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", + "hide": 
false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_log_backup_heap_memory\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Memory Of Events", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -46340,14 +62867,16 @@ }, "yaxes": [ { + "decimals": null, "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -46358,91 +62887,127 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 73 + "h": 7, + "w": 12, + "x": 12, + "y": 28 }, - "hiddenSeries": false, - "id": 14630, + "height": null, + "hideTimeOverride": false, + "id": 462, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, "current": true, - "max": false, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, 
+ "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "tikv_log_backup_observed_region{instance=~\"$instance\"}", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_log_backup_observed_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "sum((\n tikv_log_backup_observed_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "sum(tikv_log_backup_observed_region{instance=~\"$instance\"})", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_log_backup_observed_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "total", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{instance}}-total", + "metric": "", + "query": "sum((\n tikv_log_backup_observed_region\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Observed Region Count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -46450,6 +63015,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -46458,7 +63024,8 @@ "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -46468,91 +63035,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The errors met when backing up.\n**They are retryable, don't worry.**", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 73 + "h": 7, + "w": 8, + "x": 0, + "y": 35 }, - "hiddenSeries": false, - "id": 13101, + "height": null, + "hideTimeOverride": false, + "id": 463, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, "hideZero": true, - "max": false, + "max": true, "min": false, - "show": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, 
"total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + "span": null, + "stack": false, + "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "increase(tikv_log_backup_errors{instance=~\"$instance\"}[$__interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_log_backup_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", "format": "time_series", "hide": false, "instant": false, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{type}}@{{instance}}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "tikv_log_backup_errors{instance=~\"$instance\"}", - "hide": true, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "", - "refId": "B" + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_log_backup_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Errors", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -46560,14 +63148,16 @@ }, 
"yaxes": [ { - "format": "none", + "decimals": null, + "format": "opm", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -46578,90 +63168,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", + "description": "The errors met when backing up.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 73 + "h": 7, + "w": 8, + "x": 8, + "y": 35 }, - "hiddenSeries": false, - "id": 14910, + "height": null, + "hideTimeOverride": false, + "id": 464, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Current Time", - "dashes": true, - "fill": 0 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": 
null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "max(tidb_log_backup_last_checkpoint{instance=~\"$instance\"} / 262144 > 0) by (task)", - "instant": false, - "interval": "", - "legendFormat": "{{ task }}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "time() * 1000", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_log_backup_fatal_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "Current Time", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{type}}-{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_log_backup_fatal_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Checkpoint TS of Tasks", + "title": "Fatal Errors", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -46669,7 +63281,8 @@ }, "yaxes": [ { - "format": "dateTimeAsIsoNoDateIfToday", + "decimals": null, + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -46677,6 +63290,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -46687,100 +63301,138 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The errors met when backing up.", + "description": null, + "editable": 
true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 78 + "h": 7, + "w": 8, + "x": 16, + "y": 35 }, - "hiddenSeries": false, - "id": 14908, + "height": null, + "hideTimeOverride": false, + "id": 465, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, "hideZero": true, - "max": false, + "max": true, "min": false, - "show": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, "nullPointMode": "null", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "Current Time", + "bars": false, + "dashes": true, + "fill": 0, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "increase(tikv_log_backup_fatal_errors{instance=~\"$instance\"}[$__interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 
262144 > 0", "format": "time_series", "hide": false, "instant": false, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{type}}@{{instance}}", - "refId": "A" + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{task}}", + "metric": "", + "query": "max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0", + "refId": "", + "step": 10, + "target": "" }, { - "exemplar": true, - "expr": "", - "hide": true, - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "", - "refId": "B" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0, - "yaxis": "left" + "datasource": "${DS_TEST-CLUSTER}", + "expr": "time() * 1000", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Current Time", + "metric": "", + "query": "time() * 1000", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Fatal Errors", + "title": "Checkpoint TS of Tasks", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -46788,14 +63440,16 @@ }, "yaxes": [ { - "format": "none", + "decimals": null, + "format": "dateTimeAsIsoNoDateIfToday", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -46806,668 +63460,836 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - 
"colorScheme": "interpolateBlues", + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The duration of flushing a batch of file.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 83 + "y": 42 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 14078, + "id": 466, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_flush_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_files\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_flush_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_flush_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + 
"step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Flush Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateReds", + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of scanning the initial data from local DB and transform them into apply events. 
\n", + "description": "The duration of scanning the initial data from local DB and transform them into apply events.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 83 + "y": 42 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 14136, + "id": 467, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_initial_scan_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_initial_scan_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_initial_scan_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Initial scanning duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": 
"individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateGreens", + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of converting a raft request into a apply event. 
\n*This duration is for consuming a batch of events.*", + "description": "The duration of converting a raft request into a apply event.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 12, - "y": 83 + "y": 42 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 13934, + "id": 468, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"to_stream_event\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"to_stream_event\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"to_stream_event\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Convert Raft Event duration", "tooltip": { - "show": true, - 
"showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateGreens", + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of waiting the mutex of the controller. 
\n*This duration is for consuming a batch of events.*", + "description": "The duration of waiting the mutex of the controller.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 18, - "y": 83 + "y": 42 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 12840, + "id": 469, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"get_router_lock\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"get_router_lock\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"get_router_lock\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Wait for Lock Duration", "tooltip": { - "show": true, - "showHistogram": true + 
"msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", - "colorScheme": "interpolateCividis", + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": "The number of KV-modify of each raft command observed.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 90 + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 15059, + "id": 470, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": 
"sum(increase(tikv_log_backup_handle_kv_batch_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_handle_kv_batch_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_handle_kv_batch_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Command Batch Size", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The total cost of saving an event into temporary file. 
\n*This duration is for consuming a batch of events.*", + "description": "The total cost of saving an event into temporary file.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 90 + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 12841, + "id": 471, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_event_handle_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=~\"save_to_temp_file\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_to_temp_file\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_to_temp_file\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Save to Temp File Duration", "tooltip": { - "show": true, - "showHistogram": 
true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The total cost of writing a event into temporary file.\nComparing to the ***Save*** duration, it doesn't contain the time cost of routing the task by range / task. 
\n*This duration is for consuming a batch of events, for one region or one table.*", + "description": "The total cost of writing a event into temporary file.\nComparing to the ***Save*** duration, it doesn't contain the time cost of routing the task by range / task.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 12, - "y": 90 + "y": 49 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 13552, + "id": 472, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"write_to_tempfile\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"write_to_tempfile\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"write_to_tempfile\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": 
"" } ], + "timeFrom": null, + "timeShift": null, "title": "Write to Temp File Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of collecting metadata and call the UNIX system call *write* for each event. 
\n*This duration is for consuming a batch of events, for one region or one table.*", + "description": "The duration of collecting metadata and call the UNIX system call *write* for each event.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, "x": 18, - "y": 90 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 13551, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "y": 49 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 473, + "interval": null, + "legend": { + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_on_event_duration_seconds_bucket{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", stage=\"syscall_write\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"syscall_write\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"syscall_write\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 
10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "System Write Call Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, @@ -47476,71 +64298,106 @@ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The internal message type count.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 12, + "h": 7, + "w": 8, "x": 0, - "y": 97 + "y": 56 }, - "hiddenSeries": false, - "id": 14914, + "height": null, + "hideTimeOverride": false, + "id": 474, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, - "show": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, 
+ "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_log_backup_interal_actor_acting_duration_sec_count{instance=~\"$instance\"}[$__rate_interval])) by (message)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ message }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{message}}", + "metric": "", + "query": "sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Internal Message Type", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -47548,14 +64405,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 2, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -47566,83 +64425,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - 
"watch_task": "orange" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The internal handling message duration.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "s" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 97 + "h": 7, + "w": 8, + "x": 8, + "y": 56 }, - "hiddenSeries": false, - "id": 14912, + "height": null, + "hideTimeOverride": false, + "id": 475, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(histogram_quantile(0.99, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket{instance=~\"$instance\"}[10m]))) by (message)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n 
sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ message }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{message}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Internal Message Handling Duration (P99)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -47650,95 +64538,132 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The internal handling message duration.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, 
+ "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 97 + "h": 7, + "w": 8, + "x": 16, + "y": 56 }, - "hiddenSeries": false, - "id": 14913, + "height": null, + "hideTimeOverride": false, + "id": 476, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(histogram_quantile(0.9, rate(tikv_log_backup_interal_actor_acting_duration_sec_bucket{instance=~\"$instance\"}[10m]))) by (message)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ message }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{message}}", + "metric": "", + "query": "histogram_quantile(0.9,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Internal Message Handling Duration (P90)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -47746,14 +64671,16 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -47764,78 +64691,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The internal read throughput of RocksDB during initial scanning. 
This panel can roughly present the read through to the hard disk of initial scanning.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 6, + "h": 7, + "w": 12, "x": 0, - "y": 103 + "y": 63 }, - "hiddenSeries": false, - "id": 14271, + "height": null, + "hideTimeOverride": false, + "id": 477, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_log_backup_initial_scan_operations{instance=~\"$instance\", op=~\"read_bytes\"}[$__rate_interval])) BY (op, cf)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf) ", + "format": "time_series", + "hide": 
false, + "instant": false, "interval": "", - "legendFormat": "{{ cf }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{cf}}", + "metric": "", + "query": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Initial Scan RocksDB Throughput ", + "title": "Initial Scan RocksDB Throughput", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -47843,14 +64804,16 @@ }, "yaxes": [ { + "decimals": null, "format": "binBps", "label": null, - "logBase": 2, + "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -47861,78 +64824,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "Misc statistics of RocksDB during initial scanning.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 103 + "h": 7, + "w": 12, + "x": 12, + "y": 63 }, - "hiddenSeries": false, - "id": 14270, + "height": null, + "hideTimeOverride": false, + "id": 478, + 
"interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_log_backup_initial_scan_operations{instance=~\"$instance\", op!~\"read_bytes\"}[$__rate_interval])) BY (op, cf) > 0", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op!~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf, op) > 0", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ cf }}/{{ op }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{cf}}-{{op}}", + "metric": "", + "query": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op!~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf, op) > 0", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Initial Scan RocksDB Operation ", + "title": "Initial Scan RocksDB Operation", "tooltip": { + 
"msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -47940,14 +64937,16 @@ }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, - "logBase": 2, + "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -47958,80 +64957,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "leader-changed": "blue", - "region-changed": "purple" - }, - "bars": true, - "dashLength": 10, - "dashes": false, + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The reason of triggering initial scanning.", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 103 + "h": 7, + "w": 12, + "x": 0, + "y": 70 }, - "hiddenSeries": false, - "id": 14915, + "height": null, + "hideTimeOverride": false, + "id": 479, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as 
zero", "options": { - "alertThreshold": false + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tikv_log_backup_initial_scan_reason{instance=~\"$instance\"}[$__rate_interval])) by (reason)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_initial_scan_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ message }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{reason}}", + "metric": "", + "query": "sum(rate(\n tikv_log_backup_initial_scan_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Initial Scanning Trigger Reason", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48039,6 +65070,7 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, @@ -48047,6 +65079,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -48057,80 +65090,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "del": "dark-red", - "put": "green" - }, + "aliasColors": {}, "bars": false, 
- "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "", + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 103 + "h": 7, + "w": 12, + "x": 12, + "y": 70 }, - "hiddenSeries": false, - "id": 15176, + "height": null, + "hideTimeOverride": false, + "id": 480, + "interval": null, + "isNew": true, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tikv_log_backup_metadata_key_operation{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_log_backup_metadata_key_operation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "format": "time_series", + 
"hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ type }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_log_backup_metadata_key_operation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Region Checkpoint Key Putting", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48138,6 +65203,7 @@ }, "yaxes": [ { + "decimals": null, "format": "cps", "label": null, "logBase": 1, @@ -48146,6 +65212,7 @@ "show": true }, { + "decimals": null, "format": "short", "label": null, "logBase": 1, @@ -48156,261 +65223,320 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + "cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, - "w": 6, + "w": 12, "x": 0, - "y": 109 + "y": 77 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 15544, + "id": 481, + "interval": null, "legend": { - "alignAsTable": 
true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tidb_log_backup_advancer_batch_size_bucket{type=\"checkpoint\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_log_backup_advancer_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"checkpoint\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tidb_log_backup_advancer_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"checkpoint\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Request Checkpoint Batch Size", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "none", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { + "cacheTimeout": null, "cards": { - "cardPadding": 0, - "cardRound": 0 + 
"cardPadding": null, + "cardRound": null }, "color": { - "cardColor": "#FF9830", - "colorScale": "linear", + "cardColor": "#b4ff00", + "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, "max": null, - "min": 0, + "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 7, - "w": 6, - "x": 6, - "y": 109 + "w": 12, + "x": 12, + "y": 77 }, "heatmap": {}, + "height": null, + "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 15716, + "id": 482, + "interval": null, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "show": false }, "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": null, + "repeat": null, + "repeatDirection": null, "reverseYBuckets": false, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tidb_log_backup_advancer_tick_duration_sec_bucket{tidb_cluster=\"$tidb_cluster\", step=~\"tick\"}[$__interval])) by (le)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"tick\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{le}}", - "refId": "A" + "metric": "", + "query": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"tick\"}\n 
[$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Tick Duration", "tooltip": { - "show": true, - "showHistogram": true + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" }, - "tooltipDecimals": 1, + "transformations": [], + "transparent": false, "type": "heatmap", "xAxis": { - "show": true + "mode": "time", + "name": null, + "show": true, + "values": [] }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": 1, "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true, - "splitFactor": null + "show": true }, "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null }, { - "aliasColors": { - "epoch-not-match": "purple", - "not-leader": "blue", - "watch_task": "orange" - }, - "bars": true, - "dashLength": 10, - "dashes": false, + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The reason of advancer failed to be advanced.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "none" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 12, - "y": 109 + "w": 12, + "x": 0, + "y": 84 }, - "hiddenSeries": false, - "id": 23763572666, + "height": null, + "hideTimeOverride": false, + "id": 483, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": 
"max", + "sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, - "stack": true, + "span": null, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tidb_log_backup_region_request_failure{reason!=\"retryable-scan-region\"}[$__interval])) by (reason)", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ reason }}", - "queryType": "randomWalk", - "refId": "A" - }, - { - "exemplar": true, - "expr": "", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_log_backup_region_request_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",reason!=\"retryable-scan-region\"}\n [$__rate_interval]\n)) by (reason) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{reason}}", + "metric": "", + "query": "sum(rate(\n tidb_log_backup_region_request_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",reason!=\"retryable-scan-region\"}\n [$__rate_interval]\n)) by (reason) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Region Checkpoint Failure Reason", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", 
"xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48418,15 +65544,17 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -48436,101 +65564,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "fail": "red", - "success": "green", - "watch_task": "orange" - }, - "bars": true, - "dashLength": 10, - "dashes": false, + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The result of getting region checkpoints.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "none" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 18, - "y": 109 + "w": 12, + "x": 12, + "y": 84 }, - "hiddenSeries": false, - "id": 23763572665, + "height": null, + "hideTimeOverride": false, + "id": 484, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + 
"dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "fail", - "transform": "negative-Y", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(increase(tidb_log_backup_region_request[$__interval])) by (result)", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ result }}", - "queryType": "randomWalk", - "refId": "A" - }, - { - "exemplar": true, - "expr": "", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_log_backup_region_request\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{result}}", + "metric": "", + "query": "sum(rate(\n tidb_log_backup_region_request\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Request Result", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48538,15 +65677,17 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "none", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -48556,96 +65697,112 @@ ], "yaxis": { "align": false, - 
"alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "watch_task": "orange" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The internal handling message duration.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "s" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, + "w": 12, "x": 0, - "y": 116 + "y": 91 }, - "hiddenSeries": false, - "id": 15359, + "height": null, + "hideTimeOverride": false, + "id": 485, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "consistency-check", - "yaxis": 1 - }, - { - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "alias": "get-checkpoints-in-range", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - 
"exemplar": true, - "expr": "sum(histogram_quantile(0.99, rate(tidb_log_backup_advancer_tick_duration_sec_bucket[10m]))) by (step)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{ step }}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Tick Duration (P99)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48653,15 +65810,17 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "s", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -48671,96 +65830,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "watch_task": "orange" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The internal handling message duration.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "s" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + 
"steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 6, - "y": 116 + "w": 12, + "x": 12, + "y": 91 }, - "hiddenSeries": false, - "id": 15360, + "height": null, + "hideTimeOverride": false, + "id": 486, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "alias": "get-checkpoints-in-range", - "yaxis": 2 - }, - { - "alias": "consistency-check", - "yaxis": 1 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(histogram_quantile(0.9, rate(tidb_log_backup_advancer_tick_duration_sec_bucket[10m]))) by (step)", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", + 
"format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 1, "legendFormat": "{{ step }}", - "queryType": "randomWalk", - "refId": "A" + "metric": "", + "query": "histogram_quantile(0.9,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Tick Duration (P90)", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48768,15 +65943,17 @@ }, "yaxes": [ { + "decimals": null, "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "s", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -48786,96 +65963,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "watch_task": "orange" - }, + "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The frequent of getting region level checkpoint.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "none" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 12, - "y": 116 + "w": 12, + "x": 0, + "y": 98 }, - "hiddenSeries": false, - "id": 23763572733, + "height": null, + 
"hideTimeOverride": false, + "id": 487, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "alias": "get-checkpoints-in-range", - "yaxis": 2 - }, - { - "alias": "consistency-check", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "rate(tidb_log_backup_advancer_tick_duration_sec_count{step=\"get-regions-in-range\"}[$__rate_interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"get-regions-in-range\"}\n [$__rate_interval]\n)) by (step, instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{ step }} {{ instance }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{step}}-{{instance}}", + "metric": "", + "query": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"get-regions-in-range\"}\n [$__rate_interval]\n)) by (step, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Get Region Operation Count", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48883,15 +66076,17 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "s", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -48901,97 +66096,112 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } }, { - "aliasColors": { - "watch_task": "orange" - }, - "bars": true, - "dashLength": 10, - "dashes": false, + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The variant of checkpoint group.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "unit": "none" - }, - "overrides": [] + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 18, - "y": 116 + "w": 12, + "x": 12, + "y": 98 }, - "hiddenSeries": false, - "id": 23763572734, + "height": null, + "hideTimeOverride": false, + "id": 488, + "interval": null, + "isNew": true, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + 
"max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": false, + "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "get-checkpoints-of-store", - "yaxis": 2 - }, - { - "alias": "get-checkpoints-in-range", - "yaxis": 2 - }, - { - "alias": "consistency-check", - "yaxis": 2 - } - ], - "spaceLength": 10, + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "increase(tidb_log_backup_advancer_tick_duration_sec_count{step=\"try-advance\"}[$__interval])", + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"try-advance\"}\n [$__rate_interval]\n)) by (step, instance) ", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ step }} {{ instance }}", - "queryType": "randomWalk", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{step}}-{{instance}}", + "metric": "", + "query": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"try-advance\"}\n [$__rate_interval]\n)) by (step, instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, "title": "Try Advance Trigger 
Time", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", "name": null, "show": true, @@ -48999,15 +66209,17 @@ }, "yaxes": [ { + "decimals": null, "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "s", + "decimals": null, + "format": "short", "label": null, "logBase": 1, "max": null, @@ -49017,50 +66229,98 @@ ], "yaxis": { "align": false, - "alignLevel": null + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Backup Log", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 52 + "y": 0 }, - "id": 24763573238, + "height": null, + "hideTimeOverride": false, + "id": 489, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The changing trend of the slowness on I/O operations. 'value > 0' means the related store might has a slow trend.", + "description": "The changing trend of the slowness on I/O operations. 
'value > 0' means the related store might have a slow trend.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 53 + "y": 0 }, - "hiddenSeries": false, - "id": 24763574116, + "height": null, + "hideTimeOverride": false, + "id": 490, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -49068,88 +66328,132 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "editorMode": "code", - "expr": "tikv_raftstore_slow_trend{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "expr": "sum((\n tikv_raftstore_slow_trend\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "range": true, - "refId": "A" + "metric": 
"", + "query": "sum((\n tikv_raftstore_slow_trend\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], - "timeRegions": [], + "timeFrom": null, + "timeShift": null, "title": "Slow Trend", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { "mode": "time", + "name": null, "show": true, "values": [] }, "yaxes": [ { + "decimals": null, "format": "none", + "label": null, "logBase": 1, + "max": null, + "min": null, "show": true }, { + "decimals": null, "format": "short", + "label": null, "logBase": 1, + "max": null, + "min": null, "show": true } ], "yaxis": { - "align": false + "align": false, + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The changing trend of QPS on each store. 
'value < 0' means the QPS has a dropping trend.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 53 + "y": 0 }, - "hiddenSeries": false, - "id": 24763574117, + "height": null, + "hideTimeOverride": false, + "id": 491, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -49157,88 +66461,132 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "editorMode": "code", - "expr": "tikv_raftstore_slow_trend_result{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "expr": "sum((\n tikv_raftstore_slow_trend_result\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "range": true, - "refId": "A" + "metric": 
"", + "query": "sum((\n tikv_raftstore_slow_trend_result\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], - "timeRegions": [], + "timeFrom": null, + "timeShift": null, "title": "QPS Changing Trend", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { "mode": "time", + "name": null, "show": true, "values": [] }, "yaxes": [ { + "decimals": null, "format": "none", + "label": null, "logBase": 1, + "max": null, + "min": null, "show": true }, { + "decimals": null, "format": "short", + "label": null, "logBase": 1, + "max": null, + "min": null, "show": true } ], "yaxis": { - "align": false + "align": false, + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The sampling latency of recent queries. 
A larger value indicates that the store is more likely to be the slowest store.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 60 + "y": 7 }, - "hiddenSeries": false, - "id": 24763574115, + "height": null, + "hideTimeOverride": false, + "id": 492, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, "show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -49246,59 +66594,64 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "editorMode": "code", - "expr": "tikv_raftstore_slow_trend_l0{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "expr": "sum((\n tikv_raftstore_slow_trend_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "range": true, - 
"refId": "A" - } - ], - "thresholds": [ - { - "value": 275000, - "colorMode": "critical", - "op": "gt", - "fill": false, - "line": true, - "visible": true, - "yaxis": "left" + "metric": "", + "query": "sum((\n tikv_raftstore_slow_trend_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], - "timeRegions": [], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "AVG Sampling Latency", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { "mode": "time", + "name": null, "show": true, "values": [] }, "yaxes": [ { - "format": "µs", + "decimals": null, + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -49306,41 +66659,67 @@ "show": true }, { + "decimals": null, "format": "short", + "label": null, "logBase": 1, + "max": null, + "min": null, "show": true } ], "yaxis": { - "align": false + "align": false, + "alignLevel": 0 } }, { "aliasColors": {}, "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", "description": "The QPS of each store.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "fill": 1, - "fillGradient": 0, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 60 + "y": 7 }, - "hiddenSeries": false, - "id": 24763573970, + "height": null, + "hideTimeOverride": false, + "id": 493, + "interval": null, + "isNew": true, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, - "min": true, + "min": false, "rightSide": true, 
"show": true, - "sort": "current", + "sideWidth": null, + "sort": "max", "sortDesc": true, "total": false, "values": true @@ -49348,235 +66727,265 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "alertThreshold": true, + "dataLinks": [] }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, + "repeatDirection": null, "seriesOverrides": [], - "spaceLength": 10, + "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "editorMode": "code", - "expr": "tikv_raftstore_slow_trend_result_value{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "expr": "sum((\n tikv_raftstore_slow_trend_result_value\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, - "intervalFactor": 2, + "instant": false, + "interval": "", + "intervalFactor": 1, "legendFormat": "{{instance}}", - "range": true, - "refId": "A" + "metric": "", + "query": "sum((\n tikv_raftstore_slow_trend_result_value\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], - "timeRegions": [], + "timeFrom": null, + "timeShift": null, "title": "QPS of each store", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" }, + "transformations": [], + "transparent": false, "type": "graph", "xaxis": { "mode": "time", + "name": null, "show": true, "values": [] }, "yaxes": [ { + "decimals": null, "format": "ops", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "decimals": null, "format": "short", + "label": null, 
"logBase": 1, + "max": null, + "min": null, "show": true } ], "yaxis": { - "align": false + "align": false, + "alignLevel": 0 } } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Slow Trend Statistics", + "transformations": [], + "transparent": false, "type": "row" } ], "refresh": "1m", - "schemaVersion": 27, + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", "tags": [], "templating": { "list": [ { "allValue": null, - "current": {}, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, "datasource": "${DS_TEST-CLUSTER}", - "definition": "", - "description": null, - "error": null, "hide": 2, "includeAll": false, - "label": "K8s-cluster", + "label": "k8s_cluster", "multi": false, "name": "k8s_cluster", "options": [], - "query": { - "query": "label_values(tikv_engine_block_cache_size_bytes, k8s_cluster)", - "refId": "quota-k8s_cluster-Variable-Query" - }, + "query": "label_values(tikv_engine_block_cache_size_bytes, k8s_cluster)", "refresh": 2, - "regex": "", - "skipUrlSync": false, + "regex": null, "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, - "current": {}, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, "datasource": "${DS_TEST-CLUSTER}", - "definition": "", - "description": null, - "error": null, "hide": 2, "includeAll": false, "label": "tidb_cluster", "multi": false, "name": "tidb_cluster", "options": [], - "query": { - "query": "label_values(tikv_engine_block_cache_size_bytes{k8s_cluster=\"$k8s_cluster\"}, tidb_cluster)", - "refId": "quota-tidb_cluster-Variable-Query" - }, + "query": 
"label_values(tikv_engine_block_cache_size_bytes{k8s_cluster =\"$k8s_cluster\"}, tidb_cluster)", "refresh": 2, - "regex": "", - "skipUrlSync": false, + "regex": null, "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, - "current": {}, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, "datasource": "${DS_TEST-CLUSTER}", - "definition": "", - "description": null, - "error": null, "hide": 0, "includeAll": true, "label": "db", "multi": true, "name": "db", "options": [], - "query": { - "query": "label_values(tikv_engine_block_cache_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, db)", - "refId": "quota-db-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, + "query": "label_values(tikv_engine_block_cache_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, db)", + "refresh": 2, + "regex": null, "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, - "current": {}, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, "datasource": "${DS_TEST-CLUSTER}", - "definition": "label_values(tikv_storage_command_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, type)", - "description": null, - "error": null, "hide": 0, "includeAll": true, "label": "command", "multi": true, "name": "command", "options": [], - "query": { - "query": "query_result(tikv_storage_command_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"} != 0)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, + "query": "query_result(tikv_storage_command_total{k8s_cluster=\"$k8s_cluster\", 
tidb_cluster=\"$tidb_cluster\"} != 0)", + "refresh": 2, "regex": "/\\btype=\"([^\"]+)\"/", - "skipUrlSync": false, "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": ".*", - "current": {}, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, "datasource": "${DS_TEST-CLUSTER}", - "definition": "", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Instance", + "label": "instance", "multi": false, "name": "instance", "options": [], - "query": { - "query": "label_values(tikv_engine_size_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, instance)", - "refId": "quota-instance-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, + "query": "label_values(tikv_engine_size_bytes{k8s_cluster =\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, instance)", + "refresh": 2, + "regex": null, "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, - "current": {}, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, "datasource": "${DS_TEST-CLUSTER}", - "definition": "label_values(tikv_engine_titandb_num_live_blob_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, db)", - "description": null, - "error": null, "hide": 2, "includeAll": true, "label": "titan_db", "multi": true, "name": "titan_db", "options": [], - "query": { - "query": "label_values(tikv_engine_titandb_num_live_blob_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, db)", - "refId": "quota-titan_db-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, + "query": 
"label_values(tikv_engine_titandb_num_live_blob_file{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, db)", + "refresh": 2, + "regex": null, "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } @@ -49587,6 +66996,7 @@ "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ "5s", "10s", @@ -49614,5 +67024,5 @@ "timezone": "browser", "title": "Test-Cluster-TiKV-Details", "uid": "RDVQiEzZz", - "version": 1 + "version": 0 } diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 new file mode 100644 index 00000000000..abb8baa6770 --- /dev/null +++ b/metrics/grafana/tikv_details.json.sha256 @@ -0,0 +1 @@ +1b98912ed3e87960a2ce063f6063b4f4f6fe6bbba98518ceabd768036287763e ./metrics/grafana/tikv_details.json diff --git a/scripts/check-dashboards b/scripts/check-dashboards new file mode 100755 index 00000000000..fdb73c28168 --- /dev/null +++ b/scripts/check-dashboards @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -euo pipefail + +for sha256 in ./metrics/grafana/*.sha256; do + if ! sha256sum -c "$sha256"; then + dashboard=$(basename "$sha256" .sha256) + echo "Please avoid manually modifying $dashboard" + echo "Try ./scripts/gen-tikv-details-dashboard" + exit 1 + fi +done + +echo "Dashboards check passed." diff --git a/scripts/gen-tikv-details-dashboard b/scripts/gen-tikv-details-dashboard new file mode 100755 index 00000000000..2c91cf3dbb9 --- /dev/null +++ b/scripts/gen-tikv-details-dashboard @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +set -euo pipefail + +docker build -t tikv-dashboard-gen -f - . 
< /metrics/grafana/\$name.json.sha256 + done + " From 7b1ee1181bb15ccf8b04d1fbbe40c9e3c3df4602 Mon Sep 17 00:00:00 2001 From: wjHuang Date: Wed, 29 Nov 2023 16:53:48 +0800 Subject: [PATCH 1037/1149] tidb_query_expr: fix incorrect intdiv for decimal type (#16025) close tikv/tikv#16024 Signed-off-by: wjhuang2016 Co-authored-by: iosmanthus --- .../tidb_query_expr/src/impl_arithmetic.rs | 73 ++++++++++++++++--- components/tidb_query_expr/src/lib.rs | 9 ++- 2 files changed, 70 insertions(+), 12 deletions(-) diff --git a/components/tidb_query_expr/src/impl_arithmetic.rs b/components/tidb_query_expr/src/impl_arithmetic.rs index 2f48fec4693..5960e69c2cd 100644 --- a/components/tidb_query_expr/src/impl_arithmetic.rs +++ b/components/tidb_query_expr/src/impl_arithmetic.rs @@ -4,7 +4,7 @@ use num_traits::identities::Zero; use tidb_query_codegen::rpn_fn; use tidb_query_common::Result; use tidb_query_datatype::{ - codec::{self, data_type::*, div_i64, div_i64_with_u64, div_u64_with_i64, Error}, + codec::{self, data_type::*, div_i64, div_i64_with_u64, div_u64_with_i64, mysql::Res, Error}, expr::EvalContext, }; @@ -452,21 +452,39 @@ fn int_divide_decimal(ctx: &mut EvalContext, lhs: &Decimal, rhs: &Decimal) -> Re let result = arithmetic_with_ctx::(ctx, lhs, rhs)?; if let Some(result) = result { let result = result.as_i64(); - Ok(if result.is_truncated() { - Some(result.unwrap()) - } else { - result - .into_result_with_overflow_err( - ctx, - Error::overflow("BIGINT", format!("({} / {})", lhs, rhs)), - ) - .map(Some)? 
- }) + match result { + Res::Ok(i) => Ok(Some(i)), + Res::Truncated(i) => Ok(Some(i)), + _ => Err(Error::overflow("BIGINT", format!("({} / {})", lhs, rhs)).into()), + } } else { Ok(None) } } +#[rpn_fn(capture = [ctx])] +#[inline] +fn int_divide_decimal_unsigned( + ctx: &mut EvalContext, + lhs: &Decimal, + rhs: &Decimal, +) -> Result> { + let result = arithmetic_with_ctx::(ctx, lhs, rhs)?; + if let Some(result) = result { + let unsigned_result = result.as_u64(); + if unsigned_result.is_overflow() { + let signed_result = result.as_i64(); + return if signed_result.unwrap() == 0 && signed_result.is_truncated() { + Ok(Some(0)) + } else { + Err(Error::overflow("BIGINT UNSIGNED", format!("({} / {})", lhs, rhs)).into()) + }; + } + return Ok(Some(unsigned_result.unwrap() as i64)); + } + Ok(None) +} + pub struct DecimalDivide; impl ArithmeticOpWithCtx for DecimalDivide { @@ -962,6 +980,7 @@ mod tests { // divide by zero (Some("0.0"), Some("0.0"), None), (None, None, None), + (Some("0"), Some("45584"), Some(0)), ]; for (lhs, rhs, expected) in test_cases { @@ -995,6 +1014,38 @@ mod tests { } } + #[test] + fn test_int_divide_decimal_unsigned_overflow() { + let lft = FieldTypeBuilder::new() + .tp(FieldTypeTp::NewDecimal) + .flag(FieldTypeFlag::UNSIGNED) + .build(); + let rft = FieldTypeBuilder::new() + .tp(FieldTypeTp::NewDecimal) + .flag(FieldTypeFlag::UNSIGNED) + .build(); + let output: Option = RpnFnScalarEvaluator::new() + .push_param_with_field_type(Decimal::from(1), lft) + .push_param_with_field_type(Decimal::from_f64(-2_f64).unwrap(), rft) + .evaluate(ScalarFuncSig::IntDivideDecimal) + .unwrap(); + assert_eq!(output, Some(0)); + + let lft = FieldTypeBuilder::new() + .tp(FieldTypeTp::NewDecimal) + .flag(FieldTypeFlag::UNSIGNED) + .build(); + let rft = FieldTypeBuilder::new() + .tp(FieldTypeTp::NewDecimal) + .flag(FieldTypeFlag::UNSIGNED) + .build(); + let output: Result> = RpnFnScalarEvaluator::new() + .push_param_with_field_type(Decimal::from(1), lft) + 
.push_param_with_field_type(Decimal::from_f64(-1_f64).unwrap(), rft) + .evaluate(ScalarFuncSig::IntDivideDecimal); + assert!(output.is_err(), "should be error"); + } + #[test] fn test_real_multiply() { let should_pass = vec![(1.01001, -0.01, Real::new(-0.0101001).ok())]; diff --git a/components/tidb_query_expr/src/lib.rs b/components/tidb_query_expr/src/lib.rs index f1aae1de746..50e10681587 100644 --- a/components/tidb_query_expr/src/lib.rs +++ b/components/tidb_query_expr/src/lib.rs @@ -284,6 +284,13 @@ fn divide_mapper(lhs_is_unsigned: bool, rhs_is_unsigned: bool) -> RpnFnMeta { } } +fn divide_decimal_mapper(lhs_is_unsigned: bool, rhs_is_unsigned: bool) -> RpnFnMeta { + match (lhs_is_unsigned, rhs_is_unsigned) { + (false, false) => int_divide_decimal_fn_meta(), + _ => int_divide_decimal_unsigned_fn_meta(), + } +} + fn map_rhs_int_sig(value: ScalarFuncSig, children: &[Expr], mapper: F) -> Result where F: Fn(bool) -> RpnFnMeta, @@ -421,7 +428,7 @@ fn map_expr_node_to_rpn_func(expr: &Expr) -> Result { ScalarFuncSig::DivideDecimal => arithmetic_with_ctx_fn_meta::(), ScalarFuncSig::DivideReal => arithmetic_with_ctx_fn_meta::(), ScalarFuncSig::IntDivideInt => map_int_sig(value, children, divide_mapper)?, - ScalarFuncSig::IntDivideDecimal => int_divide_decimal_fn_meta(), + ScalarFuncSig::IntDivideDecimal => map_int_sig(value, children, divide_decimal_mapper)?, ScalarFuncSig::ModReal => arithmetic_fn_meta::(), ScalarFuncSig::ModDecimal => arithmetic_with_ctx_fn_meta::(), ScalarFuncSig::ModInt => map_int_sig(value, children, mod_mapper)?, From 1afb327c6d2a650ddac0e506e2d4727ebad78eb0 Mon Sep 17 00:00:00 2001 From: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Date: Wed, 29 Nov 2023 18:34:18 +0800 Subject: [PATCH 1038/1149] cloud: update cloud sdk that supports fips 140 for cloud sdk (#16098) close tikv/tikv#16097 Signed-off-by: Leavrth Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 134 
++++-------------------------- Cargo.toml | 1 + components/cloud/aws/src/s3.rs | 2 + components/cloud/azure/Cargo.toml | 12 +-- 4 files changed, 27 insertions(+), 122 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 146e9aa04ab..13bd3c05781 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -310,7 +310,7 @@ dependencies = [ [[package]] name = "azure_core" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" +source = "git+https://github.com/tikv/azure-sdk-for-rust?branch=release-7.5-fips#e3dc3e02573e60e70f00418255c417aa80b8e26b" dependencies = [ "async-trait", "base64 0.21.0", @@ -336,7 +336,7 @@ dependencies = [ [[package]] name = "azure_identity" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" +source = "git+https://github.com/tikv/azure-sdk-for-rust?branch=release-7.5-fips#e3dc3e02573e60e70f00418255c417aa80b8e26b" dependencies = [ "async-lock", "async-trait", @@ -356,7 +356,7 @@ dependencies = [ [[package]] name = "azure_security_keyvault" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" +source = "git+https://github.com/tikv/azure-sdk-for-rust?branch=release-7.5-fips#e3dc3e02573e60e70f00418255c417aa80b8e26b" dependencies = [ "async-trait", "azure_core", @@ -371,20 +371,19 @@ dependencies = [ [[package]] name = "azure_storage" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" +source = "git+https://github.com/tikv/azure-sdk-for-rust?branch=release-7.5-fips#e3dc3e02573e60e70f00418255c417aa80b8e26b" dependencies = [ "RustyXML", "async-trait", "azure_core", "bytes", "futures 0.3.15", - "hmac 0.12.1", "log", "once_cell", + "openssl", "serde", "serde_derive", "serde_json", - "sha2 0.10.6", "time 0.3.20", "url", "uuid 1.2.1", @@ -393,7 +392,7 @@ dependencies = [ [[package]] name = 
"azure_storage_blobs" version = "0.12.0" -source = "git+https://github.com/Azure/azure-sdk-for-rust#69431158e9d39f2064fe207cf241d3fc748c851c" +source = "git+https://github.com/tikv/azure-sdk-for-rust?branch=release-7.5-fips#e3dc3e02573e60e70f00418255c417aa80b8e26b" dependencies = [ "RustyXML", "azure_core", @@ -665,15 +664,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - [[package]] name = "boolinator" version = "2.4.0" @@ -1098,15 +1088,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "cpufeatures" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" -dependencies = [ - "libc 0.2.146", -] - [[package]] name = "cpuid-bool" version = "0.1.2" @@ -1265,26 +1246,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "crypto-mac" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4857fd85a0c34b3c3297875b747c1e02e06b6a0ea32dd892d8192b9ce0813ea6" -dependencies = [ - "generic-array", - "subtle", -] - [[package]] name = "csv" version = "1.1.6" @@ -1393,17 +1354,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "digest" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" -dependencies = [ - "block-buffer 0.10.4", - "crypto-common", - "subtle", -] - [[package]] name = "dirs-next" version = "2.0.0" 
@@ -2413,25 +2363,6 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "644f9158b2f133fd50f5fb3242878846d9eb792e445c893805ff0e3824006e35" -[[package]] -name = "hmac" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1441c6b1e930e2817404b5046f1f989899143a12bf92de603b69f4e0aee1e15" -dependencies = [ - "crypto-mac", - "digest 0.9.0", -] - -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest 0.10.6", -] - [[package]] name = "home" version = "0.5.5" @@ -2975,17 +2906,6 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" -[[package]] -name = "md-5" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5a279bb9607f9f53c22d496eade00d138d1bdcccd07d74650387cf94942a15" -dependencies = [ - "block-buffer 0.9.0", - "digest 0.9.0", - "opaque-debug", -] - [[package]] name = "md5" version = "0.7.0" @@ -3443,7 +3363,7 @@ dependencies = [ "serde", "serde_json", "serde_path_to_error", - "sha2 0.9.1", + "sha2", "thiserror", "url", ] @@ -4717,7 +4637,7 @@ dependencies = [ [[package]] name = "rusoto_core" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#cc733208600bdb15a13940d6930c1fbd4ab604f2" dependencies = [ "async-trait", "base64 0.13.0", @@ -4741,7 +4661,7 @@ dependencies = [ [[package]] name = "rusoto_credential" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" +source = 
"git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#cc733208600bdb15a13940d6930c1fbd4ab604f2" dependencies = [ "async-trait", "chrono", @@ -4758,7 +4678,7 @@ dependencies = [ [[package]] name = "rusoto_kms" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#cc733208600bdb15a13940d6930c1fbd4ab604f2" dependencies = [ "async-trait", "bytes", @@ -4771,7 +4691,7 @@ dependencies = [ [[package]] name = "rusoto_mock" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#cc733208600bdb15a13940d6930c1fbd4ab604f2" dependencies = [ "async-trait", "chrono", @@ -4785,7 +4705,7 @@ dependencies = [ [[package]] name = "rusoto_s3" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#cc733208600bdb15a13940d6930c1fbd4ab604f2" dependencies = [ "async-trait", "bytes", @@ -4799,32 +4719,29 @@ dependencies = [ [[package]] name = "rusoto_signature" version = "0.46.0" -source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#cc733208600bdb15a13940d6930c1fbd4ab604f2" dependencies = [ "base64 0.13.0", "bytes", "chrono", - "digest 0.9.0", "futures 0.3.15", "hex 0.4.2", - "hmac 0.10.1", "http", "hyper", "log", - "md-5", + "openssl", "percent-encoding", "pin-project-lite", "rusoto_credential", "rustc_version 0.3.3", "serde", - "sha2 0.9.1", "tokio", ] [[package]] name = "rusoto_sts" version = "0.46.0" -source = 
"git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#0d6df7b119c4e757daaa715f261c3150c7ae0a3b" +source = "git+https://github.com/tikv/rusoto?branch=gh1482-s3-addr-styles#cc733208600bdb15a13940d6930c1fbd4ab604f2" dependencies = [ "async-trait", "bytes", @@ -5226,24 +5143,13 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2933378ddfeda7ea26f48c555bdad8bb446bf8a3d17832dc83e380d444cfb8c1" dependencies = [ - "block-buffer 0.9.0", + "block-buffer", "cfg-if 0.1.10", "cpuid-bool", - "digest 0.9.0", + "digest", "opaque-debug", ] -[[package]] -name = "sha2" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" -dependencies = [ - "cfg-if 1.0.0", - "cpufeatures", - "digest 0.10.6", -] - [[package]] name = "shlex" version = "0.1.1" @@ -5593,12 +5499,6 @@ dependencies = [ "syn 2.0.18", ] -[[package]] -name = "subtle" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" - [[package]] name = "symbolic-common" version = "10.1.1" diff --git a/Cargo.toml b/Cargo.toml index 2d26bf6afe4..fdc86fb5f15 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -189,6 +189,7 @@ protobuf = { git = "https://github.com/pingcap/rust-protobuf", branch = "v2.8" } protobuf-codegen = { git = "https://github.com/pingcap/rust-protobuf", branch = "v2.8" } # TODO: remove this replacement after rusoto_s3 truly supports virtual-host style (https://github.com/rusoto/rusoto/pull/1823). 
+# UPDATE: use openssl for signature to support fips 140 rusoto_core = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } rusoto_credential = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } rusoto_kms = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index 73ddf479fd8..f06d86b37cb 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -330,6 +330,8 @@ async fn try_read_exact( } } +// NOTICE: the openssl fips doesn't support md5, therefore use md5 pakcage to +// hash fn get_content_md5(object_lock_enabled: bool, content: &[u8]) -> Option { object_lock_enabled.then(|| { let digest = md5::compute(content); diff --git a/components/cloud/azure/Cargo.toml b/components/cloud/azure/Cargo.toml index 7dd98224a73..07a4752451e 100644 --- a/components/cloud/azure/Cargo.toml +++ b/components/cloud/azure/Cargo.toml @@ -9,11 +9,13 @@ failpoints = ["fail/failpoints"] [dependencies] async-trait = "0.1" -azure_core = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust" } -azure_identity = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust" } -azure_security_keyvault = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust", default-features = false } -azure_storage = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust", default-features = false } -azure_storage_blobs = { version = "0.12.0", git = "https://github.com/Azure/azure-sdk-for-rust" } +# TODO: The azure sdk with the newest version needs the rustc v1.70, but current version of rustc in TiKV is v1.67. +# Therefore use the patch to update sdk to support fips 140. 
+azure_core = { git = "https://github.com/tikv/azure-sdk-for-rust", branch = "release-7.5-fips" } +azure_identity = { git = "https://github.com/tikv/azure-sdk-for-rust", branch = "release-7.5-fips" } +azure_security_keyvault = { git = "https://github.com/tikv/azure-sdk-for-rust", branch = "release-7.5-fips", default-features = false } +azure_storage = { git = "https://github.com/tikv/azure-sdk-for-rust", branch = "release-7.5-fips", default-features = false } +azure_storage_blobs = { git = "https://github.com/tikv/azure-sdk-for-rust", branch = "release-7.5-fips" } base64 = "0.13" cloud = { workspace = true } fail = "0.5" From bab43d0e374b00dbf291c2870a22f8906c7a8431 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Thu, 30 Nov 2023 13:32:48 +0800 Subject: [PATCH 1039/1149] metrics: add extra filters to filter out redundant data. (#16103) close tikv/tikv#16102 Add some necessary filters to filter out unnecessary data in metrics. Signed-off-by: lucasliang --- metrics/grafana/tikv_details.dashboard.py | 14 ++++++++++++-- metrics/grafana/tikv_details.json | 14 +++++++------- metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index ade81f717fd..59cc8e5f596 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -247,6 +247,7 @@ def Cluster() -> RowPanel: target( expr=expr_sum_rate( "process_cpu_seconds_total", + label_selectors=['job=~".*tikv"'], ), ), ], @@ -257,7 +258,10 @@ def Cluster() -> RowPanel: yaxes=yaxes(left_format=UNITS.BYTES_IEC), targets=[ target( - expr=expr_sum("process_resident_memory_bytes"), + expr=expr_sum( + "process_resident_memory_bytes", + label_selectors=['job=~".*tikv"'], + ), ), ], ), @@ -376,6 +380,7 @@ def Cluster() -> RowPanel: label_selectors=['type="buckets"'], ), legend_format=r"{{instance}}-buckets", + hide=True, ), ], ), @@ -390,7 +395,12 @@ 
def Cluster() -> RowPanel: targets=[ target( expr=expr_operator( - "time()", "-", expr_simple("process_start_time_seconds") + "time()", + "-", + expr_simple( + "process_start_time_seconds", + label_selectors=['job=~".*tikv"'], + ), ), legend_format=r"{{instance}}", ), diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index c36a81d522a..467ac073332 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -922,7 +922,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n process_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n process_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",job=~\".*tikv\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -930,7 +930,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n process_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n process_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",job=~\".*tikv\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -1055,7 +1055,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n process_resident_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum((\n process_resident_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",job=~\".*tikv\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -1063,7 +1063,7 @@ "intervalFactor": 1, 
"legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n process_resident_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum((\n process_resident_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",job=~\".*tikv\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -1915,7 +1915,7 @@ "datasource": "${DS_TEST-CLUSTER}", "expr": "sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"buckets\"}\n \n)) by (instance) ", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, @@ -2046,7 +2046,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(time() - ((\n process_start_time_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) )", + "expr": "(time() - ((\n process_start_time_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",job=~\".*tikv\"}\n \n)) )", "format": "time_series", "hide": false, "instant": false, @@ -2054,7 +2054,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "(time() - ((\n process_start_time_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) )", + "query": "(time() - ((\n process_start_time_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",job=~\".*tikv\"}\n \n)) )", "refId": "", "step": 10, "target": "" diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index abb8baa6770..31188a36bed 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -1b98912ed3e87960a2ce063f6063b4f4f6fe6bbba98518ceabd768036287763e 
./metrics/grafana/tikv_details.json +6ada9b4ad4a0bcbc847dbead88ff56bea77179e0a410bdd6148670a65ac94ed5 ./metrics/grafana/tikv_details.json From cd5c18c3bba403e586fe7da93ae1df8ebdf01c04 Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 30 Nov 2023 14:43:18 +0800 Subject: [PATCH 1040/1149] chore: Use checkbox for check list in pull request template (#16104) close tikv/tikv#15990 Use checkbox for check list in pull request template Signed-off-by: Connor Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .github/pull_request_template.md | 42 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 69bd19374c1..35c561124f5 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -12,56 +12,56 @@ PR Title Format: ### What is changed and how it works? -Issue Number: Close #xxx -What's Changed: +Issue Number: Close #xxx +What's Changed: + ```commit-message + ``` ### Related changes -- PR to update `pingcap/docs`/`pingcap/docs-cn`: -- Need to cherry-pick to the release branch +- [ ] PR to update `pingcap/docs`/`pingcap/docs-cn`: +- [ ] Need to cherry-pick to the release branch -### Check List +### Check List Tests -- Unit test -- Integration test -- Manual test (add detailed scripts or steps below) -- No code +- [ ] Unit test +- [ ] Integration test +- [ ] Manual test (add detailed scripts or steps below) +- [ ] No code Side effects -- Performance regression - - Consumes more CPU - - Consumes more MEM -- Breaking backward compatibility +- [ ] Performance regression: Consumes more CPU +- [ ] Performance regression: Consumes more Memory +- [ ] Breaking backward compatibility -### Release note - -```release-note -Please add a release note. 
+### Release note + + +```release-note + ``` From 2de7cad9fb5e1011a8853733d4a1b776bc16bce2 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Thu, 30 Nov 2023 17:31:19 +0800 Subject: [PATCH 1041/1149] tikv-ctl: enhance the easy of use on EncyptionMeta cmd. (#16095) close tikv/tikv#16094 In previous version, users who wanna use tikv-ctl to dump the encryption meta might be confused with the ambiguous errors on `data-dir` configuration. To make this tool easier to use, the hints on setting `data-dir` will be present to users if they miss the configuration `storage.data-dir` in tikv.toml and `--data-dir`, when using `encryption-meta` command. Signed-off-by: lucasliang --- cmd/tikv-ctl/src/main.rs | 64 +++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 5ed1bcbd9cc..ec0c8bfc915 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -120,6 +120,9 @@ fn main() { } } Cmd::RaftEngineCtl { args } => { + if !validate_storage_data_dir(&mut cfg, opt.data_dir) { + return; + } let key_manager = data_key_manager_from_config(&cfg.security.encryption, &cfg.storage.data_dir) .expect("data_key_manager_from_config should success"); @@ -141,6 +144,9 @@ fn main() { dump_snap_meta_file(path); } Cmd::DecryptFile { file, out_file } => { + if !validate_storage_data_dir(&mut cfg, opt.data_dir) { + return; + } let message = "This action will expose sensitive data as plaintext on persistent storage"; if !warning_prompt(message) { @@ -189,28 +195,36 @@ fn main() { io::copy(&mut reader, &mut outf).unwrap(); println!("crc32: {}", calc_crc32(outfile).unwrap()); } - Cmd::EncryptionMeta { cmd: subcmd } => match subcmd { - EncryptionMetaCmd::DumpKey { ids } => { - let message = "This action will expose encryption key(s) as plaintext. 
Do not output the \ + Cmd::EncryptionMeta { cmd: subcmd } => { + if !validate_storage_data_dir(&mut cfg, opt.data_dir) { + return; + } + match subcmd { + EncryptionMetaCmd::DumpKey { ids } => { + let message = "This action will expose encryption key(s) as plaintext. Do not output the \ result in file on disk."; - if !warning_prompt(message) { - return; + if !warning_prompt(message) { + return; + } + DataKeyManager::dump_key_dict( + create_backend(&cfg.security.encryption.master_key) + .expect("encryption-meta master key creation"), + &cfg.storage.data_dir, + ids, + ) + .unwrap(); + } + EncryptionMetaCmd::DumpFile { path } => { + let path = path + .map(|path| fs::canonicalize(path).unwrap().to_str().unwrap().to_owned()); + DataKeyManager::dump_file_dict(&cfg.storage.data_dir, path.as_deref()).unwrap(); } - DataKeyManager::dump_key_dict( - create_backend(&cfg.security.encryption.master_key) - .expect("encryption-meta master key creation"), - &cfg.storage.data_dir, - ids, - ) - .unwrap(); - } - EncryptionMetaCmd::DumpFile { path } => { - let path = - path.map(|path| fs::canonicalize(path).unwrap().to_str().unwrap().to_owned()); - DataKeyManager::dump_file_dict(&cfg.storage.data_dir, path.as_deref()).unwrap(); } - }, + } Cmd::CleanupEncryptionMeta {} => { + if !validate_storage_data_dir(&mut cfg, opt.data_dir) { + return; + } let key_manager = match data_key_manager_from_config(&cfg.security.encryption, &cfg.storage.data_dir) .expect("data_key_manager_from_config should success") @@ -1329,3 +1343,17 @@ fn read_cluster_id(config: &TikvConfig) -> Result { .unwrap(); Ok(ident.cluster_id) } + +fn validate_storage_data_dir(config: &mut TikvConfig, data_dir: Option) -> bool { + if let Some(data_dir) = data_dir { + if !Path::new(&data_dir).exists() { + eprintln!("--data-dir {:?} not exists", data_dir); + return false; + } + config.storage.data_dir = data_dir; + } else if config.storage.data_dir.is_empty() { + eprintln!("--data-dir or data-dir in the config file should not be 
empty"); + return false; + } + true +} From 98d83f4c811454d480e54c73de14cc3385815e1e Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 30 Nov 2023 18:22:19 +0800 Subject: [PATCH 1042/1149] metrics/grafana: set shared crosshair tooltip (#16106) ref tikv/tikv#15990 Set shared crosshair tooltip for tikv_detail dashboard Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/grafana/tikv_details.dashboard.py | 5 ++++- metrics/grafana/tikv_details.json | 2 +- metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 59cc8e5f596..6ab065f5433 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -8566,7 +8566,6 @@ def SlowTrendStatistics() -> RowPanel: refresh="1m", inputs=[DATASOURCE_INPUT], editable=True, - graphTooltip=GRAPH_TOOLTIP_MODE_SHARED_CROSSHAIR, templating=Templates(), panels=[ Duration(), @@ -8609,4 +8608,8 @@ def SlowTrendStatistics() -> RowPanel: BackupLog(), SlowTrendStatistics(), ], + # Set 14 or larger to support shared crosshair or shared tooltip. 
+ # See https://github.com/grafana/grafana/blob/v10.2.2/public/app/features/dashboard/state/DashboardMigrator.ts#L443-L445 + schemaVersion=14, + graphTooltip=GRAPH_TOOLTIP_MODE_SHARED_CROSSHAIR, ).auto_panel_ids() diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 467ac073332..6f8dffa27cd 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -66821,7 +66821,7 @@ ], "refresh": "1m", "rows": [], - "schemaVersion": 12, + "schemaVersion": 14, "sharedCrosshair": false, "style": "dark", "tags": [], diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 31188a36bed..81ae429e361 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -6ada9b4ad4a0bcbc847dbead88ff56bea77179e0a410bdd6148670a65ac94ed5 ./metrics/grafana/tikv_details.json +d496158baafb3f61d8f4dca2a8434031ad6092b93f3aeecb4fd2947df09a8caf ./metrics/grafana/tikv_details.json From fd989444fc9fb0dd2fdfafaece6130254b7b9f16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 30 Nov 2023 20:31:18 +0800 Subject: [PATCH 1043/1149] log-backup: make initialize failure fatal error, release memory while task stopped. 
(#16071) close tikv/tikv#16056, close tikv/tikv#16070 Signed-off-by: hillium Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/backup-stream/src/endpoint.rs | 100 ++++++++++-------- .../backup-stream/src/metadata/client.rs | 14 ++- components/backup-stream/src/router.rs | 34 ++++++ components/backup-stream/src/tempfiles.rs | 5 + .../backup-stream/tests/failpoints/mod.rs | 29 ++++- .../backup-stream/tests/integration/mod.rs | 2 +- components/backup-stream/tests/suite.rs | 5 + 7 files changed, 139 insertions(+), 50 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 6c19edc9f93..f453469768c 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -211,6 +211,53 @@ where self.meta_client.clone() } + fn on_fatal_error_of_task(&self, task: &str, err: &Error) -> future![()] { + metrics::update_task_status(TaskStatus::Error, task); + let meta_cli = self.get_meta_client(); + let pdc = self.pd_client.clone(); + let store_id = self.store_id; + let sched = self.scheduler.clone(); + let safepoint_name = self.pause_guard_id_for_task(task); + let safepoint_ttl = self.pause_guard_duration(); + let code = err.error_code().code.to_owned(); + let msg = err.to_string(); + let task = task.to_owned(); + async move { + let err_fut = async { + let safepoint = meta_cli.global_progress_of_task(&task).await?; + pdc.update_service_safe_point( + safepoint_name, + TimeStamp::new(safepoint.saturating_sub(1)), + safepoint_ttl, + ) + .await?; + meta_cli.pause(&task).await?; + let mut last_error = StreamBackupError::new(); + last_error.set_error_code(code); + last_error.set_error_message(msg.clone()); + last_error.set_store_id(store_id); + last_error.set_happen_at(TimeStamp::physical_now()); + meta_cli.report_last_error(&task, last_error).await?; + Result::Ok(()) + }; + if let Err(err_report) = err_fut.await { + 
err_report.report(format_args!("failed to upload error {}", err_report)); + let name = task.to_owned(); + // Let's retry reporting after 5s. + tokio::task::spawn(async move { + tokio::time::sleep(Duration::from_secs(5)).await; + try_send!( + sched, + Task::FatalError( + TaskSelector::ByName(name), + Box::new(annotate!(err_report, "origin error: {}", msg)) + ) + ); + }); + } + } + } + fn on_fatal_error(&self, select: TaskSelector, err: Box) { err.report_fatal(); let tasks = self @@ -220,49 +267,7 @@ where for task in tasks { // Let's pause the task first. self.unload_task(&task); - metrics::update_task_status(TaskStatus::Error, &task); - - let meta_cli = self.get_meta_client(); - let pdc = self.pd_client.clone(); - let store_id = self.store_id; - let sched = self.scheduler.clone(); - let safepoint_name = self.pause_guard_id_for_task(&task); - let safepoint_ttl = self.pause_guard_duration(); - let code = err.error_code().code.to_owned(); - let msg = err.to_string(); - self.pool.block_on(async move { - let err_fut = async { - let safepoint = meta_cli.global_progress_of_task(&task).await?; - pdc.update_service_safe_point( - safepoint_name, - TimeStamp::new(safepoint.saturating_sub(1)), - safepoint_ttl, - ) - .await?; - meta_cli.pause(&task).await?; - let mut last_error = StreamBackupError::new(); - last_error.set_error_code(code); - last_error.set_error_message(msg.clone()); - last_error.set_store_id(store_id); - last_error.set_happen_at(TimeStamp::physical_now()); - meta_cli.report_last_error(&task, last_error).await?; - Result::Ok(()) - }; - if let Err(err_report) = err_fut.await { - err_report.report(format_args!("failed to upload error {}", err_report)); - // Let's retry reporting after 5s. 
- tokio::task::spawn(async move { - tokio::time::sleep(Duration::from_secs(5)).await; - try_send!( - sched, - Task::FatalError( - TaskSelector::ByName(task.to_owned()), - Box::new(annotate!(err_report, "origin error: {}", msg)) - ) - ); - }); - } - }); + self.pool.block_on(self.on_fatal_error_of_task(&task, &err)); } } @@ -637,6 +642,9 @@ where let run = async move { let task_name = task.info.get_name(); let ranges = cli.ranges_of_task(task_name).await?; + fail::fail_point!("load_task::error_when_fetching_ranges", |_| { + Err(Error::Other("what range? no such thing, go away.".into())) + }); info!( "register backup stream ranges"; "task" => ?task, @@ -664,10 +672,8 @@ where Result::Ok(()) }; if let Err(e) = run.await { - e.report(format!( - "failed to register backup stream task {} to router: ranges not found", - task_clone.info.get_name() - )); + self.on_fatal_error_of_task(&task_clone.info.name, &Box::new(e)) + .await; } }); metrics::update_task_status(TaskStatus::Running, &task_name); diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index 1fdc1b3b1e8..2232770915f 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -286,7 +286,19 @@ impl MetadataClient { Ok(()) } - pub async fn get_last_error( + pub async fn get_last_error(&self, name: &str) -> Result> { + let key = MetaKey::last_errors_of(name); + + let r = self.meta_store.get_latest(Keys::Prefix(key)).await?.inner; + if r.is_empty() { + return Ok(None); + } + let r = &r[0]; + let err = protobuf::parse_from_bytes(r.value())?; + Ok(Some(err)) + } + + pub async fn get_last_error_of( &self, name: &str, store_id: u64, diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 849a503e21b..00ce93635e8 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -830,6 +830,28 @@ pub struct StreamTaskInfo { 
temp_file_pool: Arc, } +impl Drop for StreamTaskInfo { + fn drop(&mut self) { + let (success, failed): (Vec<_>, Vec<_>) = self + .flushing_files + .get_mut() + .drain(..) + .chain(self.flushing_meta_files.get_mut().drain(..)) + .map(|(_, f, _)| f.inner.path().to_owned()) + .map(|p| self.temp_file_pool.remove(&p)) + .partition(|r| *r); + info!("stream task info dropped[1/2], removing flushing_temp files"; "success" => %success.len(), "failure" => %failed.len()); + let (success, failed): (Vec<_>, Vec<_>) = self + .files + .get_mut() + .drain() + .map(|(_, f)| f.into_inner().inner.path().to_owned()) + .map(|p| self.temp_file_pool.remove(&p)) + .partition(|r| *r); + info!("stream task info dropped[2/2], removing temp files"; "success" => %success.len(), "failure" => %failed.len()); + } +} + impl std::fmt::Debug for StreamTaskInfo { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("StreamTaskInfo") @@ -2089,6 +2111,12 @@ mod tests { let (task, _path) = task("cleanup_test".to_owned()).await?; must_register_table(&router, task, 1).await; write_simple_data(&router).await; + let tempfiles = router + .get_task_info("cleanup_test") + .await + .unwrap() + .temp_file_pool + .clone(); router .get_task_info("cleanup_test") .await? @@ -2097,6 +2125,7 @@ mod tests { write_simple_data(&router).await; let mut w = walkdir::WalkDir::new(&tmp).into_iter(); assert!(w.next().is_some(), "the temp files doesn't created"); + assert!(tempfiles.mem_used() > 0, "the temp files doesn't created."); drop(router); let w = walkdir::WalkDir::new(&tmp) .into_iter() @@ -2114,6 +2143,11 @@ mod tests { "the temp files should be removed, but it is {:?}", w ); + assert_eq!( + tempfiles.mem_used(), + 0, + "the temp files hasn't been cleared." 
+ ); Ok(()) } diff --git a/components/backup-stream/src/tempfiles.rs b/components/backup-stream/src/tempfiles.rs index add1ee67c12..b8f9c9e1120 100644 --- a/components/backup-stream/src/tempfiles.rs +++ b/components/backup-stream/src/tempfiles.rs @@ -259,6 +259,11 @@ impl TempFilePool { &self.cfg } + #[cfg(test)] + pub fn mem_used(&self) -> usize { + self.current.load(Ordering::Acquire) + } + /// Create a file for writting. /// This function is synchronous so we can call it easier in the polling /// context. (Anyway, it is really hard to call an async function in the diff --git a/components/backup-stream/tests/failpoints/mod.rs b/components/backup-stream/tests/failpoints/mod.rs index 8dfc21529e4..ea09e9c7a1f 100644 --- a/components/backup-stream/tests/failpoints/mod.rs +++ b/components/backup-stream/tests/failpoints/mod.rs @@ -30,6 +30,32 @@ mod all { use super::{ make_record_key, make_split_key_at_record, mutation, run_async_test, SuiteBuilder, }; + use crate::make_table_key; + + #[test] + fn failed_register_task() { + let suite = SuiteBuilder::new_named("failed_register_task").build(); + fail::cfg("load_task::error_when_fetching_ranges", "return").unwrap(); + let cli = suite.get_meta_cli(); + block_on(cli.insert_task_with_range( + &suite.simple_task("failed_register_task"), + &[(&make_table_key(1, b""), &make_table_key(2, b""))], + )) + .unwrap(); + + for _ in 0..10 { + if block_on(cli.get_last_error_of("failed_register_task", 1)) + .unwrap() + .is_some() + { + return; + } + std::thread::sleep(Duration::from_millis(100)); + } + + suite.dump_slash_etc(); + panic!("No error uploaded when failed to comminate to PD."); + } #[test] fn basic() { @@ -192,7 +218,8 @@ mod all { suite.must_split(&make_split_key_at_record(1, 42)); std::thread::sleep(Duration::from_secs(2)); - let error = run_async_test(suite.get_meta_cli().get_last_error("retry_abort", 1)).unwrap(); + let error = + run_async_test(suite.get_meta_cli().get_last_error_of("retry_abort", 1)).unwrap(); let error 
= error.expect("no error uploaded"); error .get_error_message() diff --git a/components/backup-stream/tests/integration/mod.rs b/components/backup-stream/tests/integration/mod.rs index 395159060c1..04fee6b2c09 100644 --- a/components/backup-stream/tests/integration/mod.rs +++ b/components/backup-stream/tests/integration/mod.rs @@ -160,7 +160,7 @@ mod all { let err = run_async_test( suite .get_meta_cli() - .get_last_error("test_fatal_error", *victim), + .get_last_error_of("test_fatal_error", *victim), ) .unwrap() .unwrap(); diff --git a/components/backup-stream/tests/suite.rs b/components/backup-stream/tests/suite.rs index 0e4038d07a0..2886bb4f5d7 100644 --- a/components/backup-stream/tests/suite.rs +++ b/components/backup-stream/tests/suite.rs @@ -395,6 +395,11 @@ impl Suite { MetadataClient::new(self.meta_store.clone(), 0) } + #[allow(dead_code)] + pub fn dump_slash_etc(&self) { + self.meta_store.inner.blocking_lock().dump(); + } + pub fn must_split(&mut self, key: &[u8]) { let region = self.cluster.get_region(key); self.cluster.must_split(®ion, key); From 3544ed36b088d3cf706fcc037d62944e48b57028 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 4 Dec 2023 13:52:21 +0800 Subject: [PATCH 1044/1149] raftstore: fix a panic cause by peer destroy racing (#16112) close tikv/tikv#16111, close pingcap/tidb#49012 In case a node is isolated during the merge and the target peer is replaced by a peer with a larger ID, an "atomic_snapshot" is created which covers both the source peer and the snapshot of the target peer. In such cases, the snapshot needs to destroy the source peer too. However, if the source peer is already being destroyed triggered by gc message, it may result in a panic with a "no entry found for key" message. This commit resolves the issue by cleaning up atomic_snap_regions after the destroy, so the target peer is no longer expected to find the source peer. 
This cleanup is safe because the source region has already cleaned up its data and metadata from disk. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/apply.rs | 1 + components/raftstore/src/store/fsm/peer.rs | 15 ++- tests/failpoints/cases/test_merge.rs | 105 ++++++++++++++++++++ 3 files changed, 116 insertions(+), 5 deletions(-) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 252249b74b2..f70e0a31181 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -4074,6 +4074,7 @@ where /// Handles peer destroy. When a peer is destroyed, the corresponding apply /// delegate should be removed too. fn handle_destroy(&mut self, ctx: &mut ApplyContext, d: Destroy) { + fail_point!("on_apply_handle_destroy"); assert_eq!(d.region_id, self.delegate.region_id()); if d.merge_from_snapshot { assert_eq!(self.delegate.stopped, false); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index ee2daf1c3c8..fbcfe498bb8 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -3842,14 +3842,18 @@ where self.fsm.peer.tag ); } else { + // Remove itself from atomic_snap_regions as it has cleaned both + // data and metadata. 
let target_region_id = *meta.targets_map.get(®ion_id).unwrap(); - let is_ready = meta - .atomic_snap_regions + meta.atomic_snap_regions .get_mut(&target_region_id) .unwrap() - .get_mut(®ion_id) - .unwrap(); - *is_ready = true; + .remove(®ion_id); + info!("peer has destroyed, clean up for incoming overlapped snapshot"; + "region_id" => region_id, + "peer_id" => self.fsm.peer_id(), + "target_region_id" => target_region_id, + ); } } @@ -4984,6 +4988,7 @@ where "region_id" => self.fsm.region_id(), "peer_id" => self.fsm.peer_id(), "region" => ?region, + "destroy_regions" => ?persist_res.destroy_regions, ); let mut state = self.ctx.global_replication_state.lock().unwrap(); diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index eb15c7e16fa..929afeb70f4 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -2068,3 +2068,108 @@ fn test_restart_may_lose_merging_state() { cluster.must_put(b"k400", b"v400"); } + +// If a node is isolated during merge, and the target peer is replaced by a peer +// with a larger ID, then the snapshot of the target peer covers the source +// regions as well. +// In such cases, the snapshot becomes an "atomic_snapshot" which needs to +// destroy the source peer too. +// This test case checks the race between destroying the source peer by atomic +// snapshot and the gc message. The source peer must be successfully destroyed +// in this case. 
+#[test_case(test_raftstore::new_node_cluster)] +fn test_destroy_race_during_atomic_snapshot_after_merge() { + let mut cluster = new_cluster(0, 3); + configure_for_merge(&mut cluster.cfg); + cluster.run(); + let pd_client = Arc::clone(&cluster.pd_client); + pd_client.disable_default_operator(); + + cluster.must_transfer_leader(1, new_peer(1, 1)); + + cluster.must_put(b"k1", b"v1"); + cluster.must_put(b"k3", b"v3"); + + let region = cluster.get_region(b"k1"); + cluster.must_split(®ion, b"k2"); + let left = cluster.get_region(b"k1"); + let right = cluster.get_region(b"k3"); + + // Allow raft messages to source peer on store 3 before PrepareMerge. + let left_filter_block = Arc::new(atomic::AtomicBool::new(false)); + let left_filter_block_ = left_filter_block.clone(); + let left_blocked_messages = Arc::new(Mutex::new(vec![])); + let left_filter = RegionPacketFilter::new(left.get_id(), 3) + .direction(Direction::Recv) + .when(left_filter_block.clone()) + .reserve_dropped(left_blocked_messages.clone()) + .set_msg_callback(Arc::new(move |msg: &RaftMessage| { + debug!("dbg left msg_callback"; "msg" => ?msg); + if left_filter_block.load(atomic::Ordering::SeqCst) { + return; + } + for e in msg.get_message().get_entries() { + let ctx = raftstore::store::ProposalContext::from_bytes(&e.context); + if ctx.contains(raftstore::store::ProposalContext::PREPARE_MERGE) { + // Block further messages. + left_filter_block.store(true, atomic::Ordering::SeqCst); + } + } + })); + cluster.sim.wl().add_recv_filter(3, Box::new(left_filter)); + // Block messages to target peer on store 3. 
+    let right_filter_block = Arc::new(atomic::AtomicBool::new(true));
+    let new_peer_id = 1004;
+    let (new_peer_id_tx, new_peer_id_rx) = std::sync::mpsc::channel();
+    let new_peer_id_tx = Mutex::new(Some(new_peer_id_tx));
+    let (new_peer_snap_tx, new_peer_snap_rx) = std::sync::mpsc::channel();
+    let new_peer_snap_tx = Mutex::new(new_peer_snap_tx);
+    let right_filter = RegionPacketFilter::new(right.get_id(), 3)
+        .direction(Direction::Recv)
+        .when(right_filter_block.clone())
+        .set_msg_callback(Arc::new(move |msg: &RaftMessage| {
+            debug!("dbg right msg_callback"; "msg" => ?msg);
+            if msg.get_to_peer().get_id() == new_peer_id {
+                let _ = new_peer_id_tx.lock().unwrap().take().map(|tx| tx.send(()));
+                if msg.get_message().get_msg_type() == MessageType::MsgSnapshot {
+                    let _ = new_peer_snap_tx.lock().unwrap().send(());
+                }
+            }
+        }));
+    cluster.sim.wl().add_recv_filter(3, Box::new(right_filter));
+    pd_client.must_merge(left.get_id(), right.get_id());
+
+    // Make target peer on store 3 a stale peer.
+    pd_client.must_remove_peer(right.get_id(), find_peer(&right, 3).unwrap().to_owned());
+    pd_client.must_add_peer(right.get_id(), new_peer(3, new_peer_id));
+    // Unblock messages to target peer on store 3.
+    right_filter_block.store(false, atomic::Ordering::SeqCst);
+    // Wait for receiving new peer id message to destroy stale target peer.
+    new_peer_id_rx.recv_timeout(Duration::from_secs(5)).unwrap();
+    cluster.must_region_not_exist(right.get_id(), 3);
+    // Let source peer continue prepare merge. It will fail to schedule merge,
+    // because the target peer is destroyed.
+    left_filter_block_.store(false, atomic::Ordering::SeqCst);
+    // Before sending blocked messages, make sure source peer is paused at
+    // destroy apply delegate, so that the new right peer snapshot will
+    // try to destroy source peer before applying snapshot.
+    fail::cfg("on_apply_handle_destroy", "pause").unwrap();
+    // Send blocked messages to source peer.
Prepare merge must fail to schedule + // CommitMerge because now target peer stale peer is destroyed. + let router = cluster.sim.wl().get_router(3).unwrap(); + for raft_msg in std::mem::take(&mut *left_blocked_messages.lock().unwrap()) { + router.send_raft_message(raft_msg).unwrap(); + } + // Wait the new right peer snapshot. + new_peer_snap_rx + .recv_timeout(Duration::from_secs(5)) + .unwrap(); + // Give it some time to step snapshot message. + sleep_ms(500); + // Let source peer destroy continue, so it races with atomic snapshot destroy. + fail::remove("on_apply_handle_destroy"); + + // New peer applies snapshot eventually. + cluster.must_transfer_leader(right.get_id(), new_peer(3, new_peer_id)); + cluster.must_put(b"k4", b"v4"); +} From ca8c70d9a0ca499007a8457c9d77cdfb958823e2 Mon Sep 17 00:00:00 2001 From: Connor Date: Mon, 4 Dec 2023 15:04:22 +0800 Subject: [PATCH 1045/1149] raftstore: Verify checksum right after SST files are generated (#16107) close tikv/tikv#15986 Verify checksum right after SST files are generated to avoid corrupted SST being transferred to other TiKVs Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/engine_test/src/lib.rs | 8 +++- components/raftstore/src/store/snap.rs | 20 ++++++-- components/raftstore/src/store/snap/io.rs | 57 ++++++++++++++++++++--- metrics/alertmanager/tikv.rules.yml | 12 +++++ tests/failpoints/cases/test_snap.rs | 17 +++++++ tests/integrations/storage/test_titan.rs | 2 + 6 files changed, 103 insertions(+), 13 deletions(-) diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index dd56d9a5db4..85d9d4c1b78 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -127,7 +127,7 @@ pub mod kv { } fn destroy_tablet(&self, _ctx: TabletContext, path: &Path) -> Result<()> { - encryption::trash_dir_all(path, self.db_opt.key_manager.as_deref())?; + encryption::trash_dir_all(path, 
self.db_opt.get_key_manager().as_deref())?; Ok(()) } @@ -202,13 +202,17 @@ pub mod ctor { #[derive(Clone, Default)] pub struct DbOptions { - pub(crate) key_manager: Option>, + key_manager: Option>, rate_limiter: Option>, state_storage: Option>, enable_multi_batch_write: bool, } impl DbOptions { + pub fn get_key_manager(&self) -> Option> { + self.key_manager.clone() + } + pub fn set_key_manager(&mut self, key_manager: Option>) { self.key_manager = key_manager; } diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index a857cbffdfd..6976f4614df 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -92,6 +92,12 @@ impl From for Error { } } +impl From for Error { + fn from(e: engine_traits::Error) -> Self { + Error::Other(Box::new(e)) + } +} + pub type Result = result::Result; impl ErrorCodeExt for Error { @@ -873,8 +879,13 @@ impl Snapshot { self.switch_to_cf_file(cf)?; let cf_file = &mut self.cf_files[self.cf_index]; let cf_stat = if plain_file_used(cf_file.cf) { - let key_mgr = self.mgr.encryption_key_manager.as_ref(); - snap_io::build_plain_cf_file::(cf_file, key_mgr, kv_snap, &begin_key, &end_key)? + snap_io::build_plain_cf_file::( + cf_file, + self.mgr.encryption_key_manager.as_ref(), + kv_snap, + &begin_key, + &end_key, + )? } else { snap_io::build_sst_cf_file_list::( cf_file, @@ -885,6 +896,7 @@ impl Snapshot { self.mgr .get_actual_max_per_file_size(allow_multi_files_snapshot), &self.mgr.limiter, + self.mgr.encryption_key_manager.clone(), )? 
}; SNAPSHOT_LIMIT_GENERATE_BYTES.inc_by(cf_stat.total_size as u64); @@ -1212,7 +1224,7 @@ impl Snapshot { if file_for_recving.written_size != cf_file.size[i] { return Err(io::Error::new( - ErrorKind::Other, + ErrorKind::InvalidData, format!( "snapshot file {} for cf {} size mismatches, \ real size {}, expected size {}", @@ -1227,7 +1239,7 @@ impl Snapshot { let checksum = file_for_recving.write_digest.finalize(); if checksum != cf_file.checksum[i] { return Err(io::Error::new( - ErrorKind::Other, + ErrorKind::InvalidData, format!( "snapshot file {} for cf {} checksum \ mismatches, real checksum {}, expected \ diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 952f49baf44..c897aaa2597 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -10,14 +10,15 @@ use std::{ use encryption::{DataKeyManager, DecrypterReader, EncrypterWriter, Iv}; use engine_traits::{ - CfName, Error as EngineError, Iterable, KvEngine, Mutable, SstCompressionType, SstWriter, - SstWriterBuilder, WriteBatch, + CfName, Error as EngineError, Iterable, KvEngine, Mutable, SstCompressionType, SstReader, + SstWriter, SstWriterBuilder, WriteBatch, }; +use fail::fail_point; use kvproto::encryptionpb::EncryptionMethod; use tikv_util::{ box_try, codec::bytes::{BytesEncoder, CompactBytesFromFileDecoder}, - debug, info, + debug, error, info, time::{Instant, Limiter}, }; @@ -114,6 +115,7 @@ pub fn build_sst_cf_file_list( end_key: &[u8], raw_size_per_file: u64, io_limiter: &Limiter, + key_mgr: Option>, ) -> Result where E: KvEngine, @@ -131,6 +133,48 @@ where let sst_writer = RefCell::new(create_sst_file_writer::(engine, cf, &path)?); let mut file_length: usize = 0; + let finish_sst_writer = |sst_writer: E::SstWriter, + path: String, + key_mgr: Option>| + -> Result<(), Error> { + sst_writer.finish()?; + (|| { + fail_point!("inject_sst_file_corruption", |_| { + static CALLED: std::sync::atomic::AtomicBool = 
+ std::sync::atomic::AtomicBool::new(false); + if CALLED + .compare_exchange( + false, + true, + std::sync::atomic::Ordering::SeqCst, + std::sync::atomic::Ordering::SeqCst, + ) + .is_err() + { + return; + } + // overwrite the file to break checksum + let mut f = OpenOptions::new().write(true).open(&path).unwrap(); + f.write_all(b"x").unwrap(); + }); + })(); + + let sst_reader = E::SstReader::open(&path, key_mgr)?; + if let Err(e) = sst_reader.verify_checksum() { + // use sst reader to verify block checksum, it would detect corrupted SST due to + // memory bit-flip + fs::remove_file(&path)?; + error!( + "failed to pass block checksum verification"; + "file" => path, + "err" => ?e, + ); + return Err(io::Error::new(io::ErrorKind::InvalidData, e).into()); + } + File::open(&path).and_then(|f| f.sync_all())?; + Ok(()) + }; + let instant = Instant::now(); box_try!(snap.scan(cf, start_key, end_key, false, |key, value| { let entry_len = key.len() + value.len(); @@ -149,8 +193,7 @@ where match result { Ok(new_sst_writer) => { let old_writer = sst_writer.replace(new_sst_writer); - box_try!(old_writer.finish()); - box_try!(File::open(prev_path).and_then(|f| f.sync_all())); + box_try!(finish_sst_writer(old_writer, prev_path, key_mgr.clone())); } Err(e) => { let io_error = io::Error::new(io::ErrorKind::Other, e); @@ -176,9 +219,8 @@ where Ok(true) })); if stats.key_count > 0 { + box_try!(finish_sst_writer(sst_writer.into_inner(), path, key_mgr)); cf_file.add_file(file_id); - box_try!(sst_writer.into_inner().finish()); - box_try!(File::open(path).and_then(|f| f.sync_all())); info!( "build_sst_cf_file_list builds {} files in cf {}. Total keys {}, total size {}. 
raw_size_per_file {}, total takes {:?}", file_id + 1, @@ -425,6 +467,7 @@ mod tests { &keys::data_key(b"z"), *max_file_size, &limiter, + db_opt.as_ref().and_then(|opt| opt.get_key_manager()), ) .unwrap(); if stats.key_count == 0 { diff --git a/metrics/alertmanager/tikv.rules.yml b/metrics/alertmanager/tikv.rules.yml index aa8530df45f..1b460311e60 100644 --- a/metrics/alertmanager/tikv.rules.yml +++ b/metrics/alertmanager/tikv.rules.yml @@ -1,6 +1,18 @@ groups: - name: alert.rules rules: + - alert: TiKV_critical_error + expr: sum(rate(tikv_critical_error_total[1m])) BY (type, instance) > 0 + # without the for clause will become active on the first evaluation. + labels: + env: ENV_LABELS_ENV + level: critical + expr: sum(rate(tikv_critical_error_total[1m])) BY (type, instance) > 0 + annotations: + description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ $value }}' + value: '{{ $value }}' + summary: TiKV encounters critical error + - alert: TiKV_memory_used_too_fast expr: process_resident_memory_bytes{job=~"tikv",instance=~".*"} - (process_resident_memory_bytes{job=~"tikv",instance=~".*"} offset 5m) > 5*1024*1024*1024 for: 5m diff --git a/tests/failpoints/cases/test_snap.rs b/tests/failpoints/cases/test_snap.rs index 7748b1d2985..ca23b4c5a17 100644 --- a/tests/failpoints/cases/test_snap.rs +++ b/tests/failpoints/cases/test_snap.rs @@ -992,3 +992,20 @@ fn test_snapshot_send_failed() { sleep_ms(100); assert!(mgr.list_snapshot().unwrap().is_empty()); } + +#[test] +/// Test a corrupted snapshot can be detected and retry to generate a new one. 
+fn test_retry_corrupted_snapshot() { + let mut cluster = new_node_cluster(0, 3); + let pd_client = cluster.pd_client.clone(); + pd_client.disable_default_operator(); + + let r = cluster.run_conf_change(); + cluster.must_put(b"k1", b"v1"); + must_get_none(&cluster.get_engine(3), b"k1"); + pd_client.must_add_peer(r, new_peer(2, 2)); + fail::cfg("inject_sst_file_corruption", "return").unwrap(); + pd_client.must_add_peer(r, new_peer(3, 3)); + + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); +} diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 4bb8fee4087..752c6aaee1a 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -376,6 +376,7 @@ fn test_delete_files_in_range_for_titan() { b"{", u64::MAX, &limiter, + None, ) .unwrap(); let mut cf_file_write = CfFile::new( @@ -392,6 +393,7 @@ fn test_delete_files_in_range_for_titan() { b"{", u64::MAX, &limiter, + None, ) .unwrap(); From e14a803902d9870266a260b3e30de2d5a3bd00a6 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 5 Dec 2023 11:20:50 +0800 Subject: [PATCH 1046/1149] raftstore: clean up destroyed_region_for_snap when a peer is destroyed (#16133) ref tikv/tikv#16111 Signed-off-by: Neil Shen --- components/raftstore/src/store/fsm/peer.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index fbcfe498bb8..7c33bf66b87 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -3849,6 +3849,7 @@ where .get_mut(&target_region_id) .unwrap() .remove(®ion_id); + meta.destroyed_region_for_snap.remove(®ion_id); info!("peer has destroyed, clean up for incoming overlapped snapshot"; "region_id" => region_id, "peer_id" => self.fsm.peer_id(), From d76ab8f7325a4aa1c88fa94fc4122b5c415a96c5 Mon Sep 17 00:00:00 2001 From: qupeng Date: Tue, 5 Dec 2023 13:08:48 +0800 Subject: [PATCH 1047/1149] 
cdc: return server_is_busy to cdc clients if necessary (#16127) ref tikv/tikv#16035 return server_is_busy to cdc clients if necessary Signed-off-by: qupeng --- Cargo.lock | 2 +- components/cdc/src/delegate.rs | 6 ++++++ components/cdc/src/endpoint.rs | 8 +++----- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 13bd3c05781..147b42405bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2715,7 +2715,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#87bebcc0d071a18cbbd94a4fc02de9c4988af815" +source = "git+https://github.com/pingcap/kvproto.git#96c40585233f176393213dbd4c04d76259bad8f9" dependencies = [ "futures 0.3.15", "grpcio", diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index 780cfe8dea6..637ecab0440 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -206,6 +206,12 @@ impl Downstream { self.sink_error_event(region_id, err_event) } + pub fn sink_server_is_busy(&self, region_id: u64, reason: String) -> Result<()> { + let mut err_event = EventError::default(); + err_event.mut_server_is_busy().reason = reason; + self.sink_error_event(region_id, err_event) + } + pub fn set_sink(&mut self, sink: Sink) { self.sink = Some(sink); } diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index e1a985d4e98..9f840ab49d5 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -742,10 +742,8 @@ impl, E: KvEngine, S: StoreRegionMeta> Endpoint { - assert!(err.has_region_not_found()); + assert!(err.has_server_is_busy()); } other => panic!("unknown event {:?}", other), } From 44301d2066ba7675067f1ac5c5d94eaf629ebd65 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Tue, 5 Dec 2023 16:34:19 +0800 Subject: [PATCH 1048/1149] raftstore: fine-tunes slow score (#16087) ref tikv/tikv#15909 In the previous implementation, SlowScore identified a node as slow if it had 
hotspot regions. That is, previous SlowScore has fairly high false-positive rate. Moreover, this approach needs adjustment in sensitivity to promptly detect I/O jitters. To address this, this pr refines the algorithm by incorporating CPU usage as an additional condition to determine whether a node is slow. And based on our testing records, this modification significantly reduces the false-positive rate. Additionally, this pr has updated the default value of `inspect-interval` to `100ms` to enhance sensitivity and improve overall performance. Signed-off-by: lucasliang Co-authored-by: tonyxuqqi --- components/raftstore-v2/src/operation/life.rs | 7 +- .../raftstore-v2/src/operation/ready/mod.rs | 16 ++-- components/raftstore/src/store/config.rs | 10 ++- components/raftstore/src/store/fsm/store.rs | 13 ++- .../raftstore/src/store/local_metrics.rs | 80 +++++++++++++++---- components/raftstore/src/store/peer.rs | 18 +++-- components/raftstore/src/store/worker/pd.rs | 56 ++++++++++--- tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 65 +++------------ 9 files changed, 162 insertions(+), 104 deletions(-) diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 00df317f73a..e9fc84643da 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -45,6 +45,7 @@ use raftstore::{ life::{build_peer_destroyed_report, forward_destroy_to_source_peer}, Proposal, }, + local_metrics::IoType as InspectIoType, metrics::RAFT_PEER_PENDING_DURATION, util, DiskFullPeers, Transport, WriteTask, }, @@ -579,9 +580,9 @@ impl Store { { // Record the last statistics of commit-log-duration and store-write-duration. inspector.record_store_wait(start_ts.saturating_elapsed()); - inspector.record_store_commit(ctx.raft_metrics.stat_commit_log.avg()); - // Reset the stat_commit_log and wait it to be refreshed in the next tick. 
- ctx.raft_metrics.stat_commit_log.reset(); + inspector.record_store_commit(ctx.raft_metrics.health_stats.avg(InspectIoType::Network)); + // Reset the health_stats and wait it to be refreshed in the next tick. + ctx.raft_metrics.health_stats.reset(); ctx.pending_latency_inspect.push(inspector); } } diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index a2697f29f02..39ce9707359 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -43,6 +43,7 @@ use raftstore::{ coprocessor::{RegionChangeEvent, RoleChange}, store::{ fsm::store::StoreRegionMeta, + local_metrics::IoType, needs_evict_entry_cache, util::{self, is_first_append_entry, is_initial_msg}, worker_metrics::SNAP_COUNTER, @@ -989,7 +990,7 @@ impl Peer { return; } let now = Instant::now(); - let stat_raft_commit_log = &mut ctx.raft_metrics.stat_commit_log; + let health_stats = &mut ctx.raft_metrics.health_stats; for i in old_index + 1..=new_index { if let Some((term, trackers)) = self.proposals().find_trackers(i) { if self.entry_storage().term(i).map_or(false, |t| t == term) { @@ -1002,14 +1003,11 @@ impl Peer { for tracker in trackers { // Collect the metrics related to commit_log // durations. 
- stat_raft_commit_log.record(Duration::from_nanos(tracker.observe( - now, - hist, - |t| { - t.metrics.commit_not_persisted = !commit_persisted; - &mut t.metrics.wf_commit_log_nanos - }, - ))); + let duration = tracker.observe(now, hist, |t| { + t.metrics.commit_not_persisted = !commit_persisted; + &mut t.metrics.wf_commit_log_nanos + }); + health_stats.observe(Duration::from_nanos(duration), IoType::Network); } } } diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index b09afb3c6e1..c7c65e80d6c 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -353,6 +353,9 @@ pub struct Config { // Interval to inspect the latency of raftstore for slow store detection. pub inspect_interval: ReadableDuration, + /// Threshold of CPU utilization to inspect for slow store detection. + #[doc(hidden)] + pub inspect_cpu_util_thd: f64, // The unsensitive(increase it to reduce sensitiveness) of the cause-trend detection pub slow_trend_unsensitive_cause: f64, @@ -517,7 +520,12 @@ impl Default for Config { region_max_size: ReadableSize(0), region_split_size: ReadableSize(0), clean_stale_peer_delay: ReadableDuration::minutes(0), - inspect_interval: ReadableDuration::millis(500), + inspect_interval: ReadableDuration::millis(100), + // The default value of `inspect_cpu_util_thd` is 0.4, which means + // when the cpu utilization is greater than 40%, the store might be + // regarded as a slow node if there exists delayed inspected messages. + // It's good enough for most cases to reduce the false positive rate. 
+ inspect_cpu_util_thd: 0.4, // The param `slow_trend_unsensitive_cause == 2.0` can yield good results, // make it `10.0` to reduce a bit sensitiveness because SpikeFilter is disabled slow_trend_unsensitive_cause: 10.0, diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 8c8919df67e..9c3274d7945 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -93,7 +93,7 @@ use crate::{ ApplyBatchSystem, ApplyNotifier, ApplyPollerBuilder, ApplyRes, ApplyRouter, ApplyTaskRes, }, - local_metrics::RaftMetrics, + local_metrics::{IoType as InspectIoType, RaftMetrics}, memory::*, metrics::*, peer_storage, @@ -847,9 +847,14 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> mut inspector, } => { inspector.record_store_wait(send_time.saturating_elapsed()); - inspector.record_store_commit(self.ctx.raft_metrics.stat_commit_log.avg()); - // Reset the stat_commit_log and wait it to be refreshed in the next tick. - self.ctx.raft_metrics.stat_commit_log.reset(); + inspector.record_store_commit( + self.ctx + .raft_metrics + .health_stats + .avg(InspectIoType::Network), + ); + // Reset the health_stats and wait it to be refreshed in the next tick. + self.ctx.raft_metrics.health_stats.reset(); self.ctx.pending_latency_inspect.push(inspector); } StoreMsg::UnsafeRecoveryReport(report) => self.store_heartbeat_pd(Some(report)), diff --git a/components/raftstore/src/store/local_metrics.rs b/components/raftstore/src/store/local_metrics.rs index 7207ac7869d..dc94a3afbe7 100644 --- a/components/raftstore/src/store/local_metrics.rs +++ b/components/raftstore/src/store/local_metrics.rs @@ -68,35 +68,81 @@ impl RaftSendMessageMetrics { } } +/// Buffered statistics for recording local raftstore message duration. 
+/// +/// As it's only used for recording local raftstore message duration, +/// and it will be manually reset preiodically, so it's not necessary +/// to use `LocalHistogram`. #[derive(Default)] -pub struct RaftCommitLogStatistics { - pub last_commit_log_duration_sum: Duration, - pub last_commit_log_count_sum: u64, +struct LocalHealthStatistics { + duration_sum: Duration, + count: u64, } -impl RaftCommitLogStatistics { +impl LocalHealthStatistics { #[inline] - pub fn record(&mut self, dur: Duration) { - self.last_commit_log_count_sum += 1; - self.last_commit_log_duration_sum += dur; + fn observe(&mut self, dur: Duration) { + self.count += 1; + self.duration_sum += dur; } #[inline] - pub fn avg(&self) -> Duration { - if self.last_commit_log_count_sum > 0 { - Duration::from_micros( - self.last_commit_log_duration_sum.as_micros() as u64 - / self.last_commit_log_count_sum, - ) + fn avg(&self) -> Duration { + if self.count > 0 { + Duration::from_micros(self.duration_sum.as_micros() as u64 / self.count) } else { Duration::default() } } #[inline] + fn reset(&mut self) { + self.count = 0; + self.duration_sum = Duration::default(); + } +} + +#[repr(u8)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum IoType { + Disk = 0, + Network = 1, +} + +/// Buffered statistics for recording the health of raftstore. +#[derive(Default)] +pub struct HealthStatistics { + // represents periodic latency on the disk io. + disk_io_dur: LocalHealthStatistics, + // represents the latency of the network io. 
+ network_io_dur: LocalHealthStatistics, +} + +impl HealthStatistics { + #[inline] + pub fn observe(&mut self, dur: Duration, io_type: IoType) { + match io_type { + IoType::Disk => self.disk_io_dur.observe(dur), + IoType::Network => self.network_io_dur.observe(dur), + } + } + + #[inline] + pub fn avg(&self, io_type: IoType) -> Duration { + match io_type { + IoType::Disk => self.disk_io_dur.avg(), + IoType::Network => self.network_io_dur.avg(), + } + } + + #[inline] + /// Reset HealthStatistics. + /// + /// Should be manually reset when the metrics are + /// accepted by slowness inspector. pub fn reset(&mut self) { - self.last_commit_log_count_sum = 0; - self.last_commit_log_duration_sum = Duration::default(); + self.disk_io_dur.reset(); + self.network_io_dur.reset(); } } @@ -133,7 +179,7 @@ pub struct RaftMetrics { pub wf_commit_not_persist_log: LocalHistogram, // local statistics for slowness - pub stat_commit_log: RaftCommitLogStatistics, + pub health_stats: HealthStatistics, pub check_stale_peer: LocalIntCounter, pub leader_missing: Arc>>, @@ -172,7 +218,7 @@ impl RaftMetrics { wf_persist_log: STORE_WF_PERSIST_LOG_DURATION_HISTOGRAM.local(), wf_commit_log: STORE_WF_COMMIT_LOG_DURATION_HISTOGRAM.local(), wf_commit_not_persist_log: STORE_WF_COMMIT_NOT_PERSIST_LOG_DURATION_HISTOGRAM.local(), - stat_commit_log: RaftCommitLogStatistics::default(), + health_stats: HealthStatistics::default(), check_stale_peer: CHECK_STALE_PEER_COUNTER.local(), leader_missing: Arc::default(), last_flush_time: Instant::now_coarse(), diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 185ab9d2a92..2d304490bb7 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -71,7 +71,7 @@ use uuid::Uuid; use super::{ cmd_resp, - local_metrics::RaftMetrics, + local_metrics::{IoType, RaftMetrics}, metrics::*, peer_storage::{write_peer_state, CheckApplyingSnapStatus, HandleReadyResult, PeerStorage}, 
read_queue::{ReadIndexQueue, ReadIndexRequest}, @@ -1860,7 +1860,7 @@ where Ok(()) } - fn report_persist_log_duration(&self, pre_persist_index: u64, metrics: &RaftMetrics) { + fn report_persist_log_duration(&self, pre_persist_index: u64, metrics: &mut RaftMetrics) { if !metrics.waterfall_metrics || self.proposals.is_empty() { return; } @@ -1909,9 +1909,15 @@ where t.metrics.commit_not_persisted = !commit_persisted; &mut t.metrics.wf_commit_log_nanos }); + // Normally, commit_log_duration both contains the duraiton on persisting + // raft logs and transferring raft logs to other nodes. Therefore, it can + // reflects slowness of the node on I/Os, whatever the reason is. + // Here, health_stats uses the recorded commit_log_duration as the + // latency to perspect whether there exists jitters on network. It's not + // accurate, but it's proved that it's a good approximation. metrics - .stat_commit_log - .record(Duration::from_nanos(duration)); + .health_stats + .observe(Duration::from_nanos(duration), IoType::Network); } } } @@ -3129,7 +3135,7 @@ where let pre_persist_index = self.raft_group.raft.raft_log.persisted; let pre_commit_index = self.raft_group.raft.raft_log.committed; self.raft_group.on_persist_ready(self.persisted_number); - self.report_persist_log_duration(pre_persist_index, &ctx.raft_metrics); + self.report_persist_log_duration(pre_persist_index, &mut ctx.raft_metrics); self.report_commit_log_duration(pre_commit_index, &mut ctx.raft_metrics); let persist_index = self.raft_group.raft.raft_log.persisted; @@ -3174,7 +3180,7 @@ where let pre_persist_index = self.raft_group.raft.raft_log.persisted; let pre_commit_index = self.raft_group.raft.raft_log.committed; let mut light_rd = self.raft_group.advance_append(ready); - self.report_persist_log_duration(pre_persist_index, &ctx.raft_metrics); + self.report_persist_log_duration(pre_persist_index, &mut ctx.raft_metrics); self.report_commit_log_duration(pre_commit_index, &mut ctx.raft_metrics); let persist_index = 
self.raft_group.raft.raft_log.persisted; diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index b5bb189d84b..71ab6a9e2a9 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -41,7 +41,7 @@ use tikv_util::{ box_err, debug, error, info, metrics::ThreadInfoStatistics, store::QueryStats, - sys::thread::StdThreadBuildWrapper, + sys::{thread::StdThreadBuildWrapper, SysQuota}, thd_name, time::{Instant as TiInstant, UnixSecs}, timer::GLOBAL_TIMER_HANDLE, @@ -225,6 +225,9 @@ pub struct StoreStat { pub store_cpu_usages: RecordPairVec, pub store_read_io_rates: RecordPairVec, pub store_write_io_rates: RecordPairVec, + + store_cpu_quota: f64, // quota of cpu usage + store_cpu_busy_thd: f64, } impl Default for StoreStat { @@ -249,10 +252,33 @@ impl Default for StoreStat { store_cpu_usages: RecordPairVec::default(), store_read_io_rates: RecordPairVec::default(), store_write_io_rates: RecordPairVec::default(), + + store_cpu_quota: 0.0_f64, + store_cpu_busy_thd: 0.8_f64, } } } +impl StoreStat { + fn set_cpu_quota(&mut self, cpu_cores: f64, busy_thd: f64) { + self.store_cpu_quota = cpu_cores * 100.0; + self.store_cpu_busy_thd = busy_thd; + } + + fn maybe_busy(&self) -> bool { + if self.store_cpu_quota < 1.0 || self.store_cpu_busy_thd > 1.0 { + return false; + } + + let mut cpu_usage = 0_u64; + for record in self.store_cpu_usages.iter() { + cpu_usage += record.get_value(); + } + + (cpu_usage as f64 / self.store_cpu_quota) >= self.store_cpu_busy_thd + } +} + #[derive(Default)] pub struct PeerStat { pub read_bytes: u64, @@ -836,14 +862,14 @@ impl SlowScore { } } - fn record(&mut self, id: u64, duration: Duration) { + fn record(&mut self, id: u64, duration: Duration, not_busy: bool) { self.last_record_time = Instant::now(); if id != self.last_tick_id { return; } self.last_tick_finished = true; self.total_requests += 1; - if duration >= self.inspect_interval { + if not_busy && 
duration >= self.inspect_interval { self.timeout_requests += 1; } } @@ -1027,6 +1053,8 @@ where causal_ts_provider: Option>, // used for rawkv apiv2 grpc_service_manager: GrpcServiceManager, ) -> Runner { + let mut store_stat = StoreStat::default(); + store_stat.set_cpu_quota(SysQuota::cpu_cores_quota(), cfg.inspect_cpu_util_thd); let store_heartbeat_interval = cfg.pd_store_heartbeat_tick_interval.0; let interval = store_heartbeat_interval / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT; let mut stats_monitor = StatsMonitor::new( @@ -1045,7 +1073,7 @@ where is_hb_receiver_scheduled: false, region_peers: HashMap::default(), region_buckets: HashMap::default(), - store_stat: StoreStat::default(), + store_stat, start_ts: UnixSecs::now(), scheduler, store_heartbeat_interval, @@ -2269,8 +2297,11 @@ where Task::QueryRegionLeader { region_id } => self.handle_query_region_leader(region_id), Task::UpdateSlowScore { id, duration } => { // Fine-tuned, `SlowScore` only takes the I/O jitters on the disk into account. - self.slow_score - .record(id, duration.delays_on_disk_io(false)); + self.slow_score.record( + id, + duration.delays_on_disk_io(false), + !self.store_stat.maybe_busy(), + ); self.slow_trend.record(duration); } Task::RegionCpuRecords(records) => self.handle_region_cpu_records(records), @@ -2310,7 +2341,12 @@ where self.update_health_status(ServingStatus::Serving); } if !self.slow_score.last_tick_finished { - self.slow_score.record_timeout(); + // If the last tick is not finished, it means that the current store might + // be busy on handling requests or delayed on I/O operations. And only when + // the current store is not busy, it should record the last_tick as a timeout. + if !self.store_stat.maybe_busy() { + self.slow_score.record_timeout(); + } // If the last slow_score already reached abnormal state and was delayed for // reporting by `store-heartbeat` to PD, we should report it here manually as // a FAKE `store-heartbeat`. 
@@ -2342,17 +2378,17 @@ where STORE_INSPECT_DURATION_HISTOGRAM .with_label_values(&["store_process"]) .observe(tikv_util::time::duration_to_sec( - duration.store_process_duration.unwrap(), + duration.store_process_duration.unwrap_or_default(), )); STORE_INSPECT_DURATION_HISTOGRAM .with_label_values(&["store_wait"]) .observe(tikv_util::time::duration_to_sec( - duration.store_wait_duration.unwrap(), + duration.store_wait_duration.unwrap_or_default(), )); STORE_INSPECT_DURATION_HISTOGRAM .with_label_values(&["store_commit"]) .observe(tikv_util::time::duration_to_sec( - duration.store_commit_duration.unwrap(), + duration.store_commit_duration.unwrap_or_default(), )); STORE_INSPECT_DURATION_HISTOGRAM diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 5e7e4529c40..f1628cda50e 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -248,6 +248,7 @@ fn test_serde_custom_tikv_config() { io_reschedule_concurrent_max_count: 1234, io_reschedule_hotpot_duration: ReadableDuration::secs(4321), inspect_interval: ReadableDuration::millis(444), + inspect_cpu_util_thd: 0.666, check_leader_lease_interval: ReadableDuration::millis(123), renew_leader_lease_advance_duration: ReadableDuration::millis(456), reactive_memory_lock_tick_interval: ReadableDuration::millis(566), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index d1e83663c24..61a2a24b43a 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -3,7 +3,7 @@ slow-log-threshold = "1s" panic-when-unexpected-key-or-data = true abort-on-panic = true memory-usage-limit = "10GB" -memory-usage-high-water= 0.65 +memory-usage-high-water = 0.65 [log] level = "fatal" @@ -134,9 +134,7 @@ export-priority = "high" other-priority = "low" [pd] -endpoints = [ - "example.com:443", -] +endpoints = ["example.com:443"] [metric] job = "tikv_1" @@ -223,6 +221,7 @@ waterfall-metrics 
= true io-reschedule-concurrent-max-count = 1234 io-reschedule-hotpot-duration = "4321s" inspect-interval = "444ms" +inspect-cpu-util-thd = 0.666 check-leader-lease-interval = "123ms" renew-leader-lease-advance-duration = "456ms" reactive-memory-lock-tick-interval = "566ms" @@ -302,15 +301,7 @@ bloom-filter-bits-per-key = 123 block-based-bloom-filter = true ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 -compression-per-level = [ - "no", - "no", - "zstd", - "zstd", - "no", - "zstd", - "lz4", -] +compression-per-level = ["no", "no", "zstd", "zstd", "no", "zstd", "lz4"] bottommost-level-compression = "disable" bottommost-zstd-compression-dict-size = 1024 bottommost-zstd-compression-sample-size = 1024 @@ -374,15 +365,7 @@ bloom-filter-bits-per-key = 123 block-based-bloom-filter = true ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 -compression-per-level = [ - "no", - "no", - "zstd", - "zstd", - "no", - "zstd", - "lz4", -] +compression-per-level = ["no", "no", "zstd", "zstd", "no", "zstd", "lz4"] write-buffer-size = "1MB" max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 @@ -428,15 +411,7 @@ bloom-filter-bits-per-key = 123 block-based-bloom-filter = true ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 -compression-per-level = [ - "no", - "no", - "zstd", - "zstd", - "no", - "zstd", - "lz4", -] +compression-per-level = ["no", "no", "zstd", "zstd", "no", "zstd", "lz4"] write-buffer-size = "1MB" write-buffer-limit = "16MB" max-write-buffer-number = 12 @@ -483,15 +458,7 @@ bloom-filter-bits-per-key = 123 block-based-bloom-filter = true ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 -compression-per-level = [ - "no", - "no", - "zstd", - "zstd", - "no", - "zstd", - "lz4", -] +compression-per-level = ["no", "no", "zstd", "zstd", "no", "zstd", "lz4"] write-buffer-size = "1MB" max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 @@ -569,15 +536,7 @@ bloom-filter-bits-per-key = 123 block-based-bloom-filter = 
true ribbon-filter-above-level = 1 read-amp-bytes-per-bit = 0 -compression-per-level = [ - "no", - "no", - "zstd", - "zstd", - "no", - "zstd", - "lz4", -] +compression-per-level = ["no", "no", "zstd", "zstd", "no", "zstd", "lz4"] write-buffer-size = "1MB" max-write-buffer-number = 12 min-write-buffer-number-to-merge = 12 @@ -638,9 +597,7 @@ ca-path = "invalid path" cert-path = "invalid path" key-path = "invalid path" redact-info-log = true -cert-allowed-cn = [ - "example.tikv.com", -] +cert-allowed-cn = ["example.tikv.com"] [security.encryption] data-encryption-method = "aes128-ctr" @@ -688,9 +645,9 @@ enable-compaction-filter = false compaction-filter-skip-version-check = true [pessimistic-txn] -enabled = false # test backward compatibility +enabled = false # test backward compatibility wait-for-lock-timeout = "10ms" -wake-up-delay-duration = 100 # test backward compatibility +wake-up-delay-duration = 100 # test backward compatibility pipelined = false in-memory = false From 3c2fcd7ce1852c2412e3aa196c3fd0c91ecd477f Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 6 Dec 2023 10:35:48 +0800 Subject: [PATCH 1049/1149] metrics: fix heatmap on grafana 9 (#16145) close tikv/tikv#16144 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/grafana/common.py | 4 + metrics/grafana/tikv_details.json | 216 +++++++++++------------ metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 113 insertions(+), 109 deletions(-) diff --git a/metrics/grafana/common.py b/metrics/grafana/common.py index cb6757bee93..2c2ed7570ed 100644 --- a/metrics/grafana/common.py +++ b/metrics/grafana/common.py @@ -940,6 +940,9 @@ def heatmap_panel( # the resolution is too high. # See: https://grafana.com/blog/2020/06/23/how-to-visualize-prometheus-histograms-in-grafana/ maxDataPoints=512, + # Fix grafana heatmap migration panic if options is null. 
+ # See: https://github.com/grafana/grafana/blob/v9.5.14/public/app/plugins/panel/heatmap/migrations.ts#L17 + options={}, ) @@ -1081,6 +1084,7 @@ def heatmap_panel_graph_panel_histogram_quantile_pairs( description=graph_description, metric=f"{metric}", yaxes=yaxes(left_format=yaxis_format), + label_selectors=label_selectors, by_labels=graph_by_labels, hide_count=True, ), diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 6f8dffa27cd..11833e02ce0 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -4191,7 +4191,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -4496,7 +4496,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -4733,7 +4733,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -15259,7 +15259,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -15413,7 +15413,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -15421,14 +15421,14 @@ "intervalFactor": 1, "legendFormat": 
"99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -15436,14 +15436,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n 
tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -15451,14 +15451,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": true, "instant": false, @@ -15466,7 
+15466,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -15564,7 +15564,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -15869,7 +15869,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -16174,7 +16174,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -16479,7 +16479,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -17639,7 +17639,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -17944,7 +17944,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -18249,7 +18249,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -18554,7 +18554,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -18658,7 
+18658,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -19381,7 +19381,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -19485,7 +19485,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -23398,7 +23398,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -24413,7 +24413,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -24567,7 +24567,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -24575,14 +24575,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -24590,14 +24590,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -24605,14 +24605,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": true, "instant": false, @@ -24620,7 +24620,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -24718,7 +24718,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -24872,7 +24872,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -24880,14 +24880,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -24895,14 +24895,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -24910,14 
+24910,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": true, "instant": false, @@ -24925,7 +24925,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -25023,7 +25023,7 @@ "maxDataPoints": 512, 
"maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -25177,7 +25177,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -25185,14 +25185,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", 
"format": "time_series", "hide": false, "instant": false, @@ -25200,14 +25200,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -25215,14 +25215,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": true, "instant": false, @@ -25230,7 +25230,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -25328,7 +25328,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -25482,7 +25482,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -25490,14 +25490,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -25505,14 +25505,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -25520,14 +25520,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": true, "instant": false, @@ -25535,7 +25535,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -25633,7 +25633,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -25787,7 +25787,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -25795,14 +25795,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -25810,14 +25810,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -25825,14 +25825,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": true, "instant": false, @@ -25840,7 +25840,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -26997,7 +26997,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -29849,7 +29849,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -30542,7 +30542,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, 
"repeatDirection": null, "reverseYBuckets": false, @@ -34316,7 +34316,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -37131,7 +37131,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -43838,7 +43838,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -51579,7 +51579,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -52302,7 +52302,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -52406,7 +52406,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -52510,7 +52510,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -52747,7 +52747,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -52851,7 +52851,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -52955,7 +52955,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -53458,7 +53458,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": 
null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -54668,7 +54668,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -55038,7 +55038,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -57015,7 +57015,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -57119,7 +57119,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -57371,7 +57371,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -57475,7 +57475,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -57579,7 +57579,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -57846,7 +57846,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -59248,7 +59248,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -59352,7 +59352,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -59456,7 +59456,7 @@ "maxDataPoints": 512, "maxPerRow": 
null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -59560,7 +59560,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -59664,7 +59664,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -59768,7 +59768,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -59872,7 +59872,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -63511,7 +63511,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -63615,7 +63615,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -63719,7 +63719,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -63823,7 +63823,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -63927,7 +63927,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -64031,7 +64031,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -64135,7 +64135,7 @@ 
"maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -64239,7 +64239,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -65274,7 +65274,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, @@ -65378,7 +65378,7 @@ "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": null, + "options": {}, "repeat": null, "repeatDirection": null, "reverseYBuckets": false, diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 81ae429e361..978a1d4c9aa 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -d496158baafb3f61d8f4dca2a8434031ad6092b93f3aeecb4fd2947df09a8caf ./metrics/grafana/tikv_details.json +ad780a5aca1d52f0f3a84780fc94af71195f3b8a59bc915ea5bf37a15ae5a357 ./metrics/grafana/tikv_details.json From c1f7c4aab03398517de4ebc589a24f2239b44463 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Wed, 6 Dec 2023 14:07:20 +0800 Subject: [PATCH 1050/1149] Fix the QPS metrics in tikv_details.json. (#16149) close tikv/tikv#16148 Fix the issue where the filter label of QPS, in the tikv grafana, lacks the label `type`. 
Signed-off-by: lucasliang --- metrics/grafana/tikv_details.dashboard.py | 1 + metrics/grafana/tikv_details.json | 4 ++-- metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 6ab065f5433..5c84152174e 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -316,6 +316,7 @@ def Cluster() -> RowPanel: expr=expr_sum_rate( "tikv_grpc_msg_duration_seconds_count", label_selectors=['type!="kv_gc"'], + by_labels=["instance", "type"], ), legend_format=r"{{instance}}-{{type}}", ), diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 11833e02ce0..8c6bea17992 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -1469,7 +1469,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", "hide": false, "instant": false, @@ -1477,7 +1477,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}-{{type}}", "metric": "", - "query": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_grpc_msg_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"kv_gc\"}\n [$__rate_interval]\n)) by (instance, type) ", "refId": "", "step": 10, "target": "" diff --git 
a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 978a1d4c9aa..330822d3c7b 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -ad780a5aca1d52f0f3a84780fc94af71195f3b8a59bc915ea5bf37a15ae5a357 ./metrics/grafana/tikv_details.json +dbcc3ef2b588c133dbe4b56196abb366da5b25631f6d42bbc6ae1811b21bbec5 ./metrics/grafana/tikv_details.json From 54d7d425c9813a066308ddea85ee027e8faf5682 Mon Sep 17 00:00:00 2001 From: glorv Date: Wed, 6 Dec 2023 17:02:50 +0800 Subject: [PATCH 1051/1149] resource_control: replace limited future's post delay with delay before first poll (#16100) ref tikv/tikv#16026 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resource_control/src/future.rs | 63 +++++++++++-------- .../resource_control/src/resource_limiter.rs | 37 ++++++----- components/resource_control/src/service.rs | 2 + components/resource_control/src/worker.rs | 31 +++++---- 4 files changed, 81 insertions(+), 52 deletions(-) diff --git a/components/resource_control/src/future.rs b/components/resource_control/src/future.rs index 53bca48b301..0750a21c574 100644 --- a/components/resource_control/src/future.rs +++ b/components/resource_control/src/future.rs @@ -92,7 +92,9 @@ pub struct LimitedFuture { #[pin] post_delay: OptionalFuture>, resource_limiter: Arc, - res: Poll, + // if the future is first polled, we need to let it consume a 0 value + // to compensate the debt of previously finished tasks. 
+ is_first_poll: bool, } impl LimitedFuture { @@ -102,7 +104,7 @@ impl LimitedFuture { pre_delay: None.into(), post_delay: None.into(), resource_limiter, - res: Poll::Pending, + is_first_poll: true, } } } @@ -112,19 +114,32 @@ impl Future for LimitedFuture { fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let mut this = self.project(); - if !this.post_delay.is_done() { - assert!(this.pre_delay.is_done()); + if *this.is_first_poll { + debug_assert!(this.pre_delay.finished && this.post_delay.finished); + *this.is_first_poll = false; + let wait_dur = this + .resource_limiter + .consume(Duration::ZERO, IoBytes::default(), true) + .min(MAX_WAIT_DURATION); + if wait_dur > Duration::ZERO { + *this.pre_delay = Some( + GLOBAL_TIMER_HANDLE + .delay(std::time::Instant::now() + wait_dur) + .compat(), + ) + .into(); + } + } + if !this.post_delay.finished { + assert!(this.pre_delay.finished); std::mem::swap(&mut *this.pre_delay, &mut *this.post_delay); } - if !this.pre_delay.is_done() { + if !this.pre_delay.finished { let res = this.pre_delay.poll(cx); if res.is_pending() { return Poll::Pending; } } - if this.res.is_ready() { - return std::mem::replace(this.res, Poll::Pending); - } // get io stats is very expensive, so we only do so if only io control is // enabled. 
let mut last_io_bytes = None; @@ -157,8 +172,10 @@ impl Future for LimitedFuture { } else { IoBytes::default() }; - let mut wait_dur = this.resource_limiter.consume(dur, io_bytes); - if wait_dur == Duration::ZERO { + let mut wait_dur = this + .resource_limiter + .consume(dur, io_bytes, res.is_pending()); + if wait_dur == Duration::ZERO || res.is_ready() { return res; } if wait_dur > MAX_WAIT_DURATION { @@ -171,31 +188,24 @@ impl Future for LimitedFuture { .compat(), ) .into(); - if this.post_delay.poll(cx).is_ready() { - return res; - } - *this.res = res; + _ = this.post_delay.poll(cx); Poll::Pending } } /// `OptionalFuture` is similar to futures::OptionFuture, but provide an extra -/// `is_done` method. +/// `finished` flag to determine if the future requires poll. #[pin_project] struct OptionalFuture { #[pin] f: Option, - done: bool, + finished: bool, } impl OptionalFuture { fn new(f: Option) -> Self { - let done = f.is_none(); - Self { f, done } - } - - fn is_done(&self) -> bool { - self.done + let finished = f.is_none(); + Self { f, finished } } } @@ -212,7 +222,7 @@ impl Future for OptionalFuture { let this = self.project(); match this.f.as_pin_mut() { Some(x) => x.poll(cx).map(|r| { - *this.done = true; + *this.finished = true; Some(r) }), None => Poll::Ready(None), @@ -312,7 +322,7 @@ mod tests { let delta = new_stats - stats; let dur = start.saturating_elapsed(); assert_eq!(delta.total_consumed, 150); - assert_eq!(delta.total_wait_dur_us, 150_000); + assert!(delta.total_wait_dur_us >= 140_000 && delta.total_wait_dur_us <= 160_000); assert!(dur >= Duration::from_millis(150) && dur <= Duration::from_millis(160)); // fetch io bytes failed, consumed value is 0. 
@@ -320,7 +330,10 @@ mod tests { { fail::cfg("failed_to_get_thread_io_bytes_stats", "1*return").unwrap(); spawn_and_wait(&pool, empty(), resource_limiter.clone()); - assert_eq!(resource_limiter.get_limit_statistics(Io), new_stats); + assert_eq!( + resource_limiter.get_limit_statistics(Io).total_consumed, + new_stats.total_consumed + ); fail::remove("failed_to_get_thread_io_bytes_stats"); } } diff --git a/components/resource_control/src/resource_limiter.rs b/components/resource_control/src/resource_limiter.rs index bce6867ac2e..ab2144f18cc 100644 --- a/components/resource_control/src/resource_limiter.rs +++ b/components/resource_control/src/resource_limiter.rs @@ -71,19 +71,22 @@ impl ResourceLimiter { self.is_background } - pub fn consume(&self, cpu_time: Duration, io_bytes: IoBytes) -> Duration { + pub fn consume(&self, cpu_time: Duration, io_bytes: IoBytes, wait: bool) -> Duration { let cpu_dur = - self.limiters[ResourceType::Cpu as usize].consume(cpu_time.as_micros() as u64); - let io_dur = self.limiters[ResourceType::Io as usize].consume_io(io_bytes); + self.limiters[ResourceType::Cpu as usize].consume(cpu_time.as_micros() as u64, wait); + let io_dur = self.limiters[ResourceType::Io as usize].consume_io(io_bytes, wait); let wait_dur = cpu_dur.max(io_dur); - BACKGROUND_TASKS_WAIT_DURATION - .with_label_values(&[&self.name]) - .inc_by(wait_dur.as_micros() as u64); + if wait_dur > Duration::ZERO { + BACKGROUND_TASKS_WAIT_DURATION + .with_label_values(&[&self.name]) + .inc_by(wait_dur.as_micros() as u64); + } + wait_dur } pub async fn async_consume(&self, cpu_time: Duration, io_bytes: IoBytes) -> Duration { - let dur = self.consume(cpu_time, io_bytes); + let dur = self.consume(cpu_time, io_bytes, true); if !dur.is_zero() { _ = GLOBAL_TIMER_HANDLE .delay(Instant::now() + dur) @@ -154,12 +157,14 @@ impl QuotaLimiter { ) } - fn consume(&self, value: u64) -> Duration { - if value == 0 { + fn consume(&self, value: u64, wait: bool) -> Duration { + if value == 0 && 
self.limiter.speed_limit().is_infinite() { return Duration::ZERO; } - let dur = self.limiter.consume_duration(value as usize); - if dur != Duration::ZERO { + let mut dur = self.limiter.consume_duration(value as usize); + if !wait { + dur = Duration::ZERO; + } else if dur != Duration::ZERO { self.total_wait_dur_us .fetch_add(dur.as_micros() as u64, Ordering::Relaxed); } @@ -167,16 +172,18 @@ impl QuotaLimiter { dur } - fn consume_io(&self, value: IoBytes) -> Duration { + fn consume_io(&self, value: IoBytes, wait: bool) -> Duration { self.read_bytes.fetch_add(value.read, Ordering::Relaxed); self.write_bytes.fetch_add(value.write, Ordering::Relaxed); let value = value.read + value.write; - if value == 0 { + if value == 0 && self.limiter.speed_limit().is_infinite() { return Duration::ZERO; } - let dur = self.limiter.consume_duration(value as usize); - if dur != Duration::ZERO { + let mut dur = self.limiter.consume_duration(value as usize); + if !wait { + dur = Duration::ZERO; + } else if dur != Duration::ZERO { self.total_wait_dur_us .fetch_add(dur.as_micros() as u64, Ordering::Relaxed); } diff --git a/components/resource_control/src/service.rs b/components/resource_control/src/service.rs index 2c2bbdc5549..26652cda00e 100644 --- a/components/resource_control/src/service.rs +++ b/components/resource_control/src/service.rs @@ -575,6 +575,7 @@ pub mod tests { read: 1000, write: 1000, }, + true, ); // Wait for report ru metrics. std::thread::sleep(Duration::from_millis(100)); @@ -595,6 +596,7 @@ pub mod tests { read: 2000, write: 2000, }, + true, ); // Wait for report ru metrics. 
std::thread::sleep(Duration::from_millis(100)); diff --git a/components/resource_control/src/worker.rs b/components/resource_control/src/worker.rs index 2ea72f132ee..b90787914d6 100644 --- a/components/resource_control/src/worker.rs +++ b/components/resource_control/src/worker.rs @@ -700,6 +700,7 @@ mod tests { read: 1000, write: 1000, }, + true, ); worker.adjust_quota(); check_limiter( @@ -729,6 +730,7 @@ mod tests { read: 1000, write: 1000, }, + true, ); worker.adjust_quota(); check_limiter( @@ -747,6 +749,7 @@ mod tests { read: 5000, write: 5000, }, + true, ); worker.adjust_quota(); check_limiter( @@ -798,6 +801,7 @@ mod tests { read: 600, write: 600, }, + true, ); bg_limiter.consume( Duration::from_millis(1800), @@ -805,6 +809,7 @@ mod tests { read: 900, write: 900, }, + true, ); worker.adjust_quota(); check_limiter( @@ -873,6 +878,7 @@ mod tests { read: 600, write: 600, }, + true, ); new_bg_limiter.consume( Duration::from_millis(1800), @@ -880,6 +886,7 @@ mod tests { read: 900, write: 900, }, + true, ); worker.adjust_quota(); @@ -954,7 +961,7 @@ mod tests { // only default group, always return infinity. 
reset_quota(&mut worker, 6.4); - priority_limiters[1].consume(Duration::from_secs(50), IoBytes::default()); + priority_limiters[1].consume(Duration::from_secs(50), IoBytes::default(), true); worker.adjust(); check_limiter(f64::INFINITY, f64::INFINITY, f64::INFINITY); @@ -964,46 +971,46 @@ mod tests { resource_ctl.add_resource_group(rg2); reset_quota(&mut worker, 6.4); - priority_limiters[1].consume(Duration::from_secs(64), IoBytes::default()); + priority_limiters[1].consume(Duration::from_secs(64), IoBytes::default(), true); worker.adjust(); check_limiter(f64::INFINITY, f64::INFINITY, f64::INFINITY); reset_quota(&mut worker, 6.4); for _i in 0..100 { - priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default()); - priority_limiters[1].consume(Duration::from_millis(400), IoBytes::default()); + priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default(), true); + priority_limiters[1].consume(Duration::from_millis(400), IoBytes::default(), true); } worker.adjust(); check_limiter(f64::INFINITY, 5.2, 1.2); reset_quota(&mut worker, 6.4); for _i in 0..100 { - priority_limiters[0].consume(Duration::from_millis(120), IoBytes::default()); - priority_limiters[1].consume(Duration::from_millis(200), IoBytes::default()); + priority_limiters[0].consume(Duration::from_millis(120), IoBytes::default(), true); + priority_limiters[1].consume(Duration::from_millis(200), IoBytes::default(), true); } worker.adjust(); check_limiter(f64::INFINITY, 2.6, 0.6); reset_quota(&mut worker, 6.4); for _i in 0..100 { - priority_limiters[2].consume(Duration::from_millis(200), IoBytes::default()); + priority_limiters[2].consume(Duration::from_millis(200), IoBytes::default(), true); } worker.adjust(); check_limiter(f64::INFINITY, f64::INFINITY, f64::INFINITY); reset_quota(&mut worker, 8.0); for _i in 0..100 { - priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default()); - priority_limiters[1].consume(Duration::from_millis(240), IoBytes::default()); - 
priority_limiters[2].consume(Duration::from_millis(320), IoBytes::default()); + priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default(), true); + priority_limiters[1].consume(Duration::from_millis(240), IoBytes::default(), true); + priority_limiters[2].consume(Duration::from_millis(320), IoBytes::default(), true); } worker.adjust(); check_limiter(f64::INFINITY, 5.2, 2.8); reset_quota(&mut worker, 6.0); for _i in 0..100 { - priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default()); - priority_limiters[2].consume(Duration::from_millis(360), IoBytes::default()); + priority_limiters[0].consume(Duration::from_millis(240), IoBytes::default(), true); + priority_limiters[2].consume(Duration::from_millis(360), IoBytes::default(), true); } worker.adjust(); check_limiter(f64::INFINITY, 5.2, 5.2); From 8218b14797c8bdb0cb54ca0830448cafc56edd29 Mon Sep 17 00:00:00 2001 From: Shirly Date: Wed, 6 Dec 2023 17:19:21 +0800 Subject: [PATCH 1052/1149] raftstore/util: fix errors in comments of get_min_resolved_ts (#16140) ref tikv/tikv#13437 Signed-off-by: Shirly Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/util.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index d1ef3fde75a..6eef4c61686 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1193,14 +1193,15 @@ impl RegionReadProgressRegistry { } // Get the minimum `resolved_ts` which could ensure that there will be no more - // locks whose `start_ts` is greater than it. + // locks whose `commit_ts` is smaller than it. 
pub fn get_min_resolved_ts(&self) -> u64 { self.registry .lock() .unwrap() .iter() .map(|(_, rrp)| rrp.resolved_ts()) - .filter(|ts| *ts != 0) // ts == 0 means the peer is uninitialized + //TODO: the uninitialized peer should be taken into consideration instead of skipping it(https://github.com/tikv/tikv/issues/15506). + .filter(|ts| *ts != 0) // ts == 0 means the peer is uninitialized, .min() .unwrap_or(0) } From 213299221806959c95d05d0f2d7368e597fa9281 Mon Sep 17 00:00:00 2001 From: Connor Date: Wed, 6 Dec 2023 19:37:20 +0800 Subject: [PATCH 1053/1149] server: Remove periodic heap profiling (#16151) ref tikv/tikv#15927 Remove periodic heap profiling Signed-off-by: Connor1996 --- components/server/src/server.rs | 1 - components/server/src/server2.rs | 1 - doc/http.md | 58 +---- src/config/mod.rs | 12 +- src/server/status_server/mod.rs | 160 ++---------- src/server/status_server/profile.rs | 269 +-------------------- tests/integrations/server/status_server.rs | 1 - 7 files changed, 37 insertions(+), 465 deletions(-) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 059cda0bb91..00ab39a0e6a 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1450,7 +1450,6 @@ where self.cfg_controller.take().unwrap(), Arc::new(self.core.config.security.clone()), self.engines.as_ref().unwrap().engine.raft_extension(), - self.core.store_path.clone(), self.resource_manager.clone(), self.grpc_service_mgr.clone(), ) { diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 636a4bc9282..38f5e94038f 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1296,7 +1296,6 @@ where self.cfg_controller.clone().unwrap(), Arc::new(self.core.config.security.clone()), self.engines.as_ref().unwrap().engine.raft_extension(), - self.core.store_path.clone(), self.resource_manager.clone(), self.grpc_service_mgr.clone(), ) { diff --git a/doc/http.md b/doc/http.md index 
5aff02e75ea..625af034091 100644 --- a/doc/http.md +++ b/doc/http.md @@ -36,76 +36,25 @@ The server will return CPU profiling data. The response format is determined by The raw profile data can be handled by `pprof` tool. For example, use `go tool pprof --http=0.0.0.0:1234 xxx.proto` to open a interactive web browser. -## Activate Heap Profiling - -Activate heap profiling of jemalloc. When activated, jemalloc would collect memory usage at malloc, demalloc, etc., walking the call stack to capture a backtrace. So it would affect performance in some extent. - -```bash -curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap_activate?interval=' -``` - -#### Parameters - -- **interval** (optional): Specifies the interval (in seconds) for dumping heap profiles in a temporary directory under TiKV data directory. If set to 0, period dumping is disable. You can dump heap profiles manually by the other API. - - Default: 0 - - Example: `?interval=60` - -#### Response - -A confirmation message indicating whether heap profiling activation was successful. If it has been already activated, it would return a error message without any side effect. - -## Deactivate Heap Profiling - -Deactivate the currently running heap profiling. - -```bash -curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap_deactivate' -``` - -#### Response - -If heap profiling is active, it will be stopped. The server will return a message indicating whether the deactivation was successful. -If heap profiling is not currently active, the server will return a message indicating that no heap profiling is running. - -## List Heap Profiles - -List available heap profiling profiles which are periodically dumped when activated by `heap_activate` API with `interval` specified. - -Note that, once deactivation is performed, all existing profiles will be deleted. 
- -```bash -curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap_list' -``` - -#### Response - -It will return a list of profiles, each represented as a file name and last modification timestamp, in plain text format. The profiles are sorted in reverse order based on their modification timestamps. - -If there are no available heap profiles or heap profiling is inactive, the server will return an empty list. - -## Retrieve Heap Profile +## Heap Profiling Collect and export heap profiling data. Note that, heap profile is not like CPU profile which is collected within the specified time range right after the request. Instead, heap profile is just a snapshot of the accumulated memory usage at the time of request, as the memory usage is always being collected once activated. ```bash -curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap?name=&jeprof=' +curl -X GET 'http://$TIKV_ADDRESS/debug/pprof/heap?jeprof=' ``` #### Parameters -- **name** (optional): Specifies the name of the heap profile to retrieve. If not specified, a heap profile will be retrieved. - - Default: `` - - Example: `?name=000001.heap` - - **jeprof** (optional): Indicates whether to use Jeprof to process the heap profile to generate call graph. It needs `perl` being installed. - Default: false - Example: `?jeprof=true` #### Response -The server will return heap profiling data. The response format is determined by the `jeprof` parameter. If true, the response will be a call graph in SVG format. Otherwise, the response will be raw profile data in jemalloc dedicated format. +The server will return heap profiling data. The response format is determined by the `jeprof` parameter. If true, the response will be a call graph in SVG format generated by `jeprof` needing `perl` installed in the TiKV environment. Otherwise, the response will be raw profile data in jemalloc dedicated format. 
## Heap Profile Symbolization @@ -139,4 +88,3 @@ curl -X POST -d '' 'http://$TIKV_ADDRESS/debug/pprof/symbol' #### Response A list of resolved symbols in plain text. Each line represented as a hexadecimal address followed by the corresponding function name. If a memory address cannot be resolved, it will be marked with "??". - diff --git a/src/config/mod.rs b/src/config/mod.rs index e5df8c3e153..aac91aaaedf 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -82,7 +82,6 @@ use crate::{ server::{ gc_worker::{GcConfig, RawCompactionFilterFactory, WriteCompactionFilterFactory}, lock_manager::Config as PessimisticTxnConfig, - status_server::HEAP_PROFILE_ACTIVE, ttl::TtlCompactionFilterFactory, Config as ServerConfig, CONFIG_ROCKSDB_GAUGE, }, @@ -3272,12 +3271,10 @@ impl Default for MemoryConfig { impl MemoryConfig { pub fn init(&self) { if self.enable_heap_profiling { - let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); if let Err(e) = tikv_alloc::activate_prof() { error!("failed to enable heap profiling"; "err" => ?e); return; } - *activate = Some(None); tikv_alloc::set_prof_sample(self.profiling_sample_per_bytes.0).unwrap(); } } @@ -3289,16 +3286,9 @@ impl ConfigManager for MemoryConfigManager { fn dispatch(&mut self, changes: ConfigChange) -> CfgResult<()> { if let Some(ConfigValue::Bool(enable)) = changes.get("enable_heap_profiling") { if *enable { - let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); - // already enabled by HTTP API, do nothing - if activate.is_none() { - tikv_alloc::activate_prof()?; - *activate = Some(None); - } + tikv_alloc::activate_prof()?; } else { - let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); tikv_alloc::deactivate_prof()?; - *activate = None; } } diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 60b267a6d94..9a2bb5743ae 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -6,7 +6,6 @@ use std::{ env::args, error::Error as StdError, 
net::SocketAddr, - path::PathBuf, pin::Pin, str::{self, FromStr}, sync::Arc, @@ -18,7 +17,7 @@ use async_stream::stream; use collections::HashMap; use flate2::{write::GzEncoder, Compression}; use futures::{ - compat::{Compat01As03, Stream01CompatExt}, + compat::Compat01As03, future::{ok, poll_fn}, prelude::*, }; @@ -40,7 +39,6 @@ use openssl::{ x509::X509, }; use pin_project::pin_project; -pub use profile::HEAP_PROFILE_ACTIVE; use profile::*; use prometheus::TEXT_FORMAT; use regex::Regex; @@ -57,7 +55,7 @@ use tikv_util::{ }; use tokio::{ io::{AsyncRead, AsyncWrite}, - runtime::{Builder, Handle, Runtime}, + runtime::{Builder, Runtime}, sync::oneshot::{self, Receiver, Sender}, }; use tokio_openssl::SslStream; @@ -91,7 +89,6 @@ pub struct StatusServer { cfg_controller: ConfigController, router: R, security_config: Arc, - store_path: PathBuf, resource_manager: Option>, grpc_service_mgr: GrpcServiceManager, } @@ -105,7 +102,6 @@ where cfg_controller: ConfigController, security_config: Arc, router: R, - store_path: PathBuf, resource_manager: Option>, grpc_service_mgr: GrpcServiceManager, ) -> Result { @@ -128,120 +124,27 @@ where cfg_controller, router, security_config, - store_path, resource_manager, grpc_service_mgr, }) } - fn list_heap_prof(_req: Request) -> hyper::Result> { - let profiles = match list_heap_profiles() { - Ok(s) => s, - Err(e) => return Ok(make_response(StatusCode::INTERNAL_SERVER_ERROR, e)), - }; - - let text = profiles - .into_iter() - .map(|(f, ct)| format!("{}\t\t{}", f, ct)) - .collect::>() - .join("\n") - .into_bytes(); - - let response = Response::builder() - .header("Content-Type", mime::TEXT_PLAIN.to_string()) - .header("Content-Length", text.len()) - .body(text.into()) - .unwrap(); - Ok(response) - } - - async fn activate_heap_prof( - req: Request, - store_path: PathBuf, - ) -> hyper::Result> { - let query = req.uri().query().unwrap_or(""); - let query_pairs: HashMap<_, _> = url::form_urlencoded::parse(query.as_bytes()).collect(); - - let 
interval: u64 = match query_pairs.get("interval") { - Some(val) => match val.parse() { - Ok(val) => val, - Err(err) => return Ok(make_response(StatusCode::BAD_REQUEST, err.to_string())), - }, - None => 0, - }; - - let period = if interval == 0 { - None - } else { - let interval = Duration::from_secs(interval); - Some( - GLOBAL_TIMER_HANDLE - .interval(Instant::now() + interval, interval) - .compat() - .map_ok(|_| ()) - .map_err(|_| TIMER_CANCELED.to_owned()) - .into_stream(), - ) - }; - let (tx, rx) = oneshot::channel(); - let callback = move || tx.send(()).unwrap_or_default(); - let res = Handle::current().spawn(activate_heap_profile(period, store_path, callback)); - if rx.await.is_ok() { - let msg = "activate heap profile success"; - Ok(make_response(StatusCode::OK, msg)) - } else { - let errmsg = format!("{:?}", res.await); - Ok(make_response(StatusCode::INTERNAL_SERVER_ERROR, errmsg)) - } - } - - fn deactivate_heap_prof(_req: Request) -> hyper::Result> { - let body = if deactivate_heap_profile() { - "deactivate heap profile success" - } else { - "no heap profile is running" - }; - Ok(make_response(StatusCode::OK, body)) - } - async fn dump_heap_prof_to_resp(req: Request) -> hyper::Result> { let query = req.uri().query().unwrap_or(""); let query_pairs: HashMap<_, _> = url::form_urlencoded::parse(query.as_bytes()).collect(); let use_jeprof = query_pairs.get("jeprof").map(|x| x.as_ref()) == Some("true"); - let result = if let Some(name) = query_pairs.get("name") { - let re = Regex::new(HEAP_PROFILE_REGEX).unwrap(); - if !re.is_match(name) { - let errmsg = format!("heap profile name {} is invalid", name); - return Ok(make_response(StatusCode::BAD_REQUEST, errmsg)); - } - let profiles = match list_heap_profiles() { - Ok(s) => s, + let result = { + let path = match dump_one_heap_profile() { + Ok(path) => path, Err(e) => return Ok(make_response(StatusCode::INTERNAL_SERVER_ERROR, e)), }; - if profiles.iter().any(|(f, _)| f == name) { - let dir = match 
heap_profiles_dir() { - Some(path) => path, - None => { - return Ok(make_response( - StatusCode::INTERNAL_SERVER_ERROR, - "heap profile is not active", - )); - } - }; - let path = dir.join(name.as_ref()); - if use_jeprof { - jeprof_heap_profile(path.to_str().unwrap()) - } else { - read_file(path.to_str().unwrap()) - } + if use_jeprof { + jeprof_heap_profile(path.to_str().unwrap()) } else { - let errmsg = format!("heap profile {} not found", name); - return Ok(make_response(StatusCode::BAD_REQUEST, errmsg)); + read_file(path.to_str().unwrap()) } - } else { - dump_one_heap_profile() }; match result { @@ -692,7 +595,6 @@ where let security_config = self.security_config.clone(); let cfg_controller = self.cfg_controller.clone(); let router = self.router.clone(); - let store_path = self.store_path.clone(); let resource_manager = self.resource_manager.clone(); let grpc_service_mgr = self.grpc_service_mgr.clone(); // Start to serve. @@ -701,7 +603,6 @@ where let security_config = security_config.clone(); let cfg_controller = cfg_controller.clone(); let router = router.clone(); - let store_path = store_path.clone(); let resource_manager = resource_manager.clone(); let grpc_service_mgr = grpc_service_mgr.clone(); async move { @@ -711,7 +612,6 @@ where let security_config = security_config.clone(); let cfg_controller = cfg_controller.clone(); let router = router.clone(); - let store_path = store_path.clone(); let resource_manager = resource_manager.clone(); let grpc_service_mgr = grpc_service_mgr.clone(); async move { @@ -749,12 +649,23 @@ where Self::handle_get_metrics(req, &cfg_controller) } (Method::GET, "/status") => Ok(Response::default()), - (Method::GET, "/debug/pprof/heap_list") => Self::list_heap_prof(req), + (Method::GET, "/debug/pprof/heap_list") => { + Ok(make_response( + StatusCode::GONE, + "Deprecated, heap profiling is always enabled by default, just use /debug/pprof/heap to get the heap profile when needed", + )) + } (Method::GET, "/debug/pprof/heap_activate") 
=> { - Self::activate_heap_prof(req, store_path).await + Ok(make_response( + StatusCode::GONE, + "Deprecated, use config `memory.enable_heap_profiling` to toggle", + )) } (Method::GET, "/debug/pprof/heap_deactivate") => { - Self::deactivate_heap_prof(req) + Ok(make_response( + StatusCode::GONE, + "Deprecated, use config `memory.enable_heap_profiling` to toggle", + )) } (Method::GET, "/debug/pprof/heap") => { Self::dump_heap_prof_to_resp(req).await @@ -1221,13 +1132,11 @@ mod tests { #[test] fn test_status_service() { - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1271,13 +1180,11 @@ mod tests { #[test] fn test_config_endpoint() { - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1330,7 +1237,6 @@ mod tests { ConfigController::new(config), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1388,13 +1294,11 @@ mod tests { #[test] fn test_status_service_fail_endpoints() { let _guard = fail::FailScenario::setup(); - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1506,13 +1410,11 @@ mod tests { #[test] fn test_status_service_fail_endpoints_can_trigger_fails() { let _guard = fail::FailScenario::setup(); - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), 
None, GrpcServiceManager::dummy(), ) @@ -1552,13 +1454,11 @@ mod tests { #[test] fn test_status_service_fail_endpoints_should_give_404_when_failpoints_are_disable() { let _guard = fail::FailScenario::setup(); - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1590,13 +1490,11 @@ mod tests { } fn do_test_security_status_service(allowed_cn: HashSet, expected: bool) { - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(new_security_cfg(Some(allowed_cn))), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1665,13 +1563,11 @@ mod tests { #[test] #[ignore] fn test_pprof_heap_service() { - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1697,13 +1593,11 @@ mod tests { #[test] fn test_pprof_profile_service() { let _test_guard = TEST_PROFILE_MUTEX.lock().unwrap(); - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1732,13 +1626,11 @@ mod tests { #[test] fn test_pprof_symbol_service() { let _test_guard = TEST_PROFILE_MUTEX.lock().unwrap(); - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1785,13 +1677,11 @@ mod tests { #[test] fn test_metrics() { let _test_guard = 
TEST_PROFILE_MUTEX.lock().unwrap(); - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1842,13 +1732,11 @@ mod tests { #[test] fn test_change_log_level() { - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::default(), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1898,13 +1786,11 @@ mod tests { let cfgs = [TikvConfig::default(), multi_rocks_cfg]; let resp_strs = ["raft-kv", "partitioned-raft-kv"]; for (cfg, resp_str) in IntoIterator::into_iter(cfgs).zip(resp_strs) { - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::new(cfg), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) @@ -1937,13 +1823,11 @@ mod tests { multi_rocks_cfg.storage.engine = EngineType::RaftKv2; let cfgs = [TikvConfig::default(), multi_rocks_cfg]; for cfg in IntoIterator::into_iter(cfgs) { - let temp_dir = tempfile::TempDir::new().unwrap(); let mut status_server = StatusServer::new( 1, ConfigController::new(cfg), Arc::new(SecurityConfig::default()), MockRouter, - temp_dir.path().to_path_buf(), None, GrpcServiceManager::dummy(), ) diff --git a/src/server/status_server/profile.rs b/src/server/status_server/profile.rs index dbf819b35fe..7d7e90741e4 100644 --- a/src/server/status_server/profile.rs +++ b/src/server/status_server/profile.rs @@ -1,43 +1,33 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - fs::{File, Metadata}, + fs::File, io::{Read, Write}, path::PathBuf, pin::Pin, process::{Command, Stdio}, sync::Mutex, - time::{Duration, UNIX_EPOCH}, }; -use chrono::{offset::Local, DateTime}; use futures::{ - channel::oneshot::{self, Sender}, future::BoxFuture, - select, task::{Context, Poll}, - Future, FutureExt, Stream, StreamExt, + Future, FutureExt, }; use lazy_static::lazy_static; use pprof::protos::Message; use regex::Regex; -use tempfile::{NamedTempFile, TempDir}; +use tempfile::NamedTempFile; #[cfg(not(test))] -use tikv_alloc::{activate_prof, deactivate_prof, dump_prof}; +use tikv_alloc::dump_prof; #[cfg(test)] -pub use self::test_utils::TEST_PROFILE_MUTEX; +use self::test_utils::dump_prof; #[cfg(test)] -use self::test_utils::{activate_prof, deactivate_prof, dump_prof}; - -// File name suffix for periodically dumped heap profiles. -pub const HEAP_PROFILE_SUFFIX: &str = ".heap"; -pub const HEAP_PROFILE_REGEX: &str = r"^[0-9]{6,6}\.heap$"; +pub use self::test_utils::TEST_PROFILE_MUTEX; lazy_static! { // If it's some it means there are already a CPU profiling. static ref CPU_PROFILE_ACTIVE: Mutex> = Mutex::new(None); - // If it's some it means there are already a heap profiling. The channel is used to deactivate a profiling. - pub static ref HEAP_PROFILE_ACTIVE: Mutex, TempDir)>>> = Mutex::new(None); // To normalize thread names. static ref THREAD_NAME_RE: Regex = @@ -93,91 +83,11 @@ impl Future for ProfileRunner { } /// Trigger a heap profile and return the content. -pub fn dump_one_heap_profile() -> Result, String> { - if HEAP_PROFILE_ACTIVE.lock().unwrap().is_none() { - return Err("heap profiling is not activated".to_owned()); - } +pub fn dump_one_heap_profile() -> Result { let f = NamedTempFile::new().map_err(|e| format!("create tmp file fail: {}", e))?; - let path = f.path().to_str().unwrap(); - dump_prof(path).map_err(|e| format!("dump_prof: {}", e))?; - read_file(path) -} - -/// Activate heap profile and call `callback` if successfully. 
-/// `deactivate_heap_profile` can only be called after it's notified from -/// `callback`. -pub async fn activate_heap_profile( - dump_period: Option, - store_path: PathBuf, - callback: F, -) -> Result<(), String> -where - S: Stream> + Send + Unpin + 'static, - F: FnOnce() + Send + 'static, -{ - if HEAP_PROFILE_ACTIVE.lock().unwrap().is_some() { - return Err("Already in Heap Profiling".to_owned()); - } - - let (tx, rx) = oneshot::channel(); - let dir = tempfile::Builder::new() - .prefix("heap-") - .tempdir_in(store_path) - .map_err(|e| format!("create temp directory: {}", e))?; - let dir_path = dir.path().to_str().unwrap().to_owned(); - - let on_start = move || { - let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); - assert!(activate.is_none()); - *activate = Some(Some((tx, dir))); - activate_prof().map_err(|e| format!("activate_prof: {}", e))?; - callback(); - info!("periodical heap profiling is started"); - Ok(()) - }; - - let on_end = |_| { - let res = deactivate_prof().map_err(|e| format!("deactivate_prof: {}", e)); - *HEAP_PROFILE_ACTIVE.lock().unwrap() = None; - res - }; - - let end = async move { - if let Some(dump_period) = dump_period { - select! { - _ = rx.fuse() => { - info!("periodical heap profiling is canceled"); - Ok(()) - }, - res = dump_heap_profile_periodically(dump_period, dir_path).fuse() => { - warn!("the heap profiling dump loop shouldn't break"; "res" => ?res); - res - } - } - } else { - let _ = rx.await; - info!("periodical heap profiling is canceled"); - Ok(()) - } - }; - - ProfileRunner::new(on_start, on_end, end.boxed())?.await -} - -/// Deactivate heap profile. Return `false` if it hasn't been activated. 
-pub fn deactivate_heap_profile() -> bool { - let mut activate = HEAP_PROFILE_ACTIVE.lock().unwrap(); - match activate.as_mut() { - Some(tx) => { - if let Some((tx, _)) = tx.take() { - let _ = tx.send(()); - } else { - *activate = None; - } - true - } - None => false, - } + let path = f.path(); + dump_prof(path.to_str().unwrap()).map_err(|e| format!("dump_prof: {}", e))?; + Ok(path.to_owned()) } /// Trigger one cpu profile. @@ -274,57 +184,6 @@ pub fn jeprof_heap_profile(path: &str) -> Result, String> { Ok(output.stdout) } -pub fn heap_profiles_dir() -> Option { - HEAP_PROFILE_ACTIVE - .lock() - .unwrap() - .as_ref() - .and_then(|v| v.as_ref().map(|(_, dir)| dir.path().to_owned())) -} - -pub fn list_heap_profiles() -> Result, String> { - let path = match heap_profiles_dir() { - Some(path) => path.into_os_string().into_string().unwrap(), - None => return Ok(vec![]), - }; - - let dir = std::fs::read_dir(path).map_err(|e| format!("read dir fail: {}", e))?; - let mut profiles = Vec::new(); - for item in dir { - let item = match item { - Ok(x) => x, - _ => continue, - }; - let f = item.file_name().to_str().unwrap().to_owned(); - if !f.ends_with(HEAP_PROFILE_SUFFIX) { - continue; - } - let ct = item.metadata().map(|x| last_change_epoch(&x)).unwrap(); - let dt = DateTime::::from(UNIX_EPOCH + Duration::from_secs(ct)); - profiles.push((f, dt.format("%Y-%m-%d %H:%M:%S").to_string())); - } - - // Reverse sort them. 
- profiles.sort_by(|x, y| y.1.cmp(&x.1)); - info!("list_heap_profiles gets {} items", profiles.len()); - Ok(profiles) -} - -async fn dump_heap_profile_periodically(mut period: S, dir: String) -> Result<(), String> -where - S: Stream> + Send + Unpin + 'static, -{ - let mut id = 0; - while let Some(res) = period.next().await { - res?; - id += 1; - let path = format!("{}/{:0>6}{}", dir, id, HEAP_PROFILE_SUFFIX); - dump_prof(&path).map_err(|e| format!("dump_prof: {}", e))?; - info!("a heap profile is dumped to {}", path); - } - Ok(()) -} - fn extract_thread_name(thread_name: &str) -> String { THREAD_NAME_RE .captures(thread_name) @@ -350,43 +209,18 @@ mod test_utils { pub static ref TEST_PROFILE_MUTEX: Mutex<()> = Mutex::new(()); } - pub fn activate_prof() -> ProfResult<()> { - Ok(()) - } - pub fn deactivate_prof() -> ProfResult<()> { - Ok(()) - } pub fn dump_prof(_: &str) -> ProfResult<()> { Ok(()) } } -#[cfg(unix)] -fn last_change_epoch(metadata: &Metadata) -> u64 { - use std::os::unix::fs::MetadataExt; - metadata.ctime() as u64 -} - -#[cfg(not(unix))] -fn last_change_epoch(metadata: &Metadata) -> u64 { - 0 -} - #[cfg(test)] mod tests { - use std::sync::mpsc::sync_channel; - - use futures::{channel::mpsc, executor::block_on, SinkExt}; + use futures::executor::block_on; use tokio::runtime; use super::*; - #[test] - fn test_last_change_epoch() { - let f = tempfile::tempfile().unwrap(); - assert!(last_change_epoch(&f.metadata().unwrap()) > 0); - } - #[test] fn test_extract_thread_name() { assert_eq!(&extract_thread_name("test-name-1"), "test-name"); @@ -424,86 +258,5 @@ mod tests { drop(tx1); block_on(res1).unwrap().unwrap_err(); - - let expected = "Already in Heap Profiling"; - - let (tx1, rx1) = mpsc::channel(1); - let res1 = rt.spawn(activate_heap_profile( - Some(rx1), - std::env::temp_dir(), - || {}, - )); - thread::sleep(Duration::from_millis(100)); - - let (_tx2, rx2) = mpsc::channel(1); - let res2 = rt.spawn(activate_heap_profile( - Some(rx2), - 
std::env::temp_dir(), - || {}, - )); - assert_eq!(block_on(res2).unwrap().unwrap_err(), expected); - - drop(tx1); - block_on(res1).unwrap().unwrap(); - } - - #[test] - fn test_profile_guard_toggle() { - let _test_guard = TEST_PROFILE_MUTEX.lock().unwrap(); - let rt = runtime::Builder::new_multi_thread() - .worker_threads(4) - .build() - .unwrap(); - - // Test activated profiling can be stopped by canceling the period stream. - let (tx, rx) = mpsc::channel(1); - let res = rt.spawn(activate_heap_profile(Some(rx), std::env::temp_dir(), || {})); - drop(tx); - block_on(res).unwrap().unwrap(); - - // Test activated profiling can be stopped by the handle. - let (tx, rx) = sync_channel::(1); - let on_activated = move || drop(tx); - let check_activated = move || rx.recv().is_err(); - - let (_tx, _rx) = mpsc::channel(1); - let res = rt.spawn(activate_heap_profile( - Some(_rx), - std::env::temp_dir(), - on_activated, - )); - assert!(check_activated()); - assert!(deactivate_heap_profile()); - block_on(res).unwrap().unwrap(); - } - - #[test] - fn test_heap_profile_exit() { - let _test_guard = TEST_PROFILE_MUTEX.lock().unwrap(); - let rt = runtime::Builder::new_multi_thread() - .worker_threads(4) - .build() - .unwrap(); - - // Test heap profiling can be stopped by sending an error. - let (mut tx, rx) = mpsc::channel(1); - let res = rt.spawn(activate_heap_profile(Some(rx), std::env::temp_dir(), || {})); - block_on(tx.send(Err("test".to_string()))).unwrap(); - block_on(res).unwrap().unwrap_err(); - - // Test heap profiling can be activated again. 
- let (tx, rx) = sync_channel::(1); - let on_activated = move || drop(tx); - let check_activated = move || rx.recv().is_err(); - - let (_tx, _rx) = mpsc::channel(1); - let res = rt.spawn(activate_heap_profile( - Some(_rx), - std::env::temp_dir(), - on_activated, - )); - assert!(check_activated()); - assert!(deactivate_heap_profile()); - block_on(res).unwrap().unwrap(); } } diff --git a/tests/integrations/server/status_server.rs b/tests/integrations/server/status_server.rs index 3e1fbd4a9e8..a2921f77b95 100644 --- a/tests/integrations/server/status_server.rs +++ b/tests/integrations/server/status_server.rs @@ -45,7 +45,6 @@ fn test_region_meta_endpoint() { ConfigController::default(), Arc::new(SecurityConfig::default()), router, - std::env::temp_dir(), None, GrpcServiceManager::dummy(), ) From eefbdcba61bd3847a302fafcade6bf5f05627c35 Mon Sep 17 00:00:00 2001 From: crazycs Date: Mon, 11 Dec 2023 16:51:18 +0800 Subject: [PATCH 1054/1149] *: uniform deadline exceeded error in cop response (#16155) close tikv/tikv#16154 Signed-off-by: crazycs520 --- components/raftstore/src/errors.rs | 25 ++++++++++++++- components/tikv_util/src/deadline.rs | 9 ++++++ src/coprocessor/endpoint.rs | 35 ++++++++++++++++++--- src/storage/errors.rs | 8 ++--- tests/failpoints/cases/test_coprocessor.rs | 36 ++++++++++++++++------ 5 files changed, 93 insertions(+), 20 deletions(-) diff --git a/components/raftstore/src/errors.rs b/components/raftstore/src/errors.rs index d1597a77121..49a52de26e1 100644 --- a/components/raftstore/src/errors.rs +++ b/components/raftstore/src/errors.rs @@ -7,7 +7,10 @@ use error_code::{self, ErrorCode, ErrorCodeExt}; use kvproto::{errorpb, metapb, raft_serverpb}; use protobuf::ProtobufError; use thiserror::Error; -use tikv_util::{codec, deadline::DeadlineError}; +use tikv_util::{ + codec, + deadline::{set_deadline_exceeded_busy_error, DeadlineError}, +}; use super::{coprocessor::Error as CopError, store::SnapError}; @@ -287,6 +290,9 @@ impl From for errorpb::Error 
{ e.set_store_peer_id(store_peer_id); errorpb.set_mismatch_peer_id(e); } + Error::DeadlineExceeded => { + set_deadline_exceeded_busy_error(&mut errorpb); + } _ => {} }; @@ -350,3 +356,20 @@ impl ErrorCodeExt for Error { } } } + +#[cfg(test)] +mod tests { + use kvproto::errorpb; + + use crate::Error; + + #[test] + fn test_deadline_exceeded_error() { + let err: errorpb::Error = Error::DeadlineExceeded.into(); + assert_eq!( + err.get_server_is_busy().reason, + "deadline is exceeded".to_string() + ); + assert_eq!(err.get_message(), "Deadline is exceeded"); + } +} diff --git a/components/tikv_util/src/deadline.rs b/components/tikv_util/src/deadline.rs index 84463f507b9..64416999fe3 100644 --- a/components/tikv_util/src/deadline.rs +++ b/components/tikv_util/src/deadline.rs @@ -1,6 +1,7 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. use fail::fail_point; +use kvproto::errorpb; use super::time::{Duration, Instant}; @@ -58,3 +59,11 @@ impl Deadline { std::time::Instant::now() + self.deadline.duration_since(Instant::now_coarse()) } } + +const DEADLINE_EXCEEDED: &str = "deadline is exceeded"; + +pub fn set_deadline_exceeded_busy_error(e: &mut errorpb::Error) { + let mut server_is_busy_err = errorpb::ServerIsBusy::default(); + server_is_busy_err.set_reason(DEADLINE_EXCEEDED.to_owned()); + e.set_server_is_busy(server_is_busy_err); +} diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 005a18938de..001d1e94ca0 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -19,7 +19,9 @@ use resource_metering::{FutureExt, ResourceTagFactory, StreamExt}; use tidb_query_common::execute_stats::ExecSummary; use tikv_alloc::trace::MemoryTraceGuard; use tikv_kv::SnapshotExt; -use tikv_util::{quota_limiter::QuotaLimiter, time::Instant}; +use tikv_util::{ + deadline::set_deadline_exceeded_busy_error, quota_limiter::QuotaLimiter, time::Instant, +}; use tipb::{AnalyzeReq, AnalyzeType, ChecksumRequest, ChecksumScanOn, 
DagRequest, ExecType}; use tokio::sync::Semaphore; use txn_types::Lock; @@ -832,7 +834,10 @@ macro_rules! make_error_response_common { } Error::DeadlineExceeded => { $tag = "deadline_exceeded"; - $resp.set_other_error($e.to_string()); + let mut err = errorpb::Error::default(); + set_deadline_exceeded_busy_error(&mut err); + err.set_message($e.to_string()); + $resp.set_region_error(err); } Error::MaxPendingTasksExceeded => { $tag = "max_pending_tasks_exceeded"; @@ -1936,7 +1941,11 @@ mod tests { let resp = block_on(copr.handle_unary_request(config, handler_builder)).unwrap(); assert_eq!(resp.get_data().len(), 0); - assert!(!resp.get_other_error().is_empty()); + let region_err = resp.get_region_error(); + assert_eq!( + region_err.get_server_is_busy().reason, + "deadline is exceeded".to_string() + ); } { @@ -1953,7 +1962,11 @@ mod tests { let resp = block_on(copr.handle_unary_request(config, handler_builder)).unwrap(); assert_eq!(resp.get_data().len(), 0); - assert!(!resp.get_other_error().is_empty()); + let region_err = resp.get_region_error(); + assert_eq!( + region_err.get_server_is_busy().reason, + "deadline is exceeded".to_string() + ); } } @@ -2005,4 +2018,18 @@ mod tests { let resp = block_on(copr.parse_and_handle_unary_request(req, None)); assert_eq!(resp.get_locked().get_key(), b"key"); } + + #[test] + fn test_make_error_response() { + let resp = make_error_response(Error::DeadlineExceeded); + let region_err = resp.get_region_error(); + assert_eq!( + region_err.get_server_is_busy().reason, + "deadline is exceeded".to_string() + ); + assert_eq!( + region_err.get_message(), + "Coprocessor task terminated due to exceeding the deadline" + ); + } } diff --git a/src/storage/errors.rs b/src/storage/errors.rs index 0e7db9ffc96..b603b904708 100644 --- a/src/storage/errors.rs +++ b/src/storage/errors.rs @@ -12,7 +12,7 @@ use std::{ use error_code::{self, ErrorCode, ErrorCodeExt}; use kvproto::{errorpb, kvrpcpb, kvrpcpb::ApiVersion}; use thiserror::Error; -use 
tikv_util::deadline::DeadlineError; +use tikv_util::deadline::{set_deadline_exceeded_busy_error, DeadlineError}; use txn_types::{KvPair, TimeStamp}; use crate::storage::{ @@ -222,7 +222,6 @@ impl Display for ErrorHeaderKind { const SCHEDULER_IS_BUSY: &str = "scheduler is busy"; const GC_WORKER_IS_BUSY: &str = "gc worker is busy"; -const DEADLINE_EXCEEDED: &str = "deadline is exceeded"; /// Get the `ErrorHeaderKind` enum that corresponds to the error in the protobuf /// message. Returns `ErrorHeaderKind::Other` if no match found. @@ -319,9 +318,8 @@ pub fn extract_region_error_from_error(e: &Error) -> Option { } Error(box ErrorInner::DeadlineExceeded) => { let mut err = errorpb::Error::default(); - let mut server_is_busy_err = errorpb::ServerIsBusy::default(); - server_is_busy_err.set_reason(DEADLINE_EXCEEDED.to_owned()); - err.set_server_is_busy(server_is_busy_err); + err.set_message(e.to_string()); + set_deadline_exceeded_busy_error(&mut err); Some(err) } _ => None, diff --git a/tests/failpoints/cases/test_coprocessor.rs b/tests/failpoints/cases/test_coprocessor.rs index 0710f778aa7..be9d978b23a 100644 --- a/tests/failpoints/cases/test_coprocessor.rs +++ b/tests/failpoints/cases/test_coprocessor.rs @@ -31,8 +31,15 @@ fn test_deadline() { fail::cfg("deadline_check_fail", "return()").unwrap(); let resp = handle_request(&endpoint, req); - - assert!(resp.get_other_error().contains("exceeding the deadline")); + let region_err = resp.get_region_error(); + assert_eq!( + region_err.get_server_is_busy().reason, + "deadline is exceeded".to_string() + ); + assert_eq!( + region_err.get_message(), + "Coprocessor task terminated due to exceeding the deadline" + ); } #[test] @@ -46,8 +53,15 @@ fn test_deadline_2() { fail::cfg("rockskv_async_snapshot", "panic").unwrap(); fail::cfg("deadline_check_fail", "return()").unwrap(); let resp = handle_request(&endpoint, req); - - assert!(resp.get_other_error().contains("exceeding the deadline")); + let region_err = 
resp.get_region_error(); + assert_eq!( + region_err.get_server_is_busy().reason, + "deadline is exceeded".to_string() + ); + assert_eq!( + region_err.get_message(), + "Coprocessor task terminated due to exceeding the deadline" + ); } /// Test deadline exceeded when request is handling @@ -80,12 +94,14 @@ fn test_deadline_3() { let mut resp = SelectResponse::default(); resp.merge_from_bytes(cop_resp.get_data()).unwrap(); - assert!( - cop_resp.other_error.contains("exceeding the deadline") - || resp - .get_error() - .get_msg() - .contains("exceeding the deadline") + let region_err = cop_resp.get_region_error(); + assert_eq!( + region_err.get_server_is_busy().reason, + "deadline is exceeded".to_string() + ); + assert_eq!( + region_err.get_message(), + "Coprocessor task terminated due to exceeding the deadline" ); } From 99916c0b9e70583597024aab001d9a3c7825ed10 Mon Sep 17 00:00:00 2001 From: Liqi Geng Date: Mon, 11 Dec 2023 18:39:18 +0800 Subject: [PATCH 1055/1149] coprocessor: rewrite `Upper` and `Lower` functions by porting the implementation from Go library (#16160) close tikv/tikv#16159 rewrite `Upper` and `Lower` functions by porting the implementation from Go library Signed-off-by: gengliqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../src/codec/collation/encoding/gbk.rs | 60 +- .../src/codec/collation/encoding/mod.rs | 2 + .../collation/encoding/unicode_letter.rs | 550 ++++++++++++++++++ .../src/codec/collation/mod.rs | 4 +- components/tidb_query_expr/src/impl_string.rs | 46 +- 5 files changed, 619 insertions(+), 43 deletions(-) create mode 100644 components/tidb_query_datatype/src/codec/collation/encoding/unicode_letter.rs diff --git a/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs b/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs index 6f27475ff2c..137d9dd22c3 100644 --- a/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs +++ 
b/components/tidb_query_datatype/src/codec/collation/encoding/gbk.rs @@ -28,45 +28,39 @@ impl Encoding for EncodingGbk { #[inline] // GBK lower and upper follows https://dev.mysql.com/worklog/task/?id=4583. fn lower(s: &str, writer: BytesWriter) -> BytesGuard { - let res = s.chars().flat_map(|ch| { - let c = ch as u32; - match c { - 0x216A..=0x216B => char::from_u32(c), - _ => char::from_u32(c).unwrap().to_lowercase().next(), - } + let res = s.chars().flat_map(|ch| match ch as u32 { + 0x216A..=0x216B => Some(ch), + _ => unicode_to_lower(ch), }); writer.write_from_char_iter(res) } #[inline] fn upper(s: &str, writer: BytesWriter) -> BytesGuard { - let res = s.chars().flat_map(|ch| { - let c = ch as u32; - match c { - 0x00E0..=0x00E1 - | 0x00E8..=0x00EA - | 0x00EC..=0x00ED - | 0x00F2..=0x00F3 - | 0x00F9..=0x00FA - | 0x00FC - | 0x0101 - | 0x0113 - | 0x011B - | 0x012B - | 0x0144 - | 0x0148 - | 0x014D - | 0x016B - | 0x01CE - | 0x01D0 - | 0x01D2 - | 0x01D4 - | 0x01D6 - | 0x01D8 - | 0x01DA - | 0x01DC => char::from_u32(c), - _ => char::from_u32(c).unwrap().to_uppercase().next(), - } + let res = s.chars().flat_map(|ch| match ch as u32 { + 0x00E0..=0x00E1 + | 0x00E8..=0x00EA + | 0x00EC..=0x00ED + | 0x00F2..=0x00F3 + | 0x00F9..=0x00FA + | 0x00FC + | 0x0101 + | 0x0113 + | 0x011B + | 0x012B + | 0x0144 + | 0x0148 + | 0x014D + | 0x016B + | 0x01CE + | 0x01D0 + | 0x01D2 + | 0x01D4 + | 0x01D6 + | 0x01D8 + | 0x01DA + | 0x01DC => Some(ch), + _ => unicode_to_upper(ch), }); writer.write_from_char_iter(res) } diff --git a/components/tidb_query_datatype/src/codec/collation/encoding/mod.rs b/components/tidb_query_datatype/src/codec/collation/encoding/mod.rs index b2434105ce5..268b11aad41 100644 --- a/components/tidb_query_datatype/src/codec/collation/encoding/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/encoding/mod.rs @@ -2,12 +2,14 @@ mod ascii; mod gbk; +mod unicode_letter; mod utf8; use std::str; pub use ascii::*; pub use gbk::*; +pub use unicode_letter::*; pub use 
utf8::*; use super::Encoding; diff --git a/components/tidb_query_datatype/src/codec/collation/encoding/unicode_letter.rs b/components/tidb_query_datatype/src/codec/collation/encoding/unicode_letter.rs new file mode 100644 index 00000000000..e83af2723c5 --- /dev/null +++ b/components/tidb_query_datatype/src/codec/collation/encoding/unicode_letter.rs @@ -0,0 +1,550 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +/// In order to keep the same behavoir as TiDB that uses go standard library to +/// implement lower and upper functions. Below code is ported from https://github.com/golang/go/blob/go1.21.3/src/unicode/letter.go. +const UPPER_CASE: usize = 0; +const LOWER_CASE: usize = 1; +const TITLE_CASE: usize = 2; +const MAX_CASE: usize = 3; + +const MAX_ASCII: i32 = 0x7F; +const MAX_RUNE: i32 = 0x10FFFF; +const REPLACEMENT_CHAR: i32 = 0xFFFD; + +const UPPER_LOWER: i32 = MAX_RUNE + 1; + +static CASE_TABLE: &[(i32, i32, [i32; MAX_CASE])] = &[ + (0x0041, 0x005A, [0, 32, 0]), + (0x0061, 0x007A, [-32, 0, -32]), + (0x00B5, 0x00B5, [743, 0, 743]), + (0x00C0, 0x00D6, [0, 32, 0]), + (0x00D8, 0x00DE, [0, 32, 0]), + (0x00E0, 0x00F6, [-32, 0, -32]), + (0x00F8, 0x00FE, [-32, 0, -32]), + (0x00FF, 0x00FF, [121, 0, 121]), + (0x0100, 0x012F, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0130, 0x0130, [0, -199, 0]), + (0x0131, 0x0131, [-232, 0, -232]), + (0x0132, 0x0137, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0139, 0x0148, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x014A, 0x0177, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0178, 0x0178, [0, -121, 0]), + (0x0179, 0x017E, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x017F, 0x017F, [-300, 0, -300]), + (0x0180, 0x0180, [195, 0, 195]), + (0x0181, 0x0181, [0, 210, 0]), + (0x0182, 0x0185, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0186, 0x0186, [0, 206, 0]), + (0x0187, 0x0188, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0189, 0x018A, [0, 205, 0]), + (0x018B, 0x018C, [UPPER_LOWER, UPPER_LOWER, 
UPPER_LOWER]), + (0x018E, 0x018E, [0, 79, 0]), + (0x018F, 0x018F, [0, 202, 0]), + (0x0190, 0x0190, [0, 203, 0]), + (0x0191, 0x0192, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0193, 0x0193, [0, 205, 0]), + (0x0194, 0x0194, [0, 207, 0]), + (0x0195, 0x0195, [97, 0, 97]), + (0x0196, 0x0196, [0, 211, 0]), + (0x0197, 0x0197, [0, 209, 0]), + (0x0198, 0x0199, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x019A, 0x019A, [163, 0, 163]), + (0x019C, 0x019C, [0, 211, 0]), + (0x019D, 0x019D, [0, 213, 0]), + (0x019E, 0x019E, [130, 0, 130]), + (0x019F, 0x019F, [0, 214, 0]), + (0x01A0, 0x01A5, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01A6, 0x01A6, [0, 218, 0]), + (0x01A7, 0x01A8, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01A9, 0x01A9, [0, 218, 0]), + (0x01AC, 0x01AD, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01AE, 0x01AE, [0, 218, 0]), + (0x01AF, 0x01B0, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01B1, 0x01B2, [0, 217, 0]), + (0x01B3, 0x01B6, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01B7, 0x01B7, [0, 219, 0]), + (0x01B8, 0x01B9, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01BC, 0x01BD, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01BF, 0x01BF, [56, 0, 56]), + (0x01C4, 0x01C4, [0, 2, 1]), + (0x01C5, 0x01C5, [-1, 1, 0]), + (0x01C6, 0x01C6, [-2, 0, -1]), + (0x01C7, 0x01C7, [0, 2, 1]), + (0x01C8, 0x01C8, [-1, 1, 0]), + (0x01C9, 0x01C9, [-2, 0, -1]), + (0x01CA, 0x01CA, [0, 2, 1]), + (0x01CB, 0x01CB, [-1, 1, 0]), + (0x01CC, 0x01CC, [-2, 0, -1]), + (0x01CD, 0x01DC, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01DD, 0x01DD, [-79, 0, -79]), + (0x01DE, 0x01EF, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01F1, 0x01F1, [0, 2, 1]), + (0x01F2, 0x01F2, [-1, 1, 0]), + (0x01F3, 0x01F3, [-2, 0, -1]), + (0x01F4, 0x01F5, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x01F6, 0x01F6, [0, -97, 0]), + (0x01F7, 0x01F7, [0, -56, 0]), + (0x01F8, 0x021F, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0220, 0x0220, [0, -130, 0]), + (0x0222, 0x0233, [UPPER_LOWER, 
UPPER_LOWER, UPPER_LOWER]), + (0x023A, 0x023A, [0, 10795, 0]), + (0x023B, 0x023C, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x023D, 0x023D, [0, -163, 0]), + (0x023E, 0x023E, [0, 10792, 0]), + (0x023F, 0x0240, [10815, 0, 10815]), + (0x0241, 0x0242, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0243, 0x0243, [0, -195, 0]), + (0x0244, 0x0244, [0, 69, 0]), + (0x0245, 0x0245, [0, 71, 0]), + (0x0246, 0x024F, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0250, 0x0250, [10783, 0, 10783]), + (0x0251, 0x0251, [10780, 0, 10780]), + (0x0252, 0x0252, [10782, 0, 10782]), + (0x0253, 0x0253, [-210, 0, -210]), + (0x0254, 0x0254, [-206, 0, -206]), + (0x0256, 0x0257, [-205, 0, -205]), + (0x0259, 0x0259, [-202, 0, -202]), + (0x025B, 0x025B, [-203, 0, -203]), + (0x025C, 0x025C, [42319, 0, 42319]), + (0x0260, 0x0260, [-205, 0, -205]), + (0x0261, 0x0261, [42315, 0, 42315]), + (0x0263, 0x0263, [-207, 0, -207]), + (0x0265, 0x0265, [42280, 0, 42280]), + (0x0266, 0x0266, [42308, 0, 42308]), + (0x0268, 0x0268, [-209, 0, -209]), + (0x0269, 0x0269, [-211, 0, -211]), + (0x026A, 0x026A, [42308, 0, 42308]), + (0x026B, 0x026B, [10743, 0, 10743]), + (0x026C, 0x026C, [42305, 0, 42305]), + (0x026F, 0x026F, [-211, 0, -211]), + (0x0271, 0x0271, [10749, 0, 10749]), + (0x0272, 0x0272, [-213, 0, -213]), + (0x0275, 0x0275, [-214, 0, -214]), + (0x027D, 0x027D, [10727, 0, 10727]), + (0x0280, 0x0280, [-218, 0, -218]), + (0x0282, 0x0282, [42307, 0, 42307]), + (0x0283, 0x0283, [-218, 0, -218]), + (0x0287, 0x0287, [42282, 0, 42282]), + (0x0288, 0x0288, [-218, 0, -218]), + (0x0289, 0x0289, [-69, 0, -69]), + (0x028A, 0x028B, [-217, 0, -217]), + (0x028C, 0x028C, [-71, 0, -71]), + (0x0292, 0x0292, [-219, 0, -219]), + (0x029D, 0x029D, [42261, 0, 42261]), + (0x029E, 0x029E, [42258, 0, 42258]), + (0x0345, 0x0345, [84, 0, 84]), + (0x0370, 0x0373, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0376, 0x0377, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x037B, 0x037D, [130, 0, 130]), + (0x037F, 0x037F, [0, 116, 0]), + 
(0x0386, 0x0386, [0, 38, 0]), + (0x0388, 0x038A, [0, 37, 0]), + (0x038C, 0x038C, [0, 64, 0]), + (0x038E, 0x038F, [0, 63, 0]), + (0x0391, 0x03A1, [0, 32, 0]), + (0x03A3, 0x03AB, [0, 32, 0]), + (0x03AC, 0x03AC, [-38, 0, -38]), + (0x03AD, 0x03AF, [-37, 0, -37]), + (0x03B1, 0x03C1, [-32, 0, -32]), + (0x03C2, 0x03C2, [-31, 0, -31]), + (0x03C3, 0x03CB, [-32, 0, -32]), + (0x03CC, 0x03CC, [-64, 0, -64]), + (0x03CD, 0x03CE, [-63, 0, -63]), + (0x03CF, 0x03CF, [0, 8, 0]), + (0x03D0, 0x03D0, [-62, 0, -62]), + (0x03D1, 0x03D1, [-57, 0, -57]), + (0x03D5, 0x03D5, [-47, 0, -47]), + (0x03D6, 0x03D6, [-54, 0, -54]), + (0x03D7, 0x03D7, [-8, 0, -8]), + (0x03D8, 0x03EF, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x03F0, 0x03F0, [-86, 0, -86]), + (0x03F1, 0x03F1, [-80, 0, -80]), + (0x03F2, 0x03F2, [7, 0, 7]), + (0x03F3, 0x03F3, [-116, 0, -116]), + (0x03F4, 0x03F4, [0, -60, 0]), + (0x03F5, 0x03F5, [-96, 0, -96]), + (0x03F7, 0x03F8, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x03F9, 0x03F9, [0, -7, 0]), + (0x03FA, 0x03FB, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x03FD, 0x03FF, [0, -130, 0]), + (0x0400, 0x040F, [0, 80, 0]), + (0x0410, 0x042F, [0, 32, 0]), + (0x0430, 0x044F, [-32, 0, -32]), + (0x0450, 0x045F, [-80, 0, -80]), + (0x0460, 0x0481, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x048A, 0x04BF, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x04C0, 0x04C0, [0, 15, 0]), + (0x04C1, 0x04CE, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x04CF, 0x04CF, [-15, 0, -15]), + (0x04D0, 0x052F, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x0531, 0x0556, [0, 48, 0]), + (0x0561, 0x0586, [-48, 0, -48]), + (0x10A0, 0x10C5, [0, 7264, 0]), + (0x10C7, 0x10C7, [0, 7264, 0]), + (0x10CD, 0x10CD, [0, 7264, 0]), + (0x10D0, 0x10FA, [3008, 0, 0]), + (0x10FD, 0x10FF, [3008, 0, 0]), + (0x13A0, 0x13EF, [0, 38864, 0]), + (0x13F0, 0x13F5, [0, 8, 0]), + (0x13F8, 0x13FD, [-8, 0, -8]), + (0x1C80, 0x1C80, [-6254, 0, -6254]), + (0x1C81, 0x1C81, [-6253, 0, -6253]), + (0x1C82, 0x1C82, [-6244, 0, -6244]), + 
(0x1C83, 0x1C84, [-6242, 0, -6242]), + (0x1C85, 0x1C85, [-6243, 0, -6243]), + (0x1C86, 0x1C86, [-6236, 0, -6236]), + (0x1C87, 0x1C87, [-6181, 0, -6181]), + (0x1C88, 0x1C88, [35266, 0, 35266]), + (0x1C90, 0x1CBA, [0, -3008, 0]), + (0x1CBD, 0x1CBF, [0, -3008, 0]), + (0x1D79, 0x1D79, [35332, 0, 35332]), + (0x1D7D, 0x1D7D, [3814, 0, 3814]), + (0x1D8E, 0x1D8E, [35384, 0, 35384]), + (0x1E00, 0x1E95, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x1E9B, 0x1E9B, [-59, 0, -59]), + (0x1E9E, 0x1E9E, [0, -7615, 0]), + (0x1EA0, 0x1EFF, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x1F00, 0x1F07, [8, 0, 8]), + (0x1F08, 0x1F0F, [0, -8, 0]), + (0x1F10, 0x1F15, [8, 0, 8]), + (0x1F18, 0x1F1D, [0, -8, 0]), + (0x1F20, 0x1F27, [8, 0, 8]), + (0x1F28, 0x1F2F, [0, -8, 0]), + (0x1F30, 0x1F37, [8, 0, 8]), + (0x1F38, 0x1F3F, [0, -8, 0]), + (0x1F40, 0x1F45, [8, 0, 8]), + (0x1F48, 0x1F4D, [0, -8, 0]), + (0x1F51, 0x1F51, [8, 0, 8]), + (0x1F53, 0x1F53, [8, 0, 8]), + (0x1F55, 0x1F55, [8, 0, 8]), + (0x1F57, 0x1F57, [8, 0, 8]), + (0x1F59, 0x1F59, [0, -8, 0]), + (0x1F5B, 0x1F5B, [0, -8, 0]), + (0x1F5D, 0x1F5D, [0, -8, 0]), + (0x1F5F, 0x1F5F, [0, -8, 0]), + (0x1F60, 0x1F67, [8, 0, 8]), + (0x1F68, 0x1F6F, [0, -8, 0]), + (0x1F70, 0x1F71, [74, 0, 74]), + (0x1F72, 0x1F75, [86, 0, 86]), + (0x1F76, 0x1F77, [100, 0, 100]), + (0x1F78, 0x1F79, [128, 0, 128]), + (0x1F7A, 0x1F7B, [112, 0, 112]), + (0x1F7C, 0x1F7D, [126, 0, 126]), + (0x1F80, 0x1F87, [8, 0, 8]), + (0x1F88, 0x1F8F, [0, -8, 0]), + (0x1F90, 0x1F97, [8, 0, 8]), + (0x1F98, 0x1F9F, [0, -8, 0]), + (0x1FA0, 0x1FA7, [8, 0, 8]), + (0x1FA8, 0x1FAF, [0, -8, 0]), + (0x1FB0, 0x1FB1, [8, 0, 8]), + (0x1FB3, 0x1FB3, [9, 0, 9]), + (0x1FB8, 0x1FB9, [0, -8, 0]), + (0x1FBA, 0x1FBB, [0, -74, 0]), + (0x1FBC, 0x1FBC, [0, -9, 0]), + (0x1FBE, 0x1FBE, [-7205, 0, -7205]), + (0x1FC3, 0x1FC3, [9, 0, 9]), + (0x1FC8, 0x1FCB, [0, -86, 0]), + (0x1FCC, 0x1FCC, [0, -9, 0]), + (0x1FD0, 0x1FD1, [8, 0, 8]), + (0x1FD8, 0x1FD9, [0, -8, 0]), + (0x1FDA, 0x1FDB, [0, -100, 0]), + (0x1FE0, 
0x1FE1, [8, 0, 8]), + (0x1FE5, 0x1FE5, [7, 0, 7]), + (0x1FE8, 0x1FE9, [0, -8, 0]), + (0x1FEA, 0x1FEB, [0, -112, 0]), + (0x1FEC, 0x1FEC, [0, -7, 0]), + (0x1FF3, 0x1FF3, [9, 0, 9]), + (0x1FF8, 0x1FF9, [0, -128, 0]), + (0x1FFA, 0x1FFB, [0, -126, 0]), + (0x1FFC, 0x1FFC, [0, -9, 0]), + (0x2126, 0x2126, [0, -7517, 0]), + (0x212A, 0x212A, [0, -8383, 0]), + (0x212B, 0x212B, [0, -8262, 0]), + (0x2132, 0x2132, [0, 28, 0]), + (0x214E, 0x214E, [-28, 0, -28]), + (0x2160, 0x216F, [0, 16, 0]), + (0x2170, 0x217F, [-16, 0, -16]), + (0x2183, 0x2184, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x24B6, 0x24CF, [0, 26, 0]), + (0x24D0, 0x24E9, [-26, 0, -26]), + (0x2C00, 0x2C2F, [0, 48, 0]), + (0x2C30, 0x2C5F, [-48, 0, -48]), + (0x2C60, 0x2C61, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x2C62, 0x2C62, [0, -10743, 0]), + (0x2C63, 0x2C63, [0, -3814, 0]), + (0x2C64, 0x2C64, [0, -10727, 0]), + (0x2C65, 0x2C65, [-10795, 0, -10795]), + (0x2C66, 0x2C66, [-10792, 0, -10792]), + (0x2C67, 0x2C6C, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x2C6D, 0x2C6D, [0, -10780, 0]), + (0x2C6E, 0x2C6E, [0, -10749, 0]), + (0x2C6F, 0x2C6F, [0, -10783, 0]), + (0x2C70, 0x2C70, [0, -10782, 0]), + (0x2C72, 0x2C73, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x2C75, 0x2C76, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x2C7E, 0x2C7F, [0, -10815, 0]), + (0x2C80, 0x2CE3, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x2CEB, 0x2CEE, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x2CF2, 0x2CF3, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0x2D00, 0x2D25, [-7264, 0, -7264]), + (0x2D27, 0x2D27, [-7264, 0, -7264]), + (0x2D2D, 0x2D2D, [-7264, 0, -7264]), + (0xA640, 0xA66D, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA680, 0xA69B, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA722, 0xA72F, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA732, 0xA76F, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA779, 0xA77C, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA77D, 0xA77D, [0, -35332, 0]), + (0xA77E, 0xA787, 
[UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA78B, 0xA78C, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA78D, 0xA78D, [0, -42280, 0]), + (0xA790, 0xA793, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA794, 0xA794, [48, 0, 48]), + (0xA796, 0xA7A9, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA7AA, 0xA7AA, [0, -42308, 0]), + (0xA7AB, 0xA7AB, [0, -42319, 0]), + (0xA7AC, 0xA7AC, [0, -42315, 0]), + (0xA7AD, 0xA7AD, [0, -42305, 0]), + (0xA7AE, 0xA7AE, [0, -42308, 0]), + (0xA7B0, 0xA7B0, [0, -42258, 0]), + (0xA7B1, 0xA7B1, [0, -42282, 0]), + (0xA7B2, 0xA7B2, [0, -42261, 0]), + (0xA7B3, 0xA7B3, [0, 928, 0]), + (0xA7B4, 0xA7C3, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA7C4, 0xA7C4, [0, -48, 0]), + (0xA7C5, 0xA7C5, [0, -42307, 0]), + (0xA7C6, 0xA7C6, [0, -35384, 0]), + (0xA7C7, 0xA7CA, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA7D0, 0xA7D1, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA7D6, 0xA7D9, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xA7F5, 0xA7F6, [UPPER_LOWER, UPPER_LOWER, UPPER_LOWER]), + (0xAB53, 0xAB53, [-928, 0, -928]), + (0xAB70, 0xABBF, [-38864, 0, -38864]), + (0xFF21, 0xFF3A, [0, 32, 0]), + (0xFF41, 0xFF5A, [-32, 0, -32]), + (0x10400, 0x10427, [0, 40, 0]), + (0x10428, 0x1044F, [-40, 0, -40]), + (0x104B0, 0x104D3, [0, 40, 0]), + (0x104D8, 0x104FB, [-40, 0, -40]), + (0x10570, 0x1057A, [0, 39, 0]), + (0x1057C, 0x1058A, [0, 39, 0]), + (0x1058C, 0x10592, [0, 39, 0]), + (0x10594, 0x10595, [0, 39, 0]), + (0x10597, 0x105A1, [-39, 0, -39]), + (0x105A3, 0x105B1, [-39, 0, -39]), + (0x105B3, 0x105B9, [-39, 0, -39]), + (0x105BB, 0x105BC, [-39, 0, -39]), + (0x10C80, 0x10CB2, [0, 64, 0]), + (0x10CC0, 0x10CF2, [-64, 0, -64]), + (0x118A0, 0x118BF, [0, 32, 0]), + (0x118C0, 0x118DF, [-32, 0, -32]), + (0x16E40, 0x16E5F, [0, 32, 0]), + (0x16E60, 0x16E7F, [-32, 0, -32]), + (0x1E900, 0x1E921, [0, 34, 0]), + (0x1E922, 0x1E943, [-34, 0, -34]), +]; + +fn to_case(case: usize, ch: i32) -> i32 { + if case >= MAX_CASE { + return REPLACEMENT_CHAR; + } + // binary 
search over ranges + let mut lo = 0; + let mut hi = CASE_TABLE.len(); + while lo < hi { + let m = lo + (hi - lo) / 2; + let cr = CASE_TABLE[m]; + if cr.0 <= ch && ch <= cr.1 { + let delta = cr.2[case]; + if delta > MAX_RUNE { + // In an Upper-Lower sequence, which always starts with + // an UpperCase letter, the real deltas always look like: + // {0, 1, 0} UpperCase (Lower is next) + // {-1, 0, -1} LowerCase (Upper, Title are previous) + // The characters at even offsets from the beginning of the + // sequence are upper case; the ones at odd offsets are lower. + // The correct mapping can be done by clearing or setting the low + // bit in the sequence offset. + // The constants UpperCase and TitleCase are even while LowerCase + // is odd so we take the low bit from case. + return cr.0 + (((ch - cr.0) & !1) | (case as i32 & 1)); + } + return ch + delta; + } + if ch < cr.0 { + hi = m; + } else { + lo = m + 1; + } + } + ch +} + +pub fn unicode_to_upper(ch: char) -> Option { + let mut r = ch as i32; + if r < MAX_ASCII { + if 'a' as i32 <= r && r <= 'z' as i32 { + r -= ('a' as i32) - ('A' as i32); + } + char::from_u32(r as u32) + } else { + char::from_u32(to_case(UPPER_CASE, r) as u32) + } +} + +pub fn unicode_to_lower(ch: char) -> Option { + let mut r = ch as i32; + if r < MAX_ASCII { + if 'A' as i32 <= r && r <= 'Z' as i32 { + r += ('a' as i32) - ('A' as i32); + } + char::from_u32(r as u32) + } else { + char::from_u32(to_case(LOWER_CASE, r) as u32) + } +} + +pub fn unicode_to_title(ch: char) -> Option { + let mut r = ch as i32; + if r < MAX_ASCII { + if 'a' as i32 <= r && r <= 'z' as i32 { + r -= ('a' as i32) - ('A' as i32); + } + char::from_u32(r as u32) + } else { + char::from_u32(to_case(TITLE_CASE, r) as u32) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + static CASE_TEST: &[(usize, u32, u32)] = &[ + // ASCII (special-cased so test carefully) + (UPPER_CASE, '\n' as u32, '\n' as u32), + (UPPER_CASE, 'a' as u32, 'A' as u32), + (UPPER_CASE, 'A' as u32, 'A' 
as u32), + (UPPER_CASE, '7' as u32, '7' as u32), + (LOWER_CASE, '\n' as u32, '\n' as u32), + (LOWER_CASE, 'a' as u32, 'a' as u32), + (LOWER_CASE, 'A' as u32, 'a' as u32), + (LOWER_CASE, '7' as u32, '7' as u32), + (TITLE_CASE, '\n' as u32, '\n' as u32), + (TITLE_CASE, 'a' as u32, 'A' as u32), + (TITLE_CASE, 'A' as u32, 'A' as u32), + (TITLE_CASE, '7' as u32, '7' as u32), + // Latin-1: easy to read the tests! + (UPPER_CASE, 0x80, 0x80), + (UPPER_CASE, 'Å' as u32, 'Å' as u32), + (UPPER_CASE, 'å' as u32, 'Å' as u32), + (LOWER_CASE, 0x80, 0x80), + (LOWER_CASE, 'Å' as u32, 'å' as u32), + (LOWER_CASE, 'å' as u32, 'å' as u32), + (TITLE_CASE, 0x80, 0x80), + (TITLE_CASE, 'Å' as u32, 'Å' as u32), + (TITLE_CASE, 'å' as u32, 'Å' as u32), + // 0131;LATIN SMALL LETTER DOTLESS I;Ll;0;L;;;;;N;;;0049;;0049 + (UPPER_CASE, 0x0130, 'İ' as u32), + (LOWER_CASE, 0x0130, 'i' as u32), + (UPPER_CASE, 0x0131, 'I' as u32), + (LOWER_CASE, 0x0131, 0x0131), + (TITLE_CASE, 0x0131, 'I' as u32), + // 0133;LATIN SMALL LIGATURE IJ;Ll;0;L; 0069 006A;;;;N;LATIN SMALL LETTER I + // J;;0132;;0132 + (UPPER_CASE, 0x0133, 0x0132), + (LOWER_CASE, 0x0133, 0x0133), + (TITLE_CASE, 0x0133, 0x0132), + // 212A;KELVIN SIGN;Lu;0;L;004B;;;;N;DEGREES KELVIN;;;006B; + (UPPER_CASE, 0x212A, 0x212A), + (LOWER_CASE, 0x212A, 'k' as u32), + (TITLE_CASE, 0x212A, 0x212A), + // From an UpperLower sequence + // A640;CYRILLIC CAPITAL LETTER ZEMLYA;Lu;0;L;;;;;N;;;;A641; + (UPPER_CASE, 0xA640, 0xA640), + (LOWER_CASE, 0xA640, 0xA641), + (TITLE_CASE, 0xA640, 0xA640), + // A641;CYRILLIC SMALL LETTER ZEMLYA;Ll;0;L;;;;;N;;;A640;;A640 + (UPPER_CASE, 0xA641, 0xA640), + (LOWER_CASE, 0xA641, 0xA641), + (TITLE_CASE, 0xA641, 0xA640), + // A64E;CYRILLIC CAPITAL LETTER NEUTRAL YER;Lu;0;L;;;;;N;;;;A64F; + (UPPER_CASE, 0xA64E, 0xA64E), + (LOWER_CASE, 0xA64E, 0xA64F), + (TITLE_CASE, 0xA64E, 0xA64E), + // A65F;CYRILLIC SMALL LETTER YN;Ll;0;L;;;;;N;;;A65E;;A65E + (UPPER_CASE, 0xA65F, 0xA65E), + (LOWER_CASE, 0xA65F, 0xA65F), + (TITLE_CASE, 0xA65F, 
0xA65E), + // From another UpperLower sequence + // 0139;LATIN CAPITAL LETTER L WITH ACUTE;Lu;0;L;004C 0301;;;;N;LATIN CAPITAL LETTER L + // ACUTE;;;013A; + (UPPER_CASE, 0x0139, 0x0139), + (LOWER_CASE, 0x0139, 0x013A), + (TITLE_CASE, 0x0139, 0x0139), + // 013F;LATIN CAPITAL LETTER L WITH MIDDLE DOT;Lu;0;L; 004C 00B7;;;;N;;;;0140; + (UPPER_CASE, 0x013f, 0x013f), + (LOWER_CASE, 0x013f, 0x0140), + (TITLE_CASE, 0x013f, 0x013f), + // 0148;LATIN SMALL LETTER N WITH CARON;Ll;0;L;006E 030C;;;;N;LATIN SMALL LETTER N + // HACEK;;0147;;0147 + (UPPER_CASE, 0x0148, 0x0147), + (LOWER_CASE, 0x0148, 0x0148), + (TITLE_CASE, 0x0148, 0x0147), + // Lowercase lower than uppercase. + // AB78;CHEROKEE SMALL LETTER GE;Ll;0;L;;;;;N;;;13A8;;13A8 + (UPPER_CASE, 0xab78, 0x13a8), + (LOWER_CASE, 0xab78, 0xab78), + (TITLE_CASE, 0xab78, 0x13a8), + (UPPER_CASE, 0x13a8, 0x13a8), + (LOWER_CASE, 0x13a8, 0xab78), + (TITLE_CASE, 0x13a8, 0x13a8), + // Last block in the 5.1.0 table + // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428; + (UPPER_CASE, 0x10400, 0x10400), + (LOWER_CASE, 0x10400, 0x10428), + (TITLE_CASE, 0x10400, 0x10400), + // 10427;DESERET CAPITAL LETTER EW;Lu;0;L;;;;;N;;;;1044F; + (UPPER_CASE, 0x10427, 0x10427), + (LOWER_CASE, 0x10427, 0x1044F), + (TITLE_CASE, 0x10427, 0x10427), + // 10428;DESERET SMALL LETTER LONG I;Ll;0;L;;;;;N;;;10400;;10400 + (UPPER_CASE, 0x10428, 0x10400), + (LOWER_CASE, 0x10428, 0x10428), + (TITLE_CASE, 0x10428, 0x10400), + // 1044F;DESERET SMALL LETTER EW;Ll;0;L;;;;;N;;;10427;;10427 + (UPPER_CASE, 0x1044F, 0x10427), + (LOWER_CASE, 0x1044F, 0x1044F), + (TITLE_CASE, 0x1044F, 0x10427), + // First one not in the 5.1.0 table + // 10450;SHAVIAN LETTER PEEP;Lo;0;L;;;;;N;;;;; + (UPPER_CASE, 0x10450, 0x10450), + (LOWER_CASE, 0x10450, 0x10450), + (TITLE_CASE, 0x10450, 0x10450), + // Non-letters with case. 
+ (LOWER_CASE, 0x2161, 0x2171), + (UPPER_CASE, 0x0345, 0x0399), + ]; + + #[test] + fn test_case() { + for &(case, input, output) in CASE_TEST { + if case == UPPER_CASE { + assert_eq!( + unicode_to_upper(char::from_u32(input).unwrap()).unwrap() as u32, + output + ); + } else if case == LOWER_CASE { + assert_eq!( + unicode_to_lower(char::from_u32(input).unwrap()).unwrap() as u32, + output + ); + } else { + assert_eq!( + unicode_to_title(char::from_u32(input).unwrap()).unwrap() as u32, + output + ); + } + } + } +} diff --git a/components/tidb_query_datatype/src/codec/collation/mod.rs b/components/tidb_query_datatype/src/codec/collation/mod.rs index 22127e62f49..93cf0c8ca55 100644 --- a/components/tidb_query_datatype/src/codec/collation/mod.rs +++ b/components/tidb_query_datatype/src/codec/collation/mod.rs @@ -141,13 +141,13 @@ pub trait Encoding { #[inline] fn lower(s: &str, writer: BytesWriter) -> BytesGuard { - let res = s.chars().flat_map(char::to_lowercase); + let res = s.chars().flat_map(|ch| encoding::unicode_to_lower(ch)); writer.write_from_char_iter(res) } #[inline] fn upper(s: &str, writer: BytesWriter) -> BytesGuard { - let res = s.chars().flat_map(char::to_uppercase); + let res = s.chars().flat_map(|ch| encoding::unicode_to_upper(ch)); writer.write_from_char_iter(res) } } diff --git a/components/tidb_query_expr/src/impl_string.rs b/components/tidb_query_expr/src/impl_string.rs index fb330f91e03..c86e8d22ccb 100644 --- a/components/tidb_query_expr/src/impl_string.rs +++ b/components/tidb_query_expr/src/impl_string.rs @@ -2860,6 +2860,10 @@ mod tests { Some("قاعدة البيانات".as_bytes().to_vec()), Some("قاعدة البيانات".as_bytes().to_vec()), ), + ( + Some("ßßåı".as_bytes().to_vec()), + Some("ßßÅI".as_bytes().to_vec()), + ), (None, None), ]; @@ -2920,11 +2924,32 @@ mod tests { #[test] fn test_gbk_lower_upper() { // Test GBK string case - let sig = vec![ScalarFuncSig::Lower, ScalarFuncSig::Upper]; - for s in sig { - let output = RpnFnScalarEvaluator::new() + let 
cases = vec![ + ( + ScalarFuncSig::LowerUtf8, + "àáèéêìíòóùúüāēěīńňōūǎǐǒǔǖǘǚǜⅪⅫ".as_bytes().to_vec(), + "àáèéêìíòóùúüāēěīńňōūǎǐǒǔǖǘǚǜⅪⅫ".as_bytes().to_vec(), + ), + ( + ScalarFuncSig::UpperUtf8, + "àáèéêìíòóùúüāēěīńňōūǎǐǒǔǖǘǚǜⅪⅫ".as_bytes().to_vec(), + "àáèéêìíòóùúüāēěīńňōūǎǐǒǔǖǘǚǜⅪⅫ".as_bytes().to_vec(), + ), + ( + ScalarFuncSig::LowerUtf8, + "İİIIÅI".as_bytes().to_vec(), + "iiiiåi".as_bytes().to_vec(), + ), + ( + ScalarFuncSig::UpperUtf8, + "ßßåı".as_bytes().to_vec(), + "ßßÅI".as_bytes().to_vec(), + ), + ]; + for (s, input, output) in cases { + let result = RpnFnScalarEvaluator::new() .push_param_with_field_type( - Some("àáèéêìíòóùúüāēěīńňōūǎǐǒǔǖǘǚǜⅪⅫ".as_bytes().to_vec()).clone(), + Some(input).clone(), FieldTypeBuilder::new() .tp(FieldTypeTp::VarString) .charset(CHARSET_GBK) @@ -2932,10 +2957,7 @@ mod tests { ) .evaluate(s) .unwrap(); - assert_eq!( - output, - Some("àáèéêìíòóùúüāēěīńňōūǎǐǒǔǖǘǚǜⅪⅫ".as_bytes().to_vec()) - ); + assert_eq!(result, Some(output),); } } @@ -2960,6 +2982,10 @@ mod tests { Some("قاعدة البيانات".as_bytes().to_vec()), Some("قاعدة البيانات".as_bytes().to_vec()), ), + ( + Some("İİIIÅI".as_bytes().to_vec()), + Some("İİIIÅI".as_bytes().to_vec()), + ), (None, None), ]; @@ -3006,6 +3032,10 @@ mod tests { Some("قاعدة البيانات".as_bytes().to_vec()), Some("قاعدة البيانات".as_bytes().to_vec()), ), + ( + Some("İİIIÅI".as_bytes().to_vec()), + Some("iiiiåi".as_bytes().to_vec()), + ), (None, None), ]; From 462a5972814433f005dff74f89a26e5307f2e3d4 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 12 Dec 2023 11:13:47 +0800 Subject: [PATCH 1056/1149] In-memory Engine: defining hybrid engine (#16114) ref tikv/tikv#16141 Introduce hybrid engine which implements KvEngine but without detailed implementation. 
Signed-off-by: SpadeA-Tang --- Cargo.lock | 10 ++ Cargo.toml | 4 +- components/engine_traits/src/lib.rs | 2 + components/engine_traits/src/memory_engine.rs | 17 +++ components/hybrid_engine/Cargo.toml | 13 ++ components/hybrid_engine/src/cf_names.rs | 15 +++ components/hybrid_engine/src/cf_options.rs | 21 +++ components/hybrid_engine/src/checkpoint.rs | 22 +++ components/hybrid_engine/src/compact.rs | 71 ++++++++++ components/hybrid_engine/src/db_options.rs | 21 +++ components/hybrid_engine/src/engine.rs | 120 +++++++++++++++++ .../hybrid_engine/src/engine_iterator.rs | 54 ++++++++ .../hybrid_engine/src/flow_control_factors.rs | 23 ++++ .../hybrid_engine/src/hybrid_metrics.rs | 25 ++++ components/hybrid_engine/src/import.rs | 17 +++ components/hybrid_engine/src/iterable.rs | 17 +++ components/hybrid_engine/src/lib.rs | 24 ++++ components/hybrid_engine/src/misc.rs | 127 ++++++++++++++++++ .../hybrid_engine/src/mvcc_properties.rs | 23 ++++ components/hybrid_engine/src/perf_context.rs | 20 +++ .../hybrid_engine/src/range_properties.rs | 60 +++++++++ components/hybrid_engine/src/snapshot.rs | 103 ++++++++++++++ components/hybrid_engine/src/sst.rs | 53 ++++++++ .../hybrid_engine/src/table_properties.rs | 21 +++ .../hybrid_engine/src/ttl_properties.rs | 21 +++ components/hybrid_engine/src/write_batch.rs | 101 ++++++++++++++ 26 files changed, 1004 insertions(+), 1 deletion(-) create mode 100644 components/engine_traits/src/memory_engine.rs create mode 100644 components/hybrid_engine/Cargo.toml create mode 100644 components/hybrid_engine/src/cf_names.rs create mode 100644 components/hybrid_engine/src/cf_options.rs create mode 100644 components/hybrid_engine/src/checkpoint.rs create mode 100644 components/hybrid_engine/src/compact.rs create mode 100644 components/hybrid_engine/src/db_options.rs create mode 100644 components/hybrid_engine/src/engine.rs create mode 100644 components/hybrid_engine/src/engine_iterator.rs create mode 100644 
components/hybrid_engine/src/flow_control_factors.rs create mode 100644 components/hybrid_engine/src/hybrid_metrics.rs create mode 100644 components/hybrid_engine/src/import.rs create mode 100644 components/hybrid_engine/src/iterable.rs create mode 100644 components/hybrid_engine/src/lib.rs create mode 100644 components/hybrid_engine/src/misc.rs create mode 100644 components/hybrid_engine/src/mvcc_properties.rs create mode 100644 components/hybrid_engine/src/perf_context.rs create mode 100644 components/hybrid_engine/src/range_properties.rs create mode 100644 components/hybrid_engine/src/snapshot.rs create mode 100644 components/hybrid_engine/src/sst.rs create mode 100644 components/hybrid_engine/src/table_properties.rs create mode 100644 components/hybrid_engine/src/ttl_properties.rs create mode 100644 components/hybrid_engine/src/write_batch.rs diff --git a/Cargo.lock b/Cargo.lock index 147b42405bf..ab5c5d1663a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2443,6 +2443,15 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "hybrid_engine" +version = "0.0.1" +dependencies = [ + "engine_traits", + "tikv_util", + "txn_types", +] + [[package]] name = "hyper" version = "0.14.23" @@ -6240,6 +6249,7 @@ dependencies = [ "grpcio-health", "hex 0.4.2", "http", + "hybrid_engine", "hyper", "hyper-openssl", "hyper-tls", diff --git a/Cargo.toml b/Cargo.toml index fdc86fb5f15..e66b7aee0fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ failpoints = ["fail/failpoints", "raftstore/failpoints", "tikv_util/failpoints", cloud-aws = ["encryption_export/cloud-aws"] cloud-gcp = ["encryption_export/cloud-gcp"] cloud-azure = ["encryption_export/cloud-azure"] -testexport = ["raftstore/testexport", "api_version/testexport", "causal_ts/testexport", "engine_traits/testexport", "engine_rocks/testexport", "engine_panic/testexport"] +testexport = 
["raftstore/testexport", "api_version/testexport", "causal_ts/testexport", "engine_traits/testexport", "engine_rocks/testexport", "engine_panic/testexport", "hybrid_engine/testexport"] test-engine-kv-rocksdb = ["engine_test/test-engine-kv-rocksdb"] test-engine-raft-raft-engine = ["engine_test/test-engine-raft-raft-engine"] test-engines-rocksdb = ["engine_test/test-engines-rocksdb"] @@ -90,6 +90,7 @@ grpcio = { workspace = true } grpcio-health = { workspace = true } hex = "0.4" http = "0" +hybrid_engine = { workspace = true } hyper = { version = "0.14", features = ["full"] } hyper-tls = "0.5" into_other = { workspace = true } @@ -319,6 +320,7 @@ encryption = { path = "components/encryption" } encryption_export = { path = "components/encryption/export" } engine_panic = { path = "components/engine_panic" } engine_rocks = { path = "components/engine_rocks" } +hybrid_engine = { path = "components/hybrid_engine" } engine_rocks_helper = { path = "components/engine_rocks_helper" } engine_test = { path = "components/engine_test", default-features = false } engine_traits = { path = "components/engine_traits" } diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 9cf4c22dd82..53708994561 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -312,6 +312,8 @@ mod table_properties; pub use crate::table_properties::*; mod checkpoint; pub use crate::checkpoint::*; +mod memory_engine; +pub use memory_engine::RegionCacheEngine; // These modules contain more general traits, some of which may be implemented // by multiple types. diff --git a/components/engine_traits/src/memory_engine.rs b/components/engine_traits/src/memory_engine.rs new file mode 100644 index 00000000000..189c3bc0c28 --- /dev/null +++ b/components/engine_traits/src/memory_engine.rs @@ -0,0 +1,17 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::fmt::Debug; + +use crate::{Iterable, Snapshot, WriteBatchExt}; + +/// RegionCacheEngine works as a region cache caching some regions (in Memory or +/// NVME for instance) to improve the read performance. +pub trait RegionCacheEngine: + WriteBatchExt + Iterable + Debug + Clone + Unpin + Send + Sync + 'static +{ + type Snapshot: Snapshot; + + // If None is returned, the RegionCacheEngine is currently not readable for this + // region or read_ts. + fn snapshot(&self, region_id: u64, read_ts: u64) -> Option; +} diff --git a/components/hybrid_engine/Cargo.toml b/components/hybrid_engine/Cargo.toml new file mode 100644 index 00000000000..f38604a10c1 --- /dev/null +++ b/components/hybrid_engine/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "hybrid_engine" +version = "0.0.1" +edition = "2021" +publish = false + +[features] +testexport = [] + +[dependencies] +engine_traits = { workspace = true } +txn_types = { workspace = true } +tikv_util = { workspace = true } \ No newline at end of file diff --git a/components/hybrid_engine/src/cf_names.rs b/components/hybrid_engine/src/cf_names.rs new file mode 100644 index 00000000000..990fb4d0f76 --- /dev/null +++ b/components/hybrid_engine/src/cf_names.rs @@ -0,0 +1,15 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{CfNamesExt, KvEngine, RegionCacheEngine}; + +use crate::engine::HybridEngine; + +impl CfNamesExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn cf_names(&self) -> Vec<&str> { + self.disk_engine().cf_names() + } +} diff --git a/components/hybrid_engine/src/cf_options.rs b/components/hybrid_engine/src/cf_options.rs new file mode 100644 index 00000000000..61fe08da536 --- /dev/null +++ b/components/hybrid_engine/src/cf_options.rs @@ -0,0 +1,21 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{CfOptionsExt, KvEngine, RegionCacheEngine, Result}; + +use crate::engine::HybridEngine; + +impl CfOptionsExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type CfOptions = EK::CfOptions; + + fn get_options_cf(&self, cf: &str) -> Result { + self.disk_engine().get_options_cf(cf) + } + + fn set_options_cf(&self, cf: &str, options: &[(&str, &str)]) -> Result<()> { + self.disk_engine().set_options_cf(cf, options) + } +} diff --git a/components/hybrid_engine/src/checkpoint.rs b/components/hybrid_engine/src/checkpoint.rs new file mode 100644 index 00000000000..7d9bdb022ea --- /dev/null +++ b/components/hybrid_engine/src/checkpoint.rs @@ -0,0 +1,22 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{Checkpointable, KvEngine, RegionCacheEngine, Result}; + +use crate::engine::HybridEngine; + +impl Checkpointable for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type Checkpointer = EK::Checkpointer; + + fn new_checkpointer(&self) -> Result { + self.disk_engine().new_checkpointer() + } + + fn merge(&self, dbs: &[&Self]) -> Result<()> { + let disk_dbs: Vec<_> = dbs.iter().map(|&db| db.disk_engine()).collect(); + self.disk_engine().merge(&disk_dbs) + } +} diff --git a/components/hybrid_engine/src/compact.rs b/components/hybrid_engine/src/compact.rs new file mode 100644 index 00000000000..6afbba556b0 --- /dev/null +++ b/components/hybrid_engine/src/compact.rs @@ -0,0 +1,71 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{CompactExt, KvEngine, RegionCacheEngine, Result}; + +use crate::engine::HybridEngine; + +impl CompactExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type CompactedEvent = EK::CompactedEvent; + + fn auto_compactions_is_disabled(&self) -> Result { + self.disk_engine().auto_compactions_is_disabled() + } + + fn compact_range_cf( + &self, + cf: &str, + start_key: Option<&[u8]>, + end_key: Option<&[u8]>, + exclusive_manual: bool, + max_subcompactions: u32, + ) -> Result<()> { + self.disk_engine().compact_range_cf( + cf, + start_key, + end_key, + exclusive_manual, + max_subcompactions, + ) + } + + fn compact_files_in_range_cf( + &self, + cf: &str, + start: Option<&[u8]>, + end: Option<&[u8]>, + output_level: Option, + ) -> Result<()> { + self.disk_engine() + .compact_files_in_range_cf(cf, start, end, output_level) + } + + fn compact_files_in_range( + &self, + start: Option<&[u8]>, + end: Option<&[u8]>, + output_level: Option, + ) -> Result<()> { + self.disk_engine() + .compact_files_in_range(start, end, output_level) + } + + fn compact_files_cf( + &self, + cf: &str, + files: Vec, + output_level: Option, + max_subcompactions: u32, + exclude_l0: bool, + ) -> Result<()> { + self.disk_engine() + .compact_files_cf(cf, files, output_level, max_subcompactions, exclude_l0) + } + + fn check_in_range(&self, start: Option<&[u8]>, end: Option<&[u8]>) -> Result<()> { + self.disk_engine().check_in_range(start, end) + } +} diff --git a/components/hybrid_engine/src/db_options.rs b/components/hybrid_engine/src/db_options.rs new file mode 100644 index 00000000000..6b4be90a43f --- /dev/null +++ b/components/hybrid_engine/src/db_options.rs @@ -0,0 +1,21 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{DbOptionsExt, KvEngine, RegionCacheEngine, Result}; + +use crate::engine::HybridEngine; + +impl DbOptionsExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type DbOptions = EK::DbOptions; + + fn get_db_options(&self) -> Self::DbOptions { + self.disk_engine().get_db_options() + } + + fn set_db_options(&self, options: &[(&str, &str)]) -> Result<()> { + self.disk_engine().set_db_options(options) + } +} diff --git a/components/hybrid_engine/src/engine.rs b/components/hybrid_engine/src/engine.rs new file mode 100644 index 00000000000..deb544b91c6 --- /dev/null +++ b/components/hybrid_engine/src/engine.rs @@ -0,0 +1,120 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, SyncMutable}; + +use crate::snapshot::HybridEngineSnapshot; + +/// This engine is structured with both a disk engine and an region cache +/// engine. The disk engine houses the complete database data, whereas the +/// region cache engine functions as a region cache, selectively caching certain +/// regions (in a better performance storage device such as NVME or RAM) to +/// enhance read performance. For the regions that are cached, region cache +/// engine retains all data that has not been garbage collected. +#[derive(Clone, Debug)] +pub struct HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + disk_engine: EK, + region_cache_engine: EC, +} + +impl HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + pub fn disk_engine(&self) -> &EK { + &self.disk_engine + } + + pub fn mut_disk_engine(&mut self) -> &mut EK { + &mut self.disk_engine + } + + pub fn region_cache_engine(&self) -> &EC { + &self.region_cache_engine + } + + pub fn mut_region_cache_engine(&mut self) -> &mut EC { + &mut self.region_cache_engine + } +} + +// todo: implement KvEngine methods as well as it's super traits. 
+impl KvEngine for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type Snapshot = HybridEngineSnapshot; + + fn snapshot(&self) -> Self::Snapshot { + unimplemented!() + } + + fn sync(&self) -> engine_traits::Result<()> { + unimplemented!() + } + + fn bad_downcast(&self) -> &T { + unimplemented!() + } + + #[cfg(feature = "testexport")] + fn inner_refcount(&self) -> usize { + unimplemented!() + } +} + +impl Peekable for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type DbVector = EK::DbVector; + + fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { + unimplemented!() + } + + fn get_value_cf_opt( + &self, + opts: &ReadOptions, + cf: &str, + key: &[u8], + ) -> Result> { + unimplemented!() + } +} + +impl SyncMutable for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn put(&self, key: &[u8], value: &[u8]) -> Result<()> { + unimplemented!() + } + + fn put_cf(&self, cf: &str, key: &[u8], value: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete(&self, key: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_cf(&self, cf: &str, key: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_range(&self, begin_key: &[u8], end_key: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_range_cf(&self, cf: &str, begin_key: &[u8], end_key: &[u8]) -> Result<()> { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/engine_iterator.rs b/components/hybrid_engine/src/engine_iterator.rs new file mode 100644 index 00000000000..642aac82b60 --- /dev/null +++ b/components/hybrid_engine/src/engine_iterator.rs @@ -0,0 +1,54 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{Iterator, KvEngine, RegionCacheEngine, Result}; +use tikv_util::Either; + +pub struct HybridEngineIterator +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + iter: Either, +} + +impl Iterator for HybridEngineIterator +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn seek(&mut self, _key: &[u8]) -> Result { + unimplemented!() + } + + fn seek_for_prev(&mut self, _key: &[u8]) -> Result { + unimplemented!() + } + + fn seek_to_first(&mut self) -> Result { + unimplemented!() + } + + fn seek_to_last(&mut self) -> Result { + unimplemented!() + } + + fn prev(&mut self) -> Result { + unimplemented!() + } + + fn next(&mut self) -> Result { + unimplemented!() + } + + fn key(&self) -> &[u8] { + unimplemented!() + } + + fn value(&self) -> &[u8] { + unimplemented!() + } + + fn valid(&self) -> Result { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/flow_control_factors.rs b/components/hybrid_engine/src/flow_control_factors.rs new file mode 100644 index 00000000000..9649671d418 --- /dev/null +++ b/components/hybrid_engine/src/flow_control_factors.rs @@ -0,0 +1,23 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{FlowControlFactorsExt, KvEngine, RegionCacheEngine, Result}; + +use crate::engine::HybridEngine; + +impl FlowControlFactorsExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn get_cf_num_files_at_level(&self, cf: &str, level: usize) -> Result> { + self.disk_engine().get_cf_num_files_at_level(cf, level) + } + + fn get_cf_num_immutable_mem_table(&self, cf: &str) -> Result> { + self.disk_engine().get_cf_num_immutable_mem_table(cf) + } + + fn get_cf_pending_compaction_bytes(&self, cf: &str) -> Result> { + self.disk_engine().get_cf_pending_compaction_bytes(cf) + } +} diff --git a/components/hybrid_engine/src/hybrid_metrics.rs b/components/hybrid_engine/src/hybrid_metrics.rs new file mode 100644 index 00000000000..2d49d9ad1d9 --- /dev/null +++ b/components/hybrid_engine/src/hybrid_metrics.rs @@ -0,0 +1,25 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, RegionCacheEngine, StatisticsReporter}; + +use crate::engine::HybridEngine; + +pub struct HybridEngineStatisticsReporter {} + +impl StatisticsReporter> for HybridEngineStatisticsReporter +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn new(name: &str) -> Self { + unimplemented!() + } + + fn collect(&mut self, engine: &HybridEngine) { + unimplemented!() + } + + fn flush(&mut self) { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/import.rs b/components/hybrid_engine/src/import.rs new file mode 100644 index 00000000000..de40c83d214 --- /dev/null +++ b/components/hybrid_engine/src/import.rs @@ -0,0 +1,17 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{ImportExt, KvEngine, RegionCacheEngine}; + +use crate::engine::HybridEngine; + +impl ImportExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type IngestExternalFileOptions = EK::IngestExternalFileOptions; + + fn ingest_external_file_cf(&self, cf: &str, files: &[&str]) -> engine_traits::Result<()> { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/iterable.rs b/components/hybrid_engine/src/iterable.rs new file mode 100644 index 00000000000..96933641b06 --- /dev/null +++ b/components/hybrid_engine/src/iterable.rs @@ -0,0 +1,17 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{IterOptions, Iterable, KvEngine, RegionCacheEngine, Result}; + +use crate::{engine::HybridEngine, engine_iterator::HybridEngineIterator}; + +impl Iterable for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type Iterator = HybridEngineIterator; + + fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/lib.rs b/components/hybrid_engine/src/lib.rs new file mode 100644 index 00000000000..147fcc5a8a5 --- /dev/null +++ b/components/hybrid_engine/src/lib.rs @@ -0,0 +1,24 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. +#![allow(dead_code)] +#![allow(unused_variables)] + +mod cf_names; +mod cf_options; +mod checkpoint; +mod compact; +mod db_options; +mod engine; +mod engine_iterator; +mod flow_control_factors; +mod hybrid_metrics; +mod import; +mod iterable; +mod misc; +mod mvcc_properties; +mod perf_context; +mod range_properties; +mod snapshot; +mod sst; +mod table_properties; +mod ttl_properties; +mod write_batch; diff --git a/components/hybrid_engine/src/misc.rs b/components/hybrid_engine/src/misc.rs new file mode 100644 index 00000000000..9575344e65c --- /dev/null +++ b/components/hybrid_engine/src/misc.rs @@ -0,0 +1,127 @@ +// Copyright 2023 TiKV Project Authors. 
Licensed under Apache-2.0. + +use engine_traits::{KvEngine, MiscExt, RegionCacheEngine, Result}; + +use crate::{engine::HybridEngine, hybrid_metrics::HybridEngineStatisticsReporter}; + +impl MiscExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type StatisticsReporter = HybridEngineStatisticsReporter; + + fn flush_cf(&self, cf: &str, wait: bool) -> Result<()> { + unimplemented!() + } + + fn flush_cfs(&self, cfs: &[&str], wait: bool) -> Result<()> { + unimplemented!() + } + + fn flush_oldest_cf( + &self, + wait: bool, + threshold: Option, + ) -> Result { + unimplemented!() + } + + fn delete_ranges_cf( + &self, + wopts: &engine_traits::WriteOptions, + cf: &str, + strategy: engine_traits::DeleteStrategy, + ranges: &[engine_traits::Range<'_>], + ) -> Result { + unimplemented!() + } + + fn get_approximate_memtable_stats_cf( + &self, + cf: &str, + range: &engine_traits::Range<'_>, + ) -> Result<(u64, u64)> { + unimplemented!() + } + + fn ingest_maybe_slowdown_writes(&self, cf: &str) -> Result { + unimplemented!() + } + + fn get_sst_key_ranges(&self, cf: &str, level: usize) -> Result, Vec)>> { + unimplemented!() + } + + fn get_engine_used_size(&self) -> Result { + unimplemented!() + } + + fn path(&self) -> &str { + unimplemented!() + } + + fn sync_wal(&self) -> Result<()> { + unimplemented!() + } + + fn pause_background_work(&self) -> Result<()> { + unimplemented!() + } + + fn continue_background_work(&self) -> Result<()> { + unimplemented!() + } + + fn exists(path: &str) -> bool { + unimplemented!() + } + + fn locked(path: &str) -> Result { + unimplemented!() + } + + fn dump_stats(&self) -> Result { + unimplemented!() + } + + fn get_latest_sequence_number(&self) -> u64 { + unimplemented!() + } + + fn get_oldest_snapshot_sequence_number(&self) -> Option { + unimplemented!() + } + + fn get_total_sst_files_size_cf(&self, cf: &str) -> Result> { + unimplemented!() + } + + fn get_num_keys(&self) -> Result { + unimplemented!() + } + + fn get_range_stats( 
+ &self, + cf: &str, + start: &[u8], + end: &[u8], + ) -> Result> { + unimplemented!() + } + + fn is_stalled_or_stopped(&self) -> bool { + unimplemented!() + } + + fn get_active_memtable_stats_cf( + &self, + cf: &str, + ) -> Result> { + unimplemented!() + } + + fn get_accumulated_flush_count_cf(cf: &str) -> Result { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/mvcc_properties.rs b/components/hybrid_engine/src/mvcc_properties.rs new file mode 100644 index 00000000000..0d03258d2de --- /dev/null +++ b/components/hybrid_engine/src/mvcc_properties.rs @@ -0,0 +1,23 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, MvccProperties, MvccPropertiesExt, RegionCacheEngine}; +use txn_types::TimeStamp; + +use crate::engine::HybridEngine; + +impl MvccPropertiesExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn get_mvcc_properties_cf( + &self, + cf: &str, + safe_point: TimeStamp, + start_key: &[u8], + end_key: &[u8], + ) -> Option { + self.disk_engine() + .get_mvcc_properties_cf(cf, safe_point, start_key, end_key) + } +} diff --git a/components/hybrid_engine/src/perf_context.rs b/components/hybrid_engine/src/perf_context.rs new file mode 100644 index 00000000000..1db4e8c9d27 --- /dev/null +++ b/components/hybrid_engine/src/perf_context.rs @@ -0,0 +1,20 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{KvEngine, PerfContextExt, PerfContextKind, RegionCacheEngine}; + +use crate::engine::HybridEngine; + +impl PerfContextExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type PerfContext = EK::PerfContext; + + fn get_perf_context( + level: engine_traits::PerfLevel, + kind: PerfContextKind, + ) -> Self::PerfContext { + EK::get_perf_context(level, kind) + } +} diff --git a/components/hybrid_engine/src/range_properties.rs b/components/hybrid_engine/src/range_properties.rs new file mode 100644 index 00000000000..7f38379f36d --- /dev/null +++ b/components/hybrid_engine/src/range_properties.rs @@ -0,0 +1,60 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, Range, RangePropertiesExt, RegionCacheEngine, Result}; + +use crate::engine::HybridEngine; + +impl RangePropertiesExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn get_range_approximate_keys(&self, range: Range<'_>, large_threshold: u64) -> Result { + self.disk_engine() + .get_range_approximate_keys(range, large_threshold) + } + + fn get_range_approximate_keys_cf( + &self, + cfname: &str, + range: Range<'_>, + large_threshold: u64, + ) -> Result { + self.disk_engine() + .get_range_approximate_keys_cf(cfname, range, large_threshold) + } + + fn get_range_approximate_size(&self, range: Range<'_>, large_threshold: u64) -> Result { + self.disk_engine() + .get_range_approximate_size(range, large_threshold) + } + + fn get_range_approximate_size_cf( + &self, + cfname: &str, + range: Range<'_>, + large_threshold: u64, + ) -> Result { + self.disk_engine() + .get_range_approximate_size_cf(cfname, range, large_threshold) + } + + fn get_range_approximate_split_keys( + &self, + range: Range<'_>, + key_count: usize, + ) -> Result>> { + self.disk_engine() + .get_range_approximate_split_keys(range, key_count) + } + + fn get_range_approximate_split_keys_cf( + &self, + cfname: &str, + range: Range<'_>, + 
key_count: usize, + ) -> Result>> { + self.disk_engine() + .get_range_approximate_split_keys_cf(cfname, range, key_count) + } +} diff --git a/components/hybrid_engine/src/snapshot.rs b/components/hybrid_engine/src/snapshot.rs new file mode 100644 index 00000000000..4ada590c3d6 --- /dev/null +++ b/components/hybrid_engine/src/snapshot.rs @@ -0,0 +1,103 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + fmt::{self, Debug, Formatter}, + marker::PhantomData, +}; + +use engine_traits::{ + CfNamesExt, IterOptions, Iterable, KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, + Snapshot, SnapshotMiscExt, +}; + +use crate::engine_iterator::HybridEngineIterator; + +pub struct HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + disk_snap: EK::Snapshot, + + phantom: PhantomData, +} + +impl Snapshot for HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ +} + +impl Debug for HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + write!(fmt, "Hybrid Engine Snapshot Impl") + } +} + +impl Drop for HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn drop(&mut self) { + unimplemented!() + } +} + +impl Iterable for HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type Iterator = HybridEngineIterator; + + fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { + unimplemented!() + } +} + +impl Peekable for HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type DbVector = EK::DbVector; + + fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { + unimplemented!() + } + + fn get_value_cf_opt( + &self, + opts: &ReadOptions, + cf: &str, + key: &[u8], + ) -> Result> { + unimplemented!() + } +} + +impl CfNamesExt for HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn cf_names(&self) -> Vec<&str> { + 
self.disk_snap.cf_names() + } +} + +impl SnapshotMiscExt for HybridEngineSnapshot +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn sequence_number(&self) -> u64 { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/sst.rs b/components/hybrid_engine/src/sst.rs new file mode 100644 index 00000000000..2bade295ec3 --- /dev/null +++ b/components/hybrid_engine/src/sst.rs @@ -0,0 +1,53 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{ + KvEngine, RegionCacheEngine, Result, SstCompressionType, SstExt, SstWriterBuilder, +}; + +use crate::engine::HybridEngine; + +pub struct HybridEngineSstWriteBuilder {} + +impl SstExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type SstReader = EK::SstReader; + type SstWriter = EK::SstWriter; + type SstWriterBuilder = HybridEngineSstWriteBuilder; +} + +impl SstWriterBuilder> for HybridEngineSstWriteBuilder +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn new() -> Self { + unimplemented!() + } + + fn set_db(self, _db: &HybridEngine) -> Self { + unimplemented!() + } + + fn set_cf(self, _cf: &str) -> Self { + unimplemented!() + } + + fn set_in_memory(self, _in_memory: bool) -> Self { + unimplemented!() + } + + fn set_compression_type(self, _compression: Option) -> Self { + unimplemented!() + } + + fn set_compression_level(self, level: i32) -> Self { + unimplemented!() + } + + fn build(self, _path: &str) -> Result< as SstExt>::SstWriter> { + unimplemented!() + } +} diff --git a/components/hybrid_engine/src/table_properties.rs b/components/hybrid_engine/src/table_properties.rs new file mode 100644 index 00000000000..6ad95e5931a --- /dev/null +++ b/components/hybrid_engine/src/table_properties.rs @@ -0,0 +1,21 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{KvEngine, Range, RegionCacheEngine, Result, TablePropertiesExt}; + +use crate::engine::HybridEngine; + +impl TablePropertiesExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type TablePropertiesCollection = EK::TablePropertiesCollection; + + fn table_properties_collection( + &self, + cf: &str, + ranges: &[Range<'_>], + ) -> Result { + self.disk_engine().table_properties_collection(cf, ranges) + } +} diff --git a/components/hybrid_engine/src/ttl_properties.rs b/components/hybrid_engine/src/ttl_properties.rs new file mode 100644 index 00000000000..d5b7d8578b5 --- /dev/null +++ b/components/hybrid_engine/src/ttl_properties.rs @@ -0,0 +1,21 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use engine_traits::{KvEngine, RegionCacheEngine, Result, TtlProperties, TtlPropertiesExt}; + +use crate::engine::HybridEngine; + +impl TtlPropertiesExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + fn get_range_ttl_properties_cf( + &self, + cf: &str, + start_key: &[u8], + end_key: &[u8], + ) -> Result> { + self.disk_engine() + .get_range_ttl_properties_cf(cf, start_key, end_key) + } +} diff --git a/components/hybrid_engine/src/write_batch.rs b/components/hybrid_engine/src/write_batch.rs new file mode 100644 index 00000000000..3aba34c9c85 --- /dev/null +++ b/components/hybrid_engine/src/write_batch.rs @@ -0,0 +1,101 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use engine_traits::{ + KvEngine, Mutable, RegionCacheEngine, Result, WriteBatch, WriteBatchExt, WriteOptions, +}; + +use crate::engine::HybridEngine; + +pub struct HybridEngineWriteBatch { + _disk_write_batch: EK::WriteBatch, + // todo: region_cache_engine write batch +} + +impl WriteBatchExt for HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + type WriteBatch = HybridEngineWriteBatch; + const WRITE_BATCH_MAX_KEYS: usize = EK::WRITE_BATCH_MAX_KEYS; + + fn write_batch(&self) -> Self::WriteBatch { + unimplemented!() + } + + fn write_batch_with_cap(&self, _: usize) -> Self::WriteBatch { + unimplemented!() + } +} + +impl WriteBatch for HybridEngineWriteBatch { + fn write_opt(&mut self, _: &WriteOptions) -> Result { + unimplemented!() + } + + fn write_callback_opt(&mut self, _opts: &WriteOptions, _cb: impl FnMut()) -> Result { + unimplemented!() + } + + fn data_size(&self) -> usize { + unimplemented!() + } + + fn count(&self) -> usize { + unimplemented!() + } + + fn is_empty(&self) -> bool { + unimplemented!() + } + + fn should_write_to_engine(&self) -> bool { + unimplemented!() + } + + fn clear(&mut self) { + unimplemented!() + } + + fn set_save_point(&mut self) { + unimplemented!() + } + + fn pop_save_point(&mut self) -> Result<()> { + unimplemented!() + } + + fn rollback_to_save_point(&mut self) -> Result<()> { + unimplemented!() + } + + fn merge(&mut self, _other: Self) -> Result<()> { + unimplemented!() + } +} + +impl Mutable for HybridEngineWriteBatch { + fn put(&mut self, _key: &[u8], _value: &[u8]) -> Result<()> { + unimplemented!() + } + + fn put_cf(&mut self, _cf: &str, _key: &[u8], _value: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete(&mut self, _key: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_cf(&mut self, _cf: &str, _key: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_range(&mut self, _begin_key: &[u8], _end_key: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_range_cf(&mut self, 
_cf: &str, _begin_key: &[u8], _end_key: &[u8]) -> Result<()> { + unimplemented!() + } +} From a07db9f1c72b0b4bbf41579665c656a9adcca9af Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Mon, 11 Dec 2023 22:43:49 -0800 Subject: [PATCH 1057/1149] server: make gc support multi-threads (#16096) close tikv/tikv#16101 do parallel region gc and expose the gc thread configuration. The configuration can be dynamically updated. Signed-off-by: Qi Xu Co-authored-by: Qi Xu --- components/tikv_util/src/worker/pool.rs | 16 ++ src/server/gc_worker/compaction_filter.rs | 2 +- src/server/gc_worker/config.rs | 23 ++- src/server/gc_worker/gc_manager.rs | 103 +++++++++---- src/server/gc_worker/gc_worker.rs | 165 ++++++++++++++++----- tests/integrations/config/mod.rs | 1 + tests/integrations/config/test-custom.toml | 1 + 7 files changed, 246 insertions(+), 65 deletions(-) diff --git a/components/tikv_util/src/worker/pool.rs b/components/tikv_util/src/worker/pool.rs index 9ef827b007a..a22732a7aae 100644 --- a/components/tikv_util/src/worker/pool.rs +++ b/components/tikv_util/src/worker/pool.rs @@ -224,6 +224,14 @@ impl LazyWorker { pub fn remote(&self) -> Remote { self.worker.remote() } + + pub fn pool_size(&self) -> usize { + self.worker.pool_size() + } + + pub fn pool(&self) -> FuturePool { + self.worker.pool() + } } pub struct ReceiverWrapper { @@ -448,6 +456,14 @@ impl Worker { self.pool.remote().clone() } + pub fn pool_size(&self) -> usize { + self.pool.get_pool_size() + } + + pub fn pool(&self) -> FuturePool { + self.pool.clone() + } + fn start_impl( &self, runner: R, diff --git a/src/server/gc_worker/compaction_filter.rs b/src/server/gc_worker/compaction_filter.rs index 665824a1bac..2bea0cf347b 100644 --- a/src/server/gc_worker/compaction_filter.rs +++ b/src/server/gc_worker/compaction_filter.rs @@ -888,7 +888,7 @@ pub mod test_utils { cfg.ratio_threshold = ratio_threshold; } cfg.enable_compaction_filter = true; - GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg))) + 
GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg)), None) }; let feature_gate = { let feature_gate = FeatureGate::default(); diff --git a/src/server/gc_worker/config.rs b/src/server/gc_worker/config.rs index 1816dd845e1..809c55e1268 100644 --- a/src/server/gc_worker/config.rs +++ b/src/server/gc_worker/config.rs @@ -3,7 +3,10 @@ use std::sync::Arc; use online_config::{ConfigChange, ConfigManager, OnlineConfig}; -use tikv_util::config::{ReadableSize, VersionTrack}; +use tikv_util::{ + config::{ReadableSize, VersionTrack}, + yatp_pool::FuturePool, +}; const DEFAULT_GC_RATIO_THRESHOLD: f64 = 1.1; pub const DEFAULT_GC_BATCH_KEYS: usize = 512; @@ -22,6 +25,8 @@ pub struct GcConfig { /// greater than 5.0.0. Change `compaction_filter_skip_version_check` /// can enable it by force. pub compaction_filter_skip_version_check: bool, + /// gc threads count + pub num_threads: usize, } impl Default for GcConfig { @@ -32,6 +37,7 @@ impl Default for GcConfig { max_write_bytes_per_sec: ReadableSize(DEFAULT_GC_MAX_WRITE_BYTES_PER_SEC), enable_compaction_filter: true, compaction_filter_skip_version_check: false, + num_threads: 1, } } } @@ -41,12 +47,15 @@ impl GcConfig { if self.batch_keys == 0 { return Err("gc.batch_keys should not be 0".into()); } + if self.num_threads == 0 { + return Err("gc.thread_count should not be 0".into()); + } Ok(()) } } #[derive(Clone, Default)] -pub struct GcWorkerConfigManager(pub Arc>); +pub struct GcWorkerConfigManager(pub Arc>, pub Option); impl ConfigManager for GcWorkerConfigManager { fn dispatch( @@ -55,6 +64,16 @@ impl ConfigManager for GcWorkerConfigManager { ) -> std::result::Result<(), Box> { { let change = change.clone(); + if let Some(pool) = self.1.as_ref() { + if let Some(v) = change.get("num_threads") { + let pool_size: usize = v.into(); + pool.scale_pool_size(pool_size); + info!( + "GC worker thread count is changed"; + "new_thread_count" => pool_size, + ); + } + } self.0 .update(move |cfg: &mut GcConfig| cfg.update(change))?; } diff 
--git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index be18f8216d5..d9c5287b67d 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -4,7 +4,7 @@ use std::{ cmp::Ordering, sync::{ atomic::{AtomicU64, Ordering as AtomicOrdering}, - mpsc, Arc, + mpsc, Arc, Condvar, Mutex, }, thread::{self, Builder as ThreadBuilder, JoinHandle}, time::Duration, @@ -20,10 +20,10 @@ use txn_types::{Key, TimeStamp}; use super::{ compaction_filter::is_compaction_filter_allowed, config::GcWorkerConfigManager, - gc_worker::{sync_gc, GcSafePointProvider, GcTask}, + gc_worker::{schedule_gc, GcSafePointProvider, GcTask}, Result, }; -use crate::{server::metrics::*, tikv_util::sys::thread::StdThreadBuildWrapper}; +use crate::{server::metrics::*, storage::Callback, tikv_util::sys::thread::StdThreadBuildWrapper}; const POLL_SAFE_POINT_INTERVAL_SECS: u64 = 10; @@ -245,6 +245,8 @@ pub(super) struct GcManager GcManager { @@ -254,6 +256,7 @@ impl GcMan worker_scheduler: Scheduler>, cfg_tracker: GcWorkerConfigManager, feature_gate: FeatureGate, + concurrent_tasks: usize, ) -> GcManager { GcManager { cfg, @@ -263,6 +266,7 @@ impl GcMan gc_manager_ctx: GcManagerContext::new(), cfg_tracker, feature_gate, + max_concurrent_tasks: concurrent_tasks, } } @@ -442,13 +446,27 @@ impl GcMan let mut progress = Some(Key::from_encoded(BEGIN_KEY.to_vec())); // Records how many region we have GC-ed. - let mut processed_regions = 0; + let mut scheduled_regions = 0; + let task_controller = Arc::new((Mutex::new(0), Condvar::new())); + // the task_controller is the combination to control the number + // of tasks The mutex is used for protecting the number of current + // tasks, while the condvar is used for notifying/get notified when the + // number of current tasks is changed. 
+ let (lock, cvar) = &*task_controller; + let maybe_wait = |max_tasks| { + let mut current_tasks: std::sync::MutexGuard<'_, usize> = lock.lock().unwrap(); + while *current_tasks > max_tasks { + // Wait until the number of current tasks is below the limit + current_tasks = cvar.wait(current_tasks).unwrap(); + } + }; info!("gc_worker: auto gc starts"; "safe_point" => self.curr_safe_point()); // The following loop iterates all regions whose leader is on this TiKV and does // GC on them. At the same time, check whether safe_point is updated // periodically. If it's updated, rewinding will happen. + loop { self.gc_manager_ctx.check_stopped()?; if is_compaction_filter_allowed(&self.cfg_tracker.value(), &self.feature_gate) { @@ -462,9 +480,9 @@ impl GcMan // We have worked to the end and we need to rewind. Restart from beginning. progress = Some(Key::from_encoded(BEGIN_KEY.to_vec())); need_rewind = false; - info!("gc_worker: auto gc rewinds"; "processed_regions" => processed_regions); + info!("gc_worker: auto gc rewinds"; "scheduled_regions" => scheduled_regions); - processed_regions = 0; + scheduled_regions = 0; // Set the metric to zero to show that rewinding has happened. AUTO_GC_PROCESSED_REGIONS_GAUGE_VEC .with_label_values(&[PROCESS_TYPE_GC]) @@ -483,19 +501,40 @@ impl GcMan if finished { // We have worked to the end of the TiKV or our progress has reached `end`, and // we don't need to rewind. In this case, the round of GC has finished. - info!("gc_worker: auto gc finishes"; "processed_regions" => processed_regions); - return Ok(()); + info!("gc_worker: all regions task are scheduled"; + "processed_regions" => scheduled_regions, + ); + break; } } - assert!(progress.is_some()); // Before doing GC, check whether safe_point is updated periodically to // determine if rewinding is needed. 
self.check_if_need_rewind(&progress, &mut need_rewind, &mut end); - progress = self.gc_next_region(progress.unwrap(), &mut processed_regions)?; + let controller: Arc<(Mutex, Condvar)> = Arc::clone(&task_controller); + let cb = Box::new(move |_res| { + let (lock, cvar) = &*controller; + let mut current_tasks = lock.lock().unwrap(); + *current_tasks -= 1; + cvar.notify_one(); + AUTO_GC_PROCESSED_REGIONS_GAUGE_VEC + .with_label_values(&[PROCESS_TYPE_GC]) + .inc(); + }); + maybe_wait(self.max_concurrent_tasks - 1); + let mut current_tasks = lock.lock().unwrap(); + progress = self.async_gc_next_region(progress.unwrap(), cb, &mut current_tasks)?; + scheduled_regions += 1; } + + // wait for all tasks finished + self.gc_manager_ctx.check_stopped()?; + maybe_wait(0); + info!("gc_worker: auto gc finishes"; "processed_regions" => scheduled_regions); + + Ok(()) } /// Checks whether we need to rewind in this round of GC. Only used in @@ -536,13 +575,14 @@ impl GcMan } } - /// Does GC on the next region after `from_key`. Returns the end key of the - /// region it processed. If we have processed to the end of all regions, - /// returns `None`. - fn gc_next_region( + /// Does GC on the next region after `from_key` asynchronously. Returns the + /// end key of the region it processed. If we have processed to the end + /// of all regions, returns `None`. + fn async_gc_next_region( &mut self, from_key: Key, - processed_regions: &mut usize, + callback: Callback<()>, + running_tasks: &mut usize, ) -> GcManagerResult> { // Get the information of the next region to do GC. let (region, next_key) = self.get_next_gc_context(from_key); @@ -552,16 +592,16 @@ impl GcMan let hex_end = format!("{:?}", log_wrappers::Value::key(region.get_end_key())); debug!("trying gc"; "region_id" => region.id, "start_key" => &hex_start, "end_key" => &hex_end); - if let Err(e) = sync_gc(&self.worker_scheduler, region, self.curr_safe_point()) { - // Ignore the error and continue, since it's useless to retry this. 
- // TODO: Find a better way to handle errors. Maybe we should retry. - warn!("failed gc"; "start_key" => &hex_start, "end_key" => &hex_end, "err" => ?e); - } - - *processed_regions += 1; - AUTO_GC_PROCESSED_REGIONS_GAUGE_VEC - .with_label_values(&[PROCESS_TYPE_GC]) - .inc(); + let _ = schedule_gc( + &self.worker_scheduler, + region, + self.curr_safe_point(), + callback, + ) + .map(|_| { + *running_tasks += 1; + Ok::<(), GcManagerError>(()) + }); Ok(next_key) } @@ -710,8 +750,16 @@ mod tests { impl GcManagerTestUtil { pub fn new(regions: BTreeMap, RegionInfo>) -> Self { let (gc_task_sender, gc_task_receiver) = channel(); - let worker = WorkerBuilder::new("test-gc-manager").create(); - let scheduler = worker.start("gc-manager", MockGcRunner { tx: gc_task_sender }); + let worker = WorkerBuilder::new("test-gc-manager") + .thread_count(2) + .create(); + let scheduler = worker.start( + "gc-manager", + MockGcRunner { + tx: gc_task_sender.clone(), + }, + ); + worker.start("gc-manager", MockGcRunner { tx: gc_task_sender }); let (safe_point_sender, safe_point_receiver) = channel(); @@ -731,6 +779,7 @@ mod tests { scheduler, GcWorkerConfigManager::default(), Default::default(), + 2, ); Self { gc_manager: Some(gc_manager), diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index c608470ba87..a0537a478d0 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -34,6 +34,7 @@ use tikv_util::{ Either, }; use txn_types::{Key, TimeStamp}; +use yatp::{task::future::TaskCell, Remote}; use super::{ check_need_gc, @@ -178,7 +179,7 @@ where } /// Used to perform GC operations on the engine. 
-pub struct GcRunner { +pub struct GcRunnerCore { store_id: u64, engine: E, @@ -193,6 +194,26 @@ pub struct GcRunner { stats_map: HashMap, } +impl Clone for GcRunnerCore { + fn clone(&self) -> Self { + GcRunnerCore { + store_id: self.store_id, + engine: self.engine.clone(), + flow_info_sender: self.flow_info_sender.clone(), + limiter: self.limiter.clone(), + cfg: self.cfg.clone(), + cfg_tracker: self.cfg_tracker.clone(), + stats_map: HashMap::default(), + } + } +} + +/// Used to perform GC operations on the engine. +pub struct GcRunner { + inner: GcRunnerCore, + pool: Remote, +} + pub const MAX_RAW_WRITE_SIZE: usize = 32 * 1024; pub struct MvccRaw { @@ -282,7 +303,7 @@ fn init_snap_ctx(store_id: u64, region: &Region) -> Context { ctx } -impl GcRunner { +impl GcRunnerCore { pub fn new( store_id: u64, engine: E, @@ -918,18 +939,12 @@ impl GcRunner { error!("failed to flush deletes, will leave garbage"; "err" => ?e); } } -} - -impl Runnable for GcRunner { - type Task = GcTask; #[inline] fn run(&mut self, task: GcTask) { let _io_type_guard = WithIoType::new(IoType::Gc); let enum_label = task.get_enum_label(); - GC_GCTASK_COUNTER_STATIC.get(enum_label).inc(); - let timer = SlowTimer::from_secs(GC_TASK_SLOW_SECONDS); let update_metrics = |is_err| { GC_TASK_DURATION_HISTOGRAM_VEC @@ -941,9 +956,6 @@ impl Runnable for GcRunner { } }; - // Refresh config before handle task - self.refresh_cfg(); - match task { GcTask::Gc { region, @@ -1062,6 +1074,37 @@ impl Runnable for GcRunner { } } +impl GcRunner { + pub fn new( + store_id: u64, + engine: E, + flow_info_sender: Sender, + cfg_tracker: Tracker, + cfg: GcConfig, + pool: Remote, + ) -> Self { + Self { + inner: GcRunnerCore::new(store_id, engine, flow_info_sender, cfg_tracker, cfg), + pool, + } + } +} + +impl Runnable for GcRunner { + type Task = GcTask; + + #[inline] + fn run(&mut self, task: GcTask) { + // Refresh config before handle task + self.inner.refresh_cfg(); + + let mut inner = self.inner.clone(); + 
self.pool.spawn(async move { + inner.run(task); + }); + } +} + /// When we failed to schedule a `GcTask` to `GcRunner`, use this to handle the /// `ScheduleError`. fn handle_gc_task_schedule_error(e: ScheduleError>) -> Result<()> { @@ -1081,7 +1124,7 @@ fn handle_gc_task_schedule_error(e: ScheduleError>) -> Res } /// Schedules a `GcTask` to the `GcRunner`. -fn schedule_gc( +pub fn schedule_gc( scheduler: &Scheduler>, region: Region, safe_point: TimeStamp, @@ -1174,13 +1217,18 @@ impl GcWorker { feature_gate: FeatureGate, region_info_provider: Arc, ) -> Self { - let worker_builder = WorkerBuilder::new("gc-worker").pending_capacity(GC_MAX_PENDING_TASKS); + let worker_builder = WorkerBuilder::new("gc-worker") + .pending_capacity(GC_MAX_PENDING_TASKS) + .thread_count(cfg.num_threads); let worker = worker_builder.create().lazy_build("gc-worker"); let worker_scheduler = worker.scheduler(); GcWorker { engine, flow_info_sender: Some(flow_info_sender), - config_manager: GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg))), + config_manager: GcWorkerConfigManager( + Arc::new(VersionTrack::new(cfg)), + Some(worker.pool()), + ), refs: Arc::new(AtomicUsize::new(1)), worker: Arc::new(Mutex::new(worker)), worker_scheduler, @@ -1219,6 +1267,7 @@ impl GcWorker { self.scheduler(), self.config_manager.clone(), self.feature_gate.clone(), + self.config_manager.value().num_threads, ) .start()?; *handle = Some(new_handle); @@ -1226,14 +1275,20 @@ impl GcWorker { } pub fn start(&mut self, store_id: u64) -> Result<()> { + let mut worker = self.worker.lock().unwrap(); let runner = GcRunner::new( store_id, self.engine.clone(), self.flow_info_sender.take().unwrap(), - self.config_manager.0.clone().tracker("gc-woker".to_owned()), + self.config_manager + .0 + .clone() + .tracker("gc-worker".to_owned()), self.config_manager.value().clone(), + worker.remote(), ); - self.worker.lock().unwrap().start(runner); + worker.start(runner); + Ok(()) } @@ -1296,6 +1351,10 @@ impl GcWorker { pub fn 
get_config_manager(&self) -> GcWorkerConfigManager { self.config_manager.clone() } + + pub fn get_worker_thread_count(&self) -> usize { + self.worker.lock().unwrap().pool_size() + } } #[cfg(any(test, feature = "testexport"))] @@ -1486,6 +1545,7 @@ mod tests { use engine_traits::Peekable as _; use futures::executor::block_on; use kvproto::{kvrpcpb::ApiVersion, metapb::Peer}; + use online_config::{ConfigChange, ConfigManager, ConfigValue}; use raft::StateRole; use raftstore::coprocessor::{ region_info_accessor::{MockRegionInfoProvider, RegionInfoAccessor}, @@ -1634,10 +1694,12 @@ mod tests { region2.mut_peers().push(new_peer(store_id, 2)); region2.set_start_key(split_key.to_vec()); + let mut gc_config = GcConfig::default(); + gc_config.num_threads = 2; let mut gc_worker = GcWorker::new( engine, tx, - GcConfig::default(), + gc_config, gate, Arc::new(MockRegionInfoProvider::new(vec![region1, region2])), ); @@ -1810,10 +1872,12 @@ mod tests { let mut host = CoprocessorHost::::default(); let ri_provider = RegionInfoAccessor::new(&mut host); + let mut gc_config = GcConfig::default(); + gc_config.num_threads = 2; let mut gc_worker = GcWorker::new( prefixed_engine.clone(), tx, - GcConfig::default(), + gc_config, feature_gate, Arc::new(ri_provider.clone()), ); @@ -1902,13 +1966,13 @@ mod tests { let (tx, _rx) = mpsc::channel(); let cfg = GcConfig::default(); - let mut runner = GcRunner::new( + let mut runner = GcRunnerCore::new( store_id, prefixed_engine.clone(), tx, - GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) + GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone())), None) .0 - .tracker("gc-woker".to_owned()), + .tracker("gc-worker".to_owned()), cfg, ); @@ -1966,13 +2030,13 @@ mod tests { let (tx, _rx) = mpsc::channel(); let cfg = GcConfig::default(); - let mut runner = GcRunner::new( + let mut runner = GcRunnerCore::new( store_id, prefixed_engine.clone(), tx, - GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) + 
GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone())), None) .0 - .tracker("gc-woker".to_owned()), + .tracker("gc-worker".to_owned()), cfg, ); @@ -2067,13 +2131,13 @@ mod tests { let (tx, _rx) = mpsc::channel(); let cfg = GcConfig::default(); - let mut runner = GcRunner::new( + let mut runner = GcRunnerCore::new( 1, prefixed_engine.clone(), tx, - GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) + GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone())), None) .0 - .tracker("gc-woker".to_owned()), + .tracker("gc-worker".to_owned()), cfg, ); @@ -2202,10 +2266,12 @@ mod tests { let mut region = Region::default(); region.mut_peers().push(new_peer(store_id, 1)); + let mut gc_config = GcConfig::default(); + gc_config.num_threads = 2; let mut gc_worker = GcWorker::new( engine.clone(), tx, - GcConfig::default(), + gc_config, gate, Arc::new(MockRegionInfoProvider::new(vec![region.clone()])), ); @@ -2333,7 +2399,7 @@ mod tests { ) -> ( MultiRocksEngine, Arc, - GcRunner, + GcRunnerCore, Vec, mpsc::Receiver, ) { @@ -2386,13 +2452,13 @@ mod tests { ])); let cfg = GcConfig::default(); - let gc_runner = GcRunner::new( + let gc_runner = GcRunnerCore::new( store_id, engine.clone(), tx, - GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) + GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone())), None) .0 - .tracker("gc-woker".to_owned()), + .tracker("gc-worker".to_owned()), cfg, ); @@ -2564,13 +2630,13 @@ mod tests { let ri_provider = Arc::new(MockRegionInfoProvider::new(vec![r1, r2])); let cfg = GcConfig::default(); - let mut gc_runner = GcRunner::new( + let mut gc_runner = GcRunnerCore::new( store_id, engine.clone(), tx, - GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone()))) + GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg.clone())), None) .0 - .tracker("gc-woker".to_owned()), + .tracker("gc-worker".to_owned()), cfg, ); @@ -2756,4 +2822,33 @@ mod tests { test_destroy_range_for_multi_rocksdb_impl(b"k05", b"k195", 
vec![1, 2]); test_destroy_range_for_multi_rocksdb_impl(b"k099", b"k25", vec![2, 3]); } + + #[test] + fn test_update_gc_thread_count() { + let engine = TestEngineBuilder::new().build().unwrap(); + let (tx, _rx) = mpsc::channel(); + let gate = FeatureGate::default(); + gate.set_version("5.0.0").unwrap(); + let mut gc_config = GcConfig::default(); + gc_config.num_threads = 1; + let gc_worker = GcWorker::new( + engine, + tx, + gc_config, + gate, + Arc::new(MockRegionInfoProvider::new(vec![])), + ); + let mut config_change = ConfigChange::new(); + config_change.insert(String::from("num_threads"), ConfigValue::Usize(5)); + let mut cfg_manager = gc_worker.get_config_manager(); + cfg_manager.dispatch(config_change).unwrap(); + + assert_eq!(gc_worker.get_worker_thread_count(), 5); + + let mut config_change = ConfigChange::new(); + config_change.insert(String::from("num_threads"), ConfigValue::Usize(2)); + cfg_manager.dispatch(config_change).unwrap(); + + assert_eq!(gc_worker.get_worker_thread_count(), 2); + } } diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index f1628cda50e..05cbde827d2 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -834,6 +834,7 @@ fn test_serde_custom_tikv_config() { max_write_bytes_per_sec: ReadableSize::mb(10), enable_compaction_filter: false, compaction_filter_skip_version_check: true, + num_threads: 2, }; value.pessimistic_txn = PessimisticTxnConfig { wait_for_lock_timeout: ReadableDuration::millis(10), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 61a2a24b43a..9eb628b8dc5 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -643,6 +643,7 @@ batch-keys = 256 max-write-bytes-per-sec = "10MB" enable-compaction-filter = false compaction-filter-skip-version-check = true +num-threads = 2 [pessimistic-txn] enabled = false # test backward compatibility From 
95da0269335fa0e05eb077bc8b7216a086d3aefe Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 12 Dec 2023 18:37:48 +0800 Subject: [PATCH 1058/1149] *: use OpenSSL for crypto RNG (#16170) ref tikv/tikv#15982 To comply with FIPS 140-2 requirements, it's essential to choose an RNG that meets these specifications. This commit replaces the `rand` crate with OpenSSL for cryptographic random number generation. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: lucasliang --- Cargo.lock | 32 ++++++++--------- Cargo.toml | 6 ++-- cmd/tikv-ctl/Cargo.toml | 3 +- cmd/tikv-ctl/src/main.rs | 1 + cmd/tikv-server/Cargo.toml | 2 +- cmd/tikv-server/src/main.rs | 1 + components/backup-stream/Cargo.toml | 2 +- components/backup/src/writer.rs | 2 +- components/{fips => crypto}/Cargo.toml | 2 +- components/{fips => crypto}/build.rs | 0 .../{fips/src/lib.rs => crypto/src/fips.rs} | 0 components/crypto/src/lib.rs | 13 +++++++ components/crypto/src/rand.rs | 17 +++++++++ components/encryption/Cargo.toml | 5 ++- components/encryption/src/crypter.rs | 22 ++++++------ .../encryption/src/encrypted_file/mod.rs | 4 +-- components/encryption/src/file_dict_file.rs | 4 +-- components/encryption/src/io.rs | 35 ++++++++----------- components/encryption/src/manager/mod.rs | 28 +++++++-------- components/encryption/src/master_key/file.rs | 2 +- components/encryption/src/master_key/kms.rs | 2 +- components/encryption/src/master_key/mem.rs | 4 ++- components/server/Cargo.toml | 1 - components/tidb_query_expr/Cargo.toml | 2 +- .../tidb_query_expr/src/impl_encryption.rs | 12 ++++--- components/tikv_util/src/lib.rs | 33 ----------------- scripts/check-bins.py | 2 +- src/lib.rs | 2 +- 28 files changed, 119 insertions(+), 120 deletions(-) rename components/{fips => crypto}/Cargo.toml (94%) rename components/{fips => crypto}/build.rs (100%) rename components/{fips/src/lib.rs => crypto/src/fips.rs} (100%) create mode 100644 
components/crypto/src/lib.rs create mode 100644 components/crypto/src/rand.rs diff --git a/Cargo.lock b/Cargo.lock index ab5c5d1663a..0cd0c6cade1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1246,6 +1246,16 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "crypto" +version = "0.0.1" +dependencies = [ + "openssl", + "openssl-sys", + "slog", + "slog-global", +] + [[package]] name = "csv" version = "1.1.6" @@ -1414,6 +1424,7 @@ dependencies = [ "cloud", "crc32fast", "crossbeam", + "crypto", "derive_more", "error_code", "fail", @@ -1428,7 +1439,6 @@ dependencies = [ "openssl", "prometheus", "protobuf", - "rand 0.8.5", "serde", "serde_derive", "slog", @@ -1817,16 +1827,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "fips" -version = "0.0.1" -dependencies = [ - "openssl", - "openssl-sys", - "slog", - "slog-global", -] - [[package]] name = "fix-hidden-lifetime-bug" version = "0.2.5" @@ -5116,7 +5116,6 @@ dependencies = [ "raft_log_engine", "raftstore", "raftstore-v2", - "rand 0.8.5", "resolved_ts", "resource_control", "resource_metering", @@ -6177,6 +6176,7 @@ dependencies = [ "byteorder", "chrono", "codec", + "crypto", "file_system", "flate2", "hex 0.4.2", @@ -6188,7 +6188,6 @@ dependencies = [ "panic_hook", "profiler", "protobuf", - "rand 0.8.5", "regex", "safemem", "serde", @@ -6226,6 +6225,7 @@ dependencies = [ "crc32fast", "crc64fast", "crossbeam", + "crypto", "dashmap", "encryption_export", "engine_panic", @@ -6237,7 +6237,6 @@ dependencies = [ "example_coprocessor_plugin", "fail", "file_system", - "fips", "flate2", "futures 0.3.15", "futures-executor", @@ -6350,12 +6349,12 @@ dependencies = [ "collections", "concurrency_manager", "crossbeam", + "crypto", "encryption_export", "engine_rocks", "engine_traits", "error_code", "file_system", - "fips", "futures 0.3.15", "gag", "grpcio", @@ -6373,7 +6372,6 @@ dependencies = [ "raft-engine-ctl", "raft_log_engine", "raftstore", - "rand 0.8.5", "regex", "security", "serde_json", @@ -6430,9 +6428,9 @@ 
version = "0.0.1" dependencies = [ "cc", "clap 2.33.0", + "crypto", "encryption_export", "engine_traits", - "fips", "keys", "kvproto", "raft-engine", diff --git a/Cargo.toml b/Cargo.toml index e66b7aee0fd..2d905a4115f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,6 +68,7 @@ coprocessor_plugin_api = { workspace = true } crc32fast = "1.2" crc64fast = "0.1" crossbeam = "0.8" +crypto = { workspace = true } dashmap = "5" encryption_export = { workspace = true } engine_panic = { workspace = true } @@ -78,7 +79,6 @@ engine_traits_tests = { workspace = true } error_code = { workspace = true } fail = "0.5" file_system = { workspace = true } -fips = { workspace = true } flate2 = { version = "1.0", default-features = false, features = ["zlib"] } futures = { version = "0.3", features = ["thread-pool", "compat"] } futures-executor = "0.3.1" @@ -243,6 +243,7 @@ members = [ "components/collections", "components/concurrency_manager", "components/coprocessor_plugin_api", + "components/crypto", "components/encryption", "components/encryption/export", "components/engine_rocks_helper", @@ -252,7 +253,6 @@ members = [ "components/error_code", "components/external_storage", "components/file_system", - "components/fips", "components/into_other", "components/keys", "components/log_wrappers", @@ -328,7 +328,7 @@ engine_traits_tests = { path = "components/engine_traits_tests", default-feature error_code = { path = "components/error_code" } external_storage = { path = "components/external_storage" } file_system = { path = "components/file_system" } -fips = { path = "components/fips" } +crypto = { path = "components/crypto" } gcp = { path = "components/cloud/gcp" } into_other = { path = "components/into_other" } keys = { path = "components/keys" } diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index 82553a4b45a..9504c3a4eae 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -49,12 +49,12 @@ clap = "2.32" collections = { workspace = true } 
concurrency_manager = { workspace = true } crossbeam = "0.8" +crypto = { workspace = true } encryption_export = { workspace = true } engine_rocks = { workspace = true } engine_traits = { workspace = true } error_code = { workspace = true } file_system = { workspace = true } -fips = { workspace = true } futures = "0.3" gag = "1.0" grpcio = { workspace = true } @@ -72,7 +72,6 @@ raft-engine = { git = "https://github.com/tikv/raft-engine.git" } raft-engine-ctl = { git = "https://github.com/tikv/raft-engine.git" } raft_log_engine = { workspace = true } raftstore = { workspace = true } -rand = "0.8" regex = "1" security = { workspace = true } serde_json = "1.0" diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index ec0c8bfc915..25f8cc1337b 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -20,6 +20,7 @@ use std::{ }; use collections::HashMap; +use crypto::fips; use encryption_export::{ create_backend, data_key_manager_from_config, DataKeyManager, DecrypterReader, Iv, }; diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index 6f916d7476d..cc99e05fb58 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -34,9 +34,9 @@ pprof-fp = ["tikv/pprof-fp"] [dependencies] clap = "2.32" +crypto = { workspace = true } encryption_export = { workspace = true } engine_traits = { workspace = true } -fips = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } raft-engine = { git = "https://github.com/tikv/raft-engine.git" } diff --git a/cmd/tikv-server/src/main.rs b/cmd/tikv-server/src/main.rs index 01354906b46..4c1eb4fc2c5 100644 --- a/cmd/tikv-server/src/main.rs +++ b/cmd/tikv-server/src/main.rs @@ -5,6 +5,7 @@ use std::{path::Path, process}; use clap::{crate_authors, App, Arg}; +use crypto::fips; use serde_json::{Map, Value}; use server::setup::{ensure_no_unrecognized_config, validate_and_persist_config}; use tikv::{ diff --git a/components/backup-stream/Cargo.toml 
b/components/backup-stream/Cargo.toml index 6a5a0edbba5..a91b3fb071d 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -57,7 +57,6 @@ prometheus-static-metric = "0.5" protobuf = { version = "2.8", features = ["bytes"] } raft = { workspace = true } raftstore = { workspace = true } -rand = "0.8.0" regex = "1" resolved_ts = { workspace = true } security = { path = "../security" } @@ -83,6 +82,7 @@ engine_test = { workspace = true } grpcio = { workspace = true } hex = "0.4" protobuf = { version = "2.8", features = ["bytes"] } +rand = "0.8.0" tempdir = "0.3" tempfile = "3.0" test_pd = { workspace = true } diff --git a/components/backup/src/writer.rs b/components/backup/src/writer.rs index dfbe36b60cf..a2d8a31f0ea 100644 --- a/components/backup/src/writer.rs +++ b/components/backup/src/writer.rs @@ -121,7 +121,7 @@ impl Writer { .with_label_values(&[cf.into()]) .inc_by(self.total_kvs); let file_name = format!("{}_{}.sst", name, cf); - let iv = Iv::new_ctr(); + let iv = Iv::new_ctr().map_err(|e| Error::Other(box_err!("new IV error: {:?}", e)))?; let encrypter_reader = EncrypterReader::new(sst_reader, cipher.cipher_type, &cipher.cipher_key, iv) .map_err(|e| Error::Other(box_err!("new EncrypterReader error: {:?}", e)))?; diff --git a/components/fips/Cargo.toml b/components/crypto/Cargo.toml similarity index 94% rename from components/fips/Cargo.toml rename to components/crypto/Cargo.toml index ab0d2aa1cf7..26eb77ee057 100644 --- a/components/fips/Cargo.toml +++ b/components/crypto/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "fips" +name = "crypto" version = "0.0.1" edition = "2021" publish = false diff --git a/components/fips/build.rs b/components/crypto/build.rs similarity index 100% rename from components/fips/build.rs rename to components/crypto/build.rs diff --git a/components/fips/src/lib.rs b/components/crypto/src/fips.rs similarity index 100% rename from components/fips/src/lib.rs rename to components/crypto/src/fips.rs 
diff --git a/components/crypto/src/lib.rs b/components/crypto/src/lib.rs new file mode 100644 index 00000000000..5afb174040c --- /dev/null +++ b/components/crypto/src/lib.rs @@ -0,0 +1,13 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +//! A shim crate for cryptographic operations, with special considerations for +//! meeting FIPS 140 requirements. +//! +//! This crate provides a set of cryptographic functionalities, including +//! RNG (random number generator). It has been meticulously crafted +//! to adhere to the FIPS 140 standards, ensuring a secure and compliant +//! environment for cryptographic operations in regulated environments. +// TODO: add message digest. + +pub mod fips; +pub mod rand; diff --git a/components/crypto/src/rand.rs b/components/crypto/src/rand.rs new file mode 100644 index 00000000000..d0f97594f49 --- /dev/null +++ b/components/crypto/src/rand.rs @@ -0,0 +1,17 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +//! Utilities for cryptographically strong random number generation. + +use openssl::{error::ErrorStack, rand}; + +/// Fill buffer with cryptographically strong pseudo-random bytes. +pub fn rand_bytes(buf: &mut [u8]) -> Result<(), ErrorStack> { + rand::rand_bytes(buf) +} + +/// Return a random u64. 
+pub fn rand_u64() -> Result { + let mut rand_id = [0u8; 8]; + rand_bytes(&mut rand_id)?; + Ok(u64::from_ne_bytes(rand_id)) +} diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index 0f2eac6ad5a..7375a9c0b20 100644 --- a/components/encryption/Cargo.toml +++ b/components/encryption/Cargo.toml @@ -17,6 +17,7 @@ bytes = "1.0" cloud = { workspace = true } crc32fast = "1.2" crossbeam = "0.8" +crypto = { workspace = true } derive_more = "0.99.3" error_code = { workspace = true } fail = "0.5" @@ -30,7 +31,9 @@ online_config = { workspace = true } openssl = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } -rand = "0.8" +# For simplicity and compliance with FIPS 140 requirements for random number +# generation, do not use the 'rand' crate in encryption-related code. +# rand = "*" serde = "1.0" serde_derive = "1.0" slog = { workspace = true } diff --git a/components/encryption/src/crypter.rs b/components/encryption/src/crypter.rs index aafbe7cf88f..a60b9c9c20b 100644 --- a/components/encryption/src/crypter.rs +++ b/components/encryption/src/crypter.rs @@ -5,8 +5,10 @@ use std::fmt::{self, Debug, Formatter}; use byteorder::{BigEndian, ByteOrder}; use cloud::kms::PlainKey; use kvproto::encryptionpb::EncryptionMethod; -use openssl::symm::{self, Cipher as OCipher}; -use rand::{rngs::OsRng, RngCore}; +use openssl::{ + rand, + symm::{self, Cipher as OCipher}, +}; use tikv_util::box_err; use crate::{Error, Result}; @@ -70,17 +72,17 @@ pub enum Iv { impl Iv { /// Generate a random IV for AES-GCM. - pub fn new_gcm() -> Iv { + pub fn new_gcm() -> Result { let mut iv = [0u8; GCM_IV_12]; - OsRng.fill_bytes(&mut iv); - Iv::Gcm(iv) + rand::rand_bytes(&mut iv)?; + Ok(Iv::Gcm(iv)) } /// Generate a random IV for AES-CTR. 
- pub fn new_ctr() -> Iv { + pub fn new_ctr() -> Result { let mut iv = [0u8; CTR_IV_16]; - OsRng.fill_bytes(&mut iv); - Iv::Ctr(iv) + rand::rand_bytes(&mut iv)?; + Ok(Iv::Ctr(iv)) } pub fn from_slice(src: &[u8]) -> Result { @@ -212,9 +214,9 @@ mod tests { let mut ivs = Vec::with_capacity(100); for c in 0..100 { if c % 2 == 0 { - ivs.push(Iv::new_ctr()); + ivs.push(Iv::new_ctr().unwrap()); } else { - ivs.push(Iv::new_gcm()); + ivs.push(Iv::new_gcm().unwrap()); } } ivs.dedup_by(|a, b| a.as_slice() == b.as_slice()); diff --git a/components/encryption/src/encrypted_file/mod.rs b/components/encryption/src/encrypted_file/mod.rs index 9c76b857c70..8cac47077f4 100644 --- a/components/encryption/src/encrypted_file/mod.rs +++ b/components/encryption/src/encrypted_file/mod.rs @@ -5,10 +5,10 @@ use std::{ path::Path, }; +use crypto::rand; use file_system::{rename, File, OpenOptions}; use kvproto::encryptionpb::EncryptedContent; use protobuf::Message; -use rand::{thread_rng, RngCore}; use slog_global::error; use tikv_util::time::Instant; @@ -66,7 +66,7 @@ impl<'a> EncryptedFile<'a> { // TODO what if a tmp file already exists? 
let origin_path = self.base.join(self.name); let mut tmp_path = origin_path.clone(); - tmp_path.set_extension(format!("{}.{}", thread_rng().next_u64(), TMP_FILE_SUFFIX)); + tmp_path.set_extension(format!("{}.{}", rand::rand_u64()?, TMP_FILE_SUFFIX)); let mut tmp_file = OpenOptions::new() .create(true) .write(true) diff --git a/components/encryption/src/file_dict_file.rs b/components/encryption/src/file_dict_file.rs index 0d1dcbbbd6e..a40fb912b3b 100644 --- a/components/encryption/src/file_dict_file.rs +++ b/components/encryption/src/file_dict_file.rs @@ -6,10 +6,10 @@ use std::{ }; use byteorder::{BigEndian, ByteOrder}; +use crypto::rand; use file_system::{rename, File, OpenOptions}; use kvproto::encryptionpb::{EncryptedContent, FileDictionary, FileInfo}; use protobuf::Message; -use rand::{thread_rng, RngCore}; use tikv_util::{box_err, info, set_panic_mark, warn}; use crate::{ @@ -127,7 +127,7 @@ impl FileDictionaryFile { if self.enable_log { let origin_path = self.file_path(); let mut tmp_path = origin_path.clone(); - tmp_path.set_extension(format!("{}.{}", thread_rng().next_u64(), TMP_FILE_SUFFIX)); + tmp_path.set_extension(format!("{}.{}", rand::rand_u64()?, TMP_FILE_SUFFIX)); let mut tmp_file = OpenOptions::new() .create(true) .write(true) diff --git a/components/encryption/src/io.rs b/components/encryption/src/io.rs index dc326e78427..4884fc68b92 100644 --- a/components/encryption/src/io.rs +++ b/components/encryption/src/io.rs @@ -554,17 +554,10 @@ mod tests { use std::{cmp::min, io::Cursor}; use byteorder::{BigEndian, ByteOrder}; - use rand::{rngs::OsRng, RngCore}; + use openssl::rand; use super::*; - use crate::crypter; - - fn generate_data_key(method: EncryptionMethod) -> Vec { - let key_length = crypter::get_method_key_length(method); - let mut key = vec![0; key_length]; - OsRng.fill_bytes(&mut key); - key - } + use crate::manager::generate_data_key; struct DecoratedCursor { cursor: Cursor>, @@ -628,7 +621,7 @@ mod tests { EncryptionMethod::Sm4Ctr, ]; let 
ivs = [ - Iv::new_ctr(), + Iv::new_ctr().unwrap(), // Iv overflow Iv::from_slice(&{ let mut v = vec![0; 16]; @@ -645,10 +638,10 @@ mod tests { ]; for method in methods { for iv in ivs { - let key = generate_data_key(method); + let (_, key) = generate_data_key(method).unwrap(); let mut plaintext = vec![0; 1024]; - OsRng.fill_bytes(&mut plaintext); + rand::rand_bytes(&mut plaintext).unwrap(); let mut encrypter = EncrypterWriter::new( DecoratedCursor::new(plaintext.clone(), 1), method, @@ -704,12 +697,12 @@ mod tests { EncryptionMethod::Sm4Ctr, ]; let mut plaintext = vec![0; 10240]; - OsRng.fill_bytes(&mut plaintext); + rand::rand_bytes(&mut plaintext).unwrap(); let offsets = [1024, 1024 + 1, 10240 - 1, 10240, 10240 + 1]; let sizes = [1024, 10240]; for method in methods { - let key = generate_data_key(method); - let iv = Iv::new_ctr(); + let (_, key) = generate_data_key(method).unwrap(); + let iv = Iv::new_ctr().unwrap(); let encrypter = EncrypterReader::new(DecoratedCursor::new(plaintext.clone(), 1), method, &key, iv) .unwrap(); @@ -741,13 +734,13 @@ mod tests { EncryptionMethod::Sm4Ctr, ]; let mut plaintext = vec![0; 10240]; - OsRng.fill_bytes(&mut plaintext); + rand::rand_bytes(&mut plaintext).unwrap(); let offsets = [1024, 1024 + 1, 10240 - 1]; let sizes = [1024, 8000]; let written = vec![0; 10240]; for method in methods { - let key = generate_data_key(method); - let iv = Iv::new_ctr(); + let (_, key) = generate_data_key(method).unwrap(); + let iv = Iv::new_ctr().unwrap(); let encrypter = EncrypterWriter::new(DecoratedCursor::new(written.clone(), 1), method, &key, iv) .unwrap(); @@ -787,12 +780,12 @@ mod tests { EncryptionMethod::Aes256Ctr, EncryptionMethod::Sm4Ctr, ]; - let iv = Iv::new_ctr(); + let iv = Iv::new_ctr().unwrap(); let mut plain_text = vec![0; 10240]; - OsRng.fill_bytes(&mut plain_text); + rand::rand_bytes(&mut plain_text).unwrap(); for method in methods { - let key = generate_data_key(method); + let (_, key) = generate_data_key(method).unwrap(); // 
encrypt plaintext into encrypt_text let read_once = 16; let mut encrypt_reader = EncrypterReader::new( diff --git a/components/encryption/src/manager/mod.rs b/components/encryption/src/manager/mod.rs index f5a203e9626..0f20741e841 100644 --- a/components/encryption/src/manager/mod.rs +++ b/components/encryption/src/manager/mod.rs @@ -13,6 +13,7 @@ use std::{ }; use crossbeam::channel::{self, select, tick}; +use crypto::rand; use fail::fail_point; use file_system::File; use kvproto::encryptionpb::{DataKey, EncryptionMethod, FileDictionary, FileInfo, KeyDictionary}; @@ -200,7 +201,7 @@ impl Dicts { fn new_file(&self, fname: &str, method: EncryptionMethod, sync: bool) -> Result { let mut file_dict_file = self.file_dict_file.lock().unwrap(); let iv = if method != EncryptionMethod::Plaintext { - Iv::new_ctr() + Iv::new_ctr()? } else { Iv::Empty }; @@ -348,7 +349,9 @@ impl Dicts { // Generate new data key. for _ in 0..GENERATE_DATA_KEY_LIMIT { - let (key_id, key) = generate_data_key(method); + let Ok((key_id, key)) = generate_data_key(method) else { + continue; + }; if key_id == 0 { // 0 is invalid continue; @@ -436,14 +439,12 @@ fn run_background_rotate_work( } } -fn generate_data_key(method: EncryptionMethod) -> (u64, Vec) { - use rand::{rngs::OsRng, RngCore}; - - let key_id = OsRng.next_u64(); +pub(crate) fn generate_data_key(method: EncryptionMethod) -> Result<(u64, Vec)> { + let key_id = rand::rand_u64()?; let key_length = crypter::get_method_key_length(method); let mut key = vec![0; key_length]; - OsRng.fill_bytes(&mut key); - (key_id, key) + rand::rand_bytes(&mut key)?; + Ok((key_id, key)) } pub struct DataKeyManager { @@ -1003,8 +1004,7 @@ impl<'a> DataKeyImporter<'a> { if key_id.is_none() { for _ in 0..GENERATE_DATA_KEY_LIMIT { // Match `generate_data_key`. 
- use rand::{rngs::OsRng, RngCore}; - let id = OsRng.next_u64(); + let id = rand::rand_u64()?; if let Entry::Vacant(e) = key_dict.keys.entry(id) { key_id = Some(id); e.insert(new_key); @@ -1858,11 +1858,11 @@ mod tests { ) .unwrap(); // different key - let (_, key2) = generate_data_key(EncryptionMethod::Aes192Ctr); + let (_, key2) = generate_data_key(EncryptionMethod::Aes192Ctr).unwrap(); importer .add( "2", - Iv::new_ctr().as_slice().to_owned(), + Iv::new_ctr().unwrap().as_slice().to_owned(), DataKey { key: key2.clone(), method: EncryptionMethod::Aes192Ctr, @@ -1896,7 +1896,7 @@ mod tests { importer .add( "2", - Iv::new_ctr().as_slice().to_owned(), + Iv::new_ctr().unwrap().as_slice().to_owned(), DataKey { key: key2.clone(), method: EncryptionMethod::Aes192Ctr, @@ -1918,7 +1918,7 @@ mod tests { let tmp_dir = tempfile::TempDir::new().unwrap(); let manager = new_key_manager_def(&tmp_dir, Some(EncryptionMethod::Aes192Ctr)).unwrap(); - let (_, key) = generate_data_key(EncryptionMethod::Aes192Ctr); + let (_, key) = generate_data_key(EncryptionMethod::Aes192Ctr).unwrap(); let file0 = manager.new_file("0").unwrap(); let now = SystemTime::now() .duration_since(UNIX_EPOCH) diff --git a/components/encryption/src/master_key/file.rs b/components/encryption/src/master_key/file.rs index ad1bfb75a87..1b24a95e497 100644 --- a/components/encryption/src/master_key/file.rs +++ b/components/encryption/src/master_key/file.rs @@ -49,7 +49,7 @@ impl FileBackend { impl Backend for FileBackend { fn encrypt(&self, plaintext: &[u8]) -> Result { - let iv = Iv::new_gcm(); + let iv = Iv::new_gcm()?; self.backend.encrypt_content(plaintext, iv) } diff --git a/components/encryption/src/master_key/kms.rs b/components/encryption/src/master_key/kms.rs index 643cb08a0c6..db3c62194fd 100644 --- a/components/encryption/src/master_key/kms.rs +++ b/components/encryption/src/master_key/kms.rs @@ -158,7 +158,7 @@ impl KmsBackend { impl Backend for KmsBackend { fn encrypt(&self, plaintext: &[u8]) -> Result { 
- self.encrypt_content(plaintext, Iv::new_gcm()) + self.encrypt_content(plaintext, Iv::new_gcm()?) } fn decrypt(&self, content: &EncryptedContent) -> Result> { diff --git a/components/encryption/src/master_key/mem.rs b/components/encryption/src/master_key/mem.rs index 619acc38ebf..c19351f5dc7 100644 --- a/components/encryption/src/master_key/mem.rs +++ b/components/encryption/src/master_key/mem.rs @@ -128,7 +128,9 @@ mod tests { .unwrap(); let backend = MemAesGcmBackend::new(key).unwrap(); - let encrypted_content = backend.encrypt_content(&pt, Iv::new_gcm()).unwrap(); + let encrypted_content = backend + .encrypt_content(&pt, Iv::new_gcm().unwrap()) + .unwrap(); let plaintext = backend.decrypt_content(&encrypted_content).unwrap(); assert_eq!(plaintext, pt); diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index 55da894c6e8..c378f0dbd90 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -68,7 +68,6 @@ raft = { workspace = true } raft_log_engine = { workspace = true } raftstore = { workspace = true, features = ["engine_rocks"] } raftstore-v2 = { workspace = true } -rand = "0.8" resolved_ts = { workspace = true } resource_control = { workspace = true } resource_metering = { workspace = true } diff --git a/components/tidb_query_expr/Cargo.toml b/components/tidb_query_expr/Cargo.toml index e09c0cd96de..60bbde91c31 100644 --- a/components/tidb_query_expr/Cargo.toml +++ b/components/tidb_query_expr/Cargo.toml @@ -10,6 +10,7 @@ base64 = "0.13" bstr = "0.2.8" byteorder = "1.2" codec = { workspace = true } +crypto = { workspace = true } file_system = { workspace = true } flate2 = { version = "=1.0.11", default-features = false, features = ["zlib"] } hex = "0.4" @@ -19,7 +20,6 @@ num = { version = "0.3", default-features = false } num-traits = "0.2" openssl = { workspace = true } protobuf = "2" -rand = "0.8.3" regex = "1.1" safemem = { version = "0.3", default-features = false } serde = "1.0" diff --git 
a/components/tidb_query_expr/src/impl_encryption.rs b/components/tidb_query_expr/src/impl_encryption.rs index 9c26826c03b..03686d3755e 100644 --- a/components/tidb_query_expr/src/impl_encryption.rs +++ b/components/tidb_query_expr/src/impl_encryption.rs @@ -3,13 +3,14 @@ use std::io::Read; use byteorder::{ByteOrder, LittleEndian}; +use crypto::rand; use flate2::{ read::{ZlibDecoder, ZlibEncoder}, Compression, }; use openssl::hash::{self, MessageDigest}; use tidb_query_codegen::rpn_fn; -use tidb_query_common::Result; +use tidb_query_common::{error::EvaluateError, Result}; use tidb_query_datatype::{ codec::data_type::*, expr::{Error, EvalContext}, @@ -190,9 +191,12 @@ pub fn random_bytes(_ctx: &mut EvalContext, arg: Option<&Int>) -> Result MAX_RAND_BYTES_LENGTH { return Err(Error::overflow("length", "random_bytes").into()); } - Ok(Some( - (0..*arg as usize).map(|_| rand::random::()).collect(), - )) + let len = *arg as usize; + let mut rand_bytes = vec![0; len]; + rand::rand_bytes(&mut rand_bytes).map_err(|_| { + EvaluateError::Other("SSL library can't generate random bytes".to_owned()) + })?; + Ok(Some(rand_bytes)) } _ => Ok(None), } diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index cdcfc4673c9..908f32db86f 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -32,7 +32,6 @@ use nix::{ sys::wait::{wait, WaitStatus}, unistd::{fork, ForkResult}, }; -use rand::rngs::ThreadRng; use crate::sys::thread::StdThreadBuildWrapper; @@ -134,38 +133,6 @@ pub fn slices_in_range(entry: &VecDeque, low: usize, high: usize) -> (&[T] } } -pub struct DefaultRng { - rng: ThreadRng, -} - -impl DefaultRng { - fn new() -> DefaultRng { - DefaultRng { - rng: rand::thread_rng(), - } - } -} - -impl Default for DefaultRng { - fn default() -> DefaultRng { - DefaultRng::new() - } -} - -impl Deref for DefaultRng { - type Target = ThreadRng; - - fn deref(&self) -> &ThreadRng { - &self.rng - } -} - -impl DerefMut for DefaultRng { - fn 
deref_mut(&mut self) -> &mut ThreadRng { - &mut self.rng - } -} - /// A handy shortcut to replace `RwLock` write/read().unwrap() pattern to /// shortcut wl and rl. pub trait HandyRwLock { diff --git a/scripts/check-bins.py b/scripts/check-bins.py index cbc748af958..cd5a4879f27 100644 --- a/scripts/check-bins.py +++ b/scripts/check-bins.py @@ -14,7 +14,7 @@ "online_config", "online_config_derive", "tidb_query_codegen", "panic_hook", "fuzz", "fuzzer_afl", "fuzzer_honggfuzz", "fuzzer_libfuzzer", "coprocessor_plugin_api", "example_coprocessor_plugin", "memory_trace_macros", "case_macros", - "tracker", "test_raftstore_macro", "fips" + "tracker", "test_raftstore_macro", "crypto" } JEMALLOC_SYMBOL = ["je_arena_boot", " malloc"] diff --git a/src/lib.rs b/src/lib.rs index b300399e30a..6d1cc515907 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -80,7 +80,7 @@ pub fn tikv_version_info(build_time: Option<&str>) -> String { /// return the build version of tikv-server pub fn tikv_build_version() -> String { - if fips::can_enable() { + if crypto::fips::can_enable() { format!("{}-{}", env!("CARGO_PKG_VERSION"), "fips") } else { env!("CARGO_PKG_VERSION").to_owned() From 51a5af2fb72133a9e0483bb0a9d6fc4331231ba3 Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 12 Dec 2023 18:52:48 +0800 Subject: [PATCH 1059/1149] server: Fix heap profile temp file is dropped before reading (#16171) close tikv/tikv#16169 Fix heap profile temp file is dropped before reading Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/server/status_server/mod.rs | 6 +++--- src/server/status_server/profile.rs | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 9a2bb5743ae..ff8909fa852 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -136,10 +136,11 @@ where let use_jeprof = query_pairs.get("jeprof").map(|x| x.as_ref()) == 
Some("true"); let result = { - let path = match dump_one_heap_profile() { - Ok(path) => path, + let file = match dump_one_heap_profile() { + Ok(file) => file, Err(e) => return Ok(make_response(StatusCode::INTERNAL_SERVER_ERROR, e)), }; + let path = file.path(); if use_jeprof { jeprof_heap_profile(path.to_str().unwrap()) } else { @@ -1561,7 +1562,6 @@ mod tests { #[cfg(feature = "mem-profiling")] #[test] - #[ignore] fn test_pprof_heap_service() { let mut status_server = StatusServer::new( 1, diff --git a/src/server/status_server/profile.rs b/src/server/status_server/profile.rs index 7d7e90741e4..582e02066f8 100644 --- a/src/server/status_server/profile.rs +++ b/src/server/status_server/profile.rs @@ -2,7 +2,6 @@ use std::{ fs::File, io::{Read, Write}, - path::PathBuf, pin::Pin, process::{Command, Stdio}, sync::Mutex, @@ -83,11 +82,11 @@ impl Future for ProfileRunner { } /// Trigger a heap profile and return the content. -pub fn dump_one_heap_profile() -> Result { +pub fn dump_one_heap_profile() -> Result { let f = NamedTempFile::new().map_err(|e| format!("create tmp file fail: {}", e))?; let path = f.path(); dump_prof(path.to_str().unwrap()).map_err(|e| format!("dump_prof: {}", e))?; - Ok(path.to_owned()) + Ok(f) } /// Trigger one cpu profile. 
From 820b220f4e915fd98ea28195a256ef1952f9328c Mon Sep 17 00:00:00 2001 From: ShuNing Date: Wed, 13 Dec 2023 10:33:19 +0800 Subject: [PATCH 1060/1149] coprocessor: add query digest tracing in tikv slow query (#16172) ref tikv/tikv#15513 coprocessor: add query digest tracing in tikv slow query Signed-off-by: nolouch --- src/coprocessor/tracker.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/coprocessor/tracker.rs b/src/coprocessor/tracker.rs index f6502c2459e..cacf69d2c61 100644 --- a/src/coprocessor/tracker.rs +++ b/src/coprocessor/tracker.rs @@ -6,8 +6,10 @@ use ::tracker::{get_tls_tracker_token, with_tls_tracker}; use engine_traits::{PerfContext, PerfContextExt, PerfContextKind}; use kvproto::{kvrpcpb, kvrpcpb::ScanDetailV2}; use pd_client::BucketMeta; +use protobuf::Message; use tikv_kv::Engine; use tikv_util::time::{self, Duration, Instant}; +use tipb::ResourceGroupTag; use txn_types::Key; use super::metrics::*; @@ -266,9 +268,14 @@ impl Tracker { let source_stmt = self.req_ctx.context.get_source_stmt(); with_tls_tracker(|tracker| { + let mut req_tag = ResourceGroupTag::new(); + req_tag + .merge_from_bytes(&tracker.req_info.resource_group_tag) + .unwrap_or_default(); info!(#"slow_log", "slow-query"; "connection_id" => source_stmt.get_connection_id(), "session_alias" => source_stmt.get_session_alias(), + "query_digest" => hex::encode(req_tag.get_sql_digest()), "region_id" => &self.req_ctx.context.get_region_id(), "remote_host" => &self.req_ctx.peer, "total_lifetime" => ?self.req_lifetime, From 8e8c6ab6dbd41861d012aec0654f77ff678e4ef1 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 13 Dec 2023 16:00:20 +0800 Subject: [PATCH 1061/1149] In-memory Engine: integrate hybrid engine with TiKV (#16132) ref tikv/tikv#16141 Integrate hybrid engine with TiKV. User can choose to use hybrid engine by set `memory_engine_enabled` in TiKV config. 
Signed-off-by: SpadeA-Tang --- Cargo.lock | 25 +- Cargo.toml | 2 + components/engine_panic/src/misc.rs | 5 + components/engine_rocks/src/misc.rs | 5 + components/engine_traits/src/misc.rs | 3 + components/hybrid_engine/Cargo.toml | 3 +- components/hybrid_engine/src/engine.rs | 13 + components/hybrid_engine/src/lib.rs | 2 + components/hybrid_engine/src/misc.rs | 5 + .../raftstore/src/compacted_event_sender.rs | 18 +- .../region_cache_memory_engine/Cargo.toml | 13 + .../region_cache_memory_engine/src/engine.rs | 307 ++++++++++++++++++ .../region_cache_memory_engine/src/lib.rs | 7 + components/server/Cargo.toml | 2 + components/server/src/common.rs | 18 + components/server/src/server.rs | 112 +++++-- components/snap_recovery/src/init_cluster.rs | 16 +- .../src/region_meta_collector.rs | 35 +- components/snap_recovery/src/services.rs | 42 ++- src/config/mod.rs | 4 + 20 files changed, 566 insertions(+), 71 deletions(-) create mode 100644 components/region_cache_memory_engine/Cargo.toml create mode 100644 components/region_cache_memory_engine/src/engine.rs create mode 100644 components/region_cache_memory_engine/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 0cd0c6cade1..bcbb57979ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2447,6 +2447,7 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" name = "hybrid_engine" version = "0.0.1" dependencies = [ + "engine_rocks", "engine_traits", "tikv_util", "txn_types", @@ -4439,6 +4440,15 @@ version = "0.6.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" +[[package]] +name = "region_cache_memory_engine" +version = "0.0.1" +dependencies = [ + "collections", + "engine_traits", + "skiplist-rs", +] + [[package]] name = "remove_dir_all" version = "0.5.2" @@ -5104,6 +5114,7 @@ dependencies = [ "grpcio", "grpcio-health", "hex 0.4.2", + "hybrid_engine", "keys", "kvproto", "libc 0.2.146", @@ -5116,6 +5127,7 
@@ dependencies = [ "raft_log_engine", "raftstore", "raftstore-v2", + "region_cache_memory_engine", "resolved_ts", "resource_control", "resource_metering", @@ -5195,6 +5207,16 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7" +[[package]] +name = "skiplist-rs" +version = "0.1.0" +source = "git+https://github.com/tikv/skiplist-rs.git?branch=main#618af619d9348ef89eaa71c5f6fbddbd9a5c09bf" +dependencies = [ + "bytes", + "rand 0.8.5", + "slog", +] + [[package]] name = "slab" version = "0.4.2" @@ -6290,6 +6312,7 @@ dependencies = [ "raftstore-v2", "rand 0.7.3", "regex", + "region_cache_memory_engine", "reqwest", "resource_control", "resource_metering", @@ -6821,7 +6844,7 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 1.0.0", + "cfg-if 0.1.10", "static_assertions", ] diff --git a/Cargo.toml b/Cargo.toml index 2d905a4115f..3e5efdd40c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -129,6 +129,7 @@ raftstore = { workspace = true, features = ["engine_rocks"] } raftstore-v2 = { workspace = true } rand = "0.7.3" regex = "1.3" +region_cache_memory_engine = { workspace = true } resource_control = { workspace = true } resource_metering = { workspace = true } rev_lines = "0.2.1" @@ -321,6 +322,7 @@ encryption_export = { path = "components/encryption/export" } engine_panic = { path = "components/engine_panic" } engine_rocks = { path = "components/engine_rocks" } hybrid_engine = { path = "components/hybrid_engine" } +region_cache_memory_engine = { path = "components/region_cache_memory_engine" } engine_rocks_helper = { path = "components/engine_rocks_helper" } engine_test = { path = "components/engine_test", default-features = false } engine_traits = { path = "components/engine_traits" } diff --git 
a/components/engine_panic/src/misc.rs b/components/engine_panic/src/misc.rs index 8da5c48d3e6..6ebecd58a09 100644 --- a/components/engine_panic/src/misc.rs +++ b/components/engine_panic/src/misc.rs @@ -129,4 +129,9 @@ impl MiscExt for PanicEngine { fn get_accumulated_flush_count_cf(cf: &str) -> Result { panic!() } + + type DiskEngine = PanicEngine; + fn get_disk_engine(&self) -> &Self::DiskEngine { + panic!() + } } diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index b1406cacdb8..f82e1e68832 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -448,6 +448,11 @@ impl MiscExt for RocksEngine { .get(); Ok(n) } + + type DiskEngine = RocksEngine; + fn get_disk_engine(&self) -> &Self::DiskEngine { + self + } } #[cfg(test)] diff --git a/components/engine_traits/src/misc.rs b/components/engine_traits/src/misc.rs index 7871b3b8ecc..ad93db44231 100644 --- a/components/engine_traits/src/misc.rs +++ b/components/engine_traits/src/misc.rs @@ -178,4 +178,7 @@ pub trait MiscExt: CfNamesExt + FlowControlFactorsExt + WriteBatchExt { } Ok(n) } + + type DiskEngine; + fn get_disk_engine(&self) -> &Self::DiskEngine; } diff --git a/components/hybrid_engine/Cargo.toml b/components/hybrid_engine/Cargo.toml index f38604a10c1..e0be90b179e 100644 --- a/components/hybrid_engine/Cargo.toml +++ b/components/hybrid_engine/Cargo.toml @@ -10,4 +10,5 @@ testexport = [] [dependencies] engine_traits = { workspace = true } txn_types = { workspace = true } -tikv_util = { workspace = true } \ No newline at end of file +tikv_util = { workspace = true } +engine_rocks = { workspace = true } diff --git a/components/hybrid_engine/src/engine.rs b/components/hybrid_engine/src/engine.rs index deb544b91c6..072f1d028ff 100644 --- a/components/hybrid_engine/src/engine.rs +++ b/components/hybrid_engine/src/engine.rs @@ -42,6 +42,19 @@ where } } +impl HybridEngine +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + pub fn 
new(disk_engine: EK, region_cache_engine: EC) -> Self { + Self { + disk_engine, + region_cache_engine, + } + } +} + // todo: implement KvEngine methods as well as it's super traits. impl KvEngine for HybridEngine where diff --git a/components/hybrid_engine/src/lib.rs b/components/hybrid_engine/src/lib.rs index 147fcc5a8a5..367d985b094 100644 --- a/components/hybrid_engine/src/lib.rs +++ b/components/hybrid_engine/src/lib.rs @@ -22,3 +22,5 @@ mod sst; mod table_properties; mod ttl_properties; mod write_batch; + +pub use engine::HybridEngine; diff --git a/components/hybrid_engine/src/misc.rs b/components/hybrid_engine/src/misc.rs index 9575344e65c..d761322ae76 100644 --- a/components/hybrid_engine/src/misc.rs +++ b/components/hybrid_engine/src/misc.rs @@ -124,4 +124,9 @@ where fn get_accumulated_flush_count_cf(cf: &str) -> Result { unimplemented!() } + + type DiskEngine = EK::DiskEngine; + fn get_disk_engine(&self) -> &Self::DiskEngine { + self.disk_engine().get_disk_engine() + } } diff --git a/components/raftstore/src/compacted_event_sender.rs b/components/raftstore/src/compacted_event_sender.rs index 99ba70a0512..736332b52c5 100644 --- a/components/raftstore/src/compacted_event_sender.rs +++ b/components/raftstore/src/compacted_event_sender.rs @@ -1,18 +1,26 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use std::sync::Mutex; -use engine_rocks::{CompactedEventSender, RocksCompactedEvent, RocksEngine}; -use engine_traits::RaftEngine; +use engine_rocks::{CompactedEventSender, RocksCompactedEvent}; +use engine_traits::{KvEngine, RaftEngine}; use tikv_util::error_unknown; use crate::store::{fsm::store::RaftRouter, StoreMsg}; // raftstore v1's implementation -pub struct RaftRouterCompactedEventSender { - pub router: Mutex>, +pub struct RaftRouterCompactedEventSender +where + EK: KvEngine, + ER: RaftEngine, +{ + pub router: Mutex>, } -impl CompactedEventSender for RaftRouterCompactedEventSender { +impl CompactedEventSender for RaftRouterCompactedEventSender +where + EK: KvEngine, + ER: RaftEngine, +{ fn send(&self, event: RocksCompactedEvent) { let router = self.router.lock().unwrap(); let event = StoreMsg::CompactedEvent(event); diff --git a/components/region_cache_memory_engine/Cargo.toml b/components/region_cache_memory_engine/Cargo.toml new file mode 100644 index 00000000000..89ae317aa94 --- /dev/null +++ b/components/region_cache_memory_engine/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "region_cache_memory_engine" +version = "0.0.1" +edition = "2021" +publish = false + +[features] +testexport = [] + +[dependencies] +engine_traits = { workspace = true } +collections = { workspace = true } +skiplist-rs = { git = "https://github.com/tikv/skiplist-rs.git", branch = "main" } diff --git a/components/region_cache_memory_engine/src/engine.rs b/components/region_cache_memory_engine/src/engine.rs new file mode 100644 index 00000000000..ad16e7e8407 --- /dev/null +++ b/components/region_cache_memory_engine/src/engine.rs @@ -0,0 +1,307 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + collections::BTreeMap, + fmt::{self, Debug}, + ops::Deref, + sync::{Arc, Mutex}, +}; + +use collections::HashMap; +use engine_traits::{ + CfNamesExt, DbVector, IterOptions, Iterable, Iterator, Mutable, Peekable, ReadOptions, + RegionCacheEngine, Result, Snapshot, SnapshotMiscExt, WriteBatch, WriteBatchExt, WriteOptions, +}; +use skiplist_rs::{ByteWiseComparator, IterRef, Skiplist}; + +/// RegionMemoryEngine stores data for a specific cached region +/// +/// todo: The skiplist used here currently is for test purpose. Replace it +/// with a formal implementation. +#[derive(Clone)] +pub struct RegionMemoryEngine { + data: [Arc>; 3], +} + +impl Debug for RegionMemoryEngine { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + unimplemented!() + } +} + +type SnapshotList = BTreeMap; + +#[derive(Default)] +pub struct RegionMemoryMeta { + // It records the snapshots that have been granted previsously with specific snapshot_ts. We + // should guarantee that the data visible to any one of the snapshot in it will not be removed. + snapshots: SnapshotList, + // It indicates whether the region is readable. False means integrity of the data in this + // cached region is not satisfied due to being evicted for instance. + can_read: bool, + // Request with read_ts below it is not eligible for granting snapshot. + // Note: different region can have different safe_ts. + safe_ts: u64, +} + +#[derive(Default)] +pub struct RegionCacheMemoryEngineCore { + engine: HashMap, + region_metats: HashMap, +} + +/// The RegionCacheMemoryEngine serves as a region cache, storing hot regions in +/// the leaders' store. Incoming writes that are written to disk engine (now, +/// RocksDB) are also written to the RegionCacheMemoryEngine, leading to a +/// mirrored data set in the cached regions with the disk engine. 
+/// +/// A load/evict unit manages the memory, deciding which regions should be +/// evicted when the memory used by the RegionCacheMemoryEngine reaches a +/// certain limit, and determining which regions should be loaded when there is +/// spare memory capacity. +/// +/// The safe point lifetime differs between RegionCacheMemoryEngine and the disk +/// engine, often being much shorter in RegionCacheMemoryEngine. This means that +/// RegionCacheMemoryEngine may filter out some keys that still exist in the +/// disk engine, thereby improving read performance as fewer duplicated keys +/// will be read. If there's a need to read keys that may have been filtered by +/// RegionCacheMemoryEngine (as indicated by read_ts and safe_point of the +/// cached region), we resort to using a the disk engine's snapshot instead. +#[derive(Clone, Default)] +pub struct RegionCacheMemoryEngine { + core: Arc>, +} + +impl Debug for RegionCacheMemoryEngine { + fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result { + unimplemented!() + } +} + +impl RegionCacheMemoryEngine { + pub fn new() -> Self { + RegionCacheMemoryEngine::default() + } +} + +impl RegionCacheEngine for RegionCacheMemoryEngine { + type Snapshot = RegionCacheSnapshot; + + fn snapshot(&self, region_id: u64, read_ts: u64) -> Option { + unimplemented!() + } +} + +// todo: fill fields needed +pub struct RegionCacheWriteBatch; + +impl WriteBatchExt for RegionCacheMemoryEngine { + type WriteBatch = RegionCacheWriteBatch; + // todo: adjust it + const WRITE_BATCH_MAX_KEYS: usize = 256; + + fn write_batch(&self) -> Self::WriteBatch { + RegionCacheWriteBatch {} + } + + fn write_batch_with_cap(&self, _: usize) -> Self::WriteBatch { + RegionCacheWriteBatch {} + } +} + +pub struct RegionCacheIterator { + valid: bool, + prefix_same_as_start: bool, + prefix: Option>, + iter: IterRef, ByteWiseComparator>, + lower_bound: Option>, + upper_bound: Option>, +} + +impl Iterable for RegionCacheMemoryEngine { + type Iterator = 
RegionCacheIterator; + + fn iterator(&self, cf: &str) -> Result { + unimplemented!() + } + + fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { + unimplemented!() + } +} + +impl Iterator for RegionCacheIterator { + fn key(&self) -> &[u8] { + unimplemented!() + } + + fn value(&self) -> &[u8] { + unimplemented!() + } + + fn next(&mut self) -> Result { + unimplemented!() + } + + fn prev(&mut self) -> Result { + unimplemented!() + } + + fn seek(&mut self, key: &[u8]) -> Result { + unimplemented!() + } + + fn seek_for_prev(&mut self, key: &[u8]) -> Result { + unimplemented!() + } + + fn seek_to_first(&mut self) -> Result { + unimplemented!() + } + + fn seek_to_last(&mut self) -> Result { + unimplemented!() + } + + fn valid(&self) -> Result { + unimplemented!() + } +} + +impl WriteBatch for RegionCacheWriteBatch { + fn write_opt(&mut self, _: &WriteOptions) -> Result { + unimplemented!() + } + + fn data_size(&self) -> usize { + unimplemented!() + } + + fn count(&self) -> usize { + unimplemented!() + } + + fn is_empty(&self) -> bool { + unimplemented!() + } + + fn should_write_to_engine(&self) -> bool { + unimplemented!() + } + + fn clear(&mut self) { + unimplemented!() + } + + fn set_save_point(&mut self) { + unimplemented!() + } + + fn pop_save_point(&mut self) -> Result<()> { + unimplemented!() + } + + fn rollback_to_save_point(&mut self) -> Result<()> { + unimplemented!() + } + + fn merge(&mut self, _: Self) -> Result<()> { + unimplemented!() + } +} + +impl Mutable for RegionCacheWriteBatch { + fn put(&mut self, _: &[u8], _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn put_cf(&mut self, _: &str, _: &[u8], _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete(&mut self, _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_cf(&mut self, _: &str, _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_range(&mut self, _: &[u8], _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_range_cf(&mut self, _: &str, _: 
&[u8], _: &[u8]) -> Result<()> { + unimplemented!() + } +} + +#[derive(Clone, Debug)] +pub struct RegionCacheSnapshot { + region_id: u64, + snapshot_ts: u64, + engine: RegionMemoryEngine, +} + +impl Snapshot for RegionCacheSnapshot {} + +impl Iterable for RegionCacheSnapshot { + type Iterator = RegionCacheIterator; + + fn iterator(&self, cf: &str) -> Result { + unimplemented!() + } + + fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { + unimplemented!() + } +} + +impl Peekable for RegionCacheSnapshot { + type DbVector = RegionCacheDbVector; + + fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { + unimplemented!() + } + + fn get_value_cf_opt( + &self, + opts: &ReadOptions, + cf: &str, + key: &[u8], + ) -> Result> { + unimplemented!() + } +} + +impl CfNamesExt for RegionCacheSnapshot { + fn cf_names(&self) -> Vec<&str> { + unimplemented!() + } +} + +impl SnapshotMiscExt for RegionCacheSnapshot { + fn sequence_number(&self) -> u64 { + self.snapshot_ts + } +} + +// todo: fill fields needed +#[derive(Debug)] +pub struct RegionCacheDbVector; + +impl Deref for RegionCacheDbVector { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + unimplemented!() + } +} + +impl DbVector for RegionCacheDbVector {} + +impl<'a> PartialEq<&'a [u8]> for RegionCacheDbVector { + fn eq(&self, rhs: &&[u8]) -> bool { + unimplemented!() + } +} diff --git a/components/region_cache_memory_engine/src/lib.rs b/components/region_cache_memory_engine/src/lib.rs new file mode 100644 index 00000000000..d512847efb4 --- /dev/null +++ b/components/region_cache_memory_engine/src/lib.rs @@ -0,0 +1,7 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +#![allow(dead_code)] +#![allow(unused_variables)] + +mod engine; +pub use engine::RegionCacheMemoryEngine; diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index c378f0dbd90..9062a9f094e 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -56,6 +56,7 @@ futures = "0.3" grpcio = { workspace = true } grpcio-health = { workspace = true } hex = "0.4" +hybrid_engine = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } libc = "0.2" @@ -68,6 +69,7 @@ raft = { workspace = true } raft_log_engine = { workspace = true } raftstore = { workspace = true, features = ["engine_rocks"] } raftstore-v2 = { workspace = true } +region_cache_memory_engine = { workspace = true } resolved_ts = { workspace = true } resource_control = { workspace = true } resource_metering = { workspace = true } diff --git a/components/server/src/common.rs b/components/server/src/common.rs index c8cf879d905..a2415facad1 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -28,8 +28,10 @@ use engine_traits::{ use error_code::ErrorCodeExt; use file_system::{get_io_rate_limiter, set_io_rate_limiter, BytesFetcher, File, IoBudgetAdjustor}; use grpcio::Environment; +use hybrid_engine::HybridEngine; use pd_client::{PdClient, RpcClient}; use raft_log_engine::RaftLogEngine; +use region_cache_memory_engine::RegionCacheMemoryEngine; use security::SecurityManager; use tikv::{ config::{ConfigController, DbConfigManger, DbType, TikvConfig}, @@ -695,6 +697,22 @@ impl Stop for LazyWorker { } } +pub trait KvEngineBuilder: KvEngine { + fn build(disk_engine: RocksEngine) -> Self; +} + +impl KvEngineBuilder for RocksEngine { + fn build(disk_engine: RocksEngine) -> Self { + disk_engine + } +} + +impl KvEngineBuilder for HybridEngine { + fn build(_disk_engine: RocksEngine) -> Self { + unimplemented!() + } +} + pub trait ConfiguredRaftEngine: RaftEngine { fn build( _: &TikvConfig, diff --git 
a/components/server/src/server.rs b/components/server/src/server.rs index 00ab39a0e6a..594eac686fe 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -30,16 +30,19 @@ use backup_stream::{ use causal_ts::CausalTsProviderImpl; use cdc::CdcConfigManager; use concurrency_manager::ConcurrencyManager; -use engine_rocks::{from_rocks_compression_type, RocksEngine, RocksStatistics}; +use engine_rocks::{ + from_rocks_compression_type, RocksCompactedEvent, RocksEngine, RocksStatistics, +}; use engine_rocks_helper::sst_recovery::{RecoveryRunner, DEFAULT_CHECK_INTERVAL}; use engine_traits::{ - Engines, KvEngine, MiscExt, RaftEngine, SingletonFactory, TabletContext, TabletRegistry, - CF_DEFAULT, CF_WRITE, + Engines, KvEngine, RaftEngine, SingletonFactory, TabletContext, TabletRegistry, CF_DEFAULT, + CF_WRITE, }; use file_system::{get_io_rate_limiter, BytesFetcher, MetricsManager as IoMetricsManager}; use futures::executor::block_on; use grpcio::{EnvBuilder, Environment}; use grpcio_health::HealthService; +use hybrid_engine::HybridEngine; use kvproto::{ brpb::create_backup, cdcpb::create_change_data, deadlock::create_deadlock, debugpb::create_debug, diagnosticspb::create_diagnostics, import_sstpb::create_import_sst, @@ -69,6 +72,7 @@ use raftstore::{ }, RaftRouterCompactedEventSender, }; +use region_cache_memory_engine::RegionCacheMemoryEngine; use resolved_ts::{LeadershipResolver, Task}; use resource_control::ResourceGroupManager; use security::SecurityManager; @@ -110,7 +114,7 @@ use tikv::{ use tikv_alloc::{add_thread_memory_accessor, remove_thread_memory_accessor}; use tikv_util::{ check_environment_variables, - config::VersionTrack, + config::{ReadableSize, VersionTrack}, memory::MemoryQuota, mpsc as TikvMpsc, quota_limiter::{QuotaLimitConfigManager, QuotaLimiter}, @@ -124,7 +128,10 @@ use tikv_util::{ use tokio::runtime::Builder; use crate::{ - common::{ConfiguredRaftEngine, EngineMetricsManager, EnginesResourceInfo, TikvServerCore}, + 
common::{ + ConfiguredRaftEngine, EngineMetricsManager, EnginesResourceInfo, KvEngineBuilder, + TikvServerCore, + }, memory::*, setup::*, signal_handler, @@ -132,12 +139,16 @@ use crate::{ }; #[inline] -fn run_impl( +fn run_impl( config: TikvConfig, service_event_tx: TikvMpsc::Sender, service_event_rx: TikvMpsc::Receiver, -) { - let mut tikv = TikvServer::::init(config, service_event_tx.clone()); +) where + EK: KvEngine + KvEngineBuilder, + CER: ConfiguredRaftEngine, + F: KvFormat, +{ + let mut tikv = TikvServer::::init(config, service_event_tx.clone()); // Must be called after `TikvServer::init`. let memory_limit = tikv.core.config.memory_usage_limit.unwrap().0; let high_water = (tikv.core.config.memory_usage_high_water * memory_limit as f64) as u64; @@ -209,9 +220,33 @@ pub fn run_tikv( dispatch_api_version!(config.storage.api_version(), { if !config.raft_engine.enable { - run_impl::(config, service_event_tx, service_event_rx) + if config.region_cache_memory_limit == ReadableSize(0) { + run_impl::( + config, + service_event_tx, + service_event_rx, + ) + } else { + run_impl::, RocksEngine, API>( + config, + service_event_tx, + service_event_rx, + ) + } } else { - run_impl::(config, service_event_tx, service_event_rx) + if config.region_cache_memory_limit == ReadableSize(0) { + run_impl::( + config, + service_event_tx, + service_event_rx, + ) + } else { + run_impl::, RaftLogEngine, API>( + config, + service_event_tx, + service_event_rx, + ) + } } }) } @@ -221,21 +256,26 @@ const DEFAULT_MEMTRACE_FLUSH_INTERVAL: Duration = Duration::from_millis(1_000); const DEFAULT_STORAGE_STATS_INTERVAL: Duration = Duration::from_secs(1); /// A complete TiKV server. 
-struct TikvServer { +struct TikvServer +where + EK: KvEngine, + ER: RaftEngine, + F: KvFormat, +{ core: TikvServerCore, cfg_controller: Option, security_mgr: Arc, pd_client: Arc, - router: RaftRouter, - system: Option>, + router: RaftRouter, + system: Option>, resolver: Option, snap_mgr: Option, // Will be filled in `init_servers`. - engines: Option>, + engines: Option>, kv_statistics: Option>, raft_statistics: Option>, - servers: Option>, + servers: Option>, region_info_accessor: RegionInfoAccessor, - coprocessor_host: Option>, + coprocessor_host: Option>, concurrency_manager: ConcurrencyManager, env: Arc, check_leader_worker: Worker, @@ -270,12 +310,13 @@ struct Servers { type LocalServer = Server>; type LocalRaftKv = RaftKv>; -impl TikvServer +impl TikvServer where + EK: KvEngine, ER: RaftEngine, F: KvFormat, { - fn init(mut config: TikvConfig, tx: TikvMpsc::Sender) -> TikvServer { + fn init(mut config: TikvConfig, tx: TikvMpsc::Sender) -> TikvServer { tikv_util::thread_group::set_properties(Some(GroupProperties::default())); // It is okay use pd config and security config before `init_config`, // because these configs must be provided by command line, and only @@ -436,7 +477,7 @@ where } } - fn init_engines(&mut self, engines: Engines) { + fn init_engines(&mut self, engines: Engines) { let store_meta = Arc::new(Mutex::new(StoreMeta::new(PENDING_MSG_CAP))); let engine = RaftKv::new( ServerRaftStoreRouter::new( @@ -458,9 +499,7 @@ where }); } - fn init_gc_worker( - &mut self, - ) -> GcWorker>> { + fn init_gc_worker(&mut self) -> GcWorker>> { let engines = self.engines.as_ref().unwrap(); let gc_worker = GcWorker::new( engines.engine.clone(), @@ -526,7 +565,7 @@ where if let Some(sst_worker) = &mut self.sst_worker { let sst_runner = RecoveryRunner::new( - engines.engines.kv.clone(), + engines.engines.kv.get_disk_engine().clone(), engines.store_meta.clone(), self.core .config @@ -1041,7 +1080,10 @@ where // Create Debugger. 
let mut debugger = DebuggerImpl::new( - engines.engines.clone(), + Engines::new( + engines.engines.kv.get_disk_engine().clone(), + engines.engines.raft.clone(), + ), self.cfg_controller.as_ref().unwrap().clone(), Some(storage), ); @@ -1163,7 +1205,7 @@ where let mut backup_worker = Box::new(self.core.background_worker.lazy_build("backup-endpoint")); let backup_scheduler = backup_worker.scheduler(); let backup_service = - backup::Service::::with_router(backup_scheduler, self.router.clone()); + backup::Service::::with_router(backup_scheduler, self.router.clone()); if servers .server .register_service(create_backup(backup_service)) @@ -1282,7 +1324,7 @@ where ); } - fn init_storage_stats_task(&self, engines: Engines) { + fn init_storage_stats_task(&self, engines: Engines) { let config_disk_capacity: u64 = self.core.config.raft_store.capacity.0; let data_dir = self.core.config.storage.data_dir.clone(); let store_path = self.core.store_path.clone(); @@ -1509,11 +1551,16 @@ where } } -impl TikvServer { +impl TikvServer +where + EK: KvEngine + KvEngineBuilder, + CER: ConfiguredRaftEngine, + F: KvFormat, +{ fn init_raw_engines( &mut self, flow_listener: engine_rocks::FlowListener, - ) -> (Engines, Arc) { + ) -> (Engines, Arc) { let block_cache = self.core.config.storage.block_cache.build_shared_cache(); let env = self .core @@ -1547,23 +1594,24 @@ impl TikvServer { .sst_recovery_sender(self.init_sst_recovery_sender()) .flow_listener(flow_listener); let factory = Box::new(builder.build()); - let kv_engine = factory + let disk_engine = factory .create_shared_db(&self.core.store_path) .unwrap_or_else(|s| fatal!("failed to create kv engine: {}", s)); + let kv_engine: EK = KvEngineBuilder::build(disk_engine.clone()); self.kv_statistics = Some(factory.rocks_statistics()); - let engines = Engines::new(kv_engine.clone(), raft_engine); + let engines = Engines::new(kv_engine, raft_engine); let cfg_controller = self.cfg_controller.as_mut().unwrap(); cfg_controller.register( 
tikv::config::Module::Rocksdb, Box::new(DbConfigManger::new( cfg_controller.get_current().rocksdb, - kv_engine.clone(), + disk_engine.clone(), DbType::Kv, )), ); let reg = TabletRegistry::new( - Box::new(SingletonFactory::new(kv_engine)), + Box::new(SingletonFactory::new(disk_engine)), &self.core.store_path, ) .unwrap(); diff --git a/components/snap_recovery/src/init_cluster.rs b/components/snap_recovery/src/init_cluster.rs index 7ece321d9dd..c6a14c1e0d3 100644 --- a/components/snap_recovery/src/init_cluster.rs +++ b/components/snap_recovery/src/init_cluster.rs @@ -3,8 +3,8 @@ use std::{cmp, error::Error as StdError, i32, result, sync::Arc, thread, time::Duration}; use encryption_export::data_key_manager_from_config; -use engine_rocks::{util::new_engine_opt, RocksEngine}; -use engine_traits::{Engines, Error as EngineError, Peekable, RaftEngine, SyncMutable}; +use engine_rocks::util::new_engine_opt; +use engine_traits::{Engines, Error as EngineError, KvEngine, RaftEngine}; use kvproto::{metapb, raft_serverpb::StoreIdent}; use pd_client::{Error as PdError, PdClient}; use raft_log_engine::RaftLogEngine; @@ -251,21 +251,21 @@ pub trait LocalEngineService { } // init engine and read local engine info -pub struct LocalEngines { - engines: Engines, +pub struct LocalEngines { + engines: Engines, } -impl LocalEngines { - pub fn new(engines: Engines) -> LocalEngines { +impl LocalEngines { + pub fn new(engines: Engines) -> LocalEngines { LocalEngines { engines } } - pub fn get_engine(&self) -> &Engines { + pub fn get_engine(&self) -> &Engines { &self.engines } } -impl LocalEngineService for LocalEngines { +impl LocalEngineService for LocalEngines { fn set_cluster_id(&self, cluster_id: u64) { let res = self .get_engine() diff --git a/components/snap_recovery/src/region_meta_collector.rs b/components/snap_recovery/src/region_meta_collector.rs index e3542d6691b..3a88931fae4 100644 --- a/components/snap_recovery/src/region_meta_collector.rs +++ 
b/components/snap_recovery/src/region_meta_collector.rs @@ -2,8 +2,7 @@ use std::{cell::RefCell, error::Error as StdError, result, thread::JoinHandle}; -use engine_rocks::RocksEngine; -use engine_traits::{Engines, Iterable, Peekable, RaftEngine, CF_RAFT}; +use engine_traits::{Engines, KvEngine, RaftEngine, CF_RAFT}; use futures::channel::mpsc::UnboundedSender; use kvproto::{ raft_serverpb::{PeerState, RaftApplyState, RaftLocalState, RegionLocalState}, @@ -30,9 +29,13 @@ pub enum Error { } /// `RegionMetaCollector` is the collector that collector all region meta -pub struct RegionMetaCollector { +pub struct RegionMetaCollector +where + EK: KvEngine, + ER: RaftEngine, +{ /// The engine we are working on - engines: Engines, + engines: Engines, /// region meta report to br tx: UnboundedSender, /// Current working workers @@ -40,8 +43,12 @@ pub struct RegionMetaCollector { } #[allow(dead_code)] -impl RegionMetaCollector { - pub fn new(engines: Engines, tx: UnboundedSender) -> Self { +impl RegionMetaCollector +where + EK: KvEngine, + ER: RaftEngine, +{ + pub fn new(engines: Engines, tx: UnboundedSender) -> Self { RegionMetaCollector { engines, tx, @@ -74,14 +81,22 @@ impl RegionMetaCollector { } } -struct CollectWorker { +struct CollectWorker +where + EK: KvEngine, + ER: RaftEngine, +{ /// The engine we are working on - engines: Engines, + engines: Engines, tx: UnboundedSender, } -impl CollectWorker { - pub fn new(engines: Engines, tx: UnboundedSender) -> Self { +impl CollectWorker +where + EK: KvEngine, + ER: RaftEngine, +{ + pub fn new(engines: Engines, tx: UnboundedSender) -> Self { CollectWorker { engines, tx } } diff --git a/components/snap_recovery/src/services.rs b/components/snap_recovery/src/services.rs index daf6e7ed30f..6bf706e158f 100644 --- a/components/snap_recovery/src/services.rs +++ b/components/snap_recovery/src/services.rs @@ -19,7 +19,7 @@ use engine_rocks::{ util::get_cf_handle, RocksEngine, }; -use engine_traits::{CfNamesExt, CfOptionsExt, Engines, 
Peekable, RaftEngine}; +use engine_traits::{CfNamesExt, CfOptionsExt, Engines, KvEngine, RaftEngine}; use futures::{ channel::mpsc, executor::{ThreadPool, ThreadPoolBuilder}, @@ -67,11 +67,16 @@ pub enum Error { #[error("{0:?}")] Other(#[from] Box), } + /// Service handles the recovery messages from backup restore. #[derive(Clone)] -pub struct RecoveryService { - engines: Engines, - router: RaftRouter, +pub struct RecoveryService +where + EK: KvEngine, + ER: RaftEngine, +{ + engines: Engines, + router: RaftRouter, threads: ThreadPool, /// The handle to last call of recover region RPC. @@ -113,13 +118,14 @@ impl RecoverRegionState { } } -impl RecoveryService { +impl RecoveryService +where + EK: KvEngine, + ER: RaftEngine, +{ /// Constructs a new `Service` with `Engines`, a `RaftStoreRouter` and a /// `thread pool`. - pub fn new( - engines: Engines, - router: RaftRouter, - ) -> RecoveryService { + pub fn new(engines: Engines, router: RaftRouter) -> RecoveryService { let props = tikv_util::thread_group::current_properties(); let threads = ThreadPoolBuilder::new() .pool_size(4) @@ -136,7 +142,7 @@ impl RecoveryService { // config rocksdb l0 to optimize the restore // also for massive data applied during the restore, it easy to reach the write // stop - let db = engines.kv.clone(); + let db: &RocksEngine = engines.kv.get_disk_engine(); for cf_name in db.cf_names() { Self::set_db_options(cf_name, db.clone()).expect("set db option failure"); } @@ -218,7 +224,7 @@ impl RecoveryService { // a new wait apply syncer share with all regions, // when all region reached the target index, share reference decreased to 0, // trigger closure to send finish info back. 
- pub fn wait_apply_last(router: RaftRouter, sender: SyncSender) { + pub fn wait_apply_last(router: RaftRouter, sender: SyncSender) { let wait_apply = SnapshotRecoveryWaitApplySyncer::new(0, sender); router.broadcast_normal(|| { PeerMsg::SignificantMsg(SignificantMsg::SnapshotRecoveryWaitApply( @@ -261,7 +267,11 @@ fn compact(engine: RocksEngine) -> Result<()> { Ok(()) } -impl RecoverData for RecoveryService { +impl RecoverData for RecoveryService +where + EK: KvEngine, + ER: RaftEngine, +{ // 1. br start to ready region meta fn read_region_meta( &mut self, @@ -444,10 +454,14 @@ impl RecoverData for RecoveryService { // implement a resolve/delete data funciton let resolved_ts = req.get_resolved_ts(); let (tx, rx) = mpsc::unbounded(); - let resolver = DataResolverManager::new(self.engines.kv.clone(), tx, resolved_ts.into()); + let resolver = DataResolverManager::new( + self.engines.kv.get_disk_engine().clone(), + tx, + resolved_ts.into(), + ); info!("start to resolve kv data"); resolver.start(); - let db = self.engines.kv.clone(); + let db = self.engines.kv.get_disk_engine().clone(); let store_id = self.get_store_id(); let send_task = async move { let id = store_id?; diff --git a/src/config/mod.rs b/src/config/mod.rs index aac91aaaedf..c0c2a679b5a 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -3400,6 +3400,9 @@ pub struct TikvConfig { #[online_config(skip)] pub memory_usage_high_water: f64, + // Memory quota used for in-memory engine. 0 means not enable it. 
+ pub region_cache_memory_limit: ReadableSize, + #[online_config(submodule)] pub log: LogConfig, @@ -3499,6 +3502,7 @@ impl Default for TikvConfig { abort_on_panic: false, memory_usage_limit: None, memory_usage_high_water: 0.9, + region_cache_memory_limit: ReadableSize::mb(0), log: LogConfig::default(), memory: MemoryConfig::default(), quota: QuotaConfig::default(), From ae8902d026479425c84e4e0f55fa96936609f297 Mon Sep 17 00:00:00 2001 From: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Date: Wed, 13 Dec 2023 18:25:49 +0800 Subject: [PATCH 1062/1149] cloud: update gcs sdk to support fips 140 (#16177) close tikv/tikv#16097 update cloud sdk Signed-off-by: Leavrth Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 32 ++------------------------------ Cargo.toml | 3 +++ 2 files changed, 5 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bcbb57979ee..44cc3d1d1c7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4629,21 +4629,6 @@ dependencies = [ "syn 1.0.103", ] -[[package]] -name = "ring" -version = "0.16.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b72b84d47e8ec5a4f2872e8262b8f8256c5be1c938a7d6d3a867a3ba8f722f74" -dependencies = [ - "cc", - "libc 0.2.146", - "once_cell", - "spin", - "untrusted", - "web-sys", - "winapi 0.3.9", -] - [[package]] name = "rocksdb" version = "0.3.0" @@ -5378,12 +5363,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - [[package]] name = "sst_importer" version = "0.1.0" @@ -5622,15 +5601,14 @@ dependencies = [ [[package]] name = "tame-oauth" version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9435c9348e480fad0f2215d5602e2dfad03df8a6398c4e7ceaeaa42758f26a8a" +source = 
"git+https://github.com/tikv/tame-oauth?branch=fips#176e3c69e9b5cd04b4248824ae6ee38ef57385be" dependencies = [ "base64 0.13.0", "chrono", "http", "lock_api", + "openssl", "parking_lot 0.11.1", - "ring", "serde", "serde_json", "twox-hash", @@ -6928,12 +6906,6 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" -[[package]] -name = "untrusted" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" - [[package]] name = "url" version = "2.2.2" diff --git a/Cargo.toml b/Cargo.toml index 3e5efdd40c3..7bf16f3e092 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -199,6 +199,9 @@ rusoto_mock = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr rusoto_s3 = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } rusoto_sts = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } +# NOTICE: use openssl for signature to support fips 140 +tame-oauth = { git = "https://github.com/tikv/tame-oauth", branch = "fips" } + snappy-sys = { git = "https://github.com/busyjay/rust-snappy.git", branch = "static-link" } # remove this when https://github.com/danburkert/fs2-rs/pull/42 is merged. 
From f9727af132109754e63fbb4910b73563d0b1da45 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 14 Dec 2023 14:40:50 +0800 Subject: [PATCH 1063/1149] In-memory Engine: implement read flow part 1 (#16163) ref tikv/tikv#16141 implement in-memory engine read flow -- part 1 Signed-off-by: SpadeA-Tang --- Cargo.lock | 2 + components/backup-stream/src/utils.rs | 2 +- components/cdc/src/observer.rs | 3 +- components/cdc/src/old_value.rs | 6 +- components/engine_panic/src/engine.rs | 4 +- components/engine_rocks/src/engine.rs | 8 +- components/engine_traits/src/engine.rs | 10 +- components/engine_traits/src/errors.rs | 3 + .../engine_traits_tests/src/iterator.rs | 40 +- .../src/read_consistency.rs | 6 +- .../engine_traits_tests/src/snapshot_basic.rs | 8 +- components/error_code/src/engine.rs | 3 +- components/hybrid_engine/src/engine.rs | 6 +- .../src/operation/query/capture.rs | 2 +- .../raftstore-v2/src/operation/query/local.rs | 6 +- .../src/store/async_io/write_tests.rs | 4 +- components/raftstore/src/store/fsm/apply.rs | 6 +- components/raftstore/src/store/peer.rs | 2 +- .../raftstore/src/store/peer_storage.rs | 4 +- .../raftstore/src/store/region_snapshot.rs | 2 +- components/raftstore/src/store/snap.rs | 16 +- components/raftstore/src/store/snap/io.rs | 4 +- .../src/store/worker/consistency_check.rs | 2 +- components/raftstore/src/store/worker/read.rs | 4 +- .../raftstore/src/store/worker/region.rs | 4 +- .../region_cache_memory_engine/Cargo.toml | 2 + .../region_cache_memory_engine/src/engine.rs | 521 ++++++++++++++++-- .../region_cache_memory_engine/src/lib.rs | 2 + components/tikv_kv/src/rocksdb_engine.rs | 2 +- src/server/node.rs | 2 +- tests/benches/misc/raftkv/mod.rs | 4 +- tests/integrations/raftstore/test_snap.rs | 2 +- tests/integrations/storage/test_titan.rs | 4 +- 33 files changed, 597 insertions(+), 99 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 44cc3d1d1c7..335c4a5301f 100644 --- a/Cargo.lock 
+++ b/Cargo.lock @@ -4444,9 +4444,11 @@ checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" name = "region_cache_memory_engine" version = "0.0.1" dependencies = [ + "bytes", "collections", "engine_traits", "skiplist-rs", + "tikv_util", ] [[package]] diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 974b1762cf2..ed8b7579587 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -996,7 +996,7 @@ mod test { let (items, size) = super::with_record_read_throughput(|| { let mut items = vec![]; - let snap = engine.snapshot(); + let snap = engine.snapshot(None); snap.scan(CF_DEFAULT, b"", b"", false, |k, v| { items.push((k.to_owned(), v.to_owned())); Ok(true) diff --git a/components/cdc/src/observer.rs b/components/cdc/src/observer.rs index cfcedfeb59d..965a31ac7ff 100644 --- a/components/cdc/src/observer.rs +++ b/components/cdc/src/observer.rs @@ -121,7 +121,8 @@ impl CmdObserver for CdcObserver { // Create a snapshot here for preventing the old value was GC-ed. // TODO: only need it after enabling old value, may add a flag to indicate // whether to get it. 
- let snapshot = RegionSnapshot::from_snapshot(Arc::new(engine.snapshot()), Arc::new(region)); + let snapshot = + RegionSnapshot::from_snapshot(Arc::new(engine.snapshot(None)), Arc::new(region)); let get_old_value = move |key, query_ts, old_value_cache: &mut OldValueCache, diff --git a/components/cdc/src/old_value.rs b/components/cdc/src/old_value.rs index e343ccc226f..02f1bd00507 100644 --- a/components/cdc/src/old_value.rs +++ b/components/cdc/src/old_value.rs @@ -308,7 +308,7 @@ mod tests { value: Option, ) -> Statistics { let key = key.clone().append_ts(ts.into()); - let snapshot = Arc::new(kv_engine.snapshot()); + let snapshot = Arc::new(kv_engine.snapshot(None)); let mut cursor = new_write_cursor_on_key(&snapshot, &key); let load_default = Either::Left(&snapshot); let mut stats = Statistics::default(); @@ -527,7 +527,7 @@ mod tests { must_commit(&mut engine, &key, 200, 201); } - let snapshot = Arc::new(kv_engine.snapshot()); + let snapshot = Arc::new(kv_engine.snapshot(None)); let mut cursor = new_old_value_cursor(&snapshot, CF_WRITE); let mut default_cursor = new_old_value_cursor(&snapshot, CF_DEFAULT); let mut load_default = |use_default_cursor: bool| { @@ -598,7 +598,7 @@ mod tests { } let key = format!("zkey-{:0>3}", 0).into_bytes(); - let snapshot = Arc::new(kv_engine.snapshot()); + let snapshot = Arc::new(kv_engine.snapshot(None)); let perf_instant = ReadPerfInstant::new(); let value = get_old_value( &snapshot, diff --git a/components/engine_panic/src/engine.rs b/components/engine_panic/src/engine.rs index b5ce0d1516e..3f75d64f78d 100644 --- a/components/engine_panic/src/engine.rs +++ b/components/engine_panic/src/engine.rs @@ -1,7 +1,7 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
use engine_traits::{ - IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, Result, SyncMutable, + IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, Result, SnapCtx, SyncMutable, WriteOptions, }; @@ -13,7 +13,7 @@ pub struct PanicEngine; impl KvEngine for PanicEngine { type Snapshot = PanicSnapshot; - fn snapshot(&self) -> Self::Snapshot { + fn snapshot(&self, _: Option) -> Self::Snapshot { panic!() } fn sync(&self) -> Result<()> { diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index 293b74e3bca..e0eed00ad53 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -2,7 +2,9 @@ use std::{any::Any, sync::Arc}; -use engine_traits::{IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SyncMutable}; +use engine_traits::{ + IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SnapCtx, SyncMutable, +}; use rocksdb::{DBIterator, Writable, DB}; use crate::{ @@ -182,7 +184,7 @@ impl RocksEngine { impl KvEngine for RocksEngine { type Snapshot = RocksSnapshot; - fn snapshot(&self) -> RocksSnapshot { + fn snapshot(&self, _: Option) -> RocksSnapshot { RocksSnapshot::new(self.db.clone()) } @@ -292,7 +294,7 @@ mod tests { engine.put_msg(key, &r).unwrap(); engine.put_msg_cf(cf, key, &r).unwrap(); - let snap = engine.snapshot(); + let snap = engine.snapshot(None); let mut r1: Region = engine.get_msg(key).unwrap().unwrap(); assert_eq!(r, r1); diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index cc90f2ce075..44539f19419 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -39,7 +39,10 @@ pub trait KvEngine: type Snapshot: Snapshot; /// Create a snapshot - fn snapshot(&self) -> Self::Snapshot; + /// + /// SnapCtx will only be used by some type of trait implementors (ex: + /// HybridEngine) + fn snapshot(&self, snap_ctx: Option) -> Self::Snapshot; /// Syncs any 
writes to disk fn sync(&self) -> Result<()>; @@ -78,3 +81,8 @@ pub trait KvEngine: #[cfg(feature = "testexport")] fn inner_refcount(&self) -> usize; } + +pub struct SnapCtx { + pub region_id: u64, + pub read_ts: u64, +} diff --git a/components/engine_traits/src/errors.rs b/components/engine_traits/src/errors.rs index 6ef46ff7a70..6df2ef5a992 100644 --- a/components/engine_traits/src/errors.rs +++ b/components/engine_traits/src/errors.rs @@ -149,6 +149,8 @@ pub enum Error { EntriesUnavailable, #[error("The entries of region is compacted")] EntriesCompacted, + #[error("Iterator of RegionCacheSnapshot is only supported with boundary set")] + BoundaryNotSet, } pub type Result = result::Result; @@ -165,6 +167,7 @@ impl ErrorCodeExt for Error { Error::Other(_) => error_code::UNKNOWN, Error::EntriesUnavailable => error_code::engine::DATALOSS, Error::EntriesCompacted => error_code::engine::DATACOMPACTED, + Error::BoundaryNotSet => error_code::engine::BOUNDARY_NOT_SET, } } } diff --git a/components/engine_traits_tests/src/iterator.rs b/components/engine_traits_tests/src/iterator.rs index 714ca4cb0b4..fee6cda6f02 100644 --- a/components/engine_traits_tests/src/iterator.rs +++ b/components/engine_traits_tests/src/iterator.rs @@ -41,7 +41,9 @@ fn iter_empty_engine() { #[test] fn iter_empty_snapshot() { let db = default_engine(); - iter_empty(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + iter_empty(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } fn iter_forward(e: &E, i: IF) @@ -99,7 +101,9 @@ fn iter_forward_engine() { #[test] fn iter_forward_snapshot() { let db = default_engine(); - iter_forward(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + iter_forward(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } fn iter_reverse(e: &E, i: IF) @@ -157,7 +161,9 @@ fn iter_reverse_engine() { #[test] fn iter_reverse_snapshot() { let db = default_engine(); - iter_reverse(&db.engine, |e| 
e.snapshot().iterator(CF_DEFAULT).unwrap()); + iter_reverse(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } fn seek_to_key_then_forward(e: &E, i: IF) @@ -198,7 +204,9 @@ fn seek_to_key_then_forward_engine() { #[test] fn seek_to_key_then_forward_snapshot() { let db = default_engine(); - seek_to_key_then_forward(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + seek_to_key_then_forward(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } fn seek_to_key_then_reverse(e: &E, i: IF) @@ -239,7 +247,9 @@ fn seek_to_key_then_reverse_engine() { #[test] fn seek_to_key_then_reverse_snapshot() { let db = default_engine(); - seek_to_key_then_reverse(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + seek_to_key_then_reverse(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } fn iter_forward_then_reverse(e: &E, i: IF) @@ -300,7 +310,9 @@ fn iter_forward_then_reverse_engine() { #[test] fn iter_forward_then_reverse_snapshot() { let db = default_engine(); - iter_forward_then_reverse(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + iter_forward_then_reverse(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } fn iter_reverse_then_forward(e: &E, i: IF) @@ -361,7 +373,9 @@ fn iter_reverse_then_forward_engine() { #[test] fn iter_reverse_then_forward_snapshot() { let db = default_engine(); - iter_reverse_then_forward(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + iter_reverse_then_forward(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } // When seek finds an exact key then seek_for_prev behaves just like seek @@ -405,7 +419,9 @@ fn seek_for_prev_engine() { #[test] fn seek_for_prev_snapshot() { let db = default_engine(); - seek_for_prev(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + seek_for_prev(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } // When Seek::Key doesn't find 
an exact match, @@ -440,7 +456,9 @@ fn seek_key_miss_engine() { #[test] fn seek_key_miss_snapshot() { let db = default_engine(); - seek_key_miss(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + seek_key_miss(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } fn seek_key_prev_miss(e: &E, i: IF) @@ -472,5 +490,7 @@ fn seek_key_prev_miss_engine() { #[test] fn seek_key_prev_miss_snapshot() { let db = default_engine(); - seek_key_prev_miss(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + seek_key_prev_miss(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } diff --git a/components/engine_traits_tests/src/read_consistency.rs b/components/engine_traits_tests/src/read_consistency.rs index 8c7ab50657f..35d0262fbcb 100644 --- a/components/engine_traits_tests/src/read_consistency.rs +++ b/components/engine_traits_tests/src/read_consistency.rs @@ -12,7 +12,7 @@ fn snapshot_with_writes() { db.engine.put(b"a", b"aa").unwrap(); - let snapshot = db.engine.snapshot(); + let snapshot = db.engine.snapshot(None); assert_eq!(snapshot.get_value(b"a").unwrap().unwrap(), b"aa"); @@ -77,5 +77,7 @@ fn iterator_with_writes_engine() { #[test] fn iterator_with_writes_snapshot() { let db = default_engine(); - iterator_with_writes(&db.engine, |e| e.snapshot().iterator(CF_DEFAULT).unwrap()); + iterator_with_writes(&db.engine, |e| { + e.snapshot(None).iterator(CF_DEFAULT).unwrap() + }); } diff --git a/components/engine_traits_tests/src/snapshot_basic.rs b/components/engine_traits_tests/src/snapshot_basic.rs index c0f93480830..83248abfb6e 100644 --- a/components/engine_traits_tests/src/snapshot_basic.rs +++ b/components/engine_traits_tests/src/snapshot_basic.rs @@ -10,7 +10,7 @@ fn snapshot_get_value() { db.engine.put(b"a", b"aa").unwrap(); - let snap = db.engine.snapshot(); + let snap = db.engine.snapshot(None); let value = snap.get_value(b"a").unwrap(); let value = value.unwrap(); @@ -26,7 +26,7 @@ fn 
snapshot_get_value_after_put() { db.engine.put(b"a", b"aa").unwrap(); - let snap = db.engine.snapshot(); + let snap = db.engine.snapshot(None); db.engine.put(b"a", b"aaa").unwrap(); @@ -41,7 +41,7 @@ fn snapshot_get_value_cf() { db.engine.put_cf(CF_WRITE, b"a", b"aa").unwrap(); - let snap = db.engine.snapshot(); + let snap = db.engine.snapshot(None); let value = snap.get_value_cf(CF_WRITE, b"a").unwrap(); let value = value.unwrap(); @@ -57,7 +57,7 @@ fn snapshot_get_value_cf_after_put() { db.engine.put_cf(CF_WRITE, b"a", b"aa").unwrap(); - let snap = db.engine.snapshot(); + let snap = db.engine.snapshot(None); db.engine.put_cf(CF_WRITE, b"a", b"aaa").unwrap(); diff --git a/components/error_code/src/engine.rs b/components/error_code/src/engine.rs index 4bb66f09753..4ae712ffa58 100644 --- a/components/error_code/src/engine.rs +++ b/components/error_code/src/engine.rs @@ -10,5 +10,6 @@ define_error_codes!( CF_NAME => ("CfName", "", ""), CODEC => ("Codec", "", ""), DATALOSS => ("DataLoss", "", ""), - DATACOMPACTED => ("DataCompacted", "", "") + DATACOMPACTED => ("DataCompacted", "", ""), + BOUNDARY_NOT_SET => ("BoundaryNotSet", "", "") ); diff --git a/components/hybrid_engine/src/engine.rs b/components/hybrid_engine/src/engine.rs index 072f1d028ff..6ccb223baf1 100644 --- a/components/hybrid_engine/src/engine.rs +++ b/components/hybrid_engine/src/engine.rs @@ -1,6 +1,8 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, SyncMutable}; +use engine_traits::{ + KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, SnapCtx, SyncMutable, +}; use crate::snapshot::HybridEngineSnapshot; @@ -63,7 +65,7 @@ where { type Snapshot = HybridEngineSnapshot; - fn snapshot(&self) -> Self::Snapshot { + fn snapshot(&self, _: Option) -> Self::Snapshot { unimplemented!() } diff --git a/components/raftstore-v2/src/operation/query/capture.rs b/components/raftstore-v2/src/operation/query/capture.rs index 5dd43f14e19..bc7e93a394b 100644 --- a/components/raftstore-v2/src/operation/query/capture.rs +++ b/components/raftstore-v2/src/operation/query/capture.rs @@ -116,7 +116,7 @@ impl Apply { self.flush(); let (applied_index, _) = self.apply_progress(); let snap = RegionSnapshot::from_snapshot( - Arc::new(self.tablet().snapshot()), + Arc::new(self.tablet().snapshot(None)), Arc::new(self.region().clone()), ); snap.set_apply_index(applied_index); diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index ea802650f3d..1829628ae48 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -209,7 +209,7 @@ where ReadRequestPolicy::ReadLocal => { let region = Arc::clone(&delegate.region); let snap = RegionSnapshot::from_snapshot( - Arc::new(delegate.cached_tablet.cache().snapshot()), + Arc::new(delegate.cached_tablet.cache().snapshot(None)), region, ); @@ -240,7 +240,7 @@ where let region = Arc::clone(&delegate.region); let snap = RegionSnapshot::from_snapshot( - Arc::new(delegate.cached_tablet.cache().snapshot()), + Arc::new(delegate.cached_tablet.cache().snapshot(None)), region, ); @@ -264,7 +264,7 @@ where let region = Arc::clone(&delegate.region); let snap = RegionSnapshot::from_snapshot( - Arc::new(delegate.cached_tablet.cache().snapshot()), + 
Arc::new(delegate.cached_tablet.cache().snapshot(None)), region, ); diff --git a/components/raftstore/src/store/async_io/write_tests.rs b/components/raftstore/src/store/async_io/write_tests.rs index 24abf24c4fd..97e865a6bfe 100644 --- a/components/raftstore/src/store/async_io/write_tests.rs +++ b/components/raftstore/src/store/async_io/write_tests.rs @@ -330,7 +330,7 @@ fn test_worker() { t.worker.write_to_db(true); - let snapshot = engines.kv.snapshot(); + let snapshot = engines.kv.snapshot(None); assert_eq!(snapshot.get_value(b"kv_k1").unwrap().unwrap(), b"kv_v1"); assert_eq!(snapshot.get_value(b"kv_k2").unwrap().unwrap(), b"kv_v2"); assert_eq!(snapshot.get_value(b"kv_k3").unwrap().unwrap(), b"kv_v3"); @@ -536,7 +536,7 @@ fn test_basic_flow() { must_wait_same_notifies(vec![(region_1, (1, 15)), (region_2, (2, 20))], &t.notify_rx); - let snapshot = engines.kv.snapshot(); + let snapshot = engines.kv.snapshot(None); assert!(snapshot.get_value(b"kv_k1").unwrap().is_none()); assert_eq!(snapshot.get_value(b"kv_k2").unwrap().unwrap(), b"kv_v2"); assert_eq!(snapshot.get_value(b"kv_k3").unwrap().unwrap(), b"kv_v3"); diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index f70e0a31181..221e5b1dcea 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -3244,7 +3244,7 @@ where // open files in rocksdb. // TODO: figure out another way to do consistency check without snapshot // or short life snapshot. 
- snap: ctx.engine.snapshot(), + snap: ctx.engine.snapshot(None), }) }, )) @@ -4198,7 +4198,7 @@ where } if let Err(e) = snap_task.generate_and_schedule_snapshot::( - apply_ctx.engine.snapshot(), + apply_ctx.engine.snapshot(None), self.delegate.applied_term, self.delegate.apply_state.clone(), &apply_ctx.region_scheduler, @@ -4270,7 +4270,7 @@ where ReadResponse { response: Default::default(), snapshot: Some(RegionSnapshot::from_snapshot( - Arc::new(apply_ctx.engine.snapshot()), + Arc::new(apply_ctx.engine.snapshot(None)), Arc::new(self.delegate.region.clone()), )), txn_extra_op: TxnExtraOp::Noop, diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 2d304490bb7..382b9e53b4b 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -5736,7 +5736,7 @@ where } fn get_snapshot(&mut self, _: &Option>) -> Arc { - Arc::new(self.engines.kv.snapshot()) + Arc::new(self.engines.kv.snapshot(None)) } } diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index a888929ca98..44ae3543e95 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -372,7 +372,7 @@ where #[inline] pub fn raw_snapshot(&self) -> EK::Snapshot { - self.engines.kv.snapshot() + self.engines.kv.snapshot(None) } #[inline] @@ -1607,7 +1607,7 @@ pub mod tests { .unwrap() .unwrap(); gen_task.generate_and_schedule_snapshot::( - engines.kv.clone().snapshot(), + engines.kv.clone().snapshot(None), entry.get_term(), apply_state, sched, diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index bc22dfbf586..1c2c6251eba 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -59,7 +59,7 @@ where where EK: KvEngine, { - RegionSnapshot::from_snapshot(Arc::new(db.snapshot()), Arc::new(region)) + 
RegionSnapshot::from_snapshot(Arc::new(db.snapshot(None)), Arc::new(region)) } pub fn from_snapshot(snap: Arc, region: Arc) -> RegionSnapshot { diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index 6976f4614df..e7e7c6ccb10 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -2639,7 +2639,7 @@ pub mod tests { .tempdir() .unwrap(); let db = get_db(src_db_dir.path(), None, None).unwrap(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let src_dir = Builder::new() .prefix("test-snap-file-db-src") @@ -2747,7 +2747,7 @@ pub mod tests { .tempdir() .unwrap(); let db = get_db(db_dir.path(), None, None).unwrap(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let dir = Builder::new() .prefix("test-snap-validation") @@ -2900,7 +2900,7 @@ pub mod tests { .tempdir() .unwrap(); let db: KvTestEngine = open_test_db(db_dir.path(), None, None).unwrap(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let dir = Builder::new() .prefix("test-snap-corruption") @@ -2975,7 +2975,7 @@ pub mod tests { .tempdir() .unwrap(); let db: KvTestEngine = open_test_db_with_100keys(db_dir.path(), None, None).unwrap(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let dir = Builder::new() .prefix("test-snap-corruption-meta") @@ -3056,7 +3056,7 @@ pub mod tests { .tempdir() .unwrap(); let db: KvTestEngine = open_test_db(db_dir.path(), None, None).unwrap(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let key1 = SnapKey::new(1, 1, 1); let mgr_core = create_manager_core(&path, u64::MAX); let mut s1 = Snapshot::new_for_building(&path, &key1, &mgr_core).unwrap(); @@ -3127,7 +3127,7 @@ pub mod tests { .tempdir() .unwrap(); let db: KvTestEngine = open_test_db(src_db_dir.path(), None, None).unwrap(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let key = SnapKey::new(1, 1, 1); let region = 
gen_test_region(1, 1, 1); @@ -3209,7 +3209,7 @@ pub mod tests { .max_total_size(max_total_size) .build::<_>(snapfiles_path.path().to_str().unwrap()); snap_mgr.init().unwrap(); - let snapshot = engine.kv.snapshot(); + let snapshot = engine.kv.snapshot(None); // Add an oldest snapshot for receiving. let recv_key = SnapKey::new(100, 100, 100); @@ -3334,7 +3334,7 @@ pub mod tests { .tempdir() .unwrap(); let db: KvTestEngine = open_test_db(kv_dir.path(), None, None).unwrap(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let key = SnapKey::new(1, 1, 1); let region = gen_test_region(1, 1, 1); diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index c897aaa2597..48919474135 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -378,7 +378,7 @@ mod tests { .unwrap(); let db1: KvTestEngine = open_test_empty_db(dir1.path(), db_opt, None).unwrap(); - let snap = db.snapshot(); + let snap = db.snapshot(None); for cf in SNAPSHOT_CFS { let snap_cf_dir = Builder::new().prefix("test-snap-cf").tempdir().unwrap(); let mut cf_file = CfFile { @@ -462,7 +462,7 @@ mod tests { let stats = build_sst_cf_file_list::( &mut cf_file, &db, - &db.snapshot(), + &db.snapshot(None), &keys::data_key(b"a"), &keys::data_key(b"z"), *max_file_size, diff --git a/components/raftstore/src/store/worker/consistency_check.rs b/components/raftstore/src/store/worker/consistency_check.rs index fef2bae332c..d034cd8604f 100644 --- a/components/raftstore/src/store/worker/consistency_check.rs +++ b/components/raftstore/src/store/worker/consistency_check.rs @@ -162,7 +162,7 @@ mod tests { index: 10, context: vec![ConsistencyCheckMethod::Raw as u8], region: region.clone(), - snap: db.snapshot(), + snap: db.snapshot(None), }); let mut checksum_bytes = vec![]; checksum_bytes.write_u32::(sum).unwrap(); diff --git a/components/raftstore/src/store/worker/read.rs 
b/components/raftstore/src/store/worker/read.rs index 2694481494f..dbec805fe97 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -242,7 +242,7 @@ where } self.snap_cache.cached_read_id = self.read_id.clone(); - self.snap_cache.snapshot = Some(Arc::new(engine.snapshot())); + self.snap_cache.snapshot = Some(Arc::new(engine.snapshot(None))); // Ensures the snapshot is acquired before getting the time atomic::fence(atomic::Ordering::Release); @@ -250,7 +250,7 @@ where } else { // read_id being None means the snapshot acquired will only be used in this // request - self.snapshot = Some(Arc::new(engine.snapshot())); + self.snapshot = Some(Arc::new(engine.snapshot(None))); // Ensures the snapshot is acquired before getting the time atomic::fence(atomic::Ordering::Release); diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index 068904b2a67..dd2c8f90de1 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -1091,7 +1091,7 @@ pub(crate) mod tests { ranges.push(key); } engine.kv.put(b"k1", b"v1").unwrap(); - let snap = engine.kv.snapshot(); + let snap = engine.kv.snapshot(None); engine.kv.put(b"k2", b"v2").unwrap(); sched @@ -1204,7 +1204,7 @@ pub(crate) mod tests { sched .schedule(Task::Gen { region_id: id, - kv_snap: engine.kv.snapshot(), + kv_snap: engine.kv.snapshot(None), last_applied_term: entry.get_term(), last_applied_state: apply_state, canceled: Arc::new(AtomicBool::new(false)), diff --git a/components/region_cache_memory_engine/Cargo.toml b/components/region_cache_memory_engine/Cargo.toml index 89ae317aa94..c529698fa14 100644 --- a/components/region_cache_memory_engine/Cargo.toml +++ b/components/region_cache_memory_engine/Cargo.toml @@ -11,3 +11,5 @@ testexport = [] engine_traits = { workspace = true } collections = { workspace = true } skiplist-rs = { git = 
"https://github.com/tikv/skiplist-rs.git", branch = "main" } +bytes = "1.0" +tikv_util = { workspace = true } \ No newline at end of file diff --git a/components/region_cache_memory_engine/src/engine.rs b/components/region_cache_memory_engine/src/engine.rs index ad16e7e8407..93e4c1a6d74 100644 --- a/components/region_cache_memory_engine/src/engine.rs +++ b/components/region_cache_memory_engine/src/engine.rs @@ -1,5 +1,6 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. +use core::slice::SlicePattern; use std::{ collections::BTreeMap, fmt::{self, Debug}, @@ -7,12 +8,24 @@ use std::{ sync::{Arc, Mutex}, }; +use bytes::Bytes; use collections::HashMap; use engine_traits::{ - CfNamesExt, DbVector, IterOptions, Iterable, Iterator, Mutable, Peekable, ReadOptions, + CfNamesExt, DbVector, Error, IterOptions, Iterable, Iterator, Mutable, Peekable, ReadOptions, RegionCacheEngine, Result, Snapshot, SnapshotMiscExt, WriteBatch, WriteBatchExt, WriteOptions, + CF_DEFAULT, CF_LOCK, CF_WRITE, }; use skiplist_rs::{ByteWiseComparator, IterRef, Skiplist}; +use tikv_util::config::ReadableSize; + +fn cf_to_id(cf: &str) -> usize { + match cf { + CF_DEFAULT => 0, + CF_LOCK => 1, + CF_WRITE => 2, + _ => panic!("unrecognized cf {}", cf), + } +} /// RegionMemoryEngine stores data for a specific cached region /// @@ -23,19 +36,69 @@ pub struct RegionMemoryEngine { data: [Arc>; 3], } +impl RegionMemoryEngine { + pub fn with_capacity(arena_size: usize) -> Self { + RegionMemoryEngine { + data: [ + Arc::new(Skiplist::with_capacity( + ByteWiseComparator::default(), + arena_size, + true, + )), + Arc::new(Skiplist::with_capacity( + ByteWiseComparator::default(), + arena_size, + true, + )), + Arc::new(Skiplist::with_capacity( + ByteWiseComparator::default(), + arena_size, + true, + )), + ], + } + } +} + +impl Default for RegionMemoryEngine { + fn default() -> Self { + RegionMemoryEngine::with_capacity(ReadableSize::mb(1).0 as usize) + } +} + impl Debug for RegionMemoryEngine { fn 
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - unimplemented!() + write!(f, "Region Memory Engine") } } -type SnapshotList = BTreeMap; +// read_ts -> ref_count +#[derive(Default)] +struct SnapshotList(BTreeMap); + +impl SnapshotList { + fn new_snapshot(&mut self, read_ts: u64) { + // snapshot with this ts may be granted before + let count = self.0.get(&read_ts).unwrap_or(&0) + 1; + self.0.insert(read_ts, count); + } + + fn remove_snapshot(&mut self, read_ts: u64) { + let count = self.0.get_mut(&read_ts).unwrap(); + assert!(*count >= 1); + if *count == 1 { + self.0.remove(&read_ts).unwrap(); + } else { + *count -= 1; + } + } +} #[derive(Default)] pub struct RegionMemoryMeta { // It records the snapshots that have been granted previsously with specific snapshot_ts. We // should guarantee that the data visible to any one of the snapshot in it will not be removed. - snapshots: SnapshotList, + snapshot_list: SnapshotList, // It indicates whether the region is readable. False means integrity of the data in this // cached region is not satisfied due to being evicted for instance. 
can_read: bool, @@ -47,7 +110,7 @@ pub struct RegionMemoryMeta { #[derive(Default)] pub struct RegionCacheMemoryEngineCore { engine: HashMap, - region_metats: HashMap, + region_metas: HashMap, } /// The RegionCacheMemoryEngine serves as a region cache, storing hot regions in @@ -73,22 +136,29 @@ pub struct RegionCacheMemoryEngine { } impl Debug for RegionCacheMemoryEngine { - fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result { - unimplemented!() + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Region Cache Memory Engine") } } impl RegionCacheMemoryEngine { - pub fn new() -> Self { - RegionCacheMemoryEngine::default() + pub fn new_region(&self, region_id: u64) { + let mut core = self.core.lock().unwrap(); + + assert!(core.engine.get(®ion_id).is_none()); + assert!(core.region_metas.get(®ion_id).is_none()); + core.engine.insert(region_id, RegionMemoryEngine::default()); + core.region_metas + .insert(region_id, RegionMemoryMeta::default()); } } impl RegionCacheEngine for RegionCacheMemoryEngine { type Snapshot = RegionCacheSnapshot; + // todo(SpadeA): add sequence number logic fn snapshot(&self, region_id: u64, read_ts: u64) -> Option { - unimplemented!() + RegionCacheSnapshot::new(self.clone(), region_id, read_ts) } } @@ -110,12 +180,14 @@ impl WriteBatchExt for RegionCacheMemoryEngine { } pub struct RegionCacheIterator { + cf: String, valid: bool, prefix_same_as_start: bool, prefix: Option>, iter: IterRef, ByteWiseComparator>, - lower_bound: Option>, - upper_bound: Option>, + // The lower bound is inclusive while the upper bound is exclusive if set + lower_bound: Vec, + upper_bound: Vec, } impl Iterable for RegionCacheMemoryEngine { @@ -132,39 +204,84 @@ impl Iterable for RegionCacheMemoryEngine { impl Iterator for RegionCacheIterator { fn key(&self) -> &[u8] { - unimplemented!() + assert!(self.valid); + self.iter.key().as_slice() } fn value(&self) -> &[u8] { - unimplemented!() + assert!(self.valid); + self.iter.value().as_slice() } fn 
next(&mut self) -> Result { - unimplemented!() + assert!(self.valid); + self.iter.next(); + self.valid = self.iter.valid() && self.iter.key().as_slice() < self.upper_bound.as_slice(); + + if self.valid && self.prefix_same_as_start { + // todo(SpadeA): support prefix seek + unimplemented!() + } + Ok(self.valid) } fn prev(&mut self) -> Result { - unimplemented!() + assert!(self.valid); + self.iter.prev(); + self.valid = self.iter.valid() && self.iter.key().as_slice() >= self.lower_bound.as_slice(); + if self.valid && self.prefix_same_as_start { + // todo(SpadeA): support prefix seek + unimplemented!() + } + Ok(self.valid) } fn seek(&mut self, key: &[u8]) -> Result { - unimplemented!() + let seek_key = if key < self.lower_bound.as_slice() { + self.lower_bound.as_slice() + } else { + key + }; + self.iter.seek(seek_key); + self.valid = self.iter.valid() && self.iter.key().as_slice() < self.upper_bound.as_slice(); + + if self.valid && self.prefix_same_as_start { + // todo(SpadeA): support prefix seek + unimplemented!() + } + + Ok(self.valid) } fn seek_for_prev(&mut self, key: &[u8]) -> Result { - unimplemented!() + let end = if key > self.upper_bound.as_slice() { + self.upper_bound.as_slice() + } else { + key + }; + self.iter.seek_for_prev(end); + self.valid = self.iter.valid() && self.iter.key().as_slice() >= self.lower_bound.as_slice(); + + if self.valid && self.prefix_same_as_start { + // todo(SpadeA): support prefix seek + unimplemented!() + } + + Ok(self.valid) } fn seek_to_first(&mut self) -> Result { - unimplemented!() + let lower_bound = self.lower_bound.clone(); + self.seek(lower_bound.as_slice()) } fn seek_to_last(&mut self) -> Result { - unimplemented!() + let upper_bound = self.upper_bound.clone(); + self.seek_for_prev(upper_bound.as_slice()) } fn valid(&self) -> Result { - unimplemented!() + Ok(self.valid) } } @@ -240,7 +357,40 @@ impl Mutable for RegionCacheWriteBatch { pub struct RegionCacheSnapshot { region_id: u64, snapshot_ts: u64, - engine: 
RegionMemoryEngine, + region_memory_engine: RegionMemoryEngine, + engine: RegionCacheMemoryEngine, +} + +impl RegionCacheSnapshot { + pub fn new(engine: RegionCacheMemoryEngine, region_id: u64, read_ts: u64) -> Option { + let mut core = engine.core.lock().unwrap(); + let region_meta = core.region_metas.get_mut(®ion_id)?; + if !region_meta.can_read { + return None; + } + + if read_ts <= region_meta.safe_ts { + // todo(SpadeA): add metrics for it + return None; + } + + region_meta.snapshot_list.new_snapshot(read_ts); + + Some(RegionCacheSnapshot { + region_id, + snapshot_ts: read_ts, + region_memory_engine: core.engine.get(®ion_id).unwrap().clone(), + engine: engine.clone(), + }) + } +} + +impl Drop for RegionCacheSnapshot { + fn drop(&mut self) { + let mut core = self.engine.core.lock().unwrap(); + let meta = core.region_metas.get_mut(&self.region_id).unwrap(); + meta.snapshot_list.remove_snapshot(self.snapshot_ts); + } } impl Snapshot for RegionCacheSnapshot {} @@ -248,12 +398,23 @@ impl Snapshot for RegionCacheSnapshot {} impl Iterable for RegionCacheSnapshot { type Iterator = RegionCacheIterator; - fn iterator(&self, cf: &str) -> Result { - unimplemented!() - } - fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { - unimplemented!() + let iter = self.region_memory_engine.data[cf_to_id(cf)].iter(); + let prefix_same_as_start = opts.prefix_same_as_start(); + let (lower_bound, upper_bound) = opts.build_bounds(); + // only support with lower/upper bound set + if lower_bound.is_none() || upper_bound.is_none() { + return Err(Error::BoundaryNotSet); + } + Ok(RegionCacheIterator { + cf: String::from(cf), + valid: false, + prefix_same_as_start, + prefix: None, + lower_bound: lower_bound.unwrap(), + upper_bound: upper_bound.unwrap(), + iter, + }) } } @@ -261,16 +422,19 @@ impl Peekable for RegionCacheSnapshot { type DbVector = RegionCacheDbVector; fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { - unimplemented!() + 
self.get_value_cf_opt(opts, CF_DEFAULT, key) } fn get_value_cf_opt( &self, - opts: &ReadOptions, + _: &ReadOptions, cf: &str, key: &[u8], ) -> Result> { - unimplemented!() + Ok(self.region_memory_engine.data[cf_to_id(cf)] + .get(key) + .cloned() + .map(|v| RegionCacheDbVector(v))) } } @@ -286,15 +450,14 @@ impl SnapshotMiscExt for RegionCacheSnapshot { } } -// todo: fill fields needed #[derive(Debug)] -pub struct RegionCacheDbVector; +pub struct RegionCacheDbVector(Bytes); impl Deref for RegionCacheDbVector { type Target = [u8]; fn deref(&self) -> &[u8] { - unimplemented!() + self.0.as_slice() } } @@ -302,6 +465,296 @@ impl DbVector for RegionCacheDbVector {} impl<'a> PartialEq<&'a [u8]> for RegionCacheDbVector { fn eq(&self, rhs: &&[u8]) -> bool { - unimplemented!() + self.0.as_slice() == *rhs + } +} + +#[cfg(test)] +mod tests { + use core::ops::Range; + use std::{iter::StepBy, sync::Arc}; + + use bytes::Bytes; + use engine_traits::{ + IterOptions, Iterable, Iterator, Peekable, ReadOptions, RegionCacheEngine, + }; + use skiplist_rs::{ByteWiseComparator, Skiplist}; + + use super::{cf_to_id, RegionCacheIterator}; + use crate::RegionCacheMemoryEngine; + + #[test] + fn test_snapshot() { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + + let verify_snapshot_count = |snapshot_ts, count| { + let core = engine.core.lock().unwrap(); + if count > 0 { + assert_eq!( + *core + .region_metas + .get(&1) + .unwrap() + .snapshot_list + .0 + .get(&snapshot_ts) + .unwrap(), + count + ); + } else { + assert!( + core.region_metas + .get(&1) + .unwrap() + .snapshot_list + .0 + .get(&snapshot_ts) + .is_none() + ) + } + }; + + assert!(engine.snapshot(1, 5).is_none()); + + { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + } + let s1 = engine.snapshot(1, 5).unwrap(); + + { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + } + assert!(engine.snapshot(1, 
5).is_none()); + let s2 = engine.snapshot(1, 10).unwrap(); + + verify_snapshot_count(5, 1); + verify_snapshot_count(10, 1); + let s3 = engine.snapshot(1, 10).unwrap(); + verify_snapshot_count(10, 2); + + drop(s1); + verify_snapshot_count(5, 0); + drop(s2); + verify_snapshot_count(10, 1); + let s4 = engine.snapshot(1, 10).unwrap(); + verify_snapshot_count(10, 2); + drop(s4); + verify_snapshot_count(10, 1); + drop(s3); + verify_snapshot_count(10, 0); + } + + fn construct_key(i: i32) -> String { + format!("key-{:08}", i) + } + + fn construct_value(i: i32) -> String { + format!("value-{:08}", i) + } + + fn fill_data_in_skiplist(sl: Arc>, range: StepBy>) { + for i in range { + let key = construct_key(i); + let val = construct_value(i); + sl.put(Bytes::from(key), Bytes::from(val)); + } + } + + fn verify_key_value(k: &[u8], v: &[u8], i: i32) { + let key = construct_key(i); + let val = construct_value(i); + assert_eq!(k, key.as_bytes()); + assert_eq!(v, val.as_bytes()); + } + + fn verify_key_values( + iter: &mut RegionCacheIterator, + step: i32, + mut start_idx: i32, + end_idx: i32, + ) { + let forward = step > 0; + while iter.valid().unwrap() { + let k = iter.key(); + let val = iter.value(); + verify_key_value(k, val, start_idx); + if forward { + iter.next().unwrap(); + } else { + iter.prev().unwrap(); + } + start_idx += step; + } + + if forward { + assert!(start_idx - step < end_idx); + } else { + assert!(start_idx - step > end_idx); + } + } + + #[test] + fn test_get_value() { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + + { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + fill_data_in_skiplist(sl, (1..100).step_by(1)); + } + + let snapshot = engine.snapshot(1, 10).unwrap(); + let opts = ReadOptions::default(); + for i in 1..100 { + let k = 
construct_key(i); + let v = snapshot + .get_value_cf_opt(&opts, "write", k.as_bytes()) + .unwrap() + .unwrap(); + verify_key_value(k.as_bytes(), &v, i); + } + + let k = construct_key(100); + assert!( + snapshot + .get_value_cf_opt(&opts, "write", k.as_bytes()) + .unwrap() + .is_none() + ); + } + + #[test] + fn test_iterator_forawrd() { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + let step: i32 = 2; + + { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + fill_data_in_skiplist(sl, (1..100).step_by(step as usize)); + } + + let mut iter_opt = IterOptions::default(); + let snapshot = engine.snapshot(1, 10).unwrap(); + // boundaries are not set + assert!(snapshot.iterator_opt("lock", iter_opt.clone()).is_err()); + + let lower_bound = construct_key(1); + let upper_bound = construct_key(100); + iter_opt.set_upper_bound(upper_bound.as_bytes(), 0); + iter_opt.set_lower_bound(lower_bound.as_bytes(), 0); + + let mut iter = snapshot.iterator_opt("lock", iter_opt.clone()).unwrap(); + assert!(!iter.seek_to_first().unwrap()); + + let mut iter = snapshot.iterator_opt("default", iter_opt.clone()).unwrap(); + assert!(!iter.seek_to_first().unwrap()); + + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + iter.seek_to_first().unwrap(); + verify_key_values(&mut iter, step, 1, i32::MAX); + + // seek key that is in the skiplist + let seek_key = construct_key(11); + iter.seek(seek_key.as_bytes()).unwrap(); + verify_key_values(&mut iter, step, 11, i32::MAX); + + // seek key that is not in the skiplist + let seek_key = construct_key(12); + iter.seek(seek_key.as_bytes()).unwrap(); + verify_key_values(&mut iter, step, 13, i32::MAX); + + // with bounds + let lower_bound = construct_key(20); + let upper_bound = construct_key(40); + 
iter_opt.set_upper_bound(upper_bound.as_bytes(), 0); + iter_opt.set_lower_bound(lower_bound.as_bytes(), 0); + let mut iter = snapshot.iterator_opt("write", iter_opt).unwrap(); + + assert!(iter.seek_to_first().unwrap()); + verify_key_values(&mut iter, step, 21, 40); + + // seek a key that is below the lower bound is the same with seek_to_first + let seek_key = construct_key(11); + assert!(iter.seek(seek_key.as_bytes()).unwrap()); + verify_key_values(&mut iter, step, 21, 40); + + // seek a key that is larger or equal to upper bound won't get any key + let seek_key = construct_key(40); + assert!(!iter.seek(seek_key.as_bytes()).unwrap()); + assert!(!iter.valid().unwrap()); + + let seek_key = construct_key(22); + assert!(iter.seek(seek_key.as_bytes()).unwrap()); + verify_key_values(&mut iter, step, 23, 40); + } + + #[test] + fn test_iterator_backward() { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + let mut step: i32 = 2; + + { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + fill_data_in_skiplist(sl, (1..100).step_by(step as usize)); + } + step = -step; + + let mut iter_opt = IterOptions::default(); + let lower_bound = construct_key(1); + let upper_bound = construct_key(100); + iter_opt.set_upper_bound(upper_bound.as_bytes(), 0); + iter_opt.set_lower_bound(lower_bound.as_bytes(), 0); + + let snapshot = engine.snapshot(1, 10).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + assert!(iter.seek_to_last().unwrap()); + verify_key_values(&mut iter, step, 99, i32::MIN); + + // seek key that is in the skiplist + let seek_key = construct_key(81); + assert!(iter.seek_for_prev(seek_key.as_bytes()).unwrap()); + verify_key_values(&mut iter, step, 81, i32::MIN); + + // seek key that is in the skiplist + let seek_key = 
construct_key(80); + assert!(iter.seek_for_prev(seek_key.as_bytes()).unwrap()); + verify_key_values(&mut iter, step, 79, i32::MIN); + + let lower_bound = construct_key(20); + let upper_bound = construct_key(40); + iter_opt.set_upper_bound(upper_bound.as_bytes(), 0); + iter_opt.set_lower_bound(lower_bound.as_bytes(), 0); + let mut iter = snapshot.iterator_opt("write", iter_opt).unwrap(); + + assert!(iter.seek_to_last().unwrap()); + verify_key_values(&mut iter, step, 39, 20); + + // seek a key that is above the upper bound is the same with seek_to_last + let seek_key = construct_key(45); + assert!(iter.seek_for_prev(seek_key.as_bytes()).unwrap()); + verify_key_values(&mut iter, step, 39, 20); + + // seek a key that is less than the lower bound won't get any key + let seek_key = construct_key(19); + assert!(!iter.seek_for_prev(seek_key.as_bytes()).unwrap()); + assert!(!iter.valid().unwrap()); + + let seek_key = construct_key(38); + assert!(iter.seek_for_prev(seek_key.as_bytes()).unwrap()); + verify_key_values(&mut iter, step, 37, 20); } } diff --git a/components/region_cache_memory_engine/src/lib.rs b/components/region_cache_memory_engine/src/lib.rs index d512847efb4..fe15f4f936b 100644 --- a/components/region_cache_memory_engine/src/lib.rs +++ b/components/region_cache_memory_engine/src/lib.rs @@ -2,6 +2,8 @@ #![allow(dead_code)] #![allow(unused_variables)] +#![feature(let_chains)] +#![feature(slice_pattern)] mod engine; pub use engine::RegionCacheMemoryEngine; diff --git a/components/tikv_kv/src/rocksdb_engine.rs b/components/tikv_kv/src/rocksdb_engine.rs index 21099974d2d..551b933faeb 100644 --- a/components/tikv_kv/src/rocksdb_engine.rs +++ b/components/tikv_kv/src/rocksdb_engine.rs @@ -64,7 +64,7 @@ impl Runnable for Runner { match t { Task::Write(modifies, cb) => cb(write_modifies(&self.0.kv, modifies)), Task::Snapshot(sender) => { - let _ = sender.send(Arc::new(self.0.kv.snapshot())); + let _ = sender.send(Arc::new(self.0.kv.snapshot(None))); } Task::Pause(dur) 
=> std::thread::sleep(dur), } diff --git a/src/server/node.rs b/src/server/node.rs index fb2f28d9c1a..bf19cb6c005 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -291,7 +291,7 @@ where }; if should_check { // Check if there are only TiDB data in the engine - let snapshot = engines.kv.snapshot(); + let snapshot = engines.kv.snapshot(None); for cf in DATA_CFS { for (start, end) in TIDB_RANGES_COMPLEMENT { let mut unexpected_data_key = None; diff --git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index d567edd5add..eab0f38d749 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -53,7 +53,7 @@ impl SyncBenchRouter { cmd_resp::bind_term(&mut response, 1); match cmd.callback { Callback::Read { cb, .. } => { - let snapshot = self.db.snapshot(); + let snapshot = self.db.snapshot(None); let region = Arc::new(self.region.to_owned()); cb(ReadResponse { response, @@ -142,7 +142,7 @@ fn new_engine() -> (TempDir, RocksEngine) { #[bench] fn bench_async_snapshots_noop(b: &mut test::Bencher) { let (_dir, db) = new_engine(); - let snapshot = db.snapshot(); + let snapshot = db.snapshot(None); let resp = ReadResponse { response: RaftCmdResponse::default(), snapshot: Some(RegionSnapshot::from_snapshot( diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index 0b71978f63b..edef780ce31 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -619,7 +619,7 @@ fn test_gen_during_heavy_recv() { let snap = do_snapshot( snap_mgr.clone(), &engine, - engine.snapshot(), + engine.snapshot(None), r2, snap_term, snap_apply_state, diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 752c6aaee1a..fc84d56fd00 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -371,7 +371,7 @@ fn test_delete_files_in_range_for_titan() { 
build_sst_cf_file_list::( &mut cf_file, &engines.kv, - &engines.kv.snapshot(), + &engines.kv.snapshot(None), b"", b"{", u64::MAX, @@ -388,7 +388,7 @@ fn test_delete_files_in_range_for_titan() { build_sst_cf_file_list::( &mut cf_file_write, &engines.kv, - &engines.kv.snapshot(), + &engines.kv.snapshot(None), b"", b"{", u64::MAX, From 4626f8d774b720a10b094cc39e0a78b3cd27dd2b Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 18 Dec 2023 11:30:22 +0800 Subject: [PATCH 1064/1149] In-memory Engine: integrate HybridEngine with integration test framework (#16186) ref tikv/tikv#16141 integrate HybridEngine with integration test framework Signed-off-by: SpadeA-Tang --- Cargo.lock | 4 + components/backup-stream/tests/suite.rs | 3 +- components/cdc/tests/mod.rs | 11 +- components/resolved_ts/tests/mod.rs | 8 +- components/test_backup/Cargo.toml | 1 + components/test_backup/src/lib.rs | 3 +- components/test_raftstore/Cargo.toml | 2 + components/test_raftstore/src/cluster.rs | 64 ++++--- components/test_raftstore/src/lib.rs | 1 + components/test_raftstore/src/node.rs | 79 ++++---- components/test_raftstore/src/server.rs | 181 +++++++++++------- .../test_raftstore/src/transport_simulate.rs | 45 +++-- components/test_raftstore/src/util.rs | 139 +++++++++----- components/test_storage/Cargo.toml | 1 + components/test_storage/src/assert_storage.rs | 23 +-- components/test_storage/src/util.rs | 11 +- tests/benches/raftstore/mod.rs | 25 +-- tests/failpoints/cases/test_bootstrap.rs | 5 +- .../cases/test_cmd_epoch_checker.rs | 9 +- tests/failpoints/cases/test_conf_change.rs | 4 +- tests/failpoints/cases/test_merge.rs | 3 +- tests/failpoints/cases/test_rawkv.rs | 3 +- tests/failpoints/cases/test_replica_read.rs | 4 +- .../cases/test_replica_stale_read.rs | 15 +- tests/failpoints/cases/test_sst_recovery.rs | 7 +- tests/failpoints/cases/test_stale_read.rs | 7 +- tests/failpoints/cases/test_witness.rs | 5 +- 
tests/integrations/coprocessor/test_select.rs | 76 ++++---- tests/integrations/import/util.rs | 19 +- .../integrations/raftstore/test_bootstrap.rs | 6 +- .../raftstore/test_clear_stale_data.rs | 2 +- .../raftstore/test_compact_after_delete.rs | 4 +- .../raftstore/test_compact_lock_cf.rs | 12 +- .../raftstore/test_compact_log.rs | 13 +- .../raftstore/test_early_apply.rs | 13 +- .../integrations/raftstore/test_flashback.rs | 12 +- .../integrations/raftstore/test_hibernate.rs | 6 +- .../raftstore/test_joint_consensus.rs | 13 +- .../integrations/raftstore/test_lease_read.rs | 8 +- tests/integrations/raftstore/test_multi.rs | 46 +++-- tests/integrations/raftstore/test_prevote.rs | 20 +- .../raftstore/test_region_change_observer.rs | 3 +- .../raftstore/test_region_heartbeat.rs | 3 +- .../raftstore/test_region_info_accessor.rs | 6 +- .../raftstore/test_replication_mode.rs | 27 +-- tests/integrations/raftstore/test_snap.rs | 6 +- .../raftstore/test_split_region.rs | 6 +- .../integrations/raftstore/test_stale_peer.rs | 10 +- tests/integrations/raftstore/test_stats.rs | 24 ++- .../integrations/raftstore/test_tombstone.rs | 7 +- .../integrations/raftstore/test_transport.rs | 3 +- .../raftstore/test_update_region_size.rs | 7 +- tests/integrations/raftstore/test_witness.rs | 5 +- .../resource_metering/test_read_keys.rs | 10 +- tests/integrations/server/kv_service.rs | 4 +- tests/integrations/server/lock_manager.rs | 35 +++- tests/integrations/server_encryption.rs | 3 +- .../integrations/storage/test_raft_storage.rs | 7 +- tests/integrations/storage/test_raftkv.rs | 3 +- .../storage/test_region_info_accessor.rs | 5 +- 60 files changed, 681 insertions(+), 406 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 335c4a5301f..eb1fe04b424 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5685,6 +5685,7 @@ dependencies = [ "collections", "concurrency_manager", "crc64fast", + "engine_rocks", "engine_traits", "external_storage", "file_system", @@ -5784,6 +5785,7 @@ dependencies = [ 
"futures 0.3.15", "grpcio", "grpcio-health", + "hybrid_engine", "keys", "kvproto", "lazy_static", @@ -5793,6 +5795,7 @@ dependencies = [ "raft", "raftstore", "rand 0.8.5", + "region_cache_memory_engine", "resolved_ts", "resource_control", "resource_metering", @@ -5887,6 +5890,7 @@ version = "0.0.1" dependencies = [ "api_version", "collections", + "engine_rocks", "futures 0.3.15", "kvproto", "pd_client", diff --git a/components/backup-stream/tests/suite.rs b/components/backup-stream/tests/suite.rs index 2886bb4f5d7..434d81fff48 100644 --- a/components/backup-stream/tests/suite.rs +++ b/components/backup-stream/tests/suite.rs @@ -21,6 +21,7 @@ use backup_stream::{ utils, BackupStreamResolver, Endpoint, GetCheckpointResult, RegionCheckpointOperation, RegionSet, Service, Task, }; +use engine_rocks::RocksEngine; use futures::{executor::block_on, AsyncWriteExt, Future, Stream, StreamExt}; use grpcio::{ChannelBuilder, Server, ServerBuilder}; use kvproto::{ @@ -249,7 +250,7 @@ impl MetaStore for ErrorStore { pub struct Suite { pub endpoints: HashMap>, pub meta_store: ErrorStore, - pub cluster: Cluster, + pub cluster: Cluster>, tikv_cli: HashMap, log_backup_cli: HashMap, obs: HashMap, diff --git a/components/cdc/tests/mod.rs b/components/cdc/tests/mod.rs index afd209af2d3..b85c1db4493 100644 --- a/components/cdc/tests/mod.rs +++ b/components/cdc/tests/mod.rs @@ -130,7 +130,7 @@ fn create_event_feed( } pub struct TestSuiteBuilder { - cluster: Option>, + cluster: Option>>, memory_quota: Option, } @@ -143,7 +143,10 @@ impl TestSuiteBuilder { } #[must_use] - pub fn cluster(mut self, cluster: Cluster) -> TestSuiteBuilder { + pub fn cluster( + mut self, + cluster: Cluster>, + ) -> TestSuiteBuilder { self.cluster = Some(cluster); self } @@ -160,7 +163,7 @@ impl TestSuiteBuilder { pub fn build_with_cluster_runner(self, mut runner: F) -> TestSuite where - F: FnMut(&mut Cluster), + F: FnMut(&mut Cluster>), { init(); let memory_quota = self.memory_quota.unwrap_or(usize::MAX); @@ 
-249,7 +252,7 @@ impl TestSuiteBuilder { } pub struct TestSuite { - pub cluster: Cluster, + pub cluster: Cluster>, pub endpoints: HashMap>, pub obs: HashMap, tikv_cli: HashMap, diff --git a/components/resolved_ts/tests/mod.rs b/components/resolved_ts/tests/mod.rs index 830e2156e9f..fc3d5720929 100644 --- a/components/resolved_ts/tests/mod.rs +++ b/components/resolved_ts/tests/mod.rs @@ -4,6 +4,7 @@ use std::{sync::*, time::Duration}; use collections::HashMap; use concurrency_manager::ConcurrencyManager; +use engine_rocks::RocksEngine; use futures::{executor::block_on, stream, SinkExt}; use grpcio::{ChannelBuilder, ClientUnaryReceiver, Environment, Result, WriteFlags}; use kvproto::{ @@ -26,7 +27,7 @@ pub fn init() { } pub struct TestSuite { - pub cluster: Cluster, + pub cluster: Cluster>, pub endpoints: HashMap>, pub obs: HashMap, tikv_cli: HashMap, @@ -44,7 +45,10 @@ impl TestSuite { Self::with_cluster(count, cluster) } - pub fn with_cluster(count: usize, mut cluster: Cluster) -> Self { + pub fn with_cluster( + count: usize, + mut cluster: Cluster>, + ) -> Self { init(); let pd_cli = cluster.pd_client.clone(); let mut endpoints = HashMap::default(); diff --git a/components/test_backup/Cargo.toml b/components/test_backup/Cargo.toml index c13b3008df9..1dbe232fd9e 100644 --- a/components/test_backup/Cargo.toml +++ b/components/test_backup/Cargo.toml @@ -10,6 +10,7 @@ backup = { workspace = true } collections = { workspace = true } concurrency_manager = { workspace = true } crc64fast = "0.1" +engine_rocks = { workspace = true } engine_traits = { workspace = true } external_storage ={ workspace = true } file_system = { workspace = true } diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index 6c6eae961d7..4331f072750 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -11,6 +11,7 @@ use std::{ use api_version::{dispatch_api_version, keyspace::KvPair, ApiV1, KvFormat, RawValue}; use backup::Task; use 
collections::HashMap; +use engine_rocks::RocksEngine; use engine_traits::{CfName, IterOptions, CF_DEFAULT, CF_WRITE, DATA_KEY_PREFIX_LEN}; use external_storage::make_local_backend; use futures::{channel::mpsc as future_mpsc, executor::block_on}; @@ -39,7 +40,7 @@ use tikv_util::{ use txn_types::TimeStamp; pub struct TestSuite { - pub cluster: Cluster, + pub cluster: Cluster>, pub endpoints: HashMap>, pub tikv_cli: TikvClient, pub context: Context, diff --git a/components/test_raftstore/Cargo.toml b/components/test_raftstore/Cargo.toml index d48acc4e92b..33430ba3fa8 100644 --- a/components/test_raftstore/Cargo.toml +++ b/components/test_raftstore/Cargo.toml @@ -39,6 +39,7 @@ file_system = { workspace = true } futures = "0.3" grpcio = { workspace = true } grpcio-health = { workspace = true } +hybrid_engine = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } lazy_static = "1.3" @@ -48,6 +49,7 @@ protobuf = { version = "2.8", features = ["bytes"] } raft = { workspace = true } raftstore = { workspace = true, features = ["testexport"] } rand = "0.8" +region_cache_memory_engine = { workspace = true } resolved_ts = { workspace = true } resource_control = { workspace = true } resource_metering = { workspace = true } diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index a08f858c031..1e0c57c3706 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -12,14 +12,15 @@ use std::{ time::Duration, }; +use ::server::common::KvEngineBuilder; use collections::{HashMap, HashSet}; use crossbeam::channel::TrySendError; use encryption_export::DataKeyManager; -use engine_rocks::{RocksEngine, RocksSnapshot, RocksStatistics}; +use engine_rocks::{RocksCompactedEvent, RocksEngine, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{ - CompactExt, Engines, Iterable, MiscExt, Mutable, Peekable, RaftEngineReadOnly, SyncMutable, - WriteBatch, 
WriteBatchExt, CF_DEFAULT, CF_RAFT, + Engines, Iterable, KvEngine, Mutable, Peekable, RaftEngineReadOnly, SyncMutable, WriteBatch, + CF_DEFAULT, CF_RAFT, }; use file_system::IoRateLimiter; use futures::{self, channel::oneshot, executor::block_on, future::BoxFuture, StreamExt}; @@ -64,12 +65,15 @@ use txn_types::WriteBatchFlags; use super::*; use crate::Config; +pub trait KvEngineWithRocks = + KvEngine + KvEngineBuilder; + // We simulate 3 or 5 nodes, each has a store. // Sometimes, we use fixed id to test, which means the id // isn't allocated by pd, and node id, store id are same. // E,g, for node 1, the node id and store id are both 1. -pub trait Simulator { +pub trait Simulator { // Pass 0 to let pd allocate a node id if db is empty. // If node id > 0, the node must be created in db already, // and the node id must be the same as given argument. @@ -79,11 +83,11 @@ pub trait Simulator { &mut self, node_id: u64, cfg: Config, - engines: Engines, + engines: Engines, store_meta: Arc>, key_manager: Option>, - router: RaftRouter, - system: RaftBatchSystem, + router: RaftRouter, + system: RaftBatchSystem, resource_manager: &Option>, ) -> ServerResult; fn stop_node(&mut self, node_id: u64); @@ -92,7 +96,7 @@ pub trait Simulator { &self, node_id: u64, request: RaftCmdRequest, - cb: Callback, + cb: Callback, ) -> Result<()> { self.async_command_on_node_with_opts(node_id, request, cb, Default::default()) } @@ -100,13 +104,13 @@ pub trait Simulator { &self, node_id: u64, request: RaftCmdRequest, - cb: Callback, + cb: Callback, opts: RaftCmdExtraOpts, ) -> Result<()>; fn send_raft_msg(&mut self, msg: RaftMessage) -> Result<()>; fn get_snap_dir(&self, node_id: u64) -> String; fn get_snap_mgr(&self, node_id: u64) -> &SnapManager; - fn get_router(&self, node_id: u64) -> Option>; + fn get_router(&self, node_id: u64) -> Option>; fn add_send_filter(&mut self, node_id: u64, filter: Box); fn clear_send_filters(&mut self, node_id: u64); fn add_recv_filter(&mut self, node_id: u64, 
filter: Box); @@ -124,7 +128,7 @@ pub trait Simulator { timeout: Duration, ) -> Result { let node_id = request.get_header().get_peer().get_store_id(); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb::(&request); self.async_read(node_id, batch_id, request, cb); rx.recv_timeout(timeout) .map_err(|_| Error::Timeout(format!("request timeout for {:?}", timeout))) @@ -135,7 +139,7 @@ pub trait Simulator { node_id: u64, batch_id: Option, request: RaftCmdRequest, - cb: Callback, + cb: Callback, ); fn call_command_on_node( @@ -144,7 +148,7 @@ pub trait Simulator { request: RaftCmdRequest, timeout: Duration, ) -> Result { - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb::(&request); match self.async_command_on_node(node_id, request, cb) { Ok(()) => {} @@ -159,17 +163,17 @@ pub trait Simulator { } } -pub struct Cluster { +pub struct Cluster> { pub cfg: Config, leaders: HashMap, pub count: usize, pub paths: Vec, - pub dbs: Vec>, + pub dbs: Vec>, pub store_metas: HashMap>>, key_managers: Vec>>, pub io_rate_limiter: Option>, - pub engines: HashMap>, + pub engines: HashMap>, key_managers_map: HashMap>>, pub labels: HashMap>, group_props: HashMap, @@ -182,7 +186,11 @@ pub struct Cluster { resource_manager: Option>, } -impl Cluster { +impl Cluster +where + EK: KvEngineWithRocks, + T: Simulator, +{ // Create the default Store cluster. 
pub fn new( id: u64, @@ -190,7 +198,7 @@ impl Cluster { sim: Arc>, pd_client: Arc, api_version: ApiVersion, - ) -> Cluster { + ) -> Cluster { // TODO: In the future, maybe it's better to test both case where // `use_delete_range` is true and false Cluster { @@ -250,7 +258,7 @@ impl Cluster { assert!(self.sst_workers_map.insert(node_id, offset).is_none()); } - fn create_engine(&mut self, router: Option>) { + fn create_engine(&mut self, router: Option>) { let (engines, key_manager, dir, sst_worker, kv_statistics, raft_statistics) = create_test_engine(router, self.io_rate_limiter.clone(), &self.cfg); self.dbs.push(engines); @@ -405,7 +413,7 @@ impl Cluster { tikv_util::thread_group::set_properties(previous_prop); } - pub fn get_engine(&self, node_id: u64) -> RocksEngine { + pub fn get_engine(&self, node_id: u64) -> EK { self.engines[&node_id].kv.clone() } @@ -413,7 +421,7 @@ impl Cluster { self.engines[&node_id].raft.clone() } - pub fn get_all_engines(&self, node_id: u64) -> Engines { + pub fn get_all_engines(&self, node_id: u64) -> Engines { self.engines[&node_id].clone() } @@ -770,7 +778,7 @@ impl Cluster { self.leaders.remove(®ion_id); } - pub fn assert_quorum bool>(&self, mut condition: F) { + pub fn assert_quorum bool>(&self, mut condition: F) { if self.engines.is_empty() { return; } @@ -983,7 +991,7 @@ impl Cluster { let region_id = req.get_header().get_region_id(); let leader = self.leader_of_region(region_id).unwrap(); req.mut_header().set_peer(leader.clone()); - let (cb, mut rx) = make_cb(&req); + let (cb, mut rx) = make_cb::(&req); self.sim .rl() .async_command_on_node_with_opts(leader.get_store_id(), req, cb, opts)?; @@ -1325,7 +1333,7 @@ impl Cluster { } } - pub fn restore_kv_meta(&self, region_id: u64, store_id: u64, snap: &RocksSnapshot) { + pub fn restore_kv_meta(&self, region_id: u64, store_id: u64, snap: &EK::Snapshot) { let (meta_start, meta_end) = ( keys::region_meta_prefix(region_id), keys::region_meta_prefix(region_id + 1), @@ -1453,7 +1461,7 @@ 
impl Cluster { &mut self, region: &metapb::Region, split_key: &[u8], - cb: Callback, + cb: Callback, ) { let leader = self.leader_of_region(region.get_id()).unwrap(); let router = self.sim.rl().get_router(leader.get_store_id()).unwrap(); @@ -1693,7 +1701,7 @@ impl Cluster { ) } - pub fn merge_region(&mut self, source: u64, target: u64, cb: Callback) { + pub fn merge_region(&mut self, source: u64, target: u64, cb: Callback) { let mut req = self.new_prepare_merge(source, target); let leader = self.leader_of_region(source).unwrap(); req.mut_header().set_peer(leader.clone()); @@ -1864,7 +1872,7 @@ impl Cluster { ctx } - pub fn get_router(&self, node_id: u64) -> Option> { + pub fn get_router(&self, node_id: u64) -> Option> { self.sim.rl().get_router(node_id) } @@ -1966,7 +1974,7 @@ impl Cluster { } } -impl Drop for Cluster { +impl> Drop for Cluster { fn drop(&mut self) { test_util::clear_failpoints(); self.shutdown(); diff --git a/components/test_raftstore/src/lib.rs b/components/test_raftstore/src/lib.rs index 04dfbd24de1..be38155af6c 100644 --- a/components/test_raftstore/src/lib.rs +++ b/components/test_raftstore/src/lib.rs @@ -1,6 +1,7 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. 
#![feature(let_chains)] +#![feature(trait_alias)] #[macro_use] extern crate lazy_static; diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index f429f27ff8b..7564da0e27e 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -8,9 +8,9 @@ use std::{ use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; use encryption_export::DataKeyManager; -use engine_rocks::{RocksEngine, RocksSnapshot}; +use engine_rocks::RocksEngine; use engine_test::raft::RaftTestEngine; -use engine_traits::{Engines, MiscExt, Peekable}; +use engine_traits::{Engines, KvEngine}; use kvproto::{ kvrpcpb::ApiVersion, metapb, @@ -49,18 +49,18 @@ use tikv_util::{ use super::*; use crate::Config; -pub struct ChannelTransportCore { +pub struct ChannelTransportCore { snap_paths: HashMap, - routers: HashMap>>, + routers: HashMap, EK>>, } #[derive(Clone)] -pub struct ChannelTransport { - core: Arc>, +pub struct ChannelTransport { + core: Arc>>, } -impl ChannelTransport { - pub fn new() -> ChannelTransport { +impl ChannelTransport { + pub fn new() -> ChannelTransport { ChannelTransport { core: Arc::new(Mutex::new(ChannelTransportCore { snap_paths: HashMap::default(), @@ -70,13 +70,13 @@ impl ChannelTransport { } } -impl Default for ChannelTransport { +impl Default for ChannelTransport { fn default() -> Self { Self::new() } } -impl Transport for ChannelTransport { +impl Transport for ChannelTransport { fn send(&mut self, msg: RaftMessage) -> Result<()> { let from_store = msg.get_from_peer().get_store_id(); let to_store = msg.get_to_peer().get_store_id(); @@ -149,22 +149,22 @@ impl Transport for ChannelTransport { fn flush(&mut self) {} } -type SimulateChannelTransport = SimulateTransport; +type SimulateChannelTransport = SimulateTransport, EK>; -pub struct NodeCluster { - trans: ChannelTransport, +pub struct NodeCluster { + trans: ChannelTransport, pd_client: Arc, - nodes: HashMap>, + nodes: 
HashMap>, snap_mgrs: HashMap, cfg_controller: HashMap, - simulate_trans: HashMap, + simulate_trans: HashMap>, concurrency_managers: HashMap, #[allow(clippy::type_complexity)] - post_create_coprocessor_host: Option)>>, + post_create_coprocessor_host: Option)>>, } -impl NodeCluster { - pub fn new(pd_client: Arc) -> NodeCluster { +impl NodeCluster { + pub fn new(pd_client: Arc) -> NodeCluster { NodeCluster { trans: ChannelTransport::new(), pd_client, @@ -178,12 +178,12 @@ impl NodeCluster { } } -impl NodeCluster { +impl NodeCluster { #[allow(dead_code)] pub fn get_node_router( &self, node_id: u64, - ) -> SimulateTransport> { + ) -> SimulateTransport, EK> { self.trans .core .lock() @@ -198,17 +198,14 @@ impl NodeCluster { // first argument of `op` is the node_id. // Set this before invoking `run_node`. #[allow(clippy::type_complexity)] - pub fn post_create_coprocessor_host( - &mut self, - op: Box)>, - ) { + pub fn post_create_coprocessor_host(&mut self, op: Box)>) { self.post_create_coprocessor_host = Some(op) } pub fn get_node( &mut self, node_id: u64, - ) -> Option<&mut Node> { + ) -> Option<&mut Node> { self.nodes.get_mut(&node_id) } @@ -221,16 +218,16 @@ impl NodeCluster { } } -impl Simulator for NodeCluster { +impl Simulator for NodeCluster { fn run_node( &mut self, node_id: u64, cfg: Config, - engines: Engines, + engines: Engines, store_meta: Arc>, key_manager: Option>, - router: RaftRouter, - system: RaftBatchSystem, + router: RaftRouter, + system: RaftBatchSystem, _resource_manager: &Option>, ) -> ServerResult { assert!(node_id == 0 || !self.nodes.contains_key(&node_id)); @@ -434,7 +431,7 @@ impl Simulator for NodeCluster { &self, node_id: u64, request: RaftCmdRequest, - cb: Callback, + cb: Callback, opts: RaftCmdExtraOpts, ) -> Result<()> { if !self @@ -465,7 +462,7 @@ impl Simulator for NodeCluster { node_id: u64, batch_id: Option, request: RaftCmdRequest, - cb: Callback, + cb: Callback, ) { if !self .trans @@ -514,14 +511,25 @@ impl Simulator for NodeCluster 
{ trans.routers.get_mut(&node_id).unwrap().clear_filters(); } - fn get_router(&self, node_id: u64) -> Option> { + fn get_router(&self, node_id: u64) -> Option> { self.nodes.get(&node_id).map(|node| node.get_router()) } } // Compare to server cluster, node cluster does not have server layer and // storage layer. -pub fn new_node_cluster(id: u64, count: usize) -> Cluster { +pub fn new_node_cluster(id: u64, count: usize) -> Cluster> { + let pd_client = Arc::new(TestPdClient::new(id, false)); + let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); + Cluster::new(id, count, sim, pd_client, ApiVersion::V1) +} + +// the hybrid engine with disk engine "RocksEngine" and region cache engine +// "RegionCacheMemoryEngine" is used in the node cluster. +pub fn new_node_cluster_with_hybrid_engine( + id: u64, + count: usize, +) -> Cluster> { let pd_client = Arc::new(TestPdClient::new(id, false)); let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); Cluster::new(id, count, sim, pd_client, ApiVersion::V1) @@ -529,7 +537,10 @@ pub fn new_node_cluster(id: u64, count: usize) -> Cluster { // This cluster does not support batch split, we expect it to transfer the // `BatchSplit` request to `split` request -pub fn new_incompatible_node_cluster(id: u64, count: usize) -> Cluster { +pub fn new_incompatible_node_cluster( + id: u64, + count: usize, +) -> Cluster> { let pd_client = Arc::new(TestPdClient::new(id, true)); let sim = Arc::new(RwLock::new(NodeCluster::new(Arc::clone(&pd_client)))); Cluster::new(id, count, sim, pd_client, ApiVersion::V1) diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 20e651ea1dc..bbc4ee2cf49 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -13,9 +13,9 @@ use causal_ts::CausalTsProviderImpl; use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; use encryption_export::DataKeyManager; -use 
engine_rocks::{RocksEngine, RocksSnapshot}; +use engine_rocks::RocksEngine; use engine_test::raft::RaftTestEngine; -use engine_traits::{Engines, MiscExt}; +use engine_traits::{Engines, KvEngine}; use futures::executor::block_on; use grpcio::{ChannelBuilder, EnvBuilder, Environment, Error as GrpcError, Service}; use grpcio_health::HealthService; @@ -86,12 +86,12 @@ use txn_types::TxnExtraScheduler; use super::*; use crate::Config; -type SimulateStoreTransport = SimulateTransport>; +type SimulateStoreTransport = SimulateTransport, EK>; -pub type SimulateEngine = RaftKv; -type SimulateRaftExtension = ::RaftExtension; -type SimulateServerTransport = - SimulateTransport>; +pub type SimulateEngine = RaftKv>; +type SimulateRaftExtension = as Engine>::RaftExtension; +type SimulateServerTransport = + SimulateTransport, PdStoreAddrResolver>, EK>; #[derive(Default, Clone)] pub struct AddressMap { @@ -127,29 +127,29 @@ impl StoreAddrResolver for AddressMap { } } -struct ServerMeta { - node: Node, - server: Server, - sim_router: SimulateStoreTransport, - sim_trans: SimulateServerTransport, - raw_router: RaftRouter, - raw_apply_router: ApplyRouter, - gc_worker: GcWorker>, +struct ServerMeta { + node: Node, + server: Server>, + sim_router: SimulateStoreTransport, + sim_trans: SimulateServerTransport, + raw_router: RaftRouter, + raw_apply_router: ApplyRouter, + gc_worker: GcWorker>>, rts_worker: Option>, rsmeter_cleanup: Box, } type PendingServices = Vec Service>>; -type CopHooks = Vec)>>; +type CopHooks = Vec)>>; -pub struct ServerCluster { - metas: HashMap, +pub struct ServerCluster { + metas: HashMap>, addrs: AddressMap, - pub storages: HashMap, + pub storages: HashMap>, pub region_info_accessors: HashMap, - pub importers: HashMap>>, + pub importers: HashMap>>, pub pending_services: HashMap, - pub coprocessor_hooks: HashMap, + pub coprocessor_hooks: HashMap>, pub health_services: HashMap, pub security_mgr: Arc, pub txn_extra_schedulers: HashMap>, @@ -163,8 +163,8 @@ pub struct 
ServerCluster { pub causal_ts_providers: HashMap>, } -impl ServerCluster { - pub fn new(pd_client: Arc) -> ServerCluster { +impl ServerCluster { + pub fn new(pd_client: Arc) -> ServerCluster { let env = Arc::new( EnvBuilder::new() .cq_count(2) @@ -211,19 +211,16 @@ impl ServerCluster { self.addrs.get(node_id).unwrap() } - pub fn get_apply_router(&self, node_id: u64) -> ApplyRouter { + pub fn get_apply_router(&self, node_id: u64) -> ApplyRouter { self.metas.get(&node_id).unwrap().raw_apply_router.clone() } - pub fn get_server_router(&self, node_id: u64) -> SimulateStoreTransport { + pub fn get_server_router(&self, node_id: u64) -> SimulateStoreTransport { self.metas.get(&node_id).unwrap().sim_router.clone() } /// To trigger GC manually. - pub fn get_gc_worker( - &self, - node_id: u64, - ) -> &GcWorker> { + pub fn get_gc_worker(&self, node_id: u64) -> &GcWorker>> { &self.metas.get(&node_id).unwrap().gc_worker } @@ -264,11 +261,11 @@ impl ServerCluster { &mut self, node_id: u64, mut cfg: Config, - engines: Engines, + engines: Engines, store_meta: Arc>, key_manager: Option>, - router: RaftRouter, - system: RaftBatchSystem, + router: RaftRouter, + system: RaftBatchSystem, resource_manager: &Option>, ) -> ServerResult { let (tmp_str, tmp) = if node_id == 0 || !self.snap_paths.contains_key(&node_id) { @@ -496,7 +493,7 @@ impl ServerCluster { ); let debugger = DebuggerImpl::new( - engines.clone(), + Engines::new(engines.kv.get_disk_engine().clone(), engines.raft.clone()), ConfigController::new(cfg.tikv.clone()), Some(store.clone()), ); @@ -668,16 +665,16 @@ impl ServerCluster { } } -impl Simulator for ServerCluster { +impl Simulator for ServerCluster { fn run_node( &mut self, node_id: u64, cfg: Config, - engines: Engines, + engines: Engines, store_meta: Arc>, key_manager: Option>, - router: RaftRouter, - system: RaftBatchSystem, + router: RaftRouter, + system: RaftBatchSystem, resource_manager: &Option>, ) -> ServerResult { dispatch_api_version!( @@ -728,7 +725,7 @@ impl 
Simulator for ServerCluster { &self, node_id: u64, request: RaftCmdRequest, - cb: Callback, + cb: Callback, opts: RaftCmdExtraOpts, ) -> Result<()> { let router = match self.metas.get(&node_id) { @@ -743,7 +740,7 @@ impl Simulator for ServerCluster { node_id: u64, batch_id: Option, request: RaftCmdRequest, - cb: Callback, + cb: Callback, ) { match self.metas.get_mut(&node_id) { None => { @@ -800,13 +797,13 @@ impl Simulator for ServerCluster { .clear_filters(); } - fn get_router(&self, node_id: u64) -> Option> { + fn get_router(&self, node_id: u64) -> Option> { self.metas.get(&node_id).map(|m| m.raw_router.clone()) } } -impl Cluster { - pub fn must_get_snapshot_of_region(&mut self, region_id: u64) -> RegionSnapshot { +impl Cluster> { + pub fn must_get_snapshot_of_region(&mut self, region_id: u64) -> RegionSnapshot { self.must_get_snapshot_of_region_with_ctx(region_id, Default::default()) } @@ -814,8 +811,8 @@ impl Cluster { &mut self, region_id: u64, snap_ctx: SnapContext<'_>, - ) -> RegionSnapshot { - let mut try_snapshot = || -> Option> { + ) -> RegionSnapshot { + let mut try_snapshot = || -> Option> { let leader = self.leader_of_region(region_id)?; let store_id = leader.store_id; let epoch = self.get_region_epoch(region_id); @@ -840,7 +837,7 @@ impl Cluster { panic!("failed to get snapshot of region {}", region_id); } - pub fn raft_extension(&self, node_id: u64) -> SimulateRaftExtension { + pub fn raft_extension(&self, node_id: u64) -> SimulateRaftExtension { self.sim.rl().storages[&node_id].raft_extension() } @@ -848,11 +845,7 @@ impl Cluster { self.sim.rl().get_addr(node_id) } - pub fn register_hook( - &self, - node_id: u64, - register: Box)>, - ) { + pub fn register_hook(&self, node_id: u64, register: Box)>) { self.sim .wl() .coprocessor_hooks @@ -862,7 +855,21 @@ impl Cluster { } } -pub fn new_server_cluster(id: u64, count: usize) -> Cluster { +pub fn new_server_cluster( + id: u64, + count: usize, +) -> Cluster> { + let pd_client = 
Arc::new(TestPdClient::new(id, false)); + let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); + Cluster::new(id, count, sim, pd_client, ApiVersion::V1) +} + +// the hybrid engine with disk engine "RocksEngine" and region cache engine +// "RegionCacheMemoryEngine" is used in the server cluster. +pub fn new_server_cluster_with_hybrid_engine( + id: u64, + count: usize, +) -> Cluster> { let pd_client = Arc::new(TestPdClient::new(id, false)); let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); Cluster::new(id, count, sim, pd_client, ApiVersion::V1) @@ -872,32 +879,49 @@ pub fn new_server_cluster_with_api_ver( id: u64, count: usize, api_ver: ApiVersion, -) -> Cluster { +) -> Cluster> { let pd_client = Arc::new(TestPdClient::new(id, false)); let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); Cluster::new(id, count, sim, pd_client, api_ver) } -pub fn new_incompatible_server_cluster(id: u64, count: usize) -> Cluster { +pub fn new_incompatible_server_cluster( + id: u64, + count: usize, +) -> Cluster> { let pd_client = Arc::new(TestPdClient::new(id, true)); let sim = Arc::new(RwLock::new(ServerCluster::new(Arc::clone(&pd_client)))); Cluster::new(id, count, sim, pd_client, ApiVersion::V1) } -pub fn must_new_cluster_mul(count: usize) -> (Cluster, metapb::Peer, Context) { +pub fn must_new_cluster_mul( + count: usize, +) -> ( + Cluster>, + metapb::Peer, + Context, +) { must_new_and_configure_cluster_mul(count, |_| ()) } pub fn must_new_and_configure_cluster( - configure: impl FnMut(&mut Cluster), -) -> (Cluster, metapb::Peer, Context) { + configure: impl FnMut(&mut Cluster>), +) -> ( + Cluster>, + metapb::Peer, + Context, +) { must_new_and_configure_cluster_mul(1, configure) } fn must_new_and_configure_cluster_mul( count: usize, - mut configure: impl FnMut(&mut Cluster), -) -> (Cluster, metapb::Peer, Context) { + mut configure: impl FnMut(&mut Cluster>), +) -> ( + Cluster>, + metapb::Peer, + Context, +) { 
let mut cluster = new_server_cluster(0, count); configure(&mut cluster); cluster.run(); @@ -912,13 +936,21 @@ fn must_new_and_configure_cluster_mul( (cluster, leader, ctx) } -pub fn must_new_cluster_and_kv_client() -> (Cluster, TikvClient, Context) { +pub fn must_new_cluster_and_kv_client() -> ( + Cluster>, + TikvClient, + Context, +) { must_new_cluster_and_kv_client_mul(1) } pub fn must_new_cluster_and_kv_client_mul( count: usize, -) -> (Cluster, TikvClient, Context) { +) -> ( + Cluster>, + TikvClient, + Context, +) { let (cluster, leader, ctx) = must_new_cluster_mul(count); let env = Arc::new(Environment::new(1)); @@ -929,7 +961,11 @@ pub fn must_new_cluster_and_kv_client_mul( (cluster, client, ctx) } -pub fn must_new_cluster_and_debug_client() -> (Cluster, DebugClient, u64) { +pub fn must_new_cluster_and_debug_client() -> ( + Cluster>, + DebugClient, + u64, +) { let (cluster, leader, _) = must_new_cluster_mul(1); let env = Arc::new(Environment::new(1)); @@ -940,8 +976,12 @@ pub fn must_new_cluster_and_debug_client() -> (Cluster, DebugClie (cluster, client, leader.get_store_id()) } -pub fn must_new_cluster_kv_client_and_debug_client() --> (Cluster, TikvClient, DebugClient, Context) { +pub fn must_new_cluster_kv_client_and_debug_client() -> ( + Cluster>, + TikvClient, + DebugClient, + Context, +) { let (cluster, leader, ctx) = must_new_cluster_mul(1); let env = Arc::new(Environment::new(1)); @@ -955,8 +995,12 @@ pub fn must_new_cluster_kv_client_and_debug_client() } pub fn must_new_and_configure_cluster_and_kv_client( - configure: impl FnMut(&mut Cluster), -) -> (Cluster, TikvClient, Context) { + configure: impl FnMut(&mut Cluster>), +) -> ( + Cluster>, + TikvClient, + Context, +) { let (cluster, leader, ctx) = must_new_and_configure_cluster(configure); let env = Arc::new(Environment::new(1)); @@ -967,7 +1011,12 @@ pub fn must_new_and_configure_cluster_and_kv_client( (cluster, client, ctx) } -pub fn setup_cluster() -> (Cluster, TikvClient, String, Context) { +pub 
fn setup_cluster() -> ( + Cluster>, + TikvClient, + String, + Context, +) { let mut cluster = new_server_cluster(0, 3); cluster.run(); diff --git a/components/test_raftstore/src/transport_simulate.rs b/components/test_raftstore/src/transport_simulate.rs index ef569e3987a..4c21552cee5 100644 --- a/components/test_raftstore/src/transport_simulate.rs +++ b/components/test_raftstore/src/transport_simulate.rs @@ -11,7 +11,7 @@ use std::{ use collections::{HashMap, HashSet}; use crossbeam::channel::TrySendError; -use engine_rocks::{RocksEngine, RocksSnapshot}; +use engine_traits::KvEngine; use kvproto::{raft_cmdpb::RaftCmdRequest, raft_serverpb::RaftMessage}; use raft::eraftpb::MessageType; use raftstore::{ @@ -140,16 +140,19 @@ impl Filter for DelayFilter { } #[derive(Clone)] -pub struct SimulateTransport { +pub struct SimulateTransport { filters: Arc>>>, ch: C, + + _p: PhantomData, } -impl SimulateTransport { - pub fn new(ch: C) -> SimulateTransport { +impl SimulateTransport { + pub fn new(ch: C) -> SimulateTransport { SimulateTransport { filters: Arc::new(RwLock::new(vec![])), ch, + _p: PhantomData, } } @@ -195,7 +198,7 @@ where res } -impl Transport for SimulateTransport { +impl Transport for SimulateTransport { fn send(&mut self, m: RaftMessage) -> Result<()> { let ch = &mut self.ch; filter_send(&self.filters, m, |m| ch.send(m)) @@ -214,47 +217,49 @@ impl Transport for SimulateTransport { } } -impl> StoreRouter for SimulateTransport { - fn send(&self, msg: StoreMsg) -> Result<()> { +impl> StoreRouter for SimulateTransport { + fn send(&self, msg: StoreMsg) -> Result<()> { StoreRouter::send(&self.ch, msg) } } -impl> ProposalRouter for SimulateTransport { +impl> ProposalRouter<::Snapshot> + for SimulateTransport +{ fn send( &self, - cmd: RaftCommand, - ) -> std::result::Result<(), TrySendError>> { - ProposalRouter::::send(&self.ch, cmd) + cmd: RaftCommand<::Snapshot>, + ) -> std::result::Result<(), TrySendError::Snapshot>>> { + 
ProposalRouter::<::Snapshot>::send(&self.ch, cmd) } } -impl> CasualRouter for SimulateTransport { - fn send(&self, region_id: u64, msg: CasualMessage) -> Result<()> { - CasualRouter::::send(&self.ch, region_id, msg) +impl> CasualRouter for SimulateTransport { + fn send(&self, region_id: u64, msg: CasualMessage) -> Result<()> { + CasualRouter::::send(&self.ch, region_id, msg) } } -impl> SignificantRouter for SimulateTransport { - fn significant_send(&self, region_id: u64, msg: SignificantMsg) -> Result<()> { +impl> SignificantRouter for SimulateTransport { + fn significant_send(&self, region_id: u64, msg: SignificantMsg) -> Result<()> { self.ch.significant_send(region_id, msg) } } -impl> RaftStoreRouter for SimulateTransport { +impl> RaftStoreRouter for SimulateTransport { fn send_raft_msg(&self, msg: RaftMessage) -> Result<()> { filter_send(&self.filters, msg, |m| self.ch.send_raft_msg(m)) } - fn broadcast_normal(&self, _: impl FnMut() -> PeerMsg) {} + fn broadcast_normal(&self, _: impl FnMut() -> PeerMsg) {} } -impl> LocalReadRouter for SimulateTransport { +impl> LocalReadRouter for SimulateTransport { fn read( &mut self, read_id: Option, req: RaftCmdRequest, - cb: Callback, + cb: Callback, ) -> RaftStoreResult<()> { self.ch.read(read_id, req, cb) } diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 0bb948f13c9..8933f4dca74 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -13,7 +13,9 @@ use collections::HashMap; use encryption_export::{ data_key_manager_from_config, DataKeyManager, FileConfig, MasterKeyConfig, }; -use engine_rocks::{config::BlobRunMode, RocksEngine, RocksSnapshot, RocksStatistics}; +use engine_rocks::{ + config::BlobRunMode, RocksCompactedEvent, RocksEngine, RocksSnapshot, RocksStatistics, +}; use engine_test::raft::RaftTestEngine; use engine_traits::{ CfName, CfNamesExt, Engines, Iterable, KvEngine, Peekable, RaftEngineDebug, RaftEngineReadOnly, @@ 
-23,6 +25,7 @@ use fail::fail_point; use file_system::IoRateLimiter; use futures::{executor::block_on, future::BoxFuture, StreamExt}; use grpcio::{ChannelBuilder, Environment}; +use hybrid_engine::HybridEngine; use kvproto::{ encryptionpb::EncryptionMethod, kvrpcpb::{PrewriteRequestPessimisticAction::*, *}, @@ -44,7 +47,8 @@ use raftstore::{ RaftRouterCompactedEventSender, Result, }; use rand::{seq::SliceRandom, RngCore}; -use server::common::ConfiguredRaftEngine; +use region_cache_memory_engine::RegionCacheMemoryEngine; +use server::common::{ConfiguredRaftEngine, KvEngineBuilder}; use tempfile::TempDir; use test_pd_client::TestPdClient; use tikv::{ @@ -61,7 +65,9 @@ use tikv_util::{ }; use txn_types::Key; -use crate::{Cluster, Config, RawEngine, ServerCluster, Simulator}; +use crate::{Cluster, Config, KvEngineWithRocks, RawEngine, ServerCluster, Simulator}; + +pub type HybridEngineImpl = HybridEngine; pub fn must_get( engine: &impl RawEngine, @@ -396,14 +402,20 @@ pub fn check_raft_cmd_request(cmd: &RaftCmdRequest) -> bool { is_read } -pub fn make_cb( +pub fn make_cb_rocks( cmd: &RaftCmdRequest, ) -> (Callback, future::Receiver) { + make_cb::(cmd) +} + +pub fn make_cb( + cmd: &RaftCmdRequest, +) -> (Callback, future::Receiver) { let is_read = check_raft_cmd_request(cmd); let (tx, rx) = future::bounded(1, future::WakePolicy::Immediately); let mut detector = CallbackLeakDetector::default(); let cb = if is_read { - Callback::read(Box::new(move |resp: ReadResponse| { + Callback::read(Box::new(move |resp: ReadResponse| { detector.called = true; // we don't care error actually. let _ = tx.send(resp.response); @@ -418,12 +430,12 @@ pub fn make_cb( (cb, rx) } -pub fn make_cb_ext( +pub fn make_cb_ext( cmd: &RaftCmdRequest, proposed: Option, committed: Option, -) -> (Callback, future::Receiver) { - let (cb, receiver) = make_cb(cmd); +) -> (Callback, future::Receiver) { + let (cb, receiver) = make_cb::(cmd); if let Callback::Write { cb, .. 
} = cb { (Callback::write_ext(cb, proposed, committed), receiver) } else { @@ -432,8 +444,8 @@ pub fn make_cb_ext( } // Issue a read request on the specified peer. -pub fn read_on_peer( - cluster: &mut Cluster, +pub fn read_on_peer>( + cluster: &mut Cluster, peer: metapb::Peer, region: metapb::Region, key: &[u8], @@ -450,8 +462,8 @@ pub fn read_on_peer( cluster.read(None, request, timeout) } -pub fn async_read_on_peer( - cluster: &mut Cluster, +pub fn async_read_on_peer>( + cluster: &mut Cluster, peer: metapb::Peer, region: metapb::Region, key: &[u8], @@ -476,10 +488,10 @@ pub fn async_read_on_peer( }) } -pub fn batch_read_on_peer( - cluster: &mut Cluster, +pub fn batch_read_on_peer>( + cluster: &mut Cluster, requests: &[(metapb::Peer, metapb::Region)], -) -> Vec> { +) -> Vec> { let batch_id = Some(ThreadReadId::new()); let (tx, rx) = mpsc::sync_channel(3); let mut results = vec![]; @@ -510,8 +522,8 @@ pub fn batch_read_on_peer( results.into_iter().map(|resp| resp.1).collect() } -pub fn read_index_on_peer( - cluster: &mut Cluster, +pub fn read_index_on_peer>( + cluster: &mut Cluster, peer: metapb::Peer, region: metapb::Region, read_quorum: bool, @@ -527,8 +539,8 @@ pub fn read_index_on_peer( cluster.read(None, request, timeout) } -pub fn async_read_index_on_peer( - cluster: &mut Cluster, +pub fn async_read_index_on_peer>( + cluster: &mut Cluster, peer: metapb::Peer, region: metapb::Region, key: &[u8], @@ -556,12 +568,12 @@ pub fn async_read_index_on_peer( }) } -pub fn async_command_on_node( - cluster: &mut Cluster, +pub fn async_command_on_node>( + cluster: &mut Cluster, node_id: u64, request: RaftCmdRequest, ) -> BoxFuture<'static, RaftCmdResponse> { - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb::(&request); cluster .sim .rl() @@ -583,8 +595,8 @@ pub fn must_get_value(resp: &RaftCmdResponse) -> Vec { resp.get_responses()[0].get_get().get_value().to_vec() } -pub fn must_read_on_peer( - cluster: &mut Cluster, +pub fn must_read_on_peer>( + 
cluster: &mut Cluster, peer: metapb::Peer, region: metapb::Region, key: &[u8], @@ -602,8 +614,8 @@ pub fn must_read_on_peer( } } -pub fn must_error_read_on_peer( - cluster: &mut Cluster, +pub fn must_error_read_on_peer>( + cluster: &mut Cluster, peer: metapb::Peer, region: metapb::Region, key: &[u8], @@ -628,19 +640,22 @@ pub fn must_contains_error(resp: &RaftCmdResponse, msg: &str) { assert!(err_msg.contains(msg), "{:?}", resp); } -pub fn create_test_engine( +pub fn create_test_engine( // TODO: pass it in for all cases. - router: Option>, + router: Option>, limiter: Option>, cfg: &Config, ) -> ( - Engines, + Engines, Option>, TempDir, LazyWorker, Arc, Option>, -) { +) +where + EK: KvEngine + KvEngineBuilder, +{ let dir = test_util::temp_dir("test_cluster", cfg.prefer_mem); let mut cfg = cfg.clone(); cfg.storage.data_dir = dir.path().to_str().unwrap().to_string(); @@ -668,8 +683,9 @@ pub fn create_test_engine( })); } let factory = builder.build(); - let engine = factory.create_shared_db(dir.path()).unwrap(); - let engines = Engines::new(engine, raft_engine); + let disk_engine = factory.create_shared_db(dir.path()).unwrap(); + let kv_engine: EK = KvEngineBuilder::build(disk_engine); + let engines = Engines::new(kv_engine, raft_engine); ( engines, key_manager, @@ -746,8 +762,8 @@ pub fn configure_for_lease_read( election_timeout } -pub fn configure_for_enable_titan( - cluster: &mut Cluster, +pub fn configure_for_enable_titan>( + cluster: &mut Cluster, min_blob_size: ReadableSize, ) { cluster.cfg.rocksdb.titan.enabled = true; @@ -758,11 +774,15 @@ pub fn configure_for_enable_titan( cluster.cfg.rocksdb.defaultcf.titan.min_gc_batch_size = ReadableSize::kb(0); } -pub fn configure_for_disable_titan(cluster: &mut Cluster) { +pub fn configure_for_disable_titan>( + cluster: &mut Cluster, +) { cluster.cfg.rocksdb.titan.enabled = false; } -pub fn configure_for_encryption(cluster: &mut Cluster) { +pub fn configure_for_encryption>( + cluster: &mut Cluster, +) { let manifest_dir 
= Path::new(env!("CARGO_MANIFEST_DIR")); let master_key_file = manifest_dir.join("src/master-key.data"); @@ -776,8 +796,8 @@ pub fn configure_for_encryption(cluster: &mut Cluster) { } } -pub fn configure_for_causal_ts( - cluster: &mut Cluster, +pub fn configure_for_causal_ts>( + cluster: &mut Cluster, renew_interval: &str, renew_batch_min_size: u32, ) { @@ -787,24 +807,24 @@ pub fn configure_for_causal_ts( } /// Keep putting random kvs until specified size limit is reached. -pub fn put_till_size( - cluster: &mut Cluster, +pub fn put_till_size>( + cluster: &mut Cluster, limit: u64, range: &mut dyn Iterator, ) -> Vec { put_cf_till_size(cluster, CF_DEFAULT, limit, range) } -pub fn put_till_count( - cluster: &mut Cluster, +pub fn put_till_count>( + cluster: &mut Cluster, limit: u64, range: &mut dyn Iterator, ) -> Vec { put_cf_till_count(cluster, CF_WRITE, limit, range) } -pub fn put_cf_till_size( - cluster: &mut Cluster, +pub fn put_cf_till_size>( + cluster: &mut Cluster, cf: &'static str, limit: u64, range: &mut dyn Iterator, @@ -835,8 +855,8 @@ pub fn put_cf_till_size( key.into_bytes() } -pub fn put_cf_till_count( - cluster: &mut Cluster, +pub fn put_cf_till_count>( + cluster: &mut Cluster, cf: &'static str, limit: u64, range: &mut dyn Iterator, @@ -1513,7 +1533,11 @@ pub struct PeerClient { } impl PeerClient { - pub fn new(cluster: &Cluster, region_id: u64, peer: metapb::Peer) -> PeerClient { + pub fn new( + cluster: &Cluster>, + region_id: u64, + peer: metapb::Peer, + ) -> PeerClient { let cli = { let env = Arc::new(Environment::new(1)); let channel = @@ -1620,7 +1644,11 @@ pub fn peer_on_store(region: &metapb::Region, store_id: u64) -> metapb::Peer { .clone() } -pub fn wait_for_synced(cluster: &mut Cluster, node_id: u64, region_id: u64) { +pub fn wait_for_synced( + cluster: &mut Cluster>, + node_id: u64, + region_id: u64, +) { let mut storage = cluster .sim .read() @@ -1650,7 +1678,10 @@ pub fn wait_for_synced(cluster: &mut Cluster, node_id: u64, regio 
assert!(snapshot.ext().is_max_ts_synced()); } -pub fn test_delete_range(cluster: &mut Cluster, cf: CfName) { +pub fn test_delete_range>( + cluster: &mut Cluster, + cf: CfName, +) { let data_set: Vec<_> = (1..500) .map(|i| { ( @@ -1683,8 +1714,8 @@ pub fn test_delete_range(cluster: &mut Cluster, cf: CfName) { } } -pub fn put_with_timeout( - cluster: &mut Cluster, +pub fn put_with_timeout>( + cluster: &mut Cluster, node_id: u64, key: &[u8], value: &[u8], @@ -1701,7 +1732,11 @@ pub fn put_with_timeout( cluster.call_command_on_node(node_id, req, timeout) } -pub fn wait_down_peers(cluster: &Cluster, count: u64, peer: Option) { +pub fn wait_down_peers>( + cluster: &Cluster, + count: u64, + peer: Option, +) { let mut peers = cluster.get_down_peers(); for _ in 1..1000 { if peers.len() == count as usize && peer.as_ref().map_or(true, |p| peers.contains_key(p)) { diff --git a/components/test_storage/Cargo.toml b/components/test_storage/Cargo.toml index 17fa91f3005..97ea7bf0d24 100644 --- a/components/test_storage/Cargo.toml +++ b/components/test_storage/Cargo.toml @@ -23,6 +23,7 @@ test-engines-panic = [ [dependencies] api_version = { workspace = true } collections = { workspace = true } +engine_rocks = { workspace = true } futures = "0.3" kvproto = { workspace = true } pd_client = { workspace = true } diff --git a/components/test_storage/src/assert_storage.rs b/components/test_storage/src/assert_storage.rs index 3a641a322a2..d4cdbdb2698 100644 --- a/components/test_storage/src/assert_storage.rs +++ b/components/test_storage/src/assert_storage.rs @@ -1,6 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. 
use api_version::{ApiV1, KvFormat}; +use engine_rocks::RocksEngine as RocksDb; use kvproto::{ kvrpcpb::{Context, KeyRange, LockInfo}, metapb, @@ -44,11 +45,11 @@ impl AssertionStorage { } } -impl AssertionStorage { +impl AssertionStorage, F> { pub fn new_raft_storage_with_store_count( count: usize, key: &str, - ) -> (Cluster, Self) { + ) -> (Cluster>, Self) { let (cluster, store, ctx) = new_raft_storage_with_store_count::(count, key); let storage = Self { store, ctx }; (cluster, storage) @@ -56,7 +57,7 @@ impl AssertionStorage { pub fn update_with_key_byte( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, key: &[u8], ) -> metapb::Region { // ensure the leader of range which contains current key has been elected @@ -79,7 +80,7 @@ impl AssertionStorage { pub fn delete_ok_for_cluster( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, key: &[u8], start_ts: impl Into, commit_ts: impl Into, @@ -98,7 +99,7 @@ impl AssertionStorage { fn get_from_cluster( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, key: &[u8], ts: impl Into, ) -> Option { @@ -116,7 +117,7 @@ impl AssertionStorage { pub fn get_none_from_cluster( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, key: &[u8], ts: impl Into, ) { @@ -125,7 +126,7 @@ impl AssertionStorage { pub fn put_ok_for_cluster( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, key: &[u8], value: &[u8], start_ts: impl Into, @@ -138,7 +139,7 @@ impl AssertionStorage { pub fn batch_put_ok_for_cluster<'a>( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, keys: &[impl AsRef<[u8]>], vals: impl Iterator, start_ts: impl Into, @@ -162,7 +163,7 @@ impl AssertionStorage { fn two_pc_ok_for_cluster( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, prewrite_mutations: Vec, key: &[u8], commit_keys: Vec, @@ -206,7 +207,7 @@ impl AssertionStorage { pub fn gc_ok_for_cluster( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, region_key: &[u8], mut region: 
metapb::Region, safe_point: impl Into, @@ -225,7 +226,7 @@ impl AssertionStorage { pub fn test_txn_store_gc3_for_cluster( &mut self, - cluster: &mut Cluster, + cluster: &mut Cluster>, key_prefix: u8, ) { let key_len = 10_000; diff --git a/components/test_storage/src/util.rs b/components/test_storage/src/util.rs index e91125ba001..54f82375afe 100644 --- a/components/test_storage/src/util.rs +++ b/components/test_storage/src/util.rs @@ -1,6 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. use api_version::KvFormat; +use engine_rocks::RocksEngine; use kvproto::kvrpcpb::Context; use test_raftstore::{new_server_cluster, Cluster, ServerCluster, SimulateEngine}; use tikv_util::HandyRwLock; @@ -55,7 +56,11 @@ macro_rules! follower_raft_engine { pub fn new_raft_engine( count: usize, key: &str, -) -> (Cluster, SimulateEngine, Context) { +) -> ( + Cluster>, + SimulateEngine, + Context, +) { let mut cluster = new_server_cluster(0, count); let (engine, ctx) = prepare_raft_engine!(cluster, key); (cluster, engine, ctx) @@ -65,8 +70,8 @@ pub fn new_raft_storage_with_store_count( count: usize, key: &str, ) -> ( - Cluster, - SyncTestStorage, + Cluster>, + SyncTestStorage, F>, Context, ) { let (cluster, engine, ctx) = new_raft_engine(count, key); diff --git a/tests/benches/raftstore/mod.rs b/tests/benches/raftstore/mod.rs index 05c602824c2..98b348722da 100644 --- a/tests/benches/raftstore/mod.rs +++ b/tests/benches/raftstore/mod.rs @@ -18,7 +18,10 @@ fn enc_write_kvs(db: &RocksEngine, kvs: &[(Vec, Vec)]) { wb.write().unwrap(); } -fn prepare_cluster(cluster: &mut Cluster, initial_kvs: &[(Vec, Vec)]) { +fn prepare_cluster>( + cluster: &mut Cluster, + initial_kvs: &[(Vec, Vec)], +) { cluster.run(); for engines in cluster.engines.values() { enc_write_kvs(&engines.kv, initial_kvs); @@ -35,7 +38,7 @@ struct SetConfig { fn bench_set(b: &mut Bencher<'_>, input: &SetConfig) where - T: Simulator, + T: Simulator, F: ClusterFactory, { let mut cluster = 
input.factory.build(input.nodes); @@ -57,7 +60,7 @@ struct GetConfig { fn bench_get(b: &mut Bencher<'_>, input: &GetConfig) where - T: Simulator, + T: Simulator, F: ClusterFactory, { let mut cluster = input.factory.build(input.nodes); @@ -84,7 +87,7 @@ struct DeleteConfig { fn bench_delete(b: &mut Bencher<'_>, input: &DeleteConfig) where - T: Simulator, + T: Simulator, F: ClusterFactory, { let mut cluster = input.factory.build(input.nodes); @@ -105,7 +108,7 @@ where fn bench_raft_cluster(c: &mut Criterion, factory: F, label: &str) where - T: Simulator + 'static, + T: Simulator + 'static, F: ClusterFactory, { let nodes_coll = vec![1, 3, 5]; @@ -136,15 +139,15 @@ where group.finish(); } -trait ClusterFactory: Clone + fmt::Debug + 'static { - fn build(&self, nodes: usize) -> Cluster; +trait ClusterFactory>: Clone + fmt::Debug + 'static { + fn build(&self, nodes: usize) -> Cluster; } #[derive(Clone)] struct NodeClusterFactory; -impl ClusterFactory for NodeClusterFactory { - fn build(&self, nodes: usize) -> Cluster { +impl ClusterFactory> for NodeClusterFactory { + fn build(&self, nodes: usize) -> Cluster> { new_node_cluster(1, nodes) } } @@ -158,8 +161,8 @@ impl fmt::Debug for NodeClusterFactory { #[derive(Clone)] struct ServerClusterFactory; -impl ClusterFactory for ServerClusterFactory { - fn build(&self, nodes: usize) -> Cluster { +impl ClusterFactory> for ServerClusterFactory { + fn build(&self, nodes: usize) -> Cluster> { new_server_cluster(1, nodes) } } diff --git a/tests/failpoints/cases/test_bootstrap.rs b/tests/failpoints/cases/test_bootstrap.rs index 8dc2eb8b371..9b4663616ed 100644 --- a/tests/failpoints/cases/test_bootstrap.rs +++ b/tests/failpoints/cases/test_bootstrap.rs @@ -2,6 +2,7 @@ use std::sync::{Arc, RwLock}; +use engine_rocks::RocksEngine; use engine_traits::Peekable; use kvproto::{kvrpcpb::ApiVersion, metapb, raft_serverpb}; use test_pd_client::TestPdClient; @@ -9,7 +10,9 @@ use test_raftstore::*; fn test_bootstrap_half_way_failure(fp: &str) { let 
pd_client = Arc::new(TestPdClient::new(0, false)); - let sim = Arc::new(RwLock::new(NodeCluster::new(pd_client.clone()))); + let sim = Arc::new(RwLock::new(NodeCluster::::new( + pd_client.clone(), + ))); let mut cluster = Cluster::new(0, 5, sim, pd_client, ApiVersion::V1); // Try to start this node, return after persisted some keys. diff --git a/tests/failpoints/cases/test_cmd_epoch_checker.rs b/tests/failpoints/cases/test_cmd_epoch_checker.rs index 8af8e29f3ac..7c39dd2589b 100644 --- a/tests/failpoints/cases/test_cmd_epoch_checker.rs +++ b/tests/failpoints/cases/test_cmd_epoch_checker.rs @@ -5,7 +5,7 @@ use std::{ time::Duration, }; -use engine_rocks::RocksSnapshot; +use engine_rocks::{RocksEngine, RocksSnapshot}; use kvproto::raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}; use raft::eraftpb::MessageType; use raftstore::store::msg::*; @@ -61,7 +61,7 @@ impl CbReceivers { fn make_cb(cmd: &RaftCmdRequest) -> (Callback, CbReceivers) { let (proposed_tx, proposed_rx) = mpsc::channel(); let (committed_tx, committed_rx) = mpsc::channel(); - let (cb, applied_rx) = make_cb_ext( + let (cb, applied_rx) = make_cb_ext::( cmd, Some(Box::new(move || proposed_tx.send(()).unwrap())), Some(Box::new(move || committed_tx.send(()).unwrap())), @@ -76,7 +76,10 @@ fn make_cb(cmd: &RaftCmdRequest) -> (Callback, CbReceivers) { ) } -fn make_write_req(cluster: &mut Cluster, k: &[u8]) -> RaftCmdRequest { +fn make_write_req( + cluster: &mut Cluster>, + k: &[u8], +) -> RaftCmdRequest { let r = cluster.get_region(k); let mut req = new_request( r.get_id(), diff --git a/tests/failpoints/cases/test_conf_change.rs b/tests/failpoints/cases/test_conf_change.rs index c3612e64127..6f91a2ff55b 100644 --- a/tests/failpoints/cases/test_conf_change.rs +++ b/tests/failpoints/cases/test_conf_change.rs @@ -110,7 +110,7 @@ fn test_write_after_destroy() { let mut epoch = cluster.pd_client.get_region_epoch(r1); let mut admin_req = new_admin_request(r1, &epoch, conf_change); 
admin_req.mut_header().set_peer(new_peer(1, 1)); - let (cb1, mut rx1) = make_cb(&admin_req); + let (cb1, mut rx1) = make_cb_rocks(&admin_req); let engines_3 = cluster.get_all_engines(3); let region = block_on(cluster.pd_client.get_region_by_id(r1)) .unwrap() @@ -126,7 +126,7 @@ fn test_write_after_destroy() { .async_command_on_node(1, admin_req, cb1) .unwrap(); for _ in 0..100 { - let (cb2, _rx2) = make_cb(&put); + let (cb2, _rx2) = make_cb_rocks(&put); cluster .sim .rl() diff --git a/tests/failpoints/cases/test_merge.rs b/tests/failpoints/cases/test_merge.rs index 929afeb70f4..cc7311bfe75 100644 --- a/tests/failpoints/cases/test_merge.rs +++ b/tests/failpoints/cases/test_merge.rs @@ -10,6 +10,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use engine_traits::{Peekable, CF_RAFT}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ @@ -1232,7 +1233,7 @@ fn test_prewrite_before_max_ts_is_synced() { let channel = ChannelBuilder::new(env).connect(&addr); let client = TikvClient::new(channel); - let do_prewrite = |cluster: &mut Cluster| { + let do_prewrite = |cluster: &mut Cluster>| { let region_id = right.get_id(); let leader = cluster.leader_of_region(region_id).unwrap(); let epoch = cluster.get_region_epoch(region_id); diff --git a/tests/failpoints/cases/test_rawkv.rs b/tests/failpoints/cases/test_rawkv.rs index a795422c120..b7886ce8267 100644 --- a/tests/failpoints/cases/test_rawkv.rs +++ b/tests/failpoints/cases/test_rawkv.rs @@ -3,6 +3,7 @@ use std::{sync::Arc, thread, time::Duration}; use causal_ts::{CausalTsProvider, CausalTsProviderImpl}; +use engine_rocks::RocksEngine; use futures::executor::block_on; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ @@ -14,7 +15,7 @@ use test_raftstore::*; use tikv_util::{time::Instant, HandyRwLock}; struct TestSuite { - pub cluster: Cluster, + pub cluster: Cluster>, api_version: ApiVersion, } diff --git a/tests/failpoints/cases/test_replica_read.rs 
b/tests/failpoints/cases/test_replica_read.rs index 773d721da8b..624e7a6f788 100644 --- a/tests/failpoints/cases/test_replica_read.rs +++ b/tests/failpoints/cases/test_replica_read.rs @@ -315,7 +315,7 @@ fn test_read_after_cleanup_range_for_snap() { request.mut_header().set_peer(p3); request.mut_header().set_replica_read(true); // Send follower read request to peer 3 - let (cb1, mut rx1) = make_cb(&request); + let (cb1, mut rx1) = make_cb_rocks(&request); cluster .sim .rl() @@ -619,7 +619,7 @@ fn test_batch_read_index_after_transfer_leader() { let mut req = new_request(1, epoch, vec![new_read_index_cmd()], true); req.mut_header().set_peer(new_peer(2, 2)); - let (cb, rx) = make_cb(&req); + let (cb, rx) = make_cb_rocks(&req); cluster.sim.rl().async_command_on_node(2, req, cb).unwrap(); resps.push(rx); } diff --git a/tests/failpoints/cases/test_replica_stale_read.rs b/tests/failpoints/cases/test_replica_stale_read.rs index cb986250d82..30ccda4fe21 100644 --- a/tests/failpoints/cases/test_replica_stale_read.rs +++ b/tests/failpoints/cases/test_replica_stale_read.rs @@ -2,20 +2,31 @@ use std::{sync::Arc, time::Duration}; +use engine_rocks::RocksEngine; use kvproto::{kvrpcpb::Op, metapb::Peer}; use pd_client::PdClient; use raft::eraftpb::MessageType; use test_pd_client::TestPdClient; use test_raftstore::*; -fn prepare_for_stale_read(leader: Peer) -> (Cluster, Arc, PeerClient) { +fn prepare_for_stale_read( + leader: Peer, +) -> ( + Cluster>, + Arc, + PeerClient, +) { prepare_for_stale_read_before_run(leader, None) } fn prepare_for_stale_read_before_run( leader: Peer, before_run: Option>, -) -> (Cluster, Arc, PeerClient) { +) -> ( + Cluster>, + Arc, + PeerClient, +) { let mut cluster = new_server_cluster(0, 3); let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); diff --git a/tests/failpoints/cases/test_sst_recovery.rs b/tests/failpoints/cases/test_sst_recovery.rs index da5a3da1a32..05b0badd662 100644 --- 
a/tests/failpoints/cases/test_sst_recovery.rs +++ b/tests/failpoints/cases/test_sst_recovery.rs @@ -179,8 +179,11 @@ fn compact_files_to_target_level( engine.compact_files_cf(CF_DEFAULT, file_names, Some(level), 1, false) } -fn create_tikv_cluster_with_one_node_damaged() --> (Cluster, Arc, RocksEngine) { +fn create_tikv_cluster_with_one_node_damaged() -> ( + Cluster>, + Arc, + RocksEngine, +) { let mut cluster = new_server_cluster(0, 3); let pd_client = cluster.pd_client.clone(); pd_client.disable_default_operator(); diff --git a/tests/failpoints/cases/test_stale_read.rs b/tests/failpoints/cases/test_stale_read.rs index a9c6fa5d6e6..ceb018fc610 100644 --- a/tests/failpoints/cases/test_stale_read.rs +++ b/tests/failpoints/cases/test_stale_read.rs @@ -6,6 +6,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use kvproto::metapb::{Peer, Region}; use pd_client::PdClient; use raft::eraftpb::MessageType; @@ -83,7 +84,7 @@ fn stale_read_during_splitting(right_derive: bool) { } fn must_not_stale_read( - cluster: &mut Cluster, + cluster: &mut Cluster>, stale_key: &[u8], old_region: &Region, old_leader: &Peer, @@ -166,7 +167,7 @@ fn must_not_stale_read( } fn must_not_eq_on_key( - cluster: &mut Cluster, + cluster: &mut Cluster>, key: &[u8], value: &[u8], read_quorum: bool, @@ -455,7 +456,7 @@ fn test_read_after_peer_destroyed() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() diff --git a/tests/failpoints/cases/test_witness.rs b/tests/failpoints/cases/test_witness.rs index 33a62f0532b..e207525bcea 100644 --- a/tests/failpoints/cases/test_witness.rs +++ b/tests/failpoints/cases/test_witness.rs @@ -3,6 +3,7 @@ use std::{iter::FromIterator, sync::Arc, time::Duration}; use collections::HashMap; +use engine_rocks::RocksEngine; use futures::executor::block_on; use kvproto::{metapb, raft_serverpb::RaftApplyState}; use pd_client::PdClient; @@ -521,8 
+522,8 @@ fn test_non_witness_replica_read() { assert_eq!(resp.get_header().has_error(), false); } -fn must_get_error_is_witness( - cluster: &mut Cluster, +fn must_get_error_is_witness>( + cluster: &mut Cluster, region: &metapb::Region, cmd: kvproto::raft_cmdpb::Request, ) { diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index 9af28b6e3d6..5bcd258947c 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -2,6 +2,7 @@ use std::{cmp, thread, time::Duration}; +use engine_rocks::RocksEngine; use engine_traits::CF_LOCK; use kvproto::{ coprocessor::{Request, Response, StoreBatchTask, StoreBatchTaskResponse}, @@ -2208,43 +2209,44 @@ fn test_batch_request() { true, ), ]; - let prepare_req = - |cluster: &mut Cluster, ranges: &Vec| -> Request { - let original_range = ranges.get(0).unwrap(); - let key_range = product.get_record_range(original_range.start, original_range.end); - let region_key = Key::from_raw(&key_range.start); - let mut req = DagSelect::from(&product) - .key_ranges(vec![key_range]) - .build_with(ctx.clone(), &[0]); - let mut new_ctx = Context::default(); - let new_region = cluster.get_region(region_key.as_encoded()); - let leader = cluster.leader_of_region(new_region.get_id()).unwrap(); - new_ctx.set_region_id(new_region.get_id()); - new_ctx.set_region_epoch(new_region.get_region_epoch().clone()); - new_ctx.set_peer(leader); - req.set_context(new_ctx); - req.set_start_ts(100); - - let batch_handle_ranges = &ranges.as_slice()[1..]; - for handle_range in batch_handle_ranges.iter() { - let range_start_key = Key::from_raw( - &product - .get_record_range(handle_range.start, handle_range.end) - .start, - ); - let batch_region = cluster.get_region(range_start_key.as_encoded()); - let batch_leader = cluster.leader_of_region(batch_region.get_id()).unwrap(); - let batch_key_ranges = - vec![product.get_record_range(handle_range.start, 
handle_range.end)]; - let mut store_batch_task = StoreBatchTask::new(); - store_batch_task.set_region_id(batch_region.get_id()); - store_batch_task.set_region_epoch(batch_region.get_region_epoch().clone()); - store_batch_task.set_peer(batch_leader); - store_batch_task.set_ranges(batch_key_ranges.into()); - req.tasks.push(store_batch_task); - } - req - }; + let prepare_req = |cluster: &mut Cluster>, + ranges: &Vec| + -> Request { + let original_range = ranges.get(0).unwrap(); + let key_range = product.get_record_range(original_range.start, original_range.end); + let region_key = Key::from_raw(&key_range.start); + let mut req = DagSelect::from(&product) + .key_ranges(vec![key_range]) + .build_with(ctx.clone(), &[0]); + let mut new_ctx = Context::default(); + let new_region = cluster.get_region(region_key.as_encoded()); + let leader = cluster.leader_of_region(new_region.get_id()).unwrap(); + new_ctx.set_region_id(new_region.get_id()); + new_ctx.set_region_epoch(new_region.get_region_epoch().clone()); + new_ctx.set_peer(leader); + req.set_context(new_ctx); + req.set_start_ts(100); + + let batch_handle_ranges = &ranges.as_slice()[1..]; + for handle_range in batch_handle_ranges.iter() { + let range_start_key = Key::from_raw( + &product + .get_record_range(handle_range.start, handle_range.end) + .start, + ); + let batch_region = cluster.get_region(range_start_key.as_encoded()); + let batch_leader = cluster.leader_of_region(batch_region.get_id()).unwrap(); + let batch_key_ranges = + vec![product.get_record_range(handle_range.start, handle_range.end)]; + let mut store_batch_task = StoreBatchTask::new(); + store_batch_task.set_region_id(batch_region.get_id()); + store_batch_task.set_region_epoch(batch_region.get_region_epoch().clone()); + store_batch_task.set_peer(batch_leader); + store_batch_task.set_ranges(batch_key_ranges.into()); + req.tasks.push(store_batch_task); + } + req + }; let verify_response = |result: &QueryResult, resp: &Response| { let (data, details, 
region_err, locked, other_err) = ( resp.get_data(), diff --git a/tests/integrations/import/util.rs b/tests/integrations/import/util.rs index 96ebc071bbc..92804860dd9 100644 --- a/tests/integrations/import/util.rs +++ b/tests/integrations/import/util.rs @@ -30,7 +30,7 @@ use uuid::Uuid; const CLEANUP_SST_MILLIS: u64 = 10; -pub fn new_cluster(cfg: TikvConfig) -> (Cluster, Context) { +pub fn new_cluster(cfg: TikvConfig) -> (Cluster>, Context) { let count = 1; let mut cluster = new_server_cluster(0, count); cluster.cfg = Config { @@ -77,7 +77,12 @@ pub fn new_cluster_v2( pub fn open_cluster_and_tikv_import_client( cfg: Option, -) -> (Cluster, Context, TikvClient, ImportSstClient) { +) -> ( + Cluster>, + Context, + TikvClient, + ImportSstClient, +) { let cfg = cfg.unwrap_or_else(|| { let mut config = TikvConfig::default(); config.server.addr = "127.0.0.1:0".to_owned(); @@ -150,14 +155,18 @@ pub fn open_cluster_and_tikv_import_client_v2( (cluster, ctx, tikv, import) } -pub fn new_cluster_and_tikv_import_client() --> (Cluster, Context, TikvClient, ImportSstClient) { +pub fn new_cluster_and_tikv_import_client() -> ( + Cluster>, + Context, + TikvClient, + ImportSstClient, +) { open_cluster_and_tikv_import_client(None) } pub fn new_cluster_and_tikv_import_client_tde() -> ( tempfile::TempDir, - Cluster, + Cluster>, Context, TikvClient, ImportSstClient, diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index bca389b26e6..74b4a73da43 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -6,6 +6,7 @@ use std::{ }; use concurrency_manager::ConcurrencyManager; +use engine_rocks::RocksEngine; use engine_traits::{ DbOptionsExt, Engines, MiscExt, Peekable, RaftEngine, RaftEngineReadOnly, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, @@ -27,7 +28,7 @@ use tikv_util::{ worker::{dummy_scheduler, Builder as WorkerBuilder, LazyWorker}, }; -fn 
test_bootstrap_idempotent(cluster: &mut Cluster) { +fn test_bootstrap_idempotent>(cluster: &mut Cluster) { // assume that there is a node bootstrap the cluster and add region in pd // successfully cluster.add_first_region().unwrap(); @@ -49,7 +50,8 @@ fn test_node_bootstrap_with_prepared_data() { let cfg = new_tikv_config(0); let (_, system) = fsm::create_raft_batch_system(&cfg.raft_store, &None); - let simulate_trans = SimulateTransport::new(ChannelTransport::new()); + let simulate_trans = + SimulateTransport::<_, RocksEngine>::new(ChannelTransport::::new()); let tmp_path = Builder::new().prefix("test_cluster").tempdir().unwrap(); let engine = engine_rocks::util::new_engine(tmp_path.path().to_str().unwrap(), ALL_CFS).unwrap(); diff --git a/tests/integrations/raftstore/test_clear_stale_data.rs b/tests/integrations/raftstore/test_clear_stale_data.rs index 8010d4c956c..69696a191d4 100644 --- a/tests/integrations/raftstore/test_clear_stale_data.rs +++ b/tests/integrations/raftstore/test_clear_stale_data.rs @@ -47,7 +47,7 @@ fn check_kv_in_all_cfs(db: &RocksEngine, i: u8, found: bool) { } } -fn test_clear_stale_data(cluster: &mut Cluster) { +fn test_clear_stale_data>(cluster: &mut Cluster) { // Disable compaction at level 0. 
cluster .cfg diff --git a/tests/integrations/raftstore/test_compact_after_delete.rs b/tests/integrations/raftstore/test_compact_after_delete.rs index 24034c83192..1bea73d85ea 100644 --- a/tests/integrations/raftstore/test_compact_after_delete.rs +++ b/tests/integrations/raftstore/test_compact_after_delete.rs @@ -6,7 +6,7 @@ use std::{ }; use collections::HashMap; -use engine_rocks::{raw::Range, util::get_cf_handle}; +use engine_rocks::{raw::Range, util::get_cf_handle, RocksEngine}; use engine_traits::{CachedTablet, MiscExt, CF_WRITE}; use keys::{data_key, DATA_MAX_KEY}; use test_raftstore::*; @@ -32,7 +32,7 @@ fn gen_delete_k(k: &[u8], commit_ts: TimeStamp) -> Vec { k.as_encoded().clone() } -fn test_compact_after_delete(cluster: &mut Cluster) { +fn test_compact_after_delete>(cluster: &mut Cluster) { cluster.cfg.raft_store.region_compact_check_interval = ReadableDuration::millis(100); cluster.cfg.raft_store.region_compact_min_tombstones = 500; cluster.cfg.raft_store.region_compact_tombstones_percent = 50; diff --git a/tests/integrations/raftstore/test_compact_lock_cf.rs b/tests/integrations/raftstore/test_compact_lock_cf.rs index fbc7629c73f..2f3f882927e 100644 --- a/tests/integrations/raftstore/test_compact_lock_cf.rs +++ b/tests/integrations/raftstore/test_compact_lock_cf.rs @@ -1,17 +1,21 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. -use engine_rocks::raw::DBStatisticsTickerType; +use engine_rocks::{raw::DBStatisticsTickerType, RocksEngine}; use engine_traits::{MiscExt, CF_LOCK}; use test_raftstore::*; use tikv_util::config::*; -fn flush(cluster: &mut Cluster) { +fn flush>(cluster: &mut Cluster) { for engines in cluster.engines.values() { engines.kv.flush_cf(CF_LOCK, true).unwrap(); } } -fn flush_then_check(cluster: &mut Cluster, interval: u64, written: bool) { +fn flush_then_check>( + cluster: &mut Cluster, + interval: u64, + written: bool, +) { flush(cluster); // Wait for compaction. 
sleep_ms(interval * 2); @@ -26,7 +30,7 @@ fn flush_then_check(cluster: &mut Cluster, interval: u64, writt } } -fn test_compact_lock_cf(cluster: &mut Cluster) { +fn test_compact_lock_cf>(cluster: &mut Cluster) { let interval = 500; // Set lock_cf_compact_interval. cluster.cfg.raft_store.lock_cf_compact_interval = ReadableDuration::millis(interval); diff --git a/tests/integrations/raftstore/test_compact_log.rs b/tests/integrations/raftstore/test_compact_log.rs index bc097dd27e9..fcafec4a82e 100644 --- a/tests/integrations/raftstore/test_compact_log.rs +++ b/tests/integrations/raftstore/test_compact_log.rs @@ -1,12 +1,13 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. use collections::HashMap; +use engine_rocks::RocksEngine; use kvproto::raft_serverpb::RaftApplyState; use raftstore::store::*; use test_raftstore::*; use tikv_util::config::*; -fn test_compact_log(cluster: &mut Cluster) { +fn test_compact_log>(cluster: &mut Cluster) { cluster.run(); let mut before_states = HashMap::default(); @@ -42,7 +43,7 @@ fn test_compact_log(cluster: &mut Cluster) { ); } -fn test_compact_count_limit(cluster: &mut Cluster) { +fn test_compact_count_limit>(cluster: &mut Cluster) { cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); cluster.cfg.raft_store.raft_log_gc_threshold = 500; cluster.cfg.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(20)); @@ -107,7 +108,7 @@ fn test_compact_count_limit(cluster: &mut Cluster) { ); } -fn test_compact_many_times(cluster: &mut Cluster) { +fn test_compact_many_times>(cluster: &mut Cluster) { let gc_limit: u64 = 100; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(gc_limit); cluster.cfg.raft_store.raft_log_gc_threshold = 500; @@ -176,7 +177,7 @@ fn test_node_compact_many_times() { test_compact_many_times(&mut cluster); } -fn test_compact_size_limit(cluster: &mut Cluster) { +fn test_compact_size_limit>(cluster: &mut Cluster) { cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100000); 
cluster.cfg.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(1)); cluster.run(); @@ -251,7 +252,9 @@ fn test_node_compact_size_limit() { test_compact_size_limit(&mut cluster); } -fn test_compact_reserve_max_ticks(cluster: &mut Cluster) { +fn test_compact_reserve_max_ticks>( + cluster: &mut Cluster, +) { cluster.cfg.raft_store.raft_log_gc_count_limit = Some(100); cluster.cfg.raft_store.raft_log_gc_threshold = 500; cluster.cfg.raft_store.raft_log_gc_size_limit = Some(ReadableSize::mb(20)); diff --git a/tests/integrations/raftstore/test_early_apply.rs b/tests/integrations/raftstore/test_early_apply.rs index 44537e8b409..91a63b1878c 100644 --- a/tests/integrations/raftstore/test_early_apply.rs +++ b/tests/integrations/raftstore/test_early_apply.rs @@ -2,6 +2,7 @@ use std::time::Duration; +use engine_rocks::RocksEngine; use engine_traits::{RaftEngine, RaftEngineDebug}; use kvproto::raft_serverpb::RaftLocalState; use raft::eraftpb::MessageType; @@ -43,10 +44,14 @@ enum DataLost { AllLost, } -fn test(cluster: &mut Cluster, action: A, check: C, mode: DataLost) -where - A: FnOnce(&mut Cluster), - C: FnOnce(&mut Cluster), +fn test( + cluster: &mut Cluster>, + action: A, + check: C, + mode: DataLost, +) where + A: FnOnce(&mut Cluster>), + C: FnOnce(&mut Cluster>), { let filter = match mode { DataLost::AllLost | DataLost::LeaderCommit => RegionPacketFilter::new(1, 1) diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index 5a28646db65..9ca6092e624 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -555,9 +555,11 @@ trait ClusterI { ) -> raftstore::Result; } -impl ClusterI for Cluster { +impl ClusterI for Cluster> { fn region_local_state(&self, region_id: u64, store_id: u64) -> RegionLocalState { - Cluster::::region_local_state(self, region_id, store_id) + Cluster::>::region_local_state( + self, region_id, store_id, + ) } fn query_leader( &self, @@ 
-565,14 +567,16 @@ impl ClusterI for Cluster { region_id: u64, timeout: Duration, ) -> Option { - Cluster::::query_leader(self, store_id, region_id, timeout) + Cluster::>::query_leader( + self, store_id, region_id, timeout, + ) } fn call_command( &self, request: RaftCmdRequest, timeout: Duration, ) -> raftstore::Result { - Cluster::::call_command(self, request, timeout) + Cluster::>::call_command(self, request, timeout) } } diff --git a/tests/integrations/raftstore/test_hibernate.rs b/tests/integrations/raftstore/test_hibernate.rs index 86962330f0f..6e3c64d7851 100644 --- a/tests/integrations/raftstore/test_hibernate.rs +++ b/tests/integrations/raftstore/test_hibernate.rs @@ -62,7 +62,7 @@ fn test_proposal_prevent_sleep() { true, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); // send to peer 2 cluster .sim @@ -90,7 +90,7 @@ fn test_proposal_prevent_sleep() { let conf_change = new_change_peer_request(ConfChangeType::RemoveNode, new_peer(3, 3)); let mut admin_req = new_admin_request(1, region.get_region_epoch(), conf_change); admin_req.mut_header().set_peer(new_peer(1, 1)); - let (cb, _rx) = make_cb(&admin_req); + let (cb, _rx) = make_cb_rocks(&admin_req); cluster .sim .rl() @@ -482,7 +482,7 @@ fn test_leader_demoted_when_hibernated() { ); request.mut_header().set_peer(new_peer(3, 3)); // In case peer 3 is hibernated. 
- let (cb, _rx) = make_cb(&request); + let (cb, _rx) = make_cb_rocks(&request); cluster .sim .rl() diff --git a/tests/integrations/raftstore/test_joint_consensus.rs b/tests/integrations/raftstore/test_joint_consensus.rs index 55def7a099b..e682aa9a656 100644 --- a/tests/integrations/raftstore/test_joint_consensus.rs +++ b/tests/integrations/raftstore/test_joint_consensus.rs @@ -2,6 +2,7 @@ use std::{sync::Arc, time::*}; +use engine_rocks::RocksEngine; use kvproto::{ metapb::{self, PeerRole, Region}, raft_cmdpb::{ChangePeerRequest, RaftCmdRequest, RaftCmdResponse}, @@ -473,12 +474,12 @@ fn test_leader_down_in_joint_state() { } fn call_conf_change_v2( - cluster: &mut Cluster, + cluster: &mut Cluster, region_id: u64, changes: Vec, ) -> Result where - T: Simulator, + T: Simulator, { let conf_change = new_change_peer_v2_request(changes); let epoch = cluster.pd_client.get_region_epoch(region_id); @@ -487,13 +488,13 @@ where } fn call_conf_change( - cluster: &mut Cluster, + cluster: &mut Cluster, region_id: u64, conf_change_type: ConfChangeType, peer: metapb::Peer, ) -> Result where - T: Simulator, + T: Simulator, { let conf_change = new_change_peer_request(conf_change_type, peer); let epoch = cluster.pd_client.get_region_epoch(region_id); @@ -501,9 +502,9 @@ where cluster.call_command_on_leader(admin_req, Duration::from_secs(3)) } -fn leave_joint(cluster: &mut Cluster, region_id: u64) -> Result +fn leave_joint(cluster: &mut Cluster, region_id: u64) -> Result where - T: Simulator, + T: Simulator, { call_conf_change_v2(cluster, region_id, vec![]) } diff --git a/tests/integrations/raftstore/test_lease_read.rs b/tests/integrations/raftstore/test_lease_read.rs index abf17e01e9d..f9e6747b660 100644 --- a/tests/integrations/raftstore/test_lease_read.rs +++ b/tests/integrations/raftstore/test_lease_read.rs @@ -427,7 +427,7 @@ fn test_node_callback_when_destroyed() { let get = new_get_cmd(b"k1"); let mut req = new_request(1, epoch, vec![get], true); 
req.mut_header().set_peer(leader); - let (cb, mut rx) = make_cb(&req); + let (cb, mut rx) = make_cb_rocks(&req); cluster .sim .rl() @@ -648,7 +648,7 @@ fn test_not_leader_read_lease() { true, ); req.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&req); + let (cb, mut rx) = make_cb_rocks(&req); cluster.sim.rl().async_command_on_node(1, req, cb).unwrap(); cluster.must_transfer_leader(region_id, new_peer(3, 3)); @@ -701,7 +701,7 @@ fn test_read_index_after_write() { req.mut_header() .set_peer(new_peer(1, region_on_store1.get_id())); // Don't care about the first one's read index - let (cb, _) = make_cb(&req); + let (cb, _) = make_cb_rocks(&req); cluster.sim.rl().async_command_on_node(1, req, cb).unwrap(); cluster.must_put(b"k2", b"v2"); @@ -715,7 +715,7 @@ fn test_read_index_after_write() { ); req.mut_header() .set_peer(new_peer(1, region_on_store1.get_id())); - let (cb, mut rx) = make_cb(&req); + let (cb, mut rx) = make_cb_rocks(&req); cluster.sim.rl().async_command_on_node(1, req, cb).unwrap(); cluster.sim.wl().clear_recv_filters(2); diff --git a/tests/integrations/raftstore/test_multi.rs b/tests/integrations/raftstore/test_multi.rs index 8093a30872d..f40e6695599 100644 --- a/tests/integrations/raftstore/test_multi.rs +++ b/tests/integrations/raftstore/test_multi.rs @@ -6,6 +6,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use engine_traits::Peekable; use kvproto::raft_cmdpb::RaftCmdResponse; use raft::eraftpb::MessageType; @@ -16,13 +17,15 @@ use tikv::storage::{kv::SnapshotExt, Snapshot}; use tikv_util::{config::*, HandyRwLock}; use txn_types::{Key, LastChange, PessimisticLock}; -fn test_multi_base(cluster: &mut Cluster) { +fn test_multi_base>(cluster: &mut Cluster) { cluster.run(); test_multi_base_after_bootstrap(cluster); } -fn test_multi_base_after_bootstrap(cluster: &mut Cluster) { +fn test_multi_base_after_bootstrap>( + cluster: &mut Cluster, +) { let (key, value) = (b"k1", b"v1"); cluster.must_put(key, value); @@ 
-49,7 +52,7 @@ fn test_multi_base_after_bootstrap(cluster: &mut Cluster) { // TODO add epoch not match test cases. } -fn test_multi_leader_crash(cluster: &mut Cluster) { +fn test_multi_leader_crash>(cluster: &mut Cluster) { cluster.run(); let (key1, value1) = (b"k1", b"v1"); @@ -90,7 +93,7 @@ fn test_multi_leader_crash(cluster: &mut Cluster) { must_get_none(&cluster.engines[&last_leader.get_store_id()].kv, key1); } -fn test_multi_cluster_restart(cluster: &mut Cluster) { +fn test_multi_cluster_restart>(cluster: &mut Cluster) { cluster.run(); let (key, value) = (b"k1", b"v1"); @@ -110,7 +113,10 @@ fn test_multi_cluster_restart(cluster: &mut Cluster) { assert_eq!(cluster.get(key), Some(value.to_vec())); } -fn test_multi_lost_majority(cluster: &mut Cluster, count: usize) { +fn test_multi_lost_majority>( + cluster: &mut Cluster, + count: usize, +) { cluster.run(); let leader = cluster.leader_of_region(1); @@ -129,8 +135,8 @@ fn test_multi_lost_majority(cluster: &mut Cluster, count: usize assert!(cluster.leader_of_region(1).is_none()); } -fn test_multi_random_restart( - cluster: &mut Cluster, +fn test_multi_random_restart>( + cluster: &mut Cluster, node_count: usize, restart_count: u32, ) { @@ -173,7 +179,7 @@ fn test_multi_server_base() { test_multi_base(&mut cluster) } -fn test_multi_latency(cluster: &mut Cluster) { +fn test_multi_latency>(cluster: &mut Cluster) { cluster.run(); cluster.add_send_filter(CloneFilterFactory(DelayFilter::new(Duration::from_millis( 30, @@ -195,7 +201,7 @@ fn test_multi_server_latency() { test_multi_latency(&mut cluster); } -fn test_multi_random_latency(cluster: &mut Cluster) { +fn test_multi_random_latency>(cluster: &mut Cluster) { cluster.run(); cluster.add_send_filter(CloneFilterFactory(RandomLatencyFilter::new(50))); test_multi_base_after_bootstrap(cluster); @@ -215,7 +221,7 @@ fn test_multi_server_random_latency() { test_multi_random_latency(&mut cluster); } -fn test_multi_drop_packet(cluster: &mut Cluster) { +fn 
test_multi_drop_packet>(cluster: &mut Cluster) { cluster.run(); cluster.add_send_filter(CloneFilterFactory(DropPacketFilter::new(30))); test_multi_base_after_bootstrap(cluster); @@ -295,7 +301,9 @@ fn test_multi_server_random_restart() { test_multi_random_restart(&mut cluster, count, 10); } -fn test_leader_change_with_uncommitted_log(cluster: &mut Cluster) { +fn test_leader_change_with_uncommitted_log>( + cluster: &mut Cluster, +) { cluster.cfg.raft_store.raft_election_timeout_ticks = 50; // disable compact log to make test more stable. cluster.cfg.raft_store.raft_log_gc_threshold = 1000; @@ -485,7 +493,9 @@ fn test_node_leader_change_with_log_overlap() { panic!("callback has not been called after 5s."); } -fn test_read_leader_with_unapplied_log(cluster: &mut Cluster) { +fn test_read_leader_with_unapplied_log>( + cluster: &mut Cluster, +) { cluster.cfg.raft_store.raft_election_timeout_ticks = 50; // disable compact log to make test more stable. cluster.cfg.raft_store.raft_log_gc_threshold = 1000; @@ -574,8 +584,8 @@ fn test_server_read_leader_with_unapplied_log() { test_read_leader_with_unapplied_log(&mut cluster); } -fn get_with_timeout( - cluster: &mut Cluster, +fn get_with_timeout>( + cluster: &mut Cluster, key: &[u8], read_quorum: bool, timeout: Duration, @@ -591,7 +601,9 @@ fn get_with_timeout( cluster.call_command_on_leader(req, timeout) } -fn test_remove_leader_with_uncommitted_log(cluster: &mut Cluster) { +fn test_remove_leader_with_uncommitted_log>( + cluster: &mut Cluster, +) { cluster.cfg.raft_store.raft_election_timeout_ticks = 50; // disable compact log to make test more stable. cluster.cfg.raft_store.raft_log_gc_threshold = 1000; @@ -717,7 +729,7 @@ fn test_node_dropped_proposal() { .expect("callback should have been called with in 5s."); } -fn test_consistency_check(cluster: &mut Cluster) { +fn test_consistency_check>(cluster: &mut Cluster) { cluster.cfg.raft_store.raft_election_timeout_ticks = 50; // disable compact log to make test more stable. 
cluster.cfg.raft_store.raft_log_gc_threshold = 1000; @@ -740,7 +752,7 @@ fn test_node_consistency_check() { test_consistency_check(&mut cluster); } -fn test_batch_write(cluster: &mut Cluster) { +fn test_batch_write>(cluster: &mut Cluster) { cluster.run(); let r = cluster.get_region(b""); cluster.must_split(&r, b"k3"); diff --git a/tests/integrations/raftstore/test_prevote.rs b/tests/integrations/raftstore/test_prevote.rs index c81b34f0435..c843154b121 100644 --- a/tests/integrations/raftstore/test_prevote.rs +++ b/tests/integrations/raftstore/test_prevote.rs @@ -6,6 +6,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use raft::eraftpb::MessageType; use test_raftstore::*; use tikv_util::HandyRwLock; @@ -15,7 +16,10 @@ enum FailureType<'a> { Reboot(&'a [u64]), } -fn attach_prevote_notifiers(cluster: &Cluster, peer: u64) -> mpsc::Receiver<()> { +fn attach_prevote_notifiers>( + cluster: &Cluster, + peer: u64, +) -> mpsc::Receiver<()> { // Setup a notifier let (tx, rx) = mpsc::channel(); let response_notifier = Box::new(MessageTypeNotifier::new( @@ -37,8 +41,8 @@ fn attach_prevote_notifiers(cluster: &Cluster, peer: u64) -> mp // Validate that prevote is used in elections after partition or reboot of some // nodes. -fn test_prevote( - cluster: &mut Cluster, +fn test_prevote>( + cluster: &mut Cluster, failure_type: FailureType<'_>, leader_after_failure_id: impl Into>, detect_during_failure: impl Into>, @@ -219,7 +223,7 @@ fn test_prevote_reboot_minority_followers() { // Test isolating a minority of the cluster and make sure that the remove // themselves. 
-fn test_pair_isolated(cluster: &mut Cluster) { +fn test_pair_isolated>(cluster: &mut Cluster) { let region = 1; let pd_client = Arc::clone(&cluster.pd_client); @@ -246,7 +250,9 @@ fn test_server_pair_isolated() { test_pair_isolated(&mut cluster); } -fn test_isolated_follower_leader_does_not_change(cluster: &mut Cluster) { +fn test_isolated_follower_leader_does_not_change>( + cluster: &mut Cluster, +) { cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); cluster.must_put(b"k1", b"v1"); @@ -282,7 +288,9 @@ fn test_server_isolated_follower_leader_does_not_change() { test_isolated_follower_leader_does_not_change(&mut cluster); } -fn test_create_peer_from_pre_vote(cluster: &mut Cluster) { +fn test_create_peer_from_pre_vote>( + cluster: &mut Cluster, +) { let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); diff --git a/tests/integrations/raftstore/test_region_change_observer.rs b/tests/integrations/raftstore/test_region_change_observer.rs index 72bbfdc9b8f..4b37e8aa962 100644 --- a/tests/integrations/raftstore/test_region_change_observer.rs +++ b/tests/integrations/raftstore/test_region_change_observer.rs @@ -9,6 +9,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use kvproto::metapb::Region; use raft::StateRole; use raftstore::coprocessor::{ @@ -39,7 +40,7 @@ impl RegionChangeObserver for TestObserver { } } -fn test_region_change_observer_impl(mut cluster: Cluster) { +fn test_region_change_observer_impl(mut cluster: Cluster>) { let pd_client = Arc::clone(&cluster.pd_client); pd_client.disable_default_operator(); diff --git a/tests/integrations/raftstore/test_region_heartbeat.rs b/tests/integrations/raftstore/test_region_heartbeat.rs index 29f7e8b10dd..1f9b7cb1eb8 100644 --- a/tests/integrations/raftstore/test_region_heartbeat.rs +++ b/tests/integrations/raftstore/test_region_heartbeat.rs @@ -6,6 +6,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use test_raftstore::*; use 
test_raftstore_macro::test_case; use tikv_util::{ @@ -91,7 +92,7 @@ fn test_server_down_peers_without_hibernate_regions() { test_down_peers!(&mut cluster); } -fn test_pending_peers(cluster: &mut Cluster) { +fn test_pending_peers>(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer count check. pd_client.disable_default_operator(); diff --git a/tests/integrations/raftstore/test_region_info_accessor.rs b/tests/integrations/raftstore/test_region_info_accessor.rs index 24d90b66327..6da6c062e9e 100644 --- a/tests/integrations/raftstore/test_region_info_accessor.rs +++ b/tests/integrations/raftstore/test_region_info_accessor.rs @@ -6,6 +6,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use kvproto::metapb::Region; use raft::StateRole; use raftstore::coprocessor::{RangeKey, RegionInfo, RegionInfoAccessor}; @@ -47,7 +48,10 @@ fn check_region_ranges(regions: &[(Region, StateRole)], ranges: &[(&[u8], &[u8]) }) } -fn test_region_info_accessor_impl(cluster: &mut Cluster, c: &RegionInfoAccessor) { +fn test_region_info_accessor_impl( + cluster: &mut Cluster>, + c: &RegionInfoAccessor, +) { for i in 0..9 { let k = format!("k{}", i).into_bytes(); let v = format!("v{}", i).into_bytes(); diff --git a/tests/integrations/raftstore/test_replication_mode.rs b/tests/integrations/raftstore/test_replication_mode.rs index 76059fa8f87..db373106402 100644 --- a/tests/integrations/raftstore/test_replication_mode.rs +++ b/tests/integrations/raftstore/test_replication_mode.rs @@ -2,13 +2,14 @@ use std::{iter::FromIterator, sync::Arc, thread, time::Duration}; +use engine_rocks::RocksEngine; use kvproto::replication_modepb::*; use pd_client::PdClient; use raft::eraftpb::ConfChangeType; use test_raftstore::*; use tikv_util::{config::*, mpsc::future, HandyRwLock}; -fn prepare_cluster() -> Cluster { +fn prepare_cluster() -> Cluster> { let mut cluster = new_server_cluster(0, 3); cluster.pd_client.disable_default_operator(); 
cluster.pd_client.configure_dr_auto_sync("zone"); @@ -20,7 +21,7 @@ fn prepare_cluster() -> Cluster { cluster } -fn configure_for_snapshot(cluster: &mut Cluster) { +fn configure_for_snapshot(cluster: &mut Cluster>) { // Truncate the log quickly so that we can force sending snapshot. cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(20); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(2); @@ -28,13 +29,13 @@ fn configure_for_snapshot(cluster: &mut Cluster) { cluster.cfg.raft_store.snap_mgr_gc_tick_interval = ReadableDuration::millis(50); } -fn run_cluster(cluster: &mut Cluster) { +fn run_cluster(cluster: &mut Cluster>) { cluster.run(); cluster.must_transfer_leader(1, new_peer(1, 1)); cluster.must_put(b"k1", b"v0"); } -fn prepare_labels(cluster: &mut Cluster) { +fn prepare_labels(cluster: &mut Cluster>) { cluster.add_label(1, "dc", "dc1"); cluster.add_label(2, "dc", "dc1"); cluster.add_label(3, "dc", "dc2"); @@ -61,7 +62,7 @@ fn test_dr_auto_sync() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -83,7 +84,7 @@ fn test_dr_auto_sync() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -174,7 +175,7 @@ fn test_sync_recover_after_apply_snapshot() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -321,7 +322,7 @@ fn test_switching_replication_mode() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -357,7 +358,7 @@ fn test_switching_replication_mode() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = 
make_cb_rocks(&request); cluster .sim .rl() @@ -385,7 +386,7 @@ fn test_switching_replication_mode() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -416,7 +417,7 @@ fn test_replication_mode_allowlist() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -504,7 +505,7 @@ fn test_migrate_replication_mode() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() @@ -550,7 +551,7 @@ fn test_migrate_majority_to_drautosync() { false, ); request.mut_header().set_peer(new_peer(1, 1)); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb_rocks(&request); cluster .sim .rl() diff --git a/tests/integrations/raftstore/test_snap.rs b/tests/integrations/raftstore/test_snap.rs index edef780ce31..23b2a37e6c9 100644 --- a/tests/integrations/raftstore/test_snap.rs +++ b/tests/integrations/raftstore/test_snap.rs @@ -12,6 +12,7 @@ use std::{ }; use collections::HashMap; +use engine_rocks::RocksEngine; use engine_traits::{Checkpointer, KvEngine, RaftEngineDebug}; use file_system::{IoOp, IoType}; use futures::executor::block_on; @@ -39,7 +40,10 @@ use tikv_util::{ HandyRwLock, }; -fn test_huge_snapshot(cluster: &mut Cluster, max_snapshot_file_size: u64) { +fn test_huge_snapshot>( + cluster: &mut Cluster, + max_snapshot_file_size: u64, +) { cluster.cfg.rocksdb.titan.enabled = true; cluster.cfg.raft_store.raft_log_gc_count_limit = Some(1000); cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(10); diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 3affbadec4b..1dd5e7db6d0 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ 
b/tests/integrations/raftstore/test_split_region.rs @@ -7,6 +7,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use engine_traits::{Peekable, CF_DEFAULT, CF_WRITE}; use keys::data_key; use kvproto::{ @@ -629,7 +630,10 @@ fn test_node_split_region_after_reboot_with_config_change() { } } -fn test_split_epoch_not_match(cluster: &mut Cluster, right_derive: bool) { +fn test_split_epoch_not_match>( + cluster: &mut Cluster, + right_derive: bool, +) { cluster.cfg.raft_store.right_derive_when_split = right_derive; cluster.run(); let pd_client = Arc::clone(&cluster.pd_client); diff --git a/tests/integrations/raftstore/test_stale_peer.rs b/tests/integrations/raftstore/test_stale_peer.rs index f76373756f9..5ef90e30e94 100644 --- a/tests/integrations/raftstore/test_stale_peer.rs +++ b/tests/integrations/raftstore/test_stale_peer.rs @@ -4,6 +4,7 @@ use std::{sync::Arc, thread, time::*}; +use engine_rocks::RocksEngine; use engine_traits::{Peekable, CF_RAFT}; use kvproto::raft_serverpb::{PeerState, RegionLocalState}; use pd_client::PdClient; @@ -30,7 +31,7 @@ use tikv_util::{config::ReadableDuration, HandyRwLock}; /// time, and it would check with pd to confirm whether it's still a member of /// the cluster. If not, it should destroy itself as a stale peer which is /// removed out already. -fn test_stale_peer_out_of_region(cluster: &mut Cluster) { +fn test_stale_peer_out_of_region>(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer number check. pd_client.disable_default_operator(); @@ -113,7 +114,10 @@ fn test_server_stale_peer_out_of_region() { /// time, and it's an initialized peer without any data. It would destroy itself /// as stale peer directly and should not impact other region data on the /// same store. 
-fn test_stale_peer_without_data(cluster: &mut Cluster, right_derive: bool) { +fn test_stale_peer_without_data>( + cluster: &mut Cluster, + right_derive: bool, +) { cluster.cfg.raft_store.right_derive_when_split = right_derive; let pd_client = Arc::clone(&cluster.pd_client); @@ -299,7 +303,7 @@ fn test_stale_learner_with_read_index() { ); request.mut_header().set_peer(new_peer(3, 3)); request.mut_header().set_replica_read(true); - let (cb, _) = make_cb(&request); + let (cb, _) = make_cb_rocks(&request); cluster .sim .rl() diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index 60f10936f2d..821fc19dff8 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -7,6 +7,7 @@ use std::{ }; use api_version::{test_kv_format_impl, KvFormat}; +use engine_rocks::RocksEngine; use engine_traits::MiscExt; use futures::{executor::block_on, SinkExt, StreamExt}; use grpcio::*; @@ -17,7 +18,7 @@ use test_raftstore::*; use tikv_util::{config::*, store::QueryStats}; use txn_types::Key; -fn check_available(cluster: &mut Cluster) { +fn check_available>(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); let engine = cluster.get_engine(1); @@ -43,7 +44,7 @@ fn check_available(cluster: &mut Cluster) { panic!("available not changed") } -fn test_simple_store_stats(cluster: &mut Cluster) { +fn test_simple_store_stats>(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); cluster.cfg.raft_store.pd_store_heartbeat_tick_interval = ReadableDuration::millis(20); @@ -142,7 +143,14 @@ fn test_store_heartbeat_report_hotspots() { fail::remove("mock_hotspot_threshold"); } -type Query = dyn Fn(Context, &Cluster, TikvClient, u64, u64, Vec); +type Query = dyn Fn( + Context, + &Cluster>, + TikvClient, + u64, + u64, + Vec, +); #[test] fn test_query_stats() { @@ -435,7 +443,7 @@ fn test_txn_query_stats_tmpl() { } fn raw_put( - _cluster: &Cluster, + _cluster: 
&Cluster>, client: &TikvClient, ctx: &Context, _store_id: u64, @@ -453,7 +461,7 @@ fn raw_put( } fn put( - cluster: &Cluster, + cluster: &Cluster>, client: &TikvClient, ctx: &Context, store_id: u64, @@ -674,7 +682,7 @@ fn test_txn_delete_query() { } fn check_query_num_read( - cluster: &Cluster, + cluster: &Cluster>, store_id: u64, region_id: u64, kind: QueryKind, @@ -700,7 +708,7 @@ fn check_query_num_read( } fn check_query_num_write( - cluster: &Cluster, + cluster: &Cluster>, store_id: u64, kind: QueryKind, expect: u64, @@ -720,7 +728,7 @@ fn check_query_num_write( } fn check_split_key( - cluster: &Cluster, + cluster: &Cluster>, start_key: Vec, end_key: Option>, ) -> bool { diff --git a/tests/integrations/raftstore/test_tombstone.rs b/tests/integrations/raftstore/test_tombstone.rs index c1cd0befcf1..f5c419ac65b 100644 --- a/tests/integrations/raftstore/test_tombstone.rs +++ b/tests/integrations/raftstore/test_tombstone.rs @@ -3,6 +3,7 @@ use std::{sync::Arc, thread, time::Duration}; use crossbeam::channel; +use engine_rocks::RocksEngine; use engine_traits::{CfNamesExt, Iterable, Peekable, RaftEngineDebug, SyncMutable, CF_RAFT}; use kvproto::raft_serverpb::{PeerState, RaftMessage, RegionLocalState, StoreIdent}; use protobuf::Message; @@ -10,7 +11,7 @@ use raft::eraftpb::MessageType; use test_raftstore::*; use tikv_util::{config::*, time::Instant}; -fn test_tombstone(cluster: &mut Cluster) { +fn test_tombstone>(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer number check. pd_client.disable_default_operator(); @@ -113,7 +114,7 @@ fn test_server_tombstone() { test_tombstone(&mut cluster); } -fn test_fast_destroy(cluster: &mut Cluster) { +fn test_fast_destroy>(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer number check. 
@@ -158,7 +159,7 @@ fn test_server_fast_destroy() { test_fast_destroy(&mut cluster); } -fn test_readd_peer(cluster: &mut Cluster) { +fn test_readd_peer>(cluster: &mut Cluster) { let pd_client = Arc::clone(&cluster.pd_client); // Disable default max peer number check. pd_client.disable_default_operator(); diff --git a/tests/integrations/raftstore/test_transport.rs b/tests/integrations/raftstore/test_transport.rs index 4ed3d8da160..cb1bcefbcad 100644 --- a/tests/integrations/raftstore/test_transport.rs +++ b/tests/integrations/raftstore/test_transport.rs @@ -1,8 +1,9 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. +use engine_rocks::RocksEngine; use test_raftstore::*; -fn test_partition_write(cluster: &mut Cluster) { +fn test_partition_write>(cluster: &mut Cluster) { cluster.run(); let (key, value) = (b"k1", b"v1"); diff --git a/tests/integrations/raftstore/test_update_region_size.rs b/tests/integrations/raftstore/test_update_region_size.rs index f2ff0d4f217..22a5e1f4534 100644 --- a/tests/integrations/raftstore/test_update_region_size.rs +++ b/tests/integrations/raftstore/test_update_region_size.rs @@ -2,18 +2,19 @@ use std::{sync::Arc, thread, time}; +use engine_rocks::RocksEngine; use engine_traits::MiscExt; use pd_client::PdClient; use test_raftstore::*; use tikv_util::config::*; -fn flush(cluster: &mut Cluster) { +fn flush>(cluster: &mut Cluster) { for engines in cluster.engines.values() { engines.kv.flush_cfs(&[], true).unwrap(); } } -fn test_update_region_size(cluster: &mut Cluster) { +fn test_update_region_size>(cluster: &mut Cluster) { cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(50); cluster.cfg.raft_store.split_region_check_tick_interval = ReadableDuration::millis(50); cluster.cfg.raft_store.region_split_check_diff = Some(ReadableSize::kb(1)); @@ -24,7 +25,7 @@ fn test_update_region_size(cluster: &mut Cluster) { .level0_file_num_compaction_trigger = 10; cluster.start().unwrap(); - let batch_put = 
|cluster: &mut Cluster, mut start, end| { + let batch_put = |cluster: &mut Cluster, mut start, end| { while start < end { let next = std::cmp::min(end, start + 50); let requests = (start..next) diff --git a/tests/integrations/raftstore/test_witness.rs b/tests/integrations/raftstore/test_witness.rs index d4332403cea..7879ffc49be 100644 --- a/tests/integrations/raftstore/test_witness.rs +++ b/tests/integrations/raftstore/test_witness.rs @@ -7,6 +7,7 @@ use std::{ }; use collections::HashMap; +use engine_rocks::RocksEngine; use futures::executor::block_on; use kvproto::{ metapb, @@ -484,8 +485,8 @@ fn test_witness_replica_read() { ); } -fn must_get_error_is_witness( - cluster: &mut Cluster, +fn must_get_error_is_witness>( + cluster: &mut Cluster, region: &metapb::Region, cmd: kvproto::raft_cmdpb::Request, ) { diff --git a/tests/integrations/resource_metering/test_read_keys.rs b/tests/integrations/resource_metering/test_read_keys.rs index f7a4ef86906..64c291049d9 100644 --- a/tests/integrations/resource_metering/test_read_keys.rs +++ b/tests/integrations/resource_metering/test_read_keys.rs @@ -4,6 +4,7 @@ use std::{sync::Arc, time::Duration}; use concurrency_manager::ConcurrencyManager; use crossbeam::channel::{unbounded, Receiver, RecvTimeoutError, Sender}; +use engine_rocks::RocksEngine as RocksDb; use grpcio::{ChannelBuilder, Environment}; use kvproto::{coprocessor, kvrpcpb::*, resource_usage_agent::ResourceUsageRecord, tikvpb::*}; use protobuf::Message; @@ -108,7 +109,14 @@ pub fn test_read_keys() { }); } -fn new_cluster(port: u16, env: Arc) -> (Cluster, TikvClient, Context) { +fn new_cluster( + port: u16, + env: Arc, +) -> ( + Cluster>, + TikvClient, + Context, +) { let (cluster, leader, ctx) = must_new_and_configure_cluster(|cluster| { cluster.cfg.resource_metering.receiver_address = format!("127.0.0.1:{}", port); cluster.cfg.resource_metering.precision = ReadableDuration::millis(100); diff --git a/tests/integrations/server/kv_service.rs 
b/tests/integrations/server/kv_service.rs index 24b6a87bfa5..4e087bb07b0 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -10,6 +10,7 @@ use std::{ use api_version::{ApiV1, ApiV1Ttl, ApiV2, KvFormat}; use concurrency_manager::ConcurrencyManager; +use engine_rocks::RocksEngine; use engine_traits::{ MiscExt, Peekable, RaftEngine, RaftEngineReadOnly, RaftLogBatch, SyncMutable, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, @@ -1383,7 +1384,8 @@ fn test_double_run_node() { let mut sim = cluster.sim.wl(); let node = sim.get_node(id).unwrap(); let pd_worker = LazyWorker::new("test-pd-worker"); - let simulate_trans = SimulateTransport::new(ChannelTransport::new()); + let simulate_trans = + SimulateTransport::<_, RocksEngine>::new(ChannelTransport::::new()); let tmp = Builder::new().prefix("test_cluster").tempdir().unwrap(); let snap_mgr = SnapManager::new(tmp.path().to_str().unwrap()); let coprocessor_host = CoprocessorHost::new(router, raftstore::coprocessor::Config::default()); diff --git a/tests/integrations/server/lock_manager.rs b/tests/integrations/server/lock_manager.rs index 289b10303a8..2d8b8d326e3 100644 --- a/tests/integrations/server/lock_manager.rs +++ b/tests/integrations/server/lock_manager.rs @@ -10,6 +10,7 @@ use std::{ time::Duration, }; +use engine_rocks::RocksEngine; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ kvrpcpb::*, @@ -69,7 +70,10 @@ fn deadlock(client: &TikvClient, ctx: Context, key1: &[u8], ts: u64) -> bool { resp.errors[0].has_deadlock() } -fn build_leader_client(cluster: &mut Cluster, key: &[u8]) -> (TikvClient, Context) { +fn build_leader_client( + cluster: &mut Cluster>, + key: &[u8], +) -> (TikvClient, Context) { let region_id = cluster.get_region_id(key); let leader = cluster.leader_of_region(region_id).unwrap(); let epoch = cluster.get_region_epoch(region_id); @@ -88,7 +92,11 @@ fn build_leader_client(cluster: &mut Cluster, key: &[u8]) -> (Tik } /// Creates a deadlock on the 
store containing key. -fn must_detect_deadlock(cluster: &mut Cluster, key: &[u8], ts: u64) { +fn must_detect_deadlock( + cluster: &mut Cluster>, + key: &[u8], + ts: u64, +) { // Sometimes, deadlocks can't be detected at once due to leader change, but it // will be detected. for _ in 0..5 { @@ -100,7 +108,10 @@ fn must_detect_deadlock(cluster: &mut Cluster, key: &[u8], ts: u6 panic!("failed to detect deadlock"); } -fn deadlock_detector_leader_must_be(cluster: &mut Cluster, store_id: u64) { +fn deadlock_detector_leader_must_be( + cluster: &mut Cluster>, + store_id: u64, +) { let leader_region = cluster.get_region(b""); assert_eq!( cluster @@ -115,7 +126,11 @@ fn deadlock_detector_leader_must_be(cluster: &mut Cluster, store_ .region_leader_must_be(leader_region.get_id(), leader_peer); } -fn must_transfer_leader(cluster: &mut Cluster, region_key: &[u8], store_id: u64) { +fn must_transfer_leader( + cluster: &mut Cluster>, + region_key: &[u8], + store_id: u64, +) { let region = cluster.get_region(region_key); let target_peer = find_peer_of_store(®ion, store_id); cluster.must_transfer_leader(region.get_id(), target_peer.clone()); @@ -130,7 +145,7 @@ fn must_transfer_leader(cluster: &mut Cluster, region_key: &[u8], /// REQUIRE: The source store must be the leader the region and the target store /// must not have this region. 
fn must_transfer_region( - cluster: &mut Cluster, + cluster: &mut Cluster>, region_key: &[u8], source_store_id: u64, target_store_id: u64, @@ -149,14 +164,18 @@ fn must_transfer_region( cluster.must_put(region_key, b"v"); } -fn must_split_region(cluster: &mut Cluster, region_key: &[u8], split_key: &[u8]) { +fn must_split_region( + cluster: &mut Cluster>, + region_key: &[u8], + split_key: &[u8], +) { let region = cluster.get_region(region_key); cluster.must_split(®ion, split_key); cluster.must_put(split_key, b"v"); } fn must_merge_region( - cluster: &mut Cluster, + cluster: &mut Cluster>, source_region_key: &[u8], target_region_key: &[u8], ) { @@ -179,7 +198,7 @@ fn find_peer_of_store(region: &Region, store_id: u64) -> Peer { /// Creates a cluster with only one region and store(1) is the leader of the /// region. -fn new_cluster_for_deadlock_test(count: usize) -> Cluster { +fn new_cluster_for_deadlock_test(count: usize) -> Cluster> { let mut cluster = new_server_cluster(0, count); cluster.cfg.pessimistic_txn.wait_for_lock_timeout = ReadableDuration::millis(500); cluster.cfg.pessimistic_txn.pipelined = false; diff --git a/tests/integrations/server_encryption.rs b/tests/integrations/server_encryption.rs index 7c88afde76a..041b15fd953 100644 --- a/tests/integrations/server_encryption.rs +++ b/tests/integrations/server_encryption.rs @@ -1,8 +1,9 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. 
+use engine_rocks::RocksEngine; use test_raftstore::*; -fn test_snapshot_encryption(cluster: &mut Cluster) { +fn test_snapshot_encryption>(cluster: &mut Cluster) { configure_for_encryption(cluster); cluster.pd_client.disable_default_operator(); let r1 = cluster.run_conf_change(); diff --git a/tests/integrations/storage/test_raft_storage.rs b/tests/integrations/storage/test_raft_storage.rs index 684b7a261fb..1b3ba6dc43b 100644 --- a/tests/integrations/storage/test_raft_storage.rs +++ b/tests/integrations/storage/test_raft_storage.rs @@ -8,6 +8,7 @@ use std::{ use api_version::{ApiV1, KvFormat}; use collections::HashMap; +use engine_rocks::RocksEngine; use error_code::{raftstore::STALE_COMMAND, ErrorCodeExt}; use kvproto::kvrpcpb::Context; use test_raftstore::*; @@ -25,8 +26,8 @@ use tikv_util::HandyRwLock; use txn_types::{Key, Mutation, TimeStamp}; fn new_raft_storage() -> ( - Cluster, - SyncTestStorageApiV1, + Cluster>, + SyncTestStorageApiV1>, Context, ) { new_raft_storage_with_store_count::(1, "") @@ -234,7 +235,7 @@ fn write_test_data( } fn check_data( - cluster: &mut Cluster, + cluster: &mut Cluster>, storages: &HashMap>, test_data: &[(Vec, Vec)], ts: impl Into, diff --git a/tests/integrations/storage/test_raftkv.rs b/tests/integrations/storage/test_raftkv.rs index 1fb8075e10f..4129d5bc721 100644 --- a/tests/integrations/storage/test_raftkv.rs +++ b/tests/integrations/storage/test_raftkv.rs @@ -4,6 +4,7 @@ use std::{ thread, time, }; +use engine_rocks::RocksEngine as RocksDb; use engine_traits::{CfName, IterOptions, CF_DEFAULT}; use futures::executor::block_on; use kvproto::kvrpcpb::{Context, KeyRange}; @@ -323,7 +324,7 @@ fn test_invalid_read_index_when_no_leader() { true, ); request.mut_header().set_peer(follower.clone()); - let (cb, mut rx) = make_cb(&request); + let (cb, mut rx) = make_cb::(&request); cluster .sim .rl() diff --git a/tests/integrations/storage/test_region_info_accessor.rs b/tests/integrations/storage/test_region_info_accessor.rs index 
2df7238e1a9..344f9c6607e 100644 --- a/tests/integrations/storage/test_region_info_accessor.rs +++ b/tests/integrations/storage/test_region_info_accessor.rs @@ -3,12 +3,15 @@ use std::{sync::mpsc::channel, thread, time::Duration}; use collections::HashMap; +use engine_rocks::RocksEngine; use kvproto::metapb::Region; use raftstore::coprocessor::{RegionInfoAccessor, RegionInfoProvider}; use test_raftstore::*; use tikv_util::HandyRwLock; -fn prepare_cluster(cluster: &mut Cluster) -> Vec { +fn prepare_cluster>( + cluster: &mut Cluster, +) -> Vec { for i in 0..15 { let i = i + b'0'; let key = vec![b'k', i]; From 37b1dce422fd2bd077d7cfd707a2e8ec716c2aae Mon Sep 17 00:00:00 2001 From: crazycs Date: Wed, 20 Dec 2023 00:16:52 +0800 Subject: [PATCH 1065/1149] *: add log and metric in raft_client (#16143) ref tikv/tikv#12362 add log and metric in raft_client. Signed-off-by: crazycs520 --- metrics/grafana/tikv_details.dashboard.py | 11 + metrics/grafana/tikv_details.json | 1055 +++++++++++++-------- metrics/grafana/tikv_details.json.sha256 | 2 +- src/server/metrics.rs | 7 + src/server/raft_client.rs | 21 +- 5 files changed, 719 insertions(+), 377 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 5c84152174e..29159740967 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -1857,6 +1857,17 @@ def RaftIO() -> RowPanel: metric="tikv_raftstore_apply_log_duration_seconds", ) ) + layout.row( + heatmap_panel_graph_panel_histogram_quantile_pairs( + heatmap_title="Raft Client Wait Connection Ready Duration", + heatmap_description="The time consumed for Raft Client wait connection ready", + graph_title="99% Raft Client Wait Connection Ready Duration", + graph_description="The time consumed for Raft Client wait connection ready per TiKV instance", + yaxis_format=UNITS.SECONDS, + metric="tikv_server_raft_client_wait_ready_duration", + graph_by_labels=["to"], + ) + ) 
layout.row( [ graph_panel( diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 8c6bea17992..88821ac7538 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -16736,6 +16736,311 @@ "alignLevel": 0 } }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed for Raft Client wait connection ready", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 35 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 121, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Raft Client Wait Connection Ready 
Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed for Raft Client wait connection ready per TiKV instance", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 35 + }, + "height": null, + "hideTimeOverride": false, + "id": 122, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, 
+ "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{to}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{to}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_server_raft_client_wait_ready_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) / 
sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{to}}", + "metric": "", + "query": "(sum(rate(\n tikv_server_raft_client_wait_ready_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) / sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{to}}", + "metric": "", + "query": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "99% Raft Client Wait Connection Ready Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, { "aliasColors": {}, "bars": false, @@ -16764,11 +17069,11 @@ "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 121, + "id": 123, "interval": null, "isNew": true, "legend": { @@ -16912,11 +17217,11 @@ "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 122, + "id": 124, "interval": null, "isNew": true, "legend": { @@ -17052,7 +17357,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 123, + "id": 125, "interval": null, "links": [], "maxDataPoints": 100, @@ -17091,7 +17396,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 124, + "id": 126, "interval": null, "isNew": true, "legend": { @@ -17224,7 +17529,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 125, + "id": 127, "interval": null, "isNew": true, "legend": { @@ -17357,7 +17662,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 126, + "id": 128, "interval": null, "isNew": true, "legend": { @@ -17490,7 +17795,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 127, + "id": 129, "interval": null, "isNew": true, "legend": { @@ -17630,7 +17935,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 128, + "id": 130, "interval": null, "legend": { "show": false @@ -17727,7 +18032,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 129, + "id": 131, "interval": null, "isNew": true, "legend": { @@ -17935,7 +18240,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 130, + "id": 132, "interval": null, "legend": { "show": false @@ -18032,7 +18337,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 131, + "id": 133, "interval": null, "isNew": true, "legend": { @@ -18240,7 +18545,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 132, + "id": 134, 
"interval": null, "legend": { "show": false @@ -18337,7 +18642,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 133, + "id": 135, "interval": null, "isNew": true, "legend": { @@ -18545,7 +18850,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 134, + "id": 136, "interval": null, "legend": { "show": false @@ -18649,7 +18954,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 135, + "id": 137, "interval": null, "legend": { "show": false @@ -18746,7 +19051,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 136, + "id": 138, "interval": null, "isNew": true, "legend": { @@ -18879,7 +19184,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 137, + "id": 139, "interval": null, "isNew": true, "legend": { @@ -19030,7 +19335,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 138, + "id": 140, "interval": null, "links": [], "maxDataPoints": 100, @@ -19069,7 +19374,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 139, + "id": 141, "interval": null, "isNew": true, "legend": { @@ -19217,7 +19522,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 140, + "id": 142, "interval": null, "isNew": true, "legend": { @@ -19372,7 +19677,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 141, + "id": 143, "interval": null, "legend": { "show": false @@ -19476,7 +19781,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 142, + "id": 144, "interval": null, "legend": { "show": false @@ -19576,7 +19881,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 143, + "id": 145, "interval": null, "links": [], "maxDataPoints": 100, @@ -19615,7 +19920,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 144, + "id": 146, "interval": null, "isNew": true, "legend": { @@ -19748,7 +20053,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 145, + "id": 147, "interval": null, "isNew": true, 
"legend": { @@ -19881,7 +20186,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 146, + "id": 148, "interval": null, "isNew": true, "legend": { @@ -20014,7 +20319,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 147, + "id": 149, "interval": null, "isNew": true, "legend": { @@ -20147,7 +20452,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 148, + "id": 150, "interval": null, "isNew": true, "legend": { @@ -20280,7 +20585,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 149, + "id": 151, "interval": null, "isNew": true, "legend": { @@ -20416,7 +20721,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 150, + "id": 152, "interval": null, "links": [], "maxDataPoints": 100, @@ -20455,7 +20760,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 151, + "id": 153, "interval": null, "isNew": true, "legend": { @@ -20588,7 +20893,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 152, + "id": 154, "interval": null, "isNew": true, "legend": { @@ -20721,7 +21026,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 153, + "id": 155, "interval": null, "isNew": true, "legend": { @@ -20854,7 +21159,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 154, + "id": 156, "interval": null, "isNew": true, "legend": { @@ -20987,7 +21292,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 155, + "id": 157, "interval": null, "isNew": true, "legend": { @@ -21120,7 +21425,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 156, + "id": 158, "interval": null, "isNew": true, "legend": { @@ -21283,7 +21588,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 157, + "id": 159, "interval": null, "isNew": true, "legend": { @@ -21419,7 +21724,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 158, + "id": 160, "interval": null, "links": [], "maxDataPoints": 100, @@ -21458,7 +21763,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 159, + "id": 161, "interval": null, "isNew": true, "legend": 
{ @@ -21606,7 +21911,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 160, + "id": 162, "interval": null, "isNew": true, "legend": { @@ -21754,7 +22059,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 161, + "id": 163, "interval": null, "isNew": true, "legend": { @@ -21887,7 +22192,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 162, + "id": 164, "interval": null, "isNew": true, "legend": { @@ -22020,7 +22325,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 163, + "id": 165, "interval": null, "isNew": true, "legend": { @@ -22153,7 +22458,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 164, + "id": 166, "interval": null, "isNew": true, "legend": { @@ -22286,7 +22591,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 165, + "id": 167, "interval": null, "isNew": true, "legend": { @@ -22419,7 +22724,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 166, + "id": 168, "interval": null, "isNew": true, "legend": { @@ -22552,7 +22857,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 167, + "id": 169, "interval": null, "isNew": true, "legend": { @@ -22729,7 +23034,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 168, + "id": 170, "interval": null, "links": [], "maxDataPoints": 100, @@ -22768,7 +23073,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 169, + "id": 171, "interval": null, "isNew": true, "legend": { @@ -22944,7 +23249,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 170, + "id": 172, "interval": null, "links": [], "maxDataPoints": 100, @@ -22983,7 +23288,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 171, + "id": 173, "interval": null, "isNew": true, "legend": { @@ -23116,7 +23421,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 172, + "id": 174, "interval": null, "isNew": true, "legend": { @@ -23249,7 +23554,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 173, + "id": 175, "interval": null, "isNew": true, "legend": { @@ 
-23389,7 +23694,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 174, + "id": 176, "interval": null, "legend": { "show": false @@ -23486,7 +23791,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 175, + "id": 177, "interval": null, "isNew": true, "legend": { @@ -23687,7 +23992,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 176, + "id": 178, "interval": null, "isNew": true, "legend": { @@ -23888,7 +24193,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 177, + "id": 179, "interval": null, "isNew": true, "legend": { @@ -24092,7 +24397,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 178, + "id": 180, "interval": null, "links": [], "maxDataPoints": 100, @@ -24131,7 +24436,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 179, + "id": 181, "interval": null, "isNew": true, "legend": { @@ -24264,7 +24569,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 180, + "id": 182, "interval": null, "isNew": true, "legend": { @@ -24404,7 +24709,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 181, + "id": 183, "interval": null, "legend": { "show": false @@ -24501,7 +24806,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 182, + "id": 184, "interval": null, "isNew": true, "legend": { @@ -24709,7 +25014,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 183, + "id": 185, "interval": null, "legend": { "show": false @@ -24806,7 +25111,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 184, + "id": 186, "interval": null, "isNew": true, "legend": { @@ -25014,7 +25319,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 185, + "id": 187, "interval": null, "legend": { "show": false @@ -25111,7 +25416,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 186, + "id": 188, "interval": null, "isNew": true, "legend": { @@ -25319,7 +25624,7 @@ "hideTimeOverride": 
false, "hideZeroBuckets": true, "highlightCards": true, - "id": 187, + "id": 189, "interval": null, "legend": { "show": false @@ -25416,7 +25721,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 188, + "id": 190, "interval": null, "isNew": true, "legend": { @@ -25624,7 +25929,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 189, + "id": 191, "interval": null, "legend": { "show": false @@ -25721,7 +26026,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 190, + "id": 192, "interval": null, "isNew": true, "legend": { @@ -25922,7 +26227,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 191, + "id": 193, "interval": null, "isNew": true, "legend": { @@ -26055,7 +26360,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 192, + "id": 194, "interval": null, "isNew": true, "legend": { @@ -26256,7 +26561,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 193, + "id": 195, "interval": null, "isNew": true, "legend": { @@ -26457,7 +26762,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 194, + "id": 196, "interval": null, "isNew": true, "legend": { @@ -26661,7 +26966,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 195, + "id": 197, "interval": null, "links": [], "maxDataPoints": 100, @@ -26700,7 +27005,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 196, + "id": 198, "interval": null, "isNew": true, "legend": { @@ -26848,7 +27153,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 197, + "id": 199, "interval": null, "isNew": true, "legend": { @@ -26988,7 +27293,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 198, + "id": 200, "interval": null, "legend": { "show": false @@ -27085,7 +27390,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 199, + "id": 201, "interval": null, "isNew": true, "legend": { @@ -27218,7 +27523,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 200, + "id": 202, "interval": null, 
"isNew": true, "legend": { @@ -27351,7 +27656,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 201, + "id": 203, "interval": null, "isNew": true, "legend": { @@ -27529,7 +27834,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 202, + "id": 204, "interval": null, "isNew": true, "legend": { @@ -27692,7 +27997,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 203, + "id": 205, "interval": null, "isNew": true, "legend": { @@ -27840,7 +28145,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 204, + "id": 206, "interval": null, "isNew": true, "legend": { @@ -27973,7 +28278,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 205, + "id": 207, "interval": null, "isNew": true, "legend": { @@ -28109,7 +28414,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 206, + "id": 208, "interval": null, "links": [], "maxDataPoints": 100, @@ -28148,7 +28453,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 207, + "id": 209, "interval": null, "isNew": true, "legend": { @@ -28296,7 +28601,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 208, + "id": 210, "interval": null, "isNew": true, "legend": { @@ -28497,7 +28802,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 209, + "id": 211, "interval": null, "isNew": true, "legend": { @@ -28698,7 +29003,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 210, + "id": 212, "interval": null, "isNew": true, "legend": { @@ -28899,7 +29204,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 211, + "id": 213, "interval": null, "isNew": true, "legend": { @@ -29100,7 +29405,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 212, + "id": 214, "interval": null, "isNew": true, "legend": { @@ -29233,7 +29538,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 213, + "id": 215, "interval": null, "isNew": true, "legend": { @@ -29366,7 +29671,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 214, + "id": 216, "interval": null, "isNew": true, 
"legend": { @@ -29499,7 +29804,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 215, + "id": 217, "interval": null, "isNew": true, "legend": { @@ -29632,7 +29937,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 216, + "id": 218, "interval": null, "isNew": true, "legend": { @@ -29840,7 +30145,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 217, + "id": 219, "interval": null, "legend": { "show": false @@ -29940,7 +30245,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 218, + "id": 220, "interval": null, "links": [], "maxDataPoints": 100, @@ -29979,7 +30284,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 219, + "id": 221, "interval": null, "isNew": true, "legend": { @@ -30127,7 +30432,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 220, + "id": 222, "interval": null, "isNew": true, "legend": { @@ -30260,7 +30565,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 221, + "id": 223, "interval": null, "isNew": true, "legend": { @@ -30393,7 +30698,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 222, + "id": 224, "interval": null, "isNew": true, "legend": { @@ -30533,7 +30838,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 223, + "id": 225, "interval": null, "legend": { "show": false @@ -30633,7 +30938,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 224, + "id": 226, "interval": null, "links": [], "maxDataPoints": 100, @@ -30672,7 +30977,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 225, + "id": 227, "interval": null, "isNew": true, "legend": { @@ -30850,7 +31155,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 226, + "id": 228, "interval": null, "isNew": true, "legend": { @@ -31051,7 +31356,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 227, + "id": 229, "interval": null, "isNew": true, "legend": { @@ -31184,7 +31489,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 
228, + "id": 230, "interval": null, "isNew": true, "legend": { @@ -31317,7 +31622,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 229, + "id": 231, "interval": null, "isNew": true, "legend": { @@ -31450,7 +31755,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 230, + "id": 232, "interval": null, "isNew": true, "legend": { @@ -31583,7 +31888,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 231, + "id": 233, "interval": null, "isNew": true, "legend": { @@ -31716,7 +32021,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 232, + "id": 234, "interval": null, "isNew": true, "legend": { @@ -31845,7 +32150,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 233, + "id": 235, "interval": null, "links": [], "maxDataPoints": 100, @@ -31920,7 +32225,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 234, + "id": 236, "interval": null, "links": [], "maxDataPoints": 100, @@ -31999,7 +32304,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 235, + "id": 237, "interval": null, "isNew": true, "legend": { @@ -32252,7 +32557,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 236, + "id": 238, "interval": null, "isNew": true, "legend": { @@ -32385,7 +32690,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 237, + "id": 239, "interval": null, "isNew": true, "legend": { @@ -32521,7 +32826,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 238, + "id": 240, "interval": null, "links": [], "maxDataPoints": 100, @@ -32560,7 +32865,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 239, + "id": 241, "interval": null, "isNew": true, "legend": { @@ -32693,7 +32998,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 240, + "id": 242, "interval": null, "isNew": true, "legend": { @@ -32826,7 +33131,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 241, + "id": 243, "interval": null, "isNew": true, "legend": { @@ -32959,7 +33264,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 
242, + "id": 244, "interval": null, "isNew": true, "legend": { @@ -33122,7 +33427,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 243, + "id": 245, "interval": null, "isNew": true, "legend": { @@ -33255,7 +33560,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 244, + "id": 246, "interval": null, "isNew": true, "legend": { @@ -33388,7 +33693,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 245, + "id": 247, "interval": null, "isNew": true, "legend": { @@ -33536,7 +33841,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 246, + "id": 248, "interval": null, "isNew": true, "legend": { @@ -33687,7 +33992,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 247, + "id": 249, "interval": null, "links": [], "maxDataPoints": 100, @@ -33726,7 +34031,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 248, + "id": 250, "interval": null, "isNew": true, "legend": { @@ -33859,7 +34164,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 249, + "id": 251, "interval": null, "isNew": true, "legend": { @@ -33992,7 +34297,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 250, + "id": 252, "interval": null, "isNew": true, "legend": { @@ -34125,7 +34430,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 251, + "id": 253, "interval": null, "isNew": true, "legend": { @@ -34261,7 +34566,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 252, + "id": 254, "interval": null, "links": [], "maxDataPoints": 100, @@ -34307,7 +34612,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 253, + "id": 255, "interval": null, "legend": { "show": false @@ -34404,7 +34709,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 254, + "id": 256, "interval": null, "isNew": true, "legend": { @@ -34605,7 +34910,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 255, + "id": 257, "interval": null, "isNew": true, "legend": { @@ -34738,7 +35043,7 @@ }, "height": null, 
"hideTimeOverride": false, - "id": 256, + "id": 258, "interval": null, "isNew": true, "legend": { @@ -34871,7 +35176,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 257, + "id": 259, "interval": null, "isNew": true, "legend": { @@ -35004,7 +35309,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 258, + "id": 260, "interval": null, "isNew": true, "legend": { @@ -35205,7 +35510,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 259, + "id": 261, "interval": null, "isNew": true, "legend": { @@ -35338,7 +35643,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 260, + "id": 262, "interval": null, "isNew": true, "legend": { @@ -35474,7 +35779,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 261, + "id": 263, "interval": null, "links": [], "maxDataPoints": 100, @@ -35513,7 +35818,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 262, + "id": 264, "interval": null, "isNew": true, "legend": { @@ -35714,7 +36019,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 263, + "id": 265, "interval": null, "isNew": true, "legend": { @@ -35915,7 +36220,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 264, + "id": 266, "interval": null, "isNew": true, "legend": { @@ -36116,7 +36421,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 265, + "id": 267, "interval": null, "isNew": true, "legend": { @@ -36317,7 +36622,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 266, + "id": 268, "interval": null, "isNew": true, "legend": { @@ -36450,7 +36755,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 267, + "id": 269, "interval": null, "isNew": true, "legend": { @@ -36583,7 +36888,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 268, + "id": 270, "interval": null, "isNew": true, "legend": { @@ -36716,7 +37021,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 269, + "id": 271, "interval": null, "isNew": true, "legend": { @@ -36849,7 +37154,7 @@ }, "height": null, 
"hideTimeOverride": false, - "id": 270, + "id": 272, "interval": null, "isNew": true, "legend": { @@ -36982,7 +37287,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 271, + "id": 273, "interval": null, "isNew": true, "legend": { @@ -37122,7 +37427,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 272, + "id": 274, "interval": null, "legend": { "show": false @@ -37219,7 +37524,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 273, + "id": 275, "interval": null, "isNew": true, "legend": { @@ -37423,7 +37728,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 274, + "id": 276, "interval": null, "links": [], "maxDataPoints": 100, @@ -37462,7 +37767,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 275, + "id": 277, "interval": null, "isNew": true, "legend": { @@ -37610,7 +37915,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 276, + "id": 278, "interval": null, "isNew": true, "legend": { @@ -37743,7 +38048,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 277, + "id": 279, "interval": null, "isNew": true, "legend": { @@ -37876,7 +38181,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 278, + "id": 280, "interval": null, "isNew": true, "legend": { @@ -38012,7 +38317,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 279, + "id": 281, "interval": null, "links": [], "maxDataPoints": 100, @@ -38051,7 +38356,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 280, + "id": 282, "interval": null, "isNew": true, "legend": { @@ -38244,7 +38549,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 281, + "id": 283, "interval": null, "isNew": true, "legend": { @@ -38422,7 +38727,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 282, + "id": 284, "interval": null, "isNew": true, "legend": { @@ -38630,7 +38935,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 283, + "id": 285, "interval": null, "isNew": true, "legend": { @@ -38808,7 +39113,7 @@ 
}, "height": null, "hideTimeOverride": false, - "id": 284, + "id": 286, "interval": null, "isNew": true, "legend": { @@ -38971,7 +39276,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 285, + "id": 287, "interval": null, "isNew": true, "legend": { @@ -39149,7 +39454,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 286, + "id": 288, "interval": null, "isNew": true, "legend": { @@ -39282,7 +39587,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 287, + "id": 289, "interval": null, "isNew": true, "legend": { @@ -39460,7 +39765,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 288, + "id": 290, "interval": null, "isNew": true, "legend": { @@ -39593,7 +39898,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 289, + "id": 291, "interval": null, "isNew": true, "legend": { @@ -39771,7 +40076,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 290, + "id": 292, "interval": null, "isNew": true, "legend": { @@ -39904,7 +40209,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 291, + "id": 293, "interval": null, "isNew": true, "legend": { @@ -40082,7 +40387,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 292, + "id": 294, "interval": null, "isNew": true, "legend": { @@ -40260,7 +40565,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 293, + "id": 295, "interval": null, "isNew": true, "legend": { @@ -40393,7 +40698,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 294, + "id": 296, "interval": null, "isNew": true, "legend": { @@ -40526,7 +40831,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 295, + "id": 297, "interval": null, "isNew": true, "legend": { @@ -40659,7 +40964,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 296, + "id": 298, "interval": null, "isNew": true, "legend": { @@ -40882,7 +41187,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 297, + "id": 299, "interval": null, "isNew": true, "legend": { @@ -41075,7 +41380,7 @@ }, "height": null, 
"hideTimeOverride": false, - "id": 298, + "id": 300, "interval": null, "isNew": true, "legend": { @@ -41238,7 +41543,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 299, + "id": 301, "interval": null, "isNew": true, "legend": { @@ -41431,7 +41736,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 300, + "id": 302, "interval": null, "isNew": true, "legend": { @@ -41579,7 +41884,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 301, + "id": 303, "interval": null, "isNew": true, "legend": { @@ -41712,7 +42017,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 302, + "id": 304, "interval": null, "isNew": true, "legend": { @@ -41860,7 +42165,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 303, + "id": 305, "interval": null, "isNew": true, "legend": { @@ -42038,7 +42343,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 304, + "id": 306, "interval": null, "isNew": true, "legend": { @@ -42201,7 +42506,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 305, + "id": 307, "interval": null, "isNew": true, "legend": { @@ -42379,7 +42684,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 306, + "id": 308, "interval": null, "isNew": true, "legend": { @@ -42512,7 +42817,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 307, + "id": 309, "interval": null, "isNew": true, "legend": { @@ -42645,7 +42950,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 308, + "id": 310, "interval": null, "isNew": true, "legend": { @@ -42778,7 +43083,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 309, + "id": 311, "interval": null, "isNew": true, "legend": { @@ -42911,7 +43216,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 310, + "id": 312, "interval": null, "isNew": true, "legend": { @@ -43044,7 +43349,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 311, + "id": 313, "interval": null, "isNew": true, "legend": { @@ -43177,7 +43482,7 @@ }, "height": null, "hideTimeOverride": 
false, - "id": 312, + "id": 314, "interval": null, "isNew": true, "legend": { @@ -43310,7 +43615,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 313, + "id": 315, "interval": null, "isNew": true, "legend": { @@ -43511,7 +43816,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 314, + "id": 316, "interval": null, "isNew": true, "legend": { @@ -43644,7 +43949,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 315, + "id": 317, "interval": null, "isNew": true, "legend": { @@ -43829,7 +44134,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 316, + "id": 318, "interval": null, "legend": { "show": false @@ -43926,7 +44231,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 317, + "id": 319, "interval": null, "isNew": true, "legend": { @@ -44062,7 +44367,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 318, + "id": 320, "interval": null, "links": [], "maxDataPoints": 100, @@ -44101,7 +44406,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 319, + "id": 321, "interval": null, "isNew": true, "legend": { @@ -44264,7 +44569,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 320, + "id": 322, "interval": null, "isNew": true, "legend": { @@ -44465,7 +44770,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 321, + "id": 323, "interval": null, "isNew": true, "legend": { @@ -44613,7 +44918,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 322, + "id": 324, "interval": null, "isNew": true, "legend": { @@ -44776,7 +45081,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 323, + "id": 325, "interval": null, "isNew": true, "legend": { @@ -44977,7 +45282,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 324, + "id": 326, "interval": null, "isNew": true, "legend": { @@ -45155,7 +45460,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 325, + "id": 327, "interval": null, "isNew": true, "legend": { @@ -45318,7 +45623,7 @@ }, "height": null, 
"hideTimeOverride": false, - "id": 326, + "id": 328, "interval": null, "isNew": true, "legend": { @@ -45481,7 +45786,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 327, + "id": 329, "interval": null, "isNew": true, "legend": { @@ -45617,7 +45922,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 328, + "id": 330, "interval": null, "links": [], "maxDataPoints": 100, @@ -45656,7 +45961,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 329, + "id": 331, "interval": null, "isNew": true, "legend": { @@ -45804,7 +46109,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 330, + "id": 332, "interval": null, "isNew": true, "legend": { @@ -45952,7 +46257,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 331, + "id": 333, "interval": null, "isNew": true, "legend": { @@ -46085,7 +46390,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 332, + "id": 334, "interval": null, "isNew": true, "legend": { @@ -46218,7 +46523,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 333, + "id": 335, "interval": null, "isNew": true, "legend": { @@ -46396,7 +46701,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 334, + "id": 336, "interval": null, "isNew": true, "legend": { @@ -46529,7 +46834,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 335, + "id": 337, "interval": null, "isNew": true, "legend": { @@ -46707,7 +47012,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 336, + "id": 338, "interval": null, "isNew": true, "legend": { @@ -46885,7 +47190,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 337, + "id": 339, "interval": null, "isNew": true, "legend": { @@ -47018,7 +47323,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 338, + "id": 340, "interval": null, "isNew": true, "legend": { @@ -47196,7 +47501,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 339, + "id": 341, "interval": null, "isNew": true, "legend": { @@ -47329,7 +47634,7 @@ }, "height": null, 
"hideTimeOverride": false, - "id": 340, + "id": 342, "interval": null, "isNew": true, "legend": { @@ -47492,7 +47797,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 341, + "id": 343, "interval": null, "isNew": true, "legend": { @@ -47670,7 +47975,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 342, + "id": 344, "interval": null, "isNew": true, "legend": { @@ -47848,7 +48153,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 343, + "id": 345, "interval": null, "isNew": true, "legend": { @@ -48026,7 +48331,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 344, + "id": 346, "interval": null, "isNew": true, "legend": { @@ -48159,7 +48464,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 345, + "id": 347, "interval": null, "isNew": true, "legend": { @@ -48337,7 +48642,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 346, + "id": 348, "interval": null, "isNew": true, "legend": { @@ -48470,7 +48775,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 347, + "id": 349, "interval": null, "isNew": true, "legend": { @@ -48648,7 +48953,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 348, + "id": 350, "interval": null, "isNew": true, "legend": { @@ -48781,7 +49086,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 349, + "id": 351, "interval": null, "isNew": true, "legend": { @@ -48914,7 +49219,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 350, + "id": 352, "interval": null, "isNew": true, "legend": { @@ -49092,7 +49397,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 351, + "id": 353, "interval": null, "isNew": true, "legend": { @@ -49270,7 +49575,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 352, + "id": 354, "interval": null, "isNew": true, "legend": { @@ -49403,7 +49708,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 353, + "id": 355, "interval": null, "isNew": true, "legend": { @@ -49581,7 +49886,7 @@ }, "height": null, "hideTimeOverride": 
false, - "id": 354, + "id": 356, "interval": null, "isNew": true, "legend": { @@ -49714,7 +50019,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 355, + "id": 357, "interval": null, "isNew": true, "legend": { @@ -49892,7 +50197,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 356, + "id": 358, "interval": null, "isNew": true, "legend": { @@ -50028,7 +50333,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 357, + "id": 359, "interval": null, "links": [], "maxDataPoints": 100, @@ -50067,7 +50372,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 358, + "id": 360, "interval": null, "isNew": true, "legend": { @@ -50215,7 +50520,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 359, + "id": 361, "interval": null, "isNew": true, "legend": { @@ -50348,7 +50653,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 360, + "id": 362, "interval": null, "isNew": true, "legend": { @@ -50549,7 +50854,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 361, + "id": 363, "interval": null, "isNew": true, "legend": { @@ -50697,7 +51002,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 362, + "id": 364, "interval": null, "isNew": true, "legend": { @@ -50898,7 +51203,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 363, + "id": 365, "interval": null, "isNew": true, "legend": { @@ -51031,7 +51336,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 364, + "id": 366, "interval": null, "isNew": true, "legend": { @@ -51164,7 +51469,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 365, + "id": 367, "interval": null, "isNew": true, "legend": { @@ -51297,7 +51602,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 366, + "id": 368, "interval": null, "isNew": true, "legend": { @@ -51430,7 +51735,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 367, + "id": 369, "interval": null, "isNew": true, "legend": { @@ -51570,7 +51875,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, 
"highlightCards": true, - "id": 368, + "id": 370, "interval": null, "legend": { "show": false @@ -51670,7 +51975,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 369, + "id": 371, "interval": null, "links": [], "maxDataPoints": 100, @@ -51709,7 +52014,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 370, + "id": 372, "interval": null, "isNew": true, "legend": { @@ -51842,7 +52147,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 371, + "id": 373, "interval": null, "isNew": true, "legend": { @@ -52005,7 +52310,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 372, + "id": 374, "interval": null, "isNew": true, "legend": { @@ -52153,7 +52458,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 373, + "id": 375, "interval": null, "isNew": true, "legend": { @@ -52293,7 +52598,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 374, + "id": 376, "interval": null, "legend": { "show": false @@ -52397,7 +52702,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 375, + "id": 377, "interval": null, "legend": { "show": false @@ -52501,7 +52806,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 376, + "id": 378, "interval": null, "legend": { "show": false @@ -52598,7 +52903,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 377, + "id": 379, "interval": null, "isNew": true, "legend": { @@ -52738,7 +53043,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 378, + "id": 380, "interval": null, "legend": { "show": false @@ -52842,7 +53147,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 379, + "id": 381, "interval": null, "legend": { "show": false @@ -52946,7 +53251,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 380, + "id": 382, "interval": null, "legend": { "show": false @@ -53043,7 +53348,7 @@ }, 
"height": null, "hideTimeOverride": false, - "id": 381, + "id": 383, "interval": null, "isNew": true, "legend": { @@ -53176,7 +53481,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 382, + "id": 384, "interval": null, "isNew": true, "legend": { @@ -53309,7 +53614,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 383, + "id": 385, "interval": null, "isNew": true, "legend": { @@ -53449,7 +53754,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 384, + "id": 386, "interval": null, "legend": { "show": false @@ -53546,7 +53851,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 385, + "id": 387, "interval": null, "isNew": true, "legend": { @@ -53682,7 +53987,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 386, + "id": 388, "interval": null, "links": [], "maxDataPoints": 100, @@ -53721,7 +54026,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 387, + "id": 389, "interval": null, "isNew": true, "legend": { @@ -53854,7 +54159,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 388, + "id": 390, "interval": null, "isNew": true, "legend": { @@ -53987,7 +54292,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 389, + "id": 391, "interval": null, "isNew": true, "legend": { @@ -54120,7 +54425,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 390, + "id": 392, "interval": null, "isNew": true, "legend": { @@ -54253,7 +54558,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 391, + "id": 393, "interval": null, "isNew": true, "legend": { @@ -54386,7 +54691,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 392, + "id": 394, "interval": null, "isNew": true, "legend": { @@ -54519,7 +54824,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 393, + "id": 395, "interval": null, "isNew": true, "legend": { @@ -54659,7 +54964,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 394, + "id": 396, "interval": null, "legend": { 
"show": false @@ -54756,7 +55061,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 395, + "id": 397, "interval": null, "isNew": true, "legend": { @@ -54889,7 +55194,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 396, + "id": 398, "interval": null, "isNew": true, "legend": { @@ -55029,7 +55334,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 397, + "id": 399, "interval": null, "legend": { "show": false @@ -55126,7 +55431,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 398, + "id": 400, "interval": null, "isNew": true, "legend": { @@ -55259,7 +55564,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 399, + "id": 401, "interval": null, "isNew": true, "legend": { @@ -55392,7 +55697,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 400, + "id": 402, "interval": null, "isNew": true, "legend": { @@ -55525,7 +55830,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 401, + "id": 403, "interval": null, "isNew": true, "legend": { @@ -55673,7 +55978,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 402, + "id": 404, "interval": null, "isNew": true, "legend": { @@ -55821,7 +56126,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 403, + "id": 405, "interval": null, "isNew": true, "legend": { @@ -55957,7 +56262,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 404, + "id": 406, "interval": null, "links": [], "maxDataPoints": 100, @@ -55996,7 +56301,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 405, + "id": 407, "interval": null, "isNew": true, "legend": { @@ -56129,7 +56434,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 406, + "id": 408, "interval": null, "isNew": true, "legend": { @@ -56262,7 +56567,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 407, + "id": 409, "interval": null, "isNew": true, "legend": { @@ -56395,7 +56700,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 408, + "id": 410, "interval": null, 
"isNew": true, "legend": { @@ -56531,7 +56836,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 409, + "id": 411, "interval": null, "links": [], "maxDataPoints": 100, @@ -56570,7 +56875,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 410, + "id": 412, "interval": null, "isNew": true, "legend": { @@ -56733,7 +57038,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 411, + "id": 413, "interval": null, "isNew": true, "legend": { @@ -56866,7 +57171,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 412, + "id": 414, "interval": null, "isNew": true, "legend": { @@ -57006,7 +57311,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 413, + "id": 415, "interval": null, "legend": { "show": false @@ -57110,7 +57415,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 414, + "id": 416, "interval": null, "legend": { "show": false @@ -57207,7 +57512,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 415, + "id": 417, "interval": null, "isNew": true, "legend": { @@ -57362,7 +57667,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 416, + "id": 418, "interval": null, "legend": { "show": false @@ -57466,7 +57771,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 417, + "id": 419, "interval": null, "legend": { "show": false @@ -57570,7 +57875,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 418, + "id": 420, "interval": null, "legend": { "show": false @@ -57667,7 +57972,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 419, + "id": 421, "interval": null, "isNew": true, "legend": { @@ -57837,7 +58142,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 420, + "id": 422, "interval": null, "legend": { "show": false @@ -57934,7 +58239,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 421, + "id": 423, 
"interval": null, "isNew": true, "legend": { @@ -58135,7 +58440,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 422, + "id": 424, "interval": null, "isNew": true, "legend": { @@ -58336,7 +58641,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 423, + "id": 425, "interval": null, "isNew": true, "legend": { @@ -58469,7 +58774,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 424, + "id": 426, "interval": null, "isNew": true, "legend": { @@ -58632,7 +58937,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 425, + "id": 427, "interval": null, "isNew": true, "legend": { @@ -58765,7 +59070,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 426, + "id": 428, "interval": null, "isNew": true, "legend": { @@ -58898,7 +59203,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 427, + "id": 429, "interval": null, "isNew": true, "legend": { @@ -59099,7 +59404,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 428, + "id": 430, "interval": null, "isNew": true, "legend": { @@ -59239,7 +59544,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 429, + "id": 431, "interval": null, "legend": { "show": false @@ -59343,7 +59648,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 430, + "id": 432, "interval": null, "legend": { "show": false @@ -59447,7 +59752,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 431, + "id": 433, "interval": null, "legend": { "show": false @@ -59551,7 +59856,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 432, + "id": 434, "interval": null, "legend": { "show": false @@ -59655,7 +59960,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 433, + "id": 435, "interval": null, "legend": { "show": false @@ -59759,7 +60064,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 434, + 
"id": 436, "interval": null, "legend": { "show": false @@ -59863,7 +60168,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 435, + "id": 437, "interval": null, "legend": { "show": false @@ -59960,7 +60265,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 436, + "id": 438, "interval": null, "isNew": true, "legend": { @@ -60108,7 +60413,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 437, + "id": 439, "interval": null, "isNew": true, "legend": { @@ -60241,7 +60546,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 438, + "id": 440, "interval": null, "isNew": true, "legend": { @@ -60374,7 +60679,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 439, + "id": 441, "interval": null, "isNew": true, "legend": { @@ -60522,7 +60827,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 440, + "id": 442, "interval": null, "isNew": true, "legend": { @@ -60658,7 +60963,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 441, + "id": 443, "interval": null, "links": [], "maxDataPoints": 100, @@ -60697,7 +61002,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 442, + "id": 444, "interval": null, "isNew": true, "legend": { @@ -60830,7 +61135,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 443, + "id": 445, "interval": null, "isNew": true, "legend": { @@ -60963,7 +61268,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 444, + "id": 446, "interval": null, "isNew": true, "legend": { @@ -61096,7 +61401,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 445, + "id": 447, "interval": null, "isNew": true, "legend": { @@ -61229,7 +61534,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 446, + "id": 448, "interval": null, "isNew": true, "legend": { @@ -61377,7 +61682,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 447, + "id": 449, "interval": null, "isNew": true, "legend": { @@ -61581,7 +61886,7 @@ }, "height": null, "hideTimeOverride": false, - 
"id": 448, + "id": 450, "interval": null, "links": [], "maxDataPoints": 100, @@ -61632,7 +61937,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 449, + "id": 451, "interval": null, "links": [], "maxDataPoints": 100, @@ -61728,7 +62033,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 450, + "id": 452, "interval": null, "links": [], "maxDataPoints": 100, @@ -61803,7 +62108,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 451, + "id": 453, "interval": null, "links": [], "maxDataPoints": 100, @@ -61878,7 +62183,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 452, + "id": 454, "interval": null, "links": [], "maxDataPoints": 100, @@ -61953,7 +62258,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 453, + "id": 455, "interval": null, "links": [], "maxDataPoints": 100, @@ -62028,7 +62333,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 454, + "id": 456, "interval": null, "links": [], "maxDataPoints": 100, @@ -62103,7 +62408,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 455, + "id": 457, "interval": null, "links": [], "maxDataPoints": 100, @@ -62178,7 +62483,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 456, + "id": 458, "interval": null, "links": [], "maxDataPoints": 100, @@ -62257,7 +62562,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 457, + "id": 459, "interval": null, "isNew": true, "legend": { @@ -62390,7 +62695,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 458, + "id": 460, "interval": null, "isNew": true, "legend": { @@ -62523,7 +62828,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 459, + "id": 461, "interval": null, "isNew": true, "legend": { @@ -62656,7 +62961,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 460, + "id": 462, "interval": null, "isNew": true, "legend": { @@ -62789,7 +63094,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 461, + "id": 463, "interval": null, "isNew": true, "legend": { @@ -62922,7 +63227,7 @@ 
}, "height": null, "hideTimeOverride": false, - "id": 462, + "id": 464, "interval": null, "isNew": true, "legend": { @@ -63070,7 +63375,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 463, + "id": 465, "interval": null, "isNew": true, "legend": { @@ -63203,7 +63508,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 464, + "id": 466, "interval": null, "isNew": true, "legend": { @@ -63336,7 +63641,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 465, + "id": 467, "interval": null, "isNew": true, "legend": { @@ -63502,7 +63807,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 466, + "id": 468, "interval": null, "legend": { "show": false @@ -63606,7 +63911,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 467, + "id": 469, "interval": null, "legend": { "show": false @@ -63710,7 +64015,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 468, + "id": 470, "interval": null, "legend": { "show": false @@ -63814,7 +64119,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 469, + "id": 471, "interval": null, "legend": { "show": false @@ -63918,7 +64223,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 470, + "id": 472, "interval": null, "legend": { "show": false @@ -64022,7 +64327,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 471, + "id": 473, "interval": null, "legend": { "show": false @@ -64126,7 +64431,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 472, + "id": 474, "interval": null, "legend": { "show": false @@ -64230,7 +64535,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 473, + "id": 475, "interval": null, "legend": { "show": false @@ -64327,7 +64632,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 474, + "id": 476, 
"interval": null, "isNew": true, "legend": { @@ -64460,7 +64765,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 475, + "id": 477, "interval": null, "isNew": true, "legend": { @@ -64593,7 +64898,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 476, + "id": 478, "interval": null, "isNew": true, "legend": { @@ -64726,7 +65031,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 477, + "id": 479, "interval": null, "isNew": true, "legend": { @@ -64859,7 +65164,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 478, + "id": 480, "interval": null, "isNew": true, "legend": { @@ -64992,7 +65297,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 479, + "id": 481, "interval": null, "isNew": true, "legend": { @@ -65125,7 +65430,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 480, + "id": 482, "interval": null, "isNew": true, "legend": { @@ -65265,7 +65570,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 481, + "id": 483, "interval": null, "legend": { "show": false @@ -65369,7 +65674,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 482, + "id": 484, "interval": null, "legend": { "show": false @@ -65466,7 +65771,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 483, + "id": 485, "interval": null, "isNew": true, "legend": { @@ -65599,7 +65904,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 484, + "id": 486, "interval": null, "isNew": true, "legend": { @@ -65732,7 +66037,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 485, + "id": 487, "interval": null, "isNew": true, "legend": { @@ -65865,7 +66170,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 486, + "id": 488, "interval": null, "isNew": true, "legend": { @@ -65998,7 +66303,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 487, + "id": 489, "interval": null, "isNew": true, "legend": { @@ -66131,7 +66436,7 @@ }, "height": null, "hideTimeOverride": 
false, - "id": 488, + "id": 490, "interval": null, "isNew": true, "legend": { @@ -66267,7 +66572,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 489, + "id": 491, "interval": null, "links": [], "maxDataPoints": 100, @@ -66306,7 +66611,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 490, + "id": 492, "interval": null, "isNew": true, "legend": { @@ -66439,7 +66744,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 491, + "id": 493, "interval": null, "isNew": true, "legend": { @@ -66572,7 +66877,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 492, + "id": 494, "interval": null, "isNew": true, "legend": { @@ -66705,7 +67010,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 493, + "id": 495, "interval": null, "isNew": true, "legend": { diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 330822d3c7b..9fc86c86986 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -dbcc3ef2b588c133dbe4b56196abb366da5b25631f6d42bbc6ae1811b21bbec5 ./metrics/grafana/tikv_details.json +78b891e1edbbaa68d2c0638cd258ff0d80315e66f412225905434e63b6a14692 ./metrics/grafana/tikv_details.json diff --git a/src/server/metrics.rs b/src/server/metrics.rs index cef725c3f28..3ad9c5bdde0 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -409,6 +409,13 @@ lazy_static! 
{ &["type", "store_id"] ) .unwrap(); + pub static ref RAFT_CLIENT_WAIT_CONN_READY_DURATION_HISTOGRAM_VEC: HistogramVec = register_histogram_vec!( + "tikv_server_raft_client_wait_ready_duration", + "Duration of wait raft client connection ready", + &["to"], + exponential_buckets(5e-5, 2.0, 22).unwrap() // 50us ~ 104s + ) + .unwrap(); pub static ref RAFT_MESSAGE_FLUSH_COUNTER: RaftMessageFlushCounterVec = register_static_int_counter_vec!( RaftMessageFlushCounterVec, diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index b120011c490..700d409c129 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -40,6 +40,7 @@ use tikv_kv::RaftExtension; use tikv_util::{ config::{Tracker, VersionTrack}, lru::LruCache, + time::duration_to_sec, timer::GLOBAL_TIMER_HANDLE, worker::Scheduler, }; @@ -814,7 +815,13 @@ async fn start( let mut last_wake_time = None; let backoff_duration = back_end.builder.cfg.value().raft_client_max_backoff.0; let mut addr_channel = None; + let mut begin = None; + let mut try_count = 0; loop { + if begin.is_none() { + begin = Some(Instant::now()); + } + try_count += 1; maybe_backoff(backoff_duration, &mut last_wake_time).await; let f = back_end.resolve(); let addr = match f.await { @@ -862,7 +869,19 @@ async fn start( .report_store_unreachable(back_end.store_id); continue; } else { - debug!("connection established"; "store_id" => back_end.store_id, "addr" => %addr); + let wait_conn_duration = begin.unwrap_or_else(Instant::now).elapsed(); + info!("connection established"; + "store_id" => back_end.store_id, + "addr" => %addr, + "cost" => ?wait_conn_duration, + "msg_count" => ?back_end.queue.len(), + "try_count" => try_count, + ); + RAFT_CLIENT_WAIT_CONN_READY_DURATION_HISTOGRAM_VEC + .with_label_values(&[addr.as_str()]) + .observe(duration_to_sec(wait_conn_duration)); + begin = None; + try_count = 0; } let client = TikvClient::new(channel); From 1f384cfce5b41f49f19a06de33cf32d2f2a2eaf4 Mon Sep 17 00:00:00 2001 From: Neil 
Shen Date: Wed, 20 Dec 2023 14:19:52 +0800 Subject: [PATCH 1066/1149] *: do not suffix "fips" for tikv version ouput (#16201) close tikv/tikv#16200 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/lib.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6d1cc515907..acccb2f55e5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -80,11 +80,7 @@ pub fn tikv_version_info(build_time: Option<&str>) -> String { /// return the build version of tikv-server pub fn tikv_build_version() -> String { - if crypto::fips::can_enable() { - format!("{}-{}", env!("CARGO_PKG_VERSION"), "fips") - } else { - env!("CARGO_PKG_VERSION").to_owned() - } + env!("CARGO_PKG_VERSION").to_owned() } /// Prints the tikv version information to the standard output. From d7959b8194b4607175050289e7c093b3d5caad80 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Wed, 20 Dec 2023 15:52:53 +0800 Subject: [PATCH 1067/1149] txn: change memory pessimsitic lock to btree map and support scan (#16180) ref tikv/tikv#15066 Change in-memory pessimsitic locks from hash map to btree map, support collecting pessimistic locks for scan lock command. Then: 1. GC could collect expired pessimistic locks. 2. Pessimistic rollback could use read scan first and then clean up expired pessimistic locks at one time. 
Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/txn_ext.rs | 264 +++++++++++++++++++--- src/storage/mod.rs | 144 +++++++++++- src/storage/mvcc/metrics.rs | 17 ++ src/storage/mvcc/reader/reader.rs | 112 +++++---- tests/integrations/server/kv_service.rs | 163 +++++++++++++ 5 files changed, 624 insertions(+), 76 deletions(-) diff --git a/components/raftstore/src/store/txn_ext.rs b/components/raftstore/src/store/txn_ext.rs index 0091fd4e7bb..ae352ea08ab 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -1,16 +1,16 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. use std::{ + collections::{BTreeMap, Bound}, fmt, sync::atomic::{AtomicU64, Ordering}, }; -use collections::HashMap; use kvproto::metapb; use lazy_static::lazy_static; use parking_lot::RwLock; use prometheus::{register_int_gauge, IntGauge}; -use txn_types::{Key, PessimisticLock}; +use txn_types::{Key, Lock, PessimisticLock}; /// Transaction extensions related to a peer. #[derive(Default)] @@ -106,7 +106,7 @@ pub struct PeerPessimisticLocks { /// skipped because of version mismatch. So, no lock should be deleted. /// It's correct that we include the locks that are marked deleted in the /// commit merge request. - map: HashMap, + map: BTreeMap, /// Status of the pessimistic lock map. /// The map is writable only in the Normal state. 
pub status: LocksStatus, @@ -143,7 +143,7 @@ impl fmt::Debug for PeerPessimisticLocks { impl Default for PeerPessimisticLocks { fn default() -> Self { PeerPessimisticLocks { - map: HashMap::default(), + map: BTreeMap::default(), status: LocksStatus::Normal, term: 0, version: 0, @@ -192,7 +192,7 @@ impl PeerPessimisticLocks { } pub fn clear(&mut self) { - self.map = HashMap::default(); + self.map = BTreeMap::default(); GLOBAL_MEM_SIZE.sub(self.memory_size as i64); self.memory_size = 0; } @@ -244,12 +244,20 @@ impl PeerPessimisticLocks { // Locks that are marked deleted still need to be moved to the new regions, // and the deleted mark should also be cleared. // Refer to the comment in `PeerPessimisticLocks` for details. - let removed_locks = self.map.drain_filter(|key, _| { - let key = &**key.as_encoded(); + // There is no drain_filter for BtreeMap, so extra clone are needed. + let mut removed_locks = Vec::new(); + self.map.retain(|key, value| { + let key_ref = key.as_encoded().as_slice(); let (start_key, end_key) = (derived.get_start_key(), derived.get_end_key()); - key < start_key || (!end_key.is_empty() && key >= end_key) + if key_ref < start_key || (!end_key.is_empty() && key_ref >= end_key) { + removed_locks.push((key.clone(), value.clone())); + false + } else { + true + } }); - for (key, (lock, _)) in removed_locks { + + for (key, (lock, _)) in removed_locks.into_iter() { let idx = match regions .binary_search_by_key(&&**key.as_encoded(), |region| region.get_start_key()) { @@ -264,6 +272,37 @@ impl PeerPessimisticLocks { res } + /// Scan and return locks in the current pessimistic lock map, the map + /// should be locked first before calling this method. 
+ pub fn scan_locks( + &self, + start: Option<&Key>, + end: Option<&Key>, + filter: F, + limit: usize, + ) -> (Vec<(Key, Lock)>, bool) + where + F: Fn(&Key, &PessimisticLock) -> bool, + { + if let (Some(start_key), Some(end_key)) = (start, end) { + assert!(end_key >= start_key); + } + let mut locks = Vec::with_capacity(limit); + let mut iter = self.map.range(( + start.map_or(Bound::Unbounded, |k| Bound::Included(k)), + end.map_or(Bound::Unbounded, |k| Bound::Excluded(k)), + )); + while let Some((key, (lock, _))) = iter.next() { + if filter(key, lock) { + locks.push((key.clone(), lock.clone().into_lock())); + } + if limit > 0 && locks.len() >= limit { + return (locks, iter.next().is_some()); + } + } + (locks, false) + } + #[cfg(test)] fn from_locks(locks: impl IntoIterator) -> Self { let mut res = PeerPessimisticLocks::default(); @@ -277,7 +316,7 @@ impl PeerPessimisticLocks { impl<'a> IntoIterator for &'a PeerPessimisticLocks { type Item = (&'a Key, &'a (PessimisticLock, bool)); - type IntoIter = std::collections::hash_map::Iter<'a, Key, (PessimisticLock, bool)>; + type IntoIter = std::collections::btree_map::Iter<'a, Key, (PessimisticLock, bool)>; fn into_iter(self) -> Self::IntoIter { self.map.iter() @@ -331,6 +370,24 @@ mod tests { } } + fn lock_with_key(key: &[u8], deleted: bool) -> (Key, (PessimisticLock, bool)) { + ( + Key::from_raw(key), + ( + PessimisticLock { + primary: key.to_vec().into_boxed_slice(), + start_ts: 10.into(), + ttl: 1000, + for_update_ts: 10.into(), + min_commit_ts: 20.into(), + last_change: LastChange::make_exist(5.into(), 2), + is_locked_with_conflict: false, + }, + deleted, + ), + ) + } + #[test] fn test_memory_size() { let _guard = TEST_MUTEX.lock().unwrap(); @@ -418,23 +475,6 @@ mod tests { #[test] fn test_group_locks_by_regions() { - fn lock(key: &[u8], deleted: bool) -> (Key, (PessimisticLock, bool)) { - ( - Key::from_raw(key), - ( - PessimisticLock { - primary: key.to_vec().into_boxed_slice(), - start_ts: 10.into(), - ttl: 1000, - 
for_update_ts: 10.into(), - min_commit_ts: 20.into(), - last_change: LastChange::make_exist(5.into(), 2), - is_locked_with_conflict: false, - }, - deleted, - ), - ) - } fn region(start_key: &[u8], end_key: &[u8]) -> metapb::Region { let mut region = metapb::Region::default(); region.set_start_key(start_key.to_vec()); @@ -445,11 +485,11 @@ mod tests { defer!(GLOBAL_MEM_SIZE.set(0)); let mut original = PeerPessimisticLocks::from_locks(vec![ - lock(b"a", true), - lock(b"c", false), - lock(b"e", true), - lock(b"g", false), - lock(b"i", false), + lock_with_key(b"a", true), + lock_with_key(b"c", false), + lock_with_key(b"e", true), + lock_with_key(b"g", false), + lock_with_key(b"i", false), ]); let regions = vec![ region(b"", b"b"), // test leftmost region @@ -460,10 +500,10 @@ mod tests { ]; let output = original.group_by_regions(®ions, ®ions[4]); let expected: Vec<_> = vec![ - vec![lock(b"a", false)], + vec![lock_with_key(b"a", false)], vec![], - vec![lock(b"c", false)], - vec![lock(b"e", false), lock(b"g", false)], + vec![lock_with_key(b"c", false)], + vec![lock_with_key(b"e", false), lock_with_key(b"g", false)], vec![], // the position of the derived region is empty ] .into_iter() @@ -473,7 +513,159 @@ mod tests { // The lock that belongs to the derived region is kept in the original map. assert_eq!( original, - PeerPessimisticLocks::from_locks(vec![lock(b"i", false)]) + PeerPessimisticLocks::from_locks(vec![lock_with_key(b"i", false)]) ); } + + #[test] + fn test_scan_memory_lock() { + // Create a sample PeerPessimisticLocks instance with some locks. 
+ let peer_locks = PeerPessimisticLocks::from_locks(vec![ + lock_with_key(b"key1", false), + lock_with_key(b"key2", false), + lock_with_key(b"key3", false), + ]); + + fn txn_lock(key: &[u8], deleted: bool) -> Lock { + let (_, (pessimistic_lock, _)) = lock_with_key(key, deleted); + pessimistic_lock.into_lock() + } + + let filter_pass_all = |_key: &Key, _lock: &PessimisticLock| true; + let filter_pass_key2 = + |key: &Key, _lock: &PessimisticLock| key.as_encoded().starts_with(b"key2"); + + // Case parameter: start_key, end_key, filter, limit, expected results, expected + // has more. + type LockFilter = fn(&Key, &PessimisticLock) -> bool; + let cases: [( + Option, + Option, + LockFilter, + usize, + Vec<(Key, Lock)>, + bool, + ); 12] = [ + ( + None, + None, + filter_pass_all, + 1, + vec![(Key::from_raw(b"key1"), txn_lock(b"key1", false))], + true, + ), + ( + None, + None, + filter_pass_all, + 10, + vec![ + (Key::from_raw(b"key1"), txn_lock(b"key1", false)), + (Key::from_raw(b"key2"), txn_lock(b"key2", false)), + (Key::from_raw(b"key3"), txn_lock(b"key3", false)), + ], + false, + ), + ( + Some(Key::from_raw(b"key0")), + Some(Key::from_raw(b"key1")), + filter_pass_all, + 10, + vec![], + false, + ), + ( + Some(Key::from_raw(b"key0")), + Some(Key::from_raw(b"key2")), + filter_pass_all, + 10, + vec![(Key::from_raw(b"key1"), txn_lock(b"key1", false))], + false, + ), + ( + Some(Key::from_raw(b"key1")), + Some(Key::from_raw(b"key3")), + filter_pass_all, + 10, + vec![ + (Key::from_raw(b"key1"), txn_lock(b"key1", false)), + (Key::from_raw(b"key2"), txn_lock(b"key2", false)), + ], + false, + ), + ( + Some(Key::from_raw(b"key1")), + Some(Key::from_raw(b"key4")), + filter_pass_all, + 2, + vec![ + (Key::from_raw(b"key1"), txn_lock(b"key1", false)), + (Key::from_raw(b"key2"), txn_lock(b"key2", false)), + ], + true, + ), + ( + Some(Key::from_raw(b"key1")), + Some(Key::from_raw(b"key4")), + filter_pass_all, + 10, + vec![ + (Key::from_raw(b"key1"), txn_lock(b"key1", false)), + 
(Key::from_raw(b"key2"), txn_lock(b"key2", false)), + (Key::from_raw(b"key3"), txn_lock(b"key3", false)), + ], + false, + ), + ( + Some(Key::from_raw(b"key2")), + Some(Key::from_raw(b"key4")), + filter_pass_all, + 10, + vec![ + (Key::from_raw(b"key2"), txn_lock(b"key2", false)), + (Key::from_raw(b"key3"), txn_lock(b"key3", false)), + ], + false, + ), + ( + Some(Key::from_raw(b"key4")), + Some(Key::from_raw(b"key4")), + filter_pass_all, + 10, + vec![], + false, + ), + ( + None, + None, + filter_pass_key2, + 10, + vec![(Key::from_raw(b"key2"), txn_lock(b"key2", false))], + false, + ), + ( + Some(Key::from_raw(b"key2")), + None, + filter_pass_key2, + 1, + vec![(Key::from_raw(b"key2"), txn_lock(b"key2", false))], + true, + ), + ( + None, + Some(Key::from_raw(b"key2")), + filter_pass_key2, + 1, + vec![], + false, + ), + ]; + + for (start_key, end_key, filter, limit, expected_locks, expected_has_more) in cases { + let (locks, has_more) = + peer_locks.scan_locks(start_key.as_ref(), end_key.as_ref(), filter, limit); + assert_eq!(locks, expected_locks); + assert_eq!(has_more, expected_has_more); + } + } } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 2bdc07625ee..13d868849f4 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -80,6 +80,7 @@ use engine_traits::{ raw_ttl::ttl_to_expire_ts, CfName, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, DATA_CFS_LEN, }; use futures::{future::Either, prelude::*}; +use itertools::Itertools; use kvproto::{ kvrpcpb::{ ApiVersion, ChecksumAlgorithm, CommandPri, Context, GetRequest, IsolationLevel, KeyRange, @@ -1458,6 +1459,15 @@ impl Storage { Some(ScanMode::Forward), !ctx.get_not_fill_cache(), ); + let memory_locks = reader + .load_in_memory_pessimisitic_lock_range( + start_key.as_ref(), + end_key.as_ref(), + |_, lock| lock.start_ts <= max_ts, + limit, + ) + .map_err(txn::Error::from); + let (memory_lock_kv_pairs, _) = memory_locks?; let result = reader .scan_locks( start_key.as_ref(), @@ -1468,8 +1478,18 @@ impl Storage { 
.map_err(txn::Error::from); statistics.add(&reader.statistics); let (kv_pairs, _) = result?; - let mut locks = Vec::with_capacity(kv_pairs.len()); - for (key, lock) in kv_pairs { + + // Merge the results from in-memory pessimistic locks and the lock cf. + // The result order is decided by the key. + let memory_lock_iter = memory_lock_kv_pairs.into_iter(); + let lock_iter = kv_pairs.into_iter(); + let merged_iter = memory_lock_iter + .merge_by(lock_iter, |(memory_key, _), (key, _)| memory_key <= key); + let mut locks = Vec::with_capacity(limit); + for (key, lock) in merged_iter { + if limit > 0 && locks.len() >= limit { + break; + } let lock_info = lock.into_lock_info(key.into_raw().map_err(txn::Error::from)?); locks.push(lock_info); @@ -7396,6 +7416,126 @@ mod tests { ); } + #[test] + fn test_scan_lock_with_memory_lock() { + for in_memory_pessimistic_lock_enabled in [false, true] { + let txn_ext = Arc::new(TxnExt::default()); + let lock_mgr = MockLockManager::new(); + let storage = TestStorageBuilderApiV1::new(lock_mgr.clone()) + .pipelined_pessimistic_lock(in_memory_pessimistic_lock_enabled) + .in_memory_pessimistic_lock(in_memory_pessimistic_lock_enabled) + .build_for_txn(txn_ext.clone()) + .unwrap(); + let (tx, rx) = channel(); + storage + .sched_txn_command( + commands::AcquirePessimisticLock::new( + vec![(Key::from_raw(b"a"), false), (Key::from_raw(b"b"), false)], + b"a".to_vec(), + 20.into(), + 3000, + true, + 20.into(), + Some(WaitTimeout::Millis(1000)), + false, + 21.into(), + false, + false, + false, + Context::default(), + ), + expect_ok_callback(tx.clone(), 0), + ) + .unwrap(); + rx.recv().unwrap(); + if in_memory_pessimistic_lock_enabled { + // Check if the lock exists in the memory buffer. 
+ let pessimistic_locks = txn_ext.pessimistic_locks.read(); + let lock = pessimistic_locks.get(&Key::from_raw(b"a")).unwrap(); + assert_eq!( + lock, + &( + PessimisticLock { + primary: Box::new(*b"a"), + start_ts: 20.into(), + ttl: 3000, + for_update_ts: 20.into(), + min_commit_ts: 21.into(), + last_change: LastChange::NotExist, + is_locked_with_conflict: false, + }, + false + ) + ); + } + + storage + .sched_txn_command( + commands::Prewrite::with_defaults( + vec![ + Mutation::make_put(Key::from_raw(b"x"), b"foo".to_vec()), + Mutation::make_put(Key::from_raw(b"y"), b"foo".to_vec()), + Mutation::make_put(Key::from_raw(b"z"), b"foo".to_vec()), + ], + b"x".to_vec(), + 10.into(), + ), + expect_ok_callback(tx, 0), + ) + .unwrap(); + rx.recv().unwrap(); + + let (lock_a, lock_b, lock_x, lock_y, lock_z) = ( + { + let mut lock = LockInfo::default(); + lock.set_primary_lock(b"a".to_vec()); + lock.set_lock_version(20); + lock.set_lock_for_update_ts(20); + lock.set_key(b"a".to_vec()); + lock.set_min_commit_ts(21); + lock.set_lock_type(Op::PessimisticLock); + lock.set_lock_ttl(3000); + lock + }, + { + let mut lock = LockInfo::default(); + lock.set_primary_lock(b"a".to_vec()); + lock.set_lock_version(20); + lock.set_lock_for_update_ts(20); + lock.set_key(b"b".to_vec()); + lock.set_min_commit_ts(21); + lock.set_lock_type(Op::PessimisticLock); + lock.set_lock_ttl(3000); + lock + }, + { + let mut lock = LockInfo::default(); + lock.set_primary_lock(b"x".to_vec()); + lock.set_lock_version(10); + lock.set_key(b"x".to_vec()); + lock + }, + { + let mut lock = LockInfo::default(); + lock.set_primary_lock(b"x".to_vec()); + lock.set_lock_version(10); + lock.set_key(b"y".to_vec()); + lock + }, + { + let mut lock = LockInfo::default(); + lock.set_primary_lock(b"x".to_vec()); + lock.set_lock_version(10); + lock.set_key(b"z".to_vec()); + lock + }, + ); + let res = block_on(storage.scan_lock(Context::default(), 101.into(), None, None, 10)) + .unwrap(); + assert_eq!(res, vec![lock_a, lock_b, 
lock_x, lock_y, lock_z,]); + } + } + #[test] fn test_scan_lock() { let storage = TestStorageBuilderApiV1::new(MockLockManager::new()) diff --git a/src/storage/mvcc/metrics.rs b/src/storage/mvcc/metrics.rs index 22d2760a769..eaef1134d81 100644 --- a/src/storage/mvcc/metrics.rs +++ b/src/storage/mvcc/metrics.rs @@ -36,6 +36,11 @@ make_static_metric! { write_not_loaded_skip } + pub label_enum ScanLockReadTimeSource { + resolve_lock, + pessimistic_rollback, + } + pub struct MvccConflictCounterVec: IntCounter { "type" => MvccConflictKind, } @@ -58,6 +63,10 @@ make_static_metric! { retry_req, }, } + + pub struct ScanLockReadTimeVec: Histogram { + "type" => ScanLockReadTimeSource, + } } lazy_static! { @@ -120,4 +129,12 @@ lazy_static! { ) .unwrap() }; + pub static ref SCAN_LOCK_READ_TIME_VEC: ScanLockReadTimeVec = register_static_histogram_vec!( + ScanLockReadTimeVec, + "tikv_storage_mvcc_scan_lock_read_duration_seconds", + "Bucketed histogram of memory lock read lock hold for scan lock", + &["type"], + exponential_buckets(0.00001, 2.0, 20).unwrap() + ) + .unwrap(); } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 48158eda946..257789b4765 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -8,9 +8,12 @@ use kvproto::{ errorpb::{self, EpochNotMatch, FlashbackInProgress, StaleCommand}, kvrpcpb::Context, }; -use raftstore::store::LocksStatus; +use raftstore::store::{LocksStatus, PeerPessimisticLocks}; use tikv_kv::{SnapshotExt, SEEK_BOUND}; -use txn_types::{Key, LastChange, Lock, OldValue, TimeStamp, Value, Write, WriteRef, WriteType}; +use tikv_util::time::Instant; +use txn_types::{ + Key, LastChange, Lock, OldValue, PessimisticLock, TimeStamp, Value, Write, WriteRef, WriteType, +}; use crate::storage::{ kv::{ @@ -18,6 +21,7 @@ use crate::storage::{ }, mvcc::{ default_not_found_error, + metrics::SCAN_LOCK_READ_TIME_VEC, reader::{OverlappedWrite, TxnCommitRecord}, Result, }, @@ -251,44 +255,76 @@ 
impl MvccReader { Ok(res) } - fn load_in_memory_pessimistic_lock(&self, key: &Key) -> Result> { - self.snapshot - .ext() - .get_txn_ext() - .and_then(|txn_ext| { - // If the term or region version has changed, do not read the lock table. - // Instead, just return a StaleCommand or EpochNotMatch error, so the - // client will not receive a false error because the lock table has been - // cleared. - let locks = txn_ext.pessimistic_locks.read(); - if self.term != 0 && locks.term != self.term { - let mut err = errorpb::Error::default(); - err.set_stale_command(StaleCommand::default()); - return Some(Err(KvError::from(err).into())); - } - if self.version != 0 && locks.version != self.version { - let mut err = errorpb::Error::default(); - // We don't know the current regions. Just return an empty EpochNotMatch error. - err.set_epoch_not_match(EpochNotMatch::default()); - return Some(Err(KvError::from(err).into())); - } - // If the region is in the flashback state, it should not be allowed to read the - // locks. - if locks.status == LocksStatus::IsInFlashback && !self.allow_in_flashback { - let mut err = errorpb::Error::default(); - err.set_flashback_in_progress(FlashbackInProgress::default()); - return Some(Err(KvError::from(err).into())); + fn check_term_version_status(&self, locks: &PeerPessimisticLocks) -> Result<()> { + // If the term or region version has changed, do not read the lock table. + // Instead, just return a StaleCommand or EpochNotMatch error, so the + // client will not receive a false error because the lock table has been + // cleared. 
+ if self.term != 0 && locks.term != self.term { + let mut err = errorpb::Error::default(); + err.set_stale_command(StaleCommand::default()); + return Err(KvError::from(err).into()); + } + if self.version != 0 && locks.version != self.version { + let mut err = errorpb::Error::default(); + err.set_epoch_not_match(EpochNotMatch::default()); + return Err(KvError::from(err).into()); + } + if locks.status == LocksStatus::IsInFlashback && !self.allow_in_flashback { + let mut err = errorpb::Error::default(); + err.set_flashback_in_progress(FlashbackInProgress::default()); + return Err(KvError::from(err).into()); + } + Ok(()) + } + + pub fn load_in_memory_pessimisitic_lock_range( + &self, + start_key: Option<&Key>, + end_key: Option<&Key>, + filter: F, + scan_limit: usize, + ) -> Result<(Vec<(Key, Lock)>, bool)> + where + F: Fn(&Key, &PessimisticLock) -> bool, + { + if let Some(txn_ext) = self.snapshot.ext().get_txn_ext() { + let begin_instant = Instant::now(); + let res = match self.check_term_version_status(&txn_ext.pessimistic_locks.read()) { + Ok(_) => { + // Scan locks within the specified range and filter by max_ts. + Ok(txn_ext + .pessimistic_locks + .read() + .scan_locks(start_key, end_key, filter, scan_limit)) } + Err(e) => Err(e), + }; + let elapsed = begin_instant.saturating_elapsed(); + SCAN_LOCK_READ_TIME_VEC + .resolve_lock + .observe(elapsed.as_secs_f64()); - locks.get(key).map(|(lock, _)| { - // For write commands that are executed in serial, it should be impossible - // to read a deleted lock. - // For read commands in the scheduler, it should read the lock marked deleted - // because the lock is not actually deleted from the underlying storage. 
- Ok(lock.to_lock()) - }) - }) - .transpose() + res + } else { + Ok((vec![], false)) + } + } + + fn load_in_memory_pessimistic_lock(&self, key: &Key) -> Result> { + if let Some(txn_ext) = self.snapshot.ext().get_txn_ext() { + let locks = txn_ext.pessimistic_locks.read(); + self.check_term_version_status(&locks)?; + Ok(locks.get(key).map(|(lock, _)| { + // For write commands that are executed in serial, it should be impossible + // to read a deleted lock. + // For read commands in the scheduler, it should read the lock marked deleted + // because the lock is not actually deleted from the underlying storage. + lock.to_lock() + })) + } else { + Ok(None) + } } fn get_scan_mode(&self, allow_backward: bool) -> ScanMode { diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 4e087bb07b0..845ae2bc969 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -2759,3 +2759,166 @@ fn test_pessimistic_lock_execution_tracking() { handle.join().unwrap(); } + +#[test_case(test_raftstore::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] +fn test_mvcc_scan_memory_and_cf_locks() { + let (cluster, client, ctx) = new_cluster(); + + // Create both pessimistic and prewrite locks. + // The peer in memory limit is 512KiB, generate 1KiB key for pessimistic lock. + // So Writing 512 pessimistic locks may exceed the memory limit and later + // pessimistic locks would be written to the lock cf. 
+ let byte_slice: &[u8] = &[b'k'; 512]; + let start_ts = 11; + let prewrite_start_ts = start_ts - 1; + let num_keys = 1040; + let prewrite_primary_key = b"prewrite_primary"; + let val = b"value"; + let format_key = |i| format!("{:?}{:04}", byte_slice, i).as_bytes().to_vec(); + for i in 0..num_keys { + let key = format_key(i); + if i % 2 == 0 { + must_kv_pessimistic_lock(&client, ctx.clone(), key, start_ts); + } else { + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(key); + mutation.set_value(val.to_vec()); + must_kv_prewrite_with( + &client, + ctx.clone(), + vec![mutation], + vec![], + prewrite_primary_key.to_vec(), + start_ts - 1, + 0, + false, + false, + ); + } + } + // Ensure the pessimistic locks are written to the memory. The first key should + // be written into the memory and the last key should be put to lock cf as + // memory limit is exceeded. + let engine = cluster.get_engine(1); + let cf_res = engine + .get_value_cf( + CF_LOCK, + keys::data_key(Key::from_raw(format_key(0).as_slice()).as_encoded()).as_slice(), + ) + .unwrap(); + assert!(cf_res.is_none()); + let cf_res = engine + .get_value_cf( + CF_LOCK, + keys::data_key(Key::from_raw(format_key(num_keys - 2).as_slice()).as_encoded()) + .as_slice(), + ) + .unwrap(); + assert!(cf_res.is_some()); + + // Scan lock, the pessimistic and prewrite results are returned. + // When limit is 0 or it's larger than num_keys, all keys should be returned. + // When limit is less than 512, in-memory pessimistic locks and prewrite locks + // should be returned. + // When limit is larger than 512, in-memory and lock cf pessimistic locks and + // prewrite locks should be returned. 
+ for scan_limit in [0, 128, 256, 512, num_keys, num_keys * 2] { + let scan_ts = 20; + let scan_lock_max_version = scan_ts; + let mut scan_lock_req = ScanLockRequest::default(); + scan_lock_req.set_context(ctx.clone()); + scan_lock_req.max_version = scan_lock_max_version; + scan_lock_req.limit = scan_limit as u32; + let scan_lock_resp = client.kv_scan_lock(&scan_lock_req).unwrap(); + assert!(!scan_lock_resp.has_region_error()); + let expected_key_num = if scan_limit == 0 || scan_limit >= num_keys { + num_keys + } else { + scan_limit + }; + assert_eq!(scan_lock_resp.locks.len(), expected_key_num); + + for (i, lock_info) in (0..expected_key_num).zip(scan_lock_resp.locks.iter()) { + let key = format_key(i); + if i % 2 == 0 { + assert_eq!(lock_info.lock_type, Op::PessimisticLock); + assert_eq!(lock_info.lock_version, start_ts); + assert_eq!(lock_info.key, key); + } else { + assert_eq!( + lock_info.lock_type, + Op::Put, + "i={:?} lock_info={:?} expected_key_num={:?}, scan_limit={:?}", + i, + lock_info, + expected_key_num, + scan_limit + ); + assert_eq!(lock_info.primary_lock, prewrite_primary_key); + assert_eq!(lock_info.lock_version, prewrite_start_ts); + assert_eq!(lock_info.key, key); + } + } + } + + // Scan with smaller ts returns empty result. + let mut scan_lock_req = ScanLockRequest::default(); + scan_lock_req.set_context(ctx.clone()); + scan_lock_req.max_version = prewrite_start_ts - 1; + let scan_lock_resp = client.kv_scan_lock(&scan_lock_req).unwrap(); + assert!(!scan_lock_resp.has_region_error()); + assert_eq!(scan_lock_resp.locks.len(), 0); + + // Roll back the prewrite locks. 
+ let rollback_start_version = prewrite_start_ts; + let mut rollback_req = BatchRollbackRequest::default(); + rollback_req.set_context(ctx.clone()); + rollback_req.start_version = rollback_start_version; + let keys = (0..num_keys) + .filter(|i| i % 2 != 0) + .map(|i| format_key(i)) + .collect(); + rollback_req.set_keys(keys); + let rollback_resp = client.kv_batch_rollback(&rollback_req).unwrap(); + assert!(!rollback_resp.has_region_error()); + assert!(!rollback_resp.has_error()); + + // Scan lock again after removing prewrite locks. + let mut scan_lock_req = ScanLockRequest::default(); + scan_lock_req.set_context(ctx.clone()); + scan_lock_req.max_version = start_ts + 1; + let scan_lock_resp = client.kv_scan_lock(&scan_lock_req).unwrap(); + assert!(!scan_lock_resp.has_region_error()); + assert_eq!(scan_lock_resp.locks.len(), num_keys / 2); + for (i, lock_info) in (0..num_keys / 2).zip(scan_lock_resp.locks.iter()) { + let key = format_key(i * 2); + assert_eq!(lock_info.lock_version, start_ts); + assert_eq!(lock_info.key, key); + assert_eq!(lock_info.lock_type, Op::PessimisticLock); + } + + // Pessimistic rollabck all the locks. Scan lock should return empty result. + let mut pessimsitic_rollback_req = PessimisticRollbackRequest::default(); + pessimsitic_rollback_req.start_version = start_ts; + pessimsitic_rollback_req.for_update_ts = start_ts; + pessimsitic_rollback_req.set_context(ctx.clone()); + let keys = (0..num_keys) + .filter(|i| i % 2 == 0) + .map(|i| format_key(i)) + .collect(); + pessimsitic_rollback_req.set_keys(keys); + let pessimistic_rollback_resp = client + .kv_pessimistic_rollback(&pessimsitic_rollback_req) + .unwrap(); + assert!(!pessimistic_rollback_resp.has_region_error()); + + // Scan lock again after all the cleanup. 
+ let mut scan_lock_req = ScanLockRequest::default(); + scan_lock_req.set_context(ctx); + scan_lock_req.max_version = start_ts + 1; + let scan_lock_resp = client.kv_scan_lock(&scan_lock_req).unwrap(); + assert!(!scan_lock_resp.has_region_error()); + assert_eq!(scan_lock_resp.locks.len(), 0); +} From 75cd21cbaf008fcab937de70fa731703d999aedf Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 20 Dec 2023 19:45:22 +0800 Subject: [PATCH 1068/1149] server: add metrics for status server API (#16198) close tikv/tikv#16197 This commit add histograms for status server APIs, so that we can inspect the duration for each API request. It helps us to identify any correlations between specific API requests and potential incidents. Signed-off-by: Neil Shen --- metrics/grafana/common.py | 2 + metrics/grafana/tikv_details.dashboard.py | 32 ++ metrics/grafana/tikv_details.json | 376 ++++++++++++++++++++++ metrics/grafana/tikv_details.json.sha256 | 2 +- src/server/status_server/metrics.rs | 13 + src/server/status_server/mod.rs | 24 +- 6 files changed, 445 insertions(+), 4 deletions(-) create mode 100644 src/server/status_server/metrics.rs diff --git a/metrics/grafana/common.py b/metrics/grafana/common.py index 2c2ed7570ed..7f15c06998f 100644 --- a/metrics/grafana/common.py +++ b/metrics/grafana/common.py @@ -981,6 +981,7 @@ def graph_panel_histogram_quantiles( metric: str, label_selectors: list[str] = [], by_labels: list[str] = [], + hide_p9999=False, hide_avg=False, hide_count=False, ) -> Panel: @@ -1013,6 +1014,7 @@ def legend(prefix, labels): by_labels=by_labels, ), legend_format=legend("99.99%", by_labels), + hide=hide_p9999, ), target( expr=expr_histogram_quantile( diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 29159740967..966346f741e 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -8568,6 +8568,37 @@ def SlowTrendStatistics() -> RowPanel: return layout.row_panel 
+def StatusServer() -> RowPanel: + layout = Layout(title="Status Server") + layout.row( + [ + graph_panel_histogram_quantiles( + title="Status API Request Duration", + description="The 99 quantile durtion of status server API requests", + metric="tikv_status_server_request_duration_seconds", + yaxes=yaxes(left_format=UNITS.SECONDS), + by_labels=["path"], + hide_p9999=True, + hide_count=True, + hide_avg=True, + ), + graph_panel( + title="Status API Request (op/s)", + yaxes=yaxes(left_format=UNITS.OPS_PER_SEC), + targets=[ + target( + expr=expr_sum_rate( + "tikv_status_server_request_duration_seconds_count", + by_labels=["path"], + ), + ), + ], + ), + ] + ) + return layout.row_panel + + #### Metrics Definition End #### @@ -8619,6 +8650,7 @@ def SlowTrendStatistics() -> RowPanel: Encryption(), BackupLog(), SlowTrendStatistics(), + StatusServer(), ], # Set 14 or larger to support shared crosshair or shared tooltip. # See https://github.com/grafana/grafana/blob/v10.2.2/public/app/features/dashboard/state/DashboardMigrator.ts#L443-L445 diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 88821ac7538..3af1480137c 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -67122,6 +67122,382 @@ "transformations": [], "transparent": false, "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 496, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The 99 quantile durtion of status server API requests", + "editable": 
true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 497, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{path}}", + "metric": "", + "query": 
"histogram_quantile(0.9999,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{path}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_status_server_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) / sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{path}}", + "metric": "", + "query": "(sum(rate(\n tikv_status_server_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) / sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) )", + "refId": 
"", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{path}}", + "metric": "", + "query": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Status API Request Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 498, + "interval": null, + "isNew": true, + 
"legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "metric": "", + "query": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Status API Request (op/s)", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + 
"yaxis": { + "align": false, + "alignLevel": 0 + } + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Status Server", + "transformations": [], + "transparent": false, + "type": "row" } ], "refresh": "1m", diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 9fc86c86986..d715ccca351 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -78b891e1edbbaa68d2c0638cd258ff0d80315e66f412225905434e63b6a14692 ./metrics/grafana/tikv_details.json +ac3bba8b714ed9cad64ece88ff1f7b4bb075ca178f270e7e1f41461d5ac37bbd ./metrics/grafana/tikv_details.json diff --git a/src/server/status_server/metrics.rs b/src/server/status_server/metrics.rs new file mode 100644 index 00000000000..9786ebd0a10 --- /dev/null +++ b/src/server/status_server/metrics.rs @@ -0,0 +1,13 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use prometheus::{exponential_buckets, register_histogram_vec, HistogramVec}; + +lazy_static::lazy_static! { + pub static ref STATUS_REQUEST_DURATION: HistogramVec = register_histogram_vec!( + "tikv_status_server_request_duration_seconds", + "Bucketed histogram of TiKV status server request duration", + &["method", "path"], + exponential_buckets(0.0001, 2.0, 24).unwrap() // 0.1ms ~ 1677.7s + ) + .unwrap(); +} diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index ff8909fa852..90c966d13e2 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -1,7 +1,9 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. +mod metrics; /// Provides profilers for TiKV. 
mod profile; + use std::{ env::args, error::Error as StdError, @@ -33,6 +35,7 @@ use hyper::{ Body, Method, Request, Response, Server, StatusCode, }; use kvproto::resource_manager::ResourceGroup; +use metrics::STATUS_REQUEST_DURATION; use online_config::OnlineConfig; use openssl::{ ssl::{Ssl, SslAcceptor, SslContext, SslFiletype, SslMethod, SslVerifyMode}, @@ -645,7 +648,9 @@ where )); } - match (method, path.as_ref()) { + let mut is_unknown_path = false; + let start = Instant::now(); + let res = match (method.clone(), path.as_ref()) { (Method::GET, "/metrics") => { Self::handle_get_metrics(req, &cfg_controller) } @@ -717,8 +722,21 @@ where (Method::PUT, "/resume_grpc") => { Self::handle_resume_grpc(grpc_service_mgr).await } - _ => Ok(make_response(StatusCode::NOT_FOUND, "path not found")), - } + _ => { + is_unknown_path = true; + Ok(make_response(StatusCode::NOT_FOUND, "path not found")) + }, + }; + // Using "unknown" for unknown paths to void creating high cardinality. + let path_label = if is_unknown_path { + "unknown".to_owned() + } else { + path + }; + STATUS_REQUEST_DURATION + .with_label_values(&[method.as_str(), &path_label]) + .observe(start.elapsed().as_secs_f64()); + res } })) } From d9b70f7f3a3332aa4ad9946325d1877fa4f33da2 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 21 Dec 2023 11:58:53 +0800 Subject: [PATCH 1069/1149] In-memory Engine: implement read flow -- hybrid engine part (#16181) ref tikv/tikv#16141 implement read flow of in-memory engine -- hybrid engine part Signed-off-by: SpadeA-Tang --- Cargo.lock | 4 + components/engine_panic/src/engine.rs | 6 +- components/engine_rocks/src/engine.rs | 4 +- components/engine_traits/src/engine.rs | 5 +- components/engine_traits/src/memory_engine.rs | 4 +- components/hybrid_engine/Cargo.toml | 5 + components/hybrid_engine/src/engine.rs | 82 ++++- .../hybrid_engine/src/engine_iterator.rs | 67 ++++- components/hybrid_engine/src/iterable.rs | 6 +- 
components/hybrid_engine/src/lib.rs | 1 + components/hybrid_engine/src/snapshot.rs | 33 ++- components/raftstore/Cargo.toml | 2 + components/raftstore/src/router.rs | 6 +- components/raftstore/src/store/peer.rs | 23 +- .../raftstore/src/store/region_snapshot.rs | 5 + components/raftstore/src/store/worker/read.rs | 279 ++++++++++++++++-- .../region_cache_memory_engine/Cargo.toml | 2 +- .../region_cache_memory_engine/src/engine.rs | 61 ++-- components/test_raftstore/src/cluster.rs | 17 +- components/test_raftstore/src/node.rs | 5 +- components/test_raftstore/src/server.rs | 7 +- .../test_raftstore/src/transport_simulate.rs | 5 +- components/test_raftstore/src/util.rs | 16 +- src/server/raftkv/mod.rs | 9 +- tests/benches/misc/raftkv/mod.rs | 3 +- tests/failpoints/cases/test_witness.rs | 8 +- tests/integrations/raftstore/mod.rs | 1 + .../raftstore/test_region_cache.rs | 17 ++ tests/integrations/raftstore/test_witness.rs | 2 +- 29 files changed, 561 insertions(+), 124 deletions(-) create mode 100644 tests/integrations/raftstore/test_region_cache.rs diff --git a/Cargo.lock b/Cargo.lock index eb1fe04b424..3f19e8d287f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2449,6 +2449,8 @@ version = "0.0.1" dependencies = [ "engine_rocks", "engine_traits", + "region_cache_memory_engine", + "tempfile", "tikv_util", "txn_types", ] @@ -4137,6 +4139,7 @@ dependencies = [ "futures-util", "getset", "grpcio-health", + "hybrid_engine", "into_other", "itertools", "keys", @@ -4157,6 +4160,7 @@ dependencies = [ "raft", "raft-proto", "rand 0.8.5", + "region_cache_memory_engine", "resource_control", "resource_metering", "serde", diff --git a/components/engine_panic/src/engine.rs b/components/engine_panic/src/engine.rs index 3f75d64f78d..7b8546af111 100644 --- a/components/engine_panic/src/engine.rs +++ b/components/engine_panic/src/engine.rs @@ -1,8 +1,8 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
use engine_traits::{ - IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, Result, SnapCtx, SyncMutable, - WriteOptions, + IterOptions, Iterable, Iterator, KvEngine, Peekable, ReadOptions, Result, SnapshotContext, + SyncMutable, WriteOptions, }; use crate::{db_vector::PanicDbVector, snapshot::PanicSnapshot, write_batch::PanicWriteBatch}; @@ -13,7 +13,7 @@ pub struct PanicEngine; impl KvEngine for PanicEngine { type Snapshot = PanicSnapshot; - fn snapshot(&self, _: Option) -> Self::Snapshot { + fn snapshot(&self, _: Option) -> Self::Snapshot { panic!() } fn sync(&self) -> Result<()> { diff --git a/components/engine_rocks/src/engine.rs b/components/engine_rocks/src/engine.rs index e0eed00ad53..7de0ffd0dbe 100644 --- a/components/engine_rocks/src/engine.rs +++ b/components/engine_rocks/src/engine.rs @@ -3,7 +3,7 @@ use std::{any::Any, sync::Arc}; use engine_traits::{ - IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SnapCtx, SyncMutable, + IterOptions, Iterable, KvEngine, Peekable, ReadOptions, Result, SnapshotContext, SyncMutable, }; use rocksdb::{DBIterator, Writable, DB}; @@ -184,7 +184,7 @@ impl RocksEngine { impl KvEngine for RocksEngine { type Snapshot = RocksSnapshot; - fn snapshot(&self, _: Option) -> RocksSnapshot { + fn snapshot(&self, _: Option) -> RocksSnapshot { RocksSnapshot::new(self.db.clone()) } diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index 44539f19419..83f05180820 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -42,7 +42,7 @@ pub trait KvEngine: /// /// SnapCtx will only be used by some type of trait implementors (ex: /// HybridEngine) - fn snapshot(&self, snap_ctx: Option) -> Self::Snapshot; + fn snapshot(&self, snap_ctx: Option) -> Self::Snapshot; /// Syncs any writes to disk fn sync(&self) -> Result<()>; @@ -82,7 +82,8 @@ pub trait KvEngine: fn inner_refcount(&self) -> usize; } -pub struct SnapCtx { 
+#[derive(Debug, Clone)] +pub struct SnapshotContext { pub region_id: u64, pub read_ts: u64, } diff --git a/components/engine_traits/src/memory_engine.rs b/components/engine_traits/src/memory_engine.rs index 189c3bc0c28..9babc8580fc 100644 --- a/components/engine_traits/src/memory_engine.rs +++ b/components/engine_traits/src/memory_engine.rs @@ -13,5 +13,7 @@ pub trait RegionCacheEngine: // If None is returned, the RegionCacheEngine is currently not readable for this // region or read_ts. - fn snapshot(&self, region_id: u64, read_ts: u64) -> Option; + // Sequence number is shared between RegionCacheEngine and disk KvEnigne to + // provide atomic write + fn snapshot(&self, region_id: u64, read_ts: u64, seq_num: u64) -> Option; } diff --git a/components/hybrid_engine/Cargo.toml b/components/hybrid_engine/Cargo.toml index e0be90b179e..0ae04b1dc3e 100644 --- a/components/hybrid_engine/Cargo.toml +++ b/components/hybrid_engine/Cargo.toml @@ -12,3 +12,8 @@ engine_traits = { workspace = true } txn_types = { workspace = true } tikv_util = { workspace = true } engine_rocks = { workspace = true } + +[dev-dependencies] +engine_rocks = { workspace = true } +region_cache_memory_engine = { workspace = true } +tempfile = "3.0" diff --git a/components/hybrid_engine/src/engine.rs b/components/hybrid_engine/src/engine.rs index 6ccb223baf1..b76b999f1c3 100644 --- a/components/hybrid_engine/src/engine.rs +++ b/components/hybrid_engine/src/engine.rs @@ -1,7 +1,8 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
use engine_traits::{ - KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, SnapCtx, SyncMutable, + KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, SnapshotContext, SnapshotMiscExt, + SyncMutable, }; use crate::snapshot::HybridEngineSnapshot; @@ -65,21 +66,31 @@ where { type Snapshot = HybridEngineSnapshot; - fn snapshot(&self, _: Option) -> Self::Snapshot { - unimplemented!() + fn snapshot(&self, ctx: Option) -> Self::Snapshot { + let disk_snap = self.disk_engine.snapshot(ctx.clone()); + let region_cache_snap = if let Some(ctx) = ctx { + self.region_cache_engine.snapshot( + ctx.region_id, + ctx.read_ts, + disk_snap.sequence_number(), + ) + } else { + None + }; + HybridEngineSnapshot::new(disk_snap, region_cache_snap) } fn sync(&self) -> engine_traits::Result<()> { - unimplemented!() + self.disk_engine.sync() } fn bad_downcast(&self) -> &T { - unimplemented!() + self.disk_engine.bad_downcast() } #[cfg(feature = "testexport")] fn inner_refcount(&self) -> usize { - unimplemented!() + self.disk_engine.inner_refcount() } } @@ -90,17 +101,19 @@ where { type DbVector = EK::DbVector; + // region cache engine only supports peekable trait in the snapshot of it fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { - unimplemented!() + self.disk_engine.get_value_opt(opts, key) } + // region cache engine only supports peekable trait in the snapshot of it fn get_value_cf_opt( &self, opts: &ReadOptions, cf: &str, key: &[u8], ) -> Result> { - unimplemented!() + self.disk_engine.get_value_cf_opt(opts, cf, key) } } @@ -133,3 +146,56 @@ where unimplemented!() } } + +#[cfg(test)] +mod tests { + use engine_rocks::util::new_engine; + use engine_traits::{KvEngine, SnapshotContext, CF_DEFAULT, CF_LOCK, CF_WRITE}; + use region_cache_memory_engine::RegionCacheMemoryEngine; + use tempfile::Builder; + + use crate::HybridEngine; + + #[test] + fn test_engine() { + let path = Builder::new().prefix("temp").tempdir().unwrap(); + let disk_engine = new_engine( + 
path.path().to_str().unwrap(), + &[CF_DEFAULT, CF_LOCK, CF_WRITE], + ) + .unwrap(); + let memory_engine = RegionCacheMemoryEngine::default(); + memory_engine.new_region(1); + { + let mut core = memory_engine.core().lock().unwrap(); + core.mut_region_meta(1).unwrap().set_can_read(true); + core.mut_region_meta(1).unwrap().set_safe_ts(10); + } + + let hybrid_engine = HybridEngine::new(disk_engine, memory_engine.clone()); + let s = hybrid_engine.snapshot(None); + assert!(!s.region_cache_snapshot_available()); + + let mut snap_ctx = SnapshotContext { + read_ts: 15, + region_id: 1, + }; + let s = hybrid_engine.snapshot(Some(snap_ctx.clone())); + assert!(s.region_cache_snapshot_available()); + + { + let mut core = memory_engine.core().lock().unwrap(); + core.mut_region_meta(1).unwrap().set_can_read(false); + } + let s = hybrid_engine.snapshot(Some(snap_ctx.clone())); + assert!(!s.region_cache_snapshot_available()); + + { + let mut core = memory_engine.core().lock().unwrap(); + core.mut_region_meta(1).unwrap().set_can_read(true); + } + snap_ctx.read_ts = 5; + let s = hybrid_engine.snapshot(Some(snap_ctx)); + assert!(!s.region_cache_snapshot_available()); + } +} diff --git a/components/hybrid_engine/src/engine_iterator.rs b/components/hybrid_engine/src/engine_iterator.rs index 642aac82b60..7349240f2a9 100644 --- a/components/hybrid_engine/src/engine_iterator.rs +++ b/components/hybrid_engine/src/engine_iterator.rs @@ -11,44 +11,89 @@ where iter: Either, } +impl HybridEngineIterator +where + EK: KvEngine, + EC: RegionCacheEngine, +{ + pub fn disk_engine_iterator(iter: EK::Iterator) -> Self { + Self { + iter: Either::Left(iter), + } + } + + pub fn region_cache_engine_iterator(iter: EC::Iterator) -> Self { + Self { + iter: Either::Right(iter), + } + } +} + impl Iterator for HybridEngineIterator where EK: KvEngine, EC: RegionCacheEngine, { - fn seek(&mut self, _key: &[u8]) -> Result { - unimplemented!() + fn seek(&mut self, key: &[u8]) -> Result { + match self.iter { + 
Either::Left(ref mut iter) => iter.seek(key), + Either::Right(ref mut iter) => iter.seek(key), + } } - fn seek_for_prev(&mut self, _key: &[u8]) -> Result { - unimplemented!() + fn seek_for_prev(&mut self, key: &[u8]) -> Result { + match self.iter { + Either::Left(ref mut iter) => iter.seek_for_prev(key), + Either::Right(ref mut iter) => iter.seek_for_prev(key), + } } fn seek_to_first(&mut self) -> Result { - unimplemented!() + match self.iter { + Either::Left(ref mut iter) => iter.seek_to_first(), + Either::Right(ref mut iter) => iter.seek_to_first(), + } } fn seek_to_last(&mut self) -> Result { - unimplemented!() + match self.iter { + Either::Left(ref mut iter) => iter.seek_to_last(), + Either::Right(ref mut iter) => iter.seek_to_last(), + } } fn prev(&mut self) -> Result { - unimplemented!() + match self.iter { + Either::Left(ref mut iter) => iter.prev(), + Either::Right(ref mut iter) => iter.prev(), + } } fn next(&mut self) -> Result { - unimplemented!() + match self.iter { + Either::Left(ref mut iter) => iter.next(), + Either::Right(ref mut iter) => iter.next(), + } } fn key(&self) -> &[u8] { - unimplemented!() + match self.iter { + Either::Left(ref iter) => iter.key(), + Either::Right(ref iter) => iter.key(), + } } fn value(&self) -> &[u8] { - unimplemented!() + match self.iter { + Either::Left(ref iter) => iter.value(), + Either::Right(ref iter) => iter.value(), + } } fn valid(&self) -> Result { - unimplemented!() + match self.iter { + Either::Left(ref iter) => iter.valid(), + Either::Right(ref iter) => iter.valid(), + } } } diff --git a/components/hybrid_engine/src/iterable.rs b/components/hybrid_engine/src/iterable.rs index 96933641b06..27a38570f01 100644 --- a/components/hybrid_engine/src/iterable.rs +++ b/components/hybrid_engine/src/iterable.rs @@ -12,6 +12,10 @@ where type Iterator = HybridEngineIterator; fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { - unimplemented!() + // Iterator of region cache engine should only be created from 
the + // snapshot of it + self.disk_engine() + .iterator_opt(cf, opts) + .map(|iter| HybridEngineIterator::disk_engine_iterator(iter)) } } diff --git a/components/hybrid_engine/src/lib.rs b/components/hybrid_engine/src/lib.rs index 367d985b094..0778412a2c9 100644 --- a/components/hybrid_engine/src/lib.rs +++ b/components/hybrid_engine/src/lib.rs @@ -24,3 +24,4 @@ mod ttl_properties; mod write_batch; pub use engine::HybridEngine; +pub use snapshot::HybridEngineSnapshot; diff --git a/components/hybrid_engine/src/snapshot.rs b/components/hybrid_engine/src/snapshot.rs index 4ada590c3d6..3c7ab875a21 100644 --- a/components/hybrid_engine/src/snapshot.rs +++ b/components/hybrid_engine/src/snapshot.rs @@ -1,9 +1,6 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use std::{ - fmt::{self, Debug, Formatter}, - marker::PhantomData, -}; +use std::fmt::{self, Debug, Formatter}; use engine_traits::{ CfNamesExt, IterOptions, Iterable, KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, @@ -18,34 +15,40 @@ where EC: RegionCacheEngine, { disk_snap: EK::Snapshot, - - phantom: PhantomData, + region_cache_snap: Option, } -impl Snapshot for HybridEngineSnapshot +impl HybridEngineSnapshot where EK: KvEngine, EC: RegionCacheEngine, { + pub fn new(disk_snap: EK::Snapshot, region_cache_snap: Option) -> Self { + HybridEngineSnapshot { + disk_snap, + region_cache_snap, + } + } + + pub fn region_cache_snapshot_available(&self) -> bool { + self.region_cache_snap.is_some() + } } -impl Debug for HybridEngineSnapshot +impl Snapshot for HybridEngineSnapshot where EK: KvEngine, EC: RegionCacheEngine, { - fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { - write!(fmt, "Hybrid Engine Snapshot Impl") - } } -impl Drop for HybridEngineSnapshot +impl Debug for HybridEngineSnapshot where EK: KvEngine, EC: RegionCacheEngine, { - fn drop(&mut self) { - unimplemented!() + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + write!(fmt, "Hybrid Engine Snapshot Impl") } 
} @@ -98,6 +101,6 @@ where EC: RegionCacheEngine, { fn sequence_number(&self) -> u64 { - unimplemented!() + self.disk_snap.sequence_number() } } diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 81e809a0205..cde5c961f3f 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -95,5 +95,7 @@ yatp = { workspace = true } encryption_export = { workspace = true } engine_panic = { workspace = true } engine_rocks = { workspace = true } +hybrid_engine = { workspace = true } panic_hook = { workspace = true } +region_cache_memory_engine = { workspace = true } test_sst_importer = { workspace = true } diff --git a/components/raftstore/src/router.rs b/components/raftstore/src/router.rs index fd50357fa38..452616caf7e 100644 --- a/components/raftstore/src/router.rs +++ b/components/raftstore/src/router.rs @@ -7,7 +7,7 @@ use std::{ // #[PerformanceCriticalPath] use crossbeam::channel::TrySendError; -use engine_traits::{KvEngine, RaftEngine, Snapshot}; +use engine_traits::{KvEngine, RaftEngine, Snapshot, SnapshotContext}; use error_code::ErrorCodeExt; use kvproto::{metapb, raft_cmdpb::RaftCmdRequest, raft_serverpb::RaftMessage}; use raft::SnapshotStatus; @@ -121,6 +121,7 @@ where { fn read( &mut self, + snap_ctx: Option, read_id: Option, req: RaftCmdRequest, cb: Callback, @@ -251,11 +252,12 @@ impl RaftStoreRouter for ServerRaftStoreRouter impl LocalReadRouter for ServerRaftStoreRouter { fn read( &mut self, + snap_ctx: Option, read_id: Option, req: RaftCmdRequest, cb: Callback, ) -> RaftStoreResult<()> { - self.local_reader.read(read_id, req, cb); + self.local_reader.read(snap_ctx, read_id, req, cb); Ok(()) } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 382b9e53b4b..904d35fec2f 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -19,8 +19,8 @@ use bytes::Bytes; use collections::{HashMap, HashSet}; use 
crossbeam::{atomic::AtomicCell, channel::TrySendError}; use engine_traits::{ - Engines, KvEngine, PerfContext, RaftEngine, Snapshot, WriteBatch, WriteOptions, CF_DEFAULT, - CF_LOCK, CF_WRITE, + Engines, KvEngine, PerfContext, RaftEngine, Snapshot, SnapshotContext, WriteBatch, + WriteOptions, CF_DEFAULT, CF_LOCK, CF_WRITE, }; use error_code::ErrorCodeExt; use fail::fail_point; @@ -4829,7 +4829,16 @@ where } } - let mut resp = reader.execute(&req, &Arc::new(region), read_index, None); + let snap_ctx = if let Ok(read_ts) = decode_u64(&mut req.get_header().get_flag_data()) { + Some(SnapshotContext { + region_id: self.region_id, + read_ts, + }) + } else { + None + }; + + let mut resp = reader.execute(&req, &Arc::new(region), read_index, snap_ctx, None); if let Some(snap) = resp.snapshot.as_mut() { snap.txn_ext = Some(self.txn_ext.clone()); snap.bucket_meta = self @@ -5735,8 +5744,12 @@ where &self.engines.kv } - fn get_snapshot(&mut self, _: &Option>) -> Arc { - Arc::new(self.engines.kv.snapshot(None)) + fn get_snapshot( + &mut self, + snap_ctx: Option, + _: &Option>, + ) -> Arc { + Arc::new(self.engines.kv.snapshot(snap_ctx)) } } diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index 1c2c6251eba..5232675f14a 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -175,6 +175,11 @@ where pub fn get_end_key(&self) -> &[u8] { self.region.get_end_key() } + + #[cfg(test)] + pub fn snap(&self) -> Arc { + self.snap.clone() + } } impl Clone for RegionSnapshot diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index dbec805fe97..778f4ce45f0 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -12,7 +12,7 @@ use std::{ }; use crossbeam::{atomic::AtomicCell, channel::TrySendError}; -use engine_traits::{KvEngine, Peekable, RaftEngine}; 
+use engine_traits::{KvEngine, Peekable, RaftEngine, SnapshotContext}; use fail::fail_point; use kvproto::{ errorpb, @@ -57,6 +57,7 @@ pub trait ReadExecutor { /// Currently, only multi-rocksdb version may return `None`. fn get_snapshot( &mut self, + snap_ctx: Option, read_context: &Option>, ) -> Arc<::Snapshot>; @@ -64,6 +65,7 @@ pub trait ReadExecutor { &mut self, req: &Request, region: &metapb::Region, + snap_ctx: Option, read_context: &Option>, ) -> Result { let key = req.get_get().get_key(); @@ -71,7 +73,7 @@ pub trait ReadExecutor { util::check_key_in_region(key, region)?; let mut resp = Response::default(); - let snapshot = self.get_snapshot(read_context); + let snapshot = self.get_snapshot(snap_ctx, read_context); let res = if !req.get_get().get_cf().is_empty() { let cf = req.get_get().get_cf(); snapshot @@ -109,6 +111,7 @@ pub trait ReadExecutor { msg: &RaftCmdRequest, region: &Arc, read_index: Option, + snap_ctx: Option, local_read_ctx: Option>, ) -> ReadResponse<::Snapshot> { let requests = msg.get_requests(); @@ -121,20 +124,22 @@ pub trait ReadExecutor { for req in requests { let cmd_type = req.get_cmd_type(); let mut resp = match cmd_type { - CmdType::Get => match self.get_value(req, region.as_ref(), &local_read_ctx) { - Ok(resp) => resp, - Err(e) => { - error!(?e; - "failed to execute get command"; - "region_id" => region.get_id(), - ); - response.response = cmd_resp::new_error(e); - return response; + CmdType::Get => { + match self.get_value(req, region.as_ref(), snap_ctx.clone(), &local_read_ctx) { + Ok(resp) => resp, + Err(e) => { + error!(?e; + "failed to execute get command"; + "region_id" => region.get_id(), + ); + response.response = cmd_resp::new_error(e); + return response; + } } - }, + } CmdType::Snap => { let snapshot = RegionSnapshot::from_snapshot( - self.get_snapshot(&local_read_ctx), + self.get_snapshot(snap_ctx.clone(), &local_read_ctx), region.clone(), ); response.snapshot = Some(snapshot); @@ -226,9 +231,16 @@ where } } - /// Update 
the snapshot in the `snap_cache` if the read_id is None or does - /// not match. - fn maybe_update_snapshot(&mut self, engine: &E, delegate_last_valid_ts: Timespec) -> bool { + // Update the snapshot in the `snap_cache` if the read_id is None or does + // not match. + // snap_ctx is used (if not None) to acquire the snapshot of the relevant region + // from region cache engine + fn maybe_update_snapshot( + &mut self, + engine: &E, + snap_ctx: Option, + delegate_last_valid_ts: Timespec, + ) -> bool { // When the read_id is None, it means the `snap_cache` has been cleared // before and the `cached_read_id` of it is None because only a consecutive // requests will have the same cache and the cache will be cleared after the @@ -242,7 +254,7 @@ where } self.snap_cache.cached_read_id = self.read_id.clone(); - self.snap_cache.snapshot = Some(Arc::new(engine.snapshot(None))); + self.snap_cache.snapshot = Some(Arc::new(engine.snapshot(snap_ctx))); // Ensures the snapshot is acquired before getting the time atomic::fence(atomic::Ordering::Release); @@ -250,7 +262,7 @@ where } else { // read_id being None means the snapshot acquired will only be used in this // request - self.snapshot = Some(Arc::new(engine.snapshot(None))); + self.snapshot = Some(Arc::new(engine.snapshot(snap_ctx))); // Ensures the snapshot is acquired before getting the time atomic::fence(atomic::Ordering::Release); @@ -984,14 +996,18 @@ where &mut self, req: &RaftCmdRequest, delegate: &mut CachedReadDelegate, + snap_ctx: Option, read_id: Option, snap_updated: &mut bool, last_valid_ts: Timespec, ) -> Option> { let mut local_read_ctx = LocalReadContext::new(&mut self.snap_cache, read_id); - (*snap_updated) = - local_read_ctx.maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); + (*snap_updated) = local_read_ctx.maybe_update_snapshot( + delegate.get_tablet(), + snap_ctx.clone(), + last_valid_ts, + ); let snapshot_ts = local_read_ctx.snapshot_ts().unwrap(); if !delegate.is_in_leader_lease(snapshot_ts) 
{ @@ -999,7 +1015,7 @@ where } let region = Arc::clone(&delegate.region); - let mut response = delegate.execute(req, ®ion, None, Some(local_read_ctx)); + let mut response = delegate.execute(req, ®ion, None, snap_ctx, Some(local_read_ctx)); if let Some(snap) = response.snapshot.as_mut() { snap.bucket_meta = delegate.bucket_meta.clone(); } @@ -1024,11 +1040,11 @@ where // Stale read does not use cache, so we pass None for read_id let mut local_read_ctx = LocalReadContext::new(&mut self.snap_cache, None); (*snap_updated) = - local_read_ctx.maybe_update_snapshot(delegate.get_tablet(), last_valid_ts); + local_read_ctx.maybe_update_snapshot(delegate.get_tablet(), None, last_valid_ts); let region = Arc::clone(&delegate.region); // Getting the snapshot - let mut response = delegate.execute(req, ®ion, None, Some(local_read_ctx)); + let mut response = delegate.execute(req, ®ion, None, None, Some(local_read_ctx)); if let Some(snap) = response.snapshot.as_mut() { snap.bucket_meta = delegate.bucket_meta.clone(); } @@ -1042,6 +1058,7 @@ where pub fn propose_raft_command( &mut self, + snap_ctx: Option, read_id: Option, mut req: RaftCmdRequest, cb: Callback, @@ -1056,6 +1073,7 @@ where if let Some(read_resp) = self.try_local_leader_read( &req, &mut delegate, + snap_ctx, read_id, &mut snap_updated, last_valid_ts, @@ -1103,6 +1121,7 @@ where &req, &mut delegate, None, + None, &mut snap_updated, last_valid_ts, ) { @@ -1180,11 +1199,12 @@ where #[inline] pub fn read( &mut self, + snap_ctx: Option, read_id: Option, req: RaftCmdRequest, cb: Callback, ) { - self.propose_raft_command(read_id, req, cb); + self.propose_raft_command(snap_ctx, read_id, req, cb); maybe_tls_local_read_metrics_flush(); } @@ -1218,7 +1238,11 @@ where &self.kv_engine } - fn get_snapshot(&mut self, read_context: &Option>) -> Arc { + fn get_snapshot( + &mut self, + _: Option, + read_context: &Option>, + ) -> Arc { read_context.as_ref().unwrap().snapshot().unwrap() } } @@ -1266,7 +1290,9 @@ mod tests { use 
crossbeam::channel::TrySendError; use engine_test::kv::{KvTestEngine, KvTestSnapshot}; use engine_traits::{MiscExt, Peekable, SyncMutable, ALL_CFS}; + use hybrid_engine::{HybridEngine, HybridEngineSnapshot}; use kvproto::{metapb::RegionEpoch, raft_cmdpb::*}; + use region_cache_memory_engine::RegionCacheMemoryEngine; use tempfile::{Builder, TempDir}; use tikv_util::{codec::number::NumberEncoder, time::monotonic_raw_now}; use time::Duration; @@ -1351,6 +1377,7 @@ mod tests { cmd: RaftCmdRequest, ) { reader.propose_raft_command( + None, None, cmd.clone(), Callback::read(Box::new(|resp| { @@ -1379,7 +1406,7 @@ mod tests { task: RaftCommand, read_id: Option, ) { - reader.propose_raft_command(read_id, task.request, task.callback); + reader.propose_raft_command(None, read_id, task.request, task.callback); assert_eq!(rx.try_recv().unwrap_err(), TryRecvError::Empty); } @@ -1512,6 +1539,7 @@ mod tests { .mut_peer() .set_store_id(store_id + 1); reader.propose_raft_command( + None, None, cmd_store_id, Callback::read(Box::new(move |resp: ReadResponse| { @@ -1536,6 +1564,7 @@ mod tests { .mut_peer() .set_id(leader2.get_id() + 1); reader.propose_raft_command( + None, None, cmd_peer_id, Callback::read(Box::new(move |resp: ReadResponse| { @@ -1561,6 +1590,7 @@ mod tests { let mut cmd_term = cmd.clone(); cmd_term.mut_header().set_term(term6 - 2); reader.propose_raft_command( + None, None, cmd_term, Callback::read(Box::new(move |resp: ReadResponse| { @@ -1597,8 +1627,9 @@ mod tests { ); // Channel full. 
- reader.propose_raft_command(None, cmd.clone(), Callback::None); + reader.propose_raft_command(None, None, cmd.clone(), Callback::None); reader.propose_raft_command( + None, None, cmd.clone(), Callback::read(Box::new(move |resp: ReadResponse| { @@ -1631,6 +1662,7 @@ mod tests { .update(Progress::applied_term(term6 + 3)); } reader.propose_raft_command( + None, None, cmd9.clone(), Callback::read(Box::new(|resp| { @@ -2007,7 +2039,7 @@ mod tests { let compare_ts = monotonic_raw_now(); // Case 1: snap_cache_context.read_id is None - assert!(read_context.maybe_update_snapshot(&db, Timespec::new(0, 0))); + assert!(read_context.maybe_update_snapshot(&db, None, Timespec::new(0, 0))); assert!(read_context.snapshot_ts().unwrap() > compare_ts); assert_eq!( read_context @@ -2022,7 +2054,7 @@ mod tests { // snap_cache_context is *not* created with read_id, so calling // `maybe_update_snapshot` again will update the snapshot let compare_ts = monotonic_raw_now(); - assert!(read_context.maybe_update_snapshot(&db, Timespec::new(0, 0))); + assert!(read_context.maybe_update_snapshot(&db, None, Timespec::new(0, 0))); assert!(read_context.snapshot_ts().unwrap() > compare_ts); let read_id = ThreadReadId::new(); @@ -2032,7 +2064,7 @@ mod tests { let compare_ts = monotonic_raw_now(); // Case 2: snap_cache_context.read_id is not None but not equals to the // snap_cache.cached_read_id - assert!(read_context.maybe_update_snapshot(&db, Timespec::new(0, 0))); + assert!(read_context.maybe_update_snapshot(&db, None, Timespec::new(0, 0))); assert!(read_context.snapshot_ts().unwrap() > compare_ts); let snap_ts = read_context.snapshot_ts().unwrap(); assert_eq!( @@ -2050,7 +2082,7 @@ mod tests { // `maybe_update_snapshot` again will *not* update the snapshot // Case 3: snap_cache_context.read_id is not None and equals to the // snap_cache.cached_read_id - assert!(!read_context.maybe_update_snapshot(&db2, Timespec::new(0, 0))); + assert!(!read_context.maybe_update_snapshot(&db2, None, 
Timespec::new(0, 0))); assert_eq!(read_context.snapshot_ts().unwrap(), snap_ts); assert_eq!( read_context @@ -2065,7 +2097,7 @@ mod tests { // Case 4: delegate.last_valid_ts is larger than create_time of read_id let mut last_valid_ts = read_id_clone.create_time; last_valid_ts = last_valid_ts.add(Duration::nanoseconds(1)); - assert!(read_context.maybe_update_snapshot(&db2, last_valid_ts)); + assert!(read_context.maybe_update_snapshot(&db2, None, last_valid_ts)); assert!(read_context.snapshot_ts().unwrap() > snap_ts); assert!( read_context @@ -2385,4 +2417,187 @@ mod tests { .has_data_is_not_ready() ); } + + type HybridTestEnigne = HybridEngine; + type HybridEngineTestSnapshot = HybridEngineSnapshot; + + struct HybridEngineMockRouter { + p_router: SyncSender>, + c_router: SyncSender<(u64, CasualMessage)>, + } + + impl HybridEngineMockRouter { + #[allow(clippy::type_complexity)] + fn new() -> ( + HybridEngineMockRouter, + Receiver>, + Receiver<(u64, CasualMessage)>, + ) { + let (p_ch, p_rx) = sync_channel(1); + let (c_ch, c_rx) = sync_channel(1); + ( + HybridEngineMockRouter { + p_router: p_ch, + c_router: c_ch, + }, + p_rx, + c_rx, + ) + } + } + + impl ProposalRouter for HybridEngineMockRouter { + fn send( + &self, + cmd: RaftCommand, + ) -> std::result::Result<(), TrySendError>> { + ProposalRouter::send(&self.p_router, cmd) + } + } + + impl CasualRouter for HybridEngineMockRouter { + fn send(&self, region_id: u64, msg: CasualMessage) -> Result<()> { + CasualRouter::send(&self.c_router, region_id, msg) + } + } + + #[allow(clippy::type_complexity)] + fn new_hybrid_engine_reader( + path: &str, + store_id: u64, + store_meta: Arc>, + ) -> ( + TempDir, + LocalReader, + Receiver>, + RegionCacheMemoryEngine, + ) { + let path = Builder::new().prefix(path).tempdir().unwrap(); + let disk_engine = + engine_test::kv::new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap(); + let (ch, rx, _) = HybridEngineMockRouter::new(); + let memory_engine = 
RegionCacheMemoryEngine::default(); + let engine = HybridEngine::new(disk_engine, memory_engine.clone()); + let mut reader = LocalReader::new( + engine.clone(), + StoreMetaDelegate::new(store_meta, engine), + ch, + ); + reader.local_reader.store_id = Cell::new(Some(store_id)); + (path, reader, rx, memory_engine) + } + + fn get_snapshot( + snap_ctx: Option, + reader: &mut LocalReader, + request: RaftCmdRequest, + rx: &Receiver>, + ) -> Arc { + let (sender, receiver) = channel(); + reader.propose_raft_command( + snap_ctx, + None, + request, + Callback::read(Box::new(move |snap| { + sender.send(snap).unwrap(); + })), + ); + // no direct is expected + assert_eq!(rx.try_recv().unwrap_err(), TryRecvError::Empty); + receiver.recv().unwrap().snapshot.unwrap().snap() + } + + #[test] + fn test_hybrid_engine_read() { + let store_id = 2; + let store_meta = Arc::new(Mutex::new(StoreMeta::new(0))); + let (_tmp, mut reader, rx, memory_engine) = new_hybrid_engine_reader( + "test-local-hybrid-engine-reader", + store_id, + store_meta.clone(), + ); + + // set up region so we can acquire snapshot from local reader + let mut region1 = metapb::Region::default(); + region1.set_id(1); + let prs = new_peers(store_id, vec![2, 3, 4]); + region1.set_peers(prs.clone().into()); + let epoch13 = { + let mut ep = metapb::RegionEpoch::default(); + ep.set_conf_ver(1); + ep.set_version(3); + ep + }; + let leader2 = prs[0].clone(); + region1.set_region_epoch(epoch13.clone()); + let term6 = 6; + let mut lease = Lease::new(Duration::seconds(1), Duration::milliseconds(250)); // 1s is long enough. 
+ let read_progress = Arc::new(RegionReadProgress::new(®ion1, 1, 1, 1)); + + lease.renew(monotonic_raw_now()); + let remote = lease.maybe_new_remote_lease(term6).unwrap(); + { + let mut meta = store_meta.lock().unwrap(); + let read_delegate = ReadDelegate { + tag: String::new(), + region: Arc::new(region1.clone()), + peer_id: leader2.get_id(), + term: term6, + applied_term: term6, + leader_lease: Some(remote), + last_valid_ts: Timespec::new(0, 0), + txn_extra_op: Arc::new(AtomicCell::new(TxnExtraOp::default())), + txn_ext: Arc::new(TxnExt::default()), + read_progress, + pending_remove: false, + wait_data: false, + track_ver: TrackVer::new(), + bucket_meta: None, + }; + meta.readers.insert(1, read_delegate); + } + + let mut cmd = RaftCmdRequest::default(); + let mut header = RaftRequestHeader::default(); + header.set_region_id(1); + header.set_peer(leader2); + header.set_region_epoch(epoch13); + header.set_term(term6); + cmd.set_header(header); + let mut req = Request::default(); + req.set_cmd_type(CmdType::Snap); + cmd.set_requests(vec![req].into()); + + let s = get_snapshot(None, &mut reader, cmd.clone(), &rx); + assert!(!s.region_cache_snapshot_available()); + + memory_engine.new_region(1); + { + let mut core = memory_engine.core().lock().unwrap(); + core.mut_region_meta(1).unwrap().set_can_read(true); + core.mut_region_meta(1).unwrap().set_safe_ts(10); + } + + let mut snap_ctx = SnapshotContext { + read_ts: 15, + region_id: 1, + }; + + let s = get_snapshot(Some(snap_ctx.clone()), &mut reader, cmd.clone(), &rx); + assert!(s.region_cache_snapshot_available()); + + { + let mut core = memory_engine.core().lock().unwrap(); + core.mut_region_meta(1).unwrap().set_can_read(false); + } + let s = get_snapshot(Some(snap_ctx.clone()), &mut reader, cmd.clone(), &rx); + assert!(!s.region_cache_snapshot_available()); + + { + let mut core = memory_engine.core().lock().unwrap(); + core.mut_region_meta(1).unwrap().set_can_read(true); + } + snap_ctx.read_ts = 5; + 
assert!(!s.region_cache_snapshot_available()); + } } diff --git a/components/region_cache_memory_engine/Cargo.toml b/components/region_cache_memory_engine/Cargo.toml index c529698fa14..949b2596f46 100644 --- a/components/region_cache_memory_engine/Cargo.toml +++ b/components/region_cache_memory_engine/Cargo.toml @@ -12,4 +12,4 @@ engine_traits = { workspace = true } collections = { workspace = true } skiplist-rs = { git = "https://github.com/tikv/skiplist-rs.git", branch = "main" } bytes = "1.0" -tikv_util = { workspace = true } \ No newline at end of file +tikv_util = { workspace = true } diff --git a/components/region_cache_memory_engine/src/engine.rs b/components/region_cache_memory_engine/src/engine.rs index 93e4c1a6d74..a8ee66a5b23 100644 --- a/components/region_cache_memory_engine/src/engine.rs +++ b/components/region_cache_memory_engine/src/engine.rs @@ -107,12 +107,28 @@ pub struct RegionMemoryMeta { safe_ts: u64, } +impl RegionMemoryMeta { + pub fn set_can_read(&mut self, can_read: bool) { + self.can_read = can_read; + } + + pub fn set_safe_ts(&mut self, safe_ts: u64) { + self.safe_ts = safe_ts; + } +} + #[derive(Default)] pub struct RegionCacheMemoryEngineCore { engine: HashMap, region_metas: HashMap, } +impl RegionCacheMemoryEngineCore { + pub fn mut_region_meta(&mut self, region_id: u64) -> Option<&mut RegionMemoryMeta> { + self.region_metas.get_mut(®ion_id) + } +} + /// The RegionCacheMemoryEngine serves as a region cache, storing hot regions in /// the leaders' store. 
Incoming writes that are written to disk engine (now, /// RocksDB) are also written to the RegionCacheMemoryEngine, leading to a @@ -135,6 +151,12 @@ pub struct RegionCacheMemoryEngine { core: Arc>, } +impl RegionCacheMemoryEngine { + pub fn core(&self) -> &Arc> { + &self.core + } +} + impl Debug for RegionCacheMemoryEngine { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "Region Cache Memory Engine") @@ -157,8 +179,8 @@ impl RegionCacheEngine for RegionCacheMemoryEngine { type Snapshot = RegionCacheSnapshot; // todo(SpadeA): add sequence number logic - fn snapshot(&self, region_id: u64, read_ts: u64) -> Option { - RegionCacheSnapshot::new(self.clone(), region_id, read_ts) + fn snapshot(&self, region_id: u64, read_ts: u64, seq_num: u64) -> Option { + RegionCacheSnapshot::new(self.clone(), region_id, read_ts, seq_num) } } @@ -193,10 +215,6 @@ pub struct RegionCacheIterator { impl Iterable for RegionCacheMemoryEngine { type Iterator = RegionCacheIterator; - fn iterator(&self, cf: &str) -> Result { - unimplemented!() - } - fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { unimplemented!() } @@ -357,12 +375,20 @@ impl Mutable for RegionCacheWriteBatch { pub struct RegionCacheSnapshot { region_id: u64, snapshot_ts: u64, + // Sequence number is shared between RegionCacheEngine and disk KvEnigne to + // provide atomic write + sequence_number: u64, region_memory_engine: RegionMemoryEngine, engine: RegionCacheMemoryEngine, } impl RegionCacheSnapshot { - pub fn new(engine: RegionCacheMemoryEngine, region_id: u64, read_ts: u64) -> Option { + pub fn new( + engine: RegionCacheMemoryEngine, + region_id: u64, + read_ts: u64, + seq_num: u64, + ) -> Option { let mut core = engine.core.lock().unwrap(); let region_meta = core.region_metas.get_mut(®ion_id)?; if !region_meta.can_read { @@ -379,6 +405,7 @@ impl RegionCacheSnapshot { Some(RegionCacheSnapshot { region_id, snapshot_ts: read_ts, + sequence_number: seq_num, region_memory_engine: 
core.engine.get(®ion_id).unwrap().clone(), engine: engine.clone(), }) @@ -446,7 +473,7 @@ impl CfNamesExt for RegionCacheSnapshot { impl SnapshotMiscExt for RegionCacheSnapshot { fn sequence_number(&self) -> u64 { - self.snapshot_ts + self.sequence_number } } @@ -515,31 +542,31 @@ mod tests { } }; - assert!(engine.snapshot(1, 5).is_none()); + assert!(engine.snapshot(1, 5, u64::MAX).is_none()); { let mut core = engine.core.lock().unwrap(); core.region_metas.get_mut(&1).unwrap().can_read = true; } - let s1 = engine.snapshot(1, 5).unwrap(); + let s1 = engine.snapshot(1, 5, u64::MAX).unwrap(); { let mut core = engine.core.lock().unwrap(); core.region_metas.get_mut(&1).unwrap().safe_ts = 5; } - assert!(engine.snapshot(1, 5).is_none()); - let s2 = engine.snapshot(1, 10).unwrap(); + assert!(engine.snapshot(1, 5, u64::MAX).is_none()); + let s2 = engine.snapshot(1, 10, u64::MAX).unwrap(); verify_snapshot_count(5, 1); verify_snapshot_count(10, 1); - let s3 = engine.snapshot(1, 10).unwrap(); + let s3 = engine.snapshot(1, 10, u64::MAX).unwrap(); verify_snapshot_count(10, 2); drop(s1); verify_snapshot_count(5, 0); drop(s2); verify_snapshot_count(10, 1); - let s4 = engine.snapshot(1, 10).unwrap(); + let s4 = engine.snapshot(1, 10, u64::MAX).unwrap(); verify_snapshot_count(10, 2); drop(s4); verify_snapshot_count(10, 1); @@ -609,7 +636,7 @@ mod tests { fill_data_in_skiplist(sl, (1..100).step_by(1)); } - let snapshot = engine.snapshot(1, 10).unwrap(); + let snapshot = engine.snapshot(1, 10, u64::MAX).unwrap(); let opts = ReadOptions::default(); for i in 1..100 { let k = construct_key(i); @@ -644,7 +671,7 @@ mod tests { } let mut iter_opt = IterOptions::default(); - let snapshot = engine.snapshot(1, 10).unwrap(); + let snapshot = engine.snapshot(1, 10, u64::MAX).unwrap(); // boundaries are not set assert!(snapshot.iterator_opt("lock", iter_opt.clone()).is_err()); @@ -719,7 +746,7 @@ mod tests { iter_opt.set_upper_bound(upper_bound.as_bytes(), 0); 
iter_opt.set_lower_bound(lower_bound.as_bytes(), 0); - let snapshot = engine.snapshot(1, 10).unwrap(); + let snapshot = engine.snapshot(1, 10, u64::MAX).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); assert!(iter.seek_to_last().unwrap()); verify_key_values(&mut iter, step, 99, i32::MIN); diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 1e0c57c3706..2521fccb694 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -19,8 +19,8 @@ use encryption_export::DataKeyManager; use engine_rocks::{RocksCompactedEvent, RocksEngine, RocksStatistics}; use engine_test::raft::RaftTestEngine; use engine_traits::{ - Engines, Iterable, KvEngine, Mutable, Peekable, RaftEngineReadOnly, SyncMutable, WriteBatch, - CF_DEFAULT, CF_RAFT, + Engines, Iterable, KvEngine, Mutable, Peekable, RaftEngineReadOnly, SnapshotContext, + SyncMutable, WriteBatch, CF_DEFAULT, CF_RAFT, }; use file_system::IoRateLimiter; use futures::{self, channel::oneshot, executor::block_on, future::BoxFuture, StreamExt}; @@ -123,19 +123,21 @@ pub trait Simulator { fn read( &mut self, + snap_ctx: Option, batch_id: Option, request: RaftCmdRequest, timeout: Duration, ) -> Result { let node_id = request.get_header().get_peer().get_store_id(); let (cb, mut rx) = make_cb::(&request); - self.async_read(node_id, batch_id, request, cb); + self.async_read(snap_ctx, node_id, batch_id, request, cb); rx.recv_timeout(timeout) .map_err(|_| Error::Timeout(format!("request timeout for {:?}", timeout))) } fn async_read( &mut self, + snap_ctx: Option, node_id: u64, batch_id: Option, request: RaftCmdRequest, @@ -450,11 +452,16 @@ where pub fn read( &self, + snap_ctx: Option, batch_id: Option, request: RaftCmdRequest, timeout: Duration, ) -> Result { - match self.sim.wl().read(batch_id, request.clone(), timeout) { + match self + .sim + .wl() + .read(snap_ctx, batch_id, request.clone(), timeout) + { 
Err(e) => { warn!("failed to read {:?}: {:?}", request, e); Err(e) @@ -478,7 +485,7 @@ where } } let ret = if is_read { - self.sim.wl().read(None, request.clone(), timeout) + self.sim.wl().read(None, None, request.clone(), timeout) } else { self.sim.rl().call_command(request.clone(), timeout) }; diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 7564da0e27e..5fdd4f24822 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -10,7 +10,7 @@ use concurrency_manager::ConcurrencyManager; use encryption_export::DataKeyManager; use engine_rocks::RocksEngine; use engine_test::raft::RaftTestEngine; -use engine_traits::{Engines, KvEngine}; +use engine_traits::{Engines, KvEngine, SnapshotContext}; use kvproto::{ kvrpcpb::ApiVersion, metapb, @@ -459,6 +459,7 @@ impl Simulator for NodeCluster { fn async_read( &mut self, + snap_ctx: Option, node_id: u64, batch_id: Option, request: RaftCmdRequest, @@ -480,7 +481,7 @@ impl Simulator for NodeCluster { } let mut guard = self.trans.core.lock().unwrap(); let router = guard.routers.get_mut(&node_id).unwrap(); - router.read(batch_id, request, cb).unwrap(); + router.read(snap_ctx, batch_id, request, cb).unwrap(); } fn send_raft_msg(&mut self, msg: raft_serverpb::RaftMessage) -> Result<()> { diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index bbc4ee2cf49..883a38edb23 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -15,7 +15,7 @@ use concurrency_manager::ConcurrencyManager; use encryption_export::DataKeyManager; use engine_rocks::RocksEngine; use engine_test::raft::RaftTestEngine; -use engine_traits::{Engines, KvEngine}; +use engine_traits::{Engines, KvEngine, SnapshotContext}; use futures::executor::block_on; use grpcio::{ChannelBuilder, EnvBuilder, Environment, Error as GrpcError, Service}; use grpcio_health::HealthService; @@ -737,6 +737,7 @@ impl 
Simulator for ServerCluster { fn async_read( &mut self, + snap_ctx: Option, node_id: u64, batch_id: Option, request: RaftCmdRequest, @@ -750,7 +751,9 @@ impl Simulator for ServerCluster { cb.invoke_with_response(resp); } Some(meta) => { - meta.sim_router.read(batch_id, request, cb).unwrap(); + meta.sim_router + .read(snap_ctx, batch_id, request, cb) + .unwrap(); } }; } diff --git a/components/test_raftstore/src/transport_simulate.rs b/components/test_raftstore/src/transport_simulate.rs index 4c21552cee5..3824e0dbe75 100644 --- a/components/test_raftstore/src/transport_simulate.rs +++ b/components/test_raftstore/src/transport_simulate.rs @@ -11,7 +11,7 @@ use std::{ use collections::{HashMap, HashSet}; use crossbeam::channel::TrySendError; -use engine_traits::KvEngine; +use engine_traits::{KvEngine, SnapshotContext}; use kvproto::{raft_cmdpb::RaftCmdRequest, raft_serverpb::RaftMessage}; use raft::eraftpb::MessageType; use raftstore::{ @@ -257,11 +257,12 @@ impl> RaftStoreRouter for SimulateTrans impl> LocalReadRouter for SimulateTransport { fn read( &mut self, + snap_ctx: Option, read_id: Option, req: RaftCmdRequest, cb: Callback, ) -> RaftStoreResult<()> { - self.ch.read(read_id, req, cb) + self.ch.read(snap_ctx, read_id, req, cb) } fn release_snapshot_cache(&mut self) { diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 8933f4dca74..019a7416a7a 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -459,7 +459,7 @@ pub fn read_on_peer>( read_quorum, ); request.mut_header().set_peer(peer); - cluster.read(None, request, timeout) + cluster.read(None, None, request, timeout) } pub fn async_read_on_peer>( @@ -481,7 +481,10 @@ pub fn async_read_on_peer>( request.mut_header().set_replica_read(replica_read); let (tx, mut rx) = future::bounded(1, future::WakePolicy::Immediately); let cb = Callback::read(Box::new(move |resp| drop(tx.send(resp.response)))); - 
cluster.sim.wl().async_read(node_id, None, request, cb); + cluster + .sim + .wl() + .async_read(None, node_id, None, request, cb); Box::pin(async move { let fut = rx.next(); fut.await.unwrap() @@ -512,7 +515,7 @@ pub fn batch_read_on_peer>( cluster .sim .wl() - .async_read(node_id, batch_id.clone(), request, cb); + .async_read(None, node_id, batch_id.clone(), request, cb); len += 1; } while results.len() < len { @@ -536,7 +539,7 @@ pub fn read_index_on_peer>( read_quorum, ); request.mut_header().set_peer(peer); - cluster.read(None, request, timeout) + cluster.read(None, None, request, timeout) } pub fn async_read_index_on_peer>( @@ -561,7 +564,10 @@ pub fn async_read_index_on_peer>( request.mut_header().set_peer(peer); let (tx, mut rx) = future::bounded(1, future::WakePolicy::Immediately); let cb = Callback::read(Box::new(move |resp| drop(tx.send(resp.response)))); - cluster.sim.wl().async_read(node_id, None, request, cb); + cluster + .sim + .wl() + .async_read(None, node_id, None, request, cb); Box::pin(async move { let fut = rx.next(); fut.await.unwrap() diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 82563666f04..9f42925b6d4 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -22,7 +22,7 @@ use std::{ use collections::{HashMap, HashSet}; use concurrency_manager::ConcurrencyManager; -use engine_traits::{CfName, KvEngine, MvccProperties, Snapshot}; +use engine_traits::{CfName, KvEngine, MvccProperties, Snapshot, SnapshotContext}; use futures::{future::BoxFuture, task::AtomicWaker, Future, Stream, StreamExt, TryFutureExt}; use kvproto::{ errorpb, @@ -644,10 +644,15 @@ where })); let tracker = store_cb.read_tracker().unwrap(); + let snap_ctx = ctx.start_ts.map(|ts| SnapshotContext { + read_ts: ts.into_inner(), + region_id: ctx.pb_ctx.get_region_id(), + }); + if res.is_ok() { res = self .router - .read(ctx.read_id, cmd, store_cb) + .read(snap_ctx, ctx.read_id, cmd, store_cb) .map_err(kv::Error::from); } async move { diff 
--git a/tests/benches/misc/raftkv/mod.rs b/tests/benches/misc/raftkv/mod.rs index eab0f38d749..2650434c80f 100644 --- a/tests/benches/misc/raftkv/mod.rs +++ b/tests/benches/misc/raftkv/mod.rs @@ -5,7 +5,7 @@ use std::sync::{Arc, RwLock}; use collections::HashSet; use crossbeam::channel::TrySendError; use engine_rocks::{RocksEngine, RocksSnapshot}; -use engine_traits::{KvEngine, ALL_CFS, CF_DEFAULT}; +use engine_traits::{KvEngine, SnapshotContext, ALL_CFS, CF_DEFAULT}; use futures::future::FutureExt; use kvproto::{ kvrpcpb::{Context, ExtraOp as TxnExtraOp}, @@ -121,6 +121,7 @@ impl RaftStoreRouter for SyncBenchRouter { impl LocalReadRouter for SyncBenchRouter { fn read( &mut self, + _: Option, _: Option, req: RaftCmdRequest, cb: Callback, diff --git a/tests/failpoints/cases/test_witness.rs b/tests/failpoints/cases/test_witness.rs index e207525bcea..f6fec8b35de 100644 --- a/tests/failpoints/cases/test_witness.rs +++ b/tests/failpoints/cases/test_witness.rs @@ -53,7 +53,7 @@ fn test_witness_update_region_in_local_reader() { request.mut_header().set_replica_read(true); let resp = cluster - .read(None, request.clone(), Duration::from_millis(100)) + .read(None, None, request.clone(), Duration::from_millis(100)) .unwrap(); assert_eq!( resp.get_header().get_error().get_is_witness(), @@ -105,7 +105,7 @@ fn test_witness_not_reported_while_disabled() { request.mut_header().set_replica_read(true); let resp = cluster - .read(None, request.clone(), Duration::from_millis(100)) + .read(None, None, request.clone(), Duration::from_millis(100)) .unwrap(); assert!(resp.get_header().has_error()); assert!(!resp.get_header().get_error().has_is_witness()); @@ -492,7 +492,7 @@ fn test_non_witness_replica_read() { request.mut_header().set_replica_read(true); let resp = cluster - .read(None, request, Duration::from_millis(100)) + .read(None, None, request, Duration::from_millis(100)) .unwrap(); assert_eq!( resp.get_header().get_error().get_is_witness(), @@ -517,7 +517,7 @@ fn 
test_non_witness_replica_read() { request.mut_header().set_replica_read(true); let resp = cluster - .read(None, request, Duration::from_millis(100)) + .read(None, None, request, Duration::from_millis(100)) .unwrap(); assert_eq!(resp.get_header().has_error(), false); } diff --git a/tests/integrations/raftstore/mod.rs b/tests/integrations/raftstore/mod.rs index 3bb93f6809b..998269afb98 100644 --- a/tests/integrations/raftstore/mod.rs +++ b/tests/integrations/raftstore/mod.rs @@ -15,6 +15,7 @@ mod test_life; mod test_merge; mod test_multi; mod test_prevote; +mod test_region_cache; mod test_region_change_observer; mod test_region_heartbeat; mod test_region_info_accessor; diff --git a/tests/integrations/raftstore/test_region_cache.rs b/tests/integrations/raftstore/test_region_cache.rs new file mode 100644 index 00000000000..4d95ff6701c --- /dev/null +++ b/tests/integrations/raftstore/test_region_cache.rs @@ -0,0 +1,17 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use test_raftstore::new_node_cluster_with_hybrid_engine; + +#[test] +fn test_basic_read() { + let _cluster = new_node_cluster_with_hybrid_engine(1, 3); + // todo(SpadeA): add test logic +} + +#[test] +fn test_read_index() { + let _cluster = new_node_cluster_with_hybrid_engine(1, 3); + // todo(SpadeA): add test logic +} + +// todo(SpadeA): more tests when other relevant modules are ready. 
diff --git a/tests/integrations/raftstore/test_witness.rs b/tests/integrations/raftstore/test_witness.rs index 7879ffc49be..e42ac75598e 100644 --- a/tests/integrations/raftstore/test_witness.rs +++ b/tests/integrations/raftstore/test_witness.rs @@ -474,7 +474,7 @@ fn test_witness_replica_read() { request.mut_header().set_replica_read(true); let resp = cluster - .read(None, request, Duration::from_millis(100)) + .read(None, None, request, Duration::from_millis(100)) .unwrap(); assert_eq!( resp.get_header().get_error().get_is_witness(), From fe80806fcab1832c3cc3684dcd50b67f388fb5c1 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 21 Dec 2023 13:09:23 +0800 Subject: [PATCH 1070/1149] metrics: change resource group label name (#16192) close tikv/tikv#16191 change metrics label name for resource-group-name from "name" to "resource_group". To be backward compatible with old grafana panel, we add a new label name and keep the old one. We are going to deprecate the old label in v8.0. Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/resource_control/src/metrics.rs | 6 +++--- src/server/metrics.rs | 3 ++- src/server/service/kv.rs | 25 +++++++++++++++------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/components/resource_control/src/metrics.rs b/components/resource_control/src/metrics.rs index c9404092501..45723063492 100644 --- a/components/resource_control/src/metrics.rs +++ b/components/resource_control/src/metrics.rs @@ -7,19 +7,19 @@ lazy_static! 
{ pub static ref BACKGROUND_QUOTA_LIMIT_VEC: IntGaugeVec = register_int_gauge_vec!( "tikv_resource_control_background_quota_limiter", "The quota limiter of background resource groups per resource type", - &["name", "type"] + &["resource_group", "type"] ) .unwrap(); pub static ref BACKGROUND_RESOURCE_CONSUMPTION: IntCounterVec = register_int_counter_vec!( "tikv_resource_control_background_resource_consumption", "Total resource consumed of background resource groups per resource type", - &["name", "type"] + &["resource_group", "type"] ) .unwrap(); pub static ref BACKGROUND_TASKS_WAIT_DURATION: IntCounterVec = register_int_counter_vec!( "tikv_resource_control_background_task_wait_duration", "Total wait duration of background tasks per resource group", - &["name"] + &["resource_group"] ) .unwrap(); pub static ref PRIORITY_QUOTA_LIMIT_VEC: IntGaugeVec = register_int_gauge_vec!( diff --git a/src/server/metrics.rs b/src/server/metrics.rs index 3ad9c5bdde0..c55a0c0ae8a 100644 --- a/src/server/metrics.rs +++ b/src/server/metrics.rs @@ -217,10 +217,11 @@ lazy_static! { &["type"] ) .unwrap(); + // TODO: deprecate the "name" label in v8.0. pub static ref GRPC_RESOURCE_GROUP_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( "tikv_grpc_resource_group_total", "Total number of handle grpc message for each resource group", - &["name"] + &["name", "resource_group"] ) .unwrap(); pub static ref GRPC_PROXY_MSG_COUNTER_VEC: IntCounterVec = register_int_counter_vec!( diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 01aae59fe18..02bfca0473e 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -204,7 +204,7 @@ macro_rules! 
handle_request { resource_group_priority= ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[resource_control_ctx.get_resource_group_name(), resource_control_ctx.get_resource_group_name()]) .inc(); let resp = $future_name(&self.storage, req); let task = async move { @@ -493,7 +493,10 @@ impl Tikv for Service { } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[ + resource_control_ctx.get_resource_group_name(), + resource_control_ctx.get_resource_group_name(), + ]) .inc(); let begin_instant = Instant::now(); @@ -535,7 +538,10 @@ impl Tikv for Service { ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[ + resource_control_ctx.get_resource_group_name(), + resource_control_ctx.get_resource_group_name(), + ]) .inc(); let begin_instant = Instant::now(); @@ -629,7 +635,10 @@ impl Tikv for Service { ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[ + resource_control_ctx.get_resource_group_name(), + resource_control_ctx.get_resource_group_name(), + ]) .inc(); let mut stream = self @@ -1185,7 +1194,7 @@ fn handle_batch_commands_request( } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[ resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[ resource_control_ctx.get_resource_group_name(), resource_control_ctx.get_resource_group_name()]) .inc(); if batcher.as_mut().map_or(false, |req_batch| { req_batch.can_batch_get(&req) @@ -1208,7 +1217,7 @@ fn handle_batch_commands_request( resource_group_priority = 
ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[resource_control_ctx.get_resource_group_name(), resource_control_ctx.get_resource_group_name()]) .inc(); if batcher.as_mut().map_or(false, |req_batch| { req_batch.can_batch_raw_get(&req) @@ -1231,7 +1240,7 @@ fn handle_batch_commands_request( resource_group_priority = ResourcePriority::from(resource_control_ctx.override_priority ); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[resource_control_ctx.get_resource_group_name(), resource_control_ctx.get_resource_group_name()]) .inc(); let begin_instant = Instant::now(); let source = req.get_context().get_request_source().to_owned(); @@ -1268,7 +1277,7 @@ fn handle_batch_commands_request( resource_group_priority = ResourcePriority::from(resource_control_ctx.override_priority); } GRPC_RESOURCE_GROUP_COUNTER_VEC - .with_label_values(&[resource_control_ctx.get_resource_group_name()]) + .with_label_values(&[resource_control_ctx.get_resource_group_name(), resource_control_ctx.get_resource_group_name()]) .inc(); let begin_instant = Instant::now(); let source = req.get_context().get_request_source().to_owned(); From 01498b0519d89fd9f6b72546312eded1134a3a2b Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Thu, 21 Dec 2023 15:36:23 +0800 Subject: [PATCH 1071/1149] metric: add read scan duration panel (#16205) ref tikv/tikv#15066 Add scan lock read duration panel to check read lock holding time. 
Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/grafana/tikv_details.dashboard.py | 11 +- metrics/grafana/tikv_details.json | 459 ++++++++++++++++------ metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 341 insertions(+), 131 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 966346f741e..1ed32eb6fe5 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -6852,7 +6852,16 @@ def PessimisticLocking() -> RowPanel: description="The length includes the entering transaction itself", yaxis=yaxis(format=UNITS.SHORT), metric="tikv_lock_wait_queue_length_bucket", - ) + ), + graph_panel_histogram_quantiles( + title="In-memory scan lock read duration", + description="The duration scan in-memory pessimistic locks with read lock", + yaxes=yaxes(left_format=UNITS.SECONDS, log_base=2), + metric="tikv_storage_mvcc_scan_lock_read_duration_seconds", + by_labels=["type"], + hide_count=True, + hide_avg=True, + ), ] ) return layout.row_panel diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 3af1480137c..5dd36b73dfb 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -51866,7 +51866,7 @@ }, "gridPos": { "h": 7, - "w": 24, + "w": 12, "x": 0, "y": 35 }, @@ -51939,6 +51939,207 @@ "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The duration scan in-memory pessimistic locks with read lock", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + 
"threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 35 + }, + "height": null, + "hideTimeOverride": false, + "id": 371, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{type}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + 
"step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{type}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{type}}", + "metric": "", + "query": "(sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "In-memory scan lock read duration", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 2, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } } ], "repeat": null, @@ -51975,7 +52176,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 371, + "id": 372, "interval": null, "links": [], "maxDataPoints": 100, @@ -52014,7 +52215,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 372, + "id": 373, "interval": null, "isNew": true, "legend": { @@ -52147,7 +52348,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 373, + "id": 374, "interval": null, "isNew": true, "legend": { @@ -52310,7 +52511,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 374, + "id": 375, "interval": null, "isNew": true, "legend": { @@ -52458,7 +52659,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 375, + "id": 376, "interval": null, "isNew": true, "legend": { @@ -52598,7 +52799,7 @@ 
"hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 376, + "id": 377, "interval": null, "legend": { "show": false @@ -52702,7 +52903,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 377, + "id": 378, "interval": null, "legend": { "show": false @@ -52806,7 +53007,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 378, + "id": 379, "interval": null, "legend": { "show": false @@ -52903,7 +53104,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 379, + "id": 380, "interval": null, "isNew": true, "legend": { @@ -53043,7 +53244,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 380, + "id": 381, "interval": null, "legend": { "show": false @@ -53147,7 +53348,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 381, + "id": 382, "interval": null, "legend": { "show": false @@ -53251,7 +53452,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 382, + "id": 383, "interval": null, "legend": { "show": false @@ -53348,7 +53549,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 383, + "id": 384, "interval": null, "isNew": true, "legend": { @@ -53481,7 +53682,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 384, + "id": 385, "interval": null, "isNew": true, "legend": { @@ -53614,7 +53815,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 385, + "id": 386, "interval": null, "isNew": true, "legend": { @@ -53754,7 +53955,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 386, + "id": 387, "interval": null, "legend": { "show": false @@ -53851,7 +54052,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 387, + "id": 388, "interval": null, "isNew": true, "legend": { @@ -53987,7 +54188,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 388, + "id": 389, "interval": null, "links": [], 
"maxDataPoints": 100, @@ -54026,7 +54227,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 389, + "id": 390, "interval": null, "isNew": true, "legend": { @@ -54159,7 +54360,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 390, + "id": 391, "interval": null, "isNew": true, "legend": { @@ -54292,7 +54493,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 391, + "id": 392, "interval": null, "isNew": true, "legend": { @@ -54425,7 +54626,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 392, + "id": 393, "interval": null, "isNew": true, "legend": { @@ -54558,7 +54759,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 393, + "id": 394, "interval": null, "isNew": true, "legend": { @@ -54691,7 +54892,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 394, + "id": 395, "interval": null, "isNew": true, "legend": { @@ -54824,7 +55025,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 395, + "id": 396, "interval": null, "isNew": true, "legend": { @@ -54964,7 +55165,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 396, + "id": 397, "interval": null, "legend": { "show": false @@ -55061,7 +55262,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 397, + "id": 398, "interval": null, "isNew": true, "legend": { @@ -55194,7 +55395,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 398, + "id": 399, "interval": null, "isNew": true, "legend": { @@ -55334,7 +55535,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 399, + "id": 400, "interval": null, "legend": { "show": false @@ -55431,7 +55632,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 400, + "id": 401, "interval": null, "isNew": true, "legend": { @@ -55564,7 +55765,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 401, + "id": 402, "interval": null, "isNew": true, "legend": { @@ -55697,7 +55898,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 402, + 
"id": 403, "interval": null, "isNew": true, "legend": { @@ -55830,7 +56031,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 403, + "id": 404, "interval": null, "isNew": true, "legend": { @@ -55978,7 +56179,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 404, + "id": 405, "interval": null, "isNew": true, "legend": { @@ -56126,7 +56327,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 405, + "id": 406, "interval": null, "isNew": true, "legend": { @@ -56262,7 +56463,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 406, + "id": 407, "interval": null, "links": [], "maxDataPoints": 100, @@ -56301,7 +56502,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 407, + "id": 408, "interval": null, "isNew": true, "legend": { @@ -56434,7 +56635,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 408, + "id": 409, "interval": null, "isNew": true, "legend": { @@ -56567,7 +56768,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 409, + "id": 410, "interval": null, "isNew": true, "legend": { @@ -56700,7 +56901,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 410, + "id": 411, "interval": null, "isNew": true, "legend": { @@ -56836,7 +57037,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 411, + "id": 412, "interval": null, "links": [], "maxDataPoints": 100, @@ -56875,7 +57076,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 412, + "id": 413, "interval": null, "isNew": true, "legend": { @@ -57038,7 +57239,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 413, + "id": 414, "interval": null, "isNew": true, "legend": { @@ -57171,7 +57372,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 414, + "id": 415, "interval": null, "isNew": true, "legend": { @@ -57311,7 +57512,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 415, + "id": 416, "interval": null, "legend": { "show": false @@ -57415,7 +57616,7 @@ "hideTimeOverride": false, 
"hideZeroBuckets": true, "highlightCards": true, - "id": 416, + "id": 417, "interval": null, "legend": { "show": false @@ -57512,7 +57713,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 417, + "id": 418, "interval": null, "isNew": true, "legend": { @@ -57667,7 +57868,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 418, + "id": 419, "interval": null, "legend": { "show": false @@ -57771,7 +57972,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 419, + "id": 420, "interval": null, "legend": { "show": false @@ -57875,7 +58076,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 420, + "id": 421, "interval": null, "legend": { "show": false @@ -57972,7 +58173,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 421, + "id": 422, "interval": null, "isNew": true, "legend": { @@ -58142,7 +58343,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 422, + "id": 423, "interval": null, "legend": { "show": false @@ -58239,7 +58440,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 423, + "id": 424, "interval": null, "isNew": true, "legend": { @@ -58440,7 +58641,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 424, + "id": 425, "interval": null, "isNew": true, "legend": { @@ -58641,7 +58842,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 425, + "id": 426, "interval": null, "isNew": true, "legend": { @@ -58774,7 +58975,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 426, + "id": 427, "interval": null, "isNew": true, "legend": { @@ -58937,7 +59138,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 427, + "id": 428, "interval": null, "isNew": true, "legend": { @@ -59070,7 +59271,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 428, + "id": 429, "interval": null, "isNew": true, "legend": { @@ -59203,7 +59404,7 @@ }, "height": null, "hideTimeOverride": false, - 
"id": 429, + "id": 430, "interval": null, "isNew": true, "legend": { @@ -59404,7 +59605,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 430, + "id": 431, "interval": null, "isNew": true, "legend": { @@ -59544,7 +59745,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 431, + "id": 432, "interval": null, "legend": { "show": false @@ -59648,7 +59849,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 432, + "id": 433, "interval": null, "legend": { "show": false @@ -59752,7 +59953,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 433, + "id": 434, "interval": null, "legend": { "show": false @@ -59856,7 +60057,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 434, + "id": 435, "interval": null, "legend": { "show": false @@ -59960,7 +60161,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 435, + "id": 436, "interval": null, "legend": { "show": false @@ -60064,7 +60265,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 436, + "id": 437, "interval": null, "legend": { "show": false @@ -60168,7 +60369,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 437, + "id": 438, "interval": null, "legend": { "show": false @@ -60265,7 +60466,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 438, + "id": 439, "interval": null, "isNew": true, "legend": { @@ -60413,7 +60614,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 439, + "id": 440, "interval": null, "isNew": true, "legend": { @@ -60546,7 +60747,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 440, + "id": 441, "interval": null, "isNew": true, "legend": { @@ -60679,7 +60880,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 441, + "id": 442, "interval": null, "isNew": true, "legend": { @@ -60827,7 +61028,7 @@ }, 
"height": null, "hideTimeOverride": false, - "id": 442, + "id": 443, "interval": null, "isNew": true, "legend": { @@ -60963,7 +61164,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 443, + "id": 444, "interval": null, "links": [], "maxDataPoints": 100, @@ -61002,7 +61203,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 444, + "id": 445, "interval": null, "isNew": true, "legend": { @@ -61135,7 +61336,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 445, + "id": 446, "interval": null, "isNew": true, "legend": { @@ -61268,7 +61469,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 446, + "id": 447, "interval": null, "isNew": true, "legend": { @@ -61401,7 +61602,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 447, + "id": 448, "interval": null, "isNew": true, "legend": { @@ -61534,7 +61735,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 448, + "id": 449, "interval": null, "isNew": true, "legend": { @@ -61682,7 +61883,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 449, + "id": 450, "interval": null, "isNew": true, "legend": { @@ -61886,7 +62087,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 450, + "id": 451, "interval": null, "links": [], "maxDataPoints": 100, @@ -61937,7 +62138,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 451, + "id": 452, "interval": null, "links": [], "maxDataPoints": 100, @@ -62033,7 +62234,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 452, + "id": 453, "interval": null, "links": [], "maxDataPoints": 100, @@ -62108,7 +62309,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 453, + "id": 454, "interval": null, "links": [], "maxDataPoints": 100, @@ -62183,7 +62384,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 454, + "id": 455, "interval": null, "links": [], "maxDataPoints": 100, @@ -62258,7 +62459,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 455, + "id": 456, "interval": null, "links": [], "maxDataPoints": 
100, @@ -62333,7 +62534,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 456, + "id": 457, "interval": null, "links": [], "maxDataPoints": 100, @@ -62408,7 +62609,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 457, + "id": 458, "interval": null, "links": [], "maxDataPoints": 100, @@ -62483,7 +62684,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 458, + "id": 459, "interval": null, "links": [], "maxDataPoints": 100, @@ -62562,7 +62763,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 459, + "id": 460, "interval": null, "isNew": true, "legend": { @@ -62695,7 +62896,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 460, + "id": 461, "interval": null, "isNew": true, "legend": { @@ -62828,7 +63029,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 461, + "id": 462, "interval": null, "isNew": true, "legend": { @@ -62961,7 +63162,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 462, + "id": 463, "interval": null, "isNew": true, "legend": { @@ -63094,7 +63295,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 463, + "id": 464, "interval": null, "isNew": true, "legend": { @@ -63227,7 +63428,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 464, + "id": 465, "interval": null, "isNew": true, "legend": { @@ -63375,7 +63576,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 465, + "id": 466, "interval": null, "isNew": true, "legend": { @@ -63508,7 +63709,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 466, + "id": 467, "interval": null, "isNew": true, "legend": { @@ -63641,7 +63842,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 467, + "id": 468, "interval": null, "isNew": true, "legend": { @@ -63807,7 +64008,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 468, + "id": 469, "interval": null, "legend": { "show": false @@ -63911,7 +64112,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 
469, + "id": 470, "interval": null, "legend": { "show": false @@ -64015,7 +64216,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 470, + "id": 471, "interval": null, "legend": { "show": false @@ -64119,7 +64320,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 471, + "id": 472, "interval": null, "legend": { "show": false @@ -64223,7 +64424,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 472, + "id": 473, "interval": null, "legend": { "show": false @@ -64327,7 +64528,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 473, + "id": 474, "interval": null, "legend": { "show": false @@ -64431,7 +64632,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 474, + "id": 475, "interval": null, "legend": { "show": false @@ -64535,7 +64736,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 475, + "id": 476, "interval": null, "legend": { "show": false @@ -64632,7 +64833,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 476, + "id": 477, "interval": null, "isNew": true, "legend": { @@ -64765,7 +64966,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 477, + "id": 478, "interval": null, "isNew": true, "legend": { @@ -64898,7 +65099,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 478, + "id": 479, "interval": null, "isNew": true, "legend": { @@ -65031,7 +65232,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 479, + "id": 480, "interval": null, "isNew": true, "legend": { @@ -65164,7 +65365,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 480, + "id": 481, "interval": null, "isNew": true, "legend": { @@ -65297,7 +65498,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 481, + "id": 482, "interval": null, "isNew": true, "legend": { @@ -65430,7 +65631,7 @@ }, "height": null, "hideTimeOverride": false, - 
"id": 482, + "id": 483, "interval": null, "isNew": true, "legend": { @@ -65570,7 +65771,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 483, + "id": 484, "interval": null, "legend": { "show": false @@ -65674,7 +65875,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 484, + "id": 485, "interval": null, "legend": { "show": false @@ -65771,7 +65972,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 485, + "id": 486, "interval": null, "isNew": true, "legend": { @@ -65904,7 +66105,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 486, + "id": 487, "interval": null, "isNew": true, "legend": { @@ -66037,7 +66238,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 487, + "id": 488, "interval": null, "isNew": true, "legend": { @@ -66170,7 +66371,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 488, + "id": 489, "interval": null, "isNew": true, "legend": { @@ -66303,7 +66504,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 489, + "id": 490, "interval": null, "isNew": true, "legend": { @@ -66436,7 +66637,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 490, + "id": 491, "interval": null, "isNew": true, "legend": { @@ -66572,7 +66773,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 491, + "id": 492, "interval": null, "links": [], "maxDataPoints": 100, @@ -66611,7 +66812,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 492, + "id": 493, "interval": null, "isNew": true, "legend": { @@ -66744,7 +66945,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 493, + "id": 494, "interval": null, "isNew": true, "legend": { @@ -66877,7 +67078,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 494, + "id": 495, "interval": null, "isNew": true, "legend": { @@ -67010,7 +67211,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 495, + "id": 496, "interval": null, "isNew": true, "legend": { @@ -67146,7 +67347,7 @@ }, 
"height": null, "hideTimeOverride": false, - "id": 496, + "id": 497, "interval": null, "links": [], "maxDataPoints": 100, @@ -67185,7 +67386,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 497, + "id": 498, "interval": null, "isNew": true, "legend": { @@ -67386,7 +67587,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 498, + "id": 499, "interval": null, "isNew": true, "legend": { diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index d715ccca351..cc9c7769755 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -ac3bba8b714ed9cad64ece88ff1f7b4bb075ca178f270e7e1f41461d5ac37bbd ./metrics/grafana/tikv_details.json +75c3d3d71080a5e3bd40273bc2250797ab929e6c6ab46df89cad79d837531a2d ./metrics/grafana/tikv_details.json From a0e8a7a163302bc9a7be5fd5a903b6a156797eb8 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Thu, 21 Dec 2023 16:34:53 +0800 Subject: [PATCH 1072/1149] Revert "*: make unified-pool use FuturePool (#15925)" (#16050) close tikv/tikv#16015 Revert "*: make unified-pool use FuturePool (#15925)" - revert due to performance regression Signed-off-by: nolouch Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tikv_util/src/yatp_pool/mod.rs | 15 +-- src/read_pool.rs | 155 +++++++++++++--------- 2 files changed, 97 insertions(+), 73 deletions(-) diff --git a/components/tikv_util/src/yatp_pool/mod.rs b/components/tikv_util/src/yatp_pool/mod.rs index 0b4cffbdc14..cfdfc540b30 100644 --- a/components/tikv_util/src/yatp_pool/mod.rs +++ b/components/tikv_util/src/yatp_pool/mod.rs @@ -391,7 +391,7 @@ impl YatpPoolBuilder { FuturePool::from_pool(pool, &name, size, task) } - fn build_single_level_pool(self) -> ThreadPool { + pub fn build_single_level_pool(self) -> ThreadPool { let (builder, runner) = self.create_builder(); builder.build_with_queue_and_runner( yatp::queue::QueueType::SingleLevel, @@ -399,18 
+399,7 @@ impl YatpPoolBuilder { ) } - pub fn build_multi_level_future_pool(self) -> FuturePool { - let name = self - .name_prefix - .clone() - .unwrap_or_else(|| "yatp_pool".to_string()); - let size = self.core_thread_count; - let task = self.max_tasks; - let pool = self.build_multi_level_pool(); - FuturePool::from_pool(pool, &name, size, task) - } - - fn build_multi_level_pool(self) -> ThreadPool { + pub fn build_multi_level_pool(self) -> ThreadPool { let name = self .name_prefix .clone() diff --git a/src/read_pool.rs b/src/read_pool.rs index 111d3f0ce8a..2ea6c7e36b2 100644 --- a/src/read_pool.rs +++ b/src/read_pool.rs @@ -27,7 +27,10 @@ use tikv_util::{ worker::{Runnable, RunnableWithTimer, Scheduler, Worker}, yatp_pool::{self, CleanupMethod, FuturePool, PoolTicker, YatpPoolBuilder}, }; -use yatp::{metrics::MULTILEVEL_LEVEL_ELAPSED, queue::Extras}; +use tracker::TrackedFuture; +use yatp::{ + metrics::MULTILEVEL_LEVEL_ELAPSED, pool::Remote, queue::Extras, task::future::TaskCell, +}; use self::metrics::*; use crate::{ @@ -53,9 +56,11 @@ pub enum ReadPool { read_pool_low: FuturePool, }, Yatp { - pool: FuturePool, - // deprecated. will remove in the v8.x. 
+ pool: yatp::ThreadPool, running_tasks: IntGauge, + running_threads: IntGauge, + max_tasks: usize, + pool_size: usize, resource_ctl: Option>, time_slice_inspector: Arc, }, @@ -76,11 +81,17 @@ impl ReadPool { ReadPool::Yatp { pool, running_tasks, + running_threads, + max_tasks, + pool_size, resource_ctl, time_slice_inspector, } => ReadPoolHandle::Yatp { - remote: pool.clone(), + remote: pool.remote().clone(), running_tasks: running_tasks.clone(), + running_threads: running_threads.clone(), + max_tasks: *max_tasks, + pool_size: *pool_size, resource_ctl: resource_ctl.clone(), time_slice_inspector: time_slice_inspector.clone(), }, @@ -96,8 +107,11 @@ pub enum ReadPoolHandle { read_pool_low: FuturePool, }, Yatp { - remote: FuturePool, + remote: Remote, running_tasks: IntGauge, + running_threads: IntGauge, + max_tasks: usize, + pool_size: usize, resource_ctl: Option>, time_slice_inspector: Arc, }, @@ -132,10 +146,19 @@ impl ReadPoolHandle { ReadPoolHandle::Yatp { remote, running_tasks, + max_tasks, resource_ctl, .. } => { let running_tasks = running_tasks.clone(); + // Note that the running task number limit is not strict. + // If several tasks are spawned at the same time while the running task number + // is close to the limit, they may all pass this check and the number of running + // tasks may exceed the limit. 
+ if running_tasks.get() as usize >= *max_tasks { + return Err(ReadPoolError::UnifiedReadPoolFull); + } + running_tasks.inc(); let fixed_level = match priority { CommandPri::High => Some(0), @@ -145,33 +168,31 @@ impl ReadPoolHandle { let group_name = metadata.group_name().to_owned(); let mut extras = Extras::new_multilevel(task_id, fixed_level); extras.set_metadata(metadata.to_vec()); - let running_tasks1 = running_tasks.clone(); - if let Some(resource_ctl) = resource_ctl { - let fut = with_resource_limiter( - ControlledFuture::new( - async move { - f.await; - running_tasks.dec(); - }, - resource_ctl.clone(), - group_name, - ), - resource_limiter, - ); - remote.spawn_with_extras(fut, extras).map_err(|e| { - running_tasks1.dec(); - e - })?; + let task_cell = if let Some(resource_ctl) = resource_ctl { + TaskCell::new( + TrackedFuture::new(with_resource_limiter( + ControlledFuture::new( + async move { + f.await; + running_tasks.dec(); + }, + resource_ctl.clone(), + group_name, + ), + resource_limiter, + )), + extras, + ) } else { - let fut = async move { - f.await; - running_tasks.dec(); - }; - remote.spawn_with_extras(fut, extras).map_err(|e| { - running_tasks1.dec(); - e - })?; - } + TaskCell::new( + TrackedFuture::new(async move { + f.await; + running_tasks.dec(); + }), + extras, + ) + }; + remote.spawn(task_cell); } } Ok(()) @@ -211,7 +232,7 @@ impl ReadPoolHandle { ReadPoolHandle::FuturePools { read_pool_normal, .. } => read_pool_normal.get_pool_size(), - ReadPoolHandle::Yatp { remote, .. } => remote.get_pool_size(), + ReadPoolHandle::Yatp { pool_size, .. } => *pool_size, } } @@ -221,10 +242,10 @@ impl ReadPoolHandle { read_pool_normal, .. } => read_pool_normal.get_running_task_count() / read_pool_normal.get_pool_size(), ReadPoolHandle::Yatp { - remote, running_tasks, + pool_size, .. 
- } => running_tasks.get() as usize / remote.get_pool_size(), + } => running_tasks.get() as usize / *pool_size, } } @@ -233,19 +254,34 @@ impl ReadPoolHandle { ReadPoolHandle::FuturePools { .. } => { unreachable!() } - ReadPoolHandle::Yatp { remote, .. } => { - remote.scale_pool_size(max_thread_count); + ReadPoolHandle::Yatp { + remote, + running_threads, + max_tasks, + pool_size, + .. + } => { + remote.scale_workers(max_thread_count); + *max_tasks = max_tasks + .saturating_div(*pool_size) + .saturating_mul(max_thread_count); + running_threads.set(max_thread_count as i64); + *pool_size = max_thread_count; } } } - pub fn set_max_tasks_per_worker(&self, tasks_per_thread: usize) { + pub fn set_max_tasks_per_worker(&mut self, tasks_per_thread: usize) { match self { ReadPoolHandle::FuturePools { .. } => { unreachable!() } - ReadPoolHandle::Yatp { remote, .. } => { - remote.set_max_tasks_per_worker(tasks_per_thread); + ReadPoolHandle::Yatp { + max_tasks, + pool_size, + .. + } => { + *max_tasks = tasks_per_thread.saturating_mul(*pool_size); } } } @@ -452,11 +488,6 @@ pub fn build_yatp_read_pool_with_name( config.max_thread_count, ), ) - .max_tasks( - config - .max_tasks_per_worker - .saturating_mul(config.max_thread_count), - ) .after_start(move || { let engine = raftkv.lock().unwrap().clone(); set_tls_engine(engine); @@ -468,15 +499,21 @@ pub fn build_yatp_read_pool_with_name( .enable_task_wait_metrics(enable_task_wait_metrics); let pool = if let Some(ref r) = resource_ctl { - builder.build_priority_future_pool(r.clone()) + builder.build_priority_pool(r.clone()) } else { - builder.build_multi_level_future_pool() + builder.build_multi_level_pool() }; let time_slice_inspector = Arc::new(TimeSliceInspector::new(&unified_read_pool_name)); ReadPool::Yatp { pool, running_tasks: UNIFIED_READ_POOL_RUNNING_TASKS .with_label_values(&[&unified_read_pool_name]), + running_threads: UNIFIED_READ_POOL_RUNNING_THREADS + .with_label_values(&[&unified_read_pool_name]), + max_tasks: config 
+ .max_tasks_per_worker + .saturating_mul(config.max_thread_count), + pool_size: config.max_thread_count, resource_ctl, time_slice_inspector, } @@ -754,6 +791,12 @@ mod metrics { &["name"] ) .unwrap(); + pub static ref UNIFIED_READ_POOL_RUNNING_THREADS: IntGaugeVec = register_int_gauge_vec!( + "tikv_unified_read_pool_thread_count", + "The number of running threads in the unified read pool", + &["name"] + ) + .unwrap(); } } @@ -762,8 +805,6 @@ mod tests { use std::{thread, time::Duration}; use futures::channel::oneshot; - use futures_executor::block_on; - use kvproto::kvrpcpb::ResourceControlContext; use raftstore::store::{ReadStats, WriteStats}; use resource_control::ResourceGroupManager; @@ -823,7 +864,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) { - Err(ReadPoolError::FuturePoolFull(..)) => {} + Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } tx1.send(()).unwrap(); @@ -884,7 +925,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) { - Err(ReadPoolError::FuturePoolFull(..)) => {} + Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } @@ -897,7 +938,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task5, CommandPri::Normal, 5, TaskMetadata::default(), None) { - Err(ReadPoolError::FuturePoolFull(..)) => {} + Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } } @@ -946,18 +987,12 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task3, CommandPri::Normal, 3, TaskMetadata::default(), None) { - Err(ReadPoolError::FuturePoolFull(..)) => {} + Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } + // TODO: move running task by priority to read_pool. 
// spawn a high-priority task, should not return Full error. - let (task_high, tx_h) = gen_task(); - let mut ctx = ResourceControlContext::default(); - ctx.override_priority = 16; // high priority - let metadata = TaskMetadata::from_ctx(&ctx); - let f = handle.spawn_handle(task_high, CommandPri::Normal, 6, metadata, None); - tx_h.send(()).unwrap(); - block_on(f).unwrap(); tx1.send(()).unwrap(); tx2.send(()).unwrap(); @@ -972,7 +1007,7 @@ mod tests { thread::sleep(Duration::from_millis(300)); match handle.spawn(task5, CommandPri::Normal, 5, TaskMetadata::default(), None) { - Err(ReadPoolError::FuturePoolFull(..)) => {} + Err(ReadPoolError::UnifiedReadPoolFull) => {} _ => panic!("should return full error"), } } From 3d8f625b9e8d70663be247c85750feb2a11f27c7 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 21 Dec 2023 22:51:23 +0800 Subject: [PATCH 1073/1149] tikv_util: add handled task counter for LazyWorker (#16196) close tikv/tikv#16195 Add a metric to inspect the total number of tasks handled by a LazyWorker. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tikv_util/src/worker/mod.rs | 4 +- components/tikv_util/src/worker/pool.rs | 99 ++++++++++++++----------- 2 files changed, 59 insertions(+), 44 deletions(-) diff --git a/components/tikv_util/src/worker/mod.rs b/components/tikv_util/src/worker/mod.rs index cba3a9989cb..63240a7c298 100644 --- a/components/tikv_util/src/worker/mod.rs +++ b/components/tikv_util/src/worker/mod.rs @@ -101,7 +101,7 @@ mod tests { assert!(worker.is_busy()); drop(worker); // when shutdown, StepRunner should send back a 0. 
- assert_eq!(0, rx.recv().unwrap()); + assert_eq!(0, rx.recv_timeout(Duration::from_secs(3)).unwrap()); } #[test] @@ -116,7 +116,7 @@ mod tests { assert_eq!(rx.recv_timeout(Duration::from_secs(3)).unwrap(), 90); assert_eq!(rx.recv_timeout(Duration::from_secs(3)).unwrap(), 110); worker.stop(); - assert_eq!(0, rx.recv().unwrap()); + assert_eq!(0, rx.recv_timeout(Duration::from_secs(3)).unwrap()); } #[test] diff --git a/components/tikv_util/src/worker/pool.rs b/components/tikv_util/src/worker/pool.rs index a22732a7aae..a5c9e1b323a 100644 --- a/components/tikv_util/src/worker/pool.rs +++ b/components/tikv_util/src/worker/pool.rs @@ -19,7 +19,7 @@ use futures::{ future::FutureExt, stream::StreamExt, }; -use prometheus::IntGauge; +use prometheus::{IntCounter, IntGauge}; use yatp::Remote; use super::metrics::*; @@ -92,6 +92,29 @@ enum Msg { Timeout, } +// A wrapper of Runnable that implements RunnableWithTimer with no timeout. +struct NoTimeoutRunnableWrapper(T); + +impl Runnable for NoTimeoutRunnableWrapper { + type Task = T::Task; + fn run(&mut self, task: Self::Task) { + self.0.run(task) + } + fn on_tick(&mut self) { + self.0.on_tick() + } + fn shutdown(&mut self) { + self.0.shutdown() + } +} + +impl RunnableWithTimer for NoTimeoutRunnableWrapper { + fn on_timeout(&mut self) {} + fn get_interval(&self) -> Duration { + Duration::ZERO + } +} + /// Scheduler provides interface to schedule task to underlying workers. 
pub struct Scheduler { counter: Arc, @@ -174,6 +197,7 @@ pub struct LazyWorker { worker: Worker, receiver: Option>>, metrics_pending_task_count: IntGauge, + metrics_handled_task_count: IntCounter, } impl LazyWorker { @@ -184,12 +208,8 @@ impl LazyWorker { } pub fn start>(&mut self, runner: R) -> bool { - if let Some(receiver) = self.receiver.take() { - self.worker - .start_impl(runner, receiver, self.metrics_pending_task_count.clone()); - return true; - } - false + let no_timeout_runner = NoTimeoutRunnableWrapper(runner); + self.start_with_timer(no_timeout_runner) } pub fn start_with_timer>( @@ -202,6 +222,7 @@ impl LazyWorker { self.scheduler.sender.clone(), receiver, self.metrics_pending_task_count.clone(), + self.metrics_handled_task_count.clone(), ); return true; } @@ -340,15 +361,8 @@ impl Worker { name: S, runner: R, ) -> Scheduler { - let (tx, rx) = unbounded(); - let metrics_pending_task_count = WORKER_PENDING_TASK_VEC.with_label_values(&[&name.into()]); - self.start_impl(runner, rx, metrics_pending_task_count.clone()); - Scheduler::new( - tx, - self.counter.clone(), - self.pending_capacity, - metrics_pending_task_count, - ) + let no_timeout_runner = NoTimeoutRunnableWrapper(runner); + self.start_with_timer(name, no_timeout_runner) } pub fn start_with_timer>( @@ -357,8 +371,16 @@ impl Worker { runner: R, ) -> Scheduler { let (tx, rx) = unbounded(); - let metrics_pending_task_count = WORKER_PENDING_TASK_VEC.with_label_values(&[&name.into()]); - self.start_with_timer_impl(runner, tx.clone(), rx, metrics_pending_task_count.clone()); + let name = name.into(); + let metrics_pending_task_count = WORKER_PENDING_TASK_VEC.with_label_values(&[&name]); + let metrics_handled_task_count = WORKER_HANDLED_TASK_VEC.with_label_values(&[&name]); + self.start_with_timer_impl( + runner, + tx.clone(), + rx, + metrics_pending_task_count.clone(), + metrics_handled_task_count, + ); Scheduler::new( tx, self.counter.clone(), @@ -410,7 +432,13 @@ impl Worker { let _ = 
self.pool.spawn(f); } - fn delay_notify(tx: UnboundedSender>, timeout: Duration) { + fn delay_notify( + tx: Option>>, + timeout: Duration, + ) { + let Some(tx) = tx else { + return; + }; let now = Instant::now(); let f = GLOBAL_TIMER_HANDLE .delay(now + timeout) @@ -426,7 +454,9 @@ impl Worker { name: S, ) -> LazyWorker { let (tx, rx) = unbounded(); - let metrics_pending_task_count = WORKER_PENDING_TASK_VEC.with_label_values(&[&name.into()]); + let name = name.into(); + let metrics_pending_task_count = WORKER_PENDING_TASK_VEC.with_label_values(&[&name]); + let metrics_handled_task_count = WORKER_HANDLED_TASK_VEC.with_label_values(&[&name]); LazyWorker { receiver: Some(rx), worker: self.clone(), @@ -437,6 +467,7 @@ impl Worker { metrics_pending_task_count.clone(), ), metrics_pending_task_count, + metrics_handled_task_count, } } @@ -464,39 +495,19 @@ impl Worker { self.pool.clone() } - fn start_impl( - &self, - runner: R, - mut receiver: UnboundedReceiver>, - metrics_pending_task_count: IntGauge, - ) { - let counter = self.counter.clone(); - let _ = self.pool.spawn(async move { - let mut handle = RunnableWrapper { inner: runner }; - while let Some(msg) = receiver.next().await { - match msg { - Msg::Task(task) => { - handle.inner.run(task); - counter.fetch_sub(1, Ordering::SeqCst); - metrics_pending_task_count.dec(); - } - Msg::Timeout => (), - } - } - }); - } - fn start_with_timer_impl( &self, runner: R, tx: UnboundedSender>, mut receiver: UnboundedReceiver>, metrics_pending_task_count: IntGauge, + metrics_handled_task_count: IntCounter, ) where R: RunnableWithTimer + 'static, { let counter = self.counter.clone(); let timeout = runner.get_interval(); + let tx = if !timeout.is_zero() { Some(tx) } else { None }; Self::delay_notify(tx.clone(), timeout); let _ = self.pool.spawn(async move { let mut handle = RunnableWrapper { inner: runner }; @@ -506,6 +517,7 @@ impl Worker { handle.inner.run(task); counter.fetch_sub(1, Ordering::SeqCst); metrics_pending_task_count.dec(); 
+ metrics_handled_task_count.inc(); } Msg::Timeout => { handle.inner.on_timeout(); @@ -593,5 +605,8 @@ mod tests { // The worker need some time to trigger shutdown. std::thread::sleep(Duration::from_millis(50)); assert_eq!(12, count.load(atomic::Ordering::SeqCst)); + + // Handled task must be 3. + assert_eq!(3, worker.metrics_handled_task_count.get()); } } From 669dc7925fda34cefa19d4af3b7e3daace991baf Mon Sep 17 00:00:00 2001 From: Liqi Geng Date: Fri, 22 Dec 2023 11:54:27 +0800 Subject: [PATCH 1074/1149] expression: `cast_duration_as_time` should not consider time zone (#16212) close tikv/tikv#16211 `cast_duration_as_time` should not consider time zone Signed-off-by: gengliqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../src/codec/mysql/time/mod.rs | 5 +---- components/tidb_query_expr/src/impl_cast.rs | 15 +++++++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs index 4c6c2f676d7..7b1ad248d2a 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs @@ -1349,10 +1349,7 @@ impl Time { ) -> Result { let dur = chrono::Duration::nanoseconds(duration.to_nanos()); - let time = Utc::today() - .and_hms(0, 0, 0) - .checked_add_signed(dur) - .map(|utc| utc.with_timezone(&ctx.cfg.tz)); + let time = Utc::today().and_hms(0, 0, 0).checked_add_signed(dur); let time = time.ok_or::(box_err!("parse from duration {} overflows", duration))?; diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index 16e33e71d13..8913e9573f8 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -1612,7 +1612,7 @@ mod tests { mysql::{ charset::*, decimal::{max_decimal, max_or_min_dec}, - Decimal, Duration, Json, RoundMode, Time, 
TimeType, MAX_FSP, MIN_FSP, + Decimal, Duration, Json, RoundMode, Time, TimeType, Tz, MAX_FSP, MIN_FSP, }, }, expr::{EvalConfig, EvalContext, Flag}, @@ -2933,13 +2933,20 @@ mod tests { fn test_cast_duration_as_time() { use chrono::Datelike; - let cases = vec!["11:30:45.123456", "-35:30:46"]; + let cases = vec!["11:30:45.123456", "-35:30:46", "25:59:59.999999"]; for case in cases { - let mut ctx = EvalContext::default(); - + let mut cfg = EvalConfig::default(); + cfg.tz = Tz::from_tz_name("America/New_York").unwrap(); + let mut ctx = EvalContext::new(Arc::new(cfg)); let duration = Duration::parse(&mut ctx, case, MAX_FSP).unwrap(); + + let mut cfg2 = EvalConfig::default(); + cfg2.tz = Tz::from_tz_name("Asia/Tokyo").unwrap(); + let ctx2 = EvalContext::new(Arc::new(cfg2)); + let now = RpnFnScalarEvaluator::new() + .context(ctx2) .push_param(duration) .return_field_type( FieldTypeBuilder::new() From 0ec4d33e57cfa36254f0adf7def3df382a720e12 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 22 Dec 2023 15:24:24 +0800 Subject: [PATCH 1075/1149] tidb_query_datatype: fix timezone conversion by upgrading chrono-tz (#16221) ref tikv/tikv#16220, close pingcap/tidb#49586 Brazil no longer observes DST since 2020[1]. Updating chrono-tz from 0.5.1 to 0.5.2 bumps the timezone database from 2018i to 2020a, which includes this change, thus fixes the issue. 
[1]: https://en.wikipedia.org/wiki/Daylight_saving_time_in_Brazil Signed-off-by: Neil Shen Co-authored-by: Wenxuan --- Cargo.lock | 8 +-- .../src/codec/mysql/time/mod.rs | 54 +++++++++++++++---- 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3f19e8d287f..cb97e90095d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -891,9 +891,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.5.1" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0e430fad0384e4defc3dc6b1223d1b886087a8bf9b7080e5ae027f73851ea15" +checksum = "2554a3155fec064362507487171dcc4edc3df60cb10f3a1fb10ed8094822b120" dependencies = [ "chrono", "parse-zoneinfo", @@ -3569,9 +3569,9 @@ dependencies = [ [[package]] name = "parse-zoneinfo" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "089a398ccdcdd77b8c38909d5a1e4b67da1bc4c9dbfe6d5b536c828eddb779e5" +checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" dependencies = [ "regex", ] diff --git a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs index 7b1ad248d2a..41131fc5933 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs @@ -2401,15 +2401,19 @@ mod tests { #[test] fn test_parse_time_with_tz() -> Result<()> { - let ctx_with_tz = |tz: &str| { + let ctx_with_tz = |tz: &str, by_offset: bool| { let mut cfg = EvalConfig::default(); - let raw = tz.as_bytes(); - // brutally turn timezone in format +08:00 into offset in minute - let offset = if raw[0] == b'-' { -1 } else { 1 } - * ((raw[1] - b'0') as i64 * 10 + (raw[2] - b'0') as i64) - * 60 - + ((raw[4] - b'0') as i64 * 10 + (raw[5] - b'0') as i64); - cfg.set_time_zone_by_offset(offset * 60).unwrap(); + if by_offset { + let raw = tz.as_bytes(); + // brutally turn timezone 
in format +08:00 into offset in minute + let offset = if raw[0] == b'-' { -1 } else { 1 } + * ((raw[1] - b'0') as i64 * 10 + (raw[2] - b'0') as i64) + * 60 + + ((raw[4] - b'0') as i64 * 10 + (raw[5] - b'0') as i64); + cfg.set_time_zone_by_offset(offset * 60).unwrap(); + } else { + cfg.set_time_zone_by_name(tz).unwrap(); + } let warnings = cfg.new_eval_warnings(); EvalContext { cfg: Arc::new(cfg), @@ -2418,6 +2422,7 @@ mod tests { }; struct Case { tz: &'static str, + by_offset: bool, t: &'static str, r: Option<&'static str>, tp: TimeType, @@ -2425,60 +2430,70 @@ mod tests { let cases = vec![ Case { tz: "+00:00", + by_offset: true, t: "2020-10-10T10:10:10Z", r: Some("2020-10-10 10:10:10.000000"), tp: TimeType::DateTime, }, Case { tz: "+00:00", + by_offset: true, t: "2020-10-10T10:10:10+", r: None, tp: TimeType::DateTime, }, Case { tz: "+00:00", + by_offset: true, t: "2020-10-10T10:10:10+14:01", r: None, tp: TimeType::DateTime, }, Case { tz: "+00:00", + by_offset: true, t: "2020-10-10T10:10:10-00:00", r: None, tp: TimeType::DateTime, }, Case { tz: "-08:00", + by_offset: true, t: "2020-10-10T10:10:10-08", r: Some("2020-10-10 10:10:10.000000"), tp: TimeType::DateTime, }, Case { tz: "+08:00", + by_offset: true, t: "2020-10-10T10:10:10+08:00", r: Some("2020-10-10 10:10:10.000000"), tp: TimeType::DateTime, }, Case { tz: "+08:00", + by_offset: true, t: "2020-10-10T10:10:10+08:00", r: Some("2020-10-10 10:10:10.000000"), tp: TimeType::Timestamp, }, Case { tz: "+08:00", + by_offset: true, t: "2022-06-02T10:10:10Z", r: Some("2022-06-02 18:10:10.000000"), tp: TimeType::DateTime, }, Case { tz: "-08:00", + by_offset: true, t: "2022-06-02T10:10:10Z", r: Some("2022-06-02 02:10:10.000000"), tp: TimeType::DateTime, }, Case { tz: "+06:30", + by_offset: true, t: "2022-06-02T10:10:10-05:00", r: Some("2022-06-02 21:40:10.000000"), tp: TimeType::DateTime, @@ -2486,26 +2501,45 @@ mod tests { // Time with fraction Case { tz: "+08:00", + by_offset: true, t: "2022-06-02T10:10:10.123Z", r: 
Some("2022-06-02 18:10:10.123000"), tp: TimeType::DateTime, }, Case { tz: "-08:00", + by_offset: true, t: "2022-06-02T10:10:10.123Z", r: Some("2022-06-02 02:10:10.123000"), tp: TimeType::DateTime, }, Case { tz: "+06:30", + by_offset: true, t: "2022-06-02T10:10:10.654321-05:00", r: Some("2022-06-02 21:40:10.654321"), tp: TimeType::DateTime, }, + Case { + // Note: this case may fail if Brazil observes DST again. + // See https://github.com/pingcap/tidb/issues/49586 + tz: "Brazil/East", + by_offset: false, + t: "2023-11-30T17:02:00.654321+00:00", + r: Some("2023-11-30 14:02:00.654321"), + tp: TimeType::DateTime, + }, ]; let mut result: Vec> = vec![]; - for Case { tz, t, r: _, tp } in &cases { - let mut ctx = ctx_with_tz(tz); + for Case { + tz, + by_offset, + t, + r: _, + tp, + } in &cases + { + let mut ctx = ctx_with_tz(tz, *by_offset); let parsed = Time::parse(&mut ctx, t, *tp, 6, true); match parsed { Ok(p) => result.push(Some(p.to_string())), From 5e0d7b1ba9430569886de26b51ca553337a36455 Mon Sep 17 00:00:00 2001 From: crazycs Date: Fri, 22 Dec 2023 16:36:25 +0800 Subject: [PATCH 1076/1149] *: fix issue of modify resolved-ts.advance-ts-interval from 5s to 2s is not work (#15836) close tikv/tikv#15835 Signed-off-by: crazycs520 --- components/resolved_ts/src/advance.rs | 5 +---- components/resolved_ts/src/endpoint.rs | 8 ++------ 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/components/resolved_ts/src/advance.rs b/components/resolved_ts/src/advance.rs index dd6e9c2002c..856d042a75d 100644 --- a/components/resolved_ts/src/advance.rs +++ b/components/resolved_ts/src/advance.rs @@ -51,7 +51,6 @@ const DEFAULT_GRPC_MIN_MESSAGE_SIZE_TO_COMPRESS: usize = 4096; pub struct AdvanceTsWorker { pd_client: Arc, - advance_ts_interval: Duration, timer: SteadyTimer, worker: Runtime, scheduler: Scheduler, @@ -65,7 +64,6 @@ pub struct AdvanceTsWorker { impl AdvanceTsWorker { pub fn new( - advance_ts_interval: Duration, pd_client: Arc, scheduler: Scheduler, 
concurrency_manager: ConcurrencyManager, @@ -81,7 +79,6 @@ impl AdvanceTsWorker { scheduler, pd_client, worker, - advance_ts_interval, timer: SteadyTimer::default(), concurrency_manager, last_pd_tso: Arc::new(std::sync::Mutex::new(None)), @@ -104,7 +101,7 @@ impl AdvanceTsWorker { let timeout = self.timer.delay(advance_ts_interval); let min_timeout = self.timer.delay(cmp::min( DEFAULT_CHECK_LEADER_TIMEOUT_DURATION, - self.advance_ts_interval, + advance_ts_interval, )); let last_pd_tso = self.last_pd_tso.clone(); diff --git a/components/resolved_ts/src/endpoint.rs b/components/resolved_ts/src/endpoint.rs index 9de21b27d9e..a668d8b0f52 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -658,12 +658,8 @@ where let meta = store_meta.lock().unwrap(); (meta.region_read_progress().clone(), meta.store_id()) }; - let advance_worker = AdvanceTsWorker::new( - cfg.advance_ts_interval.0, - pd_client.clone(), - scheduler.clone(), - concurrency_manager, - ); + let advance_worker = + AdvanceTsWorker::new(pd_client.clone(), scheduler.clone(), concurrency_manager); let scanner_pool = ScannerPool::new(cfg.scan_lock_pool_size, cdc_handle); let store_resolver_gc_interval = Duration::from_secs(60); let leader_resolver = LeadershipResolver::new( From fc04a50eec68159557468776ede09f2d49b0d463 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 25 Dec 2023 14:33:26 +0800 Subject: [PATCH 1077/1149] In-memory Engine: implement read flow -- support sequence number (#16193) ref tikv/tikv#16141 implement read flow -- support sequence number Signed-off-by: SpadeA-Tang --- .../region_cache_memory_engine/src/engine.rs | 1156 ++++++++++++++--- .../region_cache_memory_engine/src/keys.rs | 195 +++ .../region_cache_memory_engine/src/lib.rs | 1 + 3 files changed, 1169 insertions(+), 183 deletions(-) create mode 100644 components/region_cache_memory_engine/src/keys.rs diff --git 
a/components/region_cache_memory_engine/src/engine.rs b/components/region_cache_memory_engine/src/engine.rs index a8ee66a5b23..6d284cefdd1 100644 --- a/components/region_cache_memory_engine/src/engine.rs +++ b/components/region_cache_memory_engine/src/engine.rs @@ -15,9 +15,14 @@ use engine_traits::{ RegionCacheEngine, Result, Snapshot, SnapshotMiscExt, WriteBatch, WriteBatchExt, WriteOptions, CF_DEFAULT, CF_LOCK, CF_WRITE, }; -use skiplist_rs::{ByteWiseComparator, IterRef, Skiplist}; +use skiplist_rs::{IterRef, Skiplist}; use tikv_util::config::ReadableSize; +use crate::keys::{ + decode_key, encode_seek_key, InternalKey, InternalKeyComparator, ValueType, + VALUE_TYPE_FOR_SEEK, VALUE_TYPE_FOR_SEEK_FOR_PREV, +}; + fn cf_to_id(cf: &str) -> usize { match cf { CF_DEFAULT => 0, @@ -33,7 +38,7 @@ fn cf_to_id(cf: &str) -> usize { /// with a formal implementation. #[derive(Clone)] pub struct RegionMemoryEngine { - data: [Arc>; 3], + data: [Arc>; 3], } impl RegionMemoryEngine { @@ -41,17 +46,17 @@ impl RegionMemoryEngine { RegionMemoryEngine { data: [ Arc::new(Skiplist::with_capacity( - ByteWiseComparator::default(), + InternalKeyComparator::default(), arena_size, true, )), Arc::new(Skiplist::with_capacity( - ByteWiseComparator::default(), + InternalKeyComparator::default(), arena_size, true, )), Arc::new(Skiplist::with_capacity( - ByteWiseComparator::default(), + InternalKeyComparator::default(), arena_size, true, )), @@ -201,15 +206,33 @@ impl WriteBatchExt for RegionCacheMemoryEngine { } } +#[derive(PartialEq)] +enum Direction { + Uninit, + Forward, + Backward, +} + pub struct RegionCacheIterator { cf: String, valid: bool, prefix_same_as_start: bool, prefix: Option>, - iter: IterRef, ByteWiseComparator>, + iter: IterRef, InternalKeyComparator>, // The lower bound is inclusive while the upper bound is exclusive if set + // Note: bounds (region boundaries) have no mvcc versions lower_bound: Vec, upper_bound: Vec, + // A snapshot sequence number passed from RocksEngine 
Snapshot to guarantee suitable + // visibility. + sequence_number: u64, + + saved_user_key: Vec, + // This is only used by backwawrd iteration where the value we want may not be pointed by the + // `iter` + saved_value: Option, + + direction: Direction, } impl Iterable for RegionCacheMemoryEngine { @@ -220,82 +243,228 @@ impl Iterable for RegionCacheMemoryEngine { } } +impl RegionCacheIterator { + // If `skipping_saved_key` is true, the function will keep iterating until it + // finds a user key that is larger than `saved_user_key`. + // If `prefix` is not None, the iterator needs to stop when all keys for the + // prefix are exhausted and the iterator is set to invalid. + fn find_next_visible_key(&mut self, mut skip_saved_key: bool) { + while self.iter.valid() { + let InternalKey { + user_key, + sequence, + v_type, + } = decode_key(self.iter.key().as_slice()); + + if user_key >= self.upper_bound.as_slice() { + break; + } + + if self.prefix_same_as_start { + // todo(SpadeA): support prefix seek + unimplemented!() + } + + if self.is_visible(sequence) { + if skip_saved_key && user_key == self.saved_user_key.as_slice() { + // the user key has been met before, skip it. 
+ // todo(SpadeA): add metrics if neede + } else { + self.saved_user_key.clear(); + self.saved_user_key.extend_from_slice(user_key); + + match v_type { + ValueType::Deletion => { + skip_saved_key = true; + } + ValueType::Value => { + self.valid = true; + return; + } + } + } + } else if skip_saved_key && user_key > self.saved_user_key.as_slice() { + // user key changed, so no need to skip it + skip_saved_key = false; + } + + self.iter.next(); + } + + self.valid = false; + } + + fn is_visible(&self, seq: u64) -> bool { + seq <= self.sequence_number + } + + fn seek_internal(&mut self, key: &[u8]) -> Result { + self.iter.seek(key); + if self.iter.valid() { + self.find_next_visible_key(false); + } + Ok(self.valid) + } + + fn seek_for_prev_internal(&mut self, key: &[u8]) -> Result { + self.iter.seek_for_prev(key); + self.prev_internal(); + + Ok(self.valid) + } + + fn prev_internal(&mut self) { + while self.iter.valid() { + let InternalKey { user_key, .. } = decode_key(self.iter.key()); + self.saved_user_key.clear(); + self.saved_user_key.extend_from_slice(user_key); + + if user_key < self.lower_bound.as_slice() { + break; + } + + if self.prefix_same_as_start { + // todo(SpadeA): support prefix seek + unimplemented!() + } + + if !self.find_value_for_current_key() { + return; + } + + self.find_user_key_before_saved(); + + if self.valid { + return; + } + } + + // We have not found any key + self.valid = false; + } + + // Used for backwards iteration. + // Looks at the entries with user key `saved_user_key` and finds the most + // up-to-date value for it. Sets `valid`` to true if the value is found and is + // ready to be presented to the user through value(). 
+ fn find_value_for_current_key(&mut self) -> bool { + assert!(self.iter.valid()); + let mut last_key_entry_type = ValueType::Deletion; + while self.iter.valid() { + let InternalKey { + user_key, + sequence, + v_type, + } = decode_key(self.iter.key()); + + if !self.is_visible(sequence) || self.saved_user_key != user_key { + // no further version is visible or the user key changed + break; + } + + last_key_entry_type = v_type; + match v_type { + ValueType::Value => { + self.saved_value = Some(self.iter.value().clone()); + } + ValueType::Deletion => { + self.saved_value.take(); + } + } + + self.iter.prev(); + } + + self.valid = last_key_entry_type == ValueType::Value; + self.iter.valid() + } + + // Move backwards until the key smaller than `saved_user_key`. + // Changes valid only if return value is false. + fn find_user_key_before_saved(&mut self) { + while self.iter.valid() { + let InternalKey { user_key, .. } = decode_key(self.iter.key()); + + if user_key < self.saved_user_key.as_slice() { + return; + } + + self.iter.prev(); + } + } +} + impl Iterator for RegionCacheIterator { fn key(&self) -> &[u8] { assert!(self.valid); - self.iter.key().as_slice() + &self.saved_user_key } fn value(&self) -> &[u8] { assert!(self.valid); - self.iter.value().as_slice() + if let Some(saved_value) = self.saved_value.as_ref() { + saved_value.as_slice() + } else { + self.iter.value().as_slice() + } } fn next(&mut self) -> Result { assert!(self.valid); + assert!(self.direction == Direction::Forward); self.iter.next(); - self.valid = self.iter.valid() && self.iter.key().as_slice() < self.upper_bound.as_slice(); - - if self.valid && self.prefix_same_as_start { - // todo(SpadeA): support prefix seek - unimplemented!() + self.valid = self.iter.valid(); + if self.valid { + self.find_next_visible_key(true); } Ok(self.valid) } fn prev(&mut self) -> Result { assert!(self.valid); - self.iter.prev(); - self.valid = self.iter.valid() && self.iter.key().as_slice() >= self.lower_bound.as_slice(); - 
if self.valid && self.prefix_same_as_start { - // todo(SpadeA): support prefix seek - unimplemented!() - } + assert!(self.direction == Direction::Backward); + self.prev_internal(); Ok(self.valid) } fn seek(&mut self, key: &[u8]) -> Result { + self.direction = Direction::Forward; let seek_key = if key < self.lower_bound.as_slice() { self.lower_bound.as_slice() } else { key }; - self.iter.seek(seek_key); - self.valid = self.iter.valid() && self.iter.key().as_slice() < self.upper_bound.as_slice(); - - if self.valid && self.prefix_same_as_start { - // todo(SpadeA): support prefix seek - unimplemented!() - } - Ok(self.valid) + let seek_key = encode_seek_key(seek_key, self.sequence_number, VALUE_TYPE_FOR_SEEK); + self.seek_internal(&seek_key) } fn seek_for_prev(&mut self, key: &[u8]) -> Result { - let end = if key > self.upper_bound.as_slice() { - self.upper_bound.as_slice() + self.direction = Direction::Backward; + let seek_key = if key > self.upper_bound.as_slice() { + encode_seek_key( + self.upper_bound.as_slice(), + u64::MAX, + VALUE_TYPE_FOR_SEEK_FOR_PREV, + ) } else { - key + encode_seek_key(key, 0, VALUE_TYPE_FOR_SEEK_FOR_PREV) }; - self.iter.seek_for_prev(end); - self.valid = self.iter.valid() && self.iter.key().as_slice() >= self.lower_bound.as_slice(); - - if self.valid && self.prefix_same_as_start { - // todo(SpadeA): support prefix seek - unimplemented!() - } - Ok(self.valid) + self.seek_for_prev_internal(&seek_key) } fn seek_to_first(&mut self) -> Result { - let lower_bound = self.lower_bound.clone(); - self.seek(lower_bound.as_slice()) + self.direction = Direction::Forward; + let seek_key = + encode_seek_key(&self.lower_bound, self.sequence_number, VALUE_TYPE_FOR_SEEK); + self.seek_internal(&seek_key) } fn seek_to_last(&mut self) -> Result { - let upper_bound = self.upper_bound.clone(); - self.seek_for_prev(upper_bound.as_slice()) + self.direction = Direction::Backward; + let seek_key = encode_seek_key(&self.upper_bound, u64::MAX, 
VALUE_TYPE_FOR_SEEK_FOR_PREV); + self.seek_for_prev_internal(&seek_key) } fn valid(&self) -> Result { @@ -441,6 +610,10 @@ impl Iterable for RegionCacheSnapshot { lower_bound: lower_bound.unwrap(), upper_bound: upper_bound.unwrap(), iter, + sequence_number: self.sequence_number, + saved_user_key: vec![], + saved_value: None, + direction: Direction::Uninit, }) } } @@ -458,10 +631,23 @@ impl Peekable for RegionCacheSnapshot { cf: &str, key: &[u8], ) -> Result> { - Ok(self.region_memory_engine.data[cf_to_id(cf)] - .get(key) - .cloned() - .map(|v| RegionCacheDbVector(v))) + let seq = self.sequence_number; + let mut iter = self.region_memory_engine.data[cf_to_id(cf)].iter(); + let seek_key = encode_seek_key(key, self.sequence_number, VALUE_TYPE_FOR_SEEK); + + iter.seek(&seek_key); + if !iter.valid() { + return Ok(None); + } + + match decode_key(iter.key()) { + InternalKey { + user_key, + v_type: ValueType::Value, + .. + } if user_key == key => Ok(Some(RegionCacheDbVector(iter.value().clone()))), + _ => Ok(None), + } } } @@ -499,16 +685,19 @@ impl<'a> PartialEq<&'a [u8]> for RegionCacheDbVector { #[cfg(test)] mod tests { use core::ops::Range; - use std::{iter::StepBy, sync::Arc}; + use std::{iter, iter::StepBy, ops::Deref, sync::Arc}; - use bytes::Bytes; + use bytes::{BufMut, Bytes}; use engine_traits::{ IterOptions, Iterable, Iterator, Peekable, ReadOptions, RegionCacheEngine, }; - use skiplist_rs::{ByteWiseComparator, Skiplist}; + use skiplist_rs::Skiplist; use super::{cf_to_id, RegionCacheIterator}; - use crate::RegionCacheMemoryEngine; + use crate::{ + keys::{encode_key, InternalKeyComparator, ValueType}, + RegionCacheMemoryEngine, + }; #[test] fn test_snapshot() { @@ -574,53 +763,112 @@ mod tests { verify_snapshot_count(10, 0); } - fn construct_key(i: i32) -> String { - format!("key-{:08}", i) + fn construct_user_key(i: u64) -> Vec { + let k = format!("k{:08}", i); + k.as_bytes().to_owned() } - fn construct_value(i: i32) -> String { - format!("value-{:08}", i) + fn 
construct_key(i: u64, mvcc: u64) -> Vec { + let k = format!("k{:08}", i); + let mut key = k.as_bytes().to_vec(); + // mvcc version should be make bit-wise reverse so that k-100 is less than k-99 + key.put_u64(!mvcc); + key } - fn fill_data_in_skiplist(sl: Arc>, range: StepBy>) { - for i in range { - let key = construct_key(i); - let val = construct_value(i); - sl.put(Bytes::from(key), Bytes::from(val)); + fn construct_value(i: u64, j: u64) -> String { + format!("value-{:04}-{:04}", i, j) + } + + fn fill_data_in_skiplist( + sl: Arc>, + key_range: StepBy>, + mvcc_range: Range, + mut start_seq: u64, + ) { + for mvcc in mvcc_range { + for i in key_range.clone() { + let key = construct_key(i, mvcc); + let val = construct_value(i, mvcc); + let key = encode_key(&key, start_seq, ValueType::Value); + sl.put(key, Bytes::from(val)); + } + start_seq += 1; } } - fn verify_key_value(k: &[u8], v: &[u8], i: i32) { - let key = construct_key(i); - let val = construct_value(i); - assert_eq!(k, key.as_bytes()); + fn delete_data_in_skiplist( + sl: Arc>, + key_range: StepBy>, + mvcc_range: Range, + mut seq: u64, + ) { + for i in key_range { + for mvcc in mvcc_range.clone() { + let key = construct_key(i, mvcc); + let key = encode_key(&key, seq, ValueType::Deletion); + sl.put(key, Bytes::default()); + } + seq += 1; + } + } + + fn construct_mvcc_key(key: &str, mvcc: u64) -> Vec { + let mut k = vec![]; + k.extend_from_slice(key.as_bytes()); + k.put_u64(!mvcc); + k + } + + fn put_key_val( + sl: &Arc>, + key: &str, + val: &str, + mvcc: u64, + seq: u64, + ) { + let key = construct_mvcc_key(key, mvcc); + let key = encode_key(&key, seq, ValueType::Value); + sl.put(key, Bytes::from(val.to_owned())); + } + + fn delete_key(sl: &Arc>, key: &str, mvcc: u64, seq: u64) { + let key = construct_mvcc_key(key, mvcc); + let key = encode_key(&key, seq, ValueType::Deletion); + sl.put(key, Bytes::default()); + } + + fn verify_key_value(k: &[u8], v: &[u8], i: u64, mvcc: u64) { + let key = construct_key(i, 
mvcc); + let val = construct_value(i, mvcc); + assert_eq!(k, &key); assert_eq!(v, val.as_bytes()); } - fn verify_key_values( + fn verify_key_not_equal(k: &[u8], i: u64, mvcc: u64) { + let key = construct_key(i, mvcc); + assert_ne!(k, &key); + } + + fn verify_key_values, J: iter::Iterator + Clone>( iter: &mut RegionCacheIterator, - step: i32, - mut start_idx: i32, - end_idx: i32, + key_range: I, + mvcc_range: J, + foward: bool, ) { - let forward = step > 0; - while iter.valid().unwrap() { - let k = iter.key(); - let val = iter.value(); - verify_key_value(k, val, start_idx); - if forward { - iter.next().unwrap(); - } else { - iter.prev().unwrap(); + for i in key_range { + for mvcc in mvcc_range.clone() { + let k = iter.key(); + let val = iter.value(); + verify_key_value(k, val, i as u64, mvcc as u64); + if foward { + iter.next().unwrap(); + } else { + iter.prev().unwrap(); + } } - start_idx += step; - } - - if forward { - assert!(start_idx - step < end_idx); - } else { - assert!(start_idx - step > end_idx); } + assert!(!iter.valid().unwrap()); } #[test] @@ -633,27 +881,72 @@ mod tests { core.region_metas.get_mut(&1).unwrap().can_read = true; core.region_metas.get_mut(&1).unwrap().safe_ts = 5; let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); - fill_data_in_skiplist(sl, (1..100).step_by(1)); + fill_data_in_skiplist(sl.clone(), (1..10).step_by(1), 1..50, 1); + // k1 is deleted at seq_num 150 while k49 is deleted at seq num 101 + delete_data_in_skiplist(sl, (1..10).step_by(1), 1..50, 100); } - let snapshot = engine.snapshot(1, 10, u64::MAX).unwrap(); let opts = ReadOptions::default(); - for i in 1..100 { - let k = construct_key(i); - let v = snapshot - .get_value_cf_opt(&opts, "write", k.as_bytes()) - .unwrap() - .unwrap(); - verify_key_value(k.as_bytes(), &v, i); + { + let snapshot = engine.snapshot(1, 10, 60).unwrap(); + for i in 1..10 { + for mvcc in 1..50 { + let k = construct_key(i, mvcc); + let v = snapshot + .get_value_cf_opt(&opts, 
"write", &k) + .unwrap() + .unwrap(); + verify_key_value(&k, &v, i, mvcc); + } + let k = construct_key(i, 50); + assert!( + snapshot + .get_value_cf_opt(&opts, "write", &k) + .unwrap() + .is_none() + ); + } + } + + // all deletions + { + let snapshot = engine.snapshot(1, 10, u64::MAX).unwrap(); + for i in 1..10 { + for mvcc in 1..50 { + let k = construct_key(i, mvcc); + assert!( + snapshot + .get_value_cf_opt(&opts, "write", &k) + .unwrap() + .is_none() + ); + } + } } - let k = construct_key(100); - assert!( - snapshot - .get_value_cf_opt(&opts, "write", k.as_bytes()) - .unwrap() - .is_none() - ); + // some deletions + { + let snapshot = engine.snapshot(1, 10, 105).unwrap(); + for mvcc in 1..50 { + for i in 1..7 { + let k = construct_key(i, mvcc); + assert!( + snapshot + .get_value_cf_opt(&opts, "write", &k) + .unwrap() + .is_none() + ); + } + for i in 7..10 { + let k = construct_key(i, mvcc); + let v = snapshot + .get_value_cf_opt(&opts, "write", &k) + .unwrap() + .unwrap(); + verify_key_value(&k, &v, i, mvcc); + } + } + } } #[test] @@ -667,7 +960,8 @@ mod tests { core.region_metas.get_mut(&1).unwrap().can_read = true; core.region_metas.get_mut(&1).unwrap().safe_ts = 5; let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); - fill_data_in_skiplist(sl, (1..100).step_by(step as usize)); + fill_data_in_skiplist(sl.clone(), (1..100).step_by(step as usize), 1..10, 1); + delete_data_in_skiplist(sl, (1..100).step_by(step as usize), 1..10, 200); } let mut iter_opt = IterOptions::default(); @@ -675,10 +969,10 @@ mod tests { // boundaries are not set assert!(snapshot.iterator_opt("lock", iter_opt.clone()).is_err()); - let lower_bound = construct_key(1); - let upper_bound = construct_key(100); - iter_opt.set_upper_bound(upper_bound.as_bytes(), 0); - iter_opt.set_lower_bound(lower_bound.as_bytes(), 0); + let lower_bound = construct_user_key(1); + let upper_bound = construct_user_key(100); + iter_opt.set_upper_bound(&upper_bound, 0); + 
iter_opt.set_lower_bound(&lower_bound, 0); let mut iter = snapshot.iterator_opt("lock", iter_opt.clone()).unwrap(); assert!(!iter.seek_to_first().unwrap()); @@ -686,102 +980,598 @@ mod tests { let mut iter = snapshot.iterator_opt("default", iter_opt.clone()).unwrap(); assert!(!iter.seek_to_first().unwrap()); - let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); - iter.seek_to_first().unwrap(); - verify_key_values(&mut iter, step, 1, i32::MAX); - - // seek key that is in the skiplist - let seek_key = construct_key(11); - iter.seek(seek_key.as_bytes()).unwrap(); - verify_key_values(&mut iter, step, 11, i32::MAX); - - // seek key that is not in the skiplist - let seek_key = construct_key(12); - iter.seek(seek_key.as_bytes()).unwrap(); - verify_key_values(&mut iter, step, 13, i32::MAX); - - // with bounds - let lower_bound = construct_key(20); - let upper_bound = construct_key(40); - iter_opt.set_upper_bound(upper_bound.as_bytes(), 0); - iter_opt.set_lower_bound(lower_bound.as_bytes(), 0); - let mut iter = snapshot.iterator_opt("write", iter_opt).unwrap(); - - assert!(iter.seek_to_first().unwrap()); - verify_key_values(&mut iter, step, 21, 40); - - // seek a key that is below the lower bound is the same with seek_to_first - let seek_key = construct_key(11); - assert!(iter.seek(seek_key.as_bytes()).unwrap()); - verify_key_values(&mut iter, step, 21, 40); - - // seek a key that is larger or equal to upper bound won't get any key - let seek_key = construct_key(40); - assert!(!iter.seek(seek_key.as_bytes()).unwrap()); - assert!(!iter.valid().unwrap()); + // Not restricted by bounds, no deletion (seq_num 150) + { + let snapshot = engine.snapshot(1, 100, 150).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + iter.seek_to_first().unwrap(); + verify_key_values( + &mut iter, + (1..100).step_by(step as usize), + (1..10).rev(), + true, + ); + + // seek key that is in the skiplist + let seek_key = construct_key(11, 
u64::MAX); + iter.seek(&seek_key).unwrap(); + verify_key_values( + &mut iter, + (11..100).step_by(step as usize), + (1..10).rev(), + true, + ); + + // seek key that is not in the skiplist + let seek_key = construct_key(12, u64::MAX); + iter.seek(&seek_key).unwrap(); + verify_key_values( + &mut iter, + (13..100).step_by(step as usize), + (1..10).rev(), + true, + ); + } + + // Not restricted by bounds, some deletions (seq_num 230) + { + let snapshot = engine.snapshot(1, 10, 230).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + iter.seek_to_first().unwrap(); + verify_key_values( + &mut iter, + (63..100).step_by(step as usize), + (1..10).rev(), + true, + ); + + // sequence can see the deletion + { + // seek key that is in the skiplist + let seek_key = construct_key(21, u64::MAX); + assert!(iter.seek(&seek_key).unwrap()); + verify_key_not_equal(iter.key(), 21, 9); + + // seek key that is not in the skiplist + let seek_key = construct_key(22, u64::MAX); + assert!(iter.seek(&seek_key).unwrap()); + verify_key_not_equal(iter.key(), 23, 9); + } - let seek_key = construct_key(22); - assert!(iter.seek(seek_key.as_bytes()).unwrap()); - verify_key_values(&mut iter, step, 23, 40); + // sequence cannot see the deletion + { + // seek key that is in the skiplist + let seek_key = construct_key(65, u64::MAX); + iter.seek(&seek_key).unwrap(); + verify_key_value(iter.key(), iter.value(), 65, 9); + + // seek key that is not in the skiplist + let seek_key = construct_key(66, u64::MAX); + iter.seek(&seek_key).unwrap(); + verify_key_value(iter.key(), iter.value(), 67, 9); + } + } + + // with bounds, no deletion (seq_num 150) + let lower_bound = construct_user_key(20); + let upper_bound = construct_user_key(40); + iter_opt.set_upper_bound(&upper_bound, 0); + iter_opt.set_lower_bound(&lower_bound, 0); + { + let snapshot = engine.snapshot(1, 10, 150).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + + 
assert!(iter.seek_to_first().unwrap()); + verify_key_values( + &mut iter, + (21..40).step_by(step as usize), + (1..10).rev(), + true, + ); + + // seek a key that is below the lower bound is the same with seek_to_first + let seek_key = construct_key(19, u64::MAX); + assert!(iter.seek(&seek_key).unwrap()); + verify_key_values( + &mut iter, + (21..40).step_by(step as usize), + (1..10).rev(), + true, + ); + + // seek a key that is larger or equal to upper bound won't get any key + let seek_key = construct_key(41, u64::MAX); + assert!(!iter.seek(&seek_key).unwrap()); + assert!(!iter.valid().unwrap()); + + let seek_key = construct_key(32, u64::MAX); + assert!(iter.seek(&seek_key).unwrap()); + verify_key_values( + &mut iter, + (33..40).step_by(step as usize), + (1..10).rev(), + true, + ); + } + + // with bounds, some deletions (seq_num 215) + { + let snapshot = engine.snapshot(1, 10, 215).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt).unwrap(); + + // sequence can see the deletion + { + // seek key that is in the skiplist + let seek_key = construct_key(21, u64::MAX); + assert!(iter.seek(&seek_key).unwrap()); + verify_key_not_equal(iter.key(), 21, 9); + + // seek key that is not in the skiplist + let seek_key = construct_key(20, u64::MAX); + assert!(iter.seek(&seek_key).unwrap()); + verify_key_not_equal(iter.key(), 21, 9); + } + + // sequence cannot see the deletion + { + // seek key that is in the skiplist + let seek_key = construct_key(33, u64::MAX); + iter.seek(&seek_key).unwrap(); + verify_key_value(iter.key(), iter.value(), 33, 9); + + // seek key that is not in the skiplist + let seek_key = construct_key(32, u64::MAX); + iter.seek(&seek_key).unwrap(); + verify_key_value(iter.key(), iter.value(), 33, 9); + } + } } #[test] fn test_iterator_backward() { let engine = RegionCacheMemoryEngine::default(); engine.new_region(1); - let mut step: i32 = 2; + let step: i32 = 2; { let mut core = engine.core.lock().unwrap(); 
core.region_metas.get_mut(&1).unwrap().can_read = true; core.region_metas.get_mut(&1).unwrap().safe_ts = 5; let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); - fill_data_in_skiplist(sl, (1..100).step_by(step as usize)); + fill_data_in_skiplist(sl.clone(), (1..100).step_by(step as usize), 1..10, 1); + delete_data_in_skiplist(sl, (1..100).step_by(step as usize), 1..10, 200); } - step = -step; let mut iter_opt = IterOptions::default(); - let lower_bound = construct_key(1); - let upper_bound = construct_key(100); - iter_opt.set_upper_bound(upper_bound.as_bytes(), 0); - iter_opt.set_lower_bound(lower_bound.as_bytes(), 0); + let lower_bound = construct_user_key(1); + let upper_bound = construct_user_key(100); + iter_opt.set_upper_bound(&upper_bound, 0); + iter_opt.set_lower_bound(&lower_bound, 0); - let snapshot = engine.snapshot(1, 10, u64::MAX).unwrap(); - let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); - assert!(iter.seek_to_last().unwrap()); - verify_key_values(&mut iter, step, 99, i32::MIN); - - // seek key that is in the skiplist - let seek_key = construct_key(81); - assert!(iter.seek_for_prev(seek_key.as_bytes()).unwrap()); - verify_key_values(&mut iter, step, 81, i32::MIN); - - // seek key that is in the skiplist - let seek_key = construct_key(80); - assert!(iter.seek_for_prev(seek_key.as_bytes()).unwrap()); - verify_key_values(&mut iter, step, 79, i32::MIN); - - let lower_bound = construct_key(20); - let upper_bound = construct_key(40); - iter_opt.set_upper_bound(upper_bound.as_bytes(), 0); - iter_opt.set_lower_bound(lower_bound.as_bytes(), 0); - let mut iter = snapshot.iterator_opt("write", iter_opt).unwrap(); - - assert!(iter.seek_to_last().unwrap()); - verify_key_values(&mut iter, step, 39, 20); - - // seek a key that is above the upper bound is the same with seek_to_last - let seek_key = construct_key(45); - assert!(iter.seek_for_prev(seek_key.as_bytes()).unwrap()); - verify_key_values(&mut iter, step, 39, 
20); - - // seek a key that is less than the lower bound won't get any key - let seek_key = construct_key(19); - assert!(!iter.seek_for_prev(seek_key.as_bytes()).unwrap()); - assert!(!iter.valid().unwrap()); + // Not restricted by bounds, no deletion (seq_num 150) + { + let snapshot = engine.snapshot(1, 10, 150).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + assert!(iter.seek_to_last().unwrap()); + verify_key_values( + &mut iter, + (1..100).step_by(step as usize).rev(), + 1..10, + false, + ); + + // seek key that is in the skiplist + let seek_key = construct_key(81, 0); + assert!(iter.seek_for_prev(&seek_key).unwrap()); + verify_key_values( + &mut iter, + (1..82).step_by(step as usize).rev(), + 1..10, + false, + ); + + // seek key that is in the skiplist + let seek_key = construct_key(80, 0); + assert!(iter.seek_for_prev(&seek_key).unwrap()); + verify_key_values( + &mut iter, + (1..80).step_by(step as usize).rev(), + 1..10, + false, + ); + } + + let lower_bound = construct_user_key(21); + let upper_bound = construct_user_key(39); + iter_opt.set_upper_bound(&upper_bound, 0); + iter_opt.set_lower_bound(&lower_bound, 0); + { + let snapshot = engine.snapshot(1, 10, 150).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt).unwrap(); + + assert!(iter.seek_to_last().unwrap()); + verify_key_values( + &mut iter, + (21..38).step_by(step as usize).rev(), + 1..10, + false, + ); + + // seek a key that is above the upper bound is the same with seek_to_last + let seek_key = construct_key(40, 0); + assert!(iter.seek_for_prev(&seek_key).unwrap()); + verify_key_values( + &mut iter, + (21..38).step_by(step as usize).rev(), + 1..10, + false, + ); + + // seek a key that is less than the lower bound won't get any key + let seek_key = construct_key(20, u64::MAX); + assert!(!iter.seek_for_prev(&seek_key).unwrap()); + assert!(!iter.valid().unwrap()); + + let seek_key = construct_key(26, 0); + 
assert!(iter.seek_for_prev(&seek_key).unwrap()); + verify_key_values( + &mut iter, + (21..26).step_by(step as usize).rev(), + 1..10, + false, + ); + } + } + + #[test] + fn test_seq_visibility() { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + let step: i32 = 2; + + { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + + put_key_val(&sl, "aaa", "va1", 10, 1); + put_key_val(&sl, "aaa", "va2", 10, 3); + delete_key(&sl, "aaa", 10, 4); + put_key_val(&sl, "aaa", "va4", 10, 6); - let seek_key = construct_key(38); - assert!(iter.seek_for_prev(seek_key.as_bytes()).unwrap()); - verify_key_values(&mut iter, step, 37, 20); + put_key_val(&sl, "bbb", "vb1", 10, 2); + put_key_val(&sl, "bbb", "vb2", 10, 4); + + put_key_val(&sl, "ccc", "vc1", 10, 2); + put_key_val(&sl, "ccc", "vc2", 10, 4); + put_key_val(&sl, "ccc", "vc3", 10, 5); + delete_key(&sl, "ccc", 10, 6); + } + + let mut iter_opt = IterOptions::default(); + let lower_bound = b""; + let upper_bound = b"z"; + iter_opt.set_upper_bound(upper_bound, 0); + iter_opt.set_lower_bound(lower_bound, 0); + + // seq num 1 + { + let snapshot = engine.snapshot(1, u64::MAX, 1).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + iter.seek_to_first().unwrap(); + assert_eq!(iter.value(), b"va1"); + assert!(!iter.next().unwrap()); + let key = construct_mvcc_key("aaa", 10); + assert_eq!( + snapshot + .get_value_cf("write", &key) + .unwrap() + .unwrap() + .deref(), + "va1".as_bytes() + ); + assert!(iter.seek(&key).unwrap()); + assert_eq!(iter.value(), "va1".as_bytes()); + + let key = construct_mvcc_key("bbb", 10); + assert!(snapshot.get_value_cf("write", &key).unwrap().is_none()); + assert!(!iter.seek(&key).unwrap()); + + let key = construct_mvcc_key("ccc", 10); + 
assert!(snapshot.get_value_cf("write", &key).unwrap().is_none()); + assert!(!iter.seek(&key).unwrap()); + } + + // seq num 2 + { + let snapshot = engine.snapshot(1, u64::MAX, 2).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + iter.seek_to_first().unwrap(); + assert_eq!(iter.value(), b"va1"); + iter.next().unwrap(); + assert_eq!(iter.value(), b"vb1"); + iter.next().unwrap(); + assert_eq!(iter.value(), b"vc1"); + assert!(!iter.next().unwrap()); + } + + // seq num 5 + { + let snapshot = engine.snapshot(1, u64::MAX, 5).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + iter.seek_to_first().unwrap(); + assert_eq!(iter.value(), b"vb2"); + iter.next().unwrap(); + assert_eq!(iter.value(), b"vc3"); + assert!(!iter.next().unwrap()); + } + + // seq num 6 + { + let snapshot = engine.snapshot(1, u64::MAX, 6).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + iter.seek_to_first().unwrap(); + assert_eq!(iter.value(), b"va4"); + iter.next().unwrap(); + assert_eq!(iter.value(), b"vb2"); + assert!(!iter.next().unwrap()); + + let key = construct_mvcc_key("aaa", 10); + assert_eq!( + snapshot + .get_value_cf("write", &key) + .unwrap() + .unwrap() + .deref(), + "va4".as_bytes() + ); + assert!(iter.seek(&key).unwrap()); + assert_eq!(iter.value(), "va4".as_bytes()); + + let key = construct_mvcc_key("bbb", 10); + assert_eq!( + snapshot + .get_value_cf("write", &key) + .unwrap() + .unwrap() + .deref(), + "vb2".as_bytes() + ); + assert!(iter.seek(&key).unwrap()); + assert_eq!(iter.value(), "vb2".as_bytes()); + + let key = construct_mvcc_key("ccc", 10); + assert!(snapshot.get_value_cf("write", &key).unwrap().is_none()); + assert!(!iter.seek(&key).unwrap()); + } + } + + #[test] + fn test_seq_visibility_backward() { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + let step: i32 = 2; + + { + let mut core = engine.core.lock().unwrap(); + 
core.region_metas.get_mut(&1).unwrap().can_read = true; + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + + put_key_val(&sl, "aaa", "va1", 10, 2); + put_key_val(&sl, "aaa", "va2", 10, 4); + put_key_val(&sl, "aaa", "va3", 10, 5); + delete_key(&sl, "aaa", 10, 6); + + put_key_val(&sl, "bbb", "vb1", 10, 2); + put_key_val(&sl, "bbb", "vb2", 10, 4); + + put_key_val(&sl, "ccc", "vc1", 10, 1); + put_key_val(&sl, "ccc", "vc2", 10, 3); + delete_key(&sl, "ccc", 10, 4); + put_key_val(&sl, "ccc", "vc4", 10, 6); + } + + let mut iter_opt = IterOptions::default(); + let lower_bound = b""; + let upper_bound = b"z"; + iter_opt.set_upper_bound(upper_bound, 0); + iter_opt.set_lower_bound(lower_bound, 0); + + // seq num 1 + { + let snapshot = engine.snapshot(1, u64::MAX, 1).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + iter.seek_to_last().unwrap(); + assert_eq!(iter.value(), b"vc1"); + assert!(!iter.prev().unwrap()); + let key = construct_mvcc_key("aaa", 10); + assert!(!iter.seek_for_prev(&key).unwrap()); + + let key = construct_mvcc_key("bbb", 10); + assert!(!iter.seek_for_prev(&key).unwrap()); + + let key = construct_mvcc_key("ccc", 10); + assert!(iter.seek_for_prev(&key).unwrap()); + assert_eq!(iter.value(), "vc1".as_bytes()); + } + + // seq num 2 + { + let snapshot = engine.snapshot(1, u64::MAX, 2).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + iter.seek_to_last().unwrap(); + assert_eq!(iter.value(), b"vc1"); + iter.prev().unwrap(); + assert_eq!(iter.value(), b"vb1"); + iter.prev().unwrap(); + assert_eq!(iter.value(), b"va1"); + assert!(!iter.prev().unwrap()); + } + + // seq num 5 + { + let snapshot = engine.snapshot(1, u64::MAX, 5).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + iter.seek_to_last().unwrap(); + assert_eq!(iter.value(), b"vb2"); + iter.prev().unwrap(); + 
assert_eq!(iter.value(), b"va3"); + assert!(!iter.prev().unwrap()); + } + + // seq num 6 + { + let snapshot = engine.snapshot(1, u64::MAX, 6).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + iter.seek_to_last().unwrap(); + assert_eq!(iter.value(), b"vc4"); + iter.prev().unwrap(); + assert_eq!(iter.value(), b"vb2"); + assert!(!iter.prev().unwrap()); + + let key = construct_mvcc_key("ccc", 10); + assert!(iter.seek_for_prev(&key).unwrap()); + assert_eq!(iter.value(), "vc4".as_bytes()); + + let key = construct_mvcc_key("bbb", 10); + assert!(iter.seek_for_prev(&key).unwrap()); + assert_eq!(iter.value(), "vb2".as_bytes()); + + let key = construct_mvcc_key("aaa", 10); + assert!(!iter.seek_for_prev(&key).unwrap()); + } + } + + #[test] + fn test_iter_use_skip() { + let mut iter_opt = IterOptions::default(); + let lower_bound = b""; + let upper_bound = b"z"; + iter_opt.set_upper_bound(upper_bound, 0); + iter_opt.set_lower_bound(lower_bound, 0); + + // backward, all put + { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + let sl = { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone() + }; + + for seq in 2..50 { + put_key_val(&sl, "a", "val", 10, 1); + for i in 2..50 { + let v = construct_value(i, i); + put_key_val(&sl, "b", v.as_str(), 10, i); + } + + let snapshot = engine.snapshot(1, 10, seq).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + assert!(iter.seek_to_last().unwrap()); + let k = construct_mvcc_key("b", 10); + let v = construct_value(seq, seq); + assert_eq!(iter.key(), &k); + assert_eq!(iter.value(), v.as_bytes()); + + assert!(iter.prev().unwrap()); + let k = construct_mvcc_key("a", 10); + assert_eq!(iter.key(), &k); + assert_eq!(iter.value(), b"val"); + assert!(!iter.prev().unwrap()); + 
assert!(!iter.valid().unwrap()); + } + } + + // backward, all deletes + { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + let sl = { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone() + }; + + for seq in 2..50 { + put_key_val(&sl, "a", "val", 10, 1); + for i in 2..50 { + delete_key(&sl, "b", 10, i); + } + + let snapshot = engine.snapshot(1, 10, seq).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + assert!(iter.seek_to_last().unwrap()); + let k = construct_mvcc_key("a", 10); + assert_eq!(iter.key(), &k); + assert_eq!(iter.value(), b"val"); + assert!(!iter.prev().unwrap()); + assert!(!iter.valid().unwrap()); + } + } + + // backward, all deletes except for last put, last put's seq + { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + let sl = { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone() + }; + put_key_val(&sl, "a", "val", 10, 1); + for i in 2..50 { + delete_key(&sl, "b", 10, i); + } + let v = construct_value(50, 50); + put_key_val(&sl, "b", v.as_str(), 10, 50); + let snapshot = engine.snapshot(1, 10, 50).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + assert!(iter.seek_to_last().unwrap()); + let k = construct_mvcc_key("b", 10); + let v = construct_value(50, 50); + assert_eq!(iter.key(), &k); + assert_eq!(iter.value(), v.as_bytes()); + + assert!(iter.prev().unwrap()); + let k = construct_mvcc_key("a", 10); + assert_eq!(iter.key(), &k); + assert_eq!(iter.value(), b"val"); + assert!(!iter.prev().unwrap()); + assert!(!iter.valid().unwrap()); + } + + // all deletes except for last put, 
deletions' seq + { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + let sl = { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone() + }; + for seq in 2..50 { + for i in 2..50 { + delete_key(&sl, "b", 10, i); + } + let v = construct_value(50, 50); + put_key_val(&sl, "b", v.as_str(), 10, 50); + + let snapshot = engine.snapshot(1, 10, seq).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + assert!(!iter.seek_to_first().unwrap()); + assert!(!iter.valid().unwrap()); + + assert!(!iter.seek_to_last().unwrap()); + assert!(!iter.valid().unwrap()); + } + } } } diff --git a/components/region_cache_memory_engine/src/keys.rs b/components/region_cache_memory_engine/src/keys.rs new file mode 100644 index 00000000000..c2cb22a236e --- /dev/null +++ b/components/region_cache_memory_engine/src/keys.rs @@ -0,0 +1,195 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::cmp; + +use bytes::{BufMut, Bytes, BytesMut}; +use skiplist_rs::KeyComparator; + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ValueType { + Deletion = 0, + Value = 1, +} + +// See `compare` of InternalKeyComparator, for the same user key and same +// sequence number, ValueType::Value is less than ValueType::Deletion +pub const VALUE_TYPE_FOR_SEEK: ValueType = ValueType::Value; +pub const VALUE_TYPE_FOR_SEEK_FOR_PREV: ValueType = ValueType::Deletion; + +impl TryFrom for ValueType { + type Error = String; + fn try_from(value: u8) -> std::prelude::v1::Result { + match value { + 0 => Ok(ValueType::Deletion), + 1 => Ok(ValueType::Value), + _ => Err(format!("invalid value: {}", value)), + } + } +} + +pub struct InternalKey<'a> { + pub user_key: &'a [u8], + pub v_type: ValueType, + pub sequence: u64, +} + +const ENC_KEY_SEQ_LENGTH: usize = std::mem::size_of::(); + +impl<'a> From<&'a [u8]> for InternalKey<'a> { + fn from(encoded_key: &'a [u8]) -> Self { + decode_key(encoded_key) + } +} + +#[inline] +pub fn decode_key(encoded_key: &[u8]) -> InternalKey<'_> { + assert!(encoded_key.len() >= ENC_KEY_SEQ_LENGTH); + let seq_offset = encoded_key.len() - ENC_KEY_SEQ_LENGTH; + let num = u64::from_be_bytes( + encoded_key[seq_offset..seq_offset + ENC_KEY_SEQ_LENGTH] + .try_into() + .unwrap(), + ); + let sequence = num >> 8; + let v_type = ((num & 0xff) as u8).try_into().unwrap(); + InternalKey { + user_key: &encoded_key[..seq_offset], + v_type, + sequence, + } +} + +#[inline] +pub fn extract_user_key_and_suffix_u64(encoded_key: &[u8]) -> (&[u8], u64) { + assert!(encoded_key.len() >= ENC_KEY_SEQ_LENGTH); + let seq_offset = encoded_key.len() - ENC_KEY_SEQ_LENGTH; + let num = u64::from_be_bytes( + encoded_key[seq_offset..seq_offset + ENC_KEY_SEQ_LENGTH] + .try_into() + .unwrap(), + ); + + (&encoded_key[..seq_offset], num) +} + +/// Format for an internal key (used by the skip list.) 
+/// ``` +/// contents: key of size n | value type | sequence number shifted by 8 bits +/// byte position: 0 .. n-1 | n | n + 1 .. n + 7 +/// ``` +/// value type 0 encodes deletion, value type 1 encodes value. +/// +/// It follows the pattern of RocksDB, where the most 8 significant bits of u64 +/// will not used by sequence number. +#[inline] +pub fn encode_key_internal( + key: &[u8], + seq: u64, + v_type: ValueType, + f: impl FnOnce(usize) -> T, +) -> T { + assert!(seq == u64::MAX || seq >> ((ENC_KEY_SEQ_LENGTH - 1) * 8) == 0); + let mut e = f(key.len() + ENC_KEY_SEQ_LENGTH); + e.put(key); + e.put_u64((seq << 8) | v_type as u64); + e +} + +#[inline] +pub fn encode_key(key: &[u8], seq: u64, v_type: ValueType) -> Bytes { + let e = encode_key_internal::(key, seq, v_type, BytesMut::with_capacity); + e.freeze() +} + +#[inline] +pub fn encode_seek_key(key: &[u8], seq: u64, v_type: ValueType) -> Vec { + encode_key_internal::>(key, seq, v_type, Vec::with_capacity) +} + +#[derive(Default, Debug, Clone, Copy)] +pub struct InternalKeyComparator {} + +impl InternalKeyComparator { + fn same_key(lhs: &[u8], rhs: &[u8]) -> bool { + let k_1 = decode_key(lhs); + let k_2 = decode_key(rhs); + k_1.user_key == k_2.user_key + } +} + +impl KeyComparator for InternalKeyComparator { + fn compare_key(&self, lhs: &[u8], rhs: &[u8]) -> cmp::Ordering { + let (k_1, s_1) = extract_user_key_and_suffix_u64(lhs); + let (k_2, s_2) = extract_user_key_and_suffix_u64(rhs); + let r = k_1.cmp(k_2); + if r.is_eq() { + match s_1.cmp(&s_2) { + cmp::Ordering::Greater => { + return cmp::Ordering::Less; + } + cmp::Ordering::Less => { + return cmp::Ordering::Greater; + } + cmp::Ordering::Equal => { + return cmp::Ordering::Equal; + } + } + } + r + } + + fn same_key(&self, lhs: &[u8], rhs: &[u8]) -> bool { + InternalKeyComparator::same_key(lhs, rhs) + } +} + +#[cfg(test)] +mod tests { + use bytes::BufMut; + use skiplist_rs::KeyComparator; + + use super::{InternalKeyComparator, ValueType}; + use 
crate::keys::encode_key; + + fn construct_key(i: u64, mvcc: u64) -> Vec { + let k = format!("k{:08}", i); + let mut key = k.as_bytes().to_vec(); + // mvcc version should be make bit-wise reverse so that k-100 is less than k-99 + key.put_u64(!mvcc); + key + } + + #[test] + fn test_compare_key() { + let c = InternalKeyComparator::default(); + let k = construct_key(1, 10); + // key1: k1_10_10_val + let key1 = encode_key(&k, 10, ValueType::Value); + // key2: k1_10_10_del + let key2 = encode_key(&k, 10, ValueType::Deletion); + assert!(c.compare_key(&key1, &key2).is_le()); + + // key2: k1_10_0_val + let key2 = encode_key(&k, 0, ValueType::Value); + assert!(c.compare_key(&key1, &key2).is_le()); + + // key1: k1_10_MAX_val + let key1 = encode_key(&k, u64::MAX, ValueType::Value); + assert!(c.compare_key(&key1, &key2).is_le()); + + let k = construct_key(1, 0); + // key2: k1_0_10_val + let key2 = encode_key(&k, 10, ValueType::Value); + assert!(c.compare_key(&key1, &key2).is_le()); + + // key1: k1_MAX_0_val + let k = construct_key(1, u64::MAX); + let key1 = encode_key(&k, 0, ValueType::Value); + assert!(c.compare_key(&key1, &key2).is_le()); + + let k = construct_key(2, u64::MAX); + // key2: k2_MAX_MAX_val + let key2 = encode_key(&k, u64::MAX, ValueType::Value); + assert!(c.compare_key(&key1, &key2).is_le()); + } +} diff --git a/components/region_cache_memory_engine/src/lib.rs b/components/region_cache_memory_engine/src/lib.rs index fe15f4f936b..fc2136d3dab 100644 --- a/components/region_cache_memory_engine/src/lib.rs +++ b/components/region_cache_memory_engine/src/lib.rs @@ -6,4 +6,5 @@ #![feature(slice_pattern)] mod engine; +pub mod keys; pub use engine::RegionCacheMemoryEngine; From 9313afa5a05d6943179086409e339893e11a806a Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Tue, 26 Dec 2023 14:40:57 +0800 Subject: [PATCH 1078/1149] In-memory Engine: implement read flow -- prefix read (#16237) ref tikv/tikv#16141 implement read flow -- 
prefix read Signed-off-by: SpadeA-Tang --- Cargo.lock | 1 + .../region_cache_memory_engine/Cargo.toml | 1 + .../region_cache_memory_engine/src/engine.rs | 112 ++++++++++++++++-- 3 files changed, 103 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cb97e90095d..bb305538ee6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4450,6 +4450,7 @@ version = "0.0.1" dependencies = [ "bytes", "collections", + "engine_rocks", "engine_traits", "skiplist-rs", "tikv_util", diff --git a/components/region_cache_memory_engine/Cargo.toml b/components/region_cache_memory_engine/Cargo.toml index 949b2596f46..448d49971c3 100644 --- a/components/region_cache_memory_engine/Cargo.toml +++ b/components/region_cache_memory_engine/Cargo.toml @@ -13,3 +13,4 @@ collections = { workspace = true } skiplist-rs = { git = "https://github.com/tikv/skiplist-rs.git", branch = "main" } bytes = "1.0" tikv_util = { workspace = true } +engine_rocks = { workspace = true } diff --git a/components/region_cache_memory_engine/src/engine.rs b/components/region_cache_memory_engine/src/engine.rs index 6d284cefdd1..f18693e7d60 100644 --- a/components/region_cache_memory_engine/src/engine.rs +++ b/components/region_cache_memory_engine/src/engine.rs @@ -10,6 +10,7 @@ use std::{ use bytes::Bytes; use collections::HashMap; +use engine_rocks::{raw::SliceTransform, util::FixedSuffixSliceTransform}; use engine_traits::{ CfNamesExt, DbVector, Error, IterOptions, Iterable, Iterator, Mutable, Peekable, ReadOptions, RegionCacheEngine, Result, Snapshot, SnapshotMiscExt, WriteBatch, WriteBatchExt, WriteOptions, @@ -216,8 +217,6 @@ enum Direction { pub struct RegionCacheIterator { cf: String, valid: bool, - prefix_same_as_start: bool, - prefix: Option>, iter: IterRef, InternalKeyComparator>, // The lower bound is inclusive while the upper bound is exclusive if set // Note: bounds (region boundaries) have no mvcc versions @@ -232,6 +231,11 @@ pub struct RegionCacheIterator { // `iter` saved_value: Option, + // 
Not None means we are performing prefix seek + // Note: prefix_seek doesn't support seek_to_first and seek_to_last. + prefix_extractor: Option, + prefix: Option>, + direction: Direction, } @@ -260,9 +264,11 @@ impl RegionCacheIterator { break; } - if self.prefix_same_as_start { - // todo(SpadeA): support prefix seek - unimplemented!() + if let Some(ref prefix) = self.prefix { + if prefix != self.prefix_extractor.as_mut().unwrap().transform(user_key) { + // stop iterating due to unmatched prefix + break; + } } if self.is_visible(sequence) { @@ -323,9 +329,11 @@ impl RegionCacheIterator { break; } - if self.prefix_same_as_start { - // todo(SpadeA): support prefix seek - unimplemented!() + if let Some(ref prefix) = self.prefix { + if prefix != self.prefix_extractor.as_mut().unwrap().transform(user_key) { + // stop iterating due to unmatched prefix + break; + } } if !self.find_value_for_current_key() { @@ -429,6 +437,11 @@ impl Iterator for RegionCacheIterator { fn seek(&mut self, key: &[u8]) -> Result { self.direction = Direction::Forward; + if let Some(ref mut extractor) = self.prefix_extractor { + assert!(key.len() >= 8); + self.prefix = Some(extractor.transform(key).to_vec()) + } + let seek_key = if key < self.lower_bound.as_slice() { self.lower_bound.as_slice() } else { @@ -441,6 +454,11 @@ impl Iterator for RegionCacheIterator { fn seek_for_prev(&mut self, key: &[u8]) -> Result { self.direction = Direction::Backward; + if let Some(ref mut extractor) = self.prefix_extractor { + assert!(key.len() >= 8); + self.prefix = Some(extractor.transform(key).to_vec()) + } + let seek_key = if key > self.upper_bound.as_slice() { encode_seek_key( self.upper_bound.as_slice(), @@ -455,6 +473,7 @@ impl Iterator for RegionCacheIterator { } fn seek_to_first(&mut self) -> Result { + assert!(self.prefix_extractor.is_none()); self.direction = Direction::Forward; let seek_key = encode_seek_key(&self.lower_bound, self.sequence_number, VALUE_TYPE_FOR_SEEK); @@ -462,6 +481,7 @@ impl 
Iterator for RegionCacheIterator { } fn seek_to_last(&mut self) -> Result { + assert!(self.prefix_extractor.is_none()); self.direction = Direction::Backward; let seek_key = encode_seek_key(&self.upper_bound, u64::MAX, VALUE_TYPE_FOR_SEEK_FOR_PREV); self.seek_for_prev_internal(&seek_key) @@ -596,16 +616,21 @@ impl Iterable for RegionCacheSnapshot { fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { let iter = self.region_memory_engine.data[cf_to_id(cf)].iter(); - let prefix_same_as_start = opts.prefix_same_as_start(); + let prefix_extractor = if opts.prefix_same_as_start() { + Some(FixedSuffixSliceTransform::new(8)) + } else { + None + }; + let (lower_bound, upper_bound) = opts.build_bounds(); // only support with lower/upper bound set if lower_bound.is_none() || upper_bound.is_none() { return Err(Error::BoundaryNotSet); } + Ok(RegionCacheIterator { cf: String::from(cf), valid: false, - prefix_same_as_start, prefix: None, lower_bound: lower_bound.unwrap(), upper_bound: upper_bound.unwrap(), @@ -614,6 +639,7 @@ impl Iterable for RegionCacheSnapshot { saved_user_key: vec![], saved_value: None, direction: Direction::Uninit, + prefix_extractor, }) } } @@ -1351,7 +1377,6 @@ mod tests { fn test_seq_visibility_backward() { let engine = RegionCacheMemoryEngine::default(); engine.new_region(1); - let step: i32 = 2; { let mut core = engine.core.lock().unwrap(); @@ -1574,4 +1599,69 @@ mod tests { } } } + + #[test] + fn test_prefix_seek() { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + + { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + + for i in 1..5 { + for mvcc in 10..20 { + let user_key = construct_key(i, mvcc); + let internal_key = encode_key(&user_key, 10, ValueType::Value); + let v = format!("v{:02}{:02}", i, mvcc); + sl.put(internal_key, 
v); + } + } + } + + let mut iter_opt = IterOptions::default(); + let lower_bound = construct_user_key(1); + let upper_bound = construct_user_key(5); + iter_opt.set_upper_bound(&upper_bound, 0); + iter_opt.set_lower_bound(&lower_bound, 0); + iter_opt.set_prefix_same_as_start(true); + let snapshot = engine.snapshot(1, u64::MAX, u64::MAX).unwrap(); + let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); + + // prefix seek, forward + for i in 1..5 { + let seek_key = construct_key(i, 100); + assert!(iter.seek(&seek_key).unwrap()); + let mut start = 19; + while iter.valid().unwrap() { + let user_key = iter.key(); + let mvcc = !u64::from_be_bytes(user_key[user_key.len() - 8..].try_into().unwrap()); + assert_eq!(mvcc, start); + let v = format!("v{:02}{:02}", i, start); + assert_eq!(v.as_bytes(), iter.value()); + start -= 1; + iter.next().unwrap(); + } + assert_eq!(start, 9); + } + + // prefix seek, backward + for i in 1..5 { + let seek_key = construct_key(i, 0); + assert!(iter.seek_for_prev(&seek_key).unwrap()); + let mut start = 10; + while iter.valid().unwrap() { + let user_key = iter.key(); + let mvcc = !u64::from_be_bytes(user_key[user_key.len() - 8..].try_into().unwrap()); + assert_eq!(mvcc, start); + let v = format!("v{:02}{:02}", i, start); + assert_eq!(v.as_bytes(), iter.value()); + start += 1; + iter.prev().unwrap(); + } + assert_eq!(start, 20); + } + } } From 6a08b98b16dd212039f1a3f93ba16a74dc3cfa69 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Thu, 28 Dec 2023 10:39:28 +0800 Subject: [PATCH 1079/1149] readpool: add priority label for running tasks metrics (#16249) close tikv/tikv#16248 readpool: add priority label for running tasks Signed-off-by: nolouch --- metrics/grafana/tikv_details.dashboard.py | 10 ++++--- metrics/grafana/tikv_details.json | 18 ++++++------ metrics/grafana/tikv_details.json.sha256 | 2 +- scripts/gen-tikv-details-dashboard | 3 +- src/read_pool.rs | 36 +++++++++++++++-------- 5 files changed, 42 insertions(+), 27 
deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 1ed32eb6fe5..419e0626ebe 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -781,9 +781,9 @@ def Server() -> RowPanel: expr=expr_histogram_quantile( 0.99, "tikv_yatp_pool_schedule_wait_duration", - by_labels=["name"], + by_labels=["name", "priority"], ), - legend_format="{{name}}", + legend_format="{{name}}-{{priority}}", ), ], thresholds=[GraphThreshold(value=1.0)], @@ -796,9 +796,9 @@ def Server() -> RowPanel: target( expr=expr_histogram_avg( "tikv_yatp_pool_schedule_wait_duration", - by_labels=["name"], + by_labels=["name", "priority"], ), - legend_format="{{name}}", + legend_format="{{name}}-{{priority}}", ), ], thresholds=[GraphThreshold(value=1.0)], @@ -2600,7 +2600,9 @@ def UnifiedReadPool() -> RowPanel: "tikv_unified_read_pool_running_tasks", "avg", "1m", + by_labels=["priority"], ), + legend_format="{{priority}}", ), ], ), diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 5dd36b73dfb..911b4159b1c 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -5553,15 +5553,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, priority, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{name}}", + "legendFormat": "{{name}}-{{priority}}", "metric": "", - "query": "histogram_quantile(0.99,(\n 
sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, priority, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -5704,15 +5704,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_yatp_pool_schedule_wait_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) / sum(rate(\n tikv_yatp_pool_schedule_wait_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) )", + "expr": "(sum(rate(\n tikv_yatp_pool_schedule_wait_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, priority) / sum(rate(\n tikv_yatp_pool_schedule_wait_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, priority) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{name}}", + "legendFormat": "{{name}}-{{priority}}", "metric": "", - "query": "(sum(rate(\n tikv_yatp_pool_schedule_wait_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) / sum(rate(\n tikv_yatp_pool_schedule_wait_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) )", + "query": "(sum(rate(\n tikv_yatp_pool_schedule_wait_duration_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, priority) / sum(rate(\n tikv_yatp_pool_schedule_wait_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, priority) )", "refId": "", "step": 10, "target": "" @@ -23597,15 +23597,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(avg_over_time(\n tikv_unified_read_pool_running_tasks\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (instance) ", + "expr": "sum(avg_over_time(\n tikv_unified_read_pool_running_tasks\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (priority) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{priority}}", "metric": "", - "query": "sum(avg_over_time(\n tikv_unified_read_pool_running_tasks\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (instance) ", + "query": "sum(avg_over_time(\n tikv_unified_read_pool_running_tasks\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (priority) ", "refId": "", "step": 10, "target": "" diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index cc9c7769755..c6e0ef68b99 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -75c3d3d71080a5e3bd40273bc2250797ab929e6c6ab46df89cad79d837531a2d ./metrics/grafana/tikv_details.json +cc2c86168805088535519a2b0872bad424fed55ab0ab9aba6ec2ff35e5a10f4a ./metrics/grafana/tikv_details.json diff --git a/scripts/gen-tikv-details-dashboard b/scripts/gen-tikv-details-dashboard index 2c91cf3dbb9..f51a14caea6 100755 --- 
a/scripts/gen-tikv-details-dashboard +++ b/scripts/gen-tikv-details-dashboard @@ -1,6 +1,7 @@ #!/usr/bin/env bash set -euo pipefail +root_dir=$(realpath "$(dirname "$(realpath "$0")")/..") docker build -t tikv-dashboard-gen -f - . <, - running_tasks: IntGauge, + running_tasks: [IntGauge; TaskPriority::PRIORITY_COUNT], running_threads: IntGauge, max_tasks: usize, pool_size: usize, @@ -108,7 +108,7 @@ pub enum ReadPoolHandle { }, Yatp { remote: Remote, - running_tasks: IntGauge, + running_tasks: [IntGauge; TaskPriority::PRIORITY_COUNT], running_threads: IntGauge, max_tasks: usize, pool_size: usize, @@ -150,7 +150,8 @@ impl ReadPoolHandle { resource_ctl, .. } => { - let running_tasks = running_tasks.clone(); + let task_priority = TaskPriority::from(metadata.override_priority()); + let running_tasks = running_tasks[task_priority as usize].clone(); // Note that the running task number limit is not strict. // If several tasks are spawned at the same time while the running task number // is close to the limit, they may all pass this check and the number of running @@ -158,7 +159,6 @@ impl ReadPoolHandle { if running_tasks.get() as usize >= *max_tasks { return Err(ReadPoolError::UnifiedReadPoolFull); } - running_tasks.inc(); let fixed_level = match priority { CommandPri::High => Some(0), @@ -245,7 +245,7 @@ impl ReadPoolHandle { running_tasks, pool_size, .. 
- } => running_tasks.get() as usize / *pool_size, + } => running_tasks.iter().map(|r| r.get()).sum::() as usize / *pool_size, } } @@ -506,8 +506,10 @@ pub fn build_yatp_read_pool_with_name( let time_slice_inspector = Arc::new(TimeSliceInspector::new(&unified_read_pool_name)); ReadPool::Yatp { pool, - running_tasks: UNIFIED_READ_POOL_RUNNING_TASKS - .with_label_values(&[&unified_read_pool_name]), + running_tasks: TaskPriority::priorities().map(|p| { + UNIFIED_READ_POOL_RUNNING_TASKS + .with_label_values(&[&unified_read_pool_name, p.as_str()]) + }), running_threads: UNIFIED_READ_POOL_RUNNING_THREADS .with_label_values(&[&unified_read_pool_name]), max_tasks: config @@ -628,7 +630,9 @@ impl RunnableWithTimer for ReadPoolConfigRunner { impl ReadPoolConfigRunner { fn running_tasks(&self) -> i64 { match &self.handle { - ReadPoolHandle::Yatp { running_tasks, .. } => running_tasks.get(), + ReadPoolHandle::Yatp { running_tasks, .. } => { + running_tasks.iter().map(|r| r.get()).sum() + } _ => unreachable!(), } } @@ -788,7 +792,7 @@ mod metrics { pub static ref UNIFIED_READ_POOL_RUNNING_TASKS: IntGaugeVec = register_int_gauge_vec!( "tikv_unified_read_pool_running_tasks", "The number of running tasks in the unified read pool", - &["name"] + &["name", "priority"] ) .unwrap(); pub static ref UNIFIED_READ_POOL_RUNNING_THREADS: IntGaugeVec = register_int_gauge_vec!( @@ -805,6 +809,8 @@ mod tests { use std::{thread, time::Duration}; use futures::channel::oneshot; + use futures_executor::block_on; + use kvproto::kvrpcpb::ResourceControlContext; use raftstore::store::{ReadStats, WriteStats}; use resource_control::ResourceGroupManager; @@ -875,7 +881,7 @@ mod tests { .unwrap(); assert_eq!( UNIFIED_READ_POOL_RUNNING_TASKS - .with_label_values(&[name]) + .with_label_values(&[name, "medium"]) .get(), 2 ); @@ -991,8 +997,14 @@ mod tests { _ => panic!("should return full error"), } - // TODO: move running task by priority to read_pool. 
// spawn a high-priority task, should not return Full error. + let (task_high, tx_h) = gen_task(); + let mut ctx = ResourceControlContext::default(); + ctx.override_priority = 16; // high priority + let metadata = TaskMetadata::from_ctx(&ctx); + let f = handle.spawn_handle(task_high, CommandPri::Normal, 6, metadata, None); + tx_h.send(()).unwrap(); + block_on(f).unwrap(); tx1.send(()).unwrap(); tx2.send(()).unwrap(); From 6510959aaeb8e41da4ae1d611e05da3c1d6eb658 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Thu, 28 Dec 2023 14:16:57 +0800 Subject: [PATCH 1080/1149] metrics: polish the metrics on Raft IO and Raft Propose sections. (#16250) close tikv/tikv#16251 This pr add missing legend on filtering statistics by TiKV instance in Raft IO and Raft Propose sections. Signed-off-by: lucasliang --- metrics/grafana/common.py | 12 +- metrics/grafana/tikv_details.dashboard.py | 16 ++ metrics/grafana/tikv_details.json | 208 +++++++++++----------- metrics/grafana/tikv_details.json.sha256 | 2 +- 4 files changed, 132 insertions(+), 106 deletions(-) diff --git a/metrics/grafana/common.py b/metrics/grafana/common.py index 7f15c06998f..ad575b27a0f 100644 --- a/metrics/grafana/common.py +++ b/metrics/grafana/common.py @@ -1072,7 +1072,16 @@ def heatmap_panel_graph_panel_histogram_quantile_pairs( metric: str, label_selectors=[], graph_by_labels=[], + graph_hides: list[str] = ["count"], ) -> list[Panel]: + hide_count = False + hide_avg = False + for hide in graph_hides: + if hide == "count": + hide_count = True + elif hide == "avg": + hide_avg = True + return [ heatmap_panel( title=heatmap_title, @@ -1088,6 +1097,7 @@ def heatmap_panel_graph_panel_histogram_quantile_pairs( yaxes=yaxes(left_format=yaxis_format), label_selectors=label_selectors, by_labels=graph_by_labels, - hide_count=True, + hide_count=hide_count, + hide_avg=hide_avg, ), ] diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 419e0626ebe..7eb6cd29205 100644 --- 
a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -1812,6 +1812,8 @@ def RaftIO() -> RowPanel: heatmap_description="The time consumed for peer processes to be ready in Raft", graph_title="99% Process ready duration per server", graph_description="The time consumed for peer processes to be ready in Raft", + graph_by_labels=["instance"], + graph_hides=["count", "avg"], yaxis_format=UNITS.SECONDS, metric="tikv_raftstore_raft_process_duration_secs", label_selectors=['type="ready"'], @@ -1823,6 +1825,8 @@ def RaftIO() -> RowPanel: heatmap_description="The time duration of store write loop when store-io-pool-size is not zero.", graph_title="99% Store write loop duration per server", graph_description="The time duration of store write loop on each TiKV instance when store-io-pool-size is not zero.", + graph_by_labels=["instance"], + graph_hides=["count", "avg"], yaxis_format=UNITS.SECONDS, metric="tikv_raftstore_store_write_loop_duration_seconds", ) @@ -1833,6 +1837,8 @@ def RaftIO() -> RowPanel: heatmap_description="The time consumed when Raft appends log", graph_title="99% Commit log duration per server", graph_description="The time consumed when Raft commits log on each TiKV instance", + graph_by_labels=["instance"], + graph_hides=["count", "avg"], yaxis_format=UNITS.SECONDS, metric="tikv_raftstore_append_log_duration_seconds", ) @@ -1843,6 +1849,8 @@ def RaftIO() -> RowPanel: heatmap_description="The time consumed when Raft commits log", graph_title="99% Commit log duration per server", graph_description="The time consumed when Raft commits log on each TiKV instance", + graph_by_labels=["instance"], + graph_hides=["count", "avg"], yaxis_format=UNITS.SECONDS, metric="tikv_raftstore_commit_log_duration_seconds", ) @@ -1853,6 +1861,8 @@ def RaftIO() -> RowPanel: heatmap_description="The time consumed when Raft applies log", graph_title="99% Apply log duration per server", graph_description="The time consumed for Raft to apply 
logs per TiKV instance", + graph_by_labels=["instance"], + graph_hides=["count", "avg"], yaxis_format=UNITS.SECONDS, metric="tikv_raftstore_apply_log_duration_seconds", ) @@ -1978,6 +1988,8 @@ def RaftPropose() -> RowPanel: heatmap_description="The wait time of each proposal", graph_title="99% Propose wait duration per server", graph_description="The wait time of each proposal in each TiKV instance", + graph_by_labels=["instance"], + graph_hides=["count", "avg"], yaxis_format=UNITS.SECONDS, metric="tikv_raftstore_request_wait_time_duration_secs", ) @@ -1988,6 +2000,8 @@ def RaftPropose() -> RowPanel: heatmap_description="The wait time of each store write task", graph_title="99% Store write wait duration per server", graph_description="The wait time of each store write task in each TiKV instance", + graph_by_labels=["instance"], + graph_hides=["count", "avg"], yaxis_format=UNITS.SECONDS, metric="tikv_raftstore_store_write_task_wait_duration_secs", ) @@ -1998,6 +2012,8 @@ def RaftPropose() -> RowPanel: heatmap_description="The wait time of each apply task", graph_title="99% Apply wait duration per server", graph_description="The wait time of each apply task in each TiKV instance", + graph_by_labels=["instance"], + graph_hides=["count", "avg"], yaxis_format=UNITS.SECONDS, metric="tikv_raftstore_apply_wait_time_duration_secs", ) diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 911b4159b1c..c75ca380b78 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -15413,60 +15413,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "99.99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "avg-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "count-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -15718,60 +15718,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": 
"99.99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "avg-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n 
tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "count-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -16023,60 +16023,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "99.99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) 
by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_append_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_append_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "avg-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_append_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_append_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "count-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -16328,60 +16328,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "99.99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, 
"instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "avg-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "count-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -16633,60 +16633,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "99.99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": 
"${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_apply_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_apply_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "avg-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_apply_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_apply_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": 
"sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "count-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -18098,60 +18098,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "99.99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n 
tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "avg-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "count-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + 
"query": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -18403,60 +18403,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "99.99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "avg-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "count-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -18708,60 +18708,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "99.99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "avg-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "count-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index c6e0ef68b99..254f78869af 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -cc2c86168805088535519a2b0872bad424fed55ab0ab9aba6ec2ff35e5a10f4a ./metrics/grafana/tikv_details.json +8f50008a4cb515602e8fe44d67cebbedd3e693d811051a223be5e08dc66eee30 ./metrics/grafana/tikv_details.json From 4702b9bf6ed8d72d20bedf3de184f63111055b8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Fri, 29 Dec 2023 10:06:27 +0800 Subject: [PATCH 1081/1149] added async-backtrace for log backup (#15727) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit ref tikv/tikv#15759 added a new http endpoint for querying the asynchronous tasks at `/async_tasks`. Used `tracing-active-tree`: `root!` and `frame!` for tracing asynchronous tasks. see more details in the repo `tikv/tracing-active-tree`. Signed-off-by: hillium Signed-off-by: 山岚 <36239017+YuJuncen@users.noreply.github.com> --- Cargo.lock | 86 ++++++++++++++---- Cargo.toml | 6 +- Makefile | 4 + cmd/tikv-server/Cargo.toml | 3 + cmd/tikv-server/src/main.rs | 9 ++ components/backup-stream/Cargo.toml | 2 + .../backup-stream/src/checkpoint_manager.rs | 3 + components/backup-stream/src/endpoint.rs | 88 +++++++++++-------- components/backup-stream/src/event_loader.rs | 16 ++-- components/backup-stream/src/router.rs | 79 +++++++++++++---- .../backup-stream/src/subscription_manager.rs | 24 +++-- src/server/status_server/mod.rs | 13 +++ 12 files changed, 250 insertions(+), 83 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bb305538ee6..436e1b9fb6a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -530,6 +530,8 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", + "tracing", + "tracing-active-tree", "txn_types", "url", "uuid 0.8.2", @@ -983,6 +985,18 @@ dependencies = [ "cc", ] +[[package]] +name = "coarsetime" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71367d3385c716342014ad17e3d19f7788ae514885a1f4c24f500260fb365e1a" +dependencies = [ + "libc 0.2.146", + "once_cell", + "wasi 0.11.0+wasi-snapshot-preview1", + "wasm-bindgen", +] + [[package]] name = "codec" version = "0.0.1" @@ -2543,6 +2557,12 @@ dependencies = [ "hashbrown 0.9.1", ] +[[package]] +name = "indextree" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c40411d0e5c63ef1323c3d09ce5ec6d84d71531e18daed0743fccea279d7deb6" + [[package]] name = "infer" version = "0.2.3" @@ -2867,12 +2887,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.14" +version = "0.4.20" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" -dependencies = [ - "cfg-if 1.0.0", -] +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "log_wrappers" @@ -5162,6 +5179,15 @@ dependencies = [ "opaque-debug", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "0.1.1" @@ -5282,9 +5308,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.8.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" +checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" [[package]] name = "smartstring" @@ -6339,6 +6365,7 @@ dependencies = [ "tokio-openssl", "tokio-timer", "toml", + "tracing-active-tree", "tracker", "txn_types", "url", @@ -6453,6 +6480,8 @@ dependencies = [ "tikv_util", "time 0.1.42", "toml", + "tracing-active-tree", + "tracing-subscriber", ] [[package]] @@ -6767,34 +6796,59 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.25" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01ebdc2bb4498ab1ab5f5b73c5803825e60199229ccba0698170e3be0e7f959f" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "cfg-if 1.0.0", "pin-project-lite", "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-active-tree" +version = "0.1.0" +source = "git+https://github.com/tikv/tracing-active-tree.git?rev=a71f8f8148f88ab759deb6d3e1d62d07ab218347#a71f8f8148f88ab759deb6d3e1d62d07ab218347" +dependencies = [ + 
"coarsetime", + "dashmap", + "indextree", + "lazy_static", + "smallvec", + "tracing", + "tracing-subscriber", +] + [[package]] name = "tracing-attributes" -version = "0.1.21" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc6b8ad3567499f98a1db7a752b07a7c8c7c7c34c332ec00effb2b0027974b7c" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 1.0.103", + "syn 2.0.18", ] [[package]] name = "tracing-core" -version = "0.1.17" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f50de3927f93d202783f4513cda820ab47ef17f624b03c096e86ef00c67e6b5f" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ - "lazy_static", + "once_cell", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 7bf16f3e092..9235c4b07ed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,7 @@ openssl-vendored = [ # for testing configure propegate to other crates # https://stackoverflow.com/questions/41700543/can-we-share-test-utilites-between-crates -testing = [] +testing = [ ] [lib] name = "tikv" @@ -164,6 +164,7 @@ tokio = { version = "1.17", features = ["full"] } tokio-openssl = "0.6" tokio-timer = { workspace = true } toml = "0.5" +tracing-active-tree = { workspace = true } tracker = { workspace = true } txn_types = { workspace = true } url = "2" @@ -389,6 +390,9 @@ tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hot tokio-executor = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } slog = { version = "2.3", features = ["max_level_trace", 
"release_max_level_debug"] } slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +tracing-active-tree = { git = "https://github.com/tikv/tracing-active-tree.git", features = ["coarsetime"], rev = "a71f8f8148f88ab759deb6d3e1d62d07ab218347" } +# This `tracing` is only used for `tracing-active-tree`, enable its attributes only. +tracing = { version = "0.1.39", default-features = false, features = [ "attributes", "std" ] } openssl = "0.10" openssl-sys = "0.9" diff --git a/Makefile b/Makefile index d7b0940fd5c..45b152bfb8f 100644 --- a/Makefile +++ b/Makefile @@ -120,6 +120,10 @@ ENABLE_FEATURES += cloud-gcp ENABLE_FEATURES += cloud-azure endif +ifneq ($(NO_ASYNC_BACKTRACE),1) +ENABLE_FEATURES += trace-async-tasks +endif + export DOCKER_FILE ?= Dockerfile export DOCKER_IMAGE_NAME ?= pingcap/tikv export DOCKER_IMAGE_TAG ?= latest diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index cc99e05fb58..62211c1fcbc 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -7,6 +7,7 @@ publish = false [features] default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] +trace-async-tasks = ["dep:tracing-active-tree", "dep:tracing-subscriber"] trace-tablet-lifetime = ["tikv/trace-tablet-lifetime"] tcmalloc = ["server/tcmalloc"] jemalloc = ["server/jemalloc"] @@ -46,6 +47,8 @@ server = { workspace = true } tikv = { workspace = true } tikv_util = { workspace = true } toml = "0.5" +tracing-active-tree = { workspace = true, optional = true } +tracing-subscriber = { version = "0.3.17", default-features = false, features = [ "registry", "smallvec" ], optional = true } [build-dependencies] cc = "1.0" diff --git a/cmd/tikv-server/src/main.rs b/cmd/tikv-server/src/main.rs index 4c1eb4fc2c5..c049fd848b4 100644 --- a/cmd/tikv-server/src/main.rs +++ b/cmd/tikv-server/src/main.rs @@ -221,6 +221,15 @@ fn main() { 
process::exit(1) } + // Initialize the async-backtrace. + #[cfg(feature = "trace-async-tasks")] + { + use tracing_subscriber::prelude::*; + tracing_subscriber::registry() + .with(tracing_active_tree::layer::global().clone()) + .init(); + } + // Sets the global logger ASAP. // It is okay to use the config w/o `validate()`, // because `initial_logger()` handles various conditions. diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index a91b3fb071d..d8174831792 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -71,6 +71,8 @@ tikv_util = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread", "macros", "time", "sync"] } tokio-stream = "0.1" tokio-util = { version = "0.7", features = ["compat"] } +tracing = { workspace = true } +tracing-active-tree = { workspace = true } txn_types = { workspace = true } uuid = "0.8" yatp = { workspace = true } diff --git a/components/backup-stream/src/checkpoint_manager.rs b/components/backup-stream/src/checkpoint_manager.rs index d32c2ea7c00..e511b104c23 100644 --- a/components/backup-stream/src/checkpoint_manager.rs +++ b/components/backup-stream/src/checkpoint_manager.rs @@ -15,6 +15,7 @@ use kvproto::{ }; use pd_client::PdClient; use tikv_util::{box_err, defer, info, time::Instant, warn, worker::Scheduler}; +use tracing::instrument; use txn_types::TimeStamp; use uuid::Uuid; @@ -83,6 +84,7 @@ impl SubscriptionManager { // NOTE: Maybe close all subscription streams here. 
} + #[instrument(skip_all, fields(length = events.len()))] async fn emit_events(&mut self, events: Box<[FlushEvent]>) { let mut canceled = vec![]; info!("log backup sending events"; "event_len" => %events.len(), "downstream" => %self.subscribers.len()); @@ -107,6 +109,7 @@ impl SubscriptionManager { } } + #[instrument(skip(self))] async fn remove_subscription(&mut self, id: &Uuid) { match self.subscribers.remove(id) { Some(sub) => { diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index f453469768c..74a8012bf4b 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -42,6 +42,8 @@ use tokio::{ sync::{oneshot, Semaphore}, }; use tokio_stream::StreamExt; +use tracing::instrument; +use tracing_active_tree::root; use txn_types::TimeStamp; use super::metrics::HANDLE_EVENT_DURATION_HISTOGRAM; @@ -131,14 +133,14 @@ where let meta_client_clone = meta_client.clone(); let scheduler_clone = scheduler.clone(); // TODO build a error handle mechanism #error 2 - pool.spawn(async { + pool.spawn(root!("flush_ticker"; Self::starts_flush_ticks(range_router.clone()))); + pool.spawn(root!("start_watch_tasks"; async { if let Err(err) = Self::start_and_watch_tasks(meta_client_clone, scheduler_clone).await { err.report("failed to start watch tasks"); } - }); - - pool.spawn(Self::starts_flush_ticks(range_router.clone())); + info!("started task watcher!"); + })); let initial_scan_memory_quota = Arc::new(MemoryQuota::new( config.initial_scan_pending_memory_quota.0 as _, @@ -173,9 +175,9 @@ where ((config.num_threads + 1) / 2).max(1), resolver, ); - pool.spawn(op_loop); + pool.spawn(root!(op_loop)); let mut checkpoint_mgr = CheckpointManager::default(); - pool.spawn(checkpoint_mgr.spawn_subscription_mgr()); + pool.spawn(root!(checkpoint_mgr.spawn_subscription_mgr())); let ep = Endpoint { initial_scan_semaphore, meta_client, @@ -195,7 +197,7 @@ where checkpoint_mgr, abort_last_storage_save: None, }; 
- ep.pool.spawn(ep.min_ts_worker()); + ep.pool.spawn(root!(ep.min_ts_worker())); ep } } @@ -221,28 +223,28 @@ where let safepoint_ttl = self.pause_guard_duration(); let code = err.error_code().code.to_owned(); let msg = err.to_string(); - let task = task.to_owned(); - async move { + let t = task.to_owned(); + let f = async move { let err_fut = async { - let safepoint = meta_cli.global_progress_of_task(&task).await?; + let safepoint = meta_cli.global_progress_of_task(&t).await?; pdc.update_service_safe_point( safepoint_name, TimeStamp::new(safepoint.saturating_sub(1)), safepoint_ttl, ) .await?; - meta_cli.pause(&task).await?; + meta_cli.pause(&t).await?; let mut last_error = StreamBackupError::new(); last_error.set_error_code(code); last_error.set_error_message(msg.clone()); last_error.set_store_id(store_id); last_error.set_happen_at(TimeStamp::physical_now()); - meta_cli.report_last_error(&task, last_error).await?; + meta_cli.report_last_error(&t, last_error).await?; Result::Ok(()) }; if let Err(err_report) = err_fut.await { err_report.report(format_args!("failed to upload error {}", err_report)); - let name = task.to_owned(); + let name = t.to_owned(); // Let's retry reporting after 5s. 
tokio::task::spawn(async move { tokio::time::sleep(Duration::from_secs(5)).await; @@ -255,7 +257,8 @@ where ); }); } - } + }; + tracing_active_tree::frame!("on_fatal_error_of_task"; f; %err, %task) } fn on_fatal_error(&self, select: TaskSelector, err: Box) { @@ -283,6 +286,7 @@ where } // TODO find a proper way to exit watch tasks + #[instrument(skip_all)] async fn start_and_watch_tasks( meta_client: MetadataClient, scheduler: Scheduler, @@ -318,19 +322,19 @@ where let meta_client_clone = meta_client.clone(); let scheduler_clone = scheduler.clone(); - Handle::current().spawn(async move { + Handle::current().spawn(root!("task_watcher"; async move { if let Err(err) = Self::starts_watch_task(meta_client_clone, scheduler_clone, revision).await { err.report("failed to start watch tasks"); } - }); + })); - Handle::current().spawn(async move { + Handle::current().spawn(root!("pause_watcher"; async move { if let Err(err) = Self::starts_watch_pause(meta_client, scheduler, revision).await { err.report("failed to start watch pause"); } - }); + })); Ok(()) } @@ -478,7 +482,14 @@ where let router = self.range_router.clone(); let sched = self.scheduler.clone(); let subs = self.subs.clone(); - self.pool.spawn(async move { + let region = batch.region_id; + let from_idx = batch.cmds.first().map(|c| c.index).unwrap_or(0); + let (to_idx, term) = batch + .cmds + .last() + .map(|c| (c.index, c.term)) + .unwrap_or((0, 0)); + self.pool.spawn(root!("backup_batch"; async move { let region_id = batch.region_id; let kvs = Self::record_batch(subs, batch); if kvs.as_ref().map(|x| x.is_empty()).unwrap_or(true) { @@ -504,7 +515,7 @@ where .with_label_values(&["save_to_temp_file"]) .observe(time_cost); drop(work) - }); + }; from_idx, to_idx, region, current_term = term)); } pub fn handle_watch_task(&self, op: TaskOp) { @@ -626,7 +637,7 @@ where let task_name = task.info.get_name().to_owned(); // clean the safepoint created at pause(if there is) - self.pool.spawn( + 
self.pool.spawn(root!("load_initial_task"; self.pd_client .update_service_safe_point( self.pause_guard_id_for_task(task.info.get_name()), @@ -635,8 +646,8 @@ where ) .map(|r| { r.map_err(|err| Error::from(err).report("removing safe point for pausing")) - }), - ); + }) + )); self.pool.block_on(async move { let task_clone = task.clone(); let run = async move { @@ -703,12 +714,12 @@ where Err(err) => { err.report(format!("failed to resume backup stream task {}", task_name)); let sched = self.scheduler.clone(); - tokio::task::spawn(async move { + tokio::task::spawn(root!("retry_resume"; async move { tokio::time::sleep(Duration::from_secs(5)).await; sched .schedule(Task::WatchTask(TaskOp::ResumeTask(task_name))) .unwrap(); - }); + })); } } } @@ -822,11 +833,12 @@ where } fn on_flush_with_min_ts(&self, task: String, min_ts: TimeStamp) { - self.pool.spawn(self.do_flush(task, min_ts).map(|r| { - if let Err(err) = r { - err.report("during updating flush status") - } - })); + self.pool + .spawn(root!("flush"; self.do_flush(task, min_ts).map(|r| { + if let Err(err) = r { + err.report("during updating flush status") + } + }); min_ts = min_ts.into_inner())); } fn update_global_checkpoint(&self, task: String) -> future![()] { @@ -891,7 +903,7 @@ where handle.abort(); } let (fut, handle) = futures::future::abortable(self.update_global_checkpoint(task)); - self.pool.spawn(fut); + self.pool.spawn(root!("update_global_checkpoint"; fut)); self.abort_last_storage_save = Some(handle); } @@ -919,12 +931,12 @@ where use std::cmp::Ordering::*; match diff.cmp(&0) { Less => { - self.pool.spawn( + self.pool.spawn(root!( Arc::clone(sema) .acquire_many_owned(-diff as _) // It is OK to trivially ignore the Error case (semaphore has been closed, we are shutting down the server.) 
- .map_ok(|p| p.forget()), - ); + .map_ok(|p| p.forget()) + )); } Equal => {} Greater => { @@ -956,10 +968,10 @@ where cb() } else { let sched = self.scheduler.clone(); - self.pool.spawn(async move { + self.pool.spawn(root!(async move { tokio::time::sleep(Duration::from_millis(500)).await; sched.schedule(Task::Sync(cb, cond)).unwrap(); - }); + })); } } Task::MarkFailover(t) => self.failover_time = Some(t), @@ -1015,11 +1027,11 @@ where } RegionCheckpointOperation::Subscribe(sub) => { let fut = self.checkpoint_mgr.add_subscriber(sub); - self.pool.spawn(async move { + self.pool.spawn(root!(async move { if let Err(err) = fut.await { err.report("adding subscription"); } - }); + })); } RegionCheckpointOperation::PrepareMinTsForResolve => { if self.observer.is_hibernating() { diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index 0a957ea87ed..c78c2c53a19 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -22,6 +22,8 @@ use tikv_util::{ worker::Scheduler, }; use tokio::sync::Semaphore; +use tracing::instrument; +use tracing_active_tree::frame; use txn_types::{Key, Lock, TimeStamp}; use crate::{ @@ -224,6 +226,7 @@ where } } + #[instrument(skip_all)] pub async fn capture_change( &self, region: &Region, @@ -276,6 +279,7 @@ where Ok(snap) } + #[instrument(skip_all)] pub async fn observe_over_with_retry( &self, region: &Region, @@ -373,6 +377,7 @@ where f(v.value_mut().resolver()) } + #[instrument(skip_all)] async fn scan_and_async_send( &self, region: &Region, @@ -430,6 +435,7 @@ where } } + #[instrument(skip_all)] pub async fn do_initial_scan( &self, region: &Region, @@ -438,16 +444,14 @@ where start_ts: TimeStamp, snap: impl Snapshot, ) -> Result { - let tr = self.tracing.clone(); let region_id = region.get_id(); let mut join_handles = Vec::with_capacity(8); - let permit = self - .concurrency_limit - .acquire() + let permit = 
frame!(self.concurrency_limit.acquire()) .await .expect("BUG: semaphore closed"); + // It is ok to sink more data than needed. So scan to +inf TS for convenance. let event_loader = EventLoader::load_from(snap, start_ts, TimeStamp::max(), region)?; let stats = self @@ -455,11 +459,11 @@ where .await?; drop(permit); - futures::future::try_join_all(join_handles) + frame!(futures::future::try_join_all(join_handles)) .await .map_err(|err| annotate!(err, "tokio runtime failed to join consuming threads"))?; - Self::with_resolver_by(&tr, region, &handle, |r| { + self.with_resolver(region, &handle, |r| { r.phase_one_done(); Ok(()) }) diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 00ce93635e8..31166cbe384 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -44,6 +44,8 @@ use tokio::{ sync::{Mutex, RwLock}, }; use tokio_util::compat::TokioAsyncReadCompatExt; +use tracing::instrument; +use tracing_active_tree::frame; use txn_types::{Key, Lock, TimeStamp, WriteRef}; use super::errors::Result; @@ -61,7 +63,7 @@ use crate::{ const FLUSH_FAILURE_BECOME_FATAL_THRESHOLD: usize = 30; -#[derive(Clone, Debug)] +#[derive(Clone)] pub enum TaskSelector { ByName(String), ByKey(Vec), @@ -69,6 +71,12 @@ pub enum TaskSelector { All, } +impl std::fmt::Debug for TaskSelector { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.reference().fmt(f) + } +} + impl TaskSelector { pub fn reference(&self) -> TaskSelectorRef<'_> { match self { @@ -80,7 +88,7 @@ impl TaskSelector { } } -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy)] pub enum TaskSelectorRef<'a> { ByName(&'a str), ByKey(&'a [u8]), @@ -88,6 +96,24 @@ pub enum TaskSelectorRef<'a> { All, } +impl<'a> std::fmt::Debug for TaskSelectorRef<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::ByName(name) => f.debug_tuple("ByName").field(name).finish(), + 
Self::ByKey(key) => f + .debug_tuple("ByKey") + .field(&format_args!("{}", utils::redact(key))) + .finish(), + Self::ByRange(start, end) => f + .debug_tuple("ByRange") + .field(&format_args!("{}", utils::redact(start))) + .field(&format_args!("{}", utils::redact(end))) + .finish(), + Self::All => write!(f, "All"), + } + } +} + impl<'a> TaskSelectorRef<'a> { fn matches<'c, 'd>( self, @@ -451,8 +477,7 @@ impl RouterInner { let cfg = self.tempfile_config_for_task(&task); let stream_task = StreamTaskInfo::new(task, ranges.clone(), merged_file_size_limit, cfg).await?; - self.tasks - .lock() + frame!(self.tasks.lock()) .await .insert(task_name.clone(), Arc::new(stream_task)); @@ -479,7 +504,7 @@ impl RouterInner { } pub async fn unregister_task(&self, task_name: &str) -> Option { - self.tasks.lock().await.remove(task_name).map(|t| { + frame!(self.tasks.lock()).await.remove(task_name).map(|t| { info!( "backup stream unregister task"; "task" => task_name, @@ -495,8 +520,9 @@ impl RouterInner { r.get_value_by_point(key).cloned() } + #[instrument(skip(self))] pub async fn select_task(&self, selector: TaskSelectorRef<'_>) -> Vec { - let s = self.tasks.lock().await; + let s = frame!(self.tasks.lock()).await; s.iter() .filter(|(name, info)| { selector.matches( @@ -522,8 +548,9 @@ impl RouterInner { tasks.insert(task_name.to_owned(), Arc::new(raw)); } + #[instrument(skip(self))] pub async fn get_task_info(&self, task_name: &str) -> Result> { - let task_info = match self.tasks.lock().await.get(task_name) { + let task_info = match frame!(self.tasks.lock()).await.get(task_name) { Some(t) => t.clone(), None => { info!("backup stream no task"; "task" => ?task_name); @@ -535,6 +562,7 @@ impl RouterInner { Ok(task_info) } + #[instrument(skip_all, fields(task))] async fn on_event(&self, task: String, events: ApplyEvents) -> Result<()> { let task_info = self.get_task_info(&task).await?; task_info.on_events(events).await?; @@ -583,6 +611,7 @@ impl RouterInner { /// flush the specified 
task, once once success, return the min resolved ts /// of this flush. returns `None` if failed. + #[instrument(skip(self, resolve_to))] pub async fn do_flush( &self, task_name: &str, @@ -619,6 +648,7 @@ impl RouterInner { } } + #[instrument(skip(self))] pub async fn update_global_checkpoint( &self, task_name: &str, @@ -632,6 +662,7 @@ impl RouterInner { } /// tick aims to flush log/meta to extern storage periodically. + #[instrument(skip_all)] pub async fn tick(&self) { let max_flush_interval = self.max_flush_interval.rl().to_owned(); @@ -896,16 +927,19 @@ impl StreamTaskInfo { }) } + #[instrument(skip(self, events), fields(event_len = events.len()))] async fn on_events_of_key(&self, key: TempFileKey, events: ApplyEvents) -> Result<()> { fail::fail_point!("before_generate_temp_file"); - if let Some(f) = self.files.read().await.get(&key) { - self.total_size - .fetch_add(f.lock().await.on_events(events).await?, Ordering::SeqCst); + if let Some(f) = frame!(self.files.read()).await.get(&key) { + self.total_size.fetch_add( + frame!(f.lock()).await.on_events(events).await?, + Ordering::SeqCst, + ); return Ok(()); } // slow path: try to insert the element. - let mut w = self.files.write().await; + let mut w = frame!(self.files.write()).await; // double check before insert. there may be someone already insert that // when we are waiting for the write lock. // silence the lint advising us to use the `Entry` API which may introduce @@ -918,14 +952,17 @@ impl StreamTaskInfo { } let f = w.get(&key).unwrap(); - self.total_size - .fetch_add(f.lock().await.on_events(events).await?, Ordering::SeqCst); + self.total_size.fetch_add( + frame!(f.lock()).await.on_events(events).await?, + Ordering::SeqCst, + ); fail::fail_point!("after_write_to_file"); Ok(()) } /// Append a event to the files. This wouldn't trigger `fsync` syscall. /// i.e. No guarantee of persistence. 
+ #[instrument(skip_all)] pub async fn on_events(&self, kv: ApplyEvents) -> Result<()> { use futures::FutureExt; let now = Instant::now_coarse(); @@ -951,6 +988,7 @@ impl StreamTaskInfo { } /// Flush all template files and generate corresponding metadata. + #[instrument(skip_all)] pub async fn generate_metadata(&self, store_id: u64) -> Result { let mut w = self.flushing_files.write().await; let mut wm = self.flushing_meta_files.write().await; @@ -1000,6 +1038,7 @@ impl StreamTaskInfo { } /// move need-flushing files to flushing_files. + #[instrument(skip_all)] pub async fn move_to_flushing_files(&self) -> Result<&Self> { // if flushing_files is not empty, which represents this flush is a retry // operation. @@ -1009,9 +1048,9 @@ impl StreamTaskInfo { return Ok(self); } - let mut w = self.files.write().await; - let mut fw = self.flushing_files.write().await; - let mut fw_meta = self.flushing_meta_files.write().await; + let mut w = frame!(self.files.write()).await; + let mut fw = frame!(self.flushing_files.write()).await; + let mut fw_meta = frame!(self.flushing_meta_files.write()).await; for (k, v) in w.drain() { // we should generate file metadata(calculate sha256) when moving file. // because sha256 calculation is a unsafe move operation. @@ -1028,6 +1067,7 @@ impl StreamTaskInfo { Ok(self) } + #[instrument(skip_all)] pub async fn clear_flushing_files(&self) { for (_, data_file, _) in self.flushing_files.write().await.drain(..) 
{ debug!("removing data file"; "size" => %data_file.file_size, "name" => %data_file.inner.path().display()); @@ -1047,6 +1087,7 @@ impl StreamTaskInfo { } } + #[instrument(skip_all)] async fn merge_and_flush_log_files_to( storage: Arc, files: &mut [(TempFileKey, DataFile, DataFileInfo)], @@ -1131,6 +1172,7 @@ impl StreamTaskInfo { Ok(()) } + #[instrument(skip_all)] pub async fn flush_log(&self, metadata: &mut MetadataInfo) -> Result<()> { let storage = self.storage.clone(); self.merge_log(metadata, storage.clone(), &self.flushing_files, false) @@ -1140,6 +1182,7 @@ impl StreamTaskInfo { Ok(()) } + #[instrument(skip_all)] async fn merge_log( &self, metadata: &mut MetadataInfo, @@ -1184,6 +1227,7 @@ impl StreamTaskInfo { Ok(()) } + #[instrument(skip_all)] pub async fn flush_meta(&self, metadata_info: MetadataInfo) -> Result<()> { if !metadata_info.file_groups.is_empty() { let meta_path = metadata_info.path_to_meta(); @@ -1212,6 +1256,7 @@ impl StreamTaskInfo { /// The caller can try to advance the resolved ts and provide it to the /// function, and we would use `max(resolved_ts_provided, /// resolved_ts_from_file)`. + #[instrument(skip_all)] pub async fn do_flush( &self, store_id: u64, @@ -1301,6 +1346,7 @@ impl StreamTaskInfo { Ok(()) } + #[instrument(skip_all)] pub async fn update_global_checkpoint( &self, global_checkpoint: u64, @@ -1435,6 +1481,7 @@ impl DataFile { } /// Add a new KV pair to the file, returning its size. 
+ #[instrument(skip_all)] async fn on_events(&mut self, events: ApplyEvents) -> Result { let now = Instant::now_coarse(); let mut total_size = 0; diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index 7aeecb775cc..88eb5dea6ec 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -18,6 +18,8 @@ use tikv_util::{ box_err, debug, info, sys::thread::ThreadBuildWrapper, time::Instant, warn, worker::Scheduler, }; use tokio::sync::mpsc::{channel, error::SendError, Receiver, Sender}; +use tracing::instrument; +use tracing_active_tree::root; use txn_types::TimeStamp; use crate::{ @@ -176,6 +178,7 @@ where impl ScanCmd { /// execute the initial scanning via the specificated [`InitialDataLoader`]. + #[instrument(skip_all)] async fn exec_by(&self, initial_scan: impl InitialScan) -> Result<()> { let Self { region, @@ -195,6 +198,7 @@ impl ScanCmd { } /// execute the command, when meeting error, retrying. 
+ #[instrument(skip_all)] async fn exec_by_with_retry(self, init: impl InitialScan) { let mut retry_time = INITIAL_SCAN_FAILURE_MAX_RETRY_TIME; loop { @@ -232,7 +236,9 @@ async fn scan_executor_loop(init: impl InitialScan, mut cmds: Receiver) } let init = init.clone(); - tokio::task::spawn(async move { + let id = cmd.region.id; + let handle_id = cmd.handle.id; + tokio::task::spawn(root!("exec_initial_scan"; async move { metrics::PENDING_INITIAL_SCAN_LEN .with_label_values(&["executing"]) .inc(); @@ -240,7 +246,7 @@ async fn scan_executor_loop(init: impl InitialScan, mut cmds: Receiver) metrics::PENDING_INITIAL_SCAN_LEN .with_label_values(&["executing"]) .dec(); - }); + }; region = id, handle = ?handle_id)); } } @@ -251,9 +257,9 @@ fn spawn_executors( ) -> ScanPoolHandle { let (tx, rx) = tokio::sync::mpsc::channel(MESSAGE_BUFFER_SIZE); let pool = create_scan_pool(number); - pool.spawn(async move { + pool.spawn(root!("scan_executor_loop"; async move { scan_executor_loop(init, rx).await; - }); + })); ScanPoolHandle { tx, _pool: pool } } @@ -397,6 +403,7 @@ where } /// the handler loop. 
+ #[instrument(skip_all)] async fn region_operator_loop( self, mut message_box: Receiver, @@ -532,6 +539,7 @@ where } } + #[instrument(skip_all)] async fn try_start_observe(&self, region: &Region, handle: ObserveHandle) -> Result<()> { match self.find_task_by_region(region) { None => { @@ -559,6 +567,7 @@ where Ok(()) } + #[instrument(skip_all)] async fn start_observe(&self, region: Region) { self.start_observe_with_failure_count(region, 0).await } @@ -569,7 +578,7 @@ where self.subs.add_pending_region(®ion); if let Err(err) = self.try_start_observe(®ion, handle.clone()).await { warn!("failed to start observe, would retry"; "err" => %err, utils::slog_region(®ion)); - tokio::spawn(async move { + tokio::spawn(root!("retry_start_observe"; async move { #[cfg(not(feature = "failpoints"))] let delay = backoff_for_start_observe(has_failed_for); #[cfg(feature = "failpoints")] @@ -593,7 +602,7 @@ where has_failed_for: has_failed_for + 1 }) ) - }); + })); } } @@ -668,6 +677,7 @@ where Ok(()) } + #[instrument(skip_all)] async fn get_last_checkpoint_of(&self, task: &str, region: &Region) -> Result { fail::fail_point!("get_last_checkpoint_of", |hint| Err(Error::Other( box_err!( @@ -688,6 +698,7 @@ where Ok(cp.ts) } + #[instrument(skip_all)] async fn spawn_scan(&self, cmd: ScanCmd) { // we should not spawn initial scanning tasks to the tokio blocking pool // because it is also used for converting sync File I/O to async. (for now!) 
@@ -702,6 +713,7 @@ where } } + #[instrument(skip_all)] async fn observe_over_with_initial_data_from_checkpoint( &self, region: &Region, diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 90c966d13e2..36402a3e5dc 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -62,6 +62,7 @@ use tokio::{ sync::oneshot::{self, Receiver, Sender}, }; use tokio_openssl::SslStream; +use tracing_active_tree::tree::formating::FormatFlat; use crate::{ config::{ConfigController, LogLevel}, @@ -458,6 +459,17 @@ impl StatusServer where R: 'static + Send + RaftExtension + Clone, { + async fn dump_async_trace() -> hyper::Result> { + Ok(make_response( + StatusCode::OK, + tracing_active_tree::layer::global().fmt_bytes_with(|t, buf| { + t.traverse_with(FormatFlat::new(buf)).unwrap_or_else(|err| { + error!("failed to format tree, unreachable!"; "err" => %err); + }) + }), + )) + } + async fn handle_pause_grpc( mut grpc_service_mgr: GrpcServiceManager, ) -> hyper::Result> { @@ -722,6 +734,7 @@ where (Method::PUT, "/resume_grpc") => { Self::handle_resume_grpc(grpc_service_mgr).await } + (Method::GET, "/async_tasks") => Self::dump_async_trace().await, _ => { is_unknown_path = true; Ok(make_response(StatusCode::NOT_FOUND, "path not found")) From 1a73761cc93b4914a3db0c1cb545f4b824372733 Mon Sep 17 00:00:00 2001 From: Shirly Date: Tue, 2 Jan 2024 15:33:32 +0800 Subject: [PATCH 1082/1149] raftstore/*: Print region information when overlap is detected. 
(#16241) close tikv/tikv#16240 Signed-off-by: Shirly --- components/raftstore/src/store/fsm/store.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 9c3274d7945..c42cdb66764 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -2333,7 +2333,7 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER break; } - debug!( + info!( "msg is overlapped with exist region"; "region_id" => region_id, "msg" => ?msg, From 369f7be1a7a64cf5dccbef40e2aef0381b01d516 Mon Sep 17 00:00:00 2001 From: Ping Yu Date: Wed, 3 Jan 2024 10:48:31 +0800 Subject: [PATCH 1083/1149] server: fix panic of gRPC threads due to thread group properties not set (#16258) close tikv/tikv#16236 Set thread group properties for gRPC threads to avoid panic when checking "is_shutdown". Signed-off-by: Ping Yu Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/server/src/server.rs | 5 ++++- components/server/src/server2.rs | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 594eac686fe..6fb1963bbfb 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -325,11 +325,14 @@ where SecurityManager::new(&config.security) .unwrap_or_else(|e| fatal!("failed to create security manager: {}", e)), ); + let props = tikv_util::thread_group::current_properties(); let env = Arc::new( EnvBuilder::new() .cq_count(config.server.grpc_concurrency) .name_prefix(thd_name!(GRPC_THREAD_PREFIX)) - .after_start(|| { + .after_start(move || { + tikv_util::thread_group::set_properties(props.clone()); + // SAFETY: we will call `remove_thread_memory_accessor` at before_stop. 
unsafe { add_thread_memory_accessor() }; }) diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 38f5e94038f..238b2f4982a 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -277,11 +277,14 @@ where SecurityManager::new(&config.security) .unwrap_or_else(|e| fatal!("failed to create security manager: {}", e)), ); + let props = tikv_util::thread_group::current_properties(); let env = Arc::new( EnvBuilder::new() .cq_count(config.server.grpc_concurrency) .name_prefix(thd_name!(GRPC_THREAD_PREFIX)) - .after_start(|| { + .after_start(move || { + tikv_util::thread_group::set_properties(props.clone()); + // SAFETY: we will call `remove_thread_memory_accessor` at before_stop. unsafe { add_thread_memory_accessor() }; }) From 8ba0dacb7326d06b01fe392ed3522d70da796b73 Mon Sep 17 00:00:00 2001 From: "TONG, Zhigao" Date: Wed, 3 Jan 2024 11:26:02 +0800 Subject: [PATCH 1084/1149] executor: omit truncating error when handling decimal multiplication in arithmetic operators (#16187) close tikv/tikv#16268, close pingcap/tidb#48332 omit truncating error when handling decimal multiplication in arithmetic operators Signed-off-by: TONG, Zhigao Co-authored-by: Liqi Geng --- .../src/codec/mysql/decimal.rs | 69 ++++++++++++++----- .../tidb_query_expr/src/impl_arithmetic.rs | 16 ++++- 2 files changed, 64 insertions(+), 21 deletions(-) diff --git a/components/tidb_query_datatype/src/codec/mysql/decimal.rs b/components/tidb_query_datatype/src/codec/mysql/decimal.rs index bc18d7192f9..3a2be14758e 100644 --- a/components/tidb_query_datatype/src/codec/mysql/decimal.rs +++ b/components/tidb_query_datatype/src/codec/mysql/decimal.rs @@ -806,6 +806,9 @@ fn do_mul(lhs: &Decimal, rhs: &Decimal) -> Res { i32::from(word_cnt!(rhs.int_cnt)), i32::from(word_cnt!(rhs.frac_cnt)), ); + + let old_r_int_word_cnt = r_int_word_cnt; + let (int_word_to, frac_word_to) = ( word_cnt!(lhs.int_cnt + rhs.int_cnt) as usize, l_frac_word_cnt + 
r_frac_word_cnt, @@ -831,7 +834,7 @@ fn do_mul(lhs: &Decimal, rhs: &Decimal) -> Res { l_frac_word_cnt = 0; r_frac_word_cnt = 0; } else { - old_frac_word_to -= int_word_to as i32; + old_frac_word_to -= frac_word_to as i32; old_int_word_to = old_frac_word_to / 2; if l_frac_word_cnt <= r_frac_word_cnt { l_frac_word_cnt -= old_int_word_to; @@ -843,41 +846,43 @@ fn do_mul(lhs: &Decimal, rhs: &Decimal) -> Res { } } - let mut start_to = int_word_to + frac_word_to; - let (offset_min, offset_max) = (0, i32::from(WORD_BUF_LEN)); - let r_start = num::clamp(r_int_word_cnt + r_frac_word_cnt, offset_min, offset_max) as usize; - let left_stop = num::clamp(l_int_word_cnt + l_frac_word_cnt, offset_min, offset_max) as usize; - for l_idx in (0..left_stop).rev() { - if start_to < r_start { - break; - } + let mut start_to = (int_word_to + frac_word_to - 1) as isize; + let r_start = old_r_int_word_cnt + r_frac_word_cnt - 1; + let r_stop = old_r_int_word_cnt - r_int_word_cnt; + let mut l_idx = l_int_word_cnt + l_frac_word_cnt - 1; + + while l_idx >= 0 { let (mut carry, mut idx_to) = (0, start_to); - start_to -= 1; - for r_idx in (0..r_start).rev() { - idx_to -= 1; - let p = u64::from(lhs.word_buf[l_idx]) * u64::from(rhs.word_buf[r_idx]); + let mut r_idx = r_start; + while r_idx >= r_stop { + let p = + u64::from(lhs.word_buf[l_idx as usize]) * u64::from(rhs.word_buf[r_idx as usize]); let hi = p / u64::from(WORD_BASE); let lo = p - hi * u64::from(WORD_BASE); add( - dec.word_buf[idx_to], + dec.word_buf[idx_to as usize], lo as u32, &mut carry, - &mut dec.word_buf[idx_to], + &mut dec.word_buf[idx_to as usize], ); carry += hi as u32; + r_idx -= 1; + idx_to -= 1; } while carry > 0 { - if idx_to == 0 { + if idx_to < 0 { return Res::Overflow(dec); } - idx_to -= 1; add( - dec.word_buf[idx_to], + dec.word_buf[idx_to as usize], 0, &mut carry, - &mut dec.word_buf[idx_to], + &mut dec.word_buf[idx_to as usize], ); + idx_to -= 1; } + l_idx -= 1; + start_to -= 1; } // Now we have to check for -0.000 case 
@@ -3356,6 +3361,32 @@ mod tests { } } + #[test] + fn test_mul_truncated() { + let cases = vec![( + "999999999999999999999999999999999.9999", + "766507373740683764182618847769240.9770", + Res::Truncated( + "766507373740683764182618847769239999923349262625931623581738115223.07600000", + ), + Res::Truncated( + "766507373740683764182618847769240210492626259316235817381152230759.02300000", + ), + )]; + + for (lhs_str, rhs_str, exp_str, rev_exp_str) in cases { + let lhs: Decimal = lhs_str.parse().unwrap(); + let rhs: Decimal = rhs_str.parse().unwrap(); + let exp = exp_str.map(|s| s.to_owned()); + let res = (&lhs * &rhs).map(|d| d.to_string()); + assert_eq!(res, exp); + + let exp = rev_exp_str.map(|s| s.to_owned()); + let res = (&rhs * &lhs).map(|d| d.to_string()); + assert_eq!(res, exp); + } + } + #[test] fn test_div_mod() { let cases = vec![ diff --git a/components/tidb_query_expr/src/impl_arithmetic.rs b/components/tidb_query_expr/src/impl_arithmetic.rs index 5960e69c2cd..0e7284f457e 100644 --- a/components/tidb_query_expr/src/impl_arithmetic.rs +++ b/components/tidb_query_expr/src/impl_arithmetic.rs @@ -317,7 +317,12 @@ impl ArithmeticOp for DecimalMultiply { type T = Decimal; fn calc(lhs: &Decimal, rhs: &Decimal) -> Result> { - let res: codec::Result = (lhs * rhs).into(); + let res: codec::Result = match lhs * rhs { + codec::mysql::Res::Ok(t) => Ok(t), + codec::mysql::Res::Truncated(t) => Ok(t), + other => other.into(), + }; + Ok(Some(res?)) } } @@ -872,7 +877,14 @@ mod tests { #[test] fn test_multiply_decimal() { - let test_cases = vec![("1.1", "2.2", "2.42")]; + let test_cases = vec![ + ("1.1", "2.2", "2.42"), + ( + "999999999999999999999999999999999.9999", + "766507373740683764182618847769240.9770", + "766507373740683764182618847769239999923349262625931623581738115223.07600000", + ), + ]; for (lhs, rhs, expected) in test_cases { let expected: Option = expected.parse().ok(); let output = RpnFnScalarEvaluator::new() From defc9338fd9e0539db86e0998e4e889334cc94d4 Mon 
Sep 17 00:00:00 2001 From: ShuNing Date: Wed, 3 Jan 2024 12:24:32 +0800 Subject: [PATCH 1085/1149] raftstore: fix load base split cannot works in pure follower/stale read scenario (#16261) close tikv/tikv#15539 raftstore: fix load base split cannot works in pure follower/stale read scenario - allow split command proposal on non leader peer Signed-off-by: nolouch Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/fsm/peer.rs | 4 + components/raftstore/src/store/metrics.rs | 1 + components/raftstore/src/store/peer.rs | 7 +- components/raftstore/src/store/worker/pd.rs | 21 ++++- .../src/store/worker/split_controller.rs | 3 +- .../raftstore/test_split_region.rs | 86 +++++++++++++++++++ 6 files changed, 117 insertions(+), 5 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 7c33bf66b87..7fd71022343 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -5321,10 +5321,14 @@ where let allow_replica_read = read_only && msg.get_header().get_replica_read(); let flags = WriteBatchFlags::from_bits_check(msg.get_header().get_flags()); let allow_stale_read = read_only && flags.contains(WriteBatchFlags::STALE_READ); + let split_region = msg.has_admin_request() + && msg.get_admin_request().get_cmd_type() == AdminCmdType::BatchSplit; if !self.fsm.peer.is_leader() && !is_read_index_request && !allow_replica_read && !allow_stale_read + // allow proposal split command at non-leader, raft layer will forward it to leader. 
+ && !split_region { self.ctx.raft_metrics.invalid_proposal.not_leader.inc(); let leader = self.fsm.peer.get_peer_from_cache(leader_id); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 908b650469c..911cf4646a4 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -193,6 +193,7 @@ make_static_metric! { conf_change, batch, dropped_read_index, + non_leader_split, } pub label_enum RaftInvalidProposal { diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 904d35fec2f..8417766fc22 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -4342,7 +4342,12 @@ where } match req.get_admin_request().get_cmd_type() { - AdminCmdType::Split | AdminCmdType::BatchSplit => ctx.insert(ProposalContext::SPLIT), + AdminCmdType::Split | AdminCmdType::BatchSplit => { + ctx.insert(ProposalContext::SPLIT); + if !self.is_leader() { + poll_ctx.raft_metrics.propose.non_leader_split.inc(); + } + } AdminCmdType::PrepareMerge => { self.pre_propose_prepare_merge(poll_ctx, req)?; ctx.insert(ProposalContext::PREPARE_MERGE); diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 71ab6a9e2a9..d082d0c2e58 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -469,6 +469,14 @@ where const DEFAULT_LOAD_BASE_SPLIT_CHECK_INTERVAL: Duration = Duration::from_secs(1); const DEFAULT_COLLECT_TICK_INTERVAL: Duration = Duration::from_secs(1); +fn default_load_base_split_check_interval() -> Duration { + fail_point!("mock_load_base_split_check_interval", |t| { + let t = t.unwrap().parse::().unwrap(); + Duration::from_millis(t) + }); + DEFAULT_LOAD_BASE_SPLIT_CHECK_INTERVAL +} + fn default_collect_tick_interval() -> Duration { fail_point!("mock_collect_tick_interval", |_| { Duration::from_millis(1) @@ -594,7 
+602,7 @@ where cpu_stats_sender: None, collect_store_infos_interval: interval, load_base_split_check_interval: cmp::min( - DEFAULT_LOAD_BASE_SPLIT_CHECK_INTERVAL, + default_load_base_split_check_interval(), interval, ), // Use `inspect_latency_interval` as the minimal limitation for collecting tick. @@ -2130,8 +2138,15 @@ where let f = async move { for split_info in split_infos { - let Ok(Some(region)) = - pd_client.get_region_by_id(split_info.region_id).await else { continue }; + let Ok(Some((region, leader))) = + pd_client.get_region_leader_by_id(split_info.region_id).await else { continue }; + if leader.get_id() != split_info.peer.get_id() { + info!("load base split region on non-leader"; + "region_id" => region.get_id(), + "peer_id" => split_info.peer.get_id(), + "leader_id" => leader.get_id(), + ); + } // Try to split the region with the given split key. if let Some(split_key) = split_info.split_key { Self::handle_ask_batch_split( diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 4bbcc773763..185d331bb6b 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -285,7 +285,7 @@ impl Recorder { } fn update_peer(&mut self, peer: &Peer) { - if self.peer != *peer { + if self.peer != *peer && peer.get_id() != 0 { self.peer = peer.clone(); } } @@ -845,6 +845,7 @@ impl AutoSplitController { "qps" => qps, "byte" => byte, "cpu_usage" => cpu_usage, + "peer" => ?recorder.peer, ); self.recorders.remove(®ion_id); } else if is_unified_read_pool_busy && is_region_busy { diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 1dd5e7db6d0..b54af465852 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -9,11 +9,14 @@ use std::{ use engine_rocks::RocksEngine; use 
engine_traits::{Peekable, CF_DEFAULT, CF_WRITE}; +use grpcio::{ChannelBuilder, Environment}; use keys::data_key; use kvproto::{ + kvrpcpb::{Context, Op}, metapb, pdpb, raft_cmdpb::*, raft_serverpb::{ExtraMessageType, RaftMessage}, + tikvpb_grpc::TikvClient, }; use pd_client::PdClient; use raft::eraftpb::MessageType; @@ -238,6 +241,89 @@ fn test_auto_split_region() { assert!(resp.get_header().get_error().has_key_not_in_region()); } +#[test_case(test_raftstore::new_server_cluster)] +fn test_load_base_auto_split_with_follower_read() { + fail::cfg("mock_tick_interval", "return(0)").unwrap(); + fail::cfg("mock_collect_tick_interval", "return(0)").unwrap(); + fail::cfg("mock_load_base_split_check_interval", "return(100)").unwrap(); + fail::cfg("mock_region_is_busy", "return(0)").unwrap(); + fail::cfg("mock_unified_read_pool_is_busy", "return(0)").unwrap(); + let count = 2; + let mut cluster = new_cluster(0, count); + cluster.cfg.split.qps_threshold = Some(10); + cluster.cfg.split.byte_threshold = Some(1); + cluster.cfg.split.sample_threshold = 10; + cluster.cfg.split.detect_times = 2; + cluster.cfg.split.split_balance_score = 0.5; + cluster.run(); + let pd_client = Arc::clone(&cluster.pd_client); + let target = pd_client.get_region(b"").unwrap(); + let leader = cluster.leader_of_region(target.get_id()).unwrap(); + let follower = target + .get_peers() + .iter() + .find(|p| p.get_id() != leader.get_id()) + .unwrap() + .clone(); + + let env: Arc = Arc::new(Environment::new(1)); + let new_client = |peer: metapb::Peer| { + let cli = TikvClient::new( + ChannelBuilder::new(env.clone()) + .connect(&cluster.sim.rl().get_addr(peer.get_store_id())), + ); + let epoch = cluster.get_region_epoch(target.get_id()); + let mut ctx = Context::default(); + ctx.set_region_id(target.get_id()); + ctx.set_peer(peer); + ctx.set_region_epoch(epoch); + PeerClient { cli, ctx } + }; + let mut region1 = pd_client.get_region(b"k1").unwrap(); + let mut region2 = pd_client.get_region(b"k3").unwrap(); + 
assert_eq!(region1.get_id(), region2.get_id()); + + let leader_client = new_client(leader); + let commit_ts1 = leader_client.must_kv_write( + &pd_client, + vec![new_mutation(Op::Put, &b"k1"[..], &b"v1"[..])], + b"k1".to_vec(), + ); + let commit_ts2 = leader_client.must_kv_write( + &pd_client, + vec![new_mutation(Op::Put, &b"k2"[..], &b"v2"[..])], + b"k2".to_vec(), + ); + let commit_ts3 = leader_client.must_kv_write( + &pd_client, + vec![new_mutation(Op::Put, &b"k3"[..], &b"v3"[..])], + b"k3".to_vec(), + ); + let mut follower_client = new_client(follower); + follower_client.ctx.set_replica_read(true); + for i in 0..100 { + follower_client.kv_read(b"k1".to_vec(), commit_ts1 + i); + follower_client.kv_read(b"k2".to_vec(), commit_ts2 + i); + follower_client.kv_read(b"k3".to_vec(), commit_ts3 + i); + } + thread::sleep(Duration::from_millis(100)); + follower_client.kv_read(b"k3".to_vec(), commit_ts3); + for _ in 1..250 { + region1 = pd_client.get_region(b"k0").unwrap(); + region2 = pd_client.get_region(b"k4").unwrap(); + if region1.get_id() != region2.get_id() { + break; + } + thread::sleep(Duration::from_millis(20)) + } + assert_ne!(region1.get_id(), region2.get_id()); + fail::remove("mock_tick_interval"); + fail::remove("mock_region_is_busy"); + fail::remove("mock_collect_tick_interval"); + fail::remove("mock_unified_read_pool_is_busy"); + fail::remove("mock_load_base_split_check_interval"); +} + // A filter that disable commitment by heartbeat. #[derive(Clone)] struct EraseHeartbeatCommit; From 0378b24e7b7e2ca58d8babb285b21cbe417bb43a Mon Sep 17 00:00:00 2001 From: lucasliang Date: Wed, 3 Jan 2024 14:41:02 +0800 Subject: [PATCH 1086/1149] metrics: fix typo errors in the Raft IO panel. (#16266) ref tikv/tikv#16265 Fix typo errors in the Raft IO panel. 
Signed-off-by: lucasliang --- metrics/grafana/tikv_details.dashboard.py | 4 ++-- metrics/grafana/tikv_details.json | 4 ++-- metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 7eb6cd29205..8f26ad95cbc 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -1835,8 +1835,8 @@ def RaftIO() -> RowPanel: heatmap_panel_graph_panel_histogram_quantile_pairs( heatmap_title="Append log duration", heatmap_description="The time consumed when Raft appends log", - graph_title="99% Commit log duration per server", - graph_description="The time consumed when Raft commits log on each TiKV instance", + graph_title="99% Append log duration per server", + graph_description="The time consumed when Raft appends log on each TiKV instance", graph_by_labels=["instance"], graph_hides=["count", "avg"], yaxis_format=UNITS.SECONDS, diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index c75ca380b78..31fe27afa7e 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -15930,7 +15930,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when Raft commits log on each TiKV instance", + "description": "The time consumed when Raft appends log on each TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -16085,7 +16085,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% Commit log duration per server", + "title": "99% Append log duration per server", "tooltip": { "msResolution": true, "shared": true, diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 254f78869af..c384d535673 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ 
-8f50008a4cb515602e8fe44d67cebbedd3e693d811051a223be5e08dc66eee30 ./metrics/grafana/tikv_details.json +774093bd523da2b611990ff638c64fcd3cec35b3c5d391643129cb7ee6b72b41 ./metrics/grafana/tikv_details.json From 7901e0c1456ea7396c0d9a3462b7e961a4560122 Mon Sep 17 00:00:00 2001 From: qupeng Date: Thu, 4 Jan 2024 11:21:32 +0800 Subject: [PATCH 1087/1149] cdc: add more metrics about output events queue time (#16281) close tikv/tikv#16282 Signed-off-by: qupeng --- components/cdc/src/channel.rs | 20 ++++++++++++-------- components/cdc/src/initializer.rs | 8 ++++++-- components/cdc/src/metrics.rs | 12 ++++++++++++ 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/components/cdc/src/channel.rs b/components/cdc/src/channel.rs index b386c3561bb..af9caadd394 100644 --- a/components/cdc/src/channel.rs +++ b/components/cdc/src/channel.rs @@ -235,7 +235,7 @@ macro_rules! impl_from_future_send_error { impl_from_future_send_error! { FuturesSendError, - TrySendError<(CdcEvent, usize)>, + TrySendError<(Instant, CdcEvent, usize)>, } impl From for SendError { @@ -246,8 +246,8 @@ impl From for SendError { #[derive(Clone)] pub struct Sink { - unbounded_sender: UnboundedSender<(CdcEvent, usize)>, - bounded_sender: Sender<(CdcEvent, usize)>, + unbounded_sender: UnboundedSender<(Instant, CdcEvent, usize)>, + bounded_sender: Sender<(Instant, CdcEvent, usize)>, memory_quota: Arc, } @@ -258,7 +258,8 @@ impl Sink { if bytes != 0 { self.memory_quota.alloc(bytes)?; } - match self.unbounded_sender.unbounded_send((event, bytes)) { + let now = Instant::now_coarse(); + match self.unbounded_sender.unbounded_send((now, event, bytes)) { Ok(_) => Ok(()), Err(e) => { // Free quota if send fails. 
@@ -276,9 +277,11 @@ impl Sink { total_bytes += bytes; } self.memory_quota.alloc(total_bytes as _)?; + + let now = Instant::now_coarse(); for event in events { let bytes = event.size() as usize; - if let Err(e) = self.bounded_sender.feed((event, bytes)).await { + if let Err(e) = self.bounded_sender.feed((now, event, bytes)).await { // Free quota if send fails. self.memory_quota.free(total_bytes as _); return Err(SendError::from(e)); @@ -294,15 +297,16 @@ impl Sink { } pub struct Drain { - unbounded_receiver: UnboundedReceiver<(CdcEvent, usize)>, - bounded_receiver: Receiver<(CdcEvent, usize)>, + unbounded_receiver: UnboundedReceiver<(Instant, CdcEvent, usize)>, + bounded_receiver: Receiver<(Instant, CdcEvent, usize)>, memory_quota: Arc, } impl<'a> Drain { pub fn drain(&'a mut self) -> impl Stream + 'a { stream::select(&mut self.bounded_receiver, &mut self.unbounded_receiver).map( - |(mut event, size)| { + |(start, mut event, size)| { + CDC_EVENTS_PENDING_DURATION.observe(start.saturating_elapsed_secs() * 1000.0); if let CdcEvent::Barrier(ref mut barrier) = event { if let Some(barrier) = barrier.take() { // Unset barrier when it is received. diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index ef39a693e3e..0f1b1f5bb42 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -1,5 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::sync::Arc; +use std::{sync::Arc, time::Duration}; use api_version::ApiV2; use crossbeam::atomic::AtomicCell; @@ -38,7 +38,7 @@ use tikv_util::{ debug, defer, error, info, memory::MemoryQuota, sys::inspector::{self_thread_inspector, ThreadInspector}, - time::{Instant, Limiter}, + time::{duration_to_sec, Instant, Limiter}, warn, worker::Scheduler, Either, @@ -260,6 +260,7 @@ impl Initializer { fail_point!("cdc_incremental_scan_start"); let mut done = false; let start = Instant::now_coarse(); + let mut sink_time = Duration::default(); let curr_state = self.downstream_state.load(); assert!(matches!( @@ -282,7 +283,9 @@ impl Initializer { } debug!("cdc scan entries"; "len" => entries.len(), "region_id" => region_id); fail_point!("before_schedule_incremental_scan"); + let start_sink = Instant::now_coarse(); self.sink_scan_events(entries, done).await?; + sink_time += start_sink.saturating_elapsed(); } fail_point!("before_post_incremental_scan"); @@ -302,6 +305,7 @@ impl Initializer { } CDC_SCAN_DURATION_HISTOGRAM.observe(takes.as_secs_f64()); + CDC_SCAN_SINK_DURATION_HISTOGRAM.observe(duration_to_sec(sink_time)); Ok(()) } diff --git a/components/cdc/src/metrics.rs b/components/cdc/src/metrics.rs index 5db91572112..6bef4313959 100644 --- a/components/cdc/src/metrics.rs +++ b/components/cdc/src/metrics.rs @@ -88,6 +88,11 @@ lazy_static! { exponential_buckets(0.005, 2.0, 20).unwrap() ) .unwrap(); + pub static ref CDC_SCAN_SINK_DURATION_HISTOGRAM: Histogram = register_histogram!( + "tikv_cdc_scan_sink_duration_seconds", + "Bucketed histogram of cdc async scan sink time duration", + ) + .unwrap(); pub static ref CDC_SCAN_BYTES: IntCounter = register_int_counter!( "tikv_cdc_scan_bytes_total", "Total fetched bytes of CDC incremental scan" @@ -214,6 +219,13 @@ lazy_static! 
{ pub static ref CDC_ROCKSDB_PERF_COUNTER_STATIC: PerfCounter = auto_flush_from!(CDC_ROCKSDB_PERF_COUNTER, PerfCounter); + + pub static ref CDC_EVENTS_PENDING_DURATION: Histogram = register_histogram!( + "tikv_cdc_events_pending_duration", + "Pending duration for all events, in milliseconds", + exponential_buckets(0.01, 2.0, 17).unwrap(), + ) + .unwrap(); } thread_local! { From 39886146a2e314252c26af473808dea8b9dfb2ad Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 4 Jan 2024 18:47:03 +0800 Subject: [PATCH 1088/1149] config: Enable titan for newly created instance (#16285) ref tikv/tikv#16245 Enable titan for newly created instance Signed-off-by: Connor1996 --- Cargo.lock | 8 +-- src/config/mod.rs | 65 +++++++++++++++++++----- tests/integrations/storage/test_titan.rs | 1 + 3 files changed, 57 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 436e1b9fb6a..f5b2437547d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2812,7 +2812,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#c4b7047314a9b27926a1b7b25d2e6d1a37a48d2b" +source = "git+https://github.com/tikv/rust-rocksdb.git#d877018095b44b2933969fe7caf5c3e0cd86be5b" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2831,7 +2831,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#c4b7047314a9b27926a1b7b25d2e6d1a37a48d2b" +source = "git+https://github.com/tikv/rust-rocksdb.git#d877018095b44b2933969fe7caf5c3e0cd86be5b" dependencies = [ "bzip2-sys", "cc", @@ -4656,7 +4656,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#c4b7047314a9b27926a1b7b25d2e6d1a37a48d2b" +source = "git+https://github.com/tikv/rust-rocksdb.git#d877018095b44b2933969fe7caf5c3e0cd86be5b" dependencies = [ "libc 0.2.146", "librocksdb_sys", @@ -6887,7 +6887,7 @@ version = "1.6.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", "static_assertions", ] diff --git a/src/config/mod.rs b/src/config/mod.rs index c0c2a679b5a..7d631eeb1a8 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -145,12 +145,6 @@ pub struct TitanCfConfig { pub max_gc_batch_size: ReadableSize, #[online_config(skip)] pub discardable_ratio: f64, - // deprecated. - #[online_config(skip)] - #[doc(hidden)] - #[serde(skip_serializing)] - #[deprecated = "Titan doesn't need to sample anymore"] - pub sample_ratio: Option, #[online_config(skip)] pub merge_small_file_threshold: ReadableSize, pub blob_run_mode: BlobRunMode, @@ -160,18 +154,24 @@ pub struct TitanCfConfig { pub range_merge: bool, #[online_config(skip)] pub max_sorted_runs: i32, + #[online_config(skip)] #[doc(hidden)] #[serde(skip_serializing)] #[deprecated = "The feature is removed"] pub gc_merge_rewrite: bool, + #[online_config(skip)] + #[doc(hidden)] + #[serde(skip_serializing)] + #[deprecated = "Titan doesn't need to sample anymore"] + pub sample_ratio: Option, } impl Default for TitanCfConfig { #[allow(deprecated)] fn default() -> Self { Self { - min_blob_size: ReadableSize::kb(1), // disable titan default + min_blob_size: ReadableSize::kb(1), blob_file_compression: CompressionType::Zstd, zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), @@ -1196,8 +1196,6 @@ impl RaftCfConfig { #[derive(Clone, Serialize, Deserialize, PartialEq, Debug)] #[serde(default)] #[serde(rename_all = "kebab-case")] -// Note that Titan is still an experimental feature. Once enabled, it can't fall -// back. Forced fallback may result in data loss. 
pub struct TitanDbConfig { pub enabled: bool, pub dirname: String, @@ -1210,7 +1208,7 @@ pub struct TitanDbConfig { impl Default for TitanDbConfig { fn default() -> Self { Self { - enabled: false, + enabled: false, // Enabled only for newly created cluster dirname: "".to_owned(), disable_gc: false, max_background_gc: 1, @@ -1382,7 +1380,7 @@ impl Default for DbConfig { } impl DbConfig { - pub fn optimize_for(&mut self, engine: EngineType) { + pub fn optimize_for(&mut self, engine: EngineType, kv_data_exists: bool) { match engine { EngineType::RaftKv => { self.allow_concurrent_memtable_write.get_or_insert(true); @@ -1394,6 +1392,9 @@ impl DbConfig { if self.lockcf.write_buffer_size.is_none() { self.lockcf.write_buffer_size = Some(ReadableSize::mb(32)); } + if !kv_data_exists && !self.titan.enabled { + self.titan.enabled = true; + } } EngineType::RaftKv2 => { self.enable_multi_batch_write.get_or_insert(false); @@ -3663,7 +3664,8 @@ impl TikvConfig { .validate(kv_data_exists)?; // Optimize. 
- self.rocksdb.optimize_for(self.storage.engine); + self.rocksdb + .optimize_for(self.storage.engine, kv_data_exists); self.coprocessor .optimize_for(self.storage.engine == EngineType::RaftKv2); self.split @@ -4741,6 +4743,7 @@ mod tests { use grpcio::ResourceQuota; use itertools::Itertools; use kvproto::kvrpcpb::CommandPri; + use raft_log_engine::RaftLogEngine; use raftstore::{ coprocessor::{ config::{RAFTSTORE_V2_SPLIT_SIZE, SPLIT_SIZE}, @@ -5404,7 +5407,7 @@ mod tests { .rocksdb .build_resources(Arc::default(), cfg.storage.engine); let engine = RocksDBEngine::new( - &cfg.storage.data_dir, + &cfg.infer_kv_engine_path(None).unwrap(), Some(cfg.rocksdb.build_opt(&resource, cfg.storage.engine)), cfg.rocksdb.build_cf_opts( &cfg.rocksdb @@ -5417,6 +5420,12 @@ mod tests { None, ) .unwrap(); + let mut raft_cfg = cfg.raft_engine.config.clone(); + raft_cfg.dir = cfg.infer_raft_engine_path(None).unwrap(); + let _raft_db = RaftLogEngine::new( + raft_cfg, None, None, // io_rate_limiter + ) + .unwrap(); let storage = TestStorageBuilder::<_, _, F>::from_engine_and_lock_mgr(engine, MockLockManager::new()) .config(cfg.storage.clone()) @@ -5826,6 +5835,36 @@ mod tests { } } + #[test] + fn test_titan_auto_enable() { + // Do not auto enable titan for existing instances + let (cfg, dir) = TikvConfig::with_tmp().unwrap(); + persist_config(&cfg).unwrap(); + let (storage, ..) = new_engines::(cfg); + drop(storage); + let mut cfg = TikvConfig::from_file(&dir.path().join(LAST_CONFIG_FILE), None).unwrap(); + assert_eq!(cfg.rocksdb.titan.enabled, false); + cfg.validate().unwrap(); + assert_eq!(cfg.rocksdb.titan.enabled, false); + let (_storage, cfg_controller, ..) 
= new_engines::(cfg); + assert_eq!(cfg_controller.get_current().rocksdb.titan.enabled, false); + drop(dir); + + // Auto enable titan for new instances + let (mut cfg, dir) = TikvConfig::with_tmp().unwrap(); + assert_eq!(cfg.rocksdb.titan.enabled, false); + cfg.validate().unwrap(); + persist_config(&cfg).unwrap(); + assert_eq!(cfg.rocksdb.titan.enabled, true); + let (storage, cfg_controller, ..) = new_engines::(cfg); + assert_eq!(cfg_controller.get_current().rocksdb.titan.enabled, true); + drop(storage); + // The config is persisted + let cfg = TikvConfig::from_file(&dir.path().join(LAST_CONFIG_FILE), None).unwrap(); + assert_eq!(cfg.rocksdb.titan.enabled, true); + drop(dir); + } + #[test] fn test_change_store_scheduler_worker_pool_size() { let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index fc84d56fd00..cc39a7ff0c6 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -30,6 +30,7 @@ use tikv_util::{ use txn_types::{Key, Write, WriteType}; #[test] +#[ignore] fn test_turnoff_titan() { let mut cluster = new_node_cluster(0, 3); cluster.cfg.rocksdb.defaultcf.disable_auto_compactions = true; From 26054366368e2288ce43468610b34d29e417c20f Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 5 Jan 2024 10:03:33 +0800 Subject: [PATCH 1089/1149] *: Upgrade rust toolchain to nightly-2023-12-10 (#16213) close tikv/tikv#15581 Upgrade rust toolchain to nightly-2023-12-10 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- cmd/tikv-ctl/src/executor.rs | 15 ++-- cmd/tikv-ctl/src/fork_readonly_tikv.rs | 1 + cmd/tikv-ctl/src/main.rs | 2 +- components/backup-stream/src/errors.rs | 3 +- .../backup-stream/src/metadata/client.rs | 6 +- components/backup-stream/src/router.rs | 13 +--- .../backup-stream/src/subscription_track.rs | 2 +- components/backup-stream/src/utils.rs | 6 
+- .../backup-stream/tests/failpoints/mod.rs | 1 - components/backup/src/endpoint.rs | 8 +-- components/batch-system/src/fsm.rs | 8 ++- components/case_macros/src/lib.rs | 6 +- components/cdc/src/endpoint.rs | 2 +- components/cdc/src/initializer.rs | 15 ++-- components/cdc/src/service.rs | 4 +- components/cloud/aws/src/s3.rs | 2 +- components/cloud/azure/src/azblob.rs | 2 +- components/cloud/gcp/src/gcs.rs | 2 +- components/codec/src/lib.rs | 1 + .../concurrency_manager/src/lock_table.rs | 4 +- components/coprocessor_plugin_api/src/util.rs | 6 ++ components/encryption/src/config.rs | 8 +-- components/engine_rocks/src/lib.rs | 7 +- components/engine_rocks/src/misc.rs | 7 +- components/engine_rocks/src/properties.rs | 18 ++--- components/engine_rocks/src/raft_engine.rs | 4 +- components/engine_rocks/src/rocks_metrics.rs | 7 +- components/engine_test/src/lib.rs | 11 ++- components/engine_traits/src/flush.rs | 2 +- components/engine_traits/src/lib.rs | 3 +- components/engine_traits/src/tablet.rs | 2 +- components/external_storage/src/export.rs | 58 ++++++++-------- .../online_config_derive/src/lib.rs | 14 ++-- components/raftstore-v2/src/batch/store.rs | 6 +- components/raftstore-v2/src/fsm/store.rs | 24 +++++-- .../operation/command/admin/compact_log.rs | 17 +++-- .../operation/command/admin/merge/commit.rs | 8 +-- .../src/operation/command/admin/merge/mod.rs | 15 ++-- .../operation/command/admin/merge/prepare.rs | 25 ++++--- .../src/operation/command/admin/mod.rs | 9 ++- .../src/operation/command/admin/split.rs | 4 +- .../command/admin/transfer_leader.rs | 22 +++--- .../raftstore-v2/src/operation/command/mod.rs | 13 ++-- .../src/operation/command/write/ingest.rs | 16 +++-- components/raftstore-v2/src/operation/life.rs | 12 +++- components/raftstore-v2/src/operation/misc.rs | 10 ++- .../src/operation/query/capture.rs | 4 +- .../raftstore-v2/src/operation/query/local.rs | 6 +- .../src/operation/ready/apply_trace.rs | 7 +- .../src/operation/ready/snapshot.rs | 18 ++--- 
.../raftstore-v2/src/operation/txn_ext.rs | 16 +++-- .../src/operation/unsafe_recovery/create.rs | 4 +- .../src/operation/unsafe_recovery/demote.rs | 19 ++++-- .../src/operation/unsafe_recovery/destroy.rs | 4 +- .../operation/unsafe_recovery/force_leader.rs | 11 +-- .../src/operation/unsafe_recovery/report.rs | 16 +++-- .../src/worker/cleanup/compact.rs | 16 +++-- .../raftstore-v2/src/worker/pd/region.rs | 15 +--- .../raftstore-v2/src/worker/pd/split.rs | 6 +- components/raftstore-v2/src/worker/tablet.rs | 13 +++- .../tests/integrations/cluster.rs | 4 +- .../raftstore/src/coprocessor/config.rs | 9 ++- .../raftstore/src/coprocessor/dispatcher.rs | 5 +- .../src/coprocessor/region_info_accessor.rs | 8 ++- .../src/coprocessor/split_check/table.rs | 2 +- components/raftstore/src/errors.rs | 2 +- components/raftstore/src/lib.rs | 3 +- .../raftstore/src/store/async_io/write.rs | 11 ++- .../raftstore/src/store/compaction_guard.rs | 2 +- components/raftstore/src/store/config.rs | 12 ++-- .../raftstore/src/store/entry_storage.rs | 34 ++++++---- components/raftstore/src/store/fsm/apply.rs | 20 +++--- components/raftstore/src/store/fsm/peer.rs | 27 +++++--- components/raftstore/src/store/msg.rs | 24 +++++-- components/raftstore/src/store/peer.rs | 52 +++++++------- .../raftstore/src/store/peer_storage.rs | 2 +- .../raftstore/src/store/region_snapshot.rs | 2 +- .../raftstore/src/store/simple_write.rs | 24 +++++-- components/raftstore/src/store/snap.rs | 4 +- components/raftstore/src/store/snap/io.rs | 4 +- components/raftstore/src/store/txn_ext.rs | 2 +- components/raftstore/src/store/util.rs | 10 +-- components/raftstore/src/store/worker/pd.rs | 23 +++---- .../raftstore/src/store/worker/region.rs | 33 +++++---- .../raftstore/src/store/worker/split_check.rs | 18 +++-- .../src/store/worker/split_controller.rs | 15 ++-- components/resolved_ts/src/cmd.rs | 6 +- components/resolved_ts/src/endpoint.rs | 20 +++--- components/resolved_ts/src/scanner.rs | 2 +- 
components/resource_control/src/lib.rs | 2 - .../resource_control/src/resource_group.rs | 12 ++-- components/resource_control/src/worker.rs | 13 ++-- components/resource_metering/src/lib.rs | 3 +- components/resource_metering/src/model.rs | 2 +- .../src/recorder/sub_recorder/cpu.rs | 4 +- .../resource_metering/tests/recorder_test.rs | 6 +- components/server/src/common.rs | 4 +- components/server/src/signal_handler.rs | 8 ++- components/snap_recovery/src/leader_keeper.rs | 2 +- components/sst_importer/src/import_mode2.rs | 2 +- components/sst_importer/src/sst_importer.rs | 11 ++- components/sst_importer/src/util.rs | 1 + components/test_coprocessor/src/store.rs | 2 +- .../example_plugin/src/lib.rs | 2 +- components/test_pd_client/src/pd.rs | 4 +- components/test_raftstore-v2/src/cluster.rs | 2 +- components/test_raftstore-v2/src/lib.rs | 1 - components/test_raftstore-v2/src/node.rs | 2 +- components/test_raftstore-v2/src/server.rs | 14 +++- .../src/transport_simulate.rs | 16 ++--- components/test_raftstore/src/cluster.rs | 4 +- components/test_raftstore/src/node.rs | 2 +- components/test_raftstore/src/server.rs | 12 +++- components/test_util/src/lib.rs | 1 - .../tidb_query_codegen/src/rpn_function.rs | 35 +++++----- .../tidb_query_datatype/src/codec/convert.rs | 12 ++-- .../src/codec/data_type/mod.rs | 2 +- .../tidb_query_datatype/src/codec/datum.rs | 8 +-- .../src/codec/mysql/decimal.rs | 2 +- .../src/codec/mysql/json/jcodec.rs | 8 +-- .../src/codec/mysql/time/mod.rs | 2 +- .../src/codec/row/v2/row_slice.rs | 2 +- .../tidb_query_datatype/src/codec/table.rs | 4 +- components/tidb_query_datatype/src/lib.rs | 1 + .../src/index_scan_executor.rs | 4 +- components/tidb_query_executors/src/runner.rs | 18 ++--- .../src/selection_executor.rs | 6 +- .../src/util/aggr_executor.rs | 4 +- .../tidb_query_executors/src/util/mod.rs | 4 +- components/tidb_query_expr/src/impl_cast.rs | 2 +- .../tidb_query_expr/src/impl_miscellaneous.rs | 9 +-- 
components/tidb_query_expr/src/impl_string.rs | 6 +- .../tidb_query_expr/src/types/expr_eval.rs | 2 + components/tikv_alloc/src/lib.rs | 1 + components/tikv_kv/src/cursor.rs | 2 +- components/tikv_kv/src/lib.rs | 1 + components/tikv_util/src/logger/formatter.rs | 6 +- components/tikv_util/src/lru.rs | 2 +- components/tikv_util/src/memory.rs | 2 +- .../src/metrics/allocator_metrics.rs | 2 +- components/tikv_util/src/store/region.rs | 2 +- components/tikv_util/src/sys/cpu_time.rs | 2 +- components/txn_types/src/types.rs | 18 ++--- rust-toolchain | 1 - rust-toolchain.toml | 3 + scripts/clippy | 6 ++ src/config/mod.rs | 68 ++++++++++--------- src/coprocessor/endpoint.rs | 19 +++--- src/coprocessor/metrics.rs | 2 +- src/coprocessor/mod.rs | 2 + src/lib.rs | 5 +- src/server/debug.rs | 12 ++-- src/server/debug2.rs | 19 +++--- src/server/engine_factory.rs | 11 ++- src/server/gc_worker/gc_manager.rs | 9 +-- src/server/gc_worker/gc_worker.rs | 8 +-- src/server/lock_manager/deadlock.rs | 9 +-- src/server/raft_client.rs | 2 +- src/server/raftkv/mod.rs | 6 +- src/server/raftkv/raft_extension.rs | 4 +- src/server/raftkv2/mod.rs | 6 +- src/server/raftkv2/node.rs | 4 +- src/server/raftkv2/raft_extension.rs | 4 +- src/server/service/debug.rs | 1 - src/server/service/diagnostics/log.rs | 16 ++--- src/server/status_server/mod.rs | 2 +- src/server/tablet_snap.rs | 18 +++-- src/storage/lock_manager/lock_wait_context.rs | 12 ++-- src/storage/metrics.rs | 2 +- src/storage/mod.rs | 30 ++++---- src/storage/mvcc/reader/point_getter.rs | 2 +- src/storage/mvcc/reader/reader.rs | 23 +++---- src/storage/mvcc/reader/scanner/forward.rs | 2 +- src/storage/raw/raw_mvcc.rs | 2 +- src/storage/txn/actions/prewrite.rs | 10 +-- src/storage/txn/commands/atomic_store.rs | 4 +- src/storage/txn/commands/prewrite.rs | 26 +++---- .../singleton_flow_controller.rs | 2 +- src/storage/txn/latch.rs | 20 +++--- src/storage/txn/sched_pool.rs | 2 +- tests/benches/raftstore/mod.rs | 2 +- 
tests/failpoints/cases/test_async_io.rs | 4 +- tests/failpoints/cases/test_early_apply.rs | 6 +- tests/failpoints/cases/test_engine.rs | 2 + tests/failpoints/cases/test_rawkv.rs | 2 +- .../cases/test_read_execution_tracker.rs | 16 +++-- tests/failpoints/cases/test_server.rs | 2 +- tests/failpoints/cases/test_split_region.rs | 3 +- tests/failpoints/cases/test_storage.rs | 4 +- tests/failpoints/cases/test_transaction.rs | 2 +- .../failpoints/cases/test_transfer_leader.rs | 4 +- tests/integrations/backup/mod.rs | 1 + tests/integrations/coprocessor/test_select.rs | 2 +- tests/integrations/import/test_apply_log.rs | 2 +- .../integrations/raftstore/test_bootstrap.rs | 4 +- .../raftstore/test_early_apply.rs | 8 +-- .../integrations/raftstore/test_flashback.rs | 2 +- .../integrations/raftstore/test_hibernate.rs | 2 +- tests/integrations/raftstore/test_merge.rs | 4 +- tests/integrations/raftstore/test_multi.rs | 2 +- .../raftstore/test_replica_read.rs | 2 +- .../raftstore/test_snap_recovery.rs | 4 +- .../raftstore/test_split_region.rs | 2 +- tests/integrations/raftstore/test_stats.rs | 12 ++-- tests/integrations/server/status_server.rs | 2 +- 205 files changed, 959 insertions(+), 778 deletions(-) delete mode 100644 rust-toolchain create mode 100644 rust-toolchain.toml diff --git a/cmd/tikv-ctl/src/executor.rs b/cmd/tikv-ctl/src/executor.rs index 3e4e505a32a..673b0cb3019 100644 --- a/cmd/tikv-ctl/src/executor.rs +++ b/cmd/tikv-ctl/src/executor.rs @@ -1,7 +1,7 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - borrow::ToOwned, cmp::Ordering, path::Path, pin::Pin, result, str, string::ToString, sync::Arc, + borrow::ToOwned, cmp::Ordering, path::Path, result, str, string::ToString, sync::Arc, time::Duration, }; @@ -13,7 +13,12 @@ use engine_traits::{ CF_WRITE, DATA_CFS, }; use file_system::read_dir; -use futures::{executor::block_on, future, stream, Stream, StreamExt, TryStreamExt}; +use futures::{ + executor::block_on, + future, + stream::{self, BoxStream}, + StreamExt, TryStreamExt, +}; use grpcio::{ChannelBuilder, Environment}; use kvproto::{ debugpb::{Db as DbType, *}, @@ -55,14 +60,16 @@ pub const METRICS_ROCKSDB_RAFT: &str = "rocksdb_raft"; pub const METRICS_JEMALLOC: &str = "jemalloc"; pub const LOCK_FILE_ERROR: &str = "IO error: While lock file"; -type MvccInfoStream = Pin, MvccInfo), String>>>>; +type MvccInfoStream = BoxStream<'static, result::Result<(Vec, MvccInfo), String>>; fn get_engine_type(dir: &str) -> EngineType { let mut entries = read_dir(dir).unwrap(); let mut engine1 = false; let mut engine2 = false; while let Some(Ok(e)) = entries.next() { - if let Ok(ty) = e.file_type() && ty.is_dir() { + if let Ok(ty) = e.file_type() + && ty.is_dir() + { if e.file_name() == "tablets" { engine2 = true; } else if e.file_name() == "db" { diff --git a/cmd/tikv-ctl/src/fork_readonly_tikv.rs b/cmd/tikv-ctl/src/fork_readonly_tikv.rs index ef3ae7f8023..dd2ec039542 100644 --- a/cmd/tikv-ctl/src/fork_readonly_tikv.rs +++ b/cmd/tikv-ctl/src/fork_readonly_tikv.rs @@ -270,6 +270,7 @@ fn add_write_permission>(path: P) -> Result<(), String> { let mut pmt = std::fs::metadata(path) .map_err(|e| format!("metadata({}): {}", path.display(), e))? 
.permissions(); + #[allow(clippy::permissions_set_readonly_false)] pmt.set_readonly(false); std::fs::set_permissions(path, pmt) .map_err(|e| format!("set_permissions({}): {}", path.display(), e)) diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 25f8cc1337b..7ab9d9e9e37 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -1,6 +1,6 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. -#![feature(once_cell)] +#![feature(lazy_cell)] #![feature(let_chains)] #[macro_use] diff --git a/components/backup-stream/src/errors.rs b/components/backup-stream/src/errors.rs index 67461e2978b..df3c5ea7032 100644 --- a/components/backup-stream/src/errors.rs +++ b/components/backup-stream/src/errors.rs @@ -133,7 +133,7 @@ where /// Like `errors.Annotate` in Go. /// Wrap an unknown error with [`Error::Other`]. -#[macro_export(crate)] +#[macro_export] macro_rules! annotate { ($inner: expr, $message: expr) => { { @@ -278,6 +278,7 @@ mod test { }) } + #[allow(clippy::unnecessary_literal_unwrap)] #[bench] // 773 ns/iter (+/- 8) fn baseline(b: &mut test::Bencher) { diff --git a/components/backup-stream/src/metadata/client.rs b/components/backup-stream/src/metadata/client.rs index 2232770915f..21ca2d60556 100644 --- a/components/backup-stream/src/metadata/client.rs +++ b/components/backup-stream/src/metadata/client.rs @@ -675,11 +675,11 @@ impl MetadataClient { let cp = match r.len() { 0 => { let global_cp = self.global_checkpoint_of(task).await?; - let cp = match global_cp { + + match global_cp { None => self.get_task_start_ts_checkpoint(task).await?, Some(cp) => cp, - }; - cp + } } _ => Checkpoint::from_kv(&r[0])?, }; diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 31166cbe384..24d239a3f73 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -567,15 +567,6 @@ impl RouterInner { let task_info = self.get_task_info(&task).await?; 
task_info.on_events(events).await?; let file_size_limit = self.temp_file_size_limit.load(Ordering::SeqCst); - #[cfg(features = "failpoints")] - { - let delayed = (|| { - fail::fail_point!("router_on_event_delay_ms", |v| { - v.and_then(|v| v.parse::().ok()).unwrap_or(0) - }) - })(); - tokio::time::sleep(Duration::from_millis(delayed)).await; - } // When this event make the size of temporary files exceeds the size limit, make // a flush. Note that we only flush if the size is less than the limit before @@ -1023,7 +1014,9 @@ impl StreamTaskInfo { .last_flush_time .swap(Box::into_raw(Box::new(Instant::now())), Ordering::SeqCst); // manual gc last instant - unsafe { Box::from_raw(ptr) }; + unsafe { + let _ = Box::from_raw(ptr); + }; } pub fn should_flush(&self, flush_interval: &Duration) -> bool { diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index c70ad9c8038..5a6b2e0753b 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -307,7 +307,7 @@ impl SubscriptionTracer { } }; - let mut subscription = sub.value_mut(); + let subscription = sub.value_mut(); let old_epoch = subscription.meta.get_region_epoch(); let new_epoch = new_region.get_region_epoch(); diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index ed8b7579587..c6e9c031b0f 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -280,7 +280,7 @@ pub fn request_to_triple(mut req: Request) -> Either<(Vec, Vec, CfName), /// `try_send!(s: Scheduler, task: T)` tries to send a task to the scheduler, /// once meet an error, would report it, with the current file and line (so it /// is made as a macro). returns whether it success. -#[macro_export(crate)] +#[macro_export] macro_rules! try_send { ($s:expr, $task:expr) => { match $s.schedule($task) { @@ -304,7 +304,7 @@ macro_rules! 
try_send { /// `backup_stream_debug`. because once we enable debug log for all crates, it /// would soon get too verbose to read. using this macro now we can enable debug /// log level for the crate only (even compile time...). -#[macro_export(crate)] +#[macro_export] macro_rules! debug { ($($t: tt)+) => { if cfg!(feature = "backup-stream-debug") { @@ -768,7 +768,7 @@ impl<'a> slog::KV for SlogRegion<'a> { } /// A shortcut for making an opaque future type for return type or argument -/// type, which is sendable and not borrowing any variables. +/// type, which is sendable and not borrowing any variables. /// /// `future![T]` == `impl Future + Send + 'static` #[macro_export] diff --git a/components/backup-stream/tests/failpoints/mod.rs b/components/backup-stream/tests/failpoints/mod.rs index ea09e9c7a1f..35f40c10574 100644 --- a/components/backup-stream/tests/failpoints/mod.rs +++ b/components/backup-stream/tests/failpoints/mod.rs @@ -284,7 +284,6 @@ mod all { .build(); let keys = run_async_test(suite.write_records(0, 128, 1)); let failed = Arc::new(AtomicBool::new(false)); - fail::cfg("router_on_event_delay_ms", "6*return(1000)").unwrap(); fail::cfg_callback("scan_and_async_send::about_to_consume", { let failed = failed.clone(); move || { diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 5c243a1e8d8..dabf9433b34 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -413,7 +413,7 @@ impl BackupRange { let entries = batch.drain(); if writer.need_split_keys() { - let this_end_key = entries.as_slice().get(0).map_or_else( + let this_end_key = entries.as_slice().first().map_or_else( || Err(Error::Other(box_err!("get entry error: nothing in batch"))), |x| { x.to_key().map(|k| k.into_raw().unwrap()).map_err(|e| { @@ -2492,8 +2492,8 @@ pub mod tests { fn test_backup_file_name() { let region = metapb::Region::default(); let store_id = 1; - let test_cases = vec!["s3", "local", "gcs", "azure", "hdfs"]; 
- let test_target = vec![ + let test_cases = ["s3", "local", "gcs", "azure", "hdfs"]; + let test_target = [ "1/0_0_000", "1/0_0_000", "1_0_0_000", @@ -2512,7 +2512,7 @@ pub mod tests { assert_eq!(target.to_string(), prefix_arr.join(delimiter)); } - let test_target = vec!["1/0_0", "1/0_0", "1_0_0", "1_0_0", "1_0_0"]; + let test_target = ["1/0_0", "1/0_0", "1_0_0", "1_0_0", "1_0_0"]; for (storage_name, target) in test_cases.iter().zip(test_target.iter()) { let key = None; let filename = backup_file_name(store_id, ®ion, key, storage_name); diff --git a/components/batch-system/src/fsm.rs b/components/batch-system/src/fsm.rs index 3fa5ad15a64..148550760c4 100644 --- a/components/batch-system/src/fsm.rs +++ b/components/batch-system/src/fsm.rs @@ -149,7 +149,9 @@ impl FsmState { Ok(_) => return, Err(Self::NOTIFYSTATE_DROP) => { let ptr = self.data.swap(ptr::null_mut(), Ordering::AcqRel); - unsafe { Box::from_raw(ptr) }; + unsafe { + let _ = Box::from_raw(ptr); + }; return; } Err(s) => s, @@ -179,7 +181,9 @@ impl Drop for FsmState { fn drop(&mut self) { let ptr = self.data.swap(ptr::null_mut(), Ordering::SeqCst); if !ptr.is_null() { - unsafe { Box::from_raw(ptr) }; + unsafe { + let _ = Box::from_raw(ptr); + }; } self.state_cnt.fetch_sub(1, Ordering::Relaxed); } diff --git a/components/case_macros/src/lib.rs b/components/case_macros/src/lib.rs index 057b68065d2..db29cd3b3b9 100644 --- a/components/case_macros/src/lib.rs +++ b/components/case_macros/src/lib.rs @@ -53,7 +53,8 @@ fn to_snake(s: &str) -> String { /// e.g. `HelloWorld` -> `hello-world` #[proc_macro] pub fn kebab_case(stream: TokenStream) -> TokenStream { - transform_idents_in_stream_to_string!(stream, |s: String| to_kebab(&s)) + let f = |s: String| to_kebab(&s); + transform_idents_in_stream_to_string!(stream, f) } /// Expands idents in the input stream as snake-case string literal @@ -61,5 +62,6 @@ pub fn kebab_case(stream: TokenStream) -> TokenStream { /// e.g. 
`HelloWorld` -> `hello_world` #[proc_macro] pub fn snake_case(stream: TokenStream) -> TokenStream { - transform_idents_in_stream_to_string!(stream, |s: String| to_snake(&s)) + let f = |s: String| to_snake(&s); + transform_idents_in_stream_to_string!(stream, f) } diff --git a/components/cdc/src/endpoint.rs b/components/cdc/src/endpoint.rs index 9f840ab49d5..3476298e1e1 100644 --- a/components/cdc/src/endpoint.rs +++ b/components/cdc/src/endpoint.rs @@ -2495,7 +2495,7 @@ mod tests { // region 3 to conn b. let mut conn_rxs = vec![]; let quota = Arc::new(MemoryQuota::new(usize::MAX)); - for region_ids in vec![vec![1, 2], vec![3]] { + for region_ids in [vec![1, 2], vec![3]] { let (tx, rx) = channel::channel(1, quota.clone()); conn_rxs.push(rx); let conn = Conn::new(tx, String::new()); diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 0f1b1f5bb42..86a4018fffb 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -719,12 +719,11 @@ mod tests { false, ); initializer.observed_range = observed_range.clone(); - let check_result = || loop { + let check_result = || { let task = rx.recv().unwrap(); match task { Task::ResolverReady { resolver, .. } => { assert_eq!(resolver.locks(), &expected_locks); - return; } t => panic!("unexpected task {} received", t), } @@ -774,13 +773,11 @@ mod tests { )) .unwrap(); - loop { - let task = rx.recv_timeout(Duration::from_millis(100)); - match task { - Ok(t) => panic!("unexpected task {} received", t), - Err(RecvTimeoutError::Timeout) => break, - Err(e) => panic!("unexpected err {:?}", e), - } + let task = rx.recv_timeout(Duration::from_millis(100)); + match task { + Ok(t) => panic!("unexpected task {} received", t), + Err(RecvTimeoutError::Timeout) => (), + Err(e) => panic!("unexpected err {:?}", e), } // Test cancellation. 
diff --git a/components/cdc/src/service.rs b/components/cdc/src/service.rs index 7cbf268f2b7..b2d40e62612 100644 --- a/components/cdc/src/service.rs +++ b/components/cdc/src/service.rs @@ -217,8 +217,8 @@ struct EventFeedHeaders { } impl EventFeedHeaders { - const FEATURES_KEY: &str = "features"; - const STREAM_MULTIPLEXING: &str = "stream-multiplexing"; + const FEATURES_KEY: &'static str = "features"; + const STREAM_MULTIPLEXING: &'static str = "stream-multiplexing"; const FEATURES: &'static [&'static str] = &[Self::STREAM_MULTIPLEXING]; fn parse_features(value: &[u8]) -> Result, String> { diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index f06d86b37cb..fc5e2a3a638 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -16,7 +16,7 @@ use futures_util::{ io::{AsyncRead, AsyncReadExt}, stream::TryStreamExt, }; -pub use kvproto::brpb::{Bucket as InputBucket, S3 as InputConfig}; +pub use kvproto::brpb::S3 as InputConfig; use rusoto_core::{request::DispatchSignedRequest, ByteStream, RusotoError}; use rusoto_credential::{ProvideAwsCredentials, StaticProvider}; use rusoto_s3::{util::AddressingStyle, *}; diff --git a/components/cloud/azure/src/azblob.rs b/components/cloud/azure/src/azblob.rs index 5a806c54faf..078dc55be8f 100644 --- a/components/cloud/azure/src/azblob.rs +++ b/components/cloud/azure/src/azblob.rs @@ -24,7 +24,7 @@ use futures_util::{ stream::StreamExt, TryStreamExt, }; -pub use kvproto::brpb::{AzureBlobStorage as InputConfig, AzureCustomerKey, Bucket as InputBucket}; +pub use kvproto::brpb::{AzureBlobStorage as InputConfig, AzureCustomerKey}; use oauth2::{ClientId, ClientSecret}; use tikv_util::{ debug, diff --git a/components/cloud/gcp/src/gcs.rs b/components/cloud/gcp/src/gcs.rs index 56cd317c3f8..4406954992d 100644 --- a/components/cloud/gcp/src/gcs.rs +++ b/components/cloud/gcp/src/gcs.rs @@ -14,7 +14,7 @@ use futures_util::{ use http::HeaderValue; use hyper::{client::HttpConnector, Body, 
Client, Request, Response, StatusCode}; use hyper_tls::HttpsConnector; -pub use kvproto::brpb::{Bucket as InputBucket, Gcs as InputConfig}; +pub use kvproto::brpb::Gcs as InputConfig; use tame_gcs::{ common::{PredefinedAcl, StorageClass}, objects::{InsertObjectOptional, Metadata, Object}, diff --git a/components/codec/src/lib.rs b/components/codec/src/lib.rs index 71d63e34d94..0602ef1ffcc 100644 --- a/components/codec/src/lib.rs +++ b/components/codec/src/lib.rs @@ -1,6 +1,7 @@ // Copyright 2018 TiKV Project Authors. Licensed under Apache-2.0. #![cfg_attr(test, feature(test))] +#![allow(internal_features)] #![feature(core_intrinsics)] #![feature(min_specialization)] diff --git a/components/concurrency_manager/src/lock_table.rs b/components/concurrency_manager/src/lock_table.rs index 8f4fb8952c3..92621837b36 100644 --- a/components/concurrency_manager/src/lock_table.rs +++ b/components/concurrency_manager/src/lock_table.rs @@ -88,8 +88,8 @@ impl LockTable { /// Finds the first handle in the given range that `pred` returns `Some`. /// The `Some` return value of `pred` will be returned by `find_first`. - pub fn find_first<'m, T>( - &'m self, + pub fn find_first( + &self, start_key: Option<&Key>, end_key: Option<&Key>, mut pred: impl FnMut(Arc) -> Option, diff --git a/components/coprocessor_plugin_api/src/util.rs b/components/coprocessor_plugin_api/src/util.rs index 31d75610d75..e0e1d55b0c9 100644 --- a/components/coprocessor_plugin_api/src/util.rs +++ b/components/coprocessor_plugin_api/src/util.rs @@ -19,10 +19,16 @@ pub type PluginConstructorSignature = /// Type signature of the exported function with symbol /// [`PLUGIN_GET_BUILD_INFO_SYMBOL`]. +// FIXME: Fixing the warning breaks compatibility, maybe we should deprecated it +// by A new API? +#[allow(improper_ctypes_definitions)] pub type PluginGetBuildInfoSignature = extern "C" fn() -> BuildInfo; /// Type signature of the exported function with symbol /// [`PLUGIN_GET_PLUGIN_INFO_SYMBOL`]. 
+// FIXME: Fixing the warning breaks compatibility, maybe we should deprecated it +// by A new API? +#[allow(improper_ctypes_definitions)] pub type PluginGetPluginInfoSignature = extern "C" fn() -> PluginInfo; /// Automatically collected build information about the plugin that is exposed diff --git a/components/encryption/src/config.rs b/components/encryption/src/config.rs index 23e049e0df4..c66d494ebef 100644 --- a/components/encryption/src/config.rs +++ b/components/encryption/src/config.rs @@ -136,9 +136,11 @@ impl KmsConfig { #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "kebab-case", tag = "type")] +#[derive(Default)] pub enum MasterKeyConfig { // Store encryption metadata as plaintext. Data still get encrypted. Not allowed to use if // encryption is enabled. (i.e. when encryption_config.method != Plaintext). + #[default] Plaintext, // Pass master key from a file, with key encoded as a readable hex string. The file should end @@ -156,12 +158,6 @@ pub enum MasterKeyConfig { }, } -impl Default for MasterKeyConfig { - fn default() -> Self { - MasterKeyConfig::Plaintext - } -} - mod encryption_method_serde { use std::fmt; diff --git a/components/engine_rocks/src/lib.rs b/components/engine_rocks/src/lib.rs index 5afa5452344..28c7c97d0a8 100644 --- a/components/engine_rocks/src/lib.rs +++ b/components/engine_rocks/src/lib.rs @@ -27,13 +27,13 @@ extern crate tikv_alloc; extern crate test; mod cf_names; -pub use crate::cf_names::*; + mod cf_options; pub use crate::cf_options::*; mod checkpoint; pub use crate::checkpoint::*; mod compact; -pub use crate::compact::*; + mod db_options; pub use crate::db_options::*; mod db_vector; @@ -48,7 +48,7 @@ mod misc; pub use crate::misc::*; pub mod range_properties; mod snapshot; -pub use crate::{range_properties::*, snapshot::*}; +pub use crate::snapshot::*; mod sst; pub use crate::sst::*; mod sst_partitioner; @@ -114,7 +114,6 @@ pub use rocksdb::{ pub mod flow_control_factors; use 
::encryption::DataKeyManager; -pub use flow_control_factors::*; pub mod raw; diff --git a/components/engine_rocks/src/misc.rs b/components/engine_rocks/src/misc.rs index f82e1e68832..66f56f5c4ba 100644 --- a/components/engine_rocks/src/misc.rs +++ b/components/engine_rocks/src/misc.rs @@ -195,10 +195,7 @@ impl MiscExt for RocksEngine { fopts.set_allow_write_stall(true); fopts.set_check_if_compaction_disabled(true); fopts.set_expected_oldest_key_time(time); - self - .as_inner() - .flush_cf(handle, &fopts) - .map_err(r2e)?; + self.as_inner().flush_cf(handle, &fopts).map_err(r2e)?; return Ok(true); } Ok(false) @@ -504,7 +501,7 @@ mod tests { .collect(); let mut kvs: Vec<(&[u8], &[u8])> = vec![]; - for (_, key) in keys.iter().enumerate() { + for key in keys.iter() { kvs.push((key.as_slice(), b"value")); } for &(k, v) in kvs.as_slice() { diff --git a/components/engine_rocks/src/properties.rs b/components/engine_rocks/src/properties.rs index 87ccab9e5ab..a5365532bfc 100644 --- a/components/engine_rocks/src/properties.rs +++ b/components/engine_rocks/src/properties.rs @@ -144,10 +144,7 @@ pub struct RangeProperties { impl RangeProperties { pub fn get(&self, key: &[u8]) -> &RangeOffsets { - let idx = self - .offsets - .binary_search_by_key(&key, |&(ref k, _)| k) - .unwrap(); + let idx = self.offsets.binary_search_by_key(&key, |(k, _)| k).unwrap(); &self.offsets[idx].1 } @@ -205,11 +202,11 @@ impl RangeProperties { if start == end { return (0, 0); } - let start_offset = match self.offsets.binary_search_by_key(&start, |&(ref k, _)| k) { + let start_offset = match self.offsets.binary_search_by_key(&start, |(k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; - let end_offset = match self.offsets.binary_search_by_key(&end, |&(ref k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end, |(k, _)| k) { Ok(idx) => Some(idx), Err(next_idx) => next_idx.checked_sub(1), }; @@ -225,10 +222,7 @@ impl RangeProperties { start_key: &[u8], 
end_key: &[u8], ) -> Vec<(Vec, RangeOffsets)> { - let start_offset = match self - .offsets - .binary_search_by_key(&start_key, |&(ref k, _)| k) - { + let start_offset = match self.offsets.binary_search_by_key(&start_key, |(k, _)| k) { Ok(idx) => { if idx == self.offsets.len() - 1 { return vec![]; @@ -239,7 +233,7 @@ impl RangeProperties { Err(next_idx) => next_idx, }; - let end_offset = match self.offsets.binary_search_by_key(&end_key, |&(ref k, _)| k) { + let end_offset = match self.offsets.binary_search_by_key(&end_key, |(k, _)| k) { Ok(idx) => { if idx == 0 { return vec![]; @@ -869,7 +863,7 @@ mod tests { let mut collector = MvccPropertiesCollector::new(KeyMode::Txn); b.iter(|| { - for &(ref k, ref v) in &entries { + for (k, v) in &entries { collector.add(k, v, DBEntryType::Put, 0, 0); } }); diff --git a/components/engine_rocks/src/raft_engine.rs b/components/engine_rocks/src/raft_engine.rs index c11186ef443..df0988f4cdb 100644 --- a/components/engine_rocks/src/raft_engine.rs +++ b/components/engine_rocks/src/raft_engine.rs @@ -354,7 +354,9 @@ impl RaftLogBatch for RocksWriteBatchVec { entries: Vec, ) -> Result<()> { let overwrite_to = overwrite_to.unwrap_or(0); - if let Some(last) = entries.last() && last.get_index() + 1 < overwrite_to { + if let Some(last) = entries.last() + && last.get_index() + 1 < overwrite_to + { for index in last.get_index() + 1..overwrite_to { let key = keys::raft_log_key(raft_group_id, index); self.delete(&key).unwrap(); diff --git a/components/engine_rocks/src/rocks_metrics.rs b/components/engine_rocks/src/rocks_metrics.rs index 2b32af111ec..6a6065f35fd 100644 --- a/components/engine_rocks/src/rocks_metrics.rs +++ b/components/engine_rocks/src/rocks_metrics.rs @@ -1143,9 +1143,10 @@ impl StatisticsReporter for RocksStatisticsReporter { STORE_ENGINE_NUM_FILES_AT_LEVEL_VEC .with_label_values(&[&self.name, cf, &level.to_string()]) .set(num_files as i64); - if num_files > 0 && let Some(ratio) = level_stats.weighted_compression_ratio { - 
let normalized_compression_ratio = - ratio / num_files as f64; + if num_files > 0 + && let Some(ratio) = level_stats.weighted_compression_ratio + { + let normalized_compression_ratio = ratio / num_files as f64; STORE_ENGINE_COMPRESSION_RATIO_VEC .with_label_values(&[&self.name, cf, &level.to_string()]) .set(normalized_compression_ratio); diff --git a/components/engine_test/src/lib.rs b/components/engine_test/src/lib.rs index 85d9d4c1b78..eb3adf94213 100644 --- a/components/engine_test/src/lib.rs +++ b/components/engine_test/src/lib.rs @@ -415,13 +415,10 @@ pub mod ctor { rocks_db_opts.enable_multi_batch_write(false); rocks_db_opts.allow_concurrent_memtable_write(false); if let Some(storage) = db_opt.state_storage - && let Some(flush_state) = ctx.flush_state { - let listener = PersistenceListener::new( - ctx.id, - ctx.suffix.unwrap(), - flush_state, - storage, - ); + && let Some(flush_state) = ctx.flush_state + { + let listener = + PersistenceListener::new(ctx.id, ctx.suffix.unwrap(), flush_state, storage); rocks_db_opts.add_event_listener(RocksPersistenceListener::new(listener)); } let factory = diff --git a/components/engine_traits/src/flush.rs b/components/engine_traits/src/flush.rs index 8590236e126..46b1877a703 100644 --- a/components/engine_traits/src/flush.rs +++ b/components/engine_traits/src/flush.rs @@ -119,7 +119,7 @@ impl SstApplyState { for sst in ssts { let cf_index = data_cf_offset(sst.get_cf_name()); if let Some(metas) = sst_list.get_mut(cf_index) { - metas.drain_filter(|entry| entry.sst.get_uuid() == sst.get_uuid()); + metas.retain(|entry| entry.sst.get_uuid() != sst.get_uuid()); } } } diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 53708994561..79c509c5a94 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -253,8 +253,7 @@ #![feature(assert_matches)] #![feature(linked_list_cursors)] #![feature(let_chains)] -#![feature(str_split_as_str)] 
-#![feature(drain_filter)] +#![feature(str_split_remainder)] #[macro_use(fail_point)] extern crate fail; diff --git a/components/engine_traits/src/tablet.rs b/components/engine_traits/src/tablet.rs index c88f1548513..64e6dcbd4b4 100644 --- a/components/engine_traits/src/tablet.rs +++ b/components/engine_traits/src/tablet.rs @@ -241,7 +241,7 @@ impl TabletRegistry { let mut parts = name.rsplit('_'); let suffix = parts.next()?.parse().ok()?; let id = parts.next()?.parse().ok()?; - let prefix = parts.as_str(); + let prefix = parts.remainder().unwrap_or(""); Some((prefix, id, suffix)) } diff --git a/components/external_storage/src/export.rs b/components/external_storage/src/export.rs index 5b69a793c12..7d34f8aed08 100644 --- a/components/external_storage/src/export.rs +++ b/components/external_storage/src/export.rs @@ -121,35 +121,6 @@ pub fn make_azblob_backend(config: AzureBlobStorage) -> StorageBackend { backend } -#[cfg(test)] -mod tests { - use tempfile::Builder; - - use super::*; - - #[test] - fn test_create_storage() { - let temp_dir = Builder::new().tempdir().unwrap(); - let path = temp_dir.path(); - let backend = make_local_backend(&path.join("not_exist")); - match create_storage(&backend, Default::default()) { - Ok(_) => panic!("must be NotFound error"), - Err(e) => { - assert_eq!(e.kind(), io::ErrorKind::NotFound); - } - } - - let backend = make_local_backend(path); - create_storage(&backend, Default::default()).unwrap(); - - let backend = make_noop_backend(); - create_storage(&backend, Default::default()).unwrap(); - - let backend = StorageBackend::default(); - assert!(create_storage(&backend, Default::default()).is_err()); - } -} - pub struct BlobStore(Blob); impl BlobStore { @@ -249,3 +220,32 @@ impl ExternalStorage for BlobStore { (**self).get_part(name, off, len) } } + +#[cfg(test)] +mod tests { + use tempfile::Builder; + + use super::*; + + #[test] + fn test_create_storage() { + let temp_dir = Builder::new().tempdir().unwrap(); + let path = 
temp_dir.path(); + let backend = make_local_backend(&path.join("not_exist")); + match create_storage(&backend, Default::default()) { + Ok(_) => panic!("must be NotFound error"), + Err(e) => { + assert_eq!(e.kind(), io::ErrorKind::NotFound); + } + } + + let backend = make_local_backend(path); + create_storage(&backend, Default::default()).unwrap(); + + let backend = make_noop_backend(); + create_storage(&backend, Default::default()).unwrap(); + + let backend = StorageBackend::default(); + assert!(create_storage(&backend, Default::default()).is_err()); + } +} diff --git a/components/online_config/online_config_derive/src/lib.rs b/components/online_config/online_config_derive/src/lib.rs index bb37aad5924..e48a540c6b8 100644 --- a/components/online_config/online_config_derive/src/lib.rs +++ b/components/online_config/online_config_derive/src/lib.rs @@ -330,15 +330,11 @@ fn is_option_type(ty: &Type) -> bool { // TODO store (with lazy static) the vec of string // TODO maybe optimization, reverse the order of segments fn extract_option_segment(path: &Path) -> Option<&PathSegment> { - let idents_of_path = path - .segments - .iter() - .into_iter() - .fold(String::new(), |mut acc, v| { - acc.push_str(&v.ident.to_string()); - acc.push('|'); - acc - }); + let idents_of_path = path.segments.iter().fold(String::new(), |mut acc, v| { + acc.push_str(&v.ident.to_string()); + acc.push('|'); + acc + }); vec!["Option|", "std|option|Option|", "core|option|Option|"] .into_iter() .find(|s| idents_of_path == *s) diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index a637eca704b..68d5855a437 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -493,7 +493,11 @@ impl StorePollerBuilder { self.remove_dir(&path)?; continue; } - let Some((prefix, region_id, tablet_index)) = self.tablet_registry.parse_tablet_name(&path) else { continue }; + let Some((prefix, region_id, tablet_index)) = + 
self.tablet_registry.parse_tablet_name(&path) + else { + continue; + }; if prefix == MERGE_SOURCE_PREFIX { continue; } diff --git a/components/raftstore-v2/src/fsm/store.rs b/components/raftstore-v2/src/fsm/store.rs index 2c3a220c969..0fa5927e3d4 100644 --- a/components/raftstore-v2/src/fsm/store.rs +++ b/components/raftstore-v2/src/fsm/store.rs @@ -63,13 +63,29 @@ impl StoreMeta { .regions .insert(region_id, (region.clone(), initialized)); // `prev` only makes sense when it's initialized. - if let Some((prev, prev_init)) = prev && prev_init { + if let Some((prev, prev_init)) = prev + && prev_init + { assert!(initialized, "{} region corrupted", SlogFormat(logger)); if prev.get_region_epoch().get_version() != version { - let prev_id = self.region_ranges.remove(&(data_end_key(prev.get_end_key()), prev.get_region_epoch().get_version())); - assert_eq!(prev_id, Some(region_id), "{} region corrupted", SlogFormat(logger)); + let prev_id = self.region_ranges.remove(&( + data_end_key(prev.get_end_key()), + prev.get_region_epoch().get_version(), + )); + assert_eq!( + prev_id, + Some(region_id), + "{} region corrupted", + SlogFormat(logger) + ); } else { - assert!(self.region_ranges.get(&(data_end_key(prev.get_end_key()), version)).is_some(), "{} region corrupted", SlogFormat(logger)); + assert!( + self.region_ranges + .get(&(data_end_key(prev.get_end_key()), version)) + .is_some(), + "{} region corrupted", + SlogFormat(logger) + ); return; } } diff --git a/components/raftstore-v2/src/operation/command/admin/compact_log.rs b/components/raftstore-v2/src/operation/command/admin/compact_log.rs index 1c4538ab51e..364871406d8 100644 --- a/components/raftstore-v2/src/operation/command/admin/compact_log.rs +++ b/components/raftstore-v2/src/operation/command/admin/compact_log.rs @@ -527,9 +527,10 @@ impl Peer { && let Some(index) = self.compact_log_index() { // Raft Engine doesn't care about first index. 
- if let Err(e) = store_ctx - .engine - .gc(self.region_id(), 0, index, self.state_changes_mut()) + if let Err(e) = + store_ctx + .engine + .gc(self.region_id(), 0, index, self.state_changes_mut()) { error!(self.logger, "failed to compact raft logs"; "err" => ?e); } @@ -571,13 +572,11 @@ impl Peer { && old_persisted < self.entry_storage().truncated_index() + 1 && let Some(index) = self.compact_log_index() { - let batch = task.extra_write.ensure_v2(|| self.entry_storage().raft_engine().log_batch(0)); + let batch = task + .extra_write + .ensure_v2(|| self.entry_storage().raft_engine().log_batch(0)); // Raft Engine doesn't care about first index. - if let Err(e) = - store_ctx - .engine - .gc(self.region_id(), 0, index, batch) - { + if let Err(e) = store_ctx.engine.gc(self.region_id(), 0, index, batch) { error!(self.logger, "failed to compact raft logs"; "err" => ?e); } } diff --git a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs index da26a423a97..166d3a98d86 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/commit.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/commit.rs @@ -615,10 +615,7 @@ impl Peer { if let Some(state) = self.applied_merge_state() && state.get_commit() == commit_of_merge(&catch_up_logs.merge) { - assert_eq!( - state.get_target().get_id(), - catch_up_logs.target_region_id - ); + assert_eq!(state.get_target().get_id(), catch_up_logs.target_region_id); self.finish_catch_up_logs(store_ctx, catch_up_logs); } else { // Directly append these logs to raft log and then commit them. 
@@ -636,7 +633,8 @@ impl Peer { } } catch_up_logs.merge.clear_entries(); - self.merge_context_mut().prepare_status = Some(PrepareStatus::CatchUpLogs(catch_up_logs)); + self.merge_context_mut().prepare_status = + Some(PrepareStatus::CatchUpLogs(catch_up_logs)); } } diff --git a/components/raftstore-v2/src/operation/command/admin/merge/mod.rs b/components/raftstore-v2/src/operation/command/admin/merge/mod.rs index 78f909fb26e..94adc1e1c3c 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/mod.rs @@ -54,11 +54,7 @@ impl MergeContext { #[inline] pub fn maybe_take_pending_prepare(&mut self, applied: u64) -> Option { - if let Some(PrepareStatus::WaitForFence { - fence, - req, - .. - }) = self.prepare_status.as_mut() + if let Some(PrepareStatus::WaitForFence { fence, req, .. }) = self.prepare_status.as_mut() && applied >= *fence { // The status will be updated during processing the proposal. @@ -89,8 +85,13 @@ impl MergeContext { impl Peer { #[inline] pub fn update_merge_progress_on_became_follower(&mut self) { - if let Some(MergeContext { prepare_status: Some(status) }) = self.merge_context() - && matches!(status, PrepareStatus::WaitForTrimStatus { .. } | PrepareStatus::WaitForFence { .. }) + if let Some(MergeContext { + prepare_status: Some(status), + }) = self.merge_context() + && matches!( + status, + PrepareStatus::WaitForTrimStatus { .. } | PrepareStatus::WaitForFence { .. 
} + ) { self.take_merge_context(); self.proposal_control_mut().set_pending_prepare_merge(false); diff --git a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs index 5de1c4cfe01..44580144dce 100644 --- a/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs +++ b/components/raftstore-v2/src/operation/command/admin/merge/prepare.rs @@ -328,7 +328,9 @@ impl Peer { entry.get_data(), entry.get_index(), entry.get_term(), - ) else { continue }; + ) else { + continue; + }; let cmd_type = cmd.get_admin_request().get_cmd_type(); match cmd_type { AdminCmdType::TransferLeader @@ -414,10 +416,9 @@ impl Peer { ) { let region_id = self.region_id(); if self.merge_context().is_some() - && let Some(PrepareStatus::WaitForTrimStatus { pending_peers, req, .. }) = self - .merge_context_mut() - .prepare_status - .as_mut() + && let Some(PrepareStatus::WaitForTrimStatus { + pending_peers, req, .. + }) = self.merge_context_mut().prepare_status.as_mut() && req.is_some() { assert!(resp.has_availability_context()); @@ -453,17 +454,21 @@ impl Peer { } }; let mut req = req.take().unwrap(); - req.mut_header().set_flags(WriteBatchFlags::PRE_FLUSH_FINISHED.bits()); + req.mut_header() + .set_flags(WriteBatchFlags::PRE_FLUSH_FINISHED.bits()); let logger = self.logger.clone(); let on_flush_finish = move || { let (ch, _) = CmdResChannel::pair(); - if let Err(e) = mailbox.force_send(PeerMsg::AdminCommand(RaftRequest::new(req, ch))) { + if let Err(e) = + mailbox.force_send(PeerMsg::AdminCommand(RaftRequest::new(req, ch))) + { error!( logger, "send PrepareMerge request failed after pre-flush finished"; "err" => ?e, ); - // We rely on `maybe_clean_up_stale_merge_context` to clean this up. + // We rely on `maybe_clean_up_stale_merge_context` to + // clean this up. } }; self.start_pre_flush( @@ -599,9 +604,7 @@ impl Peer { // `propose_prepare_merge`. 
// If the req is still inflight and reaches `propose_prepare_merge` later, // `already_checked_trim_status` will restore the status. - if let Some(PrepareStatus::WaitForTrimStatus { - start_time, .. - }) = self + if let Some(PrepareStatus::WaitForTrimStatus { start_time, .. }) = self .merge_context() .as_ref() .and_then(|c| c.prepare_status.as_ref()) diff --git a/components/raftstore-v2/src/operation/command/admin/mod.rs b/components/raftstore-v2/src/operation/command/admin/mod.rs index b861f86f859..db836086172 100644 --- a/components/raftstore-v2/src/operation/command/admin/mod.rs +++ b/components/raftstore-v2/src/operation/command/admin/mod.rs @@ -108,8 +108,9 @@ impl Peer { // Check whether the admin request can be proposed when disk full. let can_skip_check = is_transfer_leader || pre_transfer_leader || is_conf_change; - if !can_skip_check && let Err(e) = - self.check_proposal_with_disk_full_opt(ctx, DiskFullOpt::AllowedOnAlmostFull) + if !can_skip_check + && let Err(e) = + self.check_proposal_with_disk_full_opt(ctx, DiskFullOpt::AllowedOnAlmostFull) { let resp = cmd_resp::new_error(e); ch.report_error(resp); @@ -135,7 +136,9 @@ impl Peer { } // Do not check conflict for transfer leader, otherwise we may not // transfer leadership out of busy nodes in time. - if !is_transfer_leader && let Some(conflict) = self.proposal_control_mut().check_conflict(Some(cmd_type)) { + if !is_transfer_leader + && let Some(conflict) = self.proposal_control_mut().check_conflict(Some(cmd_type)) + { conflict.delay_channel(ch); return; } diff --git a/components/raftstore-v2/src/operation/command/admin/split.rs b/components/raftstore-v2/src/operation/command/admin/split.rs index cfbd7678c17..1f7ba9b9075 100644 --- a/components/raftstore-v2/src/operation/command/admin/split.rs +++ b/components/raftstore-v2/src/operation/command/admin/split.rs @@ -1114,7 +1114,9 @@ mod test { } } - let AdminCmdResult::SplitRegion(SplitResult { tablet, .. 
}) = apply_res else { panic!() }; + let AdminCmdResult::SplitRegion(SplitResult { tablet, .. }) = apply_res else { + panic!() + }; // update cache let mut cache = apply.tablet_registry().get(parent_id).unwrap(); cache.set(*tablet.downcast().unwrap()); diff --git a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs index bf9cb426255..accd93ec3c9 100644 --- a/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs +++ b/components/raftstore-v2/src/operation/command/admin/transfer_leader.rs @@ -50,21 +50,21 @@ impl Peer { /// to target follower first to ensures it's ready to become leader. /// After that the real transfer leader process begin. /// - /// 1. pre_transfer_leader on leader: - /// Leader will send a MsgTransferLeader to follower. - /// 2. execute_transfer_leader on follower - /// If follower passes all necessary checks, it will reply an - /// ACK with type MsgTransferLeader and its promised applied index. - /// 3. ready_to_transfer_leader on leader: - /// Leader checks if it's appropriate to transfer leadership. If it - /// does, it calls raft transfer_leader API to do the remaining work. + /// 1. pre_transfer_leader on leader: Leader will send a MsgTransferLeader + /// to follower. + /// 2. execute_transfer_leader on follower: If follower passes all necessary + /// checks, it will reply an ACK with type MsgTransferLeader and its + /// promised applied index. + /// 3. ready_to_transfer_leader on leader: Leader checks if it's appropriate + /// to transfer leadership. If it does, it calls raft transfer_leader API + /// to do the remaining work. /// /// Additional steps when there are remaining pessimistic /// locks to propose (detected in function on_transfer_leader_msg). /// 1. Leader firstly proposes pessimistic locks and then proposes a /// TransferLeader command. - /// 2. 
The follower applies the TransferLeader command and replies an - /// ACK with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. + /// 2. The follower applies the TransferLeader command and replies an ACK + /// with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. /// /// See also: tikv/rfcs#37. pub fn propose_transfer_leader( @@ -100,7 +100,7 @@ impl Peer { }); let peer = match peers.len() { 0 => transfer_leader.get_peer(), - 1 => peers.get(0).unwrap(), + 1 => peers.first().unwrap(), _ => peers.choose(&mut rand::thread_rng()).unwrap(), }; diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index b93ea700f80..af8dbe0afa0 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -343,7 +343,9 @@ impl Peer { if !queue.is_empty() { for e in committed_entries { let mut proposal = queue.find_proposal(e.term, e.index, current_term); - if let Some(p) = &mut proposal && p.must_pass_epoch_check { + if let Some(p) = &mut proposal + && p.must_pass_epoch_check + { // In this case the apply can be guaranteed to be successful. Invoke the // on_committed callback if necessary. 
p.cb.notify_committed(); @@ -844,7 +846,9 @@ impl Apply { } control.need_flush = false; let flush_state = self.flush_state().clone(); - if let Some(wb) = &self.write_batch && !wb.is_empty() { + if let Some(wb) = &self.write_batch + && !wb.is_empty() + { self.perf_context().start_observe(); let mut write_opt = WriteOptions::default(); write_opt.set_disable_wal(true); @@ -864,10 +868,7 @@ impl Apply { let tokens: Vec<_> = self .callbacks_mut() .iter() - .flat_map(|(v, _)| { - v.write_trackers() - .flat_map(|t| t.as_tracker_token()) - }) + .flat_map(|(v, _)| v.write_trackers().flat_map(|t| t.as_tracker_token())) .collect(); self.perf_context().report_metrics(&tokens); } diff --git a/components/raftstore-v2/src/operation/command/write/ingest.rs b/components/raftstore-v2/src/operation/command/write/ingest.rs index 45247b3f36f..147bd83312f 100644 --- a/components/raftstore-v2/src/operation/command/write/ingest.rs +++ b/components/raftstore-v2/src/operation/command/write/ingest.rs @@ -62,9 +62,14 @@ impl Store { let ranges = ctx.sst_importer.ranges_in_import(); for (region_id, ssts) in region_ssts { - if let Err(TrySendError::Disconnected(msg)) = ctx.router.send(region_id, PeerMsg::CleanupImportSst(ssts.into())) - && !ctx.router.is_shutdown() { - let PeerMsg::CleanupImportSst( ssts) = msg else { unreachable!() }; + if let Err(TrySendError::Disconnected(msg)) = ctx + .router + .send(region_id, PeerMsg::CleanupImportSst(ssts.into())) + && !ctx.router.is_shutdown() + { + let PeerMsg::CleanupImportSst(ssts) = msg else { + unreachable!() + }; let mut ssts = ssts.into_vec(); ssts.retain(|sst| { for range in &ranges { @@ -74,7 +79,10 @@ impl Store { } true }); - let _ = ctx.schedulers.tablet.schedule(tablet::Task::CleanupImportSst(ssts.into())); + let _ = ctx + .schedulers + .tablet + .schedule(tablet::Task::CleanupImportSst(ssts.into())); } } diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index e9fc84643da..864ac0f234c 
100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -240,7 +240,9 @@ fn check_if_to_peer_destroyed( if util::is_epoch_stale(msg.get_region_epoch(), local_epoch) { return Ok(true); } - if let Some(local_peer) = find_peer(local_state.get_region(), store_id) && to_peer.id <= local_peer.get_id() { + if let Some(local_peer) = find_peer(local_state.get_region(), store_id) + && to_peer.id <= local_peer.get_id() + { return Ok(true); } // If the peer is destroyed by conf change, all above checks will pass. @@ -709,8 +711,12 @@ impl Peer { let check_peer_id = check.get_check_peer().get_id(); let records = self.storage().region_state().get_merged_records(); let Some(record) = records.iter().find(|r| { - r.get_source_peers().iter().any(|p| p.get_id() == check_peer_id) - }) else { return }; + r.get_source_peers() + .iter() + .any(|p| p.get_id() == check_peer_id) + }) else { + return; + }; let source_index = record.get_source_index(); forward_destroy_to_source_peer(msg, |m| { let source_checkpoint = super::merge_source_path( diff --git a/components/raftstore-v2/src/operation/misc.rs b/components/raftstore-v2/src/operation/misc.rs index fafca29ea85..0509722ebb3 100644 --- a/components/raftstore-v2/src/operation/misc.rs +++ b/components/raftstore-v2/src/operation/misc.rs @@ -139,9 +139,13 @@ impl Store { region_keys.entry(key.region_id).or_default().push(key); } for (region_id, keys) in region_keys { - if let Err(TrySendError::Disconnected(msg)) = ctx.router.send(region_id, PeerMsg::SnapGc(keys.into())) - && !ctx.router.is_shutdown() { - let PeerMsg::SnapGc(keys) = msg else { unreachable!() }; + if let Err(TrySendError::Disconnected(msg)) = + ctx.router.send(region_id, PeerMsg::SnapGc(keys.into())) + && !ctx.router.is_shutdown() + { + let PeerMsg::SnapGc(keys) = msg else { + unreachable!() + }; let _ = ctx.schedulers.tablet.schedule(tablet::Task::SnapGc(keys)); } } diff --git 
a/components/raftstore-v2/src/operation/query/capture.rs b/components/raftstore-v2/src/operation/query/capture.rs index bc7e93a394b..868ed12ed32 100644 --- a/components/raftstore-v2/src/operation/query/capture.rs +++ b/components/raftstore-v2/src/operation/query/capture.rs @@ -56,7 +56,9 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: raftstore::store::Transport> let id = self.fsm.peer().region_id(); let term = self.fsm.peer().term(); let (ch, _) = QueryResChannel::with_callback(Box::new(move |res| { - if let QueryResult::Response(resp) = res && resp.get_header().has_error() { + if let QueryResult::Response(resp) = res + && resp.get_header().has_error() + { // Return error capture_change.snap_cb.report_error(resp.clone()); return; diff --git a/components/raftstore-v2/src/operation/query/local.rs b/components/raftstore-v2/src/operation/query/local.rs index 1829628ae48..dd540762a69 100644 --- a/components/raftstore-v2/src/operation/query/local.rs +++ b/components/raftstore-v2/src/operation/query/local.rs @@ -351,14 +351,16 @@ where match fut.await? { Some(query_res) => { if query_res.read().is_none() { - let QueryResult::Response(res) = query_res else { unreachable!() }; + let QueryResult::Response(res) = query_res else { + unreachable!() + }; // Get an error explicitly in header, // or leader reports KeyIsLocked error via read index. 
assert!( res.get_header().has_error() || res .get_responses() - .get(0) + .first() .map_or(false, |r| r.get_read_index().has_locked()), "{:?}", res diff --git a/components/raftstore-v2/src/operation/ready/apply_trace.rs b/components/raftstore-v2/src/operation/ready/apply_trace.rs index e839089837d..53756465cc4 100644 --- a/components/raftstore-v2/src/operation/ready/apply_trace.rs +++ b/components/raftstore-v2/src/operation/ready/apply_trace.rs @@ -287,7 +287,10 @@ impl ApplyTrace { } }) .max(); - if let Some(m) = last_modified && m >= self.admin.flushed + 4096000 && m >= self.last_flush_trigger + 4096000 { + if let Some(m) = last_modified + && m >= self.admin.flushed + 4096000 + && m >= self.last_flush_trigger + 4096000 + { self.last_flush_trigger = m; true } else { @@ -774,7 +777,7 @@ impl Peer { flushed = true; let flush_state = self.flush_state().clone(); - let mut apply_trace = self.storage_mut().apply_trace_mut(); + let apply_trace = self.storage_mut().apply_trace_mut(); let flushed_indexes = flush_state.as_ref().flushed_index(); for i in 0..flushed_indexes.len() { diff --git a/components/raftstore-v2/src/operation/ready/snapshot.rs b/components/raftstore-v2/src/operation/ready/snapshot.rs index c29399ac6a0..b6a02d70eac 100644 --- a/components/raftstore-v2/src/operation/ready/snapshot.rs +++ b/components/raftstore-v2/src/operation/ready/snapshot.rs @@ -567,10 +567,9 @@ impl Storage { pub fn cancel_generating_snap_due_to_compacted(&self, compact_to: u64) { let mut states = self.snap_states.borrow_mut(); states.retain(|id, state| { - let SnapState::Generating { - ref index, - .. - } = *state else { return true; }; + let SnapState::Generating { ref index, .. 
} = *state else { + return true; + }; let snap_index = index.load(Ordering::SeqCst); if snap_index == 0 || compact_to <= snap_index + 1 { return true; @@ -597,10 +596,9 @@ impl Storage { } let (mut snapshot, to_peer_id) = *res.unwrap(); if let Some(state) = self.snap_states.borrow_mut().get_mut(&to_peer_id) { - let SnapState::Generating { - ref index, - .. - } = *state else { return false }; + let SnapState::Generating { ref index, .. } = *state else { + return false; + }; if snapshot.get_metadata().get_index() < index.load(Ordering::SeqCst) { warn!( self.logger(), @@ -614,7 +612,9 @@ impl Storage { // Set commit index for learner snapshots. It's needed to address // compatibility issues between v1 and v2 snapshots. // See https://github.com/pingcap/tiflash/issues/7568#issuecomment-1576382311 - if let Some(p) = find_peer_by_id(self.region(), to_peer_id) && p.get_role() == PeerRole::Learner { + if let Some(p) = find_peer_by_id(self.region(), to_peer_id) + && p.get_role() == PeerRole::Learner + { let mut snapshot_data = RaftSnapshotData::default(); if snapshot_data.merge_from_bytes(snapshot.get_data()).is_ok() { snapshot_data.mut_meta().set_commit_index_hint(commit_index); diff --git a/components/raftstore-v2/src/operation/txn_ext.rs b/components/raftstore-v2/src/operation/txn_ext.rs index 6a379b9a1a2..7aee3664d98 100644 --- a/components/raftstore-v2/src/operation/txn_ext.rs +++ b/components/raftstore-v2/src/operation/txn_ext.rs @@ -270,10 +270,18 @@ impl Peer { self.logger, "propose {} locks before transferring leader", lock_count; ); - let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write_with_opt(header, encoder.encode(), RaftCmdExtraOpts { - disk_full_opt: DiskFullOpt::AllowedOnAlmostFull, - ..Default::default() - }).0 else {unreachable!()}; + let PeerMsg::SimpleWrite(write) = PeerMsg::simple_write_with_opt( + header, + encoder.encode(), + RaftCmdExtraOpts { + disk_full_opt: DiskFullOpt::AllowedOnAlmostFull, + ..Default::default() + }, + ) + .0 + else { + 
unreachable!() + }; self.on_simple_write( ctx, write.header, diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/create.rs b/components/raftstore-v2/src/operation/unsafe_recovery/create.rs index 5795d68c1b9..c96f3dc55c5 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/create.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/create.rs @@ -110,7 +110,9 @@ impl Store { impl Peer { pub fn on_unsafe_recovery_wait_initialized(&mut self, syncer: UnsafeRecoveryExecutePlanSyncer) { - if let Some(state) = self.unsafe_recovery_state() && !state.is_abort() { + if let Some(state) = self.unsafe_recovery_state() + && !state.is_abort() + { warn!(self.logger, "Unsafe recovery, can't wait initialize, another plan is executing in progress"; "state" => ?state, diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs index 20a42b9f978..0eb722a94c7 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/demote.rs @@ -40,7 +40,9 @@ impl Peer { let exit_joint = exit_joint_request(self.region(), self.peer()); let (ch, sub) = CmdResChannel::pair(); self.on_admin_command(ctx, exit_joint, ch); - if let Some(resp) = sub.try_result() && resp.get_header().has_error() { + if let Some(resp) = sub.try_result() + && resp.get_header().has_error() + { error!(self.logger, "Unsafe recovery, fail to exit residual joint state"; "err" => ?resp.get_header().get_error(), @@ -70,7 +72,9 @@ impl Peer { "req" => ?req); let (ch, sub) = CmdResChannel::pair(); self.on_admin_command(ctx, req, ch); - if let Some(resp) = sub.try_result() && resp.get_header().has_error() { + if let Some(resp) = sub.try_result() + && resp.get_header().has_error() + { error!(self.logger, "Unsafe recovery, fail to finish demotion"; "err" => ?resp.get_header().get_error(), @@ -101,7 +105,10 @@ impl Peer { failed_voters, 
target_index, demote_after_exit, - }) = self.unsafe_recovery_state() else { return }; + }) = self.unsafe_recovery_state() + else { + return; + }; if self.raft_group().raft.raft_log.applied < *target_index { return; @@ -125,12 +132,14 @@ impl Peer { let exit_joint = exit_joint_request(self.region(), self.peer()); let (ch, sub) = CmdResChannel::pair(); self.on_admin_command(ctx, exit_joint, ch); - if let Some(resp) = sub.try_result() && resp.get_header().has_error() { + if let Some(resp) = sub.try_result() + && resp.get_header().has_error() + { error!(self.logger, "Unsafe recovery, fail to exit joint state"; "err" => ?resp.get_header().get_error(), ); - *self.unsafe_recovery_state_mut()= Some(UnsafeRecoveryState::Failed); + *self.unsafe_recovery_state_mut() = Some(UnsafeRecoveryState::Failed); } } else { error!(self.logger, diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/destroy.rs b/components/raftstore-v2/src/operation/unsafe_recovery/destroy.rs index 70275f93590..28e7927f430 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/destroy.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/destroy.rs @@ -8,7 +8,9 @@ use crate::raft::Peer; impl Peer { pub fn on_unsafe_recovery_destroy_peer(&mut self, syncer: UnsafeRecoveryExecutePlanSyncer) { - if let Some(state) = self.unsafe_recovery_state() && !state.is_abort() { + if let Some(state) = self.unsafe_recovery_state() + && !state.is_abort() + { warn!(self.logger, "Unsafe recovery, can't destroy, another plan is executing in progress"; "state" => ?state, diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs b/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs index e6af0fddb7b..be9fa82991f 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/force_leader.rs @@ -190,11 +190,12 @@ impl Peer { return; } - if let Some(UnsafeRecoveryState::Failed) 
= self.unsafe_recovery_state() && !force { - // Skip force leader if the plan failed, so wait for the next retry of plan with force leader state holding - info!( - self.logger, "skip exiting force leader state" - ); + if let Some(UnsafeRecoveryState::Failed) = self.unsafe_recovery_state() + && !force + { + // Skip force leader if the plan failed, so wait for the next retry of plan with + // force leader state holding + info!(self.logger, "skip exiting force leader state"); return; } diff --git a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs index 90c8e3db34d..db78c61a0e7 100644 --- a/components/raftstore-v2/src/operation/unsafe_recovery/report.rs +++ b/components/raftstore-v2/src/operation/unsafe_recovery/report.rs @@ -27,13 +27,15 @@ impl Store { impl Peer { pub fn on_unsafe_recovery_wait_apply(&mut self, syncer: UnsafeRecoveryWaitApplySyncer) { - if let Some(state) = self.unsafe_recovery_state() && !state.is_abort() { - warn!(self.logger, - "Unsafe recovery, can't wait apply, another plan is executing in progress"; - "state" => ?state, - ); - syncer.abort(); - return; + if let Some(state) = self.unsafe_recovery_state() + && !state.is_abort() + { + warn!(self.logger, + "Unsafe recovery, can't wait apply, another plan is executing in progress"; + "state" => ?state, + ); + syncer.abort(); + return; } let target_index = if self.has_force_leader() { // For regions that lose quorum (or regions have force leader), whatever has diff --git a/components/raftstore-v2/src/worker/cleanup/compact.rs b/components/raftstore-v2/src/worker/cleanup/compact.rs index 7acdb943b91..feb519a04ad 100644 --- a/components/raftstore-v2/src/worker/cleanup/compact.rs +++ b/components/raftstore-v2/src/worker/cleanup/compact.rs @@ -97,8 +97,12 @@ where ) { Ok(mut region_ids) => { for region_id in region_ids.drain(..) 
{ - let Some(mut tablet_cache) = self.tablet_registry.get(region_id) else {continue}; - let Some(tablet) = tablet_cache.latest() else {continue}; + let Some(mut tablet_cache) = self.tablet_registry.get(region_id) else { + continue; + }; + let Some(tablet) = tablet_cache.latest() else { + continue; + }; for cf in &cf_names { if let Err(e) = tablet.compact_range_cf(cf, None, None, false, 1 /* threads */) @@ -143,8 +147,12 @@ fn collect_regions_to_compact( ); let mut regions_to_compact = vec![]; for id in region_ids { - let Some(mut tablet_cache) = reg.get(id) else {continue}; - let Some(tablet) = tablet_cache.latest() else {continue}; + let Some(mut tablet_cache) = reg.get(id) else { + continue; + }; + let Some(tablet) = tablet_cache.latest() else { + continue; + }; if tablet.auto_compactions_is_disabled().expect("cf") { info!( logger, diff --git a/components/raftstore-v2/src/worker/pd/region.rs b/components/raftstore-v2/src/worker/pd/region.rs index d3ef54bd75a..7e74405dced 100644 --- a/components/raftstore-v2/src/worker/pd/region.rs +++ b/components/raftstore-v2/src/worker/pd/region.rs @@ -113,10 +113,7 @@ where let approximate_keys = task.approximate_keys.unwrap_or_default(); let region_id = task.region.get_id(); - let peer_stat = self - .region_peers - .entry(region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(region_id).or_default(); peer_stat.approximate_size = approximate_size; peer_stat.approximate_keys = approximate_keys; @@ -373,10 +370,7 @@ where pub fn handle_update_read_stats(&mut self, mut stats: ReadStats) { for (region_id, region_info) in stats.region_infos.iter_mut() { - let peer_stat = self - .region_peers - .entry(*region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(*region_id).or_default(); peer_stat.read_bytes += region_info.flow.read_bytes as u64; peer_stat.read_keys += region_info.flow.read_keys as u64; self.store_stat.engine_total_bytes_read += 
region_info.flow.read_bytes as u64; @@ -398,10 +392,7 @@ where pub fn handle_update_write_stats(&mut self, mut stats: WriteStats) { for (region_id, region_info) in stats.region_infos.iter_mut() { - let peer_stat = self - .region_peers - .entry(*region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(*region_id).or_default(); peer_stat.query_stats.add_query_stats(®ion_info.0); self.store_stat .engine_total_query_num diff --git a/components/raftstore-v2/src/worker/pd/split.rs b/components/raftstore-v2/src/worker/pd/split.rs index 7fec5a31bb6..7bafb6c442a 100644 --- a/components/raftstore-v2/src/worker/pd/split.rs +++ b/components/raftstore-v2/src/worker/pd/split.rs @@ -142,8 +142,10 @@ where let f = async move { for split_info in split_infos { - let Ok(Some(region)) = - pd_client.get_region_by_id(split_info.region_id).await else { continue }; + let Ok(Some(region)) = pd_client.get_region_by_id(split_info.region_id).await + else { + continue; + }; // Try to split the region with the given split key. 
if let Some(split_key) = split_info.split_key { Self::ask_batch_split_imp( diff --git a/components/raftstore-v2/src/worker/tablet.rs b/components/raftstore-v2/src/worker/tablet.rs index b2a6d46e39c..9bd093ed1dd 100644 --- a/components/raftstore-v2/src/worker/tablet.rs +++ b/components/raftstore-v2/src/worker/tablet.rs @@ -467,7 +467,8 @@ impl Runner { let Some(Some(tablet)) = self .tablet_registry .get(region_id) - .map(|mut cache| cache.latest().cloned()) else { + .map(|mut cache| cache.latest().cloned()) + else { warn!( self.logger, "flush memtable failed to acquire tablet"; @@ -555,7 +556,15 @@ impl Runner { } fn delete_range(&self, delete_range: Task) { - let Task::DeleteRange { region_id, tablet, cf, start_key, end_key, cb } = delete_range else { + let Task::DeleteRange { + region_id, + tablet, + cf, + start_key, + end_key, + cb, + } = delete_range + else { slog_panic!(self.logger, "unexpected task"; "task" => format!("{}", delete_range)) }; diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 88ad9a0e380..1a52e86f098 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -137,7 +137,9 @@ impl TestRouter { match res { Ok(_) => return block_on(sub.result()).is_some(), Err(TrySendError::Disconnected(m)) => { - let PeerMsg::WaitFlush(ch) = m else { unreachable!() }; + let PeerMsg::WaitFlush(ch) = m else { + unreachable!() + }; match self .store_router() .send_control(StoreMsg::WaitFlush { region_id, ch }) diff --git a/components/raftstore/src/coprocessor/config.rs b/components/raftstore/src/coprocessor/config.rs index b1dc3830bbb..8abfe38bb51 100644 --- a/components/raftstore/src/coprocessor/config.rs +++ b/components/raftstore/src/coprocessor/config.rs @@ -201,10 +201,15 @@ impl Config { let res = self.validate_bucket_size(); // If it's OK to enable bucket, we will prefer to enable it if useful for // 
raftstore-v2. - if let Ok(()) = res && self.enable_region_bucket.is_none() && raft_kv_v2 { + if let Ok(()) = res + && self.enable_region_bucket.is_none() + && raft_kv_v2 + { let useful = self.region_split_size() >= self.region_bucket_size * 2; self.enable_region_bucket = Some(useful); - } else if let Err(e) = res && self.enable_region_bucket() { + } else if let Err(e) = res + && self.enable_region_bucket() + { return Err(e); } Ok(()) diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index c7d6731d3e9..d007c7102bf 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -481,10 +481,7 @@ impl CoprocessorHost { BoxSplitCheckObserver::new(KeysCheckObserver::new(ch)), ); registry.register_split_check_observer(100, BoxSplitCheckObserver::new(HalfCheckObserver)); - registry.register_split_check_observer( - 400, - BoxSplitCheckObserver::new(TableCheckObserver::default()), - ); + registry.register_split_check_observer(400, BoxSplitCheckObserver::new(TableCheckObserver)); registry.register_admin_observer(100, BoxAdminObserver::new(SplitObserver)); CoprocessorHost { registry, cfg } } diff --git a/components/raftstore/src/coprocessor/region_info_accessor.rs b/components/raftstore/src/coprocessor/region_info_accessor.rs index 37403310baf..38ffbab3198 100644 --- a/components/raftstore/src/coprocessor/region_info_accessor.rs +++ b/components/raftstore/src/coprocessor/region_info_accessor.rs @@ -514,7 +514,9 @@ impl RegionCollector { // epoch is properly set and an Update message was sent. return; } - if let RaftStoreEvent::RoleChange { initialized, .. } = &event && !initialized { + if let RaftStoreEvent::RoleChange { initialized, .. } = &event + && !initialized + { // Ignore uninitialized peers. 
return; } @@ -723,7 +725,9 @@ impl RegionInfoProvider for RegionInfoAccessor { self.seek_region( key, Box::new(move |iter| { - if let Some(info) = iter.next() && info.region.get_start_key() <= key_in_vec.as_slice() { + if let Some(info) = iter.next() + && info.region.get_start_key() <= key_in_vec.as_slice() + { if let Err(e) = tx.send(info.region.clone()) { warn!("failed to send find_region_by_key result: {:?}", e); } diff --git a/components/raftstore/src/coprocessor/split_check/table.rs b/components/raftstore/src/coprocessor/split_check/table.rs index eec7b15b9b3..df825bc2641 100644 --- a/components/raftstore/src/coprocessor/split_check/table.rs +++ b/components/raftstore/src/coprocessor/split_check/table.rs @@ -300,7 +300,7 @@ mod tests { // ["t1", "") => t2_xx (Some(1), None, data_keys.get(1).cloned()), // ["t1", "t2") => t1_xx - (Some(1), Some(2), data_keys.get(0).cloned()), + (Some(1), Some(2), data_keys.first().cloned()), ]); } diff --git a/components/raftstore/src/errors.rs b/components/raftstore/src/errors.rs index 49a52de26e1..f55ae2ed2bf 100644 --- a/components/raftstore/src/errors.rs +++ b/components/raftstore/src/errors.rs @@ -226,7 +226,7 @@ impl From for errorpb::Error { .mut_proposal_in_merging_mode() .set_region_id(region_id); } - Error::Transport(reason) if reason == DiscardReason::Full => { + Error::Transport(DiscardReason::Full) => { let mut server_is_busy_err = errorpb::ServerIsBusy::default(); server_is_busy_err.set_reason(RAFTSTORE_IS_BUSY.to_owned()); errorpb.set_server_is_busy(server_is_busy_err); diff --git a/components/raftstore/src/lib.rs b/components/raftstore/src/lib.rs index 1db5f79d226..b8fbd2ac9af 100644 --- a/components/raftstore/src/lib.rs +++ b/components/raftstore/src/lib.rs @@ -5,10 +5,11 @@ #![feature(div_duration)] #![feature(min_specialization)] #![feature(box_patterns)] -#![feature(hash_drain_filter)] +#![feature(hash_extract_if)] #![feature(let_chains)] #![feature(assert_matches)] #![feature(type_alias_impl_trait)] 
+#![feature(impl_trait_in_assoc_type)] #![recursion_limit = "256"] #[cfg(test)] diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index eedd5052bbb..8a63380213a 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -419,7 +419,11 @@ where } self.state_size = 0; if let ExtraBatchWrite::V2(_) = self.extra_batch_write { - let ExtraBatchWrite::V2(lb) = mem::replace(&mut self.extra_batch_write, ExtraBatchWrite::None) else { unreachable!() }; + let ExtraBatchWrite::V2(lb) = + mem::replace(&mut self.extra_batch_write, ExtraBatchWrite::None) + else { + unreachable!() + }; wb.merge(lb).unwrap(); } } @@ -451,7 +455,10 @@ where .unwrap(); if let Some(raft_state) = task.raft_state.take() - && self.raft_states.insert(task.region_id, raft_state).is_none() + && self + .raft_states + .insert(task.region_id, raft_state) + .is_none() { self.state_size += std::mem::size_of::(); } diff --git a/components/raftstore/src/store/compaction_guard.rs b/components/raftstore/src/store/compaction_guard.rs index 161a8f9c4db..ae5abb7990a 100644 --- a/components/raftstore/src/store/compaction_guard.rs +++ b/components/raftstore/src/store/compaction_guard.rs @@ -247,7 +247,7 @@ impl SstPartitioner for CompactionGuardGenerator

{ } } -fn seek_to(all_data: &Vec>, target_key: &[u8], from_pos: usize) -> usize { +fn seek_to(all_data: &[Vec], target_key: &[u8], from_pos: usize) -> usize { let mut pos = from_pos; let mut skip_count = 0; while pos < all_data.len() && all_data[pos].as_slice() <= target_key { diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index c7c65e80d6c..9c677cd1271 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -507,12 +507,12 @@ impl Default for Config { reactive_memory_lock_tick_interval: ReadableDuration::secs(2), reactive_memory_lock_timeout_tick: 5, check_long_uncommitted_interval: ReadableDuration::secs(10), - /// In some cases, such as rolling upgrade, some regions' commit log - /// duration can be 12 seconds. Before #13078 is merged, - /// the commit log duration can be 2.8 minutes. So maybe - /// 20s is a relatively reasonable base threshold. Generally, - /// the log commit duration is less than 1s. Feel free to adjust - /// this config :) + // In some cases, such as rolling upgrade, some regions' commit log + // duration can be 12 seconds. Before #13078 is merged, + // the commit log duration can be 2.8 minutes. So maybe + // 20s is a relatively reasonable base threshold. Generally, + // the log commit duration is less than 1s. Feel free to adjust + // this config :) long_uncommitted_base_threshold: ReadableDuration::secs(20), max_entry_cache_warmup_duration: ReadableDuration::secs(1), diff --git a/components/raftstore/src/store/entry_storage.rs b/components/raftstore/src/store/entry_storage.rs index c91c68538dd..98277763fe3 100644 --- a/components/raftstore/src/store/entry_storage.rs +++ b/components/raftstore/src/store/entry_storage.rs @@ -1336,26 +1336,30 @@ pub mod tests { }; // Test the initial data structure size. 
- let (tx, rx) = mpsc::sync_channel(8); + let (tx, rx) = mpsc::sync_channel(1); + let check_mem_size_change = |expect: i64| { + assert_eq!(rx.try_recv().unwrap(), expect); + rx.try_recv().unwrap_err(); + }; let mut cache = EntryCache::new_with_cb(move |c: i64| tx.send(c).unwrap()); - assert_eq!(rx.try_recv().unwrap(), 896); + check_mem_size_change(0); cache.append( 0, 0, &[new_padded_entry(101, 1, 1), new_padded_entry(102, 1, 2)], ); - assert_eq!(rx.try_recv().unwrap(), 3); + check_mem_size_change(419); cache.prepend(vec![new_padded_entry(100, 1, 1)]); - assert_eq!(rx.try_recv().unwrap(), 1); + check_mem_size_change(1); cache.persisted = 100; cache.compact_to(101); - assert_eq!(rx.try_recv().unwrap(), -1); + check_mem_size_change(-1); // Test size change for one overlapped entry. cache.append(0, 0, &[new_padded_entry(102, 2, 3)]); - assert_eq!(rx.try_recv().unwrap(), 1); + check_mem_size_change(1); // Test size change for all overlapped entries. cache.append( @@ -1363,42 +1367,42 @@ pub mod tests { 0, &[new_padded_entry(101, 3, 4), new_padded_entry(102, 3, 5)], ); - assert_eq!(rx.try_recv().unwrap(), 5); + check_mem_size_change(5); cache.append(0, 0, &[new_padded_entry(103, 3, 6)]); - assert_eq!(rx.try_recv().unwrap(), 6); + check_mem_size_change(6); // Test trace a dangle entry. let cached_entries = CachedEntries::new(vec![new_padded_entry(100, 1, 1)]); cache.trace_cached_entries(cached_entries); - assert_eq!(rx.try_recv().unwrap(), 1); + check_mem_size_change(97); // Test trace an entry which is still in cache. let cached_entries = CachedEntries::new(vec![new_padded_entry(102, 3, 5)]); cache.trace_cached_entries(cached_entries); - assert_eq!(rx.try_recv().unwrap(), 0); + check_mem_size_change(0); // Test compare `cached_last` with `trunc_to_idx` in `EntryCache::append_impl`. cache.append(0, 0, &[new_padded_entry(103, 4, 7)]); - assert_eq!(rx.try_recv().unwrap(), 1); + check_mem_size_change(1); // Test compact one traced dangle entry and one entry in cache. 
cache.persisted = 101; cache.compact_to(102); - assert_eq!(rx.try_recv().unwrap(), -5); + check_mem_size_change(-5); // Test compact the last traced dangle entry. cache.persisted = 102; cache.compact_to(103); - assert_eq!(rx.try_recv().unwrap(), -5); + check_mem_size_change(-5); // Test compact all entries. cache.persisted = 103; cache.compact_to(104); - assert_eq!(rx.try_recv().unwrap(), -7); + check_mem_size_change(-7); drop(cache); - assert_eq!(rx.try_recv().unwrap(), -896); + check_mem_size_change(-512); } #[test] diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 221e5b1dcea..539d1eb67a8 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -1260,9 +1260,9 @@ where apply_ctx.host.on_empty_cmd(&self.region, index, term); // 1. When a peer become leader, it will send an empty entry. - // 2. When a leader tries to read index during transferring leader, - // it will also propose an empty entry. But that entry will not contain - // any associated callback. So no need to clear callback. + // 2. When a leader tries to read index during transferring leader, it will also + // propose an empty entry. But that entry will not contain any associated + // callback. So no need to clear callback. while let Some(mut cmd) = self.pending_cmds.pop_normal(u64::MAX, term - 1) { if let Some(cb) = cmd.cb.take() { apply_ctx @@ -4786,12 +4786,12 @@ where // command may not read the writes of previous commands and break ACID. If // it's still leader, there are two possibility that mailbox is closed: // 1. The process is shutting down. - // 2. The leader is destroyed. A leader won't propose to destroy itself, so - // it should either destroyed by older leaders or newer leaders. 
Leader - // won't respond to read until it has applied to current term, so no - // command will be proposed until command from older leaders have applied, - // which will then stop it from accepting proposals. If the command is - // proposed by new leader, then it won't be able to propose new proposals. + // 2. The leader is destroyed. A leader won't propose to destroy itself, so it + // should either destroyed by older leaders or newer leaders. Leader won't + // respond to read until it has applied to current term, so no command will + // be proposed until command from older leaders have applied, which will then + // stop it from accepting proposals. If the command is proposed by new + // leader, then it won't be able to propose new proposals. // So only shutdown needs to be checked here. if !tikv_util::thread_group::is_shutdown(!cfg!(test)) { for p in apply.cbs.drain(..) { @@ -7118,7 +7118,7 @@ mod tests { share_source_region_size: _, } = apply_res.exec_res.front().unwrap() { - let r8 = regions.get(0).unwrap(); + let r8 = regions.first().unwrap(); let r1 = regions.get(1).unwrap(); assert_eq!(r8.get_id(), 8); assert_eq!(r1.get_id(), 1); diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 7fd71022343..0ec0e331be5 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -784,7 +784,9 @@ where syncer: UnsafeRecoveryExecutePlanSyncer, failed_voters: Vec, ) { - if let Some(state) = &self.fsm.peer.unsafe_recovery_state && !state.is_abort() { + if let Some(state) = &self.fsm.peer.unsafe_recovery_state + && !state.is_abort() + { warn!( "Unsafe recovery, demote failed voters has already been initiated"; "region_id" => self.region().get_id(), @@ -890,7 +892,9 @@ where } fn on_unsafe_recovery_destroy(&mut self, syncer: UnsafeRecoveryExecutePlanSyncer) { - if let Some(state) = &self.fsm.peer.unsafe_recovery_state && !state.is_abort() { + if let Some(state) = 
&self.fsm.peer.unsafe_recovery_state + && !state.is_abort() + { warn!( "Unsafe recovery, can't destroy, another plan is executing in progress"; "region_id" => self.region_id(), @@ -909,7 +913,9 @@ where } fn on_unsafe_recovery_wait_apply(&mut self, syncer: UnsafeRecoveryWaitApplySyncer) { - if let Some(state) = &self.fsm.peer.unsafe_recovery_state && !state.is_abort() { + if let Some(state) = &self.fsm.peer.unsafe_recovery_state + && !state.is_abort() + { warn!( "Unsafe recovery, can't wait apply, another plan is executing in progress"; "region_id" => self.region_id(), @@ -1039,10 +1045,10 @@ where // in snapshot recovery after we stopped all conf changes from PD. // if the follower slow than leader and has the pending conf change. // that's means - // 1. if the follower didn't finished the conf change - // => it cannot be chosen to be leader during recovery. - // 2. if the follower has been chosen to be leader - // => it already apply the pending conf change already. + // 1. if the follower didn't finished the conf change => it cannot be chosen to + // be leader during recovery. + // 2. if the follower has been chosen to be leader => it already apply the + // pending conf change already. 
return; } debug!( @@ -1726,8 +1732,11 @@ where if self.fsm.peer.force_leader.is_none() { return; } - if let Some(UnsafeRecoveryState::Failed) = self.fsm.peer.unsafe_recovery_state && !force { - // Skip force leader if the plan failed, so wait for the next retry of plan with force leader state holding + if let Some(UnsafeRecoveryState::Failed) = self.fsm.peer.unsafe_recovery_state + && !force + { + // Skip force leader if the plan failed, so wait for the next retry of plan with + // force leader state holding info!( "skip exiting force leader state"; "region_id" => self.fsm.region_id(), diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 52aed7d424f..a26a698073d 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -168,19 +168,25 @@ where } pub fn has_proposed_cb(&self) -> bool { - let Callback::Write { proposed_cb, .. } = self else { return false; }; + let Callback::Write { proposed_cb, .. } = self else { + return false; + }; proposed_cb.is_some() } pub fn invoke_proposed(&mut self) { - let Callback::Write { proposed_cb, .. } = self else { return; }; + let Callback::Write { proposed_cb, .. } = self else { + return; + }; if let Some(cb) = proposed_cb.take() { cb(); } } pub fn invoke_committed(&mut self) { - let Callback::Write { committed_cb, .. } = self else { return; }; + let Callback::Write { committed_cb, .. } = self else { + return; + }; if let Some(cb) = committed_cb.take() { cb(); } @@ -194,12 +200,16 @@ where } pub fn take_proposed_cb(&mut self) -> Option { - let Callback::Write { proposed_cb, .. } = self else { return None; }; + let Callback::Write { proposed_cb, .. } = self else { + return None; + }; proposed_cb.take() } pub fn take_committed_cb(&mut self) -> Option { - let Callback::Write { committed_cb, .. } = self else { return None; }; + let Callback::Write { committed_cb, .. 
} = self else { + return None; + }; committed_cb.take() } } @@ -257,7 +267,9 @@ impl ReadCallback for Callback { } fn read_tracker(&self) -> Option { - let Callback::Read { tracker, .. } = self else { return None; }; + let Callback::Read { tracker, .. } = self else { + return None; + }; Some(*tracker) } } diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 8417766fc22..dd8384e2f66 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -550,7 +550,7 @@ pub fn can_amend_read( if let Some(read) = last_pending_read { let is_read_index_request = req .get_requests() - .get(0) + .first() .map(|req| req.has_read_index()) .unwrap_or_default(); // A read index request or a read with addition request always needs the @@ -2446,14 +2446,14 @@ where CheckApplyingSnapStatus::Applying => { // If this peer is applying snapshot, we should not get a new ready. // There are two reasons in my opinion: - // 1. If we handle a new ready and persist the data(e.g. entries), - // we can not tell raft-rs that this ready has been persisted because - // the ready need to be persisted one by one from raft-rs's view. - // 2. When this peer is applying snapshot, the response msg should not - // be sent to leader, thus the leader will not send new entries to - // this peer. Although it's possible a new leader may send a AppendEntries - // msg to this peer, this possibility is very low. In most cases, there - // is no msg need to be handled. + // 1. If we handle a new ready and persist the data(e.g. entries), we can not + // tell raft-rs that this ready has been persisted because the ready need + // to be persisted one by one from raft-rs's view. + // 2. When this peer is applying snapshot, the response msg should not be sent + // to leader, thus the leader will not send new entries to this peer. 
+ // Although it's possible a new leader may send a AppendEntries msg to this + // peer, this possibility is very low. In most cases, there is no msg need + // to be handled. // So we choose to not get a new ready which makes the logic more clear. debug!( "still applying snapshot, skip further handling"; @@ -4604,27 +4604,25 @@ where /// to target follower first to ensures it's ready to become leader. /// After that the real transfer leader process begin. /// - /// 1. pre_transfer_leader on leader: - /// Leader will send a MsgTransferLeader to follower. - /// 2. pre_ack_transfer_leader_msg on follower: - /// If follower passes all necessary checks, it will try to warmup - /// the entry cache. - /// 3. ack_transfer_leader_msg on follower: - /// When the entry cache has been warmed up or the operator is timeout, - /// the follower reply an ACK with type MsgTransferLeader and - /// its promised persistent index. + /// 1. pre_transfer_leader on leader: Leader will send a MsgTransferLeader + /// to follower. + /// 2. pre_ack_transfer_leader_msg on follower: If follower passes all + /// necessary checks, it will try to warmup the entry cache. + /// 3. ack_transfer_leader_msg on follower: When the entry cache has been + /// warmed up or the operator is timeout, the follower reply an ACK with + /// type MsgTransferLeader and its promised persistent index. /// /// Additional steps when there are remaining pessimistic /// locks to propose (detected in function on_transfer_leader_msg). /// 1. Leader firstly proposes pessimistic locks and then proposes a /// TransferLeader command. - /// 2. ack_transfer_leader_msg on follower again: - /// The follower applies the TransferLeader command and replies an - /// ACK with special context TRANSFER_LEADER_COMMAND_REPLY_CTX. + /// 2. ack_transfer_leader_msg on follower again: The follower applies + /// the TransferLeader command and replies an ACK with special context + /// TRANSFER_LEADER_COMMAND_REPLY_CTX. /// - /// 4. 
ready_to_transfer_leader on leader: - /// Leader checks if it's appropriate to transfer leadership. If it - /// does, it calls raft transfer_leader API to do the remaining work. + /// 4. ready_to_transfer_leader on leader: Leader checks if it's appropriate + /// to transfer leadership. If it does, it calls raft transfer_leader API + /// to do the remaining work. /// /// See also: tikv/rfcs#37. fn propose_transfer_leader( @@ -4658,7 +4656,7 @@ where }); let peer = match peers.len() { 0 => transfer_leader.get_peer(), - 1 => peers.get(0).unwrap(), + 1 => peers.first().unwrap(), _ => peers.choose(&mut rand::thread_rng()).unwrap(), }; @@ -5991,7 +5989,7 @@ mod tests { admin_req.clear_transfer_leader(); req.clear_admin_request(); - for (op, policy) in vec![ + for (op, policy) in [ (CmdType::Get, RequestPolicy::ReadLocal), (CmdType::Snap, RequestPolicy::ReadLocal), (CmdType::Put, RequestPolicy::ProposeNormal), @@ -6144,7 +6142,7 @@ mod tests { // (1, 4) and (1, 5) is not committed let entries = vec![(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (2, 6), (2, 7)]; - let committed = vec![(1, 1), (1, 2), (1, 3), (2, 6), (2, 7)]; + let committed = [(1, 1), (1, 2), (1, 3), (2, 6), (2, 7)]; for (index, term) in entries.clone() { if term != 1 { continue; diff --git a/components/raftstore/src/store/peer_storage.rs b/components/raftstore/src/store/peer_storage.rs index 44ae3543e95..2a9dfec5863 100644 --- a/components/raftstore/src/store/peer_storage.rs +++ b/components/raftstore/src/store/peer_storage.rs @@ -96,7 +96,7 @@ impl PartialEq for SnapState { (&SnapState::Relax, &SnapState::Relax) | (&SnapState::ApplyAborted, &SnapState::ApplyAborted) | (&SnapState::Generating { .. }, &SnapState::Generating { .. 
}) => true, - (&SnapState::Applying(ref b1), &SnapState::Applying(ref b2)) => { + (SnapState::Applying(b1), SnapState::Applying(b2)) => { b1.load(Ordering::Relaxed) == b2.load(Ordering::Relaxed) } _ => false, diff --git a/components/raftstore/src/store/region_snapshot.rs b/components/raftstore/src/store/region_snapshot.rs index 5232675f14a..562f04a18db 100644 --- a/components/raftstore/src/store/region_snapshot.rs +++ b/components/raftstore/src/store/region_snapshot.rs @@ -443,7 +443,7 @@ mod tests { (b"a9".to_vec(), b"v9".to_vec()), ]; - for &(ref k, ref v) in &base_data { + for (k, v) in &base_data { engines.kv.put(&data_key(k), v).unwrap(); } let store = new_peer_storage(engines, &r); diff --git a/components/raftstore/src/store/simple_write.rs b/components/raftstore/src/store/simple_write.rs index dd461e61867..9c3f9611675 100644 --- a/components/raftstore/src/store/simple_write.rs +++ b/components/raftstore/src/store/simple_write.rs @@ -563,13 +563,17 @@ mod tests { SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); assert_eq!(*decoder.header(), *header); let write = decoder.next().unwrap(); - let SimpleWrite::Put(put) = write else { panic!("should be put") }; + let SimpleWrite::Put(put) = write else { + panic!("should be put") + }; assert_eq!(put.cf, CF_DEFAULT); assert_eq!(put.key, b"key"); assert_eq!(put.value, b""); let write = decoder.next().unwrap(); - let SimpleWrite::Delete(delete) = write else { panic!("should be delete") }; + let SimpleWrite::Delete(delete) = write else { + panic!("should be delete") + }; assert_eq!(delete.cf, CF_WRITE); assert_eq!(delete.key, &delete_key); assert_matches!(decoder.next(), None); @@ -577,14 +581,18 @@ mod tests { let (bytes, _) = req_encoder2.encode(); decoder = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); let write = decoder.next().unwrap(); - let SimpleWrite::DeleteRange(dr) = write else { panic!("should be delete range") }; + let SimpleWrite::DeleteRange(dr) 
= write else { + panic!("should be delete range") + }; assert_eq!(dr.cf, CF_LOCK); assert_eq!(dr.start_key, b"key"); assert_eq!(dr.end_key, b"key"); assert!(dr.notify_only); let write = decoder.next().unwrap(); - let SimpleWrite::DeleteRange(dr) = write else { panic!("should be delete range") }; + let SimpleWrite::DeleteRange(dr) = write else { + panic!("should be delete range") + }; assert_eq!(dr.cf, "cf"); assert_eq!(dr.start_key, b"key"); assert_eq!(dr.end_key, b"key"); @@ -609,7 +617,9 @@ mod tests { let mut decoder = SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); let write = decoder.next().unwrap(); - let SimpleWrite::Ingest(ssts) = write else { panic!("should be ingest") }; + let SimpleWrite::Ingest(ssts) = write else { + panic!("should be ingest") + }; assert_eq!(exp, ssts); assert_matches!(decoder.next(), None); } @@ -696,7 +706,9 @@ mod tests { SimpleWriteReqDecoder::new(decoder_fallback, &logger, &bytes, 0, 0).unwrap(); assert_eq!(*decoder.header(), *header); let req = decoder.next().unwrap(); - let SimpleWrite::Put(put) = req else { panic!("should be put") }; + let SimpleWrite::Put(put) = req else { + panic!("should be put") + }; assert_eq!(put.cf, CF_DEFAULT); assert_eq!(put.key, b"key"); assert_eq!(put.value, b""); diff --git a/components/raftstore/src/store/snap.rs b/components/raftstore/src/store/snap.rs index e7e7c6ccb10..71ef09c5413 100644 --- a/components/raftstore/src/store/snap.rs +++ b/components/raftstore/src/store/snap.rs @@ -1335,7 +1335,7 @@ impl Write for Snapshot { } assert!(cf_file.size[self.cf_file_index] != 0); - let mut file_for_recving = cf_file + let file_for_recving = cf_file .file_for_recving .get_mut(self.cf_file_index) .unwrap(); @@ -2174,7 +2174,7 @@ impl TabletSnapManager { .stats .lock() .unwrap() - .drain_filter(|_, (_, stat)| stat.get_region_id() > 0) + .extract_if(|_, (_, stat)| stat.get_region_id() > 0) .map(|(_, (_, stat))| stat) .filter(|stat| stat.get_total_duration_sec() > 1) .collect(); 
diff --git a/components/raftstore/src/store/snap/io.rs b/components/raftstore/src/store/snap/io.rs index 48919474135..641afb3ad36 100644 --- a/components/raftstore/src/store/snap/io.rs +++ b/components/raftstore/src/store/snap/io.rs @@ -367,7 +367,7 @@ mod tests { for db_creater in db_creaters { let (_enc_dir, enc_opts) = gen_db_options_with_encryption("test_cf_build_and_apply_plain_files_enc"); - for db_opt in vec![None, Some(enc_opts)] { + for db_opt in [None, Some(enc_opts)] { let dir = Builder::new().prefix("test-snap-cf-db").tempdir().unwrap(); let db: KvTestEngine = db_creater(dir.path(), db_opt.clone(), None).unwrap(); // Collect keys via the key_callback into a collection. @@ -448,7 +448,7 @@ mod tests { for db_creater in db_creaters { let (_enc_dir, enc_opts) = gen_db_options_with_encryption("test_cf_build_and_apply_sst_files_enc"); - for db_opt in vec![None, Some(enc_opts)] { + for db_opt in [None, Some(enc_opts)] { let dir = Builder::new().prefix("test-snap-cf-db").tempdir().unwrap(); let db = db_creater(dir.path(), db_opt.clone(), None).unwrap(); let snap_cf_dir = Builder::new().prefix("test-snap-cf").tempdir().unwrap(); diff --git a/components/raftstore/src/store/txn_ext.rs b/components/raftstore/src/store/txn_ext.rs index ae352ea08ab..3c628502505 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -84,7 +84,7 @@ pub struct PeerPessimisticLocks { /// likely to be proposed successfully, while the leader will need at /// least another round to receive the transfer leader message from the /// transferee. - /// + /// /// - Split region The lock with the deleted mark SHOULD be moved to new /// regions on region split. Considering the following cases with /// different orders: 1. 
Propose write -> propose split -> apply write -> diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 6eef4c61686..367013a0adc 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -124,8 +124,7 @@ pub fn is_vote_msg(msg: &eraftpb::Message) -> bool { /// peer or not. // There could be two cases: // 1. Target peer already exists but has not established communication with leader yet -// 2. Target peer is added newly due to member change or region split, but it's not -// created yet +// 2. Target peer is added newly due to member change or region split, but it's not created yet // For both cases the region start key and end key are attached in RequestVote and // Heartbeat message for the store of that peer to check whether to create a new peer // when receiving these messages, or just to wait for a pending region split to perform @@ -365,8 +364,7 @@ pub fn check_flashback_state( ) -> Result<()> { // The admin flashback cmd could be proposed/applied under any state. 
if let Some(ty) = admin_type - && (ty == AdminCmdType::PrepareFlashback - || ty == AdminCmdType::FinishFlashback) + && (ty == AdminCmdType::PrepareFlashback || ty == AdminCmdType::FinishFlashback) { return Ok(()); } @@ -1299,7 +1297,9 @@ impl RegionReadProgress { } pub fn notify_advance_resolved_ts(&self) { - if let Ok(core) = self.core.try_lock() && let Some(advance_notify) = &core.advance_notify { + if let Ok(core) = self.core.try_lock() + && let Some(advance_notify) = &core.advance_notify + { advance_notify.notify_waiters(); } } diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index d082d0c2e58..3ec4c65c4c5 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -1753,10 +1753,7 @@ where fn handle_read_stats(&mut self, mut read_stats: ReadStats) { for (region_id, region_info) in read_stats.region_infos.iter_mut() { - let peer_stat = self - .region_peers - .entry(*region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(*region_id).or_default(); peer_stat.read_bytes += region_info.flow.read_bytes as u64; peer_stat.read_keys += region_info.flow.read_keys as u64; self.store_stat.engine_total_bytes_read += region_info.flow.read_bytes as u64; @@ -1778,10 +1775,7 @@ where fn handle_write_stats(&mut self, mut write_stats: WriteStats) { for (region_id, region_info) in write_stats.region_infos.iter_mut() { - let peer_stat = self - .region_peers - .entry(*region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(*region_id).or_default(); peer_stat.query_stats.add_query_stats(®ion_info.0); self.store_stat .engine_total_query_num @@ -2138,8 +2132,12 @@ where let f = async move { for split_info in split_infos { - let Ok(Some((region, leader))) = - pd_client.get_region_leader_by_id(split_info.region_id).await else { continue }; + let Ok(Some((region, leader))) = pd_client + 
.get_region_leader_by_id(split_info.region_id) + .await + else { + continue; + }; if leader.get_id() != split_info.peer.get_id() { info!("load base split region on non-leader"; "region_id" => region.get_id(), @@ -2211,10 +2209,7 @@ where cpu_usage, ) = { let region_id = hb_task.region.get_id(); - let peer_stat = self - .region_peers - .entry(region_id) - .or_insert_with(PeerStat::default); + let peer_stat = self.region_peers.entry(region_id).or_default(); peer_stat.approximate_size = approximate_size; peer_stat.approximate_keys = approximate_keys; diff --git a/components/raftstore/src/store/worker/region.rs b/components/raftstore/src/store/worker/region.rs index dd2c8f90de1..ddb485d9b1e 100644 --- a/components/raftstore/src/store/worker/region.rs +++ b/components/raftstore/src/store/worker/region.rs @@ -179,7 +179,7 @@ impl PendingDeleteRanges { ) -> Vec<(u64, Vec, Vec, u64)> { let ranges = self.find_overlap_ranges(start_key, end_key); - for &(_, ref s_key, ..) in &ranges { + for (_, s_key, ..) 
in &ranges { self.ranges.remove(s_key).unwrap(); } ranges @@ -1293,22 +1293,6 @@ pub(crate) mod tests { } }; - #[allow(dead_code)] - let must_not_finish = |ids: &[u64]| { - for id in ids { - let region_key = keys::region_state_key(*id); - assert_eq!( - engine - .kv - .get_msg_cf::(CF_RAFT, ®ion_key) - .unwrap() - .unwrap() - .get_state(), - PeerState::Applying - ) - } - }; - // snapshot will not ingest cause already write stall gen_and_apply_snap(1); assert_eq!( @@ -1447,6 +1431,21 @@ pub(crate) mod tests { #[cfg(feature = "failpoints")] { + let must_not_finish = |ids: &[u64]| { + for id in ids { + let region_key = keys::region_state_key(*id); + assert_eq!( + engine + .kv + .get_msg_cf::(CF_RAFT, ®ion_key) + .unwrap() + .unwrap() + .get_state(), + PeerState::Applying + ) + } + }; + engine.kv.compact_files_in_range(None, None, None).unwrap(); fail::cfg("handle_new_pending_applies", "return").unwrap(); gen_and_apply_snap(7); diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index e3c0042acf0..a35e6a32c76 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -250,14 +250,22 @@ impl BucketStatsInfo { // The bucket ranges is none when the region buckets is also none. // So this condition indicates that the region buckets needs to refresh not // renew. - if let Some(bucket_ranges) = bucket_ranges&&self.bucket_stat.is_some(){ + if let Some(bucket_ranges) = bucket_ranges + && self.bucket_stat.is_some() + { assert_eq!(buckets.len(), bucket_ranges.len()); - change_bucket_version=self.update_buckets(cfg, next_bucket_version, buckets, region_epoch, &bucket_ranges); - }else{ + change_bucket_version = self.update_buckets( + cfg, + next_bucket_version, + buckets, + region_epoch, + &bucket_ranges, + ); + } else { change_bucket_version = true; // when the region buckets is none, the exclusive buckets includes all the // bucket keys. 
- self.init_buckets(cfg, next_bucket_version, buckets, region_epoch, region); + self.init_buckets(cfg, next_bucket_version, buckets, region_epoch, region); } change_bucket_version } @@ -500,7 +508,7 @@ impl Runner { region: &Region, bucket_ranges: &Vec, ) { - for (mut bucket, bucket_range) in &mut buckets.iter_mut().zip(bucket_ranges) { + for (bucket, bucket_range) in &mut buckets.iter_mut().zip(bucket_ranges) { let mut bucket_region = region.clone(); bucket_region.set_start_key(bucket_range.0.clone()); bucket_region.set_end_key(bucket_range.1.clone()); diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index 185d331bb6b..eb281db4f4e 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -178,7 +178,7 @@ impl Samples { // evaluate the samples according to the given key range, it will update the // sample's left, right and contained counter. fn evaluate(&mut self, key_range: &KeyRange) { - for mut sample in self.0.iter_mut() { + for sample in self.0.iter_mut() { let order_start = if key_range.start_key.is_empty() { Ordering::Greater } else { @@ -452,7 +452,9 @@ impl ReadStats { region_info.flow.add(data); // the bucket of the follower only have the version info and not needs to be // recorded the hot bucket. 
- if let Some(buckets) = buckets && !buckets.sizes.is_empty() { + if let Some(buckets) = buckets + && !buckets.sizes.is_empty() + { let bucket_stat = self .region_buckets .entry(region_id) @@ -496,10 +498,7 @@ pub struct WriteStats { impl WriteStats { pub fn add_query_num(&mut self, region_id: u64, kind: QueryKind) { - let query_stats = self - .region_infos - .entry(region_id) - .or_insert_with(QueryStats::default); + let query_stats = self.region_infos.entry(region_id).or_default(); query_stats.add_query_num(kind, 1); } @@ -989,8 +988,8 @@ mod tests { #[test] fn test_prefix_sum() { - let v = vec![1, 2, 3, 4, 5, 6, 7, 8, 9]; - let expect = vec![1, 3, 6, 10, 15, 21, 28, 36, 45]; + let v = [1, 2, 3, 4, 5, 6, 7, 8, 9]; + let expect = [1, 3, 6, 10, 15, 21, 28, 36, 45]; let pre = prefix_sum(v.iter(), |x| *x); for i in 0..v.len() { assert_eq!(expect[i], pre[i]); diff --git a/components/resolved_ts/src/cmd.rs b/components/resolved_ts/src/cmd.rs index 47d14304112..328f725edaa 100644 --- a/components/resolved_ts/src/cmd.rs +++ b/components/resolved_ts/src/cmd.rs @@ -213,13 +213,13 @@ fn group_row_changes(requests: Vec) -> (HashMap, bool) CF_WRITE => { if let Ok(ts) = key.decode_ts() { let key = key.truncate_ts().unwrap(); - let mut row = changes.entry(key).or_default(); + let row = changes.entry(key).or_default(); assert!(row.write.is_none()); row.write = Some(KeyOp::Put(Some(ts), value)); } } CF_LOCK => { - let mut row = changes.entry(key).or_default(); + let row = changes.entry(key).or_default(); assert!(row.lock.is_none()); row.lock = Some(KeyOp::Put(None, value)); } @@ -239,7 +239,7 @@ fn group_row_changes(requests: Vec) -> (HashMap, bool) match delete.cf.as_str() { CF_LOCK => { let key = Key::from_encoded(delete.take_key()); - let mut row = changes.entry(key).or_default(); + let row = changes.entry(key).or_default(); row.lock = Some(KeyOp::Delete); } "" | CF_WRITE | CF_DEFAULT => {} diff --git a/components/resolved_ts/src/endpoint.rs 
b/components/resolved_ts/src/endpoint.rs index a668d8b0f52..28bf6437a8b 100644 --- a/components/resolved_ts/src/endpoint.rs +++ b/components/resolved_ts/src/endpoint.rs @@ -65,7 +65,8 @@ impl Drop for ResolverStatus { locks, memory_quota, .. - } = self else { + } = self + else { return; }; if locks.is_empty() { @@ -96,7 +97,8 @@ impl ResolverStatus { locks, memory_quota, .. - } = self else { + } = self + else { panic!("region {:?} resolver has ready", region_id) }; // Check if adding a new lock or unlock will exceed the memory @@ -110,10 +112,7 @@ impl ResolverStatus { } fn update_tracked_index(&mut self, index: u64, region_id: u64) { - let ResolverStatus::Pending { - tracked_index, - .. - } = self else { + let ResolverStatus::Pending { tracked_index, .. } = self else { panic!("region {:?} resolver has ready", region_id) }; assert!( @@ -135,7 +134,8 @@ impl ResolverStatus { memory_quota, tracked_index, .. - } = self else { + } = self + else { panic!("region {:?} resolver has ready", region_id) }; // Must take locks, otherwise it may double free memory quota on drop. @@ -683,7 +683,7 @@ where scanner_pool, scan_concurrency_semaphore, regions: HashMap::default(), - _phantom: PhantomData::default(), + _phantom: PhantomData, }; ep.handle_advance_resolved_ts(leader_resolver); ep @@ -866,7 +866,7 @@ where // Tracking or untracking locks with incoming commands that corresponding // observe id is valid. 
- #[allow(clippy::drop_ref)] + #[allow(dropping_references)] fn handle_change_log(&mut self, cmd_batch: Vec) { let size = cmd_batch.iter().map(|b| b.size()).sum::(); RTS_CHANNEL_PENDING_CMD_BYTES.sub(size as i64); @@ -926,7 +926,7 @@ where } fn handle_advance_resolved_ts(&self, leader_resolver: LeadershipResolver) { - let regions = self.regions.keys().into_iter().copied().collect(); + let regions = self.regions.keys().copied().collect(); self.advance_worker.advance_ts_for_regions( regions, leader_resolver, diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index 6c8c90dc38f..7f6d491e4a4 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -100,7 +100,7 @@ impl, E: KvEngine> ScannerPool { Self { workers, cdc_handle, - _phantom: PhantomData::default(), + _phantom: PhantomData, } } diff --git a/components/resource_control/src/lib.rs b/components/resource_control/src/lib.rs index 917718e8409..b9a79e1f9ae 100644 --- a/components/resource_control/src/lib.rs +++ b/components/resource_control/src/lib.rs @@ -1,7 +1,5 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. #![feature(test)] -#![feature(local_key_cell_methods)] -#![feature(array_zip)] use std::sync::Arc; diff --git a/components/resource_control/src/resource_group.rs b/components/resource_control/src/resource_group.rs index d6933d0a383..85730e60481 100644 --- a/components/resource_control/src/resource_group.rs +++ b/components/resource_control/src/resource_group.rs @@ -342,8 +342,8 @@ impl ResourceGroupManager { #[inline] pub fn get_priority_resource_limiters( &self, - ) -> [Arc; TaskPriority::PRIORITY_COUNT] { - self.priority_limiters.clone() + ) -> &[Arc; TaskPriority::PRIORITY_COUNT] { + &self.priority_limiters } } @@ -406,8 +406,8 @@ pub struct ResourceController { // 1. the priority factor is calculate based on read/write RU settings. // 2. 
for read request, we increase a constant virtual time delta at each `get_priority` call // because the cost can't be calculated at start, so we only increase a constant delta and - // increase the real cost after task is executed; but don't increase it at write because - // the cost is known so we just pre-consume it. + // increase the real cost after task is executed; but don't increase it at write because the + // cost is known so we just pre-consume it. is_read: bool, // Track the maximum ru quota used to calculate the factor of each resource group. // factor = max_ru_quota / group_ru_quota * 10.0 @@ -606,8 +606,8 @@ impl ResourceController { }); if near_overflow { let end = Instant::now_coarse(); - info!("all resource groups' virtual time are near overflow, do reset"; - "min" => min_vt, "max" => max_vt, "dur" => ?end.duration_since(start), + info!("all resource groups' virtual time are near overflow, do reset"; + "min" => min_vt, "max" => max_vt, "dur" => ?end.duration_since(start), "reset_dur" => ?end.duration_since(self.last_rest_vt_time.get())); max_vt -= RESET_VT_THRESHOLD; self.last_rest_vt_time.set(end); diff --git a/components/resource_control/src/worker.rs b/components/resource_control/src/worker.rs index b90787914d6..4957ee1aa3f 100644 --- a/components/resource_control/src/worker.rs +++ b/components/resource_control/src/worker.rs @@ -332,10 +332,11 @@ impl PriorityLimiterAdjustWorker { resource_ctl: Arc, resource_quota_getter: R, ) -> Self { - let trackers = resource_ctl - .get_priority_resource_limiters() - .zip(TaskPriority::priorities()) - .map(|(l, p)| PriorityLimiterStatsTracker::new(l, p.as_str())); + let limiters = resource_ctl.get_priority_resource_limiters(); + let priorities = TaskPriority::priorities(); + let trackers = std::array::from_fn(|i| { + PriorityLimiterStatsTracker::new(limiters[i].clone(), priorities[i].as_str()) + }); Self { resource_ctl, trackers, @@ -447,9 +448,9 @@ impl PriorityLimiterAdjustWorker { limits[i - 1] = limit; 
expect_cpu_time_total -= level_expected[i]; } - debug!("adjsut cpu limiter by priority"; "cpu_quota" => process_cpu_stats.total_quota, + debug!("adjsut cpu limiter by priority"; "cpu_quota" => process_cpu_stats.total_quota, "process_cpu" => process_cpu_stats.current_used, "expected_cpu" => ?level_expected, - "cpu_costs" => ?cpu_duration, "limits" => ?limits, + "cpu_costs" => ?cpu_duration, "limits" => ?limits, "limit_cpu_total" => expect_pool_cpu_total, "pool_cpu_cost" => real_cpu_total); } } diff --git a/components/resource_metering/src/lib.rs b/components/resource_metering/src/lib.rs index ba8e2174e19..52b568fb9e7 100644 --- a/components/resource_metering/src/lib.rs +++ b/components/resource_metering/src/lib.rs @@ -2,7 +2,8 @@ // TODO(mornyx): crate doc. -#![feature(hash_drain_filter)] +#![feature(hash_extract_if)] +#![allow(internal_features)] #![feature(core_intrinsics)] use std::{ diff --git a/components/resource_metering/src/model.rs b/components/resource_metering/src/model.rs index 6f7118ef9e1..03cd500eb2e 100644 --- a/components/resource_metering/src/model.rs +++ b/components/resource_metering/src/model.rs @@ -87,7 +87,7 @@ impl RawRecords { pdqselect::select_by(&mut buf, k, |a, b| b.cmp(a)); let kth = buf[k]; // Evict records with cpu time less or equal than `kth` - let evicted_records = self.records.drain_filter(|_, r| r.cpu_time <= kth); + let evicted_records = self.records.extract_if(|_, r| r.cpu_time <= kth); // Record evicted into others for (_, record) in evicted_records { others.merge(&record); diff --git a/components/resource_metering/src/recorder/sub_recorder/cpu.rs b/components/resource_metering/src/recorder/sub_recorder/cpu.rs index 8c4053a80ab..08675bb6153 100644 --- a/components/resource_metering/src/recorder/sub_recorder/cpu.rs +++ b/components/resource_metering/src/recorder/sub_recorder/cpu.rs @@ -9,7 +9,7 @@ use crate::{ localstorage::{LocalStorage, SharedTagInfos}, SubRecorder, }, - RawRecord, RawRecords, + RawRecords, }; /// An 
implementation of [SubRecorder] for collecting cpu statistics. @@ -37,7 +37,7 @@ impl SubRecorder for CpuRecorder { if *last_stat != cur_stat { let delta_ms = (cur_stat.total_cpu_time() - last_stat.total_cpu_time()) * 1_000.; - let record = records.entry(cur_tag).or_insert_with(RawRecord::default); + let record = records.entry(cur_tag).or_default(); record.cpu_time += delta_ms as u32; } thread_stat.stat = cur_stat; diff --git a/components/resource_metering/tests/recorder_test.rs b/components/resource_metering/tests/recorder_test.rs index daa371e7477..9f0ec504917 100644 --- a/components/resource_metering/tests/recorder_test.rs +++ b/components/resource_metering/tests/recorder_test.rs @@ -55,7 +55,7 @@ mod tests { if let Some(tag) = self.current_ctx { self.records .entry(tag.as_bytes().to_vec()) - .or_insert_with(RawRecord::default) + .or_default() .cpu_time += ms; } self.ops.push(op); @@ -156,10 +156,10 @@ mod tests { let mut records = self.records.lock().unwrap(); for k in expected.keys() { - records.entry(k.clone()).or_insert_with(RawRecord::default); + records.entry(k.clone()).or_default(); } for k in records.keys() { - expected.entry(k.clone()).or_insert_with(RawRecord::default); + expected.entry(k.clone()).or_default(); } for (k, expected_value) in expected { let value = records.get(&k).unwrap(); diff --git a/components/server/src/common.rs b/components/server/src/common.rs index a2415facad1..9ecaab0770a 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -560,7 +560,9 @@ impl EnginesResourceInfo { }); for (_, cache) in cached_latest_tablets.iter_mut() { - let Some(tablet) = cache.latest() else { continue }; + let Some(tablet) = cache.latest() else { + continue; + }; for cf in DATA_CFS { fetch_engine_cf(tablet, cf); } diff --git a/components/server/src/signal_handler.rs b/components/server/src/signal_handler.rs index 0644bb1b13a..97efdb1fd2a 100644 --- a/components/server/src/signal_handler.rs +++ 
b/components/server/src/signal_handler.rs @@ -43,11 +43,15 @@ mod imp { info!("{}", metrics::dump(false)); if let Some(ref engines) = engines { info!("{:?}", MiscExt::dump_stats(&engines.kv)); - if let Some(s) = kv_statistics.as_ref() && let Some(s) = s.to_string() { + if let Some(s) = kv_statistics.as_ref() + && let Some(s) = s.to_string() + { info!("{:?}", s); } info!("{:?}", RaftEngine::dump_stats(&engines.raft)); - if let Some(s) = raft_statistics.as_ref() && let Some(s) = s.to_string() { + if let Some(s) = raft_statistics.as_ref() + && let Some(s) = s.to_string() + { info!("{:?}", s); } } diff --git a/components/snap_recovery/src/leader_keeper.rs b/components/snap_recovery/src/leader_keeper.rs index ca2623c82ca..0115e8657c3 100644 --- a/components/snap_recovery/src/leader_keeper.rs +++ b/components/snap_recovery/src/leader_keeper.rs @@ -217,7 +217,7 @@ mod test { #[test] fn test_failure() { - let leaders = vec![1, 2, 3]; + let leaders = [1, 2, 3]; let mut store = MockStore::default(); store.regions = leaders.iter().copied().collect(); let mut lk = LeaderKeeper::::new(store, vec![1, 2, 3, 4]); diff --git a/components/sst_importer/src/import_mode2.rs b/components/sst_importer/src/import_mode2.rs index 70b7d7fac5e..4db29c47a6f 100644 --- a/components/sst_importer/src/import_mode2.rs +++ b/components/sst_importer/src/import_mode2.rs @@ -139,7 +139,7 @@ impl ImportModeSwitcherV2 { pub fn ranges_in_import(&self) -> HashSet { let inner = self.inner.lock().unwrap(); - HashSet::from_iter(inner.import_mode_ranges.keys().into_iter().cloned()) + HashSet::from_iter(inner.import_mode_ranges.keys().cloned()) } } diff --git a/components/sst_importer/src/sst_importer.rs b/components/sst_importer/src/sst_importer.rs index 6eef07b1ebc..e74a1f6978c 100644 --- a/components/sst_importer/src/sst_importer.rs +++ b/components/sst_importer/src/sst_importer.rs @@ -383,8 +383,8 @@ impl SstImporter { // This method is blocking. 
It performs the following transformations before // writing to disk: // - // 1. only KV pairs in the *inclusive* range (`[start, end]`) are used. - // (set the range to `["", ""]` to import everything). + // 1. only KV pairs in the *inclusive* range (`[start, end]`) are used. (set + // the range to `["", ""]` to import everything). // 2. keys are rewritten according to the given rewrite rule. // // Both the range and rewrite keys are specified using origin keys. However, @@ -1541,7 +1541,7 @@ mod tests { let env = get_env(key_manager.clone(), None /* io_rate_limiter */).unwrap(); let db = new_test_engine_with_env(db_path.to_str().unwrap(), &[CF_DEFAULT], env); - let cases = vec![(0, 10), (5, 15), (10, 20), (0, 100)]; + let cases = [(0, 10), (5, 15), (10, 20), (0, 100)]; let mut ingested = Vec::new(); @@ -2057,11 +2057,10 @@ mod tests { ) .unwrap(); let ext_storage = { - let inner = importer.wrap_kms( + importer.wrap_kms( importer.external_storage_or_cache(&backend, "").unwrap(), false, - ); - inner + ) }; // test do_read_kv_file() diff --git a/components/sst_importer/src/util.rs b/components/sst_importer/src/util.rs index 121daf49ea8..55ae771c8ae 100644 --- a/components/sst_importer/src/util.rs +++ b/components/sst_importer/src/util.rs @@ -96,6 +96,7 @@ pub fn copy_sst_for_ingestion, Q: AsRef>( let mut pmts = file_system::metadata(clone)?.permissions(); if pmts.readonly() { + #[allow(clippy::permissions_set_readonly_false)] pmts.set_readonly(false); file_system::set_permissions(clone, pmts)?; } diff --git a/components/test_coprocessor/src/store.rs b/components/test_coprocessor/src/store.rs index 96f405d8f39..6763ea7bb1a 100644 --- a/components/test_coprocessor/src/store.rs +++ b/components/test_coprocessor/src/store.rs @@ -203,7 +203,7 @@ impl Store { } pub fn put(&mut self, ctx: Context, mut kv: Vec<(Vec, Vec)>) { - self.handles.extend(kv.iter().map(|&(ref k, _)| k.clone())); + self.handles.extend(kv.iter().map(|(k, _)| k.clone())); let pk = kv[0].0.clone(); let 
kv = kv .drain(..) diff --git a/components/test_coprocessor_plugin/example_plugin/src/lib.rs b/components/test_coprocessor_plugin/example_plugin/src/lib.rs index afcaa4962b9..d383797c069 100644 --- a/components/test_coprocessor_plugin/example_plugin/src/lib.rs +++ b/components/test_coprocessor_plugin/example_plugin/src/lib.rs @@ -18,4 +18,4 @@ impl CoprocessorPlugin for ExamplePlugin { } } -declare_plugin!(ExamplePlugin::default()); +declare_plugin!(ExamplePlugin); diff --git a/components/test_pd_client/src/pd.rs b/components/test_pd_client/src/pd.rs index 341495cdb52..95d159eb709 100644 --- a/components/test_pd_client/src/pd.rs +++ b/components/test_pd_client/src/pd.rs @@ -1446,12 +1446,12 @@ impl TestPdClient { let status = cluster.replication_status.as_mut().unwrap(); if state.is_none() { status.set_mode(ReplicationMode::Majority); - let mut dr = status.mut_dr_auto_sync(); + let dr = status.mut_dr_auto_sync(); dr.state_id += 1; return; } status.set_mode(ReplicationMode::DrAutoSync); - let mut dr = status.mut_dr_auto_sync(); + let dr = status.mut_dr_auto_sync(); dr.state_id += 1; dr.set_state(state.unwrap()); dr.available_stores = available_stores; diff --git a/components/test_raftstore-v2/src/cluster.rs b/components/test_raftstore-v2/src/cluster.rs index 53ff2c0f0b6..8cc4879dd21 100644 --- a/components/test_raftstore-v2/src/cluster.rs +++ b/components/test_raftstore-v2/src/cluster.rs @@ -223,7 +223,7 @@ pub trait Simulator { None => { error!("call_query_on_node receives none response"; "request" => ?request); // Do not unwrap here, sometimes raftstore v2 may return none. 
- return Err(box_err!("receives none response {:?}", request)); + Err(box_err!("receives none response {:?}", request)) } } } diff --git a/components/test_raftstore-v2/src/lib.rs b/components/test_raftstore-v2/src/lib.rs index 685affe45d0..04939d56155 100644 --- a/components/test_raftstore-v2/src/lib.rs +++ b/components/test_raftstore-v2/src/lib.rs @@ -1,7 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. #![allow(incomplete_features)] #![feature(type_alias_impl_trait)] -#![feature(return_position_impl_trait_in_trait)] #![feature(let_chains)] mod cluster; diff --git a/components/test_raftstore-v2/src/node.rs b/components/test_raftstore-v2/src/node.rs index d63ca0aa2f2..70b6ccb1407 100644 --- a/components/test_raftstore-v2/src/node.rs +++ b/components/test_raftstore-v2/src/node.rs @@ -258,7 +258,7 @@ impl Simulator for NodeCluster { ) } else { let trans = self.trans.core.lock().unwrap(); - let &(ref snap_mgr, _) = &trans.snap_paths[&node_id]; + let (snap_mgr, _) = &trans.snap_paths[&node_id]; (snap_mgr.clone(), None) }; self.snap_mgrs.insert(node_id, snap_mgr.clone()); diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 5073304e17a..074e0731abf 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -1012,7 +1012,18 @@ pub fn must_new_cluster_and_kv_client_mul( TikvClient, Context, ) { - let (cluster, leader, ctx) = must_new_cluster_mul(count); + must_new_cluster_with_cfg_and_kv_client_mul(count, |_| {}) +} + +pub fn must_new_cluster_with_cfg_and_kv_client_mul( + count: usize, + configure: impl FnMut(&mut Cluster, RocksEngine>), +) -> ( + Cluster, RocksEngine>, + TikvClient, + Context, +) { + let (cluster, leader, ctx) = must_new_and_configure_cluster_mul(count, configure); let env = Arc::new(Environment::new(1)); let channel = @@ -1021,6 +1032,7 @@ pub fn must_new_cluster_and_kv_client_mul( (cluster, client, ctx) } + pub fn 
must_new_cluster_mul( count: usize, ) -> ( diff --git a/components/test_raftstore-v2/src/transport_simulate.rs b/components/test_raftstore-v2/src/transport_simulate.rs index 7b9333aae83..995662ac484 100644 --- a/components/test_raftstore-v2/src/transport_simulate.rs +++ b/components/test_raftstore-v2/src/transport_simulate.rs @@ -3,7 +3,7 @@ use std::sync::{Arc, RwLock}; use engine_traits::{KvEngine, RaftEngine}; -use futures::Future; +use futures::future::{BoxFuture, FutureExt}; use kvproto::{ raft_cmdpb::{RaftCmdRequest, RaftCmdResponse}, raft_serverpb::RaftMessage, @@ -64,30 +64,30 @@ impl Transport for SimulateTransport { } } -pub trait SnapshotRouter { +pub trait SnapshotRouter { fn snapshot( &mut self, req: RaftCmdRequest, - ) -> impl Future, RaftCmdResponse>> + Send; + ) -> BoxFuture<'static, std::result::Result, RaftCmdResponse>>; } impl SnapshotRouter for RaftRouter { fn snapshot( &mut self, req: RaftCmdRequest, - ) -> impl Future, RaftCmdResponse>> + Send + ) -> BoxFuture<'static, std::result::Result, RaftCmdResponse>> { - self.snapshot(req) + self.snapshot(req).boxed() } } -impl> SnapshotRouter for SimulateTransport { +impl> SnapshotRouter for SimulateTransport { fn snapshot( &mut self, req: RaftCmdRequest, - ) -> impl Future, RaftCmdResponse>> + Send + ) -> BoxFuture<'static, std::result::Result, RaftCmdResponse>> { - self.ch.snapshot(req) + self.ch.snapshot(req).boxed() } } diff --git a/components/test_raftstore/src/cluster.rs b/components/test_raftstore/src/cluster.rs index 2521fccb694..c7fe39f1434 100644 --- a/components/test_raftstore/src/cluster.rs +++ b/components/test_raftstore/src/cluster.rs @@ -1305,7 +1305,9 @@ where engine_traits::CF_RAFT, &keys::region_state_key(region_id), ) - .unwrap() && state.get_state() == peer_state { + .unwrap() + && state.get_state() == peer_state + { return; } sleep_ms(10); diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 5fdd4f24822..db438d4233a 100644 --- 
a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -278,7 +278,7 @@ impl Simulator for NodeCluster { (snap_mgr, Some(tmp)) } else { let trans = self.trans.core.lock().unwrap(); - let &(ref snap_mgr, _) = &trans.snap_paths[&node_id]; + let (snap_mgr, _) = &trans.snap_paths[&node_id]; (snap_mgr.clone(), None) }; diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 883a38edb23..4ee99d18e12 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -954,8 +954,18 @@ pub fn must_new_cluster_and_kv_client_mul( TikvClient, Context, ) { - let (cluster, leader, ctx) = must_new_cluster_mul(count); + must_new_cluster_with_cfg_and_kv_client_mul(count, |_| {}) +} +pub fn must_new_cluster_with_cfg_and_kv_client_mul( + count: usize, + configure: impl FnMut(&mut Cluster>), +) -> ( + Cluster>, + TikvClient, + Context, +) { + let (cluster, leader, ctx) = must_new_and_configure_cluster_mul(count, configure); let env = Arc::new(Environment::new(1)); let channel = ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader.get_store_id())); diff --git a/components/test_util/src/lib.rs b/components/test_util/src/lib.rs index 453ed7fb7f1..222af48fd4e 100644 --- a/components/test_util/src/lib.rs +++ b/components/test_util/src/lib.rs @@ -27,7 +27,6 @@ pub use crate::{ encryption::*, kv_generator::*, logging::*, - macros::*, runner::{clear_failpoints, run_failpoint_tests, run_test_with_hook, run_tests, TestHook}, security::*, }; diff --git a/components/tidb_query_codegen/src/rpn_function.rs b/components/tidb_query_codegen/src/rpn_function.rs index 33976939c83..ea3017d5d02 100644 --- a/components/tidb_query_codegen/src/rpn_function.rs +++ b/components/tidb_query_codegen/src/rpn_function.rs @@ -1739,27 +1739,24 @@ mod tests_normal { /// Compare TokenStream with all white chars trimmed. 
fn assert_token_stream_equal(l: TokenStream, r: TokenStream) { - let result = l - .clone() - .into_iter() - .eq_by(r.clone().into_iter(), |x, y| match x { - TokenTree::Ident(x) => matches!(y, TokenTree::Ident(y) if x == y), - TokenTree::Literal(x) => { - matches!(y, TokenTree::Literal(y) if x.to_string() == y.to_string()) - } - TokenTree::Punct(x) => { - matches!(y, TokenTree::Punct(y) if x.to_string() == y.to_string()) - } - TokenTree::Group(x) => { - if let TokenTree::Group(y) = y { - assert_token_stream_equal(x.stream(), y.stream()); + let result = l.clone().into_iter().eq_by(r.clone(), |x, y| match x { + TokenTree::Ident(x) => matches!(y, TokenTree::Ident(y) if x == y), + TokenTree::Literal(x) => { + matches!(y, TokenTree::Literal(y) if x.to_string() == y.to_string()) + } + TokenTree::Punct(x) => { + matches!(y, TokenTree::Punct(y) if x.to_string() == y.to_string()) + } + TokenTree::Group(x) => { + if let TokenTree::Group(y) = y { + assert_token_stream_equal(x.stream(), y.stream()); - true - } else { - false - } + true + } else { + false } - }); + } + }); assert!(result, "expect: {:#?}, actual: {:#?}", &l, &r); } diff --git a/components/tidb_query_datatype/src/codec/convert.rs b/components/tidb_query_datatype/src/codec/convert.rs index 418841547ca..d2bbee78078 100644 --- a/components/tidb_query_datatype/src/codec/convert.rs +++ b/components/tidb_query_datatype/src/codec/convert.rs @@ -574,13 +574,13 @@ pub fn bytes_to_int_without_context(bytes: &[u8]) -> Result { if let Some(&c) = trimed.next() { if c == b'-' { negative = true; - } else if (b'0'..=b'9').contains(&c) { + } else if c.is_ascii_digit() { r = Some(i64::from(c) - i64::from(b'0')); } else if c != b'+' { return Ok(0); } - for c in trimed.take_while(|&c| (b'0'..=b'9').contains(c)) { + for c in trimed.take_while(|&c| c.is_ascii_digit()) { let cur = i64::from(*c - b'0'); r = r.and_then(|r| r.checked_mul(10)).and_then(|r| { if negative { @@ -605,13 +605,13 @@ pub fn bytes_to_uint_without_context(bytes: 
&[u8]) -> Result { let mut trimed = bytes.iter().skip_while(|&&b| b == b' ' || b == b'\t'); let mut r = Some(0u64); if let Some(&c) = trimed.next() { - if (b'0'..=b'9').contains(&c) { + if c.is_ascii_digit() { r = Some(u64::from(c) - u64::from(b'0')); } else if c != b'+' { return Ok(0); } - for c in trimed.take_while(|&c| (b'0'..=b'9').contains(c)) { + for c in trimed.take_while(|&c| c.is_ascii_digit()) { r = r .and_then(|r| r.checked_mul(10)) .and_then(|r| r.checked_add(u64::from(*c - b'0'))); @@ -856,7 +856,7 @@ pub fn get_valid_int_prefix_helper<'a>( if (c == '+' || c == '-') && i == 0 { continue; } - if ('0'..='9').contains(&c) { + if c.is_ascii_digit() { valid_len = i + 1; continue; } @@ -917,7 +917,7 @@ pub fn get_valid_float_prefix_helper<'a>( break; } e_idx = i - } else if !('0'..='9').contains(&c) { + } else if !c.is_ascii_digit() { break; } else { saw_digit = true; diff --git a/components/tidb_query_datatype/src/codec/data_type/mod.rs b/components/tidb_query_datatype/src/codec/data_type/mod.rs index 8ca36790824..b464b1119c8 100644 --- a/components/tidb_query_datatype/src/codec/data_type/mod.rs +++ b/components/tidb_query_datatype/src/codec/data_type/mod.rs @@ -248,7 +248,7 @@ macro_rules! 
impl_evaluable_type { } #[inline] - fn borrow_scalar_value_ref<'a>(v: ScalarValueRef<'a>) -> Option<&'a Self> { + fn borrow_scalar_value_ref(v: ScalarValueRef<'_>) -> Option<&Self> { match v { ScalarValueRef::$ty(x) => x, other => panic!( diff --git a/components/tidb_query_datatype/src/codec/datum.rs b/components/tidb_query_datatype/src/codec/datum.rs index dde98003475..f91d204b3b0 100644 --- a/components/tidb_query_datatype/src/codec/datum.rs +++ b/components/tidb_query_datatype/src/codec/datum.rs @@ -668,7 +668,7 @@ impl Datum { Datum::F64(res) } } - (&Datum::Dec(ref l), &Datum::Dec(ref r)) => { + (Datum::Dec(l), Datum::Dec(r)) => { let dec: Result = (l + r).into(); return dec.map(Datum::Dec); } @@ -700,7 +700,7 @@ impl Datum { } (&Datum::U64(l), &Datum::U64(r)) => l.checked_sub(r).into(), (&Datum::F64(l), &Datum::F64(r)) => return Ok(Datum::F64(l - r)), - (&Datum::Dec(ref l), &Datum::Dec(ref r)) => { + (Datum::Dec(l), Datum::Dec(r)) => { let dec: Result = (l - r).into(); return dec.map(Datum::Dec); } @@ -724,7 +724,7 @@ impl Datum { } (&Datum::U64(l), &Datum::U64(r)) => l.checked_mul(r).into(), (&Datum::F64(l), &Datum::F64(r)) => return Ok(Datum::F64(l * r)), - (&Datum::Dec(ref l), &Datum::Dec(ref r)) => return Ok(Datum::Dec((l * r).unwrap())), + (Datum::Dec(l), Datum::Dec(r)) => return Ok(Datum::Dec((l * r).unwrap())), (l, r) => return Err(invalid_type!("{} can't multiply {}", l, r)), }; @@ -1179,7 +1179,7 @@ mod tests { | (&Datum::Null, &Datum::Null) | (&Datum::Time(_), &Datum::Time(_)) | (&Datum::Json(_), &Datum::Json(_)) => true, - (&Datum::Dec(ref d1), &Datum::Dec(ref d2)) => d1.prec_and_frac() == d2.prec_and_frac(), + (Datum::Dec(d1), Datum::Dec(d2)) => d1.prec_and_frac() == d2.prec_and_frac(), _ => false, } } diff --git a/components/tidb_query_datatype/src/codec/mysql/decimal.rs b/components/tidb_query_datatype/src/codec/mysql/decimal.rs index 3a2be14758e..fe0f9150beb 100644 --- a/components/tidb_query_datatype/src/codec/mysql/decimal.rs +++ 
b/components/tidb_query_datatype/src/codec/mysql/decimal.rs @@ -1885,7 +1885,7 @@ impl<'a> ConvertTo for JsonRef<'a> { fn first_non_digit(bs: &[u8], start_idx: usize) -> usize { bs.iter() .skip(start_idx) - .position(|c| !(b'0'..=b'9').contains(c)) + .position(|c| !c.is_ascii_digit()) .map_or_else(|| bs.len(), |s| s + start_idx) } diff --git a/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs b/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs index 867d8ec2c20..f76b29790f9 100644 --- a/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs +++ b/components/tidb_query_datatype/src/codec/mysql/json/jcodec.rs @@ -28,9 +28,9 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryObject` in TiDB `types/json/binary.go` - fn write_json_obj_from_keys_values<'a>( + fn write_json_obj_from_keys_values( &mut self, - mut entries: Vec<(&[u8], JsonRef<'a>)>, + mut entries: Vec<(&[u8], JsonRef<'_>)>, ) -> Result<()> { entries.sort_by(|a, b| a.0.cmp(b.0)); // object: element-count size key-entry* value-entry* key* value* @@ -122,7 +122,7 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryArray` in TiDB `types/json/binary.go` - fn write_json_ref_array<'a>(&mut self, data: &[JsonRef<'a>]) -> Result<()> { + fn write_json_ref_array(&mut self, data: &[JsonRef<'_>]) -> Result<()> { let element_count = data.len(); let value_entries_len = VALUE_ENTRY_LEN * element_count; let values_len = data.iter().fold(0, |acc, v| acc + v.encoded_len()); @@ -167,7 +167,7 @@ pub trait JsonEncoder: NumberEncoder { } // See `appendBinaryValElem` in TiDB `types/json/binary.go` - fn write_value_entry<'a>(&mut self, value_offset: &mut u32, v: &JsonRef<'a>) -> Result<()> { + fn write_value_entry(&mut self, value_offset: &mut u32, v: &JsonRef<'_>) -> Result<()> { let tp = v.get_type(); self.write_u8(tp as u8)?; match tp { diff --git a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs index 
41131fc5933..4befe30c3c1 100644 --- a/components/tidb_query_datatype/src/codec/mysql/time/mod.rs +++ b/components/tidb_query_datatype/src/codec/mysql/time/mod.rs @@ -2701,7 +2701,7 @@ mod tests { #[test] fn test_no_zero_in_date() -> Result<()> { - let cases = vec!["2019-01-00", "2019-00-01"]; + let cases = ["2019-01-00", "2019-00-01"]; for &case in cases.iter() { // Enable NO_ZERO_IN_DATE only. If zero-date is encountered, a warning is diff --git a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs index da117c96e2c..aa5eb3fc56f 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs @@ -298,7 +298,7 @@ impl<'a, T: PrimInt> LeBytes<'a, T> { fn new(slice: &'a [u8]) -> Self { Self { slice, - _marker: PhantomData::default(), + _marker: PhantomData, } } diff --git a/components/tidb_query_datatype/src/codec/table.rs b/components/tidb_query_datatype/src/codec/table.rs index 37becbfb801..a8585d83b3e 100644 --- a/components/tidb_query_datatype/src/codec/table.rs +++ b/components/tidb_query_datatype/src/codec/table.rs @@ -319,7 +319,7 @@ pub fn decode_row( cols: &HashMap, ) -> Result> { let mut values = datum::decode(data)?; - if values.get(0).map_or(true, |d| *d == Datum::Null) { + if values.first().map_or(true, |d| *d == Datum::Null) { return Ok(HashMap::default()); } if values.len() & 1 == 1 { @@ -528,7 +528,7 @@ pub fn generate_index_data_for_test( let mut expect_row = HashMap::default(); let mut v: Vec<_> = indice .iter() - .map(|&(ref cid, ref value)| { + .map(|(cid, value)| { expect_row.insert( *cid, datum::encode_key(&mut EvalContext::default(), &[value.clone()]).unwrap(), diff --git a/components/tidb_query_datatype/src/lib.rs b/components/tidb_query_datatype/src/lib.rs index 49fd8db06bd..0c46d5f61db 100644 --- a/components/tidb_query_datatype/src/lib.rs +++ b/components/tidb_query_datatype/src/lib.rs @@ -5,6 
+5,7 @@ #![feature(proc_macro_hygiene)] #![feature(min_specialization)] #![feature(test)] +#![allow(internal_features)] #![feature(str_internals)] #[macro_use] diff --git a/components/tidb_query_executors/src/index_scan_executor.rs b/components/tidb_query_executors/src/index_scan_executor.rs index 3a5c53a4d09..5ebf8a031d3 100644 --- a/components/tidb_query_executors/src/index_scan_executor.rs +++ b/components/tidb_query_executors/src/index_scan_executor.rs @@ -611,8 +611,8 @@ impl IndexScanExecutorImpl { } #[inline] - fn build_operations<'a, 'b>( - &'b self, + fn build_operations<'a>( + &self, mut key_payload: &'a [u8], index_value: &'a [u8], ) -> Result<(DecodeHandleOp<'a>, DecodePartitionIdOp<'a>, RestoreData<'a>)> { diff --git a/components/tidb_query_executors/src/runner.rs b/components/tidb_query_executors/src/runner.rs index 7c410befb25..27e52dde288 100644 --- a/components/tidb_query_executors/src/runner.rs +++ b/components/tidb_query_executors/src/runner.rs @@ -137,31 +137,31 @@ impl BatchExecutorsRunner<()> { .map_err(|e| other_err!("BatchProjectionExecutor: {}", e))?; } ExecType::TypeJoin => { - other_err!("Join executor not implemented"); + return Err(other_err!("Join executor not implemented")); } ExecType::TypeKill => { - other_err!("Kill executor not implemented"); + return Err(other_err!("Kill executor not implemented")); } ExecType::TypeExchangeSender => { - other_err!("ExchangeSender executor not implemented"); + return Err(other_err!("ExchangeSender executor not implemented")); } ExecType::TypeExchangeReceiver => { - other_err!("ExchangeReceiver executor not implemented"); + return Err(other_err!("ExchangeReceiver executor not implemented")); } ExecType::TypePartitionTableScan => { - other_err!("PartitionTableScan executor not implemented"); + return Err(other_err!("PartitionTableScan executor not implemented")); } ExecType::TypeSort => { - other_err!("Sort executor not implemented"); + return Err(other_err!("Sort executor not implemented")); } 
ExecType::TypeWindow => { - other_err!("Window executor not implemented"); + return Err(other_err!("Window executor not implemented")); } ExecType::TypeExpand => { - other_err!("Expand executor not implemented"); + return Err(other_err!("Expand executor not implemented")); } ExecType::TypeExpand2 => { - other_err!("Expand2 executor not implemented"); + return Err(other_err!("Expand2 executor not implemented")); } } } diff --git a/components/tidb_query_executors/src/selection_executor.rs b/components/tidb_query_executors/src/selection_executor.rs index bd65547109d..8d6b245e894 100644 --- a/components/tidb_query_executors/src/selection_executor.rs +++ b/components/tidb_query_executors/src/selection_executor.rs @@ -537,8 +537,7 @@ mod tests { }) .collect(); - for predicates in vec![ - // Swap predicates should produce same results. + for predicates in [ vec![predicate[0](), predicate[1]()], vec![predicate[1](), predicate[0]()], ] { @@ -572,8 +571,7 @@ mod tests { }) .collect(); - for predicates in vec![ - // Swap predicates should produce same results. 
+ for predicates in [ vec![predicate[0](), predicate[1](), predicate[2]()], vec![predicate[1](), predicate[2](), predicate[0]()], ] { diff --git a/components/tidb_query_executors/src/util/aggr_executor.rs b/components/tidb_query_executors/src/util/aggr_executor.rs index 0535e8dbd83..a5d760dc80d 100644 --- a/components/tidb_query_executors/src/util/aggr_executor.rs +++ b/components/tidb_query_executors/src/util/aggr_executor.rs @@ -641,8 +641,8 @@ pub mod tests { )) as Box> }; - let test_paging_size = vec![2, 5, 7]; - let expect_call_num = vec![1, 3, 4]; + let test_paging_size = [2, 5, 7]; + let expect_call_num = [1, 3, 4]; let expect_row_num = vec![vec![4], vec![0, 0, 5], vec![0, 0, 0, 6]]; let executor_builders: Vec) -> _>> = vec![Box::new(exec_fast), Box::new(exec_slow)]; diff --git a/components/tidb_query_executors/src/util/mod.rs b/components/tidb_query_executors/src/util/mod.rs index ca05e49fcd3..db456a84883 100644 --- a/components/tidb_query_executors/src/util/mod.rs +++ b/components/tidb_query_executors/src/util/mod.rs @@ -28,13 +28,13 @@ pub fn ensure_columns_decoded( /// Evaluates expressions and outputs the result into the given Vec. Lifetime of /// the expressions are erased. 
-pub unsafe fn eval_exprs_decoded_no_lifetime<'a>( +pub unsafe fn eval_exprs_decoded_no_lifetime( ctx: &mut EvalContext, exprs: &[RpnExpression], schema: &[FieldType], input_physical_columns: &LazyBatchColumnVec, input_logical_rows: &[usize], - output: &mut Vec>, + output: &mut Vec>, ) -> Result<()> { unsafe fn erase_lifetime<'a, T: ?Sized>(v: &T) -> &'a T { &*(v as *const T) diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs index 8913e9573f8..292396cfcc8 100644 --- a/components/tidb_query_expr/src/impl_cast.rs +++ b/components/tidb_query_expr/src/impl_cast.rs @@ -6536,7 +6536,7 @@ mod tests { "cast_decimal_as_duration", ); - let values = vec![ + let values = [ Decimal::from_bytes(b"9995959").unwrap().unwrap(), Decimal::from_bytes(b"-9995959").unwrap().unwrap(), ]; diff --git a/components/tidb_query_expr/src/impl_miscellaneous.rs b/components/tidb_query_expr/src/impl_miscellaneous.rs index 5d2daed7f9a..5ab17c1226b 100644 --- a/components/tidb_query_expr/src/impl_miscellaneous.rs +++ b/components/tidb_query_expr/src/impl_miscellaneous.rs @@ -58,7 +58,7 @@ pub fn inet_aton(addr: BytesRef) -> Result> { } let (mut byte_result, mut result, mut dot_count): (u64, u64, usize) = (0, 0, 0); for c in addr.chars() { - if ('0'..='9').contains(&c) { + if c.is_ascii_digit() { let digit = c as u64 - '0' as u64; byte_result = byte_result * 10 + digit; if byte_result > 255 { @@ -502,7 +502,8 @@ mod tests { (Some(hex("0A000509")), Some(b"10.0.5.9".to_vec())), ( Some(hex("00000000000000000000000001020304")), - Some(b"::1.2.3.4".to_vec()), + // See https://github.com/rust-lang/libs-team/issues/239 + Some(b"::102:304".to_vec()), ), ( Some(hex("00000000000000000000FFFF01020304")), @@ -529,12 +530,12 @@ mod tests { (None, None), ]; - for (input, expect_output) in test_cases { + for (i, (input, expect_output)) in test_cases.into_iter().enumerate() { let output = RpnFnScalarEvaluator::new() .push_param(input) 
.evaluate::(ScalarFuncSig::Inet6Ntoa) .unwrap(); - assert_eq!(output, expect_output); + assert_eq!(output, expect_output, "case {}", i); } } diff --git a/components/tidb_query_expr/src/impl_string.rs b/components/tidb_query_expr/src/impl_string.rs index c86e8d22ccb..aa4c20f20e0 100644 --- a/components/tidb_query_expr/src/impl_string.rs +++ b/components/tidb_query_expr/src/impl_string.rs @@ -63,13 +63,13 @@ pub fn oct_string(s: BytesRef, writer: BytesWriter) -> Result { if let Some(&c) = trimmed.next() { if c == b'-' { negative = true; - } else if (b'0'..=b'9').contains(&c) { + } else if c.is_ascii_digit() { r = Some(u64::from(c) - u64::from(b'0')); } else if c != b'+' { return Ok(writer.write(Some(b"0".to_vec()))); } - for c in trimmed.take_while(|&c| (b'0'..=b'9').contains(c)) { + for c in trimmed.take_while(|&c| c.is_ascii_digit()) { r = r .and_then(|r| r.checked_mul(10)) .and_then(|r| r.checked_add(u64::from(*c - b'0'))); @@ -886,7 +886,7 @@ impl TrimDirection { } #[inline] -fn trim<'a, 'b>(string: &'a [u8], pattern: &'b [u8], direction: TrimDirection) -> &'a [u8] { +fn trim<'a>(string: &'a [u8], pattern: &[u8], direction: TrimDirection) -> &'a [u8] { if pattern.is_empty() { return string; } diff --git a/components/tidb_query_expr/src/types/expr_eval.rs b/components/tidb_query_expr/src/types/expr_eval.rs index b892333b0ef..9a289fc2715 100644 --- a/components/tidb_query_expr/src/types/expr_eval.rs +++ b/components/tidb_query_expr/src/types/expr_eval.rs @@ -1091,6 +1091,7 @@ mod tests { use tipb::{Expr, ScalarFuncSig}; #[allow(clippy::trivially_copy_pass_by_ref)] + #[allow(clippy::extra_unused_type_parameters)] #[rpn_fn(capture = [metadata], metadata_mapper = prepare_a::)] fn fn_a_nonnull( metadata: &i64, @@ -1100,6 +1101,7 @@ mod tests { Ok(Some(v + *metadata)) } + #[allow(clippy::extra_unused_type_parameters)] fn prepare_a(_expr: &mut Expr) -> Result { Ok(42) } diff --git a/components/tikv_alloc/src/lib.rs b/components/tikv_alloc/src/lib.rs index 
cbe1d8590bf..3b27d13e030 100644 --- a/components/tikv_alloc/src/lib.rs +++ b/components/tikv_alloc/src/lib.rs @@ -84,6 +84,7 @@ #![cfg_attr(test, feature(test))] #![cfg_attr(test, feature(custom_test_frameworks))] #![cfg_attr(test, test_runner(runner::run_env_conditional_tests))] +#![allow(internal_features)] #![feature(core_intrinsics)] #[cfg(feature = "jemalloc")] diff --git a/components/tikv_kv/src/cursor.rs b/components/tikv_kv/src/cursor.rs index 576aa5cfa76..858edfffec2 100644 --- a/components/tikv_kv/src/cursor.rs +++ b/components/tikv_kv/src/cursor.rs @@ -605,7 +605,7 @@ mod tests { (b"a9".to_vec(), b"v9".to_vec()), ]; - for &(ref k, ref v) in &base_data { + for (k, v) in &base_data { engine.put(&data_key(k), v).unwrap(); } (r, base_data) diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 1fe61b78633..236d38ac0b5 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -8,6 +8,7 @@ #![feature(bound_map)] #![feature(min_specialization)] #![feature(type_alias_impl_trait)] +#![feature(impl_trait_in_assoc_type)] #![feature(associated_type_defaults)] #[macro_use(fail_point)] diff --git a/components/tikv_util/src/logger/formatter.rs b/components/tikv_util/src/logger/formatter.rs index c53c5896519..b786d2aa681 100644 --- a/components/tikv_util/src/logger/formatter.rs +++ b/components/tikv_util/src/logger/formatter.rs @@ -11,9 +11,9 @@ where let mut start = 0; let bytes = file_name.as_bytes(); for (index, &b) in bytes.iter().enumerate() { - if (b'A'..=b'Z').contains(&b) - || (b'a'..=b'z').contains(&b) - || (b'0'..=b'9').contains(&b) + if b.is_ascii_uppercase() + || b.is_ascii_lowercase() + || b.is_ascii_digit() || b == b'.' 
|| b == b'-' || b == b'_' diff --git a/components/tikv_util/src/lru.rs b/components/tikv_util/src/lru.rs index 302bfc9264b..e2ea1e8c64b 100644 --- a/components/tikv_util/src/lru.rs +++ b/components/tikv_util/src/lru.rs @@ -317,7 +317,7 @@ where if replace { self.size_policy.on_remove(e.key(), &e.get().value); self.size_policy.on_insert(e.key(), &value); - let mut entry = e.get_mut(); + let entry = e.get_mut(); self.trace.promote(entry.record); entry.value = value; } else { diff --git a/components/tikv_util/src/memory.rs b/components/tikv_util/src/memory.rs index 15ffece4425..259a44e5614 100644 --- a/components/tikv_util/src/memory.rs +++ b/components/tikv_util/src/memory.rs @@ -36,7 +36,7 @@ pub trait HeapSize { impl HeapSize for [u8] { fn heap_size(&self) -> usize { - self.len() * mem::size_of::() + std::mem::size_of_val(self) } } diff --git a/components/tikv_util/src/metrics/allocator_metrics.rs b/components/tikv_util/src/metrics/allocator_metrics.rs index 260aa88ac8e..af22e411767 100644 --- a/components/tikv_util/src/metrics/allocator_metrics.rs +++ b/components/tikv_util/src/metrics/allocator_metrics.rs @@ -64,7 +64,7 @@ impl Collector for AllocStatsCollector { .set(dealloc as _); }); let mut g = self.memory_stats.collect(); - g.extend(self.allocation.collect().into_iter()); + g.extend(self.allocation.collect()); g } } diff --git a/components/tikv_util/src/store/region.rs b/components/tikv_util/src/store/region.rs index 58af4e9fdfa..9725bdae548 100644 --- a/components/tikv_util/src/store/region.rs +++ b/components/tikv_util/src/store/region.rs @@ -42,7 +42,7 @@ pub fn region_on_same_stores(lhs: &Region, rhs: &Region) -> bool { /// Check if the given region exists on stores, by checking whether any one of /// the peers belonging to this region exist on the given stores. 
-pub fn region_on_stores(region: &Region, store_ids: &Vec) -> bool { +pub fn region_on_stores(region: &Region, store_ids: &[u64]) -> bool { if store_ids.is_empty() { return true; } diff --git a/components/tikv_util/src/sys/cpu_time.rs b/components/tikv_util/src/sys/cpu_time.rs index 6ec1621c629..61608d1518f 100644 --- a/components/tikv_util/src/sys/cpu_time.rs +++ b/components/tikv_util/src/sys/cpu_time.rs @@ -333,7 +333,7 @@ mod tests { for _ in 0..num * 10 { std::thread::spawn(move || { loop { - let _ = (0..10_000_000).into_iter().sum::(); + let _ = (0..10_000_000).sum::(); } }); } diff --git a/components/txn_types/src/types.rs b/components/txn_types/src/types.rs index 624ac81212d..2b95042fcce 100644 --- a/components/txn_types/src/types.rs +++ b/components/txn_types/src/types.rs @@ -451,7 +451,7 @@ impl From for Mutation { /// `OldValue` is used by cdc to read the previous value associated with some /// key during the prewrite process. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Default)] pub enum OldValue { /// A real `OldValue`. Value { value: Value }, @@ -460,18 +460,13 @@ pub enum OldValue { /// `None` means we don't found a previous value. None, /// The user doesn't care about the previous value. + #[default] Unspecified, /// Not sure whether the old value exists or not. users can seek CF_WRITE to /// the give position to take a look. SeekWrite(Key), } -impl Default for OldValue { - fn default() -> Self { - OldValue::Unspecified - } -} - impl OldValue { pub fn value(value: Value) -> Self { OldValue::Value { value } @@ -590,8 +585,9 @@ impl WriteBatchFlags { /// The position info of the last actual write (PUT or DELETE) of a LOCK record. /// Note that if the last change is a DELETE, its LastChange can be either /// Exist(which points to it) or NotExist. 
-#[derive(Clone, Eq, PartialEq, Debug)] +#[derive(Clone, Eq, PartialEq, Debug, Default)] pub enum LastChange { + #[default] Unknown, /// The pointer may point to a PUT or a DELETE record. Exist { @@ -647,12 +643,6 @@ impl LastChange { } } -impl Default for LastChange { - fn default() -> Self { - LastChange::Unknown - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/rust-toolchain b/rust-toolchain deleted file mode 100644 index 4e5f9a4d82b..00000000000 --- a/rust-toolchain +++ /dev/null @@ -1 +0,0 @@ -nightly-2022-11-15 diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 00000000000..86192639647 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "nightly-2023-12-10" +components = ["rustfmt", "clippy", "rust-src", "rust-analyzer"] diff --git a/scripts/clippy b/scripts/clippy index 7685cddfeeb..23ed1a2cd9c 100755 --- a/scripts/clippy +++ b/scripts/clippy @@ -21,6 +21,8 @@ fi # - `derive_partial_eq_without_eq` has compilation overhead. # - Blocking issue for enabling `result_large_err` is the protobuf messages. # - Blocking issue for clippy::large_enum_variant is the raftstore peer message. +# - Enables `clippy::needless_return_with_question_mark` after +# https://github.com/rust-lang/rust-clippy/issues/11982 is fixed. 
CLIPPY_LINTS=( -A clippy::module_inception \ -A clippy::result_large_err \ @@ -48,6 +50,10 @@ CLIPPY_LINTS=( -D clippy::disallowed_methods \ -D rust-2018-idioms \ -D clippy::assertions_on_result_states \ + -A clippy::needless_return_with_question_mark \ + -A clippy::non_canonical_partial_ord_impl \ + -A clippy::arc_with_non_send_sync \ + -A clippy::let_underscore_future \ ) cargo clippy --workspace \ diff --git a/src/config/mod.rs b/src/config/mod.rs index 7d631eeb1a8..8f34711f8c8 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1530,7 +1530,7 @@ impl DbConfig { opts.set_paranoid_checks(b); } if for_engine == EngineType::RaftKv { - opts.set_info_log(RocksdbLogger::default()); + opts.set_info_log(RocksdbLogger); } opts.set_info_log_level(self.info_log_level.into()); if self.titan.enabled { @@ -1553,29 +1553,26 @@ impl DbConfig { pub fn build_cf_resources(&self, cache: Cache) -> CfResources { let mut compaction_thread_limiters = HashMap::new(); - if let Some(n) = self.defaultcf.max_compactions && n > 0 { - compaction_thread_limiters.insert( - CF_DEFAULT, - ConcurrentTaskLimiter::new(CF_DEFAULT, n), - ); + if let Some(n) = self.defaultcf.max_compactions + && n > 0 + { + compaction_thread_limiters + .insert(CF_DEFAULT, ConcurrentTaskLimiter::new(CF_DEFAULT, n)); } - if let Some(n) = self.writecf.max_compactions && n > 0 { - compaction_thread_limiters.insert( - CF_WRITE, - ConcurrentTaskLimiter::new(CF_WRITE, n), - ); + if let Some(n) = self.writecf.max_compactions + && n > 0 + { + compaction_thread_limiters.insert(CF_WRITE, ConcurrentTaskLimiter::new(CF_WRITE, n)); } - if let Some(n) = self.lockcf.max_compactions && n > 0 { - compaction_thread_limiters.insert( - CF_LOCK, - ConcurrentTaskLimiter::new(CF_LOCK, n), - ); + if let Some(n) = self.lockcf.max_compactions + && n > 0 + { + compaction_thread_limiters.insert(CF_LOCK, ConcurrentTaskLimiter::new(CF_LOCK, n)); } - if let Some(n) = self.raftcf.max_compactions && n > 0 { - compaction_thread_limiters.insert( - 
CF_RAFT, - ConcurrentTaskLimiter::new(CF_RAFT, n), - ); + if let Some(n) = self.raftcf.max_compactions + && n > 0 + { + compaction_thread_limiters.insert(CF_RAFT, ConcurrentTaskLimiter::new(CF_RAFT, n)); } let mut write_buffer_managers = HashMap::default(); self.lockcf.write_buffer_limit.map(|limit| { @@ -1757,7 +1754,9 @@ impl Default for RaftDefaultCfConfig { impl RaftDefaultCfConfig { pub fn build_opt(&self, cache: &Cache) -> RocksCfOptions { - let limiter = if let Some(n) = self.max_compactions && n > 0 { + let limiter = if let Some(n) = self.max_compactions + && n > 0 + { Some(ConcurrentTaskLimiter::new(CF_DEFAULT, n)) } else { None @@ -1908,7 +1907,7 @@ impl RaftDbConfig { opts.set_max_log_file_size(self.info_log_max_size.0); opts.set_log_file_time_to_roll(self.info_log_roll_time.as_secs()); opts.set_keep_log_file_num(self.info_log_keep_log_file_num); - opts.set_info_log(RaftDbLogger::default()); + opts.set_info_log(RaftDbLogger); opts.set_info_log_level(self.info_log_level.into()); opts.set_max_subcompactions(self.max_sub_compactions); opts.set_writable_file_max_buffer_size(self.writable_file_max_buffer_size.0 as i32); @@ -2065,7 +2064,7 @@ impl ConfigManager for DbConfigManger { self.cfg.update(change.clone())?; let change_str = format!("{:?}", change); let mut change: Vec<(String, ConfigValue)> = change.into_iter().collect(); - let cf_config = change.drain_filter(|(name, _)| name.ends_with("cf")); + let cf_config = change.extract_if(|(name, _)| name.ends_with("cf")); for (cf_name, cf_change) in cf_config { if let ConfigValue::Module(mut cf_change) = cf_change { // defaultcf -> default @@ -2099,7 +2098,7 @@ impl ConfigManager for DbConfigManger { } if let Some(rate_bytes_config) = change - .drain_filter(|(name, _)| name == "rate_bytes_per_sec") + .extract_if(|(name, _)| name == "rate_bytes_per_sec") .next() { let rate_bytes_per_sec: ReadableSize = rate_bytes_config.1.into(); @@ -2108,7 +2107,7 @@ impl ConfigManager for DbConfigManger { } if let 
Some(rate_bytes_config) = change - .drain_filter(|(name, _)| name == "rate_limiter_auto_tuned") + .extract_if(|(name, _)| name == "rate_limiter_auto_tuned") .next() { let rate_limiter_auto_tuned: bool = rate_bytes_config.1.into(); @@ -2117,7 +2116,7 @@ impl ConfigManager for DbConfigManger { } if let Some(size) = change - .drain_filter(|(name, _)| name == "write_buffer_limit") + .extract_if(|(name, _)| name == "write_buffer_limit") .next() { let size: ReadableSize = size.1.into(); @@ -2125,14 +2124,14 @@ impl ConfigManager for DbConfigManger { } if let Some(f) = change - .drain_filter(|(name, _)| name == "write_buffer_flush_oldest_first") + .extract_if(|(name, _)| name == "write_buffer_flush_oldest_first") .next() { self.db.set_flush_oldest_first(f.1.into())?; } if let Some(background_jobs_config) = change - .drain_filter(|(name, _)| name == "max_background_jobs") + .extract_if(|(name, _)| name == "max_background_jobs") .next() { let max_background_jobs: i32 = background_jobs_config.1.into(); @@ -2140,7 +2139,7 @@ impl ConfigManager for DbConfigManger { } if let Some(background_subcompactions_config) = change - .drain_filter(|(name, _)| name == "max_sub_compactions") + .extract_if(|(name, _)| name == "max_sub_compactions") .next() { let max_subcompactions: u32 = background_subcompactions_config.1.into(); @@ -2149,7 +2148,7 @@ impl ConfigManager for DbConfigManger { } if let Some(background_flushes_config) = change - .drain_filter(|(name, _)| name == "max_background_flushes") + .extract_if(|(name, _)| name == "max_background_flushes") .next() { let max_background_flushes: i32 = background_flushes_config.1.into(); @@ -4044,7 +4043,12 @@ impl TikvConfig { && let Some(b) = self.rocksdb.writecf.block_cache_size && let Some(c) = self.rocksdb.lockcf.block_cache_size { - let d = self.raftdb.defaultcf.block_cache_size.map(|s| s.0).unwrap_or_default(); + let d = self + .raftdb + .defaultcf + .block_cache_size + .map(|s| s.0) + .unwrap_or_default(); let sum = a.0 + b.0 + c.0 
+ d; self.storage.block_cache.capacity = Some(ReadableSize(sum)); } diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 001d1e94ca0..63434a85ca1 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -424,15 +424,16 @@ impl Endpoint { // Check if the buckets version is latest. // skip if request don't carry this bucket version. - if let Some(ref buckets) = latest_buckets&& - buckets.version > tracker.req_ctx.context.buckets_version && - tracker.req_ctx.context.buckets_version!=0 { - let mut bucket_not_match = errorpb::BucketVersionNotMatch::default(); - bucket_not_match.set_version(buckets.version); - bucket_not_match.set_keys(buckets.keys.clone().into()); - let mut err = errorpb::Error::default(); - err.set_bucket_version_not_match(bucket_not_match); - return Err(Error::Region(err)); + if let Some(ref buckets) = latest_buckets + && buckets.version > tracker.req_ctx.context.buckets_version + && tracker.req_ctx.context.buckets_version != 0 + { + let mut bucket_not_match = errorpb::BucketVersionNotMatch::default(); + bucket_not_match.set_version(buckets.version); + bucket_not_match.set_keys(buckets.keys.clone().into()); + let mut err = errorpb::Error::default(); + err.set_bucket_version_not_match(bucket_not_match); + return Err(Error::Region(err)); } // When snapshot is retrieved, deadline may exceed. tracker.on_snapshot_finished(); diff --git a/src/coprocessor/metrics.rs b/src/coprocessor/metrics.rs index 02f45d35311..7d2d7e9e947 100644 --- a/src/coprocessor/metrics.rs +++ b/src/coprocessor/metrics.rs @@ -285,7 +285,7 @@ pub fn tls_collect_scan_details(cmd: ReqTag, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_insert_with(Default::default) + .or_default() .add(stats); }); } diff --git a/src/coprocessor/mod.rs b/src/coprocessor/mod.rs index fcd16f9b947..6b8e62c01a4 100644 --- a/src/coprocessor/mod.rs +++ b/src/coprocessor/mod.rs @@ -21,6 +21,8 @@ //! //! 
Please refer to `Endpoint` for more details. +#![allow(clippy::diverging_sub_expression)] + mod cache; mod checksum; pub mod dag; diff --git a/src/lib.rs b/src/lib.rs index acccb2f55e5..5460bfd66ba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,13 +23,14 @@ #![feature(proc_macro_hygiene)] #![feature(min_specialization)] #![feature(box_patterns)] -#![feature(drain_filter)] +#![feature(extract_if)] #![feature(deadline_api)] #![feature(let_chains)] #![feature(read_buf)] #![feature(type_alias_impl_trait)] +#![feature(impl_trait_in_assoc_type)] #![allow(incomplete_features)] -#![feature(return_position_impl_trait_in_trait)] +#![feature(core_io_borrowed_buf)] #[macro_use(fail_point)] extern crate fail; diff --git a/src/server/debug.rs b/src/server/debug.rs index 70e1df855d5..2a1a069f06e 100644 --- a/src/server/debug.rs +++ b/src/server/debug.rs @@ -155,7 +155,7 @@ pub trait Debugger { start: &[u8], end: &[u8], limit: u64, - ) -> Result, MvccInfo)>> + Send>; + ) -> Result, MvccInfo)>> + Send + 'static>; /// Compact the cf[start..end) in the db. 
fn compact( @@ -887,7 +887,7 @@ where start: &[u8], end: &[u8], limit: u64, - ) -> Result, MvccInfo)>> + Send> { + ) -> Result, MvccInfo)>> + Send + 'static> { if end.is_empty() && limit == 0 { return Err(Error::InvalidArgument("no limit and to_key".to_owned())); } @@ -959,7 +959,9 @@ where fn dump_kv_stats(&self) -> Result { let mut kv_str = box_try!(MiscExt::dump_stats(&self.engines.kv)); - if let Some(s) = self.kv_statistics.as_ref() && let Some(s) = s.to_string() { + if let Some(s) = self.kv_statistics.as_ref() + && let Some(s) = s.to_string() + { kv_str.push_str(&s); } Ok(kv_str) @@ -967,7 +969,9 @@ where fn dump_raft_stats(&self) -> Result { let mut raft_str = box_try!(RaftEngine::dump_stats(&self.engines.raft)); - if let Some(s) = self.raft_statistics.as_ref() && let Some(s) = s.to_string() { + if let Some(s) = self.raft_statistics.as_ref() + && let Some(s) = s.to_string() + { raft_str.push_str(&s); } Ok(raft_str) diff --git a/src/server/debug2.rs b/src/server/debug2.rs index 4230828dff1..68404441eed 100644 --- a/src/server/debug2.rs +++ b/src/server/debug2.rs @@ -9,7 +9,6 @@ use engine_traits::{ CachedTablet, Iterable, MiscExt, Peekable, RaftEngine, RaftLogBatch, TabletContext, TabletRegistry, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; -use futures::future::Future; use keys::{data_key, enc_end_key, enc_start_key, DATA_MAX_KEY, DATA_PREFIX_KEY}; use kvproto::{ debugpb::Db as DbType, @@ -721,7 +720,7 @@ impl Debugger for DebuggerImplV2 { start: &[u8], end: &[u8], limit: u64, - ) -> Result, MvccInfo)>> + Send> { + ) -> Result, MvccInfo)>> + Send + 'static> { if end.is_empty() && limit == 0 { return Err(Error::InvalidArgument("no limit and to_key".to_owned())); } @@ -813,7 +812,9 @@ impl Debugger for DebuggerImplV2 { } true }); - if let Some(s) = self.kv_statistics.as_ref() && let Some(s) = s.to_string() { + if let Some(s) = self.kv_statistics.as_ref() + && let Some(s) = s.to_string() + { kv_str.push_str(&s); } Ok(kv_str) @@ -821,7 +822,9 @@ impl Debugger for 
DebuggerImplV2 { fn dump_raft_stats(&self) -> Result { let mut raft_str = box_try!(RaftEngine::dump_stats(&self.raft_engine)); - if let Some(s) = self.raft_statistics.as_ref() && let Some(s) = s.to_string() { + if let Some(s) = self.raft_statistics.as_ref() + && let Some(s) = s.to_string() + { raft_str.push_str(&s); } Ok(raft_str) @@ -905,7 +908,7 @@ impl Debugger for DebuggerImplV2 { self.raft_statistics = s; } - fn key_range_flashback_to_version( + async fn key_range_flashback_to_version( &self, _version: u64, _region_id: u64, @@ -913,8 +916,8 @@ impl Debugger for DebuggerImplV2 { _end_key: &[u8], _start_ts: u64, _commit_ts: u64, - ) -> impl Future> + Send { - async move { unimplemented!() } + ) -> Result<()> { + unimplemented!() } fn get_range_properties(&self, start: &[u8], end: &[u8]) -> Result> { @@ -1113,7 +1116,7 @@ fn get_tablet_cache( "tablet load failed, region_state {:?}", region_state.get_state() ); - return Err(box_err!(e)); + Err(box_err!(e)) } } } diff --git a/src/server/engine_factory.rs b/src/server/engine_factory.rs index 3593c01ca7f..1d7f1eed000 100644 --- a/src/server/engine_factory.rs +++ b/src/server/engine_factory.rs @@ -211,13 +211,10 @@ impl TabletFactory for KvEngineFactory { db_opts.add_event_listener(listener.clone_with(ctx.id)); } if let Some(storage) = &self.inner.state_storage - && let Some(flush_state) = ctx.flush_state { - let listener = PersistenceListener::new( - ctx.id, - ctx.suffix.unwrap(), - flush_state, - storage.clone(), - ); + && let Some(flush_state) = ctx.flush_state + { + let listener = + PersistenceListener::new(ctx.id, ctx.suffix.unwrap(), flush_state, storage.clone()); db_opts.add_event_listener(RocksPersistenceListener::new(listener)); } let kv_engine = diff --git a/src/server/gc_worker/gc_manager.rs b/src/server/gc_worker/gc_manager.rs index d9c5287b67d..d0d0d295f9a 100644 --- a/src/server/gc_worker/gc_manager.rs +++ b/src/server/gc_worker/gc_manager.rs @@ -586,7 +586,9 @@ impl GcMan ) -> GcManagerResult> { // Get 
the information of the next region to do GC. let (region, next_key) = self.get_next_gc_context(from_key); - let Some(region) = region else { return Ok(None) }; + let Some(region) = region else { + return Ok(None); + }; let hex_start = format!("{:?}", log_wrappers::Value::key(region.get_start_key())); let hex_end = format!("{:?}", log_wrappers::Value::key(region.get_end_key())); @@ -856,7 +858,7 @@ mod tests { // Following code asserts gc_tasks == expected_gc_tasks. assert_eq!(gc_tasks.len(), expected_gc_tasks.len()); - let all_passed = gc_tasks.into_iter().zip(expected_gc_tasks.into_iter()).all( + let all_passed = gc_tasks.into_iter().zip(expected_gc_tasks).all( |((region, safe_point), (expect_region, expect_safe_point))| { region == expect_region && safe_point == expect_safe_point.into() }, @@ -933,8 +935,7 @@ mod tests { #[test] fn test_auto_gc_rewinding() { - for regions in vec![ - // First region starts with empty and last region ends with empty. + for regions in [ vec![ (b"".to_vec(), b"1".to_vec(), 1), (b"1".to_vec(), b"2".to_vec(), 2), diff --git a/src/server/gc_worker/gc_worker.rs b/src/server/gc_worker/gc_worker.rs index a0537a478d0..878b2d3ab83 100644 --- a/src/server/gc_worker/gc_worker.rs +++ b/src/server/gc_worker/gc_worker.rs @@ -275,7 +275,7 @@ fn get_keys_in_region(keys: &mut Peekable>, region: &Region) -> Ve let mut keys_in_region = Vec::new(); loop { - let Some(key) = keys.peek() else {break}; + let Some(key) = keys.peek() else { break }; let key = key.as_encoded().as_slice(); if key < region.get_start_key() { @@ -690,10 +690,7 @@ impl GcRunnerCore { } pub fn mut_stats(&mut self, key_mode: GcKeyMode) -> &mut Statistics { - let stats = self - .stats_map - .entry(key_mode) - .or_insert_with(Default::default); + let stats = self.stats_map.entry(key_mode).or_default(); stats } @@ -2335,7 +2332,6 @@ mod tests { fn generate_keys(start: u64, end: u64) -> Vec { (start..end) - .into_iter() .map(|i| { let key = format!("k{:02}", i); 
Key::from_raw(key.as_bytes()) diff --git a/src/server/lock_manager/deadlock.rs b/src/server/lock_manager/deadlock.rs index fd749cc3175..7fc8e046ef0 100644 --- a/src/server/lock_manager/deadlock.rs +++ b/src/server/lock_manager/deadlock.rs @@ -361,20 +361,15 @@ impl DetectTable { } /// The role of the detector. -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, PartialEq, Clone, Copy, Default)] pub enum Role { /// The node is the leader of the detector. Leader, /// The node is a follower of the leader. + #[default] Follower, } -impl Default for Role { - fn default() -> Role { - Role::Follower - } -} - impl From for Role { fn from(role: StateRole) -> Role { match role { diff --git a/src/server/raft_client.rs b/src/server/raft_client.rs index 700d409c129..83373103c54 100644 --- a/src/server/raft_client.rs +++ b/src/server/raft_client.rs @@ -57,7 +57,7 @@ use crate::server::{ pub struct MetadataSourceStoreId {} impl MetadataSourceStoreId { - pub const KEY: &str = "source_store_id"; + pub const KEY: &'static str = "source_store_id"; pub fn parse(value: &[u8]) -> u64 { let value = std::str::from_utf8(value).unwrap(); diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 9f42925b6d4..883b0e2d684 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -581,7 +581,9 @@ where tx.notify(res); } rx.inspect(move |ev| { - let WriteEvent::Finished(res) = ev else { return }; + let WriteEvent::Finished(res) = ev else { + return; + }; match res { Ok(()) => { ASYNC_REQUESTS_COUNTER_VEC.write.success.inc(); @@ -667,7 +669,7 @@ where match res { Ok(CmdRes::Resp(mut r)) => { let e = if r - .get(0) + .first() .map(|resp| resp.get_read_index().has_locked()) .unwrap_or(false) { diff --git a/src/server/raftkv/raft_extension.rs b/src/server/raftkv/raft_extension.rs index 733d60c838c..6605115e481 100644 --- a/src/server/raftkv/raft_extension.rs +++ b/src/server/raftkv/raft_extension.rs @@ -64,7 +64,9 @@ where let region_id = msg.get_region_id(); let 
msg_ty = msg.get_message().get_msg_type(); // Channel full and region not found are ignored unless it's a key message. - if let Err(e) = self.router.send_raft_msg(msg) && key_message { + if let Err(e) = self.router.send_raft_msg(msg) + && key_message + { error!("failed to send raft message"; "region_id" => region_id, "msg_ty" => ?msg_ty, "err" => ?e); } } diff --git a/src/server/raftkv2/mod.rs b/src/server/raftkv2/mod.rs index 321a6614350..c9d53ef7c99 100644 --- a/src/server/raftkv2/mod.rs +++ b/src/server/raftkv2/mod.rs @@ -251,7 +251,7 @@ impl tikv_kv::Engine for RaftKv2 { Err(mut resp) => { if resp .get_responses() - .get(0) + .first() .map_or(false, |r| r.get_read_index().has_locked()) { let locked = resp.mut_responses()[0].mut_read_index().take_locked(); @@ -352,7 +352,9 @@ impl tikv_kv::Engine for RaftKv2 { early_err: res.err(), }) .inspect(move |ev| { - let WriteEvent::Finished(res) = ev else { return }; + let WriteEvent::Finished(res) = ev else { + return; + }; match res { Ok(()) => { ASYNC_REQUESTS_COUNTER_VEC.write.success.inc(); diff --git a/src/server/raftkv2/node.rs b/src/server/raftkv2/node.rs index 5fce5c0024b..ab4b9c5f03c 100644 --- a/src/server/raftkv2/node.rs +++ b/src/server/raftkv2/node.rs @@ -269,7 +269,9 @@ where /// Stops the Node. pub fn stop(&mut self) { let store_id = self.store.get_id(); - let Some((_, mut system)) = self.system.take() else { return }; + let Some((_, mut system)) = self.system.take() else { + return; + }; info!(self.logger, "stop raft store thread"; "store_id" => store_id); system.shutdown(); } diff --git a/src/server/raftkv2/raft_extension.rs b/src/server/raftkv2/raft_extension.rs index 8b15c73fb65..b2b6ce4db00 100644 --- a/src/server/raftkv2/raft_extension.rs +++ b/src/server/raftkv2/raft_extension.rs @@ -24,7 +24,9 @@ impl tikv_kv::RaftExtension for Extension let region_id = msg.get_region_id(); let msg_ty = msg.get_message().get_msg_type(); // Channel full and region not found are ignored unless it's a key message. 
- if let Err(e) = self.router.send_raft_message(Box::new(msg)) && key_message { + if let Err(e) = self.router.send_raft_message(Box::new(msg)) + && key_message + { error!("failed to send raft message"; "region_id" => region_id, "msg_ty" => ?msg_ty, "err" => ?e); } } diff --git a/src/server/service/debug.rs b/src/server/service/debug.rs index d0b715542d5..497d8240684 100644 --- a/src/server/service/debug.rs +++ b/src/server/service/debug.rs @@ -300,7 +300,6 @@ where let debugger = self.debugger.clone(); let res = self.pool.spawn(async move { - let req = req; debugger .compact( req.get_db(), diff --git a/src/server/service/diagnostics/log.rs b/src/server/service/diagnostics/log.rs index 8e77d65233e..f64ff440121 100644 --- a/src/server/service/diagnostics/log.rs +++ b/src/server/service/diagnostics/log.rs @@ -612,7 +612,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = vec![ + let expected = [ "2019/08/23 18:09:56.387 +08:00", "2019/08/23 18:09:56.387 +08:00", // for invalid line "2019/08/23 18:09:57.387 +08:00", @@ -639,7 +639,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = vec![ + let expected = [ "2019/08/23 18:09:56.387 +08:00", "2019/08/23 18:09:56.387 +08:00", // for invalid line "2019/08/23 18:09:57.387 +08:00", @@ -662,7 +662,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = vec!["2019/08/23 18:09:53.387 +08:00"] + let expected = ["2019/08/23 18:09:53.387 +08:00"] .iter() .map(|s| timestamp(s)) .collect::>(); @@ -680,7 +680,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![], ) .unwrap(); - let expected = vec![ + let expected = [ "2019/08/23 18:09:58.387 +08:00", "2019/08/23 18:09:59.387 +08:00", "2019/08/23 18:10:06.387 +08:00", @@ -704,7 +704,7 @@ Some invalid logs 4: Welcome to TiKV - test-filter"# vec![regex::Regex::new(".*test-filter.*").unwrap()], ) .unwrap(); - let expected = vec![ + let expected = [ 
"2019/08/23 18:09:58.387 +08:00", "2019/08/23 18:10:06.387 +08:00", // for invalid line ] @@ -783,7 +783,7 @@ Some invalid logs 2: Welcome to TiKV - test-filter"# req.set_end_time(i64::MAX); req.set_levels(vec![LogLevel::Warn as _]); req.set_patterns(vec![".*test-filter.*".to_string()].into()); - let expected = vec![ + let expected = [ "2019/08/23 18:09:58.387 +08:00", "2019/08/23 18:11:58.387 +08:00", "2019/08/23 18:11:59.387 +08:00", // for invalid line @@ -796,9 +796,7 @@ Some invalid logs 2: Welcome to TiKV - test-filter"# s.collect::>() .await .into_iter() - .map(|mut resp| resp.take_messages().into_iter()) - .into_iter() - .flatten() + .flat_map(|mut resp| resp.take_messages().into_iter()) .map(|msg| msg.get_time()) .collect::>() }); diff --git a/src/server/status_server/mod.rs b/src/server/status_server/mod.rs index 36402a3e5dc..862b2b19c72 100644 --- a/src/server/status_server/mod.rs +++ b/src/server/status_server/mod.rs @@ -206,7 +206,7 @@ where } async fn get_cmdline(_req: Request) -> hyper::Result> { - let args = args().into_iter().fold(String::new(), |mut a, b| { + let args = args().fold(String::new(), |mut a, b| { a.push_str(&b); a.push('\x00'); a diff --git a/src/server/tablet_snap.rs b/src/server/tablet_snap.rs index 997a932be9d..7f5178d6b27 100644 --- a/src/server/tablet_snap.rs +++ b/src/server/tablet_snap.rs @@ -154,13 +154,17 @@ pub trait SnapCacheBuilder: Send + Sync { impl SnapCacheBuilder for TabletRegistry { fn build(&self, region_id: u64, path: &Path) -> Result<()> { - if let Some(mut c) = self.get(region_id) && let Some(db) = c.latest() { + if let Some(mut c) = self.get(region_id) + && let Some(db) = c.latest() + { let mut checkpointer = db.new_checkpointer()?; // Avoid flush. 
checkpointer.create_at(path, None, u64::MAX)?; Ok(()) } else { - Err(Error::Other(format!("region {} not found", region_id).into())) + Err(Error::Other( + format!("region {} not found", region_id).into(), + )) } } } @@ -326,7 +330,9 @@ async fn cleanup_cache( }; let mut buffer = Vec::with_capacity(PREVIEW_CHUNK_LEN); for meta in preview.take_metas().into_vec() { - if is_sst(&meta.file_name) && let Some(p) = exists.remove(&meta.file_name) { + if is_sst(&meta.file_name) + && let Some(p) = exists.remove(&meta.file_name) + { if is_sst_match_preview(&meta, &p, &mut buffer, limiter, key_manager).await? { reused += meta.file_size; continue; @@ -1029,7 +1035,11 @@ pub fn copy_tablet_snapshot( if let Some(m) = sender_snap_mgr.key_manager() && let Some((iv, key)) = m.get_file_internal(path.to_str().unwrap())? { - key_importer.as_mut().unwrap().add(recv.to_str().unwrap(), iv, key).unwrap(); + key_importer + .as_mut() + .unwrap() + .add(recv.to_str().unwrap(), iv, key) + .unwrap(); } } if let Some(i) = key_importer { diff --git a/src/storage/lock_manager/lock_wait_context.rs b/src/storage/lock_manager/lock_wait_context.rs index 32c99867a3f..1eba8cd81b7 100644 --- a/src/storage/lock_manager/lock_wait_context.rs +++ b/src/storage/lock_manager/lock_wait_context.rs @@ -387,9 +387,9 @@ mod tests { let res = rx.recv().unwrap().unwrap_err(); assert!(matches!( &res, - StorageError(box StorageErrorInner::Txn(TxnError( - box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::WriteConflict { .. })) - ))) + StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( + box MvccErrorInner::WriteConflict { .. }, + ))))) )); // The tx should be dropped. 
rx.recv().unwrap_err(); @@ -422,9 +422,9 @@ mod tests { let res = rx.recv().unwrap().unwrap_err(); assert!(matches!( &res, - StorageError(box StorageErrorInner::Txn(TxnError( - box TxnErrorInner::Mvcc(MvccError(box MvccErrorInner::KeyIsLocked(_))) - ))) + StorageError(box StorageErrorInner::Txn(TxnError(box TxnErrorInner::Mvcc(MvccError( + box MvccErrorInner::KeyIsLocked(_), + ))))) )); // Since the cancellation callback can fully execute only when it's successfully // removed from the lock waiting queues, it's impossible that `finish_request` diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index cf7956d76b7..d62f7862fae 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -63,7 +63,7 @@ pub fn tls_collect_scan_details(cmd: CommandKind, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_insert_with(Default::default) + .or_default() .add(stats); }); } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 13d868849f4..1a38c781d0f 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -2002,7 +2002,7 @@ impl Storage { key_ranges.push(build_key_range(k.as_encoded(), k.as_encoded(), false)); (k, v) }) - .filter(|&(_, ref v)| !(v.is_ok() && v.as_ref().unwrap().is_none())) + .filter(|(_, v)| !(v.is_ok() && v.as_ref().unwrap().is_none())) .map(|(k, v)| match v { Ok(v) => { let (user_key, _) = F::decode_raw_key_owned(k, false).unwrap(); @@ -2146,7 +2146,7 @@ impl Storage { }) } - fn check_ttl_valid(key_cnt: usize, ttls: &Vec) -> Result<()> { + fn check_ttl_valid(key_cnt: usize, ttls: &[u64]) -> Result<()> { if !F::IS_TTL_ENABLED { if ttls.iter().any(|&x| x != 0) { return Err(Error::from(ErrorInner::TtlNotEnabled)); @@ -3960,9 +3960,9 @@ mod tests { let result = block_on(storage.get(Context::default(), Key::from_raw(b"x"), 100.into())); assert!(matches!( result, - Err(Error(box ErrorInner::Txn(txn::Error( - box txn::ErrorInner::Mvcc(mvcc::Error(box mvcc::ErrorInner::KeyIsLocked { .. 
})) - )))) + Err(Error(box ErrorInner::Txn(txn::Error(box txn::ErrorInner::Mvcc(mvcc::Error( + box mvcc::ErrorInner::KeyIsLocked { .. }, + )))))) )); } @@ -5812,7 +5812,7 @@ mod tests { ]; // Write key-value pairs one by one - for &(ref key, ref value) in &test_data { + for (key, value) in &test_data { storage .raw_put( ctx.clone(), @@ -5871,7 +5871,7 @@ mod tests { let mut total_bytes: u64 = 0; let mut is_first = true; // Write key-value pairs one by one - for &(ref key, ref value) in &test_data { + for (key, value) in &test_data { storage .raw_put( ctx.clone(), @@ -6313,7 +6313,7 @@ mod tests { ]; // Write key-value pairs one by one - for &(ref key, ref value) in &test_data { + for (key, value) in &test_data { storage .raw_put( ctx.clone(), @@ -6328,7 +6328,7 @@ mod tests { } // Verify pairs in a batch - let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); let results = test_data.into_iter().map(|(k, v)| Some((k, v))).collect(); expect_multi_values( results, @@ -6360,7 +6360,7 @@ mod tests { ]; // Write key-value pairs one by one - for &(ref key, ref value) in &test_data { + for (key, value) in &test_data { storage .raw_put( ctx.clone(), @@ -6378,7 +6378,7 @@ mod tests { let mut ids = vec![]; let cmds = test_data .iter() - .map(|&(ref k, _)| { + .map(|(k, _)| { let mut req = RawGetRequest::default(); req.set_context(ctx.clone()); req.set_key(k.clone()); @@ -6449,10 +6449,10 @@ mod tests { rx.recv().unwrap(); // Verify pairs exist - let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); let results = test_data .iter() - .map(|&(ref k, ref v)| Some((k.clone(), v.clone()))) + .map(|(k, v)| Some((k.clone(), v.clone()))) .collect(); expect_multi_values( results, @@ -6580,7 +6580,7 @@ mod tests { // Scan pairs with key only let mut results: Vec> = test_data .iter() - .map(|&(ref k, _)| Some((k.clone(), vec![]))) + 
.map(|(k, _)| Some((k.clone(), vec![]))) .collect(); expect_multi_values( results.clone(), @@ -6977,7 +6977,7 @@ mod tests { rx.recv().unwrap(); // Verify pairs exist - let keys = test_data.iter().map(|&(ref k, _)| k.clone()).collect(); + let keys = test_data.iter().map(|(k, _)| k.clone()).collect(); let results = test_data.into_iter().map(|(k, v)| Some((k, v))).collect(); expect_multi_values( results, diff --git a/src/storage/mvcc/reader/point_getter.rs b/src/storage/mvcc/reader/point_getter.rs index cc4403229c1..474c789a31d 100644 --- a/src/storage/mvcc/reader/point_getter.rs +++ b/src/storage/mvcc/reader/point_getter.rs @@ -1287,7 +1287,7 @@ mod tests { let k = b"k"; // Write enough LOCK recrods - for start_ts in (1..30).into_iter().step_by(2) { + for start_ts in (1..30).step_by(2) { must_prewrite_lock(&mut engine, k, k, start_ts); must_commit(&mut engine, k, start_ts, start_ts + 1); } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 257789b4765..27484655af6 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -454,11 +454,10 @@ impl MvccReader { estimated_versions_to_last_change, } if estimated_versions_to_last_change >= SEEK_BOUND => { let key_with_ts = key.clone().append_ts(commit_ts); - let Some(value) = self - .snapshot - .get_cf(CF_WRITE, &key_with_ts)? else { - return Ok(None); - }; + let Some(value) = self.snapshot.get_cf(CF_WRITE, &key_with_ts)? 
+ else { + return Ok(None); + }; self.statistics.write.get += 1; let write = WriteRef::parse(&value)?.to_owned(); assert!( @@ -1192,7 +1191,7 @@ pub mod tests { (Bound::Unbounded, Bound::Excluded(8), vec![2u64, 4, 6, 8]), ]; - for (_, &(min, max, ref res)) in tests.iter().enumerate() { + for &(min, max, ref res) in tests.iter() { let mut iopt = IterOptions::default(); iopt.set_hint_min_ts(min); iopt.set_hint_max_ts(max); @@ -2457,7 +2456,7 @@ pub mod tests { engine.commit(k, 1, 2); // Write enough LOCK recrods - for start_ts in (6..30).into_iter().step_by(2) { + for start_ts in (6..30).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2466,7 +2465,7 @@ pub mod tests { engine.commit(k, 45, 46); // Write enough LOCK recrods - for start_ts in (50..80).into_iter().step_by(2) { + for start_ts in (50..80).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2521,7 +2520,7 @@ pub mod tests { let k = b"k"; // Write enough LOCK recrods - for start_ts in (6..30).into_iter().step_by(2) { + for start_ts in (6..30).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2558,7 +2557,7 @@ pub mod tests { engine.put(k, 1, 2); // 10 locks were put - for start_ts in (6..30).into_iter().step_by(2) { + for start_ts in (6..30).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } @@ -2585,7 +2584,7 @@ pub mod tests { feature_gate.set_version("6.1.0").unwrap(); set_tls_feature_gate(feature_gate); engine.delete(k, 51, 52); - for start_ts in (56..80).into_iter().step_by(2) { + for start_ts in (56..80).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } let feature_gate = FeatureGate::default(); @@ -2617,7 +2616,7 @@ pub mod tests { let k = b"k"; engine.put(k, 1, 2); - for start_ts in (6..30).into_iter().step_by(2) { + for start_ts in (6..30).step_by(2) { engine.lock(k, start_ts, start_ts + 1); } engine.rollback(k, 30); diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 3437a1e5432..4abb91314cc 100644 --- 
a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -1636,7 +1636,7 @@ mod latest_kv_tests { must_prewrite_put(&mut engine, b"k4", b"v41", b"k4", 3); must_commit(&mut engine, b"k4", 3, 7); - for start_ts in (10..30).into_iter().step_by(2) { + for start_ts in (10..30).step_by(2) { must_prewrite_lock(&mut engine, b"k1", b"k1", start_ts); must_commit(&mut engine, b"k1", start_ts, start_ts + 1); must_prewrite_lock(&mut engine, b"k3", b"k1", start_ts); diff --git a/src/storage/raw/raw_mvcc.rs b/src/storage/raw/raw_mvcc.rs index 8c4ad5da08b..aa635827961 100644 --- a/src/storage/raw/raw_mvcc.rs +++ b/src/storage/raw/raw_mvcc.rs @@ -290,7 +290,7 @@ mod tests { RawEncodeSnapshot::from_snapshot(raw_mvcc_snapshot); // get_cf - for &(ref key, ref value, _) in &test_data[6..12] { + for (key, value, _) in &test_data[6..12] { let res = encode_snapshot.get_cf(CF_DEFAULT, &ApiV2::encode_raw_key(key, None)); assert_eq!(res.unwrap(), Some(value.to_owned())); } diff --git a/src/storage/txn/actions/prewrite.rs b/src/storage/txn/actions/prewrite.rs index 64e22a13585..6d045db7e79 100644 --- a/src/storage/txn/actions/prewrite.rs +++ b/src/storage/txn/actions/prewrite.rs @@ -351,7 +351,9 @@ impl<'a> PrewriteMutation<'a> { .into()); } - if let Some(ts) = expected_for_update_ts && lock.for_update_ts != ts { + if let Some(ts) = expected_for_update_ts + && lock.for_update_ts != ts + { // The constraint on for_update_ts of the pessimistic lock is violated. // Consider the following case: // @@ -362,8 +364,8 @@ impl<'a> PrewriteMutation<'a> { // pessimistic lock. // 3. Another transaction `T2` writes the key and committed. // 4. The key then receives a stale pessimistic lock request of `T1` that has - // been received in step 1 (maybe because of retrying due to network issue - // in step 1). Since it allows locking with conflict, though there's a newer + // been received in step 1 (maybe because of retrying due to network issue in + // step 1). 
Since it allows locking with conflict, though there's a newer // version that's later than the request's `for_update_ts`, the request can // still acquire the lock. However no one will check the response, which // tells the latest commit_ts it met. @@ -766,7 +768,6 @@ fn async_commit_timestamps( #[cfg(not(feature = "failpoints"))] let injected_fallback = false; - let max_commit_ts = max_commit_ts; if (!max_commit_ts.is_zero() && min_commit_ts > max_commit_ts) || injected_fallback { warn!("commit_ts is too large, fallback to normal 2PC"; "key" => log_wrappers::Value::key(key.as_encoded()), @@ -1875,7 +1876,6 @@ pub mod tests { // At most 12 ops per-case. let ops_count = rg.gen::() % 12; let ops = (0..ops_count) - .into_iter() .enumerate() .map(|(i, _)| { if i == 0 { diff --git a/src/storage/txn/commands/atomic_store.rs b/src/storage/txn/commands/atomic_store.rs index 4bca5d514c5..3dd0b053d12 100644 --- a/src/storage/txn/commands/atomic_store.rs +++ b/src/storage/txn/commands/atomic_store.rs @@ -91,8 +91,8 @@ mod tests { fn test_atomic_process_write_impl() { let mut engine = TestEngineBuilder::new().build().unwrap(); let cm = concurrency_manager::ConcurrencyManager::new(1.into()); - let raw_keys = vec![b"ra", b"rz"]; - let raw_values = vec![b"valuea", b"valuez"]; + let raw_keys = [b"ra", b"rz"]; + let raw_values = [b"valuea", b"valuez"]; let ts_provider = super::super::test_util::gen_ts_provider(F::TAG); let mut modifies = vec![]; diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 34c98dab156..9142b09e481 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -1892,9 +1892,7 @@ mod tests { .unwrap_err(); assert!(matches!( res, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::AlreadyExist { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::AlreadyExist { .. 
}))) )); assert_eq!(cm.max_ts().into_inner(), 15); @@ -1917,9 +1915,7 @@ mod tests { .unwrap_err(); assert!(matches!( res, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::WriteConflict { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::WriteConflict { .. }))) )); } @@ -2329,9 +2325,9 @@ mod tests { .unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::PessimisticLockNotFound { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { + .. + }))) )); must_unlocked(&mut engine, b"k2"); // However conflict still won't be checked if there's a non-retry request @@ -2514,9 +2510,9 @@ mod tests { let err = prewrite_command(&mut engine, cm.clone(), &mut stat, cmd).unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::PessimisticLockNotFound { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { + .. + }))) )); // Passing keys in different order gets the same result: let cmd = PrewritePessimistic::with_defaults( @@ -2537,9 +2533,9 @@ mod tests { let err = prewrite_command(&mut engine, cm, &mut stat, cmd).unwrap_err(); assert!(matches!( err, - Error(box ErrorInner::Mvcc(MvccError( - box MvccErrorInner::PessimisticLockNotFound { .. } - ))) + Error(box ErrorInner::Mvcc(MvccError(box MvccErrorInner::PessimisticLockNotFound { + .. 
+ }))) )); // If the two keys are sent in different requests, it would be the client's duty diff --git a/src/storage/txn/flow_controller/singleton_flow_controller.rs b/src/storage/txn/flow_controller/singleton_flow_controller.rs index 5d52c272db6..e7b4f109570 100644 --- a/src/storage/txn/flow_controller/singleton_flow_controller.rs +++ b/src/storage/txn/flow_controller/singleton_flow_controller.rs @@ -332,7 +332,7 @@ where } // Split the record into left and right by the middle of time range - for (_, r) in self.records.iter().enumerate() { + for r in self.records.iter() { let elapsed_secs = r.1.saturating_elapsed_secs(); if elapsed_secs > time_span / 2.0 { left += r.0; diff --git a/src/storage/txn/latch.rs b/src/storage/txn/latch.rs index c76d71d7c7a..5c6000961f1 100644 --- a/src/storage/txn/latch.rs +++ b/src/storage/txn/latch.rs @@ -222,7 +222,7 @@ impl Latches { keep_latches_for_next_cmd: Option<(u64, &Lock)>, ) -> Vec { // Used to - let dummy_vec = vec![]; + let dummy_vec = []; let (keep_latches_for_cid, mut keep_latches_it) = match keep_latches_for_next_cmd { Some((cid, lock)) => (Some(cid), lock.required_hashes.iter().peekable()), None => (None, dummy_vec.iter().peekable()), @@ -280,9 +280,9 @@ mod tests { fn test_wakeup() { let latches = Latches::new(256); - let keys_a = vec!["k1", "k3", "k5"]; + let keys_a = ["k1", "k3", "k5"]; let mut lock_a = Lock::new(keys_a.iter()); - let keys_b = vec!["k4", "k5", "k6"]; + let keys_b = ["k4", "k5", "k6"]; let mut lock_b = Lock::new(keys_b.iter()); let cid_a: u64 = 1; let cid_b: u64 = 2; @@ -308,9 +308,9 @@ mod tests { fn test_wakeup_by_multi_cmds() { let latches = Latches::new(256); - let keys_a = vec!["k1", "k2", "k3"]; - let keys_b = vec!["k4", "k5", "k6"]; - let keys_c = vec!["k3", "k4"]; + let keys_a = ["k1", "k2", "k3"]; + let keys_b = ["k4", "k5", "k6"]; + let keys_c = ["k3", "k4"]; let mut lock_a = Lock::new(keys_a.iter()); let mut lock_b = Lock::new(keys_b.iter()); let mut lock_c = Lock::new(keys_c.iter()); @@ 
-351,10 +351,10 @@ mod tests { fn test_wakeup_by_small_latch_slot() { let latches = Latches::new(5); - let keys_a = vec!["k1", "k2", "k3"]; - let keys_b = vec!["k6", "k7", "k8"]; - let keys_c = vec!["k3", "k4"]; - let keys_d = vec!["k7", "k10"]; + let keys_a = ["k1", "k2", "k3"]; + let keys_b = ["k6", "k7", "k8"]; + let keys_c = ["k3", "k4"]; + let keys_d = ["k7", "k10"]; let mut lock_a = Lock::new(keys_a.iter()); let mut lock_b = Lock::new(keys_b.iter()); let mut lock_c = Lock::new(keys_c.iter()); diff --git a/src/storage/txn/sched_pool.rs b/src/storage/txn/sched_pool.rs index c6d7b477db0..3ba486a6496 100644 --- a/src/storage/txn/sched_pool.rs +++ b/src/storage/txn/sched_pool.rs @@ -281,7 +281,7 @@ pub fn tls_collect_scan_details(cmd: &'static str, stats: &Statistics) { m.borrow_mut() .local_scan_details .entry(cmd) - .or_insert_with(Default::default) + .or_default() .add(stats); }); } diff --git a/tests/benches/raftstore/mod.rs b/tests/benches/raftstore/mod.rs index 98b348722da..f32c9e49626 100644 --- a/tests/benches/raftstore/mod.rs +++ b/tests/benches/raftstore/mod.rs @@ -12,7 +12,7 @@ const DEFAULT_DATA_SIZE: usize = 100_000; fn enc_write_kvs(db: &RocksEngine, kvs: &[(Vec, Vec)]) { let mut wb = db.write_batch(); - for &(ref k, ref v) in kvs { + for (k, v) in kvs { wb.put(&keys::data_key(k), v).unwrap(); } wb.write().unwrap(); diff --git a/tests/failpoints/cases/test_async_io.rs b/tests/failpoints/cases/test_async_io.rs index 8ce349805b0..1ca41abb2ae 100644 --- a/tests/failpoints/cases/test_async_io.rs +++ b/tests/failpoints/cases/test_async_io.rs @@ -34,7 +34,7 @@ fn test_async_io_commit_without_leader_persist() { fail::cfg(raft_before_save_on_store_1_fp, "pause").unwrap(); for i in 2..10 { - cluster + let _ = cluster .async_put(format!("k{}", i).as_bytes(), b"v1") .unwrap(); } @@ -79,7 +79,7 @@ fn test_async_io_delay_destroy_after_conf_change() { fail::cfg(raft_before_save_on_store_1_fp, "pause").unwrap(); for i in 2..10 { - cluster + let _ = cluster 
.async_put(format!("k{}", i).as_bytes(), b"v") .unwrap(); } diff --git a/tests/failpoints/cases/test_early_apply.rs b/tests/failpoints/cases/test_early_apply.rs index bf403fb4668..104c1871343 100644 --- a/tests/failpoints/cases/test_early_apply.rs +++ b/tests/failpoints/cases/test_early_apply.rs @@ -26,7 +26,7 @@ fn test_singleton_cannot_early_apply() { // Check singleton region can be scheduled correctly. fail::cfg(store_1_fp, "pause").unwrap(); - cluster.async_put(b"k1", b"v1").unwrap(); + let _ = cluster.async_put(b"k1", b"v1").unwrap(); sleep_ms(100); must_get_none(&cluster.get_engine(1), b"k1"); @@ -70,10 +70,10 @@ fn test_multi_early_apply() { } })), )); - cluster.async_put(b"k4", b"v4").unwrap(); + let _ = cluster.async_put(b"k4", b"v4").unwrap(); // Sleep a while so that follower will send append response sleep_ms(100); - cluster.async_put(b"k11", b"v22").unwrap(); + let _ = cluster.async_put(b"k11", b"v22").unwrap(); // Sleep a while so that follower will send append response. sleep_ms(100); // Now the store thread of store 1 pauses on `store_1_fp`. diff --git a/tests/failpoints/cases/test_engine.rs b/tests/failpoints/cases/test_engine.rs index 073f7276419..55148098aef 100644 --- a/tests/failpoints/cases/test_engine.rs +++ b/tests/failpoints/cases/test_engine.rs @@ -62,6 +62,7 @@ fn test_write_buffer_manager() { // Case: // Assume FlushMemtable cf1 (schedule flush task) and BackgroundCallFlush cf1 // (execute flush task) are performed concurrently. +// ```text // t FlushMemtable cf1 BackgroundCallFlush cf1 // 1. lock // 2. convert memtable t2(seqno. 10-20) @@ -77,6 +78,7 @@ fn test_write_buffer_manager() { // update last_flushed to 20 // 9. 
on_memtable_sealed // 10 > 20 *panic* +// ``` #[test] fn test_rocksdb_listener() { use test_raftstore_v2::*; diff --git a/tests/failpoints/cases/test_rawkv.rs b/tests/failpoints/cases/test_rawkv.rs index b7886ce8267..458b72ecf67 100644 --- a/tests/failpoints/cases/test_rawkv.rs +++ b/tests/failpoints/cases/test_rawkv.rs @@ -209,7 +209,7 @@ fn test_leader_transfer() { #[test] fn test_region_merge() { let mut suite = TestSuite::new(3, ApiVersion::V2); - let keys = vec![b"rk0", b"rk1", b"rk2", b"rk3", b"rk4", b"rk5"]; + let keys = [b"rk0", b"rk1", b"rk2", b"rk3", b"rk4", b"rk5"]; suite.must_raw_put(keys[1], b"v1"); suite.must_raw_put(keys[3], b"v3"); diff --git a/tests/failpoints/cases/test_read_execution_tracker.rs b/tests/failpoints/cases/test_read_execution_tracker.rs index 7351044b297..372c01bcad2 100644 --- a/tests/failpoints/cases/test_read_execution_tracker.rs +++ b/tests/failpoints/cases/test_read_execution_tracker.rs @@ -2,13 +2,21 @@ use kvproto::kvrpcpb::*; use test_coprocessor::{init_with_data, DagSelect, ProductTable}; -use test_raftstore::{kv_batch_read, kv_read, must_kv_commit, must_kv_prewrite}; +use test_raftstore::{ + configure_for_lease_read, kv_batch_read, kv_read, must_kv_commit, must_kv_prewrite, +}; use test_raftstore_macro::test_case; +use tikv_util::config::ReadableDuration; -#[test_case(test_raftstore::must_new_cluster_and_kv_client)] -#[test_case(test_raftstore_v2::must_new_cluster_and_kv_client)] +#[test_case(test_raftstore::must_new_cluster_with_cfg_and_kv_client_mul)] +#[test_case(test_raftstore_v2::must_new_cluster_with_cfg_and_kv_client_mul)] fn test_read_execution_tracking() { - let (_cluster, client, ctx) = new_cluster(); + let (_cluster, client, ctx) = new_cluster(1, |c| { + // set a small renew duration to avoid trigger pre-renew that can affact the + // metrics. 
+ c.cfg.tikv.raft_store.renew_leader_lease_advance_duration = ReadableDuration::millis(1); + configure_for_lease_read(&mut c.cfg, Some(50), Some(10_000)); + }); let (k1, v1) = (b"k1".to_vec(), b"v1".to_vec()); let (k2, v2) = (b"k2".to_vec(), b"v2".to_vec()); diff --git a/tests/failpoints/cases/test_server.rs b/tests/failpoints/cases/test_server.rs index 9c34fd13529..62d66af1efc 100644 --- a/tests/failpoints/cases/test_server.rs +++ b/tests/failpoints/cases/test_server.rs @@ -95,7 +95,7 @@ fn test_send_raft_channel_full() { fail::cfg(on_batch_raft_stream_drop_by_err_fp, "panic").unwrap(); // send request while channel full should not cause the connection drop - cluster.async_put(b"k2", b"v2").unwrap(); + let _ = cluster.async_put(b"k2", b"v2").unwrap(); fail::remove(send_raft_message_full_fp); cluster.must_put(b"k3", b"v3"); diff --git a/tests/failpoints/cases/test_split_region.rs b/tests/failpoints/cases/test_split_region.rs index 2ef3d499d22..28ceba892d0 100644 --- a/tests/failpoints/cases/test_split_region.rs +++ b/tests/failpoints/cases/test_split_region.rs @@ -1426,8 +1426,7 @@ impl Filter for TeeFilter { // 2. the splitted region set has_dirty_data be true in `apply_snapshot` // 3. the splitted region schedule tablet trim task in `on_applied_snapshot` // with tablet index 5 -// 4. the splitted region received a snapshot sent from its -// leader +// 4. the splitted region received a snapshot sent from its leader // 5. after finishing applying this snapshot, the tablet index in storage // changed to 6 // 6. 
tablet trim complete and callbacked to raftstore diff --git a/tests/failpoints/cases/test_storage.rs b/tests/failpoints/cases/test_storage.rs index fec1ccc931d..95ae4e82b74 100644 --- a/tests/failpoints/cases/test_storage.rs +++ b/tests/failpoints/cases/test_storage.rs @@ -1633,9 +1633,7 @@ fn test_before_propose_deadline() { assert!( matches!( res, - Err(StorageError(box StorageErrorInner::Kv(KvError( - box KvErrorInner::Request(_), - )))) + Err(StorageError(box StorageErrorInner::Kv(KvError(box KvErrorInner::Request(_))))) ), "actual: {:?}", res diff --git a/tests/failpoints/cases/test_transaction.rs b/tests/failpoints/cases/test_transaction.rs index 0b6e6269e95..bb1d291e816 100644 --- a/tests/failpoints/cases/test_transaction.rs +++ b/tests/failpoints/cases/test_transaction.rs @@ -756,7 +756,7 @@ fn test_proposal_concurrent_with_conf_change_and_transfer_leader() { let handle = std::thread::spawn(move || { let mut mutations = vec![]; - for key in vec![b"key3".to_vec(), b"key4".to_vec()] { + for key in [b"key3".to_vec(), b"key4".to_vec()] { let mut mutation = kvproto::kvrpcpb::Mutation::default(); mutation.set_op(Op::Put); mutation.set_key(key); diff --git a/tests/failpoints/cases/test_transfer_leader.rs b/tests/failpoints/cases/test_transfer_leader.rs index 75eb62bab99..02fb8c046c8 100644 --- a/tests/failpoints/cases/test_transfer_leader.rs +++ b/tests/failpoints/cases/test_transfer_leader.rs @@ -361,8 +361,8 @@ fn test_read_lock_after_become_follower() { /// 1. Inserted 5 entries and make all stores commit and apply them. /// 2. Prevent the store 3 from append following logs. /// 3. Insert another 20 entries. -/// 4. Wait for some time so that part of the entry cache are compacted -/// on the leader(store 1). +/// 4. Wait for some time so that part of the entry cache are compacted on the +/// leader(store 1). macro_rules! run_cluster_for_test_warmup_entry_cache { ($cluster:expr) => { // Let the leader compact the entry cache. 
diff --git a/tests/integrations/backup/mod.rs b/tests/integrations/backup/mod.rs index f89ef0c6faa..9c5bf113af9 100644 --- a/tests/integrations/backup/mod.rs +++ b/tests/integrations/backup/mod.rs @@ -518,6 +518,7 @@ fn test_invalid_external_storage() { let resps = block_on(rx.collect::>()); assert!(resps[0].has_error()); + #[allow(clippy::permissions_set_readonly_false)] perms.set_readonly(false); f.set_permissions(perms).unwrap(); diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index 5bcd258947c..1a062924dae 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -2212,7 +2212,7 @@ fn test_batch_request() { let prepare_req = |cluster: &mut Cluster>, ranges: &Vec| -> Request { - let original_range = ranges.get(0).unwrap(); + let original_range = ranges.first().unwrap(); let key_range = product.get_record_range(original_range.start, original_range.end); let region_key = Key::from_raw(&key_range.start); let mut req = DagSelect::from(&product) diff --git a/tests/integrations/import/test_apply_log.rs b/tests/integrations/import/test_apply_log.rs index 0b11a12002e..9dda44888bb 100644 --- a/tests/integrations/import/test_apply_log.rs +++ b/tests/integrations/import/test_apply_log.rs @@ -67,6 +67,6 @@ fn test_apply_twice() { &tikv, &ctx, CF_DEFAULT, - default_fst.into_iter().chain(default_snd.into_iter()), + default_fst.into_iter().chain(default_snd), ); } diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 74b4a73da43..8126ab0ffd5 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -289,8 +289,8 @@ fn test_flush_before_stop2() { // 1. lock `k` with index 6 // 2. on_applied_res => lockcf's last_modified = 6 // 3. flush lock cf => lockcf's flushed_index = 6 -// 4. 
batch {unlock `k`, write `k`} with index 7 -// (last_modified is updated in store but RocksDB is modified in apply. So, +// 4. batch {unlock `k`, write `k`} with index 7 (last_modified is updated in +// store but RocksDB is modified in apply. So, // before on_apply_res, the last_modified is not updated.) // // flush-before-close: diff --git a/tests/integrations/raftstore/test_early_apply.rs b/tests/integrations/raftstore/test_early_apply.rs index 91a63b1878c..ec42ceda52d 100644 --- a/tests/integrations/raftstore/test_early_apply.rs +++ b/tests/integrations/raftstore/test_early_apply.rs @@ -127,7 +127,7 @@ fn test_early_apply(mode: DataLost) { test( &mut cluster, |c| { - c.async_put(b"k2", b"v2").unwrap(); + let _ = c.async_put(b"k2", b"v2").unwrap(); }, |c| must_get_equal(&c.get_engine(1), b"k2", b"v2"), mode, @@ -145,7 +145,7 @@ fn test_early_apply(mode: DataLost) { test( &mut cluster, |c| { - c.async_remove_peer(1, new_peer(1, 1)).unwrap(); + let _ = c.async_remove_peer(1, new_peer(1, 1)).unwrap(); }, |c| must_get_none(&c.get_engine(1), b"k2"), mode, @@ -191,8 +191,8 @@ fn test_update_internal_apply_index() { .direction(Direction::Recv); cluster.add_send_filter(CloneFilterFactory(filter)); let last_index = cluster.raft_local_state(1, 1).get_last_index(); - cluster.async_remove_peer(1, new_peer(4, 4)).unwrap(); - cluster.async_put(b"k2", b"v2").unwrap(); + let _ = cluster.async_remove_peer(1, new_peer(4, 4)).unwrap(); + let _ = cluster.async_put(b"k2", b"v2").unwrap(); let mut snaps = Vec::new(); for id in 1..3 { cluster.wait_last_index(1, id, last_index + 2, Duration::from_secs(3)); diff --git a/tests/integrations/raftstore/test_flashback.rs b/tests/integrations/raftstore/test_flashback.rs index 9ca6092e624..d6ba8c62629 100644 --- a/tests/integrations/raftstore/test_flashback.rs +++ b/tests/integrations/raftstore/test_flashback.rs @@ -239,7 +239,7 @@ fn test_prepare_flashback_after_conf_change() { let on_handle_apply_fp = "on_handle_apply"; 
fail::cfg(on_handle_apply_fp, "pause").unwrap(); // Send the conf change msg. - cluster.async_add_peer(region_id, new_peer(2, 2)).unwrap(); + let _ = cluster.async_add_peer(region_id, new_peer(2, 2)).unwrap(); // Make sure the conf change cmd is ready. sleep(Duration::from_millis(100)); // Send the prepare flashback msg. diff --git a/tests/integrations/raftstore/test_hibernate.rs b/tests/integrations/raftstore/test_hibernate.rs index 6e3c64d7851..b9289bf8309 100644 --- a/tests/integrations/raftstore/test_hibernate.rs +++ b/tests/integrations/raftstore/test_hibernate.rs @@ -450,7 +450,7 @@ fn test_leader_demoted_when_hibernated() { )); } // Leave joint. - cluster.async_exit_joint(r).unwrap(); + let _ = cluster.async_exit_joint(r).unwrap(); // Ensure peer 3 can campaign. cluster.wait_last_index(r, 3, 11, Duration::from_secs(5)); cluster.add_send_filter(CloneFilterFactory( diff --git a/tests/integrations/raftstore/test_merge.rs b/tests/integrations/raftstore/test_merge.rs index 8482feb8481..18f42035d66 100644 --- a/tests/integrations/raftstore/test_merge.rs +++ b/tests/integrations/raftstore/test_merge.rs @@ -895,8 +895,8 @@ fn test_node_merge_update_region() { let new_leader = left .get_peers() .iter() + .find(|&p| p.get_id() != origin_leader.get_id()) .cloned() - .find(|p| p.get_id() != origin_leader.get_id()) .unwrap(); // Make sure merge is done in the new_leader. 
@@ -1552,7 +1552,7 @@ fn test_merge_pessimistic_locks_when_gap_is_too_large() { let large_bytes = vec![b'v'; 32 << 10]; // 32 KiB // 4 * 32 KiB = 128 KiB > raft_entry_max_size for _ in 0..4 { - cluster.async_put(b"k1", &large_bytes).unwrap(); + let _ = cluster.async_put(b"k1", &large_bytes).unwrap(); } cluster.merge_region(left.id, right.id, Callback::None); diff --git a/tests/integrations/raftstore/test_multi.rs b/tests/integrations/raftstore/test_multi.rs index f40e6695599..b56d864e7ce 100644 --- a/tests/integrations/raftstore/test_multi.rs +++ b/tests/integrations/raftstore/test_multi.rs @@ -815,7 +815,7 @@ fn test_node_catch_up_logs() { cluster.stop_node(3); for i in 0..10 { let v = format!("{:04}", i); - cluster.async_put(v.as_bytes(), v.as_bytes()).unwrap(); + let _ = cluster.async_put(v.as_bytes(), v.as_bytes()).unwrap(); } must_get_equal(&cluster.get_engine(1), b"0009", b"0009"); cluster.run_node(3).unwrap(); diff --git a/tests/integrations/raftstore/test_replica_read.rs b/tests/integrations/raftstore/test_replica_read.rs index bd3c10657c2..1f0b8330c10 100644 --- a/tests/integrations/raftstore/test_replica_read.rs +++ b/tests/integrations/raftstore/test_replica_read.rs @@ -396,7 +396,7 @@ fn test_read_index_retry_lock_checking() { !resp.get_header().has_error() && resp .get_responses() - .get(0) + .first() .map_or(true, |r| !r.get_read_index().has_locked()), "{:?}", resp, diff --git a/tests/integrations/raftstore/test_snap_recovery.rs b/tests/integrations/raftstore/test_snap_recovery.rs index 70f9ae8d97c..5d68bdabcbb 100644 --- a/tests/integrations/raftstore/test_snap_recovery.rs +++ b/tests/integrations/raftstore/test_snap_recovery.rs @@ -33,7 +33,7 @@ fn test_check_pending_admin() { // make a admin request to let leader has pending conf change. 
let leader = new_peer(1, 4); - cluster.async_add_peer(1, leader).unwrap(); + let _ = cluster.async_add_peer(1, leader).unwrap(); std::thread::sleep(Duration::from_millis(800)); @@ -89,7 +89,7 @@ fn test_snap_wait_apply() { )); // make a async put request to let leader has inflight raft log. - cluster.async_put(b"k2", b"v2").unwrap(); + let _ = cluster.async_put(b"k2", b"v2").unwrap(); std::thread::sleep(Duration::from_millis(800)); let router = cluster.sim.wl().get_router(1).unwrap(); diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index b54af465852..831ce113a64 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -914,8 +914,8 @@ fn test_node_split_update_region_right_derive() { let new_leader = right .get_peers() .iter() + .find(|&p| p.get_id() != origin_leader.get_id()) .cloned() - .find(|p| p.get_id() != origin_leader.get_id()) .unwrap(); // Make sure split is done in the new_leader. diff --git a/tests/integrations/raftstore/test_stats.rs b/tests/integrations/raftstore/test_stats.rs index 821fc19dff8..3b6d9434e11 100644 --- a/tests/integrations/raftstore/test_stats.rs +++ b/tests/integrations/raftstore/test_stats.rs @@ -433,7 +433,7 @@ fn test_txn_query_stats_tmpl() { // enabled, disable it. test_query_num::(batch_get_command, false, false); test_query_num::(batch_coprocessor, false, false); - test_txn_delete_query::(); + test_txn_delete_query(); test_pessimistic_lock(); test_rollback(); fail::remove("mock_tick_interval"); @@ -442,7 +442,7 @@ fn test_txn_query_stats_tmpl() { fail::remove("only_check_source_task_name"); } -fn raw_put( +fn raw_put( _cluster: &Cluster>, client: &TikvClient, ctx: &Context, @@ -621,7 +621,7 @@ fn test_query_num(query: Box, is_raw_kv: bool, auto_split: b let store_id = 1; if is_raw_kv { k = b"r_key".to_vec(); // "r" is key prefix of RawKV. 
- raw_put::(&cluster, &client, &ctx, store_id, k.clone()); + raw_put(&cluster, &client, &ctx, store_id, k.clone()); } else { k = b"x_key".to_vec(); // "x" is key prefix of TxnKV. put(&cluster, &client, &ctx, store_id, k.clone()); @@ -642,7 +642,7 @@ fn test_raw_delete_query() { ctx.set_api_version(F::CLIENT_TAG); ctx.set_request_source("test_stats".to_owned()); - raw_put::(&cluster, &client, &ctx, store_id, k.clone()); + raw_put(&cluster, &client, &ctx, store_id, k.clone()); // Raw Delete let mut delete_req = RawDeleteRequest::default(); delete_req.set_context(ctx.clone()); @@ -650,7 +650,7 @@ fn test_raw_delete_query() { client.raw_delete(&delete_req).unwrap(); // skip raw kv write query check - raw_put::(&cluster, &client, &ctx, store_id, k.clone()); + raw_put(&cluster, &client, &ctx, store_id, k.clone()); // Raw DeleteRange let mut delete_req = RawDeleteRangeRequest::default(); delete_req.set_context(ctx); @@ -661,7 +661,7 @@ fn test_raw_delete_query() { } } -fn test_txn_delete_query() { +fn test_txn_delete_query() { let k = b"t_key".to_vec(); let store_id = 1; diff --git a/tests/integrations/server/status_server.rs b/tests/integrations/server/status_server.rs index a2921f77b95..90d1122b13a 100644 --- a/tests/integrations/server/status_server.rs +++ b/tests/integrations/server/status_server.rs @@ -36,7 +36,7 @@ fn test_region_meta_endpoint() { cluster.run(); let region = cluster.get_region(b""); let region_id = region.get_id(); - let peer = region.get_peers().get(0); + let peer = region.get_peers().first(); assert!(peer.is_some()); let store_id = peer.unwrap().get_store_id(); let router = cluster.raft_extension(store_id); From 3194c4adcd5ab5999cfa333b8f1f9ebb30443555 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Thu, 4 Jan 2024 23:44:05 -0800 Subject: [PATCH 1090/1149] Titan: refine Titan dir empty check (#16296) close tikv/tikv#16295 Refine Titan dir empty check Signed-off-by: Yang Zhang --- components/server/src/common.rs | 2 +- 
components/server/src/server.rs | 2 +- components/server/src/server2.rs | 2 +- .../test_raftstore/src/common-test.toml | 2 + components/test_raftstore/src/util.rs | 4 +- etc/config-template.toml | 5 +- src/config/mod.rs | 83 ++++++++++++------- src/storage/mod.rs | 2 +- tests/integrations/config/mod.rs | 2 +- .../raftstore/test_compact_after_delete.rs | 6 +- tests/integrations/raftstore/test_merge.rs | 4 +- tests/integrations/raftstore/test_snap.rs | 6 +- tests/integrations/storage/test_titan.rs | 2 +- 13 files changed, 75 insertions(+), 47 deletions(-) diff --git a/components/server/src/common.rs b/components/server/src/common.rs index 9ecaab0770a..2c43abccf44 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -448,7 +448,7 @@ const RESERVED_OPEN_FDS: u64 = 1000; pub fn check_system_config(config: &TikvConfig) { info!("beginning system configuration check"); let mut rocksdb_max_open_files = config.rocksdb.max_open_files; - if config.rocksdb.titan.enabled { + if let Some(true) = config.rocksdb.titan.enabled { // Titan engine maintains yet another pool of blob files and uses the same max // number of open files setup as rocksdb does. 
So we double the max required // open files here diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 6fb1963bbfb..292fdb8e56f 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -1289,7 +1289,7 @@ where let mut engine_metrics = EngineMetricsManager::::new( self.tablet_registry.clone().unwrap(), self.kv_statistics.clone(), - self.core.config.rocksdb.titan.enabled, + self.core.config.rocksdb.titan.enabled.map_or(false, |v| v), self.engines.as_ref().unwrap().engines.raft.clone(), self.raft_statistics.clone(), ); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 238b2f4982a..bfc09f483bc 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -1085,7 +1085,7 @@ where let mut engine_metrics = EngineMetricsManager::::new( self.tablet_registry.clone().unwrap(), self.kv_statistics.clone(), - self.core.config.rocksdb.titan.enabled, + self.core.config.rocksdb.titan.enabled.map_or(false, |v| v), self.engines.as_ref().unwrap().raft_engine.clone(), self.raft_statistics.clone(), ); diff --git a/components/test_raftstore/src/common-test.toml b/components/test_raftstore/src/common-test.toml index 7eace3ac745..8e4bed8b62b 100644 --- a/components/test_raftstore/src/common-test.toml +++ b/components/test_raftstore/src/common-test.toml @@ -81,6 +81,7 @@ max-sub-compactions = 1 [rocksdb.titan] max-background-gc = 1 +min-blob-size = 0 [rocksdb.defaultcf] @@ -98,6 +99,7 @@ max-sub-compactions = 1 [raftdb.titan] max-background-gc = 1 +min-blob-size = 0 [raftdb.defaultcf] diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 019a7416a7a..2c04d2d1442 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -772,7 +772,7 @@ pub fn configure_for_enable_titan>( cluster: &mut Cluster, min_blob_size: ReadableSize, ) { - cluster.cfg.rocksdb.titan.enabled = true; + 
cluster.cfg.rocksdb.titan.enabled = Some(true); cluster.cfg.rocksdb.titan.purge_obsolete_files_period = ReadableDuration::secs(1); cluster.cfg.rocksdb.titan.max_background_gc = 10; cluster.cfg.rocksdb.defaultcf.titan.min_blob_size = min_blob_size; @@ -783,7 +783,7 @@ pub fn configure_for_enable_titan>( pub fn configure_for_disable_titan>( cluster: &mut Cluster, ) { - cluster.cfg.rocksdb.titan.enabled = false; + cluster.cfg.rocksdb.titan.enabled = Some(false); } pub fn configure_for_encryption>( diff --git a/etc/config-template.toml b/etc/config-template.toml index 75c7eab0c10..e5a8e621dca 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -672,8 +672,9 @@ [rocksdb.titan] ## Enables or disables `Titan`. Note that Titan is still an experimental feature. Once ## enabled, it can't fall back. Forced fallback may result in data loss. -## default: false -# enabled = false +## Titan is default on since v7.6.0. This won't affect deployments existed before v7.6.0. +## default: true +# enabled = true ## Maximum number of threads of `Titan` background gc jobs. 
## default: 1 diff --git a/src/config/mod.rs b/src/config/mod.rs index 8f34711f8c8..c4f811f63de 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1197,7 +1197,7 @@ impl RaftCfConfig { #[serde(default)] #[serde(rename_all = "kebab-case")] pub struct TitanDbConfig { - pub enabled: bool, + pub enabled: Option, pub dirname: String, pub disable_gc: bool, pub max_background_gc: i32, @@ -1208,7 +1208,7 @@ pub struct TitanDbConfig { impl Default for TitanDbConfig { fn default() -> Self { Self { - enabled: false, // Enabled only for newly created cluster + enabled: None, // Enabled only for newly created cluster dirname: "".to_owned(), disable_gc: false, max_background_gc: 1, @@ -1380,7 +1380,12 @@ impl Default for DbConfig { } impl DbConfig { - pub fn optimize_for(&mut self, engine: EngineType, kv_data_exists: bool) { + pub fn optimize_for( + &mut self, + engine: EngineType, + kv_data_exists: bool, + is_titan_dir_empty: bool, + ) { match engine { EngineType::RaftKv => { self.allow_concurrent_memtable_write.get_or_insert(true); @@ -1392,8 +1397,14 @@ impl DbConfig { if self.lockcf.write_buffer_size.is_none() { self.lockcf.write_buffer_size = Some(ReadableSize::mb(32)); } - if !kv_data_exists && !self.titan.enabled { - self.titan.enabled = true; + if self.titan.enabled.is_none() { + // If the user doesn't specify titan.enabled, we enable it by default for newly + // created clusters. 
+ if kv_data_exists && is_titan_dir_empty { + self.titan.enabled = Some(false); + } else { + self.titan.enabled = Some(true); + } } } EngineType::RaftKv2 => { @@ -1533,7 +1544,7 @@ impl DbConfig { opts.set_info_log(RocksdbLogger); } opts.set_info_log_level(self.info_log_level.into()); - if self.titan.enabled { + if let Some(true) = self.titan.enabled { opts.set_titandb_options(&self.titan.build_opts()); } opts.set_env(shared.env.clone()); @@ -1644,7 +1655,7 @@ impl DbConfig { return Err("raftcf does not support cf based write buffer manager".into()); } if self.enable_unordered_write { - if self.titan.enabled { + if let Some(true) = self.titan.enabled { return Err("RocksDB.unordered_write does not support Titan".into()); } self.enable_pipelined_write = false; @@ -1921,7 +1932,7 @@ impl RaftDbConfig { opts.set_bytes_per_sync(self.bytes_per_sync.0); opts.set_wal_bytes_per_sync(self.wal_bytes_per_sync.0); // TODO maybe create a new env for raft engine - if self.titan.enabled { + if let Some(true) = self.titan.enabled { opts.set_titandb_options(&self.titan.build_opts()); } opts.set_env(env); @@ -1935,7 +1946,7 @@ impl RaftDbConfig { fn validate(&mut self) -> Result<(), Box> { self.defaultcf.validate()?; if self.enable_unordered_write { - if self.titan.enabled { + if let Some(true) = self.titan.enabled { return Err("raftdb: unordered_write is not compatible with Titan".into()); } if self.enable_pipelined_write { @@ -3594,6 +3605,8 @@ impl TikvConfig { if self.raft_engine.config.dir == self.raft_store.raftdb_path { return Err("raft_engine.config.dir can't be same as raft_store.raftdb_path".into()); } + // Newly created dbs will be optimized with certain options. e.g. Titan. 
+ let mut is_titan_dir_empty = true; let kv_data_exists = match self.storage.engine { EngineType::RaftKv => { let kv_db_path = self.infer_kv_engine_path(None)?; @@ -3622,16 +3635,18 @@ impl TikvConfig { } } // Check blob file dir is empty when titan is disabled - if !self.rocksdb.titan.enabled { - let titandb_path = if self.rocksdb.titan.dirname.is_empty() { - Path::new(&kv_db_path).join("titandb") - } else { - Path::new(&self.rocksdb.titan.dirname).to_path_buf() - }; - if let Err(e) = tikv_util::config::check_data_dir_empty( - titandb_path.to_str().unwrap(), - "blob", - ) { + let titandb_path = if self.rocksdb.titan.dirname.is_empty() { + Path::new(&kv_db_path).join("titandb") + } else { + Path::new(&self.rocksdb.titan.dirname).to_path_buf() + }; + if let Err(e) = + tikv_util::config::check_data_dir_empty(titandb_path.to_str().unwrap(), "blob") + { + is_titan_dir_empty = false; + if let Some(false) = self.rocksdb.titan.enabled { + // If Titan is disabled explicitly but Titan's data directory is not empty, + // return an error. return Err(format!( "check: titandb-data-dir-empty; err: \"{}\"; \ hint: You have disabled titan when its data directory is not empty. \ @@ -3664,7 +3679,7 @@ impl TikvConfig { // Optimize. self.rocksdb - .optimize_for(self.storage.engine, kv_data_exists); + .optimize_for(self.storage.engine, kv_data_exists, is_titan_dir_empty); self.coprocessor .optimize_for(self.storage.engine == EngineType::RaftKv2); self.split @@ -3708,7 +3723,7 @@ impl TikvConfig { self.raft_engine.config.purge_threshold, ); } - if self.rocksdb.titan.enabled { + if let Some(true) = self.rocksdb.titan.enabled { return Err("partitioned-raft-kv doesn't support titan.".into()); } if self.raft_store.enable_v2_compatible_learner { @@ -5805,7 +5820,7 @@ mod tests { #[test] fn test_update_titan_blob_run_mode_config() { let mut cfg = TikvConfig::default(); - cfg.rocksdb.titan.enabled = true; + cfg.rocksdb.titan.enabled = Some(true); let (_, cfg_controller, ..) 
= new_engines::(cfg); for run_mode in [ "kFallback", @@ -5847,25 +5862,34 @@ mod tests { let (storage, ..) = new_engines::(cfg); drop(storage); let mut cfg = TikvConfig::from_file(&dir.path().join(LAST_CONFIG_FILE), None).unwrap(); - assert_eq!(cfg.rocksdb.titan.enabled, false); + // titan.enabled is not specified. + assert_eq!(cfg.rocksdb.titan.enabled, None); cfg.validate().unwrap(); - assert_eq!(cfg.rocksdb.titan.enabled, false); + // Config optimized with titan.enabled = false, since it is an existing + // instance. + assert_eq!(cfg.rocksdb.titan.enabled, Some(false)); let (_storage, cfg_controller, ..) = new_engines::(cfg); - assert_eq!(cfg_controller.get_current().rocksdb.titan.enabled, false); + assert_eq!( + cfg_controller.get_current().rocksdb.titan.enabled, + Some(false) + ); drop(dir); // Auto enable titan for new instances let (mut cfg, dir) = TikvConfig::with_tmp().unwrap(); - assert_eq!(cfg.rocksdb.titan.enabled, false); + assert_eq!(cfg.rocksdb.titan.enabled, None); cfg.validate().unwrap(); persist_config(&cfg).unwrap(); - assert_eq!(cfg.rocksdb.titan.enabled, true); + assert_eq!(cfg.rocksdb.titan.enabled, Some(true)); let (storage, cfg_controller, ..) = new_engines::(cfg); - assert_eq!(cfg_controller.get_current().rocksdb.titan.enabled, true); + assert_eq!( + cfg_controller.get_current().rocksdb.titan.enabled, + Some(true) + ); drop(storage); // The config is persisted let cfg = TikvConfig::from_file(&dir.path().join(LAST_CONFIG_FILE), None).unwrap(); - assert_eq!(cfg.rocksdb.titan.enabled, true); + assert_eq!(cfg.rocksdb.titan.enabled, Some(true)); drop(dir); } @@ -6649,6 +6673,7 @@ mod tests { default_cfg.rocksdb.lockcf.write_buffer_size = Some(ReadableSize::mb(32)); default_cfg.raftdb.defaultcf.target_file_size_base = Some(ReadableSize::mb(8)); default_cfg.raft_store.region_compact_check_step = Some(100); + default_cfg.rocksdb.titan.enabled = Some(true); // Other special cases. 
cfg.pd.retry_max_count = default_cfg.pd.retry_max_count; // Both -1 and isize::MAX are the same. diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 1a38c781d0f..a54bb8893ac 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -4428,7 +4428,7 @@ mod tests { fn test_scan_with_key_only() { let db_config = crate::config::DbConfig { titan: TitanDbConfig { - enabled: true, + enabled: Some(true), ..Default::default() }, ..Default::default() diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 05cbde827d2..6ba675082ff 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -287,7 +287,7 @@ fn test_serde_custom_tikv_config() { ..Default::default() }; let titan_db_config = TitanDbConfig { - enabled: true, + enabled: Some(true), dirname: "bar".to_owned(), disable_gc: false, max_background_gc: 9, diff --git a/tests/integrations/raftstore/test_compact_after_delete.rs b/tests/integrations/raftstore/test_compact_after_delete.rs index 1bea73d85ea..564676aa82d 100644 --- a/tests/integrations/raftstore/test_compact_after_delete.rs +++ b/tests/integrations/raftstore/test_compact_after_delete.rs @@ -38,7 +38,7 @@ fn test_compact_after_delete>(cluster: &mut Cluster>( cluster: &mut Cluster, max_snapshot_file_size: u64, ) { - cluster.cfg.rocksdb.titan.enabled = true; + cluster.cfg.rocksdb.titan.enabled = Some(true); cluster.cfg.raft_store.raft_log_gc_count_limit = Some(1000); cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::millis(10); cluster.cfg.raft_store.snap_apply_batch_size = ReadableSize(500); @@ -236,7 +236,7 @@ fn test_concurrent_snap() { // Test that the handling of snapshot is correct when there are multiple // snapshots which have overlapped region ranges arrive at the same // raftstore. - cluster.cfg.rocksdb.titan.enabled = true; + cluster.cfg.rocksdb.titan.enabled = Some(true); // Disable raft log gc in this test case. 
cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(60); // For raftstore v2, after split, follower delays first messages (see @@ -289,7 +289,7 @@ fn test_concurrent_snap_v2() { // Test that the handling of snapshot is correct when there are multiple // snapshots which have overlapped region ranges arrive at the same // raftstore. - // cluster.cfg.rocksdb.titan.enabled = true; + // cluster.cfg.rocksdb.titan.enabled = Some(true); // Disable raft log gc in this test case. cluster.cfg.raft_store.raft_log_gc_tick_interval = ReadableDuration::secs(60); // For raftstore v2, after split, follower delays first messages (see diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index cc39a7ff0c6..9490b30b2f8 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -151,7 +151,7 @@ fn test_delete_files_in_range_for_titan() { // Set configs and create engines let mut cfg = TikvConfig::default(); let cache = cfg.storage.block_cache.build_shared_cache(); - cfg.rocksdb.titan.enabled = true; + cfg.rocksdb.titan.enabled = Some(true); cfg.rocksdb.titan.disable_gc = true; cfg.rocksdb.titan.purge_obsolete_files_period = ReadableDuration::secs(1); cfg.rocksdb.defaultcf.disable_auto_compactions = true; From df108bf9b70da040428fe7bcf683ca6f37815e82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Fri, 5 Jan 2024 17:35:41 +0800 Subject: [PATCH 1091/1149] log-backup: fix a typo (#16257) ref tikv/tikv#15990 Renamed Router::udpate_config to Router::update_config. 
Signed-off-by: hillium Co-authored-by: iosmanthus --- components/backup-stream/src/endpoint.rs | 2 +- components/backup-stream/src/router.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 74a8012bf4b..3bc1d7669d9 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -915,7 +915,7 @@ where "config" => ?cfg, "concurrency_diff" => concurrency_diff, ); - self.range_router.udpate_config(&cfg); + self.range_router.update_config(&cfg); self.update_semaphore_capacity(&self.initial_scan_semaphore, concurrency_diff); self.config = cfg; diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 24d239a3f73..95dad89588a 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -407,7 +407,7 @@ impl RouterInner { } } - pub fn udpate_config(&self, config: &BackupStreamConfig) { + pub fn update_config(&self, config: &BackupStreamConfig) { *self.max_flush_interval.write().unwrap() = config.max_flush_interval.0; self.temp_file_size_limit .store(config.file_size_limit.0, Ordering::SeqCst); @@ -2481,7 +2481,7 @@ mod tests { match &cmds[0] { Task::ChangeConfig(cfg) => { assert!(matches!(cfg, _new_cfg)); - router.udpate_config(cfg); + router.update_config(cfg); assert_eq!( router.max_flush_interval.rl().to_owned(), _new_cfg.max_flush_interval.0 From 2472fd4d85c220b74dc889b493e70bc95dcc75c2 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Fri, 5 Jan 2024 20:33:21 +0800 Subject: [PATCH 1092/1149] txn: introduce pessimistic rollback read phase (#16185) close tikv/tikv#16158 Reduce the cost of expired lock cleaning by introducing pessimsitic rollback read phase, the expired pessimistic locks belonging to the same transcation could be cleaned up within one RPC request. 
Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/raftstore/src/store/txn_ext.rs | 13 +- components/resolved_ts/src/scanner.rs | 4 +- components/test_raftstore/src/util.rs | 52 ++++ components/txn_types/src/lib.rs | 2 +- components/txn_types/src/lock.rs | 49 ++++ src/storage/metrics.rs | 1 + src/storage/mod.rs | 238 +++++++++++++++--- src/storage/mvcc/reader/reader.rs | 131 +++++++++- .../txn/actions/flashback_to_version.rs | 4 +- src/storage/txn/commands/mod.rs | 34 ++- .../txn/commands/pessimistic_rollback.rs | 24 +- .../pessimistic_rollback_read_phase.rs | 84 +++++++ .../txn/commands/resolve_lock_readphase.rs | 4 +- src/storage/txn/scheduler.rs | 1 + tests/integrations/server/kv_service.rs | 114 +++++++++ 15 files changed, 688 insertions(+), 67 deletions(-) create mode 100644 src/storage/txn/commands/pessimistic_rollback_read_phase.rs diff --git a/components/raftstore/src/store/txn_ext.rs b/components/raftstore/src/store/txn_ext.rs index 3c628502505..818ba8d2da1 100644 --- a/components/raftstore/src/store/txn_ext.rs +++ b/components/raftstore/src/store/txn_ext.rs @@ -531,13 +531,18 @@ mod tests { pessimistic_lock.into_lock() } - let filter_pass_all = |_key: &Key, _lock: &PessimisticLock| true; - let filter_pass_key2 = - |key: &Key, _lock: &PessimisticLock| key.as_encoded().starts_with(b"key2"); + type LockFilter = fn(&Key, &PessimisticLock) -> bool; + + fn filter_pass_all(_: &Key, _: &PessimisticLock) -> bool { + true + } + + fn filter_pass_key2(key: &Key, _: &PessimisticLock) -> bool { + key.as_encoded().starts_with(b"key2") + } // Case parameter: start_key, end_key, filter, limit, expected results, expected // has more. 
- type LockFilter = fn(&Key, &PessimisticLock) -> bool; let cases: [( Option, Option, diff --git a/components/resolved_ts/src/scanner.rs b/components/resolved_ts/src/scanner.rs index 7f6d491e4a4..c0715b42ff1 100644 --- a/components/resolved_ts/src/scanner.rs +++ b/components/resolved_ts/src/scanner.rs @@ -229,10 +229,10 @@ impl, E: KvEngine> ScannerPool { _checkpoint_ts: TimeStamp, ) -> Result<(Vec<(Key, Lock)>, bool)> { let (locks, has_remaining) = reader - .scan_locks( + .scan_locks_from_storage( start, None, - |lock| matches!(lock.lock_type, LockType::Put | LockType::Delete), + |_, lock| matches!(lock.lock_type, LockType::Put | LockType::Delete), DEFAULT_SCAN_BATCH_SIZE, ) .map_err(|e| Error::Other(box_err!("{:?}", e)))?; diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 2c04d2d1442..951a99074b6 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -1313,6 +1313,21 @@ pub fn must_kv_pessimistic_rollback( assert!(resp.errors.is_empty(), "{:?}", resp.get_errors()); } +pub fn must_kv_pessimistic_rollback_with_scan_first( + client: &TikvClient, + ctx: Context, + ts: u64, + for_update_ts: u64, +) { + let mut req = PessimisticRollbackRequest::default(); + req.set_context(ctx); + req.start_version = ts; + req.for_update_ts = for_update_ts; + let resp = client.kv_pessimistic_rollback(&req).unwrap(); + assert!(!resp.has_region_error(), "{:?}", resp.get_region_error()); + assert!(resp.errors.is_empty(), "{:?}", resp.get_errors()); +} + pub fn must_check_txn_status( client: &TikvClient, ctx: Context, @@ -1378,6 +1393,43 @@ pub fn must_kv_have_locks( } } +/// Scan scan_limit number of locks within [start_key, end_key), the returned +/// lock number should equal the input expected_cnt. 
+pub fn must_lock_cnt( + client: &TikvClient, + ctx: Context, + ts: u64, + start_key: &[u8], + end_key: &[u8], + lock_type: Op, + expected_cnt: usize, + scan_limit: usize, +) { + let mut req = ScanLockRequest::default(); + req.set_context(ctx); + req.set_limit(scan_limit as u32); + req.set_start_key(start_key.to_vec()); + req.set_end_key(end_key.to_vec()); + req.set_max_version(ts); + let resp = client.kv_scan_lock(&req).unwrap(); + assert!(!resp.has_region_error(), "{:?}", resp.get_region_error()); + assert!(resp.error.is_none(), "{:?}", resp.get_error()); + + let lock_cnt = resp + .locks + .iter() + .filter(|lock_info| lock_info.get_lock_type() == lock_type) + .count(); + + assert_eq!( + lock_cnt, + expected_cnt, + "lock count not match, expected: {:?}; got: {:?}", + expected_cnt, + resp.locks.len() + ); +} + pub fn get_tso(pd_client: &TestPdClient) -> u64 { block_on(pd_client.get_tso()).unwrap().into_inner() } diff --git a/components/txn_types/src/lib.rs b/components/txn_types/src/lib.rs index 61d2093366a..bba770e0936 100644 --- a/components/txn_types/src/lib.rs +++ b/components/txn_types/src/lib.rs @@ -10,7 +10,7 @@ use std::io; use error_code::{self, ErrorCode, ErrorCodeExt}; use kvproto::kvrpcpb; -pub use lock::{Lock, LockType, PessimisticLock}; +pub use lock::{Lock, LockType, PessimisticLock, TxnLockRef}; use thiserror::Error; pub use timestamp::{TimeStamp, TsSet, TSO_PHYSICAL_SHIFT_BITS}; pub use types::{ diff --git a/components/txn_types/src/lock.rs b/components/txn_types/src/lock.rs index 17d9dbe37e2..8bd63b33fa9 100644 --- a/components/txn_types/src/lock.rs +++ b/components/txn_types/src/lock.rs @@ -604,6 +604,55 @@ impl std::fmt::Debug for PessimisticLock { } } +/// TxnLock is a wrapper for in-memory pessimistic locks and storage locks. 
+#[derive(PartialEq, Clone, Debug)] +pub enum TxnLockRef<'a> { + InMemory(&'a PessimisticLock), + Persisted(&'a Lock), +} + +impl<'a> TxnLockRef<'a> { + pub fn get_start_ts(&self) -> TimeStamp { + match self { + TxnLockRef::InMemory(pessimistic_lock) => pessimistic_lock.start_ts, + TxnLockRef::Persisted(lock) => lock.ts, + } + } + + pub fn get_for_update_ts(&self) -> TimeStamp { + match self { + TxnLockRef::InMemory(pessimistic_lock) => pessimistic_lock.for_update_ts, + TxnLockRef::Persisted(lock) => lock.for_update_ts, + } + } + + pub fn is_pessimistic_lock(&self) -> bool { + match self { + TxnLockRef::InMemory(_) => true, + TxnLockRef::Persisted(lock) => lock.is_pessimistic_lock(), + } + } + + pub fn get_lock_type(&self) -> LockType { + match self { + TxnLockRef::InMemory(_) => LockType::Pessimistic, + TxnLockRef::Persisted(lock) => lock.lock_type, + } + } +} + +impl<'a> From<&'a PessimisticLock> for TxnLockRef<'a> { + fn from(in_memory_pessimistic_lock: &'a PessimisticLock) -> Self { + Self::InMemory(in_memory_pessimistic_lock) + } +} + +impl<'a> From<&'a Lock> for TxnLockRef<'a> { + fn from(lock: &'a Lock) -> Self { + Self::Persisted(lock) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/storage/metrics.rs b/src/storage/metrics.rs index d62f7862fae..25fa7e1073e 100644 --- a/src/storage/metrics.rs +++ b/src/storage/metrics.rs @@ -131,6 +131,7 @@ make_auto_flush_static_metric! 
{ cleanup, rollback, pessimistic_rollback, + pessimistic_rollback_read_phase, txn_heart_beat, check_txn_status, check_secondary_locks, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index a54bb8893ac..284fad1e491 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -80,7 +80,6 @@ use engine_traits::{ raw_ttl::ttl_to_expire_ts, CfName, CF_DEFAULT, CF_LOCK, CF_WRITE, DATA_CFS, DATA_CFS_LEN, }; use futures::{future::Either, prelude::*}; -use itertools::Itertools; use kvproto::{ kvrpcpb::{ ApiVersion, ChecksumAlgorithm, CommandPri, Context, GetRequest, IsolationLevel, KeyRange, @@ -128,7 +127,7 @@ use crate::{ kv::{with_tls_engine, Modify, WriteData}, lock_manager::{LockManager, MockLockManager}, metrics::{CommandKind, *}, - mvcc::{MvccReader, PointGetterBuilder}, + mvcc::{metrics::ScanLockReadTimeSource::resolve_lock, MvccReader, PointGetterBuilder}, txn::{ commands::{RawAtomicStore, RawCompareAndSwap, TypedCommand}, flow_controller::{EngineFlowController, FlowController}, @@ -1459,37 +1458,19 @@ impl Storage { Some(ScanMode::Forward), !ctx.get_not_fill_cache(), ); - let memory_locks = reader - .load_in_memory_pessimisitic_lock_range( - start_key.as_ref(), - end_key.as_ref(), - |_, lock| lock.start_ts <= max_ts, - limit, - ) - .map_err(txn::Error::from); - let (memory_lock_kv_pairs, _) = memory_locks?; - let result = reader + let read_res = reader .scan_locks( start_key.as_ref(), end_key.as_ref(), - |lock| lock.ts <= max_ts, + |_, lock| lock.get_start_ts() <= max_ts, limit, + resolve_lock, ) .map_err(txn::Error::from); statistics.add(&reader.statistics); - let (kv_pairs, _) = result?; - - // Merge the results from in-memory pessimistic locks and the lock cf. - // The result order is decided by the key. 
- let memory_lock_iter = memory_lock_kv_pairs.into_iter(); - let lock_iter = kv_pairs.into_iter(); - let merged_iter = memory_lock_iter - .merge_by(lock_iter, |(memory_key, _), (key, _)| memory_key <= key); - let mut locks = Vec::with_capacity(limit); - for (key, lock) in merged_iter { - if limit > 0 && locks.len() >= limit { - break; - } + let (read_locks, _) = read_res?; + let mut locks = Vec::with_capacity(read_locks.len()); + for (key, lock) in read_locks.into_iter() { let lock_info = lock.into_lock_info(key.into_raw().map_err(txn::Error::from)?); locks.push(lock_info); @@ -3729,20 +3710,61 @@ pub mod test_util { ) } - pub fn delete_pessimistic_lock( + pub fn acquire_pessimistic_lock( storage: &Storage, key: Key, start_ts: u64, for_update_ts: u64, + ) { + acquire_pessimistic_lock_impl( + storage, + vec![(key, false)], + start_ts, + for_update_ts, + false, + false, + ) + } + + fn acquire_pessimistic_lock_impl( + storage: &Storage, + keys: Vec<(Key, bool)>, + start_ts: u64, + for_update_ts: u64, + return_values: bool, + check_existence: bool, ) { let (tx, rx) = channel(); storage .sched_txn_command( - commands::PessimisticRollback::new( - vec![key], + new_acquire_pessimistic_lock_command( + keys, + start_ts, + for_update_ts, + return_values, + check_existence, + ), + expect_ok_callback(tx, 0), + ) + .unwrap(); + rx.recv().unwrap(); + } + + #[cfg(test)] + pub fn prewrite_lock( + storage: &Storage, + key: Key, + primary_key: &[u8], + value: &[u8], + start_ts: u64, + ) { + let (tx, rx) = channel(); + storage + .sched_txn_command( + commands::Prewrite::with_defaults( + vec![txn_types::Mutation::make_put(key, value.to_vec())], + primary_key.to_vec(), start_ts.into(), - for_update_ts.into(), - Context::default(), ), expect_ok_callback(tx, 0), ) @@ -3750,6 +3772,59 @@ pub mod test_util { rx.recv().unwrap(); } + pub fn delete_pessimistic_lock( + storage: &Storage, + key: Key, + start_ts: u64, + for_update_ts: u64, + ) { + delete_pessimistic_lock_impl(storage, Some(key), 
start_ts, for_update_ts) + } + + pub fn delete_pessimistic_lock_with_scan_first( + storage: &Storage, + start_ts: u64, + for_update_ts: u64, + ) { + delete_pessimistic_lock_impl(storage, None, start_ts, for_update_ts) + } + + fn delete_pessimistic_lock_impl( + storage: &Storage, + key: Option, + start_ts: u64, + for_update_ts: u64, + ) { + let (tx, rx) = channel(); + if let Some(key) = key { + storage + .sched_txn_command( + commands::PessimisticRollback::new( + vec![key], + start_ts.into(), + for_update_ts.into(), + None, + Context::default(), + ), + expect_ok_callback(tx, 0), + ) + .unwrap(); + } else { + storage + .sched_txn_command( + commands::PessimisticRollbackReadPhase::new( + start_ts.into(), + for_update_ts.into(), + None, + Context::default(), + ), + expect_ok_callback(tx, 0), + ) + .unwrap(); + }; + rx.recv().unwrap(); + } + pub struct GetResult { id: u64, res: Result>>, @@ -3914,7 +3989,7 @@ mod tests { CancellationCallback, DiagnosticContext, KeyLockWaitInfo, LockDigest, LockWaitToken, UpdateWaitForEvent, WaitTimeout, }, - mvcc::LockType, + mvcc::{tests::must_locked, LockType}, txn::{ commands, commands::{AcquirePessimisticLock, Prewrite}, @@ -9590,6 +9665,7 @@ mod tests { keys.clone(), 50.into(), 50.into(), + None, Context::default(), ), expect_ok_callback(tx.clone(), 0), @@ -11560,4 +11636,102 @@ mod tests { 140.into() ); } + + #[test] + fn test_pessimistic_rollback_with_scan_first() { + use crate::storage::txn::tests::must_pessimistic_locked; + let format_key = |prefix: char, i: usize| format!("{}{:04}", prefix, i).as_bytes().to_vec(); + let k1 = format_key('k', 1); + let k2 = format_key('k', 2); + let start_ts = 10; + let for_update_ts = 10; + for enable_in_memory_lock in [true, false] { + let txn_ext = Arc::new(TxnExt::default()); + let mut storage = TestStorageBuilderApiV1::new(MockLockManager::new()) + .pipelined_pessimistic_lock(enable_in_memory_lock) + .in_memory_pessimistic_lock(enable_in_memory_lock) + .build_for_txn(txn_ext.clone()) + 
.unwrap(); + + // Basic case, two keys could be rolled back within one pessimistic rollback + // request. + acquire_pessimistic_lock( + &storage, + Key::from_raw(k1.as_slice()), + start_ts, + for_update_ts, + ); + acquire_pessimistic_lock( + &storage, + Key::from_raw(k2.as_slice()), + start_ts, + for_update_ts, + ); + must_pessimistic_locked(&mut storage.engine, k1.as_slice(), start_ts, for_update_ts); + delete_pessimistic_lock_with_scan_first(&storage, start_ts, for_update_ts); + must_unlocked(&mut storage.engine, k1.as_slice()); + must_unlocked(&mut storage.engine, k2.as_slice()); + + // Acquire pessimistic locks for more than 256 keys. + // Only pessimistic locks should be rolled back. + let start_ts = 11; + let for_update_ts = 11; + let num_keys = 400; + let prewrite_primary_key = format_key('k', 1); + for i in 0..num_keys { + let key = format_key('k', i); + if i % 2 == 0 { + acquire_pessimistic_lock( + &storage, + Key::from_raw(key.as_slice()), + start_ts, + for_update_ts, + ); + } else { + prewrite_lock( + &storage, + Key::from_raw(key.as_slice()), + prewrite_primary_key.as_slice(), + b"value", + start_ts, + ); + } + } + { + let pessimistic_locks = txn_ext.pessimistic_locks.read(); + if enable_in_memory_lock { + let k0 = format_key('k', 0); + let lock = pessimistic_locks + .get(&Key::from_raw(k0.as_slice())) + .unwrap(); + assert_eq!( + lock, + &( + PessimisticLock { + primary: Box::new(*b"k0000"), + start_ts: start_ts.into(), + ttl: 3000, + for_update_ts: for_update_ts.into(), + min_commit_ts: (for_update_ts + 1).into(), + last_change: LastChange::NotExist, + is_locked_with_conflict: false, + }, + false + ) + ); + } else { + assert_eq!(pessimistic_locks.len(), 0); + } + } + delete_pessimistic_lock_with_scan_first(&storage, start_ts, for_update_ts); + for i in 0..num_keys { + let key = format_key('k', i); + if i % 2 == 0 { + must_unlocked(&mut storage.engine, key.as_slice()); + } else { + must_locked(&mut storage.engine, key.as_slice(), start_ts); + } + } + } 
+ } } diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index 27484655af6..dc373d147c5 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -12,7 +12,8 @@ use raftstore::store::{LocksStatus, PeerPessimisticLocks}; use tikv_kv::{SnapshotExt, SEEK_BOUND}; use tikv_util::time::Instant; use txn_types::{ - Key, LastChange, Lock, OldValue, PessimisticLock, TimeStamp, Value, Write, WriteRef, WriteType, + Key, LastChange, Lock, OldValue, PessimisticLock, TimeStamp, TxnLockRef, Value, Write, + WriteRef, WriteType, }; use crate::storage::{ @@ -21,7 +22,7 @@ use crate::storage::{ }, mvcc::{ default_not_found_error, - metrics::SCAN_LOCK_READ_TIME_VEC, + metrics::{ScanLockReadTimeSource, SCAN_LOCK_READ_TIME_VEC}, reader::{OverlappedWrite, TxnCommitRecord}, Result, }, @@ -236,7 +237,7 @@ impl MvccReader { } if self.scan_mode.is_some() { - self.create_lock_cursor()?; + self.create_lock_cursor_if_not_exist()?; } let res = if let Some(ref mut cursor) = self.lock_cursor { @@ -278,12 +279,116 @@ impl MvccReader { Ok(()) } - pub fn load_in_memory_pessimisitic_lock_range( + /// Scan all types of locks(pessimitic, prewrite) satisfying `filter` + /// condition from both in-memory pessimitic lock table and the storage + /// within [start_key, end_key) . 
+ pub fn scan_locks( + &mut self, + start_key: Option<&Key>, + end_key: Option<&Key>, + filter: F, + limit: usize, + source: ScanLockReadTimeSource, + ) -> Result<(Vec<(Key, Lock)>, bool)> + where + F: Fn(&Key, TxnLockRef<'_>) -> bool, + { + let (memory_locks, memory_has_remain) = self.load_in_memory_pessimistic_lock_range( + start_key, + end_key, + |k, l| filter(k, l.into()), + limit, + source, + )?; + if memory_locks.is_empty() { + return self.scan_locks_from_storage( + start_key, + end_key, + |k, l| filter(k, l.into()), + limit, + ); + } + + let mut lock_cursor_seeked = false; + let mut storage_iteration_finished = false; + let mut next_pair_from_storage = || -> Result> { + if storage_iteration_finished { + return Ok(None); + } + self.create_lock_cursor_if_not_exist()?; + let cursor = self.lock_cursor.as_mut().unwrap(); + if !lock_cursor_seeked { + let ok = match start_key { + Some(x) => cursor.seek(x, &mut self.statistics.lock)?, + None => cursor.seek_to_first(&mut self.statistics.lock), + }; + if !ok { + storage_iteration_finished = true; + return Ok(None); + } + lock_cursor_seeked = true; + } else { + cursor.next(&mut self.statistics.lock); + } + + while cursor.valid()? 
{ + let key = Key::from_encoded_slice(cursor.key(&mut self.statistics.lock)); + if let Some(end) = end_key { + if key >= *end { + storage_iteration_finished = true; + return Ok(None); + } + } + let lock = Lock::parse(cursor.value(&mut self.statistics.lock))?; + if filter(&key, TxnLockRef::Persisted(&lock)) { + self.statistics.lock.processed_keys += 1; + return Ok(Some((key, lock))); + } + cursor.next(&mut self.statistics.lock); + } + storage_iteration_finished = true; + Ok(None) + }; + + let mut locks = Vec::with_capacity(limit.min(memory_locks.len())); + let mut memory_iter = memory_locks.into_iter(); + let mut memory_pair = memory_iter.next(); + let mut storage_pair = next_pair_from_storage()?; + let has_remain = loop { + match (memory_pair.as_ref(), storage_pair.as_ref()) { + (Some((memory_key, _)), Some((storage_key, _))) => { + if storage_key <= memory_key { + locks.push(storage_pair.take().unwrap()); + storage_pair = next_pair_from_storage()?; + } else { + locks.push(memory_pair.take().unwrap()); + memory_pair = memory_iter.next(); + } + } + (Some(_), None) => { + locks.push(memory_pair.take().unwrap()); + memory_pair = memory_iter.next(); + } + (None, Some(_)) => { + locks.push(storage_pair.take().unwrap()); + storage_pair = next_pair_from_storage()?; + } + (None, None) => break memory_has_remain, + } + if limit > 0 && locks.len() >= limit { + break memory_pair.is_some() || storage_pair.is_some() || memory_has_remain; + } + }; + Ok((locks, has_remain)) + } + + pub fn load_in_memory_pessimistic_lock_range( &self, start_key: Option<&Key>, end_key: Option<&Key>, filter: F, scan_limit: usize, + source: ScanLockReadTimeSource, ) -> Result<(Vec<(Key, Lock)>, bool)> where F: Fn(&Key, &PessimisticLock) -> bool, @@ -302,7 +407,7 @@ impl MvccReader { }; let elapsed = begin_instant.saturating_elapsed(); SCAN_LOCK_READ_TIME_VEC - .resolve_lock + .get(source) .observe(elapsed.as_secs_f64()); res @@ -547,7 +652,7 @@ impl MvccReader { Ok(()) } - fn create_lock_cursor(&mut 
self) -> Result<()> { + fn create_lock_cursor_if_not_exist(&mut self) -> Result<()> { if self.lock_cursor.is_none() { let cursor = CursorBuilder::new(&self.snapshot, CF_LOCK) .fill_cache(self.fill_cache) @@ -579,13 +684,13 @@ impl MvccReader { Ok(None) } - /// Scan locks that satisfies `filter(lock)` returns true in the key range + /// Scan locks that satisfies `filter(lock)` from storage in the key range /// [start, end). At most `limit` locks will be returned. If `limit` is /// set to `0`, it means unlimited. /// /// The return type is `(locks, has_remain)`. `has_remain` indicates whether /// there MAY be remaining locks that can be scanned. - pub fn scan_locks( + pub fn scan_locks_from_storage( &mut self, start: Option<&Key>, end: Option<&Key>, @@ -593,9 +698,9 @@ impl MvccReader { limit: usize, ) -> Result<(Vec<(Key, Lock)>, bool)> where - F: Fn(&Lock) -> bool, + F: Fn(&Key, &Lock) -> bool, { - self.create_lock_cursor()?; + self.create_lock_cursor_if_not_exist()?; let cursor = self.lock_cursor.as_mut().unwrap(); let ok = match start { Some(x) => cursor.seek(x, &mut self.statistics.lock)?, @@ -616,7 +721,7 @@ impl MvccReader { } let lock = Lock::parse(cursor.value(&mut self.statistics.lock))?; - if filter(&lock) { + if filter(&key, &lock) { locks.push((key, lock)); if limit > 0 && locks.len() == limit { has_remain = true; @@ -1759,10 +1864,10 @@ pub mod tests { let snap = RegionSnapshot::::from_raw(db.clone(), region.clone()); let mut reader = MvccReader::new(snap, None, false); let res = reader - .scan_locks( + .scan_locks_from_storage( start_key.as_ref(), end_key.as_ref(), - |l| l.ts <= 10.into(), + |_, l| l.ts <= 10.into(), limit, ) .unwrap(); diff --git a/src/storage/txn/actions/flashback_to_version.rs b/src/storage/txn/actions/flashback_to_version.rs index 47edac8b513..dddc7cf0d15 100644 --- a/src/storage/txn/actions/flashback_to_version.rs +++ b/src/storage/txn/actions/flashback_to_version.rs @@ -16,11 +16,11 @@ pub fn flashback_to_version_read_lock( 
end_key: Option<&Key>, flashback_start_ts: TimeStamp, ) -> TxnResult> { - let result = reader.scan_locks( + let result = reader.scan_locks_from_storage( Some(&next_lock_key), end_key, // Skip the `prewrite_lock`. This lock will appear when retrying prepare - |lock| lock.ts != flashback_start_ts, + |_, lock| lock.ts != flashback_start_ts, FLASHBACK_BATCH_SIZE, ); let (key_locks, _) = result?; diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index dabef707e61..eb4026a84d0 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -18,6 +18,7 @@ pub(crate) mod mvcc_by_key; pub(crate) mod mvcc_by_start_ts; pub(crate) mod pause; pub(crate) mod pessimistic_rollback; +mod pessimistic_rollback_read_phase; pub(crate) mod prewrite; pub(crate) mod resolve_lock; pub(crate) mod resolve_lock_lite; @@ -52,6 +53,7 @@ pub use mvcc_by_key::MvccByKey; pub use mvcc_by_start_ts::MvccByStartTs; pub use pause::Pause; pub use pessimistic_rollback::PessimisticRollback; +pub use pessimistic_rollback_read_phase::PessimisticRollbackReadPhase; pub use prewrite::{one_pc_commit, Prewrite, PrewritePessimistic}; pub use resolve_lock::{ResolveLock, RESOLVE_LOCK_BATCH_SIZE}; pub use resolve_lock_lite::ResolveLockLite; @@ -95,6 +97,7 @@ pub enum Command { Cleanup(Cleanup), Rollback(Rollback), PessimisticRollback(PessimisticRollback), + PessimisticRollbackReadPhase(PessimisticRollbackReadPhase), TxnHeartBeat(TxnHeartBeat), CheckTxnStatus(CheckTxnStatus), CheckSecondaryLocks(CheckSecondaryLocks), @@ -274,14 +277,26 @@ impl From for TypedCommand<()> { impl From for TypedCommand>> { fn from(mut req: PessimisticRollbackRequest) -> Self { - let keys = req.get_keys().iter().map(|x| Key::from_raw(x)).collect(); - - PessimisticRollback::new( - keys, - req.get_start_version().into(), - req.get_for_update_ts().into(), - req.take_context(), - ) + // If the keys are empty, try to scan locks with specified `start_ts` and + // `for_update_ts`, and then pass 
them to a new pessimitic rollback + // command to clean up, just like resolve lock with read phase. + if req.get_keys().is_empty() { + PessimisticRollbackReadPhase::new( + req.get_start_version().into(), + req.get_for_update_ts().into(), + None, + req.take_context(), + ) + } else { + let keys = req.get_keys().iter().map(|x| Key::from_raw(x)).collect(); + PessimisticRollback::new( + keys, + req.get_start_version().into(), + req.get_for_update_ts().into(), + None, + req.take_context(), + ) + } } } @@ -626,6 +641,7 @@ impl Command { Command::Cleanup(t) => t, Command::Rollback(t) => t, Command::PessimisticRollback(t) => t, + Command::PessimisticRollbackReadPhase(t) => t, Command::TxnHeartBeat(t) => t, Command::CheckTxnStatus(t) => t, Command::CheckSecondaryLocks(t) => t, @@ -652,6 +668,7 @@ impl Command { Command::Cleanup(t) => t, Command::Rollback(t) => t, Command::PessimisticRollback(t) => t, + Command::PessimisticRollbackReadPhase(t) => t, Command::TxnHeartBeat(t) => t, Command::CheckTxnStatus(t) => t, Command::CheckSecondaryLocks(t) => t, @@ -675,6 +692,7 @@ impl Command { ) -> Result { match self { Command::ResolveLockReadPhase(t) => t.process_read(snapshot, statistics), + Command::PessimisticRollbackReadPhase(t) => t.process_read(snapshot, statistics), Command::MvccByKey(t) => t.process_read(snapshot, statistics), Command::MvccByStartTs(t) => t.process_read(snapshot, statistics), Command::FlashbackToVersionReadPhase(t) => t.process_read(snapshot, statistics), diff --git a/src/storage/txn/commands/pessimistic_rollback.rs b/src/storage/txn/commands/pessimistic_rollback.rs index 531eb256c40..551ba931e53 100644 --- a/src/storage/txn/commands/pessimistic_rollback.rs +++ b/src/storage/txn/commands/pessimistic_rollback.rs @@ -11,8 +11,8 @@ use crate::storage::{ mvcc::{MvccTxn, Result as MvccResult, SnapshotReader}, txn::{ commands::{ - Command, CommandExt, ReaderWithStats, ReleasedLocks, ResponsePolicy, TypedCommand, - WriteCommand, WriteContext, WriteResult, + Command, 
CommandExt, PessimisticRollbackReadPhase, ReaderWithStats, ReleasedLocks, + ResponsePolicy, TypedCommand, WriteCommand, WriteContext, WriteResult, }, Result, }, @@ -32,6 +32,8 @@ command! { /// The transaction timestamp. start_ts: TimeStamp, for_update_ts: TimeStamp, + /// The next key to scan using pessimistic rollback read phase. + scan_key: Option, } } @@ -83,6 +85,21 @@ impl WriteCommand for PessimisticRollback { released_locks.push(released_lock?); } + let pr = if self.scan_key.is_none() { + ProcessResult::MultiRes { results: vec![] } + } else { + let next_cmd = PessimisticRollbackReadPhase { + ctx: ctx.clone(), + deadline: self.deadline, + start_ts: self.start_ts, + for_update_ts: self.for_update_ts, + scan_key: self.scan_key.take(), + }; + ProcessResult::NextCommand { + cmd: Command::PessimisticRollbackReadPhase(next_cmd), + } + }; + let new_acquired_locks = txn.take_new_locks(); let mut write_data = WriteData::from_modifies(txn.into_modifies()); write_data.set_allowed_on_disk_almost_full(); @@ -90,7 +107,7 @@ impl WriteCommand for PessimisticRollback { ctx, to_be_write: write_data, rows, - pr: ProcessResult::MultiRes { results: vec![] }, + pr, lock_info: vec![], released_locks, new_acquired_locks, @@ -139,6 +156,7 @@ pub mod tests { start_ts, for_update_ts, deadline: Deadline::from_now(DEFAULT_EXECUTION_DURATION_LIMIT), + scan_key: None, }; let lock_mgr = MockLockManager::new(); let write_context = WriteContext { diff --git a/src/storage/txn/commands/pessimistic_rollback_read_phase.rs b/src/storage/txn/commands/pessimistic_rollback_read_phase.rs new file mode 100644 index 00000000000..ea0e1bf0729 --- /dev/null +++ b/src/storage/txn/commands/pessimistic_rollback_read_phase.rs @@ -0,0 +1,84 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +// #[PerformanceCriticalPath] +use txn_types::{Key, TimeStamp}; + +use crate::storage::{ + mvcc::{metrics::ScanLockReadTimeSource::pessimistic_rollback, MvccReader}, + txn, + txn::{ + commands::{Command, CommandExt, PessimisticRollback, ReadCommand, TypedCommand}, + sched_pool::tls_collect_keyread_histogram_vec, + ProcessResult, Result, StorageResult, RESOLVE_LOCK_BATCH_SIZE, + }, + ScanMode, Snapshot, Statistics, +}; +command! { + PessimisticRollbackReadPhase: + cmd_ty => Vec>, + display => "kv::pessimistic_rollback_read_phase", (), + content => { + start_ts: TimeStamp, + for_update_ts: TimeStamp, + scan_key: Option, + } +} + +impl CommandExt for PessimisticRollbackReadPhase { + ctx!(); + tag!(pessimistic_rollback_read_phase); + request_type!(KvPessimisticRollback); + property!(readonly); + + fn write_bytes(&self) -> usize { + 0 + } + + gen_lock!(empty); +} + +impl ReadCommand for PessimisticRollbackReadPhase { + fn process_read(self, snapshot: S, statistics: &mut Statistics) -> Result { + let tag = self.tag(); + let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &self.ctx); + let res = reader + .scan_locks( + self.scan_key.as_ref(), + None, + |_, lock| { + lock.get_start_ts() == self.start_ts + && lock.is_pessimistic_lock() + && lock.get_for_update_ts() <= self.for_update_ts + }, + RESOLVE_LOCK_BATCH_SIZE, + pessimistic_rollback, + ) + .map_err(txn::Error::from); + statistics.add(&reader.statistics); + let (locks, has_remain) = res?; + tls_collect_keyread_histogram_vec(tag.get_str(), locks.len() as f64); + + if locks.is_empty() { + Ok(ProcessResult::MultiRes { results: vec![] }) + } else { + let next_scan_key = if has_remain { + // There might be more locks. 
+ locks.last().map(|(k, _lock)| k.clone()) + } else { + // All locks are scanned + None + }; + let next_cmd = PessimisticRollback { + ctx: self.ctx.clone(), + deadline: self.deadline, + keys: locks.into_iter().map(|(key, _)| key).collect(), + start_ts: self.start_ts, + for_update_ts: self.for_update_ts, + scan_key: next_scan_key, + }; + Ok(ProcessResult::NextCommand { + cmd: Command::PessimisticRollback(next_cmd), + }) + } + } +} diff --git a/src/storage/txn/commands/resolve_lock_readphase.rs b/src/storage/txn/commands/resolve_lock_readphase.rs index 588303e0a3d..bdd81283cd3 100644 --- a/src/storage/txn/commands/resolve_lock_readphase.rs +++ b/src/storage/txn/commands/resolve_lock_readphase.rs @@ -48,10 +48,10 @@ impl ReadCommand for ResolveLockReadPhase { let tag = self.tag(); let (ctx, txn_status) = (self.ctx, self.txn_status); let mut reader = MvccReader::new_with_ctx(snapshot, Some(ScanMode::Forward), &ctx); - let result = reader.scan_locks( + let result = reader.scan_locks_from_storage( self.scan_key.as_ref(), None, - |lock| txn_status.contains_key(&lock.ts), + |_, lock| txn_status.contains_key(&lock.ts), RESOLVE_LOCK_BATCH_SIZE, ); statistics.add(&reader.statistics); diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 6d087d894df..89ca750d282 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -2115,6 +2115,7 @@ mod tests { vec![Key::from_raw(b"k")], 10.into(), 20.into(), + None, Context::default(), ) .into(), diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 845ae2bc969..29b1abb01f3 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -2922,3 +2922,117 @@ fn test_mvcc_scan_memory_and_cf_locks() { assert!(!scan_lock_resp.has_region_error()); assert_eq!(scan_lock_resp.locks.len(), 0); } + +#[test_case(test_raftstore::must_new_and_configure_cluster)] 
+#[test_case(test_raftstore_v2::must_new_and_configure_cluster)] +fn test_pessimistic_rollback_with_read_first() { + for enable_in_memory_lock in [true, false] { + let (cluster, leader, ctx) = new_cluster(|cluster| { + cluster.cfg.pessimistic_txn.pipelined = enable_in_memory_lock; + cluster.cfg.pessimistic_txn.in_memory = enable_in_memory_lock; + + // Disable region split. + const MAX_REGION_SIZE: u64 = 1024; + const MAX_SPLIT_KEY: u64 = 1 << 31; + cluster.cfg.coprocessor.region_max_size = Some(ReadableSize::gb(MAX_REGION_SIZE)); + cluster.cfg.coprocessor.region_split_size = Some(ReadableSize::gb(MAX_REGION_SIZE)); + cluster.cfg.coprocessor.region_max_keys = Some(MAX_SPLIT_KEY); + cluster.cfg.coprocessor.region_split_keys = Some(MAX_SPLIT_KEY); + }); + let env = Arc::new(Environment::new(1)); + let leader_store_id = leader.get_store_id(); + let channel = ChannelBuilder::new(env).connect(&cluster.sim.rl().get_addr(leader_store_id)); + let client = TikvClient::new(channel); + + let format_key = |prefix: char, i: usize| format!("{}{:04}", prefix, i).as_bytes().to_vec(); + let (k1, k2, k3) = (format_key('k', 1), format_key('k', 2), format_key('k', 3)); + + // Basic case, two keys could be rolled back within one pessimistic rollback + // request. + let start_ts = 10; + must_kv_pessimistic_lock(&client, ctx.clone(), k1.clone(), start_ts); + must_kv_pessimistic_lock(&client, ctx.clone(), k2, start_ts); + must_lock_cnt( + &client, + ctx.clone(), + start_ts + 10, + k1.as_slice(), + k3.as_slice(), + Op::PessimisticLock, + 2, + 100, + ); + must_kv_pessimistic_rollback_with_scan_first(&client, ctx.clone(), start_ts, start_ts); + must_lock_cnt( + &client, + ctx.clone(), + start_ts + 10, + k1.as_slice(), + k3.as_slice(), + Op::PessimisticLock, + 0, + 100, + ); + + // Acquire pessimistic locks for more than 256(RESOLVE_LOCK_BATCH_SIZE) keys. 
+ let start_ts = 11; + let num_keys = 1000; + let prewrite_primary_key = format_key('k', 1); + let val = b"value"; + for i in 0..num_keys { + let key = format_key('k', i); + if i % 2 == 0 { + must_kv_pessimistic_lock(&client, ctx.clone(), key, start_ts); + } else { + let mut mutation = Mutation::default(); + mutation.set_op(Op::Put); + mutation.set_key(key); + mutation.set_value(val.to_vec()); + must_kv_prewrite( + &client, + ctx.clone(), + vec![mutation], + prewrite_primary_key.clone(), + start_ts, + ); + } + } + + // Pessimistic roll back one key. + must_kv_pessimistic_rollback(&client, ctx.clone(), format_key('k', 0), start_ts, start_ts); + must_lock_cnt( + &client, + ctx.clone(), + start_ts + 10, + format_key('k', 0).as_slice(), + format_key('k', num_keys + 1).as_slice(), + Op::PessimisticLock, + num_keys / 2 - 1, + 0, + ); + + // All the pessimistic locks belonging to the same transaction are pessimistic + // rolled back within one request. + must_kv_pessimistic_rollback_with_scan_first(&client, ctx.clone(), start_ts, start_ts); + must_lock_cnt( + &client, + ctx.clone(), + start_ts + 10, + format_key('k', 0).as_slice(), + format_key('k', num_keys + 1).as_slice(), + Op::PessimisticLock, + 0, + 0, + ); + must_lock_cnt( + &client, + ctx, + start_ts + 10, + format_key('k', 0).as_slice(), + format_key('k', num_keys + 1).as_slice(), + Op::Put, + num_keys / 2, + 0, + ); + } +} From e0d70726b332a33e503c3f2addc66b9794303aea Mon Sep 17 00:00:00 2001 From: Alex Feinberg Date: Fri, 5 Jan 2024 15:33:21 -0800 Subject: [PATCH 1093/1149] raftstore: fix Display for ReadableOffsetTime (#16300) close tikv/tikv#16299 Use "%H:%M" to display time for `ReadableOffsetTime` Signed-off-by: Alex Feinberg --- components/tikv_util/src/config.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/components/tikv_util/src/config.rs b/components/tikv_util/src/config.rs index 7b3e6cd2469..23fb72603f1 100644 --- a/components/tikv_util/src/config.rs +++ 
b/components/tikv_util/src/config.rs @@ -625,7 +625,7 @@ fn parse_offset(offset_str: &str) -> Result { impl fmt::Display for ReadableOffsetTime { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{} {}", self.0, self.1) + write!(f, "{} {}", self.0.format("%H:%M"), self.1) } } @@ -2022,12 +2022,27 @@ mod tests { ) }); assert_eq!(actual, expected); + let actual = format!("{}", expected) + .parse::() + .unwrap(); + assert_eq!(actual, expected); } + let (encoded, actual) = ( + "23:00 +00:00", + ReadableOffsetTime( + NaiveTime::from_hms_opt(23, 00, 00).unwrap(), + FixedOffset::east_opt(0).unwrap(), + ), + ); + let actual = format!("{}", actual); + let expected = encoded.to_owned(); + assert_eq!(actual, expected); + let time = ReadableOffsetTime( NaiveTime::from_hms_opt(9, 30, 00).unwrap(), FixedOffset::west_opt(0).unwrap(), ); - assert_eq!(format!("{}", time), "09:30:00 +00:00"); + assert_eq!(format!("{}", time), "09:30 +00:00"); let dt = DateTime::parse_from_rfc3339("2023-10-27T09:39:57-00:00").unwrap(); assert!(time.hour_matches(&dt)); assert!(!time.hour_minutes_matches(&dt)); From 7b1e009305f895ce41d250b13b2f04754795b358 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 8 Jan 2024 15:42:25 +0800 Subject: [PATCH 1094/1149] engine_rocks: log SST corruption reason (#16253) ref tikv/tikv#16308 Signed-off-by: Neil Shen Co-authored-by: glorv --- components/engine_rocks/src/event_listener.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/components/engine_rocks/src/event_listener.rs b/components/engine_rocks/src/event_listener.rs index 03a40d005c8..4ba4061a60f 100644 --- a/components/engine_rocks/src/event_listener.rs +++ b/components/engine_rocks/src/event_listener.rs @@ -130,6 +130,7 @@ impl rocksdb::EventListener for RocksEventListener { if let Some(path) = resolve_sst_filename_from_err(&err) { warn!( "detected rocksdb background error"; + "reason" => r, "sst" => &path, "err" => &err ); From 110d9f907dee89dfda61f65b1f76cae4cf92e84b Mon Sep 17 
00:00:00 2001 From: 3pointer Date: Mon, 8 Jan 2024 18:12:24 +0800 Subject: [PATCH 1095/1149] raftstore: real batch ingest sst files in different region (#16298) ref tikv/tikv#16267 raftstore: real batch ingest sst files in different region Signed-off-by: 3pointer --- components/raftstore/src/store/fsm/apply.rs | 49 ++++++++++++++++----- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 539d1eb67a8..84d2c78c830 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -555,7 +555,8 @@ where delegate.unfinished_write_seqno.push(seqno); } self.prepare_for(delegate); - delegate.last_flush_applied_index = delegate.apply_state.get_applied_index() + delegate.last_flush_applied_index = delegate.apply_state.get_applied_index(); + delegate.has_pending_ssts = false; } self.kv_wb_last_bytes = self.kv_wb().data_size() as u64; self.kv_wb_last_keys = self.kv_wb().count() as u64; @@ -791,7 +792,7 @@ pub fn notify_stale_req_with_msg(term: u64, msg: String, cb: impl ErrorCallback) } /// Checks if a write is needed to be issued before handling the command. -fn should_write_to_engine(cmd: &RaftCmdRequest) -> bool { +fn should_write_to_engine(has_pending_writes: bool, cmd: &RaftCmdRequest) -> bool { if cmd.has_admin_request() { match cmd.get_admin_request().get_cmd_type() { // ComputeHash require an up to date snapshot. 
@@ -809,7 +810,7 @@ fn should_write_to_engine(cmd: &RaftCmdRequest) -> bool { if req.has_delete_range() { return true; } - if req.has_ingest_sst() { + if req.has_ingest_sst() && has_pending_writes { return true; } } @@ -1043,6 +1044,8 @@ where buckets: Option, unfinished_write_seqno: Vec, + + has_pending_ssts: bool, } impl ApplyDelegate @@ -1077,6 +1080,7 @@ where trace: ApplyMemoryTrace::default(), buckets: None, unfinished_write_seqno: vec![], + has_pending_ssts: false, } } @@ -1227,9 +1231,15 @@ where if apply_ctx.yield_high_latency_operation && has_high_latency_operation(&cmd) { self.priority = Priority::Low; } + if self.has_pending_ssts { + // we are in low priority handler and to avoid overlapped ssts with same region + // just return Yield + return ApplyResult::Yield; + } let mut has_unflushed_data = self.last_flush_applied_index != self.apply_state.get_applied_index(); - if (has_unflushed_data && should_write_to_engine(&cmd) + if (has_unflushed_data + && should_write_to_engine(!apply_ctx.kv_wb().is_empty(), &cmd) || apply_ctx.kv_wb().should_write_to_engine()) && apply_ctx.host.pre_persist(&self.region, false, Some(&cmd)) { @@ -1997,6 +2007,7 @@ where match ctx.importer.validate(sst) { Ok(meta_info) => { ctx.pending_ssts.push(meta_info.clone()); + self.has_pending_ssts = true; ssts.push(meta_info) } Err(e) => { @@ -2005,7 +2016,6 @@ where panic!("{} ingest {:?}: {:?}", self.tag, sst, e); } }; - Ok(()) } } @@ -4646,6 +4656,7 @@ where self.apply_ctx.flush(); for fsm in fsms.iter_mut().flatten() { fsm.delegate.last_flush_applied_index = fsm.delegate.apply_state.get_applied_index(); + fsm.delegate.has_pending_ssts = false; fsm.delegate.update_memory_trace(&mut self.trace_event); } MEMTRACE_APPLYS.trace(mem::take(&mut self.trace_event)); @@ -5193,7 +5204,7 @@ mod tests { req.set_ingest_sst(IngestSstRequest::default()); let mut cmd = RaftCmdRequest::default(); cmd.mut_requests().push(req); - assert_eq!(should_write_to_engine(&cmd), true); + 
assert_eq!(should_write_to_engine(true, &cmd), true); assert_eq!(should_sync_log(&cmd), true); // Normal command @@ -5207,7 +5218,17 @@ mod tests { let mut req = RaftCmdRequest::default(); req.mut_admin_request() .set_cmd_type(AdminCmdType::ComputeHash); - assert_eq!(should_write_to_engine(&req), true); + assert_eq!(should_write_to_engine(true, &req), true); + assert_eq!(should_write_to_engine(false, &req), true); + + // DeleteRange command + let mut req = Request::default(); + req.set_cmd_type(CmdType::DeleteRange); + req.set_delete_range(DeleteRangeRequest::default()); + let mut cmd = RaftCmdRequest::default(); + cmd.mut_requests().push(req); + assert_eq!(should_write_to_engine(true, &cmd), true); + assert_eq!(should_write_to_engine(false, &cmd), true); // IngestSst command let mut req = Request::default(); @@ -5215,7 +5236,8 @@ mod tests { req.set_ingest_sst(IngestSstRequest::default()); let mut cmd = RaftCmdRequest::default(); cmd.mut_requests().push(req); - assert_eq!(should_write_to_engine(&cmd), true); + assert_eq!(should_write_to_engine(true, &cmd), true); + assert_eq!(should_write_to_engine(false, &cmd), false); } #[test] @@ -6214,7 +6236,7 @@ mod tests { // nomral put command, so the first apple_res.exec_res should be empty. let apply_res = fetch_apply_res(&rx); assert!(apply_res.exec_res.is_empty()); - // The region was rescheduled low-priority becasuee of ingest command, + // The region was rescheduled low-priority because of ingest command, // only put entry has been applied; let apply_res = fetch_apply_res(&rx); assert_eq!(apply_res.applied_term, 3); @@ -6853,9 +6875,12 @@ mod tests { assert!(!resp.get_header().has_error(), "{:?}", resp); } let mut res = fetch_apply_res(&rx); - // There may be one or two ApplyRes which depends on whether these two apply - // msgs are batched together. - if res.apply_state.get_applied_index() == 3 { + // There are five entries [put, ingest, put, ingest, put] in one region. 
+ // so the apply results should be notified at index 2/4. + if res.apply_state.get_applied_index() == 2 { + res = fetch_apply_res(&rx); + } + if res.apply_state.get_applied_index() == 4 { res = fetch_apply_res(&rx); } assert_eq!(res.apply_state.get_applied_index(), 5); From 5f5431a95351f1177dbad96889611a1de197ad2e Mon Sep 17 00:00:00 2001 From: lucasliang Date: Tue, 9 Jan 2024 13:21:23 +0800 Subject: [PATCH 1096/1149] raft_log_engine: update to latest version. (#16294) close tikv/tikv#16324 Update `raft_log_engine` lib to the latest version, to fix some issues, including: - rewrite: optimize the interval of sync when rewriting memtables #347. - Return error instead of panicking if rewriting fails #343. --- Cargo.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f5b2437547d..272efad5391 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2747,7 +2747,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#96c40585233f176393213dbd4c04d76259bad8f9" +source = "git+https://github.com/pingcap/kvproto.git#4f28b82c78601a0819f02b1e903603735ec60fe9" dependencies = [ "futures 0.3.15", "grpcio", @@ -2971,9 +2971,9 @@ dependencies = [ [[package]] name = "memmap2" -version = "0.7.0" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180d4b35be83d33392d1d1bfbd2ae1eca7ff5de1a94d3fc87faaa99a069e7cbd" +checksum = "45fd3a57831bf88bc63f8cebc0cf956116276e97fef3966103e96416209f7c92" dependencies = [ "libc 0.2.146", ] @@ -4051,7 +4051,7 @@ dependencies = [ [[package]] name = "raft-engine" version = "0.4.1" -source = "git+https://github.com/tikv/raft-engine.git#fa56f891fdf0b1cb5b7849b7bee3c5dadbb96103" +source = "git+https://github.com/tikv/raft-engine.git#e505d631c8c6d63f7fc63d83ea6e8fb88cf970a5" dependencies = [ "byteorder", "crc32fast", @@ -4065,7 +4065,7 @@ dependencies = [ "libc 0.2.146", "log", "lz4-sys", - 
"memmap2 0.7.0", + "memmap2 0.9.3", "nix 0.26.2", "num-derive 0.4.0", "num-traits", @@ -7436,7 +7436,7 @@ checksum = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5" [[package]] name = "yatp" version = "0.0.1" -source = "git+https://github.com/tikv/yatp.git?branch=master#5572a78702572087cab8ddcdd1fe30e5bf76ae42" +source = "git+https://github.com/tikv/yatp.git?branch=master#793be4d789d4bd15292fe4d06e38063b4ec9d48e" dependencies = [ "crossbeam-deque", "crossbeam-skiplist", From 65308d67289eca893a53ad1bf75423ec041b3409 Mon Sep 17 00:00:00 2001 From: qupeng Date: Tue, 9 Jan 2024 16:16:53 +0800 Subject: [PATCH 1097/1149] engine: calculate table properties correctly for Titan (#16320) close tikv/tikv#16319 Signed-off-by: qupeng --- components/cdc/src/initializer.rs | 76 +++++++++++++++++-- components/engine_rocks/src/properties.rs | 61 ++++++++------- components/engine_rocks/src/ttl_properties.rs | 1 + src/config/mod.rs | 32 ++++++-- src/storage/kv/test_engine_builder.rs | 4 +- 5 files changed, 134 insertions(+), 40 deletions(-) diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 86a4018fffb..6a674c683a5 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -563,11 +563,14 @@ mod tests { use std::{ collections::BTreeMap, fmt::Display, - sync::mpsc::{channel, sync_channel, Receiver, RecvTimeoutError, Sender}, + sync::{ + mpsc::{channel, sync_channel, Receiver, RecvTimeoutError, Sender}, + Arc, + }, time::Duration, }; - use engine_rocks::RocksEngine; + use engine_rocks::{BlobRunMode, RocksEngine}; use engine_traits::{MiscExt, CF_WRITE}; use futures::{executor::block_on, StreamExt}; use kvproto::{ @@ -577,15 +580,19 @@ mod tests { use raftstore::{coprocessor::ObserveHandle, router::CdcRaftRouter, store::RegionSnapshot}; use resolved_ts::TxnLocks; use test_raftstore::MockRaftStoreRouter; - use tikv::storage::{ - kv::Engine, - txn::tests::{ - must_acquire_pessimistic_lock, must_commit, 
must_prewrite_delete, must_prewrite_put, - must_prewrite_put_with_txn_soucre, + use tikv::{ + config::DbConfig, + storage::{ + kv::Engine, + txn::tests::{ + must_acquire_pessimistic_lock, must_commit, must_prewrite_delete, + must_prewrite_put, must_prewrite_put_with_txn_soucre, + }, + TestEngineBuilder, }, - TestEngineBuilder, }; use tikv_util::{ + config::ReadableSize, memory::MemoryQuota, sys::thread::ThreadBuildWrapper, worker::{LazyWorker, Runnable}, @@ -1081,4 +1088,57 @@ mod tests { worker.stop(); } + + #[test] + fn test_scanner_with_titan() { + let mut cfg = DbConfig::default(); + cfg.titan.enabled = Some(true); + cfg.defaultcf.titan.blob_run_mode = BlobRunMode::Normal; + cfg.defaultcf.titan.min_blob_size = ReadableSize(0); + cfg.writecf.titan.blob_run_mode = BlobRunMode::Normal; + cfg.writecf.titan.min_blob_size = ReadableSize(0); + cfg.lockcf.titan.blob_run_mode = BlobRunMode::Normal; + cfg.lockcf.titan.min_blob_size = ReadableSize(0); + let mut engine = TestEngineBuilder::new().build_with_cfg(&cfg).unwrap(); + + must_prewrite_put(&mut engine, b"zkey", b"value", b"zkey", 100); + must_commit(&mut engine, b"zkey", 100, 110); + for cf in &[CF_WRITE, CF_DEFAULT] { + engine.kv_engine().unwrap().flush_cf(cf, true).unwrap(); + } + must_prewrite_put(&mut engine, b"zkey", b"value", b"zkey", 150); + must_commit(&mut engine, b"zkey", 150, 160); + for cf in &[CF_WRITE, CF_DEFAULT] { + engine.kv_engine().unwrap().flush_cf(cf, true).unwrap(); + } + + let (mut worker, pool, mut initializer, _rx, mut drain) = mock_initializer( + usize::MAX, + usize::MAX, + 1000, + engine.kv_engine(), + ChangeDataRequestKvApi::TiDb, + false, + ); + initializer.checkpoint_ts = 120.into(); + let snap = engine.snapshot(Default::default()).unwrap(); + + let th = pool.spawn(async move { + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); + initializer + .async_incremental_scan(snap, Region::default(), memory_quota) + .await + .unwrap(); + }); + + let mut total_entries = 0; + while let 
Some((event, _)) = block_on(drain.drain().next()) { + if let CdcEvent::Event(e) = event { + total_entries += e.get_entries().get_entries().len(); + } + } + assert_eq!(total_entries, 2); + block_on(th).unwrap(); + worker.stop(); + } } diff --git a/components/engine_rocks/src/properties.rs b/components/engine_rocks/src/properties.rs index a5365532bfc..b9032e53f8f 100644 --- a/components/engine_rocks/src/properties.rs +++ b/components/engine_rocks/src/properties.rs @@ -408,7 +408,10 @@ impl TablePropertiesCollector for MvccPropertiesCollector { // TsFilter filters sst based on max_ts and min_ts during iterating. // To prevent seeing outdated (GC) records, we should consider // RocksDB delete entry type. - if entry_type != DBEntryType::Put && entry_type != DBEntryType::Delete { + if entry_type != DBEntryType::Put + && entry_type != DBEntryType::Delete + && entry_type != DBEntryType::BlobIndex + { return; } @@ -446,37 +449,43 @@ impl TablePropertiesCollector for MvccPropertiesCollector { self.props.max_row_versions = self.row_versions; } - if self.key_mode == KeyMode::Raw { - let decode_raw_value = ApiV2::decode_raw_value(value); - match decode_raw_value { - Ok(raw_value) => { - if raw_value.is_valid(self.current_ts) { - self.props.num_puts += 1; - } else { - self.props.num_deletes += 1; + if entry_type != DBEntryType::BlobIndex { + if self.key_mode == KeyMode::Raw { + let decode_raw_value = ApiV2::decode_raw_value(value); + match decode_raw_value { + Ok(raw_value) => { + if raw_value.is_valid(self.current_ts) { + self.props.num_puts += 1; + } else { + self.props.num_deletes += 1; + } + if let Some(expire_ts) = raw_value.expire_ts { + self.props.ttl.add(expire_ts); + } } - if let Some(expire_ts) = raw_value.expire_ts { - self.props.ttl.add(expire_ts); + Err(_) => { + self.num_errors += 1; } } - Err(_) => { - self.num_errors += 1; + } else { + let write_type = match Write::parse_type(value) { + Ok(v) => v, + Err(_) => { + self.num_errors += 1; + return; + } + }; + + 
match write_type { + WriteType::Put => self.props.num_puts += 1, + WriteType::Delete => self.props.num_deletes += 1, + _ => {} } } } else { - let write_type = match Write::parse_type(value) { - Ok(v) => v, - Err(_) => { - self.num_errors += 1; - return; - } - }; - - match write_type { - WriteType::Put => self.props.num_puts += 1, - WriteType::Delete => self.props.num_deletes += 1, - _ => {} - } + // NOTE: if titan is enabled, the entry will always be treated as PUT. + // Be careful if you try to enable Titan on CF_WRITE. + self.props.num_puts += 1; } // Add new row. diff --git a/components/engine_rocks/src/ttl_properties.rs b/components/engine_rocks/src/ttl_properties.rs index 8e6021939bc..62731ac1aa4 100644 --- a/components/engine_rocks/src/ttl_properties.rs +++ b/components/engine_rocks/src/ttl_properties.rs @@ -74,6 +74,7 @@ pub struct TtlPropertiesCollector { impl TablePropertiesCollector for TtlPropertiesCollector { fn add(&mut self, key: &[u8], value: &[u8], entry_type: DBEntryType, _: u64, _: u64) { + // DBEntryType::BlobIndex will be skipped because we can't parse the value. if entry_type != DBEntryType::Put { return; } diff --git a/src/config/mod.rs b/src/config/mod.rs index c4f811f63de..0c8c940a1ef 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1382,11 +1382,11 @@ impl Default for DbConfig { impl DbConfig { pub fn optimize_for( &mut self, - engine: EngineType, + storage_config: &StorageConfig, kv_data_exists: bool, is_titan_dir_empty: bool, ) { - match engine { + match storage_config.engine { EngineType::RaftKv => { self.allow_concurrent_memtable_write.get_or_insert(true); self.max_total_wal_size.get_or_insert(ReadableSize::gb(4)); @@ -1400,7 +1400,7 @@ impl DbConfig { if self.titan.enabled.is_none() { // If the user doesn't specify titan.enabled, we enable it by default for newly // created clusters. 
- if kv_data_exists && is_titan_dir_empty { + if (kv_data_exists && is_titan_dir_empty) || storage_config.enable_ttl { self.titan.enabled = Some(false); } else { self.titan.enabled = Some(true); @@ -1444,7 +1444,8 @@ impl DbConfig { .get_or_insert(DEFAULT_LOCK_BUFFER_MEMORY_LIMIT); } } - let bg_job_limits = get_background_job_limits(engine, &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS); + let bg_job_limits = + get_background_job_limits(storage_config.engine, &KVDB_DEFAULT_BACKGROUND_JOB_LIMITS); if self.max_background_jobs == 0 { self.max_background_jobs = bg_job_limits.max_background_jobs as i32; } @@ -3679,7 +3680,7 @@ impl TikvConfig { // Optimize. self.rocksdb - .optimize_for(self.storage.engine, kv_data_exists, is_titan_dir_empty); + .optimize_for(&self.storage, kv_data_exists, is_titan_dir_empty); self.coprocessor .optimize_for(self.storage.engine == EngineType::RaftKv2); self.split @@ -3910,6 +3911,11 @@ impl TikvConfig { self.quota.validate()?; self.causal_ts.validate()?; + // Validate feature TTL with Titan configuration. + if matches!(self.rocksdb.titan.enabled, Some(true)) && self.storage.enable_ttl { + return Err("Titan is unavailable for feature TTL".to_string().into()); + } + Ok(()) } @@ -4893,6 +4899,7 @@ mod tests { // Check api version. 
{ + tikv_cfg.rocksdb.titan.enabled = Some(false); let cases = [ (ApiVersion::V1, ApiVersion::V1, true), (ApiVersion::V1, ApiVersion::V1ttl, false), @@ -6292,6 +6299,21 @@ mod tests { cfg.validate().unwrap_err(); cfg.rocksdb.writecf.format_version = Some(5); cfg.validate().unwrap(); + + let mut valid_cfg = TikvConfig::default(); + valid_cfg.storage.api_version = 2; + valid_cfg.storage.enable_ttl = true; + valid_cfg.rocksdb.titan.enabled = None; + valid_cfg.validate().unwrap(); + + let mut invalid_cfg = TikvConfig::default(); + invalid_cfg.storage.api_version = 2; + invalid_cfg.storage.enable_ttl = true; + invalid_cfg.rocksdb.titan.enabled = Some(true); + assert_eq!( + invalid_cfg.validate().unwrap_err().to_string(), + "Titan is unavailable for feature TTL" + ); } #[test] diff --git a/src/storage/kv/test_engine_builder.rs b/src/storage/kv/test_engine_builder.rs index 23a0bfcd594..30b14d22274 100644 --- a/src/storage/kv/test_engine_builder.rs +++ b/src/storage/kv/test_engine_builder.rs @@ -126,7 +126,9 @@ impl TestEngineBuilder { _ => (*cf, RocksCfOptions::default()), }) .collect(); - let engine = RocksEngine::new(&path, None, cfs_opts, self.io_rate_limiter)?; + let resources = cfg_rocksdb.build_resources(Default::default(), EngineType::RaftKv); + let db_opts = cfg_rocksdb.build_opt(&resources, EngineType::RaftKv); + let engine = RocksEngine::new(&path, Some(db_opts), cfs_opts, self.io_rate_limiter)?; Ok(engine) } } From 4a77d18d055349e77bab20fd6672ae902fd140d3 Mon Sep 17 00:00:00 2001 From: Connor Date: Tue, 9 Jan 2024 20:57:23 +0800 Subject: [PATCH 1098/1149] titan: Fix small blob file can't be gced in fallback mode (#16338) close tikv/tikv#16336 Fix titan small blob file can't be gced in fallback mode Signed-off-by: Connor1996 --- Cargo.lock | 6 +++--- src/config/mod.rs | 21 +++++++++++---------- tests/integrations/storage/test_titan.rs | 1 - 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 272efad5391..28a87bb578f
100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2812,7 +2812,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#d877018095b44b2933969fe7caf5c3e0cd86be5b" +source = "git+https://github.com/tikv/rust-rocksdb.git#73ba736143699fa623486c335527dd2a284bd0df" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2831,7 +2831,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#d877018095b44b2933969fe7caf5c3e0cd86be5b" +source = "git+https://github.com/tikv/rust-rocksdb.git#73ba736143699fa623486c335527dd2a284bd0df" dependencies = [ "bzip2-sys", "cc", @@ -4656,7 +4656,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#d877018095b44b2933969fe7caf5c3e0cd86be5b" +source = "git+https://github.com/tikv/rust-rocksdb.git#73ba736143699fa623486c335527dd2a284bd0df" dependencies = [ "libc 0.2.146", "librocksdb_sys", diff --git a/src/config/mod.rs b/src/config/mod.rs index 0c8c940a1ef..f5cb4b633b6 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -247,14 +247,14 @@ const KVDB_DEFAULT_BACKGROUND_JOB_LIMITS: BackgroundJobLimits = BackgroundJobLim max_background_jobs: 9, max_background_flushes: 3, max_sub_compactions: 3, - max_titan_background_gc: 4, + max_titan_background_gc: 1, }; const RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS: BackgroundJobLimits = BackgroundJobLimits { max_background_jobs: 4, max_background_flushes: 1, max_sub_compactions: 2, - max_titan_background_gc: 4, + max_titan_background_gc: 1, }; // `defaults` serves as an upper bound for returning limits. 
@@ -1858,6 +1858,7 @@ impl Default for RaftDbConfig { let bg_job_limits = get_background_job_limits(EngineType::RaftKv, &RAFTDB_DEFAULT_BACKGROUND_JOB_LIMITS); let titan_config = TitanDbConfig { + enabled: Some(false), max_background_gc: bg_job_limits.max_titan_background_gc as i32, ..Default::default() }; @@ -6471,7 +6472,7 @@ mod tests { max_background_jobs: 2, max_background_flushes: 1, max_sub_compactions: 1, - max_titan_background_gc: 2, + max_titan_background_gc: 1, } ); assert_eq!( @@ -6484,7 +6485,7 @@ mod tests { max_background_jobs: 2, max_background_flushes: 1, max_sub_compactions: 1, - max_titan_background_gc: 2, + max_titan_background_gc: 1, } ); } @@ -6500,7 +6501,7 @@ mod tests { max_background_jobs: 3, max_background_flushes: 1, max_sub_compactions: 1, - max_titan_background_gc: 4, + max_titan_background_gc: 1, } ); assert_eq!( @@ -6513,7 +6514,7 @@ mod tests { max_background_jobs: 2, max_background_flushes: 1, max_sub_compactions: 1, - max_titan_background_gc: 4, + max_titan_background_gc: 1, } ); assert_eq!( @@ -6526,7 +6527,7 @@ mod tests { max_background_jobs: 3, max_background_flushes: 1, max_sub_compactions: 1, - max_titan_background_gc: 4, + max_titan_background_gc: 1, } ); // cpu num = 8 @@ -6540,7 +6541,7 @@ mod tests { max_background_jobs: 7, max_background_flushes: 2, max_sub_compactions: 3, - max_titan_background_gc: 4, + max_titan_background_gc: 1, } ); assert_eq!( @@ -6553,7 +6554,7 @@ mod tests { max_background_jobs: 4, max_background_flushes: 2, max_sub_compactions: 1, - max_titan_background_gc: 4, + max_titan_background_gc: 1, } ); assert_eq!( @@ -6583,7 +6584,7 @@ mod tests { max_background_jobs: 6, max_background_flushes: 3, max_sub_compactions: 2, - max_titan_background_gc: 4, + max_titan_background_gc: 1, } ); assert_eq!( diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 9490b30b2f8..0cd6c631633 100644 --- a/tests/integrations/storage/test_titan.rs +++ 
b/tests/integrations/storage/test_titan.rs @@ -30,7 +30,6 @@ use tikv_util::{ use txn_types::{Key, Write, WriteType}; #[test] -#[ignore] fn test_turnoff_titan() { let mut cluster = new_node_cluster(0, 3); cluster.cfg.rocksdb.defaultcf.disable_auto_compactions = true; From c041ee3f37ba4c079b6b4be424690b6a9043d68b Mon Sep 17 00:00:00 2001 From: Fred Wulff Date: Tue, 9 Jan 2024 18:10:54 -0800 Subject: [PATCH 1099/1149] tikv-ctl: Don't send compact commands to TiFlash stores (#16190) close tikv/tikv#16189 tikv-ctl compact-cluster now works with clusters that have TiFlash nodes Signed-off-by: Fred Wulff Co-authored-by: tonyxuqqi --- cmd/tikv-ctl/src/main.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 7ab9d9e9e37..74a0a01ca65 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -796,12 +796,18 @@ fn compact_whole_cluster( threads: u32, bottommost: BottommostLevelCompaction, ) { - let stores = pd_client + let all_stores = pd_client .get_all_stores(true) // Exclude tombstone stores. .unwrap_or_else(|e| perror_and_exit("Get all cluster stores from PD failed", e)); + let tikv_stores = all_stores.iter().filter(|s| { + !s.get_labels() + .iter() + .any(|l| l.get_key() == "engine" && l.get_value() == "tiflash") + }); + let mut handles = Vec::new(); - for s in stores { + for s in tikv_stores { let cfg = cfg.clone(); let mgr = Arc::clone(&mgr); let addr = s.address.clone(); From d447120e32741f5de00fbb7c183c788e52871ab1 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Wed, 10 Jan 2024 11:48:24 +0800 Subject: [PATCH 1100/1149] txn: try to fix the possible deadlock caused by scan lock (#16342) ref tikv/tikv#16340 Try to fix the possible deadlock caused by scan lock. 
Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/storage/mvcc/reader/reader.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/storage/mvcc/reader/reader.rs b/src/storage/mvcc/reader/reader.rs index dc373d147c5..e982b9e18e5 100644 --- a/src/storage/mvcc/reader/reader.rs +++ b/src/storage/mvcc/reader/reader.rs @@ -395,16 +395,16 @@ impl MvccReader { { if let Some(txn_ext) = self.snapshot.ext().get_txn_ext() { let begin_instant = Instant::now(); - let res = match self.check_term_version_status(&txn_ext.pessimistic_locks.read()) { + let pessimistic_locks_guard = txn_ext.pessimistic_locks.read(); + let res = match self.check_term_version_status(&pessimistic_locks_guard) { Ok(_) => { - // Scan locks within the specified range and filter by max_ts. - Ok(txn_ext - .pessimistic_locks - .read() - .scan_locks(start_key, end_key, filter, scan_limit)) + // Scan locks within the specified range and filter. + Ok(pessimistic_locks_guard.scan_locks(start_key, end_key, filter, scan_limit)) } Err(e) => Err(e), }; + drop(pessimistic_locks_guard); + let elapsed = begin_instant.saturating_elapsed(); SCAN_LOCK_READ_TIME_VEC .get(source) From 7f708a1c5a4188c67e18ef082099928881e587ed Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 10 Jan 2024 13:07:54 +0800 Subject: [PATCH 1101/1149] *: ban unwanted crypto crates (#16352) ref tikv/tikv#16328 To comply with FIPS 140-2, we should not use crypto algorithms implemented in rust, instead we should delegate these operation to OpenSSL, including symmetric encryption, asymmetric encryption, hash functions, key establishment, digital signatures and random number generators. 
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Makefile | 1 + components/cloud/aws/src/s3.rs | 2 +- components/raftstore/src/store/fsm/apply.rs | 8 +-- components/raftstore/src/store/fsm/peer.rs | 2 +- deny.toml | 77 +++++++++++++++++++++ scripts/check-log-style | 2 +- scripts/deny | 6 ++ 7 files changed, 91 insertions(+), 7 deletions(-) create mode 100644 deny.toml create mode 100755 scripts/deny diff --git a/Makefile b/Makefile index 45b152bfb8f..2fbbf1308eb 100644 --- a/Makefile +++ b/Makefile @@ -367,6 +367,7 @@ clippy: pre-clippy @./scripts/check-dashboards @./scripts/check-docker-build @./scripts/check-license + @./scripts/deny @./scripts/clippy-all pre-audit: diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index fc5e2a3a638..5962912a0bd 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -330,7 +330,7 @@ async fn try_read_exact( } } -// NOTICE: the openssl fips doesn't support md5, therefore use md5 pakcage to +// NOTICE: the openssl fips doesn't support md5, therefore use md5 package to // hash fn get_content_md5(object_lock_enabled: bool, content: &[u8]) -> Option { object_lock_enabled.then(|| { diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 84d2c78c830..911d1fb23a7 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -2476,8 +2476,8 @@ where "region_id" => self.region_id(), "peer_id" => self.id(), "changes" => ?changes, - "original region" => ?&self.region, - "current region" => ?®ion, + "original_region" => ?&self.region, + "current_region" => ?®ion, ); Ok(region) } @@ -3345,8 +3345,8 @@ where "region_id" => self.region_id(), "peer_id" => self.id(), "switches" => ?switches, - "original region" => ?&self.region, - "current region" => ?®ion, + "original_region" => ?&self.region, + "current_region" => ?®ion, ); let 
state = if self.pending_remove { diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 0ec0e331be5..e653b4043e6 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -6312,7 +6312,7 @@ where .send_extra_message(msg, &mut self.ctx.trans, &peer); debug!( "check peer availability"; - "target peer id" => *peer_id, + "target_peer_id" => *peer_id, ); } None => invalid_peers.push(*peer_id), diff --git a/deny.toml b/deny.toml new file mode 100644 index 00000000000..a37fbc3491f --- /dev/null +++ b/deny.toml @@ -0,0 +1,77 @@ +# To comply with FIPS 140-2, we should not use crypto algorithms implemented in +# rust, instead we should delegate these operations to OpenSSL, including +# symmetric encryption, asymmetric encryption, hash functions, +# key establishment, digital signatures and random number generators. +[bans] +deny = [ + # Hash functions + # We allow md5 for AWS S3 object lock feature which requires + # computing object's md5. + { name = "md5" , wrappers = [ "aws" ] }, + { name = "md-5" }, + { name = "sha1" }, + { name = "sha-1" }, + # We allow sha2 for oauth2 crate, because it does use sha2 in TiKV use case. + { name = "sha2", wrappers = [ "oauth2" ] }, + { name = "sha3" }, + # Symmetric encryption + { name = "aes" }, + { name = "aes-gcm" }, + { name = "aes-gcm-siv" }, + { name = "chacha20poly1305" }, + # Asymmetric encryption + { name = "elliptic-curves" }, + { name = "rsa" }, + # Digital signatures + { name = "dsa" }, + { name = "ecdsa" }, + { name = "ed25519" }, + # Message authentication codes + { name = "hamc" }, + # We prefer the system native TLS or OpenSSL. + { name = "rustls" }, + { name = "ring" }, + # Ban trait crates from RustCrypto. 
+ { name = "aead" }, + { name = "cipher" }, + { name = "digest", wrappers = [ "sha2" ] }, + { name = "password-hash" }, + { name = "signature" }, +] +multiple-versions = "allow" + +[advisories] +vulnerability = "allow" # FIXME: Deny it. +unmaintained = "allow" # FIXME: Deny it. +unsound = "allow" # FIXME: Deny it. +yanked = "allow" # FIXME: Deny it. +notice = "warn" + +[licenses] +unlicensed = "allow" # FIXME: Deny it. +copyleft = "deny" +private = { ignore = true } +allow = [ + "0BSD", + "Apache-2.0", + "BSD-3-Clause", + "CC0-1.0", + "ISC", + "MIT", + "MPL-2.0", + "Zlib", +] +exceptions = [ + # unicode-ident includes data generated from Unicode Character Database + # which is licensed under Unicode-DFS-2016. + # See https://github.com/dtolnay/unicode-ident/pull/4 + { name = "unicode-ident", allow = ["Unicode-DFS-2016"] }, + # FIXME: Remove this crate. + { name = "mnt", allow = ["LGPL-3.0"] }, + { name = "inferno", allow = ["CDDL-1.0"] }, +] + +[sources] +unknown-git = "allow" # FIXME: Deny it. +unknown-registry = "deny" +allow-org = { github = ["tikv"] } diff --git a/scripts/check-log-style b/scripts/check-log-style index b6a1893ac7d..835df10a7a2 100755 --- a/scripts/check-log-style +++ b/scripts/check-log-style @@ -7,7 +7,7 @@ function error_msg() { } if grep -r -n --color=always \ - -E '"[a-zA-Z0-9_ -]*( |-)[a-zA-Z0-9_ -]*" ?=>[\?% ]*[a-zA-Z0-9:\._\(\["]+[,|\)|\n]?' \ + -E '"[a-zA-Z0-9_ -]*( |-)[a-zA-Z0-9_ -]*" ?=>[\?% ]*[\*&a-zA-Z0-9:\._\(\["]+[,|\)|\n]?' \ --include \*.rs \ --exclude-dir target . \ | grep -v -E "config\.rs|tikv_util/src/logger|file_system/src/rate_limiter.rs" ; # ignore files contain kebab-case names. 
diff --git a/scripts/deny b/scripts/deny new file mode 100755 index 00000000000..cf677b9f1fb --- /dev/null +++ b/scripts/deny @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +set -euo pipefail + +cargo install cargo-deny 2> /dev/null || echo "Install cargo-deny failed" +cargo deny check --show-stats From 956c9f377d759b45b92531e0eebda1b880a8ecc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Wed, 10 Jan 2024 14:18:23 +0800 Subject: [PATCH 1102/1149] snapshot_backup: enhanced prepare stage (#15946) close tikv/tikv#15739 It contains: - A coprocessor that can fully reject all admin and ingest commands. - A new region-leveled wait apply implementation, which allow us to wait all pending commands to be applied. Signed-off-by: Yu Juncen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 3 +- Cargo.toml | 2 +- components/backup/src/disk_snap.rs | 354 ++++++++++++++++ components/backup/src/endpoint.rs | 9 +- components/backup/src/lib.rs | 1 + components/backup/src/service.rs | 160 ++++--- .../src/operation/disk_snapshot_backup.rs | 37 ++ components/raftstore-v2/src/operation/mod.rs | 2 + components/raftstore-v2/src/router/mod.rs | 1 + .../raftstore/src/coprocessor/dispatcher.rs | 4 + components/raftstore/src/coprocessor/error.rs | 4 +- components/raftstore/src/coprocessor/mod.rs | 13 +- components/raftstore/src/errors.rs | 9 + components/raftstore/src/store/fsm/peer.rs | 57 ++- components/raftstore/src/store/metrics.rs | 50 +++ components/raftstore/src/store/mod.rs | 8 +- components/raftstore/src/store/msg.rs | 12 +- components/raftstore/src/store/peer.rs | 71 +++- .../raftstore/src/store/snapshot_backup.rs | 392 ++++++++++++++++++ .../raftstore/src/store/unsafe_recovery.rs | 49 +-- components/server/src/server.rs | 31 +- components/server/src/server2.rs | 8 +- components/snap_recovery/src/services.rs | 56 ++- components/sst_importer/src/errors.rs | 1 + 
components/test_backup/Cargo.toml | 1 + components/test_backup/src/disk_snap.rs | 243 +++++++++++ components/test_backup/src/lib.rs | 8 +- .../test_raftstore/src/transport_simulate.rs | 6 + components/test_raftstore/src/util.rs | 1 + components/test_util/src/lib.rs | 19 + components/tikv_util/src/time.rs | 3 +- src/import/sst_service.rs | 11 +- tests/Cargo.toml | 6 +- tests/failpoints/cases/mod.rs | 1 + tests/failpoints/cases/test_disk_snap_br.rs | 42 ++ tests/integrations/backup/disk_snap.rs | 194 +++++++++ tests/integrations/backup/mod.rs | 2 + tests/integrations/import/test_sst_service.rs | 22 +- .../raftstore/test_snap_recovery.rs | 36 +- 39 files changed, 1718 insertions(+), 211 deletions(-) create mode 100644 components/backup/src/disk_snap.rs create mode 100644 components/raftstore-v2/src/operation/disk_snapshot_backup.rs create mode 100644 components/raftstore/src/store/snapshot_backup.rs create mode 100644 components/test_backup/src/disk_snap.rs create mode 100644 tests/failpoints/cases/test_disk_snap_br.rs create mode 100644 tests/integrations/backup/disk_snap.rs diff --git a/Cargo.lock b/Cargo.lock index 28a87bb578f..50a6150890a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2747,7 +2747,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#4f28b82c78601a0819f02b1e903603735ec60fe9" +source = "git+https://github.com/pingcap/kvproto.git#932639606bcf3db9676627d55430e1dd10670570" dependencies = [ "futures 0.3.15", "grpcio", @@ -5726,6 +5726,7 @@ dependencies = [ "grpcio", "kvproto", "protobuf", + "raftstore", "rand 0.8.5", "tempfile", "test_raftstore", diff --git a/Cargo.toml b/Cargo.toml index 9235c4b07ed..a08cd21baf4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -355,7 +355,7 @@ server = { path = "components/server" } service = { path = "components/service" } snap_recovery = { path = "components/snap_recovery", default-features = false } sst_importer = { path = "components/sst_importer", 
default-features = false } -test_backup = { path = "components/test_backup" } +test_backup = { path = "components/test_backup", default-features = false } test_coprocessor = { path = "components/test_coprocessor", default-features = false } example_coprocessor_plugin = { path = "components/test_coprocessor_plugin/example_plugin" } test_pd = { path = "components/test_pd" } diff --git a/components/backup/src/disk_snap.rs b/components/backup/src/disk_snap.rs new file mode 100644 index 00000000000..27c5b2e2b19 --- /dev/null +++ b/components/backup/src/disk_snap.rs @@ -0,0 +1,354 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. +//! This module contains things about disk snapshot. + +use std::{ + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + task::Poll, + time::Duration, +}; + +use futures::future; +use futures_util::{ + future::{BoxFuture, FutureExt}, + sink::SinkExt, + stream::StreamExt, +}; +use grpcio::{RpcStatus, WriteFlags}; +use kvproto::{ + brpb::{ + PrepareSnapshotBackupEventType as PEvnT, PrepareSnapshotBackupRequest as PReq, + PrepareSnapshotBackupRequestType as PReqT, PrepareSnapshotBackupResponse as PResp, + }, + errorpb::{self, StaleCommand}, + metapb::Region, +}; +use raftstore::store::{ + snapshot_backup::{ + AbortReason, PrepareDiskSnapObserver, SnapshotBrHandle, SnapshotBrWaitApplyRequest, + }, + SnapshotBrWaitApplySyncer, +}; +use tikv_util::{sys::thread::ThreadBuildWrapper, warn, Either}; +use tokio::{ + runtime::{Handle, Runtime}, + sync::oneshot, +}; +use tokio_stream::Stream; + +const DEFAULT_RT_THREADS: usize = 2; + +type Result = std::result::Result; + +enum Error { + Uninitialized, + LeaseExpired, + /// Wait apply has been aborted. + /// When the `reason` is `None`, implies the request itself has been + /// canceled (seldom) due to message lost or something. 
+ WaitApplyAborted(Option), + RaftStore(raftstore::Error), +} + +enum HandleErr { + AbortStream(RpcStatus), + SendErrResp(errorpb::Error), +} + +pub struct ResultSink(grpcio::DuplexSink); + +impl From> for ResultSink { + fn from(value: grpcio::DuplexSink) -> Self { + Self(value) + } +} + +impl ResultSink { + async fn send( + mut self, + result: Result, + error_extra_info: impl FnOnce(&mut PResp), + ) -> grpcio::Result { + match result { + // Note: should we batch here? + Ok(item) => self.0.send((item, WriteFlags::default())).await?, + Err(err) => match err.into() { + HandleErr::AbortStream(status) => { + self.0.fail(status.clone()).await?; + return Err(grpcio::Error::RpcFinished(Some(status))); + } + HandleErr::SendErrResp(err) => { + let mut resp = PResp::new(); + error_extra_info(&mut resp); + resp.set_error(err); + self.0.send((resp, WriteFlags::default())).await?; + } + }, + } + Ok(self) + } +} + +impl From for HandleErr { + fn from(value: Error) -> Self { + match value { + Error::Uninitialized => HandleErr::AbortStream(RpcStatus::with_message( + grpcio::RpcStatusCode::UNAVAILABLE, + "coprocessor not initialized".to_owned(), + )), + Error::RaftStore(r) => HandleErr::SendErrResp(errorpb::Error::from(r)), + Error::WaitApplyAborted(reason) => HandleErr::SendErrResp({ + let mut err = errorpb::Error::new(); + err.set_message(format!("wait apply has been aborted, perhaps epoch not match or leadership changed, note = {:?}", reason)); + match reason { + Some(AbortReason::EpochNotMatch(enm)) => err.set_epoch_not_match(enm), + Some(AbortReason::StaleCommand { .. 
}) => { + err.set_stale_command(StaleCommand::new()) + } + _ => {} + } + err + }), + Error::LeaseExpired => HandleErr::AbortStream(RpcStatus::with_message( + grpcio::RpcStatusCode::FAILED_PRECONDITION, + "the lease has expired, you may not send `wait_apply` because it is no meaning" + .to_string(), + )), + } + } +} + +#[derive(Clone)] +pub struct Env { + pub(crate) handle: SR, + rejector: Arc, + active_stream: Arc, + // Left: a shared tokio runtime. + // Right: a hosted runtime(usually for test cases). + runtime: Either>, +} + +impl Env { + pub fn new( + handle: SR, + rejector: Arc, + runtime: Option, + ) -> Self { + let runtime = match runtime { + None => Either::Right(Self::default_runtime()), + Some(rt) => Either::Left(rt), + }; + Self { + handle, + rejector, + active_stream: Arc::new(AtomicU64::new(0)), + runtime, + } + } + + pub fn active_stream(&self) -> u64 { + self.active_stream.load(Ordering::SeqCst) + } + + pub fn get_async_runtime(&self) -> &Handle { + match &self.runtime { + Either::Left(h) => h, + Either::Right(rt) => rt.handle(), + } + } + + fn check_initialized(&self) -> Result<()> { + if !self.rejector.initialized() { + return Err(Error::Uninitialized); + } + Ok(()) + } + + fn check_rejected(&self) -> Result<()> { + self.check_initialized()?; + if self.rejector.allowed() { + return Err(Error::LeaseExpired); + } + Ok(()) + } + + fn update_lease(&self, lease_dur: Duration) -> Result { + self.check_initialized()?; + let mut event = PResp::new(); + event.set_ty(PEvnT::UpdateLeaseResult); + event.set_last_lease_is_valid(self.rejector.update_lease(lease_dur)); + Ok(event) + } + + fn reset(&self) -> PResp { + let rejected = !self.rejector.allowed(); + self.rejector.reset(); + let mut event = PResp::new(); + event.set_ty(PEvnT::UpdateLeaseResult); + event.set_last_lease_is_valid(rejected); + event + } + + fn default_runtime() -> Arc { + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(DEFAULT_RT_THREADS) + .enable_all() + 
.with_sys_hooks() + .thread_name("snap_br_backup_prepare") + .build() + .unwrap(); + Arc::new(rt) + } +} + +pub struct StreamHandleLoop { + pending_regions: Vec)>>, + env: Env, +} + +impl Drop for StreamHandleLoop { + fn drop(&mut self) { + self.env.active_stream.fetch_sub(1, Ordering::SeqCst); + } +} + +enum StreamHandleEvent { + Req(PReq), + WaitApplyDone(Region, Result<()>), + ConnectionGone(Option), +} + +impl StreamHandleLoop { + pub fn new(env: Env) -> Self { + env.active_stream.fetch_add(1, Ordering::SeqCst); + Self { + env, + pending_regions: vec![], + } + } + + fn async_wait_apply(&mut self, region: &Region) -> BoxFuture<'static, (Region, Result<()>)> { + if let Err(err) = self.env.check_rejected() { + return Box::pin(future::ready((region.clone(), Err(err)))); + } + + let (tx, rx) = oneshot::channel(); + let syncer = SnapshotBrWaitApplySyncer::new(region.id, tx); + let handle = self.env.handle.clone(); + let region = region.clone(); + let epoch = region.get_region_epoch().clone(); + let id = region.get_id(); + let send_res = handle + .send_wait_apply(id, SnapshotBrWaitApplyRequest::strict(syncer, epoch)) + .map_err(Error::RaftStore); + Box::pin( + async move { + send_res?; + rx.await + .map_err(|_| Error::WaitApplyAborted(None)) + .and_then(|report| match report.aborted { + Some(reason) => Err(Error::WaitApplyAborted(Some(reason))), + None => Ok(()), + }) + } + .map(move |res| (region, res)), + ) + } + + async fn next_event( + &mut self, + input: &mut (impl Stream> + Unpin), + ) -> StreamHandleEvent { + let wait_applies = future::poll_fn(|cx| { + let selected = + self.pending_regions + .iter_mut() + .enumerate() + .find_map(|(i, fut)| match fut.poll_unpin(cx) { + Poll::Ready(r) => Some((i, r)), + Poll::Pending => None, + }); + match selected { + Some((i, region)) => { + // We have polled the future (and make sure it has ready) before, it is + // safe to drop this future directly. 
+ let _ = self.pending_regions.swap_remove(i); + region.into() + } + None => Poll::Pending, + } + }); + + tokio::select! { + wres = wait_applies => { + StreamHandleEvent::WaitApplyDone(wres.0, wres.1) + } + req = input.next() => { + match req { + Some(Ok(req)) => StreamHandleEvent::Req(req), + Some(Err(err)) => StreamHandleEvent::ConnectionGone(Some(err)), + None => StreamHandleEvent::ConnectionGone(None) + } + } + } + } + + pub async fn run( + mut self, + mut input: impl Stream> + Unpin, + mut sink: ResultSink, + ) -> grpcio::Result<()> { + loop { + match self.next_event(&mut input).await { + StreamHandleEvent::Req(req) => match req.get_ty() { + PReqT::UpdateLease => { + let lease_dur = Duration::from_secs(req.get_lease_in_seconds()); + sink = sink + .send(self.env.update_lease(lease_dur), |resp| { + resp.set_ty(PEvnT::UpdateLeaseResult); + }) + .await?; + } + PReqT::WaitApply => { + let regions = req.get_regions(); + for region in regions { + let res = self.async_wait_apply(region); + self.pending_regions.push(res); + } + } + PReqT::Finish => { + sink.send(Ok(self.env.reset()), |_| {}) + .await? 
+ .0 + .close() + .await?; + return Ok(()); + } + }, + StreamHandleEvent::WaitApplyDone(region, res) => { + let resp = res.map(|_| { + let mut resp = PResp::new(); + resp.set_region(region.clone()); + resp.set_ty(PEvnT::WaitApplyDone); + resp + }); + sink = sink + .send(resp, |resp| { + resp.set_ty(PEvnT::WaitApplyDone); + resp.set_region(region); + }) + .await?; + } + StreamHandleEvent::ConnectionGone(err) => { + warn!("the client has gone, aborting loop"; "err" => ?err); + return match err { + None => Ok(()), + Some(err) => Err(err), + }; + } + } + } + } +} diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index dabf9433b34..5a0a8bca141 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -43,7 +43,7 @@ use tikv_util::{ warn, worker::Runnable, }; -use tokio::runtime::Runtime; +use tokio::runtime::{Handle, Runtime}; use txn_types::{Key, Lock, TimeStamp}; use crate::{ @@ -1154,6 +1154,13 @@ impl Endpoint { )); } } + + /// Get the internal handle of the io thread pool used by the backup + /// endpoint. This is mainly shared for disk snapshot backup (so they + /// don't need to spawn on the gRPC pool.) 
+ pub fn io_pool_handle(&self) -> &Handle { + self.io_pool.handle() + } } impl Runnable for Endpoint { diff --git a/components/backup/src/lib.rs b/components/backup/src/lib.rs index bf333424603..30345665369 100644 --- a/components/backup/src/lib.rs +++ b/components/backup/src/lib.rs @@ -5,6 +5,7 @@ #[allow(unused_extern_crates)] extern crate tikv_alloc; +pub mod disk_snap; mod endpoint; mod errors; mod metrics; diff --git a/components/backup/src/service.rs b/components/backup/src/service.rs index 8420b7ded9c..04d996944a4 100644 --- a/components/backup/src/service.rs +++ b/components/backup/src/service.rs @@ -2,87 +2,81 @@ use std::sync::atomic::*; -use engine_traits::{KvEngine, RaftEngine}; use futures::{channel::mpsc, FutureExt, SinkExt, StreamExt, TryFutureExt}; use grpcio::{self, *}; use kvproto::brpb::*; -use raftstore::store::{ - fsm::store::RaftRouter, - msg::{PeerMsg, SignificantMsg}, -}; -use tikv_util::{error, info, worker::*}; +use raftstore::store::snapshot_backup::SnapshotBrHandle; +use tikv_util::{error, info, warn, worker::*}; use super::Task; +use crate::disk_snap::{self, StreamHandleLoop}; /// Service handles the RPC messages for the `Backup` service. #[derive(Clone)] -pub struct Service { +pub struct Service { scheduler: Scheduler, - router: Option>, + snap_br_env: disk_snap::Env, } -impl Service +impl Service where - EK: KvEngine, - ER: RaftEngine, + H: SnapshotBrHandle, { - // Create a new backup service without router, this used for raftstore v2. - // because we don't have RaftStoreRouter any more. - pub fn new(scheduler: Scheduler) -> Self { + /// Create a new backup service. + pub fn new(scheduler: Scheduler, env: disk_snap::Env) -> Self { Service { scheduler, - router: None, - } - } - - // Create a new backup service with router, this used for raftstore v1. 
- pub fn with_router(scheduler: Scheduler, router: RaftRouter) -> Self { - Service { - scheduler, - router: Some(router), + snap_br_env: env, } } } -impl Backup for Service +impl Backup for Service where - EK: KvEngine, - ER: RaftEngine, + H: SnapshotBrHandle + 'static, { + /// Check a region whether there is pending admin requests(including pending + /// merging). + /// + /// In older versions of disk snapshot backup, this will be called after we + /// paused all scheduler. + /// + /// This is kept for compatibility with previous versions. fn check_pending_admin_op( &mut self, ctx: RpcContext<'_>, _req: CheckAdminRequest, mut sink: ServerStreamingSink, ) { - let (tx, rx) = mpsc::unbounded(); - match &self.router { - Some(router) => { - router.broadcast_normal(|| { - PeerMsg::SignificantMsg(SignificantMsg::CheckPendingAdmin(tx.clone())) - }); - let send_task = async move { - let mut s = rx.map(|resp| Ok((resp, WriteFlags::default()))); - sink.send_all(&mut s).await?; - sink.close().await?; - Ok(()) - } - .map(|res: Result<()>| match res { - Ok(_) => { - info!("check admin closed"); - } - Err(e) => { - error!("check admin canceled"; "error" => ?e); - } - }); - ctx.spawn(send_task); + let handle = self.snap_br_env.handle.clone(); + let tokio_handle = self.snap_br_env.get_async_runtime().clone(); + let peer = ctx.peer(); + let task = async move { + let (tx, rx) = mpsc::unbounded(); + if let Err(err) = handle.broadcast_check_pending_admin(tx) { + return sink + .fail(RpcStatus::with_message( + RpcStatusCode::INTERNAL, + format!("{err}"), + )) + .await; } - None => { - // check pending admin reqeust is used for EBS Backup. - // for raftstore v2. we don't need it for now. 
so just return unimplemented - unimplemented_call!(ctx, sink) + sink.send_all(&mut rx.map(|resp| Ok((resp, WriteFlags::default())))) + .await?; + sink.close().await?; + Ok(()) + }; + + tokio_handle.spawn(async move { + match task.await { + Err(err) => { + warn!("check admin canceled"; "peer" => %peer, "err" => %err); + } + Ok(()) => { + info!("check admin closed"; "peer" => %peer); + } } - } + }); } fn backup( @@ -137,25 +131,81 @@ where ctx.spawn(send_task); } + + /// The new method for preparing a disk snapshot backup. + /// Generally there will be some steps for the client to do: + /// 1. Establish a `prepare_snapshot_backup` connection. + /// 2. Send a initial `UpdateLease`. And we should update the lease + /// periodically. + /// 3. Send `WaitApply` to each leader peer in this store. + /// 4. Once `WaitApply` for all regions have done, we can take disk + /// snapshot. + /// 5. Once all snapshots have been taken, send `Finalize` to stop. + fn prepare_snapshot_backup( + &mut self, + ctx: grpcio::RpcContext<'_>, + stream: grpcio::RequestStream, + sink: grpcio::DuplexSink, + ) { + let l = StreamHandleLoop::new(self.snap_br_env.clone()); + // Note: should we disconnect here once there are more than one stream...? 
+ // Generally once two streams enter here, one may exit + info!("A new prepare snapshot backup stream created!"; + "peer" => %ctx.peer(), + "stream_count" => %self.snap_br_env.active_stream(), + ); + self.snap_br_env.get_async_runtime().spawn(async move { + if let Err(err) = l.run(stream, sink.into()).await { + warn!("stream closed; perhaps a problem cannot be retried happens"; "reason" => ?err); + } + }); + } } #[cfg(test)] mod tests { use std::{sync::Arc, time::Duration}; - use engine_rocks::RocksEngine; use external_storage::make_local_backend; use tikv::storage::txn::tests::{must_commit, must_prewrite_put}; use tikv_util::worker::{dummy_scheduler, ReceiverWrapper}; use txn_types::TimeStamp; use super::*; - use crate::endpoint::tests::*; + use crate::{disk_snap::Env, endpoint::tests::*}; + + #[derive(Clone)] + struct PanicHandle; + + impl SnapshotBrHandle for PanicHandle { + fn send_wait_apply( + &self, + _region: u64, + _req: raftstore::store::snapshot_backup::SnapshotBrWaitApplyRequest, + ) -> raftstore::Result<()> { + panic!("this case shouldn't call this!") + } + + fn broadcast_wait_apply( + &self, + _req: raftstore::store::snapshot_backup::SnapshotBrWaitApplyRequest, + ) -> raftstore::Result<()> { + panic!("this case shouldn't call this!") + } + + fn broadcast_check_pending_admin( + &self, + _tx: mpsc::UnboundedSender, + ) -> raftstore::Result<()> { + panic!("this case shouldn't call this!") + } + } fn new_rpc_suite() -> (Server, BackupClient, ReceiverWrapper) { let env = Arc::new(EnvBuilder::new().build()); let (scheduler, rx) = dummy_scheduler(); - let backup_service = super::Service::::new(scheduler); + let backup_service = + super::Service::new(scheduler, Env::new(PanicHandle, Default::default(), None)); let builder = ServerBuilder::new(env.clone()).register_service(create_backup(backup_service)); let mut server = builder.bind("127.0.0.1", 0).build().unwrap(); diff --git a/components/raftstore-v2/src/operation/disk_snapshot_backup.rs 
b/components/raftstore-v2/src/operation/disk_snapshot_backup.rs new file mode 100644 index 00000000000..1e033248b23 --- /dev/null +++ b/components/raftstore-v2/src/operation/disk_snapshot_backup.rs @@ -0,0 +1,37 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use futures::channel::mpsc::UnboundedSender; +use kvproto::brpb::CheckAdminResponse; +use raftstore::store::snapshot_backup::{SnapshotBrHandle, SnapshotBrWaitApplyRequest}; +use tikv_util::box_err; + +const REASON: &str = "Raftstore V2 doesn't support snapshot backup yet."; + +#[derive(Clone, Copy)] +pub struct UnimplementedHandle; + +impl SnapshotBrHandle for UnimplementedHandle { + fn send_wait_apply(&self, _region: u64, _req: SnapshotBrWaitApplyRequest) -> crate::Result<()> { + Err(crate::Error::Other(box_err!( + "send_wait_apply not implemented; note: {}", + REASON + ))) + } + + fn broadcast_wait_apply(&self, _req: SnapshotBrWaitApplyRequest) -> crate::Result<()> { + Err(crate::Error::Other(box_err!( + "broadcast_wait_apply not implemented; note: {}", + REASON + ))) + } + + fn broadcast_check_pending_admin( + &self, + _tx: UnboundedSender, + ) -> crate::Result<()> { + Err(crate::Error::Other(box_err!( + "broadcast_check_pending_admin not implemented; note: {}", + REASON + ))) + } +} diff --git a/components/raftstore-v2/src/operation/mod.rs b/components/raftstore-v2/src/operation/mod.rs index 24d025c0a4d..df82f1abfe9 100644 --- a/components/raftstore-v2/src/operation/mod.rs +++ b/components/raftstore-v2/src/operation/mod.rs @@ -2,6 +2,7 @@ mod bucket; mod command; +mod disk_snapshot_backup; mod life; mod misc; mod pd; @@ -17,6 +18,7 @@ pub use command::{ SplitFlowControl, SplitPendingAppend, MERGE_IN_PROGRESS_PREFIX, MERGE_SOURCE_PREFIX, SPLIT_PREFIX, }; +pub use disk_snapshot_backup::UnimplementedHandle as DiskSnapBackupHandle; pub use life::{AbnormalPeerContext, DestroyProgress, GcPeerContext}; pub use ready::{ write_initial_states, ApplyTrace, AsyncWriter, DataTrace, 
GenSnapTask, ReplayWatch, SnapState, diff --git a/components/raftstore-v2/src/router/mod.rs b/components/raftstore-v2/src/router/mod.rs index 83a2497b331..d63e1abc733 100644 --- a/components/raftstore-v2/src/router/mod.rs +++ b/components/raftstore-v2/src/router/mod.rs @@ -20,3 +20,4 @@ pub use self::{ DebugInfoSubscriber, QueryResChannel, QueryResult, ReadResponse, }, }; +pub use super::operation::DiskSnapBackupHandle; diff --git a/components/raftstore/src/coprocessor/dispatcher.rs b/components/raftstore/src/coprocessor/dispatcher.rs index d007c7102bf..d1e7bb51dd6 100644 --- a/components/raftstore/src/coprocessor/dispatcher.rs +++ b/components/raftstore/src/coprocessor/dispatcher.rs @@ -669,6 +669,10 @@ impl CoprocessorHost { ); } + pub fn pre_transfer_leader(&self, r: &Region, tr: &TransferLeaderRequest) -> Result<()> { + try_loop_ob!(r, &self.registry.admin_observers, pre_transfer_leader, tr) + } + pub fn post_apply_snapshot( &self, region: &Region, diff --git a/components/raftstore/src/coprocessor/error.rs b/components/raftstore/src/coprocessor/error.rs index 233c7c4197a..d979cac98dd 100644 --- a/components/raftstore/src/coprocessor/error.rs +++ b/components/raftstore/src/coprocessor/error.rs @@ -1,12 +1,14 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{error::Error as StdError, result::Result as StdResult}; +use std::{error::Error as StdError, result::Result as StdResult, time::Duration}; use error_code::{self, ErrorCode, ErrorCodeExt}; use thiserror::Error; #[derive(Debug, Error)] pub enum Error { + #[error("required retry after {after:?}, hint: {reason:?}")] + RequireDelay { after: Duration, reason: String }, #[error("{0}")] Other(#[from] Box), } diff --git a/components/raftstore/src/coprocessor/mod.rs b/components/raftstore/src/coprocessor/mod.rs index 0592e23200b..2e05d01f905 100644 --- a/components/raftstore/src/coprocessor/mod.rs +++ b/components/raftstore/src/coprocessor/mod.rs @@ -13,7 +13,10 @@ use engine_traits::{CfName, SstMetaInfo}; use kvproto::{ metapb::Region, pdpb::CheckPolicy, - raft_cmdpb::{AdminRequest, AdminResponse, RaftCmdRequest, RaftCmdResponse, Request}, + raft_cmdpb::{ + AdminRequest, AdminResponse, RaftCmdRequest, RaftCmdResponse, Request, + TransferLeaderRequest, + }, raft_serverpb::RaftApplyState, }; use raft::{eraftpb, StateRole}; @@ -130,6 +133,14 @@ pub trait AdminObserver: Coprocessor { ) -> bool { false } + + fn pre_transfer_leader( + &self, + _ctx: &mut ObserverContext<'_>, + _tr: &TransferLeaderRequest, + ) -> Result<()> { + Ok(()) + } } pub trait QueryObserver: Coprocessor { diff --git a/components/raftstore/src/errors.rs b/components/raftstore/src/errors.rs index f55ae2ed2bf..fce8eb2ef16 100644 --- a/components/raftstore/src/errors.rs +++ b/components/raftstore/src/errors.rs @@ -293,6 +293,15 @@ impl From for errorpb::Error { Error::DeadlineExceeded => { set_deadline_exceeded_busy_error(&mut errorpb); } + Error::Coprocessor(CopError::RequireDelay { + after, + reason: hint, + }) => { + let mut e = errorpb::ServerIsBusy::new(); + e.set_backoff_ms(after.as_millis() as _); + e.set_reason(hint); + errorpb.set_server_is_busy(e); + } _ => {} }; diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 
e653b4043e6..a871788fe89 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -89,15 +89,14 @@ use crate::{ TRANSFER_LEADER_COMMAND_REPLY_CTX, }, region_meta::RegionMeta, + snapshot_backup::{AbortReason, SnapshotBrState, SnapshotBrWaitApplyRequest}, transport::Transport, unsafe_recovery::{ - exit_joint_request, ForceLeaderState, SnapshotRecoveryState, - SnapshotRecoveryWaitApplySyncer, UnsafeRecoveryExecutePlanSyncer, + exit_joint_request, ForceLeaderState, UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryState, UnsafeRecoveryWaitApplySyncer, }, - util, - util::{KeysInfoFormatter, LeaseState}, + util::{self, compare_region_epoch, KeysInfoFormatter, LeaseState}, worker::{ Bucket, BucketRange, CleanupTask, ConsistencyCheckTask, GcSnapshotTask, RaftlogGcTask, ReadDelegate, ReadProgress, RegionTask, SplitCheckTask, @@ -955,7 +954,7 @@ where // func be invoked firstly after assigned leader by BR, wait all leader apply to // last log index func be invoked secondly wait follower apply to last // index, however the second call is broadcast, it may improve in future - fn on_snapshot_recovery_wait_apply(&mut self, syncer: SnapshotRecoveryWaitApplySyncer) { + fn on_snapshot_br_wait_apply(&mut self, req: SnapshotBrWaitApplyRequest) { if let Some(state) = &self.fsm.peer.snapshot_recovery_state { warn!( "can't wait apply, another recovery in progress"; @@ -963,20 +962,47 @@ where "peer_id" => self.fsm.peer_id(), "state" => ?state, ); - syncer.abort(); + req.syncer.abort(AbortReason::Duplicated); return; } let target_index = self.fsm.peer.raft_group.raft.raft_log.last_index(); + let applied_index = self.fsm.peer.raft_group.raft.raft_log.applied; + let term = self.fsm.peer.raft_group.raft.term; + if let Some(e) = &req.expected_epoch { + if let Err(err) = compare_region_epoch(e, self.region(), true, true, true) { + warn!("epoch not match for wait apply, 
aborting."; "err" => %err, + "peer" => self.fsm.peer.peer_id(), + "region" => self.fsm.peer.region().get_id()); + let mut pberr = errorpb::Error::from(err); + req.syncer + .abort(AbortReason::EpochNotMatch(pberr.take_epoch_not_match())); + return; + } + } + + // trivial case: no need to wait apply -- already the latest. + // Return directly for avoiding to print tons of logs. + if target_index == applied_index { + debug!( + "skip trivial case of waiting apply."; + "region_id" => self.region_id(), + "peer_id" => self.fsm.peer_id(), + "target_index" => target_index, + "applied_index" => applied_index, + ); + SNAP_BR_WAIT_APPLY_EVENT.trivial.inc(); + return; + } // during the snapshot recovery, broadcast waitapply, some peer may stale if !self.fsm.peer.is_leader() { info!( - "snapshot follower recovery started"; + "snapshot follower wait apply started"; "region_id" => self.region_id(), "peer_id" => self.fsm.peer_id(), "target_index" => target_index, - "applied_index" => self.fsm.peer.raft_group.raft.raft_log.applied, + "applied_index" => applied_index, "pending_remove" => self.fsm.peer.pending_remove, "voter" => self.fsm.peer.raft_group.raft.vote, ); @@ -986,7 +1012,8 @@ where // case#2 if peer is suppose to remove if self.fsm.peer.raft_group.raft.vote == 0 || self.fsm.peer.pending_remove { info!( - "this peer is never vote before or pending remove, it should be skip to wait apply" + "this peer is never vote before or pending remove, it should be skip to wait apply"; + "region" => %self.region_id(), ); return; } @@ -996,13 +1023,15 @@ where "region_id" => self.region_id(), "peer_id" => self.fsm.peer_id(), "target_index" => target_index, - "applied_index" => self.fsm.peer.raft_group.raft.raft_log.applied, + "applied_index" => applied_index, ); } + SNAP_BR_WAIT_APPLY_EVENT.accepted.inc(); - self.fsm.peer.snapshot_recovery_state = Some(SnapshotRecoveryState::WaitLogApplyToLast { + self.fsm.peer.snapshot_recovery_state = Some(SnapshotBrState::WaitLogApplyToLast { 
target_index, - syncer, + valid_for_term: req.abort_when_term_change.then_some(term), + syncer: req.syncer, }); self.fsm .peer @@ -1509,9 +1538,7 @@ where self.on_unsafe_recovery_fill_out_report(syncer) } // for snapshot recovery (safe recovery) - SignificantMsg::SnapshotRecoveryWaitApply(syncer) => { - self.on_snapshot_recovery_wait_apply(syncer) - } + SignificantMsg::SnapshotBrWaitApply(syncer) => self.on_snapshot_br_wait_apply(syncer), SignificantMsg::CheckPendingAdmin(ch) => self.on_check_pending_admin(ch), } } diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index 911cf4646a4..ccc28306059 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -261,6 +261,31 @@ make_static_metric! { unable_to_split_cpu_top, } + pub label_enum SnapshotBrWaitApplyEventType { + sent, + trivial, + accepted, + term_not_match, + epoch_not_match, + duplicated, + finished, + } + + pub struct SnapshotBrWaitApplyEvent : IntCounter { + "event" => SnapshotBrWaitApplyEventType + } + + pub label_enum SnapshotBrLeaseEventType { + create, + renew, + expired, + reset, + } + + pub struct SnapshotBrLeaseEvent : IntCounter { + "event" => SnapshotBrLeaseEventType + } + pub struct HibernatedPeerStateGauge: IntGauge { "state" => { awaken, @@ -897,4 +922,29 @@ lazy_static! 
{ "tikv_raftstore_peer_in_flashback_state", "Total number of peers in the flashback state" ).unwrap(); + + pub static ref SNAP_BR_SUSPEND_COMMAND_TYPE: IntCounterVec = register_int_counter_vec!( + "tikv_raftstore_snap_br_suspend_command_type", + "The statistic of rejecting some admin commands being proposed.", + &["type"] + ).unwrap(); + + pub static ref SNAP_BR_WAIT_APPLY_EVENT: SnapshotBrWaitApplyEvent = register_static_int_counter_vec!( + SnapshotBrWaitApplyEvent, + "tikv_raftstore_snap_br_wait_apply_event", + "The events of wait apply issued by snapshot br.", + &["event"] + ).unwrap(); + + pub static ref SNAP_BR_SUSPEND_COMMAND_LEASE_UNTIL: IntGauge = register_int_gauge!( + "tikv_raftstore_snap_br_suspend_command_lease_until", + "The lease that snapshot br holds of rejecting some type of commands. (In unix timestamp.)" + ).unwrap(); + + pub static ref SNAP_BR_LEASE_EVENT: SnapshotBrLeaseEvent = register_static_int_counter_vec!( + SnapshotBrLeaseEvent, + "tikv_raftstore_snap_br_lease_event", + "The events of the lease to denying new admin commands being proposed by snapshot br.", + &["event"] + ).unwrap(); } diff --git a/components/raftstore/src/store/mod.rs b/components/raftstore/src/store/mod.rs index 123289c2057..971c9038594 100644 --- a/components/raftstore/src/store/mod.rs +++ b/components/raftstore/src/store/mod.rs @@ -11,6 +11,7 @@ pub mod msg; mod peer; mod read_queue; pub mod region_meta; +pub mod snapshot_backup; pub mod transport; #[macro_use] pub mod util; @@ -74,13 +75,14 @@ pub use self::{ ApplyOptions, CfFile, Error as SnapError, SnapEntry, SnapKey, SnapManager, SnapManagerBuilder, Snapshot, SnapshotStatistics, TabletSnapKey, TabletSnapManager, }, + snapshot_backup::SnapshotBrWaitApplySyncer, transport::{CasualRouter, ProposalRouter, SignificantRouter, StoreRouter, Transport}, txn_ext::{LocksStatus, PeerPessimisticLocks, PessimisticLockPair, TxnExt}, unsafe_recovery::{ demote_failed_voters_request, exit_joint_request, ForceLeaderState, - 
SnapshotRecoveryWaitApplySyncer, UnsafeRecoveryExecutePlanSyncer, - UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, - UnsafeRecoveryState, UnsafeRecoveryWaitApplySyncer, + UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, + UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, UnsafeRecoveryState, + UnsafeRecoveryWaitApplySyncer, }, util::{RegionReadProgress, RegionReadProgressRegistry}, worker::{ diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index a26a698073d..0e52edc5012 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -26,14 +26,16 @@ use smallvec::{smallvec, SmallVec}; use tikv_util::{deadline::Deadline, escape, memory::HeapSize, time::Instant}; use tracker::{get_tls_tracker_token, TrackerToken}; -use super::{local_metrics::TimeTracker, region_meta::RegionMeta, FetchedLogs, RegionSnapshot}; +use super::{ + local_metrics::TimeTracker, region_meta::RegionMeta, + snapshot_backup::SnapshotBrWaitApplyRequest, FetchedLogs, RegionSnapshot, +}; use crate::store::{ fsm::apply::{CatchUpLogs, ChangeObserver, TaskRes as ApplyTaskRes}, metrics::RaftEventDurationType, unsafe_recovery::{ - SnapshotRecoveryWaitApplySyncer, UnsafeRecoveryExecutePlanSyncer, - UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, - UnsafeRecoveryWaitApplySyncer, + UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, + UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryWaitApplySyncer, }, util::{KeysInfoFormatter, LatencyInspector}, worker::{Bucket, BucketRange}, @@ -546,7 +548,7 @@ where UnsafeRecoveryDestroy(UnsafeRecoveryExecutePlanSyncer), UnsafeRecoveryWaitApply(UnsafeRecoveryWaitApplySyncer), UnsafeRecoveryFillOutReport(UnsafeRecoveryFillOutReportSyncer), - SnapshotRecoveryWaitApply(SnapshotRecoveryWaitApplySyncer), + SnapshotBrWaitApply(SnapshotBrWaitApplyRequest), CheckPendingAdmin(UnboundedSender), } diff --git 
a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index dd8384e2f66..b9a3a491563 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -101,8 +101,9 @@ use crate::{ memory::{needs_evict_entry_cache, MEMTRACE_RAFT_ENTRIES}, msg::{CasualMessage, ErrorCallback, RaftCommand}, peer_storage::HandleSnapshotResult, + snapshot_backup::{AbortReason, SnapshotBrState}, txn_ext::LocksStatus, - unsafe_recovery::{ForceLeaderState, SnapshotRecoveryState, UnsafeRecoveryState}, + unsafe_recovery::{ForceLeaderState, UnsafeRecoveryState}, util::{admin_cmd_epoch_lookup, RegionReadProgress}, worker::{ CleanupTask, CompactTask, HeartbeatTask, RaftlogGcTask, ReadDelegate, ReadExecutor, @@ -888,7 +889,7 @@ where /// lead_transferee if this peer(leader) is in a leadership transferring. pub lead_transferee: u64, pub unsafe_recovery_state: Option, - pub snapshot_recovery_state: Option, + pub snapshot_recovery_state: Option, last_record_safe_point: u64, } @@ -3640,7 +3641,7 @@ where self.check_normal_proposal_with_disk_full_opt(ctx, disk_full_opt) .and_then(|_| self.propose_normal(ctx, req)) } - Ok(RequestPolicy::ProposeConfChange) => self.propose_conf_change(ctx, &req), + Ok(RequestPolicy::ProposeConfChange) => self.propose_conf_change(ctx, req), Err(e) => Err(e), }; fail_point!("after_propose"); @@ -4631,9 +4632,23 @@ where req: RaftCmdRequest, cb: Callback, ) -> bool { + let transfer_leader = get_transfer_leader_cmd(&req).unwrap(); + if let Err(err) = ctx + .coprocessor_host + .pre_transfer_leader(self.region(), transfer_leader) + { + warn!("Coprocessor rejected transfer leader."; "err" => ?err, + "region_id" => self.region_id, + "peer_id" => self.peer.get_id(), + "transferee" => transfer_leader.get_peer().get_id()); + let mut resp = RaftCmdResponse::new(); + *resp.mut_header().mut_error() = Error::from(err).into(); + cb.invoke_with_response(resp); + return false; + } + 
ctx.raft_metrics.propose.transfer_leader.inc(); - let transfer_leader = get_transfer_leader_cmd(&req).unwrap(); let prs = self.raft_group.raft.prs(); let (_, peers) = transfer_leader @@ -4686,7 +4701,7 @@ where fn propose_conf_change( &mut self, ctx: &mut PollContext, - req: &RaftCmdRequest, + mut req: RaftCmdRequest, ) -> Result> { if self.pending_merge_state.is_some() { return Err(Error::ProposalInMergingMode(self.region_id)); @@ -4714,7 +4729,24 @@ where self.term() )); } - if let Some(index) = self.cmd_epoch_checker.propose_check_epoch(req, self.term()) { + + if let Err(err) = ctx.coprocessor_host.pre_propose(self.region(), &mut req) { + warn!("Coprocessor rejected proposing conf change."; + "err" => ?err, + "region_id" => self.region_id, + "peer_id" => self.peer.get_id(), + ); + return Err(box_err!( + "{} rejected by coprocessor(reason = {})", + self.tag, + err + )); + } + + if let Some(index) = self + .cmd_epoch_checker + .propose_check_epoch(&req, self.term()) + { return Ok(Either::Right(index)); } @@ -5159,10 +5191,31 @@ where } pub fn snapshot_recovery_maybe_finish_wait_apply(&mut self, force: bool) { - if let Some(SnapshotRecoveryState::WaitLogApplyToLast { target_index, .. }) = - &self.snapshot_recovery_state + if let Some(SnapshotBrState::WaitLogApplyToLast { + target_index, + valid_for_term, + .. + }) = &self.snapshot_recovery_state { - if self.raft_group.raft.term != self.raft_group.raft.raft_log.last_term() { + if valid_for_term + .map(|vt| vt != self.raft_group.raft.term) + .unwrap_or(false) + { + info!("leadership changed, aborting syncer because required."; "region_id" => self.region().id); + match self.snapshot_recovery_state.take() { + Some(SnapshotBrState::WaitLogApplyToLast { + syncer, + valid_for_term, + .. 
+ }) => { + syncer.abort(AbortReason::StaleCommand { + region_id: self.region().get_id(), + expected_term: valid_for_term.unwrap_or_default(), + current_term: self.raft_group.raft.term, + }); + } + _ => unreachable!(), + }; return; } diff --git a/components/raftstore/src/store/snapshot_backup.rs b/components/raftstore/src/store/snapshot_backup.rs new file mode 100644 index 00000000000..0d972594d05 --- /dev/null +++ b/components/raftstore/src/store/snapshot_backup.rs @@ -0,0 +1,392 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + sync::{ + atomic::{AtomicBool, AtomicU64, Ordering}, + Arc, Mutex, + }, + time::Duration, +}; + +use engine_traits::{KvEngine, RaftEngine}; +use futures::channel::mpsc::UnboundedSender; +use kvproto::{brpb::CheckAdminResponse, metapb::RegionEpoch, raft_cmdpb::AdminCmdType}; +use tikv_util::{info, warn}; +use tokio::sync::oneshot; + +use super::{metrics, PeerMsg, RaftRouter, SignificantMsg, SignificantRouter}; +use crate::coprocessor::{ + AdminObserver, BoxAdminObserver, BoxQueryObserver, Coprocessor, CoprocessorHost, + Error as CopError, QueryObserver, +}; + +fn epoch_second_coarse() -> u64 { + let spec = tikv_util::time::monotonic_coarse_now(); + spec.sec as u64 +} + +#[derive(Debug, Clone)] +pub struct SnapshotBrWaitApplyRequest { + pub syncer: SnapshotBrWaitApplySyncer, + pub expected_epoch: Option, + pub abort_when_term_change: bool, +} + +impl SnapshotBrWaitApplyRequest { + /// Create a "relax" request for waiting apply. + /// This only waits to the last index, without checking the region epoch or + /// leadership migrating. + pub fn relaxed(syncer: SnapshotBrWaitApplySyncer) -> Self { + Self { + syncer, + expected_epoch: None, + abort_when_term_change: false, + } + } + + /// Create a "strict" request for waiting apply. + /// This will wait to last applied index, and aborts if the region epoch not + /// match or the last index may not be committed. 
+ pub fn strict(syncer: SnapshotBrWaitApplySyncer, epoch: RegionEpoch) -> Self { + Self { + syncer, + expected_epoch: Some(epoch), + abort_when_term_change: true, + } + } +} + +pub trait SnapshotBrHandle: Sync + Send + Clone { + fn send_wait_apply(&self, region: u64, req: SnapshotBrWaitApplyRequest) -> crate::Result<()>; + fn broadcast_wait_apply(&self, req: SnapshotBrWaitApplyRequest) -> crate::Result<()>; + fn broadcast_check_pending_admin( + &self, + tx: UnboundedSender, + ) -> crate::Result<()>; +} + +impl SnapshotBrHandle for Arc>> { + fn send_wait_apply(&self, region: u64, req: SnapshotBrWaitApplyRequest) -> crate::Result<()> { + let msg = SignificantMsg::SnapshotBrWaitApply(req); + metrics::SNAP_BR_WAIT_APPLY_EVENT.sent.inc(); + self.lock().unwrap().significant_send(region, msg) + } + + fn broadcast_wait_apply(&self, req: SnapshotBrWaitApplyRequest) -> crate::Result<()> { + let msg_gen = || { + metrics::SNAP_BR_WAIT_APPLY_EVENT.sent.inc(); + PeerMsg::SignificantMsg(SignificantMsg::SnapshotBrWaitApply(req.clone())) + }; + self.lock().unwrap().broadcast_normal(msg_gen); + Ok(()) + } + + fn broadcast_check_pending_admin( + &self, + tx: UnboundedSender, + ) -> crate::Result<()> { + self.lock().unwrap().broadcast_normal(|| { + PeerMsg::SignificantMsg(SignificantMsg::CheckPendingAdmin(tx.clone())) + }); + Ok(()) + } +} + +#[derive(Default)] +pub struct PrepareDiskSnapObserver { + before: AtomicU64, + initialized: AtomicBool, +} + +impl PrepareDiskSnapObserver { + pub fn register_to(self: &Arc, coprocessor_host: &mut CoprocessorHost) { + let reg = &mut coprocessor_host.registry; + reg.register_query_observer(0, BoxQueryObserver::new(Arc::clone(self))); + reg.register_admin_observer(0, BoxAdminObserver::new(Arc::clone(self))); + info!("registered reject ingest and admin coprocessor to TiKV."); + } + + pub fn remained_secs(&self) -> u64 { + self.before + .load(Ordering::Acquire) + .saturating_sub(epoch_second_coarse()) + } + + fn reject(&self) -> CopError { + 
CopError::RequireDelay { + after: Duration::from_secs(self.remained_secs()), + reason: + "[Suspended] Preparing disk snapshot backup, ingests and some of admin commands are suspended." + .to_owned(), + } + } + + pub fn allowed(&self) -> bool { + let mut v = self.before.load(Ordering::Acquire); + if v == 0 { + return true; + } + let mut expired = v < epoch_second_coarse(); + while expired { + match self + .before + .compare_exchange(v, 0, Ordering::SeqCst, Ordering::SeqCst) + { + Ok(_) => { + metrics::SNAP_BR_SUSPEND_COMMAND_LEASE_UNTIL.set(0); + metrics::SNAP_BR_LEASE_EVENT.expired.inc(); + break; + } + Err(new_val) => { + v = new_val; + expired = v < epoch_second_coarse(); + } + } + } + + expired + } + + pub fn initialized(&self) -> bool { + self.initialized.load(Ordering::Acquire) + } + + /// Extend the lease. + /// + /// # Returns + /// + /// Whether previously there is a lease. + pub fn update_lease(&self, lease: Duration) -> bool { + let mut v = self.before.load(Ordering::SeqCst); + let now = epoch_second_coarse(); + let new_lease = now + lease.as_secs(); + let last_lease_valid = v > now; + while v < new_lease { + let res = self + .before + .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |v| { + if v > new_lease { None } else { Some(new_lease) } + }); + match res { + Ok(_) => { + metrics::SNAP_BR_SUSPEND_COMMAND_LEASE_UNTIL.set(new_lease as _); + break; + } + Err(prev) => v = prev, + } + } + if last_lease_valid { + metrics::SNAP_BR_LEASE_EVENT.renew.inc(); + } else { + metrics::SNAP_BR_LEASE_EVENT.create.inc(); + } + last_lease_valid + } + + pub fn reset(&self) { + self.before.store(0, Ordering::SeqCst); + metrics::SNAP_BR_SUSPEND_COMMAND_LEASE_UNTIL.set(0); + metrics::SNAP_BR_LEASE_EVENT.reset.inc(); + } +} + +impl Coprocessor for Arc { + fn start(&self) { + self.initialized.store(true, Ordering::Release) + } + + fn stop(&self) { + self.initialized.store(false, Ordering::Release) + } +} + +impl QueryObserver for Arc { + fn pre_propose_query( + &self, + cx: 
&mut crate::coprocessor::ObserverContext<'_>, + reqs: &mut Vec, + ) -> crate::coprocessor::Result<()> { + if self.allowed() { + return Ok(()); + } + for req in reqs { + if req.has_ingest_sst() { + // Note: this will reject the batch of commands, which isn't so effective. + // But we cannot reject proposing a subset of command for now... + cx.bypass = true; + metrics::SNAP_BR_SUSPEND_COMMAND_TYPE + .with_label_values(&["Ingest"]) + .inc(); + return Err(self.reject()); + } + } + Ok(()) + } +} + +impl AdminObserver for Arc { + fn pre_propose_admin( + &self, + _: &mut crate::coprocessor::ObserverContext<'_>, + admin: &mut kvproto::raft_cmdpb::AdminRequest, + ) -> crate::coprocessor::Result<()> { + if self.allowed() { + return Ok(()); + } + // NOTE: We should disable `CompactLog` here because if the log get truncated, + // we may take a long time to send snapshots during restoring. + // + // However it may impact the TP workload if we are preparing for a long time. + // With this risk, we need more evidence of its adventage to reject CompactLogs. + let should_reject = matches!( + admin.get_cmd_type(), + AdminCmdType::Split | + AdminCmdType::BatchSplit | + // We will allow `Commit/RollbackMerge` here because the + // `wait_pending_admin` will wait until the merge get finished. + // If we reject them, they won't be able to see the merge get finished. + // And will finally time out. 
+ AdminCmdType::PrepareMerge | + AdminCmdType::ChangePeer | + AdminCmdType::ChangePeerV2 | + AdminCmdType::BatchSwitchWitness + ); + if should_reject { + metrics::SNAP_BR_SUSPEND_COMMAND_TYPE + .with_label_values(&[&format!("{:?}", admin.get_cmd_type())]) + .inc(); + return Err(self.reject()); + } + Ok(()) + } + + fn pre_transfer_leader( + &self, + _ctx: &mut crate::coprocessor::ObserverContext<'_>, + _tr: &kvproto::raft_cmdpb::TransferLeaderRequest, + ) -> crate::coprocessor::Result<()> { + if self.allowed() { + return Ok(()); + } + metrics::SNAP_BR_SUSPEND_COMMAND_TYPE + .with_label_values(&["TransferLeader"]) + .inc(); + Err(self.reject()) + } +} + +#[derive(Debug)] +struct SyncerCore { + report_id: u64, + feedback: Option>, +} + +#[derive(Debug, PartialEq)] +pub struct SyncReport { + pub report_id: u64, + pub aborted: Option, +} + +impl SyncerCore { + fn new(report_id: u64, feedback: oneshot::Sender) -> Self { + Self { + report_id, + feedback: Some(feedback), + } + } + + fn is_aborted(&self) -> bool { + self.feedback.is_none() + } + + /// Abort this syncer. + /// This will fire a message right now. + /// And disable all clones of this syncer. + /// If already aborted, this will do nothing. 
+ fn abort(&mut self, reason: AbortReason) { + if let Some(ch) = self.feedback.take() { + let report = SyncReport { + report_id: self.report_id, + aborted: Some(reason), + }; + if let Err(report) = ch.send(report) { + warn!("reply waitapply states failure."; "report" => ?report); + } + } + } + + fn make_success_result(&self) -> SyncReport { + SyncReport { + report_id: self.report_id, + aborted: None, + } + } +} + +impl Drop for SyncerCore { + fn drop(&mut self) { + if let Some(ch) = self.feedback.take() { + let report = self.make_success_result(); + if let Err(report) = ch.send(report) { + warn!("reply waitapply states failure."; "report" => ?report); + } + metrics::SNAP_BR_WAIT_APPLY_EVENT.finished.inc() + } else { + warn!("wait apply aborted."; "report" => self.report_id); + } + } +} + +/// A syncer for wait apply. +/// The sender used for constructing this structure will: +/// Be closed, if the `abort` has been called. +/// Send the report id to the caller, if all replicas of this Syncer has been +/// dropped. +#[derive(Debug, Clone)] +pub struct SnapshotBrWaitApplySyncer(Arc>); + +impl SnapshotBrWaitApplySyncer { + pub fn new(report_id: u64, sender: oneshot::Sender) -> Self { + let core = SyncerCore::new(report_id, sender); + Self(Arc::new(Mutex::new(core))) + } + + pub fn abort(self, reason: AbortReason) { + let mut core = self.0.lock().unwrap(); + warn!("aborting wait apply."; "reason" => ?reason, "id" => %core.report_id, "already_aborted" => %core.is_aborted()); + match reason { + AbortReason::EpochNotMatch(_) => { + metrics::SNAP_BR_WAIT_APPLY_EVENT.epoch_not_match.inc() + } + AbortReason::StaleCommand { .. 
} => { + metrics::SNAP_BR_WAIT_APPLY_EVENT.term_not_match.inc() + } + AbortReason::Duplicated => metrics::SNAP_BR_WAIT_APPLY_EVENT.duplicated.inc(), + } + core.abort(reason); + } +} + +#[derive(Debug, PartialEq)] +pub enum AbortReason { + EpochNotMatch(kvproto::errorpb::EpochNotMatch), + StaleCommand { + expected_term: u64, + current_term: u64, + region_id: u64, + }, + Duplicated, +} + +#[derive(Debug)] +pub enum SnapshotBrState { + // This state is set by the leader peer fsm. Once set, it sync and check leader commit index + // and force forward to last index once follower appended and then it also is checked + // every time this peer applies a the last index, if the last index is met, this state is + // reset / droppeds. The syncer is dropped and send the response to the invoker. + WaitLogApplyToLast { + target_index: u64, + valid_for_term: Option, + syncer: SnapshotBrWaitApplySyncer, + }, +} diff --git a/components/raftstore/src/store/unsafe_recovery.rs b/components/raftstore/src/store/unsafe_recovery.rs index 28943ae7339..4bc84ebe2a7 100644 --- a/components/raftstore/src/store/unsafe_recovery.rs +++ b/components/raftstore/src/store/unsafe_recovery.rs @@ -2,7 +2,7 @@ use std::{ fmt, mem, - sync::{mpsc::SyncSender, Arc, Mutex}, + sync::{Arc, Mutex}, time::Duration, }; @@ -278,40 +278,6 @@ impl UnsafeRecoveryExecutePlanSyncer { *self.abort.lock().unwrap() = true; } } -// Syncer only send to leader in 2nd BR restore -#[derive(Clone, Debug)] -pub struct SnapshotRecoveryWaitApplySyncer { - _closure: Arc, - abort: Arc>, -} - -impl SnapshotRecoveryWaitApplySyncer { - pub fn new(region_id: u64, sender: SyncSender) -> Self { - let thread_safe_router = Mutex::new(sender); - let abort = Arc::new(Mutex::new(false)); - let abort_clone = abort.clone(); - let closure = InvokeClosureOnDrop(Some(Box::new(move || { - info!("region {} wait apply finished", region_id); - if *abort_clone.lock().unwrap() { - warn!("wait apply aborted"); - return; - } - let router_ptr = 
thread_safe_router.lock().unwrap(); - - _ = router_ptr.send(region_id).map_err(|_| { - warn!("reply waitapply states failure."); - }); - }))); - SnapshotRecoveryWaitApplySyncer { - _closure: Arc::new(closure), - abort, - } - } - - pub fn abort(&self) { - *self.abort.lock().unwrap() = true; - } -} #[derive(Clone, Debug)] pub struct UnsafeRecoveryWaitApplySyncer { @@ -386,19 +352,6 @@ impl UnsafeRecoveryFillOutReportSyncer { } } -#[derive(Debug)] -pub enum SnapshotRecoveryState { - // This state is set by the leader peer fsm. Once set, it sync and check leader commit index - // and force forward to last index once follower appended and then it also is checked - // every time this peer applies a the last index, if the last index is met, this state is - // reset / droppeds. The syncer is droped and send the response to the invoker, triggers - // the next step of recovery process. - WaitLogApplyToLast { - target_index: u64, - syncer: SnapshotRecoveryWaitApplySyncer, - }, -} - #[derive(Debug)] pub enum UnsafeRecoveryState { // Stores the state that is necessary for the wait apply stage of unsafe recovery process. 
diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 292fdb8e56f..24e8b55c03a 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -67,6 +67,7 @@ use raftstore::{ RaftBatchSystem, RaftRouter, StoreMeta, MULTI_FILES_SNAPSHOT_FEATURE, PENDING_MSG_CAP, }, memory::MEMTRACE_ROOT as MEMTRACE_RAFTSTORE, + snapshot_backup::PrepareDiskSnapObserver, AutoSplitController, CheckLeaderRunner, LocalReader, SnapManager, SnapManagerBuilder, SplitCheckRunner, SplitConfigManager, StoreMetaDelegate, }, @@ -287,6 +288,7 @@ where br_snap_recovery_mode: bool, // use for br snapshot recovery resolved_ts_scheduler: Option>, grpc_service_mgr: GrpcServiceManager, + snap_br_rejector: Option>, } struct TikvEngines { @@ -477,6 +479,7 @@ where br_snap_recovery_mode: is_recovering_marked, resolved_ts_scheduler: None, grpc_service_mgr: GrpcServiceManager::new(tx), + snap_br_rejector: None, } } @@ -859,6 +862,10 @@ where )), ); + let rejector = Arc::new(PrepareDiskSnapObserver::default()); + rejector.register_to(self.coprocessor_host.as_mut().unwrap()); + self.snap_br_rejector = Some(rejector); + // Start backup stream let backup_stream_scheduler = if self.core.config.log_backup.enable { // Create backup stream. @@ -1207,16 +1214,6 @@ where // Backup service. 
let mut backup_worker = Box::new(self.core.background_worker.lazy_build("backup-endpoint")); let backup_scheduler = backup_worker.scheduler(); - let backup_service = - backup::Service::::with_router(backup_scheduler, self.router.clone()); - if servers - .server - .register_service(create_backup(backup_service)) - .is_some() - { - fatal!("failed to register backup service"); - } - let backup_endpoint = backup::Endpoint::new( servers.node.id(), engines.engine.clone(), @@ -1228,6 +1225,20 @@ where self.causal_ts_provider.clone(), self.resource_manager.clone(), ); + let env = backup::disk_snap::Env::new( + Arc::new(Mutex::new(self.router.clone())), + self.snap_br_rejector.take().unwrap(), + Some(backup_endpoint.io_pool_handle().clone()), + ); + let backup_service = backup::Service::new(backup_scheduler, env); + if servers + .server + .register_service(create_backup(backup_service)) + .is_some() + { + fatal!("failed to register backup service"); + } + self.cfg_controller.as_mut().unwrap().register( tikv::config::Module::Backup, Box::new(backup_endpoint.get_config_manager()), diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index bfc09f483bc..b5cabccf43c 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -27,6 +27,7 @@ use std::{ }; use api_version::{dispatch_api_version, KvFormat}; +use backup::disk_snap::Env; use backup_stream::{ config::BackupStreamConfigManager, metadata::store::PdStore, observer::BackupStreamObserver, BackupStreamResolver, @@ -63,7 +64,7 @@ use raftstore::{ RegionInfoAccessor, }; use raftstore_v2::{ - router::{PeerMsg, RaftRouter}, + router::{DiskSnapBackupHandle, PeerMsg, RaftRouter}, StateStorage, }; use resolved_ts::Task; @@ -917,7 +918,10 @@ where // Backup service. 
let mut backup_worker = Box::new(self.core.background_worker.lazy_build("backup-endpoint")); let backup_scheduler = backup_worker.scheduler(); - let backup_service = backup::Service::::new(backup_scheduler); + let backup_service = backup::Service::new( + backup_scheduler, + Env::new(DiskSnapBackupHandle, Default::default(), None), + ); if servers .server .register_service(create_backup(backup_service)) diff --git a/components/snap_recovery/src/services.rs b/components/snap_recovery/src/services.rs index 6bf706e158f..ff83db76bf2 100644 --- a/components/snap_recovery/src/services.rs +++ b/components/snap_recovery/src/services.rs @@ -7,7 +7,6 @@ use std::{ result, sync::{ atomic::{AtomicBool, Ordering}, - mpsc::{sync_channel, SyncSender}, Arc, Mutex, }, thread::Builder, @@ -36,12 +35,14 @@ use raftstore::{ store::{ fsm::RaftRouter, msg::{PeerMsg, SignificantMsg}, + snapshot_backup::{SnapshotBrWaitApplyRequest, SyncReport}, transport::SignificantRouter, - SnapshotRecoveryWaitApplySyncer, + SnapshotBrWaitApplySyncer, }, }; use thiserror::Error; use tikv_util::sys::thread::{StdThreadBuildWrapper, ThreadBuildWrapper}; +use tokio::sync::oneshot::{self, Sender}; use crate::{ data_resolver::DataResolverManager, @@ -224,11 +225,11 @@ where // a new wait apply syncer share with all regions, // when all region reached the target index, share reference decreased to 0, // trigger closure to send finish info back. 
- pub fn wait_apply_last(router: RaftRouter, sender: SyncSender) { - let wait_apply = SnapshotRecoveryWaitApplySyncer::new(0, sender); + pub fn wait_apply_last(router: RaftRouter, sender: Sender) { + let wait_apply = SnapshotBrWaitApplySyncer::new(0, sender); router.broadcast_normal(|| { - PeerMsg::SignificantMsg(SignificantMsg::SnapshotRecoveryWaitApply( - wait_apply.clone(), + PeerMsg::SignificantMsg(SignificantMsg::SnapshotBrWaitApply( + SnapshotBrWaitApplyRequest::relaxed(wait_apply.clone()), )) }); } @@ -345,12 +346,14 @@ where // wait apply to the last log let mut rx_apply = Vec::with_capacity(leaders.len()); for ®ion_id in &leaders { - let (tx, rx) = sync_channel(1); + let (tx, rx) = oneshot::channel(); REGION_EVENT_COUNTER.start_wait_leader_apply.inc(); - let wait_apply = SnapshotRecoveryWaitApplySyncer::new(region_id, tx.clone()); + let wait_apply = SnapshotBrWaitApplySyncer::new(region_id, tx); if let Err(e) = raft_router.get_mut().unwrap().significant_send( region_id, - SignificantMsg::SnapshotRecoveryWaitApply(wait_apply.clone()), + SignificantMsg::SnapshotBrWaitApply(SnapshotBrWaitApplyRequest::relaxed( + wait_apply.clone(), + )), ) { error!( "failed to send wait apply"; @@ -358,27 +361,21 @@ where "err" => ?e, ); } - rx_apply.push(Some(rx)); + rx_apply.push(rx); } // leader apply to last log for (rid, rx) in leaders.iter().zip(rx_apply) { - if let Some(rx) = rx { - CURRENT_WAIT_APPLY_LEADER.set(*rid as _); - // FIXME: we cannot the former RPC when we get stuck at here. - // Perhaps we need to make `SnapshotRecoveryWaitApplySyncer` be able to support - // asynchronous channels. But for now, waiting seems won't cause live lock, so - // we are keeping it unchanged. 
- match rx.recv() { - Ok(region_id) => { - debug!("leader apply to last log"; "region_id" => region_id); - } - Err(e) => { - error!("leader failed to apply to last log"; "error" => ?e); - } + CURRENT_WAIT_APPLY_LEADER.set(*rid as _); + match rx.await { + Ok(_) => { + debug!("leader apply to last log"; "region_id" => rid); + } + Err(e) => { + error!("leader failed to apply to last log"; "error" => ?e); } - REGION_EVENT_COUNTER.finish_wait_leader_apply.inc(); } + REGION_EVENT_COUNTER.finish_wait_leader_apply.inc(); } CURRENT_WAIT_APPLY_LEADER.set(0); @@ -420,14 +417,11 @@ where info!("wait_apply start"); let task = async move { let now = Instant::now(); - // FIXME: this function will exit once the first region finished apply. - // BUT for the flashback resolve KV implementation, that is fine because the - // raft log stats is consistent. - let (tx, rx) = sync_channel(1); - RecoveryService::wait_apply_last(router, tx.clone()); - match rx.recv() { + let (tx, rx) = oneshot::channel(); + RecoveryService::wait_apply_last(router, tx); + match rx.await { Ok(id) => { - info!("follower apply to last log"; "error" => id); + info!("follower apply to last log"; "report" => ?id); } Err(e) => { error!("follower failed to apply to last log"; "error" => ?e); diff --git a/components/sst_importer/src/errors.rs b/components/sst_importer/src/errors.rs index e03288bb3e1..e5e235e9761 100644 --- a/components/sst_importer/src/errors.rs +++ b/components/sst_importer/src/errors.rs @@ -178,6 +178,7 @@ impl From for import_sstpb::Error { let mut server_is_busy = errorpb::ServerIsBusy::default(); server_is_busy.set_backoff_ms(time_to_lease_expire.as_millis() as _); store_err.set_server_is_busy(server_is_busy); + store_err.set_message(format!("{}", e)); err.set_store_error(store_err); err.set_message(format!("{}", e)); } diff --git a/components/test_backup/Cargo.toml b/components/test_backup/Cargo.toml index 1dbe232fd9e..ea6ff5e30ea 100644 --- a/components/test_backup/Cargo.toml +++ 
b/components/test_backup/Cargo.toml @@ -20,6 +20,7 @@ futures-util = { version = "0.3", default-features = false, features = ["io"] } grpcio = { workspace = true } kvproto = { workspace = true } protobuf = "2" +raftstore = { workspace = true } rand = "0.8" tempfile = "3.0" test_raftstore = { workspace = true } diff --git a/components/test_backup/src/disk_snap.rs b/components/test_backup/src/disk_snap.rs new file mode 100644 index 00000000000..aa1c94f8e5e --- /dev/null +++ b/components/test_backup/src/disk_snap.rs @@ -0,0 +1,243 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + collections::{HashMap, HashSet}, + sync::{Arc, Mutex}, + time::Duration, +}; + +use backup::disk_snap::Env as BEnv; +use engine_rocks::RocksEngine as KTE; +use futures_executor::block_on; +use futures_util::{ + sink::SinkExt, + stream::{Fuse, StreamExt}, +}; +use grpcio::{ + ChannelBuilder, ClientDuplexReceiver, Environment, Server, ServerBuilder, StreamingCallSink, + WriteFlags, +}; +use kvproto::{ + brpb::{ + self, PrepareSnapshotBackupEventType, PrepareSnapshotBackupRequest, + PrepareSnapshotBackupRequestType, PrepareSnapshotBackupResponse, + }, + metapb::Region, + raft_cmdpb::RaftCmdResponse, +}; +use raftstore::store::{snapshot_backup::PrepareDiskSnapObserver, Callback, WriteResponse}; +use test_raftstore::*; +use tikv_util::{ + future::{block_on_timeout, paired_future_callback}, + worker::dummy_scheduler, + HandyRwLock, +}; + +pub struct Node { + service: Option, + pub rejector: Arc, + pub backup_client: Option, +} + +pub struct Suite { + pub cluster: Cluster>, + pub nodes: HashMap, + grpc_env: Arc, +} + +impl Suite { + fn crate_node(&mut self, id: u64) { + let rej = Arc::new(PrepareDiskSnapObserver::default()); + let rej2 = rej.clone(); + let mut w = self.cluster.sim.wl(); + w.coprocessor_hooks + .entry(id) + .or_default() + .push(Box::new(move |host| { + rej2.register_to(host); + })); + self.nodes.insert( + id, + Node { + service: None, + rejector: 
rej, + backup_client: None, + }, + ); + } + + fn start_backup(&mut self, id: u64) { + let (sched, _) = dummy_scheduler(); + let w = self.cluster.sim.wl(); + let router = Arc::new(Mutex::new(w.get_router(id).unwrap())); + let env = BEnv::new(router, self.nodes[&id].rejector.clone(), None); + let service = backup::Service::new(sched, env); + let builder = ServerBuilder::new(Arc::clone(&self.grpc_env)) + .register_service(brpb::create_backup(service)); + let mut server = builder.bind("127.0.0.1", 0).build().unwrap(); + server.start(); + let (_, port) = server.bind_addrs().next().unwrap(); + let addr = format!("127.0.0.1:{}", port); + let channel = ChannelBuilder::new(self.grpc_env.clone()).connect(&addr); + println!("connecting channel to {} for store {}", addr, id); + let client = brpb::BackupClient::new(channel); + let node = self.nodes.get_mut(&id).unwrap(); + node.service = Some(server); + node.backup_client = Some(client); + } + + pub fn try_split(&mut self, split_key: &[u8]) -> WriteResponse { + let region = self.cluster.get_region(split_key); + let (tx, rx) = paired_future_callback(); + self.cluster + .split_region(®ion, split_key, Callback::write(tx)); + block_on(rx).unwrap() + } + + pub fn split(&mut self, split_key: &[u8]) { + let region = self.cluster.get_region(split_key); + self.try_split(split_key); + self.cluster.wait_region_split(®ion); + } + + fn backup(&self, id: u64) -> &brpb::BackupClient { + self.nodes[&id].backup_client.as_ref().unwrap() + } + + pub fn prepare_backup(&self, node: u64) -> PrepareBackup { + let cli = self.backup(node); + let (tx, rx) = cli.prepare_snapshot_backup().unwrap(); + PrepareBackup { + store_id: node, + tx, + rx: rx.fuse(), + } + } + + pub fn new(node_count: u64) -> Self { + Self::new_with_cfg(node_count, |_| {}) + } + + pub fn new_with_cfg(node_count: u64, cfg: impl FnOnce(&mut Config)) -> Self { + let cluster = new_server_cluster(42, node_count as usize); + let grpc_env = Arc::new(Environment::new(1)); + let mut suite = 
Suite { + cluster, + nodes: HashMap::default(), + grpc_env, + }; + for id in 1..=node_count { + suite.crate_node(id); + } + cfg(&mut suite.cluster.cfg); + suite.cluster.run(); + for id in 1..=node_count { + suite.start_backup(id); + } + suite + } +} + +pub struct PrepareBackup { + tx: StreamingCallSink, + rx: Fuse>, + + pub store_id: u64, +} + +impl PrepareBackup { + pub fn prepare(&mut self, lease_sec: u64) { + let mut req = PrepareSnapshotBackupRequest::new(); + req.set_ty(PrepareSnapshotBackupRequestType::UpdateLease); + req.set_lease_in_seconds(lease_sec); + block_on(async { + self.tx.send((req, WriteFlags::default())).await.unwrap(); + self.rx.next().await.unwrap().unwrap(); + }); + } + + pub fn wait_apply(&mut self, r: impl IntoIterator) { + let mut req = PrepareSnapshotBackupRequest::new(); + req.set_ty(PrepareSnapshotBackupRequestType::WaitApply); + req.set_regions(r.into_iter().collect()); + let mut regions = req + .get_regions() + .iter() + .map(|x| x.id) + .collect::>(); + block_on(async { + self.tx.send((req, WriteFlags::default())).await.unwrap(); + while !regions.is_empty() { + let resp = self.rx.next().await.unwrap().unwrap(); + assert_eq!(resp.ty, PrepareSnapshotBackupEventType::WaitApplyDone); + assert!(!resp.has_error(), "{resp:?}"); + assert!(regions.remove(&resp.get_region().id), "{regions:?}"); + } + }); + } + + pub fn send_wait_apply(&mut self, r: impl IntoIterator) { + let mut req = PrepareSnapshotBackupRequest::new(); + req.set_ty(PrepareSnapshotBackupRequestType::WaitApply); + req.set_regions(r.into_iter().collect()); + block_on(async { + self.tx.send((req, WriteFlags::default())).await.unwrap(); + }) + } + + pub fn send_finalize(mut self) -> bool { + block_on(self.tx.send({ + let mut req = PrepareSnapshotBackupRequest::new(); + req.set_ty(PrepareSnapshotBackupRequestType::Finish); + (req, WriteFlags::default()) + })) + .unwrap(); + block_on_timeout( + async { + while let Some(item) = self.rx.next().await { + let item = item.unwrap(); + if 
item.ty == PrepareSnapshotBackupEventType::UpdateLeaseResult { + return item.last_lease_is_valid; + } + } + false + }, + Duration::from_secs(2), + ) + .expect("take too long to finalize the stream") + } + + pub fn next(&mut self) -> PrepareSnapshotBackupResponse { + block_on(self.rx.next()).unwrap().unwrap() + } +} + +#[track_caller] +pub fn must_wait_apply_success(res: &PrepareSnapshotBackupResponse) -> u64 { + assert!(!res.has_error(), "{res:?}"); + assert_eq!(res.ty, PrepareSnapshotBackupEventType::WaitApplyDone); + res.get_region().id +} + +#[track_caller] +pub fn assert_success(resp: &RaftCmdResponse) { + assert!(!resp.get_header().has_error(), "{:?}", resp); +} + +#[track_caller] +pub fn assert_failure(resp: &RaftCmdResponse) { + assert!(resp.get_header().has_error(), "{:?}", resp); +} + +#[track_caller] +pub fn assert_failure_because(resp: &RaftCmdResponse, reason_contains: &str) { + assert!(resp.get_header().has_error(), "{:?}", resp); + assert!( + resp.get_header() + .get_error() + .get_message() + .contains(reason_contains), + "{:?}", + resp + ); +} diff --git a/components/test_backup/src/lib.rs b/components/test_backup/src/lib.rs index 4331f072750..5ea853799b5 100644 --- a/components/test_backup/src/lib.rs +++ b/components/test_backup/src/lib.rs @@ -11,7 +11,9 @@ use std::{ use api_version::{dispatch_api_version, keyspace::KvPair, ApiV1, KvFormat, RawValue}; use backup::Task; use collections::HashMap; -use engine_rocks::RocksEngine; +// NOTE: Perhaps we'd better use test engine here. But it seems for now we cannot initialize a +// mock cluster with `PanicEngine` and in our CI environment clippy will complain that. 
+use engine_rocks::RocksEngine as KTE; use engine_traits::{CfName, IterOptions, CF_DEFAULT, CF_WRITE, DATA_KEY_PREFIX_LEN}; use external_storage::make_local_backend; use futures::{channel::mpsc as future_mpsc, executor::block_on}; @@ -39,8 +41,10 @@ use tikv_util::{ }; use txn_types::TimeStamp; +pub mod disk_snap; + pub struct TestSuite { - pub cluster: Cluster>, + pub cluster: Cluster>, pub endpoints: HashMap>, pub tikv_cli: TikvClient, pub context: Context, diff --git a/components/test_raftstore/src/transport_simulate.rs b/components/test_raftstore/src/transport_simulate.rs index 3824e0dbe75..6fe4560dfe7 100644 --- a/components/test_raftstore/src/transport_simulate.rs +++ b/components/test_raftstore/src/transport_simulate.rs @@ -274,6 +274,12 @@ pub trait FilterFactory { fn generate(&self, node_id: u64) -> Vec>; } +impl Fl, Fl: Filter + 'static> FilterFactory for F { + fn generate(&self, node_id: u64) -> Vec> { + vec![Box::new(self(node_id)) as _] + } +} + #[derive(Default)] pub struct DefaultFilterFactory(PhantomData); diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 951a99074b6..1ee7955090c 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -639,6 +639,7 @@ pub fn must_error_read_on_peer>( } } +#[track_caller] pub fn must_contains_error(resp: &RaftCmdResponse, msg: &str) { let header = resp.get_header(); assert!(header.has_error()); diff --git a/components/test_util/src/lib.rs b/components/test_util/src/lib.rs index 222af48fd4e..653d246e0fb 100644 --- a/components/test_util/src/lib.rs +++ b/components/test_util/src/lib.rs @@ -18,6 +18,7 @@ use std::{ fmt::Debug, sync::atomic::{AtomicU16, Ordering}, thread, + time::Duration, }; use rand::Rng; @@ -153,3 +154,21 @@ pub fn assert_eq_debug(lhs: &C, rhs: &C) { lhs_diff, rhs_diff ); } + +#[track_caller] +pub fn eventually(tick: Duration, total: Duration, mut check: impl FnMut() -> bool) { + let start = 
std::time::Instant::now(); + loop { + if check() { + return; + } + if start.elapsed() < total { + std::thread::sleep(tick); + continue; + } + panic!( + "failed to pass the check after {:?} elapsed", + start.elapsed() + ); + } +} diff --git a/components/tikv_util/src/time.rs b/components/tikv_util/src/time.rs index 8594379a9bd..f329247c563 100644 --- a/components/tikv_util/src/time.rs +++ b/components/tikv_util/src/time.rs @@ -200,10 +200,9 @@ impl Drop for Monitor { } } -use self::inner::monotonic_coarse_now; -pub use self::inner::monotonic_now; /// Returns the monotonic raw time since some unspecified starting point. pub use self::inner::monotonic_raw_now; +pub use self::inner::{monotonic_coarse_now, monotonic_now}; use crate::sys::thread::StdThreadBuildWrapper; const NANOSECONDS_PER_SECOND: u64 = 1_000_000_000; diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index d5b5c7c4103..bd12053031f 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -1139,12 +1139,14 @@ impl ImportSst for ImportSstService { ) { let label = "ingest"; let timer = Instant::now_coarse(); + let mut resp = IngestResponse::default(); + if let Err(err) = self.check_suspend() { - ctx.spawn(async move { crate::send_rpc_response!(Err(err), sink, label, timer) }); + resp.set_error(ImportPbError::from(err).take_store_error()); + ctx.spawn(async move { crate::send_rpc_response!(Ok(resp), sink, label, timer) }); return; } - let mut resp = IngestResponse::default(); let region_id = req.get_context().get_region_id(); if let Some(errorpb) = self.check_write_stall(region_id) { resp.set_error(errorpb); @@ -1186,12 +1188,13 @@ impl ImportSst for ImportSstService { ) { let label = "multi-ingest"; let timer = Instant::now_coarse(); + let mut resp = IngestResponse::default(); if let Err(err) = self.check_suspend() { - ctx.spawn(async move { crate::send_rpc_response!(Err(err), sink, label, timer) }); + resp.set_error(ImportPbError::from(err).take_store_error()); + 
ctx.spawn(async move { crate::send_rpc_response!(Ok(resp), sink, label, timer) }); return; } - let mut resp = IngestResponse::default(); if let Some(errorpb) = self.check_write_stall(req.get_context().get_region_id()) { resp.set_error(errorpb); ctx.spawn( diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 79f5439736d..549c7baf293 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -52,10 +52,10 @@ test-engine-raft-raft-engine = [ "raftstore/test-engine-raft-raft-engine" ] test-engines-rocksdb = [ - "raftstore/test-engines-rocksdb", + "raftstore/test-engines-rocksdb" ] test-engines-panic = [ - "raftstore/test-engines-panic", + "raftstore/test-engines-panic" ] jemalloc = ["tikv/jemalloc"] mimalloc = ["tikv/mimalloc"] @@ -138,7 +138,7 @@ resource_metering = { workspace = true } security = { workspace = true } serde_json = "1.0" sst_importer = { workspace = true } -test_backup = { workspace = true } +test_backup = { workspace = true, default-features = false } test_coprocessor = { workspace = true } test_pd = { workspace = true } test_pd_client = { workspace = true } diff --git a/tests/failpoints/cases/mod.rs b/tests/failpoints/cases/mod.rs index ed2b8d79f9c..caf994fc1cd 100644 --- a/tests/failpoints/cases/mod.rs +++ b/tests/failpoints/cases/mod.rs @@ -9,6 +9,7 @@ mod test_conf_change; mod test_coprocessor; mod test_debugger; mod test_disk_full; +mod test_disk_snap_br; mod test_early_apply; mod test_encryption; mod test_engine; diff --git a/tests/failpoints/cases/test_disk_snap_br.rs b/tests/failpoints/cases/test_disk_snap_br.rs new file mode 100644 index 00000000000..83956aa9367 --- /dev/null +++ b/tests/failpoints/cases/test_disk_snap_br.rs @@ -0,0 +1,42 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +// FIXME: Now, for making sure there isn't a regression after the advanced +// prepare patch (anyway it is just a patch...), we won't reject the +// `CommitMerge` command, or the client may fall into an eternal wait over it +// while waiting pending admin command finish. +// +// Omitting rejecting the command won't break the consistency (at least won't +// make things worse), but will break the case: this case itself wants to prove +// that the `CommitMerge` won't be proposed. +#[test] +#[ignore = "See the comment of `test_merge`"] +fn test_merge() { + use std::time::Duration; + + use test_backup::disk_snap::{assert_success, Suite}; + + let mut suite = Suite::new(1); + suite.split(b"k"); + let mut source = suite.cluster.get_region(b"a"); + let target = suite.cluster.get_region(b"z"); + assert_ne!(source.id, target.id); + fail::cfg("on_schedule_merge", "pause").unwrap(); + let resp = suite.cluster.try_merge(source.id, target.id); + assert_success(&resp); + let mut call = suite.prepare_backup(1); + call.prepare(60); + fail::remove("on_schedule_merge"); + // Manually "apply" the prepare merge on region epoch. + source.mut_region_epoch().set_conf_ver(2); + source.mut_region_epoch().set_version(3); + call.wait_apply([&source, &target].into_iter().cloned()); + let source = suite.cluster.get_region(b"a"); + let target = suite.cluster.get_region(b"z"); + assert_ne!(source.id, target.id); + suite.nodes[&1].rejector.reset(); + test_util::eventually(Duration::from_secs(1), Duration::from_secs(10), || { + let source = suite.cluster.get_region(b"a"); + let target = suite.cluster.get_region(b"z"); + source.id == target.id + }) +} diff --git a/tests/integrations/backup/disk_snap.rs b/tests/integrations/backup/disk_snap.rs new file mode 100644 index 00000000000..bdef242b1a1 --- /dev/null +++ b/tests/integrations/backup/disk_snap.rs @@ -0,0 +1,194 @@ +// Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{collections::HashSet, time::Duration}; + +use futures::executor::block_on; +use kvproto::raft_cmdpb::{CmdType, PutRequest, RaftCmdRequest, Request}; +use raft::prelude::MessageType; +use raftstore::store::Callback; +use test_backup::disk_snap::{ + assert_failure, assert_failure_because, assert_success, must_wait_apply_success, Suite, +}; +use test_raftstore::{must_contains_error, Direction, RegionPacketFilter, Simulator}; +use test_util::eventually; +use tikv_util::HandyRwLock; + +#[test] +fn test_basic() { + let mut suite = Suite::new(1); + let mut call = suite.prepare_backup(1); + call.prepare(60); + let resp = suite.try_split(b"k"); + debug!("Failed to split"; "err" => ?resp.response.get_header().get_error()); + must_contains_error(&resp.response, "[Suspended] Preparing disk snapshot backup"); +} + +#[test] +fn test_conf_change() { + let mut suite = Suite::new(4); + let the_region = suite.cluster.get_region(b""); + let last_peer = the_region.peers.last().unwrap(); + let res = block_on( + suite + .cluster + .async_remove_peer(the_region.get_id(), last_peer.clone()) + .unwrap(), + ); + assert_success(&res); + eventually(Duration::from_millis(100), Duration::from_secs(2), || { + let r = suite.cluster.get_region(b""); + !r.peers.iter().any(|p| p.id == last_peer.id) + }); + let mut calls = vec![]; + for i in 1..=4 { + let mut call = suite.prepare_backup(i); + call.prepare(60); + calls.push(call); + } + + // Make sure the change has been synchronized to all stores. 
+ std::thread::sleep(Duration::from_millis(500)); + let the_region = suite.cluster.get_region(b""); + let res2 = block_on( + suite + .cluster + .async_remove_peer(the_region.get_id(), last_peer.clone()) + .unwrap(), + ); + assert_failure_because(&res2, "rejected by coprocessor"); + let last_peer = the_region.peers.last().unwrap(); + calls.into_iter().for_each(|c| assert!(c.send_finalize())); + let res3 = block_on( + suite + .cluster + .async_remove_peer(the_region.get_id(), last_peer.clone()) + .unwrap(), + ); + assert_success(&res3); + eventually(Duration::from_millis(100), Duration::from_secs(2), || { + let r = suite.cluster.get_region(b""); + !r.peers.iter().any(|p| p.id == last_peer.id) + }); +} + +#[test] +fn test_transfer_leader() { + let mut suite = Suite::new(3); + let mut calls = vec![]; + for i in 1..=3 { + let mut call = suite.prepare_backup(i); + call.prepare(60); + calls.push(call); + } + let region = suite.cluster.get_region(b""); + let leader = suite.cluster.leader_of_region(region.get_id()).unwrap(); + let new_leader = region.peers.iter().find(|r| r.id != leader.id).unwrap(); + let res = suite + .cluster + .try_transfer_leader(region.id, new_leader.clone()); + assert_failure_because(&res, "[Suspended] Preparing disk snapshot backup"); + calls.into_iter().for_each(|c| assert!(c.send_finalize())); + let res = suite + .cluster + .try_transfer_leader(region.id, new_leader.clone()); + assert_success(&res); +} + +#[test] +fn test_prepare_merge() { + let mut suite = Suite::new(1); + suite.split(b"k"); + let source = suite.cluster.get_region(b"a"); + let target = suite.cluster.get_region(b"z"); + assert_ne!(source.id, target.id); + let mut call = suite.prepare_backup(1); + call.prepare(60); + let resp = suite.cluster.try_merge(source.id, target.id); + assert_failure(&resp); +} + +#[test] +fn test_wait_apply() { + let mut suite = Suite::new(3); + for key in 'a'..'k' { + suite.split(&[key as u8]); + } + let rc = suite.cluster.get_region(b"ca"); + 
suite.cluster.add_send_filter(|i| { + RegionPacketFilter::new(rc.id, i) + .msg_type(MessageType::MsgAppend) + .direction(Direction::Send) + }); + let (tx, rx) = std::sync::mpsc::channel::<()>(); + let mut ld_sid = None; + // Propose a simple write command to each region. + for c in 'a'..'k' { + let region = suite.cluster.get_region(&[c as u8]); + let mut cmd = RaftCmdRequest::new(); + let mut put = PutRequest::new(); + put.set_key(vec![c as u8, b'a']); + put.set_value(b"meow?".to_vec()); + let mut req = Request::new(); + req.set_put(put); + req.set_cmd_type(CmdType::Put); + cmd.mut_requests().push(req); + cmd.mut_header().set_region_id(region.id); + cmd.mut_header() + .set_region_epoch(region.get_region_epoch().clone()); + let ld = suite.cluster.leader_of_region(region.id).unwrap(); + if let Some(lid) = ld_sid { + assert_eq!( + lid, ld.store_id, + "not all leader are in the same store, this case cannot run" + ); + } + ld_sid = Some(ld.store_id); + cmd.mut_header().set_peer(ld); + let r = suite.cluster.sim.rl(); + r.async_command_on_node( + ld_sid.unwrap(), + cmd, + Callback::write_ext( + Box::new(|resp| assert_success(&resp.response)), + Some(Box::new({ + let tx = tx.clone(); + move || drop(tx) + })), + None, + ), + ) + .unwrap(); + } + let mut call = suite.prepare_backup(ld_sid.unwrap()); + call.prepare(60); + + drop(tx); + rx.recv_timeout(Duration::from_secs(5)).unwrap_err(); + + let v = ('a'..'k') + .map(|c| suite.cluster.get_region(&[c as u8])) + .collect::>(); + let mut regions_ok = v + .iter() + .map(|r| r.id) + .filter(|id| *id != rc.id) + .collect::>(); + call.send_wait_apply(v); + + // The regions w/o network isolation must success to wait apply. + while !regions_ok.is_empty() { + let res = call.next(); + let removed = regions_ok.remove(&must_wait_apply_success(&res)); + let mut k = res.get_region().start_key.clone(); + k.push(b'a'); + let v = suite.cluster.must_get(&k); + // Due to we have wait to it applied, this write result must be observable. 
+ assert_eq!(v.as_deref(), Some(b"meow?".as_slice()), "{res:?}"); + assert!(removed, "{regions_ok:?} {res:?}"); + } + + suite.cluster.clear_send_filters(); + // After the network partition restored, the item must be restored. + let res = call.next(); + assert_eq!(must_wait_apply_success(&res), rc.id); +} diff --git a/tests/integrations/backup/mod.rs b/tests/integrations/backup/mod.rs index 9c5bf113af9..1d82065df58 100644 --- a/tests/integrations/backup/mod.rs +++ b/tests/integrations/backup/mod.rs @@ -17,6 +17,8 @@ use tikv::coprocessor::checksum_crc64_xor; use tikv_util::HandyRwLock; use txn_types::TimeStamp; +mod disk_snap; + fn assert_same_file_name(s1: String, s2: String) { let tokens1: Vec<&str> = s1.split('_').collect(); let tokens2: Vec<&str> = s2.split('_').collect(); diff --git a/tests/integrations/import/test_sst_service.rs b/tests/integrations/import/test_sst_service.rs index 2eb1c10c72d..f1b2e23014c 100644 --- a/tests/integrations/import/test_sst_service.rs +++ b/tests/integrations/import/test_sst_service.rs @@ -609,10 +609,18 @@ fn test_suspend_import() { ); let write_res = write(sst_range); write_res.unwrap(); - let ingest_res = ingest(&sst); - assert_to_string_contains!(ingest_res.unwrap_err(), "Suspended"); - let multi_ingest_res = multi_ingest(&[sst.clone()]); - assert_to_string_contains!(multi_ingest_res.unwrap_err(), "Suspended"); + let ingest_res = ingest(&sst).unwrap(); + assert!( + ingest_res.get_error().has_server_is_busy(), + "{:?}", + ingest_res + ); + let multi_ingest_res = multi_ingest(&[sst.clone()]).unwrap(); + assert!( + multi_ingest_res.get_error().has_server_is_busy(), + "{:?}", + multi_ingest_res + ); assert!( import @@ -637,7 +645,11 @@ fn test_suspend_import() { let write_res = write(sst_range); let sst = write_res.unwrap().metas; let res = multi_ingest(&sst); - assert_to_string_contains!(res.unwrap_err(), "Suspended"); + assert!( + res.as_ref().unwrap().get_error().has_server_is_busy(), + "{:?}", + res + ); 
std::thread::sleep(Duration::from_secs(1)); multi_ingest(&sst).unwrap(); diff --git a/tests/integrations/raftstore/test_snap_recovery.rs b/tests/integrations/raftstore/test_snap_recovery.rs index 5d68bdabcbb..5411e8ec75b 100644 --- a/tests/integrations/raftstore/test_snap_recovery.rs +++ b/tests/integrations/raftstore/test_snap_recovery.rs @@ -2,11 +2,15 @@ use std::time::Duration; -use futures::StreamExt; +use futures::{executor::block_on, StreamExt}; use raft::eraftpb::MessageType; -use raftstore::store::{PeerMsg, SignificantMsg, SnapshotRecoveryWaitApplySyncer}; +use raftstore::store::{ + snapshot_backup::{SnapshotBrWaitApplyRequest, SyncReport}, + PeerMsg, SignificantMsg, SnapshotBrWaitApplySyncer, +}; use test_raftstore::*; -use tikv_util::HandyRwLock; +use tikv_util::{future::block_on_timeout, HandyRwLock}; +use tokio::sync::oneshot; #[test] fn test_check_pending_admin() { @@ -94,17 +98,17 @@ fn test_snap_wait_apply() { let router = cluster.sim.wl().get_router(1).unwrap(); - let (tx, rx) = std::sync::mpsc::sync_channel(1); - + let (tx, rx) = oneshot::channel(); + let syncer = SnapshotBrWaitApplySyncer::new(1, tx); router.broadcast_normal(|| { - PeerMsg::SignificantMsg(SignificantMsg::SnapshotRecoveryWaitApply( - SnapshotRecoveryWaitApplySyncer::new(1, tx.clone()), + PeerMsg::SignificantMsg(SignificantMsg::SnapshotBrWaitApply( + SnapshotBrWaitApplyRequest::relaxed(syncer.clone()), )) }); // we expect recv timeout because the leader peer on store 1 cannot finished the // apply. so the wait apply will timeout. - rx.recv_timeout(Duration::from_secs(1)).unwrap_err(); + block_on_timeout(rx, Duration::from_secs(1)).unwrap_err(); // clear filter so we can make wait apply finished. cluster.clear_send_filters(); @@ -112,13 +116,21 @@ fn test_snap_wait_apply() { // after clear the filter the leader peer on store 1 can finsihed the wait // apply. 
- let (tx, rx) = std::sync::mpsc::sync_channel(1); + let (tx, rx) = oneshot::channel(); + let syncer = SnapshotBrWaitApplySyncer::new(1, tx); router.broadcast_normal(|| { - PeerMsg::SignificantMsg(SignificantMsg::SnapshotRecoveryWaitApply( - SnapshotRecoveryWaitApplySyncer::new(1, tx.clone()), + PeerMsg::SignificantMsg(SignificantMsg::SnapshotBrWaitApply( + SnapshotBrWaitApplyRequest::relaxed(syncer.clone()), )) }); + drop(syncer); // we expect recv the region id from rx. - assert_eq!(rx.recv(), Ok(1)); + assert_eq!( + block_on(rx), + Ok(SyncReport { + report_id: 1, + aborted: None + }) + ); } From c75656de25f17cfaebe47d3ab8f0835ad69be075 Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 11 Jan 2024 17:24:25 +0800 Subject: [PATCH 1103/1149] encryption: support kms encryption for gcp (#16335) close tikv/tikv#8906 support kms encryption for gcp Signed-off-by: glorv --- Cargo.lock | 284 ++++++------ Cargo.toml | 3 +- components/cloud/aws/src/kms.rs | 18 +- components/cloud/azure/src/kms.rs | 10 +- components/cloud/gcp/Cargo.toml | 9 +- components/cloud/gcp/src/client.rs | 266 +++++++++++ components/cloud/gcp/src/gcs.rs | 168 +------ components/cloud/gcp/src/kms.rs | 412 ++++++++++++++++++ components/cloud/gcp/src/lib.rs | 6 + components/cloud/src/error.rs | 42 +- components/cloud/src/kms.rs | 40 +- components/encryption/export/Cargo.toml | 3 +- components/encryption/export/examples/ecli.rs | 33 +- components/encryption/export/src/lib.rs | 14 +- components/encryption/src/config.rs | 72 ++- 15 files changed, 1066 insertions(+), 314 deletions(-) create mode 100644 components/cloud/gcp/src/client.rs create mode 100644 components/cloud/gcp/src/kms.rs diff --git a/Cargo.lock b/Cargo.lock index 50a6150890a..112ca7d041f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -36,7 +36,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c80b57a86234ee3e9238f5f2d33d37f8fd5c7ff168c07f2d5147d410e86db33" dependencies = [ "home", - "libc 0.2.146", + "libc 0.2.151", 
"rustc_version 0.4.0", "xdg", ] @@ -47,7 +47,7 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43bb833f0bf979d8475d38fbf09ed3b8a55e1885fe93ad3f93239fc6a4f17b98" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.11", "once_cell", "version_check 0.9.4", ] @@ -65,9 +65,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "0.7.18" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" dependencies = [ "memchr", ] @@ -225,7 +225,7 @@ version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "winapi 0.3.9", ] @@ -317,7 +317,7 @@ dependencies = [ "bytes", "dyn-clone", "futures 0.3.15", - "getrandom 0.2.3", + "getrandom 0.2.11", "http-types", "log", "paste", @@ -417,7 +417,7 @@ dependencies = [ "addr2line", "cc", "cfg-if 1.0.0", - "libc 0.2.146", + "libc 0.2.151", "miniz_oxide 0.4.4", "object", "rustc-demangle", @@ -584,7 +584,7 @@ dependencies = [ "bcc-sys", "bitflags", "byteorder", - "libc 0.2.146", + "libc 0.2.151", "regex", "thiserror", ] @@ -680,7 +680,7 @@ checksum = "8d6c2c5b58ab920a4f5aeaaca34b4488074e8cc7596af94e6f8c6ff247c60245" dependencies = [ "lazy_static", "memchr", - "regex-automata", + "regex-automata 0.1.8", "serde", ] @@ -718,7 +718,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" dependencies = [ "cc", - "libc 0.2.146", + "libc 0.2.151", "pkg-config", ] @@ -744,7 +744,7 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7f788eaf239475a3c1e1acf89951255a46c4b9b46cf3e866fc4d0707b4b9e36" dependencies = [ - 
"libc 0.2.146", + "libc 0.2.151", "valgrind_request", ] @@ -805,11 +805,12 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.73" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ "jobserver", + "libc 0.2.151", ] [[package]] @@ -908,7 +909,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f54d78e30b388d4815220c8dd03fea5656b6c6d32adb59e89061552a102f8da1" dependencies = [ "glob", - "libc 0.2.146", + "libc 0.2.151", "libloading", ] @@ -991,7 +992,7 @@ version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71367d3385c716342014ad17e3d19f7788ae514885a1f4c24f500260fb365e1a" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "once_cell", "wasi 0.11.0+wasi-snapshot-preview1", "wasm-bindgen", @@ -1004,7 +1005,7 @@ dependencies = [ "byteorder", "bytes", "error_code", - "libc 0.2.146", + "libc 0.2.151", "panic_hook", "protobuf", "rand 0.8.5", @@ -1083,7 +1084,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a89e2ae426ea83155dccf10c0fa6b1463ef6d5fcb44cee0b224a408fa640a62" dependencies = [ "core-foundation-sys", - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -1098,7 +1099,7 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9e393a7668fe1fad3075085b86c781883000b4ede868f43627b34a87c8b7ded" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "winapi 0.3.9", ] @@ -1108,6 +1109,15 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634" +[[package]] +name = "crc32c" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d8f48d60e5b4d2c53d5c2b1d8a58c849a70ae5e5509b08a48d047e3b65714a74" +dependencies = [ + "rustc_version 0.4.0", +] + [[package]] name = "crc32fast" version = "1.2.0" @@ -1156,7 +1166,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63aaaf47e457badbcb376c65a49d0f182c317ebd97dc6d1ced94c8e1d09c0f3a" dependencies = [ "criterion", - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -1338,6 +1348,12 @@ dependencies = [ "parking_lot 0.12.1", ] +[[package]] +name = "data-encoding" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5" + [[package]] name = "debugid" version = "0.8.0" @@ -1394,7 +1410,7 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "redox_users", "winapi 0.3.9", ] @@ -1479,6 +1495,7 @@ dependencies = [ "encryption", "error_code", "file_system", + "gcp", "kvproto", "openssl", "protobuf", @@ -1668,7 +1685,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" dependencies = [ "errno-dragonfly", - "libc 0.2.146", + "libc 0.2.151", "winapi 0.3.9", ] @@ -1679,7 +1696,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" dependencies = [ "cc", - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -1787,7 +1804,7 @@ dependencies = [ "crossbeam-utils", "fs2", "lazy_static", - "libc 0.2.146", + "libc 0.2.151", "maligned", "online_config", "openssl", @@ -1812,7 +1829,7 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed3d8a5e20435ff00469e51a0d82049bae66504b5c429920dadf9bb54d47b3f" dependencies = [ - "libc 0.2.146", + "libc 
0.2.151", "thiserror", "winapi 0.3.9", ] @@ -1824,7 +1841,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d34cfa13a63ae058bfa601fe9e313bbdb3746427c1459185464ce0fcf62e1e8" dependencies = [ "cfg-if 1.0.0", - "libc 0.2.146", + "libc 0.2.151", "redox_syscall 0.2.11", "winapi 0.3.9", ] @@ -1837,7 +1854,7 @@ checksum = "d691fdb3f817632d259d09220d4cf0991dbb2c9e59e044a02a59194bf6e14484" dependencies = [ "cc", "lazy_static", - "libc 0.2.146", + "libc 0.2.151", "winapi 0.3.9", ] @@ -1868,7 +1885,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2adaffba6388640136149e18ed080b77a78611c1e1d6de75aedcdf78df5d4682" dependencies = [ "crc32fast", - "libc 0.2.146", + "libc 0.2.151", "libz-sys", "miniz_oxide 0.3.7", ] @@ -1909,7 +1926,7 @@ name = "fs2" version = "0.4.3" source = "git+https://github.com/tabokie/fs2-rs?branch=tikv#cd503764a19a99d74c1ab424dd13d6bcd093fcae" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "winapi 0.3.9", ] @@ -1935,7 +1952,7 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f41b048a94555da0f42f1d632e2e19510084fb8e303b0daa2816e733fb3644a0" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -2152,14 +2169,21 @@ name = "gcp" version = "0.0.1" dependencies = [ "async-trait", + "base64 0.13.0", "cloud", + "crc32c", + "crypto", "futures-util", "http", "hyper", "hyper-tls", "kvproto", + "lazy_static", "matches", "pin-project", + "regex", + "serde", + "serde_json", "slog", "slog-global", "tame-gcs", @@ -2186,20 +2210,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "473a1265acc8ff1e808cd0a1af8cee3c2ee5200916058a2ca113c29f2d903571" dependencies = [ "cfg-if 0.1.10", - "libc 0.2.146", + "libc 0.2.151", "wasi 0.7.0", ] [[package]] name = "getrandom" -version = "0.2.3" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" dependencies = [ "cfg-if 1.0.0", "js-sys", - "libc 0.2.146", - "wasi 0.10.2+wasi-snapshot-preview1", + "libc 0.2.151", + "wasi 0.11.0+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -2247,7 +2271,7 @@ dependencies = [ "futures-executor", "futures-util", "grpcio-sys", - "libc 0.2.146", + "libc 0.2.151", "log", "parking_lot 0.11.1", "protobuf", @@ -2284,7 +2308,7 @@ dependencies = [ "bindgen 0.59.2", "cc", "cmake", - "libc 0.2.146", + "libc 0.2.151", "libz-sys", "openssl-sys", "pkg-config", @@ -2353,7 +2377,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "307c3c9f937f38e3534b1d6447ecf090cafcc9744e4a6360e8b037b2cf5af120" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -2362,7 +2386,7 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -2595,7 +2619,7 @@ checksum = "4816c66d2c8ae673df83366c18341538f234a26d65a9ecea5c348b453ac1d02f" dependencies = [ "bitflags", "inotify-sys", - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -2604,7 +2628,7 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -2631,7 +2655,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "windows-sys 0.42.0", ] @@ -2641,7 +2665,7 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -2699,7 +2723,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2b1d42ef453b30b7387e113da1c83ab1605d90c5b4e0eb8e96d016ed3b8c160" dependencies = [ "getrandom 0.1.12", - "libc 0.2.146", + "libc 0.2.151", "log", ] @@ -2776,9 +2800,9 @@ checksum = "e32a70cf75e5846d53a673923498228bbec6a8624708a9ea5645f075d6276122" [[package]] name = "libc" -version = "0.2.146" +version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" [[package]] name = "libfuzzer-sys" @@ -2818,7 +2842,7 @@ dependencies = [ "bzip2-sys", "cc", "cmake", - "libc 0.2.146", + "libc 0.2.151", "libtitan_sys", "libz-sys", "lz4-sys", @@ -2836,7 +2860,7 @@ dependencies = [ "bzip2-sys", "cc", "cmake", - "libc 0.2.146", + "libc 0.2.151", "libz-sys", "lz4-sys", "snappy-sys", @@ -2850,7 +2874,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de5435b8549c16d423ed0c03dbaafe57cf6c3344744f1242520d59c9d8ecec66" dependencies = [ "cc", - "libc 0.2.146", + "libc 0.2.151", "pkg-config", "vcpkg", ] @@ -2909,7 +2933,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" dependencies = [ "cc", - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -2943,12 +2967,9 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "memchr" -version = "2.4.1" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" -dependencies = [ - "libc 0.2.146", -] +checksum = 
"523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" [[package]] name = "memmap" @@ -2956,7 +2977,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "winapi 0.3.9", ] @@ -2966,7 +2987,7 @@ version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -2975,7 +2996,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45fd3a57831bf88bc63f8cebc0cf956116276e97fef3966103e96416209f7c92" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -3055,7 +3076,7 @@ dependencies = [ "fuchsia-zircon-sys", "iovec", "kernel32-sys", - "libc 0.2.146", + "libc 0.2.151", "log", "miow", "net2", @@ -3069,7 +3090,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "log", "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.42.0", @@ -3115,7 +3136,7 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1587ebb20a5b04738f16cffa7e2526f1b8496b84f92920facd518362ff1559eb" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -3146,7 +3167,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8d96b2e1c8da3957d58100b09f102c6d9cfdfced01b7ec5a8974044bb09dbd4" dependencies = [ "lazy_static", - "libc 0.2.146", + "libc 0.2.151", "log", "openssl", "openssl-probe", @@ -3164,7 +3185,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "391630d12b68002ae1e25e8f974306474966550ad82dac6886fb8910c19568ae" dependencies = [ 
"cfg-if 0.1.10", - "libc 0.2.146", + "libc 0.2.151", "winapi 0.3.9", ] @@ -3176,7 +3197,7 @@ checksum = "8f17df307904acd05aa8e32e97bb20f2a0df1728bbc2d771ae8f9a90463441e9" dependencies = [ "bitflags", "cfg-if 1.0.0", - "libc 0.2.146", + "libc 0.2.151", "memoffset 0.6.4", ] @@ -3188,7 +3209,7 @@ checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" dependencies = [ "bitflags", "cfg-if 1.0.0", - "libc 0.2.146", + "libc 0.2.151", "memoffset 0.7.1", "pin-utils", "static_assertions", @@ -3248,7 +3269,7 @@ dependencies = [ "fsevent", "fsevent-sys", "inotify", - "libc 0.2.146", + "libc 0.2.151", "mio 0.6.23", "mio-extras", "walkdir", @@ -3366,7 +3387,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" dependencies = [ "hermit-abi 0.1.3", - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -3375,7 +3396,7 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -3386,7 +3407,7 @@ checksum = "80e47cfc4c0a1a519d9a025ebfbac3a2439d1b5cdf397d72dcb79b11d9920dab" dependencies = [ "base64 0.13.0", "chrono", - "getrandom 0.2.3", + "getrandom 0.2.11", "http", "rand 0.8.5", "serde", @@ -3453,7 +3474,7 @@ dependencies = [ "bitflags", "cfg-if 1.0.0", "foreign-types", - "libc 0.2.146", + "libc 0.2.151", "once_cell", "openssl-macros", "openssl-sys", @@ -3492,7 +3513,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d3d193fb1488ad46ffe3aaabc912cc931d02ee8518fe2959aea8ef52718b0c0" dependencies = [ "cc", - "libc 0.2.146", + "libc 0.2.151", "openssl-src", "pkg-config", "vcpkg", @@ -3522,7 +3543,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd" 
dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "winapi 0.3.9", ] @@ -3565,7 +3586,7 @@ checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" dependencies = [ "cfg-if 1.0.0", "instant", - "libc 0.2.146", + "libc 0.2.151", "redox_syscall 0.2.11", "smallvec", "winapi 0.3.9", @@ -3578,7 +3599,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28141e0cc4143da2443301914478dc976a61ffdb3f043058310c70df2fed8954" dependencies = [ "cfg-if 1.0.0", - "libc 0.2.146", + "libc 0.2.151", "redox_syscall 0.2.11", "smallvec", "windows-sys 0.32.0", @@ -3655,7 +3676,7 @@ checksum = "b8f94885300e262ef461aa9fd1afbf7df3caf9e84e271a74925d1c6c8b24830f" dependencies = [ "bitflags", "byteorder", - "libc 0.2.146", + "libc 0.2.151", "mmap", "nom 4.2.3", "phf", @@ -3788,7 +3809,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d27361d7578b410d0eb5fe815c2b2105b01ab770a7c738cb9a231457a809fcc7" dependencies = [ "ipnetwork", - "libc 0.2.146", + "libc 0.2.151", "pnet_base", "pnet_sys", "winapi 0.2.8", @@ -3800,7 +3821,7 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82f881a6d75ac98c5541db6144682d1773bb14c6fc50c6ebac7086c8f7f23c29" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "winapi 0.2.8", "ws2_32-sys", ] @@ -3815,7 +3836,7 @@ dependencies = [ "cfg-if 1.0.0", "findshlibs", "inferno", - "libc 0.2.146", + "libc 0.2.151", "log", "nix 0.26.2", "once_cell", @@ -3899,7 +3920,7 @@ dependencies = [ "byteorder", "hex 0.4.2", "lazy_static", - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -3908,7 +3929,7 @@ version = "0.4.2" source = "git+https://github.com/tikv/procinfo-rs?rev=7693954bd1dd86eb1709572fd7b62fd5f7ff2ea1#7693954bd1dd86eb1709572fd7b62fd5f7ff2ea1" dependencies = [ "byteorder", - "libc 0.2.146", + "libc 0.2.151", "nom 2.2.1", "rustc_version 0.2.3", ] @@ -3933,7 +3954,7 @@ dependencies = [ "cfg-if 1.0.0", "fnv", "lazy_static", - "libc 0.2.146", + 
"libc 0.2.151", "memchr", "parking_lot 0.11.1", "protobuf", @@ -4062,7 +4083,7 @@ dependencies = [ "hex 0.4.2", "if_chain", "lazy_static", - "libc 0.2.146", + "libc 0.2.151", "log", "lz4-sys", "memmap2 0.9.3", @@ -4258,7 +4279,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" dependencies = [ "fuchsia-cprng", - "libc 0.2.146", + "libc 0.2.151", "rand_core 0.3.1", "rdrand", "winapi 0.3.9", @@ -4271,7 +4292,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" dependencies = [ "getrandom 0.1.12", - "libc 0.2.146", + "libc 0.2.151", "rand_chacha 0.2.1", "rand_core 0.5.1", "rand_hc", @@ -4283,7 +4304,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "rand_chacha 0.3.0", "rand_core 0.6.2", ] @@ -4338,7 +4359,7 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.11", ] [[package]] @@ -4431,18 +4452,19 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.11", "redox_syscall 0.2.11", ] [[package]] name = "regex" -version = "1.5.6" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ "aho-corasick", "memchr", + "regex-automata 0.4.3", "regex-syntax", ] @@ -4455,11 +4477,22 @@ dependencies = 
[ "byteorder", ] +[[package]] +name = "regex-automata" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + [[package]] name = "regex-syntax" -version = "0.6.26" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "region_cache_memory_engine" @@ -4597,7 +4630,7 @@ dependencies = [ "grpcio", "kvproto", "lazy_static", - "libc 0.2.146", + "libc 0.2.151", "log", "online_config", "pdqselect", @@ -4658,7 +4691,7 @@ name = "rocksdb" version = "0.3.0" source = "git+https://github.com/tikv/rust-rocksdb.git#73ba736143699fa623486c335527dd2a284bd0df" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "librocksdb_sys", ] @@ -4840,7 +4873,7 @@ dependencies = [ "bitflags", "errno", "io-lifetimes", - "libc 0.2.146", + "libc 0.2.151", "linux-raw-sys", "windows-sys 0.42.0", ] @@ -4918,7 +4951,7 @@ dependencies = [ "bitflags", "core-foundation", "core-foundation-sys", - "libc 0.2.146", + "libc 0.2.151", "security-framework-sys", ] @@ -4929,7 +4962,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3676258fd3cfe2c9a0ec99ce3038798d847ce3e4bb17746373eb9f0f1ac16339" dependencies = [ "core-foundation-sys", - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -5126,7 +5159,7 @@ dependencies = [ "hybrid_engine", "keys", "kvproto", - "libc 0.2.146", + "libc 0.2.151", "log", "log_wrappers", "pd_client", @@ -5206,7 +5239,7 @@ version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "signal-hook-registry", ] @@ -5216,7 
+5249,7 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -5364,7 +5397,7 @@ version = "0.1.0" source = "git+https://github.com/busyjay/rust-snappy.git?branch=static-link#8c12738bad811397600455d6982aff754ea2ac44" dependencies = [ "cmake", - "libc 0.2.146", + "libc 0.2.151", "pkg-config", ] @@ -5392,7 +5425,7 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "winapi 0.3.9", ] @@ -5598,7 +5631,7 @@ source = "git+https://github.com/tikv/sysinfo?branch=0.26-fix-cpu#5a1bcf08816979 dependencies = [ "cfg-if 1.0.0", "core-foundation-sys", - "libc 0.2.146", + "libc 0.2.151", "ntapi", "once_cell", "rayon", @@ -5633,11 +5666,10 @@ dependencies = [ [[package]] name = "tame-oauth" -version = "0.4.7" -source = "git+https://github.com/tikv/tame-oauth?branch=fips#176e3c69e9b5cd04b4248824ae6ee38ef57385be" +version = "0.9.6" +source = "git+https://github.com/tikv/tame-oauth?branch=fips-0.9#487e287c0d316b832dc44735cd9b7f7c432a10aa" dependencies = [ - "base64 0.13.0", - "chrono", + "data-encoding", "http", "lock_api", "openssl", @@ -5680,7 +5712,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" dependencies = [ "cfg-if 1.0.0", - "libc 0.2.146", + "libc 0.2.151", "rand 0.8.5", "redox_syscall 0.2.11", "remove_dir_all", @@ -5986,7 +6018,7 @@ dependencies = [ "hyper", "keys", "kvproto", - "libc 0.2.146", + "libc 0.2.151", "log_wrappers", "more-asserts", "online_config", @@ -6296,7 +6328,7 @@ dependencies = [ "keys", "kvproto", "lazy_static", - "libc 0.2.146", + "libc 0.2.151", "libloading", "log", "log_wrappers", @@ -6401,7 +6433,7 @@ dependencies 
= [ "hex 0.4.2", "keys", "kvproto", - "libc 0.2.146", + "libc 0.2.151", "log", "log_wrappers", "pd_client", @@ -6436,7 +6468,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e37706572f4b151dff7a0146e040804e9c26fe3a3118591112f05cf12a4216c1" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "paste", "tikv-jemalloc-sys", ] @@ -6449,7 +6481,7 @@ checksum = "aeab4310214fe0226df8bfeb893a291a58b19682e8a07e1e1d4483ad4200d315" dependencies = [ "cc", "fs_extra", - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -6458,7 +6490,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20612db8a13a6c06d57ec83953694185a367e16945f66565e8028d2c0bd76979" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "tikv-jemalloc-sys", ] @@ -6491,7 +6523,7 @@ version = "0.1.0" dependencies = [ "fxhash", "lazy_static", - "libc 0.2.146", + "libc 0.2.151", "mimalloc", "snmalloc-rs", "tcmalloc", @@ -6561,7 +6593,7 @@ dependencies = [ "http", "kvproto", "lazy_static", - "libc 0.2.146", + "libc 0.2.151", "log", "log_wrappers", "mnt", @@ -6611,7 +6643,7 @@ version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "redox_syscall 0.1.56", "winapi 0.3.9", ] @@ -6623,7 +6655,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" dependencies = [ "itoa 1.0.1", - "libc 0.2.146", + "libc 0.2.151", "num_threads", "serde", "time-core", @@ -6683,7 +6715,7 @@ checksum = "a9e03c497dc955702ba729190dc4aac6f2a0ce97f913e5b1b5912fc5039d9099" dependencies = [ "autocfg", "bytes", - "libc 0.2.146", + "libc 0.2.151", "memchr", "mio 0.8.5", "num_cpus", @@ -6992,7 +7024,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"055058552ca15c566082fc61da433ae678f78986a6f16957e33162d1b218792a" dependencies = [ "kernel32-sys", - "libc 0.2.146", + "libc 0.2.151", "winapi 0.2.8", ] @@ -7002,7 +7034,7 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.11", "serde", ] @@ -7012,7 +7044,7 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "feb41e78f93363bb2df8b0e86a2ca30eed7806ea16ea0c790d757cf93f79be83" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.11", ] [[package]] @@ -7078,12 +7110,6 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b89c3ce4ce14bdc6fb6beaf9ec7928ca331de5df7e5ea278375642a2f478570d" -[[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -7176,7 +7202,7 @@ checksum = "2a5a7e487e921cf220206864a94a89b6c6905bfc19f1057fa26a4cb360e5c1d2" dependencies = [ "either", "lazy_static", - "libc 0.2.146", + "libc 0.2.151", ] [[package]] @@ -7481,7 +7507,7 @@ version = "5.0.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" dependencies = [ - "libc 0.2.146", + "libc 0.2.151", "zstd-sys", ] @@ -7492,5 +7518,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" dependencies = [ "cc", - "libc 0.2.146", + "libc 0.2.151", ] diff --git a/Cargo.toml b/Cargo.toml index a08cd21baf4..fe7382e3c2b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -199,9 +199,8 @@ rusoto_kms = { git = "https://github.com/tikv/rusoto", 
branch = "gh1482-s3-addr- rusoto_mock = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } rusoto_s3 = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } rusoto_sts = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr-styles" } - # NOTICE: use openssl for signature to support fips 140 -tame-oauth = { git = "https://github.com/tikv/tame-oauth", branch = "fips" } +tame-oauth = { git = "https://github.com/tikv/tame-oauth", branch = "fips-0.9" } snappy-sys = { git = "https://github.com/busyjay/rust-snappy.git", branch = "static-link" } diff --git a/components/cloud/aws/src/kms.rs b/components/cloud/aws/src/kms.rs index f158dfb709e..87b4c48d568 100644 --- a/components/cloud/aws/src/kms.rs +++ b/components/cloud/aws/src/kms.rs @@ -4,7 +4,7 @@ use std::ops::Deref; use async_trait::async_trait; use cloud::{ - error::{Error, KmsError, Result}, + error::{Error, KmsError, OtherError, Result}, kms::{Config, CryptographyType, DataKeyPair, EncryptedKey, KeyId, KmsProvider, PlainKey}, }; use rusoto_core::{request::DispatchSignedRequest, RusotoError}; @@ -148,11 +148,13 @@ fn classify_generate_data_key_error(err: RusotoError) -> E match &e { GenerateDataKeyError::NotFound(_) => Error::ApiNotFound(err.into()), GenerateDataKeyError::InvalidKeyUsage(_) => { - Error::KmsError(KmsError::Other(err.into())) + Error::KmsError(KmsError::Other(OtherError::from_box(err.into()))) } GenerateDataKeyError::DependencyTimeout(_) => Error::ApiTimeout(err.into()), GenerateDataKeyError::KMSInternal(_) => Error::ApiInternal(err.into()), - _ => Error::KmsError(KmsError::Other(FixRusotoErrorDisplay(err).into())), + _ => Error::KmsError(KmsError::Other(OtherError::from_box( + FixRusotoErrorDisplay(err).into(), + ))), } } else { classify_error(err) @@ -167,7 +169,9 @@ fn classify_decrypt_error(err: RusotoError) -> Error { } DecryptError::DependencyTimeout(_) => Error::ApiTimeout(err.into()), DecryptError::KMSInternal(_) => 
Error::ApiInternal(err.into()), - _ => Error::KmsError(KmsError::Other(FixRusotoErrorDisplay(err).into())), + _ => Error::KmsError(KmsError::Other(OtherError::from_box( + FixRusotoErrorDisplay(err).into(), + ))), } } else { classify_error(err) @@ -179,7 +183,9 @@ fn classify_error(err: RusotoError RusotoError::HttpDispatch(_) => Error::ApiTimeout(err.into()), RusotoError::Credentials(_) => Error::ApiAuthentication(err.into()), e if e.is_retryable() => Error::ApiInternal(err.into()), - _ => Error::KmsError(KmsError::Other(FixRusotoErrorDisplay(err).into())), + _ => Error::KmsError(KmsError::Other(OtherError::from_box( + FixRusotoErrorDisplay(err).into(), + ))), } } @@ -219,6 +225,7 @@ mod tests { endpoint: String::new(), }, azure: None, + gcp: None, }; let dispatcher = @@ -263,6 +270,7 @@ mod tests { endpoint: String::new(), }, azure: None, + gcp: None, }; // IncorrectKeyException diff --git a/components/cloud/azure/src/kms.rs b/components/cloud/azure/src/kms.rs index c743ae415f5..f1afd021c1f 100644 --- a/components/cloud/azure/src/kms.rs +++ b/components/cloud/azure/src/kms.rs @@ -9,7 +9,7 @@ use azure_identity::{ }; use azure_security_keyvault::{prelude::*, KeyClient}; use cloud::{ - error::{Error as CloudError, KmsError, Result}, + error::{Error as CloudError, KmsError, OtherError, Result}, kms::{Config, CryptographyType, DataKeyPair, EncryptedKey, KeyId, KmsProvider, PlainKey}, }; use tikv_util::box_err; @@ -139,8 +139,8 @@ impl AzureKms { ); Self::new_with_credentials(config, keyvault_credential, hsm_credential) } else { - Err(CloudError::KmsError(KmsError::Other(box_err!( - "invalid configurations for Azure KMS" + Err(CloudError::KmsError(KmsError::Other(OtherError::from_box( + box_err!("invalid configurations for Azure KMS"), )))) } } @@ -242,7 +242,7 @@ fn convert_azure_error(err: AzureError) -> CloudError { "unknown error", )) }; - CloudError::KmsError(KmsError::Other(err_msg)) + CloudError::KmsError(KmsError::Other(OtherError::from_box(err_msg))) } 
#[inline] @@ -281,6 +281,7 @@ mod tests { endpoint: String::new(), }, azure: Some(err_azure_cfg.clone()), + gcp: None, }; AzureKms::new(err_config.clone()).unwrap_err(); let azure_cfg = SubConfigAzure { @@ -324,6 +325,7 @@ mod tests { endpoint: String::new(), }, azure: Some(azure_cfg), + gcp: None, }; if config.vendor != STORAGE_VENDOR_NAME_AZURE { AzureKms::new(config).unwrap(); diff --git a/components/cloud/gcp/Cargo.toml b/components/cloud/gcp/Cargo.toml index f47506a6222..d4bba10302b 100644 --- a/components/cloud/gcp/Cargo.toml +++ b/components/cloud/gcp/Cargo.toml @@ -6,17 +6,24 @@ publish = false [dependencies] async-trait = "0.1" +base64 = "0.13.0" cloud = { workspace = true } +crc32c = "0.6" +crypto = { workspace = true } futures-util = { version = "0.3", default-features = false, features = ["io"] } http = "0.2.0" hyper = "0.14" hyper-tls = "0.5" kvproto = { workspace = true } +lazy_static = "1.3" +regex = "1.10" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" slog = { workspace = true } # better to not use slog-global, but pass in the logger slog-global = { workspace = true } tame-gcs = { version = "0.10", features = ["async-multipart"] } -tame-oauth = "0.4.7" +tame-oauth = "0.9.6" tikv_util = { workspace = true } tokio = { version = "1.5", features = ["time"] } url = "2.0" diff --git a/components/cloud/gcp/src/client.rs b/components/cloud/gcp/src/client.rs new file mode 100644 index 00000000000..7dc99c0e1f2 --- /dev/null +++ b/components/cloud/gcp/src/client.rs @@ -0,0 +1,266 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + convert::TryInto, + fmt::{self, Display}, + io, + result::Result as StdResult, + sync::Arc, +}; + +use hyper::{client::HttpConnector, Body, Client, Request, Response, StatusCode}; +use hyper_tls::HttpsConnector; +use serde::Deserialize; +use tame_oauth::gcp::{ + end_user::EndUserCredentialsInner, service_account::ServiceAccountProviderInner, + EndUserCredentialsInfo, ServiceAccountInfo, TokenOrRequest, TokenProvider, + TokenProviderWrapper, TokenProviderWrapperInner, +}; +use tikv_util::stream::RetryError; + +// GCS compatible storage +#[derive(Clone)] +pub(crate) struct GcpClient { + token_provider: Option>, + client: Client, Body>, +} + +impl GcpClient { + /// Create a new gcp cleint for the given config. + pub fn with_svc_info(svc_info: Option) -> io::Result { + let token_provider = if let Some(info) = svc_info { + let svc_info_provider = ServiceAccountProviderInner::new(info) + .or_invalid_input("invalid credentials_blob")?; + Some(TokenProviderWrapperInner::ServiceAccount(svc_info_provider)) + } else { + None + }; + Ok(Self::with_token_provider(token_provider)) + } + + fn with_token_provider(token_provider: Option) -> Self { + let client = Client::builder().build(HttpsConnector::new()); + Self { + token_provider: token_provider.map(|t| Arc::new(TokenProviderWrapper::wrap(t))), + client, + } + } + + pub fn with_default_provider() -> io::Result { + let provider = TokenProviderWrapperInner::get_default_provider() + .map_err(|e| RequestError::OAuth(e, "default_provider".into()))?; + Ok(Self::with_token_provider(provider)) + } + + pub fn load_from(credentail_path: Option<&str>) -> io::Result { + if let Some(path) = credentail_path { + let json_data = std::fs::read(path)?; + let cred_type = CredentialType::parse_from_json(&json_data)?; + match cred_type { + CredentialType::ServiceAccount => { + let svc_info = serde_json::from_slice(&json_data)?; + return Self::with_svc_info(Some(svc_info)); + } + CredentialType::AuthorizedUser => { + let 
user_credential: EndUserCredentialsInfo = + serde_json::from_slice(&json_data)?; + let provider = EndUserCredentialsInner::new(user_credential); + return Ok(Self::with_token_provider(Some( + TokenProviderWrapperInner::EndUser(provider), + ))); + } + } + }; + Self::with_default_provider() + } + + pub(crate) async fn set_auth( + &self, + req: &mut Request, + scope: tame_gcs::Scopes, + token_provider: Arc, + ) -> StdResult<(), RequestError> { + let token_or_request = token_provider + .get_token(&[scope]) + .map_err(|e| RequestError::OAuth(e, "get_token".to_string()))?; + let token = match token_or_request { + TokenOrRequest::Token(token) => token, + TokenOrRequest::Request { + request, + scope_hash, + .. + } => { + let res = self + .client + .request(request.map(From::from)) + .await + .map_err(|e| RequestError::Hyper(e, "set auth request".to_owned()))?; + if !res.status().is_success() { + return Err(status_code_error( + res.status(), + "set auth request".to_string(), + )); + } + let (parts, body) = res.into_parts(); + let body = hyper::body::to_bytes(body) + .await + .map_err(|e| RequestError::Hyper(e, "set auth body".to_owned()))?; + token_provider + .parse_token_response(scope_hash, Response::from_parts(parts, body)) + .map_err(|e| RequestError::OAuth(e, "set auth parse token".to_string()))? 
+ } + }; + req.headers_mut().insert( + http::header::AUTHORIZATION, + token + .try_into() + .map_err(|e| RequestError::OAuth(e, "set auth add auth header".to_string()))?, + ); + + Ok(()) + } + + pub async fn make_request( + &self, + mut req: Request, + scope: tame_gcs::Scopes, + ) -> StdResult, RequestError> { + if let Some(svc_access) = &self.token_provider { + self.set_auth(&mut req, scope, svc_access.clone()).await?; + } + let uri = req.uri().to_string(); + let res = self + .client + .request(req) + .await + .map_err(|e| RequestError::Hyper(e, uri.clone()))?; + if !res.status().is_success() { + return Err(status_code_error(res.status(), uri)); + } + Ok(res) + } +} + +#[derive(Clone, Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +enum CredentialType { + ServiceAccount, + AuthorizedUser, +} + +impl CredentialType { + fn parse_from_json(data: &[u8]) -> StdResult { + let wrapper: TypeWrapper = serde_json::from_slice(data)?; + Ok(wrapper.cred_type) + } +} + +#[derive(Clone, Debug, Deserialize)] +struct TypeWrapper { + #[serde(rename = "type")] + cred_type: CredentialType, +} + +trait ResultExt { + type Ok; + + // Maps the error of this result as an `std::io::Error` with `Other` error + // kind. + fn or_io_error(self, msg: D) -> io::Result; + + // Maps the error of this result as an `std::io::Error` with `InvalidInput` + // error kind. 
+ fn or_invalid_input(self, msg: D) -> io::Result; +} + +impl ResultExt for StdResult { + type Ok = T; + fn or_io_error(self, msg: D) -> io::Result { + self.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("{}: {}", msg, e))) + } + fn or_invalid_input(self, msg: D) -> io::Result { + self.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, format!("{}: {}", msg, e))) + } +} + +#[derive(Debug)] +pub enum RequestError { + Hyper(hyper::Error, String), + OAuth(tame_oauth::Error, String), + Gcs(tame_gcs::Error), + InvalidEndpoint(http::uri::InvalidUri), +} + +impl Display for RequestError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self) + } +} + +impl std::error::Error for RequestError {} + +impl From for RequestError { + fn from(err: http::uri::InvalidUri) -> Self { + Self::InvalidEndpoint(err) + } +} + +pub fn status_code_error(code: StatusCode, msg: String) -> RequestError { + RequestError::OAuth(tame_oauth::Error::HttpStatus(code), msg) +} + +impl From for io::Error { + fn from(err: RequestError) -> Self { + match err { + RequestError::Hyper(e, msg) => { + Self::new(io::ErrorKind::InvalidInput, format!("HTTP {}: {}", msg, e)) + } + RequestError::OAuth(tame_oauth::Error::Io(e), _) => e, + RequestError::OAuth(tame_oauth::Error::HttpStatus(sc), msg) => { + let fmt = format!("GCS OAuth: {}: {}", msg, sc); + match sc.as_u16() { + 401 | 403 => Self::new(io::ErrorKind::PermissionDenied, fmt), + 404 => Self::new(io::ErrorKind::NotFound, fmt), + _ if sc.is_server_error() => Self::new(io::ErrorKind::Interrupted, fmt), + _ => Self::new(io::ErrorKind::InvalidInput, fmt), + } + } + RequestError::OAuth(tame_oauth::Error::Auth(e), msg) => Self::new( + io::ErrorKind::PermissionDenied, + format!("authorization failed: {}: {}", msg, e), + ), + RequestError::OAuth(e, msg) => Self::new( + io::ErrorKind::InvalidInput, + format!("oauth failed: {}: {}", msg, e), + ), + RequestError::Gcs(e) => Self::new( + io::ErrorKind::InvalidInput, + 
format!("invalid GCS request: {}", e), + ), + RequestError::InvalidEndpoint(e) => Self::new( + io::ErrorKind::InvalidInput, + format!("invalid GCS endpoint URI: {}", e), + ), + } + } +} + +impl RetryError for RequestError { + fn is_retryable(&self) -> bool { + match self { + // FIXME: Inspect the error source? + Self::Hyper(e, _) => { + e.is_closed() + || e.is_connect() + || e.is_incomplete_message() + || e.is_body_write_aborted() + } + // See https://cloud.google.com/storage/docs/exponential-backoff. + Self::OAuth(tame_oauth::Error::HttpStatus(StatusCode::TOO_MANY_REQUESTS), _) => true, + Self::OAuth(tame_oauth::Error::HttpStatus(StatusCode::REQUEST_TIMEOUT), _) => true, + Self::OAuth(tame_oauth::Error::HttpStatus(status), _) => status.is_server_error(), + // Consider everything else not retryable. + _ => false, + } + } +} diff --git a/components/cloud/gcp/src/gcs.rs b/components/cloud/gcp/src/gcs.rs index 4406954992d..bee9714e03d 100644 --- a/components/cloud/gcp/src/gcs.rs +++ b/components/cloud/gcp/src/gcs.rs @@ -1,5 +1,5 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{convert::TryInto, fmt::Display, io, sync::Arc}; +use std::{fmt::Display, io}; use async_trait::async_trait; use cloud::{ @@ -12,21 +12,23 @@ use futures_util::{ stream::{StreamExt, TryStreamExt}, }; use http::HeaderValue; -use hyper::{client::HttpConnector, Body, Client, Request, Response, StatusCode}; -use hyper_tls::HttpsConnector; +use hyper::{Body, Request, Response}; pub use kvproto::brpb::Gcs as InputConfig; use tame_gcs::{ common::{PredefinedAcl, StorageClass}, objects::{InsertObjectOptional, Metadata, Object}, types::{BucketName, ObjectId}, }; -use tame_oauth::gcp::{ServiceAccountAccess, ServiceAccountInfo, TokenOrRequest}; +use tame_oauth::gcp::ServiceAccountInfo; use tikv_util::{ - stream::{error_stream, AsyncReadAsSyncStreamOfBytes, RetryError}, + stream::{error_stream, AsyncReadAsSyncStreamOfBytes}, time::Instant, }; -use crate::utils::retry; +use crate::{ + client::{status_code_error, GcpClient, RequestError}, + utils::retry, +}; const GOOGLE_APIS: &str = "https://www.googleapis.com"; const HARDCODED_ENDPOINTS_SUFFIX: &[&str] = &["upload/storage/v1/", "storage/v1/"]; @@ -107,8 +109,7 @@ impl BlobConfig for Config { #[derive(Clone)] pub struct GcsStorage { config: Config, - svc_access: Option>, - client: Client, Body>, + client: GcpClient, } trait ResultExt { @@ -133,80 +134,6 @@ impl ResultExt for Result { } } -#[derive(Debug)] -enum RequestError { - Hyper(hyper::Error, String), - OAuth(tame_oauth::Error, String), - Gcs(tame_gcs::Error), - InvalidEndpoint(http::uri::InvalidUri), -} - -impl From for RequestError { - fn from(err: http::uri::InvalidUri) -> Self { - Self::InvalidEndpoint(err) - } -} - -fn status_code_error(code: StatusCode, msg: String) -> RequestError { - RequestError::OAuth(tame_oauth::Error::HttpStatus(code), msg) -} - -impl From for io::Error { - fn from(err: RequestError) -> Self { - match err { - RequestError::Hyper(e, msg) => { - Self::new(io::ErrorKind::InvalidInput, format!("HTTP {}: {}", msg, e)) - } - 
RequestError::OAuth(tame_oauth::Error::Io(e), _) => e, - RequestError::OAuth(tame_oauth::Error::HttpStatus(sc), msg) => { - let fmt = format!("GCS OAuth: {}: {}", msg, sc); - match sc.as_u16() { - 401 | 403 => Self::new(io::ErrorKind::PermissionDenied, fmt), - 404 => Self::new(io::ErrorKind::NotFound, fmt), - _ if sc.is_server_error() => Self::new(io::ErrorKind::Interrupted, fmt), - _ => Self::new(io::ErrorKind::InvalidInput, fmt), - } - } - RequestError::OAuth(tame_oauth::Error::AuthError(e), msg) => Self::new( - io::ErrorKind::PermissionDenied, - format!("authorization failed: {}: {}", msg, e), - ), - RequestError::OAuth(e, msg) => Self::new( - io::ErrorKind::InvalidInput, - format!("oauth failed: {}: {}", msg, e), - ), - RequestError::Gcs(e) => Self::new( - io::ErrorKind::InvalidInput, - format!("invalid GCS request: {}", e), - ), - RequestError::InvalidEndpoint(e) => Self::new( - io::ErrorKind::InvalidInput, - format!("invalid GCS endpoint URI: {}", e), - ), - } - } -} - -impl RetryError for RequestError { - fn is_retryable(&self) -> bool { - match self { - // FIXME: Inspect the error source? - Self::Hyper(e, _) => { - e.is_closed() - || e.is_connect() - || e.is_incomplete_message() - || e.is_body_write_aborted() - } - // See https://cloud.google.com/storage/docs/exponential-backoff. - Self::OAuth(tame_oauth::Error::HttpStatus(StatusCode::TOO_MANY_REQUESTS), _) => true, - Self::OAuth(tame_oauth::Error::HttpStatus(StatusCode::REQUEST_TIMEOUT), _) => true, - Self::OAuth(tame_oauth::Error::HttpStatus(status), _) => status.is_server_error(), - // Consider everything else not retryable. - _ => false, - } - } -} - impl GcsStorage { pub fn from_input(input: InputConfig) -> io::Result { Self::new(Config::from_input(input)?) @@ -214,21 +141,8 @@ impl GcsStorage { /// Create a new GCS storage for the given config. 
pub fn new(config: Config) -> io::Result { - let svc_access = if let Some(si) = &config.svc_info { - Some( - ServiceAccountAccess::new(si.clone()) - .or_invalid_input("invalid credentials_blob")?, - ) - } else { - None - }; - - let client = Client::builder().build(HttpsConnector::new()); - Ok(GcsStorage { - config, - svc_access: svc_access.map(Arc::new), - client, - }) + let client = GcpClient::with_svc_info(config.svc_info.clone())?; + Ok(GcsStorage { config, client }) } fn maybe_prefix_key(&self, key: &str) -> String { @@ -238,52 +152,6 @@ impl GcsStorage { key.to_owned() } - async fn set_auth( - &self, - req: &mut Request, - scope: tame_gcs::Scopes, - svc_access: Arc, - ) -> Result<(), RequestError> { - let token_or_request = svc_access - .get_token(&[scope]) - .map_err(|e| RequestError::OAuth(e, "get_token".to_string()))?; - let token = match token_or_request { - TokenOrRequest::Token(token) => token, - TokenOrRequest::Request { - request, - scope_hash, - .. - } => { - let res = self - .client - .request(request.map(From::from)) - .await - .map_err(|e| RequestError::Hyper(e, "set auth request".to_owned()))?; - if !res.status().is_success() { - return Err(status_code_error( - res.status(), - "set auth request".to_string(), - )); - } - let (parts, body) = res.into_parts(); - let body = hyper::body::to_bytes(body) - .await - .map_err(|e| RequestError::Hyper(e, "set auth body".to_owned()))?; - svc_access - .parse_token_response(scope_hash, Response::from_parts(parts, body)) - .map_err(|e| RequestError::OAuth(e, "set auth parse token".to_string()))? 
- } - }; - req.headers_mut().insert( - http::header::AUTHORIZATION, - token - .try_into() - .map_err(|e| RequestError::OAuth(e, "set auth add auth header".to_string()))?, - ); - - Ok(()) - } - async fn make_request( &self, mut req: Request, @@ -299,19 +167,7 @@ impl GcsStorage { } } - if let Some(svc_access) = &self.svc_access { - self.set_auth(&mut req, scope, svc_access.clone()).await?; - } - let uri = req.uri().to_string(); - let res = self - .client - .request(req) - .await - .map_err(|e| RequestError::Hyper(e, uri.clone()))?; - if !res.status().is_success() { - return Err(status_code_error(res.status(), uri)); - } - Ok(res) + self.client.make_request(req, scope).await } fn error_to_async_read(kind: io::ErrorKind, e: E) -> cloud::blob::BlobStream<'static> diff --git a/components/cloud/gcp/src/kms.rs b/components/cloud/gcp/src/kms.rs new file mode 100644 index 00000000000..ec1c689adcd --- /dev/null +++ b/components/cloud/gcp/src/kms.rs @@ -0,0 +1,412 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{fmt, result::Result as StdResult}; + +use async_trait::async_trait; +use cloud::{ + error::{Error as CloudError, KmsError, Result}, + kms::{Config, CryptographyType, DataKeyPair, EncryptedKey, KmsProvider, PlainKey}, + metrics, KeyId, +}; +use futures_util::stream::StreamExt; +use http::Method; +use hyper::Body; +use lazy_static::lazy_static; +use regex::Regex; +use serde::{Deserialize, Deserializer, Serialize}; +use tame_gcs::error::HttpStatusError; +use tikv_util::{box_err, stream::RetryError, time::Instant}; + +use crate::{ + client::{GcpClient, RequestError}, + STORAGE_VENDOR_NAME_GCP, +}; + +// generated random encryption data key length. +const DEFAULT_DATAKEY_SIZE: usize = 32; +// google kms endpoint. 
+const GCP_KMS_ENDPOINT: &str = "https://cloudkms.googleapis.com/v1/"; + +// following are related kms api method names: +const METHOD_ENCRYPT: &str = "encrypt"; +const METHOD_DECRYPT: &str = "decrypt"; +const METHOD_GEN_RANDOM_BYTES: &str = "generateRandomBytes"; + +/// Protection level of the generated random key, always using HSM(Hardware +/// Security Module). +const RANDOMIZE_PROTECTION_LEVEL: &str = "HSM"; + +/// The encryption key_id pattern of gcp ksm: +/// projects/{project_name}/locations/{location}/keyRings/{key_ring}/ +/// cryptoKeys/{key} +const KEY_ID_PATTERN: &str = + r"^projects/([^/]+)/locations/([^/]+)/keyRings/([^/]+)/cryptoKeys/([^/]+)/?$"; + +lazy_static! { + //The encryption key_id pattern regexp. + static ref KEY_ID_REGEX: Regex = Regex::new(KEY_ID_PATTERN).unwrap(); +} + +pub struct GcpKms { + config: Config, + // the location prefix of key id, + // format: projects/{project_name}/locations/{location} + location: String, + client: GcpClient, +} + +impl GcpKms { + pub fn new(mut config: Config) -> Result { + assert!(config.gcp.is_some()); + if !KEY_ID_REGEX.is_match(&config.key_id) { + return Err(CloudError::KmsError(KmsError::WrongMasterKey(box_err!( + "invalid key: '{}'", + &config.key_id + )))); + } + // remove the end '/' + if config.key_id.ends_with('/') { + let mut key = config.key_id.into_inner(); + key.pop(); + config.key_id = KeyId::new(key)?; + } + let location = { + let key = config.key_id.as_str(); + key.match_indices('/') + .nth(3) + .map(|(index, _)| key[..index].to_owned()) + .unwrap() + }; + + let client = GcpClient::load_from( + config + .gcp + .as_ref() + .and_then(|c| c.credential_file_path.as_deref()), + )?; + Ok(Self { + config, + location, + client, + }) + } + + async fn do_json_request( + &self, + key_name: &str, + method: &'static str, + data: Q, + ) -> std::result::Result + where + Q: Serialize + Send + Sync, + R: for<'a> Deserialize<'a> + Send + Sync, + { + let begin = Instant::now_coarse(); + let url = 
self.format_call_url(key_name, method); + let req_builder = http::Request::builder().header( + http::header::CONTENT_TYPE, + http::header::HeaderValue::from_static("application/json"), + ); + + let body = serde_json::to_string(&data).unwrap(); + let req = req_builder + .method(Method::POST) + .uri(url.clone()) + .body(Body::from(body)) + .map_err(|e| { + RequestError::Gcs(tame_gcs::error::Error::Http(tame_gcs::error::HttpError(e))) + })?; + let resp = self + .client + .make_request(req, tame_gcs::Scopes::CloudPlatform) + .await?; + metrics::CLOUD_REQUEST_HISTOGRAM_VEC + .with_label_values(&["gcp", method]) + .observe(begin.saturating_elapsed_secs()); + if !resp.status().is_success() { + return Err(RequestError::Gcs(tame_gcs::Error::HttpStatus( + HttpStatusError(resp.status()), + ))); + } + let mut data: Vec<_> = vec![]; + let mut body = resp.into_body(); + while let Some(bytes) = body.next().await { + match bytes { + Ok(b) => data.extend(b), + Err(e) => { + return Err(RequestError::Hyper(e, "fetch encrypt resp failed".into())); + } + } + } + serde_json::from_slice(&data).map_err(|e| RequestError::Gcs(e.into())) + } + + fn format_call_url(&self, key: &str, method: &str) -> String { + format!("{}{}/:{}?alt=json", GCP_KMS_ENDPOINT, key, method) + } +} + +impl fmt::Debug for GcpKms { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("GcpKmsClient") + .field("key", &self.config.key_id) + .finish() + } +} + +#[async_trait] +impl KmsProvider for GcpKms { + fn name(&self) -> &str { + STORAGE_VENDOR_NAME_GCP + } + + // On decrypt failure, the rule is to return WrongMasterKey error in case it is + // possible that a wrong master key has been used, or other error + // otherwise. 
+ async fn decrypt_data_key(&self, data_key: &EncryptedKey) -> Result> { + let decrypt_req = DecryptRequest { + ciphertext: data_key.clone().into_inner(), + ciphertext_crc32c: crc32c::crc32c(data_key.as_raw()), + }; + let resp: DecryptResp = self + .do_json_request(self.config.key_id.as_str(), METHOD_DECRYPT, decrypt_req) + .await + .map_err(|e| KmsError::Other(e.into()))?; + check_crc32(&resp.plaintext, resp.plaintext_crc32c)?; + Ok(resp.plaintext) + } + + async fn generate_data_key(&self) -> Result { + let random_bytes_req = GenRandomBytesReq { + length_bytes: DEFAULT_DATAKEY_SIZE, + protection_level: RANDOMIZE_PROTECTION_LEVEL.into(), + }; + let rb_resp: GenRandomBytesResp = self + .do_json_request(&self.location, METHOD_GEN_RANDOM_BYTES, random_bytes_req) + .await + .map_err(|e| KmsError::Other(e.into()))?; + check_crc32(&rb_resp.data, rb_resp.data_crc32c)?; + + let encrypt_request = EncryptRequest { + plaintext: rb_resp.data.clone(), + plaintext_crc32c: crc32c::crc32c(&rb_resp.data), + }; + let resp: EncryptResp = self + .do_json_request(self.config.key_id.as_str(), METHOD_ENCRYPT, encrypt_request) + .await + .map_err(|e| KmsError::Other(e.into()))?; + check_crc32(&resp.ciphertext, resp.ciphertext_crc32c)?; + + to_data_key(resp, rb_resp.data) + } +} + +#[derive(Clone, Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct EncryptRequest { + #[serde(with = "serde_base64_bytes")] + plaintext: Vec, + plaintext_crc32c: u32, +} + +#[derive(Clone, Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct EncryptResp { + #[serde(with = "serde_base64_bytes")] + ciphertext: Vec, + #[serde(deserialize_with = "deseralize_u32_from_str")] + ciphertext_crc32c: u32, +} + +fn to_data_key(encrypt_resp: EncryptResp, raw_bytes: Vec) -> Result { + Ok(DataKeyPair { + encrypted: EncryptedKey::new(encrypt_resp.ciphertext)?, + plaintext: PlainKey::new(raw_bytes, CryptographyType::AesGcm256)?, + }) +} + +#[derive(Clone, Debug, Serialize)] +#[serde(rename_all = 
"camelCase")] +struct DecryptRequest { + #[serde(with = "serde_base64_bytes")] + ciphertext: Vec, + ciphertext_crc32c: u32, +} + +#[derive(Clone, Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct DecryptResp { + #[serde(with = "serde_base64_bytes")] + plaintext: Vec, + #[serde(deserialize_with = "deseralize_u32_from_str")] + plaintext_crc32c: u32, +} + +fn check_crc32(data: &[u8], expected: u32) -> StdResult<(), Crc32Error> { + let crc = crc32c::crc32c(data); + if crc != expected { + return Err(Crc32Error { expected, got: crc }); + } + Ok(()) +} + +#[derive(Debug)] +pub struct Crc32Error { + expected: u32, + got: u32, +} + +impl fmt::Display for Crc32Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "crc32c mismatch, expected: {}, got: {}", + self.expected, self.got + ) + } +} + +impl std::error::Error for Crc32Error {} + +impl RetryError for Crc32Error { + fn is_retryable(&self) -> bool { + true + } +} + +impl From for CloudError { + fn from(e: Crc32Error) -> Self { + Self::KmsError(KmsError::Other(e.into())) + } +} + +mod serde_base64_bytes { + use serde::{Deserialize, Deserializer, Serializer}; + + // deserialize bytes from base64 encoded string. + pub fn deserialize<'de, D>(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + D::Error: serde::de::Error, + { + let v = String::deserialize(deserializer)?; + base64::decode(v) + .map_err(|e| serde::de::Error::custom(format!("base64 decode failed: {:?}", e,))) + } + + // serialize bytes with base64 encoding. 
+ pub fn serialize(data: &Vec, serializer: S) -> Result + where + S: Serializer, + { + let str_data = base64::encode(data); + serializer.serialize_str(&str_data) + } +} + +fn deseralize_u32_from_str<'de, D>(deserializer: D) -> StdResult +where + D: Deserializer<'de>, + D::Error: serde::de::Error, +{ + let v = String::deserialize(deserializer)?; + v.parse().map_err(|e| { + serde::de::Error::custom(format!("case crc32 string '{}' as u32 failed: {:?}", &v, e,)) + }) +} + +#[derive(Clone, Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct GenRandomBytesReq { + length_bytes: usize, + // we always use "HSM" currently, maybe export it as + // a config in the future. + protection_level: String, +} + +#[derive(Clone, Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct GenRandomBytesResp { + #[serde(with = "serde_base64_bytes")] + data: Vec, + #[serde(deserialize_with = "deseralize_u32_from_str")] + data_crc32c: u32, +} + +#[cfg(test)] +mod tests { + use cloud::kms::{Location, SubConfigGcp}; + + use super::*; + + #[test] + fn test_new_gcp_kms() { + for bad_key in [ + "abc", + "projects/test-project/locations/us-west-2/keyRings/tikv-gpc-kms-test/cryptoKeys/gl-dev-test//", + // key with version + "projects/test-project/locations/us-west-2/keyRings/tikv-gpc-kms-test/cryptoKeys/gl-dev-test/cryptoKeyVersions/1", + ] { + let cfg = Config { + key_id: KeyId::new(bad_key.into()).unwrap(), + location: Location { + region: "".into(), + endpoint: "".into(), + }, + vendor: "gcp".into(), + azure: None, + gcp: Some(SubConfigGcp { + credential_file_path: None, + }), + }; + + _ = GcpKms::new(cfg).unwrap_err(); + } + + for key in [ + "projects/test-project/locations/us-east-1/keyRings/tikv-gpc-kms-test/cryptoKeys/test", + "projects/test-project/locations/us-east-1/keyRings/tikv-gpc-kms-test/cryptoKeys/test/", + ] { + let cfg = Config { + key_id: KeyId::new(key.into()).unwrap(), + location: Location { + region: "".into(), + endpoint: "".into(), + }, + vendor: 
"gcp".into(), + azure: None, + gcp: Some(SubConfigGcp { + credential_file_path: None, + }), + }; + + let res = GcpKms::new(cfg).unwrap(); + assert_eq!(&res.location, "projects/test-project/locations/us-east-1"); + assert_eq!( + res.config.key_id.as_str(), + "projects/test-project/locations/us-east-1/keyRings/tikv-gpc-kms-test/cryptoKeys/test" + ); + } + } + + #[test] + fn test_serde_base64() { + #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)] + struct S { + #[serde(with = "serde_base64_bytes")] + data: Vec, + } + + let st = S { + data: "abcdedfa\r中文😅".into(), + }; + let str_data = serde_json::to_string(&st).unwrap(); + assert_eq!( + &str_data, + &format!("{{\"data\":\"{}\"}}", base64::encode(&st.data)) + ); + + let restored: S = serde_json::from_str(&str_data).unwrap(); + assert_eq!(restored, st); + } +} diff --git a/components/cloud/gcp/src/lib.rs b/components/cloud/gcp/src/lib.rs index 9ad97793988..4d81dd7189e 100644 --- a/components/cloud/gcp/src/lib.rs +++ b/components/cloud/gcp/src/lib.rs @@ -7,6 +7,12 @@ extern crate slog_global; mod gcs; pub use gcs::{Config, GcsStorage}; +mod client; +mod kms; +pub use kms::GcpKms; + +pub const STORAGE_VENDOR_NAME_GCP: &str = "gcp"; + pub mod utils { use std::future::Future; diff --git a/components/cloud/src/error.rs b/components/cloud/src/error.rs index c25c16fe62f..8fd1dda3e8e 100644 --- a/components/cloud/src/error.rs +++ b/components/cloud/src/error.rs @@ -2,7 +2,7 @@ use std::{ error, - fmt::{Debug, Display}, + fmt::{self, Debug, Display}, io::{Error as IoError, ErrorKind}, result, }; @@ -46,7 +46,13 @@ pub enum KmsError { #[error("Empty key {0}")] EmptyKey(String), #[error("Kms error {0}")] - Other(Box), + Other(OtherError), +} + +impl From for Error { + fn from(e: KmsError) -> Self { + Error::KmsError(e) + } } impl From for IoError { @@ -105,7 +111,37 @@ impl RetryError for KmsError { match self { KmsError::WrongMasterKey(_) => false, KmsError::EmptyKey(_) => false, - KmsError::Other(_) => true, + 
KmsError::Other(e) => e.retryable, + } + } +} + +#[derive(Debug)] +pub struct OtherError { + retryable: bool, + err: Box, +} + +impl OtherError { + pub fn from_box(err: Box) -> Self { + Self { + retryable: false, + err, + } + } +} + +impl From for OtherError { + fn from(e: E) -> Self { + Self { + retryable: e.is_retryable(), + err: Box::new(e), } } } + +impl fmt::Display for OtherError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.err) + } +} diff --git a/components/cloud/src/kms.rs b/components/cloud/src/kms.rs index 31a09b7cad7..c5d41c96ea0 100644 --- a/components/cloud/src/kms.rs +++ b/components/cloud/src/kms.rs @@ -5,7 +5,7 @@ use derive_more::Deref; use kvproto::encryptionpb::MasterKeyKms; use tikv_util::box_err; -use crate::error::{Error, KmsError, Result}; +use crate::error::{Error, KmsError, OtherError, Result}; #[derive(Debug, Clone)] pub struct Location { @@ -35,12 +35,19 @@ pub struct SubConfigAzure { pub client_secret: Option, } +/// Configurations for GCP KMS. 
+#[derive(Debug, Default, Clone)] +pub struct SubConfigGcp { + pub credential_file_path: Option, +} + #[derive(Debug, Clone)] pub struct Config { pub key_id: KeyId, pub location: Location, - pub azure: Option, pub vendor: String, + pub azure: Option, + pub gcp: Option, } impl Config { @@ -51,8 +58,9 @@ impl Config { region: mk.region, endpoint: mk.endpoint, }, - azure: None, vendor: mk.vendor, + azure: None, + gcp: None, }) } @@ -61,6 +69,12 @@ impl Config { cfg.azure = Some(azure_kms_cfg); Ok(cfg) } + + pub fn from_gcp_kms_config(mk: MasterKeyKms, gcp_kms_cfg: SubConfigGcp) -> Result { + let mut cfg = Config::from_proto(mk)?; + cfg.gcp = Some(gcp_kms_cfg); + Ok(cfg) + } } #[derive(PartialEq, Debug, Clone, Deref)] @@ -84,6 +98,12 @@ impl KeyId { } } +impl std::fmt::Display for KeyId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0) + } +} + // EncryptedKey is a newtype used to mark data as an encrypted key // It requires the vec to be non-empty #[derive(PartialEq, Clone, Debug, Deref)] @@ -103,6 +123,10 @@ impl EncryptedKey { pub fn into_inner(self) -> Vec { self.0 } + + pub fn as_raw(&self) -> &[u8] { + &self.0 + } } #[repr(u8)] @@ -134,11 +158,13 @@ impl PlainKey { pub fn new(key: Vec, t: CryptographyType) -> Result { let limitation = t.target_key_size(); if limitation > 0 && key.len() != limitation { - Err(Error::KmsError(KmsError::Other(box_err!( - "encryption method and key length mismatch, expect {} get + Err(Error::KmsError(KmsError::Other(OtherError::from_box( + box_err!( + "encryption method and key length mismatch, expect {} get {}", - limitation, - key.len() + limitation, + key.len() + ), )))) } else { Ok(Self { key, tag: t }) diff --git a/components/encryption/export/Cargo.toml b/components/encryption/export/Cargo.toml index 1a7b64eb7be..feb4ce23c8c 100644 --- a/components/encryption/export/Cargo.toml +++ b/components/encryption/export/Cargo.toml @@ -7,7 +7,7 @@ publish = false [features] default = 
["cloud-aws", "cloud-gcp", "cloud-azure"] cloud-aws = ["aws"] -cloud-gcp = [] +cloud-gcp = ["gcp"] cloud-azure = ["azure"] sm4 = ["encryption/sm4"] @@ -20,6 +20,7 @@ derive_more = "0.99.3" encryption = { workspace = true } error_code = { workspace = true } file_system = { workspace = true } +gcp = { workspace = true, optional = true } kvproto = { workspace = true } openssl = { workspace = true } protobuf = { version = "2.8", features = ["bytes"] } diff --git a/components/encryption/export/examples/ecli.rs b/components/encryption/export/examples/ecli.rs index 9f53a92df51..e641f7d36e3 100644 --- a/components/encryption/export/examples/ecli.rs +++ b/components/encryption/export/examples/ecli.rs @@ -4,10 +4,12 @@ use std::io::{Read, Write}; use azure::STORAGE_VENDOR_NAME_AZURE; pub use cloud::kms::Config as CloudConfig; +use encryption::GcpConfig; #[cfg(feature = "cloud-aws")] use encryption_export::{create_cloud_backend, KmsConfig}; use encryption_export::{AzureConfig, Backend, Error, Result}; use file_system::{File, OpenOptions}; +use gcp::STORAGE_VENDOR_NAME_GCP; use ini::ini::Ini; use kvproto::encryptionpb::EncryptedContent; use protobuf::Message; @@ -48,6 +50,7 @@ pub struct Opt { enum Command { Aws(SubCommandAws), Azure(SubCommandAzure), + Gcp(SubCommandGcp), } #[derive(StructOpt)] @@ -86,6 +89,15 @@ struct SubCommandAzure { secret: Option, } +#[derive(StructOpt)] +#[structopt(rename_all = "kebab-case")] +/// KMS backend. +struct SubCommandGcp { + /// KMS key id of backend. 
+ #[structopt(long)] + key_id: String, +} + fn create_aws_backend( cmd: &SubCommandAws, credential_file: Option<&String>, @@ -133,6 +145,20 @@ fn create_azure_backend( create_cloud_backend(&config) } +fn create_gcp_backend( + cmd: &SubCommandGcp, + credential_file: Option<&String>, +) -> Result> { + let mut config = KmsConfig::default(); + config.gcp = Some(GcpConfig { + credential_file_path: credential_file + .and_then(|f| if f.is_empty() { None } else { Some(f.clone()) }), + }); + config.key_id = cmd.key_id.to_owned(); + config.vendor = STORAGE_VENDOR_NAME_GCP.to_owned(); + create_cloud_backend(&config) +} + #[allow(irrefutable_let_patterns)] fn process() -> Result<()> { let opt: Opt = Opt::from_args(); @@ -142,9 +168,10 @@ fn process() -> Result<()> { file.read_to_end(&mut content)?; let credential_file = opt.credential_file.as_ref(); - let backend = match opt.command { - Command::Aws(ref cmd) => create_aws_backend(cmd, credential_file)?, - Command::Azure(ref cmd) => create_azure_backend(cmd, credential_file)?, + let backend = match &opt.command { + Command::Aws(cmd) => create_aws_backend(cmd, credential_file)?, + Command::Azure(cmd) => create_azure_backend(cmd, credential_file)?, + Command::Gcp(cmd) => create_gcp_backend(cmd, credential_file)?, }; let output = match opt.operation { diff --git a/components/encryption/export/src/lib.rs b/components/encryption/export/src/lib.rs index a36406d44ea..6f056bb618e 100644 --- a/components/encryption/export/src/lib.rs +++ b/components/encryption/export/src/lib.rs @@ -14,6 +14,8 @@ pub use encryption::{ KmsConfig, MasterKeyConfig, Result, }; use encryption::{cloud_convert_error, FileBackend, PlaintextBackend}; +#[cfg(feature = "cloud-gcp")] +use gcp::{GcpKms, STORAGE_VENDOR_NAME_GCP}; use tikv_util::{box_err, error, info}; pub fn data_key_manager_from_config( @@ -68,7 +70,16 @@ pub fn create_cloud_backend(config: &KmsConfig) -> Result> { let keyvault_provider = Box::new( AzureKms::new(conf).map_err(cloud_convert_error("new 
Azure KMS".to_owned()))?, ); - Ok(Box::new(KmsBackend::new(keyvault_provider)?) as Box) + Ok(Box::new(KmsBackend::new(keyvault_provider)?)) + } + #[cfg(feature = "cloud-gcp")] + STORAGE_VENDOR_NAME_GCP => { + let (mk, gcp_cfg) = config.clone().convert_to_gcp_config(); + let conf = CloudConfig::from_gcp_kms_config(mk, gcp_cfg) + .map_err(cloud_convert_error("gcp from proto".to_owned()))?; + let kms_provider = + GcpKms::new(conf).map_err(cloud_convert_error("new GCP KMS".to_owned()))?; + Ok(Box::new(KmsBackend::new(Box::new(kms_provider))?)) } provider => Err(Error::Other(box_err!("provider not found {}", provider))), } @@ -105,6 +116,7 @@ mod tests { client_secret: Some("client_secret".to_owned()), ..AzureConfig::default() }), + gcp: None, }; let invalid_config = KmsConfig { azure: None, diff --git a/components/encryption/src/config.rs b/components/encryption/src/config.rs index c66d494ebef..4c5805248e8 100644 --- a/components/encryption/src/config.rs +++ b/components/encryption/src/config.rs @@ -1,6 +1,6 @@ // Copyright 2020 TiKV Project Authors. Licensed under Apache-2.0. -use cloud::kms::SubConfigAzure; +use cloud::kms::{SubConfigAzure, SubConfigGcp}; use kvproto::encryptionpb::{EncryptionMethod, MasterKeyKms}; use online_config::OnlineConfig; use serde_derive::{Deserialize, Serialize}; @@ -84,6 +84,17 @@ impl std::fmt::Debug for AzureConfig { } } +// TODO: the representation of GCP KMS to users needs to be discussed. +#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq)] +#[serde(default)] +#[serde(rename_all = "kebab-case")] +pub struct GcpConfig { + /// User credential file path. Currently, only service account and + /// authorized user are supported. If set to None, will try to build the + /// `TokenProvider` following the "Google Default Credentials" flow. 
+ pub credential_file_path: Option, +} + #[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, OnlineConfig)] #[serde(default)] #[serde(rename_all = "kebab-case")] @@ -95,6 +106,9 @@ pub struct KmsConfig { // followings are used for Azure Kms #[online_config(skip)] pub azure: Option, + // Gcp Kms configuration. + #[online_config(skip)] + pub gcp: Option, } impl KmsConfig { @@ -132,6 +146,20 @@ impl KmsConfig { }; (mk, azure_kms_cfg) } + + pub fn convert_to_gcp_config(self) -> (MasterKeyKms, SubConfigGcp) { + let gcp_cfg = SubConfigGcp { + credential_file_path: self.gcp.unwrap().credential_file_path, + }; + let mk = MasterKeyKms { + key_id: self.key_id, + region: self.region, + endpoint: self.endpoint, + vendor: self.vendor, + ..MasterKeyKms::default() + }; + (mk, gcp_cfg) + } } #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] @@ -239,6 +267,7 @@ mod tests { endpoint: "endpoint".to_owned(), vendor: "".to_owned(), azure: None, + gcp: None, }, }, previous_master_key: MasterKeyConfig::Plaintext, @@ -260,10 +289,28 @@ mod tests { hsm_url: "hsm_url".to_owned(), ..AzureConfig::default() }), + gcp: None, }, }, ..kms_config.clone() }; + + let kms_config_gcp = EncryptionConfig { + master_key: MasterKeyConfig::Kms { + config: KmsConfig { + key_id: "key_id".to_owned(), + region: "region".to_owned(), + endpoint: "endpoint".to_owned(), + vendor: "gcp".to_owned(), + azure: None, + gcp: Some(GcpConfig { + credential_file_path: Some("/tmp/credential.json".into()), + }), + }, + }, + ..kms_config.clone() + }; + // KMS with default(aws). 
let kms_str = r#" data-encryption-method = "aes128-ctr" @@ -302,7 +349,28 @@ mod tests { [previous-master-key] type = 'plaintext' "#; - for (kms_cfg, kms_str) in [(kms_config, kms_str), (kms_config_azure, kms_str_azure)] { + // KMS with gcp + let kms_str_gcp = r#" + data-encryption-method = 'aes128-ctr' + data-key-rotation-period = '14d' + enable-file-dictionary-log = true + file-dictionary-rewrite-threshold = 1000000 + + [master-key] + type = 'kms' + key-id = 'key_id' + region = 'region' + endpoint = 'endpoint' + vendor = 'gcp' + + [master-key.gcp] + credential-file-path = '/tmp/credential.json' + "#; + for (kms_cfg, kms_str) in [ + (kms_config, kms_str), + (kms_config_azure, kms_str_azure), + (kms_config_gcp, kms_str_gcp), + ] { let cfg: EncryptionConfig = toml::from_str(kms_str).unwrap(); assert_eq!( cfg, From 8130cb35731d894fe0b5bf7dcf9496b9fcd57bc8 Mon Sep 17 00:00:00 2001 From: lijie Date: Thu, 11 Jan 2024 19:51:45 +0800 Subject: [PATCH 1104/1149] chore: bump version to v8.0.0-alpha (#16333) Signed-off-by: lijie Co-authored-by: Purelind --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 112ca7d041f..051c8b3a1bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6276,7 +6276,7 @@ dependencies = [ [[package]] name = "tikv" -version = "7.6.0-alpha" +version = "8.0.0-alpha" dependencies = [ "anyhow", "api_version", diff --git a/Cargo.toml b/Cargo.toml index fe7382e3c2b..77cba294c60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tikv" -version = "7.6.0-alpha" +version = "8.0.0-alpha" authors = ["The TiKV Authors"] description = "A distributed transactional key-value database powered by Rust and Raft" license = "Apache-2.0" From d5cb209ebf675fb734ec47240c64bc904385aa08 Mon Sep 17 00:00:00 2001 From: glorv Date: Fri, 12 Jan 2024 10:54:24 +0800 Subject: [PATCH 1105/1149] test: fix unstable test test_limit_concurrency (#16361) ref tikv/tikv#15990 The original test 
use select! to check t1 is finished before t2, but this is unstable when the select! is called when both t1 and t2 is finished. From the rust doc we can see " If multiple futures are ready, one will be pseudo-randomly selected at runtime." which means if both future is ready, which is polled first is not determined. So this PR uses a atomic value to determine t1's complete to make the result stable. Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../interceptors/concurrency_limiter.rs | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/coprocessor/interceptors/concurrency_limiter.rs b/src/coprocessor/interceptors/concurrency_limiter.rs index 590dd5d7180..196d398bf71 100644 --- a/src/coprocessor/interceptors/concurrency_limiter.rs +++ b/src/coprocessor/interceptors/concurrency_limiter.rs @@ -126,7 +126,13 @@ where #[cfg(test)] mod tests { - use std::{sync::Arc, thread}; + use std::{ + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + thread, + }; use futures::future::FutureExt; use tokio::{ @@ -162,11 +168,15 @@ mod tests { // than t1, it starts with t1 smp.add_permits(1); let smp2 = smp.clone(); - let mut t1 = - tokio::spawn( - async move { limit_concurrency(work(8), &smp2, Duration::default()).await }, - ) - .fuse(); + + let t1_finished = Arc::new(AtomicBool::new(false)); + + let t1_finished_cloned = t1_finished.clone(); + let mut t1 = tokio::spawn(async move { + limit_concurrency(work(8), &smp2, Duration::default()).await; + t1_finished_cloned.store(true, Ordering::Release); + }) + .fuse(); sleep(Duration::from_millis(100)).await; let smp2 = smp.clone(); @@ -178,14 +188,11 @@ mod tests { let deadline = sleep(Duration::from_millis(1500)).fuse(); futures::pin_mut!(deadline); - let mut t1_finished = false; loop { futures_util::select! 
{ - _ = t1 => { - t1_finished = true; - }, + _ = t1 => {}, _ = t2 => { - if t1_finished { + if t1_finished.load(Ordering::Acquire) { return; } else { panic!("t2 should finish later than t1"); From 115bff47805c87baa2750f5724ff5b1a2a1b18ca Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 12 Jan 2024 11:10:55 +0800 Subject: [PATCH 1106/1149] *: deny vulnerable crates (#16360) ref tikv/tikv#16328 * Fix RUSTSEC-2023-0044 and RUSTSEC-2023-0072 by upgrading rust-openssl. * Fix RUSTSEC-2023-0034 by upgrade h2. * Fix RUSTSEC-2023-0001 by upgrading tokio. * Fix RUSTSEC-2020-0159 by upgrading chrono. * Fix RUSTSEC-2023-0018 by removing `remove_dir_all` in non-test code. * Mitigate RUSTSEC-2020-0071 by banning unsound time 0.1 APIs. Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 286 ++++++++++++++++------ Cargo.toml | 8 +- clippy.toml | 5 + cmd/tikv-ctl/Cargo.toml | 3 +- cmd/tikv-server/Cargo.toml | 2 +- components/backup-stream/Cargo.toml | 3 +- components/backup-stream/src/router.rs | 4 +- components/backup-stream/src/utils.rs | 12 +- components/backup-stream/tests/suite.rs | 6 +- components/engine_rocks/Cargo.toml | 2 +- components/online_config/Cargo.toml | 2 +- components/raft_log_engine/Cargo.toml | 1 - components/raftstore-v2/Cargo.toml | 2 +- components/raftstore/Cargo.toml | 4 +- components/server/Cargo.toml | 2 +- components/snap_recovery/Cargo.toml | 2 +- components/test_util/Cargo.toml | 3 +- components/test_util/src/logging.rs | 6 +- components/tidb_query_common/Cargo.toml | 2 +- components/tidb_query_datatype/Cargo.toml | 2 +- components/tidb_query_expr/Cargo.toml | 4 +- components/tikv_util/Cargo.toml | 4 +- deny.toml | 12 +- tests/Cargo.toml | 2 +- 24 files changed, 261 insertions(+), 118 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 051c8b3a1bb..320fc8957ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -97,7 +97,7 @@ checksum = 
"a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" name = "api_version" version = "0.1.0" dependencies = [ - "bitflags", + "bitflags 1.3.2", "codec", "engine_traits", "kvproto", @@ -496,7 +496,7 @@ dependencies = [ "futures-io", "grpcio", "hex 0.4.2", - "indexmap", + "indexmap 1.6.2", "kvproto", "lazy_static", "log_wrappers", @@ -515,7 +515,6 @@ dependencies = [ "security", "slog", "slog-global", - "tempdir", "tempfile", "test_pd", "test_pd_client", @@ -582,7 +581,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5dbbe5cc2887bc0bc8506b26dcd4c41d1b54bdf4ff1de8e12d404deee60e4ec" dependencies = [ "bcc-sys", - "bitflags", + "bitflags 1.3.2", "byteorder", "libc 0.2.151", "regex", @@ -601,7 +600,7 @@ version = "0.59.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cexpr", "clang-sys", "clap 2.33.0", @@ -624,7 +623,7 @@ version = "0.65.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cexpr", "clang-sys", "lazy_static", @@ -657,6 +656,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + [[package]] name = "block-buffer" version = "0.9.0" @@ -818,7 +823,7 @@ name = "cdc" version = "0.0.1" dependencies = [ "api_version", - "bitflags", + "bitflags 1.3.2", "causal_ts", "collections", "concurrency_manager", @@ -882,14 +887,17 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = 
"chrono" -version = "0.4.11" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80094f509cf8b5ae86a4966a39b3ff66cd7e2a3e594accec3743ff3fabeab5b2" +checksum = "6127248204b9aba09a362f6c930ef6a78f2c1b2215f8a7b398c06e1083f17af0" dependencies = [ + "js-sys", "num-integer", "num-traits", "serde", - "time 0.1.42", + "time 0.1.43", + "wasm-bindgen", + "winapi 0.3.9", ] [[package]] @@ -921,7 +929,7 @@ checksum = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" dependencies = [ "ansi_term", "atty", - "bitflags", + "bitflags 1.3.2", "strsim 0.8.0", "textwrap 0.11.0", "unicode-width", @@ -935,9 +943,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8c93436c21e4698bacadf42917db28b23017027a4deccb35dbe47a7e7840123" dependencies = [ "atty", - "bitflags", + "bitflags 1.3.2", "clap_derive", - "indexmap", + "indexmap 1.6.2", "lazy_static", "os_str_bytes", "strsim 0.10.0", @@ -1553,7 +1561,7 @@ dependencies = [ "tempfile", "tikv_alloc", "tikv_util", - "time 0.1.42", + "time 0.1.43", "toml", "tracker", "txn_types", @@ -1678,6 +1686,12 @@ dependencies = [ "termcolor", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.2.8" @@ -1689,6 +1703,16 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc 0.2.151", + "windows-sys 0.52.0", +] + [[package]] name = "errno-dragonfly" version = "0.1.2" @@ -1794,6 +1818,12 @@ dependencies = [ "instant", ] +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + [[package]] name = "file_system" version = "0.1.0" @@ -1942,7 +1972,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ab7d1bd1bd33cc98b0889831b72da23c0aa4df9cec7e0702f46ecea04b35db6" dependencies = [ - "bitflags", + "bitflags 1.3.2", "fsevent-sys", ] @@ -1967,7 +1997,7 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" dependencies = [ - "bitflags", + "bitflags 1.3.2", "fuchsia-zircon-sys", ] @@ -2038,7 +2068,7 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" dependencies = [ - "fastrand", + "fastrand 1.9.0", "futures-core", "futures-io", "memchr", @@ -2317,9 +2347,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.15" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4" +checksum = "4d6250322ef6e60f93f9a2162799302cd6f68f79f6e5d85c8c16f14d1d958178" dependencies = [ "bytes", "fnv", @@ -2327,7 +2357,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap", + "indexmap 2.0.1", "slab", "tokio", "tokio-util", @@ -2581,6 +2611,16 @@ dependencies = [ "hashbrown 0.9.1", ] +[[package]] +name = "indexmap" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad227c3af19d4914570ad36d30409928b75967c298feb9ea1969db3a610bb14e" +dependencies = [ + "equivalent", + "hashbrown 0.14.0", +] + [[package]] name = "indextree" version = "4.6.0" @@ -2601,7 +2641,7 @@ checksum = "16d4bde3a7105e59c66a4104cfe9606453af1c7a0eac78cb7d5bc263eb762a70" dependencies = [ "ahash 0.7.4", "atty", - "indexmap", + "indexmap 1.6.2", "itoa 1.0.1", "lazy_static", "log", @@ -2617,7 
+2657,7 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4816c66d2c8ae673df83366c18341538f234a26d65a9ecea5c348b453ac1d02f" dependencies = [ - "bitflags", + "bitflags 1.3.2", "inotify-sys", "libc 0.2.151", ] @@ -2691,7 +2731,7 @@ checksum = "28dfb6c8100ccc63462345b67d1bbc3679177c75ee4bf59bf29c8b1d110b8189" dependencies = [ "hermit-abi 0.2.6", "io-lifetimes", - "rustix", + "rustix 0.36.7", "windows-sys 0.42.0", ] @@ -2752,7 +2792,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d63b6407b66fc81fc539dccf3ddecb669f393c5101b6a2be3976c95099a06e8" dependencies = [ - "indexmap", + "indexmap 1.6.2", ] [[package]] @@ -2900,6 +2940,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" +[[package]] +name = "linux-raw-sys" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" + [[package]] name = "lock_api" version = "0.4.6" @@ -3195,7 +3241,7 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f17df307904acd05aa8e32e97bb20f2a0df1728bbc2d771ae8f9a90463441e9" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cfg-if 1.0.0", "libc 0.2.151", "memoffset 0.6.4", @@ -3207,7 +3253,7 @@ version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cfg-if 1.0.0", "libc 0.2.151", "memoffset 0.7.1", @@ -3264,7 +3310,7 @@ version = "4.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae03c8c853dba7bfd23e571ff0cff7bc9dceb40a4cd684cd1681824183f45257" dependencies = [ - "bitflags", + "bitflags 1.3.2", "filetime", "fsevent", "fsevent-sys", @@ 
-3467,11 +3513,11 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" [[package]] name = "openssl" -version = "0.10.50" +version = "0.10.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e30d8bc91859781f0a943411186324d580f2bbeb71b452fe91ae344806af3f1" +checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c" dependencies = [ - "bitflags", + "bitflags 2.4.1", "cfg-if 1.0.0", "foreign-types", "libc 0.2.151", @@ -3508,9 +3554,9 @@ dependencies = [ [[package]] name = "openssl-sys" -version = "0.9.85" +version = "0.9.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d3d193fb1488ad46ffe3aaabc912cc931d02ee8518fe2959aea8ef52718b0c0" +checksum = "db7e971c2c2bba161b2d2fdf37080177eff520b3bc044787c7f1f5f9e78d869b" dependencies = [ "cc", "libc 0.2.151", @@ -3674,7 +3720,7 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8f94885300e262ef461aa9fd1afbf7df3caf9e84e271a74925d1c6c8b24830f" dependencies = [ - "bitflags", + "bitflags 1.3.2", "byteorder", "libc 0.2.151", "mmap", @@ -3916,7 +3962,7 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0941606b9934e2d98a3677759a971756eb821f75764d0e0d26946d08e74d9104" dependencies = [ - "bitflags", + "bitflags 1.3.2", "byteorder", "hex 0.4.2", "lazy_static", @@ -3990,7 +4036,7 @@ version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c852d9625b912c3e50480cdc701f60f49890b5d7ad46198dd583600f15e7c6ec" dependencies = [ - "bitflags", + "bitflags 1.3.2", "grpcio-compiler", "protobuf", "protobuf-codegen", @@ -4144,7 +4190,6 @@ dependencies = [ "slog-global", "tempfile", "tikv_util", - "time 0.1.42", "tracker", ] @@ -4153,7 +4198,7 @@ name = "raftstore" version = "0.0.1" dependencies = [ "batch-system", - "bitflags", + "bitflags 1.3.2", "byteorder", "bytes", "causal_ts", @@ -4215,7 +4260,7 @@ 
dependencies = [ "tidb_query_datatype", "tikv_alloc", "tikv_util", - "time 0.1.42", + "time 0.1.43", "tokio", "tracker", "txn_types", @@ -4265,7 +4310,7 @@ dependencies = [ "test_util", "thiserror", "tikv_util", - "time 0.1.42", + "time 0.1.43", "tracker", "txn_types", "walkdir", @@ -4395,7 +4440,7 @@ version = "10.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "929f54e29691d4e6a9cc558479de70db7aa3d98cd6fe7ab86d7507aa2886b9d2" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -4433,17 +4478,20 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.1.56" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" +checksum = "8380fe0152551244f0747b1bf41737e0f8a74f97a14ccefd1148187271634f3c" +dependencies = [ + "bitflags 1.3.2", +] [[package]] name = "redox_syscall" -version = "0.2.11" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8380fe0152551244f0747b1bf41737e0f8a74f97a14ccefd1148187271634f3c" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -4508,9 +4556,9 @@ dependencies = [ [[package]] name = "remove_dir_all" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" dependencies = [ "winapi 0.3.9", ] @@ -4667,7 +4715,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f06953bb8b9e4307cb7ccc0d9d018e2ddd25a30d32831f631ce4fe8f17671f7" dependencies = [ "ahash 0.7.4", - "bitflags", + "bitflags 1.3.2", "instant", "num-traits", "rhai_codegen", @@ -4870,14 +4918,27 @@ version = "0.36.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d4fdebc4b395b7fbb9ab11e462e20ed9051e7b16e42d24042c776eca0ac81b03" dependencies = [ - "bitflags", - "errno", + "bitflags 1.3.2", + "errno 0.2.8", "io-lifetimes", "libc 0.2.151", - "linux-raw-sys", + "linux-raw-sys 0.1.4", "windows-sys 0.42.0", ] +[[package]] +name = "rustix" +version = "0.38.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac5ffa1efe7548069688cd7028f32591853cd7b5b756d41bcffd2353e4fc75b4" +dependencies = [ + "bitflags 2.4.1", + "errno 0.3.8", + "libc 0.2.151", + "linux-raw-sys 0.4.12", + "windows-sys 0.48.0", +] + [[package]] name = "rustversion" version = "1.0.4" @@ -4948,7 +5009,7 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3670b1d2fdf6084d192bc71ead7aabe6c06aa2ea3fbd9cc3ac111fa5c2b1bd84" dependencies = [ - "bitflags", + "bitflags 1.3.2", "core-foundation", "core-foundation-sys", "libc 0.2.151", @@ -5059,7 +5120,7 @@ version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" dependencies = [ - "indexmap", + "indexmap 1.6.2", "itoa 0.4.4", "ryu", "serde", @@ -5707,16 +5768,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.2.0" +version = "3.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" +checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" dependencies = [ "cfg-if 1.0.0", - "libc 0.2.151", - "rand 0.8.5", - "redox_syscall 0.2.11", - "remove_dir_all", - "winapi 0.3.9", + "fastrand 2.0.1", + "redox_syscall 0.3.5", + "rustix 0.38.3", + "windows-sys 0.48.0", ] [[package]] @@ -5971,6 +6031,7 @@ name = "test_util" version = "0.0.1" dependencies = [ "backtrace", + "chrono", "collections", "encryption_export", "fail", @@ -5983,7 +6044,7 @@ dependencies = [ 
"slog-global", "tempfile", "tikv_util", - "time 0.1.42", + "time 0.1.43", ] [[package]] @@ -6063,7 +6124,7 @@ dependencies = [ "tikv", "tikv_kv", "tikv_util", - "time 0.1.42", + "time 0.1.43", "tipb", "tipb_helper", "tokio", @@ -6162,7 +6223,7 @@ dependencies = [ "serde_json", "thiserror", "tikv_util", - "time 0.1.42", + "time 0.1.43", "yatp", ] @@ -6173,7 +6234,7 @@ dependencies = [ "api_version", "base64 0.13.0", "bitfield", - "bitflags", + "bitflags 1.3.2", "boolinator", "bstr", "chrono", @@ -6267,7 +6328,7 @@ dependencies = [ "tidb_query_common", "tidb_query_datatype", "tikv_util", - "time 0.1.42", + "time 0.1.43", "tipb", "tipb_helper", "twoway", @@ -6392,7 +6453,7 @@ dependencies = [ "tikv_alloc", "tikv_kv", "tikv_util", - "time 0.1.42", + "time 0.1.43", "tipb", "tokio", "tokio-openssl", @@ -6416,7 +6477,6 @@ dependencies = [ "backup", "cc", "cdc", - "chrono", "clap 2.33.0", "collections", "concurrency_manager", @@ -6456,7 +6516,7 @@ dependencies = [ "tikv", "tikv_alloc", "tikv_util", - "time 0.1.42", + "time 0.1.43", "tokio", "toml", "txn_types", @@ -6511,7 +6571,7 @@ dependencies = [ "server", "tikv", "tikv_util", - "time 0.1.42", + "time 0.1.43", "toml", "tracing-active-tree", "tracing-subscriber", @@ -6626,7 +6686,7 @@ dependencies = [ "tempfile", "thiserror", "tikv_alloc", - "time 0.1.42", + "time 0.1.43", "tokio", "tokio-executor", "tokio-timer", @@ -6639,12 +6699,11 @@ dependencies = [ [[package]] name = "time" -version = "0.1.42" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" dependencies = [ "libc 0.2.151", - "redox_syscall 0.1.56", "winapi 0.3.9", ] @@ -6709,14 +6768,13 @@ dependencies = [ [[package]] name = "tokio" -version = "1.21.2" +version = "1.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a9e03c497dc955702ba729190dc4aac6f2a0ce97f913e5b1b5912fc5039d9099" +checksum = "8666f87015685834a42aa61a391303d3bee0b1442dd9cf93e3adf4cbaf8de75a" dependencies = [ "autocfg", "bytes", "libc 0.2.151", - "memchr", "mio 0.8.5", "num_cpus", "parking_lot 0.12.1", @@ -6724,7 +6782,7 @@ dependencies = [ "signal-hook-registry", "socket2", "tokio-macros", - "winapi 0.3.9", + "windows-sys 0.42.0", ] [[package]] @@ -6920,7 +6978,7 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 1.0.0", + "cfg-if 0.1.10", "static_assertions", ] @@ -6928,7 +6986,7 @@ dependencies = [ name = "txn_types" version = "0.1.0" dependencies = [ - "bitflags", + "bitflags 1.3.2", "byteorder", "codec", "collections", @@ -7282,7 +7340,16 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", ] [[package]] @@ -7300,6 +7367,21 @@ dependencies = [ "windows_x86_64_msvc 0.48.5", ] +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.0" @@ -7312,6 +7394,12 @@ version = "0.48.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + [[package]] name = "windows_aarch64_msvc" version = "0.32.0" @@ -7330,6 +7418,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + [[package]] name = "windows_i686_gnu" version = "0.32.0" @@ -7348,6 +7442,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + [[package]] name = "windows_i686_msvc" version = "0.32.0" @@ -7366,6 +7466,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + [[package]] name = "windows_x86_64_gnu" version = "0.32.0" @@ -7384,6 +7490,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +[[package]] +name = "windows_x86_64_gnu" +version 
= "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.0" @@ -7396,6 +7508,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + [[package]] name = "windows_x86_64_msvc" version = "0.32.0" @@ -7414,6 +7532,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + [[package]] name = "winreg" version = "0.7.0" @@ -7440,7 +7564,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "637be4bacc6c06570eb05a3ba513f81d63e52862ced82db542215dd48dbab1e5" dependencies = [ "bit_field", - "bitflags", + "bitflags 1.3.2", "csv", "phf", "phf_codegen", diff --git a/Cargo.toml b/Cargo.toml index 77cba294c60..249ee380b37 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,7 +60,7 @@ batch-system = { workspace = true } byteorder = "1.2" case_macros = { workspace = true } causal_ts = { workspace = true } -chrono = "0.4" +chrono = { workspace = true } codec = { workspace = true } collections = { workspace = true } concurrency_manager = { workspace = true } @@ -158,7 +158,7 @@ tidb_query_expr = { workspace = true } tikv_alloc = { workspace = true } tikv_kv = { workspace = true } tikv_util = { workspace = true } -time = "0.1" +time = { workspace = true } tipb = { workspace 
= true } tokio = { version = "1.17", features = ["full"] } tokio-openssl = "0.6" @@ -315,6 +315,9 @@ batch-system = { path = "components/batch-system" } case_macros = { path = "components/case_macros" } causal_ts = { path = "components/causal_ts" } cdc = { path = "components/cdc", default-features = false } +# Do not enable default features that implicitly enables oldtime which is +# vulnerable to RUSTSEC-2020-0071, see more in deny.toml. +chrono = { version = "0.4", default-features = false } cloud = { path = "components/cloud" } codec = { path = "components/codec" } collections = { path = "components/collections" } @@ -376,6 +379,7 @@ tikv_alloc = { path = "components/tikv_alloc" } tikv_kv = { path = "components/tikv_kv", default-features = false } tikv_util = { path = "components/tikv_util" } tipb_helper = { path = "components/tipb_helper" } +time = { version = "0.1" } tracker = { path = "components/tracker" } txn_types = { path = "components/txn_types" } # External libs diff --git a/clippy.toml b/clippy.toml index 08a5b6beb4c..c1a11598a5a 100644 --- a/clippy.toml +++ b/clippy.toml @@ -6,6 +6,11 @@ disallowed-methods = [ { path = "futures_executor::thread_pool::ThreadPoolBuilder::after_start", reason = "Adding hooks directly will omit system hooks, please use ::with_sys_and_custom_hooks refer to https://github.com/tikv/tikv/pull/12442 and https://github.com/tikv/tikv/pull/15017 for more details." }, { path = "futures_executor::thread_pool::ThreadPoolBuilder::before_stop", reason = "Adding hooks directly will omit system hooks, please use ::with_sys_and_custom_hooks refer to https://github.com/tikv/tikv/pull/12442 and https://github.com/tikv/tikv/pull/15017 for more details." }, + + # See more about RUSTSEC-2020-0071 in deny.toml. 
+ { path = "time::now", reason = "time::now is unsound, see RUSTSEC-2020-0071" }, + { path = "time::at", reason = "time::at is unsound, see RUSTSEC-2020-0071" }, + { path = "time::at_utc", reason = "time::at_utc is unsound, see RUSTSEC-2020-0071" }, ] avoid-breaking-exported-api = false upper-case-acronyms-aggressive = true diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index 9504c3a4eae..056bc602224 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -44,7 +44,6 @@ nortcheck = ["engine_rocks/nortcheck"] api_version = { workspace = true } backup = { workspace = true } cdc = { workspace = true } -chrono = "0.4" clap = "2.32" collections = { workspace = true } concurrency_manager = { workspace = true } @@ -89,7 +88,7 @@ txn_types = { workspace = true } [build-dependencies] cc = "1.0" -time = "0.1" +time = { workspace = true } [target.'cfg(unix)'.dependencies] signal-hook = "0.3" diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index 62211c1fcbc..1e306d9be34 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -52,4 +52,4 @@ tracing-subscriber = { version = "0.3.17", default-features = false, features = [build-dependencies] cc = "1.0" -time = "0.1" +time = { workspace = true } diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index d8174831792..3fb9d484a02 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -28,7 +28,7 @@ harness = true async-compression = { version = "0.3.14", features = ["tokio", "zstd"] } async-trait = { version = "0.1" } bytes = "1" -chrono = "0.4" +chrono = { workspace = true } concurrency_manager = { workspace = true } crossbeam = "0.8" crossbeam-channel = "0.5" @@ -85,7 +85,6 @@ grpcio = { workspace = true } hex = "0.4" protobuf = { version = "2.8", features = ["bytes"] } rand = "0.8.0" -tempdir = "0.3" tempfile = "3.0" test_pd = { workspace = true } test_pd_client = { workspace = true } 
diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 95dad89588a..9a34fa75e94 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -1594,7 +1594,7 @@ mod tests { use futures::AsyncReadExt; use kvproto::brpb::{Local, Noop, StorageBackend, StreamBackupTaskInfo}; use online_config::{ConfigManager, OnlineConfig}; - use tempdir::TempDir; + use tempfile::TempDir; use tikv_util::{ codec::number::NumberEncoder, config::ReadableDuration, @@ -2426,7 +2426,7 @@ mod tests { let file_name = format!("{}", uuid::Uuid::new_v4()); let file_path = Path::new(&file_name); - let tempfile = TempDir::new("test_est_len_in_flush").unwrap(); + let tempfile = TempDir::new().unwrap(); let cfg = make_tempfiles_cfg(tempfile.path()); let pool = Arc::new(TempFilePool::new(cfg).unwrap()); let mut f = pool.open_for_write(file_path).unwrap(); diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index c6e9c031b0f..33e6ba044c3 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -978,9 +978,9 @@ mod test { #[test] fn test_recorder() { use engine_traits::{Iterable, KvEngine, Mutable, WriteBatch, WriteBatchExt, CF_DEFAULT}; - use tempdir::TempDir; + use tempfile::TempDir; - let p = TempDir::new("test_db").unwrap(); + let p = TempDir::new().unwrap(); let engine = engine_rocks::util::new_engine(p.path().to_str().unwrap(), &[CF_DEFAULT]).unwrap(); let mut wb = engine.write_batch(); @@ -1025,12 +1025,12 @@ mod test { #[tokio::test] async fn test_files_reader() { - use tempdir::TempDir; + use tempfile::TempDir; use tokio::{fs::File, io::AsyncReadExt}; use super::FilesReader; - let dir = TempDir::new("test_files").unwrap(); + let dir = TempDir::new().unwrap(); let files_num = 5; let mut files_path = Vec::new(); let mut expect_content = String::new(); @@ -1063,12 +1063,12 @@ mod test { #[tokio::test] async fn test_compression_writer() { 
use kvproto::brpb::CompressionType; - use tempdir::TempDir; + use tempfile::TempDir; use tokio::{fs::File, io::AsyncReadExt}; use super::compression_writer_dispatcher; - let dir = TempDir::new("test_files").unwrap(); + let dir = TempDir::new().unwrap(); let content = "test for compression writer. try to write to local path, and read it back."; // uncompressed writer diff --git a/components/backup-stream/tests/suite.rs b/components/backup-stream/tests/suite.rs index 434d81fff48..534faffb6d8 100644 --- a/components/backup-stream/tests/suite.rs +++ b/components/backup-stream/tests/suite.rs @@ -34,7 +34,7 @@ use kvproto::{ use pd_client::PdClient; use raftstore::{router::CdcRaftRouter, RegionInfoAccessor}; use resolved_ts::LeadershipResolver; -use tempdir::TempDir; +use tempfile::TempDir; use test_pd_client::TestPdClient; use test_raftstore::{new_server_cluster, Cluster, ServerCluster}; use test_util::retry; @@ -187,8 +187,8 @@ impl SuiteBuilder { env: Arc::new(grpcio::Environment::new(1)), cluster, - temp_files: TempDir::new("temp").unwrap(), - flushed_files: TempDir::new("flush").unwrap(), + temp_files: TempDir::new().unwrap(), + flushed_files: TempDir::new().unwrap(), case_name: case, }; for id in 1..=(n as u64) { diff --git a/components/engine_rocks/Cargo.toml b/components/engine_rocks/Cargo.toml index 1d275b788c2..774055ad526 100644 --- a/components/engine_rocks/Cargo.toml +++ b/components/engine_rocks/Cargo.toml @@ -52,7 +52,7 @@ slog_derive = "0.2" tempfile = "3.0" tikv_alloc = { workspace = true } tikv_util = { workspace = true } -time = "0.1" +time = { workspace = true } tracker = { workspace = true } txn_types = { workspace = true } diff --git a/components/online_config/Cargo.toml b/components/online_config/Cargo.toml index 47e8996391c..1ee16e9b639 100644 --- a/components/online_config/Cargo.toml +++ b/components/online_config/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" publish = false [dependencies] -chrono = "0.4" +chrono = { workspace = true } 
online_config_derive = { path = "./online_config_derive" } serde = { version = "1.0", features = ["derive"] } diff --git a/components/raft_log_engine/Cargo.toml b/components/raft_log_engine/Cargo.toml index 0e640991eea..913a0d18ae1 100644 --- a/components/raft_log_engine/Cargo.toml +++ b/components/raft_log_engine/Cargo.toml @@ -24,7 +24,6 @@ serde_derive = "1.0" slog = { workspace = true } slog-global = { workspace = true } tikv_util = { workspace = true } -time = "0.1" tracker = { workspace = true } [dev-dependencies] diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 2bd7737ade4..c925a8c472e 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -57,7 +57,7 @@ smallvec = "1.4" sst_importer = { workspace = true } thiserror = "1.0" tikv_util = { workspace = true } -time = "0.1" +time = { workspace = true } tracker = { workspace = true } txn_types = { workspace = true } yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index cde5c961f3f..115b06b347b 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -29,7 +29,7 @@ bitflags = "1.0.1" byteorder = "1.2" bytes = "1.0" causal_ts = { workspace = true } -chrono = "0.4" +chrono = { workspace = true } collections = { workspace = true } concurrency_manager = { workspace = true } crc32fast = "1.2" @@ -84,7 +84,7 @@ thiserror = "1.0" tidb_query_datatype = { workspace = true } tikv_alloc = { workspace = true } tikv_util = { workspace = true } -time = "0.1" +time = { workspace = true } tokio = { version = "1.5", features = ["sync", "rt-multi-thread"] } tracker = { workspace = true } txn_types = { workspace = true } diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index 9062a9f094e..8ca413f040e 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -38,7 +38,7 @@ backup = { 
workspace = true } backup-stream = { workspace = true } causal_ts = { workspace = true } cdc = { workspace = true } -chrono = "0.4" +chrono = { workspace = true } clap = "2.32" collections = { workspace = true } concurrency_manager = { workspace = true } diff --git a/components/snap_recovery/Cargo.toml b/components/snap_recovery/Cargo.toml index 23cbdcfe098..72049f5a318 100644 --- a/components/snap_recovery/Cargo.toml +++ b/components/snap_recovery/Cargo.toml @@ -13,7 +13,7 @@ test-engines-rocksdb = ["tikv/test-engines-rocksdb"] test-engines-panic = ["tikv/test-engines-panic"] [dependencies] -chrono = "0.4" +chrono = { workspace = true } encryption = { workspace = true } encryption_export = { workspace = true } engine_rocks = { workspace = true } diff --git a/components/test_util/Cargo.toml b/components/test_util/Cargo.toml index b4a23b5eda1..eb4b07030a0 100644 --- a/components/test_util/Cargo.toml +++ b/components/test_util/Cargo.toml @@ -12,6 +12,7 @@ cloud-azure = ["encryption_export/cloud-azure"] [dependencies] backtrace = "0.3" +chrono = { workspace = true } collections = { workspace = true } encryption_export = { workspace = true } fail = "0.5" @@ -24,4 +25,4 @@ slog = { workspace = true } slog-global = { workspace = true } tempfile = "3.0" tikv_util = { workspace = true } -time = "0.1" +time = { workspace = true } diff --git a/components/test_util/src/logging.rs b/components/test_util/src/logging.rs index 5c717c09b01..4212102df38 100644 --- a/components/test_util/src/logging.rs +++ b/components/test_util/src/logging.rs @@ -6,8 +6,10 @@ use std::{ io, io::prelude::*, sync::{Mutex, Once}, + time::SystemTime, }; +use chrono::{offset::Local, DateTime}; use slog::{self, Drain, OwnedKVList, Record}; struct Serializer<'a>(&'a mut dyn std::io::Write); @@ -48,8 +50,8 @@ impl CaseTraceLogger { } let tag = tikv_util::get_tag_from_thread_name().map_or_else(|| "".to_owned(), |s| s + " "); - let t = time::now(); - let time_str = time::strftime("%Y/%m/%d %H:%M:%S.%f", 
&t).unwrap(); + let date_time: DateTime = SystemTime::now().into(); + let time_str = format!("{}", date_time.format("%Y/%m/%d %H:%M:%S.%f")); write!( w, "{}{} {}:{}: [{}] {}", diff --git a/components/tidb_query_common/Cargo.toml b/components/tidb_query_common/Cargo.toml index 32dee06f46d..32d1a92df1b 100644 --- a/components/tidb_query_common/Cargo.toml +++ b/components/tidb_query_common/Cargo.toml @@ -20,7 +20,7 @@ prometheus-static-metric = "0.5" serde_json = "1.0" thiserror = "1.0" tikv_util = { workspace = true } -time = "0.1" +time = { workspace = true } yatp = { workspace = true } [dev-dependencies] diff --git a/components/tidb_query_datatype/Cargo.toml b/components/tidb_query_datatype/Cargo.toml index db25dacc74a..5f52580d454 100644 --- a/components/tidb_query_datatype/Cargo.toml +++ b/components/tidb_query_datatype/Cargo.toml @@ -12,7 +12,7 @@ bitfield = "0.13.2" bitflags = "1.0.1" boolinator = "2.4.0" bstr = "0.2.8" -chrono = "0.4" +chrono = { workspace = true } chrono-tz = "0.5.1" codec = { workspace = true } collections = { workspace = true } diff --git a/components/tidb_query_expr/Cargo.toml b/components/tidb_query_expr/Cargo.toml index 60bbde91c31..298944df4c0 100644 --- a/components/tidb_query_expr/Cargo.toml +++ b/components/tidb_query_expr/Cargo.toml @@ -29,14 +29,14 @@ tidb_query_codegen = { workspace = true } tidb_query_common = { workspace = true } tidb_query_datatype = { workspace = true } tikv_util = { workspace = true } -time = "0.1" +time = { workspace = true } tipb = { workspace = true } twoway = "0.2.0" uuid = { version = "0.8.1", features = ["v4"] } [dev-dependencies] bstr = "0.2.8" -chrono = "0.4" +chrono = { workspace = true } panic_hook = { workspace = true } profiler = { workspace = true } tipb_helper = { workspace = true } diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 9250dd03cb0..0e731fcd154 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -13,7 +13,7 @@ 
async-speed-limit = "0.4.0" backtrace = "0.3.9" byteorder = "1.2" bytes = "1.0" -chrono = "0.4" +chrono = { workspace = true } codec = { workspace = true } collections = { workspace = true } cpu-time = "1.0.0" @@ -56,7 +56,7 @@ strum = { version = "0.20", features = ["derive"] } sysinfo = "0.26" thiserror = "1.0" tikv_alloc = { workspace = true } -time = "0.1" +time = { workspace = true } tokio = { version = "1.5", features = ["rt-multi-thread"] } tokio-executor = { workspace = true } tokio-timer = { workspace = true } diff --git a/deny.toml b/deny.toml index a37fbc3491f..1cc62067c7c 100644 --- a/deny.toml +++ b/deny.toml @@ -41,11 +41,21 @@ deny = [ multiple-versions = "allow" [advisories] -vulnerability = "allow" # FIXME: Deny it. +vulnerability = "deny" unmaintained = "allow" # FIXME: Deny it. unsound = "allow" # FIXME: Deny it. yanked = "allow" # FIXME: Deny it. notice = "warn" +ignore = [ + # Ignore time 0.1 RUSTSEC-2020-0071 as 1) we have taken measures (see + # clippy.toml) to mitigate the issue and 2) time 0.1 has no fix available. + # + # Note: Upgrading to time 0.3 does fix the issue but it's an incompatible + # version which removes some necessary APIs (`time::precise_time_ns`) that + # are required by TiKV. + # See https://github.com/time-rs/time/blob/8067540c/CHANGELOG.md#L703 + "RUSTSEC-2020-0071", +] [licenses] unlicensed = "allow" # FIXME: Deny it. diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 549c7baf293..c1f09fb3d45 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -106,7 +106,7 @@ tidb_query_executors = { workspace = true } tidb_query_expr = { workspace = true } tikv = { workspace = true } tikv_util = { workspace = true } -time = "0.1" +time = { workspace = true } tipb = { workspace = true } toml = "0.5" tracker = { workspace = true } From 00749cc2a481e28b7307cf1fcf64a8f4111ff8d1 Mon Sep 17 00:00:00 2001 From: Alex Feinberg Date: Thu, 11 Jan 2024 22:22:55 -0800 Subject: [PATCH 1107/1149] In-memory engine: API changes for write-path. 
(#16322) ref tikv/tikv#16141 API and other changes to pass the sequence number from the disk engine to the cache engine's on write: - Make the callback`WriteBatch::write_callback_opt` take a u64 - Keep track of sequence number in `RegionCacheWriteBatch` - integrate `RegionCacheWriteBatch` with HybridEngine Signed-off-by: Alex Feinberg Co-authored-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> --- components/engine_rocks/src/write_batch.rs | 10 +- components/engine_traits/src/write_batch.rs | 4 +- components/hybrid_engine/Cargo.toml | 3 +- components/hybrid_engine/src/engine.rs | 3 +- components/hybrid_engine/src/misc.rs | 3 +- components/hybrid_engine/src/write_batch.rs | 81 ++++++++++-- .../raftstore-v2/src/operation/command/mod.rs | 2 +- .../region_cache_memory_engine/src/engine.rs | 90 +------------ .../region_cache_memory_engine/src/lib.rs | 2 + .../src/write_batch.rs | 120 ++++++++++++++++++ 10 files changed, 204 insertions(+), 114 deletions(-) create mode 100644 components/region_cache_memory_engine/src/write_batch.rs diff --git a/components/engine_rocks/src/write_batch.rs b/components/engine_rocks/src/write_batch.rs index 3659a7628d6..8c5aa1dd9b9 100644 --- a/components/engine_rocks/src/write_batch.rs +++ b/components/engine_rocks/src/write_batch.rs @@ -98,7 +98,7 @@ impl RocksWriteBatchVec { } #[inline] - fn write_impl(&mut self, opts: &WriteOptions, mut cb: impl FnMut()) -> Result { + fn write_impl(&mut self, opts: &WriteOptions, mut cb: impl FnMut(u64)) -> Result { let opt: RocksWriteOptions = opts.into(); let mut seq = 0; if self.support_write_batch_vec { @@ -106,14 +106,14 @@ impl RocksWriteBatchVec { self.get_db() .multi_batch_write_callback(self.as_inner(), &opt.into_raw(), |s| { seq = s; - cb(); + cb(s); }) .map_err(r2e)?; } else { self.get_db() .write_callback(&self.wbs[0], &opt.into_raw(), |s| { seq = s; - cb(); + cb(s); }) .map_err(r2e)?; } @@ -123,10 +123,10 @@ impl RocksWriteBatchVec { impl engine_traits::WriteBatch for 
RocksWriteBatchVec { fn write_opt(&mut self, opts: &WriteOptions) -> Result { - self.write_impl(opts, || {}) + self.write_impl(opts, |_| {}) } - fn write_callback_opt(&mut self, opts: &WriteOptions, cb: impl FnMut()) -> Result { + fn write_callback_opt(&mut self, opts: &WriteOptions, cb: impl FnMut(u64)) -> Result { self.write_impl(opts, cb) } diff --git a/components/engine_traits/src/write_batch.rs b/components/engine_traits/src/write_batch.rs index 8a92ac7c382..b1904c2335a 100644 --- a/components/engine_traits/src/write_batch.rs +++ b/components/engine_traits/src/write_batch.rs @@ -74,9 +74,9 @@ pub trait WriteBatch: Mutable { fn write_opt(&mut self, opts: &WriteOptions) -> Result; // TODO: it should be `FnOnce`. - fn write_callback_opt(&mut self, opts: &WriteOptions, mut cb: impl FnMut()) -> Result { + fn write_callback_opt(&mut self, opts: &WriteOptions, mut cb: impl FnMut(u64)) -> Result { let seq = self.write_opt(opts)?; - cb(); + cb(seq); Ok(seq) } diff --git a/components/hybrid_engine/Cargo.toml b/components/hybrid_engine/Cargo.toml index 0ae04b1dc3e..79fedc4bcbd 100644 --- a/components/hybrid_engine/Cargo.toml +++ b/components/hybrid_engine/Cargo.toml @@ -12,8 +12,7 @@ engine_traits = { workspace = true } txn_types = { workspace = true } tikv_util = { workspace = true } engine_rocks = { workspace = true } +region_cache_memory_engine = { workspace = true } [dev-dependencies] -engine_rocks = { workspace = true } -region_cache_memory_engine = { workspace = true } tempfile = "3.0" diff --git a/components/hybrid_engine/src/engine.rs b/components/hybrid_engine/src/engine.rs index b76b999f1c3..e0020f97b36 100644 --- a/components/hybrid_engine/src/engine.rs +++ b/components/hybrid_engine/src/engine.rs @@ -2,7 +2,7 @@ use engine_traits::{ KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, SnapshotContext, SnapshotMiscExt, - SyncMutable, + SyncMutable, WriteBatchExt, }; use crate::snapshot::HybridEngineSnapshot; @@ -63,6 +63,7 @@ impl KvEngine for 
HybridEngine where EK: KvEngine, EC: RegionCacheEngine, + HybridEngine: WriteBatchExt, { type Snapshot = HybridEngineSnapshot; diff --git a/components/hybrid_engine/src/misc.rs b/components/hybrid_engine/src/misc.rs index d761322ae76..42339a83cca 100644 --- a/components/hybrid_engine/src/misc.rs +++ b/components/hybrid_engine/src/misc.rs @@ -1,6 +1,6 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{KvEngine, MiscExt, RegionCacheEngine, Result}; +use engine_traits::{KvEngine, MiscExt, RegionCacheEngine, Result, WriteBatchExt}; use crate::{engine::HybridEngine, hybrid_metrics::HybridEngineStatisticsReporter}; @@ -8,6 +8,7 @@ impl MiscExt for HybridEngine where EK: KvEngine, EC: RegionCacheEngine, + HybridEngine: WriteBatchExt, { type StatisticsReporter = HybridEngineStatisticsReporter; diff --git a/components/hybrid_engine/src/write_batch.rs b/components/hybrid_engine/src/write_batch.rs index 3aba34c9c85..ec124a2e831 100644 --- a/components/hybrid_engine/src/write_batch.rs +++ b/components/hybrid_engine/src/write_batch.rs @@ -1,40 +1,52 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{ - KvEngine, Mutable, RegionCacheEngine, Result, WriteBatch, WriteBatchExt, WriteOptions, -}; +use engine_traits::{KvEngine, Mutable, Result, WriteBatch, WriteBatchExt, WriteOptions}; +use region_cache_memory_engine::{RegionCacheMemoryEngine, RegionCacheWriteBatch}; use crate::engine::HybridEngine; pub struct HybridEngineWriteBatch { - _disk_write_batch: EK::WriteBatch, - // todo: region_cache_engine write batch + disk_write_batch: EK::WriteBatch, + cache_write_batch: RegionCacheWriteBatch, } -impl WriteBatchExt for HybridEngine +impl WriteBatchExt for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, { type WriteBatch = HybridEngineWriteBatch; const WRITE_BATCH_MAX_KEYS: usize = EK::WRITE_BATCH_MAX_KEYS; fn write_batch(&self) -> Self::WriteBatch { - unimplemented!() + HybridEngineWriteBatch { + disk_write_batch: self.disk_engine().write_batch(), + cache_write_batch: self.region_cache_engine().write_batch(), + } } - fn write_batch_with_cap(&self, _: usize) -> Self::WriteBatch { - unimplemented!() + fn write_batch_with_cap(&self, cap: usize) -> Self::WriteBatch { + HybridEngineWriteBatch { + disk_write_batch: self.disk_engine().write_batch_with_cap(cap), + cache_write_batch: self.region_cache_engine().write_batch_with_cap(cap), + } } } impl WriteBatch for HybridEngineWriteBatch { - fn write_opt(&mut self, _: &WriteOptions) -> Result { - unimplemented!() + fn write_opt(&mut self, opts: &WriteOptions) -> Result { + self.write_callback_opt(opts, |_| ()) } - fn write_callback_opt(&mut self, _opts: &WriteOptions, _cb: impl FnMut()) -> Result { - unimplemented!() + fn write_callback_opt(&mut self, opts: &WriteOptions, mut cb: impl FnMut(u64)) -> Result { + self.disk_write_batch + .write_callback_opt(opts, |s| { + self.cache_write_batch.set_sequence_number(s).unwrap(); + self.cache_write_batch.write_opt(opts).unwrap(); + }) + .map(|s| { + cb(s); + s + }) } fn data_size(&self) -> usize { @@ -99,3 +111,44 @@ impl Mutable for 
HybridEngineWriteBatch { unimplemented!() } } + +#[cfg(test)] +mod tests { + use engine_rocks::util::new_engine; + use engine_traits::{WriteBatchExt, CF_DEFAULT, CF_LOCK, CF_WRITE}; + use region_cache_memory_engine::RegionCacheMemoryEngine; + use tempfile::Builder; + + use crate::HybridEngine; + + #[test] + fn test_region_cache_memory_engine() { + let path = Builder::new().prefix("temp").tempdir().unwrap(); + let disk_engine = new_engine( + path.path().to_str().unwrap(), + &[CF_DEFAULT, CF_LOCK, CF_WRITE], + ) + .unwrap(); + let memory_engine = RegionCacheMemoryEngine::default(); + memory_engine.new_region(1); + { + let mut core = memory_engine.core().lock().unwrap(); + core.mut_region_meta(1).unwrap().set_can_read(true); + core.mut_region_meta(1).unwrap().set_safe_ts(10); + } + + let hybrid_engine = + HybridEngine::<_, RegionCacheMemoryEngine>::new(disk_engine, memory_engine.clone()); + let mut write_batch = hybrid_engine.write_batch(); + write_batch + .cache_write_batch + .set_sequence_number(0) + .unwrap(); // First call ok. + assert!( + write_batch + .cache_write_batch + .set_sequence_number(0) + .is_err() + ); // Second call err. 
+ } +} diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index af8dbe0afa0..01ba82dd17a 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -853,7 +853,7 @@ impl Apply { let mut write_opt = WriteOptions::default(); write_opt.set_disable_wal(true); let wb = self.write_batch.as_mut().unwrap(); - if let Err(e) = wb.write_callback_opt(&write_opt, || { + if let Err(e) = wb.write_callback_opt(&write_opt, |_| { flush_state.set_applied_index(index); }) { slog_panic!(self.logger, "failed to write data"; "error" => ?e); diff --git a/components/region_cache_memory_engine/src/engine.rs b/components/region_cache_memory_engine/src/engine.rs index f18693e7d60..6f9e6f6b75e 100644 --- a/components/region_cache_memory_engine/src/engine.rs +++ b/components/region_cache_memory_engine/src/engine.rs @@ -12,9 +12,8 @@ use bytes::Bytes; use collections::HashMap; use engine_rocks::{raw::SliceTransform, util::FixedSuffixSliceTransform}; use engine_traits::{ - CfNamesExt, DbVector, Error, IterOptions, Iterable, Iterator, Mutable, Peekable, ReadOptions, - RegionCacheEngine, Result, Snapshot, SnapshotMiscExt, WriteBatch, WriteBatchExt, WriteOptions, - CF_DEFAULT, CF_LOCK, CF_WRITE, + CfNamesExt, DbVector, Error, IterOptions, Iterable, Iterator, Peekable, ReadOptions, + RegionCacheEngine, Result, Snapshot, SnapshotMiscExt, CF_DEFAULT, CF_LOCK, CF_WRITE, }; use skiplist_rs::{IterRef, Skiplist}; use tikv_util::config::ReadableSize; @@ -190,23 +189,6 @@ impl RegionCacheEngine for RegionCacheMemoryEngine { } } -// todo: fill fields needed -pub struct RegionCacheWriteBatch; - -impl WriteBatchExt for RegionCacheMemoryEngine { - type WriteBatch = RegionCacheWriteBatch; - // todo: adjust it - const WRITE_BATCH_MAX_KEYS: usize = 256; - - fn write_batch(&self) -> Self::WriteBatch { - RegionCacheWriteBatch {} - } - - fn write_batch_with_cap(&self, _: usize) -> 
Self::WriteBatch { - RegionCacheWriteBatch {} - } -} - #[derive(PartialEq)] enum Direction { Uninit, @@ -492,74 +474,6 @@ impl Iterator for RegionCacheIterator { } } -impl WriteBatch for RegionCacheWriteBatch { - fn write_opt(&mut self, _: &WriteOptions) -> Result { - unimplemented!() - } - - fn data_size(&self) -> usize { - unimplemented!() - } - - fn count(&self) -> usize { - unimplemented!() - } - - fn is_empty(&self) -> bool { - unimplemented!() - } - - fn should_write_to_engine(&self) -> bool { - unimplemented!() - } - - fn clear(&mut self) { - unimplemented!() - } - - fn set_save_point(&mut self) { - unimplemented!() - } - - fn pop_save_point(&mut self) -> Result<()> { - unimplemented!() - } - - fn rollback_to_save_point(&mut self) -> Result<()> { - unimplemented!() - } - - fn merge(&mut self, _: Self) -> Result<()> { - unimplemented!() - } -} - -impl Mutable for RegionCacheWriteBatch { - fn put(&mut self, _: &[u8], _: &[u8]) -> Result<()> { - unimplemented!() - } - - fn put_cf(&mut self, _: &str, _: &[u8], _: &[u8]) -> Result<()> { - unimplemented!() - } - - fn delete(&mut self, _: &[u8]) -> Result<()> { - unimplemented!() - } - - fn delete_cf(&mut self, _: &str, _: &[u8]) -> Result<()> { - unimplemented!() - } - - fn delete_range(&mut self, _: &[u8], _: &[u8]) -> Result<()> { - unimplemented!() - } - - fn delete_range_cf(&mut self, _: &str, _: &[u8], _: &[u8]) -> Result<()> { - unimplemented!() - } -} - #[derive(Clone, Debug)] pub struct RegionCacheSnapshot { region_id: u64, diff --git a/components/region_cache_memory_engine/src/lib.rs b/components/region_cache_memory_engine/src/lib.rs index fc2136d3dab..016f4f2be53 100644 --- a/components/region_cache_memory_engine/src/lib.rs +++ b/components/region_cache_memory_engine/src/lib.rs @@ -7,4 +7,6 @@ mod engine; pub mod keys; +mod write_batch; pub use engine::RegionCacheMemoryEngine; +pub use write_batch::RegionCacheWriteBatch; diff --git a/components/region_cache_memory_engine/src/write_batch.rs 
b/components/region_cache_memory_engine/src/write_batch.rs new file mode 100644 index 00000000000..674b3434525 --- /dev/null +++ b/components/region_cache_memory_engine/src/write_batch.rs @@ -0,0 +1,120 @@ +use bytes::Bytes; +use engine_traits::{Mutable, Result, WriteBatch, WriteBatchExt, WriteOptions}; +use tikv_util::box_err; + +use crate::RegionCacheMemoryEngine; + +/// RegionCacheWriteBatch maintains its own in-memory buffer. +#[derive(Default, Clone, Debug)] +pub struct RegionCacheWriteBatch { + buffer: Vec, + sequence_number: Option, +} + +impl RegionCacheWriteBatch { + pub fn with_capacity(cap: usize) -> Self { + Self { + buffer: Vec::with_capacity(cap), + sequence_number: None, + } + } + + /// Sets the sequence number for this batch. This should only be called + /// prior to writing the batch. + pub fn set_sequence_number(&mut self, seq: u64) -> Result<()> { + if let Some(seqno) = self.sequence_number { + return Err(box_err!("Sequence number {} already set", seqno)); + }; + self.sequence_number = Some(seq); + Ok(()) + } +} + +#[derive(Clone, Debug)] +struct RegionCacheWriteBatchEntry { + cf: String, + key: Bytes, + mutation: (), // TODO, +} + +impl WriteBatchExt for RegionCacheMemoryEngine { + type WriteBatch = RegionCacheWriteBatch; + // todo: adjust it + const WRITE_BATCH_MAX_KEYS: usize = 256; + + fn write_batch(&self) -> Self::WriteBatch { + RegionCacheWriteBatch::default() + } + + fn write_batch_with_cap(&self, cap: usize) -> Self::WriteBatch { + RegionCacheWriteBatch::with_capacity(cap) + } +} + +impl WriteBatch for RegionCacheWriteBatch { + fn write_opt(&mut self, _: &WriteOptions) -> Result { + unimplemented!() + } + + fn data_size(&self) -> usize { + unimplemented!() + } + + fn count(&self) -> usize { + unimplemented!() + } + + fn is_empty(&self) -> bool { + unimplemented!() + } + + fn should_write_to_engine(&self) -> bool { + unimplemented!() + } + + fn clear(&mut self) { + unimplemented!() + } + + fn set_save_point(&mut self) { + unimplemented!() 
+ } + + fn pop_save_point(&mut self) -> Result<()> { + unimplemented!() + } + + fn rollback_to_save_point(&mut self) -> Result<()> { + unimplemented!() + } + + fn merge(&mut self, _: Self) -> Result<()> { + unimplemented!() + } +} + +impl Mutable for RegionCacheWriteBatch { + fn put(&mut self, _: &[u8], _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn put_cf(&mut self, _: &str, _: &[u8], _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete(&mut self, _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_cf(&mut self, _: &str, _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_range(&mut self, _: &[u8], _: &[u8]) -> Result<()> { + unimplemented!() + } + + fn delete_range_cf(&mut self, _: &str, _: &[u8], _: &[u8]) -> Result<()> { + unimplemented!() + } +} From dafd1476c628288011525c310e12feac486130a0 Mon Sep 17 00:00:00 2001 From: Alex Feinberg Date: Fri, 12 Jan 2024 18:25:56 -0800 Subject: [PATCH 1108/1149] In-memory Engine: conditional compilation (#16357) ref tikv/tikv#16141 Do not start in-memory engine unless the `memory-engine` feature is enabled. 
Signed-off-by: Alex Feinberg --- cmd/tikv-server/Cargo.toml | 1 + components/server/Cargo.toml | 1 + components/server/src/server.rs | 16 ++++++++++------ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index 1e306d9be34..be02fe4915b 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -15,6 +15,7 @@ mimalloc = ["server/mimalloc"] portable = ["server/portable"] sse = ["server/sse"] mem-profiling = ["server/mem-profiling"] +memory-engine = ["server/memory-engine"] failpoints = ["server/failpoints"] cloud-aws = ["server/cloud-aws"] cloud-gcp = ["server/cloud-gcp"] diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index 8ca413f040e..cf2d653bf52 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -12,6 +12,7 @@ mimalloc = ["tikv/mimalloc"] snmalloc = ["tikv/snmalloc"] portable = ["tikv/portable"] sse = ["tikv/sse"] +memory-engine = [] mem-profiling = ["tikv/mem-profiling"] failpoints = ["tikv/failpoints"] cloud-aws = ["encryption_export/cloud-aws"] diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 24e8b55c03a..bc0769b751a 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -221,28 +221,32 @@ pub fn run_tikv( dispatch_api_version!(config.storage.api_version(), { if !config.raft_engine.enable { - if config.region_cache_memory_limit == ReadableSize(0) { - run_impl::( + if cfg!(feature = "memory-engine") + && config.region_cache_memory_limit != ReadableSize(0) + { + run_impl::, RocksEngine, API>( config, service_event_tx, service_event_rx, ) } else { - run_impl::, RocksEngine, API>( + run_impl::( config, service_event_tx, service_event_rx, ) } } else { - if config.region_cache_memory_limit == ReadableSize(0) { - run_impl::( + if cfg!(feature = "memory-engine") + && config.region_cache_memory_limit != ReadableSize(0) + { + run_impl::, RaftLogEngine, API>( config, 
service_event_tx, service_event_rx, ) } else { - run_impl::, RaftLogEngine, API>( + run_impl::( config, service_event_tx, service_event_rx, From 2a553aa6ec1e8588b9c129cff76c512da510ccaf Mon Sep 17 00:00:00 2001 From: ShuNing Date: Mon, 15 Jan 2024 11:19:18 +0800 Subject: [PATCH 1109/1149] Revert "raftstore: fix load base split cannot works in pure follower scenario (#16376) ref tikv/tikv#16314 Signed-off-by: nolouch --- components/raftstore/src/store/fsm/peer.rs | 4 - components/raftstore/src/store/metrics.rs | 1 - components/raftstore/src/store/peer.rs | 7 +- components/raftstore/src/store/worker/pd.rs | 22 +---- .../src/store/worker/split_controller.rs | 3 +- .../raftstore/test_split_region.rs | 86 ------------------- 6 files changed, 5 insertions(+), 118 deletions(-) diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index a871788fe89..07ac28297b1 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -5357,14 +5357,10 @@ where let allow_replica_read = read_only && msg.get_header().get_replica_read(); let flags = WriteBatchFlags::from_bits_check(msg.get_header().get_flags()); let allow_stale_read = read_only && flags.contains(WriteBatchFlags::STALE_READ); - let split_region = msg.has_admin_request() - && msg.get_admin_request().get_cmd_type() == AdminCmdType::BatchSplit; if !self.fsm.peer.is_leader() && !is_read_index_request && !allow_replica_read && !allow_stale_read - // allow proposal split command at non-leader, raft layer will forward it to leader. - && !split_region { self.ctx.raft_metrics.invalid_proposal.not_leader.inc(); let leader = self.fsm.peer.get_peer_from_cache(leader_id); diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index ccc28306059..8595ed0bcf6 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -193,7 +193,6 @@ make_static_metric! 
{ conf_change, batch, dropped_read_index, - non_leader_split, } pub label_enum RaftInvalidProposal { diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index b9a3a491563..fa5c8346c0c 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -4343,12 +4343,7 @@ where } match req.get_admin_request().get_cmd_type() { - AdminCmdType::Split | AdminCmdType::BatchSplit => { - ctx.insert(ProposalContext::SPLIT); - if !self.is_leader() { - poll_ctx.raft_metrics.propose.non_leader_split.inc(); - } - } + AdminCmdType::Split | AdminCmdType::BatchSplit => ctx.insert(ProposalContext::SPLIT), AdminCmdType::PrepareMerge => { self.pre_propose_prepare_merge(poll_ctx, req)?; ctx.insert(ProposalContext::PREPARE_MERGE); diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 3ec4c65c4c5..152dc7b3ef6 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -469,14 +469,6 @@ where const DEFAULT_LOAD_BASE_SPLIT_CHECK_INTERVAL: Duration = Duration::from_secs(1); const DEFAULT_COLLECT_TICK_INTERVAL: Duration = Duration::from_secs(1); -fn default_load_base_split_check_interval() -> Duration { - fail_point!("mock_load_base_split_check_interval", |t| { - let t = t.unwrap().parse::().unwrap(); - Duration::from_millis(t) - }); - DEFAULT_LOAD_BASE_SPLIT_CHECK_INTERVAL -} - fn default_collect_tick_interval() -> Duration { fail_point!("mock_collect_tick_interval", |_| { Duration::from_millis(1) @@ -602,7 +594,7 @@ where cpu_stats_sender: None, collect_store_infos_interval: interval, load_base_split_check_interval: cmp::min( - default_load_base_split_check_interval(), + DEFAULT_LOAD_BASE_SPLIT_CHECK_INTERVAL, interval, ), // Use `inspect_latency_interval` as the minimal limitation for collecting tick. 
@@ -2132,19 +2124,11 @@ where let f = async move { for split_info in split_infos { - let Ok(Some((region, leader))) = pd_client - .get_region_leader_by_id(split_info.region_id) - .await + let Ok(Some(region)) = + pd_client.get_region_by_id(split_info.region_id).await else { continue; }; - if leader.get_id() != split_info.peer.get_id() { - info!("load base split region on non-leader"; - "region_id" => region.get_id(), - "peer_id" => split_info.peer.get_id(), - "leader_id" => leader.get_id(), - ); - } // Try to split the region with the given split key. if let Some(split_key) = split_info.split_key { Self::handle_ask_batch_split( diff --git a/components/raftstore/src/store/worker/split_controller.rs b/components/raftstore/src/store/worker/split_controller.rs index eb281db4f4e..b3d97413ab3 100644 --- a/components/raftstore/src/store/worker/split_controller.rs +++ b/components/raftstore/src/store/worker/split_controller.rs @@ -285,7 +285,7 @@ impl Recorder { } fn update_peer(&mut self, peer: &Peer) { - if self.peer != *peer && peer.get_id() != 0 { + if self.peer != *peer { self.peer = peer.clone(); } } @@ -844,7 +844,6 @@ impl AutoSplitController { "qps" => qps, "byte" => byte, "cpu_usage" => cpu_usage, - "peer" => ?recorder.peer, ); self.recorders.remove(®ion_id); } else if is_unified_read_pool_busy && is_region_busy { diff --git a/tests/integrations/raftstore/test_split_region.rs b/tests/integrations/raftstore/test_split_region.rs index 831ce113a64..b6874f10df2 100644 --- a/tests/integrations/raftstore/test_split_region.rs +++ b/tests/integrations/raftstore/test_split_region.rs @@ -9,14 +9,11 @@ use std::{ use engine_rocks::RocksEngine; use engine_traits::{Peekable, CF_DEFAULT, CF_WRITE}; -use grpcio::{ChannelBuilder, Environment}; use keys::data_key; use kvproto::{ - kvrpcpb::{Context, Op}, metapb, pdpb, raft_cmdpb::*, raft_serverpb::{ExtraMessageType, RaftMessage}, - tikvpb_grpc::TikvClient, }; use pd_client::PdClient; use raft::eraftpb::MessageType; @@ -241,89 
+238,6 @@ fn test_auto_split_region() { assert!(resp.get_header().get_error().has_key_not_in_region()); } -#[test_case(test_raftstore::new_server_cluster)] -fn test_load_base_auto_split_with_follower_read() { - fail::cfg("mock_tick_interval", "return(0)").unwrap(); - fail::cfg("mock_collect_tick_interval", "return(0)").unwrap(); - fail::cfg("mock_load_base_split_check_interval", "return(100)").unwrap(); - fail::cfg("mock_region_is_busy", "return(0)").unwrap(); - fail::cfg("mock_unified_read_pool_is_busy", "return(0)").unwrap(); - let count = 2; - let mut cluster = new_cluster(0, count); - cluster.cfg.split.qps_threshold = Some(10); - cluster.cfg.split.byte_threshold = Some(1); - cluster.cfg.split.sample_threshold = 10; - cluster.cfg.split.detect_times = 2; - cluster.cfg.split.split_balance_score = 0.5; - cluster.run(); - let pd_client = Arc::clone(&cluster.pd_client); - let target = pd_client.get_region(b"").unwrap(); - let leader = cluster.leader_of_region(target.get_id()).unwrap(); - let follower = target - .get_peers() - .iter() - .find(|p| p.get_id() != leader.get_id()) - .unwrap() - .clone(); - - let env: Arc = Arc::new(Environment::new(1)); - let new_client = |peer: metapb::Peer| { - let cli = TikvClient::new( - ChannelBuilder::new(env.clone()) - .connect(&cluster.sim.rl().get_addr(peer.get_store_id())), - ); - let epoch = cluster.get_region_epoch(target.get_id()); - let mut ctx = Context::default(); - ctx.set_region_id(target.get_id()); - ctx.set_peer(peer); - ctx.set_region_epoch(epoch); - PeerClient { cli, ctx } - }; - let mut region1 = pd_client.get_region(b"k1").unwrap(); - let mut region2 = pd_client.get_region(b"k3").unwrap(); - assert_eq!(region1.get_id(), region2.get_id()); - - let leader_client = new_client(leader); - let commit_ts1 = leader_client.must_kv_write( - &pd_client, - vec![new_mutation(Op::Put, &b"k1"[..], &b"v1"[..])], - b"k1".to_vec(), - ); - let commit_ts2 = leader_client.must_kv_write( - &pd_client, - vec![new_mutation(Op::Put, 
&b"k2"[..], &b"v2"[..])], - b"k2".to_vec(), - ); - let commit_ts3 = leader_client.must_kv_write( - &pd_client, - vec![new_mutation(Op::Put, &b"k3"[..], &b"v3"[..])], - b"k3".to_vec(), - ); - let mut follower_client = new_client(follower); - follower_client.ctx.set_replica_read(true); - for i in 0..100 { - follower_client.kv_read(b"k1".to_vec(), commit_ts1 + i); - follower_client.kv_read(b"k2".to_vec(), commit_ts2 + i); - follower_client.kv_read(b"k3".to_vec(), commit_ts3 + i); - } - thread::sleep(Duration::from_millis(100)); - follower_client.kv_read(b"k3".to_vec(), commit_ts3); - for _ in 1..250 { - region1 = pd_client.get_region(b"k0").unwrap(); - region2 = pd_client.get_region(b"k4").unwrap(); - if region1.get_id() != region2.get_id() { - break; - } - thread::sleep(Duration::from_millis(20)) - } - assert_ne!(region1.get_id(), region2.get_id()); - fail::remove("mock_tick_interval"); - fail::remove("mock_region_is_busy"); - fail::remove("mock_collect_tick_interval"); - fail::remove("mock_unified_read_pool_is_busy"); - fail::remove("mock_load_base_split_check_interval"); -} - // A filter that disable commitment by heartbeat. #[derive(Clone)] struct EraseHeartbeatCommit; From 67c7fa1d7d34b521e8b54f2084836b88c41d85b6 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Mon, 15 Jan 2024 11:34:18 +0800 Subject: [PATCH 1110/1149] cop: fix the scan panic when checksum is enabled (#16373) close tikv/tikv#16371 Fix the scan panic issue when checksum is enabled. 
Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/test_coprocessor/src/store.rs | 20 +++++++++++++++++++ .../src/codec/row/v2/row_slice.rs | 12 +++++++++-- tests/integrations/coprocessor/test_select.rs | 9 +++++++-- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/components/test_coprocessor/src/store.rs b/components/test_coprocessor/src/store.rs index 6763ea7bb1a..221ed5afe46 100644 --- a/components/test_coprocessor/src/store.rs +++ b/components/test_coprocessor/src/store.rs @@ -299,6 +299,26 @@ impl Store { .collect(); FixtureStore::new(data) } + + pub fn insert_all_null_row( + &mut self, + tbl: &Table, + ctx: Context, + with_checksum: bool, + extra_checksum: Option, + ) { + self.begin(); + let inserts = self + .insert_into(tbl) + .set(&tbl["id"], Datum::Null) + .set(&tbl["name"], Datum::Null) + .set(&tbl["count"], Datum::Null) + .set_v2(&tbl["id"], ScalarValue::Int(None)) + .set_v2(&tbl["name"], ScalarValue::Bytes(None)) + .set_v2(&tbl["count"], ScalarValue::Int(None)); + inserts.execute_with_v2_checksum(ctx.clone(), with_checksum, extra_checksum); + self.commit(); + } } /// A trait for a general implementation to convert to a Txn store. diff --git a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs index aa5eb3fc56f..e86ebe28802 100644 --- a/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs +++ b/components/tidb_query_datatype/src/codec/row/v2/row_slice.rs @@ -233,7 +233,11 @@ impl RowSlice<'_> { RowSlice::Big { offsets, values, .. } => { - let last_slice_idx = offsets.get(non_null_col_num - 1).unwrap() as usize; + let last_slice_idx = if non_null_col_num == 0 { + 0 + } else { + offsets.get(non_null_col_num - 1).unwrap() as usize + }; let slice = values.slice; *values = LeBytes::new(&slice[..last_slice_idx]); &slice[last_slice_idx..] 
@@ -241,7 +245,11 @@ impl RowSlice<'_> { RowSlice::Small { offsets, values, .. } => { - let last_slice_idx = offsets.get(non_null_col_num - 1).unwrap() as usize; + let last_slice_idx = if non_null_col_num == 0 { + 0 + } else { + offsets.get(non_null_col_num - 1).unwrap() as usize + }; let slice = values.slice; *values = LeBytes::new(&slice[..last_slice_idx]); &slice[last_slice_idx..] diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index 1a062924dae..4e5418cdc14 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -2087,11 +2087,16 @@ fn test_select_v2_format_with_checksum() { for extra_checksum in [None, Some(132423)] { // The row value encoded with checksum bytes should have no impact on cop task // processing and related result chunk filling. - let (_, endpoint) = + let (mut store, endpoint) = init_data_with_commit_v2_checksum(&product, &data, true, extra_checksum); + store.insert_all_null_row(&product, Context::default(), true, extra_checksum); let req = DagSelect::from(&product).build(); let mut resp = handle_select(&endpoint, req); - let spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); + let mut spliter = DagChunkSpliter::new(resp.take_chunks().into(), 3); + let first_row = spliter.next().unwrap(); + assert_eq!(first_row[0], Datum::I64(0)); + assert_eq!(first_row[1], Datum::Null); + assert_eq!(first_row[2], Datum::Null); for (row, (id, name, cnt)) in spliter.zip(data.clone()) { let name_datum = name.map(|s| s.as_bytes()).into(); let expected_encoded = datum::encode_value( From 6bf8b5e2ac8d2f5e8e74c183aafcdeeee6bf9e54 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Mon, 15 Jan 2024 14:30:15 +0800 Subject: [PATCH 1111/1149] *: deny yanked, unsound and unknown git source crates (#16372) ref tikv/tikv#16328 Deny yanked crates * Upgrade yanked crossbeam-channel crossbeam-channel v0.5.6 has yanked, it may cause segfault. 
See https://github.com/crossbeam-rs/crossbeam/issues/971 * Upgrade yanked ahash ahash v0.7.4 and v0.8.3 have been yanked. Deny unsound crates * Fix RUSTSEC-2023-0072 by baning unsound `X509StoreRef::objects`. * Mitigate RUSTSEC-2021-0145 by disabling clap default features. * Fix RUSTSEC-2022-0078 by upgrading bumpalo. Deny crates from unknown git source * Replace xiongjiwei/encoding_rs with tikv/encoding_rs * Replace busyjay/rust-snappy with tikv/rust-snappy * Replace tabokie/fs2-rs with tikv/fs2-rs Signed-off-by: Neil Shen Co-authored-by: glorv --- Cargo.lock | 99 ++++++++++--------- Cargo.toml | 13 ++- clippy.toml | 61 +++++++++--- cmd/tikv-ctl/Cargo.toml | 6 +- cmd/tikv-server/Cargo.toml | 4 +- components/file_system/Cargo.toml | 1 - .../file_system/src/io_stats/biosnoop.rs | 43 ++++---- components/file_system/src/io_stats/mod.rs | 8 ++ components/file_system/src/io_stats/proc.rs | 23 +++-- components/raft_log_engine/Cargo.toml | 2 +- components/server/Cargo.toml | 2 +- components/tidb_query_datatype/Cargo.toml | 2 +- deny.toml | 23 ++++- 13 files changed, 182 insertions(+), 105 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 320fc8957ba..db9f25c9fb8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -43,9 +43,9 @@ dependencies = [ [[package]] name = "ahash" -version = "0.7.4" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43bb833f0bf979d8475d38fbf09ed3b8a55e1885fe93ad3f93239fc6a4f17b98" +checksum = "5a824f2aa7e75a0c98c5a504fceb80649e9c35265d44525b5f94de4771a395cd" dependencies = [ "getrandom 0.2.11", "once_cell", @@ -54,13 +54,14 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.3" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" dependencies = [ "cfg-if 1.0.0", "once_cell", "version_check 0.9.4", + 
"zerocopy", ] [[package]] @@ -635,7 +636,7 @@ dependencies = [ "regex", "rustc-hash", "shlex 1.1.0", - "syn 2.0.18", + "syn 2.0.43", ] [[package]] @@ -691,9 +692,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.2.1" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12ae9db68ad7fac5fe51304d20f016c911539251075a214f8e663babefa35187" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "bytemuck" @@ -1213,11 +1214,10 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.6" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +checksum = "176dc175b78f56c0f321911d9c8eb2b77a78a4860b9c19db83835fea1a46649b" dependencies = [ - "cfg-if 1.0.0", "crossbeam-utils", ] @@ -1270,13 +1270,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.8" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38" -dependencies = [ - "cfg-if 1.0.0", - "lazy_static", -] +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" [[package]] name = "crypto" @@ -1447,7 +1443,7 @@ dependencies = [ [[package]] name = "encoding_rs" version = "0.8.29" -source = "git+https://github.com/xiongjiwei/encoding_rs.git?rev=68e0bc5a72a37a78228d80cd98047326559cf43c#68e0bc5a72a37a78228d80cd98047326559cf43c" +source = "git+https://github.com/tikv/encoding_rs.git?rev=68e0bc5a72a37a78228d80cd98047326559cf43c#68e0bc5a72a37a78228d80cd98047326559cf43c" dependencies = [ "cfg-if 1.0.0", ] @@ -1835,7 +1831,6 @@ dependencies = [ "fs2", "lazy_static", "libc 0.2.151", - "maligned", "online_config", "openssl", "parking_lot 0.12.1", @@ -1954,7 +1949,7 @@ dependencies = [ [[package]] name = "fs2" version = "0.4.3" -source = 
"git+https://github.com/tabokie/fs2-rs?branch=tikv#cd503764a19a99d74c1ab424dd13d6bcd093fcae" +source = "git+https://github.com/tikv/fs2-rs?branch=tikv#cd503764a19a99d74c1ab424dd13d6bcd093fcae" dependencies = [ "libc 0.2.151", "winapi 0.3.9", @@ -2382,7 +2377,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" dependencies = [ - "ahash 0.8.3", + "ahash 0.8.7", "allocator-api2", ] @@ -2639,7 +2634,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16d4bde3a7105e59c66a4104cfe9606453af1c7a0eac78cb7d5bc263eb762a70" dependencies = [ - "ahash 0.7.4", + "ahash 0.7.7", "atty", "indexmap 1.6.2", "itoa 1.0.1", @@ -2876,7 +2871,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#73ba736143699fa623486c335527dd2a284bd0df" +source = "git+https://github.com/tikv/rust-rocksdb.git#c247909c279e89b3eabb4f200d580ee50f27fda6" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2895,7 +2890,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#73ba736143699fa623486c335527dd2a284bd0df" +source = "git+https://github.com/tikv/rust-rocksdb.git#c247909c279e89b3eabb4f200d580ee50f27fda6" dependencies = [ "bzip2-sys", "cc", @@ -2982,12 +2977,6 @@ dependencies = [ "libc 0.2.151", ] -[[package]] -name = "maligned" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e88c3cbe8288f77f293e48a28b3232e3defd203a6d839fa7f68ea4329e83464" - [[package]] name = "match-template" version = "0.0.1" @@ -3372,7 +3361,7 @@ checksum = "9e6a0fd4f737c707bd9086cc16c925f294943eb62eb71499e9fd4cf71f8b9f4e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.43", ] [[package]] @@ -3475,9 +3464,9 @@ dependencies = [ [[package]] name = "once_cell" 
-version = "1.16.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "online_config" @@ -3908,7 +3897,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b69d39aab54d069e7f2fe8cb970493e7834601ca2d8c65fd7bbd183578080d1" dependencies = [ "proc-macro2", - "syn 2.0.18", + "syn 2.0.43", ] [[package]] @@ -3949,9 +3938,9 @@ checksum = "369a6ed065f249a159e06c45752c780bda2fb53c995718f9e484d08daa9eb42e" [[package]] name = "proc-macro2" -version = "1.0.60" +version = "1.0.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406" +checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" dependencies = [ "unicode-ident", ] @@ -4714,7 +4703,7 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f06953bb8b9e4307cb7ccc0d9d018e2ddd25a30d32831f631ce4fe8f17671f7" dependencies = [ - "ahash 0.7.4", + "ahash 0.7.7", "bitflags 1.3.2", "instant", "num-traits", @@ -4737,7 +4726,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#73ba736143699fa623486c335527dd2a284bd0df" +source = "git+https://github.com/tikv/rust-rocksdb.git#c247909c279e89b3eabb4f200d580ee50f27fda6" dependencies = [ "libc 0.2.151", "librocksdb_sys", @@ -5356,7 +5345,7 @@ dependencies = [ [[package]] name = "slog-global" version = "0.1.0" -source = "git+https://github.com/breeswish/slog-global.git?rev=d592f88e4dbba5eb439998463054f1a44fbf17b9#d592f88e4dbba5eb439998463054f1a44fbf17b9" +source = "git+https://github.com/tikv/slog-global.git?rev=d592f88e4dbba5eb439998463054f1a44fbf17b9#d592f88e4dbba5eb439998463054f1a44fbf17b9" dependencies = [ "arc-swap", 
"lazy_static", @@ -5455,7 +5444,7 @@ dependencies = [ [[package]] name = "snappy-sys" version = "0.1.0" -source = "git+https://github.com/busyjay/rust-snappy.git?branch=static-link#8c12738bad811397600455d6982aff754ea2ac44" +source = "git+https://github.com/tikv/rust-snappy.git?branch=static-link#8c12738bad811397600455d6982aff754ea2ac44" dependencies = [ "cmake", "libc 0.2.151", @@ -5632,7 +5621,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.18", + "syn 2.0.43", ] [[package]] @@ -5670,9 +5659,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.18" +version = "2.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" +checksum = "ee659fb5f3d355364e1f3e5bc10fb82068efbf824a1e9d1c9504244a6469ad53" dependencies = [ "proc-macro2", "quote", @@ -6242,7 +6231,7 @@ dependencies = [ "codec", "collections", "crc32fast", - "encoding_rs 0.8.29 (git+https://github.com/xiongjiwei/encoding_rs.git?rev=68e0bc5a72a37a78228d80cd98047326559cf43c)", + "encoding_rs 0.8.29 (git+https://github.com/tikv/encoding_rs.git?rev=68e0bc5a72a37a78228d80cd98047326559cf43c)", "error_code", "hex 0.4.2", "kvproto", @@ -6918,7 +6907,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.43", ] [[package]] @@ -6978,7 +6967,7 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", "static_assertions", ] @@ -7601,6 +7590,26 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = 
"zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.43", +] + [[package]] name = "zeroize" version = "1.1.0" diff --git a/Cargo.toml b/Cargo.toml index 249ee380b37..757f154b165 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -202,10 +202,10 @@ rusoto_sts = { git = "https://github.com/tikv/rusoto", branch = "gh1482-s3-addr- # NOTICE: use openssl for signature to support fips 140 tame-oauth = { git = "https://github.com/tikv/tame-oauth", branch = "fips-0.9" } -snappy-sys = { git = "https://github.com/busyjay/rust-snappy.git", branch = "static-link" } +snappy-sys = { git = "https://github.com/tikv/rust-snappy.git", branch = "static-link" } # remove this when https://github.com/danburkert/fs2-rs/pull/42 is merged. -fs2 = { git = "https://github.com/tabokie/fs2-rs", branch = "tikv" } +fs2 = { git = "https://github.com/tikv/fs2-rs", branch = "tikv" } # Remove this when a new version is release. We need to solve rust-lang/cmake-rs#143. cmake = { git = "https://github.com/rust-lang/cmake-rs" } @@ -315,9 +315,12 @@ batch-system = { path = "components/batch-system" } case_macros = { path = "components/case_macros" } causal_ts = { path = "components/causal_ts" } cdc = { path = "components/cdc", default-features = false } -# Do not enable default features that implicitly enables oldtime which is +# Do not enable default-features because it implicitly enables oldtime which is # vulnerable to RUSTSEC-2020-0071, see more in deny.toml. chrono = { version = "0.4", default-features = false } +# Do not enable default-features because it implicitly enables the unsound +# "atty" crate, see more about RUSTSEC-2021-0145 in deny.toml. 
+clap = { version = "2.32", default-features = false, features = ["suggestions", "vec_map"] } cloud = { path = "components/cloud" } codec = { path = "components/codec" } collections = { path = "components/collections" } @@ -384,6 +387,8 @@ tracker = { path = "components/tracker" } txn_types = { path = "components/txn_types" } # External libs raft = { version = "0.7.0", default-features = false, features = ["protobuf-codec"] } +raft-engine = { git = "https://github.com/tikv/raft-engine.git", features = ["swap"] } +raft-engine-ctl = { git = "https://github.com/tikv/raft-engine.git" } grpcio = { version = "0.10.4", default-features = false, features = ["openssl", "protobuf-codec", "nightly"] } grpcio-health = { version = "0.10.4", default-features = false, features = ["protobuf-codec"] } tipb = { git = "https://github.com/pingcap/tipb.git" } @@ -392,7 +397,7 @@ yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } tokio-timer = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } tokio-executor = { git = "https://github.com/tikv/tokio", branch = "tokio-timer-hotfix" } slog = { version = "2.3", features = ["max_level_trace", "release_max_level_debug"] } -slog-global = { version = "0.1", git = "https://github.com/breeswish/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } +slog-global = { version = "0.1", git = "https://github.com/tikv/slog-global.git", rev = "d592f88e4dbba5eb439998463054f1a44fbf17b9" } tracing-active-tree = { git = "https://github.com/tikv/tracing-active-tree.git", features = ["coarsetime"], rev = "a71f8f8148f88ab759deb6d3e1d62d07ab218347" } # This `tracing` is only used for `tracing-active-tree`, enable its attributes only. 
tracing = { version = "0.1.39", default-features = false, features = [ "attributes", "std" ] } diff --git a/clippy.toml b/clippy.toml index c1a11598a5a..15e0f1f549c 100644 --- a/clippy.toml +++ b/clippy.toml @@ -1,16 +1,55 @@ -disallowed-methods = [ - { path = "std::thread::Builder::spawn", reason = "Wrapper function `::spawn_wrapper` should be used instead, refer to https://github.com/tikv/tikv/pull/12442 for more details." }, +[[disallowed-methods]] +path = "std::thread::Builder::spawn" +reason = """ +Wrapper function `::spawn_wrapper` +should be used instead, refer to https://github.com/tikv/tikv/pull/12442 for more details. +""" +[[disallowed-methods]] +path = "tokio::runtime::builder::Builder::on_thread_start" +reason = """ +Adding hooks directly will omit system hooks, please use +::with_sys_and_custom_hooks +refer to https://github.com/tikv/tikv/pull/12442 and https://github.com/tikv/tikv/pull/15017 for more details. +""" +[[disallowed-methods]] +path = "tokio::runtime::builder::Builder::on_thread_stop" +reason = """ +Adding hooks directly will omit system hooks, please use +::with_sys_and_custom_hooks +refer to https://github.com/tikv/tikv/pull/12442 and https://github.com/tikv/tikv/pull/15017 for more details. +""" +[[disallowed-methods]] +path = "futures_executor::thread_pool::ThreadPoolBuilder::after_start" +reason = """ +Adding hooks directly will omit system hooks, please use +::with_sys_and_custom_hooks +refer to https://github.com/tikv/tikv/pull/12442 and https://github.com/tikv/tikv/pull/15017 for more details. +""" +[[disallowed-methods]] +path = "futures_executor::thread_pool::ThreadPoolBuilder::before_stop" +reason = """ +Adding hooks directly will omit system hooks, please use +::with_sys_and_custom_hooks +refer to https://github.com/tikv/tikv/pull/12442 and https://github.com/tikv/tikv/pull/15017 for more details. 
+""" - { path = "tokio::runtime::builder::Builder::on_thread_start", reason = "Adding hooks directly will omit system hooks, please use ::with_sys_and_custom_hooks refer to https://github.com/tikv/tikv/pull/12442 and https://github.com/tikv/tikv/pull/15017 for more details." }, - { path = "tokio::runtime::builder::Builder::on_thread_stop", reason = "Adding hooks directly will omit system hooks, please use ::with_sys_and_custom_hooks refer to https://github.com/tikv/tikv/pull/12442 and https://github.com/tikv/tikv/pull/15017 for more details." }, +# See more about RUSTSEC-2020-0071 in deny.toml. +[[disallowed-methods]] +path = "time::now" +reason = "time::now is unsound, see RUSTSEC-2020-0071" +[[disallowed-methods]] +path = "time::at" +reason = "time::at is unsound, see RUSTSEC-2020-0071" +[[disallowed-methods]] +path = "time::at_utc" +reason = "time::at_utc is unsound, see RUSTSEC-2020-0071" - { path = "futures_executor::thread_pool::ThreadPoolBuilder::after_start", reason = "Adding hooks directly will omit system hooks, please use ::with_sys_and_custom_hooks refer to https://github.com/tikv/tikv/pull/12442 and https://github.com/tikv/tikv/pull/15017 for more details." }, - { path = "futures_executor::thread_pool::ThreadPoolBuilder::before_stop", reason = "Adding hooks directly will omit system hooks, please use ::with_sys_and_custom_hooks refer to https://github.com/tikv/tikv/pull/12442 and https://github.com/tikv/tikv/pull/15017 for more details." }, +# See more about RUSTSEC-2023-0072 in deny.toml. +[[disallowed-methods]] +path = "openssl::x509::store::X509StoreRef::objects" +reason = """ +X509StoreRef::objects is unsound, see RUSTSEC-2020-0071 +""" - # See more about RUSTSEC-2020-0071 in deny.toml. 
- { path = "time::now", reason = "time::now is unsound, see RUSTSEC-2020-0071" }, - { path = "time::at", reason = "time::at is unsound, see RUSTSEC-2020-0071" }, - { path = "time::at_utc", reason = "time::at_utc is unsound, see RUSTSEC-2020-0071" }, -] avoid-breaking-exported-api = false upper-case-acronyms-aggressive = true diff --git a/cmd/tikv-ctl/Cargo.toml b/cmd/tikv-ctl/Cargo.toml index 056bc602224..e55ef234e8d 100644 --- a/cmd/tikv-ctl/Cargo.toml +++ b/cmd/tikv-ctl/Cargo.toml @@ -44,7 +44,7 @@ nortcheck = ["engine_rocks/nortcheck"] api_version = { workspace = true } backup = { workspace = true } cdc = { workspace = true } -clap = "2.32" +clap = { workspace = true } collections = { workspace = true } concurrency_manager = { workspace = true } crossbeam = "0.8" @@ -67,8 +67,8 @@ pd_client = { workspace = true } prometheus = { version = "0.13", features = ["nightly"] } protobuf = { version = "2.8", features = ["bytes"] } raft = { workspace = true } -raft-engine = { git = "https://github.com/tikv/raft-engine.git" } -raft-engine-ctl = { git = "https://github.com/tikv/raft-engine.git" } +raft-engine = { workspace = true } +raft-engine-ctl = { workspace = true } raft_log_engine = { workspace = true } raftstore = { workspace = true } regex = "1" diff --git a/cmd/tikv-server/Cargo.toml b/cmd/tikv-server/Cargo.toml index be02fe4915b..fdc42f35c3a 100644 --- a/cmd/tikv-server/Cargo.toml +++ b/cmd/tikv-server/Cargo.toml @@ -35,13 +35,13 @@ nortcheck = ["server/nortcheck"] pprof-fp = ["tikv/pprof-fp"] [dependencies] -clap = "2.32" +clap = { workspace = true } crypto = { workspace = true } encryption_export = { workspace = true } engine_traits = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } -raft-engine = { git = "https://github.com/tikv/raft-engine.git" } +raft-engine = { workspace = true } regex = "1" serde_json = { version = "1.0", features = ["preserve_order"] } server = { workspace = true } diff --git 
a/components/file_system/Cargo.toml b/components/file_system/Cargo.toml index ef7df46936d..5c778f87454 100644 --- a/components/file_system/Cargo.toml +++ b/components/file_system/Cargo.toml @@ -29,7 +29,6 @@ tikv_util = { workspace = true } tokio = { version = "1.5", features = ["time"] } [dev-dependencies] -maligned = "0.2.1" tempfile = "3.0" [target.'cfg(target_os = "linux")'.dependencies] diff --git a/components/file_system/src/io_stats/biosnoop.rs b/components/file_system/src/io_stats/biosnoop.rs index 6b804bfed87..2267193a3ec 100644 --- a/components/file_system/src/io_stats/biosnoop.rs +++ b/components/file_system/src/io_stats/biosnoop.rs @@ -150,10 +150,11 @@ pub fn get_io_type() -> IoType { } pub fn fetch_io_bytes() -> [IoBytes; IoType::COUNT] { - let mut bytes = Default::default(); + let mut bytes: [IoBytes; IoType::COUNT] = Default::default(); unsafe { if let Some(ctx) = BPF_CONTEXT.as_mut() { for io_type in IoType::iter() { + let mut io_type = io_type; let io_type_buf_ptr = &mut io_type as *mut IoType as *mut u8; let mut io_type_buf = std::slice::from_raw_parts_mut(io_type_buf_ptr, std::mem::size_of::()); @@ -269,15 +270,19 @@ pub fn flush_io_latency_metrics() { } } +pub fn get_thread_io_bytes_total() -> Result { + Err("unimplemented".into()) +} + #[cfg(test)] mod tests { use std::{ io::{Read, Seek, SeekFrom, Write}, + os::unix::fs::OpenOptionsExt, sync::{Arc, Condvar, Mutex}, }; use libc::O_DIRECT; - use maligned::{AsBytes, AsBytesMut, A512}; use rand::Rng; use tempfile::TempDir; use test::Bencher; @@ -286,7 +291,7 @@ mod tests { fetch_io_bytes, flush_io_latency_metrics, get_io_type, init, set_io_type, BPF_CONTEXT, MAX_THREAD_IDX, }; - use crate::{metrics::*, IoType, OpenOptions}; + use crate::{io_stats::A512, metrics::*, IoType, OpenOptions}; #[test] fn test_biosnoop() { @@ -311,10 +316,10 @@ mod tests { .custom_flags(O_DIRECT) .open(&file_path) .unwrap(); - let mut w = vec![A512::default(); 2]; - w.as_bytes_mut()[512] = 42; + let mut w = 
Box::new(A512([0u8; 512 * 2])); + w.0[512] = 42; let mut compaction_bytes_before = fetch_io_bytes()[IoType::Compaction as usize]; - f.write(w.as_bytes()).unwrap(); + f.write(&w.0).unwrap(); f.sync_all().unwrap(); let compaction_bytes = fetch_io_bytes()[IoType::Compaction as usize]; assert_ne!((compaction_bytes - compaction_bytes_before).write, 0); @@ -330,8 +335,8 @@ mod tests { .custom_flags(O_DIRECT) .open(&file_path) .unwrap(); - let mut r = vec![A512::default(); 2]; - assert_ne!(f.read(&mut r.as_bytes_mut()).unwrap(), 0); + let mut r = Box::new(A512([0u8; 512 * 2])); + assert_ne!(f.read(&mut r.0).unwrap(), 0); drop(f); }) .join() @@ -450,10 +455,10 @@ mod tests { .open(&file_path) .unwrap(); - let mut w = vec![A512::default(); 1]; - w.as_bytes_mut()[64] = 42; + let mut w = Box::new(A512([0u8; 512 * 1])); + w.0[64] = 42; for _ in 1..=100 { - f.write(w.as_bytes()).unwrap(); + f.write(&w.0).unwrap(); } f.sync_all().unwrap(); @@ -472,12 +477,12 @@ mod tests { .open(&file_path) .unwrap(); - let mut w = vec![A512::default(); 1]; - w.as_bytes_mut()[64] = 42; + let mut w = Box::new(A512([0u8; 512 * 1])); + w.0[64] = 42; b.iter(|| { set_io_type(IoType::ForegroundWrite); - f.write(w.as_bytes()).unwrap(); + f.write(&w.0).unwrap(); f.sync_all().unwrap(); }); } @@ -493,10 +498,10 @@ mod tests { .open(&file_path) .unwrap(); - let mut w = vec![A512::default(); 2]; - w.as_bytes_mut()[64] = 42; + let mut w = Box::new(A512([0u8; 512 * 2])); + w.0[64] = 42; for _ in 0..100 { - f.write(w.as_bytes()).unwrap(); + f.write(&w.0).unwrap(); } f.sync_all().unwrap(); drop(f); @@ -507,12 +512,12 @@ mod tests { .custom_flags(O_DIRECT) .open(&file_path) .unwrap(); - let mut r = vec![A512::default(); 2]; + let mut r = Box::new(A512([0u8; 512 * 2])); b.iter(|| { set_io_type(IoType::ForegroundRead); f.seek(SeekFrom::Start(rng.gen_range(0..100) * 512)) .unwrap(); - assert_ne!(f.read(&mut r.as_bytes_mut()).unwrap(), 0); + assert_ne!(f.read(&mut r.0).unwrap(), 0); }); } } diff --git 
a/components/file_system/src/io_stats/mod.rs b/components/file_system/src/io_stats/mod.rs index 216c6ad7897..f65d7c57884 100644 --- a/components/file_system/src/io_stats/mod.rs +++ b/components/file_system/src/io_stats/mod.rs @@ -47,6 +47,14 @@ mod proc; #[cfg(all(target_os = "linux", not(feature = "bcc-iosnoop")))] pub use proc::*; +// A struct assists testing IO stats. +// +// O_DIRECT requires I/O to be 512-byte aligned. +// See https://man7.org/linux/man-pages/man2/open.2.html#NOTES +#[cfg(test)] +#[repr(align(512))] +pub(crate) struct A512(pub [u8; SZ]); + #[cfg(test)] mod tests { use tikv_util::sys::thread::StdThreadBuildWrapper; diff --git a/components/file_system/src/io_stats/proc.rs b/components/file_system/src/io_stats/proc.rs index d66a04aa4e8..8da6876d70f 100644 --- a/components/file_system/src/io_stats/proc.rs +++ b/components/file_system/src/io_stats/proc.rs @@ -196,12 +196,11 @@ mod tests { }; use libc::O_DIRECT; - use maligned::{AsBytes, AsBytesMut, A512}; use tempfile::{tempdir, tempdir_in}; use tikv_util::sys::thread::StdThreadBuildWrapper; use super::*; - use crate::{OpenOptions, WithIoType}; + use crate::{io_stats::A512, OpenOptions, WithIoType}; #[test] fn test_read_bytes() { @@ -216,8 +215,8 @@ mod tests { .custom_flags(O_DIRECT) .open(&file_path) .unwrap(); - let w = vec![A512::default(); 10]; - f.write_all(w.as_bytes()).unwrap(); + let w = Box::new(A512([0u8; 512 * 10])); + f.write_all(&w.0).unwrap(); f.sync_all().unwrap(); } let mut f = OpenOptions::new() @@ -225,10 +224,10 @@ mod tests { .custom_flags(O_DIRECT) .open(&file_path) .unwrap(); - let mut w = vec![A512::default(); 1]; + let mut w = A512([0u8; 512]); let base_local_bytes = id.fetch_io_bytes().unwrap(); for i in 1..=10 { - f.read_exact(w.as_bytes_mut()).unwrap(); + f.read_exact(&mut w.0).unwrap(); let local_bytes = id.fetch_io_bytes().unwrap(); assert_eq!(i * 512 + base_local_bytes.read, local_bytes.read); @@ -247,10 +246,10 @@ mod tests { .custom_flags(O_DIRECT) .open(file_path) 
.unwrap(); - let w = vec![A512::default(); 8]; + let w = Box::new(A512([0u8; 512 * 8])); let base_local_bytes = id.fetch_io_bytes().unwrap(); for i in 1..=10 { - f.write_all(w.as_bytes()).unwrap(); + f.write_all(&w.0).unwrap(); f.sync_all().unwrap(); let local_bytes = id.fetch_io_bytes().unwrap(); @@ -275,8 +274,8 @@ mod tests { .custom_flags(O_DIRECT) .open(file_path) .unwrap(); - let w = vec![A512::default(); 8]; - f.write_all(w.as_bytes()).unwrap(); + let w = Box::new(A512([0u8; 512 * 8])); + f.write_all(&w.0).unwrap(); f.sync_all().unwrap(); tx1.send(()).unwrap(); tx1.send(()).unwrap(); @@ -293,8 +292,8 @@ mod tests { .custom_flags(O_DIRECT) .open(file_path) .unwrap(); - let w = vec![A512::default(); 8]; - f.write_all(w.as_bytes()).unwrap(); + let w = Box::new(A512([0u8; 512 * 8])); + f.write_all(&w.0).unwrap(); f.sync_all().unwrap(); tx2.send(()).unwrap(); tx2.send(()).unwrap(); diff --git a/components/raft_log_engine/Cargo.toml b/components/raft_log_engine/Cargo.toml index 913a0d18ae1..dc98bae0d5e 100644 --- a/components/raft_log_engine/Cargo.toml +++ b/components/raft_log_engine/Cargo.toml @@ -18,7 +18,7 @@ num_cpus = "1" online_config = { workspace = true } protobuf = "2" raft = { workspace = true } -raft-engine = { git = "https://github.com/tikv/raft-engine.git", features = ["swap"] } +raft-engine = { workspace = true } serde = "1.0" serde_derive = "1.0" slog = { workspace = true } diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index cf2d653bf52..c03c42e8f71 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -40,7 +40,7 @@ backup-stream = { workspace = true } causal_ts = { workspace = true } cdc = { workspace = true } chrono = { workspace = true } -clap = "2.32" +clap = { workspace = true } collections = { workspace = true } concurrency_manager = { workspace = true } crossbeam = "0.8" diff --git a/components/tidb_query_datatype/Cargo.toml b/components/tidb_query_datatype/Cargo.toml index 
5f52580d454..33aff4c7b5e 100644 --- a/components/tidb_query_datatype/Cargo.toml +++ b/components/tidb_query_datatype/Cargo.toml @@ -17,7 +17,7 @@ chrono-tz = "0.5.1" codec = { workspace = true } collections = { workspace = true } crc32fast = "1.2" -encoding_rs = { git = "https://github.com/xiongjiwei/encoding_rs.git", rev = "68e0bc5a72a37a78228d80cd98047326559cf43c" } +encoding_rs = { git = "https://github.com/tikv/encoding_rs.git", rev = "68e0bc5a72a37a78228d80cd98047326559cf43c" } error_code = { workspace = true } hex = "0.4" kvproto = { workspace = true } diff --git a/deny.toml b/deny.toml index 1cc62067c7c..8bd0ea33df7 100644 --- a/deny.toml +++ b/deny.toml @@ -43,18 +43,31 @@ multiple-versions = "allow" [advisories] vulnerability = "deny" unmaintained = "allow" # FIXME: Deny it. -unsound = "allow" # FIXME: Deny it. -yanked = "allow" # FIXME: Deny it. +unsound = "deny" +yanked = "deny" notice = "warn" ignore = [ # Ignore time 0.1 RUSTSEC-2020-0071 as 1) we have taken measures (see # clippy.toml) to mitigate the issue and 2) time 0.1 has no fix availble. # - # Note: Upgrading to time 0.3 do fix the issue but it's an imcompatible + # NB: Upgrading to time 0.3 do fix the issue but it's an imcompatible # versoin which removes some necessary APIs (`time::precise_time_ns`) that # requiresd by TiKV. # See https://github.com/time-rs/time/blob/8067540c/CHANGELOG.md#L703 "RUSTSEC-2020-0071", + # Ignore RUSTSEC-2023-0072 as we bans the unsound `X509StoreRef::objects`. + # + # NB: Upgrading rust-openssl the latest version do fix the issue but it + # also upgrade the OpenSSL to v3.x which causes performance degradation. + # See https://github.com/openssl/openssl/issues/17064 + "RUSTSEC-2023-0072", + # Ignore RUSTSEC-2023-0072 (unsound issue of "atty" crate) as it only + # affects Windows plaform which is not supported offically by TiKV, and 2) + # we have disabled the clap feature "color" so that the "atty" crate is not + # included in production code. 
+ # + # TODO: Upgrade clap to v4.x. + "RUSTSEC-2021-0145", ] [licenses] @@ -82,6 +95,6 @@ exceptions = [ ] [sources] -unknown-git = "allow" # FIXME: Deny it. +unknown-git = "deny" unknown-registry = "deny" -allow-org = { github = ["tikv"] } +allow-org = { github = ["tikv", "pingcap", "rust-lang"] } From 31cdbb6d920baf40d16c3b1a773de4de323f8a25 Mon Sep 17 00:00:00 2001 From: qupeng Date: Tue, 16 Jan 2024 00:43:44 +0800 Subject: [PATCH 1112/1149] cdc: incremental scans use correct specified ranges (#16252) close tikv/tikv#16362 Signed-off-by: qupeng Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/cdc/src/delegate.rs | 4 +- components/cdc/src/initializer.rs | 149 ++++++++++++++++++++++++------ 2 files changed, 124 insertions(+), 29 deletions(-) diff --git a/components/cdc/src/delegate.rs b/components/cdc/src/delegate.rs index 637ecab0440..74e8fbc93ec 100644 --- a/components/cdc/src/delegate.rs +++ b/components/cdc/src/delegate.rs @@ -1164,8 +1164,8 @@ fn decode_default(value: Vec, row: &mut EventRow, has_value: &mut bool) { /// Observed key range. #[derive(Clone, Default)] pub struct ObservedRange { - start_key_encoded: Vec, - end_key_encoded: Vec, + pub(crate) start_key_encoded: Vec, + pub(crate) end_key_encoded: Vec, start_key_raw: Vec, end_key_raw: Vec, pub(crate) all_key_covered: bool, diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 6a674c683a5..d34faad1335 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -56,7 +56,8 @@ use crate::{ Error, Result, Task, }; -struct ScanStat { +#[derive(Copy, Clone, Debug, Default)] +pub(crate) struct ScanStat { // Fetched bytes to the scanner. emit: usize, // Bytes from the device, `None` if not possible to get it. 
@@ -172,6 +173,7 @@ impl Initializer { assert_eq!(self.region_id, region.get_id()); self.async_incremental_scan(region_snapshot, region, memory_quota) .await + .map(|_| ()) } else { assert!( resp.response.get_header().has_error(), @@ -188,7 +190,7 @@ impl Initializer { snap: S, region: Region, memory_quota: Arc, - ) -> Result<()> { + ) -> Result { CDC_SCAN_TASKS.with_label_values(&["ongoing"]).inc(); defer!(CDC_SCAN_TASKS.with_label_values(&["ongoing"]).dec()); @@ -197,7 +199,7 @@ impl Initializer { let observe_id = self.observe_id; let conn_id = self.conn_id; let kv_api = self.kv_api; - let on_cancel = || -> Result<()> { + let on_cancel = || -> Result { info!("cdc async incremental scan canceled"; "region_id" => region_id, "downstream_id" => ?downstream_id, @@ -211,13 +213,29 @@ impl Initializer { } self.observed_range.update_region_key_range(®ion); + + // Be compatible with old TiCDC clients, which won't give `observed_range`. + let (start_key, end_key): (Key, Key); + if self.observed_range.start_key_encoded <= region.start_key { + start_key = Key::from_encoded_slice(®ion.start_key); + } else { + start_key = Key::from_encoded_slice(&self.observed_range.start_key_encoded); + } + if self.observed_range.end_key_encoded.is_empty() + || self.observed_range.end_key_encoded >= region.end_key && !region.end_key.is_empty() + { + end_key = Key::from_encoded_slice(®ion.end_key); + } else { + end_key = Key::from_encoded_slice(&self.observed_range.end_key_encoded) + } + debug!("cdc async incremental scan"; "region_id" => region_id, "downstream_id" => ?downstream_id, "observe_id" => ?self.observe_id, "all_key_covered" => ?self.observed_range.all_key_covered, - "start_key" => log_wrappers::Value::key(snap.lower_bound().unwrap_or_default()), - "end_key" => log_wrappers::Value::key(snap.upper_bound().unwrap_or_default())); + "start_key" => log_wrappers::Value::key(start_key.as_encoded()), + "end_key" => log_wrappers::Value::key(end_key.as_encoded())); let mut resolver = if 
self.build_resolver { Some(Resolver::new(region_id, memory_quota)) @@ -227,17 +245,23 @@ impl Initializer { let (mut hint_min_ts, mut old_value_cursors) = (None, None); let mut scanner = if kv_api == ChangeDataRequestKvApi::TiDb { - if self.ts_filter_is_helpful(&snap) { + if self.ts_filter_is_helpful(&start_key, &end_key) { hint_min_ts = Some(self.checkpoint_ts); let wc = new_old_value_cursor(&snap, CF_WRITE); let dc = new_old_value_cursor(&snap, CF_DEFAULT); old_value_cursors = Some(OldValueCursors::new(wc, dc)); } + let upper_boundary = if end_key.as_encoded().is_empty() { + // Region upper boundary could be an empty slice. + None + } else { + Some(end_key) + }; // Time range: (checkpoint_ts, max] let txnkv_scanner = ScannerBuilder::new(snap, TimeStamp::max()) .fill_cache(false) - .range(None, None) + .range(Some(start_key), upper_boundary) .hint_min_ts(hint_min_ts) .build_delta_scanner(self.checkpoint_ts, TxnExtraOp::ReadOldValue) .unwrap(); @@ -268,6 +292,7 @@ impl Initializer { DownstreamState::Initializing | DownstreamState::Stopped )); + let mut scan_stat = ScanStat::default(); while !done { // When downstream_state is Stopped, it means the corresponding // delegate is stopped. The initialization can be safely canceled. @@ -276,7 +301,9 @@ impl Initializer { } let cursors = old_value_cursors.as_mut(); let resolver = resolver.as_mut(); - let entries = self.scan_batch(&mut scanner, cursors, resolver).await?; + let entries = self + .scan_batch(&mut scanner, cursors, resolver, &mut scan_stat) + .await?; if let Some(None) = entries.last() { // If the last element is None, it means scanning is finished. 
done = true; @@ -306,7 +333,7 @@ impl Initializer { CDC_SCAN_DURATION_HISTOGRAM.observe(takes.as_secs_f64()); CDC_SCAN_SINK_DURATION_HISTOGRAM.observe(duration_to_sec(sink_time)); - Ok(()) + Ok(scan_stat) } // It's extracted from `Initializer::scan_batch` to avoid becoming an @@ -390,22 +417,24 @@ impl Initializer { scanner: &mut Scanner, old_value_cursors: Option<&mut OldValueCursors>, resolver: Option<&mut Resolver>, + scan_stat: &mut ScanStat, ) -> Result>> { let mut entries = Vec::with_capacity(self.max_scan_batch_size); - let ScanStat { - emit, - disk_read, - perf_delta, - } = self.do_scan(scanner, old_value_cursors, &mut entries)?; + let delta = self.do_scan(scanner, old_value_cursors, &mut entries)?; + scan_stat.emit += delta.emit; + scan_stat.perf_delta += delta.perf_delta; + if let Some(disk_read) = delta.disk_read { + *scan_stat.disk_read.get_or_insert(0) += disk_read; + } - TLS_CDC_PERF_STATS.with(|x| *x.borrow_mut() += perf_delta); + TLS_CDC_PERF_STATS.with(|x| *x.borrow_mut() += delta.perf_delta); tls_flush_perf_stats(); - if let Some(bytes) = disk_read { + if let Some(bytes) = delta.disk_read { CDC_SCAN_DISK_READ_BYTES.inc_by(bytes as _); self.scan_speed_limiter.consume(bytes).await; } - CDC_SCAN_BYTES.inc_by(emit as _); - self.fetch_speed_limiter.consume(emit as _).await; + CDC_SCAN_BYTES.inc_by(delta.emit as _); + self.fetch_speed_limiter.consume(delta.emit as _).await; if let Some(resolver) = resolver { // Track the locks. 
@@ -506,13 +535,13 @@ impl Initializer { } } - fn ts_filter_is_helpful(&self, snap: &S) -> bool { + fn ts_filter_is_helpful(&self, start_key: &Key, end_key: &Key) -> bool { if self.ts_filter_ratio < f64::EPSILON { return false; } + let start_key = data_key(start_key.as_encoded()); + let end_key = data_end_key(end_key.as_encoded()); - let start_key = data_key(snap.lower_bound().unwrap_or_default()); - let end_key = data_end_key(snap.upper_bound().unwrap_or_default()); let range = Range::new(&start_key, &end_key); let tablet = match self.tablet.as_ref() { Some(t) => t, @@ -705,12 +734,14 @@ mod tests { total_bytes += v.len(); let ts = TimeStamp::new(i as _); must_prewrite_put(&mut engine, k, v, k, ts); - let txn_locks = expected_locks.entry(ts).or_insert_with(|| { - let mut txn_locks = TxnLocks::default(); - txn_locks.sample_lock = Some(k.to_vec().into()); - txn_locks - }); - txn_locks.lock_count += 1; + if i < 90 { + let txn_locks = expected_locks.entry(ts).or_insert_with(|| { + let mut txn_locks = TxnLocks::default(); + txn_locks.sample_lock = Some(k.to_vec().into()); + txn_locks + }); + txn_locks.lock_count += 1; + } } let region = Region::default(); @@ -1141,4 +1172,68 @@ mod tests { block_on(th).unwrap(); worker.stop(); } + + #[test] + fn test_initialize_scan_range() { + let mut cfg = DbConfig::default(); + cfg.writecf.disable_auto_compactions = true; + let mut engine = TestEngineBuilder::new().build_with_cfg(&cfg).unwrap(); + + // Must start with 'z', otherwise table property collector doesn't work. + let ka = Key::from_raw(b"zaaa").into_encoded(); + let km = Key::from_raw(b"zmmm").into_encoded(); + let ky = Key::from_raw(b"zyyy").into_encoded(); + let kz = Key::from_raw(b"zzzz").into_encoded(); + + // Incremental scan iterator shouldn't access the key because it's out of range. 
+ must_prewrite_put(&mut engine, &ka, b"value", &ka, 200); + must_commit(&mut engine, &ka, 200, 210); + for cf in &[CF_WRITE, CF_DEFAULT] { + let kv = engine.kv_engine().unwrap(); + kv.flush_cf(cf, true).unwrap(); + } + + // Incremental scan iterator shouldn't access the key because it's skiped by ts + // filter. + must_prewrite_put(&mut engine, &km, b"value", &km, 100); + must_commit(&mut engine, &km, 100, 110); + for cf in &[CF_WRITE, CF_DEFAULT] { + let kv = engine.kv_engine().unwrap(); + kv.flush_cf(cf, true).unwrap(); + } + + must_prewrite_put(&mut engine, &ky, b"value", &ky, 200); + must_commit(&mut engine, &ky, 200, 210); + for cf in &[CF_WRITE, CF_DEFAULT] { + let kv = engine.kv_engine().unwrap(); + kv.flush_cf(cf, true).unwrap(); + } + + let (mut _worker, pool, mut initializer, _rx, mut drain) = mock_initializer( + usize::MAX, + usize::MAX, + 1000, + engine.kv_engine(), + ChangeDataRequestKvApi::TiDb, + false, + ); + + initializer.observed_range = ObservedRange::new(km, kz).unwrap(); + initializer.checkpoint_ts = 150.into(); + + let th = pool.spawn(async move { + let snap = engine.snapshot(Default::default()).unwrap(); + let region = Region::default(); + let memory_quota = Arc::new(MemoryQuota::new(usize::MAX)); + let scan_stat = initializer + .async_incremental_scan(snap, region, memory_quota) + .await + .unwrap(); + let block_reads = scan_stat.perf_delta.block_read_count; + let block_gets = scan_stat.perf_delta.block_cache_hit_count; + assert_eq!(block_reads + block_gets, 1); + }); + while block_on(drain.drain().next()).is_some() {} + block_on(th).unwrap(); + } } From 1b926f2d388bdd2c90676d4596a7b666c2b1da1f Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Tue, 16 Jan 2024 16:13:45 +0800 Subject: [PATCH 1113/1149] test: fix lint (#16384) ref tikv/tikv#16371 Fix lint Signed-off-by: cfzjywxk Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/test_coprocessor/src/store.rs | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/components/test_coprocessor/src/store.rs b/components/test_coprocessor/src/store.rs index 221ed5afe46..d5baede9b84 100644 --- a/components/test_coprocessor/src/store.rs +++ b/components/test_coprocessor/src/store.rs @@ -316,7 +316,7 @@ impl Store { .set_v2(&tbl["id"], ScalarValue::Int(None)) .set_v2(&tbl["name"], ScalarValue::Bytes(None)) .set_v2(&tbl["count"], ScalarValue::Int(None)); - inserts.execute_with_v2_checksum(ctx.clone(), with_checksum, extra_checksum); + inserts.execute_with_v2_checksum(ctx, with_checksum, extra_checksum); self.commit(); } } From bbcc9e25aaafd270fa7c6e4840200f5f65876fb1 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Tue, 16 Jan 2024 16:48:17 +0800 Subject: [PATCH 1114/1149] *: deny strong copyleft and unlicensed crates (#16389) ref tikv/tikv#16328 *: deny unlicensed crates *: add licenses to private crates *: prune strong copyleft crates Remove the "mnt" crate which is licensed under LGPL 3.0. Explicitly allow crates that are licensed "weak" copyleft. 
Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 16 +--- components/api_version/Cargo.toml | 1 + components/backup-stream/Cargo.toml | 1 + components/backup/Cargo.toml | 1 + components/batch-system/Cargo.toml | 1 + components/case_macros/Cargo.toml | 1 + components/causal_ts/Cargo.toml | 1 + components/cdc/Cargo.toml | 1 + components/cloud/Cargo.toml | 1 + components/cloud/aws/Cargo.toml | 1 + components/cloud/azure/Cargo.toml | 1 + components/cloud/gcp/Cargo.toml | 1 + components/codec/Cargo.toml | 1 + components/collections/Cargo.toml | 1 + components/concurrency_manager/Cargo.toml | 1 + components/coprocessor_plugin_api/Cargo.toml | 1 + components/crypto/Cargo.toml | 1 + components/encryption/Cargo.toml | 1 + components/encryption/export/Cargo.toml | 1 + components/engine_panic/Cargo.toml | 1 + components/engine_rocks/Cargo.toml | 1 + components/engine_rocks_helper/Cargo.toml | 1 + components/engine_test/Cargo.toml | 1 + components/engine_tirocks/Cargo.toml | 1 + components/engine_traits/Cargo.toml | 1 + components/engine_traits_tests/Cargo.toml | 1 + components/error_code/Cargo.toml | 1 + components/external_storage/Cargo.toml | 1 + components/file_system/Cargo.toml | 1 + components/hybrid_engine/Cargo.toml | 1 + components/into_other/Cargo.toml | 1 + components/keys/Cargo.toml | 1 + components/log_wrappers/Cargo.toml | 1 + components/memory_trace_macros/Cargo.toml | 1 + components/online_config/Cargo.toml | 1 + .../online_config_derive/Cargo.toml | 1 + components/panic_hook/Cargo.toml | 1 + components/pd_client/Cargo.toml | 1 + components/profiler/Cargo.toml | 1 + components/raft_log_engine/Cargo.toml | 1 + components/raftstore-v2/Cargo.toml | 1 + .../region_cache_memory_engine/Cargo.toml | 1 + components/resolved_ts/Cargo.toml | 1 + components/resource_control/Cargo.toml | 1 + components/resource_metering/Cargo.toml | 1 + components/security/Cargo.toml | 1 + 
components/sst_importer/Cargo.toml | 1 + components/test_backup/Cargo.toml | 1 + components/test_coprocessor/Cargo.toml | 1 + .../example_plugin/Cargo.toml | 1 + components/test_pd/Cargo.toml | 1 + components/test_pd_client/Cargo.toml | 1 + components/test_raftstore-v2/Cargo.toml | 1 + components/test_raftstore/Cargo.toml | 1 + components/test_raftstore_macro/Cargo.toml | 1 + components/test_sst_importer/Cargo.toml | 1 + components/test_storage/Cargo.toml | 1 + components/test_util/Cargo.toml | 1 + components/tidb_query_aggr/Cargo.toml | 1 + components/tidb_query_codegen/Cargo.toml | 1 + components/tidb_query_common/Cargo.toml | 1 + components/tidb_query_datatype/Cargo.toml | 1 + components/tidb_query_executors/Cargo.toml | 1 + components/tidb_query_expr/Cargo.toml | 1 + components/tikv_alloc/Cargo.toml | 1 + components/tikv_kv/Cargo.toml | 1 + components/tikv_util/Cargo.toml | 2 +- components/tikv_util/src/sys/mod.rs | 95 ++++++++++++++++++- components/tipb_helper/Cargo.toml | 1 + components/tracker/Cargo.toml | 1 + components/txn_types/Cargo.toml | 1 + deny.toml | 18 ++-- fuzz/Cargo.toml | 1 + fuzz/fuzzer-afl/Cargo.toml | 1 + fuzz/fuzzer-honggfuzz/Cargo.toml | 1 + fuzz/fuzzer-libfuzzer/Cargo.toml | 1 + fuzz/targets/Cargo.toml | 1 + tests/Cargo.toml | 1 + 78 files changed, 183 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index db9f25c9fb8..7de7b5f441c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2871,7 +2871,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#c247909c279e89b3eabb4f200d580ee50f27fda6" +source = "git+https://github.com/tikv/rust-rocksdb.git#256c9ca2f45fef644b518223707de50f841fe9e8" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2890,7 +2890,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#c247909c279e89b3eabb4f200d580ee50f27fda6" +source = 
"git+https://github.com/tikv/rust-rocksdb.git#256c9ca2f45fef644b518223707de50f841fe9e8" dependencies = [ "bzip2-sys", "cc", @@ -3165,15 +3165,6 @@ dependencies = [ "tempdir", ] -[[package]] -name = "mnt" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1587ebb20a5b04738f16cffa7e2526f1b8496b84f92920facd518362ff1559eb" -dependencies = [ - "libc 0.2.151", -] - [[package]] name = "more-asserts" version = "0.2.1" @@ -4726,7 +4717,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#c247909c279e89b3eabb4f200d580ee50f27fda6" +source = "git+https://github.com/tikv/rust-rocksdb.git#256c9ca2f45fef644b518223707de50f841fe9e8" dependencies = [ "libc 0.2.151", "librocksdb_sys", @@ -6645,7 +6636,6 @@ dependencies = [ "libc 0.2.151", "log", "log_wrappers", - "mnt", "nix 0.24.1", "num-traits", "num_cpus", diff --git a/components/api_version/Cargo.toml b/components/api_version/Cargo.toml index 3518e99030f..fd3f1c765e9 100644 --- a/components/api_version/Cargo.toml +++ b/components/api_version/Cargo.toml @@ -3,6 +3,7 @@ name = "api_version" version = "0.1.0" edition = "2021" publish = false +license = "Apache-2.0" [features] testexport = [] diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 3fb9d484a02..50b28f8d2f9 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -2,6 +2,7 @@ name = "backup-stream" version = "0.1.0" edition = "2021" +license = "Apache-2.0" [features] default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] diff --git a/components/backup/Cargo.toml b/components/backup/Cargo.toml index 03b6e439879..af5e74d0eec 100644 --- a/components/backup/Cargo.toml +++ b/components/backup/Cargo.toml @@ -3,6 +3,7 @@ name = "backup" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] default = ["test-engine-kv-rocksdb", 
"test-engine-raft-raft-engine"] diff --git a/components/batch-system/Cargo.toml b/components/batch-system/Cargo.toml index bd1ae6c56b4..b68bf6b79c6 100644 --- a/components/batch-system/Cargo.toml +++ b/components/batch-system/Cargo.toml @@ -2,6 +2,7 @@ name = "batch-system" version = "0.1.0" edition = "2021" +license = "Apache-2.0" [features] default = ["test-runner"] diff --git a/components/case_macros/Cargo.toml b/components/case_macros/Cargo.toml index ff6e837938a..a118f6adba1 100644 --- a/components/case_macros/Cargo.toml +++ b/components/case_macros/Cargo.toml @@ -2,6 +2,7 @@ name = "case_macros" version = "0.1.0" edition = "2021" +license = "Apache-2.0" [lib] proc-macro = true diff --git a/components/causal_ts/Cargo.toml b/components/causal_ts/Cargo.toml index fb049a22ca7..c17f07cbfaf 100644 --- a/components/causal_ts/Cargo.toml +++ b/components/causal_ts/Cargo.toml @@ -3,6 +3,7 @@ name = "causal_ts" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] testexport = [] diff --git a/components/cdc/Cargo.toml b/components/cdc/Cargo.toml index 4a7ee57a193..eb9de9d4e5d 100644 --- a/components/cdc/Cargo.toml +++ b/components/cdc/Cargo.toml @@ -3,6 +3,7 @@ name = "cdc" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] diff --git a/components/cloud/Cargo.toml b/components/cloud/Cargo.toml index b5f1e8faffd..3a103679094 100644 --- a/components/cloud/Cargo.toml +++ b/components/cloud/Cargo.toml @@ -3,6 +3,7 @@ name = "cloud" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] async-trait = "0.1" diff --git a/components/cloud/aws/Cargo.toml b/components/cloud/aws/Cargo.toml index 6c387e99974..75cddac7cea 100644 --- a/components/cloud/aws/Cargo.toml +++ b/components/cloud/aws/Cargo.toml @@ -3,6 +3,7 @@ name = "aws" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" 
[features] failpoints = ["fail/failpoints"] diff --git a/components/cloud/azure/Cargo.toml b/components/cloud/azure/Cargo.toml index 07a4752451e..41a7a2821e4 100644 --- a/components/cloud/azure/Cargo.toml +++ b/components/cloud/azure/Cargo.toml @@ -3,6 +3,7 @@ name = "azure" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] failpoints = ["fail/failpoints"] diff --git a/components/cloud/gcp/Cargo.toml b/components/cloud/gcp/Cargo.toml index d4bba10302b..f6c774fee7e 100644 --- a/components/cloud/gcp/Cargo.toml +++ b/components/cloud/gcp/Cargo.toml @@ -3,6 +3,7 @@ name = "gcp" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] async-trait = "0.1" diff --git a/components/codec/Cargo.toml b/components/codec/Cargo.toml index 08cf49aff16..f5f9252a410 100644 --- a/components/codec/Cargo.toml +++ b/components/codec/Cargo.toml @@ -3,6 +3,7 @@ name = "codec" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] byteorder = "1.2" diff --git a/components/collections/Cargo.toml b/components/collections/Cargo.toml index e92618a884b..706f6fa5d8b 100644 --- a/components/collections/Cargo.toml +++ b/components/collections/Cargo.toml @@ -3,6 +3,7 @@ name = "collections" version = "0.1.0" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] fxhash = "0.2.1" diff --git a/components/concurrency_manager/Cargo.toml b/components/concurrency_manager/Cargo.toml index 846f140dc46..0ffee70899b 100644 --- a/components/concurrency_manager/Cargo.toml +++ b/components/concurrency_manager/Cargo.toml @@ -3,6 +3,7 @@ edition = "2021" name = "concurrency_manager" publish = false version = "0.0.1" +license = "Apache-2.0" [dependencies] crossbeam-skiplist = "0.1" diff --git a/components/coprocessor_plugin_api/Cargo.toml b/components/coprocessor_plugin_api/Cargo.toml index 0c4753bc2ce..886f8910490 100644 --- a/components/coprocessor_plugin_api/Cargo.toml +++ 
b/components/coprocessor_plugin_api/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" description = "Types and trait for custom coprocessor plugins for TiKV." edition = "2021" publish = false +license = "Apache-2.0" [dependencies] async-trait = "0.1" diff --git a/components/crypto/Cargo.toml b/components/crypto/Cargo.toml index 26eb77ee057..924e8e89e20 100644 --- a/components/crypto/Cargo.toml +++ b/components/crypto/Cargo.toml @@ -3,6 +3,7 @@ name = "crypto" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] openssl = { workspace = true } diff --git a/components/encryption/Cargo.toml b/components/encryption/Cargo.toml index 7375a9c0b20..ae10ab78843 100644 --- a/components/encryption/Cargo.toml +++ b/components/encryption/Cargo.toml @@ -3,6 +3,7 @@ name = "encryption" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] failpoints = ["fail/failpoints"] diff --git a/components/encryption/export/Cargo.toml b/components/encryption/export/Cargo.toml index feb4ce23c8c..c8eebfd98fd 100644 --- a/components/encryption/export/Cargo.toml +++ b/components/encryption/export/Cargo.toml @@ -3,6 +3,7 @@ name = "encryption_export" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] default = ["cloud-aws", "cloud-gcp", "cloud-azure"] diff --git a/components/engine_panic/Cargo.toml b/components/engine_panic/Cargo.toml index 7c41290993c..246f0bf9d9b 100644 --- a/components/engine_panic/Cargo.toml +++ b/components/engine_panic/Cargo.toml @@ -4,6 +4,7 @@ version = "0.0.1" description = "An example TiKV storage engine that does nothing but panic" edition = "2021" publish = false +license = "Apache-2.0" [features] testexport = [] diff --git a/components/engine_rocks/Cargo.toml b/components/engine_rocks/Cargo.toml index 774055ad526..3a21461164f 100644 --- a/components/engine_rocks/Cargo.toml +++ b/components/engine_rocks/Cargo.toml @@ -3,6 +3,7 @@ name = "engine_rocks" version = "0.0.1" 
edition = "2021" publish = false +license = "Apache-2.0" [features] trace-lifetime = [] diff --git a/components/engine_rocks_helper/Cargo.toml b/components/engine_rocks_helper/Cargo.toml index 632e2e1a6f5..31355157a1a 100644 --- a/components/engine_rocks_helper/Cargo.toml +++ b/components/engine_rocks_helper/Cargo.toml @@ -3,6 +3,7 @@ name = "engine_rocks_helper" version = "0.1.0" edition = "2021" publish = false +license = "Apache-2.0" [features] failpoints = ["fail/failpoints"] diff --git a/components/engine_test/Cargo.toml b/components/engine_test/Cargo.toml index f834d5556a4..3ac42ba73ef 100644 --- a/components/engine_test/Cargo.toml +++ b/components/engine_test/Cargo.toml @@ -4,6 +4,7 @@ version = "0.0.1" description = "A single engine that masquerades as all other engines, for testing" edition = "2021" publish = false +license = "Apache-2.0" [features] default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] diff --git a/components/engine_tirocks/Cargo.toml b/components/engine_tirocks/Cargo.toml index b3cac78b502..0fa5073877c 100644 --- a/components/engine_tirocks/Cargo.toml +++ b/components/engine_tirocks/Cargo.toml @@ -2,6 +2,7 @@ name = "engine_tirocks" version = "0.1.0" edition = "2021" +license = "Apache-2.0" [dependencies] api_version = { workspace = true } diff --git a/components/engine_traits/Cargo.toml b/components/engine_traits/Cargo.toml index 8e8812ec6e2..63cd5d172f4 100644 --- a/components/engine_traits/Cargo.toml +++ b/components/engine_traits/Cargo.toml @@ -3,6 +3,7 @@ name = "engine_traits" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] failpoints = ["fail/failpoints"] diff --git a/components/engine_traits_tests/Cargo.toml b/components/engine_traits_tests/Cargo.toml index f0b230efccd..321f79f3245 100644 --- a/components/engine_traits_tests/Cargo.toml +++ b/components/engine_traits_tests/Cargo.toml @@ -4,6 +4,7 @@ version = "0.0.1" description = "Engine-agnostic tests for the engine_traits 
interface" edition = "2021" publish = false +license = "Apache-2.0" [lib] doctest = false diff --git a/components/error_code/Cargo.toml b/components/error_code/Cargo.toml index 04179f5bd53..0be4d7fa58c 100644 --- a/components/error_code/Cargo.toml +++ b/components/error_code/Cargo.toml @@ -3,6 +3,7 @@ name = "error_code" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [lib] name = "error_code" diff --git a/components/external_storage/Cargo.toml b/components/external_storage/Cargo.toml index 69de83e5474..52a06cdb9d2 100644 --- a/components/external_storage/Cargo.toml +++ b/components/external_storage/Cargo.toml @@ -3,6 +3,7 @@ name = "external_storage" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] async-compression = { version = "0.3.14", features = ["futures-io", "zstd"] } diff --git a/components/file_system/Cargo.toml b/components/file_system/Cargo.toml index 5c778f87454..a6c7007ada7 100644 --- a/components/file_system/Cargo.toml +++ b/components/file_system/Cargo.toml @@ -3,6 +3,7 @@ name = "file_system" version = "0.1.0" edition = "2021" publish = false +license = "Apache-2.0" [features] bcc-iosnoop = ["bcc"] diff --git a/components/hybrid_engine/Cargo.toml b/components/hybrid_engine/Cargo.toml index 79fedc4bcbd..c83b6bd48d2 100644 --- a/components/hybrid_engine/Cargo.toml +++ b/components/hybrid_engine/Cargo.toml @@ -3,6 +3,7 @@ name = "hybrid_engine" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] testexport = [] diff --git a/components/into_other/Cargo.toml b/components/into_other/Cargo.toml index dac98342f20..15b66df8696 100644 --- a/components/into_other/Cargo.toml +++ b/components/into_other/Cargo.toml @@ -3,6 +3,7 @@ name = "into_other" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] engine_traits = { workspace = true } diff --git a/components/keys/Cargo.toml b/components/keys/Cargo.toml index 
a34ae0df79d..7ffbc023956 100644 --- a/components/keys/Cargo.toml +++ b/components/keys/Cargo.toml @@ -3,6 +3,7 @@ name = "keys" version = "0.1.0" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] byteorder = "1.2" diff --git a/components/log_wrappers/Cargo.toml b/components/log_wrappers/Cargo.toml index c472755947f..4d7e25f128a 100644 --- a/components/log_wrappers/Cargo.toml +++ b/components/log_wrappers/Cargo.toml @@ -3,6 +3,7 @@ name = "log_wrappers" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] hex = "0.4" diff --git a/components/memory_trace_macros/Cargo.toml b/components/memory_trace_macros/Cargo.toml index 8dc800cf7b1..ba5bcf8d8c9 100644 --- a/components/memory_trace_macros/Cargo.toml +++ b/components/memory_trace_macros/Cargo.toml @@ -2,6 +2,7 @@ name = "memory_trace_macros" version = "0.1.0" edition = "2021" +license = "Apache-2.0" [lib] proc-macro = true diff --git a/components/online_config/Cargo.toml b/components/online_config/Cargo.toml index 1ee16e9b639..d5b4bf7a76a 100644 --- a/components/online_config/Cargo.toml +++ b/components/online_config/Cargo.toml @@ -3,6 +3,7 @@ name = "online_config" version = "0.1.0" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] chrono = { workspace = true } diff --git a/components/online_config/online_config_derive/Cargo.toml b/components/online_config/online_config_derive/Cargo.toml index bcc206e907c..ecf34979dc6 100644 --- a/components/online_config/online_config_derive/Cargo.toml +++ b/components/online_config/online_config_derive/Cargo.toml @@ -3,6 +3,7 @@ name = "online_config_derive" version = "0.1.0" edition = "2021" publish = false +license = "Apache-2.0" [lib] proc-macro = true diff --git a/components/panic_hook/Cargo.toml b/components/panic_hook/Cargo.toml index cfe4030bd08..cca5293bdc8 100644 --- a/components/panic_hook/Cargo.toml +++ b/components/panic_hook/Cargo.toml @@ -3,3 +3,4 @@ name = "panic_hook" version = "0.0.1" 
edition = "2021" publish = false +license = "Apache-2.0" diff --git a/components/pd_client/Cargo.toml b/components/pd_client/Cargo.toml index 7be69dd4136..a5925a584b2 100644 --- a/components/pd_client/Cargo.toml +++ b/components/pd_client/Cargo.toml @@ -3,6 +3,7 @@ name = "pd_client" version = "0.1.0" edition = "2021" publish = false +license = "Apache-2.0" [features] failpoints = ["fail/failpoints"] diff --git a/components/profiler/Cargo.toml b/components/profiler/Cargo.toml index a3382229791..02096ab8d0d 100644 --- a/components/profiler/Cargo.toml +++ b/components/profiler/Cargo.toml @@ -3,6 +3,7 @@ name = "profiler" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] profiling = ["lazy_static", "gperftools", "callgrind", "valgrind_request"] diff --git a/components/raft_log_engine/Cargo.toml b/components/raft_log_engine/Cargo.toml index dc98bae0d5e..d0a604abbd6 100644 --- a/components/raft_log_engine/Cargo.toml +++ b/components/raft_log_engine/Cargo.toml @@ -3,6 +3,7 @@ name = "raft_log_engine" version = "0.0.1" publish = false edition = "2021" +license = "Apache-2.0" [features] failpoints = ["raft-engine/failpoints"] diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index c925a8c472e..86c93e02e57 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -2,6 +2,7 @@ name = "raftstore-v2" version = "0.1.0" edition = "2021" +license = "Apache-2.0" [features] default = ["testexport", "test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] diff --git a/components/region_cache_memory_engine/Cargo.toml b/components/region_cache_memory_engine/Cargo.toml index 448d49971c3..2cd006b8e57 100644 --- a/components/region_cache_memory_engine/Cargo.toml +++ b/components/region_cache_memory_engine/Cargo.toml @@ -3,6 +3,7 @@ name = "region_cache_memory_engine" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] testexport = [] diff --git 
a/components/resolved_ts/Cargo.toml b/components/resolved_ts/Cargo.toml index c6c2695fada..8bcca29480d 100644 --- a/components/resolved_ts/Cargo.toml +++ b/components/resolved_ts/Cargo.toml @@ -3,6 +3,7 @@ name = "resolved_ts" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] tcmalloc = ["tikv/tcmalloc"] diff --git a/components/resource_control/Cargo.toml b/components/resource_control/Cargo.toml index 21f5cad1514..ab44b0ab675 100644 --- a/components/resource_control/Cargo.toml +++ b/components/resource_control/Cargo.toml @@ -3,6 +3,7 @@ name = "resource_control" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] failpoints = ["fail/failpoints"] diff --git a/components/resource_metering/Cargo.toml b/components/resource_metering/Cargo.toml index f6776970aa0..31ac4d7131c 100644 --- a/components/resource_metering/Cargo.toml +++ b/components/resource_metering/Cargo.toml @@ -2,6 +2,7 @@ name = "resource_metering" version = "0.0.1" edition = "2021" +license = "Apache-2.0" [dependencies] collections = { workspace = true } diff --git a/components/security/Cargo.toml b/components/security/Cargo.toml index 8a7421be75d..e103ae235df 100644 --- a/components/security/Cargo.toml +++ b/components/security/Cargo.toml @@ -3,6 +3,7 @@ name = "security" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] collections = { workspace = true } diff --git a/components/sst_importer/Cargo.toml b/components/sst_importer/Cargo.toml index b501e509a8a..41f29fb6c70 100644 --- a/components/sst_importer/Cargo.toml +++ b/components/sst_importer/Cargo.toml @@ -3,6 +3,7 @@ name = "sst_importer" version = "0.1.0" edition = "2021" publish = false +license = "Apache-2.0" [features] default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] diff --git a/components/test_backup/Cargo.toml b/components/test_backup/Cargo.toml index ea6ff5e30ea..a9d19120453 100644 --- 
a/components/test_backup/Cargo.toml +++ b/components/test_backup/Cargo.toml @@ -3,6 +3,7 @@ name = "test_backup" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] api_version = { workspace = true } diff --git a/components/test_coprocessor/Cargo.toml b/components/test_coprocessor/Cargo.toml index a09626eedac..f3af09512eb 100644 --- a/components/test_coprocessor/Cargo.toml +++ b/components/test_coprocessor/Cargo.toml @@ -3,6 +3,7 @@ name = "test_coprocessor" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] diff --git a/components/test_coprocessor_plugin/example_plugin/Cargo.toml b/components/test_coprocessor_plugin/example_plugin/Cargo.toml index 854eacbb2c1..8dd5ae04cee 100644 --- a/components/test_coprocessor_plugin/example_plugin/Cargo.toml +++ b/components/test_coprocessor_plugin/example_plugin/Cargo.toml @@ -3,6 +3,7 @@ name = "example_coprocessor_plugin" version = "0.1.0" edition = "2021" publish = false +license = "Apache-2.0" [lib] crate-type = ["dylib"] diff --git a/components/test_pd/Cargo.toml b/components/test_pd/Cargo.toml index 811b9e7b0ca..21aec3b524f 100644 --- a/components/test_pd/Cargo.toml +++ b/components/test_pd/Cargo.toml @@ -3,6 +3,7 @@ name = "test_pd" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] collections = { workspace = true } diff --git a/components/test_pd_client/Cargo.toml b/components/test_pd_client/Cargo.toml index f1256938535..90bf7a24759 100644 --- a/components/test_pd_client/Cargo.toml +++ b/components/test_pd_client/Cargo.toml @@ -3,6 +3,7 @@ name = "test_pd_client" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] collections = { workspace = true } diff --git a/components/test_raftstore-v2/Cargo.toml b/components/test_raftstore-v2/Cargo.toml index 14d2192852a..5f71f7f99a1 100644 --- 
a/components/test_raftstore-v2/Cargo.toml +++ b/components/test_raftstore-v2/Cargo.toml @@ -3,6 +3,7 @@ name = "test_raftstore-v2" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] diff --git a/components/test_raftstore/Cargo.toml b/components/test_raftstore/Cargo.toml index 33430ba3fa8..e648eef86b9 100644 --- a/components/test_raftstore/Cargo.toml +++ b/components/test_raftstore/Cargo.toml @@ -3,6 +3,7 @@ name = "test_raftstore" version = "0.0.1" edition = "2018" publish = false +license = "Apache-2.0" [features] default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine", "cloud-aws", "cloud-gcp", "cloud-azure"] diff --git a/components/test_raftstore_macro/Cargo.toml b/components/test_raftstore_macro/Cargo.toml index 327527f3ae5..cdea9c7b0f0 100644 --- a/components/test_raftstore_macro/Cargo.toml +++ b/components/test_raftstore_macro/Cargo.toml @@ -3,6 +3,7 @@ name = "test_raftstore_macro" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [lib] proc-macro = true diff --git a/components/test_sst_importer/Cargo.toml b/components/test_sst_importer/Cargo.toml index 09bdb722de3..56d00183180 100644 --- a/components/test_sst_importer/Cargo.toml +++ b/components/test_sst_importer/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2021" publish = false description = "test helpers for sst_importer" +license = "Apache-2.0" [lib] test = false diff --git a/components/test_storage/Cargo.toml b/components/test_storage/Cargo.toml index 97ea7bf0d24..d6ca443e54a 100644 --- a/components/test_storage/Cargo.toml +++ b/components/test_storage/Cargo.toml @@ -3,6 +3,7 @@ name = "test_storage" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] diff --git a/components/test_util/Cargo.toml 
b/components/test_util/Cargo.toml index eb4b07030a0..b5cc4c5781b 100644 --- a/components/test_util/Cargo.toml +++ b/components/test_util/Cargo.toml @@ -3,6 +3,7 @@ name = "test_util" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [features] default = ["cloud-aws", "cloud-gcp", "cloud-azure"] diff --git a/components/tidb_query_aggr/Cargo.toml b/components/tidb_query_aggr/Cargo.toml index f7b0378a173..7594321f535 100644 --- a/components/tidb_query_aggr/Cargo.toml +++ b/components/tidb_query_aggr/Cargo.toml @@ -4,6 +4,7 @@ version = "0.0.1" edition = "2021" publish = false description = "Vector aggr functions of query engine to run TiDB pushed down executors" +license = "Apache-2.0" [dependencies] match-template = "0.0.1" diff --git a/components/tidb_query_codegen/Cargo.toml b/components/tidb_query_codegen/Cargo.toml index c3ae8d8106c..72e48656424 100644 --- a/components/tidb_query_codegen/Cargo.toml +++ b/components/tidb_query_codegen/Cargo.toml @@ -3,6 +3,7 @@ name = "tidb_query_codegen" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [lib] proc-macro = true diff --git a/components/tidb_query_common/Cargo.toml b/components/tidb_query_common/Cargo.toml index 32d1a92df1b..ff7c0ca58a2 100644 --- a/components/tidb_query_common/Cargo.toml +++ b/components/tidb_query_common/Cargo.toml @@ -4,6 +4,7 @@ version = "0.0.1" edition = "2021" publish = false description = "Common utility of a query engine to run TiDB pushed down executors" +license = "Apache-2.0" [dependencies] anyhow = "1.0" diff --git a/components/tidb_query_datatype/Cargo.toml b/components/tidb_query_datatype/Cargo.toml index 33aff4c7b5e..e789e8c856d 100644 --- a/components/tidb_query_datatype/Cargo.toml +++ b/components/tidb_query_datatype/Cargo.toml @@ -4,6 +4,7 @@ version = "0.0.1" edition = "2021" publish = false description = "Data type of a query engine to run TiDB pushed down executors" +license = "Apache-2.0" [dependencies] api_version = { 
workspace = true } diff --git a/components/tidb_query_executors/Cargo.toml b/components/tidb_query_executors/Cargo.toml index 30fe64252ac..ceb55a8e942 100644 --- a/components/tidb_query_executors/Cargo.toml +++ b/components/tidb_query_executors/Cargo.toml @@ -4,6 +4,7 @@ version = "0.0.1" edition = "2021" publish = false description = "A vector query engine to run TiDB pushed down executors" +license = "Apache-2.0" [dependencies] api_version = { workspace = true } diff --git a/components/tidb_query_expr/Cargo.toml b/components/tidb_query_expr/Cargo.toml index 298944df4c0..29c25957c69 100644 --- a/components/tidb_query_expr/Cargo.toml +++ b/components/tidb_query_expr/Cargo.toml @@ -4,6 +4,7 @@ version = "0.0.1" edition = "2021" publish = false description = "Vector expressions of query engine to run TiDB pushed down executors" +license = "Apache-2.0" [dependencies] base64 = "0.13" diff --git a/components/tikv_alloc/Cargo.toml b/components/tikv_alloc/Cargo.toml index 968969f3332..31e0ea0e804 100644 --- a/components/tikv_alloc/Cargo.toml +++ b/components/tikv_alloc/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2021" authors = ["Brian Anderson "] publish = false +license = "Apache-2.0" [features] jemalloc = ["tikv-jemallocator", "tikv-jemalloc-ctl", "tikv-jemalloc-sys"] diff --git a/components/tikv_kv/Cargo.toml b/components/tikv_kv/Cargo.toml index 6df829ad925..672462071b9 100644 --- a/components/tikv_kv/Cargo.toml +++ b/components/tikv_kv/Cargo.toml @@ -5,6 +5,7 @@ authors = ["The TiKV Authors"] description = "The key-value abstraction directly used by TiKV" edition = "2021" publish = false +license = "Apache-2.0" [features] default = ["test-engine-kv-rocksdb", "test-engine-raft-raft-engine"] diff --git a/components/tikv_util/Cargo.toml b/components/tikv_util/Cargo.toml index 0e731fcd154..b99a722a02b 100644 --- a/components/tikv_util/Cargo.toml +++ b/components/tikv_util/Cargo.toml @@ -3,6 +3,7 @@ name = "tikv_util" version = "0.1.0" edition = "2021" 
publish = false +license = "Apache-2.0" [features] failpoints = ["fail/failpoints"] @@ -32,7 +33,6 @@ lazy_static = "1.3" libc = "0.2" log = { version = "0.4", features = ["max_level_trace", "release_max_level_debug"] } log_wrappers = { workspace = true } -mnt = "0.3.1" nix = "0.24" num-traits = "0.2" num_cpus = "1" diff --git a/components/tikv_util/src/sys/mod.rs b/components/tikv_util/src/sys/mod.rs index 797da2aea54..0ab8060d46f 100644 --- a/components/tikv_util/src/sys/mod.rs +++ b/components/tikv_util/src/sys/mod.rs @@ -17,8 +17,6 @@ use std::{ use fail::fail_point; #[cfg(target_os = "linux")] use lazy_static::lazy_static; -#[cfg(target_os = "linux")] -use mnt::get_mount; use sysinfo::RefreshKind; pub use sysinfo::{CpuExt, DiskExt, NetworkExt, ProcessExt, SystemExt}; @@ -169,6 +167,12 @@ pub fn path_in_diff_mount_point(path1: impl AsRef, path2: impl AsRef if empty_path(path1) || empty_path(path2) { return false; } + let get_mount = |path| -> std::io::Result<_> { + let mounts = std::fs::File::open("/proc/mounts")?; + let mount_point = get_path_mount_point(Box::new(std::io::BufReader::new(mounts)), path); + Ok(mount_point) + }; + match (get_mount(path1), get_mount(path2)) { (Err(e1), _) => { warn!("Get mount point error for path {}, {}", path1.display(), e1); @@ -190,6 +194,48 @@ pub fn path_in_diff_mount_point(path1: impl AsRef, path2: impl AsRef } } +#[cfg(target_os = "linux")] +fn get_path_mount_point(mounts: Box, path: &Path) -> Option { + use std::io::BufRead; + + // (fs_file, mount point) + let mut ret = None; + // Each filesystem is described on a separate line. Fields on each line are + // separated by tabs or spaces. Lines starting with '#' are comments. + // Blank lines are ignored. + // See man 5 fstab. 
+ for line in mounts.lines() { + let line = match line { + Ok(line) => line, + Err(e) => { + warn!("fail to read mounts line, error {}", e); + continue; + } + }; + if line.is_empty() || line.starts_with('#') { + continue; + } + // We only care about the second field (fs_file). + let mut idx = 0; + for field in line.split(&[' ', '\t']) { + if field.is_empty() { + continue; + } + if idx == 1 { + if path.starts_with(field) { + // Keep the longest match. + if ret.as_ref().map_or(0, |r: &(String, String)| r.0.len()) < field.len() { + ret = Some((field.to_owned(), line.clone())); + } + } + break; + } + idx += 1; + } + } + ret.map(|r| r.1) +} + #[cfg(not(target_os = "linux"))] pub fn path_in_diff_mount_point(_path1: impl AsRef, _path2: impl AsRef) -> bool { false @@ -217,4 +263,49 @@ mod tests { let result = path_in_diff_mount_point(normal_path1, normal_path2); assert_eq!(result, false); } + + #[test] + fn test_get_path_mount_point() { + let mounts = " +sysfs /sys sysfs rw,nosuid,nodev,noexec,relatime 0 0 +proc /proc proc rw,nosuid,nodev,noexec,relatime 0 0 +tmpfs /sys/fs/cgroup tmpfs ro,nosuid,nodev,noexec,mode=755 0 0 +cgroup /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd 0 0 +pstore /sys/fs/pstore pstore rw,nosuid,nodev,noexec,relatime 0 0 +bpf /sys/fs/bpf bpf rw,nosuid,nodev,noexec,relatime,mode=700 0 0 +none /sys/kernel/tracing tracefs rw,relatime 0 0 +configfs /sys/kernel/config configfs rw,relatime 0 0 +systemd-1 /proc/sys/fs/binfmt_misc autofs rw,relatime,fd=32,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=16122 0 0 +mqueue /dev/mqueue mqueue rw,relatime 0 0 +/dev/vda2 /boot ext4 rw,relatime 0 0 +/dev/vda3 / ext4 rw,relatime 0 0 + +# Double spaces in below. +/dev/nvme1n1 /data/nvme1n1 xfs rw,seclabel,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota 0 0 +# \\t in below. 
+/dev/nvme0n1\t/data/nvme0n1/data ext4 rw,seclabel,relatime 0 0 +"; + let reader = mounts.as_bytes(); + let check = |path: &str, expected: Option<&str>| { + let mp = get_path_mount_point(Box::new(reader), Path::new(path)); + if let Some(expected) = expected { + assert!( + mp.as_ref().unwrap().starts_with(expected), + "{:?}: {:?}", + mp, + expected + ); + } else { + assert!(mp.is_none(), "{:?}: {:?}", mp, expected); + }; + }; + check("/data/nvme1n1", Some("/dev/nvme1n1 /data/nvme1n1 xfs")); + check( + "/data/nvme0n1/data/tikv", + Some("/dev/nvme0n1\t/data/nvme0n1/data ext4"), + ); + check("/data/nvme0n1", Some("/dev/vda3 / ext4")); + check("/home", Some("/dev/vda3 / ext4")); + check("unknown/path", None); + } } diff --git a/components/tipb_helper/Cargo.toml b/components/tipb_helper/Cargo.toml index 2954a1f0d4f..020239b0105 100644 --- a/components/tipb_helper/Cargo.toml +++ b/components/tipb_helper/Cargo.toml @@ -3,6 +3,7 @@ name = "tipb_helper" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] codec = { workspace = true } diff --git a/components/tracker/Cargo.toml b/components/tracker/Cargo.toml index a43dd0c566d..1a00f6eb513 100644 --- a/components/tracker/Cargo.toml +++ b/components/tracker/Cargo.toml @@ -3,6 +3,7 @@ name = "tracker" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] collections = { workspace = true } diff --git a/components/txn_types/Cargo.toml b/components/txn_types/Cargo.toml index 0db4d0a6dff..19eb2f99b30 100644 --- a/components/txn_types/Cargo.toml +++ b/components/txn_types/Cargo.toml @@ -3,6 +3,7 @@ name = "txn_types" version = "0.1.0" edition = "2021" publish = false +license = "Apache-2.0" [dependencies] bitflags = "1.0.1" diff --git a/deny.toml b/deny.toml index 8bd0ea33df7..209ebd2fe6f 100644 --- a/deny.toml +++ b/deny.toml @@ -52,7 +52,7 @@ ignore = [ # # NB: Upgrading to time 0.3 do fix the issue but it's an imcompatible # versoin which removes some 
necessary APIs (`time::precise_time_ns`) that - # requiresd by TiKV. + # are required by TiKV. # See https://github.com/time-rs/time/blob/8067540c/CHANGELOG.md#L703 "RUSTSEC-2020-0071", # Ignore RUSTSEC-2023-0072 as we bans the unsound `X509StoreRef::objects`. @@ -70,10 +70,15 @@ ignore = [ "RUSTSEC-2021-0145", ] +# TiKV is licensed under Apache 2.0, according to ASF 3RD PARTY LICENSE POLICY, +# TiKV can include licenses in Category A, and include licenses in Category B +# under certain conditions. +# See https://www.apache.org/legal/resolved.html. [licenses] -unlicensed = "allow" # FIXME: Deny it. +unlicensed = "deny" copyleft = "deny" -private = { ignore = true } +private = { ignore = false } +# Allow licenses in Category A allow = [ "0BSD", "Apache-2.0", @@ -81,7 +86,6 @@ allow = [ "CC0-1.0", "ISC", "MIT", - "MPL-2.0", "Zlib", ] exceptions = [ @@ -89,8 +93,10 @@ exceptions = [ # which is licensed under Unicode-DFS-2016. # See https://github.com/dtolnay/unicode-ident/pull/4 { name = "unicode-ident", allow = ["Unicode-DFS-2016"] }, - # FIXME: Remove this crate. - { name = "mnt", allow = ["LGPL-3.0"] }, + + # Allow licenses in Category B explicitly, make their usage more prominent. 
+ { name = "slog-json", allow = ["MPL-2.0"] }, + { name = "smartstring", allow = ["MPL-2.0"] }, { name = "inferno", allow = ["CDDL-1.0"] }, ] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index a617ae8b693..f11543d571c 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -3,6 +3,7 @@ name = "fuzz" version = "0.0.1" publish = false edition = "2021" +license = "Apache-2.0" [[bin]] name = "fuzz" diff --git a/fuzz/fuzzer-afl/Cargo.toml b/fuzz/fuzzer-afl/Cargo.toml index 5e9894fba3e..ae2d288160e 100644 --- a/fuzz/fuzzer-afl/Cargo.toml +++ b/fuzz/fuzzer-afl/Cargo.toml @@ -2,6 +2,7 @@ name = "fuzzer-afl" version = "0.0.1" publish = false +license = "Apache-2.0" [dependencies] fuzz-targets = { path = "../targets" } diff --git a/fuzz/fuzzer-honggfuzz/Cargo.toml b/fuzz/fuzzer-honggfuzz/Cargo.toml index 500f7466af3..efd43dedc2b 100644 --- a/fuzz/fuzzer-honggfuzz/Cargo.toml +++ b/fuzz/fuzzer-honggfuzz/Cargo.toml @@ -2,6 +2,7 @@ name = "fuzzer-honggfuzz" version = "0.0.1" publish = false +license = "Apache-2.0" [dependencies] fuzz-targets = { path = "../targets" } diff --git a/fuzz/fuzzer-libfuzzer/Cargo.toml b/fuzz/fuzzer-libfuzzer/Cargo.toml index db508147afa..90a9969330a 100644 --- a/fuzz/fuzzer-libfuzzer/Cargo.toml +++ b/fuzz/fuzzer-libfuzzer/Cargo.toml @@ -2,6 +2,7 @@ name = "fuzzer-libfuzzer" version = "0.0.1" publish = false +license = "Apache-2.0" [dependencies] fuzz-targets = { path = "../targets" } diff --git a/fuzz/targets/Cargo.toml b/fuzz/targets/Cargo.toml index 92d8eb48b08..e5fb9366a10 100644 --- a/fuzz/targets/Cargo.toml +++ b/fuzz/targets/Cargo.toml @@ -3,6 +3,7 @@ name = "fuzz-targets" version = "0.0.1" publish = false edition = "2021" +license = "Apache-2.0" [lib] path = "mod.rs" diff --git a/tests/Cargo.toml b/tests/Cargo.toml index c1f09fb3d45..c16094b3327 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -3,6 +3,7 @@ name = "tests" version = "0.0.1" edition = "2021" publish = false +license = "Apache-2.0" [[test]] name = "failpoints" From 
a94f47ad01cb1bf13622a3265e7bc459ff663369 Mon Sep 17 00:00:00 2001 From: glorv Date: Wed, 17 Jan 2024 16:36:17 +0800 Subject: [PATCH 1115/1149] *: update rust-toolchain (#16397) ref tikv/tikv#15581 update rust-toolchain to nightly-2023-12-28 to fix code coverage build Signed-off-by: glorv --- cmd/tikv-ctl/src/fork_readonly_tikv.rs | 2 +- cmd/tikv-ctl/src/main.rs | 5 +---- components/backup-stream/src/endpoint.rs | 1 + components/backup/src/endpoint.rs | 4 ++-- components/cloud/aws/src/s3.rs | 10 +++++----- components/cloud/azure/src/azblob.rs | 6 +++--- components/file_system/src/io_stats/mod.rs | 2 +- components/file_system/src/io_stats/proc.rs | 2 +- components/raftstore-v2/src/operation/bucket.rs | 7 +------ .../raftstore-v2/src/operation/command/mod.rs | 2 +- components/raftstore-v2/src/raft/peer.rs | 1 - components/raftstore/src/store/fsm/peer.rs | 2 -- components/raftstore/src/store/peer.rs | 2 -- components/raftstore/src/store/worker/read.rs | 1 - .../raftstore/src/store/worker/split_check.rs | 16 ++++++++-------- components/resource_metering/src/model.rs | 2 +- components/test_coprocessor/src/store.rs | 3 +-- components/test_util/src/runner.rs | 2 +- .../tidb_query_datatype/src/codec/table.rs | 2 +- .../src/index_scan_executor.rs | 8 ++++---- .../src/table_scan_executor.rs | 2 +- .../src/util/aggr_executor.rs | 4 ++-- components/tikv_kv/src/lib.rs | 2 +- components/tikv_kv/src/stats.rs | 2 +- components/tikv_util/src/logger/mod.rs | 10 +++++----- components/tikv_util/src/thread_group.rs | 2 +- components/tikv_util/src/time.rs | 2 +- components/tracker/src/slab.rs | 2 +- components/tracker/src/tls.rs | 2 +- rust-toolchain.toml | 2 +- src/server/load_statistics/mod.rs | 2 +- src/storage/txn/commands/prewrite.rs | 2 +- tests/integrations/coprocessor/test_select.rs | 2 +- tests/integrations/server/debugger.rs | 2 +- .../storage/test_region_info_accessor.rs | 6 +++--- 35 files changed, 55 insertions(+), 69 deletions(-) diff --git 
a/cmd/tikv-ctl/src/fork_readonly_tikv.rs b/cmd/tikv-ctl/src/fork_readonly_tikv.rs index dd2ec039542..934ef173a67 100644 --- a/cmd/tikv-ctl/src/fork_readonly_tikv.rs +++ b/cmd/tikv-ctl/src/fork_readonly_tikv.rs @@ -229,7 +229,7 @@ fn rocksdb_files_should_copy(iter: &mut dyn Iterator) -> Vec>(path: P) -> Result { diff --git a/cmd/tikv-ctl/src/main.rs b/cmd/tikv-ctl/src/main.rs index 74a0a01ca65..6fb558e7601 100644 --- a/cmd/tikv-ctl/src/main.rs +++ b/cmd/tikv-ctl/src/main.rs @@ -127,10 +127,7 @@ fn main() { let key_manager = data_key_manager_from_config(&cfg.security.encryption, &cfg.storage.data_dir) .expect("data_key_manager_from_config should success"); - let file_system = Arc::new(ManagedFileSystem::new( - key_manager.map(|m| Arc::new(m)), - None, - )); + let file_system = Arc::new(ManagedFileSystem::new(key_manager.map(Arc::new), None)); raft_engine_ctl::run_command(args, file_system); } Cmd::BadSsts { manifest, pd } => { diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index 3bc1d7669d9..e0cc3a91dfb 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -1051,6 +1051,7 @@ where }) ); } + #[allow(clippy::blocks_in_conditions)] RegionCheckpointOperation::Resolve { min_ts, start_time } => { let sched = self.scheduler.clone(); try_send!( diff --git a/components/backup/src/endpoint.rs b/components/backup/src/endpoint.rs index 5a0a8bca141..2ae7633eb1d 100644 --- a/components/backup/src/endpoint.rs +++ b/components/backup/src/endpoint.rs @@ -1583,7 +1583,7 @@ pub mod tests { }; // Test whether responses contain correct range. - #[allow(clippy::blocks_in_if_conditions)] + #[allow(clippy::blocks_in_conditions)] let test_handle_backup_task_range = |start_key: &[u8], end_key: &[u8], expect: Vec<(&[u8], &[u8])>| { let tmp = TempDir::new().unwrap(); @@ -1830,7 +1830,7 @@ pub mod tests { }; // Test whether responses contain correct range. 
- #[allow(clippy::blocks_in_if_conditions)] + #[allow(clippy::blocks_in_conditions)] let test_handle_backup_task_ranges = |sub_ranges: Vec<(&[u8], &[u8])>, expect: Vec<(&[u8], &[u8])>| { let tmp = TempDir::new().unwrap(); diff --git a/components/cloud/aws/src/s3.rs b/components/cloud/aws/src/s3.rs index 5962912a0bd..71c890a61c3 100644 --- a/components/cloud/aws/src/s3.rs +++ b/components/cloud/aws/src/s3.rs @@ -202,7 +202,7 @@ impl S3Storage { sts, String::clone(config.role_arn.as_deref().unwrap()), format!("{}", timestamp_secs), - config.external_id.as_deref().map(String::clone), + config.external_id.as_deref().cloned(), // default duration is 15min None, None, @@ -224,7 +224,7 @@ impl S3Storage { let cred_provider = StaticProvider::new( (*access_key_pair.access_key).to_owned(), (*access_key_pair.secret_access_key).to_owned(), - access_key_pair.session_token.as_deref().map(String::clone), + access_key_pair.session_token.as_deref().cloned(), None, ); Self::maybe_assume_role(config, cred_provider, dispatcher) @@ -487,7 +487,7 @@ impl<'client> S3Uploader<'client> { part_number: i64, data: &[u8], ) -> Result> { - match timeout(Self::get_timeout(), async { + let res = timeout(Self::get_timeout(), async { let start = Instant::now(); let r = self .client @@ -507,8 +507,8 @@ impl<'client> S3Uploader<'client> { .observe(start.saturating_elapsed().as_secs_f64()); r }) - .await - { + .await; + match res { Ok(part) => Ok(CompletedPart { e_tag: part?.e_tag, part_number: Some(part_number), diff --git a/components/cloud/azure/src/azblob.rs b/components/cloud/azure/src/azblob.rs index 078dc55be8f..662c5643584 100644 --- a/components/cloud/azure/src/azblob.rs +++ b/components/cloud/azure/src/azblob.rs @@ -317,7 +317,7 @@ impl AzureUploader { /// This should be used only when the data is known to be short, and thus /// relatively cheap to retry the entire upload. 
async fn upload(&self, data: &[u8]) -> Result<(), RequestError> { - match timeout(Self::get_timeout(), async { + let res = timeout(Self::get_timeout(), async { let builder = self .client_builder .get_client() @@ -331,8 +331,8 @@ impl AzureUploader { builder.await?; Ok(()) }) - .await - { + .await; + match res { Ok(res) => match res { Ok(_) => Ok(()), Err(err) => Err(RequestError::InvalidInput( diff --git a/components/file_system/src/io_stats/mod.rs b/components/file_system/src/io_stats/mod.rs index f65d7c57884..b303d725f06 100644 --- a/components/file_system/src/io_stats/mod.rs +++ b/components/file_system/src/io_stats/mod.rs @@ -13,7 +13,7 @@ mod stub { } thread_local! { - static IO_TYPE: Cell = Cell::new(IoType::Other); + static IO_TYPE: Cell = const {Cell::new(IoType::Other)}; } pub fn set_io_type(new_io_type: IoType) { diff --git a/components/file_system/src/io_stats/proc.rs b/components/file_system/src/io_stats/proc.rs index 8da6876d70f..087672d4fc2 100644 --- a/components/file_system/src/io_stats/proc.rs +++ b/components/file_system/src/io_stats/proc.rs @@ -26,7 +26,7 @@ lazy_static! { thread_local! { /// A private copy of I/O type. Optimized for local access. - static IO_TYPE: Cell = Cell::new(IoType::Other); + static IO_TYPE: Cell = const { Cell::new(IoType::Other) }; } #[derive(Debug)] diff --git a/components/raftstore-v2/src/operation/bucket.rs b/components/raftstore-v2/src/operation/bucket.rs index 920a4e68e8c..db57b815576 100644 --- a/components/raftstore-v2/src/operation/bucket.rs +++ b/components/raftstore-v2/src/operation/bucket.rs @@ -52,12 +52,7 @@ impl Peer { ®ion, bucket_ranges, ); - let region_buckets = self - .region_buckets_info() - .bucket_stat() - .as_ref() - .unwrap() - .clone(); + let region_buckets = self.region_buckets_info().bucket_stat().unwrap().clone(); let buckets_count = region_buckets.meta.keys.len() - 1; if change_bucket_version { // TODO: we may need to make it debug once the coprocessor timeout is resolved. 
diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 01ba82dd17a..46577e11b43 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -137,7 +137,7 @@ impl Peer { }; let logger = self.logger.clone(); let read_scheduler = self.storage().read_scheduler(); - let buckets = self.region_buckets_info().bucket_stat().clone(); + let buckets = self.region_buckets_info().bucket_stat().cloned(); let sst_apply_state = self.sst_apply_state().clone(); let (apply_scheduler, mut apply_fsm) = ApplyFsm::new( &store_ctx.cfg, diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index 9b095b872e7..c2f09ef19dd 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -791,7 +791,6 @@ impl Peer { self.read_progress().clone(), self.region_buckets_info() .bucket_stat() - .as_ref() .map(|b| b.meta.clone()), ) } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 07ac28297b1..ad00a0aa887 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -6091,7 +6091,6 @@ where .peer .region_buckets_info() .bucket_stat() - .as_ref() .unwrap() .meta .clone(), @@ -6120,7 +6119,6 @@ where .peer .region_buckets_info() .bucket_stat() - .as_ref() .unwrap() .clone(); let buckets_count = region_buckets.meta.keys.len() - 1; diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index fa5c8346c0c..1625383b929 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -2975,7 +2975,6 @@ where cbs, self.region_buckets_info() .bucket_stat() - .as_ref() .map(|b| b.meta.clone()), ); apply.on_schedule(&ctx.raft_metrics); @@ -4874,7 +4873,6 @@ where snap.bucket_meta = self .region_buckets_info() 
.bucket_stat() - .as_ref() .map(|s| s.meta.clone()); } resp.txn_extra_op = self.txn_extra_op.load(); diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 778f4ce45f0..666b0d34796 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -455,7 +455,6 @@ impl ReadDelegate { bucket_meta: peer .region_buckets_info() .bucket_stat() - .as_ref() .map(|b| b.meta.clone()), track_ver: TrackVer::new(), } diff --git a/components/raftstore/src/store/worker/split_check.rs b/components/raftstore/src/store/worker/split_check.rs index a35e6a32c76..ce2b2a6d10f 100644 --- a/components/raftstore/src/store/worker/split_check.rs +++ b/components/raftstore/src/store/worker/split_check.rs @@ -226,8 +226,8 @@ impl BucketStatsInfo { } #[inline] - pub fn bucket_stat(&self) -> &Option { - &self.bucket_stat + pub fn bucket_stat(&self) -> Option<&BucketStat> { + self.bucket_stat.as_ref() } #[inline] @@ -1018,7 +1018,7 @@ mod tests { #[test] pub fn test_report_buckets() { let mut bucket_stats_info = mock_bucket_stats_info(); - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + let bucket_stats = bucket_stats_info.bucket_stat().unwrap(); let mut delta_bucket_stats = bucket_stats.clone(); delta_bucket_stats.write_key(&[1], 1); delta_bucket_stats.write_key(&[201], 1); @@ -1040,7 +1040,7 @@ mod tests { region.set_id(1); let cfg = Config::default(); let bucket_size = cfg.region_bucket_size.0; - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + let bucket_stats = bucket_stats_info.bucket_stat().unwrap(); let region_epoch = bucket_stats.meta.region_epoch.clone(); // step1: update buckets flow @@ -1048,7 +1048,7 @@ mod tests { delta_bucket_stats.write_key(&[1], 1); delta_bucket_stats.write_key(&[201], 1); bucket_stats_info.add_bucket_flow(&Some(delta_bucket_stats)); - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + let 
bucket_stats = bucket_stats_info.bucket_stat().unwrap(); assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); // step2: tick not affect anything @@ -1062,7 +1062,7 @@ mod tests { ®ion, bucket_ranges, ); - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + let bucket_stats = bucket_stats_info.bucket_stat().unwrap(); assert!(!change_bucket_version); assert_eq!(vec![2, 0, 2], bucket_stats.stats.write_bytes); @@ -1081,7 +1081,7 @@ mod tests { bucket_ranges.clone(), ); assert!(change_bucket_version); - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + let bucket_stats = bucket_stats_info.bucket_stat().unwrap(); assert_eq!( vec![vec![], vec![50], vec![100], vec![200], vec![]], bucket_stats.meta.keys @@ -1108,7 +1108,7 @@ mod tests { ); assert!(change_bucket_version); - let bucket_stats = bucket_stats_info.bucket_stat().as_ref().unwrap(); + let bucket_stats = bucket_stats_info.bucket_stat().unwrap(); assert_eq!( vec![vec![], vec![100], vec![200], vec![]], bucket_stats.meta.keys diff --git a/components/resource_metering/src/model.rs b/components/resource_metering/src/model.rs index 03cd500eb2e..07396bbec50 100644 --- a/components/resource_metering/src/model.rs +++ b/components/resource_metering/src/model.rs @@ -16,7 +16,7 @@ use tikv_util::warn; use crate::TagInfos; thread_local! { - static STATIC_BUF: Cell> = Cell::new(vec![]); + static STATIC_BUF: Cell> = const {Cell::new(vec![])}; } /// Raw resource statistics record. 
diff --git a/components/test_coprocessor/src/store.rs b/components/test_coprocessor/src/store.rs index d5baede9b84..e5589969911 100644 --- a/components/test_coprocessor/src/store.rs +++ b/components/test_coprocessor/src/store.rs @@ -271,8 +271,7 @@ impl Store { ) .unwrap() .into_iter() - .filter(Result::is_ok) - .map(Result::unwrap) + .flatten() .collect() } diff --git a/components/test_util/src/runner.rs b/components/test_util/src/runner.rs index ee2b6548c23..11dc3a2986d 100644 --- a/components/test_util/src/runner.rs +++ b/components/test_util/src/runner.rs @@ -79,7 +79,7 @@ pub fn run_test_with_hook(cases: &[&TestDescAndFn], hook: impl TestHook + Send + test_main(&args, cases, None) } -thread_local!(static FS: RefCell>> = RefCell::new(None)); +thread_local!(static FS: RefCell>> = const { RefCell::new(None) }); #[derive(Clone)] struct FailpointHook; diff --git a/components/tidb_query_datatype/src/codec/table.rs b/components/tidb_query_datatype/src/codec/table.rs index a8585d83b3e..c61bbbbd034 100644 --- a/components/tidb_query_datatype/src/codec/table.rs +++ b/components/tidb_query_datatype/src/codec/table.rs @@ -524,7 +524,7 @@ pub fn generate_index_data_for_test( col_val: &Datum, unique: bool, ) -> (HashMap>, Vec) { - let indice = vec![(2, col_val.clone()), (3, Datum::Dec(handle.into()))]; + let indice = [(2, col_val.clone()), (3, Datum::Dec(handle.into()))]; let mut expect_row = HashMap::default(); let mut v: Vec<_> = indice .iter() diff --git a/components/tidb_query_executors/src/index_scan_executor.rs b/components/tidb_query_executors/src/index_scan_executor.rs index 5ebf8a031d3..02bce4bc98e 100644 --- a/components/tidb_query_executors/src/index_scan_executor.rs +++ b/components/tidb_query_executors/src/index_scan_executor.rs @@ -1403,7 +1403,7 @@ mod tests { ]; let columns = vec![Column::new(1, 2), Column::new(2, 3), Column::new(3, 4.0)]; - let datums = vec![Datum::U64(2), Datum::U64(3), Datum::F64(4.0)]; + let datums = [Datum::U64(2), Datum::U64(3), 
Datum::F64(4.0)]; let mut value_prefix = vec![]; let mut restore_data = vec![]; @@ -1557,7 +1557,7 @@ mod tests { FieldTypeTp::Double.into(), ]; - let datums = vec![Datum::U64(2), Datum::U64(3), Datum::F64(4.0)]; + let datums = [Datum::U64(2), Datum::U64(3), Datum::F64(4.0)]; let common_handle = datum::encode_key( &mut EvalContext::default(), @@ -1655,7 +1655,7 @@ mod tests { ]; let columns = vec![Column::new(1, 2), Column::new(2, 3.0), Column::new(3, 4)]; - let datums = vec![Datum::U64(2), Datum::F64(3.0), Datum::U64(4)]; + let datums = [Datum::U64(2), Datum::F64(3.0), Datum::U64(4)]; let index_data = datum::encode_key(&mut EvalContext::default(), &datums[0..2]).unwrap(); let key = table::encode_index_seek_key(TABLE_ID, INDEX_ID, &index_data); @@ -1948,7 +1948,7 @@ mod tests { ]; let columns = vec![Column::new(1, 2), Column::new(2, 3), Column::new(3, 4.0)]; - let datums = vec![Datum::U64(2), Datum::U64(3), Datum::F64(4.0)]; + let datums = [Datum::U64(2), Datum::U64(3), Datum::F64(4.0)]; let mut value_prefix = vec![]; let mut restore_data = vec![]; diff --git a/components/tidb_query_executors/src/table_scan_executor.rs b/components/tidb_query_executors/src/table_scan_executor.rs index fa05071e8bd..13e0b8df672 100644 --- a/components/tidb_query_executors/src/table_scan_executor.rs +++ b/components/tidb_query_executors/src/table_scan_executor.rs @@ -480,7 +480,7 @@ mod tests { fn new() -> TableScanTestHelper { const TABLE_ID: i64 = 7; // [(row_id, columns)] where each column: (column id, datum) - let data = vec![ + let data = [ ( 1, vec![ diff --git a/components/tidb_query_executors/src/util/aggr_executor.rs b/components/tidb_query_executors/src/util/aggr_executor.rs index a5d760dc80d..59272e2d402 100644 --- a/components/tidb_query_executors/src/util/aggr_executor.rs +++ b/components/tidb_query_executors/src/util/aggr_executor.rs @@ -643,7 +643,7 @@ pub mod tests { let test_paging_size = [2, 5, 7]; let expect_call_num = [1, 3, 4]; - let expect_row_num = vec![vec![4], 
vec![0, 0, 5], vec![0, 0, 0, 6]]; + let expect_row_num = [vec![4], vec![0, 0, 5], vec![0, 0, 0, 6]]; let executor_builders: Vec) -> _>> = vec![Box::new(exec_fast), Box::new(exec_slow)]; for test_case in 0..test_paging_size.len() { @@ -665,7 +665,7 @@ pub mod tests { } } - let expect_row_num2 = vec![vec![4], vec![3, 0, 2], vec![3, 0, 1, 2]]; + let expect_row_num2 = [vec![4], vec![3, 0, 2], vec![3, 0, 1, 2]]; let exec_stream = |src_exec, paging_size| { let mut config = EvalConfig::default(); config.paging_size = paging_size; diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index 236d38ac0b5..ce9095c8950 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -629,7 +629,7 @@ impl ErrorCodeExt for Error { thread_local! { // A pointer to thread local engine. Use raw pointer and `UnsafeCell` to reduce runtime check. - static TLS_ENGINE_ANY: UnsafeCell<*mut ()> = UnsafeCell::new(ptr::null_mut()); + static TLS_ENGINE_ANY: UnsafeCell<*mut ()> = const { UnsafeCell::new(ptr::null_mut())}; } /// Execute the closure on the thread local engine. diff --git a/components/tikv_kv/src/stats.rs b/components/tikv_kv/src/stats.rs index d38c97397ee..9d1337e8283 100644 --- a/components/tikv_kv/src/stats.rs +++ b/components/tikv_kv/src/stats.rs @@ -24,7 +24,7 @@ const STAT_SEEK_FOR_PREV_TOMBSTONE: &str = "seek_for_prev_tombstone"; const STAT_RAW_VALUE_TOMBSTONE: &str = "raw_value_tombstone"; thread_local! { - pub static RAW_VALUE_TOMBSTONE : RefCell = RefCell::new(0); + pub static RAW_VALUE_TOMBSTONE : RefCell = const{ RefCell::new(0)}; } pub enum StatsKind { diff --git a/components/tikv_util/src/logger/mod.rs b/components/tikv_util/src/logger/mod.rs index c321f56a1b5..96d9d5b18e0 100644 --- a/components/tikv_util/src/logger/mod.rs +++ b/components/tikv_util/src/logger/mod.rs @@ -708,7 +708,7 @@ mod tests { // lifetime we need to make a Thread Local, // and implement a custom writer. thread_local! 
{ - static BUFFER: RefCell> = RefCell::new(Vec::new()); + static BUFFER: RefCell> = const { RefCell::new(Vec::new()) }; } struct TestWriter; impl Write for TestWriter { @@ -1000,10 +1000,10 @@ mod tests { } thread_local! { - static NORMAL_BUFFER: RefCell> = RefCell::new(Vec::new()); - static ROCKSDB_BUFFER: RefCell> = RefCell::new(Vec::new()); - static SLOW_BUFFER: RefCell> = RefCell::new(Vec::new()); - static RAFTDB_BUFFER: RefCell> = RefCell::new(Vec::new()); + static NORMAL_BUFFER: RefCell> = const {RefCell::new(Vec::new())}; + static ROCKSDB_BUFFER: RefCell> = const {RefCell::new(Vec::new())}; + static SLOW_BUFFER: RefCell> = const {RefCell::new(Vec::new())}; + static RAFTDB_BUFFER: RefCell> = const {RefCell::new(Vec::new())}; } struct NormalWriter; diff --git a/components/tikv_util/src/thread_group.rs b/components/tikv_util/src/thread_group.rs index 65f715d7af9..300c39f751c 100644 --- a/components/tikv_util/src/thread_group.rs +++ b/components/tikv_util/src/thread_group.rs @@ -28,7 +28,7 @@ impl GroupProperties { } thread_local! { - static PROPERTIES: RefCell> = RefCell::new(None); + static PROPERTIES: RefCell> = const { RefCell::new(None) }; } pub fn current_properties() -> Option { diff --git a/components/tikv_util/src/time.rs b/components/tikv_util/src/time.rs index f329247c563..c710b174dee 100644 --- a/components/tikv_util/src/time.rs +++ b/components/tikv_util/src/time.rs @@ -510,7 +510,7 @@ pub struct ThreadReadId { pub create_time: Timespec, } -thread_local!(static READ_SEQUENCE: RefCell = RefCell::new(0)); +thread_local!(static READ_SEQUENCE: RefCell = const { RefCell::new(0) }); impl ThreadReadId { pub fn new() -> ThreadReadId { diff --git a/components/tracker/src/slab.rs b/components/tracker/src/slab.rs index c7b9efa9944..3ba4ca5953f 100644 --- a/components/tracker/src/slab.rs +++ b/components/tracker/src/slab.rs @@ -20,7 +20,7 @@ lazy_static! { fn next_shard_id() -> usize { thread_local! 
{ - static CURRENT_SHARD_ID: Cell = Cell::new(0); + static CURRENT_SHARD_ID: Cell = const {Cell::new(0)}; } CURRENT_SHARD_ID.with(|c| { let shard_id = c.get(); diff --git a/components/tracker/src/tls.rs b/components/tracker/src/tls.rs index 982f483c8bc..af7aa22eb65 100644 --- a/components/tracker/src/tls.rs +++ b/components/tracker/src/tls.rs @@ -12,7 +12,7 @@ use pin_project::pin_project; use crate::{slab::TrackerToken, Tracker, GLOBAL_TRACKERS, INVALID_TRACKER_TOKEN}; thread_local! { - static TLS_TRACKER_TOKEN: Cell = Cell::new(INVALID_TRACKER_TOKEN); + static TLS_TRACKER_TOKEN: Cell = const { Cell::new(INVALID_TRACKER_TOKEN) }; } pub fn set_tls_tracker_token(token: TrackerToken) { diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 86192639647..653a1a5c13c 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly-2023-12-10" +channel = "nightly-2023-12-28" components = ["rustfmt", "clippy", "rust-src", "rust-analyzer"] diff --git a/src/server/load_statistics/mod.rs b/src/server/load_statistics/mod.rs index 5cb856e2948..f88dd465bef 100644 --- a/src/server/load_statistics/mod.rs +++ b/src/server/load_statistics/mod.rs @@ -13,7 +13,7 @@ use parking_lot::Mutex; use tikv_util::sys::thread::{self, Pid}; thread_local! { - static CURRENT_LOAD: RefCell>> = RefCell::new(None); + static CURRENT_LOAD: RefCell>> = const { RefCell::new(None)}; } /// A load metric for all threads. 
diff --git a/src/storage/txn/commands/prewrite.rs b/src/storage/txn/commands/prewrite.rs index 9142b09e481..42cc9ba1a01 100644 --- a/src/storage/txn/commands/prewrite.rs +++ b/src/storage/txn/commands/prewrite.rs @@ -1753,7 +1753,7 @@ mod tests { async_apply_prewrite: bool, } - let cases = vec![ + let cases = [ Case { // basic case expected: ResponsePolicy::OnApplied, diff --git a/tests/integrations/coprocessor/test_select.rs b/tests/integrations/coprocessor/test_select.rs index 4e5418cdc14..1677d007e67 100644 --- a/tests/integrations/coprocessor/test_select.rs +++ b/tests/integrations/coprocessor/test_select.rs @@ -172,7 +172,7 @@ fn test_stream_batch_row_limit() { let resps = handle_streaming_select(&endpoint, req, check_range); assert_eq!(resps.len(), 3); - let expected_output_counts = vec![vec![2_i64], vec![2_i64], vec![1_i64]]; + let expected_output_counts = [vec![2_i64], vec![2_i64], vec![1_i64]]; for (i, resp) in resps.into_iter().enumerate() { let mut chunk = Chunk::default(); chunk.merge_from_bytes(resp.get_data()).unwrap(); diff --git a/tests/integrations/server/debugger.rs b/tests/integrations/server/debugger.rs index 9eabb30b58c..61bc570aafc 100644 --- a/tests/integrations/server/debugger.rs +++ b/tests/integrations/server/debugger.rs @@ -41,7 +41,7 @@ fn gen_delete_k(k: &[u8], commit_ts: TimeStamp) -> Vec { fn test_compact() { let (split_key, _) = gen_mvcc_put_kv(b"k10", b"", 1.into(), 2.into()); let (split_key2, _) = gen_mvcc_put_kv(b"k20", b"", 1.into(), 2.into()); - let regions = vec![ + let regions = [ (1, b"".to_vec(), split_key.clone()), (1000, split_key.clone(), split_key2.clone()), (1002, split_key2.clone(), b"".to_vec()), diff --git a/tests/integrations/storage/test_region_info_accessor.rs b/tests/integrations/storage/test_region_info_accessor.rs index 344f9c6607e..a5fe1ea6b08 100644 --- a/tests/integrations/storage/test_region_info_accessor.rs +++ b/tests/integrations/storage/test_region_info_accessor.rs @@ -19,7 +19,7 @@ fn prepare_cluster>( 
cluster.must_put(&key, &value); } - let end_keys = vec![ + let end_keys = [ b"k1".to_vec(), b"k3".to_vec(), b"k5".to_vec(), @@ -28,7 +28,7 @@ fn prepare_cluster>( b"".to_vec(), ]; - let start_keys = vec![ + let start_keys = [ b"".to_vec(), b"k1".to_vec(), b"k3".to_vec(), @@ -39,7 +39,7 @@ fn prepare_cluster>( let mut regions = Vec::new(); - for mut key in end_keys.iter().take(end_keys.len() - 1).map(Vec::clone) { + for mut key in end_keys.iter().take(end_keys.len() - 1).cloned() { let region = cluster.get_region(&key); cluster.must_split(®ion, &key); From 246c73be2423b2d97e8bda075af976013117c899 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 17 Jan 2024 17:04:45 +0800 Subject: [PATCH 1116/1149] metrics: rearrange resolved ts panels (#16400) ref tikv/tikv#16265 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- metrics/grafana/tikv_details.dashboard.py | 128 +++---- metrics/grafana/tikv_details.json | 394 +++++++++++----------- metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 262 insertions(+), 262 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index 8f26ad95cbc..c10177be29f 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -2537,7 +2537,7 @@ def LocalReader() -> RowPanel: layout.row( [ graph_panel( - title="Raft log async fetch task duration", + title="Local reader requests", targets=[ target( expr=expr_sum_rate( @@ -7211,13 +7211,12 @@ def ResolvedTS() -> RowPanel: ], ), graph_panel( - title="Max gap of follower safe-ts", - description="The gap between now() and the minimal (non-zero) safe ts for followers", - yaxes=yaxes(left_format=UNITS.MILLI_SECONDS), + title="Min Resolved TS Region", + description="The region that has minimal resolved ts", targets=[ target( expr=expr_sum( - "tikv_resolved_ts_min_follower_safe_ts_gap_millis", + 
"tikv_resolved_ts_min_resolved_ts_region", ), ) ], @@ -7227,12 +7226,13 @@ def ResolvedTS() -> RowPanel: layout.row( [ graph_panel( - title="Min Resolved TS Region", - description="The region that has minimal resolved ts", + title="Max gap of follower safe-ts", + description="The gap between now() and the minimal (non-zero) safe ts for followers", + yaxes=yaxes(left_format=UNITS.MILLI_SECONDS), targets=[ target( expr=expr_sum( - "tikv_resolved_ts_min_resolved_ts_region", + "tikv_resolved_ts_min_follower_safe_ts_gap_millis", ), ) ], @@ -7252,12 +7252,6 @@ def ResolvedTS() -> RowPanel: ) layout.row( [ - heatmap_panel( - title="Check leader duration", - description="The time consumed when handle a check leader request", - yaxis=yaxis(format=UNITS.SECONDS), - metric="tikv_resolved_ts_check_leader_duration_seconds_bucket", - ), graph_panel( title="Max gap of resolved-ts in region leaders", description="The gap between resolved ts of leaders and current time", @@ -7270,10 +7264,27 @@ def ResolvedTS() -> RowPanel: ) ], ), + graph_panel( + title="Min Leader Resolved TS Region", + description="The region that its leader has minimal resolved ts.", + targets=[ + target( + expr=expr_sum( + "tikv_resolved_ts_min_leader_resolved_ts_region", + ), + ) + ], + ), ] ) layout.row( [ + heatmap_panel( + title="Check leader duration", + description="The time consumed when handle a check leader request", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_resolved_ts_check_leader_duration_seconds_bucket", + ), graph_panel( title="99% CheckLeader request region count", description="Bucketed histogram of region count in a check leader request", @@ -7288,53 +7299,31 @@ def ResolvedTS() -> RowPanel: ) ], ), - heatmap_panel( - title="Initial scan backoff duration", - description="The backoff duration before starting initial scan", - yaxis=yaxis(format=UNITS.SECONDS), - metric="tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket", - ), ] ) layout.row( [ graph_panel( - title="Lock heap 
size", - description="Total bytes in memory of resolved-ts observe regions's lock heap", + title="99% CheckLeader request size", + description="Bucketed histogram of the check leader request size", yaxes=yaxes(left_format=UNITS.BYTES_IEC), targets=[ target( - expr=expr_avg( - "tikv_resolved_ts_lock_heap_bytes", - ), - ) - ], - ), - graph_panel( - title="Min Leader Resolved TS Region", - description="The region that its leader has minimal resolved ts.", - targets=[ - target( - expr=expr_sum( - "tikv_resolved_ts_min_leader_resolved_ts_region", + expr=expr_histogram_quantile( + 0.99, + "tikv_check_leader_request_size_bytes", + by_labels=["instance"], ), - ) - ], - ), - ] - ) - layout.row( - [ - graph_panel( - title="Observe region status", - description="The status of resolved-ts observe regions", - targets=[ + legend_format="{{instance}}", + ), target( - expr=expr_sum( - "tikv_resolved_ts_region_resolve_status", - by_labels=["type"], + expr=expr_histogram_quantile( + 0.99, + "tikv_check_leader_request_item_count", + by_labels=["instance"], ), - ) + legend_format="{{instance}}-check-num", + ), ], ), graph_panel( @@ -7361,26 +7350,37 @@ def ResolvedTS() -> RowPanel: layout.row( [ graph_panel( - title="99% CheckLeader request size", - description="Bucketed histogram of the check leader request size", + title="Lock heap size", + description="Total bytes in memory of resolved-ts observe regions's lock heap", yaxes=yaxes(left_format=UNITS.BYTES_IEC), targets=[ target( - expr=expr_histogram_quantile( - 0.99, - "tikv_check_leader_request_size_bytes", - by_labels=["instance"], + expr=expr_avg( + "tikv_resolved_ts_lock_heap_bytes", ), - legend_format="{{instance}}", - ), + ) + ], + ), + heatmap_panel( + title="Initial scan backoff duration", + description="The backoff duration before starting initial scan", + yaxis=yaxis(format=UNITS.SECONDS), + metric="tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket", + ), + ] + ) + layout.row( + [ + graph_panel( + 
title="Observe region status", + description="The status of resolved-ts observe regions", + targets=[ target( - expr=expr_histogram_quantile( - 0.99, - "tikv_check_leader_request_item_count", - by_labels=["instance"], + expr=expr_sum( + "tikv_resolved_ts_region_resolve_status", + by_labels=["type"], ), - legend_format="{{instance}}-check-num", - ), + ) ], ), graph_panel( diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 31fe27afa7e..0ebd7fdbff0 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -23173,7 +23173,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Raft log async fetch task duration", + "title": "Local reader requests", "tooltip": { "msResolution": true, "shared": true, @@ -54732,7 +54732,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between now() and the minimal (non-zero) safe ts for followers", + "description": "The region that has minimal resolved ts", "editable": true, "error": false, "fieldConfig": { @@ -54802,7 +54802,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_resolved_ts_min_follower_safe_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum((\n tikv_resolved_ts_min_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -54810,7 +54810,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n tikv_resolved_ts_min_follower_safe_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum((\n tikv_resolved_ts_min_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) 
by (instance) ", "refId": "", "step": 10, "target": "" @@ -54819,7 +54819,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Max gap of follower safe-ts", + "title": "Min Resolved TS Region", "tooltip": { "msResolution": true, "shared": true, @@ -54838,7 +54838,7 @@ "yaxes": [ { "decimals": null, - "format": "ms", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -54865,7 +54865,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The region that has minimal resolved ts", + "description": "The gap between now() and the minimal (non-zero) safe ts for followers", "editable": true, "error": false, "fieldConfig": { @@ -54935,7 +54935,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_resolved_ts_min_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum((\n tikv_resolved_ts_min_follower_safe_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -54943,7 +54943,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n tikv_resolved_ts_min_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum((\n tikv_resolved_ts_min_follower_safe_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -54952,7 +54952,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Min Resolved TS Region", + "title": "Max gap of follower safe-ts", "tooltip": { "msResolution": true, "shared": true, @@ -54971,7 +54971,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "ms", "label": null, "logBase": 1, 
"max": null, @@ -55126,110 +55126,6 @@ "alignLevel": 0 } }, - { - "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when handle a check leader request", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 21 - }, - "heatmap": {}, - "height": null, - "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 397, - "interval": null, - "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_resolved_ts_check_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "metric": "", - "query": "sum(rate(\n tikv_resolved_ts_check_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Check leader duration", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": 
"time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, { "aliasColors": {}, "bars": false, @@ -55257,12 +55153,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 398, + "id": 397, "interval": null, "isNew": true, "legend": { @@ -55368,7 +55264,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Bucketed histogram of region count in a check leader request", + "description": "The region that its leader has minimal resolved ts.", "editable": true, "error": false, "fieldConfig": { @@ -55390,12 +55286,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 28 + "x": 12, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 399, + "id": 398, "interval": null, "isNew": true, "legend": { @@ -55438,7 +55334,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -55446,7 +55342,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "sum((\n 
tikv_resolved_ts_min_leader_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -55455,7 +55351,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% CheckLeader request region count", + "title": "Min Leader Resolved TS Region", "tooltip": { "msResolution": true, "shared": true, @@ -55513,7 +55409,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The backoff duration before starting initial scan", + "description": "The time consumed when handle a check leader request", "editable": true, "error": false, "fieldConfig": { @@ -55527,7 +55423,7 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 28 }, "heatmap": {}, @@ -55535,7 +55431,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 400, + "id": 399, "interval": null, "legend": { "show": false @@ -55552,7 +55448,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "expr": "sum(rate(\n tikv_resolved_ts_check_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", "hide": false, "instant": false, @@ -55560,7 +55456,7 @@ "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_resolved_ts_check_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -55568,7 +55464,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Initial scan backoff duration", + "title": "Check leader duration", "tooltip": { "msResolution": true, "shared": true, @@ -55605,7 +55501,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Total bytes in memory of resolved-ts observe regions's lock heap", + "description": "Bucketed histogram of region count in a check leader request", "editable": true, "error": false, "fieldConfig": { @@ -55627,12 +55523,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 35 + "x": 12, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 401, + "id": 400, "interval": null, "isNew": true, "legend": { @@ -55675,7 +55571,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_resolved_ts_lock_heap_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -55683,7 +55579,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "avg((\n tikv_resolved_ts_lock_heap_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -55692,7 +55588,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Lock heap size", + "title": "99% 
CheckLeader request region count", "tooltip": { "msResolution": true, "shared": true, @@ -55711,7 +55607,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -55738,7 +55634,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The region that its leader has minimal resolved ts.", + "description": "Bucketed histogram of the check leader request size", "editable": true, "error": false, "fieldConfig": { @@ -55760,12 +55656,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 35 }, "height": null, "hideTimeOverride": false, - "id": 402, + "id": 401, "interval": null, "isNew": true, "legend": { @@ -55808,7 +55704,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -55816,7 +55712,22 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-check-num", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -55825,7 +55736,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Min Leader Resolved TS Region", + "title": "99% CheckLeader request size", "tooltip": { "msResolution": true, "shared": true, @@ -55844,7 +55755,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -55871,7 +55782,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The status of resolved-ts observe regions", + "description": "The count of fail to advance resolved-ts", "editable": true, "error": false, "fieldConfig": { @@ -55893,12 +55804,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 42 + "x": 12, + "y": 35 }, "height": null, "hideTimeOverride": false, - "id": 403, + "id": 402, "interval": null, "isNew": true, "legend": { @@ -55941,15 +55852,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_resolved_ts_region_resolve_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "expr": "sum(delta(\n tikv_resolved_ts_fail_advance_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + 
"legendFormat": "{{instance}}-{{reason}}", "metric": "", - "query": "sum((\n tikv_resolved_ts_region_resolve_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "query": "sum(delta(\n tikv_resolved_ts_fail_advance_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_raftstore_check_stale_peer\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-stale-peer", + "metric": "", + "query": "sum(delta(\n tikv_raftstore_check_stale_peer\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -55958,7 +55884,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Observe region status", + "title": "Fail advance ts count", "tooltip": { "msResolution": true, "shared": true, @@ -56004,7 +55930,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of fail to advance resolved-ts", + "description": "Total bytes in memory of resolved-ts observe regions's lock heap", "editable": true, "error": false, "fieldConfig": { @@ -56026,12 +55952,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 404, + "id": 403, "interval": null, "isNew": true, "legend": { @@ -56074,30 +56000,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_resolved_ts_fail_advance_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{reason}}", - "metric": "", - "query": "sum(delta(\n tikv_resolved_ts_fail_advance_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_raftstore_check_stale_peer\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "avg((\n tikv_resolved_ts_lock_heap_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-stale-peer", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(delta(\n tikv_raftstore_check_stale_peer\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "avg((\n tikv_resolved_ts_lock_heap_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -56106,7 +56017,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Fail advance ts count", + "title": "Lock heap size", "tooltip": { "msResolution": true, "shared": true, @@ -56125,7 +56036,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -56147,12 +56058,116 @@ "alignLevel": 0 } }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + 
"color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The backoff duration before starting initial scan", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 42 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 404, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Initial scan backoff duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + 
"decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Bucketed histogram of the check leader request size", + "description": "The status of resolved-ts observe regions", "editable": true, "error": false, "fieldConfig": { @@ -56222,30 +56237,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "sum((\n tikv_resolved_ts_region_resolve_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-check-num", + "legendFormat": "{{type}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "sum((\n tikv_resolved_ts_region_resolve_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -56254,7 +56254,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% CheckLeader request size", + "title": "Observe region status", "tooltip": { "msResolution": true, "shared": true, @@ -56273,7 +56273,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "none", "label": null, "logBase": 1, "max": null, diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index c384d535673..2e24b367bd3 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -774093bd523da2b611990ff638c64fcd3cec35b3c5d391643129cb7ee6b72b41 ./metrics/grafana/tikv_details.json +362db1df47c4787354f52f32b4664f96e020b89f8622710adc3d5b47c8352dbb ./metrics/grafana/tikv_details.json From 7b50d052bc7cce218ca03a0f1f45c63634195fdd Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 17 Jan 2024 17:54:17 +0800 Subject: [PATCH 1117/1149] logger: fix thread_id in log (#16399) close tikv/tikv#16398 The current "thread_id" in the log is always 0x5. This is because: 1)TiKV logs asynchronously by sending all log records to a dedicated thread called "slogger", which is the fifth thread spawned by TiKV; and 2) "thread_id" is evaluated lazily by the "slogger" thread. To fix this issue, this commit obtains the "thread_id" before sending it to the "slogger" thread. 
Signed-off-by: Neil Shen --- components/tikv_util/src/logger/mod.rs | 265 +++++++++++-------------- 1 file changed, 117 insertions(+), 148 deletions(-) diff --git a/components/tikv_util/src/logger/mod.rs b/components/tikv_util/src/logger/mod.rs index 96d9d5b18e0..037465cc18b 100644 --- a/components/tikv_util/src/logger/mod.rs +++ b/components/tikv_util/src/logger/mod.rs @@ -6,7 +6,6 @@ mod formatter; use std::{ env, fmt, io::{self, BufWriter}, - num::NonZeroU64, path::{Path, PathBuf}, sync::{ atomic::{AtomicUsize, Ordering}, @@ -16,10 +15,7 @@ use std::{ }; use log::{self, SetLoggerError}; -use slog::{ - self, slog_o, Drain, FnValue, Key, OwnedKV, OwnedKVList, PushFnValue, Record, - SendSyncRefUnwindSafeKV, KV, -}; +use slog::{self, slog_o, Drain, FnValue, Key, OwnedKVList, PushFnValue, Record, KV}; pub use slog::{FilterFn, Level}; use slog_async::{Async, AsyncGuard, OverflowStrategy}; use slog_term::{Decorator, PlainDecorator, RecordDecorator}; @@ -76,6 +72,30 @@ where } }; + fn build_log_drain( + drain: I, + threshold: u64, + filter: impl FilterFn, + ) -> impl Drain + where + I: Drain, + { + let drain = SlowLogFilter { + threshold, + inner: drain, + }; + // ThreadIDrain discards all previous `slog::OwnedKVList`, anything that + // wraps it should not pass `slog::OwnedKVList`. + // + // NB: slog macros (slog::info!() and others) only produce one + // `slog::Record`, `slog::OwnedKVList` are provided by `slog::Drain` and + // `slog::Logger`. + let drain = ThreadIDrain(drain); + // Let GlobalLevelFilter wrap ThreadIDrain, so that it saves getting + // thread id for flittered logs. 
+ GlobalLevelFilter::new(drain.filter(filter).fuse()) + } + let (logger, guard) = if use_async { let (async_log, guard) = Async::new(LogAndFuse(drain)) .chan_size(SLOG_CHANNEL_SIZE) @@ -83,21 +103,12 @@ where .thread_name(thd_name!("slogger")) .build_with_guard(); let drain = async_log.fuse(); - let drain = SlowLogFilter { - threshold: slow_threshold, - inner: drain, - }; - let filtered = GlobalLevelFilter::new(drain.filter(filter).fuse()); - - (slog::Logger::root(filtered, get_values()), Some(guard)) + let drain = build_log_drain(drain, slow_threshold, filter); + (slog::Logger::root(drain, slog_o!()), Some(guard)) } else { let drain = LogAndFuse(Mutex::new(drain)); - let drain = SlowLogFilter { - threshold: slow_threshold, - inner: drain, - }; - let filtered = GlobalLevelFilter::new(drain.filter(filter).fuse()); - (slog::Logger::root(filtered, get_values()), None) + let drain = build_log_drain(drain, slow_threshold, filter); + (slog::Logger::root(drain, slog_o!()), None) }; set_global_logger(level, init_stdlog, logger, guard) @@ -632,16 +643,21 @@ fn write_log_fields( Ok(()) } -fn format_thread_id(thread_id: NonZeroU64) -> String { - format!("{:#0x}", thread_id) -} +struct ThreadIDrain(pub D); -fn get_values() -> OwnedKV { - slog_o!( - "thread_id" => FnValue(|_| { - format_thread_id(std::thread::current().id().as_u64()) - }) - ) +impl Drain for ThreadIDrain +where + D: Drain, +{ + type Ok = D::Ok; + type Err = D::Err; + fn log(&self, record: &Record<'_>, _: &OwnedKVList) -> Result { + let thread_id = std::thread::current().id().as_u64().get(); + self.0.log( + record, + &OwnedKVList::from(slog::o!("thread_id" => thread_id)), + ) + } } struct Serializer<'a> { @@ -695,7 +711,7 @@ impl<'a> slog::Serializer for Serializer<'a> { #[cfg(test)] mod tests { - use std::{cell::RefCell, io, io::Write, str::from_utf8, sync::RwLock, time::Duration}; + use std::{cell::RefCell, io, io::Write, str::from_utf8, sync::Arc, time::Duration}; use chrono::DateTime; use regex::Regex; @@ 
-704,19 +720,13 @@ mod tests { use super::*; - // Due to the requirements of `Logger::root*` on a writer with a 'static - // lifetime we need to make a Thread Local, - // and implement a custom writer. - thread_local! { - static BUFFER: RefCell> = const { RefCell::new(Vec::new()) }; - } - struct TestWriter; + struct TestWriter(Arc>>); impl Write for TestWriter { fn write(&mut self, buf: &[u8]) -> io::Result { - BUFFER.with(|buffer| buffer.borrow_mut().write(buf)) + self.0.lock().unwrap().write(buf) } fn flush(&mut self) -> io::Result<()> { - BUFFER.with(|buffer| buffer.borrow_mut().flush()) + self.0.lock().unwrap().flush() } } @@ -775,13 +785,15 @@ mod tests { #[test] fn test_log_format_text() { - let decorator = PlainSyncDecorator::new(TestWriter); + let buffer: Arc>> = Arc::default(); + let decorator = PlainSyncDecorator::new(TestWriter(buffer.clone())); let drain = TikvFormat::new(decorator, true).fuse(); - let logger = slog::Logger::root_typed(drain, get_values()).into_erased(); + let drain = ThreadIDrain(drain); + let logger = slog::Logger::root_typed(drain, slog_o!()).into_erased(); log_format_cases(logger); - let thread_id = format_thread_id(std::thread::current().id().as_u64()); + let thread_id = std::thread::current().id().as_u64(); let expect = format!( r#"[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:469] [] [thread_id={0}] [2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:469] [Welcome] [thread_id={0}] @@ -797,99 +809,100 @@ mod tests { thread_id ); - BUFFER.with(|buffer| { - let mut buffer = buffer.borrow_mut(); - let output = from_utf8(&buffer).unwrap(); - assert_eq!(output.lines().count(), expect.lines().count()); - - let re = Regex::new(r"(?P\[.*?\])\s(?P\[.*?\])\s(?P\[.*?\])\s(?P\[.*?\])\s?(?P\[.*\])?").unwrap(); - - for (output_line, expect_line) in output.lines().zip(expect.lines()) { - let expect_segments = re.captures(expect_line).unwrap(); - let output_segments = re.captures(output_line).unwrap(); + let buffer = buffer.lock().unwrap(); + let 
output = from_utf8(&buffer).unwrap(); + assert_eq!( + output.lines().count(), + expect.lines().count(), + "{}\n===\n{}", + output, + expect + ); - validate_log_datetime(peel(&output_segments["datetime"])); + let re = Regex::new(r"(?P\[.*?\])\s(?P\[.*?\])\s(?P\[.*?\])\s(?P\[.*?\])\s?(?P\[.*\])?").unwrap(); - assert!(validate_log_source_file( - peel(&expect_segments["source_file"]), - peel(&output_segments["source_file"]) - )); - assert_eq!(expect_segments["level"], output_segments["level"]); - assert_eq!(expect_segments["msg"], output_segments["msg"]); - assert_eq!( - expect_segments.name("kvs").map(|s| s.as_str()), - output_segments.name("kvs").map(|s| s.as_str()) - ); - } - buffer.clear(); - }); + for (output_line, expect_line) in output.lines().zip(expect.lines()) { + let expect_segments = re.captures(expect_line).unwrap(); + let output_segments = re.captures(output_line).unwrap(); + + validate_log_datetime(peel(&output_segments["datetime"])); + + assert!(validate_log_source_file( + peel(&expect_segments["source_file"]), + peel(&output_segments["source_file"]) + )); + assert_eq!(expect_segments["level"], output_segments["level"]); + assert_eq!(expect_segments["msg"], output_segments["msg"]); + assert_eq!( + expect_segments.name("kvs").map(|s| s.as_str()), + output_segments.name("kvs").map(|s| s.as_str()) + ); + } } #[test] fn test_log_format_json() { use serde_json::{from_str, Value}; - let drain = Mutex::new(json_format(TestWriter, true)).map(slog::Fuse); - let logger = slog::Logger::root_typed(drain, get_values()).into_erased(); + let buffer: Arc>> = Arc::default(); + let drain = Mutex::new(json_format(TestWriter(buffer.clone()), true)).map(slog::Fuse); + let drain = ThreadIDrain(drain); + let logger = slog::Logger::root_typed(drain, slog_o!()).into_erased(); log_format_cases(logger); - let thread_id = format_thread_id(std::thread::current().id().as_u64()); + let thread_id = std::thread::current().id().as_u64(); let expect = format!( - r#"{{"time":"2020/05/16 
15:49:52.449 +08:00","level":"INFO","caller":"mod.rs:469","message":"","thread_id":"{0}"}} -{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:469","message":"Welcome","thread_id":"{0}"}} -{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:470","message":"Welcome TiKV","thread_id":"{0}"}} -{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:471","message":"欢迎","thread_id":"{0}"}} -{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:472","message":"欢迎 TiKV","thread_id":"{0}"}} -{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:455","message":"failed to fetch URL","backoff":"3s","attempt":3,"url":"http://example.com","thread_id":"{0}"}} -{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:460","message":"failed to \"fetch\" [URL]: http://example.com","thread_id":"{0}"}} -{{"time":"2020/05/16 15:49:52.450 +08:00","level":"DEBUG","caller":"mod.rs:463","message":"Slow query","process keys":1500,"duration":"123ns","sql":"SELECT * FROM TABLE WHERE ID=\"abc\"","thread_id":"{0}"}} -{{"time":"2020/05/16 15:49:52.450 +08:00","level":"WARN","caller":"mod.rs:473","message":"Type","Other":null,"Score":null,"Counter":null,"thread_id":"{0}"}} -{{"time":"2020/05/16 15:49:52.451 +08:00","level":"INFO","caller":"mod.rs:391","message":"more type tests","str_array":"[\"💖\", \"�\", \"☺☻☹\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"\\\\x80\\\\x80\\\\x80\\\\x80\", \"XML\"]","u8":34,"is_None":null,"is_false":false,"is_true":true,"store ids":"[1, 2, 3]","url-peers":"[\"peer1\", \"peer 2\"]","urls":"[\"http://xxx.com:2347\", \"http://xxx.com:2432\"]","field2":"in quote","field1":"no_quote","thread_id":"{0}"}} + r#"{{"time":"2020/05/16 15:49:52.449 +08:00","level":"INFO","caller":"mod.rs:469","message":"","thread_id":{0}}} +{{"time":"2020/05/16 15:49:52.450 
+08:00","level":"INFO","caller":"mod.rs:469","message":"Welcome","thread_id":{0}}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:470","message":"Welcome TiKV","thread_id":{0}}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:471","message":"欢迎","thread_id":{0}}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:472","message":"欢迎 TiKV","thread_id":{0}}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:455","message":"failed to fetch URL","backoff":"3s","attempt":3,"url":"http://example.com","thread_id":{0}}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"INFO","caller":"mod.rs:460","message":"failed to \"fetch\" [URL]: http://example.com","thread_id":{0}}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"DEBUG","caller":"mod.rs:463","message":"Slow query","process keys":1500,"duration":"123ns","sql":"SELECT * FROM TABLE WHERE ID=\"abc\"","thread_id":{0}}} +{{"time":"2020/05/16 15:49:52.450 +08:00","level":"WARN","caller":"mod.rs:473","message":"Type","Other":null,"Score":null,"Counter":null,"thread_id":{0}}} +{{"time":"2020/05/16 15:49:52.451 +08:00","level":"INFO","caller":"mod.rs:391","message":"more type tests","str_array":"[\"💖\", \"�\", \"☺☻☹\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"\\\\x80\\\\x80\\\\x80\\\\x80\", \"XML\"]","u8":34,"is_None":null,"is_false":false,"is_true":true,"store ids":"[1, 2, 3]","url-peers":"[\"peer1\", \"peer 2\"]","urls":"[\"http://xxx.com:2347\", \"http://xxx.com:2432\"]","field2":"in quote","field1":"no_quote","thread_id":{0}}} "#, thread_id ); - BUFFER.with(|buffer| { - let mut buffer = buffer.borrow_mut(); - let output = from_utf8(&buffer).unwrap(); - assert_eq!(output.lines().count(), expect.lines().count()); + let buffer = buffer.lock().unwrap(); + let output = from_utf8(&buffer).unwrap(); + assert_eq!(output.lines().count(), expect.lines().count()); - for 
(output_line, expect_line) in output.lines().zip(expect.lines()) { - let mut expect_json = from_str::(expect_line).unwrap(); - let mut output_json = from_str::(output_line).unwrap(); + for (output_line, expect_line) in output.lines().zip(expect.lines()) { + let mut expect_json = from_str::(expect_line).unwrap(); + let mut output_json = from_str::(output_line).unwrap(); - validate_log_datetime(output_json["time"].take().as_str().unwrap()); - // Remove time field to bypass timestamp mismatch. - let _ = expect_json["time"].take(); + validate_log_datetime(output_json["time"].take().as_str().unwrap()); + // Remove time field to bypass timestamp mismatch. + let _ = expect_json["time"].take(); - validate_log_source_file( - output_json["caller"].take().as_str().unwrap(), - expect_json["caller"].take().as_str().unwrap(), - ); + validate_log_source_file( + output_json["caller"].take().as_str().unwrap(), + expect_json["caller"].take().as_str().unwrap(), + ); - assert_eq!(expect_json, output_json); - } - buffer.clear(); - }); + assert_eq!(expect_json, output_json); + } } #[test] fn test_global_level_filter() { - let decorator = PlainSyncDecorator::new(TestWriter); + let buffer: Arc>> = Arc::default(); + let decorator = PlainSyncDecorator::new(TestWriter(buffer.clone())); let drain = TikvFormat::new(decorator, true).fuse(); let logger = slog::Logger::root_typed(GlobalLevelFilter::new(drain), slog_o!()).into_erased(); let expected = "[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:871] [Welcome]\n"; let check_log = |log: &str| { - BUFFER.with(|buffer| { - let mut buffer = buffer.borrow_mut(); - let output = from_utf8(&buffer).unwrap(); - // only check the log len here as some field like timestamp, location may - // change. - assert_eq!(output.len(), log.len()); - buffer.clear(); - }); + let mut buffer = buffer.lock().unwrap(); + let output = from_utf8(&buffer).unwrap(); + // only check the log len here as some field like timestamp, location may + // change. 
+ assert_eq!(output.len(), log.len()); + buffer.clear(); }; set_log_level(Level::Info); @@ -1096,48 +1109,4 @@ mod tests { } }); } - - static THREAD_SAFE_BUFFER: RwLock> = RwLock::new(Vec::new()); - - struct ThreadSafeWriter; - impl Write for ThreadSafeWriter { - fn write(&mut self, data: &[u8]) -> io::Result { - let mut buffer = THREAD_SAFE_BUFFER.write().unwrap(); - buffer.write(data) - } - - fn flush(&mut self) -> io::Result<()> { - let mut buffer = THREAD_SAFE_BUFFER.write().unwrap(); - buffer.flush() - } - } - - #[test] - fn test_threadid() { - let drain = TikvFormat::new(PlainSyncDecorator::new(ThreadSafeWriter), true).fuse(); - let logger = slog::Logger::root_typed(drain, get_values()).into_erased(); - - slog_info!(logger, "Hello from the first thread"); - let this_threadid = thread::current().id().as_u64(); - let this_threadid = format_thread_id(this_threadid); - - let handle = thread::spawn(move || { - slog_info!(logger, "Hello from the second thread"); - }); - let other_threadid = handle.thread().id().as_u64(); - let other_threadid = format_thread_id(other_threadid); - handle.join().unwrap(); - - let expected = vec![this_threadid, other_threadid]; - - let re = Regex::new(r"\[thread_id=(.*?)\]").unwrap(); - let buffer = THREAD_SAFE_BUFFER.read().unwrap(); - let output = from_utf8(&buffer).unwrap(); - let actual: Vec<&str> = output - .lines() - .map(|line| re.captures(line).unwrap()) - .map(|captures| captures.get(1).unwrap().as_str()) - .collect(); - assert_eq!(expected, actual); - } } From a882d2f7c20577ed38ddc6d1290fff634bfe8071 Mon Sep 17 00:00:00 2001 From: xufei Date: Thu, 18 Jan 2024 11:36:17 +0800 Subject: [PATCH 1118/1149] coprocessor: make the error format the same as tidb (#16404) close tikv/tikv#16407 Signed-off-by: xufei --- components/tidb_query_expr/src/impl_math.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/tidb_query_expr/src/impl_math.rs b/components/tidb_query_expr/src/impl_math.rs index 
beeeef288b4..dd416c0502a 100644 --- a/components/tidb_query_expr/src/impl_math.rs +++ b/components/tidb_query_expr/src/impl_math.rs @@ -329,7 +329,7 @@ fn cot(arg: &Real) -> Result> { fn pow(lhs: &Real, rhs: &Real) -> Result> { let pow = (lhs.into_inner()).pow(rhs.into_inner()); if pow.is_infinite() { - Err(Error::overflow("DOUBLE", format!("{}.pow({})", lhs, rhs)).into()) + Err(Error::overflow("DOUBLE", format!("pow({}, {})", lhs, rhs)).into()) } else { Ok(Real::new(pow).ok()) } From 8bfd4a91bd5a8f5e07ea158099382190fb24f7e7 Mon Sep 17 00:00:00 2001 From: Alex Feinberg Date: Wed, 17 Jan 2024 20:00:17 -0800 Subject: [PATCH 1119/1149] Cargo.toml: fix h2 vulnerability (#16406) close tikv/tikv#16405 Fix h2 vulnerability caught by `make clippy`. Signed-off-by: Alex Feinberg Co-authored-by: tonyxuqqi --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7de7b5f441c..518b5f133ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2342,9 +2342,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.22" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d6250322ef6e60f93f9a2162799302cd6f68f79f6e5d85c8c16f14d1d958178" +checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" dependencies = [ "bytes", "fnv", @@ -6957,7 +6957,7 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 1.0.0", + "cfg-if 0.1.10", "static_assertions", ] From d0ffb526aaebaec46d866d584e1d30e8251afed6 Mon Sep 17 00:00:00 2001 From: Connor Date: Thu, 18 Jan 2024 13:25:46 +0800 Subject: [PATCH 1120/1149] storage: Use write cf stats to decide load action for default cf instead of near seek (#16131) ref tikv/tikv#16245 Use write cf stats to decide load action for default cf instead of near seek Signed-off-by: Connor1996 --- 
components/cdc/src/initializer.rs | 22 ++++++------ components/cdc/src/old_value.rs | 17 +++++---- components/tikv_kv/src/lib.rs | 4 +-- components/tikv_kv/src/stats.rs | 36 ++++++++++++++++++- src/storage/mod.rs | 2 +- src/storage/mvcc/reader/scanner/backward.rs | 1 + src/storage/mvcc/reader/scanner/forward.rs | 39 ++++++++++----------- src/storage/mvcc/reader/scanner/mod.rs | 20 ++++++++--- 8 files changed, 95 insertions(+), 46 deletions(-) diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index d34faad1335..551b01ad83e 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -51,7 +51,7 @@ use crate::{ delegate::{post_init_downstream, Delegate, DownstreamId, DownstreamState, ObservedRange}, endpoint::Deregister, metrics::*, - old_value::{near_seek_old_value, new_old_value_cursor, OldValueCursors}, + old_value::{near_seek_old_value, OldValueCursors}, service::ConnId, Error, Result, Task, }; @@ -247,9 +247,7 @@ impl Initializer { let mut scanner = if kv_api == ChangeDataRequestKvApi::TiDb { if self.ts_filter_is_helpful(&start_key, &end_key) { hint_min_ts = Some(self.checkpoint_ts); - let wc = new_old_value_cursor(&snap, CF_WRITE); - let dc = new_old_value_cursor(&snap, CF_DEFAULT); - old_value_cursors = Some(OldValueCursors::new(wc, dc)); + old_value_cursors = Some(OldValueCursors::new(&snap)); } let upper_boundary = if end_key.as_encoded().is_empty() { // Region upper boundary could be an empty slice. 
@@ -342,16 +340,20 @@ impl Initializer { fn do_scan( &self, scanner: &mut Scanner, - mut old_value_cursors: Option<&mut OldValueCursors>, + mut old_value_cursors: Option<&mut OldValueCursors>, entries: &mut Vec>, ) -> Result { let mut read_old_value = |v: &mut OldValue, stats: &mut Statistics| -> Result<()> { - let (wc, dc) = match old_value_cursors { - Some(ref mut x) => (&mut x.write, &mut x.default), - None => return Ok(()), + let Some(cursors) = old_value_cursors.as_mut() else { + return Ok(()); }; if let OldValue::SeekWrite(ref key) = v { - match near_seek_old_value(key, wc, Either::<&S, _>::Right(dc), stats)? { + match near_seek_old_value( + key, + &mut cursors.write, + Either::<&S, _>::Right(&mut cursors.default), + stats, + )? { Some(x) => *v = OldValue::value(x), None => *v = OldValue::None, } @@ -415,7 +417,7 @@ impl Initializer { async fn scan_batch( &self, scanner: &mut Scanner, - old_value_cursors: Option<&mut OldValueCursors>, + old_value_cursors: Option<&mut OldValueCursors>, resolver: Option<&mut Resolver>, scan_stat: &mut ScanStat, ) -> Result>> { diff --git a/components/cdc/src/old_value.rs b/components/cdc/src/old_value.rs index 02f1bd00507..269a70d477e 100644 --- a/components/cdc/src/old_value.rs +++ b/components/cdc/src/old_value.rs @@ -8,7 +8,7 @@ use tikv::storage::{ mvcc::near_load_data_by_write, Cursor, CursorBuilder, ScanMode, Snapshot as EngineSnapshot, Statistics, }; -use tikv_kv::Iterator; +use tikv_kv::Snapshot; use tikv_util::{ config::ReadableSize, lru::{LruCache, SizePolicy}, @@ -235,13 +235,15 @@ pub fn near_seek_old_value( } } -pub struct OldValueCursors { - pub write: Cursor, - pub default: Cursor, +pub struct OldValueCursors { + pub write: Cursor, + pub default: Cursor, } -impl OldValueCursors { - pub fn new(write: Cursor, default: Cursor) -> Self { +impl OldValueCursors { + pub fn new(snapshot: &S) -> Self { + let write = new_old_value_cursor(snapshot, CF_WRITE); + let default = new_old_value_cursor(snapshot, CF_DEFAULT); 
OldValueCursors { write, default } } } @@ -571,7 +573,8 @@ mod tests { assert_eq!(stats.write.next, 144); if use_default_cursor { assert_eq!(stats.data.seek, 2); - assert_eq!(stats.data.next, 144); + // some unnecessary near seek is avoided + assert!(stats.data.next < stats.write.next); assert_eq!(stats.data.get, 0); } else { assert_eq!(stats.data.seek, 0); diff --git a/components/tikv_kv/src/lib.rs b/components/tikv_kv/src/lib.rs index ce9095c8950..7a9bfeabd0f 100644 --- a/components/tikv_kv/src/lib.rs +++ b/components/tikv_kv/src/lib.rs @@ -63,8 +63,8 @@ pub use self::{ raft_extension::{FakeExtension, RaftExtension}, rocksdb_engine::{RocksEngine, RocksSnapshot}, stats::{ - CfStatistics, FlowStatistics, FlowStatsReporter, StageLatencyStats, Statistics, - StatisticsSummary, RAW_VALUE_TOMBSTONE, + CfStatistics, FlowStatistics, FlowStatsReporter, LoadDataHint, StageLatencyStats, + Statistics, StatisticsSummary, RAW_VALUE_TOMBSTONE, }, }; diff --git a/components/tikv_kv/src/stats.rs b/components/tikv_kv/src/stats.rs index 9d1337e8283..f4333c0b0c4 100644 --- a/components/tikv_kv/src/stats.rs +++ b/components/tikv_kv/src/stats.rs @@ -176,7 +176,7 @@ impl CfStatistics { } } -#[derive(Default, Clone, Debug)] +#[derive(Default, Debug)] pub struct Statistics { pub lock: CfStatistics, pub write: CfStatistics, @@ -190,9 +190,43 @@ pub struct Statistics { // Note that a value comes from either write cf (due to it's a short value) or default cf, we // can't embed this `processed_size` field into `CfStatistics`. pub processed_size: usize, + + // When getting data from default cf, we can check write cf statistics to decide which method + // should be used to get the data. + load_data_hint: LoadDataHintStatistics, +} + +#[derive(Default, Debug)] +struct LoadDataHintStatistics { + // The value of `over_seek_bound` when the last time calling `load_data_hint()`. 
+ last_write_over_seek_bound: usize, +} + +#[derive(Default, PartialEq, Debug, Clone)] +pub enum LoadDataHint { + #[default] + NearSeek, + Seek, } impl Statistics { + // Use write cf stats to decide load action for default cf + pub fn load_data_hint(&mut self) -> LoadDataHint { + let stats = &mut self.load_data_hint; + + let hint = if self.write.over_seek_bound != stats.last_write_over_seek_bound { + // Over seek bound indicates the next valid key may be far away from current + // position, so use seek directly + LoadDataHint::Seek + } else { + // The next valid key may be around current position, so use near seek which + // calls next() multiple times before calling seek() + LoadDataHint::NearSeek + }; + stats.last_write_over_seek_bound = self.write.over_seek_bound; + hint + } + pub fn details(&self) -> [(&'static str, [(&'static str, usize); STATS_COUNT]); 3] { [ (CF_DEFAULT, self.data.details()), diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 284fad1e491..6d62e50aa55 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3933,7 +3933,7 @@ pub mod test_util { } /// All statistics related to KvGet/KvBatchGet. -#[derive(Debug, Default, Clone)] +#[derive(Debug, Default)] pub struct KvGetStatistics { pub stats: Statistics, pub latency_stats: StageLatencyStats, diff --git a/src/storage/mvcc/reader/scanner/backward.rs b/src/storage/mvcc/reader/scanner/backward.rs index b786807b3f3..818410358ce 100644 --- a/src/storage/mvcc/reader/scanner/backward.rs +++ b/src/storage/mvcc/reader/scanner/backward.rs @@ -467,6 +467,7 @@ impl BackwardKvScanner { } } + self.statistics.write.over_seek_bound += 1; // We have not found another user key for now, so we directly `seek_for_prev()`. // After that, we must pointing to another key, or out of bound. 
self.write_cursor diff --git a/src/storage/mvcc/reader/scanner/forward.rs b/src/storage/mvcc/reader/scanner/forward.rs index 4abb91314cc..3d2c2f831bf 100644 --- a/src/storage/mvcc/reader/scanner/forward.rs +++ b/src/storage/mvcc/reader/scanner/forward.rs @@ -91,6 +91,7 @@ impl Cursors { } } } + statistics.write.over_seek_bound += 1; // We have not found another user key for now, so we directly `seek()`. // After that, we must pointing to another key, or out of bound. @@ -314,7 +315,6 @@ impl> ForwardScanner { // and if we have not reached where we want, we use `seek()`. // Whether we have *not* reached where we want by `next()`. - let mut needs_seek = true; for i in 0..SEEK_BOUND { if i > 0 { @@ -333,8 +333,7 @@ impl> ForwardScanner { let key_commit_ts = Key::decode_ts_from(current_key)?; if key_commit_ts <= self.cfg.ts { // Founded, don't need to seek again. - needs_seek = false; - break; + return Ok(true); } else if self.met_newer_ts_data == NewerTsCheckState::NotMetYet { self.met_newer_ts_data = NewerTsCheckState::Met; } @@ -356,24 +355,22 @@ impl> ForwardScanner { } } } - // If we have not found `${user_key}_${ts}` in a few `next()`, directly - // `seek()`. - if needs_seek { - // `user_key` must have reserved space here, so its clone has reserved space - // too. So no reallocation happens in `append_ts`. - self.cursors.write.seek( - &user_key.clone().append_ts(self.cfg.ts), - &mut self.statistics.write, - )?; - if !self.cursors.write.valid()? { - // Key space ended. - return Ok(false); - } - let current_key = self.cursors.write.key(&mut self.statistics.write); - if !Key::is_user_key_eq(current_key, user_key.as_encoded().as_slice()) { - // Meet another key. - return Ok(false); - } + self.statistics.write.over_seek_bound += 1; + + // `user_key` must have reserved space here, so its clone has reserved space + // too. So no reallocation happens in `append_ts`. 
+ self.cursors.write.seek( + &user_key.clone().append_ts(self.cfg.ts), + &mut self.statistics.write, + )?; + if !self.cursors.write.valid()? { + // Key space ended. + return Ok(false); + } + let current_key = self.cursors.write.key(&mut self.statistics.write); + if !Key::is_user_key_eq(current_key, user_key.as_encoded().as_slice()) { + // Meet another key. + return Ok(false); } Ok(true) } diff --git a/src/storage/mvcc/reader/scanner/mod.rs b/src/storage/mvcc/reader/scanner/mod.rs index d0cfde82704..7f4fc664bb8 100644 --- a/src/storage/mvcc/reader/scanner/mod.rs +++ b/src/storage/mvcc/reader/scanner/mod.rs @@ -20,7 +20,9 @@ use self::{ }, }; use crate::storage::{ - kv::{CfStatistics, Cursor, CursorBuilder, Iterator, ScanMode, Snapshot, Statistics}, + kv::{ + CfStatistics, Cursor, CursorBuilder, Iterator, LoadDataHint, ScanMode, Snapshot, Statistics, + }, mvcc::{default_not_found_error, NewerTsCheckState, Result}, need_check_locks, txn::{Result as TxnResult, Scanner as StoreScanner}, @@ -342,7 +344,8 @@ impl ScannerConfig { /// Reads user key's value in default CF according to the given write CF value /// (`write`). /// -/// Internally, there will be a `near_seek` operation. +/// Internally, there will be a `near_seek` or `seek` operation depending on +/// write CF stats. /// /// Notice that the value may be already carried in the `write` (short value). /// In this case, you should not call this function. @@ -363,7 +366,11 @@ where I: Iterator, { let seek_key = user_key.clone().append_ts(write_start_ts); - default_cursor.near_seek(&seek_key, &mut statistics.data)?; + match statistics.load_data_hint() { + LoadDataHint::NearSeek => default_cursor.near_seek(&seek_key, &mut statistics.data)?, + LoadDataHint::Seek => default_cursor.seek(&seek_key, &mut statistics.data)?, + }; + if !default_cursor.valid()? 
|| default_cursor.key(&mut statistics.data) != seek_key.as_encoded().as_slice() { @@ -388,7 +395,12 @@ where I: Iterator, { let seek_key = user_key.clone().append_ts(write_start_ts); - default_cursor.near_seek_for_prev(&seek_key, &mut statistics.data)?; + match statistics.load_data_hint() { + LoadDataHint::NearSeek => { + default_cursor.near_seek_for_prev(&seek_key, &mut statistics.data)? + } + LoadDataHint::Seek => default_cursor.seek_for_prev(&seek_key, &mut statistics.data)?, + }; if !default_cursor.valid()? || default_cursor.key(&mut statistics.data) != seek_key.as_encoded().as_slice() { From 2293b822824768c158541fc45af0776fca30f60e Mon Sep 17 00:00:00 2001 From: Alex Feinberg Date: Wed, 17 Jan 2024 21:40:46 -0800 Subject: [PATCH 1121/1149] In-Memory Engine: WriteBatch implementation. (#16381) ref tikv/tikv#16323 Basic WriteBatch implementation for In-Memory Engine. Signed-off-by: Alex Feinberg Co-authored-by: tonyxuqqi Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/hybrid_engine/src/write_batch.rs | 54 ++-- .../region_cache_memory_engine/src/engine.rs | 14 +- .../src/write_batch.rs | 241 ++++++++++++++++-- 3 files changed, 255 insertions(+), 54 deletions(-) diff --git a/components/hybrid_engine/src/write_batch.rs b/components/hybrid_engine/src/write_batch.rs index ec124a2e831..fe228ac9372 100644 --- a/components/hybrid_engine/src/write_batch.rs +++ b/components/hybrid_engine/src/write_batch.rs @@ -50,65 +50,75 @@ impl WriteBatch for HybridEngineWriteBatch { } fn data_size(&self) -> usize { - unimplemented!() + self.disk_write_batch.data_size() } fn count(&self) -> usize { - unimplemented!() + self.disk_write_batch.count() } fn is_empty(&self) -> bool { - unimplemented!() + self.disk_write_batch.is_empty() } fn should_write_to_engine(&self) -> bool { - unimplemented!() + self.disk_write_batch.should_write_to_engine() } fn clear(&mut self) { - unimplemented!() + self.disk_write_batch.clear(); + 
self.cache_write_batch.clear() } fn set_save_point(&mut self) { - unimplemented!() + self.disk_write_batch.set_save_point(); + self.cache_write_batch.set_save_point() } fn pop_save_point(&mut self) -> Result<()> { - unimplemented!() + self.disk_write_batch.pop_save_point()?; + self.cache_write_batch.pop_save_point() } fn rollback_to_save_point(&mut self) -> Result<()> { - unimplemented!() + self.disk_write_batch.rollback_to_save_point()?; + self.cache_write_batch.rollback_to_save_point() } - fn merge(&mut self, _other: Self) -> Result<()> { - unimplemented!() + fn merge(&mut self, other: Self) -> Result<()> { + self.disk_write_batch.merge(other.disk_write_batch)?; + self.cache_write_batch.merge(other.cache_write_batch) } } impl Mutable for HybridEngineWriteBatch { - fn put(&mut self, _key: &[u8], _value: &[u8]) -> Result<()> { - unimplemented!() + fn put(&mut self, key: &[u8], value: &[u8]) -> Result<()> { + self.disk_write_batch.put(key, value)?; + self.cache_write_batch.put(key, value) } - fn put_cf(&mut self, _cf: &str, _key: &[u8], _value: &[u8]) -> Result<()> { - unimplemented!() + fn put_cf(&mut self, cf: &str, key: &[u8], value: &[u8]) -> Result<()> { + self.disk_write_batch.put_cf(cf, key, value)?; + self.cache_write_batch.put_cf(cf, key, value) } - fn delete(&mut self, _key: &[u8]) -> Result<()> { - unimplemented!() + fn delete(&mut self, key: &[u8]) -> Result<()> { + self.disk_write_batch.delete(key)?; + self.cache_write_batch.delete(key) } - fn delete_cf(&mut self, _cf: &str, _key: &[u8]) -> Result<()> { - unimplemented!() + fn delete_cf(&mut self, cf: &str, key: &[u8]) -> Result<()> { + self.disk_write_batch.delete_cf(cf, key)?; + self.cache_write_batch.delete_cf(cf, key) } - fn delete_range(&mut self, _begin_key: &[u8], _end_key: &[u8]) -> Result<()> { - unimplemented!() + fn delete_range(&mut self, begin_key: &[u8], end_key: &[u8]) -> Result<()> { + self.disk_write_batch.delete_range(begin_key, end_key) } - fn delete_range_cf(&mut self, _cf: &str, 
_begin_key: &[u8], _end_key: &[u8]) -> Result<()> { - unimplemented!() + fn delete_range_cf(&mut self, cf: &str, begin_key: &[u8], end_key: &[u8]) -> Result<()> { + self.disk_write_batch + .delete_range_cf(cf, begin_key, end_key) } } diff --git a/components/region_cache_memory_engine/src/engine.rs b/components/region_cache_memory_engine/src/engine.rs index 6f9e6f6b75e..17da5bdaea8 100644 --- a/components/region_cache_memory_engine/src/engine.rs +++ b/components/region_cache_memory_engine/src/engine.rs @@ -23,7 +23,7 @@ use crate::keys::{ VALUE_TYPE_FOR_SEEK, VALUE_TYPE_FOR_SEEK_FOR_PREV, }; -fn cf_to_id(cf: &str) -> usize { +pub(crate) fn cf_to_id(cf: &str) -> usize { match cf { CF_DEFAULT => 0, CF_LOCK => 1, @@ -38,7 +38,7 @@ fn cf_to_id(cf: &str) -> usize { /// with a formal implementation. #[derive(Clone)] pub struct RegionMemoryEngine { - data: [Arc>; 3], + pub(crate) data: [Arc>; 3], } impl RegionMemoryEngine { @@ -106,10 +106,10 @@ pub struct RegionMemoryMeta { snapshot_list: SnapshotList, // It indicates whether the region is readable. False means integrity of the data in this // cached region is not satisfied due to being evicted for instance. - can_read: bool, + pub(crate) can_read: bool, // Request with read_ts below it is not eligible for granting snapshot. // Note: different region can have different safe_ts. - safe_ts: u64, + pub(crate) safe_ts: u64, } impl RegionMemoryMeta { @@ -124,8 +124,8 @@ impl RegionMemoryMeta { #[derive(Default)] pub struct RegionCacheMemoryEngineCore { - engine: HashMap, - region_metas: HashMap, + pub(crate) engine: HashMap, + pub(crate) region_metas: HashMap, } impl RegionCacheMemoryEngineCore { @@ -153,7 +153,7 @@ impl RegionCacheMemoryEngineCore { /// cached region), we resort to using a the disk engine's snapshot instead. 
#[derive(Clone, Default)] pub struct RegionCacheMemoryEngine { - core: Arc>, + pub(crate) core: Arc>, } impl RegionCacheMemoryEngine { diff --git a/components/region_cache_memory_engine/src/write_batch.rs b/components/region_cache_memory_engine/src/write_batch.rs index 674b3434525..55bbb808980 100644 --- a/components/region_cache_memory_engine/src/write_batch.rs +++ b/components/region_cache_memory_engine/src/write_batch.rs @@ -1,21 +1,53 @@ use bytes::Bytes; -use engine_traits::{Mutable, Result, WriteBatch, WriteBatchExt, WriteOptions}; +use engine_traits::{Mutable, Result, WriteBatch, WriteBatchExt, WriteOptions, CF_DEFAULT}; use tikv_util::box_err; -use crate::RegionCacheMemoryEngine; +use crate::{ + engine::{cf_to_id, RegionMemoryEngine}, + keys::{encode_key, ValueType}, + RegionCacheMemoryEngine, +}; + +/// Callback to apply an encoded entry to cache engine. +/// +/// Arguments: &str - cf name, Bytes - (encoded) key, Bytes - value. +/// +/// TODO: consider refactoring into a trait once RegionCacheMemoryEngine API +/// stabilizes. +type ApplyEncodedEntryCb = Box Result<()> + Send + Sync>; /// RegionCacheWriteBatch maintains its own in-memory buffer. 
-#[derive(Default, Clone, Debug)] pub struct RegionCacheWriteBatch { buffer: Vec, + apply_cb: ApplyEncodedEntryCb, sequence_number: Option, + save_points: Vec, +} + +impl std::fmt::Debug for RegionCacheWriteBatch { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RegionCacheWriteBatch") + .field("buffer", &self.buffer) + .finish() + } } impl RegionCacheWriteBatch { - pub fn with_capacity(cap: usize) -> Self { + pub fn new(apply_cb: ApplyEncodedEntryCb) -> Self { + Self { + buffer: Vec::new(), + apply_cb, + sequence_number: None, + save_points: Vec::new(), + } + } + + pub fn with_capacity(apply_cb: ApplyEncodedEntryCb, cap: usize) -> Self { Self { buffer: Vec::with_capacity(cap), + apply_cb, sequence_number: None, + save_points: Vec::new(), } } @@ -28,13 +60,88 @@ impl RegionCacheWriteBatch { self.sequence_number = Some(seq); Ok(()) } + + fn write_impl(&mut self, seq: u64) -> Result<()> { + self.buffer + .iter() + .map(|e| (e.cf.as_str(), e.encode(seq))) + .try_for_each(|(cf, (key, value))| (self.apply_cb)(cf, key, value)) + } +} + +#[derive(Clone, Debug)] +enum CacheWriteBatchEntryMutation { + PutValue(Bytes), + Deletion, } +impl CacheWriteBatchEntryMutation { + fn encode(&self, key: &[u8], seq: u64) -> (Bytes, Bytes) { + match self { + CacheWriteBatchEntryMutation::PutValue(value) => { + (encode_key(key, seq, ValueType::Value), value.clone()) + } + CacheWriteBatchEntryMutation::Deletion => { + (encode_key(key, seq, ValueType::Deletion), Bytes::new()) + } + } + } + fn data_size(&self) -> usize { + match self { + CacheWriteBatchEntryMutation::PutValue(value) => value.len(), + CacheWriteBatchEntryMutation::Deletion => 0, + } + } +} #[derive(Clone, Debug)] struct RegionCacheWriteBatchEntry { cf: String, key: Bytes, - mutation: (), // TODO, + mutation: CacheWriteBatchEntryMutation, +} + +impl RegionCacheWriteBatchEntry { + pub fn put_value(cf: &str, key: &[u8], value: &[u8]) -> Self { + Self { + cf: cf.to_owned(), + key: 
Bytes::copy_from_slice(key), + mutation: CacheWriteBatchEntryMutation::PutValue(Bytes::copy_from_slice(value)), + } + } + + pub fn deletion(cf: &str, key: &[u8]) -> Self { + Self { + cf: cf.to_owned(), + key: Bytes::copy_from_slice(key), + mutation: CacheWriteBatchEntryMutation::Deletion, + } + } + + #[inline] + pub fn encode(&self, seq: u64) -> (Bytes, Bytes) { + self.mutation.encode(&self.key, seq) + } + + pub fn data_size(&self) -> usize { + self.key.len() + std::mem::size_of::() + self.mutation.data_size() + } +} +impl RegionCacheMemoryEngine { + fn apply_cb(&self) -> ApplyEncodedEntryCb { + // TODO: use the stabilized API for appending to the skip list here. + Box::new(|_cf, _key, _value| Ok(())) + } +} + +impl From<&RegionMemoryEngine> for RegionCacheWriteBatch { + fn from(engine: &RegionMemoryEngine) -> Self { + let engine_clone = engine.clone(); + let apply_cb = Box::new(move |cf: &'_ str, key, value| { + engine_clone.data[cf_to_id(cf)].put(key, value); + Ok(()) + }); + RegionCacheWriteBatch::new(apply_cb) + } } impl WriteBatchExt for RegionCacheMemoryEngine { @@ -43,29 +150,35 @@ impl WriteBatchExt for RegionCacheMemoryEngine { const WRITE_BATCH_MAX_KEYS: usize = 256; fn write_batch(&self) -> Self::WriteBatch { - RegionCacheWriteBatch::default() + RegionCacheWriteBatch::new(self.apply_cb()) } fn write_batch_with_cap(&self, cap: usize) -> Self::WriteBatch { - RegionCacheWriteBatch::with_capacity(cap) + RegionCacheWriteBatch::with_capacity(self.apply_cb(), cap) } } impl WriteBatch for RegionCacheWriteBatch { fn write_opt(&mut self, _: &WriteOptions) -> Result { - unimplemented!() + self.sequence_number + .map(|seq| self.write_impl(seq).map(|()| seq)) + .transpose() + .map(|o| o.ok_or_else(|| box_err!("sequence_number must be set!")))? 
} fn data_size(&self) -> usize { - unimplemented!() + self.buffer + .iter() + .map(RegionCacheWriteBatchEntry::data_size) + .sum() } fn count(&self) -> usize { - unimplemented!() + self.buffer.len() } fn is_empty(&self) -> bool { - unimplemented!() + self.buffer.is_empty() } fn should_write_to_engine(&self) -> bool { @@ -73,41 +186,56 @@ impl WriteBatch for RegionCacheWriteBatch { } fn clear(&mut self) { - unimplemented!() + self.buffer.clear(); + self.save_points.clear(); + _ = self.sequence_number.take(); } fn set_save_point(&mut self) { - unimplemented!() + self.save_points.push(self.buffer.len()) } fn pop_save_point(&mut self) -> Result<()> { - unimplemented!() + self.save_points + .pop() + .map(|_| ()) + .ok_or_else(|| box_err!("no save points available")) } fn rollback_to_save_point(&mut self) -> Result<()> { - unimplemented!() + self.save_points + .pop() + .map(|sp| { + self.buffer.truncate(sp); + }) + .ok_or_else(|| box_err!("no save point available!")) } - fn merge(&mut self, _: Self) -> Result<()> { - unimplemented!() + fn merge(&mut self, mut other: Self) -> Result<()> { + self.buffer.append(&mut other.buffer); + Ok(()) } } impl Mutable for RegionCacheWriteBatch { - fn put(&mut self, _: &[u8], _: &[u8]) -> Result<()> { - unimplemented!() + fn put(&mut self, key: &[u8], val: &[u8]) -> Result<()> { + self.put_cf(CF_DEFAULT, key, val) } - fn put_cf(&mut self, _: &str, _: &[u8], _: &[u8]) -> Result<()> { - unimplemented!() + fn put_cf(&mut self, cf: &str, key: &[u8], val: &[u8]) -> Result<()> { + self.buffer + .push(RegionCacheWriteBatchEntry::put_value(cf, key, val)); + Ok(()) } - fn delete(&mut self, _: &[u8]) -> Result<()> { - unimplemented!() + fn delete(&mut self, key: &[u8]) -> Result<()> { + self.delete_cf(CF_DEFAULT, key) } - fn delete_cf(&mut self, _: &str, _: &[u8]) -> Result<()> { - unimplemented!() + fn delete_cf(&mut self, cf: &str, key: &[u8]) -> Result<()> { + self.buffer + .push(RegionCacheWriteBatchEntry::deletion(cf, key)); + Ok(()) } fn 
delete_range(&mut self, _: &[u8], _: &[u8]) -> Result<()> { @@ -118,3 +246,66 @@ impl Mutable for RegionCacheWriteBatch { unimplemented!() } } + +#[cfg(test)] +mod tests { + use engine_traits::{Peekable, RegionCacheEngine, WriteBatch}; + + use super::*; + + #[test] + fn test_write_to_skiplist() { + let engine = RegionMemoryEngine::default(); + let mut wb = RegionCacheWriteBatch::from(&engine); + wb.put(b"aaa", b"bbb").unwrap(); + wb.set_sequence_number(1).unwrap(); + assert_eq!(wb.write().unwrap(), 1); + let sl = engine.data[cf_to_id(CF_DEFAULT)].clone(); + let actual = sl.get(&encode_key(b"aaa", 1, ValueType::Value)).unwrap(); + assert_eq!(&b"bbb"[..], actual) + } + + #[test] + fn test_savepoints() { + let engine = RegionMemoryEngine::default(); + let mut wb = RegionCacheWriteBatch::from(&engine); + wb.put(b"aaa", b"bbb").unwrap(); + wb.set_save_point(); + wb.put(b"aaa", b"ccc").unwrap(); + wb.put(b"ccc", b"ddd").unwrap(); + wb.rollback_to_save_point().unwrap(); + wb.set_sequence_number(1).unwrap(); + assert_eq!(wb.write().unwrap(), 1); + let sl = engine.data[cf_to_id(CF_DEFAULT)].clone(); + let actual = sl.get(&encode_key(b"aaa", 1, ValueType::Value)).unwrap(); + assert_eq!(&b"bbb"[..], actual); + assert!(sl.get(&encode_key(b"ccc", 1, ValueType::Value)).is_none()) + } + + #[test] + fn test_put_write_clear_delete_put_write() { + let engine = RegionCacheMemoryEngine::default(); + engine.new_region(1); + let engine_for_writes = { + let mut core = engine.core.lock().unwrap(); + core.region_metas.get_mut(&1).unwrap().can_read = true; + core.region_metas.get_mut(&1).unwrap().safe_ts = 10; + core.engine.get_mut(&1).unwrap().clone() + }; + let mut wb = RegionCacheWriteBatch::from(&engine_for_writes); + wb.put(b"aaa", b"bbb").unwrap(); + wb.set_sequence_number(1).unwrap(); + _ = wb.write().unwrap(); + wb.clear(); + wb.put(b"bbb", b"ccc").unwrap(); + wb.delete(b"aaa").unwrap(); + wb.set_sequence_number(2).unwrap(); + _ = wb.write().unwrap(); + let snapshot = 
engine.snapshot(1, u64::MAX, 2).unwrap(); + assert_eq!( + snapshot.get_value(&b"bbb"[..]).unwrap().unwrap(), + &b"ccc"[..] + ); + assert!(snapshot.get_value(&b"aaa"[..]).unwrap().is_none()) + } +} From e2a2e87a890ad488446b3c4a2900a74d9313a8ba Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 18 Jan 2024 17:16:48 +0800 Subject: [PATCH 1122/1149] scripts: update RustSec advisory db before checking cargo deny (#16409) ref tikv/tikv#16328 Signed-off-by: Neil Shen --- scripts/deny | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/deny b/scripts/deny index cf677b9f1fb..6862fe8bebe 100755 --- a/scripts/deny +++ b/scripts/deny @@ -3,4 +3,5 @@ set -euo pipefail cargo install cargo-deny 2> /dev/null || echo "Install cargo-deny failed" +cargo deny fetch all cargo deny check --show-stats From fc93c89d761000580dab1623ee7f6366dd63b948 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 18 Jan 2024 18:42:18 +0800 Subject: [PATCH 1123/1149] In-memory Engine: refactor from region based to range based (#16383) ref tikv/tikv#16141 refactor from region based to range based Signed-off-by: SpadeA-Tang --- Cargo.lock | 4 +- components/engine_traits/src/engine.rs | 9 +- components/engine_traits/src/errors.rs | 2 +- components/engine_traits/src/lib.rs | 2 +- components/engine_traits/src/memory_engine.rs | 98 ++- components/hybrid_engine/src/cf_names.rs | 4 +- components/hybrid_engine/src/cf_options.rs | 4 +- components/hybrid_engine/src/checkpoint.rs | 4 +- components/hybrid_engine/src/compact.rs | 4 +- components/hybrid_engine/src/db_options.rs | 4 +- components/hybrid_engine/src/engine.rs | 37 +- .../hybrid_engine/src/engine_iterator.rs | 8 +- .../hybrid_engine/src/flow_control_factors.rs | 4 +- .../hybrid_engine/src/hybrid_metrics.rs | 4 +- components/hybrid_engine/src/import.rs | 4 +- components/hybrid_engine/src/iterable.rs | 4 +- components/hybrid_engine/src/misc.rs | 4 +- .../hybrid_engine/src/mvcc_properties.rs | 4 +- 
components/hybrid_engine/src/perf_context.rs | 4 +- .../hybrid_engine/src/range_properties.rs | 4 +- components/hybrid_engine/src/snapshot.rs | 18 +- components/hybrid_engine/src/sst.rs | 6 +- .../hybrid_engine/src/table_properties.rs | 4 +- .../hybrid_engine/src/ttl_properties.rs | 4 +- components/hybrid_engine/src/write_batch.rs | 23 +- components/raftstore/src/store/peer.rs | 4 +- components/raftstore/src/store/worker/read.rs | 33 +- .../region_cache_memory_engine/src/engine.rs | 796 ++++++++++++------ .../region_cache_memory_engine/src/keys.rs | 21 + .../region_cache_memory_engine/src/lib.rs | 5 +- .../src/range_manager.rs | 266 ++++++ .../src/write_batch.rs | 75 +- components/server/src/common.rs | 4 +- components/server/src/server.rs | 6 +- components/test_raftstore/src/node.rs | 2 +- components/test_raftstore/src/server.rs | 2 +- components/test_raftstore/src/util.rs | 4 +- src/server/raftkv/mod.rs | 2 +- 38 files changed, 1083 insertions(+), 404 deletions(-) create mode 100644 components/region_cache_memory_engine/src/range_manager.rs diff --git a/Cargo.lock b/Cargo.lock index 518b5f133ae..067c01db532 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5302,11 +5302,13 @@ checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7" [[package]] name = "skiplist-rs" version = "0.1.0" -source = "git+https://github.com/tikv/skiplist-rs.git?branch=main#618af619d9348ef89eaa71c5f6fbddbd9a5c09bf" +source = "git+https://github.com/tikv/skiplist-rs.git?branch=main#79280c29c3d309189fc39b2d8df48c67ccc998bf" dependencies = [ "bytes", "rand 0.8.5", "slog", + "tikv-jemalloc-ctl", + "tikv-jemallocator", ] [[package]] diff --git a/components/engine_traits/src/engine.rs b/components/engine_traits/src/engine.rs index 83f05180820..b3b24033a3e 100644 --- a/components/engine_traits/src/engine.rs +++ b/components/engine_traits/src/engine.rs @@ -84,6 +84,13 @@ pub trait KvEngine: #[derive(Debug, Clone)] pub struct SnapshotContext { - pub region_id: u64, + pub range: 
Option, pub read_ts: u64, } + +impl SnapshotContext { + pub fn set_range(&mut self, range: CacheRange) { + assert!(self.range.is_none()); + self.range = Some(range); + } +} diff --git a/components/engine_traits/src/errors.rs b/components/engine_traits/src/errors.rs index 6df2ef5a992..574a950dd59 100644 --- a/components/engine_traits/src/errors.rs +++ b/components/engine_traits/src/errors.rs @@ -149,7 +149,7 @@ pub enum Error { EntriesUnavailable, #[error("The entries of region is compacted")] EntriesCompacted, - #[error("Iterator of RegionCacheSnapshot is only supported with boundary set")] + #[error("Iterator of RangeCacheSnapshot is only supported with boundary set")] BoundaryNotSet, } diff --git a/components/engine_traits/src/lib.rs b/components/engine_traits/src/lib.rs index 79c509c5a94..8296449d0aa 100644 --- a/components/engine_traits/src/lib.rs +++ b/components/engine_traits/src/lib.rs @@ -312,7 +312,7 @@ pub use crate::table_properties::*; mod checkpoint; pub use crate::checkpoint::*; mod memory_engine; -pub use memory_engine::RegionCacheEngine; +pub use memory_engine::{CacheRange, RangeCacheEngine}; // These modules contain more general traits, some of which may be implemented // by multiple types. diff --git a/components/engine_traits/src/memory_engine.rs b/components/engine_traits/src/memory_engine.rs index 9babc8580fc..a430a1b89bd 100644 --- a/components/engine_traits/src/memory_engine.rs +++ b/components/engine_traits/src/memory_engine.rs @@ -1,19 +1,105 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use std::fmt::Debug; +use std::{cmp, fmt::Debug}; + +use keys::{enc_end_key, enc_start_key}; +use kvproto::metapb; use crate::{Iterable, Snapshot, WriteBatchExt}; -/// RegionCacheEngine works as a region cache caching some regions (in Memory or +/// RangeCacheEngine works as a range cache caching some ranges (in Memory or /// NVME for instance) to improve the read performance. 
-pub trait RegionCacheEngine: +pub trait RangeCacheEngine: WriteBatchExt + Iterable + Debug + Clone + Unpin + Send + Sync + 'static { type Snapshot: Snapshot; - // If None is returned, the RegionCacheEngine is currently not readable for this + // If None is returned, the RangeCacheEngine is currently not readable for this // region or read_ts. - // Sequence number is shared between RegionCacheEngine and disk KvEnigne to + // Sequence number is shared between RangeCacheEngine and disk KvEnigne to // provide atomic write - fn snapshot(&self, region_id: u64, read_ts: u64, seq_num: u64) -> Option; + fn snapshot(&self, range: CacheRange, read_ts: u64, seq_num: u64) -> Option; +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct CacheRange { + pub start: Vec, + pub end: Vec, +} + +impl CacheRange { + pub fn new(start: Vec, end: Vec) -> Self { + Self { start, end } + } + + pub fn from_region(region: &metapb::Region) -> Self { + Self { + start: enc_start_key(region), + end: enc_end_key(region), + } + } +} + +impl PartialOrd for CacheRange { + fn partial_cmp(&self, other: &Self) -> Option { + if self.end <= other.start { + return Some(cmp::Ordering::Less); + } + + if other.end <= self.start { + return Some(cmp::Ordering::Greater); + } + + if self == other { + return Some(cmp::Ordering::Equal); + } + + None + } +} + +impl Ord for CacheRange { + fn cmp(&self, other: &Self) -> cmp::Ordering { + let c = self.start.cmp(&other.start); + if !c.is_eq() { + return c; + } + self.end.cmp(&other.end) + } +} + +impl CacheRange { + // todo: need to consider ""? 
+ pub fn contains_range(&self, other: &CacheRange) -> bool { + self.start <= other.start && self.end >= other.end + } + + pub fn contains_key(&self, key: &[u8]) -> bool { + self.start.as_slice() <= key && key < self.end.as_slice() + } + + pub fn overlaps(&self, other: &CacheRange) -> bool { + self.start < other.end && other.start < self.end + } + + pub fn split_off(&self, key: &CacheRange) -> (Option, Option) { + let left = if self.start != key.start { + Some(CacheRange { + start: self.start.clone(), + end: key.start.clone(), + }) + } else { + None + }; + let right = if self.end != key.end { + Some(CacheRange { + start: key.end.clone(), + end: self.end.clone(), + }) + } else { + None + }; + + (left, right) + } } diff --git a/components/hybrid_engine/src/cf_names.rs b/components/hybrid_engine/src/cf_names.rs index 990fb4d0f76..3393f720973 100644 --- a/components/hybrid_engine/src/cf_names.rs +++ b/components/hybrid_engine/src/cf_names.rs @@ -1,13 +1,13 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{CfNamesExt, KvEngine, RegionCacheEngine}; +use engine_traits::{CfNamesExt, KvEngine, RangeCacheEngine}; use crate::engine::HybridEngine; impl CfNamesExt for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { fn cf_names(&self) -> Vec<&str> { self.disk_engine().cf_names() diff --git a/components/hybrid_engine/src/cf_options.rs b/components/hybrid_engine/src/cf_options.rs index 61fe08da536..84ec83272f1 100644 --- a/components/hybrid_engine/src/cf_options.rs +++ b/components/hybrid_engine/src/cf_options.rs @@ -1,13 +1,13 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{CfOptionsExt, KvEngine, RegionCacheEngine, Result}; +use engine_traits::{CfOptionsExt, KvEngine, RangeCacheEngine, Result}; use crate::engine::HybridEngine; impl CfOptionsExt for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { type CfOptions = EK::CfOptions; diff --git a/components/hybrid_engine/src/checkpoint.rs b/components/hybrid_engine/src/checkpoint.rs index 7d9bdb022ea..d1a12ca0d7e 100644 --- a/components/hybrid_engine/src/checkpoint.rs +++ b/components/hybrid_engine/src/checkpoint.rs @@ -1,13 +1,13 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{Checkpointable, KvEngine, RegionCacheEngine, Result}; +use engine_traits::{Checkpointable, KvEngine, RangeCacheEngine, Result}; use crate::engine::HybridEngine; impl Checkpointable for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { type Checkpointer = EK::Checkpointer; diff --git a/components/hybrid_engine/src/compact.rs b/components/hybrid_engine/src/compact.rs index 6afbba556b0..b5c909ad511 100644 --- a/components/hybrid_engine/src/compact.rs +++ b/components/hybrid_engine/src/compact.rs @@ -1,13 +1,13 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{CompactExt, KvEngine, RegionCacheEngine, Result}; +use engine_traits::{CompactExt, KvEngine, RangeCacheEngine, Result}; use crate::engine::HybridEngine; impl CompactExt for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { type CompactedEvent = EK::CompactedEvent; diff --git a/components/hybrid_engine/src/db_options.rs b/components/hybrid_engine/src/db_options.rs index 6b4be90a43f..7a6f3dc5ce5 100644 --- a/components/hybrid_engine/src/db_options.rs +++ b/components/hybrid_engine/src/db_options.rs @@ -1,13 +1,13 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{DbOptionsExt, KvEngine, RegionCacheEngine, Result}; +use engine_traits::{DbOptionsExt, KvEngine, RangeCacheEngine, Result}; use crate::engine::HybridEngine; impl DbOptionsExt for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { type DbOptions = EK::DbOptions; diff --git a/components/hybrid_engine/src/engine.rs b/components/hybrid_engine/src/engine.rs index e0020f97b36..3759554d49f 100644 --- a/components/hybrid_engine/src/engine.rs +++ b/components/hybrid_engine/src/engine.rs @@ -1,7 +1,7 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. use engine_traits::{ - KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, SnapshotContext, SnapshotMiscExt, + KvEngine, Peekable, RangeCacheEngine, ReadOptions, Result, SnapshotContext, SnapshotMiscExt, SyncMutable, WriteBatchExt, }; @@ -17,7 +17,7 @@ use crate::snapshot::HybridEngineSnapshot; pub struct HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { disk_engine: EK, region_cache_engine: EC, @@ -26,7 +26,7 @@ where impl HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { pub fn disk_engine(&self) -> &EK { &self.disk_engine @@ -48,7 +48,7 @@ where impl HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { pub fn new(disk_engine: EK, region_cache_engine: EC) -> Self { Self { @@ -62,7 +62,7 @@ where impl KvEngine for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, HybridEngine: WriteBatchExt, { type Snapshot = HybridEngineSnapshot; @@ -71,7 +71,7 @@ where let disk_snap = self.disk_engine.snapshot(ctx.clone()); let region_cache_snap = if let Some(ctx) = ctx { self.region_cache_engine.snapshot( - ctx.region_id, + ctx.range.unwrap(), ctx.read_ts, disk_snap.sequence_number(), ) @@ -98,7 +98,7 @@ where impl Peekable for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { type DbVector = 
EK::DbVector; @@ -121,7 +121,7 @@ where impl SyncMutable for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { fn put(&self, key: &[u8], value: &[u8]) -> Result<()> { unimplemented!() @@ -150,9 +150,11 @@ where #[cfg(test)] mod tests { + use std::sync::Arc; + use engine_rocks::util::new_engine; - use engine_traits::{KvEngine, SnapshotContext, CF_DEFAULT, CF_LOCK, CF_WRITE}; - use region_cache_memory_engine::RegionCacheMemoryEngine; + use engine_traits::{CacheRange, KvEngine, SnapshotContext, CF_DEFAULT, CF_LOCK, CF_WRITE}; + use region_cache_memory_engine::RangeCacheMemoryEngine; use tempfile::Builder; use crate::HybridEngine; @@ -165,12 +167,13 @@ mod tests { &[CF_DEFAULT, CF_LOCK, CF_WRITE], ) .unwrap(); - let memory_engine = RegionCacheMemoryEngine::default(); - memory_engine.new_region(1); + let memory_engine = RangeCacheMemoryEngine::new(Arc::default()); + let range = CacheRange::new(b"k00".to_vec(), b"k10".to_vec()); + memory_engine.new_range(range.clone()); { let mut core = memory_engine.core().lock().unwrap(); - core.mut_region_meta(1).unwrap().set_can_read(true); - core.mut_region_meta(1).unwrap().set_safe_ts(10); + core.mut_range_manager().set_range_readable(&range, true); + core.mut_range_manager().set_safe_ts(&range, 10); } let hybrid_engine = HybridEngine::new(disk_engine, memory_engine.clone()); @@ -179,21 +182,21 @@ mod tests { let mut snap_ctx = SnapshotContext { read_ts: 15, - region_id: 1, + range: Some(range.clone()), }; let s = hybrid_engine.snapshot(Some(snap_ctx.clone())); assert!(s.region_cache_snapshot_available()); { let mut core = memory_engine.core().lock().unwrap(); - core.mut_region_meta(1).unwrap().set_can_read(false); + core.mut_range_manager().set_range_readable(&range, false); } let s = hybrid_engine.snapshot(Some(snap_ctx.clone())); assert!(!s.region_cache_snapshot_available()); { let mut core = memory_engine.core().lock().unwrap(); - core.mut_region_meta(1).unwrap().set_can_read(true); + 
core.mut_range_manager().set_range_readable(&range, true); } snap_ctx.read_ts = 5; let s = hybrid_engine.snapshot(Some(snap_ctx)); diff --git a/components/hybrid_engine/src/engine_iterator.rs b/components/hybrid_engine/src/engine_iterator.rs index 7349240f2a9..19422656a98 100644 --- a/components/hybrid_engine/src/engine_iterator.rs +++ b/components/hybrid_engine/src/engine_iterator.rs @@ -1,12 +1,12 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{Iterator, KvEngine, RegionCacheEngine, Result}; +use engine_traits::{Iterator, KvEngine, RangeCacheEngine, Result}; use tikv_util::Either; pub struct HybridEngineIterator where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { iter: Either, } @@ -14,7 +14,7 @@ where impl HybridEngineIterator where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { pub fn disk_engine_iterator(iter: EK::Iterator) -> Self { Self { @@ -32,7 +32,7 @@ where impl Iterator for HybridEngineIterator where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { fn seek(&mut self, key: &[u8]) -> Result { match self.iter { diff --git a/components/hybrid_engine/src/flow_control_factors.rs b/components/hybrid_engine/src/flow_control_factors.rs index 9649671d418..2634ffa1ccc 100644 --- a/components/hybrid_engine/src/flow_control_factors.rs +++ b/components/hybrid_engine/src/flow_control_factors.rs @@ -1,13 +1,13 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{FlowControlFactorsExt, KvEngine, RegionCacheEngine, Result}; +use engine_traits::{FlowControlFactorsExt, KvEngine, RangeCacheEngine, Result}; use crate::engine::HybridEngine; impl FlowControlFactorsExt for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { fn get_cf_num_files_at_level(&self, cf: &str, level: usize) -> Result> { self.disk_engine().get_cf_num_files_at_level(cf, level) diff --git a/components/hybrid_engine/src/hybrid_metrics.rs b/components/hybrid_engine/src/hybrid_metrics.rs index 2d49d9ad1d9..2be75f95ead 100644 --- a/components/hybrid_engine/src/hybrid_metrics.rs +++ b/components/hybrid_engine/src/hybrid_metrics.rs @@ -1,6 +1,6 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{KvEngine, RegionCacheEngine, StatisticsReporter}; +use engine_traits::{KvEngine, RangeCacheEngine, StatisticsReporter}; use crate::engine::HybridEngine; @@ -9,7 +9,7 @@ pub struct HybridEngineStatisticsReporter {} impl StatisticsReporter> for HybridEngineStatisticsReporter where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { fn new(name: &str) -> Self { unimplemented!() diff --git a/components/hybrid_engine/src/import.rs b/components/hybrid_engine/src/import.rs index de40c83d214..91d26a5105a 100644 --- a/components/hybrid_engine/src/import.rs +++ b/components/hybrid_engine/src/import.rs @@ -1,13 +1,13 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{ImportExt, KvEngine, RegionCacheEngine}; +use engine_traits::{ImportExt, KvEngine, RangeCacheEngine}; use crate::engine::HybridEngine; impl ImportExt for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { type IngestExternalFileOptions = EK::IngestExternalFileOptions; diff --git a/components/hybrid_engine/src/iterable.rs b/components/hybrid_engine/src/iterable.rs index 27a38570f01..892aca8a2e6 100644 --- a/components/hybrid_engine/src/iterable.rs +++ b/components/hybrid_engine/src/iterable.rs @@ -1,13 +1,13 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{IterOptions, Iterable, KvEngine, RegionCacheEngine, Result}; +use engine_traits::{IterOptions, Iterable, KvEngine, RangeCacheEngine, Result}; use crate::{engine::HybridEngine, engine_iterator::HybridEngineIterator}; impl Iterable for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { type Iterator = HybridEngineIterator; diff --git a/components/hybrid_engine/src/misc.rs b/components/hybrid_engine/src/misc.rs index 42339a83cca..994ce2d63cb 100644 --- a/components/hybrid_engine/src/misc.rs +++ b/components/hybrid_engine/src/misc.rs @@ -1,13 +1,13 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{KvEngine, MiscExt, RegionCacheEngine, Result, WriteBatchExt}; +use engine_traits::{KvEngine, MiscExt, RangeCacheEngine, Result, WriteBatchExt}; use crate::{engine::HybridEngine, hybrid_metrics::HybridEngineStatisticsReporter}; impl MiscExt for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, HybridEngine: WriteBatchExt, { type StatisticsReporter = HybridEngineStatisticsReporter; diff --git a/components/hybrid_engine/src/mvcc_properties.rs b/components/hybrid_engine/src/mvcc_properties.rs index 0d03258d2de..51a2434bad2 100644 --- a/components/hybrid_engine/src/mvcc_properties.rs +++ b/components/hybrid_engine/src/mvcc_properties.rs @@ -1,6 +1,6 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{KvEngine, MvccProperties, MvccPropertiesExt, RegionCacheEngine}; +use engine_traits::{KvEngine, MvccProperties, MvccPropertiesExt, RangeCacheEngine}; use txn_types::TimeStamp; use crate::engine::HybridEngine; @@ -8,7 +8,7 @@ use crate::engine::HybridEngine; impl MvccPropertiesExt for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { fn get_mvcc_properties_cf( &self, diff --git a/components/hybrid_engine/src/perf_context.rs b/components/hybrid_engine/src/perf_context.rs index 1db4e8c9d27..86b22958b0e 100644 --- a/components/hybrid_engine/src/perf_context.rs +++ b/components/hybrid_engine/src/perf_context.rs @@ -1,13 +1,13 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{KvEngine, PerfContextExt, PerfContextKind, RegionCacheEngine}; +use engine_traits::{KvEngine, PerfContextExt, PerfContextKind, RangeCacheEngine}; use crate::engine::HybridEngine; impl PerfContextExt for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { type PerfContext = EK::PerfContext; diff --git a/components/hybrid_engine/src/range_properties.rs b/components/hybrid_engine/src/range_properties.rs index 7f38379f36d..14deb77ec52 100644 --- a/components/hybrid_engine/src/range_properties.rs +++ b/components/hybrid_engine/src/range_properties.rs @@ -1,13 +1,13 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{KvEngine, Range, RangePropertiesExt, RegionCacheEngine, Result}; +use engine_traits::{KvEngine, Range, RangeCacheEngine, RangePropertiesExt, Result}; use crate::engine::HybridEngine; impl RangePropertiesExt for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { fn get_range_approximate_keys(&self, range: Range<'_>, large_threshold: u64) -> Result { self.disk_engine() diff --git a/components/hybrid_engine/src/snapshot.rs b/components/hybrid_engine/src/snapshot.rs index 3c7ab875a21..d30334aad84 100644 --- a/components/hybrid_engine/src/snapshot.rs +++ b/components/hybrid_engine/src/snapshot.rs @@ -3,7 +3,7 @@ use std::fmt::{self, Debug, Formatter}; use engine_traits::{ - CfNamesExt, IterOptions, Iterable, KvEngine, Peekable, ReadOptions, RegionCacheEngine, Result, + CfNamesExt, IterOptions, Iterable, KvEngine, Peekable, RangeCacheEngine, ReadOptions, Result, Snapshot, SnapshotMiscExt, }; @@ -12,7 +12,7 @@ use crate::engine_iterator::HybridEngineIterator; pub struct HybridEngineSnapshot where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { disk_snap: EK::Snapshot, region_cache_snap: Option, @@ -21,7 +21,7 @@ where impl HybridEngineSnapshot where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { pub fn 
new(disk_snap: EK::Snapshot, region_cache_snap: Option) -> Self { HybridEngineSnapshot { @@ -38,14 +38,14 @@ where impl Snapshot for HybridEngineSnapshot where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { } impl Debug for HybridEngineSnapshot where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { write!(fmt, "Hybrid Engine Snapshot Impl") @@ -55,7 +55,7 @@ where impl Iterable for HybridEngineSnapshot where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { type Iterator = HybridEngineIterator; @@ -67,7 +67,7 @@ where impl Peekable for HybridEngineSnapshot where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { type DbVector = EK::DbVector; @@ -88,7 +88,7 @@ where impl CfNamesExt for HybridEngineSnapshot where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { fn cf_names(&self) -> Vec<&str> { self.disk_snap.cf_names() @@ -98,7 +98,7 @@ where impl SnapshotMiscExt for HybridEngineSnapshot where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { fn sequence_number(&self) -> u64 { self.disk_snap.sequence_number() diff --git a/components/hybrid_engine/src/sst.rs b/components/hybrid_engine/src/sst.rs index 2bade295ec3..e34eab09d6e 100644 --- a/components/hybrid_engine/src/sst.rs +++ b/components/hybrid_engine/src/sst.rs @@ -1,7 +1,7 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
use engine_traits::{ - KvEngine, RegionCacheEngine, Result, SstCompressionType, SstExt, SstWriterBuilder, + KvEngine, RangeCacheEngine, Result, SstCompressionType, SstExt, SstWriterBuilder, }; use crate::engine::HybridEngine; @@ -11,7 +11,7 @@ pub struct HybridEngineSstWriteBuilder {} impl SstExt for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { type SstReader = EK::SstReader; type SstWriter = EK::SstWriter; @@ -21,7 +21,7 @@ where impl SstWriterBuilder> for HybridEngineSstWriteBuilder where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { fn new() -> Self { unimplemented!() diff --git a/components/hybrid_engine/src/table_properties.rs b/components/hybrid_engine/src/table_properties.rs index 6ad95e5931a..0d5c2c5fd39 100644 --- a/components/hybrid_engine/src/table_properties.rs +++ b/components/hybrid_engine/src/table_properties.rs @@ -1,13 +1,13 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. -use engine_traits::{KvEngine, Range, RegionCacheEngine, Result, TablePropertiesExt}; +use engine_traits::{KvEngine, Range, RangeCacheEngine, Result, TablePropertiesExt}; use crate::engine::HybridEngine; impl TablePropertiesExt for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { type TablePropertiesCollection = EK::TablePropertiesCollection; diff --git a/components/hybrid_engine/src/ttl_properties.rs b/components/hybrid_engine/src/ttl_properties.rs index d5b7d8578b5..47e362bccf7 100644 --- a/components/hybrid_engine/src/ttl_properties.rs +++ b/components/hybrid_engine/src/ttl_properties.rs @@ -1,13 +1,13 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
-use engine_traits::{KvEngine, RegionCacheEngine, Result, TtlProperties, TtlPropertiesExt}; +use engine_traits::{KvEngine, RangeCacheEngine, Result, TtlProperties, TtlPropertiesExt}; use crate::engine::HybridEngine; impl TtlPropertiesExt for HybridEngine where EK: KvEngine, - EC: RegionCacheEngine, + EC: RangeCacheEngine, { fn get_range_ttl_properties_cf( &self, diff --git a/components/hybrid_engine/src/write_batch.rs b/components/hybrid_engine/src/write_batch.rs index fe228ac9372..054e6d116d8 100644 --- a/components/hybrid_engine/src/write_batch.rs +++ b/components/hybrid_engine/src/write_batch.rs @@ -1,16 +1,16 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. use engine_traits::{KvEngine, Mutable, Result, WriteBatch, WriteBatchExt, WriteOptions}; -use region_cache_memory_engine::{RegionCacheMemoryEngine, RegionCacheWriteBatch}; +use region_cache_memory_engine::{RangeCacheMemoryEngine, RangeCacheWriteBatch}; use crate::engine::HybridEngine; pub struct HybridEngineWriteBatch { disk_write_batch: EK::WriteBatch, - cache_write_batch: RegionCacheWriteBatch, + cache_write_batch: RangeCacheWriteBatch, } -impl WriteBatchExt for HybridEngine +impl WriteBatchExt for HybridEngine where EK: KvEngine, { @@ -124,9 +124,11 @@ impl Mutable for HybridEngineWriteBatch { #[cfg(test)] mod tests { + use std::sync::Arc; + use engine_rocks::util::new_engine; - use engine_traits::{WriteBatchExt, CF_DEFAULT, CF_LOCK, CF_WRITE}; - use region_cache_memory_engine::RegionCacheMemoryEngine; + use engine_traits::{CacheRange, WriteBatchExt, CF_DEFAULT, CF_LOCK, CF_WRITE}; + use region_cache_memory_engine::RangeCacheMemoryEngine; use tempfile::Builder; use crate::HybridEngine; @@ -139,16 +141,17 @@ mod tests { &[CF_DEFAULT, CF_LOCK, CF_WRITE], ) .unwrap(); - let memory_engine = RegionCacheMemoryEngine::default(); - memory_engine.new_region(1); + let memory_engine = RangeCacheMemoryEngine::new(Arc::default()); + let range = CacheRange::new(b"k00".to_vec(), b"k10".to_vec()); + 
memory_engine.new_range(range.clone()); { let mut core = memory_engine.core().lock().unwrap(); - core.mut_region_meta(1).unwrap().set_can_read(true); - core.mut_region_meta(1).unwrap().set_safe_ts(10); + core.mut_range_manager().set_range_readable(&range, true); + core.mut_range_manager().set_safe_ts(&range, 10); } let hybrid_engine = - HybridEngine::<_, RegionCacheMemoryEngine>::new(disk_engine, memory_engine.clone()); + HybridEngine::<_, RangeCacheMemoryEngine>::new(disk_engine, memory_engine.clone()); let mut write_batch = hybrid_engine.write_batch(); write_batch .cache_write_batch diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 1625383b929..9d5c059c3cd 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -19,7 +19,7 @@ use bytes::Bytes; use collections::{HashMap, HashSet}; use crossbeam::{atomic::AtomicCell, channel::TrySendError}; use engine_traits::{ - Engines, KvEngine, PerfContext, RaftEngine, Snapshot, SnapshotContext, WriteBatch, + CacheRange, Engines, KvEngine, PerfContext, RaftEngine, Snapshot, SnapshotContext, WriteBatch, WriteOptions, CF_DEFAULT, CF_LOCK, CF_WRITE, }; use error_code::ErrorCodeExt; @@ -4860,7 +4860,7 @@ where let snap_ctx = if let Ok(read_ts) = decode_u64(&mut req.get_header().get_flag_data()) { Some(SnapshotContext { - region_id: self.region_id, + range: Some(CacheRange::from_region(®ion)), read_ts, }) } else { diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index 666b0d34796..b760435f22e 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -12,7 +12,7 @@ use std::{ }; use crossbeam::{atomic::AtomicCell, channel::TrySendError}; -use engine_traits::{KvEngine, Peekable, RaftEngine, SnapshotContext}; +use engine_traits::{CacheRange, KvEngine, Peekable, RaftEngine, SnapshotContext}; use fail::fail_point; use kvproto::{ 
errorpb, @@ -1057,13 +1057,17 @@ where pub fn propose_raft_command( &mut self, - snap_ctx: Option, + mut snap_ctx: Option, read_id: Option, mut req: RaftCmdRequest, cb: Callback, ) { match self.pre_propose_raft_command(&req) { Ok(Some((mut delegate, policy))) => { + if let Some(ref mut ctx) = snap_ctx { + ctx.set_range(CacheRange::from_region(&delegate.region)) + } + let mut snap_updated = false; let last_valid_ts = delegate.last_valid_ts; let mut response = match policy { @@ -1288,10 +1292,10 @@ mod tests { use crossbeam::channel::TrySendError; use engine_test::kv::{KvTestEngine, KvTestSnapshot}; - use engine_traits::{MiscExt, Peekable, SyncMutable, ALL_CFS}; + use engine_traits::{CacheRange, MiscExt, Peekable, SyncMutable, ALL_CFS}; use hybrid_engine::{HybridEngine, HybridEngineSnapshot}; use kvproto::{metapb::RegionEpoch, raft_cmdpb::*}; - use region_cache_memory_engine::RegionCacheMemoryEngine; + use region_cache_memory_engine::RangeCacheMemoryEngine; use tempfile::{Builder, TempDir}; use tikv_util::{codec::number::NumberEncoder, time::monotonic_raw_now}; use time::Duration; @@ -2417,8 +2421,8 @@ mod tests { ); } - type HybridTestEnigne = HybridEngine; - type HybridEngineTestSnapshot = HybridEngineSnapshot; + type HybridTestEnigne = HybridEngine; + type HybridEngineTestSnapshot = HybridEngineSnapshot; struct HybridEngineMockRouter { p_router: SyncSender>, @@ -2469,13 +2473,13 @@ mod tests { TempDir, LocalReader, Receiver>, - RegionCacheMemoryEngine, + RangeCacheMemoryEngine, ) { let path = Builder::new().prefix(path).tempdir().unwrap(); let disk_engine = engine_test::kv::new_engine(path.path().to_str().unwrap(), ALL_CFS).unwrap(); let (ch, rx, _) = HybridEngineMockRouter::new(); - let memory_engine = RegionCacheMemoryEngine::default(); + let memory_engine = RangeCacheMemoryEngine::new(Arc::default()); let engine = HybridEngine::new(disk_engine, memory_engine.clone()); let mut reader = LocalReader::new( engine.clone(), @@ -2570,16 +2574,17 @@ mod tests { let s = 
get_snapshot(None, &mut reader, cmd.clone(), &rx); assert!(!s.region_cache_snapshot_available()); - memory_engine.new_region(1); + let range = CacheRange::from_region(®ion1); + memory_engine.new_range(range.clone()); { let mut core = memory_engine.core().lock().unwrap(); - core.mut_region_meta(1).unwrap().set_can_read(true); - core.mut_region_meta(1).unwrap().set_safe_ts(10); + core.mut_range_manager().set_range_readable(&range, true); + core.mut_range_manager().set_safe_ts(&range, 10); } let mut snap_ctx = SnapshotContext { read_ts: 15, - region_id: 1, + range: None, }; let s = get_snapshot(Some(snap_ctx.clone()), &mut reader, cmd.clone(), &rx); @@ -2587,14 +2592,14 @@ mod tests { { let mut core = memory_engine.core().lock().unwrap(); - core.mut_region_meta(1).unwrap().set_can_read(false); + core.mut_range_manager().set_range_readable(&range, false); } let s = get_snapshot(Some(snap_ctx.clone()), &mut reader, cmd.clone(), &rx); assert!(!s.region_cache_snapshot_available()); { let mut core = memory_engine.core().lock().unwrap(); - core.mut_region_meta(1).unwrap().set_can_read(true); + core.mut_range_manager().set_range_readable(&range, true); } snap_ctx.read_ts = 5; assert!(!s.region_cache_snapshot_available()); diff --git a/components/region_cache_memory_engine/src/engine.rs b/components/region_cache_memory_engine/src/engine.rs index 17da5bdaea8..dc5c93c38a8 100644 --- a/components/region_cache_memory_engine/src/engine.rs +++ b/components/region_cache_memory_engine/src/engine.rs @@ -9,20 +9,25 @@ use std::{ }; use bytes::Bytes; -use collections::HashMap; +use collections::{HashMap, HashSet}; use engine_rocks::{raw::SliceTransform, util::FixedSuffixSliceTransform}; use engine_traits::{ - CfNamesExt, DbVector, Error, IterOptions, Iterable, Iterator, Peekable, ReadOptions, - RegionCacheEngine, Result, Snapshot, SnapshotMiscExt, CF_DEFAULT, CF_LOCK, CF_WRITE, + CacheRange, CfNamesExt, DbVector, Error, IterOptions, Iterable, Iterator, Peekable, + RangeCacheEngine, 
ReadOptions, Result, Snapshot, SnapshotMiscExt, CF_DEFAULT, CF_LOCK, + CF_WRITE, }; -use skiplist_rs::{IterRef, Skiplist}; -use tikv_util::config::ReadableSize; - -use crate::keys::{ - decode_key, encode_seek_key, InternalKey, InternalKeyComparator, ValueType, - VALUE_TYPE_FOR_SEEK, VALUE_TYPE_FOR_SEEK_FOR_PREV, +use skiplist_rs::{AllocationRecorder, IterRef, MemoryLimiter, Node, Skiplist, MIB}; + +use crate::{ + keys::{ + decode_key, encode_key_for_eviction, encode_seek_key, InternalKey, InternalKeyComparator, + ValueType, VALUE_TYPE_FOR_SEEK, VALUE_TYPE_FOR_SEEK_FOR_PREV, + }, + range_manager::RangeManager, }; +const EVICTION_KEY_BUFFER_LIMIT: usize = 5 * MIB as usize; + pub(crate) fn cf_to_id(cf: &str) -> usize { match cf { CF_DEFAULT => 0, @@ -32,63 +37,121 @@ pub(crate) fn cf_to_id(cf: &str) -> usize { } } -/// RegionMemoryEngine stores data for a specific cached region -/// -/// todo: The skiplist used here currently is for test purpose. Replace it -/// with a formal implementation. +// todo: implement a real memory limiter. Now, it is used for test. 
+#[derive(Clone, Default)] +pub struct GlobalMemoryLimiter { + recorder: Arc>>, + removed: Arc>>>, +} + +impl MemoryLimiter for GlobalMemoryLimiter { + fn acquire(&self, n: usize) -> bool { + true + } + + fn mem_usage(&self) -> usize { + 0 + } + + fn reclaim(&self, n: usize) {} +} + +impl AllocationRecorder for GlobalMemoryLimiter { + fn alloc(&self, addr: usize, size: usize) { + let mut recorder = self.recorder.lock().unwrap(); + assert!(!recorder.contains_key(&addr)); + recorder.insert(addr, size); + } + + fn free(&self, addr: usize, size: usize) { + let node = addr as *mut Node; + let mut removed = self.removed.lock().unwrap(); + removed.insert(unsafe { (*node).key().to_vec() }); + let mut recorder = self.recorder.lock().unwrap(); + assert_eq!(recorder.remove(&addr).unwrap(), size); + } +} + +impl Drop for GlobalMemoryLimiter { + fn drop(&mut self) { + assert!(self.recorder.lock().unwrap().is_empty()); + } +} + +/// A single global set of skiplists shared by all cached ranges #[derive(Clone)] -pub struct RegionMemoryEngine { - pub(crate) data: [Arc>; 3], +pub struct SkiplistEngine { + pub(crate) data: [Arc>; 3], } -impl RegionMemoryEngine { - pub fn with_capacity(arena_size: usize) -> Self { - RegionMemoryEngine { +impl SkiplistEngine { + pub fn new(global_limiter: Arc) -> Self { + SkiplistEngine { data: [ - Arc::new(Skiplist::with_capacity( + Arc::new(Skiplist::new( InternalKeyComparator::default(), - arena_size, - true, + global_limiter.clone(), )), - Arc::new(Skiplist::with_capacity( + Arc::new(Skiplist::new( InternalKeyComparator::default(), - arena_size, - true, + global_limiter.clone(), )), - Arc::new(Skiplist::with_capacity( + Arc::new(Skiplist::new( InternalKeyComparator::default(), - arena_size, - true, + global_limiter.clone(), )), ], } } -} -impl Default for RegionMemoryEngine { - fn default() -> Self { - RegionMemoryEngine::with_capacity(ReadableSize::mb(1).0 as usize) + fn delete_range(&self, range: &CacheRange) { + self.data.iter().for_each(|d| { + 
let mut key_buffer: Vec = vec![]; + let mut key_buffer_size = 0; + let (start, end) = encode_key_for_eviction(range); + + let mut iter = d.iter(); + iter.seek(&start); + while iter.valid() && iter.key() < &end { + if key_buffer_size + iter.key().len() >= EVICTION_KEY_BUFFER_LIMIT { + for key in key_buffer.drain(..) { + d.remove(key.as_slice()); + } + iter = d.iter(); + iter.seek(&start); + continue; + } + + key_buffer_size += iter.key().len(); + key_buffer.push(iter.key().clone()); + iter.next(); + } + + for key in key_buffer { + d.remove(key.as_slice()); + } + }); } } -impl Debug for RegionMemoryEngine { +impl Debug for SkiplistEngine { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "Region Memory Engine") + write!(f, "Range Memory Engine") } } // read_ts -> ref_count -#[derive(Default)] -struct SnapshotList(BTreeMap); +#[derive(Default, Debug)] +pub(crate) struct SnapshotList(BTreeMap); impl SnapshotList { - fn new_snapshot(&mut self, read_ts: u64) { + pub(crate) fn new_snapshot(&mut self, read_ts: u64) { // snapshot with this ts may be granted before let count = self.0.get(&read_ts).unwrap_or(&0) + 1; self.0.insert(read_ts, count); } - fn remove_snapshot(&mut self, read_ts: u64) { + pub(crate) fn remove_snapshot(&mut self, read_ts: u64) { let count = self.0.get_mut(&read_ts).unwrap(); assert!(*count >= 1); if *count == 1 { @@ -97,95 +160,104 @@ impl SnapshotList { *count -= 1; } } + + pub(crate) fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub(crate) fn len(&self) -> usize { + self.0.keys().len() + } } -#[derive(Default)] -pub struct RegionMemoryMeta { - // It records the snapshots that have been granted previsously with specific snapshot_ts. We - // should guarantee that the data visible to any one of the snapshot in it will not be removed. - snapshot_list: SnapshotList, - // It indicates whether the region is readable. 
False means integrity of the data in this - // cached region is not satisfied due to being evicted for instance. - pub(crate) can_read: bool, - // Request with read_ts below it is not eligible for granting snapshot. - // Note: different region can have different safe_ts. - pub(crate) safe_ts: u64, +pub struct RangeCacheMemoryEngineCore { + engine: SkiplistEngine, + range_manager: RangeManager, } -impl RegionMemoryMeta { - pub fn set_can_read(&mut self, can_read: bool) { - self.can_read = can_read; +impl RangeCacheMemoryEngineCore { + pub fn new(limiter: Arc) -> RangeCacheMemoryEngineCore { + RangeCacheMemoryEngineCore { + engine: SkiplistEngine::new(limiter), + range_manager: RangeManager::default(), + } } - pub fn set_safe_ts(&mut self, safe_ts: u64) { - self.safe_ts = safe_ts; + pub fn engine(&self) -> SkiplistEngine { + self.engine.clone() } -} -#[derive(Default)] -pub struct RegionCacheMemoryEngineCore { - pub(crate) engine: HashMap, - pub(crate) region_metas: HashMap, -} + pub fn range_manager(&self) -> &RangeManager { + &self.range_manager + } -impl RegionCacheMemoryEngineCore { - pub fn mut_region_meta(&mut self, region_id: u64) -> Option<&mut RegionMemoryMeta> { - self.region_metas.get_mut(®ion_id) + pub fn mut_range_manager(&mut self) -> &mut RangeManager { + &mut self.range_manager } } -/// The RegionCacheMemoryEngine serves as a region cache, storing hot regions in +/// The RangeCacheMemoryEngine serves as a range cache, storing hot ranges in /// the leaders' store. Incoming writes that are written to disk engine (now, -/// RocksDB) are also written to the RegionCacheMemoryEngine, leading to a -/// mirrored data set in the cached regions with the disk engine. +/// RocksDB) are also written to the RangeCacheMemoryEngine, leading to a +/// mirrored data set in the cached ranges with the disk engine. 
/// -/// A load/evict unit manages the memory, deciding which regions should be -/// evicted when the memory used by the RegionCacheMemoryEngine reaches a -/// certain limit, and determining which regions should be loaded when there is +/// A load/evict unit manages the memory, deciding which ranges should be +/// evicted when the memory used by the RangeCacheMemoryEngine reaches a +/// certain limit, and determining which ranges should be loaded when there is /// spare memory capacity. /// -/// The safe point lifetime differs between RegionCacheMemoryEngine and the disk -/// engine, often being much shorter in RegionCacheMemoryEngine. This means that -/// RegionCacheMemoryEngine may filter out some keys that still exist in the +/// The safe point lifetime differs between RangeCacheMemoryEngine and the disk +/// engine, often being much shorter in RangeCacheMemoryEngine. This means that +/// RangeCacheMemoryEngine may filter out some keys that still exist in the /// disk engine, thereby improving read performance as fewer duplicated keys /// will be read. If there's a need to read keys that may have been filtered by -/// RegionCacheMemoryEngine (as indicated by read_ts and safe_point of the +/// RangeCacheMemoryEngine (as indicated by read_ts and safe_point of the /// cached region), we resort to using a the disk engine's snapshot instead. 
-#[derive(Clone, Default)] -pub struct RegionCacheMemoryEngine { - pub(crate) core: Arc>, +#[derive(Clone)] +pub struct RangeCacheMemoryEngine { + pub(crate) core: Arc>, + memory_limiter: Arc, } -impl RegionCacheMemoryEngine { - pub fn core(&self) -> &Arc> { - &self.core +impl RangeCacheMemoryEngine { + pub fn new(limiter: Arc) -> Self { + let engine = RangeCacheMemoryEngineCore::new(limiter.clone()); + Self { + core: Arc::new(Mutex::new(engine)), + memory_limiter: limiter, + } } -} -impl Debug for RegionCacheMemoryEngine { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "Region Cache Memory Engine") + pub fn new_range(&self, range: CacheRange) { + let mut core = self.core.lock().unwrap(); + core.range_manager.new_range(range); } -} -impl RegionCacheMemoryEngine { - pub fn new_region(&self, region_id: u64) { + pub fn evict_range(&mut self, range: &CacheRange) { let mut core = self.core.lock().unwrap(); + if core.range_manager.evict_range(range) { + core.engine.delete_range(range); + } + } +} - assert!(core.engine.get(®ion_id).is_none()); - assert!(core.region_metas.get(®ion_id).is_none()); - core.engine.insert(region_id, RegionMemoryEngine::default()); - core.region_metas - .insert(region_id, RegionMemoryMeta::default()); +impl RangeCacheMemoryEngine { + pub fn core(&self) -> &Arc> { + &self.core } } -impl RegionCacheEngine for RegionCacheMemoryEngine { - type Snapshot = RegionCacheSnapshot; +impl Debug for RangeCacheMemoryEngine { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Range Cache Memory Engine") + } +} + +impl RangeCacheEngine for RangeCacheMemoryEngine { + type Snapshot = RangeCacheSnapshot; - // todo(SpadeA): add sequence number logic - fn snapshot(&self, region_id: u64, read_ts: u64, seq_num: u64) -> Option { - RegionCacheSnapshot::new(self.clone(), region_id, read_ts, seq_num) + fn snapshot(&self, range: CacheRange, read_ts: u64, seq_num: u64) -> Option { + RangeCacheSnapshot::new(self.clone(), 
range, read_ts, seq_num) } } @@ -196,10 +268,14 @@ enum Direction { Backward, } -pub struct RegionCacheIterator { +pub struct RangeCacheIterator { cf: String, valid: bool, - iter: IterRef, InternalKeyComparator>, + iter: IterRef< + Skiplist, + InternalKeyComparator, + GlobalMemoryLimiter, + >, // The lower bound is inclusive while the upper bound is exclusive if set // Note: bounds (region boundaries) have no mvcc versions lower_bound: Vec, @@ -221,15 +297,15 @@ pub struct RegionCacheIterator { direction: Direction, } -impl Iterable for RegionCacheMemoryEngine { - type Iterator = RegionCacheIterator; +impl Iterable for RangeCacheMemoryEngine { + type Iterator = RangeCacheIterator; fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { unimplemented!() } } -impl RegionCacheIterator { +impl RangeCacheIterator { // If `skipping_saved_key` is true, the function will keep iterating until it // finds a user key that is larger than `saved_user_key`. // If `prefix` is not None, the iterator needs to stop when all keys for the @@ -384,7 +460,7 @@ impl RegionCacheIterator { } } -impl Iterator for RegionCacheIterator { +impl Iterator for RangeCacheIterator { fn key(&self) -> &[u8] { assert!(self.valid); &self.saved_user_key @@ -475,61 +551,73 @@ impl Iterator for RegionCacheIterator { } #[derive(Clone, Debug)] -pub struct RegionCacheSnapshot { - region_id: u64, - snapshot_ts: u64, - // Sequence number is shared between RegionCacheEngine and disk KvEnigne to +pub struct RagneCacheSnapshotMeta { + pub(crate) range_id: u64, + pub(crate) range: CacheRange, + pub(crate) snapshot_ts: u64, + // Sequence number is shared between RangeCacheEngine and disk KvEnigne to // provide atomic write - sequence_number: u64, - region_memory_engine: RegionMemoryEngine, - engine: RegionCacheMemoryEngine, + pub(crate) sequence_number: u64, +} + +impl RagneCacheSnapshotMeta { + fn new(range_id: u64, range: CacheRange, snapshot_ts: u64, sequence_number: u64) -> Self { + Self { + range_id, + 
range, + snapshot_ts, + sequence_number, + } + } } -impl RegionCacheSnapshot { +#[derive(Clone, Debug)] +pub struct RangeCacheSnapshot { + snapshot_meta: RagneCacheSnapshotMeta, + skiplist_engine: SkiplistEngine, + engine: RangeCacheMemoryEngine, +} + +impl RangeCacheSnapshot { pub fn new( - engine: RegionCacheMemoryEngine, - region_id: u64, + engine: RangeCacheMemoryEngine, + range: CacheRange, read_ts: u64, seq_num: u64, ) -> Option { let mut core = engine.core.lock().unwrap(); - let region_meta = core.region_metas.get_mut(®ion_id)?; - if !region_meta.can_read { - return None; - } - - if read_ts <= region_meta.safe_ts { - // todo(SpadeA): add metrics for it - return None; + if let Some(range_id) = core.range_manager.range_snapshot(&range, read_ts) { + return Some(RangeCacheSnapshot { + snapshot_meta: RagneCacheSnapshotMeta::new(range_id, range, read_ts, seq_num), + skiplist_engine: core.engine.clone(), + engine: engine.clone(), + }); } - region_meta.snapshot_list.new_snapshot(read_ts); - - Some(RegionCacheSnapshot { - region_id, - snapshot_ts: read_ts, - sequence_number: seq_num, - region_memory_engine: core.engine.get(®ion_id).unwrap().clone(), - engine: engine.clone(), - }) + None } } -impl Drop for RegionCacheSnapshot { +impl Drop for RangeCacheSnapshot { fn drop(&mut self) { let mut core = self.engine.core.lock().unwrap(); - let meta = core.region_metas.get_mut(&self.region_id).unwrap(); - meta.snapshot_list.remove_snapshot(self.snapshot_ts); + for range_removable in core + .range_manager + .remove_range_snapshot(&self.snapshot_meta) + { + // todo: schedule it to a separate thread + core.engine.delete_range(&self.snapshot_meta.range); + } } } -impl Snapshot for RegionCacheSnapshot {} +impl Snapshot for RangeCacheSnapshot {} -impl Iterable for RegionCacheSnapshot { - type Iterator = RegionCacheIterator; +impl Iterable for RangeCacheSnapshot { + type Iterator = RangeCacheIterator; fn iterator_opt(&self, cf: &str, opts: IterOptions) -> Result { - let iter = 
self.region_memory_engine.data[cf_to_id(cf)].iter(); + let iter = self.skiplist_engine.data[cf_to_id(cf)].iter(); let prefix_extractor = if opts.prefix_same_as_start() { Some(FixedSuffixSliceTransform::new(8)) } else { @@ -542,14 +630,14 @@ impl Iterable for RegionCacheSnapshot { return Err(Error::BoundaryNotSet); } - Ok(RegionCacheIterator { + Ok(RangeCacheIterator { cf: String::from(cf), valid: false, prefix: None, lower_bound: lower_bound.unwrap(), upper_bound: upper_bound.unwrap(), iter, - sequence_number: self.sequence_number, + sequence_number: self.sequence_number(), saved_user_key: vec![], saved_value: None, direction: Direction::Uninit, @@ -558,8 +646,8 @@ impl Iterable for RegionCacheSnapshot { } } -impl Peekable for RegionCacheSnapshot { - type DbVector = RegionCacheDbVector; +impl Peekable for RangeCacheSnapshot { + type DbVector = RangeCacheDbVector; fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { self.get_value_cf_opt(opts, CF_DEFAULT, key) @@ -571,9 +659,9 @@ impl Peekable for RegionCacheSnapshot { cf: &str, key: &[u8], ) -> Result> { - let seq = self.sequence_number; - let mut iter = self.region_memory_engine.data[cf_to_id(cf)].iter(); - let seek_key = encode_seek_key(key, self.sequence_number, VALUE_TYPE_FOR_SEEK); + let seq = self.sequence_number(); + let mut iter = self.skiplist_engine.data[cf_to_id(cf)].iter(); + let seek_key = encode_seek_key(key, self.sequence_number(), VALUE_TYPE_FOR_SEEK); iter.seek(&seek_key); if !iter.valid() { @@ -585,28 +673,28 @@ impl Peekable for RegionCacheSnapshot { user_key, v_type: ValueType::Value, .. 
- } if user_key == key => Ok(Some(RegionCacheDbVector(iter.value().clone()))), + } if user_key == key => Ok(Some(RangeCacheDbVector(iter.value().clone()))), _ => Ok(None), } } } -impl CfNamesExt for RegionCacheSnapshot { +impl CfNamesExt for RangeCacheSnapshot { fn cf_names(&self) -> Vec<&str> { unimplemented!() } } -impl SnapshotMiscExt for RegionCacheSnapshot { +impl SnapshotMiscExt for RangeCacheSnapshot { fn sequence_number(&self) -> u64 { - self.sequence_number + self.snapshot_meta.sequence_number } } #[derive(Debug)] -pub struct RegionCacheDbVector(Bytes); +pub struct RangeCacheDbVector(Bytes); -impl Deref for RegionCacheDbVector { +impl Deref for RangeCacheDbVector { type Target = [u8]; fn deref(&self) -> &[u8] { @@ -614,9 +702,9 @@ impl Deref for RegionCacheDbVector { } } -impl DbVector for RegionCacheDbVector {} +impl DbVector for RangeCacheDbVector {} -impl<'a> PartialEq<&'a [u8]> for RegionCacheDbVector { +impl<'a> PartialEq<&'a [u8]> for RangeCacheDbVector { fn eq(&self, rhs: &&[u8]) -> bool { self.0.as_slice() == *rhs } @@ -624,35 +712,37 @@ impl<'a> PartialEq<&'a [u8]> for RegionCacheDbVector { #[cfg(test)] mod tests { - use core::ops::Range; + use core::{ops::Range, slice::SlicePattern}; use std::{iter, iter::StepBy, ops::Deref, sync::Arc}; use bytes::{BufMut, Bytes}; use engine_traits::{ - IterOptions, Iterable, Iterator, Peekable, ReadOptions, RegionCacheEngine, + CacheRange, IterOptions, Iterable, Iterator, Peekable, RangeCacheEngine, ReadOptions, }; use skiplist_rs::Skiplist; - use super::{cf_to_id, RegionCacheIterator}; + use super::{cf_to_id, GlobalMemoryLimiter, RangeCacheIterator, SkiplistEngine}; use crate::{ - keys::{encode_key, InternalKeyComparator, ValueType}, - RegionCacheMemoryEngine, + keys::{decode_key, encode_key, InternalKeyComparator, ValueType}, + RangeCacheMemoryEngine, }; #[test] fn test_snapshot() { - let engine = RegionCacheMemoryEngine::default(); - engine.new_region(1); + let engine = 
RangeCacheMemoryEngine::new(Arc::new(GlobalMemoryLimiter::default())); + let range = CacheRange::new(b"k00".to_vec(), b"k10".to_vec()); + engine.new_range(range.clone()); let verify_snapshot_count = |snapshot_ts, count| { let core = engine.core.lock().unwrap(); if count > 0 { assert_eq!( *core - .region_metas - .get(&1) + .range_manager + .ranges() + .get(&range) .unwrap() - .snapshot_list + .range_snapshot_list() .0 .get(&snapshot_ts) .unwrap(), @@ -660,10 +750,11 @@ mod tests { ); } else { assert!( - core.region_metas - .get(&1) + core.range_manager + .ranges() + .get(&range) .unwrap() - .snapshot_list + .range_snapshot_list() .0 .get(&snapshot_ts) .is_none() @@ -671,36 +762,48 @@ mod tests { } }; - assert!(engine.snapshot(1, 5, u64::MAX).is_none()); + assert!(engine.snapshot(range.clone(), 5, u64::MAX).is_none()); { let mut core = engine.core.lock().unwrap(); - core.region_metas.get_mut(&1).unwrap().can_read = true; + core.range_manager.set_range_readable(&range, true); } - let s1 = engine.snapshot(1, 5, u64::MAX).unwrap(); + let s1 = engine.snapshot(range.clone(), 5, u64::MAX).unwrap(); { let mut core = engine.core.lock().unwrap(); - core.region_metas.get_mut(&1).unwrap().safe_ts = 5; + let t_range = CacheRange::new(b"k00".to_vec(), b"k02".to_vec()); + assert!(!core.range_manager.set_safe_ts(&t_range, 5)); + assert!(core.range_manager.set_safe_ts(&range, 5)); } - assert!(engine.snapshot(1, 5, u64::MAX).is_none()); - let s2 = engine.snapshot(1, 10, u64::MAX).unwrap(); + assert!(engine.snapshot(range.clone(), 5, u64::MAX).is_none()); + let s2 = engine.snapshot(range.clone(), 10, u64::MAX).unwrap(); verify_snapshot_count(5, 1); verify_snapshot_count(10, 1); - let s3 = engine.snapshot(1, 10, u64::MAX).unwrap(); + let s3 = engine.snapshot(range.clone(), 10, u64::MAX).unwrap(); verify_snapshot_count(10, 2); drop(s1); verify_snapshot_count(5, 0); drop(s2); verify_snapshot_count(10, 1); - let s4 = engine.snapshot(1, 10, u64::MAX).unwrap(); + let s4 = 
engine.snapshot(range.clone(), 10, u64::MAX).unwrap(); verify_snapshot_count(10, 2); drop(s4); verify_snapshot_count(10, 1); drop(s3); - verify_snapshot_count(10, 0); + { + let core = engine.core.lock().unwrap(); + assert!( + core.range_manager + .ranges() + .get(&range) + .unwrap() + .range_snapshot_list() + .is_empty() + ); + } } fn construct_user_key(i: u64) -> Vec { @@ -721,7 +824,7 @@ mod tests { } fn fill_data_in_skiplist( - sl: Arc>, + sl: Arc>, key_range: StepBy>, mvcc_range: Range, mut start_seq: u64, @@ -738,7 +841,7 @@ mod tests { } fn delete_data_in_skiplist( - sl: Arc>, + sl: Arc>, key_range: StepBy>, mvcc_range: Range, mut seq: u64, @@ -761,7 +864,7 @@ mod tests { } fn put_key_val( - sl: &Arc>, + sl: &Arc>, key: &str, val: &str, mvcc: u64, @@ -772,7 +875,12 @@ mod tests { sl.put(key, Bytes::from(val.to_owned())); } - fn delete_key(sl: &Arc>, key: &str, mvcc: u64, seq: u64) { + fn delete_key( + sl: &Arc>, + key: &str, + mvcc: u64, + seq: u64, + ) { let key = construct_mvcc_key(key, mvcc); let key = encode_key(&key, seq, ValueType::Deletion); sl.put(key, Bytes::default()); @@ -791,10 +899,11 @@ mod tests { } fn verify_key_values, J: iter::Iterator + Clone>( - iter: &mut RegionCacheIterator, + iter: &mut RangeCacheIterator, key_range: I, mvcc_range: J, foward: bool, + ended: bool, ) { for i in key_range { for mvcc in mvcc_range.clone() { @@ -808,19 +917,23 @@ mod tests { } } } - assert!(!iter.valid().unwrap()); + + if ended { + assert!(!iter.valid().unwrap()); + } } #[test] fn test_get_value() { - let engine = RegionCacheMemoryEngine::default(); - engine.new_region(1); + let engine = RangeCacheMemoryEngine::new(Arc::default()); + let range = CacheRange::new(b"k000".to_vec(), b"k100".to_vec()); + engine.new_range(range.clone()); { let mut core = engine.core.lock().unwrap(); - core.region_metas.get_mut(&1).unwrap().can_read = true; - core.region_metas.get_mut(&1).unwrap().safe_ts = 5; - let sl = 
core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + core.range_manager.set_range_readable(&range, true); + core.range_manager.set_safe_ts(&range, 5); + let sl = core.engine.data[cf_to_id("write")].clone(); fill_data_in_skiplist(sl.clone(), (1..10).step_by(1), 1..50, 1); // k1 is deleted at seq_num 150 while k49 is deleted at seq num 101 delete_data_in_skiplist(sl, (1..10).step_by(1), 1..50, 100); @@ -828,7 +941,7 @@ mod tests { let opts = ReadOptions::default(); { - let snapshot = engine.snapshot(1, 10, 60).unwrap(); + let snapshot = engine.snapshot(range.clone(), 10, 60).unwrap(); for i in 1..10 { for mvcc in 1..50 { let k = construct_key(i, mvcc); @@ -850,7 +963,7 @@ mod tests { // all deletions { - let snapshot = engine.snapshot(1, 10, u64::MAX).unwrap(); + let snapshot = engine.snapshot(range.clone(), 10, u64::MAX).unwrap(); for i in 1..10 { for mvcc in 1..50 { let k = construct_key(i, mvcc); @@ -866,7 +979,7 @@ mod tests { // some deletions { - let snapshot = engine.snapshot(1, 10, 105).unwrap(); + let snapshot = engine.snapshot(range.clone(), 10, 105).unwrap(); for mvcc in 1..50 { for i in 1..7 { let k = construct_key(i, mvcc); @@ -891,21 +1004,22 @@ mod tests { #[test] fn test_iterator_forawrd() { - let engine = RegionCacheMemoryEngine::default(); - engine.new_region(1); + let engine = RangeCacheMemoryEngine::new(Arc::default()); + let range = CacheRange::new(b"k000".to_vec(), b"k100".to_vec()); + engine.new_range(range.clone()); let step: i32 = 2; { let mut core = engine.core.lock().unwrap(); - core.region_metas.get_mut(&1).unwrap().can_read = true; - core.region_metas.get_mut(&1).unwrap().safe_ts = 5; - let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + core.range_manager.set_range_readable(&range, true); + core.range_manager.set_safe_ts(&range, 5); + let sl = core.engine.data[cf_to_id("write")].clone(); fill_data_in_skiplist(sl.clone(), (1..100).step_by(step as usize), 1..10, 1); delete_data_in_skiplist(sl, 
(1..100).step_by(step as usize), 1..10, 200); } let mut iter_opt = IterOptions::default(); - let snapshot = engine.snapshot(1, 10, u64::MAX).unwrap(); + let snapshot = engine.snapshot(range.clone(), 10, u64::MAX).unwrap(); // boundaries are not set assert!(snapshot.iterator_opt("lock", iter_opt.clone()).is_err()); @@ -922,7 +1036,7 @@ mod tests { // Not restricted by bounds, no deletion (seq_num 150) { - let snapshot = engine.snapshot(1, 100, 150).unwrap(); + let snapshot = engine.snapshot(range.clone(), 100, 150).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); iter.seek_to_first().unwrap(); verify_key_values( @@ -930,6 +1044,7 @@ mod tests { (1..100).step_by(step as usize), (1..10).rev(), true, + true, ); // seek key that is in the skiplist @@ -940,6 +1055,7 @@ mod tests { (11..100).step_by(step as usize), (1..10).rev(), true, + true, ); // seek key that is not in the skiplist @@ -950,12 +1066,13 @@ mod tests { (13..100).step_by(step as usize), (1..10).rev(), true, + true, ); } // Not restricted by bounds, some deletions (seq_num 230) { - let snapshot = engine.snapshot(1, 10, 230).unwrap(); + let snapshot = engine.snapshot(range.clone(), 10, 230).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); iter.seek_to_first().unwrap(); verify_key_values( @@ -963,6 +1080,7 @@ mod tests { (63..100).step_by(step as usize), (1..10).rev(), true, + true, ); // sequence can see the deletion @@ -998,7 +1116,7 @@ mod tests { iter_opt.set_upper_bound(&upper_bound, 0); iter_opt.set_lower_bound(&lower_bound, 0); { - let snapshot = engine.snapshot(1, 10, 150).unwrap(); + let snapshot = engine.snapshot(range.clone(), 10, 150).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); assert!(iter.seek_to_first().unwrap()); @@ -1007,6 +1125,7 @@ mod tests { (21..40).step_by(step as usize), (1..10).rev(), true, + true, ); // seek a key that is below the lower bound is the same with seek_to_first 
@@ -1017,6 +1136,7 @@ mod tests { (21..40).step_by(step as usize), (1..10).rev(), true, + true, ); // seek a key that is larger or equal to upper bound won't get any key @@ -1031,12 +1151,13 @@ mod tests { (33..40).step_by(step as usize), (1..10).rev(), true, + true, ); } // with bounds, some deletions (seq_num 215) { - let snapshot = engine.snapshot(1, 10, 215).unwrap(); + let snapshot = engine.snapshot(range.clone(), 10, 215).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt).unwrap(); // sequence can see the deletion @@ -1069,15 +1190,16 @@ mod tests { #[test] fn test_iterator_backward() { - let engine = RegionCacheMemoryEngine::default(); - engine.new_region(1); + let engine = RangeCacheMemoryEngine::new(Arc::default()); + let range = CacheRange::new(b"k000".to_vec(), b"k100".to_vec()); + engine.new_range(range.clone()); let step: i32 = 2; { let mut core = engine.core.lock().unwrap(); - core.region_metas.get_mut(&1).unwrap().can_read = true; - core.region_metas.get_mut(&1).unwrap().safe_ts = 5; - let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + core.range_manager.set_range_readable(&range, true); + core.range_manager.set_safe_ts(&range, 5); + let sl = core.engine.data[cf_to_id("write")].clone(); fill_data_in_skiplist(sl.clone(), (1..100).step_by(step as usize), 1..10, 1); delete_data_in_skiplist(sl, (1..100).step_by(step as usize), 1..10, 200); } @@ -1090,7 +1212,7 @@ mod tests { // Not restricted by bounds, no deletion (seq_num 150) { - let snapshot = engine.snapshot(1, 10, 150).unwrap(); + let snapshot = engine.snapshot(range.clone(), 10, 150).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); assert!(iter.seek_to_last().unwrap()); verify_key_values( @@ -1098,6 +1220,7 @@ mod tests { (1..100).step_by(step as usize).rev(), 1..10, false, + true, ); // seek key that is in the skiplist @@ -1108,6 +1231,7 @@ mod tests { (1..82).step_by(step as usize).rev(), 1..10, false, + true, ); // seek 
key that is in the skiplist @@ -1118,6 +1242,7 @@ mod tests { (1..80).step_by(step as usize).rev(), 1..10, false, + true, ); } @@ -1126,7 +1251,7 @@ mod tests { iter_opt.set_upper_bound(&upper_bound, 0); iter_opt.set_lower_bound(&lower_bound, 0); { - let snapshot = engine.snapshot(1, 10, 150).unwrap(); + let snapshot = engine.snapshot(range.clone(), 10, 150).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt).unwrap(); assert!(iter.seek_to_last().unwrap()); @@ -1135,6 +1260,7 @@ mod tests { (21..38).step_by(step as usize).rev(), 1..10, false, + true, ); // seek a key that is above the upper bound is the same with seek_to_last @@ -1145,6 +1271,7 @@ mod tests { (21..38).step_by(step as usize).rev(), 1..10, false, + true, ); // seek a key that is less than the lower bound won't get any key @@ -1159,21 +1286,23 @@ mod tests { (21..26).step_by(step as usize).rev(), 1..10, false, + true, ); } } #[test] fn test_seq_visibility() { - let engine = RegionCacheMemoryEngine::default(); - engine.new_region(1); + let engine = RangeCacheMemoryEngine::new(Arc::default()); + let range = CacheRange::new(b"k000".to_vec(), b"k100".to_vec()); + engine.new_range(range.clone()); let step: i32 = 2; { let mut core = engine.core.lock().unwrap(); - core.region_metas.get_mut(&1).unwrap().can_read = true; - core.region_metas.get_mut(&1).unwrap().safe_ts = 5; - let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + core.range_manager.set_range_readable(&range, true); + core.range_manager.set_safe_ts(&range, 5); + let sl = core.engine.data[cf_to_id("write")].clone(); put_key_val(&sl, "aaa", "va1", 10, 1); put_key_val(&sl, "aaa", "va2", 10, 3); @@ -1197,7 +1326,7 @@ mod tests { // seq num 1 { - let snapshot = engine.snapshot(1, u64::MAX, 1).unwrap(); + let snapshot = engine.snapshot(range.clone(), u64::MAX, 1).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); iter.seek_to_first().unwrap(); assert_eq!(iter.value(), b"va1"); @@ 
-1225,7 +1354,7 @@ mod tests { // seq num 2 { - let snapshot = engine.snapshot(1, u64::MAX, 2).unwrap(); + let snapshot = engine.snapshot(range.clone(), u64::MAX, 2).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); iter.seek_to_first().unwrap(); assert_eq!(iter.value(), b"va1"); @@ -1238,7 +1367,7 @@ mod tests { // seq num 5 { - let snapshot = engine.snapshot(1, u64::MAX, 5).unwrap(); + let snapshot = engine.snapshot(range.clone(), u64::MAX, 5).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); iter.seek_to_first().unwrap(); assert_eq!(iter.value(), b"vb2"); @@ -1249,7 +1378,7 @@ mod tests { // seq num 6 { - let snapshot = engine.snapshot(1, u64::MAX, 6).unwrap(); + let snapshot = engine.snapshot(range.clone(), u64::MAX, 6).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); iter.seek_to_first().unwrap(); assert_eq!(iter.value(), b"va4"); @@ -1289,14 +1418,15 @@ mod tests { #[test] fn test_seq_visibility_backward() { - let engine = RegionCacheMemoryEngine::default(); - engine.new_region(1); + let engine = RangeCacheMemoryEngine::new(Arc::default()); + let range = CacheRange::new(b"k000".to_vec(), b"k100".to_vec()); + engine.new_range(range.clone()); { let mut core = engine.core.lock().unwrap(); - core.region_metas.get_mut(&1).unwrap().can_read = true; - core.region_metas.get_mut(&1).unwrap().safe_ts = 5; - let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + core.range_manager.set_range_readable(&range, true); + core.range_manager.set_safe_ts(&range, 5); + let sl = core.engine.data[cf_to_id("write")].clone(); put_key_val(&sl, "aaa", "va1", 10, 2); put_key_val(&sl, "aaa", "va2", 10, 4); @@ -1320,7 +1450,7 @@ mod tests { // seq num 1 { - let snapshot = engine.snapshot(1, u64::MAX, 1).unwrap(); + let snapshot = engine.snapshot(range.clone(), u64::MAX, 1).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); 
iter.seek_to_last().unwrap(); assert_eq!(iter.value(), b"vc1"); @@ -1338,7 +1468,7 @@ mod tests { // seq num 2 { - let snapshot = engine.snapshot(1, u64::MAX, 2).unwrap(); + let snapshot = engine.snapshot(range.clone(), u64::MAX, 2).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); iter.seek_to_last().unwrap(); assert_eq!(iter.value(), b"vc1"); @@ -1351,7 +1481,7 @@ mod tests { // seq num 5 { - let snapshot = engine.snapshot(1, u64::MAX, 5).unwrap(); + let snapshot = engine.snapshot(range.clone(), u64::MAX, 5).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); iter.seek_to_last().unwrap(); assert_eq!(iter.value(), b"vb2"); @@ -1362,7 +1492,7 @@ mod tests { // seq num 6 { - let snapshot = engine.snapshot(1, u64::MAX, 6).unwrap(); + let snapshot = engine.snapshot(range.clone(), u64::MAX, 6).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); iter.seek_to_last().unwrap(); assert_eq!(iter.value(), b"vc4"); @@ -1390,26 +1520,28 @@ mod tests { let upper_bound = b"z"; iter_opt.set_upper_bound(upper_bound, 0); iter_opt.set_lower_bound(lower_bound, 0); + let range = CacheRange::new(b"k000".to_vec(), b"k100".to_vec()); // backward, all put { - let engine = RegionCacheMemoryEngine::default(); - engine.new_region(1); + let engine = RangeCacheMemoryEngine::new(Arc::default()); + engine.new_range(range.clone()); let sl = { let mut core = engine.core.lock().unwrap(); - core.region_metas.get_mut(&1).unwrap().can_read = true; - core.region_metas.get_mut(&1).unwrap().safe_ts = 5; - core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone() + core.range_manager.set_range_readable(&range, true); + core.range_manager.set_safe_ts(&range, 5); + core.engine.data[cf_to_id("write")].clone() }; + let mut s = 1; for seq in 2..50 { - put_key_val(&sl, "a", "val", 10, 1); + put_key_val(&sl, "a", "val", 10, s + 1); for i in 2..50 { let v = construct_value(i, i); - put_key_val(&sl, "b", 
v.as_str(), 10, i); + put_key_val(&sl, "b", v.as_str(), 10, s + i); } - let snapshot = engine.snapshot(1, 10, seq).unwrap(); + let snapshot = engine.snapshot(range.clone(), 10, s + seq).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); assert!(iter.seek_to_last().unwrap()); let k = construct_mvcc_key("b", 10); @@ -1423,27 +1555,29 @@ mod tests { assert_eq!(iter.value(), b"val"); assert!(!iter.prev().unwrap()); assert!(!iter.valid().unwrap()); + s += 100; } } // backward, all deletes { - let engine = RegionCacheMemoryEngine::default(); - engine.new_region(1); + let engine = RangeCacheMemoryEngine::new(Arc::default()); + engine.new_range(range.clone()); let sl = { let mut core = engine.core.lock().unwrap(); - core.region_metas.get_mut(&1).unwrap().can_read = true; - core.region_metas.get_mut(&1).unwrap().safe_ts = 5; - core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone() + core.range_manager.set_range_readable(&range, true); + core.range_manager.set_safe_ts(&range, 5); + core.engine.data[cf_to_id("write")].clone() }; + let mut s = 1; for seq in 2..50 { - put_key_val(&sl, "a", "val", 10, 1); + put_key_val(&sl, "a", "val", 10, s + 1); for i in 2..50 { - delete_key(&sl, "b", 10, i); + delete_key(&sl, "b", 10, s + i); } - let snapshot = engine.snapshot(1, 10, seq).unwrap(); + let snapshot = engine.snapshot(range.clone(), 10, s + seq).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); assert!(iter.seek_to_last().unwrap()); let k = construct_mvcc_key("a", 10); @@ -1451,18 +1585,19 @@ mod tests { assert_eq!(iter.value(), b"val"); assert!(!iter.prev().unwrap()); assert!(!iter.valid().unwrap()); + s += 100; } } // backward, all deletes except for last put, last put's seq { - let engine = RegionCacheMemoryEngine::default(); - engine.new_region(1); + let engine = RangeCacheMemoryEngine::new(Arc::default()); + engine.new_range(range.clone()); let sl = { let mut core = engine.core.lock().unwrap(); - 
core.region_metas.get_mut(&1).unwrap().can_read = true; - core.region_metas.get_mut(&1).unwrap().safe_ts = 5; - core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone() + core.range_manager.set_range_readable(&range, true); + core.range_manager.set_safe_ts(&range, 5); + core.engine.data[cf_to_id("write")].clone() }; put_key_val(&sl, "a", "val", 10, 1); for i in 2..50 { @@ -1470,7 +1605,7 @@ mod tests { } let v = construct_value(50, 50); put_key_val(&sl, "b", v.as_str(), 10, 50); - let snapshot = engine.snapshot(1, 10, 50).unwrap(); + let snapshot = engine.snapshot(range.clone(), 10, 50).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); assert!(iter.seek_to_last().unwrap()); let k = construct_mvcc_key("b", 10); @@ -1488,42 +1623,46 @@ mod tests { // all deletes except for last put, deletions' seq { - let engine = RegionCacheMemoryEngine::default(); - engine.new_region(1); + let engine = RangeCacheMemoryEngine::new(Arc::default()); + engine.new_range(range.clone()); let sl = { let mut core = engine.core.lock().unwrap(); - core.region_metas.get_mut(&1).unwrap().can_read = true; - core.region_metas.get_mut(&1).unwrap().safe_ts = 5; - core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone() + core.range_manager.set_range_readable(&range, true); + core.range_manager.set_safe_ts(&range, 5); + core.engine.data[cf_to_id("write")].clone() }; + let mut s = 1; for seq in 2..50 { for i in 2..50 { - delete_key(&sl, "b", 10, i); + delete_key(&sl, "b", 10, s + i); } let v = construct_value(50, 50); - put_key_val(&sl, "b", v.as_str(), 10, 50); + put_key_val(&sl, "b", v.as_str(), 10, s + 50); - let snapshot = engine.snapshot(1, 10, seq).unwrap(); + let snapshot = engine.snapshot(range.clone(), 10, s + seq).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); assert!(!iter.seek_to_first().unwrap()); assert!(!iter.valid().unwrap()); assert!(!iter.seek_to_last().unwrap()); assert!(!iter.valid().unwrap()); 
+ + s += 100; } } } #[test] fn test_prefix_seek() { - let engine = RegionCacheMemoryEngine::default(); - engine.new_region(1); + let engine = RangeCacheMemoryEngine::new(Arc::default()); + let range = CacheRange::new(b"k000".to_vec(), b"k100".to_vec()); + engine.new_range(range.clone()); { let mut core = engine.core.lock().unwrap(); - core.region_metas.get_mut(&1).unwrap().can_read = true; - core.region_metas.get_mut(&1).unwrap().safe_ts = 5; - let sl = core.engine.get_mut(&1).unwrap().data[cf_to_id("write")].clone(); + core.range_manager.set_range_readable(&range, true); + core.range_manager.set_safe_ts(&range, 5); + let sl = core.engine.data[cf_to_id("write")].clone(); for i in 1..5 { for mvcc in 10..20 { @@ -1541,7 +1680,7 @@ mod tests { iter_opt.set_upper_bound(&upper_bound, 0); iter_opt.set_lower_bound(&lower_bound, 0); iter_opt.set_prefix_same_as_start(true); - let snapshot = engine.snapshot(1, u64::MAX, u64::MAX).unwrap(); + let snapshot = engine.snapshot(range.clone(), u64::MAX, u64::MAX).unwrap(); let mut iter = snapshot.iterator_opt("write", iter_opt.clone()).unwrap(); // prefix seek, forward @@ -1578,4 +1717,147 @@ mod tests { assert_eq!(start, 20); } } + + #[test] + fn test_skiplist_engine_evict_range() { + let sl_engine = SkiplistEngine::new(Arc::default()); + sl_engine.data.iter().for_each(|sl| { + fill_data_in_skiplist(sl.clone(), (1..60).step_by(1), 1..2, 1); + }); + + let evict_range = CacheRange::new(construct_user_key(20), construct_user_key(40)); + sl_engine.delete_range(&evict_range); + sl_engine.data.iter().for_each(|sl| { + let mut iter = sl.iter(); + iter.seek_to_first(); + for i in 1..20 { + let internal_key = decode_key(iter.key()); + let expected_key = construct_key(i, 1); + assert_eq!(internal_key.user_key, &expected_key); + iter.next(); + } + + for i in 40..60 { + let internal_key = decode_key(iter.key()); + let expected_key = construct_key(i, 1); + assert_eq!(internal_key.user_key, &expected_key); + iter.next(); + } + 
assert!(!iter.valid()); + }); + } + + #[test] + fn test_evict_range_without_snapshot() { + let mut engine = RangeCacheMemoryEngine::new(Arc::default()); + let range = CacheRange::new(construct_user_key(0), construct_user_key(30)); + let evict_range = CacheRange::new(construct_user_key(10), construct_user_key(20)); + engine.new_range(range.clone()); + + { + let mut core = engine.core.lock().unwrap(); + core.range_manager.set_range_readable(&range, true); + core.range_manager.set_safe_ts(&range, 5); + let sl = core.engine.data[cf_to_id("write")].clone(); + for i in 0..30 { + let user_key = construct_key(i, 10); + let internal_key = encode_key(&user_key, 10, ValueType::Value); + let v = construct_value(i, 10); + sl.put(internal_key.clone(), v.clone()); + } + } + + engine.evict_range(&evict_range); + assert!(engine.snapshot(range.clone(), 10, 200).is_none()); + assert!(engine.snapshot(evict_range, 10, 200).is_none()); + + { + let removed = engine.memory_limiter.removed.lock().unwrap(); + for i in 10..20 { + let user_key = construct_key(i, 10); + let internal_key = encode_key(&user_key, 10, ValueType::Value); + assert!(removed.contains(internal_key.as_slice())); + } + } + + let r_left = CacheRange::new(construct_user_key(0), construct_user_key(10)); + let r_right = CacheRange::new(construct_user_key(20), construct_user_key(30)); + let snap_left = engine.snapshot(r_left, 10, 200).unwrap(); + + let mut iter_opt = IterOptions::default(); + let lower_bound = construct_user_key(0); + let upper_bound = construct_user_key(10); + iter_opt.set_upper_bound(&upper_bound, 0); + iter_opt.set_lower_bound(&lower_bound, 0); + let mut iter = snap_left.iterator_opt("write", iter_opt.clone()).unwrap(); + iter.seek_to_first().unwrap(); + verify_key_values(&mut iter, (0..10).step_by(1), 10..11, true, true); + + let lower_bound = construct_user_key(20); + let upper_bound = construct_user_key(30); + iter_opt.set_upper_bound(&upper_bound, 0); + iter_opt.set_lower_bound(&lower_bound, 0); + let 
mut iter = snap_left.iterator_opt("write", iter_opt).unwrap(); + iter.seek_to_first().unwrap(); + verify_key_values(&mut iter, (20..30).step_by(1), 10..11, true, true); + } + + #[test] + fn test_evict_range_with_snapshot() { + let mut engine = RangeCacheMemoryEngine::new(Arc::default()); + let range = CacheRange::new(construct_user_key(0), construct_user_key(30)); + let evict_range = CacheRange::new(construct_user_key(10), construct_user_key(20)); + engine.new_range(range.clone()); + { + let mut core = engine.core.lock().unwrap(); + core.range_manager.set_range_readable(&range, true); + core.range_manager.set_safe_ts(&range, 5); + let sl = core.engine.data[cf_to_id("write")].clone(); + for i in 0..30 { + let user_key = construct_key(i, 10); + let internal_key = encode_key(&user_key, 10, ValueType::Value); + let v = construct_value(i, 10); + sl.put(internal_key.clone(), v.clone()); + } + } + + let s1 = engine.snapshot(range.clone(), 10, 10); + let s2 = engine.snapshot(range, 20, 20); + engine.evict_range(&evict_range); + let range_left = CacheRange::new(construct_user_key(0), construct_user_key(10)); + let s3 = engine.snapshot(range_left, 20, 20).unwrap(); + let range_right = CacheRange::new(construct_user_key(20), construct_user_key(30)); + let s4 = engine.snapshot(range_right, 20, 20).unwrap(); + + drop(s3); + let range_left_eviction = CacheRange::new(construct_user_key(0), construct_user_key(5)); + engine.evict_range(&range_left_eviction); + + { + let removed = engine.memory_limiter.removed.lock().unwrap(); + assert!(removed.is_empty()); + } + + drop(s1); + { + let removed = engine.memory_limiter.removed.lock().unwrap(); + for i in 10..20 { + let user_key = construct_key(i, 10); + let internal_key = encode_key(&user_key, 10, ValueType::Value); + assert!(!removed.contains(internal_key.as_slice())); + } + } + + drop(s2); + // s2 is dropped, so the range of `evict_range` is removed. The snapshot of s3 + // and s4 does not prevent it as they are not overlapped. 
+ { + let removed = engine.memory_limiter.removed.lock().unwrap(); + for i in 10..20 { + let user_key = construct_key(i, 10); + let internal_key = encode_key(&user_key, 10, ValueType::Value); + assert!(removed.contains(internal_key.as_slice())); + } + } + } } diff --git a/components/region_cache_memory_engine/src/keys.rs b/components/region_cache_memory_engine/src/keys.rs index c2cb22a236e..9b0564594f7 100644 --- a/components/region_cache_memory_engine/src/keys.rs +++ b/components/region_cache_memory_engine/src/keys.rs @@ -3,7 +3,9 @@ use std::cmp; use bytes::{BufMut, Bytes, BytesMut}; +use engine_traits::CacheRange; use skiplist_rs::KeyComparator; +use tikv_util::codec::number::NumberEncoder; #[derive(Debug, Clone, Copy, PartialEq)] pub enum ValueType { @@ -106,6 +108,25 @@ pub fn encode_seek_key(key: &[u8], seq: u64, v_type: ValueType) -> Vec { encode_key_internal::>(key, seq, v_type, Vec::with_capacity) } +// range keys deos not contain mvcc version and sequence number +#[inline] +pub fn encode_key_for_eviction(range: &CacheRange) -> (Vec, Vec) { + // Both encoded_start and encoded_end should be the smallest key in the + // respective of user key, so that the eviction covers all versions of the range + // start and covers nothing of range end. 
+ let mut encoded_start = Vec::with_capacity(range.start.len() + 16); + encoded_start.extend_from_slice(&range.start); + encoded_start.encode_u64_desc(u64::MAX).unwrap(); + encoded_start.put_u64((u64::MAX << 8) | VALUE_TYPE_FOR_SEEK as u64); + + let mut encoded_end = Vec::with_capacity(range.end.len() + 16); + encoded_end.extend_from_slice(&range.end); + encoded_end.encode_u64_desc(u64::MAX).unwrap(); + encoded_end.put_u64((u64::MAX << 8) | VALUE_TYPE_FOR_SEEK as u64); + + (encoded_start, encoded_end) +} + #[derive(Default, Debug, Clone, Copy)] pub struct InternalKeyComparator {} diff --git a/components/region_cache_memory_engine/src/lib.rs b/components/region_cache_memory_engine/src/lib.rs index 016f4f2be53..2bf35f96bfa 100644 --- a/components/region_cache_memory_engine/src/lib.rs +++ b/components/region_cache_memory_engine/src/lib.rs @@ -7,6 +7,7 @@ mod engine; pub mod keys; +pub use engine::RangeCacheMemoryEngine; +pub mod range_manager; mod write_batch; -pub use engine::RegionCacheMemoryEngine; -pub use write_batch::RegionCacheWriteBatch; +pub use write_batch::RangeCacheWriteBatch; diff --git a/components/region_cache_memory_engine/src/range_manager.rs b/components/region_cache_memory_engine/src/range_manager.rs new file mode 100644 index 00000000000..2fda42c35af --- /dev/null +++ b/components/region_cache_memory_engine/src/range_manager.rs @@ -0,0 +1,266 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::collections::{BTreeMap, BTreeSet}; + +use engine_traits::CacheRange; + +use crate::engine::{RagneCacheSnapshotMeta, SnapshotList}; + +#[derive(Debug, Default)] +pub struct RangeMeta { + id: u64, + range_snapshot_list: SnapshotList, + can_read: bool, + safe_point: u64, +} + +impl RangeMeta { + fn new(id: u64) -> Self { + Self { + id, + range_snapshot_list: SnapshotList::default(), + can_read: false, + safe_point: 0, + } + } + + fn derive_from(id: u64, r: &RangeMeta) -> Self { + Self { + id, + range_snapshot_list: SnapshotList::default(), + can_read: r.can_read, + safe_point: r.safe_point, + } + } + + pub(crate) fn range_snapshot_list(&self) -> &SnapshotList { + &self.range_snapshot_list + } +} + +#[derive(Default)] +struct IdAllocator(u64); + +impl IdAllocator { + fn allocate_id(&mut self) -> u64 { + self.0 += 1; + self.0 + } +} + +// RangeManger manges the ranges for RangeCacheMemoryEngine. Every new ranges +// (whether created by new_range or by splitted due to eviction) has an unique +// id so that range + id can exactly locate the position. +// When an eviction occured, say we now have k1-k10 in self.ranges and the +// eviction range is k3-k5. k1-k10 will be splitted to three ranges: k1-k3, +// k3-k5, and k5-k10. +// k1-k3 and k5-k10 will be new ranges inserted in self.ranges with meta dervied +// from meta of k1-k10 (only safe_ts and can_read will be derived). k1-k10 will +// be removed from self.ranges and inserted to self.historical_ranges. Then, +// k3-k5 will be in the self.evicted_ranges. Now, we cannot remove the data of +// k3-k5 as there may be some snapshot of k1-k10. After these snapshot are +// dropped, k3-k5 can be acutally removed. +#[derive(Default)] +pub struct RangeManager { + // Each new range will increment it by one. + id_allocator: IdAllocator, + // Range before an eviction. It is recorded due to some undropped snapshot, which block the + // evicted range deleting the relevant data. 
+ historical_ranges: BTreeMap, + evicted_ranges: BTreeSet, + // ranges that are cached now + ranges: BTreeMap, +} + +impl RangeManager { + pub(crate) fn ranges(&self) -> &BTreeMap { + &self.ranges + } + + pub(crate) fn new_range(&mut self, range: CacheRange) { + assert!(!self.overlap_with_range(&range)); + let range_meta = RangeMeta::new(self.id_allocator.allocate_id()); + self.ranges.insert(range, range_meta); + } + + pub fn set_range_readable(&mut self, range: &CacheRange, set_readable: bool) { + let meta = self.ranges.get_mut(range).unwrap(); + meta.can_read = set_readable; + } + + pub fn set_safe_ts(&mut self, range: &CacheRange, safe_ts: u64) -> bool { + if let Some(meta) = self.ranges.get_mut(range) { + if meta.safe_point > safe_ts { + return false; + } + meta.safe_point = safe_ts; + true + } else { + false + } + } + + pub fn contains(&self, key: &[u8]) -> bool { + self.ranges.keys().any(|r| r.contains_key(key)) + } + + pub(crate) fn overlap_with_range(&self, range: &CacheRange) -> bool { + self.ranges.keys().any(|r| r.overlaps(range)) + } + + // Acquire a snapshot of the `range` with `read_ts`. If the range is not + // accessable, None will be returned. Otherwise, the range id will be returned. + pub(crate) fn range_snapshot(&mut self, range: &CacheRange, read_ts: u64) -> Option { + let Some(range_key) = self + .ranges + .keys() + .find(|&r| r.contains_range(range)) + .cloned() + else { + return None; + }; + let meta = self.ranges.get_mut(&range_key).unwrap(); + + if read_ts <= meta.safe_point || !meta.can_read { + // todo(SpadeA): add metrics for it + return None; + } + + meta.range_snapshot_list.new_snapshot(read_ts); + Some(meta.id) + } + + // If the snapshot is the last one in the snapshot list of one cache range in + // historical_ranges, it means one or some evicted_ranges may be ready to be + // removed physically. + // So, here, we return a vector of ranges to denote the ranges that are ready to + // be removed. 
+ pub(crate) fn remove_range_snapshot( + &mut self, + snapshot_meta: &RagneCacheSnapshotMeta, + ) -> Vec { + if let Some(range_key) = self + .historical_ranges + .iter() + .find(|&(range, meta)| { + range.contains_range(&snapshot_meta.range) && meta.id == snapshot_meta.range_id + }) + .map(|(r, _)| r.clone()) + { + let meta = self.historical_ranges.get_mut(&range_key).unwrap(); + meta.range_snapshot_list + .remove_snapshot(snapshot_meta.snapshot_ts); + if meta.range_snapshot_list.is_empty() { + self.historical_ranges.remove(&range_key); + } + + return self + .evicted_ranges + .iter() + .filter(|evicted_range| { + !self + .historical_ranges + .keys() + .any(|r| r.overlaps(evicted_range)) + }) + .cloned() + .collect::>(); + } + + // It must belong to the `self.ranges` if not found in `self.historical_ranges` + let range_key = self + .ranges + .iter() + .find(|&(range, meta)| { + range.contains_range(&snapshot_meta.range) && meta.id == snapshot_meta.range_id + }) + .map(|(r, _)| r.clone()) + .unwrap(); + let meta = self.ranges.get_mut(&range_key).unwrap(); + meta.range_snapshot_list + .remove_snapshot(snapshot_meta.snapshot_ts); + vec![] + } + + // return whether the range can be already removed + pub(crate) fn evict_range(&mut self, evict_range: &CacheRange) -> bool { + let range_key = self + .ranges + .keys() + .find(|&r| r.contains_range(evict_range)) + .unwrap() + .clone(); + let meta = self.ranges.remove(&range_key).unwrap(); + let (left_range, right_range) = range_key.split_off(evict_range); + assert!((left_range.is_some() || right_range.is_some()) || &range_key == evict_range); + + if let Some(left_range) = left_range { + let left_meta = RangeMeta::derive_from(self.id_allocator.allocate_id(), &meta); + self.ranges.insert(left_range, left_meta); + } + + if let Some(right_range) = right_range { + let right_meta = RangeMeta::derive_from(self.id_allocator.allocate_id(), &meta); + self.ranges.insert(right_range, right_meta); + } + + 
self.evicted_ranges.insert(evict_range.clone()); + + if !meta.range_snapshot_list.is_empty() { + self.historical_ranges.insert(range_key, meta); + return false; + } + + // we also need to check with previous historical_ranges + !self + .historical_ranges + .keys() + .any(|r| r.overlaps(evict_range)) + } +} + +#[cfg(test)] +mod tests { + use engine_traits::CacheRange; + + use super::RangeManager; + + #[test] + fn test_range_manager() { + let mut range_mgr = RangeManager::default(); + let r1 = CacheRange::new(b"k00".to_vec(), b"k10".to_vec()); + + range_mgr.new_range(r1.clone()); + range_mgr.set_range_readable(&r1, true); + range_mgr.set_safe_ts(&r1, 5); + assert!(range_mgr.range_snapshot(&r1, 5).is_none()); + assert!(range_mgr.range_snapshot(&r1, 8).is_some()); + assert!(range_mgr.range_snapshot(&r1, 10).is_some()); + let tmp_r = CacheRange::new(b"k08".to_vec(), b"k15".to_vec()); + assert!(range_mgr.range_snapshot(&tmp_r, 8).is_none()); + let tmp_r = CacheRange::new(b"k10".to_vec(), b"k11".to_vec()); + assert!(range_mgr.range_snapshot(&tmp_r, 8).is_none()); + + let r_evict = CacheRange::new(b"k03".to_vec(), b"k06".to_vec()); + let r_left = CacheRange::new(b"k00".to_vec(), b"k03".to_vec()); + let r_right = CacheRange::new(b"k06".to_vec(), b"k10".to_vec()); + range_mgr.evict_range(&r_evict); + let meta1 = range_mgr.historical_ranges.get(&r1).unwrap(); + assert!(range_mgr.evicted_ranges.contains(&r_evict)); + assert!(range_mgr.ranges.get(&r1).is_none()); + let meta2 = range_mgr.ranges.get(&r_left).unwrap(); + let meta3 = range_mgr.ranges.get(&r_right).unwrap(); + assert!(meta1.safe_point == meta2.safe_point && meta1.safe_point == meta3.safe_point); + assert!(meta2.can_read && meta3.can_read); + + // evict a range with accurate match + range_mgr.range_snapshot(&r_left, 10); + range_mgr.evict_range(&r_left); + assert!(range_mgr.historical_ranges.get(&r_left).is_some()); + assert!(range_mgr.evicted_ranges.contains(&r_left)); + 
assert!(range_mgr.ranges.get(&r_left).is_none()); + + assert!(!range_mgr.evict_range(&r_right)); + assert!(range_mgr.historical_ranges.get(&r_right).is_none()); + } +} diff --git a/components/region_cache_memory_engine/src/write_batch.rs b/components/region_cache_memory_engine/src/write_batch.rs index 55bbb808980..31cf844ea0f 100644 --- a/components/region_cache_memory_engine/src/write_batch.rs +++ b/components/region_cache_memory_engine/src/write_batch.rs @@ -3,36 +3,36 @@ use engine_traits::{Mutable, Result, WriteBatch, WriteBatchExt, WriteOptions, CF use tikv_util::box_err; use crate::{ - engine::{cf_to_id, RegionMemoryEngine}, + engine::{cf_to_id, SkiplistEngine}, keys::{encode_key, ValueType}, - RegionCacheMemoryEngine, + RangeCacheMemoryEngine, }; /// Callback to apply an encoded entry to cache engine. /// /// Arguments: &str - cf name, Bytes - (encoded) key, Bytes - value. /// -/// TODO: consider refactoring into a trait once RegionCacheMemoryEngine API +/// TODO: consider refactoring into a trait once RangeCacheMemoryEngine API /// stabilizes. type ApplyEncodedEntryCb = Box Result<()> + Send + Sync>; -/// RegionCacheWriteBatch maintains its own in-memory buffer. -pub struct RegionCacheWriteBatch { - buffer: Vec, +/// RangeCacheWriteBatch maintains its own in-memory buffer. 
+pub struct RangeCacheWriteBatch { + buffer: Vec, apply_cb: ApplyEncodedEntryCb, sequence_number: Option, save_points: Vec, } -impl std::fmt::Debug for RegionCacheWriteBatch { +impl std::fmt::Debug for RangeCacheWriteBatch { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("RegionCacheWriteBatch") + f.debug_struct("RangeCacheWriteBatch") .field("buffer", &self.buffer) .finish() } } -impl RegionCacheWriteBatch { +impl RangeCacheWriteBatch { pub fn new(apply_cb: ApplyEncodedEntryCb) -> Self { Self { buffer: Vec::new(), @@ -94,13 +94,13 @@ impl CacheWriteBatchEntryMutation { } } #[derive(Clone, Debug)] -struct RegionCacheWriteBatchEntry { +struct RangeCacheWriteBatchEntry { cf: String, key: Bytes, mutation: CacheWriteBatchEntryMutation, } -impl RegionCacheWriteBatchEntry { +impl RangeCacheWriteBatchEntry { pub fn put_value(cf: &str, key: &[u8], value: &[u8]) -> Self { Self { cf: cf.to_owned(), @@ -126,39 +126,39 @@ impl RegionCacheWriteBatchEntry { self.key.len() + std::mem::size_of::() + self.mutation.data_size() } } -impl RegionCacheMemoryEngine { +impl RangeCacheMemoryEngine { fn apply_cb(&self) -> ApplyEncodedEntryCb { // TODO: use the stabilized API for appending to the skip list here. 
Box::new(|_cf, _key, _value| Ok(())) } } -impl From<&RegionMemoryEngine> for RegionCacheWriteBatch { - fn from(engine: &RegionMemoryEngine) -> Self { +impl From<&SkiplistEngine> for RangeCacheWriteBatch { + fn from(engine: &SkiplistEngine) -> Self { let engine_clone = engine.clone(); let apply_cb = Box::new(move |cf: &'_ str, key, value| { engine_clone.data[cf_to_id(cf)].put(key, value); Ok(()) }); - RegionCacheWriteBatch::new(apply_cb) + RangeCacheWriteBatch::new(apply_cb) } } -impl WriteBatchExt for RegionCacheMemoryEngine { - type WriteBatch = RegionCacheWriteBatch; +impl WriteBatchExt for RangeCacheMemoryEngine { + type WriteBatch = RangeCacheWriteBatch; // todo: adjust it const WRITE_BATCH_MAX_KEYS: usize = 256; fn write_batch(&self) -> Self::WriteBatch { - RegionCacheWriteBatch::new(self.apply_cb()) + RangeCacheWriteBatch::new(self.apply_cb()) } fn write_batch_with_cap(&self, cap: usize) -> Self::WriteBatch { - RegionCacheWriteBatch::with_capacity(self.apply_cb(), cap) + RangeCacheWriteBatch::with_capacity(self.apply_cb(), cap) } } -impl WriteBatch for RegionCacheWriteBatch { +impl WriteBatch for RangeCacheWriteBatch { fn write_opt(&mut self, _: &WriteOptions) -> Result { self.sequence_number .map(|seq| self.write_impl(seq).map(|()| seq)) @@ -169,7 +169,7 @@ impl WriteBatch for RegionCacheWriteBatch { fn data_size(&self) -> usize { self.buffer .iter() - .map(RegionCacheWriteBatchEntry::data_size) + .map(RangeCacheWriteBatchEntry::data_size) .sum() } @@ -217,14 +217,14 @@ impl WriteBatch for RegionCacheWriteBatch { } } -impl Mutable for RegionCacheWriteBatch { +impl Mutable for RangeCacheWriteBatch { fn put(&mut self, key: &[u8], val: &[u8]) -> Result<()> { self.put_cf(CF_DEFAULT, key, val) } fn put_cf(&mut self, cf: &str, key: &[u8], val: &[u8]) -> Result<()> { self.buffer - .push(RegionCacheWriteBatchEntry::put_value(cf, key, val)); + .push(RangeCacheWriteBatchEntry::put_value(cf, key, val)); Ok(()) } @@ -234,7 +234,7 @@ impl Mutable for 
RegionCacheWriteBatch { fn delete_cf(&mut self, cf: &str, key: &[u8]) -> Result<()> { self.buffer - .push(RegionCacheWriteBatchEntry::deletion(cf, key)); + .push(RangeCacheWriteBatchEntry::deletion(cf, key)); Ok(()) } @@ -249,14 +249,16 @@ impl Mutable for RegionCacheWriteBatch { #[cfg(test)] mod tests { - use engine_traits::{Peekable, RegionCacheEngine, WriteBatch}; + use std::sync::Arc; + + use engine_traits::{CacheRange, Peekable, RangeCacheEngine, WriteBatch}; use super::*; #[test] fn test_write_to_skiplist() { - let engine = RegionMemoryEngine::default(); - let mut wb = RegionCacheWriteBatch::from(&engine); + let engine = SkiplistEngine::new(Arc::default()); + let mut wb = RangeCacheWriteBatch::from(&engine); wb.put(b"aaa", b"bbb").unwrap(); wb.set_sequence_number(1).unwrap(); assert_eq!(wb.write().unwrap(), 1); @@ -267,8 +269,8 @@ mod tests { #[test] fn test_savepoints() { - let engine = RegionMemoryEngine::default(); - let mut wb = RegionCacheWriteBatch::from(&engine); + let engine = SkiplistEngine::new(Arc::default()); + let mut wb = RangeCacheWriteBatch::from(&engine); wb.put(b"aaa", b"bbb").unwrap(); wb.set_save_point(); wb.put(b"aaa", b"ccc").unwrap(); @@ -284,15 +286,16 @@ mod tests { #[test] fn test_put_write_clear_delete_put_write() { - let engine = RegionCacheMemoryEngine::default(); - engine.new_region(1); + let engine = RangeCacheMemoryEngine::new(Arc::default()); + let r = CacheRange::new(b"".to_vec(), b"z".to_vec()); + engine.new_range(r.clone()); let engine_for_writes = { let mut core = engine.core.lock().unwrap(); - core.region_metas.get_mut(&1).unwrap().can_read = true; - core.region_metas.get_mut(&1).unwrap().safe_ts = 10; - core.engine.get_mut(&1).unwrap().clone() + core.mut_range_manager().set_range_readable(&r, true); + core.mut_range_manager().set_safe_ts(&r, 10); + core.engine() }; - let mut wb = RegionCacheWriteBatch::from(&engine_for_writes); + let mut wb = RangeCacheWriteBatch::from(&engine_for_writes); wb.put(b"aaa", 
b"bbb").unwrap(); wb.set_sequence_number(1).unwrap(); _ = wb.write().unwrap(); @@ -301,7 +304,7 @@ mod tests { wb.delete(b"aaa").unwrap(); wb.set_sequence_number(2).unwrap(); _ = wb.write().unwrap(); - let snapshot = engine.snapshot(1, u64::MAX, 2).unwrap(); + let snapshot = engine.snapshot(r, u64::MAX, 2).unwrap(); assert_eq!( snapshot.get_value(&b"bbb"[..]).unwrap().unwrap(), &b"ccc"[..] diff --git a/components/server/src/common.rs b/components/server/src/common.rs index 2c43abccf44..49d9a1a865c 100644 --- a/components/server/src/common.rs +++ b/components/server/src/common.rs @@ -31,7 +31,7 @@ use grpcio::Environment; use hybrid_engine::HybridEngine; use pd_client::{PdClient, RpcClient}; use raft_log_engine::RaftLogEngine; -use region_cache_memory_engine::RegionCacheMemoryEngine; +use region_cache_memory_engine::RangeCacheMemoryEngine; use security::SecurityManager; use tikv::{ config::{ConfigController, DbConfigManger, DbType, TikvConfig}, @@ -709,7 +709,7 @@ impl KvEngineBuilder for RocksEngine { } } -impl KvEngineBuilder for HybridEngine { +impl KvEngineBuilder for HybridEngine { fn build(_disk_engine: RocksEngine) -> Self { unimplemented!() } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index bc0769b751a..5856563b49e 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -73,7 +73,7 @@ use raftstore::{ }, RaftRouterCompactedEventSender, }; -use region_cache_memory_engine::RegionCacheMemoryEngine; +use region_cache_memory_engine::RangeCacheMemoryEngine; use resolved_ts::{LeadershipResolver, Task}; use resource_control::ResourceGroupManager; use security::SecurityManager; @@ -224,7 +224,7 @@ pub fn run_tikv( if cfg!(feature = "memory-engine") && config.region_cache_memory_limit != ReadableSize(0) { - run_impl::, RocksEngine, API>( + run_impl::, RocksEngine, API>( config, service_event_tx, service_event_rx, @@ -240,7 +240,7 @@ pub fn run_tikv( if cfg!(feature = "memory-engine") && 
config.region_cache_memory_limit != ReadableSize(0) { - run_impl::, RaftLogEngine, API>( + run_impl::, RaftLogEngine, API>( config, service_event_tx, service_event_rx, diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index db438d4233a..1c4296d59db 100644 --- a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -526,7 +526,7 @@ pub fn new_node_cluster(id: u64, count: usize) -> Cluster; +pub type HybridEngineImpl = HybridEngine; pub fn must_get( engine: &impl RawEngine, diff --git a/src/server/raftkv/mod.rs b/src/server/raftkv/mod.rs index 883b0e2d684..04fb41ec0ac 100644 --- a/src/server/raftkv/mod.rs +++ b/src/server/raftkv/mod.rs @@ -648,7 +648,7 @@ where let snap_ctx = ctx.start_ts.map(|ts| SnapshotContext { read_ts: ts.into_inner(), - region_id: ctx.pb_ctx.get_region_id(), + range: None, }); if res.is_ok() { From 1fbdf652ef9747cdb6a360ab302943114e8535c4 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Fri, 19 Jan 2024 14:19:47 +0800 Subject: [PATCH 1124/1149] logger: fix missing log fields (#16411) close tikv/tikv#16410 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/tikv_util/src/logger/mod.rs | 40 ++++++++++++-------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/components/tikv_util/src/logger/mod.rs b/components/tikv_util/src/logger/mod.rs index 037465cc18b..3af4213500b 100644 --- a/components/tikv_util/src/logger/mod.rs +++ b/components/tikv_util/src/logger/mod.rs @@ -84,12 +84,6 @@ where threshold, inner: drain, }; - // ThreadIDrain discards all previous `slog::OwnedKVList`, anything that - // wraps it should not pass `slog::OwnedKVList`. - // - // NB: slog macros (slog::info!() and others) only produce one - // `slog::Record`, `slog::OwnedKVList` are provided by `slog::Drain` and - // `slog::Logger`. 
let drain = ThreadIDrain(drain); // Let GlobalLevelFilter wrap ThreadIDrain, so that it saves getting // thread id for flittered logs. @@ -651,12 +645,13 @@ where { type Ok = D::Ok; type Err = D::Err; - fn log(&self, record: &Record<'_>, _: &OwnedKVList) -> Result { - let thread_id = std::thread::current().id().as_u64().get(); - self.0.log( - record, - &OwnedKVList::from(slog::o!("thread_id" => thread_id)), - ) + fn log(&self, record: &Record<'_>, values: &OwnedKVList) -> Result { + let values = slog::o!( + "thread_id" => std::thread::current().id().as_u64().get(), + // OwnedKVList is essentially an Arc, clone is cheap. + values.clone(), + ); + self.0.log(record, &OwnedKVList::from(values)) } } @@ -789,22 +784,23 @@ mod tests { let decorator = PlainSyncDecorator::new(TestWriter(buffer.clone())); let drain = TikvFormat::new(decorator, true).fuse(); let drain = ThreadIDrain(drain); + let drain = slog::Logger::root_typed(drain, slog_o!("raft_id" => 1)).into_erased(); let logger = slog::Logger::root_typed(drain, slog_o!()).into_erased(); log_format_cases(logger); let thread_id = std::thread::current().id().as_u64(); let expect = format!( - r#"[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:469] [] [thread_id={0}] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:469] [Welcome] [thread_id={0}] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:470] ["Welcome TiKV"] [thread_id={0}] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:471] [欢迎] [thread_id={0}] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:472] ["欢迎 TiKV"] [thread_id={0}] -[2019/01/15 13:40:39.615 +08:00] [INFO] [mod.rs:455] ["failed to fetch URL"] [backoff=3s] [attempt=3] [url=http://example.com] [thread_id={0}] -[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:460] ["failed to \"fetch\" [URL]: http://example.com"] [thread_id={0}] -[2019/01/15 13:40:39.619 +08:00] [DEBUG] [mod.rs:463] ["Slow query"] ["process keys"=1500] [duration=123ns] [sql="SELECT * FROM TABLE WHERE ID=\"abc\""] [thread_id={0}] -[2019/01/15 
13:40:39.619 +08:00] [WARN] [mod.rs:473] [Type] [Other=-inf] [Score=inf] [Counter=NaN] [thread_id={0}] -[2019/01/16 16:56:04.854 +08:00] [INFO] [mod.rs:391] ["more type tests"] [str_array="[\"💖\", \"�\", \"☺☻☹\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"\\\\x80\\\\x80\\\\x80\\\\x80\", \"XML\"]"] [u8=34] [is_None=None] [is_false=false] [is_true=true] ["store ids"="[1, 2, 3]"] [url-peers="[\"peer1\", \"peer 2\"]"] [urls="[\"http://xxx.com:2347\", \"http://xxx.com:2432\"]"] [field2="in quote"] [field1=no_quote] [thread_id={0}] + r#"[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:469] [] [raft_id=1] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:469] [Welcome] [raft_id=1] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:470] ["Welcome TiKV"] [raft_id=1] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:471] [欢迎] [raft_id=1] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:472] ["欢迎 TiKV"] [raft_id=1] [thread_id={0}] +[2019/01/15 13:40:39.615 +08:00] [INFO] [mod.rs:455] ["failed to fetch URL"] [backoff=3s] [attempt=3] [url=http://example.com] [raft_id=1] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [INFO] [mod.rs:460] ["failed to \"fetch\" [URL]: http://example.com"] [raft_id=1] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [DEBUG] [mod.rs:463] ["Slow query"] ["process keys"=1500] [duration=123ns] [sql="SELECT * FROM TABLE WHERE ID=\"abc\""] [raft_id=1] [thread_id={0}] +[2019/01/15 13:40:39.619 +08:00] [WARN] [mod.rs:473] [Type] [Other=-inf] [Score=inf] [Counter=NaN] [raft_id=1] [thread_id={0}] +[2019/01/16 16:56:04.854 +08:00] [INFO] [mod.rs:391] ["more type tests"] [str_array="[\"💖\", \"�\", \"☺☻☹\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©\", \"\\\\x80\\\\x80\\\\x80\\\\x80\", \"XML\"]"] [u8=34] [is_None=None] [is_false=false] [is_true=true] ["store ids"="[1, 2, 3]"] [url-peers="[\"peer1\", 
\"peer 2\"]"] [urls="[\"http://xxx.com:2347\", \"http://xxx.com:2432\"]"] [field2="in quote"] [field1=no_quote] [raft_id=1] [thread_id={0}] "#, thread_id ); From 6ef0d504cc736425fd8245f6c3a319ea08a1a7d5 Mon Sep 17 00:00:00 2001 From: Connor Date: Fri, 19 Jan 2024 14:49:48 +0800 Subject: [PATCH 1125/1149] config: Change titan min blob size default value to 32KB (#16402) close tikv/tikv#16370 Change titan min blob size default value to 32KB Signed-off-by: Connor1996 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- components/cdc/src/initializer.rs | 6 +- components/server/src/setup.rs | 18 +- components/test_raftstore/src/util.rs | 2 +- etc/config-template.toml | 5 +- src/config/mod.rs | 375 +++++++++++++++++------ tests/integrations/config/mod.rs | 8 +- tests/integrations/storage/test_titan.rs | 2 +- 7 files changed, 298 insertions(+), 118 deletions(-) diff --git a/components/cdc/src/initializer.rs b/components/cdc/src/initializer.rs index 551b01ad83e..41997252c6b 100644 --- a/components/cdc/src/initializer.rs +++ b/components/cdc/src/initializer.rs @@ -1127,11 +1127,11 @@ mod tests { let mut cfg = DbConfig::default(); cfg.titan.enabled = Some(true); cfg.defaultcf.titan.blob_run_mode = BlobRunMode::Normal; - cfg.defaultcf.titan.min_blob_size = ReadableSize(0); + cfg.defaultcf.titan.min_blob_size = Some(ReadableSize(0)); cfg.writecf.titan.blob_run_mode = BlobRunMode::Normal; - cfg.writecf.titan.min_blob_size = ReadableSize(0); + cfg.writecf.titan.min_blob_size = Some(ReadableSize(0)); cfg.lockcf.titan.blob_run_mode = BlobRunMode::Normal; - cfg.lockcf.titan.min_blob_size = ReadableSize(0); + cfg.lockcf.titan.min_blob_size = Some(ReadableSize(0)); let mut engine = TestEngineBuilder::new().build_with_cfg(&cfg).unwrap(); must_prewrite_put(&mut engine, b"zkey", b"value", b"zkey", 100); diff --git a/components/server/src/setup.rs b/components/server/src/setup.rs index 0228e0c7f28..53981385265 100644 --- 
a/components/server/src/setup.rs +++ b/components/server/src/setup.rs @@ -11,7 +11,7 @@ use chrono::Local; use clap::ArgMatches; use collections::HashMap; use fail; -use tikv::config::{check_critical_config, persist_config, MetricConfig, TikvConfig}; +use tikv::config::{MetricConfig, TikvConfig}; use tikv_util::{self, config, logger}; // A workaround for checking if log is initialized. @@ -303,21 +303,9 @@ pub fn overwrite_config_with_cmd_args(config: &mut TikvConfig, matches: &ArgMatc } } -#[allow(dead_code)] pub fn validate_and_persist_config(config: &mut TikvConfig, persist: bool) { - config.compatible_adjust(); - if let Err(e) = config.validate() { - fatal!("invalid configuration: {}", e); - } - - if let Err(e) = check_critical_config(config) { - fatal!("critical config check failed: {}", e); - } - - if persist { - if let Err(e) = persist_config(config) { - fatal!("persist critical config failed: {}", e); - } + if let Err(e) = tikv::config::validate_and_persist_config(config, persist) { + fatal!("failed to validate config: {}", e); } } diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index fbeeecbfac2..91e34ce0699 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -776,7 +776,7 @@ pub fn configure_for_enable_titan>( cluster.cfg.rocksdb.titan.enabled = Some(true); cluster.cfg.rocksdb.titan.purge_obsolete_files_period = ReadableDuration::secs(1); cluster.cfg.rocksdb.titan.max_background_gc = 10; - cluster.cfg.rocksdb.defaultcf.titan.min_blob_size = min_blob_size; + cluster.cfg.rocksdb.defaultcf.titan.min_blob_size = Some(min_blob_size); cluster.cfg.rocksdb.defaultcf.titan.blob_run_mode = BlobRunMode::Normal; cluster.cfg.rocksdb.defaultcf.titan.min_gc_batch_size = ReadableSize::kb(0); } diff --git a/etc/config-template.toml b/etc/config-template.toml index e5a8e621dca..7482fbb9e5d 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -673,7 +673,6 @@ ## 
Enables or disables `Titan`. Note that Titan is still an experimental feature. Once ## enabled, it can't fall back. Forced fallback may result in data loss. ## Titan is default on since v7.6.0. This won't affect deployments existed before v7.6.0. -## default: true # enabled = true ## Maximum number of threads of `Titan` background gc jobs. @@ -928,8 +927,8 @@ [rocksdb.defaultcf.titan] ## The smallest value to store in blob files. Value smaller than ## this threshold will be inlined in base DB. -## default: 1KB -# min-blob-size = "1KB" +## The default value is 32KB since v7.6.0. But it won't affect deployments existed before v7.6.0 of which the default value is 1KB. +# min-blob-size = "32KB" ## The compression algorithm used to compress data in blob files. ## Compression method. diff --git a/src/config/mod.rs b/src/config/mod.rs index f5cb4b633b6..5768e9be15a 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -132,7 +132,7 @@ fn bloom_filter_ratio(et: EngineType) -> f64 { #[serde(rename_all = "kebab-case")] pub struct TitanCfConfig { #[online_config(skip)] - pub min_blob_size: ReadableSize, + pub min_blob_size: Option, #[online_config(skip)] pub blob_file_compression: CompressionType, #[online_config(skip)] @@ -166,12 +166,15 @@ pub struct TitanCfConfig { #[deprecated = "Titan doesn't need to sample anymore"] pub sample_ratio: Option, } +const DEFAULT_MIN_BLOB_SIZE: ReadableSize = ReadableSize::kb(32); impl Default for TitanCfConfig { #[allow(deprecated)] fn default() -> Self { Self { - min_blob_size: ReadableSize::kb(1), + min_blob_size: None, /* 32KB for newly created instances, and keep config value from + * old installation. 
+ * The logic is in `optional_default_cfg_adjust_with` */ blob_file_compression: CompressionType::Zstd, zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), @@ -190,9 +193,16 @@ impl Default for TitanCfConfig { } impl TitanCfConfig { + fn default_for_disabled() -> Self { + Self { + blob_run_mode: BlobRunMode::ReadOnly, + ..Default::default() + } + } + fn build_opts(&self) -> RocksTitanDbOptions { let mut opts = RocksTitanDbOptions::new(); - opts.set_min_blob_size(self.min_blob_size.0); + opts.set_min_blob_size(self.min_blob_size.unwrap_or(DEFAULT_MIN_BLOB_SIZE).0); opts.set_blob_file_compression(self.blob_file_compression.into()); // To try zstd dict compression, set dict size to 4k, sample size to 100X dict // size @@ -560,7 +570,7 @@ macro_rules! write_into_metrics { // Titan specific metrics. $metrics .with_label_values(&[$tag, "titan_min_blob_size"]) - .set($cf.titan.min_blob_size.0 as f64); + .set($cf.titan.min_blob_size.unwrap_or_default().0 as f64); $metrics .with_label_values(&[$tag, "titan_blob_cache_size"]) .set($cf.titan.blob_cache_size.0 as f64); @@ -874,12 +884,6 @@ cf_config!(WriteCfConfig); impl Default for WriteCfConfig { fn default() -> WriteCfConfig { - // Setting blob_run_mode=read_only effectively disable Titan. - let titan = TitanCfConfig { - blob_run_mode: BlobRunMode::ReadOnly, - ..Default::default() - }; - WriteCfConfig { block_size: ReadableSize::kb(32), block_cache_size: None, @@ -939,7 +943,7 @@ impl Default for WriteCfConfig { max_compactions: None, ttl: None, periodic_compaction_seconds: None, - titan, + titan: TitanCfConfig::default_for_disabled(), write_buffer_limit: None, } } @@ -1008,12 +1012,6 @@ cf_config!(LockCfConfig); impl Default for LockCfConfig { fn default() -> LockCfConfig { - // Setting blob_run_mode=read_only effectively disable Titan. 
- let titan = TitanCfConfig { - blob_run_mode: BlobRunMode::ReadOnly, - ..Default::default() - }; - LockCfConfig { block_size: ReadableSize::kb(16), block_cache_size: None, @@ -1065,7 +1063,7 @@ impl Default for LockCfConfig { max_compactions: None, ttl: None, periodic_compaction_seconds: None, - titan, + titan: TitanCfConfig::default_for_disabled(), write_buffer_limit: None, } } @@ -1112,11 +1110,6 @@ cf_config!(RaftCfConfig); impl Default for RaftCfConfig { fn default() -> RaftCfConfig { - // Setting blob_run_mode=read_only effectively disable Titan. - let titan = TitanCfConfig { - blob_run_mode: BlobRunMode::ReadOnly, - ..Default::default() - }; RaftCfConfig { block_size: ReadableSize::kb(16), block_cache_size: None, @@ -1168,7 +1161,7 @@ impl Default for RaftCfConfig { max_compactions: None, ttl: None, periodic_compaction_seconds: None, - titan, + titan: TitanCfConfig::default_for_disabled(), write_buffer_limit: None, } } @@ -1380,12 +1373,7 @@ impl Default for DbConfig { } impl DbConfig { - pub fn optimize_for( - &mut self, - storage_config: &StorageConfig, - kv_data_exists: bool, - is_titan_dir_empty: bool, - ) { + pub fn optimize_for(&mut self, storage_config: &StorageConfig) { match storage_config.engine { EngineType::RaftKv => { self.allow_concurrent_memtable_write.get_or_insert(true); @@ -1397,15 +1385,6 @@ impl DbConfig { if self.lockcf.write_buffer_size.is_none() { self.lockcf.write_buffer_size = Some(ReadableSize::mb(32)); } - if self.titan.enabled.is_none() { - // If the user doesn't specify titan.enabled, we enable it by default for newly - // created clusters. 
- if (kv_data_exists && is_titan_dir_empty) || storage_config.enable_ttl { - self.titan.enabled = Some(false); - } else { - self.titan.enabled = Some(true); - } - } } EngineType::RaftKv2 => { self.enable_multi_batch_write.get_or_insert(false); @@ -1655,6 +1634,12 @@ impl DbConfig { if self.raftcf.write_buffer_limit.is_some() { return Err("raftcf does not support cf based write buffer manager".into()); } + if self.writecf.titan.blob_run_mode != BlobRunMode::ReadOnly { + return Err( + "writecf does not support enabling Titan due to compaction filter incompatibility" + .into(), + ); + } if self.enable_unordered_write { if let Some(true) = self.titan.enabled { return Err("RocksDB.unordered_write does not support Titan".into()); @@ -3608,7 +3593,6 @@ impl TikvConfig { return Err("raft_engine.config.dir can't be same as raft_store.raftdb_path".into()); } // Newly created dbs will be optimized with certain options. e.g. Titan. - let mut is_titan_dir_empty = true; let kv_data_exists = match self.storage.engine { EngineType::RaftKv => { let kv_db_path = self.infer_kv_engine_path(None)?; @@ -3645,7 +3629,6 @@ impl TikvConfig { if let Err(e) = tikv_util::config::check_data_dir_empty(titandb_path.to_str().unwrap(), "blob") { - is_titan_dir_empty = false; if let Some(false) = self.rocksdb.titan.enabled { // If Titan is disabled explicitly but Titan's data directory is not empty, // return an error. @@ -3680,8 +3663,7 @@ impl TikvConfig { .validate(kv_data_exists)?; // Optimize. 
- self.rocksdb - .optimize_for(&self.storage, kv_data_exists, is_titan_dir_empty); + self.rocksdb.optimize_for(&self.storage); self.coprocessor .optimize_for(self.storage.engine == EngineType::RaftKv2); self.split @@ -3920,6 +3902,37 @@ impl TikvConfig { Ok(()) } + fn titan_data_exists(&self) -> Result> { + let exist = match self.storage.engine { + EngineType::RaftKv => { + // Check blob file dir is empty when titan is disabled + let titandb_path = if self.rocksdb.titan.dirname.is_empty() { + let kv_db_path = self.infer_kv_engine_path(None)?; + Path::new(&kv_db_path).join("titandb") + } else { + Path::new(&self.rocksdb.titan.dirname).to_path_buf() + }; + tikv_util::config::check_data_dir_empty(titandb_path.to_str().unwrap(), "blob") + .is_err() + } + EngineType::RaftKv2 => false, + }; + Ok(exist) + } + + fn kv_data_exists(&self) -> Result> { + let kv_data_exists = match self.storage.engine { + EngineType::RaftKv => { + let kv_db_path = self.infer_kv_engine_path(None)?; + RocksEngine::exists(&kv_db_path) + } + EngineType::RaftKv2 => Path::new(&self.storage.data_dir) + .join(DEFAULT_TABLET_SUB_DIR) + .exists(), + }; + Ok(kv_data_exists) + } + // As the init of `logger` is very early, this adjust needs to be separated and // called immediately after parsing the command line. #[allow(deprecated)] @@ -3974,6 +3987,55 @@ impl TikvConfig { } } + pub fn optional_default_cfg_adjust_with( + &mut self, + last_cfg: &Option, + ) -> Result<(), Box> { + let kv_data_exists = self.kv_data_exists()?; + let titan_data_exists = self.titan_data_exists()?; + + match self.storage.engine { + EngineType::RaftKv => { + if self.rocksdb.titan.enabled.is_none() { + // If the user doesn't specify titan.enabled, we enable it by default for newly + // created clusters. 
+ if (kv_data_exists && !titan_data_exists) || self.storage.enable_ttl { + self.rocksdb.titan.enabled = Some(false); + } else { + self.rocksdb.titan.enabled = Some(true); + } + } + if self.rocksdb.defaultcf.titan.min_blob_size.is_none() { + // get blob size from last config + self.rocksdb.defaultcf.titan.min_blob_size = + Some(if let Some(last_cfg) = &last_cfg { + // If previous config has titan enabled, we use the previous + // min-blob-size. + if last_cfg.rocksdb.titan.enabled.unwrap_or(false) { + last_cfg + .rocksdb + .defaultcf + .titan + .min_blob_size + .unwrap_or(DEFAULT_MIN_BLOB_SIZE) + } else { + // If previous config has titan disabled, we use the current default + // value + DEFAULT_MIN_BLOB_SIZE + } + } else { + DEFAULT_MIN_BLOB_SIZE + }); + } + } + EngineType::RaftKv2 => { + self.rocksdb.titan.enabled = Some(false); + self.rocksdb.defaultcf.titan.min_blob_size = Some(DEFAULT_MIN_BLOB_SIZE); + } + } + Ok(()) + } + #[allow(deprecated)] pub fn compatible_adjust(&mut self) { let default_raft_store = RaftstoreConfig::default(); @@ -4229,7 +4291,21 @@ impl TikvConfig { let tmp = tempfile::tempdir()?; let mut cfg = TikvConfig::default(); cfg.storage.data_dir = tmp.path().display().to_string(); - cfg.cfg_path = tmp.path().join(LAST_CONFIG_FILE).display().to_string(); + cfg.cfg_path = tmp.path().join("config.toml").display().to_string(); + + // create tmp config file + let mut f = fs::File::create(&cfg.cfg_path)?; + // write storage data dir to tmp config file + let content = format!( + r#" + [storage] + data-dir = "{}" + "#, + cfg.storage.data_dir, + ); + f.write_all(content.as_bytes())?; + f.sync_all()?; + Ok((cfg, tmp)) } @@ -4258,25 +4334,43 @@ impl TikvConfig { } } -/// Prevents launching with an incompatible configuration -/// -/// Loads the previously-loaded configuration from `last_tikv.toml`, -/// compares key configuration items and fails if they are not -/// identical. 
-pub fn check_critical_config(config: &TikvConfig) -> Result<(), String> { +pub fn validate_and_persist_config(config: &mut TikvConfig, persist: bool) -> Result<(), String> { // Check current critical configurations with last time, if there are some // changes, user must guarantee relevant works have been done. - if let Some(mut cfg) = get_last_config(&config.storage.data_dir) { - cfg.compatible_adjust(); - if let Err(e) = cfg.validate() { + let mut last_cfg = get_last_config(&config.storage.data_dir); + if let Some(last_cfg) = &mut last_cfg { + last_cfg.compatible_adjust(); + if let Err(e) = last_cfg.validate() { warn!("last_tikv.toml is invalid but ignored: {:?}", e); } - config.check_critical_cfg_with(&cfg)?; + } + + config.compatible_adjust(); + if let Err(e) = config.validate() { + return Err(format!("invalid configuration: {}", e)); + } + if let Err(e) = config.optional_default_cfg_adjust_with(&last_cfg) { + return Err(format!( + "failed to adjust optional default configuration: {}", + e + )); + } + + if let Some(ref last_cfg) = last_cfg { + if let Err(e) = config.check_critical_cfg_with(last_cfg) { + return Err(format!("critical config check failed: {}", e)); + } + } + + if persist { + if let Err(e) = persist_config(config) { + return Err(format!("persist critical config failed: {}", e)); + } } Ok(()) } -fn get_last_config(data_dir: &str) -> Option { +pub fn get_last_config(data_dir: &str) -> Option { let store_path = Path::new(data_dir); let last_cfg_path = store_path.join(LAST_CONFIG_FILE); if last_cfg_path.exists() { @@ -5827,7 +5921,7 @@ mod tests { #[test] fn test_update_titan_blob_run_mode_config() { - let mut cfg = TikvConfig::default(); + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); cfg.rocksdb.titan.enabled = Some(true); let (_, cfg_controller, ..) 
= new_engines::(cfg); for run_mode in [ @@ -5863,42 +5957,139 @@ mod tests { } #[test] - fn test_titan_auto_enable() { - // Do not auto enable titan for existing instances - let (cfg, dir) = TikvConfig::with_tmp().unwrap(); - persist_config(&cfg).unwrap(); - let (storage, ..) = new_engines::(cfg); - drop(storage); - let mut cfg = TikvConfig::from_file(&dir.path().join(LAST_CONFIG_FILE), None).unwrap(); - // titan.enabled is not specified. - assert_eq!(cfg.rocksdb.titan.enabled, None); - cfg.validate().unwrap(); - // Config optimized with titan.enabled = false, since it is an existing - // instance. - assert_eq!(cfg.rocksdb.titan.enabled, Some(false)); - let (_storage, cfg_controller, ..) = new_engines::(cfg); - assert_eq!( - cfg_controller.get_current().rocksdb.titan.enabled, - Some(false) - ); - drop(dir); + fn test_titan_config_compatible_upgrade() { + // Case 1: Upgrade from existing instance with titan disabled + { + // Mock a existing instance with titan disabled + let (mut cfg, dir) = TikvConfig::with_tmp().unwrap(); + cfg.rocksdb.titan.enabled = Some(false); + cfg.rocksdb.defaultcf.titan.min_blob_size = Some(ReadableSize::kb(1)); + persist_config(&cfg).unwrap(); + + let (storage, ..) = new_engines::(cfg); + drop(storage); + + let mut cfg = TikvConfig::from_file(&dir.path().join("config.toml"), None).unwrap(); + assert_eq!(cfg.rocksdb.titan.enabled, None); + validate_and_persist_config(&mut cfg, true).unwrap(); + // Titan is kept disabled + assert_eq!(cfg.rocksdb.titan.enabled, Some(false)); + assert_eq!( + cfg.rocksdb.defaultcf.titan.min_blob_size, + Some(ReadableSize::kb(32)), + ); + let (_storage, cfg_controller, ..) 
= new_engines::(cfg); + assert_eq!( + cfg_controller.get_current().rocksdb.titan.enabled, + Some(false) + ); + assert_eq!( + cfg_controller + .get_current() + .rocksdb + .defaultcf + .titan + .min_blob_size, + Some(ReadableSize::kb(32)), + ); + drop(dir); + } - // Auto enable titan for new instances - let (mut cfg, dir) = TikvConfig::with_tmp().unwrap(); - assert_eq!(cfg.rocksdb.titan.enabled, None); - cfg.validate().unwrap(); - persist_config(&cfg).unwrap(); - assert_eq!(cfg.rocksdb.titan.enabled, Some(true)); - let (storage, cfg_controller, ..) = new_engines::(cfg); - assert_eq!( - cfg_controller.get_current().rocksdb.titan.enabled, - Some(true) - ); - drop(storage); - // The config is persisted - let cfg = TikvConfig::from_file(&dir.path().join(LAST_CONFIG_FILE), None).unwrap(); - assert_eq!(cfg.rocksdb.titan.enabled, Some(true)); - drop(dir); + // Case 2: Upgrade from existing instance with titan enabled + { + // Mock a existing instance with titan enabled + let (mut cfg, dir) = TikvConfig::with_tmp().unwrap(); + cfg.rocksdb.titan.enabled = Some(true); + cfg.rocksdb.defaultcf.titan.min_blob_size = Some(ReadableSize::kb(1)); + persist_config(&cfg).unwrap(); + let (storage, ..) = new_engines::(cfg.clone()); + drop(storage); + + let mut cfg = TikvConfig::from_file(&dir.path().join("config.toml"), None).unwrap(); + cfg.rocksdb.titan.enabled = Some(true); + validate_and_persist_config(&mut cfg, true).unwrap(); + assert_eq!(cfg.rocksdb.titan.enabled, Some(true)); + // The min blob size is kept + assert_eq!( + cfg.rocksdb.defaultcf.titan.min_blob_size, + Some(ReadableSize::kb(1)), + ); + let (_storage, cfg_controller, ..) 
= new_engines::(cfg); + assert_eq!( + cfg_controller.get_current().rocksdb.titan.enabled, + Some(true) + ); + assert_eq!( + cfg_controller + .get_current() + .rocksdb + .defaultcf + .titan + .min_blob_size, + Some(ReadableSize::kb(1)), + ); + } + + // Case 3: Upgrade from existing instance with titan disabled and enable it now + { + // Mock a existing instance with titan disabled + let (mut cfg, dir) = TikvConfig::with_tmp().unwrap(); + cfg.rocksdb.titan.enabled = Some(false); + cfg.rocksdb.defaultcf.titan.min_blob_size = Some(ReadableSize::kb(1)); + persist_config(&cfg).unwrap(); + let (storage, ..) = new_engines::(cfg); + drop(storage); + + let mut cfg = TikvConfig::from_file(&dir.path().join("config.toml"), None).unwrap(); + cfg.rocksdb.titan.enabled = Some(true); + validate_and_persist_config(&mut cfg, true).unwrap(); + assert_eq!(cfg.rocksdb.titan.enabled, Some(true)); + // The min blob size is current default value + assert_eq!( + cfg.rocksdb.defaultcf.titan.min_blob_size, + Some(ReadableSize::kb(32)), + ); + let (_storage, cfg_controller, ..) = new_engines::(cfg); + assert_eq!( + cfg_controller.get_current().rocksdb.titan.enabled, + Some(true) + ); + assert_eq!( + cfg_controller + .get_current() + .rocksdb + .defaultcf + .titan + .min_blob_size, + Some(ReadableSize::kb(32)), + ); + } + + // Case 4: Create a new instance + { + let (mut cfg, _dir) = TikvConfig::with_tmp().unwrap(); + assert_eq!(cfg.rocksdb.titan.enabled, None); + validate_and_persist_config(&mut cfg, true).unwrap(); + assert_eq!(cfg.rocksdb.titan.enabled, Some(true)); + assert_eq!( + cfg.rocksdb.defaultcf.titan.min_blob_size, + Some(ReadableSize::kb(32)), + ); + let (_storage, cfg_controller, ..) 
= new_engines::(cfg); + assert_eq!( + cfg_controller.get_current().rocksdb.titan.enabled, + Some(true) + ); + assert_eq!( + cfg_controller + .get_current() + .rocksdb + .defaultcf + .titan + .min_blob_size, + Some(ReadableSize::kb(32)), + ); + } } #[test] @@ -6767,6 +6958,8 @@ mod tests { cfg.rocksdb.raftcf.max_compactions = None; cfg.raftdb.defaultcf.max_compactions = None; + cfg.rocksdb.defaultcf.titan.min_blob_size = None; + cfg.coprocessor .optimize_for(default_cfg.storage.engine == EngineType::RaftKv2); diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 6ba675082ff..798d7fd224d 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -272,7 +272,7 @@ fn test_serde_custom_tikv_config() { }; value.pd = PdConfig::new(vec!["example.com:443".to_owned()]); let titan_cf_config = TitanCfConfig { - min_blob_size: ReadableSize(2018), + min_blob_size: Some(ReadableSize(2018)), blob_file_compression: CompressionType::Lz4, zstd_dict_size: ReadableSize::kb(16), blob_cache_size: ReadableSize::gb(12), @@ -432,7 +432,7 @@ fn test_serde_custom_tikv_config() { hard_pending_compaction_bytes_limit: Some(ReadableSize::gb(12)), force_consistency_checks: true, titan: TitanCfConfig { - min_blob_size: ReadableSize(1024), // default value + min_blob_size: None, // default value blob_file_compression: CompressionType::Zstd, zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), @@ -506,7 +506,7 @@ fn test_serde_custom_tikv_config() { hard_pending_compaction_bytes_limit: Some(ReadableSize::gb(12)), force_consistency_checks: true, titan: TitanCfConfig { - min_blob_size: ReadableSize(1024), // default value + min_blob_size: None, // default value blob_file_compression: CompressionType::Zstd, zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), @@ -580,7 +580,7 @@ fn test_serde_custom_tikv_config() { hard_pending_compaction_bytes_limit: Some(ReadableSize::gb(12)), 
force_consistency_checks: true, titan: TitanCfConfig { - min_blob_size: ReadableSize(1024), // default value + min_blob_size: None, // default value blob_file_compression: CompressionType::Zstd, zstd_dict_size: ReadableSize::kb(0), blob_cache_size: ReadableSize::mb(0), diff --git a/tests/integrations/storage/test_titan.rs b/tests/integrations/storage/test_titan.rs index 0cd6c631633..62b019234ae 100644 --- a/tests/integrations/storage/test_titan.rs +++ b/tests/integrations/storage/test_titan.rs @@ -158,7 +158,7 @@ fn test_delete_files_in_range_for_titan() { cfg.rocksdb.defaultcf.dynamic_level_bytes = false; cfg.rocksdb.defaultcf.titan.min_gc_batch_size = ReadableSize(0); cfg.rocksdb.defaultcf.titan.discardable_ratio = 0.4; - cfg.rocksdb.defaultcf.titan.min_blob_size = ReadableSize(0); + cfg.rocksdb.defaultcf.titan.min_blob_size = Some(ReadableSize(0)); let resource = cfg .rocksdb .build_resources(Default::default(), cfg.storage.engine); From cf0560a5e21b21d387e02253e5d324f10dd9e8eb Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Fri, 19 Jan 2024 10:48:18 -0800 Subject: [PATCH 1126/1149] raftstore: check last heartbeat time before doing conf change remove node (#16174) close tikv/tikv#15799 Check the last heartbeat time before doing remove node operation. It defines 8*heartbeat interval as the threshold of slow peer. And if the remove node operation will lead to at least half of the peers are slow, then the remove node operation will fail. 
Signed-off-by: Qi Xu Co-authored-by: Qi Xu --- .../operation/command/admin/conf_change.rs | 1 + components/raftstore-v2/src/raft/peer.rs | 5 + components/raftstore/src/store/peer.rs | 1 + components/raftstore/src/store/util.rs | 150 ++++++++++++++++++ .../raftstore/test_conf_change.rs | 37 +++++ 5 files changed, 194 insertions(+) diff --git a/components/raftstore-v2/src/operation/command/admin/conf_change.rs b/components/raftstore-v2/src/operation/command/admin/conf_change.rs index 55cee490e52..5c7ff96a955 100644 --- a/components/raftstore-v2/src/operation/command/admin/conf_change.rs +++ b/components/raftstore-v2/src/operation/command/admin/conf_change.rs @@ -106,6 +106,7 @@ impl Peer { changes.as_ref(), &cc, self.is_in_force_leader(), + self.get_peer_heartbeats(), )?; // TODO: check if the new peer is already in history record. diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index c2f09ef19dd..b535d7f9a47 100644 --- a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -590,6 +590,11 @@ impl Peer { self.peer_heartbeats.remove(&peer_id); } + #[inline] + pub fn get_peer_heartbeats(&self) -> &HashMap { + &self.peer_heartbeats + } + #[inline] pub fn has_peer(&self, peer_id: u64) -> bool { self.region() diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index 9d5c059c3cd..da6fb628231 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -4784,6 +4784,7 @@ where changes.as_ref(), &cc, self.is_in_force_leader(), + &self.peer_heartbeats, )?; ctx.raft_metrics.propose.conf_change.inc(); diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 367013a0adc..68225a982b3 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1010,6 +1010,7 @@ pub fn check_conf_change( change_peers: &[ChangePeerRequest], cc: &impl 
ConfChangeI, ignore_safety: bool, + peer_heartbeat: &collections::HashMap, ) -> Result<()> { let current_progress = node.status().progress.unwrap().clone(); let mut after_progress = current_progress.clone(); @@ -1093,6 +1094,13 @@ pub fn check_conf_change( return Err(box_err!("multiple changes that only effect learner")); } + check_remove_or_demote_voter( + region.get_id(), + cfg, + change_peers, + leader.get_id(), + peer_heartbeat, + )?; if !ignore_safety { let promoted_commit_index = after_progress.maximal_committed_index().0; let first_index = node.raft.raft_log.first_index(); @@ -1121,6 +1129,68 @@ pub fn check_conf_change( } } +fn check_remove_or_demote_voter( + region_id: u64, + cfg: &Config, + change_peers: &[ChangePeerRequest], + leader_id: u64, + peer_heartbeat: &collections::HashMap, +) -> Result<()> { + let mut slow_peer_count = 0; + let mut normal_peer_count = 0; + // Here we assume if the last beartbeat is within 2 election timeout, the peer + // is healthy. This is to be tolerant to some slightly slow peers when + // the leader is in hibernate mode. + let slow_peer_threshold = + 2 * cfg.raft_base_tick_interval.0 * cfg.raft_max_election_timeout_ticks as u32; + for (id, last_heartbeat) in peer_heartbeat { + // leader itself is not a slow peer + if *id == leader_id || last_heartbeat.elapsed() <= slow_peer_threshold { + normal_peer_count += 1; + } else { + slow_peer_count += 1; + } + } + + let mut normal_peers_to_remove = vec![]; + for cp in change_peers { + let (change_type, peer) = (cp.get_change_type(), cp.get_peer()); + if change_type == ConfChangeType::RemoveNode + || change_type == ConfChangeType::AddLearnerNode + { + // If the change_type is AddLearnerNode and the last heartbeat is found, it + // means it's a demote from voter as AddLearnerNode on existing learner node is + // not allowed. 
+ if let Some(last_heartbeat) = peer_heartbeat.get(&peer.get_id()) { + // peer itself is *not* slow peer, but current slow peer is >= total peers/2 + if last_heartbeat.elapsed() <= slow_peer_threshold { + normal_peer_count -= 1; + normal_peers_to_remove.push(peer.clone()); + } + } + } + } + + // only block the conf change when there's chance to improve the availability + // For example, if there's no normal peers actually, then we still allow the + // option to finish as there's no choice. + // We only block the operation when normal peers are going to be removed and it + // could lead to slow peers more than normal peers + if !normal_peers_to_remove.is_empty() + && slow_peer_count > 0 + && slow_peer_count >= normal_peer_count + { + return Err(box_err!( + "Ignore conf change command on region {} because RemoveNode or Demote a voter on peers {:?} may lead to unavailability. There're {} slow peers and {} normal peers", + region_id, + &normal_peers_to_remove, + slow_peer_count, + normal_peer_count + )); + } + + Ok(()) +} pub struct MsgType<'a>(pub &'a RaftMessage); impl Display for MsgType<'_> { @@ -2505,4 +2575,84 @@ mod tests { mismatch_err.set_store_peer_id(2); assert_eq!(region_err.get_mismatch_peer_id(), &mismatch_err) } + + #[test] + fn test_check_conf_change_upon_slow_peers() { + // Create a sample configuration + let mut cfg = Config::default(); + cfg.raft_max_election_timeout_ticks = 10; + // Initialize change_peers + let change_peers = vec![ + ChangePeerRequest { + change_type: eraftpb::ConfChangeType::RemoveNode, + peer: Some(metapb::Peer { + id: 2, + ..Default::default() + }) + .into(), + ..Default::default() + }, + ChangePeerRequest { + change_type: eraftpb::ConfChangeType::AddLearnerNode, + peer: Some(metapb::Peer { + id: 2, + ..Default::default() + }) + .into(), + ..Default::default() + }, + ]; + + for i in 0..change_peers.len() { + // Call the function under test and assert that the function returns failed + let mut cp = vec![change_peers[i].clone()]; 
+ let mut peer_heartbeat = collections::HashMap::default(); + peer_heartbeat.insert( + 1, + std::time::Instant::now() - std::time::Duration::from_secs(1), + ); + peer_heartbeat.insert( + 2, + std::time::Instant::now() - std::time::Duration::from_secs(1), + ); + peer_heartbeat.insert( + 3, + std::time::Instant::now() - std::time::Duration::from_secs(1), + ); + // Call the function under test and assert that the function returns Ok + check_remove_or_demote_voter(1, &cfg, &cp, 1, &peer_heartbeat).unwrap(); + + // now make one peer slow + if let Some(peer_heartbeat) = peer_heartbeat.get_mut(&3) { + *peer_heartbeat = std::time::Instant::now() - std::time::Duration::from_secs(100); + } + + // Call the function under test + let result = check_remove_or_demote_voter(1, &cfg, &cp, 1, &peer_heartbeat); + // Assert that the function returns failed + assert!(result.is_err()); + + // remove the slow peer instead + cp[0].peer = Some(metapb::Peer { + id: 3, + ..Default::default() + }) + .into(); + // Call the function under test + check_remove_or_demote_voter(1, &cfg, &cp, 1, &peer_heartbeat).unwrap(); + + // there's no remove node, it's fine with slow peers. 
+ cp[0] = ChangePeerRequest { + change_type: eraftpb::ConfChangeType::AddNode, + peer: Some(metapb::Peer { + id: 2, + ..Default::default() + }) + .into(), + ..Default::default() + }; + // Call the function under test + check_remove_or_demote_voter(1, &cfg, &cp, 1, &peer_heartbeat).unwrap(); + } + } } diff --git a/tests/integrations/raftstore/test_conf_change.rs b/tests/integrations/raftstore/test_conf_change.rs index 79b3488d868..08a2ff48d17 100644 --- a/tests/integrations/raftstore/test_conf_change.rs +++ b/tests/integrations/raftstore/test_conf_change.rs @@ -863,3 +863,40 @@ fn test_conf_change_fast() { must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); assert!(timer.saturating_elapsed() < Duration::from_secs(5)); } + +#[test_case(test_raftstore::new_node_cluster)] +#[test_case(test_raftstore_v2::new_node_cluster)] +fn test_remove_node_on_partition() { + let count = 3; + let mut cluster = new_cluster(0, count); + let pd_client = Arc::clone(&cluster.pd_client); + // Disable default max peer number check. 
+ pd_client.disable_default_operator(); + cluster.cfg.raft_store.raft_heartbeat_ticks = 1; + cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(10); + cluster.cfg.raft_store.raft_election_timeout_ticks = 3; + cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(20); + let r1 = cluster.run_conf_change(); + + cluster.must_put(b"k0", b"v0"); + pd_client.must_add_peer(r1, new_peer(2, 2)); + must_get_equal(&cluster.get_engine(2), b"k0", b"v0"); + pd_client.must_add_peer(r1, new_peer(3, 3)); + must_get_equal(&cluster.get_engine(3), b"k0", b"v0"); + + // peer 3 isolation + cluster.add_send_filter(IsolationFilterFactory::new(3)); + // sleep for 13 heartbeat interval (>12 should be ok) + let sleep_time = cluster.cfg.raft_store.raft_base_tick_interval.0 + * (4 * cluster.cfg.raft_store.raft_election_timeout_ticks as u32 + 1); + thread::sleep(sleep_time); + pd_client.remove_peer(r1, new_peer(2, 2)); + cluster.must_put(b"k1", b"v1"); + thread::sleep(Duration::from_millis(500)); + // remove peer 2 should not work + pd_client.must_have_peer(r1, new_peer(2, 2)); + + // remove peer 3 should work + pd_client.must_remove_peer(r1, new_peer(3, 3)); + cluster.must_put(b"k3", b"v3"); +} From 66301257e4d029743f666e75c942997a7461f4a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 22 Jan 2024 01:19:48 +0800 Subject: [PATCH 1127/1149] log_backup: stop task while memory out of quota (#16008) close tikv/tikv#15414 This PR have refactored the subscription manager. Generally, this: - Replace the instance itself with a handle. This make it a real reactor(with an real event loop). - Handle the result of subscripting a region via the message system instead of asynchronously, this will be the basis of making subscription tracker thread safe and (someday, hopefully) merge the basic libraries with TiCDC. 
Based on the changes above, this PR also allows a region to be temporarily deregistered while we are about to reach the memory quota. Signed-off-by: Yu Juncen Co-authored-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 1 + components/backup-stream/Cargo.toml | 3 + components/backup-stream/src/endpoint.rs | 113 ++- components/backup-stream/src/errors.rs | 22 +- components/backup-stream/src/event_loader.rs | 27 +- components/backup-stream/src/observer.rs | 2 + components/backup-stream/src/router.rs | 12 +- .../backup-stream/src/subscription_manager.rs | 850 +++++++++++++----- .../backup-stream/src/subscription_track.rs | 53 +- components/backup-stream/src/utils.rs | 1 + .../backup-stream/tests/failpoints/mod.rs | 3 + components/error_code/src/backup_stream.rs | 4 + components/tikv_util/src/memory.rs | 10 + 13 files changed, 826 insertions(+), 275 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 067c01db532..927570ac42c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -516,6 +516,7 @@ dependencies = [ "security", "slog", "slog-global", + "tempdir", "tempfile", "test_pd", "test_pd_client", diff --git a/components/backup-stream/Cargo.toml b/components/backup-stream/Cargo.toml index 50b28f8d2f9..d37ba3cacb6 100644 --- a/components/backup-stream/Cargo.toml +++ b/components/backup-stream/Cargo.toml @@ -58,6 +58,7 @@ prometheus-static-metric = "0.5" protobuf = { version = "2.8", features = ["bytes"] } raft = { workspace = true } raftstore = { workspace = true } +rand = "0.8.0" regex = "1" resolved_ts = { workspace = true } security = { path = "../security" } @@ -86,10 +87,12 @@ grpcio = { workspace = true } hex = "0.4" protobuf = { version = "2.8", features = ["bytes"] } rand = "0.8.0" +tempdir = "0.3" tempfile = "3.0" test_pd = { workspace = true } test_pd_client = { workspace = true } test_raftstore = { workspace = true } test_util = { workspace = true } +tokio = { version = "1.5", features = 
["test-util"] } url = "2" walkdir = "2" diff --git a/components/backup-stream/src/endpoint.rs b/components/backup-stream/src/endpoint.rs index e0cc3a91dfb..a2271b10331 100644 --- a/components/backup-stream/src/endpoint.rs +++ b/components/backup-stream/src/endpoint.rs @@ -15,7 +15,7 @@ use error_code::ErrorCodeExt; use futures::{stream::AbortHandle, FutureExt, TryFutureExt}; use kvproto::{ brpb::{StreamBackupError, StreamBackupTaskInfo}, - metapb::Region, + metapb::{Region, RegionEpoch}, }; use pd_client::PdClient; use raft::StateRole; @@ -39,7 +39,7 @@ use tikv_util::{ use tokio::{ io::Result as TokioResult, runtime::{Handle, Runtime}, - sync::{oneshot, Semaphore}, + sync::{mpsc::Sender, oneshot, Semaphore}, }; use tokio_stream::StreamExt; use tracing::instrument; @@ -53,7 +53,7 @@ use crate::{ BasicFlushObserver, CheckpointManager, CheckpointV3FlushObserver, FlushObserver, GetCheckpointResult, RegionIdWithVersion, Subscription, }, - errors::{Error, Result}, + errors::{Error, ReportableResult, Result}, event_loader::InitialDataLoader, future, metadata::{store::MetaStore, MetadataClient, MetadataEvent, StreamTask}, @@ -88,7 +88,7 @@ pub struct Endpoint { pub range_router: Router, observer: BackupStreamObserver, pool: Runtime, - region_operator: RegionSubscriptionManager, + region_operator: Sender, failover_time: Option, // We holds the config before, even it is useless for now, // however probably it would be useful in the future. @@ -169,9 +169,7 @@ where Arc::clone(&initial_scan_semaphore), ), accessor.clone(), - observer.clone(), meta_client.clone(), - pd_client.clone(), ((config.num_threads + 1) / 2).max(1), resolver, ); @@ -446,13 +444,15 @@ where /// Convert a batch of events to the cmd batch, and update the resolver /// status. 
- fn record_batch(subs: SubscriptionTracer, batch: CmdBatch) -> Option { + fn record_batch(subs: SubscriptionTracer, batch: CmdBatch) -> Result { let region_id = batch.region_id; let mut resolver = match subs.get_subscription_of(region_id) { Some(rts) => rts, None => { debug!("the region isn't registered (no resolver found) but sent to backup_batch, maybe stale."; "region_id" => %region_id); - return None; + // Sadly, we know nothing about the epoch in this context. Thankfully this is a + // local error and won't be sent to outside. + return Err(Error::ObserveCanceled(region_id, RegionEpoch::new())); } }; // Stale data is acceptable, while stale locks may block the checkpoint @@ -469,11 +469,11 @@ where // ``` if batch.pitr_id != resolver.value().handle.id { debug!("stale command"; "region_id" => %region_id, "now" => ?resolver.value().handle.id, "remote" => ?batch.pitr_id); - return None; + return Err(Error::ObserveCanceled(region_id, RegionEpoch::new())); } - let kvs = ApplyEvents::from_cmd_batch(batch, resolver.value_mut().resolver()); - Some(kvs) + let kvs = ApplyEvents::from_cmd_batch(batch, resolver.value_mut().resolver())?; + Ok(kvs) } fn backup_batch(&self, batch: CmdBatch, work: Work) { @@ -482,6 +482,7 @@ where let router = self.range_router.clone(); let sched = self.scheduler.clone(); let subs = self.subs.clone(); + let region_op = self.region_operator.clone(); let region = batch.region_id; let from_idx = batch.cmds.first().map(|c| c.index).unwrap_or(0); let (to_idx, term) = batch @@ -492,10 +493,27 @@ where self.pool.spawn(root!("backup_batch"; async move { let region_id = batch.region_id; let kvs = Self::record_batch(subs, batch); - if kvs.as_ref().map(|x| x.is_empty()).unwrap_or(true) { - return; - } - let kvs = kvs.unwrap(); + let kvs = match kvs { + Err(Error::OutOfQuota { region_id }) => { + region_op.send(ObserveOp::HighMemUsageWarning { region_id }).await + .map_err(|err| Error::Other(box_err!("failed to send, are we shutting down? 
{}", err))) + .report_if_err(""); + return + } + Err(Error::ObserveCanceled(..)) => { + return; + } + Err(err) => { + err.report(format_args!("unexpected error during handing region event for {}.", region_id)); + return; + } + Ok(batch) => { + if batch.is_empty() { + return + } + batch + } + }; HANDLE_EVENT_DURATION_HISTOGRAM .with_label_values(&["to_stream_event"]) @@ -595,6 +613,7 @@ where .try_for_each(|r| { tx.blocking_send(ObserveOp::Start { region: r.region.clone(), + handle: ObserveHandle::new(), }) }); }), @@ -609,11 +628,26 @@ where // Don't reschedule this command: or once the endpoint's mailbox gets // full, the system might deadlock. while let Some(cmd) = rx.recv().await { - self.region_operator.request(cmd).await; + self.region_op(cmd).await; } Ok(()) } + /// send an operation request to the manager. + /// the returned future would be resolved after send is success. + /// the operation would be executed asynchronously. + async fn region_op(&self, cmd: ObserveOp) { + self.region_operator + .send(cmd) + .await + .map_err(|err| { + Error::Other( + format!("cannot send to region operator, are we shutting down? ({err})").into(), + ) + }) + .report_if_err("send region cmd") + } + // register task ranges pub fn on_register(&self, task: StreamTask) { let name = task.info.name.clone(); @@ -779,7 +813,10 @@ where }), min_ts, }; - op.request(req).await; + if let Err(err) = op.send(req).await { + annotate!(err, "BUG: region operator channel closed.") + .report("when executing region op"); + } rx.await .map_err(|err| annotate!(err, "failed to send request for resolve regions")) } @@ -924,7 +961,15 @@ where /// Modify observe over some region. /// This would register the region to the RaftStore. pub fn on_modify_observe(&self, op: ObserveOp) { - self.pool.block_on(self.region_operator.request(op)); + self.pool + .block_on(self.region_operator.send(op)) + .map_err(|err| { + Error::Other(box_err!( + "cannot send to region operator, are we shutting down? 
({})", + err + )) + }) + .report_if_err("during on_modify_observe"); } fn update_semaphore_capacity(&self, sema: &Arc, diff: isize) { @@ -1104,6 +1149,9 @@ pub enum BackupStreamResolver { V1(LeadershipResolver), // for raftstore-v2, it has less regions. we use CDCHandler to check leadership of a region. V2(RT, PhantomData), + #[cfg(test)] + // for some test cases, it is OK to don't check leader. + Nop, } impl BackupStreamResolver @@ -1118,6 +1166,8 @@ where let x = x.clone(); resolve_by_raft(regions, min_ts, x).await } + #[cfg(test)] + BackupStreamResolver::Nop => regions, } } } @@ -1216,6 +1266,7 @@ type ResolveRegionsCallback = Box; pub enum ObserveOp { Start { region: Region, + handle: ObserveHandle, }, Stop { region: Region, @@ -1230,24 +1281,27 @@ pub enum ObserveOp { RefreshResolver { region: Region, }, - NotifyFailToStartObserve { + NotifyStartObserveResult { region: Region, handle: ObserveHandle, - err: Box, - has_failed_for: u8, + err: Option>, }, ResolveRegions { callback: ResolveRegionsCallback, min_ts: TimeStamp, }, + HighMemUsageWarning { + region_id: u64, + }, } impl std::fmt::Debug for ObserveOp { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::Start { region } => f + Self::Start { region, handle } => f .debug_struct("Start") .field("region", &utils::debug_region(region)) + .field("handle", &handle) .finish(), Self::Stop { region } => f .debug_struct("Stop") @@ -1261,23 +1315,27 @@ impl std::fmt::Debug for ObserveOp { .debug_struct("RefreshResolver") .field("region", &utils::debug_region(region)) .finish(), - Self::NotifyFailToStartObserve { + Self::NotifyStartObserveResult { region, handle, err, - has_failed_for, } => f - .debug_struct("NotifyFailToStartObserve") + .debug_struct("NotifyStartObserveResult") .field("region", &utils::debug_region(region)) .field("handle", handle) .field("err", err) - .field("has_failed_for", has_failed_for) .finish(), Self::ResolveRegions { min_ts, .. 
} => f .debug_struct("ResolveRegions") .field("min_ts", min_ts) .field("callback", &format_args!("fn {{ .. }}")) .finish(), + Self::HighMemUsageWarning { + region_id: inconsistent_region_id, + } => f + .debug_struct("HighMemUsageWarning") + .field("inconsistent_region", &inconsistent_region_id) + .finish(), } } } @@ -1338,8 +1396,9 @@ impl Task { ObserveOp::Stop { .. } => "modify_observe.stop", ObserveOp::Destroy { .. } => "modify_observe.destroy", ObserveOp::RefreshResolver { .. } => "modify_observe.refresh_resolver", - ObserveOp::NotifyFailToStartObserve { .. } => "modify_observe.retry", + ObserveOp::NotifyStartObserveResult { .. } => "modify_observe.retry", ObserveOp::ResolveRegions { .. } => "modify_observe.resolve", + ObserveOp::HighMemUsageWarning { .. } => "modify_observe.high_mem", }, Task::ForceFlush(..) => "force_flush", Task::FatalError(..) => "fatal_error", diff --git a/components/backup-stream/src/errors.rs b/components/backup-stream/src/errors.rs index df3c5ea7032..eaad82d638c 100644 --- a/components/backup-stream/src/errors.rs +++ b/components/backup-stream/src/errors.rs @@ -1,7 +1,8 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
use std::{ - error::Error as StdError, fmt::Display, io::Error as IoError, result::Result as StdResult, + error::Error as StdError, fmt::Display, io::Error as IoError, panic::Location, + result::Result as StdResult, }; use error_code::ErrorCodeExt; @@ -18,16 +19,19 @@ use crate::{endpoint::Task, metrics}; #[derive(ThisError, Debug)] pub enum Error { - #[error("gRPC meet error {0}")] - Grpc(#[from] GrpcError), - #[error("Protobuf meet error {0}")] - Protobuf(#[from] ProtobufError), #[error("No such task {task_name:?}")] NoSuchTask { task_name: String }, #[error("Observe have already canceled for region {0} (version = {1:?})")] ObserveCanceled(u64, RegionEpoch), #[error("Malformed metadata {0}")] MalformedMetadata(String), + #[error("Out of quota for region {region_id}")] + OutOfQuota { region_id: u64 }, + + #[error("gRPC meet error {0}")] + Grpc(#[from] GrpcError), + #[error("Protobuf meet error {0}")] + Protobuf(#[from] ProtobufError), #[error("I/O Error: {0}")] Io(#[from] IoError), #[error("Txn error: {0}")] @@ -40,6 +44,7 @@ pub enum Error { RaftRequest(StoreError), #[error("Error from raftstore: {0}")] RaftStore(#[from] RaftStoreError), + #[error("{context}: {inner_error}")] Contextual { context: String, @@ -65,6 +70,7 @@ impl ErrorCodeExt for Error { Error::Other(_) => OTHER, Error::RaftStore(_) => RAFTSTORE, Error::ObserveCanceled(..) => OBSERVE_CANCELED, + Error::OutOfQuota { .. } => OUT_OF_QUOTA, Error::Grpc(_) => GRPC, } } @@ -124,6 +130,7 @@ where Error: From, { #[inline(always)] + #[track_caller] fn report_if_err(self, context: impl ToString) { if let Err(err) = self { Error::from(err).report(context.to_string()) @@ -147,8 +154,11 @@ macro_rules! 
annotate { } impl Error { + #[track_caller] pub fn report(&self, context: impl Display) { - warn!("backup stream meet error"; "context" => %context, "err" => %self, "verbose_err" => ?self); + warn!("backup stream meet error"; "context" => %context, "err" => %self, + "verbose_err" => ?self, + "position" => ?Location::caller()); metrics::STREAM_ERROR .with_label_values(&[self.kind()]) .inc() diff --git a/components/backup-stream/src/event_loader.rs b/components/backup-stream/src/event_loader.rs index c78c2c53a19..467b0bcaa92 100644 --- a/components/backup-stream/src/event_loader.rs +++ b/components/backup-stream/src/event_loader.rs @@ -36,6 +36,10 @@ use crate::{ }; const MAX_GET_SNAPSHOT_RETRY: usize = 5; +/// The threshold of slowing down initial scanning. +/// While the memory usage reaches this ratio, we will consume the result of +/// initial scanning more frequently. +const SLOW_DOWN_INITIAL_SCAN_RATIO: f64 = 0.7; struct ScanResult { more: bool, @@ -47,6 +51,7 @@ struct ScanResult { pub struct EventLoader { scanner: DeltaScanner, // pooling the memory. + region: Region, entry_batch: Vec, } @@ -76,6 +81,7 @@ impl EventLoader { Ok(Self { scanner, + region: region.clone(), entry_batch: Vec::with_capacity(ENTRY_BATCH_SIZE), }) } @@ -110,7 +116,9 @@ impl EventLoader { Some(entry) => { let size = entry.size(); batch.push(entry); - if memory_quota.alloc(size).is_err() { + if memory_quota.alloc(size).is_err() + || memory_quota.source().used_ratio() > SLOW_DOWN_INITIAL_SCAN_RATIO + { return Ok(self.out_of_memory()); } } @@ -151,7 +159,11 @@ impl EventLoader { })?; debug!("meet lock during initial scanning."; "key" => %utils::redact(&lock_at), "ts" => %lock.ts); if utils::should_track_lock(&lock) { - resolver.track_phase_one_lock(lock.ts, lock_at); + resolver + .track_phase_one_lock(lock.ts, lock_at) + .map_err(|_| Error::OutOfQuota { + region_id: self.region.id, + })?; } } TxnEntry::Commit { default, write, .. 
} => { @@ -444,8 +456,6 @@ where start_ts: TimeStamp, snap: impl Snapshot, ) -> Result { - let region_id = region.get_id(); - let mut join_handles = Vec::with_capacity(8); let permit = frame!(self.concurrency_limit.acquire()) @@ -463,15 +473,6 @@ where .await .map_err(|err| annotate!(err, "tokio runtime failed to join consuming threads"))?; - self.with_resolver(region, &handle, |r| { - r.phase_one_done(); - Ok(()) - }) - .context(format_args!( - "failed to finish phase 1 for region {:?}", - region_id - ))?; - Ok(stats) } } diff --git a/components/backup-stream/src/observer.rs b/components/backup-stream/src/observer.rs index 8947d2068c3..6a40a336fb8 100644 --- a/components/backup-stream/src/observer.rs +++ b/components/backup-stream/src/observer.rs @@ -56,6 +56,7 @@ impl BackupStreamObserver { .scheduler .schedule(Task::ModifyObserve(ObserveOp::Start { region: region.clone(), + handle: ObserveHandle::new(), })) { use crate::errors::Error; @@ -128,6 +129,7 @@ impl CmdObserver for BackupStreamObserver { self.scheduler, Task::ModifyObserve(ObserveOp::Start { region: region.clone(), + handle: ObserveHandle::new(), }) ); } diff --git a/components/backup-stream/src/router.rs b/components/backup-stream/src/router.rs index 9a34fa75e94..9ad8521a1b7 100644 --- a/components/backup-stream/src/router.rs +++ b/components/backup-stream/src/router.rs @@ -153,7 +153,7 @@ impl ApplyEvents { /// those keys. /// Note: the resolved ts cannot be advanced if there is no command, maybe /// we also need to update resolved_ts when flushing? 
- pub fn from_cmd_batch(cmd: CmdBatch, resolver: &mut TwoPhaseResolver) -> Self { + pub fn from_cmd_batch(cmd: CmdBatch, resolver: &mut TwoPhaseResolver) -> Result { let region_id = cmd.region_id; let mut result = vec![]; for req in cmd @@ -197,7 +197,9 @@ impl ApplyEvents { }) { Ok(lock) => { if utils::should_track_lock(&lock) { - resolver.track_lock(lock.ts, key) + resolver + .track_lock(lock.ts, key) + .map_err(|_| Error::OutOfQuota { region_id })?; } } Err(err) => err.report(format!("region id = {}", region_id)), @@ -220,11 +222,11 @@ impl ApplyEvents { } result.push(item); } - Self { + Ok(Self { events: result, region_id, region_resolved_ts: resolver.resolved_ts().into_inner(), - } + }) } pub fn push(&mut self, event: ApplyEvent) { @@ -316,7 +318,7 @@ impl ApplyEvent { /// The shared version of router. #[derive(Debug, Clone)] -pub struct Router(Arc); +pub struct Router(pub(crate) Arc); pub struct Config { pub prefix: PathBuf, diff --git a/components/backup-stream/src/subscription_manager.rs b/components/backup-stream/src/subscription_manager.rs index 88eb5dea6ec..7641d400fec 100644 --- a/components/backup-stream/src/subscription_manager.rs +++ b/components/backup-stream/src/subscription_manager.rs @@ -1,23 +1,23 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::{sync::Arc, time::Duration}; +use std::{collections::HashMap, sync::Arc, time::Duration}; use engine_traits::KvEngine; -use error_code::ErrorCodeExt; use futures::FutureExt; use kvproto::metapb::Region; -use pd_client::PdClient; use raft::StateRole; use raftstore::{ coprocessor::{ObserveHandle, RegionInfoProvider}, router::CdcHandle, store::fsm::ChangeObserver, }; +use rand::Rng; use tikv::storage::Statistics; use tikv_util::{ - box_err, debug, info, sys::thread::ThreadBuildWrapper, time::Instant, warn, worker::Scheduler, + box_err, debug, info, memory::MemoryQuota, sys::thread::ThreadBuildWrapper, time::Instant, + warn, worker::Scheduler, }; -use tokio::sync::mpsc::{channel, error::SendError, Receiver, Sender}; +use tokio::sync::mpsc::{channel, error::SendError, Receiver, Sender, WeakSender}; use tracing::instrument; use tracing_active_tree::root; use txn_types::TimeStamp; @@ -25,14 +25,13 @@ use txn_types::TimeStamp; use crate::{ annotate, endpoint::{BackupStreamResolver, ObserveOp}, - errors::{Error, Result}, + errors::{Error, ReportableResult, Result}, event_loader::InitialDataLoader, future, metadata::{store::MetaStore, CheckpointProvider, MetadataClient}, metrics, - observer::BackupStreamObserver, router::{Router, TaskSelector}, - subscription_track::{CheckpointType, ResolveResult, SubscriptionTracer}, + subscription_track::{CheckpointType, Ref, RefMut, ResolveResult, SubscriptionTracer}, try_send, utils::{self, CallbackWaitGroup, Work}, Task, @@ -40,8 +39,6 @@ use crate::{ type ScanPool = tokio::runtime::Runtime; -const INITIAL_SCAN_FAILURE_MAX_RETRY_TIME: usize = 10; - // The retry parameters for failed to get last checkpoint ts. // When PD is temporarily disconnected, we may need this retry. 
// The total duration of retrying is about 345s ( 20 * 16 + 15 ), @@ -49,12 +46,20 @@ const INITIAL_SCAN_FAILURE_MAX_RETRY_TIME: usize = 10; const TRY_START_OBSERVE_MAX_RETRY_TIME: u8 = 24; const RETRY_AWAIT_BASIC_DURATION: Duration = Duration::from_secs(1); const RETRY_AWAIT_MAX_DURATION: Duration = Duration::from_secs(16); +const OOM_BACKOFF_BASE: Duration = Duration::from_secs(60); +const OOM_BACKOFF_JITTER_SECS: u64 = 60; fn backoff_for_start_observe(failed_for: u8) -> Duration { - Ord::min( + let res = Ord::min( RETRY_AWAIT_BASIC_DURATION * (1 << failed_for), RETRY_AWAIT_MAX_DURATION, - ) + ); + fail::fail_point!("subscribe_mgr_retry_start_observe_delay", |v| { + v.and_then(|x| x.parse::().ok()) + .map(Duration::from_millis) + .unwrap_or(res) + }); + res } /// a request for doing initial scanning. @@ -62,6 +67,11 @@ struct ScanCmd { region: Region, handle: ObserveHandle, last_checkpoint: TimeStamp, + + // This channel will be used to send the result of the initial scanning. + // NOTE: perhaps we can make them an closure so it will be more flexible. + // but for now there isn't requirement of that. + feedback_channel: Sender, _work: Work, } @@ -196,33 +206,11 @@ impl ScanCmd { utils::record_cf_stat("default", &stat.data); Ok(()) } - - /// execute the command, when meeting error, retrying. - #[instrument(skip_all)] - async fn exec_by_with_retry(self, init: impl InitialScan) { - let mut retry_time = INITIAL_SCAN_FAILURE_MAX_RETRY_TIME; - loop { - match self.exec_by(init.clone()).await { - Err(err) if should_retry(&err) && retry_time > 0 => { - tokio::time::sleep(Duration::from_millis(500)).await; - warn!("meet retryable error"; "err" => %err, "retry_time" => retry_time); - retry_time -= 1; - continue; - } - Err(err) if retry_time == 0 => { - init.handle_fatal_error(&self.region, err.context("retry time exceeds")); - break; - } - // Errors which `should_retry` returns false means they can be ignored. 
- Err(_) | Ok(_) => break, - } - } - } } async fn scan_executor_loop(init: impl InitialScan, mut cmds: Receiver) { while let Some(cmd) = cmds.recv().await { - debug!("handling initial scan request"; "region_id" => %cmd.region.get_id()); + debug!("handling initial scan request"; utils::slog_region(&cmd.region)); metrics::PENDING_INITIAL_SCAN_LEN .with_label_values(&["queuing"]) .dec(); @@ -242,7 +230,21 @@ async fn scan_executor_loop(init: impl InitialScan, mut cmds: Receiver) metrics::PENDING_INITIAL_SCAN_LEN .with_label_values(&["executing"]) .inc(); - cmd.exec_by_with_retry(init).await; + let res = cmd.exec_by(init).await; + cmd.feedback_channel + .send(ObserveOp::NotifyStartObserveResult { + region: cmd.region, + handle: cmd.handle, + err: res.map_err(Box::new).err(), + }) + .await + .map_err(|err| { + Error::Other(box_err!( + "failed to send result, are we shutting down? {}", + err + )) + }) + .report_if_err("exec initial scan"); metrics::PENDING_INITIAL_SCAN_LEN .with_label_values(&["executing"]) .dec(); @@ -257,10 +259,17 @@ fn spawn_executors( ) -> ScanPoolHandle { let (tx, rx) = tokio::sync::mpsc::channel(MESSAGE_BUFFER_SIZE); let pool = create_scan_pool(number); - pool.spawn(root!("scan_executor_loop"; async move { + let handle = pool.handle().clone(); + handle.spawn(async move { scan_executor_loop(init, rx).await; - })); - ScanPoolHandle { tx, _pool: pool } + // The behavior of log backup is undefined while TiKV shutting down. + // (Recording the logs doesn't require any local persisted information.) + // So it is OK to make works in the pool fully asynchronous (i.e. We + // don't syncing it with shutting down.). This trick allows us get rid + // of the long long panic information during testing. + tokio::task::block_in_place(move || drop(pool)); + }); + ScanPoolHandle { tx } } struct ScanPoolHandle { @@ -268,8 +277,6 @@ struct ScanPoolHandle { // thread. 
But that will make `SubscribeManager` holds a reference to the implementation of // `InitialScan`, which will get the type information a mass. tx: Sender, - - _pool: ScanPool, } impl ScanPoolHandle { @@ -289,42 +296,20 @@ const MESSAGE_BUFFER_SIZE: usize = 32768; /// we should only modify the `SubscriptionTracer` itself (i.e. insert records, /// remove records) at here. So the order subscription / desubscription won't be /// broken. -pub struct RegionSubscriptionManager { +pub struct RegionSubscriptionManager { // Note: these fields appear everywhere, maybe make them a `context` type? regions: R, meta_cli: MetadataClient, - pd_client: Arc, range_router: Router, scheduler: Scheduler, - observer: BackupStreamObserver, subs: SubscriptionTracer, - messenger: Sender, - scan_pool_handle: Arc, - scans: Arc, -} + failure_count: HashMap, + memory_manager: Arc, -impl Clone for RegionSubscriptionManager -where - S: MetaStore + 'static, - R: RegionInfoProvider + Clone + 'static, - PDC: PdClient + 'static, -{ - fn clone(&self) -> Self { - Self { - regions: self.regions.clone(), - meta_cli: self.meta_cli.clone(), - // We should manually call Arc::clone here or rustc complains that `PDC` isn't `Clone`. - pd_client: Arc::clone(&self.pd_client), - range_router: self.range_router.clone(), - scheduler: self.scheduler.clone(), - observer: self.observer.clone(), - subs: self.subs.clone(), - messenger: self.messenger.clone(), - scan_pool_handle: self.scan_pool_handle.clone(), - scans: CallbackWaitGroup::new(), - } - } + messenger: WeakSender, + scan_pool_handle: ScanPoolHandle, + scans: Arc, } /// Create a pool for doing initial scanning. @@ -343,11 +328,10 @@ fn create_scan_pool(num_threads: usize) -> ScanPool { .unwrap() } -impl RegionSubscriptionManager +impl RegionSubscriptionManager where S: MetaStore + 'static, R: RegionInfoProvider + Clone + 'static, - PDC: PdClient + 'static, { /// create a [`RegionSubscriptionManager`]. 
/// @@ -358,12 +342,10 @@ where pub fn start( initial_loader: InitialDataLoader, regions: R, - observer: BackupStreamObserver, meta_cli: MetadataClient, - pd_client: Arc, scan_pool_size: usize, resolver: BackupStreamResolver, - ) -> (Self, future![()]) + ) -> (Sender, future![()]) where E: KvEngine, HInit: CdcHandle + Sync + 'static, @@ -374,27 +356,17 @@ where let op = Self { regions, meta_cli, - pd_client, range_router: initial_loader.sink.clone(), scheduler: initial_loader.scheduler.clone(), - observer, subs: initial_loader.tracing, - messenger: tx, - scan_pool_handle: Arc::new(scan_pool_handle), + messenger: tx.downgrade(), + scan_pool_handle, scans: CallbackWaitGroup::new(), + failure_count: HashMap::new(), + memory_manager: Arc::clone(&initial_loader.quota), }; - let fut = op.clone().region_operator_loop(rx, resolver); - (op, fut) - } - - /// send an operation request to the manager. - /// the returned future would be resolved after send is success. - /// the opeartion would be executed asynchronously. - pub async fn request(&self, op: ObserveOp) { - if let Err(err) = self.messenger.send(op).await { - annotate!(err, "BUG: region operator channel closed.") - .report("when executing region op"); - } + let fut = op.region_operator_loop(rx, resolver); + (tx, fut) } /// wait initial scanning get finished. @@ -402,10 +374,20 @@ where tokio::time::timeout(timeout, self.scans.wait()).map(|result| result.is_err()) } + fn issue_fatal_of(&self, region: &Region, err: Error) { + try_send!( + self.scheduler, + Task::FatalError( + TaskSelector::ByRange(region.start_key.to_owned(), region.end_key.to_owned()), + Box::new(err) + ) + ); + } + /// the handler loop. 
#[instrument(skip_all)] async fn region_operator_loop( - self, + mut self, mut message_box: Receiver, mut resolver: BackupStreamResolver, ) where @@ -418,9 +400,9 @@ where info!("backup stream: on_modify_observe"; "op" => ?op); } match op { - ObserveOp::Start { region } => { + ObserveOp::Start { region, handle } => { fail::fail_point!("delay_on_start_observe"); - self.start_observe(region).await; + self.start_observe(region, handle).await; metrics::INITIAL_SCAN_REASON .with_label_values(&["leader-changed"]) .inc(); @@ -442,34 +424,12 @@ where }); } ObserveOp::RefreshResolver { ref region } => self.refresh_resolver(region).await, - ObserveOp::NotifyFailToStartObserve { + ObserveOp::NotifyStartObserveResult { region, handle, err, - has_failed_for, } => { - info!("retry observe region"; "region" => %region.get_id(), "err" => %err); - // No need for retrying observe canceled. - if err.error_code() == error_code::backup_stream::OBSERVE_CANCELED { - return; - } - let (start, end) = ( - region.get_start_key().to_owned(), - region.get_end_key().to_owned(), - ); - match self.retry_observe(region, handle, has_failed_for).await { - Ok(()) => {} - Err(e) => { - let msg = Task::FatalError( - TaskSelector::ByRange(start, end), - Box::new(Error::Contextual { - context: format!("retry meet error, origin error is {}", err), - inner_error: Box::new(e), - }), - ); - try_send!(self.scheduler, msg); - } - } + self.on_observe_result(region, handle, err).await; } ObserveOp::ResolveRegions { callback, min_ts } => { let now = Instant::now(); @@ -492,10 +452,108 @@ where } callback(ResolvedRegions::new(rts, cps)); } + ObserveOp::HighMemUsageWarning { region_id } => { + self.on_high_memory_usage(region_id).await; + } } } } + async fn on_observe_result( + &mut self, + region: Region, + handle: ObserveHandle, + err: Option>, + ) { + let err = match err { + None => { + self.failure_count.remove(®ion.id); + let sub = self.subs.get_subscription_of(region.id); + if let Some(mut sub) = sub { + if 
sub.value().handle.id == handle.id { + sub.value_mut().resolver.phase_one_done(); + } + } + return; + } + Some(err) => { + if !should_retry(&err) { + self.failure_count.remove(®ion.id); + self.subs + .deregister_region_if(®ion, |sub, _| sub.handle.id == handle.id); + return; + } + err + } + }; + + let region_id = region.id; + match self.retry_observe(region.clone(), handle).await { + Ok(has_resent_req) => { + if !has_resent_req { + self.failure_count.remove(®ion_id); + } + } + Err(e) => { + self.issue_fatal_of( + ®ion, + e.context(format_args!( + "retry encountered error, origin error is {}", + err + )), + ); + self.failure_count.remove(®ion_id); + } + } + } + + async fn on_high_memory_usage(&mut self, inconsistent_region_id: u64) { + let mut lame_region = Region::new(); + lame_region.set_id(inconsistent_region_id); + let mut act_region = None; + self.subs.deregister_region_if(&lame_region, |act, _| { + act_region = Some(act.meta.clone()); + true + }); + let delay = OOM_BACKOFF_BASE + + Duration::from_secs(rand::thread_rng().gen_range(0..OOM_BACKOFF_JITTER_SECS)); + info!("log backup triggering high memory usage."; + "region" => %inconsistent_region_id, + "mem_usage" => %self.memory_manager.used_ratio(), + "mem_max" => %self.memory_manager.capacity()); + if let Some(region) = act_region { + self.schedule_start_observe(delay, region, None); + } + } + + fn schedule_start_observe( + &self, + backoff: Duration, + region: Region, + handle: Option, + ) { + let tx = self.messenger.upgrade(); + let region_id = region.id; + if tx.is_none() { + warn!( + "log backup subscription manager: cannot upgrade self-sender, are we shutting down?" 
+ ); + return; + } + let tx = tx.unwrap(); + // tikv_util::Instant cannot be converted to std::time::Instant :( + let start = std::time::Instant::now(); + let scheduled = async move { + tokio::time::sleep_until((start + backoff).into()).await; + let handle = handle.unwrap_or_else(|| ObserveHandle::new()); + if let Err(err) = tx.send(ObserveOp::Start { region, handle }).await { + warn!("log backup failed to schedule start observe."; "err" => %err); + } + }; + tokio::spawn(root!("scheduled_subscription"; scheduled; "after" = ?backoff, region_id)); + } + + #[instrument(skip_all, fields(id = region.id))] async fn refresh_resolver(&self, region: &Region) { let need_refresh_all = !self.subs.try_update_region(region); @@ -519,13 +577,13 @@ where } .await; if let Err(e) = r { + warn!("failed to refresh region: will retry."; "err" => %e, utils::slog_region(region)); try_send!( self.scheduler, - Task::ModifyObserve(ObserveOp::NotifyFailToStartObserve { + Task::ModifyObserve(ObserveOp::NotifyStartObserveResult { region: region.clone(), handle, - err: Box::new(e), - has_failed_for: 0, + err: Some(Box::new(e)), }) ); } @@ -544,11 +602,9 @@ where match self.find_task_by_region(region) { None => { warn!( - "the region {:?} is register to no task but being observed (start_key = {}; end_key = {}; task_stat = {:?}): maybe stale, aborting", - region, - utils::redact(®ion.get_start_key()), - utils::redact(®ion.get_end_key()), - self.range_router + "the region is register to no task but being observed: maybe stale, skipping"; + utils::slog_region(region), + "task_status" => ?self.range_router, ); } @@ -567,62 +623,36 @@ where Ok(()) } - #[instrument(skip_all)] - async fn start_observe(&self, region: Region) { - self.start_observe_with_failure_count(region, 0).await - } - - async fn start_observe_with_failure_count(&self, region: Region, has_failed_for: u8) { - let handle = ObserveHandle::new(); - let schd = self.scheduler.clone(); + async fn start_observe(&self, region: Region, handle: 
ObserveHandle) { + match self.is_available(®ion, &handle).await { + Ok(false) => { + warn!("stale start observe command."; utils::slog_region(®ion), "handle" => ?handle); + return; + } + Err(err) => { + self.issue_fatal_of(®ion, err.context("failed to check stale")); + return; + } + _ => {} + } self.subs.add_pending_region(®ion); - if let Err(err) = self.try_start_observe(®ion, handle.clone()).await { + let res = self.try_start_observe(®ion, handle.clone()).await; + if let Err(err) = res { warn!("failed to start observe, would retry"; "err" => %err, utils::slog_region(®ion)); - tokio::spawn(root!("retry_start_observe"; async move { - #[cfg(not(feature = "failpoints"))] - let delay = backoff_for_start_observe(has_failed_for); - #[cfg(feature = "failpoints")] - let delay = (|| { - fail::fail_point!("subscribe_mgr_retry_start_observe_delay", |v| { - let dur = v - .expect("should provide delay time (in ms)") - .parse::() - .expect("should be number (in ms)"); - Duration::from_millis(dur) - }); - backoff_for_start_observe(has_failed_for) - })(); - tokio::time::sleep(delay).await; - try_send!( - schd, - Task::ModifyObserve(ObserveOp::NotifyFailToStartObserve { - region, - handle, - err: Box::new(err), - has_failed_for: has_failed_for + 1 - }) - ) - })); + try_send!( + self.scheduler, + Task::ModifyObserve(ObserveOp::NotifyStartObserveResult { + region, + handle, + err: Some(Box::new(err)), + }) + ); } } - async fn retry_observe( - &self, - region: Region, - handle: ObserveHandle, - failure_count: u8, - ) -> Result<()> { - if failure_count > TRY_START_OBSERVE_MAX_RETRY_TIME { - return Err(Error::Other( - format!( - "retry time exceeds for region {:?}", - utils::debug_region(®ion) - ) - .into(), - )); - } - - let (tx, rx) = crossbeam::channel::bounded(1); + #[instrument(skip_all)] + async fn is_available(&self, region: &Region, handle: &ObserveHandle) -> Result { + let (tx, rx) = tokio::sync::oneshot::channel(); self.regions .find_region_by_id( region.get_id(), @@ -639,27 
+669,36 @@ where ) })?; let new_region_info = rx - .recv() + .await .map_err(|err| annotate!(err, "BUG?: unexpected channel message dropped."))?; if new_region_info.is_none() { metrics::SKIP_RETRY .with_label_values(&["region-absent"]) .inc(); - return Ok(()); + return Ok(false); } let new_region_info = new_region_info.unwrap(); if new_region_info.role != StateRole::Leader { metrics::SKIP_RETRY.with_label_values(&["not-leader"]).inc(); - return Ok(()); + return Ok(false); + } + if raftstore::store::util::is_epoch_stale( + region.get_region_epoch(), + new_region_info.region.get_region_epoch(), + ) { + metrics::SKIP_RETRY + .with_label_values(&["epoch-not-match"]) + .inc(); + return Ok(false); } // Note: we may fail before we insert the region info to the subscription map. // At that time, the command isn't steal and we should retry it. let mut exists = false; - let removed = self.subs.deregister_region_if(®ion, |old, _| { + let removed = self.subs.deregister_region_if(region, |old, _| { exists = true; let should_remove = old.handle().id == handle.id; if !should_remove { - warn!("stale retry command"; utils::slog_region(®ion), "handle" => ?handle, "old_handle" => ?old.handle()); + warn!("stale retry command"; utils::slog_region(region), "handle" => ?handle, "old_handle" => ?old.handle()); } should_remove }); @@ -667,14 +706,36 @@ where metrics::SKIP_RETRY .with_label_values(&["stale-command"]) .inc(); - return Ok(()); + return Ok(false); + } + Ok(true) + } + + async fn retry_observe(&mut self, region: Region, handle: ObserveHandle) -> Result { + let failure_count = self.failure_count.entry(region.id).or_insert(0); + *failure_count += 1; + let failure_count = *failure_count; + + info!("retry observe region"; "region" => %region.get_id(), "failure_count" => %failure_count, "handle" => ?handle); + if failure_count > TRY_START_OBSERVE_MAX_RETRY_TIME { + return Err(Error::Other( + format!( + "retry time exceeds for region {:?}", + utils::debug_region(®ion) + ) + .into(), 
+ )); + } + + let should_retry = self.is_available(®ion, &handle).await?; + if !should_retry { + return Ok(false); } + self.schedule_start_observe(backoff_for_start_observe(failure_count), region, None); metrics::INITIAL_SCAN_REASON .with_label_values(&["retry"]) .inc(); - self.start_observe_with_failure_count(region, failure_count) - .await; - Ok(()) + Ok(true) } #[instrument(skip_all)] @@ -722,10 +783,19 @@ where ) { self.subs .register_region(region, handle.clone(), Some(last_checkpoint)); + let feedback_channel = match self.messenger.upgrade() { + Some(ch) => ch, + None => { + warn!("log backup subscription manager is shutting down, aborting new scan."; + utils::slog_region(region), "handle" => ?handle.id); + return; + } + }; self.spawn_scan(ScanCmd { region: region.clone(), handle, last_checkpoint, + feedback_channel, _work: self.scans.clone().work(), }) .await @@ -739,23 +809,66 @@ where #[cfg(test)] mod test { - use kvproto::metapb::Region; - use tikv::storage::Statistics; + use std::{ + collections::HashMap, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, Mutex, + }, + time::{Duration, Instant}, + }; + + use engine_test::{kv::KvTestEngine, raft::RaftTestEngine}; + use kvproto::{ + brpb::{Noop, StorageBackend, StreamBackupTaskInfo}, + metapb::{Region, RegionEpoch}, + }; + use raftstore::{ + coprocessor::{ObserveHandle, RegionInfoCallback, RegionInfoProvider}, + router::{CdcRaftRouter, ServerRaftStoreRouter}, + RegionInfo, + }; + use tikv::{config::BackupStreamConfig, storage::Statistics}; + use tikv_util::{info, memory::MemoryQuota, worker::dummy_scheduler}; + use tokio::{sync::mpsc::Sender, task::JoinHandle}; + use txn_types::TimeStamp; - use super::InitialScan; + use super::{spawn_executors, InitialScan, RegionSubscriptionManager}; + use crate::{ + errors::Error, + metadata::{store::SlashEtcStore, MetadataClient, StreamTask}, + router::{Router, RouterInner}, + subscription_manager::{OOM_BACKOFF_BASE, OOM_BACKOFF_JITTER_SECS}, + 
subscription_track::{CheckpointType, SubscriptionTracer}, + utils::CallbackWaitGroup, + BackupStreamResolver, ObserveOp, Task, + }; #[derive(Clone, Copy)] - struct NoopInitialScan; + struct FuncInitialScan(F) + where + F: Fn(&Region, TimeStamp, ObserveHandle) -> crate::errors::Result + + Clone + + Sync + + Send + + 'static; #[async_trait::async_trait] - impl InitialScan for NoopInitialScan { + impl InitialScan for FuncInitialScan + where + F: Fn(&Region, TimeStamp, ObserveHandle) -> crate::errors::Result + + Clone + + Sync + + Send + + 'static, + { async fn do_initial_scan( &self, - _region: &Region, - _start_ts: txn_types::TimeStamp, - _handle: raftstore::coprocessor::ObserveHandle, + region: &Region, + start_ts: txn_types::TimeStamp, + handle: raftstore::coprocessor::ObserveHandle, ) -> crate::errors::Result { - Ok(Statistics::default()) + (self.0)(region, start_ts, handle) } fn handle_fatal_error(&self, region: &Region, err: crate::errors::Error) { @@ -768,6 +881,8 @@ mod test { fn test_message_delay_and_exit() { use std::time::Duration; + use futures::executor::block_on; + use super::ScanCmd; use crate::{subscription_manager::spawn_executors, utils::CallbackWaitGroup}; @@ -785,21 +900,22 @@ mod test { pool.block_on(tokio::time::timeout(d, rx)).unwrap().unwrap(); } - let pool = spawn_executors(NoopInitialScan, 1); + let pool = spawn_executors(FuncInitialScan(|_, _, _| Ok(Statistics::default())), 1); let wg = CallbackWaitGroup::new(); + let (tx, _) = tokio::sync::mpsc::channel(1); fail::cfg("execute_scan_command_sleep_100", "return").unwrap(); for _ in 0..100 { let wg = wg.clone(); assert!( - pool._pool - .block_on(pool.request(ScanCmd { - region: Default::default(), - handle: Default::default(), - last_checkpoint: Default::default(), - // Note: Maybe make here a Box or some other trait? 
- _work: wg.work(), - })) - .is_ok() + block_on(pool.request(ScanCmd { + region: Default::default(), + handle: Default::default(), + last_checkpoint: Default::default(), + feedback_channel: tx.clone(), + // Note: Maybe make here a Box or some other trait? + _work: wg.work(), + })) + .is_ok() ) } @@ -833,4 +949,330 @@ mod test { super::RETRY_AWAIT_MAX_DURATION ); } + + struct Suite { + rt: tokio::runtime::Runtime, + bg_tasks: Vec>, + cancel: Arc, + + events: Arc>>, + task_start_ts: TimeStamp, + handle: Option>, + regions: RegionMem, + subs: SubscriptionTracer, + } + + #[derive(Debug, Eq, PartialEq)] + enum ObserveEvent { + Start(u64), + Stop(u64), + StartResult(u64, bool), + HighMemUse(u64), + } + + impl ObserveEvent { + fn of(op: &ObserveOp) -> Option { + match op { + ObserveOp::Start { region, .. } => Some(Self::Start(region.id)), + ObserveOp::Stop { region } => Some(Self::Stop(region.id)), + ObserveOp::NotifyStartObserveResult { region, err, .. } => { + Some(Self::StartResult(region.id, err.is_none())) + } + ObserveOp::HighMemUsageWarning { + region_id: inconsistent_region_id, + } => Some(Self::HighMemUse(*inconsistent_region_id)), + + _ => None, + } + } + } + + #[derive(Clone, Default)] + struct RegionMem { + regions: Arc>>, + } + + impl RegionInfoProvider for RegionMem { + fn find_region_by_id( + &self, + region_id: u64, + callback: RegionInfoCallback>, + ) -> raftstore::coprocessor::Result<()> { + let rs = self.regions.lock().unwrap(); + let info = rs.get(®ion_id).cloned(); + drop(rs); + callback(info); + Ok(()) + } + } + + impl Suite { + fn new(init: impl InitialScan) -> Self { + let task_name = "test"; + let task_start_ts = TimeStamp::new(42); + let pool = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + let regions = RegionMem::default(); + let meta_cli = SlashEtcStore::default(); + let meta_cli = MetadataClient::new(meta_cli, 1); + let (scheduler, mut output) = dummy_scheduler(); + let subs = 
SubscriptionTracer::default(); + let memory_manager = Arc::new(MemoryQuota::new(1024)); + let (tx, mut rx) = tokio::sync::mpsc::channel(8); + let router = RouterInner::new(scheduler.clone(), BackupStreamConfig::default().into()); + let mut task = StreamBackupTaskInfo::new(); + task.set_name(task_name.to_owned()); + task.set_storage({ + let nop = Noop::new(); + let mut backend = StorageBackend::default(); + backend.set_noop(nop); + backend + }); + task.set_start_ts(task_start_ts.into_inner()); + let mut task_wrapped = StreamTask::default(); + task_wrapped.info = task; + pool.block_on(meta_cli.insert_task_with_range(&task_wrapped, &[(b"", b"\xFF\xFF")])) + .unwrap(); + pool.block_on(router.register_task( + task_wrapped, + vec![(vec![], vec![0xff, 0xff])], + 1024 * 1024, + )) + .unwrap(); + let subs_mgr = RegionSubscriptionManager { + regions: regions.clone(), + meta_cli, + range_router: Router(Arc::new(router)), + scheduler, + subs: subs.clone(), + failure_count: Default::default(), + memory_manager, + messenger: tx.downgrade(), + scan_pool_handle: spawn_executors(init, 2), + scans: CallbackWaitGroup::new(), + }; + let events = Arc::new(Mutex::new(vec![])); + let ob_events = Arc::clone(&events); + let (ob_tx, ob_rx) = tokio::sync::mpsc::channel(1); + let mut bg_tasks = vec![]; + bg_tasks.push(pool.spawn(async move { + while let Some(item) = rx.recv().await { + if let Some(record) = ObserveEvent::of(&item) { + ob_events.lock().unwrap().push(record); + } + ob_tx.send(item).await.unwrap(); + } + })); + let self_tx = tx.clone(); + let canceled = Arc::new(AtomicBool::new(false)); + let cancel = canceled.clone(); + bg_tasks.push(pool.spawn_blocking(move || { + loop { + match output.recv_timeout(Duration::from_millis(10)) { + Ok(Some(item)) => match item { + Task::ModifyObserve(ob) => tokio::runtime::Handle::current() + .block_on(self_tx.send(ob)) + .unwrap(), + Task::FatalError(select, err) => { + panic!( + "Background handler received fatal error {err} for {select:?}!" 
+ ) + } + _ => {} + }, + Ok(None) => return, + Err(_) => { + if canceled.load(Ordering::SeqCst) { + return; + } + } + } + } + })); + bg_tasks.push( + pool.spawn(subs_mgr.region_operator_loop::, + >>(ob_rx, BackupStreamResolver::Nop)), + ); + + Self { + rt: pool, + events, + regions, + handle: Some(tx), + task_start_ts, + bg_tasks, + cancel, + subs, + } + } + + fn run(&self, op: ObserveOp) { + self.rt + .block_on(self.handle.as_ref().unwrap().send(op)) + .unwrap() + } + + fn start_region(&self, region: Region) { + self.regions.regions.lock().unwrap().insert( + region.id, + RegionInfo { + region: region.clone(), + role: raft::StateRole::Leader, + buckets: 0, + }, + ); + self.run(ObserveOp::Start { + region, + handle: ObserveHandle::new(), + }); + } + + fn region( + &self, + id: u64, + version: u64, + conf_ver: u64, + start_key: &[u8], + end_key: &[u8], + ) -> Region { + let mut region = Region::default(); + region.set_id(id); + region.set_region_epoch({ + let mut rp = RegionEpoch::new(); + rp.set_conf_ver(conf_ver); + rp.set_version(version); + rp + }); + region.set_start_key(start_key.to_vec()); + region.set_end_key(end_key.to_vec()); + region + } + + fn wait_shutdown(&mut self) { + drop(self.handle.take()); + self.cancel.store(true, Ordering::SeqCst); + self.rt + .block_on(futures::future::try_join_all(std::mem::take( + &mut self.bg_tasks, + ))) + .unwrap(); + } + + #[track_caller] + fn wait_initial_scan_all_finish(&self, expected_region: usize) { + info!("[TEST] Start waiting initial scanning finish."); + self.rt.block_on(async move { + let max_wait = Duration::from_secs(1); + let start = Instant::now(); + loop { + let (tx, rx) = tokio::sync::oneshot::channel(); + if start.elapsed() > max_wait { + panic!( + "wait initial scan takes too long! 
events = {:?}", + self.events + ); + } + self.handle + .as_ref() + .unwrap() + .send(ObserveOp::ResolveRegions { + callback: Box::new(move |result| { + let no_initial_scan = result.items.iter().all(|r| { + r.checkpoint_type != CheckpointType::StartTsOfInitialScan + }); + let all_region_done = result.items.len() == expected_region; + tx.send(no_initial_scan && all_region_done).unwrap() + }), + min_ts: self.task_start_ts.next(), + }) + .await + .unwrap(); + if rx.await.unwrap() { + info!("[TEST] Finish waiting initial scanning finish."); + return; + } + // Advance the global timer in case of someone is waiting for timer. + tokio::time::advance(Duration::from_secs(16)).await; + } + }) + } + + fn advance_ms(&self, n: u64) { + self.rt + .block_on(tokio::time::advance(Duration::from_millis(n))) + } + } + + #[test] + fn test_basic_retry() { + test_util::init_log_for_test(); + use ObserveEvent::*; + let failed = Arc::new(AtomicBool::new(false)); + let mut suite = Suite::new(FuncInitialScan(move |r, _, _| { + if r.id != 1 || failed.load(Ordering::SeqCst) { + return Ok(Statistics::default()); + } + failed.store(true, Ordering::SeqCst); + Err(Error::OutOfQuota { region_id: r.id }) + })); + let _guard = suite.rt.enter(); + tokio::time::pause(); + suite.start_region(suite.region(1, 1, 1, b"a", b"b")); + suite.start_region(suite.region(2, 1, 1, b"b", b"c")); + suite.wait_initial_scan_all_finish(2); + suite.wait_shutdown(); + assert_eq!( + &*suite.events.lock().unwrap(), + &[ + Start(1), + Start(2), + StartResult(1, false), + StartResult(2, true), + Start(1), + StartResult(1, true) + ] + ); + } + + #[test] + fn test_on_high_mem() { + let mut suite = Suite::new(FuncInitialScan(|_, _, _| Ok(Statistics::default()))); + let _guard = suite.rt.enter(); + tokio::time::pause(); + suite.start_region(suite.region(1, 1, 1, b"a", b"b")); + suite.start_region(suite.region(2, 1, 1, b"b", b"c")); + suite.advance_ms(0); + let mut rs = suite.subs.current_regions(); + rs.sort(); + assert_eq!(rs, 
[1, 2]); + suite.wait_initial_scan_all_finish(2); + suite.run(ObserveOp::HighMemUsageWarning { region_id: 1 }); + suite.advance_ms(0); + assert_eq!(suite.subs.current_regions(), [2]); + suite.advance_ms( + (OOM_BACKOFF_BASE + Duration::from_secs(OOM_BACKOFF_JITTER_SECS + 1)).as_millis() as _, + ); + suite.wait_initial_scan_all_finish(2); + suite.wait_shutdown(); + let mut rs = suite.subs.current_regions(); + rs.sort(); + assert_eq!(rs, [1, 2]); + + use ObserveEvent::*; + assert_eq!( + &*suite.events.lock().unwrap(), + &[ + Start(1), + Start(2), + StartResult(1, true), + StartResult(2, true), + HighMemUse(1), + Start(1), + StartResult(1, true), + ] + ); + } } diff --git a/components/backup-stream/src/subscription_track.rs b/components/backup-stream/src/subscription_track.rs index 5a6b2e0753b..8f3fe69a7ac 100644 --- a/components/backup-stream/src/subscription_track.rs +++ b/components/backup-stream/src/subscription_track.rs @@ -1,6 +1,6 @@ // Copyright 2022 TiKV Project Authors. Licensed under Apache-2.0. -use std::{collections::HashSet, sync::Arc}; +use std::{collections::HashSet, result::Result, sync::Arc}; use dashmap::{ mapref::{entry::Entry, one::RefMut as DashRefMut}, @@ -9,7 +9,11 @@ use dashmap::{ use kvproto::metapb::Region; use raftstore::coprocessor::*; use resolved_ts::{Resolver, TsSource, TxnLocks}; -use tikv_util::{info, memory::MemoryQuota, warn}; +use tikv_util::{ + info, + memory::{MemoryQuota, MemoryQuotaExceeded}, + warn, +}; use txn_types::TimeStamp; use crate::{debug, metrics::TRACK_REGION, utils}; @@ -27,7 +31,7 @@ pub struct SubscriptionTracer(Arc>); /// You may notice there are also some state transforms in the /// [`TwoPhaseResolver`] struct, states there are sub-states of the `RUNNING` /// stage here. -enum SubscribeState { +pub enum SubscribeState { // NOTE: shall we add `SubscriptionHandle` here? // (So we can check this when calling `remove_if`.) 
Pending(Region), @@ -205,7 +209,7 @@ impl SubscriptionTracer { handle: ObserveHandle, start_ts: Option, ) { - info!("start listen stream from store"; "observer" => ?handle); + info!("start listen stream from store"; "observer" => ?handle, utils::slog_region(region)); TRACK_REGION.inc(); let e = self.0.entry(region.id); match e { @@ -338,7 +342,7 @@ impl SubscriptionTracer { ) -> Option + '_> { self.0 .get_mut(®ion_id) - .and_then(|x| SubscriptionRef::try_from_dash(x)) + .and_then(|x| ActiveSubscriptionRef::try_from_dash(x)) } } @@ -354,7 +358,7 @@ pub trait RefMut: Ref { fn value_mut(&mut self) -> &mut ::Value; } -impl<'a> Ref for SubscriptionRef<'a> { +impl<'a> Ref for ActiveSubscriptionRef<'a> { type Key = u64; type Value = ActiveSubscription; @@ -367,15 +371,15 @@ impl<'a> Ref for SubscriptionRef<'a> { } } -impl<'a> RefMut for SubscriptionRef<'a> { +impl<'a> RefMut for ActiveSubscriptionRef<'a> { fn value_mut(&mut self) -> &mut ::Value { self.sub_mut() } } -struct SubscriptionRef<'a>(DashRefMut<'a, u64, SubscribeState>); +struct ActiveSubscriptionRef<'a>(DashRefMut<'a, u64, SubscribeState>); -impl<'a> SubscriptionRef<'a> { +impl<'a> ActiveSubscriptionRef<'a> { fn try_from_dash(mut d: DashRefMut<'a, u64, SubscribeState>) -> Option { match d.value_mut() { SubscribeState::Pending(_) => None, @@ -476,21 +480,29 @@ impl TwoPhaseResolver { self.stable_ts.is_some() } - pub fn track_phase_one_lock(&mut self, start_ts: TimeStamp, key: Vec) { + pub fn track_phase_one_lock( + &mut self, + start_ts: TimeStamp, + key: Vec, + ) -> Result<(), MemoryQuotaExceeded> { if !self.in_phase_one() { warn!("backup stream tracking lock as if in phase one"; "start_ts" => %start_ts, "key" => %utils::redact(&key)) } - // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. 
- self.resolver.track_lock(start_ts, key, None).unwrap(); + self.resolver.track_lock(start_ts, key, None)?; + Ok(()) } - pub fn track_lock(&mut self, start_ts: TimeStamp, key: Vec) { + pub fn track_lock( + &mut self, + start_ts: TimeStamp, + key: Vec, + ) -> Result<(), MemoryQuotaExceeded> { if self.in_phase_one() { self.future_locks.push(FutureLock::Lock(key, start_ts)); - return; + return Ok(()); } - // TODO: handle memory quota exceed, for now, quota is set to usize::MAX. - self.resolver.track_lock(start_ts, key, None).unwrap(); + self.resolver.track_lock(start_ts, key, None)?; + Ok(()) } pub fn untrack_lock(&mut self, key: &[u8]) { @@ -584,13 +596,13 @@ mod test { let key = b"somewhere_over_the_rainbow"; let ts = TimeStamp::new; let mut r = TwoPhaseResolver::new(42, Some(ts(42))); - r.track_phase_one_lock(ts(48), key.to_vec()); + r.track_phase_one_lock(ts(48), key.to_vec()).unwrap(); // When still in phase one, the resolver should not be advanced. r.untrack_lock(&key[..]); assert_eq!(r.resolve(ts(50)), ts(42)); // Even new lock tracked... - r.track_lock(ts(52), key.to_vec()); + r.track_lock(ts(52), key.to_vec()).unwrap(); r.untrack_lock(&key[..]); assert_eq!(r.resolve(ts(53)), ts(42)); @@ -599,7 +611,7 @@ mod test { assert_eq!(r.resolve(ts(54)), ts(54)); // It should be able to track incremental locks. 
- r.track_lock(ts(55), key.to_vec()); + r.track_lock(ts(55), key.to_vec()).unwrap(); assert_eq!(r.resolve(ts(56)), ts(55)); r.untrack_lock(&key[..]); assert_eq!(r.resolve(ts(57)), ts(57)); @@ -655,7 +667,8 @@ mod test { region4_sub .value_mut() .resolver - .track_lock(TimeStamp::new(128), b"Alpi".to_vec()); + .track_lock(TimeStamp::new(128), b"Alpi".to_vec()) + .unwrap(); subs.register_region(®ion(5, 8, 1), ObserveHandle::new(), None); subs.deregister_region_if(®ion(5, 8, 1), |_, _| true); drop(region4_sub); diff --git a/components/backup-stream/src/utils.rs b/components/backup-stream/src/utils.rs index 33e6ba044c3..7606004786e 100644 --- a/components/backup-stream/src/utils.rs +++ b/components/backup-stream/src/utils.rs @@ -280,6 +280,7 @@ pub fn request_to_triple(mut req: Request) -> Either<(Vec, Vec, CfName), /// `try_send!(s: Scheduler, task: T)` tries to send a task to the scheduler, /// once meet an error, would report it, with the current file and line (so it /// is made as a macro). returns whether it success. +// Note: perhaps we'd better using std::panic::Location. #[macro_export] macro_rules! 
try_send { ($s:expr, $task:expr) => { diff --git a/components/backup-stream/tests/failpoints/mod.rs b/components/backup-stream/tests/failpoints/mod.rs index 35f40c10574..8d357ed2073 100644 --- a/components/backup-stream/tests/failpoints/mod.rs +++ b/components/backup-stream/tests/failpoints/mod.rs @@ -25,6 +25,7 @@ mod all { GetCheckpointResult, RegionCheckpointOperation, RegionSet, Task, }; use futures::executor::block_on; + use raftstore::coprocessor::ObserveHandle; use tikv_util::{config::ReadableSize, defer}; use super::{ @@ -107,9 +108,11 @@ mod all { suite.run(|| { Task::ModifyObserve(backup_stream::ObserveOp::Start { region: suite.cluster.get_region(&make_record_key(1, 886)), + handle: ObserveHandle::new(), }) }); fail::cfg("scan_after_get_snapshot", "off").unwrap(); + std::thread::sleep(Duration::from_secs(1)); suite.force_flush_files("frequent_initial_scan"); suite.wait_for_flush(); std::thread::sleep(Duration::from_secs(1)); diff --git a/components/error_code/src/backup_stream.rs b/components/error_code/src/backup_stream.rs index 78cb544746d..c2135becaa3 100644 --- a/components/error_code/src/backup_stream.rs +++ b/components/error_code/src/backup_stream.rs @@ -11,6 +11,10 @@ define_error_codes! { "A task not found.", "Please check the spell of your task name." ), + OUT_OF_QUOTA => ("OutOfQuota", + "Some of quota has been exceed, hence the task cannot continue.", + "For memory quotas, please check whether there are huge transactions. You may also increase the quota by modifying config." 
+ ), OBSERVE_CANCELED => ( "ObserveCancel", "When doing initial scanning, the observe of that region has been canceled", diff --git a/components/tikv_util/src/memory.rs b/components/tikv_util/src/memory.rs index 259a44e5614..3a0e146d98e 100644 --- a/components/tikv_util/src/memory.rs +++ b/components/tikv_util/src/memory.rs @@ -108,6 +108,10 @@ impl OwnedAllocated { self.allocated += bytes; Ok(()) } + + pub fn source(&self) -> &MemoryQuota { + &self.from + } } impl Drop for OwnedAllocated { @@ -128,6 +132,12 @@ impl MemoryQuota { self.in_use.load(Ordering::Relaxed) } + /// Returns a floating number between [0, 1] presents the current memory + /// status. + pub fn used_ratio(&self) -> f64 { + self.in_use() as f64 / self.capacity() as f64 + } + pub fn capacity(&self) -> usize { self.capacity.load(Ordering::Relaxed) } From 866eda664e45f2bea1977a2c10ed50c0e6aa74c3 Mon Sep 17 00:00:00 2001 From: lucasliang Date: Tue, 23 Jan 2024 14:07:49 +0800 Subject: [PATCH 1128/1149] raftstore: address the corner case on WakeUp hibernate regions. (#16408) close tikv/tikv#16368 This pull request addresses a corner case where `WakeUp` messages were being ignored during I/O hang scenarios. 
Signed-off-by: lucasliang --- Cargo.lock | 8 +-- components/raftstore/src/store/fsm/peer.rs | 28 ++++++----- deny.toml | 4 ++ tests/failpoints/cases/test_hibernate.rs | 57 +++++++++++++++++++++- 4 files changed, 81 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 927570ac42c..aa3daec32c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -615,7 +615,7 @@ dependencies = [ "quote", "regex", "rustc-hash", - "shlex 1.1.0", + "shlex 1.3.0", "which", ] @@ -636,7 +636,7 @@ dependencies = [ "quote", "regex", "rustc-hash", - "shlex 1.1.0", + "shlex 1.3.0", "syn 2.0.43", ] @@ -5271,9 +5271,9 @@ checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" [[package]] name = "shlex" -version = "1.1.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook" diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index ad00a0aa887..5dac5d9d488 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -2180,6 +2180,11 @@ where self.fsm.hibernate_state.group_state() == GroupState::Idle, |_| {} ); + fail_point!( + "on_raft_base_tick_chaos", + self.fsm.hibernate_state.group_state() == GroupState::Chaos, + |_| {} + ); if self.fsm.peer.pending_remove { self.fsm.peer.mut_store().flush_entry_cache_metrics(); @@ -2864,18 +2869,19 @@ where fn on_extra_message(&mut self, mut msg: RaftMessage) { match msg.get_extra_msg().get_type() { ExtraMessageType::MsgRegionWakeUp | ExtraMessageType::MsgCheckStalePeer => { - if self.fsm.hibernate_state.group_state() == GroupState::Idle { - if msg.get_extra_msg().forcely_awaken { - // Forcely awaken this region by manually setting this GroupState - // into Chaos to trigger a new voting in this RaftGroup. 
- self.reset_raft_tick(if !self.fsm.peer.is_leader() { - GroupState::Chaos - } else { - GroupState::Ordered - }); + if msg.get_extra_msg().forcely_awaken { + // Forcely awaken this region by manually setting the GroupState + // into `Chaos` to trigger a new voting in the Raft Group. + // Meanwhile, it avoids the peer entering the `PreChaos` state, + // which would wait for another long tick to enter the `Chaos` state. + self.reset_raft_tick(if !self.fsm.peer.is_leader() { + GroupState::Chaos } else { - self.reset_raft_tick(GroupState::Ordered); - } + GroupState::Ordered + }); + } + if self.fsm.hibernate_state.group_state() == GroupState::Idle { + self.reset_raft_tick(GroupState::Ordered); } if msg.get_extra_msg().get_type() == ExtraMessageType::MsgRegionWakeUp && self.fsm.peer.is_leader() diff --git a/deny.toml b/deny.toml index 209ebd2fe6f..ee4099d1370 100644 --- a/deny.toml +++ b/deny.toml @@ -68,6 +68,10 @@ ignore = [ # # TODO: Upgrade clap to v4.x. "RUSTSEC-2021-0145", + # Ignore RUSTSEC-2024-0006 as it only included by "rusoto_credential" crate. + # + # TODO: Upgrade shlex@0.1.1 to v1.3.x. 
+ "RUSTSEC-2024-0006", ] # TiKV is licensed under Apache 2.0, according to ASF 3RD PARTY LICENSE POLICY, diff --git a/tests/failpoints/cases/test_hibernate.rs b/tests/failpoints/cases/test_hibernate.rs index d2eb9aa10dd..b3c8714931b 100644 --- a/tests/failpoints/cases/test_hibernate.rs +++ b/tests/failpoints/cases/test_hibernate.rs @@ -6,7 +6,7 @@ use std::{ time::Duration, }; -use kvproto::raft_serverpb::RaftMessage; +use kvproto::raft_serverpb::{ExtraMessage, ExtraMessageType, RaftMessage}; use raft::eraftpb::MessageType; use raftstore::store::{PeerMsg, PeerTick}; use test_raftstore::*; @@ -82,6 +82,61 @@ fn test_break_leadership_on_restart() { rx.recv_timeout(Duration::from_secs(2)).unwrap_err(); } +#[test] +fn test_forcely_awaken_hibenrate_regions() { + let mut cluster = new_node_cluster(0, 3); + let base_tick_ms = 50; + cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(base_tick_ms); + cluster.cfg.raft_store.raft_heartbeat_ticks = 2; + cluster.cfg.raft_store.raft_election_timeout_ticks = 10; + // So the random election timeout will always be 10, which makes the case more + // stable. + cluster.cfg.raft_store.raft_min_election_timeout_ticks = 10; + cluster.cfg.raft_store.raft_max_election_timeout_ticks = 11; + configure_for_hibernate(&mut cluster.cfg); + cluster.pd_client.disable_default_operator(); + let r = cluster.run_conf_change(); + cluster.pd_client.must_add_peer(r, new_peer(2, 2)); + cluster.pd_client.must_add_peer(r, new_peer(3, 3)); + + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + + // Wait until all peers of region 1 hibernate. + thread::sleep(Duration::from_millis(base_tick_ms * 30)); + + // Firstly, send `CheckPeerStaleState` message to trigger the check. 
+ let router = cluster.sim.rl().get_router(3).unwrap(); + router + .send(1, PeerMsg::Tick(PeerTick::CheckPeerStaleState)) + .unwrap(); + + // Secondly, forcely send `MsgRegionWakeUp` message for awakening hibernated + // regions. + let (tx, rx) = mpsc::sync_channel(128); + fail::cfg_callback("on_raft_base_tick_chaos", move || { + tx.send(base_tick_ms).unwrap() + }) + .unwrap(); + let mut message = RaftMessage::default(); + message.region_id = 1; + message.set_from_peer(new_peer(3, 3)); + message.set_to_peer(new_peer(3, 3)); + message.mut_region_epoch().version = 1; + message.mut_region_epoch().conf_ver = 3; + let mut msg = ExtraMessage::default(); + msg.set_type(ExtraMessageType::MsgRegionWakeUp); + msg.forcely_awaken = true; + message.set_extra_msg(msg); + router.send_raft_message(message).unwrap(); + assert_eq!( + rx.recv_timeout(Duration::from_secs(1)).unwrap(), + base_tick_ms + ); + fail::remove("on_raft_base_tick_chaos"); +} + // This case creates a cluster with 3 TiKV instances, and then wait all peers // hibernate. 
// From fe15ec27a24d3b8fe6651d088ae2b2101c332391 Mon Sep 17 00:00:00 2001 From: Connor Date: Wed, 24 Jan 2024 12:40:50 +0800 Subject: [PATCH 1129/1149] grafana: Refine the order of grafana dashboard to localize related panels (#16432) ref tikv/tikv#15990 Refine the order of grafana dashboard to localize related panels Signed-off-by: Connor1996 --- metrics/grafana/tikv_details.dashboard.py | 45 +- metrics/grafana/tikv_details.json | 27096 ++++++++++---------- metrics/grafana/tikv_details.json.sha256 | 2 +- 3 files changed, 13577 insertions(+), 13566 deletions(-) diff --git a/metrics/grafana/tikv_details.dashboard.py b/metrics/grafana/tikv_details.dashboard.py index c10177be29f..81b12dc28cb 100644 --- a/metrics/grafana/tikv_details.dashboard.py +++ b/metrics/grafana/tikv_details.dashboard.py @@ -8638,15 +8638,19 @@ def StatusServer() -> RowPanel: editable=True, templating=Templates(), panels=[ + # Overview Duration(), Cluster(), Errors(), Server(), + # Entrance of Write and Read gRPC(), + Storage(), + LocalReader(), + # CPU and IO ThreadCPU(), - TTL(), - PD(), IOBreakdown(), + # Raftstore RaftWaterfall(), RaftIO(), RaftPropose(), @@ -8654,30 +8658,37 @@ def StatusServer() -> RowPanel: RaftMessage(), RaftAdmin(), RaftLog(), - LocalReader(), - UnifiedReadPool(), - Storage(), + # Engine + RaftEngine(), + RocksDB(), + Titan(), + # Scheduler and Read Pools FlowControl(), - SchedulerCommands(), Scheduler(), - GC(), - Snapshot(), - Task(), + SchedulerCommands(), CoprocessorOverview(), CoprocessorDetail(), - Threads(), - RocksDB(), - RaftEngine(), - Titan(), + UnifiedReadPool(), + # Transaction + GC(), PessimisticLocking(), - PointInTimeRestore(), + # Background Tasks + Task(), + PD(), + SlowTrendStatistics(), + Snapshot(), + # Tools ResolvedTS(), - Memory(), + PointInTimeRestore(), BackupImport(), - Encryption(), BackupLog(), - SlowTrendStatistics(), + # Advanced Debugging for CPU and Memory + Threads(), + Memory(), + # Infrequently Used StatusServer(), + Encryption(), + 
TTL(), ], # Set 14 or larger to support shared crosshair or shared tooltip. # See https://github.com/grafana/grafana/blob/v10.2.2/public/app/features/dashboard/state/DashboardMigrator.ts#L443-L445 diff --git a/metrics/grafana/tikv_details.json b/metrics/grafana/tikv_details.json index 0ebd7fdbff0..45cc7c23431 100644 --- a/metrics/grafana/tikv_details.json +++ b/metrics/grafana/tikv_details.json @@ -7488,7 +7488,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of raftstore thread", + "description": "The total count of different kinds of commands received", "editable": true, "error": false, "fieldConfig": { @@ -7558,15 +7558,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(raftstore|rs)_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_storage_command_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(raftstore|rs)_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_storage_command_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -7575,7 +7575,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Raft store CPU", + "title": "Storage command total", "tooltip": { "msResolution": true, "shared": true, @@ -7594,9 +7594,9 @@ "yaxes": [ { "decimals": null, - "format": 
"percentunit", + "format": "ops", "label": null, - "logBase": 1, + "logBase": 10, "max": null, "min": null, "show": true @@ -7621,7 +7621,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of async apply", + "description": "The total number of engine asynchronous request errors", "editable": true, "error": false, "fieldConfig": { @@ -7691,15 +7691,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"apply_[0-9]+\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_storage_engine_async_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",status!~\"all|success\"}\n [$__rate_interval]\n)) by (status) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{status}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"apply_[0-9]+\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",status!~\"all|success\"}\n [$__rate_interval]\n)) by (status) ", "refId": "", "step": 10, "target": "" @@ -7708,7 +7708,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Async apply CPU", + "title": "Storage async request error", "tooltip": { "msResolution": true, "shared": true, @@ -7727,7 +7727,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -7750,162 +7750,115 @@ } }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, 
+ "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of store writer thread", + "description": "The time consumed by processing asynchronous write requests", "editable": true, "error": false, "fieldConfig": { "defaults": { "thresholds": { "mode": "absolute", - "steps": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0.8, - "yaxis": "left" - } - ] + "steps": [] } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 7 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, "id": 60, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_write.*\"}\n [$__rate_interval]\n)) by (instance) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_write.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0.8, - "yaxis": "left" - } - ], "timeFrom": null, "timeShift": null, - "title": "Store writer CPU", + "title": "Storage async write duration", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": 
"s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of gRPC", + "description": "The storage async write duration", "editable": true, "error": false, "fieldConfig": { @@ -7968,22 +7921,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"grpc.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "99.99%", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"grpc.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + 
"target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -7992,7 +8013,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "gRPC poll CPU", + "title": "Storage async write duration", "tooltip": { "msResolution": true, "shared": true, @@ -8011,7 +8032,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -8034,178 +8055,122 @@ } }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of scheduler worker", + "description": "The time consumed by processing asynchronous snapshot requests", "editable": true, "error": false, "fieldConfig": { "defaults": { "thresholds": { "mode": "absolute", - "steps": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 3.6, - "yaxis": "left" - } - ] + "steps": [] } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, 
"gridPos": { "h": 7, "w": 12, "x": 0, "y": 14 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, "id": 62, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched_.*\"}\n [$__rate_interval]\n)) by (instance) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 3.6, - "yaxis": "left" - } - ], "timeFrom": null, "timeShift": null, - "title": "Scheduler worker CPU", + "title": "Storage async snapshot duration", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of readpool", + "description": "The storage async snapshot duration", "editable": true, "error": false, "fieldConfig": { "defaults": { "thresholds": { "mode": "absolute", - "steps": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 3.6, - "yaxis": "left" - } - ] + "steps": [] } } }, @@ -8261,70 +8226,99 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + 
"fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_norm.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-normal", + "legendFormat": "99.99%", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_norm.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_high.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n 
[$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-high", + "legendFormat": "99%", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_high.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_low.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-low", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_low.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) / 
sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" - } - ], - "thresholds": [ + }, { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 3.6, - "yaxis": "left" + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Storage ReadPool CPU", + "title": "Storage async snapshot duration", "tooltip": { "msResolution": true, "shared": true, @@ -8343,7 +8337,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -8366,162 +8360,115 @@ } }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of the unified read pool", + "description": "The storage async snapshot duration without the involving of raftstore", "editable": true, "error": false, "fieldConfig": { "defaults": { 
"thresholds": { "mode": "absolute", - "steps": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 7.2, - "yaxis": "left" - } - ] + "steps": [] } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 21 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, "id": 64, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified_read_po.*\"}\n [$__rate_interval]\n)) by (instance) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": 
"{{instance}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified_read_po.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 7.2, - "yaxis": "left" - } - ], "timeFrom": null, "timeShift": null, - "title": "Unified read pool CPU", + "title": "Storage async snapshot duration (pure local read)", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of RocksDB", + "description": "The storage async snapshot duration without the involving of raftstore", "editable": true, "error": false, "fieldConfig": { @@ -8584,22 
+8531,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"rocksdb.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "99.99%", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"rocksdb.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n 
[$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -8608,7 +8623,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "RocksDB CPU", + "title": "Storage async snapshot duration (pure local read)", "tooltip": { "msResolution": true, "shared": true, @@ -8627,7 +8642,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -8649,28 +8664,123 @@ "alignLevel": 0 } }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "Read index propose wait duration associated with async snapshot", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 66, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Read index propose wait duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of coprocessor", + "description": "Read index propose wait duration associated with async snapshot", "editable": true, "error": false, "fieldConfig": { "defaults": { "thresholds": { "mode": "absolute", - "steps": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 7.2, - "yaxis": "left" - } - ] + "steps": [] } } }, @@ -8685,12 +8795,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, + "x": 12, "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 66, + "id": 67, "interval": null, "isNew": true, "legend": { @@ -8726,70 +8836,99 @@ "renderer": "flot", "repeat": null, 
"repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_normal.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-normal", + "legendFormat": "99.99%", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_normal.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_high.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": 
"histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-high", + "legendFormat": "99%", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_high.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_low.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-low", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_low.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" - } - ], - "thresholds": [ + }, { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 7.2, - "yaxis": "left" + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Coprocessor CPU", + "title": "Read index propose wait duration", "tooltip": { "msResolution": true, "shared": true, @@ -8808,7 +8947,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -8831,11 +8970,23 @@ } }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + 
"cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "Read index confirm duration associated with async snapshot", "editable": true, "error": false, "fieldConfig": { @@ -8846,129 +8997,88 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 28 + "x": 0, + "y": 35 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 67, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 68, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"gc_worker.*\"}\n [$__rate_interval]\n)) by (instance) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"gc_worker.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "GC worker CPU", + "title": "Read index confirm duration", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "Read index confirm duration associated 
with async snapshot", "editable": true, "error": false, "fieldConfig": { @@ -8990,12 +9100,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, + "x": 12, "y": 35 }, "height": null, "hideTimeOverride": false, - "id": 68, + "id": 69, "interval": null, "isNew": true, "legend": { @@ -9031,155 +9141,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], - "span": null, - "stack": false, - "steppedLine": false, - "targets": [ + "seriesOverrides": [ { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"background.*\"}\n [$__rate_interval]\n)) by (instance) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"background.*\"}\n [$__rate_interval]\n)) by (instance) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Background Worker CPU", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 }, { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "alias": "avg", + "bars": false, + 
"fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 } ], - "yaxis": { - "align": false, - "alignLevel": 0 - } - }, - { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 35 - }, - "height": null, - "hideTimeOverride": false, - "id": 69, - "interval": null, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxDataPoints": null, - "maxPerRow": null, - "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": null, - "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftlog_fetch.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) 
", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "99.99%", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftlog_fetch.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n 
[$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -9188,7 +9233,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Raftlog fetch Worker CPU", + "title": "Read index confirm duration", "tooltip": { "msResolution": true, "shared": true, @@ -9207,7 +9252,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -9234,7 +9279,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "CPU usage measured over a 30 second window", "editable": true, "error": false, "fieldConfig": { @@ -9304,7 +9349,7 @@ "targets": [ { 
"datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum((\n tikv_storage_process_stat_cpu_usage\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -9312,7 +9357,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum((\n tikv_storage_process_stat_cpu_usage\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -9321,7 +9366,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Import CPU", + "title": "Process Stat Cpu Usage", "tooltip": { "msResolution": true, "shared": true, @@ -9367,7 +9412,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -9430,22 +9475,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(backup-worker|bkwkr|backup_endpoint).*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "99.99%", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(backup-worker|bkwkr|backup_endpoint).*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_full_compact_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_storage_full_compact_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -9454,7 +9567,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Backup CPU", + "title": "Full compaction duration seconds", "tooltip": { "msResolution": true, "shared": true, @@ -9473,7 +9586,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -9500,7 +9613,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -9563,52 
+9676,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdcwkr.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-worker", + "legendFormat": "99.99%", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdcwkr.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-tso", + "legendFormat": "99%", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdc_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "(sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-endpoint", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdc_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "(sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -9617,7 +9768,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "CDC worker CPU", + "title": "Full compaction pause duration", "tooltip": { "msResolution": true, "shared": true, @@ -9636,7 +9787,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -9663,7 +9814,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of raftstore thread", + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -9726,22 +9877,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + 
} + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso_worker\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "99.99%", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso_worker\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -9750,7 +9969,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "TSO Worker CPU", + "title": "Full compaction per-increment duration", "tooltip": { "msResolution": true, "shared": true, @@ -9769,7 +9988,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -9798,7 +10017,7 @@ "targets": [], 
"timeFrom": null, "timeShift": null, - "title": "Thread CPU", + "title": "Storage", "transformations": [], "transparent": false, "type": "row" @@ -9859,7 +10078,7 @@ }, "gridPos": { "h": 7, - "w": 12, + "w": 24, "x": 0, "y": 0 }, @@ -9901,22 +10120,62 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/.*-total/", + "bars": false, + "fill": 1, + "fillBelowTo": null, + "lines": true, + "yaxis": 2, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_ttl_expire_kv_count_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_raftstore_local_read_reject_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}}-reject-by-{{reason}}", "metric": "", - "query": "sum(rate(\n tikv_ttl_expire_kv_count_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_raftstore_local_read_reject_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_local_read_executed_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": 
"{{instance}}-total", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_local_read_executed_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_local_read_executed_stale_read_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-stale-read", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_local_read_executed_stale_read_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -9925,7 +10184,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "TTL expire count", + "title": "Local reader requests", "tooltip": { "msResolution": true, "shared": true, @@ -9965,13 +10224,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Local Reader", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 76, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - 
"description": null, + "description": "The CPU utilization of raftstore thread", "editable": true, "error": false, "fieldConfig": { @@ -9993,12 +10294,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 76, + "id": 77, "interval": null, "isNew": true, "legend": { @@ -10041,7 +10342,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_ttl_expire_kv_size_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(raftstore|rs)_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -10049,7 +10350,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_ttl_expire_kv_size_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(raftstore|rs)_.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -10058,7 +10359,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "TTL expire size", + "title": "Raft store CPU", "tooltip": { "msResolution": true, "shared": true, @@ -10077,7 +10378,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -10104,7 +10405,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The CPU utilization of async apply", "editable": true, "error": false, "fieldConfig": { @@ -10126,12 +10427,12 @@ "gridPos": { "h": 7, "w": 12, - 
"x": 0, - "y": 7 + "x": 12, + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 77, + "id": 78, "interval": null, "isNew": true, "legend": { @@ -10174,7 +10475,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_ttl_checker_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n [$__rate_interval]\n)) by (instance) )", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"apply_[0-9]+\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -10182,7 +10483,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_ttl_checker_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n [$__rate_interval]\n)) by (instance) )", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"apply_[0-9]+\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -10191,7 +10492,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "TTL check progress", + "title": "Async apply CPU", "tooltip": { "msResolution": true, "shared": true, @@ -10237,14 +10538,23 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The CPU utilization of store writer thread", "editable": true, "error": false, 
"fieldConfig": { "defaults": { "thresholds": { "mode": "absolute", - "steps": [] + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" + } + ] } } }, @@ -10259,12 +10569,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 78, + "id": 79, "interval": null, "isNew": true, "legend": { @@ -10307,24 +10617,33 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_ttl_checker_actions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_write.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_ttl_checker_actions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_write.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" + } + ], "timeFrom": null, "timeShift": null, - "title": "TTL checker actions", + "title": "Store writer CPU", "tooltip": { "msResolution": true, "shared": true, @@ -10343,7 +10662,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -10370,7 +10689,7 @@ "bars": false, "cacheTimeout": null, 
"datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when executing GC tasks", + "description": "The CPU utilization of gRPC", "editable": true, "error": false, "fieldConfig": { @@ -10392,12 +10711,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 14 + "x": 12, + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 79, + "id": 80, "interval": null, "isNew": true, "legend": { @@ -10433,90 +10752,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - 
"intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_ttl_checker_compact_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_ttl_checker_compact_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n tikv_ttl_checker_compact_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_ttl_checker_compact_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_ttl_checker_compact_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"grpc.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_ttl_checker_compact_duration_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"grpc.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -10525,7 +10776,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "TTL checker compact duration", + "title": "gRPC poll CPU", "tooltip": { "msResolution": true, "shared": true, @@ -10544,7 +10795,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -10566,136 +10817,28 @@ "alignLevel": 0 } }, - { - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": null, - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": "" - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 14 - }, - "height": null, - "hideTimeOverride": false, - "id": 80, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_ttl_checker_poll_interval\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"tikv_gc_run_interval\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": null, - "metric": "", - "query": "max((\n 
tikv_ttl_checker_poll_interval\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"tikv_gc_run_interval\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "TTL checker poll interval", - "transformations": [], - "transparent": false, - "type": "stat" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "TTL", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 81, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of requests that TiKV sends to PD", + "description": "The CPU utilization of scheduler worker", "editable": true, "error": false, "fieldConfig": { "defaults": { "thresholds": { "mode": "absolute", - "steps": [] + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 3.6, + "yaxis": "left" + } + ] } } }, @@ -10711,11 +10854,11 @@ "h": 7, "w": 12, "x": 0, - "y": 0 + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 82, + "id": 81, "interval": null, "isNew": true, "legend": { @@ -10758,24 +10901,33 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "sum(rate(\n 
tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched_.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 3.6, + "yaxis": "left" + } + ], "timeFrom": null, "timeShift": null, - "title": "PD requests", + "title": "Scheduler worker CPU", "tooltip": { "msResolution": true, "shared": true, @@ -10794,7 +10946,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -10821,14 +10973,23 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed by requests that TiKV sends to PD", + "description": "The CPU utilization of readpool", "editable": true, "error": false, "fieldConfig": { "defaults": { "thresholds": { "mode": "absolute", - "steps": [] + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 3.6, + "yaxis": "left" + } + ] } } }, @@ -10844,11 +11005,11 @@ "h": 7, "w": 12, "x": 12, - "y": 0 + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 83, + "id": 82, "interval": null, "isNew": true, "legend": { @@ -10891,24 +11052,63 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": 
"(sum(rate(\n tikv_pd_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_norm.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}-normal", "metric": "", - "query": "(sum(rate(\n tikv_pd_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_norm.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_high.*\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-high", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_high.*\"}\n [$__rate_interval]\n)) by (instance) ", + 
"refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_low.*\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-low", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"store_read_low.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 3.6, + "yaxis": "left" + } + ], "timeFrom": null, "timeShift": null, - "title": "PD request duration (average)", + "title": "Storage ReadPool CPU", "tooltip": { "msResolution": true, "shared": true, @@ -10927,7 +11127,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -10954,14 +11154,23 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The total number of PD heartbeat messages", + "description": "The CPU utilization of the unified read pool", "editable": true, "error": false, "fieldConfig": { "defaults": { "thresholds": { "mode": "absolute", - "steps": [] + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 7.2, + "yaxis": "left" + } + ] } } }, @@ -10977,11 +11186,11 @@ "h": 7, "w": 12, "x": 0, - "y": 7 + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 84, + "id": 83, "interval": null, "isNew": true, "legend": { @@ -11024,39 +11233,33 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_pd_heartbeat_message_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified_read_po.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_pd_heartbeat_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified_read_po.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" - }, + } + ], + "thresholds": [ { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_pd_pending_heartbeat_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-pending", - "metric": "", - "query": "sum((\n tikv_pd_pending_heartbeat_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", - "refId": "", - "step": 10, - "target": "" + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 7.2, + "yaxis": "left" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "PD heartbeats", + "title": "Unified read pool CPU", "tooltip": { "msResolution": true, "shared": true, @@ -11075,7 +11278,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -11102,7 +11305,7 @@ 
"bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The total number of peers validated by the PD worker", + "description": "The CPU utilization of RocksDB", "editable": true, "error": false, "fieldConfig": { @@ -11125,11 +11328,11 @@ "h": 7, "w": 12, "x": 12, - "y": 7 + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 85, + "id": 84, "interval": null, "isNew": true, "legend": { @@ -11172,15 +11375,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_pd_validate_peer_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"rocksdb.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_pd_validate_peer_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"rocksdb.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -11189,7 +11392,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "PD validate peers", + "title": "RocksDB CPU", "tooltip": { "msResolution": true, "shared": true, @@ -11208,7 +11411,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -11235,14 +11438,23 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of reconnection between TiKV and PD", + "description": 
"The CPU utilization of coprocessor", "editable": true, "error": false, "fieldConfig": { "defaults": { "thresholds": { "mode": "absolute", - "steps": [] + "steps": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 7.2, + "yaxis": "left" + } + ] } } }, @@ -11258,11 +11470,11 @@ "h": 7, "w": 12, "x": 0, - "y": 14 + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 86, + "id": 85, "interval": null, "isNew": true, "legend": { @@ -11305,24 +11517,63 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_pd_reconnect_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_normal.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}-normal", "metric": "", - "query": "sum(delta(\n tikv_pd_reconnect_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_normal.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_high.*\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-high", + "metric": "", + "query": "sum(rate(\n 
tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_high.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_low.*\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-low", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cop_low.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 7.2, + "yaxis": "left" + } + ], "timeFrom": null, "timeShift": null, - "title": "PD reconnection", + "title": "Coprocessor CPU", "tooltip": { "msResolution": true, "shared": true, @@ -11341,7 +11592,7 @@ "yaxes": [ { "decimals": null, - "format": "opm", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -11368,7 +11619,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The forward status of PD client", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -11391,11 +11642,11 @@ "h": 7, "w": 12, "x": 12, - "y": 14 + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 87, + "id": 86, "interval": null, "isNew": true, "legend": { @@ -11438,15 +11689,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "((\n tikv_pd_request_forwarded\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "expr": "sum(rate(\n 
tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"gc_worker.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{host}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "((\n tikv_pd_request_forwarded\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"gc_worker.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -11455,7 +11706,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "PD forward status", + "title": "GC worker CPU", "tooltip": { "msResolution": true, "shared": true, @@ -11474,7 +11725,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -11501,7 +11752,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of TSO requests waiting in the queue.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -11524,11 +11775,11 @@ "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 35 }, "height": null, "hideTimeOverride": false, - "id": 88, + "id": 87, "interval": null, "isNew": true, "legend": { @@ -11571,7 +11822,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_pd_pending_tso_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"background.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, 
"instant": false, @@ -11579,7 +11830,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n tikv_pd_pending_tso_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"background.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -11588,7 +11839,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Pending TSO Requests", + "title": "Background Worker CPU", "tooltip": { "msResolution": true, "shared": true, @@ -11607,7 +11858,7 @@ "yaxes": [ { "decimals": null, - "format": "opm", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -11634,7 +11885,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The slow score of stores", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -11657,11 +11908,11 @@ "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 35 }, "height": null, "hideTimeOverride": false, - "id": 89, + "id": 88, "interval": null, "isNew": true, "legend": { @@ -11704,7 +11955,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_raftstore_slow_score\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftlog_fetch.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -11712,7 +11963,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n tikv_raftstore_slow_score\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) 
by (instance) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftlog_fetch.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -11721,7 +11972,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store Slow Score", + "title": "Raftlog fetch Worker CPU", "tooltip": { "msResolution": true, "shared": true, @@ -11740,7 +11991,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -11767,7 +12018,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration that recorded by inspecting messages.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -11788,13 +12039,13 @@ }, "gridPos": { "h": 7, - "w": 24, + "w": 12, "x": 0, - "y": 28 + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 90, + "id": 89, "interval": null, "isNew": true, "legend": { @@ -11837,15 +12088,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_inspect_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type, le) \n \n \n)) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{type}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_inspect_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by 
(instance, type, le) \n \n \n)) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -11854,7 +12105,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Inspected duration per server", + "title": "Import CPU", "tooltip": { "msResolution": true, "shared": true, @@ -11873,7 +12124,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -11894,55 +12145,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "PD", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 91, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The throughput of disk write per IO type", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -11964,12 +12173,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 0 + "x": 12, + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 92, + "id": 90, "interval": null, "isNew": true, "legend": { @@ -12012,30 +12221,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_io_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}", - "metric": "", - "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(backup-worker|bkwkr|backup_endpoint).*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "total", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"(backup-worker|bkwkr|backup_endpoint).*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -12044,7 +12238,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Write IO bytes", + "title": "Backup CPU", "tooltip": { "msResolution": true, "shared": true, @@ -12063,7 +12257,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -12090,7 +12284,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - 
"description": "The throughput of disk read per IO type", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -12112,12 +12306,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 0 + "x": 0, + "y": 49 }, "height": null, "hideTimeOverride": false, - "id": 93, + "id": 91, "interval": null, "isNew": true, "legend": { @@ -12160,30 +12354,45 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdcwkr.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}-worker", "metric": "", - "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdcwkr.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "total", + "legendFormat": "{{instance}}-tso", "metric": "", - "query": "sum(rate(\n 
tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdc_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-endpoint", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"cdc_.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -12192,7 +12401,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Read IO bytes", + "title": "CDC worker CPU", "tooltip": { "msResolution": true, "shared": true, @@ -12211,7 +12420,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -12238,7 +12447,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The threshold of disk IOs per priority", + "description": "The CPU utilization of raftstore thread", "editable": true, "error": false, "fieldConfig": { @@ -12260,12 +12469,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 7 + "x": 12, + "y": 49 }, "height": null, "hideTimeOverride": false, - "id": 94, + "id": 92, "interval": null, "isNew": true, "legend": { @@ -12308,15 +12517,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_rate_limiter_max_bytes_per_sec\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso_worker\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "avg((\n tikv_rate_limiter_max_bytes_per_sec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"tso_worker\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -12325,7 +12534,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "IO threshold", + "title": "TSO Worker CPU", "tooltip": { "msResolution": true, "shared": true, @@ -12344,7 +12553,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -12365,13 +12574,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Thread CPU", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 93, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, 
"bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "IO rate limiter request wait duration.", + "description": "The throughput of disk write per IO type", "editable": true, "error": false, "fieldConfig": { @@ -12393,12 +12644,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 7 + "x": 0, + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 95, + "id": 94, "interval": null, "isNew": true, "legend": { @@ -12441,30 +12692,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}-99%", + "legendFormat": "{{type}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) )", + "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "total", "metric": "", - "query": "(sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"write\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -12473,7 +12724,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Rate Limiter Request Wait Duration", + "title": "Write IO bytes", "tooltip": { "msResolution": true, "shared": true, @@ -12492,7 +12743,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -12513,55 +12764,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "IO Breakdown", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 96, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - 
"minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed by processing asynchronous write requests", + "description": "The throughput of disk read per IO type", "editable": true, "error": false, "fieldConfig": { @@ -12582,13 +12791,13 @@ }, "gridPos": { "h": 7, - "w": 24, - "x": 0, + "w": 12, + "x": 12, "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 97, + "id": 95, "interval": null, "isNew": true, "legend": { @@ -12624,90 +12833,37 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", + "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "{{type}}", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", + "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", 
"step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "total", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_io_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=\"read\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -12716,7 +12872,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Storage async write duration", + "title": "Read IO bytes", "tooltip": { "msResolution": true, "shared": true, @@ -12735,7 +12891,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -12744,7 +12900,7 @@ }, { "decimals": null, - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -12762,7 +12918,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The store time duration of each request", + "description": "The threshold of disk IOs per priority", "editable": true, "error": false, "fieldConfig": { @@ -12789,7 +12945,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 98, + "id": 96, "interval": null, "isNew": true, "legend": { @@ -12825,90 +12981,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - 
"seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_duration_secs_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "avg((\n tikv_rate_limiter_max_bytes_per_sec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "avg((\n tikv_rate_limiter_max_bytes_per_sec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -12917,7 +13005,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store duration", + "title": "IO threshold", "tooltip": { "msResolution": true, "shared": true, @@ -12936,7 +13024,7 @@ "yaxes": [ { "decimals": 
null, - "format": "s", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -12963,7 +13051,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The apply time duration of each request", + "description": "IO rate limiter request wait duration.", "editable": true, "error": false, "fieldConfig": { @@ -12990,7 +13078,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 99, + "id": 97, "interval": null, "isNew": true, "legend": { @@ -13026,67 +13114,29 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) 
", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "{{type}}-99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_apply_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -13094,22 +13144,7 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_apply_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_apply_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count", - "metric": "", - "query": "sum(rate(\n tikv_raftstore_apply_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "(sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_rate_limiter_request_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" @@ -13118,7 +13153,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Apply duration", + "title": "Rate Limiter Request Wait Duration", "tooltip": { "msResolution": true, "shared": true, @@ -13158,13 +13193,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "IO Breakdown", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, 
+ "height": null, + "hideTimeOverride": false, + "id": 98, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The propose wait time duration of each request", + "description": "The time consumed by processing asynchronous write requests", "editable": true, "error": false, "fieldConfig": { @@ -13185,13 +13262,13 @@ }, "gridPos": { "h": 7, - "w": 12, + "w": 24, "x": 0, - "y": 14 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 100, + "id": 99, "interval": null, "isNew": true, "legend": { @@ -13257,7 +13334,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -13265,14 +13342,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": 
"histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -13280,14 +13357,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", 
"format": "time_series", "hide": false, "instant": false, @@ -13295,14 +13372,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, @@ -13310,7 +13387,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ 
-13319,7 +13396,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store propose wait duration", + "title": "Storage async write duration", "tooltip": { "msResolution": true, "shared": true, @@ -13347,7 +13424,7 @@ }, { "decimals": null, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -13365,7 +13442,208 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The batch wait time duration of each request", + "description": "The store time duration of each request", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 100, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + 
"lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_store_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_store_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_store_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Store duration", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The apply time duration of each request", "editable": true, "error": false, "fieldConfig": { @@ -13388,7 +13666,7 @@ "h": 7, "w": 12, "x": 
12, - "y": 14 + "y": 7 }, "height": null, "hideTimeOverride": false, @@ -13458,7 +13736,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -13466,14 +13744,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -13481,14 +13759,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_apply_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -13496,14 +13774,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_apply_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_apply_duration_secs_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_apply_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, @@ -13511,7 +13789,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_apply_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -13520,7 +13798,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store batch wait duration", + "title": "Apply duration", "tooltip": { "msResolution": true, "shared": true, @@ -13566,7 +13844,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The send-to-write-queue time duration of each request", + "description": "The propose wait time duration of each request", "editable": true, "error": false, "fieldConfig": { @@ -13589,7 +13867,7 @@ "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 14 }, "height": null, "hideTimeOverride": false, @@ -13659,7 +13937,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -13667,14 +13945,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -13682,14 +13960,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": 
"histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -13697,14 +13975,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, 
"target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, @@ -13712,7 +13990,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -13721,7 +13999,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store send to write queue duration", + "title": "Store propose wait duration", "tooltip": { "msResolution": true, "shared": true, @@ -13767,7 +14045,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The send raft message of the proposal duration of each request", + "description": "The batch wait time duration of each request", "editable": true, "error": false, "fieldConfig": { @@ -13790,7 +14068,7 @@ "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 14 }, "height": null, "hideTimeOverride": false, @@ -13860,7 +14138,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": 
"histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -13868,14 +14146,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -13883,14 +14161,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -13898,14 +14176,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n 
tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, @@ -13913,7 +14191,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_store_wf_batch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -13922,7 +14200,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store send proposal duration", + "title": "Store batch wait duration", "tooltip": { "msResolution": true, "shared": true, @@ -13968,7 +14246,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The write kv db end duration of each request", + "description": "The send-to-write-queue time duration of each request", "editable": true, "error": false, "fieldConfig": { @@ -13991,7 +14269,7 @@ "h": 7, "w": 12, "x": 0, - "y": 28 + "y": 21 }, "height": null, "hideTimeOverride": false, @@ -14061,7 +14339,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n 
tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -14069,14 +14347,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -14084,14 +14362,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -14099,14 +14377,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - 
"expr": "sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, @@ -14114,7 +14392,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_store_wf_send_to_queue_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -14123,7 +14401,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store write kv db end duration", + "title": "Store send to write queue duration", "tooltip": { "msResolution": true, "shared": true, @@ -14169,7 +14447,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The before write time duration of each request", + "description": "The send raft message of the proposal duration of each request", "editable": true, "error": false, "fieldConfig": { @@ -14192,7 +14470,7 @@ "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 21 }, "height": null, "hideTimeOverride": false, @@ -14262,7 +14540,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n 
tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -14270,14 +14548,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -14285,14 +14563,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -14300,14 +14578,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": 
"sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, @@ -14315,7 +14593,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_store_wf_send_proposal_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -14324,7 +14602,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store before write duration", + "title": "Store send proposal duration", "tooltip": { "msResolution": true, "shared": true, @@ -14370,7 +14648,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The persist duration of each request", + "description": "The write kv db end duration of each request", "editable": true, "error": false, "fieldConfig": { @@ -14393,7 +14671,7 @@ "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 28 }, "height": null, "hideTimeOverride": false, @@ -14463,7 +14741,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n 
tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -14471,14 +14749,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -14486,14 +14764,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -14501,14 +14779,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, @@ -14516,7 +14794,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_store_wf_write_kvdb_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -14525,7 +14803,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store persist duration", + "title": "Store write kv db end duration", "tooltip": { "msResolution": true, "shared": true, @@ -14571,7 +14849,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The write end duration of each request", + "description": "The before write time duration of each request", "editable": true, "error": false, "fieldConfig": { @@ -14594,7 +14872,7 @@ "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 28 }, "height": null, "hideTimeOverride": false, @@ -14664,7 +14942,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -14672,14 +14950,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -14687,14 +14965,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", 
"step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -14702,14 +14980,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, @@ -14717,7 +14995,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_store_wf_before_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -14726,7 +15004,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store write end duration", + "title": "Store before write duration", "tooltip": { "msResolution": true, "shared": true, @@ -14772,7 +15050,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The commit but not persist duration of each request", + "description": "The persist duration of each request", "editable": true, "error": false, "fieldConfig": { @@ -14795,7 +15073,7 @@ "h": 7, "w": 12, "x": 0, - "y": 42 + "y": 35 }, "height": null, "hideTimeOverride": false, @@ -14865,7 +15143,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": 
"time_series", "hide": false, "instant": false, @@ -14873,14 +15151,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -14888,14 +15166,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n 
tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -14903,14 +15181,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n 
tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, @@ -14918,7 +15196,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_store_wf_persist_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -14927,7 +15205,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store commit but not persist duration", + "title": "Store persist duration", "tooltip": { "msResolution": true, "shared": true, @@ -14973,7 +15251,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The commit and persist duration of each request", + "description": "The write end duration of each request", "editable": true, "error": false, "fieldConfig": { @@ -14996,7 +15274,7 @@ "h": 7, "w": 12, "x": 12, - "y": 42 + "y": 35 }, "height": null, "hideTimeOverride": false, @@ -15066,7 +15344,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ 
-15074,14 +15352,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -15089,14 +15367,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -15104,14 +15382,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, @@ -15119,7 +15397,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_raftstore_store_wf_write_end_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -15128,7 +15406,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store commit and persist duration", + "title": "Store write end duration", "tooltip": { "msResolution": true, "shared": true, @@ -15168,159 +15446,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Raft Waterfall", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 110, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - 
"description": "The time consumed for peer processes to be ready in Raft", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 0 - }, - "heatmap": {}, - "height": null, - "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 111, - "interval": null, - "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "metric": "", - "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Process ready duration", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, 
"datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed for peer processes to be ready in Raft", + "description": "The commit but not persist duration of each request", "editable": true, "error": false, "fieldConfig": { @@ -15342,12 +15474,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 0 + "x": 0, + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 112, + "id": 110, "interval": null, "isNew": true, "legend": { @@ -15413,60 +15545,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%-{{instance}}", + "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%-{{instance}}", + "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) )", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) )", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg-{{instance}}", + "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) )", + "query": "(sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count-{{instance}}", + "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n 
tikv_raftstore_store_wf_commit_not_persist_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -15475,7 +15607,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% Process ready duration per server", + "title": "Store commit but not persist duration", "tooltip": { "msResolution": true, "shared": true, @@ -15516,116 +15648,12 @@ "alignLevel": 0 } }, - { - "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The time duration of store write loop when store-io-pool-size is not zero.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 7 - }, - "heatmap": {}, - "height": null, - "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 113, - "interval": null, - "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Store write loop duration", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time duration of store write loop on each TiKV instance when store-io-pool-size is not zero.", + "description": "The commit and persist duration of each request", "editable": true, "error": false, "fieldConfig": { @@ -15648,11 +15676,11 @@ "h": 7, "w": 12, "x": 12, - "y": 7 + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 114, + "id": 111, "interval": null, "isNew": true, "legend": { @@ -15718,60 +15746,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - 
"legendFormat": "99.99%-{{instance}}", + "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%-{{instance}}", + "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "expr": "(sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg-{{instance}}", + "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "query": "(sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n 
tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count-{{instance}}", + "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_raftstore_store_wf_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -15780,7 +15808,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% Store write loop duration per server", + "title": "Store commit and persist duration", "tooltip": { "msResolution": true, "shared": true, @@ -15820,7 +15848,49 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Raft Waterfall", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 112, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "cacheTimeout": null, "cards": { @@ -15838,7 +15908,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when Raft appends log", + 
"description": "The time consumed for peer processes to be ready in Raft", "editable": true, "error": false, "fieldConfig": { @@ -15853,14 +15923,14 @@ "h": 7, "w": 12, "x": 0, - "y": 14 + "y": 0 }, "heatmap": {}, "height": null, "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 115, + "id": 113, "interval": null, "legend": { "show": false @@ -15877,7 +15947,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", "hide": false, "instant": false, @@ -15885,7 +15955,7 @@ "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -15893,7 +15963,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Append log duration", + "title": "Process ready duration", "tooltip": { "msResolution": true, "shared": true, @@ -15930,7 +16000,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when Raft appends log on each TiKV instance", + "description": "The time consumed for peer processes to be ready in Raft", "editable": true, "error": false, "fieldConfig": { @@ -15953,11 +16023,11 @@ "h": 7, "w": 12, "x": 12, - "y": 
14 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 116, + "id": 114, "interval": null, "isNew": true, "legend": { @@ -16023,7 +16093,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -16031,14 +16101,14 @@ "intervalFactor": 1, "legendFormat": "99.99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", 
"hide": false, "instant": false, @@ -16046,14 +16116,14 @@ "intervalFactor": 1, "legendFormat": "99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_raft_process_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_append_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "expr": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": true, "instant": false, @@ -16061,14 +16131,14 @@ "intervalFactor": 1, "legendFormat": "avg-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_append_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "query": "(sum(rate(\n tikv_raftstore_raft_process_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, "instant": false, @@ -16076,7 +16146,7 @@ "intervalFactor": 1, "legendFormat": "count-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_raftstore_raft_process_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"ready\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -16085,7 +16155,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% Append log duration per server", + "title": "99% Process ready duration per server", "tooltip": { "msResolution": true, "shared": true, @@ -16143,7 +16213,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": 
"The time consumed when Raft commits log", + "description": "The time duration of store write loop when store-io-pool-size is not zero.", "editable": true, "error": false, "fieldConfig": { @@ -16158,14 +16228,14 @@ "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 7 }, "heatmap": {}, "height": null, "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 117, + "id": 115, "interval": null, "legend": { "show": false @@ -16182,7 +16252,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "expr": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", "hide": false, "instant": false, @@ -16190,7 +16260,7 @@ "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -16198,7 +16268,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Commit log duration", + "title": "Store write loop duration", "tooltip": { "msResolution": true, "shared": true, @@ -16235,7 +16305,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when Raft commits log on each TiKV instance", + "description": "The time duration of store write loop on each TiKV instance when store-io-pool-size is not zero.", "editable": true, 
"error": false, "fieldConfig": { @@ -16258,11 +16328,11 @@ "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 118, + "id": 116, "interval": null, "isNew": true, "legend": { @@ -16328,7 +16398,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -16336,14 +16406,14 @@ "intervalFactor": 1, "legendFormat": "99.99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -16351,14 +16421,14 @@ "intervalFactor": 1, "legendFormat": "99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "expr": "(sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": true, "instant": false, @@ -16366,14 +16436,14 @@ "intervalFactor": 1, "legendFormat": "avg-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n 
tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "query": "(sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, "instant": false, @@ -16381,7 +16451,7 @@ "intervalFactor": 1, "legendFormat": "count-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_raftstore_store_write_loop_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -16390,7 +16460,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% Commit log duration per server", + "title": "99% Store write loop duration per server", "tooltip": { "msResolution": true, "shared": true, @@ -16448,7 +16518,7 @@ }, "dataFormat": "tsbuckets", "datasource": 
"${DS_TEST-CLUSTER}", - "description": "The time consumed when Raft applies log", + "description": "The time consumed when Raft appends log", "editable": true, "error": false, "fieldConfig": { @@ -16463,14 +16533,14 @@ "h": 7, "w": 12, "x": 0, - "y": 28 + "y": 14 }, "heatmap": {}, "height": null, "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 119, + "id": 117, "interval": null, "legend": { "show": false @@ -16487,7 +16557,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "expr": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", "hide": false, "instant": false, @@ -16495,7 +16565,7 @@ "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -16503,7 +16573,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Apply log duration", + "title": "Append log duration", "tooltip": { "msResolution": true, "shared": true, @@ -16540,7 +16610,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed for Raft to apply logs per TiKV instance", + "description": "The time consumed when Raft appends log on each TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -16563,11 +16633,11 
@@ "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 120, + "id": 118, "interval": null, "isNew": true, "legend": { @@ -16633,7 +16703,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -16641,14 +16711,14 @@ "intervalFactor": 1, "legendFormat": "99.99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": 
false, "instant": false, @@ -16656,14 +16726,14 @@ "intervalFactor": 1, "legendFormat": "99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_append_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_apply_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "expr": "(sum(rate(\n tikv_raftstore_append_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": true, "instant": false, @@ -16671,14 +16741,14 @@ "intervalFactor": 1, "legendFormat": "avg-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_apply_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (instance) )", + "query": "(sum(rate(\n tikv_raftstore_append_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, "instant": false, @@ -16686,7 +16756,7 @@ "intervalFactor": 1, "legendFormat": "count-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_raftstore_append_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -16695,7 +16765,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% Apply log duration per server", + "title": "99% Append log duration per server", "tooltip": { "msResolution": true, "shared": true, @@ -16753,7 +16823,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed for Raft Client wait connection ready", + "description": "The time consumed when Raft commits log", "editable": true, "error": false, 
"fieldConfig": { @@ -16768,14 +16838,14 @@ "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 21 }, "heatmap": {}, "height": null, "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 121, + "id": 119, "interval": null, "legend": { "show": false @@ -16792,7 +16862,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "expr": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", "hide": false, "instant": false, @@ -16800,7 +16870,7 @@ "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -16808,7 +16878,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Raft Client Wait Connection Ready Duration", + "title": "Commit log duration", "tooltip": { "msResolution": true, "shared": true, @@ -16845,7 +16915,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed for Raft Client wait connection ready per TiKV instance", + "description": "The time consumed when Raft commits log on each TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -16868,11 +16938,11 @@ "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 122, + "id": 120, "interval": 
null, "isNew": true, "legend": { @@ -16938,60 +17008,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%-{{to}}", + "legendFormat": "99.99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%-{{to}}", + "legendFormat": 
"99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_commit_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_server_raft_client_wait_ready_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) / sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) )", + "expr": "(sum(rate(\n tikv_raftstore_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg-{{to}}", + "legendFormat": "avg-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_server_raft_client_wait_ready_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) / sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) )", + "query": "(sum(rate(\n 
tikv_raftstore_commit_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) ", + "expr": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count-{{to}}", + "legendFormat": "count-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) ", + "query": "sum(rate(\n tikv_raftstore_commit_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -17000,7 +17070,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% Raft Client Wait Connection Ready Duration", + "title": "99% Commit log duration per server", "tooltip": { "msResolution": true, "shared": true, @@ -17041,12 +17111,116 @@ "alignLevel": 0 } }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + 
"mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed when Raft applies log", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 121, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Apply log duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": 
{}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The throughput of disk write per IO type", + "description": "The time consumed for Raft to apply logs per TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -17068,12 +17242,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 42 + "x": 12, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 123, + "id": 122, "interval": null, "isNew": true, "legend": { @@ -17109,37 +17283,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_raftstore_io_reschedule_region_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "rechedule-{{instance}}", + "legendFormat": "99.99%-{{instance}}", "metric": "", - "query": "sum((\n tikv_raftstore_io_reschedule_region_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_raftstore_io_reschedule_pending_tasks_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "pending-task-{{instance}}", + "legendFormat": "99%-{{instance}}", "metric": "", - "query": "sum((\n tikv_raftstore_io_reschedule_pending_tasks_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_apply_log_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{instance}}", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_apply_log_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_apply_log_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -17148,7 +17375,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Store io task reschedule", + "title": "99% Apply log duration per server", "tooltip": { "msResolution": true, "shared": true, @@ -17167,7 +17394,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -17189,12 +17416,116 @@ "alignLevel": 0 } }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed for Raft Client wait connection ready", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + 
"x": 0, + "y": 35 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 123, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Raft Client Wait Connection Ready Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when store write task block on each TiKV instance", + "description": "The time consumed for Raft Client wait connection ready per TiKV instance", "editable": true, "error": false, 
"fieldConfig": { @@ -17217,7 +17548,7 @@ "h": 7, "w": 12, "x": 12, - "y": 42 + "y": 35 }, "height": null, "hideTimeOverride": false, @@ -17257,22 +17588,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_msg_block_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "99.99%-{{to}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_msg_block_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + 
"expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{to}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_raft_client_wait_ready_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_server_raft_client_wait_ready_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) / sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{to}}", + "metric": "", + "query": "(sum(rate(\n tikv_server_raft_client_wait_ready_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) / sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) ", + "format": "time_series", + "hide": true, + "instant": false, + 
"interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{to}}", + "metric": "", + "query": "sum(rate(\n tikv_server_raft_client_wait_ready_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (to) ", "refId": "", "step": 10, "target": "" @@ -17281,7 +17680,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% Write task block duration per server", + "title": "99% Raft Client Wait Connection Ready Duration", "tooltip": { "msResolution": true, "shared": true, @@ -17321,55 +17720,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Raft IO", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 125, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The proposal count of a Regions in a tick", + "description": "The throughput of disk write per IO type", "editable": true, "error": false, "fieldConfig": { @@ -17392,11 +17749,11 @@ "h": 7, "w": 12, "x": 0, - "y": 0 + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 126, + "id": 125, "interval": null, "isNew": true, "legend": { @@ -17439,15 +17796,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_proposal_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "sum((\n tikv_raftstore_io_reschedule_region_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "rechedule-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_proposal_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "sum((\n tikv_raftstore_io_reschedule_region_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_raftstore_io_reschedule_pending_tasks_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "pending-task-{{instance}}", + "metric": "", + "query": "sum((\n tikv_raftstore_io_reschedule_pending_tasks_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -17456,7 +17828,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Raft proposals per ready", + "title": "Store io task reschedule", "tooltip": { "msResolution": true, "shared": true, @@ -17502,7 +17874,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of proposals per type", + "description": "The time consumed when store write task block on each 
TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -17525,11 +17897,11 @@ "h": 7, "w": 12, "x": 12, - "y": 0 + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 127, + "id": 126, "interval": null, "isNew": true, "legend": { @@ -17572,15 +17944,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|normal|read_index\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_msg_block_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|normal|read_index\"}\n [$__rate_interval]\n)) by (type) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_msg_block_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -17589,7 +17961,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Raft read/write proposals", + "title": "99% Write task block duration per server", "tooltip": { "msResolution": true, "shared": true, @@ -17608,7 +17980,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -17629,13 +18001,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + 
"targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Raft IO", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 127, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of read proposals which are made by each TiKV instance", + "description": "The proposal count of a Regions in a tick", "editable": true, "error": false, "fieldConfig": { @@ -17658,7 +18072,7 @@ "h": 7, "w": 12, "x": 0, - "y": 7 + "y": 0 }, "height": null, "hideTimeOverride": false, @@ -17705,7 +18119,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|read_index\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_proposal_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -17713,7 +18127,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|read_index\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_proposal_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -17722,7 +18136,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Raft read proposals per server", + "title": "Raft proposals per ready", "tooltip": { "msResolution": true, "shared": true, @@ -17741,7 +18155,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -17768,7 +18182,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of write proposals which are made by each TiKV instance", + "description": "The number of proposals per type", "editable": true, "error": false, "fieldConfig": { @@ -17791,7 +18205,7 @@ "h": 7, "w": 12, "x": 12, - "y": 7 + "y": 0 }, "height": null, "hideTimeOverride": false, @@ -17838,15 +18252,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"normal\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|normal|read_index\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"normal\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|normal|read_index\"}\n 
[$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -17855,7 +18269,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Raft write proposals per server", + "title": "Raft read/write proposals", "tooltip": { "msResolution": true, "shared": true, @@ -17897,23 +18311,11 @@ } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The wait time of each proposal", + "description": "The number of read proposals which are made by each TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -17924,88 +18326,129 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 14 + "y": 7 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, "id": 130, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - 
"reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|read_index\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"local_read|read_index\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Propose wait duration", + "title": "Raft read proposals per server", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + 
"max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The wait time of each proposal in each TiKV instance", + "description": "The number of write proposals which are made by each TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -18028,7 +18471,7 @@ "h": 7, "w": 12, "x": 12, - "y": 14 + "y": 7 }, "height": null, "hideTimeOverride": false, @@ -18068,90 +18511,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%-{{instance}}", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - 
"expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"normal\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%-{{instance}}", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg-{{instance}}", - "metric": "", - "query": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": 
"sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count-{{instance}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_raftstore_proposal_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"normal\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -18160,7 +18535,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% Propose wait duration per server", + "title": "Raft write proposals per server", "tooltip": { "msResolution": true, "shared": true, @@ -18179,7 +18554,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -18218,7 +18593,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The wait time of each store write task", + "description": "The wait time of each proposal", "editable": true, "error": false, "fieldConfig": { @@ -18233,7 +18608,7 @@ "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 14 }, "heatmap": {}, "height": null, @@ -18257,7 +18632,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "expr": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (le) ", "format": "heatmap", "hide": false, "instant": false, @@ -18265,7 +18640,7 @@ "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -18273,7 +18648,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Store write wait duration", + "title": "Propose wait duration", "tooltip": { "msResolution": true, "shared": true, @@ -18310,7 +18685,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The wait time of each store write task in each TiKV instance", + "description": "The wait time of each proposal in each TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -18333,7 +18708,7 @@ "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 14 }, "height": null, "hideTimeOverride": false, @@ -18403,7 +18778,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -18411,14 +18786,14 @@ "intervalFactor": 1, "legendFormat": "99.99%-{{instance}}", "metric": "", - "query": 
"histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -18426,14 +18801,14 @@ "intervalFactor": 1, "legendFormat": "99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "expr": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": true, "instant": false, @@ -18441,14 +18816,14 @@ "intervalFactor": 1, "legendFormat": "avg-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "query": "(sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, "instant": false, @@ -18456,7 +18831,7 @@ "intervalFactor": 1, "legendFormat": "count-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_raftstore_request_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -18465,7 +18840,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% Store write wait duration per server", + "title": "99% Propose wait duration per server", "tooltip": { "msResolution": true, "shared": true, @@ -18523,7 +18898,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The wait time of each apply task", + "description": "The wait time of each store write task", "editable": true, "error": false, "fieldConfig": { @@ -18538,7 +18913,7 @@ "h": 7, "w": 12, "x": 0, - "y": 28 + "y": 21 }, "heatmap": {}, "height": null, @@ -18562,7 +18937,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "expr": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", "hide": false, "instant": false, @@ -18570,7 +18945,7 @@ "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n 
tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -18578,7 +18953,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Apply wait duration", + "title": "Store write wait duration", "tooltip": { "msResolution": true, "shared": true, @@ -18615,7 +18990,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The wait time of each apply task in each TiKV instance", + "description": "The wait time of each store write task in each TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -18638,7 +19013,7 @@ "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 21 }, "height": null, "hideTimeOverride": false, @@ -18708,7 +19083,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -18716,14 +19091,14 @@ "intervalFactor": 1, "legendFormat": "99.99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by 
(instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -18731,14 +19106,14 @@ "intervalFactor": 1, "legendFormat": "99%-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) 
)", + "expr": "(sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": true, "instant": false, @@ -18746,14 +19121,14 @@ "intervalFactor": 1, "legendFormat": "avg-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "query": "(sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": true, "instant": false, @@ -18761,7 +19136,7 @@ "intervalFactor": 1, "legendFormat": 
"count-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_raftstore_store_write_task_wait_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -18770,7 +19145,312 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% Apply wait duration per server", + "title": "99% Store write wait duration per server", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The wait time of each apply task", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 136, + "interval": null, + 
"legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Apply wait duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The wait time of each apply task in each TiKV instance", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + 
"gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "height": null, + "hideTimeOverride": false, + "id": 137, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{instance}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": 
"${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{instance}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{instance}}", + "metric": "", + "query": "(sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) 
by (instance) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_raftstore_apply_wait_time_duration_secs_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "99% Apply wait duration per server", "tooltip": { "msResolution": true, "shared": true, @@ -18850,7 +19530,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 136, + "id": 138, "interval": null, "legend": { "show": false @@ -18954,7 +19634,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 137, + "id": 139, "interval": null, "legend": { "show": false @@ -19051,7 +19731,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 138, + "id": 140, "interval": null, "isNew": true, "legend": { @@ -19184,7 +19864,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 139, + "id": 141, "interval": null, "isNew": true, "legend": { @@ -19335,7 +20015,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 140, + "id": 142, "interval": null, "links": [], "maxDataPoints": 100, @@ -19374,7 +20054,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 141, + "id": 143, "interval": null, "isNew": true, "legend": { @@ -19522,7 +20202,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 142, + "id": 144, "interval": null, "isNew": true, "legend": { @@ -19677,7 +20357,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 143, + "id": 145, "interval": null, "legend": { "show": false @@ -19781,7 +20461,7 @@ "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 144, + "id": 146, "interval": null, "legend": { "show": false @@ 
-19881,7 +20561,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 145, + "id": 147, "interval": null, "links": [], "maxDataPoints": 100, @@ -19920,7 +20600,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 146, + "id": 148, "interval": null, "isNew": true, "legend": { @@ -20053,7 +20733,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 147, + "id": 149, "interval": null, "isNew": true, "legend": { @@ -20186,7 +20866,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 148, + "id": 150, "interval": null, "isNew": true, "legend": { @@ -20319,7 +20999,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 149, + "id": 151, "interval": null, "isNew": true, "legend": { @@ -20452,7 +21132,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 150, + "id": 152, "interval": null, "isNew": true, "legend": { @@ -20585,7 +21265,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 151, + "id": 153, "interval": null, "isNew": true, "legend": { @@ -20721,7 +21401,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 152, + "id": 154, "interval": null, "links": [], "maxDataPoints": 100, @@ -20760,7 +21440,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 153, + "id": 155, "interval": null, "isNew": true, "legend": { @@ -20893,7 +21573,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 154, + "id": 156, "interval": null, "isNew": true, "legend": { @@ -21026,7 +21706,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 155, + "id": 157, "interval": null, "isNew": true, "legend": { @@ -21159,7 +21839,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 156, + "id": 158, "interval": null, "isNew": true, "legend": { @@ -21292,7 +21972,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 157, + "id": 159, "interval": null, "isNew": true, "legend": { @@ -21425,7 +22105,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 158, + "id": 160, "interval": null, "isNew": true, "legend": { @@ -21588,7 
+22268,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 159, + "id": 161, "interval": null, "isNew": true, "legend": { @@ -21724,7 +22404,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 160, + "id": 162, "interval": null, "links": [], "maxDataPoints": 100, @@ -21763,7 +22443,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 161, + "id": 163, "interval": null, "isNew": true, "legend": { @@ -21911,7 +22591,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 162, + "id": 164, "interval": null, "isNew": true, "legend": { @@ -22059,7 +22739,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 163, + "id": 165, "interval": null, "isNew": true, "legend": { @@ -22192,7 +22872,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 164, + "id": 166, "interval": null, "isNew": true, "legend": { @@ -22325,7 +23005,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 165, + "id": 167, "interval": null, "isNew": true, "legend": { @@ -22458,7 +23138,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 166, + "id": 168, "interval": null, "isNew": true, "legend": { @@ -22591,7 +23271,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 167, + "id": 169, "interval": null, "isNew": true, "legend": { @@ -22724,7 +23404,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 168, + "id": 170, "interval": null, "isNew": true, "legend": { @@ -22857,7 +23537,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 169, + "id": 171, "interval": null, "isNew": true, "legend": { @@ -23034,7 +23714,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 170, + "id": 172, "interval": null, "links": [], "maxDataPoints": 100, @@ -23046,7 +23726,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The count of operations per second", "editable": true, "error": false, "fieldConfig": { @@ -23067,13 +23747,13 @@ }, "gridPos": { "h": 7, - "w": 24, + "w": 12, "x": 
0, "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 171, + "id": 173, "interval": null, "isNew": true, "legend": { @@ -23109,237 +23789,52 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "/.*-total/", - "bars": false, - "fill": 1, - "fillBelowTo": null, - "lines": true, - "yaxis": 2, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_local_read_reject_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-reject-by-{{reason}}", - "metric": "", - "query": "sum(rate(\n tikv_raftstore_local_read_reject_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_local_read_executed_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n raft_engine_write_apply_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-total", + "legendFormat": "write", "metric": "", - "query": "sum(rate(\n tikv_raftstore_local_read_executed_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n raft_engine_write_apply_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_local_read_executed_stale_read_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n raft_engine_read_entry_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-stale-read", + "legendFormat": "read_entry", "metric": "", - "query": "sum(rate(\n tikv_raftstore_local_read_executed_stale_read_requests\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n raft_engine_read_entry_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Local reader requests", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - 
"timeShift": null, - "title": "Local Reader", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 172, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The time used by each level in the unified read pool per second. Level 0 refers to small queries.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 173, - "interval": null, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxDataPoints": null, - "maxPerRow": null, - "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": null, - "seriesOverrides": [], - "span": null, - "stack": false, - "steppedLine": 
false, - "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_multilevel_level_elapsed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n [$__rate_interval]\n)) by (level) ", + "expr": "sum(rate(\n raft_engine_read_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{level}}", + "legendFormat": "read_message", "metric": "", - "query": "sum(rate(\n tikv_multilevel_level_elapsed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n [$__rate_interval]\n)) by (level) ", + "query": "sum(rate(\n raft_engine_read_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -23348,7 +23843,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Time used by level", + "title": "Operation", "tooltip": { "msResolution": true, "shared": true, @@ -23367,7 +23862,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -23394,7 +23889,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The chance that level 0 (small) tasks are scheduled in the unified read pool.", + "description": "The time used in write operation", "editable": true, "error": false, "fieldConfig": { @@ -23457,22 +23952,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": 
"negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "((\n tikv_multilevel_level0_chance\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "99.99%", "metric": "", - "query": "((\n tikv_multilevel_level0_chance\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + 
"datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n raft_engine_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n raft_engine_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -23481,7 +24044,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Level 0 chance", + "title": "Write Duration", "tooltip": { "msResolution": true, "shared": true, @@ -23500,7 +24063,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -23527,7 +24090,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of concurrently running 
tasks in the unified read pool.", + "description": "The I/O flow rate", "editable": true, "error": false, "fieldConfig": { @@ -23597,15 +24160,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(avg_over_time(\n tikv_unified_read_pool_running_tasks\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (priority) ", + "expr": "sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{priority}}", + "legendFormat": "write", "metric": "", - "query": "sum(avg_over_time(\n tikv_unified_read_pool_running_tasks\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (priority) ", + "query": "sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n raft_engine_background_rewrite_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "rewrite-{{type}}", + "metric": "", + "query": "sum(rate(\n raft_engine_background_rewrite_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -23614,7 +24192,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Running tasks", + "title": "Flow", "tooltip": { "msResolution": true, "shared": true, @@ -23633,7 +24211,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + 
"format": "binBps", "label": null, "logBase": 1, "max": null, @@ -23656,23 +24234,11 @@ } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "99% duration breakdown of write operation", "editable": true, "error": false, "fieldConfig": { @@ -23683,88 +24249,159 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 7 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, "id": 176, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified-read.*\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_preprocess_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "wait", "metric": "", - "query": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified-read.*\"}\n [$__rate_interval]\n)) by (le) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_preprocess_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "wal", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_apply_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "apply", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_apply_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Unified Read Pool Wait Duration", + "title": "Write Duration Breakdown (99%)", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Unified read pool task execution time during one schedule.", + "description": "The bytes per write", "editable": true, "error": false, "fieldConfig": { @@ -23857,7 +24494,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n 
tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -23865,14 +24502,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -23880,14 +24517,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_size_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_yatp_task_poll_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -23895,14 +24532,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_yatp_task_poll_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n raft_engine_write_size_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": true, "instant": false, @@ -23910,7 +24547,7 @@ "intervalFactor": 1, "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n raft_engine_write_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -23919,7 +24556,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Duration of One Time Slice", + "title": "Bytes / Written", "tooltip": { "msResolution": true, "shared": true, @@ -23938,9 +24575,9 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "bytes", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -23965,7 +24602,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Unified read pool task total execution duration.", + "description": "999% duration breakdown of WAL write operation", "editable": true, "error": false, "fieldConfig": { @@ -24028,90 +24665,67 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "total", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_sync_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "sync", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.999,(\n sum(rate(\n 
raft_engine_sync_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_yatp_task_exec_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_allocate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "allocate", "metric": "", - "query": "(sum(rate(\n tikv_yatp_task_exec_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_allocate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_rotate_log_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "rotate", "metric": "", - "query": "sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_rotate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -24120,7 +24734,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Task Execute Duration", + "title": "WAL Duration Breakdown (999%)", "tooltip": { "msResolution": true, "shared": true, @@ -24141,7 +24755,7 @@ "decimals": null, "format": "s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -24166,7 +24780,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Task schedule number of times.", + "description": "The average number of files", "editable": true, "error": false, "fieldConfig": { @@ -24187,7 +24801,7 @@ }, "gridPos": { "h": 7, - "w": 24, + "w": 12, "x": 0, "y": 21 }, @@ -24229,90 +24843,52 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { 
"datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "avg((\n raft_engine_log_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "{{type}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n raft_engine_log_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "avg((\n raft_engine_swap_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "swap", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n raft_engine_swap_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": 
"${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_yatp_task_execute_times_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "avg((\n raft_engine_recycled_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n tikv_yatp_task_execute_times_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "{{type}}-recycle", "metric": "", - "query": "sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "avg((\n raft_engine_recycled_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -24321,7 +24897,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Task Schedule Times", + "title": "File Count", "tooltip": { "msResolution": true, "shared": true, 
@@ -24340,9 +24916,9 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "short", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -24361,55 +24937,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Unified Read Pool", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 180, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The total count of different kinds of commands received", + "description": "The 99% duration of operations other than write", "editable": true, "error": false, "fieldConfig": { @@ -24431,12 +24965,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 0 + "x": 12, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 181, + "id": 180, "interval": null, "isNew": true, "legend": { @@ -24479,15 +25013,45 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_command_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_entry_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, 
"interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "read_entry", "metric": "", - "query": "sum(rate(\n tikv_storage_command_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_entry_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "read_message", + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_purge_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "purge", + "metric": "", + "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_purge_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -24496,7 
+25060,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Storage command total", + "title": "Other Durations (99%)", "tooltip": { "msResolution": true, "shared": true, @@ -24515,9 +25079,9 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "s", "label": null, - "logBase": 10, + "logBase": 2, "max": null, "min": null, "show": true @@ -24542,7 +25106,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The total number of engine asynchronous request errors", + "description": "The average number of log entries", "editable": true, "error": false, "fieldConfig": { @@ -24563,13 +25127,13 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 0 + "w": 24, + "x": 0, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 182, + "id": 181, "interval": null, "isNew": true, "legend": { @@ -24612,15 +25176,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",status!~\"all|success\"}\n [$__rate_interval]\n)) by (status) ", + "expr": "avg((\n raft_engine_log_entry_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{status}}", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",status!~\"all|success\"}\n [$__rate_interval]\n)) by (status) ", + "query": "avg((\n raft_engine_log_entry_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -24629,7 +25193,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": 
"Storage async request error", + "title": "Entry Count", "tooltip": { "msResolution": true, "shared": true, @@ -24648,7 +25212,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -24669,25 +25233,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Raft Engine", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 182, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed by processing asynchronous write requests", + "description": "The count of get operations", "editable": true, "error": false, "fieldConfig": { @@ -24698,88 +25292,189 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 7 + "y": 0 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, "id": 183, "interval": null, + "isNew": true, 
"legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "memtable", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n 
tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"block_cache_data_hit|block_cache_filter_hit\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "block_cache", + "metric": "", + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"block_cache_data_hit|block_cache_filter_hit\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l0\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "l0", + "metric": "", + "query": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l0\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l1\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "l1", + "metric": "", + "query": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l1\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_get_served\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l2_and_up\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "l2_and_up", + "metric": "", + "query": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l2_and_up\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Storage async write duration", + "title": "Get operations", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The storage async write duration", + "description": "The time consumed when executing get operations", "editable": true, "error": false, "fieldConfig": { @@ -24802,7 +25497,7 @@ "h": 7, "w": 12, "x": 12, - "y": 7 + "y": 0 }, "height": null, "hideTimeOverride": false, @@ -24842,52 +25537,29 @@ "renderer": "flot", "repeat": null, 
"repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "max((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_max\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "max", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "max((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "avg((\n tikv_engine_get_micro_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile99\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -24895,37 +25567,37 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile99\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", + "expr": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile95\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "95%", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) )", + "query": "avg((\n 
tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile95\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", + "expr": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_average\"}\n \n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"write\"}\n [$__rate_interval]\n)) ", + "query": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_average\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -24934,7 +25606,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Storage async write duration", + "title": "Get duration", "tooltip": { "msResolution": true, "shared": true, @@ -24953,9 +25625,9 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "\u00b5s", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": null, "show": true @@ -24976,23 +25648,11 @@ } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": 
"${DS_TEST-CLUSTER}", - "description": "The time consumed by processing asynchronous snapshot requests", + "description": "The count of seek operations", "editable": true, "error": false, "fieldConfig": { @@ -25003,88 +25663,204 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 14 + "y": 7 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, "id": 185, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", 
"intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "seek", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek_found\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "seek_found", + "metric": "", + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek_found\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "next", + "metric": "", + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next_found\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "next_found", + "metric": "", + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next_found\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "prev", + "metric": "", + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev_found\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "prev_found", + "metric": "", + "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev_found\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Storage async snapshot duration", + "title": "Seek operations", "tooltip": { "msResolution": true, "shared": 
true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The storage async snapshot duration", + "description": "The time consumed when executing seek operation", "editable": true, "error": false, "fieldConfig": { @@ -25107,7 +25883,7 @@ "h": 7, "w": 12, "x": 12, - "y": 14 + "y": 7 }, "height": null, "hideTimeOverride": false, @@ -25147,52 +25923,29 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "max((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_max\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "max", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "max((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile99\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -25200,37 +25953,37 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n tikv_engine_seek_micro_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile99\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) )", + "expr": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile95\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "95%", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) )", + "query": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile95\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) ", + "expr": "avg((\n tikv_engine_seek_micro_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_average\"}\n \n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) ", + "query": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_average\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -25239,7 +25992,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Storage async snapshot duration", + "title": "Seek duration", "tooltip": { "msResolution": true, "shared": true, @@ -25258,9 +26011,9 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "\u00b5s", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": null, "show": true @@ -25281,23 +26034,11 @@ } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The storage async snapshot duration without the involving of raftstore", + "description": "The count of write operations", "editable": true, "error": false, "fieldConfig": { @@ -25308,88 +26049,159 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 
14 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, "id": 187, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"write_done_by_self|write_done_by_other\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "done", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_engine_write_served\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"write_done_by_self|write_done_by_other\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_timeout\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "timeout", + "metric": "", + "query": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_timeout\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_with_wal\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "with_wal", + "metric": "", + "query": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_with_wal\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Storage async snapshot duration (pure local read)", + "title": "Write operations", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - 
"decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The storage async snapshot duration without the involving of raftstore", + "description": "The time consumed when executing write operation", "editable": true, "error": false, "fieldConfig": { @@ -25412,7 +26224,7 @@ "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 14 }, "height": null, "hideTimeOverride": false, @@ -25452,52 +26264,29 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "max((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_max\"}\n \n)) ", "format": "time_series", 
"hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "max", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "max((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile99\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -25505,37 +26294,37 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile99\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) )", + "expr": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile95\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "95%", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) )", + "query": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile95\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) ", + "expr": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_average\"}\n \n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "avg", "metric": "", - 
"query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_local_read\"}\n [$__rate_interval]\n)) ", + "query": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_average\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -25544,7 +26333,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Storage async snapshot duration (pure local read)", + "title": "Write duration", "tooltip": { "msResolution": true, "shared": true, @@ -25563,9 +26352,9 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "\u00b5s", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": null, "show": true @@ -25586,23 +26375,11 @@ } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "Read index propose wait duration associated with async snapshot", + "description": "The count of WAL sync operations", "editable": true, "error": false, "fieldConfig": { @@ -25613,88 +26390,129 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 28 + "y": 21 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, "id": 189, "interval": null, + "isNew": true, "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - 
"options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_wal_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "sync", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_engine_wal_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" } ], + 
"thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Read index propose wait duration", + "title": "WAL sync operations", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Read index propose wait duration associated with async snapshot", + "description": "The time consumed when executing write wal operation", "editable": true, "error": false, "fieldConfig": { @@ -25717,7 +26535,7 @@ "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 21 }, "height": null, "hideTimeOverride": false, @@ -25757,52 +26575,29 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": 
"${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "max((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_max\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "max", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "max((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile99\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -25810,37 +26605,37 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile99\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) )", + "expr": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile95\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "95%", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) )", + "query": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile95\"}\n \n)) ", "refId": "", "step": 10, 
"target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) ", + "expr": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_average\"}\n \n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_propose_wait\"}\n [$__rate_interval]\n)) ", + "query": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_average\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -25849,7 +26644,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Read index propose wait duration", + "title": "Write WAL duration", "tooltip": { "msResolution": true, "shared": true, @@ -25868,9 +26663,9 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "\u00b5s", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": null, "show": true @@ -25891,23 +26686,11 @@ } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "Read index confirm duration associated 
with async snapshot", + "description": "The count of compaction and flush operations", "editable": true, "error": false, "fieldConfig": { @@ -25918,88 +26701,129 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 28 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, "id": 191, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "sum(rate(\n tikv_engine_event_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + 
"legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_engine_event_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Read index confirm duration", + "title": "Compaction operations", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Read index confirm duration associated with async snapshot", + "description": "The time consumed when executing WAL sync operation", "editable": true, "error": false, "fieldConfig": { @@ -26022,7 +26846,7 @@ "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 28 }, "height": null, "hideTimeOverride": false, @@ -26062,52 +26886,29 @@ "renderer": "flot", "repeat": null, 
"repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "max((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_max\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "max", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "max((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", 
+ "expr": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile99\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -26115,37 +26916,37 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_engine_async_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile99\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) )", + "expr": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile95\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "95%", "metric": "", - "query": "(sum(rate(\n tikv_storage_engine_async_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) / sum(rate(\n 
tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) )", + "query": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile95\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) ", + "expr": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_average\"}\n \n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_storage_engine_async_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot_read_index_confirm\"}\n [$__rate_interval]\n)) ", + "query": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_average\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -26154,7 +26955,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Read index confirm duration", + "title": "WAL sync duration", "tooltip": { "msResolution": true, "shared": true, @@ -26173,9 +26974,9 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "\u00b5s", "label": null, - "logBase": 1, + "logBase": 10, "max": null, "min": null, "show": true @@ -26200,7 +27001,7 @@ "bars": 
false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "CPU usage measured over a 30 second window", + "description": "Compaction guard actions", "editable": true, "error": false, "fieldConfig": { @@ -26223,7 +27024,7 @@ "h": 7, "w": 12, "x": 0, - "y": 42 + "y": 35 }, "height": null, "hideTimeOverride": false, @@ -26270,15 +27071,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_storage_process_stat_cpu_usage\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_raftstore_compaction_guard_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=~\"default|write\"}\n [$__rate_interval]\n)) by (cf, type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{cf}}-{{ type}}", "metric": "", - "query": "sum((\n tikv_storage_process_stat_cpu_usage\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_raftstore_compaction_guard_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=~\"default|write\"}\n [$__rate_interval]\n)) by (cf, type) ", "refId": "", "step": 10, "target": "" @@ -26287,7 +27088,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Process Stat Cpu Usage", + "title": "Compaction guard actions", "tooltip": { "msResolution": true, "shared": true, @@ -26306,7 +27107,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -26333,7 +27134,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The time consumed when executing the compaction and flush operations", "editable": true, "error": 
false, "fieldConfig": { @@ -26356,7 +27157,7 @@ "h": 7, "w": 12, "x": 12, - "y": 42 + "y": 35 }, "height": null, "hideTimeOverride": false, @@ -26396,52 +27197,29 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "max((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_max\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "max", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "max((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile99\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -26449,37 +27227,37 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile99\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_full_compact_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile95\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "95%", "metric": "", - "query": "(sum(rate(\n tikv_storage_full_compact_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "query": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile95\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_average\"}\n \n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_storage_full_compact_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_average\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -26488,7 +27266,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Full compaction duration seconds", + "title": "Compaction duration", "tooltip": { "msResolution": true, "shared": true, @@ -26507,9 +27285,9 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "\u00b5s", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": null, "show": true @@ -26534,7 +27312,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The time consumed when reading SST files", "editable": true, "error": false, "fieldConfig": { @@ -26557,7 
+27335,7 @@ "h": 7, "w": 12, "x": 0, - "y": 49 + "y": 42 }, "height": null, "hideTimeOverride": false, @@ -26597,52 +27375,29 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "max((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_max\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "max", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "max((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile99\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -26650,37 +27405,37 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile99\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile95\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "95%", "metric": "", - "query": "(sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) 
)", + "query": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile95\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_average\"}\n \n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_storage_full_compact_pause_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_average\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -26689,7 +27444,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Full compaction pause duration", + "title": "SST read duration", "tooltip": { "msResolution": true, "shared": true, @@ -26708,9 +27463,9 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "\u00b5s", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": null, "show": true @@ -26735,7 +27490,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -26758,7 +27513,7 @@ "h": 7, "w": 12, "x": 12, - "y": 49 + "y": 42 }, "height": null, "hideTimeOverride": false, @@ -26798,90 +27553,22 @@ "renderer": 
"flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": 
"${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "sum(rate(\n tikv_engine_compaction_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (cf, reason) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "{{cf}}-{{reason}}", "metric": "", - "query": "sum(rate(\n tikv_storage_full_compact_increment_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_engine_compaction_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (cf, reason) ", 
"refId": "", "step": 10, "target": "" @@ -26890,7 +27577,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Full compaction per-increment duration", + "title": "Compaction reason", "tooltip": { "msResolution": true, "shared": true, @@ -26909,7 +27596,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -26930,55 +27617,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Storage", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 197, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The block cache size. 
Broken down by column family if shared block cache is disabled.", "editable": true, "error": false, "fieldConfig": { @@ -27001,11 +27646,11 @@ "h": 7, "w": 12, "x": 0, - "y": 0 + "y": 49 }, "height": null, "hideTimeOverride": false, - "id": 198, + "id": 197, "interval": null, "isNew": true, "legend": { @@ -27048,30 +27693,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_scheduler_write_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "write-{{instance}}", - "metric": "", - "query": "sum((\n tikv_scheduler_write_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_scheduler_throttle_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) != 0", + "expr": "topk(20,(\n avg((\n tikv_engine_block_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, instance) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "throttle-{{instance}}", + "legendFormat": "{{instance}}-{{cf}}", "metric": "", - "query": "sum((\n tikv_scheduler_throttle_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) != 0", + "query": "topk(20,(\n avg((\n tikv_engine_block_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, instance) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -27080,7 +27710,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - 
"title": "Scheduler flow", + "title": "Block cache size", "tooltip": { "msResolution": true, "shared": true, @@ -27126,7 +27756,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The hit rate of memtable", "editable": true, "error": false, "fieldConfig": { @@ -27149,11 +27779,11 @@ "h": 7, "w": 12, "x": 12, - "y": 0 + "y": 49 }, "height": null, "hideTimeOverride": false, - "id": 199, + "id": 198, "interval": null, "isNew": true, "legend": { @@ -27196,15 +27826,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_scheduler_discard_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / 10000000", + "expr": "(sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_miss\"}\n [$__rate_interval]\n)) ))", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "hit", "metric": "", - "query": "sum((\n tikv_scheduler_discard_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / 10000000", + "query": "(sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_memtable_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_miss\"}\n [$__rate_interval]\n)) ))", "refId": "", "step": 10, "target": "" @@ -27213,7 +27843,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler discard ratio", + "title": "Memtable hit", "tooltip": { "msResolution": true, "shared": true, @@ -27254,116 +27884,12 @@ "alignLevel": 0 } }, - { - "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 7 - }, - "heatmap": {}, - "height": null, - "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 200, - "interval": null, - "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_throttle_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "metric": "", - "query": "sum(rate(\n 
tikv_scheduler_throttle_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Throttle duration", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The flow of different kinds of block cache operations", "editable": true, "error": false, "fieldConfig": { @@ -27385,12 +27911,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 7 + "x": 0, + "y": 56 }, "height": null, "hideTimeOverride": false, - "id": 201, + "id": 199, "interval": null, "isNew": true, "legend": { @@ -27433,15 +27959,105 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "((\n tikv_scheduler_throttle_cf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) != 0", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{cf}}", + "legendFormat": "total_read", "metric": "", - "query": "((\n tikv_scheduler_throttle_cf\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) != 0", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_read\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_write\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "total_written", + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_write\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_bytes_insert\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "data_insert", + "metric": "", + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_bytes_insert\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_insert\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, 
+ "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "filter_insert", + "metric": "", + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_insert\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_evict\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "filter_evict", + "metric": "", + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_evict\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_insert\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "index_insert", + "metric": "", + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_insert\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_evict\"}\n 
[$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "index_evict", + "metric": "", + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_evict\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -27450,7 +28066,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler throttled CF", + "title": "Block cache flow", "tooltip": { "msResolution": true, "shared": true, @@ -27469,9 +28085,9 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "binBps", "label": null, - "logBase": 1, + "logBase": 10, "max": null, "min": null, "show": true @@ -27496,7 +28112,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The hit rate of block cache", "editable": true, "error": false, "fieldConfig": { @@ -27518,12 +28134,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 14 + "x": 12, + "y": 56 }, "height": null, "hideTimeOverride": false, - "id": 202, + "id": 200, "interval": null, "isNew": true, "legend": { @@ -27566,15 +28182,75 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_throttle_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, cf) ", + "expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_miss\"}\n [$__rate_interval]\n)) ))", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}-{{cf}}", + "legendFormat": "all", "metric": "", - "query": "sum(rate(\n tikv_scheduler_throttle_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, cf) ", + "query": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_miss\"}\n [$__rate_interval]\n)) ))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "data", + "metric": "", + "query": "(sum(rate(\n 
tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_miss\"}\n [$__rate_interval]\n)) ))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "filter", + "metric": "", + "query": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_miss\"}\n [$__rate_interval]\n)) ))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "index", + "metric": "", + "query": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_miss\"}\n [$__rate_interval]\n)) ))", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_useful\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_engine_bloom_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_checked\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "bloom prefix", + "metric": "", + "query": "(sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_useful\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_checked\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" @@ -27583,7 +28259,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Flow controller actions", + "title": "Block cache hit", "tooltip": { "msResolution": true, "shared": true, @@ -27602,7 +28278,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -27629,7 +28305,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The flow of different kinds of operations on keys", "editable": true, "error": false, "fieldConfig": { @@ -27651,12 +28327,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 14 + "x": 0, + "y": 63 }, "height": null, "hideTimeOverride": false, - "id": 203, + "id": 201, "interval": null, "isNew": true, "legend": { @@ -27699,60 +28375,45 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{cf}}_l0_flow-{{instance}}", - "metric": "", - "query": "sum((\n 
tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}_flush_flow-{{instance}}", + "legendFormat": "read", "metric": "", - "query": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_read\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_written\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "total_l0_flow-{{instance}}", + "legendFormat": "written", "metric": "", - "query": "sum((\n tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_engine_flow_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_written\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_engine_compaction_num_corrupt_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "total_flush_flow-{{instance}}", + "legendFormat": "corrupt", "metric": "", - "query": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_engine_compaction_num_corrupt_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -27761,7 +28422,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Flush/L0 flow", + "title": "Keys flow", "tooltip": { "msResolution": true, "shared": true, @@ -27780,7 +28441,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -27807,7 +28468,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The count of different kinds of block cache operations", "editable": true, "error": false, "fieldConfig": { @@ -27829,12 +28490,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 21 + "x": 12, + "y": 63 }, "height": null, "hideTimeOverride": false, - "id": 204, + "id": 202, "interval": null, "isNew": true, "legend": { @@ -27877,193 +28538,75 @@ "targets": [ { "datasource": 
"${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_scheduler_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "l0-{{instance}}", + "legendFormat": "total_add", "metric": "", - "query": "max((\n tikv_scheduler_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_scheduler_memtable\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_add\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "memtable-{{instance}}", + "legendFormat": "data_add", "metric": "", - "query": "max((\n tikv_scheduler_memtable\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_add\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n 
tikv_scheduler_l0_avg\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_add\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg_l0-{{instance}}", + "legendFormat": "filter_add", "metric": "", - "query": "max((\n tikv_scheduler_l0_avg\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_add\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Flow controller factors", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } - }, - { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": "", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 
0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 21 - }, - "height": null, - "hideTimeOverride": false, - "id": 205, - "interval": null, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxDataPoints": null, - "maxPerRow": null, - "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": null, - "seriesOverrides": [], - "span": null, - "stack": false, - "steppedLine": false, - "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"kv\"}\n \n)) by (cf) ", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_add\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}", + "legendFormat": "index_add", "metric": "", - "query": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"kv\"}\n \n)) by (cf) ", + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_add\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": 
"${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_scheduler_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (cf) / 10000000", + "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add_failures\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "pending-bytes-{{instance}}", + "legendFormat": "add_failures", "metric": "", - "query": "sum((\n tikv_scheduler_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (cf) / 10000000", + "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add_failures\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -28072,7 +28615,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Compaction pending bytes", + "title": "Block cache operations", "tooltip": { "msResolution": true, "shared": true, @@ -28091,7 +28634,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -28118,7 +28661,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Throttle time for txn storage commands in 1 minute.", + "description": "The flow rate of read operations per type", "editable": true, "error": false, "fieldConfig": { @@ -28141,11 +28684,11 @@ "h": 7, "w": 12, "x": 0, - "y": 28 + "y": 70 }, "height": null, "hideTimeOverride": false, - "id": 206, + "id": 203, "interval": null, "isNew": true, "legend": { @@ -28188,15 +28731,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_txn_command_throttle_time_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "get", "metric": "", - "query": "sum(rate(\n tikv_txn_command_throttle_time_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"iter_bytes_read\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "scan", + "metric": "", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"iter_bytes_read\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -28205,7 +28763,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Txn command throttled duration", + "title": "Read flow", "tooltip": { "msResolution": true, "shared": true, @@ -28224,7 +28782,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -28251,7 +28809,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Throttle 
time for non-txn related processing like analyze or dag in 1 minute.", + "description": "The count of keys in each column family", "editable": true, "error": false, "fieldConfig": { @@ -28274,11 +28832,11 @@ "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 70 }, "height": null, "hideTimeOverride": false, - "id": 207, + "id": 204, "interval": null, "isNew": true, "legend": { @@ -28321,15 +28879,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_non_txn_command_throttle_time_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "sum((\n tikv_engine_estimate_num_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{cf}}", "metric": "", - "query": "sum(rate(\n tikv_non_txn_command_throttle_time_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum((\n tikv_engine_estimate_num_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", "refId": "", "step": 10, "target": "" @@ -28338,7 +28896,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Non-txn command throttled duration", + "title": "Total keys", "tooltip": { "msResolution": true, "shared": true, @@ -28357,7 +28915,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -28378,55 +28936,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Flow Control", - "transformations": [], - "transparent": false, - "type": "row" - }, - 
{ - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 208, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The total number of commands on each stage in commit command", + "description": "The flow of different kinds of write operations", "editable": true, "error": false, "fieldConfig": { @@ -28447,13 +28963,13 @@ }, "gridPos": { "h": 7, - "w": 24, + "w": 12, "x": 0, - "y": 0 + "y": 77 }, "height": null, "hideTimeOverride": false, - "id": 209, + "id": 205, "interval": null, "isNew": true, "legend": { @@ -28496,30 +29012,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_bytes\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "busy-{{instance}}", + "legendFormat": "wal", "metric": "", - "query": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_engine_flow_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_bytes\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (stage) ", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{stage}}", + "legendFormat": "write", "metric": "", - "query": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (stage) ", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -28528,7 +29044,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler stage total", + "title": "Write flow", "tooltip": { "msResolution": true, "shared": true, @@ -28547,7 +29063,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -28574,7 +29090,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when executing commit command", + "description": "The bytes per read", "editable": true, "error": false, "fieldConfig": { @@ -28596,12 +29112,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 7 + "x": 12, + "y": 77 }, "height": null, "hideTimeOverride": false, - "id": 210, + "id": 206, "interval": null, "isNew": 
true, "legend": { @@ -28637,52 +29153,29 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "max((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_max\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "max", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "max((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "avg((\n 
tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile99\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -28690,37 +29183,37 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile99\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_scheduler_command_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile95\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "95%", "metric": "", - "query": "(sum(rate(\n tikv_scheduler_command_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "query": "avg((\n 
tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile95\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_average\"}\n \n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_average\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -28729,7 +29222,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler command duration", + "title": "Bytes / Read", "tooltip": { "msResolution": true, "shared": true, @@ -28748,9 +29241,9 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "bytes", "label": null, - "logBase": 1, + "logBase": 10, "max": null, "min": null, "show": true @@ -28775,7 +29268,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time which is caused by latch wait in commit command", + "description": "The flow rate of compaction operations per type", "editable": true, "error": false, "fieldConfig": { @@ -28797,12 +29290,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 7 + 
"x": 0, + "y": 84 }, "height": null, "hideTimeOverride": false, - "id": 211, + "id": 207, "interval": null, "isNew": true, "legend": { @@ -28838,90 +29331,52 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "read", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "written", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"flush_write_bytes\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n 
[$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "flushed", "metric": "", - "query": "sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"flush_write_bytes\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -28930,7 +29385,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler latch wait duration", + "title": "Compaction flow", "tooltip": { "msResolution": true, "shared": true, @@ -28949,7 +29404,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -28976,7 +29431,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of keys read by a commit command", + "description": "The bytes per write", "editable": true, "error": false, "fieldConfig": { @@ -28998,12 +29453,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 14 + "x": 12, + "y": 84 }, "height": null, "hideTimeOverride": false, - "id": 212, + "id": 208, "interval": null, "isNew": true, "legend": { @@ -29039,52 +29494,29 @@ "renderer": "flot", "repeat": 
null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "max((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_max\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "max", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "max((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "avg((\n tikv_engine_bytes_per_write\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile99\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -29092,37 +29524,37 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile99\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_scheduler_kv_command_key_read_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile95\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "95%", "metric": "", - "query": "(sum(rate(\n tikv_scheduler_kv_command_key_read_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "query": "avg((\n tikv_engine_bytes_per_write\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile95\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_average\"}\n \n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_average\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -29131,7 +29563,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler keys read", + "title": "Bytes / Write", "tooltip": { "msResolution": true, "shared": true, @@ -29150,7 +29582,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -29177,7 +29609,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of keys written by a commit command", + "description": "The read amplification per TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -29199,12 +29631,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 14 + "x": 0, + "y": 91 }, "height": null, "hideTimeOverride": false, - "id": 213, + "id": 209, 
"interval": null, "isNew": true, "legend": { @@ -29240,90 +29672,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n 
\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_scheduler_kv_command_key_write_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_total_read_bytes\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_estimate_useful_bytes\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n tikv_scheduler_kv_command_key_write_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n 
tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "query": "(sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_total_read_bytes\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_estimate_useful_bytes\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" @@ -29332,7 +29696,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler keys written", + "title": "Read amplification", "tooltip": { "msResolution": true, "shared": true, @@ -29351,7 +29715,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -29378,7 +29742,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The keys scan details of each CF when executing commit command", + "description": "The pending bytes to be compacted", "editable": true, "error": false, "fieldConfig": { @@ -29400,12 +29764,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 21 + "x": 12, + "y": 91 }, "height": null, "hideTimeOverride": false, - "id": 214, + "id": 210, "interval": null, "isNew": true, "legend": { @@ -29448,15 +29812,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\"}\n [$__rate_interval]\n)) by (tag) ", + "expr": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", "format": "time_series", "hide": false, 
"instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{tag}}", + "legendFormat": "{{cf}}", "metric": "", - "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\"}\n [$__rate_interval]\n)) by (tag) ", + "query": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", "refId": "", "step": 10, "target": "" @@ -29465,7 +29829,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler scan details", + "title": "Compaction pending bytes", "tooltip": { "msResolution": true, "shared": true, @@ -29484,7 +29848,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -29511,7 +29875,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The keys scan details of lock CF when executing commit command", + "description": "The number of snapshot of each TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -29533,12 +29897,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 21 + "x": 0, + "y": 98 }, "height": null, "hideTimeOverride": false, - "id": 215, + "id": 211, "interval": null, "isNew": true, "legend": { @@ -29581,15 +29945,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"lock\"}\n [$__rate_interval]\n)) by (tag) ", + "expr": "((\n tikv_engine_num_snapshots\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{tag}}", + "legendFormat": "{{instance}}", "metric": "", - 
"query": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"lock\"}\n [$__rate_interval]\n)) by (tag) ", + "query": "((\n tikv_engine_num_snapshots\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -29598,7 +29962,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler scan details [lock]", + "title": "Number of snapshots", "tooltip": { "msResolution": true, "shared": true, @@ -29617,7 +29981,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -29644,7 +30008,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The keys scan details of write CF when executing commit command", + "description": "The compression ratio of each level", "editable": true, "error": false, "fieldConfig": { @@ -29666,12 +30030,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 28 + "x": 12, + "y": 98 }, "height": null, "hideTimeOverride": false, - "id": 216, + "id": 212, "interval": null, "isNew": true, "legend": { @@ -29714,15 +30078,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"write\"}\n [$__rate_interval]\n)) by (tag) ", + "expr": "avg((\n tikv_engine_compression_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{tag}}", + "legendFormat": "{{cf}}-L{{level}}", "metric": "", - "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"write\"}\n [$__rate_interval]\n)) by (tag) ", + "query": "avg((\n tikv_engine_compression_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", "refId": "", "step": 10, "target": "" @@ -29731,7 +30095,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler scan details [write]", + "title": "Compression ratio", "tooltip": { "msResolution": true, "shared": true, @@ -29750,7 +30114,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -29777,7 +30141,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The keys scan details of default CF when executing commit command", + "description": "The number of SST files for different column families in each level", "editable": true, "error": false, "fieldConfig": { @@ -29799,12 +30163,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 28 + "x": 0, + "y": 105 }, "height": null, "hideTimeOverride": false, - "id": 217, + "id": 213, "interval": null, "isNew": true, "legend": { @@ -29847,15 +30211,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"default\"}\n [$__rate_interval]\n)) by (tag) ", + "expr": "avg((\n tikv_engine_num_files_at_level\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{tag}}", + "legendFormat": "{{cf}}-L{{level}}", "metric": "", - "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"default\"}\n [$__rate_interval]\n)) by (tag) ", + "query": "avg((\n tikv_engine_num_files_at_level\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", "refId": "", "step": 10, "target": "" @@ -29864,7 +30228,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler scan details [default]", + "title": "Number files at each level", "tooltip": { "msResolution": true, "shared": true, @@ -29883,7 +30247,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -29910,7 +30274,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed on reading when executing commit command", + "description": "The time that the oldest unreleased snapshot survivals", "editable": true, "error": false, "fieldConfig": { @@ -29932,12 +30296,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 35 + "x": 12, + "y": 105 }, "height": null, "hideTimeOverride": false, - "id": 218, + "id": 214, "interval": null, "isNew": true, "legend": { @@ -29973,90 +30337,155 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "((\n tikv_engine_oldest_snapshot_duration\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%", + "legendFormat": "{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "((\n tikv_engine_oldest_snapshot_duration\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "refId": "", "step": 10, "target": "" - }, + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Oldest snapshots duration", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n 
[$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_scheduler_processing_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n tikv_scheduler_processing_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": "" - }, + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Stall conditions changed of each column family", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, 
+ "y": 112 + }, + "height": null, + "hideTimeOverride": false, + "id": 215, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "expr": "((\n tikv_engine_stall_conditions_changed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "{{instance}}-{{cf}}-{{type}}", "metric": "", - "query": "sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "query": "((\n tikv_engine_stall_conditions_changed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -30065,7 +30494,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler command read duration", + "title": 
"Stall conditions changed of each CF", "tooltip": { "msResolution": true, "shared": true, @@ -30084,7 +30513,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -30106,158 +30535,12 @@ "alignLevel": 0 } }, - { - "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed on checking memory locks", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 35 - }, - "heatmap": {}, - "height": null, - "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 219, - "interval": null, - "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_check_mem_lock_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "metric": "", - "query": "sum(rate(\n tikv_storage_check_mem_lock_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Check memory 
locks duration", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - } - ], - "repeat": "command", - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Scheduler - $command", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 220, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The total number of commands on each stage", + "description": "The time consumed when ingesting SST files", "editable": true, "error": false, "fieldConfig": { @@ -30279,12 +30562,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 0 + "x": 12, + "y": 112 }, "height": null, "hideTimeOverride": false, - "id": 221, + "id": 216, "interval": null, "isNew": true, "legend": { @@ -30320,170 +30603,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + 
"lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{stage}}", + "legendFormat": "99.99%", "metric": "", - "query": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{stage}}", + 
"legendFormat": "99%", "metric": "", - "query": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Scheduler stage total", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } - }, - { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The total writing bytes of commands on each stage", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 222, - "interval": null, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - 
"rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxDataPoints": null, - "maxPerRow": null, - "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": null, - "seriesOverrides": [], - "span": null, - "stack": false, - "steppedLine": false, - "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_scheduler_writing_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "(sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "avg", "metric": "", - "query": "sum((\n tikv_scheduler_writing_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "(sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n 
tikv_snapshot_ingest_sst_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -30492,7 +30695,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler writing bytes", + "title": "Ingest SST duration seconds", "tooltip": { "msResolution": true, "shared": true, @@ -30511,7 +30714,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -30538,7 +30741,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of different priority commands", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -30561,11 +30764,11 @@ "h": 7, "w": 12, "x": 0, - "y": 7 + "y": 119 }, "height": null, "hideTimeOverride": false, - "id": 223, + "id": 217, "interval": null, "isNew": true, "legend": { @@ -30608,15 +30811,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_scheduler_commands_pri_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (priority) ", + "expr": "sum(rate(\n tikv_engine_write_stall_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{priority}}", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n 
tikv_scheduler_commands_pri_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (priority) ", + "query": "sum(rate(\n tikv_engine_write_stall_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -30625,7 +30828,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler priority commands", + "title": "Write Stall Reason", "tooltip": { "msResolution": true, "shared": true, @@ -30644,7 +30847,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -30671,7 +30874,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of pending commands per TiKV instance", + "description": "The time which is caused by write stall", "editable": true, "error": false, "fieldConfig": { @@ -30694,11 +30897,11 @@ "h": 7, "w": 12, "x": 12, - "y": 7 + "y": 119 }, "height": null, "hideTimeOverride": false, - "id": 224, + "id": 218, "interval": null, "isNew": true, "legend": { @@ -30741,15 +30944,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_scheduler_contex_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "max((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_max\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "max", "metric": "", - "query": "sum((\n tikv_scheduler_contex_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "max((\n tikv_engine_write_stall\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_max\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile99\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile95\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_average\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_average\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ 
-30758,7 +31006,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scheduler pending commands", + "title": "Write stall duration", "tooltip": { "msResolution": true, "shared": true, @@ -30777,7 +31025,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -30816,7 +31064,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The level that the external file ingests into", "editable": true, "error": false, "fieldConfig": { @@ -30829,16 +31077,16 @@ }, "gridPos": { "h": 7, - "w": 24, + "w": 12, "x": 0, - "y": 14 + "y": 126 }, "heatmap": {}, "height": null, "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 225, + "id": 219, "interval": null, "legend": { "show": false @@ -30855,7 +31103,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched-worker.*\"}\n [$__rate_interval]\n)) by (le) ", + "expr": "sum(rate(\n tikv_engine_ingestion_picked_level_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", "hide": false, "instant": false, @@ -30863,7 +31111,7 @@ "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched-worker.*\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_engine_ingestion_picked_level_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -30871,7 +31119,7 @@ ], "timeFrom": null, "timeShift": 
null, - "title": "Txn Scheduler Pool Wait Duration", + "title": "Ingestion picked level", "tooltip": { "msResolution": true, "shared": true, @@ -30892,7 +31140,7 @@ "xBucketSize": null, "yAxis": { "decimals": 1, - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -30902,55 +31150,13 @@ "yBucketBound": "upper", "yBucketNumber": null, "yBucketSize": null - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Scheduler", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 226, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of GC tasks processed by gc_worker", + "description": "The memtable size of each column family", "editable": true, "error": false, "fieldConfig": { @@ -30972,12 +31178,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 0 + "x": 12, + "y": 126 }, "height": null, "hideTimeOverride": false, - "id": 227, + "id": 220, "interval": null, "isNew": true, "legend": { @@ -31020,60 +31226,205 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gcworker_gc_tasks_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "expr": "avg((\n tikv_engine_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"mem-tables-all\"}\n \n)) by (cf) 
", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "total-{{task}}", + "legendFormat": "{{cf}}", "metric": "", - "query": "sum(rate(\n tikv_gcworker_gc_tasks_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "query": "avg((\n tikv_engine_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"mem-tables-all\"}\n \n)) by (cf) ", "refId": "", "step": 10, "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memtable size", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_gc_skipped_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "skipped-{{task}}", - "metric": "", - "query": "sum(rate(\n tikv_storage_gc_skipped_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", - "refId": "", - "step": 10, - "target": "" - }, + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + } + ], + "repeat": "db", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": 
"RocksDB - $db", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 221, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 222, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gcworker_gc_task_fail_vec\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "expr": "sum((\n tikv_engine_titandb_num_live_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "failed-{{task}}", + "legendFormat": "live blob file num", "metric": "", - "query": "sum(rate(\n tikv_gcworker_gc_task_fail_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "query": "sum((\n tikv_engine_titandb_num_live_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gc_worker_too_busy\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "sum((\n tikv_engine_titandb_num_obsolete_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "gcworker-too-busy", + "legendFormat": "obsolete blob file num", "metric": "", - "query": "sum(rate(\n tikv_gc_worker_too_busy\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum((\n tikv_engine_titandb_num_obsolete_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -31082,7 +31433,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "GC tasks", + "title": "Blob file count", "tooltip": { "msResolution": true, 
"shared": true, @@ -31128,7 +31479,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when executing GC tasks", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -31155,7 +31506,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 228, + "id": 223, "interval": null, "isNew": true, "legend": { @@ -31191,90 +31542,37 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "sum((\n 
tikv_engine_titandb_live_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "live blob file size", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "sum((\n tikv_engine_titandb_live_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_gcworker_gc_task_duration_vec_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "expr": "sum((\n tikv_engine_titandb_obsolete_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n tikv_gcworker_gc_task_duration_vec_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": 
"${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "obsolete blob file size", "metric": "", - "query": "sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "query": "sum((\n tikv_engine_titandb_obsolete_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -31283,7 +31581,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "GC tasks duration", + "title": "Blob file size", "tooltip": { "msResolution": true, "shared": true, @@ -31302,7 +31600,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -31329,7 +31627,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The GC duration", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -31356,7 +31654,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 229, + "id": 224, "interval": null, "isNew": true, "legend": { @@ -31399,15 +31697,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(1,(\n sum(rate(\n tidb_tikvclient_gc_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "sum((\n tikv_engine_titandb_live_blob_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (instance) ", "format": 
"time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "live blob size", "metric": "", - "query": "histogram_quantile(1,(\n sum(rate(\n tidb_tikvclient_gc_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "sum((\n tikv_engine_titandb_live_blob_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -31416,7 +31714,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "TiDB GC seconds", + "title": "Live blob size", "tooltip": { "msResolution": true, "shared": true, @@ -31435,7 +31733,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -31462,7 +31760,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of TiDB GC worker actions", + "description": "The hit rate of block cache", "editable": true, "error": false, "fieldConfig": { @@ -31489,7 +31787,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 230, + "id": 225, "interval": null, "isNew": true, "legend": { @@ -31532,15 +31830,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tidb_tikvclient_gc_worker_actions_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "(sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) + 
sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_miss\"}\n [$__rate_interval]\n)) ))", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "all", "metric": "", - "query": "sum(rate(\n tidb_tikvclient_gc_worker_actions_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n [$__rate_interval]\n)) by (type) ", + "query": "(sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_miss\"}\n [$__rate_interval]\n)) ))", "refId": "", "step": 10, "target": "" @@ -31549,7 +31847,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "TiDB GC worker actions", + "title": "Blob cache hit", "tooltip": { "msResolution": true, "shared": true, @@ -31568,7 +31866,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -31595,7 +31893,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Progress of ResolveLocks, the first phase of GC", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -31622,7 +31920,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 231, + "id": 226, "interval": null, "isNew": true, "legend": { @@ -31665,15 +31963,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - 
"expr": "max((\n tidb_tikvclient_range_task_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=~\"resolve-locks.*\"}\n \n)) by (result) ", + "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_average\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{result}}", + "legendFormat": "avg", "metric": "", - "query": "max((\n tidb_tikvclient_range_task_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=~\"resolve-locks.*\"}\n \n)) by (result) ", + "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile95\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile99\"}\n \n)) ", + "format": "time_series", 
+ "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_max\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -31682,7 +32025,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "ResolveLocks Progress", + "title": "Iter touched blob file count", "tooltip": { "msResolution": true, "shared": true, @@ -31728,7 +32071,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Progress of TiKV's GC", + "description": "The blob cache size.", "editable": true, "error": false, "fieldConfig": { @@ -31755,7 +32098,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 232, + "id": 227, "interval": null, "isNew": true, "legend": { @@ -31798,15 +32141,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum((\n tikv_gcworker_autogc_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n \n)) by (instance) / sum((\n tikv_raftstore_region_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n \n)) by (instance) )", + "expr": "topk(20,(\n avg((\n tikv_engine_blob_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (cf, instance) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}}-{{cf}}", "metric": "", - "query": "(sum((\n tikv_gcworker_autogc_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n \n)) by (instance) / sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n \n)) by (instance) )", + "query": "topk(20,(\n avg((\n tikv_engine_blob_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (cf, instance) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -31815,7 +32158,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "TiKV Auto GC Progress", + "title": "Blob cache size", "tooltip": { "msResolution": true, "shared": true, @@ -31834,7 +32177,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -31861,7 +32204,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "keys / second", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -31888,7 +32231,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 233, + "id": 228, "interval": null, "isNew": true, "legend": { @@ -31931,15 +32274,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_mvcc_gc_delete_versions_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_average\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{key_mode}}_keys/s", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_storage_mvcc_gc_delete_versions_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile95\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile99\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_key_size\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_max\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -31948,7 +32336,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "GC speed", + "title": "Blob key size", "tooltip": { "msResolution": true, "shared": true, @@ -31967,7 +32355,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -31994,7 +32382,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "SafePoint used for TiKV's Auto GC", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -32021,7 +32409,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 234, + "id": 229, "interval": null, "isNew": true, "legend": { @@ -32064,15 +32452,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_gcworker_autogc_safe_point\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n \n)) by (instance) / (2^18)", + "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_average\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - 
"legendFormat": "{{instance}}", + "legendFormat": "avg", "metric": "", - "query": "max((\n tikv_gcworker_autogc_safe_point\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n \n)) by (instance) / (2^18)", + "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile95\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile99\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_max\"}\n \n)) ", + "format": "time_series", 
+ "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -32081,7 +32514,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "TiKV Auto GC SafePoint", + "title": "Blob value size", "tooltip": { "msResolution": true, "shared": true, @@ -32100,7 +32533,7 @@ "yaxes": [ { "decimals": null, - "format": "dateTimeAsIso", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -32122,162 +32555,12 @@ "alignLevel": 0 } }, - { - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The lifetime of TiDB GC", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": null, - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": "" - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 28 - }, - "height": null, - "hideTimeOverride": false, - "id": 235, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_life_time\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": null, - "metric": "", - "query": "max((\n 
tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_life_time\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GC lifetime", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The interval of TiDB GC", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": null, - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": "" - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 28 - }, - "height": null, - "hideTimeOverride": false, - "id": 236, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_run_interval\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": null, - "metric": "", - "query": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_run_interval\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GC interval", - "transformations": [], - "transparent": false, - "type": "stat" - }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Keys 
handled in GC compaction filter", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -32299,12 +32582,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 35 + "x": 0, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 237, + "id": 230, "interval": null, "isNew": true, "legend": { @@ -32347,135 +32630,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gc_compaction_filtered\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{key_mode}}_filtered", - "metric": "", - "query": "sum(rate(\n tikv_gc_compaction_filtered\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gc_compaction_filter_skip\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{key_mode}}_skipped", - "metric": "", - "query": "sum(rate(\n tikv_gc_compaction_filter_skip\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gc_compaction_mvcc_rollback\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{key_mode}}_mvcc-rollback/mvcc-lock", - 
"metric": "", - "query": "sum(rate(\n tikv_gc_compaction_mvcc_rollback\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gc_compaction_filter_orphan_versions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{key_mode}}_orphan-versions", - "metric": "", - "query": "sum(rate(\n tikv_gc_compaction_filter_orphan_versions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gc_compaction_filter_perform\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{key_mode}}_performed-times", - "metric": "", - "query": "sum(rate(\n tikv_gc_compaction_filter_perform\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gc_compaction_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode, type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{key_mode}}_failure-{{type}}", - "metric": "", - "query": "sum(rate(\n tikv_gc_compaction_failure\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode, type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_met\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{key_mode}}_mvcc-deletion-met", - "metric": "", - "query": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_met\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_handled\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{key_mode}}_mvcc-deletion-handled", - "metric": "", - "query": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_handled\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_wasted\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_get\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, 
"instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{key_mode}}_mvcc-deletion-wasted", + "legendFormat": "get", "metric": "", - "query": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_wasted\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_get\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -32484,7 +32647,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "GC in Compaction Filter", + "title": "Blob get operations", "tooltip": { "msResolution": true, "shared": true, @@ -32503,7 +32666,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -32530,7 +32693,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "GC scan write details", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -32552,12 +32715,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 42 + "x": 12, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 238, + "id": 231, "interval": null, "isNew": true, "legend": { @@ -32600,15 +32763,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", + "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{key_mode}}-{{tag}}", + "legendFormat": 
"avg-{{type}}", "metric": "", - "query": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", + "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%-{{type}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{type}}", + "metric": "", + "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_get_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max-{{type}}", + "metric": "", + "query": "max((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -32617,7 +32825,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "GC scan write details", + "title": "Blob get duration", "tooltip": { "msResolution": true, "shared": true, @@ -32636,7 +32844,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -32663,7 +32871,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "GC scan default details", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -32685,12 +32893,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 42 + "x": 0, + "y": 35 }, "height": null, "hideTimeOverride": false, - "id": 239, + "id": 232, "interval": null, "isNew": true, "legend": { @@ -32733,15 +32941,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", + "expr": "sum((\n tikv_engine_titandb_blob_file_discardable_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (ratio) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{key_mode}}-{{tag}}", + "legendFormat": "{{ratio}}", "metric": "", - "query": "sum(rate(\n tikv_gcworker_gc_keys\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", + "query": "sum((\n tikv_engine_titandb_blob_file_discardable_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (ratio) ", "refId": "", "step": 10, "target": "" @@ -32750,7 +32958,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "GC scan default details", + "title": "Blob file discardable ratio distribution", "tooltip": { "msResolution": true, "shared": true, @@ -32790,55 +32998,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "GC", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 240, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The rate of Raft snapshot messages sent", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -32860,12 +33026,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 0 + "x": 12, + "y": 35 }, "height": null, "hideTimeOverride": false, - "id": 241, + "id": 233, "interval": null, "isNew": true, "legend": { @@ -32908,15 +33074,45 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_raftstore_raft_sent_message_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [1m]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_seek\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "seek", "metric": "", - "query": "sum(delta(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [1m]\n)) by (instance) ", + "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_seek\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_prev\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "prev", + "metric": "", + "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_prev\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_next\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "next", + 
"metric": "", + "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_next\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -32925,7 +33121,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Rate snapshot message", + "title": "Blob iter operations", "tooltip": { "msResolution": true, "shared": true, @@ -32944,7 +33140,7 @@ "yaxes": [ { "decimals": null, - "format": "opm", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -32971,7 +33167,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of snapshots in different states", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -32993,12 +33189,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 0 + "x": 0, + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 242, + "id": 234, "interval": null, "isNew": true, "legend": { @@ -33041,15 +33237,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_raftstore_snapshot_traffic_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "expr": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "avg", "metric": "", - "query": "sum((\n tikv_raftstore_snapshot_traffic_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "query": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", 
+ "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -33058,7 +33299,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - 
"title": "Snapshot state count", + "title": "Blob seek duration", "tooltip": { "msResolution": true, "shared": true, @@ -33077,7 +33318,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -33104,7 +33345,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time snapshot generation tasks waited to be scheduled. ", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -33126,12 +33367,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 7 + "x": 12, + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 243, + "id": 235, "interval": null, "isNew": true, "legend": { @@ -33174,15 +33415,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_generation_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "avg", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_generation_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n 
tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -33191,7 +33477,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% Snapshot generation wait duration", + "title": "Blob next duration", "tooltip": { "msResolution": 
true, "shared": true, @@ -33210,7 +33496,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -33237,7 +33523,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when handling snapshots", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -33259,12 +33545,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 7 + "x": 0, + "y": 49 }, "height": null, "hideTimeOverride": false, - "id": 244, + "id": 236, "interval": null, "isNew": true, "legend": { @@ -33307,45 +33593,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_send_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "send", + "legendFormat": "avg-{{type}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_send_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "apply", + "legendFormat": "95%-{{type}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"generate\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "generate", + "legendFormat": "99%-{{type}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"generate\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n 
tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max-{{type}}", + "metric": "", + "query": "max((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -33354,7 +33655,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% Handle snapshot duration", + "title": "Blob prev duration", "tooltip": { "msResolution": true, "shared": true, @@ -33373,7 +33674,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -33400,7 +33701,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The snapshot size (P99.99).9999", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -33422,12 +33723,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 14 + "x": 12, + "y": 49 }, "height": null, "hideTimeOverride": false, - "id": 245, + "id": 237, "interval": null, "isNew": true, "legend": { @@ -33470,15 +33771,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "sum(rate(\n tikv_engine_blob_flow_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "size", + "legendFormat": "{{type}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -33487,7 +33788,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99.99% Snapshot size", + "title": "Blob keys flow", "tooltip": { "msResolution": true, "shared": true, @@ -33506,7 +33807,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -33533,7 +33834,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of KV within a snapshot in .9999", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -33555,12 +33856,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 14 + "x": 0, + "y": 56 }, "height": null, "hideTimeOverride": false, - "id": 246, + "id": 238, "interval": null, "isNew": true, "legend": { @@ -33603,15 +33904,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_kv_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "avg((\n tikv_engine_blob_file_read_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_average\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "avg", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_kv_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_average\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile99\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile95\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_read_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_max\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_max\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -33620,7 +33966,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99.99% Snapshot KV count", + "title": "Blob file read duration", "tooltip": { "msResolution": true, "shared": true, @@ -33639,7 +33985,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -33666,7 +34012,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Action stats for snapshot generating and applying", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -33688,12 +34034,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 21 + "x": 12, + "y": 56 }, "height": null, "hideTimeOverride": false, - "id": 247, + "id": 239, "interval": null, "isNew": true, "legend": { @@ -33736,30 +34082,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_raftstore_snapshot_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", - "format": "time_series", - "hide": false, - "instant": 
false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}-{{status}}", - "metric": "", - "query": "sum(delta(\n tikv_raftstore_snapshot_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_raftstore_clean_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", + "expr": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "clean-region-by-{{type}}", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(delta(\n tikv_raftstore_clean_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", + "query": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -33768,7 +34099,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Snapshot Actions", + "title": "Blob bytes flow", "tooltip": { "msResolution": true, "shared": true, @@ -33787,7 +34118,7 @@ "yaxes": [ { "decimals": null, - "format": "opm", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -33814,7 +34145,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The speed of sending or receiving snapshot", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -33836,12 +34167,12 @@ "gridPos": { "h": 7, "w": 12, - 
"x": 12, - "y": 21 + "x": 0, + "y": 63 }, "height": null, "hideTimeOverride": false, - "id": 248, + "id": 240, "interval": null, "isNew": true, "legend": { @@ -33884,30 +34215,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_snapshot_limit_transport_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "expr": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_average\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{type}}", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_snapshot_limit_transport_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "query": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_average\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_snapshot_limit_generate_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile99\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-generate", + "legendFormat": "95%", "metric": "", - "query": "sum(rate(\n tikv_snapshot_limit_generate_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile95\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_max\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_max\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -33916,7 +34277,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Snapshot transport speed", + "title": "Blob file write duration", "tooltip": { "msResolution": true, "shared": true, @@ 
-33935,7 +34296,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -33956,55 +34317,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Snapshot", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 249, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of tasks handled by worker", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -34026,12 +34345,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 0 + "x": 12, + "y": 63 }, "height": null, "hideTimeOverride": false, - "id": 250, + "id": 241, "interval": null, "isNew": true, "legend": { @@ -34074,15 +34393,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_worker_handled_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", + "expr": "sum(rate(\n tikv_engine_blob_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{name}}", + "legendFormat": "sync", "metric": "", - "query": "sum(rate(\n tikv_worker_handled_task_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", + "query": "sum(rate(\n tikv_engine_blob_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -34091,7 +34410,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Worker handled tasks", + "title": "Blob file sync operations", "tooltip": { "msResolution": true, "shared": true, @@ -34137,7 +34456,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Current pending and running tasks of worker", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -34159,12 +34478,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 0 + "x": 0, + "y": 70 }, "height": null, "hideTimeOverride": false, - "id": 251, + "id": 242, "interval": null, "isNew": true, "legend": { @@ -34207,15 +34526,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_worker_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (name) ", + "expr": "sum(rate(\n tikv_engine_blob_gc_action_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{name}}", + "legendFormat": "{{type}}", "metric": "", - "query": "sum((\n tikv_worker_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (name) ", + "query": "sum(rate(\n tikv_engine_blob_gc_action_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": 
"" @@ -34224,7 +34543,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Worker pending tasks", + "title": "Blob GC action", "tooltip": { "msResolution": true, "shared": true, @@ -34270,7 +34589,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of tasks handled by future_pool", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -34292,12 +34611,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 7 + "x": 12, + "y": 70 }, "height": null, "hideTimeOverride": false, - "id": 252, + "id": 243, "interval": null, "isNew": true, "legend": { @@ -34340,15 +34659,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_futurepool_handled_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", + "expr": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_average\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{name}}", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_futurepool_handled_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", + "query": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_average\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile95\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile99\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_max\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_max\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -34357,7 +34721,7 @@ "thresholds": 
[], "timeFrom": null, "timeShift": null, - "title": "FuturePool handled tasks", + "title": "Blob file sync duration", "tooltip": { "msResolution": true, "shared": true, @@ -34376,7 +34740,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -34403,7 +34767,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Current pending and running tasks of future_pool", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -34425,12 +34789,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 7 + "x": 0, + "y": 77 }, "height": null, "hideTimeOverride": false, - "id": 253, + "id": 244, "interval": null, "isNew": true, "legend": { @@ -34473,15 +34837,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(avg_over_time(\n tikv_futurepool_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (name) ", + "expr": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_average\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{name}}", + "legendFormat": "avg", "metric": "", - "query": "sum(avg_over_time(\n tikv_futurepool_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (name) ", + "query": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_average\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile95\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile95\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile99\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile99\"}\n \n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_max\"}\n \n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "metric": "", + "query": "max((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_max\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -34490,7 +34899,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "FuturePool pending 
tasks", + "title": "Blob GC duration", "tooltip": { "msResolution": true, "shared": true, @@ -34509,7 +34918,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -34530,159 +34939,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Task", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 254, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed to handle coprocessor read requests", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 0 - }, - "heatmap": {}, - "height": null, - "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 255, - "interval": null, - "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ - { - "datasource": 
"${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "metric": "", - "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request duration", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed to handle coprocessor read requests", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -34705,11 +34968,11 @@ "h": 7, "w": 12, "x": 12, - "y": 0 + "y": 77 }, "height": null, "hideTimeOverride": false, - "id": 256, + "id": 245, "interval": null, "isNew": true, "legend": { @@ -34745,90 +35008,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - 
"bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%-{{req}}", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%-{{req}}", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", + "expr": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg-{{req}}", - "metric": "", - "query": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count-{{req}}", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "query": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -34837,7 +35032,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Request duration", + "title": "Blob GC keys flow", "tooltip": { "msResolution": true, "shared": true, @@ -34856,7 +35051,7 @@ 
"yaxes": [ { "decimals": null, - "format": "s", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -34906,11 +35101,11 @@ "h": 7, "w": 12, "x": 0, - "y": 7 + "y": 84 }, "height": null, "hideTimeOverride": false, - "id": 257, + "id": 246, "interval": null, "isNew": true, "legend": { @@ -34953,148 +35148,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "expr": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_average\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{req}}", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "query": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_average\"}\n \n)) ", "refId": "", "step": 10, "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Total Requests", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + }, { - "decimals": null, - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_input_file\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile95\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile95\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" }, { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } - }, - { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 258, - "interval": null, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxDataPoints": null, - "maxPerRow": null, - "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": null, - 
"seriesOverrides": [], - "span": null, - "stack": false, - "steppedLine": false, - "targets": [ + "datasource": "${DS_TEST-CLUSTER}", + "expr": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile99\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile99\"}\n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", + "expr": "max((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_max\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{reason}}", + "legendFormat": "max", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", + "query": "max((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -35103,7 +35210,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Total Request Errors", + "title": "Blob GC input file size", "tooltip": { "msResolution": true, "shared": true, @@ -35122,7 +35229,7 @@ "yaxes": [ { "decimals": 
null, - "format": "ops", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -35171,12 +35278,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 14 + "x": 12, + "y": 84 }, "height": null, "hideTimeOverride": false, - "id": 259, + "id": 247, "interval": null, "isNew": true, "legend": { @@ -35219,15 +35326,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "expr": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{req}}", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "query": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -35236,7 +35343,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "KV Cursor Operations", + "title": "Blob GC bytes flow", "tooltip": { "msResolution": true, "shared": true, @@ -35255,7 +35362,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -35282,7 +35389,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -35304,12 +35411,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 14 + "x": 0, + "y": 
91 }, "height": null, "hideTimeOverride": false, - "id": 260, + "id": 248, "interval": null, "isNew": true, "legend": { @@ -35345,90 +35452,67 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "expr": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_average\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%-{{req}}", + "legendFormat": "avg", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "query": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_average\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "expr": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile95\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%-{{req}}", + "legendFormat": "95%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "query": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile95\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", + "expr": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile99\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg-{{req}}", + "legendFormat": "99%", "metric": "", - "query": "(sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (req) )", + "query": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile99\"}\n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "expr": "max((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_max\"}\n \n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count-{{req}}", + "legendFormat": "max", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "query": "max((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_max\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -35437,7 +35521,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "KV Cursor Operations", + "title": "Blob GC output file size", "tooltip": { "msResolution": true, "shared": true, @@ -35456,7 +35540,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -35505,12 +35589,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 21 + "x": 12, + "y": 91 }, "height": null, "hideTimeOverride": false, - "id": 261, + "id": 249, "interval": null, "isNew": true, "legend": { @@ -35553,15 +35637,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"internal_delete_skipped_count\"}\n [$__rate_interval]\n)) by (req) ", + "expr": "sum(rate(\n tikv_engine_blob_gc_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "delete_skipped-{{req}}", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"internal_delete_skipped_count\"}\n [$__rate_interval]\n)) by (req) ", + "query": "sum(rate(\n tikv_engine_blob_gc_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -35570,7 +35654,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Total RocksDB Perf Statistics", + "title": "Blob GC file count", "tooltip": { "msResolution": true, "shared": true, @@ -35589,7 +35673,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -35610,13 +35694,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": "titan_db", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Titan - $titan_db", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 250, + 
"interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -35638,12 +35764,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 21 + "x": 0, + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 262, + "id": 251, "interval": null, "isNew": true, "legend": { @@ -35686,15 +35812,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_response_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum((\n tikv_scheduler_write_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "write-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_response_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum((\n tikv_scheduler_write_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_throttle_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) != 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "throttle-{{instance}}", + "metric": "", + "query": "sum((\n tikv_scheduler_throttle_flow\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) != 0", "refId": "", "step": 10, "target": "" @@ -35703,7 +35844,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Total Response Size", + "title": "Scheduler flow", "tooltip": { "msResolution": true, "shared": true, @@ -35743,55 +35884,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Coprocessor Overview", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 263, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when handling coprocessor requests", + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -35813,12 +35912,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, + "x": 12, "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 264, + "id": 252, "interval": null, "isNew": true, "legend": { @@ -35854,90 +35953,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - 
], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%-{{req}}", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "expr": "sum((\n tikv_scheduler_discard_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / 10000000", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%-{{req}}", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg-{{req}}", - "metric": "", - "query": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count-{{req}}", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "query": "sum((\n tikv_scheduler_discard_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / 10000000", "refId": "", "step": 10, "target": "" @@ -35946,7 +35977,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Handle duration", + "title": "Scheduler discard ratio", "tooltip": { "msResolution": true, "shared": true, @@ -35965,7 +35996,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -35987,12 +36018,116 @@ "alignLevel": 0 } }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + 
"color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 7 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 253, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_throttle_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_throttle_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Throttle duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + 
"show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed to handle coprocessor requests per TiKV instance", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -36015,11 +36150,11 @@ "h": 7, "w": 12, "x": 12, - "y": 0 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 265, + "id": 254, "interval": null, "isNew": true, "legend": { @@ -36055,90 +36190,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%-{{req}}-{{instance}}", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "expr": "((\n tikv_scheduler_throttle_cf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) != 0", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%-{{req}}-{{instance}}", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) )", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg-{{req}}-{{instance}}", - "metric": "", - "query": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (req, instance) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count-{{req}}-{{instance}}", + "legendFormat": "{{instance}}-{{cf}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) ", + "query": "((\n tikv_scheduler_throttle_cf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) != 0", "refId": "", "step": 10, "target": "" @@ -36147,7 +36214,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Handle duration by store", + "title": "Scheduler throttled CF", "tooltip": { "msResolution": true, "shared": true, @@ -36166,7 +36233,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -36193,7 +36260,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when coprocessor requests are wait for being handled", + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -36216,11 +36283,11 @@ "h": 7, "w": 12, "x": 0, - "y": 7 + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 266, + "id": 255, "interval": null, "isNew": true, "legend": { @@ -36256,90 +36323,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": 
"${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%-{{req}}", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "expr": "sum(rate(\n tikv_scheduler_throttle_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, cf) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%-{{req}}", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) )", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg-{{req}}", - "metric": "", - "query": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count-{{req}}", + "legendFormat": "{{type}}-{{cf}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) ", + "query": "sum(rate(\n tikv_scheduler_throttle_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, cf) ", "refId": "", "step": 10, "target": "" @@ -36348,7 +36347,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Wait duration", + "title": "Flow controller actions", "tooltip": { "msResolution": true, "shared": true, @@ -36367,7 +36366,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -36394,7 +36393,7 @@ "bars": false, "cacheTimeout": null, 
"datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when coprocessor requests are wait for being handled in each TiKV instance", + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -36417,11 +36416,11 @@ "h": 7, "w": 12, "x": 12, - "y": 7 + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 267, + "id": 256, "interval": null, "isNew": true, "legend": { @@ -36457,90 +36456,67 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], - "span": null, - "stack": false, - "steppedLine": false, - "targets": [ + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "expr": "sum((\n tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%-{{req}}-{{instance}}", + "legendFormat": "{{cf}}_l0_flow-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "query": "sum((\n 
tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "expr": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%-{{req}}-{{instance}}", + "legendFormat": "{{cf}}_flush_flow-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", + "query": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, cf) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) )", + "expr": "sum((\n tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, 
"interval": "", "intervalFactor": 1, - "legendFormat": "avg-{{req}}-{{instance}}", + "legendFormat": "total_l0_flow-{{instance}}", "metric": "", - "query": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) )", + "query": "sum((\n tikv_scheduler_l0_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) ", + "expr": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count-{{req}}-{{instance}}", + "legendFormat": "total_flush_flow-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) ", + "query": "sum((\n tikv_scheduler_flush_flow\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -36549,7 +36525,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Wait duration by store", + "title": "Flush/L0 flow", "tooltip": { "msResolution": true, "shared": 
true, @@ -36568,7 +36544,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -36595,7 +36571,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -36618,11 +36594,11 @@ "h": 7, "w": 12, "x": 0, - "y": 14 + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 268, + "id": 257, "interval": null, "isNew": true, "legend": { @@ -36665,148 +36641,45 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_dag_request_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (vec_type) ", + "expr": "max((\n tikv_scheduler_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{vec_type}}", + "legendFormat": "l0-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_dag_request_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (vec_type) ", + "query": "max((\n tikv_scheduler_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Total DAG Requests", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true }, { - 
"decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } - }, - { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The total number of DAG executors", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 14 - }, - "height": null, - "hideTimeOverride": false, - "id": 269, - "interval": null, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxDataPoints": null, - "maxPerRow": null, - "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": null, - "seriesOverrides": [], - "span": null, - "stack": false, - "steppedLine": false, - "targets": [ + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_scheduler_memtable\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "memtable-{{instance}}", + "metric": "", + "query": "max((\n tikv_scheduler_memtable\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_executor_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "max((\n tikv_scheduler_l0_avg\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "avg_l0-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_executor_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "query": "max((\n tikv_scheduler_l0_avg\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -36815,7 +36688,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Total DAG Executors", + "title": "Flow controller factors", "tooltip": { "msResolution": true, "shared": true, @@ -36834,7 +36707,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -36861,7 +36734,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -36883,12 +36756,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, + "x": 12, "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 270, + "id": 258, "interval": null, "isNew": true, "legend": { @@ -36931,15 +36804,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_scan_details\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (tag) ", + "expr": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"kv\"}\n \n)) by (cf) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{tag}}", + "legendFormat": "{{cf}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (tag) ", + "query": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"kv\"}\n \n)) by (cf) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum((\n tikv_scheduler_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (cf) / 10000000", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "pending-bytes-{{instance}}", + "metric": "", + "query": "sum((\n tikv_scheduler_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (cf) / 10000000", "refId": "", "step": 10, "target": "" @@ -36948,7 +36836,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Total Ops Details (Table Scan)", + "title": "Compaction pending bytes", "tooltip": { "msResolution": true, "shared": true, @@ -36967,7 +36855,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -36994,7 +36882,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "Throttle time for 
txn storage commands in 1 minute.", "editable": true, "error": false, "fieldConfig": { @@ -37016,12 +36904,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 21 + "x": 0, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 271, + "id": 259, "interval": null, "isNew": true, "legend": { @@ -37064,15 +36952,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (tag) ", + "expr": "sum(rate(\n tikv_txn_command_throttle_time_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{tag}}", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (tag) ", + "query": "sum(rate(\n tikv_txn_command_throttle_time_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -37081,7 +36969,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Total Ops Details (Index Scan)", + "title": "Txn command throttled duration", "tooltip": { "msResolution": true, "shared": true, @@ -37100,7 +36988,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -37127,7 +37015,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "Throttle time for non-txn related processing like analyze or dag in 1 minute.", "editable": true, "error": false, "fieldConfig": { @@ -37149,12 +37037,12 @@ 
"gridPos": { "h": 7, "w": 12, - "x": 0, + "x": 12, "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 272, + "id": 260, "interval": null, "isNew": true, "legend": { @@ -37197,15 +37085,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (cf, tag) ", + "expr": "sum(rate(\n tikv_non_txn_command_throttle_time_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}-{{tag}}", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (cf, tag) ", + "query": "sum(rate(\n tikv_non_txn_command_throttle_time_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -37214,7 +37102,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Total Ops Details by CF (Table Scan)", + "title": "Non-txn command throttled duration", "tooltip": { "msResolution": true, "shared": true, @@ -37233,7 +37121,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -37254,13 +37142,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Flow Control", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + 
"editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 261, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The total number of commands on each stage", "editable": true, "error": false, "fieldConfig": { @@ -37282,12 +37212,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 28 + "x": 0, + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 273, + "id": 262, "interval": null, "isNew": true, "legend": { @@ -37330,15 +37260,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (cf, tag) ", + "expr": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}-{{tag}}", + "legendFormat": "{{stage}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (cf, tag) ", + "query": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_stage_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{stage}}", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (stage) ", "refId": "", "step": 10, "target": "" @@ -37347,7 +37292,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Total Ops Details by CF (Index Scan)", + "title": "Scheduler stage total", "tooltip": { "msResolution": true, "shared": true, @@ -37366,7 +37311,7 @@ "yaxes": [ { "decimals": null, - "format": "opm", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -37388,116 +37333,12 @@ "alignLevel": 0 } }, - { - "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed on checking memory locks for coprocessor requests", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 35 - }, - "heatmap": {}, - "height": null, - "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 274, - "interval": null, - "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n 
tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "metric": "", - "query": "sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory lock checking duration", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed on checking memory locks for coprocessor requests", + "description": "The total writing bytes of commands on each stage", "editable": true, "error": false, "fieldConfig": { @@ -37520,11 +37361,11 @@ "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 275, + "id": 263, "interval": null, "isNew": true, "legend": { @@ -37560,90 +37401,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 
2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "sum((\n tikv_scheduler_writing_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum((\n tikv_scheduler_writing_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -37652,7 +37425,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Memory lock checking duration", + "title": "Scheduler writing bytes", "tooltip": { "msResolution": true, "shared": true, @@ -37671,7 +37444,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ 
-37692,55 +37465,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Coprocessor Detail", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 276, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The count of different priority commands", "editable": true, "error": false, "fieldConfig": { @@ -37763,11 +37494,11 @@ "h": 7, "w": 12, "x": 0, - "y": 0 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 277, + "id": 264, "interval": null, "isNew": true, "legend": { @@ -37810,30 +37541,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, state) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{state}}", - "metric": "", - "query": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, state) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n 
tikv_scheduler_commands_pri_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (priority) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-total", + "legendFormat": "{{priority}}", "metric": "", - "query": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_scheduler_commands_pri_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (priority) ", "refId": "", "step": 10, "target": "" @@ -37842,7 +37558,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Threads state", + "title": "Scheduler priority commands", "tooltip": { "msResolution": true, "shared": true, @@ -37861,7 +37577,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -37888,7 +37604,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The count of pending commands per TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -37911,11 +37627,11 @@ "h": 7, "w": 12, "x": 12, - "y": 0 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 278, + "id": 265, "interval": null, "isNew": true, "legend": { @@ -37958,15 +37674,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "topk(20,(\n sum(rate(\n tikv_threads_io_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, io) > 1024\n \n \n)) ", + "expr": "sum((\n tikv_scheduler_contex_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, 
"interval": "", "intervalFactor": 1, - "legendFormat": "{{name}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "topk(20,(\n sum(rate(\n tikv_threads_io_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, io) > 1024\n \n \n)) ", + "query": "sum((\n tikv_scheduler_contex_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -37975,7 +37691,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Threads IO", + "title": "Scheduler pending commands", "tooltip": { "msResolution": true, "shared": true, @@ -37994,7 +37710,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -38016,12 +37732,158 @@ "alignLevel": 0 } }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 14 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 266, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched-worker.*\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sched-worker.*\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Txn Scheduler Pool Wait Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Scheduler", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 267, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + 
"description": "The total number of commands on each stage in commit command", "editable": true, "error": false, "fieldConfig": { @@ -38042,13 +37904,13 @@ }, "gridPos": { "h": 7, - "w": 12, + "w": 24, "x": 0, - "y": 7 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 279, + "id": 268, "interval": null, "isNew": true, "legend": { @@ -38091,15 +37953,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "topk(20,(\n max(rate(\n tikv_thread_voluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", + "expr": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{name}}", + "legendFormat": "busy-{{instance}}", "metric": "", - "query": "topk(20,(\n max(rate(\n tikv_thread_voluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", + "query": "sum(rate(\n tikv_scheduler_too_busy_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_stage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (stage) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{stage}}", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_stage_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (stage) ", "refId": "", "step": 10, "target": "" @@ -38108,7 +37985,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Thread Voluntary Context Switches", + "title": "Scheduler stage total", "tooltip": { "msResolution": true, "shared": true, @@ -38127,7 +38004,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -38154,7 +38031,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The time consumed when executing commit command", "editable": true, "error": false, "fieldConfig": { @@ -38176,12 +38053,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 280, + "id": 269, "interval": null, "isNew": true, "legend": { @@ -38217,31 +38094,99 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], - "span": null, - "stack": false, - "steppedLine": false, - "targets": [ + "seriesOverrides": [ { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "topk(20,(\n max(rate(\n tikv_thread_nonvoluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{name}}", - "metric": "", - "query": "topk(20,(\n max(rate(\n tikv_thread_nonvoluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", - "refId": "", - "step": 10, - "target": "" + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, 
+ "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_command_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_scheduler_command_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / 
sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_scheduler_command_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_scheduler_command_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Thread Nonvoluntary Context Switches", + "title": "Scheduler command duration", "tooltip": { "msResolution": true, "shared": true, @@ -38260,7 +38205,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -38281,55 +38226,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Threads", - "transformations": [], - 
"transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 281, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of get operations", + "description": "The time which is caused by latch wait in commit command", "editable": true, "error": false, "fieldConfig": { @@ -38351,12 +38254,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 0 + "x": 12, + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 282, + "id": 270, "interval": null, "isNew": true, "legend": { @@ -38392,82 +38295,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "memtable", - "metric": "", - "query": "sum(rate(\n tikv_engine_memtable_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"block_cache_data_hit|block_cache_filter_hit\"}\n [$__rate_interval]\n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "block_cache", + "legendFormat": "99.99%", "metric": "", - "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"block_cache_data_hit|block_cache_filter_hit\"}\n [$__rate_interval]\n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l0\"}\n [$__rate_interval]\n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", 
"intervalFactor": 1, - "legendFormat": "l0", + "legendFormat": "99%", "metric": "", - "query": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l0\"}\n [$__rate_interval]\n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l1\"}\n [$__rate_interval]\n)) ", + "expr": "(sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "l1", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l1\"}\n [$__rate_interval]\n)) ", + "query": "(sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, 
{ "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l2_and_up\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "l2_and_up", + "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_engine_get_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_hit_l2_and_up\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_scheduler_latch_wait_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -38476,7 +38387,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Get operations", + "title": "Scheduler latch wait duration", "tooltip": { "msResolution": true, "shared": true, @@ -38495,7 +38406,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -38522,7 +38433,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when executing get operations", + "description": "The count of keys read by a commit command", "editable": true, "error": false, "fieldConfig": { @@ -38544,12 +38455,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 0 + "x": 0, + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 283, + "id": 271, "interval": null, "isNew": true, "legend": { @@ -38585,29 +38496,52 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - 
"seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_max\"}\n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "99.99%", "metric": "", - "query": "max((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_max\"}\n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile99\"}\n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n 
\n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -38615,37 +38549,37 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile99\"}\n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_read_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile95\"}\n \n)) ", + "expr": "(sum(rate(\n tikv_scheduler_kv_command_key_read_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", + "legendFormat": "avg", "metric": "", - "query": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_percentile95\"}\n \n)) ", + "query": "(sum(rate(\n tikv_scheduler_kv_command_key_read_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" 
}, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_average\"}\n \n)) ", + "expr": "sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "count", "metric": "", - "query": "avg((\n tikv_engine_get_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"get_average\"}\n \n)) ", + "query": "sum(rate(\n tikv_scheduler_kv_command_key_read_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -38654,7 +38588,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Get duration", + "title": "Scheduler keys read", "tooltip": { "msResolution": true, "shared": true, @@ -38673,9 +38607,9 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "none", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -38700,7 +38634,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of seek operations", + "description": "The count of keys written by a commit command", "editable": true, "error": false, "fieldConfig": { @@ -38722,12 +38656,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 7 + "x": 12, + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 284, + "id": 272, "interval": null, "isNew": true, "legend": { @@ -38763,97 +38697,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ 
+ { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "seek", - "metric": "", - "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek_found\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "seek_found", - "metric": "", - "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_seek_found\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next\"}\n [$__rate_interval]\n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n 
tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "next", + "legendFormat": "99.99%", "metric": "", - "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next\"}\n [$__rate_interval]\n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next_found\"}\n [$__rate_interval]\n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "next_found", + "legendFormat": "99%", "metric": "", - "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_next_found\"}\n [$__rate_interval]\n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_kv_command_key_write_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": 
"${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev\"}\n [$__rate_interval]\n)) ", + "expr": "(sum(rate(\n tikv_scheduler_kv_command_key_write_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "prev", + "legendFormat": "avg", "metric": "", - "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev\"}\n [$__rate_interval]\n)) ", + "query": "(sum(rate(\n tikv_scheduler_kv_command_key_write_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev_found\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "prev_found", + "legendFormat": "count", 
"metric": "", - "query": "sum(rate(\n tikv_engine_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"number_db_prev_found\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_scheduler_kv_command_key_write_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -38862,7 +38789,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Seek operations", + "title": "Scheduler keys written", "tooltip": { "msResolution": true, "shared": true, @@ -38881,7 +38808,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -38908,7 +38835,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when executing seek operation", + "description": "The keys scan details of each CF when executing commit command", "editable": true, "error": false, "fieldConfig": { @@ -38930,12 +38857,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 7 + "x": 0, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 285, + "id": 273, "interval": null, "isNew": true, "legend": { @@ -38978,60 +38905,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_max\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "max", - "metric": "", - "query": "max((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_max\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n 
tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile99\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile99\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile95\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_percentile95\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_average\"}\n \n)) ", + "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "{{tag}}", "metric": "", - "query": "avg((\n tikv_engine_seek_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"seek_average\"}\n \n)) ", + "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\"}\n [$__rate_interval]\n)) by (tag) ", "refId": "", "step": 10, "target": "" @@ -39040,7 +38922,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Seek duration", + "title": "Scheduler scan details", "tooltip": { "msResolution": true, "shared": true, @@ -39059,9 +38941,9 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "none", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -39086,7 +38968,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of write operations", + "description": "The keys scan details of lock CF when executing commit command", "editable": true, "error": false, "fieldConfig": { @@ -39108,12 +38990,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 14 + "x": 12, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 286, + "id": 274, "interval": null, "isNew": true, "legend": { @@ -39156,45 +39038,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"write_done_by_self|write_done_by_other\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "done", - "metric": "", - "query": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=~\"write_done_by_self|write_done_by_other\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_timeout\"}\n 
[$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "timeout", - "metric": "", - "query": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_timeout\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_with_wal\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"lock\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "with_wal", + "legendFormat": "{{tag}}", "metric": "", - "query": "sum(rate(\n tikv_engine_write_served\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_with_wal\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"lock\"}\n [$__rate_interval]\n)) by (tag) ", "refId": "", "step": 10, "target": "" @@ -39203,7 +39055,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Write operations", + "title": "Scheduler scan details [lock]", "tooltip": { "msResolution": true, "shared": true, @@ -39222,7 +39074,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -39249,7 +39101,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when executing write operation", + 
"description": "The keys scan details of write CF when executing commit command", "editable": true, "error": false, "fieldConfig": { @@ -39271,12 +39123,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 14 + "x": 0, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 287, + "id": 275, "interval": null, "isNew": true, "legend": { @@ -39319,60 +39171,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_max\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "max", - "metric": "", - "query": "max((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_max\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile99\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile99\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile95\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n 
tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_percentile95\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_average\"}\n \n)) ", + "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"write\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "{{tag}}", "metric": "", - "query": "avg((\n tikv_engine_write_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_average\"}\n \n)) ", + "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"write\"}\n [$__rate_interval]\n)) by (tag) ", "refId": "", "step": 10, "target": "" @@ -39381,7 +39188,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Write duration", + "title": "Scheduler scan details [write]", "tooltip": { "msResolution": true, "shared": true, @@ -39400,9 +39207,9 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "none", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -39427,7 +39234,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of WAL sync operations", + "description": "The keys scan details of default CF when executing commit command", "editable": true, "error": false, "fieldConfig": { @@ -39449,12 +39256,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 21 
+ "x": 12, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 288, + "id": 276, "interval": null, "isNew": true, "legend": { @@ -39497,15 +39304,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_wal_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"default\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "sync", + "legendFormat": "{{tag}}", "metric": "", - "query": "sum(rate(\n tikv_engine_wal_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_scheduler_kv_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"$command\", cf=\"default\"}\n [$__rate_interval]\n)) by (tag) ", "refId": "", "step": 10, "target": "" @@ -39514,7 +39321,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "WAL sync operations", + "title": "Scheduler scan details [default]", "tooltip": { "msResolution": true, "shared": true, @@ -39533,7 +39340,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -39560,7 +39367,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when executing write wal operation", + "description": "The time consumed on reading when executing commit command", "editable": true, "error": false, "fieldConfig": { @@ -39582,12 +39389,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 21 + "x": 0, + "y": 35 }, "height": null, "hideTimeOverride": false, - "id": 
289, + "id": 277, "interval": null, "isNew": true, "legend": { @@ -39623,29 +39430,52 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_max\"}\n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "99.99%", "metric": "", - "query": "max((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_max\"}\n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_write_wal_time_micro_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile99\"}\n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -39653,37 +39483,37 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile99\"}\n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_scheduler_processing_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile95\"}\n \n)) ", + "expr": "(sum(rate(\n tikv_scheduler_processing_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", + "legendFormat": "avg", "metric": "", - "query": "avg((\n tikv_engine_write_wal_time_micro_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_percentile95\"}\n \n)) ", + "query": "(sum(rate(\n tikv_scheduler_processing_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_average\"}\n \n)) ", + "expr": "sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "count", "metric": "", - "query": "avg((\n tikv_engine_write_wal_time_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_wal_micros_average\"}\n \n)) ", + "query": "sum(rate(\n tikv_scheduler_processing_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -39692,7 +39522,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Write WAL duration", + "title": "Scheduler command read duration", "tooltip": { "msResolution": true, "shared": true, @@ -39711,9 +39541,9 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "s", "label": 
null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -39734,11 +39564,23 @@ } }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of compaction and flush operations", + "description": "The time consumed on checking memory locks", "editable": true, "error": false, "fieldConfig": { @@ -39749,129 +39591,234 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 28 + "x": 12, + "y": 35 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 290, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 278, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n 
tikv_engine_event_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_storage_check_mem_lock_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_engine_event_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum(rate(\n tikv_storage_check_mem_lock_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Compaction operations", + "title": "Check memory locks duration", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + } + ], + "repeat": "command", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Scheduler - $command", + 
"transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 279, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed to handle coprocessor read requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 0 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 280, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + 
"legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "timeFrom": null, + "timeShift": null, + "title": "Request duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when executing WAL sync operation", + "description": "The time consumed to handle coprocessor read requests", "editable": true, "error": false, "fieldConfig": { @@ -39894,11 +39841,11 @@ "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 291, + "id": 281, "interval": null, "isNew": true, "legend": { @@ -39934,67 +39881,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": 
"${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_max\"}\n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "99.99%-{{req}}", "metric": "", - "query": "max((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_max\"}\n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile99\"}\n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "99%-{{req}}", "metric": "", - "query": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile99\"}\n \n)) ", + "query": 
"histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile95\"}\n \n)) ", + "expr": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", + "legendFormat": "avg-{{req}}", "metric": "", - "query": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_percentile95\"}\n \n)) ", + "query": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_average\"}\n \n)) ", + "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "count-{{req}}", "metric": "", - "query": "avg((\n tikv_engine_wal_file_sync_micro_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_sync_average\"}\n \n)) ", + "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "refId": "", "step": 10, "target": "" @@ -40003,7 +39973,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "WAL sync duration", + "title": "Request duration", "tooltip": { "msResolution": true, "shared": true, @@ -40022,9 +39992,9 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "s", "label": null, - "logBase": 10, + "logBase": 1, "max": null, "min": null, "show": true @@ -40049,7 +40019,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Compaction guard actions", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -40072,11 +40042,11 @@ "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 292, + "id": 282, "interval": null, "isNew": true, "legend": { @@ -40119,15 +40089,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_raftstore_compaction_guard_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=~\"default|write\"}\n [$__rate_interval]\n)) by (cf, type) ", + "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (req) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}-{{ type}}", + "legendFormat": "{{req}}", "metric": "", - "query": "sum(rate(\n tikv_raftstore_compaction_guard_action_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=~\"default|write\"}\n [$__rate_interval]\n)) by (cf, type) ", + "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "refId": "", "step": 10, "target": "" @@ -40136,7 +40106,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Compaction guard actions", + "title": "Total Requests", "tooltip": { "msResolution": true, "shared": true, @@ -40182,7 +40152,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when executing the compaction and flush operations", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -40205,11 +40175,11 @@ "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 293, + "id": 283, "interval": null, "isNew": true, "legend": { @@ -40252,60 +40222,148 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_max\"}\n \n)) ", + "expr": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "{{reason}}", "metric": "", - "query": "max((\n tikv_engine_compaction_time\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_max\"}\n \n)) ", + "query": "sum(rate(\n tikv_coprocessor_request_error\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", "refId": "", "step": 10, "target": "" - }, + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total Request Errors", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile99\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile99\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile95\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n tikv_engine_compaction_time\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_percentile95\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "height": null, + "hideTimeOverride": false, + "id": 284, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_average\"}\n \n)) ", + "expr": "sum(rate(\n 
tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "{{req}}", "metric": "", - "query": "avg((\n tikv_engine_compaction_time\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"compaction_time_average\"}\n \n)) ", + "query": "sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "refId": "", "step": 10, "target": "" @@ -40314,7 +40372,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Compaction duration", + "title": "KV Cursor Operations", "tooltip": { "msResolution": true, "shared": true, @@ -40333,9 +40391,9 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "none", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -40360,7 +40418,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when reading SST files", + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -40382,12 +40440,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 42 + "x": 12, + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 294, + "id": 285, "interval": null, "isNew": true, "legend": { @@ -40423,67 +40481,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + 
"yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_max\"}\n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "99.99%-{{req}}", "metric": "", - "query": "max((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_max\"}\n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile99\"}\n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "99%-{{req}}", "metric": "", - "query": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile99\"}\n \n)) ", + 
"query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_scan_keys_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile95\"}\n \n)) ", + "expr": "(sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", + "legendFormat": "avg-{{req}}", "metric": "", - "query": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_percentile95\"}\n \n)) ", + "query": "(sum(rate(\n tikv_coprocessor_scan_keys_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_average\"}\n \n)) ", + "expr": "sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by 
(req) ", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "count-{{req}}", "metric": "", - "query": "avg((\n tikv_engine_sst_read_micros\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"sst_read_micros_average\"}\n \n)) ", + "query": "sum(rate(\n tikv_coprocessor_scan_keys_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "refId": "", "step": 10, "target": "" @@ -40492,7 +40573,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "SST read duration", + "title": "KV Cursor Operations", "tooltip": { "msResolution": true, "shared": true, @@ -40511,9 +40592,9 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "short", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -40560,12 +40641,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 42 + "x": 0, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 295, + "id": 286, "interval": null, "isNew": true, "legend": { @@ -40608,15 +40689,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_compaction_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (cf, reason) ", + "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"internal_delete_skipped_count\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}-{{reason}}", + "legendFormat": "delete_skipped-{{req}}", "metric": "", - "query": "sum(rate(\n tikv_engine_compaction_reason\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (cf, reason) ", + "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"internal_delete_skipped_count\"}\n [$__rate_interval]\n)) by (req) ", "refId": "", "step": 10, "target": "" @@ -40625,7 +40706,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Compaction reason", + "title": "Total RocksDB Perf Statistics", "tooltip": { "msResolution": true, "shared": true, @@ -40644,7 +40725,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -40671,7 +40752,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The block cache size. Broken down by column family if shared block cache is disabled.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -40693,12 +40774,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 49 + "x": 12, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 296, + "id": 287, "interval": null, "isNew": true, "legend": { @@ -40741,15 +40822,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "topk(20,(\n avg((\n tikv_engine_block_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, instance) \n \n \n)) ", + "expr": "sum(rate(\n tikv_coprocessor_response_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{cf}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "topk(20,(\n avg((\n tikv_engine_block_cache_size_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, instance) \n \n \n)) ", + "query": "sum(rate(\n tikv_coprocessor_response_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -40758,7 +40839,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Block cache size", + "title": "Total Response Size", "tooltip": { "msResolution": true, "shared": true, @@ -40798,13 +40879,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Coprocessor Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 288, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The hit rate of memtable", + "description": "The time consumed when handling coprocessor requests", "editable": true, "error": false, "fieldConfig": { @@ -40826,12 +40949,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 49 + "x": 0, + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 297, + "id": 289, "interval": null, "isNew": true, "legend": { @@ -40867,22 +40990,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + 
"dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_miss\"}\n [$__rate_interval]\n)) ))", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "hit", + "legendFormat": "99.99%-{{req}}", "metric": "", - "query": "(sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_memtable_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"memtable_miss\"}\n [$__rate_interval]\n)) ))", + "query": 
"histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) )", + "refId": "", + "step": 10, + "target": "" + }, + 
{ + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req) ", "refId": "", "step": 10, "target": "" @@ -40891,7 +41082,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Memtable hit", + "title": "Handle duration", "tooltip": { "msResolution": true, "shared": true, @@ -40910,7 +41101,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -40937,7 +41128,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The flow of different kinds of block cache operations", + "description": "The time consumed to handle coprocessor requests per TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -40959,12 +41150,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 56 + "x": 12, + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 298, + "id": 290, "interval": null, "isNew": true, "legend": { @@ -41000,112 +41191,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": 
false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_read\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "total_read", - "metric": "", - "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_read\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_write\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "total_written", - "metric": "", - "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_byte_write\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_bytes_insert\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "data_insert", - "metric": "", - "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_bytes_insert\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - 
"target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_insert\"}\n [$__rate_interval]\n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "filter_insert", + "legendFormat": "99.99%-{{req}}-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_insert\"}\n [$__rate_interval]\n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_evict\"}\n [$__rate_interval]\n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "filter_evict", + "legendFormat": "99%-{{req}}-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_engine_cache_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_bytes_evict\"}\n [$__rate_interval]\n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_handle_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_insert\"}\n [$__rate_interval]\n)) ", + "expr": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) )", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "index_insert", + "legendFormat": "avg-{{req}}-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_insert\"}\n [$__rate_interval]\n)) ", + "query": "(sum(rate(\n tikv_coprocessor_request_handle_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) )", "refId": "", "step": 10, "target": "" }, { "datasource": 
"${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_evict\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) ", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "index_evict", + "legendFormat": "count-{{req}}-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_bytes_evict\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_coprocessor_request_handle_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (req, instance) ", "refId": "", "step": 10, "target": "" @@ -41114,7 +41283,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Block cache flow", + "title": "Handle duration by store", "tooltip": { "msResolution": true, "shared": true, @@ -41133,9 +41302,9 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "s", "label": null, - "logBase": 10, + "logBase": 1, "max": null, "min": null, "show": true @@ -41160,7 +41329,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The hit rate of block cache", + "description": "The time consumed when coprocessor requests are wait for being handled", "editable": true, "error": false, "fieldConfig": { @@ -41182,12 +41351,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 56 + "x": 0, + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 299, + "id": 291, "interval": null, "isNew": true, "legend": { @@ 
-41223,82 +41392,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_miss\"}\n [$__rate_interval]\n)) ))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "all", - "metric": "", - "query": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_miss\"}\n [$__rate_interval]\n)) ))", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": 
"${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_miss\"}\n [$__rate_interval]\n)) ))", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "data", + "legendFormat": "99.99%-{{req}}", "metric": "", - "query": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_miss\"}\n [$__rate_interval]\n)) ))", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - 
"expr": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_miss\"}\n [$__rate_interval]\n)) ))", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "filter", + "legendFormat": "99%-{{req}}", "metric": "", - "query": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_miss\"}\n [$__rate_interval]\n)) ))", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n 
tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_miss\"}\n [$__rate_interval]\n)) ))", + "expr": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) )", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "index", + "legendFormat": "avg-{{req}}", "metric": "", - "query": "(sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_miss\"}\n [$__rate_interval]\n)) ))", + "query": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n 
tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_useful\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_checked\"}\n [$__rate_interval]\n)) )", + "expr": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "bloom prefix", + "legendFormat": "count-{{req}}", "metric": "", - "query": "(sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_useful\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_engine_bloom_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bloom_prefix_checked\"}\n [$__rate_interval]\n)) )", + "query": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req) ", "refId": "", "step": 10, "target": "" @@ -41307,7 +41484,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Block cache hit", + "title": "Wait duration", "tooltip": { "msResolution": true, "shared": true, @@ -41326,7 +41503,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + 
"format": "s", "label": null, "logBase": 1, "max": null, @@ -41353,7 +41530,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The flow of different kinds of operations on keys", + "description": "The time consumed when coprocessor requests are wait for being handled in each TiKV instance", "editable": true, "error": false, "fieldConfig": { @@ -41375,12 +41552,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 63 + "x": 12, + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 300, + "id": 292, "interval": null, "isNew": true, "legend": { @@ -41416,52 +41593,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_read\"}\n [$__rate_interval]\n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "read", + "legendFormat": "99.99%-{{req}}-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_read\"}\n 
[$__rate_interval]\n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_written\"}\n [$__rate_interval]\n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "written", + "legendFormat": "99%-{{req}}-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"keys_written\"}\n [$__rate_interval]\n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_wait_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_compaction_num_corrupt_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", + "expr": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n 
tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) )", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "corrupt", + "legendFormat": "avg-{{req}}-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_engine_compaction_num_corrupt_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", + "query": "(sum(rate(\n tikv_coprocessor_request_wait_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) / sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}-{{instance}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_wait_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"all\"}\n [$__rate_interval]\n)) by (req, instance) ", "refId": "", "step": 10, "target": "" @@ -41470,7 +41685,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Keys flow", + "title": "Wait duration by store", "tooltip": { "msResolution": true, "shared": true, @@ -41489,7 +41704,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + 
"format": "s", "label": null, "logBase": 1, "max": null, @@ -41516,7 +41731,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of different kinds of block cache operations", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -41538,12 +41753,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 63 + "x": 0, + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 301, + "id": 293, "interval": null, "isNew": true, "legend": { @@ -41586,75 +41801,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "total_add", - "metric": "", - "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_add\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "data_add", - "metric": "", - "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_data_add\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_cache_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_add\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "filter_add", - "metric": "", - "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_filter_add\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_add\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "index_add", - "metric": "", - "query": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_index_add\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add_failures\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_coprocessor_dag_request_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (vec_type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "add_failures", + "legendFormat": "{{vec_type}}", "metric": "", - "query": "sum(rate(\n tikv_engine_cache_efficiency\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"block_cache_add_failures\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_coprocessor_dag_request_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (vec_type) ", "refId": "", "step": 10, "target": "" @@ -41663,7 +41818,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Block cache operations", + "title": "Total DAG Requests", "tooltip": { "msResolution": true, "shared": true, @@ -41682,7 +41837,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -41709,7 +41864,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The flow rate of read operations per type", + "description": "The total number of DAG executors", "editable": true, "error": false, "fieldConfig": { @@ -41731,12 +41886,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 70 + "x": 12, + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 302, + "id": 294, "interval": null, "isNew": true, "legend": { @@ -41779,30 +41934,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "get", - "metric": "", - "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_flow_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"iter_bytes_read\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_coprocessor_executor_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "scan", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"iter_bytes_read\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_coprocessor_executor_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -41811,7 +41951,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Read flow", + "title": "Total DAG Executors", "tooltip": { "msResolution": true, "shared": true, @@ -41830,7 +41970,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -41857,7 +41997,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of keys in each column family", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -41879,12 +42019,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 70 + "x": 0, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 303, + "id": 295, "interval": null, "isNew": true, "legend": { @@ -41927,15 +42067,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_engine_estimate_num_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", + "expr": "sum(rate(\n tikv_coprocessor_scan_details\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}", + "legendFormat": "{{tag}}", "metric": "", - "query": "sum((\n tikv_engine_estimate_num_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", + "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (tag) ", "refId": "", "step": 10, "target": "" @@ -41944,7 +42084,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Total keys", + "title": "Total Ops Details (Table Scan)", "tooltip": { "msResolution": true, "shared": true, @@ -41963,7 +42103,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -41990,7 +42130,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The flow of different kinds of write operations", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -42012,12 +42152,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 77 + "x": 12, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 304, + "id": 296, "interval": null, "isNew": true, "legend": { @@ -42060,30 +42200,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_bytes\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "wal", - "metric": "", - "query": "sum(rate(\n tikv_engine_flow_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"wal_file_bytes\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (tag) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "write", + "legendFormat": "{{tag}}", "metric": "", - "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (tag) ", "refId": "", "step": 10, "target": "" @@ -42092,7 +42217,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Write flow", + "title": "Total Ops Details (Index Scan)", "tooltip": { "msResolution": true, "shared": true, @@ -42111,7 +42236,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -42138,7 +42263,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The bytes per read", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -42160,12 +42285,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 77 + "x": 0, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 305, + "id": 297, "interval": null, "isNew": true, "legend": { @@ -42208,60 
+42333,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_max\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "max", - "metric": "", - "query": "max((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_max\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile99\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile99\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile95\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_percentile95\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_bytes_per_read\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_average\"}\n \n)) ", + "expr": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (cf, tag) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "{{cf}}-{{tag}}", "metric": "", - "query": "avg((\n tikv_engine_bytes_per_read\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_read_average\"}\n \n)) ", + "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"select\"}\n [$__rate_interval]\n)) by (cf, tag) ", "refId": "", "step": 10, "target": "" @@ -42270,7 +42350,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Bytes / Read", + "title": "Total Ops Details by CF (Table Scan)", "tooltip": { "msResolution": true, "shared": true, @@ -42289,9 +42369,9 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "ops", "label": null, - "logBase": 10, + "logBase": 1, "max": null, "min": null, "show": true @@ -42316,7 +42396,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The flow rate of compaction operations per type", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -42338,12 +42418,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 84 + "x": 12, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 306, + "id": 298, "interval": null, "isNew": true, "legend": { @@ -42386,45 +42466,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_compaction_flow_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "read", - "metric": "", - "query": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_read\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "written", - "metric": "", - "query": "sum(rate(\n tikv_engine_compaction_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_written\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"flush_write_bytes\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (cf, tag) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "flushed", + "legendFormat": "{{cf}}-{{tag}}", "metric": "", - "query": "sum(rate(\n tikv_engine_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"flush_write_bytes\"}\n 
[$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_coprocessor_scan_details\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=\"index\"}\n [$__rate_interval]\n)) by (cf, tag) ", "refId": "", "step": 10, "target": "" @@ -42433,7 +42483,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Compaction flow", + "title": "Total Ops Details by CF (Index Scan)", "tooltip": { "msResolution": true, "shared": true, @@ -42452,7 +42502,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -42474,12 +42524,116 @@ "alignLevel": 0 } }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed on checking memory locks for coprocessor requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 35 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 299, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory lock checking duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The bytes per write", + "description": "The time consumed on checking memory locks for coprocessor requests", "editable": true, "error": false, "fieldConfig": { @@ -42502,11 +42656,11 @@ "h": 7, "w": 12, "x": 12, - "y": 84 + "y": 35 }, "height": null, "hideTimeOverride": false, - "id": 307, + "id": 300, "interval": null, "isNew": true, "legend": { @@ -42542,29 +42696,52 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": 
"max((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_max\"}\n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "99.99%", "metric": "", - "query": "max((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_max\"}\n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile99\"}\n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -42572,37 +42749,37 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile99\"}\n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile95\"}\n \n)) ", + "expr": "(sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", + "legendFormat": "avg", "metric": "", - "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_percentile95\"}\n \n)) ", + "query": "(sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_average\"}\n \n)) ", + "expr": "sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", 
- "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "count", "metric": "", - "query": "avg((\n tikv_engine_bytes_per_write\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"bytes_per_write_average\"}\n \n)) ", + "query": "sum(rate(\n tikv_coprocessor_mem_lock_check_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -42611,7 +42788,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Bytes / Write", + "title": "Memory lock checking duration", "tooltip": { "msResolution": true, "shared": true, @@ -42630,7 +42807,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -42651,13 +42828,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Coprocessor Detail", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 301, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The read amplification per TiKV instance", + "description": "The time used by each level in the unified read pool per second. 
Level 0 refers to small queries.", "editable": true, "error": false, "fieldConfig": { @@ -42680,11 +42899,11 @@ "h": 7, "w": 12, "x": 0, - "y": 91 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 308, + "id": 302, "interval": null, "isNew": true, "legend": { @@ -42727,15 +42946,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_total_read_bytes\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_estimate_useful_bytes\"}\n [$__rate_interval]\n)) by (instance) )", + "expr": "sum(rate(\n tikv_multilevel_level_elapsed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n [$__rate_interval]\n)) by (level) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{level}}", "metric": "", - "query": "(sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_total_read_bytes\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_engine_read_amp_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"read_amp_estimate_useful_bytes\"}\n [$__rate_interval]\n)) by (instance) )", + "query": "sum(rate(\n tikv_multilevel_level_elapsed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n [$__rate_interval]\n)) by (level) ", "refId": "", "step": 10, "target": "" @@ -42744,7 +42963,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Read 
amplification", + "title": "Time used by level", "tooltip": { "msResolution": true, "shared": true, @@ -42763,7 +42982,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -42790,7 +43009,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The pending bytes to be compacted", + "description": "The chance that level 0 (small) tasks are scheduled in the unified read pool.", "editable": true, "error": false, "fieldConfig": { @@ -42813,11 +43032,11 @@ "h": 7, "w": 12, "x": 12, - "y": 91 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 309, + "id": 303, "interval": null, "isNew": true, "legend": { @@ -42860,15 +43079,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", + "expr": "((\n tikv_multilevel_level0_chance\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}", + "legendFormat": "{{type}}", "metric": "", - "query": "sum((\n tikv_engine_pending_compaction_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf) ", + "query": "((\n tikv_multilevel_level0_chance\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=\"unified-read-pool\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -42877,7 +43096,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Compaction pending bytes", + "title": "Level 0 chance", "tooltip": { "msResolution": true, "shared": true, @@ -42896,7 +43115,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": 
"percentunit", "label": null, "logBase": 1, "max": null, @@ -42923,7 +43142,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of snapshot of each TiKV instance", + "description": "The number of concurrently running tasks in the unified read pool.", "editable": true, "error": false, "fieldConfig": { @@ -42946,11 +43165,11 @@ "h": 7, "w": 12, "x": 0, - "y": 98 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 310, + "id": 304, "interval": null, "isNew": true, "legend": { @@ -42993,15 +43212,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "((\n tikv_engine_num_snapshots\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "expr": "sum(avg_over_time(\n tikv_unified_read_pool_running_tasks\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (priority) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{priority}}", "metric": "", - "query": "((\n tikv_engine_num_snapshots\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "query": "sum(avg_over_time(\n tikv_unified_read_pool_running_tasks\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (priority) ", "refId": "", "step": 10, "target": "" @@ -43010,7 +43229,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Number of snapshots", + "title": "Running tasks", "tooltip": { "msResolution": true, "shared": true, @@ -43029,7 +43248,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -43051,12 +43270,116 @@ "alignLevel": 0 } }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + 
"cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 7 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 305, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified-read.*\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_yatp_pool_schedule_wait_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"unified-read.*\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Unified Read Pool Wait Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The compression ratio of each level", + "description": "Unified read pool task execution time during one schedule.", "editable": true, "error": false, "fieldConfig": { @@ -43078,12 +43401,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 98 + "x": 0, + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 311, + "id": 306, "interval": null, "isNew": true, "legend": { @@ -43119,22 +43442,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_compression_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}-L{{level}}", + "legendFormat": "99.99%", "metric": "", - "query": "avg((\n tikv_engine_compression_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", + "query": 
"histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_poll_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_yatp_task_poll_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_yatp_task_poll_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_task_poll_duration_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_yatp_task_poll_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -43143,7 +43534,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Compression ratio", + "title": "Duration of One Time Slice", "tooltip": { "msResolution": true, "shared": true, @@ -43162,9 +43553,9 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "s", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": null, "show": true @@ -43189,7 +43580,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of SST files for different column families in each level", + "description": "Unified read pool task total execution duration.", "editable": true, "error": false, "fieldConfig": { @@ -43211,12 +43602,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 105 + "x": 12, + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 312, + "id": 307, "interval": null, "isNew": true, "legend": { @@ -43252,22 +43643,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n 
tikv_engine_num_files_at_level\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}-L{{level}}", + "legendFormat": "99.99%", "metric": "", - "query": "avg((\n tikv_engine_num_files_at_level\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) by (cf, level) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_exec_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_yatp_task_exec_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_exec_duration_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_yatp_task_exec_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_yatp_task_exec_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -43276,7 +43735,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Number files at each level", + "title": "Task Execute Duration", "tooltip": { "msResolution": true, "shared": true, @@ -43295,9 +43754,9 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "s", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": null, "show": true @@ -43322,7 +43781,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time that the oldest unreleased snapshot survivals", + "description": "Task schedule number of times.", "editable": true, "error": false, "fieldConfig": { @@ -43343,13 +43802,13 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 105 + "w": 24, + "x": 0, + "y": 21 }, 
"height": null, "hideTimeOverride": false, - "id": 313, + "id": 308, "interval": null, "isNew": true, "legend": { @@ -43385,22 +43844,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "((\n tikv_engine_oldest_snapshot_duration\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "99.99%", "metric": "", - "query": "((\n tikv_engine_oldest_snapshot_duration\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_yatp_task_execute_times_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_yatp_task_execute_times_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_yatp_task_execute_times_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_yatp_task_execute_times_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -43409,7 +43936,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Oldest snapshots duration", + "title": "Task 
Schedule Times", "tooltip": { "msResolution": true, "shared": true, @@ -43428,9 +43955,9 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "none", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": null, "show": true @@ -43449,13 +43976,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Unified Read Pool", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 309, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Stall conditions changed of each column family", + "description": "The count of GC tasks processed by gc_worker", "editable": true, "error": false, "fieldConfig": { @@ -43478,11 +44047,11 @@ "h": 7, "w": 12, "x": 0, - "y": 112 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 314, + "id": 310, "interval": null, "isNew": true, "legend": { @@ -43525,15 +44094,60 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "((\n tikv_engine_stall_conditions_changed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "expr": "sum(rate(\n tikv_gcworker_gc_tasks_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - 
"legendFormat": "{{instance}}-{{cf}}-{{type}}", + "legendFormat": "total-{{task}}", "metric": "", - "query": "((\n tikv_engine_stall_conditions_changed\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n \n)) ", + "query": "sum(rate(\n tikv_gcworker_gc_tasks_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_gc_skipped_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "skipped-{{task}}", + "metric": "", + "query": "sum(rate(\n tikv_storage_gc_skipped_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gcworker_gc_task_fail_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "failed-{{task}}", + "metric": "", + "query": "sum(rate(\n tikv_gcworker_gc_task_fail_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (task) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_worker_too_busy\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 1, + "legendFormat": "gcworker-too-busy", + "metric": "", + "query": "sum(rate(\n tikv_gc_worker_too_busy\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -43542,7 +44156,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Stall conditions changed of each CF", + "title": "GC tasks", "tooltip": { "msResolution": true, "shared": true, @@ -43561,7 +44175,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -43588,7 +44202,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when ingesting SST files", + "description": "The time consumed when executing GC tasks", "editable": true, "error": false, "fieldConfig": { @@ -43611,11 +44225,11 @@ "h": 7, "w": 12, "x": 12, - "y": 112 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 315, + "id": 311, "interval": null, "isNew": true, "legend": { @@ -43681,7 +44295,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -43689,14 +44303,14 @@ "intervalFactor": 1, "legendFormat": "99.99%", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -43704,14 +44318,14 @@ "intervalFactor": 1, "legendFormat": "99%", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_gcworker_gc_task_duration_vec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) )", + "expr": "(sum(rate(\n tikv_gcworker_gc_task_duration_vec_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, @@ -43719,14 +44333,14 @@ "intervalFactor": 1, "legendFormat": "avg", "metric": "", - "query": "(sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) )", + "query": "(sum(rate(\n tikv_gcworker_gc_task_duration_vec_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": true, "instant": false, @@ -43734,7 +44348,7 @@ "intervalFactor": 1, 
"legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_snapshot_ingest_sst_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_gcworker_gc_task_duration_vec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"$command\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -43743,7 +44357,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Ingest SST duration seconds", + "title": "GC tasks duration", "tooltip": { "msResolution": true, "shared": true, @@ -43789,7 +44403,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The GC duration", "editable": true, "error": false, "fieldConfig": { @@ -43812,11 +44426,11 @@ "h": 7, "w": 12, "x": 0, - "y": 119 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 316, + "id": 312, "interval": null, "isNew": true, "legend": { @@ -43859,15 +44473,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_write_stall_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "histogram_quantile(1,(\n sum(rate(\n tidb_tikvclient_gc_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_engine_write_stall_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (type) ", + "query": "histogram_quantile(1,(\n sum(rate(\n 
tidb_tikvclient_gc_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -43876,7 +44490,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Write Stall Reason", + "title": "TiDB GC seconds", "tooltip": { "msResolution": true, "shared": true, @@ -43895,7 +44509,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -43922,7 +44536,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time which is caused by write stall", + "description": "The count of TiDB GC worker actions", "editable": true, "error": false, "fieldConfig": { @@ -43945,11 +44559,11 @@ "h": 7, "w": 12, "x": 12, - "y": 119 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 317, + "id": 313, "interval": null, "isNew": true, "legend": { @@ -43992,60 +44606,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_max\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "max", - "metric": "", - "query": "max((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_max\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile99\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", 
- "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile99\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile95\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_percentile95\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_average\"}\n \n)) ", + "expr": "sum(rate(\n tidb_tikvclient_gc_worker_actions_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "{{type}}", "metric": "", - "query": "avg((\n tikv_engine_write_stall\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"write_stall_average\"}\n \n)) ", + "query": "sum(rate(\n tidb_tikvclient_gc_worker_actions_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -44054,7 +44623,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Write stall duration", + "title": "TiDB GC worker actions", "tooltip": { "msResolution": true, "shared": true, @@ -44073,7 
+44642,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -44095,116 +44664,12 @@ "alignLevel": 0 } }, - { - "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The level that the external file ingests into", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 126 - }, - "heatmap": {}, - "height": null, - "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 318, - "interval": null, - "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_ingestion_picked_level_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "metric": "", - "query": "sum(rate(\n tikv_engine_ingestion_picked_level_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\"}\n [$__rate_interval]\n)) by (le) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Ingestion picked level", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": 
"individual" - }, - "transformations": [], - "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The memtable size of each column family", + "description": "Progress of ResolveLocks, the first phase of GC", "editable": true, "error": false, "fieldConfig": { @@ -44226,12 +44691,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 126 + "x": 0, + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 319, + "id": 314, "interval": null, "isNew": true, "legend": { @@ -44274,15 +44739,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"mem-tables-all\"}\n \n)) by (cf) ", + "expr": "max((\n tidb_tikvclient_range_task_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=~\"resolve-locks.*\"}\n \n)) by (result) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}", + "legendFormat": "{{result}}", "metric": "", - "query": "avg((\n tikv_engine_memory_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$db\",type=\"mem-tables-all\"}\n \n)) by (cf) ", + "query": "max((\n tidb_tikvclient_range_task_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=~\"resolve-locks.*\"}\n \n)) by (result) ", "refId": "", "step": 10, "target": "" @@ -44291,7 +44756,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": 
"Memtable size", + "title": "ResolveLocks Progress", "tooltip": { "msResolution": true, "shared": true, @@ -44310,7 +44775,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -44331,55 +44796,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": "db", - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "RocksDB - $db", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 320, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of operations per second", + "description": "Progress of TiKV's GC", "editable": true, "error": false, "fieldConfig": { @@ -44401,12 +44824,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 0 + "x": 12, + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 321, + "id": 315, "interval": null, "isNew": true, "legend": { @@ -44449,45 +44872,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n raft_engine_write_apply_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "write", - "metric": "", - "query": "sum(rate(\n raft_engine_write_apply_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n raft_engine_read_entry_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "read_entry", - "metric": "", - "query": "sum(rate(\n raft_engine_read_entry_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n raft_engine_read_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "expr": "(sum((\n tikv_gcworker_autogc_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n \n)) by (instance) / sum((\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n \n)) by (instance) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "read_message", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n raft_engine_read_message_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "(sum((\n tikv_gcworker_autogc_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n \n)) by (instance) / sum((\n tikv_raftstore_region_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n \n)) by (instance) )", "refId": "", "step": 10, "target": "" @@ -44496,7 +44889,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Operation", + "title": "TiKV Auto GC Progress", "tooltip": { "msResolution": true, "shared": true, @@ -44515,7 +44908,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -44542,7 +44935,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The time used in write operation", + "description": "keys / second", "editable": true, "error": false, "fieldConfig": { @@ -44564,12 +44957,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 0 + "x": 0, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 322, + "id": 316, "interval": null, "isNew": true, "legend": { @@ -44605,90 +44998,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n 
raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n raft_engine_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "sum(rate(\n tikv_storage_mvcc_gc_delete_versions_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n raft_engine_write_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "refId": 
"", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "{{key_mode}}_keys/s", "metric": "", - "query": "sum(rate(\n raft_engine_write_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_storage_mvcc_gc_delete_versions_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "refId": "", "step": 10, "target": "" @@ -44697,7 +45022,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Write Duration", + "title": "GC speed", "tooltip": { "msResolution": true, "shared": true, @@ -44716,7 +45041,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -44743,7 +45068,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The I/O flow rate", + "description": "SafePoint used for TiKV's Auto GC", "editable": true, "error": false, "fieldConfig": { @@ -44765,12 +45090,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 7 + "x": 12, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 323, + "id": 317, "interval": null, "isNew": true, "legend": { @@ -44813,30 +45138,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": 
"write", - "metric": "", - "query": "sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n raft_engine_background_rewrite_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "max((\n tikv_gcworker_autogc_safe_point\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n \n)) by (instance) / (2^18)", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "rewrite-{{type}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n raft_engine_background_rewrite_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "query": "max((\n tikv_gcworker_autogc_safe_point\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\"}\n \n)) by (instance) / (2^18)", "refId": "", "step": 10, "target": "" @@ -44845,7 +45155,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Flow", + "title": "TiKV Auto GC SafePoint", "tooltip": { "msResolution": true, "shared": true, @@ -44864,7 +45174,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "dateTimeAsIso", "label": null, "logBase": 1, "max": null, @@ -44887,375 +45197,161 @@ } }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "99% duration breakdown of write operation", + "description": "The lifetime of TiDB GC", "editable": true, "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [] - } - } - }, - "fill": 1, - "fillGradient": 1, - "grid": { 
- "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" + "steps": "" + }, + "unit": "s" + }, + "overrides": [] }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 7 + "w": 6, + "x": 0, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 324, + "id": 318, "interval": null, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 100, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", "options": { - "alertThreshold": true, - "dataLinks": [] + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_preprocess_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "wait", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_preprocess_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - 
"refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "wal", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_apply_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_life_time\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "apply", + "legendFormat": null, "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_apply_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_life_time\"}\n \n)) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Write Duration Breakdown (99%)", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, + "title": "GC lifetime", 
"transformations": [], "transparent": false, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "type": "stat" }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The bytes per write", + "description": "The interval of TiDB GC", "editable": true, "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [] - } - } - }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" + "steps": "" + }, + "unit": "s" + }, + "overrides": [] }, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 14 + "w": 6, + "x": 6, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 325, + "id": 319, "interval": null, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 100, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", "options": { - "alertThreshold": true, - "dataLinks": [] + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": 
false + }, + "textMode": "auto" }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n raft_engine_write_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n raft_engine_write_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": 
"(sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_run_interval\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n raft_engine_write_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n raft_engine_write_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n raft_engine_write_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": null, "metric": "", - "query": "sum(rate(\n raft_engine_write_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "max((\n tidb_tikvclient_gc_config\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",type=\"tikv_gc_run_interval\"}\n \n)) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Bytes / Written", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, + "title": "GC interval", "transformations": [], "transparent": false, - "type": "graph", - 
"xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "type": "stat" }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "999% duration breakdown of WAL write operation", + "description": "Keys handled in GC compaction filter", "editable": true, "error": false, "fieldConfig": { @@ -45278,11 +45374,11 @@ "h": 7, "w": 12, "x": 12, - "y": 14 + "y": 35 }, "height": null, "hideTimeOverride": false, - "id": 326, + "id": 320, "interval": null, "isNew": true, "legend": { @@ -45325,223 +45421,135 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "sum(rate(\n tikv_gc_compaction_filtered\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "total", + "legendFormat": "{{key_mode}}_filtered", "metric": "", - "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_write_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "sum(rate(\n tikv_gc_compaction_filtered\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "refId": "", "step": 
10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_sync_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "sum(rate(\n tikv_gc_compaction_filter_skip\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "sync", + "legendFormat": "{{key_mode}}_skipped", "metric": "", - "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_sync_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "sum(rate(\n tikv_gc_compaction_filter_skip\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_allocate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "sum(rate(\n tikv_gc_compaction_mvcc_rollback\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "allocate", + "legendFormat": "{{key_mode}}_mvcc-rollback/mvcc-lock", "metric": "", - "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_allocate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n 
\n)) ", + "query": "sum(rate(\n tikv_gc_compaction_mvcc_rollback\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_rotate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "sum(rate(\n tikv_gc_compaction_filter_orphan_versions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "rotate", + "legendFormat": "{{key_mode}}_orphan-versions", "metric": "", - "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_rotate_log_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "sum(rate(\n tikv_gc_compaction_filter_orphan_versions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "refId": "", "step": 10, "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "WAL Duration Breakdown (999%)", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + }, { - "decimals": null, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_filter_perform\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{key_mode}}_performed-times", + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_filter_perform\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", + "refId": "", + "step": 10, + "target": "" }, { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } - }, - { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The average number of files", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 21 - }, - "height": null, - "hideTimeOverride": false, - "id": 327, - "interval": null, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxDataPoints": null, - "maxPerRow": null, - "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": null, - 
"seriesOverrides": [], - "span": null, - "stack": false, - "steppedLine": false, - "targets": [ + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_gc_compaction_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode, type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{key_mode}}_failure-{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_gc_compaction_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode, type) ", + "refId": "", + "step": 10, + "target": "" + }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n raft_engine_log_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "expr": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_met\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{key_mode}}_mvcc-deletion-met", "metric": "", - "query": "avg((\n raft_engine_log_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "query": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_met\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n raft_engine_swap_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "expr": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_handled\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "swap", + "legendFormat": "{{key_mode}}_mvcc-deletion-handled", "metric": "", - "query": "avg((\n raft_engine_swap_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "query": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_handled\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n raft_engine_recycled_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "expr": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_wasted\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}-recycle", + "legendFormat": "{{key_mode}}_mvcc-deletion-wasted", "metric": "", - "query": "avg((\n raft_engine_recycled_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "query": "sum(rate(\n tikv_gc_compaction_filter_mvcc_deletion_wasted\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (key_mode) ", "refId": "", "step": 10, "target": "" @@ -45550,7 +45558,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "File Count", + "title": "GC in Compaction Filter", "tooltip": { "msResolution": true, "shared": true, @@ -45569,7 +45577,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "none", 
"label": null, "logBase": 1, "max": null, @@ -45596,7 +45604,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The 99% duration of operations other than write", + "description": "GC scan write details", "editable": true, "error": false, "fieldConfig": { @@ -45618,12 +45626,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 21 + "x": 0, + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 328, + "id": 321, "interval": null, "isNew": true, "legend": { @@ -45666,45 +45674,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_entry_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "read_entry", - "metric": "", - "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_entry_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "read_message", - "metric": "", - "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_read_message_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - 
"datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_purge_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "expr": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "purge", + "legendFormat": "{{key_mode}}-{{tag}}", "metric": "", - "query": "histogram_quantile(0.999,(\n sum(rate(\n raft_engine_purge_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "query": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", "refId": "", "step": 10, "target": "" @@ -45713,7 +45691,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Other Durations (99%)", + "title": "GC scan write details", "tooltip": { "msResolution": true, "shared": true, @@ -45732,9 +45710,9 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "none", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -45759,7 +45737,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The average number of log entries", + "description": "GC scan default details", "editable": true, "error": false, "fieldConfig": { @@ -45780,13 +45758,13 @@ }, "gridPos": { "h": 7, - "w": 24, - "x": 0, - "y": 28 + "w": 12, + "x": 12, + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 329, + "id": 322, "interval": null, "isNew": true, "legend": { @@ -45829,15 +45807,15 @@ "targets": [ { 
"datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n raft_engine_log_entry_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "expr": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{key_mode}}-{{tag}}", "metric": "", - "query": "avg((\n raft_engine_log_entry_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "query": "sum(rate(\n tikv_gcworker_gc_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (key_mode, tag) ", "refId": "", "step": 10, "target": "" @@ -45846,7 +45824,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Entry Count", + "title": "GC scan default details", "tooltip": { "msResolution": true, "shared": true, @@ -45865,7 +45843,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -45894,7 +45872,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Raft Engine", + "title": "GC", "transformations": [], "transparent": false, "type": "row" @@ -45922,7 +45900,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 330, + "id": 323, "interval": null, "links": [], "maxDataPoints": 100, @@ -45961,7 +45939,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 331, + "id": 324, "interval": null, "isNew": true, "legend": { @@ -46004,30 +45982,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_engine_titandb_num_live_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n 
\n)) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"waiter_manager.*\"}\n [$__rate_interval]\n)) by (instance, name) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "live blob file num", + "legendFormat": "{{instance}}-{{name}}", "metric": "", - "query": "sum((\n tikv_engine_titandb_num_live_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"waiter_manager.*\"}\n [$__rate_interval]\n)) by (instance, name) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_engine_titandb_num_obsolete_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"deadlock_detect.*\"}\n [$__rate_interval]\n)) by (instance, name) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "obsolete blob file num", + "legendFormat": "{{instance}}-{{name}}", "metric": "", - "query": "sum((\n tikv_engine_titandb_num_obsolete_blob_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"deadlock_detect.*\"}\n [$__rate_interval]\n)) by (instance, name) ", "refId": "", "step": 10, "target": "" @@ -46036,7 +46014,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": 
"Blob file count", + "title": "Lock Manager Thread CPU", "tooltip": { "msResolution": true, "shared": true, @@ -46055,7 +46033,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -46109,7 +46087,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 332, + "id": 325, "interval": null, "isNew": true, "legend": { @@ -46152,30 +46130,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_engine_titandb_live_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", + "expr": "sum(rate(\n tikv_lock_manager_task_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "live blob file size", - "metric": "", - "query": "sum((\n tikv_engine_titandb_live_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_engine_titandb_obsolete_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "obsolete blob file size", + "legendFormat": "{{type}}", "metric": "", - "query": "sum((\n tikv_engine_titandb_obsolete_blob_file_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) ", + "query": "sum(rate(\n tikv_lock_manager_task_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, 
"target": "" @@ -46184,7 +46147,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob file size", + "title": "Lock Manager Handled tasks", "tooltip": { "msResolution": true, "shared": true, @@ -46203,7 +46166,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -46230,7 +46193,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -46257,7 +46220,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 333, + "id": 326, "interval": null, "isNew": true, "legend": { @@ -46293,22 +46256,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_engine_titandb_live_blob_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (instance) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "live blob size", + "legendFormat": "99.99%", "metric": "", - "query": "sum((\n tikv_engine_titandb_live_blob_size\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (instance) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -46317,7 +46348,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Live blob size", + "title": "Waiter lifetime duration", "tooltip": { "msResolution": true, "shared": true, @@ -46336,9 +46367,9 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "s", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": null, "show": true @@ -46363,7 +46394,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The hit rate of block cache", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -46390,7 +46421,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 334, + "id": 327, "interval": null, "isNew": true, "legend": { @@ -46433,15 +46464,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) + 
sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_miss\"}\n [$__rate_interval]\n)) ))", + "expr": "sum(max_over_time(\n tikv_lock_manager_wait_table_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "all", + "legendFormat": "{{type}}", "metric": "", - "query": "(sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) / (sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_hit\"}\n [$__rate_interval]\n)) + sum(rate(\n tikv_engine_blob_cache_efficiency\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_cache_miss\"}\n [$__rate_interval]\n)) ))", + "query": "sum(max_over_time(\n tikv_lock_manager_wait_table_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(max_over_time(\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "metric": "", + "query": "sum(max_over_time(\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", "refId": "", "step": 10, 
"target": "" @@ -46450,7 +46496,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob cache hit", + "title": "Lock Waiting Queue", "tooltip": { "msResolution": true, "shared": true, @@ -46469,7 +46515,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -46496,7 +46542,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -46523,7 +46569,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 335, + "id": 328, "interval": null, "isNew": true, "legend": { @@ -46559,67 +46605,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_average\"}\n \n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "99.99%", "metric": "", - "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_average\"}\n \n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile95\"}\n \n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "95%", + "legendFormat": "99%", "metric": "", - "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile95\"}\n \n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile99\"}\n \n)) ", + "expr": "(sum(rate(\n tikv_lock_manager_detect_duration_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_detect_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "avg", "metric": "", - "query": "avg((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_percentile99\"}\n \n)) ", + "query": "(sum(rate(\n tikv_lock_manager_detect_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_detect_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_max\"}\n \n)) ", + "expr": "sum(rate(\n tikv_lock_manager_detect_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "count", "metric": "", - "query": "max((\n tikv_engine_blob_iter_touch_blob_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_iter_touch_blob_file_count_max\"}\n \n)) ", + "query": "sum(rate(\n tikv_lock_manager_detect_duration_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -46628,7 +46697,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Iter touched blob file count", + "title": "Deadlock detect duration", "tooltip": { "msResolution": true, "shared": true, @@ -46647,9 +46716,9 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "s", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": null, "show": true @@ -46674,7 +46743,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The blob cache size.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -46701,7 +46770,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 336, + "id": 329, "interval": null, "isNew": true, "legend": { @@ -46744,15 +46813,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "topk(20,(\n avg((\n tikv_engine_blob_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (cf, instance) \n \n \n)) ", + "expr": "sum(rate(\n tikv_lock_manager_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{cf}}", + "legendFormat": "{{type}}", "metric": "", - "query": "topk(20,(\n avg((\n tikv_engine_blob_cache_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (cf, instance) \n \n \n)) ", + "query": "sum(rate(\n tikv_lock_manager_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -46761,7 +46830,7 @@ 
"thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob cache size", + "title": "Detect error", "tooltip": { "msResolution": true, "shared": true, @@ -46780,7 +46849,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -46834,7 +46903,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 337, + "id": 330, "interval": null, "isNew": true, "legend": { @@ -46877,60 +46946,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_average\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_average\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile95\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile95\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile99\"}\n \n)) ", - "format": "time_series", - 
"hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "avg((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_percentile99\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_max\"}\n \n)) ", + "expr": "sum(max_over_time(\n tikv_lock_manager_detector_leader_heartbeat\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "{{instance}}", "metric": "", - "query": "max((\n tikv_engine_blob_key_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_key_size_max\"}\n \n)) ", + "query": "sum(max_over_time(\n tikv_lock_manager_detector_leader_heartbeat\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -46939,7 +46963,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob key size", + "title": "Deadlock detector leader", "tooltip": { "msResolution": true, "shared": true, @@ -46958,7 +46982,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -47012,7 +47036,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 338, + "id": 331, "interval": null, "isNew": true, "legend": { @@ -47055,60 +47079,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_value_size\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_average\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_average\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile95\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile95\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile99\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "avg((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_percentile99\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_value_size\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_max\"}\n \n)) ", + "expr": "((\n tikv_pessimistic_lock_memory_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "{{instance}}", "metric": "", - "query": "max((\n tikv_engine_blob_value_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_value_size_max\"}\n \n)) ", + "query": "((\n tikv_pessimistic_lock_memory_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -47117,7 +47096,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob value size", + "title": "Total pessimistic locks memory size", "tooltip": { "msResolution": true, "shared": true, @@ -47190,7 +47169,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 339, + "id": 332, "interval": null, "isNew": true, "legend": { @@ -47233,15 +47212,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_get\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_in_memory_pessimistic_locking\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "get", + "legendFormat": "{{result}}", "metric": "", - "query": "sum(rate(\n tikv_engine_blob_locate\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_get\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_in_memory_pessimistic_locking\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", "refId": "", "step": 10, "target": "" @@ -47250,7 +47229,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob get operations", + "title": "In-memory pessimistic locking result", "tooltip": { "msResolution": true, "shared": true, @@ -47296,7 +47275,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The number of active keys and waiters.", "editable": true, "error": false, "fieldConfig": { @@ -47323,7 +47302,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 340, + "id": 333, "interval": null, "isNew": true, "legend": { @@ -47366,60 +47345,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg-{{type}}", - "metric": "", - "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - 
"intervalFactor": 1, - "legendFormat": "95%-{{type}}", - "metric": "", - "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%-{{type}}", - "metric": "", - "query": "avg((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", + "expr": "sum((\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max-{{type}}", + "legendFormat": "{{type}}", "metric": "", - "query": "max((\n tikv_engine_blob_get_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", + "query": "sum((\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -47428,7 +47362,7 @@ 
"thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob get duration", + "title": "Pessimistic lock activities", "tooltip": { "msResolution": true, "shared": true, @@ -47447,7 +47381,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -47470,11 +47404,23 @@ } }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The length includes the entering transaction itself", "editable": true, "error": false, "fieldConfig": { @@ -47485,129 +47431,88 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 35 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 341, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 334, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": 
[], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_engine_titandb_blob_file_discardable_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (ratio) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_lock_wait_queue_length_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{ratio}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum((\n tikv_engine_titandb_blob_file_discardable_ratio\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n \n)) by (ratio) ", + "query": "sum(rate(\n tikv_lock_wait_queue_length_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob file discardable ratio distribution", + "title": "Lengths of lock wait queues when transaction enqueues", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The duration scan in-memory pessimistic locks with read lock", "editable": true, "error": false, "fieldConfig": { @@ -47634,7 +47539,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 342, + "id": 335, "interval": null, "isNew": true, "legend": { @@ -47670,52 +47575,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_seek\"}\n [$__rate_interval]\n)) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "seek", + "legendFormat": "99.99%-{{type}}", "metric": "", - "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_seek\"}\n [$__rate_interval]\n)) ", + "query": 
"histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_prev\"}\n [$__rate_interval]\n)) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "prev", + "legendFormat": "99%-{{type}}", "metric": "", - "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_prev\"}\n [$__rate_interval]\n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_next\"}\n [$__rate_interval]\n)) ", + "expr": "(sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "next", + "legendFormat": "avg-{{type}}", "metric": "", - "query": "sum(rate(\n tikv_engine_blob_locate\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"number_blob_next\"}\n [$__rate_interval]\n)) ", + "query": "(sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -47724,7 +47667,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob iter operations", + "title": "In-memory scan lock read duration", "tooltip": { "msResolution": true, "shared": true, @@ -47743,9 +47686,9 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "s", "label": null, - "logBase": 1, + "logBase": 2, "max": null, "min": null, "show": 
true @@ -47764,13 +47707,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Pessimistic Locking", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 336, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The number of tasks handled by worker", "editable": true, "error": false, "fieldConfig": { @@ -47793,11 +47778,11 @@ "h": 7, "w": 12, "x": 0, - "y": 42 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 343, + "id": 337, "interval": null, "isNew": true, "legend": { @@ -47840,60 +47825,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_seek_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "avg((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", + "expr": "sum(rate(\n tikv_worker_handled_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "{{name}}", "metric": "", - "query": "max((\n tikv_engine_blob_seek_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", + "query": "sum(rate(\n tikv_worker_handled_task_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", "refId": "", "step": 10, "target": "" @@ -47902,7 +47842,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob seek duration", + "title": "Worker handled tasks", "tooltip": { "msResolution": true, "shared": true, @@ -47921,7 +47861,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -47948,7 +47888,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "Current pending and running tasks of worker", "editable": true, "error": false, "fieldConfig": { @@ -47971,11 +47911,11 @@ "h": 7, "w": 12, "x": 12, - "y": 42 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 344, + "id": 338, "interval": null, "isNew": true, "legend": { @@ -48018,60 +47958,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n 
tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "avg((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", + "expr": "sum((\n tikv_worker_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (name) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "{{name}}", "metric": "", - "query": "max((\n tikv_engine_blob_next_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) ", + "query": "sum((\n tikv_worker_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (name) ", "refId": "", "step": 10, "target": "" @@ -48080,7 +47975,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob next duration", + "title": "Worker pending tasks", "tooltip": { "msResolution": true, "shared": true, @@ 
-48099,7 +47994,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -48126,7 +48021,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The number of tasks handled by future_pool", "editable": true, "error": false, "fieldConfig": { @@ -48149,11 +48044,11 @@ "h": 7, "w": 12, "x": 0, - "y": 49 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 345, + "id": 339, "interval": null, "isNew": true, "legend": { @@ -48196,60 +48091,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg-{{type}}", - "metric": "", - "query": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_average\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%-{{type}}", - "metric": "", - "query": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile95\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n 
tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%-{{type}}", - "metric": "", - "query": "avg((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_percentile99\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", + "expr": "sum(rate(\n tikv_futurepool_handled_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max-{{type}}", + "legendFormat": "{{name}}", "metric": "", - "query": "max((\n tikv_engine_blob_prev_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\".*_max\"}\n \n)) by (type) ", + "query": "sum(rate(\n tikv_futurepool_handled_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) ", "refId": "", "step": 10, "target": "" @@ -48258,7 +48108,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob prev duration", + "title": "FuturePool handled tasks", "tooltip": { "msResolution": true, "shared": true, @@ -48277,7 +48127,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -48304,7 +48154,7 @@ "bars": false, 
"cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "Current pending and running tasks of future_pool", "editable": true, "error": false, "fieldConfig": { @@ -48327,11 +48177,11 @@ "h": 7, "w": 12, "x": 12, - "y": 49 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 346, + "id": 340, "interval": null, "isNew": true, "legend": { @@ -48374,15 +48224,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "sum(avg_over_time(\n tikv_futurepool_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (name) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{name}}", "metric": "", - "query": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum(avg_over_time(\n tikv_futurepool_pending_task_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (name) ", "refId": "", "step": 10, "target": "" @@ -48391,7 +48241,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob keys flow", + "title": "FuturePool pending tasks", "tooltip": { "msResolution": true, "shared": true, @@ -48410,7 +48260,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -48431,13 +48281,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": 
"Task", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 341, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The count of requests that TiKV sends to PD", "editable": true, "error": false, "fieldConfig": { @@ -48460,11 +48352,11 @@ "h": 7, "w": 12, "x": 0, - "y": 56 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 347, + "id": 342, "interval": null, "isNew": true, "legend": { @@ -48507,60 +48399,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_average\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_average\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile99\"}\n \n)) by (type) ", - "format": "time_series", - "hide": 
false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile99\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile95\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "avg((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_percentile95\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_max\"}\n \n)) by (type) ", + "expr": "sum(rate(\n tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "{{type}}", "metric": "", - "query": "max((\n tikv_engine_blob_file_read_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_read_micros_max\"}\n \n)) by (type) ", + "query": "sum(rate(\n tikv_pd_request_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -48569,7 +48416,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob file read duration", + "title": "PD requests", "tooltip": { "msResolution": true, "shared": true, @@ -48588,7 +48435,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -48615,7 +48462,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The time consumed by requests that TiKV sends to PD", "editable": true, "error": false, "fieldConfig": { @@ -48638,11 +48485,11 @@ "h": 7, "w": 12, "x": 12, - "y": 56 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 348, + "id": 343, "interval": null, "isNew": true, "legend": { @@ -48685,7 +48532,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "(sum(rate(\n tikv_pd_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", "format": "time_series", "hide": false, "instant": false, @@ -48693,7 +48540,7 @@ "intervalFactor": 1, "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_engine_blob_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", + "query": "(sum(rate(\n tikv_pd_request_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_pd_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", "refId": "", "step": 10, "target": "" @@ -48702,7 +48549,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob bytes flow", + "title": "PD request duration (average)", "tooltip": { "msResolution": true, "shared": true, @@ -48721,7 +48568,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -48748,7 +48595,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The total number of PD heartbeat messages", "editable": true, "error": false, "fieldConfig": { @@ -48771,11 +48618,11 @@ "h": 7, "w": 12, "x": 0, - "y": 63 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 349, + "id": 344, "interval": null, "isNew": true, "legend": { @@ -48818,60 +48665,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_average\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_average\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_file_write_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile99\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile99\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile95\"}\n \n)) by (type) ", + "expr": "sum(rate(\n tikv_pd_heartbeat_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", + "legendFormat": "{{type}}", "metric": "", - "query": "avg((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_percentile95\"}\n \n)) by (type) ", + "query": "sum(rate(\n tikv_pd_heartbeat_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_max\"}\n \n)) by (type) ", + "expr": "sum((\n tikv_pd_pending_heartbeat_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "{{instance}}-pending", "metric": "", - "query": "max((\n tikv_engine_blob_file_write_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_write_micros_max\"}\n \n)) by (type) ", + "query": "sum((\n tikv_pd_pending_heartbeat_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -48880,7 +48697,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob file write duration", + "title": "PD heartbeats", "tooltip": { "msResolution": true, "shared": true, @@ -48899,7 +48716,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -48926,7 +48743,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The total number of peers validated by the PD worker", "editable": true, "error": false, "fieldConfig": { @@ -48949,11 +48766,11 @@ "h": 7, "w": 12, "x": 12, - "y": 63 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 350, + "id": 345, "interval": null, "isNew": true, "legend": { @@ -48996,15 +48813,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_blob_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) ", + "expr": "sum(rate(\n tikv_pd_validate_peer_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", 
"intervalFactor": 1, - "legendFormat": "sync", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_engine_blob_file_synced\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_pd_validate_peer_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -49013,7 +48830,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob file sync operations", + "title": "PD validate peers", "tooltip": { "msResolution": true, "shared": true, @@ -49059,7 +48876,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The count of reconnection between TiKV and PD", "editable": true, "error": false, "fieldConfig": { @@ -49082,11 +48899,11 @@ "h": 7, "w": 12, "x": 0, - "y": 70 + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 351, + "id": 346, "interval": null, "isNew": true, "legend": { @@ -49129,7 +48946,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_blob_gc_action_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "sum(delta(\n tikv_pd_reconnect_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, @@ -49137,7 +48954,7 @@ "intervalFactor": 1, "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_engine_blob_gc_action_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum(delta(\n tikv_pd_reconnect_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -49146,7 +48963,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob GC action", + "title": "PD reconnection", "tooltip": { "msResolution": true, "shared": true, @@ -49165,7 +48982,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -49192,7 +49009,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The forward status of PD client", "editable": true, "error": false, "fieldConfig": { @@ -49215,11 +49032,11 @@ "h": 7, "w": 12, "x": 12, - "y": 70 + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 352, + "id": 347, "interval": null, "isNew": true, "legend": { @@ -49262,60 +49079,148 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_average\"}\n \n)) by (type) ", + "expr": "((\n tikv_pd_request_forwarded\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", + "legendFormat": "{{instance}}-{{host}}", "metric": "", - "query": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_average\"}\n \n)) by (type) ", + "query": "((\n tikv_pd_request_forwarded\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "refId": "", "step": 10, "target": "" - }, + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, 
+ "title": "PD forward status", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile95\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile95\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile99\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "avg((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_percentile99\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + }, + { + 
"aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The number of TSO requests waiting in the queue.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 21 + }, + "height": null, + "hideTimeOverride": false, + "id": 348, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + "seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_max\"}\n \n)) by (type) ", + "expr": "sum((\n tikv_pd_pending_tso_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "{{instance}}", "metric": "", - "query": "max((\n 
tikv_engine_blob_file_sync_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_file_sync_micros_max\"}\n \n)) by (type) ", + "query": "sum((\n tikv_pd_pending_tso_request_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -49324,7 +49229,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob file sync duration", + "title": "Pending TSO Requests", "tooltip": { "msResolution": true, "shared": true, @@ -49343,7 +49248,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -49370,7 +49275,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The slow score of stores", "editable": true, "error": false, "fieldConfig": { @@ -49392,12 +49297,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 77 + "x": 12, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 353, + "id": 349, "interval": null, "isNew": true, "legend": { @@ -49440,60 +49345,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_average\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_average\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_gc_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile95\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile95\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile99\"}\n \n)) by (type) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "avg((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_percentile99\"}\n \n)) by (type) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_gc_micros_seconds\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_max\"}\n \n)) by (type) ", + "expr": "sum((\n tikv_raftstore_slow_score\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "{{instance}}", "metric": "", - "query": "max((\n tikv_engine_blob_gc_micros_seconds\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_micros_max\"}\n \n)) by (type) ", + "query": "sum((\n tikv_raftstore_slow_score\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -49502,7 +49362,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob GC duration", + "title": "Store Slow Score", "tooltip": { "msResolution": true, "shared": true, @@ -49521,7 +49381,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -49548,7 +49408,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The duration that recorded by inspecting messages.", "editable": true, "error": false, "fieldConfig": { @@ -49569,13 +49429,13 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 77 + "w": 24, + "x": 0, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 354, + "id": 350, "interval": null, "isNew": true, "legend": { @@ -49618,15 +49478,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_inspect_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}-{{type}}", "metric": "", - "query": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"keys.*\"}\n [$__rate_interval]\n)) by (type) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_inspect_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -49635,7 +49495,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob GC keys flow", + "title": "Inspected duration per server", "tooltip": { "msResolution": true, "shared": true, @@ -49654,7 +49514,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -49675,13 +49535,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "PD", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 351, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The changing trend of the slowness on I/O operations. 
'value > 0' means the related store might have a slow trend.", "editable": true, "error": false, "fieldConfig": { @@ -49704,11 +49606,11 @@ "h": 7, "w": 12, "x": 0, - "y": 84 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 355, + "id": 352, "interval": null, "isNew": true, "legend": { @@ -49751,60 +49653,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_average\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_average\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile95\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile95\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile99\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": 
"99%", - "metric": "", - "query": "avg((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_percentile99\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_max\"}\n \n)) ", + "expr": "sum((\n tikv_raftstore_slow_trend\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "{{instance}}", "metric": "", - "query": "max((\n tikv_engine_blob_gc_input_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_input_file_max\"}\n \n)) ", + "query": "sum((\n tikv_raftstore_slow_trend\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -49813,7 +49670,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob GC input file size", + "title": "Slow Trend", "tooltip": { "msResolution": true, "shared": true, @@ -49832,7 +49689,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -49859,7 +49716,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The changing trend of QPS on each store. 
'value < 0' means the QPS has a dropping trend.", "editable": true, "error": false, "fieldConfig": { @@ -49882,11 +49739,11 @@ "h": 7, "w": 12, "x": 12, - "y": 84 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 356, + "id": 353, "interval": null, "isNew": true, "legend": { @@ -49929,15 +49786,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "sum((\n tikv_raftstore_slow_trend_result\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_engine_blob_gc_flow_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=~\"bytes.*\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum((\n tikv_raftstore_slow_trend_result\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -49946,7 +49803,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob GC bytes flow", + "title": "QPS Changing Trend", "tooltip": { "msResolution": true, "shared": true, @@ -49965,7 +49822,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -49992,7 +49849,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The sampling latency of recent queries. 
A larger value indicates that the store is more likely to be the slowest store.", "editable": true, "error": false, "fieldConfig": { @@ -50015,11 +49872,11 @@ "h": 7, "w": 12, "x": 0, - "y": 91 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 357, + "id": 354, "interval": null, "isNew": true, "legend": { @@ -50062,60 +49919,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_average\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_average\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile95\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "95%", - "metric": "", - "query": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile95\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile99\"}\n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - 
"intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "avg((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_percentile99\"}\n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_max\"}\n \n)) ", + "expr": "sum((\n tikv_raftstore_slow_trend_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "{{instance}}", "metric": "", - "query": "max((\n tikv_engine_blob_gc_output_file\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\",type=\"blob_gc_output_file_max\"}\n \n)) ", + "query": "sum((\n tikv_raftstore_slow_trend_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -50124,7 +49936,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob GC output file size", + "title": "AVG Sampling Latency", "tooltip": { "msResolution": true, "shared": true, @@ -50143,7 +49955,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "\u00b5s", "label": null, "logBase": 1, "max": null, @@ -50170,7 +49982,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The QPS of each store.", "editable": true, "error": false, "fieldConfig": { @@ -50193,11 +50005,11 @@ "h": 7, "w": 12, "x": 12, - "y": 91 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 358, + "id": 355, 
"interval": null, "isNew": true, "legend": { @@ -50240,15 +50052,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_engine_blob_gc_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "sum((\n tikv_raftstore_slow_trend_result_value\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_engine_blob_gc_file_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",db=\"$titan_db\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum((\n tikv_raftstore_slow_trend_result_value\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -50257,7 +50069,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Blob GC file count", + "title": "QPS of each store", "tooltip": { "msResolution": true, "shared": true, @@ -50299,13 +50111,13 @@ } } ], - "repeat": "titan_db", + "repeat": null, "repeatDirection": null, "span": null, "targets": [], "timeFrom": null, "timeShift": null, - "title": "Titan - $titan_db", + "title": "Slow Trend Statistics", "transformations": [], "transparent": false, "type": "row" @@ -50333,7 +50145,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 359, + "id": 356, "interval": null, "links": [], "maxDataPoints": 100, @@ -50345,7 +50157,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The rate of Raft snapshot messages sent", "editable": true, "error": false, "fieldConfig": { @@ -50372,7 +50184,7 @@ }, "height": null, "hideTimeOverride": 
false, - "id": 360, + "id": 357, "interval": null, "isNew": true, "legend": { @@ -50415,30 +50227,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"waiter_manager.*\"}\n [$__rate_interval]\n)) by (instance, name) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{name}}", - "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"waiter_manager.*\"}\n [$__rate_interval]\n)) by (instance, name) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"deadlock_detect.*\"}\n [$__rate_interval]\n)) by (instance, name) ", + "expr": "sum(delta(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [1m]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{name}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"deadlock_detect.*\"}\n [$__rate_interval]\n)) by (instance, name) ", + "query": "sum(delta(\n tikv_raftstore_raft_sent_message_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [1m]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -50447,7 +50244,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Lock 
Manager Thread CPU", + "title": "Rate snapshot message", "tooltip": { "msResolution": true, "shared": true, @@ -50466,7 +50263,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -50493,7 +50290,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The number of snapshots in different states", "editable": true, "error": false, "fieldConfig": { @@ -50520,7 +50317,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 361, + "id": 358, "interval": null, "isNew": true, "legend": { @@ -50563,7 +50360,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_lock_manager_task_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "sum((\n tikv_raftstore_snapshot_traffic_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, @@ -50571,7 +50368,7 @@ "intervalFactor": 1, "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_lock_manager_task_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum((\n tikv_raftstore_snapshot_traffic_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -50580,7 +50377,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Lock Manager Handled tasks", + "title": "Snapshot state count", "tooltip": { "msResolution": true, "shared": true, @@ -50599,7 +50396,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -50626,7 +50423,7 @@ "bars": false, "cacheTimeout": null, "datasource": 
"${DS_TEST-CLUSTER}", - "description": "", + "description": "The time snapshot generation tasks waited to be scheduled. ", "editable": true, "error": false, "fieldConfig": { @@ -50653,7 +50450,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 362, + "id": 359, "interval": null, "isNew": true, "legend": { @@ -50689,90 +50486,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - 
"query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_generation_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_lock_manager_waiter_lifetime_duration_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_generation_wait_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -50781,7 +50510,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Waiter lifetime duration", + "title": "99% Snapshot generation wait duration", "tooltip": { "msResolution": true, "shared": true, @@ -50802,7 +50531,7 @@ "decimals": null, "format": "s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -50827,7 +50556,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The time consumed when handling snapshots", "editable": true, "error": false, "fieldConfig": { @@ -50854,7 +50583,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 363, + "id": 360, "interval": null, "isNew": true, "legend": { @@ -50897,30 +50626,45 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(max_over_time(\n tikv_lock_manager_wait_table_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_send_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "send", "metric": "", - "query": "sum(max_over_time(\n tikv_lock_manager_wait_table_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", + 
"query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_server_send_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(max_over_time(\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "apply", "metric": "", - "query": "sum(max_over_time(\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (type) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"generate\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "generate", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_raftstore_snapshot_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"generate\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -50929,7 +50673,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Lock Waiting Queue", + "title": "99% Handle snapshot duration", "tooltip": { "msResolution": true, "shared": true, @@ -50948,7 +50692,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -50975,7 +50719,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The snapshot size (P99.99).9999", "editable": true, "error": false, "fieldConfig": { @@ -51002,7 +50746,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 364, + "id": 361, "interval": null, "isNew": true, "legend": { @@ -51038,90 +50782,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_lock_manager_detect_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_lock_manager_detect_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_detect_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n tikv_lock_manager_detect_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_lock_manager_detect_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": 
"" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_lock_manager_detect_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "size", "metric": "", - "query": "sum(rate(\n tikv_lock_manager_detect_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -51130,7 +50806,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Deadlock detect duration", + "title": "99.99% Snapshot size", "tooltip": { "msResolution": true, "shared": true, @@ -51149,9 +50825,9 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "bytes", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -51176,7 +50852,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The number of KV within a snapshot in .9999", "editable": true, "error": false, "fieldConfig": { @@ -51203,7 +50879,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 365, + "id": 362, "interval": null, "isNew": true, "legend": { @@ -51246,15 +50922,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_lock_manager_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_kv_count_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "count", "metric": "", - "query": "sum(rate(\n tikv_lock_manager_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_snapshot_kv_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -51263,7 +50939,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Detect error", + "title": "99.99% Snapshot KV count", "tooltip": { "msResolution": true, "shared": true, @@ -51282,7 +50958,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -51309,7 +50985,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "Action stats for snapshot generating and applying", "editable": true, "error": false, "fieldConfig": { @@ -51336,7 +51012,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 366, + "id": 363, "interval": null, "isNew": true, "legend": { @@ -51379,15 +51055,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(max_over_time(\n tikv_lock_manager_detector_leader_heartbeat\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (instance) ", + "expr": "sum(delta(\n tikv_raftstore_snapshot_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - 
"legendFormat": "{{instance}}", + "legendFormat": "{{type}}-{{status}}", "metric": "", - "query": "sum(max_over_time(\n tikv_lock_manager_detector_leader_heartbeat\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30s]\n)) by (instance) ", + "query": "sum(delta(\n tikv_raftstore_snapshot_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_raftstore_clean_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "clean-region-by-{{type}}", + "metric": "", + "query": "sum(delta(\n tikv_raftstore_clean_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, status) ", "refId": "", "step": 10, "target": "" @@ -51396,7 +51087,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Deadlock detector leader", + "title": "Snapshot Actions", "tooltip": { "msResolution": true, "shared": true, @@ -51415,7 +51106,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -51442,7 +51133,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The speed of sending or receiving snapshot", "editable": true, "error": false, "fieldConfig": { @@ -51469,7 +51160,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 367, + "id": 364, "interval": null, "isNew": true, "legend": { @@ -51512,7 +51203,197 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "((\n tikv_pessimistic_lock_memory_size\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "expr": "sum(rate(\n tikv_snapshot_limit_transport_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", + "metric": "", + "query": "sum(rate(\n tikv_snapshot_limit_transport_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, type) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_snapshot_limit_generate_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-generate", + "metric": "", + "query": "sum(rate(\n tikv_snapshot_limit_generate_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Snapshot transport speed", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + 
"alignLevel": 0 + } + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Snapshot", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 365, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The CPU utilization of resolved ts worker", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 366, + "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": null, + "maxPerRow": null, + "minSpan": null, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": null, + 
"seriesOverrides": [], + "span": null, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"resolved_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -51520,7 +51401,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "((\n tikv_pessimistic_lock_memory_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"resolved_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -51529,7 +51410,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Total pessimistic locks memory size", + "title": "Resolved TS Worker CPU", "tooltip": { "msResolution": true, "shared": true, @@ -51548,7 +51429,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -51575,7 +51456,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The CPU utilization of advance ts worker", "editable": true, "error": false, "fieldConfig": { @@ -51596,13 +51477,13 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 28 + "w": 8, + "x": 8, + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 368, + "id": 367, "interval": null, "isNew": true, "legend": { @@ -51645,15 +51526,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_in_memory_pessimistic_locking\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", + "expr": "sum(rate(\n 
tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"advance_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{result}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_in_memory_pessimistic_locking\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"advance_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -51662,7 +51543,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "In-memory pessimistic locking result", + "title": "Advance ts Worker CPU", "tooltip": { "msResolution": true, "shared": true, @@ -51681,7 +51562,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -51708,7 +51589,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of active keys and waiters.", + "description": "The CPU utilization of scan lock worker", "editable": true, "error": false, "fieldConfig": { @@ -51729,13 +51610,13 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 28 + "w": 8, + "x": 16, + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 369, + "id": 368, "interval": null, "isNew": true, "legend": { @@ -51778,15 +51659,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"inc_scan.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n tikv_lock_wait_queue_entries_gauge_vec\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"inc_scan.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -51795,7 +51676,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Pessimistic lock activities", + "title": "Scan lock Worker CPU", "tooltip": { "msResolution": true, "shared": true, @@ -51814,7 +51695,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -51837,23 +51718,11 @@ } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The length includes the entering transaction itself", + "description": "The gap between resolved ts (the maximum candidate of safe-ts) and current time.", "editable": true, "error": false, "fieldConfig": { @@ -51864,88 +51733,129 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 7 }, - 
"heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 370, + "id": 369, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_lock_wait_queue_length_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "sum((\n tikv_resolved_ts_min_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_lock_wait_queue_length_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum((\n tikv_resolved_ts_min_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, 
"timeShift": null, - "title": "Lengths of lock wait queues when transaction enqueues", + "title": "Max gap of resolved-ts", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration scan in-memory pessimistic locks with read lock", + "description": "The region that has minimal resolved ts", "editable": true, "error": false, "fieldConfig": { @@ -51968,11 +51878,11 @@ "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 371, + "id": 370, "interval": null, "isNew": true, "legend": { @@ -52008,90 +51918,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, 
"targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%-{{type}}", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "expr": "sum((\n tikv_resolved_ts_min_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%-{{type}}", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg-{{type}}", - "metric": "", - "query": "(sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) / sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count-{{type}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_storage_mvcc_scan_lock_read_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum((\n tikv_resolved_ts_min_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -52100,7 +51942,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "In-memory scan lock read duration", + "title": "Min Resolved TS Region", "tooltip": { "msResolution": true, "shared": true, @@ -52119,9 +51961,9 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "none", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -52140,55 +51982,13 @@ 
"align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Pessimistic Locking", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 372, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The gap between now() and the minimal (non-zero) safe ts for followers", "editable": true, "error": false, "fieldConfig": { @@ -52211,11 +52011,11 @@ "h": 7, "w": 12, "x": 0, - "y": 0 + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 373, + "id": 371, "interval": null, "isNew": true, "legend": { @@ -52258,7 +52058,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum((\n tikv_resolved_ts_min_follower_safe_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -52266,7 +52066,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": 
"sum((\n tikv_resolved_ts_min_follower_safe_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -52275,7 +52075,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Max gap of follower safe-ts", "tooltip": { "msResolution": true, "shared": true, @@ -52294,7 +52094,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -52321,7 +52121,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The region id of the follower that has minimal safe ts", "editable": true, "error": false, "fieldConfig": { @@ -52344,11 +52144,11 @@ "h": 7, "w": 12, "x": 12, - "y": 0 + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 374, + "id": 372, "interval": null, "isNew": true, "legend": { @@ -52391,45 +52191,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "total-99", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"queue|exec_download\"}\n [$__rate_interval]\n)) 
by (type, le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "(DL){{type}}-99", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"queue|exec_download\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "expr": "sum((\n tikv_resolved_ts_min_follower_safe_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "(AP){{type}}-99", + "legendFormat": "{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "query": "sum((\n tikv_resolved_ts_min_follower_safe_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -52438,7 +52208,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "P99 RPC Duration", + "title": "Min Safe TS Follower Region", "tooltip": { "msResolution": true, "shared": true, @@ -52457,7 +52227,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -52484,7 +52254,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - 
"description": "", + "description": "The gap between resolved ts of leaders and current time", "editable": true, "error": false, "fieldConfig": { @@ -52507,11 +52277,11 @@ "h": 7, "w": 12, "x": 0, - "y": 7 + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 375, + "id": 373, "interval": null, "isNew": true, "legend": { @@ -52554,30 +52324,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (instance, request) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{request}}", - "metric": "", - "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (instance, request) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", + "expr": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "total-{{request}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", + "query": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_gap_millis\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -52586,7 +52341,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Import RPC Ops", + "title": "Max gap of resolved-ts in region leaders", "tooltip": { "msResolution": true, "shared": true, @@ -52605,7 +52360,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -52632,7 +52387,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The region that its leader has minimal resolved ts.", "editable": true, "error": false, "fieldConfig": { @@ -52655,11 +52410,11 @@ "h": 7, "w": 12, "x": 12, - "y": 7 + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 376, + "id": 374, "interval": null, "isNew": true, "legend": { @@ -52702,15 +52457,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_apply_cache_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "expr": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}-{{instance}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_import_apply_cache_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "query": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -52719,7 +52474,7 @@ 
"thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Cache Events", + "title": "Min Leader Resolved TS Region", "tooltip": { "msResolution": true, "shared": true, @@ -52738,7 +52493,7 @@ "yaxes": [ { "decimals": null, - "format": "cps", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -52777,7 +52532,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The time consumed when handle a check leader request", "editable": true, "error": false, "fieldConfig": { @@ -52792,14 +52547,14 @@ "h": 7, "w": 12, "x": 0, - "y": 14 + "y": 28 }, "heatmap": {}, "height": null, "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 377, + "id": 375, "interval": null, "legend": { "show": false @@ -52816,7 +52571,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "expr": "sum(rate(\n tikv_resolved_ts_check_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", "hide": false, "instant": false, @@ -52824,7 +52579,7 @@ "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_resolved_ts_check_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -52832,7 +52587,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Overall RPC Duration", + "title": "Check leader duration", "tooltip": { 
"msResolution": true, "shared": true, @@ -52865,23 +52620,11 @@ "yBucketSize": null }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "Bucketed histogram of region count in a check leader request", "editable": true, "error": false, "fieldConfig": { @@ -52892,192 +52635,129 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 14 + "y": 28 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 378, + "id": 376, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_apply_duration_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"exec_download\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"exec_download\"}\n [$__rate_interval]\n)) by (le) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Read File into Memory Duration", + "title": "99% CheckLeader request region count", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - 
"min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 21 - }, - "heatmap": {}, - "height": null, - "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 379, - "interval": null, - "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ + "yaxes": [ { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queuing\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "metric": "", - "query": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queuing\"}\n [$__rate_interval]\n)) by (le) ", - "refId": "", - "step": 10, - "target": "" + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } ], - "timeFrom": null, - "timeShift": null, - "title": "Queuing Time", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": 
null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "Bucketed histogram of the check leader request size", "editable": true, "error": false, "fieldConfig": { @@ -53099,12 +52779,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 21 + "x": 0, + "y": 35 }, "height": null, "hideTimeOverride": false, - "id": 380, + "id": 377, "interval": null, "isNew": true, "legend": { @@ -53147,7 +52827,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_apply_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, @@ -53155,7 +52835,22 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_import_apply_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-check-num", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -53164,7 +52859,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Apply Request Throughput", + "title": "99% CheckLeader request size", "tooltip": { "msResolution": true, "shared": true, @@ -53206,23 +52901,11 @@ } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The count of fail to advance resolved-ts", "editable": true, "error": false, "fieldConfig": { @@ -53233,100 +52916,144 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 28 + "x": 12, + "y": 35 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 381, + "id": 378, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": 
true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_download_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "sum(delta(\n tikv_resolved_ts_fail_advance_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}-{{reason}}", "metric": "", - "query": "sum(rate(\n tikv_import_download_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(delta(\n tikv_resolved_ts_fail_advance_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_raftstore_check_stale_peer\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + 
"legendFormat": "{{instance}}-stale-peer", + "metric": "", + "query": "sum(delta(\n tikv_raftstore_check_stale_peer\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Downloaded File Size", + "title": "Fail advance ts count", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "Total bytes in memory of resolved-ts observe regions's lock heap", "editable": true, "error": false, "fieldConfig": { @@ -53337,81 +53064,122 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + 
"threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 28 + "x": 0, + "y": 42 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 382, + "id": 379, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_apply_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "avg((\n tikv_resolved_ts_lock_heap_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_import_apply_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "avg((\n tikv_resolved_ts_lock_heap_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by 
(instance) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Apply Batch Size", + "title": "Lock heap size", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "cacheTimeout": null, @@ -53430,7 +53198,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The backoff duration before starting initial scan", "editable": true, "error": false, "fieldConfig": { @@ -53444,15 +53212,15 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 35 + "x": 12, + "y": 42 }, "heatmap": {}, "height": null, "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 383, + "id": 380, "interval": null, "legend": { "show": false @@ -53469,7 +53237,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"get_permit\"}\n [$__rate_interval]\n)) by (le) ", + "expr": "sum(rate(\n tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", "hide": false, "instant": false, @@ -53477,7 +53245,7 @@ "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"get_permit\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -53485,7 +53253,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Blocked by Concurrency Time", + "title": "Initial scan backoff duration", "tooltip": { "msResolution": true, "shared": true, @@ -53522,7 +53290,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The status of resolved-ts observe regions", "editable": true, "error": false, "fieldConfig": { @@ -53544,12 +53312,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 35 + "x": 0, + "y": 49 }, "height": null, "hideTimeOverride": false, - "id": 384, + "id": 381, "interval": null, "isNew": true, "legend": { @@ -53592,15 +53360,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", + "expr": "sum((\n tikv_resolved_ts_region_resolve_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{type}}", + "legendFormat": "{{type}}", "metric": "", - "query": 
"sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", + "query": "sum((\n tikv_resolved_ts_region_resolve_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -53609,7 +53377,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Apply Request Speed", + "title": "Observe region status", "tooltip": { "msResolution": true, "shared": true, @@ -53628,7 +53396,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -53655,7 +53423,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "Total bytes of pending commands in the channel", "editable": true, "error": false, "fieldConfig": { @@ -53677,12 +53445,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 42 + "x": 12, + "y": 49 }, "height": null, "hideTimeOverride": false, - "id": 385, + "id": 382, "interval": null, "isNew": true, "legend": { @@ -53725,7 +53493,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_import_apply_cached_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "avg((\n tikv_resolved_ts_channel_penging_cmd_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -53733,7 +53501,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n tikv_import_apply_cached_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "avg((\n tikv_resolved_ts_channel_penging_cmd_bytes_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -53742,7 +53510,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Cached File in Memory", + "title": "Pending command size", "tooltip": { "msResolution": true, "shared": true, @@ -53782,7 +53550,49 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Resolved TS", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 383, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, @@ -53810,12 +53620,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 42 + "x": 0, + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 386, + "id": 384, "interval": null, "isNew": true, "legend": { @@ -53858,15 +53668,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{type}}", + "legendFormat": 
"{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -53875,7 +53685,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Engine Requests Unfinished", + "title": "CPU Usage", "tooltip": { "msResolution": true, "shared": true, @@ -53894,7 +53704,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -53916,116 +53726,12 @@ "alignLevel": 0 } }, - { - "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 49 - }, - "heatmap": {}, - "height": null, - "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 387, - "interval": null, - "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_engine_request_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "metric": "", - "query": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Apply Time", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -54048,11 +53754,11 @@ "h": 7, "w": 12, "x": 12, - "y": 49 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 388, + "id": 385, "interval": null, "isNew": true, "legend": { @@ -54095,15 +53801,45 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_server_mem_trace_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftstore-.*\"}\n \n)) by (instance) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n 
\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "total-99", "metric": "", - "query": "sum((\n tikv_server_mem_trace_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftstore-.*\"}\n \n)) by (instance) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"queue|exec_download\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "(DL){{type}}-99", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"queue|exec_download\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "(AP){{type}}-99", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_engine_request_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -54112,7 +53848,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Raft Store Memory Usage", + "title": "P99 RPC Duration", "tooltip": { "msResolution": true, "shared": true, @@ -54131,7 +53867,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -54152,55 +53888,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Point In Time Restore", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 389, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of resolved ts worker", + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -54221,13 +53915,13 @@ }, "gridPos": { "h": 7, - "w": 8, + "w": 12, "x": 0, - "y": 0 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 390, + "id": 386, "interval": null, "isNew": true, "legend": { @@ -54270,15 +53964,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"resolved_ts.*\"}\n [$__rate_interval]\n)) by (instance) 
", + "expr": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (instance, request) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}}-{{request}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"resolved_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (instance, request) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "total-{{request}}", + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", "refId": "", "step": 10, "target": "" @@ -54287,7 +53996,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Resolved TS Worker CPU", + "title": "Import RPC Ops", "tooltip": { "msResolution": true, "shared": true, @@ -54306,7 +54015,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -54333,7 +54042,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU 
utilization of advance ts worker", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -54354,13 +54063,13 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 8, - "y": 0 + "w": 12, + "x": 12, + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 391, + "id": 387, "interval": null, "isNew": true, "legend": { @@ -54403,15 +54112,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"advance_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_import_apply_cache_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{type}}-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"advance_ts.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_import_apply_cache_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", "refId": "", "step": 10, "target": "" @@ -54420,7 +54129,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Advance ts Worker CPU", + "title": "Cache Events", "tooltip": { "msResolution": true, "shared": true, @@ -54439,7 +54148,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "cps", "label": null, "logBase": 1, "max": null, @@ -54462,11 +54171,23 @@ } }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": 
"interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of scan lock worker", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -54477,129 +54198,100 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 0, + "y": 14 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 392, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 388, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"inc_scan.*\"}\n [$__rate_interval]\n)) by (instance) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_import_rpc_duration_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"inc_scan.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=\"apply\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Scan lock Worker CPU", + "title": "Overall RPC Duration", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 
0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between resolved ts (the maximum candidate of safe-ts) and current time.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -54610,129 +54302,100 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 7 + "x": 12, + "y": 14 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 393, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 389, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_resolved_ts_min_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"exec_download\"}\n [$__rate_interval]\n)) by 
(le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum((\n tikv_resolved_ts_min_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_import_apply_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"exec_download\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Max gap of resolved-ts", + "title": "Read File into Memory Duration", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The 
region that has minimal resolved ts", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -54743,129 +54406,88 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 7 + "x": 0, + "y": 21 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 394, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 390, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_resolved_ts_min_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queuing\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum((\n 
tikv_resolved_ts_min_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queuing\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Min Resolved TS Region", + "title": "Queuing Time", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between now() and the minimal (non-zero) safe ts for followers", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -54887,12 +54509,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 14 + "x": 12, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 395, + "id": 391, "interval": null, "isNew": true, "legend": { @@ -54935,7 +54557,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n 
tikv_resolved_ts_min_follower_safe_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_import_apply_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -54943,7 +54565,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n tikv_resolved_ts_min_follower_safe_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_import_apply_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -54952,7 +54574,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Max gap of follower safe-ts", + "title": "Apply Request Throughput", "tooltip": { "msResolution": true, "shared": true, @@ -54971,7 +54593,7 @@ "yaxes": [ { "decimals": null, - "format": "ms", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -54994,11 +54616,23 @@ } }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The region id of the follower that has minimal safe ts", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -55009,23 +54643,323 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 
0.22)" - }, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 14 + "x": 0, + "y": 28 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 396, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 392, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_download_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_import_download_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Downloaded File Size", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + 
"datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 393, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_apply_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_import_apply_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Apply Batch Size", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + 
"colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 35 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 394, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"get_permit\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"get_permit\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Blocked by Concurrency Time", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, 
+ "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 35 + }, + "height": null, + "hideTimeOverride": false, + "id": 395, "interval": null, "isNew": true, "legend": { @@ -55068,15 +55002,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_resolved_ts_min_follower_safe_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}}-{{type}}", "metric": "", - "query": "sum((\n tikv_resolved_ts_min_follower_safe_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", "refId": "", "step": 10, "target": "" @@ -55085,7 +55019,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Min Safe TS Follower Region", + "title": "Apply Request Speed", "tooltip": { "msResolution": true, "shared": true, @@ -55104,7 +55038,7 @@ 
"yaxes": [ { "decimals": null, - "format": "none", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -55131,7 +55065,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The gap between resolved ts of leaders and current time", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -55154,11 +55088,11 @@ "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 397, + "id": 396, "interval": null, "isNew": true, "legend": { @@ -55201,7 +55135,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum((\n tikv_import_apply_cached_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -55209,7 +55143,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_gap_millis\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum((\n tikv_import_apply_cached_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -55218,7 +55152,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Max gap of resolved-ts in region leaders", + "title": "Cached File in Memory", "tooltip": { "msResolution": true, "shared": true, @@ -55237,7 +55171,7 @@ "yaxes": [ { "decimals": null, - "format": "ms", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -55264,7 +55198,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The region that its leader 
has minimal resolved ts.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -55287,11 +55221,11 @@ "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 42 }, "height": null, "hideTimeOverride": false, - "id": 398, + "id": 397, "interval": null, "isNew": true, "legend": { @@ -55334,15 +55268,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}}-{{type}}", "metric": "", - "query": "sum((\n tikv_resolved_ts_min_leader_resolved_ts_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_import_applier_event\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type!=\"begin_req\"}\n [$__rate_interval]\n)) by (instance, type) ", "refId": "", "step": 10, "target": "" @@ -55351,7 +55285,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Min Leader Resolved TS Region", + "title": "Engine Requests Unfinished", "tooltip": { "msResolution": true, "shared": true, @@ -55370,7 +55304,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -55409,7 +55343,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The time consumed when handle a check leader request", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -55424,14 +55358,14 @@ "h": 7, "w": 12, "x": 0, 
- "y": 28 + "y": 49 }, "heatmap": {}, "height": null, "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 399, + "id": 398, "interval": null, "legend": { "show": false @@ -55448,7 +55382,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_resolved_ts_check_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "expr": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", "hide": false, "instant": false, @@ -55456,7 +55390,7 @@ "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_resolved_ts_check_leader_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_import_engine_request_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"apply\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -55464,7 +55398,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Check leader duration", + "title": "Apply Time", "tooltip": { "msResolution": true, "shared": true, @@ -55501,7 +55435,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Bucketed histogram of region count in a check leader request", + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -55524,11 +55458,11 @@ "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 49 }, "height": null, "hideTimeOverride": false, - "id": 400, + "id": 399, "interval": null, "isNew": true, "legend": { @@ -55571,7 +55505,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "sum((\n tikv_server_mem_trace_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftstore-.*\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -55579,7 +55513,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "sum((\n tikv_server_mem_trace_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"raftstore-.*\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -55588,7 +55522,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% CheckLeader request region count", + "title": "Raft Store Memory Usage", "tooltip": { "msResolution": true, "shared": true, @@ -55607,7 +55541,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -55628,13 +55562,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Point In Time Restore", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 400, + 
"interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Bucketed histogram of the check leader request size", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -55655,9 +55631,9 @@ }, "gridPos": { "h": 7, - "w": 12, + "w": 8, "x": 0, - "y": 35 + "y": 0 }, "height": null, "hideTimeOverride": false, @@ -55704,30 +55680,45 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"b.*k.*w.*k.*\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "backup-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"b.*k.*w.*k.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "expr": "sum(rate(\n 
tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_io\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-check-num", + "legendFormat": "backup-io-{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_check_leader_request_item_count_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, le) \n \n \n)) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_io\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "((\n tikv_backup_softlimit\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "backup-auto-throttle-{{instance}}", + "metric": "", + "query": "((\n tikv_backup_softlimit\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -55736,7 +55727,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "99% CheckLeader request size", + "title": "Backup CPU Utilization", "tooltip": { "msResolution": true, "shared": true, @@ -55755,7 +55746,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -55782,7 +55773,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The count of fail to advance resolved-ts", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -55803,9 
+55794,9 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 35 + "w": 8, + "x": 8, + "y": 0 }, "height": null, "hideTimeOverride": false, @@ -55852,30 +55843,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_resolved_ts_fail_advance_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{reason}}", - "metric": "", - "query": "sum(delta(\n tikv_resolved_ts_fail_advance_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, reason) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_raftstore_check_stale_peer\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum((\n tikv_backup_thread_pool_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-stale-peer", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(delta(\n tikv_raftstore_check_stale_peer\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum((\n tikv_backup_thread_pool_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -55884,7 +55860,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Fail advance ts count", + "title": "Backup Thread Count", "tooltip": { "msResolution": true, "shared": true, @@ 
-55930,7 +55906,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Total bytes in memory of resolved-ts observe regions's lock heap", + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -55951,9 +55927,9 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 42 + "w": 8, + "x": 16, + "y": 0 }, "height": null, "hideTimeOverride": false, @@ -56000,15 +55976,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_resolved_ts_lock_heap_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(delta(\n tikv_backup_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, error) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}}-{{error}}", "metric": "", - "query": "avg((\n tikv_resolved_ts_lock_heap_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(delta(\n tikv_backup_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, error) ", "refId": "", "step": 10, "target": "" @@ -56017,7 +55993,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Lock heap size", + "title": "Backup Errors", "tooltip": { "msResolution": true, "shared": true, @@ -56036,7 +56012,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -56075,7 +56051,7 @@ }, "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The backoff duration before starting initial scan", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -56088,9 +56064,9 @@ }, 
"gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 42 + "w": 8, + "x": 0, + "y": 7 }, "heatmap": {}, "height": null, @@ -56114,7 +56090,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "expr": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", "hide": false, "instant": false, @@ -56122,7 +56098,7 @@ "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_resolved_ts_initial_scan_backoff_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -56130,7 +56106,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Initial scan backoff duration", + "title": "Backup Write CF SST Size", "tooltip": { "msResolution": true, "shared": true, @@ -56151,7 +56127,7 @@ "xBucketSize": null, "yAxis": { "decimals": 1, - "format": "s", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -56163,11 +56139,23 @@ "yBucketSize": null }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The status of resolved-ts observe 
regions", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -56178,129 +56166,88 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 49 + "w": 8, + "x": 8, + "y": 7 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, "id": 405, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_resolved_ts_region_resolve_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum((\n tikv_resolved_ts_region_resolve_status\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) ", + "query": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Observe region status", + "title": "Backup Default CF SST Size", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Total bytes of pending commands in the channel", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -56321,9 +56268,9 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 49 + "w": 8, + "x": 16, + "y": 7 }, "height": null, "hideTimeOverride": false, @@ -56370,15 +56317,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "avg((\n tikv_resolved_ts_channel_penging_cmd_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) 
", + "expr": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "total", "metric": "", - "query": "avg((\n tikv_resolved_ts_channel_penging_cmd_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, cf) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{cf}}", + "metric": "", + "query": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, cf) ", "refId": "", "step": 10, "target": "" @@ -56387,7 +56349,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Pending command size", + "title": "Backup SST Generation Throughput", "tooltip": { "msResolution": true, "shared": true, @@ -56406,7 +56368,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -56427,53 +56389,23 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Resolved TS", - "transformations": [], - "transparent": false, - "type": 
"row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 407, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": null, "editable": true, @@ -56486,127 +56418,98 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, - "w": 12, + "w": 6, "x": 0, - "y": 0 + "y": 14 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 408, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 407, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": 
null, "repeatDirection": null, - "seriesOverrides": [], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_allocator_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, type) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{type}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum((\n tikv_allocator_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, type) ", + "query": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Allocator Stats", + "title": "Backup Scan SST Duration", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + 
"format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": null, "editable": true, @@ -56619,127 +56522,98 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 0 + "w": 6, + "x": 6, + "y": 14 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 409, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 408, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_allocator_thread_allocation\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) - sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) )", - "format": "time_series", + "expr": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{thread_name}}", + "legendFormat": "{{le}}", "metric": "", - "query": "(sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) - sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) )", + "query": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Send Allocated(+) / Release Received(-) Bytes Rate", + "title": "Backup Scan SST Duration", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "binBps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": 
null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": null, "editable": true, @@ -56752,122 +56626,81 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 7 + "w": 6, + "x": 12, + "y": 14 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 410, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 409, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": 
[], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"save.*\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{thread_name}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) ", + "query": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"save.*\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Newly Allocated Bytes by Thread", + "title": "Backup Save SST Duration", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + 
"xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "aliasColors": {}, @@ -56895,13 +56728,13 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 7 + "w": 6, + "x": 18, + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 411, + "id": 410, "interval": null, "isNew": true, "legend": { @@ -56944,15 +56777,45 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) ", + "expr": "histogram_quantile(0.999,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{thread_name}}", + "legendFormat": "{{type}}-99.9%", "metric": "", - "query": "sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) ", + "query": "histogram_quantile(0.999,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum((\n tikv_backup_range_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / sum((\n tikv_backup_range_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}-avg", + "metric": "", + "query": "(sum((\n tikv_backup_range_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / sum((\n tikv_backup_range_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) )", "refId": "", "step": 10, "target": "" @@ -56961,7 +56824,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Recently Released Bytes by Thread", + "title": "Backup SST Duration", "tooltip": { "msResolution": true, "shared": true, @@ -56980,7 +56843,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -57001,55 +56864,117 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Memory", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - 
"editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 412, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 21 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 411, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": 
"External Storage Create Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -57070,13 +56995,13 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 0, - "y": 0 + "w": 12, + "x": 12, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 413, + "id": 412, "interval": null, "isNew": true, "legend": { @@ -57112,52 +57037,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"b.*k.*w.*k.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n 
\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "backup-{{instance}}", + "legendFormat": "99.99%", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"b.*k.*w.*k.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_io\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "backup-io-{{instance}}", + "legendFormat": "99%", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_io\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "((\n tikv_backup_softlimit\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "expr": "(sum(rate(\n 
tikv_external_storage_create_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", "format": "time_series", - "hide": false, + "hide": true, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "backup-auto-throttle-{{instance}}", + "legendFormat": "avg", "metric": "", - "query": "((\n tikv_backup_softlimit\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "query": "(sum(rate(\n tikv_external_storage_create_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -57166,7 +57129,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Backup CPU Utilization", + "title": "External Storage Create Duration", "tooltip": { "msResolution": true, "shared": true, @@ -57185,7 +57148,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "s", "label": null, 
"logBase": 1, "max": null, @@ -57212,7 +57175,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -57233,13 +57196,13 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 8, - "y": 0 + "w": 12, + "x": 0, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 414, + "id": 413, "interval": null, "isNew": true, "legend": { @@ -57275,22 +57238,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_backup_thread_pool_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "99.99%-{{req}}", "metric": "", - "query": "sum((\n tikv_backup_thread_pool_size\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{req}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{req}}", + "metric": "", + "query": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) ", "refId": "", "step": 10, "target": "" @@ -57299,7 +57330,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Backup Thread Count", + "title": "Checksum Request Duration", "tooltip": { "msResolution": true, "shared": true, @@ -57318,7 +57349,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -57345,7 +57376,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -57366,13 +57397,13 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 12, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 415, + "id": 414, "interval": null, "isNew": true, "legend": { @@ -57415,15 +57446,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_backup_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, error) ", + "expr": "sum(rate(\n node_disk_io_time_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, device) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{error}}", + "legendFormat": "{{instance}}-{{device}}", "metric": "", - "query": "sum(delta(\n tikv_backup_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, error) ", + "query": "sum(rate(\n node_disk_io_time_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, device) ", "refId": "", "step": 10, "target": "" @@ -57432,7 +57463,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Backup Errors", + "title": "IO Utilization", "tooltip": { "msResolution": true, "shared": true, @@ -57451,7 +57482,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -57474,21 +57505,9 @@ } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": null, "editable": true, @@ -57501,98 +57520,157 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 8, "x": 0, - "y": 7 + "y": 35 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 416, + "id": 415, "interval": null, + "isNew": true, "legend": { - 
"show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "import-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"write\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance, tid) > 0", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "import-{{instance}}-{{tid}}", + "metric": "", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance, tid) > 0", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "import-count-{{instance}}", + "metric": "", + "query": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Backup Write CF SST Size", + "title": "Import CPU Utilization", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": null, "editable": true, @@ -57605,81 +57683,122 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, "w": 8, "x": 8, - "y": 7 + "y": 35 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 417, + "id": 416, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n 
tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_backup_range_size_bytes_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",cf=\"default\"}\n [$__rate_interval]\n)) by (le) ", + "query": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Backup Default CF SST Size", + "title": "Import Thread Count", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + 
"alignLevel": 0 + } }, { "aliasColors": {}, @@ -57709,11 +57828,11 @@ "h": 7, "w": 8, "x": 16, - "y": 7 + "y": 35 }, "height": null, "hideTimeOverride": false, - "id": 418, + "id": 417, "interval": null, "isNew": true, "legend": { @@ -57756,30 +57875,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "total", - "metric": "", - "query": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, cf) ", + "expr": "sum(delta(\n tikv_import_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, error, instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{cf}}", + "legendFormat": "{{type}}-{{error}}-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_backup_range_size_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, cf) ", + "query": "sum(delta(\n tikv_import_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, error, instance) ", "refId": "", "step": 10, "target": "" @@ -57788,7 +57892,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Backup 
SST Generation Throughput", + "title": "Import Errors", "tooltip": { "msResolution": true, "shared": true, @@ -57807,7 +57911,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -57830,23 +57934,11 @@ } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -57857,98 +57949,195 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, + "w": 12, "x": 0, - "y": 14 + "y": 42 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 419, + "id": 418, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + 
"dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "99.99%-{{request}}", "metric": "", - "query": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"snapshot\"}\n [$__rate_interval]\n)) by (le) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%-{{request}}", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_import_rpc_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) / sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{request}}", + "metric": "", + "query": "(sum(rate(\n tikv_import_rpc_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) / sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{request}}", + "metric": "", + "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Backup Scan SST Duration", + "title": "Import RPC Duration", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, 
"sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": null, "editable": true, @@ -57961,81 +58150,122 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 6, - "y": 14 + "w": 12, + "x": 12, + "y": 42 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 420, + "id": 419, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + 
"linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{request}}", "metric": "", - "query": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"scan\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Backup Scan SST Duration", + "title": "Import RPC Ops", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - 
"xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "cacheTimeout": null, @@ -58068,15 +58298,15 @@ "gridPos": { "h": 7, "w": 6, - "x": 12, - "y": 14 + "x": 0, + "y": 49 }, "heatmap": {}, "height": null, "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 421, + "id": 420, "interval": null, "legend": { "show": false @@ -58093,7 +58323,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"save.*\"}\n [$__rate_interval]\n)) by (le) ", + "expr": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"download|write\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", "hide": false, "instant": false, @@ -58101,7 +58331,7 @@ "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"save.*\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"download|write\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -58109,7 +58339,7 @@ ], "timeFrom": 
null, "timeShift": null, - "title": "Backup Save SST Duration", + "title": "Import Write/Download RPC Duration", "tooltip": { "msResolution": true, "shared": true, @@ -58142,9 +58372,21 @@ "yBucketSize": null }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": null, "editable": true, @@ -58157,152 +58399,81 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, "w": 6, - "x": 18, - "y": 14 + "x": 6, + "y": 49 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 422, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 421, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.999,(\n sum(rate(\n 
tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}-99.9%", - "metric": "", - "query": "histogram_quantile(0.999,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}-99%", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_backup_range_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum((\n tikv_backup_range_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / sum((\n tikv_backup_range_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) )", - "format": "time_series", + "expr": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queue\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": 
"", "intervalFactor": 1, - "legendFormat": "{{type}}-avg", + "legendFormat": "{{le}}", "metric": "", - "query": "(sum((\n tikv_backup_range_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) / sum((\n tikv_backup_range_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (type) )", + "query": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queue\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Backup SST Duration", + "title": "Import Wait Duration", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "cacheTimeout": null, @@ -58334,16 +58505,16 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 21 + "w": 6, + "x": 12, + "y": 49 }, "heatmap": {}, "height": null, "hideTimeOverride": false, "hideZeroBuckets": true, "highlightCards": true, - "id": 423, + "id": 422, "interval": null, "legend": { "show": false @@ -58360,7 +58531,7 @@ 
"targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "expr": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"read\"}\n [$__rate_interval]\n)) by (le) ", "format": "heatmap", "hide": false, "instant": false, @@ -58368,7 +58539,7 @@ "intervalFactor": 1, "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"read\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -58376,7 +58547,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "External Storage Create Duration", + "title": "Import Read SST Duration", "tooltip": { "msResolution": true, "shared": true, @@ -58409,11 +58580,23 @@ "yBucketSize": null }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -58424,197 +58607,100 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 21 
+ "w": 6, + "x": 18, + "y": 49 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 424, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 423, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": 
"", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"rewrite\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_external_storage_create_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_external_storage_create_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n tikv_external_storage_create_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n 
tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_external_storage_create_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"rewrite\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "External Storage Create Duration", + "title": "Import Rewrite SST Duration", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + 
"exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -58625,195 +58711,202 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, - "w": 12, + "w": 6, "x": 0, - "y": 28 + "y": 56 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 425, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 424, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99.99%-{{req}}", + "legendFormat": "{{le}}", "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", + "query": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%-{{req}}", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_coprocessor_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) )", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg-{{req}}", - "metric": "", - "query": "(sum(rate(\n tikv_coprocessor_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) / sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) )", - "refId": "", - "step": 10, - "target": "" - }, + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Import Ingest RPC Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + 
"editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 56 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 425, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) ", - "format": "time_series", - "hide": true, + "expr": "sum(rate(\n tikv_import_ingest_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "count-{{req}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",req=~\"analyze.*|checksum.*\"}\n [$__rate_interval]\n)) by (req) ", + "query": "sum(rate(\n tikv_import_ingest_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Checksum Request Duration", + "title": "Import Ingest SST Duration", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, 
"transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": null, "editable": true, @@ -58826,122 +58919,81 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, - "w": 12, + "w": 6, "x": 12, - "y": 28 + "y": 56 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, "id": 426, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 
512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n node_disk_io_time_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, device) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_import_ingest_byte_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-{{device}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n node_disk_io_time_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance, device) ", + "query": "sum(rate(\n tikv_import_ingest_byte_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "IO Utilization", + "title": "Import Ingest SST Bytes", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { 
- "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "aliasColors": {}, @@ -58969,9 +59021,9 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 0, - "y": 35 + "w": 6, + "x": 18, + "y": 56 }, "height": null, "hideTimeOverride": false, @@ -59018,45 +59070,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_import_download_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "import-{{instance}}", - "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance, tid) > 0", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "import-{{instance}}-{{tid}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance, tid) > 0", + "query": "sum(rate(\n tikv_import_download_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_import_download_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "format": "time_series", - "hide": true, + "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "import-count-{{instance}}", + "legendFormat": "total", "metric": "", - "query": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_import_download_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -59065,7 +59102,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Import CPU Utilization", + "title": "Import Download SST Throughput", "tooltip": { "msResolution": true, "shared": true, @@ -59084,7 +59121,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -59132,9 +59169,9 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 8, - "y": 35 + "w": 12, + "x": 0, + "y": 63 }, "height": null, "hideTimeOverride": false, @@ -59181,15 +59218,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - 
"expr": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(delta(\n tikv_import_local_write_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{type}}-{{instance}}", "metric": "", - "query": "count(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"sst_.*\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(delta(\n tikv_import_local_write_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", "refId": "", "step": 10, "target": "" @@ -59198,7 +59235,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Import Thread Count", + "title": "Import Local Write keys", "tooltip": { "msResolution": true, "shared": true, @@ -59265,9 +59302,9 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 16, - "y": 35 + "w": 12, + "x": 12, + "y": 63 }, "height": null, "hideTimeOverride": false, @@ -59314,15 +59351,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_import_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, error, instance) ", + "expr": "sum(rate(\n tikv_import_local_write_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}-{{error}}-{{instance}}", + "legendFormat": 
"{{type}}-{{instance}}", "metric": "", - "query": "sum(delta(\n tikv_import_error_counter\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, error, instance) ", + "query": "sum(rate(\n tikv_import_local_write_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", "refId": "", "step": 10, "target": "" @@ -59331,7 +59368,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Import Errors", + "title": "Import Local Write bytes", "tooltip": { "msResolution": true, "shared": true, @@ -59350,7 +59387,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -59377,7 +59414,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -59400,7 +59437,7 @@ "h": 7, "w": 12, "x": 0, - "y": 42 + "y": 70 }, "height": null, "hideTimeOverride": false, @@ -59440,90 +59477,37 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - 
"intervalFactor": 1, - "legendFormat": "99.99%-{{request}}", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", + "expr": "sum((\n tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%-{{request}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request, le) \n \n \n)) ", + "query": "sum((\n tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_import_rpc_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) / sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) )", + "expr": "sum((\n tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, 
"interval": "", "intervalFactor": 1, - "legendFormat": "avg-{{request}}", - "metric": "", - "query": "(sum(rate(\n tikv_import_rpc_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) / sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count-{{request}}", + "legendFormat": "sum", "metric": "", - "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (request) ", + "query": "sum((\n tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -59532,7 +59516,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Import RPC Duration", + "title": "TTL Expired", "tooltip": { "msResolution": true, "shared": true, @@ -59551,7 +59535,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -59578,7 +59562,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -59601,7 +59585,7 @@ "h": 7, "w": 12, "x": 12, - "y": 42 + "y": 70 }, "height": null, "hideTimeOverride": false, @@ -59648,15 +59632,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": 
"sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", + "expr": "sum(rate(\n tikv_cloud_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cloud, req) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{request}}", + "legendFormat": "{{cloud}}-{{req}}", "metric": "", - "query": "sum(rate(\n tikv_import_rpc_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request!=\"switch_mode\"}\n [$__rate_interval]\n)) by (request) ", + "query": "sum(rate(\n tikv_cloud_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cloud, req) ", "refId": "", "step": 10, "target": "" @@ -59665,7 +59649,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Import RPC Ops", + "title": "cloud request", "tooltip": { "msResolution": true, "shared": true, @@ -59684,7 +59668,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -59705,72 +59689,128 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Backup & Import", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 432, + 
"interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": null, "editable": true, "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": null, + "text": "Disabled" + }, + "1": { + "color": "green", + "index": null, + "text": "Enabled" + } + }, + "type": "value" + } + ], + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [] - } - } + "steps": "" + }, + "unit": "none" + }, + "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 49 + "y": 0 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 432, + "id": 433, "interval": null, - "legend": { - "show": false - }, "links": [], - "maxDataPoints": 512, + "maxDataPoints": 100, "maxPerRow": null, "minSpan": null, - "options": {}, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, "span": null, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"download|write\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "((\n tikv_log_backup_enabled\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
\n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{ instance }}", "metric": "", - "query": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"download|write\"}\n [$__rate_interval]\n)) by (le) ", + "query": "((\n tikv_log_backup_enabled\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -59778,103 +59818,95 @@ ], "timeFrom": null, "timeShift": null, - "title": "Import Write/Download RPC Duration", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, + "title": "Endpoint Status", "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "type": "stat" }, { "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": null, "editable": true, "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, + "mappings": [ + { + "options": { + "0": { + "color": "green", + "index": null, + "text": "Running" + }, + "1": { + "color": "yellow", + "index": null, + "text": "Paused" + }, + "2": { + "color": "red", + "index": null, + "text": "Error" + } + }, + "type": "value" + } + ], + 
"noValue": "none", "thresholds": { "mode": "absolute", - "steps": [] - } - } + "steps": "" + }, + "unit": "none" + }, + "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 49 + "y": 0 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 433, + "id": 434, "interval": null, - "legend": { - "show": false - }, "links": [], - "maxDataPoints": 512, + "maxDataPoints": 100, "maxPerRow": null, "minSpan": null, - "options": {}, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, "span": null, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queue\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "min((\n tikv_log_backup_task_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"queue\"}\n [$__rate_interval]\n)) by (le) ", + "query": "min((\n tikv_log_backup_task_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -59882,103 +59914,74 @@ ], "timeFrom": null, "timeShift": null, - "title": "Import Wait Duration", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": 
true, - "sort": 0, - "value_type": "individual" - }, + "title": "Task Status", "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "type": "stat" }, { "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", "description": null, "editable": true, "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [] - } - } + "steps": "" + }, + "unit": "none" + }, + "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 12, - "y": 49 + "y": 0 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 434, + "id": 435, "interval": null, - "legend": { - "show": false - }, "links": [], - "maxDataPoints": 512, + "maxDataPoints": 100, "maxPerRow": null, "minSpan": null, - "options": {}, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name" + }, "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, "span": null, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_download_duration_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"read\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "tidb_log_backup_advancer_owner > 0", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{ instance }}", "metric": "", - "query": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"read\"}\n [$__rate_interval]\n)) by (le) ", + "query": "tidb_log_backup_advancer_owner > 0", "refId": "", "step": 10, "target": "" @@ -59986,103 +59989,74 @@ ], "timeFrom": null, "timeShift": null, - "title": "Import Read SST Duration", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, + "title": "Advancer Owner", "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "type": "stat" }, { "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The average flush size of last 30mins.", "editable": true, "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [] - } - } + "steps": "" + }, + "unit": "bytes" 
+ }, + "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 18, - "y": 49 + "y": 0 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 435, + "id": 436, "interval": null, - "legend": { - "show": false - }, "links": [], - "maxDataPoints": 512, + "maxDataPoints": 100, "maxPerRow": null, "minSpan": null, - "options": {}, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, "span": null, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"rewrite\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "(sum(increase(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) / sum(increase(\n tikv_log_backup_flush_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) )", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{ instance }}", "metric": "", - "query": "sum(rate(\n tikv_import_download_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"rewrite\"}\n [$__rate_interval]\n)) by (le) ", + "query": "(sum(increase(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) / sum(increase(\n tikv_log_backup_flush_duration_sec_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) )", "refId": "", "step": 10, "target": "" @@ -60090,103 +60064,74 @@ ], "timeFrom": null, "timeShift": null, - "title": "Import Rewrite SST Duration", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, + "title": "Average Flush Size", "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "type": "stat" }, { "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The current total flushed file number of this run.", "editable": true, "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": 0, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [] - } - } + "steps": "" + }, + "unit": "none" + }, + "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 56 + "y": 7 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 436, + "id": 437, "interval": null, - "legend": { - "show": false - }, "links": [], - "maxDataPoints": 512, + "maxDataPoints": 100, "maxPerRow": null, "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": 
null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) > 0", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_import_rpc_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",request=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) > 0", "refId": "", "step": 10, "target": "" @@ -60194,103 +60139,74 @@ ], "timeFrom": null, "timeShift": null, - "title": "Import Ingest RPC Duration", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, + "title": "Flushed Files (Last 30m) Per Host", "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 
null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "type": "stat" }, { "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", "editable": true, "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": 0, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [] - } - } + "steps": "" + }, + "unit": "none" + }, + "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 56 + "y": 7 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 437, + "id": 438, "interval": null, - "legend": { - "show": false - }, "links": [], - "maxDataPoints": 512, + "maxDataPoints": 100, "maxPerRow": null, "minSpan": null, - "options": {}, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, "span": null, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_ingest_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "sum(delta(\n tikv_log_backup_flush_duration_sec_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_import_ingest_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=~\"ingest\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(delta(\n tikv_log_backup_flush_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -60298,103 +60214,74 @@ ], "timeFrom": null, "timeShift": null, - "title": "Import Ingest SST Duration", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, + "title": "Flush Times (Last 30m)", "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "type": "stat" }, { "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "This is the summary of the size has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", 
"editable": true, "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [] - } - } + "steps": "" + }, + "unit": "bytes" + }, + "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 12, - "y": 56 + "y": 7 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 438, + "id": 439, "interval": null, - "legend": { - "show": false - }, "links": [], - "maxDataPoints": 512, + "maxDataPoints": 100, "maxPerRow": null, "minSpan": null, - "options": {}, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, "span": null, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_ingest_byte_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "sum(delta(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_import_ingest_byte_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(delta(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -60402,44 +60289,92 @@ ], "timeFrom": null, 
"timeShift": null, - "title": "Import Ingest SST Bytes", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, + "title": "Total Flushed Size (Last 30m)", "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": 0, + "mappings": null, + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": "" + }, + "unit": "none" + }, + "overrides": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 7 }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "height": null, + "hideTimeOverride": false, + "id": 440, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "query": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Flush Files (Last 30m)", + "transformations": [], + "transparent": false, + "type": "stat" }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The CPU utilization of log backup threads. \n**(Note this is the average usage for a period of time, some peak of CPU usage may be lost.)**", "editable": true, "error": false, "fieldConfig": { @@ -60460,13 +60395,13 @@ }, "gridPos": { "h": 7, - "w": 6, - "x": 18, - "y": 56 + "w": 12, + "x": 0, + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 439, + "id": 441, "interval": null, "isNew": true, "legend": { @@ -60509,7 +60444,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_download_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -60517,22 +60452,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_import_download_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_download_bytes_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "total", - "metric": "", - "query": "sum(rate(\n tikv_import_download_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -60541,7 +60461,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Import Download SST Throughput", + "title": "CPU Usage", "tooltip": { "msResolution": true, "shared": true, @@ -60560,7 +60480,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -60587,7 +60507,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -60609,12 +60529,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 63 + "x": 12, + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 440, + "id": 442, "interval": null, "isNew": true, "legend": { @@ -60657,15 +60577,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_import_local_write_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "expr": "sum(rate(\n tikv_log_backup_handle_kv_batch_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", 
"intervalFactor": 1, - "legendFormat": "{{type}}-{{instance}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(delta(\n tikv_import_local_write_keys\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "query": "sum(rate(\n tikv_log_backup_handle_kv_batch_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -60674,7 +60594,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Import Local Write keys", + "title": "Handle Event Rate", "tooltip": { "msResolution": true, "shared": true, @@ -60693,7 +60613,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -60720,7 +60640,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The data rate of initial scanning emitting events.", "editable": true, "error": false, "fieldConfig": { @@ -60742,12 +60662,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 63 + "x": 0, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 441, + "id": 443, "interval": null, "isNew": true, "legend": { @@ -60790,15 +60710,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_import_local_write_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "expr": "sum(rate(\n tikv_log_backup_incremental_scan_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}-{{instance}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n 
tikv_import_local_write_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type, instance) ", + "query": "sum(rate(\n tikv_log_backup_incremental_scan_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -60807,7 +60727,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Import Local Write bytes", + "title": "Initial Scan Generate Event Throughput", "tooltip": { "msResolution": true, "shared": true, @@ -60875,12 +60795,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 70 + "x": 12, + "y": 21 }, "height": null, "hideTimeOverride": false, - "id": 442, + "id": 444, "interval": null, "isNew": true, "legend": { @@ -60923,30 +60843,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "metric": "", - "query": "sum((\n tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "expr": "(time() * 1000 - max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0)", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "sum", + "legendFormat": "{{ task }}", "metric": "", - "query": 
"sum((\n tikv_backup_raw_expired_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "query": "(time() * 1000 - max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0)", "refId": "", "step": 10, "target": "" @@ -60955,7 +60860,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "TTL Expired", + "title": "Abnormal Checkpoint TS Lag", "tooltip": { "msResolution": true, "shared": true, @@ -60974,7 +60879,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -61001,7 +60906,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The estimated memory usage by the streaming backup module.", "editable": true, "error": false, "fieldConfig": { @@ -61023,12 +60928,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 70 + "x": 0, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 443, + "id": 445, "interval": null, "isNew": true, "legend": { @@ -61071,15 +60976,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_cloud_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cloud, req) ", + "expr": "sum((\n tikv_log_backup_heap_memory\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cloud}}-{{req}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_cloud_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (cloud, req) ", + "query": "sum((\n 
tikv_log_backup_heap_memory\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -61088,7 +60993,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "cloud request", + "title": "Memory Of Events", "tooltip": { "msResolution": true, "shared": true, @@ -61107,7 +61012,7 @@ "yaxes": [ { "decimals": null, - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -61128,55 +61033,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Backup & Import", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 444, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Total number of encryption data keys in use", + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -61198,12 +61061,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 0 + "x": 12, + "y": 28 }, "height": null, "hideTimeOverride": false, - "id": 445, + "id": 446, "interval": null, "isNew": true, "legend": { @@ -61246,7 +61109,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_encryption_data_key_storage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum((\n 
tikv_log_backup_observed_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -61254,140 +61117,22 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n tikv_encryption_data_key_storage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum((\n tikv_log_backup_observed_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Encryption data keys", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } - }, - { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": "Number of files being encrypted", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 446, - "interval": null, - "isNew": 
true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxDataPoints": null, - "maxPerRow": null, - "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": null, - "seriesOverrides": [], - "span": null, - "stack": false, - "steppedLine": false, - "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_encryption_file_num\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum((\n tikv_log_backup_observed_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}}-total", "metric": "", - "query": "sum((\n tikv_encryption_file_num\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum((\n tikv_log_backup_observed_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -61396,7 +61141,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Encrypted files", + "title": "Observed Region Count", "tooltip": { "msResolution": true, "shared": true, @@ -61442,7 +61187,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Flag to indicate if encryption is initialized", + 
"description": "The errors met when backing up.\n**They are retryable, don't worry.**", "editable": true, "error": false, "fieldConfig": { @@ -61463,9 +61208,9 @@ }, "gridPos": { "h": 7, - "w": 12, + "w": 8, "x": 0, - "y": 7 + "y": 35 }, "height": null, "hideTimeOverride": false, @@ -61512,15 +61257,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "((\n tikv_encryption_is_initialized\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "expr": "sum(delta(\n tikv_log_backup_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{type}}-{{instance}}", "metric": "", - "query": "((\n tikv_encryption_is_initialized\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "query": "sum(delta(\n tikv_log_backup_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", "refId": "", "step": 10, "target": "" @@ -61529,7 +61274,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Encryption initialized", + "title": "Errors", "tooltip": { "msResolution": true, "shared": true, @@ -61548,7 +61293,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -61575,7 +61320,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Total size of encryption meta files", + "description": "The errors met when backing up.", "editable": true, "error": false, "fieldConfig": { @@ -61596,9 +61341,9 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 7 + "w": 8, + "x": 8, + "y": 35 }, "height": null, "hideTimeOverride": false, @@ -61645,15 +61390,15 @@ "targets": [ { 
"datasource": "${DS_TEST-CLUSTER}", - "expr": "((\n tikv_encryption_meta_file_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "expr": "sum(delta(\n tikv_log_backup_fatal_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{name}}-{{instance}}", + "legendFormat": "{{type}}-{{instance}}", "metric": "", - "query": "((\n tikv_encryption_meta_file_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "query": "sum(delta(\n tikv_log_backup_fatal_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", "refId": "", "step": 10, "target": "" @@ -61662,7 +61407,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Encryption meta files size", + "title": "Fatal Errors", "tooltip": { "msResolution": true, "shared": true, @@ -61681,7 +61426,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "opm", "label": null, "logBase": 1, "max": null, @@ -61708,7 +61453,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -61729,9 +61474,9 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 14 + "w": 8, + "x": 16, + "y": 35 }, "height": null, "hideTimeOverride": false, @@ -61760,7 +61505,7 @@ "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", + "nullPointMode": "null", "options": { "alertThreshold": true, "dataLinks": [] @@ -61771,37 +61516,48 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "Current Time", + "bars": false, + "dashes": 
true, + "fill": 0, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"encrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", + "expr": "max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "encrypt-{{req}}", + "legendFormat": "{{task}}", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"encrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", + "query": "max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0", "refId": "", "step": 10, "target": "" }, { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"decrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", + "expr": "time() * 1000", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "decrypt-{{req}}", + "legendFormat": "Current Time", "metric": "", - "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"decrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", + "query": "time() * 1000", "refId": "", "step": 10, "target": "" @@ -61810,7 +61566,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": 
"Encrypt/decrypt data nanos", + "title": "Checkpoint TS of Tasks", "tooltip": { "msResolution": true, "shared": true, @@ -61829,7 +61585,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "dateTimeAsIsoNoDateIfToday", "label": null, "logBase": 1, "max": null, @@ -61852,11 +61608,23 @@ } }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "Writing or reading file duration (second)", + "description": "The duration of flushing a batch of file.", "editable": true, "error": false, "fieldConfig": { @@ -61867,312 +61635,147 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 14 + "w": 6, + "x": 0, + "y": 42 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, "id": 450, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - 
"seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99%", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n 
tikv_encryption_write_read_file_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "format": "time_series", + "expr": "sum(rate(\n tikv_log_backup_flush_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "avg", - "metric": "", - "query": "(sum(rate(\n tikv_encryption_write_read_file_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "query": "sum(rate(\n tikv_log_backup_flush_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, 
"target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Read/write encryption meta duration", + "title": "Flush Duration", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Encryption", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 451, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": 
"tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The duration of scanning the initial data from local DB and transform them into apply events.", "editable": true, "error": false, "fieldConfig": { "defaults": { - "custom": {}, - "decimals": null, - "mappings": [ - { - "options": { - "0": { - "color": "red", - "index": null, - "text": "Disabled" - }, - "1": { - "color": "green", - "index": null, - "text": "Enabled" - } - }, - "type": "value" - } - ], - "noValue": "none", "thresholds": { "mode": "absolute", - "steps": "" - }, - "unit": "none" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, - "x": 0, - "y": 0 + "x": 6, + "y": 42 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 452, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 451, "interval": null, + "legend": { + "show": false + }, "links": [], - "maxDataPoints": 100, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, + "options": {}, "repeat": null, "repeatDirection": null, + "reverseYBuckets": false, "span": null, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "((\n tikv_log_backup_enabled\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_log_backup_initial_scan_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{ instance }}", + "legendFormat": "{{le}}", "metric": "", - "query": "((\n tikv_log_backup_enabled\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", + "query": "sum(rate(\n tikv_log_backup_initial_scan_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -62180,95 +61783,103 @@ ], "timeFrom": null, "timeShift": null, - "title": "Endpoint Status", + "title": "Initial scanning duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, "transformations": [], "transparent": false, - "type": "stat" + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The duration of converting a raft request into a apply event.", "editable": true, "error": false, "fieldConfig": { "defaults": { - "custom": {}, - "decimals": null, - "mappings": [ - { - "options": { - "0": { - "color": "green", - "index": null, - "text": "Running" - }, - "1": { - "color": "yellow", - "index": null, - "text": "Paused" - }, - "2": { - "color": "red", - "index": null, - "text": "Error" - } - }, - "type": "value" - } - ], - "noValue": "none", "thresholds": { "mode": "absolute", - "steps": "" - }, - "unit": "none" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, - 
"x": 6, - "y": 0 + "x": 12, + "y": 42 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 453, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 452, "interval": null, + "legend": { + "show": false + }, "links": [], - "maxDataPoints": 100, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, + "options": {}, "repeat": null, "repeatDirection": null, + "reverseYBuckets": false, "span": null, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "min((\n tikv_log_backup_task_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"to_stream_event\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{le}}", "metric": "", - "query": "min((\n tikv_log_backup_task_status\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"to_stream_event\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -62276,74 +61887,103 @@ ], "timeFrom": null, "timeShift": null, - "title": "Task Status", + "title": "Convert Raft Event duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, "transformations": [], 
"transparent": false, - "type": "stat" + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The duration of waiting the mutex of the controller.", "editable": true, "error": false, "fieldConfig": { "defaults": { - "custom": {}, - "decimals": null, - "mappings": null, - "noValue": "none", "thresholds": { "mode": "absolute", - "steps": "" - }, - "unit": "none" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, - "x": 12, - "y": 0 + "x": 18, + "y": 42 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 454, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 453, "interval": null, + "legend": { + "show": false + }, "links": [], - "maxDataPoints": 100, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name" - }, + "options": {}, "repeat": null, "repeatDirection": null, + "reverseYBuckets": false, "span": null, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "tidb_log_backup_advancer_owner > 0", - "format": "time_series", + "expr": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"get_router_lock\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{ instance }}", + "legendFormat": "{{le}}", "metric": "", - "query": "tidb_log_backup_advancer_owner > 0", + "query": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"get_router_lock\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -62351,74 +61991,103 @@ ], "timeFrom": null, "timeShift": null, - "title": "Advancer Owner", + "title": "Wait for Lock Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, "transformations": [], "transparent": false, - "type": "stat" + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The average flush size of last 30mins.", + "description": "The number of KV-modify of each raft command observed.", "editable": true, "error": false, "fieldConfig": { "defaults": { - "custom": {}, - "decimals": null, - "mappings": null, - "noValue": "none", "thresholds": { "mode": "absolute", - "steps": "" - }, - "unit": "bytes" - }, - 
"overrides": [] + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, - "x": 18, - "y": 0 + "x": 0, + "y": 49 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 455, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 454, "interval": null, + "legend": { + "show": false + }, "links": [], - "maxDataPoints": 100, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, + "options": {}, "repeat": null, "repeatDirection": null, + "reverseYBuckets": false, "span": null, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(increase(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) / sum(increase(\n tikv_log_backup_flush_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) )", - "format": "time_series", + "expr": "sum(rate(\n tikv_log_backup_handle_kv_batch_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{ instance }}", + "legendFormat": "{{le}}", "metric": "", - "query": "(sum(increase(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) / sum(increase(\n tikv_log_backup_flush_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) )", + "query": "sum(rate(\n tikv_log_backup_handle_kv_batch_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -62426,74 +62095,103 @@ ], "timeFrom": null, "timeShift": null, - "title": "Average Flush Size", + "title": "Command Batch Size", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, "transformations": [], "transparent": false, - "type": "stat" + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The current total flushed file number of this run.", + "description": "The total cost of saving an event into temporary file.", "editable": true, "error": false, "fieldConfig": { "defaults": { - "custom": {}, - "decimals": 0, - "mappings": null, - "noValue": "none", "thresholds": { "mode": "absolute", - "steps": "" - }, - "unit": "none" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, - "x": 0, - "y": 7 + "x": 6, + "y": 49 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 456, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 455, "interval": null, + "legend": { + "show": false + }, "links": [], - "maxDataPoints": 100, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": 
"none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, + "options": {}, "repeat": null, "repeatDirection": null, + "reverseYBuckets": false, "span": null, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) > 0", - "format": "time_series", + "expr": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_to_temp_file\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) > 0", + "query": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_to_temp_file\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -62501,74 +62199,103 @@ ], "timeFrom": null, "timeShift": null, - "title": "Flushed Files (Last 30m) Per Host", + "title": "Save to Temp File Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, "transformations": [], "transparent": false, - "type": "stat" + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": 
true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "description": "The total cost of writing a event into temporary file.\nComparing to the ***Save*** duration, it doesn't contain the time cost of routing the task by range / task.", "editable": true, "error": false, "fieldConfig": { "defaults": { - "custom": {}, - "decimals": 0, - "mappings": null, - "noValue": "none", "thresholds": { "mode": "absolute", - "steps": "" - }, - "unit": "none" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, - "x": 6, - "y": 7 + "x": 12, + "y": 49 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 457, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 456, "interval": null, + "legend": { + "show": false + }, "links": [], - "maxDataPoints": 100, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, + "options": {}, "repeat": null, "repeatDirection": null, + "reverseYBuckets": false, "span": null, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_log_backup_flush_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) 
", - "format": "time_series", + "expr": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"write_to_tempfile\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(delta(\n tikv_log_backup_flush_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [30m]\n)) by (instance) ", + "query": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"write_to_tempfile\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -62576,74 +62303,103 @@ ], "timeFrom": null, "timeShift": null, - "title": "Flush Times (Last 30m)", + "title": "Write to Temp File Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, "transformations": [], "transparent": false, - "type": "stat" + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "This is the summary of the size has been flushed, summered by the 
data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "description": "The duration of collecting metadata and call the UNIX system call *write* for each event.", "editable": true, "error": false, "fieldConfig": { "defaults": { - "custom": {}, - "decimals": null, - "mappings": null, - "noValue": "none", "thresholds": { "mode": "absolute", - "steps": "" - }, - "unit": "bytes" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { "h": 7, "w": 6, - "x": 12, - "y": 7 + "x": 18, + "y": 49 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 458, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 457, "interval": null, + "legend": { + "show": false + }, "links": [], - "maxDataPoints": 100, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, + "options": {}, "repeat": null, "repeatDirection": null, + "reverseYBuckets": false, "span": null, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", - "format": "time_series", + "expr": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"syscall_write\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(delta(\n tikv_log_backup_flush_file_size_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", + 
"query": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"syscall_write\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" @@ -62651,92 +62407,177 @@ ], "timeFrom": null, "timeShift": null, - "title": "Total Flushed Size (Last 30m)", + "title": "System Write Call Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, "transformations": [], "transparent": false, - "type": "stat" + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "This is the summary of the file count has been flushed, summered by the data each TiKV has flushed since last boot.\n**NOTE: The size may get reduced if some of TiKVs reboot.**", + "description": "The internal message type count.", "editable": true, "error": false, "fieldConfig": { "defaults": { - "custom": {}, - "decimals": 0, - "mappings": null, - "noValue": "none", "thresholds": { "mode": "absolute", - "steps": "" - }, - "unit": "none" - }, - "overrides": [] + "steps": [] + } + } + }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" }, "gridPos": { "h": 7, - "w": 6, - "x": 18, - "y": 7 + "w": 8, + "x": 0, + "y": 56 }, "height": null, "hideTimeOverride": false, - "id": 459, + "id": 458, "interval": null, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": 
false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 100, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, + "nullPointMode": "null as zero", "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + "alertThreshold": true, + "dataLinks": [] }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{message}}", "metric": "", - "query": "sum(delta(\n tikv_log_backup_flush_file_size_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [30m]\n)) by (instance) ", + "query": "sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Flush 
Files (Last 30m)", + "title": "Internal Message Type", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, "transformations": [], "transparent": false, - "type": "stat" + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 2, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The CPU utilization of log backup threads. \n**(Note this is the average usage for a period of time, some peak of CPU usage may be lost.)**", + "description": "The internal handling message duration.", "editable": true, "error": false, "fieldConfig": { @@ -62757,13 +62598,13 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 14 + "w": 8, + "x": 8, + "y": 56 }, "height": null, "hideTimeOverride": false, - "id": 460, + "id": 459, "interval": null, "isNew": true, "legend": { @@ -62806,15 +62647,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{message}}", "metric": "", - "query": 
"sum(rate(\n tikv_thread_cpu_seconds_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",name=~\"backup_stream|log-backup-scan(-[0-9]+)?\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -62823,7 +62664,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Internal Message Handling Duration (P99)", "tooltip": { "msResolution": true, "shared": true, @@ -62842,7 +62683,7 @@ "yaxes": [ { "decimals": null, - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -62869,7 +62710,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": "The internal handling message duration.", "editable": true, "error": false, "fieldConfig": { @@ -62890,13 +62731,13 @@ }, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 14 + "w": 8, + "x": 16, + "y": 56 }, "height": null, "hideTimeOverride": false, - "id": 461, + "id": 460, "interval": null, "isNew": true, "legend": { @@ -62939,15 +62780,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_log_backup_handle_kv_batch_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "histogram_quantile(0.9,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": 
"{{message}}", "metric": "", - "query": "sum(rate(\n tikv_log_backup_handle_kv_batch_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "histogram_quantile(0.9,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -62956,7 +62797,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Handle Event Rate", + "title": "Internal Message Handling Duration (P90)", "tooltip": { "msResolution": true, "shared": true, @@ -62975,7 +62816,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -63002,7 +62843,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The data rate of initial scanning emitting events.", + "description": "The internal read throughput of RocksDB during initial scanning. 
This panel can roughly present the read through to the hard disk of initial scanning.", "editable": true, "error": false, "fieldConfig": { @@ -63025,11 +62866,11 @@ "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 63 }, "height": null, "hideTimeOverride": false, - "id": 462, + "id": 461, "interval": null, "isNew": true, "legend": { @@ -63072,15 +62913,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_log_backup_incremental_scan_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "expr": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{cf}}", "metric": "", - "query": "sum(rate(\n tikv_log_backup_incremental_scan_bytes_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", + "query": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf) ", "refId": "", "step": 10, "target": "" @@ -63089,7 +62930,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Initial Scan Generate Event Throughput", + "title": "Initial Scan RocksDB Throughput", "tooltip": { "msResolution": true, "shared": true, @@ -63135,7 +62976,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "Misc statistics of RocksDB during initial scanning.", "editable": true, "error": false, "fieldConfig": { @@ -63158,11 +62999,11 @@ "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 63 }, "height": null, 
"hideTimeOverride": false, - "id": 463, + "id": 462, "interval": null, "isNew": true, "legend": { @@ -63205,15 +63046,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "(time() * 1000 - max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0)", + "expr": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op!~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf, op) > 0", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{ task }}", + "legendFormat": "{{cf}}-{{op}}", "metric": "", - "query": "(time() * 1000 - max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0)", + "query": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op!~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf, op) > 0", "refId": "", "step": 10, "target": "" @@ -63222,7 +63063,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Abnormal Checkpoint TS Lag", + "title": "Initial Scan RocksDB Operation", "tooltip": { "msResolution": true, "shared": true, @@ -63241,7 +63082,7 @@ "yaxes": [ { "decimals": null, - "format": "ms", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -63268,7 +63109,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The estimated memory usage by the streaming backup module.", + "description": "The reason of triggering initial scanning.", "editable": true, "error": false, "fieldConfig": { @@ -63291,11 +63132,11 @@ "h": 7, "w": 12, "x": 0, - "y": 28 + "y": 70 }, "height": null, "hideTimeOverride": false, - "id": 464, + "id": 463, "interval": null, 
"isNew": true, "legend": { @@ -63338,15 +63179,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_log_backup_heap_memory\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_log_backup_initial_scan_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}", + "legendFormat": "{{reason}}", "metric": "", - "query": "sum((\n tikv_log_backup_heap_memory\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_log_backup_initial_scan_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", "refId": "", "step": 10, "target": "" @@ -63355,7 +63196,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Memory Of Events", + "title": "Initial Scanning Trigger Reason", "tooltip": { "msResolution": true, "shared": true, @@ -63374,7 +63215,7 @@ "yaxes": [ { "decimals": null, - "format": "bytes", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -63424,11 +63265,11 @@ "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 70 }, "height": null, "hideTimeOverride": false, - "id": 465, + "id": 464, "interval": null, "isNew": true, "legend": { @@ -63471,30 +63312,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_log_backup_observed_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "metric": "", - "query": "sum((\n tikv_log_backup_observed_region\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_log_backup_observed_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_log_backup_metadata_key_operation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}}-total", + "legendFormat": "{{type}}", "metric": "", - "query": "sum((\n tikv_log_backup_observed_region\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_log_backup_metadata_key_operation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -63503,7 +63329,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Observed Region Count", + "title": "Region Checkpoint Key Putting", "tooltip": { "msResolution": true, "shared": true, @@ -63522,7 +63348,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "cps", "label": null, "logBase": 1, "max": null, @@ -63545,11 +63371,23 @@ } }, { - "aliasColors": {}, - "bars": false, "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The errors met when backing up.\n**They are retryable, don't worry.**", + "description": null, "editable": true, 
"error": false, "fieldConfig": { @@ -63560,129 +63398,192 @@ } } }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, "gridPos": { "h": 7, - "w": 8, + "w": 12, "x": 0, - "y": 35 + "y": 77 }, + "heatmap": {}, "height": null, "hideTimeOverride": false, - "id": 466, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 465, "interval": null, - "isNew": true, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true + "show": false }, - "lines": true, - "linewidth": 1, "links": [], - "maxDataPoints": null, + "maxDataPoints": 512, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": {}, "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "reverseYBuckets": false, "span": null, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_log_backup_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", - "format": "time_series", + "expr": "sum(rate(\n tidb_log_backup_advancer_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"checkpoint\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}-{{instance}}", + "legendFormat": "{{le}}", "metric": "", - "query": "sum(delta(\n tikv_log_backup_errors\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", + "query": "sum(rate(\n tidb_log_backup_advancer_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"checkpoint\"}\n [$__rate_interval]\n)) by (le) ", "refId": "", "step": 10, "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Errors", + "title": "Request Checkpoint Batch Size", "tooltip": { "msResolution": true, "shared": true, + "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "graph", - "xaxis": { + "type": "heatmap", + "xAxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "yaxes": [ - { - "decimals": null, - "format": "opm", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cacheTimeout": null, + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 77 + }, + "heatmap": {}, + "height": null, + "hideTimeOverride": false, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 466, + "interval": null, + "legend": { + "show": false + }, + "links": [], + "maxDataPoints": 512, + "maxPerRow": null, + "minSpan": 
null, + "options": {}, + "repeat": null, + "repeatDirection": null, + "reverseYBuckets": false, + "span": null, + "targets": [ { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"tick\"}\n [$__rate_interval]\n)) by (le) ", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "metric": "", + "query": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"tick\"}\n [$__rate_interval]\n)) by (le) ", + "refId": "", + "step": 10, + "target": "" } ], - "yaxis": { - "align": false, - "alignLevel": 0 - } + "timeFrom": null, + "timeShift": null, + "title": "Tick Duration", + "tooltip": { + "msResolution": true, + "shared": true, + "showHistogram": true, + "sort": 0, + "value_type": "individual" + }, + "transformations": [], + "transparent": false, + "type": "heatmap", + "xAxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The errors met when backing up.", + "description": "The reason of advancer failed to be advanced.", "editable": true, "error": false, "fieldConfig": { @@ -63703,9 +63604,9 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 8, - "y": 35 + "w": 12, + "x": 0, + "y": 84 }, "height": null, "hideTimeOverride": false, @@ -63752,15 
+63653,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(delta(\n tikv_log_backup_fatal_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", + "expr": "sum(rate(\n tidb_log_backup_region_request_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",reason!=\"retryable-scan-region\"}\n [$__rate_interval]\n)) by (reason) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}-{{instance}}", + "legendFormat": "{{reason}}", "metric": "", - "query": "sum(delta(\n tikv_log_backup_fatal_errors\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [1m]\n)) by (type, instance) ", + "query": "sum(rate(\n tidb_log_backup_region_request_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",reason!=\"retryable-scan-region\"}\n [$__rate_interval]\n)) by (reason) ", "refId": "", "step": 10, "target": "" @@ -63769,7 +63670,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Fatal Errors", + "title": "Region Checkpoint Failure Reason", "tooltip": { "msResolution": true, "shared": true, @@ -63788,7 +63689,7 @@ "yaxes": [ { "decimals": null, - "format": "opm", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -63815,7 +63716,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The result of getting region checkpoints.", "editable": true, "error": false, "fieldConfig": { @@ -63836,9 +63737,9 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 16, - "y": 35 + "w": 12, + "x": 12, + "y": 84 }, "height": null, "hideTimeOverride": false, @@ -63867,7 +63768,7 @@ "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "nullPointMode": "null", + "nullPointMode": "null as zero", "options": { "alertThreshold": true, 
"dataLinks": [] @@ -63878,48 +63779,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "Current Time", - "bars": false, - "dashes": true, - "fill": 0, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{task}}", - "metric": "", - "query": "max((\n tidb_log_backup_last_checkpoint\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (task) / 262144 > 0", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "time() * 1000", + "expr": "sum(rate(\n tidb_log_backup_region_request\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "Current Time", + "legendFormat": "{{result}}", "metric": "", - "query": "time() * 1000", + "query": "sum(rate(\n tidb_log_backup_region_request\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", "refId": "", "step": 10, "target": "" @@ -63928,7 +63803,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Checkpoint TS of Tasks", + "title": "Request Result", "tooltip": { "msResolution": true, "shared": true, @@ -63947,7 +63822,7 @@ "yaxes": [ { "decimals": null, - "format": "dateTimeAsIsoNoDateIfToday", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -63970,23 
+63845,11 @@ } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of flushing a batch of file.", + "description": "The internal handling message duration.", "editable": true, "error": false, "fieldConfig": { @@ -63997,100 +63860,129 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, + "w": 12, "x": 0, - "y": 42 + "y": 91 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, "id": 469, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_log_backup_flush_duration_sec_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{ step }}", "metric": "", - "query": "sum(rate(\n tikv_log_backup_flush_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_files\"}\n [$__rate_interval]\n)) by (le) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Flush Duration", + "title": "Tick Duration (P99)", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + 
} + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of scanning the initial data from local DB and transform them into apply events.", + "description": "The internal handling message duration.", "editable": true, "error": false, "fieldConfig": { @@ -64101,100 +63993,129 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 6, - "y": 42 + "w": 12, + "x": 12, + "y": 91 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, "id": 470, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n 
tikv_log_backup_initial_scan_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "histogram_quantile(0.9,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{ step }}", "metric": "", - "query": "sum(rate(\n tikv_log_backup_initial_scan_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "histogram_quantile(0.9,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Initial scanning duration", + "title": "Tick Duration (P90)", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 
null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of converting a raft request into a apply event.", + "description": "The frequent of getting region level checkpoint.", "editable": true, "error": false, "fieldConfig": { @@ -64205,204 +64126,129 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 12, - "y": 42 + "w": 12, + "x": 0, + "y": 98 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, "id": 471, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n 
tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"to_stream_event\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"get-regions-in-range\"}\n [$__rate_interval]\n)) by (step, instance) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{step}}-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"to_stream_event\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"get-regions-in-range\"}\n [$__rate_interval]\n)) by (step, instance) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Convert Raft Event duration", + "title": "Get Region Operation Count", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - 
"colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of waiting the mutex of the controller.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 42 - }, - "heatmap": {}, - "height": null, - "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 472, - "interval": null, - "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ + "yaxes": [ { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"get_router_lock\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "metric": "", - "query": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"get_router_lock\"}\n [$__rate_interval]\n)) by (le) ", - "refId": "", - "step": 10, - "target": "" + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } ], - "timeFrom": null, - "timeShift": null, - "title": "Wait for Lock Duration", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, - 
"transformations": [], - "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The number of KV-modify of each raft command observed.", + "description": "The variant of checkpoint group.", "editable": true, "error": false, "fieldConfig": { @@ -64413,100 +64259,171 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 0, - "y": 49 + "w": 12, + "x": 12, + "y": 98 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 473, + "id": 472, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + 
"alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_log_backup_handle_kv_batch_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"try-advance\"}\n [$__rate_interval]\n)) by (step, instance) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{step}}-{{instance}}", "metric": "", - "query": "sum(rate(\n tikv_log_backup_handle_kv_batch_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"try-advance\"}\n [$__rate_interval]\n)) by (step, instance) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Command Batch Size", + "title": "Try Advance Trigger Time", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - 
"show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Backup Log", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 473, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The total cost of saving an event into temporary file.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -64517,100 +64434,144 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 6, - "y": 49 + "w": 12, + "x": 0, + "y": 0 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, 
- "hideZeroBuckets": true, - "highlightCards": true, "id": 474, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_to_temp_file\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, state) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{instance}}-{{state}}", "metric": "", - "query": "sum(rate(\n tikv_log_backup_event_handle_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=~\"save_to_temp_file\"}\n [$__rate_interval]\n)) by (le) ", + "query": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, state) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": 
"sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-total", + "metric": "", + "query": "sum((\n tikv_threads_state\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Save to Temp File Duration", + "title": "Threads state", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The total cost of writing a event into temporary file.\nComparing to the ***Save*** duration, it doesn't contain the time cost 
of routing the task by range / task.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -64621,100 +64582,129 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, + "w": 12, "x": 12, - "y": 49 + "y": 0 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, "id": 475, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"write_to_tempfile\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "topk(20,(\n sum(rate(\n tikv_threads_io_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, io) > 1024\n \n \n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": 
"{{name}}", "metric": "", - "query": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"write_to_tempfile\"}\n [$__rate_interval]\n)) by (le) ", + "query": "topk(20,(\n sum(rate(\n tikv_threads_io_bytes_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name, io) > 1024\n \n \n)) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Write to Temp File Duration", + "title": "Threads IO", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { + "aliasColors": {}, + "bars": false, "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of collecting metadata and call the UNIX system call *write* for each event.", + "description": null, 
"editable": true, "error": false, "fieldConfig": { @@ -64725,88 +64715,129 @@ } } }, + "fill": 1, + "fillGradient": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, "gridPos": { "h": 7, - "w": 6, - "x": 18, - "y": 49 + "w": 12, + "x": 0, + "y": 7 }, - "heatmap": {}, "height": null, "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, "id": 476, "interval": null, + "isNew": true, "legend": { - "show": false + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true }, + "lines": true, + "linewidth": 1, "links": [], - "maxDataPoints": 512, + "maxDataPoints": null, "maxPerRow": null, "minSpan": null, - "options": {}, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true, + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, "repeatDirection": null, - "reverseYBuckets": false, + "seriesOverrides": [], "span": null, + "stack": false, + "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"syscall_write\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", + "expr": "topk(20,(\n max(rate(\n tikv_thread_voluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", + "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{le}}", + "legendFormat": "{{name}}", "metric": "", - "query": "sum(rate(\n 
tikv_log_backup_on_event_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",stage=\"syscall_write\"}\n [$__rate_interval]\n)) by (le) ", + "query": "topk(20,(\n max(rate(\n tikv_thread_voluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", "refId": "", "step": 10, "target": "" } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "System Write Call Duration", + "title": "Thread Voluntary Context Switches", "tooltip": { "msResolution": true, "shared": true, - "showHistogram": true, "sort": 0, "value_type": "individual" }, "transformations": [], "transparent": false, - "type": "heatmap", - "xAxis": { + "type": "graph", + "xaxis": { "mode": "time", "name": null, "show": true, "values": [] }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 0 + } }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal message type count.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -64827,9 +64858,9 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 0, - "y": 56 + "w": 12, + "x": 12, + "y": 7 }, "height": null, "hideTimeOverride": false, @@ -64876,15 +64907,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n 
tikv_log_backup_interal_actor_acting_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message) ", + "expr": "topk(20,(\n max(rate(\n tikv_thread_nonvoluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{message}}", + "legendFormat": "{{name}}", "metric": "", - "query": "sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message) ", + "query": "topk(20,(\n max(rate(\n tikv_thread_nonvoluntary_context_switches\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (name) > 100\n \n \n)) ", "refId": "", "step": 10, "target": "" @@ -64893,7 +64924,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Internal Message Type", + "title": "Thread Nonvoluntary Context Switches", "tooltip": { "msResolution": true, "shared": true, @@ -64912,9 +64943,9 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "none", "label": null, - "logBase": 2, + "logBase": 1, "max": null, "min": null, "show": true @@ -64933,13 +64964,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Threads", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + 
"x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 478, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal handling message duration.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -64960,13 +65033,13 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 8, - "y": 56 + "w": 12, + "x": 0, + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 478, + "id": 479, "interval": null, "isNew": true, "legend": { @@ -65009,15 +65082,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", + "expr": "sum((\n tikv_allocator_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{message}}", + "legendFormat": "{{instance}}-{{type}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", + "query": "sum((\n tikv_allocator_stats\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance, type) ", "refId": "", "step": 10, "target": "" @@ -65026,7 +65099,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Internal Message Handling Duration (P99)", + "title": "Allocator Stats", "tooltip": { "msResolution": true, "shared": true, @@ -65045,7 
+65118,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -65072,7 +65145,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal handling message duration.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -65093,13 +65166,13 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 16, - "y": 56 + "w": 12, + "x": 12, + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 479, + "id": 480, "interval": null, "isNew": true, "legend": { @@ -65142,15 +65215,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", + "expr": "(sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) - sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) )", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{message}}", + "legendFormat": "{{thread_name}}", "metric": "", - "query": "histogram_quantile(0.9,(\n sum(rate(\n tikv_log_backup_interal_actor_acting_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (message, le) \n \n \n)) ", + "query": "(sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) - sum(rate(\n tikv_allocator_thread_allocation\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) )", "refId": "", "step": 10, "target": "" @@ -65159,7 +65232,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Internal Message Handling Duration (P90)", + "title": "Send Allocated(+) / Release Received(-) Bytes Rate", "tooltip": { "msResolution": true, "shared": true, @@ -65178,7 +65251,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "binBps", "label": null, "logBase": 1, "max": null, @@ -65205,7 +65278,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal read throughput of RocksDB during initial scanning. This panel can roughly present the read through to the hard disk of initial scanning.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -65228,11 +65301,11 @@ "h": 7, "w": 12, "x": 0, - "y": 63 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 480, + "id": 481, "interval": null, "isNew": true, "legend": { @@ -65275,15 +65348,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf) ", + "expr": "sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}", + "legendFormat": "{{thread_name}}", "metric": "", - "query": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op=~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf) ", + "query": "sum(rate(\n 
tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"alloc\"}\n [$__rate_interval]\n)) by (thread_name) ", "refId": "", "step": 10, "target": "" @@ -65292,7 +65365,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Initial Scan RocksDB Throughput", + "title": "Newly Allocated Bytes by Thread", "tooltip": { "msResolution": true, "shared": true, @@ -65311,7 +65384,7 @@ "yaxes": [ { "decimals": null, - "format": "binBps", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -65338,7 +65411,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "Misc statistics of RocksDB during initial scanning.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -65361,11 +65434,11 @@ "h": 7, "w": 12, "x": 12, - "y": 63 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 481, + "id": 482, "interval": null, "isNew": true, "legend": { @@ -65408,15 +65481,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op!~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf, op) > 0", + "expr": "sum(rate(\n tikv_allocator_thread_allocation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{cf}}-{{op}}", + "legendFormat": "{{thread_name}}", "metric": "", - "query": "sum(rate(\n tikv_log_backup_initial_scan_operations\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",op!~\"read_bytes\"}\n [$__rate_interval]\n)) by (cf, op) > 0", + "query": "sum(rate(\n tikv_allocator_thread_allocation\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"dealloc\"}\n [$__rate_interval]\n)) by (thread_name) ", "refId": "", "step": 10, "target": "" @@ -65425,7 +65498,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Initial Scan RocksDB Operation", + "title": "Recently Released Bytes by Thread", "tooltip": { "msResolution": true, "shared": true, @@ -65444,7 +65517,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -65465,13 +65538,55 @@ "align": false, "alignLevel": 0 } - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 483, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The reason of triggering initial scanning.", + "description": "The 99 quantile durtion of status server API requests", "editable": true, "error": false, "fieldConfig": { @@ -65494,11 +65609,11 @@ "h": 7, "w": 12, "x": 0, - "y": 70 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 482, + "id": 484, "interval": null, "isNew": true, "legend": { @@ -65534,22 +65649,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + 
"fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_log_backup_initial_scan_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99.99%-{{path}}", + "metric": "", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{reason}}", + "legendFormat": "99%-{{path}}", "metric": "", - "query": "sum(rate(\n tikv_log_backup_initial_scan_reason\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (reason) ", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_status_server_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) / sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) )", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg-{{path}}", + "metric": "", + "query": "(sum(rate(\n tikv_status_server_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) / sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count-{{path}}", + "metric": "", + "query": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", "refId": "", "step": 10, "target": "" @@ -65558,7 +65741,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Initial Scanning Trigger Reason", + "title": "Status API Request 
Duration", "tooltip": { "msResolution": true, "shared": true, @@ -65577,7 +65760,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -65604,7 +65787,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -65627,11 +65810,11 @@ "h": 7, "w": 12, "x": 12, - "y": 70 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 483, + "id": 485, "interval": null, "isNew": true, "legend": { @@ -65674,15 +65857,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_log_backup_metadata_key_operation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "expr": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{path}}", "metric": "", - "query": "sum(rate(\n tikv_log_backup_metadata_key_operation\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", + "query": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", "refId": "", "step": 10, "target": "" @@ -65691,7 +65874,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Region Checkpoint Key Putting", + "title": "Status API Request (op/s)", "tooltip": { "msResolution": true, "shared": true, @@ -65710,7 +65893,7 @@ "yaxes": [ { "decimals": null, - "format": "cps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -65731,221 
+65914,55 @@ "align": false, "alignLevel": 0 } - }, - { - "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 77 - }, - "heatmap": {}, - "height": null, - "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 484, - "interval": null, - "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tidb_log_backup_advancer_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"checkpoint\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "metric": "", - "query": "sum(rate(\n tidb_log_backup_advancer_batch_size_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"checkpoint\"}\n [$__rate_interval]\n)) by (le) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Checkpoint Batch Size", - "tooltip": { - "msResolution": true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": 
true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "cacheTimeout": null, - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "max": null, - "min": null, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_TEST-CLUSTER}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 77 - }, - "heatmap": {}, - "height": null, - "hideTimeOverride": false, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 485, - "interval": null, - "legend": { - "show": false - }, - "links": [], - "maxDataPoints": 512, - "maxPerRow": null, - "minSpan": null, - "options": {}, - "repeat": null, - "repeatDirection": null, - "reverseYBuckets": false, - "span": null, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"tick\"}\n [$__rate_interval]\n)) by (le) ", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{le}}", - "metric": "", - "query": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"tick\"}\n [$__rate_interval]\n)) by (le) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Tick Duration", - "tooltip": { - "msResolution": 
true, - "shared": true, - "showHistogram": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "heatmap", - "xAxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - "yBucketBound": "upper", - "yBucketNumber": null, - "yBucketSize": null - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Status Server", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 486, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The reason of advancer failed to be advanced.", + "description": "Total number of encryption data keys in use", "editable": true, "error": false, "fieldConfig": { @@ -65968,11 +65985,11 @@ "h": 7, "w": 12, "x": 0, - "y": 84 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 486, + "id": 487, "interval": null, "isNew": true, "legend": { @@ -66015,15 +66032,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tidb_log_backup_region_request_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",reason!=\"retryable-scan-region\"}\n [$__rate_interval]\n)) by (reason) ", + "expr": "sum((\n 
tikv_encryption_data_key_storage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{reason}}", + "legendFormat": "{{instance}}", "metric": "", - "query": "sum(rate(\n tidb_log_backup_region_request_failure\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",reason!=\"retryable-scan-region\"}\n [$__rate_interval]\n)) by (reason) ", + "query": "sum((\n tikv_encryption_data_key_storage_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -66032,7 +66049,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Region Checkpoint Failure Reason", + "title": "Encryption data keys", "tooltip": { "msResolution": true, "shared": true, @@ -66078,7 +66095,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The result of getting region checkpoints.", + "description": "Number of files being encrypted", "editable": true, "error": false, "fieldConfig": { @@ -66101,11 +66118,11 @@ "h": 7, "w": 12, "x": 12, - "y": 84 + "y": 0 }, "height": null, "hideTimeOverride": false, - "id": 487, + "id": 488, "interval": null, "isNew": true, "legend": { @@ -66148,15 +66165,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tidb_log_backup_region_request\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", + "expr": "sum((\n tikv_encryption_file_num\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{result}}", + "legendFormat": "{{instance}}", 
"metric": "", - "query": "sum(rate(\n tidb_log_backup_region_request\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (result) ", + "query": "sum((\n tikv_encryption_file_num\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -66165,7 +66182,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Request Result", + "title": "Encrypted files", "tooltip": { "msResolution": true, "shared": true, @@ -66211,7 +66228,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal handling message duration.", + "description": "Flag to indicate if encryption is initialized", "editable": true, "error": false, "fieldConfig": { @@ -66234,11 +66251,11 @@ "h": 7, "w": 12, "x": 0, - "y": 91 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 488, + "id": 489, "interval": null, "isNew": true, "legend": { @@ -66281,15 +66298,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", + "expr": "((\n tikv_encryption_is_initialized\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{ step }}", + "legendFormat": "{{instance}}", "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", + "query": "((\n tikv_encryption_is_initialized\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "refId": "", "step": 10, "target": "" @@ -66298,7 +66315,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Tick Duration (P99)", + "title": "Encryption initialized", "tooltip": { "msResolution": true, "shared": true, @@ -66317,7 +66334,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -66344,7 +66361,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The internal handling message duration.", + "description": "Total size of encryption meta files", "editable": true, "error": false, "fieldConfig": { @@ -66367,11 +66384,11 @@ "h": 7, "w": 12, "x": 12, - "y": 91 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 489, + "id": 490, "interval": null, "isNew": true, "legend": { @@ -66414,15 +66431,15 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.9,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", + "expr": "((\n tikv_encryption_meta_file_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{ step }}", + "legendFormat": "{{name}}-{{instance}}", "metric": "", - "query": "histogram_quantile(0.9,(\n sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (step, le) \n \n \n)) ", + "query": "((\n tikv_encryption_meta_file_size_bytes\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) ", "refId": "", "step": 10, 
"target": "" @@ -66431,7 +66448,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Tick Duration (P90)", + "title": "Encryption meta files size", "tooltip": { "msResolution": true, "shared": true, @@ -66450,7 +66467,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -66477,7 +66494,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The frequent of getting region level checkpoint.", + "description": "", "editable": true, "error": false, "fieldConfig": { @@ -66500,11 +66517,11 @@ "h": 7, "w": 12, "x": 0, - "y": 98 + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 490, + "id": 491, "interval": null, "isNew": true, "legend": { @@ -66547,15 +66564,30 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"get-regions-in-range\"}\n [$__rate_interval]\n)) by (step, instance) ", + "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"encrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{step}}-{{instance}}", + "legendFormat": "encrypt-{{req}}", "metric": "", - "query": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"get-regions-in-range\"}\n [$__rate_interval]\n)) by (step, instance) ", + "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"encrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", + "refId": "", + "step": 10, + "target": "" + }, + { + 
"datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"decrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "decrypt-{{req}}", + "metric": "", + "query": "sum(rate(\n tikv_coprocessor_rocksdb_perf\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",metric=\"decrypt_data_nanos\"}\n [$__rate_interval]\n)) by (req) ", "refId": "", "step": 10, "target": "" @@ -66564,7 +66596,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Get Region Operation Count", + "title": "Encrypt/decrypt data nanos", "tooltip": { "msResolution": true, "shared": true, @@ -66610,7 +66642,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The variant of checkpoint group.", + "description": "Writing or reading file duration (second)", "editable": true, "error": false, "fieldConfig": { @@ -66633,11 +66665,11 @@ "h": 7, "w": 12, "x": 12, - "y": 98 + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 491, + "id": 492, "interval": null, "isNew": true, "legend": { @@ -66673,22 +66705,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"try-advance\"}\n [$__rate_interval]\n)) by (step, instance) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{step}}-{{instance}}", + "legendFormat": "99.99%", "metric": "", - "query": "sum(rate(\n tidb_log_backup_advancer_tick_duration_sec_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",step=\"try-advance\"}\n [$__rate_interval]\n)) by (step, instance) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_encryption_write_read_file_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_encryption_write_read_file_duration_seconds_sum\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_encryption_write_read_file_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_encryption_write_read_file_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -66697,7 +66797,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Try Advance Trigger Time", + "title": "Read/write encryption meta duration", "tooltip": { "msResolution": true, "shared": true, @@ -66716,7 +66816,7 @@ "yaxes": [ { "decimals": null, - "format": "none", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -66745,7 +66845,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Backup Log", + "title": "Encryption", "transformations": [], 
"transparent": false, "type": "row" @@ -66773,7 +66873,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 492, + "id": 493, "interval": null, "links": [], "maxDataPoints": 100, @@ -66785,7 +66885,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The changing trend of the slowness on I/O operations. 'value > 0' means the related store might have a slow trend.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -66812,139 +66912,6 @@ }, "height": null, "hideTimeOverride": false, - "id": 493, - "interval": null, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxDataPoints": null, - "maxPerRow": null, - "minSpan": null, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true, - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": null, - "seriesOverrides": [], - "span": null, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_raftstore_slow_trend\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "metric": "", - "query": "sum((\n tikv_raftstore_slow_trend\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", - "refId": "", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Slow Trend", - "tooltip": { 
- "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transformations": [], - "transparent": false, - "type": "graph", - "xaxis": { - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": 0 - } - }, - { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "datasource": "${DS_TEST-CLUSTER}", - "description": "The changing trend of QPS on each store. 'value < 0' means the QPS has a dropping trend.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "fill": 1, - "fillGradient": 1, - "grid": { - "threshold1": null, - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2": null, - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, "id": 494, "interval": null, "isNew": true, @@ -66988,7 +66955,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_raftstore_slow_trend_result\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_ttl_expire_kv_count_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -66996,7 +66963,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n tikv_raftstore_slow_trend_result\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by 
(instance) ", + "query": "sum(rate(\n tikv_ttl_expire_kv_count_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -67005,7 +66972,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "QPS Changing Trend", + "title": "TTL expire count", "tooltip": { "msResolution": true, "shared": true, @@ -67051,7 +67018,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The sampling latency of recent queries. A larger value indicates that the store is more likely to be the slowest store.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -67073,8 +67040,8 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 7 + "x": 12, + "y": 0 }, "height": null, "hideTimeOverride": false, @@ -67121,7 +67088,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_raftstore_slow_trend_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "sum(rate(\n tikv_ttl_expire_kv_size_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "format": "time_series", "hide": false, "instant": false, @@ -67129,7 +67096,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n tikv_raftstore_slow_trend_l0\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "sum(rate(\n tikv_ttl_expire_kv_size_total\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) ", "refId": "", "step": 10, "target": "" @@ -67138,7 +67105,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "AVG Sampling Latency", + "title": "TTL expire size", "tooltip": { 
"msResolution": true, "shared": true, @@ -67157,7 +67124,7 @@ "yaxes": [ { "decimals": null, - "format": "\u00b5s", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -67184,7 +67151,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The QPS of each store.", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -67206,7 +67173,7 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 7 }, "height": null, @@ -67254,7 +67221,7 @@ "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum((\n tikv_raftstore_slow_trend_result_value\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "expr": "(sum(rate(\n tikv_ttl_checker_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n [$__rate_interval]\n)) by (instance) )", "format": "time_series", "hide": false, "instant": false, @@ -67262,7 +67229,7 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "metric": "", - "query": "sum((\n tikv_raftstore_slow_trend_result_value\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n \n)) by (instance) ", + "query": "(sum(rate(\n tikv_ttl_checker_processed_regions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (instance) / sum(rate(\n tikv_raftstore_region_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"region\"}\n [$__rate_interval]\n)) by (instance) )", "refId": "", "step": 10, "target": "" @@ -67271,7 +67238,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "QPS of each store", + "title": "TTL check progress", 
"tooltip": { "msResolution": true, "shared": true, @@ -67290,7 +67257,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -67311,55 +67278,13 @@ "align": false, "alignLevel": 0 } - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Slow Trend Statistics", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 497, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "aliasColors": {}, "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": "The 99 quantile durtion of status server API requests", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -67381,12 +67306,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 0 + "x": 12, + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 498, + "id": 497, "interval": null, "isNew": true, "legend": { @@ -67422,90 +67347,22 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [ - { - "alias": "count", - "bars": false, - "dashLength": 1, - "dashes": true, - "fill": 2, - "fillBelowTo": null, - "lines": true, - "spaceLength": 1, - "transform": "negative-Y", - "yaxis": 2, - "zindex": -3 - }, - { - "alias": "avg", - "bars": false, - "fill": 7, - "fillBelowTo": null, - "lines": true, - "yaxis": 1, - "zindex": 0 - } - ], + "seriesOverrides": [], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - 
"expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "99.99%-{{path}}", - "metric": "", - "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "histogram_quantile(0.99,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", + "expr": "sum(rate(\n tikv_ttl_checker_actions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "99%-{{path}}", - "metric": "", - "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_status_server_request_duration_seconds_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path, le) \n \n \n)) ", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "(sum(rate(\n tikv_status_server_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) / sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n 
[$__rate_interval]\n)) by (path) )", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "avg-{{path}}", - "metric": "", - "query": "(sum(rate(\n tikv_status_server_request_duration_seconds_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) / sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) )", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", - "format": "time_series", - "hide": true, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "count-{{path}}", + "legendFormat": "{{type}}", "metric": "", - "query": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", + "query": "sum(rate(\n tikv_ttl_checker_actions\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (type) ", "refId": "", "step": 10, "target": "" @@ -67514,7 +67371,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Status API Request Duration", + "title": "TTL checker actions", "tooltip": { "msResolution": true, "shared": true, @@ -67533,7 +67390,7 @@ "yaxes": [ { "decimals": null, - "format": "s", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -67560,7 +67417,7 @@ "bars": false, "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "description": null, + "description": "The time consumed when executing GC tasks", "editable": 
true, "error": false, "fieldConfig": { @@ -67582,12 +67439,12 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 0 + "x": 0, + "y": 14 }, "height": null, "hideTimeOverride": false, - "id": 499, + "id": 498, "interval": null, "isNew": true, "legend": { @@ -67623,22 +67480,90 @@ "renderer": "flot", "repeat": null, "repeatDirection": null, - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "count", + "bars": false, + "dashLength": 1, + "dashes": true, + "fill": 2, + "fillBelowTo": null, + "lines": true, + "spaceLength": 1, + "transform": "negative-Y", + "yaxis": 2, + "zindex": -3 + }, + { + "alias": "avg", + "bars": false, + "fill": 7, + "fillBelowTo": null, + "lines": true, + "yaxis": 1, + "zindex": 0 + } + ], "span": null, "stack": false, "steppedLine": false, "targets": [ { "datasource": "${DS_TEST-CLUSTER}", - "expr": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", + "expr": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{path}}", + "legendFormat": "99.99%", "metric": "", - "query": "sum(rate(\n tikv_status_server_request_duration_seconds_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (path) ", + "query": "histogram_quantile(0.9999,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "histogram_quantile(0.99,(\n sum(rate(\n 
tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "metric": "", + "query": "histogram_quantile(0.99,(\n sum(rate(\n tikv_ttl_checker_compact_duration_bucket\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) by (le) \n \n \n)) ", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "(sum(rate(\n tikv_ttl_checker_compact_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_ttl_checker_compact_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg", + "metric": "", + "query": "(sum(rate(\n tikv_ttl_checker_compact_duration_sum\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) / sum(rate(\n tikv_ttl_checker_compact_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) )", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "sum(rate(\n tikv_ttl_checker_compact_duration_count\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "count", + "metric": "", + "query": "sum(rate(\n tikv_ttl_checker_compact_duration_count\n 
{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\"}\n [$__rate_interval]\n)) ", "refId": "", "step": 10, "target": "" @@ -67647,7 +67572,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Status API Request (op/s)", + "title": "TTL checker compact duration", "tooltip": { "msResolution": true, "shared": true, @@ -67666,7 +67591,7 @@ "yaxes": [ { "decimals": null, - "format": "ops", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -67687,6 +67612,81 @@ "align": false, "alignLevel": 0 } + }, + { + "cacheTimeout": null, + "datasource": "${DS_TEST-CLUSTER}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": null, + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": "" + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 14 + }, + "height": null, + "hideTimeOverride": false, + "id": 499, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": "${DS_TEST-CLUSTER}", + "expr": "max((\n tikv_ttl_checker_poll_interval\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"tikv_gc_run_interval\"}\n \n)) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": null, + "metric": "", + "query": "max((\n tikv_ttl_checker_poll_interval\n {k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\",instance=~\"$instance\",type=\"tikv_gc_run_interval\"}\n \n)) ", + "refId": "", + 
"step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "TTL checker poll interval", + "transformations": [], + "transparent": false, + "type": "stat" } ], "repeat": null, @@ -67695,7 +67695,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Status Server", + "title": "TTL", "transformations": [], "transparent": false, "type": "row" diff --git a/metrics/grafana/tikv_details.json.sha256 b/metrics/grafana/tikv_details.json.sha256 index 2e24b367bd3..cc7044006e2 100644 --- a/metrics/grafana/tikv_details.json.sha256 +++ b/metrics/grafana/tikv_details.json.sha256 @@ -1 +1 @@ -362db1df47c4787354f52f32b4664f96e020b89f8622710adc3d5b47c8352dbb ./metrics/grafana/tikv_details.json +97870c84b16acacb37d33d8db8704e653cdc60128afce351da0b0c22eda1f8dd ./metrics/grafana/tikv_details.json From 2ec92ec3cf0aa93ff5a5bb64213396b6740eac91 Mon Sep 17 00:00:00 2001 From: Ping Yu Date: Wed, 24 Jan 2024 16:16:52 +0800 Subject: [PATCH 1130/1149] txn: Reserve lock data prefix `T` for future use (#16439) close tikv/tikv#16438 txn: Reserve lock data prefix `T` for future use Signed-off-by: Ping Yu --- components/txn_types/src/lock.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/components/txn_types/src/lock.rs b/components/txn_types/src/lock.rs index 8bd63b33fa9..31b2f68f807 100644 --- a/components/txn_types/src/lock.rs +++ b/components/txn_types/src/lock.rs @@ -35,6 +35,7 @@ const ASYNC_COMMIT_PREFIX: u8 = b'a'; const ROLLBACK_TS_PREFIX: u8 = b'r'; const LAST_CHANGE_PREFIX: u8 = b'l'; const TXN_SOURCE_PREFIX: u8 = b's'; +const _RESERVED_PREFIX: u8 = b'T'; // Reserved for future use. 
const PESSIMISTIC_LOCK_WITH_CONFLICT_PREFIX: u8 = b'F'; impl LockType { From 8780c0494be1cb10a0000097e3ebb6db3f286b4f Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 25 Jan 2024 11:48:20 +0800 Subject: [PATCH 1131/1149] storage: refactor command marco and task (#16440) ref tikv/tikv#16234 * txn: refactor task into a module * storage: refactor commands marco Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../txn/commands/acquire_pessimistic_lock.rs | 7 +- .../acquire_pessimistic_lock_resumed.rs | 3 +- src/storage/txn/commands/atomic_store.rs | 2 +- .../txn/commands/check_secondary_locks.rs | 2 +- src/storage/txn/commands/check_txn_status.rs | 8 +- src/storage/txn/commands/cleanup.rs | 2 +- src/storage/txn/commands/commit.rs | 2 +- src/storage/txn/commands/compare_and_swap.rs | 2 +- .../txn/commands/flashback_to_version.rs | 5 +- .../flashback_to_version_read_phase.rs | 5 +- src/storage/txn/commands/macros.rs | 45 ++--- src/storage/txn/commands/mod.rs | 3 +- src/storage/txn/commands/mvcc_by_key.rs | 2 +- src/storage/txn/commands/mvcc_by_start_ts.rs | 2 +- src/storage/txn/commands/pause.rs | 2 +- .../txn/commands/pessimistic_rollback.rs | 5 +- .../pessimistic_rollback_read_phase.rs | 2 +- src/storage/txn/commands/resolve_lock.rs | 5 +- src/storage/txn/commands/resolve_lock_lite.rs | 5 +- .../txn/commands/resolve_lock_readphase.rs | 2 +- src/storage/txn/commands/rollback.rs | 5 +- src/storage/txn/commands/txn_heart_beat.rs | 5 +- src/storage/txn/mod.rs | 1 + src/storage/txn/scheduler.rs | 161 ++++++++---------- src/storage/txn/task.rs | 76 +++++++++ 25 files changed, 213 insertions(+), 146 deletions(-) create mode 100644 src/storage/txn/task.rs diff --git a/src/storage/txn/commands/acquire_pessimistic_lock.rs b/src/storage/txn/commands/acquire_pessimistic_lock.rs index ceb7957c926..3147b594759 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock.rs +++ 
b/src/storage/txn/commands/acquire_pessimistic_lock.rs @@ -28,8 +28,11 @@ command! { /// This can be rolled back with a [`PessimisticRollback`](Command::PessimisticRollback) command. AcquirePessimisticLock: cmd_ty => StorageResult, - display => "kv::command::acquirepessimisticlock keys({:?}) @ {} {} {} {:?} {} {} {} | {:?}", - (keys, start_ts, lock_ttl, for_update_ts, wait_timeout, min_commit_ts, check_existence, lock_only_if_exists, ctx), + display => { + "kv::command::acquirepessimisticlock keys({:?}) @ {} {} {} {:?} {} {} {} | {:?}", + (keys, start_ts, lock_ttl, for_update_ts, wait_timeout, min_commit_ts, + check_existence, lock_only_if_exists, ctx), + } content => { /// The set of keys to lock. keys: Vec<(Key, bool)>, diff --git a/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs b/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs index a1e2e6fc119..4fb25d47ba0 100644 --- a/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs +++ b/src/storage/txn/commands/acquire_pessimistic_lock_resumed.rs @@ -54,8 +54,7 @@ command! { /// This can be rolled back with a [`PessimisticRollback`](Command::PessimisticRollback) command. AcquirePessimisticLockResumed: cmd_ty => StorageResult, - display => "kv::command::acquirepessimisticlockresumed {:?}", - (items), + display => { "kv::command::acquirepessimisticlockresumed {:?}", (items), } content => { items: Vec, } diff --git a/src/storage/txn/commands/atomic_store.rs b/src/storage/txn/commands/atomic_store.rs index 3dd0b053d12..3e56b99e719 100644 --- a/src/storage/txn/commands/atomic_store.rs +++ b/src/storage/txn/commands/atomic_store.rs @@ -20,7 +20,7 @@ command! { /// Run Put or Delete for keys which may be changed by `RawCompareAndSwap`. RawAtomicStore: cmd_ty => (), - display => "kv::command::atomic_store {:?}", (ctx), + display => { "kv::command::atomic_store {:?}", (ctx), } content => { /// The set of mutations to apply. 
cf: CfName, diff --git a/src/storage/txn/commands/check_secondary_locks.rs b/src/storage/txn/commands/check_secondary_locks.rs index ceb169f79b2..1bf5c536427 100644 --- a/src/storage/txn/commands/check_secondary_locks.rs +++ b/src/storage/txn/commands/check_secondary_locks.rs @@ -29,7 +29,7 @@ command! { /// status being changed, a rollback may be written. CheckSecondaryLocks: cmd_ty => SecondaryLocksStatus, - display => "kv::command::CheckSecondaryLocks {:?} keys@{} | {:?}", (keys, start_ts, ctx), + display => { "kv::command::CheckSecondaryLocks {:?} keys@{} | {:?}", (keys, start_ts, ctx), } content => { /// The keys of secondary locks. keys: Vec, diff --git a/src/storage/txn/commands/check_txn_status.rs b/src/storage/txn/commands/check_txn_status.rs index 9e9a6cc0895..37f29f6cced 100644 --- a/src/storage/txn/commands/check_txn_status.rs +++ b/src/storage/txn/commands/check_txn_status.rs @@ -29,9 +29,11 @@ command! { /// [`Prewrite`](Command::Prewrite). CheckTxnStatus: cmd_ty => TxnStatus, - display => "kv::command::check_txn_status {} @ {} curr({}, {}, {}, {}, {}) | {:?}", - (primary_key, lock_ts, caller_start_ts, current_ts, rollback_if_not_exist, - force_sync_commit, resolving_pessimistic_lock, ctx), + display => { + "kv::command::check_txn_status {} @ {} curr({}, {}, {}, {}, {}) | {:?}", + (primary_key, lock_ts, caller_start_ts, current_ts, rollback_if_not_exist, + force_sync_commit, resolving_pessimistic_lock, ctx), + } content => { /// The primary key of the transaction. primary_key: Key, diff --git a/src/storage/txn/commands/cleanup.rs b/src/storage/txn/commands/cleanup.rs index 886094a7f34..37247afbd1d 100644 --- a/src/storage/txn/commands/cleanup.rs +++ b/src/storage/txn/commands/cleanup.rs @@ -24,7 +24,7 @@ command! { /// This should be following a [`Prewrite`](Command::Prewrite) on the given key. 
Cleanup: cmd_ty => (), - display => "kv::command::cleanup {} @ {} | {:?}", (key, start_ts, ctx), + display => { "kv::command::cleanup {} @ {} | {:?}", (key, start_ts, ctx), } content => { key: Key, /// The transaction timestamp. diff --git a/src/storage/txn/commands/commit.rs b/src/storage/txn/commands/commit.rs index 8daff9b2aee..2cfd0045740 100644 --- a/src/storage/txn/commands/commit.rs +++ b/src/storage/txn/commands/commit.rs @@ -23,7 +23,7 @@ command! { /// This should be following a [`Prewrite`](Command::Prewrite). Commit: cmd_ty => TxnStatus, - display => "kv::command::commit {:?} {} -> {} | {:?}", (keys, lock_ts, commit_ts, ctx), + display => { "kv::command::commit {:?} {} -> {} | {:?}", (keys, lock_ts, commit_ts, ctx), } content => { /// The keys affected. keys: Vec, diff --git a/src/storage/txn/commands/compare_and_swap.rs b/src/storage/txn/commands/compare_and_swap.rs index 3725de47273..6925562bf5a 100644 --- a/src/storage/txn/commands/compare_and_swap.rs +++ b/src/storage/txn/commands/compare_and_swap.rs @@ -29,7 +29,7 @@ command! { /// The previous value is always returned regardless of whether the new value is set. RawCompareAndSwap: cmd_ty => (Option, bool), - display => "kv::command::raw_compare_and_swap {:?}", (ctx), + display => { "kv::command::raw_compare_and_swap {:?}", (ctx), } content => { cf: CfName, key: Key, diff --git a/src/storage/txn/commands/flashback_to_version.rs b/src/storage/txn/commands/flashback_to_version.rs index efbeefa2494..f369f3669b3 100644 --- a/src/storage/txn/commands/flashback_to_version.rs +++ b/src/storage/txn/commands/flashback_to_version.rs @@ -28,7 +28,10 @@ use crate::storage::{ command! 
{ FlashbackToVersion: cmd_ty => (), - display => "kv::command::flashback_to_version -> {} | {} {} | {:?}", (version, start_ts, commit_ts, ctx), + display => { + "kv::command::flashback_to_version -> {} | {} {} | {:?}", + (version, start_ts, commit_ts, ctx), + } content => { start_ts: TimeStamp, commit_ts: TimeStamp, diff --git a/src/storage/txn/commands/flashback_to_version_read_phase.rs b/src/storage/txn/commands/flashback_to_version_read_phase.rs index 8af482069d9..aba2ffdda0a 100644 --- a/src/storage/txn/commands/flashback_to_version_read_phase.rs +++ b/src/storage/txn/commands/flashback_to_version_read_phase.rs @@ -84,7 +84,10 @@ pub fn new_flashback_write_cmd( command! { FlashbackToVersionReadPhase: cmd_ty => (), - display => "kv::command::flashback_to_version_read_phase -> {} | {} {} | {:?}", (version, start_ts, commit_ts, ctx), + display => { + "kv::command::flashback_to_version_read_phase -> {} | {} {} | {:?}", + (version, start_ts, commit_ts, ctx), + } content => { start_ts: TimeStamp, commit_ts: TimeStamp, diff --git a/src/storage/txn/commands/macros.rs b/src/storage/txn/commands/macros.rs index c57e7bcb5fb..909ca794340 100644 --- a/src/storage/txn/commands/macros.rs +++ b/src/storage/txn/commands/macros.rs @@ -17,49 +17,34 @@ macro_rules! ctx { /// Generate the struct definition and Debug, Display methods for a passed-in /// storage command. +/// /// Parameters: -/// cmd -> Used as the type name for the generated struct. A variant of the +/// +/// * cmd -> Used as the type name for the generated struct. A variant of the /// enum `storage::txns::commands::Command` must exist whose name matches the /// value of `cmd` and which accepts one parameter whose type name matches /// the value of `cmd`. -/// cmd_ty -> The type of the result of executing this command. -/// display -> Information needed to implement the `Display` trait for the -/// command. content -> The fields of the struct definition for the command. 
+/// * cmd_ty -> The type of the result of executing this command. +/// * display -> Information needed to implement the `Display` trait for the +/// command. +/// * content -> The fields of the struct definition for the command. macro_rules! command { ( $(#[$outer_doc: meta])* $cmd: ident: cmd_ty => $cmd_ty: ty, - display => $format_str: expr, ($($fields: ident$(.$sub_field:ident)?),*), + display => { $format_str: expr, ($($fields: ident$(.$sub_field:ident)?),*), } content => { $($(#[$inner_doc:meta])* $arg: ident : $arg_ty: ty,)* } ) => { - $(#[$outer_doc])* - pub struct $cmd { - pub ctx: crate::storage::Context, - pub deadline: ::tikv_util::deadline::Deadline, - $($(#[$inner_doc])* pub $arg: $arg_ty,)* - } - - impl $cmd { - /// Return a `TypedCommand` that encapsulates the result of executing this command. - pub fn new( - $($arg: $arg_ty,)* - ctx: crate::storage::Context, - ) -> TypedCommand<$cmd_ty> { - let execution_duration_limit = if ctx.max_execution_duration_ms == 0 { - crate::storage::txn::scheduler::DEFAULT_EXECUTION_DURATION_LIMIT - } else { - ::std::time::Duration::from_millis(ctx.max_execution_duration_ms) - }; - let deadline = ::tikv_util::deadline::Deadline::from_now(execution_duration_limit); - Command::$cmd($cmd { - ctx, - deadline, - $($arg,)* - }).into() - } + command! { + $(#[$outer_doc])* + $cmd: + cmd_ty => $cmd_ty, + content => { + $($(#[$inner_doc])* $arg: $arg_ty,)* + } } impl std::fmt::Display for $cmd { diff --git a/src/storage/txn/commands/mod.rs b/src/storage/txn/commands/mod.rs index eb4026a84d0..f4ea6757f97 100644 --- a/src/storage/txn/commands/mod.rs +++ b/src/storage/txn/commands/mod.rs @@ -124,7 +124,8 @@ pub enum Command { /// 2. The `From` impl for `TypedCommand` gets chosen, and its /// generic parameter indicates that the result type for this instance of /// `TypedCommand` is going to be `TxnStatus` - one of the variants of the -/// `StorageCallback` enum. 3. 
In the above `from` method, the details of the +/// `StorageCallback` enum. +/// 3. In the above `from` method, the details of the /// commit request are captured by creating an instance of the struct /// `storage::txn::commands::commit::Command` via its `new` method. /// 4. This struct is wrapped in a variant of the enum diff --git a/src/storage/txn/commands/mvcc_by_key.rs b/src/storage/txn/commands/mvcc_by_key.rs index 986147fdee1..57ef1653971 100644 --- a/src/storage/txn/commands/mvcc_by_key.rs +++ b/src/storage/txn/commands/mvcc_by_key.rs @@ -17,7 +17,7 @@ command! { /// Retrieve MVCC information for the given key. MvccByKey: cmd_ty => MvccInfo, - display => "kv::command::mvccbykey {:?} | {:?}", (key, ctx), + display => { "kv::command::mvccbykey {:?} | {:?}", (key, ctx), } content => { key: Key, } diff --git a/src/storage/txn/commands/mvcc_by_start_ts.rs b/src/storage/txn/commands/mvcc_by_start_ts.rs index aae02fe79a3..5617390bd94 100644 --- a/src/storage/txn/commands/mvcc_by_start_ts.rs +++ b/src/storage/txn/commands/mvcc_by_start_ts.rs @@ -17,7 +17,7 @@ command! { /// Retrieve MVCC info for the first committed key which `start_ts == ts`. MvccByStartTs: cmd_ty => Option<(Key, MvccInfo)>, - display => "kv::command::mvccbystartts {:?} | {:?}", (start_ts, ctx), + display => { "kv::command::mvccbystartts {:?} | {:?}", (start_ts, ctx), } content => { start_ts: TimeStamp, } diff --git a/src/storage/txn/commands/pause.rs b/src/storage/txn/commands/pause.rs index 1f5d40b2d4e..a92bd940241 100644 --- a/src/storage/txn/commands/pause.rs +++ b/src/storage/txn/commands/pause.rs @@ -24,7 +24,7 @@ command! { /// This means other write operations that involve these keys will be blocked. Pause: cmd_ty => (), - display => "kv::command::pause keys:({}) {} ms | {:?}", (keys.len, duration, ctx), + display => { "kv::command::pause keys:({}) {} ms | {:?}", (keys.len, duration, ctx), } content => { /// The keys to hold latches on. 
keys: Vec, diff --git a/src/storage/txn/commands/pessimistic_rollback.rs b/src/storage/txn/commands/pessimistic_rollback.rs index 551ba931e53..63a86d6622c 100644 --- a/src/storage/txn/commands/pessimistic_rollback.rs +++ b/src/storage/txn/commands/pessimistic_rollback.rs @@ -25,7 +25,10 @@ command! { /// This can roll back an [`AcquirePessimisticLock`](Command::AcquirePessimisticLock) command. PessimisticRollback: cmd_ty => Vec>, - display => "kv::command::pessimistic_rollback keys({:?}) @ {} {} | {:?}", (keys, start_ts, for_update_ts, ctx), + display => { + "kv::command::pessimistic_rollback keys({:?}) @ {} {} | {:?}", + (keys, start_ts, for_update_ts, ctx), + } content => { /// The keys to be rolled back. keys: Vec, diff --git a/src/storage/txn/commands/pessimistic_rollback_read_phase.rs b/src/storage/txn/commands/pessimistic_rollback_read_phase.rs index ea0e1bf0729..a239d20d75d 100644 --- a/src/storage/txn/commands/pessimistic_rollback_read_phase.rs +++ b/src/storage/txn/commands/pessimistic_rollback_read_phase.rs @@ -16,7 +16,7 @@ use crate::storage::{ command! { PessimisticRollbackReadPhase: cmd_ty => Vec>, - display => "kv::pessimistic_rollback_read_phase", (), + display => { "kv::pessimistic_rollback_read_phase", (), } content => { start_ts: TimeStamp, for_update_ts: TimeStamp, diff --git a/src/storage/txn/commands/resolve_lock.rs b/src/storage/txn/commands/resolve_lock.rs index cd01fc60475..84f0ee9d544 100644 --- a/src/storage/txn/commands/resolve_lock.rs +++ b/src/storage/txn/commands/resolve_lock.rs @@ -30,7 +30,10 @@ command! { /// This should follow after a `ResolveLockReadPhase`. ResolveLock: cmd_ty => (), - display => "kv::resolve_lock {:?} scan_key({:?}) key_locks({:?})", (txn_status, scan_key, key_locks), + display => { + "kv::resolve_lock {:?} scan_key({:?}) key_locks({:?})", + (txn_status, scan_key, key_locks), + } content => { /// Maps lock_ts to commit_ts. If a transaction was rolled back, it is mapped to 0. 
/// diff --git a/src/storage/txn/commands/resolve_lock_lite.rs b/src/storage/txn/commands/resolve_lock_lite.rs index 318e5d57313..ce36d414477 100644 --- a/src/storage/txn/commands/resolve_lock_lite.rs +++ b/src/storage/txn/commands/resolve_lock_lite.rs @@ -22,7 +22,10 @@ command! { /// Resolve locks on `resolve_keys` according to `start_ts` and `commit_ts`. ResolveLockLite: cmd_ty => (), - display => "kv::resolve_lock_lite resolve_keys({:?}) {} {} | {:?}", (resolve_keys, start_ts, commit_ts, ctx), + display => { + "kv::resolve_lock_lite resolve_keys({:?}) {} {} | {:?}", + (resolve_keys, start_ts, commit_ts, ctx), + } content => { /// The transaction timestamp. start_ts: TimeStamp, diff --git a/src/storage/txn/commands/resolve_lock_readphase.rs b/src/storage/txn/commands/resolve_lock_readphase.rs index bdd81283cd3..3f68211e72c 100644 --- a/src/storage/txn/commands/resolve_lock_readphase.rs +++ b/src/storage/txn/commands/resolve_lock_readphase.rs @@ -22,7 +22,7 @@ command! { /// This should followed by a `ResolveLock`. ResolveLockReadPhase: cmd_ty => (), - display => "kv::resolve_lock_readphase", (), + display => { "kv::resolve_lock_readphase", (), } content => { /// Maps lock_ts to commit_ts. See ./resolve_lock.rs for details. txn_status: HashMap, diff --git a/src/storage/txn/commands/rollback.rs b/src/storage/txn/commands/rollback.rs index df60767e716..1d4b189f2bb 100644 --- a/src/storage/txn/commands/rollback.rs +++ b/src/storage/txn/commands/rollback.rs @@ -24,7 +24,10 @@ command! { /// This should be following a [`Prewrite`](Command::Prewrite) on the given key. Rollback: cmd_ty => (), - display => "kv::command::rollback keys({:?}) @ {} | {:?}", (keys, start_ts, ctx), + display => { + "kv::command::rollback keys({:?}) @ {} | {:?}", + (keys, start_ts, ctx), + } content => { keys: Vec, /// The transaction timestamp. 
diff --git a/src/storage/txn/commands/txn_heart_beat.rs b/src/storage/txn/commands/txn_heart_beat.rs index c900464099a..a2f355c950f 100644 --- a/src/storage/txn/commands/txn_heart_beat.rs +++ b/src/storage/txn/commands/txn_heart_beat.rs @@ -25,7 +25,10 @@ command! { /// [`Prewrite`](Command::Prewrite). TxnHeartBeat: cmd_ty => TxnStatus, - display => "kv::command::txn_heart_beat {} @ {} ttl {} | {:?}", (primary_key, start_ts, advise_ttl, ctx), + display => { + "kv::command::txn_heart_beat {} @ {} ttl {} | {:?}", + (primary_key, start_ts, advise_ttl, ctx), + } content => { /// The primary key of the transaction. primary_key: Key, diff --git a/src/storage/txn/mod.rs b/src/storage/txn/mod.rs index 8c30ae0a068..66521238f4e 100644 --- a/src/storage/txn/mod.rs +++ b/src/storage/txn/mod.rs @@ -11,6 +11,7 @@ pub mod txn_status_cache; mod actions; mod latch; mod store; +mod task; use std::{error::Error as StdError, io::Error as IoError}; diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 89ca750d282..47920cc0ade 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -42,7 +42,7 @@ use engine_traits::{CF_DEFAULT, CF_LOCK, CF_WRITE}; use file_system::IoBytes; use futures::{compat::Future01CompatExt, StreamExt}; use kvproto::{ - kvrpcpb::{self, CommandPri, Context, DiskFullOpt, ExtraOp}, + kvrpcpb::{self, CommandPri, Context, DiskFullOpt}, pdpb::QueryKind, }; use parking_lot::{Mutex, MutexGuard, RwLockWriteGuard}; @@ -53,9 +53,10 @@ use resource_metering::{FutureExt, ResourceTagFactory}; use smallvec::{smallvec, SmallVec}; use tikv_kv::{Modify, Snapshot, SnapshotExt, WriteData, WriteEvent}; use tikv_util::{quota_limiter::QuotaLimiter, time::Instant, timer::GLOBAL_TIMER_HANDLE}; -use tracker::{get_tls_tracker_token, set_tls_tracker_token, TrackerToken, GLOBAL_TRACKERS}; +use tracker::{set_tls_tracker_token, TrackerToken, GLOBAL_TRACKERS}; use txn_types::TimeStamp; +use super::task::Task; use crate::{ 
server::lock_manager::waiter_manager, storage::{ @@ -107,26 +108,6 @@ const SCHEDULER_CPU_TIME_FACTOR: u32 = 5; type SVec = SmallVec<[T; 4]>; -/// Task is a running command. -pub(super) struct Task { - pub(super) cid: u64, - pub(super) tracker: TrackerToken, - pub(super) cmd: Command, - pub(super) extra_op: ExtraOp, -} - -impl Task { - /// Creates a task for a running command. - pub(super) fn new(cid: u64, tracker: TrackerToken, cmd: Command) -> Task { - Task { - cid, - tracker, - cmd, - extra_op: ExtraOp::Noop, - } - } -} - struct CmdTimer { tag: CommandKind, begin: Instant, @@ -162,16 +143,16 @@ struct TaskContext { impl TaskContext { fn new(task: Task, cb: SchedulerTaskCallback, prepared_latches: Option) -> TaskContext { - let tag = task.cmd.tag(); - let lock = prepared_latches.unwrap_or_else(|| task.cmd.gen_lock()); + let tag = task.cmd().tag(); + let lock = prepared_latches.unwrap_or_else(|| task.cmd().gen_lock()); // The initial locks should be either all acquired or all not acquired. assert!(lock.owned_count == 0 || lock.owned_count == lock.required_hashes.len()); // Write command should acquire write lock. 
- if !task.cmd.readonly() && !lock.is_write_lock() { - panic!("write lock is expected for command {}", task.cmd); + if !task.cmd().readonly() && !lock.is_write_lock() { + panic!("write lock is expected for command {}", task.cmd()); } let write_bytes = if lock.is_write_lock() { - task.cmd.write_bytes() + task.cmd().write_bytes() } else { 0 }; @@ -196,7 +177,7 @@ impl TaskContext { fn on_schedule(&mut self) { let elapsed = self.latch_timer.saturating_elapsed(); if let Some(task) = &self.task.as_ref() { - GLOBAL_TRACKERS.with_tracker(task.tracker, |tracker| { + GLOBAL_TRACKERS.with_tracker(task.tracker(), |tracker| { tracker.metrics.latch_wait_nanos = elapsed.as_nanos() as u64; }); } @@ -394,7 +375,7 @@ impl TxnSchedulerInner { let tctx = task_slot.get_mut(&cid).unwrap(); // Check deadline early during acquiring latches to avoid expired requests // blocking other requests. - let cmd = &tctx.task.as_ref().unwrap().cmd; + let cmd = tctx.task.as_ref().unwrap().cmd(); if let Err(e) = cmd.deadline().check() { // `acquire_lock_on_wakeup` is called when another command releases its locks // and wakes up command `cid`. 
This command inserted its lock before @@ -518,9 +499,10 @@ impl TxnScheduler { }); return; } + let cid = self.inner.gen_id(); + let task = Task::new(cid, cmd); self.schedule_command( - None, - cmd, + task, SchedulerTaskCallback::NormalRequestCallback(callback), None, ); @@ -544,13 +526,13 @@ impl TxnScheduler { fn schedule_command( &self, - specified_cid: Option, - cmd: Command, + task: Task, callback: SchedulerTaskCallback, prepared_latches: Option, ) { - let cid = specified_cid.unwrap_or_else(|| self.inner.gen_id()); - let tracker = get_tls_tracker_token(); + let cid = task.cid(); + let tracker = task.tracker(); + let cmd = task.cmd(); debug!("received new command"; "cid" => cid, "cmd" => ?cmd, "tracker" => ?tracker); let tag = cmd.tag(); @@ -564,7 +546,7 @@ impl TxnScheduler { let mut task_slot = self.inner.get_task_slot(cid); let tctx = task_slot.entry(cid).or_insert_with(|| { self.inner - .new_task_context(Task::new(cid, tracker, cmd), callback, prepared_latches) + .new_task_context(task, callback, prepared_latches) }); if self.inner.latches.acquire(&mut tctx.lock, cid) { @@ -576,7 +558,7 @@ impl TxnScheduler { return; } let task = tctx.task.as_ref().unwrap(); - self.fail_fast_or_check_deadline(cid, &task.cmd); + self.fail_fast_or_check_deadline(cid, task.cmd()); fail_point!("txn_scheduler_acquire_fail"); } @@ -669,11 +651,12 @@ impl TxnScheduler { .collect(); let cmd = commands::AcquirePessimisticLockResumed::from_lock_wait_entries(awakened_entries); + let cid = specified_cid.unwrap_or_else(|| self.inner.gen_id()); + let task = Task::new(cid, cmd.into()); // TODO: Make flow control take effect on this thing. self.schedule_command( - specified_cid, - cmd.into(), + task, SchedulerTaskCallback::LockKeyCallbacks(key_callbacks), prepared_latches, ); @@ -686,26 +669,26 @@ impl TxnScheduler { /// Executes the task in the sched pool. 
fn execute(&self, mut task: Task) { - set_tls_tracker_token(task.tracker); + set_tls_tracker_token(task.tracker()); let sched = self.clone(); - let metadata = TaskMetadata::from_ctx(task.cmd.resource_control_ctx()); + let metadata = TaskMetadata::from_ctx(task.cmd().resource_control_ctx()); self.get_sched_pool() - .spawn(metadata, task.cmd.priority(), async move { + .spawn(metadata, task.cmd().priority(), async move { fail_point!("scheduler_start_execute"); if sched.check_task_deadline_exceeded(&task, None) { return; } - let tag = task.cmd.tag(); + let tag = task.cmd().tag(); SCHED_STAGE_COUNTER_VEC.get(tag).snapshot.inc(); let mut snap_ctx = SnapContext { - pb_ctx: task.cmd.ctx(), + pb_ctx: task.cmd().ctx(), ..Default::default() }; if matches!( - task.cmd, + task.cmd(), Command::FlashbackToVersionReadPhase { .. } | Command::FlashbackToVersion { .. } ) { @@ -722,13 +705,13 @@ impl TxnScheduler { let extra_op = snapshot.ext().get_txn_extra_op(); if !sched .inner - .get_task_slot(task.cid) - .get(&task.cid) + .get_task_slot(task.cid()) + .get(&task.cid()) .unwrap() .try_own() { sched.finish_with_err( - task.cid, + task.cid(), StorageErrorInner::DeadlineExceeded, None, ); @@ -736,22 +719,22 @@ impl TxnScheduler { } if let Some(term) = term { - task.cmd.ctx_mut().set_term(term.get()); + task.cmd_mut().ctx_mut().set_term(term.get()); } - task.extra_op = extra_op; + task.set_extra_op(extra_op); debug!( "process cmd with snapshot"; - "cid" => task.cid, "term" => ?term, "extra_op" => ?extra_op, - "trakcer" => ?task.tracker + "cid" => task.cid(), "term" => ?term, "extra_op" => ?extra_op, + "tracker" => ?task.tracker() ); sched.process(snapshot, task).await; } Err(err) => { SCHED_STAGE_COUNTER_VEC.get(tag).snapshot_err.inc(); - info!("get snapshot failed"; "cid" => task.cid, "err" => ?err); - sched.finish_with_err(task.cid, Error::from(err), None); + info!("get snapshot failed"; "cid" => task.cid(), "err" => ?err); + sched.finish_with_err(task.cid(), Error::from(err), None); 
} } }) @@ -802,7 +785,8 @@ impl TxnScheduler { let tctx = self.inner.dequeue_task_context(cid); if let ProcessResult::NextCommand { cmd } = pr { SCHED_STAGE_COUNTER_VEC.get(tag).next_cmd.inc(); - self.schedule_command(None, cmd, tctx.cb.unwrap(), None); + let task = Task::new(self.inner.gen_id(), cmd); + self.schedule_command(task, tctx.cb.unwrap(), None); } else { tctx.cb.unwrap().execute(pr); } @@ -880,7 +864,8 @@ impl TxnScheduler { }; if let ProcessResult::NextCommand { cmd } = pr { SCHED_STAGE_COUNTER_VEC.get(tag).next_cmd.inc(); - self.schedule_command(None, cmd, cb, None); + let task = Task::new(self.inner.gen_id(), cmd); + self.schedule_command(task, cb, None); } else { GLOBAL_TRACKERS.with_tracker(sched_details.tracker, |tracker| { tracker.metrics.scheduler_process_nanos = sched_details @@ -1143,18 +1128,18 @@ impl TxnScheduler { return; } - let resource_tag = self.inner.resource_tag_factory.new_tag(task.cmd.ctx()); + let resource_tag = self.inner.resource_tag_factory.new_tag(task.cmd().ctx()); async { - let tag = task.cmd.tag(); + let tag = task.cmd().tag(); fail_point!("scheduler_async_snapshot_finish"); SCHED_STAGE_COUNTER_VEC.get(tag).process.inc(); let timer = Instant::now(); - let region_id = task.cmd.ctx().get_region_id(); - let ts = task.cmd.ts(); - let mut sched_details = SchedulerDetails::new(task.tracker, timer); - match &task.cmd { + let region_id = task.cmd().ctx().get_region_id(); + let ts = task.cmd().ts(); + let mut sched_details = SchedulerDetails::new(task.tracker(), timer); + match task.cmd() { Command::Prewrite(_) | Command::PrewritePessimistic(_) => { tls_collect_query(region_id, QueryKind::Prewrite); } @@ -1171,7 +1156,7 @@ impl TxnScheduler { } fail_point!("scheduler_process"); - if task.cmd.readonly() { + if task.cmd().readonly() { self.process_read(snapshot, task, &mut sched_details); } else { self.process_write(snapshot, task, &mut sched_details).await; @@ -1195,22 +1180,22 @@ impl TxnScheduler { /// `ReadFinished` message back to 
the `TxnScheduler`. fn process_read(self, snapshot: E::Snap, task: Task, sched_details: &mut SchedulerDetails) { fail_point!("txn_before_process_read"); - debug!("process read cmd in worker pool"; "cid" => task.cid); + let cid = task.cid(); + debug!("process read cmd in worker pool"; "cid" => cid); - let tag = task.cmd.tag(); + let tag = task.cmd().tag(); let begin_instant = Instant::now(); - let cmd = task.cmd; let pr = unsafe { with_perf_context::(tag, || { - cmd.process_read(snapshot, &mut sched_details.stat) + task.process_read(snapshot, &mut sched_details.stat) .unwrap_or_else(|e| ProcessResult::Failed { err: e.into() }) }) }; SCHED_PROCESSING_READ_HISTOGRAM_STATIC .get(tag) .observe(begin_instant.saturating_elapsed_secs()); - self.on_read_finished(task.cid, pr, tag); + self.on_read_finished(cid, pr, tag); } /// Processes a write command within a worker thread, then posts either a @@ -1223,24 +1208,19 @@ impl TxnScheduler { sched_details: &mut SchedulerDetails, ) { fail_point!("txn_before_process_write"); - let write_bytes = task.cmd.write_bytes(); - let tag = task.cmd.tag(); - let cid = task.cid; - let metadata = TaskMetadata::from_ctx(task.cmd.resource_control_ctx()); - let tracker = task.tracker; + let write_bytes = task.cmd().write_bytes(); + let tag = task.cmd().tag(); + let cid = task.cid(); + let metadata = TaskMetadata::from_ctx(task.cmd().resource_control_ctx()); + let tracker = task.tracker(); let scheduler = self.clone(); let quota_limiter = self.inner.quota_limiter.clone(); let resource_limiter = self.inner.resource_manager.as_ref().and_then(|m| { + let ctx = task.cmd().ctx(); m.get_resource_limiter( - task.cmd - .ctx() - .get_resource_control_context() - .get_resource_group_name(), - task.cmd.ctx().get_request_source(), - task.cmd - .ctx() - .get_resource_control_context() - .get_override_priority(), + ctx.get_resource_control_context().get_resource_group_name(), + ctx.get_request_source(), + 
ctx.get_resource_control_context().get_override_priority(), ) }); let mut sample = quota_limiter.new_sample(true); @@ -1248,8 +1228,8 @@ impl TxnScheduler { sample.enable_cpu_limit(); } let pessimistic_lock_mode = self.pessimistic_lock_mode(); - let pipelined = - task.cmd.can_be_pipelined() && pessimistic_lock_mode == PessimisticLockMode::Pipelined; + let pipelined = task.cmd().can_be_pipelined() + && pessimistic_lock_mode == PessimisticLockMode::Pipelined; let txn_ext = snapshot.ext().get_txn_ext().cloned(); let max_ts_synced = snapshot.ext().is_max_ts_synced(); let causal_ts_provider = self.inner.causal_ts_provider.clone(); @@ -1259,7 +1239,7 @@ impl TxnScheduler { causal_ts_provider, concurrency_manager.clone(), max_ts_synced, - &task.cmd, + task.cmd(), ) .await; if let Err(err) = raw_ext { @@ -1269,13 +1249,13 @@ impl TxnScheduler { } let raw_ext = raw_ext.unwrap(); - let deadline = task.cmd.deadline(); + let deadline = task.cmd().deadline(); let write_result = { let _guard = sample.observe_cpu(); let context = WriteContext { lock_mgr: &self.inner.lock_mgr, concurrency_manager, - extra_op: task.extra_op, + extra_op: task.extra_op(), statistics: &mut sched_details.stat, async_apply_prewrite: self.inner.enable_async_apply_prewrite, raw_ext, @@ -1284,8 +1264,7 @@ impl TxnScheduler { let begin_instant = Instant::now(); let res = unsafe { with_perf_context::(tag, || { - task.cmd - .process_write(snapshot, context) + task.process_write(snapshot, context) .map_err(StorageError::from) }) }; @@ -1751,8 +1730,8 @@ impl TxnScheduler { task: &Task, sched_details: Option<&SchedulerDetails>, ) -> bool { - if let Err(e) = task.cmd.deadline().check() { - self.finish_with_err(task.cid, e, sched_details); + if let Err(e) = task.cmd().deadline().check() { + self.finish_with_err(task.cid(), e, sched_details); true } else { false diff --git a/src/storage/txn/task.rs b/src/storage/txn/task.rs new file mode 100644 index 00000000000..6773de59110 --- /dev/null +++ 
b/src/storage/txn/task.rs @@ -0,0 +1,76 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +use kvproto::kvrpcpb::ExtraOp; +use tikv_kv::Snapshot; +use tracker::{get_tls_tracker_token, TrackerToken}; + +use crate::storage::{ + kv::Statistics, + lock_manager::LockManager, + txn::{ + commands::{Command, WriteContext, WriteResult}, + ProcessResult, + }, +}; + +pub(super) struct Task { + cid: u64, + tracker: TrackerToken, + cmd: Option, + extra_op: ExtraOp, +} + +impl Task { + /// Creates a task for a running command. + pub(super) fn new(cid: u64, cmd: Command) -> Task { + let tracker = get_tls_tracker_token(); + Task { + cid, + tracker, + cmd: Some(cmd), + extra_op: ExtraOp::Noop, + } + } + + pub(super) fn cid(&self) -> u64 { + self.cid + } + + pub(super) fn tracker(&self) -> TrackerToken { + self.tracker + } + + pub(super) fn cmd(&self) -> &Command { + self.cmd.as_ref().unwrap() + } + + pub(super) fn cmd_mut(&mut self) -> &mut Command { + self.cmd.as_mut().unwrap() + } + + pub(super) fn extra_op(&self) -> ExtraOp { + self.extra_op + } + + pub(super) fn set_extra_op(&mut self, extra_op: ExtraOp) { + self.extra_op = extra_op + } + + pub(super) fn process_write( + mut self, + snapshot: S, + context: WriteContext<'_, L>, + ) -> super::Result { + let cmd = self.cmd.take().unwrap(); + cmd.process_write(snapshot, context) + } + + pub(super) fn process_read( + mut self, + snapshot: S, + statistics: &mut Statistics, + ) -> super::Result { + let cmd = self.cmd.take().unwrap(); + cmd.process_read(snapshot, statistics) + } +} From 5cf15aacef1df5af22a29637ee33b742e02ec2da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Thu, 25 Jan 2024 15:43:51 +0800 Subject: [PATCH 1132/1149] snap_backup: abort last connection of preparing while there are many (#16388) close tikv/tikv#16382 Now, a newly established prepare disk snapshot backup stream will abort the former one. 
Signed-off-by: Yu Juncen --- components/backup/src/disk_snap.rs | 46 +++++++++++++++++-------- components/backup/src/service.rs | 22 +++++++++--- components/test_backup/src/disk_snap.rs | 6 +++- tests/integrations/backup/disk_snap.rs | 12 +++++++ 4 files changed, 66 insertions(+), 20 deletions(-) diff --git a/components/backup/src/disk_snap.rs b/components/backup/src/disk_snap.rs index 27c5b2e2b19..94d956cc11c 100644 --- a/components/backup/src/disk_snap.rs +++ b/components/backup/src/disk_snap.rs @@ -2,6 +2,7 @@ //! This module contains things about disk snapshot. use std::{ + future::Pending, sync::{ atomic::{AtomicU64, Ordering}, Arc, @@ -14,9 +15,9 @@ use futures::future; use futures_util::{ future::{BoxFuture, FutureExt}, sink::SinkExt, - stream::StreamExt, + stream::{AbortHandle, Abortable, StreamExt}, }; -use grpcio::{RpcStatus, WriteFlags}; +use grpcio::{RpcStatus, RpcStatusCode, WriteFlags}; use kvproto::{ brpb::{ PrepareSnapshotBackupEventType as PEvnT, PrepareSnapshotBackupRequest as PReq, @@ -206,6 +207,7 @@ impl Env { pub struct StreamHandleLoop { pending_regions: Vec)>>, env: Env, + aborted: Abortable>, } impl Drop for StreamHandleLoop { @@ -218,15 +220,19 @@ enum StreamHandleEvent { Req(PReq), WaitApplyDone(Region, Result<()>), ConnectionGone(Option), + Abort, } impl StreamHandleLoop { - pub fn new(env: Env) -> Self { + pub fn new(env: Env) -> (Self, AbortHandle) { + let (aborted, handle) = futures_util::future::abortable(std::future::pending()); env.active_stream.fetch_add(1, Ordering::SeqCst); - Self { + let this = Self { env, + aborted, pending_regions: vec![], - } + }; + (this, handle) } fn async_wait_apply(&mut self, region: &Region) -> BoxFuture<'static, (Region, Result<()>)> { @@ -261,20 +267,19 @@ impl StreamHandleLoop { &mut self, input: &mut (impl Stream> + Unpin), ) -> StreamHandleEvent { + let pending_regions = &mut self.pending_regions; let wait_applies = future::poll_fn(|cx| { - let selected = - self.pending_regions - .iter_mut() - 
.enumerate() - .find_map(|(i, fut)| match fut.poll_unpin(cx) { - Poll::Ready(r) => Some((i, r)), - Poll::Pending => None, - }); + let selected = pending_regions.iter_mut().enumerate().find_map(|(i, fut)| { + match fut.poll_unpin(cx) { + Poll::Ready(r) => Some((i, r)), + Poll::Pending => None, + } + }); match selected { Some((i, region)) => { // We have polled the future (and make sure it has ready) before, it is // safe to drop this future directly. - let _ = self.pending_regions.swap_remove(i); + let _ = pending_regions.swap_remove(i); region.into() } None => Poll::Pending, @@ -292,6 +297,9 @@ impl StreamHandleLoop { None => StreamHandleEvent::ConnectionGone(None) } } + _ = &mut self.aborted => { + StreamHandleEvent::Abort + } } } @@ -348,6 +356,16 @@ impl StreamHandleLoop { Some(err) => Err(err), }; } + StreamHandleEvent::Abort => { + warn!("Aborted disk snapshot prepare loop by the server."); + return sink + .0 + .fail(RpcStatus::with_message( + RpcStatusCode::CANCELLED, + "the loop has been aborted by server".to_string(), + )) + .await; + } } } } diff --git a/components/backup/src/service.rs b/components/backup/src/service.rs index 04d996944a4..7e38093df53 100644 --- a/components/backup/src/service.rs +++ b/components/backup/src/service.rs @@ -1,8 +1,9 @@ // Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::sync::atomic::*; +use std::sync::{atomic::*, Arc, Mutex}; use futures::{channel::mpsc, FutureExt, SinkExt, StreamExt, TryFutureExt}; +use futures_util::stream::AbortHandle; use grpcio::{self, *}; use kvproto::brpb::*; use raftstore::store::snapshot_backup::SnapshotBrHandle; @@ -16,6 +17,7 @@ use crate::disk_snap::{self, StreamHandleLoop}; pub struct Service { scheduler: Scheduler, snap_br_env: disk_snap::Env, + abort_last_req: Arc>>, } impl Service @@ -27,6 +29,7 @@ where Service { scheduler, snap_br_env: env, + abort_last_req: Arc::default(), } } } @@ -147,17 +150,26 @@ where stream: grpcio::RequestStream, sink: grpcio::DuplexSink, ) { - let l = StreamHandleLoop::new(self.snap_br_env.clone()); + let (l, new_cancel) = StreamHandleLoop::new(self.snap_br_env.clone()); + let peer = ctx.peer(); // Note: should we disconnect here once there are more than one stream...? // Generally once two streams enter here, one may exit info!("A new prepare snapshot backup stream created!"; - "peer" => %ctx.peer(), + "peer" => %peer, "stream_count" => %self.snap_br_env.active_stream(), ); + let abort_last_req = self.abort_last_req.clone(); self.snap_br_env.get_async_runtime().spawn(async move { - if let Err(err) = l.run(stream, sink.into()).await { - warn!("stream closed; perhaps a problem cannot be retried happens"; "reason" => ?err); + { + let mut lock = abort_last_req.lock().unwrap(); + if let Some(cancel) = &*lock { + cancel.abort(); + } + *lock = Some(new_cancel); } + let res = l.run(stream, sink.into()).await; + info!("stream closed; probably everything is done or a problem cannot be retried happens"; + "result" => ?res, "peer" => %peer); }); } } diff --git a/components/test_backup/src/disk_snap.rs b/components/test_backup/src/disk_snap.rs index aa1c94f8e5e..c252f68d09d 100644 --- a/components/test_backup/src/disk_snap.rs +++ b/components/test_backup/src/disk_snap.rs @@ -208,7 +208,11 @@ impl PrepareBackup { } pub fn next(&mut self) -> PrepareSnapshotBackupResponse { 
- block_on(self.rx.next()).unwrap().unwrap() + self.try_next().unwrap() + } + + pub fn try_next(&mut self) -> grpcio::Result { + block_on(self.rx.next()).unwrap() } } diff --git a/tests/integrations/backup/disk_snap.rs b/tests/integrations/backup/disk_snap.rs index bdef242b1a1..23a61a937e9 100644 --- a/tests/integrations/backup/disk_snap.rs +++ b/tests/integrations/backup/disk_snap.rs @@ -107,6 +107,18 @@ fn test_prepare_merge() { assert_failure(&resp); } +#[test] +fn test_abort_last_one() { + let suite = Suite::new(1); + let mut call = suite.prepare_backup(1); + call.prepare(10); + let mut call2 = suite.prepare_backup(1); + call2.prepare(10); + let should_err = call.try_next(); + assert!(should_err.is_err(), "{:?}", should_err); + assert!(call2.send_finalize()); +} + #[test] fn test_wait_apply() { let mut suite = Suite::new(3); From 1f870ee38fd0081615856e7ed1de9e65bf252c00 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Thu, 25 Jan 2024 16:51:21 +0800 Subject: [PATCH 1133/1149] In-memory Engine: implement garbage collection -- backend part (#16238) ref tikv/tikv#16141 implement the garbage collection of the in-memory engine -- backend part Signed-off-by: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Co-authored-by: tongjian --- Cargo.lock | 4 + .../region_cache_memory_engine/Cargo.toml | 4 + .../region_cache_memory_engine/src/engine.rs | 56 +- .../region_cache_memory_engine/src/gc.rs | 500 ++++++++++++++++++ .../region_cache_memory_engine/src/keys.rs | 15 +- .../region_cache_memory_engine/src/lib.rs | 2 + .../src/memory_limiter.rs | 47 ++ .../src/range_manager.rs | 13 + 8 files changed, 596 insertions(+), 45 deletions(-) create mode 100644 components/region_cache_memory_engine/src/gc.rs create mode 100644 components/region_cache_memory_engine/src/memory_limiter.rs diff --git a/Cargo.lock b/Cargo.lock index aa3daec32c5..8550a1dc3cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4531,8 +4531,12 @@ dependencies = [ 
"collections", "engine_rocks", "engine_traits", + "log_wrappers", "skiplist-rs", + "slog", + "slog-global", "tikv_util", + "txn_types", ] [[package]] diff --git a/components/region_cache_memory_engine/Cargo.toml b/components/region_cache_memory_engine/Cargo.toml index 2cd006b8e57..33014798782 100644 --- a/components/region_cache_memory_engine/Cargo.toml +++ b/components/region_cache_memory_engine/Cargo.toml @@ -14,4 +14,8 @@ collections = { workspace = true } skiplist-rs = { git = "https://github.com/tikv/skiplist-rs.git", branch = "main" } bytes = "1.0" tikv_util = { workspace = true } +txn_types = { workspace = true } +log_wrappers = { workspace = true } +slog-global = { workspace = true } +slog = { workspace = true } engine_rocks = { workspace = true } diff --git a/components/region_cache_memory_engine/src/engine.rs b/components/region_cache_memory_engine/src/engine.rs index dc5c93c38a8..48f5d95b8de 100644 --- a/components/region_cache_memory_engine/src/engine.rs +++ b/components/region_cache_memory_engine/src/engine.rs @@ -9,24 +9,24 @@ use std::{ }; use bytes::Bytes; -use collections::{HashMap, HashSet}; use engine_rocks::{raw::SliceTransform, util::FixedSuffixSliceTransform}; use engine_traits::{ CacheRange, CfNamesExt, DbVector, Error, IterOptions, Iterable, Iterator, Peekable, RangeCacheEngine, ReadOptions, Result, Snapshot, SnapshotMiscExt, CF_DEFAULT, CF_LOCK, CF_WRITE, }; -use skiplist_rs::{AllocationRecorder, IterRef, MemoryLimiter, Node, Skiplist, MIB}; +use skiplist_rs::{IterRef, Skiplist, MIB}; use crate::{ keys::{ decode_key, encode_key_for_eviction, encode_seek_key, InternalKey, InternalKeyComparator, ValueType, VALUE_TYPE_FOR_SEEK, VALUE_TYPE_FOR_SEEK_FOR_PREV, }, + memory_limiter::GlobalMemoryLimiter, range_manager::RangeManager, }; -const EVICTION_KEY_BUFFER_LIMIT: usize = 5 * MIB as usize; +pub(crate) const EVICTION_KEY_BUFFER_LIMIT: usize = 5 * MIB as usize; pub(crate) fn cf_to_id(cf: &str) -> usize { match cf { @@ -37,47 +37,6 @@ pub(crate) 
fn cf_to_id(cf: &str) -> usize { } } -// todo: implement a real memory limiter. Now, it is used for test. -#[derive(Clone, Default)] -pub struct GlobalMemoryLimiter { - recorder: Arc>>, - removed: Arc>>>, -} - -impl MemoryLimiter for GlobalMemoryLimiter { - fn acquire(&self, n: usize) -> bool { - true - } - - fn mem_usage(&self) -> usize { - 0 - } - - fn reclaim(&self, n: usize) {} -} - -impl AllocationRecorder for GlobalMemoryLimiter { - fn alloc(&self, addr: usize, size: usize) { - let mut recorder = self.recorder.lock().unwrap(); - assert!(!recorder.contains_key(&addr)); - recorder.insert(addr, size); - } - - fn free(&self, addr: usize, size: usize) { - let node = addr as *mut Node; - let mut removed = self.removed.lock().unwrap(); - removed.insert(unsafe { (*node).key().to_vec() }); - let mut recorder = self.recorder.lock().unwrap(); - assert_eq!(recorder.remove(&addr).unwrap(), size); - } -} - -impl Drop for GlobalMemoryLimiter { - fn drop(&mut self) { - assert!(self.recorder.lock().unwrap().is_empty()); - } -} - /// A single global set of skiplists shared by all cached ranges #[derive(Clone)] pub struct SkiplistEngine { @@ -104,6 +63,10 @@ impl SkiplistEngine { } } + pub fn cf_handle(&self, cf: &str) -> Arc> { + self.data[cf_to_id(cf)].clone() + } + fn delete_range(&self, range: &CacheRange) { self.data.iter().for_each(|d| { let mut key_buffer: Vec = vec![]; @@ -161,6 +124,11 @@ impl SnapshotList { } } + // returns the min snapshot_ts (read_ts) if there's any + pub fn min_snapshot_ts(&self) -> Option { + self.0.first_key_value().map(|(ts, _)| *ts) + } + pub(crate) fn is_empty(&self) -> bool { self.0.is_empty() } diff --git a/components/region_cache_memory_engine/src/gc.rs b/components/region_cache_memory_engine/src/gc.rs new file mode 100644 index 00000000000..2d81c4879c3 --- /dev/null +++ b/components/region_cache_memory_engine/src/gc.rs @@ -0,0 +1,500 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use core::slice::SlicePattern; +use std::{fmt::Display, sync::Arc}; + +use engine_traits::{CacheRange, CF_DEFAULT, CF_WRITE}; +use skiplist_rs::Skiplist; +use slog_global::{info, warn}; +use txn_types::{Key, WriteRef, WriteType}; + +use crate::{ + keys::{decode_key, encoding_for_filter, InternalKey, InternalKeyComparator}, + memory_limiter::GlobalMemoryLimiter, + RangeCacheMemoryEngine, +}; + +/// Try to extract the key and `u64` timestamp from `encoded_key`. +/// +/// See also: [`txn_types::Key::split_on_ts_for`] +fn split_ts(key: &[u8]) -> Result<(&[u8], u64), String> { + match Key::split_on_ts_for(key) { + Ok((key, ts)) => Ok((key, ts.into_inner())), + Err(_) => Err(format!( + "invalid write cf key: {}", + log_wrappers::Value(key) + )), + } +} + +fn parse_write(value: &[u8]) -> Result, String> { + match WriteRef::parse(value) { + Ok(write) => Ok(write), + Err(_) => Err(format!( + "invalid write cf value: {}", + log_wrappers::Value(value) + )), + } +} + +#[derive(Debug)] +pub struct GcTask { + pub safe_point: u64, +} + +impl Display for GcTask { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("GcTask") + .field("safe_point", &self.safe_point) + .finish() + } +} + +pub struct GcRunner { + memory_engine: RangeCacheMemoryEngine, +} + +impl GcRunner { + pub fn new(memory_engine: RangeCacheMemoryEngine) -> Self { + Self { memory_engine } + } + + fn gc_range(&mut self, range: &CacheRange, safe_point: u64) { + let (skiplist_engine, safe_ts) = { + let mut core = self.memory_engine.core().lock().unwrap(); + let Some(range_meta) = core.mut_range_manager().mut_range_meta(range) else { + return; + }; + let min_snapshot = range_meta + .range_snapshot_list() + .min_snapshot_ts() + .unwrap_or(u64::MAX); + let safe_point = u64::min(safe_point, min_snapshot); + + if safe_point <= range_meta.safe_point() { + info!( + "safe point not large enough"; + "prev" => range_meta.safe_point(), + "current" => safe_point, + ); + return; + } + + // 
todo: change it to debug! + info!( + "safe point update"; + "prev" => range_meta.safe_point(), + "current" => safe_point, + "range" => ?range, + ); + range_meta.set_safe_point(safe_point); + (core.engine(), safe_point) + }; + + let write_cf_handle = skiplist_engine.cf_handle(CF_WRITE); + let default_cf_handle = skiplist_engine.cf_handle(CF_DEFAULT); + let mut filter = Filter::new(safe_ts, default_cf_handle, write_cf_handle.clone()); + + let mut iter = write_cf_handle.iter(); + iter.seek_to_first(); + let mut count = 0; + while iter.valid() { + let k = iter.key(); + let v = iter.value(); + if let Err(e) = filter.filter(k, v) { + warn!( + "Something Wrong in memory engine GC"; + "error" => ?e, + ); + } + iter.next(); + count += 1; + } + + info!( + "range gc complete"; + "range" => ?range, + "total_version" => count, + "unique_keys" => filter.unique_key, + "outdated_version" => filter.versions, + "outdated_delete_version" => filter.delete_versions, + "filtered_version" => filter.filtered, + ); + } +} + +struct Filter { + safe_point: u64, + mvcc_key_prefix: Vec, + remove_older: bool, + + default_cf_handle: Arc>, + write_cf_handle: Arc>, + + // the total size of the keys buffered, when it exceeds the limit, all keys in the buffer will + // be removed + filtered_write_key_size: usize, + filtered_write_key_buffer: Vec>, + cached_delete_key: Option>, + + versions: usize, + delete_versions: usize, + filtered: usize, + unique_key: usize, + mvcc_rollback_and_locks: usize, +} + +impl Drop for Filter { + fn drop(&mut self) { + if let Some(cached_delete_key) = self.cached_delete_key.take() { + self.write_cf_handle.remove(cached_delete_key.as_slice()); + } + } +} + +impl Filter { + fn new( + safe_point: u64, + default_cf_handle: Arc>, + write_cf_handle: Arc>, + ) -> Self { + Self { + safe_point, + default_cf_handle, + write_cf_handle, + unique_key: 0, + filtered_write_key_size: 0, + filtered_write_key_buffer: Vec::with_capacity(100), + mvcc_key_prefix: vec![], + delete_versions: 
0, + versions: 0, + filtered: 0, + cached_delete_key: None, + mvcc_rollback_and_locks: 0, + remove_older: false, + } + } + + fn filter(&mut self, key: &[u8], value: &[u8]) -> Result<(), String> { + let InternalKey { user_key, .. } = decode_key(key); + + let (mvcc_key_prefix, commit_ts) = split_ts(user_key)?; + if commit_ts > self.safe_point { + return Ok(()); + } + + self.versions += 1; + if self.mvcc_key_prefix != mvcc_key_prefix { + self.unique_key += 1; + self.mvcc_key_prefix.clear(); + self.mvcc_key_prefix.extend_from_slice(mvcc_key_prefix); + self.remove_older = false; + if let Some(cached_delete_key) = self.cached_delete_key.take() { + self.write_cf_handle.remove(&cached_delete_key); + } + } + + let mut filtered = self.remove_older; + let write = parse_write(value)?; + if !self.remove_older { + match write.write_type { + WriteType::Rollback | WriteType::Lock => { + self.mvcc_rollback_and_locks += 1; + filtered = true; + } + WriteType::Put => self.remove_older = true, + WriteType::Delete => { + self.delete_versions += 1; + self.remove_older = true; + + // The first mvcc type below safe point is the mvcc delete. We should delay to + // remove it until all the followings with the same user key have been deleted + // to avoid older version apper. + self.cached_delete_key = Some(key.to_vec()); + } + } + } + + if !filtered { + return Ok(()); + } + self.filtered += 1; + self.write_cf_handle.remove(key); + self.handle_filtered_write(write)?; + + Ok(()) + } + + fn handle_filtered_write(&mut self, write: WriteRef<'_>) -> std::result::Result<(), String> { + if write.short_value.is_none() && write.write_type == WriteType::Put { + // todo(SpadeA): We don't know the sequence number of the key in the skiplist so + // we cannot delete it directly. So we encoding a key with MAX sequence number + // so we can find the mvcc key with sequence number in the skiplist by using + // get_with_key and delete it with the result key. 
It involes more than one + // seek(both get and remove invovle seek). Maybe we can provide the API to + // delete the mvcc keys with all sequence numbers. + let default_key = encoding_for_filter(&self.mvcc_key_prefix, write.start_ts); + while let Some((key, val)) = self.default_cf_handle.get_with_key(&default_key) { + self.default_cf_handle.remove(key.as_slice()); + } + } + Ok(()) + } +} + +#[cfg(test)] +pub mod tests { + use core::slice::SlicePattern; + use std::sync::Arc; + + use bytes::Bytes; + use engine_traits::{CacheRange, RangeCacheEngine, CF_DEFAULT, CF_WRITE}; + use skiplist_rs::Skiplist; + use txn_types::{Key, TimeStamp, Write, WriteType}; + + use super::Filter; + use crate::{ + engine::SkiplistEngine, + gc::GcRunner, + keys::{encode_key, encoding_for_filter, InternalKeyComparator, ValueType}, + memory_limiter::GlobalMemoryLimiter, + RangeCacheMemoryEngine, + }; + + fn put_data( + key: &[u8], + value: &[u8], + start_ts: u64, + commit_ts: u64, + seq_num: u64, + short_value: bool, + default_cf: &Arc>, + write_cf: &Arc>, + ) { + let write_k = Key::from_raw(key) + .append_ts(TimeStamp::new(commit_ts)) + .into_encoded(); + let write_k = encode_key(&write_k, seq_num, ValueType::Value); + let write_v = Write::new( + WriteType::Put, + TimeStamp::new(start_ts), + if short_value { + Some(value.to_vec()) + } else { + None + }, + ); + write_cf.put(write_k, Bytes::from(write_v.as_ref().to_bytes())); + + if !short_value { + let default_k = Key::from_raw(key) + .append_ts(TimeStamp::new(start_ts)) + .into_encoded(); + let default_k = encode_key(&default_k, seq_num + 1, ValueType::Value); + default_cf.put(default_k, Bytes::from(value.to_vec())); + } + } + + fn delete_data( + key: &[u8], + ts: u64, + seq_num: u64, + write_cf: &Arc>, + ) { + let write_k = Key::from_raw(key) + .append_ts(TimeStamp::new(ts)) + .into_encoded(); + let write_k = encode_key(&write_k, seq_num, ValueType::Value); + let write_v = Write::new(WriteType::Delete, TimeStamp::new(ts), None); + 
write_cf.put(write_k, Bytes::from(write_v.as_ref().to_bytes())); + } + + fn rollback_data( + key: &[u8], + ts: u64, + seq_num: u64, + write_cf: &Arc>, + ) { + let write_k = Key::from_raw(key) + .append_ts(TimeStamp::new(ts)) + .into_encoded(); + let write_k = encode_key(&write_k, seq_num, ValueType::Value); + let write_v = Write::new(WriteType::Rollback, TimeStamp::new(ts), None); + write_cf.put(write_k, Bytes::from(write_v.as_ref().to_bytes())); + } + + fn element_count(sklist: &Arc>) -> u64 { + let mut count = 0; + let mut iter = sklist.iter(); + iter.seek_to_first(); + while iter.valid() { + count += 1; + iter.next(); + } + count + } + + #[test] + fn test_filter() { + let skiplist_engine = SkiplistEngine::new(Arc::default()); + let write = skiplist_engine.cf_handle(CF_WRITE); + let default = skiplist_engine.cf_handle(CF_DEFAULT); + + put_data(b"key1", b"value1", 10, 15, 10, false, &default, &write); + put_data(b"key2", b"value21", 10, 15, 12, false, &default, &write); + put_data(b"key2", b"value22", 20, 25, 14, false, &default, &write); + // mock repeate apply + put_data(b"key2", b"value22", 20, 25, 15, false, &default, &write); + put_data(b"key2", b"value23", 30, 35, 16, false, &default, &write); + put_data(b"key3", b"value31", 20, 25, 18, false, &default, &write); + put_data(b"key3", b"value32", 30, 35, 20, false, &default, &write); + delete_data(b"key3", 40, 22, &write); + assert_eq!(7, element_count(&default)); + assert_eq!(8, element_count(&write)); + + let mut filter = Filter::new(50, default.clone(), write.clone()); + let mut count = 0; + let mut iter = write.iter(); + iter.seek_to_first(); + while iter.valid() { + let k = iter.key(); + let v = iter.value(); + filter.filter(k.as_slice(), v.as_slice()).unwrap(); + count += 1; + iter.next(); + } + assert_eq!(count, 8); + drop(filter); + + assert_eq!(2, element_count(&write)); + assert_eq!(2, element_count(&default)); + + let encode_key = |key, ts| { + let key = Key::from_raw(key); + 
encoding_for_filter(key.as_encoded(), ts) + }; + + let key = encode_key(b"key1", TimeStamp::new(15)); + assert!(write.get(&key).is_some()); + + let key = encode_key(b"key2", TimeStamp::new(35)); + assert!(write.get(&key).is_some()); + + let key = encode_key(b"key3", TimeStamp::new(35)); + assert!(write.get(&key).is_none()); + + let key = encode_key(b"key1", TimeStamp::new(10)); + assert!(default.get(&key).is_some()); + + let key = encode_key(b"key2", TimeStamp::new(30)); + assert!(default.get(&key).is_some()); + + let key = encode_key(b"key3", TimeStamp::new(30)); + assert!(default.get(&key).is_none()); + } + + #[test] + fn test_gc() { + let engine = RangeCacheMemoryEngine::new(Arc::default()); + let range = CacheRange::new(b"".to_vec(), b"z".to_vec()); + engine.new_range(range.clone()); + let (write, default) = { + let mut core = engine.core().lock().unwrap(); + let skiplist_engine = core.engine(); + core.mut_range_manager().set_range_readable(&range, true); + ( + skiplist_engine.cf_handle(CF_WRITE), + skiplist_engine.cf_handle(CF_DEFAULT), + ) + }; + + let encode_key = |key, ts| { + let key = Key::from_raw(key); + encoding_for_filter(key.as_encoded(), ts) + }; + + put_data(b"key1", b"value1", 10, 11, 10, false, &default, &write); + put_data(b"key1", b"value2", 12, 13, 12, false, &default, &write); + put_data(b"key1", b"value3", 14, 15, 14, false, &default, &write); + assert_eq!(3, element_count(&default)); + assert_eq!(3, element_count(&write)); + + let mut worker = GcRunner::new(engine); + + // gc will not remove the latest mvcc put below safe point + worker.gc_range(&range, 14); + assert_eq!(2, element_count(&default)); + assert_eq!(2, element_count(&write)); + + worker.gc_range(&range, 16); + assert_eq!(1, element_count(&default)); + assert_eq!(1, element_count(&write)); + + // rollback will not make the first older version be filtered + rollback_data(b"key1", 17, 16, &write); + worker.gc_range(&range, 17); + assert_eq!(1, element_count(&default)); + 
assert_eq!(1, element_count(&write)); + let key = encode_key(b"key1", TimeStamp::new(15)); + assert!(write.get(&key).is_some()); + let key = encode_key(b"key1", TimeStamp::new(14)); + assert!(default.get(&key).is_some()); + + // unlike in WriteCompactionFilter, the latest mvcc delete below safe point will + // be filtered + delete_data(b"key1", 19, 18, &write); + worker.gc_range(&range, 19); + assert_eq!(0, element_count(&write)); + assert_eq!(0, element_count(&default)); + } + + #[test] + fn test_snapshot_block_gc() { + let engine = RangeCacheMemoryEngine::new(Arc::default()); + let range = CacheRange::new(b"".to_vec(), b"z".to_vec()); + engine.new_range(range.clone()); + let (write, default) = { + let mut core = engine.core().lock().unwrap(); + let skiplist_engine = core.engine(); + core.mut_range_manager().set_range_readable(&range, true); + ( + skiplist_engine.cf_handle(CF_WRITE), + skiplist_engine.cf_handle(CF_DEFAULT), + ) + }; + + put_data(b"key1", b"value1", 10, 11, 10, false, &default, &write); + put_data(b"key2", b"value21", 10, 11, 12, false, &default, &write); + put_data(b"key2", b"value22", 15, 16, 14, false, &default, &write); + put_data(b"key2", b"value23", 20, 21, 16, false, &default, &write); + put_data(b"key3", b"value31", 5, 6, 18, false, &default, &write); + put_data(b"key3", b"value32", 10, 11, 20, false, &default, &write); + assert_eq!(6, element_count(&default)); + assert_eq!(6, element_count(&write)); + + let mut worker = GcRunner::new(engine.clone()); + let s1 = engine.snapshot(range.clone(), 10, u64::MAX); + let s2 = engine.snapshot(range.clone(), 11, u64::MAX); + let s3 = engine.snapshot(range.clone(), 20, u64::MAX); + + // nothing will be removed due to snapshot 5 + worker.gc_range(&range, 30); + assert_eq!(6, element_count(&default)); + assert_eq!(6, element_count(&write)); + + drop(s1); + worker.gc_range(&range, 30); + assert_eq!(5, element_count(&default)); + assert_eq!(5, element_count(&write)); + + drop(s2); + 
worker.gc_range(&range, 30); + assert_eq!(4, element_count(&default)); + assert_eq!(4, element_count(&write)); + + drop(s3); + worker.gc_range(&range, 30); + assert_eq!(3, element_count(&default)); + assert_eq!(3, element_count(&write)); + } +} diff --git a/components/region_cache_memory_engine/src/keys.rs b/components/region_cache_memory_engine/src/keys.rs index 9b0564594f7..ec412dafee2 100644 --- a/components/region_cache_memory_engine/src/keys.rs +++ b/components/region_cache_memory_engine/src/keys.rs @@ -6,6 +6,7 @@ use bytes::{BufMut, Bytes, BytesMut}; use engine_traits::CacheRange; use skiplist_rs::KeyComparator; use tikv_util::codec::number::NumberEncoder; +use txn_types::{Key, TimeStamp}; #[derive(Debug, Clone, Copy, PartialEq)] pub enum ValueType { @@ -30,12 +31,13 @@ impl TryFrom for ValueType { } pub struct InternalKey<'a> { + // key with mvcc version pub user_key: &'a [u8], pub v_type: ValueType, pub sequence: u64, } -const ENC_KEY_SEQ_LENGTH: usize = std::mem::size_of::(); +pub const ENC_KEY_SEQ_LENGTH: usize = std::mem::size_of::(); impl<'a> From<&'a [u8]> for InternalKey<'a> { fn from(encoded_key: &'a [u8]) -> Self { @@ -127,6 +129,17 @@ pub fn encode_key_for_eviction(range: &CacheRange) -> (Vec, Vec) { (encoded_start, encoded_end) } +#[inline] +pub fn encoding_for_filter(mvcc_prefix: &[u8], start_ts: TimeStamp) -> Vec { + let mut default_key = Vec::with_capacity(mvcc_prefix.len() + 2 * ENC_KEY_SEQ_LENGTH); + default_key.extend_from_slice(mvcc_prefix); + let mut default_key = Key::from_encoded(default_key) + .append_ts(start_ts) + .into_encoded(); + default_key.put_u64((u64::MAX << 8) | VALUE_TYPE_FOR_SEEK as u64); + default_key +} + #[derive(Default, Debug, Clone, Copy)] pub struct InternalKeyComparator {} diff --git a/components/region_cache_memory_engine/src/lib.rs b/components/region_cache_memory_engine/src/lib.rs index 2bf35f96bfa..99f4d0bc0fb 100644 --- a/components/region_cache_memory_engine/src/lib.rs +++ 
b/components/region_cache_memory_engine/src/lib.rs @@ -6,8 +6,10 @@ #![feature(slice_pattern)] mod engine; +mod gc; pub mod keys; pub use engine::RangeCacheMemoryEngine; pub mod range_manager; mod write_batch; pub use write_batch::RangeCacheWriteBatch; +mod memory_limiter; diff --git a/components/region_cache_memory_engine/src/memory_limiter.rs b/components/region_cache_memory_engine/src/memory_limiter.rs new file mode 100644 index 00000000000..245c7c5432f --- /dev/null +++ b/components/region_cache_memory_engine/src/memory_limiter.rs @@ -0,0 +1,47 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +use std::sync::{Arc, Mutex}; + +use collections::{HashMap, HashSet}; +use skiplist_rs::{AllocationRecorder, MemoryLimiter, Node}; + +// todo: implement a real memory limiter. Now, it is used for test. +#[derive(Clone, Default)] +pub struct GlobalMemoryLimiter { + pub(crate) recorder: Arc>>, + pub(crate) removed: Arc>>>, +} + +impl MemoryLimiter for GlobalMemoryLimiter { + fn acquire(&self, n: usize) -> bool { + true + } + + fn mem_usage(&self) -> usize { + 0 + } + + fn reclaim(&self, n: usize) {} +} + +impl AllocationRecorder for GlobalMemoryLimiter { + fn alloc(&self, addr: usize, size: usize) { + let mut recorder = self.recorder.lock().unwrap(); + assert!(!recorder.contains_key(&addr)); + recorder.insert(addr, size); + } + + fn free(&self, addr: usize, size: usize) { + let node = addr as *mut Node; + let mut removed = self.removed.lock().unwrap(); + removed.insert(unsafe { (*node).key().to_vec() }); + let mut recorder = self.recorder.lock().unwrap(); + assert_eq!(recorder.remove(&addr).unwrap(), size); + } +} + +impl Drop for GlobalMemoryLimiter { + fn drop(&mut self) { + assert!(self.recorder.lock().unwrap().is_empty()); + } +} diff --git a/components/region_cache_memory_engine/src/range_manager.rs b/components/region_cache_memory_engine/src/range_manager.rs index 2fda42c35af..78fb8c3a2da 100644 --- 
a/components/region_cache_memory_engine/src/range_manager.rs +++ b/components/region_cache_memory_engine/src/range_manager.rs @@ -24,6 +24,15 @@ impl RangeMeta { } } + pub(crate) fn safe_point(&self) -> u64 { + self.safe_point + } + + pub(crate) fn set_safe_point(&mut self, safe_point: u64) { + assert!(self.safe_point <= safe_point); + self.safe_point = safe_point; + } + fn derive_from(id: u64, r: &RangeMeta) -> Self { Self { id, @@ -88,6 +97,10 @@ impl RangeManager { meta.can_read = set_readable; } + pub fn mut_range_meta(&mut self, range: &CacheRange) -> Option<&mut RangeMeta> { + self.ranges.get_mut(range) + } + pub fn set_safe_ts(&mut self, range: &CacheRange, safe_ts: u64) -> bool { if let Some(meta) = self.ranges.get_mut(range) { if meta.safe_point > safe_ts { From 43d0e061159100092d89147480a94b51b6a158b4 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Thu, 25 Jan 2024 21:48:21 +0800 Subject: [PATCH 1134/1149] Cargo: fix cargo vendor by upgrading encoding_rs (#16446) close tikv/tikv#16445 Signed-off-by: Neil Shen Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8550a1dc3cc..c9453b02862 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1435,16 +1435,16 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" [[package]] name = "encoding_rs" version = "0.8.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a74ea89a0a1b98f6332de42c95baff457ada66d1cb4030f9ff151b2041a1c746" +source = "git+https://github.com/tikv/encoding_rs.git?rev=68e0bc5a72a37a78228d80cd98047326559cf43c#68e0bc5a72a37a78228d80cd98047326559cf43c" dependencies = [ "cfg-if 1.0.0", ] [[package]] name = "encoding_rs" -version = "0.8.29" -source = "git+https://github.com/tikv/encoding_rs.git?rev=68e0bc5a72a37a78228d80cd98047326559cf43c#68e0bc5a72a37a78228d80cd98047326559cf43c" +version = "0.8.33" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" dependencies = [ "cfg-if 1.0.0", ] @@ -4556,7 +4556,7 @@ checksum = "0460542b551950620a3648c6aa23318ac6b3cd779114bd873209e6e8b5eb1c34" dependencies = [ "base64 0.13.0", "bytes", - "encoding_rs 0.8.29 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.33", "futures-core", "futures-util", "http", @@ -6229,7 +6229,7 @@ dependencies = [ "codec", "collections", "crc32fast", - "encoding_rs 0.8.29 (git+https://github.com/tikv/encoding_rs.git?rev=68e0bc5a72a37a78228d80cd98047326559cf43c)", + "encoding_rs 0.8.29", "error_code", "hex 0.4.2", "kvproto", From 550ecc39be8cb22df4cbf4731e5fde8e365d51ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BA=8C=E6=89=8B=E6=8E=89=E5=8C=85=E5=B7=A5=E7=A8=8B?= =?UTF-8?q?=E5=B8=88?= Date: Tue, 30 Jan 2024 17:39:23 +0800 Subject: [PATCH 1135/1149] statistics: use std from trait (#16464) ref tikv/tikv#16463 Use the standard from and into traits. 
Signed-off-by: hi-rustin --- src/coprocessor/statistics/analyze.rs | 78 ++++++++++++++----------- src/coprocessor/statistics/cmsketch.rs | 8 ++- src/coprocessor/statistics/fmsketch.rs | 18 +++--- src/coprocessor/statistics/histogram.rs | 44 +++++++------- 4 files changed, 83 insertions(+), 65 deletions(-) diff --git a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index a49ac72398e..3935bc01d62 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -93,7 +93,7 @@ impl AnalyzeContext { let (col_res, _) = builder.collect_columns_stats().await?; let res_data = { - let res = col_res.into_proto(); + let res: tipb::AnalyzeColumnsResp = col_res.into(); box_try!(res.write_to_bytes()) }; Ok(res_data) @@ -103,13 +103,13 @@ impl AnalyzeContext { let (col_res, idx_res) = builder.collect_columns_stats().await?; let res_data = { - let resp = AnalyzeMixedResult::new( + let resp: tipb::AnalyzeMixedResp = AnalyzeMixedResult::new( col_res, idx_res.ok_or_else(|| { Error::Other("Mixed analyze type should have index response.".into()) })?, ) - .into_proto(); + .into(); box_try!(resp.write_to_bytes()) }; Ok(res_data) @@ -118,7 +118,7 @@ impl AnalyzeContext { async fn handle_full_sampling(builder: &mut RowSampleBuilder) -> Result> { let sample_res = builder.collect_column_stats().await?; let res_data = { - let res = sample_res.into_proto(); + let res: tipb::AnalyzeColumnsResp = sample_res.into(); box_try!(res.write_to_bytes()) }; Ok(res_data) @@ -208,7 +208,7 @@ impl AnalyzeContext { } } - let res = AnalyzeIndexResult::new(hist, cms, Some(fms)).into_proto(); + let res: tipb::AnalyzeIndexResp = AnalyzeIndexResult::new(hist, cms, Some(fms)).into(); let dt = box_try!(res.write_to_bytes()); Ok(dt) } @@ -597,7 +597,7 @@ impl BaseRowSampleCollector { proto_collector.set_count(self.count as i64); let pb_fm_sketches = mem::take(&mut self.fm_sketches) .into_iter() - .map(|fm_sketch| fm_sketch.into_proto()) + 
.map(|fm_sketch| fm_sketch.into()) .collect(); proto_collector.set_fm_sketch(pb_fm_sketches); proto_collector.set_total_size(self.total_sizes.clone()); @@ -1068,19 +1068,6 @@ impl SampleCollector { } } - fn into_proto(self) -> tipb::SampleCollector { - let mut s = tipb::SampleCollector::default(); - s.set_null_count(self.null_count as i64); - s.set_count(self.count as i64); - s.set_fm_sketch(self.fm_sketch.into_proto()); - s.set_samples(self.samples.into()); - if let Some(c) = self.cm_sketch { - s.set_cm_sketch(c.into_proto()) - } - s.set_total_size(self.total_size as i64); - s - } - pub fn collect(&mut self, data: Vec) { if data[0] == NIL_FLAG { self.null_count += 1; @@ -1105,6 +1092,21 @@ impl SampleCollector { } } +impl From for tipb::SampleCollector { + fn from(collector: SampleCollector) -> tipb::SampleCollector { + let mut s = tipb::SampleCollector::default(); + s.set_null_count(collector.null_count as i64); + s.set_count(collector.count as i64); + s.set_fm_sketch(collector.fm_sketch.into()); + s.set_samples(collector.samples.into()); + if let Some(c) = collector.cm_sketch { + s.set_cm_sketch(c.into()) + } + s.set_total_size(collector.total_size as i64); + s + } +} + struct AnalyzeSamplingResult { row_sample_collector: Box, } @@ -1115,9 +1117,11 @@ impl AnalyzeSamplingResult { row_sample_collector, } } +} - fn into_proto(mut self) -> tipb::AnalyzeColumnsResp { - let pb_collector = self.row_sample_collector.to_proto(); +impl From for tipb::AnalyzeColumnsResp { + fn from(mut result: AnalyzeSamplingResult) -> tipb::AnalyzeColumnsResp { + let pb_collector = result.row_sample_collector.to_proto(); let mut res = tipb::AnalyzeColumnsResp::default(); res.set_row_collector(pb_collector); res @@ -1144,13 +1148,15 @@ impl AnalyzeColumnsResult { pk_hist, } } +} - fn into_proto(self) -> tipb::AnalyzeColumnsResp { - let hist = self.pk_hist.into_proto(); - let cols: Vec = self +impl From for tipb::AnalyzeColumnsResp { + fn from(result: AnalyzeColumnsResult) -> 
tipb::AnalyzeColumnsResp { + let hist = result.pk_hist.into(); + let cols: Vec = result .sample_collectors .into_iter() - .map(|col| col.into_proto()) + .map(|col| col.into()) .collect(); let mut res = tipb::AnalyzeColumnsResp::default(); res.set_collectors(cols.into()); @@ -1171,16 +1177,18 @@ impl AnalyzeIndexResult { fn new(hist: Histogram, cms: Option, fms: Option) -> AnalyzeIndexResult { AnalyzeIndexResult { hist, cms, fms } } +} - fn into_proto(self) -> tipb::AnalyzeIndexResp { +impl From for tipb::AnalyzeIndexResp { + fn from(result: AnalyzeIndexResult) -> tipb::AnalyzeIndexResp { let mut res = tipb::AnalyzeIndexResp::default(); - res.set_hist(self.hist.into_proto()); - if let Some(c) = self.cms { - res.set_cms(c.into_proto()); + res.set_hist(result.hist.into()); + if let Some(c) = result.cms { + res.set_cms(c.into()); } - if let Some(f) = self.fms { + if let Some(f) = result.fms { let mut s = tipb::SampleCollector::default(); - s.set_fm_sketch(f.into_proto()); + s.set_fm_sketch(f.into()); res.set_collector(s); } res @@ -1198,11 +1206,13 @@ impl AnalyzeMixedResult { fn new(col_res: AnalyzeColumnsResult, idx_res: AnalyzeIndexResult) -> AnalyzeMixedResult { AnalyzeMixedResult { col_res, idx_res } } +} - fn into_proto(self) -> tipb::AnalyzeMixedResp { +impl From for tipb::AnalyzeMixedResp { + fn from(result: AnalyzeMixedResult) -> tipb::AnalyzeMixedResp { let mut res = tipb::AnalyzeMixedResp::default(); - res.set_index_resp(self.idx_res.into_proto()); - res.set_columns_resp(self.col_res.into_proto()); + res.set_index_resp(result.idx_res.into()); + res.set_columns_resp(result.col_res.into()); res } } diff --git a/src/coprocessor/statistics/cmsketch.rs b/src/coprocessor/statistics/cmsketch.rs index 754a05b0bb2..2663df4e4b9 100644 --- a/src/coprocessor/statistics/cmsketch.rs +++ b/src/coprocessor/statistics/cmsketch.rs @@ -58,10 +58,12 @@ impl CmSketch { pub fn push_to_top_n(&mut self, b: Vec, cnt: u64) { self.top_n.push((b, cnt)) } +} - pub fn into_proto(self) -> 
tipb::CmSketch { +impl From for tipb::CmSketch { + fn from(cm: CmSketch) -> tipb::CmSketch { let mut proto = tipb::CmSketch::default(); - let rows = self + let rows = cm .table .into_iter() .map(|row| { @@ -71,7 +73,7 @@ impl CmSketch { }) .collect(); proto.set_rows(rows); - let top_n_data = self + let top_n_data = cm .top_n .into_iter() .map(|(item, cnt)| { diff --git a/src/coprocessor/statistics/fmsketch.rs b/src/coprocessor/statistics/fmsketch.rs index 341223215f3..0418183b367 100644 --- a/src/coprocessor/statistics/fmsketch.rs +++ b/src/coprocessor/statistics/fmsketch.rs @@ -27,14 +27,6 @@ impl FmSketch { self.insert_hash_value(hash); } - pub fn into_proto(self) -> tipb::FmSketch { - let mut proto = tipb::FmSketch::default(); - proto.set_mask(self.mask); - let hash = self.hash_set.into_iter().collect(); - proto.set_hashset(hash); - proto - } - pub fn insert_hash_value(&mut self, hash_val: u64) { if (hash_val & self.mask) != 0 { return; @@ -48,6 +40,16 @@ impl FmSketch { } } +impl From for tipb::FmSketch { + fn from(fm: FmSketch) -> tipb::FmSketch { + let mut proto = tipb::FmSketch::default(); + proto.set_mask(fm.mask); + let hash = fm.hash_set.into_iter().collect(); + proto.set_hashset(hash); + proto + } +} + #[cfg(test)] mod tests { use std::{iter::repeat, slice::from_ref}; diff --git a/src/coprocessor/statistics/histogram.rs b/src/coprocessor/statistics/histogram.rs index b7a70600e39..f499cfbc3ee 100644 --- a/src/coprocessor/statistics/histogram.rs +++ b/src/coprocessor/statistics/histogram.rs @@ -48,15 +48,17 @@ impl Bucket { self.ndv += 1; } } +} - fn into_proto(self) -> tipb::Bucket { - let mut bucket = tipb::Bucket::default(); - bucket.set_repeats(self.repeats as i64); - bucket.set_count(self.count as i64); - bucket.set_lower_bound(self.lower_bound); - bucket.set_upper_bound(self.upper_bound); - bucket.set_ndv(self.ndv as i64); - bucket +impl From for tipb::Bucket { + fn from(bucket: Bucket) -> tipb::Bucket { + let mut b = tipb::Bucket::default(); + 
b.set_repeats(bucket.repeats as i64); + b.set_count(bucket.count as i64); + b.set_lower_bound(bucket.lower_bound); + b.set_upper_bound(bucket.upper_bound); + b.set_ndv(bucket.ndv as i64); + b } } @@ -82,18 +84,6 @@ impl Histogram { } } - pub fn into_proto(self) -> tipb::Histogram { - let mut hist = tipb::Histogram::default(); - hist.set_ndv(self.ndv as i64); - let buckets: Vec = self - .buckets - .into_iter() - .map(|bucket| bucket.into_proto()) - .collect(); - hist.set_buckets(buckets.into()); - hist - } - // insert a data bigger than or equal to the max value in current histogram. pub fn append(&mut self, data: &[u8], with_bucket_ndv: bool) { if let Some(bucket) = self.buckets.last_mut() { @@ -173,6 +163,20 @@ impl Histogram { } } +impl From for tipb::Histogram { + fn from(hist: Histogram) -> tipb::Histogram { + let mut h = tipb::Histogram::default(); + h.set_ndv(hist.ndv as i64); + let buckets: Vec = hist + .buckets + .into_iter() + .map(|bucket| bucket.into()) + .collect(); + h.set_buckets(buckets.into()); + h + } +} + #[cfg(test)] mod tests { use std::iter::repeat; From b67dd09c4f00dc98bce91ed64eef3496209cf359 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Tue, 30 Jan 2024 21:10:53 -0800 Subject: [PATCH 1136/1149] raftstore: improve the remove peer check (#16467) close tikv/tikv#16465 improve the remove peer check. 
Only check when the updating role is voter Signed-off-by: tonyxuqqi --- components/raftstore/src/store/util.rs | 69 ++++++++++++++++++-------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 68225a982b3..cee7691875d 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1094,13 +1094,7 @@ pub fn check_conf_change( return Err(box_err!("multiple changes that only effect learner")); } - check_remove_or_demote_voter( - region.get_id(), - cfg, - change_peers, - leader.get_id(), - peer_heartbeat, - )?; + check_remove_or_demote_voter(region, cfg, change_peers, leader.get_id(), peer_heartbeat)?; if !ignore_safety { let promoted_commit_index = after_progress.maximal_committed_index().0; let first_index = node.raft.raft_log.first_index(); @@ -1130,7 +1124,7 @@ pub fn check_conf_change( } fn check_remove_or_demote_voter( - region_id: u64, + region: &metapb::Region, cfg: &Config, change_peers: &[ChangePeerRequest], leader_id: u64, @@ -1139,16 +1133,24 @@ fn check_remove_or_demote_voter( let mut slow_peer_count = 0; let mut normal_peer_count = 0; // Here we assume if the last beartbeat is within 2 election timeout, the peer - // is healthy. This is to be tolerant to some slightly slow peers when - // the leader is in hibernate mode. + // is healthy. 
When a region is hibernate, we expect all its peers are *slow* + // and it would still allow the operation let slow_peer_threshold = 2 * cfg.raft_base_tick_interval.0 * cfg.raft_max_election_timeout_ticks as u32; for (id, last_heartbeat) in peer_heartbeat { - // leader itself is not a slow peer - if *id == leader_id || last_heartbeat.elapsed() <= slow_peer_threshold { - normal_peer_count += 1; - } else { - slow_peer_count += 1; + // for slow and normal peer calculation, we only count voter role + if region + .get_peers() + .iter() + .find(|p| p.get_id() == *id) + .map_or(false, |p| p.role == PeerRole::Voter) + { + // leader itself is not a slow peer + if *id == leader_id || last_heartbeat.elapsed() <= slow_peer_threshold { + normal_peer_count += 1; + } else { + slow_peer_count += 1; + } } } @@ -1158,10 +1160,16 @@ fn check_remove_or_demote_voter( if change_type == ConfChangeType::RemoveNode || change_type == ConfChangeType::AddLearnerNode { + let is_voter = region + .get_peers() + .iter() + .find(|p| p.get_id() == peer.get_id()) + .map_or(false, |p| p.role == PeerRole::Voter); + // If the change_type is AddLearnerNode and the last heartbeat is found, it // means it's a demote from voter as AddLearnerNode on existing learner node is // not allowed. - if let Some(last_heartbeat) = peer_heartbeat.get(&peer.get_id()) { + if is_voter && let Some(last_heartbeat) = peer_heartbeat.get(&peer.get_id()) { // peer itself is *not* slow peer, but current slow peer is >= total peers/2 if last_heartbeat.elapsed() <= slow_peer_threshold { normal_peer_count -= 1; @@ -1182,7 +1190,7 @@ fn check_remove_or_demote_voter( { return Err(box_err!( "Ignore conf change command on region {} because RemoveNode or Demote a voter on peers {:?} may lead to unavailability. 
There're {} slow peers and {} normal peers", - region_id, + region.get_id(), &normal_peers_to_remove, slow_peer_count, normal_peer_count @@ -2603,6 +2611,13 @@ mod tests { }, ]; + let mut region = Region::default(); + for i in 1..4 { + region.mut_peers().push(metapb::Peer { + id: i, + ..Default::default() + }); + } for i in 0..change_peers.len() { // Call the function under test and assert that the function returns failed let mut cp = vec![change_peers[i].clone()]; @@ -2620,7 +2635,7 @@ mod tests { std::time::Instant::now() - std::time::Duration::from_secs(1), ); // Call the function under test and assert that the function returns Ok - check_remove_or_demote_voter(1, &cfg, &cp, 1, &peer_heartbeat).unwrap(); + check_remove_or_demote_voter(®ion, &cfg, &cp, 1, &peer_heartbeat).unwrap(); // now make one peer slow if let Some(peer_heartbeat) = peer_heartbeat.get_mut(&3) { @@ -2628,7 +2643,7 @@ mod tests { } // Call the function under test - let result = check_remove_or_demote_voter(1, &cfg, &cp, 1, &peer_heartbeat); + let result = check_remove_or_demote_voter(®ion, &cfg, &cp, 1, &peer_heartbeat); // Assert that the function returns failed assert!(result.is_err()); @@ -2639,7 +2654,19 @@ mod tests { }) .into(); // Call the function under test - check_remove_or_demote_voter(1, &cfg, &cp, 1, &peer_heartbeat).unwrap(); + check_remove_or_demote_voter(®ion, &cfg, &cp, 1, &peer_heartbeat).unwrap(); + + // make peer to learner and remove the peer 2 + region.mut_peers()[1].set_role(metapb::PeerRole::Learner); + cp[0].peer = Some(metapb::Peer { + id: 2, + ..Default::default() + }) + .into(); + // Call the function under test + check_remove_or_demote_voter(®ion, &cfg, &cp, 1, &peer_heartbeat).unwrap(); + // set peer 2 voter again + region.mut_peers()[1].set_role(metapb::PeerRole::Voter); // there's no remove node, it's fine with slow peers. 
cp[0] = ChangePeerRequest { @@ -2652,7 +2679,7 @@ mod tests { ..Default::default() }; // Call the function under test - check_remove_or_demote_voter(1, &cfg, &cp, 1, &peer_heartbeat).unwrap(); + check_remove_or_demote_voter(®ion, &cfg, &cp, 1, &peer_heartbeat).unwrap(); } } } From 87d9a97185e21cc4891ee7d977ff8b1d60803bad Mon Sep 17 00:00:00 2001 From: Alex Feinberg Date: Tue, 30 Jan 2024 21:27:23 -0800 Subject: [PATCH 1137/1149] In-Memory Engine: WriteBatch with Skiplist Engine (#16433) ref tikv/tikv#16323 Update WriteBatch to assume a single skiplist and use RangeManager::contains. Implement and test `get_value_cf_opt` for `HybridEngineSnapshot`. Integrate single WriteBatch with HybridEngine. Signed-off-by: Alex Feinberg Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 1 + components/hybrid_engine/Cargo.toml | 1 + components/hybrid_engine/src/engine.rs | 41 ++++-- components/hybrid_engine/src/lib.rs | 1 + components/hybrid_engine/src/snapshot.rs | 59 +++++++- components/hybrid_engine/src/util.rs | 46 ++++++ components/hybrid_engine/src/write_batch.rs | 73 ++++++--- components/raftstore/src/store/worker/read.rs | 19 ++- .../region_cache_memory_engine/Cargo.toml | 1 + .../region_cache_memory_engine/src/engine.rs | 49 ++++--- .../region_cache_memory_engine/src/gc.rs | 6 +- .../src/write_batch.rs | 138 ++++++++++-------- 12 files changed, 302 insertions(+), 133 deletions(-) create mode 100644 components/hybrid_engine/src/util.rs diff --git a/Cargo.lock b/Cargo.lock index c9453b02862..b49b15805ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4529,6 +4529,7 @@ version = "0.0.1" dependencies = [ "bytes", "collections", + "crossbeam", "engine_rocks", "engine_traits", "log_wrappers", diff --git a/components/hybrid_engine/Cargo.toml b/components/hybrid_engine/Cargo.toml index c83b6bd48d2..95bb090666e 100644 --- a/components/hybrid_engine/Cargo.toml +++ b/components/hybrid_engine/Cargo.toml @@ -14,6 +14,7 @@ txn_types = { 
workspace = true } tikv_util = { workspace = true } engine_rocks = { workspace = true } region_cache_memory_engine = { workspace = true } +tempfile = "3.0" [dev-dependencies] tempfile = "3.0" diff --git a/components/hybrid_engine/src/engine.rs b/components/hybrid_engine/src/engine.rs index 3759554d49f..ccfa141a40c 100644 --- a/components/hybrid_engine/src/engine.rs +++ b/components/hybrid_engine/src/engine.rs @@ -1,8 +1,8 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. use engine_traits::{ - KvEngine, Peekable, RangeCacheEngine, ReadOptions, Result, SnapshotContext, SnapshotMiscExt, - SyncMutable, WriteBatchExt, + KvEngine, Mutable, Peekable, RangeCacheEngine, ReadOptions, Result, SnapshotContext, + SnapshotMiscExt, SyncMutable, WriteBatch, WriteBatchExt, }; use crate::snapshot::HybridEngineSnapshot; @@ -122,29 +122,48 @@ impl SyncMutable for HybridEngine where EK: KvEngine, EC: RangeCacheEngine, + HybridEngine: WriteBatchExt, { fn put(&self, key: &[u8], value: &[u8]) -> Result<()> { - unimplemented!() + let mut batch = self.write_batch(); + batch.put(key, value)?; + let _ = batch.write()?; + Ok(()) } fn put_cf(&self, cf: &str, key: &[u8], value: &[u8]) -> Result<()> { - unimplemented!() + let mut batch = self.write_batch(); + batch.put_cf(cf, key, value)?; + let _ = batch.write()?; + Ok(()) } fn delete(&self, key: &[u8]) -> Result<()> { - unimplemented!() + let mut batch = self.write_batch(); + batch.delete(key)?; + let _ = batch.write()?; + Ok(()) } fn delete_cf(&self, cf: &str, key: &[u8]) -> Result<()> { - unimplemented!() + let mut batch = self.write_batch(); + batch.delete_cf(cf, key)?; + let _ = batch.write()?; + Ok(()) } fn delete_range(&self, begin_key: &[u8], end_key: &[u8]) -> Result<()> { - unimplemented!() + let mut batch = self.write_batch(); + batch.delete_range(begin_key, end_key)?; + let _ = batch.write()?; + Ok(()) } fn delete_range_cf(&self, cf: &str, begin_key: &[u8], end_key: &[u8]) -> Result<()> { - unimplemented!() + let 
mut batch = self.write_batch(); + batch.delete_range_cf(cf, begin_key, end_key)?; + let _ = batch.write()?; + Ok(()) } } @@ -171,7 +190,7 @@ mod tests { let range = CacheRange::new(b"k00".to_vec(), b"k10".to_vec()); memory_engine.new_range(range.clone()); { - let mut core = memory_engine.core().lock().unwrap(); + let mut core = memory_engine.core().write().unwrap(); core.mut_range_manager().set_range_readable(&range, true); core.mut_range_manager().set_safe_ts(&range, 10); } @@ -188,14 +207,14 @@ mod tests { assert!(s.region_cache_snapshot_available()); { - let mut core = memory_engine.core().lock().unwrap(); + let mut core = memory_engine.core().write().unwrap(); core.mut_range_manager().set_range_readable(&range, false); } let s = hybrid_engine.snapshot(Some(snap_ctx.clone())); assert!(!s.region_cache_snapshot_available()); { - let mut core = memory_engine.core().lock().unwrap(); + let mut core = memory_engine.core().write().unwrap(); core.mut_range_manager().set_range_readable(&range, true); } snap_ctx.read_ts = 5; diff --git a/components/hybrid_engine/src/lib.rs b/components/hybrid_engine/src/lib.rs index 0778412a2c9..4212b5aac90 100644 --- a/components/hybrid_engine/src/lib.rs +++ b/components/hybrid_engine/src/lib.rs @@ -21,6 +21,7 @@ mod snapshot; mod sst; mod table_properties; mod ttl_properties; +pub mod util; mod write_batch; pub use engine::HybridEngine; diff --git a/components/hybrid_engine/src/snapshot.rs b/components/hybrid_engine/src/snapshot.rs index d30334aad84..7e8809b34e6 100644 --- a/components/hybrid_engine/src/snapshot.rs +++ b/components/hybrid_engine/src/snapshot.rs @@ -1,10 +1,13 @@ // Copyright 2023 TiKV Project Authors. Licensed under Apache-2.0. 
-use std::fmt::{self, Debug, Formatter}; +use std::{ + fmt::{self, Debug, Formatter}, + ops::Deref, +}; use engine_traits::{ - CfNamesExt, IterOptions, Iterable, KvEngine, Peekable, RangeCacheEngine, ReadOptions, Result, - Snapshot, SnapshotMiscExt, + CfNamesExt, DbVector, IterOptions, Iterable, KvEngine, Peekable, RangeCacheEngine, ReadOptions, + Result, Snapshot, SnapshotMiscExt, CF_DEFAULT, }; use crate::engine_iterator::HybridEngineIterator; @@ -33,6 +36,14 @@ where pub fn region_cache_snapshot_available(&self) -> bool { self.region_cache_snap.is_some() } + + pub fn region_cache_snap(&self) -> Option<&EC::Snapshot> { + self.region_cache_snap.as_ref() + } + + pub fn disk_snap(&self) -> &EK::Snapshot { + &self.disk_snap + } } impl Snapshot for HybridEngineSnapshot @@ -64,15 +75,40 @@ where } } +/// TODO: May be possible to replace this with an Either. +pub struct HybridDbVector(Box); + +impl DbVector for HybridDbVector {} + +impl Deref for HybridDbVector { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + &self.0 + } +} + +impl Debug for HybridDbVector { + fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result { + write!(formatter, "{:?}", &**self) + } +} + +impl<'a> PartialEq<&'a [u8]> for HybridDbVector { + fn eq(&self, rhs: &&[u8]) -> bool { + **rhs == **self + } +} + impl Peekable for HybridEngineSnapshot where EK: KvEngine, EC: RangeCacheEngine, { - type DbVector = EK::DbVector; + type DbVector = HybridDbVector; fn get_value_opt(&self, opts: &ReadOptions, key: &[u8]) -> Result> { - unimplemented!() + self.get_value_cf_opt(opts, CF_DEFAULT, key) } fn get_value_cf_opt( @@ -81,7 +117,18 @@ where cf: &str, key: &[u8], ) -> Result> { - unimplemented!() + self.region_cache_snap.as_ref().map_or_else( + || { + self.disk_snap + .get_value_cf_opt(opts, cf, key) + .map(|r| r.map(|e| HybridDbVector(Box::new(e)))) + }, + |cache_snapshot| { + cache_snapshot + .get_value_cf_opt(opts, cf, key) + .map(|r| r.map(|e| HybridDbVector(Box::new(e)))) + }, + ) } } diff 
--git a/components/hybrid_engine/src/util.rs b/components/hybrid_engine/src/util.rs new file mode 100644 index 00000000000..f539dccba75 --- /dev/null +++ b/components/hybrid_engine/src/util.rs @@ -0,0 +1,46 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +use std::sync::Arc; + +use engine_rocks::{util::new_engine, RocksEngine}; +use engine_traits::{Result, CF_DEFAULT, CF_LOCK, CF_WRITE}; +use region_cache_memory_engine::RangeCacheMemoryEngine; +use tempfile::{Builder, TempDir}; + +use crate::HybridEngine; + +/// Create a [`HybridEngine`] using temporary storage in `prefix`. +/// Once the memory engine is created, runs `configure_memory_engine_fn`. +/// Returns the handle to temporary directory and HybridEngine. +/// # Example +/// +/// ``` +/// use hybrid_engine::util::hybrid_engine_for_tests; +/// let (_path, _hybrid_engine) = hybrid_engine_for_tests("temp", |memory_engine| { +/// let range = engine_traits::CacheRange::new(b"k00".to_vec(), b"k10".to_vec()); +/// memory_engine.new_range(range.clone()); +/// { +/// let mut core = memory_engine.core().write().unwrap(); +/// core.mut_range_manager().set_range_readable(&range, true); +/// core.mut_range_manager().set_safe_ts(&range, 10); +/// } +/// }) +/// .unwrap(); +/// ``` +pub fn hybrid_engine_for_tests( + prefix: &str, + configure_memory_engine_fn: F, +) -> Result<(TempDir, HybridEngine)> +where + F: FnOnce(&RangeCacheMemoryEngine), +{ + let path = Builder::new().prefix(prefix).tempdir()?; + let disk_engine = new_engine( + path.path().to_str().unwrap(), + &[CF_DEFAULT, CF_LOCK, CF_WRITE], + )?; + let memory_engine = RangeCacheMemoryEngine::new(Arc::default()); + configure_memory_engine_fn(&memory_engine); + let hybrid_engine = HybridEngine::new(disk_engine, memory_engine); + Ok((path, hybrid_engine)) +} diff --git a/components/hybrid_engine/src/write_batch.rs b/components/hybrid_engine/src/write_batch.rs index 054e6d116d8..6857b01e38a 100644 --- 
a/components/hybrid_engine/src/write_batch.rs +++ b/components/hybrid_engine/src/write_batch.rs @@ -124,34 +124,63 @@ impl Mutable for HybridEngineWriteBatch { #[cfg(test)] mod tests { - use std::sync::Arc; + use engine_traits::{ + CacheRange, KvEngine, Mutable, Peekable, SnapshotContext, WriteBatch, WriteBatchExt, + }; - use engine_rocks::util::new_engine; - use engine_traits::{CacheRange, WriteBatchExt, CF_DEFAULT, CF_LOCK, CF_WRITE}; - use region_cache_memory_engine::RangeCacheMemoryEngine; - use tempfile::Builder; + use crate::util::hybrid_engine_for_tests; - use crate::HybridEngine; + #[test] + fn test_write_to_both_engines() { + let range = CacheRange::new(b"".to_vec(), b"z".to_vec()); + let range_clone = range.clone(); + let (_path, hybrid_engine) = hybrid_engine_for_tests("temp", move |memory_engine| { + memory_engine.new_range(range_clone.clone()); + { + let mut core = memory_engine.core().write().unwrap(); + core.mut_range_manager() + .set_range_readable(&range_clone, true); + core.mut_range_manager().set_safe_ts(&range_clone, 5); + } + }) + .unwrap(); + let mut write_batch = hybrid_engine.write_batch(); + write_batch.put(b"hello", b"world").unwrap(); + let seq = write_batch.write().unwrap(); + assert!(seq > 0); + let actual: &[u8] = &hybrid_engine.get_value(b"hello").unwrap().unwrap(); + assert_eq!(b"world", &actual); + let ctx = SnapshotContext { + range: Some(range.clone()), + read_ts: 10, + }; + let snap = hybrid_engine.snapshot(Some(ctx)); + let actual: &[u8] = &snap.get_value(b"hello").unwrap().unwrap(); + assert_eq!(b"world", &actual); + let actual: &[u8] = &snap.disk_snap().get_value(b"hello").unwrap().unwrap(); + assert_eq!(b"world", &actual); + let actual: &[u8] = &snap + .region_cache_snap() + .unwrap() + .get_value(b"hello") + .unwrap() + .unwrap(); + assert_eq!(b"world", &actual); + } #[test] - fn test_region_cache_memory_engine() { - let path = Builder::new().prefix("temp").tempdir().unwrap(); - let disk_engine = new_engine( - 
path.path().to_str().unwrap(), - &[CF_DEFAULT, CF_LOCK, CF_WRITE], - ) + fn test_range_cache_memory_engine() { + let (_path, hybrid_engine) = hybrid_engine_for_tests("temp", |memory_engine| { + let range = CacheRange::new(b"k00".to_vec(), b"k10".to_vec()); + memory_engine.new_range(range.clone()); + { + let mut core = memory_engine.core().write().unwrap(); + core.mut_range_manager().set_range_readable(&range, true); + core.mut_range_manager().set_safe_ts(&range, 10); + } + }) .unwrap(); - let memory_engine = RangeCacheMemoryEngine::new(Arc::default()); - let range = CacheRange::new(b"k00".to_vec(), b"k10".to_vec()); - memory_engine.new_range(range.clone()); - { - let mut core = memory_engine.core().lock().unwrap(); - core.mut_range_manager().set_range_readable(&range, true); - core.mut_range_manager().set_safe_ts(&range, 10); - } - let hybrid_engine = - HybridEngine::<_, RangeCacheMemoryEngine>::new(disk_engine, memory_engine.clone()); let mut write_batch = hybrid_engine.write_batch(); write_batch .cache_write_batch diff --git a/components/raftstore/src/store/worker/read.rs b/components/raftstore/src/store/worker/read.rs index b760435f22e..304d420bb68 100644 --- a/components/raftstore/src/store/worker/read.rs +++ b/components/raftstore/src/store/worker/read.rs @@ -1294,6 +1294,7 @@ mod tests { use engine_test::kv::{KvTestEngine, KvTestSnapshot}; use engine_traits::{CacheRange, MiscExt, Peekable, SyncMutable, ALL_CFS}; use hybrid_engine::{HybridEngine, HybridEngineSnapshot}; + use keys::DATA_PREFIX; use kvproto::{metapb::RegionEpoch, raft_cmdpb::*}; use region_cache_memory_engine::RangeCacheMemoryEngine; use tempfile::{Builder, TempDir}; @@ -2533,6 +2534,15 @@ mod tests { }; let leader2 = prs[0].clone(); region1.set_region_epoch(epoch13.clone()); + let range = CacheRange::from_region(®ion1); + memory_engine.new_range(range.clone()); + { + let mut core = memory_engine.core().write().unwrap(); + core.mut_range_manager().set_range_readable(&range, true); + 
core.mut_range_manager().set_safe_ts(&range, 1); + } + let kv = (&[DATA_PREFIX, b'a'], b"b"); + reader.kv_engine.put(kv.0, kv.1).unwrap(); let term6 = 6; let mut lease = Lease::new(Duration::seconds(1), Duration::milliseconds(250)); // 1s is long enough. let read_progress = Arc::new(RegionReadProgress::new(®ion1, 1, 1, 1)); @@ -2574,10 +2584,8 @@ mod tests { let s = get_snapshot(None, &mut reader, cmd.clone(), &rx); assert!(!s.region_cache_snapshot_available()); - let range = CacheRange::from_region(®ion1); - memory_engine.new_range(range.clone()); { - let mut core = memory_engine.core().lock().unwrap(); + let mut core = memory_engine.core().write().unwrap(); core.mut_range_manager().set_range_readable(&range, true); core.mut_range_manager().set_safe_ts(&range, 10); } @@ -2589,16 +2597,17 @@ mod tests { let s = get_snapshot(Some(snap_ctx.clone()), &mut reader, cmd.clone(), &rx); assert!(s.region_cache_snapshot_available()); + assert_eq!(s.get_value(kv.0).unwrap().unwrap(), kv.1); { - let mut core = memory_engine.core().lock().unwrap(); + let mut core = memory_engine.core().write().unwrap(); core.mut_range_manager().set_range_readable(&range, false); } let s = get_snapshot(Some(snap_ctx.clone()), &mut reader, cmd.clone(), &rx); assert!(!s.region_cache_snapshot_available()); { - let mut core = memory_engine.core().lock().unwrap(); + let mut core = memory_engine.core().write().unwrap(); core.mut_range_manager().set_range_readable(&range, true); } snap_ctx.read_ts = 5; diff --git a/components/region_cache_memory_engine/Cargo.toml b/components/region_cache_memory_engine/Cargo.toml index 33014798782..1ad885b7b49 100644 --- a/components/region_cache_memory_engine/Cargo.toml +++ b/components/region_cache_memory_engine/Cargo.toml @@ -13,6 +13,7 @@ engine_traits = { workspace = true } collections = { workspace = true } skiplist-rs = { git = "https://github.com/tikv/skiplist-rs.git", branch = "main" } bytes = "1.0" +crossbeam = "0.8" tikv_util = { workspace = true } txn_types 
= { workspace = true } log_wrappers = { workspace = true } diff --git a/components/region_cache_memory_engine/src/engine.rs b/components/region_cache_memory_engine/src/engine.rs index 48f5d95b8de..1e240a6dc9e 100644 --- a/components/region_cache_memory_engine/src/engine.rs +++ b/components/region_cache_memory_engine/src/engine.rs @@ -5,10 +5,11 @@ use std::{ collections::BTreeMap, fmt::{self, Debug}, ops::Deref, - sync::{Arc, Mutex}, + sync::Arc, }; use bytes::Bytes; +use crossbeam::sync::ShardedLock; use engine_rocks::{raw::SliceTransform, util::FixedSuffixSliceTransform}; use engine_traits::{ CacheRange, CfNamesExt, DbVector, Error, IterOptions, Iterable, Iterator, Peekable, @@ -183,7 +184,7 @@ impl RangeCacheMemoryEngineCore { /// cached region), we resort to using a the disk engine's snapshot instead. #[derive(Clone)] pub struct RangeCacheMemoryEngine { - pub(crate) core: Arc>, + pub(crate) core: Arc>, memory_limiter: Arc, } @@ -191,18 +192,18 @@ impl RangeCacheMemoryEngine { pub fn new(limiter: Arc) -> Self { let engine = RangeCacheMemoryEngineCore::new(limiter.clone()); Self { - core: Arc::new(Mutex::new(engine)), + core: Arc::new(ShardedLock::new(engine)), memory_limiter: limiter, } } pub fn new_range(&self, range: CacheRange) { - let mut core = self.core.lock().unwrap(); + let mut core = self.core.write().unwrap(); core.range_manager.new_range(range); } pub fn evict_range(&mut self, range: &CacheRange) { - let mut core = self.core.lock().unwrap(); + let mut core = self.core.write().unwrap(); if core.range_manager.evict_range(range) { core.engine.delete_range(range); } @@ -210,7 +211,7 @@ impl RangeCacheMemoryEngine { } impl RangeCacheMemoryEngine { - pub fn core(&self) -> &Arc> { + pub fn core(&self) -> &Arc> { &self.core } } @@ -553,7 +554,7 @@ impl RangeCacheSnapshot { read_ts: u64, seq_num: u64, ) -> Option { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); if let Some(range_id) = 
core.range_manager.range_snapshot(&range, read_ts) { return Some(RangeCacheSnapshot { snapshot_meta: RagneCacheSnapshotMeta::new(range_id, range, read_ts, seq_num), @@ -568,7 +569,7 @@ impl RangeCacheSnapshot { impl Drop for RangeCacheSnapshot { fn drop(&mut self) { - let mut core = self.engine.core.lock().unwrap(); + let mut core = self.engine.core.write().unwrap(); for range_removable in core .range_manager .remove_range_snapshot(&self.snapshot_meta) @@ -702,7 +703,7 @@ mod tests { engine.new_range(range.clone()); let verify_snapshot_count = |snapshot_ts, count| { - let core = engine.core.lock().unwrap(); + let core = engine.core.read().unwrap(); if count > 0 { assert_eq!( *core @@ -733,13 +734,13 @@ mod tests { assert!(engine.snapshot(range.clone(), 5, u64::MAX).is_none()); { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); core.range_manager.set_range_readable(&range, true); } let s1 = engine.snapshot(range.clone(), 5, u64::MAX).unwrap(); { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); let t_range = CacheRange::new(b"k00".to_vec(), b"k02".to_vec()); assert!(!core.range_manager.set_safe_ts(&t_range, 5)); assert!(core.range_manager.set_safe_ts(&range, 5)); @@ -762,7 +763,7 @@ mod tests { verify_snapshot_count(10, 1); drop(s3); { - let core = engine.core.lock().unwrap(); + let core = engine.core.write().unwrap(); assert!( core.range_manager .ranges() @@ -898,7 +899,7 @@ mod tests { engine.new_range(range.clone()); { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); core.range_manager.set_range_readable(&range, true); core.range_manager.set_safe_ts(&range, 5); let sl = core.engine.data[cf_to_id("write")].clone(); @@ -978,7 +979,7 @@ mod tests { let step: i32 = 2; { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); core.range_manager.set_range_readable(&range, true); 
core.range_manager.set_safe_ts(&range, 5); let sl = core.engine.data[cf_to_id("write")].clone(); @@ -1164,7 +1165,7 @@ mod tests { let step: i32 = 2; { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); core.range_manager.set_range_readable(&range, true); core.range_manager.set_safe_ts(&range, 5); let sl = core.engine.data[cf_to_id("write")].clone(); @@ -1267,7 +1268,7 @@ mod tests { let step: i32 = 2; { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); core.range_manager.set_range_readable(&range, true); core.range_manager.set_safe_ts(&range, 5); let sl = core.engine.data[cf_to_id("write")].clone(); @@ -1391,7 +1392,7 @@ mod tests { engine.new_range(range.clone()); { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); core.range_manager.set_range_readable(&range, true); core.range_manager.set_safe_ts(&range, 5); let sl = core.engine.data[cf_to_id("write")].clone(); @@ -1495,7 +1496,7 @@ mod tests { let engine = RangeCacheMemoryEngine::new(Arc::default()); engine.new_range(range.clone()); let sl = { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); core.range_manager.set_range_readable(&range, true); core.range_manager.set_safe_ts(&range, 5); core.engine.data[cf_to_id("write")].clone() @@ -1532,7 +1533,7 @@ mod tests { let engine = RangeCacheMemoryEngine::new(Arc::default()); engine.new_range(range.clone()); let sl = { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); core.range_manager.set_range_readable(&range, true); core.range_manager.set_safe_ts(&range, 5); core.engine.data[cf_to_id("write")].clone() @@ -1562,7 +1563,7 @@ mod tests { let engine = RangeCacheMemoryEngine::new(Arc::default()); engine.new_range(range.clone()); let sl = { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); 
core.range_manager.set_range_readable(&range, true); core.range_manager.set_safe_ts(&range, 5); core.engine.data[cf_to_id("write")].clone() @@ -1594,7 +1595,7 @@ mod tests { let engine = RangeCacheMemoryEngine::new(Arc::default()); engine.new_range(range.clone()); let sl = { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); core.range_manager.set_range_readable(&range, true); core.range_manager.set_safe_ts(&range, 5); core.engine.data[cf_to_id("write")].clone() @@ -1627,7 +1628,7 @@ mod tests { engine.new_range(range.clone()); { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); core.range_manager.set_range_readable(&range, true); core.range_manager.set_safe_ts(&range, 5); let sl = core.engine.data[cf_to_id("write")].clone(); @@ -1723,7 +1724,7 @@ mod tests { engine.new_range(range.clone()); { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); core.range_manager.set_range_readable(&range, true); core.range_manager.set_safe_ts(&range, 5); let sl = core.engine.data[cf_to_id("write")].clone(); @@ -1777,7 +1778,7 @@ mod tests { let evict_range = CacheRange::new(construct_user_key(10), construct_user_key(20)); engine.new_range(range.clone()); { - let mut core = engine.core.lock().unwrap(); + let mut core = engine.core.write().unwrap(); core.range_manager.set_range_readable(&range, true); core.range_manager.set_safe_ts(&range, 5); let sl = core.engine.data[cf_to_id("write")].clone(); diff --git a/components/region_cache_memory_engine/src/gc.rs b/components/region_cache_memory_engine/src/gc.rs index 2d81c4879c3..7f7d5f8da4b 100644 --- a/components/region_cache_memory_engine/src/gc.rs +++ b/components/region_cache_memory_engine/src/gc.rs @@ -61,7 +61,7 @@ impl GcRunner { fn gc_range(&mut self, range: &CacheRange, safe_point: u64) { let (skiplist_engine, safe_ts) = { - let mut core = self.memory_engine.core().lock().unwrap(); + let mut core = 
self.memory_engine.core().write().unwrap(); let Some(range_meta) = core.mut_range_manager().mut_range_meta(range) else { return; }; @@ -399,7 +399,7 @@ pub mod tests { let range = CacheRange::new(b"".to_vec(), b"z".to_vec()); engine.new_range(range.clone()); let (write, default) = { - let mut core = engine.core().lock().unwrap(); + let mut core = engine.core().write().unwrap(); let skiplist_engine = core.engine(); core.mut_range_manager().set_range_readable(&range, true); ( @@ -454,7 +454,7 @@ pub mod tests { let range = CacheRange::new(b"".to_vec(), b"z".to_vec()); engine.new_range(range.clone()); let (write, default) = { - let mut core = engine.core().lock().unwrap(); + let mut core = engine.core().write().unwrap(); let skiplist_engine = core.engine(); core.mut_range_manager().set_range_readable(&range, true); ( diff --git a/components/region_cache_memory_engine/src/write_batch.rs b/components/region_cache_memory_engine/src/write_batch.rs index 31cf844ea0f..5a73e6b28a0 100644 --- a/components/region_cache_memory_engine/src/write_batch.rs +++ b/components/region_cache_memory_engine/src/write_batch.rs @@ -5,49 +5,45 @@ use tikv_util::box_err; use crate::{ engine::{cf_to_id, SkiplistEngine}, keys::{encode_key, ValueType}, + range_manager::RangeManager, RangeCacheMemoryEngine, }; -/// Callback to apply an encoded entry to cache engine. -/// -/// Arguments: &str - cf name, Bytes - (encoded) key, Bytes - value. -/// -/// TODO: consider refactoring into a trait once RangeCacheMemoryEngine API -/// stabilizes. -type ApplyEncodedEntryCb = Box Result<()> + Send + Sync>; - -/// RangeCacheWriteBatch maintains its own in-memory buffer. 
pub struct RangeCacheWriteBatch { buffer: Vec, - apply_cb: ApplyEncodedEntryCb, - sequence_number: Option, + engine: RangeCacheMemoryEngine, save_points: Vec, + sequence_number: Option, } impl std::fmt::Debug for RangeCacheWriteBatch { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("RangeCacheWriteBatch") .field("buffer", &self.buffer) + .field("save_points", &self.save_points) + .field("sequence_number", &self.sequence_number) .finish() } } -impl RangeCacheWriteBatch { - pub fn new(apply_cb: ApplyEncodedEntryCb) -> Self { +impl From<&RangeCacheMemoryEngine> for RangeCacheWriteBatch { + fn from(engine: &RangeCacheMemoryEngine) -> Self { Self { buffer: Vec::new(), - apply_cb, - sequence_number: None, + engine: engine.clone(), save_points: Vec::new(), + sequence_number: None, } } +} - pub fn with_capacity(apply_cb: ApplyEncodedEntryCb, cap: usize) -> Self { +impl RangeCacheWriteBatch { + pub fn with_capacity(engine: &RangeCacheMemoryEngine, cap: usize) -> Self { Self { buffer: Vec::with_capacity(cap), - apply_cb, - sequence_number: None, + engine: engine.clone(), save_points: Vec::new(), + sequence_number: None, } } @@ -62,85 +58,91 @@ impl RangeCacheWriteBatch { } fn write_impl(&mut self, seq: u64) -> Result<()> { - self.buffer - .iter() - .map(|e| (e.cf.as_str(), e.encode(seq))) - .try_for_each(|(cf, (key, value))| (self.apply_cb)(cf, key, value)) + let (engine, filtered_keys) = { + let core = self.engine.core().read().unwrap(); + ( + core.engine().clone(), + self.buffer + .iter() + .filter(|&e| e.should_write_to_memory(core.range_manager())) + .collect::>(), + ) + }; + filtered_keys + .into_iter() + .try_for_each(|e| e.write_to_memory(&engine, seq)) } } #[derive(Clone, Debug)] -enum CacheWriteBatchEntryMutation { +enum WriteBatchEntryInternal { PutValue(Bytes), Deletion, } -impl CacheWriteBatchEntryMutation { +impl WriteBatchEntryInternal { fn encode(&self, key: &[u8], seq: u64) -> (Bytes, Bytes) { match self { - 
CacheWriteBatchEntryMutation::PutValue(value) => { + WriteBatchEntryInternal::PutValue(value) => { (encode_key(key, seq, ValueType::Value), value.clone()) } - CacheWriteBatchEntryMutation::Deletion => { + WriteBatchEntryInternal::Deletion => { (encode_key(key, seq, ValueType::Deletion), Bytes::new()) } } } fn data_size(&self) -> usize { match self { - CacheWriteBatchEntryMutation::PutValue(value) => value.len(), - CacheWriteBatchEntryMutation::Deletion => 0, + WriteBatchEntryInternal::PutValue(value) => value.len(), + WriteBatchEntryInternal::Deletion => 0, } } } + #[derive(Clone, Debug)] struct RangeCacheWriteBatchEntry { - cf: String, + cf: usize, key: Bytes, - mutation: CacheWriteBatchEntryMutation, + inner: WriteBatchEntryInternal, } impl RangeCacheWriteBatchEntry { pub fn put_value(cf: &str, key: &[u8], value: &[u8]) -> Self { Self { - cf: cf.to_owned(), + cf: cf_to_id(cf), key: Bytes::copy_from_slice(key), - mutation: CacheWriteBatchEntryMutation::PutValue(Bytes::copy_from_slice(value)), + inner: WriteBatchEntryInternal::PutValue(Bytes::copy_from_slice(value)), } } pub fn deletion(cf: &str, key: &[u8]) -> Self { Self { - cf: cf.to_owned(), + cf: cf_to_id(cf), key: Bytes::copy_from_slice(key), - mutation: CacheWriteBatchEntryMutation::Deletion, + inner: WriteBatchEntryInternal::Deletion, } } #[inline] pub fn encode(&self, seq: u64) -> (Bytes, Bytes) { - self.mutation.encode(&self.key, seq) + self.inner.encode(&self.key, seq) } pub fn data_size(&self) -> usize { - self.key.len() + std::mem::size_of::() + self.mutation.data_size() + self.key.len() + std::mem::size_of::() + self.inner.data_size() } -} -impl RangeCacheMemoryEngine { - fn apply_cb(&self) -> ApplyEncodedEntryCb { - // TODO: use the stabilized API for appending to the skip list here. 
- Box::new(|_cf, _key, _value| Ok(())) + + #[inline] + pub fn should_write_to_memory(&self, range_manager: &RangeManager) -> bool { + range_manager.contains(&self.key) } -} -impl From<&SkiplistEngine> for RangeCacheWriteBatch { - fn from(engine: &SkiplistEngine) -> Self { - let engine_clone = engine.clone(); - let apply_cb = Box::new(move |cf: &'_ str, key, value| { - engine_clone.data[cf_to_id(cf)].put(key, value); - Ok(()) - }); - RangeCacheWriteBatch::new(apply_cb) + #[inline] + pub fn write_to_memory(&self, skiplist_engine: &SkiplistEngine, seq: u64) -> Result<()> { + let handle = &skiplist_engine.data[self.cf]; + let (key, value) = self.encode(seq); + let _ = handle.put(key, value); + Ok(()) } } @@ -150,14 +152,13 @@ impl WriteBatchExt for RangeCacheMemoryEngine { const WRITE_BATCH_MAX_KEYS: usize = 256; fn write_batch(&self) -> Self::WriteBatch { - RangeCacheWriteBatch::new(self.apply_cb()) + RangeCacheWriteBatch::from(self) } fn write_batch_with_cap(&self, cap: usize) -> Self::WriteBatch { - RangeCacheWriteBatch::with_capacity(self.apply_cb(), cap) + RangeCacheWriteBatch::with_capacity(self, cap) } } - impl WriteBatch for RangeCacheWriteBatch { fn write_opt(&mut self, _: &WriteOptions) -> Result { self.sequence_number @@ -257,19 +258,33 @@ mod tests { #[test] fn test_write_to_skiplist() { - let engine = SkiplistEngine::new(Arc::default()); + let engine = RangeCacheMemoryEngine::new(Arc::default()); + let r = CacheRange::new(b"".to_vec(), b"z".to_vec()); + engine.new_range(r.clone()); + { + let mut core = engine.core.write().unwrap(); + core.mut_range_manager().set_range_readable(&r, true); + core.mut_range_manager().set_safe_ts(&r, 10); + } let mut wb = RangeCacheWriteBatch::from(&engine); wb.put(b"aaa", b"bbb").unwrap(); wb.set_sequence_number(1).unwrap(); assert_eq!(wb.write().unwrap(), 1); - let sl = engine.data[cf_to_id(CF_DEFAULT)].clone(); + let sl = engine.core.read().unwrap().engine().data[cf_to_id(CF_DEFAULT)].clone(); let actual = 
sl.get(&encode_key(b"aaa", 1, ValueType::Value)).unwrap(); assert_eq!(&b"bbb"[..], actual) } #[test] fn test_savepoints() { - let engine = SkiplistEngine::new(Arc::default()); + let engine = RangeCacheMemoryEngine::new(Arc::default()); + let r = CacheRange::new(b"".to_vec(), b"z".to_vec()); + engine.new_range(r.clone()); + { + let mut core = engine.core.write().unwrap(); + core.mut_range_manager().set_range_readable(&r, true); + core.mut_range_manager().set_safe_ts(&r, 10); + } let mut wb = RangeCacheWriteBatch::from(&engine); wb.put(b"aaa", b"bbb").unwrap(); wb.set_save_point(); @@ -278,7 +293,7 @@ mod tests { wb.rollback_to_save_point().unwrap(); wb.set_sequence_number(1).unwrap(); assert_eq!(wb.write().unwrap(), 1); - let sl = engine.data[cf_to_id(CF_DEFAULT)].clone(); + let sl = engine.core.read().unwrap().engine().data[cf_to_id(CF_DEFAULT)].clone(); let actual = sl.get(&encode_key(b"aaa", 1, ValueType::Value)).unwrap(); assert_eq!(&b"bbb"[..], actual); assert!(sl.get(&encode_key(b"ccc", 1, ValueType::Value)).is_none()) @@ -289,13 +304,12 @@ mod tests { let engine = RangeCacheMemoryEngine::new(Arc::default()); let r = CacheRange::new(b"".to_vec(), b"z".to_vec()); engine.new_range(r.clone()); - let engine_for_writes = { - let mut core = engine.core.lock().unwrap(); + { + let mut core = engine.core.write().unwrap(); core.mut_range_manager().set_range_readable(&r, true); core.mut_range_manager().set_safe_ts(&r, 10); - core.engine() - }; - let mut wb = RangeCacheWriteBatch::from(&engine_for_writes); + } + let mut wb = RangeCacheWriteBatch::from(&engine); wb.put(b"aaa", b"bbb").unwrap(); wb.set_sequence_number(1).unwrap(); _ = wb.write().unwrap(); From a73405d8a4f8840cc9960d6957c617b2c56129ae Mon Sep 17 00:00:00 2001 From: tongjian <1045931706@qq.com> Date: Wed, 31 Jan 2024 14:36:23 +0800 Subject: [PATCH 1138/1149] server: add grpc exec duration and wait duration (#16447) close tikv/tikv#16449 1. report the exec duration in grpc pool in every request 2. 
report the wait duration from other pool to grpc pool Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 4 +-- components/tracker/src/lib.rs | 5 ++++ src/coprocessor/endpoint.rs | 9 +++++-- src/server/service/kv.rs | 33 ++++++++++++++++--------- src/storage/mod.rs | 8 ++++++ src/storage/txn/scheduler.rs | 5 +++- tests/integrations/server/kv_service.rs | 14 +++++++++++ 7 files changed, 61 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b49b15805ec..e803d8f928a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2807,7 +2807,7 @@ dependencies = [ [[package]] name = "kvproto" version = "0.0.2" -source = "git+https://github.com/pingcap/kvproto.git#932639606bcf3db9676627d55430e1dd10670570" +source = "git+https://github.com/pingcap/kvproto.git#705bb9244fd9557b45c0f4f1530ba239c782068b" dependencies = [ "futures 0.3.15", "grpcio", @@ -6965,7 +6965,7 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", "static_assertions", ] diff --git a/components/tracker/src/lib.rs b/components/tracker/src/lib.rs index 6307c51f907..19f1f04bf84 100644 --- a/components/tracker/src/lib.rs +++ b/components/tracker/src/lib.rs @@ -29,6 +29,10 @@ impl Tracker { } } + pub fn write_time_detail(&self, detail_v2: &mut pb::TimeDetailV2) { + detail_v2.set_kv_grpc_process_time_ns(self.metrics.grpc_process_nanos); + } + pub fn write_scan_detail(&self, detail_v2: &mut pb::ScanDetailV2) { detail_v2.set_rocksdb_block_read_byte(self.metrics.block_read_byte); detail_v2.set_rocksdb_block_read_count(self.metrics.block_read_count); @@ -129,6 +133,7 @@ pub enum RequestType { #[derive(Debug, Default, Clone)] pub struct RequestMetrics { + pub grpc_process_nanos: u64, pub get_snapshot_nanos: u64, pub read_index_propose_wait_nanos: u64, 
pub read_index_confirm_wait_nanos: u64, diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 63434a85ca1..5200a96c158 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -522,7 +522,6 @@ impl Endpoint { }); // box the tracker so that moving it is cheap. let tracker = Box::new(Tracker::new(req_ctx, self.slow_log_threshold)); - let res = self .read_pool .spawn_handle( @@ -546,6 +545,7 @@ impl Endpoint { mut req: coppb::Request, peer: Option, ) -> impl Future> { + let now = Instant::now(); // Check the load of the read pool. If it's too busy, generate and return // error in the gRPC thread to avoid waiting in the queue of the read pool. if let Err(busy_err) = self.read_pool.check_busy_threshold(Duration::from_millis( @@ -567,6 +567,9 @@ impl Endpoint { let result_of_future = self .parse_request_and_check_memory_locks(req, peer, false) .map(|(handler_builder, req_ctx)| self.handle_unary_request(req_ctx, handler_builder)); + with_tls_tracker(|tracker| { + tracker.metrics.grpc_process_nanos = now.saturating_elapsed().as_nanos() as u64; + }); let fut = async move { let res = match result_of_future { Err(e) => { @@ -580,7 +583,9 @@ impl Endpoint { let mut res = handle_res.unwrap_or_else(|e| make_error_response(e).into()); res.set_batch_responses(batch_res.into()); GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { - tracker.write_scan_detail(res.mut_exec_details_v2().mut_scan_detail_v2()); + let exec_detail_v2 = res.mut_exec_details_v2(); + tracker.write_scan_detail(exec_detail_v2.mut_scan_detail_v2()); + tracker.write_time_detail(exec_detail_v2.mut_time_detail_v2()); }); res } diff --git a/src/server/service/kv.rs b/src/server/service/kv.rs index 02bfca0473e..4d7bda51ca4 100644 --- a/src/server/service/kv.rs +++ b/src/server/service/kv.rs @@ -1140,12 +1140,7 @@ fn response_batch_commands_request( { let task = async move { if let Ok(resp) = resp.await { - let measure = GrpcRequestDuration { - begin, - label, - source, - 
resource_priority, - }; + let measure = GrpcRequestDuration::new(begin, label, source, resource_priority); let task = MeasuredSingleResponse::new(id, resp, measure); if let Err(e) = tx.send_with(task, WakePolicy::Immediately) { error!("KvService response batch commands fail"; "err" => ?e); @@ -1335,8 +1330,10 @@ fn handle_measures_for_batch_commands(measures: &mut MeasuredBatchResponse) { begin, source, resource_priority, + sent, } = measure; let elapsed = now.saturating_duration_since(begin); + let wait = now.saturating_duration_since(sent); GRPC_MSG_HISTOGRAM_STATIC .get(label) .get(resource_priority) @@ -1362,6 +1359,9 @@ fn handle_measures_for_batch_commands(measures: &mut MeasuredBatchResponse) { exec_details .mut_time_detail_v2() .set_total_rpc_wall_time_ns(elapsed.as_nanos() as u64); + exec_details + .mut_time_detail_v2() + .set_kv_grpc_wait_time_ns(wait.as_nanos() as u64); } } } @@ -1412,10 +1412,12 @@ fn future_get( match v { Ok((val, stats)) => { let exec_detail_v2 = resp.mut_exec_details_v2(); - let scan_detail_v2 = exec_detail_v2.mut_scan_detail_v2(); - stats.stats.write_scan_detail(scan_detail_v2); + stats + .stats + .write_scan_detail(exec_detail_v2.mut_scan_detail_v2()); GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { - tracker.write_scan_detail(scan_detail_v2); + tracker.write_scan_detail(exec_detail_v2.mut_scan_detail_v2()); + tracker.write_time_detail(exec_detail_v2.mut_time_detail_v2()); }); set_time_detail(exec_detail_v2, duration, &stats.latency_stats); match val { @@ -1527,10 +1529,12 @@ fn future_batch_get( Ok((kv_res, stats)) => { let pairs = map_kv_pairs(kv_res); let exec_detail_v2 = resp.mut_exec_details_v2(); - let scan_detail_v2 = exec_detail_v2.mut_scan_detail_v2(); - stats.stats.write_scan_detail(scan_detail_v2); + stats + .stats + .write_scan_detail(exec_detail_v2.mut_scan_detail_v2()); GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { - tracker.write_scan_detail(scan_detail_v2); + 
tracker.write_scan_detail(exec_detail_v2.mut_scan_detail_v2()); + tracker.write_time_detail(exec_detail_v2.mut_time_detail_v2()); }); set_time_detail(exec_detail_v2, duration, &stats.latency_stats); resp.set_pairs(pairs.into()); @@ -2070,6 +2074,7 @@ macro_rules! txn_command_future { GLOBAL_TRACKERS.with_tracker($tracker, |tracker| { tracker.write_scan_detail($resp.mut_exec_details_v2().mut_scan_detail_v2()); tracker.write_write_detail($resp.mut_exec_details_v2().mut_write_detail()); + tracker.write_time_detail($resp.mut_exec_details_v2().mut_time_detail_v2()); }); }); }; @@ -2080,6 +2085,7 @@ macro_rules! txn_command_future { GLOBAL_TRACKERS.with_tracker($tracker, |tracker| { tracker.write_scan_detail($resp.mut_exec_details_v2().mut_scan_detail_v2()); tracker.write_write_detail($resp.mut_exec_details_v2().mut_write_detail()); + tracker.write_time_detail($resp.mut_exec_details_v2().mut_time_detail_v2()); }); }); }; @@ -2283,7 +2289,9 @@ pub struct GrpcRequestDuration { pub label: GrpcTypeKind, pub source: String, pub resource_priority: ResourcePriority, + pub sent: Instant, } + impl GrpcRequestDuration { pub fn new( begin: Instant, @@ -2296,6 +2304,7 @@ impl GrpcRequestDuration { label, source, resource_priority, + sent: Instant::now(), } } } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 6d62e50aa55..34387daf6c0 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -623,6 +623,10 @@ impl Storage { let quota_limiter = self.quota_limiter.clone(); let mut sample = quota_limiter.new_sample(true); + with_tls_tracker(|tracker| { + tracker.metrics.grpc_process_nanos = + stage_begin_ts.saturating_elapsed().as_nanos() as u64; + }); self.read_pool_spawn_with_busy_check( busy_threshold, @@ -1010,6 +1014,10 @@ impl Storage { let busy_threshold = Duration::from_millis(ctx.busy_threshold_ms as u64); let quota_limiter = self.quota_limiter.clone(); let mut sample = quota_limiter.new_sample(true); + with_tls_tracker(|tracker| { + 
tracker.metrics.grpc_process_nanos = + stage_begin_ts.saturating_elapsed().as_nanos() as u64; + }); self.read_pool_spawn_with_busy_check( busy_threshold, async move { diff --git a/src/storage/txn/scheduler.rs b/src/storage/txn/scheduler.rs index 47920cc0ade..00056cad08f 100644 --- a/src/storage/txn/scheduler.rs +++ b/src/storage/txn/scheduler.rs @@ -530,6 +530,7 @@ impl TxnScheduler { callback: SchedulerTaskCallback, prepared_latches: Option, ) { + let now = Instant::now(); let cid = task.cid(); let tracker = task.tracker(); let cmd = task.cmd(); @@ -548,7 +549,9 @@ impl TxnScheduler { self.inner .new_task_context(task, callback, prepared_latches) }); - + GLOBAL_TRACKERS.with_tracker(tracker, |tracker| { + tracker.metrics.grpc_process_nanos = now.saturating_elapsed().as_nanos() as u64; + }); if self.inner.latches.acquire(&mut tctx.lock, cid) { fail_point!("txn_scheduler_acquire_success"); tctx.on_schedule(); diff --git a/tests/integrations/server/kv_service.rs b/tests/integrations/server/kv_service.rs index 29b1abb01f3..fadb3de4a8d 100644 --- a/tests/integrations/server/kv_service.rs +++ b/tests/integrations/server/kv_service.rs @@ -2719,6 +2719,20 @@ fn test_rpc_wall_time() { .get_total_rpc_wall_time_ns() > 0 ); + assert!( + resp.get_get() + .get_exec_details_v2() + .get_time_detail_v2() + .get_kv_grpc_process_time_ns() + > 0 + ); + assert!( + resp.get_get() + .get_exec_details_v2() + .get_time_detail_v2() + .get_kv_grpc_wait_time_ns() + > 0 + ); } } From 5d190fa5c2af88e0cd643aa6368383bb22a83241 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Wed, 31 Jan 2024 21:52:55 -0800 Subject: [PATCH 1139/1149] raftstore: Improve readability (#16476) ref tikv/tikv#16465 Improve readability Signed-off-by: Yang Zhang --- components/raftstore/src/store/util.rs | 32 +++++++++++++------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index cee7691875d..f0d76a10deb 100644 --- 
a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1130,12 +1130,12 @@ fn check_remove_or_demote_voter( leader_id: u64, peer_heartbeat: &collections::HashMap, ) -> Result<()> { - let mut slow_peer_count = 0; - let mut normal_peer_count = 0; + let mut slow_voters_count = 0; + let mut normal_voters_count = 0; // Here we assume if the last beartbeat is within 2 election timeout, the peer // is healthy. When a region is hibernate, we expect all its peers are *slow* // and it would still allow the operation - let slow_peer_threshold = + let slow_voter_threshold = 2 * cfg.raft_base_tick_interval.0 * cfg.raft_max_election_timeout_ticks as u32; for (id, last_heartbeat) in peer_heartbeat { // for slow and normal peer calculation, we only count voter role @@ -1146,15 +1146,15 @@ fn check_remove_or_demote_voter( .map_or(false, |p| p.role == PeerRole::Voter) { // leader itself is not a slow peer - if *id == leader_id || last_heartbeat.elapsed() <= slow_peer_threshold { - normal_peer_count += 1; + if *id == leader_id || last_heartbeat.elapsed() <= slow_voter_threshold { + normal_voters_count += 1; } else { - slow_peer_count += 1; + slow_voters_count += 1; } } } - let mut normal_peers_to_remove = vec![]; + let mut normal_voters_to_remove = vec![]; for cp in change_peers { let (change_type, peer) = (cp.get_change_type(), cp.get_peer()); if change_type == ConfChangeType::RemoveNode @@ -1171,9 +1171,9 @@ fn check_remove_or_demote_voter( // not allowed. 
if is_voter && let Some(last_heartbeat) = peer_heartbeat.get(&peer.get_id()) { // peer itself is *not* slow peer, but current slow peer is >= total peers/2 - if last_heartbeat.elapsed() <= slow_peer_threshold { - normal_peer_count -= 1; - normal_peers_to_remove.push(peer.clone()); + if last_heartbeat.elapsed() <= slow_voter_threshold { + normal_voters_count -= 1; + normal_voters_to_remove.push(peer.clone()); } } } @@ -1184,16 +1184,16 @@ fn check_remove_or_demote_voter( // option to finish as there's no choice. // We only block the operation when normal peers are going to be removed and it // could lead to slow peers more than normal peers - if !normal_peers_to_remove.is_empty() - && slow_peer_count > 0 - && slow_peer_count >= normal_peer_count + if !normal_voters_to_remove.is_empty() + && slow_voters_count > 0 + && slow_voters_count >= normal_voters_count { return Err(box_err!( "Ignore conf change command on region {} because RemoveNode or Demote a voter on peers {:?} may lead to unavailability. There're {} slow peers and {} normal peers", region.get_id(), - &normal_peers_to_remove, - slow_peer_count, - normal_peer_count + &normal_voters_to_remove, + slow_voters_count, + normal_voters_count )); } From 997eabc7f64d0bd1addc5d160e7d17cde1f52c3c Mon Sep 17 00:00:00 2001 From: lucasliang Date: Fri, 2 Feb 2024 12:24:24 +0800 Subject: [PATCH 1140/1149] raftstore: report busy to PD when restarting if exists apply log lags. (#16239) ref tikv/tikv#15874 This pr inspect the gap of each peer's `applied_log_index` and `commit_log_index` when restarting. And if the gap exceeds the `leader_transfer_max_log_lag`, the related peer will be marked as `pending for recovery` state. After the gap is less than `leader_transfer_max_log_lag`, it means that the pending logs is acceptable. Only if the count of ready peers exceeds the given configuration, that is, `min_recovery_ready_region_percent`, this store is ready for re-balancing leaders. 
Before this stage, the state of this store will be marked `is_busy` to avoid transferring leaders to it. Signed-off-by: lucasliang --- .../raftstore-v2/src/operation/command/mod.rs | 2 +- .../raftstore-v2/src/operation/ready/mod.rs | 2 +- components/raftstore-v2/src/raft/peer.rs | 2 +- components/raftstore/src/store/config.rs | 14 +++ components/raftstore/src/store/fsm/peer.rs | 61 ++++++++++++- components/raftstore/src/store/fsm/store.rs | 86 +++++++++++++++++-- components/raftstore/src/store/peer.rs | 12 ++- tests/failpoints/cases/test_pending_peers.rs | 60 +++++++++++++ 8 files changed, 226 insertions(+), 13 deletions(-) diff --git a/components/raftstore-v2/src/operation/command/mod.rs b/components/raftstore-v2/src/operation/command/mod.rs index 46577e11b43..4103551041b 100644 --- a/components/raftstore-v2/src/operation/command/mod.rs +++ b/components/raftstore-v2/src/operation/command/mod.rs @@ -470,7 +470,7 @@ impl Peer { apply_res.applied_index, progress_to_be_updated, ); - self.try_compelete_recovery(); + self.try_complete_recovery(); if !self.pause_for_replay() && self.storage_mut().apply_trace_mut().should_flush() { if let Some(scheduler) = self.apply_scheduler() { scheduler.send(ApplyTask::ManualFlush); diff --git a/components/raftstore-v2/src/operation/ready/mod.rs b/components/raftstore-v2/src/operation/ready/mod.rs index 39ce9707359..95eee272a80 100644 --- a/components/raftstore-v2/src/operation/ready/mod.rs +++ b/components/raftstore-v2/src/operation/ready/mod.rs @@ -819,7 +819,7 @@ impl Peer { self.merge_state_changes_to(&mut write_task); self.storage_mut() .handle_raft_ready(ctx, &mut ready, &mut write_task); - self.try_compelete_recovery(); + self.try_complete_recovery(); self.on_advance_persisted_apply_index(ctx, prev_persisted, &mut write_task); if !ready.persisted_messages().is_empty() { diff --git a/components/raftstore-v2/src/raft/peer.rs b/components/raftstore-v2/src/raft/peer.rs index b535d7f9a47..75d5b1729a3 100644 --- 
a/components/raftstore-v2/src/raft/peer.rs +++ b/components/raftstore-v2/src/raft/peer.rs @@ -507,7 +507,7 @@ impl Peer { // we may have skipped scheduling raft tick when start due to noticable gap // between commit index and apply index. We should scheduling it when raft log // apply catches up. - pub fn try_compelete_recovery(&mut self) { + pub fn try_complete_recovery(&mut self) { if self.pause_for_replay() && self.storage().entry_storage().commit_index() <= self.storage().entry_storage().applied_index() diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 9c677cd1271..2427c438bf8 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -407,6 +407,13 @@ pub struct Config { #[online_config(hidden)] #[serde(alias = "enable-partitioned-raft-kv-compatible-learner")] pub enable_v2_compatible_learner: bool, + + /// The minimal count of region pending on applying raft logs. + /// Only when the count of regions which not pending on applying logs is + /// less than the threshold, can the raftstore supply service. 
+ #[doc(hidden)] + #[online_config(hidden)] + pub min_pending_apply_region_count: u64, } impl Default for Config { @@ -544,6 +551,7 @@ impl Default for Config { check_request_snapshot_interval: ReadableDuration::minutes(1), enable_v2_compatible_learner: false, unsafe_disable_check_quorum: false, + min_pending_apply_region_count: 10, } } } @@ -948,6 +956,12 @@ impl Config { )); } + if self.min_pending_apply_region_count == 0 { + return Err(box_err!( + "min_pending_apply_region_count must be greater than 0" + )); + } + Ok(()) } diff --git a/components/raftstore/src/store/fsm/peer.rs b/components/raftstore/src/store/fsm/peer.rs index 5dac5d9d488..c048093177f 100644 --- a/components/raftstore/src/store/fsm/peer.rs +++ b/components/raftstore/src/store/fsm/peer.rs @@ -103,7 +103,7 @@ use crate::{ }, CasualMessage, Config, LocksStatus, MergeResultKind, PdTask, PeerMsg, PeerTick, ProposalContext, RaftCmdExtraOpts, RaftCommand, RaftlogFetchResult, ReadCallback, ReadTask, - SignificantMsg, SnapKey, StoreMsg, WriteCallback, + SignificantMsg, SnapKey, StoreMsg, WriteCallback, RAFT_INIT_LOG_INDEX, }, Error, Result, }; @@ -752,6 +752,9 @@ where } self.fsm.batch_req_builder.request = Some(cmd); } + // Update the state whether the peer is pending on applying raft + // logs if necesssary. + self.on_check_peer_complete_apply_logs(); } /// Flushes all pending raft commands for immediate execution. @@ -3807,6 +3810,9 @@ where "is_latest_initialized" => is_latest_initialized, ); + // Ensure this peer is removed in the pending apply list. + meta.busy_apply_peers.remove(&self.fsm.peer_id()); + if meta.atomic_snap_regions.contains_key(&self.region_id()) { drop(meta); panic!( @@ -6555,6 +6561,59 @@ where fn register_report_region_buckets_tick(&mut self) { self.schedule_tick(PeerTick::ReportBuckets) } + + /// Check whether the peer is pending on applying raft logs. + /// + /// If busy, the peer will be recorded, until the pending logs are + /// applied. 
And after it completes applying, it will be removed from + /// the recording list. + fn on_check_peer_complete_apply_logs(&mut self) { + // Already completed, skip. + if self.fsm.peer.busy_on_apply.is_none() { + return; + } + + let peer_id = self.fsm.peer.peer_id(); + let applied_idx = self.fsm.peer.get_store().applied_index(); + let last_idx = self.fsm.peer.get_store().last_index(); + // If the peer is newly added or created, no need to check the apply status. + if last_idx <= RAFT_INIT_LOG_INDEX { + self.fsm.peer.busy_on_apply = None; + return; + } + assert!(self.fsm.peer.busy_on_apply.is_some()); + // If the peer has large unapplied logs, this peer should be recorded until + // the lag is less than the given threshold. + if last_idx >= applied_idx + self.ctx.cfg.leader_transfer_max_log_lag { + if !self.fsm.peer.busy_on_apply.unwrap() { + let mut meta = self.ctx.store_meta.lock().unwrap(); + meta.busy_apply_peers.insert(peer_id); + } + self.fsm.peer.busy_on_apply = Some(true); + debug!( + "peer is busy on applying logs"; + "last_commit_idx" => last_idx, + "last_applied_idx" => applied_idx, + "region_id" => self.fsm.region_id(), + "peer_id" => peer_id, + ); + } else { + // Already finish apply, remove it from recording list. 
+ { + let mut meta = self.ctx.store_meta.lock().unwrap(); + meta.busy_apply_peers.remove(&peer_id); + meta.completed_apply_peers_count += 1; + } + debug!( + "peer completes applying logs"; + "last_commit_idx" => last_idx, + "last_applied_idx" => applied_idx, + "region_id" => self.fsm.region_id(), + "peer_id" => peer_id, + ); + self.fsm.peer.busy_on_apply = None; + } + } } impl<'a, EK, ER, T: Transport> PeerFsmDelegate<'a, EK, ER, T> diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index c42cdb66764..ab468adfd95 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -127,6 +127,14 @@ const PERIODIC_FULL_COMPACT_TICK_INTERVAL_DURATION: Duration = Duration::from_se // If periodic full compaction is enabled (`periodic_full_compact_start_times` // is set), sample load metrics every 10 minutes. const LOAD_STATS_WINDOW_DURATION: Duration = Duration::from_secs(10 * 60); +// When the store is started, it will take some time for applying pending +// snapshots and delayed raft logs. Before the store is ready, it will report +// `is_busy` to PD, so PD will not schedule operators to the store. +const STORE_CHECK_PENDING_APPLY_DURATION: Duration = Duration::from_secs(5 * 60); +// The minimal percent of region finishing applying pending logs. +// Only when the count of regions which finish applying logs exceed +// the threshold, can the raftstore supply service. +const STORE_CHECK_COMPLETE_APPLY_REGIONS_PERCENT: u64 = 99; pub struct StoreInfo { pub kv_engine: EK, @@ -180,6 +188,16 @@ pub struct StoreMeta { pub region_read_progress: RegionReadProgressRegistry, /// record sst_file_name -> (sst_smallest_key, sst_largest_key) pub damaged_ranges: HashMap, Vec)>, + /// Record peers are busy with applying logs + /// (applied_index <= last_idx - leader_transfer_max_log_lag). 
+ /// `busy_apply_peers` and `completed_apply_peers_count` are used + /// to record the accurate count of busy apply peers and peers complete + /// applying logs + pub busy_apply_peers: HashSet, + /// Record the number of peers done for applying logs. + /// Without `completed_apply_peers_count`, it's hard to know whether all + /// peers are ready for applying logs. + pub completed_apply_peers_count: u64, } impl StoreRegionMeta for StoreMeta { @@ -230,6 +248,8 @@ impl StoreMeta { destroyed_region_for_snap: HashMap::default(), region_read_progress: RegionReadProgressRegistry::new(), damaged_ranges: HashMap::default(), + busy_apply_peers: HashSet::default(), + completed_apply_peers_count: 0, } } @@ -2700,10 +2720,53 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } + fn check_store_is_busy_on_apply( + &self, + start_ts_sec: u32, + region_count: u64, + busy_apply_peers_count: u64, + completed_apply_peers_count: u64, + ) -> bool { + let during_starting_stage = { + (time::get_time().sec as u32).saturating_sub(start_ts_sec) + <= STORE_CHECK_PENDING_APPLY_DURATION.as_secs() as u32 + }; + // If the store is busy in handling applying logs when starting, it should not + // be treated as a normal store for balance. Only when the store is + // almost idle (no more pending regions on applying logs), it can be + // regarded as the candidate for balancing leaders. + if during_starting_stage { + let completed_target_count = (|| { + fail_point!("on_mock_store_completed_target_count", |_| 0); + std::cmp::max( + 1, + STORE_CHECK_COMPLETE_APPLY_REGIONS_PERCENT * region_count / 100, + ) + })(); + // If the number of regions on completing applying logs does not occupy the + // majority of regions, the store is regarded as busy. 
+ if completed_apply_peers_count < completed_target_count { + true + } else { + let pending_target_count = std::cmp::min( + self.ctx.cfg.min_pending_apply_region_count, + region_count.saturating_sub(completed_target_count), + ); + busy_apply_peers_count >= pending_target_count + } + } else { + // Already started for a fairy long time. + false + } + } + fn store_heartbeat_pd(&mut self, report: Option) { let mut stats = StoreStats::default(); stats.set_store_id(self.ctx.store_id()); + + let completed_apply_peers_count: u64; + let busy_apply_peers_count: u64; { let meta = self.ctx.store_meta.lock().unwrap(); stats.set_region_count(meta.regions.len() as u32); @@ -2712,6 +2775,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER let damaged_regions_id = meta.get_all_damaged_region_ids().into_iter().collect(); stats.set_damaged_regions_id(damaged_regions_id); } + completed_apply_peers_count = meta.completed_apply_peers_count; + busy_apply_peers_count = meta.busy_apply_peers.len() as u64; } let snap_stats = self.ctx.snap_mgr.stats(); @@ -2726,7 +2791,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER .with_label_values(&["receiving"]) .set(snap_stats.receiving_count as i64); - stats.set_start_time(self.fsm.store.start_time.unwrap().sec as u32); + let start_time = self.fsm.store.start_time.unwrap().sec as u32; + stats.set_start_time(start_time); // report store write flow to pd stats.set_bytes_written( @@ -2744,13 +2810,19 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER .swap(0, Ordering::Relaxed), ); - stats.set_is_busy( - self.ctx - .global_stat - .stat - .is_busy - .swap(false, Ordering::Relaxed), + let store_is_busy = self + .ctx + .global_stat + .stat + .is_busy + .swap(false, Ordering::Relaxed); + let busy_on_apply = self.check_store_is_busy_on_apply( + start_time, + stats.get_region_count() as u64, + busy_apply_peers_count, + completed_apply_peers_count, ); + 
stats.set_is_busy(store_is_busy || busy_on_apply); let mut query_stats = QueryStats::default(); query_stats.set_put( diff --git a/components/raftstore/src/store/peer.rs b/components/raftstore/src/store/peer.rs index da6fb628231..9be253b1041 100644 --- a/components/raftstore/src/store/peer.rs +++ b/components/raftstore/src/store/peer.rs @@ -892,6 +892,12 @@ where pub snapshot_recovery_state: Option, last_record_safe_point: u64, + /// Used for checking whether the peer is busy on apply. + /// * `None` => the peer has no pending logs for apply or already finishes + /// applying. + /// * `Some(false)` => initial state, not be recorded. + /// * `Some(true)` => busy on apply, and already recorded. + pub busy_on_apply: Option, } impl Peer @@ -1036,6 +1042,7 @@ where lead_transferee: raft::INVALID_ID, unsafe_recovery_state: None, snapshot_recovery_state: None, + busy_on_apply: Some(false), }; // If this region has only one peer and I am the one, campaign directly. @@ -2677,9 +2684,10 @@ where if let Some(hs) = ready.hs() { let pre_commit_index = self.get_store().commit_index(); - assert!(hs.get_commit() >= pre_commit_index); + let cur_commit_index = hs.get_commit(); + assert!(cur_commit_index >= pre_commit_index); if self.is_leader() { - self.on_leader_commit_idx_changed(pre_commit_index, hs.get_commit()); + self.on_leader_commit_idx_changed(pre_commit_index, cur_commit_index); } } diff --git a/tests/failpoints/cases/test_pending_peers.rs b/tests/failpoints/cases/test_pending_peers.rs index c41c97034b4..6390bc562cb 100644 --- a/tests/failpoints/cases/test_pending_peers.rs +++ b/tests/failpoints/cases/test_pending_peers.rs @@ -109,3 +109,63 @@ fn test_pending_snapshot() { state2 ); } + +// Tests if store is marked with busy when there exists peers on +// busy on applying raft logs. 
+#[test] +fn test_on_check_busy_on_apply_peers() { + let mut cluster = new_node_cluster(0, 3); + cluster.cfg.raft_store.raft_base_tick_interval = ReadableDuration::millis(5); + cluster.cfg.raft_store.raft_store_max_leader_lease = ReadableDuration::millis(100); + cluster.cfg.raft_store.leader_transfer_max_log_lag = 10; + cluster.cfg.raft_store.check_long_uncommitted_interval = ReadableDuration::millis(10); // short check interval for recovery + cluster.cfg.raft_store.pd_heartbeat_tick_interval = ReadableDuration::millis(50); + + let pd_client = Arc::clone(&cluster.pd_client); + // Disable default max peer count check. + pd_client.disable_default_operator(); + + let r1 = cluster.run_conf_change(); + pd_client.must_add_peer(r1, new_peer(2, 1002)); + pd_client.must_add_peer(r1, new_peer(3, 1003)); + + cluster.must_put(b"k1", b"v1"); + must_get_equal(&cluster.get_engine(2), b"k1", b"v1"); + must_get_equal(&cluster.get_engine(3), b"k1", b"v1"); + + // Pause peer 1003 on applying logs to make it pending. + let before_apply_stat = cluster.apply_state(r1, 3); + cluster.stop_node(3); + for i in 0..=cluster.cfg.raft_store.leader_transfer_max_log_lag { + let bytes = format!("k{:03}", i).into_bytes(); + cluster.must_put(&bytes, &bytes); + } + cluster.must_put(b"k2", b"v2"); + must_get_equal(&cluster.get_engine(1), b"k2", b"v2"); + must_get_equal(&cluster.get_engine(2), b"k2", b"v2"); + + // Restart peer 1003 and make it busy for applying pending logs. + fail::cfg("on_handle_apply_1003", "return").unwrap(); + cluster.run_node(3).unwrap(); + let after_apply_stat = cluster.apply_state(r1, 3); + assert!(after_apply_stat.applied_index == before_apply_stat.applied_index); + // Case 1: no completed regions. + cluster.must_send_store_heartbeat(3); + sleep_ms(100); + let stats = cluster.pd_client.get_store_stats(3).unwrap(); + assert!(stats.is_busy); + // Case 2: completed_apply_peers_count > completed_target_count but + // there exists busy peers. 
+ fail::cfg("on_mock_store_completed_target_count", "return").unwrap(); + sleep_ms(100); + cluster.must_send_store_heartbeat(3); + sleep_ms(100); + let stats = cluster.pd_client.get_store_stats(3).unwrap(); + assert!(!stats.is_busy); + fail::remove("on_mock_store_completed_target_count"); + fail::remove("on_handle_apply_1003"); + sleep_ms(100); + // After peer 1003 is recovered, store should not be marked with busy. + let stats = cluster.pd_client.get_store_stats(3).unwrap(); + assert!(!stats.is_busy); +} From 00a2518938f6328a305f70215d60355bfd2a3a9e Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Fri, 2 Feb 2024 17:31:55 +0800 Subject: [PATCH 1141/1149] *: Add module health_controller and move SlowScore, SlowTrend, HealthService from PdWorker to it (#16456) ref tikv/tikv#16297 Add module health_controller and move SlowScore, SlowTrend, HealthService from PdWorker to it Signed-off-by: MyonKeminta Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Cargo.lock | 22 + Cargo.toml | 3 + components/health_controller/Cargo.toml | 17 + components/health_controller/src/lib.rs | 451 ++++++++++++++++++ components/health_controller/src/reporters.rs | 244 ++++++++++ .../health_controller/src/slow_score.rs | 210 ++++++++ .../src/trend.rs | 11 +- components/health_controller/src/types.rs | 107 +++++ components/raftstore-v2/Cargo.toml | 1 + components/raftstore-v2/src/batch/store.rs | 2 +- components/raftstore-v2/src/operation/life.rs | 3 +- components/raftstore-v2/src/router/message.rs | 7 +- components/raftstore-v2/src/worker/pd/mod.rs | 9 +- .../raftstore-v2/src/worker/pd/slowness.rs | 7 +- .../raftstore-v2/src/worker/pd/store.rs | 5 +- components/raftstore/Cargo.toml | 1 + .../raftstore/src/store/async_io/write.rs | 5 +- components/raftstore/src/store/fsm/apply.rs | 3 +- components/raftstore/src/store/fsm/store.rs | 12 +- components/raftstore/src/store/msg.rs | 3 +- 
components/raftstore/src/store/util.rs | 104 ---- components/raftstore/src/store/worker/pd.rs | 428 ++++------------- components/server/Cargo.toml | 1 + components/server/src/server.rs | 8 +- components/server/src/server2.rs | 6 +- components/server/src/signal_handler.rs | 3 + components/test_raftstore-v2/Cargo.toml | 1 + components/test_raftstore-v2/src/server.rs | 8 +- components/test_raftstore/Cargo.toml | 1 + components/test_raftstore/src/node.rs | 3 +- components/test_raftstore/src/server.rs | 14 +- components/tikv_util/src/lib.rs | 1 - src/server/node.rs | 10 +- src/server/server.rs | 22 +- tests/Cargo.toml | 1 + tests/failpoints/cases/test_server.rs | 16 +- .../integrations/config/dynamic/raftstore.rs | 3 +- .../integrations/raftstore/test_bootstrap.rs | 3 +- .../raftstore/test_status_command.rs | 3 +- 39 files changed, 1245 insertions(+), 514 deletions(-) create mode 100644 components/health_controller/Cargo.toml create mode 100644 components/health_controller/src/lib.rs create mode 100644 components/health_controller/src/reporters.rs create mode 100644 components/health_controller/src/slow_score.rs rename components/{tikv_util => health_controller}/src/trend.rs (99%) create mode 100644 components/health_controller/src/types.rs diff --git a/Cargo.lock b/Cargo.lock index e803d8f928a..f036ff1e32d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2382,6 +2382,21 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "health_controller" +version = "0.1.0" +dependencies = [ + "grpcio-health", + "kvproto", + "ordered-float", + "parking_lot 0.12.1", + "prometheus", + "prometheus-static-metric", + "slog", + "slog-global", + "tikv_util", +] + [[package]] name = "heck" version = "0.3.1" @@ -4203,6 +4218,7 @@ dependencies = [ "futures-util", "getset", "grpcio-health", + "health_controller", "hybrid_engine", "into_other", "itertools", @@ -4268,6 +4284,7 @@ dependencies = [ "file_system", "fs2", "futures 0.3.15", + "health_controller", "keys", "kvproto", 
"log_wrappers", @@ -5202,6 +5219,7 @@ dependencies = [ "futures 0.3.15", "grpcio", "grpcio-health", + "health_controller", "hex 0.4.2", "hybrid_engine", "keys", @@ -5897,6 +5915,7 @@ dependencies = [ "futures 0.3.15", "grpcio", "grpcio-health", + "health_controller", "hybrid_engine", "keys", "kvproto", @@ -5946,6 +5965,7 @@ dependencies = [ "futures 0.3.15", "grpcio", "grpcio-health", + "health_controller", "keys", "kvproto", "lazy_static", @@ -6064,6 +6084,7 @@ dependencies = [ "futures 0.3.15", "grpcio", "grpcio-health", + "health_controller", "hyper", "keys", "kvproto", @@ -6365,6 +6386,7 @@ dependencies = [ "getset", "grpcio", "grpcio-health", + "health_controller", "hex 0.4.2", "http", "hybrid_engine", diff --git a/Cargo.toml b/Cargo.toml index 757f154b165..ad5973d618b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -88,6 +88,7 @@ fxhash = "0.2.1" getset = "0.1" grpcio = { workspace = true } grpcio-health = { workspace = true } +health_controller = { workspace = true } hex = "0.4" http = "0" hybrid_engine = { workspace = true } @@ -257,6 +258,7 @@ members = [ "components/error_code", "components/external_storage", "components/file_system", + "components/health_controller", "components/into_other", "components/keys", "components/log_wrappers", @@ -341,6 +343,7 @@ external_storage = { path = "components/external_storage" } file_system = { path = "components/file_system" } crypto = { path = "components/crypto" } gcp = { path = "components/cloud/gcp" } +health_controller = { path = "components/health_controller" } into_other = { path = "components/into_other" } keys = { path = "components/keys" } log_wrappers = { path = "components/log_wrappers" } diff --git a/components/health_controller/Cargo.toml b/components/health_controller/Cargo.toml new file mode 100644 index 00000000000..064ba91611d --- /dev/null +++ b/components/health_controller/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "health_controller" +version = "0.1.0" +license = "Apache-2.0" +edition = "2021" 
+publish = false + +[dependencies] +grpcio-health = { workspace = true } +kvproto = { workspace = true } +ordered-float = "2.6" +parking_lot = "0.12.1" +prometheus = { version = "0.13", features = ["nightly"] } +prometheus-static-metric = "0.5" +slog = { workspace = true } +slog-global = { workspace = true } +tikv_util = { workspace = true } diff --git a/components/health_controller/src/lib.rs b/components/health_controller/src/lib.rs new file mode 100644 index 00000000000..4e5504932e2 --- /dev/null +++ b/components/health_controller/src/lib.rs @@ -0,0 +1,451 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +//! This module contains utilities to manage and retrieve the health status of +//! TiKV instance in a unified way. +//! +//! ## [`HealthController`] +//! +//! [`HealthController`] is the core of the module. It's a unified place where +//! the server's health status is managed and collected, including the [gRPC +//! `HealthService`](grpcio_health::HealthService). It provides interfaces to +//! retrieve the collected information, and actively setting whether +//! the gRPC `HealthService` should report a `Serving` or `NotServing` status. +//! +//! ## Reporters +//! +//! [`HealthController`] doesn't provide ways to update most of the states +//! directly. Instead, each module in TiKV tha need to report its health status +//! need to create a corresponding reporter. +//! +//! The reason why the reporters is split out from the `HealthController` is: +//! +//! * Reporters can have different designs to fit the special use patterns of +//! different modules. +//! * `HealthController` internally contains states that are shared in different +//! modules and threads. If some module need to store internal states to +//! calculate the health status, they can be put in the reporter instead of +//! the `HealthController`, which makes it possible to avoid unnecessary +//! synchronization like mutexes. +//! 
* To avoid the `HealthController` itself contains too many different APIs +//! that are specific to different modules, increasing the complexity and +//! possibility to misuse of `HealthController`. + +pub mod reporters; +pub mod slow_score; +pub mod trend; +pub mod types; + +use std::{ + collections::HashSet, + ops::Deref, + sync::{ + atomic::{AtomicU64, AtomicUsize, Ordering}, + Arc, + }, +}; + +use grpcio_health::HealthService; +use kvproto::pdpb::SlowTrend as SlowTrendPb; +use parking_lot::{Mutex, RwLock}; +pub use types::{LatencyInspector, RaftstoreDuration}; + +struct ServingStatus { + is_serving: bool, + unhealthy_modules: HashSet<&'static str>, +} + +impl ServingStatus { + fn to_serving_status_pb(&self) -> grpcio_health::ServingStatus { + match (self.is_serving, self.unhealthy_modules.is_empty()) { + (true, true) => grpcio_health::ServingStatus::Serving, + (true, false) => grpcio_health::ServingStatus::ServiceUnknown, + (false, _) => grpcio_health::ServingStatus::NotServing, + } + } +} + +struct HealthControllerInner { + // Internally stores a `f64` type. + raftstore_slow_score: AtomicU64, + raftstore_slow_trend: RollingRetriever, + + /// gRPC's builtin `HealthService`. + /// + /// **Note**: DO NOT update its state directly. Only change its state while + /// holding the mutex of `current_serving_status`, and keep consistent + /// with value of `current_serving_status`, unless `health_service` is + /// already shutdown. + /// + /// TiKV uses gRPC's builtin `HealthService` to provide information about + /// whether the TiKV server is normally running. To keep its behavior + /// consistent with earlier versions without the `HealthController`, + /// it's used in such pattern: + /// + /// * Only an empty service name is used, representing the status of the + /// whole server. 
+ /// * When `current_serving_status.is_serving` is set to false (by calling + /// [`set_is_serving(false)`](HealthController::set_is_serving)), the + /// serving status is set to `NotServing`. + /// * If `current_serving_status.is_serving` is true, but + /// `current_serving_status.unhealthy_modules` is not empty, the serving + /// status is set to `ServiceUnknown`. + /// * Otherwise, the TiKV instance is regarded operational and the serving + /// status is set to `Serving`. + health_service: HealthService, + current_serving_status: Mutex, +} + +impl HealthControllerInner { + fn new() -> Self { + let health_service = HealthService::default(); + health_service.set_serving_status("", grpcio_health::ServingStatus::NotServing); + Self { + raftstore_slow_score: AtomicU64::new(1), + raftstore_slow_trend: RollingRetriever::new(), + + health_service, + current_serving_status: Mutex::new(ServingStatus { + is_serving: false, + unhealthy_modules: HashSet::default(), + }), + } + } + + /// Marks a module (identified by name) to be unhealthy. Adding an unhealthy + /// will make the serving status of the TiKV server, reported via the + /// gRPC `HealthService`, to become `ServiceUnknown`. + /// + /// This is not an public API. This method is expected to be called only + /// from reporters. + fn add_unhealthy_module(&self, module_name: &'static str) { + let mut status = self.current_serving_status.lock(); + if !status.unhealthy_modules.insert(module_name) { + // Nothing changed. + return; + } + if status.unhealthy_modules.len() == 1 && status.is_serving { + debug_assert_eq!( + status.to_serving_status_pb(), + grpcio_health::ServingStatus::ServiceUnknown + ); + self.health_service + .set_serving_status("", grpcio_health::ServingStatus::ServiceUnknown); + } + } + + /// Removes a module (identified by name) that was marked unhealthy before. 
+ /// When the unhealthy modules are cleared, the serving status reported + /// via the gRPC `HealthService` will change from `ServiceUnknown` to + /// `Serving`. + /// + /// This is not an public API. This method is expected to be called only + /// from reporters. + fn remove_unhealthy_module(&self, module_name: &'static str) { + let mut status = self.current_serving_status.lock(); + if !status.unhealthy_modules.remove(module_name) { + // Nothing changed. + return; + } + if status.unhealthy_modules.is_empty() && status.is_serving { + debug_assert_eq!( + status.to_serving_status_pb(), + grpcio_health::ServingStatus::Serving + ); + self.health_service + .set_serving_status("", grpcio_health::ServingStatus::Serving); + } + } + + /// Sets whether the TiKV server is serving. This is currently used to pause + /// the server, which has implementation in code but not commonly used. + /// + /// The effect of setting not serving overrides the effect of + /// [`add_on_healthy_module`](Self::add_unhealthy_module). + fn set_is_serving(&self, is_serving: bool) { + let mut status = self.current_serving_status.lock(); + if is_serving == status.is_serving { + // Nothing to do. + return; + } + status.is_serving = is_serving; + self.health_service + .set_serving_status("", status.to_serving_status_pb()); + } + + /// Gets the current serving status that is being reported by + /// `health_service`, if it's not shutdown. 
+ fn get_serving_status(&self) -> grpcio_health::ServingStatus { + let status = self.current_serving_status.lock(); + status.to_serving_status_pb() + } + + fn update_raftstore_slow_score(&self, value: f64) { + self.raftstore_slow_score + .store(value.to_bits(), Ordering::Release); + } + + fn get_raftstore_slow_score(&self) -> f64 { + f64::from_bits(self.raftstore_slow_score.load(Ordering::Acquire)) + } + + fn update_raftstore_slow_trend(&self, slow_trend_pb: SlowTrendPb) { + self.raftstore_slow_trend.put(slow_trend_pb); + } + + fn get_raftstore_slow_trend(&self) -> SlowTrendPb { + self.raftstore_slow_trend.get_cloned() + } + + fn shutdown(&self) { + self.health_service.shutdown(); + } +} + +#[derive(Clone)] +pub struct HealthController { + inner: Arc, +} + +impl HealthController { + pub fn new() -> Self { + Self { + inner: Arc::new(HealthControllerInner::new()), + } + } + + pub fn get_raftstore_slow_score(&self) -> f64 { + self.inner.get_raftstore_slow_score() + } + + pub fn get_raftstore_slow_trend(&self) -> SlowTrendPb { + self.inner.get_raftstore_slow_trend() + } + + /// Get the gRPC `HealthService`. + /// + /// Only use this when it's necessary to startup the gRPC server or for test + /// purpose. Do not change the `HealthService`'s state manually. + /// + /// If it's necessary to update `HealthService`'s state, consider using + /// [`set_is_serving`](Self::set_is_serving) or use a reporter to add an + /// unhealthy module. An example: + /// [`RaftstoreReporter::set_is_healthy`](reporters::RaftstoreReporter::set_is_healthy). + pub fn get_grpc_health_service(&self) -> HealthService { + self.inner.health_service.clone() + } + + pub fn get_serving_status(&self) -> grpcio_health::ServingStatus { + self.inner.get_serving_status() + } + + /// Set whether the TiKV server is serving. This controls the state reported + /// by the gRPC `HealthService`. 
+ pub fn set_is_serving(&self, is_serving: bool) { + self.inner.set_is_serving(is_serving); + } + + pub fn shutdown(&self) { + self.inner.shutdown(); + } +} + +// Make clippy happy. +impl Default for HealthControllerInner { + fn default() -> Self { + Self::new() + } +} + +impl Default for HealthController { + fn default() -> Self { + Self::new() + } +} + +/// An alternative util to simple RwLock. It allows writing not blocking +/// reading, at the expense of linearizability between reads and writes. +/// +/// This is suitable for use cases where atomic storing and loading is expected, +/// but atomic variables is not applicable due to the inner type larger than 8 +/// bytes. When writing is in progress, readings will get the previous value. +/// Writes will block each other, and fast and frequent writes may also block or +/// be blocked by slow reads. +struct RollingRetriever { + content: [RwLock; 2], + current_index: AtomicUsize, + write_mutex: Mutex<()>, +} + +impl RollingRetriever { + pub fn new() -> Self { + Self { + content: [RwLock::new(T::default()), RwLock::new(T::default())], + current_index: AtomicUsize::new(0), + write_mutex: Mutex::new(()), + } + } +} + +impl RollingRetriever { + #[inline] + pub fn put(&self, new_value: T) { + self.put_with(|| new_value) + } + + fn put_with(&self, f: impl FnOnce() -> T) { + let _write_guard = self.write_mutex.lock(); + // Update the item that is not the currently active one + let index = self.current_index.load(Ordering::Acquire) ^ 1; + + let mut data_guard = self.content[index].write(); + *data_guard = f(); + + drop(data_guard); + self.current_index.store(index, Ordering::Release); + } + + pub fn read(&self, f: impl FnOnce(&T) -> R) -> R { + let index = self.current_index.load(Ordering::Acquire); + let guard = self.content[index].read(); + f(guard.deref()) + } +} + +impl RollingRetriever { + pub fn get_cloned(&self) -> T { + self.read(|r| r.clone()) + } +} + +#[cfg(test)] +mod tests { + use std::{ + 
sync::mpsc::{sync_channel, RecvTimeoutError}, + time::Duration, + }; + + use super::*; + + #[test] + fn test_health_controller_update_service_status() { + let h = HealthController::new(); + assert_eq!( + h.get_serving_status(), + grpcio_health::ServingStatus::NotServing + ); + + h.set_is_serving(true); + assert_eq!( + h.get_serving_status(), + grpcio_health::ServingStatus::Serving + ); + + h.inner.add_unhealthy_module("A"); + assert_eq!( + h.get_serving_status(), + grpcio_health::ServingStatus::ServiceUnknown + ); + h.inner.add_unhealthy_module("B"); + assert_eq!( + h.get_serving_status(), + grpcio_health::ServingStatus::ServiceUnknown + ); + + h.inner.remove_unhealthy_module("A"); + assert_eq!( + h.get_serving_status(), + grpcio_health::ServingStatus::ServiceUnknown + ); + h.inner.remove_unhealthy_module("B"); + assert_eq!( + h.get_serving_status(), + grpcio_health::ServingStatus::Serving + ); + + h.set_is_serving(false); + assert_eq!( + h.get_serving_status(), + grpcio_health::ServingStatus::NotServing + ); + h.inner.add_unhealthy_module("A"); + assert_eq!( + h.get_serving_status(), + grpcio_health::ServingStatus::NotServing + ); + + h.set_is_serving(true); + assert_eq!( + h.get_serving_status(), + grpcio_health::ServingStatus::ServiceUnknown + ); + + h.inner.remove_unhealthy_module("A"); + assert_eq!( + h.get_serving_status(), + grpcio_health::ServingStatus::Serving + ); + } + + #[test] + fn test_rolling_retriever() { + let r = Arc::new(RollingRetriever::::new()); + assert_eq!(r.get_cloned(), 0); + + for i in 1..=10 { + r.put(i); + assert_eq!(r.get_cloned(), i); + } + + // Writing doesn't block reading. 
+ let r1 = r.clone(); + let (write_continue_tx, rx) = sync_channel(0); + let write_handle = std::thread::spawn(move || { + r1.put_with(move || { + rx.recv().unwrap(); + 11 + }) + }); + for _ in 1..10 { + std::thread::sleep(Duration::from_millis(5)); + assert_eq!(r.get_cloned(), 10) + } + write_continue_tx.send(()).unwrap(); + write_handle.join().unwrap(); + assert_eq!(r.get_cloned(), 11); + + // Writing block each other. + let r1 = r.clone(); + let (write1_tx, rx1) = sync_channel(0); + let write1_handle = std::thread::spawn(move || { + r1.put_with(move || { + // Receive once for notifying lock acquired. + rx1.recv().unwrap(); + // Receive again to be notified ready to continue. + rx1.recv().unwrap(); + 12 + }) + }); + write1_tx.send(()).unwrap(); + let r1 = r.clone(); + let (write2_tx, rx2) = sync_channel(0); + let write2_handle = std::thread::spawn(move || { + r1.put_with(move || { + write2_tx.send(()).unwrap(); + 13 + }) + }); + // Write 2 cannot continue as blocked by write 1. + assert_eq!( + rx2.recv_timeout(Duration::from_millis(50)).unwrap_err(), + RecvTimeoutError::Timeout + ); + // Continue write1 + write1_tx.send(()).unwrap(); + write1_handle.join().unwrap(); + assert_eq!(r.get_cloned(), 12); + // Continue write2 + rx2.recv().unwrap(); + write2_handle.join().unwrap(); + assert_eq!(r.get_cloned(), 13); + } +} diff --git a/components/health_controller/src/reporters.rs b/components/health_controller/src/reporters.rs new file mode 100644 index 00000000000..c80bb96057c --- /dev/null +++ b/components/health_controller/src/reporters.rs @@ -0,0 +1,244 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. 
+ +use std::{ + sync::Arc, + time::{Duration, Instant}, +}; + +use kvproto::pdpb; +use pdpb::SlowTrend as SlowTrendPb; +use prometheus::IntGauge; + +use crate::{ + slow_score::{SlowScore, SlowScoreTickResult}, + trend::{RequestPerSecRecorder, Trend}, + HealthController, HealthControllerInner, RaftstoreDuration, +}; + +/// The parameters for building a [`RaftstoreReporter`]. +/// +/// For slow trend related parameters (unsensitive_cause, unsensitive_result, +/// cause_*, result_*), please refer to : [`SlowTrendStatistics::new`] and +/// [`Trend`]. +pub struct RaftstoreReporterConfig { + /// The interval to tick the [`RaftstoreReporter`]. + /// + /// The `RaftstoreReporter` doesn't tick by itself, the caller (the PD + /// worker) is expected to tick it. But the interval is necessary in + /// some internal calculations. + pub inspect_interval: Duration, + + pub unsensitive_cause: f64, + pub unsensitive_result: f64, + pub net_io_factor: f64, + + // Metrics about slow trend. + pub cause_spike_filter_value_gauge: IntGauge, + pub cause_spike_filter_count_gauge: IntGauge, + pub cause_l1_gap_gauges: IntGauge, + pub cause_l2_gap_gauges: IntGauge, + pub result_spike_filter_value_gauge: IntGauge, + pub result_spike_filter_count_gauge: IntGauge, + pub result_l1_gap_gauges: IntGauge, + pub result_l2_gap_gauges: IntGauge, +} + +pub struct RaftstoreReporter { + health_controller_inner: Arc, + slow_score: SlowScore, + slow_trend: SlowTrendStatistics, + is_healthy: bool, +} + +impl RaftstoreReporter { + const MODULE_NAME: &'static str = "raftstore"; + + pub fn new(health_controller: &HealthController, cfg: RaftstoreReporterConfig) -> Self { + RaftstoreReporter { + health_controller_inner: health_controller.inner.clone(), + slow_score: SlowScore::new(cfg.inspect_interval), + slow_trend: SlowTrendStatistics::new(cfg), + is_healthy: true, + } + } + + pub fn get_tick_interval(&self) -> Duration { + self.slow_score.get_inspect_interval() + } + + pub fn get_slow_score(&self) -> f64 { + 
self.slow_score.get() + } + + pub fn get_slow_trend(&self) -> &SlowTrendStatistics { + &self.slow_trend + } + + pub fn record_raftstore_duration( + &mut self, + id: u64, + duration: RaftstoreDuration, + store_not_busy: bool, + ) { + // Fine-tuned, `SlowScore` only takes the I/O jitters on the disk into account. + self.slow_score + .record(id, duration.delays_on_disk_io(false), store_not_busy); + self.slow_trend.record(duration); + + // Publish slow score to health controller + self.health_controller_inner + .update_raftstore_slow_score(self.slow_score.get()); + } + + fn is_healthy(&self) -> bool { + self.is_healthy + } + + fn set_is_healthy(&mut self, is_healthy: bool) { + if is_healthy == self.is_healthy { + return; + } + + self.is_healthy = is_healthy; + if is_healthy { + self.health_controller_inner + .remove_unhealthy_module(Self::MODULE_NAME); + } else { + self.health_controller_inner + .add_unhealthy_module(Self::MODULE_NAME); + } + } + + pub fn tick(&mut self, store_maybe_busy: bool) -> SlowScoreTickResult { + // Record a fairly great value when timeout + self.slow_trend.slow_cause.record(500_000, Instant::now()); + + // The health status is recovered to serving as long as any tick + // does not timeout. + if !self.is_healthy() && self.slow_score.last_tick_finished() { + self.set_is_healthy(true); + } + if !self.slow_score.last_tick_finished() { + // If the last tick is not finished, it means that the current store might + // be busy on handling requests or delayed on I/O operations. And only when + // the current store is not busy, it should record the last_tick as a timeout. 
+ if !store_maybe_busy { + self.slow_score.record_timeout(); + } + } + + let slow_score_tick_result = self.slow_score.tick(); + if slow_score_tick_result.updated_score.is_some() && !slow_score_tick_result.has_new_record + { + self.set_is_healthy(false); + } + + // Publish the slow score to health controller + if let Some(slow_score_value) = slow_score_tick_result.updated_score { + self.health_controller_inner + .update_raftstore_slow_score(slow_score_value); + } + + slow_score_tick_result + } + + pub fn update_slow_trend( + &mut self, + observed_request_count: u64, + now: Instant, + ) -> (Option, SlowTrendPb) { + let requests_per_sec = self + .slow_trend + .slow_result_recorder + .record_and_get_current_rps(observed_request_count, now); + + let slow_trend_cause_rate = self.slow_trend.slow_cause.increasing_rate(); + let mut slow_trend_pb = SlowTrendPb::default(); + slow_trend_pb.set_cause_rate(slow_trend_cause_rate); + slow_trend_pb.set_cause_value(self.slow_trend.slow_cause.l0_avg()); + if let Some(requests_per_sec) = requests_per_sec { + self.slow_trend + .slow_result + .record(requests_per_sec as u64, Instant::now()); + slow_trend_pb.set_result_value(self.slow_trend.slow_result.l0_avg()); + let slow_trend_result_rate = self.slow_trend.slow_result.increasing_rate(); + slow_trend_pb.set_result_rate(slow_trend_result_rate); + } + + // Publish the result to health controller. + self.health_controller_inner + .update_raftstore_slow_trend(slow_trend_pb.clone()); + + (requests_per_sec, slow_trend_pb) + } +} + +pub struct SlowTrendStatistics { + net_io_factor: f64, + /// Detector to detect NetIo&DiskIo jitters. + pub slow_cause: Trend, + /// Reactor as an assistant detector to detect the QPS jitters. 
+ pub slow_result: Trend, + pub slow_result_recorder: RequestPerSecRecorder, +} + +impl SlowTrendStatistics { + #[inline] + pub fn new(config: RaftstoreReporterConfig) -> Self { + Self { + slow_cause: Trend::new( + // Disable SpikeFilter for now + Duration::from_secs(0), + config.cause_spike_filter_value_gauge, + config.cause_spike_filter_count_gauge, + Duration::from_secs(180), + Duration::from_secs(30), + Duration::from_secs(120), + Duration::from_secs(600), + 1, + tikv_util::time::duration_to_us(Duration::from_micros(500)), + config.cause_l1_gap_gauges, + config.cause_l2_gap_gauges, + config.unsensitive_cause, + ), + slow_result: Trend::new( + // Disable SpikeFilter for now + Duration::from_secs(0), + config.result_spike_filter_value_gauge, + config.result_spike_filter_count_gauge, + Duration::from_secs(120), + Duration::from_secs(15), + Duration::from_secs(60), + Duration::from_secs(300), + 1, + 2000, + config.result_l1_gap_gauges, + config.result_l2_gap_gauges, + config.unsensitive_result, + ), + slow_result_recorder: RequestPerSecRecorder::new(), + net_io_factor: config.net_io_factor, /* FIXME: add extra parameter in + * Config to control it. */ + } + } + + #[inline] + pub fn record(&mut self, duration: RaftstoreDuration) { + // TODO: It's more appropriate to divide the factor into `Disk IO factor` and + // `Net IO factor`. + // Currently, when `network ratio == 1`, it summarizes all factors by `sum` + // simplily, approved valid to common cases when there exists IO jitters on + // Network or Disk. 
+ let latency = || -> u64 { + if self.net_io_factor as u64 >= 1 { + return tikv_util::time::duration_to_us(duration.sum()); + } + let disk_io_latency = + tikv_util::time::duration_to_us(duration.delays_on_disk_io(true)) as f64; + let network_io_latency = + tikv_util::time::duration_to_us(duration.delays_on_net_io()) as f64; + (disk_io_latency + network_io_latency * self.net_io_factor) as u64 + }(); + self.slow_cause.record(latency, Instant::now()); + } +} diff --git a/components/health_controller/src/slow_score.rs b/components/health_controller/src/slow_score.rs new file mode 100644 index 00000000000..12e043b5668 --- /dev/null +++ b/components/health_controller/src/slow_score.rs @@ -0,0 +1,210 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{ + cmp, + time::{Duration, Instant}, +}; + +use ordered_float::OrderedFloat; + +// Slow score is a value that represents the speed of a store and ranges in [1, +// 100]. It is maintained in the AIMD way. +// If there are some inspecting requests timeout during a round, by default the +// score will be increased at most 1x when above 10% inspecting requests +// timeout. If there is not any timeout inspecting requests, the score will go +// back to 1 in at least 5min. +pub struct SlowScore { + value: OrderedFloat, + last_record_time: Instant, + last_update_time: Instant, + + timeout_requests: usize, + total_requests: usize, + + inspect_interval: Duration, + // The maximal tolerated timeout ratio. + ratio_thresh: OrderedFloat, + // Minimal time that the score could be decreased from 100 to 1. + min_ttr: Duration, + + // After how many ticks the value need to be updated. + round_ticks: u64, + // Identify every ticks. + last_tick_id: u64, + // If the last tick does not finished, it would be recorded as a timeout. 
+ last_tick_finished: bool, +} + +impl SlowScore { + pub fn new(inspect_interval: Duration) -> SlowScore { + SlowScore { + value: OrderedFloat(1.0), + + timeout_requests: 0, + total_requests: 0, + + inspect_interval, + ratio_thresh: OrderedFloat(0.1), + min_ttr: Duration::from_secs(5 * 60), + last_record_time: Instant::now(), + last_update_time: Instant::now(), + round_ticks: 30, + last_tick_id: 0, + last_tick_finished: true, + } + } + + pub fn record(&mut self, id: u64, duration: Duration, not_busy: bool) { + self.last_record_time = Instant::now(); + if id != self.last_tick_id { + return; + } + self.last_tick_finished = true; + self.total_requests += 1; + if not_busy && duration >= self.inspect_interval { + self.timeout_requests += 1; + } + } + + pub fn record_timeout(&mut self) { + self.last_tick_finished = true; + self.total_requests += 1; + self.timeout_requests += 1; + } + + pub fn update(&mut self) -> f64 { + let elapsed = self.last_update_time.elapsed(); + self.update_impl(elapsed).into() + } + + pub fn get(&self) -> f64 { + self.value.into() + } + + // Update the score in a AIMD way. 
+ fn update_impl(&mut self, elapsed: Duration) -> OrderedFloat { + if self.timeout_requests == 0 { + let desc = 100.0 * (elapsed.as_millis() as f64 / self.min_ttr.as_millis() as f64); + if OrderedFloat(desc) > self.value - OrderedFloat(1.0) { + self.value = 1.0.into(); + } else { + self.value -= desc; + } + } else { + let timeout_ratio = self.timeout_requests as f64 / self.total_requests as f64; + let near_thresh = + cmp::min(OrderedFloat(timeout_ratio), self.ratio_thresh) / self.ratio_thresh; + let value = self.value * (OrderedFloat(1.0) + near_thresh); + self.value = cmp::min(OrderedFloat(100.0), value); + } + + self.total_requests = 0; + self.timeout_requests = 0; + self.last_update_time = Instant::now(); + self.value + } + + pub fn should_force_report_slow_store(&self) -> bool { + self.value >= OrderedFloat(100.0) && (self.last_tick_id % self.round_ticks == 0) + } + + pub fn get_inspect_interval(&self) -> Duration { + self.inspect_interval + } + + pub fn last_tick_finished(&self) -> bool { + self.last_tick_finished + } + + pub fn tick(&mut self) -> SlowScoreTickResult { + let should_force_report_slow_store = self.should_force_report_slow_store(); + + let id = self.last_tick_id + 1; + self.last_tick_id += 1; + self.last_tick_finished = false; + + let (updated_score, has_new_record) = if self.last_tick_id % self.round_ticks == 0 { + // `last_update_time` is refreshed every round. If no update happens in a whole + // round, we set the status to unknown. 
+ let has_new_record = self.last_record_time >= self.last_update_time; + + let slow_score = self.update(); + (Some(slow_score), has_new_record) + } else { + (None, false) + }; + + SlowScoreTickResult { + tick_id: id, + updated_score, + has_new_record, + should_force_report_slow_store, + } + } +} + +pub struct SlowScoreTickResult { + pub tick_id: u64, + // None if skipped in this tick + pub updated_score: Option, + pub has_new_record: bool, + pub should_force_report_slow_store: bool, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_slow_score() { + let mut slow_score = SlowScore::new(Duration::from_millis(500)); + slow_score.timeout_requests = 5; + slow_score.total_requests = 100; + assert_eq!( + OrderedFloat(1.5), + slow_score.update_impl(Duration::from_secs(10)) + ); + + slow_score.timeout_requests = 10; + slow_score.total_requests = 100; + assert_eq!( + OrderedFloat(3.0), + slow_score.update_impl(Duration::from_secs(10)) + ); + + slow_score.timeout_requests = 20; + slow_score.total_requests = 100; + assert_eq!( + OrderedFloat(6.0), + slow_score.update_impl(Duration::from_secs(10)) + ); + + slow_score.timeout_requests = 100; + slow_score.total_requests = 100; + assert_eq!( + OrderedFloat(12.0), + slow_score.update_impl(Duration::from_secs(10)) + ); + + slow_score.timeout_requests = 11; + slow_score.total_requests = 100; + assert_eq!( + OrderedFloat(24.0), + slow_score.update_impl(Duration::from_secs(10)) + ); + + slow_score.timeout_requests = 0; + slow_score.total_requests = 100; + assert_eq!( + OrderedFloat(19.0), + slow_score.update_impl(Duration::from_secs(15)) + ); + + slow_score.timeout_requests = 0; + slow_score.total_requests = 100; + assert_eq!( + OrderedFloat(1.0), + slow_score.update_impl(Duration::from_secs(57)) + ); + } +} diff --git a/components/tikv_util/src/trend.rs b/components/health_controller/src/trend.rs similarity index 99% rename from components/tikv_util/src/trend.rs rename to components/health_controller/src/trend.rs 
index 8ae3bb3d5aa..605ab263cdb 100644 --- a/components/tikv_util/src/trend.rs +++ b/components/health_controller/src/trend.rs @@ -6,6 +6,7 @@ use std::{ }; use prometheus::IntGauge; +use tikv_util::info; pub struct SampleValue { value: u64, @@ -660,7 +661,7 @@ impl CurvesComposer { pub struct RequestPerSecRecorder { previous_ts: Instant, - inited: bool, + initialized: bool, } impl Default for RequestPerSecRecorder { @@ -673,7 +674,7 @@ impl RequestPerSecRecorder { pub fn new() -> Self { Self { previous_ts: Instant::now(), - inited: false, + initialized: false, } } @@ -683,12 +684,12 @@ impl RequestPerSecRecorder { observed_request_count: u64, now: Instant, ) -> Option { - if !self.inited { - self.inited = true; + if !self.initialized { + self.initialized = true; self.previous_ts = now; None } else { - self.inited = true; + self.initialized = true; let secs = now.saturating_duration_since(self.previous_ts).as_secs(); self.previous_ts = now; if secs == 0 { diff --git a/components/health_controller/src/types.rs b/components/health_controller/src/types.rs new file mode 100644 index 00000000000..5cbf5490511 --- /dev/null +++ b/components/health_controller/src/types.rs @@ -0,0 +1,107 @@ +// Copyright 2024 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{fmt::Debug, u64}; + +/// Represent the duration of all stages of raftstore recorded by one +/// inspecting. +#[derive(Default, Debug)] +pub struct RaftstoreDuration { + pub store_wait_duration: Option, + pub store_process_duration: Option, + pub store_write_duration: Option, + pub store_commit_duration: Option, + pub apply_wait_duration: Option, + pub apply_process_duration: Option, +} + +impl RaftstoreDuration { + #[inline] + pub fn sum(&self) -> std::time::Duration { + self.delays_on_disk_io(true) + self.delays_on_net_io() + } + + #[inline] + /// Returns the delayed duration on Disk I/O. 
+ pub fn delays_on_disk_io(&self, include_wait_duration: bool) -> std::time::Duration { + let duration = self.store_process_duration.unwrap_or_default() + + self.store_write_duration.unwrap_or_default() + + self.apply_process_duration.unwrap_or_default(); + if include_wait_duration { + duration + + self.store_wait_duration.unwrap_or_default() + + self.apply_wait_duration.unwrap_or_default() + } else { + duration + } + } + + #[inline] + /// Returns the delayed duration on Network I/O. + /// + /// Normally, it can be reflected by the duraiton on + /// `store_commit_duraiton`. + pub fn delays_on_net_io(&self) -> std::time::Duration { + // The `store_commit_duration` serves as an indicator for latency + // during the duration of transferring Raft logs to peers and appending + // logs. In most scenarios, instances of latency fluctuations in the + // network are reflected by this duration. Hence, it is selected as a + // representative of network latency. + self.store_commit_duration.unwrap_or_default() + } +} + +/// Used to inspect the latency of all stages of raftstore. 
+pub struct LatencyInspector { + id: u64, + duration: RaftstoreDuration, + cb: Box, +} + +impl Debug for LatencyInspector { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + fmt, + "LatencyInspector: id {} duration: {:?}", + self.id, self.duration + ) + } +} + +impl LatencyInspector { + pub fn new(id: u64, cb: Box) -> Self { + Self { + id, + cb, + duration: RaftstoreDuration::default(), + } + } + + pub fn record_store_wait(&mut self, duration: std::time::Duration) { + self.duration.store_wait_duration = Some(duration); + } + + pub fn record_store_process(&mut self, duration: std::time::Duration) { + self.duration.store_process_duration = Some(duration); + } + + pub fn record_store_write(&mut self, duration: std::time::Duration) { + self.duration.store_write_duration = Some(duration); + } + + pub fn record_store_commit(&mut self, duration: std::time::Duration) { + self.duration.store_commit_duration = Some(duration); + } + + pub fn record_apply_wait(&mut self, duration: std::time::Duration) { + self.duration.apply_wait_duration = Some(duration); + } + + pub fn record_apply_process(&mut self, duration: std::time::Duration) { + self.duration.apply_process_duration = Some(duration); + } + + /// Call the callback. 
+ pub fn finish(self) { + (self.cb)(self.id, self.duration); + } +} diff --git a/components/raftstore-v2/Cargo.toml b/components/raftstore-v2/Cargo.toml index 86c93e02e57..ef0f2cfbee5 100644 --- a/components/raftstore-v2/Cargo.toml +++ b/components/raftstore-v2/Cargo.toml @@ -39,6 +39,7 @@ fail = "0.5" file_system = { workspace = true } fs2 = "0.4" futures = { version = "0.3", features = ["compat"] } +health_controller = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } log_wrappers = { workspace = true } diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 68d5855a437..056cd122e67 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -21,6 +21,7 @@ use encryption_export::DataKeyManager; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use file_system::{set_io_type, IoType, WithIoType}; use futures::compat::Future01CompatExt; +use health_controller::types::LatencyInspector; use kvproto::{disk_usage::DiskUsage, raft_serverpb::RaftMessage}; use pd_client::PdClient; use raft::{StateRole, INVALID_ID}; @@ -32,7 +33,6 @@ use raftstore::{ GlobalStoreStat, LocalStoreStat, }, local_metrics::RaftMetrics, - util::LatencyInspector, AutoSplitController, Config, ReadRunner, ReadTask, RefreshConfigTask, SplitCheckRunner, SplitCheckTask, StoreWriters, StoreWritersContext, TabletSnapManager, Transport, WriteRouterContext, WriteSenders, WriterContoller, diff --git a/components/raftstore-v2/src/operation/life.rs b/components/raftstore-v2/src/operation/life.rs index 864ac0f234c..6dd0589b27c 100644 --- a/components/raftstore-v2/src/operation/life.rs +++ b/components/raftstore-v2/src/operation/life.rs @@ -31,6 +31,7 @@ use std::{cmp, collections::HashSet, mem}; use batch_system::BasicMailbox; use crossbeam::channel::{SendError, TrySendError}; use engine_traits::{KvEngine, RaftEngine, RaftLogBatch}; +use health_controller::types; use 
kvproto::{ kvrpcpb::DiskFullOpt, metapb::{self, PeerRole, Region}, @@ -574,7 +575,7 @@ impl Store { &self, ctx: &mut StoreContext, start_ts: Instant, - mut inspector: util::LatencyInspector, + mut inspector: types::LatencyInspector, ) where EK: KvEngine, ER: RaftEngine, diff --git a/components/raftstore-v2/src/router/message.rs b/components/raftstore-v2/src/router/message.rs index 59d1edd8198..b66c84d9740 100644 --- a/components/raftstore-v2/src/router/message.rs +++ b/components/raftstore-v2/src/router/message.rs @@ -4,6 +4,7 @@ use std::sync::{mpsc::SyncSender, Arc}; use collections::HashSet; +use health_controller::types::LatencyInspector; use kvproto::{ import_sstpb::SstMeta, metapb, @@ -14,9 +15,9 @@ use kvproto::{ }; use raftstore::store::{ fsm::ChangeObserver, metrics::RaftEventDurationType, simple_write::SimpleWriteBinary, - util::LatencyInspector, FetchedLogs, GenSnapRes, RaftCmdExtraOpts, TabletSnapKey, - UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, - UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryWaitApplySyncer, + FetchedLogs, GenSnapRes, RaftCmdExtraOpts, TabletSnapKey, UnsafeRecoveryExecutePlanSyncer, + UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, + UnsafeRecoveryWaitApplySyncer, }; use resource_control::ResourceMetered; use tikv_util::time::Instant; diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 7e07d26e61f..3ae31083d9f 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -9,14 +9,13 @@ use causal_ts::CausalTsProviderImpl; use collections::HashMap; use concurrency_manager::ConcurrencyManager; use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; +use health_controller::types::{LatencyInspector, RaftstoreDuration}; use kvproto::{metapb, pdpb}; use pd_client::{BucketStat, PdClient}; use raftstore::store::{ - metrics::STORE_INSPECT_DURATION_HISTOGRAM, - util::{KeysInfoFormatter, 
LatencyInspector, RaftstoreDuration}, - AutoSplitController, Config, FlowStatsReporter, PdStatsMonitor, ReadStats, SplitInfo, - StoreStatsReporter, TabletSnapManager, TxnExt, WriteStats, - NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, + metrics::STORE_INSPECT_DURATION_HISTOGRAM, util::KeysInfoFormatter, AutoSplitController, + Config, FlowStatsReporter, PdStatsMonitor, ReadStats, SplitInfo, StoreStatsReporter, + TabletSnapManager, TxnExt, WriteStats, NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, }; use resource_metering::{Collector, CollectorRegHandle, RawRecords}; use service::service_manager::GrpcServiceManager; diff --git a/components/raftstore-v2/src/worker/pd/slowness.rs b/components/raftstore-v2/src/worker/pd/slowness.rs index a715d06ae83..4f2aee6102e 100644 --- a/components/raftstore-v2/src/worker/pd/slowness.rs +++ b/components/raftstore-v2/src/worker/pd/slowness.rs @@ -4,10 +4,13 @@ use std::time::{Duration, Instant}; use engine_traits::{KvEngine, RaftEngine}; use fail::fail_point; +use health_controller::{ + trend::{RequestPerSecRecorder, Trend}, + types::RaftstoreDuration, +}; use kvproto::pdpb; use pd_client::PdClient; -use raftstore::store::{metrics::*, util::RaftstoreDuration, Config}; -use tikv_util::trend::{RequestPerSecRecorder, Trend}; +use raftstore::store::{metrics::*, Config}; use super::Runner; pub struct SlownessStatistics { diff --git a/components/raftstore-v2/src/worker/pd/store.rs b/components/raftstore-v2/src/worker/pd/store.rs index b3fd3245be6..926ad307cf0 100644 --- a/components/raftstore-v2/src/worker/pd/store.rs +++ b/components/raftstore-v2/src/worker/pd/store.rs @@ -5,6 +5,7 @@ use std::{cmp, sync::Arc}; use collections::{HashMap, HashSet}; use engine_traits::{KvEngine, RaftEngine}; use fail::fail_point; +use health_controller::types::LatencyInspector; use kvproto::pdpb; use pd_client::{ metrics::{ @@ -15,8 +16,8 @@ use pd_client::{ }; use prometheus::local::LocalHistogram; use raftstore::store::{ - metrics::STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC, 
util::LatencyInspector, - UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, + metrics::STORE_SNAPSHOT_TRAFFIC_GAUGE_VEC, UnsafeRecoveryExecutePlanSyncer, + UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, }; use slog::{error, info, warn}; use tikv_util::{ diff --git a/components/raftstore/Cargo.toml b/components/raftstore/Cargo.toml index 115b06b347b..9e69afa9c0b 100644 --- a/components/raftstore/Cargo.toml +++ b/components/raftstore/Cargo.toml @@ -50,6 +50,7 @@ futures = "0.3" futures-util = { version = "0.3.1", default-features = false, features = ["io"] } getset = "0.1" grpcio-health = { workspace = true } +health_controller = { workspace = true } into_other = { workspace = true } itertools = "0.10" keys = { workspace = true } diff --git a/components/raftstore/src/store/async_io/write.rs b/components/raftstore/src/store/async_io/write.rs index 8a63380213a..1fa9b7ce950 100644 --- a/components/raftstore/src/store/async_io/write.rs +++ b/components/raftstore/src/store/async_io/write.rs @@ -21,6 +21,7 @@ use engine_traits::{ use error_code::ErrorCodeExt; use fail::fail_point; use file_system::{set_io_type, IoType}; +use health_controller::types::LatencyInspector; use kvproto::{ metapb::RegionEpoch, raft_serverpb::{RaftLocalState, RaftMessage}, @@ -50,9 +51,7 @@ use crate::{ local_metrics::{RaftSendMessageMetrics, StoreWriteMetrics, TimeTracker}, metrics::*, transport::Transport, - util, - util::LatencyInspector, - PeerMsg, + util, PeerMsg, }, Result, }; diff --git a/components/raftstore/src/store/fsm/apply.rs b/components/raftstore/src/store/fsm/apply.rs index 911d1fb23a7..1a80e5300cf 100644 --- a/components/raftstore/src/store/fsm/apply.rs +++ b/components/raftstore/src/store/fsm/apply.rs @@ -34,6 +34,7 @@ use engine_traits::{ WriteOptions, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; use fail::fail_point; +use health_controller::types::LatencyInspector; use kvproto::{ import_sstpb::SstMeta, kvrpcpb::ExtraOp as 
TxnExtraOp, @@ -92,7 +93,7 @@ use crate::{ peer_storage::{write_initial_apply_state, write_peer_state}, util::{ self, admin_cmd_epoch_lookup, check_flashback_state, check_req_region_epoch, - compare_region_epoch, ChangePeerI, ConfChangeKind, KeysInfoFormatter, LatencyInspector, + compare_region_epoch, ChangePeerI, ConfChangeKind, KeysInfoFormatter, }, Config, RegionSnapshot, RegionTask, WriteCallback, }, diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index ab468adfd95..d10340b041d 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -33,7 +33,7 @@ use engine_traits::{ use fail::fail_point; use file_system::{IoType, WithIoType}; use futures::{compat::Future01CompatExt, FutureExt}; -use grpcio_health::HealthService; +use health_controller::{types::LatencyInspector, HealthController}; use keys::{self, data_end_key, data_key, enc_end_key, enc_start_key}; use kvproto::{ metapb::{self, Region, RegionEpoch}, @@ -603,7 +603,7 @@ where pub store_disk_usages: HashMap, pub write_senders: WriteSenders, pub sync_write_worker: Option, T>>, - pub pending_latency_inspect: Vec, + pub pending_latency_inspect: Vec, pub safe_point: Arc, @@ -1631,7 +1631,7 @@ impl RaftBatchSystem { global_replication_state: Arc>, concurrency_manager: ConcurrencyManager, collector_reg_handle: CollectorRegHandle, - health_service: Option, + health_controller: HealthController, causal_ts_provider: Option>, // used for rawkv apiv2 grpc_service_mgr: GrpcServiceManager, safe_point: Arc, @@ -1765,7 +1765,7 @@ impl RaftBatchSystem { mgr, pd_client, collector_reg_handle, - health_service, + health_controller, causal_ts_provider, snap_generator_pool, grpc_service_mgr, @@ -1783,7 +1783,7 @@ impl RaftBatchSystem { snap_mgr: SnapManager, pd_client: Arc, collector_reg_handle: CollectorRegHandle, - health_service: Option, + health_controller: HealthController, causal_ts_provider: Option>, // used for rawkv 
apiv2 snap_generator_pool: FuturePool, grpc_service_mgr: GrpcServiceManager, @@ -1874,7 +1874,7 @@ impl RaftBatchSystem { snap_mgr, workers.pd_worker.remote(), collector_reg_handle, - health_service, + health_controller, coprocessor_host, causal_ts_provider, grpc_service_mgr, diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 0e52edc5012..fa0d89a82a9 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -8,6 +8,7 @@ use std::{borrow::Cow, fmt}; use collections::HashSet; use engine_traits::{CompactedEvent, KvEngine, Snapshot}; use futures::channel::mpsc::UnboundedSender; +use health_controller::types::LatencyInspector; use kvproto::{ brpb::CheckAdminResponse, kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp}, @@ -37,7 +38,7 @@ use crate::store::{ UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryFillOutReportSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryWaitApplySyncer, }, - util::{KeysInfoFormatter, LatencyInspector}, + util::KeysInfoFormatter, worker::{Bucket, BucketRange}, SnapKey, }; diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index f0d76a10deb..01f27b12ac6 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1799,110 +1799,6 @@ impl RegionReadProgressCore { } } -/// Represent the duration of all stages of raftstore recorded by one -/// inspecting. -#[derive(Default, Debug)] -pub struct RaftstoreDuration { - pub store_wait_duration: Option, - pub store_process_duration: Option, - pub store_write_duration: Option, - pub store_commit_duration: Option, - pub apply_wait_duration: Option, - pub apply_process_duration: Option, -} - -impl RaftstoreDuration { - #[inline] - pub fn sum(&self) -> std::time::Duration { - self.delays_on_disk_io(true) + self.delays_on_net_io() - } - - #[inline] - /// Returns the delayed duration on Disk I/O. 
- pub fn delays_on_disk_io(&self, include_wait_duration: bool) -> std::time::Duration { - let duration = self.store_process_duration.unwrap_or_default() - + self.store_write_duration.unwrap_or_default() - + self.apply_process_duration.unwrap_or_default(); - if include_wait_duration { - duration - + self.store_wait_duration.unwrap_or_default() - + self.apply_wait_duration.unwrap_or_default() - } else { - duration - } - } - - #[inline] - /// Returns the delayed duration on Network I/O. - /// - /// Normally, it can be reflected by the duraiton on - /// `store_commit_duraiton`. - pub fn delays_on_net_io(&self) -> std::time::Duration { - // The `store_commit_duration` serves as an indicator for latency - // during the duration of transferring Raft logs to peers and appending - // logs. In most scenarios, instances of latency fluctuations in the - // network are reflected by this duration. Hence, it is selected as a - // representative of network latency. - self.store_commit_duration.unwrap_or_default() - } -} - -/// Used to inspect the latency of all stages of raftstore. 
-pub struct LatencyInspector { - id: u64, - duration: RaftstoreDuration, - cb: Box, -} - -impl Debug for LatencyInspector { - fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - fmt, - "LatencyInspector: id {} duration: {:?}", - self.id, self.duration - ) - } -} - -impl LatencyInspector { - pub fn new(id: u64, cb: Box) -> Self { - Self { - id, - cb, - duration: RaftstoreDuration::default(), - } - } - - pub fn record_store_wait(&mut self, duration: std::time::Duration) { - self.duration.store_wait_duration = Some(duration); - } - - pub fn record_store_process(&mut self, duration: std::time::Duration) { - self.duration.store_process_duration = Some(duration); - } - - pub fn record_store_write(&mut self, duration: std::time::Duration) { - self.duration.store_write_duration = Some(duration); - } - - pub fn record_store_commit(&mut self, duration: std::time::Duration) { - self.duration.store_commit_duration = Some(duration); - } - - pub fn record_apply_wait(&mut self, duration: std::time::Duration) { - self.duration.apply_wait_duration = Some(duration); - } - - pub fn record_apply_process(&mut self, duration: std::time::Duration) { - self.duration.apply_process_duration = Some(duration); - } - - /// Call the callback. 
- pub fn finish(self) { - (self.cb)(self.id, self.duration); - } -} - pub fn validate_split_region( region_id: u64, peer_id: u64, diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index 152dc7b3ef6..26d3ab5c279 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -20,7 +20,11 @@ use concurrency_manager::ConcurrencyManager; use engine_traits::{KvEngine, RaftEngine}; use fail::fail_point; use futures::{compat::Future01CompatExt, FutureExt}; -use grpcio_health::{HealthService, ServingStatus}; +use health_controller::{ + reporters::{RaftstoreReporter, RaftstoreReporterConfig}, + types::{LatencyInspector, RaftstoreDuration}, + HealthController, +}; use kvproto::{ kvrpcpb::DiskFullOpt, metapb, pdpb, @@ -31,7 +35,6 @@ use kvproto::{ raft_serverpb::RaftMessage, replication_modepb::{RegionReplicationStatus, StoreDrAutoSyncStatus}, }; -use ordered_float::OrderedFloat; use pd_client::{metrics::*, BucketStat, Error, PdClient, RegionStat}; use prometheus::local::LocalHistogram; use raft::eraftpb::ConfChangeType; @@ -46,7 +49,6 @@ use tikv_util::{ time::{Instant as TiInstant, UnixSecs}, timer::GLOBAL_TIMER_HANDLE, topn::TopN, - trend::{RequestPerSecRecorder, Trend}, warn, worker::{Runnable, RunnableWithTimer, ScheduleError, Scheduler}, }; @@ -62,7 +64,7 @@ use crate::{ unsafe_recovery::{ UnsafeRecoveryExecutePlanSyncer, UnsafeRecoveryForceLeaderSyncer, UnsafeRecoveryHandle, }, - util::{is_epoch_stale, KeysInfoFormatter, LatencyInspector, RaftstoreDuration}, + util::{is_epoch_stale, KeysInfoFormatter}, worker::{ split_controller::{SplitInfo, TOP_N}, AutoSplitController, ReadStats, SplitConfigChange, WriteStats, @@ -815,180 +817,6 @@ fn hotspot_query_num_report_threshold() -> u64 { /// Max limitation of delayed store_heartbeat. const STORE_HEARTBEAT_DELAY_LIMIT: u64 = 5 * 60; -// Slow score is a value that represents the speed of a store and ranges in [1, -// 100]. 
It is maintained in the AIMD way. -// If there are some inspecting requests timeout during a round, by default the -// score will be increased at most 1x when above 10% inspecting requests -// timeout. If there is not any timeout inspecting requests, the score will go -// back to 1 in at least 5min. -struct SlowScore { - value: OrderedFloat, - last_record_time: Instant, - last_update_time: Instant, - - timeout_requests: usize, - total_requests: usize, - - inspect_interval: Duration, - // The maximal tolerated timeout ratio. - ratio_thresh: OrderedFloat, - // Minimal time that the score could be decreased from 100 to 1. - min_ttr: Duration, - - // After how many ticks the value need to be updated. - round_ticks: u64, - // Identify every ticks. - last_tick_id: u64, - // If the last tick does not finished, it would be recorded as a timeout. - last_tick_finished: bool, -} - -impl SlowScore { - fn new(inspect_interval: Duration) -> SlowScore { - SlowScore { - value: OrderedFloat(1.0), - - timeout_requests: 0, - total_requests: 0, - - inspect_interval, - ratio_thresh: OrderedFloat(0.1), - min_ttr: Duration::from_secs(5 * 60), - last_record_time: Instant::now(), - last_update_time: Instant::now(), - round_ticks: 30, - last_tick_id: 0, - last_tick_finished: true, - } - } - - fn record(&mut self, id: u64, duration: Duration, not_busy: bool) { - self.last_record_time = Instant::now(); - if id != self.last_tick_id { - return; - } - self.last_tick_finished = true; - self.total_requests += 1; - if not_busy && duration >= self.inspect_interval { - self.timeout_requests += 1; - } - } - - fn record_timeout(&mut self) { - self.last_tick_finished = true; - self.total_requests += 1; - self.timeout_requests += 1; - } - - fn update(&mut self) -> f64 { - let elapsed = self.last_update_time.elapsed(); - self.update_impl(elapsed).into() - } - - fn get(&self) -> f64 { - self.value.into() - } - - // Update the score in a AIMD way. 
- fn update_impl(&mut self, elapsed: Duration) -> OrderedFloat { - if self.timeout_requests == 0 { - let desc = 100.0 * (elapsed.as_millis() as f64 / self.min_ttr.as_millis() as f64); - if OrderedFloat(desc) > self.value - OrderedFloat(1.0) { - self.value = 1.0.into(); - } else { - self.value -= desc; - } - } else { - let timeout_ratio = self.timeout_requests as f64 / self.total_requests as f64; - let near_thresh = - cmp::min(OrderedFloat(timeout_ratio), self.ratio_thresh) / self.ratio_thresh; - let value = self.value * (OrderedFloat(1.0) + near_thresh); - self.value = cmp::min(OrderedFloat(100.0), value); - } - - self.total_requests = 0; - self.timeout_requests = 0; - self.last_update_time = Instant::now(); - self.value - } - - fn should_force_report_slow_store(&self) -> bool { - self.value >= OrderedFloat(100.0) && (self.last_tick_id % self.round_ticks == 0) - } -} - -struct SlowTrendStatistics { - net_io_factor: f64, - /// Detector to detect NetIo&DiskIo jitters. - slow_cause: Trend, - /// Reactor as an assistant detector to detect the QPS jitters. 
- slow_result: Trend, - slow_result_recorder: RequestPerSecRecorder, -} - -impl SlowTrendStatistics { - #[inline] - fn new(cfg: &Config) -> Self { - Self { - slow_cause: Trend::new( - // Disable SpikeFilter for now - Duration::from_secs(0), - STORE_SLOW_TREND_MISC_GAUGE_VEC.with_label_values(&["spike_filter_value"]), - STORE_SLOW_TREND_MISC_GAUGE_VEC.with_label_values(&["spike_filter_count"]), - Duration::from_secs(180), - Duration::from_secs(30), - Duration::from_secs(120), - Duration::from_secs(600), - 1, - tikv_util::time::duration_to_us(Duration::from_micros(500)), - STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC.with_label_values(&["L1"]), - STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC.with_label_values(&["L2"]), - cfg.slow_trend_unsensitive_cause, - ), - slow_result: Trend::new( - // Disable SpikeFilter for now - Duration::from_secs(0), - STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC.with_label_values(&["spike_filter_value"]), - STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC.with_label_values(&["spike_filter_count"]), - Duration::from_secs(120), - Duration::from_secs(15), - Duration::from_secs(60), - Duration::from_secs(300), - 1, - 2000, - STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC - .with_label_values(&["L1"]), - STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC - .with_label_values(&["L2"]), - cfg.slow_trend_unsensitive_result, - ), - slow_result_recorder: RequestPerSecRecorder::new(), - net_io_factor: cfg.slow_trend_network_io_factor, /* FIXME: add extra parameter in - * Config to control it. */ - } - } - - #[inline] - fn record(&mut self, duration: RaftstoreDuration) { - // TODO: It's more appropriate to divide the factor into `Disk IO factor` and - // `Net IO factor`. - // Currently, when `network ratio == 1`, it summarizes all factors by `sum` - // simplily, approved valid to common cases when there exists IO jitters on - // Network or Disk. 
- let latency = || -> u64 { - if self.net_io_factor as u64 >= 1 { - return tikv_util::time::duration_to_us(duration.sum()); - } - let disk_io_latency = - tikv_util::time::duration_to_us(duration.delays_on_disk_io(true)) as f64; - let network_io_latency = - tikv_util::time::duration_to_us(duration.delays_on_net_io()) as f64; - (disk_io_latency + network_io_latency * self.net_io_factor) as u64 - }(); - self.slow_cause.record(latency, Instant::now()); - } -} - pub struct Runner where EK: KvEngine, @@ -1018,12 +846,10 @@ where concurrency_manager: ConcurrencyManager, snap_mgr: SnapManager, remote: Remote, - slow_score: SlowScore, - slow_trend: SlowTrendStatistics, - // The health status of the store is updated by the slow score mechanism. - health_service: Option, - curr_health_status: ServingStatus, + health_reporter: RaftstoreReporter, + health_controller: HealthController, + coprocessor_host: CoprocessorHost, causal_ts_provider: Option>, // used for rawkv apiv2 @@ -1048,7 +874,7 @@ where snap_mgr: SnapManager, remote: Remote, collector_reg_handle: CollectorRegHandle, - health_service: Option, + health_controller: HealthController, coprocessor_host: CoprocessorHost, causal_ts_provider: Option>, // used for rawkv apiv2 grpc_service_manager: GrpcServiceManager, @@ -1066,6 +892,33 @@ where error!("failed to start stats collector, error = {:?}", e); } + let health_reporter_config = RaftstoreReporterConfig { + inspect_interval: cfg.inspect_interval.0, + + unsensitive_cause: cfg.slow_trend_unsensitive_cause, + unsensitive_result: cfg.slow_trend_unsensitive_result, + net_io_factor: cfg.slow_trend_network_io_factor, + + cause_spike_filter_value_gauge: STORE_SLOW_TREND_MISC_GAUGE_VEC + .with_label_values(&["spike_filter_value"]), + cause_spike_filter_count_gauge: STORE_SLOW_TREND_MISC_GAUGE_VEC + .with_label_values(&["spike_filter_count"]), + cause_l1_gap_gauges: STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC + .with_label_values(&["L1"]), + cause_l2_gap_gauges: 
STORE_SLOW_TREND_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC + .with_label_values(&["L2"]), + result_spike_filter_value_gauge: STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC + .with_label_values(&["spike_filter_value"]), + result_spike_filter_count_gauge: STORE_SLOW_TREND_RESULT_MISC_GAUGE_VEC + .with_label_values(&["spike_filter_count"]), + result_l1_gap_gauges: STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC + .with_label_values(&["L1"]), + result_l2_gap_gauges: STORE_SLOW_TREND_RESULT_MARGIN_ERROR_WINDOW_GAP_GAUGE_VEC + .with_label_values(&["L2"]), + }; + + let health_reporter = RaftstoreReporter::new(&health_controller, health_reporter_config); + Runner { store_id, pd_client, @@ -1082,10 +935,8 @@ where concurrency_manager, snap_mgr, remote, - slow_score: SlowScore::new(cfg.inspect_interval.0), - slow_trend: SlowTrendStatistics::new(cfg), - health_service, - curr_health_status: ServingStatus::Serving, + health_reporter, + health_controller, coprocessor_host, causal_ts_provider, grpc_service_manager, @@ -1360,10 +1211,7 @@ where .store_stat .engine_total_query_num .sub_query_stats(&self.store_stat.engine_last_query_num); - let total_query_num = self - .slow_trend - .slow_result_recorder - .record_and_get_current_rps(res.get_all_query_num(), Instant::now()); + let all_query_num = res.get_all_query_num(); stats.set_query_stats(res.0); stats.set_cpu_usages(self.store_stat.store_cpu_usages.clone().into()); @@ -1395,9 +1243,13 @@ where STORE_SIZE_EVENT_INT_VEC.available.set(available as i64); STORE_SIZE_EVENT_INT_VEC.used.set(used_size as i64); - let slow_score = self.slow_score.get(); + let slow_score = self.health_reporter.get_slow_score(); stats.set_slow_score(slow_score as u64); - self.set_slow_trend_to_store_stats(&mut stats, total_query_num); + let (rps, slow_trend_pb) = self + .health_reporter + .update_slow_trend(all_query_num, Instant::now()); + self.flush_slow_trend_metrics(rps, &slow_trend_pb); + stats.set_slow_trend(slow_trend_pb); 
stats.set_is_grpc_paused(self.grpc_service_manager.is_paused()); @@ -1484,53 +1336,40 @@ where self.remote.spawn(f); } - fn set_slow_trend_to_store_stats( + fn flush_slow_trend_metrics( &mut self, - stats: &mut pdpb::StoreStats, - total_query_num: Option, + requests_per_sec: Option, + slow_trend_pb: &pdpb::SlowTrend, ) { - let slow_trend_cause_rate = self.slow_trend.slow_cause.increasing_rate(); - STORE_SLOW_TREND_GAUGE.set(slow_trend_cause_rate); - let mut slow_trend = pdpb::SlowTrend::default(); - slow_trend.set_cause_rate(slow_trend_cause_rate); - slow_trend.set_cause_value(self.slow_trend.slow_cause.l0_avg()); - if let Some(total_query_num) = total_query_num { - self.slow_trend - .slow_result - .record(total_query_num as u64, Instant::now()); - slow_trend.set_result_value(self.slow_trend.slow_result.l0_avg()); - let slow_trend_result_rate = self.slow_trend.slow_result.increasing_rate(); - slow_trend.set_result_rate(slow_trend_result_rate); - STORE_SLOW_TREND_RESULT_GAUGE.set(slow_trend_result_rate); - STORE_SLOW_TREND_RESULT_VALUE_GAUGE.set(total_query_num); + let slow_trend = self.health_reporter.get_slow_trend(); + // Latest result. 
+ STORE_SLOW_TREND_GAUGE.set(slow_trend_pb.get_cause_rate()); + if let Some(requests_per_sec) = requests_per_sec { + STORE_SLOW_TREND_RESULT_GAUGE.set(slow_trend_pb.get_result_rate()); + STORE_SLOW_TREND_RESULT_VALUE_GAUGE.set(requests_per_sec); } else { // Just to mark the invalid range on the graphic STORE_SLOW_TREND_RESULT_VALUE_GAUGE.set(-100.0); } - stats.set_slow_trend(slow_trend); - self.write_slow_trend_metrics(); - } - - fn write_slow_trend_metrics(&mut self) { - STORE_SLOW_TREND_L0_GAUGE.set(self.slow_trend.slow_cause.l0_avg()); - STORE_SLOW_TREND_L1_GAUGE.set(self.slow_trend.slow_cause.l1_avg()); - STORE_SLOW_TREND_L2_GAUGE.set(self.slow_trend.slow_cause.l2_avg()); - STORE_SLOW_TREND_L0_L1_GAUGE.set(self.slow_trend.slow_cause.l0_l1_rate()); - STORE_SLOW_TREND_L1_L2_GAUGE.set(self.slow_trend.slow_cause.l1_l2_rate()); - STORE_SLOW_TREND_L1_MARGIN_ERROR_GAUGE - .set(self.slow_trend.slow_cause.l1_margin_error_base()); - STORE_SLOW_TREND_L2_MARGIN_ERROR_GAUGE - .set(self.slow_trend.slow_cause.l2_margin_error_base()); + + // Current internal states. + STORE_SLOW_TREND_L0_GAUGE.set(slow_trend.slow_cause.l0_avg()); + STORE_SLOW_TREND_L1_GAUGE.set(slow_trend.slow_cause.l1_avg()); + STORE_SLOW_TREND_L2_GAUGE.set(slow_trend.slow_cause.l2_avg()); + STORE_SLOW_TREND_L0_L1_GAUGE.set(slow_trend.slow_cause.l0_l1_rate()); + STORE_SLOW_TREND_L1_L2_GAUGE.set(slow_trend.slow_cause.l1_l2_rate()); + STORE_SLOW_TREND_L1_MARGIN_ERROR_GAUGE.set(slow_trend.slow_cause.l1_margin_error_base()); + STORE_SLOW_TREND_L2_MARGIN_ERROR_GAUGE.set(slow_trend.slow_cause.l2_margin_error_base()); // Report results of all slow Trends. 
- STORE_SLOW_TREND_RESULT_L0_GAUGE.set(self.slow_trend.slow_result.l0_avg()); - STORE_SLOW_TREND_RESULT_L1_GAUGE.set(self.slow_trend.slow_result.l1_avg()); - STORE_SLOW_TREND_RESULT_L2_GAUGE.set(self.slow_trend.slow_result.l2_avg()); - STORE_SLOW_TREND_RESULT_L0_L1_GAUGE.set(self.slow_trend.slow_result.l0_l1_rate()); - STORE_SLOW_TREND_RESULT_L1_L2_GAUGE.set(self.slow_trend.slow_result.l1_l2_rate()); + STORE_SLOW_TREND_RESULT_L0_GAUGE.set(slow_trend.slow_result.l0_avg()); + STORE_SLOW_TREND_RESULT_L1_GAUGE.set(slow_trend.slow_result.l1_avg()); + STORE_SLOW_TREND_RESULT_L2_GAUGE.set(slow_trend.slow_result.l2_avg()); + STORE_SLOW_TREND_RESULT_L0_L1_GAUGE.set(slow_trend.slow_result.l0_l1_rate()); + STORE_SLOW_TREND_RESULT_L1_L2_GAUGE.set(slow_trend.slow_result.l1_l2_rate()); STORE_SLOW_TREND_RESULT_L1_MARGIN_ERROR_GAUGE - .set(self.slow_trend.slow_result.l1_margin_error_base()); + .set(slow_trend.slow_result.l1_margin_error_base()); STORE_SLOW_TREND_RESULT_L2_MARGIN_ERROR_GAUGE - .set(self.slow_trend.slow_result.l2_margin_error_base()); + .set(slow_trend.slow_result.l2_margin_error_base()); } fn handle_report_batch_split(&self, regions: Vec) { @@ -1974,13 +1813,6 @@ where .or_insert_with(|| ReportBucket::new(buckets)); } - fn update_health_status(&mut self, status: ServingStatus) { - self.curr_health_status = status; - if let Some(health_service) = &self.health_service { - health_service.set_serving_status("", status); - } - } - /// Force to send a special heartbeat to pd when current store is hung on /// some special circumstances, i.e. disk busy, handler busy and others. 
fn handle_fake_store_heartbeat(&mut self) { @@ -2024,7 +1856,7 @@ where fn handle_control_grpc_server(&mut self, event: pdpb::ControlGrpcEvent) { info!("forcely control grpc server"; - "curr_health_status" => ?self.curr_health_status, + "curr_health_status" => ?self.health_controller.get_serving_status(), "event" => ?event, ); match event { @@ -2033,7 +1865,7 @@ where warn!("failed to send service event to PAUSE grpc server"; "err" => ?e); } else { - self.update_health_status(ServingStatus::NotServing); + self.health_controller.set_is_serving(false); } } pdpb::ControlGrpcEvent::Resume => { @@ -2041,7 +1873,7 @@ where warn!("failed to send service event to RESUME grpc server"; "err" => ?e); } else { - self.update_health_status(ServingStatus::Serving); + self.health_controller.set_is_serving(true); } } } @@ -2290,13 +2122,11 @@ where } => self.handle_update_max_timestamp(region_id, initial_status, txn_ext), Task::QueryRegionLeader { region_id } => self.handle_query_region_leader(region_id), Task::UpdateSlowScore { id, duration } => { - // Fine-tuned, `SlowScore` only takes the I/O jitters on the disk into account. - self.slow_score.record( + self.health_reporter.record_raftstore_duration( id, - duration.delays_on_disk_io(false), + duration, !self.store_stat.maybe_busy(), ); - self.slow_trend.record(duration); } Task::RegionCpuRecords(records) => self.handle_region_cpu_records(records), Task::ReportMinResolvedTs { @@ -2324,48 +2154,23 @@ where T: PdClient + 'static, { fn on_timeout(&mut self) { - // Record a fairly great value when timeout - self.slow_trend.slow_cause.record(500_000, Instant::now()); + let slow_score_tick_result = self.health_reporter.tick(self.store_stat.maybe_busy()); + if let Some(score) = slow_score_tick_result.updated_score { + STORE_SLOW_SCORE_GAUGE.set(score); + } - // The health status is recovered to serving as long as any tick - // does not timeout. 
- if self.curr_health_status == ServingStatus::ServiceUnknown - && self.slow_score.last_tick_finished + // If the last slow_score already reached abnormal state and was delayed for + // reporting by `store-heartbeat` to PD, we should report it here manually as + // a FAKE `store-heartbeat`. + if slow_score_tick_result.should_force_report_slow_store + && self.is_store_heartbeat_delayed() { - self.update_health_status(ServingStatus::Serving); - } - if !self.slow_score.last_tick_finished { - // If the last tick is not finished, it means that the current store might - // be busy on handling requests or delayed on I/O operations. And only when - // the current store is not busy, it should record the last_tick as a timeout. - if !self.store_stat.maybe_busy() { - self.slow_score.record_timeout(); - } - // If the last slow_score already reached abnormal state and was delayed for - // reporting by `store-heartbeat` to PD, we should report it here manually as - // a FAKE `store-heartbeat`. - if self.slow_score.should_force_report_slow_store() && self.is_store_heartbeat_delayed() - { - self.handle_fake_store_heartbeat(); - } - } - let scheduler = self.scheduler.clone(); - let id = self.slow_score.last_tick_id + 1; - self.slow_score.last_tick_id += 1; - self.slow_score.last_tick_finished = false; - - if self.slow_score.last_tick_id % self.slow_score.round_ticks == 0 { - // `last_update_time` is refreshed every round. If no update happens in a whole - // round, we set the status to unknown. 
- if self.curr_health_status == ServingStatus::Serving - && self.slow_score.last_record_time < self.slow_score.last_update_time - { - self.update_health_status(ServingStatus::ServiceUnknown); - } - let slow_score = self.slow_score.update(); - STORE_SLOW_SCORE_GAUGE.set(slow_score); + self.handle_fake_store_heartbeat(); } + let id = slow_score_tick_result.tick_id; + + let scheduler = self.scheduler.clone(); let inspector = LatencyInspector::new( id, Box::new(move |id, duration| { @@ -2403,7 +2208,7 @@ where } fn get_interval(&self) -> Duration { - self.slow_score.inspect_interval + self.health_reporter.get_tick_interval() } } @@ -2792,59 +2597,6 @@ mod tests { assert_eq!(store_stats.peer_stats.len(), 3) } - #[test] - fn test_slow_score() { - let mut slow_score = SlowScore::new(Duration::from_millis(500)); - slow_score.timeout_requests = 5; - slow_score.total_requests = 100; - assert_eq!( - OrderedFloat(1.5), - slow_score.update_impl(Duration::from_secs(10)) - ); - - slow_score.timeout_requests = 10; - slow_score.total_requests = 100; - assert_eq!( - OrderedFloat(3.0), - slow_score.update_impl(Duration::from_secs(10)) - ); - - slow_score.timeout_requests = 20; - slow_score.total_requests = 100; - assert_eq!( - OrderedFloat(6.0), - slow_score.update_impl(Duration::from_secs(10)) - ); - - slow_score.timeout_requests = 100; - slow_score.total_requests = 100; - assert_eq!( - OrderedFloat(12.0), - slow_score.update_impl(Duration::from_secs(10)) - ); - - slow_score.timeout_requests = 11; - slow_score.total_requests = 100; - assert_eq!( - OrderedFloat(24.0), - slow_score.update_impl(Duration::from_secs(10)) - ); - - slow_score.timeout_requests = 0; - slow_score.total_requests = 100; - assert_eq!( - OrderedFloat(19.0), - slow_score.update_impl(Duration::from_secs(15)) - ); - - slow_score.timeout_requests = 0; - slow_score.total_requests = 100; - assert_eq!( - OrderedFloat(1.0), - slow_score.update_impl(Duration::from_secs(57)) - ); - } - use engine_test::{kv::KvTestEngine, 
raft::RaftTestEngine}; use metapb::Peer; use resource_metering::{RawRecord, TagInfos}; diff --git a/components/server/Cargo.toml b/components/server/Cargo.toml index c03c42e8f71..64476107adf 100644 --- a/components/server/Cargo.toml +++ b/components/server/Cargo.toml @@ -56,6 +56,7 @@ fs2 = "0.4" futures = "0.3" grpcio = { workspace = true } grpcio-health = { workspace = true } +health_controller = { workspace = true } hex = "0.4" hybrid_engine = { workspace = true } keys = { workspace = true } diff --git a/components/server/src/server.rs b/components/server/src/server.rs index 5856563b49e..09f4ac3449a 100644 --- a/components/server/src/server.rs +++ b/components/server/src/server.rs @@ -41,7 +41,7 @@ use engine_traits::{ use file_system::{get_io_rate_limiter, BytesFetcher, MetricsManager as IoMetricsManager}; use futures::executor::block_on; use grpcio::{EnvBuilder, Environment}; -use grpcio_health::HealthService; +use health_controller::HealthController; use hybrid_engine::HybridEngine; use kvproto::{ brpb::create_backup, cdcpb::create_change_data, deadlock::create_deadlock, @@ -815,7 +815,7 @@ where ) .unwrap_or_else(|e| fatal!("failed to validate raftstore config {}", e)); let raft_store = Arc::new(VersionTrack::new(self.core.config.raft_store.clone())); - let health_service = HealthService::default(); + let health_controller = HealthController::new(); let mut node = Node::new( self.system.take().unwrap(), &server_config.value().clone(), @@ -824,7 +824,7 @@ where self.pd_client.clone(), state, self.core.background_worker.clone(), - Some(health_service.clone()), + health_controller.clone(), None, ); node.try_bootstrap_store(engines.engines.clone()) @@ -853,7 +853,7 @@ where self.env.clone(), unified_read_pool, debug_thread_pool, - health_service, + health_controller, self.resource_manager.clone(), ) .unwrap_or_else(|e| fatal!("failed to create server: {}", e)); diff --git a/components/server/src/server2.rs b/components/server/src/server2.rs index 
b5cabccf43c..750e73b0e5b 100644 --- a/components/server/src/server2.rs +++ b/components/server/src/server2.rs @@ -40,7 +40,7 @@ use engine_traits::{Engines, KvEngine, MiscExt, RaftEngine, TabletRegistry, CF_D use file_system::{get_io_rate_limiter, BytesFetcher, MetricsManager as IoMetricsManager}; use futures::executor::block_on; use grpcio::{EnvBuilder, Environment}; -use grpcio_health::HealthService; +use health_controller::HealthController; use kvproto::{ brpb::create_backup, cdcpb_grpc::create_change_data, deadlock::create_deadlock, debugpb_grpc::create_debug, diagnosticspb::create_diagnostics, @@ -749,7 +749,7 @@ where ) .unwrap_or_else(|e| fatal!("failed to validate raftstore config {}", e)); let raft_store = Arc::new(VersionTrack::new(self.core.config.raft_store.clone())); - let health_service = HealthService::default(); + let health_controller = HealthController::new(); let node = self.node.as_ref().unwrap(); @@ -776,7 +776,7 @@ where self.env.clone(), unified_read_pool, debug_thread_pool, - health_service, + health_controller, self.resource_manager.clone(), ) .unwrap_or_else(|e| fatal!("failed to create server: {}", e)); diff --git a/components/server/src/signal_handler.rs b/components/server/src/signal_handler.rs index 97efdb1fd2a..d68dfa98d6f 100644 --- a/components/server/src/signal_handler.rs +++ b/components/server/src/signal_handler.rs @@ -65,12 +65,15 @@ mod imp { #[cfg(not(unix))] mod imp { + use service::service_event::ServiceEvent; + use super::*; pub fn wait_for_signal( _: Option>, _: Option>, _: Option>, + _: Option>, ) { } } diff --git a/components/test_raftstore-v2/Cargo.toml b/components/test_raftstore-v2/Cargo.toml index 5f71f7f99a1..7df2462fe3d 100644 --- a/components/test_raftstore-v2/Cargo.toml +++ b/components/test_raftstore-v2/Cargo.toml @@ -40,6 +40,7 @@ file_system = { workspace = true } futures = "0.3" grpcio = { workspace = true } grpcio-health = { workspace = true } +health_controller = { workspace = true } keys = { workspace = 
true } kvproto = { workspace = true } lazy_static = "1.3" diff --git a/components/test_raftstore-v2/src/server.rs b/components/test_raftstore-v2/src/server.rs index 074e0731abf..7f6d036403d 100644 --- a/components/test_raftstore-v2/src/server.rs +++ b/components/test_raftstore-v2/src/server.rs @@ -18,6 +18,7 @@ use engine_traits::{KvEngine, RaftEngine, TabletRegistry}; use futures::{executor::block_on, future::BoxFuture, Future}; use grpcio::{ChannelBuilder, EnvBuilder, Environment, Error as GrpcError, Service}; use grpcio_health::HealthService; +use health_controller::HealthController; use kvproto::{ deadlock_grpc::create_deadlock, debugpb_grpc::{create_debug, DebugClient}, @@ -605,7 +606,7 @@ impl ServerCluster { cfg.slow_log_file.clone(), ); - let health_service = HealthService::default(); + let health_controller = HealthController::new(); for _ in 0..100 { let mut svr = Server::new( @@ -622,7 +623,7 @@ impl ServerCluster { self.env.clone(), None, debug_thread_pool.clone(), - health_service.clone(), + health_controller.clone(), resource_manager.clone(), ) .unwrap(); @@ -691,7 +692,8 @@ impl ServerCluster { self.region_info_accessors .insert(node_id, region_info_accessor); // todo: importer - self.health_services.insert(node_id, health_service); + self.health_services + .insert(node_id, health_controller.get_grpc_health_service()); lock_mgr .start( diff --git a/components/test_raftstore/Cargo.toml b/components/test_raftstore/Cargo.toml index e648eef86b9..1f5064f0544 100644 --- a/components/test_raftstore/Cargo.toml +++ b/components/test_raftstore/Cargo.toml @@ -40,6 +40,7 @@ file_system = { workspace = true } futures = "0.3" grpcio = { workspace = true } grpcio-health = { workspace = true } +health_controller = { workspace = true } hybrid_engine = { workspace = true } keys = { workspace = true } kvproto = { workspace = true } diff --git a/components/test_raftstore/src/node.rs b/components/test_raftstore/src/node.rs index 1c4296d59db..5a5b86150c2 100644 --- 
a/components/test_raftstore/src/node.rs +++ b/components/test_raftstore/src/node.rs @@ -11,6 +11,7 @@ use encryption_export::DataKeyManager; use engine_rocks::RocksEngine; use engine_test::raft::RaftTestEngine; use engine_traits::{Engines, KvEngine, SnapshotContext}; +use health_controller::HealthController; use kvproto::{ kvrpcpb::ApiVersion, metapb, @@ -253,7 +254,7 @@ impl Simulator for NodeCluster { Arc::clone(&self.pd_client), Arc::default(), bg_worker.clone(), - None, + HealthController::new(), None, ); diff --git a/components/test_raftstore/src/server.rs b/components/test_raftstore/src/server.rs index 3b65ca0d980..09eb5a11f66 100644 --- a/components/test_raftstore/src/server.rs +++ b/components/test_raftstore/src/server.rs @@ -18,7 +18,7 @@ use engine_test::raft::RaftTestEngine; use engine_traits::{Engines, KvEngine, SnapshotContext}; use futures::executor::block_on; use grpcio::{ChannelBuilder, EnvBuilder, Environment, Error as GrpcError, Service}; -use grpcio_health::HealthService; +use health_controller::HealthController; use kvproto::{ deadlock::create_deadlock, debugpb::{create_debug, DebugClient}, @@ -150,7 +150,7 @@ pub struct ServerCluster { pub importers: HashMap>>, pub pending_services: HashMap, pub coprocessor_hooks: HashMap>, - pub health_services: HashMap, + pub health_controllers: HashMap, pub security_mgr: Arc, pub txn_extra_schedulers: HashMap>, snap_paths: HashMap, @@ -197,7 +197,7 @@ impl ServerCluster { snap_mgrs: HashMap::default(), pending_services: HashMap::default(), coprocessor_hooks: HashMap::default(), - health_services: HashMap::default(), + health_controllers: HashMap::default(), raft_clients: HashMap::default(), conn_builder, concurrency_managers: HashMap::default(), @@ -518,7 +518,7 @@ impl ServerCluster { false, ) .unwrap(); - let health_service = HealthService::default(); + let health_controller = HealthController::new(); let mut node = Node::new( system, &server_cfg.value().clone(), @@ -527,7 +527,7 @@ impl ServerCluster { 
Arc::clone(&self.pd_client), state, bg_worker.clone(), - Some(health_service.clone()), + health_controller.clone(), None, ); node.try_bootstrap_store(engines.clone())?; @@ -548,7 +548,7 @@ impl ServerCluster { self.env.clone(), None, debug_thread_pool.clone(), - health_service.clone(), + health_controller.clone(), resource_manager.clone(), ) .unwrap(); @@ -625,7 +625,7 @@ impl ServerCluster { self.region_info_accessors .insert(node_id, region_info_accessor); self.importers.insert(node_id, importer); - self.health_services.insert(node_id, health_service); + self.health_controllers.insert(node_id, health_controller); lock_mgr .start( diff --git a/components/tikv_util/src/lib.rs b/components/tikv_util/src/lib.rs index 908f32db86f..6ff7939ca16 100644 --- a/components/tikv_util/src/lib.rs +++ b/components/tikv_util/src/lib.rs @@ -61,7 +61,6 @@ pub mod thread_group; pub mod time; pub mod timer; pub mod topn; -pub mod trend; pub mod worker; pub mod yatp_pool; diff --git a/src/server/node.rs b/src/server/node.rs index bf19cb6c005..c631df6c64b 100644 --- a/src/server/node.rs +++ b/src/server/node.rs @@ -10,7 +10,7 @@ use api_version::api_v2::TIDB_RANGES_COMPLEMENT; use causal_ts::CausalTsProviderImpl; use concurrency_manager::ConcurrencyManager; use engine_traits::{Engines, Iterable, KvEngine, RaftEngine, DATA_CFS, DATA_KEY_PREFIX_LEN}; -use grpcio_health::HealthService; +use health_controller::HealthController; use kvproto::{ kvrpcpb::ApiVersion, metapb, raft_serverpb::StoreIdent, replication_modepb::ReplicationStatus, }; @@ -103,7 +103,7 @@ pub struct Node { pd_client: Arc, state: Arc>, bg_worker: Worker, - health_service: Option, + health_controller: HealthController, } impl Node @@ -121,7 +121,7 @@ where pd_client: Arc, state: Arc>, bg_worker: Worker, - health_service: Option, + health_controller: HealthController, default_store: Option, ) -> Node { let store = init_store(default_store, cfg); @@ -136,7 +136,7 @@ where has_started: false, state, bg_worker, - 
health_service, + health_controller, } } @@ -494,7 +494,7 @@ where self.state.clone(), concurrency_manager, collector_reg_handle, - self.health_service.clone(), + self.health_controller.clone(), causal_ts_provider, grpc_service_mgr, safe_point, diff --git a/src/server/server.rs b/src/server/server.rs index 09782be4e16..c54a0cb75be 100644 --- a/src/server/server.rs +++ b/src/server/server.rs @@ -11,7 +11,8 @@ use std::{ use api_version::KvFormat; use futures::{compat::Stream01CompatExt, stream::StreamExt}; use grpcio::{ChannelBuilder, Environment, ResourceQuota, Server as GrpcServer, ServerBuilder}; -use grpcio_health::{create_health, HealthService, ServingStatus}; +use grpcio_health::{create_health, HealthService}; +use health_controller::HealthController; use kvproto::tikvpb::*; use raftstore::store::{CheckLeaderTask, SnapManager, TabletSnapManager}; use resource_control::ResourceGroupManager; @@ -135,7 +136,7 @@ pub struct Server { grpc_thread_load: Arc, yatp_read_pool: Option, debug_thread_pool: Arc, - health_service: HealthService, + health_controller: HealthController, timer: Handle, builder_factory: Box, } @@ -161,7 +162,7 @@ where env: Arc, yatp_read_pool: Option, debug_thread_pool: Arc, - health_service: HealthService, + health_controller: HealthController, resource_manager: Option>, ) -> Result { // A helper thread (or pool) for transport layer. 
@@ -204,7 +205,7 @@ where kv_service, cfg.clone(), security_mgr.clone(), - health_service.clone(), + health_controller.get_grpc_health_service(), )); let addr = SocketAddr::from_str(&cfg.value().addr)?; @@ -224,7 +225,6 @@ where let raft_client = RaftClient::new(store_id, conn_builder); let trans = ServerTransport::new(raft_client); - health_service.set_serving_status("", ServingStatus::NotServing); let svr = Server { env: Arc::clone(&env), @@ -239,7 +239,7 @@ where grpc_thread_load, yatp_read_pool, debug_thread_pool, - health_service, + health_controller, timer: GLOBAL_TIMER_HANDLE.clone(), builder_factory, }; @@ -300,8 +300,7 @@ where let mut grpc_server = self.builder_or_server.take().unwrap().right().unwrap(); grpc_server.start(); self.builder_or_server = Some(Either::Right(grpc_server)); - self.health_service - .set_serving_status("", ServingStatus::Serving); + self.health_controller.set_is_serving(true); } /// Starts the TiKV server. @@ -391,7 +390,7 @@ where pool.shutdown_background(); } let _ = self.yatp_read_pool.take(); - self.health_service.shutdown(); + self.health_controller.shutdown(); Ok(()) } @@ -403,8 +402,7 @@ where if let Some(Either::Right(server)) = self.builder_or_server.take() { drop(server); } - self.health_service - .set_serving_status("", ServingStatus::NotServing); + self.health_controller.set_is_serving(false); self.builder_or_server = Some(builder); info!("paused the grpc server"; "takes" => ?start.elapsed(),); Ok(()) @@ -667,7 +665,7 @@ mod tests { env, None, debug_thread_pool, - HealthService::default(), + HealthController::new(), None, ) .unwrap(); diff --git a/tests/Cargo.toml b/tests/Cargo.toml index c16094b3327..847bd60627a 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -81,6 +81,7 @@ file_system = { workspace = true } futures = "0.3" grpcio = { workspace = true } grpcio-health = { workspace = true } +health_controller = { workspace = true } kvproto = { workspace = true } libc = "0.2" log_wrappers = { workspace = true } 
diff --git a/tests/failpoints/cases/test_server.rs b/tests/failpoints/cases/test_server.rs index 62d66af1efc..dfbb883179c 100644 --- a/tests/failpoints/cases/test_server.rs +++ b/tests/failpoints/cases/test_server.rs @@ -112,9 +112,9 @@ fn test_serving_status() { cluster.cfg.raft_store.inspect_interval = ReadableDuration::millis(10); cluster.run(); - let service = cluster.sim.rl().health_services.get(&1).unwrap().clone(); - let builder = - ServerBuilder::new(Arc::new(Environment::new(1))).register_service(create_health(service)); + let health_controller = cluster.sim.rl().health_controllers.get(&1).unwrap().clone(); + let builder = ServerBuilder::new(Arc::new(Environment::new(1))) + .register_service(create_health(health_controller.get_grpc_health_service())); let mut server = builder.bind("127.0.0.1", 0).build().unwrap(); server.start(); @@ -135,11 +135,21 @@ fn test_serving_status() { thread::sleep(Duration::from_millis(500)); assert_eq!(check(), ServingStatus::Serving); + health_controller.set_is_serving(false); + assert_eq!(check(), ServingStatus::NotServing); + health_controller.set_is_serving(true); + assert_eq!(check(), ServingStatus::Serving); + fail::cfg("pause_on_peer_collect_message", "pause").unwrap(); thread::sleep(Duration::from_secs(1)); assert_eq!(check(), ServingStatus::ServiceUnknown); + health_controller.set_is_serving(false); + assert_eq!(check(), ServingStatus::NotServing); + health_controller.set_is_serving(true); + assert_eq!(check(), ServingStatus::ServiceUnknown); + fail::remove("pause_on_peer_collect_message"); // It should recover within one round. 
diff --git a/tests/integrations/config/dynamic/raftstore.rs b/tests/integrations/config/dynamic/raftstore.rs index 4d6551ea27c..eb5d2dda710 100644 --- a/tests/integrations/config/dynamic/raftstore.rs +++ b/tests/integrations/config/dynamic/raftstore.rs @@ -9,6 +9,7 @@ use std::{ use concurrency_manager::ConcurrencyManager; use engine_rocks::RocksEngine; use engine_traits::{Engines, ALL_CFS, CF_DEFAULT}; +use health_controller::HealthController; use kvproto::raft_serverpb::RaftMessage; use raftstore::{ coprocessor::CoprocessorHost, @@ -111,7 +112,7 @@ fn start_raftstore( Arc::default(), ConcurrencyManager::new(1.into()), CollectorRegHandle::new_for_test(), - None, + HealthController::new(), None, GrpcServiceManager::dummy(), Arc::new(AtomicU64::new(0)), diff --git a/tests/integrations/raftstore/test_bootstrap.rs b/tests/integrations/raftstore/test_bootstrap.rs index 8126ab0ffd5..e3a1f50100d 100644 --- a/tests/integrations/raftstore/test_bootstrap.rs +++ b/tests/integrations/raftstore/test_bootstrap.rs @@ -11,6 +11,7 @@ use engine_traits::{ DbOptionsExt, Engines, MiscExt, Peekable, RaftEngine, RaftEngineReadOnly, ALL_CFS, CF_DEFAULT, CF_LOCK, CF_RAFT, CF_WRITE, }; +use health_controller::HealthController; use kvproto::{kvrpcpb::ApiVersion, metapb, raft_serverpb::RegionLocalState}; use raftstore::{ coprocessor::CoprocessorHost, @@ -69,7 +70,7 @@ fn test_node_bootstrap_with_prepared_data() { Arc::clone(&pd_client), Arc::default(), bg_worker, - None, + HealthController::new(), None, ); let snap_mgr = SnapManager::new(tmp_mgr.path().to_str().unwrap()); diff --git a/tests/integrations/raftstore/test_status_command.rs b/tests/integrations/raftstore/test_status_command.rs index 8565d936d9f..37e78de3d50 100644 --- a/tests/integrations/raftstore/test_status_command.rs +++ b/tests/integrations/raftstore/test_status_command.rs @@ -1,6 +1,7 @@ // Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0. 
-use raftstore::store::{msg::StoreMsg as StoreMsgV1, util::LatencyInspector}; +use health_controller::types::LatencyInspector; +use raftstore::store::msg::StoreMsg as StoreMsgV1; use raftstore_v2::router::StoreMsg as StoreMsgV2; use test_raftstore::Simulator as S1; use test_raftstore_v2::Simulator as S2; From 8b7f6470dd3d2577a246c435186520ec193c1a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BA=8C=E6=89=8B=E6=8E=89=E5=8C=85=E5=B7=A5=E7=A8=8B?= =?UTF-8?q?=E5=B8=88?= Date: Fri, 2 Feb 2024 21:49:55 +0800 Subject: [PATCH 1142/1149] statistics: move analyze context out (#16481) ref tikv/tikv#16463 Moved the analyze connect struct out of the big analyze.rs file and used the enum to represent the analyze version. Signed-off-by: hi-rustin Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- src/coprocessor/endpoint.rs | 7 +- src/coprocessor/statistics/analyze.rs | 350 ++---------------- src/coprocessor/statistics/analyze_context.rs | 308 +++++++++++++++ src/coprocessor/statistics/mod.rs | 1 + 4 files changed, 354 insertions(+), 312 deletions(-) create mode 100644 src/coprocessor/statistics/analyze_context.rs diff --git a/src/coprocessor/endpoint.rs b/src/coprocessor/endpoint.rs index 5200a96c158..1019240f313 100644 --- a/src/coprocessor/endpoint.rs +++ b/src/coprocessor/endpoint.rs @@ -27,7 +27,10 @@ use tokio::sync::Semaphore; use txn_types::Lock; use crate::{ - coprocessor::{cache::CachedRequestHandler, interceptors::*, metrics::*, tracker::Tracker, *}, + coprocessor::{ + cache::CachedRequestHandler, interceptors::*, metrics::*, + statistics::analyze_context::AnalyzeContext, tracker::Tracker, *, + }, read_pool::ReadPoolHandle, server::Config, storage::{ @@ -302,7 +305,7 @@ impl Endpoint { let quota_limiter = self.quota_limiter.clone(); builder = Box::new(move |snap, req_ctx| { - statistics::analyze::AnalyzeContext::<_, F>::new( + AnalyzeContext::<_, F>::new( analyze, req_ctx.ranges.clone(), start_ts, diff --git 
a/src/coprocessor/statistics/analyze.rs b/src/coprocessor/statistics/analyze.rs index 3935bc01d62..f86d3232463 100644 --- a/src/coprocessor/statistics/analyze.rs +++ b/src/coprocessor/statistics/analyze.rs @@ -1,25 +1,14 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. -use std::{ - cmp::Reverse, collections::BinaryHeap, hash::Hasher, marker::PhantomData, mem, sync::Arc, -}; +use std::{cmp::Reverse, collections::BinaryHeap, hash::Hasher, mem, sync::Arc}; -use api_version::{keyspace::KvPair, KvFormat}; -use async_trait::async_trait; -use kvproto::coprocessor::{KeyRange, Response}; +use api_version::KvFormat; +use kvproto::coprocessor::KeyRange; use mur3::Hasher128; -use protobuf::Message; use rand::{rngs::StdRng, Rng}; -use tidb_query_common::storage::{ - scanner::{RangesScanner, RangesScannerOptions}, - Range, -}; use tidb_query_datatype::{ codec::{ - datum::{ - encode_value, split_datum, Datum, DatumDecoder, DURATION_FLAG, INT_FLAG, NIL_FLAG, - UINT_FLAG, - }, + datum::{encode_value, Datum, DatumDecoder, DURATION_FLAG, INT_FLAG, NIL_FLAG, UINT_FLAG}, table, }, def::Collation, @@ -28,289 +17,23 @@ use tidb_query_datatype::{ }; use tidb_query_executors::{interface::BatchExecutor, BatchTableScanExecutor}; use tidb_query_expr::BATCH_MAX_SIZE; -use tikv_alloc::trace::{MemoryTraceGuard, TraceEvent}; +use tikv_alloc::trace::TraceEvent; use tikv_util::{ metrics::{ThrottleType, NON_TXN_COMMAND_THROTTLE_TIME_COUNTER_VEC_STATIC}, quota_limiter::QuotaLimiter, }; -use tipb::{self, AnalyzeColumnsReq, AnalyzeIndexReq, AnalyzeReq, AnalyzeType}; +use tipb::{self, AnalyzeColumnsReq}; use super::{cmsketch::CmSketch, fmsketch::FmSketch, histogram::Histogram}; use crate::{ - coprocessor::{dag::TikvStorage, MEMTRACE_ANALYZE, *}, - storage::{Snapshot, SnapshotStore, Statistics}, + coprocessor::{ + dag::TikvStorage, statistics::analyze_context::AnalyzeVersion, MEMTRACE_ANALYZE, *, + }, + storage::{Snapshot, SnapshotStore}, }; -const ANALYZE_VERSION_V1: i32 = 1; 
-const ANALYZE_VERSION_V2: i32 = 2; - -// `AnalyzeContext` is used to handle `AnalyzeReq` -pub struct AnalyzeContext { - req: AnalyzeReq, - storage: Option>>, - ranges: Vec, - storage_stats: Statistics, - quota_limiter: Arc, - is_auto_analyze: bool, - _phantom: PhantomData, -} - -impl AnalyzeContext { - pub fn new( - req: AnalyzeReq, - ranges: Vec, - start_ts: u64, - snap: S, - req_ctx: &ReqContext, - quota_limiter: Arc, - ) -> Result { - let store = SnapshotStore::new( - snap, - start_ts.into(), - req_ctx.context.get_isolation_level(), - !req_ctx.context.get_not_fill_cache(), - req_ctx.bypass_locks.clone(), - req_ctx.access_locks.clone(), - false, - ); - let is_auto_analyze = req.get_flags() & REQ_FLAG_TIDB_SYSSESSION > 0; - - Ok(Self { - req, - storage: Some(TikvStorage::new(store, false)), - ranges, - storage_stats: Statistics::default(), - quota_limiter, - is_auto_analyze, - _phantom: PhantomData, - }) - } - - // handle_column is used to process `AnalyzeColumnsReq` - // it would build a histogram for the primary key(if needed) and - // collectors for each column value. 
- async fn handle_column(builder: &mut SampleBuilder) -> Result> { - let (col_res, _) = builder.collect_columns_stats().await?; - - let res_data = { - let res: tipb::AnalyzeColumnsResp = col_res.into(); - box_try!(res.write_to_bytes()) - }; - Ok(res_data) - } - - async fn handle_mixed(builder: &mut SampleBuilder) -> Result> { - let (col_res, idx_res) = builder.collect_columns_stats().await?; - - let res_data = { - let resp: tipb::AnalyzeMixedResp = AnalyzeMixedResult::new( - col_res, - idx_res.ok_or_else(|| { - Error::Other("Mixed analyze type should have index response.".into()) - })?, - ) - .into(); - box_try!(resp.write_to_bytes()) - }; - Ok(res_data) - } - - async fn handle_full_sampling(builder: &mut RowSampleBuilder) -> Result> { - let sample_res = builder.collect_column_stats().await?; - let res_data = { - let res: tipb::AnalyzeColumnsResp = sample_res.into(); - box_try!(res.write_to_bytes()) - }; - Ok(res_data) - } - - // handle_index is used to handle `AnalyzeIndexReq`, - // it would build a histogram and count-min sketch of index values. - async fn handle_index( - req: AnalyzeIndexReq, - scanner: &mut RangesScanner>, F>, - is_common_handle: bool, - ) -> Result> { - let mut hist = Histogram::new(req.get_bucket_size() as usize); - let mut cms = CmSketch::new( - req.get_cmsketch_depth() as usize, - req.get_cmsketch_width() as usize, - ); - let mut fms = FmSketch::new(req.get_sketch_size() as usize); - let mut topn_heap = BinaryHeap::new(); - // cur_val recording the current value's data and its counts when iterating - // index's rows. Once we met a new value, the old value will be pushed - // into the topn_heap to maintain the top-n information. - let mut cur_val: (u32, Vec) = (0, vec![]); - let top_n_size = req.get_top_n_size() as usize; - let stats_version = if req.has_version() { - req.get_version() - } else { - ANALYZE_VERSION_V1 - }; - while let Some(row) = scanner.next().await? 
{ - let mut key = row.key(); - if is_common_handle { - table::check_record_key(key)?; - key = &key[table::PREFIX_LEN..]; - } else { - table::check_index_key(key)?; - key = &key[table::PREFIX_LEN + table::ID_LEN..]; - } - let mut datums = key; - let mut data = Vec::with_capacity(key.len()); - for i in 0..req.get_num_columns() as usize { - if datums.is_empty() { - return Err(box_err!( - "{}th column is missing in datum buffer: {}", - i, - log_wrappers::Value::key(key) - )); - } - let (column, remaining) = split_datum(datums, false)?; - datums = remaining; - data.extend_from_slice(column); - if let Some(cms) = cms.as_mut() { - cms.insert(&data); - } - } - fms.insert(&data); - if stats_version == ANALYZE_VERSION_V2 { - hist.append(&data, true); - if cur_val.1 == data { - cur_val.0 += 1; - } else { - if cur_val.0 > 0 { - topn_heap.push(Reverse(cur_val)); - } - if topn_heap.len() > top_n_size { - topn_heap.pop(); - } - cur_val = (1, data); - } - } else { - hist.append(&data, false); - } - } - - if stats_version == ANALYZE_VERSION_V2 { - if cur_val.0 > 0 { - topn_heap.push(Reverse(cur_val)); - if topn_heap.len() > top_n_size { - topn_heap.pop(); - } - } - if let Some(c) = cms.as_mut() { - for heap_item in topn_heap { - c.sub(&(heap_item.0).1, (heap_item.0).0); - c.push_to_top_n((heap_item.0).1, (heap_item.0).0 as u64); - } - } - } - - let res: tipb::AnalyzeIndexResp = AnalyzeIndexResult::new(hist, cms, Some(fms)).into(); - let dt = box_try!(res.write_to_bytes()); - Ok(dt) - } -} - -#[async_trait] -impl RequestHandler for AnalyzeContext { - async fn handle_request(&mut self) -> Result> { - let ret = match self.req.get_tp() { - AnalyzeType::TypeIndex | AnalyzeType::TypeCommonHandle => { - let req = self.req.take_idx_req(); - let ranges = std::mem::take(&mut self.ranges); - table::check_table_ranges::(&ranges)?; - let mut scanner = RangesScanner::<_, F>::new(RangesScannerOptions { - storage: self.storage.take().unwrap(), - ranges: ranges - .into_iter() - .map(|r| 
Range::from_pb_range(r, false)) - .collect(), - scan_backward_in_range: false, - is_key_only: true, - is_scanned_range_aware: false, - }); - let res = AnalyzeContext::handle_index( - req, - &mut scanner, - self.req.get_tp() == AnalyzeType::TypeCommonHandle, - ) - .await; - scanner.collect_storage_stats(&mut self.storage_stats); - res - } - - AnalyzeType::TypeColumn => { - let col_req = self.req.take_col_req(); - let storage = self.storage.take().unwrap(); - let ranges = std::mem::take(&mut self.ranges); - let mut builder = SampleBuilder::<_, F>::new(col_req, None, storage, ranges)?; - let res = AnalyzeContext::handle_column(&mut builder).await; - builder.data.collect_storage_stats(&mut self.storage_stats); - res - } - - // Type mixed is analyze common handle and columns by scan table rows once. - AnalyzeType::TypeMixed => { - let col_req = self.req.take_col_req(); - let idx_req = self.req.take_idx_req(); - let storage = self.storage.take().unwrap(); - let ranges = std::mem::take(&mut self.ranges); - let mut builder = - SampleBuilder::<_, F>::new(col_req, Some(idx_req), storage, ranges)?; - let res = AnalyzeContext::handle_mixed(&mut builder).await; - builder.data.collect_storage_stats(&mut self.storage_stats); - res - } - - AnalyzeType::TypeFullSampling => { - let col_req = self.req.take_col_req(); - let storage = self.storage.take().unwrap(); - let ranges = std::mem::take(&mut self.ranges); - - let mut builder = RowSampleBuilder::<_, F>::new( - col_req, - storage, - ranges, - self.quota_limiter.clone(), - self.is_auto_analyze, - )?; - - let res = AnalyzeContext::handle_full_sampling(&mut builder).await; - builder.data.collect_storage_stats(&mut self.storage_stats); - res - } - - AnalyzeType::TypeSampleIndex => Err(Error::Other( - "Analyze of this kind not implemented".to_string(), - )), - }; - match ret { - Ok(data) => { - let memory_size = data.capacity(); - let mut resp = Response::default(); - resp.set_data(data); - Ok(MEMTRACE_ANALYZE.trace_guard(resp, 
memory_size)) - } - Err(Error::Other(e)) => { - let mut resp = Response::default(); - resp.set_other_error(e); - Ok(resp.into()) - } - Err(e) => Err(e), - } - } - - fn collect_scan_statistics(&mut self, dest: &mut Statistics) { - dest.add(&self.storage_stats); - self.storage_stats = Statistics::default(); - } -} - -struct RowSampleBuilder { - data: BatchTableScanExecutor>, F>, +pub(crate) struct RowSampleBuilder { + pub(crate) data: BatchTableScanExecutor>, F>, max_sample_size: usize, max_fm_sketch_size: usize, @@ -322,7 +45,7 @@ struct RowSampleBuilder { } impl RowSampleBuilder { - fn new( + pub(crate) fn new( mut req: AnalyzeColumnsReq, storage: TikvStorage>, ranges: Vec, @@ -371,7 +94,7 @@ impl RowSampleBuilder { )) } - async fn collect_column_stats(&mut self) -> Result { + pub(crate) async fn collect_column_stats(&mut self) -> Result { use tidb_query_datatype::{codec::collation::Collator, match_template_collator}; let mut is_drained = false; @@ -804,15 +527,15 @@ impl Drop for BaseRowSampleCollector { } } -struct SampleBuilder { - data: BatchTableScanExecutor>, F>, +pub(crate) struct SampleBuilder { + pub(crate) data: BatchTableScanExecutor>, F>, max_bucket_size: usize, max_sample_size: usize, max_fm_sketch_size: usize, cm_sketch_depth: usize, cm_sketch_width: usize, - stats_version: i32, + stats_version: AnalyzeVersion, top_n_size: usize, columns_info: Vec, analyze_common_handle: bool, @@ -823,7 +546,7 @@ struct SampleBuilder { /// the result set using Reservoir Sampling algorithm, estimates NDVs /// using FM Sketch during the collecting process, and builds count-min sketch. 
impl SampleBuilder { - fn new( + pub(crate) fn new( mut req: AnalyzeColumnsReq, common_handle_req: Option, storage: TikvStorage>, @@ -852,10 +575,10 @@ impl SampleBuilder { cm_sketch_depth: req.get_cmsketch_depth() as usize, cm_sketch_width: req.get_cmsketch_width() as usize, stats_version: common_handle_req.as_ref().map_or_else( - || ANALYZE_VERSION_V1, + || AnalyzeVersion::V1, |req| match req.has_version() { - true => req.get_version(), - _ => ANALYZE_VERSION_V1, + true => req.get_version().into(), + _ => AnalyzeVersion::V1, }, ), top_n_size: common_handle_req @@ -872,16 +595,16 @@ impl SampleBuilder { // also returns the statistic builder for PK which contains the histogram. // When PK is common handle, it returns index stats for PK. // See https://en.wikipedia.org/wiki/Reservoir_sampling - async fn collect_columns_stats( + pub(crate) async fn collect_columns_stats( &mut self, ) -> Result<(AnalyzeColumnsResult, Option)> { use tidb_query_datatype::{codec::collation::Collator, match_template_collator}; - let columns_without_handle_len = - self.columns_info.len() - self.columns_info[0].get_pk_handle() as usize; - // The number of columns need to be sampled is `columns_without_handle_len`. // It equals to `columns_info.len()` if the first column doesn't contain a // handle. Otherwise, it equals to `columns_info.len() - 1`. 
+ let columns_without_handle_len = + self.columns_info.len() - self.columns_info[0].get_pk_handle() as usize; + let mut pk_builder = Histogram::new(self.max_bucket_size); let mut collectors = vec![ SampleCollector::new( @@ -935,7 +658,7 @@ impl SampleBuilder { } } common_handle_fms.insert(&data); - if self.stats_version == ANALYZE_VERSION_V2 { + if self.stats_version == AnalyzeVersion::V2 { common_handle_hist.append(&data, true); if cur_val.1 == data { cur_val.0 += 1; @@ -952,7 +675,7 @@ impl SampleBuilder { common_handle_hist.append(&data, false) } } - if self.stats_version == ANALYZE_VERSION_V2 { + if self.stats_version == AnalyzeVersion::V2 { if cur_val.0 > 0 { topn_heap.push(Reverse(cur_val)); if topn_heap.len() > self.top_n_size { @@ -1107,7 +830,7 @@ impl From for tipb::SampleCollector { } } -struct AnalyzeSamplingResult { +pub(crate) struct AnalyzeSamplingResult { row_sample_collector: Box, } @@ -1136,7 +859,7 @@ impl Default for AnalyzeSamplingResult { /// `AnalyzeColumnsResult` collect the result of analyze columns request. #[derive(Default)] -struct AnalyzeColumnsResult { +pub(crate) struct AnalyzeColumnsResult { sample_collectors: Vec, pk_hist: Histogram, } @@ -1167,14 +890,18 @@ impl From for tipb::AnalyzeColumnsResp { /// `AnalyzeIndexResult` collect the result of analyze index request. #[derive(Default)] -struct AnalyzeIndexResult { +pub(crate) struct AnalyzeIndexResult { hist: Histogram, cms: Option, fms: Option, } impl AnalyzeIndexResult { - fn new(hist: Histogram, cms: Option, fms: Option) -> AnalyzeIndexResult { + pub(crate) fn new( + hist: Histogram, + cms: Option, + fms: Option, + ) -> AnalyzeIndexResult { AnalyzeIndexResult { hist, cms, fms } } } @@ -1197,13 +924,16 @@ impl From for tipb::AnalyzeIndexResp { /// `AnalyzeMixedResult` collect the result of analyze mixed request. 
#[derive(Default)] -struct AnalyzeMixedResult { +pub(crate) struct AnalyzeMixedResult { col_res: AnalyzeColumnsResult, idx_res: AnalyzeIndexResult, } impl AnalyzeMixedResult { - fn new(col_res: AnalyzeColumnsResult, idx_res: AnalyzeIndexResult) -> AnalyzeMixedResult { + pub(crate) fn new( + col_res: AnalyzeColumnsResult, + idx_res: AnalyzeIndexResult, + ) -> AnalyzeMixedResult { AnalyzeMixedResult { col_res, idx_res } } } diff --git a/src/coprocessor/statistics/analyze_context.rs b/src/coprocessor/statistics/analyze_context.rs new file mode 100644 index 00000000000..4911feae09a --- /dev/null +++ b/src/coprocessor/statistics/analyze_context.rs @@ -0,0 +1,308 @@ +// Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. + +use std::{cmp::Reverse, collections::BinaryHeap, marker::PhantomData, sync::Arc}; + +use api_version::{keyspace::KvPair, KvFormat}; +use async_trait::async_trait; +use kvproto::coprocessor::{KeyRange, Response}; +use protobuf::Message; +use tidb_query_common::storage::{ + scanner::{RangesScanner, RangesScannerOptions}, + Range, +}; +use tidb_query_datatype::codec::{datum::split_datum, table}; +use tidb_query_executors::interface::BatchExecutor; +use tikv_alloc::trace::MemoryTraceGuard; +use tikv_util::quota_limiter::QuotaLimiter; +use tipb::{self, AnalyzeIndexReq, AnalyzeReq, AnalyzeType}; + +use super::{cmsketch::CmSketch, fmsketch::FmSketch, histogram::Histogram}; +use crate::{ + coprocessor::{ + dag::TikvStorage, + statistics::analyze::{ + AnalyzeIndexResult, AnalyzeMixedResult, RowSampleBuilder, SampleBuilder, + }, + MEMTRACE_ANALYZE, *, + }, + storage::{Snapshot, SnapshotStore, Statistics}, +}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum AnalyzeVersion { + V1, + V2, +} + +impl From for AnalyzeVersion { + fn from(v: i32) -> Self { + match v { + 1 => AnalyzeVersion::V1, + 2 => AnalyzeVersion::V2, + _ => panic!("Unknown analyze version: {}", v), + } + } +} + +/// Used to handle analyze request. 
+pub struct AnalyzeContext { + req: AnalyzeReq, + storage: Option>>, + ranges: Vec, + storage_stats: Statistics, + quota_limiter: Arc, + // is_auto_analyze is used to indicate whether the analyze request is sent by TiDB itself. + is_auto_analyze: bool, + _phantom: PhantomData, +} + +impl AnalyzeContext { + pub fn new( + req: AnalyzeReq, + ranges: Vec, + start_ts: u64, + snap: S, + req_ctx: &ReqContext, + quota_limiter: Arc, + ) -> Result { + let store = SnapshotStore::new( + snap, + start_ts.into(), + req_ctx.context.get_isolation_level(), + !req_ctx.context.get_not_fill_cache(), + req_ctx.bypass_locks.clone(), + req_ctx.access_locks.clone(), + false, + ); + let is_auto_analyze = req.get_flags() & REQ_FLAG_TIDB_SYSSESSION > 0; + + Ok(Self { + req, + storage: Some(TikvStorage::new(store, false)), + ranges, + storage_stats: Statistics::default(), + quota_limiter, + is_auto_analyze, + _phantom: PhantomData, + }) + } + + // handle_column is used to process `AnalyzeColumnsReq` + // it would build a histogram for the primary key(if needed) and + // collectors for each column value. + async fn handle_column(builder: &mut SampleBuilder) -> Result> { + let (col_res, _) = builder.collect_columns_stats().await?; + + let res_data = { + let res: tipb::AnalyzeColumnsResp = col_res.into(); + box_try!(res.write_to_bytes()) + }; + Ok(res_data) + } + + // Handle mixed request, it would build histograms for common handle and columns + // by scan table rows once. 
+ async fn handle_mixed(builder: &mut SampleBuilder) -> Result> { + let (col_res, idx_res) = builder.collect_columns_stats().await?; + + let res_data = { + let resp: tipb::AnalyzeMixedResp = AnalyzeMixedResult::new( + col_res, + idx_res.ok_or_else(|| { + Error::Other("Mixed analyze type should have index response.".into()) + })?, + ) + .into(); + box_try!(resp.write_to_bytes()) + }; + Ok(res_data) + } + + async fn handle_full_sampling(builder: &mut RowSampleBuilder) -> Result> { + let sample_res = builder.collect_column_stats().await?; + let res_data = { + let res: tipb::AnalyzeColumnsResp = sample_res.into(); + box_try!(res.write_to_bytes()) + }; + Ok(res_data) + } + + // handle_index is used to handle `AnalyzeIndexReq`, + // it would build a histogram and count-min sketch of index values. + async fn handle_index( + req: AnalyzeIndexReq, + scanner: &mut RangesScanner>, F>, + is_common_handle: bool, + ) -> Result> { + let mut hist = Histogram::new(req.get_bucket_size() as usize); + let mut cms = CmSketch::new( + req.get_cmsketch_depth() as usize, + req.get_cmsketch_width() as usize, + ); + let mut fms = FmSketch::new(req.get_sketch_size() as usize); + let mut topn_heap = BinaryHeap::new(); + // cur_val recording the current value's data and its counts when iterating + // index's rows. Once we met a new value, the old value will be pushed + // into the topn_heap to maintain the top-n information. + let mut cur_val: (u32, Vec) = (0, vec![]); + let top_n_size = req.get_top_n_size() as usize; + let stats_version = if req.has_version() { + req.get_version().into() + } else { + AnalyzeVersion::V1 + }; + while let Some(row) = scanner.next().await? 
{ + let mut key = row.key(); + if is_common_handle { + table::check_record_key(key)?; + key = &key[table::PREFIX_LEN..]; + } else { + table::check_index_key(key)?; + key = &key[table::PREFIX_LEN + table::ID_LEN..]; + } + let mut datums = key; + let mut data = Vec::with_capacity(key.len()); + for i in 0..req.get_num_columns() as usize { + if datums.is_empty() { + return Err(box_err!( + "{}th column is missing in datum buffer: {}", + i, + log_wrappers::Value::key(key) + )); + } + let (column, remaining) = split_datum(datums, false)?; + datums = remaining; + data.extend_from_slice(column); + if let Some(cms) = cms.as_mut() { + cms.insert(&data); + } + } + fms.insert(&data); + if stats_version == AnalyzeVersion::V2 { + hist.append(&data, true); + if cur_val.1 == data { + cur_val.0 += 1; + } else { + if cur_val.0 > 0 { + topn_heap.push(Reverse(cur_val)); + } + if topn_heap.len() > top_n_size { + topn_heap.pop(); + } + cur_val = (1, data); + } + } else { + hist.append(&data, false); + } + } + + if stats_version == AnalyzeVersion::V2 { + if cur_val.0 > 0 { + topn_heap.push(Reverse(cur_val)); + if topn_heap.len() > top_n_size { + topn_heap.pop(); + } + } + if let Some(c) = cms.as_mut() { + for heap_item in topn_heap { + c.sub(&(heap_item.0).1, (heap_item.0).0); + c.push_to_top_n((heap_item.0).1, (heap_item.0).0 as u64); + } + } + } + + let res: tipb::AnalyzeIndexResp = AnalyzeIndexResult::new(hist, cms, Some(fms)).into(); + let dt = box_try!(res.write_to_bytes()); + Ok(dt) + } +} + +#[async_trait] +impl RequestHandler for AnalyzeContext { + async fn handle_request(&mut self) -> Result> { + let ret = match self.req.get_tp() { + AnalyzeType::TypeIndex | AnalyzeType::TypeCommonHandle => { + let req = self.req.take_idx_req(); + let ranges = std::mem::take(&mut self.ranges); + table::check_table_ranges::(&ranges)?; + let mut scanner = RangesScanner::<_, F>::new(RangesScannerOptions { + storage: self.storage.take().unwrap(), + ranges: ranges + .into_iter() + .map(|r| 
Range::from_pb_range(r, false)) + .collect(), + scan_backward_in_range: false, + is_key_only: true, + is_scanned_range_aware: false, + }); + let res = AnalyzeContext::handle_index( + req, + &mut scanner, + self.req.get_tp() == AnalyzeType::TypeCommonHandle, + ) + .await; + scanner.collect_storage_stats(&mut self.storage_stats); + res + } + + AnalyzeType::TypeColumn => { + let col_req = self.req.take_col_req(); + let storage = self.storage.take().unwrap(); + let ranges = std::mem::take(&mut self.ranges); + let mut builder = SampleBuilder::<_, F>::new(col_req, None, storage, ranges)?; + let res = AnalyzeContext::handle_column(&mut builder).await; + builder.data.collect_storage_stats(&mut self.storage_stats); + res + } + + // Type mixed is analyze common handle and columns by scan table rows once. + AnalyzeType::TypeMixed => { + let col_req = self.req.take_col_req(); + let idx_req = self.req.take_idx_req(); + let storage = self.storage.take().unwrap(); + let ranges = std::mem::take(&mut self.ranges); + let mut builder = + SampleBuilder::<_, F>::new(col_req, Some(idx_req), storage, ranges)?; + let res = AnalyzeContext::handle_mixed(&mut builder).await; + builder.data.collect_storage_stats(&mut self.storage_stats); + res + } + + AnalyzeType::TypeFullSampling => { + let col_req = self.req.take_col_req(); + let storage = self.storage.take().unwrap(); + let ranges = std::mem::take(&mut self.ranges); + + let mut builder = RowSampleBuilder::<_, F>::new( + col_req, + storage, + ranges, + self.quota_limiter.clone(), + self.is_auto_analyze, + )?; + + let res = AnalyzeContext::handle_full_sampling(&mut builder).await; + builder.data.collect_storage_stats(&mut self.storage_stats); + res + } + + AnalyzeType::TypeSampleIndex => Err(Error::Other( + "Analyze of this kind not implemented".to_string(), + )), + }; + match ret { + Ok(data) => { + let memory_size = data.capacity(); + let mut resp = Response::default(); + resp.set_data(data); + Ok(MEMTRACE_ANALYZE.trace_guard(resp, 
memory_size)) + } + Err(Error::Other(e)) => { + let mut resp = Response::default(); + resp.set_other_error(e); + Ok(resp.into()) + } + Err(e) => Err(e), + } + } +} diff --git a/src/coprocessor/statistics/mod.rs b/src/coprocessor/statistics/mod.rs index 5616cef3b26..a136fd53fca 100644 --- a/src/coprocessor/statistics/mod.rs +++ b/src/coprocessor/statistics/mod.rs @@ -1,6 +1,7 @@ // Copyright 2017 TiKV Project Authors. Licensed under Apache-2.0. pub mod analyze; +pub mod analyze_context; pub mod cmsketch; pub mod fmsketch; pub mod histogram; From 04370e9ef47c2768d368cb62309631018be3eaa8 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Fri, 2 Feb 2024 10:12:25 -0800 Subject: [PATCH 1143/1149] raftstore: polish the availability check on conf change requests (#16486) close tikv/tikv#16465 When calculating the impact of conf change, include all operations into considerations. Signed-off-by: tonyxuqqi --- components/raftstore/src/store/util.rs | 472 +++++++++++++++++++------ 1 file changed, 362 insertions(+), 110 deletions(-) diff --git a/components/raftstore/src/store/util.rs b/components/raftstore/src/store/util.rs index 01f27b12ac6..856cfb12885 100644 --- a/components/raftstore/src/store/util.rs +++ b/components/raftstore/src/store/util.rs @@ -1010,7 +1010,7 @@ pub fn check_conf_change( change_peers: &[ChangePeerRequest], cc: &impl ConfChangeI, ignore_safety: bool, - peer_heartbeat: &collections::HashMap, + peer_heartbeats: &collections::HashMap, ) -> Result<()> { let current_progress = node.status().progress.unwrap().clone(); let mut after_progress = current_progress.clone(); @@ -1094,7 +1094,13 @@ pub fn check_conf_change( return Err(box_err!("multiple changes that only effect learner")); } - check_remove_or_demote_voter(region, cfg, change_peers, leader.get_id(), peer_heartbeat)?; + check_availability_by_last_heartbeats( + region, + cfg, + change_peers, + leader.get_id(), + peer_heartbeats, + )?; if !ignore_safety { let promoted_commit_index = 
after_progress.maximal_committed_index().0; let first_index = node.raft.raft_log.first_index(); @@ -1123,77 +1129,103 @@ pub fn check_conf_change( } } -fn check_remove_or_demote_voter( +/// Check the would-be availability if the operation proceed. +/// If the slow peers count would be equal or larger than normal peers count, +/// then the operations would be rejected +fn check_availability_by_last_heartbeats( region: &metapb::Region, cfg: &Config, change_peers: &[ChangePeerRequest], leader_id: u64, - peer_heartbeat: &collections::HashMap, + peer_heartbeats: &collections::HashMap, ) -> Result<()> { - let mut slow_voters_count = 0; - let mut normal_voters_count = 0; + let mut slow_voters = vec![]; + let mut normal_voters = vec![]; + // Here we assume if the last beartbeat is within 2 election timeout, the peer // is healthy. When a region is hibernate, we expect all its peers are *slow* // and it would still allow the operation let slow_voter_threshold = 2 * cfg.raft_base_tick_interval.0 * cfg.raft_max_election_timeout_ticks as u32; - for (id, last_heartbeat) in peer_heartbeat { + for (id, last_heartbeat) in peer_heartbeats { // for slow and normal peer calculation, we only count voter role if region .get_peers() .iter() .find(|p| p.get_id() == *id) - .map_or(false, |p| p.role == PeerRole::Voter) + .map_or(false, |p| { + p.role == PeerRole::Voter || p.role == PeerRole::IncomingVoter + }) { // leader itself is not a slow peer if *id == leader_id || last_heartbeat.elapsed() <= slow_voter_threshold { - normal_voters_count += 1; + normal_voters.push(*id); } else { - slow_voters_count += 1; + slow_voters.push(*id); } } } + let is_healthy = normal_voters.len() > slow_voters.len(); + // if it's already unhealthy, let it go + if !is_healthy { + return Ok(()); + } + let mut normal_voters_to_remove = vec![]; + let mut slow_voters_to_add = vec![]; for cp in change_peers { let (change_type, peer) = (cp.get_change_type(), cp.get_peer()); - if change_type == 
ConfChangeType::RemoveNode - || change_type == ConfChangeType::AddLearnerNode - { - let is_voter = region - .get_peers() - .iter() - .find(|p| p.get_id() == peer.get_id()) - .map_or(false, |p| p.role == PeerRole::Voter); + let is_voter = region + .get_peers() + .iter() + .find(|p| p.get_id() == peer.get_id()) + .map_or(false, |p| { + p.role == PeerRole::Voter || p.role == PeerRole::IncomingVoter + }); + if !is_voter && change_type == ConfChangeType::AddNode { + // exiting peers, promoting from learner to voter + if let Some(last_heartbeat) = peer_heartbeats.get(&peer.get_id()) { + if last_heartbeat.elapsed() <= slow_voter_threshold { + normal_voters.push(peer.get_id()); + } else { + slow_voters.push(peer.get_id()); + slow_voters_to_add.push(peer.get_id()); + } + } else { + // it's a new peer, assuming it's a normal voter + normal_voters.push(peer.get_id()); + } + } + if is_voter + && (change_type == ConfChangeType::RemoveNode + || change_type == ConfChangeType::AddLearnerNode) + { // If the change_type is AddLearnerNode and the last heartbeat is found, it // means it's a demote from voter as AddLearnerNode on existing learner node is // not allowed. - if is_voter && let Some(last_heartbeat) = peer_heartbeat.get(&peer.get_id()) { - // peer itself is *not* slow peer, but current slow peer is >= total peers/2 + if let Some(last_heartbeat) = peer_heartbeats.get(&peer.get_id()) { if last_heartbeat.elapsed() <= slow_voter_threshold { - normal_voters_count -= 1; + normal_voters.retain(|id| *id != peer.get_id()); normal_voters_to_remove.push(peer.clone()); } } } } - // only block the conf change when there's chance to improve the availability - // For example, if there's no normal peers actually, then we still allow the - // option to finish as there's no choice. 
- // We only block the operation when normal peers are going to be removed and it - // could lead to slow peers more than normal peers - if !normal_voters_to_remove.is_empty() - && slow_voters_count > 0 - && slow_voters_count >= normal_voters_count - { + // Only block the conf change when currently it's healthy, but would be + // unhealthy. If currently it's already unhealthy, let it go. + if slow_voters.len() >= normal_voters.len() { return Err(box_err!( - "Ignore conf change command on region {} because RemoveNode or Demote a voter on peers {:?} may lead to unavailability. There're {} slow peers and {} normal peers", + "Ignore conf change command on [region_id={}] because the operations may lead to unavailability.\ + Normal voters to remove {:?}, slow voters to add {:?}.\ + Normal voters would be {:?}, slow voters would be {:?}.", region.get_id(), &normal_voters_to_remove, - slow_voters_count, - normal_voters_count + &slow_voters_to_add, + &normal_voters, + &slow_voters )); } @@ -2485,97 +2517,317 @@ mod tests { // Create a sample configuration let mut cfg = Config::default(); cfg.raft_max_election_timeout_ticks = 10; - // Initialize change_peers - let change_peers = vec![ - ChangePeerRequest { - change_type: eraftpb::ConfChangeType::RemoveNode, - peer: Some(metapb::Peer { - id: 2, - ..Default::default() - }) - .into(), - ..Default::default() - }, - ChangePeerRequest { - change_type: eraftpb::ConfChangeType::AddLearnerNode, - peer: Some(metapb::Peer { - id: 2, - ..Default::default() - }) - .into(), - ..Default::default() - }, - ]; + // peer 1, 2, 3 are voters, 4, 5 are learners. 
let mut region = Region::default(); - for i in 1..4 { + for i in 1..3 { region.mut_peers().push(metapb::Peer { id: i, + role: PeerRole::Voter, ..Default::default() }); } - for i in 0..change_peers.len() { + region.mut_peers().push(metapb::Peer { + id: 3, + role: PeerRole::IncomingVoter, + ..Default::default() + }); + for i in 4..6 { + region.mut_peers().push(metapb::Peer { + id: i, + role: PeerRole::Learner, + ..Default::default() + }); + } + + // heartbeats: peer 3, 5 are slow + let mut peer_heartbeat = collections::HashMap::default(); + peer_heartbeat.insert( + 1, + std::time::Instant::now() - std::time::Duration::from_secs(1), + ); + peer_heartbeat.insert( + 2, + std::time::Instant::now() - std::time::Duration::from_secs(1), + ); + peer_heartbeat.insert( + 3, + std::time::Instant::now() - std::time::Duration::from_secs(100), + ); + peer_heartbeat.insert( + 4, + std::time::Instant::now() - std::time::Duration::from_secs(1), + ); + peer_heartbeat.insert( + 5, + std::time::Instant::now() - std::time::Duration::from_secs(100), + ); + + // Initialize change_peers + let change_peers_and_expect = vec![ + // promote peer 4 from learner to voter, it should work + ( + vec![ChangePeerRequest { + change_type: eraftpb::ConfChangeType::AddNode, + peer: Some(metapb::Peer { + id: 4, + ..Default::default() + }) + .into(), + ..Default::default() + }], + true, + ), + // promote peer 5 from learner to voter, it should be rejected (two slow voters vs two + // normal voters) + ( + vec![ChangePeerRequest { + change_type: eraftpb::ConfChangeType::AddNode, + peer: Some(metapb::Peer { + id: 4, + ..Default::default() + }) + .into(), + ..Default::default() + }], + true, + ), + // remove a peer 3, it should work as peer 3 is slow + ( + vec![ChangePeerRequest { + change_type: eraftpb::ConfChangeType::RemoveNode, + peer: Some(metapb::Peer { + id: 3, + ..Default::default() + }) + .into(), + ..Default::default() + }], + true, + ), + // remove a peer 2, it should be rejected as peer 3 is slow + 
( + vec![ChangePeerRequest { + change_type: eraftpb::ConfChangeType::RemoveNode, + peer: Some(metapb::Peer { + id: 2, + ..Default::default() + }) + .into(), + ..Default::default() + }], + false, + ), + // demote peer2, it should be rejected + ( + vec![ChangePeerRequest { + change_type: eraftpb::ConfChangeType::AddLearnerNode, + peer: Some(metapb::Peer { + id: 2, + ..Default::default() + }) + .into(), + ..Default::default() + }], + false, + ), + // demote peer 2, but promote peer 4 as voter, it should work + ( + vec![ + ChangePeerRequest { + change_type: eraftpb::ConfChangeType::AddNode, + peer: Some(metapb::Peer { + id: 4, + ..Default::default() + }) + .into(), + ..Default::default() + }, + ChangePeerRequest { + change_type: eraftpb::ConfChangeType::AddLearnerNode, + peer: Some(metapb::Peer { + id: 2, + ..Default::default() + }) + .into(), + ..Default::default() + }, + ], + true, + ), + // demote peer 2, but promote peer 5 as voter, it should be rejected because peer 5 is + // slow + ( + vec![ + ChangePeerRequest { + change_type: eraftpb::ConfChangeType::AddNode, + peer: Some(metapb::Peer { + id: 5, + ..Default::default() + }) + .into(), + ..Default::default() + }, + ChangePeerRequest { + change_type: eraftpb::ConfChangeType::AddLearnerNode, + peer: Some(metapb::Peer { + id: 2, + ..Default::default() + }) + .into(), + ..Default::default() + }, + ], + false, + ), + // promote peer 4 and 5 as voter, it should be ok + ( + vec![ + ChangePeerRequest { + change_type: eraftpb::ConfChangeType::AddNode, + peer: Some(metapb::Peer { + id: 4, + ..Default::default() + }) + .into(), + ..Default::default() + }, + ChangePeerRequest { + change_type: eraftpb::ConfChangeType::AddNode, + peer: Some(metapb::Peer { + id: 5, + ..Default::default() + }) + .into(), + ..Default::default() + }, + ], + true, + ), + ]; + + for (cp, expect_result) in change_peers_and_expect { // Call the function under test and assert that the function returns failed - let mut cp = 
vec![change_peers[i].clone()]; - let mut peer_heartbeat = collections::HashMap::default(); - peer_heartbeat.insert( - 1, - std::time::Instant::now() - std::time::Duration::from_secs(1), - ); - peer_heartbeat.insert( - 2, - std::time::Instant::now() - std::time::Duration::from_secs(1), - ); - peer_heartbeat.insert( - 3, - std::time::Instant::now() - std::time::Duration::from_secs(1), - ); // Call the function under test and assert that the function returns Ok - check_remove_or_demote_voter(®ion, &cfg, &cp, 1, &peer_heartbeat).unwrap(); - - // now make one peer slow - if let Some(peer_heartbeat) = peer_heartbeat.get_mut(&3) { - *peer_heartbeat = std::time::Instant::now() - std::time::Duration::from_secs(100); + let result = + check_availability_by_last_heartbeats(®ion, &cfg, &cp, 1, &peer_heartbeat); + if expect_result { + assert!(result.is_ok()); + } else { + assert!(result.is_err(), "{:?}", cp); } + } + } - // Call the function under test - let result = check_remove_or_demote_voter(®ion, &cfg, &cp, 1, &peer_heartbeat); - // Assert that the function returns failed - assert!(result.is_err()); + #[test] + fn test_check_conf_change_on_unhealthy_status() { + // Create a sample configuration + let mut cfg = Config::default(); + cfg.raft_max_election_timeout_ticks = 10; - // remove the slow peer instead - cp[0].peer = Some(metapb::Peer { - id: 3, - ..Default::default() - }) - .into(); - // Call the function under test - check_remove_or_demote_voter(®ion, &cfg, &cp, 1, &peer_heartbeat).unwrap(); - - // make peer to learner and remove the peer 2 - region.mut_peers()[1].set_role(metapb::PeerRole::Learner); - cp[0].peer = Some(metapb::Peer { - id: 2, + // peer 1, 2, 3 are voters, 4 is learner + let mut region = Region::default(); + region.mut_peers().push(metapb::Peer { + id: 1, + role: PeerRole::Voter, + ..Default::default() + }); + for i in 2..4 { + region.mut_peers().push(metapb::Peer { + id: i, + role: PeerRole::IncomingVoter, ..Default::default() - }) - .into(); - // 
Call the function under test - check_remove_or_demote_voter(®ion, &cfg, &cp, 1, &peer_heartbeat).unwrap(); - // set peer 2 voter again - region.mut_peers()[1].set_role(metapb::PeerRole::Voter); - - // there's no remove node, it's fine with slow peers. - cp[0] = ChangePeerRequest { - change_type: eraftpb::ConfChangeType::AddNode, - peer: Some(metapb::Peer { - id: 2, + }); + } + region.mut_peers().push(metapb::Peer { + id: 4, + role: PeerRole::Learner, + ..Default::default() + }); + + // heartbeats: peer 2, 3, 4 are slow, it's already unhealthy now + let mut peer_heartbeat = collections::HashMap::default(); + peer_heartbeat.insert( + 1, + std::time::Instant::now() - std::time::Duration::from_secs(1), + ); + peer_heartbeat.insert( + 2, + std::time::Instant::now() - std::time::Duration::from_secs(100), + ); + peer_heartbeat.insert( + 3, + std::time::Instant::now() - std::time::Duration::from_secs(100), + ); + peer_heartbeat.insert( + 4, + std::time::Instant::now() - std::time::Duration::from_secs(100), + ); + + // Initialize change_peers + let change_peers_and_expect = vec![ + // promote peer 4 from learner to voter, it should work + ( + vec![ChangePeerRequest { + change_type: eraftpb::ConfChangeType::AddNode, + peer: Some(metapb::Peer { + id: 4, + ..Default::default() + }) + .into(), ..Default::default() - }) - .into(), - ..Default::default() - }; - // Call the function under test - check_remove_or_demote_voter(®ion, &cfg, &cp, 1, &peer_heartbeat).unwrap(); + }], + true, + ), + // remove a peer 3, it should work as peer 3 is slow + ( + vec![ChangePeerRequest { + change_type: eraftpb::ConfChangeType::RemoveNode, + peer: Some(metapb::Peer { + id: 3, + ..Default::default() + }) + .into(), + ..Default::default() + }], + true, + ), + // remove a peer 2, 3, it should work + ( + vec![ + ChangePeerRequest { + change_type: eraftpb::ConfChangeType::RemoveNode, + peer: Some(metapb::Peer { + id: 2, + ..Default::default() + }) + .into(), + ..Default::default() + }, + 
ChangePeerRequest { + change_type: eraftpb::ConfChangeType::AddLearnerNode, + peer: Some(metapb::Peer { + id: 3, + ..Default::default() + }) + .into(), + ..Default::default() + }, + ], + true, + ), + ]; + + for (cp, expect_result) in change_peers_and_expect { + // Call the function under test and assert that the function returns failed + // Call the function under test and assert that the function returns Ok + let result = + check_availability_by_last_heartbeats(®ion, &cfg, &cp, 1, &peer_heartbeat); + if expect_result { + assert!(result.is_ok()); + } else { + assert!(result.is_err(), "{:?}", cp); + } } } } From 6e79df9f73173d37d09f5a0db4782ddb1b7a1467 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Fri, 16 Feb 2024 04:05:22 +0100 Subject: [PATCH 1144/1149] debug wal log Signed-off-by: tonyxuqqi --- Cargo.lock | 13 +++++++++---- Cargo.toml | 4 ++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f036ff1e32d..9f2514edacd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2887,7 +2887,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#256c9ca2f45fef644b518223707de50f841fe9e8" +source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#8d39f03c176f1e0d415c34224089e1e7e55636b6" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2906,7 +2906,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tikv/rust-rocksdb.git#256c9ca2f45fef644b518223707de50f841fe9e8" +source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#8d39f03c176f1e0d415c34224089e1e7e55636b6" dependencies = [ "bzip2-sys", "cc", @@ -4740,7 +4740,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tikv/rust-rocksdb.git#256c9ca2f45fef644b518223707de50f841fe9e8" +source = 
"git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#8d39f03c176f1e0d415c34224089e1e7e55636b6" dependencies = [ "libc 0.2.151", "librocksdb_sys", @@ -5461,7 +5461,7 @@ dependencies = [ [[package]] name = "snappy-sys" version = "0.1.0" -source = "git+https://github.com/tikv/rust-snappy.git?branch=static-link#8c12738bad811397600455d6982aff754ea2ac44" +source = "git+https://github.com/busyjay/rust-snappy.git?branch=static-link#8c12738bad811397600455d6982aff754ea2ac44" dependencies = [ "cmake", "libc 0.2.151", @@ -7673,3 +7673,8 @@ dependencies = [ "cc", "libc 0.2.151", ] + +[[patch.unused]] +name = "snappy-sys" +version = "0.1.0" +source = "git+https://github.com/tikv/rust-snappy.git?branch=static-link#8c12738bad811397600455d6982aff754ea2ac44" diff --git a/Cargo.toml b/Cargo.toml index ad5973d618b..3d7597697ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -222,8 +222,8 @@ procinfo = { git = "https://github.com/tikv/procinfo-rs", rev = "7693954bd1dd86e # kvproto = { git = "https://github.com/your_github_id/kvproto", branch = "your_branch" } # # After the PR to rust-rocksdb is merged, remember to comment this out and run `cargo update -p rocksdb`. 
-# [patch.'https://github.com/tikv/rust-rocksdb'] -# rocksdb = { git = "https://github.com/your_github_id/rust-rocksdb", branch = "your_branch" } +[patch.'https://github.com/tikv/rust-rocksdb'] +rocksdb = { git = "https://github.com/tonyxuqqi/rust-rocksdb", branch = "rocksdb_wal_debug" } [workspace] # See https://github.com/rust-lang/rfcs/blob/master/text/2957-cargo-features2.md From 2f0290e35034f061ff12342b0db84d80f7cfd355 Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Fri, 16 Feb 2024 04:37:29 +0100 Subject: [PATCH 1145/1149] update Signed-off-by: tonyxuqqi --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9f2514edacd..46bc0c7034b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2887,7 +2887,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#8d39f03c176f1e0d415c34224089e1e7e55636b6" +source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#6e26337c2b17cc1b90a18c13ce166c3d40ae2e0f" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2906,7 +2906,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#8d39f03c176f1e0d415c34224089e1e7e55636b6" +source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#6e26337c2b17cc1b90a18c13ce166c3d40ae2e0f" dependencies = [ "bzip2-sys", "cc", @@ -4740,7 +4740,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#8d39f03c176f1e0d415c34224089e1e7e55636b6" +source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#6e26337c2b17cc1b90a18c13ce166c3d40ae2e0f" dependencies = [ "libc 0.2.151", "librocksdb_sys", From 145afb5d29d87065ba022bc035fb42995085f5da Mon Sep 17 00:00:00 2001 From: tonyxuqqi Date: Fri, 16 
Feb 2024 19:14:24 +0100 Subject: [PATCH 1146/1149] sync latest rocksdb Signed-off-by: tonyxuqqi --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 46bc0c7034b..aeb7e276fa0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2887,7 +2887,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#6e26337c2b17cc1b90a18c13ce166c3d40ae2e0f" +source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#01407e4c5ba89e5cacf1410beabb0fc7d27c4d25" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2906,7 +2906,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#6e26337c2b17cc1b90a18c13ce166c3d40ae2e0f" +source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#01407e4c5ba89e5cacf1410beabb0fc7d27c4d25" dependencies = [ "bzip2-sys", "cc", @@ -4740,7 +4740,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#6e26337c2b17cc1b90a18c13ce166c3d40ae2e0f" +source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#01407e4c5ba89e5cacf1410beabb0fc7d27c4d25" dependencies = [ "libc 0.2.151", "librocksdb_sys", From 9e7ba6e1f6eea9c6661bc440540691a983a09dcd Mon Sep 17 00:00:00 2001 From: Qi Xu Date: Sun, 10 Mar 2024 19:34:59 -0700 Subject: [PATCH 1147/1149] test Signed-off-by: Qi Xu --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aeb7e276fa0..d63cda6632e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2887,7 +2887,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = 
"git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#01407e4c5ba89e5cacf1410beabb0fc7d27c4d25" +source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#fb59db9383811a0230b2dcc178d14bb8f68a5541" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2906,7 +2906,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#01407e4c5ba89e5cacf1410beabb0fc7d27c4d25" +source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#fb59db9383811a0230b2dcc178d14bb8f68a5541" dependencies = [ "bzip2-sys", "cc", @@ -4740,7 +4740,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#01407e4c5ba89e5cacf1410beabb0fc7d27c4d25" +source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#fb59db9383811a0230b2dcc178d14bb8f68a5541" dependencies = [ "libc 0.2.151", "librocksdb_sys", From c30c9d6bc2f84d30c7475a8819aa8c23fee668d6 Mon Sep 17 00:00:00 2001 From: Qi Xu Date: Sun, 10 Mar 2024 21:05:49 -0700 Subject: [PATCH 1148/1149] sync Signed-off-by: Qi Xu --- Cargo.lock | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d63cda6632e..d65fd1c54e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2887,7 +2887,7 @@ dependencies = [ [[package]] name = "librocksdb_sys" version = "0.1.0" -source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#fb59db9383811a0230b2dcc178d14bb8f68a5541" +source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#3b86672e75c24833165fa663a9ac5a14de350e7e" dependencies = [ "bindgen 0.65.1", "bzip2-sys", @@ -2906,7 +2906,7 @@ dependencies = [ [[package]] name = "libtitan_sys" version = "0.0.1" -source = 
"git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#fb59db9383811a0230b2dcc178d14bb8f68a5541" +source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#3b86672e75c24833165fa663a9ac5a14de350e7e" dependencies = [ "bzip2-sys", "cc", @@ -4740,7 +4740,7 @@ dependencies = [ [[package]] name = "rocksdb" version = "0.3.0" -source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#fb59db9383811a0230b2dcc178d14bb8f68a5541" +source = "git+https://github.com/tonyxuqqi/rust-rocksdb?branch=rocksdb_wal_debug#3b86672e75c24833165fa663a9ac5a14de350e7e" dependencies = [ "libc 0.2.151", "librocksdb_sys", @@ -5461,7 +5461,7 @@ dependencies = [ [[package]] name = "snappy-sys" version = "0.1.0" -source = "git+https://github.com/busyjay/rust-snappy.git?branch=static-link#8c12738bad811397600455d6982aff754ea2ac44" +source = "git+https://github.com/tikv/rust-snappy.git?branch=static-link#8c12738bad811397600455d6982aff754ea2ac44" dependencies = [ "cmake", "libc 0.2.151", @@ -7673,8 +7673,3 @@ dependencies = [ "cc", "libc 0.2.151", ] - -[[patch.unused]] -name = "snappy-sys" -version = "0.1.0" -source = "git+https://github.com/tikv/rust-snappy.git?branch=static-link#8c12738bad811397600455d6982aff754ea2ac44" From fec3704d8ff1a98ce50ee98de4669cb43bd1f02f Mon Sep 17 00:00:00 2001 From: Qi Xu Date: Mon, 11 Mar 2024 09:17:07 -0700 Subject: [PATCH 1149/1149] fix compile issue for test Signed-off-by: Qi Xu --- deny.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/deny.toml b/deny.toml index ee4099d1370..aa4abee4c67 100644 --- a/deny.toml +++ b/deny.toml @@ -108,3 +108,6 @@ exceptions = [ unknown-git = "deny" unknown-registry = "deny" allow-org = { github = ["tikv", "pingcap", "rust-lang"] } +allow-git = [ + "https://github.com/tonyxuqqi/rust-rocksdb", +] \ No newline at end of file